动手学深度学习课程竞赛:Classify Leaves
课程地址:30 第二部分完结竞赛:图片分类【动手学深度学习v2】_哔哩哔哩_bilibili
竞赛地址:Classify Leaves | Kaggle
李沐老师官方总结:43 树叶分类竞赛技术总结【动手学深度学习v2】_哔哩哔哩_bilibili
[数据预处理及建立不同数据集]
import torch
import numpy as np
import pandas as pd
from torchvision import transforms
from PIL import Image
from torch import nn
from torch.nn import functional as F
from d2l import torch as d2l
from torch.utils.data import Dataset, DataLoader
# Load the train and test image lists from the competition directory.
path = '/home/NAS/HUIDA/YaqinJiang/my/chapter_convolutional-modern/classify-leaves/'
train_csv, test_csv = (
    pd.read_csv(path + csv_name) for csv_name in ('train.csv', 'test.csv')
)
# Shuffle every row of the train list (frac=1 samples the whole frame) and
# renumber the index from 0. Per the original author's note, the train list
# is stored in an ordered fashion, so it is shuffled up front to make
# training as representative as possible.
shuffled_rows = train_csv.sample(frac=1)
train_csv = shuffled_rows.reset_index(drop=True)
# Number the train labels by class so predictions can be mapped back later.
# Column 1 of the train list holds the label; the distinct labels are sorted
# so that the numbering does not depend on row order.
labels = sorted(set(train_csv.iloc[:, 1]))
# Label -> index mapping.
label_index = dict(zip(labels, range(len(labels))))
# Index -> label mapping (the inverse of label_index).
index_label = dict(enumerate(labels))
# 创建Dataset,用于读取train、valid和test数据
class LeavesDataSet(Dataset):# 设置k,作为第k折def __init__(self, data_csv, mode='train', k=0):self.mode = modeself.k = kself.data_csv = data_csvself.data_len = len(data_csv.index)self.fold_size = self.data_len // 5# 所有的train数据,最后训练的时候用if mode == 'all':self.all_images = np.asarray(self.data_csv.iloc[:, 0])self.all_labels = np.asarray(self.data_csv.iloc[:, 1])self.image_names = self.all_images self.label_names = self.all_labelsself.trans = transforms.Compose([transforms.Resize((224, 224)),# train数据做数据增强transforms.RandomHorizontalFlip(p=0.5),transforms.ColorJitter(brightness=0.4, contrast=0.4, saturation=0.4),transforms.ToTensor(), transforms.Normalize([0.485, 0.456, 0.406],[0.229, 0.224, 0.225])])elif mode == 'train':# 第k折前后均为train数据self.train_images = np.asarray(pd.concat([self.data_csv.iloc[0:self.k*self.fold_size, 0],self.data_csv.iloc[(self.k+1)*self.fold_size:, 0]]))self.train_labels = np.asarray(pd.concat([self.data_csv.iloc[0:self.k*self.fold_size, 1],self.data_csv.iloc[(self.k+1)*self.fold_size:, 1]])) self.image_names = self.train_images self.label_names = self.train_labelsself.trans = transforms.Compose([