当前位置: 首页 > news >正文

《动手学深度学习(PyTorch版)》笔记3.6

注:书中对代码的讲解并不详细,本文对很多细节做了详细注释。另外,书上的源代码是在Jupyter Notebook上运行的,较为分散,本文将代码集中起来,并加以完善,全部用vscode在python 3.9.18下测试通过。

Chapter3 Linear Neural Networks

3.6 Implementations of Softmax Regression from Scratch

import torch
import torchvision
import time
import matplotlib.pyplot as plt
import numpy as np 
from torch.utils import data
from torchvision import transforms
from d2l import torch as d2l
from IPython import display#初始化参数
# Data iterators and model parameters.
batch_size = 256
train_iter, test_iter = d2l.load_data_fashion_mnist(batch_size)

# Each image is 28*28 pixels; this section treats it as a flat vector of
# length 784.
num_inputs = 784
# Softmax regression has as many outputs as classes; the dataset has
# 10 classes.
num_outputs = 10

W = torch.normal(
    0, 0.01, size=(num_inputs, num_outputs), requires_grad=True
)
b = torch.zeros(num_outputs, requires_grad=True)

# Define the softmax operation.
def softmax(X):
    """Apply a numerically stable softmax to each row of ``X``.

    Args:
        X: 2-D tensor of scores; the softmax is taken over dimension 1.

    Returns:
        Tensor with the same shape as ``X`` whose rows each sum to 1.
    """
    # Shifting every row by its maximum leaves the result mathematically
    # unchanged, but keeps exp() from overflowing to inf (and the division
    # from producing nan) when the scores are large.
    shifted = X - X.max(dim=1, keepdim=True).values
    X_exp = torch.exp(shifted)
    partition = X_exp.sum(1, keepdim=True)
    return X_exp / partition  # each row sums to 1


# Define the softmax regression model.
# Before the data is passed to the model, reshape flattens every raw image
# into a vector.
def net(X):
    """Softmax regression: flatten ``X``, apply ``W`` and ``b``, then softmax."""
    flattened = X.reshape((-1, W.shape[0]))
    logits = torch.matmul(flattened, W) + b
    return softmax(logits)


y = torch.tensor([0, 2])
# Given y, the first class is the correct prediction for the first sample
# and the third class is the correct prediction for the second sample.
y_hat = torch.tensor([[0.1, 0.3, 0.6], [0.3, 0.2, 0.5]])
# Predicted probabilities of 2 samples over 3 classes.
# print(y_hat[[0, 1], y])
# Using y as the index into the probabilities in y_hat selects the
# probability of the first class for the first sample and of the third
# class for the second sample, i.e. [y_hat[0, y[0]], y_hat[1, y[1]]].

# Define the cross-entropy loss function.
def cross_entropy(y_hat, y):
    """Return the per-sample cross-entropy loss.

    Args:
        y_hat: tensor of predicted probabilities, one row per sample.
        y: class indices used to pick one column from each row of ``y_hat``.

    Returns:
        Tensor holding ``-log`` of the probability each row assigns to its
        labelled class.
    """
    row_ids = range(len(y_hat))
    true_class_prob = y_hat[row_ids, y]
    return -torch.log(true_class_prob)


# Define a class for accumulating sums over multiple variables.
class Accumulator:  #@save
    """Accumulate running sums over ``n`` variables."""

    def __init__(self, n):
        # One float slot per tracked variable.
        self.data = [0.0] * n

    def add(self, *args):
        """Add one value to every slot, in positional order.

        Raises:
            ValueError: if the number of values differs from the number of
                slots. A bare zip() would silently drop slots or values.
        """
        if len(args) != len(self.data):
            raise ValueError(
                f"expected {len(self.data)} values, got {len(args)}"
            )
        self.data = [a + float(b) for a, b in zip(self.data, args)]

    def reset(self):
        """Set every slot back to zero."""
        self.data = [0.0] * len(self.data)

    def __getitem__(self, idx):
        return self.data[idx]


# Compute classification accuracy.
def accuracy(y_hat, y):  #@save
    """Return the number of correct predictions as a float."""
    # If y_hat is a matrix, its second dimension is assumed to hold the
    # per-class scores, so argmax over it yields the predicted class.
    is_score_matrix = len(y_hat.shape) > 1 and y_hat.shape[1] > 1
    if is_score_matrix:
        y_hat = y_hat.argmax(axis=1)
    # "==" is sensitive to dtype, so cast the predictions to y's dtype
    # before comparing.
    matches = y_hat.type(y.dtype) == y
    return float(matches.type(y.dtype).sum())


def evaluate_accuracy(net, data_iter):  #@save
    """Return the accuracy of ``net`` over the batches of ``data_iter``."""
    if isinstance(net, torch.nn.Module):
        net.eval()  # switch the model to evaluation mode
    # Slot 0: number of correct predictions; slot 1: number of predictions.
    metric = Accumulator(2)
    with torch.no_grad():  # gradient computation is disabled here
        for X, y in data_iter:
            correct = accuracy(net(X), y)
            # y.numel() is the total number of elements in y.
            metric.add(correct, y.numel())
    return metric[0] / metric[1]


# if __name__ == '__main__':
#     print(evaluate_accuracy(net, test_iter))
# The net model is initialized with random weights, so its accuracy is close
# to random guessing, i.e. close to 0.1 with 10 classes.

# Define a class that plots data in an animation.
class Animator:
    """Incrementally plot several lines on one set of axes."""

    def __init__(self, xlabel=None, ylabel=None, legend=None, xlim=None,
                 ylim=None, xscale='linear', yscale='linear',
                 fmts=('-', 'm--', 'g-.', 'r:'), nrows=1, ncols=1,
                 figsize=(3.5, 2.5)):
        legend = [] if legend is None else legend
        d2l.use_svg_display()
        # plt.subplots() creates the figure and one or more subplots.
        self.fig, self.axes = d2l.plt.subplots(nrows, ncols, figsize=figsize)
        if nrows * ncols == 1:
            # Wrap the single axes object so it can be indexed like a list.
            self.axes = [self.axes, ]

        def _configure():
            # Closure over the constructor arguments, so the axes can be
            # re-configured after every redraw.
            return d2l.set_axes(
                self.axes[0], xlabel, ylabel, xlim, ylim, xscale, yscale,
                legend)

        self.config_axes = _configure
        self.X, self.Y, self.fmts = None, None, fmts

    def add(self, x, y):
        """Append data points to the lines and redraw the plot."""
        # A y without a length is wrapped in a list so it can be processed
        # as a collection of values.
        if not hasattr(y, "__len__"):
            y = [y]
        num_lines = len(y)
        # An x without a length is repeated to match the length of y.
        if not hasattr(x, "__len__"):
            x = [x] * num_lines
        # Start one empty series per element of y when none are stored yet.
        if not self.X:
            self.X = [[] for _ in range(num_lines)]
        if not self.Y:
            self.Y = [[] for _ in range(num_lines)]
        for idx, (x_val, y_val) in enumerate(zip(x, y)):
            if x_val is None or y_val is None:
                continue
            self.X[idx].append(x_val)
            self.Y[idx].append(y_val)
        axis = self.axes[0]
        axis.cla()  # clear the current axis before drawing the new data
        for xs, ys, fmt in zip(self.X, self.Y, self.fmts):
            axis.plot(xs, ys, fmt)
        self.config_axes()
        display.display(self.fig)
        display.clear_output(wait=True)


# Training
def train_epoch_ch3(net, train_iter, loss, updater):  #@save
    """Train the model for one epoch.

    Args:
        net: the model; switched to training mode if it is a
            ``torch.nn.Module``.
        train_iter: iterable yielding ``(X, y)`` batches.
        loss: callable returning the loss tensor for ``(y_hat, y)``.
        updater: either a ``torch.optim.Optimizer`` or a callable that is
            invoked with the batch size (defined further down).

    Returns:
        Tuple ``(loss sum / sample count, correct count / sample count)``.
    """
    if isinstance(net, torch.nn.Module):
        net.train()
    # Slots: sum of training loss, number of correct predictions,
    # number of samples.
    metric = Accumulator(3)
    uses_torch_optimizer = isinstance(updater, torch.optim.Optimizer)
    for X, y in train_iter:
        # Compute gradients and update the parameters.
        y_hat = net(X)
        l = loss(y_hat, y)
        if uses_torch_optimizer:
            # Built-in PyTorch optimizer and loss function.
            updater.zero_grad()  # clear previously computed gradients
            l.mean().backward()
            updater.step()
        else:
            # Custom optimizer and loss function.
            l.sum().backward()
            updater(X.shape[0])
        metric.add(float(l.sum()), accuracy(y_hat, y), y.numel())
    loss_sum, num_correct, num_samples = metric[0], metric[1], metric[2]
    return loss_sum / num_samples, num_correct / num_samples


def train_ch3(net, train_iter, test_iter, loss, num_epochs, updater):
    """Train the model for ``num_epochs`` epochs and plot the metrics."""
    animator = Animator(
        xlabel='epoch',
        xlim=[1, num_epochs],
        ylim=[0.3, 0.9],
        legend=['train loss', 'train acc', 'test acc'],
    )
    for epoch in range(num_epochs):
        # Train for one epoch, then evaluate on the test set.
        train_metrics = train_epoch_ch3(net, train_iter, loss, updater)
        test_acc = evaluate_accuracy(net, test_iter)
        # Add train loss, train accuracy and test accuracy to the plot.
        animator.add(epoch + 1, train_metrics + (test_acc,))
    # Metrics of the last epoch.
    train_loss, train_acc = train_metrics
    # Sanity checks; a failed condition raises AssertionError.
    assert train_loss < 0.5, train_loss
    assert 0.7 < train_acc <= 1, train_acc
    assert 0.7 < test_acc <= 1, test_acc


def updater(batch_size):
    """Run one ``d2l.sgd`` step on ``W`` and ``b`` with the global ``lr``."""
    return d2l.sgd([W, b], lr, batch_size)


if __name__ == '__main__':
    lr = 0.1
    num_epochs = 10
    train_ch3(net, train_iter, test_iter, cross_entropy, num_epochs, updater)

# Prediction
# Given a series of images, compare their actual labels (first line of the
# text output) with the model's predictions (second line of the text output).
def predict_ch3(net, test_iter, n=6):
    """Predict labels and show the first images of one test batch.

    Args:
        net: callable mapping a batch of images to per-class scores.
        test_iter: iterable yielding ``(X, y)`` batches; only the first
            batch is used.
        n: maximum number of images to display.
    """
    # Take the first batch of images and their true labels.
    X, y = next(iter(test_iter))
    # A batch can hold fewer than n images; clamp n so the reshape below
    # does not fail.
    n = min(n, len(X))
    # True labels in text form.
    trues = d2l.get_fashion_mnist_labels(y)
    # Model predictions for the batch, converted to text labels.
    preds = d2l.get_fashion_mnist_labels(net(X).argmax(axis=1))
    # Title of each image: true label, newline, predicted label.
    titles = [true + '\n' + pred for true, pred in zip(trues, preds)]
    # Show the first n images with their titles.
    d2l.show_images(X[0:n].reshape((n, 28, 28)), 1, n, titles=titles[0:n])


if __name__ == '__main__':
    predict_ch3(net, test_iter)
    plt.show()  # show the line chart and the prediction images together

# ## 3.6 Implementations of Softmax Regression from Scratch
import torch
import torchvision
import time
import matplotlib.pyplot as plt
import numpy as np 
from torch.utils import data
from torchvision import transforms
from d2l import torch as d2l
from IPython import display#初始化参数
# Data iterators and model parameters.
batch_size = 256
train_iter, test_iter = d2l.load_data_fashion_mnist(batch_size)

# Each image is 28*28 pixels; this section treats it as a flat vector of
# length 784.
num_inputs = 784
# Softmax regression has as many outputs as classes; the dataset has
# 10 classes.
num_outputs = 10

W = torch.normal(
    0, 0.01, size=(num_inputs, num_outputs), requires_grad=True
)
b = torch.zeros(num_outputs, requires_grad=True)

# Define the softmax operation.
def softmax(X):
    """Apply a numerically stable softmax to each row of ``X``.

    Args:
        X: 2-D tensor of scores; the softmax is taken over dimension 1.

    Returns:
        Tensor with the same shape as ``X`` whose rows each sum to 1.
    """
    # Shifting every row by its maximum leaves the result mathematically
    # unchanged, but keeps exp() from overflowing to inf (and the division
    # from producing nan) when the scores are large.
    shifted = X - X.max(dim=1, keepdim=True).values
    X_exp = torch.exp(shifted)
    partition = X_exp.sum(1, keepdim=True)
    return X_exp / partition  # each row sums to 1


# Define the softmax regression model.
# Before the data is passed to the model, reshape flattens every raw image
# into a vector.
def net(X):
    """Softmax regression: flatten ``X``, apply ``W`` and ``b``, then softmax."""
    flattened = X.reshape((-1, W.shape[0]))
    logits = torch.matmul(flattened, W) + b
    return softmax(logits)


y = torch.tensor([0, 2])
# Given y, the first class is the correct prediction for the first sample
# and the third class is the correct prediction for the second sample.
y_hat = torch.tensor([[0.1, 0.3, 0.6], [0.3, 0.2, 0.5]])
# Predicted probabilities of 2 samples over 3 classes.
# print(y_hat[[0, 1], y])
# Using y as the index into the probabilities in y_hat selects the
# probability of the first class for the first sample and of the third
# class for the second sample, i.e. [y_hat[0, y[0]], y_hat[1, y[1]]].

# Define the cross-entropy loss function.
def cross_entropy(y_hat, y):
    """Return the per-sample cross-entropy loss.

    Args:
        y_hat: tensor of predicted probabilities, one row per sample.
        y: class indices used to pick one column from each row of ``y_hat``.

    Returns:
        Tensor holding ``-log`` of the probability each row assigns to its
        labelled class.
    """
    row_ids = range(len(y_hat))
    true_class_prob = y_hat[row_ids, y]
    return -torch.log(true_class_prob)


# Define a class for accumulating sums over multiple variables.
class Accumulator:  #@save
    """Accumulate running sums over ``n`` variables."""

    def __init__(self, n):
        # One float slot per tracked variable.
        self.data = [0.0] * n

    def add(self, *args):
        """Add one value to every slot, in positional order.

        Raises:
            ValueError: if the number of values differs from the number of
                slots. A bare zip() would silently drop slots or values.
        """
        if len(args) != len(self.data):
            raise ValueError(
                f"expected {len(self.data)} values, got {len(args)}"
            )
        self.data = [a + float(b) for a, b in zip(self.data, args)]

    def reset(self):
        """Set every slot back to zero."""
        self.data = [0.0] * len(self.data)

    def __getitem__(self, idx):
        return self.data[idx]


# Compute classification accuracy.
def accuracy(y_hat, y):  #@save
    """Return the number of correct predictions as a float."""
    # If y_hat is a matrix, its second dimension is assumed to hold the
    # per-class scores, so argmax over it yields the predicted class.
    is_score_matrix = len(y_hat.shape) > 1 and y_hat.shape[1] > 1
    if is_score_matrix:
        y_hat = y_hat.argmax(axis=1)
    # "==" is sensitive to dtype, so cast the predictions to y's dtype
    # before comparing.
    matches = y_hat.type(y.dtype) == y
    return float(matches.type(y.dtype).sum())


def evaluate_accuracy(net, data_iter):  #@save
    """Return the accuracy of ``net`` over the batches of ``data_iter``."""
    if isinstance(net, torch.nn.Module):
        net.eval()  # switch the model to evaluation mode
    # Slot 0: number of correct predictions; slot 1: number of predictions.
    metric = Accumulator(2)
    with torch.no_grad():  # gradient computation is disabled here
        for X, y in data_iter:
            correct = accuracy(net(X), y)
            # y.numel() is the total number of elements in y.
            metric.add(correct, y.numel())
    return metric[0] / metric[1]


# if __name__ == '__main__':
#     print(evaluate_accuracy(net, test_iter))
# The net model is initialized with random weights, so its accuracy is close
# to random guessing, i.e. close to 0.1 with 10 classes.

# Define a class that plots data in an animation.
class Animator:
    """Incrementally plot several lines on one set of axes."""

    def __init__(self, xlabel=None, ylabel=None, legend=None, xlim=None,
                 ylim=None, xscale='linear', yscale='linear',
                 fmts=('-', 'm--', 'g-.', 'r:'), nrows=1, ncols=1,
                 figsize=(3.5, 2.5)):
        legend = [] if legend is None else legend
        d2l.use_svg_display()
        # plt.subplots() creates the figure and one or more subplots.
        self.fig, self.axes = d2l.plt.subplots(nrows, ncols, figsize=figsize)
        if nrows * ncols == 1:
            # Wrap the single axes object so it can be indexed like a list.
            self.axes = [self.axes, ]

        def _configure():
            # Closure over the constructor arguments, so the axes can be
            # re-configured after every redraw.
            return d2l.set_axes(
                self.axes[0], xlabel, ylabel, xlim, ylim, xscale, yscale,
                legend)

        self.config_axes = _configure
        self.X, self.Y, self.fmts = None, None, fmts

    def add(self, x, y):
        """Append data points to the lines and redraw the plot."""
        # A y without a length is wrapped in a list so it can be processed
        # as a collection of values.
        if not hasattr(y, "__len__"):
            y = [y]
        num_lines = len(y)
        # An x without a length is repeated to match the length of y.
        if not hasattr(x, "__len__"):
            x = [x] * num_lines
        # Start one empty series per element of y when none are stored yet.
        if not self.X:
            self.X = [[] for _ in range(num_lines)]
        if not self.Y:
            self.Y = [[] for _ in range(num_lines)]
        for idx, (x_val, y_val) in enumerate(zip(x, y)):
            if x_val is None or y_val is None:
                continue
            self.X[idx].append(x_val)
            self.Y[idx].append(y_val)
        axis = self.axes[0]
        axis.cla()  # clear the current axis before drawing the new data
        for xs, ys, fmt in zip(self.X, self.Y, self.fmts):
            axis.plot(xs, ys, fmt)
        self.config_axes()
        display.display(self.fig)
        display.clear_output(wait=True)


# Training
def train_epoch_ch3(net, train_iter, loss, updater):  #@save
    """Train the model for one epoch.

    Args:
        net: the model; switched to training mode if it is a
            ``torch.nn.Module``.
        train_iter: iterable yielding ``(X, y)`` batches.
        loss: callable returning the loss tensor for ``(y_hat, y)``.
        updater: either a ``torch.optim.Optimizer`` or a callable that is
            invoked with the batch size (defined further down).

    Returns:
        Tuple ``(loss sum / sample count, correct count / sample count)``.
    """
    if isinstance(net, torch.nn.Module):
        net.train()
    # Slots: sum of training loss, number of correct predictions,
    # number of samples.
    metric = Accumulator(3)
    uses_torch_optimizer = isinstance(updater, torch.optim.Optimizer)
    for X, y in train_iter:
        # Compute gradients and update the parameters.
        y_hat = net(X)
        l = loss(y_hat, y)
        if uses_torch_optimizer:
            # Built-in PyTorch optimizer and loss function.
            updater.zero_grad()  # clear previously computed gradients
            l.mean().backward()
            updater.step()
        else:
            # Custom optimizer and loss function.
            l.sum().backward()
            updater(X.shape[0])
        metric.add(float(l.sum()), accuracy(y_hat, y), y.numel())
    loss_sum, num_correct, num_samples = metric[0], metric[1], metric[2]
    return loss_sum / num_samples, num_correct / num_samples


def train_ch3(net, train_iter, test_iter, loss, num_epochs, updater):
    """Train the model for ``num_epochs`` epochs and plot the metrics."""
    animator = Animator(
        xlabel='epoch',
        xlim=[1, num_epochs],
        ylim=[0.3, 0.9],
        legend=['train loss', 'train acc', 'test acc'],
    )
    for epoch in range(num_epochs):
        # Train for one epoch, then evaluate on the test set.
        train_metrics = train_epoch_ch3(net, train_iter, loss, updater)
        test_acc = evaluate_accuracy(net, test_iter)
        # Add train loss, train accuracy and test accuracy to the plot.
        animator.add(epoch + 1, train_metrics + (test_acc,))
    # Metrics of the last epoch.
    train_loss, train_acc = train_metrics
    # Sanity checks; a failed condition raises AssertionError.
    assert train_loss < 0.5, train_loss
    assert 0.7 < train_acc <= 1, train_acc
    assert 0.7 < test_acc <= 1, test_acc


def updater(batch_size):
    """Run one ``d2l.sgd`` step on ``W`` and ``b`` with the global ``lr``."""
    return d2l.sgd([W, b], lr, batch_size)


if __name__ == '__main__':
    lr = 0.1
    num_epochs = 10
    train_ch3(net, train_iter, test_iter, cross_entropy, num_epochs, updater)

# Prediction
# Given a series of images, compare their actual labels (first line of the
# text output) with the model's predictions (second line of the text output).
def predict_ch3(net, test_iter, n=6):
    """Predict labels and show the first images of one test batch.

    Args:
        net: callable mapping a batch of images to per-class scores.
        test_iter: iterable yielding ``(X, y)`` batches; only the first
            batch is used.
        n: maximum number of images to display.
    """
    # Take the first batch of images and their true labels.
    X, y = next(iter(test_iter))
    # A batch can hold fewer than n images; clamp n so the reshape below
    # does not fail.
    n = min(n, len(X))
    # True labels in text form.
    trues = d2l.get_fashion_mnist_labels(y)
    # Model predictions for the batch, converted to text labels.
    preds = d2l.get_fashion_mnist_labels(net(X).argmax(axis=1))
    # Title of each image: true label, newline, predicted label.
    titles = [true + '\n' + pred for true, pred in zip(trues, preds)]
    # Show the first n images with their titles.
    d2l.show_images(X[0:n].reshape((n, 28, 28)), 1, n, titles=titles[0:n])


if __name__ == '__main__':
    predict_ch3(net, test_iter)
    plt.show()  # show the line chart and the prediction images together

相关文章:

  • Elasticsearch8.11集群部署
  • PyTorch 中的nn.Conv2d 类
  • 我用Java开发了一个五子棋小游戏
  • Ask for Power Apps 消失了?
  • 【计算机网络】第三章·数据链路层(一)
  • 智能巡检机器人常见问题答疑
  • 【小白教程】幻兽帕鲁服务器一键搭建 | 支持更新 | 自定义配置
  • ThreadLocal详解
  • apipost和curl收不到服务器响应的HTTP/1.1 404 Not Found
  • 探索IOC和DI:解密Spring框架中的依赖注入魔法
  • 基于51单片机智能电子秤
  • 《vtk9 book》 官方web版 第2章 - 面向对象设计
  • jenkins安装配置,使用Docker发布maven项目全过程记录(1)
  • git checkout和git switch的区别
  • 微信小程序(十五)自定义导航栏
  • JavaScript 如何正确处理 Unicode 编码问题!
  • create-react-app做的留言板
  • ES6, React, Redux, Webpack写的一个爬 GitHub 的网页
  • gcc介绍及安装
  • magento 货币换算
  • Map集合、散列表、红黑树介绍
  • Python爬虫--- 1.3 BS4库的解析器
  • windows下使用nginx调试简介
  • 基于web的全景—— Pannellum小试
  • 技术:超级实用的电脑小技巧
  • 使用 Node.js 的 nodemailer 模块发送邮件(支持 QQ、163 等、支持附件)
  • 网页视频流m3u8/ts视频下载
  • #define 用法
  • #stm32驱动外设模块总结w5500模块
  • #设计模式#4.6 Flyweight(享元) 对象结构型模式
  • $var=htmlencode(“‘);alert(‘2“); 的个人理解
  • (4.10~4.16)
  • (附源码)ssm基于微信小程序的疫苗管理系统 毕业设计 092354
  • (小白学Java)Java简介和基本配置
  • ./indexer: error while loading shared libraries: libmysqlclient.so.18: cannot open shared object fil
  • .bat批处理(五):遍历指定目录下资源文件并更新
  • .NET CF命令行调试器MDbg入门(四) Attaching to Processes
  • .NET CORE 3.1 集成JWT鉴权和授权2
  • .net core MVC 通过 Filters 过滤器拦截请求及响应内容
  • .net redis定时_一场由fork引发的超时,让我们重新探讨了Redis的抖动问题
  • .NET 命令行参数包含应用程序路径吗?
  • .NET/C# 编译期能确定的字符串会在字符串暂存池中不会被 GC 垃圾回收掉
  • .NET成年了,然后呢?
  • .NET企业级应用架构设计系列之技术选型
  • @ComponentScan比较
  • @data注解_SpringBoot 使用WebSocket打造在线聊天室(基于注解)
  • [<MySQL优化总结>]
  • [BZOJ 4129]Haruna’s Breakfast(树上带修改莫队)
  • [C# 网络编程系列]专题六:UDP编程
  • [IDF]被改错的密码
  • [Linux] day07——查看及过滤文本
  • [linux] Key is stored in legacy trusted.gpg keyring
  • [Linux] PHP程序员玩转Linux系列-telnet轻松使用邮箱
  • [office] Excel自带的编辑函数求和方法 #其他#媒体
  • [OS] linux常见问题汇总