使用pytorch的auto_grad实现线性模型对mnist数据集多分类

使用pytorch的auto_grad实现线性模型对mnist数据集多分类:选取mnist的前100张图片,前80张为训练集,后20张为测试集,训练500个epoch

知识储备

使用多个线性模型进行多分类 原理:每一个线性模型做二分类
多个线性模型 = 感知机,实质就是每一个线性模型做二分类

数据加载&归一化

1
2
3
4
5
6
7
8
9
10
11
12
13
import torch
from mnist import MNIST
import numpy as np
import pdb
from matplotlib import pyplot as plt
%matplotlib inline
# Load the MNIST training split from the local python-mnist dataset directory.
mndata = MNIST('dataset/python-mnist')
image_data_all, image_label_all = mndata.load_training()

# Keep only the first 100 samples. Pixel values are divided by 255 as the
# normalisation step. The removed NumPy aliases `np.float` / `np.int` are
# replaced with explicit dtypes; float64 matches the dtype used for the
# model weights and bias later in the file.
image_data = np.array(image_data_all[0:100], dtype=np.float64) / 255
image_label = np.array(image_label_all[0:100], dtype=np.int64)
print(image_data.shape, image_label.shape)
(100, 784) (100,)

定义模型

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
def model(image_data_one, weights, bias):
    """Apply the bank of linear models to a single sample.

    The raw image is used directly as the feature vector here; features
    extracted by some other means can be passed in instead, as long as their
    length matches the number of rows of ``weights``.

    Args:
        image_data_one: NumPy array holding one sample; it is flattened to a
            single row, so any shape with the right number of elements works.
        weights: 2-D tensor of shape (num_features, num_models), same dtype
            as the sample.
        bias: tensor broadcastable to (1, num_models).

    Returns:
        Tensor of shape (1, num_models): one output per linear model.
    """
    # Flatten to a single row. -1 lets the feature count follow the input
    # instead of being pinned to 28 * 28.
    xt = torch.from_numpy(image_data_one.reshape(1, -1))
    return xt.mm(weights) + bias

def get_acc(image_data, image_label, weights, bias, start_i, end_i):
    """Return the classification accuracy over samples ``start_i`` .. ``end_i - 1``.

    Args:
        image_data: indexable collection of samples accepted by ``model``.
        image_label: indexable collection of integer labels, aligned with
            ``image_data``.
        weights: weight tensor passed through to ``model``.
        bias: bias tensor passed through to ``model``.
        start_i: index of the first sample to evaluate (inclusive).
        end_i: index one past the last sample to evaluate (exclusive).

    Returns:
        Fraction of evaluated samples whose predicted label equals the
        ground-truth label, as a float. An empty range yields 0.0 instead of
        raising ZeroDivisionError.
    """
    total = end_i - start_i
    if total <= 0:
        return 0.0

    correct = 0
    # no_grad is optional here because no loss is computed from these
    # outputs; it just avoids building a graph during evaluation.
    with torch.no_grad():
        for i in range(start_i, end_i):
            y = model(image_data[i], weights, bias)
            # Ground-truth label of the i-th image.
            gt = image_label[i]
            # For every candidate label value 0..9, take the distance from
            # that value to the closest model output, then predict the
            # candidate with the smallest such distance.
            # NOTE(review): this compares each candidate against ALL outputs
            # rather than only the output of that candidate's own model --
            # confirm this is the intended decision rule.
            candidates = torch.arange(10, dtype=y.dtype)
            distances = torch.abs(y.reshape(-1, 1) - candidates).min(dim=0).values
            pred = torch.argmin(distances).item()
            if gt == pred:
                correct += 1
    return correct / float(total)
1
2
3
4
5
6
7
8
9
10
# Plot how the training-set and test-set accuracy change over time.
def show_acc(train_accs, test_accs):
    """Draw both accuracy curves on one figure and display it.

    Args:
        train_accs: sequence of training accuracies, one per recorded step.
        test_accs: sequence of test accuracies, one per recorded step.
    """
    plt.figure(figsize=(10, 4))
    plt.title('train_accs and test_accs')
    curves = (
        (train_accs, 'green', 'train_accs'),
        (test_accs, 'red', 'test_accs'),
    )
    for values, colour, name in curves:
        # The x axis is simply the position of each value in its sequence.
        positions = np.arange(len(values))
        plt.plot(positions, values, color=colour, label=name)
    plt.legend()  # show the legend
    plt.xlabel('index')
    plt.ylabel('accs')
    plt.show()
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
def train_model(image_data, image_label, weights, bias, lr,
                epochs=500, train_end=80, test_end=100, verbose=False):
    """Train the linear models with per-sample gradient descent and plot accuracy.

    For every training sample only the output of the model whose index equals
    the sample's label contributes to the loss: the squared difference
    between that output and the label value. ``weights`` and ``bias`` are
    updated in place after every sample.

    Args:
        image_data: indexable collection of samples accepted by ``model``.
        image_label: indexable collection of integer labels.
        weights: leaf tensor with ``requires_grad=True``; modified in place.
        bias: leaf tensor with ``requires_grad=True``; modified in place.
        lr: learning rate (step size) of the gradient-descent update.
        epochs: number of passes over the training samples (default 500).
        train_end: samples ``0 .. train_end - 1`` form the training set
            (default 80).
        test_end: samples ``train_end .. test_end - 1`` form the test set
            (default 100).
        verbose: when True, print the loss and accuracies after every epoch.

    Returns:
        None. The accuracy curves are shown via ``show_acc``.
    """
    # NOTE(review): the source listing lost its indentation. The nesting
    # below (update after every sample, accuracy once per epoch, plot once
    # after the last epoch) is the assumed original structure -- confirm.
    train_accs = []
    test_accs = []
    prev_loss = float("inf")
    for epoch in range(epochs):
        epoch_loss = 0.0
        for i in range(train_end):
            y = model(image_data[i], weights, bias)
            # Ground-truth label of the i-th image.
            gt = int(image_label[i])
            # Only one output matters: the model at index `gt` is pushed
            # towards the value `gt`, so only its weights/bias get gradients.
            residual = y[0, gt] - gt
            loss = residual * residual
            epoch_loss += loss.item()
            loss.backward()
            # Plain SGD step; no_grad keeps the update out of the graph.
            with torch.no_grad():
                weights.sub_(weights.grad * lr)
                bias.sub_(bias.grad * lr)
            weights.grad.zero_()
            bias.grad.zero_()

        train_acc = get_acc(image_data, image_label, weights, bias, 0, train_end)
        test_acc = get_acc(image_data, image_label, weights, bias, train_end, test_end)
        train_accs.append(train_acc)
        test_accs.append(test_acc)
        if verbose:
            print("epoch=%s,loss=%s/%s,train/test_acc=%s/%s,"
                  % (epoch, epoch_loss, prev_loss, train_acc, test_acc))
        prev_loss = epoch_loss
    show_acc(train_accs, test_accs)

训练

1
2
3
4
5
# Step size for the gradient-descent updates.
lr = 1e-3
# One column of weights per linear model (10 models over 28*28 inputs),
# randomly initialised; the biases start at zero. Both track gradients.
weights = torch.randn((28 * 28, 10), dtype=torch.float64, requires_grad=True)
bias = torch.zeros((10,), dtype=torch.float64, requires_grad=True)
# Train the model.
train_model(image_data, image_label, weights, bias, lr)

由于样本少,模型出现过拟合,结果是训练集精度高,测试集精度低。