MNIST 手写数字识别

加载本地图片

网上下载的资源是train-images-idx3-ubyte数据,需要解析成图片

import numpy as np
import cv2
import os
import struct

def save_mnist_to_jpg(mnist_image_file, mnist_label_file, save_dir):
    """Decode MNIST idx-format files and save each sample as a JPEG.

    Files are written to ``save_dir`` as ``{prefix}_{index}_{label}.jpg``
    so the label can later be recovered from the file name.

    Fixes vs. original: file handles are now closed (context managers),
    and the image size is read from the idx header instead of being
    hard-coded to 784 (28*28), so any idx3 file works.

    :param mnist_image_file: path to the ``*-images-idx3-ubyte`` file
    :param mnist_label_file: path to the ``*-labels-idx1-ubyte`` file
    :param save_dir: existing directory to write the JPEGs into
    """
    if 'train' in os.path.basename(mnist_image_file):
        prefix = 'train'
    else:
        prefix = 'test'

    with open(mnist_label_file, 'rb') as f:
        lbdata = f.read()
    with open(mnist_image_file, 'rb') as f:
        imgdata = f.read()

    # idx1 label header: magic, count.  idx3 image header: magic, count, rows, cols.
    labelIndex = struct.calcsize('>II')
    magic, label_nums = struct.unpack_from('>II', lbdata, 0)
    imageIndex = struct.calcsize('>IIII')
    magic, nums, numRows, numColumns = struct.unpack_from('>IIII', imgdata, 0)

    # Derive the per-image pixel count from the header (784 for MNIST).
    pixel_fmt = '>{}B'.format(numRows * numColumns)
    pixel_size = struct.calcsize(pixel_fmt)

    for i in range(nums):
        label = struct.unpack_from('>B', lbdata, labelIndex)[0]
        labelIndex += struct.calcsize('>B')
        im = struct.unpack_from(pixel_fmt, imgdata, imageIndex)
        imageIndex += pixel_size
        img = np.array(im, dtype='uint8').reshape(numRows, numColumns)
        save_name = os.path.join(save_dir, '{}_{}_{}.jpg'.format(prefix, i, label))
        cv2.imwrite(save_name, img)
        
# Locations of the downloaded idx files (images / labels, train / test).
train_images = r'D:\MyCode\MNIST\train-images-idx3-ubyte'
train_labels = r'D:\MyCode\MNIST\train-labels-idx1-ubyte'
test_images = r'D:\MyCode\MNIST\t10k-images-idx3-ubyte'
test_labels = r'D:\MyCode\MNIST\t10k-labels-idx1-ubyte'

# Output directories for the decoded JPEGs.
save_train_dir = r'D:\MyCode\MNIST\raw\data\mk\train_images'
save_test_dir = r'D:\MyCode\MNIST\raw\data\mk\test_images/'

# Create the output directories on first run; no-op if they already exist.
os.makedirs(save_train_dir, exist_ok=True)
os.makedirs(save_test_dir, exist_ok=True)

save_mnist_to_jpg(test_images, test_labels, save_test_dir)
save_mnist_to_jpg(train_images, train_labels, save_train_dir)

加载图片,制作dataset,由于图片的名称是按照一定格式命名的(prefix_index_label.jpg,例如 train_0_5.jpg,最后一段就是标签),直接从文件名中获取label

import pathlib

# Gather every decoded training image; labels are embedded in the file names
# as prefix_index_label.jpg, so the third underscore-separated field is the label.
data_path = pathlib.Path(r'D:\MyCode\MNIST\raw\data\mk\train_images')
all_image_paths = [str(p) for p in data_path.glob('*.jpg')]
print(len(all_image_paths))

# Strip the ".jpg" suffix and pull out the label field.
all_image_labels = [
    pathlib.Path(p).name[:-4].split('_')[2] for p in all_image_paths
]
    
# Same collection step for the held-out test images.
test_data_path = pathlib.Path(r'D:\MyCode\MNIST\raw\data\mk\test_images')
test_image_paths = [str(p) for p in test_data_path.glob('*.jpg')]
print(len(test_image_paths))

# Labels come from the file name (prefix_index_label.jpg).
test_image_labels = [
    pathlib.Path(p).name[:-4].split('_')[2] for p in test_image_paths
]
    

可以简单验证一下

# Print the first dozen (path, label) pairs as a sanity check.
for idx, (img, label) in enumerate(zip(all_image_paths, all_image_labels)):
    print(img, '---->', label)
    if idx > 10:
        break

创建dataset

import tensorflow as tf

def load_image_label(path, label):
    """Dataset.map fn: read one saved MNIST JPEG, return (flat pixels, one-hot).

    Fixes two graph-mode bugs in the original:
    - decode with channels=1: MNIST is grayscale, and with channels=3 the
      resized tensor has 28*28*3 elements, so tf.reshape(img, [28*28]) fails;
    - labels arrive as string tensors inside map(), so ``int(label)`` raises;
      convert with tf.strings.to_number instead.

    :param path: scalar string tensor, path to a jpg file
    :param label: scalar string tensor, digit label ("0".."9")
    :return: (tensor of shape [784] in [0,1], one-hot tensor of depth 10)
    """
    img = tf.io.read_file(path)
    img = tf.image.decode_jpeg(img, channels=1)  # grayscale
    img = tf.image.resize(img, [28, 28])
    img /= 255.0  # scale to [0,1]; neural nets are sensitive to feature scale
    lab = tf.strings.to_number(label, out_type=tf.int32)
    # Flatten to match MyDense(28*28, ...) input; one-hot for categorical CE.
    return tf.reshape(img, [28 * 28]), tf.one_hot(lab, depth=10)

# Build the training and test pipelines; JPEG decoding happens lazily in map().
img_lab_ds = tf.data.Dataset.from_tensor_slices(
    (all_image_paths, all_image_labels)
).map(load_image_label)

img_test_lab_ds = tf.data.Dataset.from_tensor_slices(
    (test_image_paths, test_image_labels)
).map(load_image_label)

自定义网络

定义网络层

class MyDense(tf.keras.layers.Layer):
    """Minimal dense (fully connected) layer without activation.

    Base class fixed from ``keras.layers.Layer`` to ``tf.keras.layers.Layer``:
    ``keras`` is not imported at this point in the file (only
    ``import tensorflow as tf`` is), so the original raised NameError.
    """

    def __init__(self, inp_dim, outp_dim):
        super(MyDense, self).__init__()
        # Trainable weight matrix and bias, created eagerly since the
        # input dimension is fixed at construction time.
        self.kernel = self.add_weight('w', [inp_dim, outp_dim])
        self.bias = self.add_weight('b', [outp_dim])

    def call(self, inputs, training=None):
        # Affine transform y = x @ W + b; activation is applied by the caller.
        out = inputs @ self.kernel + self.bias
        return out

定义model

class MyModel(tf.keras.Model):
    """Five-layer MLP for MNIST: 784 -> 256 -> 128 -> 64 -> 32 -> 10 logits.

    Base class fixed from ``keras.Model`` to ``tf.keras.Model``: ``keras``
    is not imported at this point in the file, so the original raised
    NameError.
    """

    def __init__(self):
        super(MyModel, self).__init__()
        self.fc1 = MyDense(28 * 28, 256)  # expects flattened 784-pixel input
        self.fc2 = MyDense(256, 128)
        self.fc3 = MyDense(128, 64)
        self.fc4 = MyDense(64, 32)
        self.fc5 = MyDense(32, 10)  # one logit per digit class

    def call(self, inputs, training=None):
        # ReLU between hidden layers; the final layer returns raw logits
        # (the loss is configured with from_logits=True).
        x = tf.nn.relu(self.fc1(inputs))
        x = tf.nn.relu(self.fc2(x))
        x = tf.nn.relu(self.fc3(x))
        x = tf.nn.relu(self.fc4(x))
        return self.fc5(x)

模型训练与保存

model = MyModel()

model.compile(
    # `lr` is a deprecated alias in tf.keras; use `learning_rate`.
    optimizer=tf.keras.optimizers.Adam(learning_rate=0.01),
    # Labels are one-hot and the model outputs raw logits.
    loss=tf.losses.CategoricalCrossentropy(from_logits=True),
    metrics=['accuracy'],
)

history = model.fit(
    img_lab_ds.batch(128),
    epochs=20,
)

# Subclassed models cannot be saved as H5; use the SavedModel format instead.
tf.saved_model.save(model, "my_model")

测试

# Pull a single sample from the test pipeline and compare prediction vs label.
x, y = next(iter(img_test_lab_ds.batch(1)))
print('predict y:', y)
out = model.predict(x)
print(np.argmax(out))  # index of the largest logit; compare against y
# Evaluate loss/accuracy over the whole test dataset.
model.evaluate(img_test_lab_ds.batch(128))

可视化

模型训练时,将训练的参数保存到日志中

import datetime

# Timestamped log directory so successive runs do not overwrite each other.
log_dir = "logs/tfit/" + datetime.datetime.now().strftime("%Y%m%d-%H%M%S")
tensorboard_callback = tf.keras.callbacks.TensorBoard(
    log_dir=log_dir,
    histogram_freq=1,  # also log weight histograms every epoch
)

# Re-train while streaming metrics to TensorBoard.
history = model.fit(
    img_lab_ds.batch(128),
    epochs=20,
    callbacks=[tensorboard_callback],
)

启动tensorboard,指定路径

tensorboard --logdir D:\MyCode\jupyter\demo\tensorflow\logs\tfit

根据提示,在浏览器中输入地址打开即可

ResNet50

ResNet(Residual Network,残差网络)是一个通用的深度学习架构。ResNet-50是ResNet架构的一个具体实现,拥有50层深的网络结构,它在ResNet的基础上,通过堆叠特定数量的残差模块来构建网络。

实际应用时,没必要手撸一个ResNet框架,借助tensorflow.keras.applications.ResNet50方法从服务器上下载一个预训练模型,然后个性化改造一下。

一般将 ResNet50 去除最后一层后的网络作为新任务的特征提取子网络,即利用在 ImageNet 数据集上预训练好的网络参数初始化,并根据自定义任务的类别追加一个对应数据类别数的全连接分类层或子网络,从而可以在预训练网络的基础上快速、高效地学习新任务。

加载模型

import tensorflow as tf
from tensorflow import keras
import numpy as np

# Pretrained ResNet-50 backbone (ImageNet weights, classifier head removed),
# followed by global pooling and a fresh 32-way classification head.
resnet_50 = keras.applications.ResNet50(
    include_top=False,
    weights='imagenet',
    input_shape=(240, 190, 3),
)
model = keras.Sequential([
    resnet_50,
    keras.layers.GlobalAveragePooling2D(),
    keras.layers.Dense(32),
])
model.summary()

加载数据

这里以野生动物为例

import pathlib

# Images live in one sub-folder per species: <root>/<species>/<file>.jpeg
data_path = pathlib.Path(r"D:\MyCode\wildlife identity")
all_image_paths = [str(p) for p in data_path.glob(r'*\*.jpeg')]
print(len(all_image_paths))

# Encode each species (the parent folder name) as an integer, assigned
# in order of first appearance.
all_image_labels = []
lab_dict = {}
lab_no = 0
for img_path in all_image_paths:
    lab_name = pathlib.Path(img_path).parent.name
    if lab_name not in lab_dict:
        lab_dict[lab_name] = lab_no
        lab_no += 1
    all_image_labels.append(lab_dict[lab_name])
        
def load_image_label(path, label):
    """Dataset.map fn: decode one JPEG, resize to the model's input size.

    Bug fix: ``float(label)`` raises on a symbolic tensor inside
    Dataset.map; convert with tf.cast instead.

    :param path: scalar string tensor, path to a jpeg file
    :param label: scalar integer tensor, species code from lab_dict
    :return: (float tensor of shape [240, 190, 3] in [0,1], float32 label)
    """
    img = tf.io.read_file(path)
    img = tf.image.decode_jpeg(img, channels=3)
    img = tf.image.resize(img, [240, 190])
    img /= 255.0  # scale to [0,1]
    return img, tf.cast(label, tf.float32)

# Build the training pipeline; image decoding happens lazily in map().
img_lab_ds = tf.data.Dataset.from_tensor_slices(
    (all_image_paths, all_image_labels)
).map(load_image_label)

训练模型

# Sparse CE: integer labels, raw logits from the final Dense head.
model.compile(
    optimizer='adam',
    loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True),
    metrics=['accuracy'],
)
history = model.fit(
    img_lab_ds.batch(32),
    epochs=32,
)

数据预测

# Collect the held-out test images (note: *.jpg here, training used *.jpeg).
test_path = pathlib.Path(r"D:\MyCode\wildlife identity\test")
test_imgages = [str(fname) for fname in test_path.glob(r"*/*.jpg")]

def load_test_img(fname):
    """Load one test image and its numeric species label.

    Bug fix: the original derived the label from the module-level
    ``img_path`` (a leaked loop variable) instead of the ``fname``
    argument; it only worked by accident because the caller's loop
    variable happened to be named ``img_path``. Use ``fname``.

    :param fname: path string to a jpeg file under <root>/<species>/
    :return: (float tensor [240, 190, 3] in [0,1], integer label)
    """
    img = tf.io.read_file(fname)
    img = tf.image.decode_jpeg(img, channels=3)
    img = tf.image.resize(img, [240, 190])
    img /= 255.0  # same scaling as training preprocessing
    # The species label is the parent folder name, mapped via lab_dict.
    lab_name = pathlib.Path(fname).parent.name
    return img, lab_dict[lab_name]

# Predict each test image and print true vs predicted label.
for img_path in test_imgages:
    test_img, lab = load_test_img(img_path)
    print("真实标签:", lab)
    batch = tf.reshape(test_img, (1, 240, 190, 3))  # add batch dimension
    print("预测标签:", np.argmax(model.predict(batch)))
    print("\r")

由于数据集很小,训练精度不是很理想,可以通过提升epochs来提高精度,但这样做可能会导致模型过拟合。

Logo

有“AI”的1024 = 2048,欢迎大家加入2048 AI社区

更多推荐