基于卷积神经网络的实时手写数字识别(pycharm中实现)
这是我引入的库,各位看官可以直接复制代码到PyCharm中安装;当然还要事先安装好PyTorch,这一步我也遇到了各种各样的问题,无法在这里完全展示,就请劳烦参考其他作者的文章了。还要注意一件事:安装PyTorch的时候一定要注意自己的电脑有没有GPU,这里是有区别的。因为模型是我自己用数据集训练的,而我的电脑是一台既没有GPU、性能又差的笔记本,所以足足跑了一个半小时。先放个视频展示效果,以免浪费各位看官时间又没有达到预期效果。话不多说,下面直接放完整代码。
·
先放个视频展示效果,以免浪费各位看官时间又没有达到预期效果
手写数字效果展示
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader
from torchvision import datasets, transforms
import tkinter as tk
from PIL import Image, ImageDraw
import numpy as np
以上是我引入的库,各位看官可以直接把代码复制到PyCharm中并安装这些库;当然还要事先安装好PyTorch,这一步我也遇到了各种各样的问题,无法在这里完全展示,就请劳烦参考其他作者的文章了。
还要注意一件事:安装PyTorch的时候一定要注意自己的电脑有没有GPU,有GPU和没有GPU的情况是有区别的。因为模型是我自己用数据集训练的,而我的电脑是一台既没有GPU、性能又差的笔记本,所以足足跑了一个半小时。话不多说,下面直接放完整代码。
# -*- coding: utf-8 -*-
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader
from torchvision import datasets, transforms
import tkinter as tk
from PIL import Image, ImageDraw
import numpy as np
# ------------------ 模型定义 ------------------
class DigitRecognizer(nn.Module):
    """Convolutional classifier for single-channel 28x28 digit images.

    ``features`` is two convolutional stages, each laid out as
    conv -> batch norm -> ReLU -> conv -> 2x2 max pool -> dropout.  All
    convolutions are 3x3 with padding 1, so only the two max pools change the
    spatial size: a 28x28 input reaches ``classifier`` as 256 maps of 7x7.
    ``classifier`` is a two-layer fully connected head that emits raw logits.

    The order of the layers inside the two ``nn.Sequential`` containers
    determines the ``state_dict`` key names, so it must stay as it is for
    previously saved checkpoints to keep loading.

    Args:
        num_classes: Number of output logits. Defaults to 10 (digits 0-9).
    """

    def __init__(self, num_classes=10):
        super().__init__()
        self.features = nn.Sequential(
            # Stage 1: 1 -> 32 -> 64 channels, 28x28 -> 14x14.
            nn.Conv2d(1, 32, 3, padding=1),
            nn.BatchNorm2d(32),
            nn.ReLU(),
            # NOTE(review): no activation between this conv and the pool;
            # adding one would shift the Sequential indices and break the
            # key names of existing checkpoints, so it is left as is.
            nn.Conv2d(32, 64, 3, padding=1),
            nn.MaxPool2d(2),
            nn.Dropout(0.25),
            # Stage 2: 64 -> 128 -> 256 channels, 14x14 -> 7x7.
            nn.Conv2d(64, 128, 3, padding=1),
            nn.BatchNorm2d(128),
            nn.ReLU(),
            nn.Conv2d(128, 256, 3, padding=1),
            nn.MaxPool2d(2),
            nn.Dropout(0.25),
        )
        self.classifier = nn.Sequential(
            # 256 * 7 * 7 ties the head to 28x28 inputs.
            nn.Linear(256 * 7 * 7, 512),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(512, num_classes),
        )

    def forward(self, x):
        """Return raw class logits of shape ``(batch, num_classes)``.

        Args:
            x: Input batch; the classifier head requires it to be
                ``(batch, 1, 28, 28)``.
        """
        x = self.features(x)
        # Collapse channel and spatial dimensions, keep the batch dimension.
        x = x.flatten(1)
        return self.classifier(x)
# ------------------ 数据预处理 ------------------
def get_dataloaders(batch_size=128, num_workers=4, root='./data'):
    """Build the MNIST training and test data loaders.

    Training images get a random rotation of up to 10 degrees before being
    converted to tensors and normalized; test images are only converted and
    normalized.  Both splits are downloaded into ``root`` when missing.

    Args:
        batch_size: Batch size used by both loaders.
        num_workers: Worker processes for the training loader.  Pass 0 to
            load in the main process (useful on machines where worker
            start-up is slow or problematic).
        root: Directory where the MNIST files are stored.

    Returns:
        A ``(train_loader, test_loader)`` tuple.  The training loader is
        shuffled; the test loader is not.
    """
    # Presumably the usual MNIST pixel mean / std; shared by both pipelines
    # so the training and evaluation inputs are scaled the same way.
    normalize = transforms.Normalize((0.1307,), (0.3081,))

    train_transform = transforms.Compose([
        transforms.RandomRotation(10),
        transforms.ToTensor(),
        normalize,
    ])
    test_transform = transforms.Compose([
        transforms.ToTensor(),
        normalize,
    ])

    train_set = datasets.MNIST(
        root=root, train=True, download=True, transform=train_transform
    )
    # download=True here as well, so the test split does not depend on the
    # training split having been fetched first.
    test_set = datasets.MNIST(
        root=root, train=False, download=True, transform=test_transform
    )

    train_loader = DataLoader(
        train_set, batch_size=batch_size, shuffle=True, num_workers=num_workers
    )
    test_loader = DataLoader(test_set, batch_size=batch_size, shuffle=False)
    return train_loader, test_loader
# ------------------ 模型训练 ------------------
def train_model(epochs=15, device=None):
    """Train a ``DigitRecognizer`` on MNIST and checkpoint the best weights.

    After every epoch the model is evaluated on the test loader; the accuracy
    drives the learning-rate scheduler and decides whether the weights are
    written to ``best_model.pth``.  One progress line is printed per epoch.

    Args:
        epochs: Number of passes over the training set.
        device: ``torch.device`` (or device string) to train on.  When
            ``None``, CUDA is used if available, otherwise the CPU.  Resolved
            here so the function does not depend on a module-level ``device``
            global that only exists when the file is run as a script.
    """
    if device is None:
        device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    train_loader, test_loader = get_dataloaders()
    model = DigitRecognizer().to(device)
    criterion = nn.CrossEntropyLoss()
    optimizer = optim.AdamW(model.parameters(), lr=0.001, weight_decay=1e-4)
    # 'max' mode: the scheduler is stepped with accuracy, which should rise.
    scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer, 'max', patience=2)
    best_acc = 0.0

    for epoch in range(epochs):
        # ---- training pass ----
        model.train()
        total_loss = 0.0
        for images, labels in train_loader:
            images, labels = images.to(device), labels.to(device)
            optimizer.zero_grad()
            outputs = model(images)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()
            total_loss += loss.item()

        # ---- evaluation pass ----
        # NOTE: the test split doubles as the validation set here, so the
        # reported best accuracy is selected on the same data it is measured on.
        model.eval()
        correct = 0
        with torch.no_grad():
            for images, labels in test_loader:
                images, labels = images.to(device), labels.to(device)
                outputs = model(images)
                pred = outputs.argmax(dim=1)
                correct += (pred == labels).sum().item()
        accuracy = 100 * correct / len(test_loader.dataset)
        scheduler.step(accuracy)

        # Keep only the weights of the best-scoring epoch on disk.
        if accuracy > best_acc:
            best_acc = accuracy
            torch.save(model.state_dict(), 'best_model.pth')

        print(f'Epoch {epoch + 1}/{epochs} | Loss: {total_loss / len(train_loader):.4f} | Acc: {accuracy:.2f}%')
# ------------------ GUI界面 ------------------
class HandwritingApp:
    """Tkinter window for drawing a digit and classifying it.

    The user draws with the left mouse button on a 280x280 black canvas.
    Every stroke is mirrored into an off-screen 8-bit grayscale PIL image of
    the same size, because the Tk canvas itself cannot be read back as pixels.
    Pressing the recognize button shrinks that image to 28x28, normalizes it
    and feeds it to a ``DigitRecognizer`` loaded from ``best_model.pth``.
    """

    def __init__(self):
        """Load the trained model, then build the window and its widgets.

        Raises:
            SystemExit: If ``best_model.pth`` does not exist.
        """
        # Load the weights first so that a missing checkpoint stops the
        # program before any window has been created.
        self.model = DigitRecognizer()
        try:
            state = torch.load('best_model.pth', map_location='cpu', weights_only=True)
        except FileNotFoundError:
            # SystemExit instead of the interactive-only exit() helper; the
            # message goes to stderr and the exit status is non-zero.
            raise SystemExit("未找到模型文件,请先运行训练!") from None
        self.model.load_state_dict(state)
        self.model.eval()

        self.window = tk.Tk()
        self.window.title("手写数字识别")

        # Drawing surface and result label.
        self.canvas = tk.Canvas(self.window, width=280, height=280, bg='black')
        self.canvas.pack()
        self.label = tk.Label(self.window, text="预测结果:", font=('Arial', 24))
        self.label.pack()

        # Control buttons.
        btn_frame = tk.Frame(self.window)
        btn_frame.pack(pady=10)
        tk.Button(btn_frame, text="清空", command=self.clear_canvas).grid(row=0, column=0, padx=10)
        tk.Button(btn_frame, text="识别", command=self.predict).grid(row=0, column=1, padx=10)

        # Off-screen copy of the drawing (white strokes on black).
        self.image = Image.new('L', (280, 280), 0)
        self.draw = ImageDraw.Draw(self.image)
        # Previous pointer position of the stroke in progress, or None.
        self.last_point = None

        # Mouse bindings: drag to draw, release to end the stroke.
        self.canvas.bind("<B1-Motion>", self.paint)
        self.canvas.bind("<ButtonRelease-1>", self.reset_point)

    def paint(self, event):
        """Extend the current stroke to the pointer position in ``event``."""
        x, y = event.x, event.y
        # Explicit None check: the first motion event of a stroke only
        # records its position, later ones draw a segment from the last one.
        if self.last_point is not None:
            self.canvas.create_line(self.last_point[0], self.last_point[1], x, y,
                                    width=15, fill='white', capstyle=tk.ROUND)
            self.draw.line([self.last_point, (x, y)], fill=255, width=15)
        self.last_point = (x, y)

    def reset_point(self, event):
        """End the current stroke so the next drag starts a new one."""
        self.last_point = None

    def clear_canvas(self):
        """Erase the canvas and the off-screen image and reset the label."""
        self.canvas.delete("all")
        self.image = Image.new('L', (280, 280), 0)
        self.draw = ImageDraw.Draw(self.image)
        self.label.config(text="预测结果:")

    def preprocess(self):
        """Convert the drawing into a normalized ``(1, 1, 28, 28)`` tensor."""
        img = self.image.resize((28, 28))
        tensor = transforms.ToTensor()(img)
        # Same normalization constants as the training/test pipelines.
        tensor = transforms.Normalize((0.1307,), (0.3081,))(tensor)
        return tensor.unsqueeze(0)  # add the batch dimension

    def predict(self):
        """Classify the current drawing and show the digit in the label.

        When nothing has been drawn the label is reset instead, since the
        model would otherwise report an arbitrary digit for a blank image.
        """
        # getbbox() is None when every pixel of the image is zero.
        if self.image.getbbox() is None:
            self.label.config(text="预测结果:")
            return
        tensor = self.preprocess()
        with torch.no_grad():
            output = self.model(tensor)
        # Batch of one: argmax over the class dimension yields one index.
        pred = output.argmax(dim=1).item()
        self.label.config(text=f"预测结果:{pred}")

    def run(self):
        """Enter the Tk main loop; returns when the window is closed."""
        self.window.mainloop()
if __name__ == "__main__":
    # Script entry point.  `device` is deliberately a module-level name:
    # train_model() looks it up as a global when it runs.
    if torch.cuda.is_available():
        device = torch.device("cuda")
    else:
        device = torch.device("cpu")

    # On first use, uncomment the next line to train and write best_model.pth.
    # train_model()

    # Open the drawing window and block until it is closed.
    app = HandwritingApp()
    app.run()
更多推荐


所有评论(0)