【numpy手搓DNN极简版】
·
简介
首先调用 begin 函数定义网络结构并生成权重,例如 begin(8, 4, 2) 表示输入层有 8 个输入、隐藏层有 4 个输出、输出层有 2 个输出。隐藏层默认使用 leaky_relu 激活函数,输出层默认使用 sigmoid 激活函数。
公式
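前向传播的每一层都可以看作线性函数 $y = ax + b$(对应代码中的 `inp @ W + b`,其中的乘法是矩阵运算),然后用激活函数 $z = \mathrm{sigmoid}(y)$ 得到该层输出。反向传播使用链式法则:设损失为平方误差 $L = \tfrac{1}{2}\sum (z - Y)^2$,则 $\dfrac{\partial L}{\partial a} = \dfrac{\partial L}{\partial z}\cdot\dfrac{\partial z}{\partial y}\cdot\dfrac{\partial y}{\partial a} = (z - Y)\cdot \mathrm{sigmoid}'(y)\cdot x$。得到梯度后按 $a \leftarrow a - \mathrm{learnV}\cdot\dfrac{\partial L}{\partial a}$ 更新权重,也就是后面代码中的 `W[layer_idx] -= learnV * dL_dw`,其中 learnV 是学习率。多隐藏层的前向传播和反向传播都用递归函数逐层计算。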
使用示例
if __name__ == "__main__":
    # Training data: two samples, each with 8 input values and 2 target values.
    X = [[0.1, 0.2, 0.3, 0.1, 0.2, 0.3, 0.9, 0.6],
         [0.3, 0.5, 0.8, 0.3, 0.5, 0.8, 0.7, 0.3]]
    Y = [[0.5, 0.8],
         [0.6, 0.7]]

    # To reuse a saved model instead of training, replace the three calls
    # below with load("my_model.npz") followed by set(X, Y).
    begin(8, 8, 4, 2)  # several hidden layers; begin(8, 4, 2) gives a single one
    randomWB()  # draw fresh random weights and biases
    train()  # runs 10000 epochs by default; pass epochs=... to change that

    # test() only returns the outputs for the current X / Y, so print them
    # here; otherwise the example displays no predictions at all.
    for prediction in test():
        print(np.around(prediction, decimals=2))

    save("my_model.npz")  # store the trained model for later load() calls
完整代码
import numpy as np
X = []  # raw training inputs: one list of feature values per sample
Y = []  # raw training targets: one list of output values per sample
x = None  # X reshaped by begin()/set() to (n_samples, layers[0], 1)
y = None  # Y reshaped by begin()/set() to (n_samples, layers[-1], 1)
layers = []  # number of units in every layer, input layer first
W = []  # one weight matrix per layer, shaped (in_units, out_units)
b = []  # one bias vector per layer, shaped (out_units,)
learnV = 0.1  # learning rate; train() overwrites it on every epoch
z_list = []  # pre-activation values recorded by the latest forward pass
a_list = []  # activations recorded by the latest forward pass
# Set to True by load() and to False by begin().  Defined here as well so
# that train() cannot fail with a NameError when neither has run yet.
isLoad = False
def sigmoid(x):
    """Return the logistic sigmoid ``1 / (1 + exp(-x))`` of *x*, element-wise.

    The exponential is only ever taken of a non-positive number, so very
    negative inputs cannot overflow ``np.exp`` and trigger a RuntimeWarning.

    Args:
        x: A NumPy array or a scalar.

    Returns:
        The sigmoid of every element, with the same shape as *x*.
    """
    # exp(-|x|) always lies in (0, 1], so it can never overflow.
    decay = np.exp(-np.abs(x))
    # x >= 0: 1 / (1 + e^-x).  x < 0: the same value rewritten as
    # e^x / (1 + e^x), which avoids evaluating e^-x for a very negative x.
    return np.where(x >= 0, 1 / (1 + decay), decay / (1 + decay))
def d_sigmoid(x):
    """Return the derivative of the sigmoid evaluated at *x*.

    Computed as ``s * (1 - s)`` where ``s = sigmoid(x)``.
    """
    activated = sigmoid(x)
    complement = 1 - activated
    return activated * complement
def relu(x):
    """Rectified linear unit: the element-wise maximum of 0 and *x*."""
    floor = 0
    return np.maximum(floor, x)
def d_relu(x):
    """Return the ReLU derivative: 1.0 where *x* is positive, 0.0 elsewhere.

    *x* must be a NumPy array, because the comparison result is converted
    with ``astype``.
    """
    is_positive = x > 0
    return is_positive.astype(float)
def leaky_relu(x, alpha=0.01):
    """Leaky ReLU: keep positive entries, scale all other entries by *alpha*.

    Args:
        x: Input values.
        alpha: Slope applied where ``x <= 0``.

    Returns:
        ``x`` where ``x > 0`` and ``alpha * x`` elsewhere.
    """
    is_positive = x > 0
    leaked = alpha * x
    return np.where(is_positive, x, leaked)
def d_leaky_relu(x, alpha=0.01):
    """Return the leaky-ReLU derivative: 1.0 where ``x > 0``, *alpha* elsewhere.

    Args:
        x: Pre-activation values.
        alpha: Slope used for the non-positive entries.
    """
    is_positive = x > 0
    positive_slope = 1.0
    return np.where(is_positive, positive_slope, alpha)
def loss(L, y):
    """Return half the sum of squared differences between *L* and *y*.

    Args:
        L: Network output.
        y: Target values, broadcastable against *L*.
    """
    residual = L - y
    squared_error = residual ** 2
    return np.sum(squared_error) / 2
def forward_recursive(layer_idx, inp):
    """Run the forward pass from layer *layer_idx* onwards, recursively.

    Each visited layer appends its pre-activation to the global ``z_list``
    and its activation to the global ``a_list``; callers clear both lists
    before starting a pass at layer 0.

    Args:
        layer_idx: Index into ``W`` / ``b`` of the layer to evaluate next.
        inp: Input to that layer.

    Returns:
        The activation of the last layer, or *inp* unchanged when
        *layer_idx* already equals the number of layers.
    """
    depth = len(W)
    if layer_idx == depth:
        return inp

    pre_activation = inp @ W[layer_idx] + b[layer_idx]

    # The output layer uses sigmoid; every earlier layer uses leaky ReLU.
    is_output_layer = layer_idx + 1 == depth
    if is_output_layer:
        activation = sigmoid(pre_activation)
    else:
        activation = leaky_relu(pre_activation)

    z_list.append(pre_activation)
    a_list.append(activation)
    return forward_recursive(layer_idx + 1, activation)
def backward_recursive(layer_idx, grad_output, network_input):
    """Back-propagate through layer *layer_idx* and then every earlier layer.

    Updates ``W[layer_idx]`` and ``b[layer_idx]`` in place with one gradient
    step of size ``learnV``, using the values the preceding forward pass
    stored in ``z_list`` / ``a_list``.

    Args:
        layer_idx: Index of the layer to update; recursion walks down to 0.
        grad_output: Gradient of the loss with respect to this layer's
            activation.
        network_input: The sample fed to layer 0 in the forward pass.
    """
    # Layer 0 was fed the raw sample; later layers were fed the previous
    # layer's activation.
    layer_input = network_input if layer_idx == 0 else a_list[layer_idx - 1]
    pre_activation = z_list[layer_idx]

    # The output layer uses sigmoid; every earlier layer uses leaky ReLU.
    is_output_layer = layer_idx + 2 == len(layers)
    if is_output_layer:
        activation_grad = d_sigmoid(pre_activation)
    else:
        activation_grad = d_leaky_relu(pre_activation)

    delta = grad_output * activation_grad
    weight_grad = layer_input.T @ delta
    bias_grad = np.sum(delta, axis=0, keepdims=True)
    # Must use the weights from before the update below.
    upstream_grad = delta @ W[layer_idx].T

    # Clip element-wise to guard against exploding gradients.
    weight_grad = np.clip(weight_grad, -1.0, 1.0)
    bias_grad = np.clip(bias_grad, -1.0, 1.0)

    W[layer_idx] -= learnV * weight_grad
    b[layer_idx] -= learnV * bias_grad.flatten()

    if layer_idx > 0:
        backward_recursive(layer_idx - 1, upstream_grad, network_input)
def begin(*args):
    """Define the network structure and create freshly randomised parameters.

    Call as ``begin(input_dim, hidden_1, ..., output_dim)``.  The global
    ``W`` and ``b`` lists are refilled in place with normally distributed
    values (mean 0, standard deviation 0.1), ``isLoad`` is reset to False,
    and the global ``X`` / ``Y`` are reshaped into ``x`` / ``y`` when both
    hold data.

    Args:
        *args: Number of units in each layer, input layer first.

    Raises:
        ValueError: If fewer than two layer sizes are given.
    """
    global layers, x, y, isLoad
    sizes = list(args)
    # Validate before touching any global, so a bad call leaves the current
    # model untouched.
    if len(sizes) < 2:
        raise ValueError("至少需要输入维度和输出维度")

    isLoad = False
    layers = sizes
    W.clear()
    b.clear()
    for in_dim, out_dim in zip(layers, layers[1:]):
        W.append(np.random.normal(0, 0.1, size=(in_dim, out_dim)))
        b.append(np.random.normal(0, 0.1, size=out_dim))

    # Use len() rather than truthiness: X / Y may be NumPy arrays, whose
    # truth value is ambiguous and would raise ValueError.
    has_data = all(d is not None and len(d) > 0 for d in (X, Y))
    if has_data:
        # One column vector per sample.
        x = np.array(X).reshape(-1, layers[0], 1)
        y = np.array(Y).reshape(-1, layers[-1], 1)
    else:
        x = np.array([])
        y = np.array([])
def randomWB():
    """Re-draw every weight matrix and bias vector, keeping the structure.

    Each entry of the global ``W`` / ``b`` is replaced with values drawn
    from a normal distribution (mean 0, standard deviation 0.1) sized
    according to the global ``layers``.
    """
    layer_pairs = zip(layers[:-1], layers[1:])
    for idx, (in_dim, out_dim) in enumerate(layer_pairs):
        W[idx] = np.random.normal(0, 0.1, size=(in_dim, out_dim))
        b[idx] = np.random.normal(0, 0.1, size=out_dim)
def train(epochs=10000, early_stop=1e-8, setLearnV=0.7):
    """Train the network on the global samples ``x`` / ``y``, one at a time.

    Every epoch runs a forward and a backward pass for each sample.  The
    mean loss is printed every 100 epochs, and training stops early once it
    drops below *early_stop*.

    Args:
        epochs: Maximum number of passes over the samples.
        early_stop: Mean-loss threshold below which training stops.
        setLearnV: Fixed learning rate used when the model came from
            ``load()``; otherwise the rate decays linearly from 1.1
            towards 0.1 over the epochs.

    Raises:
        ValueError: If there are no training samples, which previously
            surfaced as a ZeroDivisionError or AttributeError.
    """
    global learnV
    if x is None or y is None or x.shape[0] == 0:
        raise ValueError("没有训练数据:请先设置 X、Y 并调用 begin() 或 set()")

    n_samples = x.shape[0]
    for epoch in range(epochs):
        if isLoad:
            learnV = setLearnV
        else:
            learnV = 1 * (epochs - epoch) / epochs + 0.1

        total_loss = 0
        for i in range(n_samples):
            # Samples are stored as column vectors; the layers expect rows.
            row = x[i].T
            target_row = y[i].T
            z_list.clear()
            a_list.clear()
            output = forward_recursive(0, row)
            total_loss += loss(output, target_row)
            # Gradient of the squared-error loss w.r.t. the network output.
            dL_dy = output - target_row
            backward_recursive(len(W) - 1, dL_dy, row)

        mean_loss = total_loss / n_samples
        if epoch % 100 == 0:
            print(f"Epoch {epoch}, Loss: {mean_loss:.6f}")
        if mean_loss < early_stop:
            print(f"提前停止于第 {epoch} 轮")
            break
def test():
    """Run a forward pass for every sample in the global ``x``.

    Nothing is printed; the caller decides what to do with the outputs.

    Returns:
        A list with one network output per sample, in sample order.
    """
    outputs = []
    n_samples = x.shape[0]
    for idx in range(n_samples):
        # forward_recursive appends to these lists, so reset them per sample.
        z_list.clear()
        a_list.clear()
        sample_row = x[idx].T
        outputs.append(forward_recursive(0, sample_row))
    return outputs
def save(filename):
    """Save the network structure and parameters to an ``.npz`` archive.

    The archive holds ``layers`` plus ``W`` and ``b`` as one-dimensional
    object arrays with one entry per layer, which is the layout ``load()``
    iterates over.

    Args:
        filename: Target path, passed straight to ``np.savez``.
    """

    def pack(arrays):
        # Fill a pre-sized object array entry by entry.
        # np.array(list, dtype=object) raises a broadcast ValueError when
        # the matrices share their first dimension but differ in the
        # second, e.g. the shapes (4, 4) and (4, 2) made by begin(4, 4, 2).
        packed = np.empty(len(arrays), dtype=object)
        for idx, arr in enumerate(arrays):
            packed[idx] = arr
        return packed

    np.savez(
        filename,
        layers=np.array(layers),
        W=pack(W),
        b=pack(b),
    )
def load(filename):
    """Load a model written by ``save()`` and restore ``layers``, ``W``, ``b``.

    The globals are only replaced, and ``isLoad`` only set to True, after
    the whole archive has been read successfully.

    Args:
        filename: Path of the ``.npz`` archive, passed to ``np.load``.
    """
    global layers, W, b, isLoad
    # NOTE(security): allow_pickle=True is needed because W and b are stored
    # as object arrays, but unpickling can execute arbitrary code.  Only
    # load archives from a trusted source.
    # The with-block closes the archive's file handle when reading is done.
    with np.load(filename, allow_pickle=True) as data:
        loaded_layers = data['layers'].tolist()
        # Convert each stored entry back into a float array.
        loaded_W = [np.array(w, dtype=float) for w in data['W']]
        loaded_b = [np.array(bias, dtype=float) for bias in data['b']]

    layers, W, b = loaded_layers, loaded_W, loaded_b
    isLoad = True
    print(f"模型加载成功,结构: {layers}")
def set(X=None, Y=None):
    """Reshape new samples into the global ``x`` / ``y`` used for training.

    Only ``x`` and ``y`` are replaced; the module-level ``X`` / ``Y`` lists
    are left untouched.  A network structure must already exist, because
    the reshape uses ``layers[0]`` and ``layers[-1]``.

    Args:
        X: Input samples, one row of ``layers[0]`` values per sample.
            Omitting it gives an empty data set.
        Y: Target samples, one row of ``layers[-1]`` values per sample.
            Omitting it gives an empty data set.
    """
    global x, y
    # None replaces the former mutable-list defaults.
    samples = [] if X is None else X
    targets = [] if Y is None else Y
    # One column vector per sample.
    x = np.array(samples).reshape(-1, layers[0], 1)
    y = np.array(targets).reshape(-1, layers[-1], 1)
# ========== 使用示例 ==========
if __name__ == "__main__":
    # Training data: two samples, each with 8 input values and 2 target values.
    X = [[0.1, 0.2, 0.3, 0.1, 0.2, 0.3, 0.9, 0.6],
         [0.3, 0.5, 0.8, 0.3, 0.5, 0.8, 0.7, 0.3]]
    Y = [[0.5, 0.8],
         [0.6, 0.7]]

    # Training phase.  To evaluate a saved model instead, replace the three
    # calls below with load("my_model.npz") followed by set(X, Y).
    begin(8, 8, 4, 2)
    randomWB()
    train(epochs=10000)

    # test() only returns the outputs, so print them here; otherwise the
    # script displays no predictions at all.
    for prediction in test():
        print(np.around(prediction, decimals=2))

    save("my_model.npz")
更多推荐



所有评论(0)