一、先手撕一波基本原理:


二、下面基於 numpy 手撕神經網路的搭建、反向傳播,以及梯度下降更新參數:
1. Layer and loss function definition:
import numpy
import numpy as np
# Seed NumPy's global random generator so that the random weight
# initialisation performed by the layers below is reproducible between runs.
np.random.seed(42)
def MSELoss(x, y):
    """Return the squared Euclidean distance between ``x`` and ``y``.

    Despite the name, the value is the *sum* of squared element-wise
    differences (``||x - y|| ** 2``); it is not divided by the number of
    elements.

    Args:
        x: NumPy array of values.
        y: NumPy array with exactly the same shape as ``x``.

    Returns:
        Scalar sum of squared differences between ``x`` and ``y``.

    Raises:
        ValueError: If ``x`` and ``y`` do not have the same shape.
    """
    if x.shape != y.shape:
        # Checked explicitly (rather than with ``assert``) so the guard is
        # still active under ``python -O`` and mismatched shapes can never be
        # silently broadcast into a misleading loss value.
        raise ValueError(
            "shape mismatch: x has shape %s but y has shape %s"
            % (x.shape, y.shape)
        )
    return np.linalg.norm(x - y) ** 2
class LinearLayer:
    """Fully connected (affine) layer computing ``X @ W + b``.

    Attributes are plain NumPy arrays:
        W:  weights, shape ``(input_dim, output_dim)``.
        b:  bias, shape ``(1, output_dim)``.
        dW: gradient of the loss w.r.t. ``W``, same shape as ``W``.
        db: gradient of the loss w.r.t. ``b``, same shape as ``b``.
    """

    def __init__(self, input_dim, output_dim):
        """Create the layer with small random weights and zeroed gradients.

        Args:
            input_dim: Number of input features.
            output_dim: Number of output features.
        """
        # W and b are drawn from N(0, 0.1) instead of being set to zero:
        # with all-zero parameters the gradients propagated through the
        # network would stay zero and the parameters could never be updated.
        self.W = np.random.normal(0, 0.1, (input_dim, output_dim))
        self.b = np.random.normal(0, 0.1, (1, output_dim))
        self.dW = np.zeros((input_dim, output_dim))
        self.db = np.zeros((1, output_dim))

    def forward(self, X):
        """Return the layer output ``X @ W + b``.

        Args:
            X: Input batch, shape ``(batch, input_dim)``.

        Returns:
            Array of shape ``(batch, output_dim)``.
        """
        return np.matmul(X, self.W) + self.b

    def backward(self, X, grad):
        """Store the parameter gradients and return the input gradient.

        Args:
            X: The input that was fed to ``forward``, shape
                ``(batch, input_dim)``.
            grad: Gradient of the loss w.r.t. this layer's output, shape
                ``(batch, output_dim)``.

        Returns:
            Gradient of the loss w.r.t. ``X``, shape ``(batch, input_dim)``.
        """
        self.dW = np.matmul(X.T, grad)
        # The bias is shared by every sample, so its gradient is the sum of
        # ``grad`` over the batch axis. ``keepdims=True`` keeps the
        # ``(1, output_dim)`` shape that ``__init__`` gives ``db`` and ``b``.
        self.db = np.sum(grad, axis=0, keepdims=True)
        return np.matmul(grad, self.W.T)

    def update(self, lr):
        """Apply one gradient-descent step to ``W`` and ``b``.

        Args:
            lr: Learning rate multiplying the stored gradients.
        """
        self.W = self.W - self.dW * lr
        self.b = self.b - self.db * lr
class Relu:
    """Rectified linear unit activation, applied element-wise.

    The layer holds no parameters or state, so no constructor arguments are
    needed.
    """

    def forward(self, X):
        """Return ``X`` with every negative entry replaced by 0.

        Args:
            X: Input array.

        Returns:
            Array with the same shape as ``X``.
        """
        return np.where(X < 0, 0, X)

    def backward(self, X, grad):
        """Propagate ``grad`` back through the activation.

        Args:
            X: The input that was fed to ``forward``.
            grad: Gradient of the loss w.r.t. the activation output; same
                shape as ``X``.

        Returns:
            ``grad`` where ``X > 0`` and 0 elsewhere (the derivative at
            ``X == 0`` is taken to be 0).
        """
        return np.where(X > 0, 1, 0) * grad
2. Train:
# Training data: the classic XOR classification problem.
train_X = np.array([[0, 0], [0, 1], [1, 0], [1, 1]])
train_y = np.array([0, 1, 1, 0])

# Build the network: two linear layers in total. The input is 2-dimensional,
# the first layer has 3 units, the second layer has 1 unit and acts as the
# output layer; ReLU is the activation between them.
fc1 = LinearLayer(2, 3)
relu1 = Relu()
fc2 = LinearLayer(3, 1)

# Learning rate used for every gradient-descent step.
learn_rate = 0.01

# Train with full-batch gradient descent for at most 10000 iterations.
for i in range(10000):
    # Forward pass: compute the network output for the whole batch.
    input_x = train_X
    fc1_out = fc1.forward(input_x)
    relu1_out = relu1.forward(fc1_out)
    fc2_out = fc2.forward(relu1_out)
    output_y = fc2_out

    # Flatten the (4, 1) network output to (4,) so it matches train_y.
    result = output_y.reshape(output_y.shape[0])

    # Sum of squared errors (MSELoss does not divide by the sample count).
    loss = MSELoss(train_y, result)

    # Backward pass: propagate the gradient layer by layer so each linear
    # layer stores the gradients of its own parameters.
    # NOTE: (result - train_y) is half of the exact gradient of
    # ||result - train_y||**2; the constant factor 2 is effectively folded
    # into the learning rate.
    grad = (result - train_y).reshape(result.shape[0], 1)
    grad = fc2.backward(relu1_out, grad)
    grad = relu1.backward(fc1_out, grad)
    grad = fc1.backward(input_x, grad)

    # Update the parameters of the linear layers.
    fc1.update(learn_rate)
    fc2.update(learn_rate)

    # Report progress every 100 iterations.
    if i % 100 == 0:
        print(loss)

    # Stop as soon as the loss is small enough.
    if loss < 0.001:
        print("train over! 第%d次迭代" % (i))
        break
3. Predict:
# Stack the trained layers, in forward order, into a list that serves as the model.
model = [fc1, relu1, fc2]
# Prediction
def predict(model, X):
    """Run ``X`` through every layer of ``model`` and binarise the output.

    Args:
        model: Iterable of layer objects, each exposing ``forward(array)``.
            The layers are applied in iteration order.
        X: Input array handed to the first layer.

    Returns:
        Integer array with the shape of the last layer's output, holding 1
        where that output is greater than 0.5 and 0 elsewhere.
    """
    activations = X
    for layer in model:
        activations = layer.forward(activations)
    return np.where(activations > 0.5, 1, 0)
# Evaluate the trained model on the four XOR inputs and print the inputs
# followed by the thresholded predictions, separated by rows of asterisks.
print("*" * 20)
X = np.array([[0, 0], [0, 1], [1, 0], [1, 1]])
result = predict(model, X)
# One print call per section; sep="\n" puts each item on its own line.
print("預測資料", X, "*" * 20, sep="\n")
print("預測結果", result, sep="\n")
轉載請註明出處,本文鏈接:https://www.uj5u.com/qita/335398.html
標籤:AI
