引言

本著“凡我不能創造的，我就不能理解”的思想，本系列文章會基于純Python以及NumPy從零創建自己的深度學習框架，該框架類似PyTorch能實作自動求導，

要深入理解深度學習，從零開始創建的經驗非常重要。從自己可以理解的角度出發，在盡量不使用外部完備框架的前提下，實作我們想要的模型。本系列文章的宗旨就是通過這樣的過程，讓大家切實掌握深度學習的底層實作，而不是僅做一個調包俠。
本系列文章首發于微信公眾號：JavaNLP

關注公眾號

在上篇文章中，我們實作了反向傳播的模式代碼，同時正確地實作了加法運算和乘法運算，從今天開始，我們就來實作剩下的運算，本文實作了減法、除法、矩陣乘法和求和等運算，

實作減法運算

我們先撰寫測驗用例，再實作減法計算圖，

test_tensor_sub.py:

import numpy as np

from core.tensor import Tensor


def test_simple_sub():
    """Scalar subtraction: the gradient is 1 for the left operand, -1 for the right."""
    minuend = Tensor(1, requires_grad=True)
    subtrahend = Tensor(2, requires_grad=True)

    difference = minuend - subtrahend
    difference.backward()

    assert minuend.grad.data == 1.0
    assert subtrahend.grad.data == -1.0


def test_array_sub():
    """Element-wise subtraction of two 1-D tensors, followed by an in-place ``-=``."""
    lhs = Tensor([1, 2, 3], requires_grad=True)
    rhs = Tensor([4, 5, 6], requires_grad=True)

    diff = lhs - rhs
    assert diff.data.tolist() == [-3., -3., -3.]

    upstream = Tensor([1, 1, 1])
    diff.backward(upstream)

    assert lhs.grad.data.tolist() == [1, 1, 1]
    assert rhs.grad.data.tolist() == [-1, -1, -1]

    # The test expects the in-place update to leave the tensor without a gradient.
    lhs -= 0.1
    assert lhs.grad is None
    np.testing.assert_array_almost_equal(lhs.data, [0.9, 1.9, 2.9])


def test_broadcast_sub():
    """Subtraction with broadcasting: a (3,) tensor is subtracted from a (2, 3) tensor."""
    matrix = Tensor([[1, 2, 3], [4, 5, 6]], requires_grad=True)  # shape (2, 3)
    row = Tensor([7, 8, 9], requires_grad=True)  # shape (3,)

    diff = matrix - row  # result has shape (2, 3)
    assert diff.data.tolist() == [[-6, -6, -6], [-3, -3, -3]]

    upstream = Tensor(np.ones_like(matrix.data))
    diff.backward(upstream)

    assert matrix.grad.data.tolist() == [[1, 1, 1], [1, 1, 1]]
    # The row operand is used by both rows, so its gradient is -1 summed twice.
    assert row.grad.data.tolist() == [-2, -2, -2]

然后實作減法的計算圖，

減法的計算圖

class Sub(_Function):
    """Computation-graph node for subtraction, ``z = x - y``."""

    def forward(ctx, x: np.ndarray, y: np.ndarray) -> np.ndarray:
        """Return ``x - y`` after recording both operand shapes for the backward pass."""
        ctx.save_for_backward(x.shape, y.shape)
        difference = x - y
        return difference

    def backward(ctx, grad: Any) -> Any:
        """Return the gradients for ``x`` and ``y``.

        ``x`` receives ``grad`` and ``y`` receives ``-grad``; each is passed
        through ``unbroadcast`` together with the operand shape saved in
        ``forward``.
        """
        x_shape, y_shape = ctx.saved_tensors
        grad_x = unbroadcast(grad, x_shape)
        grad_y = unbroadcast(-grad, y_shape)
        return grad_x, grad_y

這些類都添加到ops.py中，然后跑一下測驗用例，結果為：

============================= test session starts ==============================
collecting ... collected 3 items

test_sub.py::test_simple_sub PASSED                                      [ 33%]<class 'numpy.ndarray'>

test_sub.py::test_array_sub PASSED                                       [ 66%]<class 'numpy.ndarray'>

test_sub.py::test_broadcast_sub PASSED                                   [100%]<class 'numpy.ndarray'>


============================== 3 passed in 0.36s ===============================

實作除法運算

撰寫測驗用例：

import numpy as np

from core.tensor import Tensor


def test_simple_div():
    """Scalar division: for z = x / y, dz/dx = 1/y and dz/dy = -x / y**2."""
    numerator = Tensor(1, requires_grad=True)
    denominator = Tensor(2, requires_grad=True)

    quotient = numerator / denominator
    quotient.backward()

    assert numerator.grad.data == 0.5
    assert denominator.grad.data == -0.25


def test_array_div():
    """Element-wise division of two 1-D tensors, followed by an in-place ``/=``."""
    numerator = Tensor([1, 2, 3], requires_grad=True)
    denominator = Tensor([2, 4, 6], requires_grad=True)

    quotient = numerator / denominator

    assert quotient.data.tolist() == [0.5, 0.5, 0.5]
    # The forward pass must not modify the left operand.
    assert numerator.data.tolist() == [1, 2, 3]

    upstream = Tensor([1, 1, 1])
    quotient.backward(upstream)

    np.testing.assert_array_almost_equal(numerator.grad.data, [0.5, 0.25, 1 / 6])
    np.testing.assert_array_almost_equal(denominator.grad.data, [-0.25, -1 / 8, -1 / 12])

    # The test expects the in-place update to leave the tensor without a gradient.
    numerator /= 0.1
    assert numerator.grad is None
    assert numerator.data.tolist() == [10, 20, 30]


def test_broadcast_div():
    """Division with broadcasting: a (2, 3) tensor is divided by a (3,) tensor."""
    matrix = Tensor([[1, 1, 1], [2, 2, 2]], requires_grad=True)  # shape (2, 3)
    row = Tensor([4, 4, 4], requires_grad=True)  # shape (3,)

    quotient = matrix / row  # (2, 3) / (3,) broadcasts to (2, 3)

    assert quotient.data.tolist() == [[0.25, 0.25, 0.25], [0.5, 0.5, 0.5]]

    upstream = Tensor([[1, 1, 1], [1, 1, 1]])
    quotient.backward(upstream)

    assert matrix.grad.data.tolist() == [[1 / 4, 1 / 4, 1 / 4], [1 / 4, 1 / 4, 1 / 4]]
    # Per column: -(1/16) from the first row plus -(2/16) from the second row.
    assert row.grad.data.tolist() == [-3 / 16, -3 / 16, -3 / 16]

除法計算圖

# Python 3 only provides the __truediv__ family of magic methods.
class TrueDiv(_Function):
    """Computation-graph node for true division, ``z = x / y``."""

    def forward(ctx, x: ndarray, y: ndarray) -> ndarray:
        """Return ``x / y`` and keep both operands for the backward pass."""
        ctx.save_for_backward(x, y)
        quotient = x / y
        return quotient

    def backward(ctx, grad: ndarray) -> Tuple[ndarray, ndarray]:
        """Return the gradients for ``x`` and ``y``.

        Uses dz/dx = 1 / y and dz/dy = -x / y**2, each scaled by ``grad`` and
        then passed through ``unbroadcast`` with the operand's own shape.
        """
        x, y = ctx.saved_tensors
        grad_x = grad / y
        grad_y = grad * (-x / y ** 2)
        return unbroadcast(grad_x, x.shape), unbroadcast(grad_y, y.shape)

由于Python3只有 __truediv__ 相關魔法方法，因此為了簡單，也將我們的除法命名為TrueDiv。

同時修改tensor中的register方法，

至此，加減乘除都實作好了，下面我們來實作矩陣乘法，

實作矩陣乘法

先寫測驗用例：

import numpy as np
import torch

from core.tensor import Tensor
from torch import tensor


def test_simple_matmul():
    """2-D matrix product: (3, 2) @ (2, 1) -> (3, 1), with gradient checks."""
    left = Tensor([[1, 2], [3, 4], [5, 6]], requires_grad=True)  # shape (3, 2)
    right = Tensor([[2], [3]], requires_grad=True)  # shape (2, 1)

    product = left @ right  # shape (3, 1)

    assert product.data.tolist() == [[8], [18], [28]]

    upstream = Tensor(np.ones_like(product.data))
    product.backward(upstream)

    # Expected gradients: grad @ y^T for the left operand, x^T @ grad for the right.
    expected_left_grad = upstream.data @ right.data.T
    expected_right_grad = left.data.T @ upstream.data
    np.testing.assert_array_equal(left.grad.data, expected_left_grad)
    np.testing.assert_array_equal(right.grad.data, expected_right_grad)


def test_broadcast_matmul():
    """Batched matrix product with broadcasting, cross-checked against PyTorch."""
    left = Tensor(np.arange(2 * 2 * 4).reshape((2, 2, 4)), requires_grad=True)  # shape (2, 2, 4)
    right = Tensor(np.arange(2 * 4).reshape((4, 2)), requires_grad=True)  # shape (4, 2)

    # (2, 2, 4) @ (4, 2): the right operand is broadcast over the batch axis -> (2, 2, 2)
    product = left @ right
    assert product.shape == (2, 2, 2)

    # Build the same computation with torch.tensor as the reference.
    torch_left = tensor(left.data, dtype=torch.float, requires_grad=True)
    torch_right = tensor(right.data, dtype=torch.float, requires_grad=True)
    torch_product = torch_left @ torch_right

    assert product.data.tolist() == torch_product.data.tolist()

    upstream = np.ones_like(product.data)
    product.backward(Tensor(upstream))
    torch_product.backward(tensor(upstream))

    # Matching PyTorch's gradients is the acceptance criterion.
    assert np.allclose(left.grad.data, torch_left.grad.numpy())
    assert np.allclose(right.grad.data, torch_right.grad.numpy())

這里矩陣乘法有點復雜，不過都在理解廣播和常見的乘法中分析過了，同時我們引入了torch僅用作測驗，

Matmul前向傳播

在常見運算的計算圖中對矩陣乘法的反向傳播進行了分析，我們下面就來實作：

class Matmul(_Function):
    """Computation-graph node for matrix multiplication, ``z = x @ y``."""

    def forward(ctx, x: ndarray, y: ndarray) -> ndarray:
        """Return ``x @ y``; both operands must have at least two dimensions."""
        assert x.ndim > 1 and y.ndim > 1, f"the dim number of x or y must >=2, actual x:{x.ndim}  and y:{y.ndim}"
        ctx.save_for_backward(x, y)
        product = x @ y
        return product

    def backward(ctx, grad: ndarray) -> Tuple[ndarray, ndarray]:
        """Return the gradients for ``x`` and ``y``.

        The last two axes are swapped (instead of using ``.T``) so that only
        the matrix axes are transposed when the operands have more than two
        dimensions. ``unbroadcast`` is then applied with each operand's shape.
        """
        x, y = ctx.saved_tensors
        y_transposed = y.swapaxes(-2, -1)
        grad_x = unbroadcast(grad @ y_transposed, x.shape)
        x_transposed = x.swapaxes(-2, -1)
        grad_y = unbroadcast(x_transposed @ grad, y.shape)
        return grad_x, grad_y

為了適應 (2,2,4) @ (4,2) -> (2,2,4) @ (1,4,2) => (2,2,4) @ (2,4,2) -> (2,2,2)的情況，通過swapaxes交換最后兩個維度的軸，而不是簡單的轉置T，

下面來實作聚合運算，像Sum和Max這些，

實作求和運算

先看測驗用例：

import numpy as np

from core.tensor import Tensor


def test_simple_sum():
    """Summing a 1-D tensor yields a scalar; every element gets gradient 1."""
    values = Tensor([1, 2, 3], requires_grad=True)
    total = values.sum()

    assert total.data == 6

    total.backward()

    assert values.grad.data.tolist() == [1, 1, 1]


def test_sum_with_grad():
    """An explicit upstream gradient of 3 is passed on to every summed element."""
    values = Tensor([1, 2, 3], requires_grad=True)
    total = values.sum()

    upstream = Tensor(3)
    total.backward(upstream)

    assert values.grad.data.tolist() == [3, 3, 3]


def test_matrix_sum():
    """Summing every element of a (2, 3) matrix; the gradient is all ones."""
    matrix = Tensor([[1, 2, 3], [4, 5, 6]], requires_grad=True)  # shape (2, 3)
    total = matrix.sum()
    assert total.data == 21

    total.backward()

    expected_grad = np.ones_like(matrix.data).tolist()
    assert matrix.grad.data.tolist() == expected_grad


def test_matrix_with_axis():
    """Sum along axis 0 without keepdims: (2, 3) -> (3,)."""
    matrix = Tensor([[1, 2, 3], [4, 5, 6]], requires_grad=True)  # shape (2, 3)
    column_sums = matrix.sum(axis=0)  # keepdims defaults to False

    assert column_sums.shape == (3,)
    assert column_sums.data.tolist() == [5, 7, 9]

    column_sums.backward([1, 1, 1])

    assert matrix.grad.data.tolist() == [[1, 1, 1], [1, 1, 1]]


def test_matrix_with_keepdims():
    """Sum along axis 0 with keepdims=True: (2, 3) -> (1, 3)."""
    matrix = Tensor([[1, 2, 3], [4, 5, 6]], requires_grad=True)  # shape (2, 3)
    column_sums = matrix.sum(axis=0, keepdims=True)

    assert column_sums.shape == (1, 3)
    assert column_sums.data.tolist() == [[5, 7, 9]]

    column_sums.backward([1, 1, 1])

    assert matrix.grad.data.tolist() == [[1, 1, 1], [1, 1, 1]]

求和運算計算圖

class Sum(_Function):
    """Computation-graph node for summation over some or all axes."""

    def forward(ctx, x: ndarray, axis=None, keepdims=False) -> ndarray:
        """Return ``x.sum(axis, keepdims=keepdims)``.

        The input shape and the reduction arguments are saved because the
        backward pass needs them to line the gradient up with the input.
        """
        ctx.save_for_backward(x.shape, axis, keepdims)
        return x.sum(axis, keepdims=keepdims)

    def backward(ctx, grad: ndarray) -> ndarray:
        """Expand ``grad`` back to the shape of the forward input.

        Every input element contributes to its sum with weight 1, so the
        gradient is the upstream gradient repeated along the reduced axes.
        """
        x_shape, axis, keepdims = ctx.saved_tensors
        if axis is not None and not keepdims:
            # The reduced axes were dropped in forward. Re-insert them as
            # size-1 axes first; otherwise broadcasting aligns ``grad`` with
            # the trailing axes and fails (or misaligns) for e.g. axis=1.
            # A tuple ``axis`` requires NumPy >= 1.18.
            grad = np.expand_dims(grad, axis)
        # Make the gradient's shape match the input's shape.
        return np.broadcast_to(grad, x_shape)

我們這里支持keepdims引數，

下面實作一元操作，比較簡單，根據計算圖可以直接寫出來，

實作Log運算

測驗用例：

import math

import numpy as np

from core.tensor import Tensor


def test_simple_log():
    """Natural log of a scalar: the value is ln(10) and the gradient is 1/10."""
    value = Tensor(10, requires_grad=True)
    logged = value.log()

    np.testing.assert_array_almost_equal(logged.data, math.log(10))

    logged.backward()

    np.testing.assert_array_almost_equal(value.grad.data.tolist(), 0.1)


def test_array_log():
    """Element-wise natural log of a 1-D tensor; the gradient is 1/x per element."""
    values = Tensor([1, 2, 3], requires_grad=True)
    logged = values.log()

    np.testing.assert_array_almost_equal(logged.data, np.log([1, 2, 3]))

    logged.backward([1, 1, 1])

    np.testing.assert_array_almost_equal(values.grad.data.tolist(), [1, 0.5, 1 / 3])

Log計算圖

class Log(_Function):
    """Computation-graph node for the natural logarithm, ``z = ln(x)``."""

    def forward(ctx, x: ndarray) -> ndarray:
        """Return ``np.log(x)`` (the natural log) and keep ``x`` for the backward pass."""
        ctx.save_for_backward(x)
        natural_log = np.log(x)
        return natural_log

    def backward(ctx, grad: ndarray) -> ndarray:
        """Return ``grad / x``, since d ln(x) / dx = 1 / x."""
        (x,) = ctx.saved_tensors
        grad_x = grad / x
        return grad_x

實作Exp運算

測驗用例：

import numpy as np

from core.tensor import Tensor


def test_simple_exp():
    """Exponential of a scalar: both the value and the gradient equal e**2."""
    value = Tensor(2, requires_grad=True)
    exponentiated = value.exp()  # e^2

    np.testing.assert_array_almost_equal(exponentiated.data, np.exp(2))

    exponentiated.backward()

    np.testing.assert_array_almost_equal(value.grad.data, np.exp(2))


def test_array_exp():
    """Element-wise exponential of a 1-D tensor; the gradient equals exp(x)."""
    values = Tensor([1, 2, 3], requires_grad=True)
    exponentiated = values.exp()

    np.testing.assert_array_almost_equal(exponentiated.data, np.exp([1, 2, 3]))

    exponentiated.backward([1, 1, 1])

    np.testing.assert_array_almost_equal(values.grad.data, np.exp([1, 2, 3]))

Exp計算圖

class Exp(_Function):
    """Computation-graph node for the exponential, ``z = e**x``."""

    def forward(ctx, x: ndarray) -> ndarray:
        """Return ``np.exp(x)`` and keep ``x`` for the backward pass."""
        ctx.save_for_backward(x)
        return np.exp(x)

    def backward(ctx, grad: ndarray) -> ndarray:
        """Return ``grad * exp(x)``, the gradient with respect to ``x``.

        d e**x / dx = e**x is only the local derivative. It must be scaled by
        the upstream ``grad``; otherwise the result is correct only when the
        upstream gradient happens to be all ones.
        """
        x, = ctx.saved_tensors
        return grad * np.exp(x)

實作Pow運算

from core.tensor import Tensor


def test_simple_pow():
    """Scalar power with a constant exponent: 2**2 = 4 and the gradient is 2 * 2 = 4."""
    base = Tensor(2, requires_grad=True)
    exponent = 2
    powered = base ** exponent

    assert powered.data == 4

    powered.backward()

    assert base.grad.data == 4


def test_array_pow():
    """Element-wise cube of a 1-D tensor; the gradient is 3 * x**2 per element."""
    base = Tensor([1, 2, 3], requires_grad=True)
    exponent = 3
    powered = base ** exponent

    assert powered.data.tolist() == [1, 8, 27]

    powered.backward([1, 1, 1])

    assert base.grad.data.tolist() == [3, 12, 27]

Pow計算圖

class Pow(_Function):
    """Computation-graph node for a power with a constant exponent, ``z = x ** c``."""

    def forward(ctx, x: ndarray, c: ndarray) -> ndarray:
        """Return ``x ** c`` and keep both the base and the exponent for the backward pass."""
        ctx.save_for_backward(x, c)
        powered = x ** c
        return powered

    def backward(ctx, grad: ndarray) -> Tuple[ndarray, None]:
        """Return ``(grad * c * x ** (c - 1), None)``.

        The exponent ``c`` is treated as a constant, so it gets no gradient
        and ``None`` is returned in its slot.
        """
        x, c = ctx.saved_tensors
        grad_x = grad * c * x ** (c - 1)
        return grad_x, None

實作 $y = x^c$，這里 $c$ 看成是常量，變數是 $x$。常量 $c$ 不需要計算梯度，我們回傳None即可。

實作取負數

其實就是加一個負號y = -x，

import numpy as np

from core.tensor import Tensor


def test_simple_exp():
    """Negation of a scalar: the value is -2 and the gradient is -1.

    NOTE(review): the name says "exp" but the body tests unary negation;
    consider renaming it to ``test_simple_neg``.
    """
    value = Tensor(2, requires_grad=True)
    negated = -value  # -2

    assert negated.data == -2

    negated.backward()

    assert value.grad.data == -1


def test_array_exp():
    """Element-wise negation of a 1-D tensor; the gradient is -1 per element.

    NOTE(review): the name says "exp" but the body tests unary negation;
    consider renaming it to ``test_array_neg``.
    """
    values = Tensor([1, 2, 3], requires_grad=True)

    negated = -values

    np.testing.assert_array_equal(negated.data, [-1, -2, -3])

    negated.backward([1, 1, 1])

    np.testing.assert_array_equal(values.grad.data, [-1, -1, -1])

實作負數的計算圖

class Neg(_Function):
    """Computation-graph node for unary negation, ``z = -x``."""

    def forward(ctx, x: ndarray) -> ndarray:
        """Return ``-x``; nothing is saved because the backward pass does not need the input."""
        negated = -x
        return negated

    def backward(ctx, grad: ndarray) -> ndarray:
        """Return ``-grad``, since d(-x)/dx = -1."""
        grad_x = -grad
        return grad_x

總結

本文實作了常見運算的計算圖，下篇文章會實作剩下的諸如求最大值、切片、變形和轉置等運算，

轉載請註明出處，本文鏈接：https://www.uj5u.com/qita/397334.html

標籤：AI

上一篇：【一起入門MachineLearning】中科院機器學習-期末題庫-【計算題5+單選題19+單選題20+簡答題21】

下一篇：電商資料分析：GROW品類增長模型

從零實作深度學習框架——實作常見運算的計算圖(上)

引言