Chapter 7: Convolutional Neural Networks

7.1 Overall Architecture

7.2 Convolutional Layers

7.2.1 Problems with Fully Connected Layers

7.2.2 The Convolution Operation

7.2.3 Padding

7.2.4 Stride

7.2.5 Convolution on 3D Data

7.2.6 Thinking in Blocks

7.2.7 Batch Processing

7.3 Pooling Layers

7.3.1 Characteristics of Pooling Layers

7.4 Implementing the Convolution/Pooling Layers

7.4.1 4D Arrays

julia> x = rand(10, 1, 28, 28); # generate random data

julia> size(x)
(10, 1, 28, 28)

julia> size(x[1, :, :, :]) # the first example
(1, 28, 28)

julia> size(x[2, :, :, :]) # the second example
(1, 28, 28)

julia> size(x[1, 1, :, :]) # the first channel of the first example
(28, 28)

7.4.2 Expansion by im2col

7.4.3 Implementing the Convolution Layer

im2col(input_data, filter_h, filter_w; stride=1, pad=0)
  • input_data ── input data as a 4D array of shape (number of examples, channels, height, width)
  • filter_h ── filter height
  • filter_w ── filter width
  • stride ── stride
  • pad ── padding

x1 = rand(1, 3, 7, 7);
col1 = im2col(x1, 5, 5, stride=1, pad=0)
println(size(col1)) # (9, 75)

x2 = rand(10, 3, 7, 7);
col2 = im2col(x2, 5, 5, stride=1, pad=0)
println(size(col2)) # (90, 75)
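
The im2col used above lives in the port's shared utilities and is not reproduced in this section. As a reference point, here is a minimal sketch of such a function, written for column-major Julia so that its output agrees with the shapes printed above and with the reshape calls in the layers that follow. The padding copy and the loop structure are assumptions about the helper's internals, not the port's actual code.

function im2col(input_data, filter_h, filter_w; stride=1, pad=0)
    N, C, H, W = size(input_data)
    out_h = (H + 2*pad - filter_h) ÷ stride + 1
    out_w = (W + 2*pad - filter_w) ÷ stride + 1

    # zero-pad the two spatial dimensions
    img = zeros(eltype(input_data), N, C, H + 2*pad, W + 2*pad)
    img[:, :, pad+1:pad+H, pad+1:pad+W] = input_data

    # gather every filter offset (y, x) across all output positions
    col = zeros(eltype(input_data), N, out_h, out_w, C, filter_h, filter_w)
    for x in 1:filter_w, y in 1:filter_h
        y_max = y + stride*(out_h - 1)
        x_max = x + stride*(out_w - 1)
        patch = img[:, :, y:stride:y_max, x:stride:x_max]  # (N, C, out_h, out_w)
        col[:, :, :, :, y, x] = permutedims(patch, (1, 3, 4, 2))
    end

    # rows index (example, output position); columns index (channel, filter offset)
    return reshape(col, N*out_h*out_w, :)
end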

mutable struct Convolution
    W
    b
    stride
    pad
    dW  # gradient of W, filled in by backward (omitted in this section)
    db  # gradient of b, filled in by backward (omitted in this section)
end

function Convolution(W, b; stride=1, pad=0)
    return Convolution(W, b, stride, pad, nothing, nothing)
end

function forward(self::Convolution, x)
    FN, C, FH, FW = size(self.W)
    N, C, H, W = size(x)
    out_h = (H + 2*self.pad - FH) ÷ self.stride + 1
    out_w = (W + 2*self.pad - FW) ÷ self.stride + 1

    col = im2col(x, FH, FW, stride=self.stride, pad=self.pad)
    col_W = reshape(self.W, (FN, :))'  # filters flattened to (FN, C*FH*FW), then transposed
    out = col * col_W .+ self.b

    # reshape to (N, out_h, out_w, FN) and reorder to (N, FN, out_h, out_w)
    out = permutedims(reshape(out, (N, out_h, out_w, :)), (1, 4, 2, 3))

    return out
end
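
As a quick shape check: a 30-filter 5×5 convolution applied to a batch of ten single-channel 28×28 images should yield 24×24 feature maps, since (28 + 0 - 5) ÷ 1 + 1 = 24. The weight and bias values below are illustrative only.

W = 0.01 * randn(30, 1, 5, 5)  # 30 filters of shape (1, 5, 5)
b = zeros(1, 30)
conv = Convolution(W, b, stride=1, pad=0)

x = rand(10, 1, 28, 28)
out = forward(conv, x)
println(size(out))  # (10, 30, 24, 24)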

7.4.4 Implementing the Pooling Layer

mutable struct Pooling
    pool_h
    pool_w
    stride
    pad
end

function Pooling(pool_h, pool_w; stride=2, pad=0)
    return Pooling(pool_h, pool_w, stride, pad)
end

function forward(self::Pooling, x)
    N, C, H, W = size(x)
    out_h = (H - self.pool_h) ÷ self.stride + 1
    out_w = (W - self.pool_w) ÷ self.stride + 1

    # expand (1)
    col = im2col(x, self.pool_h, self.pool_w, stride=self.stride, pad=self.pad)
    col = reshape(col, (:, self.pool_h*self.pool_w))

    # take the maximum of each row (2)
    out = maximum(col, dims=2)
    # reshape back to 4D (3)
    out = permutedims(reshape(out, (N, out_h, out_w, C)), (1, 4, 2, 3))

    return out
end
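
Chaining this after the convolution above: 2×2 max pooling with stride 2 halves each spatial dimension, since (24 - 2) ÷ 2 + 1 = 12.

pool = Pooling(2, 2, stride=2)

x = rand(10, 30, 24, 24)  # e.g. the convolution output from the previous example
out = forward(pool, x)
println(size(out))  # (10, 30, 12, 12)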

7.5 Implementing a CNN

using OrderedCollections # provides OrderedDict (also re-exported by DataStructures)

mutable struct SimpleConvNet
    params::Dict
    layers::OrderedDict
    last_layer
end

# keyword arguments, so this method cannot clash with the three-field
# default constructor used at the end of this function
function SimpleConvNet(; input_dim=(1, 28, 28),
                       conv_param=Dict("filter_num"=>30, "filter_size"=>5,
                                       "pad"=>0, "stride"=>1),
                       hidden_size=100, output_size=10, weight_init_std=0.01)
    filter_num = conv_param["filter_num"]
    filter_size = conv_param["filter_size"]
    filter_pad = conv_param["pad"]
    filter_stride = conv_param["stride"]
    input_size = input_dim[2]
    conv_output_size = (input_size - filter_size + 2*filter_pad) / filter_stride + 1
    pool_output_size = round(Int, filter_num * (conv_output_size/2) * (conv_output_size/2))

    params = Dict(
        "W1" => weight_init_std * randn(filter_num, input_dim[1], filter_size, filter_size),
        "b1" => zeros(1, filter_num),
        "W2" => weight_init_std * randn(pool_output_size, hidden_size),
        "b2" => zeros(1, hidden_size),
        "W3" => weight_init_std * randn(hidden_size, output_size),
        "b3" => zeros(1, output_size)
    )

    layers = OrderedDict(
        "Conv1" => Convolution(params["W1"], params["b1"],
                               stride=conv_param["stride"],
                               pad=conv_param["pad"]),
        "Relu1" => Relu(),
        "Pool1" => Pooling(2, 2, stride=2),
        "Affine1" => Affine(params["W2"], params["b2"]),
        "Relu2" => Relu(),
        "Affine2" => Affine(params["W3"], params["b3"]),
    )
    last_layer = SoftmaxWithLoss()

    return SimpleConvNet(params, layers, last_layer)
end
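
With the default arguments the size arithmetic works out as follows: the convolution maps 28×28 inputs to (28 - 5 + 0) ÷ 1 + 1 = 24, the 2×2 pooling halves that to 12, and Affine1 therefore receives 30 × 12 × 12 = 4320 features.

network = SimpleConvNet(input_dim=(1, 28, 28),
                        conv_param=Dict("filter_num"=>30, "filter_size"=>5,
                                        "pad"=>0, "stride"=>1),
                        hidden_size=100, output_size=10)
println(size(network.params["W2"]))  # (4320, 100)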

function predict(self::SimpleConvNet, x)
    for layer in values(self.layers)
        x = forward(layer, x)
    end
    return x
end

function loss(self::SimpleConvNet, x, t)
    y = predict(self, x)
    return forward(self.last_layer, y, t)
end

function gradient(self::SimpleConvNet, x, t)
    # forward
    loss(self, x, t)

    # backward
    dout = 1
    dout = backward(self.last_layer, dout)

    layers = reverse(collect(values(self.layers)))
    for layer in layers
        dout = backward(layer, dout)
    end

    # collect the gradients stored by each layer
    grads = Dict(
        "W1" => self.layers["Conv1"].dW,
        "b1" => self.layers["Conv1"].db,
        "W2" => self.layers["Affine1"].dW,
        "b2" => self.layers["Affine1"].db,
        "W3" => self.layers["Affine2"].dW,
        "b3" => self.layers["Affine2"].db
    )
    return grads
end
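
A single SGD step using these gradients might look like the sketch below, assuming the backward methods for each layer are defined (they are omitted in this section) and assuming one-hot labels, which is taken here as the convention of the SoftmaxWithLoss layer from the earlier chapters; real training would draw x_batch and t_batch from MNIST.

network = SimpleConvNet()
x_batch = rand(100, 1, 28, 28)  # stand-in for a MNIST mini-batch
t_batch = zeros(100, 10)        # random one-hot stand-in labels
for i in 1:100
    t_batch[i, rand(1:10)] = 1.0
end

grads = gradient(network, x_batch, t_batch)
for (key, grad) in grads
    network.params[key] .-= 0.1 .* grad  # plain SGD, learning rate 0.1
end

Because the layers hold references to the same arrays as network.params, the in-place update with .-= keeps the layer weights and the params dictionary consistent.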

7.6 Visualizing a CNN

7.6.1 Visualizing the First-Layer Weights

7.6.2 Hierarchical Information Extraction

7.7 Representative CNNs

7.7.1 LeNet

7.7.2 AlexNet

7.8 Summary