Chapter 7 Convolutional Neural Networks
7.1 The Overall Architecture
7.2 The Convolution Layer
7.2.1 Problems with the Fully Connected Layer
7.2.2 Convolution Operations
7.2.3 Padding
7.2.4 Stride
7.2.5 Convolution Operations on Three-Dimensional Data
7.2.6 Thinking in Blocks
7.2.7 Batch Processing
7.3 The Pooling Layer
7.3.1 Characteristics of the Pooling Layer
7.4 Implementing the Convolution and Pooling Layers
7.4.1 Four-Dimensional Arrays
~
julia> x = rand(10, 1, 28, 28); # generate random data
julia> size(x)
(10, 1, 28, 28)
~
julia> size(x[1, :, :, :]) # (1, 28, 28)
julia> size(x[2, :, :, :]) # (1, 28, 28)
~
julia> size(x[1, 1, :, :]) # (28, 28)
~
7.4.2 Expansion by im2col
7.4.3 Implementing the Convolution Layer
~
im2col(input_data, filter_h, filter_w; stride=1, pad=0)
- input_data ── input data as a 4-dimensional array of shape (number of data, channels, height, width)
- filter_h ── filter height
- filter_w ── filter width
- stride ── stride
- pad ── padding
~
x1 = rand(1, 3, 7, 7);
col1 = im2col(x1, 5, 5, stride=1, pad=0)
println(size(col1)) # (9, 75)
x2 = rand(10, 3, 7, 7);
col2 = im2col(x2, 5, 5, stride=1, pad=0)
println(size(col2)) # (90, 75)
~
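This section only uses im2col through the interface above. For concreteness, here is a minimal sketch of one possible Julia implementation; it is an illustrative assumption, not the book's reference code. It reproduces the documented signature and output shapes, and orders the rows and row elements so that the column-major reshape calls in the forward implementations below recover the intended axes.
~
function im2col(input_data, filter_h, filter_w; stride=1, pad=0)
    N, C, H, W = size(input_data)
    out_h = (H + 2*pad - filter_h) ÷ stride + 1
    out_w = (W + 2*pad - filter_w) ÷ stride + 1
    # zero-pad the two spatial axes
    img = zeros(eltype(input_data), N, C, H + 2*pad, W + 2*pad)
    img[:, :, pad+1:pad+H, pad+1:pad+W] = input_data
    col = zeros(eltype(input_data), N*out_h*out_w, C*filter_h*filter_w)
    for j = 1:out_w, i = 1:out_h, n = 1:N
        # row index with n fastest, then i, then j, so that
        # reshape(out, (N, out_h, out_w, :)) in forward decodes correctly
        row = n + (i - 1)*N + (j - 1)*N*out_h
        h0 = (i - 1)*stride
        w0 = (j - 1)*stride
        patch = img[n, :, h0+1:h0+filter_h, w0+1:w0+filter_w]  # (C, filter_h, filter_w)
        col[row, :] = vec(patch)  # channel-fastest flattening, matching reshape(W, (FN, :))
    end
    return col
end
~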
mutable struct Convolution
    W
    b
    stride
    pad
    # intermediate results kept for the backward pass
    x
    col
    col_W
    # parameter gradients, filled in by backward
    dW
    db
end
function Convolution(W, b; stride=1, pad=0)
    return Convolution(W, b, stride, pad, nothing, nothing, nothing, nothing, nothing)
end
function forward(self::Convolution, x)
    FN, C, FH, FW = size(self.W)
    N, C, H, W = size(x)
    out_h = (H + 2*self.pad - FH) ÷ self.stride + 1
    out_w = (W + 2*self.pad - FW) ÷ self.stride + 1

    col = im2col(x, FH, FW, stride=self.stride, pad=self.pad)
    col_W = reshape(self.W, (FN, :))'  # flatten the filters to (C*FH*FW, FN)
    out = col * col_W .+ self.b        # the whole convolution as one matrix product
    out = permutedims(reshape(out, (N, out_h, out_w, :)), [1, 4, 2, 3])

    # keep the input and expanded matrices for backward
    self.x = x
    self.col = col
    self.col_W = col_W
    return out
end
~
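As a quick shape check (a hypothetical example using the layer above): ten 3-channel 7x7 inputs convolved with five 5x5 filters at stride 1 and no padding should yield a (10, 5, 3, 3) output.
~
W = randn(5, 3, 5, 5);      # five 5x5 filters over 3 channels
b = zeros(1, 5);
conv = Convolution(W, b)    # stride=1, pad=0
x = rand(10, 3, 7, 7);
size(forward(conv, x))      # (10, 5, 3, 3)
~
The backward pass is omitted here. It needs a col2im function that inverts the im2col expansion, scattering gradient patches back into the image (with accumulation, since patches overlap). The following is a sketch consistent with the im2col sketch above, again an assumption rather than the book's reference code:
~
function col2im(col, input_shape, filter_h, filter_w; stride=1, pad=0)
    N, C, H, W = input_shape
    out_h = (H + 2*pad - filter_h) ÷ stride + 1
    out_w = (W + 2*pad - filter_w) ÷ stride + 1
    img = zeros(eltype(col), N, C, H + 2*pad, W + 2*pad)
    for j = 1:out_w, i = 1:out_h, n = 1:N
        row = n + (i - 1)*N + (j - 1)*N*out_h
        h0 = (i - 1)*stride
        w0 = (j - 1)*stride
        # accumulate each patch back into the (padded) image
        img[n, :, h0+1:h0+filter_h, w0+1:w0+filter_w] .+=
            reshape(col[row, :], (C, filter_h, filter_w))
    end
    return img[:, :, pad+1:pad+H, pad+1:pad+W]
end
function backward(self::Convolution, dout)
    FN, C, FH, FW = size(self.W)
    # (N, FN, out_h, out_w) -> (N*out_h*out_w, FN), matching the rows of col
    dout = reshape(permutedims(dout, (1, 3, 4, 2)), (:, FN))
    self.db = sum(dout, dims=1)
    self.dW = reshape((self.col' * dout)', (FN, C, FH, FW))
    dcol = dout * self.col_W'
    return col2im(dcol, size(self.x), FH, FW, stride=self.stride, pad=self.pad)
end
~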
7.4.4 Implementing the Pooling Layer
~
mutable struct Pooling
pool_h
pool_w
stride
pad
end
function Pooling(pool_h, pool_w; stride=2, pad=0)
return Pooling(pool_h, pool_w, stride, pad)
end
function forward(self::Pooling, x)
    N, C, H, W = size(x)
    out_h = (H - self.pool_h) ÷ self.stride + 1
    out_w = (W - self.pool_w) ÷ self.stride + 1

    # expand (1)
    col = im2col(x, self.pool_h, self.pool_w, stride=self.stride, pad=self.pad)
    col = reshape(col, (:, self.pool_h*self.pool_w))

    # take the maximum of each pooling window (2)
    out = maximum(col, dims=2)

    # reshape back to (N, C, out_h, out_w) (3)
    out = permutedims(reshape(out, (N, out_h, out_w, C)), (1, 4, 2, 3))
    return out
end
~
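A quick shape check (hypothetical example): 2x2 max pooling with stride 2 halves each spatial dimension.
~
x = rand(10, 5, 4, 4);
pool = Pooling(2, 2)          # 2x2 windows, default stride=2
size(forward(pool, x))        # (10, 5, 2, 2)
~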
7.5 Implementing a CNN
~
using OrderedCollections  # provides OrderedDict

mutable struct SimpleConvNet
    params::Dict
    layers::OrderedDict
    last_layer
end
function SimpleConvNet(input_dim=(1, 28, 28),
conv_param=Dict("filter_num"=>30, "filter_size"=>5,
"pad"=>0, "stride"=>1),
hidden_size=100, output_size=10, weight_init_std=0.01)
filter_num = conv_param["filter_num"]
filter_size = conv_param["filter_size"]
filter_pad = conv_param["pad"]
filter_stride = conv_param["stride"]
input_size = input_dim[2]
    conv_output_size = (input_size - filter_size + 2*filter_pad) ÷ filter_stride + 1
    pool_output_size = filter_num * (conv_output_size ÷ 2) * (conv_output_size ÷ 2)  # 2x2 pooling halves each side
~
params = Dict(
"W1" => weight_init_std * randn(filter_num, input_dim[1], filter_size, filter_size),
"b1" => zeros(1, filter_num),
"W2" => weight_init_std * randn(pool_output_size, hidden_size),
"b2" => zeros(1, hidden_size),
"W3" => weight_init_std * randn(hidden_size, output_size),
"b3" => zeros(1, output_size)
)
~
layers = OrderedDict(
"Conv1" => Convolution(params["W1"], params["b1"],
stride=conv_param["stride"],
pad=conv_param["pad"]),
"Relu1" => Relu(),
"Pool1" => Pooling(2, 2, stride=2),
"Affine1" => Affine(params["W2"], params["b2"]),
"Relu2" => Relu(),
"Affine2" => Affine(params["W3"], params["b3"]),
)
last_layer = SoftmaxWithLoss()
return SimpleConvNet(params, layers, last_layer)
end
~
function predict(self::SimpleConvNet, x)
    for (_, layer) in self.layers
x = forward(layer, x)
end
return x
end
function loss(self::SimpleConvNet, x, t)
y = predict(self, x)
return forward(self.last_layer, y, t)
end
~
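Assuming the Relu, Affine, and SoftmaxWithLoss layers from the previous chapters are defined (with Affine flattening 4-dimensional inputs, as in the book), a hypothetical smoke test on random data could look like this:
~
net = SimpleConvNet()
x = rand(5, 1, 28, 28)        # a dummy mini-batch of five "images"
t = zeros(5, 10)              # one-hot labels
for i in 1:5
    t[i, rand(1:10)] = 1.0
end
println(loss(net, x, t))      # roughly log(10) ≈ 2.3 with fresh weights
~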
function gradient(self::SimpleConvNet, x, t)
# forward
loss(self, x, t)
# backward
dout = 1
dout = backward(self.last_layer, dout)
layers = reverse(collect(values(self.layers)))
    for layer in layers
        dout = backward(layer, dout)
    end
    # collect the gradients stored by each layer
grads = Dict(
"W1" => self.layers["Conv1"].dW,
"b1" => self.layers["Conv1"].db,
"W2" => self.layers["Affine1"].dW,
"b2" => self.layers["Affine1"].db,
"W3" => self.layers["Affine2"].dW,
"b3" => self.layers["Affine2"].db
)
return grads
end
~
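Provided every layer's backward method is defined and fills in dW and db as gradient expects, one hypothetical SGD update, continuing with the net, x, and t from the smoke test above, might be:
~
grads = gradient(net, x, t)
lr = 0.1
for key in keys(grads)
    # in-place update: the layers hold references to the same arrays
    net.params[key] .-= lr .* grads[key]
end
~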