本文共 11447 字,大约阅读时间需要 38 分钟。
文章全部
0x01 make_convolutional_layer
终于又回到了make_convolutional_layer
这个函数
//make_convolutional_layer if(binary){ l.binary_weights = calloc(l.nweights, sizeof(float)); l.cweights = calloc(l.nweights, sizeof(char)); l.scales = calloc(n, sizeof(float)); } if(xnor){ l.binary_weights = calloc(l.nweights, sizeof(float)); l.binary_input = calloc(l.inputs*l.batch, sizeof(float)); } if(batch_normalize){ l.scales = calloc(n, sizeof(float)); l.scale_updates = calloc(n, sizeof(float)); for(i = 0; i < n; ++i){ l.scales[i] = 1; } l.mean = calloc(n, sizeof(float)); l.variance = calloc(n, sizeof(float)); l.mean_delta = calloc(n, sizeof(float)); l.variance_delta = calloc(n, sizeof(float)); l.rolling_mean = calloc(n, sizeof(float)); l.rolling_variance = calloc(n, sizeof(float)); l.x = calloc(l.batch*l.outputs, sizeof(float)); l.x_norm = calloc(l.batch*l.outputs, sizeof(float)); }... fprintf(stderr, "conv %5d %2d x%2d /%2d %4d x%4d x%4d -> %4d x%4d x%4d\n", n, size, size, stride, w, h, c, l.out_w, l.out_h, l.out_c); return l;}
如果你把之前的几篇文章都看过的话,那么这里的参数意义你应该很清楚了。这里面我唯一要说的几个内容是关于cuda编程的,但是我会把这部分内容放到本系列文章的最后去说,如果你感兴趣的话,可以到时候去看看。
至此我们终于结束了make_convolutional_layer
函数
0x02 parse_convolutional
大家可以回到(二)中的0x0103
//parse_convolutional convolutional_layer layer = make_convolutional_layer(batch,h,w,c,n,groups,size,stride,padding,activation, batch_normalize, binary, xnor, params.net->adam); layer.flipped = option_find_int_quiet(options, "flipped", 0); layer.dot = option_find_float_quiet(options, "dot", 0); return layer;}
后面没什么好说的,回到parse_network_cfg
0x03 parse_network_cfg
时隔多日,又回到了这里(二)0x0102
//parse_network_cfg if(lt == CONVOLUTIONAL){ l = parse_convolutional(options, params); }else if(lt == DECONVOLUTIONAL){ l = parse_deconvolutional(options, params); }
我们看这个parse_deconvolutional
函数
0x00301 parse_deconvolutional
/* Build a deconvolutional layer from a parsed [deconvolutional] config section.
 * Reads filter count, kernel size, stride, activation and padding options,
 * validates that the previous layer produced an image (h, w, c all non-zero),
 * and delegates construction to make_deconvolutional_layer. */
layer parse_deconvolutional(list *options, size_params params)
{
    int filters = option_find_int(options, "filters",1);
    int ksize   = option_find_int(options, "size",1);
    int stride  = option_find_int(options, "stride",1);

    char *act_name = option_find_str(options, "activation", "logistic");
    ACTIVATION act = get_activation(act_name);

    int h = params.h, w = params.w, c = params.c;
    int batch = params.batch;
    if(!(h && w && c)) error("Layer before deconvolutional layer must output image.");

    int batch_normalize = option_find_int_quiet(options, "batch_normalize", 0);
    int pad     = option_find_int_quiet(options, "pad",0);
    int padding = option_find_int_quiet(options, "padding",0);
    /* "pad=1" overrides any explicit "padding=" with the conventional size/2. */
    if(pad) padding = ksize/2;

    return make_deconvolutional_layer(batch, h, w, c, filters, ksize, stride, padding,
                                      act, batch_normalize, params.net->adam);
}
上面的一些参数我在之前的文章中已经说过了,这里就不再说明了。直接看关键函数make_deconvolutional_layer
layer make_deconvolutional_layer(int batch, int h, int w, int c, int n, int size, int stride, int padding, ACTIVATION activation, int batch_normalize, int adam){ int i; layer l = { 0}; l.type = DECONVOLUTIONAL;... l.forward = forward_deconvolutional_layer; l.backward = backward_deconvolutional_layer; l.update = update_deconvolutional_layer;
前面的参数信息我这里也不再提了,直接看关键的三个函数,先看第一个forward_deconvolutional_layer
0x030101 forward_deconvolutional_layer
/* Forward pass of a deconvolutional (transposed-convolution) layer.
 * For each image in the batch: multiply the transposed weight matrix by the
 * input feature map into the column workspace, then scatter-add the columns
 * back to image layout with col2im_cpu. This is the reverse of the conv
 * forward pass, where im2col runs BEFORE the gemm and the weights are not
 * transposed. Finally apply batch-norm or bias, then the activation. */
void forward_deconvolutional_layer(const layer l, network net)
{
    int i;

    /* GEMM dimensions: C(m x n) = A^T(m x k) * B(k x n) */
    int m = l.size*l.size*l.n;  /* rows of output: one per weight position per filter */
    int n = l.h*l.w;            /* cols of output: one per input spatial location */
    int k = l.c;                /* reduction dim: input channels */

    /* Zero the whole output buffer before accumulating. */
    fill_cpu(l.outputs*l.batch, 0, l.output, 1);

    for(i = 0; i < l.batch; ++i){
        float *a = l.weights;                      /* weights, used transposed (TA=1) */
        float *b = net.input + i*l.c*l.h*l.w;      /* i-th input image */
        float *c = net.workspace;                  /* column buffer */

        /* First argument 1 = transpose A: this is what makes it a
         * "transposed" convolution. */
        gemm_cpu(1,0,m,n,k,1,a,m,b,n,0,c,n);

        /* Scatter columns back into image form — the inverse of im2col. */
        col2im_cpu(net.workspace, l.out_c, l.out_h, l.out_w, l.size, l.stride, l.pad, l.output+i*l.outputs);
    }
    if (l.batch_normalize) {
        forward_batchnorm_layer(l, net);
    } else {
        add_bias(l.output, l.biases, l.batch, l.n, l.out_w*l.out_h);
    }
    activate_array(l.output, l.batch*l.n*l.out_w*l.out_h, l.activation);
}
这里的函数我在之前的文章中都分析过,我这里主要分析一下这个函数的逻辑。
我们可以对比之前的卷积层,对比后发现区别有两个:
A. 权重矩阵在 gemm 调用中被转置了(gemm_cpu 的第一个参数为 1,表示对 A 做转置);
B. col2im_cpu
函数放在了卷积(gemm)函数的后面,而卷积层中是 im2col 在 gemm 之前。
这几点说明了什么?deconvolutional
确实是一种convolutional
,只是它是一种转置的卷积。
0x030102 backward_deconvolutional_layer
/* Backward pass of a deconvolutional layer.
 * Computes the activation gradient in-place into l.delta, back-propagates
 * through batch-norm or the bias, then for each image accumulates the
 * weight gradient (l.weight_updates) and, if requested, the gradient with
 * respect to the layer input (net.delta). Note the mirror of the forward
 * pass: here im2col is applied to l.delta before the gemm. */
void backward_deconvolutional_layer(layer l, network net)
{
    int i;

    /* delta *= activation'(output) */
    gradient_array(l.output, l.outputs*l.batch, l.activation, l.delta);

    if(l.batch_normalize){
        backward_batchnorm_layer(l, net);
    } else {
        backward_bias(l.bias_updates, l.delta, l.batch, l.n, l.out_w*l.out_h);
    }

    //if(net.delta) memset(net.delta, 0, l.batch*l.h*l.w*l.c*sizeof(float));

    for(i = 0; i < l.batch; ++i){
        /* Weight gradient: dW += input * col(delta)^T */
        int m = l.c;               /* input channels */
        int n = l.size*l.size*l.n; /* weight positions per input channel */
        int k = l.h*l.w;           /* input spatial locations (reduction dim) */

        float *a = net.input + i*m*k;   /* i-th input image */
        float *b = net.workspace;       /* columns of the i-th delta image */
        float *c = l.weight_updates;    /* accumulated weight gradient */

        /* Expand the output-side delta into column form. */
        im2col_cpu(l.delta + i*l.outputs, l.out_c, l.out_h, l.out_w,
                l.size, l.stride, l.pad, b);
        /* TB=1: B is used transposed; beta=1 accumulates across the batch. */
        gemm_cpu(0,1,m,n,k,1,a,k,b,k,1,c,n);

        if(net.delta){
            /* Input gradient: d(input) += W * col(delta) */
            int m = l.c;
            int n = l.h*l.w;
            int k = l.size*l.size*l.n;

            float *a = l.weights;
            float *b = net.workspace;
            float *c = net.delta + i*n*m;

            gemm_cpu(0,0,m,n,k,1,a,k,b,n,1,c,n);
        }
    }
}
这个函数的理解和之前的backward_convolutional_layer
没有太大区别,而且变化也不大。
0x030103 update_deconvolutional_layer
/* SGD-with-momentum parameter update for a deconvolutional layer.
 * For each parameter group (biases, optional batch-norm scales, weights):
 * apply the accumulated gradient scaled by learning_rate/batch, then decay
 * the gradient accumulator by the momentum factor. Weights additionally get
 * L2 weight decay folded into their gradient first. */
void update_deconvolutional_layer(layer l, update_args a)
{
    int batch = a.batch;
    float lr = a.learning_rate * l.learning_rate_scale;
    float step = lr / batch;                  /* per-example step size */
    int nweights = l.size*l.size*l.c*l.n;     /* total number of weights */

    /* Biases. */
    axpy_cpu(l.n, step, l.bias_updates, 1, l.biases, 1);
    scal_cpu(l.n, a.momentum, l.bias_updates, 1);

    /* Batch-norm scales, when allocated. */
    if(l.scales){
        axpy_cpu(l.n, step, l.scale_updates, 1, l.scales, 1);
        scal_cpu(l.n, a.momentum, l.scale_updates, 1);
    }

    /* Weights: decay term first, then the gradient step, then momentum. */
    axpy_cpu(nweights, -a.decay*batch, l.weights, 1, l.weight_updates, 1);
    axpy_cpu(nweights, step, l.weight_updates, 1, l.weights, 1);
    scal_cpu(nweights, a.momentum, l.weight_updates, 1);
}
同样的这个函数也只是起到了更新参数的作用,和之前的update_convolutional_layer
一样。我们回到make_deconvolutional_layer
函数
//make_deconvolutional_layer l.batch_normalize = batch_normalize; if(batch_normalize){ l.scales = calloc(n, sizeof(float)); l.scale_updates = calloc(n, sizeof(float)); for(i = 0; i < n; ++i){ l.scales[i] = 1; }...
这里都是一些参数的配置,我在之前文章中都有说过,这里不再重复。
好的,parse_deconvolutional
这个函数就结束了。
0x0302 parse_local
我们回到parse_network_cfg
函数
else if(lt == LOCAL){ l = parse_local(options, params);
我们来看parse_local
这个函数
/* Build a locally-connected layer from a parsed [local] config section.
 * Reads filter count, kernel size, stride, pad and activation, validates
 * that the previous layer produced an image (h, w, c all non-zero), and
 * delegates construction to make_local_layer. */
local_layer parse_local(list *options, size_params params)
{
    int filters = option_find_int(options, "filters",1);
    int ksize   = option_find_int(options, "size",1);
    int stride  = option_find_int(options, "stride",1);
    int pad     = option_find_int(options, "pad",0);

    char *act_name = option_find_str(options, "activation", "logistic");
    ACTIVATION act = get_activation(act_name);

    int h = params.h, w = params.w, c = params.c;
    int batch = params.batch;
    if(!(h && w && c)) error("Layer before local layer must output image.");

    return make_local_layer(batch, h, w, c, filters, ksize, stride, pad, act);
}
上面的一些参数我在之前的文章中已经说过了,这里就不再说明了。直接看关键函数make_local_layer
local_layer make_local_layer(int batch, int h, int w, int c, int n, int size, int stride, int pad, ACTIVATION activation){ ... l.forward = forward_local_layer; l.backward = backward_local_layer; l.update = update_local_layer; ... fprintf(stderr, "Local Layer: %d x %d x %d image, %d filters -> %d x %d x %d image\n", h,w,c,n, out_h, out_w, n); return l;}
同样的前面的参数配置我们不管了,直接看三个关键的函数,先看第一个forward_local_layer
0x030201 forward_local_layer
/* Forward pass of a locally-connected layer.
 * Like a convolution, but with NO weight sharing: each output location j has
 * its own weight block (l.weights + j*size*size*c*n), so the gemm is done
 * one location at a time with a distinct weight pointer. Biases are copied
 * into the output first, then accumulated into by the gemms (beta=1). */
void forward_local_layer(const local_layer l, network net)
{
    int out_h = local_out_height(l);
    int out_w = local_out_width(l);
    int i, j;
    int locations = out_h * out_w;   /* number of output spatial positions */

    /* Pre-load the per-location biases into the output buffer. */
    for(i = 0; i < l.batch; ++i){
        copy_cpu(l.outputs, l.biases, 1, l.output + i*l.outputs, 1);
    }

    for(i = 0; i < l.batch; ++i){
        float *input = net.input + i*l.w*l.h*l.c;
        /* Expand the i-th image into column form once per image. */
        im2col_cpu(input, l.c, l.h, l.w, l.size, l.stride, l.pad, net.workspace);
        float *output = l.output + i*l.outputs;
        for(j = 0; j < locations; ++j){
            /* Each location j uses its OWN weight block — no sharing. */
            float *a = l.weights + j*l.size*l.size*l.c*l.n;
            float *b = net.workspace + j;   /* j-th column of the im2col buffer */
            float *c = output + j;

            int m = l.n;              /* filters */
            int n = 1;                /* single location */
            int k = l.size*l.size*l.c;

            /* ldb = ldc = locations: b and c are strided column views. */
            gemm(0,0,m,n,k,1,a,k,b,locations,1,c,locations);
        }
    }
    activate_array(l.output, l.outputs*l.batch, l.activation);
}
我们先看前面两个函数
/* Output height of a local layer:
 * without pad: (h - size)/stride + 1; with pad: (h - 1)/stride + 1. */
int local_out_height(local_layer l)
{
    int h = l.h - (l.pad ? 1 : l.size);
    return h/l.stride + 1;
}

/* Output width of a local layer, mirroring local_out_height. */
int local_out_width(local_layer l)
{
    int w = l.w - (l.pad ? 1 : l.size);
    return w/l.stride + 1;
}
这两个函数同样是计算卷积后的图像的高度和宽度,和我们之前的卷积层计算公式对比
(l.h + 2*l.pad - l.size) / l.stride + 1
和卷积层不一样的是,这里的 pad 并没有以 2*l.pad 的形式加到尺寸上,而只是决定减去 l.size
还是减去 1。
我们可以对比之前的卷积层,对比后发现唯一一个区别就是参数b
,我们这里的参数b
是变化的,这个恰恰符合了local layer
的原理。local layer
就是一种权重不共享的卷积层(早期的AlexNet和GoogleNet中有所应用)。
我这里说的可能有一些抽象¬_¬,有时间画个图吧,先就这样了。
0x030202 backward_local_layer
/* Backward pass of a locally-connected layer.
 * Computes the activation gradient, accumulates bias gradients, and then —
 * one output location at a time, because weights are not shared — the
 * per-location weight gradients and (optionally) the input gradient, which
 * is collapsed back to image layout with col2im_cpu. */
void backward_local_layer(local_layer l, network net)
{
    int i, j;
    int locations = l.out_w*l.out_h;

    /* delta *= activation'(output) */
    gradient_array(l.output, l.outputs*l.batch, l.activation, l.delta);

    /* Bias gradient: sum of delta over the batch. */
    for(i = 0; i < l.batch; ++i){
        axpy_cpu(l.outputs, 1, l.delta + i*l.outputs, 1, l.bias_updates, 1);
    }

    for(i = 0; i < l.batch; ++i){
        float *input = net.input + i*l.w*l.h*l.c;
        im2col_cpu(input, l.c, l.h, l.w, l.size, l.stride, l.pad, net.workspace);

        /* Weight gradient for each location's private weight block. */
        for(j = 0; j < locations; ++j){
            float *a = l.delta + i*l.outputs + j;       /* strided delta column */
            float *b = net.workspace + j;               /* strided input column */
            float *c = l.weight_updates + j*l.size*l.size*l.c*l.n;
            int m = l.n;
            int n = l.size*l.size*l.c;
            int k = 1;

            gemm(0,1,m,n,k,1,a,locations,b,locations,1,c,n);
        }

        if(net.delta){
            /* Input gradient: W^T * delta per location, then col2im. */
            for(j = 0; j < locations; ++j){
                float *a = l.weights + j*l.size*l.size*l.c*l.n;
                float *b = l.delta + i*l.outputs + j;
                float *c = net.workspace + j;

                int m = l.size*l.size*l.c;
                int n = 1;
                int k = l.n;

                gemm(1,0,m,n,k,1,a,m,b,locations,0,c,locations);
            }

            col2im_cpu(net.workspace, l.c, l.h, l.w, l.size, l.stride, l.pad, net.delta+i*l.c*l.h*l.w);
        }
    }
}
这里和之前backward_convolutional_layer
的区别还是在参数 b 上:每个输出位置使用自己独立的一组权重,因此要按位置分别计算梯度。
0x030203 update_local_layer
/* SGD-with-momentum parameter update for a locally-connected layer.
 * Identical in form to the convolutional update, except the weight count is
 * multiplied by the number of output locations because weights are not
 * shared across positions. */
void update_local_layer(local_layer l, update_args a)
{
    int batch = a.batch;
    float lr = a.learning_rate * l.learning_rate_scale;
    float step = lr / batch;                         /* per-example step size */
    int locations = l.out_w*l.out_h;
    int nweights = l.size*l.size*l.c*l.n*locations;  /* one weight set per location */

    /* Biases (one per output element). */
    axpy_cpu(l.outputs, step, l.bias_updates, 1, l.biases, 1);
    scal_cpu(l.outputs, a.momentum, l.bias_updates, 1);

    /* Weights: decay term first, then the gradient step, then momentum. */
    axpy_cpu(nweights, -a.decay*batch, l.weights, 1, l.weight_updates, 1);
    axpy_cpu(nweights, step, l.weight_updates, 1, l.weights, 1);
    scal_cpu(nweights, a.momentum, l.weight_updates, 1);
}
这个函数没什么好说的,就是更新参数信息。
至此parse_local
函数就分析完了,下一章我们会回到parse_network_cfg
函数
由于本人水平有限,文中有不对之处,希望大家指出,谢谢^_^!
转载地址:https://coordinate.blog.csdn.net/article/details/78875613 如侵犯您的版权,请留言回复原文章的地址,我们会给您删除此文章,给您带来不便请您谅解!