1 // Copyright (c) 2016-2017 Intel Corporation
3 // Licensed under the Apache License, Version 2.0 (the "License");
4 // you may not use this file except in compliance with the License.
5 // You may obtain a copy of the License at
7 // http://www.apache.org/licenses/LICENSE-2.0
9 // Unless required by applicable law or agreed to in writing, software
10 // distributed under the License is distributed on an "AS IS" BASIS,
11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 // See the License for the specific language governing permissions and
13 // limitations under the License.
15 #include "include/include_all.cl"
// Reference kernel for the convolution weight-gradient step.
//
// Each work-item addresses a single filter element: global id 0 encodes the
// (ofm, ifm) feature pair, while global ids 1 and 2 give the x / y position
// used to index `filter`. The kernel multiplies `input` values with
// `input_grad` values over the INPUT0 spatial extent for every batch, and then
// updates `filter[weights_idx]` (and `bias[ofm]`) in place using `lr`.
//
// NOTE(review): this listing is incomplete. Braces, the end of the parameter
// list and (presumably) preprocessor conditionals are not visible, so the
// exact scoping of the statements below cannot be confirmed from here.
// `split_idx` and `lr` are used but not declared in the visible lines;
// presumably they are trailing kernel parameters -- TODO confirm.
16 KERNEL(convolution_grad_weights_gpu_ref)(
// Gradient values, read-only; indexed with the INPUT0_* pitches below.
17 const __global UNIT_TYPE* input_grad,
// NOTE(review): `output` is not referenced in any visible line.
18 __global UNIT_TYPE* output,
// Filter weights; read and updated in place at `weights_idx`.
19 __global UNIT_TYPE* filter,
// Bias values; updated in place at index `ofm`.
21 __global UNIT_TYPE* bias,
// Previous weight update; read and overwritten at `weights_idx`.
24 __global UNIT_TYPE* prev_grad_w,
// Previous bias update; read and overwritten at index `ofm`.
26 __global UNIT_TYPE* prev_grad_b,
// Input values, read-only; indexed with the INPUT1_* pitches below.
29 const __global UNIT_TYPE* input,
// Global id 0 packs both feature indices; ids 1 and 2 are the filter x / y.
33 const uint ofm_ifm = get_global_id(0);
34 const uint id_x = (uint)get_global_id(1);
35 const uint id_y = (uint)get_global_id(2);
// Unpack: ifm is the remainder, ofm the quotient, by INPUT1_FEATURE_NUM.
36 const uint ifm = ofm_ifm % INPUT1_FEATURE_NUM;
37 const uint ofm = ofm_ifm / INPUT1_FEATURE_NUM;
// Starting input coordinate for this filter position, shifted by the padding.
// Signed, because it can be negative when the position falls in the padding.
39 const int in_x = id_x - PADDING_SIZE_X;
40 const int in_y = id_y - PADDING_SIZE_Y;
// Accumulators later consumed by the weight / bias updates.
// NOTE(review): no visible line adds `result` into `grad_w`; presumably that
// statement is among the lines missing from this listing -- TODO confirm.
42 ACCUMULATOR_TYPE grad_w = 0;
45 ACCUMULATOR_TYPE grad_b = 0;
// Linear index of the filter element this work-item reads and updates.
48 uint weights_idx = ofm * FILTER_OFM_PITCH + ifm * FILTER_IFM_PITCH + id_y * FILTER_Y_PITCH + id_x * FILTER_X_PITCH;
// Iterate over every batch entry.
50 for(int b = 0; b < INPUT0_BATCH_NUM; b++)
// Partial sums, declared after the batch loop header.
52 UNIT_TYPE result = UNIT_VAL_ZERO;
// NOTE(review): no visible line updates `result_bias` before it is added to
// `grad_b`; presumably the accumulation is in a missing line -- TODO confirm.
55 UNIT_TYPE result_bias = UNIT_VAL_ZERO;
// Offsets derived from `split_idx`, added to the input_grad / input indices.
58 const uint grad_split_offset = split_idx * INPUT0_FEATURE_PITCH * FILTER_OFM_NUM;
59 const uint in_split_offset = split_idx * INPUT1_FEATURE_PITCH * FILTER_IFM_NUM;
// Walk the INPUT0 spatial extent: i over Y, j over X.
61 for (uint i = 0; i < INPUT0_SIZE_Y; i++)
63 for (uint j = 0; j < INPUT0_SIZE_X; j++)
// Input coordinate paired with gradient position (j, i); the zero_* flags
// mark coordinates that fall outside [0, INPUT1_SIZE_*).
65 const int input_offset_y = in_y + i * STRIDE_SIZE_Y;
66 const bool zero_y = input_offset_y >= INPUT1_SIZE_Y || input_offset_y < 0;
67 const int input_offset_x = in_x + j * STRIDE_SIZE_X;
68 const bool zero_x = input_offset_x >= INPUT1_SIZE_X || input_offset_x < 0;
// Gradient value at (b, ofm, i, j), loaded before the bounds check.
// NOTE(review): `input_grad_idx` is declared again further down with the same
// expression; presumably the two declarations sit in alternative
// conditional-compilation branches that are not visible here -- TODO confirm.
70 uint input_grad_idx = grad_split_offset + b*INPUT0_BATCH_PITCH + ofm*INPUT0_FEATURE_PITCH + j*INPUT0_X_PITCH + i*INPUT0_Y_PITCH;
71 UNIT_TYPE grad = input_grad[input_grad_idx];
// Guard for the input read: both coordinates must be in range.
73 if(!zero_x && !zero_y)
75 uint input_idx = in_split_offset + b*INPUT1_BATCH_PITCH + ifm*INPUT1_FEATURE_PITCH + (uint)input_offset_x*INPUT1_X_PITCH + (uint)input_offset_y*INPUT1_Y_PITCH;
// result += input * grad, using the gradient value loaded above.
77 result = fma(input[input_idx], grad, result);
// Same accumulation, but loading the gradient value directly from input_grad.
// NOTE(review): presumably an alternative to the fma above, not an addition.
79 uint input_grad_idx = grad_split_offset + b*INPUT0_BATCH_PITCH + ofm*INPUT0_FEATURE_PITCH + j*INPUT0_X_PITCH + i*INPUT0_Y_PITCH;
80 result = fma(input[input_idx], input_grad[input_grad_idx], result);
// Fold this batch's bias partial sum into the bias gradient.
92 grad_b += result_bias;
// Weight update using the previous update: the new update is
// lr * (grad_w + DECAY_RATE * w) + prev_update * MOMENTUM_FACTOR. It is
// subtracted from the weight and stored back into prev_grad_w.
97 UNIT_TYPE update_gradient_w = lr * (grad_w + DECAY_RATE * filter[weights_idx]) + prev_grad_w[weights_idx] * MOMENTUM_FACTOR;
98 filter[weights_idx] -= update_gradient_w;
99 prev_grad_w[weights_idx] = update_gradient_w;
// Weight update without the previous-update term.
// NOTE(review): presumably an alternative to the three lines above (selected
// by a conditional not visible here), not an additional update -- TODO confirm.
101 filter[weights_idx] -= lr * grad_w + DECAY_RATE * lr * filter[weights_idx];
// Condition selecting the work-item with ifm == 0 at filter position (0, 0);
// presumably it guards the bias updates below so that each `ofm` entry is
// written by a single work-item -- scope not visible, TODO confirm.
105 if(ifm == 0 && id_x == 0 && id_y == 0)
// Bias update using the previous update, stored back into prev_grad_b.
108 UNIT_TYPE update_gradient_b = lr * grad_b + prev_grad_b[ofm] * MOMENTUM_FACTOR;
109 bias[ofm] -= update_gradient_b;
110 prev_grad_b[ofm] = update_gradient_b;
// Bias update without the previous-update term (presumably the alternative
// to the three lines above).
112 bias[ofm] -= lr * grad_b;