inference-engine/thirdparty/clDNN/kernel_selector/core/cl_kernels/convolution_grad_weights_ref.cl

   1 // Copyright (c) 2016-2017 Intel Corporation
   2 //
   3 // Licensed under the Apache License, Version 2.0 (the "License");
   4 // you may not use this file except in compliance with the License.
   5 // You may obtain a copy of the License at
   6 //
   7 //      http://www.apache.org/licenses/LICENSE-2.0
   8 //
   9 // Unless required by applicable law or agreed to in writing, software
  10 // distributed under the License is distributed on an "AS IS" BASIS,
  11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  12 // See the License for the specific language governing permissions and
  13 // limitations under the License.
  14
  15 #include "include/include_all.cl"
  16 KERNEL(convolution_grad_weights_gpu_ref)(
  17     const __global UNIT_TYPE* input_grad,
  18     __global UNIT_TYPE* output,
  19     __global float* filter,
  20 #if BIAS_TERM
  21     __global float* bias,
  22 #endif
  23 #if MOMENTUM
  24     __global float* prev_grad_w,
  25 #if BIAS_TERM
  26     __global float* prev_grad_b,
  27 #endif
  28 #endif
  29     const __global UNIT_TYPE* input,
  30     uint split_idx,
  31     float lr)
  32 {
  33     const uint ofm_ifm       = get_global_id(0);
  34     const uint id_x          = (uint)get_global_id(1);
  35     const uint id_y          = (uint)get_global_id(2);
  36     const uint ifm           = ofm_ifm % INPUT1_FEATURE_NUM;
  37     const uint ofm           = ofm_ifm / INPUT1_FEATURE_NUM;
  38
  39     const int in_x    = id_x - PADDING_SIZE_X;
  40     const int in_y    = id_y - PADDING_SIZE_Y;
  41
  42     ACCUMULATOR_TYPE grad_w = 0;
  43
  44 #if BIAS_TERM
  45     ACCUMULATOR_TYPE grad_b = 0;
  46 #endif
  47
  48     uint weights_idx = ofm * FILTER_OFM_PITCH + ifm * FILTER_IFM_PITCH + id_y * FILTER_Y_PITCH + id_x * FILTER_X_PITCH;
  49
  50     for(int b = 0; b < INPUT0_BATCH_NUM; b++)
  51     {
  52         ACCUMULATOR_TYPE result = ACCUMULATOR_TYPE_ZERO;
  53
  54 #if BIAS_TERM
  55         ACCUMULATOR_TYPE result_bias = ACCUMULATOR_TYPE_ZERO;
  56 #endif
  57
  58         const uint grad_split_offset = split_idx * INPUT0_FEATURE_PITCH * FILTER_OFM_NUM;
  59         const uint in_split_offset = split_idx * INPUT1_FEATURE_PITCH * FILTER_IFM_NUM;
  60
  61         for (uint i = 0; i < INPUT0_SIZE_Y; i++)
  62         {
  63             for (uint j = 0; j < INPUT0_SIZE_X; j++)
  64             {
  65                 const int input_offset_y = in_y + i * STRIDE_SIZE_Y;
  66                 const bool zero_y = input_offset_y >= INPUT1_SIZE_Y || input_offset_y < 0;
  67                 const int input_offset_x = in_x + j * STRIDE_SIZE_X;
  68                 const bool zero_x = input_offset_x >= INPUT1_SIZE_X || input_offset_x < 0;
  69 #if BIAS_TERM
  70                 uint input_grad_idx = grad_split_offset + b*INPUT0_BATCH_PITCH + ofm*INPUT0_FEATURE_PITCH + j*INPUT0_X_PITCH + i*INPUT0_Y_PITCH;
  71                 ACCUMULATOR_TYPE grad = TO_ACCUMULATOR_TYPE(input_grad[input_grad_idx]);
  72 #endif
  73                 if(!zero_x && !zero_y)
  74                 {
  75                     uint input_idx = in_split_offset + b*INPUT1_BATCH_PITCH + ifm*INPUT1_FEATURE_PITCH + (uint)input_offset_x*INPUT1_X_PITCH + (uint)input_offset_y*INPUT1_Y_PITCH;
  76 #if BIAS_TERM
  77                     result = fma(TO_ACCUMULATOR_TYPE(input[input_idx]), grad, result);
  78 #else
  79                     uint input_grad_idx = grad_split_offset + b*INPUT0_BATCH_PITCH + ofm*INPUT0_FEATURE_PITCH + j*INPUT0_X_PITCH + i*INPUT0_Y_PITCH;
  80                     result = fma(TO_ACCUMULATOR_TYPE(input[input_idx]), TO_ACCUMULATOR_TYPE(input_grad[input_grad_idx]), result);
  81 #endif
  82                 }
  83 #if BIAS_TERM
  84                 result_bias += grad;
  85 #endif
  86             }
  87         }
  88
  89         grad_w += result;
  90
  91 #if BIAS_TERM
  92         grad_b += result_bias;
  93 #endif
  94     }
  95
  96 #if OUTPUT_GRAD_W
  97     output[weights_idx] = grad_w;
  98 #else
  99     #if MOMENTUM
 100         float update_gradient_w = lr * (grad_w + DECAY_RATE * filter[weights_idx]) + prev_grad_w[weights_idx] * MOMENTUM_FACTOR;
 101         filter[weights_idx] -= update_gradient_w;
 102         prev_grad_w[weights_idx] = update_gradient_w;
 103     #else
 104         filter[weights_idx] -= lr * grad_w + DECAY_RATE * lr * filter[weights_idx];
 105     #endif
 106
 107 #if BIAS_TERM
 108         if(ifm == 0 && id_x == 0 && id_y == 0)
 109         {
 110 #if MOMENTUM
 111         float update_gradient_b = lr * grad_b + prev_grad_b[ofm] * MOMENTUM_FACTOR;
 112         bias[ofm] -= update_gradient_b;
 113         prev_grad_b[ofm] = update_gradient_b;
 114 #else
 115         bias[ofm] -= lr * grad_b;
 116 #endif
 117         }
 118 #endif
 119
 120 #endif
 121
 122 }