inference-engine/thirdparty/clDNN/kernel_selector/core/cl_kernels/convolution_grad_weights_yxfb.cl

   1 // Copyright (c) 2018 Intel Corporation
   2 //
   3 // Licensed under the Apache License, Version 2.0 (the "License");
   4 // you may not use this file except in compliance with the License.
   5 // You may obtain a copy of the License at
   6 //
   7 //      http://www.apache.org/licenses/LICENSE-2.0
   8 //
   9 // Unless required by applicable law or agreed to in writing, software
  10 // distributed under the License is distributed on an "AS IS" BASIS,
  11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  12 // See the License for the specific language governing permissions and
  13 // limitations under the License.
  14
  15 #include "include/include_all.cl"
  16
  17 __attribute__((intel_reqd_sub_group_size(16)))
  18 KERNEL(convolution_grad_weights_gpu_ref)(
  19     const __global UNIT_TYPE* input_grad,
  20     __global UNIT_TYPE* output,
  21     __global UNIT_TYPE* filter,
  22 #if BIAS_TERM
  23     __global UNIT_TYPE* bias,
  24 #endif
  25 #if MOMENTUM
  26     __global UNIT_TYPE* prev_grad_w,
  27 #if BIAS_TERM
  28     __global UNIT_TYPE* prev_grad_b,
  29 #endif
  30 #endif
  31     const __global UNIT_TYPE* input,
  32     uint split_idx,
  33     float lr)
  34 {
  35     const uint local_id = get_local_id(0);
  36     const uint ofm_ifm  = get_global_id(1);
  37     const uint id_x_y   = get_global_id(2);
  38
  39     const uint id_x     = id_x_y % FILTER_SIZE_X;
  40     const uint id_y     = id_x_y / FILTER_SIZE_X;
  41     const uint ifm      = ofm_ifm % INPUT1_FEATURE_NUM;
  42     const uint ofm      = ofm_ifm / INPUT1_FEATURE_NUM;
  43
  44     const int in_x      = id_x - PADDING_SIZE_X;
  45     const int in_y      = id_y - PADDING_SIZE_Y;
  46
  47     ACCUMULATOR_TYPE grad_w = 0;
  48 #if BIAS_TERM
  49     ACCUMULATOR_TYPE grad_b = 0;
  50 #endif
  51
  52     const uint grad_split_offset = split_idx * INPUT0_FEATURE_PITCH * FILTER_OFM_NUM;
  53     const uint in_split_offset = split_idx * INPUT1_FEATURE_PITCH * FILTER_IFM_NUM;
  54
  55     uint weights_idx = ofm * FILTER_OFM_PITCH + ifm * FILTER_IFM_PITCH + id_y * FILTER_Y_PITCH + id_x * FILTER_X_PITCH;
  56
  57     for(int y = 0; y < INPUT0_SIZE_Y; y++)
  58     {
  59         const int input_offset_y = in_y + y * STRIDE_SIZE_Y;
  60         const bool zero_y = input_offset_y >= INPUT1_SIZE_Y || input_offset_y < 0;
  61         for (uint x = 0; x < INPUT0_SIZE_X; x++)
  62         {
  63             const int input_offset_x = in_x + x * STRIDE_SIZE_X;
  64             const bool zero_x = input_offset_x >= INPUT1_SIZE_X || input_offset_x < 0;
  65             for (uint b = 0; b < INPUT0_BATCH_NUM / 16; b++)
  66             {
  67 #if BIAS_TERM
  68                 uint input_grad_idx = grad_split_offset + b*16*INPUT0_BATCH_PITCH + ofm*INPUT0_FEATURE_PITCH + x*INPUT0_X_PITCH + y*INPUT0_Y_PITCH;
  69                 UNIT_TYPE grad = as_float(intel_sub_group_block_read((const __global uint*)(input_grad + input_grad_idx)));
  70                 grad_b += grad;
  71 #endif
  72                 if(!zero_x && !zero_y)
  73                 {
  74                 uint input_idx = in_split_offset + b*16*INPUT1_BATCH_PITCH + ifm*INPUT1_FEATURE_PITCH + (uint)input_offset_x*INPUT1_X_PITCH + (uint)input_offset_y*INPUT1_Y_PITCH;
  75 #if BIAS_TERM
  76                 grad_w = fma(as_float(intel_sub_group_block_read((const __global uint*)(input + input_idx))), grad, grad_w);
  77 #else
  78                 uint input_grad_idx = grad_split_offset + b*16*INPUT0_BATCH_PITCH + ofm*INPUT0_FEATURE_PITCH + x*INPUT0_X_PITCH + y*INPUT0_Y_PITCH;
  79                 grad_w = fma(as_float(intel_sub_group_block_read((const __global uint*)(input + input_idx))), as_float(intel_sub_group_block_read((const __global uint*)(input_grad + input_grad_idx))), grad_w);
  80 #endif
  81                 }
  82             }
  83         }
  84     }
  85
  86     grad_w = sub_group_reduce_add(grad_w);
  87 #if BIAS_TERM
  88     grad_b = sub_group_reduce_add(grad_b);
  89 #endif
  90
  91     if (local_id == 0)
  92     {
  93 #if OUTPUT_GRAD_W
  94         output[weights_idx] = grad_w;
  95 #else
  96     #if MOMENTUM
  97             UNIT_TYPE update_gradient_w = lr * (grad_w + DECAY_RATE * filter[weights_idx]) + prev_grad_w[weights_idx] * MOMENTUM_FACTOR;
  98             filter[weights_idx] -= update_gradient_w;
  99             prev_grad_w[weights_idx] = update_gradient_w;
 100     #else
 101             filter[weights_idx] -= lr * (grad_w + DECAY_RATE * filter[weights_idx]);
 102     #endif
 103
 104 #if BIAS_TERM
 105         if(ifm == 0 && id_x == 0 && id_y == 0)
 106         {
 107 #if MOMENTUM
 108             UNIT_TYPE update_gradient_b = lr * grad_b + prev_grad_b[ofm] * MOMENTUM_FACTOR;
 109             bias[ofm] -= update_gradient_b;
 110             prev_grad_b[ofm] = update_gradient_b;
 111 #else
 112             bias[ofm] -= lr * grad_b;
 113 #endif
 114         }
 115 #endif
 116 #endif
 117     }
 118 }