inference-engine/thirdparty/clDNN/src/gpu/convolution_grad_weights_gpu.cpp
/*
// Copyright (c) 2018 Intel Corporation
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//      http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
*/

#include "convolution_grad_weights_inst.h"
#include "primitive_gpu_base.h"
#include "implementation_map.h"
#include "error_handler.h"
#include "network_impl.h"
#include "kernel_selector_helper.h"
#include "convolution_grad_weights/convolution_grad_weights_kernel_selector.h"
#include "convolution_grad_weights/convolution_grad_weights_kernel_base.h"

namespace cldnn { namespace gpu {

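// GPU implementation of the convolution_grad_weights primitive: validates the
// gradient/input/weights memory, builds the kernel-selector parameters and binds
// the kernel arguments (weights, bias, momentum buffers, learning rate).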
struct convolution_grad_weights_gpu : typed_primitive_gpu_impl<convolution_grad_weights>
{
    using parent = typed_primitive_gpu_impl<convolution_grad_weights>;
    using parent::parent;

protected:

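    // Check that the gradient input, the forward input and the output all use the
    // same data type, that the filter is FP32, and, when momentum is enabled, that
    // the previous weight/bias gradient buffers match the current weight/bias layouts.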
    virtual bool validate_impl(const typed_primitive_inst<convolution_grad_weights>& instance) const override
    {
        bool res = true;

        CLDNN_ERROR_NOT_EQUAL(_outer.id(), "convolution_grad_weights filling value", _outer.get_output_layout().data_padding.filling_value(), "padding mode", 0.0f, "Unknown padding mode in convolution_grad_weights.");
        // Check whether all memory elements use the same unit type (FP16 or FP32).
        CLDNN_ERROR_DATA_TYPES_MISMATCH(_outer.id(), "Input grad memory", instance.input_memory().get_layout().data_type, "output memory", instance.output_memory().get_layout().data_type, "");
        CLDNN_ERROR_DATA_TYPES_MISMATCH(_outer.id(), "Input memory", instance.input_memory(1).get_layout().data_type, "output memory", instance.output_memory().get_layout().data_type, "");
        CLDNN_ERROR_DATA_TYPES_MISMATCH(_outer.id(), "Fp32", data_types::f32, "filter memory", instance.weights_memory(0).get_layout().data_type, "");

        if (instance.use_momentum())
        {
            CLDNN_ERROR_LAYOUT_MISMATCH(_outer.id(), "Filter memory", instance.weights_memory(0).get_layout(), "previous weights grad memory", _outer.prev_weights_grad(0).get_output_layout(), "");
            CLDNN_ERROR_LAYOUT_MISMATCH(_outer.id(), "Bias memory", instance.bias_memory(0).get_layout(), "previous bias grad memory", _outer.prev_bias_grad(0).get_output_layout(), "");
        }

        return res;
    }

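    // Extend the base kernel arguments with the weights, the optional bias, the
    // optional momentum (previous gradient) buffers and the current learning rate.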
    virtual kernel::kernel_arguments_data get_arguments(typed_primitive_inst<convolution_grad_weights>& instance, int32_t split) const override
    {
        kernel::kernel_arguments_data args = parent::get_arguments(instance, split);

        args.weights    = &instance.weights_memory(split);
        args.bias       = instance.bias_term() ? &instance.bias_memory(split) : nullptr;
        args.prev_weights_grad = instance.use_momentum() ? &instance.prev_weights_grad(split) : nullptr;
        args.prev_bias_grad = (instance.bias_term() && instance.use_momentum()) ? &instance.prev_bias_grad(split) : nullptr;
        args.lr         = instance.get_network().get_learning_rate();

        return args;
    }

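    // Forward the split factor declared on the primitive node to the base implementation.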
    virtual int32_t get_split() const override
    {
        return _outer.get_split();
    }

public:

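    // Translate the node into kernel-selector parameters, pick the best matching
    // kernel and wrap it in a convolution_grad_weights_gpu implementation.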
    static primitive_impl* create(const convolution_grad_weights_node& arg)
    {
        const auto& primitive = arg.get_primitive();
        const auto& weights_layout = arg.weights(0).get_output_layout();

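        // Reject weight layouts this implementation cannot handle: only FP32/FP16
        // weights in bfyx or yxfb format are accepted.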
        switch (weights_layout.fused_format())
        {
        case fuse(data_types::f32, format::bfyx):
        case fuse(data_types::f32, format::yxfb):
        case fuse(data_types::f16, format::bfyx):
        case fuse(data_types::f16, format::yxfb):
            break;
        default:
            throw std::runtime_error("convolution_grad_weights weights format unsupported");
        }

        const auto& weights_size = weights_layout.size;

        const auto& split = primitive->split();
        const auto& stride = primitive->stride;
#if 0 // TODO: support dilation
        const auto& dilation = primitive->dilation;
#else
        const tensor dilation = {0,0,1,1};
#endif
        const auto depthwise_separable_opt = arg.get_depthwise_sep_opt();
        const auto output_grad_w = arg.output_grad_w();

        const auto& input_offset = primitive->input_offset;

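        // Fill the kernel-selector descriptor: filter size, padding derived from the
        // negative input offset, stride, and the (currently fixed) 1x1 dilation.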
        auto conv_grad_weights_params = get_default_learning_params<kernel_selector::convolution_grad_weights_params>(arg, depthwise_separable_opt ? 1 : split);
        auto conv_grad_weights_optional_params = get_default_learning_optional_params<kernel_selector::convolution_grad_weights_optional_params>(arg.get_program());

        conv_grad_weights_params.depthwise_separable_opt = depthwise_separable_opt;
        conv_grad_weights_params.output_grad_w = output_grad_w;

        conv_grad_weights_params.gradient = true;
        conv_grad_weights_params.inputs.push_back(convert_data_tensor(arg.get_dependency(1).get_output_layout()));

        conv_grad_weights_params.split = split;
        conv_grad_weights_params.filterSize = {
            (uint32_t)weights_size.spatial[0],
            (uint32_t)weights_size.spatial[1],
        };

        conv_grad_weights_params.padding = {
            (uint32_t)std::max(-input_offset.spatial[0], 0),
            (uint32_t)std::max(-input_offset.spatial[1], 0)
        };

        conv_grad_weights_params.stride = {
            (uint32_t)stride.spatial[0],
            (uint32_t)stride.spatial[1]
        };

        conv_grad_weights_params.dilation = {
            (uint32_t)dilation.spatial[0],
            (uint32_t)dilation.spatial[1]
        };

        auto& kernel_selector = kernel_selector::convolution_grad_weights_kernel_selector::Instance();
        auto best_kernels = kernel_selector.GetBestKernels(conv_grad_weights_params, conv_grad_weights_optional_params);

        CLDNN_ERROR_BOOL(arg.id(), "best_kernels.empty()", best_kernels.empty(), "Cannot find a proper kernel with these arguments");

        auto conv_grad_weights_impl = new convolution_grad_weights_gpu(arg, best_kernels[0]);

        return conv_grad_weights_impl;
    }
};

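// Register this implementation for every supported (engine, data type, format)
// combination; the registration runs once when the static attach_impl object is
// constructed at load time.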
namespace {
    struct attach {
        attach() {
            implementation_map<convolution_grad_weights>::add(std::make_tuple(engine_types::ocl, data_types::f32, format::yxfb), convolution_grad_weights_gpu::create);
            implementation_map<convolution_grad_weights>::add(std::make_tuple(engine_types::ocl, data_types::f32, format::bfyx), convolution_grad_weights_gpu::create);
            implementation_map<convolution_grad_weights>::add(std::make_tuple(engine_types::ocl, data_types::f16, format::yxfb), convolution_grad_weights_gpu::create);
            implementation_map<convolution_grad_weights>::add(std::make_tuple(engine_types::ocl, data_types::f16, format::bfyx), convolution_grad_weights_gpu::create);
            implementation_map<convolution_grad_weights>::add(std::make_tuple(engine_types::ocl, data_types::f32, format::byxf), convolution_grad_weights_gpu::create);
            implementation_map<convolution_grad_weights>::add(std::make_tuple(engine_types::ocl, data_types::f16, format::byxf), convolution_grad_weights_gpu::create);
        }
        ~attach() {}
    };
    attach attach_impl;
}
} }