inference-engine/thirdparty/clDNN/src/gpu/deconvolution_gpu.cpp

   1 /*
   2 // Copyright (c) 2016 Intel Corporation
   3 //
   4 // Licensed under the Apache License, Version 2.0 (the "License");
   5 // you may not use this file except in compliance with the License.
   6 // You may obtain a copy of the License at
   7 //
   8 //      http://www.apache.org/licenses/LICENSE-2.0
   9 //
  10 // Unless required by applicable law or agreed to in writing, software
  11 // distributed under the License is distributed on an "AS IS" BASIS,
  12 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  13 // See the License for the specific language governing permissions and
  14 // limitations under the License.
  15 */
  16
  17 #include "deconvolution_inst.h"
  18 #include "primitive_gpu_base.h"
  19 #include "implementation_map.h"
  20 #include "error_handler.h"
  21 #include "kernel_selector_helper.h"
  22 #include "deconvolution/deconvolution_kernel_selector.h"
  23 #include "deconvolution/deconvolution_kernel_base.h"
  24
  25 namespace cldnn { namespace gpu {
  26
  27 struct deconvolution_gpu : typed_primitive_gpu_impl<deconvolution>
  28 {
  29     using parent = typed_primitive_gpu_impl<deconvolution>;
  30     using parent::parent;
  31
  32 protected:
  33
  34     // TODO: share it with convolution and fully connected
  35     virtual bool validate_impl(const typed_primitive_inst<deconvolution>& instance) const override
  36     {
  37         bool res = true;
  38
  39         CLDNN_ERROR_NOT_EQUAL(_outer.id(), "deconvolution filling value", _outer.get_output_layout().data_padding.filling_value(), "padding mode", 0.0f, "Unknown padding mode in deconvolution.");
  40         // Check whether all memory elements use the same unit type (FP16 or FP32).
  41         CLDNN_ERROR_DATA_TYPES_MISMATCH(_outer.id(), "Input memory", instance.input_memory().get_layout().data_type, "output memory", instance.output_memory().get_layout().data_type, "");
  42         CLDNN_ERROR_DATA_TYPES_MISMATCH(_outer.id(), "Input memory", instance.input_memory().get_layout().data_type, "filter memory", instance.weights_memory(0).get_layout().data_type, "");
  43
  44         return res;
  45     }
  46
  47     virtual kernel::kernel_arguments_data get_arguments(typed_primitive_inst<deconvolution>& instance, int32_t split) const override
  48     {
  49         kernel::kernel_arguments_data args = parent::get_arguments(instance, split);
  50         auto* desc = static_cast<const deconvolution*>(instance.desc().get());
  51         int dep_size = (int)(desc->weights.size() + desc->bias.size() + 1);
  52
  53         args.weights    = &instance.weights_memory(split);
  54         args.bias       = instance.bias_term() ? &instance.bias_memory(split) : nullptr;
  55
  56         if (int(instance.dependencies().size()) > dep_size)
  57             args.inputs.emplace_back(&instance.dep_memory(dep_size));
  58
  59         return args;
  60     }
  61
  62     virtual int32_t get_split() const override
  63     {
  64         return _outer.get_split();
  65     }
  66
  67     virtual uint32_t get_groups() const override
  68     {
  69         return _outer.get_groups();
  70     }
  71
  72 public:
  73
  74     static primitive_impl* create(const deconvolution_node& arg)
  75     {
  76         const auto& primitive = arg.get_primitive();
  77         const auto& weights_layout = arg.weights(0).get_output_layout();
  78
  79         switch (weights_layout.fused_format())
  80         {
  81             // FP32 (float)
  82         case fuse(data_types::f32, format::bfyx):
  83         case fuse(data_types::f32, format::yxfb):
  84         case fuse(data_types::f16, format::bfyx):
  85         case fuse(data_types::f16, format::yxfb):
  86             break;
  87         default:
  88             throw std::runtime_error("deconvolution weights format unsupported");
  89         }
  90
  91         const auto& weights_size = weights_layout.size;
  92
  93         const auto& split = primitive->split();
  94         const auto& stride = primitive->stride;
  95 #if 0 // TODO: support dilation
  96         const auto& dilation = primitive->dilation;
  97 #else
  98         const tensor dilation = {0,0,1,1};
  99 #endif
 100         const auto depthwise_separable_opt = arg.get_depthwise_sep_opt();
 101         const auto actual_split = depthwise_separable_opt ? (decltype(split))1 : split;
 102
 103         const auto& input_offset = primitive->input_offset;
 104         const auto& groups = primitive->groups;
 105
 106         auto deconv_params = get_weights_bias_default_params<kernel_selector::deconvolution_params>(arg, (groups > 1 && !depthwise_separable_opt) ? groups : actual_split, groups);
 107         auto deconv_optional_params = get_default_weights_bias_optional_params<kernel_selector::deconvolution_optional_params>(arg.get_program());
 108
 109         if(primitive->with_activation)
 110             convert_activation_func_params(primitive, deconv_params.activation);
 111
 112         deconv_params.depthwise_separable_opt = depthwise_separable_opt;
 113
 114         deconv_params.split = split;
 115         deconv_params.groups = groups;
 116         deconv_params.filterSize = {
 117             (uint32_t)weights_size.spatial[0],
 118             (uint32_t)weights_size.spatial[1],
 119         };
 120
 121         deconv_params.padding = {
 122             (uint32_t)std::max(-input_offset.spatial[0], 0),
 123             (uint32_t)std::max(-input_offset.spatial[1], 0)
 124         };
 125
 126         deconv_params.stride = {
 127             (uint32_t)stride.spatial[0],
 128             (uint32_t)stride.spatial[1]
 129         };
 130
 131         deconv_params.dilation = {
 132             (uint32_t)dilation.spatial[0],
 133             (uint32_t)dilation.spatial[1]
 134         };
 135
 136         deconv_params.gradient = primitive->gradient();
 137
 138         if (arg.get_dependencies().size() > primitive->weights.size() + primitive->bias.size() + 1)
 139         {
 140             deconv_params.fused_eltwise = true;
 141             deconv_params.inputs.push_back(convert_data_tensor(arg.fused_sum().get_output_layout()));
 142         }
 143
 144         auto& kernel_selector = kernel_selector::deconvolution_kernel_selector::Instance();
 145         auto best_kernels = kernel_selector.GetBestKernels(deconv_params, deconv_optional_params);
 146
 147         CLDNN_ERROR_BOOL(arg.id(), "Best_kernel.empty()", best_kernels.empty(), "Cannot find a proper kernel with these arguments");
 148         auto deconv = new deconvolution_gpu(arg, best_kernels[0]);
 149
 150         return deconv;
 151     }
 152 };
 153
 154 namespace{
 155     struct attach {
 156         attach() {
 157             implementation_map<deconvolution>::add(std::make_tuple(engine_types::ocl, data_types::f32, format::yxfb), deconvolution_gpu::create);
 158             implementation_map<deconvolution>::add(std::make_tuple(engine_types::ocl, data_types::f32, format::bfyx), deconvolution_gpu::create);
 159             implementation_map<deconvolution>::add(std::make_tuple(engine_types::ocl, data_types::f16, format::yxfb), deconvolution_gpu::create);
 160             implementation_map<deconvolution>::add(std::make_tuple(engine_types::ocl, data_types::f16, format::bfyx), deconvolution_gpu::create);
 161             implementation_map<deconvolution>::add(std::make_tuple(engine_types::ocl, data_types::f32, format::byxf), deconvolution_gpu::create);
 162             implementation_map<deconvolution>::add(std::make_tuple(engine_types::ocl, data_types::f16, format::byxf), deconvolution_gpu::create);
 163         }
 164         ~attach() {}
 165     };
 166     attach attach_impl;
 167 }
 168 } }