Publishing 2019 R1 content
[platform/upstream/dldt.git] / inference-engine / thirdparty / clDNN / src / gpu / deconvolution_gpu.cpp
1 /*
2 // Copyright (c) 2016 Intel Corporation
3 //
4 // Licensed under the Apache License, Version 2.0 (the "License");
5 // you may not use this file except in compliance with the License.
6 // You may obtain a copy of the License at
7 //
8 //      http://www.apache.org/licenses/LICENSE-2.0
9 //
10 // Unless required by applicable law or agreed to in writing, software
11 // distributed under the License is distributed on an "AS IS" BASIS,
12 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 // See the License for the specific language governing permissions and
14 // limitations under the License.
15 */
16
17 #include "deconvolution_inst.h"
18 #include "primitive_gpu_base.h"
19 #include "implementation_map.h"
20 #include "error_handler.h"
21 #include "kernel_selector_helper.h"
22 #include "deconvolution/deconvolution_kernel_selector.h"
23 #include "deconvolution/deconvolution_kernel_base.h"
24
25 namespace cldnn { namespace gpu {
26
27 struct deconvolution_gpu : typed_primitive_gpu_impl<deconvolution>
28 {
29     using parent = typed_primitive_gpu_impl<deconvolution>;
30     using parent::parent;
31
32 protected:
33
34     // TODO: share it with convolution and fully connected
35     virtual bool validate_impl(const typed_primitive_inst<deconvolution>& instance) const override
36     {
37         bool res = true;
38
39         CLDNN_ERROR_NOT_EQUAL(_outer.id(), "deconvolution filling value", _outer.get_output_layout().data_padding.filling_value(), "padding mode", 0.0f, "Unknown padding mode in deconvolution.");
40         // Check whether all memory elements use the same unit type (FP16 or FP32).
41         CLDNN_ERROR_DATA_TYPES_MISMATCH(_outer.id(), "Input memory", instance.input_memory().get_layout().data_type, "output memory", instance.output_memory().get_layout().data_type, "");
42         CLDNN_ERROR_DATA_TYPES_MISMATCH(_outer.id(), "Input memory", instance.input_memory().get_layout().data_type, "filter memory", instance.weights_memory(0).get_layout().data_type, "");
43
44         return res;
45     }
46
47     virtual kernel::kernel_arguments_data get_arguments(typed_primitive_inst<deconvolution>& instance, int32_t split) const override
48     {
49         kernel::kernel_arguments_data args = parent::get_arguments(instance, split);
50         auto* desc = static_cast<const deconvolution*>(instance.desc().get());
51         int dep_size = (int)(desc->weights.size() + desc->bias.size() + 1);
52
53         args.weights    = &instance.weights_memory(split);
54         args.bias       = instance.bias_term() ? &instance.bias_memory(split) : nullptr;
55
56         if (int(instance.dependencies().size()) > dep_size)
57             args.inputs.emplace_back(&instance.dep_memory(dep_size));
58
59         return args;
60     }
61
62     virtual int32_t get_split() const override
63     { 
64         return _outer.get_split(); 
65     }
66
67     virtual uint32_t get_groups() const override
68     {
69         return _outer.get_groups();
70     }
71
72 public:
73
74     static primitive_impl* create(const deconvolution_node& arg)
75     {
76         const auto& primitive = arg.get_primitive();
77         const auto& weights_layout = arg.weights(0).get_output_layout();
78
79         switch (weights_layout.fused_format())
80         {
81             // FP32 (float)
82         case fuse(data_types::f32, format::bfyx):
83         case fuse(data_types::f32, format::yxfb):
84         case fuse(data_types::f16, format::bfyx):
85         case fuse(data_types::f16, format::yxfb):
86             break;
87         default:
88             throw std::runtime_error("deconvolution weights format unsupported");
89         }
90
91         const auto& weights_size = weights_layout.size;
92
93         const auto& split = primitive->split();
94         const auto& stride = primitive->stride;
95 #if 0 // TODO: support dilation
96         const auto& dilation = primitive->dilation;
97 #else
98         const tensor dilation = {0,0,1,1};
99 #endif
100         const auto depthwise_separable_opt = arg.get_depthwise_sep_opt();
101         const auto actual_split = depthwise_separable_opt ? (decltype(split))1 : split;
102
103         const auto& input_offset = primitive->input_offset;
104         const auto& groups = primitive->groups;
105
106         auto deconv_params = get_weights_bias_default_params<kernel_selector::deconvolution_params>(arg, (groups > 1 && !depthwise_separable_opt) ? groups : actual_split, groups);
107         auto deconv_optional_params = get_default_weights_bias_optional_params<kernel_selector::deconvolution_optional_params>(arg.get_program());
108
109         if(primitive->with_activation)
110             convert_activation_func_params(primitive, deconv_params.activation);
111
112         deconv_params.depthwise_separable_opt = depthwise_separable_opt;
113
114         deconv_params.split = split;
115         deconv_params.groups = groups;
116         deconv_params.filterSize = {
117             (uint32_t)weights_size.spatial[0],
118             (uint32_t)weights_size.spatial[1],
119         };
120
121         deconv_params.padding = {
122             (uint32_t)std::max(-input_offset.spatial[0], 0),
123             (uint32_t)std::max(-input_offset.spatial[1], 0)
124         };
125
126         deconv_params.stride = {
127             (uint32_t)stride.spatial[0],
128             (uint32_t)stride.spatial[1]
129         };
130
131         deconv_params.dilation = {
132             (uint32_t)dilation.spatial[0],
133             (uint32_t)dilation.spatial[1]
134         };
135
136         deconv_params.gradient = primitive->gradient();
137
138         if (arg.get_dependencies().size() > primitive->weights.size() + primitive->bias.size() + 1)
139         {
140             deconv_params.fused_eltwise = true;
141             deconv_params.inputs.push_back(convert_data_tensor(arg.fused_sum().get_output_layout()));
142         }
143
144         auto& kernel_selector = kernel_selector::deconvolution_kernel_selector::Instance();
145         auto best_kernels = kernel_selector.GetBestKernels(deconv_params, deconv_optional_params);
146
147         CLDNN_ERROR_BOOL(arg.id(), "Best_kernel.empty()", best_kernels.empty(), "Cannot find a proper kernel with these arguments");
148         auto deconv = new deconvolution_gpu(arg, best_kernels[0]);
149
150         return deconv;
151     }
152 };
153
154 namespace{
155     struct attach {
156         attach() {
157             implementation_map<deconvolution>::add(std::make_tuple(engine_types::ocl, data_types::f32, format::yxfb), deconvolution_gpu::create);
158             implementation_map<deconvolution>::add(std::make_tuple(engine_types::ocl, data_types::f32, format::bfyx), deconvolution_gpu::create);
159             implementation_map<deconvolution>::add(std::make_tuple(engine_types::ocl, data_types::f16, format::yxfb), deconvolution_gpu::create);
160             implementation_map<deconvolution>::add(std::make_tuple(engine_types::ocl, data_types::f16, format::bfyx), deconvolution_gpu::create);
161             implementation_map<deconvolution>::add(std::make_tuple(engine_types::ocl, data_types::f32, format::byxf), deconvolution_gpu::create);
162             implementation_map<deconvolution>::add(std::make_tuple(engine_types::ocl, data_types::f16, format::byxf), deconvolution_gpu::create);
163         }
164         ~attach() {}
165     };
166     attach attach_impl;
167 }
168 } }