inference-engine/thirdparty/clDNN/src/gpu/convolution_grad_weights_gpu.cpp
/*
// Copyright (c) 2018 Intel Corporation
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//      http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
*/

#include "convolution_grad_weights_inst.h"
#include "primitive_gpu_base.h"
#include "implementation_map.h"
#include "error_handler.h"
#include "network_impl.h"
#include "kernel_selector_helper.h"
#include "convolution_grad_weights/convolution_grad_weights_kernel_selector.h"
#include "convolution_grad_weights/convolution_grad_weights_kernel_base.h"

namespace cldnn { namespace gpu {

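// GPU implementation of the convolution_grad_weights primitive: validates the
// gradient/input/weights memory, builds the kernel-selector parameters and binds
// the kernel arguments (weights, bias, momentum buffers, learning rate).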
struct convolution_grad_weights_gpu : typed_primitive_gpu_impl<convolution_grad_weights>
{
    using parent = typed_primitive_gpu_impl<convolution_grad_weights>;
    using parent::parent;

protected:

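    // Check that the gradient input, the forward input and the output all use the
    // same data type, that the filter is FP32, and, when momentum is enabled, that
    // the previous weight/bias gradient buffers match the current weight/bias layouts.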
    virtual bool validate_impl(const typed_primitive_inst<convolution_grad_weights>& instance) const override
    {
        bool res = true;

        CLDNN_ERROR_NOT_EQUAL(_outer.id(), "convolution_grad_weights filling value", _outer.get_output_layout().data_padding.filling_value(), "padding mode", 0.0f, "Unknown padding mode in convolution_grad_weights.");
        // Check whether all memory elements use the same unit type (FP16 or FP32).
        CLDNN_ERROR_DATA_TYPES_MISMATCH(_outer.id(), "Input grad memory", instance.input_memory().get_layout().data_type, "output memory", instance.output_memory().get_layout().data_type, "");
        CLDNN_ERROR_DATA_TYPES_MISMATCH(_outer.id(), "Input memory", instance.input_memory(1).get_layout().data_type, "output memory", instance.output_memory().get_layout().data_type, "");
        CLDNN_ERROR_DATA_TYPES_MISMATCH(_outer.id(), "Fp32", data_types::f32, "filter memory", instance.weights_memory(0).get_layout().data_type, "");

        if (instance.use_momentum())
        {
            CLDNN_ERROR_LAYOUT_MISMATCH(_outer.id(), "Filter memory", instance.weights_memory(0).get_layout(), "previous weights grad memory", _outer.prev_weights_grad(0).get_output_layout(), "");
            CLDNN_ERROR_LAYOUT_MISMATCH(_outer.id(), "Bias memory", instance.bias_memory(0).get_layout(), "previous bias grad memory", _outer.prev_bias_grad(0).get_output_layout(), "");
        }

        return res;
    }

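    // Extend the base kernel arguments with the weights, the optional bias, the
    // optional momentum (previous gradient) buffers and the current learning rate.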
    virtual kernel::kernel_arguments_data get_arguments(typed_primitive_inst<convolution_grad_weights>& instance, int32_t split) const override
    {
        kernel::kernel_arguments_data args = parent::get_arguments(instance, split);

        args.weights    = &instance.weights_memory(split);
        args.bias       = instance.bias_term() ? &instance.bias_memory(split) : nullptr;
        args.prev_weights_grad = instance.use_momentum() ? &instance.prev_weights_grad(split) : nullptr;
        args.prev_bias_grad = (instance.bias_term() && instance.use_momentum()) ? &instance.prev_bias_grad(split) : nullptr;
        args.lr         = instance.get_network().get_learning_rate();

        return args;
    }

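    // Forward the split factor declared on the primitive node to the base implementation.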
    virtual int32_t get_split() const override
    {
        return _outer.get_split();
    }

public:

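    // Translate the node into kernel-selector parameters, pick the best matching
    // kernel and wrap it in a convolution_grad_weights_gpu implementation.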
    static primitive_impl* create(const convolution_grad_weights_node& arg)
    {
        const auto& primitive = arg.get_primitive();
        const auto& weights_layout = arg.weights(0).get_output_layout();

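        // Reject weight layouts this implementation cannot handle: only FP32/FP16
        // weights in bfyx or yxfb format are accepted.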
        switch (weights_layout.fused_format())
        {
        case fuse(data_types::f32, format::bfyx):
        case fuse(data_types::f32, format::yxfb):
        case fuse(data_types::f16, format::bfyx):
        case fuse(data_types::f16, format::yxfb):
            break;
        default:
            throw std::runtime_error("convolution_grad_weights weights format unsupported");
        }

        const auto& weights_size = weights_layout.size;

        const auto& split = primitive->split();
        const auto& stride = primitive->stride;
#if 0 // TODO: support dilation
        const auto& dilation = primitive->dilation;
#else
        const tensor dilation = {0,0,1,1};
#endif
        const auto depthwise_separable_opt = arg.get_depthwise_sep_opt();
        const auto output_grad_w = arg.output_grad_w();

        const auto& input_offset = primitive->input_offset;

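        // Fill the kernel-selector descriptor: filter size, padding derived from the
        // negative input offset, stride, and the (currently fixed) 1x1 dilation.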
        auto conv_grad_weights_params = get_default_learning_params<kernel_selector::convolution_grad_weights_params>(arg, depthwise_separable_opt ? 1 : split);
        auto conv_grad_weights_optional_params = get_default_learning_optional_params<kernel_selector::convolution_grad_weights_optional_params>(arg.get_program());

        conv_grad_weights_params.depthwise_separable_opt = depthwise_separable_opt;
        conv_grad_weights_params.output_grad_w = output_grad_w;

        conv_grad_weights_params.gradient = true;
        conv_grad_weights_params.inputs.push_back(convert_data_tensor(arg.get_dependency(1).get_output_layout()));

        conv_grad_weights_params.split = split;
        conv_grad_weights_params.filterSize = {
            (uint32_t)weights_size.spatial[0],
            (uint32_t)weights_size.spatial[1],
        };

        conv_grad_weights_params.padding = {
            (uint32_t)std::max(-input_offset.spatial[0], 0),
            (uint32_t)std::max(-input_offset.spatial[1], 0)
        };

        conv_grad_weights_params.stride = {
            (uint32_t)stride.spatial[0],
            (uint32_t)stride.spatial[1]
        };

        conv_grad_weights_params.dilation = {
            (uint32_t)dilation.spatial[0],
            (uint32_t)dilation.spatial[1]
        };

        auto& kernel_selector = kernel_selector::convolution_grad_weights_kernel_selector::Instance();
        auto best_kernels = kernel_selector.GetBestKernels(conv_grad_weights_params, conv_grad_weights_optional_params);

        CLDNN_ERROR_BOOL(arg.id(), "best_kernels.empty()", best_kernels.empty(), "Cannot find a proper kernel with these arguments");

        auto conv_grad_weights_impl = new convolution_grad_weights_gpu(arg, best_kernels[0]);

        return conv_grad_weights_impl;
    }
};

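// Register this implementation for every supported (engine, data type, format)
// combination; the registration runs once when the static attach_impl object is
// constructed at load time.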
namespace {
    struct attach {
        attach() {
            implementation_map<convolution_grad_weights>::add(std::make_tuple(engine_types::ocl, data_types::f32, format::yxfb), convolution_grad_weights_gpu::create);
            implementation_map<convolution_grad_weights>::add(std::make_tuple(engine_types::ocl, data_types::f32, format::bfyx), convolution_grad_weights_gpu::create);
            implementation_map<convolution_grad_weights>::add(std::make_tuple(engine_types::ocl, data_types::f16, format::yxfb), convolution_grad_weights_gpu::create);
            implementation_map<convolution_grad_weights>::add(std::make_tuple(engine_types::ocl, data_types::f16, format::bfyx), convolution_grad_weights_gpu::create);
            implementation_map<convolution_grad_weights>::add(std::make_tuple(engine_types::ocl, data_types::f32, format::byxf), convolution_grad_weights_gpu::create);
            implementation_map<convolution_grad_weights>::add(std::make_tuple(engine_types::ocl, data_types::f16, format::byxf), convolution_grad_weights_gpu::create);
        }
        ~attach() {}
    };
    attach attach_impl;
}
} }