2 // Copyright (c) 2018 Intel Corporation
4 // Licensed under the Apache License, Version 2.0 (the "License");
5 // you may not use this file except in compliance with the License.
6 // You may obtain a copy of the License at
8 // http://www.apache.org/licenses/LICENSE-2.0
10 // Unless required by applicable law or agreed to in writing, software
11 // distributed under the License is distributed on an "AS IS" BASIS,
12 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 // See the License for the specific language governing permissions and
14 // limitations under the License.
17 #include "convolution_grad_weights_inst.h"
18 #include "primitive_gpu_base.h"
19 #include "implementation_map.h"
20 #include "error_handler.h"
21 #include "network_impl.h"
22 #include "kernel_selector_helper.h"
23 #include "convolution_grad_weights/convolution_grad_weights_kernel_selector.h"
24 #include "convolution_grad_weights/convolution_grad_weights_kernel_base.h"
// Implementation of the convolution_grad_weights primitive on top of
// typed_primitive_gpu_impl<convolution_grad_weights>. The base type is aliased
// as `parent` so that overrides can forward to it (see get_arguments).
30 struct convolution_grad_weights_gpu : typed_primitive_gpu_impl<convolution_grad_weights> {
31 using parent = typed_primitive_gpu_impl<convolution_grad_weights>;
// Sanity-checks the memory bound to `instance` using the CLDNN_ERROR_* macros.
//
// Checks performed, in order:
//  * the padding filling value of the output layout (failure message:
//    "Unknown padding mode in convolution_grad_weights.");
//  * the data type of input_memory() against the data type of output_memory();
//  * the data type of input_memory(1) against the data type of output_memory();
//  * a further data-type check involving weights_memory(0);
//  * only when instance.use_momentum() is true: the layout of weights_memory(0)
//    against the output layout of _outer.prev_weights_grad(0), and the layout of
//    bias_memory(0) against the output layout of _outer.prev_bias_grad(0).
//
// NOTE(review): how a failed check is reported depends on the CLDNN_ERROR_*
// macros from error_handler.h - presumably they throw; confirm.
// NOTE(review): the bias layout check is guarded only by use_momentum(), while
// get_arguments() additionally consults bias_term() before touching
// bias_memory()/prev_bias_grad(). Confirm that bias memory always exists when
// momentum is enabled, otherwise this check should be guarded by bias_term().
35 bool validate_impl(const typed_primitive_inst<convolution_grad_weights>& instance) const override {
38 CLDNN_ERROR_NOT_EQUAL(_outer.id(),
39 "convolution_grad_weights filling value",
40 _outer.get_output_layout().data_padding.filling_value(),
43 "Unknown padding mode in convolution_grad_weights.");
44 // Check whether all memory elements use the same unit type (FP16 or FP32).
45 CLDNN_ERROR_DATA_TYPES_MISMATCH(_outer.id(),
47 instance.input_memory().get_layout().data_type,
49 instance.output_memory().get_layout().data_type,
51 CLDNN_ERROR_DATA_TYPES_MISMATCH(_outer.id(),
53 instance.input_memory(1).get_layout().data_type,
55 instance.output_memory().get_layout().data_type,
57 CLDNN_ERROR_DATA_TYPES_MISMATCH(_outer.id(),
61 instance.weights_memory(0).get_layout().data_type,
64 if (instance.use_momentum()) {
65 CLDNN_ERROR_LAYOUT_MISMATCH(_outer.id(),
67 instance.weights_memory(0).get_layout(),
68 "previous weights grad memory",
69 _outer.prev_weights_grad(0).get_output_layout(),
71 CLDNN_ERROR_LAYOUT_MISMATCH(_outer.id(),
73 instance.bias_memory(0).get_layout(),
74 "previous bias grad memory",
75 _outer.prev_bias_grad(0).get_output_layout(),
// Assembles the kernel arguments for the given `split` index.
//
// Starts from parent::get_arguments(instance, split) and then fills in:
//  * weights            - address of instance.weights_memory(split);
//  * bias               - address of instance.bias_memory(split) when
//                         instance.bias_term() is true, otherwise nullptr;
//  * prev_weights_grad  - address of instance.prev_weights_grad(split) when
//                         instance.use_momentum() is true, otherwise nullptr;
//  * the previous bias gradient pointer - address of
//                         instance.prev_bias_grad(split) only when both
//                         bias_term() and use_momentum() are true, otherwise
//                         nullptr (nested conditional);
//  * lr                 - learning rate read from instance.get_network().
//
// NOTE(review): the pointers are converted with C-style casts to
// memory_impl::cptr; presumably cptr is a (smart) pointer to const memory_impl -
// confirm before replacing the casts with named casts.
82 kernel::kernel_arguments_data get_arguments(typed_primitive_inst<convolution_grad_weights>& instance,
83 int32_t split) const override {
84 kernel::kernel_arguments_data args = parent::get_arguments(instance, split);
86 args.weights = (memory_impl::cptr) &instance.weights_memory(split);
87 args.bias = (memory_impl::cptr) (instance.bias_term() ? &instance.bias_memory(split) : nullptr);
88 args.prev_weights_grad = (memory_impl::cptr) (instance.use_momentum() ? &instance.prev_weights_grad(split) : nullptr);
90 (memory_impl::cptr) (instance.bias_term() ? instance.use_momentum() ? &instance.prev_bias_grad(split) : nullptr : nullptr);
91 args.lr = instance.get_network().get_learning_rate();
// Returns the split value by forwarding to _outer.get_split().
96 int32_t get_split() const override { return _outer.get_split(); }
// Factory for this implementation; it is the callback passed to
// implementation_map<convolution_grad_weights>::add().
//
// Steps:
//  1. Switches on the fused data-type/format of the output layout of
//     arg.weights(0). The listed cases are f32/f16 combined with bfyx/yxfb;
//     std::runtime_error("convolution_grad_weights weights format unsupported")
//     is thrown from this switch.
//  2. Builds kernel-selector parameters through get_default_learning_params /
//     get_default_learning_optional_params. When depthwise_separable_opt is set,
//     1 is passed to get_default_learning_params in place of `split`.
//  3. Copies the primitive's configuration into the parameters: depthwise option,
//     output_grad_w, gradient = true, an extra input taken from dependency 1,
//     split, filter size (weights spatial[0] and spatial[1]), padding (the
//     negated input_offset spatial values clamped to be >= 0), stride, dilation.
//  4. Asks convolution_grad_weights_kernel_selector for the best kernels;
//     CLDNN_ERROR_BOOL is invoked with best_kernels.empty() as its condition.
//  5. Wraps best_kernels[0] in a newly allocated convolution_grad_weights_gpu.
//
// NOTE(review): the object is created with a raw `new`; presumably ownership
// passes to the caller of create() - confirm.
// NOTE(review): the error text "Cannot find a proper kernel with this arguments"
// is a runtime string and is left untouched here ("these arguments" would read
// better).
99 static primitive_impl* create(const convolution_grad_weights_node& arg) {
100 const auto& primitive = arg.get_primitive();
101 const auto& weights_layout = arg.weights(0).get_output_layout();
// Reject weights layouts whose (data type, format) pair is not one of the cases below.
103 switch (weights_layout.fused_format()) {
104 case fuse(data_types::f32, format::bfyx):
105 case fuse(data_types::f32, format::yxfb):
106 case fuse(data_types::f16, format::bfyx):
107 case fuse(data_types::f16, format::yxfb):
110 throw std::runtime_error("convolution_grad_weights weights format unsupported");
113 const auto& weights_size = weights_layout.size;
115 const auto& split = primitive->split();
116 const auto& stride = primitive->stride;
// Reading the dilation from the primitive is compiled out (#if 0); a constant
// tensor {0, 0, 1, 1} is used as the dilation instead.
117 #if 0 // TODO: support dilation
118 const auto& dilation = primitive->dilation;
120 const tensor dilation = {0, 0, 1, 1};
122 const auto depthwise_separable_opt = arg.get_depthwise_sep_opt();
123 const auto output_grad_w = arg.output_grad_w();
125 const auto& input_offset = primitive->input_offset;
127 auto conv_grad_weights_params = get_default_learning_params<kernel_selector::convolution_grad_weights_params>(
129 depthwise_separable_opt ? 1 : split);
130 auto conv_grad_weights_optional_params =
131 get_default_learning_optional_params<kernel_selector::convolution_grad_weights_optional_params>(
134 conv_grad_weights_params.depthwise_separable_opt = depthwise_separable_opt;
135 conv_grad_weights_params.output_grad_w = output_grad_w;
137 conv_grad_weights_params.gradient = true;
138 conv_grad_weights_params.inputs.push_back(convert_data_tensor(arg.get_dependency(1).get_output_layout()));
140 conv_grad_weights_params.split = split;
141 conv_grad_weights_params.filterSize = {
142 (uint32_t)weights_size.spatial[0],
143 (uint32_t)weights_size.spatial[1],
146 conv_grad_weights_params.padding = {(uint32_t)std::max(-input_offset.spatial[0], 0),
147 (uint32_t)std::max(-input_offset.spatial[1], 0)};
149 conv_grad_weights_params.stride = {(uint32_t)stride.spatial[0], (uint32_t)stride.spatial[1]};
151 conv_grad_weights_params.dilation = {(uint32_t)dilation.spatial[0], (uint32_t)dilation.spatial[1]};
153 auto& kernel_selector = kernel_selector::convolution_grad_weights_kernel_selector::Instance();
154 auto best_kernels = kernel_selector.GetBestKernels(conv_grad_weights_params, conv_grad_weights_optional_params);
156 CLDNN_ERROR_BOOL(arg.id(),
157 "Best_kernel.empty()",
158 best_kernels.empty(),
159 "Cannot find a proper kernel with this arguments");
// The local is called `deconv`, but it holds a convolution_grad_weights_gpu
// built from the first kernel returned by the selector.
161 auto deconv = new convolution_grad_weights_gpu(arg, best_kernels[0]);
// Register convolution_grad_weights_gpu::create with the implementation map for
// the OCL engine, for every combination of {f32, f16} data and
// {yxfb, bfyx, byxf} format.
// NOTE(review): byxf is registered here, while the weights-format switch in
// create() names only bfyx and yxfb. Presumably the key used here describes a
// different layout than the weights layout checked in create() - confirm that
// byxf registrations cannot end in the "weights format unsupported" error.
170 implementation_map<convolution_grad_weights>::add(
171 std::make_tuple(engine_types::ocl, data_types::f32, format::yxfb),
172 convolution_grad_weights_gpu::create);
173 implementation_map<convolution_grad_weights>::add(
174 std::make_tuple(engine_types::ocl, data_types::f32, format::bfyx),
175 convolution_grad_weights_gpu::create);
176 implementation_map<convolution_grad_weights>::add(
177 std::make_tuple(engine_types::ocl, data_types::f16, format::yxfb),
178 convolution_grad_weights_gpu::create);
179 implementation_map<convolution_grad_weights>::add(
180 std::make_tuple(engine_types::ocl, data_types::f16, format::bfyx),
181 convolution_grad_weights_gpu::create);
182 implementation_map<convolution_grad_weights>::add(
183 std::make_tuple(engine_types::ocl, data_types::f32, format::byxf),
184 convolution_grad_weights_gpu::create);
185 implementation_map<convolution_grad_weights>::add(
186 std::make_tuple(engine_types::ocl, data_types::f16, format::byxf),
187 convolution_grad_weights_gpu::create);