2 // Copyright (c) 2018 Intel Corporation
4 // Licensed under the Apache License, Version 2.0 (the "License");
5 // you may not use this file except in compliance with the License.
6 // You may obtain a copy of the License at
8 // http://www.apache.org/licenses/LICENSE-2.0
10 // Unless required by applicable law or agreed to in writing, software
11 // distributed under the License is distributed on an "AS IS" BASIS,
12 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 // See the License for the specific language governing permissions and
14 // limitations under the License.
17 #include "convolution_grad_weights_inst.h"
18 #include "primitive_gpu_base.h"
19 #include "implementation_map.h"
20 #include "error_handler.h"
21 #include "network_impl.h"
22 #include "kernel_selector_helper.h"
23 #include "convolution_grad_weights/convolution_grad_weights_kernel_selector.h"
24 #include "convolution_grad_weights/convolution_grad_weights_kernel_base.h"
25 namespace cldnn { namespace gpu {
27 struct convolution_grad_weights_gpu : typed_primitive_gpu_impl<convolution_grad_weights>
29 using parent = typed_primitive_gpu_impl<convolution_grad_weights>;
34 virtual bool validate_impl(const typed_primitive_inst<convolution_grad_weights>& instance) const override
38 CLDNN_ERROR_NOT_EQUAL(_outer.id(), "convolution_grad_weights filling value", _outer.get_output_layout().data_padding.filling_value(), "padding mode", 0.0f, "Unknown padding mode in convolution_grad_weights.");
39 // Check whether all memory elements use the same unit type (FP16 or FP32).
40 CLDNN_ERROR_DATA_TYPES_MISMATCH(_outer.id(), "Input grad memory", instance.input_memory().get_layout().data_type, "output memory", instance.output_memory().get_layout().data_type, "");
41 CLDNN_ERROR_DATA_TYPES_MISMATCH(_outer.id(), "Input memory", instance.input_memory(1).get_layout().data_type, "output memory", instance.output_memory().get_layout().data_type, "");
42 CLDNN_ERROR_DATA_TYPES_MISMATCH(_outer.id(), "Fp32", data_types::f32, "filter memory", instance.weights_memory(0).get_layout().data_type, "");
44 if (instance.use_momentum())
46 CLDNN_ERROR_LAYOUT_MISMATCH(_outer.id(), "Filter memory", instance.weights_memory(0).get_layout(), "previous weights grad memory", _outer.prev_weights_grad(0).get_output_layout(), "");
47 CLDNN_ERROR_LAYOUT_MISMATCH(_outer.id(), "Bias memory", instance.bias_memory(0).get_layout(), "previous bias grad memory", _outer.prev_bias_grad(0).get_output_layout(), "");
53 virtual kernel::kernel_arguments_data get_arguments(typed_primitive_inst<convolution_grad_weights>& instance, int32_t split) const override
55 kernel::kernel_arguments_data args = parent::get_arguments(instance, split);
57 args.weights = &instance.weights_memory(split);
58 args.bias = instance.bias_term() ? &instance.bias_memory(split) : nullptr;
59 args.prev_weights_grad = instance.use_momentum() ? &instance.prev_weights_grad(split) : nullptr;
60 args.prev_bias_grad = instance.bias_term() ? instance.use_momentum() ? &instance.prev_bias_grad(split) : nullptr : nullptr;
61 args.lr = instance.get_network().get_learning_rate();
66 virtual int32_t get_split() const override
68 return _outer.get_split();
73 static primitive_impl* create(const convolution_grad_weights_node& arg)
75 const auto& primitive = arg.get_primitive();
76 const auto& weights_layout = arg.weights(0).get_output_layout();
78 switch (weights_layout.fused_format())
80 case fuse(data_types::f32, format::bfyx):
81 case fuse(data_types::f32, format::yxfb):
82 case fuse(data_types::f16, format::bfyx):
83 case fuse(data_types::f16, format::yxfb):
86 throw std::runtime_error("convolution_grad_weights weights format unsupported");
89 const auto& weights_size = weights_layout.size;
91 const auto& split = primitive->split();
92 const auto& stride = primitive->stride;
93 #if 0 // TODO: support dilation
94 const auto& dilation = primitive->dilation;
96 const tensor dilation = {0,0,1,1};
98 const auto depthwise_separable_opt = arg.get_depthwise_sep_opt();
99 const auto output_grad_w = arg.output_grad_w();
101 const auto& input_offset = primitive->input_offset;
103 auto conv_grad_weights_params = get_default_learning_params<kernel_selector::convolution_grad_weights_params>(arg, depthwise_separable_opt ? 1 : split);
104 auto conv_grad_weights_optional_params = get_default_learning_optional_params<kernel_selector::convolution_grad_weights_optional_params>(arg.get_program());
106 conv_grad_weights_params.depthwise_separable_opt = depthwise_separable_opt;
107 conv_grad_weights_params.output_grad_w = output_grad_w;
109 conv_grad_weights_params.gradient = true;
110 conv_grad_weights_params.inputs.push_back(convert_data_tensor(arg.get_dependency(1).get_output_layout()));
112 conv_grad_weights_params.split = split;
113 conv_grad_weights_params.filterSize = {
114 (uint32_t)weights_size.spatial[0],
115 (uint32_t)weights_size.spatial[1],
118 conv_grad_weights_params.padding = {
119 (uint32_t)std::max(-input_offset.spatial[0], 0),
120 (uint32_t)std::max(-input_offset.spatial[1], 0)
123 conv_grad_weights_params.stride = {
124 (uint32_t)stride.spatial[0],
125 (uint32_t)stride.spatial[1]
128 conv_grad_weights_params.dilation = {
129 (uint32_t)dilation.spatial[0],
130 (uint32_t)dilation.spatial[1]
133 auto& kernel_selector = kernel_selector::convolution_grad_weights_kernel_selector::Instance();
134 auto best_kernels = kernel_selector.GetBestKernels(conv_grad_weights_params, conv_grad_weights_optional_params);
136 CLDNN_ERROR_BOOL(arg.id(), "Best_kernel.empty()", best_kernels.empty(), "Cannot find a proper kernel with this arguments");
138 auto deconv = new convolution_grad_weights_gpu(arg, best_kernels[0]);
147 implementation_map<convolution_grad_weights>::add(std::make_tuple(engine_types::ocl, data_types::f32, format::yxfb), convolution_grad_weights_gpu::create);
148 implementation_map<convolution_grad_weights>::add(std::make_tuple(engine_types::ocl, data_types::f32, format::bfyx), convolution_grad_weights_gpu::create);
149 implementation_map<convolution_grad_weights>::add(std::make_tuple(engine_types::ocl, data_types::f16, format::yxfb), convolution_grad_weights_gpu::create);
150 implementation_map<convolution_grad_weights>::add(std::make_tuple(engine_types::ocl, data_types::f16, format::bfyx), convolution_grad_weights_gpu::create);
151 implementation_map<convolution_grad_weights>::add(std::make_tuple(engine_types::ocl, data_types::f32, format::byxf), convolution_grad_weights_gpu::create);
152 implementation_map<convolution_grad_weights>::add(std::make_tuple(engine_types::ocl, data_types::f16, format::byxf), convolution_grad_weights_gpu::create);