2 // Copyright (c) 2016 Intel Corporation
4 // Licensed under the Apache License, Version 2.0 (the "License");
5 // you may not use this file except in compliance with the License.
6 // You may obtain a copy of the License at
8 // http://www.apache.org/licenses/LICENSE-2.0
10 // Unless required by applicable law or agreed to in writing, software
11 // distributed under the License is distributed on an "AS IS" BASIS,
12 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 // See the License for the specific language governing permissions and
14 // limitations under the License.
17 #include "deconvolution_inst.h"
18 #include "primitive_gpu_base.h"
19 #include "implementation_map.h"
20 #include "error_handler.h"
21 #include "kernel_selector_helper.h"
22 #include "deconvolution/deconvolution_kernel_selector.h"
23 #include "deconvolution/deconvolution_kernel_base.h"
25 namespace cldnn { namespace gpu {
// Deconvolution primitive implementation built on typed_primitive_gpu_impl<deconvolution>.
// Its create() factory is registered for engine_types::ocl at the bottom of this file.
27 struct deconvolution_gpu : typed_primitive_gpu_impl<deconvolution>
// Shorthand for the base class; get_arguments() uses it to call parent::get_arguments().
29 using parent = typed_primitive_gpu_impl<deconvolution>;
34 // TODO: share it with convolution and fully connected
// Sanity-checks a deconvolution instance:
//   - the output layout's padding filling value is compared against 0.0f,
//   - the input memory data type is compared against the output memory and
//     against the first weights memory (index 0).
// All checks go through CLDNN_ERROR_* macros; presumably they report/raise on
// a failed check (see error_handler.h) -- TODO confirm.
35 virtual bool validate_impl(const typed_primitive_inst<deconvolution>& instance) const override
// Only a zero filling value is accepted for the output padding.
39 CLDNN_ERROR_NOT_EQUAL(_outer.id(), "deconvolution filling value", _outer.get_output_layout().data_padding.filling_value(), "padding mode", 0.0f, "Unknown padding mode in deconvolution.");
40 // Check whether all memory elements use the same unit type (FP16 or FP32).
41 CLDNN_ERROR_DATA_TYPES_MISMATCH(_outer.id(), "Input memory", instance.input_memory().get_layout().data_type, "output memory", instance.output_memory().get_layout().data_type, "");
// NOTE(review): only weights_memory(0) is compared; other splits' weights are not checked here.
42 CLDNN_ERROR_DATA_TYPES_MISMATCH(_outer.id(), "Input memory", instance.input_memory().get_layout().data_type, "filter memory", instance.weights_memory(0).get_layout().data_type, "");
// Builds the kernel argument set for one split of the deconvolution.
// Starts from the base-class arguments, then attaches the weights (and the
// bias, when the instance has a bias term) that belong to `split`. If the
// instance has more dependencies than weights + bias + 1, the dependency at
// that index is appended as an extra kernel input.
47 virtual kernel::kernel_arguments_data get_arguments(typed_primitive_inst<deconvolution>& instance, int32_t split) const override
49 kernel::kernel_arguments_data args = parent::get_arguments(instance, split);
50 auto* desc = static_cast<const deconvolution*>(instance.desc().get());
// Number of "regular" dependencies: all weights, all biases, plus one
// (presumably the primary input -- TODO confirm).
51 int dep_size = (int)(desc->weights.size() + desc->bias.size() + 1);
53 args.weights = &instance.weights_memory(split);
// bias stays nullptr when the primitive has no bias term.
54 args.bias = instance.bias_term() ? &instance.bias_memory(split) : nullptr;
// Any dependency beyond the regular ones is passed as an additional input;
// create() treats the same condition as a fused eltwise (fused_sum) input.
56 if (int(instance.dependencies().size()) > dep_size)
57 args.inputs.emplace_back(&instance.dep_memory(dep_size));
// Returns the split count, forwarded from the node held in _outer.
62 virtual int32_t get_split() const override
64 return _outer.get_split();
// Returns the group count, forwarded from the node held in _outer.
67 virtual uint32_t get_groups() const override
69 return _outer.get_groups();
// Factory registered in implementation_map<deconvolution>: translates a
// deconvolution_node into kernel_selector::deconvolution_params, asks the
// deconvolution kernel selector for matching kernels and wraps the first one
// in a newly allocated deconvolution_gpu.
// An unsupported weights format is reported with std::runtime_error; an empty
// kernel list is reported through CLDNN_ERROR_BOOL.
74 static primitive_impl* create(const deconvolution_node& arg)
76 const auto& primitive = arg.get_primitive();
// Layout of the first weights dependency; used for the format check and filter size.
77 const auto& weights_layout = arg.weights(0).get_output_layout();
// Weights data type / format combinations that are listed explicitly.
79 switch (weights_layout.fused_format())
82 case fuse(data_types::f32, format::bfyx):
83 case fuse(data_types::f32, format::yxfb):
84 case fuse(data_types::f16, format::bfyx):
85 case fuse(data_types::f16, format::yxfb):
// Raised for a weights format the implementation does not support (per the message).
88 throw std::runtime_error("deconvolution weights format unsupported");
91 const auto& weights_size = weights_layout.size;
93 const auto& split = primitive->split();
94 const auto& stride = primitive->stride;
// Reading dilation from the primitive is compiled out (#if 0); the fixed
// tensor {0,0,1,1} below is what feeds deconv_params.dilation.
95 #if 0 // TODO: support dilation
96 const auto& dilation = primitive->dilation;
98 const tensor dilation = {0,0,1,1};
100 const auto depthwise_separable_opt = arg.get_depthwise_sep_opt();
// With the depthwise-separable optimization enabled, the split is treated as 1.
101 const auto actual_split = depthwise_separable_opt ? (decltype(split))1 : split;
103 const auto& input_offset = primitive->input_offset;
104 const auto& groups = primitive->groups;
// Second argument: `groups` when grouping is used (groups > 1) without the
// depthwise-separable optimization, otherwise `actual_split`.
106 auto deconv_params = get_weights_bias_default_params<kernel_selector::deconvolution_params>(arg, (groups > 1 && !depthwise_separable_opt) ? groups : actual_split, groups);
107 auto deconv_optional_params = get_default_weights_bias_optional_params<kernel_selector::deconvolution_optional_params>(arg.get_program());
// Activation parameters are converted only when the primitive requests an activation.
109 if(primitive->with_activation)
110 convert_activation_func_params(primitive, deconv_params.activation);
112 deconv_params.depthwise_separable_opt = depthwise_separable_opt;
// NOTE(review): this stores the primitive's `split`, not `actual_split` -- confirm intentional.
114 deconv_params.split = split;
115 deconv_params.groups = groups;
// Filter size comes from the first two spatial dimensions of the weights.
116 deconv_params.filterSize = {
117 (uint32_t)weights_size.spatial[0],
118 (uint32_t)weights_size.spatial[1],
// Padding is the negated input offset, clamped so it is never negative.
121 deconv_params.padding = {
122 (uint32_t)std::max(-input_offset.spatial[0], 0),
123 (uint32_t)std::max(-input_offset.spatial[1], 0)
126 deconv_params.stride = {
127 (uint32_t)stride.spatial[0],
128 (uint32_t)stride.spatial[1]
131 deconv_params.dilation = {
132 (uint32_t)dilation.spatial[0],
133 (uint32_t)dilation.spatial[1]
136 deconv_params.gradient = primitive->gradient();
// More dependencies than weights + bias + 1 means an extra input is present:
// flag the fused eltwise and append the fused_sum output layout as an input.
138 if (arg.get_dependencies().size() > primitive->weights.size() + primitive->bias.size() + 1)
140 deconv_params.fused_eltwise = true;
141 deconv_params.inputs.push_back(convert_data_tensor(arg.fused_sum().get_output_layout()));
144 auto& kernel_selector = kernel_selector::deconvolution_kernel_selector::Instance();
145 auto best_kernels = kernel_selector.GetBestKernels(deconv_params, deconv_optional_params);
// Guard before indexing best_kernels[0] below.
147 CLDNN_ERROR_BOOL(arg.id(), "Best_kernel.empty()", best_kernels.empty(), "Cannot find a proper kernel with these arguments");
// The first returned kernel is used. NOTE(review): allocated with raw `new`;
// ownership presumably passes to the caller -- confirm.
148 auto deconv = new deconvolution_gpu(arg, best_kernels[0]);
// Register deconvolution_gpu::create for the OCL engine, for f32 and f16 data
// with the yxfb, bfyx and byxf formats.
// NOTE(review): byxf is registered here, but the weights-format switch in
// create() lists only bfyx and yxfb; presumably this key describes the
// primitive's data layout rather than the weights layout -- confirm.
157 implementation_map<deconvolution>::add(std::make_tuple(engine_types::ocl, data_types::f32, format::yxfb), deconvolution_gpu::create);
158 implementation_map<deconvolution>::add(std::make_tuple(engine_types::ocl, data_types::f32, format::bfyx), deconvolution_gpu::create);
159 implementation_map<deconvolution>::add(std::make_tuple(engine_types::ocl, data_types::f16, format::yxfb), deconvolution_gpu::create);
160 implementation_map<deconvolution>::add(std::make_tuple(engine_types::ocl, data_types::f16, format::bfyx), deconvolution_gpu::create);
161 implementation_map<deconvolution>::add(std::make_tuple(engine_types::ocl, data_types::f32, format::byxf), deconvolution_gpu::create);
162 implementation_map<deconvolution>::add(std::make_tuple(engine_types::ocl, data_types::f16, format::byxf), deconvolution_gpu::create);