4c4dd08f3656cf2ee50c6392a84a741ae81e1dd0
[platform/upstream/dldt.git] / inference-engine / thirdparty / clDNN / src / gpu / reorder_gpu.cpp
1 /*
2 // Copyright (c) 2016 Intel Corporation
3 //
4 // Licensed under the Apache License, Version 2.0 (the "License");
5 // you may not use this file except in compliance with the License.
6 // You may obtain a copy of the License at
7 //
8 //      http://www.apache.org/licenses/LICENSE-2.0
9 //
10 // Unless required by applicable law or agreed to in writing, software
11 // distributed under the License is distributed on an "AS IS" BASIS,
12 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 // See the License for the specific language governing permissions and
14 // limitations under the License.
15 */
16
17 #include "reorder_inst.h"
18 #include "primitive_gpu_base.h"
19 #include "implementation_map.h"
20 #include "kernel_selector_helper.h"
21 #include "reorder/reorder_kernel_selector.h"
22 #include "reorder/reorder_kernel_base.h"
23 #include "error_handler.h"
24
25 namespace cldnn {
26 namespace gpu {
27
28 struct reorder_gpu : typed_primitive_gpu_impl<reorder> {
29     using parent = typed_primitive_gpu_impl<reorder>;
30     using parent::parent;
31
32 protected:
33     bool optimized_out(reorder_inst& instance) const override {
34         return parent::optimized_out(instance) || _outer.can_be_optimized();
35     }
36
37     kernel::kernel_arguments_data get_arguments(reorder_inst& instance, int32_t split) const override {
38         kernel::kernel_arguments_data args = parent::get_arguments(instance, split);
39
40         if (_outer.has_mean()) {
41             args.bias = (memory_impl::cptr) &instance.mean_memory();
42         }
43
44         return args;
45     }
46
47 public:
48     static primitive_impl* create(const reorder_node& arg) {
49         auto&& input_layout = arg.input().get_output_layout();
50         auto&& output_layout = arg.get_output_layout();
51
52         auto reorder_params = get_default_params<kernel_selector::reorder_params>(arg);
53         auto reorder_optional_params =
54             get_default_optional_params<kernel_selector::reorder_optional_params>(arg.get_program());
55
56         if (arg.get_output_layout().data_padding) {
57             reorder_params.has_padded_output = true;
58         }
59
60         if (arg.has_mean()) {
61             const auto& mean_layout = arg.mean().get_output_layout();
62             reorder_params.mean = convert_data_tensor(mean_layout);
63             reorder_params.mode = kernel_selector::mean_subtruct_mode::IN_BUFFER;
64         } else if (arg.get_primitive()->subtract_per_feature.empty() == false) {
65             reorder_params.mode = kernel_selector::mean_subtruct_mode::INSIDE_PARAMS;
66             reorder_params.meanValues = arg.get_primitive()->subtract_per_feature;
67         } else {
68             reorder_params.mode = kernel_selector::mean_subtruct_mode::NONE;
69         }
70
71         if (reorder_params.mode != kernel_selector::mean_subtruct_mode::NONE) {
72             switch (arg.get_primitive()->mean_mode) {
73                 case cldnn_reorder_mean_mode::mean_none:
74                     reorder_params.mean_op = kernel_selector::mean_op::NONE;
75                     break;
76                 case cldnn_reorder_mean_mode::mean_mul:
77                     reorder_params.mean_op = kernel_selector::mean_op::MUL;
78                     break;
79                 case cldnn_reorder_mean_mode::mean_subtract:
80                     reorder_params.mean_op = kernel_selector::mean_op::SUB;
81                     break;
82                 case cldnn_reorder_mean_mode::mean_div:
83                     reorder_params.mean_op = kernel_selector::mean_op::DIV;
84                     break;
85                 default:
86                     throw std::out_of_range(arg.id() + ": unsupported mean_mode value.");
87             }
88         }
89
90         if (output_layout.format == format::winograd_2x3_s1_data) {
91             reorder_params.winograd_input_offset_x = arg.get_input_offset().spatial[0];
92             reorder_params.winograd_input_offset_y = arg.get_input_offset().spatial[1];
93             reorder_params.winograd_nr_tiles_x = ceil_div(output_layout.size.spatial[0], 4);
94         }
95
96         reorder_params.winograd = input_layout.format.is_winograd() || output_layout.format.is_winograd();
97
98         auto& kernel_selector = kernel_selector::reorder_kernel_selector::Instance();
99         auto best_kernels = kernel_selector.GetBestKernels(reorder_params, reorder_optional_params);
100
101         CLDNN_ERROR_BOOL(arg.id(),
102                          "Best_kernel.empty()",
103                          best_kernels.empty(),
104                          "Cannot find a proper kernel with this arguments");
105
106         auto reorder = new reorder_gpu(arg, best_kernels[0]);
107
108         return reorder;
109     }
110 };
111
112 namespace {
113 struct attach {
114     attach() { implementation_map<reorder>::add({{engine_types::ocl, reorder_gpu::create}}); }
115     ~attach() {}
116 };
117
118 attach attach_impl;
119
120 }  // namespace
121 }  // namespace gpu
122 }  // namespace cldnn