Publishing R3
[platform/upstream/dldt.git] / inference-engine / thirdparty / clDNN / src / gpu / scale_gpu.cpp
1 /*
2 // Copyright (c) 2016 Intel Corporation
3 //
4 // Licensed under the Apache License, Version 2.0 (the "License");
5 // you may not use this file except in compliance with the License.
6 // You may obtain a copy of the License at
7 //
8 //      http://www.apache.org/licenses/LICENSE-2.0
9 //
10 // Unless required by applicable law or agreed to in writing, software
11 // distributed under the License is distributed on an "AS IS" BASIS,
12 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 // See the License for the specific language governing permissions and
14 // limitations under the License.
15 */
16
17 #include "scale_inst.h"
18 #include "primitive_gpu_base.h"
19 #include "implementation_map.h"
20 #include "kernel_selector_helper.h"
21 #include "eltwise/eltwise_kernel_selector.h"
22 #include "eltwise/eltwise_kernel_base.h"
23 #include "error_handler.h"
24
25 using namespace cldnn;
26
27 namespace cldnn { namespace gpu {
28
29
30 struct scale_gpu : typed_primitive_gpu_impl<scale>
31 {
32     using parent = typed_primitive_gpu_impl<scale>;
33     using parent::parent;
34
35 protected:
36
37     virtual kernel::kernel_arguments_data get_arguments(typed_primitive_inst<scale>& instance, int32_t) const override
38     {
39         kernel::kernel_arguments_data args;
40         args.inputs = { &instance.input_memory(), &instance.scale_memory() };
41         args.output = &instance.output_memory();
42
43         if (_outer.bias_term())
44         {
45             args.inputs.push_back(&instance.bias_memory());
46         }
47         return args;
48     }
49
50 public:
51
52     static primitive_impl* create(const scale_node& arg) 
53     { 
54         auto ew_params = get_default_params<kernel_selector::eltwise_params>(arg);
55         auto ew_optional_params = get_default_optional_params<kernel_selector::eltwise_optional_params>(arg.get_program());
56
57         ew_params.inputs.push_back(convert_data_tensor(arg.scale_in().get_output_layout()));
58
59         ew_params.eltwiseParams.operations.push_back({
60             { kernel_selector::eltwise_params::InputType::Buffer(0), kernel_selector::eltwise_params::InputType::Buffer(1) },
61             kernel_selector::eltwise_mode::MUL });
62
63         if (arg.bias_term())
64         {
65             ew_params.inputs.push_back(convert_data_tensor(arg.bias().get_output_layout()));
66             ew_params.eltwiseParams.operations.push_back({
67                 { kernel_selector::eltwise_params::InputType::Intermediate(0), kernel_selector::eltwise_params::InputType::Buffer(2) },
68                 kernel_selector::eltwise_mode::ADD });
69         }
70
71         ew_params.eltwiseParams.layoutBased = true;
72
73         auto& kernel_selector = kernel_selector::eltwise_kernel_selector::Instance();
74         auto best_kernels = kernel_selector.GetBestKernels(ew_params, ew_optional_params);
75
76         CLDNN_ERROR_BOOL(arg.id(), "Best_kernel.empty()", best_kernels.empty(), "Cannot find a proper kernel with this arguments");
77
78         auto scale = new scale_gpu(arg, best_kernels[0]);
79
80         return scale;
81     }
82 };
83
84 namespace {
85     struct attach {
86         attach() {
87             auto val_fw = scale_gpu::create;
88
89             implementation_map<scale>::add(std::make_tuple(engine_types::ocl, data_types::f32, format::yxfb), val_fw);
90             implementation_map<scale>::add(std::make_tuple(engine_types::ocl, data_types::f16, format::yxfb), val_fw);
91             implementation_map<scale>::add(std::make_tuple(engine_types::ocl, data_types::f32, format::bfyx), val_fw);
92             implementation_map<scale>::add(std::make_tuple(engine_types::ocl, data_types::f16, format::bfyx), val_fw);
93             implementation_map<scale>::add(std::make_tuple(engine_types::ocl, data_types::f32, format::byxf), val_fw);
94             implementation_map<scale>::add(std::make_tuple(engine_types::ocl, data_types::f16, format::byxf), val_fw);
95         }
96         ~attach() {}
97     };
98     attach attach_impl;
99 }
100 } }