Publishing 2019 R1 content
[platform/upstream/dldt.git] / inference-engine / thirdparty / clDNN / src / gpu / broadcast_gpu.cpp
1 // Copyright (c) 2018 Intel Corporation
2 //
3 // Licensed under the Apache License, Version 2.0 (the "License");
4 // you may not use this file except in compliance with the License.
5 // You may obtain a copy of the License at
6 //
7 //      http://www.apache.org/licenses/LICENSE-2.0
8 //
9 // Unless required by applicable law or agreed to in writing, software
10 // distributed under the License is distributed on an "AS IS" BASIS,
11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 // See the License for the specific language governing permissions and
13 // limitations under the License.
14
15
16 #include "broadcast_inst.h"
17
18 #include "primitive_gpu_base.h"
19 #include "implementation_map.h"
20 #include "kernel_selector_helper.h"
21 #include "broadcast/broadcast_kernel_selector.h"
22 #include "broadcast/broadcast_kernel_base.h"
23 #include "error_handler.h"
24
25 namespace cldnn { namespace gpu {
26
27 struct broadcast_gpu : typed_primitive_gpu_impl<broadcast>
28 {
29     using parent = typed_primitive_gpu_impl<broadcast>;
30     using parent::parent;
31
32
33     static primitive_impl* create(const broadcast_node& arg)
34     { 
35         auto bc_params          = get_default_params<kernel_selector::broadcast_params>(arg, 1);
36         auto bc_optional_params = get_default_optional_params<kernel_selector::broadcast_optional_params>(arg.get_program());
37
38         const auto& broadcast_axes = arg.get_primitive()->broadcast_axes;
39         uint16_t index = (uint16_t) 0;
40         uint16_t input_index = (uint16_t) broadcast_axes.size();
41
42         //bfyx format
43         for (size_t i = 0; i < 4; ++i)
44         {
45             if (std::find(broadcast_axes.begin(), broadcast_axes.end(), i) != broadcast_axes.end())
46             {
47                 bc_params.input_order.push_back(index);
48                 ++index;
49             }
50             else
51             {
52                 bc_params.input_order.push_back(input_index);
53                 ++input_index;
54             }
55         }
56
57         auto& kernel_selector = kernel_selector::broadcast_kernel_selector::Instance();
58         auto best_kernels = kernel_selector.GetBestKernels(bc_params, bc_optional_params);
59
60         CLDNN_ERROR_BOOL(arg.id(), "Best_kernel.empty()", best_kernels.empty(), "Cannot find a proper kernel with this arguments");
61
62         return new broadcast_gpu(arg, best_kernels[0]);
63     }
64 };
65
66 namespace {
67     struct attach {
68         attach() {
69             auto val_fw = broadcast_gpu::create;
70
71             implementation_map<broadcast>::add(std::make_tuple(engine_types::ocl, data_types::f32, format::bfyx), val_fw);
72             implementation_map<broadcast>::add(std::make_tuple(engine_types::ocl, data_types::f16, format::bfyx), val_fw);
73             implementation_map<broadcast>::add(std::make_tuple(engine_types::ocl, data_types::i8,  format::bfyx), val_fw);
74             implementation_map<broadcast>::add(std::make_tuple(engine_types::ocl, data_types::u8,  format::bfyx), val_fw);
75             implementation_map<broadcast>::add(std::make_tuple(engine_types::ocl, data_types::i32, format::bfyx), val_fw);
76             implementation_map<broadcast>::add(std::make_tuple(engine_types::ocl, data_types::i64, format::bfyx), val_fw);
77         }
78         ~attach() = default;
79     };
80
81     attach attach_impl;
82
83 }
84 } }