7a903f6aa5dee8610be517f152e4b790fb9fc0cb
[platform/upstream/dldt.git] / inference-engine / thirdparty / clDNN / src / gpu / activation_gpu.cpp
1 /*
2 // Copyright (c) 2016-2019 Intel Corporation
3 //
4 // Licensed under the Apache License, Version 2.0 (the "License");
5 // you may not use this file except in compliance with the License.
6 // You may obtain a copy of the License at
7 //
8 //      http://www.apache.org/licenses/LICENSE-2.0
9 //
10 // Unless required by applicable law or agreed to in writing, software
11 // distributed under the License is distributed on an "AS IS" BASIS,
12 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 // See the License for the specific language governing permissions and
14 // limitations under the License.
15 */
16
17 #include "activation_inst.h"
18 #include "primitive_gpu_base.h"
19 #include "implementation_map.h"
20 #include "error_handler.h"
21 #include "kernel_selector_helper.h"
22 #include "activation/activation_kernel_selector.h"
23 #include "activation/activation_kernel_base.h"
24 #include "api/CPP/activation.hpp"
25
26 namespace cldnn {
27 namespace gpu {
28
29 struct activation_gpu : typed_primitive_gpu_impl<activation> {
30     using parent = typed_primitive_gpu_impl<activation>;
31     using parent::parent;
32
33     kernel::kernel_arguments_data get_arguments(typed_primitive_inst<activation>& instance,
34                                                         int32_t split) const override {
35         kernel::kernel_arguments_data args = parent::get_arguments(instance, split);
36
37         if (_outer.is_parameterized()) {
38             args.slope = (memory_impl::cptr) &instance.slope_memory();
39         }
40
41         return args;
42     }
43
44     static primitive_impl* create(const activation_node& arg) {
45         auto activation_params = get_default_params<kernel_selector::activation_params>(arg);
46         auto activation_optional_params =
47             get_default_optional_params<kernel_selector::activation_optional_params>(arg.get_program());
48
49         convert_new_activation_func(arg.get_primitive(), activation_params.activation);
50
51         if (arg.is_parameterized()) {
52             const auto& slope_layout = arg.slope_input().get_output_layout();
53             const auto& output_layout = arg.get_output_layout();
54
55             const auto params_num =
56                 kernel_selector::GetActivationAdditionalParamsNumber(activation_params.activation.function);
57
58             CLDNN_ERROR_LESS_THAN(arg.id(),
59                                   "Slope layout size count",
60                                   slope_layout.size.count(),
61                                   "output_layout.size.feature[0] * params_num",
62                                   static_cast<size_t>(output_layout.size.feature[0] * params_num),
63                                   "Error - not enough data inside additional params buffer");
64
65             activation_params.inputActivationParams.push_back(convert_data_tensor(slope_layout));
66         }
67
68         auto& kernel_selector = kernel_selector::activation_kernel_selector::Instance();
69         auto best_kernels = kernel_selector.GetBestKernels(activation_params, activation_optional_params);
70         CLDNN_ERROR_BOOL(arg.id(),
71                          "Best_kernel.empty()",
72                          best_kernels.empty(),
73                          "Cannot find a proper kernel with this arguments");
74
75         auto activation = new activation_gpu(arg, best_kernels[0]);
76
77         return activation;
78     }
79 };
80
81 namespace {
82 struct attach {
83     attach() {
84         auto val_fw = activation_gpu::create;
85
86         implementation_map<activation>::add({
87             {std::make_tuple(engine_types::ocl, data_types::f32, format::yxfb), val_fw},
88             {std::make_tuple(engine_types::ocl, data_types::f16, format::yxfb), val_fw},
89             {std::make_tuple(engine_types::ocl, data_types::f32, format::bfyx), val_fw},
90             {std::make_tuple(engine_types::ocl, data_types::f16, format::bfyx), val_fw},
91             {std::make_tuple(engine_types::ocl, data_types::f32, format::byxf), val_fw},
92             {std::make_tuple(engine_types::ocl, data_types::f16, format::byxf), val_fw},
93             {std::make_tuple(engine_types::ocl, data_types::i8, format::yxfb), val_fw},
94             {std::make_tuple(engine_types::ocl, data_types::i8, format::bfyx), val_fw},
95             {std::make_tuple(engine_types::ocl, data_types::i8, format::byxf), val_fw},
96             {std::make_tuple(engine_types::ocl, data_types::u8, format::yxfb), val_fw},
97             {std::make_tuple(engine_types::ocl, data_types::u8, format::bfyx), val_fw},
98             {std::make_tuple(engine_types::ocl, data_types::u8, format::byxf), val_fw},
99             // block f16 format
100             {std::make_tuple(engine_types::ocl, data_types::f16, format::bfyx_f16), val_fw},
101             {std::make_tuple(engine_types::ocl, data_types::f32, format::bfyx_f16), val_fw},
102             // 3D
103             {std::make_tuple(engine_types::ocl, data_types::f32, format::bfzyx), val_fw},
104             {std::make_tuple(engine_types::ocl, data_types::f16, format::bfzyx), val_fw},
105             {std::make_tuple(engine_types::ocl, data_types::i8, format::bfzyx), val_fw},
106         });
107     }
108     ~attach() {}
109 };
110 attach attach_impl;
111 }  // namespace
112 }  // namespace gpu
113 }  // namespace cldnn