Publishing 2019 R1 content
[platform/upstream/dldt.git] / inference-engine / thirdparty / clDNN / src / gpu / kernel.cpp
1 /*
2 // Copyright (c) 2016 Intel Corporation
3 //
4 // Licensed under the Apache License, Version 2.0 (the "License");
5 // you may not use this file except in compliance with the License.
6 // You may obtain a copy of the License at
7 //
8 //      http://www.apache.org/licenses/LICENSE-2.0
9 //
10 // Unless required by applicable law or agreed to in writing, software
11 // distributed under the License is distributed on an "AS IS" BASIS,
12 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 // See the License for the specific language governing permissions and
14 // limitations under the License.
15 */
16
17 ///////////////////////////////////////////////////////////////////////////////////////////////////
18
#include <iterator>
#include <string>

#include "kernel.h"
#include "memory_gpu.h"
22
23 namespace cldnn { namespace gpu {
24
25 namespace {
26     inline cl::NDRange toNDRange(const std::vector<size_t>& v)
27     {
28         switch (v.size())
29         {
30         case 1:
31             return cl::NDRange(v[0]);
32         case 2:
33             return cl::NDRange(v[0], v[1]);
34         case 3:
35             return cl::NDRange(v[0], v[1], v[2]);
36         default:
37             return cl::NullRange;
38         }
39     }
40
41     void set_arguments(
42         cl::Kernel& kernel,
43         const kernel_selector::kernel_arguments& args,
44         const kernel::kernel_arguments_data& data)
45     {
46         for (uint32_t i = 0; i < static_cast<uint32_t>(args.size()); i++)
47         {
48             cl_int status = CL_INVALID_ARG_VALUE;
49
50             switch (args[i].t)
51             {
52             case kernel_selector::kernel_argument_types::INPUT:
53                 if (args[i].index < data.inputs.size() && data.inputs[args[i].index])
54                 {
55                     const auto& input_mem = data.inputs[args[i].index];
56                     if (input_mem)
57                     {
58                         status = kernel.setArg(i, dynamic_cast<const gpu::gpu_buffer&>(*input_mem).get_buffer());
59                     }
60                 }
61                 break;
62             case kernel_selector::kernel_argument_types::INTERNAL_BUFFER:
63                 if (args[i].index < data.intermediates.size() && data.intermediates[args[i].index])
64                 {
65                     const auto& input_mem = data.intermediates[args[i].index];
66                     if (input_mem)
67                     {
68                         status = kernel.setArg(i, dynamic_cast<const gpu::gpu_buffer&>(*input_mem).get_buffer());
69                     }
70                 }
71                 break;
72             case kernel_selector::kernel_argument_types::OUTPUT:
73                 if (data.output)
74                 {
75                     if (data.output->get_layout().format.is_image_2d())
76                         status = kernel.setArg(i, dynamic_cast<const gpu::gpu_image2d&>(*data.output).get_buffer());
77                     else
78                         status = kernel.setArg(i, dynamic_cast<const gpu::gpu_buffer&>(*data.output).get_buffer());
79                 }
80                 break;
81             case kernel_selector::kernel_argument_types::WEIGHTS:
82                 if (data.weights)
83                 {
84                     if (data.weights->get_layout().format.is_image_2d())
85                         status = kernel.setArg(i, dynamic_cast<const gpu::gpu_image2d&>(*data.weights).get_buffer());
86                     else
87                         status = kernel.setArg(i, dynamic_cast<const gpu::gpu_buffer&>(*data.weights).get_buffer());
88                 }
89                 break;
90             case kernel_selector::kernel_argument_types::BIAS:
91                 if (data.bias)
92                 {
93                     status = kernel.setArg(i, dynamic_cast<const gpu::gpu_buffer&>(*data.bias).get_buffer());
94                 }
95                 break;
96             case kernel_selector::kernel_argument_types::PREV_WEIGHTS_GRADIENT:
97                 if (data.prev_weights_grad)
98                 {
99                     if (data.prev_weights_grad->get_layout().format.is_image_2d())
100                         status = kernel.setArg(i, dynamic_cast<const gpu::gpu_image2d&>(*data.prev_weights_grad).get_buffer());
101                     else
102                         status = kernel.setArg(i, dynamic_cast<const gpu::gpu_buffer&>(*data.prev_weights_grad).get_buffer());
103                 }
104                 break;
105             case kernel_selector::kernel_argument_types::PREV_BIAS_GRADIENT:
106                 if (data.prev_bias_grad)
107                 {
108                     status = kernel.setArg(i, dynamic_cast<const gpu::gpu_buffer&>(*data.prev_bias_grad).get_buffer());
109                 }
110                 break;
111             case kernel_selector::kernel_argument_types::WEIGHTS_QUANTIZATION_FACTORS:
112                 if (data.weights_quantization_factors)
113                 {
114                     status = kernel.setArg(i, dynamic_cast<const gpu::gpu_buffer&>(*data.weights_quantization_factors).get_buffer());
115                 }
116                 break;
117             case kernel_selector::kernel_argument_types::OUTPUT_CALIBRATION_FACTORS:
118                 if (args[i].index == 0)
119                 {
120                     if (data.output_calibration_factors)
121                     {
122                         status = kernel.setArg(i, dynamic_cast<const gpu::gpu_buffer&>(*data.output_calibration_factors).get_buffer());
123                     }
124                 }
125                 else
126                 {
127                     size_t new_idx = args[i].index - 1;
128                     if (new_idx < data.fused_op_calibration_factors.size() && data.fused_op_calibration_factors[new_idx])
129                     {
130                         status = kernel.setArg(i, dynamic_cast<const gpu::gpu_buffer&>(*data.fused_op_calibration_factors[new_idx]).get_buffer());
131                     }
132                 }
133
134                 break;
135             case kernel_selector::kernel_argument_types::SCALE_TABLE:
136                 if (data.scale_table)
137                 {
138                     status = kernel.setArg(i, dynamic_cast<const gpu::gpu_buffer&>(*data.scale_table).get_buffer());
139                 }
140                 break;
141             case kernel_selector::kernel_argument_types::SLOPE:
142                 if (data.slope)
143                 {
144                     status = kernel.setArg(i, dynamic_cast<const gpu::gpu_buffer&>(*data.slope).get_buffer());
145                 }
146                 break;
147             case kernel_selector::kernel_argument_types::SPLIT:
148                 status = kernel.setArg(i, data.split);
149                 break;
150             case kernel_selector::kernel_argument_types::LEARNING_RATE:
151                 status = kernel.setArg(i, data.lr);
152                 break;
153             case kernel_selector::kernel_argument_types::SCALAR:
154                 if (data.scalars && args[i].index < data.scalars->size())
155                 {
156                     const auto& scalar = (*data.scalars)[args[i].index];
157                     switch (scalar.t)
158                     {
159                     case kernel_selector::kernel_scalar_argument_types::UINT8:
160                         status = kernel.setArg(i, scalar.v.u8);
161                         break;
162                     case kernel_selector::kernel_scalar_argument_types::UINT16:
163                         status = kernel.setArg(i, scalar.v.u16);
164                         break;
165                     case kernel_selector::kernel_scalar_argument_types::UINT32:
166                         status = kernel.setArg(i, scalar.v.u32);
167                         break;
168                     case kernel_selector::kernel_scalar_argument_types::UINT64:
169                         status = kernel.setArg(i, scalar.v.u64);
170                         break;
171                     case kernel_selector::kernel_scalar_argument_types::INT8:
172                         status = kernel.setArg(i, scalar.v.s8);
173                         break;
174                     case kernel_selector::kernel_scalar_argument_types::INT16:
175                         status = kernel.setArg(i, scalar.v.s16);
176                         break;
177                     case kernel_selector::kernel_scalar_argument_types::INT32:
178                         status = kernel.setArg(i, scalar.v.s32);
179                         break;
180                     case kernel_selector::kernel_scalar_argument_types::INT64:
181                         status = kernel.setArg(i, scalar.v.s64);
182                         break;
183                     case kernel_selector::kernel_scalar_argument_types::FLOAT32:
184                         status = kernel.setArg(i, scalar.v.f32);
185                         break;
186                     case kernel_selector::kernel_scalar_argument_types::FLOAT64:
187                         status = kernel.setArg(i, scalar.v.f64);
188                         break;
189                     default:
190                         break;
191                     }
192                 }
193                 break;
194             case kernel_selector::kernel_argument_types::RECURRENT: // RNN/LSTM/GRU layers
195                 if (data.recurrent)
196                 {
197                     if (data.recurrent->get_layout().format.is_image_2d())
198                         status = kernel.setArg(i, dynamic_cast<const gpu::gpu_image2d&>(*data.recurrent).get_buffer());
199                     else
200                         status = kernel.setArg(i, dynamic_cast<const gpu::gpu_buffer&>(*data.recurrent).get_buffer());
201                 }
202                 break;
203             case kernel_selector::kernel_argument_types::HIDDEN: // RNN/LSTM/GRU layers
204                 if (data.hidden)
205                 {
206                     if (data.hidden->get_layout().format.is_image_2d())
207                         status = kernel.setArg(i, dynamic_cast<const gpu::gpu_image2d&>(*data.hidden).get_buffer());
208                     else
209                         status = kernel.setArg(i, dynamic_cast<const gpu::gpu_buffer&>(*data.hidden).get_buffer());
210                 }
211                 break;
212             case kernel_selector::kernel_argument_types::CELL: // LSTMlayers
213                 if (data.cell)
214                 {
215                     if (data.cell->get_layout().format.is_image_2d())
216                         status = kernel.setArg(i, dynamic_cast<const gpu::gpu_image2d&>(*data.cell).get_buffer());
217                     else
218                         status = kernel.setArg(i, dynamic_cast<const gpu::gpu_buffer&>(*data.cell).get_buffer());
219                 }
220                 break;
221             default:
222                 break;
223             }
224
225             if (status != CL_SUCCESS)
226             {
227                 throw std::runtime_error("Error set args\n");
228             }
229         }
230     }
231 }
232
233 event_impl::ptr kernel::run(
234     const kernel_selector::cl_kernel_data& kernel_data,
235     const std::vector<event_impl::ptr>& dependencies,
236     const kernel_arguments_data& args) const
237 {
238     auto clkernel = context()->get_kernels_cache().get_kernel(_kernel_id, _one_time_kernel);
239     try {
240         set_arguments(clkernel, kernel_data.arguments, args);
241     }
242     catch (cl::Error const& err) {
243         throw ocl_error(err);
244     }
245
246     return context()->enqueue_kernel(clkernel, toNDRange(kernel_data.workGroups.global), toNDRange(kernel_data.workGroups.local), dependencies);
247 }
248
249 } }