2 // Copyright (c) 2016 Intel Corporation
4 // Licensed under the Apache License, Version 2.0 (the "License");
5 // you may not use this file except in compliance with the License.
6 // You may obtain a copy of the License at
8 // http://www.apache.org/licenses/LICENSE-2.0
10 // Unless required by applicable law or agreed to in writing, software
11 // distributed under the License is distributed on an "AS IS" BASIS,
12 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 // See the License for the specific language governing permissions and
14 // limitations under the License.
17 ///////////////////////////////////////////////////////////////////////////////////////////////////
21 #include "memory_gpu.h"
23 namespace cldnn { namespace gpu {
26 inline cl::NDRange toNDRange(const std::vector<size_t>& v)
31 return cl::NDRange(v[0]);
33 return cl::NDRange(v[0], v[1]);
35 return cl::NDRange(v[0], v[1], v[2]);
// NOTE(review): kernel::set_arguments — positionally binds every argument
// declared in `args` onto the cl::Kernel (argument slot i gets args[i]).
// This extract has original line numbers fused into the text and is missing
// structural lines (the function header, the switch on the argument type,
// braces, else-branches and break statements); the comments below annotate
// only what is visible and must not be read as a complete control flow.
43 const kernel_selector::kernel_arguments& args,
44 const kernel::kernel_arguments_data& data)
// One pass over the declared arguments; i doubles as the OpenCL arg index.
46 for (uint32_t i = 0; i < static_cast<uint32_t>(args.size()); i++)
// Start pessimistic: if no branch below binds the argument successfully,
// the CL_SUCCESS check at the bottom throws.
48 cl_int status = CL_INVALID_ARG_VALUE;
// Primitive input tensor: args[i].index selects which input; skipped when
// the index is out of range or the input pointer is null.
52 case kernel_selector::kernel_argument_types::INPUT:
53 if (args[i].index < data.inputs.size() && data.inputs[args[i].index])
55 const auto& input_mem = data.inputs[args[i].index];
58 status = kernel.setArg(i, dynamic_cast<const gpu::gpu_buffer&>(*input_mem).get_buffer());
// Scratch/intermediate buffer allocated for this primitive.
62 case kernel_selector::kernel_argument_types::INTERNAL_BUFFER:
63 if (args[i].index < data.intermediates.size() && data.intermediates[args[i].index])
65 const auto& input_mem = data.intermediates[args[i].index];
68 status = kernel.setArg(i, dynamic_cast<const gpu::gpu_buffer&>(*input_mem).get_buffer());
// Output tensor: bound as an image2d when the layout uses an image format,
// otherwise as a plain buffer.
72 case kernel_selector::kernel_argument_types::OUTPUT:
75 if (data.output->get_layout().format.is_image_2d())
76 status = kernel.setArg(i, dynamic_cast<const gpu::gpu_image2d&>(*data.output).get_buffer());
78 status = kernel.setArg(i, dynamic_cast<const gpu::gpu_buffer&>(*data.output).get_buffer());
// Weights: same image2d-vs-buffer selection as OUTPUT.
81 case kernel_selector::kernel_argument_types::WEIGHTS:
84 if (data.weights->get_layout().format.is_image_2d())
85 status = kernel.setArg(i, dynamic_cast<const gpu::gpu_image2d&>(*data.weights).get_buffer());
87 status = kernel.setArg(i, dynamic_cast<const gpu::gpu_buffer&>(*data.weights).get_buffer());
// Bias is always a plain buffer.
90 case kernel_selector::kernel_argument_types::BIAS:
93 status = kernel.setArg(i, dynamic_cast<const gpu::gpu_buffer&>(*data.bias).get_buffer());
// Gradient tensors from the previous iteration (training kernels only);
// bound only when the corresponding memory is present.
96 case kernel_selector::kernel_argument_types::PREV_WEIGHTS_GRADIENT:
97 if (data.prev_weights_grad)
99 if (data.prev_weights_grad->get_layout().format.is_image_2d())
100 status = kernel.setArg(i, dynamic_cast<const gpu::gpu_image2d&>(*data.prev_weights_grad).get_buffer());
102 status = kernel.setArg(i, dynamic_cast<const gpu::gpu_buffer&>(*data.prev_weights_grad).get_buffer());
105 case kernel_selector::kernel_argument_types::PREV_BIAS_GRADIENT:
106 if (data.prev_bias_grad)
108 status = kernel.setArg(i, dynamic_cast<const gpu::gpu_buffer&>(*data.prev_bias_grad).get_buffer());
// Quantization factors for quantized-weight kernels.
111 case kernel_selector::kernel_argument_types::WEIGHTS_QUANTIZATION_FACTORS:
112 if (data.weights_quantization_factors)
114 status = kernel.setArg(i, dynamic_cast<const gpu::gpu_buffer&>(*data.weights_quantization_factors).get_buffer());
// Calibration factors: index 0 refers to this primitive's own output
// factors; index N > 0 selects the (N-1)-th fused operation's factors.
117 case kernel_selector::kernel_argument_types::OUTPUT_CALIBRATION_FACTORS:
118 if (args[i].index == 0)
120 if (data.output_calibration_factors)
122 status = kernel.setArg(i, dynamic_cast<const gpu::gpu_buffer&>(*data.output_calibration_factors).get_buffer());
127 size_t new_idx = args[i].index - 1;
128 if (new_idx < data.fused_op_calibration_factors.size() && data.fused_op_calibration_factors[new_idx])
130 status = kernel.setArg(i, dynamic_cast<const gpu::gpu_buffer&>(*data.fused_op_calibration_factors[new_idx]).get_buffer());
// Lookup-table of scale values, bound only when provided.
135 case kernel_selector::kernel_argument_types::SCALE_TABLE:
136 if (data.scale_table)
138 status = kernel.setArg(i, dynamic_cast<const gpu::gpu_buffer&>(*data.scale_table).get_buffer());
// Per-element slope data (e.g. parametric activation functions).
141 case kernel_selector::kernel_argument_types::SLOPE:
144 status = kernel.setArg(i, dynamic_cast<const gpu::gpu_buffer&>(*data.slope).get_buffer());
// Plain scalars passed by value.
147 case kernel_selector::kernel_argument_types::SPLIT:
148 status = kernel.setArg(i, data.split);
150 case kernel_selector::kernel_argument_types::LEARNING_RATE:
151 status = kernel.setArg(i, data.lr);
// Generic user scalar: dispatch on its declared type so setArg receives a
// value of exactly the size the kernel expects.
153 case kernel_selector::kernel_argument_types::SCALAR:
154 if (data.scalars && args[i].index < data.scalars->size())
156 const auto& scalar = (*data.scalars)[args[i].index];
159 case kernel_selector::kernel_scalar_argument_types::UINT8:
160 status = kernel.setArg(i, scalar.v.u8);
162 case kernel_selector::kernel_scalar_argument_types::UINT16:
163 status = kernel.setArg(i, scalar.v.u16);
165 case kernel_selector::kernel_scalar_argument_types::UINT32:
166 status = kernel.setArg(i, scalar.v.u32);
168 case kernel_selector::kernel_scalar_argument_types::UINT64:
169 status = kernel.setArg(i, scalar.v.u64);
171 case kernel_selector::kernel_scalar_argument_types::INT8:
172 status = kernel.setArg(i, scalar.v.s8);
174 case kernel_selector::kernel_scalar_argument_types::INT16:
175 status = kernel.setArg(i, scalar.v.s16);
177 case kernel_selector::kernel_scalar_argument_types::INT32:
178 status = kernel.setArg(i, scalar.v.s32);
180 case kernel_selector::kernel_scalar_argument_types::INT64:
181 status = kernel.setArg(i, scalar.v.s64);
183 case kernel_selector::kernel_scalar_argument_types::FLOAT32:
184 status = kernel.setArg(i, scalar.v.f32);
186 case kernel_selector::kernel_scalar_argument_types::FLOAT64:
187 status = kernel.setArg(i, scalar.v.f64);
// Recurrent-network state tensors: same image2d-vs-buffer selection as
// the OUTPUT/WEIGHTS cases above.
194 case kernel_selector::kernel_argument_types::RECURRENT: // RNN/LSTM/GRU layers
197 if (data.recurrent->get_layout().format.is_image_2d())
198 status = kernel.setArg(i, dynamic_cast<const gpu::gpu_image2d&>(*data.recurrent).get_buffer());
200 status = kernel.setArg(i, dynamic_cast<const gpu::gpu_buffer&>(*data.recurrent).get_buffer());
203 case kernel_selector::kernel_argument_types::HIDDEN: // RNN/LSTM/GRU layers
206 if (data.hidden->get_layout().format.is_image_2d())
207 status = kernel.setArg(i, dynamic_cast<const gpu::gpu_image2d&>(*data.hidden).get_buffer());
209 status = kernel.setArg(i, dynamic_cast<const gpu::gpu_buffer&>(*data.hidden).get_buffer());
212 case kernel_selector::kernel_argument_types::CELL: // LSTM layers
215 if (data.cell->get_layout().format.is_image_2d())
216 status = kernel.setArg(i, dynamic_cast<const gpu::gpu_image2d&>(*data.cell).get_buffer());
218 status = kernel.setArg(i, dynamic_cast<const gpu::gpu_buffer&>(*data.cell).get_buffer());
// Any argument that could not be bound (missing data, bad index, or a
// setArg failure) aborts kernel setup with an exception.
225 if (status != CL_SUCCESS)
227 throw std::runtime_error("Error set args\n");
// Runs this kernel: fetches the compiled cl::Kernel from the context's
// kernel cache, binds all arguments, then enqueues it with the global and
// local work sizes taken from kernel_data, returning the completion event.
// NOTE(review): this extract is missing the function's opening brace, the
// `try` that pairs with the catch below, and the closing brace; the code
// lines themselves are left untouched.
233 event_impl::ptr kernel::run(
234 const kernel_selector::cl_kernel_data& kernel_data,
235 const std::vector<event_impl::ptr>& dependencies,
236 const kernel_arguments_data& args) const
// _one_time_kernel presumably selects a one-shot vs. cached kernel variant
// in the kernels cache — TODO confirm against kernels_cache::get_kernel.
238 auto clkernel = context()->get_kernels_cache().get_kernel(_kernel_id, _one_time_kernel);
240 set_arguments(clkernel, kernel_data.arguments, args);
// Errors thrown by the OpenCL C++ bindings during argument setup are
// rethrown as the project's ocl_error type.
242 catch (cl::Error const& err) {
243 throw ocl_error(err);
246 return context()->enqueue_kernel(clkernel, toNDRange(kernel_data.workGroups.global), toNDRange(kernel_data.workGroups.local), dependencies);