IVGCVSW-3296 Add CL backend support for ResizeNearestNeighbour
[platform/upstream/armnn.git] / src / backends / cl / ClWorkloadFactory.cpp
1 //
2 // Copyright © 2017 Arm Ltd. All rights reserved.
3 // SPDX-License-Identifier: MIT
4 //
5 #include "ClWorkloadFactory.hpp"
6 #include "ClBackendId.hpp"
7
8 #include <Layer.hpp>
9
10 #include <armnn/Exceptions.hpp>
11 #include <armnn/Utils.hpp>
12
13 #include <backendsCommon/CpuTensorHandle.hpp>
14 #include <backendsCommon/MakeWorkloadHelper.hpp>
15 #include <backendsCommon/MemCopyWorkload.hpp>
16
17 #include <cl/ClTensorHandle.hpp>
18 #include <cl/workloads/ClWorkloads.hpp>
19 #include <cl/workloads/ClWorkloadUtils.hpp>
20
21 #include <arm_compute/core/CL/CLKernelLibrary.h>
22 #include <arm_compute/runtime/CL/CLBufferAllocator.h>
23 #include <arm_compute/runtime/CL/CLScheduler.h>
24
25 #include <boost/polymorphic_cast.hpp>
26 #include <boost/format.hpp>
27 #include <boost/log/trivial.hpp>
28
29 namespace armnn
30 {
31
32 namespace
33 {
34 static const BackendId s_Id{ClBackendId()};
35 }
36
37 bool ClWorkloadFactory::IsLayerSupported(const Layer& layer,
38                                          Optional<DataType> dataType,
39                                          std::string& outReasonIfUnsupported)
40 {
41     return IWorkloadFactory::IsLayerSupported(s_Id, layer, dataType, outReasonIfUnsupported);
42 }
43
44 const BackendId& ClWorkloadFactory::GetBackendId() const
45 {
46     return s_Id;
47 }
48
49 template <typename FloatWorkload, typename Uint8Workload, typename QueueDescriptorType, typename... Args>
50 std::unique_ptr<IWorkload> ClWorkloadFactory::MakeWorkload(const QueueDescriptorType& descriptor,
51                                                            const WorkloadInfo& info,
52                                                            Args&&... args)
53 {
54     try
55     {
56         return MakeWorkloadHelper<FloatWorkload, Uint8Workload>(descriptor, info, std::forward<Args>(args)...);
57     }
58     catch (const cl::Error& clError)
59     {
60         throw WrapClError(clError, CHECK_LOCATION());
61     }
62 }
63
64 template <typename Workload, typename QueueDescriptorType, typename... Args>
65 std::unique_ptr<IWorkload> ClWorkloadFactory::MakeWorkload(const QueueDescriptorType& descriptor,
66                                                            const WorkloadInfo& info,
67                                                            Args&&... args)
68 {
69     try
70     {
71         return std::make_unique<Workload>(descriptor, info, std::forward<Args>(args)...);
72     }
73     catch (const cl::Error& clError)
74     {
75         throw WrapClError(clError, CHECK_LOCATION());
76     }
77 }
78
79 ClWorkloadFactory::ClWorkloadFactory(const std::shared_ptr<ClMemoryManager>& memoryManager)
80     : m_MemoryManager(memoryManager)
81 {
82 }
83
84 std::unique_ptr<ITensorHandle> ClWorkloadFactory::CreateTensorHandle(const TensorInfo& tensorInfo) const
85 {
86     std::unique_ptr<ClTensorHandle> tensorHandle = std::make_unique<ClTensorHandle>(tensorInfo);
87     tensorHandle->SetMemoryGroup(m_MemoryManager->GetInterLayerMemoryGroup());
88
89     return tensorHandle;
90 }
91
92 std::unique_ptr<ITensorHandle> ClWorkloadFactory::CreateTensorHandle(const TensorInfo& tensorInfo,
93                                                                      DataLayout dataLayout) const
94 {
95     std::unique_ptr<ClTensorHandle> tensorHandle = std::make_unique<ClTensorHandle>(tensorInfo, dataLayout);
96     tensorHandle->SetMemoryGroup(m_MemoryManager->GetInterLayerMemoryGroup());
97
98     return tensorHandle;
99 }
100
101 std::unique_ptr<ITensorHandle> ClWorkloadFactory::CreateSubTensorHandle(ITensorHandle&      parent,
102                                                                         TensorShape const&   subTensorShape,
103                                                                         unsigned int const* subTensorOrigin) const
104 {
105     arm_compute::Coordinates coords;
106     arm_compute::TensorShape shape = armcomputetensorutils::BuildArmComputeTensorShape(subTensorShape);
107
108     coords.set_num_dimensions(subTensorShape.GetNumDimensions());
109     for (unsigned int i = 0; i < subTensorShape.GetNumDimensions(); i++)
110     {
111         // Arm compute indexes tensor coords in reverse order.
112         unsigned int revertedIndex = subTensorShape.GetNumDimensions() - i - 1;
113         coords.set(i, boost::numeric_cast<int>(subTensorOrigin[revertedIndex]));
114     }
115
116     const arm_compute::TensorShape parentShape = armcomputetensorutils::BuildArmComputeTensorShape(parent.GetShape());
117     if (!::arm_compute::error_on_invalid_subtensor(__func__, __FILE__, __LINE__, parentShape, coords, shape))
118     {
119         return nullptr;
120     }
121
122     return std::make_unique<ClSubTensorHandle>(
123         boost::polymorphic_downcast<IClTensorHandle*>(&parent), shape, coords);
124 }
125
126 std::unique_ptr<IWorkload> ClWorkloadFactory::CreateInput(const InputQueueDescriptor& descriptor,
127                                                           const WorkloadInfo& info) const
128 {
129     return MakeWorkload<CopyMemGenericWorkload, CopyMemGenericWorkload>(descriptor, info);
130 }
131
132 std::unique_ptr<IWorkload> ClWorkloadFactory::CreateOutput(const OutputQueueDescriptor& descriptor,
133                                                            const WorkloadInfo& info) const
134 {
135     return MakeWorkloadHelper<CopyMemGenericWorkload, CopyMemGenericWorkload, CopyMemGenericWorkload, NullWorkload,
136         CopyMemGenericWorkload>(descriptor, info);
137 }
138
139 std::unique_ptr<IWorkload> ClWorkloadFactory::CreateActivation(const ActivationQueueDescriptor& descriptor,
140                                                                const WorkloadInfo&              info) const
141 {
142     return MakeWorkload<ClActivationWorkload>(descriptor, info);
143 }
144
145 std::unique_ptr<IWorkload> ClWorkloadFactory::CreateSoftmax(const SoftmaxQueueDescriptor& descriptor,
146                                                             const WorkloadInfo&           info) const
147 {
148     return MakeWorkload<ClSoftmaxFloatWorkload, ClSoftmaxUint8Workload>(descriptor, info,
149                                                                         m_MemoryManager->GetIntraLayerManager());
150 }
151
152 std::unique_ptr<IWorkload> ClWorkloadFactory::CreateSplitter(const SplitterQueueDescriptor& descriptor,
153                                                              const WorkloadInfo&            info) const
154 {
155     return MakeWorkload<ClSplitterWorkload>(descriptor, info);
156 }
157
158 std::unique_ptr<armnn::IWorkload> ClWorkloadFactory::CreateMerger(const MergerQueueDescriptor& descriptor,
159                                                                   const WorkloadInfo&          info) const
160 {
161     return CreateConcat(descriptor, info);
162 }
163
164 std::unique_ptr<armnn::IWorkload> ClWorkloadFactory::CreateFullyConnected(
165     const FullyConnectedQueueDescriptor& descriptor, const WorkloadInfo& info) const
166 {
167     return MakeWorkload<ClFullyConnectedWorkload, ClFullyConnectedWorkload>(descriptor, info,
168                                                                             m_MemoryManager->GetIntraLayerManager());
169 }
170
171 std::unique_ptr<armnn::IWorkload> ClWorkloadFactory::CreatePermute(const PermuteQueueDescriptor& descriptor,
172                                                                    const WorkloadInfo&           info) const
173 {
174     return MakeWorkload<ClPermuteWorkload>(descriptor, info);
175 }
176
177 std::unique_ptr<armnn::IWorkload> ClWorkloadFactory::CreatePooling2d(const Pooling2dQueueDescriptor& descriptor,
178                                                                      const WorkloadInfo&           info) const
179 {
180     return MakeWorkload<ClPooling2dWorkload>(descriptor, info);
181 }
182
183 std::unique_ptr<armnn::IWorkload> ClWorkloadFactory::CreatePrelu(const armnn::PreluQueueDescriptor &descriptor,
184                                                                  const armnn::WorkloadInfo &info) const
185 {
186     return MakeWorkload<ClPreluWorkload>(descriptor, info);
187 }
188
189 std::unique_ptr<armnn::IWorkload> ClWorkloadFactory::CreateConvolution2d(const Convolution2dQueueDescriptor& descriptor,
190                                                                          const WorkloadInfo&               info) const
191 {
192     return MakeWorkload<ClConvolution2dWorkload>(descriptor, info, m_MemoryManager->GetIntraLayerManager());
193 }
194
195 std::unique_ptr<IWorkload> ClWorkloadFactory::CreateDepthwiseConvolution2d(
196     const DepthwiseConvolution2dQueueDescriptor& descriptor, const WorkloadInfo& info) const
197 {
198     return MakeWorkload<ClDepthwiseConvolutionWorkload>(descriptor, info);
199 }
200
201 std::unique_ptr<IWorkload> ClWorkloadFactory::CreateDetectionPostProcess(
202     const armnn::DetectionPostProcessQueueDescriptor& descriptor, const armnn::WorkloadInfo& info) const
203 {
204     return MakeWorkload<NullWorkload, NullWorkload>(descriptor, info);
205 }
206
207 std::unique_ptr<IWorkload> ClWorkloadFactory::CreateDequantize(const DequantizeQueueDescriptor& descriptor,
208                                                                const WorkloadInfo& info) const
209 {
210     return std::make_unique<ClDequantizeWorkload>(descriptor, info);
211 }
212
213 std::unique_ptr<armnn::IWorkload> ClWorkloadFactory::CreateNormalization(const NormalizationQueueDescriptor& descriptor,
214                                                                          const WorkloadInfo&                 info) const
215 {
216     return MakeWorkload<ClNormalizationFloatWorkload, NullWorkload>(descriptor, info);
217 }
218
219 std::unique_ptr<armnn::IWorkload> ClWorkloadFactory::CreateAddition(const AdditionQueueDescriptor& descriptor,
220                                                                     const WorkloadInfo&            info) const
221 {
222     return MakeWorkload<ClAdditionWorkload>(descriptor, info);
223 }
224
225 std::unique_ptr<armnn::IWorkload> ClWorkloadFactory::CreateMultiplication(
226     const MultiplicationQueueDescriptor& descriptor, const WorkloadInfo& info) const
227 {
228     return MakeWorkload<ClMultiplicationWorkload>(descriptor, info);
229 }
230
231 std::unique_ptr<armnn::IWorkload> ClWorkloadFactory::CreateDivision(
232     const DivisionQueueDescriptor& descriptor, const WorkloadInfo& info) const
233 {
234     return MakeWorkload<ClDivisionFloatWorkload, NullWorkload>(descriptor, info);
235 }
236
237 std::unique_ptr<armnn::IWorkload> ClWorkloadFactory::CreateSubtraction(const SubtractionQueueDescriptor& descriptor,
238                                                                        const WorkloadInfo& info) const
239 {
240     return MakeWorkload<ClSubtractionWorkload>(descriptor, info);
241 }
242
243 std::unique_ptr<armnn::IWorkload> ClWorkloadFactory::CreateBatchNormalization(
244     const BatchNormalizationQueueDescriptor& descriptor, const WorkloadInfo& info) const
245 {
246     return MakeWorkload<ClBatchNormalizationFloatWorkload, NullWorkload>(descriptor, info);
247 }
248
249 std::unique_ptr<armnn::IWorkload> ClWorkloadFactory::CreateMemCopy(const MemCopyQueueDescriptor& descriptor,
250                                                                    const WorkloadInfo& info) const
251 {
252     if (descriptor.m_Inputs.empty() || !descriptor.m_Inputs[0])
253     {
254         throw InvalidArgumentException("ClWorkloadFactory: Invalid null input for MemCopy workload");
255     }
256
257     return MakeWorkload<CopyMemGenericWorkload, CopyMemGenericWorkload>(descriptor, info);
258 }
259
260 std::unique_ptr<armnn::IWorkload> ClWorkloadFactory::CreateResize(const ResizeQueueDescriptor& descriptor,
261                                                                   const WorkloadInfo& info) const
262 {
263     return MakeWorkload<ClResizeWorkload>(descriptor, info);
264 }
265
266 std::unique_ptr<armnn::IWorkload> ClWorkloadFactory::CreateResizeBilinear(
267     const ResizeBilinearQueueDescriptor& descriptor,
268     const WorkloadInfo& info) const
269 {
270     ResizeQueueDescriptor resizeDescriptor;
271     resizeDescriptor.m_Inputs  = descriptor.m_Inputs;
272     resizeDescriptor.m_Outputs = descriptor.m_Outputs;
273
274     resizeDescriptor.m_Parameters.m_Method       = ResizeMethod::Bilinear;
275     resizeDescriptor.m_Parameters.m_DataLayout   = descriptor.m_Parameters.m_DataLayout;
276     resizeDescriptor.m_Parameters.m_TargetHeight = descriptor.m_Parameters.m_TargetHeight;
277     resizeDescriptor.m_Parameters.m_TargetWidth  = descriptor.m_Parameters.m_TargetWidth;
278
279     return CreateResize(resizeDescriptor, info);
280 }
281
282 std::unique_ptr<IWorkload> ClWorkloadFactory::CreateFakeQuantization(
283     const FakeQuantizationQueueDescriptor& descriptor,
284     const WorkloadInfo& info) const
285 {
286     return nullptr;
287 }
288
289 std::unique_ptr<armnn::IWorkload> ClWorkloadFactory::CreateQuantize(const QuantizeQueueDescriptor& descriptor,
290                                                                     const WorkloadInfo& info) const
291 {
292     return std::make_unique<ClQuantizeWorkload>(descriptor, info);
293 }
294
295 std::unique_ptr<IWorkload> ClWorkloadFactory::CreateL2Normalization(const L2NormalizationQueueDescriptor& descriptor,
296     const WorkloadInfo& info) const
297 {
298     return MakeWorkload<ClL2NormalizationFloatWorkload, NullWorkload>(descriptor, info);
299 }
300
301 std::unique_ptr<armnn::IWorkload> ClWorkloadFactory::CreateConcat(const ConcatQueueDescriptor& descriptor,
302                                                                   const WorkloadInfo&          info) const
303 {
304     return MakeWorkload<ClConcatWorkload>(descriptor, info);
305 }
306
307 std::unique_ptr<IWorkload> ClWorkloadFactory::CreateConstant(const ConstantQueueDescriptor& descriptor,
308     const WorkloadInfo& info) const
309 {
310     return MakeWorkload<ClConstantWorkload>(descriptor, info);
311 }
312
313 std::unique_ptr<IWorkload> ClWorkloadFactory::CreateReshape(const ReshapeQueueDescriptor& descriptor,
314     const WorkloadInfo& info) const
315 {
316     return MakeWorkload<ClReshapeWorkload>(descriptor, info);
317 }
318
319 std::unique_ptr<IWorkload> ClWorkloadFactory::CreateSpaceToBatchNd(const SpaceToBatchNdQueueDescriptor& descriptor,
320     const WorkloadInfo& info) const
321 {
322     return MakeWorkload<ClSpaceToBatchNdWorkload>(descriptor, info);
323 }
324
325 std::unique_ptr<IWorkload> ClWorkloadFactory::CreateFloor(const FloorQueueDescriptor& descriptor,
326     const WorkloadInfo& info) const
327 {
328     return MakeWorkload<ClFloorFloatWorkload, NullWorkload>(descriptor, info);
329 }
330
331 std::unique_ptr<IWorkload> ClWorkloadFactory::CreateLstm(const LstmQueueDescriptor& descriptor,
332     const WorkloadInfo& info) const
333 {
334     return MakeWorkload<ClLstmFloatWorkload, NullWorkload>(descriptor, info);
335 }
336
337 std::unique_ptr<IWorkload> ClWorkloadFactory::CreateConvertFp16ToFp32(
338     const ConvertFp16ToFp32QueueDescriptor& descriptor,
339     const WorkloadInfo& info) const
340 {
341     return MakeWorkload<ClConvertFp16ToFp32Workload>(descriptor, info);
342 }
343
344 std::unique_ptr<IWorkload> ClWorkloadFactory::CreateConvertFp32ToFp16(
345     const ConvertFp32ToFp16QueueDescriptor& descriptor,
346     const WorkloadInfo& info) const
347 {
348     return MakeWorkload<ClConvertFp32ToFp16Workload>(descriptor, info);
349 }
350
351 std::unique_ptr<IWorkload> ClWorkloadFactory::CreateMaximum(const MaximumQueueDescriptor& descriptor,
352                                                             const WorkloadInfo& info) const
353 {
354     return MakeWorkload<ClMaximumWorkload>(descriptor, info);
355 }
356
357 std::unique_ptr<IWorkload> ClWorkloadFactory::CreateMean(const MeanQueueDescriptor& descriptor,
358                                                          const WorkloadInfo& info) const
359 {
360     return std::make_unique<ClMeanWorkload>(descriptor, info);
361 }
362
363 std::unique_ptr<IWorkload> ClWorkloadFactory::CreatePad(const PadQueueDescriptor& descriptor,
364                                                         const WorkloadInfo& info) const
365 {
366     return MakeWorkload<ClPadWorkload>(descriptor, info);
367 }
368
369 std::unique_ptr<IWorkload> ClWorkloadFactory::CreateEqual(const EqualQueueDescriptor& descriptor,
370                                                                const WorkloadInfo& info) const
371 {
372     return MakeWorkload<NullWorkload, NullWorkload>(descriptor, info);
373 }
374
375 std::unique_ptr<IWorkload> ClWorkloadFactory::CreateBatchToSpaceNd(const BatchToSpaceNdQueueDescriptor& descriptor,
376                                                                    const WorkloadInfo& info) const
377 {
378     return MakeWorkload<ClBatchToSpaceNdWorkload>(descriptor, info);
379 }
380
381 std::unique_ptr<IWorkload> ClWorkloadFactory::CreateStridedSlice(const StridedSliceQueueDescriptor& descriptor,
382                                                                  const WorkloadInfo& info) const
383 {
384     return MakeWorkload<ClStridedSliceWorkload>(descriptor, info);
385 }
386
387 std::unique_ptr<IWorkload> ClWorkloadFactory::CreateMinimum(const MinimumQueueDescriptor& descriptor,
388                                                             const WorkloadInfo& info) const
389 {
390     return MakeWorkload<ClMinimumWorkload>(descriptor, info);
391 }
392
393 std::unique_ptr<IWorkload> ClWorkloadFactory::CreateGreater(const GreaterQueueDescriptor& descriptor,
394                                                             const WorkloadInfo& info) const
395 {
396     return MakeWorkload<ClGreaterFloat32Workload, ClGreaterUint8Workload>(descriptor, info);
397 }
398
399 std::unique_ptr<IWorkload> ClWorkloadFactory::CreateDebug(const DebugQueueDescriptor& descriptor,
400                                                           const WorkloadInfo& info) const
401 {
402     return MakeWorkload<NullWorkload, NullWorkload>(descriptor, info);
403 }
404
405 std::unique_ptr<IWorkload> ClWorkloadFactory::CreateRsqrt(const RsqrtQueueDescriptor& descriptor,
406                                                           const WorkloadInfo& info) const
407 {
408     return MakeWorkload<NullWorkload, NullWorkload>(descriptor, info);
409 }
410
411 std::unique_ptr<IWorkload> ClWorkloadFactory::CreatePreCompiled(const PreCompiledQueueDescriptor& descriptor,
412                                                                 const WorkloadInfo& info) const
413 {
414     return MakeWorkload<NullWorkload, NullWorkload>(descriptor, info);
415 }
416
417 std::unique_ptr<IWorkload> ClWorkloadFactory::CreateGather(const armnn::GatherQueueDescriptor& descriptor,
418                                                            const armnn::WorkloadInfo& info) const
419 {
420     return MakeWorkload<NullWorkload, NullWorkload>(descriptor, info);
421 }
422
423 std::unique_ptr<armnn::IWorkload> ClWorkloadFactory::CreateTransposeConvolution2d(
424     const TransposeConvolution2dQueueDescriptor& descriptor,
425     const WorkloadInfo& info) const
426 {
427     return MakeWorkload<ClTransposeConvolution2dWorkload>(descriptor, info, m_MemoryManager->GetIntraLayerManager());
428 }
429
430 std::unique_ptr<IWorkload> ClWorkloadFactory::CreateSpaceToDepth(const SpaceToDepthQueueDescriptor& descriptor,
431                                                                  const WorkloadInfo& info) const
432 {
433     return MakeWorkload<ClSpaceToDepthWorkload>(descriptor, info);
434 }
435
436 } // namespace armnn