2 // Copyright © 2017 Arm Ltd. All rights reserved.
3 // See LICENSE file in the project root for full license information.
5 #include "backends/NeonWorkloadFactory.hpp"
6 #include "backends/NeonWorkloadUtils.hpp"
7 #include "backends/NeonWorkloads.hpp"
8 #include "backends/MemCopyWorkload.hpp"
9 #include "backends/NeonTensorHandle.hpp"
11 #include "test/CreateWorkloadClNeon.hpp"
// Opens the Boost.Test suite named CreateWorkloadNeon; the test cases below are declared inside it.
13 BOOST_AUTO_TEST_SUITE(CreateWorkloadNeon)
// Compares the Arm Compute tensor info exposed by a Neon tensor handle with the info expected
// for a given armnn::TensorInfo.
//   handle       - Neon tensor handle whose underlying tensor info is inspected.
//   expectedInfo - armnn description the handle is expected to match.
// The expected info is converted with BuildArmComputeTensorInfo, after which the data type, the
// number of dimensions, the quantization info and every individual dimension are compared, in that order.
// NOTE(review): the branch bodies and return statements are not visible in this excerpt;
// presumably a mismatch returns false and a full match returns true - confirm.
18 bool TestNeonTensorHandleInfo(armnn::INeonTensorHandle* handle, const armnn::TensorInfo& expectedInfo)
20 using namespace armnn::armcomputetensorutils;
// Tensor info of the tensor that actually backs the handle.
22 const arm_compute::ITensorInfo* handleInfo = handle->GetTensor().info();
// Arm Compute tensor info built from the expected armnn::TensorInfo.
23 const arm_compute::TensorInfo expectedAclInfo = BuildArmComputeTensorInfo(expectedInfo);
25 if (handleInfo->data_type() != expectedAclInfo.data_type())
30 if (handleInfo->num_dimensions() != expectedAclInfo.num_dimensions())
35 if (handleInfo->quantization_info() != expectedAclInfo.quantization_info())
// Per-dimension comparison, bounded by the expected number of dimensions.
40 for (std::size_t d = 0; d < expectedAclInfo.num_dimensions(); ++d)
42 if (handleInfo->dimension(d) != expectedAclInfo.dimension(d))
// Creates an activation workload through CreateActivationWorkloadTest with a NeonWorkloadFactory
// and checks that its first input and first output handles both match TensorInfo({1, 1}, DataType).
//   ActivationWorkloadType - workload class to create.
//   DataType               - armnn data type used for the workload and the expected tensor infos.
53 template <typename ActivationWorkloadType, typename armnn::DataType DataType>
54 static void NeonCreateActivationWorkloadTest()
57 NeonWorkloadFactory factory;
// NOTE(review): the argument list of this call continues on a line that is not visible in this excerpt.
58 auto workload = CreateActivationWorkloadTest<ActivationWorkloadType, DataType>
61 // Checks that inputs/outputs are as we expect them (see definition of CreateActivationWorkloadTest).
62 ActivationQueueDescriptor queueDescriptor = workload->GetData();
// Downcast the stored handles to INeonTensorHandle so TestNeonTensorHandleInfo can inspect them.
63 auto inputHandle = boost::polymorphic_downcast<INeonTensorHandle*>(queueDescriptor.m_Inputs[0]);
64 auto outputHandle = boost::polymorphic_downcast<INeonTensorHandle*>(queueDescriptor.m_Outputs[0]);
65 BOOST_TEST(TestNeonTensorHandleInfo(inputHandle, TensorInfo({1, 1}, DataType)));
66 BOOST_TEST(TestNeonTensorHandleInfo(outputHandle, TensorInfo({1, 1}, DataType)));
// Float16 variant of the activation creation test; it follows an #ifdef on
// __ARM_FEATURE_FP16_VECTOR_ARITHMETIC, so it is only compiled when that macro is defined.
// NOTE(review): the matching #endif is not visible in this excerpt.
// NOTE(review): the Float32-named workload class is instantiated with DataType::Float16 here;
// presumably that class serves both float types - confirm.
69 #ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
70 BOOST_AUTO_TEST_CASE(CreateActivationFloat16Workload)
72 NeonCreateActivationWorkloadTest<NeonActivationFloat32Workload, DataType::Float16>();
// Runs the activation workload creation check with NeonActivationFloat32Workload and DataType::Float32.
76 BOOST_AUTO_TEST_CASE(CreateActivationFloat32Workload)
78 NeonCreateActivationWorkloadTest<NeonActivationFloat32Workload, DataType::Float32>();
// Creates an addition workload through CreateAdditionWorkloadTest with a NeonWorkloadFactory and
// checks that both input handles and the output handle match TensorInfo({2, 3}, DataType).
//   AdditionWorkloadType - workload class to create.
//   DataType             - armnn data type used for the workload and the expected tensor infos.
// NOTE(review): the declaration of `graph` is not visible in this excerpt.
81 template <typename AdditionWorkloadType, typename armnn::DataType DataType>
82 static void NeonCreateAdditionWorkloadTest()
85 NeonWorkloadFactory factory;
86 auto workload = CreateAdditionWorkloadTest<AdditionWorkloadType, DataType>(factory, graph);
88 // Checks that inputs/outputs are as we expect them (see definition of CreateAdditionWorkloadTest).
89 AdditionQueueDescriptor queueDescriptor = workload->GetData();
// Downcast the stored handles to INeonTensorHandle so TestNeonTensorHandleInfo can inspect them.
90 auto inputHandle1 = boost::polymorphic_downcast<INeonTensorHandle*>(queueDescriptor.m_Inputs[0]);
91 auto inputHandle2 = boost::polymorphic_downcast<INeonTensorHandle*>(queueDescriptor.m_Inputs[1]);
92 auto outputHandle = boost::polymorphic_downcast<INeonTensorHandle*>(queueDescriptor.m_Outputs[0]);
93 BOOST_TEST(TestNeonTensorHandleInfo(inputHandle1, TensorInfo({2, 3}, DataType)));
94 BOOST_TEST(TestNeonTensorHandleInfo(inputHandle2, TensorInfo({2, 3}, DataType)));
95 BOOST_TEST(TestNeonTensorHandleInfo(outputHandle, TensorInfo({2, 3}, DataType)));
// Float16 variant of the addition creation test; it follows an #ifdef on
// __ARM_FEATURE_FP16_VECTOR_ARITHMETIC, so it is only compiled when that macro is defined.
// NOTE(review): the matching #endif is not visible in this excerpt.
// NOTE(review): the Float32-named workload class is instantiated with DataType::Float16 here;
// presumably that class serves both float types - confirm.
98 #ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
99 BOOST_AUTO_TEST_CASE(CreateAdditionFloat16Workload)
101 NeonCreateAdditionWorkloadTest<NeonAdditionFloat32Workload, DataType::Float16>();
// Runs the addition workload creation check with NeonAdditionFloat32Workload and DataType::Float32.
105 BOOST_AUTO_TEST_CASE(CreateAdditionFloat32Workload)
107 NeonCreateAdditionWorkloadTest<NeonAdditionFloat32Workload, DataType::Float32>();
// Creates a batch normalization workload through CreateBatchNormalizationWorkloadTest with a
// NeonWorkloadFactory and checks that its first input and first output handles both match
// TensorInfo({2, 3, 1, 1}, DataType).
//   BatchNormalizationWorkloadType - workload class to create.
//   DataType                       - armnn data type used for the workload and the expected tensor infos.
// NOTE(review): the declaration of `graph` is not visible in this excerpt.
110 template <typename BatchNormalizationWorkloadType, typename armnn::DataType DataType>
111 static void NeonCreateBatchNormalizationWorkloadTest()
114 NeonWorkloadFactory factory;
115 auto workload = CreateBatchNormalizationWorkloadTest<BatchNormalizationWorkloadType, DataType>(factory, graph);
117 // Checks that outputs and inputs are as we expect them (see definition of CreateBatchNormalizationWorkloadTest).
118 BatchNormalizationQueueDescriptor queueDescriptor = workload->GetData();
// Downcast the stored handles to INeonTensorHandle so TestNeonTensorHandleInfo can inspect them.
119 auto inputHandle = boost::polymorphic_downcast<INeonTensorHandle*>(queueDescriptor.m_Inputs[0]);
120 auto outputHandle = boost::polymorphic_downcast<INeonTensorHandle*>(queueDescriptor.m_Outputs[0]);
121 BOOST_TEST(TestNeonTensorHandleInfo(inputHandle, TensorInfo({2, 3, 1, 1}, DataType)));
122 BOOST_TEST(TestNeonTensorHandleInfo(outputHandle, TensorInfo({2, 3, 1, 1}, DataType)));
// Float16 variant of the batch normalization creation test; it follows an #ifdef on
// __ARM_FEATURE_FP16_VECTOR_ARITHMETIC, so it is only compiled when that macro is defined.
// NOTE(review): the matching #endif is not visible in this excerpt.
// NOTE(review): the Float32-named workload class is instantiated with DataType::Float16 here;
// presumably that class serves both float types - confirm.
125 #ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
126 BOOST_AUTO_TEST_CASE(CreateBatchNormalizationFloat16Workload)
128 NeonCreateBatchNormalizationWorkloadTest<NeonBatchNormalizationFloat32Workload, DataType::Float16>();
// Runs the batch normalization workload creation check with NeonBatchNormalizationFloat32Workload
// and DataType::Float32.
132 BOOST_AUTO_TEST_CASE(CreateBatchNormalizationFloat32Workload)
134 NeonCreateBatchNormalizationWorkloadTest<NeonBatchNormalizationFloat32Workload, DataType::Float32>();
// Creates a 2D convolution workload through CreateConvolution2dWorkloadTest with a NeonWorkloadFactory
// and checks that its first input handle matches TensorInfo({2, 3, 8, 16}, DataType) and its first
// output handle matches TensorInfo({2, 2, 2, 10}, DataType).
//   Convolution2dWorkloadType - workload class to create.
//   DataType                  - armnn data type used for the workload and the expected tensor infos.
// NOTE(review): the declaration of `graph` is not visible in this excerpt.
137 template <typename Convolution2dWorkloadType, typename armnn::DataType DataType>
138 static void NeonCreateConvolution2dWorkloadTest()
141 NeonWorkloadFactory factory;
142 auto workload = CreateConvolution2dWorkloadTest<Convolution2dWorkloadType,
143 DataType>(factory, graph);
145 // Checks that outputs and inputs are as we expect them (see definition of CreateConvolution2dWorkloadTest).
146 Convolution2dQueueDescriptor queueDescriptor = workload->GetData();
// Downcast the stored handles to INeonTensorHandle so TestNeonTensorHandleInfo can inspect them.
147 auto inputHandle = boost::polymorphic_downcast<INeonTensorHandle*>(queueDescriptor.m_Inputs[0]);
148 auto outputHandle = boost::polymorphic_downcast<INeonTensorHandle*>(queueDescriptor.m_Outputs[0]);
149 BOOST_TEST(TestNeonTensorHandleInfo(inputHandle, TensorInfo({2, 3, 8, 16}, DataType)));
150 BOOST_TEST(TestNeonTensorHandleInfo(outputHandle, TensorInfo({2, 2, 2, 10}, DataType)));
// Float16 variant of the 2D convolution creation test; it follows an #ifdef on
// __ARM_FEATURE_FP16_VECTOR_ARITHMETIC, so it is only compiled when that macro is defined.
// NOTE(review): the matching #endif is not visible in this excerpt.
// NOTE(review): the Float32-named workload class is instantiated with DataType::Float16 here;
// presumably that class serves both float types - confirm.
153 #ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
154 BOOST_AUTO_TEST_CASE(CreateConvolution2dFloat16Workload)
156 NeonCreateConvolution2dWorkloadTest<NeonConvolution2dFloat32Workload, DataType::Float16>();
// Runs the 2D convolution workload creation check with NeonConvolution2dFloat32Workload and DataType::Float32.
160 BOOST_AUTO_TEST_CASE(CreateConvolution2dFloat32Workload)
162 NeonCreateConvolution2dWorkloadTest<NeonConvolution2dFloat32Workload, DataType::Float32>();
// Creates a fully connected workload through CreateFullyConnectedWorkloadTest with a NeonWorkloadFactory
// and checks that its first input handle matches TensorInfo({3, 1, 4, 5}, DataType) and its first
// output handle matches TensorInfo({3, 7}, DataType).
//   FullyConnectedWorkloadType - workload class to create.
//   DataType                   - armnn data type used for the workload and the expected tensor infos.
// NOTE(review): the declaration of `graph` is not visible in this excerpt.
165 template <typename FullyConnectedWorkloadType, typename armnn::DataType DataType>
166 static void NeonCreateFullyConnectedWorkloadTest()
169 NeonWorkloadFactory factory;
170 auto workload = CreateFullyConnectedWorkloadTest<FullyConnectedWorkloadType,
171 DataType>(factory, graph);
173 // Checks that outputs and inputs are as we expect them (see definition of CreateFullyConnectedWorkloadTest).
174 FullyConnectedQueueDescriptor queueDescriptor = workload->GetData();
// Downcast the stored handles to INeonTensorHandle so TestNeonTensorHandleInfo can inspect them.
175 auto inputHandle = boost::polymorphic_downcast<INeonTensorHandle*>(queueDescriptor.m_Inputs[0]);
176 auto outputHandle = boost::polymorphic_downcast<INeonTensorHandle*>(queueDescriptor.m_Outputs[0]);
177 BOOST_TEST(TestNeonTensorHandleInfo(inputHandle, TensorInfo({3, 1, 4, 5}, DataType)));
178 BOOST_TEST(TestNeonTensorHandleInfo(outputHandle, TensorInfo({3, 7}, DataType)));
// Float16 variant of the fully connected creation test; it follows an #ifdef on
// __ARM_FEATURE_FP16_VECTOR_ARITHMETIC, so it is only compiled when that macro is defined.
// NOTE(review): the matching #endif is not visible in this excerpt.
// NOTE(review): the Float32-named workload class is instantiated with DataType::Float16 here;
// presumably that class serves both float types - confirm.
181 #ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
182 BOOST_AUTO_TEST_CASE(CreateFullyConnectedFloat16Workload)
184 NeonCreateFullyConnectedWorkloadTest<NeonFullyConnectedFloat32Workload, DataType::Float16>();
// Runs the fully connected workload creation check with NeonFullyConnectedFloat32Workload and DataType::Float32.
188 BOOST_AUTO_TEST_CASE(CreateFullyConnectedFloat32Workload)
190 NeonCreateFullyConnectedWorkloadTest<NeonFullyConnectedFloat32Workload, DataType::Float32>();
// Creates a multiplication workload through CreateMultiplicationWorkloadTest with a NeonWorkloadFactory
// and checks that both input handles and the output handle match TensorInfo({2, 3}, DataType).
//   MultiplicationWorkloadType - workload class to create.
//   DataType                   - armnn data type used for the workload and the expected tensor infos.
// NOTE(review): the declaration of `graph` is not visible in this excerpt.
193 template <typename MultiplicationWorkloadType, typename armnn::DataType DataType>
194 static void NeonCreateMultiplicationWorkloadTest()
197 NeonWorkloadFactory factory;
198 auto workload = CreateMultiplicationWorkloadTest<MultiplicationWorkloadType,
199 DataType>(factory, graph);
201 // Checks that inputs/outputs are as we expect them (see definition of CreateMultiplicationWorkloadTest).
202 MultiplicationQueueDescriptor queueDescriptor = workload->GetData();
// Downcast the stored handles to INeonTensorHandle so TestNeonTensorHandleInfo can inspect them.
203 auto inputHandle1 = boost::polymorphic_downcast<INeonTensorHandle*>(queueDescriptor.m_Inputs[0]);
204 auto inputHandle2 = boost::polymorphic_downcast<INeonTensorHandle*>(queueDescriptor.m_Inputs[1]);
205 auto outputHandle = boost::polymorphic_downcast<INeonTensorHandle*>(queueDescriptor.m_Outputs[0]);
206 BOOST_TEST(TestNeonTensorHandleInfo(inputHandle1, TensorInfo({2, 3}, DataType)));
207 BOOST_TEST(TestNeonTensorHandleInfo(inputHandle2, TensorInfo({2, 3}, DataType)));
208 BOOST_TEST(TestNeonTensorHandleInfo(outputHandle, TensorInfo({2, 3}, DataType)));
// Float16 variant of the multiplication creation test; it follows an #ifdef on
// __ARM_FEATURE_FP16_VECTOR_ARITHMETIC, so it is only compiled when that macro is defined.
// NOTE(review): the matching #endif is not visible in this excerpt.
// NOTE(review): the Float32-named workload class is instantiated with DataType::Float16 here;
// presumably that class serves both float types - confirm.
211 #ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
212 BOOST_AUTO_TEST_CASE(CreateMultiplicationFloat16Workload)
214 NeonCreateMultiplicationWorkloadTest<NeonMultiplicationFloat32Workload, DataType::Float16>();
// Runs the multiplication workload creation check with NeonMultiplicationFloat32Workload and DataType::Float32.
218 BOOST_AUTO_TEST_CASE(CreateMultiplicationFloat32Workload)
220 NeonCreateMultiplicationWorkloadTest<NeonMultiplicationFloat32Workload, DataType::Float32>();
// Creates a normalization workload through CreateNormalizationWorkloadTest with a NeonWorkloadFactory
// and checks that its first input and first output handles both match TensorInfo({3, 5, 5, 1}, DataType).
//   NormalizationWorkloadType - workload class to create.
//   DataType                  - armnn data type used for the workload and the expected tensor infos.
// NOTE(review): the declaration of `graph` is not visible in this excerpt.
223 template <typename NormalizationWorkloadType, typename armnn::DataType DataType>
224 static void NeonCreateNormalizationWorkloadTest()
227 NeonWorkloadFactory factory;
228 auto workload = CreateNormalizationWorkloadTest<NormalizationWorkloadType, DataType>(factory, graph);
230 // Checks that outputs and inputs are as we expect them (see definition of CreateNormalizationWorkloadTest).
231 NormalizationQueueDescriptor queueDescriptor = workload->GetData();
// Downcast the stored handles to INeonTensorHandle so TestNeonTensorHandleInfo can inspect them.
232 auto inputHandle = boost::polymorphic_downcast<INeonTensorHandle*>(queueDescriptor.m_Inputs[0]);
233 auto outputHandle = boost::polymorphic_downcast<INeonTensorHandle*>(queueDescriptor.m_Outputs[0]);
234 BOOST_TEST(TestNeonTensorHandleInfo(inputHandle, TensorInfo({3, 5, 5, 1}, DataType)));
235 BOOST_TEST(TestNeonTensorHandleInfo(outputHandle, TensorInfo({3, 5, 5, 1}, DataType)));
// Float16 variant of the normalization creation test; it follows an #ifdef on
// __ARM_FEATURE_FP16_VECTOR_ARITHMETIC, so it is only compiled when that macro is defined.
// NOTE(review): the matching #endif is not visible in this excerpt.
// NOTE(review): the Float32-named workload class is instantiated with DataType::Float16 here;
// presumably that class serves both float types - confirm.
238 #ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
239 BOOST_AUTO_TEST_CASE(CreateNormalizationFloat16Workload)
241 NeonCreateNormalizationWorkloadTest<NeonNormalizationFloat32Workload, DataType::Float16>();
// Runs the normalization workload creation check with NeonNormalizationFloat32Workload and DataType::Float32.
245 BOOST_AUTO_TEST_CASE(CreateNormalizationFloat32Workload)
247 NeonCreateNormalizationWorkloadTest<NeonNormalizationFloat32Workload, DataType::Float32>();
// Creates a 2D pooling workload through CreatePooling2dWorkloadTest with a NeonWorkloadFactory and
// checks that its first input handle matches TensorInfo({3, 2, 5, 5}, DataType) and its first
// output handle matches TensorInfo({3, 2, 2, 4}, DataType).
//   Pooling2dWorkloadType - workload class to create.
//   DataType              - armnn data type used for the workload and the expected tensor infos.
250 template <typename Pooling2dWorkloadType, typename armnn::DataType DataType>
251 static void NeonCreatePooling2dWorkloadTest()
254 NeonWorkloadFactory factory;
// NOTE(review): the argument list of this call continues on a line that is not visible in this excerpt.
255 auto workload = CreatePooling2dWorkloadTest<Pooling2dWorkloadType, DataType>
258 // Checks that outputs and inputs are as we expect them (see definition of CreatePooling2dWorkloadTest).
259 Pooling2dQueueDescriptor queueDescriptor = workload->GetData();
// Downcast the stored handles to INeonTensorHandle so TestNeonTensorHandleInfo can inspect them.
260 auto inputHandle = boost::polymorphic_downcast<INeonTensorHandle*>(queueDescriptor.m_Inputs[0]);
261 auto outputHandle = boost::polymorphic_downcast<INeonTensorHandle*>(queueDescriptor.m_Outputs[0]);
262 BOOST_TEST(TestNeonTensorHandleInfo(inputHandle, TensorInfo({3, 2, 5, 5}, DataType)));
263 BOOST_TEST(TestNeonTensorHandleInfo(outputHandle, TensorInfo({3, 2, 2, 4}, DataType)));
// Float16 variant of the 2D pooling creation test; it follows an #ifdef on
// __ARM_FEATURE_FP16_VECTOR_ARITHMETIC, so it is only compiled when that macro is defined.
// NOTE(review): the matching #endif is not visible in this excerpt.
// NOTE(review): the Float32-named workload class is instantiated with DataType::Float16 here;
// presumably that class serves both float types - confirm.
266 #ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
267 BOOST_AUTO_TEST_CASE(CreatePooling2dFloat16Workload)
269 NeonCreatePooling2dWorkloadTest<NeonPooling2dFloat32Workload, DataType::Float16>();
// Runs the 2D pooling workload creation check with NeonPooling2dFloat32Workload and DataType::Float32.
273 BOOST_AUTO_TEST_CASE(CreatePooling2dFloat32Workload)
275 NeonCreatePooling2dWorkloadTest<NeonPooling2dFloat32Workload, DataType::Float32>();
// Runs the 2D pooling workload creation check with NeonPooling2dUint8Workload and DataType::QuantisedAsymm8.
278 BOOST_AUTO_TEST_CASE(CreatePooling2dUint8Workload)
280 NeonCreatePooling2dWorkloadTest<NeonPooling2dUint8Workload, DataType::QuantisedAsymm8>();
// Creates a reshape workload through CreateReshapeWorkloadTest with a NeonWorkloadFactory and checks
// that its first input handle matches TensorInfo({4, 1}, DataType) and its first output handle
// matches TensorInfo({1, 4}, DataType).
//   ReshapeWorkloadType - workload class to create.
//   DataType            - armnn data type used for the workload and the expected tensor infos.
// NOTE(review): the declaration of `graph` is not visible in this excerpt.
283 template <typename ReshapeWorkloadType, typename armnn::DataType DataType>
284 static void NeonCreateReshapeWorkloadTest()
287 NeonWorkloadFactory factory;
288 auto workload = CreateReshapeWorkloadTest<ReshapeWorkloadType, DataType>(factory, graph);
290 // Checks that outputs and inputs are as we expect them (see definition of CreateReshapeWorkloadTest).
291 ReshapeQueueDescriptor queueDescriptor = workload->GetData();
// Downcast the stored handles to INeonTensorHandle so TestNeonTensorHandleInfo can inspect them.
292 auto inputHandle = boost::polymorphic_downcast<INeonTensorHandle*>(queueDescriptor.m_Inputs[0]);
293 auto outputHandle = boost::polymorphic_downcast<INeonTensorHandle*>(queueDescriptor.m_Outputs[0]);
294 BOOST_TEST(TestNeonTensorHandleInfo(inputHandle, TensorInfo({4, 1}, DataType)));
295 BOOST_TEST(TestNeonTensorHandleInfo(outputHandle, TensorInfo({1, 4}, DataType)));
// Float16 variant of the reshape creation test; it follows an #ifdef on
// __ARM_FEATURE_FP16_VECTOR_ARITHMETIC, so it is only compiled when that macro is defined.
// NOTE(review): the matching #endif is not visible in this excerpt.
// NOTE(review): the Float32-named workload class is instantiated with DataType::Float16 here;
// presumably that class serves both float types - confirm.
298 #ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
299 BOOST_AUTO_TEST_CASE(CreateReshapeFloat16Workload)
301 NeonCreateReshapeWorkloadTest<NeonReshapeFloat32Workload, DataType::Float16>();
// Runs the reshape workload creation check with NeonReshapeFloat32Workload and DataType::Float32.
305 BOOST_AUTO_TEST_CASE(CreateReshapeFloat32Workload)
307 NeonCreateReshapeWorkloadTest<NeonReshapeFloat32Workload, DataType::Float32>();
// Runs the reshape workload creation check with NeonReshapeUint8Workload and DataType::QuantisedAsymm8.
310 BOOST_AUTO_TEST_CASE(CreateReshapeUint8Workload)
312 NeonCreateReshapeWorkloadTest<NeonReshapeUint8Workload, DataType::QuantisedAsymm8>();
// Creates a softmax workload through CreateSoftmaxWorkloadTest with a NeonWorkloadFactory and checks
// that its first input and first output handles both match TensorInfo({4, 1}, DataType).
//   SoftmaxWorkloadType - workload class to create.
//   DataType            - armnn data type used for the workload and the expected tensor infos.
// NOTE(review): the declaration of `graph` is not visible in this excerpt.
315 template <typename SoftmaxWorkloadType, typename armnn::DataType DataType>
316 static void NeonCreateSoftmaxWorkloadTest()
319 NeonWorkloadFactory factory;
320 auto workload = CreateSoftmaxWorkloadTest<SoftmaxWorkloadType, DataType>(factory, graph);
322 // Checks that outputs and inputs are as we expect them (see definition of CreateSoftmaxWorkloadTest).
323 SoftmaxQueueDescriptor queueDescriptor = workload->GetData();
// Downcast the stored handles to INeonTensorHandle so TestNeonTensorHandleInfo can inspect them.
324 auto inputHandle = boost::polymorphic_downcast<INeonTensorHandle*>(queueDescriptor.m_Inputs[0]);
325 auto outputHandle = boost::polymorphic_downcast<INeonTensorHandle*>(queueDescriptor.m_Outputs[0]);
326 BOOST_TEST(TestNeonTensorHandleInfo(inputHandle, TensorInfo({4, 1}, DataType)));
327 BOOST_TEST(TestNeonTensorHandleInfo(outputHandle, TensorInfo({4, 1}, DataType)));
// Float16 variant of the softmax creation test; it follows an #ifdef on
// __ARM_FEATURE_FP16_VECTOR_ARITHMETIC, so it is only compiled when that macro is defined.
// NOTE(review): the matching #endif is not visible in this excerpt.
// NOTE(review): the Float32-named workload class is instantiated with DataType::Float16 here;
// presumably that class serves both float types - confirm.
330 #ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
331 BOOST_AUTO_TEST_CASE(CreateSoftmaxFloat16Workload)
333 NeonCreateSoftmaxWorkloadTest<NeonSoftmaxFloat32Workload, DataType::Float16>();
// Runs the softmax workload creation check with NeonSoftmaxFloat32Workload and DataType::Float32.
337 BOOST_AUTO_TEST_CASE(CreateSoftmaxFloat32Workload)
339 NeonCreateSoftmaxWorkloadTest<NeonSoftmaxFloat32Workload, DataType::Float32>();
// Creates a NeonSplitterFloat32Workload through CreateSplitterWorkloadTest and checks the Float32
// tensor infos of its input ({5, 7, 7}) and of its three outputs ({1, 7, 7}, {2, 7, 7}, {2, 7, 7}).
// NOTE(review): the declaration of `graph` is not visible in this excerpt.
342 BOOST_AUTO_TEST_CASE(CreateSplitterWorkload)
345 NeonWorkloadFactory factory;
346 auto workload = CreateSplitterWorkloadTest<NeonSplitterFloat32Workload, DataType::Float32>(factory, graph);
348 // Checks that the input and outputs are as we expect them (see definition of CreateSplitterWorkloadTest).
349 SplitterQueueDescriptor queueDescriptor = workload->GetData();
350 auto inputHandle = boost::polymorphic_downcast<INeonTensorHandle*>(queueDescriptor.m_Inputs[0]);
351 BOOST_TEST(TestNeonTensorHandleInfo(inputHandle, TensorInfo({5, 7, 7}, DataType::Float32)));
// Each of the three outputs is downcast and checked individually.
353 auto outputHandle0 = boost::polymorphic_downcast<INeonTensorHandle*>(queueDescriptor.m_Outputs[0]);
354 BOOST_TEST(TestNeonTensorHandleInfo(outputHandle0, TensorInfo({1, 7, 7}, DataType::Float32)));
356 auto outputHandle1 = boost::polymorphic_downcast<INeonTensorHandle*>(queueDescriptor.m_Outputs[1]);
357 BOOST_TEST(TestNeonTensorHandleInfo(outputHandle1, TensorInfo({2, 7, 7}, DataType::Float32)));
359 auto outputHandle2 = boost::polymorphic_downcast<INeonTensorHandle*>(queueDescriptor.m_Outputs[2]);
360 BOOST_TEST(TestNeonTensorHandleInfo(outputHandle2, TensorInfo({2, 7, 7}, DataType::Float32)));
// Verifies the handle wiring between a splitter workload and a merger workload created together by
// CreateSplitterMergerWorkloadTest.
363 BOOST_AUTO_TEST_CASE(CreateSplitterMerger)
365 // Tests that it is possible to decide which output of the splitter layer
366 // should be linked to which input of the merger layer.
367 // We test that it is possible to specify the 0th output
368 // of the splitter to be the 1st input to the merger, and the 1st output of the splitter to be 0th input
372 NeonWorkloadFactory factory;
// NOTE(review): the declaration that receives this call's result (used as `workloads` below) and the
// declaration of `graph` are not visible in this excerpt.
375 CreateSplitterMergerWorkloadTest<NeonSplitterFloat32Workload, NeonMergerFloat32Workload,
376 DataType::Float32>(factory, graph);
// Move the splitter and merger workloads out of `workloads`.
378 auto wlSplitter = std::move(workloads.first);
379 auto wlMerger = std::move(workloads.second);
381 //Checks that the index of inputs/outputs matches what we declared on InputDescriptor construction.
// dynamic_cast yields a null pointer if a handle is not an INeonTensorHandle.
382 armnn::INeonTensorHandle* sOut0 = dynamic_cast<armnn::INeonTensorHandle*>(wlSplitter->GetData().m_Outputs[0]);
383 armnn::INeonTensorHandle* sOut1 = dynamic_cast<armnn::INeonTensorHandle*>(wlSplitter->GetData().m_Outputs[1]);
384 armnn::INeonTensorHandle* mIn0 = dynamic_cast<armnn::INeonTensorHandle*>(wlMerger->GetData().m_Inputs[0]);
385 armnn::INeonTensorHandle* mIn1 = dynamic_cast<armnn::INeonTensorHandle*>(wlMerger->GetData().m_Inputs[1]);
// The wiring is crossed: splitter output 0 must be the same handle as merger input 1, and splitter
// output 1 the same handle as merger input 0 (compared by pointer identity).
392 bool validDataPointers = (sOut0 == mIn1) && (sOut1 == mIn0);
394 BOOST_TEST(validDataPointers);
// Verifies that each splitter output handle is shared as the input handle of two activation workloads,
// using the workloads produced by CreateSplitterMultipleInputsOneOutputWorkloadTest.
397 BOOST_AUTO_TEST_CASE(CreateSingleOutputMultipleInputs)
399 // Tests that it is possible to assign multiple (two) different layers to each of the outputs of a splitter layer.
400 // We create a splitter with two outputs; each of those outputs is used by two different activation layers.
403 NeonWorkloadFactory factory;
// Workload holders passed to the creation helper below; they are dereferenced afterwards, so the
// helper is expected to populate them.
404 std::unique_ptr<NeonSplitterFloat32Workload> wlSplitter;
405 std::unique_ptr<NeonActivationFloat32Workload> wlActiv0_0;
406 std::unique_ptr<NeonActivationFloat32Workload> wlActiv0_1;
407 std::unique_ptr<NeonActivationFloat32Workload> wlActiv1_0;
408 std::unique_ptr<NeonActivationFloat32Workload> wlActiv1_1;
// NOTE(review): the declaration of `graph` is not visible in this excerpt.
410 CreateSplitterMultipleInputsOneOutputWorkloadTest<NeonSplitterFloat32Workload,
411 NeonActivationFloat32Workload, DataType::Float32>(factory, graph, wlSplitter, wlActiv0_0, wlActiv0_1,
412 wlActiv1_0, wlActiv1_1);
// dynamic_cast yields a null pointer if a handle is not an INeonTensorHandle.
414 armnn::INeonTensorHandle* sOut0 = dynamic_cast<armnn::INeonTensorHandle*>(wlSplitter->GetData().m_Outputs[0]);
415 armnn::INeonTensorHandle* sOut1 = dynamic_cast<armnn::INeonTensorHandle*>(wlSplitter->GetData().m_Outputs[1]);
416 armnn::INeonTensorHandle* activ0_0Im = dynamic_cast<armnn::INeonTensorHandle*>(wlActiv0_0->GetData().m_Inputs[0]);
417 armnn::INeonTensorHandle* activ0_1Im = dynamic_cast<armnn::INeonTensorHandle*>(wlActiv0_1->GetData().m_Inputs[0]);
418 armnn::INeonTensorHandle* activ1_0Im = dynamic_cast<armnn::INeonTensorHandle*>(wlActiv1_0->GetData().m_Inputs[0]);
419 armnn::INeonTensorHandle* activ1_1Im = dynamic_cast<armnn::INeonTensorHandle*>(wlActiv1_1->GetData().m_Inputs[0]);
// Every activation input handle must have cast successfully (non-null).
424 BOOST_TEST(activ0_0Im);
425 BOOST_TEST(activ0_1Im);
426 BOOST_TEST(activ1_0Im);
427 BOOST_TEST(activ1_1Im);
// Both activ0_* inputs must be the same handle as splitter output 0, and both activ1_* inputs the
// same handle as splitter output 1 (compared by pointer identity).
429 bool validDataPointers = (sOut0 == activ0_0Im) && (sOut0 == activ0_1Im) &&
430 (sOut1 == activ1_0Im) && (sOut1 == activ1_1Im);
432 BOOST_TEST(validDataPointers);
// Invokes CreateMemCopyWorkloads, instantiated for INeonTensorHandle, with a NeonWorkloadFactory.
// Any checks are performed inside CreateMemCopyWorkloads, which is not defined in this excerpt.
435 BOOST_AUTO_TEST_CASE(CreateMemCopyWorkloadsNeon)
437 NeonWorkloadFactory factory;
438 CreateMemCopyWorkloads<INeonTensorHandle>(factory);
// Ends the test suite; the only opener visible in this file is BOOST_AUTO_TEST_SUITE(CreateWorkloadNeon).
441 BOOST_AUTO_TEST_SUITE_END()