//
// Copyright © 2017 Arm Ltd. All rights reserved.
// See LICENSE file in the project root for full license information.
//
#include <boost/test/unit_test.hpp>

#include "armnn/Descriptors.hpp"
#include "armnn/IRuntime.hpp"
#include "armnn/INetwork.hpp"

#include "backends/test/QuantizeHelper.hpp"
#include <boost/core/ignore_unused.hpp>

#include <set>
#include <vector>

BOOST_AUTO_TEST_SUITE(EndToEnd)

namespace
{
template<typename T>
bool IsFloatIterFunc(T iter)
{
    boost::ignore_unused(iter);
    return IsFloatingPointIterator<T>::value;
}
} // namespace

BOOST_AUTO_TEST_CASE(QuantizedHelper)
{
    std::vector<float> fArray;
    BOOST_TEST(IsFloatIterFunc(fArray.begin()) == true);
    BOOST_TEST(IsFloatIterFunc(fArray.cbegin()) == true);

    std::vector<double> dArray;
    BOOST_TEST(IsFloatIterFunc(dArray.begin()) == true);

    std::vector<int> iArray;
    BOOST_TEST(IsFloatIterFunc(iArray.begin()) == false);

    float floats[5];
    BOOST_TEST(IsFloatIterFunc(&floats[0]) == true);

    int ints[5];
    BOOST_TEST(IsFloatIterFunc(&ints[0]) == false);
}
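
// For reference, one plausible implementation of the IsFloatingPointIterator
// trait exercised above. The real definition lives in QuantizeHelper.hpp and
// may differ in detail; this sketch (which would need <iterator> and
// <type_traits>) checks whether the iterator's value_type is a floating-point
// type, which matches every case asserted in the test:
//
// template<typename ItType>
// struct IsFloatingPointIterator
// {
//     static constexpr bool value =
//         std::is_floating_point<typename std::iterator_traits<ItType>::value_type>::value;
// };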

BOOST_AUTO_TEST_CASE(Unsigned8)
{
    using namespace armnn;

    // Creates the runtime in which the test will run.
    armnn::IRuntime::CreationOptions options;
    armnn::IRuntimePtr runtime(armnn::IRuntime::Create(options));

    // Builds up the structure of the network.
    armnn::INetworkPtr net(INetwork::Create());

    IConnectableLayer* input   = net->AddInputLayer(0, "input");
    IConnectableLayer* softmax = net->AddSoftmaxLayer(SoftmaxDescriptor(), "softmax");
    IConnectableLayer* output  = net->AddOutputLayer(0, "output");

    input->GetOutputSlot(0).Connect(softmax->GetInputSlot(0));
    softmax->GetOutputSlot(0).Connect(output->GetInputSlot(0));

    // Sets the tensors in the network.
    TensorInfo inputTensorInfo(TensorShape({1, 5}), DataType::QuantisedAsymm8);
    inputTensorInfo.SetQuantizationOffset(100);
    inputTensorInfo.SetQuantizationScale(10000.0f);
    input->GetOutputSlot(0).SetTensorInfo(inputTensorInfo);

    TensorInfo outputTensorInfo(TensorShape({1, 5}), DataType::QuantisedAsymm8);
    outputTensorInfo.SetQuantizationOffset(0);
    outputTensorInfo.SetQuantizationScale(1.0f/255.0f);
    softmax->GetOutputSlot(0).SetTensorInfo(outputTensorInfo);

    // Optimizes the network.
    std::vector<armnn::Compute> backends = {armnn::Compute::CpuRef};
    IOptimizedNetworkPtr optNet = Optimize(*net, backends, runtime->GetDeviceSpec());

    // Loads it into the runtime.
    NetworkId netId;
    auto error = runtime->LoadNetwork(netId, std::move(optNet));
    BOOST_TEST(error == Status::Success);

    // Creates structures for input & output.
    std::vector<uint8_t> inputData
    {
        1, 10, 3, 200, 5 // Some inputs - one of which is sufficiently larger than the others to saturate softmax.
    };
    std::vector<uint8_t> outputData(5);

    armnn::InputTensors inputTensors
    {
        {0, armnn::ConstTensor(runtime->GetInputTensorInfo(netId, 0), inputData.data())}
    };
    armnn::OutputTensors outputTensors
    {
        {0, armnn::Tensor(runtime->GetOutputTensorInfo(netId, 0), outputData.data())}
    };

    // Does the inference.
    runtime->EnqueueWorkload(netId, inputTensors, outputTensors);

    // Checks the results.
    BOOST_TEST(outputData[0] == 0);
    BOOST_TEST(outputData[1] == 0);
    BOOST_TEST(outputData[2] == 0);
    BOOST_TEST(outputData[3] == 255); // Softmax has been saturated.
    BOOST_TEST(outputData[4] == 0);
}
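
// QuantisedAsymm8 tensors use the affine quantization scheme
// real = scale * (quantized - offset). The following minimal sketch (the
// helper and test names are ours, not part of the ArmNN API) shows why the
// raw input value 200 above dominates the softmax: with scale=10000 and
// offset=100 it dequantizes to 1,000,000 while the other inputs map to
// zero or large negative values.
namespace
{
float DequantizeAsymm8(uint8_t value, float scale, int32_t offset)
{
    return scale * static_cast<float>(static_cast<int32_t>(value) - offset);
}
} // namespace

BOOST_AUTO_TEST_CASE(Unsigned8QuantizationMapping)
{
    BOOST_TEST(DequantizeAsymm8(200, 10000.0f, 100) ==  1000000.0f);
    BOOST_TEST(DequantizeAsymm8(100, 10000.0f, 100) ==  0.0f);
    BOOST_TEST(DequantizeAsymm8(  1, 10000.0f, 100) == -990000.0f);
}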

template <typename T>
void ConstantUsageTest(const std::vector<armnn::Compute>& computeDevice,
    const armnn::TensorInfo& commonTensorInfo,
    const std::vector<T>& inputData,
    const std::vector<T>& constantData,
    const std::vector<T>& expectedOutputData)
{
    using namespace armnn;

    // Creates the runtime in which the test will run.
    armnn::IRuntime::CreationOptions options;
    armnn::IRuntimePtr runtime(armnn::IRuntime::Create(options));

    // Builds up the structure of the network.
    INetworkPtr net(INetwork::Create());

    IConnectableLayer* input = net->AddInputLayer(0);
    IConnectableLayer* constant = net->AddConstantLayer(ConstTensor(commonTensorInfo, constantData));
    IConnectableLayer* add = net->AddAdditionLayer();
    IConnectableLayer* output = net->AddOutputLayer(0);

    input->GetOutputSlot(0).Connect(add->GetInputSlot(0));
    constant->GetOutputSlot(0).Connect(add->GetInputSlot(1));
    add->GetOutputSlot(0).Connect(output->GetInputSlot(0));

    // Sets the tensors in the network.
    input->GetOutputSlot(0).SetTensorInfo(commonTensorInfo);
    constant->GetOutputSlot(0).SetTensorInfo(commonTensorInfo);
    add->GetOutputSlot(0).SetTensorInfo(commonTensorInfo);

    // Optimizes the network.
    IOptimizedNetworkPtr optNet = Optimize(*net, computeDevice, runtime->GetDeviceSpec());

    // Loads it into the runtime.
    NetworkId netId;
    runtime->LoadNetwork(netId, std::move(optNet));

    // Creates structures for input & output.
    std::vector<T> outputData(inputData.size());

    InputTensors inputTensors
    {
        {0, armnn::ConstTensor(runtime->GetInputTensorInfo(netId, 0), inputData.data())}
    };
    OutputTensors outputTensors
    {
        {0, armnn::Tensor(runtime->GetOutputTensorInfo(netId, 0), outputData.data())}
    };

    // Does the inference.
    runtime->EnqueueWorkload(netId, inputTensors, outputTensors);

    // Checks the results.
    BOOST_TEST(outputData == expectedOutputData);
}

static void ConstantUsageFloat32Test(const std::vector<armnn::Compute>& computeDevice)
{
    const armnn::TensorInfo commonTensorInfo({ 2, 3 }, armnn::DataType::Float32);

    ConstantUsageTest(computeDevice,
        commonTensorInfo,
        std::vector<float>{ 1.f, 2.f, 3.f, 4.f, 5.f, 6.f }, // Input.
        std::vector<float>{ 6.f, 5.f, 4.f, 3.f, 2.f, 1.f }, // Const input.
        std::vector<float>{ 7.f, 7.f, 7.f, 7.f, 7.f, 7.f }  // Expected output.
    );
}

static void ConstantUsageUint8Test(const std::vector<armnn::Compute>& computeDevice)
{
    armnn::TensorInfo commonTensorInfo({ 2, 3 }, armnn::DataType::QuantisedAsymm8);

    const float scale = 0.023529f;
    const int8_t offset = -43;

    commonTensorInfo.SetQuantizationScale(scale);
    commonTensorInfo.SetQuantizationOffset(offset);

    ConstantUsageTest(computeDevice,
        commonTensorInfo,
        QuantizedVector<uint8_t>(scale, offset, { 1.f, 2.f, 3.f, 4.f, 5.f, 6.f }), // Input.
        QuantizedVector<uint8_t>(scale, offset, { 6.f, 5.f, 4.f, 3.f, 2.f, 1.f }), // Const input.
        QuantizedVector<uint8_t>(scale, offset, { 7.f, 7.f, 7.f, 7.f, 7.f, 7.f })  // Expected output.
    );
}
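
// QuantizedVector comes from QuantizeHelper.hpp; conceptually it maps each
// float through the usual asymmetric-quantization formula, roughly (a sketch,
// not the exact source):
//
//     quantized = clamp(round(value / scale) + offset, 0, 255)
//
// Because the input, the constant, and the expected output all share one
// scale and offset here, the float-domain identity input + constant = 7
// survives quantization exactly.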

BOOST_AUTO_TEST_CASE(ConstantUsage_Ref_Float32)
{
    std::vector<armnn::Compute> backends = {armnn::Compute::CpuRef};
    ConstantUsageFloat32Test(backends);
}

#if ARMCOMPUTENEON_ENABLED
BOOST_AUTO_TEST_CASE(ConstantUsage_Neon_Float32)
{
    ConstantUsageFloat32Test({armnn::Compute::CpuAcc});
}
#endif

#if ARMCOMPUTECL_ENABLED
BOOST_AUTO_TEST_CASE(ConstantUsage_Cl_Float32)
{
    ConstantUsageFloat32Test({armnn::Compute::GpuAcc});
}
#endif

BOOST_AUTO_TEST_CASE(ConstantUsage_Ref_Uint8)
{
    std::vector<armnn::Compute> backends = {armnn::Compute::CpuRef};
    ConstantUsageUint8Test(backends);
}

BOOST_AUTO_TEST_CASE(TrivialAdd)
{
    // This test was designed to match "AddTwo" in android nn/runtime/test/TestTrivialModel.cpp.

    using namespace armnn;

    // Creates the runtime in which the test will run.
    armnn::IRuntime::CreationOptions options;
    armnn::IRuntimePtr runtime(armnn::IRuntime::Create(options));

    // Builds up the structure of the network.
    armnn::INetworkPtr net(INetwork::Create());

    IConnectableLayer* input1 = net->AddInputLayer(0);
    IConnectableLayer* input2 = net->AddInputLayer(1);
    IConnectableLayer* add    = net->AddAdditionLayer();
    IConnectableLayer* output = net->AddOutputLayer(0);

    input1->GetOutputSlot(0).Connect(add->GetInputSlot(0));
    input2->GetOutputSlot(0).Connect(add->GetInputSlot(1));
    add->GetOutputSlot(0).Connect(output->GetInputSlot(0));

    // Sets the tensors in the network.
    TensorInfo tensorInfo(TensorShape({3, 4}), DataType::Float32);
    input1->GetOutputSlot(0).SetTensorInfo(tensorInfo);
    input2->GetOutputSlot(0).SetTensorInfo(tensorInfo);
    add->GetOutputSlot(0).SetTensorInfo(tensorInfo);

    // Optimizes the network.
    std::vector<armnn::Compute> backends = {armnn::Compute::CpuRef};
    IOptimizedNetworkPtr optNet = Optimize(*net, backends, runtime->GetDeviceSpec());

    // Loads it into the runtime.
    NetworkId netId;
    runtime->LoadNetwork(netId, std::move(optNet));

    // Creates structures for input & output - matching the android nn test.
    std::vector<float> input1Data
    {
        1.f, 2.f, 3.f, 4.f, 5.f, 6.f, 7.f, 8.f, 9.f, 10.f, 11.f, 12.f
    };
    std::vector<float> input2Data
    {
        100.f, 200.f, 300.f, 400.f, 500.f, 600.f, 700.f, 800.f, 900.f, 1000.f, 1100.f, 1200.f
    };
    std::vector<float> outputData(12);

    InputTensors inputTensors
    {
        {0, armnn::ConstTensor(runtime->GetInputTensorInfo(netId, 0), input1Data.data())},
        {1, armnn::ConstTensor(runtime->GetInputTensorInfo(netId, 1), input2Data.data())}
    };
    OutputTensors outputTensors
    {
        {0, armnn::Tensor(runtime->GetOutputTensorInfo(netId, 0), outputData.data())}
    };

    // Does the inference.
    runtime->EnqueueWorkload(netId, inputTensors, outputTensors);

    // Checks the results.
    BOOST_TEST(outputData[0] == 101);
    BOOST_TEST(outputData[1] == 202);
    BOOST_TEST(outputData[2] == 303);
    BOOST_TEST(outputData[3] == 404);
    BOOST_TEST(outputData[4] == 505);
    BOOST_TEST(outputData[5] == 606);
    BOOST_TEST(outputData[6] == 707);
    BOOST_TEST(outputData[7] == 808);
    BOOST_TEST(outputData[8] == 909);
    BOOST_TEST(outputData[9] == 1010);
    BOOST_TEST(outputData[10] == 1111);
    BOOST_TEST(outputData[11] == 1212);
}

BOOST_AUTO_TEST_CASE(MultipleOutputs)
{
    using namespace armnn;

    // Creates the runtime in which the test will run.
    armnn::IRuntime::CreationOptions options;
    armnn::IRuntimePtr runtime(armnn::IRuntime::Create(options));

    // Builds up the structure of the network.
    INetworkPtr net(INetwork::Create());

    IConnectableLayer* input = net->AddInputLayer(0);

    // ReLu1: BoundedReLu with upper bound 1 and lower bound -1.
    ActivationDescriptor activation1Descriptor;
    activation1Descriptor.m_Function = ActivationFunction::BoundedReLu;
    activation1Descriptor.m_A = 1.f;
    activation1Descriptor.m_B = -1.f;
    IConnectableLayer* activation1 = net->AddActivationLayer(activation1Descriptor);

    // ReLu6: BoundedReLu with upper bound 6 (the lower bound defaults to 0).
    ActivationDescriptor activation2Descriptor;
    activation2Descriptor.m_Function = ActivationFunction::BoundedReLu;
    activation2Descriptor.m_A = 6.0f;
    IConnectableLayer* activation2 = net->AddActivationLayer(activation2Descriptor);

    // BoundedReLu(min=2, max=5)
    ActivationDescriptor activation3Descriptor;
    activation3Descriptor.m_Function = ActivationFunction::BoundedReLu;
    activation3Descriptor.m_A = 5.0f;
    activation3Descriptor.m_B = 2.0f;
    IConnectableLayer* activation3 = net->AddActivationLayer(activation3Descriptor);

    IConnectableLayer* output1 = net->AddOutputLayer(0);
    IConnectableLayer* output2 = net->AddOutputLayer(1);
    IConnectableLayer* output3 = net->AddOutputLayer(2);

    input->GetOutputSlot(0).Connect(activation1->GetInputSlot(0));
    input->GetOutputSlot(0).Connect(activation2->GetInputSlot(0));
    input->GetOutputSlot(0).Connect(activation3->GetInputSlot(0));

    activation1->GetOutputSlot(0).Connect(output1->GetInputSlot(0));
    activation2->GetOutputSlot(0).Connect(output2->GetInputSlot(0));
    activation3->GetOutputSlot(0).Connect(output3->GetInputSlot(0));

    // Sets the tensors in the network.
    TensorInfo tensorInfo(TensorShape({ 10 }), DataType::Float32);
    input->GetOutputSlot(0).SetTensorInfo(tensorInfo);
    activation1->GetOutputSlot(0).SetTensorInfo(tensorInfo);
    activation2->GetOutputSlot(0).SetTensorInfo(tensorInfo);
    activation3->GetOutputSlot(0).SetTensorInfo(tensorInfo);

    // Optimizes the network.
    std::vector<armnn::Compute> backends = {armnn::Compute::CpuRef};
    IOptimizedNetworkPtr optNet = Optimize(*net, backends, runtime->GetDeviceSpec());

    // Loads it into the runtime.
    NetworkId netId;
    runtime->LoadNetwork(netId, std::move(optNet));

    // Creates structures for input & output.
    const std::vector<float> inputData{ 3.f, 5.f, 2.f, 3.f, 7.f, 0.f, -2.f, -1.f, 3.f, 3.f };

    std::vector<float> output1Data(inputData.size());
    std::vector<float> output2Data(inputData.size());
    std::vector<float> output3Data(inputData.size());

    InputTensors inputTensors
    {
        {0, armnn::ConstTensor(runtime->GetInputTensorInfo(netId, 0), inputData.data())}
    };
    OutputTensors outputTensors
    {
        {0, armnn::Tensor(runtime->GetOutputTensorInfo(netId, 0), output1Data.data())},
        {1, armnn::Tensor(runtime->GetOutputTensorInfo(netId, 1), output2Data.data())},
        {2, armnn::Tensor(runtime->GetOutputTensorInfo(netId, 2), output3Data.data())}
    };

    // Does the inference.
    runtime->EnqueueWorkload(netId, inputTensors, outputTensors);

    // Checks the results.
    BOOST_TEST(output1Data == std::vector<float>({ 1.f, 1.f, 1.f, 1.f, 1.f, 0.f, -1.f, -1.f, 1.f, 1.f })); // ReLu1
    BOOST_TEST(output2Data == std::vector<float>({ 3.f, 5.f, 2.f, 3.f, 6.f, 0.f, 0.f, 0.f, 3.f, 3.f })); // ReLu6
    BOOST_TEST(output3Data == std::vector<float>({ 3.f, 5.f, 2.f, 3.f, 5.f, 2.f, 2.f, 2.f, 3.f, 3.f })); // [2, 5]
}
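
// BoundedReLu clamps its input to [m_B, m_A], i.e. f(x) = min(m_A, max(m_B, x)),
// with m_B defaulting to 0 - which is how the "ReLu6" activation above gets
// its lower bound. A minimal reference sketch (the helper and test names are
// ours, not part of the ArmNN API), checked against the expectations above:
namespace
{
float BoundedReLuRef(float x, float upper, float lower)
{
    return x > upper ? upper : (x < lower ? lower : x);
}
} // namespace

BOOST_AUTO_TEST_CASE(BoundedReLuReference)
{
    BOOST_TEST(BoundedReLuRef( 7.f, 1.f, -1.f) == 1.f); // ReLu1 clamps 7 from above.
    BOOST_TEST(BoundedReLuRef(-2.f, 6.f,  0.f) == 0.f); // ReLu6 clamps -2 from below.
    BOOST_TEST(BoundedReLuRef( 7.f, 5.f,  2.f) == 5.f); // BoundedReLu(min=2, max=5).
}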

#if ARMCOMPUTENEON_ENABLED
BOOST_AUTO_TEST_CASE(FallbackToCpuRef)
{
    using namespace armnn;

    // Creates the runtime in which the test will run, allowing fallback to CpuRef.
    IRuntime::CreationOptions options;
    IRuntimePtr runtime(IRuntime::Create(options));

    // Builds up the structure of the network.
    INetworkPtr net(INetwork::Create());

    IConnectableLayer* input = net->AddInputLayer(0);

    // This layer configuration isn't supported by CpuAcc, but we allow fallback to CpuRef, so it should pass.
    NormalizationDescriptor descriptor;
    IConnectableLayer* normalization = net->AddNormalizationLayer(descriptor);

    IConnectableLayer* output = net->AddOutputLayer(0);

    input->GetOutputSlot(0).Connect(normalization->GetInputSlot(0));
    normalization->GetOutputSlot(0).Connect(output->GetInputSlot(0));

    input->GetOutputSlot(0).SetTensorInfo(TensorInfo({ 1, 1, 4, 4 }, DataType::Float32));
    normalization->GetOutputSlot(0).SetTensorInfo(TensorInfo({ 1, 1, 4, 4 }, DataType::Float32));

    // Optimizes the network with both CpuAcc and CpuRef in the backend preference list.
    std::vector<Compute> backends = {Compute::CpuAcc, Compute::CpuRef};
    IOptimizedNetworkPtr optNet = Optimize(*net, backends, runtime->GetDeviceSpec());

    // Loads it into the runtime. It should succeed.
    NetworkId netId;
    BOOST_TEST(runtime->LoadNetwork(netId, std::move(optNet)) == Status::Success);
}
#endif // ARMCOMPUTENEON_ENABLED

BOOST_AUTO_TEST_CASE(ErrorOnLoadNetwork)
{
    using namespace armnn;

    // Creates the runtime in which the test will run.
    // Note we don't allow falling back to CpuRef if an operation (excluding inputs, outputs, etc.) isn't supported.
    IRuntime::CreationOptions options;
    IRuntimePtr runtime(IRuntime::Create(options));

    // Builds up the structure of the network.
    INetworkPtr net(INetwork::Create());

    IConnectableLayer* input = net->AddInputLayer(0);

    // This layer configuration isn't supported by CpuAcc and isn't allowed to fall back, so Optimize will return null.
    NormalizationDescriptor descriptor;
    IConnectableLayer* normalization = net->AddNormalizationLayer(descriptor);

    IConnectableLayer* output = net->AddOutputLayer(0);

    input->GetOutputSlot(0).Connect(normalization->GetInputSlot(0));
    normalization->GetOutputSlot(0).Connect(output->GetInputSlot(0));

    input->GetOutputSlot(0).SetTensorInfo(TensorInfo({ 1, 1, 4, 4 }, DataType::Float32));
    normalization->GetOutputSlot(0).SetTensorInfo(TensorInfo({ 1, 1, 4, 4 }, DataType::Float32));

    // Optimizes the network with only CpuAcc available; this should fail.
    std::vector<Compute> backends = {Compute::CpuAcc};
    IOptimizedNetworkPtr optNet = Optimize(*net, backends, runtime->GetDeviceSpec());
    BOOST_CHECK(!optNet);
}

BOOST_AUTO_TEST_SUITE_END()