Release 18.08
diff --git a/src/armnn/test/EndToEndTest.cpp b/src/armnn/test/EndToEndTest.cpp
index 5ed84d2..4a8a0df 100644
--- a/src/armnn/test/EndToEndTest.cpp
+++ b/src/armnn/test/EndToEndTest.cpp
@@ -11,6 +11,8 @@
 #include "backends/test/QuantizeHelper.hpp"
 #include <boost/core/ignore_unused.hpp>
 
+#include <set>
+
 BOOST_AUTO_TEST_SUITE(EndToEnd)
 
 namespace
@@ -47,9 +49,10 @@ BOOST_AUTO_TEST_CASE(Unsigned8)
     using namespace armnn;
 
     // Create runtime in which test will run
-    armnn::IRuntimePtr runtime(armnn::IRuntime::Create(armnn::Compute::CpuRef));
+    armnn::IRuntime::CreationOptions options;
+    armnn::IRuntimePtr runtime(armnn::IRuntime::Create(options));
 
-    // build up the structure of the network
+    // Builds up the structure of the network.
     armnn::INetworkPtr net(INetwork::Create());
 
     IConnectableLayer* input = net->AddInputLayer(0, "input");
@@ -59,7 +62,7 @@ BOOST_AUTO_TEST_CASE(Unsigned8)
     input->GetOutputSlot(0).Connect(softmax->GetInputSlot(0));
     softmax->GetOutputSlot(0).Connect(output->GetInputSlot(0));
 
-    // set the tensors in the network
+    // Sets the tensors in the network.
     TensorInfo inputTensorInfo(TensorShape({1, 5}), DataType::QuantisedAsymm8);
     inputTensorInfo.SetQuantizationOffset(100);
     inputTensorInfo.SetQuantizationScale(10000.0f);
@@ -71,17 +74,18 @@ BOOST_AUTO_TEST_CASE(Unsigned8)
     softmax->GetOutputSlot(0).SetTensorInfo(outputTensorInfo);
 
     // optimize the network
-    IOptimizedNetworkPtr optNet = Optimize(*net, runtime->GetDeviceSpec());
+    std::vector<armnn::Compute> backends = {armnn::Compute::CpuRef};
+    IOptimizedNetworkPtr optNet = Optimize(*net, backends, runtime->GetDeviceSpec());
 
-    // load it into the runtime
+    // Loads it into the runtime.
     NetworkId netId;
     auto error = runtime->LoadNetwork(netId, std::move(optNet));
     BOOST_TEST(error == Status::Success);
 
-    // create structures for input & output
+    // Creates structures for input & output.
     std::vector<uint8_t> inputData
     {
-        1, 10, 3, 200, 5 // some inputs - one of which is sufficiently larger than the others to saturate softmax
+        1, 10, 3, 200, 5 // Some inputs - one of which is sufficiently larger than the others to saturate softmax.
     };
     std::vector<uint8_t> outputData(5);
 
@@ -94,19 +98,19 @@ BOOST_AUTO_TEST_CASE(Unsigned8)
         {0, armnn::Tensor(runtime->GetOutputTensorInfo(netId, 0), outputData.data())}
     };
 
-    // do the inference
+    // Does the inference.
     runtime->EnqueueWorkload(netId, inputTensors, outputTensors);
 
-    // check the results
+    // Checks the results.
     BOOST_TEST(outputData[0] == 0);
     BOOST_TEST(outputData[1] == 0);
     BOOST_TEST(outputData[2] == 0);
-    BOOST_TEST(outputData[3] == 255); // softmax has been saturated
+    BOOST_TEST(outputData[3] == 255); // softmax has been saturated.
     BOOST_TEST(outputData[4] == 0);
 }
 
 template <typename T>
-void ConstantUsageTest(armnn::Compute computeDevice,
+void ConstantUsageTest(const std::vector<armnn::Compute>& computeDevice,
     const armnn::TensorInfo& commonTensorInfo,
     const std::vector<T>& inputData,
     const std::vector<T>& constantData,
@@ -115,9 +119,10 @@ void ConstantUsageTest(armnn::Compute computeDevice,
     using namespace armnn;
 
     // Create runtime in which test will run
-    armnn::IRuntimePtr runtime(armnn::IRuntime::Create(computeDevice));
+    armnn::IRuntime::CreationOptions options;
+    armnn::IRuntimePtr runtime(armnn::IRuntime::Create(options));
 
-    // build up the structure of the network
+    // Builds up the structure of the network.
     INetworkPtr net(INetwork::Create());
 
     IConnectableLayer* input = net->AddInputLayer(0);
@@ -129,19 +134,19 @@ void ConstantUsageTest(armnn::Compute computeDevice,
     constant->GetOutputSlot(0).Connect(add->GetInputSlot(1));
     add->GetOutputSlot(0).Connect(output->GetInputSlot(0));
 
-    // set the tensors in the network
+    // Sets the tensors in the network.
     input->GetOutputSlot(0).SetTensorInfo(commonTensorInfo);
     constant->GetOutputSlot(0).SetTensorInfo(commonTensorInfo);
     add->GetOutputSlot(0).SetTensorInfo(commonTensorInfo);
 
     // optimize the network
-    IOptimizedNetworkPtr optNet = Optimize(*net, runtime->GetDeviceSpec());
+    IOptimizedNetworkPtr optNet = Optimize(*net, computeDevice, runtime->GetDeviceSpec());
 
-    // load it into the runtime
+    // Loads it into the runtime.
     NetworkId netId;
     runtime->LoadNetwork(netId, std::move(optNet));
 
-    // create structures for input & output
+    // Creates structures for input & output.
     std::vector<T> outputData(inputData.size());
 
     InputTensors inputTensors
@@ -153,26 +158,26 @@ void ConstantUsageTest(armnn::Compute computeDevice,
         {0, armnn::Tensor(runtime->GetOutputTensorInfo(netId, 0), outputData.data())}
     };
 
-    // do the inference
+    // Does the inference.
     runtime->EnqueueWorkload(netId, inputTensors, outputTensors);
 
-    // check the results
+    // Checks the results.
     BOOST_TEST(outputData == expectedOutputData);
 }
 
-static void ConstantUsageFloat32Test(armnn::Compute computeDevice)
+static void ConstantUsageFloat32Test(const std::vector<armnn::Compute>& computeDevice)
 {
     const armnn::TensorInfo commonTensorInfo({ 2, 3 }, armnn::DataType::Float32);
 
     ConstantUsageTest(computeDevice,
         commonTensorInfo,
-        std::vector<float>{ 1.f, 2.f, 3.f, 4.f, 5.f, 6.f }, // input
-        std::vector<float>{ 6.f, 5.f, 4.f, 3.f, 2.f, 1.f }, // const input
-        std::vector<float>{ 7.f, 7.f, 7.f, 7.f, 7.f, 7.f }  // expected output
+        std::vector<float>{ 1.f, 2.f, 3.f, 4.f, 5.f, 6.f }, // Input.
+        std::vector<float>{ 6.f, 5.f, 4.f, 3.f, 2.f, 1.f }, // Const input.
+        std::vector<float>{ 7.f, 7.f, 7.f, 7.f, 7.f, 7.f }  // Expected output.
     );
 }
 
-static void ConstantUsageUint8Test(armnn::Compute computeDevice)
+static void ConstantUsageUint8Test(const std::vector<armnn::Compute>& computeDevice)
 {
     armnn::TensorInfo commonTensorInfo({ 2, 3 }, armnn::DataType::QuantisedAsymm8);
 
@@ -184,46 +189,49 @@ static void ConstantUsageUint8Test(armnn::Compute computeDevice)
 
     ConstantUsageTest(computeDevice,
         commonTensorInfo,
-        QuantizedVector<uint8_t>(scale, offset, { 1.f, 2.f, 3.f, 4.f, 5.f, 6.f }), // input
-        QuantizedVector<uint8_t>(scale, offset, { 6.f, 5.f, 4.f, 3.f, 2.f, 1.f }), // const input
-        QuantizedVector<uint8_t>(scale, offset, { 7.f, 7.f, 7.f, 7.f, 7.f, 7.f })  // expected output
+        QuantizedVector<uint8_t>(scale, offset, { 1.f, 2.f, 3.f, 4.f, 5.f, 6.f }), // Input.
+        QuantizedVector<uint8_t>(scale, offset, { 6.f, 5.f, 4.f, 3.f, 2.f, 1.f }), // Const input.
+        QuantizedVector<uint8_t>(scale, offset, { 7.f, 7.f, 7.f, 7.f, 7.f, 7.f })  // Expected output.
     );
 }
 
 BOOST_AUTO_TEST_CASE(ConstantUsage_Ref_Float32)
 {
-    ConstantUsageFloat32Test(armnn::Compute::CpuRef);
+    std::vector<armnn::Compute> backends = {armnn::Compute::CpuRef};
+    ConstantUsageFloat32Test(backends);
 }
 
 #if ARMCOMPUTENEON_ENABLED
 BOOST_AUTO_TEST_CASE(ConstantUsage_Neon_Float32)
 {
-    ConstantUsageFloat32Test(armnn::Compute::CpuAcc);
+    ConstantUsageFloat32Test({armnn::Compute::CpuAcc});
 }
 #endif
 
 #if ARMCOMPUTECL_ENABLED
 BOOST_AUTO_TEST_CASE(ConstantUsage_Cl_Float32)
 {
-    ConstantUsageFloat32Test(armnn::Compute::GpuAcc);
+    ConstantUsageFloat32Test({armnn::Compute::GpuAcc});
 }
 #endif
 
 BOOST_AUTO_TEST_CASE(ConstantUsage_Ref_Uint8)
 {
-    ConstantUsageUint8Test(armnn::Compute::CpuRef);
+    std::vector<armnn::Compute> backends = {armnn::Compute::CpuRef};
+    ConstantUsageUint8Test(backends);
 }
 
 BOOST_AUTO_TEST_CASE(TrivialAdd)
 {
-    // This test was designed to match "AddTwo" in android nn/runtime/test/TestTrivialModel.cpp
+    // This test was designed to match "AddTwo" in android nn/runtime/test/TestTrivialModel.cpp.
 
     using namespace armnn;
 
     // Create runtime in which test will run
-    armnn::IRuntimePtr runtime(armnn::IRuntime::Create(armnn::Compute::CpuRef));
+    armnn::IRuntime::CreationOptions options;
+    armnn::IRuntimePtr runtime(armnn::IRuntime::Create(options));
 
-    // build up the structure of the network
+    // Builds up the structure of the network.
     armnn::INetworkPtr net(INetwork::Create());
 
     IConnectableLayer* input1 = net->AddInputLayer(0);
@@ -235,20 +243,21 @@ BOOST_AUTO_TEST_CASE(TrivialAdd)
     input2->GetOutputSlot(0).Connect(add->GetInputSlot(1));
     add->GetOutputSlot(0).Connect(output->GetInputSlot(0));
 
-    // set the tensors in the network
+    // Sets the tensors in the network.
     TensorInfo tensorInfo(TensorShape({3, 4}), DataType::Float32);
     input1->GetOutputSlot(0).SetTensorInfo(tensorInfo);
     input2->GetOutputSlot(0).SetTensorInfo(tensorInfo);
     add->GetOutputSlot(0).SetTensorInfo(tensorInfo);
 
     // optimize the network
-    IOptimizedNetworkPtr optNet = Optimize(*net, runtime->GetDeviceSpec());
+    std::vector<armnn::Compute> backends = {armnn::Compute::CpuRef};
+    IOptimizedNetworkPtr optNet = Optimize(*net, backends, runtime->GetDeviceSpec());
 
-    // load it into the runtime
+    // Loads it into the runtime.
     NetworkId netId;
     runtime->LoadNetwork(netId, std::move(optNet));
 
-    // create structures for input & output - matching android nn test
+    // Creates structures for input & output - matching android nn test.
     std::vector<float> input1Data
     {
         1.f, 2.f, 3.f, 4.f, 5.f, 6.f, 7.f, 8.f, 9.f, 10.f, 11.f, 12.f
@@ -269,10 +278,10 @@ BOOST_AUTO_TEST_CASE(TrivialAdd)
         {0,armnn::Tensor(runtime->GetOutputTensorInfo(netId, 0), outputData.data())}
     };
 
-    // do the inference
+    // Does the inference.
     runtime->EnqueueWorkload(netId, inputTensors, outputTensors);
 
-    // check the results
+    // Checks the results.
     BOOST_TEST(outputData[0] == 101);
     BOOST_TEST(outputData[1] == 202);
     BOOST_TEST(outputData[2] == 303);
@@ -292,9 +301,10 @@ BOOST_AUTO_TEST_CASE(MultipleOutputs)
     using namespace armnn;
 
     // Create runtime in which test will run
-    armnn::IRuntimePtr  runtime(armnn::IRuntime::Create(armnn::Compute::CpuRef));
+    armnn::IRuntime::CreationOptions options;
+    armnn::IRuntimePtr  runtime(armnn::IRuntime::Create(options));
 
-    // build up the structure of the network
+    // Builds up the structure of the network.
     INetworkPtr net(INetwork::Create());
 
     IConnectableLayer* input = net->AddInputLayer(0);
@@ -331,7 +341,7 @@ BOOST_AUTO_TEST_CASE(MultipleOutputs)
     activation2->GetOutputSlot(0).Connect(output2->GetInputSlot(0));
     activation3->GetOutputSlot(0).Connect(output3->GetInputSlot(0));
 
-    // set the tensors in the network
+    // Sets the tensors in the network.
     TensorInfo tensorInfo(TensorShape({ 10 }), DataType::Float32);
     input->GetOutputSlot(0).SetTensorInfo(tensorInfo);
     activation1->GetOutputSlot(0).SetTensorInfo(tensorInfo);
@@ -339,13 +349,14 @@ BOOST_AUTO_TEST_CASE(MultipleOutputs)
     activation3->GetOutputSlot(0).SetTensorInfo(tensorInfo);
 
     // optimize the network
-    IOptimizedNetworkPtr optNet = Optimize(*net, runtime->GetDeviceSpec());
+    std::vector<armnn::Compute> backends = {armnn::Compute::CpuRef};
+    IOptimizedNetworkPtr optNet = Optimize(*net, backends, runtime->GetDeviceSpec());
 
-    // load it into the runtime
+    // Loads it into the runtime.
     NetworkId netId;
     runtime->LoadNetwork(netId, std::move(optNet));
 
-    // create structures for input & output
+    // Creates structures for input & output.
     const std::vector<float> inputData{ 3.f, 5.f, 2.f, 3.f, 7.f, 0.f, -2.f, -1.f, 3.f, 3.f };
 
     std::vector<float> output1Data(inputData.size());
@@ -363,32 +374,66 @@ BOOST_AUTO_TEST_CASE(MultipleOutputs)
         {2,armnn::Tensor(runtime->GetOutputTensorInfo(netId, 2), output3Data.data())}
     };
 
-    // do the inference
+    // Does the inference.
     runtime->EnqueueWorkload(netId, inputTensors, outputTensors);
 
-    // check the results
+    // Checks the results.
     BOOST_TEST(output1Data == std::vector<float>({ 1.f, 1.f, 1.f, 1.f, 1.f, 0.f, -1.f, -1.f, 1.f, 1.f })); // ReLu1
     BOOST_TEST(output2Data == std::vector<float>({ 3.f, 5.f, 2.f, 3.f, 6.f, 0.f, 0.f, 0.f, 3.f, 3.f })); // ReLu6
     BOOST_TEST(output3Data == std::vector<float>({ 3.f, 5.f, 2.f, 3.f, 5.f, 2.f, 2.f, 2.f, 3.f, 3.f })); // [2, 5]
 }
 
 #if ARMCOMPUTENEON_ENABLED
+BOOST_AUTO_TEST_CASE(FallbackToCpuRef)
+{
+    using namespace armnn;
+
+    // Create runtime in which test will run and allow fallback to CpuRef.
+    IRuntime::CreationOptions options;
+    IRuntimePtr runtime(IRuntime::Create(options));
+
+    // Builds up the structure of the network.
+    INetworkPtr net(INetwork::Create());
+
+    IConnectableLayer* input = net->AddInputLayer(0);
+
+    // This layer configuration isn't supported by CpuAcc, but we allow fallback to CpuRef, so it should pass.
+    NormalizationDescriptor descriptor;
+    IConnectableLayer* pooling = net->AddNormalizationLayer(descriptor);
+
+    IConnectableLayer* output = net->AddOutputLayer(0);
+
+    input->GetOutputSlot(0).Connect(pooling->GetInputSlot(0));
+    pooling->GetOutputSlot(0).Connect(output->GetInputSlot(0));
+
+    input->GetOutputSlot(0).SetTensorInfo(TensorInfo({ 1, 1, 4, 4 }, DataType::Float32));
+    pooling->GetOutputSlot(0).SetTensorInfo(TensorInfo({ 1, 1, 4, 4 }, DataType::Float32));
+
+    // optimize the network
+    std::vector<Compute> backends = {Compute::CpuAcc, Compute::CpuRef};
+    IOptimizedNetworkPtr optNet = Optimize(*net, backends, runtime->GetDeviceSpec());
+
+    // Loads it into the runtime. It should pass.
+    NetworkId netId;
+    BOOST_TEST(runtime->LoadNetwork(netId, std::move(optNet)) == Status::Success);
+}
+#endif // ARMCOMPUTENEON_ENABLED
+
 BOOST_AUTO_TEST_CASE(ErrorOnLoadNetwork)
 {
     using namespace armnn;
 
     // Create runtime in which test will run
     // Note we don't allow falling back to CpuRef if an operation (excluding inputs, outputs, etc.) isn't supported
-    armnn::IRuntime::CreationOptions options(armnn::Compute::CpuAcc);
-    options.m_UseCpuRefAsFallback = false;
-    armnn::IRuntimePtr runtime(armnn::IRuntime::Create(options));
+    IRuntime::CreationOptions options;
+    IRuntimePtr runtime(IRuntime::Create(options));
 
     // build up the structure of the network
     INetworkPtr net(INetwork::Create());
 
     IConnectableLayer* input = net->AddInputLayer(0);
 
-    // This layer configuration isn't supported by CpuAcc and isn't allowed to fall back, so LoadNetwork will fail.
+    // This layer configuration isn't supported by CpuAcc and isn't allowed to fall back, so Optimize will return null.
     NormalizationDescriptor descriptor;
     IConnectableLayer* pooling = net->AddNormalizationLayer(descriptor);
 
@@ -401,12 +446,9 @@ BOOST_AUTO_TEST_CASE(ErrorOnLoadNetwork)
     pooling->GetOutputSlot(0).SetTensorInfo(TensorInfo({ 1, 1, 4, 4 }, DataType::Float32));
 
     // optimize the network
-    IOptimizedNetworkPtr optNet = Optimize(*net, runtime->GetDeviceSpec());
-
-    // Load it into the runtime. It should fail.
-    NetworkId netId;
-    BOOST_TEST(runtime->LoadNetwork(netId, std::move(optNet)) == Status::Failure);
+    std::vector<Compute> backends = {Compute::CpuAcc};
+    IOptimizedNetworkPtr optNet = Optimize(*net, backends, runtime->GetDeviceSpec());
+    BOOST_CHECK(!optNet);
 }
-#endif // ARMCOMPUTENEON_ENABLED
 
 BOOST_AUTO_TEST_SUITE_END()
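
For reference, below is a minimal, self-contained sketch of the 18.08 API pattern this change migrates the tests to: IRuntime::Create now takes a CreationOptions struct instead of a Compute device, and Optimize takes an explicit, ordered backend-preference list, falling back per layer and returning a null pointer if no listed backend supports a layer. It is condensed from the TrivialAdd test above; the main() wrapper, the buffer values, and the armnn/ArmNN.hpp umbrella include are illustrative assumptions rather than part of the patch.

#include <armnn/ArmNN.hpp>

#include <utility>
#include <vector>

int main()
{
    using namespace armnn;

    // The runtime is now created from a CreationOptions struct; the target
    // device is no longer fixed at runtime-creation time.
    IRuntime::CreationOptions options;
    IRuntimePtr runtime(IRuntime::Create(options));

    // Trivial network: out = in1 + in2.
    INetworkPtr net(INetwork::Create());
    IConnectableLayer* input1 = net->AddInputLayer(0);
    IConnectableLayer* input2 = net->AddInputLayer(1);
    IConnectableLayer* add    = net->AddAdditionLayer();
    IConnectableLayer* output = net->AddOutputLayer(0);

    input1->GetOutputSlot(0).Connect(add->GetInputSlot(0));
    input2->GetOutputSlot(0).Connect(add->GetInputSlot(1));
    add->GetOutputSlot(0).Connect(output->GetInputSlot(0));

    TensorInfo tensorInfo(TensorShape({3, 4}), DataType::Float32);
    input1->GetOutputSlot(0).SetTensorInfo(tensorInfo);
    input2->GetOutputSlot(0).SetTensorInfo(tensorInfo);
    add->GetOutputSlot(0).SetTensorInfo(tensorInfo);

    // Backend preferences are passed explicitly to Optimize; it returns a
    // null IOptimizedNetworkPtr if some layer is supported by none of them.
    std::vector<Compute> backends = {Compute::CpuRef};
    IOptimizedNetworkPtr optNet = Optimize(*net, backends, runtime->GetDeviceSpec());

    NetworkId netId;
    runtime->LoadNetwork(netId, std::move(optNet));

    // Binds caller-owned buffers and runs the inference.
    std::vector<float> input1Data(12, 1.0f);
    std::vector<float> input2Data(12, 2.0f);
    std::vector<float> outputData(12);

    InputTensors inputTensors
    {
        {0, ConstTensor(runtime->GetInputTensorInfo(netId, 0), input1Data.data())},
        {1, ConstTensor(runtime->GetInputTensorInfo(netId, 1), input2Data.data())}
    };
    OutputTensors outputTensors
    {
        {0, Tensor(runtime->GetOutputTensorInfo(netId, 0), outputData.data())}
    };

    runtime->EnqueueWorkload(netId, inputTensors, outputTensors);
    return 0;
}

The same pattern underlies every test updated above; the FallbackToCpuRef test simply lists {Compute::CpuAcc, Compute::CpuRef} so that layers unsupported by CpuAcc fall through to the reference backend.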