IVGCVSW-5012 Enable zero copy for Neon
author: Narumol Prangnawarat <narumol.prangnawarat@arm.com>
Fri, 14 Aug 2020 10:51:12 +0000 (11:51 +0100)
committer: Jim Flynn <jim.flynn@arm.com>
Sun, 16 Aug 2020 14:47:00 +0000 (14:47 +0000)
 * Allow memory import if padding is not required in Neon
 * AddMockImportBackend for fallback tests
 * Refactor GraphUtils
 * Memory import unit tests
 * Fallback unit tests

Signed-off-by: Narumol Prangnawarat <narumol.prangnawarat@arm.com>
Change-Id: Ic2e141e12774bf6d915e77745b6f6d2d83d9b82d

15 files changed:
src/armnn/LoadedNetwork.cpp
src/armnn/Network.cpp
src/armnn/test/GraphTests.cpp
src/armnn/test/GraphUtils.cpp
src/armnn/test/GraphUtils.hpp
src/backends/backendsCommon/test/CMakeLists.txt
src/backends/backendsCommon/test/mockBackend/MockImportBackend.cpp [new file with mode: 0644]
src/backends/backendsCommon/test/mockBackend/MockImportBackend.hpp [new file with mode: 0644]
src/backends/backendsCommon/test/mockBackend/MockImportLayerSupport.hpp [new file with mode: 0644]
src/backends/neon/NeonTensorHandleFactory.cpp
src/backends/neon/NeonTensorHandleFactory.hpp
src/backends/neon/test/CMakeLists.txt
src/backends/neon/test/NeonEndToEndTests.cpp
src/backends/neon/test/NeonFallbackTests.cpp [new file with mode: 0644]
src/backends/neon/test/NeonTensorHandleTests.cpp

index 593539d..4a293b9 100644 (file)
@@ -168,6 +168,7 @@ LoadedNetwork::LoadedNetwork(std::unique_ptr<OptimizedNetwork> net,
         switch (layer->GetType())
         {
         case LayerType::Input:
+        case LayerType::MemImport:
             {
                 // If IsImportEnabled is true then we need to set IsMemoryManaged to false when creating TensorHandles
                 layer->CreateTensorHandles(m_TensorHandleFactoryRegistry, workloadFactory, !m_IsImportEnabled);
index 132924a..94a9961 100644 (file)
@@ -912,7 +912,15 @@ EdgeStrategy CalculateEdgeStrategy(BackendsMap& backends,
 
             if ((dstFactory->GetImportFlags() & srcFactory->GetExportFlags()) != 0)
             {
-                return EdgeStrategy::ExportToTarget;
+                auto srcCapability = srcFactory->GetCapabilities(&layer, &layer, CapabilityClass::PaddingRequired);
+                auto dstCapability = dstFactory->GetCapabilities(&connectedLayer,
+                                                                 &connectedLayer,
+                                                                 CapabilityClass::PaddingRequired);
+                // Do not require memory copy if the source and destination do not require padding.
+                if (srcCapability.empty() && dstCapability.empty())
+                {
+                    return EdgeStrategy::ExportToTarget;
+                }
             }
         }
     }
index a3c42b6..5a17c1c 100644 (file)
 #include <boost/cast.hpp>
 #include <boost/test/unit_test.hpp>
 
-/// Checks that first comes before second in the order.
-bool CheckOrder(const armnn::Graph& graph, const armnn::Layer* first, const armnn::Layer* second)
-{
-    graph.Print();
-
-    const auto& order = graph.TopologicalSort();
-
-    auto firstPos = std::find(order.begin(), order.end(), first);
-    auto secondPos = std::find(firstPos, order.end(), second);
-
-    return (secondPos != order.end());
-}
-
 BOOST_AUTO_TEST_SUITE(Graph)
 
 BOOST_AUTO_TEST_CASE(ClassGraph)
index 36db900..bc6b562 100644 (file)
@@ -63,3 +63,16 @@ bool IsConnected(armnn::Layer* srcLayer, armnn::Layer* destLayer,
     }
     return false;
 }
+
+/// Checks that first comes before second in the order.
+bool CheckOrder(const armnn::Graph& graph, const armnn::Layer* first, const armnn::Layer* second)
+{
+    graph.Print();
+
+    const auto& order = graph.TopologicalSort();
+
+    auto firstPos = std::find(order.begin(), order.end(), first);
+    auto secondPos = std::find(firstPos, order.end(), second);
+
+    return (secondPos != order.end());
+}
index b51e4d1..60d03dc 100644 (file)
@@ -21,3 +21,5 @@ bool IsConnected(armnn::Layer* srcLayer, armnn::Layer* destLayer,
                  unsigned int srcSlot, unsigned int destSlot,
                  const armnn::TensorInfo& expectedTensorInfo);
 
+bool CheckOrder(const armnn::Graph& graph, const armnn::Layer* first, const armnn::Layer* second);
+
index f87a69b..ccbfdc6 100644 (file)
@@ -159,6 +159,9 @@ list(APPEND armnnBackendsCommonUnitTests_sources
     layerTests/SubtractionTestImpl.hpp
     layerTests/TransposeConvolution2dTestImpl.cpp
     layerTests/TransposeConvolution2dTestImpl.hpp
+    mockBackend/MockImportBackend.cpp
+    mockBackend/MockImportBackend.hpp
+    mockBackend/MockImportLayerSupport.hpp
 )
 
 if (ARMNNREF)
diff --git a/src/backends/backendsCommon/test/mockBackend/MockImportBackend.cpp b/src/backends/backendsCommon/test/mockBackend/MockImportBackend.cpp
new file mode 100644 (file)
index 0000000..ebe9434
--- /dev/null
@@ -0,0 +1,115 @@
+//
+// Copyright © 2020 Arm Ltd and Contributors. All rights reserved.
+// SPDX-License-Identifier: MIT
+//
+
+#include "MockImportBackend.hpp"
+#include "MockImportLayerSupport.hpp"
+
+#include <armnn/BackendRegistry.hpp>
+#include <armnn/backends/IBackendContext.hpp>
+#include <armnn/backends/IMemoryManager.hpp>
+#include <armnn/utility/PolymorphicDowncast.hpp>
+
+#include <reference/RefWorkloadFactory.hpp>
+#include <reference/RefTensorHandleFactory.hpp>
+
+#include <Optimizer.hpp>
+
+namespace armnn
+{
+
+MockImportBackendInitialiser::MockImportBackendInitialiser()
+{
+    BackendRegistryInstance().Register(MockImportBackend::GetIdStatic(),
+                                       []()
+                                       {
+                                           return IBackendInternalUniquePtr(new MockImportBackend);
+                                       });
+}
+
+MockImportBackendInitialiser::~MockImportBackendInitialiser()
+{
+    try
+    {
+        BackendRegistryInstance().Deregister(MockImportBackend::GetIdStatic());
+    }
+    catch (...)
+    {
+        std::cerr << "could not deregister mock import backend" << std::endl;
+    }
+}
+
+const BackendId& MockImportBackend::GetIdStatic()
+{
+    static const BackendId s_Id{ MockImportBackendId() };
+    return s_Id;
+}
+
+IBackendInternal::IWorkloadFactoryPtr MockImportBackend::CreateWorkloadFactory(
+    const IBackendInternal::IMemoryManagerSharedPtr& memoryManager) const
+{
+    return std::make_unique<RefWorkloadFactory>(PolymorphicPointerDowncast<RefMemoryManager>(memoryManager));
+}
+
+IBackendInternal::IWorkloadFactoryPtr MockImportBackend::CreateWorkloadFactory(
+    class TensorHandleFactoryRegistry& tensorHandleFactoryRegistry) const
+{
+    auto memoryManager = std::make_shared<RefMemoryManager>();
+
+    tensorHandleFactoryRegistry.RegisterMemoryManager(memoryManager);
+    tensorHandleFactoryRegistry.RegisterFactory(std::make_unique<RefTensorHandleFactory>(memoryManager));
+
+    return std::make_unique<RefWorkloadFactory>(PolymorphicPointerDowncast<RefMemoryManager>(memoryManager));
+}
+
+IBackendInternal::IBackendContextPtr MockImportBackend::CreateBackendContext(const IRuntime::CreationOptions&) const
+{
+    return IBackendContextPtr{};
+}
+
+IBackendInternal::IBackendProfilingContextPtr MockImportBackend::CreateBackendProfilingContext(
+    const IRuntime::CreationOptions&, IBackendProfilingPtr&)
+{
+    return IBackendProfilingContextPtr{};
+}
+
+IBackendInternal::IMemoryManagerUniquePtr MockImportBackend::CreateMemoryManager() const
+{
+    return std::make_unique<RefMemoryManager>();
+}
+
+IBackendInternal::Optimizations MockImportBackend::GetOptimizations() const
+{
+    return Optimizations{};
+}
+
+IBackendInternal::ILayerSupportSharedPtr MockImportBackend::GetLayerSupport() const
+{
+    static ILayerSupportSharedPtr layerSupport{new MockImportLayerSupport};
+    return layerSupport;
+}
+
+OptimizationViews MockImportBackend::OptimizeSubgraphView(const SubgraphView& subgraph) const
+{
+    OptimizationViews optimizationViews;
+
+    optimizationViews.AddUntouchedSubgraph(SubgraphView(subgraph));
+
+    return optimizationViews;
+}
+
+std::vector<ITensorHandleFactory::FactoryId> MockImportBackend::GetHandleFactoryPreferences() const
+{
+    return std::vector<ITensorHandleFactory::FactoryId> { RefTensorHandleFactory::GetIdStatic() };
+}
+
+void MockImportBackend::RegisterTensorHandleFactories(class TensorHandleFactoryRegistry& registry)
+{
+    auto memoryManager = std::make_shared<RefMemoryManager>();
+
+    registry.RegisterMemoryManager(memoryManager);
+    registry.RegisterFactory(std::make_unique<RefTensorHandleFactory>(memoryManager));
+}
+
+} // namespace armnn
diff --git a/src/backends/backendsCommon/test/mockBackend/MockImportBackend.hpp b/src/backends/backendsCommon/test/mockBackend/MockImportBackend.hpp
new file mode 100644 (file)
index 0000000..ecc661f
--- /dev/null
@@ -0,0 +1,53 @@
+//
+// Copyright © 2020 Arm Ltd and Contributors. All rights reserved.
+// SPDX-License-Identifier: MIT
+//
+#pragma once
+
+#include <armnn/backends/IBackendInternal.hpp>
+
+namespace armnn
+{
+
+constexpr const char* MockImportBackendId() { return "MockRef"; }
+
+class MockImportBackendInitialiser
+{
+public:
+    MockImportBackendInitialiser();
+    ~MockImportBackendInitialiser();
+};
+
+class MockImportBackend : public IBackendInternal
+{
+public:
+    MockImportBackend()  = default;
+    ~MockImportBackend() = default;
+
+    static const BackendId& GetIdStatic();
+    const BackendId& GetId() const override { return GetIdStatic(); }
+
+    IBackendInternal::IMemoryManagerUniquePtr CreateMemoryManager() const override;
+
+    IBackendInternal::IWorkloadFactoryPtr CreateWorkloadFactory(
+        const IBackendInternal::IMemoryManagerSharedPtr& memoryManager = nullptr) const override;
+
+    IBackendInternal::IWorkloadFactoryPtr CreateWorkloadFactory(
+        class TensorHandleFactoryRegistry& tensorHandleFactoryRegistry) const override;
+
+    IBackendInternal::IBackendContextPtr CreateBackendContext(const IRuntime::CreationOptions&) const override;
+
+    IBackendInternal::IBackendProfilingContextPtr CreateBackendProfilingContext(
+        const IRuntime::CreationOptions& creationOptions, IBackendProfilingPtr& backendProfiling) override;
+
+    IBackendInternal::Optimizations GetOptimizations() const override;
+    IBackendInternal::ILayerSupportSharedPtr GetLayerSupport() const override;
+
+    OptimizationViews OptimizeSubgraphView(const SubgraphView& subgraph) const override;
+
+    std::vector<ITensorHandleFactory::FactoryId> GetHandleFactoryPreferences() const override;
+
+    void RegisterTensorHandleFactories(class TensorHandleFactoryRegistry& registry) override;
+};
+
+} // namespace armnn
diff --git a/src/backends/backendsCommon/test/mockBackend/MockImportLayerSupport.hpp b/src/backends/backendsCommon/test/mockBackend/MockImportLayerSupport.hpp
new file mode 100644 (file)
index 0000000..75e637e
--- /dev/null
@@ -0,0 +1,46 @@
+//
+// Copyright © 2020 Arm Ltd and Contributors. All rights reserved.
+// SPDX-License-Identifier: MIT
+//
+#pragma once
+
+#include <armnn/utility/IgnoreUnused.hpp>
+
+#include <backendsCommon/LayerSupportBase.hpp>
+
+namespace armnn
+{
+
+class MockImportLayerSupport : public LayerSupportBase
+{
+public:
+    bool IsAdditionSupported(const TensorInfo& input0,
+                             const TensorInfo& input1,
+                             const TensorInfo& output,
+                             Optional<std::string&> reasonIfUnsupported = EmptyOptional()) const override
+    {
+        IgnoreUnused(input0);
+        IgnoreUnused(input1);
+        IgnoreUnused(output);
+        IgnoreUnused(reasonIfUnsupported);
+        return true;
+    }
+
+    bool IsInputSupported(const TensorInfo& input,
+                          Optional<std::string&> reasonIfUnsupported) const override
+    {
+        IgnoreUnused(input);
+        IgnoreUnused(reasonIfUnsupported);
+        return true;
+    }
+
+    bool IsOutputSupported(const TensorInfo& output,
+                           Optional<std::string&> reasonIfUnsupported) const override
+    {
+        IgnoreUnused(output);
+        IgnoreUnused(reasonIfUnsupported);
+        return true;
+    }
+};
+
+} // namespace armnn
index 53d5a04..ae6ab59 100644 (file)
@@ -103,12 +103,12 @@ bool NeonTensorHandleFactory::SupportsSubTensors() const
 
 MemorySourceFlags NeonTensorHandleFactory::GetExportFlags() const
 {
-    return 0;
+    return m_ExportFlags;
 }
 
 MemorySourceFlags NeonTensorHandleFactory::GetImportFlags() const
 {
-    return 0;
+    return m_ImportFlags;
 }
 
 std::vector<Capability> NeonTensorHandleFactory::GetCapabilities(const IConnectableLayer* layer,
index ae45aad..2ca67c9 100644 (file)
@@ -41,7 +41,9 @@ class NeonTensorHandleFactory : public ITensorHandleFactory
 {
 public:
     NeonTensorHandleFactory(std::weak_ptr<NeonMemoryManager> mgr)
-                            : m_MemoryManager(mgr)
+                            : m_MemoryManager(mgr),
+                              m_ImportFlags(static_cast<MemorySourceFlags>(MemorySource::Malloc)),
+                              m_ExportFlags(static_cast<MemorySourceFlags>(MemorySource::Malloc))
     {}
 
     std::unique_ptr<ITensorHandle> CreateSubTensorHandle(ITensorHandle& parent,
@@ -76,6 +78,8 @@ public:
 
 private:
     mutable std::shared_ptr<NeonMemoryManager> m_MemoryManager;
+    MemorySourceFlags m_ImportFlags;
+    MemorySourceFlags m_ExportFlags;
 };
 
 } // namespace armnn
index 16c066b..dd13b63 100644 (file)
@@ -6,6 +6,7 @@
 list(APPEND armnnNeonBackendUnitTests_sources
     NeonCreateWorkloadTests.cpp
     NeonEndToEndTests.cpp
+    NeonFallbackTests.cpp
     NeonJsonPrinterTests.cpp
     NeonLayerSupportTests.cpp
     NeonLayerTests.cpp
index ffbae51..dc0a609 100644 (file)
@@ -410,27 +410,27 @@ BOOST_AUTO_TEST_CASE(NeonExportNonAlignedOutputPointerTest)
     ExportNonAlignedOutputPointerTest(defaultBackends);
 }
 
-BOOST_AUTO_TEST_CASE(NeonImportAlignedPointerTest, * boost::unit_test::disabled())
+BOOST_AUTO_TEST_CASE(NeonImportAlignedPointerTest)
 {
     ImportAlignedPointerTest(defaultBackends);
 }
 
-BOOST_AUTO_TEST_CASE(NeonImportOnlyWorkload, * boost::unit_test::disabled())
+BOOST_AUTO_TEST_CASE(NeonImportOnlyWorkload)
 {
     ImportOnlyWorkload(defaultBackends);
 }
 
-BOOST_AUTO_TEST_CASE(NeonExportOnlyWorkload, * boost::unit_test::disabled())
+BOOST_AUTO_TEST_CASE(NeonExportOnlyWorkload)
 {
     ExportOnlyWorkload(defaultBackends);
 }
 
-BOOST_AUTO_TEST_CASE(NeonImportAndExportWorkload, * boost::unit_test::disabled())
+BOOST_AUTO_TEST_CASE(NeonImportAndExportWorkload)
 {
     ImportAndExportWorkload(defaultBackends);
 }
 
-BOOST_AUTO_TEST_CASE(NeonExportOutputWithSeveralOutputSlotConnectionsTest, * boost::unit_test::disabled())
+BOOST_AUTO_TEST_CASE(NeonExportOutputWithSeveralOutputSlotConnectionsTest)
 {
     ExportOutputWithSeveralOutputSlotConnectionsTest(defaultBackends);
 }
diff --git a/src/backends/neon/test/NeonFallbackTests.cpp b/src/backends/neon/test/NeonFallbackTests.cpp
new file mode 100644 (file)
index 0000000..cf4d91b
--- /dev/null
@@ -0,0 +1,547 @@
+//
+// Copyright © 2020 Arm Ltd and Contributors. All rights reserved.
+// SPDX-License-Identifier: MIT
+//
+
+#include <backendsCommon/test/CommonTestUtils.hpp>
+#include <backendsCommon/test/mockBackend/MockImportBackend.hpp>
+
+#include <test/GraphUtils.hpp>
+
+#include <boost/test/unit_test.hpp>
+
+BOOST_AUTO_TEST_SUITE(NeonFallback)
+
+std::vector<armnn::BackendId> defaultBackends = { armnn::Compute::CpuAcc };
+
+BOOST_AUTO_TEST_CASE(FallbackImportToCpuAcc)
+{
+    using namespace armnn;
+
+    // Create a mock backend object
+    MockImportBackendInitialiser initialiser; // Register the Mock Backend
+    auto backendObjPtr = CreateBackendObject(MockImportBackendId());
+    BOOST_TEST((backendObjPtr != nullptr));
+
+    BackendIdSet backendIds = BackendRegistryInstance().GetBackendIds();
+    if (backendIds.find("MockRef") == backendIds.end())
+    {
+        std::string message = "Cannot load MockRef";
+        BOOST_FAIL(message);
+    }
+
+    // Create runtime in which test will run and allow fallback to CpuRef.
+    IRuntime::CreationOptions options;
+    IRuntimePtr runtime(IRuntime::Create(options));
+
+    // Builds up the structure of the network.
+    INetworkPtr net(INetwork::Create());
+
+    IConnectableLayer* input0 = net->AddInputLayer(0, "input0");
+    IConnectableLayer* input1 = net->AddInputLayer(1, "input1");
+    IConnectableLayer* input2 = net->AddInputLayer(2, "input2");
+    IConnectableLayer* add = net->AddAdditionLayer("add");
+    IConnectableLayer* sub = net->AddSubtractionLayer("sub");
+    IConnectableLayer* output = net->AddOutputLayer(0, "output");
+
+    input0->GetOutputSlot(0).Connect(add->GetInputSlot(0));
+    input1->GetOutputSlot(0).Connect(add->GetInputSlot(1));
+    input2->GetOutputSlot(0).Connect(sub->GetInputSlot(0));
+    add->GetOutputSlot(0).Connect(sub->GetInputSlot(1));
+    sub->GetOutputSlot(0).Connect(output->GetInputSlot(0));
+
+    TensorInfo info = TensorInfo({ 1, 2, 3, 2 }, DataType::Float32);
+
+    input0->GetOutputSlot(0).SetTensorInfo(info);
+    input1->GetOutputSlot(0).SetTensorInfo(info);
+    input2->GetOutputSlot(0).SetTensorInfo(info);
+    add->GetOutputSlot(0).SetTensorInfo(info);
+    sub->GetOutputSlot(0).SetTensorInfo(info);
+
+    // optimize the network
+    std::vector<BackendId> backends = { "MockRef", Compute::CpuAcc };
+    IOptimizedNetworkPtr optNet = Optimize(*net, backends, runtime->GetDeviceSpec());
+
+    OptimizedNetwork* optNetObjPtr = PolymorphicDowncast<OptimizedNetwork*>(optNet.get());
+    Graph& graph = optNetObjPtr->GetGraph();
+
+    armnn::Layer* const layer0 = GetFirstLayerWithName(graph, "input0");
+    armnn::Layer* const layer1 = GetFirstLayerWithName(graph, "input1");
+    armnn::Layer* const layer2 = GetFirstLayerWithName(graph, "input2");
+    armnn::Layer* const layer3 = GetFirstLayerWithName(graph, "add");
+    armnn::Layer* const layer4 = GetFirstLayerWithName(graph, "[ add (0) -> sub (1) ]");
+    armnn::Layer* const layer5 = GetFirstLayerWithName(graph, "sub");
+    armnn::Layer* const layer6 = GetFirstLayerWithName(graph, "output");
+
+    // Checks order is valid.
+    BOOST_TEST(CheckOrder(graph, layer0, layer1));
+    BOOST_TEST(CheckOrder(graph, layer1, layer2));
+    BOOST_TEST(CheckOrder(graph, layer2, layer3));
+    BOOST_TEST(CheckOrder(graph, layer3, layer4));
+    BOOST_TEST(CheckOrder(graph, layer4, layer5));
+    BOOST_TEST(CheckOrder(graph, layer5, layer6));
+
+    // Load it into the runtime. It should pass.
+    NetworkId netId;
+    std::string ignoredErrorMessage;
+    INetworkProperties networkProperties(true, true);
+
+    runtime->LoadNetwork(netId, std::move(optNet), ignoredErrorMessage, networkProperties);
+
+    // Creates structures for input & output
+    std::vector<float> inputData0
+    {
+        1.0f, 1.0f, 2.0f, 2.0f, 2.0f, 3.0f, 4.0f, 4.0f, 5.0f, 5.0f, 6.0f, 6.0f
+    };
+    std::vector<float> inputData1
+    {
+        0.0f, 1.0f, 1.0f, 2.0f, 3.0f, 3.0f, 3.0f, 4.0f, 4.0f, 5.0f, 5.0f, 6.0f
+    };
+    std::vector<float> inputData2
+    {
+        12.0f, 11.0f, 10.0f, 9.0f, 8.0f, 7.0f, 6.0f, 5.0f, 4.0f, 3.0f, 2.0f, 1.0f
+    };
+
+    std::vector<float> outputData(12);
+
+    std::vector<float> expectedOutput
+    {
+        11.0f, 9.0f, 7.0f, 5.0f, 3.0f, 1.0f, -1.0f, -3.0f, -5.0f, -7.0f, -9.0f, -11.0f
+    };
+
+    InputTensors inputTensors
+    {
+        { 0, armnn::ConstTensor(runtime->GetInputTensorInfo(netId, 0), inputData0.data()) },
+        { 1, armnn::ConstTensor(runtime->GetInputTensorInfo(netId, 1), inputData1.data()) },
+        { 2, armnn::ConstTensor(runtime->GetInputTensorInfo(netId, 2), inputData2.data()) }
+    };
+    OutputTensors outputTensors
+    {
+        { 0,armnn::Tensor(runtime->GetOutputTensorInfo(netId, 0), outputData.data()) }
+    };
+
+    runtime->GetProfiler(netId)->EnableProfiling(true);
+
+    // Do the inference
+    runtime->EnqueueWorkload(netId, inputTensors, outputTensors);
+
+    // Retrieve the Profiler.Print() output to get the workload execution
+    ProfilerManager& profilerManager = armnn::ProfilerManager::GetInstance();
+    std::stringstream ss;
+    profilerManager.GetProfiler()->Print(ss);;
+    std::string dump = ss.str();
+
+    // Contains ImportMemGeneric
+    std::size_t found = dump.find("ImportMemGeneric");
+    BOOST_TEST(found != std::string::npos);
+
+    // Contains SyncMemGeneric
+    found = dump.find("SyncMemGeneric");
+    BOOST_TEST(found != std::string::npos);
+
+    // Does not contain CopyMemGeneric
+    found = dump.find("CopyMemGeneric");
+    BOOST_TEST(found == std::string::npos);
+
+    // Use memory import between backends
+    BOOST_TEST((layer4->GetType() == LayerType::MemImport));
+
+    // Check output is as expected
+    BOOST_TEST(outputData == expectedOutput);
+}
+
+BOOST_AUTO_TEST_CASE(FallbackPaddingCopyToCpuAcc)
+{
+    using namespace armnn;
+
+    // Create a mock backend object
+    MockImportBackendInitialiser initialiser; // Register the Mock Backend
+    auto backendObjPtr = CreateBackendObject(MockImportBackendId());
+    BOOST_TEST((backendObjPtr != nullptr));
+
+    BackendIdSet backendIds = BackendRegistryInstance().GetBackendIds();
+    if (backendIds.find("MockRef") == backendIds.end())
+    {
+        std::string message = "Cannot load MockRef";
+        BOOST_FAIL(message);
+    }
+
+    // Create runtime in which test will run and allow fallback to CpuRef.
+    IRuntime::CreationOptions options;
+    IRuntimePtr runtime(IRuntime::Create(options));
+
+    // Builds up the structure of the network.
+    INetworkPtr net(INetwork::Create());
+
+    Pooling2dDescriptor desc;
+
+    IConnectableLayer* input0 = net->AddInputLayer(0, "input0");
+    IConnectableLayer* input1 = net->AddInputLayer(1, "input1");
+    IConnectableLayer* add = net->AddAdditionLayer("add");
+    IConnectableLayer* pooling = net->AddPooling2dLayer(desc, "pooling");
+    IConnectableLayer* output = net->AddOutputLayer(0, "output");
+
+    input0->GetOutputSlot(0).Connect(add->GetInputSlot(0));
+    input1->GetOutputSlot(0).Connect(add->GetInputSlot(1));
+    add->GetOutputSlot(0).Connect(pooling->GetInputSlot(0));
+    pooling->GetOutputSlot(0).Connect(output->GetInputSlot(0));
+
+    TensorInfo info = TensorInfo({ 1, 2, 3, 2 }, DataType::Float32);
+    TensorInfo poolingInfo = TensorInfo({ 1, 2, 1, 1 }, DataType::Float32);
+
+    input0->GetOutputSlot(0).SetTensorInfo(info);
+    input1->GetOutputSlot(0).SetTensorInfo(info);
+    add->GetOutputSlot(0).SetTensorInfo(info);
+    pooling->GetOutputSlot(0).SetTensorInfo(poolingInfo);
+
+    // optimize the network
+    std::vector<BackendId> backends = { "MockRef", Compute::CpuAcc };
+    IOptimizedNetworkPtr optNet = Optimize(*net, backends, runtime->GetDeviceSpec());
+
+    OptimizedNetwork* optNetObjPtr = PolymorphicDowncast<OptimizedNetwork*>(optNet.get());
+    Graph& graph = optNetObjPtr->GetGraph();
+
+    armnn::Layer* const layer0 = GetFirstLayerWithName(graph, "input0");
+    armnn::Layer* const layer1 = GetFirstLayerWithName(graph, "input1");
+    armnn::Layer* const layer2 = GetFirstLayerWithName(graph, "add");
+    armnn::Layer* const layer3 = GetFirstLayerWithName(graph, "[ add (0) -> pooling (0) ]");
+    armnn::Layer* const layer4 = GetFirstLayerWithName(graph, "pooling");
+    armnn::Layer* const layer5 = GetFirstLayerWithName(graph, "output");
+
+    // Checks order is valid.
+    BOOST_TEST(CheckOrder(graph, layer0, layer1));
+    BOOST_TEST(CheckOrder(graph, layer1, layer2));
+    BOOST_TEST(CheckOrder(graph, layer2, layer3));
+    BOOST_TEST(CheckOrder(graph, layer3, layer4));
+    BOOST_TEST(CheckOrder(graph, layer4, layer5));
+
+    // Load it into the runtime. It should pass.
+    NetworkId netId;
+    std::string ignoredErrorMessage;
+    INetworkProperties networkProperties(true, true);
+
+    runtime->LoadNetwork(netId, std::move(optNet), ignoredErrorMessage, networkProperties);
+
+    // Creates structures for input & output
+    std::vector<float> inputData0
+    {
+        1.0f, 1.0f, 2.0f, 2.0f, 2.0f, 3.0f, 4.0f, 4.0f, 5.0f, 5.0f, 6.0f, 6.0f
+    };
+    std::vector<float> inputData1
+    {
+        0.0f, 1.0f, 1.0f, 2.0f, 3.0f, 3.0f, 3.0f, 4.0f, 4.0f, 5.0f, 5.0f, 6.0f
+    };
+
+    std::vector<float> outputData(2);
+
+    std::vector<float> expectedOutput
+    {
+        6.0f, 12.0f
+    };
+
+    InputTensors inputTensors
+    {
+        { 0, armnn::ConstTensor(runtime->GetInputTensorInfo(netId, 0), inputData0.data()) },
+        { 1, armnn::ConstTensor(runtime->GetInputTensorInfo(netId, 1), inputData1.data()) }
+    };
+    OutputTensors outputTensors
+    {
+        { 0, armnn::Tensor(runtime->GetOutputTensorInfo(netId, 0), outputData.data()) }
+    };
+
+    runtime->GetProfiler(netId)->EnableProfiling(true);
+
+    // Do the inference
+    runtime->EnqueueWorkload(netId, inputTensors, outputTensors);
+
+    // Retrieve the Profiler.Print() output to get the workload execution
+    ProfilerManager& profilerManager = armnn::ProfilerManager::GetInstance();
+    std::stringstream ss;
+    profilerManager.GetProfiler()->Print(ss);;
+    std::string dump = ss.str();
+
+    // Contains CopyMemGeneric between the backends
+    std::size_t found = dump.find("CopyMemGeneric");
+    BOOST_TEST(found != std::string::npos);
+
+    // Contains SyncMemGeneric for the output
+    found = dump.find("SyncMemGeneric");
+    BOOST_TEST(found != std::string::npos);
+
+    // Does not contain ImportMemGeneric
+    found = dump.find("ImportMemGeneric");
+    BOOST_TEST(found == std::string::npos);
+
+    // Use memory import between backends
+    BOOST_TEST((layer3->GetType() == LayerType::MemCopy));
+
+    // Check output is as expected
+    BOOST_TEST(outputData == expectedOutput);
+}
+
+BOOST_AUTO_TEST_CASE(FallbackImportFromCpuAcc)
+{
+    using namespace armnn;
+
+    // Create a mock backend object
+    MockImportBackendInitialiser initialiser; // Register the Mock Backend
+    auto backendObjPtr = CreateBackendObject(MockImportBackendId());
+    BOOST_TEST((backendObjPtr != nullptr));
+
+    BackendIdSet backendIds = BackendRegistryInstance().GetBackendIds();
+    if (backendIds.find("MockRef") == backendIds.end())
+    {
+        std::string message = "Cannot load MockRef";
+        BOOST_FAIL(message);
+    }
+
+    // Create runtime in which test will run and allow fallback to CpuRef.
+    IRuntime::CreationOptions options;
+    IRuntimePtr runtime(IRuntime::Create(options));
+
+    // Builds up the structure of the network.
+    INetworkPtr net(INetwork::Create());
+
+    IConnectableLayer* input0 = net->AddInputLayer(0, "input0");
+    IConnectableLayer* input1 = net->AddInputLayer(1, "input1");
+    IConnectableLayer* input2 = net->AddInputLayer(2, "input2");
+    IConnectableLayer* sub = net->AddSubtractionLayer("sub");
+    IConnectableLayer* add = net->AddAdditionLayer("add");
+    IConnectableLayer* output = net->AddOutputLayer(0, "output");
+
+    input0->GetOutputSlot(0).Connect(sub->GetInputSlot(0));
+    input1->GetOutputSlot(0).Connect(sub->GetInputSlot(1));
+    input2->GetOutputSlot(0).Connect(add->GetInputSlot(0));
+    sub->GetOutputSlot(0).Connect(add->GetInputSlot(1));
+    add->GetOutputSlot(0).Connect(output->GetInputSlot(0));
+
+    TensorInfo info = TensorInfo({ 1, 2, 3, 2 }, DataType::Float32);
+
+    input0->GetOutputSlot(0).SetTensorInfo(info);
+    input1->GetOutputSlot(0).SetTensorInfo(info);
+    input2->GetOutputSlot(0).SetTensorInfo(info);
+    sub->GetOutputSlot(0).SetTensorInfo(info);
+    add->GetOutputSlot(0).SetTensorInfo(info);
+
+    // optimize the network
+    std::vector<BackendId> backends = { "MockRef", Compute::CpuAcc };
+    IOptimizedNetworkPtr optNet = Optimize(*net, backends, runtime->GetDeviceSpec());
+
+    OptimizedNetwork* optNetObjPtr = PolymorphicDowncast<OptimizedNetwork*>(optNet.get());
+    Graph& graph = optNetObjPtr->GetGraph();
+
+    armnn::Layer* const layer0 = GetFirstLayerWithName(graph, "input0");
+    armnn::Layer* const layer1 = GetFirstLayerWithName(graph, "input1");
+    armnn::Layer* const layer2 = GetFirstLayerWithName(graph, "input2");
+    armnn::Layer* const layer3 = GetFirstLayerWithName(graph, "sub");
+    armnn::Layer* const layer4 = GetFirstLayerWithName(graph, "[ sub (0) -> add (1) ]");
+    armnn::Layer* const layer5 = GetFirstLayerWithName(graph, "add");
+    armnn::Layer* const layer6 = GetFirstLayerWithName(graph, "output");
+
+    // Checks order is valid.
+    BOOST_TEST(CheckOrder(graph, layer0, layer1));
+    BOOST_TEST(CheckOrder(graph, layer1, layer2));
+    BOOST_TEST(CheckOrder(graph, layer2, layer3));
+    BOOST_TEST(CheckOrder(graph, layer3, layer4));
+    BOOST_TEST(CheckOrder(graph, layer4, layer5));
+    BOOST_TEST(CheckOrder(graph, layer5, layer6));
+
+    // Load it into the runtime. It should pass.
+    NetworkId netId;
+    std::string ignoredErrorMessage;
+    INetworkProperties networkProperties(true, true);
+
+    runtime->LoadNetwork(netId, std::move(optNet), ignoredErrorMessage, networkProperties);
+
+    // Creates structures for input & output
+    std::vector<float> inputData0
+    {
+        1.0f, 1.0f, 2.0f, 2.0f, 2.0f, 3.0f, 4.0f, 4.0f, 5.0f, 5.0f, 6.0f, 0.0f
+    };
+    std::vector<float> inputData1
+    {
+        0.0f, 1.0f, 1.0f, 2.0f, 3.0f, 3.0f, 3.0f, 4.0f, 4.0f, 5.0f, 5.0f, 6.0f
+    };
+    std::vector<float> inputData2
+    {
+        12.0f, 11.0f, 10.0f, 9.0f, 8.0f, 7.0f, 6.0f, 5.0f, 4.0f, 3.0f, 2.0f, 1.0f
+    };
+
+    std::vector<float> outputData(12);
+
+    std::vector<float> expectedOutput
+    {
+        13.0f, 11.0f, 11.0f, 9.0f, 7.0f, 7.0f, 7.0f, 5.0f, 5.0f, 3.0f, 3.0f, -5.0f
+    };
+
+    InputTensors inputTensors
+    {
+        { 0, armnn::ConstTensor(runtime->GetInputTensorInfo(netId, 0), inputData0.data()) },
+        { 1, armnn::ConstTensor(runtime->GetInputTensorInfo(netId, 1), inputData1.data()) },
+        { 2, armnn::ConstTensor(runtime->GetInputTensorInfo(netId, 2), inputData2.data()) }
+    };
+    OutputTensors outputTensors
+    {
+        { 0,armnn::Tensor(runtime->GetOutputTensorInfo(netId, 0), outputData.data()) }
+    };
+
+    runtime->GetProfiler(netId)->EnableProfiling(true);
+
+    // Do the inference
+    runtime->EnqueueWorkload(netId, inputTensors, outputTensors);
+
+    // Retrieve the Profiler.Print() output to get the workload execution
+    ProfilerManager& profilerManager = armnn::ProfilerManager::GetInstance();
+    std::stringstream ss;
+    profilerManager.GetProfiler()->Print(ss);;
+    std::string dump = ss.str();
+
+    // Contains ImportMemGeneric
+    std::size_t found = dump.find("ImportMemGeneric");
+    BOOST_TEST(found != std::string::npos);
+
+    // Contains SyncMemGeneric
+    found = dump.find("SyncMemGeneric");
+    BOOST_TEST(found != std::string::npos);
+
+    // Does not contain CopyMemGeneric
+    found = dump.find("CopyMemGeneric");
+    BOOST_TEST(found == std::string::npos);
+
+    // Use memory import between backends
+    BOOST_TEST((layer4->GetType() == LayerType::MemImport));
+
+    // Check output is as expected
+    BOOST_TEST(outputData == expectedOutput);
+}
+
+// Fallback test: the graph is split between the MockRef backend and CpuAcc.
+// Padding requirements on the inter-backend edge prevent zero-copy import,
+// so the optimizer is expected to insert a MemCopy layer (CopyMemGeneric in
+// the profiler dump) instead of a MemImport layer (ImportMemGeneric).
+BOOST_AUTO_TEST_CASE(FallbackPaddingCopyFromCpuAcc)
+{
+    using namespace armnn;
+
+    // Create a mock backend object
+    MockImportBackendInitialiser initialiser; // Register the Mock Backend
+    auto backendObjPtr = CreateBackendObject(MockImportBackendId());
+    BOOST_TEST((backendObjPtr != nullptr));
+
+    BackendIdSet backendIds = BackendRegistryInstance().GetBackendIds();
+    if (backendIds.find("MockRef") == backendIds.end())
+    {
+        std::string message = "Cannot load MockRef";
+        BOOST_FAIL(message);
+    }
+
+    // Create runtime in which test will run and allow fallback to CpuAcc.
+    IRuntime::CreationOptions options;
+    IRuntimePtr runtime(IRuntime::Create(options));
+
+    // Builds up the structure of the network:
+    // input0 -> pooling -> add -> output
+    // input1 --------------^
+    INetworkPtr net(INetwork::Create());
+
+    Pooling2dDescriptor desc;
+
+    IConnectableLayer* input0 = net->AddInputLayer(0, "input0");
+    IConnectableLayer* input1 = net->AddInputLayer(1, "input1");
+    IConnectableLayer* pooling = net->AddPooling2dLayer(desc, "pooling");
+    IConnectableLayer* add = net->AddAdditionLayer("add");
+    IConnectableLayer* output = net->AddOutputLayer(0, "output");
+
+    input0->GetOutputSlot(0).Connect(pooling->GetInputSlot(0));
+    input1->GetOutputSlot(0).Connect(add->GetInputSlot(1));
+    pooling->GetOutputSlot(0).Connect(add->GetInputSlot(0));
+    add->GetOutputSlot(0).Connect(output->GetInputSlot(0));
+
+    TensorInfo inputInfo = TensorInfo({ 1, 2, 3, 2 }, DataType::Float32);
+    TensorInfo poolingInfo = TensorInfo({ 1, 2, 1, 1 }, DataType::Float32);
+
+    input0->GetOutputSlot(0).SetTensorInfo(inputInfo);
+    input1->GetOutputSlot(0).SetTensorInfo(poolingInfo);
+    pooling->GetOutputSlot(0).SetTensorInfo(poolingInfo);
+    add->GetOutputSlot(0).SetTensorInfo(poolingInfo);
+
+    // Optimize the network; preference order is MockRef first, CpuAcc as fallback.
+    std::vector<BackendId> backends = { "MockRef", Compute::CpuAcc };
+    IOptimizedNetworkPtr optNet = Optimize(*net, backends, runtime->GetDeviceSpec());
+
+    OptimizedNetwork* optNetObjPtr = PolymorphicDowncast<OptimizedNetwork*>(optNet.get());
+    Graph& graph = optNetObjPtr->GetGraph();
+
+    // "[ pooling (0) -> add (0) ]" is the name given to the compatibility layer
+    // the optimizer inserts on the connection between the two backends.
+    armnn::Layer* const layer0 = GetFirstLayerWithName(graph, "input0");
+    armnn::Layer* const layer1 = GetFirstLayerWithName(graph, "input1");
+    armnn::Layer* const layer2 = GetFirstLayerWithName(graph, "pooling");
+    armnn::Layer* const layer3 = GetFirstLayerWithName(graph, "[ pooling (0) -> add (0) ]");
+    armnn::Layer* const layer4 = GetFirstLayerWithName(graph, "add");
+    armnn::Layer* const layer5 = GetFirstLayerWithName(graph, "output");
+
+    // Checks order is valid.
+    BOOST_TEST(CheckOrder(graph, layer0, layer1));
+    BOOST_TEST(CheckOrder(graph, layer1, layer2));
+    BOOST_TEST(CheckOrder(graph, layer2, layer3));
+    BOOST_TEST(CheckOrder(graph, layer3, layer4));
+    BOOST_TEST(CheckOrder(graph, layer4, layer5));
+
+    // Load it into the runtime. It should pass.
+    NetworkId netId;
+    std::string ignoredErrorMessage;
+    // Enable both import and export of input/output memory.
+    INetworkProperties networkProperties(true, true);
+
+    runtime->LoadNetwork(netId, std::move(optNet), ignoredErrorMessage, networkProperties);
+
+    // Creates structures for input & output
+    std::vector<float> inputData0
+    {
+        1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f, 8.0f, 9.0f, 10.0f, 11.0f, 12.0f
+    };
+    std::vector<float> inputData1
+    {
+        -1.0f, 3.0f
+    };
+
+    std::vector<float> outputData(2);
+
+    std::vector<float> expectedOutput
+    {
+        5.0f, 15.0f
+    };
+
+    InputTensors inputTensors
+    {
+        { 0, armnn::ConstTensor(runtime->GetInputTensorInfo(netId, 0), inputData0.data()) },
+        { 1, armnn::ConstTensor(runtime->GetInputTensorInfo(netId, 1), inputData1.data()) }
+    };
+    OutputTensors outputTensors
+    {
+        { 0, armnn::Tensor(runtime->GetOutputTensorInfo(netId, 0), outputData.data()) }
+    };
+
+    runtime->GetProfiler(netId)->EnableProfiling(true);
+
+    // Do the inference
+    runtime->EnqueueWorkload(netId, inputTensors, outputTensors);
+
+    // Retrieve the Profiler.Print() output to get the workload execution
+    ProfilerManager& profilerManager = armnn::ProfilerManager::GetInstance();
+    std::stringstream ss;
+    profilerManager.GetProfiler()->Print(ss);
+    std::string dump = ss.str();
+
+    // Contains CopyMemGeneric between the backends
+    std::size_t found = dump.find("CopyMemGeneric");
+    BOOST_TEST(found != std::string::npos);
+
+    // Contains SyncMemGeneric for the output
+    found = dump.find("SyncMemGeneric");
+    BOOST_TEST(found != std::string::npos);
+
+    // Does not contain ImportMemGeneric
+    found = dump.find("ImportMemGeneric");
+    BOOST_TEST(found == std::string::npos);
+
+    // Uses memory copy (not import) between the backends
+    BOOST_TEST((layer3->GetType() == LayerType::MemCopy));
+
+    // Check output is as expected
+    BOOST_TEST(outputData == expectedOutput);
+}
+
+BOOST_AUTO_TEST_SUITE_END()
index 8b3e3fd..c6a562f 100644 (file)
@@ -12,6 +12,7 @@
 #include <armnn/utility/PolymorphicDowncast.hpp>
 
 #include <test/GraphUtils.hpp>
+#include <arm_compute/runtime/Allocator.h>
 
 #include <boost/test/unit_test.hpp>
 
@@ -160,4 +161,77 @@ BOOST_AUTO_TEST_CASE(ConcatOnXorYSubTensorsNoPaddinRequiredTest)
     }
 }
 
+// Verifies that a NeonTensorHandle created with memory management enabled
+// yields a valid, writable/readable buffer across repeated Acquire/Release
+// cycles of the memory manager, and that Import() is rejected because import
+// is disabled for memory-managed handles.
+BOOST_AUTO_TEST_CASE(NeonTensorHandleFactoryMemoryManaged)
+{
+    std::shared_ptr<NeonMemoryManager> memoryManager = std::make_shared<NeonMemoryManager>(
+        std::make_unique<arm_compute::Allocator>(),
+        BaseMemoryManager::MemoryAffinity::Offset);
+    NeonTensorHandleFactory handleFactory(memoryManager);
+    TensorInfo info({ 1, 1, 2, 1 }, DataType::Float32);
+
+    // create TensorHandle with memory managed
+    auto handle = handleFactory.CreateTensorHandle(info, true);
+    handle->Manage();
+    handle->Allocate();
+
+    // First Acquire/Release cycle: backing memory is only valid between
+    // Acquire() and Release().
+    memoryManager->Acquire();
+    {
+        float* buffer = reinterpret_cast<float*>(handle->Map());
+        BOOST_CHECK(buffer != nullptr); // Yields a valid pointer
+        buffer[0] = 1.5f;
+        buffer[1] = 2.5f;
+        BOOST_CHECK(buffer[0] == 1.5f); // Memory is writable and readable
+        BOOST_CHECK(buffer[1] == 2.5f); // Memory is writable and readable
+    }
+    memoryManager->Release();
+
+    // Second cycle: the handle must map to valid memory again after the
+    // manager re-acquires its pools.
+    memoryManager->Acquire();
+    {
+        float* buffer = reinterpret_cast<float*>(handle->Map());
+        BOOST_CHECK(buffer != nullptr); // Yields a valid pointer
+        buffer[0] = 3.5f;
+        buffer[1] = 4.5f;
+        BOOST_CHECK(buffer[0] == 3.5f); // Memory is writable and readable
+        BOOST_CHECK(buffer[1] == 4.5f); // Memory is writable and readable
+    }
+    memoryManager->Release();
+
+    float testPtr[2] = { 2.5f, 5.5f };
+    // Cannot import as import is disabled
+    BOOST_CHECK(!handle->Import(static_cast<void*>(testPtr), MemorySource::Malloc));
+}
+
+// Verifies the zero-copy path: a NeonTensorHandle created WITHOUT memory
+// management allocates no buffer of its own and instead imports user-supplied
+// memory, so that Map() returns the imported pointer directly.
+BOOST_AUTO_TEST_CASE(NeonTensorHandleFactoryImport)
+{
+    std::shared_ptr<NeonMemoryManager> memoryManager = std::make_shared<NeonMemoryManager>(
+        std::make_unique<arm_compute::Allocator>(),
+        BaseMemoryManager::MemoryAffinity::Offset);
+    NeonTensorHandleFactory handleFactory(memoryManager);
+    TensorInfo info({ 1, 1, 2, 1 }, DataType::Float32);
+
+    // create TensorHandle without memory managed
+    auto handle = handleFactory.CreateTensorHandle(info, false);
+    handle->Manage();
+    handle->Allocate();
+    memoryManager->Acquire();
+
+    // No buffer allocated when import is enabled
+    BOOST_CHECK((PolymorphicDowncast<NeonTensorHandle*>(handle.get()))->GetTensor().buffer() == nullptr);
+
+    float testPtr[2] = { 2.5f, 5.5f };
+    // Correctly import
+    BOOST_CHECK(handle->Import(static_cast<void*>(testPtr), MemorySource::Malloc));
+    float* buffer = reinterpret_cast<float*>(handle->Map());
+    BOOST_CHECK(buffer != nullptr); // Yields a valid pointer after import
+    BOOST_CHECK(buffer == testPtr); // buffer is pointing to testPtr
+    // Memory is writable and readable with correct value
+    BOOST_CHECK(buffer[0] == 2.5f);
+    BOOST_CHECK(buffer[1] == 5.5f);
+    // Writes through the handle land in the imported user memory.
+    buffer[0] = 3.5f;
+    buffer[1] = 10.0f;
+    BOOST_CHECK(buffer[0] == 3.5f);
+    BOOST_CHECK(buffer[1] == 10.0f);
+    memoryManager->Release();
+}
+
 BOOST_AUTO_TEST_SUITE_END()