From 4f1e8e47e29d42ed5862cccb29cf183853c4a86c Mon Sep 17 00:00:00 2001 From: David Monahan Date: Wed, 4 Sep 2019 09:22:10 +0100 Subject: [PATCH] IVGCVSW-3687 Add INetworkProperties to LoadNetwork * Allows users to specify if Import/Export should be used Signed-off-by: David Monahan Change-Id: I64da26a6acbeb91ef72d31b6ccc01bb1447f624d --- include/armnn/Exceptions.hpp | 10 +++ include/armnn/IRuntime.hpp | 21 ++++- src/armnn/LoadedNetwork.cpp | 50 +++++++---- src/armnn/LoadedNetwork.hpp | 7 +- src/armnn/Runtime.cpp | 14 ++- src/armnn/Runtime.hpp | 7 +- .../backendsCommon/test/EndToEndTestImpl.hpp | 99 +++++++++++++++++----- src/backends/reference/test/RefEndToEndTests.cpp | 7 +- 8 files changed, 167 insertions(+), 48 deletions(-) diff --git a/include/armnn/Exceptions.hpp b/include/armnn/Exceptions.hpp index 1740a8c..f8e0b43 100644 --- a/include/armnn/Exceptions.hpp +++ b/include/armnn/Exceptions.hpp @@ -115,6 +115,16 @@ class RuntimeException : public Exception using Exception::Exception; }; +class MemoryImportException : public Exception +{ + using Exception::Exception; +}; + +class MemoryExportException : public Exception +{ + using Exception::Exception; +}; + template void ConditionalThrow(bool condition, const std::string& message) { diff --git a/include/armnn/IRuntime.hpp b/include/armnn/IRuntime.hpp index 41e1c47..68965cf 100644 --- a/include/armnn/IRuntime.hpp +++ b/include/armnn/IRuntime.hpp @@ -23,6 +23,18 @@ class IGpuAccTunedParameters; class IRuntime; using IRuntimePtr = std::unique_ptr; +struct INetworkProperties +{ + INetworkProperties(bool importEnabled = false, bool exportEnabled = false) + : m_ImportEnabled(importEnabled), + m_ExportEnabled(exportEnabled) {} + + const bool m_ImportEnabled; + const bool m_ExportEnabled; + + virtual ~INetworkProperties() {} +}; + class IRuntime { public: @@ -82,7 +94,12 @@ public: /// @return armnn::Status virtual Status LoadNetwork(NetworkId& networkIdOut, IOptimizedNetworkPtr network, - std::string & errorMessage) = 
0; + std::string& errorMessage) = 0; + + virtual Status LoadNetwork(NetworkId& networkIdOut, + IOptimizedNetworkPtr network, + std::string& errorMessage, + const INetworkProperties& networkProperties) = 0; virtual TensorInfo GetInputTensorInfo(NetworkId networkId, LayerBindingId layerId) const = 0; virtual TensorInfo GetOutputTensorInfo(NetworkId networkId, LayerBindingId layerId) const = 0; @@ -163,4 +180,4 @@ protected: virtual ~IGpuAccTunedParameters() {}; }; -} +} // namespace armnn diff --git a/src/armnn/LoadedNetwork.cpp b/src/armnn/LoadedNetwork.cpp index 5b64085..1000ece 100644 --- a/src/armnn/LoadedNetwork.cpp +++ b/src/armnn/LoadedNetwork.cpp @@ -41,7 +41,8 @@ std::string ToErrorMessage(const char * prefix, const ExceptionType & error) } // anonymous std::unique_ptr LoadedNetwork::MakeLoadedNetwork(std::unique_ptr net, - std::string & errorMessage) + std::string& errorMessage, + const INetworkProperties& networkProperties) { std::unique_ptr loadedNetwork; @@ -55,7 +56,7 @@ std::unique_ptr LoadedNetwork::MakeLoadedNetwork(std::unique_ptr< try { - loadedNetwork.reset(new LoadedNetwork(std::move(net))); + loadedNetwork.reset(new LoadedNetwork(std::move(net), networkProperties)); } catch (const armnn::RuntimeException& error) { @@ -73,8 +74,11 @@ std::unique_ptr LoadedNetwork::MakeLoadedNetwork(std::unique_ptr< return loadedNetwork; } -LoadedNetwork::LoadedNetwork(std::unique_ptr net) - : m_OptimizedNetwork(std::move(net)) +LoadedNetwork::LoadedNetwork(std::unique_ptr net, + const INetworkProperties& networkProperties) : + m_OptimizedNetwork(std::move(net)), + m_IsImportEnabled(networkProperties.m_ImportEnabled), + m_IsExportEnabled(networkProperties.m_ExportEnabled) { // Create a profiler and register it for the current thread. 
m_Profiler = std::make_shared(); @@ -392,7 +396,7 @@ void LoadedNetwork::EnqueueInput(const BindableLayer& layer, ITensorHandle* tens info.m_OutputTensorInfos.push_back(outputTensorInfo); MemorySourceFlags importFlags = outputTensorHandle->GetImportFlags(); - if (CheckFlag(importFlags, MemorySource::Malloc)) // Try import the input tensor + if (CheckFlag(importFlags, MemorySource::Malloc) && m_IsImportEnabled) // Try import the input tensor { // This assumes a CPU Tensor handle void* mem = tensorHandle->Map(false); @@ -402,13 +406,16 @@ void LoadedNetwork::EnqueueInput(const BindableLayer& layer, ITensorHandle* tens return; // No need for a workload since the import has been done. } tensorHandle->Unmap(); + throw MemoryImportException("EnqueueInput: Memory Import failed"); } + else + { + // Create a mem copy workload for input since we did not import + auto inputWorkload = std::make_unique(inputQueueDescriptor, info); - // Create a mem copy workload for input since we could not import - auto inputWorkload = std::make_unique(inputQueueDescriptor, info); - - BOOST_ASSERT_MSG(inputWorkload, "No input workload created"); - m_InputQueue.push_back(move(inputWorkload)); + BOOST_ASSERT_MSG(inputWorkload, "No input workload created"); + m_InputQueue.push_back(move(inputWorkload)); + } } void LoadedNetwork::EnqueueOutput(const BindableLayer& layer, ITensorHandle* tensorHandle, const TensorInfo& tensorInfo) @@ -444,7 +451,8 @@ void LoadedNetwork::EnqueueOutput(const BindableLayer& layer, ITensorHandle* ten // b) The tensor has zero padding // c) There is only one connection to the OutputSlot and it is to an OutputLayer. // d) The output pointer is allocated via malloc. 
(Other types will be supported in a later release) - if (layer.GetInputSlots()[0].GetConnectedOutputSlot()->GetOwningLayer().GetType() != LayerType::Input) + if (layer.GetInputSlots()[0].GetConnectedOutputSlot()->GetOwningLayer().GetType() != LayerType::Input + && m_IsExportEnabled) { if (layer.GetInputSlots()[0].GetConnectedOutputSlot()->GetNumConnections() == 1) { @@ -467,17 +475,23 @@ void LoadedNetwork::EnqueueOutput(const BindableLayer& layer, ITensorHandle* ten return; //No need to add the output workload below } + else + { + throw MemoryExportException("EnqueueOutput: Memory Export failed"); + } } } } + else + { + // If we got here then we couldn't import the memory, so add an output workload which performs a memcopy. + outputQueueDescriptor.m_Inputs.push_back(inputTensorHandle); + info.m_InputTensorInfos.push_back(inputTensorInfo); - // If we got here then we couldn't import the memory, so add an output workload which performs a memcopy. - outputQueueDescriptor.m_Inputs.push_back(inputTensorHandle); - info.m_InputTensorInfos.push_back(inputTensorInfo); - - auto outputWorkload = std::make_unique(outputQueueDescriptor, info); - BOOST_ASSERT_MSG(outputWorkload, "No output workload created"); - m_OutputQueue.push_back(move(outputWorkload)); + auto outputWorkload = std::make_unique(outputQueueDescriptor, info); + BOOST_ASSERT_MSG(outputWorkload, "No output workload created"); + m_OutputQueue.push_back(move(outputWorkload)); + } } void LoadedNetwork::AllocateWorkingMemory() diff --git a/src/armnn/LoadedNetwork.hpp b/src/armnn/LoadedNetwork.hpp index 808a932..08c09b8 100644 --- a/src/armnn/LoadedNetwork.hpp +++ b/src/armnn/LoadedNetwork.hpp @@ -41,7 +41,8 @@ public: Status EnqueueWorkload(const InputTensors& inputTensors, const OutputTensors& outputTensors); static std::unique_ptr MakeLoadedNetwork(std::unique_ptr net, - std::string & errorMessage); + std::string & errorMessage, + const INetworkProperties& networkProperties); // NOTE we return by reference as the 
purpose of this method is only to provide // access to the private m_Profiler and in theory we should not need to increment @@ -55,7 +56,7 @@ public: private: void AllocateWorkingMemory(); - LoadedNetwork(std::unique_ptr net); + LoadedNetwork(std::unique_ptr net, const INetworkProperties& networkProperties); void EnqueueInput(const BindableLayer& layer, ITensorHandle* tensorHandle, const TensorInfo& tensorInfo); @@ -84,6 +85,8 @@ private: mutable std::mutex m_WorkingMemMutex; bool m_IsWorkingMemAllocated=false; + bool m_IsImportEnabled=false; + bool m_IsExportEnabled=false; TensorHandleFactoryRegistry m_TensorHandleFactoryRegistry; }; diff --git a/src/armnn/Runtime.cpp b/src/armnn/Runtime.cpp index 9e87484..e478356 100644 --- a/src/armnn/Runtime.cpp +++ b/src/armnn/Runtime.cpp @@ -49,7 +49,16 @@ Status Runtime::LoadNetwork(NetworkId& networkIdOut, IOptimizedNetworkPtr inNetw Status Runtime::LoadNetwork(NetworkId& networkIdOut, IOptimizedNetworkPtr inNetwork, - std::string & errorMessage) + std::string& errorMessage) +{ + INetworkProperties networkProperties; + return LoadNetwork(networkIdOut, std::move(inNetwork), errorMessage, networkProperties); +} + +Status Runtime::LoadNetwork(NetworkId& networkIdOut, + IOptimizedNetworkPtr inNetwork, + std::string& errorMessage, + const INetworkProperties& networkProperties) { IOptimizedNetwork* rawNetwork = inNetwork.release(); @@ -62,7 +71,8 @@ Status Runtime::LoadNetwork(NetworkId& networkIdOut, unique_ptr loadedNetwork = LoadedNetwork::MakeLoadedNetwork( std::unique_ptr(boost::polymorphic_downcast(rawNetwork)), - errorMessage); + errorMessage, + networkProperties); if (!loadedNetwork) { diff --git a/src/armnn/Runtime.hpp b/src/armnn/Runtime.hpp index 35684f1..a028c87 100644 --- a/src/armnn/Runtime.hpp +++ b/src/armnn/Runtime.hpp @@ -38,7 +38,12 @@ public: /// @return armnn::Status virtual Status LoadNetwork(NetworkId& networkIdOut, IOptimizedNetworkPtr network, - std::string & errorMessage) override; + std::string& 
errorMessage) override; + + virtual Status LoadNetwork(NetworkId& networkIdOut, + IOptimizedNetworkPtr network, + std::string& errorMessage, + const INetworkProperties& networkProperties) override; virtual TensorInfo GetInputTensorInfo(NetworkId networkId, LayerBindingId layerId) const override; virtual TensorInfo GetOutputTensorInfo(NetworkId networkId, LayerBindingId layerId) const override; diff --git a/src/backends/backendsCommon/test/EndToEndTestImpl.hpp b/src/backends/backendsCommon/test/EndToEndTestImpl.hpp index 040782b..ecc8806 100644 --- a/src/backends/backendsCommon/test/EndToEndTestImpl.hpp +++ b/src/backends/backendsCommon/test/EndToEndTestImpl.hpp @@ -172,7 +172,7 @@ void EndToEndLayerTestImpl(INetworkPtr network, } } -inline void ImportNonAlignedPointerTest(std::vector backends) +inline void ImportNonAlignedInputPointerTest(std::vector backends) { using namespace armnn; @@ -201,7 +201,10 @@ inline void ImportNonAlignedPointerTest(std::vector backends) // Loads it into the runtime. 
NetworkId netId; - runtime->LoadNetwork(netId, std::move(optNet)); + std::string ignoredErrorMessage; + // Enable Importing + INetworkProperties networkProperties(true, true); + runtime->LoadNetwork(netId, std::move(optNet), ignoredErrorMessage, networkProperties); // Creates structures for input & output std::vector inputData @@ -214,8 +217,8 @@ inline void ImportNonAlignedPointerTest(std::vector backends) std::vector outputData(5); - // Misaligned output - float* misalignedOutputData = reinterpret_cast(reinterpret_cast(outputData.data()) + 1); + // Aligned output + float * alignedOutputData = outputData.data(); InputTensors inputTensors { @@ -223,31 +226,80 @@ inline void ImportNonAlignedPointerTest(std::vector backends) }; OutputTensors outputTensors { - {0,armnn::Tensor(runtime->GetOutputTensorInfo(netId, 0), misalignedOutputData)} + {0,armnn::Tensor(runtime->GetOutputTensorInfo(netId, 0), alignedOutputData)} }; // The result of the inference is not important, just the fact that there // should not be CopyMemGeneric workloads. 
runtime->GetProfiler(netId)->EnableProfiling(true); - // Do the inference - runtime->EnqueueWorkload(netId, inputTensors, outputTensors); + // Do the inference and expect it to fail with a MemoryImportException + BOOST_CHECK_THROW(runtime->EnqueueWorkload(netId, inputTensors, outputTensors), MemoryImportException); +} - // Retrieve the Profiler.Print() output to get the workload execution - ProfilerManager& profilerManager = armnn::ProfilerManager::GetInstance(); - std::stringstream ss; - profilerManager.GetProfiler()->Print(ss);; - std::string dump = ss.str(); +inline void ImportNonAlignedOutputPointerTest(std::vector backends) +{ + using namespace armnn; - // Contains RefNormalizationWorkload - std::size_t found = dump.find("RefNormalizationWorkload"); - BOOST_TEST(found != std::string::npos); - // No Contains SyncMemGeneric (Created when importing the output tensor handle) - found = dump.find("SyncMemGeneric"); - BOOST_TEST(found == std::string::npos); - // Contains CopyMemGeneric - found = dump.find("CopyMemGeneric"); - BOOST_TEST(found != std::string::npos); + // Create runtime in which test will run + IRuntime::CreationOptions options; + IRuntimePtr runtime(armnn::IRuntime::Create(options)); + + // build up the structure of the network + INetworkPtr net(INetwork::Create()); + + IConnectableLayer* input = net->AddInputLayer(0); + + NormalizationDescriptor descriptor; + IConnectableLayer* norm = net->AddNormalizationLayer(descriptor); + + IConnectableLayer* output = net->AddOutputLayer(0); + + input->GetOutputSlot(0).Connect(norm->GetInputSlot(0)); + norm->GetOutputSlot(0).Connect(output->GetInputSlot(0)); + + input->GetOutputSlot(0).SetTensorInfo(TensorInfo({ 1, 1, 4, 1 }, DataType::Float32)); + norm->GetOutputSlot(0).SetTensorInfo(TensorInfo({ 1, 1, 4, 1 }, DataType::Float32)); + + // Optimize the network + IOptimizedNetworkPtr optNet = Optimize(*net, backends, runtime->GetDeviceSpec()); + + // Loads it into the runtime. 
+ NetworkId netId; + std::string ignoredErrorMessage; + // Enable Importing + INetworkProperties networkProperties(true, true); + runtime->LoadNetwork(netId, std::move(optNet), ignoredErrorMessage, networkProperties); + + // Creates structures for input & output + std::vector inputData + { + 1.0f, 2.0f, 3.0f, 4.0f, 5.0f + }; + + // Aligned input + float * alignedInputData = inputData.data(); + + std::vector outputData(5); + + // Misaligned output + float* misalignedOutputData = reinterpret_cast(reinterpret_cast(outputData.data()) + 1); + + InputTensors inputTensors + { + {0,armnn::ConstTensor(runtime->GetInputTensorInfo(netId, 0), alignedInputData)}, + }; + OutputTensors outputTensors + { + {0,armnn::Tensor(runtime->GetOutputTensorInfo(netId, 0), misalignedOutputData)} + }; + + // The result of the inference is not important; the misaligned output + // pointer should cause the memory export to fail with an exception. + runtime->GetProfiler(netId)->EnableProfiling(true); + + // Do the inference and expect it to fail with a MemoryExportException + BOOST_CHECK_THROW(runtime->EnqueueWorkload(netId, inputTensors, outputTensors), MemoryExportException); } inline void ImportAlignedPointerTest(std::vector backends) @@ -279,7 +331,10 @@ inline void ImportAlignedPointerTest(std::vector backends) // Loads it into the runtime. 
NetworkId netId; - runtime->LoadNetwork(netId, std::move(optNet)); + std::string ignoredErrorMessage; + // Enable Importing + INetworkProperties networkProperties(true, true); + runtime->LoadNetwork(netId, std::move(optNet), ignoredErrorMessage, networkProperties); // Creates structures for input & output std::vector inputData diff --git a/src/backends/reference/test/RefEndToEndTests.cpp b/src/backends/reference/test/RefEndToEndTests.cpp index 6024f15..52454a2 100644 --- a/src/backends/reference/test/RefEndToEndTests.cpp +++ b/src/backends/reference/test/RefEndToEndTests.cpp @@ -973,7 +973,12 @@ BOOST_AUTO_TEST_CASE(RefResizeNearestNeighborEndToEndInt16NhwcTest) // Only run these tests on non Android platforms BOOST_AUTO_TEST_CASE(RefImportNonAlignedPointerTest) { - ImportNonAlignedPointerTest(defaultBackends); + ImportNonAlignedInputPointerTest(defaultBackends); +} + +BOOST_AUTO_TEST_CASE(RefExportNonAlignedPointerTest) +{ + ImportNonAlignedOutputPointerTest(defaultBackends); } BOOST_AUTO_TEST_CASE(RefImportAlignedPointerTest) -- 2.7.4