auto backend = backendIt->second.get();
auto tensorHandleFactoryIds = backend->GetHandleFactoryPreferences();
bool found = false;
- boost::ignore_unused(found);
for (auto preference : tensorHandleFactoryIds)
{
{
auto srcPref = srcOutputSlot.GetTensorHandleFactoryId();
auto srcFactory = registry.GetFactory(srcPref);
+
if (srcFactory)
{
bool canExportImport =
- (factory->GetImportFlags() & srcFactory->GetExportFlags()) != 0;
+ (factory->GetImportFlags() & srcFactory->GetExportFlags()) != 0;
+
if (factory->SupportsMapUnmap() || canExportImport)
{
compOutputSlot.SetTensorHandleFactory(preference);
// b) The tensor has zero padding
// c) There is only one connection to the OutputSlot and it is to an OutputLayer.
// d) The output pointer is allocated via malloc. (Other types will be supported in a later release)
- if (layer.GetInputSlots()[0].GetConnectedOutputSlot()->GetNumConnections() == 1)
+ if (layer.GetInputSlots()[0].GetConnectedOutputSlot()->GetOwningLayer().GetType() != LayerType::Input)
{
- MemorySourceFlags importFlags = inputTensorHandle->GetImportFlags();
- if (CheckFlag(importFlags, MemorySource::Malloc))
+ if (layer.GetInputSlots()[0].GetConnectedOutputSlot()->GetNumConnections() == 1)
{
- void* mem = tensorHandle->Map(false);
- bool importOk = inputTensorHandle->Import(mem, MemorySource::Malloc);
- tensorHandle->Unmap();
-
- if (importOk)
+ MemorySourceFlags importFlags = inputTensorHandle->GetImportFlags();
+ if (CheckFlag(importFlags, MemorySource::Malloc))
{
- // Insert synchronization workload
- MemSyncQueueDescriptor syncDesc;
- syncDesc.m_Inputs.push_back(inputTensorHandle);
- info.m_InputTensorInfos.push_back(inputTensorInfo);
- auto syncWorkload = std::make_unique<SyncMemGenericWorkload>(syncDesc, info);
- BOOST_ASSERT_MSG(syncWorkload, "No sync workload created");
- m_OutputQueue.push_back(move(syncWorkload));
-
- return; //No need to add the output workload below
+ void *mem = tensorHandle->Map(false);
+ bool importOk = inputTensorHandle->Import(mem, MemorySource::Malloc);
+ tensorHandle->Unmap();
+
+ if (importOk)
+ {
+ // Insert synchronization workload
+ MemSyncQueueDescriptor syncDesc;
+ syncDesc.m_Inputs.push_back(inputTensorHandle);
+ info.m_InputTensorInfos.push_back(inputTensorInfo);
+ auto syncWorkload = std::make_unique<SyncMemGenericWorkload>(syncDesc, info);
+ BOOST_ASSERT_MSG(syncWorkload, "No sync workload created");
+ m_OutputQueue.push_back(move(syncWorkload));
+
+ return; //No need to add the output workload below
+ }
}
}
}
return nullptr;
}
- const FactoryId GetId() const override { return m_Id; }
+ const FactoryId& GetId() const override { return m_Id; }
bool SupportsSubTensors() const override { return true; }
return nullptr;
}
- const FactoryId GetId() const override { return m_Id; }
+ const FactoryId& GetId() const override { return m_Id; }
bool SupportsSubTensors() const override { return true; }
virtual std::unique_ptr<ITensorHandle> CreateTensorHandle(const TensorInfo& tensorInfo,
DataLayout dataLayout) const = 0;
- virtual const FactoryId GetId() const = 0;
+ virtual const FactoryId& GetId() const = 0;
virtual bool SupportsSubTensors() const = 0;
return s_Id;
}
-const FactoryId ClTensorHandleFactory::GetId() const
+const FactoryId& ClTensorHandleFactory::GetId() const
{
return GetIdStatic();
}
static const FactoryId& GetIdStatic();
- const FactoryId GetId() const override;
+ const FactoryId& GetId() const override;
bool SupportsSubTensors() const override;
return s_Id;
}
-const FactoryId NeonTensorHandleFactory::GetId() const
+const FactoryId& NeonTensorHandleFactory::GetId() const
{
return GetIdStatic();
}
return m_ImportFlags;
}
-} // namespace armnn
\ No newline at end of file
+} // namespace armnn
static const FactoryId& GetIdStatic();
- const FactoryId GetId() const override;
+ const FactoryId& GetId() const override;
bool SupportsSubTensors() const override;
RefRegistryInitializer.cpp
RefWorkloadFactory.cpp
RefWorkloadFactory.hpp
+ RefTensorHandleFactory.hpp
+ RefTensorHandleFactory.cpp
)
add_library(armnnRefBackend OBJECT ${armnnRefBackend_sources})
#include "RefBackendId.hpp"
#include "RefWorkloadFactory.hpp"
#include "RefLayerSupport.hpp"
+#include "RefTensorHandleFactory.hpp"
#include <backendsCommon/IBackendContext.hpp>
#include <backendsCommon/IMemoryManager.hpp>
return std::make_unique<RefWorkloadFactory>(boost::polymorphic_pointer_downcast<RefMemoryManager>(memoryManager));
}
+IBackendInternal::IWorkloadFactoryPtr RefBackend::CreateWorkloadFactory(
+ class TensorHandleFactoryRegistry& tensorHandleFactoryRegistry) const
+{
+ auto memoryManager = std::make_shared<RefMemoryManager>();
+
+ tensorHandleFactoryRegistry.RegisterMemoryManager(memoryManager);
+
+ return std::make_unique<RefWorkloadFactory>(boost::polymorphic_pointer_downcast<RefMemoryManager>(memoryManager));
+}
+
IBackendInternal::IBackendContextPtr RefBackend::CreateBackendContext(const IRuntime::CreationOptions&) const
{
return IBackendContextPtr{};
return optimizationViews;
}
+std::vector<ITensorHandleFactory::FactoryId> RefBackend::GetHandleFactoryPreferences() const
+{
+ return std::vector<ITensorHandleFactory::FactoryId> { RefTensorHandleFactory::GetIdStatic() };
+}
+
+void RefBackend::RegisterTensorHandleFactories(class TensorHandleFactoryRegistry& registry)
+{
+ auto memoryManager = std::make_shared<RefMemoryManager>();
+
+ registry.RegisterMemoryManager(memoryManager);
+ registry.RegisterFactory(std::make_unique<RefTensorHandleFactory>(memoryManager));
+}
+
} // namespace armnn
IBackendInternal::IWorkloadFactoryPtr CreateWorkloadFactory(
const IBackendInternal::IMemoryManagerSharedPtr& memoryManager = nullptr) const override;
+ IBackendInternal::IWorkloadFactoryPtr CreateWorkloadFactory(
+ class TensorHandleFactoryRegistry& tensorHandleFactoryRegistry) const override;
+
IBackendInternal::IBackendContextPtr CreateBackendContext(const IRuntime::CreationOptions&) const override;
IBackendInternal::Optimizations GetOptimizations() const override;
IBackendInternal::ILayerSupportSharedPtr GetLayerSupport() const override;
OptimizationViews OptimizeSubgraphView(const SubgraphView& subgraph) const override;
+
+ std::vector<ITensorHandleFactory::FactoryId> GetHandleFactoryPreferences() const override;
+
+ void RegisterTensorHandleFactories(class TensorHandleFactoryRegistry& registry) override;
};
} // namespace armnn
m_TensorInfo(tensorInfo),
m_MemoryManager(memoryManager),
m_Pool(nullptr),
- m_UnmanagedMemory(nullptr)
+ m_UnmanagedMemory(nullptr),
+ m_ImportFlags(static_cast<MemorySourceFlags>(MemorySource::Undefined)),
+ m_Imported(false)
+{
+
+}
+
// Constructs a handle that may later have external memory attached via
// Import(); importFlags declares which MemorySource values Import() accepts.
RefTensorHandle::RefTensorHandle(const TensorInfo& tensorInfo, std::shared_ptr<RefMemoryManager> &memoryManager,
                                 MemorySourceFlags importFlags)
    : m_TensorInfo(tensorInfo),
      m_MemoryManager(memoryManager),
      m_Pool(nullptr),
      m_UnmanagedMemory(nullptr),
      m_ImportFlags(importFlags),
      m_Imported(false)
{
}
memcpy(dest, src, m_TensorInfo.GetNumBytes());
}
+bool RefTensorHandle::Import(void* memory, MemorySource source)
+{
+
+ if (m_ImportFlags & static_cast<MemorySourceFlags>(source))
+ {
+ if (source == MemorySource::Malloc)
+ {
+ // Checks the 16 byte memory alignment.
+ if (reinterpret_cast<uint64_t>(memory) % 16)
+ {
+ return false;
+ }
+
+ // m_UnmanagedMemory not yet allocated.
+ if (!m_Imported && !m_UnmanagedMemory)
+ {
+ m_UnmanagedMemory = memory;
+ m_Imported = true;
+ return true;
+ }
+
+ // m_UnmanagedMemory initially allocated with Allocate().
+ if (!m_Imported && m_UnmanagedMemory)
+ {
+ return false;
+ }
+
+ // m_UnmanagedMemory previously imported.
+ if (m_Imported)
+ {
+ m_UnmanagedMemory = memory;
+ return true;
+ }
+ }
+ }
+
+ return false;
+}
+
}
public:
RefTensorHandle(const TensorInfo& tensorInfo, std::shared_ptr<RefMemoryManager> &memoryManager);
+ RefTensorHandle(const TensorInfo& tensorInfo, std::shared_ptr<RefMemoryManager> &memoryManager,
+ MemorySourceFlags importFlags);
+
~RefTensorHandle();
virtual void Manage() override;
return m_TensorInfo;
}
    // Memory sources this handle accepts in Import(); fixed at construction.
    virtual MemorySourceFlags GetImportFlags() const override
    {
        return m_ImportFlags;
    }

    // Points this handle at externally allocated memory; returns false when
    // the source or alignment is not acceptable. Defined in the .cpp file.
    virtual bool Import(void* memory, MemorySource source) override;
+
private:
// Only used for testing
void CopyOutTo(void*) const override;
std::shared_ptr<RefMemoryManager> m_MemoryManager;
RefMemoryManager::Pool* m_Pool;
mutable void *m_UnmanagedMemory;
+ MemorySourceFlags m_ImportFlags;
+ bool m_Imported;
};
}
--- /dev/null
+//
+// Copyright © 2017 Arm Ltd. All rights reserved.
+// SPDX-License-Identifier: MIT
+//
+
+#include "RefTensorHandleFactory.hpp"
+#include "RefTensorHandle.hpp"
+
+#include <boost/core/ignore_unused.hpp>
+
+namespace armnn
+{
+
+using FactoryId = ITensorHandleFactory::FactoryId;
+
+const FactoryId& RefTensorHandleFactory::GetIdStatic()
+{
+ static const FactoryId s_Id(RefTensorHandleFactoryId());
+ return s_Id;
+}
+
+std::unique_ptr<ITensorHandle> RefTensorHandleFactory::CreateSubTensorHandle(ITensorHandle& parent,
+ TensorShape const& subTensorShape,
+ unsigned int const* subTensorOrigin) const
+{
+ boost::ignore_unused(parent, subTensorShape, subTensorOrigin);
+ return nullptr;
+}
+
+std::unique_ptr<ITensorHandle> RefTensorHandleFactory::CreateTensorHandle(const TensorInfo& tensorInfo) const
+{
+ return std::make_unique<RefTensorHandle>(tensorInfo, m_MemoryManager, m_ImportFlags);
+}
+
+std::unique_ptr<ITensorHandle> RefTensorHandleFactory::CreateTensorHandle(const TensorInfo& tensorInfo,
+ DataLayout dataLayout) const
+{
+ boost::ignore_unused(dataLayout);
+ return std::make_unique<RefTensorHandle>(tensorInfo, m_MemoryManager, m_ImportFlags);
+}
+
+const FactoryId& RefTensorHandleFactory::GetId() const
+{
+ return GetIdStatic();
+}
+
+bool RefTensorHandleFactory::SupportsSubTensors() const
+{
+ return false;
+}
+
+MemorySourceFlags RefTensorHandleFactory::GetExportFlags() const
+{
+ return m_ExportFlags;
+}
+
+MemorySourceFlags RefTensorHandleFactory::GetImportFlags() const
+{
+ return m_ImportFlags;
+}
+
+} // namespace armnn
\ No newline at end of file
--- /dev/null
+//
+// Copyright © 2017 Arm Ltd. All rights reserved.
+// SPDX-License-Identifier: MIT
+//
+
+#pragma once
+
+#include "RefMemoryManager.hpp"
+
+#include <backendsCommon/ITensorHandleFactory.hpp>
+
+namespace armnn
+{
+
+constexpr const char * RefTensorHandleFactoryId() { return "Arm/Ref/TensorHandleFactory"; }
+
+class RefTensorHandleFactory : public ITensorHandleFactory
+{
+
+public:
+ RefTensorHandleFactory(std::shared_ptr<RefMemoryManager> mgr)
+ : m_MemoryManager(mgr),
+ m_ImportFlags(static_cast<MemorySourceFlags>(MemorySource::Malloc)),
+ m_ExportFlags(static_cast<MemorySourceFlags>(MemorySource::Malloc))
+ {}
+
+ std::unique_ptr<ITensorHandle> CreateSubTensorHandle(ITensorHandle& parent,
+ TensorShape const& subTensorShape,
+ unsigned int const* subTensorOrigin) const override;
+
+ std::unique_ptr<ITensorHandle> CreateTensorHandle(const TensorInfo& tensorInfo) const override;
+
+ std::unique_ptr<ITensorHandle> CreateTensorHandle(const TensorInfo& tensorInfo,
+ DataLayout dataLayout) const override;
+
+ static const FactoryId& GetIdStatic();
+
+ const FactoryId& GetId() const override;
+
+ bool SupportsSubTensors() const override;
+
+ MemorySourceFlags GetExportFlags() const override;
+
+ MemorySourceFlags GetImportFlags() const override;
+
+private:
+ mutable std::shared_ptr<RefMemoryManager> m_MemoryManager;
+ MemorySourceFlags m_ImportFlags;
+ MemorySourceFlags m_ExportFlags;
+
+};
+
+} // namespace armnn
+
RefTensorHandle.cpp \
RefWorkloadFactory.cpp \
RefRegistryInitializer.cpp \
+ RefTensorHandleFactory.cpp \
workloads/Activation.cpp \
workloads/BatchNormImpl.cpp \
workloads/BatchToSpaceNd.cpp \
BOOST_TEST(outputData[3] == 2);
}
+BOOST_AUTO_TEST_CASE(RefNoCopyWorkloads)
+{
+ using namespace armnn;
+
+ // Create runtime in which test will run
+ IRuntime::CreationOptions options;
+ IRuntimePtr runtime(armnn::IRuntime::Create(options));
+
+ // build up the structure of the network
+ INetworkPtr net(INetwork::Create());
+
+ IConnectableLayer* input = net->AddInputLayer(0);
+
+ NormalizationDescriptor descriptor;
+ IConnectableLayer* norm = net->AddNormalizationLayer(descriptor);
+
+ IConnectableLayer* output = net->AddOutputLayer(0);
+
+ input->GetOutputSlot(0).Connect(norm->GetInputSlot(0));
+ norm->GetOutputSlot(0).Connect(output->GetInputSlot(0));
+
+ input->GetOutputSlot(0).SetTensorInfo(TensorInfo({ 1, 1, 4, 1 }, DataType::Float32));
+ norm->GetOutputSlot(0).SetTensorInfo(TensorInfo({ 1, 1, 4, 1 }, DataType::Float32));
+
+ // Optimize the network
+ IOptimizedNetworkPtr optNet = Optimize(*net, defaultBackends, runtime->GetDeviceSpec());
+
+ // Loads it into the runtime.
+ NetworkId netId;
+ runtime->LoadNetwork(netId, std::move(optNet));
+
+ // Creates structures for input & output
+ std::vector<float> inputData
+ {
+ 1.0f, 2.0f, 3.0f, 4.0f
+ };
+
+ std::vector<float> outputData(4);
+
+ InputTensors inputTensors
+ {
+ {0,armnn::ConstTensor(runtime->GetInputTensorInfo(netId, 0), inputData.data())},
+ };
+ OutputTensors outputTensors
+ {
+ {0,armnn::Tensor(runtime->GetOutputTensorInfo(netId, 0), outputData.data())}
+ };
+
+ // The result of the inference is not important, just the fact that there
+ // should not be CopyMemGeneric workloads.
+ runtime->GetProfiler(netId)->EnableProfiling(true);
+
+ // Do the inference
+ runtime->EnqueueWorkload(netId, inputTensors, outputTensors);
+
+ // Retrieve the Profiler.Print() output to get the workload execution
+ ProfilerManager& profilerManager = armnn::ProfilerManager::GetInstance();
+ std::stringstream ss;
+ profilerManager.GetProfiler()->Print(ss);;
+ std::string dump = ss.str();
+
+ // Contains RefNormalizationWorkload
+ std::size_t found = dump.find("RefNormalizationWorkload");
+ BOOST_TEST(found != std::string::npos);
+ // Contains SyncMemGeneric
+ found = dump.find("SyncMemGeneric");
+ BOOST_TEST(found != std::string::npos);
+ // No contains CopyMemGeneric
+ found = dump.find("CopyMemGeneric");
+ BOOST_TEST(found == std::string::npos);
+}
+
BOOST_AUTO_TEST_CASE(RefEqualSimpleEndToEndTest)
{
const std::vector<uint8_t> expectedOutput({ 1, 1, 1, 1, 0, 0, 0, 0,
memoryManager->Release();
}
+BOOST_AUTO_TEST_CASE(CheckSourceType)
+{
+ std::shared_ptr<RefMemoryManager> memoryManager = std::make_shared<RefMemoryManager>();
+
+ TensorInfo info({1}, DataType::Float32);
+ RefTensorHandle handle(info, memoryManager, static_cast<unsigned int>(MemorySource::Malloc));
+
+ // This pointer will be deleted in the handle destructor
+ int* testPtr = new int(4);
+
+ // Not supported
+ BOOST_CHECK(!handle.Import(static_cast<void *>(testPtr), MemorySource::DmaBuf));
+
+ // Not supported
+ BOOST_CHECK(!handle.Import(static_cast<void *>(testPtr), MemorySource::DmaBufProtected));
+
+ // Supported
+ BOOST_CHECK(handle.Import(static_cast<void *>(testPtr), MemorySource::Malloc));
+}
+
+BOOST_AUTO_TEST_CASE(ReusePointer)
+{
+ std::shared_ptr<RefMemoryManager> memoryManager = std::make_shared<RefMemoryManager>();
+
+ TensorInfo info({1}, DataType::Float32);
+ RefTensorHandle handle(info, memoryManager, static_cast<unsigned int>(MemorySource::Malloc));
+
+ // This pointer will be deleted in the handle destructor
+ int* testPtr = new int(4);
+
+ handle.Import(static_cast<void *>(testPtr), MemorySource::Malloc);
+
+ // Reusing previously Imported pointer
+ BOOST_CHECK(handle.Import(static_cast<void *>(testPtr), MemorySource::Malloc));
+}
+
+BOOST_AUTO_TEST_CASE(MisalignedPointer)
+{
+ std::shared_ptr<RefMemoryManager> memoryManager = std::make_shared<RefMemoryManager>();
+
+ TensorInfo info({2}, DataType::Float32);
+ RefTensorHandle handle(info, memoryManager, static_cast<unsigned int>(MemorySource::Malloc));
+
+ // Allocates a 2 int array
+ int* testPtr = new int[2];
+ int* misalignedPtr = testPtr + 1;
+
+ BOOST_CHECK(!handle.Import(static_cast<void *>(misalignedPtr), MemorySource::Malloc));
+
+ delete[] testPtr;
+}
+
BOOST_AUTO_TEST_SUITE_END()
\ No newline at end of file