#include <boost/polymorphic_cast.hpp>
#include <boost/assert.hpp>
#include <boost/format.hpp>

template <typename ExceptionType>
std::string ToErrorMessage(const char* prefix, const ExceptionType& error)
{
    std::stringstream ss;
    ss << prefix << " " << error.what();
    return ss.str();
}

// Records a layer of the post-optimisation network on the external profiling timeline.
void AddLayerStructure(std::unique_ptr<TimelineUtilityMethods>& timelineUtils,
                       const Layer& layer,
                       ProfilingGuid networkGuid)
{
    std::string layerName = layer.GetNameStr().empty() ? "<Unnamed>" : layer.GetNameStr();
    timelineUtils->CreateNamedTypedChildEntity(layer.GetGuid(),
                                               networkGuid,
                                               layerName,
                                               LabelsAndEventClasses::LAYER_GUID);
    for (auto&& input : layer.GetInputSlots())
    {
        const IOutputSlot* source = input.GetConnectedOutputSlot();
        BOOST_ASSERT(source != NULL);
        timelineUtils->CreateConnectionRelationship(ProfilingRelationshipType::RetentionLink,
                                                    source->GetOwningLayerGuid(),
                                                    layer.GetGuid());
    }
}

// Records a workload and the backend it runs on against its layer on the profiling timeline.
void AddWorkloadStructure(std::unique_ptr<TimelineUtilityMethods>& timelineUtils,
                          std::unique_ptr<IWorkload>& workload,
                          const Layer& layer)
{
    timelineUtils->MarkEntityWithLabel(workload->GetGuid(),
                                       layer.GetBackendId().Get(),
                                       LabelsAndEventClasses::BACKENDID_GUID);
}

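// Creates a LoadedNetwork from an already optimised network, reporting any construction failure
// through errorMessage instead of letting the exception escape to the caller.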
std::unique_ptr<LoadedNetwork> LoadedNetwork::MakeLoadedNetwork(std::unique_ptr<OptimizedNetwork> net,
                                                                std::string& errorMessage,
                                                                const INetworkProperties& networkProperties)
{
    std::unique_ptr<LoadedNetwork> loadedNetwork;

    auto Fail = [&](const std::exception& error) -> std::unique_ptr<LoadedNetwork>
    {
        errorMessage = ToErrorMessage("An error occurred when preparing the network workloads: ", error);
        ARMNN_LOG(error) << errorMessage;
        return std::unique_ptr<LoadedNetwork>();
    };

    try
    {
        loadedNetwork.reset(new LoadedNetwork(std::move(net), networkProperties));
    }
    catch (const std::runtime_error& error)
    {
        return Fail(error);
    }

    return loadedNetwork;
}

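// The constructor prepares everything that does not depend on a particular inference. It walks the
// topologically sorted graph three times: to create backends and workload factories, to create the
// tensor handles, and finally to create the workloads themselves.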
LoadedNetwork::LoadedNetwork(std::unique_ptr<OptimizedNetwork> net,
                             const INetworkProperties& networkProperties)
    : m_OptimizedNetwork(std::move(net))
{
    m_Profiler = std::make_shared<Profiler>();
    Graph& order = m_OptimizedNetwork->GetGraph().TopologicalSort();

    for (auto&& layer : order)
    {
        auto const& backendId = layer->GetBackendId();
        if (m_Backends.count(backendId) == 0)
        {
            auto createBackend = BackendRegistryInstance().GetFactory(backendId);
            auto it = m_Backends.emplace(std::make_pair(backendId, createBackend()));
            auto* backend = it.first->second.get();

            if (backend->SupportsTensorAllocatorAPI())
            {
                auto workloadFactory = backend->CreateWorkloadFactory(m_TensorHandleFactoryRegistry);
                m_WorkloadFactories.emplace(
                    std::make_pair(backendId, std::make_pair(std::move(workloadFactory), nullptr)));
            }
            else
            {
                IBackendInternal::IMemoryManagerSharedPtr memoryManager = backend->CreateMemoryManager();
                auto workloadFactory = backend->CreateWorkloadFactory(memoryManager);
                m_WorkloadFactories.emplace(
                    std::make_pair(backendId, std::make_pair(std::move(workloadFactory), memoryManager)));
            }
        }
    }

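    // Second pass: create the tensor handles. Import and export settings decide whether the
    // handles that touch the network boundary are memory managed.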
    for (auto&& layer : order)
    {
        auto& workloadFactory = GetWorkloadFactory(*layer);
        switch (layer->GetType())
        {
        case LayerType::Input:
            layer->CreateTensorHandles(m_TensorHandleFactoryRegistry, workloadFactory, !m_IsImportEnabled);
            break;
        default:
            if ((layer->GetNumOutputSlots() == 1) &&
                (layer->GetOutputSlots()[0].GetNumConnections() == 1) &&
                (layer->GetOutputSlots()[0].GetConnection(0)->GetOwningLayer().GetType() == LayerType::Output))
            {
                layer->CreateTensorHandles(m_TensorHandleFactoryRegistry, workloadFactory, !m_IsExportEnabled);
            }
            else
            {
                layer->CreateTensorHandles(m_TensorHandleFactoryRegistry, workloadFactory);
            }
        }
    }

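    // Third pass: create one workload per layer. Input and Output layers are skipped here and are
    // handled per inference by EnqueueInput() and EnqueueOutput() instead.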
    ProfilingGuid networkGuid = m_OptimizedNetwork->GetGuid();
    std::unique_ptr<TimelineUtilityMethods> timelineUtils = TimelineUtilityMethods::GetTimelineUtils();
    for (auto&& layer : order)
    {
        if (timelineUtils)
        {
            AddLayerStructure(timelineUtils, *layer, networkGuid);
        }
        const IWorkloadFactory& workloadFactory = GetWorkloadFactory(*layer);
        switch (layer->GetType())
        {
        case LayerType::Input:
        case LayerType::Output:
            break;
        default:
        {
            auto workload = layer->CreateWorkload(workloadFactory);
            if (!workload)
            {
                const char* const layerName =
                    layer->GetNameStr().length() != 0 ? layer->GetName() : "<Unnamed>";
                throw InvalidArgumentException(boost::str(
                    boost::format("No workload created for layer (name: '%1%' type: '%2%') (compute '%3%')")
                    % layerName % static_cast<int>(layer->GetType()) % layer->GetBackendId().Get()));
            }
            if (timelineUtils)
            {
                AddWorkloadStructure(timelineUtils, workload, *layer);
            }
            m_WorkloadQueue.push_back(move(workload));
            layer->ReleaseConstantData();
            break;
        }
        }
    }

    if (timelineUtils)
    {
        // Commit the timeline entities describing the post-optimisation network structure.
        timelineUtils->Commit();
    }

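    // Set up memory for the intermediate tensors, then let every workload do its post-allocation
    // configuration now that the buffers exist.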
    m_OptimizedNetwork->GetGraph().AllocateDynamicBuffers();
    for (auto& workload : m_WorkloadQueue)
    {
        workload->PostAllocationConfigure();
    }
}

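// The next two lookups resolve a LayerBindingId to the TensorInfo of the matching Input or Output layer.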
TensorInfo LoadedNetwork::GetInputTensorInfo(LayerBindingId layerId) const
{
    for (auto&& inputLayer : m_OptimizedNetwork->GetGraph().GetInputLayers())
    {
        BOOST_ASSERT_MSG(inputLayer->GetNumOutputSlots() == 1, "Input layer should have exactly 1 output slot");
        if (inputLayer->GetBindingId() == layerId)
        {
            return inputLayer->GetOutputSlot(0).GetTensorInfo();
        }
    }
    throw InvalidArgumentException(boost::str(boost::format("No input layer is associated with id %1%") % layerId));
}

TensorInfo LoadedNetwork::GetOutputTensorInfo(LayerBindingId layerId) const
{
    for (auto&& outputLayer : m_OptimizedNetwork->GetGraph().GetOutputLayers())
    {
        BOOST_ASSERT_MSG(outputLayer->GetNumInputSlots() == 1, "Output layer should have exactly 1 input slot");
        BOOST_ASSERT_MSG(outputLayer->GetInputSlot(0).GetConnection(), "Input slot on Output layer must be connected");
        if (outputLayer->GetBindingId() == layerId)
        {
            return outputLayer->GetInputSlot(0).GetConnection()->GetTensorInfo();
        }
    }
    throw InvalidArgumentException(boost::str(boost::format("No output layer is associated with id %1%") % layerId));
}

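// Returns the workload factory registered for the layer's backend, asserting that a factory exists
// and that it actually supports the layer.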
const IWorkloadFactory& LoadedNetwork::GetWorkloadFactory(const Layer& layer) const
{
    const IWorkloadFactory* workloadFactory = nullptr;

    auto it = m_WorkloadFactories.find(layer.GetBackendId());
    if (it == m_WorkloadFactories.end())
    {
        throw RuntimeException(
            boost::str(
                boost::format("No workload factory for %1% to be used for layer: %2%")
                % layer.GetBackendId().Get()
                % layer.GetNameStr()),
            CHECK_LOCATION());
    }
    workloadFactory = it->second.first.get();

    BOOST_ASSERT_MSG(workloadFactory, "No workload factory");

    std::string reasonIfUnsupported;
    BOOST_ASSERT_MSG(IWorkloadFactory::IsLayerSupported(layer, {}, reasonIfUnsupported),
                     "Factory does not support layer");
    boost::ignore_unused(reasonIfUnsupported);
    return *workloadFactory;
}

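// TensorPin and WorkloadData below wrap the caller-supplied input and output buffers in passthrough
// CPU tensor handles and keep them alive for the duration of a single EnqueueWorkload() call.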
class TensorPin
{
public:
    TensorPin(std::unique_ptr<ITensorHandle> handle, const TensorInfo& info, LayerBindingId id)
        : m_TensorHandle(std::move(handle)), m_TensorInfo(info), m_Id(id) {}

    ITensorHandle* GetTensorHandle() const { return m_TensorHandle.get(); }
    const TensorInfo& GetTensorInfo() const { return m_TensorInfo; }
    LayerBindingId GetBindingId() const { return m_Id; }

private:
    std::unique_ptr<ITensorHandle> m_TensorHandle;
    TensorInfo m_TensorInfo;
    LayerBindingId m_Id;
};

static const TensorPin& GetTensorPin(LayerBindingId id,
                                     const std::vector<TensorPin>& pins,
                                     char const* bindingPointDesc)
{
    auto it = std::find_if(pins.begin(), pins.end(),
        [id](const TensorPin& pin)
        {
            return pin.GetBindingId() == id;
        });

    if (it != pins.end())
    {
        return *it;
    }
    else
    {
        throw InvalidArgumentException(boost::str(
            boost::format("No tensor supplied for %1% %2%") % bindingPointDesc % id));
    }
}

class WorkloadData
{
public:
    WorkloadData(const InputTensors& inputTensors, const OutputTensors& outputTensors)
    {
        m_InputTensorPins.reserve(inputTensors.size());
        m_OutputTensorPins.reserve(outputTensors.size());

        for (auto inputTensorPair : inputTensors)
        {
            auto inputTensor = inputTensorPair.second;
            std::unique_ptr<ITensorHandle> tensorHandle =
                std::make_unique<ConstPassthroughCpuTensorHandle>(inputTensor.GetInfo(), inputTensor.GetMemoryArea());
            LayerBindingId layerId = inputTensorPair.first;

            m_InputTensorPins.emplace_back(std::move(tensorHandle), inputTensor.GetInfo(), layerId);
        }

        for (auto outputTensorPair : outputTensors)
        {
            auto outputTensor = outputTensorPair.second;
            std::unique_ptr<ITensorHandle> tensorHandle =
                std::make_unique<PassthroughCpuTensorHandle>(outputTensor.GetInfo(), outputTensor.GetMemoryArea());
            LayerBindingId layerId = outputTensorPair.first;

            m_OutputTensorPins.emplace_back(std::move(tensorHandle), outputTensor.GetInfo(), layerId);
        }
    }

    const TensorPin& GetInputTensorPin(LayerBindingId id) const
    {
        return GetTensorPin(id, m_InputTensorPins, "input");
    }

    const TensorPin& GetOutputTensorPin(LayerBindingId id) const
    {
        return GetTensorPin(id, m_OutputTensorPins, "output");
    }

private:
    std::vector<TensorPin> m_InputTensorPins;
    std::vector<TensorPin> m_OutputTensorPins;
};

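// EnqueueWorkload is the per-inference entry point: it binds the caller's input and output tensors
// to the loaded graph and then runs the queued workloads via Execute().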
Status LoadedNetwork::EnqueueWorkload(const InputTensors& inputTensors, const OutputTensors& outputTensors)
{
    const Graph& graph = m_OptimizedNetwork->GetGraph();
    if (graph.GetNumLayers() < 2)
    {
        ARMNN_LOG(warning) << "IRuntime::EnqueueWorkload()::Less than two nodes in graph";
        return Status::Failure;
    }

    // Data that must be kept alive for the entire execution of the workload.
    WorkloadData workloadData(inputTensors, outputTensors);

    // For each input to the network, call EnqueueInput with the data passed by the user.
    m_InputQueue.clear();
    for (const BindableLayer* inputLayer : graph.GetInputLayers())
    {
        const TensorPin& pin = workloadData.GetInputTensorPin(inputLayer->GetBindingId());
        EnqueueInput(*inputLayer, pin.GetTensorHandle(), pin.GetTensorInfo());
    }

    // For each output of the network, call EnqueueOutput with the data passed by the user.
    m_OutputQueue.clear();
    for (const BindableLayer* outputLayer : graph.GetOutputLayers())
    {
        const TensorPin& pin = workloadData.GetOutputTensorPin(outputLayer->GetBindingId());
        EnqueueOutput(*outputLayer, pin.GetTensorHandle(), pin.GetTensorInfo());
    }

    std::unique_ptr<TimelineUtilityMethods> timelineUtils = TimelineUtilityMethods::GetTimelineUtils();
    ProfilingGuid inferenceGuid = ProfilingService::Instance().NextGuid();
    bool executionSucceeded = true;
    executionSucceeded = Execute(timelineUtils, inferenceGuid);

    if (timelineUtils)
    {
        timelineUtils->Commit();
    }
    return executionSucceeded ? Status::Success : Status::Failure;
}

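// Illustrative sketch only, not part of LoadedNetwork.cpp: how a client typically reaches
// EnqueueWorkload() above through the public IRuntime API. It assumes <armnn/ArmNN.hpp> and
// <vector> are included, that 'optNet' is a network already optimised for this runtime, and that
// the single input and output both use binding id 0.
armnn::Status RunSingleInference(armnn::IOptimizedNetworkPtr optNet,
                                 std::vector<float>& inputData,
                                 std::vector<float>& outputData)
{
    armnn::IRuntime::CreationOptions options;
    armnn::IRuntimePtr runtime = armnn::IRuntime::Create(options);

    armnn::NetworkId networkId;
    // Internally builds a LoadedNetwork through LoadedNetwork::MakeLoadedNetwork().
    runtime->LoadNetwork(networkId, std::move(optNet));

    armnn::InputTensors inputs
    {
        { 0, armnn::ConstTensor(runtime->GetInputTensorInfo(networkId, 0), inputData.data()) }
    };
    armnn::OutputTensors outputs
    {
        { 0, armnn::Tensor(runtime->GetOutputTensorInfo(networkId, 0), outputData.data()) }
    };

    // Dispatches to LoadedNetwork::EnqueueWorkload() for the network loaded above.
    return runtime->EnqueueWorkload(networkId, inputs, outputs);
}
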
void LoadedNetwork::EnqueueInput(const BindableLayer& layer, ITensorHandle* tensorHandle, const TensorInfo& tensorInfo)
{
    if (tensorHandle == nullptr)
    {
        throw InvalidArgumentException("EnqueueInput: tensorHandle must not be NULL");
    }

    InputQueueDescriptor inputQueueDescriptor;
    WorkloadInfo info;

    inputQueueDescriptor.m_Inputs.push_back(tensorHandle);
    info.m_InputTensorInfos.push_back(tensorInfo);

    BOOST_ASSERT_MSG(layer.GetNumOutputSlots() == 1, "Can only handle Input Layer with one output");
    const OutputHandler& handler = layer.GetOutputHandler();
    const TensorInfo& outputTensorInfo = handler.GetTensorInfo();
    ITensorHandle* outputTensorHandle = handler.GetData();
    BOOST_ASSERT_MSG(outputTensorHandle != nullptr,
                     "Data should have been allocated.");
    inputQueueDescriptor.m_Outputs.push_back(outputTensorHandle);
    info.m_OutputTensorInfos.push_back(outputTensorInfo);

    if (m_IsImportEnabled)  // Try to import the caller's buffer instead of copying it.
    {
        if (CheckFlag(outputTensorHandle->GetImportFlags(), MemorySource::Malloc))
        {
            void* mem = tensorHandle->Map(false);
            if (outputTensorHandle->Import(mem, MemorySource::Malloc))
            {
                tensorHandle->Unmap();
                return; // No workload is needed; the import has been done.
            }
            tensorHandle->Unmap();
            throw MemoryImportException("EnqueueInput: Memory Import failed");
        }
        else
        {
            throw MemoryImportException("EnqueueInput: Memory Import failed, backend does not support Import");
        }
    }
    else
    {
        // No import: create a mem copy workload for the input.
        std::unique_ptr<IWorkload> inputWorkload = std::make_unique<CopyMemGenericWorkload>(inputQueueDescriptor, info);
        BOOST_ASSERT_MSG(inputWorkload, "No input workload created");

        std::unique_ptr<TimelineUtilityMethods> timelineUtils = TimelineUtilityMethods::GetTimelineUtils();
        if (timelineUtils)
        {
            AddWorkloadStructure(timelineUtils, inputWorkload, layer);
            timelineUtils->Commit();
        }

        m_InputQueue.push_back(move(inputWorkload));
    }
}

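// EnqueueOutput mirrors EnqueueInput for the output side: it exports the output memory straight into
// the caller's buffer when possible, otherwise it queues a workload that copies the result out.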
void LoadedNetwork::EnqueueOutput(const BindableLayer& layer, ITensorHandle* tensorHandle, const TensorInfo& tensorInfo)
{
    if (tensorHandle == nullptr)
    {
        throw InvalidArgumentException("EnqueueOutput: tensorHandle must not be NULL");
    }

    OutputQueueDescriptor outputQueueDescriptor;
    WorkloadInfo info;

    outputQueueDescriptor.m_Outputs.push_back(tensorHandle);
    info.m_OutputTensorInfos.push_back(tensorInfo);

    BOOST_ASSERT_MSG(layer.GetNumInputSlots() == 1, "Output Layer should have exactly one input.");

    // Gets the output handler of the layer that feeds this Output layer.
    const OutputHandler& outputHandler = layer.GetInputSlots()[0].GetConnectedOutputSlot()->GetOutputHandler();
    const TensorInfo& inputTensorInfo = outputHandler.GetTensorInfo();
    ITensorHandle* inputTensorHandle = outputHandler.GetData();
    BOOST_ASSERT_MSG(inputTensorHandle != nullptr, "Data should have been allocated.");

    // Try to export the output tensor; only possible when a single connection feeds the Output layer.
    if (m_IsExportEnabled && (layer.GetInputSlots()[0].GetConnectedOutputSlot()->GetNumConnections() == 1))
    {
        if (layer.GetInputSlots()[0].GetConnectedOutputSlot()->GetOwningLayer().GetType() != LayerType::Input)
        {
            void* mem = tensorHandle->Map(false);
            bool importOk = inputTensorHandle->Import(mem, MemorySource::Malloc);
            tensorHandle->Unmap();

            if (importOk)
            {
                // The export succeeded; a sync workload makes the result visible to the caller.
                MemSyncQueueDescriptor syncDesc;
                syncDesc.m_Inputs.push_back(inputTensorHandle);
                info.m_InputTensorInfos.push_back(inputTensorInfo);
                auto syncWorkload = std::make_unique<SyncMemGenericWorkload>(syncDesc, info);
                BOOST_ASSERT_MSG(syncWorkload, "No sync workload created");
                m_OutputQueue.push_back(move(syncWorkload));
            }
            else
            {
                throw MemoryExportException("EnqueueOutput: Memory Export failed");
            }
        }
        else
        {
            throw MemoryExportException("EnqueueOutput: Memory Export failed, attempting to export Input Layer");
        }
    }
    else
    {
        // No export: add an output workload which copies into the caller's buffer.
        outputQueueDescriptor.m_Inputs.push_back(inputTensorHandle);
        info.m_InputTensorInfos.push_back(inputTensorInfo);

        std::unique_ptr<IWorkload> outputWorkload =
            std::make_unique<CopyMemGenericWorkload>(outputQueueDescriptor, info);
        BOOST_ASSERT_MSG(outputWorkload, "No output workload created");

        std::unique_ptr<TimelineUtilityMethods> timelineUtils = TimelineUtilityMethods::GetTimelineUtils();
        if (timelineUtils)
        {
            AddWorkloadStructure(timelineUtils, outputWorkload, layer);
            timelineUtils->Commit();
        }

        m_OutputQueue.push_back(move(outputWorkload));
    }
}

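// Working memory for intermediate tensors is acquired lazily before execution and released
// explicitly via FreeWorkingMemory(); callers serialise both paths through m_WorkingMemMutex.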
void LoadedNetwork::AllocateWorkingMemory()
{
    if (m_IsWorkingMemAllocated)
    {
        return;
    }
    for (auto&& workloadFactory : m_WorkloadFactories)
    {
        IBackendInternal::IMemoryManagerSharedPtr memoryManager = workloadFactory.second.second;
        if (memoryManager)
        {
            memoryManager->Acquire();
        }
    }
    m_TensorHandleFactoryRegistry.AquireMemory();
    m_IsWorkingMemAllocated = true;
}

void LoadedNetwork::FreeWorkingMemory()
{
    std::lock_guard<std::mutex> lockGuard(m_WorkingMemMutex);
    if (!m_IsWorkingMemAllocated)
    {
        return;
    }
    // Informs the memory managers to release memory in their respective memory groups.
    for (auto&& workloadFactory : m_WorkloadFactories)
    {
        IBackendInternal::IMemoryManagerSharedPtr memoryManager = workloadFactory.second.second;
        if (memoryManager)
        {
            memoryManager->Release();
        }
    }
    m_TensorHandleFactoryRegistry.ReleaseMemory();
    m_IsWorkingMemAllocated = false;
}

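// Execute drains the input, workload and output queues in order, recording a start and end of life
// event for every workload on the profiling timeline when it is enabled.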
bool LoadedNetwork::Execute(std::unique_ptr<TimelineUtilityMethods>& timelineUtils,
                            ProfilingGuid inferenceGuid)
{
    bool success = true;
    auto Fail = [&](const std::exception& error)
    {
        ARMNN_LOG(error) << "An error occurred attempting to execute a workload: " << error.what();
        success = false;
    };

    try
    {
        std::lock_guard<std::mutex> lockGuard(m_WorkingMemMutex);
        AllocateWorkingMemory();

        ProfilingDynamicGuid workloadInferenceID(0);
        for (auto& input : m_InputQueue)
        {
            if (timelineUtils)
            {
                workloadInferenceID = timelineUtils->RecordWorkloadInferenceAndStartOfLifeEvent(input->GetGuid(),
                                                                                                inferenceGuid);
            }
            input->Execute();
            if (timelineUtils)
            {
                timelineUtils->RecordEndOfLifeEvent(workloadInferenceID);
            }
        }

        for (auto& workload : m_WorkloadQueue)
        {
            if (timelineUtils)
            {
                workloadInferenceID = timelineUtils->RecordWorkloadInferenceAndStartOfLifeEvent(workload->GetGuid(),
                                                                                                inferenceGuid);
            }
            workload->Execute();
            if (timelineUtils)
            {
                timelineUtils->RecordEndOfLifeEvent(workloadInferenceID);
            }
        }

        for (auto& output : m_OutputQueue)
        {
            if (timelineUtils)
            {
                workloadInferenceID = timelineUtils->RecordWorkloadInferenceAndStartOfLifeEvent(output->GetGuid(),
                                                                                                inferenceGuid);
            }
            output->Execute();
            if (timelineUtils)
            {
                timelineUtils->RecordEndOfLifeEvent(workloadInferenceID);
            }
        }
    }
    catch (const std::runtime_error& error)
    {
        Fail(error);
    }

    return success;
}

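// Forwards a user-supplied debug callback to every workload so that intermediate tensor data can be
// inspected while the network executes.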
void LoadedNetwork::RegisterDebugCallback(const DebugCallbackFunction& func)
{
    for (auto&& workloadPtr : m_WorkloadQueue)
    {
        workloadPtr.get()->RegisterDebugCallback(func);
    }
}