//
// Copyright © 2017 Arm Ltd. All rights reserved.
// SPDX-License-Identifier: MIT
//

#include "LoadedNetwork.hpp"
#include "Layer.hpp"
#include "Graph.hpp"
#include "Network.hpp"
#include "Runtime.hpp"
#include "Profiling.hpp"
#include "HeapProfiling.hpp"

#include <armnn/BackendRegistry.hpp>
#include <armnn/Logging.hpp>

#include <backendsCommon/CpuTensorHandle.hpp>
#include <backendsCommon/MemCopyWorkload.hpp>
#include <backendsCommon/MemSyncWorkload.hpp>

#include <LabelsAndEventClasses.hpp>
#include <ProfilingService.hpp>
#include <TimelineUtilityMethods.hpp>

#include <boost/polymorphic_cast.hpp>
#include <boost/assert.hpp>
#include <boost/core/ignore_unused.hpp>
#include <boost/format.hpp>

namespace armnn
{

using namespace std;
using namespace armnn::profiling;

namespace
{

template <typename ExceptionType>
std::string ToErrorMessage(const char* prefix, const ExceptionType& error)
{
    std::stringstream ss;
    ss << prefix << " " << error.what();
    return ss.str();
}

void AddLayerStructure(std::unique_ptr<TimelineUtilityMethods>& timelineUtils,
                       const Layer& layer,
                       ProfilingGuid networkGuid)
{
    // Add the layer to the post-optimisation network structure.
    std::string layerName = layer.GetNameStr().empty() ? "<Unnamed>" : layer.GetNameStr();
    timelineUtils->CreateNamedTypedChildEntity(layer.GetGuid(),
                                               networkGuid,
                                               layerName,
                                               LabelsAndEventClasses::LAYER_GUID);
    for (auto&& input : layer.GetInputSlots())
    {
        const IOutputSlot* source = input.GetConnectedOutputSlot();
        BOOST_ASSERT(source != nullptr);
        timelineUtils->CreateConnectionRelationship(ProfilingRelationshipType::RetentionLink,
                                                    source->GetOwningLayerGuid(),
                                                    layer.GetGuid());
    }
}

void AddWorkloadStructure(std::unique_ptr<TimelineUtilityMethods>& timelineUtils,
                          std::unique_ptr<IWorkload>& workload,
                          const Layer& layer)
{
    // Add the workload to the post-optimisation network structure.
    timelineUtils->CreateTypedEntity(workload->GetGuid(), LabelsAndEventClasses::WORKLOAD_GUID);
    timelineUtils->MarkEntityWithLabel(workload->GetGuid(),
                                       layer.GetBackendId().Get(),
                                       LabelsAndEventClasses::BACKENDID_GUID);

    // Link the workload to the layer.
    timelineUtils->CreateRelationship(ProfilingRelationshipType::RetentionLink,
                                      layer.GetGuid(),
                                      workload->GetGuid());
}

} // anonymous namespace
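
// Factory entry point: wraps the LoadedNetwork constructor so that any exception thrown while
// preparing the workloads is turned into an error message and a null pointer for the caller.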
std::unique_ptr<LoadedNetwork> LoadedNetwork::MakeLoadedNetwork(std::unique_ptr<OptimizedNetwork> net,
                                                                std::string& errorMessage,
                                                                const INetworkProperties& networkProperties)
{
    std::unique_ptr<LoadedNetwork> loadedNetwork;

    auto Fail = [&](const std::exception& error) -> std::unique_ptr<LoadedNetwork>
    {
        errorMessage = ToErrorMessage("An error occurred when preparing the network workloads:", error);
        ARMNN_LOG(error) << errorMessage;

        return std::unique_ptr<LoadedNetwork>();
    };

    try
    {
        loadedNetwork.reset(new LoadedNetwork(std::move(net), networkProperties));
    }
    catch (const armnn::RuntimeException& error)
    {
        return Fail(error);
    }
    catch (const armnn::Exception& error)
    {
        return Fail(error);
    }
    catch (const std::runtime_error& error)
    {
        return Fail(error);
    }

    return loadedNetwork;
}
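
// The constructor prepares the network in stages over the topologically sorted graph:
// backends and workload factories first, then per-layer tensor handles, then the workloads
// themselves, and finally allocation of the intermediate buffers.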
LoadedNetwork::LoadedNetwork(std::unique_ptr<OptimizedNetwork> net,
                             const INetworkProperties& networkProperties) :
    m_OptimizedNetwork(std::move(net)),
    m_IsImportEnabled(networkProperties.m_ImportEnabled),
    m_IsExportEnabled(networkProperties.m_ExportEnabled)
{
    // Create a profiler and register it for the current thread.
    m_Profiler = std::make_shared<Profiler>();
    ProfilerManager::GetInstance().RegisterProfiler(m_Profiler.get());

    Graph& order = m_OptimizedNetwork->GetGraph().TopologicalSort();

    // First create the backends, workload factories and tensor handles.
    // Tensor handles are created before the workloads, because workload creation
    // can modify some of the handles (for example for splitter and concat layers).
    for (auto&& layer : order)
    {
        auto const& backendId = layer->GetBackendId();
        if (m_Backends.count(backendId) == 0)
        {
            auto createBackend = BackendRegistryInstance().GetFactory(backendId);
            auto it = m_Backends.emplace(std::make_pair(backendId, createBackend()));

            IBackendInternal* backend = it.first->second.get();

            if (backend->SupportsTensorAllocatorAPI())
            {
                backend->RegisterTensorHandleFactories(m_TensorHandleFactoryRegistry);

                auto workloadFactory = backend->CreateWorkloadFactory(m_TensorHandleFactoryRegistry);
                m_WorkloadFactories.emplace(
                    std::make_pair(backendId, std::make_pair(std::move(workloadFactory), nullptr)));
            }
            else
            {
                IBackendInternal::IMemoryManagerSharedPtr memoryManager = backend->CreateMemoryManager();
                auto workloadFactory = backend->CreateWorkloadFactory(memoryManager);

                m_WorkloadFactories.emplace(
                    std::make_pair(backendId, std::make_pair(std::move(workloadFactory), memoryManager)));
            }
        }
    }

    for (auto&& layer : order)
    {
        auto& workloadFactory = GetWorkloadFactory(*layer);

        switch (layer->GetType())
        {
            case LayerType::Input:
            {
                // If import is enabled, memory management must be disabled when creating the
                // tensor handles so that the input memory can be imported rather than copied.
                layer->CreateTensorHandles(m_TensorHandleFactoryRegistry, workloadFactory, !m_IsImportEnabled);
                break;
            }
            default:
            {
                // A layer with a single output slot whose only connection leads to an Output
                // layer is a candidate for export: if export is enabled, disable memory
                // management so the memory can be exported; otherwise fall back to a copy.
                if ((layer->GetNumOutputSlots() == 1) &&
                    (layer->GetOutputSlots()[0].GetNumConnections() == 1) &&
                    (layer->GetOutputSlots()[0].GetConnection(0)->GetOwningLayer().GetType() == LayerType::Output))
                {
                    layer->CreateTensorHandles(m_TensorHandleFactoryRegistry, workloadFactory, !m_IsExportEnabled);
                }
                else
                {
                    layer->CreateTensorHandles(m_TensorHandleFactoryRegistry, workloadFactory);
                }
            }
        }
    }

    ProfilingGuid networkGuid = m_OptimizedNetwork->GetGuid();
    std::unique_ptr<TimelineUtilityMethods> timelineUtils = TimelineUtilityMethods::GetTimelineUtils();
    if (timelineUtils)
    {
        timelineUtils->CreateTypedEntity(networkGuid, LabelsAndEventClasses::NETWORK_GUID);
    }

    // Then create the workloads.
    for (auto&& layer : order)
    {
        if (timelineUtils)
        {
            // Add the layer to the post-optimisation network structure.
            AddLayerStructure(timelineUtils, *layer, networkGuid);
        }

        const IWorkloadFactory& workloadFactory = GetWorkloadFactory(*layer);

        switch (layer->GetType())
        {
            case LayerType::Input:
            case LayerType::Output:
            {
                // Inputs and outputs are treated in a special way - see EnqueueInput() and EnqueueOutput().
                break;
            }
            default:
            {
                auto workload = layer->CreateWorkload(workloadFactory);

                if (!workload)
                {
                    const char* const layerName =
                        layer->GetNameStr().length() != 0 ? layer->GetName() : "<Unnamed>";
                    throw InvalidArgumentException(boost::str(
                        boost::format("No workload created for layer (name: '%1%' type: '%2%') (compute '%3%')")
                        % layerName % static_cast<int>(layer->GetType()) % layer->GetBackendId().Get()
                    ));
                }

                if (timelineUtils)
                {
                    // Add the workload to the post-optimisation network structure.
                    AddWorkloadStructure(timelineUtils, workload, *layer);
                }

                m_WorkloadQueue.push_back(std::move(workload));

                // Release the constant data in the layer; it is no longer needed once the workload exists.
                layer->ReleaseConstantData();
                break;
            }
        }
    }

    if (timelineUtils)
    {
        // Commit the post-optimisation network structure to the profiling service.
        timelineUtils->Commit();
    }

    // Set up memory.
    m_OptimizedNetwork->GetGraph().AllocateDynamicBuffers();

    // Now that the intermediate tensor memory has been set up, do any post-allocation
    // configuration for each workload.
    for (auto& workload : m_WorkloadQueue)
    {
        workload->PostAllocationConfigure();
    }
}
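
// Look up the TensorInfo of a bound input or output by its LayerBindingId, so callers can
// validate or construct the tensors they pass to EnqueueWorkload().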
TensorInfo LoadedNetwork::GetInputTensorInfo(LayerBindingId layerId) const
{
    for (auto&& inputLayer : m_OptimizedNetwork->GetGraph().GetInputLayers())
    {
        BOOST_ASSERT_MSG(inputLayer->GetNumOutputSlots() == 1, "Input layer should have exactly 1 output slot");
        if (inputLayer->GetBindingId() == layerId)
        {
            return inputLayer->GetOutputSlot(0).GetTensorInfo();
        }
    }

    throw InvalidArgumentException(boost::str(boost::format("No input layer is associated with id %1%") % layerId));
}

TensorInfo LoadedNetwork::GetOutputTensorInfo(LayerBindingId layerId) const
{
    for (auto&& outputLayer : m_OptimizedNetwork->GetGraph().GetOutputLayers())
    {
        BOOST_ASSERT_MSG(outputLayer->GetNumInputSlots() == 1, "Output layer should have exactly 1 input slot");
        BOOST_ASSERT_MSG(outputLayer->GetInputSlot(0).GetConnection(), "Input slot on Output layer must be connected");
        if (outputLayer->GetBindingId() == layerId)
        {
            return outputLayer->GetInputSlot(0).GetConnection()->GetTensorInfo();
        }
    }

    throw InvalidArgumentException(boost::str(boost::format("No output layer is associated with id %1%") % layerId));
}

const IWorkloadFactory& LoadedNetwork::GetWorkloadFactory(const Layer& layer) const
{
    const IWorkloadFactory* workloadFactory = nullptr;

    auto it = m_WorkloadFactories.find(layer.GetBackendId());
    if (it == m_WorkloadFactories.end())
    {
        throw RuntimeException(
            boost::str(
                boost::format("No workload factory for %1% to be used for layer: %2%")
                % layer.GetBackendId().Get()
                % layer.GetNameStr()),
            CHECK_LOCATION());
    }

    workloadFactory = it->second.first.get();

    BOOST_ASSERT_MSG(workloadFactory, "No workload factory");

    std::string reasonIfUnsupported;
    BOOST_ASSERT_MSG(IWorkloadFactory::IsLayerSupported(layer, {}, reasonIfUnsupported),
                     "Factory does not support layer");
    boost::ignore_unused(reasonIfUnsupported);
    return *workloadFactory;
}
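
// Helpers used only by EnqueueWorkload(): a TensorPin owns the passthrough tensor handle that
// wraps one piece of user-supplied memory, and WorkloadData keeps every pin alive for the
// duration of a single EnqueueWorkload() call.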
namespace
{

// Non-copyable class owning accelerator-specific tensor data.
class TensorPin
{
public:
    TensorPin(std::unique_ptr<ITensorHandle> handle, const TensorInfo& info, LayerBindingId id)
        : m_TensorHandle(std::move(handle))
        , m_TensorInfo(info)
        , m_Id(id)
    {
    }

    ITensorHandle* GetTensorHandle() const { return m_TensorHandle.get(); }
    const TensorInfo& GetTensorInfo() const { return m_TensorInfo; }
    LayerBindingId GetBindingId() const { return m_Id; }

private:
    std::unique_ptr<ITensorHandle> m_TensorHandle;
    TensorInfo m_TensorInfo;
    LayerBindingId m_Id;
};

static const TensorPin& GetTensorPin(LayerBindingId id,
                                     const std::vector<TensorPin>& pins,
                                     char const* bindingPointDesc)
{
    auto it = std::find_if(pins.begin(), pins.end(),
                           [id](const TensorPin& pin)
                           {
                               return pin.GetBindingId() == id;
                           });

    if (it != pins.end())
    {
        return *it;
    }
    else
    {
        throw InvalidArgumentException(boost::str(
            boost::format("No tensor supplied for %1% %2%") % bindingPointDesc % id));
    }
}

// Stores data that needs to be kept accessible for the entire execution of a workload.
class WorkloadData
{
public:
    WorkloadData(const InputTensors& inputTensors, const OutputTensors& outputTensors)
    {
        m_InputTensorPins.reserve(inputTensors.size());
        m_OutputTensorPins.reserve(outputTensors.size());

        for (auto inputTensorPair : inputTensors)
        {
            auto inputTensor = inputTensorPair.second;

            std::unique_ptr<ITensorHandle> tensorHandle =
                std::make_unique<ConstPassthroughCpuTensorHandle>(inputTensor.GetInfo(), inputTensor.GetMemoryArea());
            LayerBindingId layerId = inputTensorPair.first;

            m_InputTensorPins.emplace_back(std::move(tensorHandle), inputTensor.GetInfo(), layerId);
        }

        for (auto outputTensorPair : outputTensors)
        {
            auto outputTensor = outputTensorPair.second;

            std::unique_ptr<ITensorHandle> tensorHandle =
                std::make_unique<PassthroughCpuTensorHandle>(outputTensor.GetInfo(), outputTensor.GetMemoryArea());
            LayerBindingId layerId = outputTensorPair.first;

            m_OutputTensorPins.emplace_back(std::move(tensorHandle), outputTensor.GetInfo(), layerId);
        }
    }

    const TensorPin& GetInputTensorPin(LayerBindingId id) const
    {
        return GetTensorPin(id, m_InputTensorPins, "input");
    }

    const TensorPin& GetOutputTensorPin(LayerBindingId id) const
    {
        return GetTensorPin(id, m_OutputTensorPins, "output");
    }

private:
    std::vector<TensorPin> m_InputTensorPins;
    std::vector<TensorPin> m_OutputTensorPins;
};

} // anonymous namespace
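
// Runs a single inference: wraps the caller's input and output memory in passthrough tensor
// handles, rebuilds the input and output queues, then executes the pre-built workload queue.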
Status LoadedNetwork::EnqueueWorkload(const InputTensors& inputTensors,
                                      const OutputTensors& outputTensors)
{
    ARMNN_SCOPED_PROFILING_EVENT(Compute::Undefined, "EnqueueWorkload");

    const Graph& graph = m_OptimizedNetwork->GetGraph();

    // Walk graph to determine the order of execution.
    if (graph.GetNumLayers() < 2)
    {
        ARMNN_LOG(warning) << "IRuntime::EnqueueWorkload()::Less than two nodes in graph";
        return Status::Failure;
    }

    // Data that must be kept alive for the entire execution of the workload.
    WorkloadData workloadData(inputTensors, outputTensors);

    if (graph.GetNumInputs() != inputTensors.size())
    {
        throw InvalidArgumentException("Number of inputs provided does not match network.");
    }

    // For each input to the network, call EnqueueInput with the data passed by the user.
    m_InputQueue.clear();
    m_InputQueue.reserve(graph.GetNumInputs());
    for (const BindableLayer* inputLayer : graph.GetInputLayers())
    {
        const TensorPin& pin = workloadData.GetInputTensorPin(inputLayer->GetBindingId());
        EnqueueInput(*inputLayer, pin.GetTensorHandle(), pin.GetTensorInfo());
    }

    // For each output of the network, call EnqueueOutput with the data passed by the user.
    m_OutputQueue.clear();
    m_OutputQueue.reserve(graph.GetNumOutputs());
    for (const BindableLayer* outputLayer : graph.GetOutputLayers())
    {
        const TensorPin& pin = workloadData.GetOutputTensorPin(outputLayer->GetBindingId());
        EnqueueOutput(*outputLayer, pin.GetTensorHandle(), pin.GetTensorInfo());
    }

    std::unique_ptr<TimelineUtilityMethods> timelineUtils = TimelineUtilityMethods::GetTimelineUtils();
    ProfilingDynamicGuid inferenceGuid = profiling::ProfilingService::Instance().NextGuid();
    if (timelineUtils)
    {
        // Add inference timeline trace if profiling is enabled.
        ProfilingGuid networkGuid = m_OptimizedNetwork->GetGuid();
        timelineUtils->CreateTypedEntity(inferenceGuid, LabelsAndEventClasses::INFERENCE_GUID);
        timelineUtils->CreateRelationship(ProfilingRelationshipType::RetentionLink, networkGuid, inferenceGuid);
        timelineUtils->RecordEvent(inferenceGuid, LabelsAndEventClasses::ARMNN_PROFILING_SOL_EVENT_CLASS);
    }

    bool executionSucceeded = true;

    {
        if (profiling::ProfilingService::Instance().IsProfilingEnabled())
        {
            profiling::ProfilingService::Instance().IncrementCounterValue(armnn::profiling::INFERENCES_RUN);
        }
        ARMNN_SCOPED_PROFILING_EVENT(Compute::Undefined, "Execute");
        ARMNN_SCOPED_HEAP_PROFILING("Executing");
        executionSucceeded = Execute(timelineUtils, inferenceGuid);
    }

    if (timelineUtils)
    {
        // Mark the end of life of the inference timeline if profiling is enabled.
        timelineUtils->RecordEvent(inferenceGuid, LabelsAndEventClasses::ARMNN_PROFILING_EOL_EVENT_CLASS);
        timelineUtils->Commit();
    }

    return executionSucceeded ? Status::Success : Status::Failure;
}
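
// Prepares one user-supplied input tensor: either imports the caller's memory directly into
// the backend tensor handle (zero copy), or queues a mem-copy workload that copies it in.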
void LoadedNetwork::EnqueueInput(const BindableLayer& layer, ITensorHandle* tensorHandle, const TensorInfo& tensorInfo)
{
    if (layer.GetType() != LayerType::Input)
    {
        throw InvalidArgumentException("EnqueueInput: given layer not an InputLayer");
    }

    if (tensorHandle == nullptr)
    {
        throw InvalidArgumentException("EnqueueInput: tensorHandle must not be NULL");
    }

    InputQueueDescriptor inputQueueDescriptor;
    WorkloadInfo info;

    inputQueueDescriptor.m_Inputs.push_back(tensorHandle);
    info.m_InputTensorInfos.push_back(tensorInfo);

    BOOST_ASSERT_MSG(layer.GetNumOutputSlots() == 1, "Can only handle Input Layer with one output");
    const OutputHandler& handler = layer.GetOutputHandler();
    const TensorInfo& outputTensorInfo = handler.GetTensorInfo();
    ITensorHandle* outputTensorHandle = handler.GetData();
    BOOST_ASSERT_MSG(outputTensorHandle != nullptr,
                     "Data should have been allocated.");
    inputQueueDescriptor.m_Outputs.push_back(outputTensorHandle);
    info.m_OutputTensorInfos.push_back(outputTensorInfo);

    MemorySourceFlags importFlags = outputTensorHandle->GetImportFlags();
    if (m_IsImportEnabled) // Try to import the input tensor.
    {
        if (CheckFlag(importFlags, MemorySource::Malloc))
        {
            // This assumes a CPU tensor handle.
            void* mem = tensorHandle->Map(false);
            if (outputTensorHandle->Import(mem, MemorySource::Malloc))
            {
                tensorHandle->Unmap();
                return; // No need for a workload since the import has been done.
            }
            tensorHandle->Unmap();
            throw MemoryImportException("EnqueueInput: Memory Import failed");
        }
        else
        {
            throw MemoryImportException("EnqueueInput: Memory Import failed, backend does not support Import");
        }
    }
    else
    {
        // Create a mem-copy workload for the input, since we did not import.
        std::unique_ptr<IWorkload> inputWorkload = std::make_unique<CopyMemGenericWorkload>(inputQueueDescriptor, info);

        BOOST_ASSERT_MSG(inputWorkload, "No input workload created");

        std::unique_ptr<TimelineUtilityMethods> timelineUtils = TimelineUtilityMethods::GetTimelineUtils();
        if (timelineUtils)
        {
            // Add the input workload to the post-optimisation network structure.
            AddWorkloadStructure(timelineUtils, inputWorkload, layer);
            timelineUtils->Commit();
        }

        m_InputQueue.push_back(std::move(inputWorkload));
    }
}
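
// Prepares one user-supplied output tensor: either exports the caller's memory into the
// backend handle (adding only a sync workload), or queues a mem-copy workload to copy the
// result out.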
void LoadedNetwork::EnqueueOutput(const BindableLayer& layer, ITensorHandle* tensorHandle, const TensorInfo& tensorInfo)
{
    if (layer.GetType() != LayerType::Output)
    {
        throw InvalidArgumentException("EnqueueOutput: given layer not an OutputLayer");
    }

    if (tensorHandle == nullptr)
    {
        throw InvalidArgumentException("EnqueueOutput: tensorHandle must not be NULL");
    }

    OutputQueueDescriptor outputQueueDescriptor;
    WorkloadInfo info;

    outputQueueDescriptor.m_Outputs.push_back(tensorHandle);
    info.m_OutputTensorInfos.push_back(tensorInfo);

    BOOST_ASSERT_MSG(layer.GetNumInputSlots() == 1, "Output Layer should have exactly one input.");

    // Gets the output handler from the previous node.
    const OutputHandler& outputHandler = layer.GetInputSlots()[0].GetConnectedOutputSlot()->GetOutputHandler();

    const TensorInfo& inputTensorInfo = outputHandler.GetTensorInfo();
    ITensorHandle* inputTensorHandle = outputHandler.GetData();
    BOOST_ASSERT_MSG(inputTensorHandle != nullptr, "Data should have been allocated.");

    // Try to import the output tensor.
    // Note: We can only import the output pointer if all of the following hold true:
    // a) The imported pointer is aligned sufficiently
    // b) The tensor has zero padding
    // c) There is only one connection to the OutputSlot and it is to an OutputLayer
    // d) The output pointer is allocated via malloc (other types will be supported in a later release)
    // e) m_IsExportEnabled must be set to true
    if (m_IsExportEnabled && (layer.GetInputSlots()[0].GetConnectedOutputSlot()->GetNumConnections() == 1))
    {
        if (layer.GetInputSlots()[0].GetConnectedOutputSlot()->GetOwningLayer().GetType() != LayerType::Input)
        {
            MemorySourceFlags importFlags = inputTensorHandle->GetImportFlags();
            if (CheckFlag(importFlags, MemorySource::Malloc))
            {
                void* mem = tensorHandle->Map(false);
                bool importOk = inputTensorHandle->Import(mem, MemorySource::Malloc);
                tensorHandle->Unmap();

                if (importOk)
                {
                    // Insert a synchronization workload.
                    MemSyncQueueDescriptor syncDesc;
                    syncDesc.m_Inputs.push_back(inputTensorHandle);
                    info.m_InputTensorInfos.push_back(inputTensorInfo);
                    auto syncWorkload = std::make_unique<SyncMemGenericWorkload>(syncDesc, info);
                    BOOST_ASSERT_MSG(syncWorkload, "No sync workload created");
                    m_OutputQueue.push_back(std::move(syncWorkload));
                }
                else
                {
                    throw MemoryExportException("EnqueueOutput: Memory Export failed");
                }
            }
            else
            {
                throw MemoryExportException("EnqueueOutput: Memory Export failed, backend does not support Export");
            }
        }
        else
        {
            throw MemoryExportException("EnqueueOutput: Memory Export failed, attempting to export Input Layer");
        }
    }
    else
    {
        // If we got here then we didn't export the memory, so add an output workload which performs a memcopy.
        outputQueueDescriptor.m_Inputs.push_back(inputTensorHandle);
        info.m_InputTensorInfos.push_back(inputTensorInfo);

        std::unique_ptr<IWorkload> outputWorkload =
            std::make_unique<CopyMemGenericWorkload>(outputQueueDescriptor, info);
        BOOST_ASSERT_MSG(outputWorkload, "No output workload created");

        std::unique_ptr<TimelineUtilityMethods> timelineUtils = TimelineUtilityMethods::GetTimelineUtils();
        if (timelineUtils)
        {
            // Add the output workload to the post-optimisation network structure.
            AddWorkloadStructure(timelineUtils, outputWorkload, layer);
            timelineUtils->Commit();
        }

        m_OutputQueue.push_back(std::move(outputWorkload));
    }
}
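
// Working memory for intermediate tensors is acquired lazily on first execution and released
// explicitly. AllocateWorkingMemory() expects the caller to hold m_WorkingMemMutex (Execute()
// does); FreeWorkingMemory() takes the lock itself.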
void LoadedNetwork::AllocateWorkingMemory()
{
    if (m_IsWorkingMemAllocated)
    {
        return;
    }
    for (auto&& workloadFactory : m_WorkloadFactories)
    {
        IBackendInternal::IMemoryManagerSharedPtr memoryManager = workloadFactory.second.second;
        if (memoryManager)
        {
            memoryManager->Acquire();
        }
    }
    m_TensorHandleFactoryRegistry.AquireMemory();
    m_IsWorkingMemAllocated = true;
}

void LoadedNetwork::FreeWorkingMemory()
{
    std::lock_guard<std::mutex> lockGuard(m_WorkingMemMutex);
    if (!m_IsWorkingMemAllocated)
    {
        return;
    }
    // Inform the memory managers to release memory in their respective memory groups.
    for (auto&& workloadFactory : m_WorkloadFactories)
    {
        IBackendInternal::IMemoryManagerSharedPtr memoryManager = workloadFactory.second.second;
        if (memoryManager)
        {
            memoryManager->Release();
        }
    }
    m_TensorHandleFactoryRegistry.ReleaseMemory();
    m_IsWorkingMemAllocated = false;
}
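
// Runs the input, workload and output queues in order, recording per-workload timeline events
// when profiling is active. Returns false (after logging the error) if a workload throws.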
bool LoadedNetwork::Execute(std::unique_ptr<TimelineUtilityMethods>& timelineUtils,
                            profiling::ProfilingGuid inferenceGuid)
{
    bool success = true;

    auto Fail = [&](const std::exception& error)
    {
        ARMNN_LOG(error) << "An error occurred attempting to execute a workload: " << error.what();
        success = false;
    };

    try
    {
        std::lock_guard<std::mutex> lockGuard(m_WorkingMemMutex);
        AllocateWorkingMemory();

        ProfilingDynamicGuid workloadInferenceID(0);
        for (auto& input : m_InputQueue)
        {
            if (timelineUtils)
            {
                workloadInferenceID = timelineUtils->RecordWorkloadInferenceAndStartOfLifeEvent(input->GetGuid(),
                                                                                                inferenceGuid);
            }
            input->Execute();
            if (timelineUtils)
            {
                timelineUtils->RecordEndOfLifeEvent(workloadInferenceID);
            }
        }

        for (auto& workload : m_WorkloadQueue)
        {
            if (timelineUtils)
            {
                workloadInferenceID = timelineUtils->RecordWorkloadInferenceAndStartOfLifeEvent(workload->GetGuid(),
                                                                                                inferenceGuid);
            }
            workload->Execute();
            if (timelineUtils)
            {
                timelineUtils->RecordEndOfLifeEvent(workloadInferenceID);
            }
        }

        for (auto& output : m_OutputQueue)
        {
            if (timelineUtils)
            {
                workloadInferenceID = timelineUtils->RecordWorkloadInferenceAndStartOfLifeEvent(output->GetGuid(),
                                                                                                inferenceGuid);
            }
            output->Execute();
            if (timelineUtils)
            {
                timelineUtils->RecordEndOfLifeEvent(workloadInferenceID);
            }
        }
    }
    catch (const RuntimeException& error)
    {
        Fail(error);
    }
    catch (const std::runtime_error& error)
    {
        Fail(error);
    }

    return success;
}

void LoadedNetwork::RegisterDebugCallback(const DebugCallbackFunction& func)
{
    for (auto&& workloadPtr : m_WorkloadQueue)
    {
        workloadPtr->RegisterDebugCallback(func);
    }
}

} // namespace armnn
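
// Illustrative usage sketch (not part of this file): LoadedNetwork is normally driven through
// the public IRuntime interface rather than used directly. Assuming a network already built
// and optimized with armnn::Optimize(), and caller-owned inputData/outputData buffers bound
// with LayerBindingId 0, a caller would do roughly the following:
//
//     armnn::IRuntimePtr runtime = armnn::IRuntime::Create(armnn::IRuntime::CreationOptions());
//     armnn::NetworkId networkId;
//     runtime->LoadNetwork(networkId, std::move(optimizedNet));  // wraps MakeLoadedNetwork()
//
//     armnn::InputTensors inputs
//     {
//         { 0, armnn::ConstTensor(runtime->GetInputTensorInfo(networkId, 0), inputData.data()) }
//     };
//     armnn::OutputTensors outputs
//     {
//         { 0, armnn::Tensor(runtime->GetOutputTensorInfo(networkId, 0), outputData.data()) }
//     };
//
//     runtime->EnqueueWorkload(networkId, inputs, outputs);  // ends up in EnqueueWorkload() above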