2 // Copyright © 2017 Arm Ltd. All rights reserved.
3 // SPDX-License-Identifier: MIT
6 #include "LoadedNetwork.hpp"
10 #include "Runtime.hpp"
11 #include "Profiling.hpp"
12 #include "HeapProfiling.hpp"
14 #include <backendsCommon/CpuTensorHandle.hpp>
15 #include <backendsCommon/BackendRegistry.hpp>
17 #include <boost/polymorphic_cast.hpp>
18 #include <boost/assert.hpp>
19 #include <boost/format.hpp>
20 #include <boost/log/trivial.hpp>
/// Builds a human-readable error string of the form "<prefix> <error.what()>".
/// @param prefix  Text placed before the exception message (a single space is inserted after it).
/// @param error   Any exception-like object exposing what().
/// @return The concatenated message.
template <typename ExceptionType>
std::string ToErrorMessage(const char * prefix, const ExceptionType & error)
{
    std::stringstream message;
    message << prefix << " " << error.what();
    return message.str();
}
40 std::unique_ptr<LoadedNetwork> LoadedNetwork::MakeLoadedNetwork(std::unique_ptr<OptimizedNetwork> net,
41 const IRuntime::CreationOptions& options,
42 std::string & errorMessage)
44 std::unique_ptr<LoadedNetwork> loadedNetwork;
46 auto Fail = [&](const std::exception& error) -> std::unique_ptr<LoadedNetwork>
48 errorMessage = ToErrorMessage("An error occurred when preparing the network workloads: ", error);
49 BOOST_LOG_TRIVIAL(error) << errorMessage;
51 return std::unique_ptr<LoadedNetwork>();
56 loadedNetwork.reset(new LoadedNetwork(std::move(net), options));
58 catch (const armnn::RuntimeException& error)
62 catch (const armnn::Exception& error)
66 catch (const std::runtime_error& error)
74 LoadedNetwork::LoadedNetwork(std::unique_ptr<OptimizedNetwork> net,
75 const IRuntime::CreationOptions& options)
76 : m_OptimizedNetwork(std::move(net))
77 , m_WorkingMemLock(m_WorkingMemMutex, std::defer_lock)
79 // Create a profiler and register it for the current thread.
80 m_Profiler = std::make_shared<Profiler>();
81 ProfilerManager::GetInstance().RegisterProfiler(m_Profiler.get());
83 Graph& order = m_OptimizedNetwork->GetGraph().TopologicalSort();
84 //First create tensor handlers, backends and workload factories.
85 //Handlers are created before workloads are.
86 //Because workload creation can modify some of the handlers,
87 //(for example the splitter and merger layers).
88 for (auto&& layer : order)
90 auto const& backend = layer->GetBackendId();
91 if (m_Backends.count(backend) == 0)
93 auto createBackend = BackendRegistryInstance().GetFactory(backend);
94 auto it = m_Backends.emplace(std::make_pair(backend, createBackend(EmptyInitializer())));
95 m_WorkloadFactories.emplace(std::make_pair(backend,
96 it.first->second->CreateWorkloadFactory()));
98 layer->CreateTensorHandles(m_OptimizedNetwork->GetGraph(), GetWorkloadFactory(*layer));
101 //Then create workloads.
102 for (auto&& layer : order)
104 const IWorkloadFactory& workloadFactory = GetWorkloadFactory(*layer);
106 switch (layer->GetType())
108 case LayerType::Input:
109 case LayerType::Output:
111 // Inputs and outputs are treated in a special way - see EnqueueInput() and EnqueueOutput().
116 auto workload = layer->CreateWorkload(m_OptimizedNetwork->GetGraph(), workloadFactory);
120 const char* const layerName = layer->GetNameStr().length() != 0 ? layer->GetName() : "<Unnamed>";
121 throw InvalidArgumentException(boost::str(
122 boost::format("No workload created for layer (name: '%1%' type: '%2%') (compute '%3%')")
123 % layerName % static_cast<int>(layer->GetType()) % layer->GetBackendId().Get()
127 m_WorkloadQueue.push_back(move(workload));
128 // release the constant data in the layer..
129 layer->ReleaseConstantData();
136 m_OptimizedNetwork->GetGraph().AllocateDynamicBuffers();
138 // Finalize the workload factories before execution.
139 for (auto&& workloadFactory : m_WorkloadFactories)
141 workloadFactory.second->Finalize();
145 TensorInfo LoadedNetwork::GetInputTensorInfo(LayerBindingId layerId) const
147 for (auto&& inputLayer : m_OptimizedNetwork->GetGraph().GetInputLayers())
149 BOOST_ASSERT_MSG(inputLayer->GetNumOutputSlots() == 1, "Input layer should have exactly 1 output slot");
150 if (inputLayer->GetBindingId() == layerId)
152 return inputLayer->GetOutputSlot(0).GetTensorInfo();
156 throw InvalidArgumentException(boost::str(boost::format("No input layer is associated with id %1%") % layerId));
159 TensorInfo LoadedNetwork::GetOutputTensorInfo(LayerBindingId layerId) const
161 for (auto&& outputLayer : m_OptimizedNetwork->GetGraph().GetOutputLayers())
163 BOOST_ASSERT_MSG(outputLayer->GetNumInputSlots() == 1, "Output layer should have exactly 1 input slot");
164 BOOST_ASSERT_MSG(outputLayer->GetInputSlot(0).GetConnection(), "Input slot on Output layer must be connected");
165 if (outputLayer->GetBindingId() == layerId)
167 return outputLayer->GetInputSlot(0).GetConnection()->GetTensorInfo();
171 throw InvalidArgumentException(boost::str(boost::format("No output layer is associated with id %1%") % layerId));
174 const IWorkloadFactory& LoadedNetwork::GetWorkloadFactory(const Layer& layer) const
176 const IWorkloadFactory* workloadFactory = nullptr;
178 auto it = m_WorkloadFactories.find(layer.GetBackendId());
179 if (it == m_WorkloadFactories.end())
181 throw RuntimeException(
183 boost::format("No workload factory for %1% to be used for layer: %2%")
184 % layer.GetBackendId().Get()
185 % layer.GetNameStr()),
189 workloadFactory = it->second.get();
191 BOOST_ASSERT_MSG(workloadFactory, "No workload factory");
193 std::string reasonIfUnsupported;
194 BOOST_ASSERT_MSG(IWorkloadFactory::IsLayerSupported(layer, {}, reasonIfUnsupported),
195 "Factory does not support layer");
196 boost::ignore_unused(reasonIfUnsupported);
197 return *workloadFactory;
202 // Non-copyable class owning accelerator-specific tensor data.
206 TensorPin(std::unique_ptr<ITensorHandle> handle, const TensorInfo& info, LayerBindingId id)
207 : m_TensorHandle(std::move(handle))
213 ITensorHandle* GetTensorHandle() const { return m_TensorHandle.get(); }
214 const TensorInfo& GetTensorInfo() const { return m_TensorInfo; }
215 LayerBindingId GetBindingId() const { return m_Id; }
218 std::unique_ptr<ITensorHandle> m_TensorHandle;
219 TensorInfo m_TensorInfo;
223 static const TensorPin& GetTensorPin(LayerBindingId id,
224 const std::vector<TensorPin>& pins,
225 char const* bindingPointDesc)
227 auto it = std::find_if(pins.begin(), pins.end(),
228 [id](const TensorPin& pin)
230 return pin.GetBindingId() == id;
233 if (it != pins.end())
239 throw InvalidArgumentException(boost::str(
240 boost::format("No tensor supplied for %1% %2%") % bindingPointDesc % id));
244 // Stores data that needs to be kept accessible for the entire execution of a workload.
248 WorkloadData(const InputTensors& inputTensors, const OutputTensors& outputTensors)
250 m_InputTensorPins.reserve(inputTensors.size());
251 m_OutputTensorPins.reserve(outputTensors.size());
253 for (auto inputTensorPair : inputTensors)
255 auto inputTensor = inputTensorPair.second;
257 std::unique_ptr<ITensorHandle> tensorHandle =
258 std::make_unique<ConstPassthroughCpuTensorHandle>(inputTensor.GetInfo(),inputTensor.GetMemoryArea());
259 LayerBindingId layerId = inputTensorPair.first;
261 m_InputTensorPins.emplace_back(std::move(tensorHandle), inputTensor.GetInfo(), layerId);
264 for (auto outputTensorPair : outputTensors)
266 auto outputTensor = outputTensorPair.second;
268 std::unique_ptr<ITensorHandle> tensorHandle =
269 std::make_unique<PassthroughCpuTensorHandle>(outputTensor.GetInfo(), outputTensor.GetMemoryArea());
270 LayerBindingId layerId = outputTensorPair.first;
272 m_OutputTensorPins.emplace_back(std::move(tensorHandle), outputTensor.GetInfo(), layerId);
276 const TensorPin& GetInputTensorPin(LayerBindingId id) const
278 return GetTensorPin(id, m_InputTensorPins, "input");
281 const TensorPin& GetOutputTensorPin(LayerBindingId id) const
283 return GetTensorPin(id, m_OutputTensorPins, "output");
288 std::vector<TensorPin> m_InputTensorPins;
289 std::vector<TensorPin> m_OutputTensorPins;
294 Status LoadedNetwork::EnqueueWorkload(const InputTensors& inputTensors,
295 const OutputTensors& outputTensors)
297 ARMNN_SCOPED_PROFILING_EVENT(Compute::Undefined, "EnqueueWorkload");
299 const Graph& graph = m_OptimizedNetwork->GetGraph();
301 // Walk graph to determine the order of execution.
302 if (graph.GetNumLayers() < 2)
304 BOOST_LOG_TRIVIAL(warning) << "IRuntime::EnqueueWorkload()::Less than two nodes in graph";
305 return Status::Failure;
308 // Data that must be kept alive for the entire execution of the workload.
309 WorkloadData workloadData(inputTensors, outputTensors);
311 if (graph.GetNumInputs() != inputTensors.size())
313 throw InvalidArgumentException("Number of inputs provided does not match network.");
316 // For each input to the network, call EnqueueInput with the data passed by the user.
317 m_InputQueue.clear();
318 m_InputQueue.reserve(graph.GetNumInputs());
319 for (const BindableLayer* inputLayer : graph.GetInputLayers())
321 const TensorPin& pin = workloadData.GetInputTensorPin(inputLayer->GetBindingId());
322 EnqueueInput(*inputLayer, pin.GetTensorHandle(), pin.GetTensorInfo());
325 // For each output to the network, call EnqueueOutput with the data passed by the user.
326 m_OutputQueue.clear();
327 m_OutputQueue.reserve(graph.GetNumOutputs());
328 for (const BindableLayer* outputLayer : graph.GetOutputLayers())
330 const TensorPin& pin = workloadData.GetOutputTensorPin(outputLayer->GetBindingId());
331 EnqueueOutput(*outputLayer, pin.GetTensorHandle(), pin.GetTensorInfo());
334 bool executionSucceeded = true;
337 ARMNN_SCOPED_PROFILING_EVENT(Compute::Undefined, "Execute");
338 ARMNN_SCOPED_HEAP_PROFILING("Executing");
339 executionSucceeded = Execute();
342 return executionSucceeded ? Status::Success : Status::Failure;
345 void LoadedNetwork::EnqueueInput(const BindableLayer& layer, ITensorHandle* tensorHandle, const TensorInfo& tensorInfo)
347 if (layer.GetType() != LayerType::Input)
349 throw InvalidArgumentException("EnqueueInput: given layer not an InputLayer");
352 if (tensorHandle == nullptr)
354 throw InvalidArgumentException("EnqueueInput: tensorHandle must not be NULL");
357 InputQueueDescriptor inputQueueDescriptor;
360 inputQueueDescriptor.m_Inputs.push_back(tensorHandle);
361 info.m_InputTensorInfos.push_back(tensorInfo);
363 BOOST_ASSERT_MSG(layer.GetNumOutputSlots() == 1, "Can only handle Input Layer with one output");
364 const OutputHandler& handler = layer.GetOutputHandler();
365 const TensorInfo& outputTensorInfo = handler.GetTensorInfo();
366 ITensorHandle* outputTensorHandle = handler.GetData();
367 BOOST_ASSERT_MSG(outputTensorHandle != nullptr,
368 "Data should have been allocated.");
369 inputQueueDescriptor.m_Outputs.push_back(outputTensorHandle);
370 info.m_OutputTensorInfos.push_back(outputTensorInfo);
372 const IWorkloadFactory& workloadFactory = GetWorkloadFactory(layer);
373 auto inputWorkload = workloadFactory.CreateInput(inputQueueDescriptor, info);
374 BOOST_ASSERT_MSG(inputWorkload, "No input workload created");
375 m_InputQueue.push_back(move(inputWorkload));
378 void LoadedNetwork::EnqueueOutput(const BindableLayer& layer, ITensorHandle* tensorHandle, const TensorInfo& tensorInfo)
380 if (layer.GetType() != LayerType::Output)
382 throw InvalidArgumentException("EnqueueOutput: given layer not an OutputLayer");
385 if (tensorHandle == nullptr)
387 throw InvalidArgumentException("EnqueueOutput: tensorHandle must not be NULL");
390 OutputQueueDescriptor outputQueueDescriptor;
393 outputQueueDescriptor.m_Outputs.push_back(tensorHandle);
394 info.m_OutputTensorInfos.push_back(tensorInfo);
396 BOOST_ASSERT_MSG(layer.GetNumInputSlots() == 1, "Output Layer should have exactly one input.");
398 // Gets the output handler from the previous node.
399 const OutputHandler& outputHandler = layer.GetInputSlots()[0].GetConnectedOutputSlot()->GetOutputHandler();
401 const TensorInfo& inputTensorInfo = outputHandler.GetTensorInfo();
402 ITensorHandle* inputTensorHandle = outputHandler.GetData();
403 BOOST_ASSERT_MSG(inputTensorHandle != nullptr, "Data should have been allocated.");
405 outputQueueDescriptor.m_Inputs.push_back(inputTensorHandle);
406 info.m_InputTensorInfos.push_back(inputTensorInfo);
408 const IWorkloadFactory& workloadFactory = GetWorkloadFactory(layer);
409 auto outputWorkload = workloadFactory.CreateOutput(outputQueueDescriptor, info);
410 BOOST_ASSERT_MSG(outputWorkload, "No output workload created");
411 m_OutputQueue.push_back(move(outputWorkload));
414 void LoadedNetwork::AllocateWorkingMemory()
416 BOOST_ASSERT_MSG(m_WorkingMemLock.owns_lock(), "Cannot allocate working memory if mutex is not already locked.");
417 if (m_IsWorkingMemAllocated)
421 for (auto&& workloadFactory : m_WorkloadFactories)
423 workloadFactory.second->Acquire();
425 m_IsWorkingMemAllocated = true;
428 void LoadedNetwork::FreeWorkingMemory()
430 std::lock_guard<UniqueMutexLock> lockGuard(m_WorkingMemLock);
431 if (!m_IsWorkingMemAllocated)
435 // Informs the memory managers to release memory in it's respective memory group
436 for (auto&& workloadFactory : m_WorkloadFactories)
438 workloadFactory.second->Release();
440 m_IsWorkingMemAllocated = false;
443 bool LoadedNetwork::Execute()
447 auto Fail = [&](const std::exception& error)
449 BOOST_LOG_TRIVIAL(error) << "An error occurred attempting to execute a workload: " << error.what();
455 std::lock_guard<UniqueMutexLock> lockGuard(m_WorkingMemLock);
456 AllocateWorkingMemory();
458 for (auto& input : m_InputQueue)
463 for (auto& workload : m_WorkloadQueue)
468 for (auto& output: m_OutputQueue)
473 catch (const RuntimeException& error)
477 catch (const std::runtime_error& error)