// Copyright © 2017 Arm Ltd. All rights reserved.
// See LICENSE file in the project root for full license information.
#include "LoadedNetwork.hpp"
#include "Runtime.hpp"
#include "Profiling.hpp"
#include "HeapProfiling.hpp"

#ifdef ARMCOMPUTECL_ENABLED
#include <arm_compute/core/CL/OpenCL.h>
#endif

#include <backends/CpuTensorHandle.hpp>

#include <boost/assert.hpp>
#include <boost/format.hpp>
#include <boost/log/trivial.hpp>
#include <boost/polymorphic_cast.hpp>

#include <algorithm>
#include <memory>
#include <sstream>
#include <string>
#include <utility>
#include <vector>
// Builds a human-readable error message by prepending a prefix to the
// exception's description. Usable with any type exposing a what() member.
template <typename ExceptionType>
std::string ToErrorMessage(const char * prefix, const ExceptionType & error)
{
    std::stringstream ss;
    ss << prefix << " " << error.what();
    return ss.str();
}
#if ARMCOMPUTECL_ENABLED
// Overload for OpenCL exceptions: additionally reports the numeric CL error
// code carried by cl::Error so the failure can be looked up in the CL spec.
std::string ToErrorMessage(const char * prefix, const cl::Error& error)
{
    std::stringstream ss;
    ss << prefix << " " << error.what() << ". CL error code is: " << error.err();
    return ss.str();
}
#endif
52 std::unique_ptr<LoadedNetwork> LoadedNetwork::MakeLoadedNetwork(std::unique_ptr<OptimizedNetwork> net,
53 std::string & errorMessage)
55 std::unique_ptr<LoadedNetwork> loadedNetwork;
59 loadedNetwork.reset(new LoadedNetwork(std::move(net)));
61 catch (const std::runtime_error& error)
63 errorMessage = ToErrorMessage("An error occurred when preparing the network workloads: ", error);
64 BOOST_LOG_TRIVIAL(error) << errorMessage;
65 return std::unique_ptr<LoadedNetwork>();
67 catch (const armnn::Exception& error)
69 errorMessage = ToErrorMessage("An error occurred when preparing the network workloads: ", error);
70 BOOST_LOG_TRIVIAL(error) << errorMessage;
71 return std::unique_ptr<LoadedNetwork>();
73 #if ARMCOMPUTECL_ENABLED
74 catch (const cl::Error& error)
76 errorMessage = ToErrorMessage("A CL error occurred attempting to prepare a network workload: ", error);
77 BOOST_LOG_TRIVIAL(error) << errorMessage;
78 return std::unique_ptr<LoadedNetwork>();
85 LoadedNetwork::LoadedNetwork(std::unique_ptr<OptimizedNetwork> net)
87 , m_OptimizedNetwork(std::move(net))
89 // Create a profiler and register it for the current thread.
90 m_Profiler = std::make_shared<Profiler>();
91 ProfilerManager::GetInstance().RegisterProfiler(m_Profiler.get());
93 Graph& order = m_OptimizedNetwork->GetGraph().TopologicalSort();
94 //First create tensor handlers.
95 //Handlers are created before workloads are.
96 //Because workload creation can modify some of the handlers,
97 //(for example the splitter and merger layers).
98 for (auto&& layer : order)
100 layer->CreateTensorHandles(m_OptimizedNetwork->GetGraph(), GetWorkloadFactory(*layer));
103 //Then create workloads.
104 for (auto&& layer : order)
106 const IWorkloadFactory& workloadFactory = GetWorkloadFactory(*layer);
108 switch (layer->GetType())
110 case LayerType::Input:
111 case LayerType::Output:
113 // Inputs and outputs are treated in a special way - see EnqueueInput() and EnqueueOutput().
118 auto workload = layer->CreateWorkload(m_OptimizedNetwork->GetGraph(), workloadFactory);
122 const char* const layerName = layer->GetNameStr().length() != 0 ? layer->GetName() : "<Unnamed>";
123 throw InvalidArgumentException(boost::str(
124 boost::format("No workload created for layer (name: '%1%' type: '%2%') (compute '%3%')")
125 % layerName % static_cast<int>(layer->GetType()) % layer->GetComputeDevice()
129 m_WorkloadQueue.push_back(move(workload));
130 // release the constant data in the layer..
131 layer->ReleaseConstantData();
138 m_OptimizedNetwork->GetGraph().AllocateDynamicBuffers();
140 // Finalize the workload factories before execution.
146 TensorInfo LoadedNetwork::GetInputTensorInfo(LayerBindingId layerId) const
148 for (auto&& inputLayer : m_OptimizedNetwork->GetGraph().GetInputLayers())
150 BOOST_ASSERT_MSG(inputLayer->GetNumOutputSlots() == 1, "Input layer should have exactly 1 output slot");
151 if (inputLayer->GetBindingId() == layerId)
153 return inputLayer->GetOutputSlot(0).GetTensorInfo();
157 throw InvalidArgumentException(boost::str(boost::format("No input layer is associated with id %1%") % layerId));
160 TensorInfo LoadedNetwork::GetOutputTensorInfo(LayerBindingId layerId) const
162 for (auto&& outputLayer : m_OptimizedNetwork->GetGraph().GetOutputLayers())
164 BOOST_ASSERT_MSG(outputLayer->GetNumInputSlots() == 1, "Output layer should have exactly 1 input slot");
165 BOOST_ASSERT_MSG(outputLayer->GetInputSlot(0).GetConnection(), "Input slot on Output layer must be connected");
166 if (outputLayer->GetBindingId() == layerId)
168 return outputLayer->GetInputSlot(0).GetConnection()->GetTensorInfo();
172 throw InvalidArgumentException(boost::str(boost::format("No output layer is associated with id %1%") % layerId));
175 const IWorkloadFactory& LoadedNetwork::GetWorkloadFactory(const Layer& layer) const
177 const IWorkloadFactory* workloadFactory = nullptr;
179 switch (layer.GetComputeDevice())
181 case Compute::CpuAcc:
183 workloadFactory = &m_CpuAcc;
186 case Compute::GpuAcc:
188 workloadFactory = &m_GpuAcc;
191 case Compute::CpuRef:
193 workloadFactory = &m_CpuRef;
202 BOOST_ASSERT_MSG(workloadFactory, "No workload factory");
204 std::string reasonIfUnsupported;
205 BOOST_ASSERT_MSG(IWorkloadFactory::IsLayerSupported(layer, {}, reasonIfUnsupported),
206 "Factory does not support layer");
207 boost::ignore_unused(reasonIfUnsupported);
209 return *workloadFactory;
214 // Non-copyable class owning accelerator-specific tensor data.
218 TensorPin(std::unique_ptr<ITensorHandle> handle, const TensorInfo& info, LayerBindingId id)
219 : m_TensorHandle(std::move(handle))
225 ITensorHandle* GetTensorHandle() const { return m_TensorHandle.get(); }
226 const TensorInfo& GetTensorInfo() const { return m_TensorInfo; }
227 LayerBindingId GetBindingId() const { return m_Id; }
230 std::unique_ptr<ITensorHandle> m_TensorHandle;
231 TensorInfo m_TensorInfo;
235 static const TensorPin& GetTensorPin(LayerBindingId id,
236 const std::vector<TensorPin>& pins,
237 char const* bindingPointDesc)
239 auto it = std::find_if(pins.begin(), pins.end(),
240 [id](const TensorPin& pin)
242 return pin.GetBindingId() == id;
245 if (it != pins.end())
251 throw InvalidArgumentException(boost::str(
252 boost::format("No tensor supplied for %1% %2%") % bindingPointDesc % id));
256 // Stores data that needs to be kept accessible for the entire execution of a workload.
260 WorkloadData(const InputTensors& inputTensors, const OutputTensors& outputTensors)
262 m_InputTensorPins.reserve(inputTensors.size());
263 m_OutputTensorPins.reserve(outputTensors.size());
265 for (auto inputTensorPair : inputTensors)
267 auto inputTensor = inputTensorPair.second;
269 std::unique_ptr<ITensorHandle> tensorHandle =
270 std::make_unique<ConstPassthroughCpuTensorHandle>(inputTensor.GetInfo(),inputTensor.GetMemoryArea());
271 LayerBindingId layerId = inputTensorPair.first;
273 m_InputTensorPins.emplace_back(std::move(tensorHandle), inputTensor.GetInfo(), layerId);
276 for (auto outputTensorPair : outputTensors)
278 auto outputTensor = outputTensorPair.second;
280 std::unique_ptr<ITensorHandle> tensorHandle =
281 std::make_unique<PassthroughCpuTensorHandle>(outputTensor.GetInfo(), outputTensor.GetMemoryArea());
282 LayerBindingId layerId = outputTensorPair.first;
284 m_OutputTensorPins.emplace_back(std::move(tensorHandle), outputTensor.GetInfo(), layerId);
288 const TensorPin& GetInputTensorPin(LayerBindingId id) const
290 return GetTensorPin(id, m_InputTensorPins, "input");
293 const TensorPin& GetOutputTensorPin(LayerBindingId id) const
295 return GetTensorPin(id, m_OutputTensorPins, "output");
300 std::vector<TensorPin> m_InputTensorPins;
301 std::vector<TensorPin> m_OutputTensorPins;
306 Status LoadedNetwork::EnqueueWorkload(const InputTensors& inputTensors,
307 const OutputTensors& outputTensors)
309 ARMNN_SCOPED_PROFILING_EVENT(Compute::Undefined, "EnqueueWorkload");
311 const Graph& graph = m_OptimizedNetwork->GetGraph();
313 // Walk graph to determine the order of execution.
314 if (graph.GetNumLayers() < 2)
316 BOOST_LOG_TRIVIAL(warning) << "IRuntime::EnqueueWorkload()::Less than two nodes in graph";
317 return Status::Failure;
320 // Data that must be kept alive for the entire execution of the workload.
321 WorkloadData workloadData(inputTensors, outputTensors);
323 if (graph.GetNumInputs() != inputTensors.size())
325 throw InvalidArgumentException("Number of inputs provided does not match network.");
328 // For each input to the network, call EnqueueInput with the data passed by the user.
329 for (const BindableLayer* inputLayer : graph.GetInputLayers())
331 const TensorPin& pin = workloadData.GetInputTensorPin(inputLayer->GetBindingId());
332 EnqueueInput(*inputLayer, pin.GetTensorHandle(), pin.GetTensorInfo());
335 // For each output to the network, call EnqueueOutput with the data passed by the user.
336 for (const BindableLayer* outputLayer : graph.GetOutputLayers())
338 const TensorPin& pin = workloadData.GetOutputTensorPin(outputLayer->GetBindingId());
339 EnqueueOutput(*outputLayer, pin.GetTensorHandle(), pin.GetTensorInfo());
342 bool executionSucceeded = true;
345 ARMNN_SCOPED_PROFILING_EVENT(Compute::Undefined, "Execute");
346 ARMNN_SCOPED_HEAP_PROFILING("Executing");
347 executionSucceeded = Execute();
350 // Hack: get rid of inputs and outputs we added.
351 TidyWorkloadQueue(graph.GetNumInputs(), graph.GetNumOutputs());
353 return executionSucceeded ? Status::Success : Status::Failure;
356 void LoadedNetwork::EnqueueInput(const BindableLayer& layer, ITensorHandle* tensorHandle, const TensorInfo& tensorInfo)
358 if (layer.GetType() != LayerType::Input)
360 throw InvalidArgumentException("EnqueueInput: given layer not an InputLayer");
363 if (tensorHandle == nullptr)
365 throw InvalidArgumentException("EnqueueInput: tensorHandle must not be NULL");
368 InputQueueDescriptor inputQueueDescriptor;
371 inputQueueDescriptor.m_Inputs.push_back(tensorHandle);
372 info.m_InputTensorInfos.push_back(tensorInfo);
374 BOOST_ASSERT_MSG(layer.GetNumOutputSlots() == 1, "Can only handle Input Layer with one output");
375 const OutputHandler& handler = layer.GetOutputHandler();
376 const TensorInfo& outputTensorInfo = handler.GetTensorInfo();
377 ITensorHandle* outputTensorHandle = handler.GetData();
378 BOOST_ASSERT_MSG(outputTensorHandle != nullptr,
379 "Data should have been allocated.");
380 inputQueueDescriptor.m_Outputs.push_back(outputTensorHandle);
381 info.m_OutputTensorInfos.push_back(outputTensorInfo);
383 const IWorkloadFactory& workloadFactory = GetWorkloadFactory(layer);
384 auto inputWorkload = workloadFactory.CreateInput(inputQueueDescriptor, info);
385 BOOST_ASSERT_MSG(inputWorkload, "No input workload created");
386 m_WorkloadQueue.insert(m_WorkloadQueue.begin(), move(inputWorkload));
389 void LoadedNetwork::EnqueueOutput(const BindableLayer& layer, ITensorHandle* tensorHandle, const TensorInfo& tensorInfo)
391 if (layer.GetType() != LayerType::Output)
393 throw InvalidArgumentException("EnqueueOutput: given layer not an OutputLayer");
396 if (tensorHandle == nullptr)
398 throw InvalidArgumentException("EnqueueOutput: tensorHandle must not be NULL");
401 OutputQueueDescriptor outputQueueDescriptor;
404 outputQueueDescriptor.m_Outputs.push_back(tensorHandle);
405 info.m_OutputTensorInfos.push_back(tensorInfo);
407 BOOST_ASSERT_MSG(layer.GetNumInputSlots() == 1, "Output Layer should have exactly one input.");
409 // Gets the output handler from the previous node.
410 const OutputHandler& outputHandler = layer.GetInputSlots()[0].GetConnectedOutputSlot()->GetOutputHandler();
412 const TensorInfo& inputTensorInfo = outputHandler.GetTensorInfo();
413 ITensorHandle* inputTensorHandle = outputHandler.GetData();
414 BOOST_ASSERT_MSG(inputTensorHandle != nullptr, "Data should have been allocated.");
416 outputQueueDescriptor.m_Inputs.push_back(inputTensorHandle);
417 info.m_InputTensorInfos.push_back(inputTensorInfo);
419 const IWorkloadFactory& workloadFactory = GetWorkloadFactory(layer);
420 auto outputWorkload = workloadFactory.CreateOutput(outputQueueDescriptor, info);
421 BOOST_ASSERT_MSG(outputWorkload, "No output workload created");
422 m_WorkloadQueue.push_back(move(outputWorkload));
425 bool LoadedNetwork::Execute()
435 for (size_t i = 0; i < m_WorkloadQueue.size(); ++i)
437 m_WorkloadQueue[i]->Execute();
440 #if ARMCOMPUTECL_ENABLED
441 catch (const cl::Error& error)
443 BOOST_LOG_TRIVIAL(error) << "A CL error occurred attempting to execute a workload: "
444 << error.what() << ". CL error code is: " << error.err();
448 catch (const std::runtime_error& error)
450 BOOST_LOG_TRIVIAL(error) << "An error occurred attempting to execute a workload: " << error.what();
454 // Informs the memory managers to release memory in it's respective memory group
462 void LoadedNetwork::TidyWorkloadQueue(size_t numInputs, size_t numOutputs)
464 m_WorkloadQueue.erase(m_WorkloadQueue.begin(), m_WorkloadQueue.begin() + boost::numeric_cast<long>(numInputs));
465 m_WorkloadQueue.erase(m_WorkloadQueue.end() - boost::numeric_cast<long>(numOutputs), m_WorkloadQueue.end());