IVGCVSW-1946: Remove armnn/src from the include paths
src/armnn/LoadedNetwork.cpp
//
// Copyright © 2017 Arm Ltd. All rights reserved.
// SPDX-License-Identifier: MIT
//

#include "LoadedNetwork.hpp"
#include "Layer.hpp"
#include "Graph.hpp"
#include "Network.hpp"
#include "Runtime.hpp"
#include "Profiling.hpp"
#include "HeapProfiling.hpp"

#include <backendsCommon/CpuTensorHandle.hpp>
#include <backendsCommon/BackendRegistry.hpp>

#include <boost/polymorphic_cast.hpp>
#include <boost/assert.hpp>
#include <boost/core/ignore_unused.hpp>
#include <boost/format.hpp>
#include <boost/log/trivial.hpp>

namespace armnn
{

using namespace std;

namespace
{

template <typename ExceptionType>
std::string ToErrorMessage(const char * prefix, const ExceptionType & error)
{
    std::stringstream ss;
    ss << prefix << " " << error.what();
    return ss.str();
}

} // anonymous

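// Constructs a LoadedNetwork from the given optimized network. Any exception thrown while the
// workloads are being prepared is caught, logged and reported back through errorMessage, and a
// null pointer is returned instead.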
std::unique_ptr<LoadedNetwork> LoadedNetwork::MakeLoadedNetwork(std::unique_ptr<OptimizedNetwork> net,
                                                                const IRuntime::CreationOptions& options,
                                                                std::string & errorMessage)
{
    std::unique_ptr<LoadedNetwork> loadedNetwork;

    auto Fail = [&](const std::exception& error) -> std::unique_ptr<LoadedNetwork>
    {
        errorMessage = ToErrorMessage("An error occurred when preparing the network workloads:", error);
        BOOST_LOG_TRIVIAL(error) << errorMessage;

        return std::unique_ptr<LoadedNetwork>();
    };

    try
    {
        loadedNetwork.reset(new LoadedNetwork(std::move(net), options));
    }
    catch (const armnn::RuntimeException& error)
    {
        return Fail(error);
    }
    catch (const armnn::Exception& error)
    {
        return Fail(error);
    }
    catch (const std::runtime_error& error)
    {
        return Fail(error);
    }

    return loadedNetwork;
}

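// The constructor builds the backends and per-backend workload factories, creates the tensor handles
// and workloads for every layer, allocates the graph's dynamic buffers and finalizes the factories.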
LoadedNetwork::LoadedNetwork(std::unique_ptr<OptimizedNetwork> net,
                             const IRuntime::CreationOptions& options)
    : m_OptimizedNetwork(std::move(net))
    , m_WorkingMemLock(m_WorkingMemMutex, std::defer_lock)
{
    // Create a profiler and register it for the current thread.
    m_Profiler = std::make_shared<Profiler>();
    ProfilerManager::GetInstance().RegisterProfiler(m_Profiler.get());

    Graph& order = m_OptimizedNetwork->GetGraph().TopologicalSort();
    // First create the tensor handlers, backends and workload factories.
    // Handlers are created before workloads because workload creation can modify
    // some of the handlers (for example for the splitter and merger layers).
    for (auto&& layer : order)
    {
        auto const& backend = layer->GetBackendId();
        if (m_Backends.count(backend) == 0)
        {
            auto createBackend = BackendRegistryInstance().GetFactory(backend);
            auto it = m_Backends.emplace(std::make_pair(backend, createBackend(EmptyInitializer())));
            m_WorkloadFactories.emplace(std::make_pair(backend,
                                                       it.first->second->CreateWorkloadFactory()));
        }
        layer->CreateTensorHandles(m_OptimizedNetwork->GetGraph(), GetWorkloadFactory(*layer));
    }

    // Then create workloads.
    for (auto&& layer : order)
    {
        const IWorkloadFactory& workloadFactory = GetWorkloadFactory(*layer);

        switch (layer->GetType())
        {
        case LayerType::Input:
        case LayerType::Output:
            {
                // Inputs and outputs are treated in a special way - see EnqueueInput() and EnqueueOutput().
                break;
            }
        default:
            {
                auto workload = layer->CreateWorkload(m_OptimizedNetwork->GetGraph(), workloadFactory);

                if (!workload)
                {
                    const char* const layerName = layer->GetNameStr().length() != 0 ? layer->GetName() : "<Unnamed>";
                    throw InvalidArgumentException(boost::str(
                        boost::format("No workload created for layer (name: '%1%' type: '%2%') (compute '%3%')")
                        % layerName % static_cast<int>(layer->GetType()) % layer->GetBackendId().Get()
                    ));
                }

                m_WorkloadQueue.push_back(move(workload));
                // Release the constant data in the layer.
                layer->ReleaseConstantData();
                break;
            }
        }
    }

    // Set up memory.
    m_OptimizedNetwork->GetGraph().AllocateDynamicBuffers();

    // Finalize the workload factories before execution.
    for (auto&& workloadFactory : m_WorkloadFactories)
    {
        workloadFactory.second->Finalize();
    }
}

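// Returns the tensor info of the input layer bound to layerId; throws if no input layer has that binding id.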
TensorInfo LoadedNetwork::GetInputTensorInfo(LayerBindingId layerId) const
{
    for (auto&& inputLayer : m_OptimizedNetwork->GetGraph().GetInputLayers())
    {
        BOOST_ASSERT_MSG(inputLayer->GetNumOutputSlots() == 1, "Input layer should have exactly 1 output slot");
        if (inputLayer->GetBindingId() == layerId)
        {
            return inputLayer->GetOutputSlot(0).GetTensorInfo();
        }
    }

    throw InvalidArgumentException(boost::str(boost::format("No input layer is associated with id %1%") % layerId));
}

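// Returns the tensor info of the connection feeding the output layer bound to layerId; throws if no
// output layer has that binding id.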
TensorInfo LoadedNetwork::GetOutputTensorInfo(LayerBindingId layerId) const
{
    for (auto&& outputLayer : m_OptimizedNetwork->GetGraph().GetOutputLayers())
    {
        BOOST_ASSERT_MSG(outputLayer->GetNumInputSlots() == 1, "Output layer should have exactly 1 input slot");
        BOOST_ASSERT_MSG(outputLayer->GetInputSlot(0).GetConnection(), "Input slot on Output layer must be connected");
        if (outputLayer->GetBindingId() == layerId)
        {
            return outputLayer->GetInputSlot(0).GetConnection()->GetTensorInfo();
        }
    }

    throw InvalidArgumentException(boost::str(boost::format("No output layer is associated with id %1%") % layerId));
}

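// Looks up the workload factory registered for the layer's backend; throws if no factory exists for that backend.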
const IWorkloadFactory& LoadedNetwork::GetWorkloadFactory(const Layer& layer) const
{
    const IWorkloadFactory* workloadFactory = nullptr;

    auto it = m_WorkloadFactories.find(layer.GetBackendId());
    if (it == m_WorkloadFactories.end())
    {
        throw RuntimeException(
            boost::str(
                boost::format("No workload factory for %1% to be used for layer: %2%")
                % layer.GetBackendId().Get()
                % layer.GetNameStr()),
            CHECK_LOCATION());
    }

    workloadFactory = it->second.get();

    BOOST_ASSERT_MSG(workloadFactory, "No workload factory");

    std::string reasonIfUnsupported;
    BOOST_ASSERT_MSG(IWorkloadFactory::IsLayerSupported(layer, {}, reasonIfUnsupported),
        "Factory does not support layer");
    boost::ignore_unused(reasonIfUnsupported);
    return *workloadFactory;
}

namespace {

// Non-copyable class owning accelerator-specific tensor data.
class TensorPin
{
public:
    TensorPin(std::unique_ptr<ITensorHandle> handle, const TensorInfo& info, LayerBindingId id)
        : m_TensorHandle(std::move(handle))
        , m_TensorInfo(info)
        , m_Id(id)
    {
    }

    ITensorHandle* GetTensorHandle() const { return m_TensorHandle.get(); }
    const TensorInfo& GetTensorInfo() const { return m_TensorInfo; }
    LayerBindingId GetBindingId() const { return m_Id; }

private:
    std::unique_ptr<ITensorHandle> m_TensorHandle;
    TensorInfo m_TensorInfo;
    LayerBindingId m_Id;
};

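// Finds the pin with the given binding id in the supplied collection; throws if the caller did not
// supply a tensor for that binding point.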
static const TensorPin& GetTensorPin(LayerBindingId id,
    const std::vector<TensorPin>& pins,
    char const* bindingPointDesc)
{
    auto it = std::find_if(pins.begin(), pins.end(),
        [id](const TensorPin& pin)
    {
        return pin.GetBindingId() == id;
    });

    if (it != pins.end())
    {
        return *it;
    }
    else
    {
        throw InvalidArgumentException(boost::str(
            boost::format("No tensor supplied for %1% %2%") % bindingPointDesc % id));
    }
}

// Stores data that needs to be kept accessible for the entire execution of a workload.
class WorkloadData
{
public:
    WorkloadData(const InputTensors& inputTensors, const OutputTensors& outputTensors)
    {
        m_InputTensorPins.reserve(inputTensors.size());
        m_OutputTensorPins.reserve(outputTensors.size());

        for (auto inputTensorPair : inputTensors)
        {
            auto inputTensor = inputTensorPair.second;

            std::unique_ptr<ITensorHandle> tensorHandle =
                std::make_unique<ConstPassthroughCpuTensorHandle>(inputTensor.GetInfo(), inputTensor.GetMemoryArea());
            LayerBindingId layerId = inputTensorPair.first;

            m_InputTensorPins.emplace_back(std::move(tensorHandle), inputTensor.GetInfo(), layerId);
        }

        for (auto outputTensorPair : outputTensors)
        {
            auto outputTensor = outputTensorPair.second;

            std::unique_ptr<ITensorHandle> tensorHandle =
                std::make_unique<PassthroughCpuTensorHandle>(outputTensor.GetInfo(), outputTensor.GetMemoryArea());
            LayerBindingId layerId = outputTensorPair.first;

            m_OutputTensorPins.emplace_back(std::move(tensorHandle), outputTensor.GetInfo(), layerId);
        }
    }

    const TensorPin& GetInputTensorPin(LayerBindingId id) const
    {
        return GetTensorPin(id, m_InputTensorPins, "input");
    }

    const TensorPin& GetOutputTensorPin(LayerBindingId id) const
    {
        return GetTensorPin(id, m_OutputTensorPins, "output");
    }

private:

    std::vector<TensorPin> m_InputTensorPins;
    std::vector<TensorPin> m_OutputTensorPins;
};

} // anonymous

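// Binds the user-supplied input and output tensors to the network's bound layers and runs the
// loaded network once.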
Status LoadedNetwork::EnqueueWorkload(const InputTensors& inputTensors,
                                      const OutputTensors& outputTensors)
{
    ARMNN_SCOPED_PROFILING_EVENT(Compute::Undefined, "EnqueueWorkload");

    const Graph& graph = m_OptimizedNetwork->GetGraph();

    // A graph needs at least one input and one output layer to be executable.
    if (graph.GetNumLayers() < 2)
    {
        BOOST_LOG_TRIVIAL(warning) << "IRuntime::EnqueueWorkload(): less than two nodes in graph";
        return Status::Failure;
    }

    // Data that must be kept alive for the entire execution of the workload.
    WorkloadData workloadData(inputTensors, outputTensors);

    if (graph.GetNumInputs() != inputTensors.size())
    {
        throw InvalidArgumentException("Number of inputs provided does not match network.");
    }

    // For each input to the network, call EnqueueInput with the data passed by the user.
    m_InputQueue.clear();
    m_InputQueue.reserve(graph.GetNumInputs());
    for (const BindableLayer* inputLayer : graph.GetInputLayers())
    {
        const TensorPin& pin = workloadData.GetInputTensorPin(inputLayer->GetBindingId());
        EnqueueInput(*inputLayer, pin.GetTensorHandle(), pin.GetTensorInfo());
    }

    // For each output of the network, call EnqueueOutput with the data passed by the user.
    m_OutputQueue.clear();
    m_OutputQueue.reserve(graph.GetNumOutputs());
    for (const BindableLayer* outputLayer : graph.GetOutputLayers())
    {
        const TensorPin& pin = workloadData.GetOutputTensorPin(outputLayer->GetBindingId());
        EnqueueOutput(*outputLayer, pin.GetTensorHandle(), pin.GetTensorInfo());
    }

    bool executionSucceeded = true;

    {
        ARMNN_SCOPED_PROFILING_EVENT(Compute::Undefined, "Execute");
        ARMNN_SCOPED_HEAP_PROFILING("Executing");
        executionSucceeded = Execute();
    }

    return executionSucceeded ? Status::Success : Status::Failure;
}

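// Builds an input workload whose input is the user-supplied tensor and whose output is the tensor
// handle owned by the input layer, then adds it to the input queue.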
void LoadedNetwork::EnqueueInput(const BindableLayer& layer, ITensorHandle* tensorHandle, const TensorInfo& tensorInfo)
{
    if (layer.GetType() != LayerType::Input)
    {
        throw InvalidArgumentException("EnqueueInput: given layer not an InputLayer");
    }

    if (tensorHandle == nullptr)
    {
        throw InvalidArgumentException("EnqueueInput: tensorHandle must not be NULL");
    }

    InputQueueDescriptor inputQueueDescriptor;
    WorkloadInfo info;

    inputQueueDescriptor.m_Inputs.push_back(tensorHandle);
    info.m_InputTensorInfos.push_back(tensorInfo);

    BOOST_ASSERT_MSG(layer.GetNumOutputSlots() == 1, "Can only handle Input Layer with one output");
    const OutputHandler& handler = layer.GetOutputHandler();
    const TensorInfo& outputTensorInfo = handler.GetTensorInfo();
    ITensorHandle* outputTensorHandle = handler.GetData();
    BOOST_ASSERT_MSG(outputTensorHandle != nullptr,
                     "Data should have been allocated.");
    inputQueueDescriptor.m_Outputs.push_back(outputTensorHandle);
    info.m_OutputTensorInfos.push_back(outputTensorInfo);

    const IWorkloadFactory& workloadFactory = GetWorkloadFactory(layer);
    auto inputWorkload = workloadFactory.CreateInput(inputQueueDescriptor, info);
    BOOST_ASSERT_MSG(inputWorkload, "No input workload created");
    m_InputQueue.push_back(move(inputWorkload));
}

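// Builds an output workload whose input is the tensor handle produced by the layer feeding this
// output layer and whose output is the user-supplied tensor, then adds it to the output queue.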
void LoadedNetwork::EnqueueOutput(const BindableLayer& layer, ITensorHandle* tensorHandle, const TensorInfo& tensorInfo)
{
    if (layer.GetType() != LayerType::Output)
    {
        throw InvalidArgumentException("EnqueueOutput: given layer not an OutputLayer");
    }

    if (tensorHandle == nullptr)
    {
        throw InvalidArgumentException("EnqueueOutput: tensorHandle must not be NULL");
    }

    OutputQueueDescriptor outputQueueDescriptor;
    WorkloadInfo info;

    outputQueueDescriptor.m_Outputs.push_back(tensorHandle);
    info.m_OutputTensorInfos.push_back(tensorInfo);

    BOOST_ASSERT_MSG(layer.GetNumInputSlots() == 1, "Output Layer should have exactly one input.");

    // Gets the output handler from the previous node.
    const OutputHandler& outputHandler = layer.GetInputSlots()[0].GetConnectedOutputSlot()->GetOutputHandler();

    const TensorInfo& inputTensorInfo = outputHandler.GetTensorInfo();
    ITensorHandle* inputTensorHandle = outputHandler.GetData();
    BOOST_ASSERT_MSG(inputTensorHandle != nullptr, "Data should have been allocated.");

    outputQueueDescriptor.m_Inputs.push_back(inputTensorHandle);
    info.m_InputTensorInfos.push_back(inputTensorInfo);

    const IWorkloadFactory& workloadFactory = GetWorkloadFactory(layer);
    auto outputWorkload = workloadFactory.CreateOutput(outputQueueDescriptor, info);
    BOOST_ASSERT_MSG(outputWorkload, "No output workload created");
    m_OutputQueue.push_back(move(outputWorkload));
}

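// Asks each workload factory to acquire its working memory. The caller must already hold the
// working-memory lock.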
void LoadedNetwork::AllocateWorkingMemory()
{
    BOOST_ASSERT_MSG(m_WorkingMemLock.owns_lock(), "Cannot allocate working memory if mutex is not already locked.");
    if (m_IsWorkingMemAllocated)
    {
        return;
    }
    for (auto&& workloadFactory : m_WorkloadFactories)
    {
        workloadFactory.second->Acquire();
    }
    m_IsWorkingMemAllocated = true;
}

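// Releases the working memory previously acquired by the workload factories, if any is currently allocated.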
void LoadedNetwork::FreeWorkingMemory()
{
    std::lock_guard<UniqueMutexLock> lockGuard(m_WorkingMemLock);
    if (!m_IsWorkingMemAllocated)
    {
        return;
    }
    // Informs the memory managers to release memory in their respective memory groups.
    for (auto&& workloadFactory : m_WorkloadFactories)
    {
        workloadFactory.second->Release();
    }
    m_IsWorkingMemAllocated = false;
}

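// Runs the input, workload and output queues in order. Returns false and logs the error if any workload throws.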
bool LoadedNetwork::Execute()
{
    bool success = true;

    auto Fail = [&](const std::exception& error)
    {
        BOOST_LOG_TRIVIAL(error) << "An error occurred attempting to execute a workload: " << error.what();
        success = false;
    };

    try
    {
        std::lock_guard<UniqueMutexLock> lockGuard(m_WorkingMemLock);
        AllocateWorkingMemory();

        for (auto& input : m_InputQueue)
        {
            input->Execute();
        }

        for (auto& workload : m_WorkloadQueue)
        {
            workload->Execute();
        }

        for (auto& output : m_OutputQueue)
        {
            output->Execute();
        }
    }
    catch (const RuntimeException& error)
    {
        Fail(error);
    }
    catch (const std::runtime_error& error)
    {
        Fail(error);
    }

    return success;
}

} // namespace armnn