Release 18.08
platform/upstream/armnn.git: src/armnn/Network.cpp
//
// Copyright © 2017 Arm Ltd. All rights reserved.
// See LICENSE file in the project root for full license information.
//
#include "Network.hpp"
#include "Graph.hpp"
#include "Layer.hpp"
#include "DeviceSpec.hpp"
#include "backends/CpuTensorHandle.hpp"
#include "backends/WorkloadFactory.hpp"
#include "Optimizer.hpp"
#include "armnn/Exceptions.hpp"

#include <armnn/Utils.hpp>
#include <armnn/TypesUtils.hpp>

#include <fcntl.h>
#include <algorithm>
#include <fstream>
#include <memory>
#include <vector>

#include <boost/assert.hpp>
#include <boost/format.hpp>
#include <boost/log/trivial.hpp>
#include <boost/numeric/conversion/converter_policies.hpp>
#include <boost/cast.hpp>

#include "optimizations/All.hpp"
namespace armnn
{

armnn::INetwork* INetwork::CreateRaw()
{
    return new Network();
}

armnn::INetworkPtr INetwork::Create()
{
    return INetworkPtr(CreateRaw(), &INetwork::Destroy);
}
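
// Usage sketch: INetworkPtr owns the network and invokes INetwork::Destroy on
// scope exit, so CreateRaw() is only needed when the caller must manage the
// lifetime manually. (The variable names below are illustrative.)
//
//     armnn::INetworkPtr net = armnn::INetwork::Create();
//     armnn::IConnectableLayer* input = net->AddInputLayer(0, "input");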

void INetwork::Destroy(INetwork* network)
{
    delete boost::polymorphic_downcast<Network*>(network);
}

Status Network::PrintGraph()
{
    m_Graph->Print();
    return Status::Success;
}

void IOptimizedNetwork::Destroy(IOptimizedNetwork* network)
{
    delete boost::polymorphic_downcast<OptimizedNetwork*>(network);
}

Status OptimizedNetwork::PrintGraph()
{
    m_Graph->Print();
    return Status::Success;
}

Status OptimizedNetwork::SerializeToDot(std::ostream& stream) const
{
    return m_Graph->SerializeToDot(stream);
}

IOptimizedNetworkPtr Optimize(const INetwork& inNetwork,
                              const std::vector<armnn::Compute>& backendPreferences,
                              const IDeviceSpec& deviceSpec,
                              const OptimizerOptions& options)
{
    if (backendPreferences.empty()) {
        throw armnn::InvalidArgumentException("Invoked Optimize with no backends specified");
    }
    const Network& network = *boost::polymorphic_downcast<const Network*>(&inNetwork);
    std::unique_ptr<Graph> graph = std::make_unique<Graph>(network.GetGraph());

    auto optNet = IOptimizedNetworkPtr(new OptimizedNetwork(std::move(graph)), &IOptimizedNetwork::Destroy);

    OptimizedNetwork* optNetObjPtr = boost::polymorphic_downcast<OptimizedNetwork*>(optNet.get());

    // Perform optimisation passes
    using namespace optimizations;
    Optimizer::Pass(optNetObjPtr->GetGraph(), MakeOptimizations(SquashEqualPermuteSiblings(),
                                                                SquashEqualReshapeSiblings(),
                                                                OptimizeInversePermutes(),
                                                                MovePermuteUp(),
                                                                PermuteAsReshape(),
                                                                OptimizeConsecutiveReshapes()));

    // Infer the tensor infos for all output slots. Throws an exception on failure.
    optNetObjPtr->GetGraph().InferTensorInfos();

    // If the FP32-to-FP16 reduction is enabled, convert the FP32 network to FP16.
    if (options.m_ReduceFp32ToFp16)
    {
        Optimizer::Pass(optNetObjPtr->GetGraph(), MakeOptimizations(Fp32NetworkToFp16Converter()));
    }

    // We know that DeviceSpec should be the only implementation of IDeviceSpec.
    const DeviceSpec& spec = *boost::polymorphic_downcast<const DeviceSpec*>(&deviceSpec);

    // Determine which of the preferred backends are available for use,
    // and whether CpuRef was specified as one of them.
    bool cpuRefUsed = false;
    std::vector<armnn::Compute> availablePreferredBackends;
    for (const armnn::Compute& backend : backendPreferences)
    {
        // Check if the backend is in the available backend devices.
        if (std::find(spec.m_SupportedComputeDevices.begin(),
                      spec.m_SupportedComputeDevices.end(), backend) !=
                      spec.m_SupportedComputeDevices.end())
        {
            availablePreferredBackends.push_back(backend);
            if (armnn::Compute::CpuRef == backend) {
                cpuRefUsed = true;
            }
        }
    }
    if (availablePreferredBackends.empty()) {
        BOOST_LOG_TRIVIAL(warning) << "None of the preferred backends " << backendPreferences
                                   << " are supported. Current platform provides " << spec.m_SupportedComputeDevices;
        return {nullptr, &IOptimizedNetwork::Destroy};
    }

    auto ReturnWithError = [&](Layer* layer)
    {
        BOOST_LOG_TRIVIAL(warning) << "Layer of type " << GetLayerTypeAsCString(layer->GetType())
                    << " is not supported on any preferred backend " << backendPreferences;
        return IOptimizedNetworkPtr(nullptr, &IOptimizedNetwork::Destroy);
    };

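    // Backend assignment is greedy: each layer tries the available preferred
    // backends in order and keeps the first one that the workload factory
    // reports as supporting it.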
    // Assign a compute device for all nodes
    for (auto&& layer : optNetObjPtr->GetGraph())
    {
        DataType dataType = layer->GetDataType();
        std::string reasonIfUnsupported;
        bool found = false;
        for (const armnn::Compute& backend : availablePreferredBackends)
        {
            // The compute device must be set on the layer before
            // checking whether it is supported.
            layer->SetComputeDevice(backend);
            if (!IWorkloadFactory::IsLayerSupported(*layer, dataType, reasonIfUnsupported))
            {
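                // If a Float16 layer is unsupported on this backend, check whether
                // it can run in Float32 instead and, if so, bracket it with
                // FP16 <-> FP32 conversion layers.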
                if (dataType == DataType::Float16)
                {
                    if (IWorkloadFactory::IsLayerSupported(*layer, DataType::Float32, reasonIfUnsupported)
                        && layer->GetType() != LayerType::ConvertFp32ToFp16
                        && layer->GetType() != LayerType::ConvertFp16ToFp32)
                    {
                        // Insert FP16 -> FP32 conversion layer before current layer
                        std::vector<ConvertFp16ToFp32Layer*> convertFp16ToFp32Layers =
                            InsertConvertFp16ToFp32LayersBefore(optNetObjPtr->GetGraph(), *layer);

                        // Insert FP32 -> FP16 conversion layer after current layer
                        std::vector<ConvertFp32ToFp16Layer*> convertFp32ToFp16Layers =
                            InsertConvertFp32ToFp16LayersAfter(optNetObjPtr->GetGraph(), *layer);

                        // Assign a supported backend to the newly introduced conversion layers
                        auto AssignFirstSupportedBackend = [&](Layer* layer, Compute preferredBackend)
                        {
                            bool supportedBackendFound = false;
                            std::string reasonIfUnsupported;

                            // Try preferred backend first
                            layer->SetComputeDevice(preferredBackend);
                            if (IWorkloadFactory::IsLayerSupported(*layer, boost::none, reasonIfUnsupported))
                            {
                                supportedBackendFound = true;
                            }
                            else
                            {
                                for (const Compute& backend : availablePreferredBackends)
                                {
                                    // Skip preferred backend (we already determined that it is not supported)
                                    if (backend == preferredBackend)
                                    {
                                        continue;
                                    }

                                    layer->SetComputeDevice(backend);
                                    if (IWorkloadFactory::IsLayerSupported(*layer, boost::none, reasonIfUnsupported))
                                    {
                                        supportedBackendFound = true;
                                        break;
                                    }
                                }
                            }

                            return supportedBackendFound;
                        };

                        for (ConvertFp16ToFp32Layer* convertLayer : convertFp16ToFp32Layers)
                        {
                            if (!AssignFirstSupportedBackend(convertLayer, backend))
                            {
                                return ReturnWithError(convertLayer);
                            }
                        }

                        for (ConvertFp32ToFp16Layer* convertLayer : convertFp32ToFp16Layers)
                        {
                            if (!AssignFirstSupportedBackend(convertLayer, backend))
                            {
                                return ReturnWithError(convertLayer);
                            }
                        }

                        found = true;
                        break;
                    }
                }
                BOOST_LOG_TRIVIAL(warning) << "Layer of type " << GetLayerTypeAsCString(layer->GetType())
                                           << " is not supported on requested backend " << layer->GetComputeDevice()
                                           << " (reason: " << reasonIfUnsupported
                                           << "), falling back to the next backend.";
            }
            else
            {
                found = true;
                break;
            }
        }

        // No available backend supports this layer.
        if (!found) {
            // NOTE: MemCopy, Constant and Permute layers are not available as
            //       accelerated operations, or are only available under certain
            //       conditions. If CpuRef was not among the requested backends,
            //       assign these layer types to CpuRef rather than failing;
            //       any other unsupported layer type is an error.
            armnn::LayerType layerType = layer->GetType();
            if (!cpuRefUsed && (layerType == armnn::LayerType::MemCopy ||
                                layerType == armnn::LayerType::Constant ||
                                layerType == armnn::LayerType::Permute))
            {
                layer->SetComputeDevice(armnn::Compute::CpuRef);
            }
            else
            {
                return ReturnWithError(layer);
            }
        }
    }

    Optimizer::Pass(optNetObjPtr->GetGraph(), MakeOptimizations(OptimizeInverseConversionsFp16(),
                                                                OptimizeInverseConversionsFp32()));

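    // Add intermediate copy layers where connected layers were assigned to
    // different compute devices, so tensor data can be transferred between
    // backends at runtime.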
    optNetObjPtr->GetGraph().AddCopyLayers();

    // Convert constant tensors to match the data type of the layers that
    // consume them (float32 <-> float16).
    Optimizer::Pass(optNetObjPtr->GetGraph(), MakeOptimizations(ConvertConstantsFloatToHalf()));
    Optimizer::Pass(optNetObjPtr->GetGraph(), MakeOptimizations(ConvertConstantsHalfToFloat()));

    return optNet;
}
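
// Usage sketch: a typical call site. This assumes an IRuntime instance that
// supplies the device spec, and that OptimizerOptions has a default value in
// the header declaration; `net` is as in the sketch near the top of this file.
//
//     std::vector<armnn::Compute> backends = { armnn::Compute::CpuAcc,
//                                              armnn::Compute::CpuRef };
//     armnn::IOptimizedNetworkPtr optNet =
//         armnn::Optimize(*net, backends, runtime->GetDeviceSpec());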

Network::Network()
: m_Graph(std::make_unique<Graph>())
{
}

Network::~Network()
{
}

IConnectableLayer* Network::AddInputLayer(LayerBindingId id, const char* name)
{
    return m_Graph->AddLayer<InputLayer>(id, name);
}

IConnectableLayer* Network::AddFullyConnectedLayerImpl(const FullyConnectedDescriptor& fullyConnectedDescriptor,
                                                       const ConstTensor& weights,
                                                       const ConstTensor* biases,
                                                       const char* name)
{
    if (fullyConnectedDescriptor.m_BiasEnabled && (biases == nullptr))
    {
        throw InvalidArgumentException("AddFullyConnectedLayer: biases cannot be NULL");
    }

    const auto layer = m_Graph->AddLayer<FullyConnectedLayer>(fullyConnectedDescriptor, name);

    layer->m_Weight = std::make_unique<ScopedCpuTensorHandle>(weights);

    if (fullyConnectedDescriptor.m_BiasEnabled)
    {
        layer->m_Bias = std::make_unique<ScopedCpuTensorHandle>(*biases);
    }

    return layer;
}

IConnectableLayer* Network::AddFullyConnectedLayer(const FullyConnectedDescriptor& fullyConnectedDescriptor,
                                                   const ConstTensor& weights,
                                                   const char* name)
{
    return AddFullyConnectedLayerImpl(fullyConnectedDescriptor, weights, nullptr, name);
}

IConnectableLayer* Network::AddFullyConnectedLayer(const FullyConnectedDescriptor& fullyConnectedDescriptor,
                                                   const ConstTensor& weights,
                                                   const ConstTensor& biases,
                                                   const char* name)
{
    return AddFullyConnectedLayerImpl(fullyConnectedDescriptor, weights, &biases, name);
}
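
// Usage sketch: the weights ConstTensor wraps caller-owned memory, and the
// layer copies it into a ScopedCpuTensorHandle above, so the source buffer
// does not need to outlive the call. (`net`, `input` and `weights` are
// illustrative, as in the sketch near the top of this file.)
//
//     armnn::FullyConnectedDescriptor fcDesc;
//     fcDesc.m_BiasEnabled = false;
//     armnn::IConnectableLayer* fc = net->AddFullyConnectedLayer(fcDesc, weights, "fc1");
//     input->GetOutputSlot(0).Connect(fc->GetInputSlot(0));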

IConnectableLayer* Network::AddConvolution2dLayerImpl(const Convolution2dDescriptor& convolution2dDescriptor,
                                                      const ConstTensor& weights,
                                                      const ConstTensor* biases,
                                                      const char* name)
{
    if (convolution2dDescriptor.m_BiasEnabled && (biases == nullptr))
    {
        throw InvalidArgumentException("AddConvolution2dLayer: biases cannot be NULL");
    }

    const auto layer = m_Graph->AddLayer<Convolution2dLayer>(convolution2dDescriptor, name);

    layer->m_Weight = std::make_unique<ScopedCpuTensorHandle>(weights);

    if (convolution2dDescriptor.m_BiasEnabled)
    {
        layer->m_Bias = std::make_unique<ScopedCpuTensorHandle>(*biases);
    }

    return layer;
}

IConnectableLayer* Network::AddConvolution2dLayer(const Convolution2dDescriptor& convolution2dDescriptor,
                                                  const ConstTensor& weights,
                                                  const char* name)
{
    return AddConvolution2dLayerImpl(convolution2dDescriptor, weights, nullptr, name);
}

IConnectableLayer* Network::AddConvolution2dLayer(const Convolution2dDescriptor& convolution2dDescriptor,
                                                  const ConstTensor& weights,
                                                  const ConstTensor& biases,
                                                  const char* name)
{
    return AddConvolution2dLayerImpl(convolution2dDescriptor, weights, &biases, name);
}

IConnectableLayer* Network::AddDepthwiseConvolution2dLayerImpl(
    const DepthwiseConvolution2dDescriptor& convolution2dDescriptor,
    const ConstTensor& weights,
    const ConstTensor* biases,
    const char* name)
{
    if (convolution2dDescriptor.m_BiasEnabled && (biases == nullptr))
    {
        throw InvalidArgumentException("AddDepthwiseConvolution2dLayer: biases cannot be NULL");
    }

    const auto layer = m_Graph->AddLayer<DepthwiseConvolution2dLayer>(convolution2dDescriptor, name);

    layer->m_Weight = std::make_unique<ScopedCpuTensorHandle>(weights);

    if (convolution2dDescriptor.m_BiasEnabled)
    {
        layer->m_Bias = std::make_unique<ScopedCpuTensorHandle>(*biases);
    }

    return layer;
}

IConnectableLayer* Network::AddDepthwiseConvolution2dLayer(
    const DepthwiseConvolution2dDescriptor& convolution2dDescriptor,
    const ConstTensor& weights,
    const char* name)
{
    return AddDepthwiseConvolution2dLayerImpl(convolution2dDescriptor, weights, nullptr, name);
}

IConnectableLayer* Network::AddDepthwiseConvolution2dLayer(
    const DepthwiseConvolution2dDescriptor& convolution2dDescriptor,
    const ConstTensor& weights,
    const ConstTensor& biases,
    const char* name)
{
    return AddDepthwiseConvolution2dLayerImpl(convolution2dDescriptor, weights, &biases, name);
}

IConnectableLayer* Network::AddPermuteLayer(const PermuteDescriptor& permuteDescriptor,
                                            const char* name)
{
    return m_Graph->AddLayer<PermuteLayer>(permuteDescriptor, name);
}

IConnectableLayer* Network::AddPooling2dLayer(const Pooling2dDescriptor& pooling2dDescriptor,
    const char* name)
{
    return m_Graph->AddLayer<Pooling2dLayer>(pooling2dDescriptor, name);
}

IConnectableLayer* Network::AddActivationLayer(const ActivationDescriptor& activationDescriptor,
    const char* name)
{
    return m_Graph->AddLayer<ActivationLayer>(activationDescriptor, name);
}

IConnectableLayer* Network::AddNormalizationLayer(const NormalizationDescriptor& normalizationDescriptor,
    const char* name)
{
    return m_Graph->AddLayer<NormalizationLayer>(normalizationDescriptor, name);
}

IConnectableLayer* Network::AddSoftmaxLayer(const SoftmaxDescriptor& softmaxDescriptor,
    const char* name)
{
    return m_Graph->AddLayer<SoftmaxLayer>(softmaxDescriptor, name);
}

IConnectableLayer* Network::AddSplitterLayer(const ViewsDescriptor& splitterDescriptor,
    const char* name)
{
    return m_Graph->AddLayer<SplitterLayer>(splitterDescriptor, name);
}

IConnectableLayer* Network::AddMergerLayer(const OriginsDescriptor& mergerDescriptor,
    const char* name)
{
    return m_Graph->AddLayer<MergerLayer>(mergerDescriptor, name);
}

IConnectableLayer* Network::AddAdditionLayer(const char* name)
{
    return m_Graph->AddLayer<AdditionLayer>(name);
}

IConnectableLayer* Network::AddMultiplicationLayer(const char* name)
{
    return m_Graph->AddLayer<MultiplicationLayer>(name);
}

IConnectableLayer* Network::AddOutputLayer(LayerBindingId id, const char* name)
{
    return m_Graph->AddLayer<OutputLayer>(id, name);
}

IConnectableLayer* Network::AddBatchNormalizationLayer(const BatchNormalizationDescriptor& desc,
                                                       const ConstTensor&                  mean,
                                                       const ConstTensor&                  variance,
                                                       const ConstTensor&                  beta,
                                                       const ConstTensor&                  gamma,
                                                       const char*                         name)
{
    const auto layer = m_Graph->AddLayer<BatchNormalizationLayer>(desc, name);

    layer->m_Mean = std::make_unique<ScopedCpuTensorHandle>(mean);
    layer->m_Variance = std::make_unique<ScopedCpuTensorHandle>(variance);
    layer->m_Beta = std::make_unique<ScopedCpuTensorHandle>(beta);
    layer->m_Gamma = std::make_unique<ScopedCpuTensorHandle>(gamma);

    return layer;
}

IConnectableLayer* Network::AddResizeBilinearLayer(const ResizeBilinearDescriptor& resizeDescriptor,
                                                   const char* name)
{
    return m_Graph->AddLayer<ResizeBilinearLayer>(resizeDescriptor, name);
}

IConnectableLayer* Network::AddL2NormalizationLayer(const char* name)
{
    return m_Graph->AddLayer<L2NormalizationLayer>(name);
}

IConnectableLayer* Network::AddConstantLayer(const ConstTensor& input, const char* name)
{
    auto layer = m_Graph->AddLayer<ConstantLayer>(name);

    layer->m_LayerOutput = std::make_unique<ScopedCpuTensorHandle>(input);

    return layer;
}

IConnectableLayer* Network::AddReshapeLayer(const ReshapeDescriptor& reshapeDescriptor,
                                            const char* name)
{
    return m_Graph->AddLayer<ReshapeLayer>(reshapeDescriptor, name);
}

IConnectableLayer* Network::AddFloorLayer(const char* name)
{
    return m_Graph->AddLayer<FloorLayer>(name);
}

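// AddLstmLayer copies the caller's tensors into the layer. The basic gate
// parameters are always required; the input-gate ("CIFG") group is required
// only when m_CifgEnabled is false, and the projection and peephole groups
// only when their descriptor flags are set. Optional groups are validated
// against the descriptor below.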
IConnectableLayer* Network::AddLstmLayer(const LstmDescriptor&  descriptor,
                                         const LstmInputParams& params,
                                         const char* name)
{
    const auto layer = m_Graph->AddLayer<LstmLayer>(descriptor, name);

    // LSTM basic parameters
    layer->m_BasicParameters.m_InputToForgetWeights =
        std::make_unique<ScopedCpuTensorHandle>(*(params.m_InputToForgetWeights));
    layer->m_BasicParameters.m_InputToCellWeights =
        std::make_unique<ScopedCpuTensorHandle>(*(params.m_InputToCellWeights));
    layer->m_BasicParameters.m_InputToOutputWeights =
        std::make_unique<ScopedCpuTensorHandle>(*(params.m_InputToOutputWeights));
    layer->m_BasicParameters.m_RecurrentToForgetWeights =
        std::make_unique<ScopedCpuTensorHandle>(*(params.m_RecurrentToForgetWeights));
    layer->m_BasicParameters.m_RecurrentToCellWeights =
        std::make_unique<ScopedCpuTensorHandle>(*(params.m_RecurrentToCellWeights));
    layer->m_BasicParameters.m_RecurrentToOutputWeights =
        std::make_unique<ScopedCpuTensorHandle>(*(params.m_RecurrentToOutputWeights));
    layer->m_BasicParameters.m_ForgetGateBias =
        std::make_unique<ScopedCpuTensorHandle>(*(params.m_ForgetGateBias));
    layer->m_BasicParameters.m_CellBias =
        std::make_unique<ScopedCpuTensorHandle>(*(params.m_CellBias));
    layer->m_BasicParameters.m_OutputGateBias =
        std::make_unique<ScopedCpuTensorHandle>(*(params.m_OutputGateBias));

    // LSTM CIFG parameters
    if (!descriptor.m_CifgEnabled)
    {
        if (params.m_InputToInputWeights == nullptr)
        {
            throw InvalidArgumentException("AddLstmLayer: Input To Input Weights cannot be NULL");
        }
        if (params.m_RecurrentToInputWeights == nullptr)
        {
            throw InvalidArgumentException(
                    "AddLstmLayer: Recurrent To Input Weights cannot be NULL");
        }
        if (params.m_InputGateBias == nullptr)
        {
            throw InvalidArgumentException("AddLstmLayer: Input Gate Bias cannot be NULL");
        }
        layer->m_CifgParameters.m_InputToInputWeights =
            std::make_unique<ScopedCpuTensorHandle>(*(params.m_InputToInputWeights));
        layer->m_CifgParameters.m_RecurrentToInputWeights =
            std::make_unique<ScopedCpuTensorHandle>(*(params.m_RecurrentToInputWeights));
        // In the VTS tests, cell-to-input weights may be null, even if the other CIFG params are not.
        if (params.m_CellToInputWeights != nullptr)
        {
            layer->m_CifgParameters.m_CellToInputWeights =
                std::make_unique<ScopedCpuTensorHandle>(*(params.m_CellToInputWeights));
        }
        layer->m_CifgParameters.m_InputGateBias =
            std::make_unique<ScopedCpuTensorHandle>(*(params.m_InputGateBias));
    }

    // LSTM projection parameters
    if (descriptor.m_ProjectionEnabled)
    {
        if (params.m_ProjectionWeights == nullptr)
        {
            throw InvalidArgumentException("AddLstmLayer: Projection Weights cannot be NULL");
        }
        layer->m_ProjectionParameters.m_ProjectionWeights =
            std::make_unique<ScopedCpuTensorHandle>(*(params.m_ProjectionWeights));
        if (params.m_ProjectionBias != nullptr)
        {
            layer->m_ProjectionParameters.m_ProjectionBias =
                std::make_unique<ScopedCpuTensorHandle>(*(params.m_ProjectionBias));
        }
    }

    // LSTM peephole parameters
    if (descriptor.m_PeepholeEnabled)
    {
        if (params.m_CellToForgetWeights == nullptr)
        {
            throw InvalidArgumentException("AddLstmLayer: Cell To Forget Weights cannot be NULL");
        }
        if (params.m_CellToOutputWeights == nullptr)
        {
            throw InvalidArgumentException("AddLstmLayer: Cell To Output Weights cannot be NULL");
        }
        layer->m_PeepholeParameters.m_CellToForgetWeights =
            std::make_unique<ScopedCpuTensorHandle>(*(params.m_CellToForgetWeights));
        layer->m_PeepholeParameters.m_CellToOutputWeights =
            std::make_unique<ScopedCpuTensorHandle>(*(params.m_CellToOutputWeights));
    }
    return layer;
}

OptimizedNetwork::OptimizedNetwork(std::unique_ptr<Graph> graph)
    : m_Graph(std::move(graph))
{
}

OptimizedNetwork::~OptimizedNetwork()
{
}

} // namespace armnn