IVGCVSW-4449 Add missing QLstm nullptr checks
[platform/upstream/armnn.git] / src / armnn / Network.cpp
1 //
2 // Copyright © 2017 Arm Ltd. All rights reserved.
3 // SPDX-License-Identifier: MIT
4 //
5
6 #include "Network.hpp"
7 #include "Graph.hpp"
8 #include "Layer.hpp"
9 #include "DeviceSpec.hpp"
10 #include "Optimizer.hpp"
11 #include "SubgraphViewSelector.hpp"
12 #include "BackendSettings.hpp"
13 #include "optimizations/All.hpp"
14
15 #include <backendsCommon/CpuTensorHandle.hpp>
16 #include <backendsCommon/WorkloadFactory.hpp>
17 #include <armnn/backends/IBackendInternal.hpp>
18 #include <backendsCommon/TensorHandleFactoryRegistry.hpp>
19
20 #include <armnn/Exceptions.hpp>
21 #include <armnn/Utils.hpp>
22 #include <armnn/TypesUtils.hpp>
23 #include <armnn/BackendRegistry.hpp>
24 #include <armnn/Logging.hpp>
25 #include <armnn/utility/Assert.hpp>
26 #include <armnn/utility/IgnoreUnused.hpp>
27 #include <armnn/utility/PolymorphicDowncast.hpp>
28
29 #include <ProfilingService.hpp>
30
31 #include <fcntl.h>
32 #include <algorithm>
33 #include <fstream>
34 #include <memory>
35 #include <vector>
37
38 #include <boost/format.hpp>
39 #include <boost/numeric/conversion/converter_policies.hpp>
40 #include <boost/cast.hpp>
41
42 namespace armnn
43 {
44
45 armnn::INetwork* INetwork::CreateRaw()
46 {
47     return new Network();
48 }
49
50 armnn::INetworkPtr INetwork::Create()
51 {
52     return INetworkPtr(CreateRaw(), &INetwork::Destroy);
53 }
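// Typical usage (a minimal sketch for illustration; the layer call shown is an
// assumption about the caller, not something defined by this factory):
//     armnn::INetworkPtr net = armnn::INetwork::Create();
//     armnn::IConnectableLayer* input = net->AddInputLayer(0, "input");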
54
55 void INetwork::Destroy(INetwork* network)
56 {
57     delete PolymorphicDowncast<Network*>(network);
58 }
59
60 void IOptimizedNetwork::Destroy(IOptimizedNetwork* network)
61 {
62     delete PolymorphicDowncast<OptimizedNetwork*>(network);
63 }
64
65 Status OptimizedNetwork::PrintGraph()
66 {
67     m_Graph->Print();
68     return Status::Success;
69 }
70
71 Status OptimizedNetwork::SerializeToDot(std::ostream& stream) const
72 {
73     return m_Graph->SerializeToDot(stream);
74 }
75
76 void ReportError(const std::string& errorMessage,
77                  Optional<std::vector<std::string>&> errorMessages)
78 {
79     std::stringstream fullErrorMessage;
80     fullErrorMessage << "ERROR: " << errorMessage;
81     ARMNN_LOG(warning) << fullErrorMessage.str();
82     if (errorMessages)
83     {
84         errorMessages.value().push_back(fullErrorMessage.str());
85     }
86 }
87
88 void ReportWarning(const std::string& warningMessage,
89                    Optional<std::vector<std::string>&> warningMessages)
90 {
91     std::stringstream fullWarningMessage;
92     fullWarningMessage << "WARNING: " << warningMessage;
93     ARMNN_LOG(warning) << fullWarningMessage.str();
94     if (warningMessages)
95     {
96         warningMessages.value().push_back(fullWarningMessage.str());
97     }
98 }
99
100 OptimizationResult ReturnWithError(OptimizationResult res,
101                                    const Layer* layer,
102                                    const BackendSettings& backendSettings,
103                                    Optional<std::vector<std::string>&> errMessages)
104 {
105     std::stringstream failureMsg;
106     failureMsg << "Layer of type " << GetLayerTypeAsCString(layer->GetType())
107                << " is not supported on any preferred backend " << backendSettings.m_PreferredBackends;
108     ReportError(failureMsg.str(), errMessages);
109
110     res.m_Error = true;
111     return res;
112 }
113
114
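// Checks that every QAsymmU8 output of the layer has a quantization scale set.
// Returns false (and reports an error) if a scale is missing. As a side effect,
// Softmax outputs with incorrect quantization parameters are reset to
// scale 1/256 and offset 0.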
115 bool CheckScaleSetOnQuantizedType(Layer* layer, Optional<std::vector<std::string>&> errMessages)
116 {
117     bool noErrors = true;
118     unsigned int numOutputs = layer->GetNumOutputSlots();
119     for (unsigned int i = 0; i < numOutputs; i++) {
120         OutputSlot& outputSlot = layer->GetOutputSlot(i);
121         TensorInfo info = outputSlot.GetTensorInfo();
122         if (DataType::QAsymmU8 == info.GetDataType()) {
123             if (0.f == info.GetQuantizationScale()) {
124                 noErrors = false;
125                 std::stringstream ss;
126                 ss << "output " << i << " of layer " << GetLayerTypeAsCString(layer->GetType())
127                    << " (" << layer->GetNameStr() << ") is of type"
128                    << " Quantized 8 bit but its scale parameter has not been set";
129                 ReportError(ss.str(), errMessages);
130             }
131             // Softmax under QAsymmU8 must always have scale (1.0f/256.0f) and offset 0
132             if ((info.GetQuantizationScale() != (1.0f / 256.0f) ||
133                  info.GetQuantizationOffset() != 0) &&
134                  layer->GetType() == armnn::LayerType::Softmax)
135             {
136                 std::stringstream ss;
137                 ss << "Quantization parameters for Softmax layer (Scale: " <<
138                 info.GetQuantizationScale() << " and Offset: " << info.GetQuantizationOffset() <<
139                 ") are incorrect and have been updated to Scale: 0.00390625 and Offset: 0";
140                 ARMNN_LOG(warning) << ss.str();
141                 info.SetQuantizationScale((1.0f /256.0f));
142                 info.SetQuantizationOffset(0);
143                 outputSlot.SetTensorInfo(info);
144             }
145         }
146     }
147     return noErrors;
148 }
149
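// For Convolution2d and FullyConnected layers whose constant weights are stored
// as BFloat16, converts the weight tensor to Float32 in place and returns the
// layer downcast to the requested type.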
150 template <typename LayerT>
151 LayerT* ConvertBf16ToFp32Weight(Layer* l)
152 {
153     LayerT* layer = PolymorphicDowncast<LayerT*>(l);
154     if ((layer->GetType() == LayerType::Convolution2d || layer->GetType() == LayerType::FullyConnected)
155          && layer->m_Weight)
156     {
157         const TensorInfo& info = layer->m_Weight->GetTensorInfo();
158
159         if (info.GetDataType() == DataType::BFloat16)
160         {
161             std::vector<float> newValues(info.GetNumElements());
162
163             armnnUtils::FloatingPointConverter::ConvertBFloat16ToFloat32(
164                 layer->m_Weight->template GetTensor<armnn::BFloat16>(), info.GetNumElements(), newValues.data());
165
166             TensorInfo newInfo(info.GetShape(), DataType::Float32);
167             ConstTensor newInput(newInfo, newValues);
168             layer->m_Weight.reset(new ScopedCpuTensorHandle(newInput));
169         }
170     }
171     return layer;
172 }
173
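// Tries to assign the given backend to the layer. If the layer is only supported
// in Float32, FP16/BF16 conversion layers are inserted around it and assigned a
// backend as well. Returns success, a warning-only result (the caller should try
// the next preferred backend), or an error if no backend can be found for the
// inserted conversion layers.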
174 OptimizationResult AttemptBackendAssignment(BackendSettings& backendSettings,
175                                             Graph& graph,
176                                             Layer* layer,
177                                             BackendId backend,
178                                             DataType dataTypeIn,
179                                             DataType dataTypeOut,
180                                             const std::vector<BackendId>& availablePreferredBackends,
181                                             std::string& reasonIfUnsupported,
182                                             Optional<std::vector<std::string>&> errMessages)
183 {
184     OptimizationResult result;
185
186     // Helper lambda to compose meaningful error message before returning with error
187     auto ReturnError = [&](const Layer* layer)
188         {
189             return ReturnWithError(result, layer, backendSettings, errMessages);
190         };
191
192     // need to set the compute device on the layer
193     // before we can check if it is supported
194     layer->SetBackendId(backend);
195     if (!IWorkloadFactory::IsLayerSupported(*layer, EmptyOptional(), reasonIfUnsupported))
196     {
197         if (dataTypeIn == DataType::Float16 || dataTypeOut == DataType::Float16)
198         {
199             if (IWorkloadFactory::IsLayerSupported(*layer, DataType::Float32, reasonIfUnsupported)
200                 && layer->GetType() != LayerType::ConvertFp32ToFp16
201                 && layer->GetType() != LayerType::ConvertFp16ToFp32)
202             {
203                 // Insert FP16 -> FP32 conversion layer before current layer
204                 std::vector<ConvertFp16ToFp32Layer*> convertFp16ToFp32Layers;
205                 if (dataTypeIn == DataType::Float16)
206                 {
207                     convertFp16ToFp32Layers =
208                         InsertConvertFp16ToFp32LayersBefore(graph, *layer);
209                 }
210
211                 // Insert FP32 -> FP16 conversion layer after current layer
212                 std::vector<ConvertFp32ToFp16Layer*> convertFp32ToFp16Layers;
213                 if (dataTypeOut == DataType::Float16)
214                 {
215                     convertFp32ToFp16Layers =
216                         InsertConvertFp32ToFp16LayersAfter(graph, *layer);
217                 }
218
219                 // Assign a supported backend to the newly introduced conversion layers
220                 auto AssignFirstSupportedBackend = [&](Layer* layer, BackendId preferredBackend)
221                     {
222                         bool supportedBackendFound = false;
223                         std::string reasonIfUnsupported;
224
225                         // Try preferred backend first
226                         layer->SetBackendId(preferredBackend);
227                         if (IWorkloadFactory::IsLayerSupported(*layer,
228                                                                EmptyOptional(),
229                                                                reasonIfUnsupported))
230                         {
231                             supportedBackendFound = true;
232                         }
233                         else
234                         {
235                             for (const auto& backend : availablePreferredBackends)
236                             {
237                                 // Skip preferred backend (we already determined that it is not supported)
238                                 if (backend == preferredBackend)
239                                 {
240                                     continue;
241                                 }
242
243                                 layer->SetBackendId(backend);
244                                 if (IWorkloadFactory::IsLayerSupported(*layer,
245                                                                        EmptyOptional(),
246                                                                        reasonIfUnsupported))
247                                 {
248                                     supportedBackendFound = true;
249                                     break;
250                                 }
251                             }
252                         }
253
254                         return supportedBackendFound;
255                     };
256
257                 for (ConvertFp16ToFp32Layer* convertLayer : convertFp16ToFp32Layers)
258                 {
259                     if (!AssignFirstSupportedBackend(convertLayer, backend))
260                     {
261                         return ReturnError(convertLayer);
262                     }
263                 }
264
265                 for (ConvertFp32ToFp16Layer* convertLayer : convertFp32ToFp16Layers)
266                 {
267                     if (!AssignFirstSupportedBackend(convertLayer, backend))
268                     {
269                         return ReturnError(convertLayer);
270                     }
271                 }
272
273                 return result;
274             }
275         }
276         else if (dataTypeIn == DataType::BFloat16 || dataTypeOut == DataType::BFloat16)
277         {
278             if (IWorkloadFactory::IsLayerSupported(*layer, DataType::Float32, reasonIfUnsupported)
279                 && layer->GetType() != LayerType::ConvertFp32ToBf16
280                 && layer->GetType() != LayerType::ConvertBf16ToFp32)
281             {
282                 // Insert BF16 -> FP32 conversion layer before current layer
283                 std::vector<ConvertBf16ToFp32Layer*> convertBf16ToFp32Layers;
284                 if (dataTypeIn == DataType::BFloat16)
285                 {
286                     convertBf16ToFp32Layers =
287                         InsertConvertBf16ToFp32LayersBefore(graph, *layer);
288                     if (layer->GetType() == LayerType::Convolution2d)
289                     {
290                         ConvertBf16ToFp32Weight<Convolution2dLayer>(layer);
291                     }
292                     else if (layer->GetType() == LayerType::FullyConnected)
293                     {
294                         ConvertBf16ToFp32Weight<FullyConnectedLayer>(layer);
295                     }
296                 }
297
298                 // Insert FP32 -> BF16 conversion layer after current layer
299                 std::vector<ConvertFp32ToBf16Layer*> convertFp32ToBf16Layers;
300                 if (dataTypeOut == DataType::BFloat16)
301                 {
302                     convertFp32ToBf16Layers =
303                         InsertConvertFp32ToBf16LayersAfter(graph, *layer);
304                 }
305
306                 // Assign a supported backend to the newly introduced conversion layers
307                 auto AssignFirstSupportedBackend = [&](Layer* layer, BackendId preferredBackend)
308                     {
309                         bool supportedBackendFound = false;
310                         std::string reasonIfUnsupported;
311
312                         // Try preferred backend first
313                         layer->SetBackendId(preferredBackend);
314                         if (IWorkloadFactory::IsLayerSupported(*layer,
315                                                                EmptyOptional(),
316                                                                reasonIfUnsupported))
317                         {
318                             supportedBackendFound = true;
319                         }
320                         else
321                         {
322                             for (const auto& backend : availablePreferredBackends)
323                             {
324                                 // Skip preferred backend (we already determined that it is not supported)
325                                 if (backend == preferredBackend)
326                                 {
327                                     continue;
328                                 }
329
330                                 layer->SetBackendId(backend);
331                                 if (IWorkloadFactory::IsLayerSupported(*layer,
332                                                                        EmptyOptional(),
333                                                                        reasonIfUnsupported))
334                                 {
335                                     supportedBackendFound = true;
336                                     break;
337                                 }
338                             }
339                         }
340
341                         return supportedBackendFound;
342                     };
343
344                 for (ConvertBf16ToFp32Layer* convertLayer : convertBf16ToFp32Layers)
345                 {
346                     if (!AssignFirstSupportedBackend(convertLayer, backend))
347                     {
348                         return ReturnError(convertLayer);
349                     }
350                 }
351
352                 for (ConvertFp32ToBf16Layer* convertLayer : convertFp32ToBf16Layers)
353                 {
354                     if (!AssignFirstSupportedBackend(convertLayer, backend))
355                     {
356                         return ReturnError(convertLayer);
357                     }
358                 }
359
360                 return result;
361             }
362         }
363
364         std::stringstream warningMsg;
365         warningMsg << "Layer of type " << GetLayerTypeAsCString(layer->GetType())
366                    << " is not supported on requested backend " << layer->GetBackendId().Get()
367                    << " for input data type " << GetDataTypeName(dataTypeIn)
368                    << " and output data type " << GetDataTypeName(dataTypeOut)
369                    << " (reason: " << reasonIfUnsupported
370                    << "), falling back to the next backend.";
371         ReportWarning(warningMsg.str(), errMessages);
372
373         return OptimizationResult(true, false);
374     }
375     else
376     {
377         return result;
378     }
379 }
380
381
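// Assigns a backend to every layer in [firstLayer, lastLayer): the layer's
// backend hint is tried first, then the available preferred backends in order,
// and finally CpuRef for a small set of utility layer types. Returns an error
// result if any layer cannot be assigned.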
382 OptimizationResult AssignBackends(OptimizedNetwork* optNetObjPtr,
383                                   BackendSettings& backendSettings,
384                                   Graph::Iterator& firstLayer,
385                                   Graph::Iterator& lastLayer,
386                                   Optional<std::vector<std::string>&> errMessages)
387 {
388     OptimizationResult result;
389
390     // Helper lambda to compose meaningful error message before returning with error
391     auto ReturnError = [&](const Layer* layer)
392         {
393             return ReturnWithError(result, layer, backendSettings, errMessages);
394         };
395
396
397     auto availablePreferredBackends = backendSettings.GetAvailablePreferredBackends();
398     if (availablePreferredBackends.empty())
399     {
400         std::stringstream failureMsg;
401         failureMsg << "No preferred backends are available";
402         ReportError(failureMsg.str(), errMessages);
403
404         result.m_Error = true;
405         return result;
406     }
407
408     for (auto it = firstLayer; it != lastLayer; ++it)
409     {
410         auto layer = *it;
411
412         DataType dataTypeIn  = layer->GetNumInputSlots() == 0 ? DataType::Float32 :
413             layer->GetInputSlot(0).GetConnectedOutputSlot()->GetTensorInfo().GetDataType();
414         DataType dataTypeOut = layer->GetNumOutputSlots() == 0 ? DataType::Float32 :
415             layer->GetOutputSlot(0).GetTensorInfo().GetDataType();
416
417         std::string reasonIfUnsupported;
418         bool found = false;
419         if (!CheckScaleSetOnQuantizedType(layer, errMessages))
420         {
421             // don't bomb immediately, find all the quantized outputs
422             // which haven't had a scale set and report them all back.
423             result.m_Error = true;
424         }
425
426         // First try to assign the layer to its hinted backend
427         if (layer->GetBackendHint().has_value() &&
428             backendSettings.IsBackendSupported(layer->GetBackendHint().value()) &&
429             AttemptBackendAssignment(backendSettings,
430                                      optNetObjPtr->GetGraph(),
431                                      layer,
432                                      layer->GetBackendHint().value(),
433                                      dataTypeIn,
434                                      dataTypeOut,
435                                      availablePreferredBackends,
436                                      reasonIfUnsupported,
437                                      errMessages).IsOk())
438         {
439             found = true;
440             backendSettings.m_SelectedBackends.insert(layer->GetBackendHint().value());
441         }
442         else
443         {
444             // Try to assign the layer to the preferred backends, in order
445             for (const auto& backend : availablePreferredBackends)
446             {
447                 if (layer->GetBackendHint().has_value() &&
448                     layer->GetBackendHint().value() == backend)
449                 {
450                     continue; //Don't re-test the backend hint
451                 }
452
453                 OptimizationResult res = AttemptBackendAssignment(backendSettings,
454                                                                   optNetObjPtr->GetGraph(),
455                                                                   layer,
456                                                                   backend,
457                                                                   dataTypeIn,
458                                                                   dataTypeOut,
459                                                                   availablePreferredBackends,
460                                                                   reasonIfUnsupported,
461                                                                   errMessages);
462
463                 if (res.IsOk())
464                 {
465                     found = true;
466                     backendSettings.m_SelectedBackends.insert(backend);
467                     break;
468                 }
469                 else if (res.IsError())
470                 {
471                    return res;  // Cannot continue.
472                    // Note: we don't need to log the error as it would already
473                    // be logged in AttemptBackendAssignment().
474                 }
475                 else
476                 {
477                     ARMNN_ASSERT_MSG(res.IsWarningOnly(), "OptimizationResult in unexpected state.");
478                 }
479             }
480         }
481
482         // If the layer could not be assigned to any backend, report it (a few layer types can still fall back to CpuRef below).
483         if (!found)
484         {
485             // NOTE: if the layer is one of a small set of utility types AND CpuRef is not already a
486             //       fallback, we set the compute device on the layer to CpuRef anyway. These types are
487             //       not available as accelerated operations, or only under certain conditions;
488             //       currently they comprise MemCopy, Constant and Permute.
489             armnn::LayerType layerType = layer->GetType();
490             if (!backendSettings.IsCpuRefUsed() && (layerType == armnn::LayerType::MemCopy ||
491                                                     layerType == armnn::LayerType::Constant ||
492                                                     layerType == armnn::LayerType::Permute))
493             {
494                 BackendId cpuBackendId(armnn::Compute::CpuRef);
495                 layer->SetBackendId(cpuBackendId);
496                 backendSettings.m_SelectedBackends.insert(cpuBackendId);
497             }
498             else
499             {
500                 return ReturnError(layer);
501             }
502         }
503     }
504
505     return result;
506 }
507
508 OptimizationResult AssignBackends(OptimizedNetwork* optNetObjPtr,
509                                   BackendSettings& backendSettings,
510                                   SubgraphView& subgraph,
511                                   Optional<std::vector<std::string>&> errMessages)
512 {
513     Graph::Iterator firstLayer = subgraph.begin();
514     Graph::Iterator lastLayer  = subgraph.end();
515     return AssignBackends(optNetObjPtr,
516                           backendSettings,
517                           firstLayer,
518                           lastLayer,
519                           errMessages);
520 }
521
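// Instantiates each supported backend via the BackendRegistry, registers its
// tensor handle factories with the given registry, and returns the backend
// objects keyed by backend id.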
522 BackendsMap CreateSupportedBackends(TensorHandleFactoryRegistry& handleFactoryRegistry,
523                                     BackendSettings& backendSettings)
524 {
525     BackendsMap backends;
526     auto const& backendRegistry = BackendRegistryInstance();
527     for (auto&& selectedBackend : backendSettings.m_SupportedBackends)
528     {
529         auto backendFactory = backendRegistry.GetFactory(selectedBackend);
530         auto backendObjPtr = backendFactory();
531         ARMNN_ASSERT(backendObjPtr);
532
533         backendObjPtr->RegisterTensorHandleFactories(handleFactoryRegistry);
534
535         backends[backendObjPtr->GetId()] = std::move(backendObjPtr);
536     }
537
538     return backends;
539 }
540
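// For every selected backend, selects the sub-graphs assigned to that backend,
// asks the backend to optimize them, substitutes the optimized sub-graphs back
// into the main graph, and re-assigns any failed sub-graphs to the remaining
// backends.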
541 OptimizationResult ApplyBackendOptimizations(OptimizedNetwork* optNetObjPtr,
542                                              BackendSettings& backendSettings,
543                                              BackendsMap& backends,
544                                              Optional<std::vector<std::string>&> errMessages)
545 {
546     ARMNN_ASSERT(optNetObjPtr);
547
548     OptimizationResult result;
549
550     // Get the optimized graph
551     Graph& optGraph = optNetObjPtr->GetGraph();
552
553     // Run backend specific optimizations
554     for (auto&& selectedBackend : backendSettings.m_SelectedBackends)
555     {
556         auto backendObjPtr = backends.find(selectedBackend)->second.get();
557         ARMNN_ASSERT(backendObjPtr);
558
559         // Select sub-graphs based on backend
560         SubgraphViewSelector::Subgraphs subgraphs =
561                 SubgraphViewSelector::SelectSubgraphs(optGraph,
562                                                       // Select layers assigned to the requested backend
563                                                       [&backendObjPtr](const Layer& layer)
564                                                       {
565                                                           return layer.GetType() != LayerType::Input &&
566                                                                  layer.GetType() != LayerType::Output &&
567                                                                  layer.GetBackendId() == backendObjPtr->GetId();
568                                                       });
569         if (subgraphs.empty())
570         {
571             // No sub-graphs found, try with next selected backend
572             continue;
573         }
574
575         // Try to optimize each sub-graph
576         for (auto& subgraph : subgraphs)
577         {
578             // Try to optimize the current sub-graph
579             OptimizationViews optimizationViews = backendObjPtr->OptimizeSubgraphView(*subgraph);
580             ARMNN_ASSERT(optimizationViews.Validate(*subgraph));
581
582             // Optimization attempted, check the resulting optimized sub-graph
583             for (auto& substitution : optimizationViews.GetSubstitutions())
584             {
585                 // Sub-graph optimized, substitute the sub-graph with the new optimized one in the main optimized graph
586                 SubgraphView& replacementSubgraph   = substitution.m_ReplacementSubgraph;
587                 SubgraphView& substitutableSubgraph = substitution.m_SubstitutableSubgraph;
588                 optGraph.SubstituteSubgraph(substitutableSubgraph, replacementSubgraph);
589
590                 // Assign the current backend to the optimized sub-graph
591                 std::for_each(replacementSubgraph.begin(), replacementSubgraph.end(), [&selectedBackend](Layer* l)
592                     {
593                         ARMNN_ASSERT(l);
594                         l->SetBackendId(selectedBackend);
595                     });
596             }
597
598             if (!optimizationViews.GetFailedSubgraphs().empty())
599             {
600                 std::stringstream warningMsg;
601                 warningMsg << "Some sub-graph(s) failed to optimize on " << backendObjPtr->GetId() << " backend.";
602                 ReportWarning(warningMsg.str(), errMessages);
603
604                 // Failed to optimize the given sub-graph, re-assign the sub-graph layers to other available backends
605                 BackendSettings settingsCopy(backendSettings);
606                 if (!backendObjPtr->GetId().IsCpuRef())
607                 {
608                     // Add the current backend to the list of backends to ignore
609                     settingsCopy.m_IgnoredBackends.insert(backendObjPtr->GetId());
610                 }
611
612                 int count=0;
613                 for (auto& failedSubgraph : optimizationViews.GetFailedSubgraphs())
614                 {
615                     // An error occurred: the optimization was attempted but not performed, try different backends
616                     std::stringstream subgraphMsg;
617                     subgraphMsg << "Re-assigning backends to " << failedSubgraph.GetLayers().size()
618                                 << " layers inside sub-graph " << count++;
619                     ReportWarning(subgraphMsg.str(), errMessages);
620
621                     OptimizationResult reassignmentResult = AssignBackends(optNetObjPtr,
622                                                                            settingsCopy,
623                                                                            *subgraph,
624                                                                            errMessages);
625                     if (reassignmentResult.m_Error)
626                     {
627                         // Failed to re-assign one of the remaining backends to each layer of the sub-graph
628                         result.m_Error = true;
629                         return result;
630                     }
631                 }
632             }
633         }
634     }
635
636     return result;
637 }
638
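// Returns true if moving data between the two tensor handle factories needs an
// explicit copy, i.e. the factories differ and the source's export flags do not
// overlap with the destination's import flags.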
639 bool RequiresCopy(ITensorHandleFactory::FactoryId src,
640                   ITensorHandleFactory::FactoryId dst,
641                   TensorHandleFactoryRegistry& registry)
642 {
643     if (src != dst)
644     {
645         ITensorHandleFactory* srcFactory = registry.GetFactory(src);
646         ITensorHandleFactory* dstFactory = registry.GetFactory(dst);
647
648         if (srcFactory && dstFactory &&
649             (srcFactory->GetExportFlags() & dstFactory->GetImportFlags()) != 0)
650         {
651             return false;
652         }
653         return true;
654     }
655     return false;
656 }
657
658 // Find the handle factory for the input layer which results in the fewest required copies.
659 ITensorHandleFactory::FactoryId CalculateSlotOptionForInput(BackendsMap& backends,
660                                                             OutputSlot& slot,
661                                                             TensorHandleFactoryRegistry& registry)
662 {
663     Layer& layer = slot.GetOwningLayer();
664     ARMNN_ASSERT(layer.GetType() == LayerType::Input);
665
666     // Explicitly select the tensorhandle factory for InputLayer because the rules for it are slightly different. It
667     // doesn't matter which backend it is assigned to because they all use the same implementation, which
668     // requires Map/Unmap support. This means that, so long as the handle type supports map/unmap semantics, we can
669     // select a factory with maximum compatibility with the layers connected to the InputLayer.
670
671     // First ensure that the source backend supports the TensorHandle API
672     auto frmBackend = backends.find(layer.GetBackendId());
673     if (frmBackend == backends.end() ||
674         !frmBackend->second->SupportsTensorAllocatorAPI())
675     {
676         return ITensorHandleFactory::LegacyFactoryId;
677     }
678
679     // Go through all connections to the output slot and determine the TensorHandleFactory which results in the
680     // fewest copies.
681     std::map<ITensorHandleFactory::FactoryId, int> factoryScores;
682     int topScore = 0;
683     ITensorHandleFactory::FactoryId topChoice = ITensorHandleFactory::LegacyFactoryId;
684
685     for (auto&& connection : slot.GetConnections())
686     {
687         const Layer& connectedLayer = connection->GetOwningLayer();
688
689         auto toBackend = backends.find(connectedLayer.GetBackendId());
690         ARMNN_ASSERT_MSG(toBackend != backends.end(), "Backend id not found for the connected layer");
691
692         if (!toBackend->second.get()->SupportsTensorAllocatorAPI())
693         {
694             // The destination backend does not support the tensor allocator API, move to the next one
695             continue;
696         }
697
698         auto dstPrefs = toBackend->second.get()->GetHandleFactoryPreferences();
699         for (auto&& dst : dstPrefs)
700         {
701             // Input layers use the mem copy workload or import, so the selected factory must
702             // support either the map/unmap API or Import API
703             ITensorHandleFactory* factory = registry.GetFactory(dst);
704             if (!factory->SupportsMapUnmap() &&
705                 !CheckFlag(factory->GetImportFlags(), MemorySource::Malloc)) // Just support cpu mem imports for now
706             {
707                 // The current tensor handle factory does not support the map/unmap or import
708                 // strategy, move to the next one
709                 continue;
710             }
711
712             auto it = factoryScores.find(dst);
713             if (it == factoryScores.end())
714             {
715                 // Add new score to the table
716                 factoryScores[dst] = 0;
717                 if (topChoice == ITensorHandleFactory::LegacyFactoryId)
718                 {
719                     topChoice = dst;
720                 }
721             }
722             else
723             {
724                 // Increase the score
725                 factoryScores[dst]++;
726
727                 // Track the best option
728                 if (factoryScores[dst] > topScore)
729                 {
730                     topScore = factoryScores[dst];
731                     topChoice = dst;
732                 }
733             }
734         }
735     }
736
737     return topChoice;
738 }
739
740 // Find the handle factory for the output layer which results in the fewest required copies.
741 ITensorHandleFactory::FactoryId CalculateSlotOptionForOutput(BackendsMap& backends,
742                                                             OutputSlot& slot,
743                                                             TensorHandleFactoryRegistry& registry)
744 {
745     IgnoreUnused(backends, slot, registry);
746     return ITensorHandleFactory::DeferredFactoryId;
747 }
748
749 // For all handle factories supported on the source backend, we wish to find the one which requires the fewest copies
750 // when considering all connections.
751 ITensorHandleFactory::FactoryId CalculateSlotOption(BackendsMap& backends,
752                                                     OutputSlot& outputSlot,
753                                                     TensorHandleFactoryRegistry& registry)
754 {
755     // First ensure that the source backend supports the TensorHandle API
756     Layer& layer = outputSlot.GetOwningLayer();
757     auto frmBackend = backends.find(layer.GetBackendId());
758     if (frmBackend == backends.end() ||
759         !frmBackend->second->SupportsTensorAllocatorAPI())
760     {
761         return ITensorHandleFactory::LegacyFactoryId;
762     }
763
764     // Connections to Output layers require map/unmap support on the TensorHandle.
765     bool requiresMapUnmap = false;
766     for (auto&& connection : outputSlot.GetConnections())
767     {
768         const Layer& connectedLayer = connection->GetOwningLayer();
769         if (connectedLayer.GetType() == LayerType::Output)
770         {
771             requiresMapUnmap = true;
772         }
773     }
774
775     IBackendInternal* srcBackend = frmBackend->second.get();
776     auto srcPrefs = srcBackend->GetHandleFactoryPreferences();
777
778     // Initialize the scores
779     std::map<ITensorHandleFactory::FactoryId, int> factoryScores;
780     for (auto&& pref : srcPrefs)
781     {
782         if (requiresMapUnmap) // Only consider factories that support map/unmap if required
783         {
784             ITensorHandleFactory* factory = registry.GetFactory(pref);
785             if (!factory->SupportsMapUnmap())
786             {
787                 // The current tensor handle factory does not support the map/unmap strategy, move to the next one
788                 continue;
789             }
790         }
791
792         auto it = factoryScores.find(pref);
793         if (it == factoryScores.end())
794         {
795             // Add new score to the table
796             factoryScores[pref] = 0;
797         }
798     }
799
800     // Score each handle factory based on how many times it requires copies on the slot connections
801     for (auto&& connection : outputSlot.GetConnections())
802     {
803         const Layer& connectedLayer = connection->GetOwningLayer();
804
805         auto toBackend = backends.find(connectedLayer.GetBackendId());
806         ARMNN_ASSERT_MSG(toBackend != backends.end(), "Backend id not found for the connected layer");
807
808         auto dstPrefs = toBackend->second.get()->GetHandleFactoryPreferences();
809         for (auto&& src : srcPrefs)
810         {
811             if (factoryScores.find(src) == factoryScores.end()) // Don't consider excluded factories
812             {
813                 continue;
814             }
815
816             for (auto&& dst : dstPrefs)
817             {
818                 if (RequiresCopy(src, dst, registry))
819                 {
820                     // A copy would be required for this pairing, so penalise the factory's score
821                     factoryScores[src]++;
822                     break;
823                 }
824             }
825         }
826     }
827
828     // Find the lowest score
829     int minScore = std::numeric_limits<int>::max();
830     for (auto it : factoryScores)
831     {
832         minScore = std::min(minScore, it.second);
833     }
834
835     // Collect the factories matching the best (lowest) score
836     std::vector<ITensorHandleFactory::FactoryId> optimalFactories;
837     for (auto it : factoryScores)
838     {
839         if (it.second == minScore)
840         {
841             optimalFactories.push_back(it.first);
842         }
843     }
844
845     // For all compatible Factories matching the best score, find the preferred one for the current layer.
846     for (auto&& srcPref : srcPrefs)
847     {
848         for (auto&& comp : optimalFactories)
849         {
850             if (comp == srcPref)
851             {
852                 return comp;
853             }
854         }
855     }
856
857     return ITensorHandleFactory::LegacyFactoryId;
858 }
859
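// Chooses how data travels along an edge between two layers: direct sharing of
// the tensor handle, export/import between factories, an explicit copy, or
// Undefined if no compatible option exists.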
860 EdgeStrategy CalculateEdgeStrategy(BackendsMap& backends,
861                                    ITensorHandleFactory::FactoryId srcFactoryId,
862                                    const Layer& layer,
863                                    const Layer& connectedLayer,
864                                    TensorHandleFactoryRegistry& registry)
865 {
866     auto toBackend = backends.find(connectedLayer.GetBackendId());
867     ARMNN_ASSERT_MSG(toBackend != backends.end(), "Backend id not found for the connected layer");
868
869     auto dstPrefs = toBackend->second.get()->GetHandleFactoryPreferences();
870
871     // Legacy API check for backward compatibility
872     if (srcFactoryId == ITensorHandleFactory::LegacyFactoryId || dstPrefs.empty())
873     {
874         if (layer.GetBackendId() != connectedLayer.GetBackendId())
875         {
876             return EdgeStrategy::CopyToTarget;
877         }
878         else
879         {
880             return EdgeStrategy::DirectCompatibility;
881         }
882     }
883
884     // TensorHandleFactory API present, so perform more sophisticated strategies.
885     // Dst Output layers don't require copy because they use import or map/unmap
886     if (connectedLayer.GetType() == LayerType::Output)
887     {
888         return EdgeStrategy::DirectCompatibility;
889     }
890
891     // Search for direct match in prefs
892     for (auto&& pref : dstPrefs)
893     {
894         if (pref == srcFactoryId)
895         {
896             return EdgeStrategy::DirectCompatibility;
897         }
898     }
899
900     // Search for export/import options
901     ITensorHandleFactory* srcFactory = registry.GetFactory(srcFactoryId);
902     if (srcFactory->GetExportFlags() != 0)
903     {
904         for (auto&& pref : dstPrefs)
905         {
906             ITensorHandleFactory* dstFactory = registry.GetFactory(pref);
907
908             // Handles the case where a destination preference is not registered in the TensorHandleFactoryRegistry
909             if (!dstFactory) {
910                 continue;
911             }
912
913             if ((dstFactory->GetImportFlags() & srcFactory->GetExportFlags()) != 0)
914             {
915                 return EdgeStrategy::ExportToTarget;
916             }
917         }
918     }
919
920     // Search for copy options via map/unmap
921     if (srcFactory->SupportsMapUnmap())
922     {
923         for (auto&& pref : dstPrefs)
924         {
925             ITensorHandleFactory* dstFactory = registry.GetFactory(pref);
926             if (dstFactory && dstFactory->SupportsMapUnmap())
927             {
928                 return EdgeStrategy::CopyToTarget;
929             }
930         }
931     }
932
933     return EdgeStrategy::Undefined;
934 }
935
936 // Select the TensorHandleFactories and the corresponding memory strategy
937 OptimizationResult SelectTensorHandleStrategy(Graph& optGraph,
938                                               BackendsMap& backends,
939                                               TensorHandleFactoryRegistry& registry,
940                                               Optional<std::vector<std::string>&> errMessages)
941 {
942     OptimizationResult result;
943
944     optGraph.ForEachLayer([&backends, &registry, &result, &errMessages](Layer* layer)
945     {
946         ARMNN_ASSERT(layer);
947
948         // Let's make sure the backend is in our list of supported backends. Something went wrong during backend
949         // assignment if this check fails.
950         ARMNN_ASSERT(backends.find(layer->GetBackendId()) != backends.end());
951
952         // Check each output separately
953         for (unsigned int slotIdx = 0; slotIdx < layer->GetNumOutputSlots(); slotIdx++)
954         {
955             OutputSlot& outputSlot = layer->GetOutputSlot(slotIdx);
956
957             ITensorHandleFactory::FactoryId slotOption = ITensorHandleFactory::LegacyFactoryId;
958
959             // Calculate the factory to use which results in the fewest copies being made.
960             switch(layer->GetType())
961             {
962                 case LayerType::Input:
963                     slotOption = CalculateSlotOptionForInput(backends, outputSlot, registry);
964                     break;
965                 case LayerType::Output:
966                     slotOption = CalculateSlotOptionForOutput(backends, outputSlot, registry);
967                     break;
968                 default:
969                     slotOption = CalculateSlotOption(backends, outputSlot, registry);
970                     break;
971             }
972             outputSlot.SetTensorHandleFactory(slotOption);
973
974             // Now determine the "best" edge strategy for each connection given the slotOption.
975             unsigned int connectionIdx = 0;
976             for (auto&& connection : outputSlot.GetConnections())
977             {
978                 const Layer& connectedLayer = connection->GetOwningLayer();
979
980                 EdgeStrategy strategy = CalculateEdgeStrategy(backends, slotOption, *layer, connectedLayer, registry);
981
982                 if (strategy == EdgeStrategy::Undefined)
983                 {
984                     result.m_Error = true;
985                     if (errMessages)
986                     {
987                         errMessages.value().emplace_back("Could not find valid strategy required for compatibility"
988                                                          " between backends.");
989                     }
990                     return;
991                 }
992
993                 outputSlot.SetEdgeStrategy(connectionIdx, strategy);
994
995                 connectionIdx++;
996             }
997         }
998     });
999
1000     return result;
1001 }
1002
1003 IOptimizedNetworkPtr Optimize(const INetwork& inNetwork,
1004                               const std::vector<BackendId>& backendPreferences,
1005                               const IDeviceSpec& deviceSpec,
1006                               const OptimizerOptions& options,
1007                               Optional<std::vector<std::string>&> messages)
1008 {
1009     if (backendPreferences.empty())
1010     {
1011         throw armnn::InvalidArgumentException("Invoked Optimize with no backends specified");
1012     }
1013
1014     if (options.m_ReduceFp32ToFp16 && options.m_ReduceFp32ToBf16)
1015     {
1016         throw InvalidArgumentException("BFloat16 and Float16 optimization cannot be enabled at the same time.");
1017     }
1018
1019     const Network& network = *PolymorphicDowncast<const Network*>(&inNetwork);
1020     std::unique_ptr<Graph> graph = std::make_unique<Graph>(network.GetGraph());
1021
1022     auto optNet = IOptimizedNetworkPtr(new OptimizedNetwork(std::move(graph)), &IOptimizedNetwork::Destroy);
1023
1024     OptimizedNetwork* optNetObjPtr = PolymorphicDowncast<OptimizedNetwork*>(optNet.get());
1025
1026     // Get the optimized graph
1027     Graph& optGraph = optNetObjPtr->GetGraph();
1028
1029     // Perform optimisation passes
1030     using namespace optimizations;
1031     Optimizer::Pass(optGraph, MakeOptimizations(SquashEqualPermuteSiblings(),
1032                                                 SquashEqualTransposeSiblings(),
1033                                                 SquashEqualReshapeSiblings(),
1034                                                 OptimizeInversePermutes(),
1035                                                 OptimizeInverseTransposes(),
1036                                                 MovePermuteUp(),
1037                                                 MoveTransposeUp(),
1038                                                 PermuteAsReshape(),
1039                                                 TransposeAsReshape(),
1040                                                 OptimizeConsecutiveReshapes(),
1041                                                 FoldPadIntoConvolution2d(),
1042                                                 PermuteAndBatchToSpaceAsDepthToSpace(),
1043                                                 TransposeAndBatchToSpaceAsDepthToSpace()));
1044
1045     // Infer the tensor infos for all output slots. Throws an exception on failure
1046     optGraph.InferTensorInfos();
1047
1048     // If the Fp32-to-Fp16 optimization is enabled, convert the Fp32 network to Fp16
1049     if (options.m_ReduceFp32ToFp16)
1050     {
1051         Optimizer::Pass(optGraph, MakeOptimizations(Fp32NetworkToFp16Converter()));
1052         Optimizer::Pass(optGraph, MakeOptimizations(ConvertConstantsFloatToHalf()));
1053     }
1054
1055     // If the Fp32-to-Bf16 optimization is enabled, convert the Fp32 network to Bf16:
1056     // the inputs of Convolution2d and FullyConnected layers are converted from Fp32 to Bf16,
1057     // and only their constant weights are converted from Fp32 to Bf16
1058     if (options.m_ReduceFp32ToBf16)
1059     {
1060         Optimizer::Pass(optGraph, MakeOptimizations(Fp32NetworkToBf16Converter()));
1061     }
1062
1063     // Initialize backend settings
1064     BackendSettings backendSettings(backendPreferences, deviceSpec);
1065     if (backendSettings.GetAvailablePreferredBackends().empty())
1066     {
1067         std::stringstream failureMsg;
1068         failureMsg << "None of the preferred backends " << backendPreferences
1069                    << " are supported. Current platform provides " << backendSettings.m_SupportedBackends;
1070         ReportError(failureMsg.str(), messages);
1071         return IOptimizedNetworkPtr(nullptr, &IOptimizedNetwork::Destroy);
1072     }
1073
1074     // Create a map to temporarily hold initialized backend objects
1075     TensorHandleFactoryRegistry tensorHandleFactoryRegistry;
1076     BackendsMap backends = CreateSupportedBackends(tensorHandleFactoryRegistry, backendSettings);
1077
1078     // Assign an available backend to each layer
1079     Graph::Iterator firstLayer = optGraph.begin();
1080     Graph::Iterator lastLayer  = optGraph.end();
1081     OptimizationResult assignBackendsResult = AssignBackends(optNetObjPtr,
1082                                                              backendSettings,
1083                                                              firstLayer,
1084                                                              lastLayer,
1085                                                              messages);
1086     if (assignBackendsResult.m_Error)
1087     {
1088         // Failed to assign a backend to each layer
1089         return IOptimizedNetworkPtr(nullptr, &IOptimizedNetwork::Destroy);
1090     }
1091
1092     Optimizer::Pass(optGraph, MakeOptimizations(OptimizeInverseConversionsFp16(),
1093                                                 OptimizeInverseConversionsFp32()));
1094
1095     // Apply the backend-specific optimizations
1096     OptimizationResult backendOptimizationResult = ApplyBackendOptimizations(optNetObjPtr,
1097                                                                              backendSettings,
1098                                                                              backends,
1099                                                                              messages);
1100     if (backendOptimizationResult.m_Error)
1101     {
1102         // Failed to apply the backend-specific optimizations
1103         return IOptimizedNetworkPtr(nullptr, &IOptimizedNetwork::Destroy);
1104     }
1105
1106     // If the debug flag is set, then insert a DebugLayer after each layer
1107     // Doing this after applying the backend optimizations as they might have changed some layers
1108     if (options.m_Debug)
1109     {
1110         Optimizer::Pass(optGraph, MakeOptimizations(InsertDebugLayer()));
1111     }
1112
1113     // Calculate the compatibility strategies for tensor handles
1114     OptimizationResult strategyResult = SelectTensorHandleStrategy(optGraph,
1115                                                                    backends,
1116                                                                    tensorHandleFactoryRegistry,
1117                                                                    messages);
1118     if (strategyResult.m_Error)
1119     {
1120         // Failed to select a tensor handle strategy compatible with the assigned backends
1121         return IOptimizedNetworkPtr(nullptr, &IOptimizedNetwork::Destroy);
1122     }
1123
1124     // Based on the tensor handle strategy determined above, insert copy layers where required.
1125     optGraph.AddCompatibilityLayers(backends, tensorHandleFactoryRegistry);
1126
1127     // Convert constants
1128     Optimizer::Pass(optGraph, MakeOptimizations(ConvertConstantsFloatToHalf()));
1129     Optimizer::Pass(optGraph, MakeOptimizations(ConvertConstantsHalfToFloat()));
1130
1131     // Run backend specific optimizations (deprecated)
1132     for (auto&& chosenBackend : backendSettings.m_SelectedBackends)
1133     {
1134         auto factoryFun = BackendRegistryInstance().GetFactory(chosenBackend);
1135         auto backendPtr = factoryFun();
1136         ARMNN_ASSERT(backendPtr.get() != nullptr);
1137
1138         ARMNN_NO_DEPRECATE_WARN_BEGIN
1139         auto backendSpecificOptimizations = backendPtr->GetOptimizations();
1140         ARMNN_NO_DEPRECATE_WARN_END
1141
1142         if (!backendSpecificOptimizations.empty())
1143         {
1144             Optimizer::Pass(optNetObjPtr->GetGraph(), backendSpecificOptimizations);
1145         }
1146     }
1147
1148     return optNet;
1149 }
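// Example call (a minimal sketch; the network, runtime object and backend list
// are illustrative assumptions, not defined in this file):
//     std::vector<std::string> messages;
//     armnn::IOptimizedNetworkPtr optNet = armnn::Optimize(
//         *net, {armnn::Compute::CpuAcc, armnn::Compute::CpuRef},
//         runtime->GetDeviceSpec(), armnn::OptimizerOptions(), messages);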
1150
1151 Network::Network()
1152 : m_Graph(std::make_unique<Graph>())
1153 {
1154 }
1155
1156 Network::~Network()
1157 {
1158 }
1159
1160 Status Network::PrintGraph()
1161 {
1162     m_Graph->Print();
1163     return Status::Success;
1164 }
1165
1166 IConnectableLayer* Network::AddInputLayer(LayerBindingId id, const char* name)
1167 {
1168     return m_Graph->AddLayer<InputLayer>(id, name);
1169 }
1170
1171 IConnectableLayer* Network::AddBatchToSpaceNdLayer(const BatchToSpaceNdDescriptor& batchToSpaceNdDescriptor,
1172                                             const char* name)
1173 {
1174     return m_Graph->AddLayer<BatchToSpaceNdLayer>(batchToSpaceNdDescriptor, name);
1175 }
1176
1177 IConnectableLayer* Network::AddComparisonLayer(const ComparisonDescriptor& comparisonDescriptor,
1178                                                const char* name)
1179 {
1180     return m_Graph->AddLayer<ComparisonLayer>(comparisonDescriptor, name);
1181 }
1182
1183 IConnectableLayer* Network::AddElementwiseUnaryLayer(const ElementwiseUnaryDescriptor& elementwiseUnaryDescriptor,
1184                                                      const char* name)
1185 {
1186     return m_Graph->AddLayer<ElementwiseUnaryLayer>(elementwiseUnaryDescriptor, name);
1187 }
1188
1189 IConnectableLayer* Network::AddFullyConnectedLayerImpl(const FullyConnectedDescriptor& fullyConnectedDescriptor,
1190                                                        const ConstTensor& weights,
1191                                                        const Optional<ConstTensor>& biases,
1192                                                        const char* name)
1193 {
1194     if (fullyConnectedDescriptor.m_BiasEnabled && !biases.has_value())
1195     {
1196         throw InvalidArgumentException("AddFullyConnectedLayer: biases cannot be empty");
1197     }
1198
1199     const auto layer = m_Graph->AddLayer<FullyConnectedLayer>(fullyConnectedDescriptor, name);
1200
1201     layer->m_Weight = std::make_unique<ScopedCpuTensorHandle>(weights);
1202
1203     if (fullyConnectedDescriptor.m_BiasEnabled)
1204     {
1205         layer->m_Bias = std::make_unique<ScopedCpuTensorHandle>(biases.value());
1206     }
1207
1208     return layer;
1209 }
1210
1211 IConnectableLayer* Network::AddFullyConnectedLayer(const FullyConnectedDescriptor& fullyConnectedDescriptor,
1212                                                    const ConstTensor& weights,
1213                                                    const Optional<ConstTensor>& biases,
1214                                                    const char* name)
1215 {
1216     return AddFullyConnectedLayerImpl(fullyConnectedDescriptor, weights, biases, name);
1217 }
1218
1219 IConnectableLayer* Network::AddFullyConnectedLayer(const FullyConnectedDescriptor& fullyConnectedDescriptor,
1220                                                    const ConstTensor& weights,
1221                                                    const char* name)
1222 {
1223     Optional<ConstTensor> biases;
1224     return AddFullyConnectedLayerImpl(fullyConnectedDescriptor, weights, biases, name);
1225 }
1226
1227 IConnectableLayer* Network::AddFullyConnectedLayer(const FullyConnectedDescriptor& fullyConnectedDescriptor,
1228                                                    const ConstTensor& weights,
1229                                                    const ConstTensor& biases,
1230                                                    const char* name)
1231 {
1232     Optional<ConstTensor> optionalBiases(biases);
1233     return AddFullyConnectedLayerImpl(fullyConnectedDescriptor, weights, optionalBiases, name);
1234 }
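// Example (a minimal sketch; the network reference and the weights tensor are
// illustrative assumptions):
//     armnn::FullyConnectedDescriptor fcDesc;
//     fcDesc.m_BiasEnabled = false;
//     armnn::IConnectableLayer* fc = network.AddFullyConnectedLayer(fcDesc, weights, "fc1");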
1235
1236 IConnectableLayer* Network::AddConcatLayer(const ConcatDescriptor& concatDescriptor,
1237                                            const char* name)
1238 {
1239     return m_Graph->AddLayer<ConcatLayer>(concatDescriptor, name);
1240 }
1241
1242 IConnectableLayer* Network::AddConvolution2dLayerImpl(const Convolution2dDescriptor& convolution2dDescriptor,
1243                                                       const ConstTensor& weights,
1244                                                       const Optional<ConstTensor>& biases,
1245                                                       const char* name)
1246 {
1247     if (convolution2dDescriptor.m_BiasEnabled && !biases.has_value())
1248     {
1249         throw InvalidArgumentException("AddConvolution2dLayer: biases cannot be empty");
1250     }
1251
1252     const auto layer = m_Graph->AddLayer<Convolution2dLayer>(convolution2dDescriptor, name);
1253
1254     layer->m_Weight = std::make_unique<ScopedCpuTensorHandle>(weights);
1255
1256     if (convolution2dDescriptor.m_BiasEnabled)
1257     {
1258         layer->m_Bias = std::make_unique<ScopedCpuTensorHandle>(biases.value());
1259     }
1260
1261     return layer;
1262 }
1263
1264 IConnectableLayer* Network::AddConvolution2dLayer(const Convolution2dDescriptor& convolution2dDescriptor,
1265                                                   const ConstTensor& weights,
1266                                                   const Optional<ConstTensor>& biases,
1267                                                   const char* name)
1268 {
1269     return AddConvolution2dLayerImpl(convolution2dDescriptor, weights, biases, name);
1270 }
1271
1272 IConnectableLayer* Network::AddConvolution2dLayer(const Convolution2dDescriptor& convolution2dDescriptor,
1273                                                   const ConstTensor& weights,
1274                                                   const char* name)
1275 {
1276     Optional<ConstTensor> biases;
1277     return AddConvolution2dLayerImpl(convolution2dDescriptor, weights, biases, name);
1278 }
1279
1280 IConnectableLayer* Network::AddConvolution2dLayer(const Convolution2dDescriptor& convolution2dDescriptor,
1281                                                   const ConstTensor& weights,
1282                                                   const ConstTensor& biases,
1283                                                   const char* name)
1284 {
1285     Optional<ConstTensor> optionalBiases(biases);
1286     return AddConvolution2dLayerImpl(convolution2dDescriptor, weights, optionalBiases, name);
1287 }
1288
1289 IConnectableLayer* Network::AddDepthwiseConvolution2dLayerImpl(
1290     const DepthwiseConvolution2dDescriptor& convolution2dDescriptor,
1291     const ConstTensor& weights,
1292     const Optional<ConstTensor>& biases,
1293     const char* name)
1294 {
1295     if (convolution2dDescriptor.m_BiasEnabled && !biases.has_value())
1296     {
1297         throw InvalidArgumentException("AddDepthwiseConvolution2dLayer: biases cannot be empty");
1298     }
1299
1300     const auto layer = m_Graph->AddLayer<DepthwiseConvolution2dLayer>(convolution2dDescriptor, name);
1301
1302     layer->m_Weight = std::make_unique<ScopedCpuTensorHandle>(weights);
1303
1304     if (convolution2dDescriptor.m_BiasEnabled)
1305     {
1306         layer->m_Bias = std::make_unique<ScopedCpuTensorHandle>(biases.value());
1307     }
1308
1309     return layer;
1310 }
1311
1312 IConnectableLayer* Network::AddDepthToSpaceLayer(const DepthToSpaceDescriptor& depthToSpaceDescriptor,
1313                                                  const char* name)
1314 {
1315     return m_Graph->AddLayer<DepthToSpaceLayer>(depthToSpaceDescriptor, name);
1316 }
1317
1318 IConnectableLayer* Network::AddDepthwiseConvolution2dLayer(
1319         const DepthwiseConvolution2dDescriptor& convolution2dDescriptor,
1320         const ConstTensor& weights,
1321         const Optional<ConstTensor>& biases,
1322         const char* name)
1323 {
1324     return AddDepthwiseConvolution2dLayerImpl(convolution2dDescriptor, weights, biases, name);
1325 }
1326
1327 IConnectableLayer* Network::AddDepthwiseConvolution2dLayer(
1328     const DepthwiseConvolution2dDescriptor& convolution2dDescriptor,
1329     const ConstTensor& weights,
1330     const char* name)
1331 {
1332     Optional<ConstTensor> biases;
1333     return AddDepthwiseConvolution2dLayerImpl(convolution2dDescriptor, weights, biases, name);
1334 }
1335
1336 IConnectableLayer* Network::AddDepthwiseConvolution2dLayer(
1337     const DepthwiseConvolution2dDescriptor& convolution2dDescriptor,
1338     const ConstTensor& weights,
1339     const ConstTensor& biases,
1340     const char* name)
1341 {
1342     Optional<ConstTensor> optionalBiases(biases);
1343     return AddDepthwiseConvolution2dLayerImpl(convolution2dDescriptor, weights, optionalBiases, name);
1344 }
1345
1346 IConnectableLayer* Network::AddDetectionPostProcessLayer(const armnn::DetectionPostProcessDescriptor& descriptor,
1347                                                          const ConstTensor& anchors, const char* name)
1348 {
1349     const auto layer = m_Graph->AddLayer<DetectionPostProcessLayer>(descriptor, name);
1350
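         // The anchors are kept as a constant tensor handle owned by the layer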
1351     layer->m_Anchors = std::make_unique<ScopedCpuTensorHandle>(anchors);
1352
1353     return layer;
1354 }
1355
1356 IConnectableLayer* Network::AddPermuteLayer(const PermuteDescriptor& permuteDescriptor,
1357                                             const char* name)
1358 {
1359     return m_Graph->AddLayer<PermuteLayer>(permuteDescriptor, name);
1360 }
1361
1362 IConnectableLayer* Network::AddPooling2dLayer(const Pooling2dDescriptor& pooling2dDescriptor,
1363     const char* name)
1364 {
1365     return m_Graph->AddLayer<Pooling2dLayer>(pooling2dDescriptor, name);
1366 }
1367
1368 IConnectableLayer* Network::AddActivationLayer(const ActivationDescriptor& activationDescriptor,
1369     const char* name)
1370 {
1371     return m_Graph->AddLayer<ActivationLayer>(activationDescriptor, name);
1372 }
1373
1374 IConnectableLayer* Network::AddArgMinMaxLayer(const ArgMinMaxDescriptor& argMinMaxDescriptor,
1375                                               const char* name)
1376 {
1377     return m_Graph->AddLayer<ArgMinMaxLayer>(argMinMaxDescriptor, name);
1378 }
1379
1380 IConnectableLayer* Network::AddNormalizationLayer(
1381     const NormalizationDescriptor& normalizationDescriptor,
1382     const char* name)
1383 {
1384     return m_Graph->AddLayer<NormalizationLayer>(normalizationDescriptor, name);
1385 }
1386
1387 IConnectableLayer* Network::AddSliceLayer(const SliceDescriptor& sliceDescriptor, const char* name)
1388 {
1389     return m_Graph->AddLayer<SliceLayer>(sliceDescriptor, name);
1390 }
1391
1392 IConnectableLayer* Network::AddSoftmaxLayer(const SoftmaxDescriptor& softmaxDescriptor,
1393     const char* name)
1394 {
1395     return m_Graph->AddLayer<SoftmaxLayer>(softmaxDescriptor, name);
1396 }
1397
1398 IConnectableLayer* Network::AddSplitterLayer(const ViewsDescriptor& splitterDescriptor,
1399     const char* name)
1400 {
1401     return m_Graph->AddLayer<SplitterLayer>(splitterDescriptor, name);
1402 }
1403
1404 IConnectableLayer* Network::AddMaximumLayer(const char* name)
1405 {
1406     return m_Graph->AddLayer<MaximumLayer>(name);
1407 }
1408
1409 IConnectableLayer* Network::AddMinimumLayer(const char* name)
1410 {
1411     return m_Graph->AddLayer<MinimumLayer>(name);
1412 }
1413
1414 IConnectableLayer* Network::AddMergerLayer(const MergerDescriptor& mergerDescriptor,
1415                                            const char* name)
1416 {
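         // Merger is the legacy name for Concat; delegate to AddConcatLayer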
1417     return AddConcatLayer(mergerDescriptor, name);
1418 }
1419
1420 IConnectableLayer* Network::AddAbsLayer(const char * name)
1421 {
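         // Abs is expressed as an ElementwiseUnary layer with the Abs operation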
1422     return AddElementwiseUnaryLayer(ElementwiseUnaryDescriptor(UnaryOperation::Abs), name);
1423 }
1424
1425 IConnectableLayer* Network::AddAdditionLayer(const char* name)
1426 {
1427     return m_Graph->AddLayer<AdditionLayer>(name);
1428 }
1429
1430 IConnectableLayer* Network::AddMultiplicationLayer(const char* name)
1431 {
1432     return m_Graph->AddLayer<MultiplicationLayer>(name);
1433 }
1434
1435 IConnectableLayer* Network::AddOutputLayer(LayerBindingId id, const char* name)
1436 {
1437     return m_Graph->AddLayer<OutputLayer>(id, name);
1438 }
1439
1440 IConnectableLayer* Network::AddBatchNormalizationLayer(const BatchNormalizationDescriptor& desc,
1441                                                        const ConstTensor&                  mean,
1442                                                        const ConstTensor&                  variance,
1443                                                        const ConstTensor&                  beta,
1444                                                        const ConstTensor&                  gamma,
1445                                                        const char*                         name)
1446 {
1447     const auto layer = m_Graph->AddLayer<BatchNormalizationLayer>(desc, name);
1448
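         // Mean, variance, beta and gamma are stored as constant tensors owned by the layer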
1449     layer->m_Mean = std::make_unique<ScopedCpuTensorHandle>(mean);
1450     layer->m_Variance = std::make_unique<ScopedCpuTensorHandle>(variance);
1451     layer->m_Beta = std::make_unique<ScopedCpuTensorHandle>(beta);
1452     layer->m_Gamma = std::make_unique<ScopedCpuTensorHandle>(gamma);
1453
1454     return layer;
1455 }
1456
1457 IConnectableLayer* Network::AddResizeBilinearLayer(const ResizeBilinearDescriptor& descriptor,
1458                                                    const char* name)
1459 {
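         // Translate the bilinear-specific descriptor into the generic Resize descriptor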
1460     ResizeDescriptor resizeDescriptor;
1461     resizeDescriptor.m_Method       = ResizeMethod::Bilinear;
1462     resizeDescriptor.m_DataLayout   = descriptor.m_DataLayout;
1463     resizeDescriptor.m_TargetWidth  = descriptor.m_TargetWidth;
1464     resizeDescriptor.m_TargetHeight = descriptor.m_TargetHeight;
1465
1466     return m_Graph->AddLayer<ResizeLayer>(resizeDescriptor, name);
1467 }
1468
1469 IConnectableLayer* Network::AddResizeLayer(const ResizeDescriptor& resizeDescriptor,
1470                                            const char* name)
1471 {
1472     return m_Graph->AddLayer<ResizeLayer>(resizeDescriptor, name);
1473 }
1474
1475 IConnectableLayer* Network::AddInstanceNormalizationLayer(const InstanceNormalizationDescriptor& desc,
1476                                                           const char* name)
1477 {
1478     return m_Graph->AddLayer<InstanceNormalizationLayer>(desc, name);
1479 }
1480
1481 IConnectableLayer* Network::AddL2NormalizationLayer(const L2NormalizationDescriptor& desc,
1482                                                     const char* name)
1483 {
1484     return m_Graph->AddLayer<L2NormalizationLayer>(desc, name);
1485 }
1486
1487 IConnectableLayer* Network::AddLogSoftmaxLayer(const LogSoftmaxDescriptor& desc,
1488                                                const char* name)
1489 {
1490     return m_Graph->AddLayer<LogSoftmaxLayer>(desc, name);
1491 }
1492
1493 IConnectableLayer* Network::AddConstantLayer(const ConstTensor& input, const char* name)
1494 {
1495     auto layer = m_Graph->AddLayer<ConstantLayer>(name);
1496
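         // The constant tensor data is held by the layer itself rather than supplied via an input slot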
1497     layer->m_LayerOutput = std::make_unique<ScopedCpuTensorHandle>(input);
1498
1499     return layer;
1500 }
1501
1502 IConnectableLayer* Network::AddReshapeLayer(const ReshapeDescriptor& reshapeDescriptor,
1503                                             const char* name)
1504 {
1505     return m_Graph->AddLayer<ReshapeLayer>(reshapeDescriptor, name);
1506 }
1507
1508 IConnectableLayer* Network::AddSpaceToBatchNdLayer(const SpaceToBatchNdDescriptor& spaceToBatchNdDescriptor,
1509                                                    const char* name)
1510 {
1511     return m_Graph->AddLayer<SpaceToBatchNdLayer>(spaceToBatchNdDescriptor, name);
1512 }
1513
1514 IConnectableLayer* Network::AddSpaceToDepthLayer(const SpaceToDepthDescriptor& spaceToDepthDescriptor,
1515                                                  const char* name)
1516 {
1517     return m_Graph->AddLayer<SpaceToDepthLayer>(spaceToDepthDescriptor, name);
1518 }
1519
1520 IConnectableLayer* Network::AddFloorLayer(const char* name)
1521 {
1522     return m_Graph->AddLayer<FloorLayer>(name);
1523 }
1524
1525 IConnectableLayer* Network::AddLstmLayer(const LstmDescriptor&  descriptor,
1526                                          const LstmInputParams& params,
1527                                          const char* name)
1528 {
1529     const auto layer = m_Graph->AddLayer<LstmLayer>(descriptor, name);
1530
1531     //Lstm Basic Parameters
1532     layer->m_BasicParameters.m_InputToForgetWeights =
1533         std::make_unique<ScopedCpuTensorHandle>(*(params.m_InputToForgetWeights));
1534     layer->m_BasicParameters.m_InputToCellWeights =
1535         std::make_unique<ScopedCpuTensorHandle>(*(params.m_InputToCellWeights));
1536     layer->m_BasicParameters.m_InputToOutputWeights =
1537         std::make_unique<ScopedCpuTensorHandle>(*(params.m_InputToOutputWeights));
1538     layer->m_BasicParameters.m_RecurrentToForgetWeights =
1539         std::make_unique<ScopedCpuTensorHandle>(*(params.m_RecurrentToForgetWeights));
1540     layer->m_BasicParameters.m_RecurrentToCellWeights =
1541         std::make_unique<ScopedCpuTensorHandle>(*(params.m_RecurrentToCellWeights));
1542     layer->m_BasicParameters.m_RecurrentToOutputWeights =
1543         std::make_unique<ScopedCpuTensorHandle>(*(params.m_RecurrentToOutputWeights));
1544     layer->m_BasicParameters.m_ForgetGateBias =
1545             std::make_unique<ScopedCpuTensorHandle>(*(params.m_ForgetGateBias));
1546     layer->m_BasicParameters.m_CellBias =
1547             std::make_unique<ScopedCpuTensorHandle>(*(params.m_CellBias));
1548     layer->m_BasicParameters.m_OutputGateBias =
1549             std::make_unique<ScopedCpuTensorHandle>(*(params.m_OutputGateBias));
1550
1551     //Lstm Cifg parameters
1552     if(!descriptor.m_CifgEnabled)
1553     {
1554         if(params.m_InputToInputWeights == nullptr)
1555         {
1556             throw InvalidArgumentException("AddLstmLayer: Input To Input Weights cannot be NULL "
1557                                            "when CIFG is disabled.");
1558         }
1559         if(params.m_RecurrentToInputWeights == nullptr)
1560         {
1561             throw InvalidArgumentException(
1562                     "AddLstmLayer: Recurrent To Input Weights cannot be NULL "
1563                     "when CIFG is disabled.");
1564         }
1565         if(params.m_InputGateBias == nullptr)
1566         {
1567             throw InvalidArgumentException("AddLstmLayer: Input Gate Bias cannot be NULL "
1568                                            "when CIFG is disabled.");
1569         }
1570         layer->m_CifgParameters.m_InputToInputWeights =
1571             std::make_unique<ScopedCpuTensorHandle>(*(params.m_InputToInputWeights));
1572         layer->m_CifgParameters.m_RecurrentToInputWeights =
1573             std::make_unique<ScopedCpuTensorHandle>(*(params.m_RecurrentToInputWeights));
1574         layer->m_CifgParameters.m_InputGateBias =
1575             std::make_unique<ScopedCpuTensorHandle>(*(params.m_InputGateBias));
1576     }
1577
1578     //Lstm projection parameters
1579     if(descriptor.m_ProjectionEnabled)
1580     {
1581         if(params.m_ProjectionWeights == nullptr)
1582         {
1583             throw InvalidArgumentException("AddLstmLayer: Projection Weights cannot be NULL "
1584                                            "when projection is enabled.");
1585         }
1586         layer->m_ProjectionParameters.m_ProjectionWeights =
1587             std::make_unique<ScopedCpuTensorHandle>(*(params.m_ProjectionWeights));
1588         if(params.m_ProjectionBias != nullptr)
1589         {
1590             layer->m_ProjectionParameters.m_ProjectionBias =
1591                 std::make_unique<ScopedCpuTensorHandle>(*(params.m_ProjectionBias));
1592         }
1593     }
1594
1595     //Lstm Peephole params
1596     if(descriptor.m_PeepholeEnabled)
1597     {
1598         if(!descriptor.m_CifgEnabled)
1599         {
1600             if(params.m_CellToInputWeights == nullptr)
1601             {
1602                 throw InvalidArgumentException("AddLstmLayer: Cell To Input Weights cannot be NULL "
1603                                                "when Peephole is enabled and CIFG disabled.");
1604             }
1605
1606             layer->m_PeepholeParameters.m_CellToInputWeights =
1607                 std::make_unique<ScopedCpuTensorHandle>(*(params.m_CellToInputWeights));
1608         }
1609
1610         if(params.m_CellToForgetWeights == nullptr)
1611         {
1612             throw InvalidArgumentException("AddLstmLayer: Cell To Forget Weights cannot be NULL "
1613                                            "when Peephole is enabled.");
1614         }
1615         if(params.m_CellToOutputWeights == nullptr)
1616         {
1617             throw InvalidArgumentException("AddLstmLayer: Cell To Output Weights cannot be NULL "
1618                                            "when Peephole is enabled.");
1619         }
1620
1621         layer->m_PeepholeParameters.m_CellToForgetWeights =
1622             std::make_unique<ScopedCpuTensorHandle>(*(params.m_CellToForgetWeights));
1623         layer->m_PeepholeParameters.m_CellToOutputWeights =
1624             std::make_unique<ScopedCpuTensorHandle>(*(params.m_CellToOutputWeights));
1625     }
1626
1627     //Lstm Layer Normalization params
1628     if(descriptor.m_LayerNormEnabled)
1629     {
1630         if(!descriptor.m_CifgEnabled)
1631         {
1632             if(params.m_InputLayerNormWeights == nullptr)
1633             {
1634                 throw InvalidArgumentException("AddLstmLayer: Input layer normalization weights cannot be NULL "
1635                                                "when layer normalization is enabled and CIFG disabled.");
1636             }
1637             layer->m_LayerNormParameters.m_InputLayerNormWeights =
1638                     std::make_unique<ScopedCpuTensorHandle>(*(params.m_InputLayerNormWeights));
1639         }
1640
1641         if(params.m_ForgetLayerNormWeights == nullptr)
1642         {
1643             throw InvalidArgumentException("AddLstmLayer: Forget layer normalization weights cannot be NULL "
1644                                            "when layer normalization is enabled.");
1645         }
1646         if(params.m_CellLayerNormWeights == nullptr)
1647         {
1648             throw InvalidArgumentException("AddLstmLayer: Cell layer normalization weights cannot be NULL "
1649                                            "when layer normalization is enabled.");
1650         }
1651         if(params.m_OutputLayerNormWeights == nullptr)
1652         {
1653             throw InvalidArgumentException("AddLstmLayer: Output layer normalization weights cannot be NULL "
1654                                            "when layer normalization is enabled.");
1655         }
1656         layer->m_LayerNormParameters.m_ForgetLayerNormWeights =
1657                 std::make_unique<ScopedCpuTensorHandle>(*(params.m_ForgetLayerNormWeights));
1658         layer->m_LayerNormParameters.m_CellLayerNormWeights =
1659                 std::make_unique<ScopedCpuTensorHandle>(*(params.m_CellLayerNormWeights));
1660         layer->m_LayerNormParameters.m_OutputLayerNormWeights =
1661                 std::make_unique<ScopedCpuTensorHandle>(*(params.m_OutputLayerNormWeights));
1662     }
1663     return layer;
1664 }
1665
1666 IConnectableLayer* Network::AddDivisionLayer(const char* name)
1667 {
1668     return m_Graph->AddLayer<DivisionLayer>(name);
1669 }
1670
1671 IConnectableLayer* Network::AddSubtractionLayer(const char* name)
1672 {
1673     return m_Graph->AddLayer<SubtractionLayer>(name);
1674 }
1675
1676 IConnectableLayer* Network::AddMeanLayer(const MeanDescriptor& meanDescriptor, const char* name)
1677 {
1678     return m_Graph->AddLayer<MeanLayer>(meanDescriptor, name);
1679 }
1680
1681 IConnectableLayer* Network::AddPadLayer(const PadDescriptor& padDescriptor, const char* name)
1682 {
1683     return m_Graph->AddLayer<PadLayer>(padDescriptor, name);
1684 }
1685
1686 IConnectableLayer* Network::AddQuantizeLayer(const char* name)
1687 {
1688     return m_Graph->AddLayer<QuantizeLayer>(name);
1689 }
1690
1691 IConnectableLayer* Network::AddDequantizeLayer(const char* name)
1692 {
1693     return m_Graph->AddLayer<DequantizeLayer>(name);
1694 }
1695
1696 IConnectableLayer* Network::AddStridedSliceLayer(const StridedSliceDescriptor& stridedSliceDescriptor,
1697                                                  const char* name)
1698 {
1699     return m_Graph->AddLayer<StridedSliceLayer>(stridedSliceDescriptor, name);
1700 }
1701
1702 IConnectableLayer* Network::AddGreaterLayer(const char* name)
1703 {
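         // Greater is expressed as a Comparison layer with the Greater operation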
1704     return AddComparisonLayer(ComparisonDescriptor(ComparisonOperation::Greater), name);
1705 }
1706
1707 IConnectableLayer* Network::AddEqualLayer(const char* name)
1708 {
1709     return AddComparisonLayer(ComparisonDescriptor(ComparisonOperation::Equal), name);
1710 }
1711
1712 IConnectableLayer* Network::AddRsqrtLayer(const char * name)
1713 {
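         // Rsqrt is expressed as an ElementwiseUnary layer with the Rsqrt operation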
1714     return AddElementwiseUnaryLayer(ElementwiseUnaryDescriptor(UnaryOperation::Rsqrt), name);
1715 }
1716
1717 IConnectableLayer* Network::AddGatherLayer(const char* name)
1718 {
1719     return m_Graph->AddLayer<GatherLayer>(name);
1720 }
1721
1722 IConnectableLayer* Network::AddMergeLayer(const char* name)
1723 {
1724     return m_Graph->AddLayer<MergeLayer>(name);
1725 }
1726
1727 IConnectableLayer* Network::AddSwitchLayer(const char* name)
1728 {
1729     return m_Graph->AddLayer<SwitchLayer>(name);
1730 }
1731
1732 IConnectableLayer* Network::AddPreluLayer(const char* name)
1733 {
1734     return m_Graph->AddLayer<PreluLayer>(name);
1735 }
1736
1737 IConnectableLayer* Network::AddTransposeConvolution2dLayer(const TransposeConvolution2dDescriptor& descriptor,
1738                                                            const ConstTensor& weights,
1739                                                            const Optional<ConstTensor>& biases,
1740                                                            const char* name)
1741 {
1742     if (descriptor.m_BiasEnabled && !biases.has_value())
1743     {
1744         throw InvalidArgumentException("AddTransposeConvolution2dLayer: Biases cannot be empty");
1745     }
1746
1747     const auto layer = m_Graph->AddLayer<TransposeConvolution2dLayer>(descriptor, name);
1748
1749     layer->m_Weight = std::make_unique<ScopedCpuTensorHandle>(weights);
1750
1751     if (descriptor.m_BiasEnabled)
1752     {
1753         layer->m_Bias = std::make_unique<ScopedCpuTensorHandle>(biases.value());
1754     }
1755
1756     return layer;
1757 }
1758
1759 IConnectableLayer* Network::AddTransposeLayer(const TransposeDescriptor& transposeDescriptor,
1760                                               const char* name)
1761 {
1762     return m_Graph->AddLayer<TransposeLayer>(transposeDescriptor, name);
1763 }
1764
1765 IConnectableLayer* Network::AddStackLayer(const StackDescriptor& stackDescriptor,
1766                                           const char* name)
1767 {
1768     return m_Graph->AddLayer<StackLayer>(stackDescriptor, name);
1769 }
1770
1771
1772 IConnectableLayer* Network::AddStandInLayer(const StandInDescriptor& desc,
1773                                             const char* name)
1774 {
1775     return m_Graph->AddLayer<StandInLayer>(desc, name);
1776 }
1777
1778 IConnectableLayer* Network::AddQuantizedLstmLayer(const QuantizedLstmInputParams& params,
1779                                                   const char* name)
1780 {
1781     const auto layer = m_Graph->AddLayer<QuantizedLstmLayer>(name);
1782
1783     // InputToX weights
1784     layer->m_QuantizedLstmParameters.m_InputToInputWeights =
1785             std::make_unique<ScopedCpuTensorHandle>(params.GetInputToInputWeights());
1786     layer->m_QuantizedLstmParameters.m_InputToForgetWeights =
1787             std::make_unique<ScopedCpuTensorHandle>(params.GetInputToForgetWeights());
1788     layer->m_QuantizedLstmParameters.m_InputToCellWeights =
1789             std::make_unique<ScopedCpuTensorHandle>(params.GetInputToCellWeights());
1790     layer->m_QuantizedLstmParameters.m_InputToOutputWeights =
1791             std::make_unique<ScopedCpuTensorHandle>(params.GetInputToOutputWeights());
1792
1793     // RecurrentToX weights
1794     layer->m_QuantizedLstmParameters.m_RecurrentToInputWeights =
1795             std::make_unique<ScopedCpuTensorHandle>(params.GetRecurrentToInputWeights());
1796     layer->m_QuantizedLstmParameters.m_RecurrentToForgetWeights =
1797             std::make_unique<ScopedCpuTensorHandle>(params.GetRecurrentToForgetWeights());
1798     layer->m_QuantizedLstmParameters.m_RecurrentToCellWeights =
1799             std::make_unique<ScopedCpuTensorHandle>(params.GetRecurrentToCellWeights());
1800     layer->m_QuantizedLstmParameters.m_RecurrentToOutputWeights =
1801             std::make_unique<ScopedCpuTensorHandle>(params.GetRecurrentToOutputWeights());
1802
1803     // Bias
1804     layer->m_QuantizedLstmParameters.m_InputGateBias =
1805             std::make_unique<ScopedCpuTensorHandle>(params.GetInputGateBias());
1806     layer->m_QuantizedLstmParameters.m_ForgetGateBias =
1807             std::make_unique<ScopedCpuTensorHandle>(params.GetForgetGateBias());
1808     layer->m_QuantizedLstmParameters.m_CellBias =
1809             std::make_unique<ScopedCpuTensorHandle>(params.GetCellBias());
1810     layer->m_QuantizedLstmParameters.m_OutputGateBias =
1811             std::make_unique<ScopedCpuTensorHandle>(params.GetOutputGateBias());
1812
1813     return layer;
1814 }
1815
1816 IConnectableLayer* Network::AddQLstmLayer(const QLstmDescriptor&  descriptor,
1817                                           const LstmInputParams& params,
1818                                           const char* name)
1819 {
1820     const auto layer = m_Graph->AddLayer<QLstmLayer>(descriptor, name);
1821
1822     // QLstm Basic Parameters
1823     layer->m_BasicParameters.m_InputToForgetWeights =
1824             std::make_unique<ScopedCpuTensorHandle>(*(params.m_InputToForgetWeights));
1825     layer->m_BasicParameters.m_InputToCellWeights =
1826             std::make_unique<ScopedCpuTensorHandle>(*(params.m_InputToCellWeights));
1827     layer->m_BasicParameters.m_InputToOutputWeights =
1828             std::make_unique<ScopedCpuTensorHandle>(*(params.m_InputToOutputWeights));
1829     layer->m_BasicParameters.m_RecurrentToForgetWeights =
1830             std::make_unique<ScopedCpuTensorHandle>(*(params.m_RecurrentToForgetWeights));
1831     layer->m_BasicParameters.m_RecurrentToCellWeights =
1832             std::make_unique<ScopedCpuTensorHandle>(*(params.m_RecurrentToCellWeights));
1833     layer->m_BasicParameters.m_RecurrentToOutputWeights =
1834             std::make_unique<ScopedCpuTensorHandle>(*(params.m_RecurrentToOutputWeights));
1835     layer->m_BasicParameters.m_ForgetGateBias =
1836             std::make_unique<ScopedCpuTensorHandle>(*(params.m_ForgetGateBias));
1837     layer->m_BasicParameters.m_CellBias =
1838             std::make_unique<ScopedCpuTensorHandle>(*(params.m_CellBias));
1839     layer->m_BasicParameters.m_OutputGateBias =
1840             std::make_unique<ScopedCpuTensorHandle>(*(params.m_OutputGateBias));
1841
1842     // QLstm Cifg parameters
1843     if(!descriptor.m_CifgEnabled)
1844     {
1845         if(params.m_InputToInputWeights == nullptr)
1846         {
1847             throw InvalidArgumentException("AddQLstmLayer: Input To Input Weights cannot be NULL");
1848         }
1849
1850         if(params.m_RecurrentToInputWeights == nullptr)
1851         {
1852             throw InvalidArgumentException(
1853                     "AddQLstmLayer: Recurrent To Input Weights cannot be NULL");
1854         }
1855
1856         if(params.m_InputGateBias == nullptr)
1857         {
1858             throw InvalidArgumentException("AddQLstmLayer: Input Gate Bias cannot be NULL");
1859         }
1860
1861         layer->m_CifgParameters.m_InputToInputWeights =
1862                 std::make_unique<ScopedCpuTensorHandle>(*(params.m_InputToInputWeights));
1863         layer->m_CifgParameters.m_RecurrentToInputWeights =
1864                 std::make_unique<ScopedCpuTensorHandle>(*(params.m_RecurrentToInputWeights));
1865         layer->m_CifgParameters.m_InputGateBias =
1866                 std::make_unique<ScopedCpuTensorHandle>(*(params.m_InputGateBias));
1867     }
1868
1869     // QLstm Projection parameters
1870     if(descriptor.m_ProjectionEnabled)
1871     {
1872         if(params.m_ProjectionWeights == nullptr)
1873         {
1874             throw InvalidArgumentException("AddQLstmLayer: Projection Weights cannot be NULL");
1875         }
1876
1877         layer->m_ProjectionParameters.m_ProjectionWeights =
1878                 std::make_unique<ScopedCpuTensorHandle>(*(params.m_ProjectionWeights));
1879
1880         // Projection bias is optional even if projection is enabled
1881         if(params.m_ProjectionBias != nullptr)
1882         {
1883             layer->m_ProjectionParameters.m_ProjectionBias =
1884                     std::make_unique<ScopedCpuTensorHandle>(*(params.m_ProjectionBias));
1885         }
1886
1887     }
1888
1889     // QLstm Peephole params
1890     if(descriptor.m_PeepholeEnabled)
1891     {
1892         if(params.m_CellToForgetWeights == nullptr)
1893         {
1894             throw InvalidArgumentException("AddQLstmLayer: Cell To Forget Weights cannot be NULL");
1895         }
1896
1897         if(params.m_CellToOutputWeights == nullptr)
1898         {
1899             throw InvalidArgumentException("AddQLstmLayer: Cell To Output Weights cannot be NULL");
1900         }
1901
1902         if(!descriptor.m_CifgEnabled)
1903         {
1904             if(params.m_CellToInputWeights == nullptr)
1905             {
1906                 throw InvalidArgumentException("AddQLstmLayer: Cell To Input Weights cannot be NULL");
1907             }
1908
1909             layer->m_PeepholeParameters.m_CellToInputWeights =
1910                     std::make_unique<ScopedCpuTensorHandle>(*(params.m_CellToInputWeights));
1911         }
1912
1913         layer->m_PeepholeParameters.m_CellToForgetWeights =
1914                 std::make_unique<ScopedCpuTensorHandle>(*(params.m_CellToForgetWeights));
1915         layer->m_PeepholeParameters.m_CellToOutputWeights =
1916                 std::make_unique<ScopedCpuTensorHandle>(*(params.m_CellToOutputWeights));
1917     }
1918
1919     // QLstm Layer Normalization params
1920     if(descriptor.m_LayerNormEnabled)
1921     {
1922         if(params.m_ForgetLayerNormWeights == nullptr)
1923         {
1924             throw InvalidArgumentException("AddQLstmLayer: Forget layer normalization weights cannot be NULL");
1925         }
1926
1927         if(params.m_CellLayerNormWeights == nullptr)
1928         {
1929             throw InvalidArgumentException("AddQLstmLayer: Cell layer normalization weights cannot be NULL");
1930         }
1931
1932         if(params.m_OutputLayerNormWeights == nullptr)
1933         {
1934             throw InvalidArgumentException("AddQLstmLayer: Output layer normalization weights cannot be NULL");
1935         }
1936
1937         if(!descriptor.m_CifgEnabled)
1938         {
1939             if(params.m_InputLayerNormWeights == nullptr)
1940             {
1941                 throw InvalidArgumentException("AddQLstmLayer: Input layer normalization weights cannot be NULL");
1942             }
1943
1944             layer->m_LayerNormParameters.m_InputLayerNormWeights =
1945                     std::make_unique<ScopedCpuTensorHandle>(*(params.m_InputLayerNormWeights));
1946         }
1947
1948         layer->m_LayerNormParameters.m_ForgetLayerNormWeights =
1949                 std::make_unique<ScopedCpuTensorHandle>(*(params.m_ForgetLayerNormWeights));
1950         layer->m_LayerNormParameters.m_CellLayerNormWeights =
1951                 std::make_unique<ScopedCpuTensorHandle>(*(params.m_CellLayerNormWeights));
1952         layer->m_LayerNormParameters.m_OutputLayerNormWeights =
1953                 std::make_unique<ScopedCpuTensorHandle>(*(params.m_OutputLayerNormWeights));
1954     }
1955     return layer;
1956 }
1957
1958 void Network::Accept(ILayerVisitor& visitor) const
1959 {
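         // Apply the visitor to every layer in the graph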
1960     for (auto layer : GetGraph())
1961     {
1962         layer->Accept(visitor);
1963     }
1964 }
1965
1966 OptimizedNetwork::OptimizedNetwork(std::unique_ptr<Graph> graph)
1967     : m_Graph(std::move(graph)), m_Guid(profiling::ProfilingService::GetNextGuid())
1968 {
1969 }
1970
1971 OptimizedNetwork::~OptimizedNetwork()
1972 {
1973 }
1974
1975 } // namespace armnn