// Copyright © 2017 Arm Ltd. All rights reserved.
// SPDX-License-Identifier: MIT

#include "Network.hpp"
#include "Graph.hpp"
#include "Layer.hpp"
#include "DeviceSpec.hpp"
#include "Optimizer.hpp"
#include "SubgraphViewSelector.hpp"
#include "BackendSettings.hpp"
#include "optimizations/All.hpp"

#include <backendsCommon/CpuTensorHandle.hpp>
#include <backendsCommon/WorkloadFactory.hpp>
#include <armnn/backends/IBackendInternal.hpp>
#include <backendsCommon/TensorHandleFactoryRegistry.hpp>

#include <armnn/Exceptions.hpp>
#include <armnn/Utils.hpp>
#include <armnn/TypesUtils.hpp>
#include <armnn/BackendRegistry.hpp>
#include <armnn/Logging.hpp>
#include <armnn/utility/Assert.hpp>
#include <armnn/utility/IgnoreUnused.hpp>
#include <armnn/utility/PolymorphicDowncast.hpp>

#include <ProfilingService.hpp>

#include <boost/format.hpp>
#include <boost/numeric/conversion/converter_policies.hpp>
#include <boost/cast.hpp>

#include <algorithm>
#include <limits>
#include <map>
#include <memory>
#include <sstream>
#include <vector>

namespace armnn
{

armnn::INetwork* INetwork::CreateRaw()
{
    return new Network();
}

armnn::INetworkPtr INetwork::Create()
{
    return INetworkPtr(CreateRaw(), &INetwork::Destroy);
}

void INetwork::Destroy(INetwork* network)
{
    delete PolymorphicDowncast<Network*>(network);
}

void IOptimizedNetwork::Destroy(IOptimizedNetwork* network)
{
    delete PolymorphicDowncast<OptimizedNetwork*>(network);
}
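
// A minimal usage sketch for the factory/deleter pair above (illustrative only;
// error handling omitted). INetworkPtr already carries &INetwork::Destroy as its
// deleter, so the raw-pointer path is only needed by callers that cannot hold a
// unique_ptr:
//
//     armnn::INetworkPtr net = armnn::INetwork::Create();    // preferred: RAII
//     armnn::INetwork* raw   = armnn::INetwork::CreateRaw(); // caller must invoke
//     armnn::INetwork::Destroy(raw);                         // Destroy() manually
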
Status OptimizedNetwork::PrintGraph()
{
    m_Graph->Print();
    return Status::Success;
}

Status OptimizedNetwork::SerializeToDot(std::ostream& stream) const
{
    return m_Graph->SerializeToDot(stream);
}

void ReportError(const std::string& errorMessage,
                 Optional<std::vector<std::string>&> errorMessages)
{
    std::stringstream fullErrorMessage;
    fullErrorMessage << "ERROR: " << errorMessage;
    ARMNN_LOG(error) << fullErrorMessage.str();
    if (errorMessages)
    {
        errorMessages.value().push_back(fullErrorMessage.str());
    }
}

void ReportWarning(const std::string& warningMessage,
                   Optional<std::vector<std::string>&> warningMessages)
{
    std::stringstream fullWarningMessage;
    fullWarningMessage << "WARNING: " << warningMessage;
    ARMNN_LOG(warning) << fullWarningMessage.str();
    if (warningMessages)
    {
        warningMessages.value().push_back(fullWarningMessage.str());
    }
}

OptimizationResult ReturnWithError(OptimizationResult res,
                                   const Layer* layer,
                                   const BackendSettings& backendSettings,
                                   Optional<std::vector<std::string>&> errMessages)
{
    std::stringstream failureMsg;
    failureMsg << "Layer of type " << GetLayerTypeAsCString(layer->GetType())
               << " is not supported on any preferred backend " << backendSettings.m_PreferredBackends;
    ReportError(failureMsg.str(), errMessages);

    res.m_Error = true;
    return res;
}

bool CheckScaleSetOnQuantizedType(Layer* layer, Optional<std::vector<std::string>&> errMessages)
{
    bool noErrors = true;
    unsigned int numOutputs = layer->GetNumOutputSlots();
    for (unsigned int i = 0; i < numOutputs; i++)
    {
        OutputSlot& outputSlot = layer->GetOutputSlot(i);
        TensorInfo info = outputSlot.GetTensorInfo();
        if (DataType::QAsymmU8 == info.GetDataType())
        {
            if (0.f == info.GetQuantizationScale())
            {
                noErrors = false;
                std::stringstream ss;
                ss << "output " << i << " of layer " << GetLayerTypeAsCString(layer->GetType())
                   << " (" << layer->GetNameStr() << ") is of type"
                   << " Quantized 8 bit but its scale parameter has not been set";
                ReportError(ss.str(), errMessages);
            }
            // Softmax under QuantisedAsymm8 must always have scale (1.0f/256.0f) and offset 0
            if ((info.GetQuantizationScale() != (1.0f / 256.0f) ||
                 info.GetQuantizationOffset() != 0) &&
                layer->GetType() == armnn::LayerType::Softmax)
            {
                std::stringstream ss;
                ss << "Quantization parameters for Softmax layer (Scale: " <<
                      info.GetQuantizationScale() << " and Offset: " << info.GetQuantizationOffset() <<
                      ") are incorrect and have been updated to Scale: 0.00390625 and Offset: 0";
                ARMNN_LOG(warning) << ss.str();
                info.SetQuantizationScale((1.0f / 256.0f));
                info.SetQuantizationOffset(0);
                outputSlot.SetTensorInfo(info);
            }
        }
    }
    return noErrors;
}
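
// For reference: with QAsymmU8 the real value is recovered as
//     real = scale * (quantized - offset)
// so a Softmax output (whose range is [0, 1)) quantized with scale 1/256 = 0.00390625
// and offset 0 maps the full unsigned 8-bit range [0, 255] onto [0, 0.99609375].
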
template <typename LayerT>
LayerT* ConvertBf16ToFp32Weight(Layer* l)
{
    LayerT* layer = PolymorphicDowncast<LayerT*>(l);
    if ((layer->GetType() == LayerType::Convolution2d || layer->GetType() == LayerType::FullyConnected)
         && layer->m_Weight)
    {
        const TensorInfo& info = layer->m_Weight->GetTensorInfo();

        if (info.GetDataType() == DataType::BFloat16)
        {
            std::vector<float> newValues(info.GetNumElements());

            armnnUtils::FloatingPointConverter::ConvertBFloat16ToFloat32(
                layer->m_Weight->template GetTensor<armnn::BFloat16>(), info.GetNumElements(), newValues.data());

            TensorInfo newInfo(info.GetShape(), DataType::Float32);
            ConstTensor newInput(newInfo, newValues);
            layer->m_Weight.reset(new ScopedCpuTensorHandle(newInput));
        }
    }
    return layer;
}
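
// Note: BFloat16 keeps the sign and the 8 exponent bits of IEEE FP32 and truncates the
// mantissa to 7 bits, so the widening BF16 -> FP32 conversion above is exact: every
// BF16 value corresponds to an FP32 value whose low 16 mantissa bits are zero.
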
OptimizationResult AttemptBackendAssignment(BackendSettings& backendSettings,
                                            Graph& graph,
                                            Layer* layer,
                                            BackendId backend,
                                            DataType dataTypeIn,
                                            DataType dataTypeOut,
                                            const std::vector<BackendId>& availablePreferredBackends,
                                            std::string& reasonIfUnsupported,
                                            Optional<std::vector<std::string>&> errMessages)
{
    OptimizationResult result;

    // Helper lambda to compose a meaningful error message before returning with error
    auto ReturnError = [&](const Layer* layer)
        {
            return ReturnWithError(result, layer, backendSettings, errMessages);
        };

    // We need to set the compute device on the layer
    // before we can check if it is supported.
    layer->SetBackendId(backend);
    if (!IWorkloadFactory::IsLayerSupported(*layer, EmptyOptional(), reasonIfUnsupported))
    {
        if (dataTypeIn == DataType::Float16 || dataTypeOut == DataType::Float16)
        {
            if (IWorkloadFactory::IsLayerSupported(*layer, DataType::Float32, reasonIfUnsupported)
                && layer->GetType() != LayerType::ConvertFp32ToFp16
                && layer->GetType() != LayerType::ConvertFp16ToFp32)
            {
                // Insert FP16 -> FP32 conversion layer before current layer
                std::vector<ConvertFp16ToFp32Layer*> convertFp16ToFp32Layers;
                if (dataTypeIn == DataType::Float16)
                {
                    convertFp16ToFp32Layers =
                        InsertConvertFp16ToFp32LayersBefore(graph, *layer);
                }

                // Insert FP32 -> FP16 conversion layer after current layer
                std::vector<ConvertFp32ToFp16Layer*> convertFp32ToFp16Layers;
                if (dataTypeOut == DataType::Float16)
                {
                    convertFp32ToFp16Layers =
                        InsertConvertFp32ToFp16LayersAfter(graph, *layer);
                }

                // Assign a supported backend to the newly introduced conversion layers
                auto AssignFirstSupportedBackend = [&](Layer* layer, BackendId preferredBackend)
                    {
                        bool supportedBackendFound = false;
                        std::string reasonIfUnsupported;

                        // Try the preferred backend first
                        layer->SetBackendId(preferredBackend);
                        if (IWorkloadFactory::IsLayerSupported(*layer,
                                                               EmptyOptional(),
                                                               reasonIfUnsupported))
                        {
                            supportedBackendFound = true;
                        }
                        else
                        {
                            for (const auto& backend : availablePreferredBackends)
                            {
                                // Skip the preferred backend (we already determined that it is not supported)
                                if (backend == preferredBackend)
                                {
                                    continue;
                                }

                                layer->SetBackendId(backend);
                                if (IWorkloadFactory::IsLayerSupported(*layer,
                                                                       EmptyOptional(),
                                                                       reasonIfUnsupported))
                                {
                                    supportedBackendFound = true;
                                    break;
                                }
                            }
                        }

                        return supportedBackendFound;
                    };

                for (ConvertFp16ToFp32Layer* convertLayer : convertFp16ToFp32Layers)
                {
                    if (!AssignFirstSupportedBackend(convertLayer, backend))
                    {
                        return ReturnError(convertLayer);
                    }
                }

                for (ConvertFp32ToFp16Layer* convertLayer : convertFp32ToFp16Layers)
                {
                    if (!AssignFirstSupportedBackend(convertLayer, backend))
                    {
                        return ReturnError(convertLayer);
                    }
                }

                return result;
            }
        }
        else if (dataTypeIn == DataType::BFloat16 || dataTypeOut == DataType::BFloat16)
        {
            if (IWorkloadFactory::IsLayerSupported(*layer, DataType::Float32, reasonIfUnsupported)
                && layer->GetType() != LayerType::ConvertFp32ToBf16
                && layer->GetType() != LayerType::ConvertBf16ToFp32)
            {
                // Insert BF16 -> FP32 conversion layer before current layer
                std::vector<ConvertBf16ToFp32Layer*> convertBf16ToFp32Layers;
                if (dataTypeIn == DataType::BFloat16)
                {
                    convertBf16ToFp32Layers =
                        InsertConvertBf16ToFp32LayersBefore(graph, *layer);
                    if (layer->GetType() == LayerType::Convolution2d)
                    {
                        ConvertBf16ToFp32Weight<Convolution2dLayer>(layer);
                    }
                    else if (layer->GetType() == LayerType::FullyConnected)
                    {
                        ConvertBf16ToFp32Weight<FullyConnectedLayer>(layer);
                    }
                }

                // Insert FP32 -> BF16 conversion layer after current layer
                std::vector<ConvertFp32ToBf16Layer*> convertFp32ToBf16Layers;
                if (dataTypeOut == DataType::BFloat16)
                {
                    convertFp32ToBf16Layers =
                        InsertConvertFp32ToBf16LayersAfter(graph, *layer);
                }

                // Assign a supported backend to the newly introduced conversion layers
                auto AssignFirstSupportedBackend = [&](Layer* layer, BackendId preferredBackend)
                    {
                        bool supportedBackendFound = false;
                        std::string reasonIfUnsupported;

                        // Try the preferred backend first
                        layer->SetBackendId(preferredBackend);
                        if (IWorkloadFactory::IsLayerSupported(*layer,
                                                               EmptyOptional(),
                                                               reasonIfUnsupported))
                        {
                            supportedBackendFound = true;
                        }
                        else
                        {
                            for (const auto& backend : availablePreferredBackends)
                            {
                                // Skip the preferred backend (we already determined that it is not supported)
                                if (backend == preferredBackend)
                                {
                                    continue;
                                }

                                layer->SetBackendId(backend);
                                if (IWorkloadFactory::IsLayerSupported(*layer,
                                                                       EmptyOptional(),
                                                                       reasonIfUnsupported))
                                {
                                    supportedBackendFound = true;
                                    break;
                                }
                            }
                        }

                        return supportedBackendFound;
                    };

                for (ConvertBf16ToFp32Layer* convertLayer : convertBf16ToFp32Layers)
                {
                    if (!AssignFirstSupportedBackend(convertLayer, backend))
                    {
                        return ReturnError(convertLayer);
                    }
                }

                for (ConvertFp32ToBf16Layer* convertLayer : convertFp32ToBf16Layers)
                {
                    if (!AssignFirstSupportedBackend(convertLayer, backend))
                    {
                        return ReturnError(convertLayer);
                    }
                }

                return result;
            }
        }

        std::stringstream warningMsg;
        warningMsg << "Layer of type " << GetLayerTypeAsCString(layer->GetType())
                   << " is not supported on requested backend " << layer->GetBackendId().Get()
                   << " for input data type " << GetDataTypeName(dataTypeIn)
                   << " and output data type " << GetDataTypeName(dataTypeOut)
                   << " (reason: " << reasonIfUnsupported
                   << "), falling back to the next backend.";
        ReportWarning(warningMsg.str(), errMessages);

        return OptimizationResult(true, false);
    }

    return result;
}
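
// A note on the OptimizationResult conventions used above (inferred from the call
// sites in this file rather than stated anywhere authoritative):
//     OptimizationResult(true, false)  - warning only: the layer is unsupported here
//                                        and the caller should try the next backend;
//     result.m_Error == true           - hard failure, assignment is aborted;
//     a default-constructed result     - the backend accepted the layer (IsOk()).
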
OptimizationResult AssignBackends(OptimizedNetwork* optNetObjPtr,
                                  BackendSettings& backendSettings,
                                  Graph::Iterator& firstLayer,
                                  Graph::Iterator& lastLayer,
                                  Optional<std::vector<std::string>&> errMessages)
{
    OptimizationResult result;

    // Helper lambda to compose a meaningful error message before returning with error
    auto ReturnError = [&](const Layer* layer)
        {
            return ReturnWithError(result, layer, backendSettings, errMessages);
        };

    auto availablePreferredBackends = backendSettings.GetAvailablePreferredBackends();
    if (availablePreferredBackends.empty())
    {
        std::stringstream failureMsg;
        failureMsg << "No preferred backends are available";
        ReportError(failureMsg.str(), errMessages);

        result.m_Error = true;
        return result;
    }

    for (auto it = firstLayer; it != lastLayer; ++it)
    {
        auto layer = *it;

        DataType dataTypeIn  = layer->GetNumInputSlots() == 0 ? DataType::Float32 :
            layer->GetInputSlot(0).GetConnectedOutputSlot()->GetTensorInfo().GetDataType();
        DataType dataTypeOut = layer->GetNumOutputSlots() == 0 ? DataType::Float32 :
            layer->GetOutputSlot(0).GetTensorInfo().GetDataType();

        std::string reasonIfUnsupported;
        bool found = false;
        if (!CheckScaleSetOnQuantizedType(layer, errMessages))
        {
            // Don't bomb immediately, find all the quantized outputs
            // which haven't had a scale set and report them all back.
            result.m_Error = true;
        }

        // First try to assign the layer to the hinted backend
        if (layer->GetBackendHint().has_value() &&
            backendSettings.IsBackendSupported(layer->GetBackendHint().value()) &&
            AttemptBackendAssignment(backendSettings,
                                     optNetObjPtr->GetGraph(),
                                     layer,
                                     layer->GetBackendHint().value(),
                                     dataTypeIn,
                                     dataTypeOut,
                                     availablePreferredBackends,
                                     reasonIfUnsupported,
                                     errMessages).IsOk())
        {
            found = true;
            backendSettings.m_SelectedBackends.insert(layer->GetBackendHint().value());
        }
        else
        {
            // Then try to assign the layer to the preferred list of backends
            for (const auto& backend : availablePreferredBackends)
            {
                if (layer->GetBackendHint().has_value() &&
                    layer->GetBackendHint().value() == backend)
                {
                    continue; // Don't re-test the backend hint
                }

                OptimizationResult res = AttemptBackendAssignment(backendSettings,
                                                                  optNetObjPtr->GetGraph(),
                                                                  layer,
                                                                  backend,
                                                                  dataTypeIn,
                                                                  dataTypeOut,
                                                                  availablePreferredBackends,
                                                                  reasonIfUnsupported,
                                                                  errMessages);

                if (res.IsOk())
                {
                    found = true;
                    backendSettings.m_SelectedBackends.insert(backend);
                    break;
                }
                else if (res.IsError())
                {
                    return res; // Cannot continue.
                    // Note: we don't need to log the error as it would already
                    // be logged in AttemptBackendAssignment().
                }
                else
                {
                    ARMNN_ASSERT_MSG(res.IsWarningOnly(), "OptimizationResult in unexpected state.");
                }
            }
        }

        // If the layer is unsupported by any devices, log and return a null network.
        if (!found)
        {
            // NOTE: if the layer is not an operation queue type AND we have not got CpuRef as a
            // fallback we should set the compute device on the layer to CpuRef (these are not
            // available as accelerated operations, or are only available under certain
            // conditions, currently they comprise MemCopy, Constant, Permute)
            armnn::LayerType layerType = layer->GetType();
            if (!backendSettings.IsCpuRefUsed() && (layerType == armnn::LayerType::MemCopy ||
                                                    layerType == armnn::LayerType::Constant ||
                                                    layerType == armnn::LayerType::Permute))
            {
                BackendId cpuBackendId(armnn::Compute::CpuRef);
                layer->SetBackendId(cpuBackendId);
                backendSettings.m_SelectedBackends.insert(cpuBackendId);
            }
            else
            {
                return ReturnError(layer);
            }
        }
    }

    return result;
}

OptimizationResult AssignBackends(OptimizedNetwork* optNetObjPtr,
                                  BackendSettings& backendSettings,
                                  SubgraphView& subgraph,
                                  Optional<std::vector<std::string>&> errMessages)
{
    Graph::Iterator firstLayer = subgraph.begin();
    Graph::Iterator lastLayer  = subgraph.end();
    return AssignBackends(optNetObjPtr,
                          backendSettings,
                          firstLayer,
                          lastLayer,
                          errMessages);
}

BackendsMap CreateSupportedBackends(TensorHandleFactoryRegistry& handleFactoryRegistry,
                                    BackendSettings& backendSettings)
{
    BackendsMap backends;
    auto const& backendRegistry = BackendRegistryInstance();
    for (auto&& selectedBackend : backendSettings.m_SupportedBackends)
    {
        auto backendFactory = backendRegistry.GetFactory(selectedBackend);
        auto backendObjPtr  = backendFactory();
        ARMNN_ASSERT(backendObjPtr);

        backendObjPtr->RegisterTensorHandleFactories(handleFactoryRegistry);

        backends[backendObjPtr->GetId()] = std::move(backendObjPtr);
    }

    return backends;
}
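
// A hypothetical caller of the helper above (names mirror the ones used in
// Optimize() below):
//
//     TensorHandleFactoryRegistry registry;
//     BackendSettings settings(backendPreferences, deviceSpec);
//     BackendsMap backends = CreateSupportedBackends(registry, settings);
//
// after which 'backends' owns one initialized IBackendInternal per supported
// backend id (e.g. "CpuRef", "CpuAcc", "GpuAcc"), with its tensor handle
// factories registered.
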
OptimizationResult ApplyBackendOptimizations(OptimizedNetwork* optNetObjPtr,
                                             BackendSettings& backendSettings,
                                             BackendsMap& backends,
                                             Optional<std::vector<std::string>&> errMessages)
{
    ARMNN_ASSERT(optNetObjPtr);

    OptimizationResult result;

    // Get the optimized graph
    Graph& optGraph = optNetObjPtr->GetGraph();

    // Run backend specific optimizations
    for (auto&& selectedBackend : backendSettings.m_SelectedBackends)
    {
        auto backendObjPtr = backends.find(selectedBackend)->second.get();
        ARMNN_ASSERT(backendObjPtr);

        // Select sub-graphs based on backend
        SubgraphViewSelector::Subgraphs subgraphs =
            SubgraphViewSelector::SelectSubgraphs(optGraph,
                                                  // Select layers assigned to the requested backend
                                                  [&backendObjPtr](const Layer& layer)
                                                  {
                                                      return layer.GetType() != LayerType::Input &&
                                                             layer.GetType() != LayerType::Output &&
                                                             layer.GetBackendId() == backendObjPtr->GetId();
                                                  });
        if (subgraphs.empty())
        {
            // No sub-graphs found, try with next selected backend
            continue;
        }

        // Try to optimize each sub-graph
        for (auto& subgraph : subgraphs)
        {
            // Try to optimize the current sub-graph
            OptimizationViews optimizationViews = backendObjPtr->OptimizeSubgraphView(*subgraph);
            ARMNN_ASSERT(optimizationViews.Validate(*subgraph));

            // Optimization attempted, check the resulting optimized sub-graph
            for (auto& substitution : optimizationViews.GetSubstitutions())
            {
                // Sub-graph optimized, substitute the sub-graph with the new optimized one in the main graph
                SubgraphView& replacementSubgraph   = substitution.m_ReplacementSubgraph;
                SubgraphView& substitutableSubgraph = substitution.m_SubstitutableSubgraph;
                optGraph.SubstituteSubgraph(substitutableSubgraph, replacementSubgraph);

                // Assign the current backend to the optimized sub-graph
                std::for_each(replacementSubgraph.begin(), replacementSubgraph.end(), [&selectedBackend](Layer* l)
                    {
                        ARMNN_ASSERT(l);
                        l->SetBackendId(selectedBackend);
                    });
            }

            if (!optimizationViews.GetFailedSubgraphs().empty())
            {
                std::stringstream warningMsg;
                warningMsg << "Some sub-graph(s) failed to optimize on " << backendObjPtr->GetId() << " backend.";
                ReportWarning(warningMsg.str(), errMessages);

                // Failed to optimize the given sub-graph, re-assign its layers to other available backends
                BackendSettings settingsCopy(backendSettings);
                if (!backendObjPtr->GetId().IsCpuRef())
                {
                    // Add the current backend to the list of backends to ignore
                    settingsCopy.m_IgnoredBackends.insert(backendObjPtr->GetId());
                }

                int count = 0;
                for (auto& failedSubgraph : optimizationViews.GetFailedSubgraphs())
                {
                    // An error occurred: the optimization was attempted but not performed, try different backends
                    std::stringstream subgraphMsg;
                    subgraphMsg << "Re-assigning backends to " << failedSubgraph.GetLayers().size()
                                << " layers inside sub-graph " << count++;
                    ReportWarning(subgraphMsg.str(), errMessages);

                    OptimizationResult reassignmentResult = AssignBackends(optNetObjPtr,
                                                                           settingsCopy,
                                                                           failedSubgraph,
                                                                           errMessages);
                    if (reassignmentResult.m_Error)
                    {
                        // Failed to re-assign one of the remaining backends to each layer of the sub-graph
                        result.m_Error = true;
                    }
                }
            }
        }
    }

    return result;
}

bool RequiresCopy(ITensorHandleFactory::FactoryId src,
                  ITensorHandleFactory::FactoryId dst,
                  TensorHandleFactoryRegistry& registry)
{
    if (src != dst)
    {
        ITensorHandleFactory* srcFactory = registry.GetFactory(src);
        ITensorHandleFactory* dstFactory = registry.GetFactory(dst);

        if (srcFactory && dstFactory &&
            (srcFactory->GetExportFlags() & dstFactory->GetImportFlags()) != 0)
        {
            return false;
        }
        return true;
    }
    return false;
}
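
// The flags compared above are MemorySource bitmasks, so compatibility is a simple
// intersection test. For example, if srcFactory exports {Malloc} and dstFactory can
// import {Malloc, DmaBuf}, the AND is non-zero and the tensor can cross the edge
// without a copy; if the sets are disjoint, a copy (and later a copy layer) is needed.
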
// Find the handle factory for the input layer which results in the fewest required copies.
ITensorHandleFactory::FactoryId CalculateSlotOptionForInput(BackendsMap& backends,
                                                            OutputSlot& slot,
                                                            TensorHandleFactoryRegistry& registry)
{
    Layer& layer = slot.GetOwningLayer();
    ARMNN_ASSERT(layer.GetType() == LayerType::Input);

    // Explicitly select the tensorhandle factory for InputLayer because the rules for it are slightly different. It
    // doesn't matter which backend it is assigned to because they all use the same implementation, which
    // requires Map/Unmap support. This means that, so long as the handle type supports map/unmap semantics, we can
    // select a factory with maximum compatibility with the layers connected to the InputLayer.

    // First ensure the source backend supports the TensorHandle API.
    auto frmBackend = backends.find(layer.GetBackendId());
    if (frmBackend == backends.end() ||
        !frmBackend->second->SupportsTensorAllocatorAPI())
    {
        return ITensorHandleFactory::LegacyFactoryId;
    }

    // Go through all connections to the output slot and determine the TensorHandleFactory which results in the
    // fewest copies.
    std::map<ITensorHandleFactory::FactoryId, int> factoryScores;
    int topScore = 0;
    ITensorHandleFactory::FactoryId topChoice = ITensorHandleFactory::LegacyFactoryId;

    for (auto&& connection : slot.GetConnections())
    {
        const Layer& connectedLayer = connection->GetOwningLayer();

        auto toBackend = backends.find(connectedLayer.GetBackendId());
        ARMNN_ASSERT_MSG(toBackend != backends.end(), "Backend id not found for the connected layer");

        if (!toBackend->second.get()->SupportsTensorAllocatorAPI())
        {
            // The destination backend does not support the tensor allocator API, move to the next one
            continue;
        }

        auto dstPrefs = toBackend->second.get()->GetHandleFactoryPreferences();
        for (auto&& dst : dstPrefs)
        {
            // Input layers use the mem copy workload or import, so the selected factory must
            // support either the map/unmap API or the Import API.
            ITensorHandleFactory* factory = registry.GetFactory(dst);
            if (!factory->SupportsMapUnmap() &&
                !CheckFlag(factory->GetImportFlags(), MemorySource::Malloc)) // Just support cpu mem imports for now
            {
                // The current tensor handle factory does not support the map/unmap or import
                // strategy, move to the next one
                continue;
            }

            auto it = factoryScores.find(dst);
            if (it == factoryScores.end())
            {
                // Add a new score to the table
                factoryScores[dst] = 0;
                if (topChoice == ITensorHandleFactory::LegacyFactoryId)
                {
                    topChoice = dst;
                }
            }
            else
            {
                // Increase the score
                factoryScores[dst]++;

                // Track the best option
                if (factoryScores[dst] > topScore)
                {
                    topScore = factoryScores[dst];
                    topChoice = dst;
                }
            }
        }
    }

    return topChoice;
}

// Find the handle factory for the output layer which results in the fewest required copies.
ITensorHandleFactory::FactoryId CalculateSlotOptionForOutput(BackendsMap& backends,
                                                             OutputSlot& slot,
                                                             TensorHandleFactoryRegistry& registry)
{
    IgnoreUnused(backends, slot, registry);
    return ITensorHandleFactory::DeferredFactoryId;
}
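
// Rationale (an inference from the surrounding code, not a documented guarantee):
// an OutputLayer only consumes what the preceding layer produces, so the factory
// decision can be deferred to the producing slot and nothing needs scoring here.
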
// For all handle factories supported on the source backend, we wish to find the one which requires the fewest copies
// when considering all connections.
ITensorHandleFactory::FactoryId CalculateSlotOption(BackendsMap& backends,
                                                    OutputSlot& outputSlot,
                                                    TensorHandleFactoryRegistry& registry)
{
    // First ensure the source backend supports the TensorHandle API.
    Layer& layer = outputSlot.GetOwningLayer();
    auto frmBackend = backends.find(layer.GetBackendId());
    if (frmBackend == backends.end() ||
        !frmBackend->second->SupportsTensorAllocatorAPI())
    {
        return ITensorHandleFactory::LegacyFactoryId;
    }

    // Connections to Output Layers require support for map/unmap on the TensorHandle.
    bool requiresMapUnmap = false;
    for (auto&& connection : outputSlot.GetConnections())
    {
        const Layer& connectedLayer = connection->GetOwningLayer();
        if (connectedLayer.GetType() == LayerType::Output)
        {
            requiresMapUnmap = true;
        }
    }

    IBackendInternal* srcBackend = frmBackend->second.get();
    auto srcPrefs = srcBackend->GetHandleFactoryPreferences();

    // Initialize the scores
    std::map<ITensorHandleFactory::FactoryId, int> factoryScores;
    for (auto&& pref : srcPrefs)
    {
        if (requiresMapUnmap) // Only consider factories that support map/unmap if required
        {
            ITensorHandleFactory* factory = registry.GetFactory(pref);
            if (!factory->SupportsMapUnmap())
            {
                // The current tensor handle factory does not support the map/unmap strategy, move to the next one
                continue;
            }
        }

        auto it = factoryScores.find(pref);
        if (it == factoryScores.end())
        {
            // Add a new score to the table
            factoryScores[pref] = 0;
        }
    }

    // Score each handle factory based on how many times it requires copies on the slot connections
    for (auto&& connection : outputSlot.GetConnections())
    {
        const Layer& connectedLayer = connection->GetOwningLayer();

        auto toBackend = backends.find(connectedLayer.GetBackendId());
        ARMNN_ASSERT_MSG(toBackend != backends.end(), "Backend id not found for the connected layer");

        auto dstPrefs = toBackend->second.get()->GetHandleFactoryPreferences();
        for (auto&& src : srcPrefs)
        {
            if (factoryScores.find(src) == factoryScores.end()) // Don't consider excluded factories
            {
                continue;
            }

            for (auto&& dst : dstPrefs)
            {
                if (RequiresCopy(src, dst, registry))
                {
                    // A copy would be required for this pairing, so penalise the factory's score
                    factoryScores[src]++;
                }
            }
        }
    }

    // Find the lowest score
    int minScore = std::numeric_limits<int>::max();
    for (auto it : factoryScores)
    {
        minScore = std::min(minScore, it.second);
    }

    // Collect factories matching the best (lowest) score
    std::vector<ITensorHandleFactory::FactoryId> optimalFactories;
    for (auto it : factoryScores)
    {
        if (it.second == minScore)
        {
            optimalFactories.push_back(it.first);
        }
    }

    // For all compatible factories matching the best score, find the preferred one for the current layer.
    for (auto&& srcPref : srcPrefs)
    {
        for (auto&& comp : optimalFactories)
        {
            if (comp == srcPref)
            {
                return srcPref;
            }
        }
    }

    return ITensorHandleFactory::LegacyFactoryId;
}
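
// A worked example of the scoring above (hypothetical factories A and B): if this
// output slot feeds three consumers and factory A accumulates one required copy
// across those connections while factory B accumulates two, then minScore is A's 1
// and A is chosen; ties between equally-scored factories are broken by the source
// backend's own preference order.
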
EdgeStrategy CalculateEdgeStrategy(BackendsMap& backends,
                                   ITensorHandleFactory::FactoryId srcFactoryId,
                                   const Layer& layer,
                                   const Layer& connectedLayer,
                                   TensorHandleFactoryRegistry& registry)
{
    auto toBackend = backends.find(connectedLayer.GetBackendId());
    ARMNN_ASSERT_MSG(toBackend != backends.end(), "Backend id not found for the connected layer");

    auto dstPrefs = toBackend->second.get()->GetHandleFactoryPreferences();

    // Legacy API check for backward compatibility
    if (srcFactoryId == ITensorHandleFactory::LegacyFactoryId || dstPrefs.empty())
    {
        if (layer.GetBackendId() != connectedLayer.GetBackendId())
        {
            return EdgeStrategy::CopyToTarget;
        }
        else
        {
            return EdgeStrategy::DirectCompatibility;
        }
    }

    // TensorHandleFactory API present, so perform more sophisticated strategies.
    // Dst Output layers don't require a copy because they use import or map/unmap.
    if (connectedLayer.GetType() == LayerType::Output)
    {
        return EdgeStrategy::DirectCompatibility;
    }

    // Search for a direct match in prefs
    for (auto&& pref : dstPrefs)
    {
        if (pref == srcFactoryId)
        {
            return EdgeStrategy::DirectCompatibility;
        }
    }

    // Search for export/import options
    ITensorHandleFactory* srcFactory = registry.GetFactory(srcFactoryId);
    if (srcFactory->GetExportFlags() != 0)
    {
        for (auto&& pref : dstPrefs)
        {
            ITensorHandleFactory* dstFactory = registry.GetFactory(pref);

            // Handles cases when a destPref is not listed in TensorHandleFactoryRegistry
            if (!dstFactory)
            {
                continue;
            }

            if ((dstFactory->GetImportFlags() & srcFactory->GetExportFlags()) != 0)
            {
                return EdgeStrategy::ExportToTarget;
            }
        }
    }

    // Search for copy options via map/unmap
    if (srcFactory->SupportsMapUnmap())
    {
        for (auto&& pref : dstPrefs)
        {
            ITensorHandleFactory* dstFactory = registry.GetFactory(pref);
            if (dstFactory && dstFactory->SupportsMapUnmap())
            {
                return EdgeStrategy::CopyToTarget;
            }
        }
    }

    return EdgeStrategy::Undefined;
}
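
// Summary of the precedence implemented above, from cheapest to most expensive:
//     DirectCompatibility - the consumer accepts the producer's tensor handle as-is;
//     ExportToTarget      - zero-copy sharing via matching export/import memory flags;
//     CopyToTarget        - an explicit copy through map/unmap;
//     Undefined           - no viable strategy, reported as an error by the caller.
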
// Select the TensorHandleFactories and the corresponding memory strategy
OptimizationResult SelectTensorHandleStrategy(Graph& optGraph,
                                              BackendsMap& backends,
                                              TensorHandleFactoryRegistry& registry,
                                              Optional<std::vector<std::string>&> errMessages)
{
    OptimizationResult result;

    optGraph.ForEachLayer([&backends, &registry, &result, &errMessages](Layer* layer)
        {
            ARMNN_ASSERT(layer);

            // Let's make sure the backend is in our list of supported backends. Something went wrong during
            // backend assignment if this check fails.
            ARMNN_ASSERT(backends.find(layer->GetBackendId()) != backends.end());

            // Check each output separately
            for (unsigned int slotIdx = 0; slotIdx < layer->GetNumOutputSlots(); slotIdx++)
            {
                OutputSlot& outputSlot = layer->GetOutputSlot(slotIdx);

                ITensorHandleFactory::FactoryId slotOption = ITensorHandleFactory::LegacyFactoryId;

                // Calculate the factory to use which results in the fewest copies being made.
                switch (layer->GetType())
                {
                    case LayerType::Input:
                        slotOption = CalculateSlotOptionForInput(backends, outputSlot, registry);
                        break;
                    case LayerType::Output:
                        slotOption = CalculateSlotOptionForOutput(backends, outputSlot, registry);
                        break;
                    default:
                        slotOption = CalculateSlotOption(backends, outputSlot, registry);
                        break;
                }
                outputSlot.SetTensorHandleFactory(slotOption);

                // Now determine the "best" edge strategy for each connection given the slotOption.
                unsigned int connectionIdx = 0;
                for (auto&& connection : outputSlot.GetConnections())
                {
                    const Layer& connectedLayer = connection->GetOwningLayer();

                    EdgeStrategy strategy = CalculateEdgeStrategy(backends, slotOption, *layer, connectedLayer, registry);

                    if (strategy == EdgeStrategy::Undefined)
                    {
                        result.m_Error = true;
                        if (errMessages)
                        {
                            errMessages.value().emplace_back("Could not find valid strategy required for compatibility"
                                                             " between backends.");
                        }
                        return;
                    }

                    outputSlot.SetEdgeStrategy(connectionIdx, strategy);

                    connectionIdx++;
                }
            }
        });

    return result;
}
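
// A minimal end-to-end usage sketch for Optimize() below (illustrative only; assumes
// a populated INetworkPtr 'net' and an IRuntimePtr 'runtime' created elsewhere):
//
//     std::vector<armnn::BackendId> prefs = { armnn::Compute::CpuAcc,
//                                             armnn::Compute::CpuRef };
//     std::vector<std::string> messages;
//     armnn::IOptimizedNetworkPtr optNet =
//         armnn::Optimize(*net, prefs, runtime->GetDeviceSpec(),
//                         armnn::OptimizerOptions(), messages);
//     if (!optNet) { /* inspect 'messages' for the reason */ }
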
IOptimizedNetworkPtr Optimize(const INetwork& inNetwork,
                              const std::vector<BackendId>& backendPreferences,
                              const IDeviceSpec& deviceSpec,
                              const OptimizerOptions& options,
                              Optional<std::vector<std::string>&> messages)
{
    if (backendPreferences.empty())
    {
        throw armnn::InvalidArgumentException("Invoked Optimize with no backends specified");
    }

    if (options.m_ReduceFp32ToFp16 && options.m_ReduceFp32ToBf16)
    {
        throw InvalidArgumentException("BFloat16 and Float16 optimization cannot be enabled at the same time.");
    }

    const Network& network = *PolymorphicDowncast<const Network*>(&inNetwork);
    std::unique_ptr<Graph> graph = std::make_unique<Graph>(network.GetGraph());

    auto optNet = IOptimizedNetworkPtr(new OptimizedNetwork(std::move(graph)), &IOptimizedNetwork::Destroy);

    OptimizedNetwork* optNetObjPtr = PolymorphicDowncast<OptimizedNetwork*>(optNet.get());

    // Get the optimized graph
    Graph& optGraph = optNetObjPtr->GetGraph();

    // Perform optimisation passes
    using namespace optimizations;
    Optimizer::Pass(optGraph, MakeOptimizations(SquashEqualPermuteSiblings(),
                                                SquashEqualTransposeSiblings(),
                                                SquashEqualReshapeSiblings(),
                                                OptimizeInversePermutes(),
                                                OptimizeInverseTransposes(),
                                                MovePermuteUp(),
                                                MoveTransposeUp(),
                                                PermuteAsReshape(),
                                                TransposeAsReshape(),
                                                OptimizeConsecutiveReshapes(),
                                                FoldPadIntoConvolution2d(),
                                                PermuteAndBatchToSpaceAsDepthToSpace(),
                                                TransposeAndBatchToSpaceAsDepthToSpace()));

    // Infer the tensor infos for all output slots. Throws an exception on failure.
    optGraph.InferTensorInfos();

    // If the Fp32-to-Fp16 optimization is enabled, convert the Fp32 network to Fp16.
    if (options.m_ReduceFp32ToFp16)
    {
        Optimizer::Pass(optGraph, MakeOptimizations(Fp32NetworkToFp16Converter()));
        Optimizer::Pass(optGraph, MakeOptimizations(ConvertConstantsFloatToHalf()));
    }

    // If the Fp32-to-Bf16 optimization is enabled, convert the Fp32 network to Bf16:
    // the inputs of Convolution2d and FullyConnected are converted from Fp32 to Bf16,
    // and only the constant weights of Convolution2d and FullyConnected are converted.
    if (options.m_ReduceFp32ToBf16)
    {
        Optimizer::Pass(optGraph, MakeOptimizations(Fp32NetworkToBf16Converter()));
    }

    // Initialize backend settings
    BackendSettings backendSettings(backendPreferences, deviceSpec);
    if (backendSettings.GetAvailablePreferredBackends().empty())
    {
        std::stringstream failureMsg;
        failureMsg << "None of the preferred backends " << backendPreferences
                   << " are supported. Current platform provides " << backendSettings.m_SupportedBackends;
        ReportError(failureMsg.str(), messages);
        return IOptimizedNetworkPtr(nullptr, &IOptimizedNetwork::Destroy);
    }

    // Create a map to temporarily hold the initialized backend objects
    TensorHandleFactoryRegistry tensorHandleFactoryRegistry;
    BackendsMap backends = CreateSupportedBackends(tensorHandleFactoryRegistry, backendSettings);

    // Assign an available backend to each layer
    Graph::Iterator firstLayer = optGraph.begin();
    Graph::Iterator lastLayer  = optGraph.end();
    OptimizationResult assignBackendsResult = AssignBackends(optNetObjPtr,
                                                             backendSettings,
                                                             firstLayer,
                                                             lastLayer,
                                                             messages);
    if (assignBackendsResult.m_Error)
    {
        // Failed to assign a backend to each layer
        return IOptimizedNetworkPtr(nullptr, &IOptimizedNetwork::Destroy);
    }

    Optimizer::Pass(optGraph, MakeOptimizations(OptimizeInverseConversionsFp16(),
                                                OptimizeInverseConversionsFp32()));

    // Apply the backend-specific optimizations
    OptimizationResult backendOptimizationResult = ApplyBackendOptimizations(optNetObjPtr,
                                                                             backendSettings,
                                                                             backends,
                                                                             messages);
    if (backendOptimizationResult.m_Error)
    {
        // Failed to apply the backend-specific optimizations
        return IOptimizedNetworkPtr(nullptr, &IOptimizedNetwork::Destroy);
    }

    // If the debug flag is set, then insert a DebugLayer after each layer.
    // Doing this after applying the backend optimizations as they might have changed some layers.
    if (options.m_Debug)
    {
        Optimizer::Pass(optGraph, MakeOptimizations(InsertDebugLayer()));
    }

    // Calculate the compatibility strategies for tensor handles
    OptimizationResult strategyResult = SelectTensorHandleStrategy(optGraph,
                                                                   backends,
                                                                   tensorHandleFactoryRegistry,
                                                                   messages);
    if (strategyResult.m_Error)
    {
        // Failed to select the tensor handle compatibility strategies
        return IOptimizedNetworkPtr(nullptr, &IOptimizedNetwork::Destroy);
    }

    // Based on the tensor handle strategy determined above, insert copy layers where required.
    optGraph.AddCompatibilityLayers(backends, tensorHandleFactoryRegistry);

    // Convert constants
    Optimizer::Pass(optGraph, MakeOptimizations(ConvertConstantsFloatToHalf()));
    Optimizer::Pass(optGraph, MakeOptimizations(ConvertConstantsHalfToFloat()));

    // Run backend-specific optimizations (deprecated)
    for (auto&& chosenBackend : backendSettings.m_SelectedBackends)
    {
        auto factoryFun = BackendRegistryInstance().GetFactory(chosenBackend);
        auto backendPtr = factoryFun();
        ARMNN_ASSERT(backendPtr.get() != nullptr);

        ARMNN_NO_DEPRECATE_WARN_BEGIN
        auto backendSpecificOptimizations = backendPtr->GetOptimizations();
        ARMNN_NO_DEPRECATE_WARN_END

        if (!backendSpecificOptimizations.empty())
        {
            Optimizer::Pass(optNetObjPtr->GetGraph(), backendSpecificOptimizations);
        }
    }

    return optNet;
}

Network::Network()
    : m_Graph(std::make_unique<Graph>())
{
}

Status Network::PrintGraph()
{
    m_Graph->Print();
    return Status::Success;
}

IConnectableLayer* Network::AddInputLayer(LayerBindingId id, const char* name)
{
    return m_Graph->AddLayer<InputLayer>(id, name);
}

IConnectableLayer* Network::AddBatchToSpaceNdLayer(const BatchToSpaceNdDescriptor& batchToSpaceNdDescriptor,
                                                   const char* name)
{
    return m_Graph->AddLayer<BatchToSpaceNdLayer>(batchToSpaceNdDescriptor, name);
}

IConnectableLayer* Network::AddComparisonLayer(const ComparisonDescriptor& comparisonDescriptor,
                                               const char* name)
{
    return m_Graph->AddLayer<ComparisonLayer>(comparisonDescriptor, name);
}

IConnectableLayer* Network::AddElementwiseUnaryLayer(const ElementwiseUnaryDescriptor& elementwiseUnaryDescriptor,
                                                     const char* name)
{
    return m_Graph->AddLayer<ElementwiseUnaryLayer>(elementwiseUnaryDescriptor, name);
}

IConnectableLayer* Network::AddFullyConnectedLayerImpl(const FullyConnectedDescriptor& fullyConnectedDescriptor,
                                                       const ConstTensor& weights,
                                                       const Optional<ConstTensor>& biases,
                                                       const char* name)
{
    if (fullyConnectedDescriptor.m_BiasEnabled && !biases.has_value())
    {
        throw InvalidArgumentException("AddFullyConnectedLayer: biases cannot be empty");
    }

    const auto layer = m_Graph->AddLayer<FullyConnectedLayer>(fullyConnectedDescriptor, name);

    layer->m_Weight = std::make_unique<ScopedCpuTensorHandle>(weights);

    if (fullyConnectedDescriptor.m_BiasEnabled)
    {
        layer->m_Bias = std::make_unique<ScopedCpuTensorHandle>(biases.value());
    }

    return layer;
}

IConnectableLayer* Network::AddFullyConnectedLayer(const FullyConnectedDescriptor& fullyConnectedDescriptor,
                                                   const ConstTensor& weights,
                                                   const Optional<ConstTensor>& biases,
                                                   const char* name)
{
    return AddFullyConnectedLayerImpl(fullyConnectedDescriptor, weights, biases, name);
}

IConnectableLayer* Network::AddFullyConnectedLayer(const FullyConnectedDescriptor& fullyConnectedDescriptor,
                                                   const ConstTensor& weights,
                                                   const char* name)
{
    Optional<ConstTensor> biases;
    return AddFullyConnectedLayerImpl(fullyConnectedDescriptor, weights, biases, name);
}

IConnectableLayer* Network::AddFullyConnectedLayer(const FullyConnectedDescriptor& fullyConnectedDescriptor,
                                                   const ConstTensor& weights,
                                                   const ConstTensor& biases,
                                                   const char* name)
{
    Optional<ConstTensor> optionalBiases(biases);
    return AddFullyConnectedLayerImpl(fullyConnectedDescriptor, weights, optionalBiases, name);
}
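
// A hedged usage sketch for the overloads above (shapes and data pointers are
// illustrative; 'weightData' and 'biasData' stand in for caller-provided buffers):
//
//     armnn::FullyConnectedDescriptor fcDesc;
//     fcDesc.m_BiasEnabled = true;
//     armnn::TensorInfo wInfo({ 16, 8 }, armnn::DataType::Float32);
//     armnn::TensorInfo bInfo({ 16 },    armnn::DataType::Float32);
//     armnn::ConstTensor weights(wInfo, weightData);
//     armnn::ConstTensor bias(bInfo, biasData);
//     armnn::IConnectableLayer* fc = network.AddFullyConnectedLayer(fcDesc, weights, bias, "fc1");
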
IConnectableLayer* Network::AddConcatLayer(const ConcatDescriptor& concatDescriptor,
                                           const char* name)
{
    return m_Graph->AddLayer<ConcatLayer>(concatDescriptor, name);
}

IConnectableLayer* Network::AddConvolution2dLayerImpl(const Convolution2dDescriptor& convolution2dDescriptor,
                                                      const ConstTensor& weights,
                                                      const Optional<ConstTensor>& biases,
                                                      const char* name)
{
    if (convolution2dDescriptor.m_BiasEnabled && !biases.has_value())
    {
        throw InvalidArgumentException("AddConvolution2dLayer: biases cannot be empty");
    }

    const auto layer = m_Graph->AddLayer<Convolution2dLayer>(convolution2dDescriptor, name);

    layer->m_Weight = std::make_unique<ScopedCpuTensorHandle>(weights);

    if (convolution2dDescriptor.m_BiasEnabled)
    {
        layer->m_Bias = std::make_unique<ScopedCpuTensorHandle>(biases.value());
    }

    return layer;
}

IConnectableLayer* Network::AddConvolution2dLayer(const Convolution2dDescriptor& convolution2dDescriptor,
                                                  const ConstTensor& weights,
                                                  const Optional<ConstTensor>& biases,
                                                  const char* name)
{
    return AddConvolution2dLayerImpl(convolution2dDescriptor, weights, biases, name);
}

IConnectableLayer* Network::AddConvolution2dLayer(const Convolution2dDescriptor& convolution2dDescriptor,
                                                  const ConstTensor& weights,
                                                  const char* name)
{
    Optional<ConstTensor> biases;
    return AddConvolution2dLayerImpl(convolution2dDescriptor, weights, biases, name);
}

IConnectableLayer* Network::AddConvolution2dLayer(const Convolution2dDescriptor& convolution2dDescriptor,
                                                  const ConstTensor& weights,
                                                  const ConstTensor& biases,
                                                  const char* name)
{
    Optional<ConstTensor> optionalBiases(biases);
    return AddConvolution2dLayerImpl(convolution2dDescriptor, weights, optionalBiases, name);
}
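
// And similarly for convolutions (a sketch; only the common descriptor fields are
// shown, and 'weights' is a ConstTensor prepared as in the sketch above):
//
//     armnn::Convolution2dDescriptor convDesc;
//     convDesc.m_StrideX     = 1;
//     convDesc.m_StrideY     = 1;
//     convDesc.m_BiasEnabled = false;
//     convDesc.m_DataLayout  = armnn::DataLayout::NHWC;
//     armnn::IConnectableLayer* conv = network.AddConvolution2dLayer(convDesc, weights, "conv1");
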
IConnectableLayer* Network::AddDepthwiseConvolution2dLayerImpl(
    const DepthwiseConvolution2dDescriptor& convolution2dDescriptor,
    const ConstTensor& weights,
    const Optional<ConstTensor>& biases,
    const char* name)
{
    if (convolution2dDescriptor.m_BiasEnabled && !biases.has_value())
    {
        throw InvalidArgumentException("AddDepthwiseConvolution2dLayer: biases cannot be empty");
    }

    const auto layer = m_Graph->AddLayer<DepthwiseConvolution2dLayer>(convolution2dDescriptor, name);

    layer->m_Weight = std::make_unique<ScopedCpuTensorHandle>(weights);

    if (convolution2dDescriptor.m_BiasEnabled)
    {
        layer->m_Bias = std::make_unique<ScopedCpuTensorHandle>(biases.value());
    }

    return layer;
}

IConnectableLayer* Network::AddDepthToSpaceLayer(const DepthToSpaceDescriptor& depthToSpaceDescriptor,
                                                 const char* name)
{
    return m_Graph->AddLayer<DepthToSpaceLayer>(depthToSpaceDescriptor, name);
}

IConnectableLayer* Network::AddDepthwiseConvolution2dLayer(
    const DepthwiseConvolution2dDescriptor& convolution2dDescriptor,
    const ConstTensor& weights,
    const Optional<ConstTensor>& biases,
    const char* name)
{
    return AddDepthwiseConvolution2dLayerImpl(convolution2dDescriptor, weights, biases, name);
}

IConnectableLayer* Network::AddDepthwiseConvolution2dLayer(
    const DepthwiseConvolution2dDescriptor& convolution2dDescriptor,
    const ConstTensor& weights,
    const char* name)
{
    Optional<ConstTensor> biases;
    return AddDepthwiseConvolution2dLayerImpl(convolution2dDescriptor, weights, biases, name);
}

IConnectableLayer* Network::AddDepthwiseConvolution2dLayer(
    const DepthwiseConvolution2dDescriptor& convolution2dDescriptor,
    const ConstTensor& weights,
    const ConstTensor& biases,
    const char* name)
{
    Optional<ConstTensor> optionalBiases(biases);
    return AddDepthwiseConvolution2dLayerImpl(convolution2dDescriptor, weights, optionalBiases, name);
}

IConnectableLayer* Network::AddDetectionPostProcessLayer(const armnn::DetectionPostProcessDescriptor& descriptor,
                                                         const ConstTensor& anchors, const char* name)
{
    const auto layer = m_Graph->AddLayer<DetectionPostProcessLayer>(descriptor, name);

    layer->m_Anchors = std::make_unique<ScopedCpuTensorHandle>(anchors);

    return layer;
}

IConnectableLayer* Network::AddPermuteLayer(const PermuteDescriptor& permuteDescriptor,
                                            const char* name)
{
    return m_Graph->AddLayer<PermuteLayer>(permuteDescriptor, name);
}

IConnectableLayer* Network::AddPooling2dLayer(const Pooling2dDescriptor& pooling2dDescriptor,
                                              const char* name)
{
    return m_Graph->AddLayer<Pooling2dLayer>(pooling2dDescriptor, name);
}

IConnectableLayer* Network::AddActivationLayer(const ActivationDescriptor& activationDescriptor,
                                               const char* name)
{
    return m_Graph->AddLayer<ActivationLayer>(activationDescriptor, name);
}

IConnectableLayer* Network::AddArgMinMaxLayer(const ArgMinMaxDescriptor& argMinMaxDescriptor,
                                              const char* name)
{
    return m_Graph->AddLayer<ArgMinMaxLayer>(argMinMaxDescriptor, name);
}

IConnectableLayer* Network::AddNormalizationLayer(const NormalizationDescriptor& normalizationDescriptor,
                                                  const char* name)
{
    return m_Graph->AddLayer<NormalizationLayer>(normalizationDescriptor, name);
}

IConnectableLayer* Network::AddSliceLayer(const SliceDescriptor& sliceDescriptor, const char* name)
{
    return m_Graph->AddLayer<SliceLayer>(sliceDescriptor, name);
}

IConnectableLayer* Network::AddSoftmaxLayer(const SoftmaxDescriptor& softmaxDescriptor,
                                            const char* name)
{
    return m_Graph->AddLayer<SoftmaxLayer>(softmaxDescriptor, name);
}

IConnectableLayer* Network::AddSplitterLayer(const ViewsDescriptor& splitterDescriptor,
                                             const char* name)
{
    return m_Graph->AddLayer<SplitterLayer>(splitterDescriptor, name);
}

IConnectableLayer* Network::AddMaximumLayer(const char* name)
{
    return m_Graph->AddLayer<MaximumLayer>(name);
}

IConnectableLayer* Network::AddMinimumLayer(const char* name)
{
    return m_Graph->AddLayer<MinimumLayer>(name);
}

IConnectableLayer* Network::AddMergerLayer(const MergerDescriptor& mergerDescriptor,
                                           const char* name)
{
    return AddConcatLayer(mergerDescriptor, name);
}

IConnectableLayer* Network::AddAbsLayer(const char* name)
{
    return AddElementwiseUnaryLayer(ElementwiseUnaryDescriptor(UnaryOperation::Abs), name);
}

IConnectableLayer* Network::AddAdditionLayer(const char* name)
{
    return m_Graph->AddLayer<AdditionLayer>(name);
}

IConnectableLayer* Network::AddMultiplicationLayer(const char* name)
{
    return m_Graph->AddLayer<MultiplicationLayer>(name);
}

IConnectableLayer* Network::AddOutputLayer(LayerBindingId id, const char* name)
{
    return m_Graph->AddLayer<OutputLayer>(id, name);
}

IConnectableLayer* Network::AddBatchNormalizationLayer(const BatchNormalizationDescriptor& desc,
                                                       const ConstTensor& mean,
                                                       const ConstTensor& variance,
                                                       const ConstTensor& beta,
                                                       const ConstTensor& gamma,
                                                       const char* name)
{
    const auto layer = m_Graph->AddLayer<BatchNormalizationLayer>(desc, name);

    layer->m_Mean     = std::make_unique<ScopedCpuTensorHandle>(mean);
    layer->m_Variance = std::make_unique<ScopedCpuTensorHandle>(variance);
    layer->m_Beta     = std::make_unique<ScopedCpuTensorHandle>(beta);
    layer->m_Gamma    = std::make_unique<ScopedCpuTensorHandle>(gamma);

    return layer;
}

IConnectableLayer* Network::AddResizeBilinearLayer(const ResizeBilinearDescriptor& descriptor,
                                                   const char* name)
{
    ResizeDescriptor resizeDescriptor;
    resizeDescriptor.m_Method       = ResizeMethod::Bilinear;
    resizeDescriptor.m_DataLayout   = descriptor.m_DataLayout;
    resizeDescriptor.m_TargetWidth  = descriptor.m_TargetWidth;
    resizeDescriptor.m_TargetHeight = descriptor.m_TargetHeight;

    return m_Graph->AddLayer<ResizeLayer>(resizeDescriptor, name);
}
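
// Note: AddResizeBilinearLayer above is a thin compatibility shim: it builds an
// equivalent ResizeDescriptor (ResizeMethod::Bilinear) and adds a plain ResizeLayer,
// so new code can call AddResizeLayer directly.
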
IConnectableLayer* Network::AddResizeLayer(const ResizeDescriptor& resizeDescriptor, const char* name)
{
    return m_Graph->AddLayer<ResizeLayer>(resizeDescriptor, name);
}

IConnectableLayer* Network::AddInstanceNormalizationLayer(const InstanceNormalizationDescriptor& desc,
                                                          const char* name)
{
    return m_Graph->AddLayer<InstanceNormalizationLayer>(desc, name);
}

IConnectableLayer* Network::AddL2NormalizationLayer(const L2NormalizationDescriptor& desc,
                                                    const char* name)
{
    return m_Graph->AddLayer<L2NormalizationLayer>(desc, name);
}

IConnectableLayer* Network::AddLogSoftmaxLayer(const LogSoftmaxDescriptor& desc,
                                               const char* name)
{
    return m_Graph->AddLayer<LogSoftmaxLayer>(desc, name);
}

IConnectableLayer* Network::AddConstantLayer(const ConstTensor& input, const char* name)
{
    auto layer = m_Graph->AddLayer<ConstantLayer>(name);

    layer->m_LayerOutput = std::make_unique<ScopedCpuTensorHandle>(input);

    return layer;
}

IConnectableLayer* Network::AddReshapeLayer(const ReshapeDescriptor& reshapeDescriptor,
                                            const char* name)
{
    return m_Graph->AddLayer<ReshapeLayer>(reshapeDescriptor, name);
}

IConnectableLayer* Network::AddSpaceToBatchNdLayer(const SpaceToBatchNdDescriptor& spaceToBatchNdDescriptor,
                                                   const char* name)
{
    return m_Graph->AddLayer<SpaceToBatchNdLayer>(spaceToBatchNdDescriptor, name);
}

IConnectableLayer* Network::AddSpaceToDepthLayer(const SpaceToDepthDescriptor& spaceToDepthDescriptor,
                                                 const char* name)
{
    return m_Graph->AddLayer<SpaceToDepthLayer>(spaceToDepthDescriptor, name);
}

IConnectableLayer* Network::AddFloorLayer(const char* name)
{
    return m_Graph->AddLayer<FloorLayer>(name);
}

IConnectableLayer* Network::AddLstmLayer(const LstmDescriptor& descriptor,
                                         const LstmInputParams& params,
                                         const char* name)
{
    const auto layer = m_Graph->AddLayer<LstmLayer>(descriptor, name);

    // LSTM basic parameters
    layer->m_BasicParameters.m_InputToForgetWeights =
        std::make_unique<ScopedCpuTensorHandle>(*(params.m_InputToForgetWeights));
    layer->m_BasicParameters.m_InputToCellWeights =
        std::make_unique<ScopedCpuTensorHandle>(*(params.m_InputToCellWeights));
    layer->m_BasicParameters.m_InputToOutputWeights =
        std::make_unique<ScopedCpuTensorHandle>(*(params.m_InputToOutputWeights));
    layer->m_BasicParameters.m_RecurrentToForgetWeights =
        std::make_unique<ScopedCpuTensorHandle>(*(params.m_RecurrentToForgetWeights));
    layer->m_BasicParameters.m_RecurrentToCellWeights =
        std::make_unique<ScopedCpuTensorHandle>(*(params.m_RecurrentToCellWeights));
    layer->m_BasicParameters.m_RecurrentToOutputWeights =
        std::make_unique<ScopedCpuTensorHandle>(*(params.m_RecurrentToOutputWeights));
    layer->m_BasicParameters.m_ForgetGateBias =
        std::make_unique<ScopedCpuTensorHandle>(*(params.m_ForgetGateBias));
    layer->m_BasicParameters.m_CellBias =
        std::make_unique<ScopedCpuTensorHandle>(*(params.m_CellBias));
    layer->m_BasicParameters.m_OutputGateBias =
        std::make_unique<ScopedCpuTensorHandle>(*(params.m_OutputGateBias));

    // LSTM CIFG parameters
    if (!descriptor.m_CifgEnabled)
    {
        if (params.m_InputToInputWeights == nullptr)
        {
            throw InvalidArgumentException("AddLstmLayer: Input To Input Weights cannot be NULL "
                                           "when CIFG is disabled.");
        }
        if (params.m_RecurrentToInputWeights == nullptr)
        {
            throw InvalidArgumentException(
                "AddLstmLayer: Recurrent To Input Weights cannot be NULL "
                "when CIFG is disabled.");
        }
        if (params.m_InputGateBias == nullptr)
        {
            throw InvalidArgumentException("AddLstmLayer: Input Gate Bias cannot be NULL "
                                           "when CIFG is disabled.");
        }
        layer->m_CifgParameters.m_InputToInputWeights =
            std::make_unique<ScopedCpuTensorHandle>(*(params.m_InputToInputWeights));
        layer->m_CifgParameters.m_RecurrentToInputWeights =
            std::make_unique<ScopedCpuTensorHandle>(*(params.m_RecurrentToInputWeights));
        layer->m_CifgParameters.m_InputGateBias =
            std::make_unique<ScopedCpuTensorHandle>(*(params.m_InputGateBias));
    }

    // LSTM projection parameters
    if (descriptor.m_ProjectionEnabled)
    {
        if (params.m_ProjectionWeights == nullptr)
        {
            throw InvalidArgumentException("AddLstmLayer: Projection Weights cannot be NULL "
                                           "when projection is enabled.");
        }
        layer->m_ProjectionParameters.m_ProjectionWeights =
            std::make_unique<ScopedCpuTensorHandle>(*(params.m_ProjectionWeights));
        if (params.m_ProjectionBias != nullptr)
        {
            layer->m_ProjectionParameters.m_ProjectionBias =
                std::make_unique<ScopedCpuTensorHandle>(*(params.m_ProjectionBias));
        }
    }

    // LSTM peephole parameters
    if (descriptor.m_PeepholeEnabled)
    {
        if (!descriptor.m_CifgEnabled)
        {
            if (params.m_CellToInputWeights == nullptr)
            {
                throw InvalidArgumentException("AddLstmLayer: Cell To Input Weights cannot be NULL "
                                               "when Peephole is enabled and CIFG disabled.");
            }

            layer->m_PeepholeParameters.m_CellToInputWeights =
                std::make_unique<ScopedCpuTensorHandle>(*(params.m_CellToInputWeights));
        }

        if (params.m_CellToForgetWeights == nullptr)
        {
            throw InvalidArgumentException("AddLstmLayer: Cell To Forget Weights cannot be NULL "
                                           "when Peephole is enabled.");
        }
        if (params.m_CellToOutputWeights == nullptr)
        {
            throw InvalidArgumentException("AddLstmLayer: Cell To Output Weights cannot be NULL "
                                           "when Peephole is enabled.");
        }

        layer->m_PeepholeParameters.m_CellToForgetWeights =
            std::make_unique<ScopedCpuTensorHandle>(*(params.m_CellToForgetWeights));
        layer->m_PeepholeParameters.m_CellToOutputWeights =
            std::make_unique<ScopedCpuTensorHandle>(*(params.m_CellToOutputWeights));
    }

    // LSTM layer normalization parameters
    if (descriptor.m_LayerNormEnabled)
    {
        if (!descriptor.m_CifgEnabled)
        {
            if (params.m_InputLayerNormWeights == nullptr)
            {
                throw InvalidArgumentException("AddLstmLayer: Input layer normalization weights cannot be NULL "
                                               "when layer normalization is enabled and CIFG disabled.");
            }
            layer->m_LayerNormParameters.m_InputLayerNormWeights =
                std::make_unique<ScopedCpuTensorHandle>(*(params.m_InputLayerNormWeights));
        }

        if (params.m_ForgetLayerNormWeights == nullptr)
        {
            throw InvalidArgumentException("AddLstmLayer: Forget layer normalization weights cannot be NULL "
                                           "when layer normalization is enabled.");
        }
        if (params.m_CellLayerNormWeights == nullptr)
        {
            throw InvalidArgumentException("AddLstmLayer: Cell layer normalization weights cannot be NULL "
                                           "when layer normalization is enabled.");
        }
        if (params.m_OutputLayerNormWeights == nullptr)
        {
            throw InvalidArgumentException("AddLstmLayer: Output layer normalization weights cannot be NULL "
                                           "when layer normalization is enabled.");
        }
        layer->m_LayerNormParameters.m_ForgetLayerNormWeights =
            std::make_unique<ScopedCpuTensorHandle>(*(params.m_ForgetLayerNormWeights));
        layer->m_LayerNormParameters.m_CellLayerNormWeights =
            std::make_unique<ScopedCpuTensorHandle>(*(params.m_CellLayerNormWeights));
        layer->m_LayerNormParameters.m_OutputLayerNormWeights =
            std::make_unique<ScopedCpuTensorHandle>(*(params.m_OutputLayerNormWeights));
    }

    return layer;
}
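
// In summary, the LSTM parameter groups validated above are:
//     basic (always required):              input/recurrent weights and biases for
//                                           the forget, cell and output gates;
//     CIFG (when m_CifgEnabled is false):   input-gate weights and bias;
//     projection (m_ProjectionEnabled):     projection weights, plus an optional bias;
//     peephole (m_PeepholeEnabled):         cell-to-gate weight vectors;
//     layer norm (m_LayerNormEnabled):      per-gate normalization weight vectors.
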
IConnectableLayer* Network::AddDivisionLayer(const char* name)
{
    return m_Graph->AddLayer<DivisionLayer>(name);
}

IConnectableLayer* Network::AddSubtractionLayer(const char* name)
{
    return m_Graph->AddLayer<SubtractionLayer>(name);
}

IConnectableLayer* Network::AddMeanLayer(const MeanDescriptor& meanDescriptor, const char* name)
{
    return m_Graph->AddLayer<MeanLayer>(meanDescriptor, name);
}

IConnectableLayer* Network::AddPadLayer(const PadDescriptor& padDescriptor, const char* name)
{
    return m_Graph->AddLayer<PadLayer>(padDescriptor, name);
}

IConnectableLayer* Network::AddQuantizeLayer(const char* name)
{
    return m_Graph->AddLayer<QuantizeLayer>(name);
}

IConnectableLayer* Network::AddDequantizeLayer(const char* name)
{
    return m_Graph->AddLayer<DequantizeLayer>(name);
}

IConnectableLayer* Network::AddStridedSliceLayer(const StridedSliceDescriptor& stridedSliceDescriptor,
                                                 const char* name)
{
    return m_Graph->AddLayer<StridedSliceLayer>(stridedSliceDescriptor, name);
}

IConnectableLayer* Network::AddGreaterLayer(const char* name)
{
    return AddComparisonLayer(ComparisonDescriptor(ComparisonOperation::Greater), name);
}

IConnectableLayer* Network::AddEqualLayer(const char* name)
{
    return AddComparisonLayer(ComparisonDescriptor(ComparisonOperation::Equal), name);
}

IConnectableLayer* Network::AddRsqrtLayer(const char* name)
{
    return AddElementwiseUnaryLayer(ElementwiseUnaryDescriptor(UnaryOperation::Rsqrt), name);
}

IConnectableLayer* Network::AddGatherLayer(const char* name)
{
    return m_Graph->AddLayer<GatherLayer>(name);
}

IConnectableLayer* Network::AddMergeLayer(const char* name)
{
    return m_Graph->AddLayer<MergeLayer>(name);
}

IConnectableLayer* Network::AddSwitchLayer(const char* name)
{
    return m_Graph->AddLayer<SwitchLayer>(name);
}

IConnectableLayer* Network::AddPreluLayer(const char* name)
{
    return m_Graph->AddLayer<PreluLayer>(name);
}

IConnectableLayer* Network::AddTransposeConvolution2dLayer(const TransposeConvolution2dDescriptor& descriptor,
                                                           const ConstTensor& weights,
                                                           const Optional<ConstTensor>& biases,
                                                           const char* name)
{
    if (descriptor.m_BiasEnabled && !biases.has_value())
    {
        throw InvalidArgumentException("AddTransposeConvolution2dLayer: Biases cannot be empty");
    }

    const auto layer = m_Graph->AddLayer<TransposeConvolution2dLayer>(descriptor, name);

    layer->m_Weight = std::make_unique<ScopedCpuTensorHandle>(weights);

    if (descriptor.m_BiasEnabled)
    {
        layer->m_Bias = std::make_unique<ScopedCpuTensorHandle>(biases.value());
    }

    return layer;
}

IConnectableLayer* Network::AddTransposeLayer(const TransposeDescriptor& transposeDescriptor,
                                              const char* name)
{
    return m_Graph->AddLayer<TransposeLayer>(transposeDescriptor, name);
}

IConnectableLayer* Network::AddStackLayer(const StackDescriptor& stackDescriptor,
                                          const char* name)
{
    return m_Graph->AddLayer<StackLayer>(stackDescriptor, name);
}

IConnectableLayer* Network::AddStandInLayer(const StandInDescriptor& desc,
                                            const char* name)
{
    return m_Graph->AddLayer<StandInLayer>(desc, name);
}

IConnectableLayer* Network::AddQuantizedLstmLayer(const QuantizedLstmInputParams& params,
                                                  const char* name)
{
    const auto layer = m_Graph->AddLayer<QuantizedLstmLayer>(name);

    // InputToX weights
    layer->m_QuantizedLstmParameters.m_InputToInputWeights =
        std::make_unique<ScopedCpuTensorHandle>(params.GetInputToInputWeights());
    layer->m_QuantizedLstmParameters.m_InputToForgetWeights =
        std::make_unique<ScopedCpuTensorHandle>(params.GetInputToForgetWeights());
    layer->m_QuantizedLstmParameters.m_InputToCellWeights =
        std::make_unique<ScopedCpuTensorHandle>(params.GetInputToCellWeights());
    layer->m_QuantizedLstmParameters.m_InputToOutputWeights =
        std::make_unique<ScopedCpuTensorHandle>(params.GetInputToOutputWeights());

    // RecurrentToX weights
    layer->m_QuantizedLstmParameters.m_RecurrentToInputWeights =
        std::make_unique<ScopedCpuTensorHandle>(params.GetRecurrentToInputWeights());
    layer->m_QuantizedLstmParameters.m_RecurrentToForgetWeights =
        std::make_unique<ScopedCpuTensorHandle>(params.GetRecurrentToForgetWeights());
    layer->m_QuantizedLstmParameters.m_RecurrentToCellWeights =
        std::make_unique<ScopedCpuTensorHandle>(params.GetRecurrentToCellWeights());
    layer->m_QuantizedLstmParameters.m_RecurrentToOutputWeights =
        std::make_unique<ScopedCpuTensorHandle>(params.GetRecurrentToOutputWeights());

    // Bias
    layer->m_QuantizedLstmParameters.m_InputGateBias =
        std::make_unique<ScopedCpuTensorHandle>(params.GetInputGateBias());
    layer->m_QuantizedLstmParameters.m_ForgetGateBias =
        std::make_unique<ScopedCpuTensorHandle>(params.GetForgetGateBias());
    layer->m_QuantizedLstmParameters.m_CellBias =
        std::make_unique<ScopedCpuTensorHandle>(params.GetCellBias());
    layer->m_QuantizedLstmParameters.m_OutputGateBias =
        std::make_unique<ScopedCpuTensorHandle>(params.GetOutputGateBias());

    return layer;
}

IConnectableLayer* Network::AddQLstmLayer(const QLstmDescriptor& descriptor,
                                          const LstmInputParams& params,
                                          const char* name)
{
    const auto layer = m_Graph->AddLayer<QLstmLayer>(descriptor, name);

    // QLstm basic parameters
    layer->m_BasicParameters.m_InputToForgetWeights =
        std::make_unique<ScopedCpuTensorHandle>(*(params.m_InputToForgetWeights));
    layer->m_BasicParameters.m_InputToCellWeights =
        std::make_unique<ScopedCpuTensorHandle>(*(params.m_InputToCellWeights));
    layer->m_BasicParameters.m_InputToOutputWeights =
        std::make_unique<ScopedCpuTensorHandle>(*(params.m_InputToOutputWeights));
    layer->m_BasicParameters.m_RecurrentToForgetWeights =
        std::make_unique<ScopedCpuTensorHandle>(*(params.m_RecurrentToForgetWeights));
    layer->m_BasicParameters.m_RecurrentToCellWeights =
        std::make_unique<ScopedCpuTensorHandle>(*(params.m_RecurrentToCellWeights));
    layer->m_BasicParameters.m_RecurrentToOutputWeights =
        std::make_unique<ScopedCpuTensorHandle>(*(params.m_RecurrentToOutputWeights));
    layer->m_BasicParameters.m_ForgetGateBias =
        std::make_unique<ScopedCpuTensorHandle>(*(params.m_ForgetGateBias));
    layer->m_BasicParameters.m_CellBias =
        std::make_unique<ScopedCpuTensorHandle>(*(params.m_CellBias));
    layer->m_BasicParameters.m_OutputGateBias =
        std::make_unique<ScopedCpuTensorHandle>(*(params.m_OutputGateBias));

    // QLstm CIFG parameters
    if (!descriptor.m_CifgEnabled)
    {
        if (params.m_InputToInputWeights == nullptr)
        {
            throw InvalidArgumentException("AddQLstmLayer: Input To Input Weights cannot be NULL");
        }

        if (params.m_RecurrentToInputWeights == nullptr)
        {
            throw InvalidArgumentException(
                "AddQLstmLayer: Recurrent To Input Weights cannot be NULL");
        }

        if (params.m_InputGateBias == nullptr)
        {
            throw InvalidArgumentException("AddQLstmLayer: Input Gate Bias cannot be NULL");
        }

        layer->m_CifgParameters.m_InputToInputWeights =
            std::make_unique<ScopedCpuTensorHandle>(*(params.m_InputToInputWeights));
        layer->m_CifgParameters.m_RecurrentToInputWeights =
            std::make_unique<ScopedCpuTensorHandle>(*(params.m_RecurrentToInputWeights));
        layer->m_CifgParameters.m_InputGateBias =
            std::make_unique<ScopedCpuTensorHandle>(*(params.m_InputGateBias));
    }

    // QLstm projection parameters
    if (descriptor.m_ProjectionEnabled)
    {
        if (params.m_ProjectionWeights == nullptr)
        {
            throw InvalidArgumentException("AddQLstmLayer: Projection Weights cannot be NULL");
        }

        layer->m_ProjectionParameters.m_ProjectionWeights =
            std::make_unique<ScopedCpuTensorHandle>(*(params.m_ProjectionWeights));

        // The projection bias is optional even if projection is enabled
        if (params.m_ProjectionBias != nullptr)
        {
            layer->m_ProjectionParameters.m_ProjectionBias =
                std::make_unique<ScopedCpuTensorHandle>(*(params.m_ProjectionBias));
        }
    }

    // QLstm peephole parameters
    if (descriptor.m_PeepholeEnabled)
    {
        if (params.m_CellToForgetWeights == nullptr)
        {
            throw InvalidArgumentException("AddQLstmLayer: Cell To Forget Weights cannot be NULL");
        }

        if (params.m_CellToOutputWeights == nullptr)
        {
            throw InvalidArgumentException("AddQLstmLayer: Cell To Output Weights cannot be NULL");
        }

        if (!descriptor.m_CifgEnabled)
        {
            if (params.m_CellToInputWeights == nullptr)
            {
                throw InvalidArgumentException("AddQLstmLayer: Cell To Input Weights cannot be NULL");
            }

            layer->m_PeepholeParameters.m_CellToInputWeights =
                std::make_unique<ScopedCpuTensorHandle>(*(params.m_CellToInputWeights));
        }

        layer->m_PeepholeParameters.m_CellToForgetWeights =
            std::make_unique<ScopedCpuTensorHandle>(*(params.m_CellToForgetWeights));
        layer->m_PeepholeParameters.m_CellToOutputWeights =
            std::make_unique<ScopedCpuTensorHandle>(*(params.m_CellToOutputWeights));
    }

    // QLstm layer normalization parameters
    if (descriptor.m_LayerNormEnabled)
    {
        if (params.m_ForgetLayerNormWeights == nullptr)
        {
            throw InvalidArgumentException("AddQLstmLayer: Forget layer normalization weights cannot be NULL");
        }

        if (params.m_CellLayerNormWeights == nullptr)
        {
            throw InvalidArgumentException("AddQLstmLayer: Cell layer normalization weights cannot be NULL");
        }

        if (params.m_OutputLayerNormWeights == nullptr)
        {
            throw InvalidArgumentException("AddQLstmLayer: Output layer normalization weights cannot be NULL");
        }

        if (!descriptor.m_CifgEnabled)
        {
            if (params.m_InputLayerNormWeights == nullptr)
            {
                throw InvalidArgumentException("AddQLstmLayer: Input layer normalization weights cannot be NULL");
            }

            layer->m_LayerNormParameters.m_InputLayerNormWeights =
                std::make_unique<ScopedCpuTensorHandle>(*(params.m_InputLayerNormWeights));
        }

        layer->m_LayerNormParameters.m_ForgetLayerNormWeights =
            std::make_unique<ScopedCpuTensorHandle>(*(params.m_ForgetLayerNormWeights));
        layer->m_LayerNormParameters.m_CellLayerNormWeights =
            std::make_unique<ScopedCpuTensorHandle>(*(params.m_CellLayerNormWeights));
        layer->m_LayerNormParameters.m_OutputLayerNormWeights =
            std::make_unique<ScopedCpuTensorHandle>(*(params.m_OutputLayerNormWeights));
    }

    return layer;
}

void Network::Accept(ILayerVisitor& visitor) const
{
    for (auto layer : GetGraph())
    {
        layer->Accept(visitor);
    }
}

OptimizedNetwork::OptimizedNetwork(std::unique_ptr<Graph> graph)
    : m_Graph(std::move(graph)), m_Guid(profiling::ProfilingService::GetNextGuid())
{
}

OptimizedNetwork::~OptimizedNetwork()
{
}

} // namespace armnn