//
// Copyright © 2017 Arm Ltd and Contributors. All rights reserved.
// SPDX-License-Identifier: MIT
//

#include "Network.hpp"
#include "Graph.hpp"
#include "Layer.hpp"
#include "DeviceSpec.hpp"
#include "Optimizer.hpp"
#include "SubgraphViewSelector.hpp"
#include "BackendSettings.hpp"
#include "optimizations/All.hpp"

#include <backendsCommon/CpuTensorHandle.hpp>
#include <backendsCommon/WorkloadFactory.hpp>
#include <armnn/backends/IBackendInternal.hpp>
#include <backendsCommon/TensorHandleFactoryRegistry.hpp>

#include <armnn/Exceptions.hpp>
#include <armnn/Utils.hpp>
#include <armnn/TypesUtils.hpp>
#include <armnn/BackendRegistry.hpp>
#include <armnn/Logging.hpp>
#include <armnn/utility/Assert.hpp>
#include <armnn/utility/IgnoreUnused.hpp>
#include <armnn/utility/PolymorphicDowncast.hpp>

#include <ProfilingService.hpp>

#include <boost/format.hpp>
#include <boost/numeric/conversion/converter_policies.hpp>

namespace armnn
{

armnn::INetwork* INetwork::CreateRaw(NetworkOptions networkOptions)
{
    return new Network(networkOptions);
}

armnn::INetworkPtr INetwork::Create(NetworkOptions networkOptions)
{
    return INetworkPtr(CreateRaw(networkOptions), &INetwork::Destroy);
}

void INetwork::Destroy(INetwork* network)
{
    delete PolymorphicDowncast<Network*>(network);
}

void IOptimizedNetwork::Destroy(IOptimizedNetwork* network)
{
    delete PolymorphicDowncast<OptimizedNetwork*>(network);
}
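
// Usage sketch (illustrative, not part of this translation unit): client code is
// expected to go through the factory functions above rather than new/delete, so
// that networks created here are freed by the matching Destroy(). The names below
// are public armnn API; the layer wiring is just an example.
//
//     armnn::INetworkPtr net = armnn::INetwork::Create();        // deleter is &INetwork::Destroy
//     armnn::IConnectableLayer* input  = net->AddInputLayer(0);
//     armnn::IConnectableLayer* output = net->AddOutputLayer(0);
//     input->GetOutputSlot(0).Connect(output->GetInputSlot(0));
//     // net goes out of scope -> INetwork::Destroy -> PolymorphicDowncast + delete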

Status OptimizedNetwork::PrintGraph()
{
    m_Graph->Print();
    return Status::Success;
}

Status OptimizedNetwork::SerializeToDot(std::ostream& stream) const
{
    return m_Graph->SerializeToDot(stream);
}

void ReportError(const std::string& errorMessage,
                 Optional<std::vector<std::string>&> errorMessages)
{
    std::stringstream fullErrorMessage;
    fullErrorMessage << "ERROR: " << errorMessage;
    ARMNN_LOG(error) << fullErrorMessage.str();
    if (errorMessages)
    {
        errorMessages.value().push_back(fullErrorMessage.str());
    }
}

void ReportWarning(const std::string& warningMessage,
                   Optional<std::vector<std::string>&> warningMessages)
{
    std::stringstream fullWarningMessage;
    fullWarningMessage << "WARNING: " << warningMessage;
    ARMNN_LOG(warning) << fullWarningMessage.str();
    if (warningMessages)
    {
        warningMessages.value().push_back(fullWarningMessage.str());
    }
}

OptimizationResult ReturnWithError(OptimizationResult res,
                                   const Layer* layer,
                                   const BackendSettings& backendSettings,
                                   Optional<std::vector<std::string>&> errMessages)
{
    std::stringstream failureMsg;
    failureMsg << "Layer of type " << GetLayerTypeAsCString(layer->GetType())
               << " is not supported on any preferred backend " << backendSettings.m_PreferredBackends;
    ReportError(failureMsg.str(), errMessages);

    res.m_Error = true;
    return res;
}

bool CheckScaleSetOnQuantizedType(Layer* layer, Optional<std::vector<std::string>&> errMessages)
{
    bool noErrors = true;
    unsigned int numOutputs = layer->GetNumOutputSlots();
    for (unsigned int i = 0; i < numOutputs; i++) {
        OutputSlot& outputSlot = layer->GetOutputSlot(i);
        TensorInfo info = outputSlot.GetTensorInfo();
        if (DataType::QAsymmU8 == info.GetDataType()) {
            if (0.f == info.GetQuantizationScale()) {
                noErrors = false;
                std::stringstream ss;
                ss << "output " << i << " of layer " << GetLayerTypeAsCString(layer->GetType())
                   << " (" << layer->GetNameStr() << ") is of type"
                   << " Quantized 8 bit but its scale parameter has not been set";
                ReportError(ss.str(), errMessages);
            }
            // Softmax under QuantisedAsymm8 must always be scale (1.0f/256.0f) and offset 0
            if ((info.GetQuantizationScale() != (1.0f / 256.0f) ||
                 info.GetQuantizationOffset() != 0) &&
                 layer->GetType() == armnn::LayerType::Softmax)
            {
                std::stringstream ss;
                ss << "Quantization parameters for Softmax layer (Scale: " <<
                    info.GetQuantizationScale() << " and Offset: " << info.GetQuantizationOffset() <<
                    ") are incorrect and have been updated to Scale: 0.00390625 and Offset: 0";
                ARMNN_LOG(warning) << ss.str();
                info.SetQuantizationScale((1.0f / 256.0f));
                info.SetQuantizationOffset(0);
                outputSlot.SetTensorInfo(info);
            }
        }
    }
    return noErrors;
}
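
// Illustrative sketch of the invariant checked above: a QAsymmU8 output slot is
// only valid once a non-zero scale is set on its TensorInfo. A caller would
// typically do something like the following before optimizing (shape and scale
// values here are made up for the example):
//
//     armnn::TensorInfo info({ 1, 224, 224, 3 }, armnn::DataType::QAsymmU8);
//     info.SetQuantizationScale(1.0f / 255.0f);   // real = scale * (quantized - offset)
//     info.SetQuantizationOffset(0);
//     layer->GetOutputSlot(0).SetTensorInfo(info);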

template <typename LayerT>
LayerT* ConvertBf16ToFp32Weight(Layer* l)
{
    LayerT* layer = PolymorphicDowncast<LayerT*>(l);
    if ((layer->GetType() == LayerType::Convolution2d || layer->GetType() == LayerType::FullyConnected)
         && layer->m_Weight)
    {
        const TensorInfo& info = layer->m_Weight->GetTensorInfo();

        if (info.GetDataType() == DataType::BFloat16)
        {
            std::vector<float> newValues(info.GetNumElements());

            armnnUtils::FloatingPointConverter::ConvertBFloat16ToFloat32(
                layer->m_Weight->template GetTensor<armnn::BFloat16>(), info.GetNumElements(), newValues.data());

            TensorInfo newInfo(info.GetShape(), DataType::Float32);
            ConstTensor newInput(newInfo, newValues);
            layer->m_Weight.reset(new ScopedCpuTensorHandle(newInput));
        }
    }
    return layer;
}
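
// For reference, BFloat16 is the top 16 bits of an IEEE-754 binary32 value, so
// widening to Float32 (what ConvertBFloat16ToFloat32 does element-wise) amounts
// to a 16-bit left shift. A minimal standalone sketch, independent of armnnUtils:
//
//     float Bf16BitsToFloat32(uint16_t bf16Bits)
//     {
//         uint32_t bits = static_cast<uint32_t>(bf16Bits) << 16;
//         float result;
//         std::memcpy(&result, &bits, sizeof(result));   // needs <cstring>
//         return result;
//     }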

OptimizationResult AttemptBackendAssignment(BackendSettings& backendSettings,
                                            Graph& graph,
                                            Layer* layer,
                                            BackendId backend,
                                            DataType dataTypeIn,
                                            DataType dataTypeOut,
                                            const std::vector<BackendId>& availablePreferredBackends,
                                            std::string& reasonIfUnsupported,
                                            Optional<std::vector<std::string>&> errMessages)
{
    OptimizationResult result;

    // Helper lambda to compose meaningful error message before returning with error
    auto ReturnError = [&](const Layer* layer)
        {
            return ReturnWithError(result, layer, backendSettings, errMessages);
        };

    // need to set the compute device on the layer
    // before we can check if it is supported
    layer->SetBackendId(backend);
    if (!IWorkloadFactory::IsLayerSupported(*layer, EmptyOptional(), reasonIfUnsupported))
    {
        if (dataTypeIn == DataType::Float16 || dataTypeOut == DataType::Float16)
        {
            if (IWorkloadFactory::IsLayerSupported(*layer, DataType::Float32, reasonIfUnsupported)
                && layer->GetType() != LayerType::ConvertFp32ToFp16
                && layer->GetType() != LayerType::ConvertFp16ToFp32)
            {
                // Insert FP16 -> FP32 conversion layer before current layer
                std::vector<ConvertFp16ToFp32Layer*> convertFp16ToFp32Layers;
                if (dataTypeIn == DataType::Float16)
                {
                    convertFp16ToFp32Layers =
                        InsertConvertFp16ToFp32LayersBefore(graph, *layer);
                }

                // Insert FP32 -> FP16 conversion layer after current layer
                std::vector<ConvertFp32ToFp16Layer*> convertFp32ToFp16Layers;
                if (dataTypeOut == DataType::Float16)
                {
                    convertFp32ToFp16Layers =
                        InsertConvertFp32ToFp16LayersAfter(graph, *layer);
                }

                // Assign a supported backend to the newly introduced conversion layers
                auto AssignFirstSupportedBackend = [&](Layer* layer, BackendId preferredBackend)
                    {
                        bool supportedBackendFound = false;
                        std::string reasonIfUnsupported;

                        // Try preferred backend first
                        layer->SetBackendId(preferredBackend);
                        if (IWorkloadFactory::IsLayerSupported(*layer,
                                                               EmptyOptional(),
                                                               reasonIfUnsupported))
                        {
                            supportedBackendFound = true;
                        }
                        else
                        {
                            for (const auto& backend : availablePreferredBackends)
                            {
                                // Skip preferred backend (we already determined that it is not supported)
                                if (backend == preferredBackend)
                                {
                                    continue;
                                }

                                layer->SetBackendId(backend);
                                if (IWorkloadFactory::IsLayerSupported(*layer,
                                                                       EmptyOptional(),
                                                                       reasonIfUnsupported))
                                {
                                    supportedBackendFound = true;
                                    break;
                                }
                            }
                        }

                        return supportedBackendFound;
                    };

                for (ConvertFp16ToFp32Layer* convertLayer : convertFp16ToFp32Layers)
                {
                    if (!AssignFirstSupportedBackend(convertLayer, backend))
                    {
                        return ReturnError(convertLayer);
                    }
                }

                for (ConvertFp32ToFp16Layer* convertLayer : convertFp32ToFp16Layers)
                {
                    if (!AssignFirstSupportedBackend(convertLayer, backend))
                    {
                        return ReturnError(convertLayer);
                    }
                }

                return result;
            }
        }
        else if (dataTypeIn == DataType::BFloat16 || dataTypeOut == DataType::BFloat16)
        {
            if (IWorkloadFactory::IsLayerSupported(*layer, DataType::Float32, reasonIfUnsupported)
                && layer->GetType() != LayerType::ConvertFp32ToBf16
                && layer->GetType() != LayerType::ConvertBf16ToFp32)
            {
                // Insert BF16 -> FP32 conversion layer before current layer
                std::vector<ConvertBf16ToFp32Layer*> convertBf16ToFp32Layers;
                if (dataTypeIn == DataType::BFloat16)
                {
                    convertBf16ToFp32Layers =
                        InsertConvertBf16ToFp32LayersBefore(graph, *layer);
                    if (layer->GetType() == LayerType::Convolution2d)
                    {
                        ConvertBf16ToFp32Weight<Convolution2dLayer>(layer);
                    }
                    else if (layer->GetType() == LayerType::FullyConnected)
                    {
                        ConvertBf16ToFp32Weight<FullyConnectedLayer>(layer);
                    }
                }

                // Insert FP32 -> BF16 conversion layer after current layer
                std::vector<ConvertFp32ToBf16Layer*> convertFp32ToBf16Layers;
                if (dataTypeOut == DataType::BFloat16)
                {
                    convertFp32ToBf16Layers =
                        InsertConvertFp32ToBf16LayersAfter(graph, *layer);
                }

                // Assign a supported backend to the newly introduced conversion layers
                auto AssignFirstSupportedBackend = [&](Layer* layer, BackendId preferredBackend)
                    {
                        bool supportedBackendFound = false;
                        std::string reasonIfUnsupported;

                        // Try preferred backend first
                        layer->SetBackendId(preferredBackend);
                        if (IWorkloadFactory::IsLayerSupported(*layer,
                                                               EmptyOptional(),
                                                               reasonIfUnsupported))
                        {
                            supportedBackendFound = true;
                        }
                        else
                        {
                            for (const auto& backend : availablePreferredBackends)
                            {
                                // Skip preferred backend (we already determined that it is not supported)
                                if (backend == preferredBackend)
                                {
                                    continue;
                                }

                                layer->SetBackendId(backend);
                                if (IWorkloadFactory::IsLayerSupported(*layer,
                                                                       EmptyOptional(),
                                                                       reasonIfUnsupported))
                                {
                                    supportedBackendFound = true;
                                    break;
                                }
                            }
                        }

                        return supportedBackendFound;
                    };

                for (ConvertBf16ToFp32Layer* convertLayer : convertBf16ToFp32Layers)
                {
                    if (!AssignFirstSupportedBackend(convertLayer, backend))
                    {
                        return ReturnError(convertLayer);
                    }
                }

                for (ConvertFp32ToBf16Layer* convertLayer : convertFp32ToBf16Layers)
                {
                    if (!AssignFirstSupportedBackend(convertLayer, backend))
                    {
                        return ReturnError(convertLayer);
                    }
                }

                return result;
            }
        }

        std::stringstream warningMsg;
        warningMsg << "Layer of type " << GetLayerTypeAsCString(layer->GetType())
                   << " is not supported on requested backend " << layer->GetBackendId().Get()
                   << " for input data type " << GetDataTypeName(dataTypeIn)
                   << " and output data type " << GetDataTypeName(dataTypeOut)
                   << " (reason: " << reasonIfUnsupported
                   << "), falling back to the next backend.";
        ReportWarning(warningMsg.str(), errMessages);

        return OptimizationResult(true, false);
    }
    else
    {
        return result;
    }
}

OptimizationResult AssignBackends(OptimizedNetwork* optNetObjPtr,
                                  BackendSettings& backendSettings,
                                  Graph::Iterator& firstLayer,
                                  Graph::Iterator& lastLayer,
                                  Optional<std::vector<std::string>&> errMessages)
{
    OptimizationResult result;

    // Helper lambda to compose meaningful error message before returning with error
    auto ReturnError = [&](const Layer* layer)
        {
            return ReturnWithError(result, layer, backendSettings, errMessages);
        };

    auto availablePreferredBackends = backendSettings.GetAvailablePreferredBackends();
    if (availablePreferredBackends.empty())
    {
        std::stringstream failureMsg;
        failureMsg << "No preferred backends are available";
        ReportError(failureMsg.str(), errMessages);

        result.m_Error = true;
        return result;
    }

    for (auto it = firstLayer; it != lastLayer; ++it)
    {
        auto layer = *it;

        DataType dataTypeIn  = layer->GetNumInputSlots() == 0 ? DataType::Float32 :
            layer->GetInputSlot(0).GetConnectedOutputSlot()->GetTensorInfo().GetDataType();
        DataType dataTypeOut = layer->GetNumOutputSlots() == 0 ? DataType::Float32 :
            layer->GetOutputSlot(0).GetTensorInfo().GetDataType();

        std::string reasonIfUnsupported;
        bool found = false;
        if (!CheckScaleSetOnQuantizedType(layer, errMessages))
        {
            // don't bomb immediately, find all the quantized outputs
            // which haven't had a scale set and report them all back.
            result.m_Error = true;
        }

        // First try assign layer to hint backend
        if (layer->GetBackendHint().has_value() &&
            backendSettings.IsBackendSupported(layer->GetBackendHint().value()) &&
            AttemptBackendAssignment(backendSettings,
                                     optNetObjPtr->GetGraph(),
                                     layer,
                                     layer->GetBackendHint().value(),
                                     dataTypeIn,
                                     dataTypeOut,
                                     availablePreferredBackends,
                                     reasonIfUnsupported,
                                     errMessages).IsOk())
        {
            found = true;
            backendSettings.m_SelectedBackends.insert(layer->GetBackendHint().value());
        }
        else
        {
            // Try assign layer to prefered list of backends
            for (const auto& backend : availablePreferredBackends)
            {
                if (layer->GetBackendHint().has_value() &&
                    layer->GetBackendHint().value() == backend)
                {
                    continue; //Don't re-test the backend hint
                }

                OptimizationResult res = AttemptBackendAssignment(backendSettings,
                                                                  optNetObjPtr->GetGraph(),
                                                                  layer,
                                                                  backend,
                                                                  dataTypeIn,
                                                                  dataTypeOut,
                                                                  availablePreferredBackends,
                                                                  reasonIfUnsupported,
                                                                  errMessages);

                if (res.IsOk())
                {
                    found = true;
                    backendSettings.m_SelectedBackends.insert(backend);
                    break;
                }
                else if (res.IsError())
                {
                    return res;  // Cannot continue.
                    // Note: we don't need to log the error as it would already
                    // be logged in AttemptBackendAssignment().
                }
                else
                {
                    ARMNN_ASSERT_MSG(res.IsWarningOnly(), "OptimizationResult in unexpected state.");
                }
            }
        }

        // If the layer is unsupported by any devices, log and return a null network.
        if (!found)
        {
            // NOTE: if the layer is not an operation queue type AND we have not got CpuRef as a
            // fallback we should set the compute device on the layer to CpuRef (these are not
            // available as accelerated operations, or are only available under certain
            // conditions, currently they comprise MemCopy, Constant, Permute)
            armnn::LayerType layerType = layer->GetType();
            if (!backendSettings.IsCpuRefUsed() && (layerType == armnn::LayerType::MemCopy ||
                                                    layerType == armnn::LayerType::Constant ||
                                                    layerType == armnn::LayerType::Permute))
            {
                BackendId cpuBackendId(armnn::Compute::CpuRef);
                layer->SetBackendId(cpuBackendId);
                backendSettings.m_SelectedBackends.insert(cpuBackendId);
            }
            else
            {
                return ReturnError(layer);
            }
        }
    }

    return result;
}
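
// Illustrative sketch: the preference order consumed by this function comes from
// the backendPreferences argument of Optimize(). For example, a caller asking for
// GPU first with CPU fallbacks would pass:
//
//     std::vector<armnn::BackendId> preferences = { armnn::Compute::GpuAcc,
//                                                   armnn::Compute::CpuAcc,
//                                                   armnn::Compute::CpuRef };
//
// Each layer is then offered to GpuAcc, CpuAcc and CpuRef in that order, and the
// first backend whose IsLayerSupported() check passes is selected for it.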

OptimizationResult AssignBackends(OptimizedNetwork* optNetObjPtr,
                                  BackendSettings& backendSettings,
                                  SubgraphView& subgraph,
                                  Optional<std::vector<std::string>&> errMessages)
{
    Graph::Iterator firstLayer = subgraph.begin();
    Graph::Iterator lastLayer  = subgraph.end();
    return AssignBackends(optNetObjPtr,
                          backendSettings,
                          firstLayer,
                          lastLayer,
                          errMessages);
}

BackendsMap CreateSupportedBackends(TensorHandleFactoryRegistry& handleFactoryRegistry,
                                    BackendSettings& backendSettings)
{
    BackendsMap backends;
    auto const& backendRegistry = BackendRegistryInstance();
    for (auto&& selectedBackend : backendSettings.m_SupportedBackends)
    {
        auto backendFactory = backendRegistry.GetFactory(selectedBackend);
        auto backendObjPtr  = backendFactory();
        ARMNN_ASSERT(backendObjPtr);

        backendObjPtr->RegisterTensorHandleFactories(handleFactoryRegistry);

        backends[backendObjPtr->GetId()] = std::move(backendObjPtr);
    }

    return backends;
}
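
// Illustrative sketch: the registry consulted above is process-wide, so the set
// of constructible backends can be inspected the same way from client code:
//
//     for (const armnn::BackendId& id : armnn::BackendRegistryInstance().GetBackendIds())
//     {
//         ARMNN_LOG(info) << "Registered backend: " << id.Get();
//     }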

OptimizationResult ApplyBackendOptimizations(OptimizedNetwork* optNetObjPtr,
                                             BackendSettings& backendSettings,
                                             BackendsMap& backends,
                                             Optional<std::vector<std::string>&> errMessages)
{
    ARMNN_ASSERT(optNetObjPtr);

    OptimizationResult result;

    // Get the optimized graph
    Graph& optGraph = optNetObjPtr->GetGraph();

    // Run backend specific optimizations
    for (auto&& selectedBackend : backendSettings.m_SelectedBackends)
    {
        auto backendObjPtr = backends.find(selectedBackend)->second.get();
        ARMNN_ASSERT(backendObjPtr);

        // Select sub-graphs based on backend
        SubgraphViewSelector::Subgraphs subgraphs =
            SubgraphViewSelector::SelectSubgraphs(optGraph,
                                                  // Select layers assigned to the requested backend
                                                  [&backendObjPtr](const Layer& layer)
                                                  {
                                                      return layer.GetType() != LayerType::Input &&
                                                             layer.GetType() != LayerType::Output &&
                                                             layer.GetBackendId() == backendObjPtr->GetId();
                                                  });
        if (subgraphs.empty())
        {
            // No sub-graphs found, try with next selected backend
            continue;
        }

        // Try to optimize each sub-graph
        for (auto& subgraph : subgraphs)
        {
            // Try to optimize the current sub-graph
            OptimizationViews optimizationViews = backendObjPtr->OptimizeSubgraphView(*subgraph);
            ARMNN_ASSERT(optimizationViews.Validate(*subgraph));

            // Optimization attempted, check the resulting optimized sub-graph
            for (auto& substitution : optimizationViews.GetSubstitutions())
            {
                // Sub-graph optimized, substitute the sub-graph with the new optimized one in the main optimized graph
                SubgraphView& replacementSubgraph   = substitution.m_ReplacementSubgraph;
                SubgraphView& substitutableSubgraph = substitution.m_SubstitutableSubgraph;
                optGraph.SubstituteSubgraph(substitutableSubgraph, replacementSubgraph);

                // Assign the current backend to the optimized sub-graph
                std::for_each(replacementSubgraph.begin(), replacementSubgraph.end(), [&selectedBackend](Layer* l)
                    {
                        ARMNN_ASSERT(l);
                        l->SetBackendId(selectedBackend);
                    });
            }

            if (!optimizationViews.GetFailedSubgraphs().empty())
            {
                std::stringstream warningMsg;
                warningMsg << "Some sub-graph(s) failed to optimize on " << backendObjPtr->GetId() << " backend.";
                ReportWarning(warningMsg.str(), errMessages);

                // Failed to optimize the given sub-graph, re-assign the sub-graph layers to other available backends
                BackendSettings settingsCopy(backendSettings);
                if (!backendObjPtr->GetId().IsCpuRef())
                {
                    // Add the current backend to the list of backends to ignore
                    settingsCopy.m_IgnoredBackends.insert(backendObjPtr->GetId());
                }

                int count = 0;
                for (auto& failedSubgraph : optimizationViews.GetFailedSubgraphs())
                {
                    // An error occurred: the optimization was attempted but not performed, try different backends
                    std::stringstream subgraphMsg;
                    subgraphMsg << "Re-assigning backends to " << failedSubgraph.GetLayers().size()
                                << " layers inside sub-graph " << count++;
                    ReportWarning(subgraphMsg.str(), errMessages);

                    OptimizationResult reassignmentResult = AssignBackends(optNetObjPtr,
                                                                           settingsCopy,
                                                                           failedSubgraph,
                                                                           errMessages);
                    if (reassignmentResult.m_Error)
                    {
                        // Failed to re-assign one of the remaining backends to each layer of the sub-graph
                        result.m_Error = true;
                    }
                }
            }
        }
    }

    return result;
}

bool RequiresCopy(ITensorHandleFactory::FactoryId src,
                  ITensorHandleFactory::FactoryId dst,
                  TensorHandleFactoryRegistry& registry)
{
    if (src != dst)
    {
        ITensorHandleFactory* srcFactory = registry.GetFactory(src);
        ITensorHandleFactory* dstFactory = registry.GetFactory(dst);

        if (srcFactory && dstFactory &&
            (srcFactory->GetExportFlags() & dstFactory->GetImportFlags()) != 0)
        {
            return false;
        }
        return true;
    }
    return false;
}
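
// Illustrative example of the flag test above: GetExportFlags()/GetImportFlags()
// are MemorySourceFlags bitmasks, so two different factories can share a tensor
// whenever the masks intersect. For instance, if the source exports Malloc-backed
// memory and the destination can import it:
//
//     srcFactory->GetExportFlags() : Malloc bit set
//     dstFactory->GetImportFlags() : Malloc bit set
//     => (export & import) != 0, so no copy is required
//
// Disjoint masks (or a factory missing from the registry) force a copy.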

// Find the handle factory for the input layer which results in fewest required copies.
ITensorHandleFactory::FactoryId CalculateSlotOptionForInput(BackendsMap& backends,
                                                            OutputSlot& slot,
                                                            TensorHandleFactoryRegistry& registry)
{
    Layer& layer = slot.GetOwningLayer();
    ARMNN_ASSERT(layer.GetType() == LayerType::Input);

    // Explicitly select the tensorhandle factory for InputLayer because the rules for it are slightly different. It
    // doesn't matter which backend it is assigned to because they all use the same implementation, which
    // requires Map/Unmap support. This means that, so long as the handle type supports map/unmap semantics, we can
    // select a factory with maximum compatibility with the layers connected to the InputLayer.

    // First ensure the from backends can support the TensorHandle API
    auto frmBackend = backends.find(layer.GetBackendId());
    if (frmBackend == backends.end() ||
        !frmBackend->second->SupportsTensorAllocatorAPI())
    {
        return ITensorHandleFactory::LegacyFactoryId;
    }

    // Go through all connections to the output slot and determine the TensorHandleFactory which results in the
    // fewest copies.
    std::map<ITensorHandleFactory::FactoryId, int> factoryScores;
    int topScore = 0;
    ITensorHandleFactory::FactoryId topChoice = ITensorHandleFactory::LegacyFactoryId;

    for (auto&& connection : slot.GetConnections())
    {
        const Layer& connectedLayer = connection->GetOwningLayer();

        auto toBackend = backends.find(connectedLayer.GetBackendId());
        ARMNN_ASSERT_MSG(toBackend != backends.end(), "Backend id not found for the connected layer");

        if (!toBackend->second.get()->SupportsTensorAllocatorAPI())
        {
            // The destination backend does not support the tensor allocator API, move to the next one
            continue;
        }

        auto dstPrefs = toBackend->second.get()->GetHandleFactoryPreferences();
        for (auto&& dst : dstPrefs)
        {
            // Input layers use the mem copy workload or import, so the selected factory must
            // support either the map/unmap API or Import API
            ITensorHandleFactory* factory = registry.GetFactory(dst);
            if (!factory->SupportsMapUnmap() &&
                !CheckFlag(factory->GetImportFlags(), MemorySource::Malloc)) // Just support cpu mem imports for now
            {
                // The current tensor handle factory does not support the map/unmap or import
                // strategy, move to the next one
                continue;
            }

            auto it = factoryScores.find(dst);
            if (it == factoryScores.end())
            {
                // Add new score to the table
                factoryScores[dst] = 0;
                if (topChoice == ITensorHandleFactory::LegacyFactoryId)
                {
                    topChoice = dst;
                }
            }
            else
            {
                // Increase the score
                factoryScores[dst]++;

                // Track the best option
                if (factoryScores[dst] > topScore)
                {
                    topScore  = factoryScores[dst];
                    topChoice = dst;
                }
            }
        }
    }

    return topChoice;
}

// Find the handle factory for the output layer which results in fewest required copies.
ITensorHandleFactory::FactoryId CalculateSlotOptionForOutput(BackendsMap& backends,
                                                             OutputSlot& slot,
                                                             TensorHandleFactoryRegistry& registry)
{
    IgnoreUnused(backends, slot, registry);
    return ITensorHandleFactory::DeferredFactoryId;
}

// For all handle factories supported on the source backend, we wish to find the one which requires the fewest copies
// when considering all connections.
ITensorHandleFactory::FactoryId CalculateSlotOption(BackendsMap& backends,
                                                    OutputSlot& outputSlot,
                                                    TensorHandleFactoryRegistry& registry)
{
    // First ensure the from backends can support the TensorHandle API
    Layer& layer = outputSlot.GetOwningLayer();
    auto frmBackend = backends.find(layer.GetBackendId());
    if (frmBackend == backends.end() ||
        !frmBackend->second->SupportsTensorAllocatorAPI())
    {
        return ITensorHandleFactory::LegacyFactoryId;
    }

    // Connections to Output Layers requires support for map/unmap on the TensorHandle.
    bool requiresMapUnmap = false;
    for (auto&& connection : outputSlot.GetConnections())
    {
        const Layer& connectedLayer = connection->GetOwningLayer();
        if (connectedLayer.GetType() == LayerType::Output)
        {
            requiresMapUnmap = true;
        }
    }

    IBackendInternal* srcBackend = frmBackend->second.get();
    auto srcPrefs = srcBackend->GetHandleFactoryPreferences();

    // Initialize the scores
    std::map<ITensorHandleFactory::FactoryId, int> factoryScores;
    for (auto&& pref : srcPrefs)
    {
        if (requiresMapUnmap) // Only consider factories that support map/unmap if required
        {
            ITensorHandleFactory* factory = registry.GetFactory(pref);
            if (!factory->SupportsMapUnmap())
            {
                // The current tensor handle factory does not support the map/unmap strategy, move to the next one
                continue;
            }
        }

        auto it = factoryScores.find(pref);
        if (it == factoryScores.end())
        {
            // Add new score to the table
            factoryScores[pref] = 0;
        }
    }

    // Score each handle factory based on how many times it requires copies on the slot connections
    for (auto&& connection : outputSlot.GetConnections())
    {
        const Layer& connectedLayer = connection->GetOwningLayer();

        auto toBackend = backends.find(connectedLayer.GetBackendId());
        ARMNN_ASSERT_MSG(toBackend != backends.end(), "Backend id not found for the connected layer");

        auto dstPrefs = toBackend->second.get()->GetHandleFactoryPreferences();
        for (auto&& src : srcPrefs)
        {
            if (factoryScores.find(src) == factoryScores.end()) // Don't consider excluded factories
            {
                continue;
            }

            for (auto&& dst : dstPrefs)
            {
                if (RequiresCopy(src, dst, registry))
                {
                    // Copy required for this connection, increase the score
                    factoryScores[src]++;
                    break;
                }
            }
        }
    }

    // Find the lowest score
    int minScore = std::numeric_limits<int>::max();
    for (auto it : factoryScores)
    {
        minScore = std::min(minScore, it.second);
    }

    // Collect factories matching the best(lowest) score
    std::vector<ITensorHandleFactory::FactoryId> optimalFactories;
    for (auto it : factoryScores)
    {
        if (it.second == minScore)
        {
            optimalFactories.push_back(it.first);
        }
    }

    // For all compatible Factories matching the best score, find the preferred one for the current layer.
    for (auto&& srcPref : srcPrefs)
    {
        for (auto&& comp : optimalFactories)
        {
            if (comp == srcPref)
            {
                return comp;
            }
        }
    }

    return ITensorHandleFactory::LegacyFactoryId;
}
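
// Worked example of the scoring above (values are made up): suppose the source
// backend prefers factories {A, B} and the slot has two connections.
//
//     connection 1: RequiresCopy(A, dst) = true,  RequiresCopy(B, dst) = false
//     connection 2: RequiresCopy(A, dst) = true,  RequiresCopy(B, dst) = false
//     => factoryScores: A = 2, B = 0; minScore = 0; optimalFactories = {B}
//
// B is then returned because, among the candidates with the fewest required
// copies, it appears first in the source backend's preference list.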

EdgeStrategy CalculateEdgeStrategy(BackendsMap& backends,
                                   ITensorHandleFactory::FactoryId srcFactoryId,
                                   const Layer& layer,
                                   const Layer& connectedLayer,
                                   TensorHandleFactoryRegistry& registry,
                                   bool importEnabled)
{
    auto toBackend = backends.find(connectedLayer.GetBackendId());
    ARMNN_ASSERT_MSG(toBackend != backends.end(), "Backend id not found for the connected layer");

    auto dstPrefs = toBackend->second.get()->GetHandleFactoryPreferences();

    // Legacy API check for backward compatibility
    if (srcFactoryId == ITensorHandleFactory::LegacyFactoryId || dstPrefs.empty())
    {
        if (layer.GetBackendId() != connectedLayer.GetBackendId())
        {
            return EdgeStrategy::CopyToTarget;
        }
        else
        {
            return EdgeStrategy::DirectCompatibility;
        }
    }

    // TensorHandleFactory API present, so perform more sophisticated strategies.
    // Dst Output layers don't require copy because they use import or map/unmap
    if (connectedLayer.GetType() == LayerType::Output)
    {
        return EdgeStrategy::DirectCompatibility;
    }

    // Search for direct match in prefs
    for (auto&& pref : dstPrefs)
    {
        if (pref == srcFactoryId)
        {
            return EdgeStrategy::DirectCompatibility;
        }
    }

    // Search for export/import options
    ITensorHandleFactory* srcFactory = registry.GetFactory(srcFactoryId);
    if (srcFactory->GetExportFlags() != 0 && importEnabled)
    {
        for (auto&& pref : dstPrefs)
        {
            ITensorHandleFactory* dstFactory = registry.GetFactory(pref);

            // Handles cases when a destPref is not listed in TensorHandleFactoryRegistry
            if (!dstFactory)
            {
                continue;
            }

            if ((dstFactory->GetImportFlags() & srcFactory->GetExportFlags()) != 0)
            {
                auto srcCapability = srcFactory->GetCapabilities(&layer, &layer, CapabilityClass::PaddingRequired);
                auto dstCapability = dstFactory->GetCapabilities(&connectedLayer,
                                                                 &connectedLayer,
                                                                 CapabilityClass::PaddingRequired);
                // Do not require memory copy if the source and destination do not require padding.
                if (srcCapability.empty() && dstCapability.empty())
                {
                    return EdgeStrategy::ExportToTarget;
                }
            }
        }
    }

    // Search for copy options via map/unmap
    if (srcFactory->SupportsMapUnmap())
    {
        for (auto&& pref : dstPrefs)
        {
            ITensorHandleFactory* dstFactory = registry.GetFactory(pref);
            if (dstFactory && dstFactory->SupportsMapUnmap())
            {
                return EdgeStrategy::CopyToTarget;
            }
        }
    }

    return EdgeStrategy::Undefined;
}

// Select the TensorHandleFactories and the corresponding memory strategy
OptimizationResult SelectTensorHandleStrategy(Graph& optGraph,
                                              BackendsMap& backends,
                                              TensorHandleFactoryRegistry& registry,
                                              bool importEnabled,
                                              Optional<std::vector<std::string>&> errMessages)
{
    OptimizationResult result;

    optGraph.ForEachLayer([&backends, &registry, &result, &errMessages, importEnabled](Layer* layer)
    {
        ARMNN_ASSERT(layer);

        // Lets make sure the backend is in our list of supported backends. Something went wrong during backend
        // assignment if this check fails
        ARMNN_ASSERT(backends.find(layer->GetBackendId()) != backends.end());

        // Check each output separately
        for (unsigned int slotIdx = 0; slotIdx < layer->GetNumOutputSlots(); slotIdx++)
        {
            OutputSlot& outputSlot = layer->GetOutputSlot(slotIdx);

            ITensorHandleFactory::FactoryId slotOption = ITensorHandleFactory::LegacyFactoryId;

            // Calculate the factory to use which results in the fewest copies being made.
            switch(layer->GetType())
            {
                case LayerType::Input:
                    slotOption = CalculateSlotOptionForInput(backends, outputSlot, registry);
                    break;
                case LayerType::Output:
                    slotOption = CalculateSlotOptionForOutput(backends, outputSlot, registry);
                    break;
                default:
                    slotOption = CalculateSlotOption(backends, outputSlot, registry);
                    break;
            }
            outputSlot.SetTensorHandleFactory(slotOption);

            // Now determine the "best" edge strategy for each connection given the slotOption.
            unsigned int connectionIdx = 0;
            for (auto&& connection : outputSlot.GetConnections())
            {
                const Layer& connectedLayer = connection->GetOwningLayer();

                EdgeStrategy strategy = CalculateEdgeStrategy(backends, slotOption, *layer, connectedLayer,
                                                              registry, importEnabled);

                if (strategy == EdgeStrategy::Undefined)
                {
                    result.m_Error = true;
                    if (errMessages)
                    {
                        errMessages.value().emplace_back("Could not find valid strategy required for compatibility"
                                                         " between backends.");
                    }
                    return;
                }

                outputSlot.SetEdgeStrategy(connectionIdx, strategy);

                connectionIdx++;
            }
        }
    });

    return result;
}

IOptimizedNetworkPtr Optimize(const INetwork& inNetwork,
                              const std::vector<BackendId>& backendPreferences,
                              const IDeviceSpec& deviceSpec,
                              const OptimizerOptions& options,
                              Optional<std::vector<std::string>&> messages)
{
    if (backendPreferences.empty())
    {
        throw InvalidArgumentException("Invoked Optimize with no backends specified");
    }

    if (options.m_ReduceFp32ToFp16 && options.m_ReduceFp32ToBf16)
    {
        throw InvalidArgumentException("BFloat16 and Float16 optimization cannot be enabled at the same time.");
    }

    const Network& network = *PolymorphicDowncast<const Network*>(&inNetwork);
    std::unique_ptr<Graph> graph = std::make_unique<Graph>(network.GetGraph());

    auto optNet = IOptimizedNetworkPtr(new OptimizedNetwork(std::move(graph), options.m_ModelOptions),
                                       &IOptimizedNetwork::Destroy);

    OptimizedNetwork* optNetObjPtr = PolymorphicDowncast<OptimizedNetwork*>(optNet.get());

    // Get the optimized graph
    Graph& optGraph = optNetObjPtr->GetGraph();

    // Perform AddBroadcastReshapeLayer optimisation
    using namespace optimizations;
    Optimizer::Pass(optGraph, MakeOptimizations(AddBroadcastReshapeLayer()));

    // Infer the tensor infos for all output slots. Throws an exception on failure
    optGraph.InferTensorInfos();

    // Perform optimisation passes
    Optimizer::Pass(optGraph, MakeOptimizations(SquashEqualPermuteSiblings(),
                                                SquashEqualTransposeSiblings(),
                                                SquashEqualReshapeSiblings(),
                                                OptimizeInversePermutes(),
                                                OptimizeInverseTransposes(),
                                                MovePermuteUp(),
                                                MoveTransposeUp(),
                                                PermuteAsReshape(),
                                                TransposeAsReshape(),
                                                OptimizeConsecutiveReshapes(),
                                                FoldPadIntoConvolution2d(),
                                                PermuteAndBatchToSpaceAsDepthToSpace(),
                                                TransposeAndBatchToSpaceAsDepthToSpace()));

    // If Fp32 to Fp16 optimization is set convert Fp32 network to Fp16
    if (options.m_ReduceFp32ToFp16)
    {
        Optimizer::Pass(optGraph, MakeOptimizations(Fp32NetworkToFp16Converter()));
        Optimizer::Pass(optGraph, MakeOptimizations(ConvertConstantsFloatToHalf()));
    }

    // If Fp32 to Bf16 optimization is set convert Fp32 network to Bf16
    // Convert input of Convolution2d and FullyConnected from Fp32 to Bf16
    // Only Constant weight of Convolution2d and FullyConnected are converted from Fp32 to Bf16
    if (options.m_ReduceFp32ToBf16)
    {
        Optimizer::Pass(optGraph, MakeOptimizations(Fp32NetworkToBf16Converter()));
    }

    // Initialize backend settings
    BackendSettings backendSettings(backendPreferences, deviceSpec);
    if (backendSettings.GetAvailablePreferredBackends().empty())
    {
        std::stringstream failureMsg;
        failureMsg << "None of the preferred backends " << backendPreferences
                   << " are supported. Current platform provides " << backendSettings.m_SupportedBackends;
        ReportError(failureMsg.str(), messages);
        throw InvalidArgumentException(failureMsg.str());
    }

    // Create a map to temporarily hold initialized backend objects
    TensorHandleFactoryRegistry tensorHandleFactoryRegistry;
    BackendsMap backends = CreateSupportedBackends(tensorHandleFactoryRegistry, backendSettings);

    // Assign an available backend to each layer
    Graph::Iterator firstLayer = optGraph.begin();
    Graph::Iterator lastLayer  = optGraph.end();
    OptimizationResult assignBackendsResult = AssignBackends(optNetObjPtr,
                                                             backendSettings,
                                                             firstLayer,
                                                             lastLayer,
                                                             messages);
    if (assignBackendsResult.m_Error)
    {
        // Failed to assign a backend to each layer
        throw InvalidArgumentException("Failed to assign a backend to each layer");
    }

    Optimizer::Pass(optGraph, MakeOptimizations(OptimizeInverseConversionsFp16(),
                                                OptimizeInverseConversionsFp32()));

    // Apply the backend-specific optimizations
    OptimizationResult backendOptimizationResult = ApplyBackendOptimizations(optNetObjPtr,
                                                                             backendSettings,
                                                                             backends,
                                                                             messages);
    if (backendOptimizationResult.m_Error)
    {
        // Failed to apply the backend-specific optimizations
        throw InvalidArgumentException("Failed to apply the backend-specific optimizations");
    }

    // If the debug flag is set, then insert a DebugLayer after each layer
    // Doing this after applying the backend optimizations as they might have changed some layers
    if (options.m_Debug)
    {
        Optimizer::Pass(optGraph, MakeOptimizations(InsertDebugLayer()));
    }

    // Calculate the compatibility strategies for tensor handles
    OptimizationResult strategyResult = SelectTensorHandleStrategy(optGraph,
                                                                   backends,
                                                                   tensorHandleFactoryRegistry,
                                                                   options.m_ImportEnabled,
                                                                   messages);
    if (strategyResult.m_Error)
    {
        // Failed to select a valid tensor handle strategy
        return IOptimizedNetworkPtr(nullptr, &IOptimizedNetwork::Destroy);
    }

    // Based on the tensor handle strategy determined above, insert copy layers where required.
    optGraph.AddCompatibilityLayers(backends, tensorHandleFactoryRegistry);

    // Convert constants
    Optimizer::Pass(optGraph, MakeOptimizations(ConvertConstantsFloatToHalf()));
    Optimizer::Pass(optGraph, MakeOptimizations(ConvertConstantsHalfToFloat()));

    // Run backend specific optimizations (deprecated)
    for (auto&& chosenBackend : backendSettings.m_SelectedBackends)
    {
        auto factoryFun = BackendRegistryInstance().GetFactory(chosenBackend);
        auto backendPtr = factoryFun();
        ARMNN_ASSERT(backendPtr.get() != nullptr);

        ARMNN_NO_DEPRECATE_WARN_BEGIN
        auto backendSpecificOptimizations = backendPtr->GetOptimizations();
        ARMNN_NO_DEPRECATE_WARN_END

        if (!backendSpecificOptimizations.empty())
        {
            Optimizer::Pass(optNetObjPtr->GetGraph(), backendSpecificOptimizations);
        }
    }

    return optNet;
}
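
// Usage sketch (illustrative): the typical caller-side sequence around Optimize(),
// using the public runtime API. All names below are public armnn API; `net` is an
// INetworkPtr built by the caller.
//
//     using namespace armnn;
//     IRuntimePtr runtime = IRuntime::Create(IRuntime::CreationOptions());
//     std::vector<BackendId> preferences = { Compute::CpuAcc, Compute::CpuRef };
//     std::vector<std::string> errMessages;
//     IOptimizedNetworkPtr optNet = Optimize(*net,
//                                            preferences,
//                                            runtime->GetDeviceSpec(),
//                                            OptimizerOptions(),
//                                            Optional<std::vector<std::string>&>(errMessages));
//     NetworkId netId;
//     runtime->LoadNetwork(netId, std::move(optNet));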

bool Network::GetShapeInferenceMethod()
{
    if (m_NetworkOptions.size() > 0 && m_NetworkOptions[0].GetBackendId().Get() == "ShapeInferenceMethod")
    {
        return m_NetworkOptions[0].GetOption(0).GetValue().AsBool();
    }

    return false;
}

Network::Network(NetworkOptions networkOptions)
: m_NetworkOptions(networkOptions),
  m_Graph(std::make_unique<Graph>(GetShapeInferenceMethod()))
{}

Network::~Network()
{
}

Status Network::PrintGraph()
{
    m_Graph->Print();
    return Status::Success;
}

IConnectableLayer* Network::AddInputLayer(LayerBindingId id, const char* name)
{
    return m_Graph->AddLayer<InputLayer>(id, name);
}

IConnectableLayer* Network::AddBatchToSpaceNdLayer(const BatchToSpaceNdDescriptor& batchToSpaceNdDescriptor,
                                                   const char* name)
{
    return m_Graph->AddLayer<BatchToSpaceNdLayer>(batchToSpaceNdDescriptor, name);
}

IConnectableLayer* Network::AddComparisonLayer(const ComparisonDescriptor& comparisonDescriptor,
                                               const char* name)
{
    return m_Graph->AddLayer<ComparisonLayer>(comparisonDescriptor, name);
}

IConnectableLayer* Network::AddElementwiseUnaryLayer(const ElementwiseUnaryDescriptor& elementwiseUnaryDescriptor,
                                                     const char* name)
{
    return m_Graph->AddLayer<ElementwiseUnaryLayer>(elementwiseUnaryDescriptor, name);
}

IConnectableLayer* Network::AddFillLayer(const FillDescriptor& fillDescriptor,
                                         const char* name)
{
    return m_Graph->AddLayer<FillLayer>(fillDescriptor, name);
}

IConnectableLayer* Network::AddFullyConnectedLayerImpl(const FullyConnectedDescriptor& fullyConnectedDescriptor,
                                                       const ConstTensor& weights,
                                                       const Optional<ConstTensor>& biases,
                                                       const char* name)
{
    if (fullyConnectedDescriptor.m_BiasEnabled && !biases.has_value())
    {
        throw InvalidArgumentException("AddFullyConnectedLayer: biases cannot be empty");
    }

    const auto layer = m_Graph->AddLayer<FullyConnectedLayer>(fullyConnectedDescriptor, name);

    layer->m_Weight = std::make_unique<ScopedCpuTensorHandle>(weights);

    if (fullyConnectedDescriptor.m_BiasEnabled)
    {
        layer->m_Bias = std::make_unique<ScopedCpuTensorHandle>(biases.value());
    }

    return layer;
}

IConnectableLayer* Network::AddFullyConnectedLayer(const FullyConnectedDescriptor& fullyConnectedDescriptor,
                                                   const ConstTensor& weights,
                                                   const Optional<ConstTensor>& biases,
                                                   const char* name)
{
    return AddFullyConnectedLayerImpl(fullyConnectedDescriptor, weights, biases, name);
}

IConnectableLayer* Network::AddFullyConnectedLayer(const FullyConnectedDescriptor& fullyConnectedDescriptor,
                                                   const ConstTensor& weights,
                                                   const char* name)
{
    Optional<ConstTensor> biases;
    return AddFullyConnectedLayerImpl(fullyConnectedDescriptor, weights, biases, name);
}

IConnectableLayer* Network::AddFullyConnectedLayer(const FullyConnectedDescriptor& fullyConnectedDescriptor,
                                                   const ConstTensor& weights,
                                                   const ConstTensor& biases,
                                                   const char* name)
{
    Optional<ConstTensor> optionalBiases(biases);
    return AddFullyConnectedLayerImpl(fullyConnectedDescriptor, weights, optionalBiases, name);
}
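
// Usage sketch (illustrative, with made-up shapes): adding a fully connected
// layer with constant weights and a bias through the overloads above.
//
//     armnn::FullyConnectedDescriptor desc;
//     desc.m_BiasEnabled = true;
//     armnn::TensorInfo weightsInfo({ 16, 8 }, armnn::DataType::Float32);
//     armnn::TensorInfo biasInfo({ 16 }, armnn::DataType::Float32);
//     std::vector<float> weightsData(16 * 8);   // filled by the caller
//     std::vector<float> biasData(16);
//     armnn::ConstTensor weights(weightsInfo, weightsData);
//     armnn::ConstTensor bias(biasInfo, biasData);
//     armnn::IConnectableLayer* fc = net->AddFullyConnectedLayer(desc, weights, bias, "fc1");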

IConnectableLayer* Network::AddConcatLayer(const ConcatDescriptor& concatDescriptor,
                                           const char* name)
{
    return m_Graph->AddLayer<ConcatLayer>(concatDescriptor, name);
}

IConnectableLayer* Network::AddConvolution2dLayerImpl(const Convolution2dDescriptor& convolution2dDescriptor,
                                                      const ConstTensor& weights,
                                                      const Optional<ConstTensor>& biases,
                                                      const char* name)
{
    if (convolution2dDescriptor.m_BiasEnabled && !biases.has_value())
    {
        throw InvalidArgumentException("AddConvolution2dLayer: biases cannot be empty");
    }

    const auto layer = m_Graph->AddLayer<Convolution2dLayer>(convolution2dDescriptor, name);

    layer->m_Weight = std::make_unique<ScopedCpuTensorHandle>(weights);

    if (convolution2dDescriptor.m_BiasEnabled)
    {
        layer->m_Bias = std::make_unique<ScopedCpuTensorHandle>(biases.value());
    }

    return layer;
}

IConnectableLayer* Network::AddConvolution2dLayer(const Convolution2dDescriptor& convolution2dDescriptor,
                                                  const ConstTensor& weights,
                                                  const Optional<ConstTensor>& biases,
                                                  const char* name)
{
    return AddConvolution2dLayerImpl(convolution2dDescriptor, weights, biases, name);
}

IConnectableLayer* Network::AddConvolution2dLayer(const Convolution2dDescriptor& convolution2dDescriptor,
                                                  const ConstTensor& weights,
                                                  const char* name)
{
    Optional<ConstTensor> biases;
    return AddConvolution2dLayerImpl(convolution2dDescriptor, weights, biases, name);
}

IConnectableLayer* Network::AddConvolution2dLayer(const Convolution2dDescriptor& convolution2dDescriptor,
                                                  const ConstTensor& weights,
                                                  const ConstTensor& biases,
                                                  const char* name)
{
    Optional<ConstTensor> optionalBiases(biases);
    return AddConvolution2dLayerImpl(convolution2dDescriptor, weights, optionalBiases, name);
}
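
// Usage sketch (illustrative, with made-up shapes): configuring and adding a
// 3x3, stride-1, same-padded convolution via the overloads above.
//
//     armnn::Convolution2dDescriptor convDesc;
//     convDesc.m_StrideX = 1;
//     convDesc.m_StrideY = 1;
//     convDesc.m_PadLeft = convDesc.m_PadRight  = 1;
//     convDesc.m_PadTop  = convDesc.m_PadBottom = 1;
//     convDesc.m_DataLayout = armnn::DataLayout::NHWC;
//     armnn::TensorInfo weightsInfo({ 8, 3, 3, 3 }, armnn::DataType::Float32);
//     std::vector<float> weightsData(8 * 3 * 3 * 3);   // filled by the caller
//     armnn::ConstTensor weights(weightsInfo, weightsData);
//     armnn::IConnectableLayer* conv = net->AddConvolution2dLayer(convDesc, weights, "conv1");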

IConnectableLayer* Network::AddDepthwiseConvolution2dLayerImpl(
    const DepthwiseConvolution2dDescriptor& convolution2dDescriptor,
    const ConstTensor& weights,
    const Optional<ConstTensor>& biases,
    const char* name)
{
    if (convolution2dDescriptor.m_BiasEnabled && !biases.has_value())
    {
        throw InvalidArgumentException("AddDepthwiseConvolution2dLayer: biases cannot be empty");
    }

    const auto layer = m_Graph->AddLayer<DepthwiseConvolution2dLayer>(convolution2dDescriptor, name);

    layer->m_Weight = std::make_unique<ScopedCpuTensorHandle>(weights);

    if (convolution2dDescriptor.m_BiasEnabled)
    {
        layer->m_Bias = std::make_unique<ScopedCpuTensorHandle>(biases.value());
    }

    return layer;
}

IConnectableLayer* Network::AddDepthToSpaceLayer(const DepthToSpaceDescriptor& depthToSpaceDescriptor,
                                                 const char* name)
{
    return m_Graph->AddLayer<DepthToSpaceLayer>(depthToSpaceDescriptor, name);
}

IConnectableLayer* Network::AddDepthwiseConvolution2dLayer(
    const DepthwiseConvolution2dDescriptor& convolution2dDescriptor,
    const ConstTensor& weights,
    const Optional<ConstTensor>& biases,
    const char* name)
{
    return AddDepthwiseConvolution2dLayerImpl(convolution2dDescriptor, weights, biases, name);
}

IConnectableLayer* Network::AddDepthwiseConvolution2dLayer(
    const DepthwiseConvolution2dDescriptor& convolution2dDescriptor,
    const ConstTensor& weights,
    const char* name)
{
    Optional<ConstTensor> biases;
    return AddDepthwiseConvolution2dLayerImpl(convolution2dDescriptor, weights, biases, name);
}

IConnectableLayer* Network::AddDepthwiseConvolution2dLayer(
    const DepthwiseConvolution2dDescriptor& convolution2dDescriptor,
    const ConstTensor& weights,
    const ConstTensor& biases,
    const char* name)
{
    Optional<ConstTensor> optionalBiases(biases);
    return AddDepthwiseConvolution2dLayerImpl(convolution2dDescriptor, weights, optionalBiases, name);
}

IConnectableLayer* Network::AddDetectionPostProcessLayer(const armnn::DetectionPostProcessDescriptor& descriptor,
                                                         const ConstTensor& anchors, const char* name)
{
    const auto layer = m_Graph->AddLayer<DetectionPostProcessLayer>(descriptor, name);

    layer->m_Anchors = std::make_unique<ScopedCpuTensorHandle>(anchors);

    return layer;
}

IConnectableLayer* Network::AddPermuteLayer(const PermuteDescriptor& permuteDescriptor,
                                            const char* name)
{
    return m_Graph->AddLayer<PermuteLayer>(permuteDescriptor, name);
}

IConnectableLayer* Network::AddPooling2dLayer(const Pooling2dDescriptor& pooling2dDescriptor,
                                              const char* name)
{
    return m_Graph->AddLayer<Pooling2dLayer>(pooling2dDescriptor, name);
}

IConnectableLayer* Network::AddActivationLayer(const ActivationDescriptor& activationDescriptor,
                                               const char* name)
{
    return m_Graph->AddLayer<ActivationLayer>(activationDescriptor, name);
}

IConnectableLayer* Network::AddArgMinMaxLayer(const ArgMinMaxDescriptor& argMinMaxDescriptor,
                                              const char* name)
{
    return m_Graph->AddLayer<ArgMinMaxLayer>(argMinMaxDescriptor, name);
}

IConnectableLayer* Network::AddNormalizationLayer(const NormalizationDescriptor& normalizationDescriptor,
                                                  const char* name)
{
    return m_Graph->AddLayer<NormalizationLayer>(normalizationDescriptor, name);
}

IConnectableLayer* Network::AddSliceLayer(const SliceDescriptor& sliceDescriptor, const char* name)
{
    return m_Graph->AddLayer<SliceLayer>(sliceDescriptor, name);
}

IConnectableLayer* Network::AddSoftmaxLayer(const SoftmaxDescriptor& softmaxDescriptor,
                                            const char* name)
{
    return m_Graph->AddLayer<SoftmaxLayer>(softmaxDescriptor, name);
}

IConnectableLayer* Network::AddSplitterLayer(const ViewsDescriptor& splitterDescriptor,
                                             const char* name)
{
    return m_Graph->AddLayer<SplitterLayer>(splitterDescriptor, name);
}

IConnectableLayer* Network::AddMaximumLayer(const char* name)
{
    return m_Graph->AddLayer<MaximumLayer>(name);
}

IConnectableLayer* Network::AddMinimumLayer(const char* name)
{
    return m_Graph->AddLayer<MinimumLayer>(name);
}

IConnectableLayer* Network::AddMergerLayer(const MergerDescriptor& mergerDescriptor,
                                           const char* name)
{
    return AddConcatLayer(mergerDescriptor, name);
}

IConnectableLayer* Network::AddAbsLayer(const char* name)
{
    return AddElementwiseUnaryLayer(ElementwiseUnaryDescriptor(UnaryOperation::Abs), name);
}

IConnectableLayer* Network::AddAdditionLayer(const char* name)
{
    return m_Graph->AddLayer<AdditionLayer>(name);
}

IConnectableLayer* Network::AddMultiplicationLayer(const char* name)
{
    return m_Graph->AddLayer<MultiplicationLayer>(name);
}

IConnectableLayer* Network::AddOutputLayer(LayerBindingId id, const char* name)
{
    return m_Graph->AddLayer<OutputLayer>(id, name);
}

IConnectableLayer* Network::AddBatchNormalizationLayer(const BatchNormalizationDescriptor& desc,
                                                       const ConstTensor& mean,
                                                       const ConstTensor& variance,
                                                       const ConstTensor& beta,
                                                       const ConstTensor& gamma,
                                                       const char* name)
{
    const auto layer = m_Graph->AddLayer<BatchNormalizationLayer>(desc, name);

    layer->m_Mean     = std::make_unique<ScopedCpuTensorHandle>(mean);
    layer->m_Variance = std::make_unique<ScopedCpuTensorHandle>(variance);
    layer->m_Beta     = std::make_unique<ScopedCpuTensorHandle>(beta);
    layer->m_Gamma    = std::make_unique<ScopedCpuTensorHandle>(gamma);

    return layer;
}

IConnectableLayer* Network::AddRankLayer(const char* name)
{
    return m_Graph->AddLayer<RankLayer>(name);
}

IConnectableLayer* Network::AddResizeBilinearLayer(const ResizeBilinearDescriptor& descriptor,
                                                   const char* name)
{
    ResizeDescriptor resizeDescriptor;
    resizeDescriptor.m_Method           = ResizeMethod::Bilinear;
    resizeDescriptor.m_DataLayout       = descriptor.m_DataLayout;
    resizeDescriptor.m_TargetWidth      = descriptor.m_TargetWidth;
    resizeDescriptor.m_TargetHeight     = descriptor.m_TargetHeight;
    resizeDescriptor.m_AlignCorners     = descriptor.m_AlignCorners;
    resizeDescriptor.m_HalfPixelCenters = descriptor.m_HalfPixelCenters;

    return m_Graph->AddLayer<ResizeLayer>(resizeDescriptor, name);
}

IConnectableLayer* Network::AddResizeLayer(const ResizeDescriptor& resizeDescriptor, const char* name)
{
    return m_Graph->AddLayer<ResizeLayer>(resizeDescriptor, name);
}

IConnectableLayer* Network::AddInstanceNormalizationLayer(const InstanceNormalizationDescriptor& desc,
                                                          const char* name)
{
    return m_Graph->AddLayer<InstanceNormalizationLayer>(desc, name);
}

IConnectableLayer* Network::AddL2NormalizationLayer(const L2NormalizationDescriptor& desc,
                                                    const char* name)
{
    return m_Graph->AddLayer<L2NormalizationLayer>(desc, name);
}

IConnectableLayer* Network::AddLogSoftmaxLayer(const LogSoftmaxDescriptor& desc,
                                               const char* name)
{
    return m_Graph->AddLayer<LogSoftmaxLayer>(desc, name);
}

IConnectableLayer* Network::AddConstantLayer(const ConstTensor& input, const char* name)
{
    auto layer = m_Graph->AddLayer<ConstantLayer>(name);

    layer->m_LayerOutput = std::make_unique<ScopedCpuTensorHandle>(input);

    return layer;
}

IConnectableLayer* Network::AddReshapeLayer(const ReshapeDescriptor& reshapeDescriptor,
                                            const char* name)
{
    return m_Graph->AddLayer<ReshapeLayer>(reshapeDescriptor, name);
}

IConnectableLayer* Network::AddSpaceToBatchNdLayer(const SpaceToBatchNdDescriptor& spaceToBatchNdDescriptor,
                                                   const char* name)
{
    return m_Graph->AddLayer<SpaceToBatchNdLayer>(spaceToBatchNdDescriptor, name);
}

IConnectableLayer* Network::AddSpaceToDepthLayer(const SpaceToDepthDescriptor& spaceToDepthDescriptor,
                                                 const char* name)
{
    return m_Graph->AddLayer<SpaceToDepthLayer>(spaceToDepthDescriptor, name);
}

IConnectableLayer* Network::AddFloorLayer(const char* name)
{
    return m_Graph->AddLayer<FloorLayer>(name);
}

IConnectableLayer* Network::AddLstmLayer(const LstmDescriptor&  descriptor,
                                         const LstmInputParams& params,
                                         const char* name)
{
    const auto layer = m_Graph->AddLayer<LstmLayer>(descriptor, name);

    //Lstm Basic Parameters
    layer->m_BasicParameters.m_InputToForgetWeights =
        std::make_unique<ScopedCpuTensorHandle>(*(params.m_InputToForgetWeights));
    layer->m_BasicParameters.m_InputToCellWeights =
        std::make_unique<ScopedCpuTensorHandle>(*(params.m_InputToCellWeights));
    layer->m_BasicParameters.m_InputToOutputWeights =
        std::make_unique<ScopedCpuTensorHandle>(*(params.m_InputToOutputWeights));
    layer->m_BasicParameters.m_RecurrentToForgetWeights =
        std::make_unique<ScopedCpuTensorHandle>(*(params.m_RecurrentToForgetWeights));
    layer->m_BasicParameters.m_RecurrentToCellWeights =
        std::make_unique<ScopedCpuTensorHandle>(*(params.m_RecurrentToCellWeights));
    layer->m_BasicParameters.m_RecurrentToOutputWeights =
        std::make_unique<ScopedCpuTensorHandle>(*(params.m_RecurrentToOutputWeights));
    layer->m_BasicParameters.m_ForgetGateBias =
        std::make_unique<ScopedCpuTensorHandle>(*(params.m_ForgetGateBias));
    layer->m_BasicParameters.m_CellBias =
        std::make_unique<ScopedCpuTensorHandle>(*(params.m_CellBias));
    layer->m_BasicParameters.m_OutputGateBias =
        std::make_unique<ScopedCpuTensorHandle>(*(params.m_OutputGateBias));

    //Lstm Cifg parameters
    if(!descriptor.m_CifgEnabled)
    {
        if(params.m_InputToInputWeights == nullptr)
        {
            throw InvalidArgumentException("AddLstmLayer: Input To Input Weights cannot be NULL "
                                           "when CIFG is disabled.");
        }
        if(params.m_RecurrentToInputWeights == nullptr)
        {
            throw InvalidArgumentException(
                    "AddLstmLayer: Recurrent To Input Weights cannot be NULL "
                    "when CIFG is disabled.");
        }
        if(params.m_InputGateBias == nullptr)
        {
            throw InvalidArgumentException("AddLstmLayer: Input Gate Bias cannot be NULL "
                                           "when CIFG is disabled.");
        }
        layer->m_CifgParameters.m_InputToInputWeights =
            std::make_unique<ScopedCpuTensorHandle>(*(params.m_InputToInputWeights));
        layer->m_CifgParameters.m_RecurrentToInputWeights =
            std::make_unique<ScopedCpuTensorHandle>(*(params.m_RecurrentToInputWeights));
        layer->m_CifgParameters.m_InputGateBias =
            std::make_unique<ScopedCpuTensorHandle>(*(params.m_InputGateBias));
    }

    //Lstm projection parameters
    if(descriptor.m_ProjectionEnabled)
    {
        if(params.m_ProjectionWeights == nullptr)
        {
            throw InvalidArgumentException("AddLstmLayer: Projection Weights cannot be NULL "
                                           "when projection is enabled.");
        }
        layer->m_ProjectionParameters.m_ProjectionWeights =
            std::make_unique<ScopedCpuTensorHandle>(*(params.m_ProjectionWeights));
        if(params.m_ProjectionBias != nullptr)
        {
            layer->m_ProjectionParameters.m_ProjectionBias =
                std::make_unique<ScopedCpuTensorHandle>(*(params.m_ProjectionBias));
        }
    }

    //Lstm Peephole params
    if(descriptor.m_PeepholeEnabled)
    {
        if(!descriptor.m_CifgEnabled)
        {
            if(params.m_CellToInputWeights == nullptr)
            {
                throw InvalidArgumentException("AddLstmLayer: Cell To Input Weights cannot be NULL "
                                               "when Peephole is enabled and CIFG disabled.");
            }

            layer->m_PeepholeParameters.m_CellToInputWeights =
                std::make_unique<ScopedCpuTensorHandle>(*(params.m_CellToInputWeights));
        }

        if(params.m_CellToForgetWeights == nullptr)
        {
            throw InvalidArgumentException("AddLstmLayer: Cell To Forget Weights cannot be NULL "
                                           "when Peephole is enabled.");
        }
        if(params.m_CellToOutputWeights == nullptr)
        {
            throw InvalidArgumentException("AddLstmLayer: Cell To Output Weights cannot be NULL "
                                           "when Peephole is enabled.");
        }

        layer->m_PeepholeParameters.m_CellToForgetWeights =
            std::make_unique<ScopedCpuTensorHandle>(*(params.m_CellToForgetWeights));
        layer->m_PeepholeParameters.m_CellToOutputWeights =
            std::make_unique<ScopedCpuTensorHandle>(*(params.m_CellToOutputWeights));
    }

    //Lstm Layer Normalization params
    if(descriptor.m_LayerNormEnabled)
    {
        if(!descriptor.m_CifgEnabled)
        {
            if(params.m_InputLayerNormWeights == nullptr)
            {
                throw InvalidArgumentException("AddLstmLayer: Input layer normalization weights cannot be NULL "
                                               "when layer normalization is enabled and CIFG disabled.");
            }
            layer->m_LayerNormParameters.m_InputLayerNormWeights =
                std::make_unique<ScopedCpuTensorHandle>(*(params.m_InputLayerNormWeights));
        }

        if(params.m_ForgetLayerNormWeights == nullptr)
        {
            throw InvalidArgumentException("AddLstmLayer: Forget layer normalization weights cannot be NULL "
                                           "when layer normalization is enabled.");
        }
        if(params.m_CellLayerNormWeights == nullptr)
        {
            throw InvalidArgumentException("AddLstmLayer: Cell layer normalization weights cannot be NULL "
                                           "when layer normalization is enabled.");
        }
        if(params.m_OutputLayerNormWeights == nullptr)
        {
            throw InvalidArgumentException("AddLstmLayer: Output layer normalization weights cannot be NULL "
                                           "when layer normalization is enabled.");
        }
        layer->m_LayerNormParameters.m_ForgetLayerNormWeights =
            std::make_unique<ScopedCpuTensorHandle>(*(params.m_ForgetLayerNormWeights));
        layer->m_LayerNormParameters.m_CellLayerNormWeights =
            std::make_unique<ScopedCpuTensorHandle>(*(params.m_CellLayerNormWeights));
        layer->m_LayerNormParameters.m_OutputLayerNormWeights =
            std::make_unique<ScopedCpuTensorHandle>(*(params.m_OutputLayerNormWeights));
    }
    return layer;
}

IConnectableLayer* Network::AddDivisionLayer(const char* name)
{
    return m_Graph->AddLayer<DivisionLayer>(name);
}

IConnectableLayer* Network::AddSubtractionLayer(const char* name)
{
    return m_Graph->AddLayer<SubtractionLayer>(name);
}

IConnectableLayer* Network::AddMeanLayer(const MeanDescriptor& meanDescriptor, const char* name)
{
    return m_Graph->AddLayer<MeanLayer>(meanDescriptor, name);
}

IConnectableLayer* Network::AddPadLayer(const PadDescriptor& padDescriptor, const char* name)
{
    return m_Graph->AddLayer<PadLayer>(padDescriptor, name);
}

IConnectableLayer* Network::AddQuantizeLayer(const char* name)
{
    return m_Graph->AddLayer<QuantizeLayer>(name);
}

IConnectableLayer* Network::AddDequantizeLayer(const char* name)
{
    return m_Graph->AddLayer<DequantizeLayer>(name);
}

IConnectableLayer* Network::AddStridedSliceLayer(const StridedSliceDescriptor& stridedSliceDescriptor,
                                                 const char* name)
{
    return m_Graph->AddLayer<StridedSliceLayer>(stridedSliceDescriptor, name);
}

IConnectableLayer* Network::AddGreaterLayer(const char* name)
{
    return AddComparisonLayer(ComparisonDescriptor(ComparisonOperation::Greater), name);
}

IConnectableLayer* Network::AddEqualLayer(const char* name)
{
    return AddComparisonLayer(ComparisonDescriptor(ComparisonOperation::Equal), name);
}

IConnectableLayer* Network::AddRsqrtLayer(const char* name)
{
    return AddElementwiseUnaryLayer(ElementwiseUnaryDescriptor(UnaryOperation::Rsqrt), name);
}

IConnectableLayer* Network::AddGatherLayer(const char* name)
{
    GatherDescriptor gatherDescriptor{};
    return AddGatherLayer(gatherDescriptor, name);
}

IConnectableLayer* Network::AddGatherLayer(const GatherDescriptor& gatherDescriptor,
                                           const char* name)
{
    return m_Graph->AddLayer<GatherLayer>(gatherDescriptor, name);
}

IConnectableLayer* Network::AddMergeLayer(const char* name)
{
    return m_Graph->AddLayer<MergeLayer>(name);
}

IConnectableLayer* Network::AddSwitchLayer(const char* name)
{
    return m_Graph->AddLayer<SwitchLayer>(name);
}

IConnectableLayer* Network::AddPreluLayer(const char* name)
{
    return m_Graph->AddLayer<PreluLayer>(name);
}

IConnectableLayer* Network::AddTransposeConvolution2dLayer(const TransposeConvolution2dDescriptor& descriptor,
                                                           const ConstTensor& weights,
                                                           const Optional<ConstTensor>& biases,
                                                           const char* name)
{
    if (descriptor.m_BiasEnabled && !biases.has_value())
    {
        throw InvalidArgumentException("AddTransposeConvolution2dLayer: Biases cannot be empty");
    }

    const auto layer = m_Graph->AddLayer<TransposeConvolution2dLayer>(descriptor, name);

    layer->m_Weight = std::make_unique<ScopedCpuTensorHandle>(weights);

    if (descriptor.m_BiasEnabled)
    {
        layer->m_Bias = std::make_unique<ScopedCpuTensorHandle>(biases.value());
    }

    return layer;
}

IConnectableLayer* Network::AddTransposeLayer(const TransposeDescriptor& transposeDescriptor,
                                              const char* name)
{
    return m_Graph->AddLayer<TransposeLayer>(transposeDescriptor, name);
}

IConnectableLayer* Network::AddStackLayer(const StackDescriptor& stackDescriptor,
                                          const char* name)
{
    return m_Graph->AddLayer<StackLayer>(stackDescriptor, name);
}

IConnectableLayer* Network::AddStandInLayer(const StandInDescriptor& desc,
                                            const char* name)
{
    return m_Graph->AddLayer<StandInLayer>(desc, name);
}

IConnectableLayer* Network::AddQuantizedLstmLayer(const QuantizedLstmInputParams& params,
                                                  const char* name)
{
    const auto layer = m_Graph->AddLayer<QuantizedLstmLayer>(name);

    // InputToX weights
    layer->m_QuantizedLstmParameters.m_InputToInputWeights =
        std::make_unique<ScopedCpuTensorHandle>(params.GetInputToInputWeights());
    layer->m_QuantizedLstmParameters.m_InputToForgetWeights =
        std::make_unique<ScopedCpuTensorHandle>(params.GetInputToForgetWeights());
    layer->m_QuantizedLstmParameters.m_InputToCellWeights =
        std::make_unique<ScopedCpuTensorHandle>(params.GetInputToCellWeights());
    layer->m_QuantizedLstmParameters.m_InputToOutputWeights =
        std::make_unique<ScopedCpuTensorHandle>(params.GetInputToOutputWeights());

    // RecurrentToX weights
    layer->m_QuantizedLstmParameters.m_RecurrentToInputWeights =
        std::make_unique<ScopedCpuTensorHandle>(params.GetRecurrentToInputWeights());
    layer->m_QuantizedLstmParameters.m_RecurrentToForgetWeights =
        std::make_unique<ScopedCpuTensorHandle>(params.GetRecurrentToForgetWeights());
    layer->m_QuantizedLstmParameters.m_RecurrentToCellWeights =
        std::make_unique<ScopedCpuTensorHandle>(params.GetRecurrentToCellWeights());
    layer->m_QuantizedLstmParameters.m_RecurrentToOutputWeights =
        std::make_unique<ScopedCpuTensorHandle>(params.GetRecurrentToOutputWeights());

    // Bias
    layer->m_QuantizedLstmParameters.m_InputGateBias =
        std::make_unique<ScopedCpuTensorHandle>(params.GetInputGateBias());
    layer->m_QuantizedLstmParameters.m_ForgetGateBias =
        std::make_unique<ScopedCpuTensorHandle>(params.GetForgetGateBias());
    layer->m_QuantizedLstmParameters.m_CellBias =
        std::make_unique<ScopedCpuTensorHandle>(params.GetCellBias());
    layer->m_QuantizedLstmParameters.m_OutputGateBias =
        std::make_unique<ScopedCpuTensorHandle>(params.GetOutputGateBias());

    return layer;
}

IConnectableLayer* Network::AddQLstmLayer(const QLstmDescriptor&  descriptor,
                                          const LstmInputParams& params,
                                          const char* name)
{
    const auto layer = m_Graph->AddLayer<QLstmLayer>(descriptor, name);

    // QLstm Basic Parameters
    layer->m_BasicParameters.m_InputToForgetWeights =
        std::make_unique<ScopedCpuTensorHandle>(*(params.m_InputToForgetWeights));
    layer->m_BasicParameters.m_InputToCellWeights =
        std::make_unique<ScopedCpuTensorHandle>(*(params.m_InputToCellWeights));
    layer->m_BasicParameters.m_InputToOutputWeights =
        std::make_unique<ScopedCpuTensorHandle>(*(params.m_InputToOutputWeights));
    layer->m_BasicParameters.m_RecurrentToForgetWeights =
        std::make_unique<ScopedCpuTensorHandle>(*(params.m_RecurrentToForgetWeights));
    layer->m_BasicParameters.m_RecurrentToCellWeights =
        std::make_unique<ScopedCpuTensorHandle>(*(params.m_RecurrentToCellWeights));
    layer->m_BasicParameters.m_RecurrentToOutputWeights =
        std::make_unique<ScopedCpuTensorHandle>(*(params.m_RecurrentToOutputWeights));
    layer->m_BasicParameters.m_ForgetGateBias =
        std::make_unique<ScopedCpuTensorHandle>(*(params.m_ForgetGateBias));
    layer->m_BasicParameters.m_CellBias =
        std::make_unique<ScopedCpuTensorHandle>(*(params.m_CellBias));
    layer->m_BasicParameters.m_OutputGateBias =
        std::make_unique<ScopedCpuTensorHandle>(*(params.m_OutputGateBias));

    // QLstm Cifg parameters
    if(!descriptor.m_CifgEnabled)
    {
        if(params.m_InputToInputWeights == nullptr)
        {
            throw InvalidArgumentException("AddQLstmLayer: Input To Input Weights cannot be NULL");
        }

        if(params.m_RecurrentToInputWeights == nullptr)
        {
            throw InvalidArgumentException(
                    "AddQLstmLayer: Recurrent To Input Weights cannot be NULL");
        }

        if(params.m_InputGateBias == nullptr)
        {
            throw InvalidArgumentException("AddQLstmLayer: Input Gate Bias cannot be NULL");
        }

        layer->m_CifgParameters.m_InputToInputWeights =
            std::make_unique<ScopedCpuTensorHandle>(*(params.m_InputToInputWeights));
        layer->m_CifgParameters.m_RecurrentToInputWeights =
            std::make_unique<ScopedCpuTensorHandle>(*(params.m_RecurrentToInputWeights));
        layer->m_CifgParameters.m_InputGateBias =
            std::make_unique<ScopedCpuTensorHandle>(*(params.m_InputGateBias));
    }

    // QLstm Projection parameters
    if(descriptor.m_ProjectionEnabled)
    {
        if(params.m_ProjectionWeights == nullptr)
        {
            throw InvalidArgumentException("AddQLstmLayer: Projection Weights cannot be NULL");
        }

        layer->m_ProjectionParameters.m_ProjectionWeights =
            std::make_unique<ScopedCpuTensorHandle>(*(params.m_ProjectionWeights));

        // Projection bias is optional even if projection is enabled
        if(params.m_ProjectionBias != nullptr)
        {
            layer->m_ProjectionParameters.m_ProjectionBias =
                std::make_unique<ScopedCpuTensorHandle>(*(params.m_ProjectionBias));
        }
    }

    // QLstm Peephole params
    if(descriptor.m_PeepholeEnabled)
    {
        if(params.m_CellToForgetWeights == nullptr)
        {
            throw InvalidArgumentException("AddQLstmLayer: Cell To Forget Weights cannot be NULL");
        }

        if(params.m_CellToOutputWeights == nullptr)
        {
            throw InvalidArgumentException("AddQLstmLayer: Cell To Output Weights cannot be NULL");
        }

        if(!descriptor.m_CifgEnabled)
        {
            if(params.m_CellToInputWeights == nullptr)
            {
                throw InvalidArgumentException("AddQLstmLayer: Cell To Input Weights cannot be NULL");
            }

            layer->m_PeepholeParameters.m_CellToInputWeights =
                std::make_unique<ScopedCpuTensorHandle>(*(params.m_CellToInputWeights));
        }

        layer->m_PeepholeParameters.m_CellToForgetWeights =
            std::make_unique<ScopedCpuTensorHandle>(*(params.m_CellToForgetWeights));
        layer->m_PeepholeParameters.m_CellToOutputWeights =
            std::make_unique<ScopedCpuTensorHandle>(*(params.m_CellToOutputWeights));
    }

    // QLstm Layer Normalization params
    if(descriptor.m_LayerNormEnabled)
    {
        if(params.m_ForgetLayerNormWeights == nullptr)
        {
            throw InvalidArgumentException("AddQLstmLayer: Forget layer normalization weights cannot be NULL");
        }

        if(params.m_CellLayerNormWeights == nullptr)
        {
            throw InvalidArgumentException("AddQLstmLayer: Cell layer normalization weights cannot be NULL");
        }

        if(params.m_OutputLayerNormWeights == nullptr)
        {
            throw InvalidArgumentException("AddQLstmLayer: Output layer normalization weights cannot be NULL");
        }

        if(!descriptor.m_CifgEnabled)
        {
            if(params.m_InputLayerNormWeights == nullptr)
            {
                throw InvalidArgumentException("AddQLstmLayer: Input layer normalization weights cannot be NULL");
            }

            layer->m_LayerNormParameters.m_InputLayerNormWeights =
                std::make_unique<ScopedCpuTensorHandle>(*(params.m_InputLayerNormWeights));
        }

        layer->m_LayerNormParameters.m_ForgetLayerNormWeights =
            std::make_unique<ScopedCpuTensorHandle>(*(params.m_ForgetLayerNormWeights));
        layer->m_LayerNormParameters.m_CellLayerNormWeights =
            std::make_unique<ScopedCpuTensorHandle>(*(params.m_CellLayerNormWeights));
        layer->m_LayerNormParameters.m_OutputLayerNormWeights =
            std::make_unique<ScopedCpuTensorHandle>(*(params.m_OutputLayerNormWeights));
    }

    return layer;
}

void Network::Accept(ILayerVisitor& visitor) const
{
    for (auto layer : GetGraph())
    {
        layer->Accept(visitor);
    }
}
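
// Usage sketch (illustrative): a minimal visitor, assuming the caller derives
// from armnn::LayerVisitorBase so that unhandled layer types fall back to the
// chosen default policy.
//
//     class LayerCounter : public armnn::LayerVisitorBase<armnn::VisitorNoThrowPolicy>
//     {
//     public:
//         unsigned int m_Count = 0;
//         void VisitInputLayer(const armnn::IConnectableLayer*, armnn::LayerBindingId,
//                              const char*) override { ++m_Count; }
//     };
//
//     LayerCounter counter;
//     network.Accept(counter);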

OptimizedNetwork::OptimizedNetwork(std::unique_ptr<Graph> graph)
    : m_Graph(std::move(graph)), m_Guid(profiling::ProfilingService::GetNextGuid())
{
}

OptimizedNetwork::OptimizedNetwork(std::unique_ptr<Graph> graph, const ModelOptions& modelOptions)
    : m_Graph(std::move(graph)), m_Guid(profiling::ProfilingService::GetNextGuid()), m_ModelOptions(modelOptions)
{
}

OptimizedNetwork::~OptimizedNetwork()
{
}

} // namespace armnn