//
// Copyright © 2017 Arm Ltd. All rights reserved.
// SPDX-License-Identifier: MIT
//

#include "Network.hpp"
#include "Graph.hpp"
#include "Layer.hpp"
#include "DeviceSpec.hpp"
#include "Optimizer.hpp"
#include "SubgraphViewSelector.hpp"
#include "BackendSettings.hpp"
#include "optimizations/All.hpp"

#include <backendsCommon/CpuTensorHandle.hpp>
#include <backendsCommon/WorkloadFactory.hpp>
#include <backendsCommon/BackendRegistry.hpp>
#include <backendsCommon/IBackendInternal.hpp>
#include <backendsCommon/TensorHandleFactoryRegistry.hpp>

#include <armnn/Exceptions.hpp>
#include <armnn/Utils.hpp>
#include <armnn/TypesUtils.hpp>

#include <algorithm>
#include <memory>
#include <sstream>
#include <vector>

#include <boost/assert.hpp>
#include <boost/format.hpp>
#include <boost/log/trivial.hpp>
#include <boost/numeric/conversion/converter_policies.hpp>
#include <boost/cast.hpp>

namespace armnn
{

armnn::INetwork* INetwork::CreateRaw()
{
    return new Network();
}

armnn::INetworkPtr INetwork::Create()
{
    return INetworkPtr(CreateRaw(), &INetwork::Destroy);
}

void INetwork::Destroy(INetwork* network)
{
    delete boost::polymorphic_downcast<Network*>(network);
}

Status Network::PrintGraph()
{
    m_Graph->Print();
    return Status::Success;
}

void IOptimizedNetwork::Destroy(IOptimizedNetwork* network)
{
    delete boost::polymorphic_downcast<OptimizedNetwork*>(network);
}

Status OptimizedNetwork::PrintGraph()
{
    m_Graph->Print();
    return Status::Success;
}

Status OptimizedNetwork::SerializeToDot(std::ostream& stream) const
{
    return m_Graph->SerializeToDot(stream);
}

void ReportError(const std::string& errorMessage,
                 Optional<std::vector<std::string>&> errorMessages)
{
    std::stringstream fullErrorMessage;
    fullErrorMessage << "ERROR: " << errorMessage;
    BOOST_LOG_TRIVIAL(error) << fullErrorMessage.str();
    if (errorMessages)
    {
        errorMessages.value().push_back(fullErrorMessage.str());
    }
}

void ReportWarning(const std::string& warningMessage,
                   Optional<std::vector<std::string>&> warningMessages)
{
    std::stringstream fullWarningMessage;
    fullWarningMessage << "WARNING: " << warningMessage;
    BOOST_LOG_TRIVIAL(warning) << fullWarningMessage.str();
    if (warningMessages)
    {
        warningMessages.value().push_back(fullWarningMessage.str());
    }
}

bool CheckScaleSetOnQuantizedType(Layer* layer, Optional<std::vector<std::string>&> errMessages)
{
    bool noErrors = true;
    unsigned int numOutputs = layer->GetNumOutputSlots();
    for (unsigned int i = 0; i < numOutputs; i++)
    {
        OutputSlot& outputSlot = layer->GetOutputSlot(i);
        TensorInfo info = outputSlot.GetTensorInfo();
        if (DataType::QuantisedAsymm8 == info.GetDataType())
        {
            if (0.f == info.GetQuantizationScale())
            {
                noErrors = false;
                std::stringstream ss;
                ss << "output " << i << " of layer " << GetLayerTypeAsCString(layer->GetType())
                   << " (" << layer->GetNameStr() << ") is of type"
                   << " Quantized 8 bit but its scale parameter has not been set";
                ReportError(ss.str(), errMessages);
            }
            // Softmax under QuantisedAsymm8 must always have scale (1.0f/256.0f) and offset 0
            if ((info.GetQuantizationScale() != (1.0f / 256.0f) ||
                 info.GetQuantizationOffset() != 0) &&
                 layer->GetType() == armnn::LayerType::Softmax)
            {
                std::stringstream ss;
                ss << "Quantization parameters for Softmax layer (Scale: " <<
                      info.GetQuantizationScale() << " and Offset: " << info.GetQuantizationOffset() <<
                      ") are incorrect and have been updated to Scale: 0.00390625 and Offset: 0";
                BOOST_LOG_TRIVIAL(warning) << ss.str();
                info.SetQuantizationScale((1.0f / 256.0f));
                info.SetQuantizationOffset(0);
                outputSlot.SetTensorInfo(info);
            }
        }
    }
    return noErrors;
}

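// Note on the Softmax fix above: a QuantisedAsymm8 tensor dequantizes as
// real = scale * (quantized - offset). Softmax outputs lie in [0, 1), so a scale of
// 1/256 (0.00390625) with offset 0 maps the 8-bit values 0..255 onto exactly that range,
// which is why those parameters are pinned here.
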
OptimizationResult AssignBackends(OptimizedNetwork* optNetObjPtr,
                                  BackendSettings& backendSettings,
                                  Graph::Iterator& firstLayer,
                                  Graph::Iterator& lastLayer,
                                  Optional<std::vector<std::string>&> errMessages)
{
    OptimizationResult result;

    // Helper lambda to compose a meaningful error message before returning with error
    auto ReturnWithError = [&](const Layer* layer)
    {
        std::stringstream failureMsg;
        failureMsg << "Layer of type " << GetLayerTypeAsCString(layer->GetType())
                   << " is not supported on any preferred backend " << backendSettings.m_PreferredBackends;
        ReportError(failureMsg.str(), errMessages);

        result.m_Error = true;
        return result;
    };

    auto availablePreferredBackends = backendSettings.GetAvailablePreferredBackends();
    if (availablePreferredBackends.empty())
    {
        std::stringstream failureMsg;
        failureMsg << "No preferred backends are available";
        ReportError(failureMsg.str(), errMessages);

        result.m_Error = true;
        return result;
    }

    for (auto it = firstLayer; it != lastLayer; ++it)
    {
        auto layer = *it;
        DataType dataType = layer->GetDataType();
        std::string reasonIfUnsupported;
        bool found = false;
        if (!CheckScaleSetOnQuantizedType(layer, errMessages))
        {
            // don't bomb immediately, find all the quantized outputs
            // which haven't had a scale set and report them all back.
            result.m_Error = true;
        }

        for (const auto& backend : availablePreferredBackends)
        {
            // need to set the compute device on the layer
            // before we can check if it is supported
            layer->SetBackendId(backend);
            if (!IWorkloadFactory::IsLayerSupported(*layer, dataType, reasonIfUnsupported))
            {
                if (dataType == DataType::Float16)
                {
                    if (IWorkloadFactory::IsLayerSupported(*layer, DataType::Float32, reasonIfUnsupported)
                        && layer->GetType() != LayerType::ConvertFp32ToFp16
                        && layer->GetType() != LayerType::ConvertFp16ToFp32)
                    {
                        // Insert FP16 -> FP32 conversion layer before current layer
                        std::vector<ConvertFp16ToFp32Layer*> convertFp16ToFp32Layers =
                            InsertConvertFp16ToFp32LayersBefore(optNetObjPtr->GetGraph(), *layer);

                        // Insert FP32 -> FP16 conversion layer after current layer
                        std::vector<ConvertFp32ToFp16Layer*> convertFp32ToFp16Layers =
                            InsertConvertFp32ToFp16LayersAfter(optNetObjPtr->GetGraph(), *layer);

                        // Assign a supported backend to the newly introduced conversion layers
                        auto AssignFirstSupportedBackend = [&](Layer* layer, BackendId preferredBackend)
                        {
                            bool supportedBackendFound = false;
                            std::string reasonIfUnsupported;

                            // Try preferred backend first
                            layer->SetBackendId(preferredBackend);
                            if (IWorkloadFactory::IsLayerSupported(*layer,
                                                                   EmptyOptional(),
                                                                   reasonIfUnsupported))
                            {
                                supportedBackendFound = true;
                            }
                            else
                            {
                                for (const auto& backend : availablePreferredBackends)
                                {
                                    // Skip preferred backend (we already determined that it is not supported)
                                    if (backend == preferredBackend)
                                    {
                                        continue;
                                    }

                                    layer->SetBackendId(backend);
                                    if (IWorkloadFactory::IsLayerSupported(*layer,
                                                                           EmptyOptional(),
                                                                           reasonIfUnsupported))
                                    {
                                        supportedBackendFound = true;
                                        break;
                                    }
                                }
                            }

                            return supportedBackendFound;
                        };

                        for (ConvertFp16ToFp32Layer* convertLayer : convertFp16ToFp32Layers)
                        {
                            if (!AssignFirstSupportedBackend(convertLayer, backend))
                            {
                                return ReturnWithError(convertLayer);
                            }
                        }

                        for (ConvertFp32ToFp16Layer* convertLayer : convertFp32ToFp16Layers)
                        {
                            if (!AssignFirstSupportedBackend(convertLayer, backend))
                            {
                                return ReturnWithError(convertLayer);
                            }
                        }

                        found = true;
                        break;
                    }
                }
                std::stringstream warningMsg;
                warningMsg << "Layer of type " << GetLayerTypeAsCString(layer->GetType())
                           << " is not supported on requested backend " << layer->GetBackendId().Get()
                           << " for data type " << GetDataTypeName(dataType)
                           << " (reason: " << reasonIfUnsupported
                           << "), falling back to the next backend.";
                ReportWarning(warningMsg.str(), errMessages);
            }
            else
            {
                found = true;
                backendSettings.m_SelectedBackends.insert(backend);
                break;
            }
        }

        // If the layer is unsupported by any devices, log and return a null network.
        if (!found)
        {
            // NOTE: MemCopy, Constant and Permute are not available as standalone accelerated
            // operations, or are only available under certain conditions. If CpuRef was not among
            // the preferred backends, fall back to CpuRef for these layer types rather than failing.
            armnn::LayerType layerType = layer->GetType();
            if (!backendSettings.IsCpuRefUsed() && (layerType == armnn::LayerType::MemCopy ||
                                                    layerType == armnn::LayerType::Constant ||
                                                    layerType == armnn::LayerType::Permute))
            {
                BackendId cpuBackendId(armnn::Compute::CpuRef);
                layer->SetBackendId(cpuBackendId);
                backendSettings.m_SelectedBackends.insert(cpuBackendId);
            }
            else
            {
                return ReturnWithError(layer);
            }
        }
    }

    return result;
}

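// Assignment policy, in order of preference: (1) the first available preferred backend that
// supports the layer as-is; (2) for FP16 layers, a backend that supports the FP32 equivalent,
// bracketed by inserted conversion layers; (3) CpuRef for MemCopy/Constant/Permute even when
// CpuRef was not requested; otherwise the assignment fails and a null network is returned.
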
OptimizationResult AssignBackends(OptimizedNetwork* optNetObjPtr,
                                  BackendSettings& backendSettings,
                                  SubgraphView& subgraph,
                                  Optional<std::vector<std::string>&> errMessages)
{
    Graph::Iterator firstLayer = subgraph.begin();
    Graph::Iterator lastLayer  = subgraph.end();
    return AssignBackends(optNetObjPtr,
                          backendSettings,
                          firstLayer,
                          lastLayer,
                          errMessages);
}

BackendsMap CreateSupportedBackends(TensorHandleFactoryRegistry& handleFactoryRegistry,
                                    BackendSettings& backendSettings)
{
    BackendsMap backends;
    auto const& backendRegistry = BackendRegistryInstance();
    for (auto&& selectedBackend : backendSettings.m_SupportedBackends)
    {
        auto backendFactory = backendRegistry.GetFactory(selectedBackend);
        auto backendObjPtr  = backendFactory();
        BOOST_ASSERT(backendObjPtr);

        backendObjPtr->RegisterTensorHandleFactories(handleFactoryRegistry);

        backends[backendObjPtr->GetId()] = std::move(backendObjPtr);
    }

    return backends;
}

OptimizationResult ApplyBackendOptimizations(OptimizedNetwork* optNetObjPtr,
                                             BackendSettings& backendSettings,
                                             BackendsMap& backends,
                                             Optional<std::vector<std::string>&> errMessages)
{
    BOOST_ASSERT(optNetObjPtr);

    OptimizationResult result;

    // Get the optimized graph
    Graph& optGraph = optNetObjPtr->GetGraph();

    // Run backend specific optimizations
    for (auto&& selectedBackend : backendSettings.m_SelectedBackends)
    {
        auto backendObjPtr = backends.find(selectedBackend)->second.get();
        BOOST_ASSERT(backendObjPtr);

        // Select sub-graphs based on backend
        SubgraphViewSelector::Subgraphs subgraphs =
            SubgraphViewSelector::SelectSubgraphs(optGraph,
                                                  // Select layers assigned to the requested backend
                                                  [&backendObjPtr](const Layer& layer)
                                                  {
                                                      return layer.GetType() != LayerType::Input &&
                                                             layer.GetType() != LayerType::Output &&
                                                             layer.GetBackendId() == backendObjPtr->GetId();
                                                  });
        if (subgraphs.empty())
        {
            // No sub-graphs found, try with next selected backend
            continue;
        }

        // Try to optimize each sub-graph
        for (auto& subgraph : subgraphs)
        {
            // Try to optimize the current sub-graph
            OptimizationViews optimizationViews = backendObjPtr->OptimizeSubgraphView(*subgraph);
            BOOST_ASSERT(optimizationViews.Validate(*subgraph));

            // Optimization attempted, check the resulting optimized sub-graph
            for (auto& substitution : optimizationViews.GetSubstitutions())
            {
                // Sub-graph optimized, substitute the sub-graph with the optimized one in the main graph
                SubgraphView& replacementSubgraph   = substitution.m_ReplacementSubgraph;
                SubgraphView& substitutableSubgraph = substitution.m_SubstitutableSubgraph;
                optGraph.SubstituteSubgraph(substitutableSubgraph, replacementSubgraph);

                // Assign the current backend to the optimized sub-graph
                std::for_each(replacementSubgraph.begin(), replacementSubgraph.end(), [&selectedBackend](Layer* l)
                {
                    BOOST_ASSERT(l);
                    l->SetBackendId(selectedBackend);
                });
            }

            if (!optimizationViews.GetFailedSubgraphs().empty())
            {
                std::stringstream warningMsg;
                warningMsg << "Some sub-graph(s) failed to optimize on " << backendObjPtr->GetId() << " backend.";
                ReportWarning(warningMsg.str(), errMessages);

                // Failed to optimize the given sub-graph, re-assign its layers to other available backends
                BackendSettings settingsCopy(backendSettings);
                if (!backendObjPtr->GetId().IsCpuRef())
                {
                    // Add the current backend to the list of backends to ignore
                    settingsCopy.m_IgnoredBackends.insert(backendObjPtr->GetId());
                }

                int count = 0;
                for (auto& failedSubgraph : optimizationViews.GetFailedSubgraphs())
                {
                    // An error occurred: the optimization was attempted but not performed, try different backends
                    std::stringstream subgraphMsg;
                    subgraphMsg << "Re-assigning backends to " << failedSubgraph.GetLayers().size()
                                << " layers inside sub-graph " << count++;
                    ReportWarning(subgraphMsg.str(), errMessages);

                    OptimizationResult reassignmentResult = AssignBackends(optNetObjPtr,
                                                                           settingsCopy,
                                                                           failedSubgraph,
                                                                           errMessages);
                    if (reassignmentResult.m_Error)
                    {
                        // Failed to re-assign one of the remaining backends to each layer of the sub-graph
                        result.m_Error = true;
                    }
                }
            }
        }
    }

    return result;
}

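// A backend's OptimizeSubgraphView() is expected to return OptimizationViews that partition the
// input sub-graph into substitutions (replacements spliced into the graph above), failed
// sub-graphs (re-assigned to other backends above) and untouched sub-graphs; Validate() checks
// that the views together account for the whole input sub-graph.
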
bool RequiresCopy(ITensorHandleFactory::FactoryId src,
                  ITensorHandleFactory::FactoryId dst,
                  TensorHandleFactoryRegistry& registry)
{
    if (src != dst)
    {
        ITensorHandleFactory* srcFactory = registry.GetFactory(src);
        ITensorHandleFactory* dstFactory = registry.GetFactory(dst);

        if (srcFactory && dstFactory &&
            (srcFactory->GetExportFlags() & dstFactory->GetImportFlags()) != 0)
        {
            return false;
        }
        return true;
    }
    return false;
}

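// Example: if the source factory can export MemorySource::Malloc buffers and the destination
// factory can import them, the bitwise AND of the export and import flags is non-zero and the
// tensor can cross the edge without a copy.
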
// Find the handle factory for the input layer which results in fewest required copies.
ITensorHandleFactory::FactoryId CalculateSlotOptionForInput(BackendsMap& backends,
                                                            OutputSlot& slot,
                                                            TensorHandleFactoryRegistry& registry)
{
    Layer& layer = slot.GetOwningLayer();
    BOOST_ASSERT(layer.GetType() == LayerType::Input);

    // Explicitly select the tensor handle factory for InputLayer because the rules for it are slightly different.
    // It doesn't matter which backend it is assigned to because they all use the same implementation, which
    // requires Map/Unmap support. This means that, so long as the handle type supports map/unmap semantics, we
    // can select a factory with maximum compatibility with the layers connected to the InputLayer.

    // First ensure the source backend supports the tensor handle API
    auto frmBackend = backends.find(layer.GetBackendId());
    if (frmBackend == backends.end() ||
        !frmBackend->second->SupportsTensorAllocatorAPI())
    {
        return ITensorHandleFactory::LegacyFactoryId;
    }

    // Go through all connections to the output slot and determine the TensorHandleFactory which results in the
    // fewest required copies.
    std::map<ITensorHandleFactory::FactoryId, int> factoryScores;
    int topScore = 0;
    ITensorHandleFactory::FactoryId topChoice = ITensorHandleFactory::LegacyFactoryId;

    for (auto&& connection : slot.GetConnections())
    {
        const Layer& connectedLayer = connection->GetOwningLayer();

        auto toBackend = backends.find(connectedLayer.GetBackendId());
        BOOST_ASSERT_MSG(toBackend != backends.end(), "Backend id not found for the connected layer");

        if (!toBackend->second.get()->SupportsTensorAllocatorAPI())
        {
            // The destination backend does not support the tensor allocator API, move to the next one
            continue;
        }

        auto dstPrefs = toBackend->second.get()->GetHandleFactoryPreferences();
        for (auto&& dst : dstPrefs)
        {
            // Input layers use the mem copy workload or import, so the selected factory must
            // support either the map/unmap API or the import API
            ITensorHandleFactory* factory = registry.GetFactory(dst);
            if (!factory->SupportsMapUnmap() &&
                !CheckFlag(factory->GetImportFlags(), MemorySource::Malloc)) // Just support cpu mem imports for now
            {
                // The current tensor handle factory does not support the map/unmap or import
                // strategy, move to the next one
                continue;
            }

            auto it = factoryScores.find(dst);
            if (it == factoryScores.end())
            {
                // Add new score to the table
                factoryScores[dst] = 0;
                if (topChoice == ITensorHandleFactory::LegacyFactoryId)
                {
                    topChoice = dst;
                }
            }
            else
            {
                // Increase the score
                factoryScores[dst]++;

                // Track the best option
                if (factoryScores[dst] > topScore)
                {
                    topScore  = factoryScores[dst];
                    topChoice = dst;
                }
            }
        }
    }

    return topChoice;
}

// Find the handle factory for the output layer which results in fewest required copies.
ITensorHandleFactory::FactoryId CalculateSlotOptionForOutput(BackendsMap& backends,
                                                             OutputSlot& slot,
                                                             TensorHandleFactoryRegistry& registry)
{
    return ITensorHandleFactory::DeferredFactoryId;
}

// For all handle factories supported on the source backend, we wish to find the one which requires the fewest copies
// when considering all connections.
ITensorHandleFactory::FactoryId CalculateSlotOption(BackendsMap& backends,
                                                    OutputSlot& outputSlot,
                                                    TensorHandleFactoryRegistry& registry)
{
    // First ensure the source backend supports the tensor handle API
    Layer& layer = outputSlot.GetOwningLayer();
    auto frmBackend = backends.find(layer.GetBackendId());
    if (frmBackend == backends.end() ||
        !frmBackend->second->SupportsTensorAllocatorAPI())
    {
        return ITensorHandleFactory::LegacyFactoryId;
    }

    // Connections to Output Layers require support for map/unmap on the TensorHandle.
    bool requiresMapUnmap = false;
    for (auto&& connection : outputSlot.GetConnections())
    {
        const Layer& connectedLayer = connection->GetOwningLayer();
        if (connectedLayer.GetType() == LayerType::Output)
        {
            requiresMapUnmap = true;
        }
    }

    IBackendInternal* srcBackend = frmBackend->second.get();
    auto srcPrefs = srcBackend->GetHandleFactoryPreferences();

    // Initialize the scores
    std::map<ITensorHandleFactory::FactoryId, int> factoryScores;
    for (auto&& pref : srcPrefs)
    {
        if (requiresMapUnmap) // Only consider factories that support map/unmap if required
        {
            ITensorHandleFactory* factory = registry.GetFactory(pref);
            if (!factory->SupportsMapUnmap())
            {
                // The current tensor handle factory does not support the map/unmap strategy, move to the next one
                continue;
            }
        }

        auto it = factoryScores.find(pref);
        if (it == factoryScores.end())
        {
            // Add new score to the table
            factoryScores[pref] = 0;
        }
    }

    // Score each handle factory on how many of the slot's connections it can serve without a copy
    for (auto&& connection : outputSlot.GetConnections())
    {
        const Layer& connectedLayer = connection->GetOwningLayer();

        auto toBackend = backends.find(connectedLayer.GetBackendId());
        BOOST_ASSERT_MSG(toBackend != backends.end(), "Backend id not found for the connected layer");

        auto dstPrefs = toBackend->second.get()->GetHandleFactoryPreferences();
        for (auto&& src : srcPrefs)
        {
            if (factoryScores.find(src) == factoryScores.end()) // Don't consider excluded factories
            {
                continue;
            }

            for (auto&& dst : dstPrefs)
            {
                if (!RequiresCopy(src, dst, registry))
                {
                    // Copy avoided, increase the score
                    factoryScores[src]++;
                    break;
                }
            }
        }
    }

    // Find the best (highest) score
    int topScore = 0;
    for (auto it : factoryScores)
    {
        topScore = std::max(topScore, it.second);
    }

    // Collect factories matching the best score
    std::vector<ITensorHandleFactory::FactoryId> optimalFactories;
    for (auto it : factoryScores)
    {
        if (it.second == topScore)
        {
            optimalFactories.push_back(it.first);
        }
    }

    // Of the compatible factories matching the best score, return the one preferred by the source backend.
    for (auto&& srcPref : srcPrefs)
    {
        for (auto&& comp : optimalFactories)
        {
            if (comp == srcPref)
            {
                return comp;
            }
        }
    }

    return ITensorHandleFactory::LegacyFactoryId;
}

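// The scoring above is a simple voting heuristic: every connection that could reuse a given
// source factory's buffers without a copy adds one point to that factory, and ties between
// equally scored factories are broken by the source backend's own preference order.
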
EdgeStrategy CalculateEdgeStrategy(BackendsMap& backends,
                                   ITensorHandleFactory::FactoryId srcFactoryId,
                                   const Layer& layer,
                                   const Layer& connectedLayer,
                                   TensorHandleFactoryRegistry& registry)
{
    auto toBackend = backends.find(connectedLayer.GetBackendId());
    BOOST_ASSERT_MSG(toBackend != backends.end(), "Backend id not found for the connected layer");

    auto dstPrefs = toBackend->second.get()->GetHandleFactoryPreferences();

    // Legacy API check for backward compatibility
    if (srcFactoryId == ITensorHandleFactory::LegacyFactoryId || dstPrefs.empty())
    {
        if (layer.GetBackendId() != connectedLayer.GetBackendId())
        {
            return EdgeStrategy::CopyToTarget;
        }
        else
        {
            return EdgeStrategy::DirectCompatibility;
        }
    }

    // TensorHandleFactory API present, so perform more sophisticated strategies.
    // Dst Output layers don't require copy because they use import or map/unmap
    if (connectedLayer.GetType() == LayerType::Output)
    {
        return EdgeStrategy::DirectCompatibility;
    }

    // Search for direct match in prefs
    for (auto&& pref : dstPrefs)
    {
        if (pref == srcFactoryId)
        {
            return EdgeStrategy::DirectCompatibility;
        }
    }

    // Search for export/import options
    ITensorHandleFactory* srcFactory = registry.GetFactory(srcFactoryId);
    if (srcFactory->GetExportFlags() != 0)
    {
        for (auto&& pref : dstPrefs)
        {
            ITensorHandleFactory* dstFactory = registry.GetFactory(pref);
            if ((dstFactory->GetImportFlags() & srcFactory->GetExportFlags()) != 0)
            {
                return EdgeStrategy::ExportToTarget;
            }
        }
    }

    // Search for copy options via map/unmap
    if (srcFactory->SupportsMapUnmap())
    {
        for (auto&& pref : dstPrefs)
        {
            ITensorHandleFactory* dstFactory = registry.GetFactory(pref);
            if (dstFactory->SupportsMapUnmap())
            {
                return EdgeStrategy::CopyToTarget;
            }
        }
    }

    return EdgeStrategy::Undefined;
}

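// Strategy precedence: DirectCompatibility (same factory on both ends, or the legacy
// single-factory path), then ExportToTarget (zero-copy sharing via matching export/import
// flags), then CopyToTarget (an explicit copy via map/unmap), and finally Undefined, which
// the caller treats as an error.
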
// Select the TensorHandleFactories and the corresponding memory strategy
OptimizationResult SelectTensorHandleStrategy(Graph& optGraph,
                                              BackendsMap& backends,
                                              TensorHandleFactoryRegistry& registry,
                                              Optional<std::vector<std::string>&> errMessages)
{
    OptimizationResult result;

    optGraph.ForEachLayer([&backends, &registry, &result, &errMessages](Layer* layer)
    {
        BOOST_ASSERT(layer);

        // Make sure the backend is in our list of supported backends. Something went wrong during
        // backend assignment if this check fails.
        BOOST_ASSERT(backends.find(layer->GetBackendId()) != backends.end());

        // Check each output separately
        for (unsigned int slotIdx = 0; slotIdx < layer->GetNumOutputSlots(); slotIdx++)
        {
            OutputSlot& outputSlot = layer->GetOutputSlot(slotIdx);

            ITensorHandleFactory::FactoryId slotOption = ITensorHandleFactory::LegacyFactoryId;

            // Calculate the factory to use which results in the fewest copies being made.
            switch (layer->GetType())
            {
                case LayerType::Input:
                    slotOption = CalculateSlotOptionForInput(backends, outputSlot, registry);
                    break;
                case LayerType::Output:
                    slotOption = CalculateSlotOptionForOutput(backends, outputSlot, registry);
                    break;
                default:
                    slotOption = CalculateSlotOption(backends, outputSlot, registry);
                    break;
            }
            outputSlot.SetTensorHandleFactory(slotOption);

            // Now determine the "best" edge strategy for each connection given the slotOption.
            unsigned int connectionIdx = 0;
            for (auto&& connection : outputSlot.GetConnections())
            {
                const Layer& connectedLayer = connection->GetOwningLayer();

                EdgeStrategy strategy = CalculateEdgeStrategy(backends, slotOption, *layer, connectedLayer, registry);

                if (strategy == EdgeStrategy::Undefined)
                {
                    result.m_Error = true;
                    if (errMessages)
                    {
                        errMessages.value().emplace_back("Could not find valid strategy required for compatibility"
                                                         " between backends.");
                    }
                    return;
                }

                outputSlot.SetEdgeStrategy(connectionIdx, strategy);

                connectionIdx++;
            }
        }
    });

    return result;
}

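// The strategies selected here are materialized later by Graph::AddCompatibilityLayers(), which
// inserts the appropriate compatibility (e.g. MemCopy) layers on every edge whose strategy is
// not DirectCompatibility.
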
IOptimizedNetworkPtr Optimize(const INetwork& inNetwork,
                              const std::vector<BackendId>& backendPreferences,
                              const IDeviceSpec& deviceSpec,
                              const OptimizerOptions& options,
                              Optional<std::vector<std::string>&> errMessages)
{
    if (backendPreferences.empty())
    {
        throw armnn::InvalidArgumentException("Invoked Optimize with no backends specified");
    }

    const Network& network = *boost::polymorphic_downcast<const Network*>(&inNetwork);
    std::unique_ptr<Graph> graph = std::make_unique<Graph>(network.GetGraph());

    auto optNet = IOptimizedNetworkPtr(new OptimizedNetwork(std::move(graph)), &IOptimizedNetwork::Destroy);

    OptimizedNetwork* optNetObjPtr = boost::polymorphic_downcast<OptimizedNetwork*>(optNet.get());

    // Get the optimized graph
    Graph& optGraph = optNetObjPtr->GetGraph();

    // Perform optimisation passes
    using namespace optimizations;
    Optimizer::Pass(optGraph, MakeOptimizations(SquashEqualPermuteSiblings(),
                                                SquashEqualReshapeSiblings(),
                                                OptimizeInversePermutes(),
                                                MovePermuteUp(),
                                                PermuteAsReshape(),
                                                OptimizeConsecutiveReshapes(),
                                                FoldPadIntoConvolution2d()));

    // Infer the tensor infos for all output slots. Throws an exception on failure
    optGraph.InferTensorInfos();

    // If Fp32 to Fp16 optimization is set convert Fp32 network to Fp16
    if (options.m_ReduceFp32ToFp16)
    {
        Optimizer::Pass(optGraph, MakeOptimizations(Fp32NetworkToFp16Converter()));
    }

    // Initialize backend settings
    BackendSettings backendSettings(backendPreferences, deviceSpec);
    if (backendSettings.GetAvailablePreferredBackends().empty())
    {
        std::stringstream failureMsg;
        failureMsg << "None of the preferred backends " << backendPreferences
                   << " are supported. Current platform provides " << backendSettings.m_SupportedBackends;
        ReportError(failureMsg.str(), errMessages);
        return IOptimizedNetworkPtr(nullptr, &IOptimizedNetwork::Destroy);
    }

    // Create a map to temporarily hold the initialized backend objects
    TensorHandleFactoryRegistry tensorHandleFactoryRegistry;
    BackendsMap backends = CreateSupportedBackends(tensorHandleFactoryRegistry, backendSettings);

    // Assign an available backend to each layer
    Graph::Iterator firstLayer = optGraph.begin();
    Graph::Iterator lastLayer  = optGraph.end();
    OptimizationResult assignBackendsResult = AssignBackends(optNetObjPtr,
                                                             backendSettings,
                                                             firstLayer,
                                                             lastLayer,
                                                             errMessages);
    if (assignBackendsResult.m_Error)
    {
        // Failed to assign a backend to each layer
        return IOptimizedNetworkPtr(nullptr, &IOptimizedNetwork::Destroy);
    }

    Optimizer::Pass(optGraph, MakeOptimizations(OptimizeInverseConversionsFp16(),
                                                OptimizeInverseConversionsFp32()));

    // Apply the backend-specific optimizations
    OptimizationResult backendOptimizationResult = ApplyBackendOptimizations(optNetObjPtr,
                                                                             backendSettings,
                                                                             backends,
                                                                             errMessages);
    if (backendOptimizationResult.m_Error)
    {
        // Failed to apply the backend-specific optimizations
        return IOptimizedNetworkPtr(nullptr, &IOptimizedNetwork::Destroy);
    }

    // If the debug flag is set, then insert a DebugLayer after each layer.
    // We do this after applying the backend optimizations as they might have changed some layers.
    if (options.m_Debug)
    {
        Optimizer::Pass(optGraph, MakeOptimizations(InsertDebugLayer()));
    }

    // Calculate the compatibility strategies for tensor handles
    OptimizationResult strategyResult = SelectTensorHandleStrategy(optGraph,
                                                                   backends,
                                                                   tensorHandleFactoryRegistry,
                                                                   errMessages);
    if (strategyResult.m_Error)
    {
        // Failed to calculate the tensor handle strategies
        return IOptimizedNetworkPtr(nullptr, &IOptimizedNetwork::Destroy);
    }

    // Based on the tensor handle strategy determined above, insert copy layers where required.
    optGraph.AddCompatibilityLayers(backends, tensorHandleFactoryRegistry);

    // Convert constants
    Optimizer::Pass(optGraph, MakeOptimizations(ConvertConstantsFloatToHalf()));
    Optimizer::Pass(optGraph, MakeOptimizations(ConvertConstantsHalfToFloat()));

    // Run backend specific optimizations (deprecated)
    for (auto&& chosenBackend : backendSettings.m_SelectedBackends)
    {
        auto factoryFun = BackendRegistryInstance().GetFactory(chosenBackend);
        auto backendPtr = factoryFun();
        BOOST_ASSERT(backendPtr.get() != nullptr);

        ARMNN_NO_DEPRECATE_WARN_BEGIN
        auto backendSpecificOptimizations = backendPtr->GetOptimizations();
        ARMNN_NO_DEPRECATE_WARN_END

        if (!backendSpecificOptimizations.empty())
        {
            Optimizer::Pass(optNetObjPtr->GetGraph(), backendSpecificOptimizations);
        }
    }

    return optNet;
}

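// Illustrative sketch (not part of this file) of how a caller typically drives the
// INetwork/Optimize API defined above, assuming an IRuntime created elsewhere via
// IRuntime::Create(); the shapes and layer choice are arbitrary:
//
//     armnn::INetworkPtr net = armnn::INetwork::Create();
//     armnn::IConnectableLayer* input   = net->AddInputLayer(0);
//     armnn::IConnectableLayer* softmax = net->AddSoftmaxLayer(armnn::SoftmaxDescriptor(), "softmax");
//     armnn::IConnectableLayer* output  = net->AddOutputLayer(0);
//
//     input->GetOutputSlot(0).Connect(softmax->GetInputSlot(0));
//     softmax->GetOutputSlot(0).Connect(output->GetInputSlot(0));
//     input->GetOutputSlot(0).SetTensorInfo(
//         armnn::TensorInfo(armnn::TensorShape({1, 10}), armnn::DataType::Float32));
//     softmax->GetOutputSlot(0).SetTensorInfo(
//         armnn::TensorInfo(armnn::TensorShape({1, 10}), armnn::DataType::Float32));
//
//     std::vector<std::string> messages;
//     armnn::IOptimizedNetworkPtr optNet =
//         armnn::Optimize(*net,
//                         {armnn::Compute::CpuAcc, armnn::Compute::CpuRef},
//                         runtime->GetDeviceSpec(),
//                         armnn::OptimizerOptions(),
//                         armnn::Optional<std::vector<std::string>&>(messages));
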
Network::Network()
: m_Graph(std::make_unique<Graph>())
{
}

Network::~Network()
{
}

IConnectableLayer* Network::AddInputLayer(LayerBindingId id, const char* name)
{
    return m_Graph->AddLayer<InputLayer>(id, name);
}

IConnectableLayer* Network::AddBatchToSpaceNdLayer(const BatchToSpaceNdDescriptor& batchToSpaceNdDescriptor,
                                                   const char* name)
{
    return m_Graph->AddLayer<BatchToSpaceNdLayer>(batchToSpaceNdDescriptor, name);
}

IConnectableLayer* Network::AddFullyConnectedLayerImpl(const FullyConnectedDescriptor& fullyConnectedDescriptor,
                                                       const ConstTensor& weights,
                                                       const Optional<ConstTensor>& biases,
                                                       const char* name)
{
    if (fullyConnectedDescriptor.m_BiasEnabled && !biases.has_value())
    {
        throw InvalidArgumentException("AddFullyConnectedLayer: biases cannot be empty");
    }

    const auto layer = m_Graph->AddLayer<FullyConnectedLayer>(fullyConnectedDescriptor, name);

    layer->m_Weight = std::make_unique<ScopedCpuTensorHandle>(weights);

    if (fullyConnectedDescriptor.m_BiasEnabled)
    {
        layer->m_Bias = std::make_unique<ScopedCpuTensorHandle>(biases.value());
    }

    return layer;
}

IConnectableLayer* Network::AddFullyConnectedLayer(const FullyConnectedDescriptor& fullyConnectedDescriptor,
                                                   const ConstTensor& weights,
                                                   const Optional<ConstTensor>& biases,
                                                   const char* name)
{
    return AddFullyConnectedLayerImpl(fullyConnectedDescriptor, weights, biases, name);
}

IConnectableLayer* Network::AddFullyConnectedLayer(const FullyConnectedDescriptor& fullyConnectedDescriptor,
                                                   const ConstTensor& weights,
                                                   const char* name)
{
    Optional<ConstTensor> biases;
    return AddFullyConnectedLayerImpl(fullyConnectedDescriptor, weights, biases, name);
}

IConnectableLayer* Network::AddFullyConnectedLayer(const FullyConnectedDescriptor& fullyConnectedDescriptor,
                                                   const ConstTensor& weights,
                                                   const ConstTensor& biases,
                                                   const char* name)
{
    Optional<ConstTensor> optionalBiases(biases);
    return AddFullyConnectedLayerImpl(fullyConnectedDescriptor, weights, optionalBiases, name);
}

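// Note: the Add*LayerImpl helpers above (and the convolution variants below) copy the caller's
// weight and bias data into ScopedCpuTensorHandles, so the ConstTensor memory only needs to
// stay valid for the duration of the call.
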
IConnectableLayer* Network::AddConcatLayer(const ConcatDescriptor& concatDescriptor,
                                           const char* name)
{
    return m_Graph->AddLayer<ConcatLayer>(concatDescriptor, name);
}

IConnectableLayer* Network::AddConvolution2dLayerImpl(const Convolution2dDescriptor& convolution2dDescriptor,
                                                      const ConstTensor& weights,
                                                      const Optional<ConstTensor>& biases,
                                                      const char* name)
{
    if (convolution2dDescriptor.m_BiasEnabled && !biases.has_value())
    {
        throw InvalidArgumentException("AddConvolution2dLayer: biases cannot be empty");
    }

    const auto layer = m_Graph->AddLayer<Convolution2dLayer>(convolution2dDescriptor, name);

    layer->m_Weight = std::make_unique<ScopedCpuTensorHandle>(weights);

    if (convolution2dDescriptor.m_BiasEnabled)
    {
        layer->m_Bias = std::make_unique<ScopedCpuTensorHandle>(biases.value());
    }

    return layer;
}

IConnectableLayer* Network::AddConvolution2dLayer(const Convolution2dDescriptor& convolution2dDescriptor,
                                                  const ConstTensor& weights,
                                                  const Optional<ConstTensor>& biases,
                                                  const char* name)
{
    return AddConvolution2dLayerImpl(convolution2dDescriptor, weights, biases, name);
}

IConnectableLayer* Network::AddConvolution2dLayer(const Convolution2dDescriptor& convolution2dDescriptor,
                                                  const ConstTensor& weights,
                                                  const char* name)
{
    Optional<ConstTensor> biases;
    return AddConvolution2dLayerImpl(convolution2dDescriptor, weights, biases, name);
}

IConnectableLayer* Network::AddConvolution2dLayer(const Convolution2dDescriptor& convolution2dDescriptor,
                                                  const ConstTensor& weights,
                                                  const ConstTensor& biases,
                                                  const char* name)
{
    Optional<ConstTensor> optionalBiases(biases);
    return AddConvolution2dLayerImpl(convolution2dDescriptor, weights, optionalBiases, name);
}

IConnectableLayer* Network::AddDepthwiseConvolution2dLayerImpl(
    const DepthwiseConvolution2dDescriptor& convolution2dDescriptor,
    const ConstTensor& weights,
    const Optional<ConstTensor>& biases,
    const char* name)
{
    if (convolution2dDescriptor.m_BiasEnabled && !biases.has_value())
    {
        throw InvalidArgumentException("AddDepthwiseConvolution2dLayer: biases cannot be empty");
    }

    const auto layer = m_Graph->AddLayer<DepthwiseConvolution2dLayer>(convolution2dDescriptor, name);

    layer->m_Weight = std::make_unique<ScopedCpuTensorHandle>(weights);

    if (convolution2dDescriptor.m_BiasEnabled)
    {
        layer->m_Bias = std::make_unique<ScopedCpuTensorHandle>(biases.value());
    }

    return layer;
}

IConnectableLayer* Network::AddDepthwiseConvolution2dLayer(
    const DepthwiseConvolution2dDescriptor& convolution2dDescriptor,
    const ConstTensor& weights,
    const Optional<ConstTensor>& biases,
    const char* name)
{
    return AddDepthwiseConvolution2dLayerImpl(convolution2dDescriptor, weights, biases, name);
}

IConnectableLayer* Network::AddDepthwiseConvolution2dLayer(
    const DepthwiseConvolution2dDescriptor& convolution2dDescriptor,
    const ConstTensor& weights,
    const char* name)
{
    Optional<ConstTensor> biases;
    return AddDepthwiseConvolution2dLayerImpl(convolution2dDescriptor, weights, biases, name);
}

IConnectableLayer* Network::AddDepthwiseConvolution2dLayer(
    const DepthwiseConvolution2dDescriptor& convolution2dDescriptor,
    const ConstTensor& weights,
    const ConstTensor& biases,
    const char* name)
{
    Optional<ConstTensor> optionalBiases(biases);
    return AddDepthwiseConvolution2dLayerImpl(convolution2dDescriptor, weights, optionalBiases, name);
}

IConnectableLayer* Network::AddDetectionPostProcessLayer(const armnn::DetectionPostProcessDescriptor& descriptor,
                                                         const ConstTensor& anchors, const char* name)
{
    const auto layer = m_Graph->AddLayer<DetectionPostProcessLayer>(descriptor, name);

    layer->m_Anchors = std::make_unique<ScopedCpuTensorHandle>(anchors);

    return layer;
}

IConnectableLayer* Network::AddPermuteLayer(const PermuteDescriptor& permuteDescriptor,
                                            const char* name)
{
    return m_Graph->AddLayer<PermuteLayer>(permuteDescriptor, name);
}

IConnectableLayer* Network::AddPooling2dLayer(const Pooling2dDescriptor& pooling2dDescriptor,
                                              const char* name)
{
    return m_Graph->AddLayer<Pooling2dLayer>(pooling2dDescriptor, name);
}

IConnectableLayer* Network::AddActivationLayer(const ActivationDescriptor& activationDescriptor,
                                               const char* name)
{
    return m_Graph->AddLayer<ActivationLayer>(activationDescriptor, name);
}

IConnectableLayer* Network::AddNormalizationLayer(const NormalizationDescriptor& normalizationDescriptor,
                                                  const char* name)
{
    return m_Graph->AddLayer<NormalizationLayer>(normalizationDescriptor, name);
}

IConnectableLayer* Network::AddSoftmaxLayer(const SoftmaxDescriptor& softmaxDescriptor,
                                            const char* name)
{
    return m_Graph->AddLayer<SoftmaxLayer>(softmaxDescriptor, name);
}

IConnectableLayer* Network::AddSplitterLayer(const ViewsDescriptor& splitterDescriptor,
                                             const char* name)
{
    return m_Graph->AddLayer<SplitterLayer>(splitterDescriptor, name);
}

IConnectableLayer* Network::AddMaximumLayer(const char* name)
{
    return m_Graph->AddLayer<MaximumLayer>(name);
}

IConnectableLayer* Network::AddMinimumLayer(const char* name)
{
    return m_Graph->AddLayer<MinimumLayer>(name);
}

IConnectableLayer* Network::AddMergerLayer(const MergerDescriptor& mergerDescriptor,
                                           const char* name)
{
    return AddConcatLayer(mergerDescriptor, name);
}

IConnectableLayer* Network::AddAbsLayer(const char* name)
{
    return m_Graph->AddLayer<AbsLayer>(name);
}

IConnectableLayer* Network::AddAdditionLayer(const char* name)
{
    return m_Graph->AddLayer<AdditionLayer>(name);
}

IConnectableLayer* Network::AddMultiplicationLayer(const char* name)
{
    return m_Graph->AddLayer<MultiplicationLayer>(name);
}

IConnectableLayer* Network::AddOutputLayer(LayerBindingId id, const char* name)
{
    return m_Graph->AddLayer<OutputLayer>(id, name);
}

IConnectableLayer* Network::AddBatchNormalizationLayer(const BatchNormalizationDescriptor& desc,
                                                       const ConstTensor& mean,
                                                       const ConstTensor& variance,
                                                       const ConstTensor& beta,
                                                       const ConstTensor& gamma,
                                                       const char* name)
{
    const auto layer = m_Graph->AddLayer<BatchNormalizationLayer>(desc, name);

    layer->m_Mean     = std::make_unique<ScopedCpuTensorHandle>(mean);
    layer->m_Variance = std::make_unique<ScopedCpuTensorHandle>(variance);
    layer->m_Beta     = std::make_unique<ScopedCpuTensorHandle>(beta);
    layer->m_Gamma    = std::make_unique<ScopedCpuTensorHandle>(gamma);

    return layer;
}

IConnectableLayer* Network::AddResizeBilinearLayer(const ResizeBilinearDescriptor& descriptor,
                                                   const char* name)
{
    ResizeDescriptor resizeDescriptor;
    resizeDescriptor.m_Method       = ResizeMethod::Bilinear;
    resizeDescriptor.m_DataLayout   = descriptor.m_DataLayout;
    resizeDescriptor.m_TargetWidth  = descriptor.m_TargetWidth;
    resizeDescriptor.m_TargetHeight = descriptor.m_TargetHeight;

    return m_Graph->AddLayer<ResizeLayer>(resizeDescriptor, name);
}

IConnectableLayer* Network::AddResizeLayer(const ResizeDescriptor& resizeDescriptor, const char* name)
{
    return m_Graph->AddLayer<ResizeLayer>(resizeDescriptor, name);
}

IConnectableLayer* Network::AddL2NormalizationLayer(const L2NormalizationDescriptor& desc,
                                                    const char* name)
{
    return m_Graph->AddLayer<L2NormalizationLayer>(desc, name);
}

IConnectableLayer* Network::AddConstantLayer(const ConstTensor& input, const char* name)
{
    auto layer = m_Graph->AddLayer<ConstantLayer>(name);

    layer->m_LayerOutput = std::make_unique<ScopedCpuTensorHandle>(input);

    return layer;
}

IConnectableLayer* Network::AddReshapeLayer(const ReshapeDescriptor& reshapeDescriptor,
                                            const char* name)
{
    return m_Graph->AddLayer<ReshapeLayer>(reshapeDescriptor, name);
}

IConnectableLayer* Network::AddSpaceToBatchNdLayer(const SpaceToBatchNdDescriptor& spaceToBatchNdDescriptor,
                                                   const char* name)
{
    return m_Graph->AddLayer<SpaceToBatchNdLayer>(spaceToBatchNdDescriptor, name);
}

IConnectableLayer* Network::AddSpaceToDepthLayer(const SpaceToDepthDescriptor& spaceToDepthDescriptor,
                                                 const char* name)
{
    return m_Graph->AddLayer<SpaceToDepthLayer>(spaceToDepthDescriptor, name);
}

IConnectableLayer* Network::AddFloorLayer(const char* name)
{
    return m_Graph->AddLayer<FloorLayer>(name);
}

IConnectableLayer* Network::AddLstmLayer(const LstmDescriptor& descriptor,
                                         const LstmInputParams& params,
                                         const char* name)
{
    const auto layer = m_Graph->AddLayer<LstmLayer>(descriptor, name);

    // Lstm basic parameters
    layer->m_BasicParameters.m_InputToForgetWeights =
        std::make_unique<ScopedCpuTensorHandle>(*(params.m_InputToForgetWeights));
    layer->m_BasicParameters.m_InputToCellWeights =
        std::make_unique<ScopedCpuTensorHandle>(*(params.m_InputToCellWeights));
    layer->m_BasicParameters.m_InputToOutputWeights =
        std::make_unique<ScopedCpuTensorHandle>(*(params.m_InputToOutputWeights));
    layer->m_BasicParameters.m_RecurrentToForgetWeights =
        std::make_unique<ScopedCpuTensorHandle>(*(params.m_RecurrentToForgetWeights));
    layer->m_BasicParameters.m_RecurrentToCellWeights =
        std::make_unique<ScopedCpuTensorHandle>(*(params.m_RecurrentToCellWeights));
    layer->m_BasicParameters.m_RecurrentToOutputWeights =
        std::make_unique<ScopedCpuTensorHandle>(*(params.m_RecurrentToOutputWeights));
    layer->m_BasicParameters.m_ForgetGateBias =
        std::make_unique<ScopedCpuTensorHandle>(*(params.m_ForgetGateBias));
    layer->m_BasicParameters.m_CellBias =
        std::make_unique<ScopedCpuTensorHandle>(*(params.m_CellBias));
    layer->m_BasicParameters.m_OutputGateBias =
        std::make_unique<ScopedCpuTensorHandle>(*(params.m_OutputGateBias));

    // Lstm CIFG parameters
    if (!descriptor.m_CifgEnabled)
    {
        if (params.m_InputToInputWeights == nullptr)
        {
            throw InvalidArgumentException("AddLstmLayer: Input To Input Weights cannot be NULL");
        }
        if (params.m_RecurrentToInputWeights == nullptr)
        {
            throw InvalidArgumentException(
                "AddLstmLayer: Recurrent To Input Weights cannot be NULL");
        }
        if (params.m_InputGateBias == nullptr)
        {
            throw InvalidArgumentException("AddLstmLayer: Input Gate Bias cannot be NULL");
        }
        layer->m_CifgParameters.m_InputToInputWeights =
            std::make_unique<ScopedCpuTensorHandle>(*(params.m_InputToInputWeights));
        layer->m_CifgParameters.m_RecurrentToInputWeights =
            std::make_unique<ScopedCpuTensorHandle>(*(params.m_RecurrentToInputWeights));
        // In the VTS tests, cell-to-input weights may be null, even if the other CIFG params are not.
        if (params.m_CellToInputWeights != nullptr)
        {
            layer->m_CifgParameters.m_CellToInputWeights =
                std::make_unique<ScopedCpuTensorHandle>(*(params.m_CellToInputWeights));
        }
        layer->m_CifgParameters.m_InputGateBias =
            std::make_unique<ScopedCpuTensorHandle>(*(params.m_InputGateBias));
    }

    // Lstm projection parameters
    if (descriptor.m_ProjectionEnabled)
    {
        if (params.m_ProjectionWeights == nullptr)
        {
            throw InvalidArgumentException("AddLstmLayer: Projection Weights cannot be NULL");
        }
        layer->m_ProjectionParameters.m_ProjectionWeights =
            std::make_unique<ScopedCpuTensorHandle>(*(params.m_ProjectionWeights));
        if (params.m_ProjectionBias != nullptr)
        {
            layer->m_ProjectionParameters.m_ProjectionBias =
                std::make_unique<ScopedCpuTensorHandle>(*(params.m_ProjectionBias));
        }
    }

    // Lstm peephole parameters
    if (descriptor.m_PeepholeEnabled)
    {
        if (params.m_CellToForgetWeights == nullptr)
        {
            throw InvalidArgumentException("AddLstmLayer: Cell To Forget Weights cannot be NULL");
        }
        if (params.m_CellToOutputWeights == nullptr)
        {
            throw InvalidArgumentException("AddLstmLayer: Cell To Output Weights cannot be NULL");
        }
        layer->m_PeepholeParameters.m_CellToForgetWeights =
            std::make_unique<ScopedCpuTensorHandle>(*(params.m_CellToForgetWeights));
        layer->m_PeepholeParameters.m_CellToOutputWeights =
            std::make_unique<ScopedCpuTensorHandle>(*(params.m_CellToOutputWeights));
    }

    // Lstm layer normalization parameters
    if (descriptor.m_LayerNormEnabled)
    {
        if (!descriptor.m_CifgEnabled)
        {
            if (params.m_InputLayerNormWeights == nullptr)
            {
                throw InvalidArgumentException("AddLstmLayer: Input layer normalization weights cannot be NULL");
            }
            layer->m_LayerNormParameters.m_InputLayerNormWeights =
                std::make_unique<ScopedCpuTensorHandle>(*(params.m_InputLayerNormWeights));
        }

        if (params.m_ForgetLayerNormWeights == nullptr)
        {
            throw InvalidArgumentException("AddLstmLayer: Forget layer normalization weights cannot be NULL");
        }
        if (params.m_CellLayerNormWeights == nullptr)
        {
            throw InvalidArgumentException("AddLstmLayer: Cell layer normalization weights cannot be NULL");
        }
        if (params.m_OutputLayerNormWeights == nullptr)
        {
            throw InvalidArgumentException("AddLstmLayer: Output layer normalization weights cannot be NULL");
        }
        layer->m_LayerNormParameters.m_ForgetLayerNormWeights =
            std::make_unique<ScopedCpuTensorHandle>(*(params.m_ForgetLayerNormWeights));
        layer->m_LayerNormParameters.m_CellLayerNormWeights =
            std::make_unique<ScopedCpuTensorHandle>(*(params.m_CellLayerNormWeights));
        layer->m_LayerNormParameters.m_OutputLayerNormWeights =
            std::make_unique<ScopedCpuTensorHandle>(*(params.m_OutputLayerNormWeights));
    }

    return layer;
}

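// CIFG ("coupled input and forget gate") derives the input gate from the forget gate, which is
// why the input-gate weights and bias are only required, and only stored, when CIFG is disabled;
// the projection, peephole and layer-normalization blocks are likewise optional and validated
// only when the corresponding descriptor flag is set.
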
IConnectableLayer* Network::AddDivisionLayer(const char* name)
{
    return m_Graph->AddLayer<DivisionLayer>(name);
}

IConnectableLayer* Network::AddSubtractionLayer(const char* name)
{
    return m_Graph->AddLayer<SubtractionLayer>(name);
}

IConnectableLayer* Network::AddMeanLayer(const MeanDescriptor& meanDescriptor, const char* name)
{
    return m_Graph->AddLayer<MeanLayer>(meanDescriptor, name);
}

IConnectableLayer* Network::AddPadLayer(const PadDescriptor& padDescriptor, const char* name)
{
    return m_Graph->AddLayer<PadLayer>(padDescriptor, name);
}

IConnectableLayer* Network::AddQuantizeLayer(const char* name)
{
    return m_Graph->AddLayer<QuantizeLayer>(name);
}

IConnectableLayer* Network::AddDequantizeLayer(const char* name)
{
    return m_Graph->AddLayer<DequantizeLayer>(name);
}

IConnectableLayer* Network::AddStridedSliceLayer(const StridedSliceDescriptor& stridedSliceDescriptor,
                                                 const char* name)
{
    return m_Graph->AddLayer<StridedSliceLayer>(stridedSliceDescriptor, name);
}

IConnectableLayer* Network::AddGreaterLayer(const char* name)
{
    return m_Graph->AddLayer<GreaterLayer>(name);
}

IConnectableLayer* Network::AddEqualLayer(const char* name)
{
    return m_Graph->AddLayer<EqualLayer>(name);
}

IConnectableLayer* Network::AddRsqrtLayer(const char* name)
{
    return m_Graph->AddLayer<RsqrtLayer>(name);
}

IConnectableLayer* Network::AddGatherLayer(const char* name)
{
    return m_Graph->AddLayer<GatherLayer>(name);
}

IConnectableLayer* Network::AddMergeLayer(const char* name)
{
    return m_Graph->AddLayer<MergeLayer>(name);
}

IConnectableLayer* Network::AddSwitchLayer(const char* name)
{
    return m_Graph->AddLayer<SwitchLayer>(name);
}

IConnectableLayer* Network::AddPreluLayer(const char* name)
{
    return m_Graph->AddLayer<PreluLayer>(name);
}

IConnectableLayer* Network::AddTransposeConvolution2dLayer(const TransposeConvolution2dDescriptor& descriptor,
                                                           const ConstTensor& weights,
                                                           const Optional<ConstTensor>& biases,
                                                           const char* name)
{
    if (descriptor.m_BiasEnabled && !biases.has_value())
    {
        throw InvalidArgumentException("AddTransposeConvolution2dLayer: Biases cannot be empty");
    }

    const auto layer = m_Graph->AddLayer<TransposeConvolution2dLayer>(descriptor, name);

    layer->m_Weight = std::make_unique<ScopedCpuTensorHandle>(weights);

    if (descriptor.m_BiasEnabled)
    {
        layer->m_Bias = std::make_unique<ScopedCpuTensorHandle>(biases.value());
    }

    return layer;
}

IConnectableLayer* Network::AddStackLayer(const StackDescriptor& stackDescriptor,
                                          const char* name)
{
    return m_Graph->AddLayer<StackLayer>(stackDescriptor, name);
}

IConnectableLayer* Network::AddQuantizedLstmLayer(const QuantizedLstmInputParams& params,
                                                  const char* name)
{
    const auto layer = m_Graph->AddLayer<QuantizedLstmLayer>(name);

    // InputToX weights
    layer->m_QuantizedLstmParameters.m_InputToInputWeights =
        std::make_unique<ScopedCpuTensorHandle>(params.GetInputToInputWeights());
    layer->m_QuantizedLstmParameters.m_InputToForgetWeights =
        std::make_unique<ScopedCpuTensorHandle>(params.GetInputToForgetWeights());
    layer->m_QuantizedLstmParameters.m_InputToCellWeights =
        std::make_unique<ScopedCpuTensorHandle>(params.GetInputToCellWeights());
    layer->m_QuantizedLstmParameters.m_InputToOutputWeights =
        std::make_unique<ScopedCpuTensorHandle>(params.GetInputToOutputWeights());

    // RecurrentToX weights
    layer->m_QuantizedLstmParameters.m_RecurrentToInputWeights =
        std::make_unique<ScopedCpuTensorHandle>(params.GetRecurrentToInputWeights());
    layer->m_QuantizedLstmParameters.m_RecurrentToForgetWeights =
        std::make_unique<ScopedCpuTensorHandle>(params.GetRecurrentToForgetWeights());
    layer->m_QuantizedLstmParameters.m_RecurrentToCellWeights =
        std::make_unique<ScopedCpuTensorHandle>(params.GetRecurrentToCellWeights());
    layer->m_QuantizedLstmParameters.m_RecurrentToOutputWeights =
        std::make_unique<ScopedCpuTensorHandle>(params.GetRecurrentToOutputWeights());

    // Bias parameters
    layer->m_QuantizedLstmParameters.m_InputGateBias =
        std::make_unique<ScopedCpuTensorHandle>(params.GetInputGateBias());
    layer->m_QuantizedLstmParameters.m_ForgetGateBias =
        std::make_unique<ScopedCpuTensorHandle>(params.GetForgetGateBias());
    layer->m_QuantizedLstmParameters.m_CellBias =
        std::make_unique<ScopedCpuTensorHandle>(params.GetCellBias());
    layer->m_QuantizedLstmParameters.m_OutputGateBias =
        std::make_unique<ScopedCpuTensorHandle>(params.GetOutputGateBias());

    return layer;
}

void Network::Accept(ILayerVisitor& visitor) const
{
    for (auto layer : GetGraph())
    {
        layer->Accept(visitor);
    }
}

OptimizedNetwork::OptimizedNetwork(std::unique_ptr<Graph> graph)
    : m_Graph(std::move(graph))
{
}

OptimizedNetwork::~OptimizedNetwork()
{
}

} // namespace armnn