//
// Copyright © 2017 Arm Ltd. All rights reserved.
// SPDX-License-Identifier: MIT
//

#include "Network.hpp"
#include "Graph.hpp"
#include "Layer.hpp"
#include "DeviceSpec.hpp"
#include "Optimizer.hpp"
#include "SubgraphViewSelector.hpp"
#include "BackendSettings.hpp"
#include "optimizations/All.hpp"

#include <backendsCommon/CpuTensorHandle.hpp>
#include <backendsCommon/WorkloadFactory.hpp>
#include <backendsCommon/BackendRegistry.hpp>
#include <backendsCommon/IBackendInternal.hpp>
#include <backendsCommon/TensorHandleFactoryRegistry.hpp>

#include <armnn/Exceptions.hpp>
#include <armnn/Utils.hpp>
#include <armnn/TypesUtils.hpp>

#include <algorithm>
#include <memory>
#include <sstream>
#include <vector>

#include <boost/assert.hpp>
#include <boost/format.hpp>
#include <boost/log/trivial.hpp>
#include <boost/numeric/conversion/converter_policies.hpp>
#include <boost/cast.hpp>

namespace armnn
{

armnn::INetwork* INetwork::CreateRaw()
{
    return new Network();
}

armnn::INetworkPtr INetwork::Create()
{
    return INetworkPtr(CreateRaw(), &INetwork::Destroy);
}

void INetwork::Destroy(INetwork* network)
{
    delete boost::polymorphic_downcast<Network*>(network);
}

Status Network::PrintGraph()
{
    m_Graph->Print();
    return Status::Success;
}

void IOptimizedNetwork::Destroy(IOptimizedNetwork* network)
{
    delete boost::polymorphic_downcast<OptimizedNetwork*>(network);
}

Status OptimizedNetwork::PrintGraph()
{
    m_Graph->Print();
    return Status::Success;
}

Status OptimizedNetwork::SerializeToDot(std::ostream& stream) const
{
    return m_Graph->SerializeToDot(stream);
}

void ReportError(const std::string& errorMessage,
                 Optional<std::vector<std::string>&> errorMessages)
{
    std::stringstream fullErrorMessage;
    fullErrorMessage << "ERROR: " << errorMessage;
    BOOST_LOG_TRIVIAL(error) << fullErrorMessage.str();
    if (errorMessages)
    {
        errorMessages.value().push_back(fullErrorMessage.str());
    }
}

void ReportWarning(const std::string& warningMessage,
                   Optional<std::vector<std::string>&> warningMessages)
{
    std::stringstream fullWarningMessage;
    fullWarningMessage << "WARNING: " << warningMessage;
    BOOST_LOG_TRIVIAL(warning) << fullWarningMessage.str();
    if (warningMessages)
    {
        warningMessages.value().push_back(fullWarningMessage.str());
    }
}

bool CheckScaleSetOnQuantizedType(Layer* layer, Optional<std::vector<std::string>&> errMessages)
{
    bool noErrors = true;
    unsigned int numOutputs = layer->GetNumOutputSlots();
    for (unsigned int i = 0; i < numOutputs; i++)
    {
        OutputSlot& outputSlot = layer->GetOutputSlot(i);
        TensorInfo info = outputSlot.GetTensorInfo();
        if (DataType::QuantisedAsymm8 == info.GetDataType())
        {
            if (0.f == info.GetQuantizationScale())
            {
                noErrors = false;
                std::stringstream ss;
                ss << "output " << i << " of layer " << GetLayerTypeAsCString(layer->GetType())
                   << " (" << layer->GetNameStr() << ") is of type"
                   << " Quantized 8 bit but its scale parameter has not been set";
                ReportError(ss.str(), errMessages);
            }
            // Softmax under QuantisedAsymm8 must always have scale (1.0f/256.0f) and offset 0
            if ((info.GetQuantizationScale() != (1.0f / 256.0f) ||
                 info.GetQuantizationOffset() != 0) &&
                 layer->GetType() == armnn::LayerType::Softmax)
            {
                std::stringstream ss;
                ss << "Quantization parameters for Softmax layer (Scale: " <<
                      info.GetQuantizationScale() << " and Offset: " << info.GetQuantizationOffset() <<
                      ") are incorrect and have been updated to Scale: 0.00390625 and Offset: 0";
                BOOST_LOG_TRIVIAL(warning) << ss.str();
                info.SetQuantizationScale((1.0f / 256.0f));
                info.SetQuantizationOffset(0);
                outputSlot.SetTensorInfo(info);
            }
        }
    }
    return noErrors;
}

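// Note on the Softmax fix above: a QuantisedAsymm8 tensor dequantizes as
// real = scale * (quantized - offset). Softmax outputs lie in [0, 1), so a scale of
// 1/256 (0.00390625) with offset 0 maps the 8-bit values 0..255 onto exactly that range,
// which is why those parameters are pinned here.
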
OptimizationResult AssignBackends(OptimizedNetwork* optNetObjPtr,
                                  BackendSettings& backendSettings,
                                  Graph::Iterator& firstLayer,
                                  Graph::Iterator& lastLayer,
                                  Optional<std::vector<std::string>&> errMessages)
{
    OptimizationResult result;

    // Helper lambda to compose a meaningful error message before returning with error
    auto ReturnWithError = [&](const Layer* layer)
    {
        std::stringstream failureMsg;
        failureMsg << "Layer of type " << GetLayerTypeAsCString(layer->GetType())
                   << " is not supported on any preferred backend " << backendSettings.m_PreferredBackends;
        ReportError(failureMsg.str(), errMessages);

        result.m_Error = true;
        return result;
    };

    auto availablePreferredBackends = backendSettings.GetAvailablePreferredBackends();
    if (availablePreferredBackends.empty())
    {
        std::stringstream failureMsg;
        failureMsg << "No preferred backends are available";
        ReportError(failureMsg.str(), errMessages);

        result.m_Error = true;
        return result;
    }

    for (auto it = firstLayer; it != lastLayer; ++it)
    {
        auto layer = *it;
        DataType dataType = layer->GetDataType();
        std::string reasonIfUnsupported;
        bool found = false;
        if (!CheckScaleSetOnQuantizedType(layer, errMessages))
        {
            // don't bomb immediately, find all the quantized outputs
            // which haven't had a scale set and report them all back.
            result.m_Error = true;
        }

        for (const auto& backend : availablePreferredBackends)
        {
            // need to set the compute device on the layer
            // before we can check if it is supported
            layer->SetBackendId(backend);
            if (!IWorkloadFactory::IsLayerSupported(*layer, dataType, reasonIfUnsupported))
            {
                if (dataType == DataType::Float16)
                {
                    if (IWorkloadFactory::IsLayerSupported(*layer, DataType::Float32, reasonIfUnsupported)
                        && layer->GetType() != LayerType::ConvertFp32ToFp16
                        && layer->GetType() != LayerType::ConvertFp16ToFp32)
                    {
                        // Insert FP16 -> FP32 conversion layer before current layer
                        std::vector<ConvertFp16ToFp32Layer*> convertFp16ToFp32Layers =
                            InsertConvertFp16ToFp32LayersBefore(optNetObjPtr->GetGraph(), *layer);

                        // Insert FP32 -> FP16 conversion layer after current layer
                        std::vector<ConvertFp32ToFp16Layer*> convertFp32ToFp16Layers =
                            InsertConvertFp32ToFp16LayersAfter(optNetObjPtr->GetGraph(), *layer);

                        // Assign a supported backend to the newly introduced conversion layers
                        auto AssignFirstSupportedBackend = [&](Layer* layer, BackendId preferredBackend)
                        {
                            bool supportedBackendFound = false;
                            std::string reasonIfUnsupported;

                            // Try preferred backend first
                            layer->SetBackendId(preferredBackend);
                            if (IWorkloadFactory::IsLayerSupported(*layer,
                                                                   EmptyOptional(),
                                                                   reasonIfUnsupported))
                            {
                                supportedBackendFound = true;
                            }
                            else
                            {
                                for (const auto& backend : availablePreferredBackends)
                                {
                                    // Skip preferred backend (we already determined that it is not supported)
                                    if (backend == preferredBackend)
                                    {
                                        continue;
                                    }

                                    layer->SetBackendId(backend);
                                    if (IWorkloadFactory::IsLayerSupported(*layer,
                                                                           EmptyOptional(),
                                                                           reasonIfUnsupported))
                                    {
                                        supportedBackendFound = true;
                                        break;
                                    }
                                }
                            }

                            return supportedBackendFound;
                        };

                        for (ConvertFp16ToFp32Layer* convertLayer : convertFp16ToFp32Layers)
                        {
                            if (!AssignFirstSupportedBackend(convertLayer, backend))
                            {
                                return ReturnWithError(convertLayer);
                            }
                        }

                        for (ConvertFp32ToFp16Layer* convertLayer : convertFp32ToFp16Layers)
                        {
                            if (!AssignFirstSupportedBackend(convertLayer, backend))
                            {
                                return ReturnWithError(convertLayer);
                            }
                        }

                        found = true;
                        break;
                    }
                }
                std::stringstream warningMsg;
                warningMsg << "Layer of type " << GetLayerTypeAsCString(layer->GetType())
                           << " is not supported on requested backend " << layer->GetBackendId().Get()
                           << " for data type " << GetDataTypeName(dataType)
                           << " (reason: " << reasonIfUnsupported
                           << "), falling back to the next backend.";
                ReportWarning(warningMsg.str(), errMessages);
            }
            else
            {
                found = true;
                backendSettings.m_SelectedBackends.insert(backend);
                break;
            }
        }

        // If the layer is unsupported by any devices, log and return a null network.
        if (!found)
        {
            // NOTE: MemCopy, Constant and Permute are not available as standalone accelerated
            // operations, or are only available under certain conditions. If CpuRef was not among
            // the preferred backends, fall back to CpuRef for these layer types rather than failing.
            armnn::LayerType layerType = layer->GetType();
            if (!backendSettings.IsCpuRefUsed() && (layerType == armnn::LayerType::MemCopy ||
                                                    layerType == armnn::LayerType::Constant ||
                                                    layerType == armnn::LayerType::Permute))
            {
                BackendId cpuBackendId(armnn::Compute::CpuRef);
                layer->SetBackendId(cpuBackendId);
                backendSettings.m_SelectedBackends.insert(cpuBackendId);
            }
            else
            {
                return ReturnWithError(layer);
            }
        }
    }

    return result;
}

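// Assignment policy, in order of preference: (1) the first available preferred backend that
// supports the layer as-is; (2) for FP16 layers, a backend that supports the FP32 equivalent,
// bracketed by inserted conversion layers; (3) CpuRef for MemCopy/Constant/Permute even when
// CpuRef was not requested; otherwise the assignment fails and a null network is returned.
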
OptimizationResult AssignBackends(OptimizedNetwork* optNetObjPtr,
                                  BackendSettings& backendSettings,
                                  SubgraphView& subgraph,
                                  Optional<std::vector<std::string>&> errMessages)
{
    Graph::Iterator firstLayer = subgraph.begin();
    Graph::Iterator lastLayer  = subgraph.end();
    return AssignBackends(optNetObjPtr,
                          backendSettings,
                          firstLayer,
                          lastLayer,
                          errMessages);
}

BackendsMap CreateSupportedBackends(TensorHandleFactoryRegistry& handleFactoryRegistry,
                                    BackendSettings& backendSettings)
{
    BackendsMap backends;
    auto const& backendRegistry = BackendRegistryInstance();
    for (auto&& selectedBackend : backendSettings.m_SupportedBackends)
    {
        auto backendFactory = backendRegistry.GetFactory(selectedBackend);
        auto backendObjPtr  = backendFactory();
        BOOST_ASSERT(backendObjPtr);

        backendObjPtr->RegisterTensorHandleFactories(handleFactoryRegistry);

        backends[backendObjPtr->GetId()] = std::move(backendObjPtr);
    }

    return backends;
}

OptimizationResult ApplyBackendOptimizations(OptimizedNetwork* optNetObjPtr,
                                             BackendSettings& backendSettings,
                                             BackendsMap& backends,
                                             Optional<std::vector<std::string>&> errMessages)
{
    BOOST_ASSERT(optNetObjPtr);

    OptimizationResult result;

    // Get the optimized graph
    Graph& optGraph = optNetObjPtr->GetGraph();

    // Run backend specific optimizations
    for (auto&& selectedBackend : backendSettings.m_SelectedBackends)
    {
        auto backendObjPtr = backends.find(selectedBackend)->second.get();
        BOOST_ASSERT(backendObjPtr);

        // Select sub-graphs based on backend
        SubgraphViewSelector::Subgraphs subgraphs =
            SubgraphViewSelector::SelectSubgraphs(optGraph,
                                                  // Select layers assigned to the requested backend
                                                  [&backendObjPtr](const Layer& layer)
                                                  {
                                                      return layer.GetType() != LayerType::Input &&
                                                             layer.GetType() != LayerType::Output &&
                                                             layer.GetBackendId() == backendObjPtr->GetId();
                                                  });
        if (subgraphs.empty())
        {
            // No sub-graphs found, try with next selected backend
            continue;
        }

        // Try to optimize each sub-graph
        for (auto& subgraph : subgraphs)
        {
            // Try to optimize the current sub-graph
            OptimizationViews optimizationViews = backendObjPtr->OptimizeSubgraphView(*subgraph);
            BOOST_ASSERT(optimizationViews.Validate(*subgraph));

            // Optimization attempted, check the resulting optimized sub-graph
            for (auto& substitution : optimizationViews.GetSubstitutions())
            {
                // Sub-graph optimized, substitute the sub-graph with the optimized one in the main graph
                SubgraphView& replacementSubgraph   = substitution.m_ReplacementSubgraph;
                SubgraphView& substitutableSubgraph = substitution.m_SubstitutableSubgraph;
                optGraph.SubstituteSubgraph(substitutableSubgraph, replacementSubgraph);

                // Assign the current backend to the optimized sub-graph
                std::for_each(replacementSubgraph.begin(), replacementSubgraph.end(), [&selectedBackend](Layer* l)
                {
                    BOOST_ASSERT(l);
                    l->SetBackendId(selectedBackend);
                });
            }

            if (!optimizationViews.GetFailedSubgraphs().empty())
            {
                std::stringstream warningMsg;
                warningMsg << "Some sub-graph(s) failed to optimize on " << backendObjPtr->GetId() << " backend.";
                ReportWarning(warningMsg.str(), errMessages);

                // Failed to optimize the given sub-graph, re-assign its layers to other available backends
                BackendSettings settingsCopy(backendSettings);
                if (!backendObjPtr->GetId().IsCpuRef())
                {
                    // Add the current backend to the list of backends to ignore
                    settingsCopy.m_IgnoredBackends.insert(backendObjPtr->GetId());
                }

                int count = 0;
                for (auto& failedSubgraph : optimizationViews.GetFailedSubgraphs())
                {
                    // An error occurred: the optimization was attempted but not performed, try different backends
                    std::stringstream subgraphMsg;
                    subgraphMsg << "Re-assigning backends to " << failedSubgraph.GetLayers().size()
                                << " layers inside sub-graph " << count++;
                    ReportWarning(subgraphMsg.str(), errMessages);

                    OptimizationResult reassignmentResult = AssignBackends(optNetObjPtr,
                                                                           settingsCopy,
                                                                           failedSubgraph,
                                                                           errMessages);
                    if (reassignmentResult.m_Error)
                    {
                        // Failed to re-assign one of the remaining backends to each layer of the sub-graph
                        result.m_Error = true;
                    }
                }
            }
        }
    }

    return result;
}

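// A backend's OptimizeSubgraphView() is expected to return OptimizationViews that partition the
// input sub-graph into substitutions (replacements spliced into the graph above), failed
// sub-graphs (re-assigned to other backends above) and untouched sub-graphs; Validate() checks
// that the views together account for the whole input sub-graph.
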
bool RequiresCopy(ITensorHandleFactory::FactoryId src,
                  ITensorHandleFactory::FactoryId dst,
                  TensorHandleFactoryRegistry& registry)
{
    if (src != dst)
    {
        ITensorHandleFactory* srcFactory = registry.GetFactory(src);
        ITensorHandleFactory* dstFactory = registry.GetFactory(dst);

        if (srcFactory && dstFactory &&
            (srcFactory->GetExportFlags() & dstFactory->GetImportFlags()) != 0)
        {
            return false;
        }
        return true;
    }
    return false;
}

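// Example: if the source factory can export MemorySource::Malloc buffers and the destination
// factory can import them, the bitwise AND of the export and import flags is non-zero and the
// tensor can cross the edge without a copy.
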
// Find the handle factory for the input layer which results in fewest required copies.
ITensorHandleFactory::FactoryId CalculateSlotOptionForInput(BackendsMap& backends,
                                                            OutputSlot& slot,
                                                            TensorHandleFactoryRegistry& registry)
{
    Layer& layer = slot.GetOwningLayer();
    BOOST_ASSERT(layer.GetType() == LayerType::Input);

    // Explicitly select the tensor handle factory for InputLayer because the rules for it are slightly different.
    // It doesn't matter which backend it is assigned to because they all use the same implementation, which
    // requires Map/Unmap support. This means that, so long as the handle type supports map/unmap semantics, we
    // can select a factory with maximum compatibility with the layers connected to the InputLayer.

    // First ensure the source backend supports the tensor handle API
    auto frmBackend = backends.find(layer.GetBackendId());
    if (frmBackend == backends.end() ||
        !frmBackend->second->SupportsTensorAllocatorAPI())
    {
        return ITensorHandleFactory::LegacyFactoryId;
    }

    // Go through all connections to the output slot and determine the TensorHandleFactory which results in the
    // fewest required copies.
    std::map<ITensorHandleFactory::FactoryId, int> factoryScores;
    int topScore = 0;
    ITensorHandleFactory::FactoryId topChoice = ITensorHandleFactory::LegacyFactoryId;

    for (auto&& connection : slot.GetConnections())
    {
        const Layer& connectedLayer = connection->GetOwningLayer();

        auto toBackend = backends.find(connectedLayer.GetBackendId());
        BOOST_ASSERT_MSG(toBackend != backends.end(), "Backend id not found for the connected layer");

        if (!toBackend->second.get()->SupportsTensorAllocatorAPI())
        {
            // The destination backend does not support the tensor allocator API, move to the next one
            continue;
        }

        auto dstPrefs = toBackend->second.get()->GetHandleFactoryPreferences();
        for (auto&& dst : dstPrefs)
        {
            // Input layers use the mem copy workload or import, so the selected factory must
            // support either the map/unmap API or the import API
            ITensorHandleFactory* factory = registry.GetFactory(dst);
            if (!factory->SupportsMapUnmap() &&
                !CheckFlag(factory->GetImportFlags(), MemorySource::Malloc)) // Just support cpu mem imports for now
            {
                // The current tensor handle factory does not support the map/unmap or import
                // strategy, move to the next one
                continue;
            }

            auto it = factoryScores.find(dst);
            if (it == factoryScores.end())
            {
                // Add new score to the table
                factoryScores[dst] = 0;
                if (topChoice == ITensorHandleFactory::LegacyFactoryId)
                {
                    topChoice = dst;
                }
            }
            else
            {
                // Increase the score
                factoryScores[dst]++;

                // Track the best option
                if (factoryScores[dst] > topScore)
                {
                    topScore  = factoryScores[dst];
                    topChoice = dst;
                }
            }
        }
    }

    return topChoice;
}

// Find the handle factory for the output layer which results in fewest required copies.
ITensorHandleFactory::FactoryId CalculateSlotOptionForOutput(BackendsMap& backends,
                                                             OutputSlot& slot,
                                                             TensorHandleFactoryRegistry& registry)
{
    return ITensorHandleFactory::DeferredFactoryId;
}

// For all handle factories supported on the source backend, we wish to find the one which requires the fewest copies
// when considering all connections.
ITensorHandleFactory::FactoryId CalculateSlotOption(BackendsMap& backends,
                                                    OutputSlot& outputSlot,
                                                    TensorHandleFactoryRegistry& registry)
{
    // First ensure the source backend supports the tensor handle API
    Layer& layer = outputSlot.GetOwningLayer();
    auto frmBackend = backends.find(layer.GetBackendId());
    if (frmBackend == backends.end() ||
        !frmBackend->second->SupportsTensorAllocatorAPI())
    {
        return ITensorHandleFactory::LegacyFactoryId;
    }

    // Connections to Output Layers require support for map/unmap on the TensorHandle.
    bool requiresMapUnmap = false;
    for (auto&& connection : outputSlot.GetConnections())
    {
        const Layer& connectedLayer = connection->GetOwningLayer();
        if (connectedLayer.GetType() == LayerType::Output)
        {
            requiresMapUnmap = true;
        }
    }

    IBackendInternal* srcBackend = frmBackend->second.get();
    auto srcPrefs = srcBackend->GetHandleFactoryPreferences();

    // Initialize the scores
    std::map<ITensorHandleFactory::FactoryId, int> factoryScores;
    for (auto&& pref : srcPrefs)
    {
        if (requiresMapUnmap) // Only consider factories that support map/unmap if required
        {
            ITensorHandleFactory* factory = registry.GetFactory(pref);
            if (!factory->SupportsMapUnmap())
            {
                // The current tensor handle factory does not support the map/unmap strategy, move to the next one
                continue;
            }
        }

        auto it = factoryScores.find(pref);
        if (it == factoryScores.end())
        {
            // Add new score to the table
            factoryScores[pref] = 0;
        }
    }

    // Score each handle factory on how many of the slot's connections it can serve without a copy
    for (auto&& connection : outputSlot.GetConnections())
    {
        const Layer& connectedLayer = connection->GetOwningLayer();

        auto toBackend = backends.find(connectedLayer.GetBackendId());
        BOOST_ASSERT_MSG(toBackend != backends.end(), "Backend id not found for the connected layer");

        auto dstPrefs = toBackend->second.get()->GetHandleFactoryPreferences();
        for (auto&& src : srcPrefs)
        {
            if (factoryScores.find(src) == factoryScores.end()) // Don't consider excluded factories
            {
                continue;
            }

            for (auto&& dst : dstPrefs)
            {
                if (!RequiresCopy(src, dst, registry))
                {
                    // Copy avoided, increase the score
                    factoryScores[src]++;
                    break;
                }
            }
        }
    }

    // Find the best (highest) score
    int topScore = 0;
    for (auto it : factoryScores)
    {
        topScore = std::max(topScore, it.second);
    }

    // Collect factories matching the best score
    std::vector<ITensorHandleFactory::FactoryId> optimalFactories;
    for (auto it : factoryScores)
    {
        if (it.second == topScore)
        {
            optimalFactories.push_back(it.first);
        }
    }

    // Of the compatible factories matching the best score, return the one preferred by the source backend.
    for (auto&& srcPref : srcPrefs)
    {
        for (auto&& comp : optimalFactories)
        {
            if (comp == srcPref)
            {
                return comp;
            }
        }
    }

    return ITensorHandleFactory::LegacyFactoryId;
}

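// The scoring above is a simple voting heuristic: every connection that could reuse a given
// source factory's buffers without a copy adds one point to that factory, and ties between
// equally scored factories are broken by the source backend's own preference order.
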
EdgeStrategy CalculateEdgeStrategy(BackendsMap& backends,
                                   ITensorHandleFactory::FactoryId srcFactoryId,
                                   const Layer& layer,
                                   const Layer& connectedLayer,
                                   TensorHandleFactoryRegistry& registry)
{
    auto toBackend = backends.find(connectedLayer.GetBackendId());
    BOOST_ASSERT_MSG(toBackend != backends.end(), "Backend id not found for the connected layer");

    auto dstPrefs = toBackend->second.get()->GetHandleFactoryPreferences();

    // Legacy API check for backward compatibility
    if (srcFactoryId == ITensorHandleFactory::LegacyFactoryId || dstPrefs.empty())
    {
        if (layer.GetBackendId() != connectedLayer.GetBackendId())
        {
            return EdgeStrategy::CopyToTarget;
        }
        else
        {
            return EdgeStrategy::DirectCompatibility;
        }
    }

    // TensorHandleFactory API present, so perform more sophisticated strategies.
    // Dst Output layers don't require copy because they use import or map/unmap
    if (connectedLayer.GetType() == LayerType::Output)
    {
        return EdgeStrategy::DirectCompatibility;
    }

    // Search for direct match in prefs
    for (auto&& pref : dstPrefs)
    {
        if (pref == srcFactoryId)
        {
            return EdgeStrategy::DirectCompatibility;
        }
    }

    // Search for export/import options
    ITensorHandleFactory* srcFactory = registry.GetFactory(srcFactoryId);
    if (srcFactory->GetExportFlags() != 0)
    {
        for (auto&& pref : dstPrefs)
        {
            ITensorHandleFactory* dstFactory = registry.GetFactory(pref);
            if ((dstFactory->GetImportFlags() & srcFactory->GetExportFlags()) != 0)
            {
                return EdgeStrategy::ExportToTarget;
            }
        }
    }

    // Search for copy options via map/unmap
    if (srcFactory->SupportsMapUnmap())
    {
        for (auto&& pref : dstPrefs)
        {
            ITensorHandleFactory* dstFactory = registry.GetFactory(pref);
            if (dstFactory->SupportsMapUnmap())
            {
                return EdgeStrategy::CopyToTarget;
            }
        }
    }

    return EdgeStrategy::Undefined;
}

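// Strategy precedence: DirectCompatibility (same factory on both ends, or the legacy
// single-factory path), then ExportToTarget (zero-copy sharing via matching export/import
// flags), then CopyToTarget (an explicit copy via map/unmap), and finally Undefined, which
// the caller treats as an error.
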
// Select the TensorHandleFactories and the corresponding memory strategy
OptimizationResult SelectTensorHandleStrategy(Graph& optGraph,
                                              BackendsMap& backends,
                                              TensorHandleFactoryRegistry& registry,
                                              Optional<std::vector<std::string>&> errMessages)
{
    OptimizationResult result;

    optGraph.ForEachLayer([&backends, &registry, &result, &errMessages](Layer* layer)
    {
        BOOST_ASSERT(layer);

        // Make sure the backend is in our list of supported backends. Something went wrong during
        // backend assignment if this check fails.
        BOOST_ASSERT(backends.find(layer->GetBackendId()) != backends.end());

        // Check each output separately
        for (unsigned int slotIdx = 0; slotIdx < layer->GetNumOutputSlots(); slotIdx++)
        {
            OutputSlot& outputSlot = layer->GetOutputSlot(slotIdx);

            ITensorHandleFactory::FactoryId slotOption = ITensorHandleFactory::LegacyFactoryId;

            // Calculate the factory to use which results in the fewest copies being made.
            switch (layer->GetType())
            {
                case LayerType::Input:
                    slotOption = CalculateSlotOptionForInput(backends, outputSlot, registry);
                    break;
                case LayerType::Output:
                    slotOption = CalculateSlotOptionForOutput(backends, outputSlot, registry);
                    break;
                default:
                    slotOption = CalculateSlotOption(backends, outputSlot, registry);
                    break;
            }
            outputSlot.SetTensorHandleFactory(slotOption);

            // Now determine the "best" edge strategy for each connection given the slotOption.
            unsigned int connectionIdx = 0;
            for (auto&& connection : outputSlot.GetConnections())
            {
                const Layer& connectedLayer = connection->GetOwningLayer();

                EdgeStrategy strategy = CalculateEdgeStrategy(backends, slotOption, *layer, connectedLayer, registry);

                if (strategy == EdgeStrategy::Undefined)
                {
                    result.m_Error = true;
                    if (errMessages)
                    {
                        errMessages.value().emplace_back("Could not find valid strategy required for compatibility"
                                                         " between backends.");
                    }
                    return;
                }

                outputSlot.SetEdgeStrategy(connectionIdx, strategy);

                connectionIdx++;
            }
        }
    });

    return result;
}

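// The strategies selected here are materialized later by Graph::AddCompatibilityLayers(), which
// inserts the appropriate compatibility (e.g. MemCopy) layers on every edge whose strategy is
// not DirectCompatibility.
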
IOptimizedNetworkPtr Optimize(const INetwork& inNetwork,
                              const std::vector<BackendId>& backendPreferences,
                              const IDeviceSpec& deviceSpec,
                              const OptimizerOptions& options,
                              Optional<std::vector<std::string>&> errMessages)
{
    if (backendPreferences.empty())
    {
        throw armnn::InvalidArgumentException("Invoked Optimize with no backends specified");
    }

    const Network& network = *boost::polymorphic_downcast<const Network*>(&inNetwork);
    std::unique_ptr<Graph> graph = std::make_unique<Graph>(network.GetGraph());

    auto optNet = IOptimizedNetworkPtr(new OptimizedNetwork(std::move(graph)), &IOptimizedNetwork::Destroy);

    OptimizedNetwork* optNetObjPtr = boost::polymorphic_downcast<OptimizedNetwork*>(optNet.get());

    // Get the optimized graph
    Graph& optGraph = optNetObjPtr->GetGraph();

    // Perform optimisation passes
    using namespace optimizations;
    Optimizer::Pass(optGraph, MakeOptimizations(SquashEqualPermuteSiblings(),
                                                SquashEqualReshapeSiblings(),
                                                OptimizeInversePermutes(),
                                                MovePermuteUp(),
                                                PermuteAsReshape(),
                                                OptimizeConsecutiveReshapes(),
                                                FoldPadIntoConvolution2d()));

    // Infer the tensor infos for all output slots. Throws an exception on failure
    optGraph.InferTensorInfos();

    // If Fp32 to Fp16 optimization is set convert Fp32 network to Fp16
    if (options.m_ReduceFp32ToFp16)
    {
        Optimizer::Pass(optGraph, MakeOptimizations(Fp32NetworkToFp16Converter()));
    }

    // Initialize backend settings
    BackendSettings backendSettings(backendPreferences, deviceSpec);
    if (backendSettings.GetAvailablePreferredBackends().empty())
    {
        std::stringstream failureMsg;
        failureMsg << "None of the preferred backends " << backendPreferences
                   << " are supported. Current platform provides " << backendSettings.m_SupportedBackends;
        ReportError(failureMsg.str(), errMessages);
        return IOptimizedNetworkPtr(nullptr, &IOptimizedNetwork::Destroy);
    }

    // Create a map to temporarily hold the initialized backend objects
    TensorHandleFactoryRegistry tensorHandleFactoryRegistry;
    BackendsMap backends = CreateSupportedBackends(tensorHandleFactoryRegistry, backendSettings);

    // Assign an available backend to each layer
    Graph::Iterator firstLayer = optGraph.begin();
    Graph::Iterator lastLayer  = optGraph.end();
    OptimizationResult assignBackendsResult = AssignBackends(optNetObjPtr,
                                                             backendSettings,
                                                             firstLayer,
                                                             lastLayer,
                                                             errMessages);
    if (assignBackendsResult.m_Error)
    {
        // Failed to assign a backend to each layer
        return IOptimizedNetworkPtr(nullptr, &IOptimizedNetwork::Destroy);
    }

    Optimizer::Pass(optGraph, MakeOptimizations(OptimizeInverseConversionsFp16(),
                                                OptimizeInverseConversionsFp32()));

    // Apply the backend-specific optimizations
    OptimizationResult backendOptimizationResult = ApplyBackendOptimizations(optNetObjPtr,
                                                                             backendSettings,
                                                                             backends,
                                                                             errMessages);
    if (backendOptimizationResult.m_Error)
    {
        // Failed to apply the backend-specific optimizations
        return IOptimizedNetworkPtr(nullptr, &IOptimizedNetwork::Destroy);
    }

    // If the debug flag is set, then insert a DebugLayer after each layer.
    // We do this after applying the backend optimizations as they might have changed some layers.
    if (options.m_Debug)
    {
        Optimizer::Pass(optGraph, MakeOptimizations(InsertDebugLayer()));
    }

    // Calculate the compatibility strategies for tensor handles
    OptimizationResult strategyResult = SelectTensorHandleStrategy(optGraph,
                                                                   backends,
                                                                   tensorHandleFactoryRegistry,
                                                                   errMessages);
    if (strategyResult.m_Error)
    {
        // Failed to calculate the tensor handle strategies
        return IOptimizedNetworkPtr(nullptr, &IOptimizedNetwork::Destroy);
    }

    // Based on the tensor handle strategy determined above, insert copy layers where required.
    optGraph.AddCompatibilityLayers(backends, tensorHandleFactoryRegistry);

    // Convert constants
    Optimizer::Pass(optGraph, MakeOptimizations(ConvertConstantsFloatToHalf()));
    Optimizer::Pass(optGraph, MakeOptimizations(ConvertConstantsHalfToFloat()));

    // Run backend specific optimizations (deprecated)
    for (auto&& chosenBackend : backendSettings.m_SelectedBackends)
    {
        auto factoryFun = BackendRegistryInstance().GetFactory(chosenBackend);
        auto backendPtr = factoryFun();
        BOOST_ASSERT(backendPtr.get() != nullptr);

        ARMNN_NO_DEPRECATE_WARN_BEGIN
        auto backendSpecificOptimizations = backendPtr->GetOptimizations();
        ARMNN_NO_DEPRECATE_WARN_END

        if (!backendSpecificOptimizations.empty())
        {
            Optimizer::Pass(optNetObjPtr->GetGraph(), backendSpecificOptimizations);
        }
    }

    return optNet;
}

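// Illustrative sketch (not part of this file) of how a caller typically drives the
// INetwork/Optimize API defined above, assuming an IRuntime created elsewhere via
// IRuntime::Create(); the shapes and layer choice are arbitrary:
//
//     armnn::INetworkPtr net = armnn::INetwork::Create();
//     armnn::IConnectableLayer* input   = net->AddInputLayer(0);
//     armnn::IConnectableLayer* softmax = net->AddSoftmaxLayer(armnn::SoftmaxDescriptor(), "softmax");
//     armnn::IConnectableLayer* output  = net->AddOutputLayer(0);
//
//     input->GetOutputSlot(0).Connect(softmax->GetInputSlot(0));
//     softmax->GetOutputSlot(0).Connect(output->GetInputSlot(0));
//     input->GetOutputSlot(0).SetTensorInfo(
//         armnn::TensorInfo(armnn::TensorShape({1, 10}), armnn::DataType::Float32));
//     softmax->GetOutputSlot(0).SetTensorInfo(
//         armnn::TensorInfo(armnn::TensorShape({1, 10}), armnn::DataType::Float32));
//
//     std::vector<std::string> messages;
//     armnn::IOptimizedNetworkPtr optNet =
//         armnn::Optimize(*net,
//                         {armnn::Compute::CpuAcc, armnn::Compute::CpuRef},
//                         runtime->GetDeviceSpec(),
//                         armnn::OptimizerOptions(),
//                         armnn::Optional<std::vector<std::string>&>(messages));
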
Network::Network()
: m_Graph(std::make_unique<Graph>())
{
}

Network::~Network()
{
}

IConnectableLayer* Network::AddInputLayer(LayerBindingId id, const char* name)
{
    return m_Graph->AddLayer<InputLayer>(id, name);
}

IConnectableLayer* Network::AddBatchToSpaceNdLayer(const BatchToSpaceNdDescriptor& batchToSpaceNdDescriptor,
                                                   const char* name)
{
    return m_Graph->AddLayer<BatchToSpaceNdLayer>(batchToSpaceNdDescriptor, name);
}

IConnectableLayer* Network::AddFullyConnectedLayerImpl(const FullyConnectedDescriptor& fullyConnectedDescriptor,
                                                       const ConstTensor& weights,
                                                       const Optional<ConstTensor>& biases,
                                                       const char* name)
{
    if (fullyConnectedDescriptor.m_BiasEnabled && !biases.has_value())
    {
        throw InvalidArgumentException("AddFullyConnectedLayer: biases cannot be empty");
    }

    const auto layer = m_Graph->AddLayer<FullyConnectedLayer>(fullyConnectedDescriptor, name);

    layer->m_Weight = std::make_unique<ScopedCpuTensorHandle>(weights);

    if (fullyConnectedDescriptor.m_BiasEnabled)
    {
        layer->m_Bias = std::make_unique<ScopedCpuTensorHandle>(biases.value());
    }

    return layer;
}

IConnectableLayer* Network::AddFullyConnectedLayer(const FullyConnectedDescriptor& fullyConnectedDescriptor,
                                                   const ConstTensor& weights,
                                                   const Optional<ConstTensor>& biases,
                                                   const char* name)
{
    return AddFullyConnectedLayerImpl(fullyConnectedDescriptor, weights, biases, name);
}

IConnectableLayer* Network::AddFullyConnectedLayer(const FullyConnectedDescriptor& fullyConnectedDescriptor,
                                                   const ConstTensor& weights,
                                                   const char* name)
{
    Optional<ConstTensor> biases;
    return AddFullyConnectedLayerImpl(fullyConnectedDescriptor, weights, biases, name);
}

IConnectableLayer* Network::AddFullyConnectedLayer(const FullyConnectedDescriptor& fullyConnectedDescriptor,
                                                   const ConstTensor& weights,
                                                   const ConstTensor& biases,
                                                   const char* name)
{
    Optional<ConstTensor> optionalBiases(biases);
    return AddFullyConnectedLayerImpl(fullyConnectedDescriptor, weights, optionalBiases, name);
}

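// Note: the Add*LayerImpl helpers above (and the convolution variants below) copy the caller's
// weight and bias data into ScopedCpuTensorHandles, so the ConstTensor memory only needs to
// stay valid for the duration of the call.
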
IConnectableLayer* Network::AddConcatLayer(const ConcatDescriptor& concatDescriptor,
                                           const char* name)
{
    return m_Graph->AddLayer<ConcatLayer>(concatDescriptor, name);
}

IConnectableLayer* Network::AddConvolution2dLayerImpl(const Convolution2dDescriptor& convolution2dDescriptor,
                                                      const ConstTensor& weights,
                                                      const Optional<ConstTensor>& biases,
                                                      const char* name)
{
    if (convolution2dDescriptor.m_BiasEnabled && !biases.has_value())
    {
        throw InvalidArgumentException("AddConvolution2dLayer: biases cannot be empty");
    }

    const auto layer = m_Graph->AddLayer<Convolution2dLayer>(convolution2dDescriptor, name);

    layer->m_Weight = std::make_unique<ScopedCpuTensorHandle>(weights);

    if (convolution2dDescriptor.m_BiasEnabled)
    {
        layer->m_Bias = std::make_unique<ScopedCpuTensorHandle>(biases.value());
    }

    return layer;
}

IConnectableLayer* Network::AddConvolution2dLayer(const Convolution2dDescriptor& convolution2dDescriptor,
                                                  const ConstTensor& weights,
                                                  const Optional<ConstTensor>& biases,
                                                  const char* name)
{
    return AddConvolution2dLayerImpl(convolution2dDescriptor, weights, biases, name);
}

IConnectableLayer* Network::AddConvolution2dLayer(const Convolution2dDescriptor& convolution2dDescriptor,
                                                  const ConstTensor& weights,
                                                  const char* name)
{
    Optional<ConstTensor> biases;
    return AddConvolution2dLayerImpl(convolution2dDescriptor, weights, biases, name);
}

IConnectableLayer* Network::AddConvolution2dLayer(const Convolution2dDescriptor& convolution2dDescriptor,
                                                  const ConstTensor& weights,
                                                  const ConstTensor& biases,
                                                  const char* name)
{
    Optional<ConstTensor> optionalBiases(biases);
    return AddConvolution2dLayerImpl(convolution2dDescriptor, weights, optionalBiases, name);
}

IConnectableLayer* Network::AddDepthwiseConvolution2dLayerImpl(
    const DepthwiseConvolution2dDescriptor& convolution2dDescriptor,
    const ConstTensor& weights,
    const Optional<ConstTensor>& biases,
    const char* name)
{
    if (convolution2dDescriptor.m_BiasEnabled && !biases.has_value())
    {
        throw InvalidArgumentException("AddDepthwiseConvolution2dLayer: biases cannot be empty");
    }

    const auto layer = m_Graph->AddLayer<DepthwiseConvolution2dLayer>(convolution2dDescriptor, name);

    layer->m_Weight = std::make_unique<ScopedCpuTensorHandle>(weights);

    if (convolution2dDescriptor.m_BiasEnabled)
    {
        layer->m_Bias = std::make_unique<ScopedCpuTensorHandle>(biases.value());
    }

    return layer;
}

IConnectableLayer* Network::AddDepthwiseConvolution2dLayer(
    const DepthwiseConvolution2dDescriptor& convolution2dDescriptor,
    const ConstTensor& weights,
    const Optional<ConstTensor>& biases,
    const char* name)
{
    return AddDepthwiseConvolution2dLayerImpl(convolution2dDescriptor, weights, biases, name);
}

IConnectableLayer* Network::AddDepthwiseConvolution2dLayer(
    const DepthwiseConvolution2dDescriptor& convolution2dDescriptor,
    const ConstTensor& weights,
    const char* name)
{
    Optional<ConstTensor> biases;
    return AddDepthwiseConvolution2dLayerImpl(convolution2dDescriptor, weights, biases, name);
}

IConnectableLayer* Network::AddDepthwiseConvolution2dLayer(
    const DepthwiseConvolution2dDescriptor& convolution2dDescriptor,
    const ConstTensor& weights,
    const ConstTensor& biases,
    const char* name)
{
    Optional<ConstTensor> optionalBiases(biases);
    return AddDepthwiseConvolution2dLayerImpl(convolution2dDescriptor, weights, optionalBiases, name);
}

IConnectableLayer* Network::AddDetectionPostProcessLayer(const armnn::DetectionPostProcessDescriptor& descriptor,
                                                         const ConstTensor& anchors, const char* name)
{
    const auto layer = m_Graph->AddLayer<DetectionPostProcessLayer>(descriptor, name);

    layer->m_Anchors = std::make_unique<ScopedCpuTensorHandle>(anchors);

    return layer;
}

IConnectableLayer* Network::AddPermuteLayer(const PermuteDescriptor& permuteDescriptor,
                                            const char* name)
{
    return m_Graph->AddLayer<PermuteLayer>(permuteDescriptor, name);
}

IConnectableLayer* Network::AddPooling2dLayer(const Pooling2dDescriptor& pooling2dDescriptor,
                                              const char* name)
{
    return m_Graph->AddLayer<Pooling2dLayer>(pooling2dDescriptor, name);
}

IConnectableLayer* Network::AddActivationLayer(const ActivationDescriptor& activationDescriptor,
                                               const char* name)
{
    return m_Graph->AddLayer<ActivationLayer>(activationDescriptor, name);
}

IConnectableLayer* Network::AddNormalizationLayer(const NormalizationDescriptor& normalizationDescriptor,
                                                  const char* name)
{
    return m_Graph->AddLayer<NormalizationLayer>(normalizationDescriptor, name);
}

IConnectableLayer* Network::AddSoftmaxLayer(const SoftmaxDescriptor& softmaxDescriptor,
                                            const char* name)
{
    return m_Graph->AddLayer<SoftmaxLayer>(softmaxDescriptor, name);
}

IConnectableLayer* Network::AddSplitterLayer(const ViewsDescriptor& splitterDescriptor,
                                             const char* name)
{
    return m_Graph->AddLayer<SplitterLayer>(splitterDescriptor, name);
}

IConnectableLayer* Network::AddMaximumLayer(const char* name)
{
    return m_Graph->AddLayer<MaximumLayer>(name);
}

IConnectableLayer* Network::AddMinimumLayer(const char* name)
{
    return m_Graph->AddLayer<MinimumLayer>(name);
}

IConnectableLayer* Network::AddMergerLayer(const MergerDescriptor& mergerDescriptor,
                                           const char* name)
{
    return AddConcatLayer(mergerDescriptor, name);
}

IConnectableLayer* Network::AddAbsLayer(const char* name)
{
    return m_Graph->AddLayer<AbsLayer>(name);
}

IConnectableLayer* Network::AddAdditionLayer(const char* name)
{
    return m_Graph->AddLayer<AdditionLayer>(name);
}

IConnectableLayer* Network::AddMultiplicationLayer(const char* name)
{
    return m_Graph->AddLayer<MultiplicationLayer>(name);
}

IConnectableLayer* Network::AddOutputLayer(LayerBindingId id, const char* name)
{
    return m_Graph->AddLayer<OutputLayer>(id, name);
}

IConnectableLayer* Network::AddBatchNormalizationLayer(const BatchNormalizationDescriptor& desc,
                                                       const ConstTensor& mean,
                                                       const ConstTensor& variance,
                                                       const ConstTensor& beta,
                                                       const ConstTensor& gamma,
                                                       const char* name)
{
    const auto layer = m_Graph->AddLayer<BatchNormalizationLayer>(desc, name);

    layer->m_Mean     = std::make_unique<ScopedCpuTensorHandle>(mean);
    layer->m_Variance = std::make_unique<ScopedCpuTensorHandle>(variance);
    layer->m_Beta     = std::make_unique<ScopedCpuTensorHandle>(beta);
    layer->m_Gamma    = std::make_unique<ScopedCpuTensorHandle>(gamma);

    return layer;
}

IConnectableLayer* Network::AddResizeBilinearLayer(const ResizeBilinearDescriptor& descriptor,
                                                   const char* name)
{
    ResizeDescriptor resizeDescriptor;
    resizeDescriptor.m_Method       = ResizeMethod::Bilinear;
    resizeDescriptor.m_DataLayout   = descriptor.m_DataLayout;
    resizeDescriptor.m_TargetWidth  = descriptor.m_TargetWidth;
    resizeDescriptor.m_TargetHeight = descriptor.m_TargetHeight;

    return m_Graph->AddLayer<ResizeLayer>(resizeDescriptor, name);
}

IConnectableLayer* Network::AddResizeLayer(const ResizeDescriptor& resizeDescriptor, const char* name)
{
    return m_Graph->AddLayer<ResizeLayer>(resizeDescriptor, name);
}

IConnectableLayer* Network::AddL2NormalizationLayer(const L2NormalizationDescriptor& desc,
                                                    const char* name)
{
    return m_Graph->AddLayer<L2NormalizationLayer>(desc, name);
}

IConnectableLayer* Network::AddConstantLayer(const ConstTensor& input, const char* name)
{
    auto layer = m_Graph->AddLayer<ConstantLayer>(name);

    layer->m_LayerOutput = std::make_unique<ScopedCpuTensorHandle>(input);

    return layer;
}

IConnectableLayer* Network::AddReshapeLayer(const ReshapeDescriptor& reshapeDescriptor,
                                            const char* name)
{
    return m_Graph->AddLayer<ReshapeLayer>(reshapeDescriptor, name);
}

IConnectableLayer* Network::AddSpaceToBatchNdLayer(const SpaceToBatchNdDescriptor& spaceToBatchNdDescriptor,
                                                   const char* name)
{
    return m_Graph->AddLayer<SpaceToBatchNdLayer>(spaceToBatchNdDescriptor, name);
}

IConnectableLayer* Network::AddSpaceToDepthLayer(const SpaceToDepthDescriptor& spaceToDepthDescriptor,
                                                 const char* name)
{
    return m_Graph->AddLayer<SpaceToDepthLayer>(spaceToDepthDescriptor, name);
}

IConnectableLayer* Network::AddFloorLayer(const char* name)
{
    return m_Graph->AddLayer<FloorLayer>(name);
}

IConnectableLayer* Network::AddLstmLayer(const LstmDescriptor& descriptor,
                                         const LstmInputParams& params,
                                         const char* name)
{
    const auto layer = m_Graph->AddLayer<LstmLayer>(descriptor, name);

    // Lstm basic parameters
    layer->m_BasicParameters.m_InputToForgetWeights =
        std::make_unique<ScopedCpuTensorHandle>(*(params.m_InputToForgetWeights));
    layer->m_BasicParameters.m_InputToCellWeights =
        std::make_unique<ScopedCpuTensorHandle>(*(params.m_InputToCellWeights));
    layer->m_BasicParameters.m_InputToOutputWeights =
        std::make_unique<ScopedCpuTensorHandle>(*(params.m_InputToOutputWeights));
    layer->m_BasicParameters.m_RecurrentToForgetWeights =
        std::make_unique<ScopedCpuTensorHandle>(*(params.m_RecurrentToForgetWeights));
    layer->m_BasicParameters.m_RecurrentToCellWeights =
        std::make_unique<ScopedCpuTensorHandle>(*(params.m_RecurrentToCellWeights));
    layer->m_BasicParameters.m_RecurrentToOutputWeights =
        std::make_unique<ScopedCpuTensorHandle>(*(params.m_RecurrentToOutputWeights));
    layer->m_BasicParameters.m_ForgetGateBias =
        std::make_unique<ScopedCpuTensorHandle>(*(params.m_ForgetGateBias));
    layer->m_BasicParameters.m_CellBias =
        std::make_unique<ScopedCpuTensorHandle>(*(params.m_CellBias));
    layer->m_BasicParameters.m_OutputGateBias =
        std::make_unique<ScopedCpuTensorHandle>(*(params.m_OutputGateBias));

    // Lstm CIFG parameters
    if (!descriptor.m_CifgEnabled)
    {
        if (params.m_InputToInputWeights == nullptr)
        {
            throw InvalidArgumentException("AddLstmLayer: Input To Input Weights cannot be NULL");
        }
        if (params.m_RecurrentToInputWeights == nullptr)
        {
            throw InvalidArgumentException(
                "AddLstmLayer: Recurrent To Input Weights cannot be NULL");
        }
        if (params.m_InputGateBias == nullptr)
        {
            throw InvalidArgumentException("AddLstmLayer: Input Gate Bias cannot be NULL");
        }
        layer->m_CifgParameters.m_InputToInputWeights =
            std::make_unique<ScopedCpuTensorHandle>(*(params.m_InputToInputWeights));
        layer->m_CifgParameters.m_RecurrentToInputWeights =
            std::make_unique<ScopedCpuTensorHandle>(*(params.m_RecurrentToInputWeights));
        // In the VTS tests, cell-to-input weights may be null, even if the other CIFG params are not.
        if (params.m_CellToInputWeights != nullptr)
        {
            layer->m_CifgParameters.m_CellToInputWeights =
                std::make_unique<ScopedCpuTensorHandle>(*(params.m_CellToInputWeights));
        }
        layer->m_CifgParameters.m_InputGateBias =
            std::make_unique<ScopedCpuTensorHandle>(*(params.m_InputGateBias));
    }

    // Lstm projection parameters
    if (descriptor.m_ProjectionEnabled)
    {
        if (params.m_ProjectionWeights == nullptr)
        {
            throw InvalidArgumentException("AddLstmLayer: Projection Weights cannot be NULL");
        }
        layer->m_ProjectionParameters.m_ProjectionWeights =
            std::make_unique<ScopedCpuTensorHandle>(*(params.m_ProjectionWeights));
        if (params.m_ProjectionBias != nullptr)
        {
            layer->m_ProjectionParameters.m_ProjectionBias =
                std::make_unique<ScopedCpuTensorHandle>(*(params.m_ProjectionBias));
        }
    }

    // Lstm peephole parameters
    if (descriptor.m_PeepholeEnabled)
    {
        if (params.m_CellToForgetWeights == nullptr)
        {
            throw InvalidArgumentException("AddLstmLayer: Cell To Forget Weights cannot be NULL");
        }
        if (params.m_CellToOutputWeights == nullptr)
        {
            throw InvalidArgumentException("AddLstmLayer: Cell To Output Weights cannot be NULL");
        }
        layer->m_PeepholeParameters.m_CellToForgetWeights =
            std::make_unique<ScopedCpuTensorHandle>(*(params.m_CellToForgetWeights));
        layer->m_PeepholeParameters.m_CellToOutputWeights =
            std::make_unique<ScopedCpuTensorHandle>(*(params.m_CellToOutputWeights));
    }

    // Lstm layer normalization parameters
    if (descriptor.m_LayerNormEnabled)
    {
        if (!descriptor.m_CifgEnabled)
        {
            if (params.m_InputLayerNormWeights == nullptr)
            {
                throw InvalidArgumentException("AddLstmLayer: Input layer normalization weights cannot be NULL");
            }
            layer->m_LayerNormParameters.m_InputLayerNormWeights =
                std::make_unique<ScopedCpuTensorHandle>(*(params.m_InputLayerNormWeights));
        }

        if (params.m_ForgetLayerNormWeights == nullptr)
        {
            throw InvalidArgumentException("AddLstmLayer: Forget layer normalization weights cannot be NULL");
        }
        if (params.m_CellLayerNormWeights == nullptr)
        {
            throw InvalidArgumentException("AddLstmLayer: Cell layer normalization weights cannot be NULL");
        }
        if (params.m_OutputLayerNormWeights == nullptr)
        {
            throw InvalidArgumentException("AddLstmLayer: Output layer normalization weights cannot be NULL");
        }
        layer->m_LayerNormParameters.m_ForgetLayerNormWeights =
            std::make_unique<ScopedCpuTensorHandle>(*(params.m_ForgetLayerNormWeights));
        layer->m_LayerNormParameters.m_CellLayerNormWeights =
            std::make_unique<ScopedCpuTensorHandle>(*(params.m_CellLayerNormWeights));
        layer->m_LayerNormParameters.m_OutputLayerNormWeights =
            std::make_unique<ScopedCpuTensorHandle>(*(params.m_OutputLayerNormWeights));
    }

    return layer;
}

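// CIFG ("coupled input and forget gate") derives the input gate from the forget gate, which is
// why the input-gate weights and bias are only required, and only stored, when CIFG is disabled;
// the projection, peephole and layer-normalization blocks are likewise optional and validated
// only when the corresponding descriptor flag is set.
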
IConnectableLayer* Network::AddDivisionLayer(const char* name)
{
    return m_Graph->AddLayer<DivisionLayer>(name);
}

IConnectableLayer* Network::AddSubtractionLayer(const char* name)
{
    return m_Graph->AddLayer<SubtractionLayer>(name);
}

IConnectableLayer* Network::AddMeanLayer(const MeanDescriptor& meanDescriptor, const char* name)
{
    return m_Graph->AddLayer<MeanLayer>(meanDescriptor, name);
}

IConnectableLayer* Network::AddPadLayer(const PadDescriptor& padDescriptor, const char* name)
{
    return m_Graph->AddLayer<PadLayer>(padDescriptor, name);
}

IConnectableLayer* Network::AddQuantizeLayer(const char* name)
{
    return m_Graph->AddLayer<QuantizeLayer>(name);
}

IConnectableLayer* Network::AddDequantizeLayer(const char* name)
{
    return m_Graph->AddLayer<DequantizeLayer>(name);
}

IConnectableLayer* Network::AddStridedSliceLayer(const StridedSliceDescriptor& stridedSliceDescriptor,
                                                 const char* name)
{
    return m_Graph->AddLayer<StridedSliceLayer>(stridedSliceDescriptor, name);
}

IConnectableLayer* Network::AddGreaterLayer(const char* name)
{
    return m_Graph->AddLayer<GreaterLayer>(name);
}

IConnectableLayer* Network::AddEqualLayer(const char* name)
{
    return m_Graph->AddLayer<EqualLayer>(name);
}

IConnectableLayer* Network::AddRsqrtLayer(const char* name)
{
    return m_Graph->AddLayer<RsqrtLayer>(name);
}

IConnectableLayer* Network::AddGatherLayer(const char* name)
{
    return m_Graph->AddLayer<GatherLayer>(name);
}

IConnectableLayer* Network::AddMergeLayer(const char* name)
{
    return m_Graph->AddLayer<MergeLayer>(name);
}

IConnectableLayer* Network::AddSwitchLayer(const char* name)
{
    return m_Graph->AddLayer<SwitchLayer>(name);
}

IConnectableLayer* Network::AddPreluLayer(const char* name)
{
    return m_Graph->AddLayer<PreluLayer>(name);
}

IConnectableLayer* Network::AddTransposeConvolution2dLayer(const TransposeConvolution2dDescriptor& descriptor,
                                                           const ConstTensor& weights,
                                                           const Optional<ConstTensor>& biases,
                                                           const char* name)
{
    if (descriptor.m_BiasEnabled && !biases.has_value())
    {
        throw InvalidArgumentException("AddTransposeConvolution2dLayer: Biases cannot be empty");
    }

    const auto layer = m_Graph->AddLayer<TransposeConvolution2dLayer>(descriptor, name);

    layer->m_Weight = std::make_unique<ScopedCpuTensorHandle>(weights);

    if (descriptor.m_BiasEnabled)
    {
        layer->m_Bias = std::make_unique<ScopedCpuTensorHandle>(biases.value());
    }

    return layer;
}

IConnectableLayer* Network::AddStackLayer(const StackDescriptor& stackDescriptor,
                                          const char* name)
{
    return m_Graph->AddLayer<StackLayer>(stackDescriptor, name);
}

IConnectableLayer* Network::AddQuantizedLstmLayer(const QuantizedLstmInputParams& params,
                                                  const char* name)
{
    const auto layer = m_Graph->AddLayer<QuantizedLstmLayer>(name);

    // InputToX weights
    layer->m_QuantizedLstmParameters.m_InputToInputWeights =
        std::make_unique<ScopedCpuTensorHandle>(params.GetInputToInputWeights());
    layer->m_QuantizedLstmParameters.m_InputToForgetWeights =
        std::make_unique<ScopedCpuTensorHandle>(params.GetInputToForgetWeights());
    layer->m_QuantizedLstmParameters.m_InputToCellWeights =
        std::make_unique<ScopedCpuTensorHandle>(params.GetInputToCellWeights());
    layer->m_QuantizedLstmParameters.m_InputToOutputWeights =
        std::make_unique<ScopedCpuTensorHandle>(params.GetInputToOutputWeights());

    // RecurrentToX weights
    layer->m_QuantizedLstmParameters.m_RecurrentToInputWeights =
        std::make_unique<ScopedCpuTensorHandle>(params.GetRecurrentToInputWeights());
    layer->m_QuantizedLstmParameters.m_RecurrentToForgetWeights =
        std::make_unique<ScopedCpuTensorHandle>(params.GetRecurrentToForgetWeights());
    layer->m_QuantizedLstmParameters.m_RecurrentToCellWeights =
        std::make_unique<ScopedCpuTensorHandle>(params.GetRecurrentToCellWeights());
    layer->m_QuantizedLstmParameters.m_RecurrentToOutputWeights =
        std::make_unique<ScopedCpuTensorHandle>(params.GetRecurrentToOutputWeights());

    // Bias parameters
    layer->m_QuantizedLstmParameters.m_InputGateBias =
        std::make_unique<ScopedCpuTensorHandle>(params.GetInputGateBias());
    layer->m_QuantizedLstmParameters.m_ForgetGateBias =
        std::make_unique<ScopedCpuTensorHandle>(params.GetForgetGateBias());
    layer->m_QuantizedLstmParameters.m_CellBias =
        std::make_unique<ScopedCpuTensorHandle>(params.GetCellBias());
    layer->m_QuantizedLstmParameters.m_OutputGateBias =
        std::make_unique<ScopedCpuTensorHandle>(params.GetOutputGateBias());

    return layer;
}

void Network::Accept(ILayerVisitor& visitor) const
{
    for (auto layer : GetGraph())
    {
        layer->Accept(visitor);
    }
}

OptimizedNetwork::OptimizedNetwork(std::unique_ptr<Graph> graph)
    : m_Graph(std::move(graph))
{
}

OptimizedNetwork::~OptimizedNetwork()
{
}

} // namespace armnn