// Copyright © 2017 Arm Ltd. All rights reserved.
// SPDX-License-Identifier: MIT

#include "Network.hpp"
#include "Graph.hpp"
#include "Layer.hpp"
#include "DeviceSpec.hpp"
#include "Optimizer.hpp"
#include "SubgraphViewSelector.hpp"
#include "BackendSettings.hpp"
#include "optimizations/All.hpp"

#include <backendsCommon/CpuTensorHandle.hpp>
#include <backendsCommon/WorkloadFactory.hpp>
#include <armnn/backends/IBackendInternal.hpp>
#include <backendsCommon/TensorHandleFactoryRegistry.hpp>

#include <armnn/Exceptions.hpp>
#include <armnn/Utils.hpp>
#include <armnn/TypesUtils.hpp>
#include <armnn/BackendRegistry.hpp>
#include <armnn/Logging.hpp>
#include <armnn/utility/Assert.hpp>
#include <armnn/utility/IgnoreUnused.hpp>
#include <armnn/utility/PolymorphicDowncast.hpp>

#include <ProfilingService.hpp>

#include <boost/format.hpp>
#include <boost/numeric/conversion/converter_policies.hpp>
#include <boost/cast.hpp>

#include <algorithm>
#include <limits>
#include <map>
#include <memory>
#include <sstream>
#include <vector>

namespace armnn
{

armnn::INetwork* INetwork::CreateRaw()
{
    return new Network();
}

armnn::INetworkPtr INetwork::Create()
{
    return INetworkPtr(CreateRaw(), &INetwork::Destroy);
}

void INetwork::Destroy(INetwork* network)
{
    delete PolymorphicDowncast<Network*>(network);
}

void IOptimizedNetwork::Destroy(IOptimizedNetwork* network)
{
    delete PolymorphicDowncast<OptimizedNetwork*>(network);
}
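
// A minimal usage sketch for the factory/deleter pair above (illustrative only;
// error handling omitted). INetworkPtr already carries &INetwork::Destroy as its
// deleter, so the raw-pointer path is only needed by callers that cannot hold a
// unique_ptr:
//
//     armnn::INetworkPtr net = armnn::INetwork::Create();    // preferred: RAII
//     armnn::INetwork* raw   = armnn::INetwork::CreateRaw(); // caller must invoke
//     armnn::INetwork::Destroy(raw);                         // Destroy() manually
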
Status OptimizedNetwork::PrintGraph()
{
    m_Graph->Print();
    return Status::Success;
}

Status OptimizedNetwork::SerializeToDot(std::ostream& stream) const
{
    return m_Graph->SerializeToDot(stream);
}

void ReportError(const std::string& errorMessage,
                 Optional<std::vector<std::string>&> errorMessages)
{
    std::stringstream fullErrorMessage;
    fullErrorMessage << "ERROR: " << errorMessage;
    ARMNN_LOG(error) << fullErrorMessage.str();
    if (errorMessages)
    {
        errorMessages.value().push_back(fullErrorMessage.str());
    }
}

void ReportWarning(const std::string& warningMessage,
                   Optional<std::vector<std::string>&> warningMessages)
{
    std::stringstream fullWarningMessage;
    fullWarningMessage << "WARNING: " << warningMessage;
    ARMNN_LOG(warning) << fullWarningMessage.str();
    if (warningMessages)
    {
        warningMessages.value().push_back(fullWarningMessage.str());
    }
}

OptimizationResult ReturnWithError(OptimizationResult res,
                                   const Layer* layer,
                                   const BackendSettings& backendSettings,
                                   Optional<std::vector<std::string>&> errMessages)
{
    std::stringstream failureMsg;
    failureMsg << "Layer of type " << GetLayerTypeAsCString(layer->GetType())
               << " is not supported on any preferred backend " << backendSettings.m_PreferredBackends;
    ReportError(failureMsg.str(), errMessages);

    res.m_Error = true;
    return res;
}

bool CheckScaleSetOnQuantizedType(Layer* layer, Optional<std::vector<std::string>&> errMessages)
{
    bool noErrors = true;
    unsigned int numOutputs = layer->GetNumOutputSlots();
    for (unsigned int i = 0; i < numOutputs; i++)
    {
        OutputSlot& outputSlot = layer->GetOutputSlot(i);
        TensorInfo info = outputSlot.GetTensorInfo();
        if (DataType::QAsymmU8 == info.GetDataType())
        {
            if (0.f == info.GetQuantizationScale())
            {
                noErrors = false;
                std::stringstream ss;
                ss << "output " << i << " of layer " << GetLayerTypeAsCString(layer->GetType())
                   << " (" << layer->GetNameStr() << ") is of type"
                   << " Quantized 8 bit but its scale parameter has not been set";
                ReportError(ss.str(), errMessages);
            }
            // Softmax under QuantisedAsymm8 must always have scale (1.0f/256.0f) and offset 0
            if ((info.GetQuantizationScale() != (1.0f / 256.0f) ||
                 info.GetQuantizationOffset() != 0) &&
                layer->GetType() == armnn::LayerType::Softmax)
            {
                std::stringstream ss;
                ss << "Quantization parameters for Softmax layer (Scale: " <<
                      info.GetQuantizationScale() << " and Offset: " << info.GetQuantizationOffset() <<
                      ") are incorrect and have been updated to Scale: 0.00390625 and Offset: 0";
                ARMNN_LOG(warning) << ss.str();
                info.SetQuantizationScale((1.0f / 256.0f));
                info.SetQuantizationOffset(0);
                outputSlot.SetTensorInfo(info);
            }
        }
    }
    return noErrors;
}
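
// For reference: with QAsymmU8 the real value is recovered as
//     real = scale * (quantized - offset)
// so a Softmax output (whose range is [0, 1)) quantized with scale 1/256 = 0.00390625
// and offset 0 maps the full unsigned 8-bit range [0, 255] onto [0, 0.99609375].
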
template <typename LayerT>
LayerT* ConvertBf16ToFp32Weight(Layer* l)
{
    LayerT* layer = PolymorphicDowncast<LayerT*>(l);
    if ((layer->GetType() == LayerType::Convolution2d || layer->GetType() == LayerType::FullyConnected)
         && layer->m_Weight)
    {
        const TensorInfo& info = layer->m_Weight->GetTensorInfo();

        if (info.GetDataType() == DataType::BFloat16)
        {
            std::vector<float> newValues(info.GetNumElements());

            armnnUtils::FloatingPointConverter::ConvertBFloat16ToFloat32(
                layer->m_Weight->template GetTensor<armnn::BFloat16>(), info.GetNumElements(), newValues.data());

            TensorInfo newInfo(info.GetShape(), DataType::Float32);
            ConstTensor newInput(newInfo, newValues);
            layer->m_Weight.reset(new ScopedCpuTensorHandle(newInput));
        }
    }
    return layer;
}
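
// Note: BFloat16 keeps the sign and the 8 exponent bits of IEEE FP32 and truncates the
// mantissa to 7 bits, so the widening BF16 -> FP32 conversion above is exact: every
// BF16 value corresponds to an FP32 value whose low 16 mantissa bits are zero.
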
OptimizationResult AttemptBackendAssignment(BackendSettings& backendSettings,
                                            Graph& graph,
                                            Layer* layer,
                                            BackendId backend,
                                            DataType dataTypeIn,
                                            DataType dataTypeOut,
                                            const std::vector<BackendId>& availablePreferredBackends,
                                            std::string& reasonIfUnsupported,
                                            Optional<std::vector<std::string>&> errMessages)
{
    OptimizationResult result;

    // Helper lambda to compose a meaningful error message before returning with error
    auto ReturnError = [&](const Layer* layer)
        {
            return ReturnWithError(result, layer, backendSettings, errMessages);
        };

    // We need to set the compute device on the layer
    // before we can check if it is supported.
    layer->SetBackendId(backend);
    if (!IWorkloadFactory::IsLayerSupported(*layer, EmptyOptional(), reasonIfUnsupported))
    {
        if (dataTypeIn == DataType::Float16 || dataTypeOut == DataType::Float16)
        {
            if (IWorkloadFactory::IsLayerSupported(*layer, DataType::Float32, reasonIfUnsupported)
                && layer->GetType() != LayerType::ConvertFp32ToFp16
                && layer->GetType() != LayerType::ConvertFp16ToFp32)
            {
                // Insert FP16 -> FP32 conversion layer before current layer
                std::vector<ConvertFp16ToFp32Layer*> convertFp16ToFp32Layers;
                if (dataTypeIn == DataType::Float16)
                {
                    convertFp16ToFp32Layers =
                        InsertConvertFp16ToFp32LayersBefore(graph, *layer);
                }

                // Insert FP32 -> FP16 conversion layer after current layer
                std::vector<ConvertFp32ToFp16Layer*> convertFp32ToFp16Layers;
                if (dataTypeOut == DataType::Float16)
                {
                    convertFp32ToFp16Layers =
                        InsertConvertFp32ToFp16LayersAfter(graph, *layer);
                }

                // Assign a supported backend to the newly introduced conversion layers
                auto AssignFirstSupportedBackend = [&](Layer* layer, BackendId preferredBackend)
                    {
                        bool supportedBackendFound = false;
                        std::string reasonIfUnsupported;

                        // Try the preferred backend first
                        layer->SetBackendId(preferredBackend);
                        if (IWorkloadFactory::IsLayerSupported(*layer,
                                                               EmptyOptional(),
                                                               reasonIfUnsupported))
                        {
                            supportedBackendFound = true;
                        }
                        else
                        {
                            for (const auto& backend : availablePreferredBackends)
                            {
                                // Skip the preferred backend (we already determined that it is not supported)
                                if (backend == preferredBackend)
                                {
                                    continue;
                                }

                                layer->SetBackendId(backend);
                                if (IWorkloadFactory::IsLayerSupported(*layer,
                                                                       EmptyOptional(),
                                                                       reasonIfUnsupported))
                                {
                                    supportedBackendFound = true;
                                    break;
                                }
                            }
                        }

                        return supportedBackendFound;
                    };

                for (ConvertFp16ToFp32Layer* convertLayer : convertFp16ToFp32Layers)
                {
                    if (!AssignFirstSupportedBackend(convertLayer, backend))
                    {
                        return ReturnError(convertLayer);
                    }
                }

                for (ConvertFp32ToFp16Layer* convertLayer : convertFp32ToFp16Layers)
                {
                    if (!AssignFirstSupportedBackend(convertLayer, backend))
                    {
                        return ReturnError(convertLayer);
                    }
                }

                return result;
            }
        }
        else if (dataTypeIn == DataType::BFloat16 || dataTypeOut == DataType::BFloat16)
        {
            if (IWorkloadFactory::IsLayerSupported(*layer, DataType::Float32, reasonIfUnsupported)
                && layer->GetType() != LayerType::ConvertFp32ToBf16
                && layer->GetType() != LayerType::ConvertBf16ToFp32)
            {
                // Insert BF16 -> FP32 conversion layer before current layer
                std::vector<ConvertBf16ToFp32Layer*> convertBf16ToFp32Layers;
                if (dataTypeIn == DataType::BFloat16)
                {
                    convertBf16ToFp32Layers =
                        InsertConvertBf16ToFp32LayersBefore(graph, *layer);
                    if (layer->GetType() == LayerType::Convolution2d)
                    {
                        ConvertBf16ToFp32Weight<Convolution2dLayer>(layer);
                    }
                    else if (layer->GetType() == LayerType::FullyConnected)
                    {
                        ConvertBf16ToFp32Weight<FullyConnectedLayer>(layer);
                    }
                }

                // Insert FP32 -> BF16 conversion layer after current layer
                std::vector<ConvertFp32ToBf16Layer*> convertFp32ToBf16Layers;
                if (dataTypeOut == DataType::BFloat16)
                {
                    convertFp32ToBf16Layers =
                        InsertConvertFp32ToBf16LayersAfter(graph, *layer);
                }

                // Assign a supported backend to the newly introduced conversion layers
                auto AssignFirstSupportedBackend = [&](Layer* layer, BackendId preferredBackend)
                    {
                        bool supportedBackendFound = false;
                        std::string reasonIfUnsupported;

                        // Try the preferred backend first
                        layer->SetBackendId(preferredBackend);
                        if (IWorkloadFactory::IsLayerSupported(*layer,
                                                               EmptyOptional(),
                                                               reasonIfUnsupported))
                        {
                            supportedBackendFound = true;
                        }
                        else
                        {
                            for (const auto& backend : availablePreferredBackends)
                            {
                                // Skip the preferred backend (we already determined that it is not supported)
                                if (backend == preferredBackend)
                                {
                                    continue;
                                }

                                layer->SetBackendId(backend);
                                if (IWorkloadFactory::IsLayerSupported(*layer,
                                                                       EmptyOptional(),
                                                                       reasonIfUnsupported))
                                {
                                    supportedBackendFound = true;
                                    break;
                                }
                            }
                        }

                        return supportedBackendFound;
                    };

                for (ConvertBf16ToFp32Layer* convertLayer : convertBf16ToFp32Layers)
                {
                    if (!AssignFirstSupportedBackend(convertLayer, backend))
                    {
                        return ReturnError(convertLayer);
                    }
                }

                for (ConvertFp32ToBf16Layer* convertLayer : convertFp32ToBf16Layers)
                {
                    if (!AssignFirstSupportedBackend(convertLayer, backend))
                    {
                        return ReturnError(convertLayer);
                    }
                }

                return result;
            }
        }

        std::stringstream warningMsg;
        warningMsg << "Layer of type " << GetLayerTypeAsCString(layer->GetType())
                   << " is not supported on requested backend " << layer->GetBackendId().Get()
                   << " for input data type " << GetDataTypeName(dataTypeIn)
                   << " and output data type " << GetDataTypeName(dataTypeOut)
                   << " (reason: " << reasonIfUnsupported
                   << "), falling back to the next backend.";
        ReportWarning(warningMsg.str(), errMessages);

        return OptimizationResult(true, false);
    }

    return result;
}
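
// A note on the OptimizationResult conventions used above (inferred from the call
// sites in this file rather than stated anywhere authoritative):
//     OptimizationResult(true, false)  - warning only: the layer is unsupported here
//                                        and the caller should try the next backend;
//     result.m_Error == true           - hard failure, assignment is aborted;
//     a default-constructed result     - the backend accepted the layer (IsOk()).
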
OptimizationResult AssignBackends(OptimizedNetwork* optNetObjPtr,
                                  BackendSettings& backendSettings,
                                  Graph::Iterator& firstLayer,
                                  Graph::Iterator& lastLayer,
                                  Optional<std::vector<std::string>&> errMessages)
{
    OptimizationResult result;

    // Helper lambda to compose a meaningful error message before returning with error
    auto ReturnError = [&](const Layer* layer)
        {
            return ReturnWithError(result, layer, backendSettings, errMessages);
        };

    auto availablePreferredBackends = backendSettings.GetAvailablePreferredBackends();
    if (availablePreferredBackends.empty())
    {
        std::stringstream failureMsg;
        failureMsg << "No preferred backends are available";
        ReportError(failureMsg.str(), errMessages);

        result.m_Error = true;
        return result;
    }

    for (auto it = firstLayer; it != lastLayer; ++it)
    {
        auto layer = *it;

        DataType dataTypeIn  = layer->GetNumInputSlots() == 0 ? DataType::Float32 :
            layer->GetInputSlot(0).GetConnectedOutputSlot()->GetTensorInfo().GetDataType();
        DataType dataTypeOut = layer->GetNumOutputSlots() == 0 ? DataType::Float32 :
            layer->GetOutputSlot(0).GetTensorInfo().GetDataType();

        std::string reasonIfUnsupported;
        bool found = false;
        if (!CheckScaleSetOnQuantizedType(layer, errMessages))
        {
            // Don't bomb immediately, find all the quantized outputs
            // which haven't had a scale set and report them all back.
            result.m_Error = true;
        }

        // First try to assign the layer to the hinted backend
        if (layer->GetBackendHint().has_value() &&
            backendSettings.IsBackendSupported(layer->GetBackendHint().value()) &&
            AttemptBackendAssignment(backendSettings,
                                     optNetObjPtr->GetGraph(),
                                     layer,
                                     layer->GetBackendHint().value(),
                                     dataTypeIn,
                                     dataTypeOut,
                                     availablePreferredBackends,
                                     reasonIfUnsupported,
                                     errMessages).IsOk())
        {
            found = true;
            backendSettings.m_SelectedBackends.insert(layer->GetBackendHint().value());
        }
        else
        {
            // Then try to assign the layer to the preferred list of backends
            for (const auto& backend : availablePreferredBackends)
            {
                if (layer->GetBackendHint().has_value() &&
                    layer->GetBackendHint().value() == backend)
                {
                    continue; // Don't re-test the backend hint
                }

                OptimizationResult res = AttemptBackendAssignment(backendSettings,
                                                                  optNetObjPtr->GetGraph(),
                                                                  layer,
                                                                  backend,
                                                                  dataTypeIn,
                                                                  dataTypeOut,
                                                                  availablePreferredBackends,
                                                                  reasonIfUnsupported,
                                                                  errMessages);

                if (res.IsOk())
                {
                    found = true;
                    backendSettings.m_SelectedBackends.insert(backend);
                    break;
                }
                else if (res.IsError())
                {
                    return res; // Cannot continue.
                    // Note: we don't need to log the error as it would already
                    // be logged in AttemptBackendAssignment().
                }
                else
                {
                    ARMNN_ASSERT_MSG(res.IsWarningOnly(), "OptimizationResult in unexpected state.");
                }
            }
        }

        // If the layer is unsupported by any devices, log and return a null network.
        if (!found)
        {
            // NOTE: if the layer is not an operation queue type AND we have not got CpuRef as a
            // fallback we should set the compute device on the layer to CpuRef (these are not
            // available as accelerated operations, or are only available under certain
            // conditions, currently they comprise MemCopy, Constant, Permute)
            armnn::LayerType layerType = layer->GetType();
            if (!backendSettings.IsCpuRefUsed() && (layerType == armnn::LayerType::MemCopy ||
                                                    layerType == armnn::LayerType::Constant ||
                                                    layerType == armnn::LayerType::Permute))
            {
                BackendId cpuBackendId(armnn::Compute::CpuRef);
                layer->SetBackendId(cpuBackendId);
                backendSettings.m_SelectedBackends.insert(cpuBackendId);
            }
            else
            {
                return ReturnError(layer);
            }
        }
    }

    return result;
}

OptimizationResult AssignBackends(OptimizedNetwork* optNetObjPtr,
                                  BackendSettings& backendSettings,
                                  SubgraphView& subgraph,
                                  Optional<std::vector<std::string>&> errMessages)
{
    Graph::Iterator firstLayer = subgraph.begin();
    Graph::Iterator lastLayer  = subgraph.end();
    return AssignBackends(optNetObjPtr,
                          backendSettings,
                          firstLayer,
                          lastLayer,
                          errMessages);
}

BackendsMap CreateSupportedBackends(TensorHandleFactoryRegistry& handleFactoryRegistry,
                                    BackendSettings& backendSettings)
{
    BackendsMap backends;
    auto const& backendRegistry = BackendRegistryInstance();
    for (auto&& selectedBackend : backendSettings.m_SupportedBackends)
    {
        auto backendFactory = backendRegistry.GetFactory(selectedBackend);
        auto backendObjPtr  = backendFactory();
        ARMNN_ASSERT(backendObjPtr);

        backendObjPtr->RegisterTensorHandleFactories(handleFactoryRegistry);

        backends[backendObjPtr->GetId()] = std::move(backendObjPtr);
    }

    return backends;
}
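
// A hypothetical caller of the helper above (names mirror the ones used in
// Optimize() below):
//
//     TensorHandleFactoryRegistry registry;
//     BackendSettings settings(backendPreferences, deviceSpec);
//     BackendsMap backends = CreateSupportedBackends(registry, settings);
//
// after which 'backends' owns one initialized IBackendInternal per supported
// backend id (e.g. "CpuRef", "CpuAcc", "GpuAcc"), with its tensor handle
// factories registered.
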
OptimizationResult ApplyBackendOptimizations(OptimizedNetwork* optNetObjPtr,
                                             BackendSettings& backendSettings,
                                             BackendsMap& backends,
                                             Optional<std::vector<std::string>&> errMessages)
{
    ARMNN_ASSERT(optNetObjPtr);

    OptimizationResult result;

    // Get the optimized graph
    Graph& optGraph = optNetObjPtr->GetGraph();

    // Run backend specific optimizations
    for (auto&& selectedBackend : backendSettings.m_SelectedBackends)
    {
        auto backendObjPtr = backends.find(selectedBackend)->second.get();
        ARMNN_ASSERT(backendObjPtr);

        // Select sub-graphs based on backend
        SubgraphViewSelector::Subgraphs subgraphs =
            SubgraphViewSelector::SelectSubgraphs(optGraph,
                                                  // Select layers assigned to the requested backend
                                                  [&backendObjPtr](const Layer& layer)
                                                  {
                                                      return layer.GetType() != LayerType::Input &&
                                                             layer.GetType() != LayerType::Output &&
                                                             layer.GetBackendId() == backendObjPtr->GetId();
                                                  });
        if (subgraphs.empty())
        {
            // No sub-graphs found, try with next selected backend
            continue;
        }

        // Try to optimize each sub-graph
        for (auto& subgraph : subgraphs)
        {
            // Try to optimize the current sub-graph
            OptimizationViews optimizationViews = backendObjPtr->OptimizeSubgraphView(*subgraph);
            ARMNN_ASSERT(optimizationViews.Validate(*subgraph));

            // Optimization attempted, check the resulting optimized sub-graph
            for (auto& substitution : optimizationViews.GetSubstitutions())
            {
                // Sub-graph optimized, substitute the sub-graph with the new optimized one in the main graph
                SubgraphView& replacementSubgraph   = substitution.m_ReplacementSubgraph;
                SubgraphView& substitutableSubgraph = substitution.m_SubstitutableSubgraph;
                optGraph.SubstituteSubgraph(substitutableSubgraph, replacementSubgraph);

                // Assign the current backend to the optimized sub-graph
                std::for_each(replacementSubgraph.begin(), replacementSubgraph.end(), [&selectedBackend](Layer* l)
                    {
                        ARMNN_ASSERT(l);
                        l->SetBackendId(selectedBackend);
                    });
            }

            if (!optimizationViews.GetFailedSubgraphs().empty())
            {
                std::stringstream warningMsg;
                warningMsg << "Some sub-graph(s) failed to optimize on " << backendObjPtr->GetId() << " backend.";
                ReportWarning(warningMsg.str(), errMessages);

                // Failed to optimize the given sub-graph, re-assign its layers to other available backends
                BackendSettings settingsCopy(backendSettings);
                if (!backendObjPtr->GetId().IsCpuRef())
                {
                    // Add the current backend to the list of backends to ignore
                    settingsCopy.m_IgnoredBackends.insert(backendObjPtr->GetId());
                }

                int count = 0;
                for (auto& failedSubgraph : optimizationViews.GetFailedSubgraphs())
                {
                    // An error occurred: the optimization was attempted but not performed, try different backends
                    std::stringstream subgraphMsg;
                    subgraphMsg << "Re-assigning backends to " << failedSubgraph.GetLayers().size()
                                << " layers inside sub-graph " << count++;
                    ReportWarning(subgraphMsg.str(), errMessages);

                    OptimizationResult reassignmentResult = AssignBackends(optNetObjPtr,
                                                                           settingsCopy,
                                                                           failedSubgraph,
                                                                           errMessages);
                    if (reassignmentResult.m_Error)
                    {
                        // Failed to re-assign one of the remaining backends to each layer of the sub-graph
                        result.m_Error = true;
                    }
                }
            }
        }
    }

    return result;
}

bool RequiresCopy(ITensorHandleFactory::FactoryId src,
                  ITensorHandleFactory::FactoryId dst,
                  TensorHandleFactoryRegistry& registry)
{
    if (src != dst)
    {
        ITensorHandleFactory* srcFactory = registry.GetFactory(src);
        ITensorHandleFactory* dstFactory = registry.GetFactory(dst);

        if (srcFactory && dstFactory &&
            (srcFactory->GetExportFlags() & dstFactory->GetImportFlags()) != 0)
        {
            return false;
        }
        return true;
    }
    return false;
}
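
// The flags compared above are MemorySource bitmasks, so compatibility is a simple
// intersection test. For example, if srcFactory exports {Malloc} and dstFactory can
// import {Malloc, DmaBuf}, the AND is non-zero and the tensor can cross the edge
// without a copy; if the sets are disjoint, a copy (and later a copy layer) is needed.
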
// Find the handle factory for the input layer which results in the fewest required copies.
ITensorHandleFactory::FactoryId CalculateSlotOptionForInput(BackendsMap& backends,
                                                            OutputSlot& slot,
                                                            TensorHandleFactoryRegistry& registry)
{
    Layer& layer = slot.GetOwningLayer();
    ARMNN_ASSERT(layer.GetType() == LayerType::Input);

    // Explicitly select the tensorhandle factory for InputLayer because the rules for it are slightly different. It
    // doesn't matter which backend it is assigned to because they all use the same implementation, which
    // requires Map/Unmap support. This means that, so long as the handle type supports map/unmap semantics, we can
    // select a factory with maximum compatibility with the layers connected to the InputLayer.

    // First ensure the source backend supports the TensorHandle API.
    auto frmBackend = backends.find(layer.GetBackendId());
    if (frmBackend == backends.end() ||
        !frmBackend->second->SupportsTensorAllocatorAPI())
    {
        return ITensorHandleFactory::LegacyFactoryId;
    }

    // Go through all connections to the output slot and determine the TensorHandleFactory which results in the
    // fewest copies.
    std::map<ITensorHandleFactory::FactoryId, int> factoryScores;
    int topScore = 0;
    ITensorHandleFactory::FactoryId topChoice = ITensorHandleFactory::LegacyFactoryId;

    for (auto&& connection : slot.GetConnections())
    {
        const Layer& connectedLayer = connection->GetOwningLayer();

        auto toBackend = backends.find(connectedLayer.GetBackendId());
        ARMNN_ASSERT_MSG(toBackend != backends.end(), "Backend id not found for the connected layer");

        if (!toBackend->second.get()->SupportsTensorAllocatorAPI())
        {
            // The destination backend does not support the tensor allocator API, move to the next one
            continue;
        }

        auto dstPrefs = toBackend->second.get()->GetHandleFactoryPreferences();
        for (auto&& dst : dstPrefs)
        {
            // Input layers use the mem copy workload or import, so the selected factory must
            // support either the map/unmap API or the Import API.
            ITensorHandleFactory* factory = registry.GetFactory(dst);
            if (!factory->SupportsMapUnmap() &&
                !CheckFlag(factory->GetImportFlags(), MemorySource::Malloc)) // Just support cpu mem imports for now
            {
                // The current tensor handle factory does not support the map/unmap or import
                // strategy, move to the next one
                continue;
            }

            auto it = factoryScores.find(dst);
            if (it == factoryScores.end())
            {
                // Add a new score to the table
                factoryScores[dst] = 0;
                if (topChoice == ITensorHandleFactory::LegacyFactoryId)
                {
                    topChoice = dst;
                }
            }
            else
            {
                // Increase the score
                factoryScores[dst]++;

                // Track the best option
                if (factoryScores[dst] > topScore)
                {
                    topScore = factoryScores[dst];
                    topChoice = dst;
                }
            }
        }
    }

    return topChoice;
}

// Find the handle factory for the output layer which results in the fewest required copies.
ITensorHandleFactory::FactoryId CalculateSlotOptionForOutput(BackendsMap& backends,
                                                             OutputSlot& slot,
                                                             TensorHandleFactoryRegistry& registry)
{
    IgnoreUnused(backends, slot, registry);
    return ITensorHandleFactory::DeferredFactoryId;
}
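
// Rationale (an inference from the surrounding code, not a documented guarantee):
// an OutputLayer only consumes what the preceding layer produces, so the factory
// decision can be deferred to the producing slot and nothing needs scoring here.
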
// For all handle factories supported on the source backend, we wish to find the one which requires the fewest copies
// when considering all connections.
ITensorHandleFactory::FactoryId CalculateSlotOption(BackendsMap& backends,
                                                    OutputSlot& outputSlot,
                                                    TensorHandleFactoryRegistry& registry)
{
    // First ensure the source backend supports the TensorHandle API.
    Layer& layer = outputSlot.GetOwningLayer();
    auto frmBackend = backends.find(layer.GetBackendId());
    if (frmBackend == backends.end() ||
        !frmBackend->second->SupportsTensorAllocatorAPI())
    {
        return ITensorHandleFactory::LegacyFactoryId;
    }

    // Connections to Output Layers require support for map/unmap on the TensorHandle.
    bool requiresMapUnmap = false;
    for (auto&& connection : outputSlot.GetConnections())
    {
        const Layer& connectedLayer = connection->GetOwningLayer();
        if (connectedLayer.GetType() == LayerType::Output)
        {
            requiresMapUnmap = true;
        }
    }

    IBackendInternal* srcBackend = frmBackend->second.get();
    auto srcPrefs = srcBackend->GetHandleFactoryPreferences();

    // Initialize the scores
    std::map<ITensorHandleFactory::FactoryId, int> factoryScores;
    for (auto&& pref : srcPrefs)
    {
        if (requiresMapUnmap) // Only consider factories that support map/unmap if required
        {
            ITensorHandleFactory* factory = registry.GetFactory(pref);
            if (!factory->SupportsMapUnmap())
            {
                // The current tensor handle factory does not support the map/unmap strategy, move to the next one
                continue;
            }
        }

        auto it = factoryScores.find(pref);
        if (it == factoryScores.end())
        {
            // Add a new score to the table
            factoryScores[pref] = 0;
        }
    }

    // Score each handle factory based on how many times it requires copies on the slot connections
    for (auto&& connection : outputSlot.GetConnections())
    {
        const Layer& connectedLayer = connection->GetOwningLayer();

        auto toBackend = backends.find(connectedLayer.GetBackendId());
        ARMNN_ASSERT_MSG(toBackend != backends.end(), "Backend id not found for the connected layer");

        auto dstPrefs = toBackend->second.get()->GetHandleFactoryPreferences();
        for (auto&& src : srcPrefs)
        {
            if (factoryScores.find(src) == factoryScores.end()) // Don't consider excluded factories
            {
                continue;
            }

            for (auto&& dst : dstPrefs)
            {
                if (RequiresCopy(src, dst, registry))
                {
                    // A copy would be required for this pairing, so penalise the factory's score
                    factoryScores[src]++;
                }
            }
        }
    }

    // Find the lowest score
    int minScore = std::numeric_limits<int>::max();
    for (auto it : factoryScores)
    {
        minScore = std::min(minScore, it.second);
    }

    // Collect factories matching the best (lowest) score
    std::vector<ITensorHandleFactory::FactoryId> optimalFactories;
    for (auto it : factoryScores)
    {
        if (it.second == minScore)
        {
            optimalFactories.push_back(it.first);
        }
    }

    // For all compatible factories matching the best score, find the preferred one for the current layer.
    for (auto&& srcPref : srcPrefs)
    {
        for (auto&& comp : optimalFactories)
        {
            if (comp == srcPref)
            {
                return srcPref;
            }
        }
    }

    return ITensorHandleFactory::LegacyFactoryId;
}
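
// A worked example of the scoring above (hypothetical factories A and B): if this
// output slot feeds three consumers and factory A accumulates one required copy
// across those connections while factory B accumulates two, then minScore is A's 1
// and A is chosen; ties between equally-scored factories are broken by the source
// backend's own preference order.
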
EdgeStrategy CalculateEdgeStrategy(BackendsMap& backends,
                                   ITensorHandleFactory::FactoryId srcFactoryId,
                                   const Layer& layer,
                                   const Layer& connectedLayer,
                                   TensorHandleFactoryRegistry& registry)
{
    auto toBackend = backends.find(connectedLayer.GetBackendId());
    ARMNN_ASSERT_MSG(toBackend != backends.end(), "Backend id not found for the connected layer");

    auto dstPrefs = toBackend->second.get()->GetHandleFactoryPreferences();

    // Legacy API check for backward compatibility
    if (srcFactoryId == ITensorHandleFactory::LegacyFactoryId || dstPrefs.empty())
    {
        if (layer.GetBackendId() != connectedLayer.GetBackendId())
        {
            return EdgeStrategy::CopyToTarget;
        }
        else
        {
            return EdgeStrategy::DirectCompatibility;
        }
    }

    // TensorHandleFactory API present, so perform more sophisticated strategies.
    // Dst Output layers don't require a copy because they use import or map/unmap.
    if (connectedLayer.GetType() == LayerType::Output)
    {
        return EdgeStrategy::DirectCompatibility;
    }

    // Search for a direct match in prefs
    for (auto&& pref : dstPrefs)
    {
        if (pref == srcFactoryId)
        {
            return EdgeStrategy::DirectCompatibility;
        }
    }

    // Search for export/import options
    ITensorHandleFactory* srcFactory = registry.GetFactory(srcFactoryId);
    if (srcFactory->GetExportFlags() != 0)
    {
        for (auto&& pref : dstPrefs)
        {
            ITensorHandleFactory* dstFactory = registry.GetFactory(pref);

            // Handles cases when a destPref is not listed in TensorHandleFactoryRegistry
            if (!dstFactory)
            {
                continue;
            }

            if ((dstFactory->GetImportFlags() & srcFactory->GetExportFlags()) != 0)
            {
                return EdgeStrategy::ExportToTarget;
            }
        }
    }

    // Search for copy options via map/unmap
    if (srcFactory->SupportsMapUnmap())
    {
        for (auto&& pref : dstPrefs)
        {
            ITensorHandleFactory* dstFactory = registry.GetFactory(pref);
            if (dstFactory && dstFactory->SupportsMapUnmap())
            {
                return EdgeStrategy::CopyToTarget;
            }
        }
    }

    return EdgeStrategy::Undefined;
}
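
// Summary of the precedence implemented above, from cheapest to most expensive:
//     DirectCompatibility - the consumer accepts the producer's tensor handle as-is;
//     ExportToTarget      - zero-copy sharing via matching export/import memory flags;
//     CopyToTarget        - an explicit copy through map/unmap;
//     Undefined           - no viable strategy, reported as an error by the caller.
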
// Select the TensorHandleFactories and the corresponding memory strategy
OptimizationResult SelectTensorHandleStrategy(Graph& optGraph,
                                              BackendsMap& backends,
                                              TensorHandleFactoryRegistry& registry,
                                              Optional<std::vector<std::string>&> errMessages)
{
    OptimizationResult result;

    optGraph.ForEachLayer([&backends, &registry, &result, &errMessages](Layer* layer)
        {
            ARMNN_ASSERT(layer);

            // Let's make sure the backend is in our list of supported backends. Something went wrong during
            // backend assignment if this check fails.
            ARMNN_ASSERT(backends.find(layer->GetBackendId()) != backends.end());

            // Check each output separately
            for (unsigned int slotIdx = 0; slotIdx < layer->GetNumOutputSlots(); slotIdx++)
            {
                OutputSlot& outputSlot = layer->GetOutputSlot(slotIdx);

                ITensorHandleFactory::FactoryId slotOption = ITensorHandleFactory::LegacyFactoryId;

                // Calculate the factory to use which results in the fewest copies being made.
                switch (layer->GetType())
                {
                    case LayerType::Input:
                        slotOption = CalculateSlotOptionForInput(backends, outputSlot, registry);
                        break;
                    case LayerType::Output:
                        slotOption = CalculateSlotOptionForOutput(backends, outputSlot, registry);
                        break;
                    default:
                        slotOption = CalculateSlotOption(backends, outputSlot, registry);
                        break;
                }
                outputSlot.SetTensorHandleFactory(slotOption);

                // Now determine the "best" edge strategy for each connection given the slotOption.
                unsigned int connectionIdx = 0;
                for (auto&& connection : outputSlot.GetConnections())
                {
                    const Layer& connectedLayer = connection->GetOwningLayer();

                    EdgeStrategy strategy = CalculateEdgeStrategy(backends, slotOption, *layer, connectedLayer, registry);

                    if (strategy == EdgeStrategy::Undefined)
                    {
                        result.m_Error = true;
                        if (errMessages)
                        {
                            errMessages.value().emplace_back("Could not find valid strategy required for compatibility"
                                                             " between backends.");
                        }
                        return;
                    }

                    outputSlot.SetEdgeStrategy(connectionIdx, strategy);

                    connectionIdx++;
                }
            }
        });

    return result;
}
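
// A minimal end-to-end usage sketch for Optimize() below (illustrative only; assumes
// a populated INetworkPtr 'net' and an IRuntimePtr 'runtime' created elsewhere):
//
//     std::vector<armnn::BackendId> prefs = { armnn::Compute::CpuAcc,
//                                             armnn::Compute::CpuRef };
//     std::vector<std::string> messages;
//     armnn::IOptimizedNetworkPtr optNet =
//         armnn::Optimize(*net, prefs, runtime->GetDeviceSpec(),
//                         armnn::OptimizerOptions(), messages);
//     if (!optNet) { /* inspect 'messages' for the reason */ }
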
IOptimizedNetworkPtr Optimize(const INetwork& inNetwork,
                              const std::vector<BackendId>& backendPreferences,
                              const IDeviceSpec& deviceSpec,
                              const OptimizerOptions& options,
                              Optional<std::vector<std::string>&> messages)
{
    if (backendPreferences.empty())
    {
        throw armnn::InvalidArgumentException("Invoked Optimize with no backends specified");
    }

    if (options.m_ReduceFp32ToFp16 && options.m_ReduceFp32ToBf16)
    {
        throw InvalidArgumentException("BFloat16 and Float16 optimization cannot be enabled at the same time.");
    }

    const Network& network = *PolymorphicDowncast<const Network*>(&inNetwork);
    std::unique_ptr<Graph> graph = std::make_unique<Graph>(network.GetGraph());

    auto optNet = IOptimizedNetworkPtr(new OptimizedNetwork(std::move(graph)), &IOptimizedNetwork::Destroy);

    OptimizedNetwork* optNetObjPtr = PolymorphicDowncast<OptimizedNetwork*>(optNet.get());

    // Get the optimized graph
    Graph& optGraph = optNetObjPtr->GetGraph();

    // Perform optimisation passes
    using namespace optimizations;
    Optimizer::Pass(optGraph, MakeOptimizations(SquashEqualPermuteSiblings(),
                                                SquashEqualTransposeSiblings(),
                                                SquashEqualReshapeSiblings(),
                                                OptimizeInversePermutes(),
                                                OptimizeInverseTransposes(),
                                                MovePermuteUp(),
                                                MoveTransposeUp(),
                                                PermuteAsReshape(),
                                                TransposeAsReshape(),
                                                OptimizeConsecutiveReshapes(),
                                                FoldPadIntoConvolution2d(),
                                                PermuteAndBatchToSpaceAsDepthToSpace(),
                                                TransposeAndBatchToSpaceAsDepthToSpace()));

    // Infer the tensor infos for all output slots. Throws an exception on failure.
    optGraph.InferTensorInfos();

    // If the Fp32-to-Fp16 optimization is enabled, convert the Fp32 network to Fp16.
    if (options.m_ReduceFp32ToFp16)
    {
        Optimizer::Pass(optGraph, MakeOptimizations(Fp32NetworkToFp16Converter()));
        Optimizer::Pass(optGraph, MakeOptimizations(ConvertConstantsFloatToHalf()));
    }

    // If the Fp32-to-Bf16 optimization is enabled, convert the Fp32 network to Bf16:
    // the inputs of Convolution2d and FullyConnected are converted from Fp32 to Bf16,
    // and only the constant weights of Convolution2d and FullyConnected are converted.
    if (options.m_ReduceFp32ToBf16)
    {
        Optimizer::Pass(optGraph, MakeOptimizations(Fp32NetworkToBf16Converter()));
    }

    // Initialize backend settings
    BackendSettings backendSettings(backendPreferences, deviceSpec);
    if (backendSettings.GetAvailablePreferredBackends().empty())
    {
        std::stringstream failureMsg;
        failureMsg << "None of the preferred backends " << backendPreferences
                   << " are supported. Current platform provides " << backendSettings.m_SupportedBackends;
        ReportError(failureMsg.str(), messages);
        return IOptimizedNetworkPtr(nullptr, &IOptimizedNetwork::Destroy);
    }

    // Create a map to temporarily hold the initialized backend objects
    TensorHandleFactoryRegistry tensorHandleFactoryRegistry;
    BackendsMap backends = CreateSupportedBackends(tensorHandleFactoryRegistry, backendSettings);

    // Assign an available backend to each layer
    Graph::Iterator firstLayer = optGraph.begin();
    Graph::Iterator lastLayer  = optGraph.end();
    OptimizationResult assignBackendsResult = AssignBackends(optNetObjPtr,
                                                             backendSettings,
                                                             firstLayer,
                                                             lastLayer,
                                                             messages);
    if (assignBackendsResult.m_Error)
    {
        // Failed to assign a backend to each layer
        return IOptimizedNetworkPtr(nullptr, &IOptimizedNetwork::Destroy);
    }

    Optimizer::Pass(optGraph, MakeOptimizations(OptimizeInverseConversionsFp16(),
                                                OptimizeInverseConversionsFp32()));

    // Apply the backend-specific optimizations
    OptimizationResult backendOptimizationResult = ApplyBackendOptimizations(optNetObjPtr,
                                                                             backendSettings,
                                                                             backends,
                                                                             messages);
    if (backendOptimizationResult.m_Error)
    {
        // Failed to apply the backend-specific optimizations
        return IOptimizedNetworkPtr(nullptr, &IOptimizedNetwork::Destroy);
    }

    // If the debug flag is set, then insert a DebugLayer after each layer.
    // Doing this after applying the backend optimizations as they might have changed some layers.
    if (options.m_Debug)
    {
        Optimizer::Pass(optGraph, MakeOptimizations(InsertDebugLayer()));
    }

    // Calculate the compatibility strategies for tensor handles
    OptimizationResult strategyResult = SelectTensorHandleStrategy(optGraph,
                                                                   backends,
                                                                   tensorHandleFactoryRegistry,
                                                                   messages);
    if (strategyResult.m_Error)
    {
        // Failed to select the tensor handle compatibility strategies
        return IOptimizedNetworkPtr(nullptr, &IOptimizedNetwork::Destroy);
    }

    // Based on the tensor handle strategy determined above, insert copy layers where required.
    optGraph.AddCompatibilityLayers(backends, tensorHandleFactoryRegistry);

    // Convert constants
    Optimizer::Pass(optGraph, MakeOptimizations(ConvertConstantsFloatToHalf()));
    Optimizer::Pass(optGraph, MakeOptimizations(ConvertConstantsHalfToFloat()));

    // Run backend-specific optimizations (deprecated)
    for (auto&& chosenBackend : backendSettings.m_SelectedBackends)
    {
        auto factoryFun = BackendRegistryInstance().GetFactory(chosenBackend);
        auto backendPtr = factoryFun();
        ARMNN_ASSERT(backendPtr.get() != nullptr);

        ARMNN_NO_DEPRECATE_WARN_BEGIN
        auto backendSpecificOptimizations = backendPtr->GetOptimizations();
        ARMNN_NO_DEPRECATE_WARN_END

        if (!backendSpecificOptimizations.empty())
        {
            Optimizer::Pass(optNetObjPtr->GetGraph(), backendSpecificOptimizations);
        }
    }

    return optNet;
}

Network::Network()
    : m_Graph(std::make_unique<Graph>())
{
}

Status Network::PrintGraph()
{
    m_Graph->Print();
    return Status::Success;
}

IConnectableLayer* Network::AddInputLayer(LayerBindingId id, const char* name)
{
    return m_Graph->AddLayer<InputLayer>(id, name);
}

IConnectableLayer* Network::AddBatchToSpaceNdLayer(const BatchToSpaceNdDescriptor& batchToSpaceNdDescriptor,
                                                   const char* name)
{
    return m_Graph->AddLayer<BatchToSpaceNdLayer>(batchToSpaceNdDescriptor, name);
}

IConnectableLayer* Network::AddComparisonLayer(const ComparisonDescriptor& comparisonDescriptor,
                                               const char* name)
{
    return m_Graph->AddLayer<ComparisonLayer>(comparisonDescriptor, name);
}

IConnectableLayer* Network::AddElementwiseUnaryLayer(const ElementwiseUnaryDescriptor& elementwiseUnaryDescriptor,
                                                     const char* name)
{
    return m_Graph->AddLayer<ElementwiseUnaryLayer>(elementwiseUnaryDescriptor, name);
}

IConnectableLayer* Network::AddFullyConnectedLayerImpl(const FullyConnectedDescriptor& fullyConnectedDescriptor,
                                                       const ConstTensor& weights,
                                                       const Optional<ConstTensor>& biases,
                                                       const char* name)
{
    if (fullyConnectedDescriptor.m_BiasEnabled && !biases.has_value())
    {
        throw InvalidArgumentException("AddFullyConnectedLayer: biases cannot be empty");
    }

    const auto layer = m_Graph->AddLayer<FullyConnectedLayer>(fullyConnectedDescriptor, name);

    layer->m_Weight = std::make_unique<ScopedCpuTensorHandle>(weights);

    if (fullyConnectedDescriptor.m_BiasEnabled)
    {
        layer->m_Bias = std::make_unique<ScopedCpuTensorHandle>(biases.value());
    }

    return layer;
}

IConnectableLayer* Network::AddFullyConnectedLayer(const FullyConnectedDescriptor& fullyConnectedDescriptor,
                                                   const ConstTensor& weights,
                                                   const Optional<ConstTensor>& biases,
                                                   const char* name)
{
    return AddFullyConnectedLayerImpl(fullyConnectedDescriptor, weights, biases, name);
}

IConnectableLayer* Network::AddFullyConnectedLayer(const FullyConnectedDescriptor& fullyConnectedDescriptor,
                                                   const ConstTensor& weights,
                                                   const char* name)
{
    Optional<ConstTensor> biases;
    return AddFullyConnectedLayerImpl(fullyConnectedDescriptor, weights, biases, name);
}

IConnectableLayer* Network::AddFullyConnectedLayer(const FullyConnectedDescriptor& fullyConnectedDescriptor,
                                                   const ConstTensor& weights,
                                                   const ConstTensor& biases,
                                                   const char* name)
{
    Optional<ConstTensor> optionalBiases(biases);
    return AddFullyConnectedLayerImpl(fullyConnectedDescriptor, weights, optionalBiases, name);
}
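
// A hedged usage sketch for the overloads above (shapes and data pointers are
// illustrative; 'weightData' and 'biasData' stand in for caller-provided buffers):
//
//     armnn::FullyConnectedDescriptor fcDesc;
//     fcDesc.m_BiasEnabled = true;
//     armnn::TensorInfo wInfo({ 16, 8 }, armnn::DataType::Float32);
//     armnn::TensorInfo bInfo({ 16 },    armnn::DataType::Float32);
//     armnn::ConstTensor weights(wInfo, weightData);
//     armnn::ConstTensor bias(bInfo, biasData);
//     armnn::IConnectableLayer* fc = network.AddFullyConnectedLayer(fcDesc, weights, bias, "fc1");
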
IConnectableLayer* Network::AddConcatLayer(const ConcatDescriptor& concatDescriptor,
                                           const char* name)
{
    return m_Graph->AddLayer<ConcatLayer>(concatDescriptor, name);
}

IConnectableLayer* Network::AddConvolution2dLayerImpl(const Convolution2dDescriptor& convolution2dDescriptor,
                                                      const ConstTensor& weights,
                                                      const Optional<ConstTensor>& biases,
                                                      const char* name)
{
    if (convolution2dDescriptor.m_BiasEnabled && !biases.has_value())
    {
        throw InvalidArgumentException("AddConvolution2dLayer: biases cannot be empty");
    }

    const auto layer = m_Graph->AddLayer<Convolution2dLayer>(convolution2dDescriptor, name);

    layer->m_Weight = std::make_unique<ScopedCpuTensorHandle>(weights);

    if (convolution2dDescriptor.m_BiasEnabled)
    {
        layer->m_Bias = std::make_unique<ScopedCpuTensorHandle>(biases.value());
    }

    return layer;
}

IConnectableLayer* Network::AddConvolution2dLayer(const Convolution2dDescriptor& convolution2dDescriptor,
                                                  const ConstTensor& weights,
                                                  const Optional<ConstTensor>& biases,
                                                  const char* name)
{
    return AddConvolution2dLayerImpl(convolution2dDescriptor, weights, biases, name);
}

IConnectableLayer* Network::AddConvolution2dLayer(const Convolution2dDescriptor& convolution2dDescriptor,
                                                  const ConstTensor& weights,
                                                  const char* name)
{
    Optional<ConstTensor> biases;
    return AddConvolution2dLayerImpl(convolution2dDescriptor, weights, biases, name);
}

IConnectableLayer* Network::AddConvolution2dLayer(const Convolution2dDescriptor& convolution2dDescriptor,
                                                  const ConstTensor& weights,
                                                  const ConstTensor& biases,
                                                  const char* name)
{
    Optional<ConstTensor> optionalBiases(biases);
    return AddConvolution2dLayerImpl(convolution2dDescriptor, weights, optionalBiases, name);
}
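
// And similarly for convolutions (a sketch; only the common descriptor fields are
// shown, and 'weights' is a ConstTensor prepared as in the sketch above):
//
//     armnn::Convolution2dDescriptor convDesc;
//     convDesc.m_StrideX     = 1;
//     convDesc.m_StrideY     = 1;
//     convDesc.m_BiasEnabled = false;
//     convDesc.m_DataLayout  = armnn::DataLayout::NHWC;
//     armnn::IConnectableLayer* conv = network.AddConvolution2dLayer(convDesc, weights, "conv1");
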
IConnectableLayer* Network::AddDepthwiseConvolution2dLayerImpl(
    const DepthwiseConvolution2dDescriptor& convolution2dDescriptor,
    const ConstTensor& weights,
    const Optional<ConstTensor>& biases,
    const char* name)
{
    if (convolution2dDescriptor.m_BiasEnabled && !biases.has_value())
    {
        throw InvalidArgumentException("AddDepthwiseConvolution2dLayer: biases cannot be empty");
    }

    const auto layer = m_Graph->AddLayer<DepthwiseConvolution2dLayer>(convolution2dDescriptor, name);

    layer->m_Weight = std::make_unique<ScopedCpuTensorHandle>(weights);

    if (convolution2dDescriptor.m_BiasEnabled)
    {
        layer->m_Bias = std::make_unique<ScopedCpuTensorHandle>(biases.value());
    }

    return layer;
}

IConnectableLayer* Network::AddDepthToSpaceLayer(const DepthToSpaceDescriptor& depthToSpaceDescriptor,
                                                 const char* name)
{
    return m_Graph->AddLayer<DepthToSpaceLayer>(depthToSpaceDescriptor, name);
}

IConnectableLayer* Network::AddDepthwiseConvolution2dLayer(
    const DepthwiseConvolution2dDescriptor& convolution2dDescriptor,
    const ConstTensor& weights,
    const Optional<ConstTensor>& biases,
    const char* name)
{
    return AddDepthwiseConvolution2dLayerImpl(convolution2dDescriptor, weights, biases, name);
}

IConnectableLayer* Network::AddDepthwiseConvolution2dLayer(
    const DepthwiseConvolution2dDescriptor& convolution2dDescriptor,
    const ConstTensor& weights,
    const char* name)
{
    Optional<ConstTensor> biases;
    return AddDepthwiseConvolution2dLayerImpl(convolution2dDescriptor, weights, biases, name);
}

IConnectableLayer* Network::AddDepthwiseConvolution2dLayer(
    const DepthwiseConvolution2dDescriptor& convolution2dDescriptor,
    const ConstTensor& weights,
    const ConstTensor& biases,
    const char* name)
{
    Optional<ConstTensor> optionalBiases(biases);
    return AddDepthwiseConvolution2dLayerImpl(convolution2dDescriptor, weights, optionalBiases, name);
}

IConnectableLayer* Network::AddDetectionPostProcessLayer(const armnn::DetectionPostProcessDescriptor& descriptor,
                                                         const ConstTensor& anchors, const char* name)
{
    const auto layer = m_Graph->AddLayer<DetectionPostProcessLayer>(descriptor, name);

    layer->m_Anchors = std::make_unique<ScopedCpuTensorHandle>(anchors);

    return layer;
}

IConnectableLayer* Network::AddPermuteLayer(const PermuteDescriptor& permuteDescriptor,
                                            const char* name)
{
    return m_Graph->AddLayer<PermuteLayer>(permuteDescriptor, name);
}

IConnectableLayer* Network::AddPooling2dLayer(const Pooling2dDescriptor& pooling2dDescriptor,
                                              const char* name)
{
    return m_Graph->AddLayer<Pooling2dLayer>(pooling2dDescriptor, name);
}

IConnectableLayer* Network::AddActivationLayer(const ActivationDescriptor& activationDescriptor,
                                               const char* name)
{
    return m_Graph->AddLayer<ActivationLayer>(activationDescriptor, name);
}

IConnectableLayer* Network::AddArgMinMaxLayer(const ArgMinMaxDescriptor& argMinMaxDescriptor,
                                              const char* name)
{
    return m_Graph->AddLayer<ArgMinMaxLayer>(argMinMaxDescriptor, name);
}

IConnectableLayer* Network::AddNormalizationLayer(const NormalizationDescriptor& normalizationDescriptor,
                                                  const char* name)
{
    return m_Graph->AddLayer<NormalizationLayer>(normalizationDescriptor, name);
}

IConnectableLayer* Network::AddSliceLayer(const SliceDescriptor& sliceDescriptor, const char* name)
{
    return m_Graph->AddLayer<SliceLayer>(sliceDescriptor, name);
}

IConnectableLayer* Network::AddSoftmaxLayer(const SoftmaxDescriptor& softmaxDescriptor,
                                            const char* name)
{
    return m_Graph->AddLayer<SoftmaxLayer>(softmaxDescriptor, name);
}

IConnectableLayer* Network::AddSplitterLayer(const ViewsDescriptor& splitterDescriptor,
                                             const char* name)
{
    return m_Graph->AddLayer<SplitterLayer>(splitterDescriptor, name);
}

IConnectableLayer* Network::AddMaximumLayer(const char* name)
{
    return m_Graph->AddLayer<MaximumLayer>(name);
}

IConnectableLayer* Network::AddMinimumLayer(const char* name)
{
    return m_Graph->AddLayer<MinimumLayer>(name);
}

IConnectableLayer* Network::AddMergerLayer(const MergerDescriptor& mergerDescriptor,
                                           const char* name)
{
    return AddConcatLayer(mergerDescriptor, name);
}

IConnectableLayer* Network::AddAbsLayer(const char* name)
{
    return AddElementwiseUnaryLayer(ElementwiseUnaryDescriptor(UnaryOperation::Abs), name);
}

IConnectableLayer* Network::AddAdditionLayer(const char* name)
{
    return m_Graph->AddLayer<AdditionLayer>(name);
}

IConnectableLayer* Network::AddMultiplicationLayer(const char* name)
{
    return m_Graph->AddLayer<MultiplicationLayer>(name);
}

IConnectableLayer* Network::AddOutputLayer(LayerBindingId id, const char* name)
{
    return m_Graph->AddLayer<OutputLayer>(id, name);
}

IConnectableLayer* Network::AddBatchNormalizationLayer(const BatchNormalizationDescriptor& desc,
                                                       const ConstTensor& mean,
                                                       const ConstTensor& variance,
                                                       const ConstTensor& beta,
                                                       const ConstTensor& gamma,
                                                       const char* name)
{
    const auto layer = m_Graph->AddLayer<BatchNormalizationLayer>(desc, name);

    layer->m_Mean     = std::make_unique<ScopedCpuTensorHandle>(mean);
    layer->m_Variance = std::make_unique<ScopedCpuTensorHandle>(variance);
    layer->m_Beta     = std::make_unique<ScopedCpuTensorHandle>(beta);
    layer->m_Gamma    = std::make_unique<ScopedCpuTensorHandle>(gamma);

    return layer;
}

IConnectableLayer* Network::AddResizeBilinearLayer(const ResizeBilinearDescriptor& descriptor,
                                                   const char* name)
{
    ResizeDescriptor resizeDescriptor;
    resizeDescriptor.m_Method       = ResizeMethod::Bilinear;
    resizeDescriptor.m_DataLayout   = descriptor.m_DataLayout;
    resizeDescriptor.m_TargetWidth  = descriptor.m_TargetWidth;
    resizeDescriptor.m_TargetHeight = descriptor.m_TargetHeight;

    return m_Graph->AddLayer<ResizeLayer>(resizeDescriptor, name);
}
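
// Note: AddResizeBilinearLayer above is a thin compatibility shim: it builds an
// equivalent ResizeDescriptor (ResizeMethod::Bilinear) and adds a plain ResizeLayer,
// so new code can call AddResizeLayer directly.
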
IConnectableLayer* Network::AddResizeLayer(const ResizeDescriptor& resizeDescriptor, const char* name)
{
    return m_Graph->AddLayer<ResizeLayer>(resizeDescriptor, name);
}

IConnectableLayer* Network::AddInstanceNormalizationLayer(const InstanceNormalizationDescriptor& desc,
                                                          const char* name)
{
    return m_Graph->AddLayer<InstanceNormalizationLayer>(desc, name);
}

IConnectableLayer* Network::AddL2NormalizationLayer(const L2NormalizationDescriptor& desc,
                                                    const char* name)
{
    return m_Graph->AddLayer<L2NormalizationLayer>(desc, name);
}

IConnectableLayer* Network::AddLogSoftmaxLayer(const LogSoftmaxDescriptor& desc,
                                               const char* name)
{
    return m_Graph->AddLayer<LogSoftmaxLayer>(desc, name);
}

IConnectableLayer* Network::AddConstantLayer(const ConstTensor& input, const char* name)
{
    auto layer = m_Graph->AddLayer<ConstantLayer>(name);

    layer->m_LayerOutput = std::make_unique<ScopedCpuTensorHandle>(input);

    return layer;
}

IConnectableLayer* Network::AddReshapeLayer(const ReshapeDescriptor& reshapeDescriptor,
                                            const char* name)
{
    return m_Graph->AddLayer<ReshapeLayer>(reshapeDescriptor, name);
}

IConnectableLayer* Network::AddSpaceToBatchNdLayer(const SpaceToBatchNdDescriptor& spaceToBatchNdDescriptor,
                                                   const char* name)
{
    return m_Graph->AddLayer<SpaceToBatchNdLayer>(spaceToBatchNdDescriptor, name);
}

IConnectableLayer* Network::AddSpaceToDepthLayer(const SpaceToDepthDescriptor& spaceToDepthDescriptor,
                                                 const char* name)
{
    return m_Graph->AddLayer<SpaceToDepthLayer>(spaceToDepthDescriptor, name);
}

IConnectableLayer* Network::AddFloorLayer(const char* name)
{
    return m_Graph->AddLayer<FloorLayer>(name);
}

IConnectableLayer* Network::AddLstmLayer(const LstmDescriptor& descriptor,
                                         const LstmInputParams& params,
                                         const char* name)
{
    const auto layer = m_Graph->AddLayer<LstmLayer>(descriptor, name);

    // LSTM basic parameters
    layer->m_BasicParameters.m_InputToForgetWeights =
        std::make_unique<ScopedCpuTensorHandle>(*(params.m_InputToForgetWeights));
    layer->m_BasicParameters.m_InputToCellWeights =
        std::make_unique<ScopedCpuTensorHandle>(*(params.m_InputToCellWeights));
    layer->m_BasicParameters.m_InputToOutputWeights =
        std::make_unique<ScopedCpuTensorHandle>(*(params.m_InputToOutputWeights));
    layer->m_BasicParameters.m_RecurrentToForgetWeights =
        std::make_unique<ScopedCpuTensorHandle>(*(params.m_RecurrentToForgetWeights));
    layer->m_BasicParameters.m_RecurrentToCellWeights =
        std::make_unique<ScopedCpuTensorHandle>(*(params.m_RecurrentToCellWeights));
    layer->m_BasicParameters.m_RecurrentToOutputWeights =
        std::make_unique<ScopedCpuTensorHandle>(*(params.m_RecurrentToOutputWeights));
    layer->m_BasicParameters.m_ForgetGateBias =
        std::make_unique<ScopedCpuTensorHandle>(*(params.m_ForgetGateBias));
    layer->m_BasicParameters.m_CellBias =
        std::make_unique<ScopedCpuTensorHandle>(*(params.m_CellBias));
    layer->m_BasicParameters.m_OutputGateBias =
        std::make_unique<ScopedCpuTensorHandle>(*(params.m_OutputGateBias));

    // LSTM CIFG parameters
    if (!descriptor.m_CifgEnabled)
    {
        if (params.m_InputToInputWeights == nullptr)
        {
            throw InvalidArgumentException("AddLstmLayer: Input To Input Weights cannot be NULL "
                                           "when CIFG is disabled.");
        }
        if (params.m_RecurrentToInputWeights == nullptr)
        {
            throw InvalidArgumentException(
                "AddLstmLayer: Recurrent To Input Weights cannot be NULL "
                "when CIFG is disabled.");
        }
        if (params.m_InputGateBias == nullptr)
        {
            throw InvalidArgumentException("AddLstmLayer: Input Gate Bias cannot be NULL "
                                           "when CIFG is disabled.");
        }
        layer->m_CifgParameters.m_InputToInputWeights =
            std::make_unique<ScopedCpuTensorHandle>(*(params.m_InputToInputWeights));
        layer->m_CifgParameters.m_RecurrentToInputWeights =
            std::make_unique<ScopedCpuTensorHandle>(*(params.m_RecurrentToInputWeights));
        layer->m_CifgParameters.m_InputGateBias =
            std::make_unique<ScopedCpuTensorHandle>(*(params.m_InputGateBias));
    }

    // LSTM projection parameters
    if (descriptor.m_ProjectionEnabled)
    {
        if (params.m_ProjectionWeights == nullptr)
        {
            throw InvalidArgumentException("AddLstmLayer: Projection Weights cannot be NULL "
                                           "when projection is enabled.");
        }
        layer->m_ProjectionParameters.m_ProjectionWeights =
            std::make_unique<ScopedCpuTensorHandle>(*(params.m_ProjectionWeights));
        if (params.m_ProjectionBias != nullptr)
        {
            layer->m_ProjectionParameters.m_ProjectionBias =
                std::make_unique<ScopedCpuTensorHandle>(*(params.m_ProjectionBias));
        }
    }

    // LSTM peephole parameters
    if (descriptor.m_PeepholeEnabled)
    {
        if (!descriptor.m_CifgEnabled)
        {
            if (params.m_CellToInputWeights == nullptr)
            {
                throw InvalidArgumentException("AddLstmLayer: Cell To Input Weights cannot be NULL "
                                               "when Peephole is enabled and CIFG disabled.");
            }

            layer->m_PeepholeParameters.m_CellToInputWeights =
                std::make_unique<ScopedCpuTensorHandle>(*(params.m_CellToInputWeights));
        }

        if (params.m_CellToForgetWeights == nullptr)
        {
            throw InvalidArgumentException("AddLstmLayer: Cell To Forget Weights cannot be NULL "
                                           "when Peephole is enabled.");
        }
        if (params.m_CellToOutputWeights == nullptr)
        {
            throw InvalidArgumentException("AddLstmLayer: Cell To Output Weights cannot be NULL "
                                           "when Peephole is enabled.");
        }

        layer->m_PeepholeParameters.m_CellToForgetWeights =
            std::make_unique<ScopedCpuTensorHandle>(*(params.m_CellToForgetWeights));
        layer->m_PeepholeParameters.m_CellToOutputWeights =
            std::make_unique<ScopedCpuTensorHandle>(*(params.m_CellToOutputWeights));
    }

    // LSTM layer normalization parameters
    if (descriptor.m_LayerNormEnabled)
    {
        if (!descriptor.m_CifgEnabled)
        {
            if (params.m_InputLayerNormWeights == nullptr)
            {
                throw InvalidArgumentException("AddLstmLayer: Input layer normalization weights cannot be NULL "
                                               "when layer normalization is enabled and CIFG disabled.");
            }
            layer->m_LayerNormParameters.m_InputLayerNormWeights =
                std::make_unique<ScopedCpuTensorHandle>(*(params.m_InputLayerNormWeights));
        }

        if (params.m_ForgetLayerNormWeights == nullptr)
        {
            throw InvalidArgumentException("AddLstmLayer: Forget layer normalization weights cannot be NULL "
                                           "when layer normalization is enabled.");
        }
        if (params.m_CellLayerNormWeights == nullptr)
        {
            throw InvalidArgumentException("AddLstmLayer: Cell layer normalization weights cannot be NULL "
                                           "when layer normalization is enabled.");
        }
        if (params.m_OutputLayerNormWeights == nullptr)
        {
            throw InvalidArgumentException("AddLstmLayer: Output layer normalization weights cannot be NULL "
                                           "when layer normalization is enabled.");
        }
        layer->m_LayerNormParameters.m_ForgetLayerNormWeights =
            std::make_unique<ScopedCpuTensorHandle>(*(params.m_ForgetLayerNormWeights));
        layer->m_LayerNormParameters.m_CellLayerNormWeights =
            std::make_unique<ScopedCpuTensorHandle>(*(params.m_CellLayerNormWeights));
        layer->m_LayerNormParameters.m_OutputLayerNormWeights =
            std::make_unique<ScopedCpuTensorHandle>(*(params.m_OutputLayerNormWeights));
    }

    return layer;
}
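
// In summary, the LSTM parameter groups validated above are:
//     basic (always required):              input/recurrent weights and biases for
//                                           the forget, cell and output gates;
//     CIFG (when m_CifgEnabled is false):   input-gate weights and bias;
//     projection (m_ProjectionEnabled):     projection weights, plus an optional bias;
//     peephole (m_PeepholeEnabled):         cell-to-gate weight vectors;
//     layer norm (m_LayerNormEnabled):      per-gate normalization weight vectors.
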
IConnectableLayer* Network::AddDivisionLayer(const char* name)
{
    return m_Graph->AddLayer<DivisionLayer>(name);
}

IConnectableLayer* Network::AddSubtractionLayer(const char* name)
{
    return m_Graph->AddLayer<SubtractionLayer>(name);
}

IConnectableLayer* Network::AddMeanLayer(const MeanDescriptor& meanDescriptor, const char* name)
{
    return m_Graph->AddLayer<MeanLayer>(meanDescriptor, name);
}

IConnectableLayer* Network::AddPadLayer(const PadDescriptor& padDescriptor, const char* name)
{
    return m_Graph->AddLayer<PadLayer>(padDescriptor, name);
}

IConnectableLayer* Network::AddQuantizeLayer(const char* name)
{
    return m_Graph->AddLayer<QuantizeLayer>(name);
}

IConnectableLayer* Network::AddDequantizeLayer(const char* name)
{
    return m_Graph->AddLayer<DequantizeLayer>(name);
}

IConnectableLayer* Network::AddStridedSliceLayer(const StridedSliceDescriptor& stridedSliceDescriptor,
                                                 const char* name)
{
    return m_Graph->AddLayer<StridedSliceLayer>(stridedSliceDescriptor, name);
}

IConnectableLayer* Network::AddGreaterLayer(const char* name)
{
    return AddComparisonLayer(ComparisonDescriptor(ComparisonOperation::Greater), name);
}

IConnectableLayer* Network::AddEqualLayer(const char* name)
{
    return AddComparisonLayer(ComparisonDescriptor(ComparisonOperation::Equal), name);
}

IConnectableLayer* Network::AddRsqrtLayer(const char* name)
{
    return AddElementwiseUnaryLayer(ElementwiseUnaryDescriptor(UnaryOperation::Rsqrt), name);
}

IConnectableLayer* Network::AddGatherLayer(const char* name)
{
    return m_Graph->AddLayer<GatherLayer>(name);
}

IConnectableLayer* Network::AddMergeLayer(const char* name)
{
    return m_Graph->AddLayer<MergeLayer>(name);
}

IConnectableLayer* Network::AddSwitchLayer(const char* name)
{
    return m_Graph->AddLayer<SwitchLayer>(name);
}

IConnectableLayer* Network::AddPreluLayer(const char* name)
{
    return m_Graph->AddLayer<PreluLayer>(name);
}

IConnectableLayer* Network::AddTransposeConvolution2dLayer(const TransposeConvolution2dDescriptor& descriptor,
                                                           const ConstTensor& weights,
                                                           const Optional<ConstTensor>& biases,
                                                           const char* name)
{
    if (descriptor.m_BiasEnabled && !biases.has_value())
    {
        throw InvalidArgumentException("AddTransposeConvolution2dLayer: Biases cannot be empty");
    }

    const auto layer = m_Graph->AddLayer<TransposeConvolution2dLayer>(descriptor, name);

    layer->m_Weight = std::make_unique<ScopedCpuTensorHandle>(weights);

    if (descriptor.m_BiasEnabled)
    {
        layer->m_Bias = std::make_unique<ScopedCpuTensorHandle>(biases.value());
    }

    return layer;
}

IConnectableLayer* Network::AddTransposeLayer(const TransposeDescriptor& transposeDescriptor,
                                              const char* name)
{
    return m_Graph->AddLayer<TransposeLayer>(transposeDescriptor, name);
}

IConnectableLayer* Network::AddStackLayer(const StackDescriptor& stackDescriptor,
                                          const char* name)
{
    return m_Graph->AddLayer<StackLayer>(stackDescriptor, name);
}

IConnectableLayer* Network::AddStandInLayer(const StandInDescriptor& desc,
                                            const char* name)
{
    return m_Graph->AddLayer<StandInLayer>(desc, name);
}

IConnectableLayer* Network::AddQuantizedLstmLayer(const QuantizedLstmInputParams& params,
                                                  const char* name)
{
    const auto layer = m_Graph->AddLayer<QuantizedLstmLayer>(name);

    // InputToX weights
    layer->m_QuantizedLstmParameters.m_InputToInputWeights =
        std::make_unique<ScopedCpuTensorHandle>(params.GetInputToInputWeights());
    layer->m_QuantizedLstmParameters.m_InputToForgetWeights =
        std::make_unique<ScopedCpuTensorHandle>(params.GetInputToForgetWeights());
    layer->m_QuantizedLstmParameters.m_InputToCellWeights =
        std::make_unique<ScopedCpuTensorHandle>(params.GetInputToCellWeights());
    layer->m_QuantizedLstmParameters.m_InputToOutputWeights =
        std::make_unique<ScopedCpuTensorHandle>(params.GetInputToOutputWeights());

    // RecurrentToX weights
    layer->m_QuantizedLstmParameters.m_RecurrentToInputWeights =
        std::make_unique<ScopedCpuTensorHandle>(params.GetRecurrentToInputWeights());
    layer->m_QuantizedLstmParameters.m_RecurrentToForgetWeights =
        std::make_unique<ScopedCpuTensorHandle>(params.GetRecurrentToForgetWeights());
    layer->m_QuantizedLstmParameters.m_RecurrentToCellWeights =
        std::make_unique<ScopedCpuTensorHandle>(params.GetRecurrentToCellWeights());
    layer->m_QuantizedLstmParameters.m_RecurrentToOutputWeights =
        std::make_unique<ScopedCpuTensorHandle>(params.GetRecurrentToOutputWeights());

    // Bias
    layer->m_QuantizedLstmParameters.m_InputGateBias =
        std::make_unique<ScopedCpuTensorHandle>(params.GetInputGateBias());
    layer->m_QuantizedLstmParameters.m_ForgetGateBias =
        std::make_unique<ScopedCpuTensorHandle>(params.GetForgetGateBias());
    layer->m_QuantizedLstmParameters.m_CellBias =
        std::make_unique<ScopedCpuTensorHandle>(params.GetCellBias());
    layer->m_QuantizedLstmParameters.m_OutputGateBias =
        std::make_unique<ScopedCpuTensorHandle>(params.GetOutputGateBias());

    return layer;
}

IConnectableLayer* Network::AddQLstmLayer(const QLstmDescriptor& descriptor,
                                          const LstmInputParams& params,
                                          const char* name)
{
    const auto layer = m_Graph->AddLayer<QLstmLayer>(descriptor, name);

    // QLstm basic parameters
    layer->m_BasicParameters.m_InputToForgetWeights =
        std::make_unique<ScopedCpuTensorHandle>(*(params.m_InputToForgetWeights));
    layer->m_BasicParameters.m_InputToCellWeights =
        std::make_unique<ScopedCpuTensorHandle>(*(params.m_InputToCellWeights));
    layer->m_BasicParameters.m_InputToOutputWeights =
        std::make_unique<ScopedCpuTensorHandle>(*(params.m_InputToOutputWeights));
    layer->m_BasicParameters.m_RecurrentToForgetWeights =
        std::make_unique<ScopedCpuTensorHandle>(*(params.m_RecurrentToForgetWeights));
    layer->m_BasicParameters.m_RecurrentToCellWeights =
        std::make_unique<ScopedCpuTensorHandle>(*(params.m_RecurrentToCellWeights));
    layer->m_BasicParameters.m_RecurrentToOutputWeights =
        std::make_unique<ScopedCpuTensorHandle>(*(params.m_RecurrentToOutputWeights));
    layer->m_BasicParameters.m_ForgetGateBias =
        std::make_unique<ScopedCpuTensorHandle>(*(params.m_ForgetGateBias));
    layer->m_BasicParameters.m_CellBias =
        std::make_unique<ScopedCpuTensorHandle>(*(params.m_CellBias));
    layer->m_BasicParameters.m_OutputGateBias =
        std::make_unique<ScopedCpuTensorHandle>(*(params.m_OutputGateBias));

    // QLstm CIFG parameters
    if (!descriptor.m_CifgEnabled)
    {
        if (params.m_InputToInputWeights == nullptr)
        {
            throw InvalidArgumentException("AddQLstmLayer: Input To Input Weights cannot be NULL");
        }

        if (params.m_RecurrentToInputWeights == nullptr)
        {
            throw InvalidArgumentException(
                "AddQLstmLayer: Recurrent To Input Weights cannot be NULL");
        }

        if (params.m_InputGateBias == nullptr)
        {
            throw InvalidArgumentException("AddQLstmLayer: Input Gate Bias cannot be NULL");
        }

        layer->m_CifgParameters.m_InputToInputWeights =
            std::make_unique<ScopedCpuTensorHandle>(*(params.m_InputToInputWeights));
        layer->m_CifgParameters.m_RecurrentToInputWeights =
            std::make_unique<ScopedCpuTensorHandle>(*(params.m_RecurrentToInputWeights));
        layer->m_CifgParameters.m_InputGateBias =
            std::make_unique<ScopedCpuTensorHandle>(*(params.m_InputGateBias));
    }

    // QLstm projection parameters
    if (descriptor.m_ProjectionEnabled)
    {
        if (params.m_ProjectionWeights == nullptr)
        {
            throw InvalidArgumentException("AddQLstmLayer: Projection Weights cannot be NULL");
        }

        layer->m_ProjectionParameters.m_ProjectionWeights =
            std::make_unique<ScopedCpuTensorHandle>(*(params.m_ProjectionWeights));

        // The projection bias is optional even if projection is enabled
        if (params.m_ProjectionBias != nullptr)
        {
            layer->m_ProjectionParameters.m_ProjectionBias =
                std::make_unique<ScopedCpuTensorHandle>(*(params.m_ProjectionBias));
        }
    }

    // QLstm peephole parameters
    if (descriptor.m_PeepholeEnabled)
    {
        if (params.m_CellToForgetWeights == nullptr)
        {
            throw InvalidArgumentException("AddQLstmLayer: Cell To Forget Weights cannot be NULL");
        }

        if (params.m_CellToOutputWeights == nullptr)
        {
            throw InvalidArgumentException("AddQLstmLayer: Cell To Output Weights cannot be NULL");
        }

        if (!descriptor.m_CifgEnabled)
        {
            if (params.m_CellToInputWeights == nullptr)
            {
                throw InvalidArgumentException("AddQLstmLayer: Cell To Input Weights cannot be NULL");
            }

            layer->m_PeepholeParameters.m_CellToInputWeights =
                std::make_unique<ScopedCpuTensorHandle>(*(params.m_CellToInputWeights));
        }

        layer->m_PeepholeParameters.m_CellToForgetWeights =
            std::make_unique<ScopedCpuTensorHandle>(*(params.m_CellToForgetWeights));
        layer->m_PeepholeParameters.m_CellToOutputWeights =
            std::make_unique<ScopedCpuTensorHandle>(*(params.m_CellToOutputWeights));
    }

    // QLstm layer normalization parameters
    if (descriptor.m_LayerNormEnabled)
    {
        if (params.m_ForgetLayerNormWeights == nullptr)
        {
            throw InvalidArgumentException("AddQLstmLayer: Forget layer normalization weights cannot be NULL");
        }

        if (params.m_CellLayerNormWeights == nullptr)
        {
            throw InvalidArgumentException("AddQLstmLayer: Cell layer normalization weights cannot be NULL");
        }

        if (params.m_OutputLayerNormWeights == nullptr)
        {
            throw InvalidArgumentException("AddQLstmLayer: Output layer normalization weights cannot be NULL");
        }

        if (!descriptor.m_CifgEnabled)
        {
            if (params.m_InputLayerNormWeights == nullptr)
            {
                throw InvalidArgumentException("AddQLstmLayer: Input layer normalization weights cannot be NULL");
            }

            layer->m_LayerNormParameters.m_InputLayerNormWeights =
                std::make_unique<ScopedCpuTensorHandle>(*(params.m_InputLayerNormWeights));
        }

        layer->m_LayerNormParameters.m_ForgetLayerNormWeights =
            std::make_unique<ScopedCpuTensorHandle>(*(params.m_ForgetLayerNormWeights));
        layer->m_LayerNormParameters.m_CellLayerNormWeights =
            std::make_unique<ScopedCpuTensorHandle>(*(params.m_CellLayerNormWeights));
        layer->m_LayerNormParameters.m_OutputLayerNormWeights =
            std::make_unique<ScopedCpuTensorHandle>(*(params.m_OutputLayerNormWeights));
    }

    return layer;
}

void Network::Accept(ILayerVisitor& visitor) const
{
    for (auto layer : GetGraph())
    {
        layer->Accept(visitor);
    }
}

OptimizedNetwork::OptimizedNetwork(std::unique_ptr<Graph> graph)
    : m_Graph(std::move(graph)), m_Guid(profiling::ProfilingService::GetNextGuid())
{
}

OptimizedNetwork::~OptimizedNetwork()
{
}

} // namespace armnn