//
// Copyright © 2017 Arm Ltd. All rights reserved.
// SPDX-License-Identifier: MIT
//
#include "Network.hpp"
#include "Graph.hpp"
#include "Layer.hpp"
#include "DeviceSpec.hpp"
#include "Optimizer.hpp"
#include "optimizations/All.hpp"

#include <backendsCommon/CpuTensorHandle.hpp>
#include <backendsCommon/WorkloadFactory.hpp>

#include <armnn/Exceptions.hpp>
#include <armnn/Utils.hpp>
#include <armnn/TypesUtils.hpp>

#include <memory>
#include <sstream>
#include <vector>

#include <boost/assert.hpp>
#include <boost/format.hpp>
#include <boost/log/trivial.hpp>
#include <boost/numeric/conversion/converter_policies.hpp>
#include <boost/cast.hpp>

namespace armnn
{

armnn::INetwork* INetwork::CreateRaw()
{
    return new Network();
}

armnn::INetworkPtr INetwork::Create()
{
    return INetworkPtr(CreateRaw(), &INetwork::Destroy);
}

void INetwork::Destroy(INetwork* network)
{
    delete boost::polymorphic_downcast<Network*>(network);
}

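// Typical client usage (an illustrative sketch, not part of this translation unit):
// networks are created through the factory so that the INetworkPtr custom deleter
// routes destruction back through INetwork::Destroy on the library side.
//
//     armnn::INetworkPtr net = armnn::INetwork::Create();
//     // ... add and connect layers ...
//     // net is released via &INetwork::Destroy when it goes out of scope.
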
Status Network::PrintGraph()
{
    m_Graph->Print();
    return Status::Success;
}

void IOptimizedNetwork::Destroy(IOptimizedNetwork* network)
{
    delete boost::polymorphic_downcast<OptimizedNetwork*>(network);
}

Status OptimizedNetwork::PrintGraph()
{
    m_Graph->Print();
    return Status::Success;
}

Status OptimizedNetwork::SerializeToDot(std::ostream& stream) const
{
    return m_Graph->SerializeToDot(stream);
}

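// Illustrative usage (a sketch; assumes an optimized network 'optNet' built elsewhere
// and <fstream> included by the caller): the graph can be dumped in GraphViz dot
// format for offline inspection.
//
//     std::ofstream dotFile("network.dot");
//     optNet->SerializeToDot(dotFile); // returns Status::Success on success
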
bool CheckScaleSetOnQuantizedType(Layer* layer, Optional<std::vector<std::string>&> errMessages)
{
    bool noErrors = true;
    unsigned int numOutputs = layer->GetNumOutputSlots();
    for (unsigned int i = 0; i < numOutputs; i++) {
        const OutputSlot& outputSlot = layer->GetOutputSlot(i);
        const TensorInfo& info = outputSlot.GetTensorInfo();
        if (DataType::QuantisedAsymm8 == info.GetDataType()) {
            if (0.f == info.GetQuantizationScale()) {
                noErrors = false;
                std::stringstream ss;
                ss << "ERROR: output " << i << " of layer " << GetLayerTypeAsCString(layer->GetType())
                   << " (" << layer->GetNameStr() << ") is of type"
                   << " Quantized 8 bit but its scale parameter has not been set";
                BOOST_LOG_TRIVIAL(warning) << ss.str();
                if (errMessages) {
                    errMessages.value().push_back(ss.str());
                }
            }
        }
    }
    return noErrors;
}

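// For the check above to pass, callers must give every quantized output a non-zero
// scale before optimization. A minimal sketch (shape, scale, and offset are
// illustrative values only):
//
//     armnn::TensorInfo info({1, 16}, armnn::DataType::QuantisedAsymm8);
//     info.SetQuantizationScale(0.05f);  // any non-zero scale
//     info.SetQuantizationOffset(128);
//     layer->GetOutputSlot(0).SetTensorInfo(info);
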
IOptimizedNetworkPtr Optimize(const INetwork& inNetwork,
                              const std::vector<BackendId>& backendPreferences,
                              const IDeviceSpec& deviceSpec,
                              const OptimizerOptions& options,
                              Optional<std::vector<std::string>&> errMessages)
{
    if (backendPreferences.empty()) {
        throw armnn::InvalidArgumentException("Invoked Optimize with no backends specified");
    }

    const Network& network = *boost::polymorphic_downcast<const Network*>(&inNetwork);
    std::unique_ptr<Graph> graph = std::make_unique<Graph>(network.GetGraph());

    auto optNet = IOptimizedNetworkPtr(new OptimizedNetwork(std::move(graph)), &IOptimizedNetwork::Destroy);

    OptimizedNetwork* optNetObjPtr = boost::polymorphic_downcast<OptimizedNetwork*>(optNet.get());

    // Perform optimisation passes.
    using namespace optimizations;
    Optimizer::Pass(optNetObjPtr->GetGraph(), MakeOptimizations(SquashEqualPermuteSiblings(),
                                                                SquashEqualReshapeSiblings(),
                                                                OptimizeInversePermutes(),
                                                                MovePermuteUp(),
                                                                PermuteAsReshape(),
                                                                OptimizeConsecutiveReshapes()));

    // Infer the tensor infos for all output slots. Throws an exception on failure.
    optNetObjPtr->GetGraph().InferTensorInfos();

    // If the Fp32-to-Fp16 reduction option is set, convert the Fp32 network to Fp16.
    if (options.m_ReduceFp32ToFp16)
    {
        Optimizer::Pass(optNetObjPtr->GetGraph(), MakeOptimizations(Fp32NetworkToFp16Converter()));
    }

    // We know that DeviceSpec should be the only implementation of IDeviceSpec.
    const DeviceSpec& spec = *boost::polymorphic_downcast<const DeviceSpec*>(&deviceSpec);
    auto const& supportedBackends = spec.GetSupportedBackends();

    // Determine which of the preferred backends are available for use,
    // and whether CpuRef was specified as one of them.
    bool cpuRefUsed = false;
    std::vector<BackendId> availablePreferredBackends;
    for (const auto& backend : backendPreferences)
    {
        // Check if the backend is in the available backend devices.
        if (supportedBackends.count(backend) > 0)
        {
            availablePreferredBackends.push_back(backend);
            if (backend == armnn::Compute::CpuRef) {
                cpuRefUsed = true;
            }
        }
    }

    if (availablePreferredBackends.empty()) {
        std::stringstream failureMsg;
        failureMsg << "ERROR: None of the preferred backends " << backendPreferences
                   << " are supported. Current platform provides " << supportedBackends;
        BOOST_LOG_TRIVIAL(warning) << failureMsg.str();
        if (errMessages) {
            errMessages.value().push_back(failureMsg.str());
        }
        return IOptimizedNetworkPtr(nullptr, &IOptimizedNetwork::Destroy);
    }

    auto ReturnWithError = [&](Layer* layer)
    {
        std::stringstream failureMsg;
        failureMsg << "ERROR: Layer of type " << GetLayerTypeAsCString(layer->GetType())
                   << " is not supported on any preferred backend " << backendPreferences;
        BOOST_LOG_TRIVIAL(warning) << failureMsg.str();
        if (errMessages) {
            errMessages.value().push_back(failureMsg.str());
        }
        return IOptimizedNetworkPtr(nullptr, &IOptimizedNetwork::Destroy);
    };

    // Assign a compute device to every node in the graph.
    bool bErrorFound = false;
    for (auto&& layer : optNetObjPtr->GetGraph())
    {
        DataType dataType = layer->GetDataType();
        std::string reasonIfUnsupported;
        bool found = false;
        if (!CheckScaleSetOnQuantizedType(layer, errMessages))
        {
            // Don't bail out immediately: find all the quantized outputs
            // which haven't had a scale set, and report them all back.
            bErrorFound = true;
        }

        for (const auto& backend : availablePreferredBackends)
        {
            // The compute device needs to be set on the layer
            // before we can check whether it is supported.
            layer->SetBackendId(backend);
            if (!IWorkloadFactory::IsLayerSupported(*layer, dataType, reasonIfUnsupported))
            {
                if (dataType == DataType::Float16)
                {
                    if (IWorkloadFactory::IsLayerSupported(*layer, DataType::Float32, reasonIfUnsupported)
                        && layer->GetType() != LayerType::ConvertFp32ToFp16
                        && layer->GetType() != LayerType::ConvertFp16ToFp32)
                    {
                        // Insert FP16 -> FP32 conversion layer before current layer.
                        std::vector<ConvertFp16ToFp32Layer*> convertFp16ToFp32Layers =
                            InsertConvertFp16ToFp32LayersBefore(optNetObjPtr->GetGraph(), *layer);

                        // Insert FP32 -> FP16 conversion layer after current layer.
                        std::vector<ConvertFp32ToFp16Layer*> convertFp32ToFp16Layers =
                            InsertConvertFp32ToFp16LayersAfter(optNetObjPtr->GetGraph(), *layer);

                        // Assign a supported backend to the newly introduced conversion layers.
                        auto AssignFirstSupportedBackend = [&](Layer* layer, BackendId preferredBackend)
                        {
                            bool supportedBackendFound = false;
                            std::string reasonIfUnsupported;

                            // Try the preferred backend first.
                            layer->SetBackendId(preferredBackend);
                            if (IWorkloadFactory::IsLayerSupported(*layer,
                                                                   EmptyOptional(),
                                                                   reasonIfUnsupported))
                            {
                                supportedBackendFound = true;
                            }
                            else
                            {
                                for (const auto& backend : availablePreferredBackends)
                                {
                                    // Skip the preferred backend (we already determined that it is not supported).
                                    if (backend == preferredBackend)
                                    {
                                        continue;
                                    }

                                    layer->SetBackendId(backend);
                                    if (IWorkloadFactory::IsLayerSupported(*layer,
                                                                           EmptyOptional(),
                                                                           reasonIfUnsupported))
                                    {
                                        supportedBackendFound = true;
                                        break;
                                    }
                                }
                            }

                            return supportedBackendFound;
                        };

                        for (ConvertFp16ToFp32Layer* convertLayer : convertFp16ToFp32Layers)
                        {
                            if (!AssignFirstSupportedBackend(convertLayer, backend))
                            {
                                return ReturnWithError(convertLayer);
                            }
                        }

                        for (ConvertFp32ToFp16Layer* convertLayer : convertFp32ToFp16Layers)
                        {
                            if (!AssignFirstSupportedBackend(convertLayer, backend))
                            {
                                return ReturnWithError(convertLayer);
                            }
                        }

                        found = true;
                        break;
                    }
                }
                std::stringstream warningMsg;
                warningMsg << "WARNING: Layer of type " << GetLayerTypeAsCString(layer->GetType())
                           << " is not supported on requested backend " << layer->GetBackendId().Get()
                           << " for data type " << GetDataTypeName(dataType)
                           << " (reason: " << reasonIfUnsupported
                           << "), falling back to the next backend.";
                BOOST_LOG_TRIVIAL(warning) << warningMsg.str();
                if (errMessages) {
                    errMessages.value().push_back(warningMsg.str());
                }
            }
            else
            {
                found = true;
                break;
            }
        }

        // If the layer is unsupported by any backend, log the failure and return a null network.
        if (!found) {
            // NOTE: if the layer is not an operation queue type AND we have not got CpuRef as a
            // fallback, we should set the compute device on the layer to CpuRef (these are not
            // available as accelerated operations, or are only available under certain
            // conditions; currently they comprise MemCopy, Constant, Permute).
            armnn::LayerType layerType = layer->GetType();
            if (!cpuRefUsed && (layerType == armnn::LayerType::MemCopy ||
                                layerType == armnn::LayerType::Constant ||
                                layerType == armnn::LayerType::Permute))
            {
                layer->SetBackendId(armnn::Compute::CpuRef);
            }
            else
            {
                return ReturnWithError(layer);
            }
        }
    }

    if (bErrorFound)
    {
        return IOptimizedNetworkPtr(nullptr, &IOptimizedNetwork::Destroy);
    }

    Optimizer::Pass(optNetObjPtr->GetGraph(), MakeOptimizations(OptimizeInverseConversionsFp16(),
                                                                OptimizeInverseConversionsFp32()));

    optNetObjPtr->GetGraph().AddCopyLayers();

    // Convert constants to the representation required by the chosen backends.
    Optimizer::Pass(optNetObjPtr->GetGraph(), MakeOptimizations(ConvertConstantsFloatToHalf()));
    Optimizer::Pass(optNetObjPtr->GetGraph(), MakeOptimizations(ConvertConstantsHalfToFloat()));

    return optNet;
}

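// Example call sequence for Optimize() (an illustrative sketch; the backend list,
// 'net', and 'runtime' are assumptions of the example, not requirements of this file):
//
//     std::vector<armnn::BackendId> backends = { armnn::Compute::CpuAcc,
//                                                armnn::Compute::CpuRef };
//     std::vector<std::string> errors;
//     armnn::OptimizerOptions options;
//     options.m_ReduceFp32ToFp16 = false;
//     armnn::IOptimizedNetworkPtr optNet =
//         armnn::Optimize(*net, backends, runtime->GetDeviceSpec(), options, errors);
//     // A null optNet plus a populated 'errors' vector means no preferred backend
//     // could run the graph.
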
Network::Network()
: m_Graph(std::make_unique<Graph>())
{
}

Network::~Network()
{
}

IConnectableLayer* Network::AddInputLayer(LayerBindingId id, const char* name)
{
    return m_Graph->AddLayer<InputLayer>(id, name);
}

IConnectableLayer* Network::AddFullyConnectedLayerImpl(const FullyConnectedDescriptor& fullyConnectedDescriptor,
                                                       const ConstTensor& weights,
                                                       const ConstTensor* biases,
                                                       const char* name)
{
    if (fullyConnectedDescriptor.m_BiasEnabled && (biases == nullptr))
    {
        throw InvalidArgumentException("AddFullyConnectedLayer: biases cannot be NULL");
    }

    const auto layer = m_Graph->AddLayer<FullyConnectedLayer>(fullyConnectedDescriptor, name);

    layer->m_Weight = std::make_unique<ScopedCpuTensorHandle>(weights);

    if (fullyConnectedDescriptor.m_BiasEnabled)
    {
        layer->m_Bias = std::make_unique<ScopedCpuTensorHandle>(*biases);
    }

    return layer;
}

IConnectableLayer* Network::AddFullyConnectedLayer(const FullyConnectedDescriptor& fullyConnectedDescriptor,
                                                   const ConstTensor& weights,
                                                   const char* name)
{
    return AddFullyConnectedLayerImpl(fullyConnectedDescriptor, weights, nullptr, name);
}

IConnectableLayer* Network::AddFullyConnectedLayer(const FullyConnectedDescriptor& fullyConnectedDescriptor,
                                                   const ConstTensor& weights,
                                                   const ConstTensor& biases,
                                                   const char* name)
{
    return AddFullyConnectedLayerImpl(fullyConnectedDescriptor, weights, &biases, name);
}

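// Sketch of adding a fully connected layer (shapes and buffers are illustrative;
// 'net' is an assumed INetworkPtr). Note that the weights are copied into a
// ScopedCpuTensorHandle above, so the caller's buffers need not outlive this call.
//
//     armnn::FullyConnectedDescriptor fcDesc;
//     fcDesc.m_BiasEnabled = true;
//     armnn::ConstTensor weights(armnn::TensorInfo({16, 8}, armnn::DataType::Float32),
//                                weightsData);  // weightsData: caller-owned floats
//     armnn::ConstTensor bias(armnn::TensorInfo({16}, armnn::DataType::Float32), biasData);
//     armnn::IConnectableLayer* fc = net->AddFullyConnectedLayer(fcDesc, weights, bias, "fc1");
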
IConnectableLayer* Network::AddConvolution2dLayerImpl(const Convolution2dDescriptor& convolution2dDescriptor,
                                                      const ConstTensor& weights,
                                                      const ConstTensor* biases,
                                                      const char* name)
{
    if (convolution2dDescriptor.m_BiasEnabled && (biases == nullptr))
    {
        throw InvalidArgumentException("AddConvolution2dLayer: biases cannot be NULL");
    }

    const auto layer = m_Graph->AddLayer<Convolution2dLayer>(convolution2dDescriptor, name);

    layer->m_Weight = std::make_unique<ScopedCpuTensorHandle>(weights);

    if (convolution2dDescriptor.m_BiasEnabled)
    {
        layer->m_Bias = std::make_unique<ScopedCpuTensorHandle>(*biases);
    }

    return layer;
}

IConnectableLayer* Network::AddConvolution2dLayer(const Convolution2dDescriptor& convolution2dDescriptor,
                                                  const ConstTensor& weights,
                                                  const char* name)
{
    return AddConvolution2dLayerImpl(convolution2dDescriptor, weights, nullptr, name);
}

IConnectableLayer* Network::AddConvolution2dLayer(const Convolution2dDescriptor& convolution2dDescriptor,
                                                  const ConstTensor& weights,
                                                  const ConstTensor& biases,
                                                  const char* name)
{
    return AddConvolution2dLayerImpl(convolution2dDescriptor, weights, &biases, name);
}

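// Sketch of adding a convolution (all descriptor values, the weight shape, and
// 'net' are illustrative assumptions):
//
//     armnn::Convolution2dDescriptor convDesc;
//     convDesc.m_StrideX = 1;
//     convDesc.m_StrideY = 1;
//     convDesc.m_PadLeft = 1;
//     convDesc.m_PadRight = 1;
//     convDesc.m_PadTop = 1;
//     convDesc.m_PadBottom = 1;
//     convDesc.m_BiasEnabled = false;
//     // Example weight shape: [outputChannels, inputChannels, height, width].
//     armnn::ConstTensor convWeights(armnn::TensorInfo({8, 3, 3, 3}, armnn::DataType::Float32),
//                                    convWeightsData);  // convWeightsData: caller-owned
//     armnn::IConnectableLayer* conv = net->AddConvolution2dLayer(convDesc, convWeights, "conv1");
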
IConnectableLayer* Network::AddDepthwiseConvolution2dLayerImpl(
    const DepthwiseConvolution2dDescriptor& convolution2dDescriptor,
    const ConstTensor& weights,
    const ConstTensor* biases,
    const char* name)
{
    if (convolution2dDescriptor.m_BiasEnabled && (biases == nullptr))
    {
        throw InvalidArgumentException("AddDepthwiseConvolution2dLayer: biases cannot be NULL");
    }

    const auto layer = m_Graph->AddLayer<DepthwiseConvolution2dLayer>(convolution2dDescriptor,
                                                                      name);

    layer->m_Weight = std::make_unique<ScopedCpuTensorHandle>(weights);

    if (convolution2dDescriptor.m_BiasEnabled)
    {
        layer->m_Bias = std::make_unique<ScopedCpuTensorHandle>(*biases);
    }

    return layer;
}

IConnectableLayer* Network::AddDepthwiseConvolution2dLayer(
    const DepthwiseConvolution2dDescriptor& convolution2dDescriptor,
    const ConstTensor& weights,
    const char* name)
{
    return AddDepthwiseConvolution2dLayerImpl(convolution2dDescriptor, weights, nullptr, name);
}

IConnectableLayer* Network::AddDepthwiseConvolution2dLayer(
    const DepthwiseConvolution2dDescriptor& convolution2dDescriptor,
    const ConstTensor& weights,
    const ConstTensor& biases,
    const char* name)
{
    return AddDepthwiseConvolution2dLayerImpl(convolution2dDescriptor, weights, &biases, name);
}

IConnectableLayer* Network::AddPermuteLayer(const PermuteDescriptor& permuteDescriptor,
                                            const char* name)
{
    return m_Graph->AddLayer<PermuteLayer>(permuteDescriptor, name);
}

IConnectableLayer* Network::AddPooling2dLayer(const Pooling2dDescriptor& pooling2dDescriptor,
                                              const char* name)
{
    return m_Graph->AddLayer<Pooling2dLayer>(pooling2dDescriptor, name);
}

IConnectableLayer* Network::AddActivationLayer(const ActivationDescriptor& activationDescriptor,
                                               const char* name)
{
    return m_Graph->AddLayer<ActivationLayer>(activationDescriptor, name);
}

IConnectableLayer* Network::AddNormalizationLayer(const NormalizationDescriptor& normalizationDescriptor,
                                                  const char* name)
{
    return m_Graph->AddLayer<NormalizationLayer>(normalizationDescriptor, name);
}

IConnectableLayer* Network::AddSoftmaxLayer(const SoftmaxDescriptor& softmaxDescriptor,
                                            const char* name)
{
    return m_Graph->AddLayer<SoftmaxLayer>(softmaxDescriptor, name);
}

IConnectableLayer* Network::AddSplitterLayer(const ViewsDescriptor& splitterDescriptor,
                                             const char* name)
{
    return m_Graph->AddLayer<SplitterLayer>(splitterDescriptor, name);
}

IConnectableLayer* Network::AddMergerLayer(const OriginsDescriptor& mergerDescriptor,
                                           const char* name)
{
    return m_Graph->AddLayer<MergerLayer>(mergerDescriptor, name);
}

IConnectableLayer* Network::AddAdditionLayer(const char* name)
{
    return m_Graph->AddLayer<AdditionLayer>(name);
}

IConnectableLayer* Network::AddMultiplicationLayer(const char* name)
{
    return m_Graph->AddLayer<MultiplicationLayer>(name);
}

IConnectableLayer* Network::AddOutputLayer(LayerBindingId id, const char* name)
{
    return m_Graph->AddLayer<OutputLayer>(id, name);
}

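// Minimal end-to-end graph-building sketch using the factories above
// (binding ids, shapes, and layer choice are illustrative):
//
//     armnn::INetworkPtr net = armnn::INetwork::Create();
//     armnn::IConnectableLayer* input  = net->AddInputLayer(0, "input");
//     armnn::IConnectableLayer* add    = net->AddAdditionLayer("add");
//     armnn::IConnectableLayer* output = net->AddOutputLayer(0, "output");
//     input->GetOutputSlot(0).Connect(add->GetInputSlot(0));
//     input->GetOutputSlot(0).Connect(add->GetInputSlot(1));
//     add->GetOutputSlot(0).Connect(output->GetInputSlot(0));
//     input->GetOutputSlot(0).SetTensorInfo(armnn::TensorInfo({1, 4}, armnn::DataType::Float32));
//     add->GetOutputSlot(0).SetTensorInfo(armnn::TensorInfo({1, 4}, armnn::DataType::Float32));
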
IConnectableLayer* Network::AddBatchNormalizationLayer(const BatchNormalizationDescriptor& desc,
                                                       const ConstTensor& mean,
                                                       const ConstTensor& variance,
                                                       const ConstTensor& beta,
                                                       const ConstTensor& gamma,
                                                       const char* name)
{
    const auto layer = m_Graph->AddLayer<BatchNormalizationLayer>(desc, name);

    layer->m_Mean = std::make_unique<ScopedCpuTensorHandle>(mean);
    layer->m_Variance = std::make_unique<ScopedCpuTensorHandle>(variance);
    layer->m_Beta = std::make_unique<ScopedCpuTensorHandle>(beta);
    layer->m_Gamma = std::make_unique<ScopedCpuTensorHandle>(gamma);

    return layer;
}

IConnectableLayer* Network::AddResizeBilinearLayer(const ResizeBilinearDescriptor& resizeDescriptor,
                                                   const char* name)
{
    return m_Graph->AddLayer<ResizeBilinearLayer>(resizeDescriptor, name);
}

IConnectableLayer* Network::AddL2NormalizationLayer(const L2NormalizationDescriptor& desc,
                                                    const char* name)
{
    return m_Graph->AddLayer<L2NormalizationLayer>(desc, name);
}

IConnectableLayer* Network::AddConstantLayer(const ConstTensor& input, const char* name)
{
    auto layer = m_Graph->AddLayer<ConstantLayer>(name);

    layer->m_LayerOutput = std::make_unique<ScopedCpuTensorHandle>(input);

    return layer;
}

IConnectableLayer* Network::AddReshapeLayer(const ReshapeDescriptor& reshapeDescriptor,
                                            const char* name)
{
    return m_Graph->AddLayer<ReshapeLayer>(reshapeDescriptor, name);
}

IConnectableLayer* Network::AddSpaceToBatchNdLayer(const SpaceToBatchNdDescriptor& spaceToBatchNdDescriptor,
                                                   const char* name)
{
    return m_Graph->AddLayer<SpaceToBatchNdLayer>(spaceToBatchNdDescriptor, name);
}

IConnectableLayer* Network::AddFloorLayer(const char* name)
{
    return m_Graph->AddLayer<FloorLayer>(name);
}

IConnectableLayer* Network::AddLstmLayer(const LstmDescriptor& descriptor,
                                         const LstmInputParams& params,
                                         const char* name)
{
    const auto layer = m_Graph->AddLayer<LstmLayer>(descriptor, name);

    // LSTM basic parameters
    layer->m_BasicParameters.m_InputToForgetWeights =
        std::make_unique<ScopedCpuTensorHandle>(*(params.m_InputToForgetWeights));
    layer->m_BasicParameters.m_InputToCellWeights =
        std::make_unique<ScopedCpuTensorHandle>(*(params.m_InputToCellWeights));
    layer->m_BasicParameters.m_InputToOutputWeights =
        std::make_unique<ScopedCpuTensorHandle>(*(params.m_InputToOutputWeights));
    layer->m_BasicParameters.m_RecurrentToForgetWeights =
        std::make_unique<ScopedCpuTensorHandle>(*(params.m_RecurrentToForgetWeights));
    layer->m_BasicParameters.m_RecurrentToCellWeights =
        std::make_unique<ScopedCpuTensorHandle>(*(params.m_RecurrentToCellWeights));
    layer->m_BasicParameters.m_RecurrentToOutputWeights =
        std::make_unique<ScopedCpuTensorHandle>(*(params.m_RecurrentToOutputWeights));
    layer->m_BasicParameters.m_ForgetGateBias =
        std::make_unique<ScopedCpuTensorHandle>(*(params.m_ForgetGateBias));
    layer->m_BasicParameters.m_CellBias =
        std::make_unique<ScopedCpuTensorHandle>(*(params.m_CellBias));
    layer->m_BasicParameters.m_OutputGateBias =
        std::make_unique<ScopedCpuTensorHandle>(*(params.m_OutputGateBias));

    // LSTM CIFG parameters
    if (!descriptor.m_CifgEnabled)
    {
        if (params.m_InputToInputWeights == nullptr)
        {
            throw InvalidArgumentException("AddLstmLayer: Input To Input Weights cannot be NULL");
        }
        if (params.m_RecurrentToInputWeights == nullptr)
        {
            throw InvalidArgumentException(
                "AddLstmLayer: Recurrent To Input Weights cannot be NULL");
        }
        if (params.m_InputGateBias == nullptr)
        {
            throw InvalidArgumentException("AddLstmLayer: Input Gate Bias cannot be NULL");
        }
        layer->m_CifgParameters.m_InputToInputWeights =
            std::make_unique<ScopedCpuTensorHandle>(*(params.m_InputToInputWeights));
        layer->m_CifgParameters.m_RecurrentToInputWeights =
            std::make_unique<ScopedCpuTensorHandle>(*(params.m_RecurrentToInputWeights));
        // In the VTS tests, cell-to-input weights may be null, even if the other CIFG params are not.
        if (params.m_CellToInputWeights != nullptr)
        {
            layer->m_CifgParameters.m_CellToInputWeights =
                std::make_unique<ScopedCpuTensorHandle>(*(params.m_CellToInputWeights));
        }
        layer->m_CifgParameters.m_InputGateBias =
            std::make_unique<ScopedCpuTensorHandle>(*(params.m_InputGateBias));
    }

    // LSTM projection parameters
    if (descriptor.m_ProjectionEnabled)
    {
        if (params.m_ProjectionWeights == nullptr)
        {
            throw InvalidArgumentException("AddLstmLayer: Projection Weights cannot be NULL");
        }
        layer->m_ProjectionParameters.m_ProjectionWeights =
            std::make_unique<ScopedCpuTensorHandle>(*(params.m_ProjectionWeights));
        if (params.m_ProjectionBias != nullptr)
        {
            layer->m_ProjectionParameters.m_ProjectionBias =
                std::make_unique<ScopedCpuTensorHandle>(*(params.m_ProjectionBias));
        }
    }

    // LSTM peephole parameters
    if (descriptor.m_PeepholeEnabled)
    {
        if (params.m_CellToForgetWeights == nullptr)
        {
            throw InvalidArgumentException("AddLstmLayer: Cell To Forget Weights cannot be NULL");
        }
        if (params.m_CellToOutputWeights == nullptr)
        {
            throw InvalidArgumentException("AddLstmLayer: Cell To Output Weights cannot be NULL");
        }
        layer->m_PeepholeParameters.m_CellToForgetWeights =
            std::make_unique<ScopedCpuTensorHandle>(*(params.m_CellToForgetWeights));
        layer->m_PeepholeParameters.m_CellToOutputWeights =
            std::make_unique<ScopedCpuTensorHandle>(*(params.m_CellToOutputWeights));
    }

    return layer;
}

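// The descriptor flags above gate which LstmInputParams members must be non-null.
// A sketch of the mapping (illustrative; mirrors the checks in AddLstmLayer, with
// 'net' an assumed INetworkPtr and the weight tensors built by the caller):
//
//     armnn::LstmDescriptor lstmDesc;
//     lstmDesc.m_CifgEnabled = true;        // CIFG on: input-gate tensors may stay null
//     lstmDesc.m_ProjectionEnabled = false; // no projection weights/bias required
//     lstmDesc.m_PeepholeEnabled = false;   // no cell-to-forget/output weights required
//     armnn::LstmInputParams lstmParams;
//     // With these flags, only the nine basic weight/bias tensors must be set, e.g.:
//     // lstmParams.m_InputToForgetWeights = &inputToForgetWeights; ...
//     armnn::IConnectableLayer* lstm = net->AddLstmLayer(lstmDesc, lstmParams, "lstm");
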
IConnectableLayer* Network::AddDivisionLayer(const char* name)
{
    return m_Graph->AddLayer<DivisionLayer>(name);
}

IConnectableLayer* Network::AddSubtractionLayer(const char* name)
{
    return m_Graph->AddLayer<SubtractionLayer>(name);
}

IConnectableLayer* Network::AddMeanLayer(const MeanDescriptor& meanDescriptor, const char* name)
{
    return m_Graph->AddLayer<MeanLayer>(meanDescriptor, name);
}

IConnectableLayer* Network::AddPadLayer(const PadDescriptor& padDescriptor, const char* name)
{
    return m_Graph->AddLayer<PadLayer>(padDescriptor, name);
}

OptimizedNetwork::OptimizedNetwork(std::unique_ptr<Graph> graph)
    : m_Graph(std::move(graph))
{
}

OptimizedNetwork::~OptimizedNetwork()
{
}

} // namespace armnn