//
// Copyright © 2017 Arm Ltd. All rights reserved.
// SPDX-License-Identifier: MIT
//
#include "Network.hpp"
#include "Graph.hpp"
#include "Layer.hpp"
#include "DeviceSpec.hpp"
#include "Optimizer.hpp"
#include "optimizations/All.hpp"

#include <backends/CpuTensorHandle.hpp>
#include <backends/WorkloadFactory.hpp>

#include <armnn/Exceptions.hpp>
#include <armnn/Utils.hpp>
#include <armnn/TypesUtils.hpp>

#include <algorithm>
#include <memory>
#include <sstream>
#include <vector>

#include <boost/assert.hpp>
#include <boost/format.hpp>
#include <boost/log/trivial.hpp>
#include <boost/numeric/conversion/converter_policies.hpp>
#include <boost/cast.hpp>
namespace armnn
{

armnn::INetwork* INetwork::CreateRaw()
{
    return new Network();
}

armnn::INetworkPtr INetwork::Create()
{
    return INetworkPtr(CreateRaw(), &INetwork::Destroy);
}

void INetwork::Destroy(INetwork* network)
{
    delete boost::polymorphic_downcast<Network*>(network);
}

Status Network::PrintGraph()
{
    m_Graph->Print();
    return Status::Success;
}

void IOptimizedNetwork::Destroy(IOptimizedNetwork* network)
{
    delete boost::polymorphic_downcast<OptimizedNetwork*>(network);
}

Status OptimizedNetwork::PrintGraph()
{
    m_Graph->Print();
    return Status::Success;
}

Status OptimizedNetwork::SerializeToDot(std::ostream& stream) const
{
    return m_Graph->SerializeToDot(stream);
}
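// Illustrative only (the file name below is an assumption, not part of this file): the
// serialized graph can be written to any std::ostream, e.g. a file for viewing with Graphviz:
//
//     std::ofstream dotFile("optimized_graph.dot");
//     optimizedNetwork->SerializeToDot(dotFile);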
bool CheckScaleSetOnQuantizedType(Layer* layer, Optional<std::vector<std::string>&> errMessages)
{
    bool noErrors = true;
    unsigned int numOutputs = layer->GetNumOutputSlots();
    for (unsigned int i = 0; i < numOutputs; i++) {
        const OutputSlot& outputSlot = layer->GetOutputSlot(i);
        const TensorInfo& info = outputSlot.GetTensorInfo();
        if (DataType::QuantisedAsymm8 == info.GetDataType()) {
            if (0.f == info.GetQuantizationScale()) {
                noErrors = false;
                std::stringstream ss;
                ss << "ERROR: output " << i << " of layer " << GetLayerTypeAsCString(layer->GetType())
                   << " (" << layer->GetNameStr() << ") is of type"
                   << " Quantized 8 bit but its scale parameter has not been set";
                BOOST_LOG_TRIVIAL(warning) << ss.str();
                if (errMessages) {
                    errMessages.value().push_back(ss.str());
                }
            }
        }
    }
    return noErrors;
}
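// Note: Optimize() below runs this check on every layer and keeps going even after a failure,
// so that every quantized output with a missing scale is reported in a single pass before the
// optimization is abandoned (see the bErrorFound handling further down).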
IOptimizedNetworkPtr Optimize(const INetwork& inNetwork,
                              const std::vector<BackendId>& backendPreferences,
                              const IDeviceSpec& deviceSpec,
                              const OptimizerOptions& options,
                              Optional<std::vector<std::string>&> errMessages)
{
    if (backendPreferences.empty()) {
        throw armnn::InvalidArgumentException("Invoked Optimize with no backends specified");
    }

    const Network& network = *boost::polymorphic_downcast<const Network*>(&inNetwork);
    std::unique_ptr<Graph> graph = std::make_unique<Graph>(network.GetGraph());

    auto optNet = IOptimizedNetworkPtr(new OptimizedNetwork(std::move(graph)), &IOptimizedNetwork::Destroy);

    OptimizedNetwork* optNetObjPtr = boost::polymorphic_downcast<OptimizedNetwork*>(optNet.get());

    // Perform optimisation passes
    using namespace optimizations;
    Optimizer::Pass(optNetObjPtr->GetGraph(), MakeOptimizations(SquashEqualPermuteSiblings(),
                                                                SquashEqualReshapeSiblings(),
                                                                OptimizeInversePermutes(),
                                                                OptimizeConsecutiveReshapes()));

    // Infer the tensor infos for all output slots. Throws an exception on failure.
    optNetObjPtr->GetGraph().InferTensorInfos();

    // If the FP32-to-FP16 optimization is enabled, convert the FP32 network to FP16.
    if (options.m_ReduceFp32ToFp16)
    {
        Optimizer::Pass(optNetObjPtr->GetGraph(), MakeOptimizations(Fp32NetworkToFp16Converter()));
    }
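    // Note: after this whole-network FP16 conversion, any individual layer that turns out not to
    // support FP16 on its assigned backend is handled in the backend-assignment loop below by
    // bracketing it with ConvertFp16ToFp32 / ConvertFp32ToFp16 layers so it can run in FP32.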
    // We know that DeviceSpec should be the only implementation of IDeviceSpec.
    const DeviceSpec& spec = *boost::polymorphic_downcast<const DeviceSpec*>(&deviceSpec);

    // Determine which of the preferred backends are available for use,
    // and whether CpuRef has been specified as one of them.
    bool cpuRefUsed = false;
    std::vector<BackendId> availablePreferredBackends;
    for (const auto& backend : backendPreferences)
    {
        // Check if the backend is in the available backend devices.
        if (std::find(spec.m_SupportedComputeDevices.begin(),
                      spec.m_SupportedComputeDevices.end(), backend) !=
                      spec.m_SupportedComputeDevices.end())
        {
            availablePreferredBackends.push_back(backend);
            if (backend == armnn::Compute::CpuRef) {
                cpuRefUsed = true;
            }
        }
    }

    if (availablePreferredBackends.empty()) {
        std::stringstream failureMsg;
        failureMsg << "ERROR: None of the preferred backends " << backendPreferences
                   << " are supported. Current platform provides " << spec.m_SupportedComputeDevices;
        BOOST_LOG_TRIVIAL(warning) << failureMsg.str();
        if (errMessages) {
            errMessages.value().push_back(failureMsg.str());
        }
        return IOptimizedNetworkPtr(nullptr, &IOptimizedNetwork::Destroy);
    }
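    // Error-reporting convention used throughout the rest of this function: every problem is
    // logged through BOOST_LOG_TRIVIAL and, when the caller supplied the optional errMessages
    // vector, the same string is appended to it so the caller can surface the diagnostics itself.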
    auto ReturnWithError = [&](Layer* layer)
    {
        std::stringstream failureMsg;
        failureMsg << "ERROR: Layer of type " << GetLayerTypeAsCString(layer->GetType())
                   << " is not supported on any preferred backend " << backendPreferences;
        BOOST_LOG_TRIVIAL(warning) << failureMsg.str();
        if (errMessages) {
            errMessages.value().push_back(failureMsg.str());
        }
        return IOptimizedNetworkPtr(nullptr, &IOptimizedNetwork::Destroy);
    };

    // Assign a compute device to every layer in the graph.
    bool bErrorFound = false;
    for (auto&& layer : optNetObjPtr->GetGraph())
    {
        DataType dataType = layer->GetDataType();
        std::string reasonIfUnsupported;
        bool found = false;
        if (!CheckScaleSetOnQuantizedType(layer, errMessages))
        {
            // don't bomb immediately, find all the quantized outputs
            // which haven't had a scale set and report them all back.
            bErrorFound = true;
        }
        for (const auto& backend : availablePreferredBackends)
        {
            // need to set the compute device on the layer
            // before we can check if it is supported
            layer->SetBackendId(backend);
            if (!IWorkloadFactory::IsLayerSupported(*layer, dataType, reasonIfUnsupported))
            {
                if (dataType == DataType::Float16)
                {
                    if (IWorkloadFactory::IsLayerSupported(*layer, DataType::Float32, reasonIfUnsupported)
                        && layer->GetType() != LayerType::ConvertFp32ToFp16
                        && layer->GetType() != LayerType::ConvertFp16ToFp32)
                    {
                        // Insert FP16 -> FP32 conversion layer before current layer
                        std::vector<ConvertFp16ToFp32Layer*> convertFp16ToFp32Layers =
                            InsertConvertFp16ToFp32LayersBefore(optNetObjPtr->GetGraph(), *layer);

                        // Insert FP32 -> FP16 conversion layer after current layer
                        std::vector<ConvertFp32ToFp16Layer*> convertFp32ToFp16Layers =
                            InsertConvertFp32ToFp16LayersAfter(optNetObjPtr->GetGraph(), *layer);

                        // Assign a supported backend to the newly introduced conversion layers
                        auto AssignFirstSupportedBackend = [&](Layer* layer, BackendId preferredBackend)
                        {
                            bool supportedBackendFound = false;
                            std::string reasonIfUnsupported;

                            // Try preferred backend first
                            layer->SetBackendId(preferredBackend);
                            if (IWorkloadFactory::IsLayerSupported(*layer, boost::none, reasonIfUnsupported))
                            {
                                supportedBackendFound = true;
                            }
                            else
                            {
                                for (const auto& backend : availablePreferredBackends)
                                {
                                    // Skip preferred backend (we already determined that it is not supported)
                                    if (backend == preferredBackend)
                                    {
                                        continue;
                                    }

                                    layer->SetBackendId(backend);
                                    if (IWorkloadFactory::IsLayerSupported(*layer, boost::none, reasonIfUnsupported))
                                    {
                                        supportedBackendFound = true;
                                        break;
                                    }
                                }
                            }

                            return supportedBackendFound;
                        };

                        for (ConvertFp16ToFp32Layer* convertLayer : convertFp16ToFp32Layers)
                        {
                            if (!AssignFirstSupportedBackend(convertLayer, backend))
                            {
                                return ReturnWithError(convertLayer);
                            }
                        }

                        for (ConvertFp32ToFp16Layer* convertLayer : convertFp32ToFp16Layers)
                        {
                            if (!AssignFirstSupportedBackend(convertLayer, backend))
                            {
                                return ReturnWithError(convertLayer);
                            }
                        }

                        found = true;
                        break;
                    }
                }
                std::stringstream warningMsg;
                warningMsg << "WARNING: Layer of type " << GetLayerTypeAsCString(layer->GetType())
                           << " is not supported on requested backend " << layer->GetBackendId().Get()
                           << " for data type " << GetDataTypeName(dataType)
                           << " (reason: " << reasonIfUnsupported
                           << "), falling back to the next backend.";
                BOOST_LOG_TRIVIAL(warning) << warningMsg.str();
                if (errMessages) {
                    errMessages.value().push_back(warningMsg.str());
                }
            }
            else
            {
                found = true;
                break;
            }
        }

        // If the layer is unsupported by any devices, log and return a null network.
        if (!found)
        {
            // NOTE: if the layer is not an operation queue type AND we have not got CpuRef as a
            // fallback we should set the compute device on the layer to CpuRef (these are not
            // available as accelerated operations, or are only available under certain
            // conditions, currently they comprise MemCopy, Constant, Permute)
            armnn::LayerType layerType = layer->GetType();
            if (!cpuRefUsed && (layerType == armnn::LayerType::MemCopy ||
                                layerType == armnn::LayerType::Constant ||
                                layerType == armnn::LayerType::Permute))
            {
                layer->SetBackendId(armnn::Compute::CpuRef);
            }
            else
            {
                return ReturnWithError(layer);
            }
        }
    }

    // If any quantized output was found without a scale, abandon the optimization.
    if (bErrorFound)
    {
        return IOptimizedNetworkPtr(nullptr, &IOptimizedNetwork::Destroy);
    }
    Optimizer::Pass(optNetObjPtr->GetGraph(), MakeOptimizations(OptimizeInverseConversionsFp16(),
                                                                OptimizeInverseConversionsFp32()));

    optNetObjPtr->GetGraph().AddCopyLayers();

    // Convert constants
    Optimizer::Pass(optNetObjPtr->GetGraph(), MakeOptimizations(ConvertConstantsFloatToHalf()));
    Optimizer::Pass(optNetObjPtr->GetGraph(), MakeOptimizations(ConvertConstantsHalfToFloat()));

    return optNet;
}
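// Illustrative only (not part of this file; variable names are assumptions): a typical caller
// builds a network, optimizes it against a runtime's device spec, and then loads it:
//
//     armnn::IRuntimePtr runtime = armnn::IRuntime::Create(armnn::IRuntime::CreationOptions());
//     armnn::INetworkPtr net     = armnn::INetwork::Create();
//     // ... add and connect layers, set tensor infos ...
//     armnn::IOptimizedNetworkPtr optNet =
//         armnn::Optimize(*net, {armnn::Compute::CpuAcc, armnn::Compute::CpuRef},
//                         runtime->GetDeviceSpec());
//     armnn::NetworkId netId;
//     runtime->LoadNetwork(netId, std::move(optNet));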
Network::Network()
: m_Graph(std::make_unique<Graph>())
{
}

Network::~Network()
{
}
IConnectableLayer* Network::AddInputLayer(LayerBindingId id, const char* name)
{
    return m_Graph->AddLayer<InputLayer>(id, name);
}

IConnectableLayer* Network::AddFullyConnectedLayerImpl(const FullyConnectedDescriptor& fullyConnectedDescriptor,
                                                       const ConstTensor& weights,
                                                       const ConstTensor* biases,
                                                       const char* name)
{
    if (fullyConnectedDescriptor.m_BiasEnabled && (biases == nullptr))
    {
        throw InvalidArgumentException("AddFullyConnectedLayer: biases cannot be NULL");
    }

    const auto layer = m_Graph->AddLayer<FullyConnectedLayer>(fullyConnectedDescriptor, name);

    layer->m_Weight = std::make_unique<ScopedCpuTensorHandle>(weights);

    if (fullyConnectedDescriptor.m_BiasEnabled)
    {
        layer->m_Bias = std::make_unique<ScopedCpuTensorHandle>(*biases);
    }

    return layer;
}
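// Note: the weight and (optional) bias ConstTensors are wrapped in ScopedCpuTensorHandles that
// are owned by the layer itself; the convolution, batch-normalization and constant-layer
// factories below follow the same ownership pattern.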
IConnectableLayer* Network::AddFullyConnectedLayer(const FullyConnectedDescriptor& fullyConnectedDescriptor,
                                                   const ConstTensor& weights,
                                                   const char* name)
{
    return AddFullyConnectedLayerImpl(fullyConnectedDescriptor, weights, nullptr, name);
}

IConnectableLayer* Network::AddFullyConnectedLayer(const FullyConnectedDescriptor& fullyConnectedDescriptor,
                                                   const ConstTensor& weights,
                                                   const ConstTensor& biases,
                                                   const char* name)
{
    return AddFullyConnectedLayerImpl(fullyConnectedDescriptor, weights, &biases, name);
}
IConnectableLayer* Network::AddConvolution2dLayerImpl(const Convolution2dDescriptor& convolution2dDescriptor,
                                                      const ConstTensor& weights,
                                                      const ConstTensor* biases,
                                                      const char* name)
{
    if (convolution2dDescriptor.m_BiasEnabled && (biases == nullptr))
    {
        throw InvalidArgumentException("AddConvolution2dLayer: biases cannot be NULL");
    }

    const auto layer = m_Graph->AddLayer<Convolution2dLayer>(convolution2dDescriptor, name);

    layer->m_Weight = std::make_unique<ScopedCpuTensorHandle>(weights);

    if (convolution2dDescriptor.m_BiasEnabled)
    {
        layer->m_Bias = std::make_unique<ScopedCpuTensorHandle>(*biases);
    }

    return layer;
}

IConnectableLayer* Network::AddConvolution2dLayer(const Convolution2dDescriptor& convolution2dDescriptor,
                                                  const ConstTensor& weights,
                                                  const char* name)
{
    return AddConvolution2dLayerImpl(convolution2dDescriptor, weights, nullptr, name);
}

IConnectableLayer* Network::AddConvolution2dLayer(const Convolution2dDescriptor& convolution2dDescriptor,
                                                  const ConstTensor& weights,
                                                  const ConstTensor& biases,
                                                  const char* name)
{
    return AddConvolution2dLayerImpl(convolution2dDescriptor, weights, &biases, name);
}
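// Illustrative only (names and tensor infos below are assumptions, not part of this file): a
// caller typically creates layers through these factory methods and then wires them together
// through their input/output slots, e.g.
//
//     armnn::IConnectableLayer* input  = net->AddInputLayer(0);
//     armnn::IConnectableLayer* conv   = net->AddConvolution2dLayer(convDesc, weights, "conv1");
//     armnn::IConnectableLayer* output = net->AddOutputLayer(0);
//     input->GetOutputSlot(0).Connect(conv->GetInputSlot(0));
//     conv->GetOutputSlot(0).Connect(output->GetInputSlot(0));
//     input->GetOutputSlot(0).SetTensorInfo(inputInfo);
//     conv->GetOutputSlot(0).SetTensorInfo(convOutputInfo);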
IConnectableLayer* Network::AddDepthwiseConvolution2dLayerImpl(
    const DepthwiseConvolution2dDescriptor& convolution2dDescriptor,
    const ConstTensor& weights,
    const ConstTensor* biases,
    const char* name)
{
    if (convolution2dDescriptor.m_BiasEnabled && (biases == nullptr))
    {
        throw InvalidArgumentException("AddDepthwiseConvolution2dLayer: biases cannot be NULL");
    }

    const auto layer = m_Graph->AddLayer<DepthwiseConvolution2dLayer>(convolution2dDescriptor,
                                                                      name);

    layer->m_Weight = std::make_unique<ScopedCpuTensorHandle>(weights);

    if (convolution2dDescriptor.m_BiasEnabled)
    {
        layer->m_Bias = std::make_unique<ScopedCpuTensorHandle>(*biases);
    }

    return layer;
}

IConnectableLayer* Network::AddDepthwiseConvolution2dLayer(
    const DepthwiseConvolution2dDescriptor& convolution2dDescriptor,
    const ConstTensor& weights,
    const char* name)
{
    return AddDepthwiseConvolution2dLayerImpl(convolution2dDescriptor, weights, nullptr, name);
}

IConnectableLayer* Network::AddDepthwiseConvolution2dLayer(
    const DepthwiseConvolution2dDescriptor& convolution2dDescriptor,
    const ConstTensor& weights,
    const ConstTensor& biases,
    const char* name)
{
    return AddDepthwiseConvolution2dLayerImpl(convolution2dDescriptor, weights, &biases, name);
}

IConnectableLayer* Network::AddPermuteLayer(const PermuteDescriptor& permuteDescriptor,
                                            const char* name)
{
    return m_Graph->AddLayer<PermuteLayer>(permuteDescriptor, name);
}

IConnectableLayer* Network::AddPooling2dLayer(const Pooling2dDescriptor& pooling2dDescriptor,
                                              const char* name)
{
    return m_Graph->AddLayer<Pooling2dLayer>(pooling2dDescriptor, name);
}

IConnectableLayer* Network::AddActivationLayer(const ActivationDescriptor& activationDescriptor,
                                               const char* name)
{
    return m_Graph->AddLayer<ActivationLayer>(activationDescriptor, name);
}

IConnectableLayer* Network::AddNormalizationLayer(const NormalizationDescriptor& normalizationDescriptor,
                                                  const char* name)
{
    return m_Graph->AddLayer<NormalizationLayer>(normalizationDescriptor, name);
}

IConnectableLayer* Network::AddSoftmaxLayer(const SoftmaxDescriptor& softmaxDescriptor,
                                            const char* name)
{
    return m_Graph->AddLayer<SoftmaxLayer>(softmaxDescriptor, name);
}

IConnectableLayer* Network::AddSplitterLayer(const ViewsDescriptor& splitterDescriptor,
                                             const char* name)
{
    return m_Graph->AddLayer<SplitterLayer>(splitterDescriptor, name);
}

IConnectableLayer* Network::AddMergerLayer(const OriginsDescriptor& mergerDescriptor,
                                           const char* name)
{
    return m_Graph->AddLayer<MergerLayer>(mergerDescriptor, name);
}

IConnectableLayer* Network::AddAdditionLayer(const char* name)
{
    return m_Graph->AddLayer<AdditionLayer>(name);
}

IConnectableLayer* Network::AddMultiplicationLayer(const char* name)
{
    return m_Graph->AddLayer<MultiplicationLayer>(name);
}

IConnectableLayer* Network::AddOutputLayer(LayerBindingId id, const char* name)
{
    return m_Graph->AddLayer<OutputLayer>(id, name);
}
IConnectableLayer* Network::AddBatchNormalizationLayer(const BatchNormalizationDescriptor& desc,
                                                       const ConstTensor& mean,
                                                       const ConstTensor& variance,
                                                       const ConstTensor& beta,
                                                       const ConstTensor& gamma,
                                                       const char* name)
{
    const auto layer = m_Graph->AddLayer<BatchNormalizationLayer>(desc, name);

    layer->m_Mean = std::make_unique<ScopedCpuTensorHandle>(mean);
    layer->m_Variance = std::make_unique<ScopedCpuTensorHandle>(variance);
    layer->m_Beta = std::make_unique<ScopedCpuTensorHandle>(beta);
    layer->m_Gamma = std::make_unique<ScopedCpuTensorHandle>(gamma);

    return layer;
}

IConnectableLayer* Network::AddResizeBilinearLayer(const ResizeBilinearDescriptor& resizeDescriptor,
                                                   const char* name)
{
    return m_Graph->AddLayer<ResizeBilinearLayer>(resizeDescriptor, name);
}

IConnectableLayer* Network::AddL2NormalizationLayer(const L2NormalizationDescriptor& desc,
                                                    const char* name)
{
    return m_Graph->AddLayer<L2NormalizationLayer>(desc, name);
}

IConnectableLayer* Network::AddConstantLayer(const ConstTensor& input, const char* name)
{
    auto layer = m_Graph->AddLayer<ConstantLayer>(name);

    layer->m_LayerOutput = std::make_unique<ScopedCpuTensorHandle>(input);

    return layer;
}

IConnectableLayer* Network::AddReshapeLayer(const ReshapeDescriptor& reshapeDescriptor,
                                            const char* name)
{
    return m_Graph->AddLayer<ReshapeLayer>(reshapeDescriptor, name);
}

IConnectableLayer* Network::AddFloorLayer(const char* name)
{
    return m_Graph->AddLayer<FloorLayer>(name);
}
IConnectableLayer* Network::AddLstmLayer(const LstmDescriptor& descriptor,
                                         const LstmInputParams& params,
                                         const char* name)
{
    const auto layer = m_Graph->AddLayer<LstmLayer>(descriptor, name);

    // Lstm basic parameters
    layer->m_BasicParameters.m_InputToForgetWeights =
        std::make_unique<ScopedCpuTensorHandle>(*(params.m_InputToForgetWeights));
    layer->m_BasicParameters.m_InputToCellWeights =
        std::make_unique<ScopedCpuTensorHandle>(*(params.m_InputToCellWeights));
    layer->m_BasicParameters.m_InputToOutputWeights =
        std::make_unique<ScopedCpuTensorHandle>(*(params.m_InputToOutputWeights));
    layer->m_BasicParameters.m_RecurrentToForgetWeights =
        std::make_unique<ScopedCpuTensorHandle>(*(params.m_RecurrentToForgetWeights));
    layer->m_BasicParameters.m_RecurrentToCellWeights =
        std::make_unique<ScopedCpuTensorHandle>(*(params.m_RecurrentToCellWeights));
    layer->m_BasicParameters.m_RecurrentToOutputWeights =
        std::make_unique<ScopedCpuTensorHandle>(*(params.m_RecurrentToOutputWeights));
    layer->m_BasicParameters.m_ForgetGateBias =
        std::make_unique<ScopedCpuTensorHandle>(*(params.m_ForgetGateBias));
    layer->m_BasicParameters.m_CellBias =
        std::make_unique<ScopedCpuTensorHandle>(*(params.m_CellBias));
    layer->m_BasicParameters.m_OutputGateBias =
        std::make_unique<ScopedCpuTensorHandle>(*(params.m_OutputGateBias));

    // Lstm Cifg parameters
    if (!descriptor.m_CifgEnabled)
    {
        if (params.m_InputToInputWeights == nullptr)
        {
            throw InvalidArgumentException("AddLstmLayer: Input To Input Weights cannot be NULL");
        }
        if (params.m_RecurrentToInputWeights == nullptr)
        {
            throw InvalidArgumentException(
                "AddLstmLayer: Recurrent To Input Weights cannot be NULL");
        }
        if (params.m_InputGateBias == nullptr)
        {
            throw InvalidArgumentException("AddLstmLayer: Input Gate Bias cannot be NULL");
        }
        layer->m_CifgParameters.m_InputToInputWeights =
            std::make_unique<ScopedCpuTensorHandle>(*(params.m_InputToInputWeights));
        layer->m_CifgParameters.m_RecurrentToInputWeights =
            std::make_unique<ScopedCpuTensorHandle>(*(params.m_RecurrentToInputWeights));
        // In the VTS tests, cell-to-input weights may be null, even if the other CIFG params are not.
        if (params.m_CellToInputWeights != nullptr)
        {
            layer->m_CifgParameters.m_CellToInputWeights =
                std::make_unique<ScopedCpuTensorHandle>(*(params.m_CellToInputWeights));
        }
        layer->m_CifgParameters.m_InputGateBias =
            std::make_unique<ScopedCpuTensorHandle>(*(params.m_InputGateBias));
    }

    // Lstm projection parameters
    if (descriptor.m_ProjectionEnabled)
    {
        if (params.m_ProjectionWeights == nullptr)
        {
            throw InvalidArgumentException("AddLstmLayer: Projection Weights cannot be NULL");
        }
        layer->m_ProjectionParameters.m_ProjectionWeights =
            std::make_unique<ScopedCpuTensorHandle>(*(params.m_ProjectionWeights));
        if (params.m_ProjectionBias != nullptr)
        {
            layer->m_ProjectionParameters.m_ProjectionBias =
                std::make_unique<ScopedCpuTensorHandle>(*(params.m_ProjectionBias));
        }
    }

    // Lstm peephole parameters
    if (descriptor.m_PeepholeEnabled)
    {
        if (params.m_CellToForgetWeights == nullptr)
        {
            throw InvalidArgumentException("AddLstmLayer: Cell To Forget Weights cannot be NULL");
        }
        if (params.m_CellToOutputWeights == nullptr)
        {
            throw InvalidArgumentException("AddLstmLayer: Cell To Output Weights cannot be NULL");
        }
        layer->m_PeepholeParameters.m_CellToForgetWeights =
            std::make_unique<ScopedCpuTensorHandle>(*(params.m_CellToForgetWeights));
        layer->m_PeepholeParameters.m_CellToOutputWeights =
            std::make_unique<ScopedCpuTensorHandle>(*(params.m_CellToOutputWeights));
    }

    return layer;
}
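// Summary of the optional LSTM parameter groups handled above: when CIFG is disabled, the
// input-gate weights and bias must be supplied; when projection is enabled, the projection
// weights are mandatory and the projection bias is optional; when peephole is enabled, the
// cell-to-forget and cell-to-output weights are mandatory. Cell-to-input weights are always
// optional because some callers (e.g. the Android VTS tests) legitimately omit them.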
IConnectableLayer* Network::AddDivisionLayer(const char* name)
{
    return m_Graph->AddLayer<DivisionLayer>(name);
}

IConnectableLayer* Network::AddSubtractionLayer(const char* name)
{
    return m_Graph->AddLayer<SubtractionLayer>(name);
}

IConnectableLayer* Network::AddMeanLayer(const MeanDescriptor& meanDescriptor, const char* name)
{
    return m_Graph->AddLayer<MeanLayer>(meanDescriptor, name);
}

IConnectableLayer* Network::AddPadLayer(const PadDescriptor& padDescriptor, const char* name)
{
    return m_Graph->AddLayer<PadLayer>(padDescriptor, name);
}

OptimizedNetwork::OptimizedNetwork(std::unique_ptr<Graph> graph)
    : m_Graph(std::move(graph))
{
}

OptimizedNetwork::~OptimizedNetwork()
{
}

} // namespace armnn