2 // Copyright © 2017 Arm Ltd. All rights reserved.
3 // SPDX-License-Identifier: MIT
5 #include "CaffeParser.hpp"
6 #include "RecordByRecordCaffeParser.hpp"
8 #include "armnn/Descriptors.hpp"
9 #include "armnn/INetwork.hpp"
10 #include "armnn/Utils.hpp"
11 #include "armnn/Exceptions.hpp"
13 #include "GraphTopologicalSort.hpp"
14 #include "VerificationHelpers.hpp"
16 #include <armnn/utility/Assert.hpp>
17 #include <armnn/utility/NumericCast.hpp>
19 #include <fmt/format.h>
22 #include "caffe/proto/caffe.pb.h"
25 #include <google/protobuf/io/coded_stream.h>
26 #include <google/protobuf/io/zero_copy_stream.h>
27 #include <google/protobuf/io/zero_copy_stream_impl.h>
28 #include <google/protobuf/text_format.h>
29 #include <google/protobuf/stubs/common.h>
30 #include <google/protobuf/stubs/once.h>
31 #include <google/protobuf/io/coded_stream.h>
32 #include <google/protobuf/descriptor.h>
33 #include <google/protobuf/generated_message_reflection.h>
34 #include <google/protobuf/reflection_ops.h>
35 #include <google/protobuf/wire_format.h>
42 /// Caffe networks are loaded from protobuf files (binary or text) using the protobuf library and the generated
43 /// code from caffe.pb.h. This gives us a caffe::NetParameter which is an in-memory version of the file.
44 /// This contains a flat list of Caffe 'layers' (e.g. convolution, pooling etc.).
45 /// Each layer has inputs (called "bottoms") and outputs (called "tops"). Data flows from bottom to top.
46 /// The bottoms of a layer refer to the tops of other layers (by top name, not by layer name).
47 /// The names of layers seem to be arbitrary (you could rename a layer and the network wouldn't
48 /// need any other changes).
50 /// Some layers (e.g. Relu) can be configured so that their top and bottom are both the same. This is called an
51 /// "in-place" layer and is a Caffe runtime feature used to reduce memory usage by modifying tensors in-place.
52 /// This isn't relevant to the parser and so we preprocess these layers to convert them to regular layers, to result
53 /// in a consistent graph structure.
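///
/// For illustration (a made-up prototxt fragment), an in-place ReLU looks like this:
///
///     layer { name: "conv1" type: "Convolution" bottom: "data"  top: "conv1" }
///     layer { name: "relu1" type: "ReLU"        bottom: "conv1" top: "conv1" }   // in-place
///
/// ResolveInPlaceLayers() below rewrites this so that "conv1" produces "conv1_top" and "relu1"
/// consumes "conv1_top" while keeping "conv1" as its own top, giving every tensor a single producer.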
55 namespace armnnCaffeParser
58 using namespace armnn;
59 using namespace caffe;
61 using namespace google::protobuf::io;
66 const float* GetArrayPtrFromBlob(const LayerParameter& layerParam, unsigned int blobIndex)
68 auto nBlobs = layerParam.blobs_size();
69 if (blobIndex >= armnn::numeric_cast<unsigned int>(nBlobs))
72 fmt::format("Expected data blob at index {} in layer {} not found. nBlobs={}. {}",
76 CHECK_LOCATION().AsString()));
79 const BlobProto& blob = layerParam.blobs(armnn::numeric_cast<int>(blobIndex));
81 const float* arrayPtr = blob.data().data();
85 void GetDataFromBlob(const LayerParameter& layerParam, vector<float>& outData, unsigned int blobIndex)
87 auto nBlobs = layerParam.blobs_size();
88 if (blobIndex >= armnn::numeric_cast<unsigned int>(nBlobs))
91 fmt::format("Expected data blob at index {} in layer {} not found. {}",
94 CHECK_LOCATION().AsString()));
97 const BlobProto& blob = layerParam.blobs(armnn::numeric_cast<int>(blobIndex));
99 size_t blobSize = armnn::numeric_cast<size_t>(blob.data_size());
100 if (blobSize != outData.size())
102 throw ParseException(
103 fmt::format("Data blob at index {} in layer {} has an unexpected size. "
104 "Expected {} elements but got {} elements. {}",
109 CHECK_LOCATION().AsString()));
112 int outSizeInt = armnn::numeric_cast<int>(outData.size());
113 for (int i = 0; i < outSizeInt; ++i)
115 outData[static_cast<size_t>(i)] = blob.data(i);
119 template <typename T>
120 size_t SizeOfVectorData(const vector<T>& vec)
122 return vec.size() * sizeof(T);
125 void ValidateNumInputsOutputs(const caffe::LayerParameter& layerParameter,
126 unsigned int numInputs,
127 unsigned int numOutputs)
129 int numInputsActual = layerParameter.bottom_size();
130 if (numInputs != armnn::numeric_cast<unsigned int>(numInputsActual))
132 throw ParseException(
133 fmt::format("Invalid number of inputs requested {} for layer {} "
134 "while only {} present. {}",
136 layerParameter.name(),
138 CHECK_LOCATION().AsString()));
141 int numOutputsActual = layerParameter.top_size();
142 if (numOutputs != armnn::numeric_cast<unsigned int>(numOutputsActual))
144 throw ParseException(
145 fmt::format("Invalid number of outputs requested {} for layer {} "
146 "while only {} present. {}",
148 layerParameter.name(),
150 CHECK_LOCATION().AsString()));
154 template <typename ParamType, typename ExtractOptional, typename ExtractFallback, typename ValueType>
155 ValueType GetOptionalWithFallback(const ParamType& param,
156 ExtractOptional extractOptional,
157 ExtractFallback extractFallback,
158 ValueType defaultValue)
160 auto optValue = extractOptional(param, defaultValue);
163 return optValue.second;
165 auto fallbackValue = extractFallback(param, defaultValue);
166 return fallbackValue.second;
169 #define GET_OPTIONAL_WITH_VECTOR_FALLBACK(PARAM, \
175 GetOptionalWithFallback( \
177 [](const PARAM_TYPE & param, VALUE_TYPE defaultValue) \
179 if (param.has_##OPTIONAL_VALUE ()) \
181 return std::make_pair(true, param.OPTIONAL_VALUE ()); \
185 return std::make_pair(false, defaultValue); \
188 [](const PARAM_TYPE & param, VALUE_TYPE defaultValue) \
190 if (param.FALLBACK_VECTOR##_size() > 0) \
192 return std::make_pair(true, (param.FALLBACK_VECTOR ()).Get(0)); \
196 return std::make_pair(false, defaultValue); \
201 #define GET_OPTIONAL_WITH_FALLBACK(PARAM, \
207 GetOptionalWithFallback( \
209 [](const PARAM_TYPE & param, VALUE_TYPE defaultValue) \
211 if (param.has_##OPTIONAL_VALUE ()) \
213 return std::make_pair(true, param.OPTIONAL_VALUE ()); \
217 return std::make_pair(false, defaultValue); \
220 [](const PARAM_TYPE & param, VALUE_TYPE defaultValue) \
222 if (param.has_##FALLBACK_VALUE ()) \
224 return std::make_pair(true, param.FALLBACK_VALUE ()); \
228 return std::make_pair(false, defaultValue); \
233 } // namespace <anonymous>
235 const std::map<std::string, CaffeParserBase::OperationParsingFunction>
236 CaffeParserBase::ms_CaffeLayerNameToParsingFunctions = {
237 { "Input", &CaffeParserBase::ParseInputLayer },
238 { "Convolution", &CaffeParserBase::ParseConvLayer },
239 { "Pooling", &CaffeParserBase::ParsePoolingLayer },
240 { "ReLU", &CaffeParserBase::ParseReluLayer },
241 { "LRN", &CaffeParserBase::ParseLRNLayer },
242 { "InnerProduct", &CaffeParserBase::ParseInnerProductLayer },
243 { "Softmax", &CaffeParserBase::ParseSoftmaxLayer },
244 { "Eltwise", &CaffeParserBase::ParseEltwiseLayer },
245 { "Concat", &CaffeParserBase::ParseConcatLayer },
246 { "BatchNorm", &CaffeParserBase::ParseBatchNormLayer },
247 { "Scale", &CaffeParserBase::ParseScaleLayer },
248 { "Split", &CaffeParserBase::ParseSplitLayer },
249 { "Dropout", &CaffeParserBase::ParseDropoutLayer},
252 ICaffeParser* ICaffeParser::CreateRaw()
254 return new RecordByRecordCaffeParser();
257 ICaffeParserPtr ICaffeParser::Create()
259 return ICaffeParserPtr(CreateRaw(), &ICaffeParser::Destroy);
262 void ICaffeParser::Destroy(ICaffeParser* parser)
267 CaffeParserBase::CaffeParserBase()
268 : m_Network(nullptr, nullptr)
273 CaffeParser::CaffeParser()
279 BindingPointInfo CaffeParserBase::GetNetworkInputBindingInfo(const std::string& name) const
281 return GetBindingInfo(name, "input", m_NetworkInputsBindingInfo);
284 BindingPointInfo CaffeParserBase::GetNetworkOutputBindingInfo(const std::string& name) const
286 return GetBindingInfo(name, "output", m_NetworkOutputsBindingInfo);
289 std::pair<armnn::LayerBindingId, armnn::TensorInfo> CaffeParserBase::GetBindingInfo(const std::string& layerName,
290 const char* bindingPointDesc,
291 const std::unordered_map<std::string, BindingPointInfo>& nameToBindingInfo)
293 auto it = nameToBindingInfo.find(layerName);
294 if (it == nameToBindingInfo.end())
296 throw InvalidArgumentException(
297 fmt::format("Unknown binding {} for layer '{}'. {}",
300 CHECK_LOCATION().AsString()));
305 TensorInfo CaffeParserBase::BlobShapeToTensorInfo(const caffe::BlobShape& blobShape) const
307 std::vector<unsigned int> shape;
308 for (int j = 0; j < blobShape.dim_size(); ++j)
310 shape.push_back(static_cast<unsigned int>(blobShape.dim(j)));
313 return TensorInfo(armnn::numeric_cast<unsigned int>(shape.size()), shape.data(), DataType::Float32);
316 BlobShape TensorDescToBlobShape(const TensorInfo& desc)
319 for (unsigned int i = 0; i < desc.GetNumDimensions(); ++i)
322 ret.set_dim(armnn::numeric_cast<int>(i), desc.GetShape()[i]);
328 // Note: can move to CaffeParser when/if we optimise the text/string format
329 // to load on a layer by layer basis
330 vector<const LayerParameter*> CaffeParserBase::GetInputs(const LayerParameter& layerParam)
332 std::vector<const caffe::LayerParameter*> ret;
333 ret.reserve(armnn::numeric_cast<size_t>(layerParam.bottom_size()));
334 for (int j = 0; j < layerParam.bottom_size(); ++j)
336 std::string inputName = layerParam.bottom(j);
337 auto inputIt = m_CaffeLayersByTopName.find(inputName);
338 if (inputIt == m_CaffeLayersByTopName.end())
340 throw ParseException(
341 fmt::format("Can't find Caffe layer with top called '{}', "
342 "which is listed as an input of '{}'. {}",
345 CHECK_LOCATION().AsString()));
347 ret.push_back(inputIt->second);
353 void CaffeParserBase::ParseInputLayer(const LayerParameter& layerParam)
355 ARMNN_ASSERT(layerParam.type() == "Input");
356 ValidateNumInputsOutputs(layerParam, 0, 1);
358 const InputParameter& param = layerParam.input_param();
360 const armnn::LayerBindingId inputId = armnn::numeric_cast<armnn::LayerBindingId>(
361 m_NetworkInputsBindingInfo.size());
362 armnn::IConnectableLayer* const inputLayer = m_Network->AddInputLayer(inputId, layerParam.name().c_str());
364 // Decides the tensor info for this input. This can be specified in the Caffe network but can also
365 // be overridden by user input (m_InputShapes).
366 armnn::TensorInfo inputTensorInfo;
368 const BlobShape* originalShape = param.shape_size() > 0 && param.shape(0).dim_size() > 0 ?
369 &param.shape(0) : nullptr;
372 inputTensorInfo = BlobShapeToTensorInfo(*originalShape);
375 auto overrideIt = m_InputShapes.find(layerParam.name());
376 if (overrideIt != m_InputShapes.end())
378 const TensorShape& overrideShape = overrideIt->second;
380 ( originalShape->dim(1) != overrideShape[1]
381 || originalShape->dim(2) != overrideShape[2]
382 || originalShape->dim(3) != overrideShape[3]))
384 throw ParseException(
385 fmt::format("Parsed input shape for '{}' is incompatible with the override provided. {}",
387 CHECK_LOCATION().AsString()));
389 inputTensorInfo.SetShape(overrideShape);
391 else if (!originalShape)
393 throw ParseException(
394 fmt::format("No input descriptor given for '{}' and no input shape found in caffe model. {}",
396 CHECK_LOCATION().AsString()));
399 TrackInputBinding(inputLayer, inputId, inputTensorInfo);
400 inputLayer->GetOutputSlot(0).SetTensorInfo(inputTensorInfo);
401 SetArmnnOutputSlotForCaffeTop(layerParam.top(0), inputLayer->GetOutputSlot(0));
404 void CaffeParserBase::AddConvLayerWithSplits(const caffe::LayerParameter& layerParam,
405 const armnn::Convolution2dDescriptor& desc,
406 unsigned int kernelW,
407 unsigned int kernelH)
409 ARMNN_ASSERT(layerParam.type() == "Convolution");
410 ValidateNumInputsOutputs(layerParam, 1, 1);
412 ConvolutionParameter convParam = layerParam.convolution_param();
413 BlobShape inputShape = TensorDescToBlobShape(GetArmnnOutputSlotForCaffeTop(layerParam.bottom(0)).GetTensorInfo());
414 const unsigned int numGroups = convParam.has_group() ? convParam.group() : 1;
416 // assume these were already verified by the calling ParseConvLayer() function
417 ARMNN_ASSERT(numGroups < inputShape.dim(1));
418 ARMNN_ASSERT(numGroups > 1);
421 armnn::IOutputSlot& inputConnection = GetArmnnOutputSlotForCaffeTop(layerParam.bottom(0));
423 vector<string> convLayerNames(numGroups);
424 vector<armnn::IConnectableLayer*> convLayers(numGroups);
425 convLayerNames[0] = layerParam.name();
427 // This convolution is to be applied to chunks of the input data so add a splitter layer
429 // Redirect the convolution input to the splitter
430 unsigned int splitterDimSizes[4] = {static_cast<unsigned int>(inputShape.dim(0)),
431 static_cast<unsigned int>(inputShape.dim(1)),
432 static_cast<unsigned int>(inputShape.dim(2)),
433 static_cast<unsigned int>(inputShape.dim(3))};
435 // Split dimension 1 of the splitter output shape and conv input shapes
436 // according to the number of groups
438 splitterDimSizes[1] /= numGroups;
439 inputShape.set_dim(1, splitterDimSizes[1]);
441 // This is used to describe how the input is to be split
442 ViewsDescriptor splitterDesc(numGroups);
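// Illustrative example (sizes assumed, not taken from any particular model): with 256 input channels,
// group = 2 and num_output = 64, the splitter produces two 128-channel views, each view is convolved
// with 32 filters, and the two 32-channel results are concatenated back along dimension 1 further
// below to give the full 64 output channels.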
444 // Create an output node for each group, giving each a unique name
445 for (unsigned int g = 0; g < numGroups; ++g)
447 // Work out the names of the splitter layer's child convolutions
449 ss << layerParam.name() << "_" << g;
450 convLayerNames[g] = ss.str();
452 splitterDesc.SetViewOriginCoord(g, 1, splitterDimSizes[1] * g);
454 // Set the size of the views.
455 for (unsigned int dimIdx=0; dimIdx < 4; dimIdx++)
457 splitterDesc.SetViewSize(g, dimIdx, splitterDimSizes[dimIdx]);
461 const std::string splitterLayerName = std::string("splitter_") + layerParam.bottom(0);
462 armnn::IConnectableLayer* splitterLayer = m_Network->AddSplitterLayer(splitterDesc, splitterLayerName.c_str());
464 inputConnection.Connect(splitterLayer->GetInputSlot(0));
465 for (unsigned int i = 0; i < splitterLayer->GetNumOutputSlots(); i++)
467 splitterLayer->GetOutputSlot(i).SetTensorInfo(BlobShapeToTensorInfo(inputShape));
470 unsigned int numFilters = convParam.num_output();
472 // Populates convolution output tensor descriptor dimensions.
473 BlobShape outputShape;
474 outputShape.add_dim(0);
475 outputShape.set_dim(0, inputShape.dim(0));
476 outputShape.add_dim(1);
477 // Ensures that dimension 1 of the convolution output is split according to the number of groups.
478 outputShape.set_dim(1, numFilters / numGroups);
479 outputShape.add_dim(2);
481 2, (static_cast<int>(
482 static_cast<float>(inputShape.dim(2) + 2 * desc.m_PadBottom - kernelH) /
483 static_cast<float>(desc.m_StrideY)) + 1));
484 outputShape.add_dim(3);
486 3, (static_cast<int>(
487 static_cast<float>(inputShape.dim(3) + 2 * desc.m_PadRight - kernelW) /
488 static_cast<float>(desc.m_StrideX)) + 1));
490 // Load the weight data for ALL groups
491 vector<float> weightData(armnn::numeric_cast<size_t>(numGroups *
492 inputShape.dim(1) * // number of input channels
493 outputShape.dim(1) * // number of output channels
496 GetDataFromBlob(layerParam, weightData, 0);
498 const unsigned int weightDimSizes[4] = {
499 static_cast<unsigned int>(outputShape.dim(1)),
500 static_cast<unsigned int>(inputShape.dim(1)),
505 vector<float> biasData;
507 if (desc.m_BiasEnabled)
509 biasData.resize(armnn::numeric_cast<size_t>(numGroups * outputShape.dim(1)), 1.f);
510 GetDataFromBlob(layerParam, biasData, 1);
512 const unsigned int biasDimSizes[1] = {static_cast<unsigned int>(outputShape.dim(1))};
513 biasInfo = TensorInfo(1, biasDimSizes, DataType::Float32);
516 const unsigned int numWeightsPerGroup = armnn::numeric_cast<unsigned int>(weightData.size()) / numGroups;
517 const unsigned int numBiasesPerGroup = armnn::numeric_cast<unsigned int>(biasData.size()) / numGroups;
519 for (unsigned int g = 0; g < numGroups; ++g)
521 // Sets the slot index: group 0 should be connected to the 0th output of the splitter,
522 // group 1 to the 1st output of the splitter, and so on.
524 // Pulls out the weights for this group from the data loaded from the model file earlier.
525 ConstTensor weights(TensorInfo(4, weightDimSizes, DataType::Float32),
526 weightData.data() + numWeightsPerGroup * g);
528 IConnectableLayer* convLayer = nullptr;
529 Optional<ConstTensor> optionalBiases;
530 if (desc.m_BiasEnabled)
532 // Pulls out the biases for this group from the data loaded from the model file earlier.
533 ConstTensor biases(biasInfo, biasData.data() + numBiasesPerGroup * g);
534 optionalBiases = Optional<ConstTensor>(biases);
536 convLayer = m_Network->AddConvolution2dLayer(desc,
539 convLayerNames[g].c_str());
540 convLayers[g] = convLayer;
542 // If we have more than one group then the input to the nth convolution is the splitter layer's nth output,
543 // otherwise it's the regular input to this layer.
544 armnn::IOutputSlot& splitterInputConnection =
545 splitterLayer ? splitterLayer->GetOutputSlot(g) : inputConnection;
546 splitterInputConnection.Connect(convLayer->GetInputSlot(0));
547 convLayer->GetOutputSlot(0).SetTensorInfo(BlobShapeToTensorInfo(outputShape));
550 // If the convolution was performed in chunks, add a layer to concatenate the results
552 // The merge input shape matches that of the convolution output
553 unsigned int concatDimSizes[4] = {static_cast<unsigned int>(outputShape.dim(0)),
554 static_cast<unsigned int>(outputShape.dim(1)),
555 static_cast<unsigned int>(outputShape.dim(2)),
556 static_cast<unsigned int>(outputShape.dim(3))};
558 // This is used to describe how the input is to be concatenated
559 OriginsDescriptor concatDesc(numGroups);
561 // Now create an input node for each group, using the name from
562 // the output of the corresponding convolution
563 for (unsigned int g = 0; g < numGroups; ++g)
565 concatDesc.SetViewOriginCoord(g, 1, concatDimSizes[1] * g);
568 // Make sure the output from the concat is the correct size to hold the data for all groups
569 concatDimSizes[1] *= numGroups;
570 outputShape.set_dim(1, concatDimSizes[1]);
572 // Finally add the concat layer
573 IConnectableLayer* concatLayer = m_Network->AddConcatLayer(concatDesc, layerParam.name().c_str());
577 throw ParseException(
578 fmt::format("Failed to create final concat layer for Split+Convolution+Concat. "
579 "Layer={} #groups={} #filters={} {}",
583 CHECK_LOCATION().AsString()));
586 for (unsigned int g = 0; g < numGroups; ++g)
588 convLayers[g]->GetOutputSlot(0).Connect(concatLayer->GetInputSlot(g));
590 concatLayer->GetOutputSlot(0).SetTensorInfo(armnn::TensorInfo(4, concatDimSizes, DataType::Float32));
591 SetArmnnOutputSlotForCaffeTop(layerParam.top(0), concatLayer->GetOutputSlot(0));
594 void CaffeParserBase::AddConvLayerWithDepthwiseConv(const caffe::LayerParameter& layerParam,
595 const armnn::Convolution2dDescriptor& convDesc,
596 unsigned int kernelW,
597 unsigned int kernelH)
599 ARMNN_ASSERT(layerParam.type() == "Convolution");
600 ValidateNumInputsOutputs(layerParam, 1, 1);
602 ConvolutionParameter convParam = layerParam.convolution_param();
603 BlobShape inputShape = TensorDescToBlobShape(GetArmnnOutputSlotForCaffeTop(layerParam.bottom(0)).GetTensorInfo());
605 DepthwiseConvolution2dDescriptor desc;
606 desc.m_PadLeft = convDesc.m_PadLeft;
607 desc.m_PadRight = convDesc.m_PadRight;
608 desc.m_PadTop = convDesc.m_PadTop;
609 desc.m_PadBottom = convDesc.m_PadBottom;
610 desc.m_StrideX = convDesc.m_StrideX;
611 desc.m_StrideY = convDesc.m_StrideY;
612 desc.m_BiasEnabled = convDesc.m_BiasEnabled;
614 unsigned int numFilters = convParam.num_output();
616 BlobShape outputShape;
617 outputShape.add_dim(0);
618 outputShape.set_dim(0, inputShape.dim(0));
619 outputShape.add_dim(1);
620 outputShape.set_dim(1, numFilters);
621 outputShape.add_dim(2);
623 2, (static_cast<int>(
624 static_cast<float>(inputShape.dim(2) + 2 * desc.m_PadBottom - kernelH) /
625 static_cast<float>(desc.m_StrideY)) + 1));
626 outputShape.add_dim(3);
628 3, (static_cast<int>(
629 static_cast<float>(inputShape.dim(3) + 2 * desc.m_PadRight - kernelW) /
630 static_cast<float>(desc.m_StrideX)) + 1));
632 // Load the weight data
633 size_t allWeightsSize = armnn::numeric_cast<size_t>(inputShape.dim(1) * kernelH * kernelW);
634 vector<float> weightData(allWeightsSize);
636 GetDataFromBlob(layerParam, weightData, 0);
638 // depth multiplier will be 1 for the depthwise convolution
639 const unsigned int weightDimSizes[4] = {
640 static_cast<unsigned int>(1), // depth multiplier
641 static_cast<unsigned int>(inputShape.dim(1)), // #channels
645 armnn::IConnectableLayer* returnLayer = nullptr;
646 ConstTensor weights(TensorInfo(4, weightDimSizes, DataType::Float32), weightData.data());
647 Optional<ConstTensor> optionalBiases;
648 vector<float> biasData;
649 if (desc.m_BiasEnabled)
653 biasData.resize(armnn::numeric_cast<size_t>(outputShape.dim(1)), 1.f);
654 GetDataFromBlob(layerParam, biasData, 1);
656 const unsigned int biasDimSizes[1] = {static_cast<unsigned int>(outputShape.dim(1))};
657 biasInfo = TensorInfo(1, biasDimSizes, DataType::Float32);
659 ConstTensor biases(biasInfo, biasData.data());
660 optionalBiases = Optional<ConstTensor>(biases);
662 returnLayer = m_Network->AddDepthwiseConvolution2dLayer(desc,
665 layerParam.name().c_str());
669 throw ParseException(
670 fmt::format("Failed to create depthwise convolution layer. "
671 "Layer={} #filters={} {}",
674 CHECK_LOCATION().AsString()));
676 armnn::IOutputSlot& inputConnection = GetArmnnOutputSlotForCaffeTop(layerParam.bottom(0));
677 inputConnection.Connect(returnLayer->GetInputSlot(0));
678 returnLayer->GetOutputSlot(0).SetTensorInfo(BlobShapeToTensorInfo(outputShape));
679 SetArmnnOutputSlotForCaffeTop(layerParam.top(0), returnLayer->GetOutputSlot(0));
682 void CaffeParserBase::ParseConvLayer(const LayerParameter& layerParam)
684 // Ignored Caffe Parameters
692 // Not Available ArmNN Interface Parameters
693 // * Rounding policy;
695 ARMNN_ASSERT(layerParam.type() == "Convolution");
696 ValidateNumInputsOutputs(layerParam, 1, 1);
698 ConvolutionParameter convParam = layerParam.convolution_param();
699 BlobShape inputShape = TensorDescToBlobShape(GetArmnnOutputSlotForCaffeTop(layerParam.bottom(0)).GetTensorInfo());
700 const unsigned int numGroups = convParam.has_group() ? convParam.group() : 1;
701 unsigned int numFilters = convParam.num_output();
703 const auto notFound = std::numeric_limits<unsigned int>::max();
705 unsigned int kernelH = GET_OPTIONAL_WITH_VECTOR_FALLBACK(convParam, ConvolutionParameter,
706 kernel_h, kernel_size, unsigned int, notFound);
707 unsigned int kernelW = GET_OPTIONAL_WITH_VECTOR_FALLBACK(convParam, ConvolutionParameter,
708 kernel_w, kernel_size, unsigned int, notFound);
710 unsigned int strideH = GET_OPTIONAL_WITH_VECTOR_FALLBACK(convParam, ConvolutionParameter,
711 stride_h, stride, unsigned int, 1u);
712 unsigned int strideW = GET_OPTIONAL_WITH_VECTOR_FALLBACK(convParam, ConvolutionParameter,
713 stride_w, stride, unsigned int, 1u);
715 unsigned int padH = GET_OPTIONAL_WITH_VECTOR_FALLBACK(convParam, ConvolutionParameter,
716 pad_h, pad, unsigned int, 0u);
717 unsigned int padW = GET_OPTIONAL_WITH_VECTOR_FALLBACK(convParam, ConvolutionParameter,
718 pad_w, pad, unsigned int, 0u);
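// Note on the fallback macros above: each value is taken from the explicit per-axis field when it
// is present (e.g. kernel_h), otherwise from the first entry of the corresponding repeated field
// (e.g. kernel_size), otherwise from the supplied default (notFound, 1u or 0u here).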
720 Convolution2dDescriptor convolution2dDescriptor;
721 convolution2dDescriptor.m_PadLeft = padW;
722 convolution2dDescriptor.m_PadRight = padW;
723 convolution2dDescriptor.m_PadTop = padH;
724 convolution2dDescriptor.m_PadBottom = padH;
725 convolution2dDescriptor.m_StrideX = strideW;
726 convolution2dDescriptor.m_StrideY = strideH;
727 convolution2dDescriptor.m_BiasEnabled = convParam.has_bias_term() ? convParam.bias_term() : true;
729 if (numGroups > numFilters)
731 throw ParseException(
732 fmt::format("Error parsing Convolution: {}. "
733 "The 'group'={} parameter cannot be larger than the "
734 "number of filters supplied ='{}'. {}",
738 CHECK_LOCATION().AsString()));
741 if (inputShape.dim_size() != 4)
743 throw ParseException(
744 fmt::format("Convolution input shape is expected to have 4 dimensions. "
745 "{}'s input has only {}. {}",
747 inputShape.dim_size(),
748 CHECK_LOCATION().AsString()));
753 if (numGroups > inputShape.dim(1))
755 throw ParseException(
756 fmt::format("Error parsing Convolution: {}. "
757 "The 'group'={} parameter cannot be larger than the "
758 "channel of the input shape={} (in NCHW format). {}",
762 CHECK_LOCATION().AsString()));
764 else if (numGroups == inputShape.dim(1))
766 // we use a depthwise convolution here, because the number of groups equals the number of input channels
768 AddConvLayerWithDepthwiseConv(layerParam, convolution2dDescriptor, kernelW, kernelH);
773 // we split the input by channels into channels/groups separate convolutions
774 // and concatenate the results afterwards
775 AddConvLayerWithSplits(layerParam, convolution2dDescriptor, kernelW, kernelH);
780 // NOTE: at this point we only need to handle #group=1 case, all other cases should be
781 // handled by the AddConvLayer* helpers
783 // Populate convolution output tensor descriptor dimensions
784 BlobShape outputShape;
785 outputShape.add_dim(0);
786 outputShape.set_dim(0, inputShape.dim(0));
787 outputShape.add_dim(1);
788 outputShape.set_dim(1, numFilters);
789 outputShape.add_dim(2);
791 2, (static_cast<int>(
792 static_cast<float>(inputShape.dim(2) + 2 * padH - kernelH) /
793 static_cast<float>(strideH)) + 1));
794 outputShape.add_dim(3);
796 3, (static_cast<int>(
797 static_cast<float>(inputShape.dim(3) + 2 * padW - kernelW) /
798 static_cast<float>(strideW)) + 1));
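// The spatial output size follows the usual convolution arithmetic, e.g. for the height:
//     outH = floor((inH + 2 * padH - kernelH) / strideH) + 1
// so, for example (illustrative sizes), inH = 224, padH = 3, kernelH = 7, strideH = 2 gives
// floor(223 / 2) + 1 = 112.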
800 // Load the weight data for ALL groups
801 vector<float> weightData(armnn::numeric_cast<size_t>(inputShape.dim(1) *
805 GetDataFromBlob(layerParam, weightData, 0);
807 const unsigned int weightDimSizes[4] = {
808 static_cast<unsigned int>(outputShape.dim(1)), // output channels
809 static_cast<unsigned int>(inputShape.dim(1)), // input channels
813 armnn::IConnectableLayer* returnLayer = nullptr;
815 // Pull out the weights for this group from the data loaded from the model file earlier
816 ConstTensor weights(TensorInfo(4, weightDimSizes, DataType::Float32), weightData.data());
817 Optional<ConstTensor> optionalBiases;
818 vector<float> biasData;
819 if (convolution2dDescriptor.m_BiasEnabled)
823 biasData.resize(armnn::numeric_cast<size_t>(outputShape.dim(1)), 1.f);
824 GetDataFromBlob(layerParam, biasData, 1);
826 const unsigned int biasDimSizes[1] = {static_cast<unsigned int>(outputShape.dim(1))};
827 biasInfo = TensorInfo(1, biasDimSizes, DataType::Float32);
829 // Pull out the biases for this group from the data loaded from the model file earlier
830 ConstTensor biases(biasInfo, biasData.data());
831 optionalBiases = Optional<ConstTensor>(biases);
833 returnLayer = m_Network->AddConvolution2dLayer(convolution2dDescriptor,
836 layerParam.name().c_str());
838 armnn::IOutputSlot& inputConnection = GetArmnnOutputSlotForCaffeTop(layerParam.bottom(0));
839 inputConnection.Connect(returnLayer->GetInputSlot(0));
840 returnLayer->GetOutputSlot(0).SetTensorInfo(BlobShapeToTensorInfo(outputShape));
844 throw ParseException(
845 fmt::format("Failed to create Convolution layer. "
846 "Layer={} #groups={} #filters={} {}",
850 CHECK_LOCATION().AsString()));
853 SetArmnnOutputSlotForCaffeTop(layerParam.top(0), returnLayer->GetOutputSlot(0));
856 void CaffeParserBase::ParsePoolingLayer(const LayerParameter& layerParam)
858 // Ignored Caffe Parameters
859 // Stochastic Pooling
862 ValidateNumInputsOutputs(layerParam, 1, 1);
863 PoolingParameter param = layerParam.pooling_param();
864 const TensorInfo& inputInfo = GetArmnnOutputSlotForCaffeTop(layerParam.bottom(0)).GetTensorInfo();
866 const auto notFound = std::numeric_limits<unsigned int>::max();
868 unsigned int kernel_h = GET_OPTIONAL_WITH_FALLBACK(param, PoolingParameter,
869 kernel_h, kernel_size, unsigned int, notFound);
870 unsigned int kernel_w = GET_OPTIONAL_WITH_FALLBACK(param, PoolingParameter,
871 kernel_w, kernel_size, unsigned int, notFound);
873 if ((kernel_h == notFound || kernel_w == notFound) && param.has_global_pooling())
875 kernel_h = inputInfo.GetShape()[2];
876 kernel_w = inputInfo.GetShape()[3];
879 unsigned int stride_h = GET_OPTIONAL_WITH_FALLBACK(param, PoolingParameter,
880 stride_h, stride, unsigned int, notFound);
881 unsigned int stride_w = GET_OPTIONAL_WITH_FALLBACK(param, PoolingParameter,
882 stride_w, stride, unsigned int, notFound);
884 if ((stride_h == notFound || stride_w == notFound) && param.has_global_pooling())
890 unsigned int pad_h = GET_OPTIONAL_WITH_FALLBACK(param, PoolingParameter,
891 pad_h, pad, unsigned int, 0u);
892 unsigned int pad_w = GET_OPTIONAL_WITH_FALLBACK(param, PoolingParameter,
893 pad_w, pad, unsigned int, 0u);
895 // Populate the pooling descriptor
896 Pooling2dDescriptor pooling2dDescriptor;
897 if (param.has_pool())
899 PoolingParameter_PoolMethod p = param.pool();
902 case PoolingParameter_PoolMethod_MAX:
904 pooling2dDescriptor.m_PoolType = PoolingAlgorithm::Max;
907 case PoolingParameter_PoolMethod_AVE:
909 pooling2dDescriptor.m_PoolType = PoolingAlgorithm::Average;
912 case PoolingParameter_PoolMethod_STOCHASTIC:
914 throw ParseException(
915 fmt::format("Pooling Layer: Stochastic Pooling Not Supported. Layer={} {}",
917 CHECK_LOCATION().AsString()));
921 throw ParseException(
922 fmt::format("Pooling Layer: unknown pooling method: {} for layer: {} {}",
925 CHECK_LOCATION().AsString()));
931 throw ParseException(
932 fmt::format("No Pooling Method Defined for {} {}",
934 CHECK_LOCATION().AsString()));
937 pooling2dDescriptor.m_PadLeft = pad_w;
938 pooling2dDescriptor.m_PadRight = pad_w;
939 pooling2dDescriptor.m_PadTop = pad_h;
940 pooling2dDescriptor.m_PadBottom = pad_h;
941 pooling2dDescriptor.m_StrideX = stride_w;
942 pooling2dDescriptor.m_StrideY = stride_h;
943 pooling2dDescriptor.m_PoolWidth = kernel_w;
944 pooling2dDescriptor.m_PoolHeight = kernel_h;
946 pooling2dDescriptor.m_OutputShapeRounding = OutputShapeRounding::Ceiling;
947 pooling2dDescriptor.m_PaddingMethod = PaddingMethod::IgnoreValue;
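// Caffe rounds the pooled output size up rather than down, hence Ceiling above. For example
// (illustrative sizes), an input extent of 8 with kernel 3, stride 2 and no padding gives
// ceil((8 - 3) / 2) + 1 = 4, whereas floor rounding would give 3.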
949 armnn::IConnectableLayer* poolingLayer = m_Network->AddPooling2dLayer(pooling2dDescriptor,
950 layerParam.name().c_str());
952 TensorInfo outputInfo(
953 { inputInfo.GetShape()[0],
954 inputInfo.GetShape()[1],
955 static_cast<unsigned int>(ceil(
956 static_cast<float>(inputInfo.GetShape()[2] + 2 * pad_h - kernel_h) /
957 armnn::numeric_cast<float>(stride_h))) + 1,
958 static_cast<unsigned int>(ceil(
959 static_cast<float>(inputInfo.GetShape()[3] + 2 * pad_w - kernel_w) /
960 armnn::numeric_cast<float>(stride_w))) + 1 },
963 GetArmnnOutputSlotForCaffeTop(layerParam.bottom(0)).Connect(poolingLayer->GetInputSlot(0));
964 poolingLayer->GetOutputSlot(0).SetTensorInfo(outputInfo);
965 SetArmnnOutputSlotForCaffeTop(layerParam.top(0), poolingLayer->GetOutputSlot(0));
968 void CaffeParserBase::ParseReluLayer(const LayerParameter& layerParam)
970 ValidateNumInputsOutputs(layerParam, 1, 1);
972 const string& name = layerParam.name();
973 const ReLUParameter& param = layerParam.relu_param();
975 ActivationDescriptor activationDescriptor;
976 const float negativeSlope = param.negative_slope();
977 if (negativeSlope == 0.0f)
979 activationDescriptor.m_Function = ActivationFunction::ReLu;
983 activationDescriptor.m_Function = ActivationFunction::LeakyReLu;
984 activationDescriptor.m_A = negativeSlope;
987 const TensorInfo& inputInfo = GetArmnnOutputSlotForCaffeTop(layerParam.bottom(0)).GetTensorInfo();
988 IConnectableLayer* const activationLayer = m_Network->AddActivationLayer(activationDescriptor, name.c_str());
989 GetArmnnOutputSlotForCaffeTop(layerParam.bottom(0)).Connect(activationLayer->GetInputSlot(0));
990 activationLayer->GetOutputSlot(0).SetTensorInfo(inputInfo);
991 SetArmnnOutputSlotForCaffeTop(layerParam.top(0), activationLayer->GetOutputSlot(0));
994 void CaffeParserBase::ParseLRNLayer(const LayerParameter& layerParam)
996 ValidateNumInputsOutputs(layerParam, 1, 1);
998 LRNParameter param = layerParam.lrn_param();
1000 const TensorInfo& inputInfo = GetArmnnOutputSlotForCaffeTop(layerParam.bottom(0)).GetTensorInfo();
1002 // Ignored BATCH NORMALIZATION Caffe Parameters.
1003 // Ignored MVN Caffe Parameters.
1004 // Ignored LRN Caffe Parameters.
1007 NormalizationDescriptor normalizationDescriptor;
1008 if (param.has_norm_region())
1010 LRNParameter_NormRegion n = param.norm_region();
1013 case LRNParameter_NormRegion_ACROSS_CHANNELS:
1015 normalizationDescriptor.m_NormChannelType = NormalizationAlgorithmChannel::Across;
1018 case LRNParameter_NormRegion_WITHIN_CHANNEL:
1020 normalizationDescriptor.m_NormChannelType = NormalizationAlgorithmChannel::Within;
1025 throw ParseException(
1026 fmt::format("Unknown region {} for LRN layer {} {}",
1029 CHECK_LOCATION().AsString()));
1035 // Caffe defaults to normalization across channels.
1036 normalizationDescriptor.m_NormChannelType = NormalizationAlgorithmChannel::Across;
1039 normalizationDescriptor.m_NormMethodType = NormalizationAlgorithmMethod::LocalBrightness;
1040 if (param.has_local_size())
1042 normalizationDescriptor.m_NormSize = param.local_size();
1046 throw ParseException(
1047 fmt::format("local_size not defined for LRN layer {} {}",
1049 CHECK_LOCATION().AsString()));
1052 if (param.has_alpha())
1054 normalizationDescriptor.m_Alpha = param.alpha();
1055 normalizationDescriptor.m_Alpha /= armnn::numeric_cast<float>(param.local_size());
1059 throw ParseException(
1060 fmt::format("Alpha not defined for LRN layer {} {}",
1062 CHECK_LOCATION().AsString()));
1064 if (param.has_beta())
1066 normalizationDescriptor.m_Beta = param.beta();
1070 throw ParseException(
1071 fmt::format("Beta not defined for LRN layer {} {}",
1073 CHECK_LOCATION().AsString()));
1078 normalizationDescriptor.m_K = param.k();
1082 normalizationDescriptor.m_K = 1;
1085 IConnectableLayer* const normLayer = m_Network->AddNormalizationLayer(normalizationDescriptor,
1086 layerParam.name().c_str());
1087 GetArmnnOutputSlotForCaffeTop(layerParam.bottom(0)).Connect(normLayer->GetInputSlot(0));
1088 normLayer->GetOutputSlot(0).SetTensorInfo(inputInfo);
1090 SetArmnnOutputSlotForCaffeTop(layerParam.top(0), normLayer->GetOutputSlot(0));
1093 void CaffeParserBase::ParseInnerProductLayer(const LayerParameter& layerParam)
1095 InnerProductParameter param = layerParam.inner_product_param();
1097 ValidateNumInputsOutputs(layerParam, 1, 1);
1099 unsigned int outputSize = param.num_output();
1101 // Ignored Caffe Parameters:
1107 FullyConnectedDescriptor tensorFullyConnectedDescriptor;
1109 if (param.has_transpose())
1111 // If true, assumes transposed weights.
1112 tensorFullyConnectedDescriptor.m_TransposeWeightMatrix = param.transpose();
1116 // Caffe defaults to transposed.
1117 tensorFullyConnectedDescriptor.m_TransposeWeightMatrix = true;
1120 const TensorInfo& inputInfo = GetArmnnOutputSlotForCaffeTop(layerParam.bottom(0)).GetTensorInfo();
1122 TensorInfo weightInfo;
1123 TensorInfo biasInfo;
1125 // Allows implicit flattening of extra dimensions.
1126 unsigned int inputSize = inputInfo.GetShape()[1];
1127 for (unsigned int i = 2; i < inputInfo.GetNumDimensions(); ++i)
1129 inputSize *= inputInfo.GetShape()[i];
1132 const float* weightDataPtr = GetArrayPtrFromBlob(layerParam, 0);
1133 const unsigned int swTD[2] = { outputSize, inputSize };
1134 ConstTensor weights(TensorInfo(2, swTD, DataType::Float32), weightDataPtr);
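// With the implicit flattening above, e.g. a hypothetical input of shape { N, 256, 6, 6 } gives
// inputSize = 256 * 6 * 6 = 9216, so the weight tensor is { outputSize, 9216 }.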
1136 tensorFullyConnectedDescriptor.m_BiasEnabled = true;
1137 // TODO: check whether the bias term is actually enabled in the model.
1138 armnn::IConnectableLayer* fullyConnectedLayer = nullptr;
1139 if (tensorFullyConnectedDescriptor.m_BiasEnabled)
1142 const float* biasDataPtr = GetArrayPtrFromBlob(layerParam, 1);
1144 const unsigned int sbTD[1] = { outputSize };
1146 ConstTensor biases(TensorInfo(1, sbTD, DataType::Float32), biasDataPtr);
1148 fullyConnectedLayer = m_Network->AddFullyConnectedLayer(tensorFullyConnectedDescriptor,
1150 Optional<ConstTensor>(biases),
1151 layerParam.name().c_str());
1155 fullyConnectedLayer = m_Network->AddFullyConnectedLayer(tensorFullyConnectedDescriptor,
1158 layerParam.name().c_str());
1161 TensorInfo outputInfo({ inputInfo.GetShape()[0], outputSize }, DataType::Float32);
1162 GetArmnnOutputSlotForCaffeTop(layerParam.bottom(0)).Connect(fullyConnectedLayer->GetInputSlot(0));
1163 fullyConnectedLayer->GetOutputSlot(0).SetTensorInfo(outputInfo);
1164 SetArmnnOutputSlotForCaffeTop(layerParam.top(0), fullyConnectedLayer->GetOutputSlot(0));
1167 void CaffeParserBase::ParseSoftmaxLayer(const LayerParameter& layerParam)
1169 ValidateNumInputsOutputs(layerParam, 1, 1);
1171 SoftmaxParameter param = layerParam.softmax_param();
1173 const TensorInfo& inputInfo = GetArmnnOutputSlotForCaffeTop(layerParam.bottom(0)).GetTensorInfo();
1175 // Ignored Caffe Parameters:
1179 armnn::SoftmaxDescriptor softmaxDescriptor;
1180 softmaxDescriptor.m_Axis = 1;
1181 armnn::IConnectableLayer* const softmaxLayer = m_Network->AddSoftmaxLayer(
1183 layerParam.name().c_str());
1184 GetArmnnOutputSlotForCaffeTop(layerParam.bottom(0)).Connect(softmaxLayer->GetInputSlot(0));
1185 softmaxLayer->GetOutputSlot(0).SetTensorInfo(inputInfo);
1186 SetArmnnOutputSlotForCaffeTop(layerParam.top(0), softmaxLayer->GetOutputSlot(0));
1189 void CaffeParserBase::ParseEltwiseLayer(const LayerParameter& layerParam)
1191 ValidateNumInputsOutputs(layerParam, 2, 1);
1193 const TensorInfo& inputInfo = GetArmnnOutputSlotForCaffeTop(layerParam.bottom(0)).GetTensorInfo();
1195 // Ignored Caffe Parameters:
1198 EltwiseParameter_EltwiseOp operation = EltwiseParameter_EltwiseOp_SUM; // Defaults to sum as per caffe.
1200 if (layerParam.has_eltwise_param() && layerParam.eltwise_param().has_operation())
1202 operation = layerParam.eltwise_param().operation();
1205 armnn::IConnectableLayer* newLayer = nullptr;
1208 case EltwiseParameter_EltwiseOp_SUM:
1210 newLayer = m_Network->AddAdditionLayer(layerParam.name().c_str());
1213 case EltwiseParameter_EltwiseOp_PROD:
1215 newLayer = m_Network->AddMultiplicationLayer(layerParam.name().c_str());
1220 throw ParseException(
1221 fmt::format("Unsupported operation {} in Eltwise layer {} {}",
1224 CHECK_LOCATION().AsString()));
1228 GetArmnnOutputSlotForCaffeTop(layerParam.bottom(0)).Connect(newLayer->GetInputSlot(0));
1229 GetArmnnOutputSlotForCaffeTop(layerParam.bottom(1)).Connect(newLayer->GetInputSlot(1));
1230 newLayer->GetOutputSlot(0).SetTensorInfo(inputInfo);
1231 SetArmnnOutputSlotForCaffeTop(layerParam.top(0), newLayer->GetOutputSlot(0));
1234 void CaffeParserBase::ParseConcatLayer(const LayerParameter& layerParam)
1236 unsigned int numInputs = static_cast<unsigned int>(layerParam.bottom_size());
1237 // We assume concat happens along the channel dimension, which is 1 in (0, 1, 2, 3).
1238 unsigned int concatDim = 1;
1239 unsigned int numOfDims = 4;
1241 // We only consider 4-D tensors here.
1242 OriginsDescriptor concatDescriptor(static_cast<uint32_t>(numInputs), numOfDims);
1243 std::vector<unsigned int>mergeDimSizes(numOfDims, 0u);
1245 unsigned int mergeDim = 0;
1246 for (unsigned int viewIndex = 0; viewIndex < numInputs; ++viewIndex)
1248 const TensorInfo& inputInfo = GetArmnnOutputSlotForCaffeTop(
1249 layerParam.bottom(armnn::numeric_cast<int>(viewIndex))).GetTensorInfo();
1250 // Checks whether the dimensions of the input tensors are actually 4.
1251 if (inputInfo.GetNumDimensions()!=4)
1253 throw ParseException(
1254 fmt::format("The number of dimensions for input tensors of "
1255 "the concatenation op should be 4. Inputs of {} has "
1256 "{} dimensions. {}",
1258 inputInfo.GetNumDimensions(),
1259 CHECK_LOCATION().AsString()));
1262 mergeDimSizes[0] = inputInfo.GetShape()[0];
1263 mergeDimSizes[1] = inputInfo.GetShape()[1];
1264 mergeDimSizes[2] = inputInfo.GetShape()[2];
1265 mergeDimSizes[3] = inputInfo.GetShape()[3];
1267 for (unsigned int j = 0; j < concatDim; ++j)
1269 concatDescriptor.SetViewOriginCoord(viewIndex, j, 0);
1272 concatDescriptor.SetViewOriginCoord(viewIndex, concatDim, mergeDim);
1273 mergeDim += mergeDimSizes[concatDim];
1275 for (unsigned int j = concatDim+1; j < numOfDims; ++j)
1277 concatDescriptor.SetViewOriginCoord(viewIndex, j, 0);
1280 mergeDimSizes[concatDim] = mergeDim;
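// For example (sizes assumed): concatenating two inputs with 64 and 128 channels gives view
// origins 0 and 64 along dimension 1 and an output with 192 channels.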
1282 armnn::IConnectableLayer* concatlayer = m_Network->AddConcatLayer(concatDescriptor, layerParam.name().c_str());
1283 for (unsigned int i = 0; i < numInputs; ++i)
1285 armnn::IOutputSlot& outputSlot = GetArmnnOutputSlotForCaffeTop(layerParam.bottom(armnn::numeric_cast<int>(i)));
1286 outputSlot.Connect(concatlayer->GetInputSlot(i));
1289 concatlayer->GetOutputSlot(0).SetTensorInfo(armnn::TensorInfo(numOfDims, mergeDimSizes.data(), DataType::Float32));
1290 SetArmnnOutputSlotForCaffeTop(layerParam.top(0), concatlayer->GetOutputSlot(0));
1293 void CaffeParserBase::ParseBatchNormLayer(const LayerParameter& layerParam)
1295 ValidateNumInputsOutputs(layerParam, 1, 1);
1297 const TensorInfo& inputInfo = GetArmnnOutputSlotForCaffeTop(layerParam.bottom(0)).GetTensorInfo();
1299 string name = layerParam.name();
1301 BatchNormParameter param = layerParam.batch_norm_param();
1302 // If use_global_stats is not explicitly set in the model, assume it to be true (its default value
1303 // when the network is in the testing phase).
1304 if (param.has_use_global_stats())
1306 if (!param.use_global_stats())
1308 throw ParseException(
1309 fmt::format("Error parsing Batch Norm layer '{}': "
1310 "Parameter 'use_global_stats' is set to false, which is "
1311 "unsupported (value used for training). {}",
1313 CHECK_LOCATION().AsString()));
1317 BatchNormalizationDescriptor desc;
1318 desc.m_Eps = param.eps();
1320 unsigned int channels = inputInfo.GetShape()[1];
1321 unsigned int shape[] = {channels};
1323 vector<float> meanData(channels);
1324 GetDataFromBlob(layerParam, meanData, 0);
1326 vector<float> varianceData(channels);
1327 GetDataFromBlob(layerParam, varianceData, 1);
1329 // Reads moving average factor and applies scaling (if required).
1330 const BlobProto& blob = layerParam.blobs(armnn::numeric_cast<int>(2));
1331 const float movingAverageFactor = blob.data(armnn::numeric_cast<int>(0));
1332 if(movingAverageFactor != 0.0f)
1334 const float scaleFactor = 1.0f / movingAverageFactor;
1335 auto scaleFunction = [scaleFactor](float f) -> float { return f * scaleFactor; };
1337 std::transform(varianceData.begin(), varianceData.end(), varianceData.begin(), scaleFunction);
1338 std::transform(meanData.begin(), meanData.end(), meanData.begin(), scaleFunction);
1341 // Identifies scale operation.
1342 vector<float> betaData(channels, 0.0f);
1343 vector<float> gammaData(channels, 1.0f);
1345 ConstTensor mean(TensorInfo(1, shape, armnn::DataType::Float32), meanData);
1346 ConstTensor variance(TensorInfo(1, shape, armnn::DataType::Float32), varianceData);
1347 ConstTensor beta(TensorInfo(1, shape, armnn::DataType::Float32), betaData);
1348 ConstTensor gamma(TensorInfo(1, shape, armnn::DataType::Float32), gammaData);
1350 armnn::IConnectableLayer* const batchNormLayer = m_Network->AddBatchNormalizationLayer(desc,
1351 mean, variance, beta, gamma, name.c_str());
1352 GetArmnnOutputSlotForCaffeTop(layerParam.bottom(0)).Connect(batchNormLayer->GetInputSlot(0));
1353 batchNormLayer->GetOutputSlot(0).SetTensorInfo(inputInfo);
1354 SetArmnnOutputSlotForCaffeTop(layerParam.top(0), batchNormLayer->GetOutputSlot(0));
1357 void CaffeParserBase::ParseScaleLayer(const LayerParameter& layerParam)
1359 // Current suboptimal solution: add a batch normalization layer with 0 mean and 1 variance.
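// With mean = 0, variance = 1 and eps = 0, the batch normalization equation
//     y = gamma * (x - mean) / sqrt(variance + eps) + beta
// reduces to y = gamma * x + beta, which is exactly what a Caffe Scale layer (with bias) computes.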
1360 ValidateNumInputsOutputs(layerParam, 1, 1);
1362 const TensorInfo& inputInfo = GetArmnnOutputSlotForCaffeTop(layerParam.bottom(0)).GetTensorInfo();
1364 string name = layerParam.name();
1366 ScaleParameter param = layerParam.scale_param();
1367 if (param.axis() != 1)
1369 // Would have to use something other than BatchNormalizationLayer in this case
1370 throw ParseException(
1371 fmt::format("Loading Scale Layer: Only axis 1 is supported currently. "
1372 "Layer={} Axis={} {}",
1375 CHECK_LOCATION().AsString()));
1378 unsigned int channels = inputInfo.GetShape()[1];
1379 unsigned int shape[] = {channels};
1381 BatchNormalizationDescriptor desc;
1382 desc.m_Eps = 0.0f; // Don't need epsilon if variance is 1.
1383 vector<float> meanData(channels, 0.0f);
1384 vector<float> varianceData(channels, 1.0f);
1385 vector<float> betaData(channels, 0.0f);
1386 vector<float> gammaData(channels);
1388 GetDataFromBlob(layerParam, gammaData, 0);
1390 if(param.has_bias_term())
1392 GetDataFromBlob(layerParam, betaData, 1);
1395 ConstTensor mean(TensorInfo(1, shape, armnn::DataType::Float32), meanData);
1396 ConstTensor variance(TensorInfo(1, shape, armnn::DataType::Float32), varianceData);
1397 ConstTensor beta(TensorInfo(1, shape, armnn::DataType::Float32), betaData);
1398 ConstTensor gamma(TensorInfo(1, shape, armnn::DataType::Float32), gammaData);
1400 armnn::IConnectableLayer* const batchNormLayer = m_Network->AddBatchNormalizationLayer(desc,
1401 mean, variance, beta, gamma, name.c_str());
1402 GetArmnnOutputSlotForCaffeTop(layerParam.bottom(0)).Connect(batchNormLayer->GetInputSlot(0));
1403 batchNormLayer->GetOutputSlot(0).SetTensorInfo(inputInfo);
1404 SetArmnnOutputSlotForCaffeTop(layerParam.top(0), batchNormLayer->GetOutputSlot(0));
1407 void CaffeParserBase::ParseSplitLayer(const caffe::LayerParameter& layerParam)
1409 // Used in caffe to duplicate memory - not necessary in armnn.
1410 if (layerParam.bottom_size() != 1)
1412 throw ParseException(
1413 fmt::format("Split layer '{}' should have exactly 1 bottom. "
1416 layerParam.bottom_size(),
1417 CHECK_LOCATION().AsString()));
1419 armnn::IOutputSlot& outputSlot = GetArmnnOutputSlotForCaffeTop(layerParam.bottom(0));
1420 for (int i = 0; i < layerParam.top_size(); i++)
1422 SetArmnnOutputSlotForCaffeTop(layerParam.top(i), outputSlot);
1426 void CaffeParserBase::ParseDropoutLayer(const caffe::LayerParameter& layerParam)
1428 // Ignored for inference, so patch the single input to its single output.
1429 if (layerParam.bottom_size() != 1 || layerParam.top_size() != 1)
1431 throw ParseException(
1432 fmt::format("Dropout layer '{}' should have exactly 1 bottom and 1 top. "
1433 "#bottoms={} #tops={} {}",
1435 layerParam.bottom_size(),
1436 layerParam.top_size(),
1437 CHECK_LOCATION().AsString()));
1439 SetArmnnOutputSlotForCaffeTop(layerParam.top(0), GetArmnnOutputSlotForCaffeTop(layerParam.bottom(0)));
1442 void CaffeParserBase::TrackInputBinding(armnn::IConnectableLayer* layer,
1443 armnn::LayerBindingId id,
1444 const armnn::TensorInfo& tensorInfo)
1446 return TrackBindingPoint(layer, id, tensorInfo, layer->GetName(), m_NetworkInputsBindingInfo);
1449 void CaffeParserBase::TrackOutputBinding(armnn::IConnectableLayer* layer,
1450 armnn::LayerBindingId id,
1451 const armnn::TensorInfo& tensorInfo)
1453 return TrackBindingPoint(layer, id, tensorInfo, layer->GetName(), m_NetworkOutputsBindingInfo);
1456 void CaffeParserBase::TrackBindingPoint(armnn::IConnectableLayer* layer,
1457 armnn::LayerBindingId id,
1458 const armnn::TensorInfo& tensorInfo,
1459 const char* bindingPointDesc,
1460 std::unordered_map<std::string, BindingPointInfo>& nameToBindingInfo)
1462 const std::string layerName = layer->GetName();
1463 auto it = nameToBindingInfo.find(layerName);
1464 if (it == nameToBindingInfo.end())
1466 nameToBindingInfo[layerName] = std::make_pair(id, tensorInfo);
1470 throw ParseException(
1471 fmt::format("Id {} used by more than one {} layer {}",
1474 CHECK_LOCATION().AsString()));
1478 armnn::IOutputSlot& CaffeParserBase::GetArmnnOutputSlotForCaffeTop(const std::string& caffeTopName) const
1480 auto it = m_ArmnnOutputSlotForCaffeTop.find(caffeTopName);
1481 if (it != m_ArmnnOutputSlotForCaffeTop.end())
1487 throw ParseException(
1488 fmt::format("Could not find armnn output slot for Caffe top '{}' {}",
1490 CHECK_LOCATION().AsString()));
1494 void CaffeParserBase::SetArmnnOutputSlotForCaffeTop(
1495 const std::string& caffeTopName, armnn::IOutputSlot& armnnOutputSlot)
1497 auto it = m_ArmnnOutputSlotForCaffeTop.find(caffeTopName);
1498 if (it == m_ArmnnOutputSlotForCaffeTop.end())
1500 m_ArmnnOutputSlotForCaffeTop[caffeTopName] = &armnnOutputSlot;
1504 throw ParseException(
1505 fmt::format("Attempting to add duplicate entry for Caffe top '{}' {}",
1507 CHECK_LOCATION().AsString()));
1511 // Note: can move to CaffeParser when/if we optimise the text/string format
1512 // to load on a layer by layer basis
1513 void CaffeParserBase::ResolveInPlaceLayers(caffe::NetParameter& netParameter)
1515 // Finds layers with the same top.
1516 std::map<std::string, std::vector<caffe::LayerParameter*>> layersByTop;
1517 for (int layerIdx = 0; layerIdx < netParameter.layer_size(); ++layerIdx)
1519 caffe::LayerParameter& layer = *netParameter.mutable_layer(layerIdx);
1520 std::string name = layer.name();
1521 for (int i = 0; i < layer.top_size(); ++i)
1523 layersByTop[layer.top(i)].push_back(&layer);
1527 // For each set of layers with the same top, resolves them to a linear chain rather than in-place layers.
1528 // Note that for 'regular' layers, there will be a single layer in each group and so this will be a no-op.
1529 for (auto layersWithSameTopIt : layersByTop)
1531 const std::string& top = layersWithSameTopIt.first;
1532 const std::vector<caffe::LayerParameter*>& layersWithSameTop = layersWithSameTopIt.second;
1534 // Chains the layers together in the order that they are listed in the prototxt (hopefully this is correct).
1535 // Note that the last layer will not have its top modified so that other layers will continue to reference it.
1536 for (unsigned int layerIdx = 0; layerIdx < layersWithSameTop.size() - 1; ++layerIdx)
1538 caffe::LayerParameter& layer1 = *layersWithSameTop[layerIdx];
1539 caffe::LayerParameter& layer2 = *layersWithSameTop[layerIdx+1];
1540 if (layer1.top_size() != 1)
1542 throw ParseException(
1543 fmt::format("Node '{}' is an in-place layer but doesn't have exactly one "
1544 "top. It has {} instead. {}",
1547 CHECK_LOCATION().AsString()));
1549 std::string newTop = layer1.name() + "_top";
1550 layer1.set_top(0, newTop);
1551 if (layer2.bottom_size() != 1 || layer2.bottom(0) != top)
1553 throw ParseException(
1554 fmt::format("Node '{}' is an in-place layer but "
1555 "doesn't have exactly one bottom, or it doesn't match its top. "
1556 "#bottoms={}, first bottom is {}, top is {} {}",
1560 CHECK_LOCATION().AsString()));
1562 layer2.set_bottom(0, newTop);
1567 // Note: can move to CaffeParser when/if we optimise the text/string format
1568 // to load on a layer by layer basis
1569 void CaffeParserBase::LoadNetParam(NetParameter& netParameter)
1571 // Caffe models sometimes have an implicit input layer.
1572 // In that case, add an explicit one.
1573 if (netParameter.input_size() > 0)
1575 LayerParameter* newLayer = netParameter.add_layer();
1577 newLayer->set_type("Input");
1578 newLayer->set_name(netParameter.input(0));
1579 newLayer->add_top(netParameter.input(0));
1581 InputParameter* inputParam = newLayer->mutable_input_param();
1582 BlobShape* shape = inputParam->add_shape();
1584 int dim_size = netParameter.input_dim_size();
1585 for (int i = 0; i < dim_size; ++i)
1587 shape->add_dim(netParameter.input_dim(i));
1591 // Replaces in-place layers with regular ones to make the rest of the parsing easier.
1592 ResolveInPlaceLayers(netParameter);
1594 // Creates a lookup of Caffe layers by name.
1595 for (int i = 0; i < netParameter.layer_size(); ++i)
1597 const caffe::LayerParameter& layer = netParameter.layer(i);
1598 for (int topIdx = 0; topIdx < layer.top_size(); ++topIdx)
1600 m_CaffeLayersByTopName[layer.top(topIdx)] = &layer;
1604 // Finds the output layers the user requested.
1605 std::vector<const caffe::LayerParameter*> targetLayers;
1606 for (const std::string& requestedOutputName : m_RequestedOutputs)
1608 auto nodeIt = m_CaffeLayersByTopName.find(requestedOutputName);
1609 if (nodeIt == m_CaffeLayersByTopName.end())
1611 throw ParseException(
1612 fmt::format("Couldn't find requested output layer '{}' in graph {}",
1613 requestedOutputName,
1614 CHECK_LOCATION().AsString()));
1616 targetLayers.push_back(nodeIt->second);
1619 // Sorts them into a linear ordering such that all inputs of a node are before the node itself.
1620 std::vector<const caffe::LayerParameter*> sortedNodes;
1621 if (!armnnUtils::GraphTopologicalSort<const caffe::LayerParameter*>(
1623 [this](const caffe::LayerParameter* node)
1625 return GetInputs(*node);
1629 throw ParseException(
1630 fmt::format("Cycle detected in graph. #nodes: {} {}",
1632 CHECK_LOCATION().AsString()));
1635 // Parses each node in order, knowing that all inputs of a node will be processed before the node itself.
1636 for (const caffe::LayerParameter* current : sortedNodes)
1638 auto it = ms_CaffeLayerNameToParsingFunctions.find(current->type());
1639 if (it == ms_CaffeLayerNameToParsingFunctions.end())
1641 throw ParseException(
1642 fmt::format("Unsupported layer type: '{}' for layer {} {}",
1645 CHECK_LOCATION().AsString()));
1647 auto func = it->second;
1648 (this->*func)(*current);
1651 // Adds ArmNN output layers connected to each requested output.
1652 for (const std::string& requestedOutput : m_RequestedOutputs)
1654 armnn::IOutputSlot& outputSlot = GetArmnnOutputSlotForCaffeTop(requestedOutput);
1656 const armnn::LayerBindingId outputId = armnn::numeric_cast<armnn::LayerBindingId>(
1657 m_NetworkOutputsBindingInfo.size());
1658 armnn::IConnectableLayer* const outputLayer = m_Network->AddOutputLayer(outputId, requestedOutput.c_str());
1659 outputSlot.Connect(outputLayer->GetInputSlot(0));
1661 TrackOutputBinding(outputLayer, outputId, outputLayer->GetInputSlot(0).GetConnection()->GetTensorInfo());
1665 INetworkPtr CaffeParserBase::CreateNetworkFromTextFile(const char* graphFile,
1666 const std::map<std::string, armnn::TensorShape>& inputShapes,
1667 const std::vector<std::string>& requestedOutputs)
1669 FILE* fd = fopen(graphFile, "r");
1673 throw FileNotFoundException(
1674 fmt::format("Failed to open graph file: {} {}",
1676 CHECK_LOCATION().AsString()));
1679 // Parses the file into a message.
1680 NetParameter netParam;
1681 auto input = new google::protobuf::io::FileInputStream(fileno(fd));
1682 bool success = google::protobuf::TextFormat::Parse(input, &netParam);
1688 throw ParseException(
1689 fmt::format("Failed to parse graph file: {} {}",
1691 CHECK_LOCATION().AsString()));
1694 return CreateNetworkFromNetParameter(netParam, inputShapes, requestedOutputs);
1697 INetworkPtr CaffeParserBase::CreateNetworkFromString(const char* protoText,
1698 const std::map<std::string, armnn::TensorShape>& inputShapes,
1699 const std::vector<std::string>& requestedOutputs)
1701 // Parses the string into a message.
1702 NetParameter netParam;
1703 bool success = google::protobuf::TextFormat::ParseFromString(protoText, &netParam);
1707 throw ParseException(
1708 fmt::format("Failed to parse graph string {}",
1709 CHECK_LOCATION().AsString()));
1712 return CreateNetworkFromNetParameter(netParam, inputShapes, requestedOutputs);
1715 INetworkPtr CaffeParser::CreateNetworkFromBinaryFile(const char* graphFile,
1716 const std::map<std::string, armnn::TensorShape>& inputShapes,
1717 const std::vector<std::string>& requestedOutputs)
1719 FILE* fd = fopen(graphFile, "rb");
1723 throw FileNotFoundException(
1724 fmt::format("Failed to open graph file at: {} {}",
1726 CHECK_LOCATION().AsString()));
1729 // Parses the file into a message.
1730 NetParameter netParam;
1732 FileInputStream inStream(fileno(fd));
1733 CodedInputStream codedStream(&inStream);
1734 codedStream.SetTotalBytesLimit(INT_MAX, INT_MAX);
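// The call above raises protobuf's default 64 MB message size limit so that large
// .caffemodel files can be parsed in one go.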
1735 bool success = netParam.ParseFromCodedStream(&codedStream);
1740 throw ParseException(
1741 fmt::format("Failed to parse protobuf file: {} {}",
1743 CHECK_LOCATION().AsString()));
1746 return CreateNetworkFromNetParameter(netParam, inputShapes, requestedOutputs);
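// Minimal usage sketch (the file, input and output names below are purely illustrative):
//
//     armnnCaffeParser::ICaffeParserPtr parser = armnnCaffeParser::ICaffeParser::Create();
//     std::map<std::string, armnn::TensorShape> inputShapes =
//         { { "data", armnn::TensorShape({ 1, 3, 224, 224 }) } };
//     armnn::INetworkPtr network =
//         parser->CreateNetworkFromBinaryFile("model.caffemodel", inputShapes, { "prob" });
//     auto inputBinding  = parser->GetNetworkInputBindingInfo("data");
//     auto outputBinding = parser->GetNetworkOutputBindingInfo("prob");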
1749 // Note: can move to CaffeParser when/if we optimise the text/string format
1750 // to load on a layer by layer basis
1751 INetworkPtr CaffeParserBase::CreateNetworkFromNetParameter(NetParameter& netParam,
1752 const std::map<std::string, armnn::TensorShape>& inputShapes,
1753 const std::vector<std::string>& requestedOutputs)
1755 m_NetworkInputsBindingInfo.clear();
1756 m_NetworkOutputsBindingInfo.clear();
1758 m_Network = INetwork::Create();
1760 m_InputShapes = inputShapes;
1761 if (requestedOutputs.size() == 0)
1763 throw ParseException("requestedOutputs must have at least one entry");
1765 m_RequestedOutputs = requestedOutputs;
1769 LoadNetParam(netParam);
1771 catch (const ParseException& e)
1779 return std::move(m_Network);
1782 void CaffeParserBase::Cleanup() {
1783 // cleanup, in case we reuse this parser
1784 m_InputShapes.clear();
1785 m_RequestedOutputs.clear();
1786 m_ArmnnOutputSlotForCaffeTop.clear();
1787 // NOTE: when we get the text/string format
1788 // optimised for memory then this data structure can
1789 // also move to the CaffeParser class
1790 m_CaffeLayersByTopName.clear();