2 // Copyright © 2017 Arm Ltd. All rights reserved.
3 // See LICENSE file in the project root for full license information.
5 #include "CaffeParser.hpp"
6 #include "RecordByRecordCaffeParser.hpp"
8 #include "armnn/Descriptors.hpp"
9 #include "armnn/INetwork.hpp"
10 #include "armnn/Utils.hpp"
11 #include "armnn/Exceptions.hpp"
13 #include "GraphTopologicalSort.hpp"
14 #include "VerificationHelpers.hpp"
16 #include <boost/numeric/conversion/cast.hpp>
17 #include <boost/assert.hpp>
18 #include <boost/format.hpp>
19 #include <boost/log/trivial.hpp>
22 #include "caffe/proto/caffe.pb.h"
25 #include <google/protobuf/io/coded_stream.h>
26 #include <google/protobuf/io/zero_copy_stream.h>
27 #include <google/protobuf/io/zero_copy_stream_impl.h>
28 #include <google/protobuf/text_format.h>
29 #include <google/protobuf/stubs/common.h>
30 #include <google/protobuf/stubs/once.h>
31 #include <google/protobuf/io/coded_stream.h>
32 #include <google/protobuf/wire_format_lite_inl.h>
33 #include <google/protobuf/descriptor.h>
34 #include <google/protobuf/generated_message_reflection.h>
35 #include <google/protobuf/reflection_ops.h>
36 #include <google/protobuf/wire_format.h>
43 /// Caffe networks are loaded from protobuf files (binary or text) using the protobuf library and the generated
44 /// code from caffe.pb.h. This gives us a caffe::NetParameter which is an in-memory version of the file.
45 /// This contains a flat list of Caffe 'layers' (e.g. convolution, pooling etc.).
46 /// Each layer has inputs (called "bottoms") and outputs (called "tops"). Data flows from bottom to top.
/// The bottoms of a layer refer to the tops of other layers by the top's name, not by the layer's name.
48 /// The names of layers seem to be arbitrary (you could rename a layer and the network wouldn't
49 /// need any other changes).
51 /// Some layers (e.g. Relu) can be configured so that their top and bottom are both the same. This is called an
52 /// "in-place" layer and is a Caffe runtime feature used to reduce memory usage by modifying tensors in-place.
53 /// This isn't relevant to the parser and so we preprocess these layers to convert them to regular layers, to result
54 /// in a consistent graph structure.
56 namespace armnnCaffeParser
59 using namespace armnn;
60 using namespace caffe;
62 using namespace google::protobuf::io;
67 const float* GetArrayPtrFromBlob(const LayerParameter& layerParam, unsigned int blobIndex)
69 auto nBlobs = layerParam.blobs_size();
70 if (blobIndex >= boost::numeric_cast<unsigned int>(nBlobs))
75 "Expected data blob at index %1% in layer %2% not found. nBlobs=%2%. %4%") %
79 CHECK_LOCATION().AsString()));
82 const BlobProto& blob = layerParam.blobs(boost::numeric_cast<int>(blobIndex));
84 const float* arrayPtr = blob.data().data();
88 void GetDataFromBlob(const LayerParameter& layerParam, vector<float>& outData, unsigned int blobIndex)
90 auto nBlobs = layerParam.blobs_size();
91 if (blobIndex >= boost::numeric_cast<unsigned int>(nBlobs))
96 "Expected data blob at index %1% in layer %2% not found. %3%") %
99 CHECK_LOCATION().AsString()));
102 const BlobProto& blob = layerParam.blobs(boost::numeric_cast<int>(blobIndex));
104 size_t blobSize = boost::numeric_cast<size_t>(blob.data_size());
105 if (blobSize != outData.size())
107 throw ParseException(
110 "Data blob at index %1% in layer %2% has an unexpected size. "
111 "Expected %3% elements but got %4% elements. %5%") %
116 CHECK_LOCATION().AsString()));
119 int outSizeInt = boost::numeric_cast<int>(outData.size());
120 for (int i = 0; i < outSizeInt; ++i)
122 outData[static_cast<size_t>(i)] = blob.data(i);
/// Returns true when value lies in the inclusive range [min, max].
bool IsInRange(unsigned int value, unsigned int min, unsigned int max)
{
    // The boolean expression is the result; no ternary needed.
    return value >= min && value <= max;
}
/// Returns the size in bytes of the payload held by the vector.
template <typename T>
size_t SizeOfVectorData(const std::vector<T>& vec)
{
    return vec.size() * sizeof(T);
}
137 void ValidateNumInputsOutputs(const caffe::LayerParameter& layerParameter,
138 unsigned int numInputs,
139 unsigned int numOutputs)
141 int numInputsActual = layerParameter.bottom_size();
142 if (numInputs != boost::numeric_cast<unsigned int>(numInputsActual))
144 throw ParseException(
146 boost::format("Invalid number of inputs requested %1% for layer %2% "
147 "while only %3% present. %4%") %
149 layerParameter.name() %
151 CHECK_LOCATION().AsString()));
154 int numOutputsActual = layerParameter.top_size();
155 if (numOutputs != boost::numeric_cast<unsigned int>(numOutputsActual))
157 throw ParseException(
159 boost::format("Invalid number of outputs requested %1% for layer %2% "
160 "while only %3% present. %4%") %
162 layerParameter.name() %
164 CHECK_LOCATION().AsString()));
/// Resolves a value from param by first trying extractOptional and, if that reports
/// no value, extractFallback. Both extractors return a (found, value) pair; if neither
/// finds a value the fallback pair carries defaultValue through.
template <typename ParamType, typename ExtractOptional, typename ExtractFallback, typename ValueType>
ValueType GetOptionalWithFallback(const ParamType& param,
                                  ExtractOptional extractOptional,
                                  ExtractFallback extractFallback,
                                  ValueType defaultValue)
{
    auto optValue = extractOptional(param, defaultValue);
    if (optValue.first)
    {
        return optValue.second;
    }

    auto fallbackValue = extractFallback(param, defaultValue);
    return fallbackValue.second;
}
// Reads OPTIONAL_VALUE from PARAM if its has_ accessor reports presence; otherwise
// falls back to the first element of the repeated field FALLBACK_VECTOR, and finally
// to DEFAULT_VALUE. Expands to a GetOptionalWithFallback call with two lambdas.
#define GET_OPTIONAL_WITH_VECTOR_FALLBACK(PARAM, \
                                          PARAM_TYPE, \
                                          OPTIONAL_VALUE, \
                                          FALLBACK_VECTOR, \
                                          VALUE_TYPE, \
                                          DEFAULT_VALUE) \
    GetOptionalWithFallback( \
        PARAM, \
        [](const PARAM_TYPE & param, VALUE_TYPE defaultValue) \
        { \
            if (param.has_##OPTIONAL_VALUE ()) \
            { \
                return std::make_pair(true, param.OPTIONAL_VALUE ()); \
            } \
            else \
            { \
                return std::make_pair(false, defaultValue); \
            } \
        }, \
        [](const PARAM_TYPE & param, VALUE_TYPE defaultValue) \
        { \
            if (param.FALLBACK_VECTOR##_size() > 0) \
            { \
                return std::make_pair(true, (param.FALLBACK_VECTOR ()).Get(0)); \
            } \
            else \
            { \
                return std::make_pair(false, defaultValue); \
            } \
        }, \
        DEFAULT_VALUE)
// As GET_OPTIONAL_WITH_VECTOR_FALLBACK, but the fallback is another optional scalar
// field (FALLBACK_VALUE) rather than a repeated field.
#define GET_OPTIONAL_WITH_FALLBACK(PARAM, \
                                   PARAM_TYPE, \
                                   OPTIONAL_VALUE, \
                                   FALLBACK_VALUE, \
                                   VALUE_TYPE, \
                                   DEFAULT_VALUE) \
    GetOptionalWithFallback( \
        PARAM, \
        [](const PARAM_TYPE & param, VALUE_TYPE defaultValue) \
        { \
            if (param.has_##OPTIONAL_VALUE ()) \
            { \
                return std::make_pair(true, param.OPTIONAL_VALUE ()); \
            } \
            else \
            { \
                return std::make_pair(false, defaultValue); \
            } \
        }, \
        [](const PARAM_TYPE & param, VALUE_TYPE defaultValue) \
        { \
            if (param.has_##FALLBACK_VALUE ()) \
            { \
                return std::make_pair(true, param.FALLBACK_VALUE ()); \
            } \
            else \
            { \
                return std::make_pair(false, defaultValue); \
            } \
        }, \
        DEFAULT_VALUE)
248 void ValidateEqualValuesInRange(unsigned int valueA,
249 const char* valueNameA,
251 const char* valueNameB,
254 const armnn::CheckLocation& location)
256 if (!IsInRange(valueA, min, max) || !IsInRange(valueB, min, max) || (valueA != valueB))
258 throw ParseException(
261 "%1%=%2% and %3%=%4% must be equal and within the valid range"
262 "of [%5%, %6%] %7%") %
269 location.AsString()));
// Convenience wrapper around ValidateEqualValuesInRange: stringises the two argument
// expressions (#A / #B) for the error message and captures the caller's source location.
#define VALIDATE_EQUAL_VALUES_IN_RANGE(A, B, MIN_RANGE, MAX_RANGE) \
    ValidateEqualValuesInRange(A, #A, B, #B, MIN_RANGE, MAX_RANGE, CHECK_LOCATION())
276 } // namespace <anonymous>
278 const std::map<std::string, CaffeParserBase::OperationParsingFunction>
279 CaffeParserBase::ms_CaffeLayerNameToParsingFunctions = {
280 { "Input", &CaffeParserBase::ParseInputLayer },
281 { "Convolution", &CaffeParserBase::ParseConvLayer },
282 { "Pooling", &CaffeParserBase::ParsePoolingLayer },
283 { "ReLU", &CaffeParserBase::ParseReluLayer },
284 { "LRN", &CaffeParserBase::ParseLRNLayer },
285 { "InnerProduct", &CaffeParserBase::ParseInnerProductLayer },
286 { "Softmax", &CaffeParserBase::ParseSoftmaxLayer },
287 { "Eltwise", &CaffeParserBase::ParseEltwiseLayer },
288 { "Concat", &CaffeParserBase::ParseConcatLayer },
289 { "BatchNorm", &CaffeParserBase::ParseBatchNormLayer },
290 { "Scale", &CaffeParserBase::ParseScaleLayer },
291 { "Split", &CaffeParserBase::ParseSplitLayer },
292 { "Dropout", &CaffeParserBase::ParseDropoutLayer},
295 ICaffeParser* ICaffeParser::CreateRaw()
297 return new RecordByRecordCaffeParser();
300 ICaffeParserPtr ICaffeParser::Create()
302 return ICaffeParserPtr(CreateRaw(), &ICaffeParser::Destroy);
305 void ICaffeParser::Destroy(ICaffeParser* parser)
310 CaffeParserBase::CaffeParserBase()
311 : m_Network(nullptr, nullptr)
316 CaffeParser::CaffeParser()
322 BindingPointInfo CaffeParserBase::GetNetworkInputBindingInfo(const std::string& name) const
324 return GetBindingInfo(name, "input", m_NetworkInputsBindingInfo);
327 BindingPointInfo CaffeParserBase::GetNetworkOutputBindingInfo(const std::string& name) const
329 return GetBindingInfo(name, "output", m_NetworkOutputsBindingInfo);
332 std::pair<armnn::LayerBindingId, armnn::TensorInfo> CaffeParserBase::GetBindingInfo(const std::string& layerName,
333 const char* bindingPointDesc,
334 const std::unordered_map<std::string, BindingPointInfo>& nameToBindingInfo)
336 auto it = nameToBindingInfo.find(layerName);
337 if (it == nameToBindingInfo.end())
339 throw InvalidArgumentException(
342 "Unknown binding %1% for layer '%2%'. %3%") %
345 CHECK_LOCATION().AsString()));
350 TensorInfo CaffeParserBase::BlobShapeToTensorInfo(const caffe::BlobShape& blobShape) const
352 std::vector<unsigned int> shape;
353 for (int j = 0; j < blobShape.dim_size(); ++j)
355 shape.push_back(static_cast<unsigned int>(blobShape.dim(j)));
358 return TensorInfo(boost::numeric_cast<unsigned int>(shape.size()), shape.data(), DataType::Float32);
361 BlobShape TensorDescToBlobShape(const TensorInfo& desc)
364 for (unsigned int i = 0; i < desc.GetNumDimensions(); ++i)
367 ret.set_dim(boost::numeric_cast<int>(i), desc.GetShape()[i]);
373 // Note: can move to CaffeParser when/if we optimise the text/string format
374 // to load on a layer by layer basis
375 vector<const LayerParameter*> CaffeParserBase::GetInputs(const LayerParameter& layerParam)
377 std::vector<const caffe::LayerParameter*> ret;
378 ret.reserve(boost::numeric_cast<size_t>(layerParam.bottom_size()));
379 for (int j = 0; j < layerParam.bottom_size(); ++j)
381 std::string inputName = layerParam.bottom(j);
382 auto inputIt = m_CaffeLayersByTopName.find(inputName);
383 if (inputIt == m_CaffeLayersByTopName.end())
385 throw ParseException(
388 "Can't find Caffe layer with top called '%1%', "
389 "which is listed as an input of '%2%'. %3%") %
392 CHECK_LOCATION().AsString()));
394 ret.push_back(inputIt->second);
400 void CaffeParserBase::ParseInputLayer(const LayerParameter& layerParam)
402 BOOST_ASSERT(layerParam.type() == "Input");
403 ValidateNumInputsOutputs(layerParam, 0, 1);
405 const InputParameter& param = layerParam.input_param();
407 const armnn::LayerBindingId inputId = boost::numeric_cast<armnn::LayerBindingId>(
408 m_NetworkInputsBindingInfo.size());
409 armnn::IConnectableLayer* const inputLayer = m_Network->AddInputLayer(inputId, layerParam.name().c_str());
411 // Decides the tensor info for this input. This can be specified in the Caffe network but can also
412 // be overriden by user input (m_inputShapes).
413 armnn::TensorInfo inputTensorInfo;
415 const BlobShape* originalShape = param.shape_size() > 0 && param.shape(0).dim_size() > 0 ?
416 ¶m.shape(0) : nullptr;
419 inputTensorInfo = BlobShapeToTensorInfo(*originalShape);
422 auto overrideIt = m_InputShapes.find(layerParam.name());
423 if (overrideIt != m_InputShapes.end())
425 const TensorShape& overrideShape = overrideIt->second;
427 ( originalShape->dim(1) != overrideShape[1]
428 || originalShape->dim(2) != overrideShape[2]
429 || originalShape->dim(3) != overrideShape[3]))
431 throw ParseException(
434 "Parsed input shape for '%1%' is incompatible with the override provided. %2%") %
436 CHECK_LOCATION().AsString()));
438 inputTensorInfo.SetShape(overrideShape);
440 else if (!originalShape)
442 throw ParseException(
445 "No input descriptor given for '%1%' and no input shape found in caffe model. %2%") %
447 CHECK_LOCATION().AsString()));
450 TrackInputBinding(inputLayer, inputId, inputTensorInfo);
451 inputLayer->GetOutputSlot(0).SetTensorInfo(inputTensorInfo);
452 SetArmnnOutputSlotForCaffeTop(layerParam.top(0), inputLayer->GetOutputSlot(0));
455 void CaffeParserBase::AddConvLayerWithSplits(const caffe::LayerParameter& layerParam,
456 const armnn::Convolution2dDescriptor& desc,
457 unsigned int kernelW,
458 unsigned int kernelH)
460 BOOST_ASSERT(layerParam.type() == "Convolution");
461 ValidateNumInputsOutputs(layerParam, 1, 1);
463 ConvolutionParameter convParam = layerParam.convolution_param();
464 BlobShape inputShape = TensorDescToBlobShape(GetArmnnOutputSlotForCaffeTop(layerParam.bottom(0)).GetTensorInfo());
465 const unsigned int numGroups = convParam.has_group() ? convParam.group() : 1;
467 // asusme these were already verified by the caller ParseConvLayer() function
468 BOOST_ASSERT(numGroups < inputShape.dim(1));
469 BOOST_ASSERT(numGroups > 1);
472 armnn::IOutputSlot& inputConnection = GetArmnnOutputSlotForCaffeTop(layerParam.bottom(0));
474 vector<string> convLayerNames(numGroups);
475 vector<armnn::IConnectableLayer*> convLayers(numGroups);
476 convLayerNames[0] = layerParam.name();
478 // This convolution is to be applied to chunks of the input data so add a splitter layer
480 // Redirect the convolution input to the splitter
481 unsigned int splitterDimSizes[4] = {static_cast<unsigned int>(inputShape.dim(0)),
482 static_cast<unsigned int>(inputShape.dim(1)),
483 static_cast<unsigned int>(inputShape.dim(2)),
484 static_cast<unsigned int>(inputShape.dim(3))};
486 // Split dimension 1 of the splitter output shape and conv input shapes
487 // according to the number of groups
489 splitterDimSizes[1] /= numGroups;
490 inputShape.set_dim(1, splitterDimSizes[1]);
492 // This is used to describe how the input is to be split
493 ViewsDescriptor splitterDesc(numGroups);
495 // Create an output node for each group, giving each a unique name
496 for (unsigned int g = 0; g < numGroups; ++g)
498 // Work out the names of the splitter layers child convolutions
500 ss << layerParam.name() << "_" << g;
501 convLayerNames[g] = ss.str();
503 splitterDesc.SetViewOriginCoord(g, 1, splitterDimSizes[1] * g);
505 // Set the size of the views.
506 for (unsigned int dimIdx=0; dimIdx < 4; dimIdx++)
508 splitterDesc.SetViewSize(g, dimIdx, splitterDimSizes[dimIdx]);
512 const std::string splitterLayerName = std::string("splitter_") + layerParam.bottom(0);
513 armnn::IConnectableLayer* splitterLayer = m_Network->AddSplitterLayer(splitterDesc, splitterLayerName.c_str());
515 inputConnection.Connect(splitterLayer->GetInputSlot(0));
516 for (unsigned int i = 0; i < splitterLayer->GetNumOutputSlots(); i++)
518 splitterLayer->GetOutputSlot(i).SetTensorInfo(BlobShapeToTensorInfo(inputShape));
521 unsigned int numFilters = convParam.num_output();
523 // Populates convolution output tensor descriptor dimensions.
524 BlobShape outputShape;
525 outputShape.add_dim(0);
526 outputShape.set_dim(0, inputShape.dim(0));
527 outputShape.add_dim(1);
528 // Ensures that dimension 1 of the convolution output is split according to the number of groups.
529 outputShape.set_dim(1, numFilters / numGroups);
530 outputShape.add_dim(2);
532 2, (static_cast<int>(
533 static_cast<float>(inputShape.dim(2) + 2 * desc.m_PadBottom - kernelH) /
534 static_cast<float>(desc.m_StrideY)) + 1));
535 outputShape.add_dim(3);
537 3, (static_cast<int>(
538 static_cast<float>(inputShape.dim(3) + 2 * desc.m_PadRight - kernelW) /
539 static_cast<float>(desc.m_StrideX)) + 1));
541 // Load the weight data for ALL groups
542 vector<float> weightData(boost::numeric_cast<size_t>(numGroups *
543 inputShape.dim(1) * // number of input channels
544 outputShape.dim(1) * // number of output channels
547 GetDataFromBlob(layerParam, weightData, 0);
549 const unsigned int weightDimSizes[4] = {
550 static_cast<unsigned int>(outputShape.dim(1)),
551 static_cast<unsigned int>(inputShape.dim(1)),
556 vector<float> biasData;
558 if (desc.m_BiasEnabled)
560 biasData.resize(boost::numeric_cast<size_t>(numGroups * outputShape.dim(1)), 1.f);
561 GetDataFromBlob(layerParam, biasData, 1);
563 const unsigned int biasDimSizes[1] = {static_cast<unsigned int>(outputShape.dim(1))};
564 biasInfo = TensorInfo(1, biasDimSizes, DataType::Float32);
567 const unsigned int numWeightsPerGroup = boost::numeric_cast<unsigned int>(weightData.size()) / numGroups;
568 const unsigned int numBiasesPerGroup = boost::numeric_cast<unsigned int>(biasData.size()) / numGroups;
570 for (unsigned int g = 0; g < numGroups; ++g)
572 // Sets the slot index, group 0 should be connected to the 0th output of the splitter
573 // group 1 should be connected to the 1st output of the splitter.
575 // Pulls out the weights for this group from that loaded from the model file earlier.
576 ConstTensor weights(TensorInfo(4, weightDimSizes, DataType::Float32),
577 weightData.data() + numWeightsPerGroup * g);
579 IConnectableLayer* convLayer = nullptr;
580 if (desc.m_BiasEnabled)
582 // Pulls out the biases for this group from that loaded from the model file earlier.
583 ConstTensor biases(biasInfo, biasData.data() + numBiasesPerGroup * g);
586 m_Network->AddConvolution2dLayer(desc, weights, biases, convLayerNames[g].c_str());
591 m_Network->AddConvolution2dLayer(desc, weights, convLayerNames[g].c_str());
593 convLayers[g] = convLayer;
595 // If we have more than one group then the input to the nth convolution the splitter layer's nth output,
596 // otherwise it's the regular input to this layer.
597 armnn::IOutputSlot& splitterInputConnection =
598 splitterLayer ? splitterLayer->GetOutputSlot(g) : inputConnection;
599 splitterInputConnection.Connect(convLayer->GetInputSlot(0));
600 convLayer->GetOutputSlot(0).SetTensorInfo(BlobShapeToTensorInfo(outputShape));
603 // If the convolution was performed in chunks, add a layer to merge the results
605 // The merge input shape matches that of the convolution output
606 unsigned int mergeDimSizes[4] = {static_cast<unsigned int>(outputShape.dim(0)),
607 static_cast<unsigned int>(outputShape.dim(1)),
608 static_cast<unsigned int>(outputShape.dim(2)),
609 static_cast<unsigned int>(outputShape.dim(3))};
611 // This is used to describe how the input is to be merged
612 OriginsDescriptor mergeDesc(numGroups);
614 // Now create an input node for each group, using the name from
615 // the output of the corresponding convolution
616 for (unsigned int g = 0; g < numGroups; ++g)
618 mergeDesc.SetViewOriginCoord(g, 1, mergeDimSizes[1] * g);
621 // Make sure the output from the merge is the correct size to hold the data for all groups
622 mergeDimSizes[1] *= numGroups;
623 outputShape.set_dim(1, mergeDimSizes[1]);
625 // Finally add the merge layer
626 IConnectableLayer* mergerLayer = m_Network->AddMergerLayer(mergeDesc, layerParam.name().c_str());
630 throw ParseException(
633 "Failed to create final merger layer for Split+Convolution+Merger. "
634 "Layer=%1% #groups=%2% #filters=%3% %4%") %
638 CHECK_LOCATION().AsString()));
641 for (unsigned int g = 0; g < numGroups; ++g)
643 convLayers[g]->GetOutputSlot(0).Connect(mergerLayer->GetInputSlot(g));
645 mergerLayer->GetOutputSlot(0).SetTensorInfo(armnn::TensorInfo(4, mergeDimSizes, DataType::Float32));
646 SetArmnnOutputSlotForCaffeTop(layerParam.top(0), mergerLayer->GetOutputSlot(0));
649 void CaffeParserBase::AddConvLayerWithDepthwiseConv(const caffe::LayerParameter& layerParam,
650 const armnn::Convolution2dDescriptor& convDesc,
651 unsigned int kernelW,
652 unsigned int kernelH)
654 BOOST_ASSERT(layerParam.type() == "Convolution");
655 ValidateNumInputsOutputs(layerParam, 1, 1);
657 ConvolutionParameter convParam = layerParam.convolution_param();
658 BlobShape inputShape = TensorDescToBlobShape(GetArmnnOutputSlotForCaffeTop(layerParam.bottom(0)).GetTensorInfo());
660 DepthwiseConvolution2dDescriptor desc;
661 desc.m_PadLeft = convDesc.m_PadLeft;
662 desc.m_PadRight = convDesc.m_PadRight;
663 desc.m_PadTop = convDesc.m_PadTop;
664 desc.m_PadBottom = convDesc.m_PadBottom;
665 desc.m_StrideX = convDesc.m_StrideX;
666 desc.m_StrideY = convDesc.m_StrideY;
667 desc.m_BiasEnabled = convDesc.m_BiasEnabled;
669 unsigned int numFilters = convParam.num_output();
671 BlobShape outputShape;
672 outputShape.add_dim(0);
673 outputShape.set_dim(0, inputShape.dim(0));
674 outputShape.add_dim(1);
675 outputShape.set_dim(1, numFilters);
676 outputShape.add_dim(2);
678 2, (static_cast<int>(
679 static_cast<float>(inputShape.dim(2) + 2 * desc.m_PadBottom - kernelH) /
680 static_cast<float>(desc.m_StrideY)) + 1));
681 outputShape.add_dim(3);
683 3, (static_cast<int>(
684 static_cast<float>(inputShape.dim(3) + 2 * desc.m_PadRight - kernelW) /
685 static_cast<float>(desc.m_StrideX)) + 1));
687 // Load the weight data
688 size_t allWeightsSize = boost::numeric_cast<size_t>(inputShape.dim(1) * kernelH * kernelW);
689 vector<float> weightData(allWeightsSize);
691 GetDataFromBlob(layerParam, weightData, 0);
693 // depth multiplier will be 1 for the depthwise convolution
694 const unsigned int weightDimSizes[4] = {
695 static_cast<unsigned int>(1), // depth multiplier
696 static_cast<unsigned int>(inputShape.dim(1)), // #channels
700 armnn::IConnectableLayer* returnLayer = nullptr;
701 ConstTensor weights(TensorInfo(4, weightDimSizes, DataType::Float32), weightData.data());
703 if (desc.m_BiasEnabled)
706 vector<float> biasData;
708 biasData.resize(boost::numeric_cast<size_t>(outputShape.dim(1)), 1.f);
709 GetDataFromBlob(layerParam, biasData, 1);
711 const unsigned int biasDimSizes[1] = {static_cast<unsigned int>(outputShape.dim(1))};
712 biasInfo = TensorInfo(1, biasDimSizes, DataType::Float32);
714 ConstTensor biases(biasInfo, biasData.data());
715 returnLayer = m_Network->AddDepthwiseConvolution2dLayer(desc, weights, biases, layerParam.name().c_str());
719 returnLayer = m_Network->AddDepthwiseConvolution2dLayer(desc, weights, layerParam.name().c_str());
724 throw ParseException(
727 "Failed to create depthwise convolution layer. "
728 "Layer=%1% #filters=%2% %3%") %
731 CHECK_LOCATION().AsString()));
733 armnn::IOutputSlot& inputConnection = GetArmnnOutputSlotForCaffeTop(layerParam.bottom(0));
734 inputConnection.Connect(returnLayer->GetInputSlot(0));
735 returnLayer->GetOutputSlot(0).SetTensorInfo(BlobShapeToTensorInfo(outputShape));
736 SetArmnnOutputSlotForCaffeTop(layerParam.top(0), returnLayer->GetOutputSlot(0));
739 void CaffeParserBase::ParseConvLayer(const LayerParameter& layerParam)
741 // Ignored Caffe Parameters
749 // Not Available ArmNN Interface Parameters
750 // * Rounding policy;
752 BOOST_ASSERT(layerParam.type() == "Convolution");
753 ValidateNumInputsOutputs(layerParam, 1, 1);
755 ConvolutionParameter convParam = layerParam.convolution_param();
756 BlobShape inputShape = TensorDescToBlobShape(GetArmnnOutputSlotForCaffeTop(layerParam.bottom(0)).GetTensorInfo());
757 const unsigned int numGroups = convParam.has_group() ? convParam.group() : 1;
758 unsigned int numFilters = convParam.num_output();
760 const auto notFound = std::numeric_limits<unsigned int>::max();
762 unsigned int kernelH = GET_OPTIONAL_WITH_VECTOR_FALLBACK(convParam, ConvolutionParameter,
763 kernel_h, kernel_size, unsigned int, notFound);
764 unsigned int kernelW = GET_OPTIONAL_WITH_VECTOR_FALLBACK(convParam, ConvolutionParameter,
765 kernel_w, kernel_size, unsigned int, notFound);
767 unsigned int strideH = GET_OPTIONAL_WITH_VECTOR_FALLBACK(convParam, ConvolutionParameter,
768 stride_h, stride, unsigned int, 1u);
769 unsigned int strideW = GET_OPTIONAL_WITH_VECTOR_FALLBACK(convParam, ConvolutionParameter,
770 stride_w, stride, unsigned int, 1u);
772 unsigned int padH = GET_OPTIONAL_WITH_VECTOR_FALLBACK(convParam, ConvolutionParameter,
773 pad_h, pad, unsigned int, 0u);
774 unsigned int padW = GET_OPTIONAL_WITH_VECTOR_FALLBACK(convParam, ConvolutionParameter,
775 pad_w, pad, unsigned int, 0u);
777 VALIDATE_EQUAL_VALUES_IN_RANGE(kernelH, kernelW, 0, 11);
778 VALIDATE_EQUAL_VALUES_IN_RANGE(strideH, strideW, 0, 11);
779 VALIDATE_EQUAL_VALUES_IN_RANGE(padH, padW, 0, 11);
781 Convolution2dDescriptor convolution2dDescriptor;
782 convolution2dDescriptor.m_PadLeft = padW;
783 convolution2dDescriptor.m_PadRight = padW;
784 convolution2dDescriptor.m_PadTop = padH;
785 convolution2dDescriptor.m_PadBottom = padH;
786 convolution2dDescriptor.m_StrideX = strideW;
787 convolution2dDescriptor.m_StrideY = strideH;
788 convolution2dDescriptor.m_BiasEnabled = convParam.has_bias_term() ? convParam.bias_term() : true;
790 if (numGroups > numFilters)
792 throw ParseException(
795 "Error parsing Convolution: %1%. "
796 "The 'group'=%2% parameter cannot be larger than the "
797 "number of filters supplied ='%3%'. %4%") %
801 CHECK_LOCATION().AsString()));
804 if (inputShape.dim_size() != 4)
806 throw ParseException(
809 "Convolution input shape is expected to have 4 dimensions. "
810 "%1%'s input has only %2%. %3%") %
812 inputShape.dim_size() %
813 CHECK_LOCATION().AsString()));
818 if (numGroups > inputShape.dim(1))
820 throw ParseException(
823 "Error parsing Convolution: %1%. "
824 "The 'group'=%2% parameter cannot be larger than the "
825 "channel of the input shape=%3% (in NCHW format). %4%") %
829 CHECK_LOCATION().AsString()));
831 else if (numGroups == inputShape.dim(1))
833 // we use a depthwise convolution here, because the number of groups equals to the
835 AddConvLayerWithDepthwiseConv(layerParam, convolution2dDescriptor, kernelW, kernelH);
840 // we split the input by channels into channels/groups separate convolutions
841 // and merger the results afterwards
842 AddConvLayerWithSplits(layerParam, convolution2dDescriptor, kernelW, kernelH);
847 // NOTE: at this point we only need to handle #group=1 case, all other cases should be
848 // handled by the AddConvLayer* helpers
850 // Populate convolution output tensor descriptor dimensions
851 BlobShape outputShape;
852 outputShape.add_dim(0);
853 outputShape.set_dim(0, inputShape.dim(0));
854 outputShape.add_dim(1);
855 outputShape.set_dim(1, numFilters);
856 outputShape.add_dim(2);
858 2, (static_cast<int>(
859 static_cast<float>(inputShape.dim(2) + 2 * padH - kernelH) /
860 static_cast<float>(strideH)) + 1));
861 outputShape.add_dim(3);
863 3, (static_cast<int>(
864 static_cast<float>(inputShape.dim(3) + 2 * padW - kernelW) /
865 static_cast<float>(strideW)) + 1));
867 // Load the weight data for ALL groups
868 vector<float> weightData(boost::numeric_cast<size_t>(inputShape.dim(1) *
872 GetDataFromBlob(layerParam, weightData, 0);
874 const unsigned int weightDimSizes[4] = {
875 static_cast<unsigned int>(outputShape.dim(1)), // output channels
876 static_cast<unsigned int>(inputShape.dim(1)), // input channels
880 armnn::IConnectableLayer* returnLayer = nullptr;
882 // Pull out the weights for this group from that loaded from the model file earlier
883 ConstTensor weights(TensorInfo(4, weightDimSizes, DataType::Float32), weightData.data());
885 if (convolution2dDescriptor.m_BiasEnabled)
888 vector<float> biasData;
890 biasData.resize(boost::numeric_cast<size_t>(outputShape.dim(1)), 1.f);
891 GetDataFromBlob(layerParam, biasData, 1);
893 const unsigned int biasDimSizes[1] = {static_cast<unsigned int>(outputShape.dim(1))};
894 biasInfo = TensorInfo(1, biasDimSizes, DataType::Float32);
896 // Pull out the biases for this group from that loaded from the model file earlier
897 ConstTensor biases(biasInfo, biasData.data());
900 m_Network->AddConvolution2dLayer(convolution2dDescriptor, weights, biases, layerParam.name().c_str());
904 returnLayer = m_Network->AddConvolution2dLayer(convolution2dDescriptor, weights, layerParam.name().c_str());
907 armnn::IOutputSlot& inputConnection = GetArmnnOutputSlotForCaffeTop(layerParam.bottom(0));
908 inputConnection.Connect(returnLayer->GetInputSlot(0));
909 returnLayer->GetOutputSlot(0).SetTensorInfo(BlobShapeToTensorInfo(outputShape));
913 throw ParseException(
916 "Failed to create Convolution layer. "
917 "Layer=%1% #groups=%2% #filters=%3% %4%") %
921 CHECK_LOCATION().AsString()));
924 SetArmnnOutputSlotForCaffeTop(layerParam.top(0), returnLayer->GetOutputSlot(0));
927 void CaffeParserBase::ParsePoolingLayer(const LayerParameter& layerParam)
929 // Ignored Caffe Parameters
930 // Stochastic Pooling
933 ValidateNumInputsOutputs(layerParam, 1, 1);
934 PoolingParameter param = layerParam.pooling_param();
935 const TensorInfo& inputInfo = GetArmnnOutputSlotForCaffeTop(layerParam.bottom(0)).GetTensorInfo();
937 const auto notFound = std::numeric_limits<unsigned int>::max();
939 unsigned int kernel_h = GET_OPTIONAL_WITH_FALLBACK(param, PoolingParameter,
940 kernel_h, kernel_size, unsigned int, notFound);
941 unsigned int kernel_w = GET_OPTIONAL_WITH_FALLBACK(param, PoolingParameter,
942 kernel_w, kernel_size, unsigned int, notFound);
944 if ((kernel_h == notFound || kernel_w == notFound) && param.has_global_pooling())
946 kernel_h = inputInfo.GetShape()[2];
947 kernel_w = inputInfo.GetShape()[3];
950 VALIDATE_EQUAL_VALUES_IN_RANGE(kernel_h, kernel_w, 0, 11);
952 unsigned int stride_h = GET_OPTIONAL_WITH_FALLBACK(param, PoolingParameter,
953 stride_h, stride, unsigned int, notFound);
954 unsigned int stride_w = GET_OPTIONAL_WITH_FALLBACK(param, PoolingParameter,
955 stride_h, stride, unsigned int, notFound);
957 if ((stride_h == notFound || stride_w == notFound) && param.has_global_pooling())
963 VALIDATE_EQUAL_VALUES_IN_RANGE(stride_h, stride_w, 0, 11);
965 unsigned int pad_h = GET_OPTIONAL_WITH_FALLBACK(param, PoolingParameter,
966 pad_h, pad, unsigned int, 0u);
967 unsigned int pad_w = GET_OPTIONAL_WITH_FALLBACK(param, PoolingParameter,
968 pad_w, pad, unsigned int, 0u);
970 VALIDATE_EQUAL_VALUES_IN_RANGE(pad_h, pad_w, 0, 11);
972 // Populate Weight and Bias Filter Descriptor
973 Pooling2dDescriptor pooling2dDescriptor;
974 if (param.has_pool())
976 PoolingParameter_PoolMethod p = param.pool();
979 case PoolingParameter_PoolMethod_MAX:
981 pooling2dDescriptor.m_PoolType = PoolingAlgorithm::Max;
984 case PoolingParameter_PoolMethod_AVE:
986 pooling2dDescriptor.m_PoolType = PoolingAlgorithm::Average;
989 case PoolingParameter_PoolMethod_STOCHASTIC:
991 throw ParseException(
994 "Pooling Layer: Stochastic Pooling Not Supported. Layer=%1% %2%") %
996 CHECK_LOCATION().AsString()));
1000 throw ParseException(
1003 "Pooling Layer: unknown pooling method: %1% for layer: %2% %3%") %
1006 CHECK_LOCATION().AsString()));
1012 throw ParseException(
1015 "No Pooling Method Defined for %1% %2%") %
1017 CHECK_LOCATION().AsString()));
1020 pooling2dDescriptor.m_PadLeft = pad_w;
1021 pooling2dDescriptor.m_PadRight = pad_w;
1022 pooling2dDescriptor.m_PadTop = pad_h;
1023 pooling2dDescriptor.m_PadBottom = pad_h;
1024 pooling2dDescriptor.m_StrideX = stride_w;
1025 pooling2dDescriptor.m_StrideY = stride_h;
1026 pooling2dDescriptor.m_PoolWidth = kernel_w;
1027 pooling2dDescriptor.m_PoolHeight = kernel_h;
1029 pooling2dDescriptor.m_OutputShapeRounding = OutputShapeRounding::Ceiling;
1030 pooling2dDescriptor.m_PaddingMethod = PaddingMethod::IgnoreValue;
1032 armnn::IConnectableLayer* poolingLayer = m_Network->AddPooling2dLayer(pooling2dDescriptor,
1033 layerParam.name().c_str());
1035 TensorInfo outputInfo(
1036 { inputInfo.GetShape()[0],
1037 inputInfo.GetShape()[1],
1038 static_cast<unsigned int>(ceil(
1039 static_cast<float>(inputInfo.GetShape()[2] + 2 * pad_h - kernel_h) /
1040 boost::numeric_cast<float>(stride_h))) + 1,
1041 static_cast<unsigned int>(ceil(
1042 static_cast<float>(inputInfo.GetShape()[3] + 2 * pad_w - kernel_w) /
1043 boost::numeric_cast<float>(stride_w))) + 1 },
1046 GetArmnnOutputSlotForCaffeTop(layerParam.bottom(0)).Connect(poolingLayer->GetInputSlot(0));
1047 poolingLayer->GetOutputSlot(0).SetTensorInfo(outputInfo);
1048 SetArmnnOutputSlotForCaffeTop(layerParam.top(0), poolingLayer->GetOutputSlot(0));
1051 void CaffeParserBase::ParseReluLayer(const LayerParameter& layerParam)
1053 ValidateNumInputsOutputs(layerParam, 1, 1);
1055 const string& name = layerParam.name();
1056 const ReLUParameter& param = layerParam.relu_param();
1058 ActivationDescriptor activationDescriptor;
1059 const float negativeSlope = param.negative_slope();
1060 if (negativeSlope == 0.0f)
1062 activationDescriptor.m_Function = ActivationFunction::ReLu;
1066 activationDescriptor.m_Function = ActivationFunction::LeakyReLu;
1067 activationDescriptor.m_A = negativeSlope;
1070 const TensorInfo& inputInfo = GetArmnnOutputSlotForCaffeTop(layerParam.bottom(0)).GetTensorInfo();
1071 IConnectableLayer* const activationLayer = m_Network->AddActivationLayer(activationDescriptor, name.c_str());
1072 GetArmnnOutputSlotForCaffeTop(layerParam.bottom(0)).Connect(activationLayer->GetInputSlot(0));
1073 activationLayer->GetOutputSlot(0).SetTensorInfo(inputInfo);
1074 SetArmnnOutputSlotForCaffeTop(layerParam.top(0), activationLayer->GetOutputSlot(0));
1077 void CaffeParserBase::ParseLRNLayer(const LayerParameter& layerParam)
1079 ValidateNumInputsOutputs(layerParam, 1, 1);
1081 LRNParameter param = layerParam.lrn_param();
1083 const TensorInfo& inputInfo = GetArmnnOutputSlotForCaffeTop(layerParam.bottom(0)).GetTensorInfo();
1085 // Ignored BATCH NORMALIZATION Caffe Parameters.
1086 // Ignored MVN Caffe Parameters.
1087 // Ignored LRN Caffe Parameters.
1090 NormalizationDescriptor normalizationDescriptor;
1091 if (param.has_norm_region())
1093 LRNParameter_NormRegion n = param.norm_region();
1096 case LRNParameter_NormRegion_ACROSS_CHANNELS:
1098 normalizationDescriptor.m_NormChannelType = NormalizationAlgorithmChannel::Across;
1101 case LRNParameter_NormRegion_WITHIN_CHANNEL:
1103 normalizationDescriptor.m_NormChannelType = NormalizationAlgorithmChannel::Within;
1108 throw ParseException(
1111 "Unknown region %1% for LRN layer %2% %3%") %
1114 CHECK_LOCATION().AsString()));
1120 // Caffe defaults to normalization across channels.
1121 normalizationDescriptor.m_NormChannelType = NormalizationAlgorithmChannel::Across;
1124 normalizationDescriptor.m_NormMethodType = NormalizationAlgorithmMethod::LocalBrightness;
1125 if (param.has_local_size())
1127 normalizationDescriptor.m_NormSize = param.local_size();
1131 throw ParseException(
1134 "local_size not defined for LRN layer %1% %2%") %
1136 CHECK_LOCATION().AsString()));
1139 if (param.has_alpha())
1141 normalizationDescriptor.m_Alpha = param.alpha();
1142 normalizationDescriptor.m_Alpha /= boost::numeric_cast<float>(param.local_size());
1146 throw ParseException(
1149 "Alpha not defined for LRN layer %1% %2%") %
1151 CHECK_LOCATION().AsString()));
1153 if (param.has_beta())
1155 normalizationDescriptor.m_Beta = param.beta();
1159 throw ParseException(
1162 "Beta not defined for LRN layer %1% %2%") %
1164 CHECK_LOCATION().AsString()));
1169 normalizationDescriptor.m_K = param.k();
1173 normalizationDescriptor.m_K = 1;
1176 IConnectableLayer* const normLayer = m_Network->AddNormalizationLayer(normalizationDescriptor,
1177 layerParam.name().c_str());
1178 GetArmnnOutputSlotForCaffeTop(layerParam.bottom(0)).Connect(normLayer->GetInputSlot(0));
1179 normLayer->GetOutputSlot(0).SetTensorInfo(inputInfo);
1181 SetArmnnOutputSlotForCaffeTop(layerParam.top(0), normLayer->GetOutputSlot(0));
1184 void CaffeParserBase::ParseInnerProductLayer(const LayerParameter& layerParam)
1186 InnerProductParameter param = layerParam.inner_product_param();
1188 ValidateNumInputsOutputs(layerParam, 1, 1);
1190 unsigned int outputSize = param.num_output();
1192 // Ignored Caffe Parameters:
1198 FullyConnectedDescriptor tensorFullyConnectedDescriptor;
1200 if (param.has_transpose())
1202 // If true, assumes transposed weights.
1203 tensorFullyConnectedDescriptor.m_TransposeWeightMatrix = param.transpose();
1207 // Caffe defaults to transposed.
1208 tensorFullyConnectedDescriptor.m_TransposeWeightMatrix = true;
1211 const TensorInfo& inputInfo = GetArmnnOutputSlotForCaffeTop(layerParam.bottom(0)).GetTensorInfo();
1213 TensorInfo weightInfo;
1214 TensorInfo biasInfo;
1216 // Allows implicit flattening of extra dimensions.
1217 unsigned int inputSize = inputInfo.GetShape()[1];
1218 for (unsigned int i = 2; i < inputInfo.GetNumDimensions(); ++i)
1220 inputSize *= inputInfo.GetShape()[i];
1223 const float* weightDataPtr = GetArrayPtrFromBlob(layerParam, 0);
1224 const unsigned int swTD[2] = { outputSize, inputSize };
1225 ConstTensor weights(TensorInfo(2, swTD, DataType::Float32), weightDataPtr);
1227 tensorFullyConnectedDescriptor.m_BiasEnabled = true;
1228 // Todo: check whether bias enabled.
1229 armnn::IConnectableLayer* fullyConnectedLayer = nullptr;
1230 if (tensorFullyConnectedDescriptor.m_BiasEnabled)
1233 const float* biasDataPtr = GetArrayPtrFromBlob(layerParam, 1);
1235 const unsigned int sbTD[1] = { outputSize };
1237 ConstTensor biases(TensorInfo(1, sbTD, DataType::Float32), biasDataPtr);
1239 fullyConnectedLayer = m_Network->AddFullyConnectedLayer(tensorFullyConnectedDescriptor, weights, biases,
1240 layerParam.name().c_str());
1244 fullyConnectedLayer = m_Network->AddFullyConnectedLayer(tensorFullyConnectedDescriptor, weights,
1245 layerParam.name().c_str());
1248 TensorInfo outputInfo({ inputInfo.GetShape()[0], outputSize }, DataType::Float32);
1249 GetArmnnOutputSlotForCaffeTop(layerParam.bottom(0)).Connect(fullyConnectedLayer->GetInputSlot(0));
1250 fullyConnectedLayer->GetOutputSlot(0).SetTensorInfo(outputInfo);
1251 SetArmnnOutputSlotForCaffeTop(layerParam.top(0), fullyConnectedLayer->GetOutputSlot(0));
1254 void CaffeParserBase::ParseSoftmaxLayer(const LayerParameter& layerParam)
1256 ValidateNumInputsOutputs(layerParam, 1, 1);
1258 SoftmaxParameter param = layerParam.softmax_param();
1260 const TensorInfo& inputInfo = GetArmnnOutputSlotForCaffeTop(layerParam.bottom(0)).GetTensorInfo();
1262 // Ignored Caffe Parameters:
1266 armnn::SoftmaxDescriptor softmaxDescriptor;
1267 armnn::IConnectableLayer* const softmaxLayer = m_Network->AddSoftmaxLayer(
1269 layerParam.name().c_str());
1270 GetArmnnOutputSlotForCaffeTop(layerParam.bottom(0)).Connect(softmaxLayer->GetInputSlot(0));
1271 softmaxLayer->GetOutputSlot(0).SetTensorInfo(inputInfo);
1272 SetArmnnOutputSlotForCaffeTop(layerParam.top(0), softmaxLayer->GetOutputSlot(0));
1275 void CaffeParserBase::ParseEltwiseLayer(const LayerParameter& layerParam)
1277 ValidateNumInputsOutputs(layerParam, 2, 1);
1279 const TensorInfo& inputInfo = GetArmnnOutputSlotForCaffeTop(layerParam.bottom(0)).GetTensorInfo();
1281 // Ignored Caffe Parameters:
1284 EltwiseParameter_EltwiseOp operation = EltwiseParameter_EltwiseOp_SUM; // Defaults to sum as per caffe.
1286 if (layerParam.has_eltwise_param() && layerParam.eltwise_param().has_operation())
1288 operation = layerParam.eltwise_param().operation();
1291 armnn::IConnectableLayer* newLayer = nullptr;
1294 case EltwiseParameter_EltwiseOp_SUM:
1296 newLayer = m_Network->AddAdditionLayer(layerParam.name().c_str());
1299 case EltwiseParameter_EltwiseOp_PROD:
1301 newLayer = m_Network->AddMultiplicationLayer(layerParam.name().c_str());
1306 throw ParseException(
1309 "Unsupported operation %1% in Eltwise layer %2% %3%") %
1312 CHECK_LOCATION().AsString()));
1316 GetArmnnOutputSlotForCaffeTop(layerParam.bottom(0)).Connect(newLayer->GetInputSlot(0));
1317 GetArmnnOutputSlotForCaffeTop(layerParam.bottom(1)).Connect(newLayer->GetInputSlot(1));
1318 newLayer->GetOutputSlot(0).SetTensorInfo(inputInfo);
1319 SetArmnnOutputSlotForCaffeTop(layerParam.top(0), newLayer->GetOutputSlot(0));
1322 void CaffeParserBase::ParseConcatLayer(const LayerParameter& layerParam)
1324 unsigned int numInputs = static_cast<unsigned int>(layerParam.bottom_size());
1325 // We assume concat happens along the channel dimension, which is 1 in (0, 1, 2, 3).
1326 unsigned int concatDim = 1;
1327 unsigned int numOfDims = 4;
1329 // we only consider 4-D tensor here
1330 OriginsDescriptor concatDescriptor(static_cast<uint32_t>(numInputs), numOfDims);
1331 std::vector<unsigned int>mergeDimSizes(numOfDims, 0u);
1333 unsigned int mergeDim = 0;
1334 for (unsigned int viewIndex = 0; viewIndex < numInputs; ++viewIndex)
1336 const TensorInfo& inputInfo = GetArmnnOutputSlotForCaffeTop(
1337 layerParam.bottom(boost::numeric_cast<int>(viewIndex))).GetTensorInfo();
1338 // Checks whether the dimensions of the input tensors are actually 4.
1339 if (inputInfo.GetNumDimensions()!=4)
1341 throw ParseException(
1344 "The number of dimensions for input tensors of "
1345 "the concatenation op should be 4. Inputs of %1% has "
1346 "%2% dimensions. %3%") %
1348 inputInfo.GetNumDimensions() %
1349 CHECK_LOCATION().AsString()));
1352 mergeDimSizes[0] = inputInfo.GetShape()[0];
1353 mergeDimSizes[1] = inputInfo.GetShape()[1];
1354 mergeDimSizes[2] = inputInfo.GetShape()[2];
1355 mergeDimSizes[3] = inputInfo.GetShape()[3];
1357 for (unsigned int j = 0; j < concatDim; ++j)
1359 concatDescriptor.SetViewOriginCoord(viewIndex, j, 0);
1362 concatDescriptor.SetViewOriginCoord(viewIndex, concatDim, mergeDim);
1363 mergeDim += mergeDimSizes[concatDim];
1365 for (unsigned int j = concatDim+1; j < numOfDims; ++j)
1367 concatDescriptor.SetViewOriginCoord(viewIndex, j, 0);
1370 mergeDimSizes[concatDim] = mergeDim;
1372 armnn::IConnectableLayer* concatlayer = m_Network->AddMergerLayer(concatDescriptor, layerParam.name().c_str());
1373 for (unsigned int i = 0; i < numInputs; ++i)
1375 armnn::IOutputSlot& outputSlot = GetArmnnOutputSlotForCaffeTop(layerParam.bottom(boost::numeric_cast<int>(i)));
1376 outputSlot.Connect(concatlayer->GetInputSlot(i));
1379 concatlayer->GetOutputSlot(0).SetTensorInfo(armnn::TensorInfo(numOfDims, mergeDimSizes.data(), DataType::Float32));
1380 SetArmnnOutputSlotForCaffeTop(layerParam.top(0), concatlayer->GetOutputSlot(0));
1383 void CaffeParserBase::ParseBatchNormLayer(const LayerParameter& layerParam)
1385 ValidateNumInputsOutputs(layerParam, 1, 1);
1387 const TensorInfo& inputInfo = GetArmnnOutputSlotForCaffeTop(layerParam.bottom(0)).GetTensorInfo();
1389 string name = layerParam.name();
1391 BatchNormParameter param = layerParam.batch_norm_param();
1392 // If use_global_stats is not explicitly set in the model, assume it to be true (its default value
1393 // when the network is in the testing phase).
1394 if (param.has_use_global_stats())
1396 if (!param.use_global_stats())
1398 throw ParseException(
1401 "Error parsing Batch Norm layer '%1%': "
1402 "Parameter 'use_global_stats' is set to false, which is "
1403 "unsupported (value used for training). %2%") %
1405 CHECK_LOCATION().AsString()));
1409 BatchNormalizationDescriptor desc;
1410 desc.m_Eps = param.eps();
1412 unsigned int channels = inputInfo.GetShape()[1];
1413 unsigned int shape[] = {channels};
1415 vector<float> meanData(channels);
1416 GetDataFromBlob(layerParam, meanData, 0);
1418 vector<float> varianceData(channels);
1419 GetDataFromBlob(layerParam, varianceData, 1);
1421 // Reads moving average factor and applies scaling (if required).
1422 const BlobProto& blob = layerParam.blobs(boost::numeric_cast<int>(2));
1423 const float movingAverageFactor = blob.data(boost::numeric_cast<int>(0));
1424 if(movingAverageFactor != 0.0f)
1426 const float scaleFactor = 1.0f / movingAverageFactor;
1427 auto scaleFunction = [scaleFactor](float f) -> float { return f * scaleFactor; };
1429 std::transform(varianceData.begin(), varianceData.end(), varianceData.begin(), scaleFunction);
1430 std::transform(meanData.begin(), meanData.end(), meanData.begin(), scaleFunction);
1433 // Identifies scale operation.
1434 vector<float> betaData(channels, 0.0f);
1435 vector<float> gammaData(channels, 1.0f);
1437 ConstTensor mean(TensorInfo(1, shape, armnn::DataType::Float32), meanData);
1438 ConstTensor variance(TensorInfo(1, shape, armnn::DataType::Float32), varianceData);
1439 ConstTensor beta(TensorInfo(1, shape, armnn::DataType::Float32), betaData);
1440 ConstTensor gamma(TensorInfo(1, shape, armnn::DataType::Float32), gammaData);
1442 armnn::IConnectableLayer* const batchNormLayer = m_Network->AddBatchNormalizationLayer(desc,
1443 mean, variance, beta, gamma, name.c_str());
1444 GetArmnnOutputSlotForCaffeTop(layerParam.bottom(0)).Connect(batchNormLayer->GetInputSlot(0));
1445 batchNormLayer->GetOutputSlot(0).SetTensorInfo(inputInfo);
1446 SetArmnnOutputSlotForCaffeTop(layerParam.top(0), batchNormLayer->GetOutputSlot(0));
1449 void CaffeParserBase::ParseScaleLayer(const LayerParameter& layerParam)
1451 // Current unoptimal solution: add a batchnormalization layer with 0 mean and 1 variance.
1452 ValidateNumInputsOutputs(layerParam, 1, 1);
1454 const TensorInfo& inputInfo = GetArmnnOutputSlotForCaffeTop(layerParam.bottom(0)).GetTensorInfo();
1456 string name = layerParam.name();
1458 ScaleParameter param = layerParam.scale_param();
1459 if (param.axis() != 1)
1461 // Would have to use something other than BatchNormalizationLayer in this case
1462 throw ParseException(
1465 "Loading Scale Layer: Only axis 1 is supported currently. "
1466 "Layer=%1% Axis=%2% %3%") %
1469 CHECK_LOCATION().AsString()));
1472 unsigned int channels = inputInfo.GetShape()[1];
1473 unsigned int shape[] = {channels};
1475 BatchNormalizationDescriptor desc;
1476 desc.m_Eps = 0.0f; // Don't need epsilon if variance is 1.
1477 vector<float> meanData(channels, 0.0f);
1478 vector<float> varianceData(channels, 1.0f);
1479 vector<float> betaData(channels, 0.0f);
1480 vector<float> gammaData(channels);
1482 GetDataFromBlob(layerParam, gammaData, 0);
1484 if(param.has_bias_term())
1486 GetDataFromBlob(layerParam, betaData, 1);
1489 ConstTensor mean(TensorInfo(1, shape, armnn::DataType::Float32), meanData);
1490 ConstTensor variance(TensorInfo(1, shape, armnn::DataType::Float32), varianceData);
1491 ConstTensor beta(TensorInfo(1, shape, armnn::DataType::Float32), betaData);
1492 ConstTensor gamma(TensorInfo(1, shape, armnn::DataType::Float32), gammaData);
1494 armnn::IConnectableLayer* const batchNormLayer = m_Network->AddBatchNormalizationLayer(desc,
1495 mean, variance, beta, gamma, name.c_str());
1496 GetArmnnOutputSlotForCaffeTop(layerParam.bottom(0)).Connect(batchNormLayer->GetInputSlot(0));
1497 batchNormLayer->GetOutputSlot(0).SetTensorInfo(inputInfo);
1498 SetArmnnOutputSlotForCaffeTop(layerParam.top(0), batchNormLayer->GetOutputSlot(0));
1501 void CaffeParserBase::ParseSplitLayer(const caffe::LayerParameter& layerParam)
1503 // Used in caffe to duplicate memory - not necessary in armnn.
1504 if (layerParam.bottom_size() != 1)
1506 throw ParseException(
1509 "Split layer '%1%' should have exactly 1 bottom. "
1510 "#bottoms=%2% %3%") %
1512 layerParam.bottom_size() %
1513 CHECK_LOCATION().AsString()));
1515 armnn::IOutputSlot& outputSlot = GetArmnnOutputSlotForCaffeTop(layerParam.bottom(0));
1516 for (int i = 0; i < layerParam.top_size(); i++)
1518 SetArmnnOutputSlotForCaffeTop(layerParam.top(i), outputSlot);
1522 void CaffeParserBase::ParseDropoutLayer(const caffe::LayerParameter& layerParam)
1524 // Ignored for inference, so patch the single input to its single output.
1525 if (layerParam.bottom_size() != 1 || layerParam.top_size() != 1)
1527 throw ParseException(
1530 "Dropout layer '%1%' should have exactly 1 bottom and 1 top. "
1531 "#bottoms=%2% #tops=%3% %4%") %
1533 layerParam.bottom_size() %
1534 layerParam.top_size() %
1535 CHECK_LOCATION().AsString()));
1537 SetArmnnOutputSlotForCaffeTop(layerParam.top(0), GetArmnnOutputSlotForCaffeTop(layerParam.bottom(0)));
1540 void CaffeParserBase::TrackInputBinding(armnn::IConnectableLayer* layer,
1541 armnn::LayerBindingId id,
1542 const armnn::TensorInfo& tensorInfo)
1544 return TrackBindingPoint(layer, id, tensorInfo, layer->GetName(), m_NetworkInputsBindingInfo);
1547 void CaffeParserBase::TrackOutputBinding(armnn::IConnectableLayer* layer,
1548 armnn::LayerBindingId id,
1549 const armnn::TensorInfo& tensorInfo)
1551 return TrackBindingPoint(layer, id, tensorInfo, layer->GetName(), m_NetworkOutputsBindingInfo);
1554 void CaffeParserBase::TrackBindingPoint(armnn::IConnectableLayer* layer,
1555 armnn::LayerBindingId id,
1556 const armnn::TensorInfo& tensorInfo,
1557 const char* bindingPointDesc,
1558 std::unordered_map<std::string, BindingPointInfo>& nameToBindingInfo)
1560 const std::string layerName = layer->GetName();
1561 auto it = nameToBindingInfo.find(layerName);
1562 if (it == nameToBindingInfo.end())
1564 nameToBindingInfo[layerName] = std::make_pair(id, tensorInfo);
1568 throw ParseException(
1571 "Id %1% used by more than one %2% layer %3%") %
1574 CHECK_LOCATION().AsString()));
1578 armnn::IOutputSlot& CaffeParserBase::GetArmnnOutputSlotForCaffeTop(const std::string& caffeTopName) const
1580 auto it = m_ArmnnOutputSlotForCaffeTop.find(caffeTopName);
1581 if (it != m_ArmnnOutputSlotForCaffeTop.end())
1587 throw ParseException(
1590 "Could not find armnn output slot for Caffe top '%1%' %2%") %
1592 CHECK_LOCATION().AsString()));
1596 void CaffeParserBase::SetArmnnOutputSlotForCaffeTop(
1597 const std::string& caffeTopName, armnn::IOutputSlot& armnnOutputSlot)
1599 auto it = m_ArmnnOutputSlotForCaffeTop.find(caffeTopName);
1600 if (it == m_ArmnnOutputSlotForCaffeTop.end())
1602 m_ArmnnOutputSlotForCaffeTop[caffeTopName] = &armnnOutputSlot;
1606 throw ParseException(
1609 "Attempting to add duplicate entry for Caffe top '%1%' %2%") %
1611 CHECK_LOCATION().AsString()));
1615 // Note: can move to CaffeParser when/if we optimise the text/string format
1616 // to load on a layer by layer basis
1617 void CaffeParserBase::ResolveInPlaceLayers(caffe::NetParameter& netParameter)
1619 // Finds layers with the same top.
1620 std::map<std::string, std::vector<caffe::LayerParameter*>> layersByTop;
1621 for (int layerIdx = 0; layerIdx < netParameter.layer_size(); ++layerIdx)
1623 caffe::LayerParameter& layer = *netParameter.mutable_layer(layerIdx);
1624 std::string name = layer.name();
1625 for (int i = 0; i < layer.top_size(); ++i)
1627 layersByTop[layer.top(i)].push_back(&layer);
1631 // For each set of layers with the same top, resolves them to a linear chain rather than in-place layers.
1632 // Note that for 'regular' layers, there will be a single layer in each group and so this will be a no-op.
1633 for (auto layersWithSameTopIt : layersByTop)
1635 const std::string& top = layersWithSameTopIt.first;
1636 const std::vector<caffe::LayerParameter*>& layersWithSameTop = layersWithSameTopIt.second;
1638 // Chains the layers together in the order that they are listed in the prototxt (hopefully this is correct).
1639 // Note that the last layer will not have its top modified so that other layers will continue to reference it.
1640 for (unsigned int layerIdx = 0; layerIdx < layersWithSameTop.size() - 1; ++layerIdx)
1642 caffe::LayerParameter& layer1 = *layersWithSameTop[layerIdx];
1643 caffe::LayerParameter& layer2 = *layersWithSameTop[layerIdx+1];
1644 if (layer1.top_size() != 1)
1646 throw ParseException(
1649 "Node '%1%' is an in-place layer but doesn't have exactly one "
1650 "top. It has %2% instead. %3%") %
1653 CHECK_LOCATION().AsString()));
1655 std::string newTop = layer1.name() + "_top";
1656 layer1.set_top(0, newTop);
1657 if (layer2.bottom_size() != 1 || layer2.bottom(0) != top)
1659 throw ParseException(
1662 "Node '%1%' is an in-place layer but "
1663 "doesn't have exactly one bottom, or it doesn't match its top. "
1664 "#bottoms=%2%, first bottom is %3%, top is %4% %5%") %
1668 CHECK_LOCATION().AsString()));
1670 layer2.set_bottom(0, newTop);
1675 // Note: can move to CaffeParser when/if we optimise the text/string format
1676 // to load on a layer by layer basis
1677 void CaffeParserBase::LoadNetParam(NetParameter& netParameter)
1679 // Caffe models sometimes have an implicit input layer.
1680 // In that case, add an explicit one.
1681 if (netParameter.input_size() > 0)
1683 LayerParameter* newLayer = netParameter.add_layer();
1685 newLayer->set_type("Input");
1686 newLayer->set_name(netParameter.input(0));
1687 newLayer->add_top(netParameter.input(0));
1689 InputParameter* inputParam = newLayer->mutable_input_param();
1690 BlobShape* shape = inputParam->add_shape();
1692 int dim_size = netParameter.input_dim_size();
1693 for (int i = 0; i < dim_size; ++i)
1695 shape->add_dim(netParameter.input_dim(i));
1699 // Replaces in-place layers with regular ones to make the rest of the parsing easier.
1700 ResolveInPlaceLayers(netParameter);
1702 // Creates a lookup of Caffe layers by name.
1703 for (int i = 0; i < netParameter.layer_size(); ++i)
1705 const caffe::LayerParameter& layer = netParameter.layer(i);
1706 for (int i = 0; i < layer.top_size(); ++i)
1708 m_CaffeLayersByTopName[layer.top(i)] = &layer;
1712 // Finds the output layers the user requested.
1713 std::vector<const caffe::LayerParameter*> targetLayers;
1714 for (const std::string& requestedOutputName : m_RequestedOutputs)
1716 auto nodeIt = m_CaffeLayersByTopName.find(requestedOutputName);
1717 if (nodeIt == m_CaffeLayersByTopName.end())
1719 throw ParseException(
1722 "Couldn't find requested output layer '%1%' in graph %2%") %
1723 requestedOutputName %
1724 CHECK_LOCATION().AsString()));
1726 targetLayers.push_back(nodeIt->second);
1729 // Sorts them into a linear ordering such that all inputs of a node are before the node itself.
1730 std::vector<const caffe::LayerParameter*> sortedNodes;
1731 if (!armnnUtils::GraphTopologicalSort<const caffe::LayerParameter*>(
1733 [this](const caffe::LayerParameter* node)
1735 return GetInputs(*node);
1739 throw ParseException(
1742 "Cycle detected in graph. #nodes: %1% %2%") %
1743 sortedNodes.size() %
1744 CHECK_LOCATION().AsString()));
1747 // Parses each node in order, knowing that all inputs of a node will be processed before the node itself.
1748 for (const caffe::LayerParameter* current : sortedNodes)
1750 auto it = ms_CaffeLayerNameToParsingFunctions.find(current->type());
1751 if (it == ms_CaffeLayerNameToParsingFunctions.end())
1753 throw ParseException(
1755 boost::format("Unsupported layer type: '%1%' for layer %2% %3%") %
1758 CHECK_LOCATION().AsString()));
1760 auto func = it->second;
1761 (this->*func)(*current);
1764 // Adds ArmNN output layers connected to each requested output.
1765 for (const std::string& requestedOutput : m_RequestedOutputs)
1767 armnn::IOutputSlot& outputSlot = GetArmnnOutputSlotForCaffeTop(requestedOutput);
1769 const armnn::LayerBindingId outputId = boost::numeric_cast<armnn::LayerBindingId>(
1770 m_NetworkOutputsBindingInfo.size());
1771 armnn::IConnectableLayer* const outputLayer = m_Network->AddOutputLayer(outputId, requestedOutput.c_str());
1772 outputSlot.Connect(outputLayer->GetInputSlot(0));
1774 TrackOutputBinding(outputLayer, outputId, outputLayer->GetInputSlot(0).GetConnection()->GetTensorInfo());
1778 INetworkPtr CaffeParserBase::CreateNetworkFromTextFile(const char* graphFile,
1779 const std::map<std::string, armnn::TensorShape>& inputShapes,
1780 const std::vector<std::string>& requestedOutputs)
1782 FILE* fd = fopen(graphFile, "r");
1786 throw FileNotFoundException(
1789 "Failed to open graph file: %1% %2%") %
1791 CHECK_LOCATION().AsString()));
1794 // Parses the file into a message.
1795 NetParameter netParam;
1796 auto input = new google::protobuf::io::FileInputStream(fileno(fd));
1797 bool success = google::protobuf::TextFormat::Parse(input, &netParam);
1803 throw ParseException(
1806 "Failed to parse graph file: %1% %2%") %
1808 CHECK_LOCATION().AsString()));
1811 return CreateNetworkFromNetParameter(netParam, inputShapes, requestedOutputs);
1814 INetworkPtr CaffeParserBase::CreateNetworkFromString(const char* protoText,
1815 const std::map<std::string, armnn::TensorShape>& inputShapes,
1816 const std::vector<std::string>& requestedOutputs)
1818 // Parses the string into a message.
1819 NetParameter netParam;
1820 bool success = google::protobuf::TextFormat::ParseFromString(protoText, &netParam);
1824 throw ParseException(
1827 "Failed to parse graph string %1%") %
1828 CHECK_LOCATION().AsString()));
1831 return CreateNetworkFromNetParameter(netParam, inputShapes, requestedOutputs);
1834 INetworkPtr CaffeParser::CreateNetworkFromBinaryFile(const char* graphFile,
1835 const std::map<std::string, armnn::TensorShape>& inputShapes,
1836 const std::vector<std::string>& requestedOutputs)
1838 FILE* fd = fopen(graphFile, "rb");
1842 throw FileNotFoundException(
1845 "Failed to open graph file at: %1% %2%") %
1847 CHECK_LOCATION().AsString()));
1850 // Parses the file into a message.
1851 NetParameter netParam;
1853 FileInputStream inStream(fileno(fd));
1854 CodedInputStream codedStream(&inStream);
1855 codedStream.SetTotalBytesLimit(INT_MAX, INT_MAX);
1856 bool success = netParam.ParseFromCodedStream(&codedStream);
1861 throw ParseException(
1864 "Failed to parse protobuf file: %1% %2%") %
1866 CHECK_LOCATION().AsString()));
1869 return CreateNetworkFromNetParameter(netParam, inputShapes, requestedOutputs);
1872 // Note: can move to CaffeParser when/if we optimise the text/string format
1873 // to load on a layer by layer basis
1874 INetworkPtr CaffeParserBase::CreateNetworkFromNetParameter(NetParameter& netParam,
1875 const std::map<std::string, armnn::TensorShape>& inputShapes,
1876 const std::vector<std::string>& requestedOutputs)
1878 m_NetworkInputsBindingInfo.clear();
1879 m_NetworkOutputsBindingInfo.clear();
1881 m_Network = INetwork::Create();
1883 m_InputShapes = inputShapes;
1884 if (requestedOutputs.size() == 0)
1886 throw ParseException("requestedOutputs must have at least one entry");
1888 m_RequestedOutputs = requestedOutputs;
1892 LoadNetParam(netParam);
1894 catch (const ParseException& e)
1902 return move(m_Network);
1905 void CaffeParserBase::Cleanup() {
1906 // cleanup, in case we reuse this parser
1907 m_InputShapes.clear();
1908 m_RequestedOutputs.clear();
1909 m_ArmnnOutputSlotForCaffeTop.clear();
1910 // NOTE: when we get the text/string format
1911 // optimised for memory then this data structure can
1912 // also move to the CaffeParser class
1913 m_CaffeLayersByTopName.clear();