src/armnnCaffeParser/CaffeParser.cpp

   1 //
   2 // Copyright © 2017 Arm Ltd. All rights reserved.
   3 // See LICENSE file in the project root for full license information.
   4 //
   5 #include "CaffeParser.hpp"
   6 #include "RecordByRecordCaffeParser.hpp"
   7
   8 #include "armnn/Descriptors.hpp"
   9 #include "armnn/INetwork.hpp"
  10 #include "armnn/Utils.hpp"
  11 #include "armnn/Exceptions.hpp"
  12
  13 #include "GraphTopologicalSort.hpp"
  14 #include "VerificationHelpers.hpp"
  15
  16 #include <boost/numeric/conversion/cast.hpp>
  17 #include <boost/assert.hpp>
  18 #include <boost/format.hpp>
  19 #include <boost/log/trivial.hpp>
  20
  21 // Caffe
  22 #include "caffe/proto/caffe.pb.h"
  23
  24 // ProtoBuf
  25 #include <google/protobuf/io/coded_stream.h>
  26 #include <google/protobuf/io/zero_copy_stream.h>
  27 #include <google/protobuf/io/zero_copy_stream_impl.h>
  28 #include <google/protobuf/text_format.h>
  29 #include <google/protobuf/stubs/common.h>
  30 #include <google/protobuf/stubs/once.h>
  31 #include <google/protobuf/io/coded_stream.h>
  32 #include <google/protobuf/wire_format_lite_inl.h>
  33 #include <google/protobuf/descriptor.h>
  34 #include <google/protobuf/generated_message_reflection.h>
  35 #include <google/protobuf/reflection_ops.h>
  36 #include <google/protobuf/wire_format.h>
  37
  38 #include <cmath>
  39 #include <sstream>
  40 #include <queue>
  41 #include <fcntl.h>
  42
  43 /// Caffe networks are loaded from protobuf files (binary or text) using the protobuf library and the generated
  44 /// code from caffe.pb.h. This gives us a caffe::NetParameter which is an in-memory version of the file.
  45 /// This contains a flat list of Caffe 'layers' (e.g. convolution, pooling etc.).
  46 /// Each layer has inputs (called "bottoms") and outputs (called "tops"). Data flows from bottom to top.
  47 /// The bottoms of a layer refer to the tops of other layers, not their names.
  48 /// The names of layers seem to be arbitrary (you could rename a layer and the network wouldn't
  49 /// need any other changes).
  50 ///
  51 /// Some layers (e.g. Relu) can be configured so that their top and bottom are both the same. This is called an
  52 /// "in-place" layer and is a Caffe runtime feature used to reduce memory usage by modifying tensors in-place.
  53 /// This isn't relevant to the parser and so we preprocess these layers to convert them to regular layers, to result
  54 /// in a consistent graph structure.
  55
  56 namespace armnnCaffeParser
  57 {
  58
  59 using namespace armnn;
  60 using namespace caffe;
  61 using namespace std;
  62 using namespace google::protobuf::io;
  63
  64 namespace
  65 {
  66
  67 const float* GetArrayPtrFromBlob(const LayerParameter& layerParam, unsigned int blobIndex)
  68 {
  69     auto nBlobs = layerParam.blobs_size();
  70     if (blobIndex >= boost::numeric_cast<unsigned int>(nBlobs))
  71     {
  72         throw ParseException(
  73             boost::str(
  74                 boost::format(
  75                     "Expected data blob at index %1% in layer %2% not found. nBlobs=%2%. %4%") %
  76                     blobIndex %
  77                     layerParam.name() %
  78                     nBlobs %
  79                     CHECK_LOCATION().AsString()));
  80     }
  81
  82     const BlobProto& blob = layerParam.blobs(boost::numeric_cast<int>(blobIndex));
  83
  84     const float* arrayPtr = blob.data().data();
  85     return arrayPtr;
  86 }
  87
  88 void GetDataFromBlob(const LayerParameter& layerParam, vector<float>& outData, unsigned int blobIndex)
  89 {
  90     auto nBlobs = layerParam.blobs_size();
  91     if (blobIndex >= boost::numeric_cast<unsigned int>(nBlobs))
  92     {
  93         throw ParseException(
  94             boost::str(
  95                 boost::format(
  96                     "Expected data blob at index %1% in layer %2% not found. %3%") %
  97                     blobIndex %
  98                     layerParam.name() %
  99                     CHECK_LOCATION().AsString()));
 100     }
 101
 102     const BlobProto& blob = layerParam.blobs(boost::numeric_cast<int>(blobIndex));
 103
 104     size_t blobSize = boost::numeric_cast<size_t>(blob.data_size());
 105     if (blobSize != outData.size())
 106     {
 107         throw ParseException(
 108             boost::str(
 109                 boost::format(
 110                     "Data blob at index %1% in layer %2% has an unexpected size. "
 111                     "Expected %3% elements but got %4% elements. %5%") %
 112                     blobIndex %
 113                     layerParam.name() %
 114                     outData.size() %
 115                     blobSize %
 116                     CHECK_LOCATION().AsString()));
 117     }
 118
 119     int outSizeInt = boost::numeric_cast<int>(outData.size());
 120     for (int i = 0; i < outSizeInt; ++i)
 121     {
 122         outData[static_cast<size_t>(i)] = blob.data(i);
 123     }
 124 }
 125
 126 bool IsInRange(unsigned int value, unsigned int min, unsigned int max)
 127 {
 128     return (value >= min && value <= max) ? true : false;
 129 }
 130
 131 template <typename T>
 132 size_t SizeOfVectorData(const vector<T>& vec)
 133 {
 134     return vec.size() * sizeof(T);
 135 }
 136
 137 void ValidateNumInputsOutputs(const caffe::LayerParameter& layerParameter,
 138                               unsigned int                 numInputs,
 139                               unsigned int                 numOutputs)
 140 {
 141     int numInputsActual = layerParameter.bottom_size();
 142     if (numInputs != boost::numeric_cast<unsigned int>(numInputsActual))
 143     {
 144         throw ParseException(
 145             boost::str(
 146                 boost::format("Invalid number of inputs requested %1% for layer %2% "
 147                               "while only %3% present. %4%") %
 148                               numInputs %
 149                               layerParameter.name() %
 150                               numInputsActual %
 151                               CHECK_LOCATION().AsString()));
 152     }
 153
 154     int numOutputsActual = layerParameter.top_size();
 155     if (numOutputs != boost::numeric_cast<unsigned int>(numOutputsActual))
 156     {
 157         throw ParseException(
 158             boost::str(
 159                 boost::format("Invalid number of outputs requested %1% for layer %2% "
 160                               "while only %3% present. %4%") %
 161                               numOutputs %
 162                               layerParameter.name() %
 163                               numOutputsActual %
 164                               CHECK_LOCATION().AsString()));
 165     }
 166 }
 167
 168 template <typename ParamType, typename ExtractOptional, typename ExtractFallback, typename ValueType>
 169 ValueType GetOptionalWithFallback(const ParamType& param,
 170                                   ExtractOptional extractOptional,
 171                                   ExtractFallback extractFallback,
 172                                   ValueType defaultValue)
 173 {
 174     auto optValue = extractOptional(param, defaultValue);
 175     if (optValue.first)
 176     {
 177         return optValue.second;
 178     }
 179     auto fallbackValue = extractFallback(param, defaultValue);
 180     return fallbackValue.second;
 181 }
 182
 183 #define GET_OPTIONAL_WITH_VECTOR_FALLBACK(PARAM, \
 184                                           PARAM_TYPE, \
 185                                           OPTIONAL_VALUE, \
 186                                           FALLBACK_VECTOR, \
 187                                           VALUE_TYPE, \
 188                                           DEFAULT_VALUE) \
 189     GetOptionalWithFallback( \
 190         PARAM, \
 191         [](const PARAM_TYPE & param, VALUE_TYPE defaultValue) \
 192         { \
 193             if (param.has_##OPTIONAL_VALUE ()) \
 194             { \
 195                 return std::make_pair(true, param.OPTIONAL_VALUE ()); \
 196             } \
 197             else \
 198             { \
 199                 return std::make_pair(false, defaultValue); \
 200             } \
 201         }, \
 202         [](const PARAM_TYPE & param, VALUE_TYPE defaultValue) \
 203         { \
 204             if (param.FALLBACK_VECTOR##_size() > 0) \
 205             { \
 206                 return std::make_pair(true, (param.FALLBACK_VECTOR ()).Get(0)); \
 207             } \
 208             else \
 209             { \
 210                 return std::make_pair(false, defaultValue); \
 211             } \
 212         }, \
 213         DEFAULT_VALUE)
 214
 215 #define GET_OPTIONAL_WITH_FALLBACK(PARAM, \
 216                                    PARAM_TYPE, \
 217                                    OPTIONAL_VALUE, \
 218                                    FALLBACK_VALUE, \
 219                                    VALUE_TYPE, \
 220                                    DEFAULT_VALUE) \
 221     GetOptionalWithFallback( \
 222         PARAM, \
 223         [](const PARAM_TYPE & param, VALUE_TYPE defaultValue) \
 224         { \
 225             if (param.has_##OPTIONAL_VALUE ()) \
 226             { \
 227                 return std::make_pair(true, param.OPTIONAL_VALUE ()); \
 228             } \
 229             else \
 230             { \
 231                 return std::make_pair(false, defaultValue); \
 232             } \
 233         }, \
 234         [](const PARAM_TYPE & param, VALUE_TYPE defaultValue) \
 235         { \
 236             if (param.has_##FALLBACK_VALUE ()) \
 237             { \
 238                 return std::make_pair(true, param.FALLBACK_VALUE ()); \
 239             } \
 240             else \
 241             { \
 242                 return std::make_pair(false, defaultValue); \
 243             } \
 244         }, \
 245         DEFAULT_VALUE)
 246
 247
 248 void ValidateEqualValuesInRange(unsigned int valueA,
 249                                 const char* valueNameA,
 250                                 unsigned int valueB,
 251                                 const char* valueNameB,
 252                                 unsigned int min,
 253                                 unsigned int max,
 254                                 const armnn::CheckLocation& location)
 255 {
 256     if (!IsInRange(valueA, min, max) || !IsInRange(valueB, min, max) || (valueA != valueB))
 257     {
 258         throw ParseException(
 259             boost::str(
 260                 boost::format(
 261                     "%1%=%2% and %3%=%4% must be equal and within the valid range"
 262                     "of [%5%, %6%] %7%") %
 263                     valueNameA %
 264                     valueA %
 265                     valueNameB %
 266                     valueB %
 267                     min %
 268                     max %
 269                     location.AsString()));
 270     }
 271 }
 272
 273 #define VALIDATE_EQUAL_VALUES_IN_RANGE(A, B, MIN_RANGE, MAX_RANGE) \
 274     ValidateEqualValuesInRange(A, #A, B, #B, MIN_RANGE, MAX_RANGE, CHECK_LOCATION())
 275
 276 } // namespace <anonymous>
 277
 278 const std::map<std::string, CaffeParserBase::OperationParsingFunction>
 279     CaffeParserBase::ms_CaffeLayerNameToParsingFunctions = {
 280     { "Input",        &CaffeParserBase::ParseInputLayer },
 281     { "Convolution",  &CaffeParserBase::ParseConvLayer },
 282     { "Pooling",      &CaffeParserBase::ParsePoolingLayer },
 283     { "ReLU",         &CaffeParserBase::ParseReluLayer },
 284     { "LRN",          &CaffeParserBase::ParseLRNLayer },
 285     { "InnerProduct", &CaffeParserBase::ParseInnerProductLayer },
 286     { "Softmax",      &CaffeParserBase::ParseSoftmaxLayer },
 287     { "Eltwise",      &CaffeParserBase::ParseEltwiseLayer },
 288     { "Concat",       &CaffeParserBase::ParseConcatLayer },
 289     { "BatchNorm",    &CaffeParserBase::ParseBatchNormLayer },
 290     { "Scale",        &CaffeParserBase::ParseScaleLayer },
 291     { "Split",        &CaffeParserBase::ParseSplitLayer },
 292     { "Dropout",      &CaffeParserBase::ParseDropoutLayer},
 293 };
 294
 295 ICaffeParser* ICaffeParser::CreateRaw()
 296 {
 297     return new RecordByRecordCaffeParser();
 298 }
 299
 300 ICaffeParserPtr ICaffeParser::Create()
 301 {
 302     return ICaffeParserPtr(CreateRaw(), &ICaffeParser::Destroy);
 303 }
 304
 305 void ICaffeParser::Destroy(ICaffeParser* parser)
 306 {
 307     delete parser;
 308 }
 309
 310 CaffeParserBase::CaffeParserBase()
 311     : m_Network(nullptr, nullptr)
 312 {
 313
 314 }
 315
 316 CaffeParser::CaffeParser()
 317 : CaffeParserBase()
 318 {
 319
 320 }
 321
 322 BindingPointInfo CaffeParserBase::GetNetworkInputBindingInfo(const std::string& name) const
 323 {
 324     return GetBindingInfo(name, "input", m_NetworkInputsBindingInfo);
 325 }
 326
 327 BindingPointInfo CaffeParserBase::GetNetworkOutputBindingInfo(const std::string& name) const
 328 {
 329     return GetBindingInfo(name, "output", m_NetworkOutputsBindingInfo);
 330 }
 331
 332 std::pair<armnn::LayerBindingId, armnn::TensorInfo> CaffeParserBase::GetBindingInfo(const std::string& layerName,
 333     const char* bindingPointDesc,
 334     const std::unordered_map<std::string, BindingPointInfo>& nameToBindingInfo)
 335 {
 336     auto it = nameToBindingInfo.find(layerName);
 337     if (it == nameToBindingInfo.end())
 338     {
 339         throw InvalidArgumentException(
 340             boost::str(
 341                 boost::format(
 342                     "Unknown binding %1% for layer '%2%'. %3%") %
 343                     bindingPointDesc %
 344                     layerName %
 345                     CHECK_LOCATION().AsString()));
 346     }
 347     return it->second;
 348 }
 349
 350 TensorInfo CaffeParserBase::BlobShapeToTensorInfo(const caffe::BlobShape& blobShape) const
 351 {
 352     std::vector<unsigned int> shape;
 353     for (int j = 0; j < blobShape.dim_size(); ++j)
 354     {
 355         shape.push_back(static_cast<unsigned int>(blobShape.dim(j)));
 356     }
 357
 358     return TensorInfo(boost::numeric_cast<unsigned int>(shape.size()), shape.data(), DataType::Float32);
 359 }
 360
 361 BlobShape TensorDescToBlobShape(const TensorInfo& desc)
 362 {
 363     BlobShape ret;
 364     for (unsigned int i = 0; i < desc.GetNumDimensions(); ++i)
 365     {
 366         ret.add_dim(i);
 367         ret.set_dim(boost::numeric_cast<int>(i), desc.GetShape()[i]);
 368     }
 369
 370     return ret;
 371 }
 372
 373 // Note: can move to CaffeParser when/if we optimise the text/string format
 374 //       to load on a layer by layer basis
 375 vector<const LayerParameter*> CaffeParserBase::GetInputs(const LayerParameter& layerParam)
 376 {
 377     std::vector<const caffe::LayerParameter*> ret;
 378     ret.reserve(boost::numeric_cast<size_t>(layerParam.bottom_size()));
 379     for (int j = 0; j < layerParam.bottom_size(); ++j)
 380     {
 381         std::string inputName = layerParam.bottom(j);
 382         auto inputIt = m_CaffeLayersByTopName.find(inputName);
 383         if (inputIt == m_CaffeLayersByTopName.end())
 384         {
 385             throw ParseException(
 386                 boost::str(
 387                     boost::format(
 388                         "Can't find Caffe layer with top called '%1%', "
 389                         "which is listed as an input of '%2%'. %3%") %
 390                         inputName %
 391                         layerParam.name() %
 392                         CHECK_LOCATION().AsString()));
 393         }
 394         ret.push_back(inputIt->second);
 395     }
 396
 397     return ret;
 398 }
 399
 400 void CaffeParserBase::ParseInputLayer(const LayerParameter& layerParam)
 401 {
 402     BOOST_ASSERT(layerParam.type() == "Input");
 403     ValidateNumInputsOutputs(layerParam, 0, 1);
 404
 405     const InputParameter& param = layerParam.input_param();
 406
 407     const armnn::LayerBindingId inputId = boost::numeric_cast<armnn::LayerBindingId>(
 408         m_NetworkInputsBindingInfo.size());
 409     armnn::IConnectableLayer* const inputLayer = m_Network->AddInputLayer(inputId, layerParam.name().c_str());
 410
 411     // Decides the tensor info for this input. This can be specified in the Caffe network but can also
 412     // be overriden by user input (m_inputShapes).
 413     armnn::TensorInfo inputTensorInfo;
 414
 415     const BlobShape* originalShape = param.shape_size() > 0 && param.shape(0).dim_size() > 0 ?
 416         &param.shape(0) : nullptr;
 417     if (originalShape)
 418     {
 419         inputTensorInfo = BlobShapeToTensorInfo(*originalShape);
 420     }
 421
 422     auto overrideIt = m_InputShapes.find(layerParam.name());
 423     if (overrideIt != m_InputShapes.end())
 424     {
 425         const TensorShape& overrideShape = overrideIt->second;
 426         if (originalShape &&
 427             (    originalShape->dim(1) != overrideShape[1]
 428               || originalShape->dim(2) != overrideShape[2]
 429               || originalShape->dim(3) != overrideShape[3]))
 430         {
 431             throw ParseException(
 432                 boost::str(
 433                     boost::format(
 434                         "Parsed input shape for '%1%' is incompatible with the override provided. %2%") %
 435                         layerParam.name() %
 436                         CHECK_LOCATION().AsString()));
 437         }
 438         inputTensorInfo.SetShape(overrideShape);
 439     }
 440     else if (!originalShape)
 441     {
 442         throw ParseException(
 443             boost::str(
 444                 boost::format(
 445                     "No input descriptor given for '%1%' and no input shape found in caffe model. %2%") %
 446                     layerParam.name() %
 447                     CHECK_LOCATION().AsString()));
 448     }
 449
 450     TrackInputBinding(inputLayer, inputId, inputTensorInfo);
 451     inputLayer->GetOutputSlot(0).SetTensorInfo(inputTensorInfo);
 452     SetArmnnOutputSlotForCaffeTop(layerParam.top(0), inputLayer->GetOutputSlot(0));
 453 }
 454
 455 void CaffeParserBase::AddConvLayerWithSplits(const caffe::LayerParameter& layerParam,
 456                                              const armnn::Convolution2dDescriptor& desc,
 457                                              unsigned int kernelW,
 458                                              unsigned int kernelH)
 459 {
 460     BOOST_ASSERT(layerParam.type() == "Convolution");
 461     ValidateNumInputsOutputs(layerParam, 1, 1);
 462
 463     ConvolutionParameter convParam = layerParam.convolution_param();
 464     BlobShape inputShape = TensorDescToBlobShape(GetArmnnOutputSlotForCaffeTop(layerParam.bottom(0)).GetTensorInfo());
 465     const unsigned int numGroups = convParam.has_group() ? convParam.group() : 1;
 466
 467     // asusme these were already verified by the caller ParseConvLayer() function
 468     BOOST_ASSERT(numGroups < inputShape.dim(1));
 469     BOOST_ASSERT(numGroups > 1);
 470
 471     // Handle grouping
 472     armnn::IOutputSlot& inputConnection = GetArmnnOutputSlotForCaffeTop(layerParam.bottom(0));
 473
 474     vector<string> convLayerNames(numGroups);
 475     vector<armnn::IConnectableLayer*> convLayers(numGroups);
 476     convLayerNames[0] = layerParam.name();
 477
 478     // This convolution is to be applied to chunks of the input data so add a splitter layer
 479
 480     // Redirect the convolution input to the splitter
 481     unsigned int splitterDimSizes[4] = {static_cast<unsigned int>(inputShape.dim(0)),
 482                                         static_cast<unsigned int>(inputShape.dim(1)),
 483                                         static_cast<unsigned int>(inputShape.dim(2)),
 484                                         static_cast<unsigned int>(inputShape.dim(3))};
 485
 486     // Split dimension 1 of the splitter output shape and conv input shapes
 487     // according to the number of groups
 488
 489     splitterDimSizes[1] /= numGroups;
 490     inputShape.set_dim(1, splitterDimSizes[1]);
 491
 492     // This is used to describe how the input is to be split
 493     ViewsDescriptor splitterDesc(numGroups);
 494
 495     // Create an output node for each group, giving each a unique name
 496     for (unsigned int g = 0; g < numGroups; ++g)
 497     {
 498         // Work out the names of the splitter layers child convolutions
 499         stringstream ss;
 500         ss << layerParam.name() << "_" << g;
 501         convLayerNames[g] = ss.str();
 502
 503         splitterDesc.SetViewOriginCoord(g, 1, splitterDimSizes[1] * g);
 504
 505         // Set the size of the views.
 506         for (unsigned int dimIdx=0; dimIdx < 4; dimIdx++)
 507         {
 508             splitterDesc.SetViewSize(g, dimIdx, splitterDimSizes[dimIdx]);
 509         }
 510     }
 511
 512     const std::string splitterLayerName = std::string("splitter_") + layerParam.bottom(0);
 513     armnn::IConnectableLayer* splitterLayer = m_Network->AddSplitterLayer(splitterDesc, splitterLayerName.c_str());
 514
 515     inputConnection.Connect(splitterLayer->GetInputSlot(0));
 516     for (unsigned int i = 0; i < splitterLayer->GetNumOutputSlots(); i++)
 517     {
 518         splitterLayer->GetOutputSlot(i).SetTensorInfo(BlobShapeToTensorInfo(inputShape));
 519     }
 520
 521     unsigned int numFilters = convParam.num_output();
 522
 523     // Populates convolution output tensor descriptor dimensions.
 524     BlobShape outputShape;
 525     outputShape.add_dim(0);
 526     outputShape.set_dim(0, inputShape.dim(0));
 527     outputShape.add_dim(1);
 528     // Ensures that dimension 1 of the convolution output is split according to the number of groups.
 529     outputShape.set_dim(1, numFilters / numGroups);
 530     outputShape.add_dim(2);
 531     outputShape.set_dim(
 532         2, (static_cast<int>(
 533                 static_cast<float>(inputShape.dim(2) + 2 * desc.m_PadBottom - kernelH) /
 534                 static_cast<float>(desc.m_StrideY)) + 1));
 535     outputShape.add_dim(3);
 536     outputShape.set_dim(
 537         3, (static_cast<int>(
 538                 static_cast<float>(inputShape.dim(3) + 2 * desc.m_PadRight - kernelW) /
 539                 static_cast<float>(desc.m_StrideX)) + 1));
 540
 541     // Load the weight data for ALL groups
 542     vector<float> weightData(boost::numeric_cast<size_t>(numGroups *
 543                                                          inputShape.dim(1) *  // number of input channels
 544                                                          outputShape.dim(1) * // number of output channels
 545                                                          kernelH *
 546                                                          kernelW));
 547     GetDataFromBlob(layerParam, weightData, 0);
 548
 549     const unsigned int weightDimSizes[4] = {
 550         static_cast<unsigned int>(outputShape.dim(1)),
 551         static_cast<unsigned int>(inputShape.dim(1)),
 552         kernelH,
 553         kernelW};
 554
 555     TensorInfo biasInfo;
 556     vector<float> biasData;
 557
 558     if (desc.m_BiasEnabled)
 559     {
 560         biasData.resize(boost::numeric_cast<size_t>(numGroups * outputShape.dim(1)), 1.f);
 561         GetDataFromBlob(layerParam, biasData, 1);
 562
 563         const unsigned int biasDimSizes[1] = {static_cast<unsigned int>(outputShape.dim(1))};
 564         biasInfo = TensorInfo(1, biasDimSizes, DataType::Float32);
 565     }
 566
 567     const unsigned int numWeightsPerGroup = boost::numeric_cast<unsigned int>(weightData.size()) / numGroups;
 568     const unsigned int numBiasesPerGroup  = boost::numeric_cast<unsigned int>(biasData.size()) / numGroups;
 569
 570     for (unsigned int g = 0; g < numGroups; ++g)
 571     {
 572         // Sets the slot index, group 0 should be connected to the 0th output of the splitter
 573         // group 1 should be connected to the 1st output of the splitter.
 574
 575         // Pulls out the weights for this group from that loaded from the model file earlier.
 576         ConstTensor weights(TensorInfo(4, weightDimSizes, DataType::Float32),
 577                             weightData.data() + numWeightsPerGroup * g);
 578
 579         IConnectableLayer* convLayer = nullptr;
 580         if (desc.m_BiasEnabled)
 581         {
 582             // Pulls out the biases for this group from that loaded from the model file earlier.
 583             ConstTensor biases(biasInfo, biasData.data() + numBiasesPerGroup * g);
 584
 585             convLayer =
 586                 m_Network->AddConvolution2dLayer(desc, weights, biases, convLayerNames[g].c_str());
 587         }
 588         else
 589         {
 590             convLayer =
 591                 m_Network->AddConvolution2dLayer(desc, weights, convLayerNames[g].c_str());
 592         }
 593         convLayers[g] = convLayer;
 594
 595         // If we have more than one group then the input to the nth convolution the splitter layer's nth output,
 596         // otherwise it's the regular input to this layer.
 597         armnn::IOutputSlot& splitterInputConnection =
 598             splitterLayer ? splitterLayer->GetOutputSlot(g) : inputConnection;
 599         splitterInputConnection.Connect(convLayer->GetInputSlot(0));
 600         convLayer->GetOutputSlot(0).SetTensorInfo(BlobShapeToTensorInfo(outputShape));
 601     }
 602
 603     // If the convolution was performed in chunks, add a layer to merge the results
 604
 605     // The merge input shape matches that of the convolution output
 606     unsigned int mergeDimSizes[4] = {static_cast<unsigned int>(outputShape.dim(0)),
 607                                         static_cast<unsigned int>(outputShape.dim(1)),
 608                                         static_cast<unsigned int>(outputShape.dim(2)),
 609                                         static_cast<unsigned int>(outputShape.dim(3))};
 610
 611     // This is used to describe how the input is to be merged
 612     OriginsDescriptor mergeDesc(numGroups);
 613
 614     // Now create an input node for each group, using the name from
 615     // the output of the corresponding convolution
 616     for (unsigned int g = 0; g < numGroups; ++g)
 617     {
 618         mergeDesc.SetViewOriginCoord(g, 1, mergeDimSizes[1] * g);
 619     }
 620
 621     // Make sure the output from the merge is the correct size to hold the data for all groups
 622     mergeDimSizes[1] *= numGroups;
 623     outputShape.set_dim(1, mergeDimSizes[1]);
 624
 625     // Finally add the merge layer
 626     IConnectableLayer* mergerLayer = m_Network->AddMergerLayer(mergeDesc, layerParam.name().c_str());
 627
 628     if (!mergerLayer)
 629     {
 630         throw ParseException(
 631             boost::str(
 632                 boost::format(
 633                     "Failed to create final merger layer for Split+Convolution+Merger. "
 634                     "Layer=%1% #groups=%2% #filters=%3% %4%") %
 635                     layerParam.name() %
 636                     numGroups %
 637                     numFilters %
 638                     CHECK_LOCATION().AsString()));
 639     }
 640
 641     for (unsigned int g = 0; g < numGroups; ++g)
 642     {
 643         convLayers[g]->GetOutputSlot(0).Connect(mergerLayer->GetInputSlot(g));
 644     }
 645     mergerLayer->GetOutputSlot(0).SetTensorInfo(armnn::TensorInfo(4, mergeDimSizes, DataType::Float32));
 646     SetArmnnOutputSlotForCaffeTop(layerParam.top(0), mergerLayer->GetOutputSlot(0));
 647 }
 648
 649 void CaffeParserBase::AddConvLayerWithDepthwiseConv(const caffe::LayerParameter& layerParam,
 650                                                     const armnn::Convolution2dDescriptor& convDesc,
 651                                                     unsigned int kernelW,
 652                                                     unsigned int kernelH)
 653 {
 654     BOOST_ASSERT(layerParam.type() == "Convolution");
 655     ValidateNumInputsOutputs(layerParam, 1, 1);
 656
 657     ConvolutionParameter convParam  = layerParam.convolution_param();
 658     BlobShape inputShape = TensorDescToBlobShape(GetArmnnOutputSlotForCaffeTop(layerParam.bottom(0)).GetTensorInfo());
 659
 660     DepthwiseConvolution2dDescriptor desc;
 661     desc.m_PadLeft      = convDesc.m_PadLeft;
 662     desc.m_PadRight     = convDesc.m_PadRight;
 663     desc.m_PadTop       = convDesc.m_PadTop;
 664     desc.m_PadBottom    = convDesc.m_PadBottom;
 665     desc.m_StrideX      = convDesc.m_StrideX;
 666     desc.m_StrideY      = convDesc.m_StrideY;
 667     desc.m_BiasEnabled  = convDesc.m_BiasEnabled;
 668
 669     unsigned int numFilters = convParam.num_output();
 670
 671     BlobShape outputShape;
 672     outputShape.add_dim(0);
 673     outputShape.set_dim(0, inputShape.dim(0));
 674     outputShape.add_dim(1);
 675     outputShape.set_dim(1, numFilters);
 676     outputShape.add_dim(2);
 677     outputShape.set_dim(
 678         2, (static_cast<int>(
 679                 static_cast<float>(inputShape.dim(2) + 2 * desc.m_PadBottom - kernelH) /
 680                 static_cast<float>(desc.m_StrideY)) + 1));
 681     outputShape.add_dim(3);
 682     outputShape.set_dim(
 683         3, (static_cast<int>(
 684                 static_cast<float>(inputShape.dim(3) + 2 * desc.m_PadRight - kernelW) /
 685                 static_cast<float>(desc.m_StrideX)) + 1));
 686
 687     // Load the weight data
 688     size_t allWeightsSize = boost::numeric_cast<size_t>(inputShape.dim(1) * kernelH * kernelW);
 689     vector<float> weightData(allWeightsSize);
 690
 691     GetDataFromBlob(layerParam, weightData, 0);
 692
 693     // depth multiplier will be 1 for the depthwise convolution
 694     const unsigned int weightDimSizes[4] = {
 695         static_cast<unsigned int>(1),                 // depth multiplier
 696         static_cast<unsigned int>(inputShape.dim(1)), // #channels
 697         kernelH,
 698         kernelW};
 699
 700     armnn::IConnectableLayer* returnLayer = nullptr;
 701     ConstTensor weights(TensorInfo(4, weightDimSizes, DataType::Float32), weightData.data());
 702
 703     if (desc.m_BiasEnabled)
 704     {
 705         TensorInfo biasInfo;
 706         vector<float> biasData;
 707
 708         biasData.resize(boost::numeric_cast<size_t>(outputShape.dim(1)), 1.f);
 709         GetDataFromBlob(layerParam, biasData, 1);
 710
 711         const unsigned int biasDimSizes[1] = {static_cast<unsigned int>(outputShape.dim(1))};
 712         biasInfo = TensorInfo(1, biasDimSizes, DataType::Float32);
 713
 714         ConstTensor biases(biasInfo, biasData.data());
 715         returnLayer = m_Network->AddDepthwiseConvolution2dLayer(desc, weights, biases, layerParam.name().c_str());
 716     }
 717     else
 718     {
 719         returnLayer = m_Network->AddDepthwiseConvolution2dLayer(desc, weights, layerParam.name().c_str());
 720     }
 721
 722     if (!returnLayer)
 723     {
 724         throw ParseException(
 725             boost::str(
 726                 boost::format(
 727                     "Failed to create depthwise convolution layer. "
 728                     "Layer=%1% #filters=%2% %3%") %
 729                     layerParam.name() %
 730                     numFilters %
 731                     CHECK_LOCATION().AsString()));
 732     }
 733     armnn::IOutputSlot& inputConnection = GetArmnnOutputSlotForCaffeTop(layerParam.bottom(0));
 734     inputConnection.Connect(returnLayer->GetInputSlot(0));
 735     returnLayer->GetOutputSlot(0).SetTensorInfo(BlobShapeToTensorInfo(outputShape));
 736     SetArmnnOutputSlotForCaffeTop(layerParam.top(0), returnLayer->GetOutputSlot(0));
 737 }
 738
 739 void CaffeParserBase::ParseConvLayer(const LayerParameter& layerParam)
 740 {
 741     // Ignored Caffe Parameters
 742     // * Dilation Size
 743     // * Weight Filler
 744     // * Bias Filler
 745     // * Engine
 746     // * Force nd_im2col
 747     // * Axis
 748
 749     // Not Available ArmNN Interface Parameters
 750     // * Rounding policy;
 751
 752     BOOST_ASSERT(layerParam.type() == "Convolution");
 753     ValidateNumInputsOutputs(layerParam, 1, 1);
 754
 755     ConvolutionParameter convParam = layerParam.convolution_param();
 756     BlobShape inputShape = TensorDescToBlobShape(GetArmnnOutputSlotForCaffeTop(layerParam.bottom(0)).GetTensorInfo());
 757     const unsigned int numGroups = convParam.has_group() ? convParam.group() : 1;
 758     unsigned int numFilters = convParam.num_output();
 759
 760     const auto notFound = std::numeric_limits<unsigned int>::max();
 761
 762     unsigned int kernelH = GET_OPTIONAL_WITH_VECTOR_FALLBACK(convParam, ConvolutionParameter,
 763                                                              kernel_h, kernel_size, unsigned int, notFound);
 764     unsigned int kernelW = GET_OPTIONAL_WITH_VECTOR_FALLBACK(convParam, ConvolutionParameter,
 765                                                              kernel_w, kernel_size, unsigned int, notFound);
 766
 767     unsigned int strideH = GET_OPTIONAL_WITH_VECTOR_FALLBACK(convParam, ConvolutionParameter,
 768                                                              stride_h, stride, unsigned int, 1u);
 769     unsigned int strideW = GET_OPTIONAL_WITH_VECTOR_FALLBACK(convParam, ConvolutionParameter,
 770                                                              stride_w, stride, unsigned int, 1u);
 771
 772     unsigned int padH = GET_OPTIONAL_WITH_VECTOR_FALLBACK(convParam, ConvolutionParameter,
 773                                                           pad_h, pad, unsigned int, 0u);
 774     unsigned int padW = GET_OPTIONAL_WITH_VECTOR_FALLBACK(convParam, ConvolutionParameter,
 775                                                           pad_w, pad, unsigned int, 0u);
 776
 777     VALIDATE_EQUAL_VALUES_IN_RANGE(kernelH, kernelW, 0, 11);
 778     VALIDATE_EQUAL_VALUES_IN_RANGE(strideH, strideW, 0, 11);
 779     VALIDATE_EQUAL_VALUES_IN_RANGE(padH, padW, 0, 11);
 780
 781     Convolution2dDescriptor convolution2dDescriptor;
 782     convolution2dDescriptor.m_PadLeft     = padW;
 783     convolution2dDescriptor.m_PadRight    = padW;
 784     convolution2dDescriptor.m_PadTop      = padH;
 785     convolution2dDescriptor.m_PadBottom   = padH;
 786     convolution2dDescriptor.m_StrideX     = strideW;
 787     convolution2dDescriptor.m_StrideY     = strideH;
 788     convolution2dDescriptor.m_BiasEnabled = convParam.has_bias_term() ? convParam.bias_term() : true;
 789
 790     if (numGroups > numFilters)
 791     {
 792         throw ParseException(
 793             boost::str(
 794                 boost::format(
 795                     "Error parsing Convolution: %1%. "
 796                     "The 'group'=%2% parameter cannot be larger than the "
 797                     "number of filters supplied ='%3%'. %4%") %
 798                     layerParam.name() %
 799                     numGroups %
 800                     numFilters %
 801                     CHECK_LOCATION().AsString()));
 802     }
 803
 804     if (inputShape.dim_size() != 4)
 805     {
 806         throw ParseException(
 807             boost::str(
 808                 boost::format(
 809                     "Convolution input shape is expected to have 4 dimensions. "
 810                     "%1%'s input has only %2%. %3%") %
 811                     layerParam.name() %
 812                     inputShape.dim_size() %
 813                     CHECK_LOCATION().AsString()));
 814     }
 815
 816     if (numGroups > 1)
 817     {
 818         if (numGroups > inputShape.dim(1))
 819         {
 820             throw ParseException(
 821                 boost::str(
 822                     boost::format(
 823                         "Error parsing Convolution: %1%. "
 824                         "The 'group'=%2% parameter cannot be larger than the "
 825                         "channel of the input shape=%3% (in NCHW format). %4%") %
 826                         layerParam.name() %
 827                         numGroups %
 828                         inputShape.dim(1) %
 829                         CHECK_LOCATION().AsString()));
 830         }
 831         else if (numGroups == inputShape.dim(1))
 832         {
 833             // we use a depthwise convolution here, because the number of groups equals to the
 834             // input channels
 835             AddConvLayerWithDepthwiseConv(layerParam, convolution2dDescriptor, kernelW, kernelH);
 836             return;
 837         }
 838         else
 839         {
 840             // we split the input by channels into channels/groups separate convolutions
 841             // and merger the results afterwards
 842             AddConvLayerWithSplits(layerParam, convolution2dDescriptor, kernelW, kernelH);
 843             return;
 844         }
 845     }
 846
 847     // NOTE: at this point we only need to handle #group=1 case, all other cases should be
 848     //       handled by the AddConvLayer* helpers
 849
 850     // Populate convolution output tensor descriptor dimensions
 851     BlobShape outputShape;
 852     outputShape.add_dim(0);
 853     outputShape.set_dim(0, inputShape.dim(0));
 854     outputShape.add_dim(1);
 855     outputShape.set_dim(1, numFilters);
 856     outputShape.add_dim(2);
 857     outputShape.set_dim(
 858         2, (static_cast<int>(
 859                 static_cast<float>(inputShape.dim(2) + 2 * padH - kernelH) /
 860                 static_cast<float>(strideH)) + 1));
 861     outputShape.add_dim(3);
 862     outputShape.set_dim(
 863         3, (static_cast<int>(
 864                 static_cast<float>(inputShape.dim(3) + 2 * padW - kernelW) /
 865                 static_cast<float>(strideW)) + 1));
 866
 867     // Load the weight data for ALL groups
 868     vector<float> weightData(boost::numeric_cast<size_t>(inputShape.dim(1) *
 869                                                          outputShape.dim(1) *
 870                                                          kernelH *
 871                                                          kernelW));
 872     GetDataFromBlob(layerParam, weightData, 0);
 873
 874     const unsigned int weightDimSizes[4] = {
 875         static_cast<unsigned int>(outputShape.dim(1)), // output channels
 876         static_cast<unsigned int>(inputShape.dim(1)),  // input channels
 877         kernelH,
 878         kernelW};
 879
 880     armnn::IConnectableLayer* returnLayer = nullptr;
 881
 882     // Pull out the weights for this group from that loaded from the model file earlier
 883     ConstTensor weights(TensorInfo(4, weightDimSizes, DataType::Float32), weightData.data());
 884
 885     if (convolution2dDescriptor.m_BiasEnabled)
 886     {
 887         TensorInfo biasInfo;
 888         vector<float> biasData;
 889
 890         biasData.resize(boost::numeric_cast<size_t>(outputShape.dim(1)), 1.f);
 891         GetDataFromBlob(layerParam, biasData, 1);
 892
 893         const unsigned int biasDimSizes[1] = {static_cast<unsigned int>(outputShape.dim(1))};
 894         biasInfo = TensorInfo(1, biasDimSizes, DataType::Float32);
 895
 896         // Pull out the biases for this group from that loaded from the model file earlier
 897         ConstTensor biases(biasInfo, biasData.data());
 898
 899         returnLayer =
 900             m_Network->AddConvolution2dLayer(convolution2dDescriptor, weights, biases, layerParam.name().c_str());
 901     }
 902     else
 903     {
 904         returnLayer = m_Network->AddConvolution2dLayer(convolution2dDescriptor, weights, layerParam.name().c_str());
 905     }
 906
 907     armnn::IOutputSlot& inputConnection = GetArmnnOutputSlotForCaffeTop(layerParam.bottom(0));
 908     inputConnection.Connect(returnLayer->GetInputSlot(0));
 909     returnLayer->GetOutputSlot(0).SetTensorInfo(BlobShapeToTensorInfo(outputShape));
 910
 911     if (!returnLayer)
 912     {
 913         throw ParseException(
 914             boost::str(
 915                 boost::format(
 916                     "Failed to create Convolution layer. "
 917                     "Layer=%1% #groups=%2% #filters=%3% %4%") %
 918                     layerParam.name() %
 919                     numGroups %
 920                     numFilters %
 921                     CHECK_LOCATION().AsString()));
 922     }
 923
 924     SetArmnnOutputSlotForCaffeTop(layerParam.top(0), returnLayer->GetOutputSlot(0));
 925 }
 926
 927 void CaffeParserBase::ParsePoolingLayer(const LayerParameter& layerParam)
 928 {
 929     // Ignored Caffe Parameters
 930     //      Stochastic Pooling
 931     //      Engine
 932
 933     ValidateNumInputsOutputs(layerParam, 1, 1);
 934     PoolingParameter param = layerParam.pooling_param();
 935     const TensorInfo& inputInfo = GetArmnnOutputSlotForCaffeTop(layerParam.bottom(0)).GetTensorInfo();
 936
 937     const auto notFound = std::numeric_limits<unsigned int>::max();
 938
 939     unsigned int kernel_h = GET_OPTIONAL_WITH_FALLBACK(param, PoolingParameter,
 940                                                        kernel_h, kernel_size, unsigned int, notFound);
 941     unsigned int kernel_w = GET_OPTIONAL_WITH_FALLBACK(param, PoolingParameter,
 942                                                        kernel_w, kernel_size, unsigned int, notFound);
 943
 944     if ((kernel_h == notFound || kernel_w == notFound) && param.has_global_pooling())
 945     {
 946         kernel_h = inputInfo.GetShape()[2];
 947         kernel_w = inputInfo.GetShape()[3];
 948     }
 949
 950     VALIDATE_EQUAL_VALUES_IN_RANGE(kernel_h, kernel_w, 0, 11);
 951
 952     unsigned int stride_h = GET_OPTIONAL_WITH_FALLBACK(param, PoolingParameter,
 953                                                        stride_h, stride, unsigned int, notFound);
 954     unsigned int stride_w = GET_OPTIONAL_WITH_FALLBACK(param, PoolingParameter,
 955                                                        stride_h, stride, unsigned int, notFound);
 956
 957     if ((stride_h == notFound || stride_w == notFound) && param.has_global_pooling())
 958     {
 959         stride_h = 1;
 960         stride_w = 1;
 961     }
 962
 963     VALIDATE_EQUAL_VALUES_IN_RANGE(stride_h, stride_w, 0, 11);
 964
 965     unsigned int pad_h = GET_OPTIONAL_WITH_FALLBACK(param, PoolingParameter,
 966                                                     pad_h, pad, unsigned int, 0u);
 967     unsigned int pad_w = GET_OPTIONAL_WITH_FALLBACK(param, PoolingParameter,
 968                                                     pad_w, pad, unsigned int, 0u);
 969
 970     VALIDATE_EQUAL_VALUES_IN_RANGE(pad_h, pad_w, 0, 11);
 971
 972     // Populate Weight and Bias Filter Descriptor
 973     Pooling2dDescriptor pooling2dDescriptor;
 974     if (param.has_pool())
 975     {
 976         PoolingParameter_PoolMethod p = param.pool();
 977         switch (p)
 978         {
 979             case PoolingParameter_PoolMethod_MAX:
 980             {
 981                 pooling2dDescriptor.m_PoolType = PoolingAlgorithm::Max;
 982                 break;
 983             }
 984             case PoolingParameter_PoolMethod_AVE:
 985             {
 986                 pooling2dDescriptor.m_PoolType = PoolingAlgorithm::Average;
 987                 break;
 988             }
 989             case PoolingParameter_PoolMethod_STOCHASTIC:
 990             {
 991                 throw ParseException(
 992                     boost::str(
 993                         boost::format(
 994                             "Pooling Layer: Stochastic Pooling Not Supported. Layer=%1% %2%") %
 995                             layerParam.name() %
 996                             CHECK_LOCATION().AsString()));
 997             }
 998             default:
 999             {
1000                 throw ParseException(
1001                     boost::str(
1002                         boost::format(
1003                             "Pooling Layer: unknown pooling method: %1% for layer: %2% %3%") %
1004                             p %
1005                             layerParam.name() %
1006                             CHECK_LOCATION().AsString()));
1007             }
1008         }
1009     }
1010     else
1011     {
1012         throw ParseException(
1013             boost::str(
1014                 boost::format(
1015                     "No Pooling Method Defined for %1% %2%") %
1016                     layerParam.name() %
1017                     CHECK_LOCATION().AsString()));
1018     }
1019
1020     pooling2dDescriptor.m_PadLeft     = pad_w;
1021     pooling2dDescriptor.m_PadRight    = pad_w;
1022     pooling2dDescriptor.m_PadTop      = pad_h;
1023     pooling2dDescriptor.m_PadBottom   = pad_h;
1024     pooling2dDescriptor.m_StrideX     = stride_w;
1025     pooling2dDescriptor.m_StrideY     = stride_h;
1026     pooling2dDescriptor.m_PoolWidth   = kernel_w;
1027     pooling2dDescriptor.m_PoolHeight  = kernel_h;
1028
1029     pooling2dDescriptor.m_OutputShapeRounding = OutputShapeRounding::Ceiling;
1030     pooling2dDescriptor.m_PaddingMethod  = PaddingMethod::IgnoreValue;
1031
1032     armnn::IConnectableLayer* poolingLayer = m_Network->AddPooling2dLayer(pooling2dDescriptor,
1033         layerParam.name().c_str());
1034
1035     TensorInfo outputInfo(
1036         { inputInfo.GetShape()[0],
1037           inputInfo.GetShape()[1],
1038           static_cast<unsigned int>(ceil(
1039               static_cast<float>(inputInfo.GetShape()[2] + 2 * pad_h - kernel_h) /
1040               boost::numeric_cast<float>(stride_h))) + 1,
1041           static_cast<unsigned int>(ceil(
1042               static_cast<float>(inputInfo.GetShape()[3] + 2 * pad_w - kernel_w) /
1043               boost::numeric_cast<float>(stride_w))) + 1 },
1044         DataType::Float32);
1045
1046     GetArmnnOutputSlotForCaffeTop(layerParam.bottom(0)).Connect(poolingLayer->GetInputSlot(0));
1047     poolingLayer->GetOutputSlot(0).SetTensorInfo(outputInfo);
1048     SetArmnnOutputSlotForCaffeTop(layerParam.top(0), poolingLayer->GetOutputSlot(0));
1049 }
1050
1051 void CaffeParserBase::ParseReluLayer(const LayerParameter& layerParam)
1052 {
1053     ValidateNumInputsOutputs(layerParam, 1, 1);
1054
1055     const string& name = layerParam.name();
1056     const ReLUParameter& param = layerParam.relu_param();
1057
1058     ActivationDescriptor activationDescriptor;
1059     const float negativeSlope = param.negative_slope();
1060     if (negativeSlope == 0.0f)
1061     {
1062         activationDescriptor.m_Function = ActivationFunction::ReLu;
1063     }
1064     else
1065     {
1066         activationDescriptor.m_Function = ActivationFunction::LeakyReLu;
1067         activationDescriptor.m_A = negativeSlope;
1068     }
1069
1070     const TensorInfo& inputInfo = GetArmnnOutputSlotForCaffeTop(layerParam.bottom(0)).GetTensorInfo();
1071     IConnectableLayer* const activationLayer = m_Network->AddActivationLayer(activationDescriptor, name.c_str());
1072     GetArmnnOutputSlotForCaffeTop(layerParam.bottom(0)).Connect(activationLayer->GetInputSlot(0));
1073     activationLayer->GetOutputSlot(0).SetTensorInfo(inputInfo);
1074     SetArmnnOutputSlotForCaffeTop(layerParam.top(0), activationLayer->GetOutputSlot(0));
1075 }
1076
1077 void CaffeParserBase::ParseLRNLayer(const LayerParameter& layerParam)
1078 {
1079     ValidateNumInputsOutputs(layerParam, 1, 1);
1080
1081     LRNParameter param = layerParam.lrn_param();
1082
1083     const TensorInfo& inputInfo = GetArmnnOutputSlotForCaffeTop(layerParam.bottom(0)).GetTensorInfo();
1084
1085     // Ignored BATCH NORMALIZATION Caffe Parameters.
1086     // Ignored MVN Caffe Parameters.
1087     // Ignored LRN Caffe Parameters.
1088     //      Engine
1089
1090     NormalizationDescriptor normalizationDescriptor;
1091     if (param.has_norm_region())
1092     {
1093         LRNParameter_NormRegion n = param.norm_region();
1094         switch (n)
1095         {
1096             case LRNParameter_NormRegion_ACROSS_CHANNELS:
1097             {
1098                 normalizationDescriptor.m_NormChannelType = NormalizationAlgorithmChannel::Across;
1099                 break;
1100             }
1101             case LRNParameter_NormRegion_WITHIN_CHANNEL:
1102             {
1103                 normalizationDescriptor.m_NormChannelType = NormalizationAlgorithmChannel::Within;
1104                 break;
1105             }
1106             default:
1107             {
1108                 throw ParseException(
1109                     boost::str(
1110                         boost::format(
1111                             "Unknown region %1% for LRN layer %2% %3%") %
1112                             n %
1113                             layerParam.name() %
1114                             CHECK_LOCATION().AsString()));
1115             }
1116         }
1117     }
1118     else
1119     {
1120         // Caffe defaults to normalization across channels.
1121         normalizationDescriptor.m_NormChannelType = NormalizationAlgorithmChannel::Across;
1122     }
1123
1124     normalizationDescriptor.m_NormMethodType = NormalizationAlgorithmMethod::LocalBrightness;
1125     if (param.has_local_size())
1126     {
1127         normalizationDescriptor.m_NormSize = param.local_size();
1128     }
1129     else
1130     {
1131         throw ParseException(
1132             boost::str(
1133                 boost::format(
1134                     "local_size not defined for LRN layer %1% %2%") %
1135                     layerParam.name() %
1136                     CHECK_LOCATION().AsString()));
1137     }
1138
1139     if (param.has_alpha())
1140     {
1141         normalizationDescriptor.m_Alpha = param.alpha();
1142         normalizationDescriptor.m_Alpha /= boost::numeric_cast<float>(param.local_size());
1143     }
1144     else
1145     {
1146         throw ParseException(
1147             boost::str(
1148                 boost::format(
1149                     "Alpha not defined for LRN layer %1% %2%") %
1150                     layerParam.name() %
1151                     CHECK_LOCATION().AsString()));
1152     }
1153     if (param.has_beta())
1154     {
1155         normalizationDescriptor.m_Beta = param.beta();
1156     }
1157     else
1158     {
1159         throw ParseException(
1160             boost::str(
1161                 boost::format(
1162                     "Beta not defined for LRN layer %1% %2%") %
1163                     layerParam.name() %
1164                     CHECK_LOCATION().AsString()));
1165     }
1166
1167     if (param.has_k())
1168     {
1169         normalizationDescriptor.m_K = param.k();
1170     }
1171     else
1172     {
1173         normalizationDescriptor.m_K = 1;
1174     }
1175
1176     IConnectableLayer* const normLayer = m_Network->AddNormalizationLayer(normalizationDescriptor,
1177         layerParam.name().c_str());
1178     GetArmnnOutputSlotForCaffeTop(layerParam.bottom(0)).Connect(normLayer->GetInputSlot(0));
1179     normLayer->GetOutputSlot(0).SetTensorInfo(inputInfo);
1180
1181     SetArmnnOutputSlotForCaffeTop(layerParam.top(0), normLayer->GetOutputSlot(0));
1182 }
1183
1184 void CaffeParserBase::ParseInnerProductLayer(const LayerParameter& layerParam)
1185 {
1186     InnerProductParameter param = layerParam.inner_product_param();
1187
1188     ValidateNumInputsOutputs(layerParam, 1, 1);
1189
1190     unsigned int outputSize = param.num_output();
1191
1192     // Ignored Caffe Parameters:
1193     // Weight Filler
1194     // Bias Filler
1195     // Engine
1196     // Axis
1197
1198     FullyConnectedDescriptor tensorFullyConnectedDescriptor;
1199
1200     if (param.has_transpose())
1201     {
1202         // If true, assumes transposed weights.
1203         tensorFullyConnectedDescriptor.m_TransposeWeightMatrix = param.transpose();
1204     }
1205     else
1206     {
1207         // Caffe defaults to transposed.
1208         tensorFullyConnectedDescriptor.m_TransposeWeightMatrix = true;
1209     }
1210
1211     const TensorInfo& inputInfo = GetArmnnOutputSlotForCaffeTop(layerParam.bottom(0)).GetTensorInfo();
1212
1213     TensorInfo weightInfo;
1214     TensorInfo biasInfo;
1215
1216     // Allows implicit flattening of extra dimensions.
1217     unsigned int inputSize = inputInfo.GetShape()[1];
1218     for (unsigned int i = 2; i < inputInfo.GetNumDimensions(); ++i)
1219     {
1220         inputSize *= inputInfo.GetShape()[i];
1221     }
1222
1223     const float* weightDataPtr = GetArrayPtrFromBlob(layerParam, 0);
1224     const unsigned int swTD[2] = { outputSize, inputSize };
1225     ConstTensor weights(TensorInfo(2, swTD, DataType::Float32), weightDataPtr);
1226
1227     tensorFullyConnectedDescriptor.m_BiasEnabled = true;
1228     // Todo: check whether bias enabled.
1229     armnn::IConnectableLayer* fullyConnectedLayer = nullptr;
1230     if (tensorFullyConnectedDescriptor.m_BiasEnabled)
1231     {
1232         // BIAS VALUE
1233         const float* biasDataPtr = GetArrayPtrFromBlob(layerParam, 1);
1234
1235         const unsigned int sbTD[1] = { outputSize };
1236
1237         ConstTensor biases(TensorInfo(1, sbTD, DataType::Float32), biasDataPtr);
1238
1239         fullyConnectedLayer = m_Network->AddFullyConnectedLayer(tensorFullyConnectedDescriptor, weights, biases,
1240             layerParam.name().c_str());
1241     }
1242     else
1243     {
1244         fullyConnectedLayer = m_Network->AddFullyConnectedLayer(tensorFullyConnectedDescriptor, weights,
1245             layerParam.name().c_str());
1246     }
1247
1248     TensorInfo outputInfo({ inputInfo.GetShape()[0], outputSize }, DataType::Float32);
1249     GetArmnnOutputSlotForCaffeTop(layerParam.bottom(0)).Connect(fullyConnectedLayer->GetInputSlot(0));
1250     fullyConnectedLayer->GetOutputSlot(0).SetTensorInfo(outputInfo);
1251     SetArmnnOutputSlotForCaffeTop(layerParam.top(0), fullyConnectedLayer->GetOutputSlot(0));
1252 }
1253
1254 void CaffeParserBase::ParseSoftmaxLayer(const LayerParameter& layerParam)
1255 {
1256     ValidateNumInputsOutputs(layerParam, 1, 1);
1257
1258     SoftmaxParameter param = layerParam.softmax_param();
1259
1260     const TensorInfo& inputInfo = GetArmnnOutputSlotForCaffeTop(layerParam.bottom(0)).GetTensorInfo();
1261
1262     // Ignored Caffe Parameters:
1263     //      axis
1264     //      Engine
1265
1266     armnn::SoftmaxDescriptor softmaxDescriptor;
1267     armnn::IConnectableLayer* const softmaxLayer = m_Network->AddSoftmaxLayer(
1268         softmaxDescriptor,
1269         layerParam.name().c_str());
1270     GetArmnnOutputSlotForCaffeTop(layerParam.bottom(0)).Connect(softmaxLayer->GetInputSlot(0));
1271     softmaxLayer->GetOutputSlot(0).SetTensorInfo(inputInfo);
1272     SetArmnnOutputSlotForCaffeTop(layerParam.top(0), softmaxLayer->GetOutputSlot(0));
1273 }
1274
1275 void CaffeParserBase::ParseEltwiseLayer(const LayerParameter& layerParam)
1276 {
1277     ValidateNumInputsOutputs(layerParam, 2, 1);
1278
1279     const TensorInfo& inputInfo = GetArmnnOutputSlotForCaffeTop(layerParam.bottom(0)).GetTensorInfo();
1280
1281     // Ignored Caffe Parameters:
1282     //      coeff
1283
1284     EltwiseParameter_EltwiseOp operation = EltwiseParameter_EltwiseOp_SUM; // Defaults to sum as per caffe.
1285
1286     if (layerParam.has_eltwise_param() && layerParam.eltwise_param().has_operation())
1287     {
1288         operation = layerParam.eltwise_param().operation();
1289     }
1290
1291     armnn::IConnectableLayer* newLayer = nullptr;
1292     switch (operation)
1293     {
1294         case EltwiseParameter_EltwiseOp_SUM:
1295         {
1296             newLayer = m_Network->AddAdditionLayer(layerParam.name().c_str());
1297             break;
1298         }
1299         case EltwiseParameter_EltwiseOp_PROD:
1300         {
1301             newLayer = m_Network->AddMultiplicationLayer(layerParam.name().c_str());
1302             break;
1303         }
1304         default:
1305         {
1306             throw ParseException(
1307                 boost::str(
1308                     boost::format(
1309                         "Unsupported operation %1% in Eltwise layer %2% %3%") %
1310                         operation %
1311                         layerParam.name() %
1312                         CHECK_LOCATION().AsString()));
1313         }
1314     }
1315
1316     GetArmnnOutputSlotForCaffeTop(layerParam.bottom(0)).Connect(newLayer->GetInputSlot(0));
1317     GetArmnnOutputSlotForCaffeTop(layerParam.bottom(1)).Connect(newLayer->GetInputSlot(1));
1318     newLayer->GetOutputSlot(0).SetTensorInfo(inputInfo);
1319     SetArmnnOutputSlotForCaffeTop(layerParam.top(0), newLayer->GetOutputSlot(0));
1320 }
1321
1322 void CaffeParserBase::ParseConcatLayer(const LayerParameter& layerParam)
1323 {
1324     unsigned int numInputs = static_cast<unsigned int>(layerParam.bottom_size());
1325     // We assume concat happens along the channel dimension, which is 1 in (0, 1, 2, 3).
1326     unsigned int concatDim = 1;
1327     unsigned int numOfDims = 4;
1328
1329     // we only consider 4-D tensor here
1330     OriginsDescriptor concatDescriptor(static_cast<uint32_t>(numInputs), numOfDims);
1331     std::vector<unsigned int>mergeDimSizes(numOfDims, 0u);
1332
1333     unsigned int mergeDim = 0;
1334     for (unsigned int viewIndex = 0; viewIndex < numInputs; ++viewIndex)
1335     {
1336         const TensorInfo& inputInfo = GetArmnnOutputSlotForCaffeTop(
1337             layerParam.bottom(boost::numeric_cast<int>(viewIndex))).GetTensorInfo();
1338         // Checks whether the dimensions of the input tensors are actually 4.
1339         if (inputInfo.GetNumDimensions()!=4)
1340         {
1341             throw ParseException(
1342                 boost::str(
1343                     boost::format(
1344                         "The number of dimensions for input tensors of "
1345                         "the concatenation op should be 4. Inputs of %1% has "
1346                         "%2% dimensions. %3%") %
1347                         layerParam.name() %
1348                         inputInfo.GetNumDimensions() %
1349                         CHECK_LOCATION().AsString()));
1350         }
1351
1352         mergeDimSizes[0] = inputInfo.GetShape()[0];
1353         mergeDimSizes[1] = inputInfo.GetShape()[1];
1354         mergeDimSizes[2] = inputInfo.GetShape()[2];
1355         mergeDimSizes[3] = inputInfo.GetShape()[3];
1356
1357         for (unsigned int j = 0; j < concatDim; ++j)
1358         {
1359             concatDescriptor.SetViewOriginCoord(viewIndex, j, 0);
1360         }
1361
1362         concatDescriptor.SetViewOriginCoord(viewIndex, concatDim, mergeDim);
1363         mergeDim += mergeDimSizes[concatDim];
1364
1365         for (unsigned int j = concatDim+1; j < numOfDims; ++j)
1366         {
1367             concatDescriptor.SetViewOriginCoord(viewIndex, j, 0);
1368         }
1369     }
1370     mergeDimSizes[concatDim] = mergeDim;
1371
1372     armnn::IConnectableLayer* concatlayer = m_Network->AddMergerLayer(concatDescriptor, layerParam.name().c_str());
1373     for (unsigned int i = 0; i < numInputs; ++i)
1374     {
1375         armnn::IOutputSlot& outputSlot = GetArmnnOutputSlotForCaffeTop(layerParam.bottom(boost::numeric_cast<int>(i)));
1376         outputSlot.Connect(concatlayer->GetInputSlot(i));
1377     }
1378
1379     concatlayer->GetOutputSlot(0).SetTensorInfo(armnn::TensorInfo(numOfDims, mergeDimSizes.data(), DataType::Float32));
1380     SetArmnnOutputSlotForCaffeTop(layerParam.top(0), concatlayer->GetOutputSlot(0));
1381 }
1382
1383 void CaffeParserBase::ParseBatchNormLayer(const LayerParameter& layerParam)
1384 {
1385     ValidateNumInputsOutputs(layerParam, 1, 1);
1386
1387     const TensorInfo& inputInfo = GetArmnnOutputSlotForCaffeTop(layerParam.bottom(0)).GetTensorInfo();
1388
1389     string name = layerParam.name();
1390
1391     BatchNormParameter param = layerParam.batch_norm_param();
1392     // If use_global_stats is not explicitly set in the model, assume it to be true (its default value
1393     // when the network is in the testing phase).
1394     if (param.has_use_global_stats())
1395     {
1396         if (!param.use_global_stats())
1397         {
1398             throw ParseException(
1399                 boost::str(
1400                     boost::format(
1401                         "Error parsing Batch Norm layer '%1%': "
1402                         "Parameter 'use_global_stats' is set to false, which is "
1403                         "unsupported (value used for training). %2%") %
1404                         name %
1405                         CHECK_LOCATION().AsString()));
1406         }
1407     }
1408
1409     BatchNormalizationDescriptor desc;
1410     desc.m_Eps = param.eps();
1411
1412     unsigned int channels = inputInfo.GetShape()[1];
1413     unsigned int shape[]  = {channels};
1414
1415     vector<float> meanData(channels);
1416     GetDataFromBlob(layerParam, meanData, 0);
1417
1418     vector<float> varianceData(channels);
1419     GetDataFromBlob(layerParam, varianceData, 1);
1420
1421     // Reads moving average factor and applies scaling (if required).
1422     const BlobProto& blob = layerParam.blobs(boost::numeric_cast<int>(2));
1423     const float movingAverageFactor = blob.data(boost::numeric_cast<int>(0));
1424     if(movingAverageFactor != 0.0f)
1425     {
1426         const float scaleFactor = 1.0f / movingAverageFactor;
1427         auto scaleFunction = [scaleFactor](float f) -> float { return f * scaleFactor; };
1428
1429         std::transform(varianceData.begin(), varianceData.end(), varianceData.begin(), scaleFunction);
1430         std::transform(meanData.begin(), meanData.end(), meanData.begin(), scaleFunction);
1431     }
1432
1433     // Identifies scale operation.
1434     vector<float> betaData(channels, 0.0f);
1435     vector<float> gammaData(channels, 1.0f);
1436
1437     ConstTensor mean(TensorInfo(1, shape, armnn::DataType::Float32), meanData);
1438     ConstTensor variance(TensorInfo(1, shape, armnn::DataType::Float32), varianceData);
1439     ConstTensor beta(TensorInfo(1, shape, armnn::DataType::Float32), betaData);
1440     ConstTensor gamma(TensorInfo(1, shape, armnn::DataType::Float32), gammaData);
1441
1442     armnn::IConnectableLayer* const batchNormLayer = m_Network->AddBatchNormalizationLayer(desc,
1443         mean, variance, beta, gamma, name.c_str());
1444     GetArmnnOutputSlotForCaffeTop(layerParam.bottom(0)).Connect(batchNormLayer->GetInputSlot(0));
1445     batchNormLayer->GetOutputSlot(0).SetTensorInfo(inputInfo);
1446     SetArmnnOutputSlotForCaffeTop(layerParam.top(0), batchNormLayer->GetOutputSlot(0));
1447 }
1448
1449 void CaffeParserBase::ParseScaleLayer(const LayerParameter& layerParam)
1450 {
1451     // Current unoptimal solution: add a batchnormalization layer with 0 mean and 1 variance.
1452     ValidateNumInputsOutputs(layerParam, 1, 1);
1453
1454     const TensorInfo& inputInfo = GetArmnnOutputSlotForCaffeTop(layerParam.bottom(0)).GetTensorInfo();
1455
1456     string name = layerParam.name();
1457
1458     ScaleParameter param = layerParam.scale_param();
1459     if (param.axis() != 1)
1460     {
1461         // Would have to use something other than BatchNormalizationLayer in this case
1462         throw ParseException(
1463             boost::str(
1464                 boost::format(
1465                     "Loading Scale Layer: Only axis 1 is supported currently. "
1466                     "Layer=%1% Axis=%2% %3%") %
1467                     layerParam.name() %
1468                     param.axis() %
1469                     CHECK_LOCATION().AsString()));
1470     }
1471
1472     unsigned int     channels = inputInfo.GetShape()[1];
1473     unsigned int     shape[]  = {channels};
1474
1475     BatchNormalizationDescriptor desc;
1476     desc.m_Eps = 0.0f; // Don't need epsilon if variance is 1.
1477     vector<float> meanData(channels, 0.0f);
1478     vector<float> varianceData(channels, 1.0f);
1479     vector<float> betaData(channels, 0.0f);
1480     vector<float> gammaData(channels);
1481
1482     GetDataFromBlob(layerParam, gammaData, 0);
1483
1484     if(param.has_bias_term())
1485     {
1486         GetDataFromBlob(layerParam, betaData, 1);
1487     }
1488
1489     ConstTensor mean(TensorInfo(1, shape, armnn::DataType::Float32), meanData);
1490     ConstTensor variance(TensorInfo(1, shape, armnn::DataType::Float32), varianceData);
1491     ConstTensor beta(TensorInfo(1, shape, armnn::DataType::Float32), betaData);
1492     ConstTensor gamma(TensorInfo(1, shape, armnn::DataType::Float32), gammaData);
1493
1494     armnn::IConnectableLayer* const batchNormLayer = m_Network->AddBatchNormalizationLayer(desc,
1495         mean, variance, beta, gamma, name.c_str());
1496     GetArmnnOutputSlotForCaffeTop(layerParam.bottom(0)).Connect(batchNormLayer->GetInputSlot(0));
1497     batchNormLayer->GetOutputSlot(0).SetTensorInfo(inputInfo);
1498     SetArmnnOutputSlotForCaffeTop(layerParam.top(0), batchNormLayer->GetOutputSlot(0));
1499 }
1500
1501 void CaffeParserBase::ParseSplitLayer(const caffe::LayerParameter& layerParam)
1502 {
1503     // Used in caffe to duplicate memory - not necessary in armnn.
1504     if (layerParam.bottom_size() != 1)
1505     {
1506         throw ParseException(
1507             boost::str(
1508                 boost::format(
1509                     "Split layer '%1%' should have exactly 1 bottom. "
1510                     "#bottoms=%2% %3%") %
1511                     layerParam.name() %
1512                     layerParam.bottom_size() %
1513                     CHECK_LOCATION().AsString()));
1514     }
1515     armnn::IOutputSlot& outputSlot = GetArmnnOutputSlotForCaffeTop(layerParam.bottom(0));
1516     for (int i = 0; i < layerParam.top_size(); i++)
1517     {
1518         SetArmnnOutputSlotForCaffeTop(layerParam.top(i), outputSlot);
1519     }
1520 }
1521
1522 void CaffeParserBase::ParseDropoutLayer(const caffe::LayerParameter& layerParam)
1523 {
1524     // Ignored for inference, so patch the single input to its single output.
1525     if (layerParam.bottom_size() != 1 || layerParam.top_size() != 1)
1526     {
1527         throw ParseException(
1528             boost::str(
1529                 boost::format(
1530                     "Dropout layer '%1%' should have exactly 1 bottom and 1 top. "
1531                     "#bottoms=%2% #tops=%3% %4%") %
1532                     layerParam.name() %
1533                     layerParam.bottom_size() %
1534                     layerParam.top_size() %
1535                     CHECK_LOCATION().AsString()));
1536     }
1537     SetArmnnOutputSlotForCaffeTop(layerParam.top(0), GetArmnnOutputSlotForCaffeTop(layerParam.bottom(0)));
1538 }
1539
1540 void CaffeParserBase::TrackInputBinding(armnn::IConnectableLayer* layer,
1541     armnn::LayerBindingId id,
1542     const armnn::TensorInfo& tensorInfo)
1543 {
1544     return TrackBindingPoint(layer, id, tensorInfo, layer->GetName(), m_NetworkInputsBindingInfo);
1545 }
1546
1547 void CaffeParserBase::TrackOutputBinding(armnn::IConnectableLayer* layer,
1548     armnn::LayerBindingId id,
1549     const armnn::TensorInfo& tensorInfo)
1550 {
1551     return TrackBindingPoint(layer, id, tensorInfo, layer->GetName(), m_NetworkOutputsBindingInfo);
1552 }
1553
1554 void CaffeParserBase::TrackBindingPoint(armnn::IConnectableLayer* layer,
1555     armnn::LayerBindingId id,
1556     const armnn::TensorInfo& tensorInfo,
1557     const char* bindingPointDesc,
1558     std::unordered_map<std::string, BindingPointInfo>& nameToBindingInfo)
1559 {
1560     const std::string layerName = layer->GetName();
1561     auto it = nameToBindingInfo.find(layerName);
1562     if (it == nameToBindingInfo.end())
1563     {
1564         nameToBindingInfo[layerName] = std::make_pair(id, tensorInfo);
1565     }
1566     else
1567     {
1568         throw ParseException(
1569             boost::str(
1570                 boost::format(
1571                     "Id %1% used by more than one %2% layer %3%") %
1572                     id %
1573                     bindingPointDesc %
1574                     CHECK_LOCATION().AsString()));
1575     }
1576 }
1577
1578 armnn::IOutputSlot& CaffeParserBase::GetArmnnOutputSlotForCaffeTop(const std::string& caffeTopName) const
1579 {
1580     auto it = m_ArmnnOutputSlotForCaffeTop.find(caffeTopName);
1581     if (it != m_ArmnnOutputSlotForCaffeTop.end())
1582     {
1583         return *it->second;
1584     }
1585     else
1586     {
1587         throw ParseException(
1588             boost::str(
1589                 boost::format(
1590                     "Could not find armnn output slot for Caffe top '%1%' %2%") %
1591                     caffeTopName %
1592                     CHECK_LOCATION().AsString()));
1593     }
1594 }
1595
1596 void CaffeParserBase::SetArmnnOutputSlotForCaffeTop(
1597     const std::string& caffeTopName, armnn::IOutputSlot& armnnOutputSlot)
1598 {
1599     auto it = m_ArmnnOutputSlotForCaffeTop.find(caffeTopName);
1600     if (it == m_ArmnnOutputSlotForCaffeTop.end())
1601     {
1602         m_ArmnnOutputSlotForCaffeTop[caffeTopName] = &armnnOutputSlot;
1603     }
1604     else
1605     {
1606         throw ParseException(
1607             boost::str(
1608                 boost::format(
1609                     "Attempting to add duplicate entry for Caffe top '%1%' %2%") %
1610                     caffeTopName %
1611                     CHECK_LOCATION().AsString()));
1612     }
1613 }
1614
1615 // Note: can move to CaffeParser when/if we optimise the text/string format
1616 //       to load on a layer by layer basis
1617 void CaffeParserBase::ResolveInPlaceLayers(caffe::NetParameter& netParameter)
1618 {
1619     // Finds layers with the same top.
1620     std::map<std::string, std::vector<caffe::LayerParameter*>> layersByTop;
1621     for (int layerIdx = 0; layerIdx < netParameter.layer_size(); ++layerIdx)
1622     {
1623         caffe::LayerParameter& layer = *netParameter.mutable_layer(layerIdx);
1624         std::string name = layer.name();
1625         for (int i = 0; i < layer.top_size(); ++i)
1626         {
1627             layersByTop[layer.top(i)].push_back(&layer);
1628         }
1629     }
1630
1631     // For each set of layers with the same top, resolves them to a linear chain rather than in-place layers.
1632     // Note that for 'regular' layers, there will be a single layer in each group and so this will be a no-op.
1633     for (auto layersWithSameTopIt : layersByTop)
1634     {
1635         const std::string& top = layersWithSameTopIt.first;
1636         const std::vector<caffe::LayerParameter*>& layersWithSameTop = layersWithSameTopIt.second;
1637
1638         // Chains the layers together in the order that they are listed in the prototxt (hopefully this is correct).
1639         // Note that the last layer will not have its top modified so that other layers will continue to reference it.
1640         for (unsigned int layerIdx = 0; layerIdx < layersWithSameTop.size() - 1; ++layerIdx)
1641         {
1642             caffe::LayerParameter& layer1 = *layersWithSameTop[layerIdx];
1643             caffe::LayerParameter& layer2 = *layersWithSameTop[layerIdx+1];
1644             if (layer1.top_size() != 1)
1645             {
1646                 throw ParseException(
1647                     boost::str(
1648                         boost::format(
1649                             "Node '%1%' is an in-place layer but doesn't have exactly one "
1650                             "top. It has %2% instead. %3%") %
1651                             layer1.name() %
1652                             layer1.top_size() %
1653                             CHECK_LOCATION().AsString()));
1654             }
1655             std::string newTop = layer1.name() + "_top";
1656             layer1.set_top(0, newTop);
1657             if (layer2.bottom_size() != 1 || layer2.bottom(0) != top)
1658             {
1659                 throw ParseException(
1660                     boost::str(
1661                         boost::format(
1662                             "Node '%1%' is an in-place layer but "
1663                             "doesn't have exactly one bottom, or it doesn't match its top. "
1664                             "#bottoms=%2%, first bottom is %3%, top is %4% %5%") %
1665                             layer2.name() %
1666                             layer2.bottom(0) %
1667                             top %
1668                             CHECK_LOCATION().AsString()));
1669             }
1670             layer2.set_bottom(0, newTop);
1671         }
1672     }
1673 }
1674
1675 // Note: can move to CaffeParser when/if we optimise the text/string format
1676 //       to load on a layer by layer basis
1677 void CaffeParserBase::LoadNetParam(NetParameter& netParameter)
1678 {
1679     // Caffe models sometimes have an implicit input layer.
1680     // In that case, add an explicit one.
1681     if (netParameter.input_size() > 0)
1682     {
1683         LayerParameter* newLayer = netParameter.add_layer();
1684
1685         newLayer->set_type("Input");
1686         newLayer->set_name(netParameter.input(0));
1687         newLayer->add_top(netParameter.input(0));
1688
1689         InputParameter* inputParam = newLayer->mutable_input_param();
1690         BlobShape* shape = inputParam->add_shape();
1691
1692         int dim_size = netParameter.input_dim_size();
1693         for (int i = 0; i < dim_size; ++i)
1694         {
1695             shape->add_dim(netParameter.input_dim(i));
1696         }
1697     }
1698
1699     // Replaces in-place layers with regular ones to make the rest of the parsing easier.
1700     ResolveInPlaceLayers(netParameter);
1701
1702     // Creates a lookup of Caffe layers by name.
1703     for (int i = 0; i < netParameter.layer_size(); ++i)
1704     {
1705         const caffe::LayerParameter& layer = netParameter.layer(i);
1706         for (int i = 0; i < layer.top_size(); ++i)
1707         {
1708             m_CaffeLayersByTopName[layer.top(i)] = &layer;
1709         }
1710     }
1711
1712     // Finds the output layers the user requested.
1713     std::vector<const caffe::LayerParameter*> targetLayers;
1714     for (const std::string& requestedOutputName : m_RequestedOutputs)
1715     {
1716         auto nodeIt = m_CaffeLayersByTopName.find(requestedOutputName);
1717         if (nodeIt == m_CaffeLayersByTopName.end())
1718         {
1719             throw ParseException(
1720                 boost::str(
1721                     boost::format(
1722                         "Couldn't find requested output layer '%1%' in graph %2%") %
1723                         requestedOutputName %
1724                         CHECK_LOCATION().AsString()));
1725         }
1726         targetLayers.push_back(nodeIt->second);
1727     }
1728
1729     // Sorts them into a linear ordering such that all inputs of a node are before the node itself.
1730     std::vector<const caffe::LayerParameter*> sortedNodes;
1731     if (!armnnUtils::GraphTopologicalSort<const caffe::LayerParameter*>(
1732         targetLayers,
1733         [this](const caffe::LayerParameter* node)
1734         {
1735             return GetInputs(*node);
1736         },
1737         sortedNodes))
1738     {
1739         throw ParseException(
1740             boost::str(
1741                 boost::format(
1742                     "Cycle detected in graph. #nodes: %1% %2%") %
1743                     sortedNodes.size() %
1744                     CHECK_LOCATION().AsString()));
1745     }
1746
1747     // Parses each node in order, knowing that all inputs of a node will be processed before the node itself.
1748     for (const caffe::LayerParameter* current : sortedNodes)
1749     {
1750         auto it = ms_CaffeLayerNameToParsingFunctions.find(current->type());
1751         if (it == ms_CaffeLayerNameToParsingFunctions.end())
1752         {
1753             throw ParseException(
1754                 boost::str(
1755                     boost::format("Unsupported layer type: '%1%' for layer %2% %3%") %
1756                     current->type() %
1757                     current->name() %
1758                     CHECK_LOCATION().AsString()));
1759         }
1760         auto func = it->second;
1761         (this->*func)(*current);
1762     }
1763
1764     // Adds ArmNN output layers connected to each requested output.
1765     for (const std::string& requestedOutput : m_RequestedOutputs)
1766     {
1767         armnn::IOutputSlot& outputSlot = GetArmnnOutputSlotForCaffeTop(requestedOutput);
1768
1769         const armnn::LayerBindingId outputId = boost::numeric_cast<armnn::LayerBindingId>(
1770             m_NetworkOutputsBindingInfo.size());
1771         armnn::IConnectableLayer* const outputLayer = m_Network->AddOutputLayer(outputId, requestedOutput.c_str());
1772         outputSlot.Connect(outputLayer->GetInputSlot(0));
1773
1774         TrackOutputBinding(outputLayer, outputId, outputLayer->GetInputSlot(0).GetConnection()->GetTensorInfo());
1775     }
1776 }
1777
1778 INetworkPtr CaffeParserBase::CreateNetworkFromTextFile(const char* graphFile,
1779     const std::map<std::string, armnn::TensorShape>& inputShapes,
1780     const std::vector<std::string>& requestedOutputs)
1781 {
1782     FILE* fd = fopen(graphFile, "r");
1783
1784     if (fd == nullptr)
1785     {
1786         throw FileNotFoundException(
1787             boost::str(
1788                 boost::format(
1789                     "Failed to open graph file: %1% %2%") %
1790                     graphFile %
1791                     CHECK_LOCATION().AsString()));
1792     }
1793
1794     // Parses the file into a message.
1795     NetParameter netParam;
1796     auto         input   = new google::protobuf::io::FileInputStream(fileno(fd));
1797     bool         success = google::protobuf::TextFormat::Parse(input, &netParam);
1798     delete input;
1799     fclose(fd);
1800
1801     if (!success)
1802     {
1803         throw ParseException(
1804             boost::str(
1805                 boost::format(
1806                     "Failed to parse graph file: %1% %2%") %
1807                     graphFile %
1808                     CHECK_LOCATION().AsString()));
1809     }
1810
1811     return CreateNetworkFromNetParameter(netParam, inputShapes, requestedOutputs);
1812 }
1813
1814 INetworkPtr CaffeParserBase::CreateNetworkFromString(const char* protoText,
1815     const std::map<std::string, armnn::TensorShape>& inputShapes,
1816     const std::vector<std::string>& requestedOutputs)
1817 {
1818     // Parses the string into a message.
1819     NetParameter netParam;
1820     bool         success = google::protobuf::TextFormat::ParseFromString(protoText, &netParam);
1821
1822     if (!success)
1823     {
1824         throw ParseException(
1825             boost::str(
1826                 boost::format(
1827                     "Failed to parse graph string %1%") %
1828                     CHECK_LOCATION().AsString()));
1829     }
1830
1831     return CreateNetworkFromNetParameter(netParam, inputShapes, requestedOutputs);
1832 }
1833
1834 INetworkPtr CaffeParser::CreateNetworkFromBinaryFile(const char* graphFile,
1835     const std::map<std::string, armnn::TensorShape>& inputShapes,
1836     const std::vector<std::string>& requestedOutputs)
1837 {
1838     FILE* fd = fopen(graphFile, "rb");
1839
1840     if (fd == nullptr)
1841     {
1842         throw FileNotFoundException(
1843             boost::str(
1844                 boost::format(
1845                     "Failed to open graph file at: %1% %2%") %
1846                     graphFile %
1847                     CHECK_LOCATION().AsString()));
1848     }
1849
1850     // Parses the file into a message.
1851     NetParameter netParam;
1852
1853     FileInputStream  inStream(fileno(fd));
1854     CodedInputStream codedStream(&inStream);
1855     codedStream.SetTotalBytesLimit(INT_MAX, INT_MAX);
1856     bool success = netParam.ParseFromCodedStream(&codedStream);
1857     fclose(fd);
1858
1859     if (!success)
1860     {
1861         throw ParseException(
1862             boost::str(
1863                 boost::format(
1864                     "Failed to parse protobuf file: %1% %2%") %
1865                     graphFile %
1866                     CHECK_LOCATION().AsString()));
1867     }
1868
1869     return CreateNetworkFromNetParameter(netParam, inputShapes, requestedOutputs);
1870 }
1871
1872 // Note: can move to CaffeParser when/if we optimise the text/string format
1873 //       to load on a layer by layer basis
1874 INetworkPtr CaffeParserBase::CreateNetworkFromNetParameter(NetParameter& netParam,
1875     const std::map<std::string, armnn::TensorShape>& inputShapes,
1876     const std::vector<std::string>& requestedOutputs)
1877 {
1878     m_NetworkInputsBindingInfo.clear();
1879     m_NetworkOutputsBindingInfo.clear();
1880
1881     m_Network = INetwork::Create();
1882
1883     m_InputShapes = inputShapes;
1884     if (requestedOutputs.size() == 0)
1885     {
1886         throw ParseException("requestedOutputs must have at least one entry");
1887     }
1888     m_RequestedOutputs = requestedOutputs;
1889
1890     try
1891     {
1892         LoadNetParam(netParam);
1893     }
1894     catch (const ParseException& e)
1895     {
1896         Cleanup();
1897         throw e;
1898     }
1899
1900     Cleanup();
1901
1902     return move(m_Network);
1903 }
1904
1905 void CaffeParserBase::Cleanup() {
1906     // cleanup, in case we reuse this parser
1907     m_InputShapes.clear();
1908     m_RequestedOutputs.clear();
1909     m_ArmnnOutputSlotForCaffeTop.clear();
1910     // NOTE: when we get the text/string format
1911     //       optimised for memory then this data structure can
1912     //       also move to the CaffeParser class
1913     m_CaffeLayersByTopName.clear();
1914 }
1915
1916 }