src/armnnCaffeParser/CaffeParser.cpp

   1 //
   2 // Copyright © 2017 Arm Ltd. All rights reserved.
   3 // See LICENSE file in the project root for full license information.
   4 //
   5 #include "CaffeParser.hpp"
   6
   7 #include "armnn/Descriptors.hpp"
   8 #include "armnn/INetwork.hpp"
   9 #include "armnn/Utils.hpp"
  10 #include "armnn/Exceptions.hpp"
  11
  12 #include "GraphTopologicalSort.hpp"
  13
  14 #include <boost/numeric/conversion/cast.hpp>
  15 #include <boost/assert.hpp>
  16 #include <boost/format.hpp>
  17 #include <boost/log/trivial.hpp>
  18
  19 // Caffe
  20 #include "caffe/proto/caffe.pb.h"
  21
  22 // ProtoBuf
  23 #include <google/protobuf/io/coded_stream.h>
  24 #include <google/protobuf/io/zero_copy_stream.h>
  25 #include <google/protobuf/io/zero_copy_stream_impl.h>
  26 #include <google/protobuf/text_format.h>
  27 #include <google/protobuf/stubs/common.h>
  28 #include <google/protobuf/stubs/once.h>
  29 #include <google/protobuf/io/coded_stream.h>
  30 #include <google/protobuf/wire_format_lite_inl.h>
  31 #include <google/protobuf/descriptor.h>
  32 #include <google/protobuf/generated_message_reflection.h>
  33 #include <google/protobuf/reflection_ops.h>
  34 #include <google/protobuf/wire_format.h>
  35
  36 #include <cmath>
  37 #include <sstream>
  38 #include <queue>
  39 #include <fcntl.h>
  40
  41 /// Caffe networks are loaded from protobuf files (binary or text) using the protobuf library and the generated
  42 /// code from caffe.pb.h. This gives us a caffe::NetParameter which is an in-memory version of the file.
  43 /// This contains a flat list of Caffe 'layers' (e.g. convolution, pooling etc.).
  44 /// Each layer has inputs (called "bottoms") and outputs (called "tops"). Data flows from bottom to top.
  45 /// The bottoms of a layer refer to the tops of other layers, not their names.
  46 /// The names of layers seem to be arbitrary (you could rename a layer and the network wouldn't need any other changes).
  47 ///
  48 /// Some layers (e.g. Relu) can be configured so that their top and bottom are both the same. This is called an
  49 /// "in-place" layer and is a Caffe runtime feature used to reduce memory usage by modifying tensors in-place.
  50 /// This isn't relevant to the parser and so we preprocess these layers to convert them to regular layers, to result
  51 /// in a consistent graph structure.
  52
  53 namespace armnnCaffeParser
  54 {
  55
  56 using namespace armnn;
  57 using namespace caffe;
  58 using namespace std;
  59 using namespace google::protobuf::io;
  60
  61 const std::map<std::string, CaffeParser::OperationParsingFunction> CaffeParser::ms_CaffeLayerNameToParsingFunctions = {
  62     { "Input",        &CaffeParser::ParseInputLayer },
  63     { "Convolution",  &CaffeParser::ParseConvLayer },
  64     { "Pooling",      &CaffeParser::ParsePoolingLayer },
  65     { "ReLU",         &CaffeParser::ParseReluLayer },
  66     { "LRN",          &CaffeParser::ParseLRNLayer },
  67     { "InnerProduct", &CaffeParser::ParseInnerProductLayer },
  68     { "Softmax",      &CaffeParser::ParseSoftmaxLayer },
  69     { "Eltwise",      &CaffeParser::ParseEltwiseLayer },
  70     { "Concat",       &CaffeParser::ParseConcatLayer },
  71     { "BatchNorm",    &CaffeParser::ParseBatchNormLayer },
  72     { "Scale",        &CaffeParser::ParseScaleLayer },
  73     { "Split",        &CaffeParser::ParseSplitLayer },
  74     { "Dropout",      &CaffeParser::ParseDropoutLayer},
  75 };
  76
  77 ICaffeParser* ICaffeParser::CreateRaw()
  78 {
  79     return new CaffeParser();
  80 }
  81
  82 ICaffeParserPtr ICaffeParser::Create()
  83 {
  84     return ICaffeParserPtr(CreateRaw(), &ICaffeParser::Destroy);
  85 }
  86
  87 void ICaffeParser::Destroy(ICaffeParser* parser)
  88 {
  89     delete parser;
  90 }
  91
  92 CaffeParser::CaffeParser()
  93 : m_Network(nullptr, nullptr)
  94 {
  95
  96 }
  97
  98 void GetDataFromBlob(const LayerParameter& layerParam, vector<float>& outData, unsigned int blobIndex)
  99 {
 100     if (blobIndex >= boost::numeric_cast<unsigned int>(layerParam.blobs_size()))
 101     {
 102         throw ParseException(boost::str(boost::format("Expected data blob at index %1% in layer %2% not found")
 103             % blobIndex % layerParam.name()));
 104     }
 105
 106     const BlobProto& blob = layerParam.blobs(boost::numeric_cast<int>(blobIndex));
 107
 108     if (boost::numeric_cast<size_t>(blob.data_size()) != outData.size())
 109     {
 110         throw ParseException(boost::str(boost::format(
 111             "Data blob at index %1% in layer %2% has an unexpected size. Expected %3% elements but got %4% elements")
 112             % blobIndex % layerParam.name() % outData.size() % blob.data_size()));
 113     }
 114
 115     for (unsigned int i = 0; i < outData.size(); ++i)
 116     {
 117         outData[i] = blob.data(boost::numeric_cast<int>(i));
 118     }
 119 }
 120
 /// Tests whether value lies in the closed interval [min, max].
 ///
 /// @return true if min <= value <= max, false otherwise (including when min > max).
 bool IsInRange(unsigned int value, unsigned int min, unsigned int max)
 {
     if (value < min)
     {
         return false;
     }
     return !(value > max);
 }
 125
 /// Returns the number of bytes occupied by the elements of vec, i.e. vec.size() * sizeof(T).
 template <typename T>
 size_t SizeOfVectorData(const std::vector<T>& vec)
 {
     const size_t elementCount = vec.size();
     return elementCount * sizeof(T);
 }
 131
 132 void ValidateNumInputsOutputs(const caffe::LayerParameter& layerParameter,
 133                               unsigned int                 numInputs,
 134                               unsigned int                 numOutputs)
 135 {
 136     int numInputsActual = layerParameter.bottom_size();
 137     if (numInputs != boost::numeric_cast<unsigned int>(numInputsActual))
 138     {
 139         throw ParseException("Loading layer: invalid number of inputs");
 140     }
 141
 142     int numOutputsActual = layerParameter.top_size();
 143     if (numOutputs != boost::numeric_cast<unsigned int>(numOutputsActual))
 144     {
 145         throw ParseException("Loading layer: invalid number of outputs");
 146     }
 147 }
 148
 149 BindingPointInfo CaffeParser::GetNetworkInputBindingInfo(const std::string& name) const
 150 {
 151     return GetBindingInfo(name, "input", m_NetworkInputsBindingInfo);
 152 }
 153
 154 BindingPointInfo CaffeParser::GetNetworkOutputBindingInfo(const std::string& name) const
 155 {
 156     return GetBindingInfo(name, "output", m_NetworkOutputsBindingInfo);
 157 }
 158
 159 std::pair<armnn::LayerBindingId, armnn::TensorInfo> CaffeParser::GetBindingInfo(const std::string& layerName,
 160     const char* bindingPointDesc,
 161     const std::unordered_map<std::string, BindingPointInfo>& nameToBindingInfo)
 162 {
 163     auto it = nameToBindingInfo.find(layerName);
 164     if (it == nameToBindingInfo.end())
 165     {
 166         throw InvalidArgumentException(boost::str(boost::format("Unknown %1% '%2%'") % bindingPointDesc % layerName));
 167     }
 168     return it->second;
 169 }
 170
 171 TensorInfo CaffeParser::BlobShapeToTensorInfo(const caffe::BlobShape& blobShape) const
 172 {
 173     std::vector<unsigned int> shape;
 174     for (int j = 0; j < blobShape.dim_size(); ++j)
 175     {
 176         shape.push_back(static_cast<unsigned int>(blobShape.dim(j)));
 177     }
 178
 179     return TensorInfo(boost::numeric_cast<unsigned int>(shape.size()), shape.data(), DataType::Float32);
 180 }
 181
 182 BlobShape TensorDescToBlobShape(const TensorInfo& desc)
 183 {
 184     BlobShape ret;
 185     for (unsigned int i = 0; i < desc.GetNumDimensions(); ++i)
 186     {
 187         ret.add_dim(i);
 188         ret.set_dim(boost::numeric_cast<int>(i), desc.GetShape()[i]);
 189     }
 190
 191     return ret;
 192 }
 193
 194 vector<const LayerParameter*> CaffeParser::GetInputs(const LayerParameter& layerParam)
 195 {
 196     std::vector<const caffe::LayerParameter*> ret;
 197     ret.reserve(boost::numeric_cast<size_t>(layerParam.bottom_size()));
 198     for (int j = 0; j < layerParam.bottom_size(); ++j)
 199     {
 200         std::string inputName = layerParam.bottom(j);
 201         auto inputIt = m_CaffeLayersByTopName.find(inputName);
 202         if (inputIt == m_CaffeLayersByTopName.end())
 203         {
 204             throw ParseException(
 205                 "Can't find Caffe layer with top called '" + inputName + "', which is listed as an input of '" +
 206                 layerParam.name() + "'");
 207         }
 208         ret.push_back(inputIt->second);
 209     }
 210
 211     return ret;
 212 }
 213
 214 void CaffeParser::ParseInputLayer(const LayerParameter& layerParam)
 215 {
 216     BOOST_ASSERT(layerParam.type() == "Input");
 217     ValidateNumInputsOutputs(layerParam, 0, 1);
 218
 219     const InputParameter& param = layerParam.input_param();
 220
 221     const armnn::LayerBindingId inputId = boost::numeric_cast<armnn::LayerBindingId>(m_NetworkInputsBindingInfo.size());
 222     armnn::IConnectableLayer* const inputLayer = m_Network->AddInputLayer(inputId, layerParam.name().c_str());
 223
 224     // Decide on the tensor info for this input. This can be specified in the Caffe network but can also
 225     // be overriden by user input (m_inputShapes).
 226     armnn::TensorInfo inputTensorInfo;
 227
 228     const BlobShape* originalShape = param.shape_size() > 0 && param.shape(0).dim_size() > 0 ?
 229         &param.shape(0) : nullptr;
 230     if (originalShape)
 231     {
 232         inputTensorInfo = BlobShapeToTensorInfo(*originalShape);
 233     }
 234
 235     auto overrideIt = m_InputShapes.find(layerParam.name());
 236     if (overrideIt != m_InputShapes.end())
 237     {
 238         const TensorShape& overrideShape = overrideIt->second;
 239         if (originalShape &&
 240             (    originalShape->dim(1) != overrideShape[1]
 241               || originalShape->dim(2) != overrideShape[2]
 242               || originalShape->dim(3) != overrideShape[3]))
 243         {
 244             throw ParseException("Parsed input shape for '" + layerParam.name() +
 245                 "' is incompatible with the override provided");
 246         }
 247         inputTensorInfo.SetShape(overrideShape);
 248     }
 249     else if (!originalShape)
 250     {
 251         throw ParseException("No input descriptor given for '" + layerParam.name() +
 252             "' and no input shape found in caffe model");
 253     }
 254
 255     TrackInputBinding(inputLayer, inputId, inputTensorInfo);
 256     inputLayer->GetOutputSlot(0).SetTensorInfo(inputTensorInfo);
 257     SetArmnnOutputSlotForCaffeTop(layerParam.top(0), inputLayer->GetOutputSlot(0));
 258 }
 259
 260 void CaffeParser::ParseConvLayer(const LayerParameter& layerParam)
 261 {
 262     BOOST_ASSERT(layerParam.type() == "Convolution");
 263     ValidateNumInputsOutputs(layerParam, 1, 1);
 264
 265     ConvolutionParameter convParam      = layerParam.convolution_param();
 266     BlobShape inputShape = TensorDescToBlobShape(GetArmnnOutputSlotForCaffeTop(layerParam.bottom(0)).GetTensorInfo());
 267
 268     unsigned int kernelH = 0;
 269     unsigned int kernelW = 0;
 270     if (convParam.has_kernel_h() && convParam.has_kernel_w())
 271     {
 272         kernelH = convParam.kernel_h();
 273         kernelW = convParam.kernel_w();
 274     }
 275     else if (convParam.kernel_size_size() > 0)
 276     {
 277         kernelH = (convParam.kernel_size()).Get(0);
 278         kernelW = (convParam.kernel_size()).Get(0);
 279     }
 280     else
 281     {
 282         throw ParseException("Loading Convolution Layer: Kernel Size defined Illegally");
 283     }
 284
 285     if (!IsInRange(kernelH, 0, 11) || !IsInRange(kernelW, 0, 11) || (kernelH != kernelW))
 286     {
 287         throw ParseException("Loading Convolution Layer: Kernel has invalid size");
 288     }
 289
 290     unsigned int strideH = 0;
 291     unsigned int strideW = 0;
 292
 293     if (convParam.has_stride_h() && convParam.has_stride_w())
 294     {
 295         strideH = convParam.stride_h();
 296         strideW = convParam.stride_w();
 297     }
 298     else if (convParam.stride_size() > 0)
 299     {
 300         strideH = (convParam.stride()).Get(0);
 301         strideW = (convParam.stride()).Get(0);
 302     }
 303     else
 304     {
 305         // Caffe stride default is 1
 306         strideH = strideW = 1;
 307     }
 308
 309     if (!IsInRange(strideH, 0, 11) || !IsInRange(strideW, 0, 11) || (strideH != strideW))
 310     {
 311         throw ParseException("Loading Convolution Layer: stride has invalid size");
 312     }
 313
 314     unsigned int padH = 0;
 315     unsigned int padW = 0;
 316
 317     if (convParam.has_pad_h() && convParam.has_pad_w())
 318     {
 319         padH = convParam.pad_h();
 320         padW = convParam.pad_w();
 321     }
 322     else if (convParam.pad_size() > 0)
 323     {
 324         padH = (convParam.pad()).Get(0);
 325         padW = (convParam.pad()).Get(0);
 326     }
 327     else
 328     {
 329         padH = 0;
 330         padW = 0;
 331     }
 332
 333     if (!IsInRange(padH, 0, 11) || !IsInRange(padW, 0, 11) || (padH != padW))
 334     {
 335         throw ParseException("Loading Convolution Layer: pad has invalid size");
 336     }
 337
 338     // Handle grouping
 339     const unsigned int numGroups = convParam.has_group() ? convParam.group() : 1;
 340     armnn::IOutputSlot& inputConnection = GetArmnnOutputSlotForCaffeTop(layerParam.bottom(0));
 341
 342     vector<string> convLayerNames(numGroups);
 343     vector<armnn::IConnectableLayer*> convLayers(numGroups);
 344     convLayerNames[0] = layerParam.name();
 345
 346     armnn::IConnectableLayer* splitterLayer = nullptr;
 347     if (numGroups > 1)
 348     {
 349         // This convolution is to be applied to chunks of the input data so add a splitter layer
 350
 351         // Redirect the convolution input to the splitter
 352         unsigned int splitterDimSizes[4] = {static_cast<unsigned int>(inputShape.dim(0)),
 353                                             static_cast<unsigned int>(inputShape.dim(1)),
 354                                             static_cast<unsigned int>(inputShape.dim(2)),
 355                                             static_cast<unsigned int>(inputShape.dim(3))};
 356
 357         // Split dimension 1 of the splitter output shape and conv input shapes
 358         // according to the number of groups
 359         splitterDimSizes[1] /= numGroups;
 360         inputShape.set_dim(1, splitterDimSizes[1]);
 361
 362         // This is used to describe how the input is to be split
 363         ViewsDescriptor splitterDesc(numGroups);
 364
 365         // Create an output node for each group, giving each a unique name
 366         for (unsigned int g = 0; g < numGroups; ++g)
 367         {
 368             // Work out the names of the splitter layers child convolutions
 369             stringstream ss;
 370             ss << layerParam.name() << "_" << g;
 371             convLayerNames[g] = ss.str();
 372
 373             splitterDesc.SetViewOriginCoord(g, 1, splitterDimSizes[1] * g);
 374
 375             // Set the size of the views.
 376             for (unsigned int dimIdx=0; dimIdx < 4; dimIdx++)
 377             {
 378                 splitterDesc.SetViewSize(g, dimIdx, splitterDimSizes[dimIdx]);
 379             }
 380         }
 381
 382         const std::string splitterLayerName = std::string("splitter_") + layerParam.bottom(0);
 383
 384         // Add the splitter layer
 385         splitterLayer = m_Network->AddSplitterLayer(splitterDesc,
 386             splitterLayerName.c_str());
 387
 388         inputConnection.Connect(splitterLayer->GetInputSlot(0));
 389         for (unsigned int i = 0; i < splitterLayer->GetNumOutputSlots(); i++)
 390         {
 391             splitterLayer->GetOutputSlot(i).SetTensorInfo(BlobShapeToTensorInfo(inputShape));
 392         }
 393     }
 394
 395     // Ignored Caffe Parameters
 396     // * Dilation Size
 397     // * Weight Filler
 398     // * Bias Filler
 399     // * Engine
 400     // * Force nd_im2col
 401     // * Axis
 402
 403     // Not Available ArmNN Interface Parameters
 404     // * Rounding policy;
 405
 406     Convolution2dDescriptor convolution2dDescriptor;
 407     convolution2dDescriptor.m_PadLeft        = padW;
 408     convolution2dDescriptor.m_PadRight       = padW;
 409     convolution2dDescriptor.m_PadTop         = padH;
 410     convolution2dDescriptor.m_PadBottom      = padH;
 411     convolution2dDescriptor.m_StrideX        = strideW;
 412     convolution2dDescriptor.m_StrideY        = strideH;
 413
 414     unsigned int numFilters = convParam.num_output();
 415
 416     // Populate convolution output tensor descriptor dimensions
 417     BlobShape outputShape;
 418     outputShape.add_dim(0);
 419     outputShape.set_dim(0, inputShape.dim(0));
 420     outputShape.add_dim(1);
 421     // Ensure that dimension 1 of the convolution output is split according to the number of groups.
 422     outputShape.set_dim(1, numFilters / numGroups);
 423     outputShape.add_dim(2);
 424     outputShape.set_dim(
 425         2, (static_cast<int>(static_cast<float>(inputShape.dim(2) + 2 * padH - kernelH) /
 426             boost::numeric_cast<float>(strideH)) + 1));
 427     outputShape.add_dim(3);
 428     outputShape.set_dim(
 429         3, (static_cast<int>(static_cast<float>(inputShape.dim(3) + 2 * padW - kernelW) /
 430             boost::numeric_cast<float>(strideW)) + 1));
 431
 432     // Load the weight data for ALL groups
 433     vector<float> weightData(boost::numeric_cast<size_t>(numGroups * inputShape.dim(1) * outputShape.dim(1) *
 434         kernelH * kernelW));
 435     GetDataFromBlob(layerParam, weightData, 0);
 436
 437     const unsigned int weightDimSizes[4] = {
 438         static_cast<unsigned int>(outputShape.dim(1)), static_cast<unsigned int>(inputShape.dim(1)), kernelH, kernelW};
 439
 440     // Bias data - This defaults to true in Caffe
 441     TensorInfo biasInfo;
 442     vector<float> biasData;
 443     convolution2dDescriptor.m_BiasEnabled = convParam.has_bias_term() ? convParam.bias_term() : true;
 444     if (convolution2dDescriptor.m_BiasEnabled)
 445     {
 446         biasData.resize(boost::numeric_cast<size_t>(numGroups * outputShape.dim(1)), 1.f);
 447         GetDataFromBlob(layerParam, biasData, 1);
 448
 449         const unsigned int biasDimSizes[1] = {static_cast<unsigned int>(outputShape.dim(1))};
 450         biasInfo = TensorInfo(1, biasDimSizes, DataType::Float32);
 451     }
 452
 453     const unsigned int numWeightsPerGroup = boost::numeric_cast<unsigned int>(weightData.size()) / numGroups;
 454     const unsigned int numBiasesPerGroup  = boost::numeric_cast<unsigned int>(biasData.size()) / numGroups;
 455
 456     armnn::IConnectableLayer* returnLayer = nullptr;
 457
 458     for (unsigned int g = 0; g < numGroups; ++g)
 459     {
 460         // set the slot index, group 0 should be connected to the 0th output of the splitter
 461         // group 1 should be connected to the 1st output of the splitter
 462
 463         // Pull out the weights for this group from that loaded from the model file earlier
 464         ConstTensor weights(TensorInfo(4, weightDimSizes, DataType::Float32),
 465                             weightData.data() + numWeightsPerGroup * g);
 466
 467         IConnectableLayer* convLayer = nullptr;
 468         if (convolution2dDescriptor.m_BiasEnabled)
 469         {
 470             // Pull out the biases for this group from that loaded from the model file earlier
 471             ConstTensor biases(biasInfo, biasData.data() + numBiasesPerGroup * g);
 472
 473             convLayer = m_Network->AddConvolution2dLayer(convolution2dDescriptor,
 474                 weights, biases, convLayerNames[g].c_str());
 475         }
 476         else
 477         {
 478             convLayer = m_Network->AddConvolution2dLayer(convolution2dDescriptor,
 479                 weights, convLayerNames[g].c_str());
 480         }
 481         convLayers[g] = convLayer;
 482
 483         // If we have more than one group then the input to the nth convolution the splitter layer's nth output,
 484         // otherwise it's the regular input to this layer.
 485         armnn::IOutputSlot& splitterInputConnection = splitterLayer ? splitterLayer->GetOutputSlot(g) : inputConnection;
 486         splitterInputConnection.Connect(convLayer->GetInputSlot(0));
 487         convLayer->GetOutputSlot(0).SetTensorInfo(BlobShapeToTensorInfo(outputShape));
 488
 489         returnLayer = convLayer;
 490     }
 491
 492     if (numGroups > 1)
 493     {
 494         // If the convolution was performed in chunks, add a layer to merge the results
 495
 496         // The merge input shape matches that of the convolution output
 497         unsigned int mergeDimSizes[4] = {static_cast<unsigned int>(outputShape.dim(0)),
 498                                          static_cast<unsigned int>(outputShape.dim(1)),
 499                                          static_cast<unsigned int>(outputShape.dim(2)),
 500                                          static_cast<unsigned int>(outputShape.dim(3))};
 501
 502         // This is used to describe how the input is to be merged
 503         OriginsDescriptor mergeDesc(numGroups);
 504
 505         // Now create an input node for each group, using the name from
 506         // the output of the corresponding convolution
 507         for (unsigned int g = 0; g < numGroups; ++g)
 508         {
 509             mergeDesc.SetViewOriginCoord(g, 1, mergeDimSizes[1] * g);
 510         }
 511
 512         // Make sure the output from the merge is the correct size to hold the data for all groups
 513         mergeDimSizes[1] *= numGroups;
 514         outputShape.set_dim(1, mergeDimSizes[1]);
 515
 516         // The merge layer just assumes the name of the original convolution
 517         // layer so the following layer connection "just works"
 518         const string mergeOutputName = layerParam.name();
 519
 520         // Finally add the merge layer
 521         IConnectableLayer* layer = m_Network->AddMergerLayer(mergeDesc, mergeOutputName.c_str());
 522
 523         for (unsigned int g = 0; g < numGroups; ++g)
 524         {
 525             convLayers[g]->GetOutputSlot(0).Connect(layer->GetInputSlot(g));
 526         }
 527         layer->GetOutputSlot(0).SetTensorInfo(armnn::TensorInfo(4, mergeDimSizes, DataType::Float32));
 528
 529         returnLayer = layer;
 530     }
 531
 532     if (!returnLayer)
 533     {
 534         throw ParseException("Loading Convolution Layer: invalid return layer");
 535     }
 536
 537     SetArmnnOutputSlotForCaffeTop(layerParam.top(0), returnLayer->GetOutputSlot(0));
 538 }
 539
 540 void CaffeParser::ParsePoolingLayer(const LayerParameter& layerParam)
 541 {
 542     ValidateNumInputsOutputs(layerParam, 1, 1);
 543
 544     PoolingParameter param = layerParam.pooling_param();
 545
 546     const TensorInfo& inputInfo = GetArmnnOutputSlotForCaffeTop(layerParam.bottom(0)).GetTensorInfo();
 547
 548     // Kernel size
 549     unsigned int kernel_h = 0;
 550     unsigned int kernel_w = 0;
 551     if (param.has_kernel_h() && param.has_kernel_w())
 552     {
 553         kernel_h = param.kernel_h();
 554         kernel_w = param.kernel_w();
 555     }
 556     else if (param.kernel_size() > 0)
 557     {
 558         kernel_h = param.kernel_size();
 559         kernel_w = param.kernel_size();
 560     }
 561     else if (param.has_global_pooling())
 562     {
 563         kernel_h = inputInfo.GetShape()[2];
 564         kernel_w = inputInfo.GetShape()[3];
 565     }
 566     else
 567     {
 568         throw ParseException("Loading Pooling Layer: Kernel Size defined Illegally");
 569     }
 570
 571     if (!IsInRange(kernel_h, 0, 11) || !IsInRange(kernel_w, 0, 11) || (kernel_h != kernel_w))
 572     {
 573         throw ParseException(boost::str(
 574             boost::format("Loading Pooling Layer: kernel has invalid size: %1% x %2%") % kernel_h % kernel_w));
 575     }
 576
 577     // Strides
 578     // Default to a valid value for the case of global pooling (where the strides don't have to be explicitly set)
 579     unsigned int stride_h = 1;
 580     unsigned int stride_w = 1;
 581     if (param.has_stride_h() && param.has_stride_w())
 582     {
 583         stride_h = param.stride_h();
 584         stride_w = param.stride_w();
 585     }
 586     else if (param.has_stride())
 587     {
 588         stride_h = param.stride();
 589         stride_w = param.stride();
 590     }
 591     else if (!param.has_global_pooling())
 592     {
 593         throw ParseException("Loading Pooling Layer: Stride Size defined Illegally");
 594     }
 595
 596     if (!IsInRange(stride_h, 0, 11) || !IsInRange(stride_w, 0, 11) || (stride_h != stride_w))
 597     {
 598         throw ParseException("Loading Pooling Layer: stride has invalid size");
 599     }
 600
 601     // Padding
 602     unsigned int pad_h = 0;
 603     unsigned int pad_w = 0;
 604     if (param.has_pad_h() && param.has_pad_w())
 605     {
 606         pad_h = param.pad_h();
 607         pad_w = param.pad_w();
 608     }
 609     else if (param.has_pad())
 610     {
 611         pad_h = param.pad();
 612         pad_w = param.pad();
 613     }
 614     else
 615     {
 616         pad_h = 0;
 617         pad_w = 0;
 618     }
 619
 620     if (!IsInRange(pad_h, 0, 11) || !IsInRange(pad_w, 0, 11) || (pad_h != pad_w))
 621     {
 622         throw ParseException("Loading Pooling Layer: pad has invalid size");
 623     }
 624
 625     // Ignored Caffe Parameters
 626     //      Stochastic Pooling
 627     //      Engine
 628
 629     // Populate Weight and Bias Filter Descriptor
 630     Pooling2dDescriptor pooling2dDescriptor;
 631     if (param.has_pool())
 632     {
 633         PoolingParameter_PoolMethod p = param.pool();
 634         switch (p)
 635         {
 636             case PoolingParameter_PoolMethod_MAX:
 637             {
 638                 pooling2dDescriptor.m_PoolType = PoolingAlgorithm::Max;
 639                 break;
 640             }
 641             case PoolingParameter_PoolMethod_AVE:
 642             {
 643                 pooling2dDescriptor.m_PoolType = PoolingAlgorithm::Average;
 644                 break;
 645             }
 646             case PoolingParameter_PoolMethod_STOCHASTIC:
 647             {
 648                 throw ParseException("Loading Pooling Layer: Stochastic Pooling Not Supported");
 649             }
 650             default:
 651             {
 652                 throw ParseException("Loading Pooling Layer: Mode Not Supported");
 653             }
 654         }
 655     }
 656     else
 657     {
 658         throw ParseException("Loading Pooling Layer: No Pooling Method Defined");
 659     }
 660
 661     pooling2dDescriptor.m_PadLeft     = pad_w;
 662     pooling2dDescriptor.m_PadRight    = pad_w;
 663     pooling2dDescriptor.m_PadTop      = pad_h;
 664     pooling2dDescriptor.m_PadBottom   = pad_h;
 665     pooling2dDescriptor.m_StrideX     = stride_w;
 666     pooling2dDescriptor.m_StrideY     = stride_h;
 667     pooling2dDescriptor.m_PoolWidth   = kernel_w;
 668     pooling2dDescriptor.m_PoolHeight  = kernel_h;
 669
 670     pooling2dDescriptor.m_OutputShapeRounding = OutputShapeRounding::Ceiling;
 671     pooling2dDescriptor.m_PaddingMethod  = PaddingMethod::IgnoreValue;
 672
 673     armnn::IConnectableLayer* poolingLayer = m_Network->AddPooling2dLayer(pooling2dDescriptor,
 674         layerParam.name().c_str());
 675
 676
 677     TensorInfo outputInfo(
 678         { inputInfo.GetShape()[0],
 679           inputInfo.GetShape()[1],
 680           static_cast<unsigned int>(ceil(
 681               static_cast<float>(inputInfo.GetShape()[2] + 2 * pad_h - kernel_h) /
 682               boost::numeric_cast<float>(stride_h))) + 1,
 683           static_cast<unsigned int>(ceil(
 684               static_cast<float>(inputInfo.GetShape()[3] + 2 * pad_w - kernel_w) /
 685               boost::numeric_cast<float>(stride_w))) + 1 },
 686         DataType::Float32);
 687
 688     GetArmnnOutputSlotForCaffeTop(layerParam.bottom(0)).Connect(poolingLayer->GetInputSlot(0));
 689     poolingLayer->GetOutputSlot(0).SetTensorInfo(outputInfo);
 690     SetArmnnOutputSlotForCaffeTop(layerParam.top(0), poolingLayer->GetOutputSlot(0));
 691 }
 692
 693 void CaffeParser::ParseReluLayer(const LayerParameter& layerParam)
 694 {
 695     ValidateNumInputsOutputs(layerParam, 1, 1);
 696
 697     const string& name = layerParam.name();
 698     const ReLUParameter& param = layerParam.relu_param();
 699
 700     ActivationDescriptor activationDescriptor;
 701     const float negativeSlope = param.negative_slope();
 702     if (negativeSlope == 0.0f)
 703     {
 704         activationDescriptor.m_Function = ActivationFunction::ReLu;
 705     }
 706     else
 707     {
 708         activationDescriptor.m_Function = ActivationFunction::LeakyReLu;
 709         activationDescriptor.m_A = negativeSlope;
 710     }
 711
 712     const TensorInfo& inputInfo = GetArmnnOutputSlotForCaffeTop(layerParam.bottom(0)).GetTensorInfo();
 713     IConnectableLayer* const activationLayer = m_Network->AddActivationLayer(activationDescriptor, name.c_str());
 714     GetArmnnOutputSlotForCaffeTop(layerParam.bottom(0)).Connect(activationLayer->GetInputSlot(0));
 715     activationLayer->GetOutputSlot(0).SetTensorInfo(inputInfo);
 716     SetArmnnOutputSlotForCaffeTop(layerParam.top(0), activationLayer->GetOutputSlot(0));
 717 }
 718
 719 void CaffeParser::ParseLRNLayer(const LayerParameter& layerParam)
 720 {
 721     ValidateNumInputsOutputs(layerParam, 1, 1);
 722
 723     LRNParameter param = layerParam.lrn_param();
 724
 725     const TensorInfo& inputInfo = GetArmnnOutputSlotForCaffeTop(layerParam.bottom(0)).GetTensorInfo();
 726
 727     // Ignored BATCH NORMALIZATION Caffe Parameters
 728     // Ignored MVN Caffe Parameters
 729     // Ignored LRN Caffe Parameters
 730     //      Engine
 731
 732     NormalizationDescriptor normalizationDescriptor;
 733     if (param.has_norm_region())
 734     {
 735         LRNParameter_NormRegion n = param.norm_region();
 736         switch (n)
 737         {
 738             case LRNParameter_NormRegion_ACROSS_CHANNELS:
 739             {
 740                 normalizationDescriptor.m_NormChannelType = NormalizationAlgorithmChannel::Across;
 741                 break;
 742             }
 743             case LRNParameter_NormRegion_WITHIN_CHANNEL:
 744             {
 745                 normalizationDescriptor.m_NormChannelType = NormalizationAlgorithmChannel::Within;
 746                 break;
 747             }
 748             default:
 749                 throw ParseException("Loading LRN Layer: Mode Not Supported");
 750         }
 751     }
 752     else
 753     {
 754         // Caffe defaults to normalization across channels
 755         normalizationDescriptor.m_NormChannelType = NormalizationAlgorithmChannel::Across;
 756     }
 757
 758     normalizationDescriptor.m_NormMethodType = NormalizationAlgorithmMethod::LocalBrightness;
 759     if (param.has_local_size())
 760     {
 761         normalizationDescriptor.m_NormSize = param.local_size();
 762     }
 763     else
 764     {
 765         throw ParseException("Loading LRN Layer: Local_size not defined");
 766     }
 767
 768     if (param.has_alpha())
 769     {
 770         normalizationDescriptor.m_Alpha = param.alpha();
 771         normalizationDescriptor.m_Alpha /= boost::numeric_cast<float>(param.local_size());
 772     }
 773     else
 774     {
 775         throw ParseException("Loading LRN Layer: Alpha not defined");
 776     }
 777     if (param.has_beta())
 778     {
 779         normalizationDescriptor.m_Beta = param.beta();
 780     }
 781     else
 782     {
 783         throw ParseException("Loading LRN Layer: Beta not defined");
 784     }
 785     if (param.has_k())
 786     {
 787         normalizationDescriptor.m_K = param.k();
 788     }
 789     else
 790         normalizationDescriptor.m_K = 1;
 791
 792     IConnectableLayer* const normLayer = m_Network->AddNormalizationLayer(normalizationDescriptor,
 793         layerParam.name().c_str());
 794     GetArmnnOutputSlotForCaffeTop(layerParam.bottom(0)).Connect(normLayer->GetInputSlot(0));
 795     normLayer->GetOutputSlot(0).SetTensorInfo(inputInfo);
 796
 797     SetArmnnOutputSlotForCaffeTop(layerParam.top(0), normLayer->GetOutputSlot(0));
 798 }
 799
 800 void CaffeParser::ParseInnerProductLayer(const LayerParameter& layerParam)
 801 {
 802     InnerProductParameter param = layerParam.inner_product_param();
 803
 804     ValidateNumInputsOutputs(layerParam, 1, 1);
 805
 806     unsigned int outputSize = param.num_output();
 807
 808     // Ignored Caffe Parameters
 809     // Weight Filler
 810     // Bias Filler
 811     // Engine
 812     // Axis
 813
 814     FullyConnectedDescriptor tensorFullyConnectedDescriptor;
 815
 816     if (param.has_transpose())
 817     {
 818         // If true assume transposed weights
 819         tensorFullyConnectedDescriptor.m_TransposeWeightMatrix = param.transpose();
 820     }
 821     else
 822     {
 823         // caffe defaults to transposed
 824         tensorFullyConnectedDescriptor.m_TransposeWeightMatrix = true;
 825     }
 826
 827     const TensorInfo& inputInfo = GetArmnnOutputSlotForCaffeTop(layerParam.bottom(0)).GetTensorInfo();
 828
 829     TensorInfo weightInfo;
 830     TensorInfo biasInfo;
 831
 832     // allow implicit flattening of extra dimensions
 833     unsigned int inputSize = inputInfo.GetShape()[1];
 834     for (unsigned int i = 2; i < inputInfo.GetNumDimensions(); ++i)
 835     {
 836         inputSize *= inputInfo.GetShape()[i];
 837     }
 838
 839     vector<float> weightData(inputSize * outputSize);
 840
 841     GetDataFromBlob(layerParam, weightData, 0);
 842     const unsigned int swTD[2] = { outputSize, inputSize };
 843     ConstTensor weights(TensorInfo(2, swTD, DataType::Float32), weightData);
 844
 845     tensorFullyConnectedDescriptor.m_BiasEnabled = true;
 846     // Todo: check whether bias enabled
 847     armnn::IConnectableLayer* fullyConnectedLayer = nullptr;
 848     if (tensorFullyConnectedDescriptor.m_BiasEnabled)
 849     {
 850         // BIAS VALUE
 851         vector<float> biasData(outputSize);
 852
 853         GetDataFromBlob(layerParam, biasData, 1);
 854
 855         const unsigned int sbTD[1] = { outputSize };
 856
 857         ConstTensor biases(TensorInfo(1, sbTD, DataType::Float32), biasData);
 858
 859         fullyConnectedLayer = m_Network->AddFullyConnectedLayer(tensorFullyConnectedDescriptor, weights, biases,
 860             layerParam.name().c_str());
 861     }
 862     else
 863     {
 864         fullyConnectedLayer = m_Network->AddFullyConnectedLayer(tensorFullyConnectedDescriptor, weights,
 865             layerParam.name().c_str());
 866     }
 867
 868     TensorInfo outputInfo({ inputInfo.GetShape()[0], outputSize }, DataType::Float32);
 869     GetArmnnOutputSlotForCaffeTop(layerParam.bottom(0)).Connect(fullyConnectedLayer->GetInputSlot(0));
 870     fullyConnectedLayer->GetOutputSlot(0).SetTensorInfo(outputInfo);
 871     SetArmnnOutputSlotForCaffeTop(layerParam.top(0), fullyConnectedLayer->GetOutputSlot(0));
 872 }
 873
 874 void CaffeParser::ParseSoftmaxLayer(const LayerParameter& layerParam)
 875 {
 876     ValidateNumInputsOutputs(layerParam, 1, 1);
 877
 878     SoftmaxParameter param = layerParam.softmax_param();
 879
 880     const TensorInfo& inputInfo = GetArmnnOutputSlotForCaffeTop(layerParam.bottom(0)).GetTensorInfo();
 881
 882     // Ignored Caffe Parameters
 883     //      axis
 884     //      Engine
 885
 886     armnn::SoftmaxDescriptor softmaxDescriptor;
 887     armnn::IConnectableLayer* const softmaxLayer = m_Network->AddSoftmaxLayer(
 888         softmaxDescriptor,
 889         layerParam.name().c_str());
 890     GetArmnnOutputSlotForCaffeTop(layerParam.bottom(0)).Connect(softmaxLayer->GetInputSlot(0));
 891     softmaxLayer->GetOutputSlot(0).SetTensorInfo(inputInfo);
 892     SetArmnnOutputSlotForCaffeTop(layerParam.top(0), softmaxLayer->GetOutputSlot(0));
 893 }
 894
 895 void CaffeParser::ParseEltwiseLayer(const LayerParameter& layerParam)
 896 {
 897     ValidateNumInputsOutputs(layerParam, 2, 1);
 898
 899     const TensorInfo& inputInfo = GetArmnnOutputSlotForCaffeTop(layerParam.bottom(0)).GetTensorInfo();
 900
 901     // Ignored Caffe Parameters
 902     //      coeff
 903
 904     EltwiseParameter_EltwiseOp operation = EltwiseParameter_EltwiseOp_SUM; // default to sum as per caffe
 905
 906     if (layerParam.has_eltwise_param() && layerParam.eltwise_param().has_operation())
 907     {
 908         operation = layerParam.eltwise_param().operation();
 909     }
 910
 911     armnn::IConnectableLayer* newLayer = nullptr;
 912     switch (operation)
 913     {
 914         case EltwiseParameter_EltwiseOp_SUM:
 915         {
 916             newLayer = m_Network->AddAdditionLayer(layerParam.name().c_str());
 917             break;
 918         }
 919         case EltwiseParameter_EltwiseOp_PROD:
 920         {
 921             newLayer = m_Network->AddMultiplicationLayer(layerParam.name().c_str());
 922             break;
 923         }
 924         default:
 925         {
 926             throw ParseException("Unsupported operation in Eltwise layer");
 927         }
 928     }
 929
 930     GetArmnnOutputSlotForCaffeTop(layerParam.bottom(0)).Connect(newLayer->GetInputSlot(0));
 931     GetArmnnOutputSlotForCaffeTop(layerParam.bottom(1)).Connect(newLayer->GetInputSlot(1));
 932     newLayer->GetOutputSlot(0).SetTensorInfo(inputInfo);
 933     SetArmnnOutputSlotForCaffeTop(layerParam.top(0), newLayer->GetOutputSlot(0));
 934 }
 935
 936 void CaffeParser::ParseConcatLayer(const LayerParameter& layerParam)
 937 {
 938     unsigned int numInputs = static_cast<unsigned int>(layerParam.bottom_size());
 939     // we assume concat happens along the channel dimension, which is 1 in (0, 1, 2, 3)
 940     unsigned int concatDim = 1;
 941     unsigned int numOfDims = 4;
 942
 943     OriginsDescriptor concatDescriptor(static_cast<uint32_t>(numInputs), numOfDims);// we only consider 4-D tensor here
 944     std::vector<unsigned int>mergeDimSizes(numOfDims, 0u);
 945
 946     unsigned int mergeDim = 0;
 947     for (unsigned int viewIndex = 0; viewIndex < numInputs; ++viewIndex)
 948     {
 949         const TensorInfo& inputInfo = GetArmnnOutputSlotForCaffeTop(
 950             layerParam.bottom(boost::numeric_cast<int>(viewIndex))).GetTensorInfo();
 951         // Check whether the dimensions of the input tensors are actually 4
 952         if (inputInfo.GetNumDimensions()!=4)
 953         {
 954             throw ParseException("The number of dimensions for input tensors of the concatenation op should be 4.");
 955         }
 956
 957         mergeDimSizes[0] = inputInfo.GetShape()[0];
 958         mergeDimSizes[1] = inputInfo.GetShape()[1];
 959         mergeDimSizes[2] = inputInfo.GetShape()[2];
 960         mergeDimSizes[3] = inputInfo.GetShape()[3];
 961
 962         for (unsigned int j = 0; j < concatDim; ++j)
 963         {
 964             concatDescriptor.SetViewOriginCoord(viewIndex, j, 0);
 965         }
 966
 967         concatDescriptor.SetViewOriginCoord(viewIndex, concatDim, mergeDim);
 968         mergeDim += mergeDimSizes[concatDim];
 969
 970         for (unsigned int j = concatDim+1; j < numOfDims; ++j)
 971         {
 972             concatDescriptor.SetViewOriginCoord(viewIndex, j, 0);
 973         }
 974     }
 975     mergeDimSizes[concatDim] = mergeDim;
 976
 977     armnn::IConnectableLayer *concatlayer = m_Network->AddMergerLayer(concatDescriptor, layerParam.name().c_str());
 978     for (unsigned int i = 0; i < numInputs; ++i)
 979     {
 980         armnn::IOutputSlot& outputSlot = GetArmnnOutputSlotForCaffeTop(layerParam.bottom(boost::numeric_cast<int>(i)));
 981         outputSlot.Connect(concatlayer->GetInputSlot(i));
 982     }
 983
 984     concatlayer->GetOutputSlot(0).SetTensorInfo(armnn::TensorInfo(numOfDims, mergeDimSizes.data(), DataType::Float32));
 985     SetArmnnOutputSlotForCaffeTop(layerParam.top(0), concatlayer->GetOutputSlot(0));
 986 }
 987
 988 void CaffeParser::ParseBatchNormLayer(const LayerParameter& layerParam)
 989 {
 990     ValidateNumInputsOutputs(layerParam, 1, 1);
 991
 992     const TensorInfo& inputInfo = GetArmnnOutputSlotForCaffeTop(layerParam.bottom(0)).GetTensorInfo();
 993
 994     string name = layerParam.name();
 995
 996     BatchNormParameter param = layerParam.batch_norm_param();
 997     // If use_global_stats is not explicitly set in the model, assume it to be true (its default value
 998     // when the network is in the testing phase).
 999     if (param.has_use_global_stats())
1000     {
1001         if (!param.use_global_stats())
1002         {
1003             throw ParseException(boost::str(boost::format("Error parsing Batch Norm layer '%1%': "
1004                 "Parameter 'use_global_stats' is set to false, which is unsupported (value used for training).")
1005                 % name));
1006         }
1007     }
1008
1009     BatchNormalizationDescriptor desc;
1010     desc.m_Eps = param.eps();
1011
1012     unsigned int channels = inputInfo.GetShape()[1];
1013     unsigned int shape[]  = {channels};
1014
1015     vector<float> meanData(channels);
1016     GetDataFromBlob(layerParam, meanData, 0);
1017
1018     vector<float> varianceData(channels);
1019     GetDataFromBlob(layerParam, varianceData, 1);
1020
1021     // read moving average factor and apply scaling (if required)
1022     const BlobProto& blob = layerParam.blobs(boost::numeric_cast<int>(2));
1023     const float movingAverageFactor = blob.data(boost::numeric_cast<int>(0));
1024     if(movingAverageFactor != 0.0f)
1025     {
1026         const float scaleFactor = 1.0f / movingAverageFactor;
1027         auto scaleFunction = [scaleFactor](float f) -> float { return f * scaleFactor; };
1028
1029         std::transform(varianceData.begin(), varianceData.end(), varianceData.begin(), scaleFunction);
1030         std::transform(meanData.begin(), meanData.end(), meanData.begin(), scaleFunction);
1031     }
1032
1033     // identity scale operation
1034     vector<float> betaData(channels, 0.0f);
1035     vector<float> gammaData(channels, 1.0f);
1036
1037     ConstTensor mean(TensorInfo(1, shape, armnn::DataType::Float32), meanData);
1038     ConstTensor variance(TensorInfo(1, shape, armnn::DataType::Float32), varianceData);
1039     ConstTensor beta(TensorInfo(1, shape, armnn::DataType::Float32), betaData);
1040     ConstTensor gamma(TensorInfo(1, shape, armnn::DataType::Float32), gammaData);
1041
1042     armnn::IConnectableLayer* const batchNormLayer = m_Network->AddBatchNormalizationLayer(desc,
1043         mean, variance, beta, gamma, name.c_str());
1044     GetArmnnOutputSlotForCaffeTop(layerParam.bottom(0)).Connect(batchNormLayer->GetInputSlot(0));
1045     batchNormLayer->GetOutputSlot(0).SetTensorInfo(inputInfo);
1046     SetArmnnOutputSlotForCaffeTop(layerParam.top(0), batchNormLayer->GetOutputSlot(0));
1047 }
1048
1049 void CaffeParser::ParseScaleLayer(const LayerParameter& layerParam)
1050 {
1051     // current unoptimal solution: add a batchnormalization layer with 0 mean and 1 variance
1052     ValidateNumInputsOutputs(layerParam, 1, 1);
1053
1054     const TensorInfo& inputInfo = GetArmnnOutputSlotForCaffeTop(layerParam.bottom(0)).GetTensorInfo();
1055
1056     string name = layerParam.name();
1057
1058     ScaleParameter param = layerParam.scale_param();
1059     if (param.axis() != 1)
1060     {
1061         // Would have to use something other than BatchNormalizationLayer in this case
1062         throw ParseException("Loading Scale Layer: Only axis 1 supported currently");
1063     }
1064
1065     unsigned int     channels = inputInfo.GetShape()[1];
1066     unsigned int     shape[]  = {channels};
1067
1068     BatchNormalizationDescriptor desc;
1069     desc.m_Eps = 0.0f; // don't need epsilon if variance is 1
1070     vector<float> meanData(channels, 0.0f);
1071     vector<float> varianceData(channels, 1.0f);
1072     vector<float> betaData(channels, 0.0f);
1073     vector<float> gammaData(channels);
1074
1075     GetDataFromBlob(layerParam, gammaData, 0);
1076
1077     if(param.has_bias_term())
1078     {
1079         GetDataFromBlob(layerParam, betaData, 1);
1080     }
1081
1082     ConstTensor mean(TensorInfo(1, shape, armnn::DataType::Float32), meanData);
1083     ConstTensor variance(TensorInfo(1, shape, armnn::DataType::Float32), varianceData);
1084     ConstTensor beta(TensorInfo(1, shape, armnn::DataType::Float32), betaData);
1085     ConstTensor gamma(TensorInfo(1, shape, armnn::DataType::Float32), gammaData);
1086
1087     armnn::IConnectableLayer* const batchNormLayer = m_Network->AddBatchNormalizationLayer(desc,
1088         mean, variance, beta, gamma, name.c_str());
1089     GetArmnnOutputSlotForCaffeTop(layerParam.bottom(0)).Connect(batchNormLayer->GetInputSlot(0));
1090     batchNormLayer->GetOutputSlot(0).SetTensorInfo(inputInfo);
1091     SetArmnnOutputSlotForCaffeTop(layerParam.top(0), batchNormLayer->GetOutputSlot(0));
1092 }
1093
1094 void CaffeParser::ParseSplitLayer(const caffe::LayerParameter& layerParam)
1095 {
1096     // Used in caffe to duplicate memory - not necessary in armnn
1097     if (layerParam.bottom_size() != 1)
1098     {
1099         throw ParseException("Split layer '" + layerParam.name() + "' should have exactly 1 bottom");
1100     }
1101     armnn::IOutputSlot& outputSlot = GetArmnnOutputSlotForCaffeTop(layerParam.bottom(0));
1102     for (int i = 0; i < layerParam.top_size(); i++)
1103     {
1104         SetArmnnOutputSlotForCaffeTop(layerParam.top(i), outputSlot);
1105     }
1106 }
1107
1108 void CaffeParser::ParseDropoutLayer(const caffe::LayerParameter& layerParam)
1109 {
1110     // Ignored for inference so patch the single input to its single output
1111     if (layerParam.bottom_size() != 1 || layerParam.top_size() != 1)
1112     {
1113         throw ParseException("Dropout layer '" + layerParam.name() + "' should have exactly 1 bottom and 1 top");
1114     }
1115     SetArmnnOutputSlotForCaffeTop(layerParam.top(0), GetArmnnOutputSlotForCaffeTop(layerParam.bottom(0)));
1116 }
1117
1118 void CaffeParser::TrackInputBinding(armnn::IConnectableLayer* layer,
1119     armnn::LayerBindingId id,
1120     const armnn::TensorInfo& tensorInfo)
1121 {
1122     return TrackBindingPoint(layer, id, tensorInfo, layer->GetName(), m_NetworkInputsBindingInfo);
1123 }
1124
1125 void CaffeParser::TrackOutputBinding(armnn::IConnectableLayer* layer,
1126     armnn::LayerBindingId id,
1127     const armnn::TensorInfo& tensorInfo)
1128 {
1129     return TrackBindingPoint(layer, id, tensorInfo, layer->GetName(), m_NetworkOutputsBindingInfo);
1130 }
1131
1132 void CaffeParser::TrackBindingPoint(armnn::IConnectableLayer* layer,
1133     armnn::LayerBindingId id,
1134     const armnn::TensorInfo& tensorInfo,
1135     const char* bindingPointDesc,
1136     std::unordered_map<std::string, BindingPointInfo>& nameToBindingInfo)
1137 {
1138     const std::string layerName = layer->GetName();
1139     auto it = nameToBindingInfo.find(layerName);
1140     if (it == nameToBindingInfo.end())
1141     {
1142         nameToBindingInfo[layerName] = std::make_pair(id, tensorInfo);
1143     }
1144     else
1145     {
1146         throw ParseException(boost::str(
1147             boost::format("Id %1% used by more than one %2% layer") % id % bindingPointDesc));
1148     }
1149 }
1150
1151 armnn::IOutputSlot& CaffeParser::GetArmnnOutputSlotForCaffeTop(const std::string& caffeTopName) const
1152 {
1153     auto it = m_ArmnnOutputSlotForCaffeTop.find(caffeTopName);
1154     if (it != m_ArmnnOutputSlotForCaffeTop.end())
1155     {
1156         return *it->second;
1157     }
1158     else
1159     {
1160         throw ParseException(boost::str(boost::format(
1161             "Could not find armnn output slot for Caffe top '%1%'") % caffeTopName));
1162     }
1163 }
1164
1165 void CaffeParser::SetArmnnOutputSlotForCaffeTop(const std::string& caffeTopName, armnn::IOutputSlot& armnnOutputSlot)
1166 {
1167     auto it = m_ArmnnOutputSlotForCaffeTop.find(caffeTopName);
1168     if (it == m_ArmnnOutputSlotForCaffeTop.end())
1169     {
1170         m_ArmnnOutputSlotForCaffeTop[caffeTopName] = &armnnOutputSlot;
1171     }
1172     else
1173     {
1174         throw ParseException("Attempting to add duplicate entry for Caffe top '" + caffeTopName + "'");
1175     }
1176 }
1177
1178 void CaffeParser::ResolveInPlaceLayers(caffe::NetParameter& netParameter)
1179 {
1180     // Find layers with the same top
1181     std::map<std::string, std::vector<caffe::LayerParameter*>> layersByTop;
1182     for (int layerIdx = 0; layerIdx < netParameter.layer_size(); ++layerIdx)
1183     {
1184         caffe::LayerParameter& layer = *netParameter.mutable_layer(layerIdx);
1185         for (int i = 0; i < layer.top_size(); ++i)
1186         {
1187             layersByTop[layer.top(i)].push_back(&layer);
1188         }
1189     }
1190
1191     // For each set of layers with the same top, resolve them to a linear chain rather than in-place layers.
1192     // Note that for 'regular' layers, there will be a single layer in each group and so this will be a no-op.
1193     for (auto layersWithSameTopIt : layersByTop)
1194     {
1195         const std::string& top = layersWithSameTopIt.first;
1196         const std::vector<caffe::LayerParameter*>& layersWithSameTop = layersWithSameTopIt.second;
1197
1198         // Chain the layers together in the order that they are listed in the prototxt (hopefully this is correct).
1199         // Note that the last layer will not have its top modified so that other layers will continue to reference it.
1200         for (unsigned int layerIdx = 0; layerIdx < layersWithSameTop.size() - 1; ++layerIdx)
1201         {
1202             caffe::LayerParameter& layer1 = *layersWithSameTop[layerIdx];
1203             caffe::LayerParameter& layer2 = *layersWithSameTop[layerIdx+1];
1204             if (layer1.top_size() != 1)
1205             {
1206                 throw ParseException("Node '" + layer1.name() + "' is an in-place layer but "
1207                     "doesn't have exactly one top.");
1208             }
1209             std::string newTop = layer1.name() + "_top";
1210             layer1.set_top(0, newTop);
1211             if (layer2.bottom_size() != 1 || layer2.bottom(0) != top)
1212             {
1213                 throw ParseException("Node '" + layer2.name() + "' is an in-place layer but "
1214                     " doesn't have exactly one bottom, or it doesn't match its top.");
1215             }
1216             layer2.set_bottom(0, newTop);
1217         }
1218     }
1219 }
1220
1221 void CaffeParser::LoadNetParam(NetParameter& netParameter)
1222 {
1223     // caffe models sometimes have an implicit input layer.
1224     // in that case, add an explicit one
1225     if (netParameter.input_size() > 0)
1226     {
1227         LayerParameter* newLayer = netParameter.add_layer();
1228
1229         newLayer->set_type("Input");
1230         newLayer->set_name(netParameter.input(0));
1231         newLayer->add_top(netParameter.input(0));
1232
1233         InputParameter* inputParam = newLayer->mutable_input_param();
1234         BlobShape* shape = inputParam->add_shape();
1235
1236         int dim_size = netParameter.input_dim_size();
1237         for (int i = 0; i < dim_size; ++i)
1238         {
1239             shape->add_dim(netParameter.input_dim(i));
1240         }
1241     }
1242
1243     // Replace in-place layers with regular ones to make the rest of the parsing easier.
1244     ResolveInPlaceLayers(netParameter);
1245
1246     // Create a lookup of Caffe layers by name
1247     for (int i = 0; i < netParameter.layer_size(); ++i)
1248     {
1249         const caffe::LayerParameter& layer = netParameter.layer(i);
1250         for (int i = 0; i < layer.top_size(); ++i)
1251         {
1252             m_CaffeLayersByTopName[layer.top(i)] = &layer;
1253         }
1254     }
1255
1256     // Find the output layers the user requested
1257     std::vector<const caffe::LayerParameter*> targetLayers;
1258     for (const std::string& requestedOutputName : m_RequestedOutputs)
1259     {
1260         auto nodeIt = m_CaffeLayersByTopName.find(requestedOutputName);
1261         if (nodeIt == m_CaffeLayersByTopName.end())
1262         {
1263             throw ParseException("Couldn't find requested output layer '" + requestedOutputName + "' in graph");
1264         }
1265         targetLayers.push_back(nodeIt->second);
1266     }
1267
1268     // Sort them into a linear ordering such that all inputs of a node are before the node itself
1269     std::vector<const caffe::LayerParameter*> sortedNodes;
1270     if (!armnnUtils::GraphTopologicalSort<const caffe::LayerParameter*>(
1271         targetLayers,
1272         [this](const caffe::LayerParameter* node)
1273         {
1274             return GetInputs(*node);
1275         },
1276         sortedNodes))
1277     {
1278         throw ParseException("Cycle detected in graph");
1279     }
1280
1281     // Parse each node in order, knowing that all inputs of a node will be processed before the node itself
1282     for (const caffe::LayerParameter* current : sortedNodes)
1283     {
1284         auto it = ms_CaffeLayerNameToParsingFunctions.find(current->type());
1285         if (it == ms_CaffeLayerNameToParsingFunctions.end())
1286         {
1287             throw ParseException("Unsupported layer type '" + current->type() + "'");
1288         }
1289         auto func = it->second;
1290         (this->*func)(*current);
1291     }
1292
1293     // Add ArmNN output layers connected to each requested output
1294     for (const std::string& requestedOutput : m_RequestedOutputs)
1295     {
1296         armnn::IOutputSlot& outputSlot = GetArmnnOutputSlotForCaffeTop(requestedOutput);
1297
1298         const armnn::LayerBindingId outputId = boost::numeric_cast<armnn::LayerBindingId>(
1299             m_NetworkOutputsBindingInfo.size());
1300         armnn::IConnectableLayer* const outputLayer = m_Network->AddOutputLayer(outputId, requestedOutput.c_str());
1301         outputSlot.Connect(outputLayer->GetInputSlot(0));
1302
1303         TrackOutputBinding(outputLayer, outputId, outputLayer->GetInputSlot(0).GetConnection()->GetTensorInfo());
1304     }
1305 }
1306
1307 INetworkPtr CaffeParser::CreateNetworkFromTextFile(const char* graphFile,
1308     const std::map<std::string, armnn::TensorShape>& inputShapes,
1309     const std::vector<std::string>& requestedOutputs)
1310 {
1311     FILE* fd = fopen(graphFile, "r");
1312
1313     if (fd == nullptr)
1314     {
1315         std::stringstream error;
1316         error << "Graph file " << graphFile << " failed to open";
1317         throw FileNotFoundException(error.str());
1318     }
1319
1320     // Parse the file into a message
1321     NetParameter netParam;
1322     auto         input   = new google::protobuf::io::FileInputStream(fileno(fd));
1323     bool         success = google::protobuf::TextFormat::Parse(input, &netParam);
1324     delete input;
1325     fclose(fd);
1326
1327     if (!success)
1328     {
1329         std::stringstream error;
1330         error << "Failed to parse graph file";
1331         throw ParseException(error.str());
1332     }
1333
1334     return CreateNetworkFromNetParameter(netParam, inputShapes, requestedOutputs);
1335 }
1336
1337 INetworkPtr CaffeParser::CreateNetworkFromString(const char* protoText,
1338     const std::map<std::string, armnn::TensorShape>& inputShapes,
1339     const std::vector<std::string>& requestedOutputs)
1340 {
1341     // Parse the string into a message
1342     NetParameter netParam;
1343     bool         success = google::protobuf::TextFormat::ParseFromString(protoText, &netParam);
1344
1345     if (!success)
1346     {
1347         std::stringstream error;
1348         error << "Failed to parse graph string";
1349         throw ParseException(error.str());
1350     }
1351
1352     return CreateNetworkFromNetParameter(netParam, inputShapes, requestedOutputs);
1353 }
1354
1355 INetworkPtr CaffeParser::CreateNetworkFromBinaryFile(const char* graphFile,
1356     const std::map<std::string, armnn::TensorShape>& inputShapes,
1357     const std::vector<std::string>& requestedOutputs)
1358 {
1359     FILE* fd = fopen(graphFile, "rb");
1360
1361     if (fd == nullptr)
1362     {
1363         std::stringstream error;
1364         error << "Graph file " << graphFile << " failed to open";
1365         throw FileNotFoundException(error.str());
1366     }
1367
1368     // Parse the file into a message
1369     NetParameter netParam;
1370
1371     FileInputStream  inStream(fileno(fd));
1372     CodedInputStream codedStream(&inStream);
1373     codedStream.SetTotalBytesLimit(INT_MAX, INT_MAX);
1374     bool success = netParam.ParseFromCodedStream(&codedStream);
1375     fclose(fd);
1376
1377     if (!success)
1378     {
1379         std::stringstream error;
1380         error << "Failed to parse protobuf file" << graphFile;
1381         throw ParseException(error.str());
1382     }
1383
1384     return CreateNetworkFromNetParameter(netParam, inputShapes, requestedOutputs);
1385 }
1386
1387 INetworkPtr CaffeParser::CreateNetworkFromNetParameter(NetParameter& netParam,
1388     const std::map<std::string, armnn::TensorShape>& inputShapes,
1389     const std::vector<std::string>& requestedOutputs)
1390 {
1391     m_NetworkInputsBindingInfo.clear();
1392     m_NetworkOutputsBindingInfo.clear();
1393
1394     m_Network = INetwork::Create();
1395
1396     m_InputShapes = inputShapes;
1397     if (requestedOutputs.size() == 0)
1398     {
1399         throw ParseException("requestedOutputs must have at least one entry");
1400     }
1401     m_RequestedOutputs = requestedOutputs;
1402
1403     try
1404     {
1405         LoadNetParam(netParam);
1406     }
1407     catch (const ParseException& e)
1408     {
1409         Cleanup();
1410         throw e;
1411     }
1412
1413     Cleanup();
1414
1415     return move(m_Network);
1416 }
1417
1418 void CaffeParser::Cleanup()
1419 {
1420     // cleanup, in case we reuse this parser
1421     m_CaffeLayersByTopName.clear();
1422     m_InputShapes.clear();
1423     m_RequestedOutputs.clear();
1424     m_ArmnnOutputSlotForCaffeTop.clear();
1425 }
1426
1427 }
1428
1429