Release 18.08
[platform/upstream/armnn.git] / src / armnnTfParser / TfParser.cpp
1 //
2 // Copyright © 2017 Arm Ltd. All rights reserved.
3 // See LICENSE file in the project root for full license information.
4 //
5 #include "TfParser.hpp"
6
7 #include <armnn/INetwork.hpp>
8 #include <armnn/Utils.hpp>
9 #include <armnn/TypesUtils.hpp>
10 #include <armnn/Exceptions.hpp>
11 #include <armnn/Descriptors.hpp>
12
13 #include <GraphTopologicalSort.hpp>
14 #include <Permute.hpp>
15 #include <VerificationHelpers.hpp>
16
17 #include <google/protobuf/io/zero_copy_stream_impl.h>
18 #include <google/protobuf/text_format.h>
19
20 #include "tensorflow/core/framework/graph.pb.h"
21 #include "tensorflow/core/framework/node_def.pb.h"
22 #include "tensorflow/core/framework/types.pb.h"
23 #include "tensorflow/core/framework/tensor.pb.h"
24 #include "tensorflow/core/framework/tensor_shape.pb.h"
25
26 #include <boost/assert.hpp>
27 #include <boost/format.hpp>
28 #include <boost/core/ignore_unused.hpp>
29 #include <boost/log/trivial.hpp>
30 #include <boost/numeric/conversion/cast.hpp>
31 #include <boost/polymorphic_cast.hpp>
32
33 #include <memory>
34 #include <sstream>
35 #include <numeric>
36 #include <functional>
37
38 using namespace armnn;
39
40 namespace armnnTfParser
41 {
42 namespace
43 {
44
45 const PermutationVector NHWCToArmNN = { 0, 2, 3, 1 };
46 const PermutationVector ArmNNToNHWC = { 0, 3, 1, 2 };
47
48 IConnectableLayer* AddSwizzleLayer(INetwork& network, IOutputSlot& input, const PermutationVector& mapping,
49     const std::string& name)
50 {
51     // Adds swizzle layer.
52     IConnectableLayer* const layer = network.AddPermuteLayer(mapping, name.c_str());
53
54     // Connects intput to swizzle layer.
55     input.Connect(layer->GetInputSlot(0));
56
57     // Sets up swizzled output.
58     const TensorInfo outInfo = armnnUtils::Permuted(input.GetTensorInfo(), mapping);
59     layer->GetOutputSlot(0).SetTensorInfo(outInfo);
60
61     return layer;
62 }
63
64 IConnectableLayer* SwizzleInDeswizzleOut(INetwork& network, IOutputSlot& input, IConnectableLayer& layer,
65     const std::string& name)
66 {
67     // Adds swizzle layer.
68     IConnectableLayer* const swizzleLayer = AddSwizzleLayer(network, input, NHWCToArmNN, "swizzle_for-" + name);
69
70     // Connects swizzledInput to layer.
71     swizzleLayer->GetOutputSlot(0).Connect(layer.GetInputSlot(0));
72
73     // Adds deswizzle layer.
74     IConnectableLayer* const deswizzleLayer = AddSwizzleLayer(network, layer.GetOutputSlot(0), ArmNNToNHWC,
75         "deswizzle_for-" + name);
76
77     return deswizzleLayer;
78 }
79
80 template <typename Callable>
81 void ReadMandatoryNodeAttributeImpl(const tensorflow::NodeDef& nodeDef,
82     const std::string& attribName,
83     tensorflow::AttrValue::ValueCase expectedValueCase,
84     Callable callable)
85 {
86     auto iter = nodeDef.attr().find(attribName);
87     if (iter != nodeDef.attr().end())
88     {
89         const auto& attrValue = iter->second;
90         if (attrValue.value_case() == expectedValueCase)
91         {
92             callable(attrValue);
93         }
94         else
95         {
96             throw ParseException(
97                 boost::str(
98                     boost::format(
99                         "Attribute %1% of node %2% expected to have %3% as tensorflow::AttrValue::ValueCase, "
100                         "but found %4% instead %5%")
101                         % attribName
102                         % nodeDef.name()
103                         % static_cast<int>(expectedValueCase)
104                         % static_cast<int>(attrValue.value_case())
105                         % CHECK_LOCATION().AsString()));
106         }
107     }
108     else
109     {
110         throw ParseException(
111             boost::str(
112                 boost::format(
113                     "Could not find required attribute %1% in node %2% %3%")
114                     % attribName
115                     % nodeDef.name()
116                     % CHECK_LOCATION().AsString()));
117     }
118 }
119
120 template <typename Callable>
121 void ReadOptionalNodeAttributeImpl(const tensorflow::NodeDef& nodeDef,
122     const std::string& attribName,
123     tensorflow::AttrValue::ValueCase expectedValueCase,
124     Callable callable)
125 {
126     auto iter = nodeDef.attr().find(attribName);
127     if (iter != nodeDef.attr().end())
128     {
129         const auto& attrValue = iter->second;
130         if (attrValue.value_case() == expectedValueCase)
131         {
132             callable(attrValue);
133         }
134         else
135         {
136             throw ParseException(
137                 boost::str(
138                     boost::format(
139                         "Attribute %1% of node %2% expected to have %3% as tensorflow::AttrValue::ValueCase, "
140                         "but found %4% instead %5%")
141                         % attribName
142                         % nodeDef.name()
143                         % static_cast<int>(expectedValueCase)
144                         % static_cast<int>(attrValue.value_case())
145                         % CHECK_LOCATION().AsString()));
146         }
147     }
148 }
149
150 float ReadMandatoryNodeFloatAttribute(const tensorflow::NodeDef& nodeDef, const std::string& name)
151 {
152     float attribValue = 0.0f;
153     ReadMandatoryNodeAttributeImpl(nodeDef, name, tensorflow::AttrValue::kF,
154         [&attribValue](const tensorflow::AttrValue& attrValue)
155     {
156         attribValue = attrValue.f();
157     });
158     return attribValue;
159 }
160
161 uint32_t ReadMandatoryNodeUint32Attribute(const tensorflow::NodeDef& nodeDef, const std::string& name)
162 {
163     uint32_t attribValue = 0u;
164     ReadMandatoryNodeAttributeImpl(nodeDef, name, tensorflow::AttrValue::kI,
165         [&attribValue](const tensorflow::AttrValue& attrValue)
166     {
167         attribValue = static_cast<uint32_t>(attrValue.i());
168     });
169     return attribValue;
170 }
171
172 std::string ReadMandatoryNodeStringAttribute(const tensorflow::NodeDef& nodeDef, const std::string& name)
173 {
174     std::string attribValue = "";
175     ReadMandatoryNodeAttributeImpl(nodeDef, name, tensorflow::AttrValue::kS,
176         [&attribValue](const tensorflow::AttrValue& attrValue)
177     {
178         attribValue = attrValue.s();
179     });
180     return attribValue;
181 }
182
183 std::vector<uint32_t> ReadMandatoryNodeUint32ListAttribute(const tensorflow::NodeDef& nodeDef,
184     const std::string& name)
185 {
186     std::vector<uint32_t> attriList;
187     ReadMandatoryNodeAttributeImpl(nodeDef, name, tensorflow::AttrValue::kList,
188         [&attriList](const tensorflow::AttrValue& attrValue)
189     {
190         for (int attriNum = 0; attriNum < attrValue.list().i_size(); ++attriNum)
191         {
192             attriList.push_back(static_cast<uint32_t>(attrValue.list().i(attriNum)));
193         }
194     });
195
196     return attriList;
197 }
198
199 std::vector<uint32_t> ReadOptionalNodeUint32ListAttribute(const tensorflow::NodeDef& nodeDef,
200     const std::string& name)
201 {
202     std::vector<uint32_t> attriList;
203     ReadOptionalNodeAttributeImpl(nodeDef, name, tensorflow::AttrValue::kList,
204         [&attriList](const tensorflow::AttrValue& attrValue)
205     {
206         for (int attriNum = 0; attriNum < attrValue.list().i_size(); ++attriNum)
207         {
208             attriList.push_back(static_cast<uint32_t>(attrValue.list().i(attriNum)));
209         }
210     });
211
212     return attriList;
213 }
214
215 bool ReadOptionalNodeBoolAttribute(const tensorflow::NodeDef& nodeDef,
216     const std::string& name,
217     bool defaultValue = false)
218 {
219     bool attribValue = defaultValue;
220     ReadOptionalNodeAttributeImpl(nodeDef, name, tensorflow::AttrValue::kB,
221         [&attribValue](const tensorflow::AttrValue& attrValue)
222     {
223         attribValue = attrValue.b();
224     });
225     return attribValue;
226 }
227
228 tensorflow::DataType ReadMandatoryNodeTypeAttribute(const tensorflow::NodeDef& nodeDef, const std::string& name)
229 {
230     tensorflow::DataType attribValue = tensorflow::DT_INVALID;
231     ReadMandatoryNodeAttributeImpl(nodeDef, name, tensorflow::AttrValue::kType,
232         [&attribValue](const tensorflow::AttrValue& attrValue)
233     {
234         attribValue = attrValue.type();
235     });
236     return attribValue;
237 }
238
239 TensorInfo PrepareReshape(const TensorInfo& input, const std::vector<int32_t>& targetDims)
240 {
241     std::vector<unsigned int> outDims(targetDims.begin(), targetDims.end());
242     const auto stretchDim = std::find(targetDims.begin(), targetDims.end(), -1);
243
244     if (stretchDim != targetDims.end())
245     {
246         if (std::find(std::next(stretchDim), targetDims.end(), -1) != targetDims.end())
247         {
248             throw ParseException(
249                 boost::str(
250                     boost::format(
251                         "At most one component of shape can be -1 %1%")
252                         % CHECK_LOCATION().AsString()));
253         }
254
255         auto targetNumElements =
256             boost::numeric_cast<unsigned int>(
257                 std::accumulate(targetDims.begin(), targetDims.end(), -1, std::multiplies<int32_t>()));
258         auto stretchIndex = static_cast<size_t>(std::distance(targetDims.begin(), stretchDim));
259         outDims[stretchIndex] = input.GetNumElements() / targetNumElements;
260     }
261
262     TensorInfo reshapeInfo = input;
263     reshapeInfo.SetShape(TensorShape{ static_cast<unsigned int>(outDims.size()), outDims.data() });
264
265     return reshapeInfo;
266 }
267
268 // We need the input0Slot to guide the reshape for input1Slot.
269 IOutputSlot* BroadcastForAddandMul(IOutputSlot* input0Slot, IOutputSlot* input1Slot, bool isNHWC, INetwork& m_Network,
270                                    const tensorflow::NodeDef& nodeDef)
271 {
272     const TensorInfo& input1Info = input1Slot->GetTensorInfo();
273     const TensorInfo inputTensorInfo = input0Slot->GetTensorInfo();
274     const unsigned int matchDim = inputTensorInfo.GetNumDimensions() - (isNHWC ? 1 : 3);
275     std::array<unsigned int, MaxNumOfTensorDimensions> reshapedDimensions;
276     std::fill_n(reshapedDimensions.begin(), inputTensorInfo.GetNumDimensions(), 1);
277     reshapedDimensions[matchDim] = input1Info.GetShape()[0];
278
279     armnn::TensorInfo reshapedInfo = input1Info;
280     reshapedInfo.SetShape(TensorShape{ inputTensorInfo.GetNumDimensions(), reshapedDimensions.data() });
281
282     const std::string reshapeLayerName = "reshape_for-" + nodeDef.name();
283     ReshapeDescriptor reshapeDesc;
284     reshapeDesc.m_TargetShape = reshapedInfo.GetShape();
285     IConnectableLayer* const reshapeLayer = m_Network.AddReshapeLayer(reshapeDesc, reshapeLayerName.c_str());
286
287     input1Slot->Connect(reshapeLayer->GetInputSlot(0));
288     reshapeLayer->GetOutputSlot(0).SetTensorInfo(reshapedInfo);
289
290     input1Slot = &reshapeLayer->GetOutputSlot(0);
291
292     return input1Slot;
293 }
294
295 OutputId ParseOutputId(const std::string & name)
296 {
297     unsigned int outputNum = 0;
298     size_t colonPos = name.find_last_of(":");
299     if (colonPos != std::string::npos)
300     {
301         int n = std::stoi(name.substr(colonPos+1));
302         if (n<0 || n>100)
303         {
304             throw ParseException(
305                 boost::str(
306                     boost::format(
307                         "Output tensor id is out of range for %1% %2%")
308                         % name
309                         % CHECK_LOCATION().AsString()));
310         }
311         outputNum = static_cast<unsigned int>(n);
312     }
313     return OutputId(name.substr(0,colonPos),outputNum);
314 }
315
// Validates that a node's data-format attribute names a layout the parser
// understands; anything other than "NHWC" or "NCHW" raises ParseException.
// NODE_TYPE and NODE_DEF are only used to build the error message.
#define CHECK_DATA_FORMAT(NODE_DEF, FORMAT, NODE_TYPE) \
    if( FORMAT != "NHWC" && FORMAT != "NCHW" ) \
    { \
        throw ParseException( \
            boost::str( \
                boost::format( \
                    "Unsupported data format %1% passed for %2% node %3%. " \
                    "Only NHWC and NCHW supported %4%") \
                    % FORMAT \
                    % NODE_TYPE \
                    % NODE_DEF.name() \
                    % CHECK_LOCATION().AsString())); \
    }

// Validates that a node's padding attribute is one of TensorFlow's two
// padding schemes; anything other than "SAME" or "VALID" raises ParseException.
#define CHECK_PADDING_TYPE(NODE_DEF, PADDING) \
    if(PADDING != "SAME" && PADDING != "VALID" ) \
    { \
        throw ParseException( \
            boost::str( \
                boost::format( \
                    "Only 'SAME' and 'VALID' padding supported. Got %1% for %2% %3%") \
                    % PADDING \
                    % NODE_DEF.name() \
                    % CHECK_LOCATION().AsString())); \
    } \
341
342 } // namespace
343
// Maps a TensorFlow op name to the TfParser member function that converts it.
// Ops not listed here are unsupported and will fail when the graph is parsed.
const std::map<std::string, TfParser::OperationParsingFunction> TfParser::ms_OperationNameToParsingFunctions = {
    { "Const",                 &TfParser::ParseConst },
    { "Add",                   &TfParser::ParseAdd },
    { "BiasAdd",               &TfParser::ParseBiasAdd },
    { "Identity",              &TfParser::ParseIdentity },
    { "Conv2D",                &TfParser::ParseConv2D },
    { "DepthwiseConv2dNative", &TfParser::ParseDepthwiseConv2D },
    { "FusedBatchNorm",        &TfParser::ParseFusedBatchNorm },
    { "ConcatV2",              &TfParser::ParseConcat },
    { "LRN",                   &TfParser::ParseLrn },
    { "MatMul",                &TfParser::ParseMatMul },
    { "Mul",                   &TfParser::ParseMul },
    { "Placeholder",           &TfParser::ParsePlaceholder },
    { "Relu",                  &TfParser::ParseRelu },
    { "Relu6",                 &TfParser::ParseRelu6 },
    { "Reshape",               &TfParser::ParseReshape },
    { "ResizeBilinear",        &TfParser::ParseResizeBilinear },
    { "Shape",                 &TfParser::ParseShape },
    { "Squeeze",               &TfParser::ParseSqueeze },
    { "Sigmoid",               &TfParser::ParseSigmoid },
    { "Softmax",               &TfParser::ParseSoftmax },
    { "Softplus",              &TfParser::ParseSoftplus },
    { "Tanh",                  &TfParser::ParseTanh },
    { "MaxPool",               &TfParser::ParseMaxPool },
    { "AvgPool",               &TfParser::ParseAvgPool },
    { "Maximum",               &TfParser::ParseMaximum },
};
371
372 ITfParser* ITfParser::CreateRaw()
373 {
374     return new TfParser();
375 }
376
/// Creates a parser wrapped in a smart handle that calls ITfParser::Destroy on release.
ITfParserPtr ITfParser::Create()
{
    return ITfParserPtr(CreateRaw(), &ITfParser::Destroy);
}
381
/// Deletes a parser previously obtained from CreateRaw()/Create().
void ITfParser::Destroy(ITfParser* parser)
{
    delete parser;
}
386
/// Computes front/back padding for one dimension following TensorFlow's rules.
/// With samePadding false (VALID), both paddings are zero. With samePadding
/// true (SAME), the output covers ceil(inputSize / stride) positions and any
/// excess input the filter sweep requires is split evenly, the odd element
/// going to the back.
inline void CalculateSamePadding(uint32_t inputSize, uint32_t stride,
                                 uint32_t filterSize, bool samePadding,
                                 uint32_t* paddingFront, uint32_t* paddingBack) {
    *paddingFront = 0;
    *paddingBack = 0;

    if (!samePadding) {
        return;
    }

    // Number of output positions for SAME padding: ceil(inputSize / stride).
    const uint32_t outputSize = (inputSize + stride - 1) / stride;
    // Input extent required to produce that many outputs.
    const uint32_t requiredInput = (outputSize - 1) * stride + filterSize;
    if (requiredInput > inputSize) {
        const uint32_t totalPadding = requiredInput - inputSize;
        *paddingFront = totalPadding / 2;
        *paddingBack = totalPadding - *paddingFront;
    }
}
402
/// Computes head/tail padding for one spatial dimension.
/// Thin wrapper over CalculateSamePadding, reordering (kernel, stride) into
/// that function's (stride, filterSize) parameter order.
void CalcPadding(uint32_t input, uint32_t kernel, uint32_t stride, uint32_t& outPadHead, uint32_t& outPadTail,
                 bool samePadding)
{
    CalculateSamePadding(input, stride, kernel, samePadding, &outPadHead, &outPadTail);
}
408
409 /// An Abstract base class which represents a single tensorflow operation (node)
410 /// that has been (potentially partially) converted to Armnn.
411 /// It may not yet have been fully converted into actual Armnn layers.
412 class ParsedTfOperation
413 {
414 public:
415     ParsedTfOperation(TfParser* parser, const tensorflow::NodeDef& node)
416     : m_Parser(parser)
417     , m_Node(node)
418     {
419     }
420
421     virtual ~ParsedTfOperation() {};
422
423     const tensorflow::NodeDef& GetNode() const { return m_Node; }
424
425     /// Gets the ArmNN IOutputSlot corresponding to the given output index of the Tensorflow operation.
426     /// This may result in the creation of Armnn layers if this was deferred (e.g. see ParsedConstTfOperation).
427     virtual IOutputSlot& ResolveArmnnOutputSlot(unsigned int tfOutputIndex) = 0;
428
429     /// If this operation is an Identity then this will follow return the 'parent' operation (recursively).
430     virtual ParsedTfOperation* ResolveIdentityOperations()
431     {
432         return this;
433     }
434
435 protected:
436     TfParser* m_Parser;
437     const tensorflow::NodeDef& m_Node;
438 };
439
440 /// An ParsedTfOperation where the Armnn equivalent is a single layer,
441 /// with output slots that correspond directly to the Tf node outputs.
442 class SingleLayerParsedTfOperation : public ParsedTfOperation
443 {
444 public:
445     SingleLayerParsedTfOperation(TfParser* parser, const tensorflow::NodeDef& node, IConnectableLayer* layer)
446     : ParsedTfOperation(parser, node)
447     , m_Layer(layer)
448     {
449     }
450
451     IOutputSlot& ResolveArmnnOutputSlot(unsigned int tfOutputIndex) override
452     {
453         BOOST_ASSERT(m_Layer);
454         // Assumes one-to-one mapping between Tf and armnn output slots.
455         unsigned int armnnOutputSlotIdx = tfOutputIndex;
456         if (armnnOutputSlotIdx >= m_Layer->GetNumOutputSlots())
457         {
458             throw ParseException(
459                 boost::str(
460                     boost::format(
461                         "The requested output slot #%1% "
462                         "for %2% does not exist %3%")
463                         % armnnOutputSlotIdx
464                         % m_Layer->GetName()
465                         % CHECK_LOCATION().AsString()));
466         }
467         return m_Layer->GetOutputSlot(armnnOutputSlotIdx);
468     }
469
470 protected:
471     IConnectableLayer* m_Layer;
472 };
473
474 /// A SingleLayerParsedTfOperation for deferred layer creation.
475 class DeferredSingleLayerParsedTfOperation : public SingleLayerParsedTfOperation
476 {
477 public:
478     DeferredSingleLayerParsedTfOperation(TfParser* parser, const tensorflow::NodeDef& node)
479     : SingleLayerParsedTfOperation(parser, node, nullptr)
480     {
481     }
482
483     IOutputSlot& ResolveArmnnOutputSlot(unsigned int tfOutputIndex) override
484     {
485         if (!m_Layer)
486         {
487             CreateLayerDeferred();
488         }
489         return SingleLayerParsedTfOperation::ResolveArmnnOutputSlot(tfOutputIndex);
490     }
491
492 private:
493     virtual void CreateLayerDeferred() = 0;
494 };
495
496
// Constructs a parser with an empty network handle (null pointer and null
// deleter); the network is created later when a graph is parsed.
TfParser::TfParser()
    : m_Network(nullptr, nullptr)
{
}
501
502
503 const tensorflow::NodeDef* TfParser::ResolveIdentityNode(const tensorflow::NodeDef* nodeDef)
504 {
505     if (nodeDef->op() != "Identity")
506     {
507         return nodeDef;
508     }
509
510     if (nodeDef->input_size() != 1)
511     {
512         throw ParseException(
513             boost::str(
514                 boost::format(
515                     "Identity node should have a single input! %1% has %2% inputs %3%")
516                     % nodeDef->name()
517                     % nodeDef->input_size()
518                     % CHECK_LOCATION().AsString()));
519     }
520
521     auto it = m_NodesByName.find(nodeDef->input(0));
522     if (it != m_NodesByName.end())
523     {
524         const tensorflow::NodeDef* inputNode = it->second;
525         return ResolveIdentityNode(inputNode);
526     }
527     else
528     {
529         throw ParseException(
530             boost::str(
531                 boost::format(
532                     "Cannot find what the Identity node %1% is linked to! %2%")
533                     % nodeDef->name()
534                     % CHECK_LOCATION().AsString()));
535     }
536 }
537
538 std::vector<OutputOfConstNodeDef>
539 TfParser::GetTfInputNodes(const tensorflow::NodeDef& nodeDef) const
540 {
541     std::vector<OutputOfConstNodeDef> ret;
542
543     if (nodeDef.op() == "Const")
544     {
545         // For some reason const node can have "Control Inputs". We ignore them for now.
546         return ret;
547     }
548
549     ret.reserve(boost::numeric_cast<size_t>(nodeDef.input_size()));
550     for (int j = 0; j < nodeDef.input_size(); ++j)
551     {
552         OutputId outputId = ParseOutputId(nodeDef.input(j));
553
554         if (nodeDef.input(j)[0] == '^') // I couldn't find a better test for control inputs.
555         {
556             throw ParseException(
557                 boost::str(
558                     boost::format(
559                         "Node '%1%' has Control Input '%2%' for input #%3% which is unsupported. %4%")
560                         % nodeDef.name()
561                         % nodeDef.input(j)
562                         % j
563                         % CHECK_LOCATION().AsString()));
564         }
565
566         auto inputIt = m_NodesByName.find(outputId.m_IndexedValue);
567         if (inputIt == m_NodesByName.end())
568         {
569             throw ParseException(
570                 boost::str(
571                     boost::format(
572                         "Can't find node '%1%', which is listed as an input of '%2%' %3%")
573                         % nodeDef.input(j)
574                         % nodeDef.name()
575                         % CHECK_LOCATION().AsString()));
576         }
577         ret.push_back(OutputOfConstNodeDef(inputIt->second,outputId.m_Index));
578     }
579
580     return ret;
581 }
582
583 std::vector<OutputOfParsedTfOperation>
584 TfParser::GetInputParsedTfOperationsChecked(const tensorflow::NodeDef& nodeDef,
585                                             std::size_t expectedNumInputs)
586 {
587     // Fetches the tensorflow nodes connected as inputs and validate the size.
588     std::vector<OutputOfConstNodeDef> nodes = GetTfInputNodes(nodeDef);
589     const std::size_t numInputs = nodes.size();
590     if (numInputs != expectedNumInputs)
591     {
592         throw ParseException(
593             boost::str(
594                 boost::format(
595                     "Unexpected number of inputs for node %1%. Expected %2%, found %3% %4%")
596                     % nodeDef.name()
597                     % expectedNumInputs
598                     % numInputs
599                     % CHECK_LOCATION().AsString()));
600     }
601     // Fetches the corresponding ParsedTfOperation operations
602     std::vector<OutputOfParsedTfOperation> result;
603     for (auto&& node : nodes)
604     {
605         auto it = m_ParsedTfOperations.find(node.m_IndexedValue->name());
606         if (it == m_ParsedTfOperations.end())
607         {
608             throw ParseException(
609                 boost::str(
610                     boost::format(
611                         "Node with name '%1%' has not been parsed %2%")
612                         % node.m_IndexedValue->name()
613                         % CHECK_LOCATION().AsString()));
614         }
615         ParsedTfOperation* parsedOp = it->second.get();
616         // Transparently 'skip' any Identity operations. This simplifies the logic inside the ParseXXX() functions.
617         parsedOp = parsedOp->ResolveIdentityOperations();
618         result.push_back(OutputOfParsedTfOperation(parsedOp,node.m_Index));
619     }
620     return result;
621 }
622
623 ParsedTfOperationPtr TfParser::ParseAdd(const tensorflow::NodeDef& nodeDef, const tensorflow::GraphDef& graphDef)
624 {
625     std::vector<OutputOfParsedTfOperation> inputs = GetInputParsedTfOperationsChecked(nodeDef, 2);
626
627     // If one of the inputs is a MatMul and the other is a const, then we handle both nodes
628     // together as FullyConnected.
629     if (inputs[0].m_IndexedValue->GetNode().op() == "MatMul" &&
630         HasParsedConstTensor<float>(inputs[1].m_IndexedValue->GetNode().name()))
631     {
632         IConnectableLayer* layer =
633             AddFullyConnectedLayer(inputs[0].m_IndexedValue->GetNode(),
634                                    &nodeDef,nodeDef.name().c_str());
635         return std::make_unique<SingleLayerParsedTfOperation>(this, nodeDef, layer);
636     }
637     else if (HasParsedConstTensor<float>(inputs[0].m_IndexedValue->GetNode().name()) &&
638                                          inputs[1].m_IndexedValue->GetNode().op() == "MatMul")
639     {
640         IConnectableLayer* layer =
641             AddFullyConnectedLayer(inputs[1].m_IndexedValue->GetNode(),
642                                    &nodeDef,nodeDef.name().c_str());
643         return std::make_unique<SingleLayerParsedTfOperation>(this, nodeDef, layer);
644     }
645     else
646     {
647         // Otherwise it's just a regular addition.
648         return AddAdditionLayer(nodeDef);
649     }
650 }
651
// Parses a BiasAdd node by delegating to AddAdditionLayer with its boolean
// flag set — presumably marking BiasAdd-specific handling; see AddAdditionLayer.
ParsedTfOperationPtr TfParser::ParseBiasAdd(const tensorflow::NodeDef& nodeDef, const tensorflow::GraphDef& graphDef)
{
    return AddAdditionLayer(nodeDef, true);
}
656
/// An ParsedTfOperation which forwards to another (used for Identity nodes).
class ParsedIdentityTfOperation : public ParsedTfOperation
{
public:
    ParsedIdentityTfOperation(TfParser* parser, const tensorflow::NodeDef& node, ParsedTfOperation* representative)
        : ParsedTfOperation(parser, node)
        , m_Representative(representative)
    {
    }

    /// Forwards slot resolution to the represented operation.
    virtual IOutputSlot& ResolveArmnnOutputSlot(unsigned int tfOutputIndex) override
    {
        BOOST_ASSERT(m_Representative);
        return m_Representative->ResolveArmnnOutputSlot(tfOutputIndex);
    }

    /// Follows the chain of Identity operations to the underlying operation.
    virtual ParsedTfOperation* ResolveIdentityOperations() override
    {
        return m_Representative->ResolveIdentityOperations();
    }

private:
    /// The operation this Identity node stands in for; not owned.
    ParsedTfOperation* m_Representative;
};
681
// Parses an Identity node by creating a forwarding operation for its single input.
ParsedTfOperationPtr TfParser::ParseIdentity(const tensorflow::NodeDef& nodeDef, const tensorflow::GraphDef& graphDef)
{
    std::vector<OutputOfParsedTfOperation> inputs = GetInputParsedTfOperationsChecked(nodeDef, 1);
    // Any requests for the output slots of this node should be forwarded to the node connected as input.
    return std::make_unique<ParsedIdentityTfOperation>(this, nodeDef, inputs[0].m_IndexedValue);
}
688
/// An ParsedTfOperation for a Const node.
/// Creation of the armnn ConstLayer is deferred until it is actually needed, because Const nodes are mostly used
/// for weight inputs to MatMul/Conv2D nodes and in these cases armnn doesn't need a ConstLayer.
template <typename T>
class ParsedConstTfOperation : public DeferredSingleLayerParsedTfOperation
{
public:
    /// Copies the tensor data into owned storage; tensorData must hold at
    /// least tensorInfo.GetNumElements() values of type T.
    ParsedConstTfOperation(TfParser* parser, const tensorflow::NodeDef& node,
        const T* tensorData, const TensorInfo& tensorInfo)
        : DeferredSingleLayerParsedTfOperation(parser, node),
        m_Storage(tensorData, tensorData + tensorInfo.GetNumElements()),
        m_TensorInfo(tensorInfo)
    {
        // The stored element type must match the tensor's declared data type.
        BOOST_ASSERT(tensorInfo.GetDataType() == GetDataType<T>());
    }

    /// Creates the armnn ConstantLayer on first use (see DeferredSingleLayerParsedTfOperation).
    void CreateLayerDeferred() override
    {
        BOOST_ASSERT(m_Layer == nullptr);
        m_Layer = m_Parser->m_Network->AddConstantLayer(ConstTensor(m_TensorInfo, m_Storage), m_Node.name().c_str());
        m_Layer->GetOutputSlot(0).SetTensorInfo(m_TensorInfo);
    }

    /// Returns the constant data as a ConstTensor backed by the caller's
    /// outputTensorData buffer. When swizzleForConvolutionWeights is set the
    /// data is permuted from TensorFlow's HWIO filter layout to ArmNN's OIHW.
    ConstTensor GetConstTensor(bool swizzleForConvolutionWeights, std::vector<T>& outputTensorData) const
    {
        // Mappings from TensorFlow filter tensors to the ArmNN filter tensors.
        // Tensorflow weights are [H, W, In, Out].
        // ArmNN weights are [Out, In, H, W].
        static const PermutationVector HWIOToOIHW = {2, 3, 1, 0};

        const TensorInfo outInfo = swizzleForConvolutionWeights
                                   ? armnnUtils::Permuted(m_TensorInfo, HWIOToOIHW)
                                   : m_TensorInfo;

        outputTensorData.resize(m_TensorInfo.GetNumElements());

        // Copies or swizzles from the permanent storage into the storage the caller provided.
        if (swizzleForConvolutionWeights)
        {
            armnnUtils::Permute(outInfo.GetShape(), HWIOToOIHW, m_Storage.data(), outputTensorData.data());
        }
        else
        {
            memcpy(outputTensorData.data(), m_Storage.data(), m_TensorInfo.GetNumBytes());
        }
        // Updates the result to point to the user provided storage.
        ConstTensor constTensor(outInfo, outputTensorData);
        return constTensor;
    }

private:
    ///< Manages the lifetime of the tensor data.
    std::vector<T> m_Storage;
    ///< Describes the layout of the tensor and points to the data in m_Storage.
    TensorInfo m_TensorInfo;
};
745
746 DataType ConvertTfTensorDataType(const tensorflow::DataType tfDataType,
747                                  const tensorflow::NodeDef& nodeDef)
748 {
749     switch (tfDataType)
750     {
751     case tensorflow::DT_FLOAT:
752         return DataType::Float32;
753         break;
754     case tensorflow::DT_INT32:
755         return DataType::Signed32;
756         break;
757     default:
758         throw ParseException(
759             boost::str(
760                 boost::format(
761                     "Unknown DataType %1% for node %2% %3%")
762                     % tensorflow::DataType_Name(tfDataType)
763                     % nodeDef.name()
764                     % CHECK_LOCATION().AsString()));
765     }
766 }
767
768 struct ParseTfTensorValueList
769 {
770     template<typename DataType>
771     static void Parse(
772         const tensorflow::TensorProto& tfTensor,
773         unsigned int dstElements,
774         std::vector<int8_t>& outputData);
775
776     template <typename DataType>
777     static void ReadData(const void* srcData, unsigned int numSrcElements,
778         std::vector<int8_t>& dstData, unsigned int numDstElements)
779     {
780         // If there are no entries in the list, perform no action.
781         if (numSrcElements == 0)
782         {
783             return;
784         }
785
786         // If no size was provided, use the length of the value list.
787         if (numDstElements == 0)
788         {
789             numDstElements = numSrcElements;
790         }
791
792         // Allocates memory.
793         dstData.resize(std::max(numSrcElements, numDstElements) * sizeof(DataType));
794
795         const DataType* srcTensor = reinterpret_cast<const DataType*>(srcData);
796         DataType* dstTensor = reinterpret_cast<DataType*>(dstData.data());
797
798         // Copies the value list entries into the destination.
799         std::copy(srcTensor, srcTensor + numSrcElements, dstTensor);
800
801         if (numDstElements > numSrcElements)
802         {
803             // Uses the last element in the list to fill the remaining entries.
804             std::fill(dstTensor + numSrcElements, dstTensor + numDstElements, srcTensor[numSrcElements - 1]);
805         }
806     }
807
808 };
809
// Reads a float tensor's value list (float_val) into the output byte buffer,
// padding to dstElements via ReadData's last-value replication.
template <>
void ParseTfTensorValueList::Parse<float>(const tensorflow::TensorProto& tfTensor,
    unsigned int dstElements, std::vector<int8_t>& outputData)
{
    ReadData<float>(tfTensor.float_val().data(), static_cast<unsigned int>(tfTensor.float_val_size()),
        outputData, dstElements);
}
817
// Specialization: reads the proto's int32 value list ("int_val") and expands
// it to dstElements entries (see ReadData for the padding behaviour).
template <>
void ParseTfTensorValueList::Parse<int32_t>(const tensorflow::TensorProto& tfTensor,
    unsigned int dstElements, std::vector<int8_t>& outputData)
{
    ReadData<int32_t>(tfTensor.int_val().data(), static_cast<unsigned int>(tfTensor.int_val_size()),
        outputData, dstElements);
}
825
// Factory functor used with InvokeParseFunction: constructs an
// OperatorType<DataType> parsed-operation object for the given node, perfectly
// forwarding any extra constructor arguments. T is unused by this primary
// template (it exists so specializations can take extra fixed parameters).
template <template<typename> class OperatorType, typename T = int8_t>
struct MakeTfOperation
{
    template<typename DataType, class... Args>
    inline static std::unique_ptr<OperatorType<DataType>> Parse(TfParser* parser, const tensorflow::NodeDef& node,
        Args&&... args)
    {
        return std::make_unique<OperatorType<DataType>>(parser, node, std::forward<Args>(args)...);
    }
};
836
// Specialization for ParsedConstTfOperation: the raw tensor bytes gathered by
// ParseConst are reinterpreted as the concrete element type before the
// operation object is constructed.
template <>
struct MakeTfOperation<ParsedConstTfOperation>
{
    template<typename DataType, class... Args>
    inline static std::unique_ptr<ParsedConstTfOperation<DataType>> Parse(TfParser* parser,
        const tensorflow::NodeDef& node, const std::vector<int8_t>& tensorData, const TensorInfo& tensorInfo)
    {
        return std::make_unique<ParsedConstTfOperation<DataType>>(parser, node,
            reinterpret_cast<const DataType*>(tensorData.data()), tensorInfo);
    }
};
848
849 template <class FuncType>
850 struct InvokeParseFunction
851 {
852     template<class ResType, class... Args>
853     inline static ResType Result(DataType dataType, Args&&... args)
854     {
855         if (dataType == DataType::Float32)
856         {
857             return FuncType::template Parse<float>(std::forward<Args>(args)...);
858         }
859         else if (dataType == DataType::Signed32)
860         {
861             return FuncType::template Parse<int32_t>(std::forward<Args>(args)...);
862         }
863
864         return ResType();
865     }
866
867     template<class... Args>
868     inline static void Result(DataType dataType, Args&&... args)
869     {
870         if (dataType == DataType::Float32)
871         {
872             FuncType::template Parse<float>(std::forward<Args>(args)...);
873         }
874         else if (dataType == DataType::Signed32)
875         {
876             FuncType::template Parse<int32_t>(std::forward<Args>(args)...);
877         }
878     }
879 };
880
// Parses a TF "Const" node into a ParsedConstTfOperation holding the tensor's
// data and shape. The data comes either from the proto's typed value list
// (float_val / int_val) or from the packed "tensor_content" bytes.
ParsedTfOperationPtr TfParser::ParseConst(const tensorflow::NodeDef& nodeDef, const tensorflow::GraphDef& graphDef)
{
    BOOST_ASSERT(nodeDef.op() == "Const");

    // A Const node without a "value" attribute is malformed.
    if (nodeDef.attr().count("value") == 0)
    {
        throw ParseException(
            boost::str(
                boost::format(
                    "Value not found for Const node - %1% %2%")
                    % nodeDef.name()
                    % CHECK_LOCATION().AsString()));
    }

    const tensorflow::TensorProto& tfTensor = nodeDef.attr().at("value").tensor();
    const tensorflow::TensorShapeProto& tfTensorShape = tfTensor.tensor_shape();
    const tensorflow::DataType tfDataType = ReadMandatoryNodeTypeAttribute(nodeDef, "dtype");

    // NOTE(review): proto dimension sizes are wider integers; they are narrowed
    // implicitly into unsigned int by the transform below.
    const auto GetDimensionSize = [](auto& d) { return d.size(); };

    std::vector<unsigned int> dimensionSizes;
    std::transform(tfTensorShape.dim().begin(), tfTensorShape.dim().end(),
        std::back_inserter(dimensionSizes), GetDimensionSize);

    // Calculates the number of elements implied by the shape (0 when no shape
    // is provided, which triggers the 1D-inference path below).
    const DataType dataType = ConvertTfTensorDataType(tfDataType, nodeDef);
    unsigned int numElements = 0U;

    if (!dimensionSizes.empty())
    {
        numElements = std::accumulate(dimensionSizes.begin(), dimensionSizes.end(),
                                      1U, std::multiplies<unsigned int>());
    }

    std::vector<int8_t> tensorData;

    // Gets tensor data from the list of values attribute.
    if (tfTensor.tensor_content().empty())
    {
        InvokeParseFunction<ParseTfTensorValueList>::Result<void>(dataType, tfTensor, numElements, tensorData);

        // If the tensor shape is not defined, but there is a value list, then interpret the data as a 1D
        // tensor of the provided number of elements.
        if (numElements == 0)
        {
            const unsigned int tfNumElements =
                static_cast<unsigned int>(tensorData.size()) / GetDataTypeSize(dataType);
            dimensionSizes.push_back(tfNumElements);
        }
    }
    // Gets tensor data from tensor content attribute.
    else
    {
        tensorData.assign(tfTensor.tensor_content().begin(), tfTensor.tensor_content().end());

        // Checks if a tensor shape is defined for the tensor content: packed
        // bytes carry no element count of their own, so a shape is mandatory here.
        if (numElements == 0)
        {
            throw ParseException(
                boost::str(
                    boost::format(
                        "No tensor shape found for Const node - %1% %2%")
                        % nodeDef.name()
                        % CHECK_LOCATION().AsString()));
        }
    }

    // Const node requires at least a list of values or a content attribute.
    if (tensorData.empty())
    {
        throw ParseException(
            boost::str(
                boost::format(
                    "No tensor data found for Const node - %1% %2%")
                    % nodeDef.name()
                    % CHECK_LOCATION().AsString()));
    }

    const TensorInfo tensorInfo(static_cast<unsigned int>(dimensionSizes.size()),
                                dimensionSizes.data(),
                                dataType);

    // If we have a list of values, then the length of the list must be
    // less than or equal to the number of elements implied by the shape argument.
    // (ParseTfTensorValueList pads short lists up to the shape's element count,
    // so only an over-long list can trip this check.)
    if (tensorData.size() > tensorInfo.GetNumBytes())
    {
        throw ParseException(
            boost::str(
                boost::format(
                    "Number of elements (%1%) should be less than or equal "
                    "to the number of elements implied by the shape argument (%2%) for Const node - %3% %4%")
                    % (tensorData.size() / GetDataTypeSize(dataType))
                    % tensorInfo.GetNumElements()
                    % nodeDef.name()
                    % CHECK_LOCATION().AsString()));
    }

    // Builds a ParsedConstTfOperation of the concrete element type.
    return InvokeParseFunction<MakeTfOperation<ParsedConstTfOperation>>::Result<ParsedTfOperationPtr>(
        dataType, this, nodeDef, tensorData, tensorInfo);
}
981
982 template<typename Type>
983 bool TfParser::HasParsedConstTensor(const std::string & nodeName) const
984 {
985     auto it = m_ParsedTfOperations.find(nodeName);
986     if (it == m_ParsedTfOperations.end() ||
987         dynamic_cast<ParsedConstTfOperation<Type>*>(it->second.get()) == nullptr)
988     {
989         return false;
990     }
991     else
992     {
993         return true;
994     }
995 }
996
// Parses a TF "Conv2D" node into an ArmNN Convolution2dLayer.
// Inputs: [0] = feature map, [1] = weights (must be a constant float node).
// ArmNN's native layout here is NCHW; NHWC graphs are handled by wrapping the
// convolution in swizzle (NHWC->NCHW) / deswizzle permute layers.
ParsedTfOperationPtr TfParser::ParseConv2D(const tensorflow::NodeDef& nodeDef,
    const tensorflow::GraphDef& graphDef)
{
    std::vector<OutputOfParsedTfOperation> inputs = GetInputParsedTfOperationsChecked(nodeDef, 2);
    IOutputSlot& inputSlot = inputs[0].m_IndexedValue->ResolveArmnnOutputSlot(inputs[0].m_Index);
    TensorInfo inputTensorInfo = inputSlot.GetTensorInfo();

    // Weights must be constant so they can be baked into the ArmNN layer.
    if (!HasParsedConstTensor<float>(inputs[1].m_IndexedValue->GetNode().name()))
    {
        throw ParseException(
            boost::str(
                boost::format(
                    "ArmNN only supports Convolution layers with constant weights for %1%, input %2% %3%")
                    % nodeDef.name()
                    % inputs[1].m_IndexedValue->GetNode().name()
                    % CHECK_LOCATION().AsString()));
    }
    ParsedConstTfOperation<float>* weightNode =
        boost::polymorphic_downcast<ParsedConstTfOperation<float> *>(inputs[1].m_IndexedValue);

    std::string paddingString = ReadMandatoryNodeStringAttribute(nodeDef, "padding");
    std::string dataFormat = ReadMandatoryNodeStringAttribute(nodeDef, "data_format");
    std::vector<uint32_t> strides = ReadMandatoryNodeUint32ListAttribute(nodeDef, "strides");

    // Read the dilations, if present - only [1,1,1,1] (the default) is supported.
    std::vector<uint32_t> dilations = ReadOptionalNodeUint32ListAttribute(nodeDef, "dilations");
    if (!dilations.empty())
    {
        for (auto dilation : dilations)
        {
            if (dilation != 1u)
            {
                throw ParseException(
                    boost::str(
                        boost::format(
                            "ArmNN only supports Convolution layers with dilations [1,1,1,1] for %1% %2%")
                            % nodeDef.name()
                            % CHECK_LOCATION().AsString()));
            }
        }
    }

    Convolution2dDescriptor desc;
    desc.m_BiasEnabled = false;

    // Throws unless dataFormat is one of the supported layouts.
    CHECK_DATA_FORMAT(nodeDef, dataFormat, "Conv2D");

    // The stride vector is indexed per the TF data_format:
    // NHWC -> [batch, H, W, channel]; NCHW -> [batch, channel, H, W].
    if (dataFormat == "NHWC")
    {
        desc.m_StrideX = strides[2];
        desc.m_StrideY = strides[1];
        // Swizzles input to supported memory layout.
        inputTensorInfo = armnnUtils::Permuted(inputSlot.GetTensorInfo(), NHWCToArmNN);
    }
    else if (dataFormat == "NCHW")
    {
        desc.m_StrideX = strides[3];
        desc.m_StrideY = strides[2];
    }

    // From here on inputTensorInfo is in NCHW order, so [2]=height, [3]=width.
    uint32_t inputHeight = inputTensorInfo.GetShape()[2];
    uint32_t inputWidth = inputTensorInfo.GetShape()[3];

    std::vector<float> outputTensorData;

    // GetConstTensor(true, ...) — presumably requests the conv weight layout;
    // TODO(review) confirm what the bool argument selects.
    ConstTensor weightTensor = weightNode->GetConstTensor(true, outputTensorData);

    // Weight shape indices 2/3 are taken as filter height/width here, and
    // index 0 (used below) as the output channel count.
    uint32_t weightHeight = weightTensor.GetShape()[2];
    uint32_t weightWidth = weightTensor.GetShape()[3];

    bool padding = false;
    TensorInfo outputInfo;

    // Throws unless paddingString is "SAME" or "VALID", so outputInfo is
    // always initialized by one of the branches below.
    CHECK_PADDING_TYPE(nodeDef, paddingString);

    if (paddingString == "SAME")
    {
        padding = true;
        // SAME: output spatial dims = ceil(input / stride).
        outputInfo = TensorInfo({ inputTensorInfo.GetShape()[0],
                                  weightTensor.GetShape()[0],
                                  static_cast<uint32_t>(ceil(
                                      static_cast<float>(inputHeight) /
                                      static_cast<float>(desc.m_StrideY))),
                                  static_cast<uint32_t>(ceil(
                                      static_cast<float>(inputWidth) /
                                      static_cast<float>(desc.m_StrideX)))
                                }, DataType::Float32);
    }
    else if (paddingString == "VALID")
    {
        padding = false;
        // VALID: output spatial dims = ceil((input - kernel + 1) / stride).
        outputInfo = TensorInfo({ inputTensorInfo.GetShape()[0],
                                  weightTensor.GetShape()[0],
                                  static_cast<uint32_t>(ceil(
                                      static_cast<float>(inputHeight - weightHeight + 1) /
                                      static_cast<float>(desc.m_StrideY))),
                                  static_cast<uint32_t>(ceil(
                                      static_cast<float>(inputWidth - weightWidth + 1) /
                                      static_cast<float>(desc.m_StrideX)))
                                }, DataType::Float32);
    }

    // Fills in the descriptor's pad fields (zeros for VALID).
    CalcPadding(inputHeight, weightHeight, desc.m_StrideY, desc.m_PadTop, desc.m_PadBottom, padding);
    CalcPadding(inputWidth, weightWidth, desc.m_StrideX, desc.m_PadLeft, desc.m_PadRight, padding);

    IConnectableLayer* layer = m_Network->AddConvolution2dLayer(desc, weightTensor, nodeDef.name().c_str());
    layer->GetOutputSlot(0).SetTensorInfo(outputInfo);

    if (dataFormat == "NHWC")
    {
        // Wraps the conv in permute layers so the graph stays NHWC externally.
        layer = SwizzleInDeswizzleOut(*m_Network, inputSlot, *layer, nodeDef.name());
    }
    else
    {
        inputSlot.Connect(layer->GetInputSlot(0));
    }

    return std::make_unique<SingleLayerParsedTfOperation>(this, nodeDef, layer);
}
1116
// Parses a TF "DepthwiseConv2dNative" node into an ArmNN
// DepthwiseConvolution2dLayer. Inputs: [0] = feature map, [1] = weights
// (must be a constant float node). NHWC graphs are wrapped in swizzle /
// deswizzle permutes, as in ParseConv2D.
ParsedTfOperationPtr TfParser::ParseDepthwiseConv2D(const tensorflow::NodeDef& nodeDef,
                                                    const tensorflow::GraphDef& graphDef)
{
    std::vector<OutputOfParsedTfOperation> inputs = GetInputParsedTfOperationsChecked(nodeDef, 2);
    IOutputSlot& inputSlot = inputs[0].m_IndexedValue->ResolveArmnnOutputSlot(inputs[0].m_Index);
    TensorInfo inputTensorInfo = inputSlot.GetTensorInfo();

    // Weights must be constant so they can be baked into the ArmNN layer.
    if (!HasParsedConstTensor<float>(inputs[1].m_IndexedValue->GetNode().name()))
    {
        throw ParseException(
            boost::str(
                boost::format(
                    "ArmNN only supports Depthwise Convolution layer with constant weights. "
                    "Non const input found %1% for node %2% %3%")
                    % inputs[1].m_IndexedValue->GetNode().name()
                    % nodeDef.name()
                    % CHECK_LOCATION().AsString()));
    }
    ParsedConstTfOperation<float>* weightNode =
        boost::polymorphic_downcast<ParsedConstTfOperation<float> *>(inputs[1].m_IndexedValue);


    std::string paddingString = ReadMandatoryNodeStringAttribute(nodeDef, "padding");
    std::string dataFormat = ReadMandatoryNodeStringAttribute(nodeDef, "data_format");
    std::vector<uint32_t> strides = ReadMandatoryNodeUint32ListAttribute(nodeDef, "strides");

    DepthwiseConvolution2dDescriptor desc;
    desc.m_BiasEnabled = false;

    // Throws unless dataFormat is one of the supported layouts.
    CHECK_DATA_FORMAT(nodeDef, dataFormat, "DepthwiseConv2dNative");

    // The stride vector is indexed per the TF data_format:
    // NHWC -> [batch, H, W, channel]; NCHW -> [batch, channel, H, W].
    if (dataFormat == "NHWC")
    {
        desc.m_StrideX = strides[2];
        desc.m_StrideY = strides[1];
        // Swizzles input to supported memory layout.
        inputTensorInfo = armnnUtils::Permuted(inputSlot.GetTensorInfo(), NHWCToArmNN);
    }
    else if (dataFormat == "NCHW")
    {
        desc.m_StrideX = strides[3];
        desc.m_StrideY = strides[2];
    }

    // From here on inputTensorInfo is in NCHW order, so [2]=height, [3]=width.
    uint32_t inputHeight = inputTensorInfo.GetShape()[2];
    uint32_t inputWidth = inputTensorInfo.GetShape()[3];

    std::vector<float> outputTensorData;

    ConstTensor weightTensor = weightNode->GetConstTensor(true, outputTensorData);

    // Weight shape indices 2/3 are taken as filter height/width here; the
    // output channel count below is shape[0] * shape[1] (presumably channel
    // multiplier times input channels — per ArmNN's depthwise weight layout).
    uint32_t weightHeight = weightTensor.GetShape()[2];
    uint32_t weightWidth = weightTensor.GetShape()[3];

    bool padding = false;
    TensorInfo outputInfo;

    // Throws unless paddingString is "SAME" or "VALID", so outputInfo is
    // always initialized by one of the branches below.
    CHECK_PADDING_TYPE(nodeDef, paddingString);

    if (paddingString == "SAME")
    {
        padding = true;
        // SAME: output spatial dims = ceil(input / stride).
        outputInfo = TensorInfo({ inputTensorInfo.GetShape()[0],
                                weightTensor.GetShape()[0] * weightTensor.GetShape()[1],
                                static_cast<uint32_t>(ceil(
                                    static_cast<float>(inputHeight) /
                                    static_cast<float>(desc.m_StrideY))),
                                static_cast<uint32_t>(ceil(
                                    static_cast<float>(inputWidth) /
                                    static_cast<float>(desc.m_StrideX)))
                                }, DataType::Float32);
    }
    else if (paddingString == "VALID")
    {
        padding = false;
        // VALID: output spatial dims = ceil((input - kernel + 1) / stride).
        outputInfo = TensorInfo({ inputTensorInfo.GetShape()[0],
                                weightTensor.GetShape()[0] * weightTensor.GetShape()[1],
                                static_cast<uint32_t>(ceil(
                                    static_cast<float>(inputHeight - weightHeight + 1) /
                                    static_cast<float>(desc.m_StrideY))),
                                static_cast<uint32_t>(ceil(
                                    static_cast<float>(inputWidth - weightWidth + 1) /
                                    static_cast<float>(desc.m_StrideX)))
                                }, DataType::Float32);
    }

    // Fills in the descriptor's pad fields (zeros for VALID).
    CalcPadding(inputHeight, weightHeight, desc.m_StrideY, desc.m_PadTop, desc.m_PadBottom, padding);
    CalcPadding(inputWidth, weightWidth, desc.m_StrideX, desc.m_PadLeft, desc.m_PadRight, padding);

    IConnectableLayer* layer = m_Network->AddDepthwiseConvolution2dLayer(desc, weightTensor, nodeDef.name().c_str());
    layer->GetOutputSlot(0).SetTensorInfo(outputInfo);

    if (dataFormat == "NHWC")
    {
        // Wraps the layer in permute layers so the graph stays NHWC externally.
        layer = SwizzleInDeswizzleOut(*m_Network, inputSlot, *layer, nodeDef.name());
    }
    else
    {
        inputSlot.Connect(layer->GetInputSlot(0));
    }

    return std::make_unique<SingleLayerParsedTfOperation>(this, nodeDef, layer);
}
1220
1221 ParsedTfOperationPtr TfParser::ParseFusedBatchNorm(const tensorflow::NodeDef& nodeDef,
1222                                                    const tensorflow::GraphDef& graphDef)
1223 {
1224     std::vector<OutputOfParsedTfOperation> inputs = GetInputParsedTfOperationsChecked(nodeDef, 5);
1225
1226     if (!HasParsedConstTensor<float>(inputs[1].m_IndexedValue->GetNode().name()))
1227     {
1228         throw ParseException(
1229             boost::str(
1230                 boost::format(
1231                     "ArmNN only supports FusedBatchNormalization layers with constant scale. "
1232                     "Input %1%. Node %2% %3%")
1233                     % inputs[1].m_IndexedValue->GetNode().name()
1234                     % nodeDef.name()
1235                     % CHECK_LOCATION().AsString()));
1236     }
1237     ParsedConstTfOperation<float>* scaleNode =
1238         boost::polymorphic_downcast<ParsedConstTfOperation<float> *>(inputs[1].m_IndexedValue);
1239
1240     if (!HasParsedConstTensor<float>(inputs[2].m_IndexedValue->GetNode().name()))
1241     {
1242         throw ParseException(
1243             boost::str(
1244                 boost::format(
1245                     "ArmNN only supports FusedBatchNormalization layers with constant offset. "
1246                     "Input %1%. Node %2% %3%")
1247                     % inputs[2].m_IndexedValue->GetNode().name()
1248                     % nodeDef.name()
1249                     % CHECK_LOCATION().AsString()));
1250     }
1251     ParsedConstTfOperation<float>* offsetNode =
1252         boost::polymorphic_downcast<ParsedConstTfOperation<float> *>(inputs[2].m_IndexedValue);
1253
1254     if (!HasParsedConstTensor<float>(inputs[3].m_IndexedValue->GetNode().name()))
1255     {
1256         throw ParseException(
1257             boost::str(
1258                 boost::format(
1259                     "ArmNN only supports FusedBatchNormalization layers with constant mean. "
1260                     "Input %1%. Node %2% %3%")
1261                     % inputs[3].m_IndexedValue->GetNode().name()
1262                     % nodeDef.name()
1263                     % CHECK_LOCATION().AsString()));
1264     }
1265     ParsedConstTfOperation<float>* meanNode =
1266         boost::polymorphic_downcast<ParsedConstTfOperation<float> *>(inputs[3].m_IndexedValue);
1267
1268     if (!HasParsedConstTensor<float>(inputs[4].m_IndexedValue->GetNode().name()))
1269     {
1270         throw ParseException(
1271             boost::str(
1272                 boost::format(
1273                     "ArmNN only supports FusedBatchNormalization layers with constant variance. "
1274                     "Input %1%. Node %2% %3%")
1275                     % inputs[4].m_IndexedValue->GetNode().name()
1276                     % nodeDef.name()
1277                     % CHECK_LOCATION().AsString()));
1278     }
1279     ParsedConstTfOperation<float>* varianceNode =
1280         boost::polymorphic_downcast<ParsedConstTfOperation<float> *>(inputs[4].m_IndexedValue);
1281
1282     // The descriptor only has the epsilon attribute.
1283     BatchNormalizationDescriptor desc;
1284     desc.m_Eps = ReadMandatoryNodeFloatAttribute(nodeDef, "epsilon");
1285
1286     // Data for the parsed tensor args (scale, offset, mean, variance) must be stored
1287     // locally until the layer is added.
1288     std::vector<float> scaleTensorData;
1289     ConstTensor scaleTensor = scaleNode->GetConstTensor(false, scaleTensorData);
1290
1291     std::vector<float> offsetTensorData;
1292     ConstTensor offsetTensor = offsetNode->GetConstTensor(false, offsetTensorData);
1293
1294     std::vector<float> meanTensorData;
1295     ConstTensor meanTensor = meanNode->GetConstTensor(false, meanTensorData);
1296
1297     std::vector<float> varianceTensorData;
1298     ConstTensor varianceTensor = varianceNode->GetConstTensor(false, varianceTensorData);
1299
1300     IConnectableLayer* layer = m_Network->AddBatchNormalizationLayer(desc,
1301                                                                      meanTensor,
1302                                                                      varianceTensor,
1303                                                                      offsetTensor,
1304                                                                      scaleTensor,
1305                                                                      nodeDef.name().c_str());
1306
1307     IOutputSlot& inputSlot = inputs[0].m_IndexedValue->ResolveArmnnOutputSlot(inputs[0].m_Index);
1308
1309     const std::string dataFormat = ReadMandatoryNodeStringAttribute(nodeDef, "data_format");
1310
1311     if (dataFormat == "NHWC")
1312     {
1313         const TensorInfo outputTensorInfo = armnnUtils::Permuted(inputSlot.GetTensorInfo(), NHWCToArmNN);
1314         layer->GetOutputSlot(0).SetTensorInfo(outputTensorInfo);
1315         layer = SwizzleInDeswizzleOut(*m_Network, inputSlot, *layer, nodeDef.name());
1316     }
1317     else
1318     {
1319         layer->GetOutputSlot(0).SetTensorInfo(inputSlot.GetTensorInfo());
1320         inputSlot.Connect(layer->GetInputSlot(0));
1321     }
1322
1323     return std::make_unique<SingleLayerParsedTfOperation>(this, nodeDef, layer);
1324 }
1325
1326 bool TfParser::IsSupportedLeakyReluPattern(const tensorflow::NodeDef& mulNodeDef,
1327                                            size_t alphaLayerIndex,
1328                                            const OutputOfParsedTfOperation& otherOp,
1329                                            armnn::IOutputSlot** outputOfLeakyRelu,
1330                                            armnn::ActivationDescriptor & desc)
1331 {
1332     const tensorflow::NodeDef& otherNodeDef = otherOp.m_IndexedValue->GetNode();
1333
1334     // Verifying all these assumptions hold:
1335     //
1336     // 1, the mulNodeDef is an elementwise multiplication node "Mul"
1337     // 2, the alphaLayerIndex selects a constant node from the inputs of the "Mul" node
1338     // 3, the inputLayerIndex selects a layer which has the same name as otherNodeDef
1339     //
1340
1341     if (mulNodeDef.op() == "Mul")
1342     {
1343         size_t otherLayerIndex = (alphaLayerIndex == 0 ? 1 : 0);
1344         std::vector<OutputOfParsedTfOperation> inputs = GetInputParsedTfOperationsChecked(mulNodeDef, 2);
1345
1346         BOOST_ASSERT(inputs.size() == 2);
1347         BOOST_ASSERT((otherLayerIndex == 0 || alphaLayerIndex == 0));
1348         BOOST_ASSERT((otherLayerIndex == 1 || alphaLayerIndex == 1));
1349         BOOST_ASSERT(((otherLayerIndex + alphaLayerIndex) == 1));
1350
1351         if (inputs[otherLayerIndex].m_IndexedValue->GetNode().name() == otherNodeDef.name())
1352         {
1353             if (HasParsedConstTensor<float>(inputs[alphaLayerIndex].m_IndexedValue->GetNode().name()))
1354             {
1355                 ParsedConstTfOperation<float>* alpha =
1356                     boost::polymorphic_downcast<ParsedConstTfOperation<float> *>(
1357                         inputs[alphaLayerIndex].m_IndexedValue);
1358
1359                 std::vector<float> const_data;
1360                 ConstTensor const_tensor = alpha->GetConstTensor(false, const_data);
1361
1362                 if (const_data.size() == 1)
1363                 {
1364                     desc.m_Function = ActivationFunction::LeakyReLu;
1365                     desc.m_A = const_data[0];
1366
1367                     *outputOfLeakyRelu = &(otherOp.m_IndexedValue->ResolveArmnnOutputSlot(otherOp.m_Index));
1368                     return true;
1369                 }
1370             }
1371         }
1372     }
1373     return false;
1374 }
1375
1376 // For max nodes, we only support those as part of a leaky relu, i.e.,
1377 // as part for a max(mul(a, x), x) expression. We thus need to
1378 // identify one input as a multiplication with a scalar constant,
1379 // extract the constant and the two inputs, verify that the two other
1380 // inputs are the same node, and then create a leaky relu node.
1381
1382 ParsedTfOperationPtr TfParser::ParseMaximum(const tensorflow::NodeDef& nodeDef,
1383                                             const tensorflow::GraphDef& graphDef)
1384 {
1385     std::vector<OutputOfParsedTfOperation> inputs = GetInputParsedTfOperationsChecked(nodeDef, 2);
1386     auto inputNode0 = inputs[0].m_IndexedValue->GetNode();
1387     auto inputNode1 = inputs[1].m_IndexedValue->GetNode();
1388     IOutputSlot* outputOfLeakyRelu = nullptr;
1389
1390     ActivationDescriptor desc;
1391
1392     // There are four possible scenarios we need to support (respectively below):
1393     // 1, max(mul(a, x), x)
1394     // 2, max(mul(x, a), x)
1395     // 3, max(x, mul(a, x))
1396     // 4, max(x, mul(x, a))
1397
1398     if (IsSupportedLeakyReluPattern(inputNode0, 0, inputs[1], &outputOfLeakyRelu, desc) ||
1399         IsSupportedLeakyReluPattern(inputNode0, 1, inputs[1], &outputOfLeakyRelu, desc) ||
1400         IsSupportedLeakyReluPattern(inputNode1, 0, inputs[0], &outputOfLeakyRelu, desc) ||
1401         IsSupportedLeakyReluPattern(inputNode1, 1, inputs[0], &outputOfLeakyRelu, desc))
1402     {
1403         BOOST_ASSERT(outputOfLeakyRelu != nullptr);
1404
1405         IConnectableLayer* const layer = m_Network->AddActivationLayer(desc, nodeDef.name().c_str());
1406         outputOfLeakyRelu->Connect(layer->GetInputSlot(0));
1407         layer->GetOutputSlot(0).SetTensorInfo(outputOfLeakyRelu->GetTensorInfo());
1408         return std::make_unique<SingleLayerParsedTfOperation>(this, nodeDef, layer);
1409     }
1410     else
1411     {
1412         throw ParseException(
1413             boost::str(
1414                 boost::format(
1415                     "ArmNN currenly offers limited support for Maximum node when it can be fused to "
1416                     "form a LeakyRelu activation as leakyrelu=max(mul(alpha, X), X). "
1417                     "Node: %1% %2%")
1418                     % nodeDef.name()
1419                     % CHECK_LOCATION().AsString()));
1420     }
1421 }
1422
// Parses a TF "Concat" node into an ArmNN MergerLayer. The last TF input is a
// constant scalar giving the concatenation axis; only concatenation along the
// channel dimension is supported (axis 1 for NCHW inputs, axis 3 for NHWC —
// the latter handled by swizzling every input to NCHW and deswizzling the
// merged output back).
ParsedTfOperationPtr TfParser::ParseConcat(const tensorflow::NodeDef& nodeDef,
                                           const tensorflow::GraphDef& graphDef)
{
    std::vector<OutputOfConstNodeDef> nodes = GetTfInputNodes(nodeDef);
    // In tensorflow, we have the last input of the Concat layer as the axis for concatenation.
    unsigned int numInputs = static_cast<unsigned int>(nodes.size());
    unsigned int numConcatView = numInputs - 1;

    OriginsDescriptor concatDescriptor(static_cast<uint32_t>(numConcatView), MaxNumOfTensorDimensions);
    std::vector<unsigned int>mergeDimSizes(MaxNumOfTensorDimensions, 0u);

    // Running offset along the merge dimension; becomes the output size there.
    unsigned int mergeDim = 0;
    std::vector<OutputOfParsedTfOperation> inputs = GetInputParsedTfOperationsChecked(nodeDef, numInputs);

    // The last input is the axis for concatenation.
    if (!HasParsedConstTensor<int32_t>(inputs[numInputs - 1].m_IndexedValue->GetNode().name()))
    {
        throw ParseException(
            boost::str(
                boost::format(
                    "ArmNN only supports Concat with constant axis. "
                    "Input %1%. Node %2% %3%")
                    % inputs[numInputs - 1].m_IndexedValue->GetNode().name()
                    % nodeDef.name()
                    % CHECK_LOCATION().AsString()));
    }
    ParsedConstTfOperation<int32_t>* shapeNode =
            boost::polymorphic_downcast<ParsedConstTfOperation<int32_t>*>(inputs[numInputs - 1].m_IndexedValue);

    std::vector<int32_t> axisTensorData;
    ConstTensor axisTensor = shapeNode->GetConstTensor(false, axisTensorData);

    // This concatDim indicates the data format: 3 is the NHWC, 1 is the NCHW.
    const unsigned int concatDimInput = static_cast<unsigned int>(axisTensorData[0]);

    // Armnn supports concatenation along the channel dimension for data formats NHWC and NCHW.
    if (concatDimInput == 0 || concatDimInput == 2)
    {
        throw ParseException(
            boost::str(
                boost::format(
                    "Dimension %1% for concatenation is not supported by Armnn. "
                    "Node %2% %3%")
                    % concatDimInput
                    % nodeDef.name()
                    % CHECK_LOCATION().AsString()));
    }

    // This is the only concatDim we support in armnn (channel dim, since all
    // views are handled in NCHW order from here on).
    const unsigned int concatDim = 1;
    for (unsigned int viewIndex = 0; viewIndex < numConcatView; ++viewIndex)
    {
        // Need to double check whether it should be
        IOutputSlot& inputSlot =
            inputs[viewIndex].m_IndexedValue->ResolveArmnnOutputSlot(inputs[viewIndex].m_Index);
        TensorInfo inputTensorInfo = inputSlot.GetTensorInfo();

        // Every view must be 4D (MaxNumOfTensorDimensions).
        if (inputTensorInfo.GetNumDimensions() != MaxNumOfTensorDimensions)
        {
            throw ParseException(
                boost::str(
                    boost::format(
                        "The number of dimensions: %1% for input tensors of the "
                        "concatenation op should be %2% for Node %3% %4%")
                        % inputTensorInfo.GetNumDimensions()
                        % MaxNumOfTensorDimensions
                        % nodeDef.name()
                        % CHECK_LOCATION().AsString()));
        }

        // NHWC inputs are considered in their swizzled (NCHW) shape.
        if (concatDimInput == 3)
        {
            inputTensorInfo = armnnUtils::Permuted(inputTensorInfo, NHWCToArmNN);
        }

        for (unsigned int dim = 0; dim < MaxNumOfTensorDimensions; ++dim)
        {
            mergeDimSizes[dim] = inputTensorInfo.GetShape()[dim];
        }

        // View origin is zero in every dimension except the merge dimension,
        // where each view starts where the previous one ended.
        for (unsigned int j = 0; j < concatDim; ++j)
        {
            concatDescriptor.SetViewOriginCoord(viewIndex, j, 0);
        }

        concatDescriptor.SetViewOriginCoord(viewIndex, concatDim, mergeDim);
        mergeDim += mergeDimSizes[concatDim];

        for (unsigned int j = concatDim+1; j < MaxNumOfTensorDimensions; ++j)
        {
            concatDescriptor.SetViewOriginCoord(viewIndex, j, 0);
        }
    }

    // Output shape: last view's dims, with the merge dim replaced by the total.
    mergeDimSizes[concatDim] = mergeDim;
    armnn::IConnectableLayer *layer = m_Network->AddMergerLayer(concatDescriptor, nodeDef.name().c_str());

    layer->GetOutputSlot(0).SetTensorInfo(armnn::TensorInfo(MaxNumOfTensorDimensions, mergeDimSizes.data(),
                                                            DataType::Float32));

    for (unsigned int v = 0; v < numConcatView; ++v)
    {
        IOutputSlot& inputSlot = inputs[v].m_IndexedValue->ResolveArmnnOutputSlot(inputs[v].m_Index);
        // NHWC views are swizzled into NCHW before feeding the merger.
        if (concatDimInput == 3)
        {
            IConnectableLayer* const swizzleLayer = AddSwizzleLayer(*m_Network, inputSlot, NHWCToArmNN,
                                                                    "swizzle_for-" + nodeDef.name());
            swizzleLayer->GetOutputSlot(0).Connect(layer->GetInputSlot(v));
        }
        else
        {
            inputSlot.Connect(layer->GetInputSlot(v));
        }
    }

    // For NHWC graphs, deswizzle the merged result back to NHWC.
    if (concatDimInput == 3)
    {
        IConnectableLayer* const deswizzleLayer = AddSwizzleLayer(*m_Network, layer->GetOutputSlot(0), ArmNNToNHWC,
                                                                  "deswizzle_for-" + nodeDef.name());
        layer = deswizzleLayer;
    }

    return std::make_unique<SingleLayerParsedTfOperation>(this, nodeDef, layer);
}
1547
1548 ParsedTfOperationPtr TfParser::ParseShape(const tensorflow::NodeDef& nodeDef,
1549     const tensorflow::GraphDef& graphDef)
1550 {
1551     // Note: the Shape layer is handled in a special way, because:
1552     //        1. ARMNN doesn't support int32 tensors which it outputs.
1553     //        2. ARMNN works with statically shaped tensors which are known at parse time.
1554     //        3. because of 1. and 2. we treat the output of Shape as a temporary const int32
1555     //           tensor which may be used as an input to other ops, most likely a Reshape.
1556
1557     const tensorflow::DataType tfDataType = ReadMandatoryNodeTypeAttribute(nodeDef, "out_type");
1558     if (tfDataType != tensorflow::DT_INT32)
1559     {
1560         throw ParseException(
1561             boost::str(
1562                 boost::format(
1563                     "Armnn only supports DT_INT32 as out_type. Got %1% for Node %2% %3%")
1564                     % tensorflow::DataType_Name(tfDataType)
1565                     % nodeDef.name()
1566                     % CHECK_LOCATION().AsString()));
1567     }
1568
1569     const std::vector<OutputOfParsedTfOperation> inputs = GetInputParsedTfOperationsChecked(nodeDef, 1);
1570     IOutputSlot& prevLayerOutputSlot = inputs[0].m_IndexedValue->ResolveArmnnOutputSlot(inputs[0].m_Index);
1571     const TensorInfo& prevLayerTensorInfo = prevLayerOutputSlot.GetTensorInfo();
1572     unsigned int prevLayerDimensions = prevLayerTensorInfo.GetNumDimensions();
1573
1574     std::vector<int32_t> shapeTensorData;
1575     shapeTensorData.reserve(prevLayerDimensions);
1576
1577     for (unsigned int i=0; i<prevLayerDimensions; ++i)
1578     {
1579         shapeTensorData.push_back(static_cast<int32_t>(prevLayerTensorInfo.GetShape()[i]));
1580     }
1581
1582     TensorInfo shapeTensorInfo(1, &prevLayerDimensions, DataType::Signed32);
1583
1584     return std::make_unique<ParsedConstTfOperation<int32_t>>(this,
1585                                                              nodeDef,
1586                                                              &shapeTensorData[0],
1587                                                              shapeTensorInfo);
1588 }
1589
1590 ParsedTfOperationPtr TfParser::ParseReshape(const tensorflow::NodeDef& nodeDef,
1591     const tensorflow::GraphDef& graphDef)
1592 {
1593     std::vector<OutputOfParsedTfOperation> inputs = GetInputParsedTfOperationsChecked(nodeDef, 2);
1594     ParsedTfOperation* inputNode = inputs[0].m_IndexedValue;
1595
1596     if (!HasParsedConstTensor<int32_t>(inputs[1].m_IndexedValue->GetNode().name()))
1597     {
1598         throw ParseException(
1599             boost::str(
1600                 boost::format(
1601                     "ArmNN only supports Reshape layers with constant shapes. "
1602                     "Input %1% Node %2% %3%")
1603                     % inputs[1].m_IndexedValue->GetNode().name()
1604                     % nodeDef.name()
1605                     % CHECK_LOCATION().AsString()));
1606     }
1607     ParsedConstTfOperation<int32_t>* shapeNode =
1608         boost::polymorphic_downcast<ParsedConstTfOperation<int32_t>*>(inputs[1].m_IndexedValue);
1609
1610     armnn::IOutputSlot& prevLayerOutputSlot = inputNode->ResolveArmnnOutputSlot(inputs[0].m_Index);
1611     TensorInfo inputTensorInfo = prevLayerOutputSlot.GetTensorInfo();
1612
1613     std::vector<int32_t> shapeTensorData;
1614     ConstTensor shapeTensor = shapeNode->GetConstTensor(false, shapeTensorData);
1615     const TensorInfo outputTensorInfo = PrepareReshape(inputTensorInfo, shapeTensorData);
1616
1617     TensorShape targetShape = outputTensorInfo.GetShape();
1618     ReshapeDescriptor reshapeDesc;
1619     reshapeDesc.m_TargetShape = targetShape;
1620
1621     IConnectableLayer* layer = m_Network->AddReshapeLayer(reshapeDesc, nodeDef.name().c_str());
1622     prevLayerOutputSlot.Connect(layer->GetInputSlot(0));
1623     layer->GetOutputSlot(0).SetTensorInfo(outputTensorInfo);
1624
1625     return std::make_unique<SingleLayerParsedTfOperation>(this, nodeDef, layer);
1626 }
1627
1628 ParsedTfOperationPtr TfParser::ParseResizeBilinear(const tensorflow::NodeDef& nodeDef,
1629     const tensorflow::GraphDef& graphDef)
1630 {
1631     std::vector<OutputOfParsedTfOperation> inputs = GetInputParsedTfOperationsChecked(nodeDef, 2);
1632
1633     if (!HasParsedConstTensor<int32_t>(inputs[1].m_IndexedValue->GetNode().name()))
1634     {
1635         throw ParseException(
1636             boost::str(
1637                 boost::format(
1638                     "ArmNN only supports ResizeBilinear layers with constant sizes. "
1639                     "Input %1%. Node %2% %3%")
1640                     % inputs[1].m_IndexedValue->GetNode().name()
1641                     % nodeDef.name()
1642                     % CHECK_LOCATION().AsString()));
1643     }
1644     ParsedConstTfOperation<int32_t>* sizeNode =
1645         boost::polymorphic_downcast<ParsedConstTfOperation<int32_t>*>(inputs[1].m_IndexedValue);
1646
1647     // Checks the align_corners attribute is not set.
1648     if (ReadOptionalNodeBoolAttribute(nodeDef, "align_corners", false))
1649     {
1650         throw ParseException(
1651             boost::str(
1652                 boost::format(
1653                     "ArmNN only supports ResizeBilinear layers with align_corners set to false. "
1654                     "Node %1% %2%")
1655                     % nodeDef.name()
1656                     % CHECK_LOCATION().AsString()));
1657     }
1658
1659     // Data for the parsed tensor args (size) must be stored locally.
1660     std::vector<int32_t> sizeTensorData;
1661     ConstTensor sizeTensor = sizeNode->GetConstTensor(false, sizeTensorData);
1662
1663     // The descriptor only has target height and width attributes, which we get from the size tensor.
1664     ResizeBilinearDescriptor desc;
1665     desc.m_TargetHeight = static_cast<uint32_t> (sizeTensorData[0]);
1666     desc.m_TargetWidth = static_cast<uint32_t> (sizeTensorData[1]);
1667
1668     IConnectableLayer* layer = m_Network->AddResizeBilinearLayer(desc, nodeDef.name().c_str());
1669
1670     IOutputSlot& inputSlot = inputs[0].m_IndexedValue->ResolveArmnnOutputSlot(inputs[0].m_Index);
1671     TensorInfo inputTensorInfo = inputSlot.GetTensorInfo();
1672     // The input shape is always in BHWC format, this will be swizzled below; for now,
1673     // get the batch and channels to make up the ArmNN output shape with the target size.
1674     unsigned int outBatch = inputTensorInfo.GetShape()[0];
1675     unsigned int outChannels = inputTensorInfo.GetShape()[3];
1676     unsigned int outHeight = desc.m_TargetHeight;
1677     unsigned int outWidth = desc.m_TargetWidth;
1678     TensorShape outShape({outBatch, outChannels, outHeight, outWidth});
1679     // The output DataType is always Float32, regardless of the input DataType.
1680     const TensorInfo outputTensorInfo(outShape, armnn::DataType::Float32);
1681     layer->GetOutputSlot(0).SetTensorInfo(outputTensorInfo);
1682
1683     // TensorFlow ResizeBilinear input is always in BHWC format, so add swizzle and deswizzle layers.
1684     layer = SwizzleInDeswizzleOut(*m_Network, inputSlot, *layer, nodeDef.name());
1685
1686     return std::make_unique<SingleLayerParsedTfOperation>(this, nodeDef, layer);
1687 }
1688
1689 TensorInfo OutputShapeOfSqueeze(const tensorflow::NodeDef& nodeDef, TensorInfo inputTensorInfo)
1690 {
1691     BOOST_ASSERT(nodeDef.op() == "Squeeze");
1692     tensorflow::DataType tfDataType = ReadMandatoryNodeTypeAttribute(nodeDef, "T");
1693
1694     DataType type;
1695     if (tfDataType == tensorflow::DT_FLOAT)
1696     {
1697         type = DataType::Float32;
1698     }
1699     else if (tfDataType == tensorflow::DT_INT32)
1700     {
1701         type = DataType::Signed32;
1702     }
1703     else
1704     {
1705         throw ParseException(
1706             boost::str(
1707                 boost::format("Unsupported DataType %1% for Squeeze operation %2% %3%")
1708                 % tensorflow::DataType_Name(tfDataType)
1709                 % nodeDef.name()
1710                 % CHECK_LOCATION().AsString()));
1711     }
1712
1713
1714     if (inputTensorInfo.GetNumDimensions() > 4)
1715     {
1716         throw ParseException(
1717             boost::str(
1718                 boost::format(
1719                     "Unsupported number of dimensions: %1% for input shape for Squeeze %2% %3%")
1720                     % inputTensorInfo.GetNumDimensions()
1721                     % nodeDef.name()
1722                     % CHECK_LOCATION().AsString()));
1723     }
1724
1725     std::vector<uint32_t> squeezeDims = ReadOptionalNodeUint32ListAttribute(nodeDef, "squeeze_dims");
1726     static const uint32_t dimensionSequence[] = { 0, 1, 2, 3 };
1727
1728     if (squeezeDims.empty())
1729     {
1730         squeezeDims.assign(dimensionSequence,
1731                            dimensionSequence+inputTensorInfo.GetNumDimensions());
1732     }
1733
1734     std::vector<uint32_t> outputDims;
1735     for(unsigned int i = 0; i < inputTensorInfo.GetNumDimensions(); i++)
1736     {
1737         bool skipSqueeze = (std::find(squeezeDims.begin(), squeezeDims.end(), i) == squeezeDims.end());
1738         auto currentDimension = inputTensorInfo.GetShape()[i];
1739         if (skipSqueeze || currentDimension != 1)
1740         {
1741             outputDims.push_back(currentDimension);
1742         }
1743     }
1744
1745     if (outputDims.size() > 4)
1746     {
1747         throw ParseException(
1748             boost::str(
1749                 boost::format(
1750                     "Unsupported number of dimensions: %1% for output shape for Squeeze %2% %3%")
1751                     % outputDims.size()
1752                     % nodeDef.name()
1753                     % CHECK_LOCATION().AsString()));
1754     }
1755
1756     TensorShape outShape = TensorShape(static_cast<unsigned int>(outputDims.size()),
1757                                        outputDims.data());
1758
1759     TensorInfo outTensorInfo = inputTensorInfo;
1760     outTensorInfo.SetShape(outShape);
1761     outTensorInfo.SetDataType(type);
1762
1763     return outTensorInfo;
1764 }
1765
1766 ParsedTfOperationPtr TfParser::ParseSqueeze(const tensorflow::NodeDef& nodeDef, const tensorflow::GraphDef& graphDef)
1767 {
1768     std::vector<OutputOfParsedTfOperation> inputs = GetInputParsedTfOperationsChecked(nodeDef, 1);
1769
1770     IOutputSlot& prevLayerOutputSlot = inputs[0].m_IndexedValue->ResolveArmnnOutputSlot(inputs[0].m_Index);
1771     TensorInfo inputTensorInfo = prevLayerOutputSlot.GetTensorInfo();
1772
1773     TensorInfo outputInfo;
1774     outputInfo = OutputShapeOfSqueeze(nodeDef, inputTensorInfo);
1775
1776     ReshapeDescriptor reshapeDesc;
1777     reshapeDesc.m_TargetShape = outputInfo.GetShape();
1778     IConnectableLayer* layer = m_Network->AddReshapeLayer(reshapeDesc, nodeDef.name().c_str());
1779     prevLayerOutputSlot.Connect(layer->GetInputSlot(0));
1780     layer->GetOutputSlot(0).SetTensorInfo(outputInfo);
1781
1782     return std::make_unique<SingleLayerParsedTfOperation>(this, nodeDef, layer);
1783 }
1784
1785 ParsedTfOperationPtr TfParser::ParseLrn(const tensorflow::NodeDef& nodeDef, const tensorflow::GraphDef& graphDef)
1786 {
1787     std::vector<OutputOfParsedTfOperation> inputs = GetInputParsedTfOperationsChecked(nodeDef, 1);
1788
1789     NormalizationDescriptor normalizationDescriptor;
1790     normalizationDescriptor.m_NormMethodType = NormalizationAlgorithmMethod::LocalBrightness;
1791     normalizationDescriptor.m_NormChannelType = NormalizationAlgorithmChannel::Across;
1792     normalizationDescriptor.m_Alpha = ReadMandatoryNodeFloatAttribute(nodeDef, "alpha");
1793     normalizationDescriptor.m_Beta = ReadMandatoryNodeFloatAttribute(nodeDef, "beta");
1794     normalizationDescriptor.m_K = ReadMandatoryNodeFloatAttribute(nodeDef, "bias");
1795     normalizationDescriptor.m_NormSize = ReadMandatoryNodeUint32Attribute(nodeDef, "depth_radius");
1796
1797     // The window size must be an odd value. For a window size of (2 * n + 1), TensorFlow defines depth_radius = n.
1798     normalizationDescriptor.m_NormSize = normalizationDescriptor.m_NormSize * 2 + 1;
1799
1800     IOutputSlot& prevLayerOutputSlot = inputs[0].m_IndexedValue->ResolveArmnnOutputSlot(inputs[0].m_Index);
1801
1802     IConnectableLayer* layer = m_Network->AddNormalizationLayer(normalizationDescriptor,
1803         nodeDef.name().c_str());
1804
1805     const TensorInfo permutedInfo = armnnUtils::Permuted(prevLayerOutputSlot.GetTensorInfo(), NHWCToArmNN);
1806     layer->GetOutputSlot(0).SetTensorInfo(permutedInfo);
1807
1808     layer = SwizzleInDeswizzleOut(*m_Network, prevLayerOutputSlot, *layer, nodeDef.name());
1809
1810     return std::make_unique<SingleLayerParsedTfOperation>(this, nodeDef, layer);
1811 }
1812
1813 /// An ParsedTfOperation for a MatMul node.
1814 /// Creation of the armnn FullyConnected layer is deferred until it is actually needed, because
1815 /// MatMul nodes are often used for the first part of a biased FullyConnected (MatMul followed
1816 /// by Add) and in these cases armnn doesn't need a separate layer for the MatMul.
1817 ///
1818 class ParsedMatMulTfOperation : public DeferredSingleLayerParsedTfOperation
1819 {
1820 public:
1821     ParsedMatMulTfOperation(TfParser* parser, const tensorflow::NodeDef& node)
1822         : DeferredSingleLayerParsedTfOperation(parser, node)
1823     {
1824     }
1825
1826     void CreateLayerDeferred() override
1827     {
1828         BOOST_ASSERT(m_Layer == nullptr);
1829         m_Layer = m_Parser->AddFullyConnectedLayer(m_Node, nullptr, m_Node.name().c_str());
1830     }
1831 };
1832
1833 ParsedTfOperationPtr TfParser::ParseMatMul(const tensorflow::NodeDef& nodeDef, const tensorflow::GraphDef& graphDef)
1834 {
1835     // Defers the creation of the layer (see ParsedMatMulTfOperation).
1836     return std::make_unique<ParsedMatMulTfOperation>(this, nodeDef);
1837 }
1838
1839 /// An ParsedTfOperation for a Mul node.
1840 /// Creation of the armnn Mul layer is deferred until it is actually needed, because Mul nodes
1841 /// are also used for the first part of a leaky relu activation function (Mul followed by Maximum)
1842 /// and in these cases armnn doesn't need a separate layer for the Mul.
1843 ///
1844 class ParsedMulTfOperation : public DeferredSingleLayerParsedTfOperation
1845 {
1846 public:
1847     ParsedMulTfOperation(TfParser* parser, const tensorflow::NodeDef& node)
1848         : DeferredSingleLayerParsedTfOperation(parser, node)
1849     {
1850     }
1851
1852     void CreateLayerDeferred() override
1853     {
1854         BOOST_ASSERT(m_Layer == nullptr);
1855         m_Layer = m_Parser->AddMultiplicationLayer(m_Node);
1856     }
1857 };
1858
1859 ParsedTfOperationPtr TfParser::ParseMul(const tensorflow::NodeDef& nodeDef, const tensorflow::GraphDef& graphDef)
1860 {
1861     boost::ignore_unused(graphDef);
1862
1863     return std::make_unique<ParsedMulTfOperation>(this, nodeDef);
1864 }
1865
1866 ParsedTfOperationPtr TfParser::ParsePlaceholder(const tensorflow::NodeDef& nodeDef,
1867     const tensorflow::GraphDef& graphDef)
1868 {
1869     boost::ignore_unused(graphDef);
1870
1871     std::vector<OutputOfParsedTfOperation> inputs = GetInputParsedTfOperationsChecked(nodeDef, 0);
1872
1873     const LayerBindingId layerId = boost::numeric_cast<LayerBindingId>(m_NetworkInputsBindingInfo.size());
1874
1875     auto it = m_InputShapes.find(nodeDef.name());
1876     if (it == m_InputShapes.end())
1877     {
1878         throw ParseException(
1879             boost::str(
1880                 boost::format(
1881                     "Missing input shape for Placeholder '%1%' %2%")
1882                     % nodeDef.name()
1883                     % CHECK_LOCATION().AsString()));
1884     }
1885     TensorInfo tensorInfo(it->second, DataType::Float32);
1886
1887     IConnectableLayer* const layer = m_Network->AddInputLayer(layerId, nodeDef.name().c_str());
1888
1889     layer->GetOutputSlot(0).SetTensorInfo(tensorInfo);
1890
1891     TrackInputBinding(layer, layerId, tensorInfo);
1892
1893     return std::make_unique<SingleLayerParsedTfOperation>(this, nodeDef, layer);
1894 }
1895
1896 ParsedTfOperationPtr TfParser::ParseRelu(const tensorflow::NodeDef& nodeDef,
1897     const tensorflow::GraphDef& graphDef)
1898 {
1899     boost::ignore_unused(graphDef);
1900
1901     ActivationDescriptor activationDesc;
1902     activationDesc.m_Function = ActivationFunction::ReLu;
1903     return AddActivationLayer(nodeDef, activationDesc);
1904 }
1905
1906 ParsedTfOperationPtr TfParser::ParseRelu6(const tensorflow::NodeDef& nodeDef,
1907     const tensorflow::GraphDef& graphDef)
1908 {
1909     boost::ignore_unused(graphDef);
1910
1911     ActivationDescriptor activationDesc;
1912     activationDesc.m_Function = ActivationFunction::BoundedReLu;
1913     activationDesc.m_A = 6.0f;
1914     activationDesc.m_B = 0.0f;
1915
1916     return AddActivationLayer(nodeDef, activationDesc);
1917 }
1918
1919 ParsedTfOperationPtr TfParser::ParseSigmoid(const tensorflow::NodeDef& nodeDef,
1920     const tensorflow::GraphDef& graphDef)
1921 {
1922     boost::ignore_unused(graphDef);
1923
1924     ActivationDescriptor activationDesc;
1925     activationDesc.m_Function = ActivationFunction::Sigmoid;
1926
1927     return AddActivationLayer(nodeDef, activationDesc);
1928 }
1929
1930 ParsedTfOperationPtr TfParser::ParseSoftmax(const tensorflow::NodeDef& nodeDef,
1931     const tensorflow::GraphDef& graphDef)
1932 {
1933     boost::ignore_unused(graphDef);
1934
1935     std::vector<OutputOfParsedTfOperation> inputs = GetInputParsedTfOperationsChecked(nodeDef, 1);
1936
1937     SoftmaxDescriptor softmaxDescriptor;
1938     IConnectableLayer* const layer = m_Network->AddSoftmaxLayer(softmaxDescriptor, nodeDef.name().c_str());
1939
1940     IOutputSlot& prevLayerSlot = inputs[0].m_IndexedValue->ResolveArmnnOutputSlot(inputs[0].m_Index);
1941     prevLayerSlot.Connect(layer->GetInputSlot(0));
1942     layer->GetOutputSlot(0).SetTensorInfo(prevLayerSlot.GetTensorInfo());
1943
1944     return std::make_unique<SingleLayerParsedTfOperation>(this, nodeDef, layer);
1945 }
1946
1947 ParsedTfOperationPtr TfParser::ParseSoftplus(const tensorflow::NodeDef& nodeDef,
1948     const tensorflow::GraphDef& graphDef)
1949 {
1950     boost::ignore_unused(graphDef);
1951
1952     ActivationDescriptor activationDesc;
1953     activationDesc.m_Function = ActivationFunction::SoftReLu;
1954
1955     return AddActivationLayer(nodeDef, activationDesc);
1956 }
1957
1958 ParsedTfOperationPtr TfParser::ParseTanh(const tensorflow::NodeDef& nodeDef, const tensorflow::GraphDef& graphDef)
1959 {
1960     boost::ignore_unused(graphDef);
1961
1962     ActivationDescriptor activationDesc;
1963     activationDesc.m_Function = ActivationFunction::TanH;
1964     activationDesc.m_A = 1.0f;
1965     activationDesc.m_B = 1.0f;
1966
1967     return AddActivationLayer(nodeDef, activationDesc);
1968 }
1969
1970 ParsedTfOperationPtr TfParser::AddActivationLayer(const tensorflow::NodeDef& nodeDef,
1971     ActivationDescriptor& activationDesc)
1972 {
1973     std::vector<OutputOfParsedTfOperation> inputs = GetInputParsedTfOperationsChecked(nodeDef, 1);
1974
1975     IConnectableLayer* const layer = m_Network->AddActivationLayer(activationDesc, nodeDef.name().c_str());
1976
1977     IOutputSlot& prevLayerOutputSlot = inputs[0].m_IndexedValue->ResolveArmnnOutputSlot(inputs[0].m_Index);
1978     prevLayerOutputSlot.Connect(layer->GetInputSlot(0));
1979     layer->GetOutputSlot(0).SetTensorInfo(prevLayerOutputSlot.GetTensorInfo());
1980     return std::make_unique<SingleLayerParsedTfOperation>(this, nodeDef, layer);
1981 }
1982
1983 ParsedTfOperationPtr TfParser::ParseMaxPool(const tensorflow::NodeDef& nodeDef,
1984     const tensorflow::GraphDef& graphDef)
1985 {
1986     return ParsePooling2d(nodeDef, graphDef, PoolingAlgorithm::Max);
1987 }
1988
1989 ParsedTfOperationPtr TfParser::ParseAvgPool(const tensorflow::NodeDef& nodeDef,
1990     const tensorflow::GraphDef& graphDef)
1991 {
1992     return ParsePooling2d(nodeDef, graphDef, PoolingAlgorithm::Average);
1993 }
1994
1995 ParsedTfOperationPtr TfParser::ParsePooling2d(const tensorflow::NodeDef& nodeDef,
1996     const tensorflow::GraphDef& graphDef, PoolingAlgorithm pooltype)
1997 {
1998     std::vector<OutputOfParsedTfOperation> inputs = GetInputParsedTfOperationsChecked(nodeDef, 1);
1999     IOutputSlot& inputSlot = inputs[0].m_IndexedValue->ResolveArmnnOutputSlot(inputs[0].m_Index);
2000     TensorInfo inputTensorInfo = inputSlot.GetTensorInfo();
2001
2002     if (inputs.size() != 1)
2003     {
2004         throw ParseException(
2005             boost::str(
2006                 boost::format(
2007                     "2D Pooling expects one input!. Got %1% for Node %2% %3%")
2008                     % inputs.size()
2009                     % nodeDef.name()
2010                     % CHECK_LOCATION().AsString()));
2011     }
2012
2013     std::string paddingString = ReadMandatoryNodeStringAttribute(nodeDef, "padding");
2014     std::string dataFormat = ReadMandatoryNodeStringAttribute(nodeDef, "data_format");
2015     std::vector<uint32_t> strides = ReadMandatoryNodeUint32ListAttribute(nodeDef, "strides");
2016     std::vector<uint32_t> ksize = ReadMandatoryNodeUint32ListAttribute(nodeDef, "ksize"); // size of pool windows
2017
2018     Pooling2dDescriptor pooling2dDescriptor;
2019     pooling2dDescriptor.m_PoolType = pooltype;
2020     pooling2dDescriptor.m_PaddingMethod = PaddingMethod::Exclude;
2021     pooling2dDescriptor.m_OutputShapeRounding = OutputShapeRounding::Floor;
2022
2023     CHECK_DATA_FORMAT(nodeDef, dataFormat, "Pooling2D");
2024
2025     if (dataFormat == "NHWC")
2026     {
2027         pooling2dDescriptor.m_StrideX    = strides[2];
2028         pooling2dDescriptor.m_StrideY    = strides[1];
2029         pooling2dDescriptor.m_PoolWidth  = ksize[2];
2030         pooling2dDescriptor.m_PoolHeight = ksize[1];
2031         // Swizzles input to supported memory layout.
2032         inputTensorInfo = armnnUtils::Permuted(inputSlot.GetTensorInfo(), NHWCToArmNN);
2033     }
2034     else if (dataFormat == "NCHW")
2035     {
2036         pooling2dDescriptor.m_StrideX    = strides[3];
2037         pooling2dDescriptor.m_StrideY    = strides[2];
2038         pooling2dDescriptor.m_PoolWidth  = ksize[3];
2039         pooling2dDescriptor.m_PoolHeight = ksize[2];
2040     }
2041
2042     uint32_t inputHeight = inputTensorInfo.GetShape()[2];
2043     uint32_t inputWidth = inputTensorInfo.GetShape()[3];
2044
2045     bool padding = false;
2046     TensorInfo outputInfo;
2047
2048     CHECK_PADDING_TYPE(nodeDef, paddingString);
2049
2050     if (paddingString == "SAME")
2051     {
2052         padding = true;
2053         outputInfo = TensorInfo({ inputTensorInfo.GetShape()[0],
2054                                   inputTensorInfo.GetShape()[1],
2055                                   static_cast<uint32_t>(ceil(
2056                                       static_cast<float>(inputHeight) /
2057                                       static_cast<float>(pooling2dDescriptor.m_StrideY))),
2058                                   static_cast<uint32_t>(ceil(
2059                                       static_cast<float>(inputWidth) /
2060                                       static_cast<float>(pooling2dDescriptor.m_StrideX)))
2061                                 }, DataType::Float32);
2062     }
2063     else if (paddingString == "VALID")
2064     {
2065         padding = false;
2066         outputInfo = TensorInfo({ inputTensorInfo.GetShape()[0],
2067                                   inputTensorInfo.GetShape()[1],
2068                                   static_cast<uint32_t>(ceil(
2069                                       static_cast<float>(inputHeight - pooling2dDescriptor.m_PoolHeight + 1) /
2070                                       static_cast<float>(pooling2dDescriptor.m_StrideY))),
2071                                   static_cast<uint32_t>(ceil(
2072                                       static_cast<float>(inputWidth - pooling2dDescriptor.m_PoolWidth + 1) /
2073                                       static_cast<float>(pooling2dDescriptor.m_StrideX)))
2074                                 }, DataType::Float32);
2075     }
2076
2077     CalcPadding(inputWidth, pooling2dDescriptor.m_PoolWidth, pooling2dDescriptor.m_StrideX,
2078                     pooling2dDescriptor.m_PadLeft, pooling2dDescriptor.m_PadRight, padding);
2079     CalcPadding(inputHeight, pooling2dDescriptor.m_PoolHeight, pooling2dDescriptor.m_StrideY,
2080                     pooling2dDescriptor.m_PadTop, pooling2dDescriptor.m_PadBottom, padding);
2081
2082
2083     IConnectableLayer* layer = m_Network->AddPooling2dLayer(pooling2dDescriptor, nodeDef.name().c_str());
2084     if (layer == nullptr)
2085     {
2086         throw ParseException(
2087             boost::str(
2088                 boost::format(
2089                     "Failed to add pooling2d layer for %1% %2%")
2090                     % nodeDef.name()
2091                     % CHECK_LOCATION().AsString()));
2092     }
2093
2094     layer->GetOutputSlot(0).SetTensorInfo(outputInfo);
2095
2096     if (dataFormat == "NHWC")
2097     {
2098         layer = SwizzleInDeswizzleOut(*m_Network, inputSlot, *layer, nodeDef.name());
2099     }
2100     else
2101     {
2102         inputSlot.Connect(layer->GetInputSlot(0));
2103     }
2104
2105     return std::make_unique<SingleLayerParsedTfOperation>(this, nodeDef, layer);
2106 }
2107
// Shared implementation for Add and BiasAdd nodes. When isBiasAdd is true, the
// second input is a 1D bias that is reshaped for broadcast; otherwise either
// side may be a 1D tensor that needs broadcasting against the other.
ParsedTfOperationPtr TfParser::AddAdditionLayer(const tensorflow::NodeDef& nodeDef, bool isBiasAdd)
{
    std::vector<OutputOfParsedTfOperation> inputs = GetInputParsedTfOperationsChecked(nodeDef, 2);

    IOutputSlot* input0Slot = &inputs[0].m_IndexedValue->ResolveArmnnOutputSlot(inputs[0].m_Index);
    IOutputSlot* input1Slot = &inputs[1].m_IndexedValue->ResolveArmnnOutputSlot(inputs[1].m_Index);

    // NOTE: these references stay bound to the ORIGINAL slots' infos even after the
    // slot pointers are redirected by BroadcastForAddandMul below; the final
    // output-info selection relies on that.
    const TensorInfo& input0Info = input0Slot->GetTensorInfo();
    const TensorInfo& input1Info = input1Slot->GetTensorInfo();

    if (isBiasAdd)
    {
        // BiasAdd takes bias as a 1D tensor. We need to add a reshape layer to create a 4D tensor
        // with the same data in the correct dimension for broadcast in addition.
        if(input1Info.GetNumDimensions() != 1)
        {
            throw ParseException(
                boost::str(
                    boost::format(
                        "Unsupported bias for BiasAdd. It should be a 1D vector. "
                        "Got %1% dimensions for input %2%. Node %3% %4%")
                        % input1Info.GetNumDimensions()
                        % inputs[1].m_IndexedValue->GetNode().name()
                        % nodeDef.name()
                        % CHECK_LOCATION().AsString()));
        }

        const std::string dataFormat = ReadMandatoryNodeStringAttribute(nodeDef, "data_format");

        CHECK_DATA_FORMAT(nodeDef, dataFormat, "BiasAdd");
        input1Slot = BroadcastForAddandMul(input0Slot, input1Slot, dataFormat == "NHWC", *m_Network, nodeDef);
    }
    else
    {
        // For a plain Add, broadcast whichever side is 1D against the other.
        if (input0Info.GetNumDimensions() == 1)
        {
            const bool isNHWC = true;
            input0Slot = BroadcastForAddandMul(input1Slot, input0Slot, isNHWC, *m_Network, nodeDef);
        }

        // Note: uses the possibly-updated input0Slot; order matters here.
        if (input1Info.GetNumDimensions() == 1)
        {
            const bool isNHWC = true;
            input1Slot = BroadcastForAddandMul(input0Slot, input1Slot, isNHWC, *m_Network, nodeDef);
        }
    }

    IConnectableLayer* const layer = m_Network->AddAdditionLayer(nodeDef.name().c_str());

    input0Slot->Connect(layer->GetInputSlot(0));
    input1Slot->Connect(layer->GetInputSlot(1));

    // The output takes the shape of the non-1D side (input0Info refers to the
    // original, pre-broadcast dimensions).
    if (input0Info.GetNumDimensions() == 1 && isBiasAdd == false)
    {
        layer->GetOutputSlot(0).SetTensorInfo(input1Slot->GetTensorInfo());
    }
    else
    {
        layer->GetOutputSlot(0).SetTensorInfo(input0Slot->GetTensorInfo());
    }

    return std::make_unique<SingleLayerParsedTfOperation>(this, nodeDef, layer);
}
2171
// Creates the armnn Multiplication layer for a Mul node, broadcasting the
// lower-rank input against the higher-rank one when necessary. Called both
// directly from ParseMul's deferred path and from the leaky-relu fusion.
IConnectableLayer* TfParser::AddMultiplicationLayer(const tensorflow::NodeDef& nodeDef)
{
    std::vector<OutputOfParsedTfOperation> inputs = GetInputParsedTfOperationsChecked(nodeDef, 2);

    IConnectableLayer* const layer = m_Network->AddMultiplicationLayer(nodeDef.name().c_str());
    IOutputSlot* input0Slot = &inputs[0].m_IndexedValue->ResolveArmnnOutputSlot(inputs[0].m_Index);
    IOutputSlot* input1Slot = &inputs[1].m_IndexedValue->ResolveArmnnOutputSlot(inputs[1].m_Index);

    // Dimension counts are captured BEFORE any broadcast rewiring; the final
    // output-info choice below deliberately compares these original values.
    auto const input0NumDims = input0Slot->GetTensorInfo().GetNumDimensions();
    auto const input1NumDims = input1Slot->GetTensorInfo().GetNumDimensions();

    if (input0NumDims < input1NumDims)
    {
        const bool isNHWC = true;
        input0Slot = BroadcastForAddandMul(input1Slot, input0Slot, isNHWC, *m_Network, nodeDef);
    }
    if (input1NumDims < input0NumDims)
    {
        const bool isNHWC = true;
        input1Slot = BroadcastForAddandMul(input0Slot, input1Slot, isNHWC, *m_Network, nodeDef);
    }

    input0Slot->Connect(layer->GetInputSlot(0));
    input1Slot->Connect(layer->GetInputSlot(1));

    // The output takes the tensor info of the (originally) higher-rank input.
    if (input0NumDims < input1NumDims)
    {
        layer->GetOutputSlot(0).SetTensorInfo(input1Slot->GetTensorInfo());
    }
    else
    {
        layer->GetOutputSlot(0).SetTensorInfo(input0Slot->GetTensorInfo());
    }
    return layer;
}
2207
2208
2209 IConnectableLayer* TfParser::AddFullyConnectedLayer(const tensorflow::NodeDef& matMulNodeDef,
2210     const tensorflow::NodeDef* addNodeDef, const char* armnnLayerName)
2211 {
2212     // Finds bias const (if applicable).
2213     ParsedConstTfOperation<float>* biasNode = nullptr;
2214     if (addNodeDef != nullptr)
2215     {
2216         std::vector<OutputOfParsedTfOperation> addInputs = GetInputParsedTfOperationsChecked(*addNodeDef, 2);
2217         // Finds our inputs.
2218         if (HasParsedConstTensor<float>(addInputs[0].m_IndexedValue->GetNode().name()))
2219         {
2220             biasNode = boost::polymorphic_downcast<ParsedConstTfOperation<float>*>(addInputs[0].m_IndexedValue);
2221         }
2222         else if (HasParsedConstTensor<float>(addInputs[1].m_IndexedValue->GetNode().name()))
2223         {
2224             biasNode = boost::polymorphic_downcast<ParsedConstTfOperation<float>*>(addInputs[1].m_IndexedValue);
2225         }
2226         else
2227         {
2228             throw ParseException(
2229                 boost::str(
2230                     boost::format(
2231                         "ArmNN only supports fully connected layers with constant bias. "
2232                         "Inputs %1% and %2%. AddNode %3%. MatMulNode %4% %5%")
2233                         % addInputs[0].m_IndexedValue->GetNode().name()
2234                         % addInputs[1].m_IndexedValue->GetNode().name()
2235                         % addNodeDef->name()
2236                         % matMulNodeDef.name()
2237                         % CHECK_LOCATION().AsString()));
2238         }
2239     }
2240
2241     // Finds matmul inputs.
2242     ParsedConstTfOperation<float>* weightNode = nullptr;
2243     ParsedTfOperation* inputNode  = nullptr;
2244     unsigned int inputIdx = 0;
2245     std::vector<OutputOfParsedTfOperation> mulInputs = GetInputParsedTfOperationsChecked(matMulNodeDef, 2);
2246     if (HasParsedConstTensor<float>(mulInputs[0].m_IndexedValue->GetNode().name()))
2247     {
2248         weightNode = boost::polymorphic_downcast<ParsedConstTfOperation<float>*>(mulInputs[0].m_IndexedValue);
2249         inputNode = mulInputs[1].m_IndexedValue;
2250         inputIdx = mulInputs[1].m_Index;
2251     }
2252     else if (HasParsedConstTensor<float>(mulInputs[1].m_IndexedValue->GetNode().name()))
2253     {
2254         weightNode = boost::polymorphic_downcast<ParsedConstTfOperation<float>*>(mulInputs[1].m_IndexedValue);
2255         inputNode = mulInputs[0].m_IndexedValue;
2256         inputIdx = mulInputs[0].m_Index;
2257     }
2258     else
2259     {
2260         throw ParseException(
2261             boost::str(
2262                 boost::format(
2263                     "ArmNN only supports fully connected layers with constant weights. "
2264                     "Inputs %1% and %2%. MatMulNode %3% %4%")
2265                     % mulInputs[0].m_IndexedValue->GetNode().name()
2266                     % mulInputs[1].m_IndexedValue->GetNode().name()
2267                     % matMulNodeDef.name()
2268                     % CHECK_LOCATION().AsString()));
2269     }
2270
2271     std::vector<float> weightTensorData;
2272     // Handles weight.
2273     ConstTensor weights = weightNode->GetConstTensor(false, weightTensorData);
2274
2275     FullyConnectedDescriptor desc;
2276     desc.m_BiasEnabled = addNodeDef != nullptr;
2277
2278     IConnectableLayer* layer = nullptr;
2279     // Makes the layer.
2280     if (addNodeDef != nullptr)
2281     {
2282         std::vector<float> biasTensorData;
2283         ConstTensor biases = biasNode->GetConstTensor(false, biasTensorData);
2284
2285         if (weights.GetShape()[1] != biases.GetShape()[0])
2286         {
2287             throw ParseException(
2288                 boost::str(
2289                     boost::format(
2290                         "Shape of matmul weights and bias do not match. "
2291                         "AddNode %1%. MatMulNode %2% %3%")
2292                         % addNodeDef->name()
2293                         % matMulNodeDef.name()
2294                         % CHECK_LOCATION().AsString()));
2295         }
2296
2297         layer = m_Network->AddFullyConnectedLayer(desc, weights, biases, armnnLayerName);
2298     }
2299     else
2300     {
2301         layer = m_Network->AddFullyConnectedLayer(desc, weights, armnnLayerName);
2302     }
2303
2304     BOOST_ASSERT(layer != nullptr);
2305
2306     inputNode->ResolveArmnnOutputSlot(inputIdx).Connect(layer->GetInputSlot(0));
2307     unsigned int batches = inputNode->ResolveArmnnOutputSlot(inputIdx).GetTensorInfo().GetShape()[0];
2308
2309     // Handles output.
2310     TensorInfo outputInfo({ batches, weights.GetShape()[1] }, DataType::Float32);
2311     layer->GetOutputSlot(0).SetTensorInfo(outputInfo);
2312     return layer;
2313 }
2314
2315 void TfParser::LoadNodeDef(const tensorflow::NodeDef& nodeDef, const tensorflow::GraphDef& graphDef)
2316 {
2317     // Gets the type of the node (assume float).
2318     tensorflow::DataType type = tensorflow::DT_FLOAT;
2319     if (nodeDef.attr().count("T") != 0)
2320     {
2321         auto attr = nodeDef.attr().at("T");
2322         type      = attr.type();
2323     }
2324     else if (nodeDef.attr().count("dtype") != 0)
2325     {
2326         auto attr = nodeDef.attr().at("dtype");
2327         type      = attr.type();
2328     }
2329
2330     if (type != tensorflow::DT_FLOAT && nodeDef.op() != "Const")
2331     {
2332         throw ParseException(
2333             boost::str(
2334                 boost::format(
2335                     "Currently only FLOAT is supported for tensorflow nodes (apart from Const). "
2336                     "Got %1% for Node %2% %3%")
2337                     % tensorflow::DataType_Name(type)
2338                     % nodeDef.name()
2339                     % CHECK_LOCATION().AsString()));
2340     }
2341
2342     const std::string& operation = nodeDef.op();
2343     auto it = ms_OperationNameToParsingFunctions.find(operation);
2344     if (it != ms_OperationNameToParsingFunctions.end())
2345     {
2346         auto func = it->second;
2347         ParsedTfOperationPtr parsedTfOperation = (this->*func)(nodeDef, graphDef);
2348         ParsedTfOperation* parsedTfOperationRaw = parsedTfOperation.get();
2349
2350         // Stores the parsed operation so that dependent layers can connect to it.
2351         auto it = m_ParsedTfOperations.find(nodeDef.name());
2352         if (it != m_ParsedTfOperations.end())
2353         {
2354             throw ParseException(boost::str(boost::format("Name %1% used by more than one node") % nodeDef.name()));
2355         }
2356         m_ParsedTfOperations[nodeDef.name()] = std::move(parsedTfOperation);
2357
2358         // If this node was requested as an output from the network, then adds an ArmNN output layer.
2359         if (std::find(m_RequestedOutputs.begin(), m_RequestedOutputs.end(), nodeDef.name()) !=
2360             m_RequestedOutputs.end())
2361         {
2362             auto outId = ParseOutputId(nodeDef.name());
2363             const LayerBindingId layerId = boost::numeric_cast<LayerBindingId>(m_NetworkOutputsBindingInfo.size());
2364             IOutputSlot& prevSlot = parsedTfOperationRaw->ResolveArmnnOutputSlot(outId.m_Index);
2365
2366             TensorInfo tensorInfo = prevSlot.GetTensorInfo();
2367
2368             IConnectableLayer* outputLayer = m_Network->AddOutputLayer(layerId, nodeDef.name().c_str());
2369
2370             prevSlot.Connect(outputLayer->GetInputSlot(0));
2371
2372             TrackOutputBinding(outputLayer, layerId, tensorInfo);
2373         }
2374     }
2375     else
2376     {
2377         throw ParseException(
2378             boost::str(
2379                 boost::format(
2380                     "Unsupported operation %1% in tensorflow::GraphDef %2%")
2381                     % operation
2382                     % CHECK_LOCATION().AsString()));
2383     }
2384 }
2385
2386 void TfParser::LoadGraphDef(const tensorflow::GraphDef& graphDef)
2387 {
2388     // Adds all nodes to our map.
2389     m_NodesByName.clear();
2390     m_NetworkInputsBindingInfo.clear();
2391     m_NetworkOutputsBindingInfo.clear();
2392
2393     for (int i = 0; i < graphDef.node_size(); ++i)
2394     {
2395         const tensorflow::NodeDef& node = graphDef.node(i);
2396         m_NodesByName[node.name()]      = &node;
2397     }
2398
2399     // Finds the output nodes the user requested.
2400     std::vector<const tensorflow::NodeDef*> targetNodes;
2401     for (const std::string& requestedOutputName : m_RequestedOutputs)
2402     {
2403         auto nodeIt = m_NodesByName.find(requestedOutputName);
2404         if (nodeIt == m_NodesByName.end())
2405         {
2406             throw ParseException(
2407                 boost::str(
2408                     boost::format(
2409                         "Couldn't find requested output node '%1%' in graph %2%")
2410                         % requestedOutputName
2411                         % CHECK_LOCATION().AsString()));
2412         }
2413         targetNodes.push_back(nodeIt->second);
2414     }
2415
2416     // Sorts them into a linear ordering such that all inputs of a node are before the node itself.
2417     std::vector<const tensorflow::NodeDef*> sortedNodes;
2418     if (!armnnUtils::GraphTopologicalSort<const tensorflow::NodeDef*>(
2419         targetNodes,
2420         [this](const tensorflow::NodeDef* node)
2421         {
2422             auto outputs = GetTfInputNodes(*node);
2423             std::vector<const tensorflow::NodeDef*> nodesOnly;
2424             for (const auto & o : outputs) {
2425                 nodesOnly.push_back(o.m_IndexedValue);
2426             }
2427             return nodesOnly;
2428         },
2429         sortedNodes))
2430     {
2431         throw ParseException(
2432             boost::str(
2433                 boost::format(
2434                     "Cycle detected in graph %1%")
2435                     % CHECK_LOCATION().AsString()));
2436     }
2437
2438     // Parses each node in order, knowing that all inputs of a node will be processed before the node itself.
2439     for (const auto& it : sortedNodes)
2440     {
2441         const tensorflow::NodeDef& currentNode = *it;
2442         LoadNodeDef(currentNode, graphDef);
2443     }
2444 }
2445
2446 INetworkPtr TfParser::CreateNetworkFromTextFile(const char* graphFile,
2447     const std::map<std::string, TensorShape>& inputShapes,
2448     const std::vector<std::string>& requestedOutputs)
2449 {
2450     FILE* fd = fopen(graphFile, "r");
2451
2452     if (fd == nullptr)
2453     {
2454         throw FileNotFoundException(
2455             boost::str(
2456                 boost::format(
2457                     "Graph file %1% failed to open %2%")
2458                     % graphFile
2459                     % CHECK_LOCATION().AsString()));
2460     }
2461
2462     // Parses the file into a message.
2463     tensorflow::GraphDef graphDef;
2464     auto                 input   = new google::protobuf::io::FileInputStream(fileno(fd));
2465     bool                 success = google::protobuf::TextFormat::Parse(input, &graphDef);
2466     delete input;
2467     fclose(fd);
2468
2469     if (!success)
2470     {
2471         throw ParseException(
2472             boost::str(
2473                 boost::format(
2474                     "Failed to parse graph file %1%")
2475                     % CHECK_LOCATION().AsString()));
2476     }
2477
2478     return CreateNetworkFromGraphDef(graphDef, inputShapes, requestedOutputs);
2479 }
2480
2481 INetworkPtr TfParser::CreateNetworkFromString(const char* protoText,
2482     const std::map<std::string, TensorShape>& inputShapes,
2483     const std::vector<std::string>& requestedOutputs)
2484 {
2485     // Parses the string into a message.
2486     tensorflow::GraphDef graphDef;
2487     bool success = google::protobuf::TextFormat::ParseFromString(protoText, &graphDef);
2488
2489     if (!success)
2490     {
2491         throw ParseException(
2492             boost::str(
2493                 boost::format(
2494                     "Failed to parse graph file %1%")
2495                     % CHECK_LOCATION().AsString()));
2496     }
2497
2498     return CreateNetworkFromGraphDef(graphDef, inputShapes, requestedOutputs);
2499 }
2500
2501 INetworkPtr TfParser::CreateNetworkFromBinaryFile(const char* graphFile,
2502     const std::map<std::string, TensorShape>& inputShapes,
2503     const std::vector<std::string>& requestedOutputs)
2504 {
2505     FILE* fd = fopen(graphFile, "rb");
2506
2507     if (fd == nullptr)
2508     {
2509         throw FileNotFoundException(
2510             boost::str(
2511                 boost::format(
2512                     "Graph file %1% failed to open %2%")
2513                     % graphFile
2514                     % CHECK_LOCATION().AsString()));
2515     }
2516
2517     // Parses the file into a message.
2518     tensorflow::GraphDef graphDef;
2519
2520     google::protobuf::io::FileInputStream  inStream(fileno(fd));
2521     google::protobuf::io::CodedInputStream codedStream(&inStream);
2522     codedStream.SetTotalBytesLimit(INT_MAX, INT_MAX);
2523     bool success = graphDef.ParseFromCodedStream(&codedStream);
2524     fclose(fd);
2525
2526     if (!success)
2527     {
2528         throw ParseException(
2529             boost::str(
2530                 boost::format(
2531                     "Failed to parse protobuf file %1% %2%")
2532                     % graphFile
2533                     % CHECK_LOCATION().AsString()));
2534     }
2535
2536     return CreateNetworkFromGraphDef(graphDef, inputShapes, requestedOutputs);
2537 }
2538
2539 INetworkPtr TfParser::CreateNetworkFromGraphDef(const tensorflow::GraphDef& graphDef,
2540     const std::map<std::string, TensorShape>& inputShapes,
2541     const std::vector<std::string>& requestedOutputs)
2542 {
2543     m_Network = INetwork::Create();
2544
2545     m_InputShapes = inputShapes;
2546     if (requestedOutputs.size() == 0)
2547     {
2548         throw ParseException(
2549             boost::str(
2550                 boost::format(
2551                     "requestedOutputs must have at least one entry %1%")
2552                     % CHECK_LOCATION().AsString()));
2553     }
2554     m_RequestedOutputs = requestedOutputs;
2555
2556     try
2557     {
2558         LoadGraphDef(graphDef);
2559     }
2560     catch (const ParseException& e)
2561     {
2562         Cleanup();
2563         throw e;
2564     }
2565
2566     Cleanup();
2567
2568     return std::move(m_Network);
2569 }
2570
2571 void TfParser::Cleanup()
2572 {
2573     // Cleanup, in case we reuse this parser.
2574     m_InputShapes.clear();
2575     m_RequestedOutputs.clear();
2576     m_NodesByName.clear();
2577     m_ParsedTfOperations.clear();
2578 }
2579
// Returns the binding id and tensor info recorded for the named network input.
// Throws InvalidArgumentException (via GetBindingInfo) if the name is unknown.
BindingPointInfo TfParser::GetNetworkInputBindingInfo(const std::string& name) const
{
    return GetBindingInfo(name, "input", m_NetworkInputsBindingInfo);
}
2584
// Returns the binding id and tensor info recorded for the named network output.
// Throws InvalidArgumentException (via GetBindingInfo) if the name is unknown.
BindingPointInfo TfParser::GetNetworkOutputBindingInfo(const std::string& name) const
{
    return GetBindingInfo(name, "output", m_NetworkOutputsBindingInfo);
}
2589
2590 std::pair<LayerBindingId, TensorInfo> TfParser::GetBindingInfo(const std::string& layerName,
2591     const char* bindingPointDesc,
2592     const std::unordered_map<std::string, BindingPointInfo>& nameToBindingInfo)
2593 {
2594     auto it = nameToBindingInfo.find(layerName);
2595     if (it == nameToBindingInfo.end())
2596     {
2597         throw InvalidArgumentException(
2598             boost::str(
2599                 boost::format(
2600                     "Unknown %1% '%2%' %3%")
2601                     % bindingPointDesc
2602                     % layerName
2603                     % CHECK_LOCATION().AsString()));
2604     }
2605     return it->second;
2606 }
2607
2608 void TfParser::TrackInputBinding(IConnectableLayer* layer, LayerBindingId id, const TensorInfo& tensorInfo)
2609 {
2610     return TrackBindingPoint(layer, id, tensorInfo, "input", m_NetworkInputsBindingInfo);
2611 }
2612
2613 void TfParser::TrackOutputBinding(IConnectableLayer* layer, LayerBindingId id, const TensorInfo& tensorInfo)
2614 {
2615     return TrackBindingPoint(layer, id, tensorInfo, "output", m_NetworkOutputsBindingInfo);
2616 }
2617
2618 void TfParser::TrackBindingPoint(IConnectableLayer* layer,
2619     LayerBindingId id,
2620     const TensorInfo& tensorInfo,
2621     const char* bindingPointDesc,
2622     std::unordered_map<std::string, BindingPointInfo>& nameToBindingInfo)
2623 {
2624     const std::string layerName = layer->GetName();
2625     auto it = nameToBindingInfo.find(layerName);
2626     if (it == nameToBindingInfo.end())
2627     {
2628         nameToBindingInfo[layerName] = std::make_pair(id, tensorInfo);
2629     }
2630     else
2631     {
2632         throw ParseException(
2633             boost::str(
2634                 boost::format(
2635                     "Id %1% used by more than one %2% layer %3%")
2636                     % id
2637                     % bindingPointDesc
2638                     % CHECK_LOCATION().AsString()));
2639     }
2640 }
2641
2642 } // namespace armnnTfParser