Release 18.08
[platform/upstream/armnn.git] / src / armnnTfParser / TfParser.cpp
1 //
2 // Copyright © 2017 Arm Ltd. All rights reserved.
3 // See LICENSE file in the project root for full license information.
4 //
5 #include "TfParser.hpp"
6
7 #include <armnn/INetwork.hpp>
8 #include <armnn/Utils.hpp>
9 #include <armnn/TypesUtils.hpp>
10 #include <armnn/Exceptions.hpp>
11 #include <armnn/Descriptors.hpp>
12
13 #include <GraphTopologicalSort.hpp>
14 #include <Permute.hpp>
15 #include <VerificationHelpers.hpp>
16
17 #include <google/protobuf/io/zero_copy_stream_impl.h>
18 #include <google/protobuf/text_format.h>
19
20 #include "tensorflow/core/framework/graph.pb.h"
21 #include "tensorflow/core/framework/node_def.pb.h"
22 #include "tensorflow/core/framework/types.pb.h"
23 #include "tensorflow/core/framework/tensor.pb.h"
24 #include "tensorflow/core/framework/tensor_shape.pb.h"
25
26 #include <boost/assert.hpp>
27 #include <boost/format.hpp>
28 #include <boost/core/ignore_unused.hpp>
29 #include <boost/log/trivial.hpp>
30 #include <boost/numeric/conversion/cast.hpp>
31 #include <boost/polymorphic_cast.hpp>
32
33 #include <memory>
34 #include <sstream>
35 #include <numeric>
36 #include <functional>
37
38 using namespace armnn;
39
40 namespace armnnTfParser
41 {
42 namespace
43 {
44
45 const PermutationVector NHWCToArmNN = { 0, 2, 3, 1 };
46 const PermutationVector ArmNNToNHWC = { 0, 3, 1, 2 };
47
48 IConnectableLayer* AddSwizzleLayer(INetwork& network, IOutputSlot& input, const PermutationVector& mapping,
49     const std::string& name)
50 {
51     // Adds swizzle layer.
52     IConnectableLayer* const layer = network.AddPermuteLayer(mapping, name.c_str());
53
54     // Connects intput to swizzle layer.
55     input.Connect(layer->GetInputSlot(0));
56
57     // Sets up swizzled output.
58     const TensorInfo outInfo = armnnUtils::Permuted(input.GetTensorInfo(), mapping);
59     layer->GetOutputSlot(0).SetTensorInfo(outInfo);
60
61     return layer;
62 }
63
64 IConnectableLayer* SwizzleInDeswizzleOut(INetwork& network, IOutputSlot& input, IConnectableLayer& layer,
65     const std::string& name)
66 {
67     // Adds swizzle layer.
68     IConnectableLayer* const swizzleLayer = AddSwizzleLayer(network, input, NHWCToArmNN, "swizzle_for-" + name);
69
70     // Connects swizzledInput to layer.
71     swizzleLayer->GetOutputSlot(0).Connect(layer.GetInputSlot(0));
72
73     // Adds deswizzle layer.
74     IConnectableLayer* const deswizzleLayer = AddSwizzleLayer(network, layer.GetOutputSlot(0), ArmNNToNHWC,
75         "deswizzle_for-" + name);
76
77     return deswizzleLayer;
78 }
79
80 template <typename Callable>
81 void ReadMandatoryNodeAttributeImpl(const tensorflow::NodeDef& nodeDef,
82     const std::string& attribName,
83     tensorflow::AttrValue::ValueCase expectedValueCase,
84     Callable callable)
85 {
86     auto iter = nodeDef.attr().find(attribName);
87     if (iter != nodeDef.attr().end())
88     {
89         const auto& attrValue = iter->second;
90         if (attrValue.value_case() == expectedValueCase)
91         {
92             callable(attrValue);
93         }
94         else
95         {
96             throw ParseException(
97                 boost::str(
98                     boost::format(
99                         "Attribute %1% of node %2% expected to have %3% as tensorflow::AttrValue::ValueCase, "
100                         "but found %4% instead %5%")
101                         % attribName
102                         % nodeDef.name()
103                         % static_cast<int>(expectedValueCase)
104                         % static_cast<int>(attrValue.value_case())
105                         % CHECK_LOCATION().AsString()));
106         }
107     }
108     else
109     {
110         throw ParseException(
111             boost::str(
112                 boost::format(
113                     "Could not find required attribute %1% in node %2% %3%")
114                     % attribName
115                     % nodeDef.name()
116                     % CHECK_LOCATION().AsString()));
117     }
118 }
119
120 template <typename Callable>
121 void ReadOptionalNodeAttributeImpl(const tensorflow::NodeDef& nodeDef,
122     const std::string& attribName,
123     tensorflow::AttrValue::ValueCase expectedValueCase,
124     Callable callable)
125 {
126     auto iter = nodeDef.attr().find(attribName);
127     if (iter != nodeDef.attr().end())
128     {
129         const auto& attrValue = iter->second;
130         if (attrValue.value_case() == expectedValueCase)
131         {
132             callable(attrValue);
133         }
134         else
135         {
136             throw ParseException(
137                 boost::str(
138                     boost::format(
139                         "Attribute %1% of node %2% expected to have %3% as tensorflow::AttrValue::ValueCase, "
140                         "but found %4% instead %5%")
141                         % attribName
142                         % nodeDef.name()
143                         % static_cast<int>(expectedValueCase)
144                         % static_cast<int>(attrValue.value_case())
145                         % CHECK_LOCATION().AsString()));
146         }
147     }
148 }
149
150 float ReadMandatoryNodeFloatAttribute(const tensorflow::NodeDef& nodeDef, const std::string& name)
151 {
152     float attribValue = 0.0f;
153     ReadMandatoryNodeAttributeImpl(nodeDef, name, tensorflow::AttrValue::kF,
154         [&attribValue](const tensorflow::AttrValue& attrValue)
155     {
156         attribValue = attrValue.f();
157     });
158     return attribValue;
159 }
160
161 uint32_t ReadMandatoryNodeUint32Attribute(const tensorflow::NodeDef& nodeDef, const std::string& name)
162 {
163     uint32_t attribValue = 0u;
164     ReadMandatoryNodeAttributeImpl(nodeDef, name, tensorflow::AttrValue::kI,
165         [&attribValue](const tensorflow::AttrValue& attrValue)
166     {
167         attribValue = static_cast<uint32_t>(attrValue.i());
168     });
169     return attribValue;
170 }
171
172 std::string ReadMandatoryNodeStringAttribute(const tensorflow::NodeDef& nodeDef, const std::string& name)
173 {
174     std::string attribValue = "";
175     ReadMandatoryNodeAttributeImpl(nodeDef, name, tensorflow::AttrValue::kS,
176         [&attribValue](const tensorflow::AttrValue& attrValue)
177     {
178         attribValue = attrValue.s();
179     });
180     return attribValue;
181 }
182
183 std::vector<uint32_t> ReadMandatoryNodeUint32ListAttribute(const tensorflow::NodeDef& nodeDef,
184     const std::string& name)
185 {
186     std::vector<uint32_t> attriList;
187     ReadMandatoryNodeAttributeImpl(nodeDef, name, tensorflow::AttrValue::kList,
188         [&attriList](const tensorflow::AttrValue& attrValue)
189     {
190         for (int attriNum = 0; attriNum < attrValue.list().i_size(); ++attriNum)
191         {
192             attriList.push_back(static_cast<uint32_t>(attrValue.list().i(attriNum)));
193         }
194     });
195
196     return attriList;
197 }
198
199 std::vector<uint32_t> ReadOptionalNodeUint32ListAttribute(const tensorflow::NodeDef& nodeDef,
200     const std::string& name)
201 {
202     std::vector<uint32_t> attriList;
203     ReadOptionalNodeAttributeImpl(nodeDef, name, tensorflow::AttrValue::kList,
204         [&attriList](const tensorflow::AttrValue& attrValue)
205     {
206         for (int attriNum = 0; attriNum < attrValue.list().i_size(); ++attriNum)
207         {
208             attriList.push_back(static_cast<uint32_t>(attrValue.list().i(attriNum)));
209         }
210     });
211
212     return attriList;
213 }
214
215 bool ReadOptionalNodeBoolAttribute(const tensorflow::NodeDef& nodeDef,
216     const std::string& name,
217     bool defaultValue = false)
218 {
219     bool attribValue = defaultValue;
220     ReadOptionalNodeAttributeImpl(nodeDef, name, tensorflow::AttrValue::kB,
221         [&attribValue](const tensorflow::AttrValue& attrValue)
222     {
223         attribValue = attrValue.b();
224     });
225     return attribValue;
226 }
227
228 tensorflow::DataType ReadMandatoryNodeTypeAttribute(const tensorflow::NodeDef& nodeDef, const std::string& name)
229 {
230     tensorflow::DataType attribValue = tensorflow::DT_INVALID;
231     ReadMandatoryNodeAttributeImpl(nodeDef, name, tensorflow::AttrValue::kType,
232         [&attribValue](const tensorflow::AttrValue& attrValue)
233     {
234         attribValue = attrValue.type();
235     });
236     return attribValue;
237 }
238
239 TensorInfo PrepareReshape(const TensorInfo& input, const std::vector<int32_t>& targetDims)
240 {
241     std::vector<unsigned int> outDims(targetDims.begin(), targetDims.end());
242     const auto stretchDim = std::find(targetDims.begin(), targetDims.end(), -1);
243
244     if (stretchDim != targetDims.end())
245     {
246         if (std::find(std::next(stretchDim), targetDims.end(), -1) != targetDims.end())
247         {
248             throw ParseException(
249                 boost::str(
250                     boost::format(
251                         "At most one component of shape can be -1 %1%")
252                         % CHECK_LOCATION().AsString()));
253         }
254
255         auto targetNumElements =
256             boost::numeric_cast<unsigned int>(
257                 std::accumulate(targetDims.begin(), targetDims.end(), -1, std::multiplies<int32_t>()));
258         auto stretchIndex = static_cast<size_t>(std::distance(targetDims.begin(), stretchDim));
259         outDims[stretchIndex] = input.GetNumElements() / targetNumElements;
260     }
261
262     TensorInfo reshapeInfo = input;
263     reshapeInfo.SetShape(TensorShape{ static_cast<unsigned int>(outDims.size()), outDims.data() });
264
265     return reshapeInfo;
266 }
267
268 // We need the input0Slot to guide the reshape for input1Slot.
269 IOutputSlot* BroadcastForAddandMul(IOutputSlot* input0Slot, IOutputSlot* input1Slot, bool isNHWC, INetwork& m_Network,
270                                    const tensorflow::NodeDef& nodeDef)
271 {
272     const TensorInfo& input1Info = input1Slot->GetTensorInfo();
273     const TensorInfo inputTensorInfo = input0Slot->GetTensorInfo();
274     const unsigned int matchDim = inputTensorInfo.GetNumDimensions() - (isNHWC ? 1 : 3);
275     std::array<unsigned int, MaxNumOfTensorDimensions> reshapedDimensions;
276     std::fill_n(reshapedDimensions.begin(), inputTensorInfo.GetNumDimensions(), 1);
277     reshapedDimensions[matchDim] = input1Info.GetShape()[0];
278
279     armnn::TensorInfo reshapedInfo = input1Info;
280     reshapedInfo.SetShape(TensorShape{ inputTensorInfo.GetNumDimensions(), reshapedDimensions.data() });
281
282     const std::string reshapeLayerName = "reshape_for-" + nodeDef.name();
283     ReshapeDescriptor reshapeDesc;
284     reshapeDesc.m_TargetShape = reshapedInfo.GetShape();
285     IConnectableLayer* const reshapeLayer = m_Network.AddReshapeLayer(reshapeDesc, reshapeLayerName.c_str());
286
287     input1Slot->Connect(reshapeLayer->GetInputSlot(0));
288     reshapeLayer->GetOutputSlot(0).SetTensorInfo(reshapedInfo);
289
290     input1Slot = &reshapeLayer->GetOutputSlot(0);
291
292     return input1Slot;
293 }
294
295 OutputId ParseOutputId(const std::string & name)
296 {
297     unsigned int outputNum = 0;
298     size_t colonPos = name.find_last_of(":");
299     if (colonPos != std::string::npos)
300     {
301         int n = std::stoi(name.substr(colonPos+1));
302         if (n<0 || n>100)
303         {
304             throw ParseException(
305                 boost::str(
306                     boost::format(
307                         "Output tensor id is out of range for %1% %2%")
308                         % name
309                         % CHECK_LOCATION().AsString()));
310         }
311         outputNum = static_cast<unsigned int>(n);
312     }
313     return OutputId(name.substr(0,colonPos),outputNum);
314 }
315
// Validates that a node's data-format attribute names a layout the parser
// understands; anything other than "NHWC" or "NCHW" raises ParseException.
// NODE_TYPE and NODE_DEF are only used to build the error message.
#define CHECK_DATA_FORMAT(NODE_DEF, FORMAT, NODE_TYPE) \
    if( FORMAT != "NHWC" && FORMAT != "NCHW" ) \
    { \
        throw ParseException( \
            boost::str( \
                boost::format( \
                    "Unsupported data format %1% passed for %2% node %3%. " \
                    "Only NHWC and NCHW supported %4%") \
                    % FORMAT \
                    % NODE_TYPE \
                    % NODE_DEF.name() \
                    % CHECK_LOCATION().AsString())); \
    }

// Validates that a node's padding attribute is one of TensorFlow's two
// padding schemes; anything other than "SAME" or "VALID" raises ParseException.
#define CHECK_PADDING_TYPE(NODE_DEF, PADDING) \
    if(PADDING != "SAME" && PADDING != "VALID" ) \
    { \
        throw ParseException( \
            boost::str( \
                boost::format( \
                    "Only 'SAME' and 'VALID' padding supported. Got %1% for %2% %3%") \
                    % PADDING \
                    % NODE_DEF.name() \
                    % CHECK_LOCATION().AsString())); \
    } \
341
342 } // namespace
343
// Maps a TensorFlow op name to the TfParser member function that converts it.
// Ops not listed here are unsupported and will fail when the graph is parsed.
const std::map<std::string, TfParser::OperationParsingFunction> TfParser::ms_OperationNameToParsingFunctions = {
    { "Const",                 &TfParser::ParseConst },
    { "Add",                   &TfParser::ParseAdd },
    { "BiasAdd",               &TfParser::ParseBiasAdd },
    { "Identity",              &TfParser::ParseIdentity },
    { "Conv2D",                &TfParser::ParseConv2D },
    { "DepthwiseConv2dNative", &TfParser::ParseDepthwiseConv2D },
    { "FusedBatchNorm",        &TfParser::ParseFusedBatchNorm },
    { "ConcatV2",              &TfParser::ParseConcat },
    { "LRN",                   &TfParser::ParseLrn },
    { "MatMul",                &TfParser::ParseMatMul },
    { "Mul",                   &TfParser::ParseMul },
    { "Placeholder",           &TfParser::ParsePlaceholder },
    { "Relu",                  &TfParser::ParseRelu },
    { "Relu6",                 &TfParser::ParseRelu6 },
    { "Reshape",               &TfParser::ParseReshape },
    { "ResizeBilinear",        &TfParser::ParseResizeBilinear },
    { "Shape",                 &TfParser::ParseShape },
    { "Squeeze",               &TfParser::ParseSqueeze },
    { "Sigmoid",               &TfParser::ParseSigmoid },
    { "Softmax",               &TfParser::ParseSoftmax },
    { "Softplus",              &TfParser::ParseSoftplus },
    { "Tanh",                  &TfParser::ParseTanh },
    { "MaxPool",               &TfParser::ParseMaxPool },
    { "AvgPool",               &TfParser::ParseAvgPool },
    { "Maximum",               &TfParser::ParseMaximum },
};
371
372 ITfParser* ITfParser::CreateRaw()
373 {
374     return new TfParser();
375 }
376
/// Creates a parser wrapped in a smart handle that calls ITfParser::Destroy on release.
ITfParserPtr ITfParser::Create()
{
    return ITfParserPtr(CreateRaw(), &ITfParser::Destroy);
}
381
/// Deletes a parser previously obtained from CreateRaw()/Create().
void ITfParser::Destroy(ITfParser* parser)
{
    delete parser;
}
386
/// Computes front/back padding for one dimension following TensorFlow's rules.
/// With samePadding false (VALID), both paddings are zero. With samePadding
/// true (SAME), the output covers ceil(inputSize / stride) positions and any
/// excess input the filter sweep requires is split evenly, the odd element
/// going to the back.
inline void CalculateSamePadding(uint32_t inputSize, uint32_t stride,
                                 uint32_t filterSize, bool samePadding,
                                 uint32_t* paddingFront, uint32_t* paddingBack) {
    *paddingFront = 0;
    *paddingBack = 0;

    if (!samePadding) {
        return;
    }

    // Number of output positions for SAME padding: ceil(inputSize / stride).
    const uint32_t outputSize = (inputSize + stride - 1) / stride;
    // Input extent required to produce that many outputs.
    const uint32_t requiredInput = (outputSize - 1) * stride + filterSize;
    if (requiredInput > inputSize) {
        const uint32_t totalPadding = requiredInput - inputSize;
        *paddingFront = totalPadding / 2;
        *paddingBack = totalPadding - *paddingFront;
    }
}
402
/// Computes head/tail padding for one spatial dimension.
/// Thin wrapper over CalculateSamePadding, reordering (kernel, stride) into
/// that function's (stride, filterSize) parameter order.
void CalcPadding(uint32_t input, uint32_t kernel, uint32_t stride, uint32_t& outPadHead, uint32_t& outPadTail,
                 bool samePadding)
{
    CalculateSamePadding(input, stride, kernel, samePadding, &outPadHead, &outPadTail);
}
408
409 /// An Abstract base class which represents a single tensorflow operation (node)
410 /// that has been (potentially partially) converted to Armnn.
411 /// It may not yet have been fully converted into actual Armnn layers.
412 class ParsedTfOperation
413 {
414 public:
415     ParsedTfOperation(TfParser* parser, const tensorflow::NodeDef& node)
416     : m_Parser(parser)
417     , m_Node(node)
418     {
419     }
420
421     virtual ~ParsedTfOperation() {};
422
423     const tensorflow::NodeDef& GetNode() const { return m_Node; }
424
425     /// Gets the ArmNN IOutputSlot corresponding to the given output index of the Tensorflow operation.
426     /// This may result in the creation of Armnn layers if this was deferred (e.g. see ParsedConstTfOperation).
427     virtual IOutputSlot& ResolveArmnnOutputSlot(unsigned int tfOutputIndex) = 0;
428
429     /// If this operation is an Identity then this will follow return the 'parent' operation (recursively).
430     virtual ParsedTfOperation* ResolveIdentityOperations()
431     {
432         return this;
433     }
434
435 protected:
436     TfParser* m_Parser;
437     const tensorflow::NodeDef& m_Node;
438 };
439
440 /// An ParsedTfOperation where the Armnn equivalent is a single layer,
441 /// with output slots that correspond directly to the Tf node outputs.
442 class SingleLayerParsedTfOperation : public ParsedTfOperation
443 {
444 public:
445     SingleLayerParsedTfOperation(TfParser* parser, const tensorflow::NodeDef& node, IConnectableLayer* layer)
446     : ParsedTfOperation(parser, node)
447     , m_Layer(layer)
448     {
449     }
450
451     IOutputSlot& ResolveArmnnOutputSlot(unsigned int tfOutputIndex) override
452     {
453         BOOST_ASSERT(m_Layer);
454         // Assumes one-to-one mapping between Tf and armnn output slots.
455         unsigned int armnnOutputSlotIdx = tfOutputIndex;
456         if (armnnOutputSlotIdx >= m_Layer->GetNumOutputSlots())
457         {
458             throw ParseException(
459                 boost::str(
460                     boost::format(
461                         "The requested output slot #%1% "
462                         "for %2% does not exist %3%")
463                         % armnnOutputSlotIdx
464                         % m_Layer->GetName()
465                         % CHECK_LOCATION().AsString()));
466         }
467         return m_Layer->GetOutputSlot(armnnOutputSlotIdx);
468     }
469
470 protected:
471     IConnectableLayer* m_Layer;
472 };
473
474 /// A SingleLayerParsedTfOperation for deferred layer creation.
475 class DeferredSingleLayerParsedTfOperation : public SingleLayerParsedTfOperation
476 {
477 public:
478     DeferredSingleLayerParsedTfOperation(TfParser* parser, const tensorflow::NodeDef& node)
479     : SingleLayerParsedTfOperation(parser, node, nullptr)
480     {
481     }
482
483     IOutputSlot& ResolveArmnnOutputSlot(unsigned int tfOutputIndex) override
484     {
485         if (!m_Layer)
486         {
487             CreateLayerDeferred();
488         }
489         return SingleLayerParsedTfOperation::ResolveArmnnOutputSlot(tfOutputIndex);
490     }
491
492 private:
493     virtual void CreateLayerDeferred() = 0;
494 };
495
496
// Constructs a parser with an empty network handle (null pointer and null
// deleter); the network is created later when a graph is parsed.
TfParser::TfParser()
    : m_Network(nullptr, nullptr)
{
}
501
502
503 const tensorflow::NodeDef* TfParser::ResolveIdentityNode(const tensorflow::NodeDef* nodeDef)
504 {
505     if (nodeDef->op() != "Identity")
506     {
507         return nodeDef;
508     }
509
510     if (nodeDef->input_size() != 1)
511     {
512         throw ParseException(
513             boost::str(
514                 boost::format(
515                     "Identity node should have a single input! %1% has %2% inputs %3%")
516                     % nodeDef->name()
517                     % nodeDef->input_size()
518                     % CHECK_LOCATION().AsString()));
519     }
520
521     auto it = m_NodesByName.find(nodeDef->input(0));
522     if (it != m_NodesByName.end())
523     {
524         const tensorflow::NodeDef* inputNode = it->second;
525         return ResolveIdentityNode(inputNode);
526     }
527     else
528     {
529         throw ParseException(
530             boost::str(
531                 boost::format(
532                     "Cannot find what the Identity node %1% is linked to! %2%")
533                     % nodeDef->name()
534                     % CHECK_LOCATION().AsString()));
535     }
536 }
537
538 std::vector<OutputOfConstNodeDef>
539 TfParser::GetTfInputNodes(const tensorflow::NodeDef& nodeDef) const
540 {
541     std::vector<OutputOfConstNodeDef> ret;
542
543     if (nodeDef.op() == "Const")
544     {
545         // For some reason const node can have "Control Inputs". We ignore them for now.
546         return ret;
547     }
548
549     ret.reserve(boost::numeric_cast<size_t>(nodeDef.input_size()));
550     for (int j = 0; j < nodeDef.input_size(); ++j)
551     {
552         OutputId outputId = ParseOutputId(nodeDef.input(j));
553
554         if (nodeDef.input(j)[0] == '^') // I couldn't find a better test for control inputs.
555         {
556             throw ParseException(
557                 boost::str(
558                     boost::format(
559                         "Node '%1%' has Control Input '%2%' for input #%3% which is unsupported. %4%")
560                         % nodeDef.name()
561                         % nodeDef.input(j)
562                         % j
563                         % CHECK_LOCATION().AsString()));
564         }
565
566         auto inputIt = m_NodesByName.find(outputId.m_IndexedValue);
567         if (inputIt == m_NodesByName.end())
568         {
569             throw ParseException(
570                 boost::str(
571                     boost::format(
572                         "Can't find node '%1%', which is listed as an input of '%2%' %3%")
573                         % nodeDef.input(j)
574                         % nodeDef.name()
575                         % CHECK_LOCATION().AsString()));
576         }
577         ret.push_back(OutputOfConstNodeDef(inputIt->second,outputId.m_Index));
578     }
579
580     return ret;
581 }
582
583 std::vector<OutputOfParsedTfOperation>
584 TfParser::GetInputParsedTfOperationsChecked(const tensorflow::NodeDef& nodeDef,
585                                             std::size_t expectedNumInputs)
586 {
587     // Fetches the tensorflow nodes connected as inputs and validate the size.
588     std::vector<OutputOfConstNodeDef> nodes = GetTfInputNodes(nodeDef);
589     const std::size_t numInputs = nodes.size();
590     if (numInputs != expectedNumInputs)
591     {
592         throw ParseException(
593             boost::str(
594                 boost::format(
595                     "Unexpected number of inputs for node %1%. Expected %2%, found %3% %4%")
596                     % nodeDef.name()
597                     % expectedNumInputs
598                     % numInputs
599                     % CHECK_LOCATION().AsString()));
600     }
601     // Fetches the corresponding ParsedTfOperation operations
602     std::vector<OutputOfParsedTfOperation> result;
603     for (auto&& node : nodes)
604     {
605         auto it = m_ParsedTfOperations.find(node.m_IndexedValue->name());
606         if (it == m_ParsedTfOperations.end())
607         {
608             throw ParseException(
609                 boost::str(
610                     boost::format(
611                         "Node with name '%1%' has not been parsed %2%")
612                         % node.m_IndexedValue->name()
613                         % CHECK_LOCATION().AsString()));
614         }
615         ParsedTfOperation* parsedOp = it->second.get();
616         // Transparently 'skip' any Identity operations. This simplifies the logic inside the ParseXXX() functions.
617         parsedOp = parsedOp->ResolveIdentityOperations();
618         result.push_back(OutputOfParsedTfOperation(parsedOp,node.m_Index));
619     }
620     return result;
621 }
622
623 ParsedTfOperationPtr TfParser::ParseAdd(const tensorflow::NodeDef& nodeDef, const tensorflow::GraphDef& graphDef)
624 {
625     std::vector<OutputOfParsedTfOperation> inputs = GetInputParsedTfOperationsChecked(nodeDef, 2);
626
627     // If one of the inputs is a MatMul and the other is a const, then we handle both nodes
628     // together as FullyConnected.
629     if (inputs[0].m_IndexedValue->GetNode().op() == "MatMul" &&
630         HasParsedConstTensor<float>(inputs[1].m_IndexedValue->GetNode().name()))
631     {
632         IConnectableLayer* layer =
633             AddFullyConnectedLayer(inputs[0].m_IndexedValue->GetNode(),
634                                    &nodeDef,nodeDef.name().c_str());
635         return std::make_unique<SingleLayerParsedTfOperation>(this, nodeDef, layer);
636     }
637     else if (HasParsedConstTensor<float>(inputs[0].m_IndexedValue->GetNode().name()) &&
638                                          inputs[1].m_IndexedValue->GetNode().op() == "MatMul")
639     {
640         IConnectableLayer* layer =
641             AddFullyConnectedLayer(inputs[1].m_IndexedValue->GetNode(),
642                                    &nodeDef,nodeDef.name().c_str());
643         return std::make_unique<SingleLayerParsedTfOperation>(this, nodeDef, layer);
644     }
645     else
646     {
647         // Otherwise it's just a regular addition.
648         return AddAdditionLayer(nodeDef);
649     }
650 }
651
// Parses a BiasAdd node by delegating to AddAdditionLayer with its boolean
// flag set — presumably marking BiasAdd-specific handling; see AddAdditionLayer.
ParsedTfOperationPtr TfParser::ParseBiasAdd(const tensorflow::NodeDef& nodeDef, const tensorflow::GraphDef& graphDef)
{
    return AddAdditionLayer(nodeDef, true);
}
656
/// An ParsedTfOperation which forwards to another (used for Identity nodes).
class ParsedIdentityTfOperation : public ParsedTfOperation
{
public:
    ParsedIdentityTfOperation(TfParser* parser, const tensorflow::NodeDef& node, ParsedTfOperation* representative)
        : ParsedTfOperation(parser, node)
        , m_Representative(representative)
    {
    }

    /// Forwards slot resolution to the represented operation.
    virtual IOutputSlot& ResolveArmnnOutputSlot(unsigned int tfOutputIndex) override
    {
        BOOST_ASSERT(m_Representative);
        return m_Representative->ResolveArmnnOutputSlot(tfOutputIndex);
    }

    /// Follows the chain of Identity operations to the underlying operation.
    virtual ParsedTfOperation* ResolveIdentityOperations() override
    {
        return m_Representative->ResolveIdentityOperations();
    }

private:
    /// The operation this Identity node stands in for; not owned.
    ParsedTfOperation* m_Representative;
};
681
// Parses an Identity node by creating a forwarding operation for its single input.
ParsedTfOperationPtr TfParser::ParseIdentity(const tensorflow::NodeDef& nodeDef, const tensorflow::GraphDef& graphDef)
{
    std::vector<OutputOfParsedTfOperation> inputs = GetInputParsedTfOperationsChecked(nodeDef, 1);
    // Any requests for the output slots of this node should be forwarded to the node connected as input.
    return std::make_unique<ParsedIdentityTfOperation>(this, nodeDef, inputs[0].m_IndexedValue);
}
688
/// An ParsedTfOperation for a Const node.
/// Creation of the armnn ConstLayer is deferred until it is actually needed, because Const nodes are mostly used
/// for weight inputs to MatMul/Conv2D nodes and in these cases armnn doesn't need a ConstLayer.
template <typename T>
class ParsedConstTfOperation : public DeferredSingleLayerParsedTfOperation
{
public:
    /// Copies the tensor data into owned storage; tensorData must hold at
    /// least tensorInfo.GetNumElements() values of type T.
    ParsedConstTfOperation(TfParser* parser, const tensorflow::NodeDef& node,
        const T* tensorData, const TensorInfo& tensorInfo)
        : DeferredSingleLayerParsedTfOperation(parser, node),
        m_Storage(tensorData, tensorData + tensorInfo.GetNumElements()),
        m_TensorInfo(tensorInfo)
    {
        // The stored element type must match the tensor's declared data type.
        BOOST_ASSERT(tensorInfo.GetDataType() == GetDataType<T>());
    }

    /// Creates the armnn ConstantLayer on first use (see DeferredSingleLayerParsedTfOperation).
    void CreateLayerDeferred() override
    {
        BOOST_ASSERT(m_Layer == nullptr);
        m_Layer = m_Parser->m_Network->AddConstantLayer(ConstTensor(m_TensorInfo, m_Storage), m_Node.name().c_str());
        m_Layer->GetOutputSlot(0).SetTensorInfo(m_TensorInfo);
    }

    /// Returns the constant data as a ConstTensor backed by the caller's
    /// outputTensorData buffer. When swizzleForConvolutionWeights is set the
    /// data is permuted from TensorFlow's HWIO filter layout to ArmNN's OIHW.
    ConstTensor GetConstTensor(bool swizzleForConvolutionWeights, std::vector<T>& outputTensorData) const
    {
        // Mappings from TensorFlow filter tensors to the ArmNN filter tensors.
        // Tensorflow weights are [H, W, In, Out].
        // ArmNN weights are [Out, In, H, W].
        static const PermutationVector HWIOToOIHW = {2, 3, 1, 0};

        const TensorInfo outInfo = swizzleForConvolutionWeights
                                   ? armnnUtils::Permuted(m_TensorInfo, HWIOToOIHW)
                                   : m_TensorInfo;

        outputTensorData.resize(m_TensorInfo.GetNumElements());

        // Copies or swizzles from the permanent storage into the storage the caller provided.
        if (swizzleForConvolutionWeights)
        {
            armnnUtils::Permute(outInfo.GetShape(), HWIOToOIHW, m_Storage.data(), outputTensorData.data());
        }
        else
        {
            memcpy(outputTensorData.data(), m_Storage.data(), m_TensorInfo.GetNumBytes());
        }
        // Updates the result to point to the user provided storage.
        ConstTensor constTensor(outInfo, outputTensorData);
        return constTensor;
    }

private:
    ///< Manages the lifetime of the tensor data.
    std::vector<T> m_Storage;
    ///< Describes the layout of the tensor and points to the data in m_Storage.
    TensorInfo m_TensorInfo;
};
745
746 DataType ConvertTfTensorDataType(const tensorflow::DataType tfDataType,
747                                  const tensorflow::NodeDef& nodeDef)
748 {
749     switch (tfDataType)
750     {
751     case tensorflow::DT_FLOAT:
752         return DataType::Float32;
753         break;
754     case tensorflow::DT_INT32:
755         return DataType::Signed32;
756         break;
757     default:
758         throw ParseException(
759             boost::str(
760                 boost::format(
761                     "Unknown DataType %1% for node %2% %3%")
762                     % tensorflow::DataType_Name(tfDataType)
763                     % nodeDef.name()
764                     % CHECK_LOCATION().AsString()));
765     }
766 }
767
768 struct ParseTfTensorValueList
769 {
770     template<typename DataType>
771     static void Parse(
772         const tensorflow::TensorProto& tfTensor,
773         unsigned int dstElements,
774         std::vector<int8_t>& outputData);
775
776     template <typename DataType>
777     static void ReadData(const void* srcData, unsigned int numSrcElements,
778         std::vector<int8_t>& dstData, unsigned int numDstElements)
779     {
780         // If there are no entries in the list, perform no action.
781         if (numSrcElements == 0)
782         {
783             return;
784         }
785
786         // If no size was provided, use the length of the value list.
787         if (numDstElements == 0)
788         {
789             numDstElements = numSrcElements;
790         }
791
792         // Allocates memory.
793         dstData.resize(std::max(numSrcElements, numDstElements) * sizeof(DataType));
794
795         const DataType* srcTensor = reinterpret_cast<const DataType*>(srcData);
796         DataType* dstTensor = reinterpret_cast<DataType*>(dstData.data());
797
798         // Copies the value list entries into the destination.
799         std::copy(srcTensor, srcTensor + numSrcElements, dstTensor);
800
801         if (numDstElements > numSrcElements)
802         {
803             // Uses the last element in the list to fill the remaining entries.
804             std::fill(dstTensor + numSrcElements, dstTensor + numDstElements, srcTensor[numSrcElements - 1]);
805         }
806     }
807
808 };
809
// Reads a float tensor's value list (float_val) into the output byte buffer,
// padding to dstElements via ReadData's last-value replication.
template <>
void ParseTfTensorValueList::Parse<float>(const tensorflow::TensorProto& tfTensor,
    unsigned int dstElements, std::vector<int8_t>& outputData)
{
    ReadData<float>(tfTensor.float_val().data(), static_cast<unsigned int>(tfTensor.float_val_size()),
        outputData, dstElements);
}
817
// Specialization: reads the proto's int32 value list ("int_val") and expands
// it to dstElements entries (see ReadData for the padding behaviour).
template <>
void ParseTfTensorValueList::Parse<int32_t>(const tensorflow::TensorProto& tfTensor,
    unsigned int dstElements, std::vector<int8_t>& outputData)
{
    ReadData<int32_t>(tfTensor.int_val().data(), static_cast<unsigned int>(tfTensor.int_val_size()),
        outputData, dstElements);
}
825
// Factory functor used with InvokeParseFunction: constructs an
// OperatorType<DataType> parsed-operation object for the given node, perfectly
// forwarding any extra constructor arguments. T is unused by this primary
// template (it exists so specializations can take extra fixed parameters).
template <template<typename> class OperatorType, typename T = int8_t>
struct MakeTfOperation
{
    template<typename DataType, class... Args>
    inline static std::unique_ptr<OperatorType<DataType>> Parse(TfParser* parser, const tensorflow::NodeDef& node,
        Args&&... args)
    {
        return std::make_unique<OperatorType<DataType>>(parser, node, std::forward<Args>(args)...);
    }
};
836
// Specialization for ParsedConstTfOperation: the raw tensor bytes gathered by
// ParseConst are reinterpreted as the concrete element type before the
// operation object is constructed.
template <>
struct MakeTfOperation<ParsedConstTfOperation>
{
    template<typename DataType, class... Args>
    inline static std::unique_ptr<ParsedConstTfOperation<DataType>> Parse(TfParser* parser,
        const tensorflow::NodeDef& node, const std::vector<int8_t>& tensorData, const TensorInfo& tensorInfo)
    {
        return std::make_unique<ParsedConstTfOperation<DataType>>(parser, node,
            reinterpret_cast<const DataType*>(tensorData.data()), tensorInfo);
    }
};
848
849 template <class FuncType>
850 struct InvokeParseFunction
851 {
852     template<class ResType, class... Args>
853     inline static ResType Result(DataType dataType, Args&&... args)
854     {
855         if (dataType == DataType::Float32)
856         {
857             return FuncType::template Parse<float>(std::forward<Args>(args)...);
858         }
859         else if (dataType == DataType::Signed32)
860         {
861             return FuncType::template Parse<int32_t>(std::forward<Args>(args)...);
862         }
863
864         return ResType();
865     }
866
867     template<class... Args>
868     inline static void Result(DataType dataType, Args&&... args)
869     {
870         if (dataType == DataType::Float32)
871         {
872             FuncType::template Parse<float>(std::forward<Args>(args)...);
873         }
874         else if (dataType == DataType::Signed32)
875         {
876             FuncType::template Parse<int32_t>(std::forward<Args>(args)...);
877         }
878     }
879 };
880
// Parses a TF "Const" node into a ParsedConstTfOperation holding the tensor's
// data and shape. The data comes either from the proto's typed value list
// (float_val / int_val) or from the packed "tensor_content" bytes.
ParsedTfOperationPtr TfParser::ParseConst(const tensorflow::NodeDef& nodeDef, const tensorflow::GraphDef& graphDef)
{
    BOOST_ASSERT(nodeDef.op() == "Const");

    // A Const node without a "value" attribute is malformed.
    if (nodeDef.attr().count("value") == 0)
    {
        throw ParseException(
            boost::str(
                boost::format(
                    "Value not found for Const node - %1% %2%")
                    % nodeDef.name()
                    % CHECK_LOCATION().AsString()));
    }

    const tensorflow::TensorProto& tfTensor = nodeDef.attr().at("value").tensor();
    const tensorflow::TensorShapeProto& tfTensorShape = tfTensor.tensor_shape();
    const tensorflow::DataType tfDataType = ReadMandatoryNodeTypeAttribute(nodeDef, "dtype");

    // NOTE(review): proto dimension sizes are wider integers; they are narrowed
    // implicitly into unsigned int by the transform below.
    const auto GetDimensionSize = [](auto& d) { return d.size(); };

    std::vector<unsigned int> dimensionSizes;
    std::transform(tfTensorShape.dim().begin(), tfTensorShape.dim().end(),
        std::back_inserter(dimensionSizes), GetDimensionSize);

    // Calculates the number of elements implied by the shape (0 when no shape
    // is provided, which triggers the 1D-inference path below).
    const DataType dataType = ConvertTfTensorDataType(tfDataType, nodeDef);
    unsigned int numElements = 0U;

    if (!dimensionSizes.empty())
    {
        numElements = std::accumulate(dimensionSizes.begin(), dimensionSizes.end(),
                                      1U, std::multiplies<unsigned int>());
    }

    std::vector<int8_t> tensorData;

    // Gets tensor data from the list of values attribute.
    if (tfTensor.tensor_content().empty())
    {
        InvokeParseFunction<ParseTfTensorValueList>::Result<void>(dataType, tfTensor, numElements, tensorData);

        // If the tensor shape is not defined, but there is a value list, then interpret the data as a 1D
        // tensor of the provided number of elements.
        if (numElements == 0)
        {
            const unsigned int tfNumElements =
                static_cast<unsigned int>(tensorData.size()) / GetDataTypeSize(dataType);
            dimensionSizes.push_back(tfNumElements);
        }
    }
    // Gets tensor data from tensor content attribute.
    else
    {
        tensorData.assign(tfTensor.tensor_content().begin(), tfTensor.tensor_content().end());

        // Checks if a tensor shape is defined for the tensor content: packed
        // bytes carry no element count of their own, so a shape is mandatory here.
        if (numElements == 0)
        {
            throw ParseException(
                boost::str(
                    boost::format(
                        "No tensor shape found for Const node - %1% %2%")
                        % nodeDef.name()
                        % CHECK_LOCATION().AsString()));
        }
    }

    // Const node requires at least a list of values or a content attribute.
    if (tensorData.empty())
    {
        throw ParseException(
            boost::str(
                boost::format(
                    "No tensor data found for Const node - %1% %2%")
                    % nodeDef.name()
                    % CHECK_LOCATION().AsString()));
    }

    const TensorInfo tensorInfo(static_cast<unsigned int>(dimensionSizes.size()),
                                dimensionSizes.data(),
                                dataType);

    // If we have a list of values, then the length of the list must be
    // less than or equal to the number of elements implied by the shape argument.
    // (ParseTfTensorValueList pads short lists up to the shape's element count,
    // so only an over-long list can trip this check.)
    if (tensorData.size() > tensorInfo.GetNumBytes())
    {
        throw ParseException(
            boost::str(
                boost::format(
                    "Number of elements (%1%) should be less than or equal "
                    "to the number of elements implied by the shape argument (%2%) for Const node - %3% %4%")
                    % (tensorData.size() / GetDataTypeSize(dataType))
                    % tensorInfo.GetNumElements()
                    % nodeDef.name()
                    % CHECK_LOCATION().AsString()));
    }

    // Builds a ParsedConstTfOperation of the concrete element type.
    return InvokeParseFunction<MakeTfOperation<ParsedConstTfOperation>>::Result<ParsedTfOperationPtr>(
        dataType, this, nodeDef, tensorData, tensorInfo);
}
981
982 template<typename Type>
983 bool TfParser::HasParsedConstTensor(const std::string & nodeName) const
984 {
985     auto it = m_ParsedTfOperations.find(nodeName);
986     if (it == m_ParsedTfOperations.end() ||
987         dynamic_cast<ParsedConstTfOperation<Type>*>(it->second.get()) == nullptr)
988     {
989         return false;
990     }
991     else
992     {
993         return true;
994     }
995 }
996
// Parses a TF "Conv2D" node into an ArmNN Convolution2dLayer.
// Inputs: [0] = feature map, [1] = weights (must be a constant float node).
// ArmNN's native layout here is NCHW; NHWC graphs are handled by wrapping the
// convolution in swizzle (NHWC->NCHW) / deswizzle permute layers.
ParsedTfOperationPtr TfParser::ParseConv2D(const tensorflow::NodeDef& nodeDef,
    const tensorflow::GraphDef& graphDef)
{
    std::vector<OutputOfParsedTfOperation> inputs = GetInputParsedTfOperationsChecked(nodeDef, 2);
    IOutputSlot& inputSlot = inputs[0].m_IndexedValue->ResolveArmnnOutputSlot(inputs[0].m_Index);
    TensorInfo inputTensorInfo = inputSlot.GetTensorInfo();

    // Weights must be constant so they can be baked into the ArmNN layer.
    if (!HasParsedConstTensor<float>(inputs[1].m_IndexedValue->GetNode().name()))
    {
        throw ParseException(
            boost::str(
                boost::format(
                    "ArmNN only supports Convolution layers with constant weights for %1%, input %2% %3%")
                    % nodeDef.name()
                    % inputs[1].m_IndexedValue->GetNode().name()
                    % CHECK_LOCATION().AsString()));
    }
    ParsedConstTfOperation<float>* weightNode =
        boost::polymorphic_downcast<ParsedConstTfOperation<float> *>(inputs[1].m_IndexedValue);

    std::string paddingString = ReadMandatoryNodeStringAttribute(nodeDef, "padding");
    std::string dataFormat = ReadMandatoryNodeStringAttribute(nodeDef, "data_format");
    std::vector<uint32_t> strides = ReadMandatoryNodeUint32ListAttribute(nodeDef, "strides");

    // Read the dilations, if present - only [1,1,1,1] (the default) is supported.
    std::vector<uint32_t> dilations = ReadOptionalNodeUint32ListAttribute(nodeDef, "dilations");
    if (!dilations.empty())
    {
        for (auto dilation : dilations)
        {
            if (dilation != 1u)
            {
                throw ParseException(
                    boost::str(
                        boost::format(
                            "ArmNN only supports Convolution layers with dilations [1,1,1,1] for %1% %2%")
                            % nodeDef.name()
                            % CHECK_LOCATION().AsString()));
            }
        }
    }

    Convolution2dDescriptor desc;
    desc.m_BiasEnabled = false;

    // Throws unless dataFormat is one of the supported layouts.
    CHECK_DATA_FORMAT(nodeDef, dataFormat, "Conv2D");

    // The stride vector is indexed per the TF data_format:
    // NHWC -> [batch, H, W, channel]; NCHW -> [batch, channel, H, W].
    if (dataFormat == "NHWC")
    {
        desc.m_StrideX = strides[2];
        desc.m_StrideY = strides[1];
        // Swizzles input to supported memory layout.
        inputTensorInfo = armnnUtils::Permuted(inputSlot.GetTensorInfo(), NHWCToArmNN);
    }
    else if (dataFormat == "NCHW")
    {
        desc.m_StrideX = strides[3];
        desc.m_StrideY = strides[2];
    }

    // From here on inputTensorInfo is in NCHW order, so [2]=height, [3]=width.
    uint32_t inputHeight = inputTensorInfo.GetShape()[2];
    uint32_t inputWidth = inputTensorInfo.GetShape()[3];

    std::vector<float> outputTensorData;

    // GetConstTensor(true, ...) — presumably requests the conv weight layout;
    // TODO(review) confirm what the bool argument selects.
    ConstTensor weightTensor = weightNode->GetConstTensor(true, outputTensorData);

    // Weight shape indices 2/3 are taken as filter height/width here, and
    // index 0 (used below) as the output channel count.
    uint32_t weightHeight = weightTensor.GetShape()[2];
    uint32_t weightWidth = weightTensor.GetShape()[3];

    bool padding = false;
    TensorInfo outputInfo;

    // Throws unless paddingString is "SAME" or "VALID", so outputInfo is
    // always initialized by one of the branches below.
    CHECK_PADDING_TYPE(nodeDef, paddingString);

    if (paddingString == "SAME")
    {
        padding = true;
        // SAME: output spatial dims = ceil(input / stride).
        outputInfo = TensorInfo({ inputTensorInfo.GetShape()[0],
                                  weightTensor.GetShape()[0],
                                  static_cast<uint32_t>(ceil(
                                      static_cast<float>(inputHeight) /
                                      static_cast<float>(desc.m_StrideY))),
                                  static_cast<uint32_t>(ceil(
                                      static_cast<float>(inputWidth) /
                                      static_cast<float>(desc.m_StrideX)))
                                }, DataType::Float32);
    }
    else if (paddingString == "VALID")
    {
        padding = false;
        // VALID: output spatial dims = ceil((input - kernel + 1) / stride).
        outputInfo = TensorInfo({ inputTensorInfo.GetShape()[0],
                                  weightTensor.GetShape()[0],
                                  static_cast<uint32_t>(ceil(
                                      static_cast<float>(inputHeight - weightHeight + 1) /
                                      static_cast<float>(desc.m_StrideY))),
                                  static_cast<uint32_t>(ceil(
                                      static_cast<float>(inputWidth - weightWidth + 1) /
                                      static_cast<float>(desc.m_StrideX)))
                                }, DataType::Float32);
    }

    // Fills in the descriptor's pad fields (zeros for VALID).
    CalcPadding(inputHeight, weightHeight, desc.m_StrideY, desc.m_PadTop, desc.m_PadBottom, padding);
    CalcPadding(inputWidth, weightWidth, desc.m_StrideX, desc.m_PadLeft, desc.m_PadRight, padding);

    IConnectableLayer* layer = m_Network->AddConvolution2dLayer(desc, weightTensor, nodeDef.name().c_str());
    layer->GetOutputSlot(0).SetTensorInfo(outputInfo);

    if (dataFormat == "NHWC")
    {
        // Wraps the conv in permute layers so the graph stays NHWC externally.
        layer = SwizzleInDeswizzleOut(*m_Network, inputSlot, *layer, nodeDef.name());
    }
    else
    {
        inputSlot.Connect(layer->GetInputSlot(0));
    }

    return std::make_unique<SingleLayerParsedTfOperation>(this, nodeDef, layer);
}
1116
// Parses a TF "DepthwiseConv2dNative" node into an ArmNN
// DepthwiseConvolution2dLayer. Inputs: [0] = feature map, [1] = weights
// (must be a constant float node). NHWC graphs are wrapped in swizzle /
// deswizzle permutes, as in ParseConv2D.
ParsedTfOperationPtr TfParser::ParseDepthwiseConv2D(const tensorflow::NodeDef& nodeDef,
                                                    const tensorflow::GraphDef& graphDef)
{
    std::vector<OutputOfParsedTfOperation> inputs = GetInputParsedTfOperationsChecked(nodeDef, 2);
    IOutputSlot& inputSlot = inputs[0].m_IndexedValue->ResolveArmnnOutputSlot(inputs[0].m_Index);
    TensorInfo inputTensorInfo = inputSlot.GetTensorInfo();

    // Weights must be constant so they can be baked into the ArmNN layer.
    if (!HasParsedConstTensor<float>(inputs[1].m_IndexedValue->GetNode().name()))
    {
        throw ParseException(
            boost::str(
                boost::format(
                    "ArmNN only supports Depthwise Convolution layer with constant weights. "
                    "Non const input found %1% for node %2% %3%")
                    % inputs[1].m_IndexedValue->GetNode().name()
                    % nodeDef.name()
                    % CHECK_LOCATION().AsString()));
    }
    ParsedConstTfOperation<float>* weightNode =
        boost::polymorphic_downcast<ParsedConstTfOperation<float> *>(inputs[1].m_IndexedValue);


    std::string paddingString = ReadMandatoryNodeStringAttribute(nodeDef, "padding");
    std::string dataFormat = ReadMandatoryNodeStringAttribute(nodeDef, "data_format");
    std::vector<uint32_t> strides = ReadMandatoryNodeUint32ListAttribute(nodeDef, "strides");

    DepthwiseConvolution2dDescriptor desc;
    desc.m_BiasEnabled = false;

    // Throws unless dataFormat is one of the supported layouts.
    CHECK_DATA_FORMAT(nodeDef, dataFormat, "DepthwiseConv2dNative");

    // The stride vector is indexed per the TF data_format:
    // NHWC -> [batch, H, W, channel]; NCHW -> [batch, channel, H, W].
    if (dataFormat == "NHWC")
    {
        desc.m_StrideX = strides[2];
        desc.m_StrideY = strides[1];
        // Swizzles input to supported memory layout.
        inputTensorInfo = armnnUtils::Permuted(inputSlot.GetTensorInfo(), NHWCToArmNN);
    }
    else if (dataFormat == "NCHW")
    {
        desc.m_StrideX = strides[3];
        desc.m_StrideY = strides[2];
    }

    // From here on inputTensorInfo is in NCHW order, so [2]=height, [3]=width.
    uint32_t inputHeight = inputTensorInfo.GetShape()[2];
    uint32_t inputWidth = inputTensorInfo.GetShape()[3];

    std::vector<float> outputTensorData;

    ConstTensor weightTensor = weightNode->GetConstTensor(true, outputTensorData);

    // Weight shape indices 2/3 are taken as filter height/width here; the
    // output channel count below is shape[0] * shape[1] (presumably channel
    // multiplier times input channels — per ArmNN's depthwise weight layout).
    uint32_t weightHeight = weightTensor.GetShape()[2];
    uint32_t weightWidth = weightTensor.GetShape()[3];

    bool padding = false;
    TensorInfo outputInfo;

    // Throws unless paddingString is "SAME" or "VALID", so outputInfo is
    // always initialized by one of the branches below.
    CHECK_PADDING_TYPE(nodeDef, paddingString);

    if (paddingString == "SAME")
    {
        padding = true;
        // SAME: output spatial dims = ceil(input / stride).
        outputInfo = TensorInfo({ inputTensorInfo.GetShape()[0],
                                weightTensor.GetShape()[0] * weightTensor.GetShape()[1],
                                static_cast<uint32_t>(ceil(
                                    static_cast<float>(inputHeight) /
                                    static_cast<float>(desc.m_StrideY))),
                                static_cast<uint32_t>(ceil(
                                    static_cast<float>(inputWidth) /
                                    static_cast<float>(desc.m_StrideX)))
                                }, DataType::Float32);
    }
    else if (paddingString == "VALID")
    {
        padding = false;
        // VALID: output spatial dims = ceil((input - kernel + 1) / stride).
        outputInfo = TensorInfo({ inputTensorInfo.GetShape()[0],
                                weightTensor.GetShape()[0] * weightTensor.GetShape()[1],
                                static_cast<uint32_t>(ceil(
                                    static_cast<float>(inputHeight - weightHeight + 1) /
                                    static_cast<float>(desc.m_StrideY))),
                                static_cast<uint32_t>(ceil(
                                    static_cast<float>(inputWidth - weightWidth + 1) /
                                    static_cast<float>(desc.m_StrideX)))
                                }, DataType::Float32);
    }

    // Fills in the descriptor's pad fields (zeros for VALID).
    CalcPadding(inputHeight, weightHeight, desc.m_StrideY, desc.m_PadTop, desc.m_PadBottom, padding);
    CalcPadding(inputWidth, weightWidth, desc.m_StrideX, desc.m_PadLeft, desc.m_PadRight, padding);

    IConnectableLayer* layer = m_Network->AddDepthwiseConvolution2dLayer(desc, weightTensor, nodeDef.name().c_str());
    layer->GetOutputSlot(0).SetTensorInfo(outputInfo);

    if (dataFormat == "NHWC")
    {
        // Wraps the layer in permute layers so the graph stays NHWC externally.
        layer = SwizzleInDeswizzleOut(*m_Network, inputSlot, *layer, nodeDef.name());
    }
    else
    {
        inputSlot.Connect(layer->GetInputSlot(0));
    }

    return std::make_unique<SingleLayerParsedTfOperation>(this, nodeDef, layer);
}
1220
1221 ParsedTfOperationPtr TfParser::ParseFusedBatchNorm(const tensorflow::NodeDef& nodeDef,
1222                                                    const tensorflow::GraphDef& graphDef)
1223 {
1224     std::vector<OutputOfParsedTfOperation> inputs = GetInputParsedTfOperationsChecked(nodeDef, 5);
1225
1226     if (!HasParsedConstTensor<float>(inputs[1].m_IndexedValue->GetNode().name()))
1227     {
1228         throw ParseException(
1229             boost::str(
1230                 boost::format(
1231                     "ArmNN only supports FusedBatchNormalization layers with constant scale. "
1232                     "Input %1%. Node %2% %3%")
1233                     % inputs[1].m_IndexedValue->GetNode().name()
1234                     % nodeDef.name()
1235                     % CHECK_LOCATION().AsString()));
1236     }
1237     ParsedConstTfOperation<float>* scaleNode =
1238         boost::polymorphic_downcast<ParsedConstTfOperation<float> *>(inputs[1].m_IndexedValue);
1239
1240     if (!HasParsedConstTensor<float>(inputs[2].m_IndexedValue->GetNode().name()))
1241     {
1242         throw ParseException(
1243             boost::str(
1244                 boost::format(
1245                     "ArmNN only supports FusedBatchNormalization layers with constant offset. "
1246                     "Input %1%. Node %2% %3%")
1247                     % inputs[2].m_IndexedValue->GetNode().name()
1248                     % nodeDef.name()
1249                     % CHECK_LOCATION().AsString()));
1250     }
1251     ParsedConstTfOperation<float>* offsetNode =
1252         boost::polymorphic_downcast<ParsedConstTfOperation<float> *>(inputs[2].m_IndexedValue);
1253
1254     if (!HasParsedConstTensor<float>(inputs[3].m_IndexedValue->GetNode().name()))
1255     {
1256         throw ParseException(
1257             boost::str(
1258                 boost::format(
1259                     "ArmNN only supports FusedBatchNormalization layers with constant mean. "
1260                     "Input %1%. Node %2% %3%")
1261                     % inputs[3].m_IndexedValue->GetNode().name()
1262                     % nodeDef.name()
1263                     % CHECK_LOCATION().AsString()));
1264     }
1265     ParsedConstTfOperation<float>* meanNode =
1266         boost::polymorphic_downcast<ParsedConstTfOperation<float> *>(inputs[3].m_IndexedValue);
1267
1268     if (!HasParsedConstTensor<float>(inputs[4].m_IndexedValue->GetNode().name()))
1269     {
1270         throw ParseException(
1271             boost::str(
1272                 boost::format(
1273                     "ArmNN only supports FusedBatchNormalization layers with constant variance. "
1274                     "Input %1%. Node %2% %3%")
1275                     % inputs[4].m_IndexedValue->GetNode().name()
1276                     % nodeDef.name()
1277                     % CHECK_LOCATION().AsString()));
1278     }
1279     ParsedConstTfOperation<float>* varianceNode =
1280         boost::polymorphic_downcast<ParsedConstTfOperation<float> *>(inputs[4].m_IndexedValue);
1281
1282     // The descriptor only has the epsilon attribute.
1283     BatchNormalizationDescriptor desc;
1284     desc.m_Eps = ReadMandatoryNodeFloatAttribute(nodeDef, "epsilon");
1285
1286     // Data for the parsed tensor args (scale, offset, mean, variance) must be stored
1287     // locally until the layer is added.
1288     std::vector<float> scaleTensorData;
1289     ConstTensor scaleTensor = scaleNode->GetConstTensor(false, scaleTensorData);
1290
1291     std::vector<float> offsetTensorData;
1292     ConstTensor offsetTensor = offsetNode->GetConstTensor(false, offsetTensorData);
1293
1294     std::vector<float> meanTensorData;
1295     ConstTensor meanTensor = meanNode->GetConstTensor(false, meanTensorData);
1296
1297     std::vector<float> varianceTensorData;
1298     ConstTensor varianceTensor = varianceNode->GetConstTensor(false, varianceTensorData);
1299
1300     IConnectableLayer* layer = m_Network->AddBatchNormalizationLayer(desc,
1301                                                                      meanTensor,
1302                                                                      varianceTensor,
1303                                                                      offsetTensor,
1304                                                                      scaleTensor,
1305                                                                      nodeDef.name().c_str());
1306
1307     IOutputSlot& inputSlot = inputs[0].m_IndexedValue->ResolveArmnnOutputSlot(inputs[0].m_Index);
1308
1309     const std::string dataFormat = ReadMandatoryNodeStringAttribute(nodeDef, "data_format");
1310
1311     if (dataFormat == "NHWC")
1312     {
1313         const TensorInfo outputTensorInfo = armnnUtils::Permuted(inputSlot.GetTensorInfo(), NHWCToArmNN);
1314         layer->GetOutputSlot(0).SetTensorInfo(outputTensorInfo);
1315         layer = SwizzleInDeswizzleOut(*m_Network, inputSlot, *layer, nodeDef.name());
1316     }
1317     else
1318     {
1319         layer->GetOutputSlot(0).SetTensorInfo(inputSlot.GetTensorInfo());
1320         inputSlot.Connect(layer->GetInputSlot(0));
1321     }
1322
1323     return std::make_unique<SingleLayerParsedTfOperation>(this, nodeDef, layer);
1324 }
1325
1326 bool TfParser::IsSupportedLeakyReluPattern(const tensorflow::NodeDef& mulNodeDef,
1327                                            size_t alphaLayerIndex,
1328                                            const OutputOfParsedTfOperation& otherOp,
1329                                            armnn::IOutputSlot** outputOfLeakyRelu,
1330                                            armnn::ActivationDescriptor & desc)
1331 {
1332     const tensorflow::NodeDef& otherNodeDef = otherOp.m_IndexedValue->GetNode();
1333
1334     // Verifying all these assumptions hold:
1335     //
1336     // 1, the mulNodeDef is an elementwise multiplication node "Mul"
1337     // 2, the alphaLayerIndex selects a constant node from the inputs of the "Mul" node
1338     // 3, the inputLayerIndex selects a layer which has the same name as otherNodeDef
1339     //
1340
1341     if (mulNodeDef.op() == "Mul")
1342     {
1343         size_t otherLayerIndex = (alphaLayerIndex == 0 ? 1 : 0);
1344         std::vector<OutputOfParsedTfOperation> inputs = GetInputParsedTfOperationsChecked(mulNodeDef, 2);
1345
1346         BOOST_ASSERT(inputs.size() == 2);
1347         BOOST_ASSERT((otherLayerIndex == 0 || alphaLayerIndex == 0));
1348         BOOST_ASSERT((otherLayerIndex == 1 || alphaLayerIndex == 1));
1349         BOOST_ASSERT(((otherLayerIndex + alphaLayerIndex) == 1));
1350
1351         if (inputs[otherLayerIndex].m_IndexedValue->GetNode().name() == otherNodeDef.name())
1352         {
1353             if (HasParsedConstTensor<float>(inputs[alphaLayerIndex].m_IndexedValue->GetNode().name()))
1354             {
1355                 ParsedConstTfOperation<float>* alpha =
1356                     boost::polymorphic_downcast<ParsedConstTfOperation<float> *>(
1357                         inputs[alphaLayerIndex].m_IndexedValue);
1358
1359                 std::vector<float> const_data;
1360                 ConstTensor const_tensor = alpha->GetConstTensor(false, const_data);
1361
1362                 if (const_data.size() == 1)
1363                 {
1364                     desc.m_Function = ActivationFunction::LeakyReLu;
1365                     desc.m_A = const_data[0];
1366
1367                     *outputOfLeakyRelu = &(otherOp.m_IndexedValue->ResolveArmnnOutputSlot(otherOp.m_Index));
1368                     return true;
1369                 }
1370             }
1371         }
1372     }
1373     return false;
1374 }
1375
1376 // For max nodes, we only support those as part of a leaky relu, i.e.,
1377 // as part for a max(mul(a, x), x) expression. We thus need to
1378 // identify one input as a multiplication with a scalar constant,
1379 // extract the constant and the two inputs, verify that the two other
1380 // inputs are the same node, and then create a leaky relu node.
1381
1382 ParsedTfOperationPtr TfParser::ParseMaximum(const tensorflow::NodeDef& nodeDef,
1383                                             const tensorflow::GraphDef& graphDef)
1384 {
1385     std::vector<OutputOfParsedTfOperation> inputs = GetInputParsedTfOperationsChecked(nodeDef, 2);
1386     auto inputNode0 = inputs[0].m_IndexedValue->GetNode();
1387     auto inputNode1 = inputs[1].m_IndexedValue->GetNode();
1388     IOutputSlot* outputOfLeakyRelu = nullptr;
1389
1390     ActivationDescriptor desc;
1391
1392     // There are four possible scenarios we need to support (respectively below):
1393     // 1, max(mul(a, x), x)
1394     // 2, max(mul(x, a), x)
1395     // 3, max(x, mul(a, x))
1396     // 4, max(x, mul(x, a))
1397
1398     if (IsSupportedLeakyReluPattern(inputNode0, 0, inputs[1], &outputOfLeakyRelu, desc) ||
1399         IsSupportedLeakyReluPattern(inputNode0, 1, inputs[1], &outputOfLeakyRelu, desc) ||
1400         IsSupportedLeakyReluPattern(inputNode1, 0, inputs[0], &outputOfLeakyRelu, desc) ||
1401         IsSupportedLeakyReluPattern(inputNode1, 1, inputs[0], &outputOfLeakyRelu, desc))
1402     {
1403         BOOST_ASSERT(outputOfLeakyRelu != nullptr);
1404
1405         IConnectableLayer* const layer = m_Network->AddActivationLayer(desc, nodeDef.name().c_str());
1406         outputOfLeakyRelu->Connect(layer->GetInputSlot(0));
1407         layer->GetOutputSlot(0).SetTensorInfo(outputOfLeakyRelu->GetTensorInfo());
1408         return std::make_unique<SingleLayerParsedTfOperation>(this, nodeDef, layer);
1409     }
1410     else
1411     {
1412         throw ParseException(
1413             boost::str(
1414                 boost::format(
1415                     "ArmNN currenly offers limited support for Maximum node when it can be fused to "
1416                     "form a LeakyRelu activation as leakyrelu=max(mul(alpha, X), X). "
1417                     "Node: %1% %2%")
1418                     % nodeDef.name()
1419                     % CHECK_LOCATION().AsString()));
1420     }
1421 }
1422
// Parses a TF "Concat" node into an ArmNN MergerLayer. The last TF input is a
// constant scalar giving the concatenation axis; only concatenation along the
// channel dimension is supported (axis 1 for NCHW inputs, axis 3 for NHWC —
// the latter handled by swizzling every input to NCHW and deswizzling the
// merged output back).
ParsedTfOperationPtr TfParser::ParseConcat(const tensorflow::NodeDef& nodeDef,
                                           const tensorflow::GraphDef& graphDef)
{
    std::vector<OutputOfConstNodeDef> nodes = GetTfInputNodes(nodeDef);
    // In tensorflow, we have the last input of the Concat layer as the axis for concatenation.
    unsigned int numInputs = static_cast<unsigned int>(nodes.size());
    unsigned int numConcatView = numInputs - 1;

    OriginsDescriptor concatDescriptor(static_cast<uint32_t>(numConcatView), MaxNumOfTensorDimensions);
    std::vector<unsigned int>mergeDimSizes(MaxNumOfTensorDimensions, 0u);

    // Running offset along the merge dimension; becomes the output size there.
    unsigned int mergeDim = 0;
    std::vector<OutputOfParsedTfOperation> inputs = GetInputParsedTfOperationsChecked(nodeDef, numInputs);

    // The last input is the axis for concatenation.
    if (!HasParsedConstTensor<int32_t>(inputs[numInputs - 1].m_IndexedValue->GetNode().name()))
    {
        throw ParseException(
            boost::str(
                boost::format(
                    "ArmNN only supports Concat with constant axis. "
                    "Input %1%. Node %2% %3%")
                    % inputs[numInputs - 1].m_IndexedValue->GetNode().name()
                    % nodeDef.name()
                    % CHECK_LOCATION().AsString()));
    }
    ParsedConstTfOperation<int32_t>* shapeNode =
            boost::polymorphic_downcast<ParsedConstTfOperation<int32_t>*>(inputs[numInputs - 1].m_IndexedValue);

    std::vector<int32_t> axisTensorData;
    ConstTensor axisTensor = shapeNode->GetConstTensor(false, axisTensorData);

    // This concatDim indicates the data format: 3 is the NHWC, 1 is the NCHW.
    const unsigned int concatDimInput = static_cast<unsigned int>(axisTensorData[0]);

    // Armnn supports concatenation along the channel dimension for data formats NHWC and NCHW.
    if (concatDimInput == 0 || concatDimInput == 2)
    {
        throw ParseException(
            boost::str(
                boost::format(
                    "Dimension %1% for concatenation is not supported by Armnn. "
                    "Node %2% %3%")
                    % concatDimInput
                    % nodeDef.name()
                    % CHECK_LOCATION().AsString()));
    }

    // This is the only concatDim we support in armnn (channel dim, since all
    // views are handled in NCHW order from here on).
    const unsigned int concatDim = 1;
    for (unsigned int viewIndex = 0; viewIndex < numConcatView; ++viewIndex)
    {
        // Need to double check whether it should be
        IOutputSlot& inputSlot =
            inputs[viewIndex].m_IndexedValue->ResolveArmnnOutputSlot(inputs[viewIndex].m_Index);
        TensorInfo inputTensorInfo = inputSlot.GetTensorInfo();

        // Every view must be 4D (MaxNumOfTensorDimensions).
        if (inputTensorInfo.GetNumDimensions() != MaxNumOfTensorDimensions)
        {
            throw ParseException(
                boost::str(
                    boost::format(
                        "The number of dimensions: %1% for input tensors of the "
                        "concatenation op should be %2% for Node %3% %4%")
                        % inputTensorInfo.GetNumDimensions()
                        % MaxNumOfTensorDimensions
                        % nodeDef.name()
                        % CHECK_LOCATION().AsString()));
        }

        // NHWC inputs are considered in their swizzled (NCHW) shape.
        if (concatDimInput == 3)
        {
            inputTensorInfo = armnnUtils::Permuted(inputTensorInfo, NHWCToArmNN);
        }

        for (unsigned int dim = 0; dim < MaxNumOfTensorDimensions; ++dim)
        {
            mergeDimSizes[dim] = inputTensorInfo.GetShape()[dim];
        }

        // View origin is zero in every dimension except the merge dimension,
        // where each view starts where the previous one ended.
        for (unsigned int j = 0; j < concatDim; ++j)
        {
            concatDescriptor.SetViewOriginCoord(viewIndex, j, 0);
        }

        concatDescriptor.SetViewOriginCoord(viewIndex, concatDim, mergeDim);
        mergeDim += mergeDimSizes[concatDim];

        for (unsigned int j = concatDim+1; j < MaxNumOfTensorDimensions; ++j)
        {
            concatDescriptor.SetViewOriginCoord(viewIndex, j, 0);
        }
    }

    // Output shape: last view's dims, with the merge dim replaced by the total.
    mergeDimSizes[concatDim] = mergeDim;
    armnn::IConnectableLayer *layer = m_Network->AddMergerLayer(concatDescriptor, nodeDef.name().c_str());

    layer->GetOutputSlot(0).SetTensorInfo(armnn::TensorInfo(MaxNumOfTensorDimensions, mergeDimSizes.data(),
                                                            DataType::Float32));

    for (unsigned int v = 0; v < numConcatView; ++v)
    {
        IOutputSlot& inputSlot = inputs[v].m_IndexedValue->ResolveArmnnOutputSlot(inputs[v].m_Index);
        // NHWC views are swizzled into NCHW before feeding the merger.
        if (concatDimInput == 3)
        {
            IConnectableLayer* const swizzleLayer = AddSwizzleLayer(*m_Network, inputSlot, NHWCToArmNN,
                                                                    "swizzle_for-" + nodeDef.name());
            swizzleLayer->GetOutputSlot(0).Connect(layer->GetInputSlot(v));
        }
        else
        {
            inputSlot.Connect(layer->GetInputSlot(v));
        }
    }

    // For NHWC graphs, deswizzle the merged result back to NHWC.
    if (concatDimInput == 3)
    {
        IConnectableLayer* const deswizzleLayer = AddSwizzleLayer(*m_Network, layer->GetOutputSlot(0), ArmNNToNHWC,
                                                                  "deswizzle_for-" + nodeDef.name());
        layer = deswizzleLayer;
    }

    return std::make_unique<SingleLayerParsedTfOperation>(this, nodeDef, layer);
}
1547
1548 ParsedTfOperationPtr TfParser::ParseShape(const tensorflow::NodeDef& nodeDef,
1549     const tensorflow::GraphDef& graphDef)
1550 {
1551     // Note: the Shape layer is handled in a special way, because:
1552     //        1. ARMNN doesn't support int32 tensors which it outputs.
1553     //        2. ARMNN works with statically shaped tensors which are known at parse time.
1554     //        3. because of 1. and 2. we treat the output of Shape as a temporary const int32
1555     //           tensor which may be used as an input to other ops, most likely a Reshape.
1556
1557     const tensorflow::DataType tfDataType = ReadMandatoryNodeTypeAttribute(nodeDef, "out_type");
1558     if (tfDataType != tensorflow::DT_INT32)
1559     {
1560         throw ParseException(
1561             boost::str(
1562                 boost::format(
1563                     "Armnn only supports DT_INT32 as out_type. Got %1% for Node %2% %3%")
1564                     % tensorflow::DataType_Name(tfDataType)
1565                     % nodeDef.name()
1566                     % CHECK_LOCATION().AsString()));
1567     }
1568
1569     const std::vector<OutputOfParsedTfOperation> inputs = GetInputParsedTfOperationsChecked(nodeDef, 1);
1570     IOutputSlot& prevLayerOutputSlot = inputs[0].m_IndexedValue->ResolveArmnnOutputSlot(inputs[0].m_Index);
1571     const TensorInfo& prevLayerTensorInfo = prevLayerOutputSlot.GetTensorInfo();
1572     unsigned int prevLayerDimensions = prevLayerTensorInfo.GetNumDimensions();
1573
1574     std::vector<int32_t> shapeTensorData;
1575     shapeTensorData.reserve(prevLayerDimensions);
1576
1577     for (unsigned int i=0; i<prevLayerDimensions; ++i)
1578     {
1579         shapeTensorData.push_back(static_cast<int32_t>(prevLayerTensorInfo.GetShape()[i]));
1580     }
1581
1582     TensorInfo shapeTensorInfo(1, &prevLayerDimensions, DataType::Signed32);
1583
1584     return std::make_unique<ParsedConstTfOperation<int32_t>>(this,
1585                                                              nodeDef,
1586                                                              &shapeTensorData[0],
1587                                                              shapeTensorInfo);
1588 }
1589
1590 ParsedTfOperationPtr TfParser::ParseReshape(const tensorflow::NodeDef& nodeDef,
1591     const tensorflow::GraphDef& graphDef)
1592 {
1593     std::vector<OutputOfParsedTfOperation> inputs = GetInputParsedTfOperationsChecked(nodeDef, 2);
1594     ParsedTfOperation* inputNode = inputs[0].m_IndexedValue;
1595
1596     if (!HasParsedConstTensor<int32_t>(inputs[1].m_IndexedValue->GetNode().name()))
1597     {
1598         throw ParseException(
1599             boost::str(
1600                 boost::format(
1601                     "ArmNN only supports Reshape layers with constant shapes. "
1602                     "Input %1% Node %2% %3%")
1603                     % inputs[1].m_IndexedValue->GetNode().name()
1604                     % nodeDef.name()
1605                     % CHECK_LOCATION().AsString()));
1606     }
1607     ParsedConstTfOperation<int32_t>* shapeNode =
1608         boost::polymorphic_downcast<ParsedConstTfOperation<int32_t>*>(inputs[1].m_IndexedValue);
1609
1610     armnn::IOutputSlot& prevLayerOutputSlot = inputNode->ResolveArmnnOutputSlot(inputs[0].m_Index);
1611     TensorInfo inputTensorInfo = prevLayerOutputSlot.GetTensorInfo();
1612
1613     std::vector<int32_t> shapeTensorData;
1614     ConstTensor shapeTensor = shapeNode->GetConstTensor(false, shapeTensorData);
1615     const TensorInfo outputTensorInfo = PrepareReshape(inputTensorInfo, shapeTensorData);
1616
1617     TensorShape targetShape = outputTensorInfo.GetShape();
1618     ReshapeDescriptor reshapeDesc;
1619     reshapeDesc.m_TargetShape = targetShape;
1620
1621     IConnectableLayer* layer = m_Network->AddReshapeLayer(reshapeDesc, nodeDef.name().c_str());
1622     prevLayerOutputSlot.Connect(layer->GetInputSlot(0));
1623     layer->GetOutputSlot(0).SetTensorInfo(outputTensorInfo);
1624
1625     return std::make_unique<SingleLayerParsedTfOperation>(this, nodeDef, layer);
1626 }
1627
1628 ParsedTfOperationPtr TfParser::ParseResizeBilinear(const tensorflow::NodeDef& nodeDef,
1629     const tensorflow::GraphDef& graphDef)
1630 {
1631     std::vector<OutputOfParsedTfOperation> inputs = GetInputParsedTfOperationsChecked(nodeDef, 2);
1632
1633     if (!HasParsedConstTensor<int32_t>(inputs[1].m_IndexedValue->GetNode().name()))
1634     {
1635         throw ParseException(
1636             boost::str(
1637                 boost::format(
1638                     "ArmNN only supports ResizeBilinear layers with constant sizes. "
1639                     "Input %1%. Node %2% %3%")
1640                     % inputs[1].m_IndexedValue->GetNode().name()
1641                     % nodeDef.name()
1642                     % CHECK_LOCATION().AsString()));
1643     }
1644     ParsedConstTfOperation<int32_t>* sizeNode =
1645         boost::polymorphic_downcast<ParsedConstTfOperation<int32_t>*>(inputs[1].m_IndexedValue);
1646
1647     // Checks the align_corners attribute is not set.
1648     if (ReadOptionalNodeBoolAttribute(nodeDef, "align_corners", false))
1649     {
1650         throw ParseException(
1651             boost::str(
1652                 boost::format(
1653                     "ArmNN only supports ResizeBilinear layers with align_corners set to false. "
1654                     "Node %1% %2%")
1655                     % nodeDef.name()
1656                     % CHECK_LOCATION().AsString()));
1657     }
1658
1659     // Data for the parsed tensor args (size) must be stored locally.
1660     std::vector<int32_t> sizeTensorData;
1661     ConstTensor sizeTensor = sizeNode->GetConstTensor(false, sizeTensorData);
1662
1663     // The descriptor only has target height and width attributes, which we get from the size tensor.
1664     ResizeBilinearDescriptor desc;
1665     desc.m_TargetHeight = static_cast<uint32_t> (sizeTensorData[0]);
1666     desc.m_TargetWidth = static_cast<uint32_t> (sizeTensorData[1]);
1667
1668     IConnectableLayer* layer = m_Network->AddResizeBilinearLayer(desc, nodeDef.name().c_str());
1669
1670     IOutputSlot& inputSlot = inputs[0].m_IndexedValue->ResolveArmnnOutputSlot(inputs[0].m_Index);
1671     TensorInfo inputTensorInfo = inputSlot.GetTensorInfo();
1672     // The input shape is always in BHWC format, this will be swizzled below; for now,
1673     // get the batch and channels to make up the ArmNN output shape with the target size.
1674     unsigned int outBatch = inputTensorInfo.GetShape()[0];
1675     unsigned int outChannels = inputTensorInfo.GetShape()[3];
1676     unsigned int outHeight = desc.m_TargetHeight;
1677     unsigned int outWidth = desc.m_TargetWidth;
1678     TensorShape outShape({outBatch, outChannels, outHeight, outWidth});
1679     // The output DataType is always Float32, regardless of the input DataType.
1680     const TensorInfo outputTensorInfo(outShape, armnn::DataType::Float32);
1681     layer->GetOutputSlot(0).SetTensorInfo(outputTensorInfo);
1682
1683     // TensorFlow ResizeBilinear input is always in BHWC format, so add swizzle and deswizzle layers.
1684     layer = SwizzleInDeswizzleOut(*m_Network, inputSlot, *layer, nodeDef.name());
1685
1686     return std::make_unique<SingleLayerParsedTfOperation>(this, nodeDef, layer);
1687 }
1688
1689 TensorInfo OutputShapeOfSqueeze(const tensorflow::NodeDef& nodeDef, TensorInfo inputTensorInfo)
1690 {
1691     BOOST_ASSERT(nodeDef.op() == "Squeeze");
1692     tensorflow::DataType tfDataType = ReadMandatoryNodeTypeAttribute(nodeDef, "T");
1693
1694     DataType type;
1695     if (tfDataType == tensorflow::DT_FLOAT)
1696     {
1697         type = DataType::Float32;
1698     }
1699     else if (tfDataType == tensorflow::DT_INT32)
1700     {
1701         type = DataType::Signed32;
1702     }
1703     else
1704     {
1705         throw ParseException(
1706             boost::str(
1707                 boost::format("Unsupported DataType %1% for Squeeze operation %2% %3%")
1708                 % tensorflow::DataType_Name(tfDataType)
1709                 % nodeDef.name()
1710                 % CHECK_LOCATION().AsString()));
1711     }
1712
1713
1714     if (inputTensorInfo.GetNumDimensions() > 4)
1715     {
1716         throw ParseException(
1717             boost::str(
1718                 boost::format(
1719                     "Unsupported number of dimensions: %1% for input shape for Squeeze %2% %3%")
1720                     % inputTensorInfo.GetNumDimensions()
1721                     % nodeDef.name()
1722                     % CHECK_LOCATION().AsString()));
1723     }
1724
1725     std::vector<uint32_t> squeezeDims = ReadOptionalNodeUint32ListAttribute(nodeDef, "squeeze_dims");
1726     static const uint32_t dimensionSequence[] = { 0, 1, 2, 3 };
1727
1728     if (squeezeDims.empty())
1729     {
1730         squeezeDims.assign(dimensionSequence,
1731                            dimensionSequence+inputTensorInfo.GetNumDimensions());
1732     }
1733
1734     std::vector<uint32_t> outputDims;
1735     for(unsigned int i = 0; i < inputTensorInfo.GetNumDimensions(); i++)
1736     {
1737         bool skipSqueeze = (std::find(squeezeDims.begin(), squeezeDims.end(), i) == squeezeDims.end());
1738         auto currentDimension = inputTensorInfo.GetShape()[i];
1739         if (skipSqueeze || currentDimension != 1)
1740         {
1741             outputDims.push_back(currentDimension);
1742         }
1743     }
1744
1745     if (outputDims.size() > 4)
1746     {
1747         throw ParseException(
1748             boost::str(
1749                 boost::format(
1750                     "Unsupported number of dimensions: %1% for output shape for Squeeze %2% %3%")
1751                     % outputDims.size()
1752                     % nodeDef.name()
1753                     % CHECK_LOCATION().AsString()));
1754     }
1755
1756     TensorShape outShape = TensorShape(static_cast<unsigned int>(outputDims.size()),
1757                                        outputDims.data());
1758
1759     TensorInfo outTensorInfo = inputTensorInfo;
1760     outTensorInfo.SetShape(outShape);
1761     outTensorInfo.SetDataType(type);
1762
1763     return outTensorInfo;
1764 }
1765
1766 ParsedTfOperationPtr TfParser::ParseSqueeze(const tensorflow::NodeDef& nodeDef, const tensorflow::GraphDef& graphDef)
1767 {
1768     std::vector<OutputOfParsedTfOperation> inputs = GetInputParsedTfOperationsChecked(nodeDef, 1);
1769
1770     IOutputSlot& prevLayerOutputSlot = inputs[0].m_IndexedValue->ResolveArmnnOutputSlot(inputs[0].m_Index);
1771     TensorInfo inputTensorInfo = prevLayerOutputSlot.GetTensorInfo();
1772
1773     TensorInfo outputInfo;
1774     outputInfo = OutputShapeOfSqueeze(nodeDef, inputTensorInfo);
1775
1776     ReshapeDescriptor reshapeDesc;
1777     reshapeDesc.m_TargetShape = outputInfo.GetShape();
1778     IConnectableLayer* layer = m_Network->AddReshapeLayer(reshapeDesc, nodeDef.name().c_str());
1779     prevLayerOutputSlot.Connect(layer->GetInputSlot(0));
1780     layer->GetOutputSlot(0).SetTensorInfo(outputInfo);
1781
1782     return std::make_unique<SingleLayerParsedTfOperation>(this, nodeDef, layer);
1783 }
1784
1785 ParsedTfOperationPtr TfParser::ParseLrn(const tensorflow::NodeDef& nodeDef, const tensorflow::GraphDef& graphDef)
1786 {
1787     std::vector<OutputOfParsedTfOperation> inputs = GetInputParsedTfOperationsChecked(nodeDef, 1);
1788
1789     NormalizationDescriptor normalizationDescriptor;
1790     normalizationDescriptor.m_NormMethodType = NormalizationAlgorithmMethod::LocalBrightness;
1791     normalizationDescriptor.m_NormChannelType = NormalizationAlgorithmChannel::Across;
1792     normalizationDescriptor.m_Alpha = ReadMandatoryNodeFloatAttribute(nodeDef, "alpha");
1793     normalizationDescriptor.m_Beta = ReadMandatoryNodeFloatAttribute(nodeDef, "beta");
1794     normalizationDescriptor.m_K = ReadMandatoryNodeFloatAttribute(nodeDef, "bias");
1795     normalizationDescriptor.m_NormSize = ReadMandatoryNodeUint32Attribute(nodeDef, "depth_radius");
1796
1797     // The window size must be an odd value. For a window size of (2 * n + 1), TensorFlow defines depth_radius = n.
1798     normalizationDescriptor.m_NormSize = normalizationDescriptor.m_NormSize * 2 + 1;
1799
1800     IOutputSlot& prevLayerOutputSlot = inputs[0].m_IndexedValue->ResolveArmnnOutputSlot(inputs[0].m_Index);
1801
1802     IConnectableLayer* layer = m_Network->AddNormalizationLayer(normalizationDescriptor,
1803         nodeDef.name().c_str());
1804
1805     const TensorInfo permutedInfo = armnnUtils::Permuted(prevLayerOutputSlot.GetTensorInfo(), NHWCToArmNN);
1806     layer->GetOutputSlot(0).SetTensorInfo(permutedInfo);
1807
1808     layer = SwizzleInDeswizzleOut(*m_Network, prevLayerOutputSlot, *layer, nodeDef.name());
1809
1810     return std::make_unique<SingleLayerParsedTfOperation>(this, nodeDef, layer);
1811 }
1812
1813 /// An ParsedTfOperation for a MatMul node.
1814 /// Creation of the armnn FullyConnected layer is deferred until it is actually needed, because
1815 /// MatMul nodes are often used for the first part of a biased FullyConnected (MatMul followed
1816 /// by Add) and in these cases armnn doesn't need a separate layer for the MatMul.
1817 ///
1818 class ParsedMatMulTfOperation : public DeferredSingleLayerParsedTfOperation
1819 {
1820 public:
1821     ParsedMatMulTfOperation(TfParser* parser, const tensorflow::NodeDef& node)
1822         : DeferredSingleLayerParsedTfOperation(parser, node)
1823     {
1824     }
1825
1826     void CreateLayerDeferred() override
1827     {
1828         BOOST_ASSERT(m_Layer == nullptr);
1829         m_Layer = m_Parser->AddFullyConnectedLayer(m_Node, nullptr, m_Node.name().c_str());
1830     }
1831 };
1832
1833 ParsedTfOperationPtr TfParser::ParseMatMul(const tensorflow::NodeDef& nodeDef, const tensorflow::GraphDef& graphDef)
1834 {
1835     // Defers the creation of the layer (see ParsedMatMulTfOperation).
1836     return std::make_unique<ParsedMatMulTfOperation>(this, nodeDef);
1837 }
1838
1839 /// An ParsedTfOperation for a Mul node.
1840 /// Creation of the armnn Mul layer is deferred until it is actually needed, because Mul nodes
1841 /// are also used for the first part of a leaky relu activation function (Mul followed by Maximum)
1842 /// and in these cases armnn doesn't need a separate layer for the Mul.
1843 ///
1844 class ParsedMulTfOperation : public DeferredSingleLayerParsedTfOperation
1845 {
1846 public:
1847     ParsedMulTfOperation(TfParser* parser, const tensorflow::NodeDef& node)
1848         : DeferredSingleLayerParsedTfOperation(parser, node)
1849     {
1850     }
1851
1852     void CreateLayerDeferred() override
1853     {
1854         BOOST_ASSERT(m_Layer == nullptr);
1855         m_Layer = m_Parser->AddMultiplicationLayer(m_Node);
1856     }
1857 };
1858
1859 ParsedTfOperationPtr TfParser::ParseMul(const tensorflow::NodeDef& nodeDef, const tensorflow::GraphDef& graphDef)
1860 {
1861     boost::ignore_unused(graphDef);
1862
1863     return std::make_unique<ParsedMulTfOperation>(this, nodeDef);
1864 }
1865
1866 ParsedTfOperationPtr TfParser::ParsePlaceholder(const tensorflow::NodeDef& nodeDef,
1867     const tensorflow::GraphDef& graphDef)
1868 {
1869     boost::ignore_unused(graphDef);
1870
1871     std::vector<OutputOfParsedTfOperation> inputs = GetInputParsedTfOperationsChecked(nodeDef, 0);
1872
1873     const LayerBindingId layerId = boost::numeric_cast<LayerBindingId>(m_NetworkInputsBindingInfo.size());
1874
1875     auto it = m_InputShapes.find(nodeDef.name());
1876     if (it == m_InputShapes.end())
1877     {
1878         throw ParseException(
1879             boost::str(
1880                 boost::format(
1881                     "Missing input shape for Placeholder '%1%' %2%")
1882                     % nodeDef.name()
1883                     % CHECK_LOCATION().AsString()));
1884     }
1885     TensorInfo tensorInfo(it->second, DataType::Float32);
1886
1887     IConnectableLayer* const layer = m_Network->AddInputLayer(layerId, nodeDef.name().c_str());
1888
1889     layer->GetOutputSlot(0).SetTensorInfo(tensorInfo);
1890
1891     TrackInputBinding(layer, layerId, tensorInfo);
1892
1893     return std::make_unique<SingleLayerParsedTfOperation>(this, nodeDef, layer);
1894 }
1895
1896 ParsedTfOperationPtr TfParser::ParseRelu(const tensorflow::NodeDef& nodeDef,
1897     const tensorflow::GraphDef& graphDef)
1898 {
1899     boost::ignore_unused(graphDef);
1900
1901     ActivationDescriptor activationDesc;
1902     activationDesc.m_Function = ActivationFunction::ReLu;
1903     return AddActivationLayer(nodeDef, activationDesc);
1904 }
1905
1906 ParsedTfOperationPtr TfParser::ParseRelu6(const tensorflow::NodeDef& nodeDef,
1907     const tensorflow::GraphDef& graphDef)
1908 {
1909     boost::ignore_unused(graphDef);
1910
1911     ActivationDescriptor activationDesc;
1912     activationDesc.m_Function = ActivationFunction::BoundedReLu;
1913     activationDesc.m_A = 6.0f;
1914     activationDesc.m_B = 0.0f;
1915
1916     return AddActivationLayer(nodeDef, activationDesc);
1917 }
1918
1919 ParsedTfOperationPtr TfParser::ParseSigmoid(const tensorflow::NodeDef& nodeDef,
1920     const tensorflow::GraphDef& graphDef)
1921 {
1922     boost::ignore_unused(graphDef);
1923
1924     ActivationDescriptor activationDesc;
1925     activationDesc.m_Function = ActivationFunction::Sigmoid;
1926
1927     return AddActivationLayer(nodeDef, activationDesc);
1928 }
1929
1930 ParsedTfOperationPtr TfParser::ParseSoftmax(const tensorflow::NodeDef& nodeDef,
1931     const tensorflow::GraphDef& graphDef)
1932 {
1933     boost::ignore_unused(graphDef);
1934
1935     std::vector<OutputOfParsedTfOperation> inputs = GetInputParsedTfOperationsChecked(nodeDef, 1);
1936
1937     SoftmaxDescriptor softmaxDescriptor;
1938     IConnectableLayer* const layer = m_Network->AddSoftmaxLayer(softmaxDescriptor, nodeDef.name().c_str());
1939
1940     IOutputSlot& prevLayerSlot = inputs[0].m_IndexedValue->ResolveArmnnOutputSlot(inputs[0].m_Index);
1941     prevLayerSlot.Connect(layer->GetInputSlot(0));
1942     layer->GetOutputSlot(0).SetTensorInfo(prevLayerSlot.GetTensorInfo());
1943
1944     return std::make_unique<SingleLayerParsedTfOperation>(this, nodeDef, layer);
1945 }
1946
1947 ParsedTfOperationPtr TfParser::ParseSoftplus(const tensorflow::NodeDef& nodeDef,
1948     const tensorflow::GraphDef& graphDef)
1949 {
1950     boost::ignore_unused(graphDef);
1951
1952     ActivationDescriptor activationDesc;
1953     activationDesc.m_Function = ActivationFunction::SoftReLu;
1954
1955     return AddActivationLayer(nodeDef, activationDesc);
1956 }
1957
1958 ParsedTfOperationPtr TfParser::ParseTanh(const tensorflow::NodeDef& nodeDef, const tensorflow::GraphDef& graphDef)
1959 {
1960     boost::ignore_unused(graphDef);
1961
1962     ActivationDescriptor activationDesc;
1963     activationDesc.m_Function = ActivationFunction::TanH;
1964     activationDesc.m_A = 1.0f;
1965     activationDesc.m_B = 1.0f;
1966
1967     return AddActivationLayer(nodeDef, activationDesc);
1968 }
1969
1970 ParsedTfOperationPtr TfParser::AddActivationLayer(const tensorflow::NodeDef& nodeDef,
1971     ActivationDescriptor& activationDesc)
1972 {
1973     std::vector<OutputOfParsedTfOperation> inputs = GetInputParsedTfOperationsChecked(nodeDef, 1);
1974
1975     IConnectableLayer* const layer = m_Network->AddActivationLayer(activationDesc, nodeDef.name().c_str());
1976
1977     IOutputSlot& prevLayerOutputSlot = inputs[0].m_IndexedValue->ResolveArmnnOutputSlot(inputs[0].m_Index);
1978     prevLayerOutputSlot.Connect(layer->GetInputSlot(0));
1979     layer->GetOutputSlot(0).SetTensorInfo(prevLayerOutputSlot.GetTensorInfo());
1980     return std::make_unique<SingleLayerParsedTfOperation>(this, nodeDef, layer);
1981 }
1982
1983 ParsedTfOperationPtr TfParser::ParseMaxPool(const tensorflow::NodeDef& nodeDef,
1984     const tensorflow::GraphDef& graphDef)
1985 {
1986     return ParsePooling2d(nodeDef, graphDef, PoolingAlgorithm::Max);
1987 }
1988
1989 ParsedTfOperationPtr TfParser::ParseAvgPool(const tensorflow::NodeDef& nodeDef,
1990     const tensorflow::GraphDef& graphDef)
1991 {
1992     return ParsePooling2d(nodeDef, graphDef, PoolingAlgorithm::Average);
1993 }
1994
1995 ParsedTfOperationPtr TfParser::ParsePooling2d(const tensorflow::NodeDef& nodeDef,
1996     const tensorflow::GraphDef& graphDef, PoolingAlgorithm pooltype)
1997 {
1998     std::vector<OutputOfParsedTfOperation> inputs = GetInputParsedTfOperationsChecked(nodeDef, 1);
1999     IOutputSlot& inputSlot = inputs[0].m_IndexedValue->ResolveArmnnOutputSlot(inputs[0].m_Index);
2000     TensorInfo inputTensorInfo = inputSlot.GetTensorInfo();
2001
2002     if (inputs.size() != 1)
2003     {
2004         throw ParseException(
2005             boost::str(
2006                 boost::format(
2007                     "2D Pooling expects one input!. Got %1% for Node %2% %3%")
2008                     % inputs.size()
2009                     % nodeDef.name()
2010                     % CHECK_LOCATION().AsString()));
2011     }
2012
2013     std::string paddingString = ReadMandatoryNodeStringAttribute(nodeDef, "padding");
2014     std::string dataFormat = ReadMandatoryNodeStringAttribute(nodeDef, "data_format");
2015     std::vector<uint32_t> strides = ReadMandatoryNodeUint32ListAttribute(nodeDef, "strides");
2016     std::vector<uint32_t> ksize = ReadMandatoryNodeUint32ListAttribute(nodeDef, "ksize"); // size of pool windows
2017
2018     Pooling2dDescriptor pooling2dDescriptor;
2019     pooling2dDescriptor.m_PoolType = pooltype;
2020     pooling2dDescriptor.m_PaddingMethod = PaddingMethod::Exclude;
2021     pooling2dDescriptor.m_OutputShapeRounding = OutputShapeRounding::Floor;
2022
2023     CHECK_DATA_FORMAT(nodeDef, dataFormat, "Pooling2D");
2024
2025     if (dataFormat == "NHWC")
2026     {
2027         pooling2dDescriptor.m_StrideX    = strides[2];
2028         pooling2dDescriptor.m_StrideY    = strides[1];
2029         pooling2dDescriptor.m_PoolWidth  = ksize[2];
2030         pooling2dDescriptor.m_PoolHeight = ksize[1];
2031         // Swizzles input to supported memory layout.
2032         inputTensorInfo = armnnUtils::Permuted(inputSlot.GetTensorInfo(), NHWCToArmNN);
2033     }
2034     else if (dataFormat == "NCHW")
2035     {
2036         pooling2dDescriptor.m_StrideX    = strides[3];
2037         pooling2dDescriptor.m_StrideY    = strides[2];
2038         pooling2dDescriptor.m_PoolWidth  = ksize[3];
2039         pooling2dDescriptor.m_PoolHeight = ksize[2];
2040     }
2041
2042     uint32_t inputHeight = inputTensorInfo.GetShape()[2];
2043     uint32_t inputWidth = inputTensorInfo.GetShape()[3];
2044
2045     bool padding = false;
2046     TensorInfo outputInfo;
2047
2048     CHECK_PADDING_TYPE(nodeDef, paddingString);
2049
2050     if (paddingString == "SAME")
2051     {
2052         padding = true;
2053         outputInfo = TensorInfo({ inputTensorInfo.GetShape()[0],
2054                                   inputTensorInfo.GetShape()[1],
2055                                   static_cast<uint32_t>(ceil(
2056                                       static_cast<float>(inputHeight) /
2057                                       static_cast<float>(pooling2dDescriptor.m_StrideY))),
2058                                   static_cast<uint32_t>(ceil(
2059                                       static_cast<float>(inputWidth) /
2060                                       static_cast<float>(pooling2dDescriptor.m_StrideX)))
2061                                 }, DataType::Float32);
2062     }
2063     else if (paddingString == "VALID")
2064     {
2065         padding = false;
2066         outputInfo = TensorInfo({ inputTensorInfo.GetShape()[0],
2067                                   inputTensorInfo.GetShape()[1],
2068                                   static_cast<uint32_t>(ceil(
2069                                       static_cast<float>(inputHeight - pooling2dDescriptor.m_PoolHeight + 1) /
2070                                       static_cast<float>(pooling2dDescriptor.m_StrideY))),
2071                                   static_cast<uint32_t>(ceil(
2072                                       static_cast<float>(inputWidth - pooling2dDescriptor.m_PoolWidth + 1) /
2073                                       static_cast<float>(pooling2dDescriptor.m_StrideX)))
2074                                 }, DataType::Float32);
2075     }
2076
2077     CalcPadding(inputWidth, pooling2dDescriptor.m_PoolWidth, pooling2dDescriptor.m_StrideX,
2078                     pooling2dDescriptor.m_PadLeft, pooling2dDescriptor.m_PadRight, padding);
2079     CalcPadding(inputHeight, pooling2dDescriptor.m_PoolHeight, pooling2dDescriptor.m_StrideY,
2080                     pooling2dDescriptor.m_PadTop, pooling2dDescriptor.m_PadBottom, padding);
2081
2082
2083     IConnectableLayer* layer = m_Network->AddPooling2dLayer(pooling2dDescriptor, nodeDef.name().c_str());
2084     if (layer == nullptr)
2085     {
2086         throw ParseException(
2087             boost::str(
2088                 boost::format(
2089                     "Failed to add pooling2d layer for %1% %2%")
2090                     % nodeDef.name()
2091                     % CHECK_LOCATION().AsString()));
2092     }
2093
2094     layer->GetOutputSlot(0).SetTensorInfo(outputInfo);
2095
2096     if (dataFormat == "NHWC")
2097     {
2098         layer = SwizzleInDeswizzleOut(*m_Network, inputSlot, *layer, nodeDef.name());
2099     }
2100     else
2101     {
2102         inputSlot.Connect(layer->GetInputSlot(0));
2103     }
2104
2105     return std::make_unique<SingleLayerParsedTfOperation>(this, nodeDef, layer);
2106 }
2107
// Shared implementation for Add and BiasAdd nodes. When isBiasAdd is true, the
// second input is a 1D bias that is reshaped for broadcast; otherwise either
// side may be a 1D tensor that needs broadcasting against the other.
ParsedTfOperationPtr TfParser::AddAdditionLayer(const tensorflow::NodeDef& nodeDef, bool isBiasAdd)
{
    std::vector<OutputOfParsedTfOperation> inputs = GetInputParsedTfOperationsChecked(nodeDef, 2);

    IOutputSlot* input0Slot = &inputs[0].m_IndexedValue->ResolveArmnnOutputSlot(inputs[0].m_Index);
    IOutputSlot* input1Slot = &inputs[1].m_IndexedValue->ResolveArmnnOutputSlot(inputs[1].m_Index);

    // NOTE: these references stay bound to the ORIGINAL slots' infos even after the
    // slot pointers are redirected by BroadcastForAddandMul below; the final
    // output-info selection relies on that.
    const TensorInfo& input0Info = input0Slot->GetTensorInfo();
    const TensorInfo& input1Info = input1Slot->GetTensorInfo();

    if (isBiasAdd)
    {
        // BiasAdd takes bias as a 1D tensor. We need to add a reshape layer to create a 4D tensor
        // with the same data in the correct dimension for broadcast in addition.
        if(input1Info.GetNumDimensions() != 1)
        {
            throw ParseException(
                boost::str(
                    boost::format(
                        "Unsupported bias for BiasAdd. It should be a 1D vector. "
                        "Got %1% dimensions for input %2%. Node %3% %4%")
                        % input1Info.GetNumDimensions()
                        % inputs[1].m_IndexedValue->GetNode().name()
                        % nodeDef.name()
                        % CHECK_LOCATION().AsString()));
        }

        const std::string dataFormat = ReadMandatoryNodeStringAttribute(nodeDef, "data_format");

        CHECK_DATA_FORMAT(nodeDef, dataFormat, "BiasAdd");
        input1Slot = BroadcastForAddandMul(input0Slot, input1Slot, dataFormat == "NHWC", *m_Network, nodeDef);
    }
    else
    {
        // For a plain Add, broadcast whichever side is 1D against the other.
        if (input0Info.GetNumDimensions() == 1)
        {
            const bool isNHWC = true;
            input0Slot = BroadcastForAddandMul(input1Slot, input0Slot, isNHWC, *m_Network, nodeDef);
        }

        // Note: uses the possibly-updated input0Slot; order matters here.
        if (input1Info.GetNumDimensions() == 1)
        {
            const bool isNHWC = true;
            input1Slot = BroadcastForAddandMul(input0Slot, input1Slot, isNHWC, *m_Network, nodeDef);
        }
    }

    IConnectableLayer* const layer = m_Network->AddAdditionLayer(nodeDef.name().c_str());

    input0Slot->Connect(layer->GetInputSlot(0));
    input1Slot->Connect(layer->GetInputSlot(1));

    // The output takes the shape of the non-1D side (input0Info refers to the
    // original, pre-broadcast dimensions).
    if (input0Info.GetNumDimensions() == 1 && isBiasAdd == false)
    {
        layer->GetOutputSlot(0).SetTensorInfo(input1Slot->GetTensorInfo());
    }
    else
    {
        layer->GetOutputSlot(0).SetTensorInfo(input0Slot->GetTensorInfo());
    }

    return std::make_unique<SingleLayerParsedTfOperation>(this, nodeDef, layer);
}
2171
// Creates the armnn Multiplication layer for a Mul node, broadcasting the
// lower-rank input against the higher-rank one when necessary. Called both
// directly from ParseMul's deferred path and from the leaky-relu fusion.
IConnectableLayer* TfParser::AddMultiplicationLayer(const tensorflow::NodeDef& nodeDef)
{
    std::vector<OutputOfParsedTfOperation> inputs = GetInputParsedTfOperationsChecked(nodeDef, 2);

    IConnectableLayer* const layer = m_Network->AddMultiplicationLayer(nodeDef.name().c_str());
    IOutputSlot* input0Slot = &inputs[0].m_IndexedValue->ResolveArmnnOutputSlot(inputs[0].m_Index);
    IOutputSlot* input1Slot = &inputs[1].m_IndexedValue->ResolveArmnnOutputSlot(inputs[1].m_Index);

    // Dimension counts are captured BEFORE any broadcast rewiring; the final
    // output-info choice below deliberately compares these original values.
    auto const input0NumDims = input0Slot->GetTensorInfo().GetNumDimensions();
    auto const input1NumDims = input1Slot->GetTensorInfo().GetNumDimensions();

    if (input0NumDims < input1NumDims)
    {
        const bool isNHWC = true;
        input0Slot = BroadcastForAddandMul(input1Slot, input0Slot, isNHWC, *m_Network, nodeDef);
    }
    if (input1NumDims < input0NumDims)
    {
        const bool isNHWC = true;
        input1Slot = BroadcastForAddandMul(input0Slot, input1Slot, isNHWC, *m_Network, nodeDef);
    }

    input0Slot->Connect(layer->GetInputSlot(0));
    input1Slot->Connect(layer->GetInputSlot(1));

    // The output takes the tensor info of the (originally) higher-rank input.
    if (input0NumDims < input1NumDims)
    {
        layer->GetOutputSlot(0).SetTensorInfo(input1Slot->GetTensorInfo());
    }
    else
    {
        layer->GetOutputSlot(0).SetTensorInfo(input0Slot->GetTensorInfo());
    }
    return layer;
}
2207
2208
2209 IConnectableLayer* TfParser::AddFullyConnectedLayer(const tensorflow::NodeDef& matMulNodeDef,
2210     const tensorflow::NodeDef* addNodeDef, const char* armnnLayerName)
2211 {
2212     // Finds bias const (if applicable).
2213     ParsedConstTfOperation<float>* biasNode = nullptr;
2214     if (addNodeDef != nullptr)
2215     {
2216         std::vector<OutputOfParsedTfOperation> addInputs = GetInputParsedTfOperationsChecked(*addNodeDef, 2);
2217         // Finds our inputs.
2218         if (HasParsedConstTensor<float>(addInputs[0].m_IndexedValue->GetNode().name()))
2219         {
2220             biasNode = boost::polymorphic_downcast<ParsedConstTfOperation<float>*>(addInputs[0].m_IndexedValue);
2221         }
2222         else if (HasParsedConstTensor<float>(addInputs[1].m_IndexedValue->GetNode().name()))
2223         {
2224             biasNode = boost::polymorphic_downcast<ParsedConstTfOperation<float>*>(addInputs[1].m_IndexedValue);
2225         }
2226         else
2227         {
2228             throw ParseException(
2229                 boost::str(
2230                     boost::format(
2231                         "ArmNN only supports fully connected layers with constant bias. "
2232                         "Inputs %1% and %2%. AddNode %3%. MatMulNode %4% %5%")
2233                         % addInputs[0].m_IndexedValue->GetNode().name()
2234                         % addInputs[1].m_IndexedValue->GetNode().name()
2235                         % addNodeDef->name()
2236                         % matMulNodeDef.name()
2237                         % CHECK_LOCATION().AsString()));
2238         }
2239     }
2240
2241     // Finds matmul inputs.
2242     ParsedConstTfOperation<float>* weightNode = nullptr;
2243     ParsedTfOperation* inputNode  = nullptr;
2244     unsigned int inputIdx = 0;
2245     std::vector<OutputOfParsedTfOperation> mulInputs = GetInputParsedTfOperationsChecked(matMulNodeDef, 2);
2246     if (HasParsedConstTensor<float>(mulInputs[0].m_IndexedValue->GetNode().name()))
2247     {
2248         weightNode = boost::polymorphic_downcast<ParsedConstTfOperation<float>*>(mulInputs[0].m_IndexedValue);
2249         inputNode = mulInputs[1].m_IndexedValue;
2250         inputIdx = mulInputs[1].m_Index;
2251     }
2252     else if (HasParsedConstTensor<float>(mulInputs[1].m_IndexedValue->GetNode().name()))
2253     {
2254         weightNode = boost::polymorphic_downcast<ParsedConstTfOperation<float>*>(mulInputs[1].m_IndexedValue);
2255         inputNode = mulInputs[0].m_IndexedValue;
2256         inputIdx = mulInputs[0].m_Index;
2257     }
2258     else
2259     {
2260         throw ParseException(
2261             boost::str(
2262                 boost::format(
2263                     "ArmNN only supports fully connected layers with constant weights. "
2264                     "Inputs %1% and %2%. MatMulNode %3% %4%")
2265                     % mulInputs[0].m_IndexedValue->GetNode().name()
2266                     % mulInputs[1].m_IndexedValue->GetNode().name()
2267                     % matMulNodeDef.name()
2268                     % CHECK_LOCATION().AsString()));
2269     }
2270
2271     std::vector<float> weightTensorData;
2272     // Handles weight.
2273     ConstTensor weights = weightNode->GetConstTensor(false, weightTensorData);
2274
2275     FullyConnectedDescriptor desc;
2276     desc.m_BiasEnabled = addNodeDef != nullptr;
2277
2278     IConnectableLayer* layer = nullptr;
2279     // Makes the layer.
2280     if (addNodeDef != nullptr)
2281     {
2282         std::vector<float> biasTensorData;
2283         ConstTensor biases = biasNode->GetConstTensor(false, biasTensorData);
2284
2285         if (weights.GetShape()[1] != biases.GetShape()[0])
2286         {
2287             throw ParseException(
2288                 boost::str(
2289                     boost::format(
2290                         "Shape of matmul weights and bias do not match. "
2291                         "AddNode %1%. MatMulNode %2% %3%")
2292                         % addNodeDef->name()
2293                         % matMulNodeDef.name()
2294                         % CHECK_LOCATION().AsString()));
2295         }
2296
2297         layer = m_Network->AddFullyConnectedLayer(desc, weights, biases, armnnLayerName);
2298     }
2299     else
2300     {
2301         layer = m_Network->AddFullyConnectedLayer(desc, weights, armnnLayerName);
2302     }
2303
2304     BOOST_ASSERT(layer != nullptr);
2305
2306     inputNode->ResolveArmnnOutputSlot(inputIdx).Connect(layer->GetInputSlot(0));
2307     unsigned int batches = inputNode->ResolveArmnnOutputSlot(inputIdx).GetTensorInfo().GetShape()[0];
2308
2309     // Handles output.
2310     TensorInfo outputInfo({ batches, weights.GetShape()[1] }, DataType::Float32);
2311     layer->GetOutputSlot(0).SetTensorInfo(outputInfo);
2312     return layer;
2313 }
2314
2315 void TfParser::LoadNodeDef(const tensorflow::NodeDef& nodeDef, const tensorflow::GraphDef& graphDef)
2316 {
2317     // Gets the type of the node (assume float).
2318     tensorflow::DataType type = tensorflow::DT_FLOAT;
2319     if (nodeDef.attr().count("T") != 0)
2320     {
2321         auto attr = nodeDef.attr().at("T");
2322         type      = attr.type();
2323     }
2324     else if (nodeDef.attr().count("dtype") != 0)
2325     {
2326         auto attr = nodeDef.attr().at("dtype");
2327         type      = attr.type();
2328     }
2329
2330     if (type != tensorflow::DT_FLOAT && nodeDef.op() != "Const")
2331     {
2332         throw ParseException(
2333             boost::str(
2334                 boost::format(
2335                     "Currently only FLOAT is supported for tensorflow nodes (apart from Const). "
2336                     "Got %1% for Node %2% %3%")
2337                     % tensorflow::DataType_Name(type)
2338                     % nodeDef.name()
2339                     % CHECK_LOCATION().AsString()));
2340     }
2341
2342     const std::string& operation = nodeDef.op();
2343     auto it = ms_OperationNameToParsingFunctions.find(operation);
2344     if (it != ms_OperationNameToParsingFunctions.end())
2345     {
2346         auto func = it->second;
2347         ParsedTfOperationPtr parsedTfOperation = (this->*func)(nodeDef, graphDef);
2348         ParsedTfOperation* parsedTfOperationRaw = parsedTfOperation.get();
2349
2350         // Stores the parsed operation so that dependent layers can connect to it.
2351         auto it = m_ParsedTfOperations.find(nodeDef.name());
2352         if (it != m_ParsedTfOperations.end())
2353         {
2354             throw ParseException(boost::str(boost::format("Name %1% used by more than one node") % nodeDef.name()));
2355         }
2356         m_ParsedTfOperations[nodeDef.name()] = std::move(parsedTfOperation);
2357
2358         // If this node was requested as an output from the network, then adds an ArmNN output layer.
2359         if (std::find(m_RequestedOutputs.begin(), m_RequestedOutputs.end(), nodeDef.name()) !=
2360             m_RequestedOutputs.end())
2361         {
2362             auto outId = ParseOutputId(nodeDef.name());
2363             const LayerBindingId layerId = boost::numeric_cast<LayerBindingId>(m_NetworkOutputsBindingInfo.size());
2364             IOutputSlot& prevSlot = parsedTfOperationRaw->ResolveArmnnOutputSlot(outId.m_Index);
2365
2366             TensorInfo tensorInfo = prevSlot.GetTensorInfo();
2367
2368             IConnectableLayer* outputLayer = m_Network->AddOutputLayer(layerId, nodeDef.name().c_str());
2369
2370             prevSlot.Connect(outputLayer->GetInputSlot(0));
2371
2372             TrackOutputBinding(outputLayer, layerId, tensorInfo);
2373         }
2374     }
2375     else
2376     {
2377         throw ParseException(
2378             boost::str(
2379                 boost::format(
2380                     "Unsupported operation %1% in tensorflow::GraphDef %2%")
2381                     % operation
2382                     % CHECK_LOCATION().AsString()));
2383     }
2384 }
2385
2386 void TfParser::LoadGraphDef(const tensorflow::GraphDef& graphDef)
2387 {
2388     // Adds all nodes to our map.
2389     m_NodesByName.clear();
2390     m_NetworkInputsBindingInfo.clear();
2391     m_NetworkOutputsBindingInfo.clear();
2392
2393     for (int i = 0; i < graphDef.node_size(); ++i)
2394     {
2395         const tensorflow::NodeDef& node = graphDef.node(i);
2396         m_NodesByName[node.name()]      = &node;
2397     }
2398
2399     // Finds the output nodes the user requested.
2400     std::vector<const tensorflow::NodeDef*> targetNodes;
2401     for (const std::string& requestedOutputName : m_RequestedOutputs)
2402     {
2403         auto nodeIt = m_NodesByName.find(requestedOutputName);
2404         if (nodeIt == m_NodesByName.end())
2405         {
2406             throw ParseException(
2407                 boost::str(
2408                     boost::format(
2409                         "Couldn't find requested output node '%1%' in graph %2%")
2410                         % requestedOutputName
2411                         % CHECK_LOCATION().AsString()));
2412         }
2413         targetNodes.push_back(nodeIt->second);
2414     }
2415
2416     // Sorts them into a linear ordering such that all inputs of a node are before the node itself.
2417     std::vector<const tensorflow::NodeDef*> sortedNodes;
2418     if (!armnnUtils::GraphTopologicalSort<const tensorflow::NodeDef*>(
2419         targetNodes,
2420         [this](const tensorflow::NodeDef* node)
2421         {
2422             auto outputs = GetTfInputNodes(*node);
2423             std::vector<const tensorflow::NodeDef*> nodesOnly;
2424             for (const auto & o : outputs) {
2425                 nodesOnly.push_back(o.m_IndexedValue);
2426             }
2427             return nodesOnly;
2428         },
2429         sortedNodes))
2430     {
2431         throw ParseException(
2432             boost::str(
2433                 boost::format(
2434                     "Cycle detected in graph %1%")
2435                     % CHECK_LOCATION().AsString()));
2436     }
2437
2438     // Parses each node in order, knowing that all inputs of a node will be processed before the node itself.
2439     for (const auto& it : sortedNodes)
2440     {
2441         const tensorflow::NodeDef& currentNode = *it;
2442         LoadNodeDef(currentNode, graphDef);
2443     }
2444 }
2445
2446 INetworkPtr TfParser::CreateNetworkFromTextFile(const char* graphFile,
2447     const std::map<std::string, TensorShape>& inputShapes,
2448     const std::vector<std::string>& requestedOutputs)
2449 {
2450     FILE* fd = fopen(graphFile, "r");
2451
2452     if (fd == nullptr)
2453     {
2454         throw FileNotFoundException(
2455             boost::str(
2456                 boost::format(
2457                     "Graph file %1% failed to open %2%")
2458                     % graphFile
2459                     % CHECK_LOCATION().AsString()));
2460     }
2461
2462     // Parses the file into a message.
2463     tensorflow::GraphDef graphDef;
2464     auto                 input   = new google::protobuf::io::FileInputStream(fileno(fd));
2465     bool                 success = google::protobuf::TextFormat::Parse(input, &graphDef);
2466     delete input;
2467     fclose(fd);
2468
2469     if (!success)
2470     {
2471         throw ParseException(
2472             boost::str(
2473                 boost::format(
2474                     "Failed to parse graph file %1%")
2475                     % CHECK_LOCATION().AsString()));
2476     }
2477
2478     return CreateNetworkFromGraphDef(graphDef, inputShapes, requestedOutputs);
2479 }
2480
2481 INetworkPtr TfParser::CreateNetworkFromString(const char* protoText,
2482     const std::map<std::string, TensorShape>& inputShapes,
2483     const std::vector<std::string>& requestedOutputs)
2484 {
2485     // Parses the string into a message.
2486     tensorflow::GraphDef graphDef;
2487     bool success = google::protobuf::TextFormat::ParseFromString(protoText, &graphDef);
2488
2489     if (!success)
2490     {
2491         throw ParseException(
2492             boost::str(
2493                 boost::format(
2494                     "Failed to parse graph file %1%")
2495                     % CHECK_LOCATION().AsString()));
2496     }
2497
2498     return CreateNetworkFromGraphDef(graphDef, inputShapes, requestedOutputs);
2499 }
2500
2501 INetworkPtr TfParser::CreateNetworkFromBinaryFile(const char* graphFile,
2502     const std::map<std::string, TensorShape>& inputShapes,
2503     const std::vector<std::string>& requestedOutputs)
2504 {
2505     FILE* fd = fopen(graphFile, "rb");
2506
2507     if (fd == nullptr)
2508     {
2509         throw FileNotFoundException(
2510             boost::str(
2511                 boost::format(
2512                     "Graph file %1% failed to open %2%")
2513                     % graphFile
2514                     % CHECK_LOCATION().AsString()));
2515     }
2516
2517     // Parses the file into a message.
2518     tensorflow::GraphDef graphDef;
2519
2520     google::protobuf::io::FileInputStream  inStream(fileno(fd));
2521     google::protobuf::io::CodedInputStream codedStream(&inStream);
2522     codedStream.SetTotalBytesLimit(INT_MAX, INT_MAX);
2523     bool success = graphDef.ParseFromCodedStream(&codedStream);
2524     fclose(fd);
2525
2526     if (!success)
2527     {
2528         throw ParseException(
2529             boost::str(
2530                 boost::format(
2531                     "Failed to parse protobuf file %1% %2%")
2532                     % graphFile
2533                     % CHECK_LOCATION().AsString()));
2534     }
2535
2536     return CreateNetworkFromGraphDef(graphDef, inputShapes, requestedOutputs);
2537 }
2538
2539 INetworkPtr TfParser::CreateNetworkFromGraphDef(const tensorflow::GraphDef& graphDef,
2540     const std::map<std::string, TensorShape>& inputShapes,
2541     const std::vector<std::string>& requestedOutputs)
2542 {
2543     m_Network = INetwork::Create();
2544
2545     m_InputShapes = inputShapes;
2546     if (requestedOutputs.size() == 0)
2547     {
2548         throw ParseException(
2549             boost::str(
2550                 boost::format(
2551                     "requestedOutputs must have at least one entry %1%")
2552                     % CHECK_LOCATION().AsString()));
2553     }
2554     m_RequestedOutputs = requestedOutputs;
2555
2556     try
2557     {
2558         LoadGraphDef(graphDef);
2559     }
2560     catch (const ParseException& e)
2561     {
2562         Cleanup();
2563         throw e;
2564     }
2565
2566     Cleanup();
2567
2568     return std::move(m_Network);
2569 }
2570
2571 void TfParser::Cleanup()
2572 {
2573     // Cleanup, in case we reuse this parser.
2574     m_InputShapes.clear();
2575     m_RequestedOutputs.clear();
2576     m_NodesByName.clear();
2577     m_ParsedTfOperations.clear();
2578 }
2579
// Returns the binding id and tensor info recorded for the named network input.
// Throws InvalidArgumentException (via GetBindingInfo) if the name is unknown.
BindingPointInfo TfParser::GetNetworkInputBindingInfo(const std::string& name) const
{
    return GetBindingInfo(name, "input", m_NetworkInputsBindingInfo);
}
2584
// Returns the binding id and tensor info recorded for the named network output.
// Throws InvalidArgumentException (via GetBindingInfo) if the name is unknown.
BindingPointInfo TfParser::GetNetworkOutputBindingInfo(const std::string& name) const
{
    return GetBindingInfo(name, "output", m_NetworkOutputsBindingInfo);
}
2589
2590 std::pair<LayerBindingId, TensorInfo> TfParser::GetBindingInfo(const std::string& layerName,
2591     const char* bindingPointDesc,
2592     const std::unordered_map<std::string, BindingPointInfo>& nameToBindingInfo)
2593 {
2594     auto it = nameToBindingInfo.find(layerName);
2595     if (it == nameToBindingInfo.end())
2596     {
2597         throw InvalidArgumentException(
2598             boost::str(
2599                 boost::format(
2600                     "Unknown %1% '%2%' %3%")
2601                     % bindingPointDesc
2602                     % layerName
2603                     % CHECK_LOCATION().AsString()));
2604     }
2605     return it->second;
2606 }
2607
2608 void TfParser::TrackInputBinding(IConnectableLayer* layer, LayerBindingId id, const TensorInfo& tensorInfo)
2609 {
2610     return TrackBindingPoint(layer, id, tensorInfo, "input", m_NetworkInputsBindingInfo);
2611 }
2612
2613 void TfParser::TrackOutputBinding(IConnectableLayer* layer, LayerBindingId id, const TensorInfo& tensorInfo)
2614 {
2615     return TrackBindingPoint(layer, id, tensorInfo, "output", m_NetworkOutputsBindingInfo);
2616 }
2617
2618 void TfParser::TrackBindingPoint(IConnectableLayer* layer,
2619     LayerBindingId id,
2620     const TensorInfo& tensorInfo,
2621     const char* bindingPointDesc,
2622     std::unordered_map<std::string, BindingPointInfo>& nameToBindingInfo)
2623 {
2624     const std::string layerName = layer->GetName();
2625     auto it = nameToBindingInfo.find(layerName);
2626     if (it == nameToBindingInfo.end())
2627     {
2628         nameToBindingInfo[layerName] = std::make_pair(id, tensorInfo);
2629     }
2630     else
2631     {
2632         throw ParseException(
2633             boost::str(
2634                 boost::format(
2635                     "Id %1% used by more than one %2% layer %3%")
2636                     % id
2637                     % bindingPointDesc
2638                     % CHECK_LOCATION().AsString()));
2639     }
2640 }
2641
2642 } // namespace armnnTfParser