// src/armnn/Layers.cpp (platform/upstream/armnn.git, release 18.03)
//
// Copyright © 2017 Arm Ltd. All rights reserved.
// See LICENSE file in the project root for full license information.
//
#include "Layers.hpp"
#include "Graph.hpp"

#include "backends/CpuTensorHandle.hpp"
#include "backends/Workload.hpp"
#include "backends/WorkloadFactory.hpp"

#include "Permute.hpp"

#include <algorithm>
#include <limits>
#include <queue>

namespace armnn
{

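// Shared by all the type-specific Clone() overrides below: constructs the cloned layer in
// the target graph and copies across the state every layer carries (compute device and GUID).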
template <typename LayerType, typename ... Params>
LayerType* Layer::CloneBase(Graph& graph, Params&& ... params) const
{
    LayerType* const layer = graph.AddLayer<LayerType>(std::forward<Params>(params)...);

    layer->SetComputeDevice(m_ComputeDevice);
    layer->SetGuid(GetGuid());

    return layer;
}

ActivationLayer::ActivationLayer(const ActivationDescriptor& param, const char* name)
    : LayerWithParameters(1, 1, LayerType::Activation, param, name)
{
}

std::unique_ptr<IWorkload> ActivationLayer::CreateWorkload(const Graph& graph, const IWorkloadFactory& factory) const
{
    ActivationQueueDescriptor descriptor;
    return factory.CreateActivation(descriptor, PrepInfoAndDesc(descriptor, graph));
}

ActivationLayer* ActivationLayer::Clone(Graph& graph) const
{
    return CloneBase<ActivationLayer>(graph, m_Param, GetName());
}

void ActivationLayer::ValidateTensorShapesFromInputs()
{
    auto& info = GetInputSlot(0).GetConnection()->GetTensorInfo();
    ConditionalThrow<LayerValidationException>(GetOutputSlot(0).ValidateTensorShape(info.GetShape()),
                     "ActivationLayer: TensorShape set on OutputSlot[0] does not match the inferred shape.");
}

AdditionLayer::AdditionLayer(const char* name)
    : Layer(2, 1, LayerType::Addition, name)
{
}

std::unique_ptr<IWorkload> AdditionLayer::CreateWorkload(const Graph& graph, const IWorkloadFactory& factory) const
{
    AdditionQueueDescriptor descriptor;
    return factory.CreateAddition(descriptor, PrepInfoAndDesc(descriptor, graph));
}

AdditionLayer* AdditionLayer::Clone(Graph& graph) const
{
    return CloneBase<AdditionLayer>(graph, GetName());
}

void AdditionLayer::ValidateTensorShapesFromInputs()
{
    auto& input0 = GetInputSlot(0).GetConnection()->GetTensorInfo();
    auto& input1 = GetInputSlot(1).GetConnection()->GetTensorInfo();

    // The output shape is the element-wise maximum of the two input shapes.
    BOOST_ASSERT(input0.GetNumDimensions() == input1.GetNumDimensions());
    unsigned int numDims = input0.GetNumDimensions();
    std::vector<unsigned int> dims(numDims);

    // Validate that the inputs are broadcast-compatible (debug builds only).
#ifndef NDEBUG
    for (unsigned int i = 0; i < numDims; i++)
    {
        unsigned int dim0 = input0.GetShape()[i];
        unsigned int dim1 = input1.GetShape()[i];
        if (dim0 != dim1)
        {
            BOOST_ASSERT_MSG(dim0 == 1 || dim1 == 1, "Dimensions should either match or one should be of size 1.");
        }
    }
#endif

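    // Broadcast example: inputs {1, 3, 1, 4} and {5, 3, 2, 4} give an output shape of
    // {5, 3, 2, 4}, taking the larger extent in every dimension.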
    for (unsigned int i = 0; i < numDims; i++)
    {
        unsigned int dim0 = input0.GetShape()[i];
        unsigned int dim1 = input1.GetShape()[i];
        dims[i] = std::max(dim0, dim1);
    }

    TensorShape outShape(numDims, dims.data());
    ConditionalThrow<LayerValidationException>(GetOutputSlot(0).ValidateTensorShape(outShape),
                     "AdditionLayer: TensorShape set on OutputSlot[0] does not match the inferred shape.");
}

BatchNormalizationLayer::BatchNormalizationLayer(const armnn::BatchNormalizationDescriptor& param, const char* name)
    : LayerWithParameters(1, 1, LayerType::BatchNormalization, param, name)
{
}

std::unique_ptr<IWorkload> BatchNormalizationLayer::CreateWorkload(const Graph& graph,
                                                                   const IWorkloadFactory& factory) const
{
    BatchNormalizationQueueDescriptor descriptor;

    descriptor.m_Mean = m_Mean.get();
    descriptor.m_Variance = m_Variance.get();
    descriptor.m_Beta = m_Beta.get();
    descriptor.m_Gamma = m_Gamma.get();
    return factory.CreateBatchNormalization(descriptor, PrepInfoAndDesc(descriptor, graph));
}

BatchNormalizationLayer* BatchNormalizationLayer::Clone(Graph& graph) const
{
    auto layer = CloneBase<BatchNormalizationLayer>(graph, m_Param, GetName());

    // Deep-copy the parameter tensors so the clone owns its own data.
    layer->m_Mean = m_Mean ? std::make_unique<ScopedCpuTensorHandle>(*m_Mean) : nullptr;
    layer->m_Variance = m_Variance ? std::make_unique<ScopedCpuTensorHandle>(*m_Variance) : nullptr;
    layer->m_Beta = m_Beta ? std::make_unique<ScopedCpuTensorHandle>(*m_Beta) : nullptr;
    layer->m_Gamma = m_Gamma ? std::make_unique<ScopedCpuTensorHandle>(*m_Gamma) : nullptr;

    return layer;
}

void BatchNormalizationLayer::ValidateTensorShapesFromInputs()
{
    ConditionalThrow<LayerValidationException>(GetInputSlot(0).GetConnection() != nullptr,
                     "BatchNormalizationLayer: InputSlot must be connected to an OutputSlot");
    ConditionalThrow<LayerValidationException>(GetInputSlot(0).GetConnection()->IsTensorInfoSet(),
                     "BatchNormalizationLayer: TensorInfo must be set on connected OutputSlot.");

    auto& info = GetInputSlot(0).GetConnection()->GetTensorInfo();
    ConditionalThrow<LayerValidationException>(GetOutputSlot(0).ValidateTensorShape(info.GetShape()),
                     "BatchNormalizationLayer: TensorShape set on OutputSlot[0] does not match the inferred shape.");
}

Convolution2dLayer::Convolution2dLayer(const Convolution2dDescriptor& param, const char* name)
    : LayerWithParameters(1, 1, LayerType::Convolution2d, param, name)
{
}

std::unique_ptr<IWorkload> Convolution2dLayer::CreateWorkload(const Graph& graph, const IWorkloadFactory& factory) const
{
    Convolution2dQueueDescriptor descriptor;

    descriptor.m_Weight = m_Weight.get();
    if (m_Param.m_BiasEnabled)
    {
        descriptor.m_Bias = m_Bias.get();
    }
    return factory.CreateConvolution2d(descriptor, PrepInfoAndDesc(descriptor, graph));
}

Convolution2dLayer* Convolution2dLayer::Clone(Graph& graph) const
{
    auto layer = CloneBase<Convolution2dLayer>(graph, m_Param, GetName());
    layer->m_Weight = m_Weight ? std::make_unique<ScopedCpuTensorHandle>(*m_Weight) : nullptr;

    if (layer->m_Param.m_BiasEnabled)
    {
        layer->m_Bias = m_Bias ? std::make_unique<ScopedCpuTensorHandle>(*m_Bias) : nullptr;
    }

    return layer;
}

void Convolution2dLayer::ValidateTensorShapesFromInputs()
{
    ConditionalThrow<LayerValidationException>(GetInputSlot(0).GetConnection() != nullptr,
                     "Convolution2dLayer: InputSlot must be connected to an OutputSlot");
    ConditionalThrow<LayerValidationException>(GetInputSlot(0).GetConnection()->IsTensorInfoSet(),
                     "Convolution2dLayer: TensorInfo must be set on connected OutputSlot.");

    IOutputSlot* input = GetInputSlot(0).GetConnection();
    const TensorShape& inputShape = input->GetTensorInfo().GetShape();
    const TensorShape filterShape = m_Weight->GetTensorInfo().GetShape();

    // If we support multiple batch dimensions in the future, then this assert will need to change.
    BOOST_ASSERT_MSG(inputShape.GetNumDimensions() == 4, "Convolutions will always have 4D input.");

    unsigned int inWidth = inputShape[3];
    unsigned int inHeight = inputShape[2];
    unsigned int inBatchSize = inputShape[0];

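    // Standard convolution output-size arithmetic, using integer division for the stride:
    //   out = (in + padBefore + padAfter - filter) / stride + 1
    // e.g. in = 7, pads = 1 and 1, filter = 3, stride = 2: (7 + 2 - 3) / 2 + 1 = 4.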
    unsigned int filterWidth = filterShape[3];
    unsigned int readWidth = (inWidth + m_Param.m_PadLeft + m_Param.m_PadRight) - filterWidth;
    unsigned int outWidth = 1 + (readWidth / m_Param.m_StrideX);

    unsigned int filterHeight = filterShape[2];
    unsigned int readHeight = (inHeight + m_Param.m_PadTop + m_Param.m_PadBottom) - filterHeight;
    unsigned int outHeight = 1 + (readHeight / m_Param.m_StrideY);

    unsigned int outChannels = filterShape[0];
    unsigned int outBatchSize = inBatchSize;

    TensorShape shapeOut({outBatchSize, outChannels, outHeight, outWidth});
    ConditionalThrow<LayerValidationException>(GetOutputSlot(0).ValidateTensorShape(shapeOut),
                     "Convolution2dLayer: TensorShape set on OutputSlot[0] does not match the inferred shape.");
}

DepthwiseConvolution2dLayer::DepthwiseConvolution2dLayer(const DepthwiseConvolution2dDescriptor& param,
                                                         const char* name)
    : LayerWithParameters(1, 1, LayerType::DepthwiseConvolution2d, param, name)
{
}

std::unique_ptr<IWorkload> DepthwiseConvolution2dLayer::CreateWorkload(const Graph& graph,
                                                                       const IWorkloadFactory& factory) const
{
    DepthwiseConvolution2dQueueDescriptor descriptor;

    descriptor.m_Weight = m_Weight.get();
    if (m_Param.m_BiasEnabled)
    {
        descriptor.m_Bias = m_Bias.get();
    }
    return factory.CreateDepthwiseConvolution2d(descriptor, PrepInfoAndDesc(descriptor, graph));
}

DepthwiseConvolution2dLayer* DepthwiseConvolution2dLayer::Clone(Graph& graph) const
{
    auto layer = CloneBase<DepthwiseConvolution2dLayer>(graph, m_Param, GetName());
    layer->m_Weight = m_Weight ? std::make_unique<ScopedCpuTensorHandle>(*m_Weight) : nullptr;

    if (layer->m_Param.m_BiasEnabled)
    {
        layer->m_Bias = m_Bias ? std::make_unique<ScopedCpuTensorHandle>(*m_Bias) : nullptr;
    }

    return layer;
}

void DepthwiseConvolution2dLayer::ValidateTensorShapesFromInputs()
{
    ConditionalThrow<LayerValidationException>(GetInputSlot(0).GetConnection() != nullptr,
                     "DepthwiseConvolution2dLayer: InputSlot must be connected to an OutputSlot");
    ConditionalThrow<LayerValidationException>(GetInputSlot(0).GetConnection()->IsTensorInfoSet(),
                     "DepthwiseConvolution2dLayer: TensorInfo must be set on connected OutputSlot.");

    IOutputSlot* input = GetInputSlot(0).GetConnection();
    const TensorShape& inputShape = input->GetTensorInfo().GetShape();
    const TensorShape filterShape = m_Weight->GetTensorInfo().GetShape();

    BOOST_ASSERT_MSG(inputShape.GetNumDimensions() == 4, "Convolutions will always have 4D input.");

    unsigned int inWidth = inputShape[3];
    unsigned int inHeight = inputShape[2];
    unsigned int inBatchSize = inputShape[0];

    unsigned int filterWidth = filterShape[3];
    unsigned int readWidth = (inWidth + m_Param.m_PadLeft + m_Param.m_PadRight) - filterWidth;
    unsigned int outWidth = 1 + (readWidth / m_Param.m_StrideX);

    unsigned int filterHeight = filterShape[2];
    unsigned int readHeight = (inHeight + m_Param.m_PadTop + m_Param.m_PadBottom) - filterHeight;
    unsigned int outHeight = 1 + (readHeight / m_Param.m_StrideY);
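    // The indexing below assumes the filter tensor is laid out as
    // [depthMultiplier, inChannels, filterHeight, filterWidth]; every input channel
    // therefore produces depthMultiplier output channels.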
    unsigned int depthMultiplier = filterShape[0];

    unsigned int outChannels = filterShape[1] * depthMultiplier;
    unsigned int outBatchSize = inBatchSize;

    TensorShape outShape({outBatchSize, outChannels, outHeight, outWidth});
    ConditionalThrow<LayerValidationException>(GetOutputSlot(0).ValidateTensorShape(outShape),
                     "DepthwiseConvolution2dLayer: "
                         "TensorShape set on OutputSlot[0] does not match the inferred shape.");
}

FakeQuantizationLayer::FakeQuantizationLayer(const FakeQuantizationDescriptor& param, const char* name)
    : LayerWithParameters(1, 1, LayerType::FakeQuantization, param, name)
{
}

std::unique_ptr<IWorkload> FakeQuantizationLayer::CreateWorkload(const Graph& graph,
                                                                 const IWorkloadFactory& factory) const
{
    FakeQuantizationQueueDescriptor descriptor;
    return factory.CreateFakeQuantization(descriptor, PrepInfoAndDesc(descriptor, graph));
}

FakeQuantizationLayer* FakeQuantizationLayer::Clone(Graph& graph) const
{
    return CloneBase<FakeQuantizationLayer>(graph, m_Param, GetName());
}

void FakeQuantizationLayer::ValidateTensorShapesFromInputs()
{
    ConditionalThrow<LayerValidationException>(GetInputSlot(0).GetConnection() != nullptr,
                     "FakeQuantizationLayer: InputSlot must be connected to an OutputSlot");
    ConditionalThrow<LayerValidationException>(GetInputSlot(0).GetConnection()->IsTensorInfoSet(),
                     "FakeQuantizationLayer: TensorInfo must be set on connected OutputSlot.");

    IOutputSlot* input = GetInputSlot(0).GetConnection();

    // Input and output shapes are the same.
    TensorShape const& outShape = input->GetTensorInfo().GetShape();
    ConditionalThrow<LayerValidationException>(GetOutputSlot(0).ValidateTensorShape(outShape),
                     "FakeQuantizationLayer: TensorShape set on OutputSlot[0] does not match the inferred shape.");
}

FloorLayer::FloorLayer(const char* name)
    : Layer(1, 1, LayerType::Floor, name)
{
}

std::unique_ptr<IWorkload> FloorLayer::CreateWorkload(const Graph& graph,
    const IWorkloadFactory& factory) const
{
    FloorQueueDescriptor descriptor;
    return factory.CreateFloor(descriptor, PrepInfoAndDesc(descriptor, graph));
}

FloorLayer* FloorLayer::Clone(Graph& graph) const
{
    return CloneBase<FloorLayer>(graph, GetName());
}

void FloorLayer::ValidateTensorShapesFromInputs()
{
    ConditionalThrow<LayerValidationException>(GetInputSlot(0).GetConnection() != nullptr,
                     "FloorLayer: InputSlot must be connected to an OutputSlot");
    ConditionalThrow<LayerValidationException>(GetInputSlot(0).GetConnection()->IsTensorInfoSet(),
                     "FloorLayer: TensorInfo must be set on connected OutputSlot.");

    // Input and output shapes are the same.
    IOutputSlot* input = GetInputSlot(0).GetConnection();
    TensorShape const& outShape = input->GetTensorInfo().GetShape();
    ConditionalThrow<LayerValidationException>(GetOutputSlot(0).ValidateTensorShape(outShape),
                     "FloorLayer: TensorShape set on OutputSlot[0] does not match the inferred shape.");
}

FullyConnectedLayer::FullyConnectedLayer(const FullyConnectedDescriptor& param, const char* name)
    : LayerWithParameters(1, 1, LayerType::FullyConnected, param, name)
{
}

std::unique_ptr<IWorkload> FullyConnectedLayer::CreateWorkload(const Graph& graph,
                                                               const IWorkloadFactory& factory) const
{
    FullyConnectedQueueDescriptor descriptor;

    descriptor.m_Weight = m_Weight.get();
    if (m_Param.m_BiasEnabled)
    {
        descriptor.m_Bias = m_Bias.get();
    }
    return factory.CreateFullyConnected(descriptor, PrepInfoAndDesc(descriptor, graph));
}

FullyConnectedLayer* FullyConnectedLayer::Clone(Graph& graph) const
{
    auto layer = CloneBase<FullyConnectedLayer>(graph, m_Param, GetName());

    layer->m_Weight = m_Weight ? std::make_unique<ScopedCpuTensorHandle>(*m_Weight) : nullptr;
    if (layer->m_Param.m_BiasEnabled)
    {
        layer->m_Bias = m_Bias ? std::make_unique<ScopedCpuTensorHandle>(*m_Bias) : nullptr;
    }

    return layer;
}

void FullyConnectedLayer::ValidateTensorShapesFromInputs()
{
    ConditionalThrow<LayerValidationException>(GetInputSlot(0).GetConnection() != nullptr,
                     "FullyConnectedLayer: InputSlot must be connected to an OutputSlot");
    ConditionalThrow<LayerValidationException>(GetInputSlot(0).GetConnection()->IsTensorInfoSet(),
                     "FullyConnectedLayer: TensorInfo must be set on connected OutputSlot.");

    TensorShape const& weightShape = m_Weight->GetTensorInfo().GetShape();

    // The output shape is [batches, outputSize], where outputSize is taken from the
    // weight matrix: dimension 1 normally, or dimension 0 when the weights are transposed.
    unsigned int batches = GetInputSlot(0).GetConnection()->GetTensorInfo().GetShape()[0];
    unsigned int dimIdx = m_Param.m_TransposeWeightMatrix ? 0 : 1;
    TensorShape outShape({batches, weightShape[dimIdx]});
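    // e.g. an input of shape {4, 20} with a weight matrix of shape {20, 100}
    // (m_TransposeWeightMatrix == false) gives an output shape of {4, 100}.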

    ConditionalThrow<LayerValidationException>(GetOutputSlot(0).ValidateTensorShape(outShape),
                     "FullyConnectedLayer: TensorShape set on OutputSlot[0] does not match the inferred shape.");
}

InputLayer::InputLayer(LayerBindingId id, const char* name)
    : BindableLayer(0, 1, LayerType::Input, name, id)
{
}

std::unique_ptr<IWorkload> InputLayer::CreateWorkload(const Graph& graph, const IWorkloadFactory& factory) const
{
    return nullptr;
}

InputLayer* InputLayer::Clone(Graph& graph) const
{
    return CloneBase<InputLayer>(graph, GetBindingId(), GetName());
}

void InputLayer::ValidateTensorShapesFromInputs()
{
    // The input layer should already have its output TensorInfo set during the
    // graph-building phase by the driver/parser.
    ConditionalThrow<LayerValidationException>(GetOutputSlot(0).IsTensorInfoSet(),
                                               "InputLayer should already have the TensorInfo set.");
}

MergerLayer::MergerLayer(const OriginsDescriptor& param, const char* name)
    : LayerWithParameters(param.GetNumViews(), 1, LayerType::Merger, param, name)
{
}

std::unique_ptr<IWorkload> MergerLayer::CreateWorkload(const Graph& graph, const IWorkloadFactory& factory) const
{
    MergerQueueDescriptor descriptor;

    // Copy the view origins to the descriptor.
    descriptor.m_ViewOrigins.reserve(m_Param.GetNumViews());
    for (unsigned int i = 0; i < m_Param.GetNumViews(); ++i)
    {
        descriptor.m_ViewOrigins.emplace_back(
            std::vector<unsigned int>(m_Param.GetViewOrigin(i), m_Param.GetViewOrigin(i) + m_Param.GetNumDimensions()));
    }

    return factory.CreateMerger(descriptor, PrepInfoAndDesc(descriptor, graph));
}

void MergerLayer::CreateTensorHandles(Graph& graph, const IWorkloadFactory& factory)
{
    // If sub-tensors are supported then the merger just needs to make sure that the outputs
    // of the previous layers are made into sub-tensors of the merger layer's own output.
    // Nested mergers are walked breadth-first so that their inputs also become sub-tensors
    // of the same parent tensor.
    m_OutputHandlers[0].CreateTensorHandles(factory);
    if (factory.SupportsSubTensors())
    {
        std::queue<MergerLayer*> mergerLayers;

        mergerLayers.push(this);
        while (!mergerLayers.empty())
        {
            MergerLayer* currentLayer = mergerLayers.front();
            ITensorHandle* parentTensor = currentLayer->GetOutputHandler(0).GetData();

            mergerLayers.pop();

            const unsigned int numInputSlots = currentLayer->GetNumInputSlots();
            for (unsigned int i = 0; i < numInputSlots; ++i)
            {
                OutputSlot* slot = currentLayer->GetInputSlot(i).GetConnectedOutputSlot();
                OutputHandler& outputHandler = slot->GetOutputHandler();
                outputHandler.SetData(factory.CreateSubTensorHandle(*parentTensor,
                                                                    outputHandler.GetTensorInfo().GetShape(),
                                                                    currentLayer->m_Param.GetViewOrigin(i)));

                Layer& inputLayer = slot->GetOwningLayer();
                if (inputLayer.GetType() == LayerType::Merger)
                {
                    mergerLayers.push(boost::polymorphic_downcast<MergerLayer*>(&inputLayer));
                }
            }
        }
    }
}

MergerLayer* MergerLayer::Clone(Graph& graph) const
{
    return CloneBase<MergerLayer>(graph, m_Param, GetName());
}

void MergerLayer::ValidateTensorShapesFromInputs()
{
    // Validate that the descriptor is consistent with the connected inputs.
    ConditionalThrow<LayerValidationException>(m_Param.GetNumViews() == GetNumInputSlots(),
                     "MergerLayer: Num Inputs must match num views.");

    unsigned int numDims = m_Param.GetNumDimensions();
    for (unsigned int i = 0; i < GetNumInputSlots(); i++)
    {
        auto& inputInfo = GetInputSlot(i).GetConnection()->GetTensorInfo();
        ConditionalThrow<LayerValidationException>(numDims == inputInfo.GetNumDimensions(),
                         "MergerLayer: Num Dimensions must match all inputs.");
    }

    // Find the bounding box (extents) of all the views. The minima start at the largest
    // possible value so that the first view establishes the true lower bound.
    std::vector<unsigned int> extentMin(numDims, std::numeric_limits<unsigned int>::max());
    std::vector<unsigned int> extentMax(numDims);
    for (unsigned int i = 0; i < GetNumInputSlots(); i++)
    {
        const uint32_t* origin = m_Param.GetViewOrigin(i);
        const armnn::TensorShape& shape = GetInputSlot(i).GetConnection()->GetTensorInfo().GetShape();
        for (unsigned int d = 0; d < numDims; d++)
        {
            extentMin[d] = std::min(extentMin[d], origin[d]);
            extentMax[d] = std::max(extentMax[d], origin[d] + shape[d]);
        }
    }

    // Check that the bounding box starts at the origin.
    if (!std::all_of(extentMin.begin(), extentMin.end(), [](unsigned int s) { return s == 0; }))
    {
        throw LayerValidationException("MergerLayer: there is no view that starts at the origin");
    }

    // Check that there are no overlaps of views (this would lead to undefined output at those locations).
    // Check each pair of views against each other
    // (and don't bother to check against self, or check the same pair both ways round).
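    // Two views overlap only if they overlap on every axis. Treating each axis as the
    // half-open interval [origin, origin + extent), disjointness on any single axis
    // (a2 <= b1 || b2 <= a1) is enough to rule the pair out.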
    for (unsigned int a = 0; a < GetNumInputSlots(); a++)
    {
        const uint32_t* aOrigin = m_Param.GetViewOrigin(a);
        const armnn::TensorShape& aShape = GetInputSlot(a).GetConnection()->GetTensorInfo().GetShape();
        for (unsigned int b = 0; b < a; b++)
        {
            const uint32_t* bOrigin = m_Param.GetViewOrigin(b);
            const armnn::TensorShape& bShape = GetInputSlot(b).GetConnection()->GetTensorInfo().GetShape();

            bool allAxesOverlap = true;
            for (unsigned int d = 0; d < numDims && allAxesOverlap; d++)
            {
                unsigned int a1 = aOrigin[d];
                unsigned int a2 = aOrigin[d] + aShape[d];

                unsigned int b1 = bOrigin[d];
                unsigned int b2 = bOrigin[d] + bShape[d];

                if (a2 <= b1 || b2 <= a1)
                {
                    allAxesOverlap = false;
                }
            }
            if (allAxesOverlap)
            {
                throw LayerValidationException("MergerLayer: Some views overlap.");
            }
        }
    }

    // Check that there are no "holes", i.e. regions of the output which are not covered by a view.
    // Because we already checked that there are no overlaps, this can be done simply by checking
    // that the total 'volume' of the views is the same as the output.
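    // e.g. two views of shape {1, 2} at origins {0, 0} and {0, 2} tile an output of {1, 4}:
    // the total view volume 2 + 2 matches the 1 * 4 bounding-box volume, so there are no gaps.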
    unsigned int totalViewsVolume = 0;
    for (unsigned int i = 0; i < GetNumInputSlots(); i++)
    {
        totalViewsVolume += GetInputSlot(i).GetConnection()->GetTensorInfo().GetNumElements();
    }
    unsigned int outputVolume = 1;
    for (unsigned int d = 0; d < numDims; d++)
    {
        outputVolume *= (extentMax[d] - extentMin[d]);
    }
    if (totalViewsVolume != outputVolume)
    {
        throw LayerValidationException("MergerLayer: there are some gaps between views");
    }

    TensorShape outShape(numDims, extentMax.data());
    ConditionalThrow<LayerValidationException>(GetOutputSlot(0).ValidateTensorShape(outShape),
                     "MergerLayer: TensorShape set on OutputSlot[0] does not match the inferred shape.");
}

MultiplicationLayer::MultiplicationLayer(const char* name)
    : Layer(2, 1, LayerType::Multiplication, name)
{
}

std::unique_ptr<IWorkload> MultiplicationLayer::CreateWorkload(const Graph& graph,
                                                               const IWorkloadFactory& factory) const
{
    MultiplicationQueueDescriptor descriptor;

    return factory.CreateMultiplication(descriptor, PrepInfoAndDesc(descriptor, graph));
}

MultiplicationLayer* MultiplicationLayer::Clone(Graph& graph) const
{
    return CloneBase<MultiplicationLayer>(graph, GetName());
}

void MultiplicationLayer::ValidateTensorShapesFromInputs()
{
    auto& input0 = GetInputSlot(0).GetConnection()->GetTensorInfo();
    auto& input1 = GetInputSlot(1).GetConnection()->GetTensorInfo();

    // The output shape is the element-wise maximum of the two input shapes.
    BOOST_ASSERT(input0.GetNumDimensions() == input1.GetNumDimensions());
    unsigned int numDims = input0.GetNumDimensions();
    std::vector<unsigned int> dims(numDims);

    // Validate that the inputs are broadcast-compatible (debug builds only).
#ifndef NDEBUG
    for (unsigned int i = 0; i < numDims; i++)
    {
        unsigned int dim0 = input0.GetShape()[i];
        unsigned int dim1 = input1.GetShape()[i];
        if (dim0 != dim1)
        {
            BOOST_ASSERT_MSG(dim0 == 1 || dim1 == 1, "Dimensions should either match or one should be of size 1.");
        }
    }
#endif

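    // As in AdditionLayer::ValidateTensorShapesFromInputs, broadcasting takes the larger
    // extent in every dimension.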
    for (unsigned int i = 0; i < numDims; i++)
    {
        unsigned int dim0 = input0.GetShape()[i];
        unsigned int dim1 = input1.GetShape()[i];
        dims[i] = std::max(dim0, dim1);
    }

    TensorShape outShape(numDims, dims.data());
    ConditionalThrow<LayerValidationException>(GetOutputSlot(0).ValidateTensorShape(outShape),
                     "MultiplicationLayer: TensorShape set on OutputSlot[0] does not match the inferred shape.");
}

NormalizationLayer::NormalizationLayer(const NormalizationDescriptor& param, const char* name)
    : LayerWithParameters(1, 1, LayerType::Normalization, param, name)
{
}

std::unique_ptr<IWorkload> NormalizationLayer::CreateWorkload(const Graph& graph, const IWorkloadFactory& factory) const
{
    NormalizationQueueDescriptor descriptor;
    return factory.CreateNormalization(descriptor, PrepInfoAndDesc(descriptor, graph));
}

NormalizationLayer* NormalizationLayer::Clone(Graph& graph) const
{
    return CloneBase<NormalizationLayer>(graph, m_Param, GetName());
}

void NormalizationLayer::ValidateTensorShapesFromInputs()
{
    ConditionalThrow<LayerValidationException>(GetInputSlot(0).GetConnection() != nullptr,
                                               "NormalizationLayer: Input slot must be connected.");

    const TensorShape& outShape = GetInputSlot(0).GetConnection()->GetTensorInfo().GetShape();
    ConditionalThrow<LayerValidationException>(GetOutputSlot(0).ValidateTensorShape(outShape),
                     "NormalizationLayer: TensorShape set on OutputSlot[0] does not match the inferred shape.");
}

OutputLayer::OutputLayer(LayerBindingId id, const char* name)
    : BindableLayer(1, 0, LayerType::Output, name, id)
{
}

std::unique_ptr<IWorkload> OutputLayer::CreateWorkload(const Graph& graph, const IWorkloadFactory& factory) const
{
    return nullptr;
}

OutputLayer* OutputLayer::Clone(Graph& graph) const
{
    return CloneBase<OutputLayer>(graph, GetBindingId(), GetName());
}

void OutputLayer::ValidateTensorShapesFromInputs()
{
    // Just validate the input is connected.
    ConditionalThrow<LayerValidationException>(GetInputSlot(0).GetConnection() != nullptr,
                                               "OutputLayer: Input slot must be connected.");
}

PermuteLayer::PermuteLayer(const PermuteDescriptor& param, const char* name)
    : LayerWithParameters(1, 1, LayerType::Permute, param, name)
{
}

std::unique_ptr<IWorkload> PermuteLayer::CreateWorkload(const Graph& graph, const IWorkloadFactory& factory) const
{
    PermuteQueueDescriptor descriptor;
    return factory.CreatePermute(descriptor, PrepInfoAndDesc(descriptor, graph));
}

PermuteLayer* PermuteLayer::Clone(Graph& graph) const
{
    return CloneBase<PermuteLayer>(graph, m_Param, GetName());
}

void PermuteLayer::ValidateTensorShapesFromInputs()
{
    ConditionalThrow<LayerValidationException>(GetInputSlot(0).GetConnection() != nullptr,
                     "PermuteLayer: InputSlot must be connected to an OutputSlot");
    ConditionalThrow<LayerValidationException>(GetInputSlot(0).GetConnection()->IsTensorInfoSet(),
                     "PermuteLayer: TensorInfo must be set on connected OutputSlot.");

    const TensorInfo& infoIn = GetInputSlot(0).GetConnection()->GetTensorInfo();
    TensorShape shapeOut = armnnUtils::Permuted(infoIn.GetShape(), m_Param.m_DimMappings);
    ConditionalThrow<LayerValidationException>(GetOutputSlot(0).ValidateTensorShape(shapeOut),
                     "PermuteLayer: TensorShape set on OutputSlot[0] does not match the inferred shape.");
}

Pooling2dLayer::Pooling2dLayer(const Pooling2dDescriptor& param, const char* name)
    : LayerWithParameters(1, 1, LayerType::Pooling2d, param, name)
{
}

std::unique_ptr<IWorkload> Pooling2dLayer::CreateWorkload(const Graph& graph, const IWorkloadFactory& factory) const
{
    Pooling2dQueueDescriptor descriptor;
    return factory.CreatePooling2d(descriptor, PrepInfoAndDesc(descriptor, graph));
}

Pooling2dLayer* Pooling2dLayer::Clone(Graph& graph) const
{
    return CloneBase<Pooling2dLayer>(graph, m_Param, GetName());
}

void Pooling2dLayer::ValidateTensorShapesFromInputs()
{
    ConditionalThrow<LayerValidationException>(GetInputSlot(0).GetConnection() != nullptr,
                     "Pooling2dLayer: InputSlot must be connected to an OutputSlot");
    ConditionalThrow<LayerValidationException>(GetInputSlot(0).GetConnection()->IsTensorInfoSet(),
                     "Pooling2dLayer: TensorInfo must be set on connected OutputSlot.");

    IOutputSlot* input = GetInputSlot(0).GetConnection();
    const TensorShape& inputShape = input->GetTensorInfo().GetShape();

    // If we support multiple batch dimensions in the future, then this assert will need to change.
    BOOST_ASSERT_MSG(inputShape.GetNumDimensions() == 4, "Pooling2dLayer will always have 4D input.");

    unsigned int inWidth = inputShape[3];
    unsigned int inHeight = inputShape[2];
    unsigned int inChannels = inputShape[1];
    unsigned int inBatchSize = inputShape[0];

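    // A stride of zero in both dimensions encodes global pooling: the whole spatial extent
    // collapses into a single output element per channel.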
    bool isGlobalPooling = (m_Param.m_StrideX == 0 && m_Param.m_StrideY == 0);
    unsigned int outWidth = 1;
    unsigned int outHeight = 1;
    if (!isGlobalPooling)
    {
        BOOST_ASSERT_MSG(m_Param.m_StrideX != 0 && m_Param.m_StrideY != 0,
                         "Stride can only be zero when performing global pooling");

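        // Pooling output-size arithmetic, with configurable rounding:
        //   out = round((in + padLow + padHigh - pool) / stride) + 1
        // e.g. in = 7, no padding, pool = 3, stride = 2: floor(4 / 2) + 1 = 3 output positions.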
        auto CalcSize = [](auto inSize, auto lowPad, auto highPad, auto poolSize, auto stride, auto padMethod,
                           auto outputShapeRounding)
            {
                unsigned int readSize = inSize + lowPad + highPad - poolSize;
                float div = static_cast<float>(readSize) / static_cast<float>(stride);

                unsigned int size = 0;
                switch (outputShapeRounding)
                {
                    case OutputShapeRounding::Ceiling:
                        size = static_cast<unsigned int>(ceil(div)) + 1;
                        break;
                    case OutputShapeRounding::Floor:
                        size = static_cast<unsigned int>(floor(div)) + 1;
                        break;
                    default:
                        BOOST_ASSERT_MSG(false, "Unsupported Output Shape Rounding");
                }

                // Make sure that border operations will start from inside the input and not the padded area.
                // This is what both Caffe and CL do...
                if ((size - 1) * stride >= inSize + lowPad)
                {
                    --size;
                }

                return size;
            };

        outWidth = CalcSize(inWidth, m_Param.m_PadLeft, m_Param.m_PadRight, m_Param.m_PoolWidth, m_Param.m_StrideX,
                            m_Param.m_PaddingMethod, m_Param.m_OutputShapeRounding);
        outHeight = CalcSize(inHeight, m_Param.m_PadTop, m_Param.m_PadBottom, m_Param.m_PoolHeight, m_Param.m_StrideY,
                             m_Param.m_PaddingMethod, m_Param.m_OutputShapeRounding);
    }
    unsigned int outChannels = inChannels;
    unsigned int outBatchSize = inBatchSize;

    TensorShape shapeOut({outBatchSize, outChannels, outHeight, outWidth});

    ConditionalThrow<LayerValidationException>(GetOutputSlot(0).ValidateTensorShape(shapeOut),
               "Pooling2dLayer: TensorShape set on OutputSlot[0] does not match the inferred shape.");
}

SoftmaxLayer::SoftmaxLayer(const SoftmaxDescriptor& param, const char* name)
    : LayerWithParameters(1, 1, LayerType::Softmax, param, name)
{
}

std::unique_ptr<IWorkload> SoftmaxLayer::CreateWorkload(const Graph& graph, const IWorkloadFactory& factory) const
{
    SoftmaxQueueDescriptor descriptor;
    return factory.CreateSoftmax(descriptor, PrepInfoAndDesc(descriptor, graph));
}

SoftmaxLayer* SoftmaxLayer::Clone(Graph& graph) const
{
    return CloneBase<SoftmaxLayer>(graph, m_Param, GetName());
}

void SoftmaxLayer::ValidateTensorShapesFromInputs()
{
    ConditionalThrow<LayerValidationException>(GetInputSlot(0).GetConnection() != nullptr,
                                               "SoftmaxLayer: Input slot must be connected.");
    const TensorShape& outShape = GetInputSlot(0).GetConnection()->GetTensorInfo().GetShape();
    ConditionalThrow<LayerValidationException>(GetOutputSlot(0).ValidateTensorShape(outShape),
                     "SoftmaxLayer: TensorShape set on OutputSlot[0] does not match the inferred shape.");
}

SplitterLayer::SplitterLayer(const ViewsDescriptor& param, const char* name)
    : LayerWithParameters(1, param.GetNumViews(), LayerType::Splitter, param, name)
{
}

std::unique_ptr<IWorkload> SplitterLayer::CreateWorkload(const Graph& graph, const IWorkloadFactory& factory) const
{
    SplitterQueueDescriptor descriptor;

    // Copy the window origins to the descriptor.
    for (unsigned int i = 0; i < m_Param.GetNumViews(); ++i)
    {
        descriptor.m_ViewOrigins.emplace_back(
            std::vector<unsigned int>(m_Param.GetViewOrigin(i), m_Param.GetViewOrigin(i) + m_Param.GetNumDimensions()));
    }

    return factory.CreateSplitter(descriptor, PrepInfoAndDesc(descriptor, graph));
}

void SplitterLayer::CreateTensorHandles(Graph& graph, const IWorkloadFactory& factory)
{
    // If sub-tensors are supported then all the "splitter" needs to do is to
    // set its outputs to be appropriate sub-tensors of the input.
    if (factory.SupportsSubTensors())
    {
        const OutputHandler& outputHandler = GetInputSlots()[0].GetConnectedOutputSlot()->GetOutputHandler();

        ITensorHandle* inputData = outputHandler.GetData();
        // Create the outputs as sub-tensors of the input.
        for (unsigned int i = 0; i < m_Param.GetNumViews(); ++i)
        {
            m_OutputHandlers[i].SetData(factory.CreateSubTensorHandle(*inputData,
                                                                      m_OutputHandlers[i].GetTensorInfo().GetShape(),
                                                                      m_Param.GetViewOrigin(i)));
        }
    }
    else
    {
        for (unsigned int i = 0; i < m_Param.GetNumViews(); ++i)
        {
            m_OutputHandlers[i].CreateTensorHandles(factory);
        }
    }
}

SplitterLayer* SplitterLayer::Clone(Graph& graph) const
{
    return CloneBase<SplitterLayer>(graph, m_Param, GetName());
}

void SplitterLayer::ValidateTensorShapesFromInputs()
{
    // Output shapes must match the view shapes.
    for (unsigned int viewIdx = 0; viewIdx < m_Param.GetNumViews(); viewIdx++)
    {
        const uint32_t* sizes = m_Param.GetViewSizes(viewIdx);

        TensorShape outShape(m_Param.GetNumDimensions(), sizes);
        ConditionalThrow<LayerValidationException>(GetOutputSlot(viewIdx).ValidateTensorShape(outShape),
                         "SplitterLayer: View sizes must match output tensor shapes.");
    }
}

MemCopyLayer::MemCopyLayer(const char* name)
    : Layer(1, 1, LayerType::MemCopy, name)
{
}

MemCopyLayer* MemCopyLayer::Clone(Graph& graph) const
{
    return CloneBase<MemCopyLayer>(graph, GetName());
}

std::unique_ptr<IWorkload> MemCopyLayer::CreateWorkload(const Graph& graph, const IWorkloadFactory& factory) const
{
    MemCopyQueueDescriptor descriptor;
    return factory.CreateMemCopy(descriptor, PrepInfoAndDesc(descriptor, graph));
}

void MemCopyLayer::ValidateTensorShapesFromInputs()
{
    ConditionalThrow<LayerValidationException>(GetInputSlot(0).GetConnection() != nullptr,
                     "MemCopyLayer: InputSlot must be connected to an OutputSlot");
    ConditionalThrow<LayerValidationException>(GetInputSlot(0).GetConnection()->IsTensorInfoSet(),
                     "MemCopyLayer: TensorInfo must be set on connected OutputSlot.");

    IOutputSlot* input = GetInputSlot(0).GetConnection();

    // Input and output shapes are the same.
    TensorShape const& outShape = input->GetTensorInfo().GetShape();
    ConditionalThrow<LayerValidationException>(GetOutputSlot(0).ValidateTensorShape(outShape),
                     "MemCopyLayer: TensorShape set on OutputSlot[0] does not match the inferred shape.");
}

ResizeBilinearLayer::ResizeBilinearLayer(const ResizeBilinearDescriptor& param, const char* name)
    : LayerWithParameters(1, 1, LayerType::ResizeBilinear, param, name)
{
}

std::unique_ptr<IWorkload> ResizeBilinearLayer::CreateWorkload(const Graph& graph,
                                                               const IWorkloadFactory& factory) const
{
    ResizeBilinearQueueDescriptor descriptor;
    return factory.CreateResizeBilinear(descriptor, PrepInfoAndDesc(descriptor, graph));
}

ResizeBilinearLayer* ResizeBilinearLayer::Clone(Graph& graph) const
{
    return CloneBase<ResizeBilinearLayer>(graph, m_Param, GetName());
}

void ResizeBilinearLayer::ValidateTensorShapesFromInputs()
{
    ConditionalThrow<LayerValidationException>(GetInputSlot(0).GetConnection() != nullptr,
                     "ResizeBilinearLayer: InputSlot must be connected to an OutputSlot");
    ConditionalThrow<LayerValidationException>(GetInputSlot(0).GetConnection()->IsTensorInfoSet(),
                     "ResizeBilinearLayer: TensorInfo must be set on connected OutputSlot.");

    const TensorShape& inputShape = GetInputSlot(0).GetConnection()->GetTensorInfo().GetShape();
    unsigned int outWidth = m_Param.m_TargetWidth;
    unsigned int outHeight = m_Param.m_TargetHeight;
    unsigned int outChannels = inputShape[1];
    unsigned int outBatch = inputShape[0];
    TensorShape outShape({outBatch, outChannels, outHeight, outWidth});
    ConditionalThrow<LayerValidationException>(GetOutputSlot(0).ValidateTensorShape(outShape),
                     "ResizeBilinearLayer: TensorShape set on OutputSlot[0] does not match the inferred shape.");
}

L2NormalizationLayer::L2NormalizationLayer(const char* name)
    : Layer(1, 1, LayerType::L2Normalization, name)
{
}

std::unique_ptr<IWorkload> L2NormalizationLayer::CreateWorkload(const Graph& graph,
    const IWorkloadFactory& factory) const
{
    L2NormalizationQueueDescriptor descriptor;
    return factory.CreateL2Normalization(descriptor, PrepInfoAndDesc(descriptor, graph));
}

L2NormalizationLayer* L2NormalizationLayer::Clone(Graph& graph) const
{
    return CloneBase<L2NormalizationLayer>(graph, GetName());
}

void L2NormalizationLayer::ValidateTensorShapesFromInputs()
{
    ConditionalThrow<LayerValidationException>(GetInputSlot(0).GetConnection() != nullptr,
                     "L2NormalizationLayer: InputSlot must be connected to an OutputSlot");
    ConditionalThrow<LayerValidationException>(GetInputSlot(0).GetConnection()->IsTensorInfoSet(),
                     "L2NormalizationLayer: TensorInfo must be set on connected OutputSlot.");

    IOutputSlot* input = GetInputSlot(0).GetConnection();

    // Input and output shapes are the same.
    TensorShape const& outShape = input->GetTensorInfo().GetShape();
    ConditionalThrow<LayerValidationException>(GetOutputSlot(0).ValidateTensorShape(outShape),
                     "L2NormalizationLayer: TensorShape set on OutputSlot[0] does not match the inferred shape.");
}

ConstantLayer::ConstantLayer(const std::shared_ptr<ScopedCpuTensorHandle>& input, const char* name)
    : Layer(0, 1, LayerType::Constant, name)
    , m_LayerOutput(input)
{
}

std::unique_ptr<IWorkload> ConstantLayer::CreateWorkload(const Graph& graph,
    const IWorkloadFactory& factory) const
{
    ConstantQueueDescriptor descriptor;
    descriptor.m_LayerOutput = m_LayerOutput.get();
    return factory.CreateConstant(descriptor, PrepInfoAndDesc(descriptor, graph));
}

ConstantLayer* ConstantLayer::Clone(Graph& graph) const
{
    // Cloned layers share the same layer output object.
    return CloneBase<ConstantLayer>(graph, m_LayerOutput, GetName());
}

void ConstantLayer::ValidateTensorShapesFromInputs()
{
    // Get the output shape from the value of the constant layer.
    TensorShape const& outShape = m_LayerOutput->GetTensorInfo().GetShape();
    ConditionalThrow<LayerValidationException>(GetOutputSlot(0).ValidateTensorShape(outShape),
                     "ConstantLayer: TensorShape set on OutputSlot[0] does not match the inferred shape.");
}

ReshapeLayer::ReshapeLayer(const ReshapeDescriptor& param, const char* name)
    : LayerWithParameters(1, 1, LayerType::Reshape, param, name)
{
}

std::unique_ptr<IWorkload> ReshapeLayer::CreateWorkload(const Graph& graph,
    const IWorkloadFactory& factory) const
{
    ReshapeQueueDescriptor descriptor;
    return factory.CreateReshape(descriptor, PrepInfoAndDesc(descriptor, graph));
}

ReshapeLayer* ReshapeLayer::Clone(Graph& graph) const
{
    return CloneBase<ReshapeLayer>(graph, m_Param, GetName());
}

void ReshapeLayer::ValidateTensorShapesFromInputs()
{
    ConditionalThrow<LayerValidationException>(GetInputSlot(0).GetConnection() != nullptr,
                     "ReshapeLayer: InputSlot must be connected to an OutputSlot");
    ConditionalThrow<LayerValidationException>(GetInputSlot(0).GetConnection()->IsTensorInfoSet(),
                     "ReshapeLayer: TensorInfo must be set on connected OutputSlot.");
    ConditionalThrow<LayerValidationException>(GetOutputSlot(0).ValidateTensorShape(m_Param.m_TargetShape),
                     "ReshapeLayer: TensorShape set on OutputSlot[0] does not match the inferred shape.");
}

} // namespace armnn