//
// Copyright © 2017 Arm Ltd. All rights reserved.
// See LICENSE file in the project root for full license information.
//
#include "Layers.hpp"
#include "Graph.hpp"

#include "backends/CpuTensorHandle.hpp"
#include "backends/Workload.hpp"
#include "backends/WorkloadFactory.hpp"

#include "Permute.hpp"

#include <limits>

namespace armnn
{

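// Helper used by the Clone() overrides below: adds a copy of the concrete layer
// type to the target graph, forwarding any constructor arguments, and carries
// over the compute device assigned to this layer.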
template <typename LayerType, typename ... Params>
LayerType* Layer::CloneBase(Graph& graph, Params&& ... params) const
{
    LayerType* const layer = graph.AddLayer<LayerType>(std::forward<Params>(params)...);

    layer->SetComputeDevice(m_ComputeDevice);

    return layer;
}

ActivationLayer::ActivationLayer(const ActivationDescriptor& param, const char* name)
    : LayerWithParameters(1, 1, LayerType::Activation, param, name)
{
}

std::unique_ptr<IWorkload> ActivationLayer::CreateWorkload(const Graph& graph, const IWorkloadFactory& factory) const
{
    ActivationQueueDescriptor descriptor;
    return factory.CreateActivation(descriptor, PrepInfoAndDesc(descriptor, graph));
}

ActivationLayer* ActivationLayer::Clone(Graph& graph) const
{
    return CloneBase<ActivationLayer>(graph, m_Param, GetName());
}

void ActivationLayer::ValidateTensorShapesFromInputs()
{
    auto& info = GetInputSlot(0).GetConnection()->GetTensorInfo();
    ConditionalThrow<LayerValidationException>(GetOutputSlot(0).ValidateTensorShape(info.GetShape()),
                     "ActivationLayer: TensorShape set on OutputSlot[0] does not match the inferred shape.");
}

AdditionLayer::AdditionLayer(const char* name)
    : Layer(2, 1, LayerType::Addition, name)
{
}

std::unique_ptr<IWorkload> AdditionLayer::CreateWorkload(const Graph& graph, const IWorkloadFactory& factory) const
{
    AdditionQueueDescriptor descriptor;
    return factory.CreateAddition(descriptor, PrepInfoAndDesc(descriptor, graph));
}

AdditionLayer* AdditionLayer::Clone(Graph& graph) const
{
    return CloneBase<AdditionLayer>(graph, GetName());
}

void AdditionLayer::ValidateTensorShapesFromInputs()
{
    auto& input0 = GetInputSlot(0).GetConnection()->GetTensorInfo();
    auto& input1 = GetInputSlot(1).GetConnection()->GetTensorInfo();

    // Both inputs must have the same number of dimensions.
    BOOST_ASSERT(input0.GetNumDimensions() == input1.GetNumDimensions());
    unsigned int numDims = input0.GetNumDimensions();
    std::vector<unsigned int> dims(numDims);

    // Validate that the inputs are broadcast compatible.
#ifndef NDEBUG
    for (unsigned int i = 0; i < numDims; i++)
    {
        unsigned int dim0 = input0.GetShape()[i];
        unsigned int dim1 = input1.GetShape()[i];
        if (dim0 != dim1)
        {
            BOOST_ASSERT_MSG(dim0 == 1 || dim1 == 1, "Dimensions should either match or one should be of size 1");
        }
    }
#endif

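    // The output extent in each dimension is the larger of the two input extents,
    // e.g. broadcasting {5, 1, 4, 4} with {5, 3, 4, 4} gives an output of {5, 3, 4, 4}.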
    for (unsigned int i = 0; i < numDims; i++)
    {
        unsigned int dim0 = input0.GetShape()[i];
        unsigned int dim1 = input1.GetShape()[i];
        dims[i] = std::max(dim0, dim1);
    }

    TensorShape outShape(numDims, dims.data());
    ConditionalThrow<LayerValidationException>(GetOutputSlot(0).ValidateTensorShape(outShape),
                     "AdditionLayer: TensorShape set on OutputSlot[0] does not match the inferred shape.");
}

BatchNormalizationLayer::BatchNormalizationLayer(const armnn::BatchNormalizationDescriptor& param, const char* name)
    : LayerWithParameters(1, 1, LayerType::BatchNormalization, param, name)
{
}

std::unique_ptr<IWorkload> BatchNormalizationLayer::CreateWorkload(const Graph& graph,
                                                                   const IWorkloadFactory& factory) const
{
    BatchNormalizationQueueDescriptor descriptor;

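    // The layer owns its statistics (mean/variance) and learned parameters
    // (beta/gamma) as ScopedCpuTensorHandles; only raw pointers are passed on
    // to the workload descriptor.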
    descriptor.m_Mean = m_Mean.get();
    descriptor.m_Variance = m_Variance.get();
    descriptor.m_Beta = m_Beta.get();
    descriptor.m_Gamma = m_Gamma.get();
    return factory.CreateBatchNormalization(descriptor, PrepInfoAndDesc(descriptor, graph));
}

BatchNormalizationLayer* BatchNormalizationLayer::Clone(Graph& graph) const
{
    auto layer = CloneBase<BatchNormalizationLayer>(graph, m_Param, GetName());

    layer->m_Mean = m_Mean ? std::make_unique<ScopedCpuTensorHandle>(*m_Mean) : nullptr;
    layer->m_Variance = m_Variance ? std::make_unique<ScopedCpuTensorHandle>(*m_Variance) : nullptr;
    layer->m_Beta = m_Beta ? std::make_unique<ScopedCpuTensorHandle>(*m_Beta) : nullptr;
    layer->m_Gamma = m_Gamma ? std::make_unique<ScopedCpuTensorHandle>(*m_Gamma) : nullptr;

    return layer;
}

void BatchNormalizationLayer::ValidateTensorShapesFromInputs()
{
    ConditionalThrow<LayerValidationException>(GetInputSlot(0).GetConnection() != nullptr,
                     "BatchNormalizationLayer: InputSlot must be connected to an OutputSlot");
    ConditionalThrow<LayerValidationException>(GetInputSlot(0).GetConnection()->IsTensorInfoSet(),
                     "BatchNormalizationLayer: TensorInfo must be set on connected OutputSlot.");

    auto& info = GetInputSlot(0).GetConnection()->GetTensorInfo();
    ConditionalThrow<LayerValidationException>(GetOutputSlot(0).ValidateTensorShape(info.GetShape()),
                     "BatchNormalizationLayer: TensorShape set on OutputSlot[0] does not match the inferred shape.");
}

Convolution2dLayer::Convolution2dLayer(const Convolution2dDescriptor& param, const char* name)
    : LayerWithParameters(1, 1, LayerType::Convolution2d, param, name)
{
}

std::unique_ptr<IWorkload> Convolution2dLayer::CreateWorkload(const Graph& graph, const IWorkloadFactory& factory) const
{
    Convolution2dQueueDescriptor descriptor;

    descriptor.m_Weight = m_Weight.get();
    if (m_Param.m_BiasEnabled)
    {
        descriptor.m_Bias = m_Bias.get();
    }
    return factory.CreateConvolution2d(descriptor, PrepInfoAndDesc(descriptor, graph));
}

Convolution2dLayer* Convolution2dLayer::Clone(Graph& graph) const
{
    auto layer = CloneBase<Convolution2dLayer>(graph, m_Param, GetName());
    layer->m_Weight = m_Weight ? std::make_unique<ScopedCpuTensorHandle>(*m_Weight) : nullptr;

    if (layer->m_Param.m_BiasEnabled)
    {
        layer->m_Bias = m_Bias ? std::make_unique<ScopedCpuTensorHandle>(*m_Bias) : nullptr;
    }

    return layer;
}

void Convolution2dLayer::ValidateTensorShapesFromInputs()
{
    ConditionalThrow<LayerValidationException>(GetInputSlot(0).GetConnection() != nullptr,
                     "Convolution2dLayer: InputSlot must be connected to an OutputSlot");
    ConditionalThrow<LayerValidationException>(GetInputSlot(0).GetConnection()->IsTensorInfoSet(),
                     "Convolution2dLayer: TensorInfo must be set on connected OutputSlot.");

    IOutputSlot* input = GetInputSlot(0).GetConnection();
    const TensorShape& inputShape = input->GetTensorInfo().GetShape();
    const TensorShape filterShape = m_Weight->GetTensorInfo().GetShape();

    // If we support multiple batch dimensions in the future, then this assert will need to change.
    BOOST_ASSERT_MSG(inputShape.GetNumDimensions() == 4, "Convolutions will always have 4D input.");

    unsigned int inWidth = inputShape[3];
    unsigned int inHeight = inputShape[2];
    unsigned int inBatchSize = inputShape[0];

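    // Standard convolution output size in each spatial dimension:
    //   outSize = (inSize + padLow + padHigh - filterSize) / stride + 1
    // where the division is integer division (i.e. the floor of the quotient).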
    unsigned int filterWidth = filterShape[3];
    unsigned int readWidth = (inWidth + m_Param.m_PadLeft + m_Param.m_PadRight) - filterWidth;
    unsigned int outWidth = 1 + (readWidth / m_Param.m_StrideX);

    unsigned int filterHeight = filterShape[2];
    unsigned int readHeight = (inHeight + m_Param.m_PadTop + m_Param.m_PadBottom) - filterHeight;
    unsigned int outHeight = 1 + (readHeight / m_Param.m_StrideY);

    unsigned int outChannels = filterShape[0];
    unsigned int outBatchSize = inBatchSize;

    TensorShape shapeOut({outBatchSize, outChannels, outHeight, outWidth});
    ConditionalThrow<LayerValidationException>(GetOutputSlot(0).ValidateTensorShape(shapeOut),
                     "Convolution2dLayer: TensorShape set on OutputSlot[0] does not match the inferred shape.");
}

DepthwiseConvolution2dLayer::DepthwiseConvolution2dLayer(const DepthwiseConvolution2dDescriptor& param,
                                                         const char* name)
    : LayerWithParameters(1, 1, LayerType::DepthwiseConvolution2d, param, name)
{
}

std::unique_ptr<IWorkload> DepthwiseConvolution2dLayer::CreateWorkload(const Graph& graph,
                                                                       const IWorkloadFactory& factory) const
{
    DepthwiseConvolution2dQueueDescriptor descriptor;

    descriptor.m_Weight = m_Weight.get();
    if (m_Param.m_BiasEnabled)
    {
        descriptor.m_Bias = m_Bias.get();
    }
    return factory.CreateDepthwiseConvolution2d(descriptor, PrepInfoAndDesc(descriptor, graph));
}

DepthwiseConvolution2dLayer* DepthwiseConvolution2dLayer::Clone(Graph& graph) const
{
    auto layer = CloneBase<DepthwiseConvolution2dLayer>(graph, m_Param, GetName());
    layer->m_Weight = m_Weight ? std::make_unique<ScopedCpuTensorHandle>(*m_Weight) : nullptr;

    if (layer->m_Param.m_BiasEnabled)
    {
        layer->m_Bias = m_Bias ? std::make_unique<ScopedCpuTensorHandle>(*m_Bias) : nullptr;
    }

    return layer;
}

void DepthwiseConvolution2dLayer::ValidateTensorShapesFromInputs()
{
    ConditionalThrow<LayerValidationException>(GetInputSlot(0).GetConnection() != nullptr,
                     "DepthwiseConvolution2dLayer: InputSlot must be connected to an OutputSlot");
    ConditionalThrow<LayerValidationException>(GetInputSlot(0).GetConnection()->IsTensorInfoSet(),
                     "DepthwiseConvolution2dLayer: TensorInfo must be set on connected OutputSlot.");

    IOutputSlot* input = GetInputSlot(0).GetConnection();
    const TensorShape& inputShape = input->GetTensorInfo().GetShape();
    const TensorShape filterShape = m_Weight->GetTensorInfo().GetShape();

    BOOST_ASSERT_MSG(inputShape.GetNumDimensions() == 4, "Convolutions will always have 4D input.");

    unsigned int inWidth = inputShape[3];
    unsigned int inHeight = inputShape[2];
    unsigned int inBatchSize = inputShape[0];

    unsigned int filterWidth = filterShape[3];
    unsigned int readWidth = (inWidth + m_Param.m_PadLeft + m_Param.m_PadRight) - filterWidth;
    unsigned int outWidth = 1 + (readWidth / m_Param.m_StrideX);

    unsigned int filterHeight = filterShape[2];
    unsigned int readHeight = (inHeight + m_Param.m_PadTop + m_Param.m_PadBottom) - filterHeight;
    unsigned int outHeight = 1 + (readHeight / m_Param.m_StrideY);
    unsigned int depthMultiplier = filterShape[0];

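    // For depthwise convolution the filter tensor is laid out as
    // [ depthMultiplier, inputChannels, filterHeight, filterWidth ],
    // so every input channel produces depthMultiplier output channels.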
    unsigned int outChannels = filterShape[1] * depthMultiplier;
    unsigned int outBatchSize = inBatchSize;

    TensorShape outShape({outBatchSize, outChannels, outHeight, outWidth});
    ConditionalThrow<LayerValidationException>(GetOutputSlot(0).ValidateTensorShape(outShape),
                     "DepthwiseConvolution2dLayer: "
                         "TensorShape set on OutputSlot[0] does not match the inferred shape.");
}

FakeQuantizationLayer::FakeQuantizationLayer(const FakeQuantizationDescriptor& param, const char* name)
    : LayerWithParameters(1, 1, LayerType::FakeQuantization, param, name)
{
}

std::unique_ptr<IWorkload> FakeQuantizationLayer::CreateWorkload(const Graph& graph,
                                                                 const IWorkloadFactory& factory) const
{
    FakeQuantizationQueueDescriptor descriptor;
    return factory.CreateFakeQuantization(descriptor, PrepInfoAndDesc(descriptor, graph));
}

FakeQuantizationLayer* FakeQuantizationLayer::Clone(Graph& graph) const
{
    return CloneBase<FakeQuantizationLayer>(graph, m_Param, GetName());
}

void FakeQuantizationLayer::ValidateTensorShapesFromInputs()
{
    ConditionalThrow<LayerValidationException>(GetInputSlot(0).GetConnection() != nullptr,
                     "FakeQuantizationLayer: InputSlot must be connected to an OutputSlot");
    ConditionalThrow<LayerValidationException>(GetInputSlot(0).GetConnection()->IsTensorInfoSet(),
                     "FakeQuantizationLayer: TensorInfo must be set on connected OutputSlot.");

    IOutputSlot* input = GetInputSlot(0).GetConnection();

    // Input and output shapes are the same.
    TensorShape const& outShape = input->GetTensorInfo().GetShape();
    ConditionalThrow<LayerValidationException>(GetOutputSlot(0).ValidateTensorShape(outShape),
                     "FakeQuantizationLayer: TensorShape set on OutputSlot[0] does not match the inferred shape.");
}

FloorLayer::FloorLayer(const char* name)
    : Layer(1, 1, LayerType::Floor, name)
{
}

std::unique_ptr<IWorkload> FloorLayer::CreateWorkload(const Graph& graph,
                                                      const IWorkloadFactory& factory) const
{
    FloorQueueDescriptor descriptor;
    return factory.CreateFloor(descriptor, PrepInfoAndDesc(descriptor, graph));
}

FloorLayer* FloorLayer::Clone(Graph& graph) const
{
    return CloneBase<FloorLayer>(graph, GetName());
}

void FloorLayer::ValidateTensorShapesFromInputs()
{
    ConditionalThrow<LayerValidationException>(GetInputSlot(0).GetConnection() != nullptr,
                     "FloorLayer: InputSlot must be connected to an OutputSlot");
    ConditionalThrow<LayerValidationException>(GetInputSlot(0).GetConnection()->IsTensorInfoSet(),
                     "FloorLayer: TensorInfo must be set on connected OutputSlot.");

    // Input and output shapes are the same.
    IOutputSlot* input = GetInputSlot(0).GetConnection();
    TensorShape const& outShape = input->GetTensorInfo().GetShape();
    ConditionalThrow<LayerValidationException>(GetOutputSlot(0).ValidateTensorShape(outShape),
                     "FloorLayer: TensorShape set on OutputSlot[0] does not match the inferred shape.");
}

FullyConnectedLayer::FullyConnectedLayer(const FullyConnectedDescriptor& param, const char* name)
    : LayerWithParameters(1, 1, LayerType::FullyConnected, param, name)
{
}

std::unique_ptr<IWorkload> FullyConnectedLayer::CreateWorkload(const Graph& graph,
                                                               const IWorkloadFactory& factory) const
{
    FullyConnectedQueueDescriptor descriptor;

    descriptor.m_Weight = m_Weight.get();
    if (m_Param.m_BiasEnabled)
    {
        descriptor.m_Bias = m_Bias.get();
    }
    return factory.CreateFullyConnected(descriptor, PrepInfoAndDesc(descriptor, graph));
}

FullyConnectedLayer* FullyConnectedLayer::Clone(Graph& graph) const
{
    auto layer = CloneBase<FullyConnectedLayer>(graph, m_Param, GetName());

    layer->m_Weight = m_Weight ? std::make_unique<ScopedCpuTensorHandle>(*m_Weight) : nullptr;
    if (layer->m_Param.m_BiasEnabled)
    {
        layer->m_Bias = m_Bias ? std::make_unique<ScopedCpuTensorHandle>(*m_Bias) : nullptr;
    }

    return layer;
}

void FullyConnectedLayer::ValidateTensorShapesFromInputs()
{
    ConditionalThrow<LayerValidationException>(GetInputSlot(0).GetConnection() != nullptr,
                     "FullyConnectedLayer: InputSlot must be connected to an OutputSlot");
    ConditionalThrow<LayerValidationException>(GetInputSlot(0).GetConnection()->IsTensorInfoSet(),
                     "FullyConnectedLayer: TensorInfo must be set on connected OutputSlot.");

    TensorShape const& weightShape = m_Weight->GetTensorInfo().GetShape();

    // The output of a fully connected layer is [batches, outputSize], where outputSize
    // is weightShape[1], or weightShape[0] when the weight matrix is transposed.
    unsigned int batches = GetInputSlot(0).GetConnection()->GetTensorInfo().GetShape()[0];
    unsigned int dimIdx = m_Param.m_TransposeWeightMatrix ? 0 : 1;
    TensorShape outShape({batches, weightShape[dimIdx]});

    ConditionalThrow<LayerValidationException>(GetOutputSlot(0).ValidateTensorShape(outShape),
                     "FullyConnectedLayer: TensorShape set on OutputSlot[0] does not match the inferred shape.");
}

InputLayer::InputLayer(LayerBindingId id, const char* name)
    : BindableLayer(0, 1, LayerType::Input, name, id)
{
}

std::unique_ptr<IWorkload> InputLayer::CreateWorkload(const Graph& graph, const IWorkloadFactory& factory) const
{
    return nullptr;
}

InputLayer* InputLayer::Clone(Graph& graph) const
{
    return CloneBase<InputLayer>(graph, GetBindingId(), GetName());
}

void InputLayer::ValidateTensorShapesFromInputs()
{
    // The input layer should already have its TensorInfo set during the graph building phase in the driver/parser.
    ConditionalThrow<LayerValidationException>(GetOutputSlot(0).IsTensorInfoSet(),
                                               "InputLayer should already have the TensorInfo set.");
}

MergerLayer::MergerLayer(const OriginsDescriptor& param, const char* name)
    : LayerWithParameters(param.GetNumViews(), 1, LayerType::Merger, param, name)
{
}

std::unique_ptr<IWorkload> MergerLayer::CreateWorkload(const Graph& graph, const IWorkloadFactory& factory) const
{
    MergerQueueDescriptor descriptor;

    // Copy the view origins to the descriptor.
    descriptor.m_ViewOrigins.reserve(m_Param.GetNumViews());
    for (unsigned int i = 0; i < m_Param.GetNumViews(); ++i)
    {
        descriptor.m_ViewOrigins.emplace_back(
            std::vector<unsigned int>(m_Param.GetViewOrigin(i), m_Param.GetViewOrigin(i) + m_Param.GetNumDimensions()));
    }

    return factory.CreateMerger(descriptor, PrepInfoAndDesc(descriptor, graph));
}

void MergerLayer::CreateTensorHandles(Graph& graph, const IWorkloadFactory& factory)
{
    // If sub-tensors are supported, then the merger just needs to make sure that
    // the outputs of the previous layer are made sub-tensors of the output of the
    // merger layer.
    m_OutputHandlers[0].CreateTensorHandles(factory);
    if (factory.SupportsSubTensors())
    {
        const unsigned int numInputSlots = GetNumInputSlots();
        for (unsigned int i = 0; i < numInputSlots; ++i)
        {
            OutputHandler& outputHandler = GetInputSlot(i).GetConnectedOutputSlot()->GetOutputHandler();

            outputHandler.SetData(factory.CreateSubTensorHandle(*m_OutputHandlers[0].GetData(),
                                                                outputHandler.GetTensorInfo().GetShape(),
                                                                m_Param.GetViewOrigin(i)));
        }
    }
}

MergerLayer* MergerLayer::Clone(Graph& graph) const
{
    return CloneBase<MergerLayer>(graph, m_Param, GetName());
}

void MergerLayer::ValidateTensorShapesFromInputs()
{
    // Validate the merger layer.
    ConditionalThrow<LayerValidationException>(m_Param.GetNumViews() == GetNumInputSlots(),
                     "MergerLayer: Num Inputs must match num views.");

    unsigned int numDims = m_Param.GetNumDimensions();
    for (unsigned int i = 0; i < GetNumInputSlots(); i++)
    {
        auto& inputInfo = GetInputSlot(i).GetConnection()->GetTensorInfo();

        boost::ignore_unused(inputInfo);
        ConditionalThrow<LayerValidationException>(numDims == inputInfo.GetNumDimensions(),
                         "MergerLayer: Num Dimensions must match all inputs.");
    }

    // Find the bounding box (extents) of all the views.
    // extentMin must start at the largest representable value so that std::min
    // below actually finds the smallest view origin in each dimension.
    std::vector<unsigned int> extentMin(numDims, std::numeric_limits<unsigned int>::max());
    std::vector<unsigned int> extentMax(numDims);
    for (unsigned int i = 0; i < GetNumInputSlots(); i++)
    {
        const uint32_t* origin = m_Param.GetViewOrigin(i);
        const armnn::TensorShape& shape = GetInputSlot(i).GetConnection()->GetTensorInfo().GetShape();
        for (unsigned int d = 0; d < numDims; d++)
        {
            extentMin[d] = std::min(extentMin[d], origin[d]);
            extentMax[d] = std::max(extentMax[d], origin[d] + shape[d]);
        }
    }

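    // For example, concatenating two {1, 2, 4, 4} inputs along the channel axis uses
    // view origins {0, 0, 0, 0} and {0, 2, 0, 0}, giving extents of {1, 4, 4, 4}.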
    // Check that the bounding box starts at the origin.
    if (!std::all_of(extentMin.begin(), extentMin.end(), [](unsigned int s) { return s == 0; }))
    {
        throw LayerValidationException("MergerLayer: there is no view that starts at the origin");
    }

    // Check that there are no overlaps of views (this would lead to undefined output at those locations).
    // Check each pair of views against each other
    // (and don't bother to check against self, or check the same pair both ways round).
    for (unsigned int a = 0; a < GetNumInputSlots(); a++)
    {
        const uint32_t* aOrigin = m_Param.GetViewOrigin(a);
        const armnn::TensorShape& aShape = GetInputSlot(a).GetConnection()->GetTensorInfo().GetShape();
        for (unsigned int b = 0; b < a; b++)
        {
            const uint32_t* bOrigin = m_Param.GetViewOrigin(b);
            const armnn::TensorShape& bShape = GetInputSlot(b).GetConnection()->GetTensorInfo().GetShape();

            bool allAxesOverlap = true;
            for (unsigned int d = 0; d < numDims && allAxesOverlap; d++)
            {
                unsigned int a1 = aOrigin[d];
                unsigned int a2 = aOrigin[d] + aShape[d];

                unsigned int b1 = bOrigin[d];
                unsigned int b2 = bOrigin[d] + bShape[d];

                if (a2 <= b1 || b2 <= a1)
                {
                    allAxesOverlap = false;
                }
            }
            if (allAxesOverlap)
            {
                throw LayerValidationException("MergerLayer: Some views overlap.");
            }
        }
    }

    // Check that there are no "holes", i.e. regions of the output which are not covered by a view.
    // Because we already checked that there are no overlaps, this can be done simply by checking
    // that the total 'volume' of the views is the same as the output.
    unsigned int totalViewsVolume = 0;
    for (unsigned int i = 0; i < GetNumInputSlots(); i++)
    {
        totalViewsVolume += GetInputSlot(i).GetConnection()->GetTensorInfo().GetNumElements();
    }
    unsigned int outputVolume = 1;
    for (unsigned int d = 0; d < numDims; d++)
    {
        outputVolume *= (extentMax[d] - extentMin[d]);
    }
    if (totalViewsVolume != outputVolume)
    {
        throw LayerValidationException("MergerLayer: there are some gaps between views");
    }

    TensorShape outShape(numDims, extentMax.data());
    ConditionalThrow<LayerValidationException>(GetOutputSlot(0).ValidateTensorShape(outShape),
                     "MergerLayer: TensorShape set on OutputSlot[0] does not match the inferred shape.");
}

MultiplicationLayer::MultiplicationLayer(const char* name)
    : Layer(2, 1, LayerType::Multiplication, name)
{
}

std::unique_ptr<IWorkload> MultiplicationLayer::CreateWorkload(const Graph& graph,
                                                               const IWorkloadFactory& factory) const
{
    MultiplicationQueueDescriptor descriptor;

    return factory.CreateMultiplication(descriptor, PrepInfoAndDesc(descriptor, graph));
}

MultiplicationLayer* MultiplicationLayer::Clone(Graph& graph) const
{
    return CloneBase<MultiplicationLayer>(graph, GetName());
}

void MultiplicationLayer::ValidateTensorShapesFromInputs()
{
    ConditionalThrow<LayerValidationException>(GetInputSlot(0).GetConnection()->GetTensorInfo().GetShape() ==
                     GetInputSlot(1).GetConnection()->GetTensorInfo().GetShape(),
                     "MultiplicationLayer: Inputs must match");

    TensorInfo infoOut(GetInputSlot(0).GetConnection()->GetTensorInfo());
    ConditionalThrow<LayerValidationException>(GetOutputSlot(0).ValidateTensorShape(infoOut.GetShape()),
                     "MultiplicationLayer: TensorShape set on OutputSlot[0] does not match the inferred shape.");
}

NormalizationLayer::NormalizationLayer(const NormalizationDescriptor& param, const char* name)
    : LayerWithParameters(1, 1, LayerType::Normalization, param, name)
{
}

std::unique_ptr<IWorkload> NormalizationLayer::CreateWorkload(const Graph& graph, const IWorkloadFactory& factory) const
{
    NormalizationQueueDescriptor descriptor;
    return factory.CreateNormalization(descriptor, PrepInfoAndDesc(descriptor, graph));
}

NormalizationLayer* NormalizationLayer::Clone(Graph& graph) const
{
    return CloneBase<NormalizationLayer>(graph, m_Param, GetName());
}

void NormalizationLayer::ValidateTensorShapesFromInputs()
{
    ConditionalThrow<LayerValidationException>(GetInputSlot(0).GetConnection() != nullptr,
                                               "NormalizationLayer: Input slot must be connected.");

    const TensorShape& outShape = GetInputSlot(0).GetConnection()->GetTensorInfo().GetShape();
    ConditionalThrow<LayerValidationException>(GetOutputSlot(0).ValidateTensorShape(outShape),
                     "NormalizationLayer: TensorShape set on OutputSlot[0] does not match the inferred shape.");
}

OutputLayer::OutputLayer(LayerBindingId id, const char* name)
    : BindableLayer(1, 0, LayerType::Output, name, id)
{
}

std::unique_ptr<IWorkload> OutputLayer::CreateWorkload(const Graph& graph, const IWorkloadFactory& factory) const
{
    return nullptr;
}

OutputLayer* OutputLayer::Clone(Graph& graph) const
{
    return CloneBase<OutputLayer>(graph, GetBindingId(), GetName());
}

void OutputLayer::ValidateTensorShapesFromInputs()
{
    // Just validate that the input is connected.
    ConditionalThrow<LayerValidationException>(GetInputSlot(0).GetConnection() != nullptr,
                                               "OutputLayer: Input slot must be connected.");
}

PermuteLayer::PermuteLayer(const PermuteDescriptor& param, const char* name)
    : LayerWithParameters(1, 1, LayerType::Permute, param, name)
{
}

std::unique_ptr<IWorkload> PermuteLayer::CreateWorkload(const Graph& graph, const IWorkloadFactory& factory) const
{
    PermuteQueueDescriptor descriptor;
    return factory.CreatePermute(descriptor, PrepInfoAndDesc(descriptor, graph));
}

PermuteLayer* PermuteLayer::Clone(Graph& graph) const
{
    return CloneBase<PermuteLayer>(graph, m_Param, GetName());
}

void PermuteLayer::ValidateTensorShapesFromInputs()
{
    ConditionalThrow<LayerValidationException>(GetInputSlot(0).GetConnection() != nullptr,
                     "PermuteLayer: InputSlot must be connected to an OutputSlot");
    ConditionalThrow<LayerValidationException>(GetInputSlot(0).GetConnection()->IsTensorInfoSet(),
                     "PermuteLayer: TensorInfo must be set on connected OutputSlot.");

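    // armnnUtils::Permuted applies m_DimMappings to the input shape to produce
    // the rearranged output shape this layer is expected to emit.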
    const TensorInfo& infoIn = GetInputSlot(0).GetConnection()->GetTensorInfo();
    TensorShape shapeOut = armnnUtils::Permuted(infoIn.GetShape(), m_Param.m_DimMappings);
    ConditionalThrow<LayerValidationException>(GetOutputSlot(0).ValidateTensorShape(shapeOut),
                     "PermuteLayer: TensorShape set on OutputSlot[0] does not match the inferred shape.");
}

Pooling2dLayer::Pooling2dLayer(const Pooling2dDescriptor& param, const char* name)
    : LayerWithParameters(1, 1, LayerType::Pooling2d, param, name)
{
}

std::unique_ptr<IWorkload> Pooling2dLayer::CreateWorkload(const Graph& graph, const IWorkloadFactory& factory) const
{
    Pooling2dQueueDescriptor descriptor;
    return factory.CreatePooling2d(descriptor, PrepInfoAndDesc(descriptor, graph));
}

Pooling2dLayer* Pooling2dLayer::Clone(Graph& graph) const
{
    return CloneBase<Pooling2dLayer>(graph, m_Param, GetName());
}

void Pooling2dLayer::ValidateTensorShapesFromInputs()
{
    ConditionalThrow<LayerValidationException>(GetInputSlot(0).GetConnection() != nullptr,
                     "Pooling2dLayer: InputSlot must be connected to an OutputSlot");
    ConditionalThrow<LayerValidationException>(GetInputSlot(0).GetConnection()->IsTensorInfoSet(),
                     "Pooling2dLayer: TensorInfo must be set on connected OutputSlot.");

    IOutputSlot* input = GetInputSlot(0).GetConnection();
    const TensorShape& inputShape = input->GetTensorInfo().GetShape();

    // If we support multiple batch dimensions in the future, then this assert will need to change.
    BOOST_ASSERT_MSG(inputShape.GetNumDimensions() == 4, "Pooling2dLayer will always have 4D input.");

    unsigned int inWidth = inputShape[3];
    unsigned int inHeight = inputShape[2];
    unsigned int inChannels = inputShape[1];
    unsigned int inBatchSize = inputShape[0];

    bool isGlobalPooling = (m_Param.m_StrideX == 0 && m_Param.m_StrideY == 0);
    unsigned int outWidth = 1;
    unsigned int outHeight = 1;
    if (!isGlobalPooling)
    {
        BOOST_ASSERT_MSG(m_Param.m_StrideX != 0 && m_Param.m_StrideY != 0,
                         "Stride can only be zero when performing global pooling");

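        // Pooled output size per spatial dimension (before the border correction below):
        //   outSize = round((inSize + lowPad + highPad - poolSize) / stride) + 1
        // where the rounding mode (floor or ceiling) comes from the descriptor.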
        auto CalcSize = [](auto inSize, auto lowPad, auto highPad, auto poolSize, auto stride, auto padMethod,
                           auto outputShapeRounding)
            {
                unsigned int readSize = inSize + lowPad + highPad - poolSize;
                float div = static_cast<float>(readSize) / static_cast<float>(stride);

                unsigned int size = 0;
                switch (outputShapeRounding)
                {
                    case OutputShapeRounding::Ceiling:
                        size = static_cast<unsigned int>(ceil(div)) + 1;
                        break;
                    case OutputShapeRounding::Floor:
                        size = static_cast<unsigned int>(floor(div)) + 1;
                        break;
                    default:
                        BOOST_ASSERT_MSG(false, "Unsupported Output Shape Rounding");
                }

                // Make sure that border operations will start from inside the input and not the padded area.
                // This is what both Caffe and CL do...
                if ((size - 1) * stride >= inSize + lowPad)
                {
                    --size;
                }

                return size;
            };

        outWidth = CalcSize(inWidth, m_Param.m_PadLeft, m_Param.m_PadRight, m_Param.m_PoolWidth, m_Param.m_StrideX,
                            m_Param.m_PaddingMethod, m_Param.m_OutputShapeRounding);
        outHeight = CalcSize(inHeight, m_Param.m_PadTop, m_Param.m_PadBottom, m_Param.m_PoolHeight, m_Param.m_StrideY,
                             m_Param.m_PaddingMethod, m_Param.m_OutputShapeRounding);
    }
    unsigned int outChannels = inChannels;
    unsigned int outBatchSize = inBatchSize;

    TensorShape shapeOut({outBatchSize, outChannels, outHeight, outWidth});

    ConditionalThrow<LayerValidationException>(GetOutputSlot(0).ValidateTensorShape(shapeOut),
                     "Pooling2dLayer: TensorShape set on OutputSlot[0] does not match the inferred shape.");
}

SoftmaxLayer::SoftmaxLayer(const SoftmaxDescriptor& param, const char* name)
    : LayerWithParameters(1, 1, LayerType::Softmax, param, name)
{
}

std::unique_ptr<IWorkload> SoftmaxLayer::CreateWorkload(const Graph& graph, const IWorkloadFactory& factory) const
{
    SoftmaxQueueDescriptor descriptor;
    return factory.CreateSoftmax(descriptor, PrepInfoAndDesc(descriptor, graph));
}

SoftmaxLayer* SoftmaxLayer::Clone(Graph& graph) const
{
    return CloneBase<SoftmaxLayer>(graph, m_Param, GetName());
}

void SoftmaxLayer::ValidateTensorShapesFromInputs()
{
    ConditionalThrow<LayerValidationException>(GetInputSlot(0).GetConnection() != nullptr,
                                               "SoftmaxLayer: Input slot must be connected.");
    const TensorShape& outShape = GetInputSlot(0).GetConnection()->GetTensorInfo().GetShape();
    ConditionalThrow<LayerValidationException>(GetOutputSlot(0).ValidateTensorShape(outShape),
                     "SoftmaxLayer: TensorShape set on OutputSlot[0] does not match the inferred shape.");
}

SplitterLayer::SplitterLayer(const ViewsDescriptor& param, const char* name)
    : LayerWithParameters(1, param.GetNumViews(), LayerType::Splitter, param, name)
{
}

std::unique_ptr<IWorkload> SplitterLayer::CreateWorkload(const Graph& graph, const IWorkloadFactory& factory) const
{
    SplitterQueueDescriptor descriptor;

    // Copy the window origins to the descriptor.
    for (unsigned int i = 0; i < m_Param.GetNumViews(); ++i)
    {
        descriptor.m_ViewOrigins.emplace_back(
            std::vector<unsigned int>(m_Param.GetViewOrigin(i), m_Param.GetViewOrigin(i) + m_Param.GetNumDimensions()));
    }

    return factory.CreateSplitter(descriptor, PrepInfoAndDesc(descriptor, graph));
}

void SplitterLayer::CreateTensorHandles(Graph& graph, const IWorkloadFactory& factory)
{
    // If sub-tensors are supported, then all the splitter needs to do is to
    // set its outputs to be appropriate sub-tensors of the input.
    if (factory.SupportsSubTensors())
    {
        const OutputHandler& outputHandler = GetInputSlots()[0].GetConnectedOutputSlot()->GetOutputHandler();

        ITensorHandle* inputData = outputHandler.GetData();
        // Create the outputs as sub-tensors of the input.
        for (unsigned int i = 0; i < m_Param.GetNumViews(); ++i)
        {
            m_OutputHandlers[i].SetData(factory.CreateSubTensorHandle(*inputData,
                                                                      m_OutputHandlers[i].GetTensorInfo().GetShape(),
                                                                      m_Param.GetViewOrigin(i)));
        }
    }
    else
    {
        for (unsigned int i = 0; i < m_Param.GetNumViews(); ++i)
        {
            m_OutputHandlers[i].CreateTensorHandles(factory);
        }
    }
}

SplitterLayer* SplitterLayer::Clone(Graph& graph) const
{
    return CloneBase<SplitterLayer>(graph, m_Param, GetName());
}

void SplitterLayer::ValidateTensorShapesFromInputs()
{
    // Output shapes must match the view shapes.
    for (unsigned int viewIdx = 0; viewIdx < m_Param.GetNumViews(); viewIdx++)
    {
        const uint32_t* sizes = m_Param.GetViewSizes(viewIdx);

        TensorShape outShape(m_Param.GetNumDimensions(), sizes);
        ConditionalThrow<LayerValidationException>(GetOutputSlot(viewIdx).ValidateTensorShape(outShape),
                         "SplitterLayer: View sizes must match output tensor shapes.");
    }
}

MemCopyLayer::MemCopyLayer(const char* name)
    : Layer(1, 1, LayerType::MemCopy, name)
{
}

MemCopyLayer* MemCopyLayer::Clone(Graph& graph) const
{
    return CloneBase<MemCopyLayer>(graph, GetName());
}

std::unique_ptr<IWorkload> MemCopyLayer::CreateWorkload(const Graph& graph, const IWorkloadFactory& factory) const
{
    MemCopyQueueDescriptor descriptor;
    return factory.CreateMemCopy(descriptor, PrepInfoAndDesc(descriptor, graph));
}

void MemCopyLayer::ValidateTensorShapesFromInputs()
{
    ConditionalThrow<LayerValidationException>(GetInputSlot(0).GetConnection() != nullptr,
                     "MemCopyLayer: InputSlot must be connected to an OutputSlot");
    ConditionalThrow<LayerValidationException>(GetInputSlot(0).GetConnection()->IsTensorInfoSet(),
                     "MemCopyLayer: TensorInfo must be set on connected OutputSlot.");

    IOutputSlot* input = GetInputSlot(0).GetConnection();

    // Input and output shapes are the same.
    TensorShape const& outShape = input->GetTensorInfo().GetShape();
    ConditionalThrow<LayerValidationException>(GetOutputSlot(0).ValidateTensorShape(outShape),
                     "MemCopyLayer: TensorShape set on OutputSlot[0] does not match the inferred shape.");
}

ResizeBilinearLayer::ResizeBilinearLayer(const ResizeBilinearDescriptor& param, const char* name)
    : LayerWithParameters(1, 1, LayerType::ResizeBilinear, param, name)
{
}

std::unique_ptr<IWorkload> ResizeBilinearLayer::CreateWorkload(const Graph& graph,
                                                               const IWorkloadFactory& factory) const
{
    ResizeBilinearQueueDescriptor descriptor;
    return factory.CreateResizeBilinear(descriptor, PrepInfoAndDesc(descriptor, graph));
}

ResizeBilinearLayer* ResizeBilinearLayer::Clone(Graph& graph) const
{
    return CloneBase<ResizeBilinearLayer>(graph, m_Param, GetName());
}

void ResizeBilinearLayer::ValidateTensorShapesFromInputs()
{
    ConditionalThrow<LayerValidationException>(GetInputSlot(0).GetConnection() != nullptr,
                     "ResizeBilinearLayer: InputSlot must be connected to an OutputSlot");
    ConditionalThrow<LayerValidationException>(GetInputSlot(0).GetConnection()->IsTensorInfoSet(),
                     "ResizeBilinearLayer: TensorInfo must be set on connected OutputSlot.");

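    // Resizing only changes the spatial dimensions; the batch and channel
    // counts are carried over from the input unchanged.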
    const TensorShape& inputShape = GetInputSlot(0).GetConnection()->GetTensorInfo().GetShape();
    unsigned int outWidth = m_Param.m_TargetWidth;
    unsigned int outHeight = m_Param.m_TargetHeight;
    unsigned int outChannels = inputShape[1];
    unsigned int outBatch = inputShape[0];
    TensorShape outShape({outBatch, outChannels, outHeight, outWidth});
    ConditionalThrow<LayerValidationException>(GetOutputSlot(0).ValidateTensorShape(outShape),
                     "ResizeBilinearLayer: TensorShape set on OutputSlot[0] does not match the inferred shape.");
}

L2NormalizationLayer::L2NormalizationLayer(const char* name)
    : Layer(1, 1, LayerType::L2Normalization, name)
{
}

std::unique_ptr<IWorkload> L2NormalizationLayer::CreateWorkload(const Graph& graph,
                                                                const IWorkloadFactory& factory) const
{
    L2NormalizationQueueDescriptor descriptor;
    return factory.CreateL2Normalization(descriptor, PrepInfoAndDesc(descriptor, graph));
}

L2NormalizationLayer* L2NormalizationLayer::Clone(Graph& graph) const
{
    return CloneBase<L2NormalizationLayer>(graph, GetName());
}

void L2NormalizationLayer::ValidateTensorShapesFromInputs()
{
    ConditionalThrow<LayerValidationException>(GetInputSlot(0).GetConnection() != nullptr,
                     "L2NormalizationLayer: InputSlot must be connected to an OutputSlot");
    ConditionalThrow<LayerValidationException>(GetInputSlot(0).GetConnection()->IsTensorInfoSet(),
                     "L2NormalizationLayer: TensorInfo must be set on connected OutputSlot.");

    IOutputSlot* input = GetInputSlot(0).GetConnection();

    // Input and output shapes are the same.
    TensorShape const& outShape = input->GetTensorInfo().GetShape();
    ConditionalThrow<LayerValidationException>(GetOutputSlot(0).ValidateTensorShape(outShape),
                     "L2NormalizationLayer: TensorShape set on OutputSlot[0] does not match the inferred shape.");
}

ConstantLayer::ConstantLayer(const std::shared_ptr<ScopedCpuTensorHandle>& input, const char* name)
    : Layer(0, 1, LayerType::Constant, name)
    , m_LayerOutput(input)
{
}

std::unique_ptr<IWorkload> ConstantLayer::CreateWorkload(const Graph& graph,
                                                         const IWorkloadFactory& factory) const
{
    ConstantQueueDescriptor descriptor;
    descriptor.m_LayerOutput = m_LayerOutput.get();
    return factory.CreateConstant(descriptor, PrepInfoAndDesc(descriptor, graph));
}

ConstantLayer* ConstantLayer::Clone(Graph& graph) const
{
    // Cloned layers share the same layer output object.
    return CloneBase<ConstantLayer>(graph, m_LayerOutput, GetName());
}

void ConstantLayer::ValidateTensorShapesFromInputs()
{
    // Get the output shape from the value of the constant layer.
    TensorShape const& outShape = m_LayerOutput->GetTensorInfo().GetShape();
    ConditionalThrow<LayerValidationException>(GetOutputSlot(0).ValidateTensorShape(outShape),
                     "ConstantLayer: TensorShape set on OutputSlot[0] does not match the inferred shape.");
}

ReshapeLayer::ReshapeLayer(const ReshapeDescriptor& param, const char* name)
    : LayerWithParameters(1, 1, LayerType::Reshape, param, name)
{
}

std::unique_ptr<IWorkload> ReshapeLayer::CreateWorkload(const Graph& graph,
                                                        const IWorkloadFactory& factory) const
{
    ReshapeQueueDescriptor descriptor;
    return factory.CreateReshape(descriptor, PrepInfoAndDesc(descriptor, graph));
}

ReshapeLayer* ReshapeLayer::Clone(Graph& graph) const
{
    return CloneBase<ReshapeLayer>(graph, m_Param, GetName());
}

void ReshapeLayer::ValidateTensorShapesFromInputs()
{
    ConditionalThrow<LayerValidationException>(GetInputSlot(0).GetConnection() != nullptr,
                     "ReshapeLayer: InputSlot must be connected to an OutputSlot");
    ConditionalThrow<LayerValidationException>(GetInputSlot(0).GetConnection()->IsTensorInfoSet(),
                     "ReshapeLayer: TensorInfo must be set on connected OutputSlot.");
    ConditionalThrow<LayerValidationException>(GetOutputSlot(0).ValidateTensorShape(m_Param.m_TargetShape),
                     "ReshapeLayer: TensorShape set on OutputSlot[0] does not match the inferred shape.");
}

} // namespace armnn