2 // Copyright © 2017 Arm Ltd. All rights reserved.
3 // See LICENSE file in the project root for full license information.
8 #include "backends/CpuTensorHandle.hpp"
9 #include "backends/Workload.hpp"
10 #include "backends/WorkloadFactory.hpp"
12 #include "Permute.hpp"
20 template <typename LayerType, typename ... Params>
21 LayerType* Layer::CloneBase(Graph& graph, Params&& ... params) const
23 LayerType* const layer = graph.AddLayer<LayerType>(std::forward<Params>(params)...);
25 layer->SetComputeDevice(m_ComputeDevice);
26 layer->SetGuid(GetGuid());
31 ActivationLayer::ActivationLayer(const ActivationDescriptor& param, const char* name)
32 : LayerWithParameters(1, 1, LayerType::Activation, param, name)
36 std::unique_ptr<IWorkload> ActivationLayer::CreateWorkload(const Graph& graph, const IWorkloadFactory& factory) const
38 ActivationQueueDescriptor descriptor;
39 return factory.CreateActivation(descriptor, PrepInfoAndDesc(descriptor, graph));
42 ActivationLayer* ActivationLayer::Clone(Graph& graph) const
44 return CloneBase<ActivationLayer>(graph, m_Param, GetName());
47 void ActivationLayer::ValidateTensorShapesFromInputs()
49 auto& info = GetInputSlot(0).GetConnection()->GetTensorInfo();
50 ConditionalThrow<LayerValidationException>(GetOutputSlot(0).ValidateTensorShape(info.GetShape()),
51 "ActivationLayer: TensorShape set on OutputSlot[0] does not match the inferred shape.");
54 AdditionLayer::AdditionLayer(const char* name)
55 : Layer(2, 1, LayerType::Addition, name)
59 std::unique_ptr<IWorkload> AdditionLayer::CreateWorkload(const Graph& graph, const IWorkloadFactory& factory) const
61 AdditionQueueDescriptor descriptor;
62 return factory.CreateAddition(descriptor, PrepInfoAndDesc(descriptor, graph));
65 AdditionLayer* AdditionLayer::Clone(Graph& graph) const
67 return CloneBase<AdditionLayer>(graph, GetName());
70 void AdditionLayer::ValidateTensorShapesFromInputs()
72 auto& input0 = GetInputSlot(0).GetConnection()->GetTensorInfo();
73 auto& input1 = GetInputSlot(1).GetConnection()->GetTensorInfo();
75 // Get the max of the inputs
76 BOOST_ASSERT(input0.GetNumDimensions() == input1.GetNumDimensions());
77 unsigned int numDims = input0.GetNumDimensions();
78 std::vector<unsigned int> dims(numDims);
80 // validate inputs are broadcast compatible
82 for (unsigned int i = 0; i < numDims; i++)
84 unsigned int dim0 = input0.GetShape()[i];
85 unsigned int dim1 = input1.GetShape()[i];
88 BOOST_ASSERT_MSG(dim0 == 1 || dim1 == 1, "Dimensions should either match or one should be of size 1.");
93 for (unsigned int i = 0; i < numDims; i++)
95 unsigned int dim0 = input0.GetShape()[i];
96 unsigned int dim1 = input1.GetShape()[i];
97 dims[i] = std::max(dim0, dim1);
100 TensorShape outShape(numDims, dims.data());
101 ConditionalThrow<LayerValidationException>(GetOutputSlot(0).ValidateTensorShape(outShape),
102 "AdditionLayer: TensorShape set on OutputSlot[0] does not match the inferred shape.");
105 BatchNormalizationLayer::BatchNormalizationLayer(const armnn::BatchNormalizationDescriptor& param, const char* name)
106 : LayerWithParameters(1, 1, LayerType::BatchNormalization, param, name)
110 std::unique_ptr<IWorkload> BatchNormalizationLayer::CreateWorkload(const Graph& graph,
111 const IWorkloadFactory& factory) const
113 BatchNormalizationQueueDescriptor descriptor;
115 descriptor.m_Mean = m_Mean.get();
116 descriptor.m_Variance = m_Variance.get();
117 descriptor.m_Beta = m_Beta.get();
118 descriptor.m_Gamma = m_Gamma.get();
119 return factory.CreateBatchNormalization(descriptor, PrepInfoAndDesc(descriptor, graph));
122 BatchNormalizationLayer* BatchNormalizationLayer::Clone(Graph& graph) const
124 auto layer = CloneBase<BatchNormalizationLayer>(graph, m_Param, GetName());
126 layer->m_Mean = m_Mean ? std::make_unique<ScopedCpuTensorHandle>(*m_Mean) : nullptr;
127 layer->m_Variance = m_Variance ? std::make_unique<ScopedCpuTensorHandle>(*m_Variance) : nullptr;
128 layer->m_Beta = m_Beta ? std::make_unique<ScopedCpuTensorHandle>(*m_Beta) : nullptr;
129 layer->m_Gamma = m_Gamma ? std::make_unique<ScopedCpuTensorHandle>(*m_Gamma) : nullptr;
131 return std::move(layer);
134 void BatchNormalizationLayer::ValidateTensorShapesFromInputs()
136 ConditionalThrow<LayerValidationException>(GetInputSlot(0).GetConnection() != nullptr,
137 "BatchNormalizationLayer: InputSlot must be connected to an OutputSlot");
138 ConditionalThrow<LayerValidationException>(GetInputSlot(0).GetConnection()->IsTensorInfoSet(),
139 "BatchNormalizationLayer: TensorInfo must be set on connected OutputSlot.");
141 auto& info = GetInputSlot(0).GetConnection()->GetTensorInfo();
142 ConditionalThrow<LayerValidationException>(GetOutputSlot(0).ValidateTensorShape(info.GetShape()),
143 "BatchNormalizationLayer: TensorShape set on OutputSlot[0] does not match the inferred shape.");
146 Convolution2dLayer::Convolution2dLayer(const Convolution2dDescriptor& param, const char* name)
147 : LayerWithParameters(1, 1, LayerType::Convolution2d, param, name)
151 std::unique_ptr<IWorkload> Convolution2dLayer::CreateWorkload(const Graph& graph, const IWorkloadFactory& factory) const
153 Convolution2dQueueDescriptor descriptor;
155 descriptor.m_Weight = m_Weight.get();
156 if (m_Param.m_BiasEnabled)
158 descriptor.m_Bias = m_Bias.get();
160 return factory.CreateConvolution2d(descriptor, PrepInfoAndDesc(descriptor, graph));
163 Convolution2dLayer* Convolution2dLayer::Clone(Graph& graph) const
165 auto layer = CloneBase<Convolution2dLayer>(graph, m_Param, GetName());
166 layer->m_Weight = m_Weight ? std::make_unique<ScopedCpuTensorHandle>(*m_Weight) : nullptr;
168 if (layer->m_Param.m_BiasEnabled)
170 layer->m_Bias = m_Bias ? std::make_unique<ScopedCpuTensorHandle>(*m_Bias) : nullptr;
173 return std::move(layer);
176 void Convolution2dLayer::ValidateTensorShapesFromInputs()
178 ConditionalThrow<LayerValidationException>(GetInputSlot(0).GetConnection() != nullptr,
179 "Convolution2dLayer: InputSlot must be connected to an OutputSlot");
180 ConditionalThrow<LayerValidationException>(GetInputSlot(0).GetConnection()->IsTensorInfoSet(),
181 "Convolution2dLayer: TensorInfo must be set on connected OutputSlot.");
184 IOutputSlot* input = GetInputSlot(0).GetConnection();
185 const TensorShape& inputShape = input->GetTensorInfo().GetShape();
186 const TensorShape filterShape = m_Weight->GetTensorInfo().GetShape();
188 // If we support multiple batch dimensions in the future, then this assert will need to change.
189 BOOST_ASSERT_MSG(inputShape.GetNumDimensions() == 4, "Convolutions will always have 4D input.");
191 unsigned int inWidth = inputShape[3];
192 unsigned int inHeight = inputShape[2];
193 unsigned int inBatchSize = inputShape[0];
195 unsigned int filterWidth = filterShape[3];
196 unsigned int readWidth = (inWidth + m_Param.m_PadLeft + m_Param.m_PadRight) - (filterWidth);
197 unsigned int outWidth = 1+(readWidth / m_Param.m_StrideX);
199 unsigned int filterHeight = filterShape[2];
200 unsigned int readHeight = (inHeight + m_Param.m_PadTop + m_Param.m_PadBottom) - (filterHeight);
201 unsigned int outHeight = 1+(readHeight / m_Param.m_StrideY);
203 unsigned int outChannels = filterShape[0];
204 unsigned int outBatchSize = inBatchSize;
206 TensorShape shapeOut({outBatchSize, outChannels, outHeight, outWidth});
207 ConditionalThrow<LayerValidationException>(GetOutputSlot(0).ValidateTensorShape(shapeOut),
208 "Convolution2dLayer: TensorShape set on OutputSlot[0] does not match the inferred shape.");
212 DepthwiseConvolution2dLayer::DepthwiseConvolution2dLayer(const DepthwiseConvolution2dDescriptor& param,
214 : LayerWithParameters(1, 1, LayerType::DepthwiseConvolution2d, param, name)
218 std::unique_ptr<IWorkload> DepthwiseConvolution2dLayer::CreateWorkload(const Graph& graph,
219 const IWorkloadFactory& factory) const
221 DepthwiseConvolution2dQueueDescriptor descriptor;
223 descriptor.m_Weight = m_Weight.get();
224 if (m_Param.m_BiasEnabled)
226 descriptor.m_Bias = m_Bias.get();
228 return factory.CreateDepthwiseConvolution2d(descriptor, PrepInfoAndDesc(descriptor, graph));
231 DepthwiseConvolution2dLayer* DepthwiseConvolution2dLayer::Clone(Graph& graph) const
233 auto layer = CloneBase<DepthwiseConvolution2dLayer>(graph, m_Param, GetName());
234 layer->m_Weight = m_Weight ? std::make_unique<ScopedCpuTensorHandle>(*m_Weight) : nullptr;
236 if (layer->m_Param.m_BiasEnabled)
238 layer->m_Bias = m_Bias ? std::make_unique<ScopedCpuTensorHandle>(*m_Bias) : nullptr;
241 return std::move(layer);
244 void DepthwiseConvolution2dLayer::ValidateTensorShapesFromInputs()
246 ConditionalThrow<LayerValidationException>(GetInputSlot(0).GetConnection() != nullptr,
247 "DepthwiseConvolution2dLayer: InputSlot must be connected to an OutputSlot");
248 ConditionalThrow<LayerValidationException>(GetInputSlot(0).GetConnection()->IsTensorInfoSet(),
249 "DepthwiseConvolution2dLayer: TensorInfo must be set on connected OutputSlot.");
251 IOutputSlot* input = GetInputSlot(0).GetConnection();
252 const TensorShape& inputShape = input->GetTensorInfo().GetShape();
253 const TensorShape filterShape = m_Weight->GetTensorInfo().GetShape();
255 BOOST_ASSERT_MSG(inputShape.GetNumDimensions() == 4, "Convolutions will always have 4D input.");
257 unsigned int inWidth = inputShape[3];
258 unsigned int inHeight = inputShape[2];
259 unsigned int inBatchSize = inputShape[0];
261 unsigned int filterWidth = filterShape[3];
262 unsigned int readWidth = (inWidth + m_Param.m_PadLeft + m_Param.m_PadRight) - (filterWidth);
263 unsigned int outWidth = 1+(readWidth / m_Param.m_StrideX);
265 unsigned int filterHeight = filterShape[2];
266 unsigned int readHeight = (inHeight + m_Param.m_PadTop + m_Param.m_PadBottom) - (filterHeight);
267 unsigned int outHeight = 1+(readHeight / m_Param.m_StrideY);
268 unsigned int depthMultiplier = filterShape[0];
270 unsigned int outChannels = filterShape[1]*depthMultiplier;
271 unsigned int outBatchSize = inBatchSize;
273 TensorShape outShape({outBatchSize, outChannels, outHeight, outWidth});
274 ConditionalThrow<LayerValidationException>(GetOutputSlot(0).ValidateTensorShape(outShape),
275 "DepthwiseConvolution2dLayer: "
276 "TensorShape set on OutputSlot[0] does not match the inferred shape.");
279 FakeQuantizationLayer::FakeQuantizationLayer(const FakeQuantizationDescriptor& param, const char* name)
280 : LayerWithParameters(1, 1, LayerType::FakeQuantization, param, name)
284 std::unique_ptr<IWorkload> FakeQuantizationLayer::CreateWorkload(const Graph& graph,
285 const IWorkloadFactory& factory) const
287 FakeQuantizationQueueDescriptor descriptor;
288 return factory.CreateFakeQuantization(descriptor, PrepInfoAndDesc(descriptor, graph) );
291 FakeQuantizationLayer* FakeQuantizationLayer::Clone(Graph& graph) const
293 return CloneBase<FakeQuantizationLayer>(graph, m_Param, GetName());
296 void FakeQuantizationLayer::ValidateTensorShapesFromInputs()
298 ConditionalThrow<LayerValidationException>(GetInputSlot(0).GetConnection() != nullptr,
299 "FakeQuantizationLayer: InputSlot must be connected to an OutputSlot");
300 ConditionalThrow<LayerValidationException>(GetInputSlot(0).GetConnection()->IsTensorInfoSet(),
301 "FakeQuantizationLayer: TensorInfo must be set on connected OutputSlot.");
304 IOutputSlot* input = GetInputSlot(0).GetConnection();
306 // input and output shapes are the same
307 TensorShape const& outShape = input->GetTensorInfo().GetShape();
308 ConditionalThrow<LayerValidationException>(GetOutputSlot(0).ValidateTensorShape(outShape),
309 "FakeQuantizationLayer: TensorShape set on OutputSlot[0] does not match the inferred shape.");
312 FloorLayer::FloorLayer(const char* name)
313 : Layer(1, 1, LayerType::Floor, name)
317 std::unique_ptr<IWorkload> FloorLayer::CreateWorkload(const Graph& graph,
318 const IWorkloadFactory& factory) const
320 FloorQueueDescriptor descriptor;
321 return factory.CreateFloor(descriptor, PrepInfoAndDesc(descriptor, graph));
324 FloorLayer* FloorLayer::Clone(Graph& graph) const
326 return CloneBase<FloorLayer>(graph, GetName());
329 void FloorLayer::ValidateTensorShapesFromInputs()
331 ConditionalThrow<LayerValidationException>(GetInputSlot(0).GetConnection() != nullptr,
332 "FloorLayer: InputSlot must be connected to an OutputSlot");
333 ConditionalThrow<LayerValidationException>(GetInputSlot(0).GetConnection()->IsTensorInfoSet(),
334 "FloorLayer: TensorInfo must be set on connected OutputSlot.");
336 // input and output shapes are the same
337 IOutputSlot* input = GetInputSlot(0).GetConnection();
338 TensorShape const& outShape = input->GetTensorInfo().GetShape();
339 ConditionalThrow<LayerValidationException>(GetOutputSlot(0).ValidateTensorShape(outShape),
340 "FloorLayer: TensorShape set on OutputSlot[0] does not match the inferred shape.");
343 FullyConnectedLayer::FullyConnectedLayer(const FullyConnectedDescriptor& param, const char* name)
344 : LayerWithParameters(1, 1, LayerType::FullyConnected, param, name)
348 std::unique_ptr<IWorkload> FullyConnectedLayer::CreateWorkload(const Graph& graph,
349 const IWorkloadFactory& factory) const
351 FullyConnectedQueueDescriptor descriptor;
353 descriptor.m_Weight = m_Weight.get();
354 if (m_Param.m_BiasEnabled)
356 descriptor.m_Bias = m_Bias.get();
358 return factory.CreateFullyConnected(descriptor, PrepInfoAndDesc(descriptor, graph));
361 FullyConnectedLayer* FullyConnectedLayer::Clone(Graph& graph) const
363 auto layer = CloneBase<FullyConnectedLayer>(graph, m_Param, GetName());
365 layer->m_Weight = m_Weight ? std::make_unique<ScopedCpuTensorHandle>(*m_Weight) : nullptr;
366 if (layer->m_Param.m_BiasEnabled)
368 layer->m_Bias = m_Bias ? std::make_unique<ScopedCpuTensorHandle>(*m_Bias) : nullptr;
371 return std::move(layer);
374 void FullyConnectedLayer::ValidateTensorShapesFromInputs()
376 ConditionalThrow<LayerValidationException>(GetInputSlot(0).GetConnection() != nullptr,
377 "FullyConnectedLayer: InputSlot must be connected to an OutputSlot");
378 ConditionalThrow<LayerValidationException>(GetInputSlot(0).GetConnection()->IsTensorInfoSet(),
379 "FullyConnectedLayer: TensorInfo must be set on connected OutputSlot.");
382 TensorShape const& weightShape = m_Weight->GetTensorInfo().GetShape();
384 // output for FC is [1, w[1]]
385 unsigned int batches = GetInputSlot(0).GetConnection()->GetTensorInfo().GetShape()[0];
386 unsigned int dimIdx = m_Param.m_TransposeWeightMatrix ? 0 : 1;
387 TensorShape outShape({batches, weightShape[dimIdx]});
389 ConditionalThrow<LayerValidationException>(GetOutputSlot(0).ValidateTensorShape(outShape),
390 "FullyConnectedLayer: TensorShape set on OutputSlot[0] does not match the inferred shape.");
393 InputLayer::InputLayer(LayerBindingId id, const char* name)
394 : BindableLayer(0, 1, LayerType::Input, name, id)
// Workload-creation hook for InputLayer. Takes the graph and the backend workload factory
// and returns an owning pointer to an IWorkload.
// NOTE(review): presumably an input layer needs no workload of its own - confirm against the body.
398 std::unique_ptr<IWorkload> InputLayer::CreateWorkload(const Graph& graph, const IWorkloadFactory& factory) const
403 InputLayer* InputLayer::Clone(Graph& graph) const
405 return CloneBase<InputLayer>(graph, GetBindingId(), GetName());
408 void InputLayer::ValidateTensorShapesFromInputs()
410 //The input layer should already have it's inputs set during graph building phase in the driver/parser.
411 ConditionalThrow<LayerValidationException>(GetOutputSlot(0).IsTensorInfoSet(),
412 "InputLayer should already have the TensorInfo set.");
416 MergerLayer::MergerLayer(const OriginsDescriptor& param, const char* name)
417 : LayerWithParameters(param.GetNumViews(), 1, LayerType::Merger, param, name)
421 std::unique_ptr<IWorkload> MergerLayer::CreateWorkload(const Graph& graph, const IWorkloadFactory& factory) const
423 MergerQueueDescriptor descriptor;
425 // copy the view origins to the descriptor
426 descriptor.m_ViewOrigins.reserve(m_Param.GetNumViews());
427 for (unsigned int i = 0; i < m_Param.GetNumViews(); ++i)
429 descriptor.m_ViewOrigins.emplace_back(
430 std::vector<unsigned int>(m_Param.GetViewOrigin(i), m_Param.GetViewOrigin(i) + m_Param.GetNumDimensions()));
433 return factory.CreateMerger(descriptor, PrepInfoAndDesc(descriptor, graph));
436 void MergerLayer::CreateTensorHandles(Graph& graph, const IWorkloadFactory& factory)
438 //if sub tensors are supported than the merger
439 //just needs to make sure that the outputs of the prev layer
440 //are made subtensors of the output of the merger layer
441 m_OutputHandlers[0].CreateTensorHandles(factory);
442 if (factory.SupportsSubTensors())
444 std::queue<MergerLayer*> m_MergerLayers;
446 m_MergerLayers.push(this);
447 while (!m_MergerLayers.empty())
449 MergerLayer* currentLayer = m_MergerLayers.front();
450 ITensorHandle* parentTensor = currentLayer->GetOutputHandler(0).GetData();
452 m_MergerLayers.pop();
454 const unsigned int numInputSlots = currentLayer->GetNumInputSlots();
455 for (unsigned int i = 0; i < numInputSlots; ++i)
457 OutputSlot* slot = currentLayer->GetInputSlot(i).GetConnectedOutputSlot();
458 OutputHandler& outputHandler = slot->GetOutputHandler();
459 outputHandler.SetData(factory.CreateSubTensorHandle(*parentTensor,
460 outputHandler.GetTensorInfo().GetShape(),
461 currentLayer->m_Param.GetViewOrigin(i)));
463 Layer& inputLayer = slot->GetOwningLayer();
464 if (inputLayer.GetType() == LayerType::Merger)
466 m_MergerLayers.push(boost::polymorphic_downcast<MergerLayer*>(&inputLayer));
473 MergerLayer* MergerLayer::Clone(Graph& graph) const
475 return CloneBase<MergerLayer>(graph, m_Param, GetName());
478 void MergerLayer::ValidateTensorShapesFromInputs()
480 // Validate Merger layer
481 ConditionalThrow<LayerValidationException>(m_Param.GetNumViews() == GetNumInputSlots(),
482 "MergerLayer: Num Inputs must match num views.");
484 unsigned int numDims = m_Param.GetNumDimensions();
485 for (unsigned int i=0; i<GetNumInputSlots(); i++)
487 auto& inputInfo = GetInputSlot(i).GetConnection()->GetTensorInfo();
489 boost::ignore_unused(inputInfo);
490 ConditionalThrow<LayerValidationException>(numDims == inputInfo.GetNumDimensions(),
491 "MergerLayer: Num Dimensions must match all inputs.");
494 // Find the bounding box (extents) of all the views
495 std::vector<unsigned int> extentMin(numDims);
496 std::vector<unsigned int> extentMax(numDims);
497 for (unsigned int i = 0; i < GetNumInputSlots(); i++)
499 const uint32_t* origin = m_Param.GetViewOrigin(i);
500 const armnn::TensorShape& shape = GetInputSlot(i).GetConnection()->GetTensorInfo().GetShape();
501 for (unsigned int d = 0; d < numDims; d++)
503 extentMin[d] = std::min(extentMin[d], origin[d]);
504 extentMax[d] = std::max(extentMax[d], origin[d] + shape[d]);
508 // Check that the bounding box starts at the origin
509 if (!std::all_of(extentMin.begin(), extentMin.end(), [](unsigned int s) { return s == 0; }))
511 throw LayerValidationException("MergerLayer: there is no view that starts at the origin");
514 // Check that there are no overlaps of views (this would lead to undefined output at those locations).
515 // Check each pair of views against each other
516 // (and don't bother to check against self, or check the same pair both ways round)
517 for (unsigned int a = 0; a < GetNumInputSlots(); a++)
519 const uint32_t* aOrigin = m_Param.GetViewOrigin(a);
520 const armnn::TensorShape& aShape = GetInputSlot(a).GetConnection()->GetTensorInfo().GetShape();
521 for (unsigned int b = 0; b < a; b++)
523 const uint32_t* bOrigin = m_Param.GetViewOrigin(b);
524 const armnn::TensorShape& bShape = GetInputSlot(b).GetConnection()->GetTensorInfo().GetShape();
526 bool allAxesOverlap = true;
527 for (unsigned int d = 0; d < numDims && allAxesOverlap; d++)
529 unsigned int a1 = aOrigin[d];
530 unsigned int a2 = aOrigin[d] + aShape[d];
532 unsigned int b1 = bOrigin[d];
533 unsigned int b2 = bOrigin[d] + bShape[d];
535 if (a2 <= b1 || b2 <= a1)
537 allAxesOverlap = false;
542 throw LayerValidationException("MergerLayer: Some views overlap.");
547 // Check that there are no "holes", i.e. regions of the output which is not covered by a view.
548 // Because we already checked that there are no overlaps, this can be done simply by checking that
549 // the total 'volume' of the views is the same as the output.
550 unsigned int totalViewsVolume = 0;
551 for (unsigned int i = 0; i < GetNumInputSlots(); i++)
553 totalViewsVolume += GetInputSlot(i).GetConnection()->GetTensorInfo().GetNumElements();
555 unsigned int outputVolume = 1;
556 for (unsigned int d = 0; d < numDims; d++)
558 outputVolume *= (extentMax[d] - extentMin[d]);
560 if (totalViewsVolume != outputVolume)
562 throw LayerValidationException("MergerLayer: there are some gaps between views");
565 TensorShape outShape(numDims, extentMax.data());
566 ConditionalThrow<LayerValidationException>(GetOutputSlot(0).ValidateTensorShape(outShape),
567 "MergerLayer: TensorShape set on OutputSlot[0] does not match the inferred shape.");
570 MultiplicationLayer::MultiplicationLayer(const char* name)
571 : Layer(2, 1, LayerType::Multiplication, name)
575 std::unique_ptr<IWorkload> MultiplicationLayer::CreateWorkload(const Graph& graph,
576 const IWorkloadFactory& factory) const
578 MultiplicationQueueDescriptor descriptor;
580 return factory.CreateMultiplication(descriptor, PrepInfoAndDesc(descriptor, graph));
583 MultiplicationLayer* MultiplicationLayer::Clone(Graph& graph) const
585 return CloneBase<MultiplicationLayer>(graph, GetName());
588 void MultiplicationLayer::ValidateTensorShapesFromInputs()
590 auto& input0 = GetInputSlot(0).GetConnection()->GetTensorInfo();
591 auto& input1 = GetInputSlot(1).GetConnection()->GetTensorInfo();
593 // Get the max of the inputs
594 BOOST_ASSERT(input0.GetNumDimensions() == input1.GetNumDimensions());
595 unsigned int numDims = input0.GetNumDimensions();
596 std::vector<unsigned int> dims(numDims);
598 // validate inputs are broadcast compatible
600 for (unsigned int i = 0; i < numDims; i++)
602 unsigned int dim0 = input0.GetShape()[i];
603 unsigned int dim1 = input1.GetShape()[i];
606 BOOST_ASSERT_MSG(dim0 == 1 || dim1 == 1, "Dimensions should either match or one should be of size 1.");
611 for (unsigned int i = 0; i < numDims; i++)
613 unsigned int dim0 = input0.GetShape()[i];
614 unsigned int dim1 = input1.GetShape()[i];
615 dims[i] = std::max(dim0, dim1);
618 TensorShape outShape(numDims, dims.data());
619 ConditionalThrow<LayerValidationException>(GetOutputSlot(0).ValidateTensorShape(outShape),
620 "MultiplicationLayer: TensorShape set on OutputSlot[0] does not match the inferred shape.");
623 NormalizationLayer::NormalizationLayer(const NormalizationDescriptor& param, const char* name)
624 : LayerWithParameters(1, 1, LayerType::Normalization, param, name)
628 std::unique_ptr<IWorkload> NormalizationLayer::CreateWorkload(const Graph& graph, const IWorkloadFactory& factory) const
630 NormalizationQueueDescriptor descriptor;
631 return factory.CreateNormalization(descriptor, PrepInfoAndDesc(descriptor, graph));
634 NormalizationLayer* NormalizationLayer::Clone(Graph& graph) const
636 return CloneBase<NormalizationLayer>(graph, m_Param, GetName());
639 void NormalizationLayer::ValidateTensorShapesFromInputs()
641 ConditionalThrow<LayerValidationException>(GetInputSlot(0).GetConnection() != nullptr,
642 "NormalizationLayer: Input slot must be connected.");
644 const TensorShape& outShape = GetInputSlot(0).GetConnection()->GetTensorInfo().GetShape();
645 ConditionalThrow<LayerValidationException>(GetOutputSlot(0).ValidateTensorShape(outShape),
646 "NormalizationLayer: TensorShape set on OutputSlot[0] does not match the inferred shape.");
649 OutputLayer::OutputLayer(LayerBindingId id, const char* name)
650 : BindableLayer(1, 0, LayerType::Output, name, id)
// Workload-creation hook for OutputLayer. Takes the graph and the backend workload factory
// and returns an owning pointer to an IWorkload.
// NOTE(review): presumably an output layer needs no workload of its own - confirm against the body.
654 std::unique_ptr<IWorkload> OutputLayer::CreateWorkload(const Graph& graph, const IWorkloadFactory& factory) const
659 OutputLayer* OutputLayer::Clone(Graph& graph) const
661 return CloneBase<OutputLayer>(graph, GetBindingId(), GetName());
664 void OutputLayer::ValidateTensorShapesFromInputs()
666 // Just validate the input is connected
667 ConditionalThrow<LayerValidationException>(GetInputSlot(0).GetConnection() != nullptr,
668 "OutputLayer: Input slot must be connected.");
671 PermuteLayer::PermuteLayer(const PermuteDescriptor& param, const char* name)
672 : LayerWithParameters(1, 1, LayerType::Permute, param, name)
676 std::unique_ptr<IWorkload> PermuteLayer::CreateWorkload(const Graph& graph, const IWorkloadFactory& factory) const
678 PermuteQueueDescriptor descriptor;
679 return factory.CreatePermute(descriptor, PrepInfoAndDesc(descriptor, graph));
682 PermuteLayer* PermuteLayer::Clone(Graph& graph) const
684 return CloneBase<PermuteLayer>(graph, m_Param, GetName());
687 void PermuteLayer::ValidateTensorShapesFromInputs()
689 ConditionalThrow<LayerValidationException>(GetInputSlot(0).GetConnection() != nullptr,
690 "PermuteLayer: InputSlot must be connected to an OutputSlot");
691 ConditionalThrow<LayerValidationException>(GetInputSlot(0).GetConnection()->IsTensorInfoSet(),
692 "PermuteLayer: TensorInfo must be set on connected InputSlot.");
694 const TensorInfo& infoIn = GetInputSlot(0).GetConnection()->GetTensorInfo();
695 TensorShape shapeOut = armnnUtils::Permuted(infoIn.GetShape(), m_Param.m_DimMappings);
696 ConditionalThrow<LayerValidationException>(GetOutputSlot(0).ValidateTensorShape(shapeOut),
697 "PermuteLayer: TensorShape set on OutputSlot[0] does not match the inferred shape.");
700 Pooling2dLayer::Pooling2dLayer(const Pooling2dDescriptor& param, const char* name)
701 : LayerWithParameters(1, 1, LayerType::Pooling2d, param, name)
705 std::unique_ptr<IWorkload> Pooling2dLayer::CreateWorkload(const Graph& graph, const IWorkloadFactory& factory) const
707 Pooling2dQueueDescriptor descriptor;
708 return factory.CreatePooling2d(descriptor, PrepInfoAndDesc(descriptor, graph));
711 Pooling2dLayer* Pooling2dLayer::Clone(Graph& graph) const
713 return CloneBase<Pooling2dLayer>(graph, m_Param, GetName());
// Infers the pooled output shape {batch, channels, outHeight, outWidth} from the input shape
// and the pooling descriptor, then checks it against the shape set on output slot 0.
// Throws LayerValidationException when the input is unconnected, the connected output slot
// has no TensorInfo, or the output shape differs from the inferred one.
716 void Pooling2dLayer::ValidateTensorShapesFromInputs()
718 ConditionalThrow<LayerValidationException>(GetInputSlot(0).GetConnection() != nullptr,
719 "Pooling2dLayer: InputSlot must be connected to an OutputSlot");
// NOTE(review): the message below says "InputSlot", but the TensorInfo checked here belongs
// to the connected OutputSlot; other layers word this as "connected OutputSlot".
720 ConditionalThrow<LayerValidationException>(GetInputSlot(0).GetConnection()->IsTensorInfoSet(),
721 "Pooling2dLayer: TensorInfo must be set on connected InputSlot.");
723 IOutputSlot* input = GetInputSlot(0).GetConnection();
724 const TensorShape& inputShape = input->GetTensorInfo().GetShape();
726 // If we support multiple batch dimensions in the future, then this assert will need to change.
727 BOOST_ASSERT_MSG(inputShape.GetNumDimensions() == 4, "Pooling2dLayer will always have 4D input.");
// Input dimensions are read as [batch, channels, height, width].
730 unsigned int inWidth = inputShape[3];
731 unsigned int inHeight = inputShape[2];
732 unsigned int inChannels = inputShape[1];
733 unsigned int inBatchSize = inputShape[0];
// Both strides being zero selects global pooling, for which the output stays 1x1.
735 bool isGlobalPooling = (m_Param.m_StrideX==0 && m_Param.m_StrideY==0);
736 unsigned int outWidth = 1;
737 unsigned int outHeight = 1;
738 if (!isGlobalPooling)
// Only one stride being zero is a programmer error.
740 BOOST_ASSERT_MSG(m_Param.m_StrideX!=0 && m_Param.m_StrideY!=0,
741 "Stride can only be zero when performing global pooling");
// Computes the output size along one axis. padMethod is accepted but not referenced in the
// statements visible here.
743 auto CalcSize = [](auto inSize, auto lowPad, auto highPad, auto poolSize, auto stride, auto padMethod,
744 auto outputShapeRounding)
// NOTE(review): unsigned arithmetic - this wraps around if poolSize exceeds the padded input size.
746 unsigned int readSize = inSize + lowPad + highPad - poolSize;
747 float div = static_cast<float>(readSize) / static_cast<float>(stride);
// The quotient is rounded up or down according to the requested rounding mode, then 1 is added.
749 unsigned int size = 0;
750 switch (outputShapeRounding)
752 case OutputShapeRounding::Ceiling:
753 size = static_cast<unsigned int>(ceil(div)) + 1;
755 case OutputShapeRounding ::Floor:
756 size = static_cast<unsigned int>(floor(div)) + 1;
759 BOOST_ASSERT_MSG(false, "Unsupported Output Shape Rounding");
762 // Make sure that border operations will start from inside the input and not the padded area
763 // This is what both Caffe and CL does...
764 if ((size - 1)*stride >= inSize + lowPad)
// Width uses the left/right padding, pool width and X stride; height uses the top/bottom
// padding, pool height and Y stride.
772 outWidth = CalcSize(inWidth, m_Param.m_PadLeft, m_Param.m_PadRight, m_Param.m_PoolWidth, m_Param.m_StrideX,
773 m_Param.m_PaddingMethod, m_Param.m_OutputShapeRounding);
774 outHeight= CalcSize(inHeight, m_Param.m_PadTop, m_Param.m_PadBottom, m_Param.m_PoolHeight, m_Param.m_StrideY,
775 m_Param.m_PaddingMethod, m_Param.m_OutputShapeRounding);
// Pooling leaves the channel count and batch size unchanged.
779 unsigned int outChannels = inChannels;
780 unsigned int outBatchSize = inBatchSize;
782 TensorShape shapeOut({outBatchSize, outChannels, outHeight, outWidth});
784 ConditionalThrow<LayerValidationException>(GetOutputSlot(0).ValidateTensorShape(shapeOut),
785 "Pooling2dLayer: TensorShape set on OutputSlot[0] does not match the inferred shape.");
788 SoftmaxLayer::SoftmaxLayer(const SoftmaxDescriptor ¶m, const char* name)
789 : LayerWithParameters(1, 1, LayerType::Softmax, param, name)
793 std::unique_ptr<IWorkload> SoftmaxLayer::CreateWorkload(const Graph& graph, const IWorkloadFactory& factory) const
795 SoftmaxQueueDescriptor descriptor;
796 return factory.CreateSoftmax(descriptor, PrepInfoAndDesc(descriptor, graph));
799 SoftmaxLayer* SoftmaxLayer::Clone(Graph& graph) const
801 return CloneBase<SoftmaxLayer>(graph, m_Param, GetName());
804 void SoftmaxLayer::ValidateTensorShapesFromInputs()
806 ConditionalThrow<LayerValidationException>(GetInputSlot(0).GetConnection() != nullptr,
807 "SoftmaxLayer: Input slot must be connected.");
808 const TensorShape& outShape = GetInputSlot(0).GetConnection()->GetTensorInfo().GetShape();
809 ConditionalThrow<LayerValidationException>(GetOutputSlot(0).ValidateTensorShape(outShape),
810 "SoftmaxLayer: TensorShape set on OutputSlot[0] does not match the inferred shape.");
813 SplitterLayer::SplitterLayer(const ViewsDescriptor& param, const char* name)
814 : LayerWithParameters(1, param.GetNumViews(), LayerType::Splitter, param, name)
818 std::unique_ptr<IWorkload> SplitterLayer::CreateWorkload(const Graph& graph, const IWorkloadFactory& factory) const
820 SplitterQueueDescriptor descriptor;
822 // copy the window origins to the descriptor
823 for (unsigned int i = 0; i < m_Param.GetNumViews(); ++i)
825 descriptor.m_ViewOrigins.emplace_back(
826 std::vector<unsigned int>(m_Param.GetViewOrigin(i), m_Param.GetViewOrigin(i) + m_Param.GetNumDimensions()));
829 return factory.CreateSplitter(descriptor, PrepInfoAndDesc(descriptor, graph));
832 void SplitterLayer::CreateTensorHandles(Graph& graph, const IWorkloadFactory& factory)
834 //if sub tensors are supported than all the "splitter" need to do is to
835 //set the outputs to be appropriate sub tensors of the input.
836 if (factory.SupportsSubTensors())
838 const OutputHandler& outputHandler = GetInputSlots()[0].GetConnectedOutputSlot()->GetOutputHandler();
840 ITensorHandle* inputData = outputHandler.GetData();
841 //create the outputs as subtensors of the input
842 for (unsigned int i = 0; i < m_Param.GetNumViews(); ++i)
844 m_OutputHandlers[i].SetData(factory.CreateSubTensorHandle(*inputData,
845 m_OutputHandlers[i].GetTensorInfo().GetShape(),
846 m_Param.GetViewOrigin(i)));
851 for (unsigned int i = 0; i < m_Param.GetNumViews(); ++i)
853 m_OutputHandlers[i].CreateTensorHandles(factory);
858 SplitterLayer* SplitterLayer::Clone(Graph& graph) const
860 return CloneBase<SplitterLayer>(graph, m_Param, GetName());
863 void SplitterLayer::ValidateTensorShapesFromInputs()
865 //Output shapes must match View shapes.
866 for (unsigned int viewIdx = 0; viewIdx < m_Param.GetNumViews(); viewIdx++)
868 const uint32_t* sizes = m_Param.GetViewSizes(viewIdx);
870 TensorShape outShape(m_Param.GetNumDimensions(), sizes);
871 ConditionalThrow<LayerValidationException>(GetOutputSlot(viewIdx).ValidateTensorShape(outShape),
872 "SplitterLayer: View sizes must match output tensor shapes.");
876 MemCopyLayer::MemCopyLayer(const char* name)
877 : Layer(1, 1, LayerType::MemCopy, name)
881 MemCopyLayer* MemCopyLayer::Clone(Graph& graph) const
883 return CloneBase<MemCopyLayer>(graph, GetName());
886 std::unique_ptr<IWorkload> MemCopyLayer::CreateWorkload(const Graph& graph, const IWorkloadFactory& factory) const
888 MemCopyQueueDescriptor descriptor;
889 return factory.CreateMemCopy(descriptor, PrepInfoAndDesc(descriptor, graph));
892 void MemCopyLayer::ValidateTensorShapesFromInputs()
894 ConditionalThrow<LayerValidationException>(GetInputSlot(0).GetConnection() != nullptr,
895 "MemCopyLayer: InputSlot must be connected to an OutputSlot");
896 ConditionalThrow<LayerValidationException>(GetInputSlot(0).GetConnection()->IsTensorInfoSet(),
897 "MemCopyLayer: TensorInfo must be set on connected OutputSlot.");
900 IOutputSlot* input = GetInputSlot(0).GetConnection();
902 // input and output shapes are the same
903 TensorShape const& outShape = input->GetTensorInfo().GetShape();
904 ConditionalThrow<LayerValidationException>(GetOutputSlot(0).ValidateTensorShape(outShape),
905 "MemCopyLayer: TensorShape set on OutputSlot[0] does not match the inferred shape.");
908 ResizeBilinearLayer::ResizeBilinearLayer(const ResizeBilinearDescriptor& param, const char* name)
909 : LayerWithParameters(1, 1, LayerType::ResizeBilinear, param, name)
913 std::unique_ptr<IWorkload> ResizeBilinearLayer::CreateWorkload(const Graph& graph,
914 const IWorkloadFactory& factory) const
916 ResizeBilinearQueueDescriptor descriptor;
917 return factory.CreateResizeBilinear(descriptor, PrepInfoAndDesc(descriptor, graph));
920 ResizeBilinearLayer* ResizeBilinearLayer::Clone(Graph& graph) const
922 return CloneBase<ResizeBilinearLayer>(graph, m_Param, GetName());
925 void ResizeBilinearLayer::ValidateTensorShapesFromInputs()
927 ConditionalThrow<LayerValidationException>(GetInputSlot(0).GetConnection() != nullptr,
928 "MemCopyLayer: InputSlot must be connected to an OutputSlot");
929 ConditionalThrow<LayerValidationException>(GetInputSlot(0).GetConnection()->IsTensorInfoSet(),
930 "MemCopyLayer: TensorInfo must be set on connected OutputSlot.");
932 const TensorShape& inputShape = GetInputSlot(0).GetConnection()->GetTensorInfo().GetShape();
933 unsigned int outWidth = m_Param.m_TargetWidth;
934 unsigned int outHeight = m_Param.m_TargetHeight;
935 unsigned int outChannels = inputShape[1];
936 unsigned int outBatch = inputShape[0];
937 TensorShape outShape({outBatch, outChannels, outHeight, outWidth});
938 ConditionalThrow<LayerValidationException>(GetOutputSlot(0).ValidateTensorShape(outShape),
939 "ResizeBilinearLayer: TensorShape set on OutputSlot[0] does not match the inferred shape.");
942 L2NormalizationLayer::L2NormalizationLayer(const char* name)
943 : Layer(1, 1, LayerType::L2Normalization, name)
947 std::unique_ptr<IWorkload> L2NormalizationLayer::CreateWorkload(const Graph& graph,
948 const IWorkloadFactory& factory) const
950 L2NormalizationQueueDescriptor descriptor;
951 return factory.CreateL2Normalization(descriptor, PrepInfoAndDesc(descriptor, graph));
954 L2NormalizationLayer* L2NormalizationLayer::Clone(Graph& graph) const
956 return CloneBase<L2NormalizationLayer>(graph, GetName());
959 void L2NormalizationLayer::ValidateTensorShapesFromInputs()
961 ConditionalThrow<LayerValidationException>(GetInputSlot(0).GetConnection() != nullptr,
962 "L2NormalizationLayer: InputSlot must be connected to an OutputSlot");
963 ConditionalThrow<LayerValidationException>(GetInputSlot(0).GetConnection()->IsTensorInfoSet(),
964 "L2NormalizationLayer: TensorInfo must be set on connected OutputSlot.");
966 IOutputSlot* input = GetInputSlot(0).GetConnection();
968 // input and output shapes are the same
969 TensorShape const& outShape = input->GetTensorInfo().GetShape();
970 ConditionalThrow<LayerValidationException>(GetOutputSlot(0).ValidateTensorShape(outShape),
971 "L2NormalizationLayer: TensorShape set on OutputSlot[0] does not match the inferred shape.");
974 ConstantLayer::ConstantLayer(const std::shared_ptr<ScopedCpuTensorHandle>& input, const char* name)
975 : Layer(0, 1, LayerType::Constant, name)
976 , m_LayerOutput(input)
980 std::unique_ptr<IWorkload> ConstantLayer::CreateWorkload(const Graph& graph,
981 const IWorkloadFactory& factory) const
983 ConstantQueueDescriptor descriptor;
984 descriptor.m_LayerOutput = m_LayerOutput.get();
985 return factory.CreateConstant(descriptor, PrepInfoAndDesc(descriptor, graph));
988 ConstantLayer* ConstantLayer::Clone(Graph& graph) const
990 // Cloned layers share the same layer output object
991 return CloneBase<ConstantLayer>(graph, m_LayerOutput, GetName());
994 void ConstantLayer::ValidateTensorShapesFromInputs()
996 // get the output shape from the value of the constant layer
997 TensorShape const& outShape = m_LayerOutput->GetTensorInfo().GetShape();
998 ConditionalThrow<LayerValidationException>(GetOutputSlot(0).ValidateTensorShape(outShape),
999 "ConstantLayer: TensorShape set on OutputSlot[0] does not match the inferred shape.");
1002 ReshapeLayer::ReshapeLayer(const ReshapeDescriptor& param, const char* name)
1003 : LayerWithParameters(1, 1, LayerType::Reshape, param, name)
1007 std::unique_ptr<IWorkload> ReshapeLayer::CreateWorkload(const Graph& graph,
1008 const IWorkloadFactory& factory) const
1010 ReshapeQueueDescriptor descriptor;
1011 return factory.CreateReshape(descriptor, PrepInfoAndDesc(descriptor, graph));
1014 ReshapeLayer* ReshapeLayer::Clone(Graph& graph) const
1016 return CloneBase<ReshapeLayer>(graph, m_Param, GetName());
1019 void ReshapeLayer::ValidateTensorShapesFromInputs()
1021 ConditionalThrow<LayerValidationException>(GetInputSlot(0).GetConnection() != nullptr,
1022 "ReshapeLayer: InputSlot must be connected to an OutputSlot");
1023 ConditionalThrow<LayerValidationException>(GetInputSlot(0).GetConnection()->IsTensorInfoSet(),
1024 "ReshapeLayer: TensorInfo must be set on connected OutputSlot.");
1025 ConditionalThrow<LayerValidationException>(GetOutputSlot(0).ValidateTensorShape(m_Param.m_TargetShape),
1026 "ReshapeLayer: TensorShape set on OutputSlot[0] does not match the inferred shape.");