2 // Copyright © 2017 Arm Ltd. All rights reserved.
3 // See LICENSE file in the project root for full license information.
8 #include "backends/CpuTensorHandle.hpp"
9 #include "backends/Workload.hpp"
10 #include "backends/WorkloadFactory.hpp"
12 #include "Permute.hpp"
18 template <typename LayerType, typename ... Params>
19 LayerType* Layer::CloneBase(Graph& graph, Params&& ... params) const
21 LayerType* const layer = graph.AddLayer<LayerType>(std::forward<Params>(params)...);
23 layer->SetComputeDevice(m_ComputeDevice);
28 ActivationLayer::ActivationLayer(const ActivationDescriptor& param, const char* name)
29 : LayerWithParameters(1, 1, LayerType::Activation, param, name)
33 std::unique_ptr<IWorkload> ActivationLayer::CreateWorkload(const Graph& graph, const IWorkloadFactory& factory) const
35 ActivationQueueDescriptor descriptor;
36 return factory.CreateActivation(descriptor, PrepInfoAndDesc(descriptor, graph));
39 ActivationLayer* ActivationLayer::Clone(Graph& graph) const
41 return CloneBase<ActivationLayer>(graph, m_Param, GetName());
44 void ActivationLayer::ValidateTensorShapesFromInputs()
46 auto& info = GetInputSlot(0).GetConnection()->GetTensorInfo();
47 ConditionalThrow<LayerValidationException>(GetOutputSlot(0).ValidateTensorShape(info.GetShape()),
48 "ActivationLayer: TensorShape set on OutputSlot[0] does not match the inferred shape.");
51 AdditionLayer::AdditionLayer(const char* name)
52 : Layer(2, 1, LayerType::Addition, name)
56 std::unique_ptr<IWorkload> AdditionLayer::CreateWorkload(const Graph& graph, const IWorkloadFactory& factory) const
58 AdditionQueueDescriptor descriptor;
59 return factory.CreateAddition(descriptor, PrepInfoAndDesc(descriptor, graph));
62 AdditionLayer* AdditionLayer::Clone(Graph& graph) const
64 return CloneBase<AdditionLayer>(graph, GetName());
67 void AdditionLayer::ValidateTensorShapesFromInputs()
69 auto& input0 = GetInputSlot(0).GetConnection()->GetTensorInfo();
70 auto& input1 = GetInputSlot(1).GetConnection()->GetTensorInfo();
72 // Get the max of the inputs
73 BOOST_ASSERT(input0.GetNumDimensions() == input1.GetNumDimensions());
74 unsigned int numDims = input0.GetNumDimensions();
75 std::vector<unsigned int> dims(numDims);
77 // validate inputs are broadcast compatible
79 for (unsigned int i = 0; i < numDims; i++)
81 unsigned int dim0 = input0.GetShape()[i];
82 unsigned int dim1 = input1.GetShape()[i];
85 BOOST_ASSERT_MSG(dim0 == 1 || dim1 == 1, "Dimensions should either match or one should be one length");
91 for (unsigned int i = 0; i < numDims; i++)
93 unsigned int dim0 = input0.GetShape()[i];
94 unsigned int dim1 = input1.GetShape()[i];
95 dims[i] = std::max(dim0, dim1);
98 TensorShape outShape(numDims, dims.data());
99 ConditionalThrow<LayerValidationException>(GetOutputSlot(0).ValidateTensorShape(outShape),
100 "AdditionLayer: TensorShape set on OutputSlot[0] does not match the inferred shape.");
103 BatchNormalizationLayer::BatchNormalizationLayer(const armnn::BatchNormalizationDescriptor& param, const char* name)
104 : LayerWithParameters(1, 1, LayerType::BatchNormalization, param, name)
108 std::unique_ptr<IWorkload> BatchNormalizationLayer::CreateWorkload(const Graph& graph,
109 const IWorkloadFactory& factory) const
111 BatchNormalizationQueueDescriptor descriptor;
113 descriptor.m_Mean = m_Mean.get();
114 descriptor.m_Variance = m_Variance.get();
115 descriptor.m_Beta = m_Beta.get();
116 descriptor.m_Gamma = m_Gamma.get();
117 return factory.CreateBatchNormalization(descriptor, PrepInfoAndDesc(descriptor, graph));
120 BatchNormalizationLayer* BatchNormalizationLayer::Clone(Graph& graph) const
122 auto layer = CloneBase<BatchNormalizationLayer>(graph, m_Param, GetName());
124 layer->m_Mean = m_Mean ? std::make_unique<ScopedCpuTensorHandle>(*m_Mean) : nullptr;
125 layer->m_Variance = m_Variance ? std::make_unique<ScopedCpuTensorHandle>(*m_Variance) : nullptr;
126 layer->m_Beta = m_Beta ? std::make_unique<ScopedCpuTensorHandle>(*m_Beta) : nullptr;
127 layer->m_Gamma = m_Gamma ? std::make_unique<ScopedCpuTensorHandle>(*m_Gamma) : nullptr;
129 return std::move(layer);
132 void BatchNormalizationLayer::ValidateTensorShapesFromInputs()
134 ConditionalThrow<LayerValidationException>(GetInputSlot(0).GetConnection() != nullptr,
135 "BatchNormalizationLayer: InputSlot must be connected to an OutputSlot");
136 ConditionalThrow<LayerValidationException>(GetInputSlot(0).GetConnection()->IsTensorInfoSet(),
137 "BatchNormalizationLayer: TensorInfo must be set on connected OutputSlot.");
139 auto& info = GetInputSlot(0).GetConnection()->GetTensorInfo();
140 ConditionalThrow<LayerValidationException>(GetOutputSlot(0).ValidateTensorShape(info.GetShape()),
141 "BatchNormalizationLayer: TensorShape set on OutputSlot[0] does not match the inferred shape.");
144 Convolution2dLayer::Convolution2dLayer(const Convolution2dDescriptor& param, const char* name)
145 : LayerWithParameters(1, 1, LayerType::Convolution2d, param, name)
149 std::unique_ptr<IWorkload> Convolution2dLayer::CreateWorkload(const Graph& graph, const IWorkloadFactory& factory) const
151 Convolution2dQueueDescriptor descriptor;
153 descriptor.m_Weight = m_Weight.get();
154 if (m_Param.m_BiasEnabled)
156 descriptor.m_Bias = m_Bias.get();
158 return factory.CreateConvolution2d(descriptor, PrepInfoAndDesc(descriptor, graph));
161 Convolution2dLayer* Convolution2dLayer::Clone(Graph& graph) const
163 auto layer = CloneBase<Convolution2dLayer>(graph, m_Param, GetName());
164 layer->m_Weight = m_Weight ? std::make_unique<ScopedCpuTensorHandle>(*m_Weight) : nullptr;
166 if (layer->m_Param.m_BiasEnabled)
168 layer->m_Bias = m_Bias ? std::make_unique<ScopedCpuTensorHandle>(*m_Bias) : nullptr;
171 return std::move(layer);
174 void Convolution2dLayer::ValidateTensorShapesFromInputs()
176 ConditionalThrow<LayerValidationException>(GetInputSlot(0).GetConnection() != nullptr,
177 "Convolution2dLayer: InputSlot must be connected to an OutputSlot");
178 ConditionalThrow<LayerValidationException>(GetInputSlot(0).GetConnection()->IsTensorInfoSet(),
179 "Convolution2dLayer: TensorInfo must be set on connected OutputSlot.");
182 IOutputSlot* input = GetInputSlot(0).GetConnection();
183 const TensorShape& inputShape = input->GetTensorInfo().GetShape();
184 const TensorShape filterShape = m_Weight->GetTensorInfo().GetShape();
186 // If we support multiple batch dimensions in the future, then this assert will need to change.
187 BOOST_ASSERT_MSG(inputShape.GetNumDimensions() == 4, "Convolutions will always have 4D input.");
189 unsigned int inWidth = inputShape[3];
190 unsigned int inHeight = inputShape[2];
191 unsigned int inBatchSize = inputShape[0];
193 unsigned int filterWidth = filterShape[3];
194 unsigned int readWidth = (inWidth + m_Param.m_PadLeft + m_Param.m_PadRight) - (filterWidth);
195 unsigned int outWidth = 1+(readWidth / m_Param.m_StrideX);
197 unsigned int filterHeight = filterShape[2];
198 unsigned int readHeight = (inHeight + m_Param.m_PadTop + m_Param.m_PadBottom) - (filterHeight);
199 unsigned int outHeight = 1+(readHeight / m_Param.m_StrideY);
201 unsigned int outChannels = filterShape[0];
202 unsigned int outBatchSize = inBatchSize;
204 TensorShape shapeOut({outBatchSize, outChannels, outHeight, outWidth});
205 ConditionalThrow<LayerValidationException>(GetOutputSlot(0).ValidateTensorShape(shapeOut),
206 "Convolution2dLayer: TensorShape set on OutputSlot[0] does not match the inferred shape.");
210 DepthwiseConvolution2dLayer::DepthwiseConvolution2dLayer(const DepthwiseConvolution2dDescriptor& param,
212 : LayerWithParameters(1, 1, LayerType::DepthwiseConvolution2d, param, name)
216 std::unique_ptr<IWorkload> DepthwiseConvolution2dLayer::CreateWorkload(const Graph& graph,
217 const IWorkloadFactory& factory) const
219 DepthwiseConvolution2dQueueDescriptor descriptor;
221 descriptor.m_Weight = m_Weight.get();
222 if (m_Param.m_BiasEnabled)
224 descriptor.m_Bias = m_Bias.get();
226 return factory.CreateDepthwiseConvolution2d(descriptor, PrepInfoAndDesc(descriptor, graph));
229 DepthwiseConvolution2dLayer* DepthwiseConvolution2dLayer::Clone(Graph& graph) const
231 auto layer = CloneBase<DepthwiseConvolution2dLayer>(graph, m_Param, GetName());
232 layer->m_Weight = m_Weight ? std::make_unique<ScopedCpuTensorHandle>(*m_Weight) : nullptr;
234 if (layer->m_Param.m_BiasEnabled)
236 layer->m_Bias = m_Bias ? std::make_unique<ScopedCpuTensorHandle>(*m_Bias) : nullptr;
239 return std::move(layer);
242 void DepthwiseConvolution2dLayer::ValidateTensorShapesFromInputs()
244 ConditionalThrow<LayerValidationException>(GetInputSlot(0).GetConnection() != nullptr,
245 "DepthwiseConvolution2dLayer: InputSlot must be connected to an OutputSlot");
246 ConditionalThrow<LayerValidationException>(GetInputSlot(0).GetConnection()->IsTensorInfoSet(),
247 "DepthwiseConvolution2dLayer: TensorInfo must be set on connected OutputSlot.");
249 IOutputSlot* input = GetInputSlot(0).GetConnection();
250 const TensorShape& inputShape = input->GetTensorInfo().GetShape();
251 const TensorShape filterShape = m_Weight->GetTensorInfo().GetShape();
253 BOOST_ASSERT_MSG(inputShape.GetNumDimensions() == 4, "Convolutions will always have 4D input.");
255 unsigned int inWidth = inputShape[3];
256 unsigned int inHeight = inputShape[2];
257 unsigned int inBatchSize = inputShape[0];
259 unsigned int filterWidth = filterShape[3];
260 unsigned int readWidth = (inWidth + m_Param.m_PadLeft + m_Param.m_PadRight) - (filterWidth);
261 unsigned int outWidth = 1+(readWidth / m_Param.m_StrideX);
263 unsigned int filterHeight = filterShape[2];
264 unsigned int readHeight = (inHeight + m_Param.m_PadTop + m_Param.m_PadBottom) - (filterHeight);
265 unsigned int outHeight = 1+(readHeight / m_Param.m_StrideY);
266 unsigned int depthMultiplier = filterShape[0];
268 unsigned int outChannels = filterShape[1]*depthMultiplier;
269 unsigned int outBatchSize = inBatchSize;
271 TensorShape outShape({outBatchSize, outChannels, outHeight, outWidth});
272 ConditionalThrow<LayerValidationException>(GetOutputSlot(0).ValidateTensorShape(outShape),
273 "DepthwiseConvolution2dLayer: "
274 "TensorShape set on OutputSlot[0] does not match the inferred shape.");
277 FakeQuantizationLayer::FakeQuantizationLayer(const FakeQuantizationDescriptor& param, const char* name)
278 : LayerWithParameters(1, 1, LayerType::FakeQuantization, param, name)
282 std::unique_ptr<IWorkload> FakeQuantizationLayer::CreateWorkload(const Graph& graph,
283 const IWorkloadFactory& factory) const
285 FakeQuantizationQueueDescriptor descriptor;
286 return factory.CreateFakeQuantization(descriptor, PrepInfoAndDesc(descriptor, graph) );
289 FakeQuantizationLayer* FakeQuantizationLayer::Clone(Graph& graph) const
291 return CloneBase<FakeQuantizationLayer>(graph, m_Param, GetName());
294 void FakeQuantizationLayer::ValidateTensorShapesFromInputs()
296 ConditionalThrow<LayerValidationException>(GetInputSlot(0).GetConnection() != nullptr,
297 "FakeQuantizationLayer: InputSlot must be connected to an OutputSlot");
298 ConditionalThrow<LayerValidationException>(GetInputSlot(0).GetConnection()->IsTensorInfoSet(),
299 "FakeQuantizationLayer: TensorInfo must be set on connected OutputSlot.");
302 IOutputSlot* input = GetInputSlot(0).GetConnection();
304 // input and output shapes are the same
305 TensorShape const& outShape = input->GetTensorInfo().GetShape();
306 ConditionalThrow<LayerValidationException>(GetOutputSlot(0).ValidateTensorShape(outShape),
307 "FakeQuantizationLayer: TensorShape set on OutputSlot[0] does not match the inferred shape.");
310 FloorLayer::FloorLayer(const char* name)
311 : Layer(1, 1, LayerType::Floor, name)
315 std::unique_ptr<IWorkload> FloorLayer::CreateWorkload(const Graph& graph,
316 const IWorkloadFactory& factory) const
318 FloorQueueDescriptor descriptor;
319 return factory.CreateFloor(descriptor, PrepInfoAndDesc(descriptor, graph));
322 FloorLayer* FloorLayer::Clone(Graph& graph) const
324 return CloneBase<FloorLayer>(graph, GetName());
327 void FloorLayer::ValidateTensorShapesFromInputs()
329 ConditionalThrow<LayerValidationException>(GetInputSlot(0).GetConnection() != nullptr,
330 "FloorLayer: InputSlot must be connected to an OutputSlot");
331 ConditionalThrow<LayerValidationException>(GetInputSlot(0).GetConnection()->IsTensorInfoSet(),
332 "FloorLayer: TensorInfo must be set on connected OutputSlot.");
334 // input and output shapes are the same
335 IOutputSlot* input = GetInputSlot(0).GetConnection();
336 TensorShape const& outShape = input->GetTensorInfo().GetShape();
337 ConditionalThrow<LayerValidationException>(GetOutputSlot(0).ValidateTensorShape(outShape),
338 "FloorLayer: TensorShape set on OutputSlot[0] does not match the inferred shape.");
341 FullyConnectedLayer::FullyConnectedLayer(const FullyConnectedDescriptor& param, const char* name)
342 : LayerWithParameters(1, 1, LayerType::FullyConnected, param, name)
346 std::unique_ptr<IWorkload> FullyConnectedLayer::CreateWorkload(const Graph& graph,
347 const IWorkloadFactory& factory) const
349 FullyConnectedQueueDescriptor descriptor;
351 descriptor.m_Weight = m_Weight.get();
352 if (m_Param.m_BiasEnabled)
354 descriptor.m_Bias = m_Bias.get();
356 return factory.CreateFullyConnected(descriptor, PrepInfoAndDesc(descriptor, graph));
359 FullyConnectedLayer* FullyConnectedLayer::Clone(Graph& graph) const
361 auto layer = CloneBase<FullyConnectedLayer>(graph, m_Param, GetName());
363 layer->m_Weight = m_Weight ? std::make_unique<ScopedCpuTensorHandle>(*m_Weight) : nullptr;
364 if (layer->m_Param.m_BiasEnabled)
366 layer->m_Bias = m_Bias ? std::make_unique<ScopedCpuTensorHandle>(*m_Bias) : nullptr;
369 return std::move(layer);
372 void FullyConnectedLayer::ValidateTensorShapesFromInputs()
374 ConditionalThrow<LayerValidationException>(GetInputSlot(0).GetConnection() != nullptr,
375 "FullyConnectedLayer: InputSlot must be connected to an OutputSlot");
376 ConditionalThrow<LayerValidationException>(GetInputSlot(0).GetConnection()->IsTensorInfoSet(),
377 "FullyConnectedLayer: TensorInfo must be set on connected OutputSlot.");
380 TensorShape const& weightShape = m_Weight->GetTensorInfo().GetShape();
382 // output for FC is [1, w[1]]
383 unsigned int batches = GetInputSlot(0).GetConnection()->GetTensorInfo().GetShape()[0];
384 unsigned int dimIdx = m_Param.m_TransposeWeightMatrix ? 0 : 1;
385 TensorShape outShape({batches, weightShape[dimIdx]});
387 ConditionalThrow<LayerValidationException>(GetOutputSlot(0).ValidateTensorShape(outShape),
388 "FullyConnectedLayer: TensorShape set on OutputSlot[0] does not match the inferred shape.");
391 InputLayer::InputLayer(LayerBindingId id, const char* name)
392 : BindableLayer(0, 1, LayerType::Input, name, id)
396 std::unique_ptr<IWorkload> InputLayer::CreateWorkload(const Graph& graph, const IWorkloadFactory& factory) const
401 InputLayer* InputLayer::Clone(Graph& graph) const
403 return CloneBase<InputLayer>(graph, GetBindingId(), GetName());
406 void InputLayer::ValidateTensorShapesFromInputs()
408 //The input layer should already have it's inputs set during graph building phase in the driver/parser.
409 ConditionalThrow<LayerValidationException>(GetOutputSlot(0).IsTensorInfoSet(),
410 "InputLayer should already have the TensorInfo set.");
414 MergerLayer::MergerLayer(const OriginsDescriptor& param, const char* name)
415 : LayerWithParameters(param.GetNumViews(), 1, LayerType::Merger, param, name)
419 std::unique_ptr<IWorkload> MergerLayer::CreateWorkload(const Graph& graph, const IWorkloadFactory& factory) const
421 MergerQueueDescriptor descriptor;
423 // copy the view origins to the descriptor
424 descriptor.m_ViewOrigins.reserve(m_Param.GetNumViews());
425 for (unsigned int i = 0; i < m_Param.GetNumViews(); ++i)
427 descriptor.m_ViewOrigins.emplace_back(
428 std::vector<unsigned int>(m_Param.GetViewOrigin(i), m_Param.GetViewOrigin(i) + m_Param.GetNumDimensions()));
431 return factory.CreateMerger(descriptor, PrepInfoAndDesc(descriptor, graph));
434 void MergerLayer::CreateTensorHandles(Graph& graph, const IWorkloadFactory& factory)
436 //if sub tensors are supported than the merger
437 //just needs to make sure that the outputs of the prev layer
438 //are made subtensors of the output of the merger layer
439 m_OutputHandlers[0].CreateTensorHandles(factory);
440 if (factory.SupportsSubTensors())
442 const unsigned int numInputSlots = GetNumInputSlots();
443 for (unsigned int i = 0; i < numInputSlots; ++i)
445 OutputHandler& outputHandler = GetInputSlot(i).GetConnectedOutputSlot()->GetOutputHandler();
447 outputHandler.SetData(factory.CreateSubTensorHandle(*m_OutputHandlers[0].GetData(),
448 outputHandler.GetTensorInfo().GetShape(),
449 m_Param.GetViewOrigin(i)));
454 MergerLayer* MergerLayer::Clone(Graph& graph) const
456 return CloneBase<MergerLayer>(graph, m_Param, GetName());
459 void MergerLayer::ValidateTensorShapesFromInputs()
461 // Validate Merger layer
462 ConditionalThrow<LayerValidationException>(m_Param.GetNumViews() == GetNumInputSlots(),
463 "MergerLayer: Num Inputs must match num views.");
465 unsigned int numDims = m_Param.GetNumDimensions();
466 for (unsigned int i=0; i<GetNumInputSlots(); i++)
468 auto& inputInfo = GetInputSlot(i).GetConnection()->GetTensorInfo();
470 boost::ignore_unused(inputInfo);
471 ConditionalThrow<LayerValidationException>(numDims == inputInfo.GetNumDimensions(),
472 "MergerLayer: Num Dimensions must match all inputs.");
475 // Find the bounding box (extents) of all the views
476 std::vector<unsigned int> extentMin(numDims);
477 std::vector<unsigned int> extentMax(numDims);
478 for (unsigned int i = 0; i < GetNumInputSlots(); i++)
480 const uint32_t* origin = m_Param.GetViewOrigin(i);
481 const armnn::TensorShape& shape = GetInputSlot(i).GetConnection()->GetTensorInfo().GetShape();
482 for (unsigned int d = 0; d < numDims; d++)
484 extentMin[d] = std::min(extentMin[d], origin[d]);
485 extentMax[d] = std::max(extentMax[d], origin[d] + shape[d]);
489 // Check that the bounding box starts at the origin
490 if (!std::all_of(extentMin.begin(), extentMin.end(), [](unsigned int s) { return s == 0; }))
492 throw LayerValidationException("MergerLayer: there is no view that starts at the origin");
495 // Check that there are no overlaps of views (this would lead to undefined output at those locations).
496 // Check each pair of views against each other
497 // (and don't bother to check against self, or check the same pair both ways round)
498 for (unsigned int a = 0; a < GetNumInputSlots(); a++)
500 const uint32_t* aOrigin = m_Param.GetViewOrigin(a);
501 const armnn::TensorShape& aShape = GetInputSlot(a).GetConnection()->GetTensorInfo().GetShape();
502 for (unsigned int b = 0; b < a; b++)
504 const uint32_t* bOrigin = m_Param.GetViewOrigin(b);
505 const armnn::TensorShape& bShape = GetInputSlot(b).GetConnection()->GetTensorInfo().GetShape();
507 bool allAxesOverlap = true;
508 for (unsigned int d = 0; d < numDims && allAxesOverlap; d++)
510 unsigned int a1 = aOrigin[d];
511 unsigned int a2 = aOrigin[d] + aShape[d];
513 unsigned int b1 = bOrigin[d];
514 unsigned int b2 = bOrigin[d] + bShape[d];
516 if (a2 <= b1 || b2 <= a1)
518 allAxesOverlap = false;
523 throw LayerValidationException("MergerLayer: Some views overlap.");
528 // Check that there are no "holes", i.e. regions of the output which is not covered by a view.
529 // Because we already checked that there are no overlaps, this can be done simply by checking that
530 // the total 'volume' of the views is the same as the output.
531 unsigned int totalViewsVolume = 0;
532 for (unsigned int i = 0; i < GetNumInputSlots(); i++)
534 totalViewsVolume += GetInputSlot(i).GetConnection()->GetTensorInfo().GetNumElements();
536 unsigned int outputVolume = 1;
537 for (unsigned int d = 0; d < numDims; d++)
539 outputVolume *= (extentMax[d] - extentMin[d]);
541 if (totalViewsVolume != outputVolume)
543 throw LayerValidationException("MergerLayer: there are some gaps between views");
546 TensorShape outShape(numDims, extentMax.data());
547 ConditionalThrow<LayerValidationException>(GetOutputSlot(0).ValidateTensorShape(outShape),
548 "MergerLayer: TensorShape set on OutputSlot[0] does not match the inferred shape.");
551 MultiplicationLayer::MultiplicationLayer(const char* name)
552 : Layer(2, 1, LayerType::Multiplication, name)
556 std::unique_ptr<IWorkload> MultiplicationLayer::CreateWorkload(const Graph& graph,
557 const IWorkloadFactory& factory) const
559 MultiplicationQueueDescriptor descriptor;
561 return factory.CreateMultiplication(descriptor, PrepInfoAndDesc(descriptor, graph));
564 MultiplicationLayer* MultiplicationLayer::Clone(Graph& graph) const
566 return CloneBase<MultiplicationLayer>(graph, GetName());
569 void MultiplicationLayer::ValidateTensorShapesFromInputs()
571 ConditionalThrow<LayerValidationException>(GetInputSlot(0).GetConnection()->GetTensorInfo().GetShape() ==
572 GetInputSlot(1).GetConnection()->GetTensorInfo().GetShape(),
573 "MultiplicationLayer: Inputs must match");
575 TensorInfo infoOut(GetInputSlot(0).GetConnection()->GetTensorInfo());
576 ConditionalThrow<LayerValidationException>(GetOutputSlot(0).ValidateTensorShape(infoOut.GetShape()),
577 "MultiplicationLayer: TensorShape set on OutputSlot[0] does not match the inferred shape.");
580 NormalizationLayer::NormalizationLayer(const NormalizationDescriptor& param, const char* name)
581 : LayerWithParameters(1, 1, LayerType::Normalization, param, name)
585 std::unique_ptr<IWorkload> NormalizationLayer::CreateWorkload(const Graph& graph, const IWorkloadFactory& factory) const
587 NormalizationQueueDescriptor descriptor;
588 return factory.CreateNormalization(descriptor, PrepInfoAndDesc(descriptor, graph));
591 NormalizationLayer* NormalizationLayer::Clone(Graph& graph) const
593 return CloneBase<NormalizationLayer>(graph, m_Param, GetName());
596 void NormalizationLayer::ValidateTensorShapesFromInputs()
598 ConditionalThrow<LayerValidationException>(GetInputSlot(0).GetConnection() != nullptr,
599 "NormalizationLayer: Input slot must be connected.");
601 const TensorShape& outShape = GetInputSlot(0).GetConnection()->GetTensorInfo().GetShape();
602 ConditionalThrow<LayerValidationException>(GetOutputSlot(0).ValidateTensorShape(outShape),
603 "NormalizationLayer: TensorShape set on OutputSlot[0] does not match the inferred shape.");
606 OutputLayer::OutputLayer(LayerBindingId id, const char* name)
607 : BindableLayer(1, 0, LayerType::Output, name, id)
611 std::unique_ptr<IWorkload> OutputLayer::CreateWorkload(const Graph& graph, const IWorkloadFactory& factory) const
616 OutputLayer* OutputLayer::Clone(Graph& graph) const
618 return CloneBase<OutputLayer>(graph, GetBindingId(), GetName());
621 void OutputLayer::ValidateTensorShapesFromInputs()
623 // Just validate the input is connected
624 ConditionalThrow<LayerValidationException>(GetInputSlot(0).GetConnection() != nullptr,
625 "OutputLayer: Input slot must be connected.");
628 PermuteLayer::PermuteLayer(const PermuteDescriptor& param, const char* name)
629 : LayerWithParameters(1, 1, LayerType::Permute, param, name)
633 std::unique_ptr<IWorkload> PermuteLayer::CreateWorkload(const Graph& graph, const IWorkloadFactory& factory) const
635 PermuteQueueDescriptor descriptor;
636 return factory.CreatePermute(descriptor, PrepInfoAndDesc(descriptor, graph));
639 PermuteLayer* PermuteLayer::Clone(Graph& graph) const
641 return CloneBase<PermuteLayer>(graph, m_Param, GetName());
644 void PermuteLayer::ValidateTensorShapesFromInputs()
646 ConditionalThrow<LayerValidationException>(GetInputSlot(0).GetConnection() != nullptr,
647 "PermuteLayer: InputSlot must be connected to an OutputSlot");
648 ConditionalThrow<LayerValidationException>(GetInputSlot(0).GetConnection()->IsTensorInfoSet(),
649 "PermuteLayer: TensorInfo must be set on connected InputSlot.");
651 const TensorInfo& infoIn = GetInputSlot(0).GetConnection()->GetTensorInfo();
652 TensorShape shapeOut = armnnUtils::Permuted(infoIn.GetShape(), m_Param.m_DimMappings);
653 ConditionalThrow<LayerValidationException>(GetOutputSlot(0).ValidateTensorShape(shapeOut),
654 "PermuteLayer: TensorShape set on OutputSlot[0] does not match the inferred shape.");
657 Pooling2dLayer::Pooling2dLayer(const Pooling2dDescriptor& param, const char* name)
658 : LayerWithParameters(1, 1, LayerType::Pooling2d, param, name)
662 std::unique_ptr<IWorkload> Pooling2dLayer::CreateWorkload(const Graph& graph, const IWorkloadFactory& factory) const
664 Pooling2dQueueDescriptor descriptor;
665 return factory.CreatePooling2d(descriptor, PrepInfoAndDesc(descriptor, graph));
668 Pooling2dLayer* Pooling2dLayer::Clone(Graph& graph) const
670 return CloneBase<Pooling2dLayer>(graph, m_Param, GetName());
673 void Pooling2dLayer::ValidateTensorShapesFromInputs()
675 ConditionalThrow<LayerValidationException>(GetInputSlot(0).GetConnection() != nullptr,
676 "Pooling2dLayer: InputSlot must be connected to an OutputSlot");
677 ConditionalThrow<LayerValidationException>(GetInputSlot(0).GetConnection()->IsTensorInfoSet(),
678 "Pooling2dLayer: TensorInfo must be set on connected InputSlot.");
680 IOutputSlot* input = GetInputSlot(0).GetConnection();
681 const TensorShape& inputShape = input->GetTensorInfo().GetShape();
683 // If we support multiple batch dimensions in the future, then this assert will need to change.
684 BOOST_ASSERT_MSG(inputShape.GetNumDimensions() == 4, "Pooling2dLayer will always have 4D input.");
687 unsigned int inWidth = inputShape[3];
688 unsigned int inHeight = inputShape[2];
689 unsigned int inChannels = inputShape[1];
690 unsigned int inBatchSize = inputShape[0];
692 bool isGlobalPooling = (m_Param.m_StrideX==0 && m_Param.m_StrideY==0);
693 unsigned int outWidth = 1;
694 unsigned int outHeight = 1;
695 if (!isGlobalPooling)
697 BOOST_ASSERT_MSG(m_Param.m_StrideX!=0 && m_Param.m_StrideY!=0,
698 "Stride can only be zero when performing global pooling");
700 auto CalcSize = [](auto inSize, auto lowPad, auto highPad, auto poolSize, auto stride, auto padMethod,
701 auto outputShapeRounding)
703 unsigned int readSize = inSize + lowPad + highPad - poolSize;
704 float div = static_cast<float>(readSize) / static_cast<float>(stride);
706 unsigned int size = 0;
707 switch (outputShapeRounding)
709 case OutputShapeRounding::Ceiling:
710 size = static_cast<unsigned int>(ceil(div)) + 1;
712 case OutputShapeRounding ::Floor:
713 size = static_cast<unsigned int>(floor(div)) + 1;
716 BOOST_ASSERT_MSG(false, "Unsupported Output Shape Rounding");
719 // Make sure that border operations will start from inside the input and not the padded area
720 // This is what both Caffe and CL does...
721 if ((size - 1)*stride >= inSize + lowPad)
729 outWidth = CalcSize(inWidth, m_Param.m_PadLeft, m_Param.m_PadRight, m_Param.m_PoolWidth, m_Param.m_StrideX,
730 m_Param.m_PaddingMethod, m_Param.m_OutputShapeRounding);
731 outHeight= CalcSize(inHeight, m_Param.m_PadTop, m_Param.m_PadBottom, m_Param.m_PoolHeight, m_Param.m_StrideY,
732 m_Param.m_PaddingMethod, m_Param.m_OutputShapeRounding);
736 unsigned int outChannels = inChannels;
737 unsigned int outBatchSize = inBatchSize;
739 TensorShape shapeOut({outBatchSize, outChannels, outHeight, outWidth});
741 ConditionalThrow<LayerValidationException>(GetOutputSlot(0).ValidateTensorShape(shapeOut),
742 "Pooling2dLayer: TensorShape set on OutputSlot[0] does not match the inferred shape.");
745 SoftmaxLayer::SoftmaxLayer(const SoftmaxDescriptor ¶m, const char* name)
746 : LayerWithParameters(1, 1, LayerType::Softmax, param, name)
750 std::unique_ptr<IWorkload> SoftmaxLayer::CreateWorkload(const Graph& graph, const IWorkloadFactory& factory) const
752 SoftmaxQueueDescriptor descriptor;
753 return factory.CreateSoftmax(descriptor, PrepInfoAndDesc(descriptor, graph));
756 SoftmaxLayer* SoftmaxLayer::Clone(Graph& graph) const
758 return CloneBase<SoftmaxLayer>(graph, m_Param, GetName());
761 void SoftmaxLayer::ValidateTensorShapesFromInputs()
763 ConditionalThrow<LayerValidationException>(GetInputSlot(0).GetConnection() != nullptr,
764 "SoftmaxLayer: Input slot must be connected.");
765 const TensorShape& outShape = GetInputSlot(0).GetConnection()->GetTensorInfo().GetShape();
766 ConditionalThrow<LayerValidationException>(GetOutputSlot(0).ValidateTensorShape(outShape),
767 "SoftmaxLayer: TensorShape set on OutputSlot[0] does not match the inferred shape.");
770 SplitterLayer::SplitterLayer(const ViewsDescriptor& param, const char* name)
771 : LayerWithParameters(1, param.GetNumViews(), LayerType::Splitter, param, name)
775 std::unique_ptr<IWorkload> SplitterLayer::CreateWorkload(const Graph& graph, const IWorkloadFactory& factory) const
777 SplitterQueueDescriptor descriptor;
779 // copy the window origins to the descriptor
780 for (unsigned int i = 0; i < m_Param.GetNumViews(); ++i)
782 descriptor.m_ViewOrigins.emplace_back(
783 std::vector<unsigned int>(m_Param.GetViewOrigin(i), m_Param.GetViewOrigin(i) + m_Param.GetNumDimensions()));
786 return factory.CreateSplitter(descriptor, PrepInfoAndDesc(descriptor, graph));
789 void SplitterLayer::CreateTensorHandles(Graph& graph, const IWorkloadFactory& factory)
791 //if sub tensors are supported than all the "splitter" need to do is to
792 //set the outputs to be appropriate sub tensors of the input.
793 if (factory.SupportsSubTensors())
795 const OutputHandler& outputHandler = GetInputSlots()[0].GetConnectedOutputSlot()->GetOutputHandler();
797 ITensorHandle* inputData = outputHandler.GetData();
798 //create the outputs as subtensors of the input
799 for (unsigned int i = 0; i < m_Param.GetNumViews(); ++i)
801 m_OutputHandlers[i].SetData(factory.CreateSubTensorHandle(*inputData,
802 m_OutputHandlers[i].GetTensorInfo().GetShape(),
803 m_Param.GetViewOrigin(i)));
808 for (unsigned int i = 0; i < m_Param.GetNumViews(); ++i)
810 m_OutputHandlers[i].CreateTensorHandles(factory);
815 SplitterLayer* SplitterLayer::Clone(Graph& graph) const
817 return CloneBase<SplitterLayer>(graph, m_Param, GetName());
820 void SplitterLayer::ValidateTensorShapesFromInputs()
822 //Output shapes must match View shapes.
823 for (unsigned int viewIdx = 0; viewIdx < m_Param.GetNumViews(); viewIdx++)
825 const uint32_t* sizes = m_Param.GetViewSizes(viewIdx);
827 TensorShape outShape(m_Param.GetNumDimensions(), sizes);
828 ConditionalThrow<LayerValidationException>(GetOutputSlot(viewIdx).ValidateTensorShape(outShape),
829 "SplitterLayer: View sizes must match output tensor shapes.");
833 MemCopyLayer::MemCopyLayer(const char* name)
834 : Layer(1, 1, LayerType::MemCopy, name)
838 MemCopyLayer* MemCopyLayer::Clone(Graph& graph) const
840 return CloneBase<MemCopyLayer>(graph, GetName());
843 std::unique_ptr<IWorkload> MemCopyLayer::CreateWorkload(const Graph& graph, const IWorkloadFactory& factory) const
845 MemCopyQueueDescriptor descriptor;
846 return factory.CreateMemCopy(descriptor, PrepInfoAndDesc(descriptor, graph));
849 void MemCopyLayer::ValidateTensorShapesFromInputs()
851 ConditionalThrow<LayerValidationException>(GetInputSlot(0).GetConnection() != nullptr,
852 "MemCopyLayer: InputSlot must be connected to an OutputSlot");
853 ConditionalThrow<LayerValidationException>(GetInputSlot(0).GetConnection()->IsTensorInfoSet(),
854 "MemCopyLayer: TensorInfo must be set on connected OutputSlot.");
857 IOutputSlot* input = GetInputSlot(0).GetConnection();
859 // input and output shapes are the same
860 TensorShape const& outShape = input->GetTensorInfo().GetShape();
861 ConditionalThrow<LayerValidationException>(GetOutputSlot(0).ValidateTensorShape(outShape),
862 "MemCopyLayer: TensorShape set on OutputSlot[0] does not match the inferred shape.");
865 ResizeBilinearLayer::ResizeBilinearLayer(const ResizeBilinearDescriptor& param, const char* name)
866 : LayerWithParameters(1, 1, LayerType::ResizeBilinear, param, name)
870 std::unique_ptr<IWorkload> ResizeBilinearLayer::CreateWorkload(const Graph& graph,
871 const IWorkloadFactory& factory) const
873 ResizeBilinearQueueDescriptor descriptor;
874 return factory.CreateResizeBilinear(descriptor, PrepInfoAndDesc(descriptor, graph));
877 ResizeBilinearLayer* ResizeBilinearLayer::Clone(Graph& graph) const
879 return CloneBase<ResizeBilinearLayer>(graph, m_Param, GetName());
882 void ResizeBilinearLayer::ValidateTensorShapesFromInputs()
884 ConditionalThrow<LayerValidationException>(GetInputSlot(0).GetConnection() != nullptr,
885 "MemCopyLayer: InputSlot must be connected to an OutputSlot");
886 ConditionalThrow<LayerValidationException>(GetInputSlot(0).GetConnection()->IsTensorInfoSet(),
887 "MemCopyLayer: TensorInfo must be set on connected OutputSlot.");
889 const TensorShape& inputShape = GetInputSlot(0).GetConnection()->GetTensorInfo().GetShape();
890 unsigned int outWidth = m_Param.m_TargetWidth;
891 unsigned int outHeight = m_Param.m_TargetHeight;
892 unsigned int outChannels = inputShape[1];
893 unsigned int outBatch = inputShape[0];
894 TensorShape outShape({outBatch, outChannels, outHeight, outWidth});
895 ConditionalThrow<LayerValidationException>(GetOutputSlot(0).ValidateTensorShape(outShape),
896 "ResizeBilinearLayer: TensorShape set on OutputSlot[0] does not match the inferred shape.");
899 L2NormalizationLayer::L2NormalizationLayer(const char* name)
900 : Layer(1, 1, LayerType::L2Normalization, name)
904 std::unique_ptr<IWorkload> L2NormalizationLayer::CreateWorkload(const Graph& graph,
905 const IWorkloadFactory& factory) const
907 L2NormalizationQueueDescriptor descriptor;
908 return factory.CreateL2Normalization(descriptor, PrepInfoAndDesc(descriptor, graph));
911 L2NormalizationLayer* L2NormalizationLayer::Clone(Graph& graph) const
913 return CloneBase<L2NormalizationLayer>(graph, GetName());
916 void L2NormalizationLayer::ValidateTensorShapesFromInputs()
918 ConditionalThrow<LayerValidationException>(GetInputSlot(0).GetConnection() != nullptr,
919 "L2NormalizationLayer: InputSlot must be connected to an OutputSlot");
920 ConditionalThrow<LayerValidationException>(GetInputSlot(0).GetConnection()->IsTensorInfoSet(),
921 "L2NormalizationLayer: TensorInfo must be set on connected OutputSlot.");
923 IOutputSlot* input = GetInputSlot(0).GetConnection();
925 // input and output shapes are the same
926 TensorShape const& outShape = input->GetTensorInfo().GetShape();
927 ConditionalThrow<LayerValidationException>(GetOutputSlot(0).ValidateTensorShape(outShape),
928 "L2NormalizationLayer: TensorShape set on OutputSlot[0] does not match the inferred shape.");
931 ConstantLayer::ConstantLayer(const std::shared_ptr<ScopedCpuTensorHandle>& input, const char* name)
932 : Layer(0, 1, LayerType::Constant, name)
933 , m_LayerOutput(input)
937 std::unique_ptr<IWorkload> ConstantLayer::CreateWorkload(const Graph& graph,
938 const IWorkloadFactory& factory) const
940 ConstantQueueDescriptor descriptor;
941 descriptor.m_LayerOutput = m_LayerOutput.get();
942 return factory.CreateConstant(descriptor, PrepInfoAndDesc(descriptor, graph));
945 ConstantLayer* ConstantLayer::Clone(Graph& graph) const
947 // Cloned layers share the same layer output object
948 return CloneBase<ConstantLayer>(graph, m_LayerOutput, GetName());
951 void ConstantLayer::ValidateTensorShapesFromInputs()
953 // get the output shape from the value of the constant layer
954 TensorShape const& outShape = m_LayerOutput->GetTensorInfo().GetShape();
955 ConditionalThrow<LayerValidationException>(GetOutputSlot(0).ValidateTensorShape(outShape),
956 "ConstantLayer: TensorShape set on OutputSlot[0] does not match the inferred shape.");
959 ReshapeLayer::ReshapeLayer(const ReshapeDescriptor& param, const char* name)
960 : LayerWithParameters(1, 1, LayerType::Reshape, param, name)
964 std::unique_ptr<IWorkload> ReshapeLayer::CreateWorkload(const Graph& graph,
965 const IWorkloadFactory& factory) const
967 ReshapeQueueDescriptor descriptor;
968 return factory.CreateReshape(descriptor, PrepInfoAndDesc(descriptor, graph));
971 ReshapeLayer* ReshapeLayer::Clone(Graph& graph) const
973 return CloneBase<ReshapeLayer>(graph, m_Param, GetName());
// Validates the reshape layer's slots. Each check goes through
// ConditionalThrow<LayerValidationException> with the message that is reported
// when the check does not hold.
976 void ReshapeLayer::ValidateTensorShapesFromInputs()
// InputSlot[0] must have a connection...
978 ConditionalThrow<LayerValidationException>(GetInputSlot(0).GetConnection() != nullptr,
979 "ReshapeLayer: InputSlot must be connected to an OutputSlot");
// ...and the connected slot must report that its TensorInfo is set.
980 ConditionalThrow<LayerValidationException>(GetInputSlot(0).GetConnection()->IsTensorInfoSet(),
981 "ReshapeLayer: TensorInfo must be set on connected OutputSlot.");
// The expected output shape comes from the descriptor's target shape, not from the input shape.
982 ConditionalThrow<LayerValidationException>(GetOutputSlot(0).ValidateTensorShape(m_Param.m_TargetShape),
983 "ReshapeLayer: TensorShape set on OutputSlot[0] does not match the inferred shape.");