//
// Copyright © 2017 Arm Ltd. All rights reserved.
// See LICENSE file in the project root for full license information.
//
#include "Layers.hpp"
#include "Graph.hpp"

#include "backends/CpuTensorHandle.hpp"
#include "backends/Workload.hpp"
#include "backends/WorkloadFactory.hpp"

#include "Permute.hpp"

#include <limits>

namespace armnn
{

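// Helper used by the Clone() overrides below: adds a copy of the concrete layer
// type to the target graph, forwarding any constructor arguments, and carries
// over the compute device assigned to this layer.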
template <typename LayerType, typename ... Params>
LayerType* Layer::CloneBase(Graph& graph, Params&& ... params) const
{
    LayerType* const layer = graph.AddLayer<LayerType>(std::forward<Params>(params)...);

    layer->SetComputeDevice(m_ComputeDevice);

    return layer;
}

ActivationLayer::ActivationLayer(const ActivationDescriptor& param, const char* name)
    : LayerWithParameters(1, 1, LayerType::Activation, param, name)
{
}

std::unique_ptr<IWorkload> ActivationLayer::CreateWorkload(const Graph& graph, const IWorkloadFactory& factory) const
{
    ActivationQueueDescriptor descriptor;
    return factory.CreateActivation(descriptor, PrepInfoAndDesc(descriptor, graph));
}

ActivationLayer* ActivationLayer::Clone(Graph& graph) const
{
    return CloneBase<ActivationLayer>(graph, m_Param, GetName());
}

void ActivationLayer::ValidateTensorShapesFromInputs()
{
    auto& info = GetInputSlot(0).GetConnection()->GetTensorInfo();
    ConditionalThrow<LayerValidationException>(GetOutputSlot(0).ValidateTensorShape(info.GetShape()),
                     "ActivationLayer: TensorShape set on OutputSlot[0] does not match the inferred shape.");
}

AdditionLayer::AdditionLayer(const char* name)
    : Layer(2, 1, LayerType::Addition, name)
{
}

std::unique_ptr<IWorkload> AdditionLayer::CreateWorkload(const Graph& graph, const IWorkloadFactory& factory) const
{
    AdditionQueueDescriptor descriptor;
    return factory.CreateAddition(descriptor, PrepInfoAndDesc(descriptor, graph));
}

AdditionLayer* AdditionLayer::Clone(Graph& graph) const
{
    return CloneBase<AdditionLayer>(graph, GetName());
}

void AdditionLayer::ValidateTensorShapesFromInputs()
{
    auto& input0 = GetInputSlot(0).GetConnection()->GetTensorInfo();
    auto& input1 = GetInputSlot(1).GetConnection()->GetTensorInfo();

    // Both inputs must have the same number of dimensions.
    BOOST_ASSERT(input0.GetNumDimensions() == input1.GetNumDimensions());
    unsigned int numDims = input0.GetNumDimensions();
    std::vector<unsigned int> dims(numDims);

    // Validate that the inputs are broadcast compatible.
#ifndef NDEBUG
    for (unsigned int i = 0; i < numDims; i++)
    {
        unsigned int dim0 = input0.GetShape()[i];
        unsigned int dim1 = input1.GetShape()[i];
        if (dim0 != dim1)
        {
            BOOST_ASSERT_MSG(dim0 == 1 || dim1 == 1, "Dimensions should either match or one should be of size 1");
        }
    }
#endif

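    // The output extent in each dimension is the larger of the two input extents,
    // e.g. broadcasting {5, 1, 4, 4} with {5, 3, 4, 4} gives an output of {5, 3, 4, 4}.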
    for (unsigned int i = 0; i < numDims; i++)
    {
        unsigned int dim0 = input0.GetShape()[i];
        unsigned int dim1 = input1.GetShape()[i];
        dims[i] = std::max(dim0, dim1);
    }

    TensorShape outShape(numDims, dims.data());
    ConditionalThrow<LayerValidationException>(GetOutputSlot(0).ValidateTensorShape(outShape),
                     "AdditionLayer: TensorShape set on OutputSlot[0] does not match the inferred shape.");
}

BatchNormalizationLayer::BatchNormalizationLayer(const armnn::BatchNormalizationDescriptor& param, const char* name)
    : LayerWithParameters(1, 1, LayerType::BatchNormalization, param, name)
{
}

std::unique_ptr<IWorkload> BatchNormalizationLayer::CreateWorkload(const Graph& graph,
                                                                   const IWorkloadFactory& factory) const
{
    BatchNormalizationQueueDescriptor descriptor;

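    // The layer owns its statistics (mean/variance) and learned parameters
    // (beta/gamma) as ScopedCpuTensorHandles; only raw pointers are passed on
    // to the workload descriptor.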
    descriptor.m_Mean = m_Mean.get();
    descriptor.m_Variance = m_Variance.get();
    descriptor.m_Beta = m_Beta.get();
    descriptor.m_Gamma = m_Gamma.get();
    return factory.CreateBatchNormalization(descriptor, PrepInfoAndDesc(descriptor, graph));
}

BatchNormalizationLayer* BatchNormalizationLayer::Clone(Graph& graph) const
{
    auto layer = CloneBase<BatchNormalizationLayer>(graph, m_Param, GetName());

    layer->m_Mean = m_Mean ? std::make_unique<ScopedCpuTensorHandle>(*m_Mean) : nullptr;
    layer->m_Variance = m_Variance ? std::make_unique<ScopedCpuTensorHandle>(*m_Variance) : nullptr;
    layer->m_Beta = m_Beta ? std::make_unique<ScopedCpuTensorHandle>(*m_Beta) : nullptr;
    layer->m_Gamma = m_Gamma ? std::make_unique<ScopedCpuTensorHandle>(*m_Gamma) : nullptr;

    return layer;
}

void BatchNormalizationLayer::ValidateTensorShapesFromInputs()
{
    ConditionalThrow<LayerValidationException>(GetInputSlot(0).GetConnection() != nullptr,
                     "BatchNormalizationLayer: InputSlot must be connected to an OutputSlot");
    ConditionalThrow<LayerValidationException>(GetInputSlot(0).GetConnection()->IsTensorInfoSet(),
                     "BatchNormalizationLayer: TensorInfo must be set on connected OutputSlot.");

    auto& info = GetInputSlot(0).GetConnection()->GetTensorInfo();
    ConditionalThrow<LayerValidationException>(GetOutputSlot(0).ValidateTensorShape(info.GetShape()),
                     "BatchNormalizationLayer: TensorShape set on OutputSlot[0] does not match the inferred shape.");
}

Convolution2dLayer::Convolution2dLayer(const Convolution2dDescriptor& param, const char* name)
    : LayerWithParameters(1, 1, LayerType::Convolution2d, param, name)
{
}

std::unique_ptr<IWorkload> Convolution2dLayer::CreateWorkload(const Graph& graph, const IWorkloadFactory& factory) const
{
    Convolution2dQueueDescriptor descriptor;

    descriptor.m_Weight = m_Weight.get();
    if (m_Param.m_BiasEnabled)
    {
        descriptor.m_Bias = m_Bias.get();
    }
    return factory.CreateConvolution2d(descriptor, PrepInfoAndDesc(descriptor, graph));
}

Convolution2dLayer* Convolution2dLayer::Clone(Graph& graph) const
{
    auto layer = CloneBase<Convolution2dLayer>(graph, m_Param, GetName());
    layer->m_Weight = m_Weight ? std::make_unique<ScopedCpuTensorHandle>(*m_Weight) : nullptr;

    if (layer->m_Param.m_BiasEnabled)
    {
        layer->m_Bias = m_Bias ? std::make_unique<ScopedCpuTensorHandle>(*m_Bias) : nullptr;
    }

    return layer;
}

void Convolution2dLayer::ValidateTensorShapesFromInputs()
{
    ConditionalThrow<LayerValidationException>(GetInputSlot(0).GetConnection() != nullptr,
                     "Convolution2dLayer: InputSlot must be connected to an OutputSlot");
    ConditionalThrow<LayerValidationException>(GetInputSlot(0).GetConnection()->IsTensorInfoSet(),
                     "Convolution2dLayer: TensorInfo must be set on connected OutputSlot.");

    IOutputSlot* input = GetInputSlot(0).GetConnection();
    const TensorShape& inputShape = input->GetTensorInfo().GetShape();
    const TensorShape filterShape = m_Weight->GetTensorInfo().GetShape();

    // If we support multiple batch dimensions in the future, then this assert will need to change.
    BOOST_ASSERT_MSG(inputShape.GetNumDimensions() == 4, "Convolutions will always have 4D input.");

    unsigned int inWidth = inputShape[3];
    unsigned int inHeight = inputShape[2];
    unsigned int inBatchSize = inputShape[0];

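    // Standard convolution output size in each spatial dimension:
    //   outSize = (inSize + padLow + padHigh - filterSize) / stride + 1
    // where the division is integer division (i.e. the floor of the quotient).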
    unsigned int filterWidth = filterShape[3];
    unsigned int readWidth = (inWidth + m_Param.m_PadLeft + m_Param.m_PadRight) - filterWidth;
    unsigned int outWidth = 1 + (readWidth / m_Param.m_StrideX);

    unsigned int filterHeight = filterShape[2];
    unsigned int readHeight = (inHeight + m_Param.m_PadTop + m_Param.m_PadBottom) - filterHeight;
    unsigned int outHeight = 1 + (readHeight / m_Param.m_StrideY);

    unsigned int outChannels = filterShape[0];
    unsigned int outBatchSize = inBatchSize;

    TensorShape shapeOut({outBatchSize, outChannels, outHeight, outWidth});
    ConditionalThrow<LayerValidationException>(GetOutputSlot(0).ValidateTensorShape(shapeOut),
                     "Convolution2dLayer: TensorShape set on OutputSlot[0] does not match the inferred shape.");
}

DepthwiseConvolution2dLayer::DepthwiseConvolution2dLayer(const DepthwiseConvolution2dDescriptor& param,
                                                         const char* name)
    : LayerWithParameters(1, 1, LayerType::DepthwiseConvolution2d, param, name)
{
}

std::unique_ptr<IWorkload> DepthwiseConvolution2dLayer::CreateWorkload(const Graph& graph,
                                                                       const IWorkloadFactory& factory) const
{
    DepthwiseConvolution2dQueueDescriptor descriptor;

    descriptor.m_Weight = m_Weight.get();
    if (m_Param.m_BiasEnabled)
    {
        descriptor.m_Bias = m_Bias.get();
    }
    return factory.CreateDepthwiseConvolution2d(descriptor, PrepInfoAndDesc(descriptor, graph));
}

DepthwiseConvolution2dLayer* DepthwiseConvolution2dLayer::Clone(Graph& graph) const
{
    auto layer = CloneBase<DepthwiseConvolution2dLayer>(graph, m_Param, GetName());
    layer->m_Weight = m_Weight ? std::make_unique<ScopedCpuTensorHandle>(*m_Weight) : nullptr;

    if (layer->m_Param.m_BiasEnabled)
    {
        layer->m_Bias = m_Bias ? std::make_unique<ScopedCpuTensorHandle>(*m_Bias) : nullptr;
    }

    return layer;
}

void DepthwiseConvolution2dLayer::ValidateTensorShapesFromInputs()
{
    ConditionalThrow<LayerValidationException>(GetInputSlot(0).GetConnection() != nullptr,
                     "DepthwiseConvolution2dLayer: InputSlot must be connected to an OutputSlot");
    ConditionalThrow<LayerValidationException>(GetInputSlot(0).GetConnection()->IsTensorInfoSet(),
                     "DepthwiseConvolution2dLayer: TensorInfo must be set on connected OutputSlot.");

    IOutputSlot* input = GetInputSlot(0).GetConnection();
    const TensorShape& inputShape = input->GetTensorInfo().GetShape();
    const TensorShape filterShape = m_Weight->GetTensorInfo().GetShape();

    BOOST_ASSERT_MSG(inputShape.GetNumDimensions() == 4, "Convolutions will always have 4D input.");

    unsigned int inWidth = inputShape[3];
    unsigned int inHeight = inputShape[2];
    unsigned int inBatchSize = inputShape[0];

    unsigned int filterWidth = filterShape[3];
    unsigned int readWidth = (inWidth + m_Param.m_PadLeft + m_Param.m_PadRight) - filterWidth;
    unsigned int outWidth = 1 + (readWidth / m_Param.m_StrideX);

    unsigned int filterHeight = filterShape[2];
    unsigned int readHeight = (inHeight + m_Param.m_PadTop + m_Param.m_PadBottom) - filterHeight;
    unsigned int outHeight = 1 + (readHeight / m_Param.m_StrideY);
    unsigned int depthMultiplier = filterShape[0];

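    // For depthwise convolution the filter tensor is laid out as
    // [ depthMultiplier, inputChannels, filterHeight, filterWidth ],
    // so every input channel produces depthMultiplier output channels.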
    unsigned int outChannels = filterShape[1] * depthMultiplier;
    unsigned int outBatchSize = inBatchSize;

    TensorShape outShape({outBatchSize, outChannels, outHeight, outWidth});
    ConditionalThrow<LayerValidationException>(GetOutputSlot(0).ValidateTensorShape(outShape),
                     "DepthwiseConvolution2dLayer: "
                         "TensorShape set on OutputSlot[0] does not match the inferred shape.");
}

FakeQuantizationLayer::FakeQuantizationLayer(const FakeQuantizationDescriptor& param, const char* name)
    : LayerWithParameters(1, 1, LayerType::FakeQuantization, param, name)
{
}

std::unique_ptr<IWorkload> FakeQuantizationLayer::CreateWorkload(const Graph& graph,
                                                                 const IWorkloadFactory& factory) const
{
    FakeQuantizationQueueDescriptor descriptor;
    return factory.CreateFakeQuantization(descriptor, PrepInfoAndDesc(descriptor, graph));
}

FakeQuantizationLayer* FakeQuantizationLayer::Clone(Graph& graph) const
{
    return CloneBase<FakeQuantizationLayer>(graph, m_Param, GetName());
}

void FakeQuantizationLayer::ValidateTensorShapesFromInputs()
{
    ConditionalThrow<LayerValidationException>(GetInputSlot(0).GetConnection() != nullptr,
                     "FakeQuantizationLayer: InputSlot must be connected to an OutputSlot");
    ConditionalThrow<LayerValidationException>(GetInputSlot(0).GetConnection()->IsTensorInfoSet(),
                     "FakeQuantizationLayer: TensorInfo must be set on connected OutputSlot.");

    IOutputSlot* input = GetInputSlot(0).GetConnection();

    // Input and output shapes are the same.
    TensorShape const& outShape = input->GetTensorInfo().GetShape();
    ConditionalThrow<LayerValidationException>(GetOutputSlot(0).ValidateTensorShape(outShape),
                     "FakeQuantizationLayer: TensorShape set on OutputSlot[0] does not match the inferred shape.");
}

FloorLayer::FloorLayer(const char* name)
    : Layer(1, 1, LayerType::Floor, name)
{
}

std::unique_ptr<IWorkload> FloorLayer::CreateWorkload(const Graph& graph,
                                                      const IWorkloadFactory& factory) const
{
    FloorQueueDescriptor descriptor;
    return factory.CreateFloor(descriptor, PrepInfoAndDesc(descriptor, graph));
}

FloorLayer* FloorLayer::Clone(Graph& graph) const
{
    return CloneBase<FloorLayer>(graph, GetName());
}

void FloorLayer::ValidateTensorShapesFromInputs()
{
    ConditionalThrow<LayerValidationException>(GetInputSlot(0).GetConnection() != nullptr,
                     "FloorLayer: InputSlot must be connected to an OutputSlot");
    ConditionalThrow<LayerValidationException>(GetInputSlot(0).GetConnection()->IsTensorInfoSet(),
                     "FloorLayer: TensorInfo must be set on connected OutputSlot.");

    // Input and output shapes are the same.
    IOutputSlot* input = GetInputSlot(0).GetConnection();
    TensorShape const& outShape = input->GetTensorInfo().GetShape();
    ConditionalThrow<LayerValidationException>(GetOutputSlot(0).ValidateTensorShape(outShape),
                     "FloorLayer: TensorShape set on OutputSlot[0] does not match the inferred shape.");
}

FullyConnectedLayer::FullyConnectedLayer(const FullyConnectedDescriptor& param, const char* name)
    : LayerWithParameters(1, 1, LayerType::FullyConnected, param, name)
{
}

std::unique_ptr<IWorkload> FullyConnectedLayer::CreateWorkload(const Graph& graph,
                                                               const IWorkloadFactory& factory) const
{
    FullyConnectedQueueDescriptor descriptor;

    descriptor.m_Weight = m_Weight.get();
    if (m_Param.m_BiasEnabled)
    {
        descriptor.m_Bias = m_Bias.get();
    }
    return factory.CreateFullyConnected(descriptor, PrepInfoAndDesc(descriptor, graph));
}

FullyConnectedLayer* FullyConnectedLayer::Clone(Graph& graph) const
{
    auto layer = CloneBase<FullyConnectedLayer>(graph, m_Param, GetName());

    layer->m_Weight = m_Weight ? std::make_unique<ScopedCpuTensorHandle>(*m_Weight) : nullptr;
    if (layer->m_Param.m_BiasEnabled)
    {
        layer->m_Bias = m_Bias ? std::make_unique<ScopedCpuTensorHandle>(*m_Bias) : nullptr;
    }

    return layer;
}

void FullyConnectedLayer::ValidateTensorShapesFromInputs()
{
    ConditionalThrow<LayerValidationException>(GetInputSlot(0).GetConnection() != nullptr,
                     "FullyConnectedLayer: InputSlot must be connected to an OutputSlot");
    ConditionalThrow<LayerValidationException>(GetInputSlot(0).GetConnection()->IsTensorInfoSet(),
                     "FullyConnectedLayer: TensorInfo must be set on connected OutputSlot.");

    TensorShape const& weightShape = m_Weight->GetTensorInfo().GetShape();

    // The output of a fully connected layer is [batches, outputSize], where outputSize
    // is weightShape[1], or weightShape[0] when the weight matrix is transposed.
    unsigned int batches = GetInputSlot(0).GetConnection()->GetTensorInfo().GetShape()[0];
    unsigned int dimIdx = m_Param.m_TransposeWeightMatrix ? 0 : 1;
    TensorShape outShape({batches, weightShape[dimIdx]});

    ConditionalThrow<LayerValidationException>(GetOutputSlot(0).ValidateTensorShape(outShape),
                     "FullyConnectedLayer: TensorShape set on OutputSlot[0] does not match the inferred shape.");
}

InputLayer::InputLayer(LayerBindingId id, const char* name)
    : BindableLayer(0, 1, LayerType::Input, name, id)
{
}

std::unique_ptr<IWorkload> InputLayer::CreateWorkload(const Graph& graph, const IWorkloadFactory& factory) const
{
    return nullptr;
}

InputLayer* InputLayer::Clone(Graph& graph) const
{
    return CloneBase<InputLayer>(graph, GetBindingId(), GetName());
}

void InputLayer::ValidateTensorShapesFromInputs()
{
    // The input layer should already have its TensorInfo set during the graph building phase in the driver/parser.
    ConditionalThrow<LayerValidationException>(GetOutputSlot(0).IsTensorInfoSet(),
                                               "InputLayer should already have the TensorInfo set.");
}

MergerLayer::MergerLayer(const OriginsDescriptor& param, const char* name)
    : LayerWithParameters(param.GetNumViews(), 1, LayerType::Merger, param, name)
{
}

std::unique_ptr<IWorkload> MergerLayer::CreateWorkload(const Graph& graph, const IWorkloadFactory& factory) const
{
    MergerQueueDescriptor descriptor;

    // Copy the view origins to the descriptor.
    descriptor.m_ViewOrigins.reserve(m_Param.GetNumViews());
    for (unsigned int i = 0; i < m_Param.GetNumViews(); ++i)
    {
        descriptor.m_ViewOrigins.emplace_back(
            std::vector<unsigned int>(m_Param.GetViewOrigin(i), m_Param.GetViewOrigin(i) + m_Param.GetNumDimensions()));
    }

    return factory.CreateMerger(descriptor, PrepInfoAndDesc(descriptor, graph));
}

void MergerLayer::CreateTensorHandles(Graph& graph, const IWorkloadFactory& factory)
{
    // If sub-tensors are supported, then the merger just needs to make sure that
    // the outputs of the previous layer are made sub-tensors of the output of the
    // merger layer.
    m_OutputHandlers[0].CreateTensorHandles(factory);
    if (factory.SupportsSubTensors())
    {
        const unsigned int numInputSlots = GetNumInputSlots();
        for (unsigned int i = 0; i < numInputSlots; ++i)
        {
            OutputHandler& outputHandler = GetInputSlot(i).GetConnectedOutputSlot()->GetOutputHandler();

            outputHandler.SetData(factory.CreateSubTensorHandle(*m_OutputHandlers[0].GetData(),
                                                                outputHandler.GetTensorInfo().GetShape(),
                                                                m_Param.GetViewOrigin(i)));
        }
    }
}

MergerLayer* MergerLayer::Clone(Graph& graph) const
{
    return CloneBase<MergerLayer>(graph, m_Param, GetName());
}

void MergerLayer::ValidateTensorShapesFromInputs()
{
    // Validate the merger layer.
    ConditionalThrow<LayerValidationException>(m_Param.GetNumViews() == GetNumInputSlots(),
                     "MergerLayer: Num Inputs must match num views.");

    unsigned int numDims = m_Param.GetNumDimensions();
    for (unsigned int i = 0; i < GetNumInputSlots(); i++)
    {
        auto& inputInfo = GetInputSlot(i).GetConnection()->GetTensorInfo();

        boost::ignore_unused(inputInfo);
        ConditionalThrow<LayerValidationException>(numDims == inputInfo.GetNumDimensions(),
                         "MergerLayer: Num Dimensions must match all inputs.");
    }

    // Find the bounding box (extents) of all the views.
    // extentMin must start at the largest representable value so that std::min
    // below actually finds the smallest view origin in each dimension.
    std::vector<unsigned int> extentMin(numDims, std::numeric_limits<unsigned int>::max());
    std::vector<unsigned int> extentMax(numDims);
    for (unsigned int i = 0; i < GetNumInputSlots(); i++)
    {
        const uint32_t* origin = m_Param.GetViewOrigin(i);
        const armnn::TensorShape& shape = GetInputSlot(i).GetConnection()->GetTensorInfo().GetShape();
        for (unsigned int d = 0; d < numDims; d++)
        {
            extentMin[d] = std::min(extentMin[d], origin[d]);
            extentMax[d] = std::max(extentMax[d], origin[d] + shape[d]);
        }
    }

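    // For example, concatenating two {1, 2, 4, 4} inputs along the channel axis uses
    // view origins {0, 0, 0, 0} and {0, 2, 0, 0}, giving extents of {1, 4, 4, 4}.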
    // Check that the bounding box starts at the origin.
    if (!std::all_of(extentMin.begin(), extentMin.end(), [](unsigned int s) { return s == 0; }))
    {
        throw LayerValidationException("MergerLayer: there is no view that starts at the origin");
    }

    // Check that there are no overlaps of views (this would lead to undefined output at those locations).
    // Check each pair of views against each other
    // (and don't bother to check against self, or check the same pair both ways round).
    for (unsigned int a = 0; a < GetNumInputSlots(); a++)
    {
        const uint32_t* aOrigin = m_Param.GetViewOrigin(a);
        const armnn::TensorShape& aShape = GetInputSlot(a).GetConnection()->GetTensorInfo().GetShape();
        for (unsigned int b = 0; b < a; b++)
        {
            const uint32_t* bOrigin = m_Param.GetViewOrigin(b);
            const armnn::TensorShape& bShape = GetInputSlot(b).GetConnection()->GetTensorInfo().GetShape();

            bool allAxesOverlap = true;
            for (unsigned int d = 0; d < numDims && allAxesOverlap; d++)
            {
                unsigned int a1 = aOrigin[d];
                unsigned int a2 = aOrigin[d] + aShape[d];

                unsigned int b1 = bOrigin[d];
                unsigned int b2 = bOrigin[d] + bShape[d];

                if (a2 <= b1 || b2 <= a1)
                {
                    allAxesOverlap = false;
                }
            }
            if (allAxesOverlap)
            {
                throw LayerValidationException("MergerLayer: Some views overlap.");
            }
        }
    }

    // Check that there are no "holes", i.e. regions of the output which are not covered by a view.
    // Because we already checked that there are no overlaps, this can be done simply by checking
    // that the total 'volume' of the views is the same as the output.
    unsigned int totalViewsVolume = 0;
    for (unsigned int i = 0; i < GetNumInputSlots(); i++)
    {
        totalViewsVolume += GetInputSlot(i).GetConnection()->GetTensorInfo().GetNumElements();
    }
    unsigned int outputVolume = 1;
    for (unsigned int d = 0; d < numDims; d++)
    {
        outputVolume *= (extentMax[d] - extentMin[d]);
    }
    if (totalViewsVolume != outputVolume)
    {
        throw LayerValidationException("MergerLayer: there are some gaps between views");
    }

    TensorShape outShape(numDims, extentMax.data());
    ConditionalThrow<LayerValidationException>(GetOutputSlot(0).ValidateTensorShape(outShape),
                     "MergerLayer: TensorShape set on OutputSlot[0] does not match the inferred shape.");
}

MultiplicationLayer::MultiplicationLayer(const char* name)
    : Layer(2, 1, LayerType::Multiplication, name)
{
}

std::unique_ptr<IWorkload> MultiplicationLayer::CreateWorkload(const Graph& graph,
                                                               const IWorkloadFactory& factory) const
{
    MultiplicationQueueDescriptor descriptor;

    return factory.CreateMultiplication(descriptor, PrepInfoAndDesc(descriptor, graph));
}

MultiplicationLayer* MultiplicationLayer::Clone(Graph& graph) const
{
    return CloneBase<MultiplicationLayer>(graph, GetName());
}

void MultiplicationLayer::ValidateTensorShapesFromInputs()
{
    ConditionalThrow<LayerValidationException>(GetInputSlot(0).GetConnection()->GetTensorInfo().GetShape() ==
                     GetInputSlot(1).GetConnection()->GetTensorInfo().GetShape(),
                     "MultiplicationLayer: Inputs must match");

    TensorInfo infoOut(GetInputSlot(0).GetConnection()->GetTensorInfo());
    ConditionalThrow<LayerValidationException>(GetOutputSlot(0).ValidateTensorShape(infoOut.GetShape()),
                     "MultiplicationLayer: TensorShape set on OutputSlot[0] does not match the inferred shape.");
}

NormalizationLayer::NormalizationLayer(const NormalizationDescriptor& param, const char* name)
    : LayerWithParameters(1, 1, LayerType::Normalization, param, name)
{
}

std::unique_ptr<IWorkload> NormalizationLayer::CreateWorkload(const Graph& graph, const IWorkloadFactory& factory) const
{
    NormalizationQueueDescriptor descriptor;
    return factory.CreateNormalization(descriptor, PrepInfoAndDesc(descriptor, graph));
}

NormalizationLayer* NormalizationLayer::Clone(Graph& graph) const
{
    return CloneBase<NormalizationLayer>(graph, m_Param, GetName());
}

void NormalizationLayer::ValidateTensorShapesFromInputs()
{
    ConditionalThrow<LayerValidationException>(GetInputSlot(0).GetConnection() != nullptr,
                                               "NormalizationLayer: Input slot must be connected.");

    const TensorShape& outShape = GetInputSlot(0).GetConnection()->GetTensorInfo().GetShape();
    ConditionalThrow<LayerValidationException>(GetOutputSlot(0).ValidateTensorShape(outShape),
                     "NormalizationLayer: TensorShape set on OutputSlot[0] does not match the inferred shape.");
}

OutputLayer::OutputLayer(LayerBindingId id, const char* name)
    : BindableLayer(1, 0, LayerType::Output, name, id)
{
}

std::unique_ptr<IWorkload> OutputLayer::CreateWorkload(const Graph& graph, const IWorkloadFactory& factory) const
{
    return nullptr;
}

OutputLayer* OutputLayer::Clone(Graph& graph) const
{
    return CloneBase<OutputLayer>(graph, GetBindingId(), GetName());
}

void OutputLayer::ValidateTensorShapesFromInputs()
{
    // Just validate that the input is connected.
    ConditionalThrow<LayerValidationException>(GetInputSlot(0).GetConnection() != nullptr,
                                               "OutputLayer: Input slot must be connected.");
}

PermuteLayer::PermuteLayer(const PermuteDescriptor& param, const char* name)
    : LayerWithParameters(1, 1, LayerType::Permute, param, name)
{
}

std::unique_ptr<IWorkload> PermuteLayer::CreateWorkload(const Graph& graph, const IWorkloadFactory& factory) const
{
    PermuteQueueDescriptor descriptor;
    return factory.CreatePermute(descriptor, PrepInfoAndDesc(descriptor, graph));
}

PermuteLayer* PermuteLayer::Clone(Graph& graph) const
{
    return CloneBase<PermuteLayer>(graph, m_Param, GetName());
}

void PermuteLayer::ValidateTensorShapesFromInputs()
{
    ConditionalThrow<LayerValidationException>(GetInputSlot(0).GetConnection() != nullptr,
                     "PermuteLayer: InputSlot must be connected to an OutputSlot");
    ConditionalThrow<LayerValidationException>(GetInputSlot(0).GetConnection()->IsTensorInfoSet(),
                     "PermuteLayer: TensorInfo must be set on connected OutputSlot.");

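    // armnnUtils::Permuted applies m_DimMappings to the input shape to produce
    // the rearranged output shape this layer is expected to emit.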
    const TensorInfo& infoIn = GetInputSlot(0).GetConnection()->GetTensorInfo();
    TensorShape shapeOut = armnnUtils::Permuted(infoIn.GetShape(), m_Param.m_DimMappings);
    ConditionalThrow<LayerValidationException>(GetOutputSlot(0).ValidateTensorShape(shapeOut),
                     "PermuteLayer: TensorShape set on OutputSlot[0] does not match the inferred shape.");
}

Pooling2dLayer::Pooling2dLayer(const Pooling2dDescriptor& param, const char* name)
    : LayerWithParameters(1, 1, LayerType::Pooling2d, param, name)
{
}

std::unique_ptr<IWorkload> Pooling2dLayer::CreateWorkload(const Graph& graph, const IWorkloadFactory& factory) const
{
    Pooling2dQueueDescriptor descriptor;
    return factory.CreatePooling2d(descriptor, PrepInfoAndDesc(descriptor, graph));
}

Pooling2dLayer* Pooling2dLayer::Clone(Graph& graph) const
{
    return CloneBase<Pooling2dLayer>(graph, m_Param, GetName());
}

void Pooling2dLayer::ValidateTensorShapesFromInputs()
{
    ConditionalThrow<LayerValidationException>(GetInputSlot(0).GetConnection() != nullptr,
                     "Pooling2dLayer: InputSlot must be connected to an OutputSlot");
    ConditionalThrow<LayerValidationException>(GetInputSlot(0).GetConnection()->IsTensorInfoSet(),
                     "Pooling2dLayer: TensorInfo must be set on connected OutputSlot.");

    IOutputSlot* input = GetInputSlot(0).GetConnection();
    const TensorShape& inputShape = input->GetTensorInfo().GetShape();

    // If we support multiple batch dimensions in the future, then this assert will need to change.
    BOOST_ASSERT_MSG(inputShape.GetNumDimensions() == 4, "Pooling2dLayer will always have 4D input.");

    unsigned int inWidth = inputShape[3];
    unsigned int inHeight = inputShape[2];
    unsigned int inChannels = inputShape[1];
    unsigned int inBatchSize = inputShape[0];

    bool isGlobalPooling = (m_Param.m_StrideX == 0 && m_Param.m_StrideY == 0);
    unsigned int outWidth = 1;
    unsigned int outHeight = 1;
    if (!isGlobalPooling)
    {
        BOOST_ASSERT_MSG(m_Param.m_StrideX != 0 && m_Param.m_StrideY != 0,
                         "Stride can only be zero when performing global pooling");

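        // Pooled output size per spatial dimension (before the border correction below):
        //   outSize = round((inSize + lowPad + highPad - poolSize) / stride) + 1
        // where the rounding mode (floor or ceiling) comes from the descriptor.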
        auto CalcSize = [](auto inSize, auto lowPad, auto highPad, auto poolSize, auto stride, auto padMethod,
                           auto outputShapeRounding)
            {
                unsigned int readSize = inSize + lowPad + highPad - poolSize;
                float div = static_cast<float>(readSize) / static_cast<float>(stride);

                unsigned int size = 0;
                switch (outputShapeRounding)
                {
                    case OutputShapeRounding::Ceiling:
                        size = static_cast<unsigned int>(ceil(div)) + 1;
                        break;
                    case OutputShapeRounding::Floor:
                        size = static_cast<unsigned int>(floor(div)) + 1;
                        break;
                    default:
                        BOOST_ASSERT_MSG(false, "Unsupported Output Shape Rounding");
                }

                // Make sure that border operations will start from inside the input and not the padded area.
                // This is what both Caffe and CL do...
                if ((size - 1) * stride >= inSize + lowPad)
                {
                    --size;
                }

                return size;
            };

        outWidth = CalcSize(inWidth, m_Param.m_PadLeft, m_Param.m_PadRight, m_Param.m_PoolWidth, m_Param.m_StrideX,
                            m_Param.m_PaddingMethod, m_Param.m_OutputShapeRounding);
        outHeight = CalcSize(inHeight, m_Param.m_PadTop, m_Param.m_PadBottom, m_Param.m_PoolHeight, m_Param.m_StrideY,
                             m_Param.m_PaddingMethod, m_Param.m_OutputShapeRounding);
    }
    unsigned int outChannels = inChannels;
    unsigned int outBatchSize = inBatchSize;

    TensorShape shapeOut({outBatchSize, outChannels, outHeight, outWidth});

    ConditionalThrow<LayerValidationException>(GetOutputSlot(0).ValidateTensorShape(shapeOut),
                     "Pooling2dLayer: TensorShape set on OutputSlot[0] does not match the inferred shape.");
}

SoftmaxLayer::SoftmaxLayer(const SoftmaxDescriptor& param, const char* name)
    : LayerWithParameters(1, 1, LayerType::Softmax, param, name)
{
}

std::unique_ptr<IWorkload> SoftmaxLayer::CreateWorkload(const Graph& graph, const IWorkloadFactory& factory) const
{
    SoftmaxQueueDescriptor descriptor;
    return factory.CreateSoftmax(descriptor, PrepInfoAndDesc(descriptor, graph));
}

SoftmaxLayer* SoftmaxLayer::Clone(Graph& graph) const
{
    return CloneBase<SoftmaxLayer>(graph, m_Param, GetName());
}

void SoftmaxLayer::ValidateTensorShapesFromInputs()
{
    ConditionalThrow<LayerValidationException>(GetInputSlot(0).GetConnection() != nullptr,
                                               "SoftmaxLayer: Input slot must be connected.");
    const TensorShape& outShape = GetInputSlot(0).GetConnection()->GetTensorInfo().GetShape();
    ConditionalThrow<LayerValidationException>(GetOutputSlot(0).ValidateTensorShape(outShape),
                     "SoftmaxLayer: TensorShape set on OutputSlot[0] does not match the inferred shape.");
}

SplitterLayer::SplitterLayer(const ViewsDescriptor& param, const char* name)
    : LayerWithParameters(1, param.GetNumViews(), LayerType::Splitter, param, name)
{
}

std::unique_ptr<IWorkload> SplitterLayer::CreateWorkload(const Graph& graph, const IWorkloadFactory& factory) const
{
    SplitterQueueDescriptor descriptor;

    // Copy the window origins to the descriptor.
    for (unsigned int i = 0; i < m_Param.GetNumViews(); ++i)
    {
        descriptor.m_ViewOrigins.emplace_back(
            std::vector<unsigned int>(m_Param.GetViewOrigin(i), m_Param.GetViewOrigin(i) + m_Param.GetNumDimensions()));
    }

    return factory.CreateSplitter(descriptor, PrepInfoAndDesc(descriptor, graph));
}

void SplitterLayer::CreateTensorHandles(Graph& graph, const IWorkloadFactory& factory)
{
    // If sub-tensors are supported, then all the splitter needs to do is to
    // set its outputs to be appropriate sub-tensors of the input.
    if (factory.SupportsSubTensors())
    {
        const OutputHandler& outputHandler = GetInputSlots()[0].GetConnectedOutputSlot()->GetOutputHandler();

        ITensorHandle* inputData = outputHandler.GetData();
        // Create the outputs as sub-tensors of the input.
        for (unsigned int i = 0; i < m_Param.GetNumViews(); ++i)
        {
            m_OutputHandlers[i].SetData(factory.CreateSubTensorHandle(*inputData,
                                                                      m_OutputHandlers[i].GetTensorInfo().GetShape(),
                                                                      m_Param.GetViewOrigin(i)));
        }
    }
    else
    {
        for (unsigned int i = 0; i < m_Param.GetNumViews(); ++i)
        {
            m_OutputHandlers[i].CreateTensorHandles(factory);
        }
    }
}

SplitterLayer* SplitterLayer::Clone(Graph& graph) const
{
    return CloneBase<SplitterLayer>(graph, m_Param, GetName());
}

void SplitterLayer::ValidateTensorShapesFromInputs()
{
    // Output shapes must match the view shapes.
    for (unsigned int viewIdx = 0; viewIdx < m_Param.GetNumViews(); viewIdx++)
    {
        const uint32_t* sizes = m_Param.GetViewSizes(viewIdx);

        TensorShape outShape(m_Param.GetNumDimensions(), sizes);
        ConditionalThrow<LayerValidationException>(GetOutputSlot(viewIdx).ValidateTensorShape(outShape),
                         "SplitterLayer: View sizes must match output tensor shapes.");
    }
}

MemCopyLayer::MemCopyLayer(const char* name)
    : Layer(1, 1, LayerType::MemCopy, name)
{
}

MemCopyLayer* MemCopyLayer::Clone(Graph& graph) const
{
    return CloneBase<MemCopyLayer>(graph, GetName());
}

std::unique_ptr<IWorkload> MemCopyLayer::CreateWorkload(const Graph& graph, const IWorkloadFactory& factory) const
{
    MemCopyQueueDescriptor descriptor;
    return factory.CreateMemCopy(descriptor, PrepInfoAndDesc(descriptor, graph));
}

void MemCopyLayer::ValidateTensorShapesFromInputs()
{
    ConditionalThrow<LayerValidationException>(GetInputSlot(0).GetConnection() != nullptr,
                     "MemCopyLayer: InputSlot must be connected to an OutputSlot");
    ConditionalThrow<LayerValidationException>(GetInputSlot(0).GetConnection()->IsTensorInfoSet(),
                     "MemCopyLayer: TensorInfo must be set on connected OutputSlot.");

    IOutputSlot* input = GetInputSlot(0).GetConnection();

    // Input and output shapes are the same.
    TensorShape const& outShape = input->GetTensorInfo().GetShape();
    ConditionalThrow<LayerValidationException>(GetOutputSlot(0).ValidateTensorShape(outShape),
                     "MemCopyLayer: TensorShape set on OutputSlot[0] does not match the inferred shape.");
}

ResizeBilinearLayer::ResizeBilinearLayer(const ResizeBilinearDescriptor& param, const char* name)
    : LayerWithParameters(1, 1, LayerType::ResizeBilinear, param, name)
{
}

std::unique_ptr<IWorkload> ResizeBilinearLayer::CreateWorkload(const Graph& graph,
                                                               const IWorkloadFactory& factory) const
{
    ResizeBilinearQueueDescriptor descriptor;
    return factory.CreateResizeBilinear(descriptor, PrepInfoAndDesc(descriptor, graph));
}

ResizeBilinearLayer* ResizeBilinearLayer::Clone(Graph& graph) const
{
    return CloneBase<ResizeBilinearLayer>(graph, m_Param, GetName());
}

void ResizeBilinearLayer::ValidateTensorShapesFromInputs()
{
    ConditionalThrow<LayerValidationException>(GetInputSlot(0).GetConnection() != nullptr,
                     "ResizeBilinearLayer: InputSlot must be connected to an OutputSlot");
    ConditionalThrow<LayerValidationException>(GetInputSlot(0).GetConnection()->IsTensorInfoSet(),
                     "ResizeBilinearLayer: TensorInfo must be set on connected OutputSlot.");

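    // Resizing only changes the spatial dimensions; the batch and channel
    // counts are carried over from the input unchanged.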
    const TensorShape& inputShape = GetInputSlot(0).GetConnection()->GetTensorInfo().GetShape();
    unsigned int outWidth = m_Param.m_TargetWidth;
    unsigned int outHeight = m_Param.m_TargetHeight;
    unsigned int outChannels = inputShape[1];
    unsigned int outBatch = inputShape[0];
    TensorShape outShape({outBatch, outChannels, outHeight, outWidth});
    ConditionalThrow<LayerValidationException>(GetOutputSlot(0).ValidateTensorShape(outShape),
                     "ResizeBilinearLayer: TensorShape set on OutputSlot[0] does not match the inferred shape.");
}

L2NormalizationLayer::L2NormalizationLayer(const char* name)
    : Layer(1, 1, LayerType::L2Normalization, name)
{
}

std::unique_ptr<IWorkload> L2NormalizationLayer::CreateWorkload(const Graph& graph,
                                                                const IWorkloadFactory& factory) const
{
    L2NormalizationQueueDescriptor descriptor;
    return factory.CreateL2Normalization(descriptor, PrepInfoAndDesc(descriptor, graph));
}

L2NormalizationLayer* L2NormalizationLayer::Clone(Graph& graph) const
{
    return CloneBase<L2NormalizationLayer>(graph, GetName());
}

void L2NormalizationLayer::ValidateTensorShapesFromInputs()
{
    ConditionalThrow<LayerValidationException>(GetInputSlot(0).GetConnection() != nullptr,
                     "L2NormalizationLayer: InputSlot must be connected to an OutputSlot");
    ConditionalThrow<LayerValidationException>(GetInputSlot(0).GetConnection()->IsTensorInfoSet(),
                     "L2NormalizationLayer: TensorInfo must be set on connected OutputSlot.");

    IOutputSlot* input = GetInputSlot(0).GetConnection();

    // Input and output shapes are the same.
    TensorShape const& outShape = input->GetTensorInfo().GetShape();
    ConditionalThrow<LayerValidationException>(GetOutputSlot(0).ValidateTensorShape(outShape),
                     "L2NormalizationLayer: TensorShape set on OutputSlot[0] does not match the inferred shape.");
}

ConstantLayer::ConstantLayer(const std::shared_ptr<ScopedCpuTensorHandle>& input, const char* name)
    : Layer(0, 1, LayerType::Constant, name)
    , m_LayerOutput(input)
{
}

std::unique_ptr<IWorkload> ConstantLayer::CreateWorkload(const Graph& graph,
                                                         const IWorkloadFactory& factory) const
{
    ConstantQueueDescriptor descriptor;
    descriptor.m_LayerOutput = m_LayerOutput.get();
    return factory.CreateConstant(descriptor, PrepInfoAndDesc(descriptor, graph));
}

ConstantLayer* ConstantLayer::Clone(Graph& graph) const
{
    // Cloned layers share the same layer output object.
    return CloneBase<ConstantLayer>(graph, m_LayerOutput, GetName());
}

void ConstantLayer::ValidateTensorShapesFromInputs()
{
    // Get the output shape from the value of the constant layer.
    TensorShape const& outShape = m_LayerOutput->GetTensorInfo().GetShape();
    ConditionalThrow<LayerValidationException>(GetOutputSlot(0).ValidateTensorShape(outShape),
                     "ConstantLayer: TensorShape set on OutputSlot[0] does not match the inferred shape.");
}

ReshapeLayer::ReshapeLayer(const ReshapeDescriptor& param, const char* name)
    : LayerWithParameters(1, 1, LayerType::Reshape, param, name)
{
}

std::unique_ptr<IWorkload> ReshapeLayer::CreateWorkload(const Graph& graph,
                                                        const IWorkloadFactory& factory) const
{
    ReshapeQueueDescriptor descriptor;
    return factory.CreateReshape(descriptor, PrepInfoAndDesc(descriptor, graph));
}

ReshapeLayer* ReshapeLayer::Clone(Graph& graph) const
{
    return CloneBase<ReshapeLayer>(graph, m_Param, GetName());
}

void ReshapeLayer::ValidateTensorShapesFromInputs()
{
    ConditionalThrow<LayerValidationException>(GetInputSlot(0).GetConnection() != nullptr,
                     "ReshapeLayer: InputSlot must be connected to an OutputSlot");
    ConditionalThrow<LayerValidationException>(GetInputSlot(0).GetConnection()->IsTensorInfoSet(),
                     "ReshapeLayer: TensorInfo must be set on connected OutputSlot.");
    ConditionalThrow<LayerValidationException>(GetOutputSlot(0).ValidateTensorShape(m_Param.m_TargetShape),
                     "ReshapeLayer: TensorShape set on OutputSlot[0] does not match the inferred shape.");
}

} // namespace armnn