IVGCVSW-5108 Allow Concat to use subtensor on x and y

author Sadik Armagan <sadik.armagan@arm.com>

Tue, 4 Aug 2020 13:01:05 +0000 (14:01 +0100)

committer KeithARM <keith.davis@arm.com>

Fri, 7 Aug 2020 12:44:19 +0000 (12:44 +0000)
author Sadik Armagan <sadik.armagan@arm.com>
Tue, 4 Aug 2020 13:01:05 +0000 (14:01 +0100)
committer KeithARM <keith.davis@arm.com>
Fri, 7 Aug 2020 12:44:19 +0000 (12:44 +0000)
diff --git a/src/armnn/layers/ConcatLayer.cpp b/src/armnn/layers/ConcatLayer.cpp

index d9fffff..fac6a1f 100644 (file)
--- a/src/armnn/layers/ConcatLayer.cpp
+++ b/src/armnn/layers/ConcatLayer.cpp
@@ -36,7 +36,7 @@ std::unique_ptr<IWorkload> ConcatLayer::CreateWorkload(const IWorkloadFactory& f
  }
  
  template<typename FactoryType>
-void ConcatLayer::CreateTensors(const FactoryType& factory)
+void ConcatLayer::CreateTensors(const TensorHandleFactoryRegistry& registry, const FactoryType& factory)
  {
      //If sub tensors are supported then the concat
      //just needs to make sure that the outputs of the prev layer
@@ -45,6 +45,12 @@ void ConcatLayer::CreateTensors(const FactoryType& factory)
  
      if (factory.SupportsSubTensors())
      {
+        // check if concat is along the x or y (2 innermost dimensions)
+        uint32_t concatAxis = m_Param.GetConcatAxis();
+        auto numberOfDimensions = m_Param.GetNumDimensions();
+        bool isConcatOnXorY = m_Param.GetNumDimensions() >= 3
+                                && ((concatAxis == numberOfDimensions - 1) || (concatAxis == numberOfDimensions - 2));
+
          ITensorHandleFactory::FactoryId factoryId = GetOutputSlot(0).GetTensorHandleFactoryId();
  
          std::queue<ConcatLayer*> m_ConcatLayers;
@@ -59,6 +65,35 @@ void ConcatLayer::CreateTensors(const FactoryType& factory)
  
              const unsigned int numInputSlots = currentLayer->GetNumInputSlots();
  
+            // Concat along x or y (2 innermost dimensions) can only use sub-tensors if the previous layers do not require padding
+            bool canUseSubTensorOnXorY = true;
+            bool isTensorHandleFactory = std::is_same<armnn::ITensorHandleFactory, FactoryType>::value;
+            if (isTensorHandleFactory)
+            {
+                for (unsigned int i = 0; i < numInputSlots; ++i)
+                {
+                    OutputSlot* slot = currentLayer->GetInputSlot(i).GetConnectedOutputSlot();
+                    ITensorHandleFactory* handleFactory  = registry.GetFactory(factoryId);
+                    std::vector<Capability> capabilities =
+                        handleFactory->GetCapabilities(&(slot->GetOwningLayer()),
+                                                       currentLayer,
+                                                       CapabilityClass::PaddingRequired);
+                    if (isConcatOnXorY)
+                    {
+                        canUseSubTensorOnXorY = false;
+                        if (capabilities.empty())
+                        {
+                            canUseSubTensorOnXorY = true;
+                        }
+                    }
+
+                    if (!canUseSubTensorOnXorY)
+                    {
+                        break;
+                    }
+                }
+            }
+
              // First go through all the input slots and verify that we can sub-tensor all the inputs.
              std::vector<std::unique_ptr<ITensorHandle>> subTensors(0);
              subTensors.reserve(numInputSlots);
@@ -74,12 +109,14 @@ void ConcatLayer::CreateTensors(const FactoryType& factory)
                      // 2) the same TensorHandleFactory is used for input and Concat layer output
                      // 3) the input does not come from a Constant layer or input layer
                      // 4) the input is only read by this concat layer
+                    // 5) if concat is along x or y (2 innermost dimensions), the previous layers do not require padding
                      if (slot &&
                          parentInfo.IsTypeSpaceMatch(info) && //(1)
                          factoryId == slot->GetTensorHandleFactoryId() && //(2)
                          slot->GetOwningLayer().GetType() != LayerType::Constant && //(3)
                          slot->GetOwningLayer().GetType() != LayerType::Input && //(3)
-                        slot->GetNumConnections() == 1) //(4)
+                        slot->GetNumConnections() == 1 &&
+                        canUseSubTensorOnXorY) //(5)
                      {
                          return factory.CreateSubTensorHandle(*parentTensor,
                                                               info.GetShape(),
@@ -137,13 +174,13 @@ void ConcatLayer::CreateTensorHandles(const TensorHandleFactoryRegistry& registr
  
      if (factoryId == ITensorHandleFactory::LegacyFactoryId)
      {
-        CreateTensors(workloadFactory);
+        CreateTensors(registry, workloadFactory);
      }
      else
      {
          ITensorHandleFactory* handleFactory = registry.GetFactory(factoryId);
          ARMNN_ASSERT(handleFactory);
-        CreateTensors(*handleFactory);
+        CreateTensors(registry, *handleFactory);
      }
  }
  
diff --git a/src/armnn/layers/ConcatLayer.hpp b/src/armnn/layers/ConcatLayer.hpp

index 84eba2e..eaa5c15 100644 (file)
--- a/src/armnn/layers/ConcatLayer.hpp
+++ b/src/armnn/layers/ConcatLayer.hpp
@@ -56,7 +56,7 @@ protected:
  
  private:
      template <typename FactoryType>
-    void CreateTensors(const FactoryType& factory);
+    void CreateTensors(const TensorHandleFactoryRegistry& registry, const FactoryType& factory);
  
  };
  
diff --git a/src/backends/neon/NeonTensorHandleFactory.cpp b/src/backends/neon/NeonTensorHandleFactory.cpp

index 4e013a3..53d5a04 100644 (file)
--- a/src/backends/neon/NeonTensorHandleFactory.cpp
+++ b/src/backends/neon/NeonTensorHandleFactory.cpp
@@ -34,17 +34,6 @@ std::unique_ptr<ITensorHandle> NeonTensorHandleFactory::CreateSubTensorHandle(IT
  
      const arm_compute::TensorShape parentShape = armcomputetensorutils::BuildArmComputeTensorShape(parent.GetShape());
  
-    // In order for ACL to support subtensors the concat axis cannot be on x or y and the values of x and y
-    // must match the parent shapes
-    if (coords.x() != 0 || coords.y() != 0)
-    {
-        return nullptr;
-    }
-    if ((parentShape.x() != shape.x()) || (parentShape.y() != shape.y()))
-    {
-        return nullptr;
-    }
-
      if (!::arm_compute::error_on_invalid_subtensor(__func__, __FILE__, __LINE__, parentShape, coords, shape))
      {
          return nullptr;
diff --git a/src/backends/neon/test/NeonTensorHandleTests.cpp b/src/backends/neon/test/NeonTensorHandleTests.cpp

index fe5e8f9..8b3e3fd 100644 (file)
--- a/src/backends/neon/test/NeonTensorHandleTests.cpp
+++ b/src/backends/neon/test/NeonTensorHandleTests.cpp
@@ -2,9 +2,17 @@
  // Copyright © 2020 Arm Ltd and Contributors. All rights reserved.
  // SPDX-License-Identifier: MIT
  //
+
+#include <Graph.hpp>
+#include <Network.hpp>
+
  #include <neon/NeonTensorHandle.hpp>
  #include <neon/NeonTensorHandleFactory.hpp>
  
+#include <armnn/utility/PolymorphicDowncast.hpp>
+
+#include <test/GraphUtils.hpp>
+
  #include <boost/test/unit_test.hpp>
  
  BOOST_AUTO_TEST_SUITE(NeonTensorHandleTests)
@@ -77,4 +85,79 @@ BOOST_AUTO_TEST_CASE(NeonTensorHandleGetCapabilitiesPadding)
      BOOST_TEST(capabilities[0].m_Value);
  }
  
+BOOST_AUTO_TEST_CASE(ConcatOnXorYSubTensorsNoPaddinRequiredTest)
+{
+    armnn::INetworkPtr net(armnn::INetwork::Create());
+
+    // Set up tensor infos: two {2, 3, 2, 2} tensors concatenated along dimension 2 give a {2, 3, 4, 2} output
+    const armnn::TensorInfo inputInfo = armnn::TensorInfo({2, 3, 2, 2}, armnn::DataType::Float32);
+    const armnn::TensorInfo intermediateInfo = armnn::TensorInfo({2, 3, 2, 2}, armnn::DataType::Float32);
+    const armnn::TensorInfo outputInfo = armnn::TensorInfo({2, 3, 4, 2}, armnn::DataType::Float32);
+
+    armnn::ElementwiseUnaryDescriptor descriptor(armnn::UnaryOperation::Abs);
+
+    // Create the network: each input feeds an Abs layer, and both Abs outputs feed the concat layer
+    armnn::IConnectableLayer* const input0Layer = net->AddInputLayer(0, "input_0");
+    input0Layer->GetOutputSlot(0).SetTensorInfo(inputInfo);
+    armnn::IConnectableLayer* elementwiseUnaryLayer0 = net->AddElementwiseUnaryLayer(descriptor, "elementwiseUnary_0");
+    elementwiseUnaryLayer0->GetOutputSlot(0).SetTensorInfo(intermediateInfo);
+    input0Layer->GetOutputSlot(0).Connect(elementwiseUnaryLayer0->GetInputSlot(0));
+
+    armnn::IConnectableLayer* const input1Layer = net->AddInputLayer(1, "input_1");
+    input1Layer->GetOutputSlot(0).SetTensorInfo(inputInfo);
+    armnn::IConnectableLayer* elementwiseUnaryLayer1 = net->AddElementwiseUnaryLayer(descriptor, "elementwiseUnary_1");
+    elementwiseUnaryLayer1->GetOutputSlot(0).SetTensorInfo(intermediateInfo);
+    input1Layer->GetOutputSlot(0).Connect(elementwiseUnaryLayer1->GetInputSlot(0));
+
+    std::array<armnn::TensorShape, 2> concatInputShapes = { intermediateInfo.GetShape(), intermediateInfo.GetShape() };
+    armnn::IConnectableLayer* const concatLayer = net->AddConcatLayer(armnn::CreateDescriptorForConcatenation(
+        concatInputShapes.begin(), concatInputShapes.end(), 2), "concatenation");
+    concatLayer->GetOutputSlot(0).SetTensorInfo(outputInfo);
+    elementwiseUnaryLayer0->GetOutputSlot(0).Connect(concatLayer->GetInputSlot(0));
+    elementwiseUnaryLayer1->GetOutputSlot(0).Connect(concatLayer->GetInputSlot(1));
+
+    armnn::IConnectableLayer* const outputLayer = net->AddOutputLayer(0, "output");
+    concatLayer->GetOutputSlot(0).Connect(outputLayer->GetInputSlot(0));
+
+    armnn::IRuntime::CreationOptions options;
+    armnn::IRuntimePtr runtime(armnn::IRuntime::Create(options));
+
+    std::vector<armnn::BackendId> backends = { armnn::Compute::CpuAcc };
+    armnn::IOptimizedNetworkPtr optimizedNet = armnn::Optimize(*net, backends, runtime->GetDeviceSpec());
+
+    const armnn::Graph& theGraph = static_cast<armnn::OptimizedNetwork*>(optimizedNet.get())->GetGraph();
+
+    // Load the optimized graph into the runtime
+    armnn::NetworkId networkIdentifier;
+    runtime->LoadNetwork(networkIdentifier, std::move(optimizedNet));
+
+    // Count how many of the concat layer's inputs are sub-tensor handles, i.e. non-null handles that have a parent
+    auto TraceSubTensorHandleAncestry = [](armnn::ITensorHandle* const subTensorHandle)
+    {
+        if (subTensorHandle && subTensorHandle->GetParent())
+        {
+            return true;
+        }
+        return false;
+    };
+
+    for (auto&& layer : theGraph)
+    {
+        if(layer->GetType() == armnn::LayerType::Concat)
+        {
+            unsigned int numberOfSubTensors = 0;
+            for (unsigned int i = 0; i < layer->GetNumInputSlots(); ++i)
+            {
+                const armnn::OutputSlot* slot = layer->GetInputSlot(i).GetConnectedOutputSlot();
+                if (TraceSubTensorHandleAncestry(slot->GetOutputHandler().GetData()))
+                {
+                    ++numberOfSubTensors;
+                }
+            }
+            // At least one concat input is expected to be a sub-tensor in this configuration
+            BOOST_CHECK(numberOfSubTensors > 0);
+        }
+    }
+}
+
  BOOST_AUTO_TEST_SUITE_END()
author	Sadik Armagan <sadik.armagan@arm.com>
	Tue, 4 Aug 2020 13:01:05 +0000 (14:01 +0100)
committer	KeithARM <keith.davis@arm.com>
	Fri, 7 Aug 2020 12:44:19 +0000 (12:44 +0000)
src/armnn/layers/ConcatLayer.cpp		patch \| blob \| history
src/armnn/layers/ConcatLayer.hpp		patch \| blob \| history
src/backends/neon/NeonTensorHandleFactory.cpp		patch \| blob \| history
src/backends/neon/test/NeonTensorHandleTests.cpp		patch \| blob \| history