return splitAxis;
}
+/// Function to convert ArmNN axis (left to right) to ACL axis (right to left) ranging from [-dim, dim)
+/// e.g. for a rank-4 tensor, ArmNN axis 0 maps to ACL axis 3; a negative ArmNN axis maps to a negative ACL axis.
+inline int ComputeAclAxis(const int& armnnAxis, const armnn::TensorInfo& tensor)
+{
+ int dim = static_cast<int>(tensor.GetNumDimensions());
+
+ // The axis must lie in [-dim, dim); a rank-0 tensor has no axis to convert.
+ ARMNN_ASSERT(dim != 0);
+ ARMNN_ASSERT((-1 * dim) <= armnnAxis);
+ ARMNN_ASSERT(armnnAxis < dim);
+
+ // Mirror the axis across the dimension order: non-negative ArmNN axes map to
+ // [0, dim) in ACL order; negative ArmNN axes map to negative ACL axes.
+ int sign = (armnnAxis < 0) ? -1 : 1;
+ int aclAxis = sign * dim - 1 - armnnAxis;
+
+ return aclAxis;
+}
+
} // namespace armnn
#include "workloads/NeonNegWorkload.hpp"
#include "workloads/NeonNormalizationFloatWorkload.hpp"
#include "workloads/NeonFullyConnectedWorkload.hpp"
+#include "workloads/NeonGatherWorkload.hpp"
#include "workloads/NeonPadWorkload.hpp"
#include "workloads/NeonPermuteWorkload.hpp"
#include "workloads/NeonPooling2dWorkload.hpp"
descriptor);
}
+bool NeonLayerSupport::IsGatherSupported(const TensorInfo& input0,
+ const TensorInfo& input1,
+ const TensorInfo& output,
+ Optional<std::string&> reasonIfUnsupported) const
+{
+ // Defer to the workload's ACL-backed validate function; any failure reason
+ // from ACL is forwarded to the caller through reasonIfUnsupported.
+ FORWARD_WORKLOAD_VALIDATE_FUNC(NeonGatherWorkloadValidate,
+ reasonIfUnsupported,
+ input0,
+ input1,
+ output);
+}
+
bool NeonLayerSupport::IsGreaterSupported(const armnn::TensorInfo& input0,
const armnn::TensorInfo& input1,
const armnn::TensorInfo& output,
const FullyConnectedDescriptor& descriptor,
Optional<std::string&> reasonIfUnsupported = EmptyOptional()) const override;
+ /// Queries whether the Neon backend can execute Gather with the given
+ /// params (input0), indices (input1) and output tensors.
+ bool IsGatherSupported(const TensorInfo& input0,
+ const TensorInfo& input1,
+ const TensorInfo& output,
+ Optional<std::string&> reasonIfUnsupported) const override;
+
ARMNN_DEPRECATED_MSG("Use IsComparisonSupported instead")
bool IsGreaterSupported(const TensorInfo& input0,
const TensorInfo& input1,
std::unique_ptr<IWorkload> NeonWorkloadFactory::CreateGather(const armnn::GatherQueueDescriptor& descriptor,
const armnn::WorkloadInfo& info) const
{
- return MakeWorkloadHelper<NullWorkload, NullWorkload>(descriptor, info);
+ // Gather is now backed by a real ACL workload instead of the Null placeholder.
+ return std::make_unique<NeonGatherWorkload>(descriptor, info);
}
std::unique_ptr<IWorkload> NeonWorkloadFactory::CreateGreater(const GreaterQueueDescriptor& descriptor,
workloads/NeonExpWorkload.cpp \
workloads/NeonFloorFloatWorkload.cpp \
workloads/NeonFullyConnectedWorkload.cpp \
+ workloads/NeonGatherWorkload.cpp \
workloads/NeonInstanceNormalizationWorkload.cpp \
workloads/NeonL2NormalizationFloatWorkload.cpp \
workloads/NeonLstmFloatWorkload.cpp \
// Floor
ARMNN_AUTO_TEST_CASE(SimpleFloor, SimpleFloorTest<DataType::Float32>)
+// Gather
+ARMNN_AUTO_TEST_CASE(Gather1dParamsFloat32, Gather1dParamsFloat32Test)
+ARMNN_AUTO_TEST_CASE(Gather1dParamsUint8, Gather1dParamsUint8Test)
+ARMNN_AUTO_TEST_CASE(GatherMultiDimParamsFloat32, GatherMultiDimParamsFloat32Test)
+ARMNN_AUTO_TEST_CASE(GatherMultiDimParamsUint8, GatherMultiDimParamsUint8Test)
+
// Equal
ARMNN_AUTO_TEST_CASE(EqualSimple, EqualSimpleTest)
ARMNN_AUTO_TEST_CASE(EqualBroadcast1Element, EqualBroadcast1ElementTest)
NeonFloorFloatWorkload.hpp
NeonFullyConnectedWorkload.cpp
NeonFullyConnectedWorkload.hpp
+ NeonGatherWorkload.cpp
+ NeonGatherWorkload.hpp
NeonInstanceNormalizationWorkload.cpp
NeonInstanceNormalizationWorkload.hpp
NeonL2NormalizationFloatWorkload.cpp
{
const arm_compute::TensorInfo neonOutputInfo = armcomputetensorutils::BuildArmComputeTensorInfo(output);
- std::array<arm_compute::DataType,8> supportedTypes = {
+ std::array<arm_compute::DataType,9> supportedTypes = {
arm_compute::DataType::BFLOAT16,
arm_compute::DataType::F16,
arm_compute::DataType::F32,
arm_compute::DataType::QASYMM8_SIGNED,
arm_compute::DataType::QSYMM16,
arm_compute::DataType::QSYMM8,
- arm_compute::DataType::QSYMM8_PER_CHANNEL
+ arm_compute::DataType::QSYMM8_PER_CHANNEL,
+ arm_compute::DataType::S32
};
auto it = std::find(begin(supportedTypes), end(supportedTypes), neonOutputInfo.data_type());
CopyArmComputeITensorData(data.m_LayerOutput->GetConstTensor<int8_t>(), output);
break;
}
+ case arm_compute::DataType::S32:
+ {
+ CopyArmComputeITensorData(data.m_LayerOutput->GetConstTensor<int32_t>(), output);
+ break;
+ }
default:
{
ARMNN_ASSERT_MSG(false, "Unknown data type");
--- /dev/null
+//
+// Copyright © 2020 Arm Ltd. All rights reserved.
+// SPDX-License-Identifier: MIT
+//
+
+#include "NeonGatherWorkload.hpp"
+#include "NeonWorkloadUtils.hpp"
+#include <armnn/utility/PolymorphicDowncast.hpp>
+#include <aclCommon/ArmComputeUtils.hpp>
+
+namespace armnn
+{
+/// Asks ACL whether NEGather supports the given params/indices/output tensor combination.
+arm_compute::Status NeonGatherWorkloadValidate(const TensorInfo& input,
+ const TensorInfo& indices,
+ const TensorInfo& output)
+{
+ const arm_compute::TensorInfo aclInput = BuildArmComputeTensorInfo(input);
+ const arm_compute::TensorInfo aclIndices = BuildArmComputeTensorInfo(indices);
+ const arm_compute::TensorInfo aclOutput = BuildArmComputeTensorInfo(output);
+
+ // Gather here is always along ArmNN axis 0; convert it to the equivalent
+ // ACL (reversed dimension order) axis before handing it to NEGather.
+ int aclAxis = ComputeAclAxis(0, input);
+
+ return arm_compute::NEGather::validate(&aclInput, &aclIndices, &aclOutput, aclAxis);
+}
+
+NeonGatherWorkload::NeonGatherWorkload(const GatherQueueDescriptor& descriptor,
+ const WorkloadInfo& info)
+ : BaseWorkload<GatherQueueDescriptor>(descriptor, info)
+{
+ // Gather consumes TWO inputs (the params tensor and the indices tensor) and
+ // produces one output — the body below reads m_Inputs[0] and m_Inputs[1], so
+ // the exact-count validation must expect 2 inputs, not 1.
+ m_Data.ValidateInputsOutputs("NeonGatherWorkload", 2, 1);
+
+ arm_compute::ITensor& input = PolymorphicDowncast<IAclTensorHandle*>(m_Data.m_Inputs[0])->GetTensor();
+ arm_compute::ITensor& indices = PolymorphicDowncast<IAclTensorHandle*>(m_Data.m_Inputs[1])->GetTensor();
+ arm_compute::ITensor& output = PolymorphicDowncast<IAclTensorHandle*>(m_Data.m_Outputs[0])->GetTensor();
+
+ // Gather along ArmNN axis 0, converted to ACL's reversed dimension ordering.
+ int aclAxis = ComputeAclAxis(0, info.m_InputTensorInfos[0]);
+
+ m_Layer.configure(&input, &indices, &output, aclAxis);
+}
+
+void NeonGatherWorkload::Execute() const
+{
+ // Profiled entry point: runs the NEGather function configured in the constructor.
+ ARMNN_SCOPED_PROFILING_EVENT_NEON("NeonGatherWorkload_Execute");
+ m_Layer.run();
+}
+} //namespace armnn
\ No newline at end of file
--- /dev/null
+//
+// Copyright © 2020 Arm Ltd. All rights reserved.
+// SPDX-License-Identifier: MIT
+//
+
+#pragma once
+
+#include <backendsCommon/Workload.hpp>
+
+#include <arm_compute/runtime/NEON/functions/NEGather.h>
+
+namespace armnn
+{
+arm_compute::Status NeonGatherWorkloadValidate(const TensorInfo& input,
+ const TensorInfo& indices,
+ const TensorInfo& output);
+
+/// Neon (CPU/NEON) workload executing the Gather layer via ACL's NEGather function.
+class NeonGatherWorkload : public BaseWorkload<GatherQueueDescriptor>
+{
+public:
+ NeonGatherWorkload(const GatherQueueDescriptor& descriptor, const WorkloadInfo& info);
+ virtual void Execute() const override;
+
+private:
+ // mutable: NEGather::run() is non-const but Execute() is a const member.
+ mutable arm_compute::NEGather m_Layer;
+};
+
+} //namespace armnn
\ No newline at end of file
#include "NeonExpWorkload.hpp"
#include "NeonFloorFloatWorkload.hpp"
#include "NeonFullyConnectedWorkload.hpp"
+#include "NeonGatherWorkload.hpp"
#include "NeonInstanceNormalizationWorkload.hpp"
#include "NeonL2NormalizationFloatWorkload.hpp"
#include "NeonLstmFloatWorkload.hpp"