Release 18.08
[platform/upstream/armnn.git] / src / armnn / backends / ArmComputeTensorUtils.hpp
1 //
2 // Copyright © 2017 Arm Ltd. All rights reserved.
3 // See LICENSE file in the project root for full license information.
4 //
5 #pragma once
6
7 #include <armnn/Tensor.hpp>
8 #include <armnn/DescriptorsFwd.hpp>
9
10 #include <arm_compute/core/ITensor.h>
11 #include <arm_compute/core/TensorInfo.h>
12 #include <arm_compute/core/Types.h>
13
14 #include <boost/cast.hpp>
15
16 namespace armnn
17 {
18 class ITensorHandle;
19
20 namespace armcomputetensorutils
21 {
22
/// Utility function to map an armnn::DataType to the corresponding arm_compute::DataType.
arm_compute::DataType GetArmComputeDataType(armnn::DataType dataType);

/// Utility function used to set up an arm_compute::TensorShape object from an armnn::TensorShape.
arm_compute::TensorShape BuildArmComputeTensorShape(const armnn::TensorShape& tensorShape);

/// Utility function used to set up an arm_compute::TensorInfo object whose dimensions are based on the given
/// armnn::TensorInfo.
arm_compute::TensorInfo BuildArmComputeTensorInfo(const armnn::TensorInfo& tensorInfo);

/// Utility function used to set up an arm_compute::PoolingLayerInfo object from an armnn::Pooling2dDescriptor.
arm_compute::PoolingLayerInfo BuildArmComputePoolingLayerInfo(const Pooling2dDescriptor& descriptor);

/// Utility function to set up an arm_compute::NormalizationLayerInfo object from an armnn::NormalizationDescriptor.
arm_compute::NormalizationLayerInfo BuildArmComputeNormalizationLayerInfo(const NormalizationDescriptor& desc);

/// Utility function used to set up an arm_compute::PermutationVector object from an armnn::PermutationVector.
arm_compute::PermutationVector BuildArmComputePermutationVector(const armnn::PermutationVector& vector);
41
42 /// Utility function used to setup an arm_compute::PadStrideInfo object from an armnn layer descriptor.
43 template <typename Descriptor>
44 arm_compute::PadStrideInfo BuildArmComputePadStrideInfo(const Descriptor &descriptor)
45 {
46     return arm_compute::PadStrideInfo(descriptor.m_StrideX,
47                                       descriptor.m_StrideY,
48                                       descriptor.m_PadLeft,
49                                       descriptor.m_PadRight,
50                                       descriptor.m_PadTop,
51                                       descriptor.m_PadBottom,
52                                       arm_compute::DimensionRoundingType::FLOOR);
53 }
54
55 /// Sets up the given ArmCompute tensor's dimensions based on the given ArmNN tensor.
56 template <typename Tensor>
57 void BuildArmComputeTensor(Tensor& tensor, const armnn::TensorInfo& tensorInfo)
58 {
59     tensor.allocator()->init(BuildArmComputeTensorInfo(tensorInfo));
60 }
61
/// Asks the given tensor's allocator to allocate its backing storage.
/// The contents of the allocated memory are not initialised.
template <typename Tensor>
void InitialiseArmComputeTensorEmpty(Tensor& tensor)
{
    auto* allocator = tensor.allocator();
    allocator->allocate();
}
67
/// Utility function to free unused tensors after a workload is configured and prepared.
/// Releases the tensor only when it is non-null and the ACL runtime reports it as no
/// longer used; a null pointer or a still-used tensor is left untouched.
template <typename Tensor>
void FreeTensorIfUnused(std::unique_ptr<Tensor>& tensor)
{
    if (tensor && !tensor->is_used())
    {
        // reset() is the idiomatic spelling of reset(nullptr).
        tensor.reset();
    }
}
77
78 // Helper function to obtain byte offset into tensor data
79 inline size_t GetTensorOffset(const arm_compute::ITensorInfo& info,
80                               uint32_t batchIndex,
81                               uint32_t channelIndex,
82                               uint32_t y,
83                               uint32_t x)
84 {
85     arm_compute::Coordinates coords;
86     coords.set(3, static_cast<int>(batchIndex));
87     coords.set(2, static_cast<int>(channelIndex));
88     coords.set(1, static_cast<int>(y));
89     coords.set(0, static_cast<int>(x));
90     return info.offset_element_in_bytes(coords);
91 }
92
93 // Helper function to obtain element offset into data buffer representing tensor data (assuming no strides).
94 inline size_t GetLinearBufferOffset(const arm_compute::ITensorInfo& info,
95                                     uint32_t batchIndex,
96                                     uint32_t channelIndex,
97                                     uint32_t y,
98                                     uint32_t x)
99 {
100     const arm_compute::TensorShape& shape = info.tensor_shape();
101     uint32_t width = static_cast<uint32_t>(shape[0]);
102     uint32_t height = static_cast<uint32_t>(shape[1]);
103     uint32_t numChannels = static_cast<uint32_t>(shape[2]);
104     return ((batchIndex * numChannels + channelIndex) * height + y) * width + x;
105 }
106
107 template <typename T>
108 void CopyArmComputeITensorData(const arm_compute::ITensor& srcTensor, T* dstData)
109 {
110     // If MaxNumOfTensorDimensions is increased, this loop will need fixing.
111     static_assert(MaxNumOfTensorDimensions == 4, "Please update CopyArmComputeITensorData");
112     {
113         const arm_compute::ITensorInfo& info = *srcTensor.info();
114         const arm_compute::TensorShape& shape = info.tensor_shape();
115         const uint8_t* const bufferPtr = srcTensor.buffer();
116         uint32_t width = static_cast<uint32_t>(shape[0]);
117         uint32_t height = static_cast<uint32_t>(shape[1]);
118         uint32_t numChannels = static_cast<uint32_t>(shape[2]);
119         uint32_t numBatches = static_cast<uint32_t>(shape[3]);
120
121         for (unsigned int batchIndex = 0; batchIndex < numBatches; ++batchIndex)
122         {
123             for (unsigned int channelIndex = 0; channelIndex < numChannels; ++channelIndex)
124             {
125                 for (unsigned int y = 0; y < height; ++y)
126                 {
127                     // Copies one row from arm_compute tensor buffer to linear memory buffer.
128                     // A row is the largest contiguous region we can copy, as the tensor data may be using strides.
129                     memcpy(dstData + GetLinearBufferOffset(info, batchIndex, channelIndex, y, 0),
130                            bufferPtr + GetTensorOffset(info, batchIndex, channelIndex, y, 0),
131                            width * sizeof(T));
132                 }
133             }
134         }
135     }
136 }
137
138 template <typename T>
139 void CopyArmComputeITensorData(const T* srcData, arm_compute::ITensor& dstTensor)
140 {
141     // If MaxNumOfTensorDimensions is increased, this loop will need fixing.
142     static_assert(MaxNumOfTensorDimensions == 4, "Please update CopyArmComputeITensorData");
143     {
144         const arm_compute::ITensorInfo& info = *dstTensor.info();
145         const arm_compute::TensorShape& shape = info.tensor_shape();
146         uint8_t* const bufferPtr = dstTensor.buffer();
147         uint32_t width = static_cast<uint32_t>(shape[0]);
148         uint32_t height = static_cast<uint32_t>(shape[1]);
149         uint32_t numChannels = static_cast<uint32_t>(shape[2]);
150         uint32_t numBatches = static_cast<uint32_t>(shape[3]);
151
152         for (unsigned int batchIndex = 0; batchIndex < numBatches; ++batchIndex)
153         {
154             for (unsigned int channelIndex = 0; channelIndex < numChannels; ++channelIndex)
155             {
156                 for (unsigned int y = 0; y < height; ++y)
157                 {
158                     // Copies one row from linear memory buffer to arm_compute tensor buffer.
159                     // A row is the largest contiguous region we can copy, as the tensor data may be using strides.
160                     memcpy(bufferPtr + GetTensorOffset(info, batchIndex, channelIndex, y, 0),
161                            srcData + GetLinearBufferOffset(info, batchIndex, channelIndex, y, 0),
162                            width * sizeof(T));
163                 }
164             }
165         }
166     }
167 }
168
169 /// Construct a TensorShape object from an ArmCompute object based on arm_compute::Dimensions.
170 /// \tparam ArmComputeType Any type that implements the Dimensions interface
171 /// \tparam T Shape value type
172 /// \param shapelike An ArmCompute object that implements the Dimensions interface
173 /// \param initial A default value to initialise the shape with
174 /// \return A TensorShape object filled from the Acl shapelike object.
175 template<typename ArmComputeType, typename T>
176 TensorShape GetTensorShape(const ArmComputeType& shapelike, T initial)
177 {
178     std::vector<unsigned int> s(MaxNumOfTensorDimensions, initial);
179     for (unsigned int i=0; i < shapelike.num_dimensions(); ++i)
180     {
181         s[(shapelike.num_dimensions()-1)-i] = boost::numeric_cast<unsigned int>(shapelike[i]);
182     }
183     return TensorShape(boost::numeric_cast<unsigned int>(shapelike.num_dimensions()), s.data());
184 };
185
186 /// Get the strides from an ACL strides object
187 inline TensorShape GetStrides(const arm_compute::Strides& strides)
188 {
189     return GetTensorShape(strides, 0U);
190 }
191
192 /// Get the shape from an ACL shape object
193 inline TensorShape GetShape(const arm_compute::TensorShape& shape)
194 {
195     return GetTensorShape(shape, 1U);
196 }
197
198 } // namespace armcomputetensorutils
199 } // namespace armnn