Release 18.08
[platform/upstream/armnn.git] / src / armnn / backends / ArmComputeTensorUtils.hpp
1 //
2 // Copyright © 2017 Arm Ltd. All rights reserved.
3 // See LICENSE file in the project root for full license information.
4 //
5 #pragma once
6
7 #include <armnn/Tensor.hpp>
8 #include <armnn/DescriptorsFwd.hpp>
9
10 #include <arm_compute/core/ITensor.h>
11 #include <arm_compute/core/TensorInfo.h>
12 #include <arm_compute/core/Types.h>
13
14 #include <boost/cast.hpp>
15
16 namespace armnn
17 {
18 class ITensorHandle;
19
20 namespace armcomputetensorutils
21 {
22
/// Utility function to map an armnn::DataType to the corresponding arm_compute::DataType.
arm_compute::DataType GetArmComputeDataType(armnn::DataType dataType);

/// Utility function used to set up an arm_compute::TensorShape object from an armnn::TensorShape.
arm_compute::TensorShape BuildArmComputeTensorShape(const armnn::TensorShape& tensorShape);

/// Utility function used to set up an arm_compute::TensorInfo object whose dimensions are based on the given
/// armnn::TensorInfo.
arm_compute::TensorInfo BuildArmComputeTensorInfo(const armnn::TensorInfo& tensorInfo);

/// Utility function used to set up an arm_compute::PoolingLayerInfo object from an armnn::Pooling2dDescriptor.
arm_compute::PoolingLayerInfo BuildArmComputePoolingLayerInfo(const Pooling2dDescriptor& descriptor);

/// Utility function to set up an arm_compute::NormalizationLayerInfo object from an armnn::NormalizationDescriptor.
arm_compute::NormalizationLayerInfo BuildArmComputeNormalizationLayerInfo(const NormalizationDescriptor& desc);

/// Utility function used to set up an arm_compute::PermutationVector object from an armnn::PermutationVector.
arm_compute::PermutationVector BuildArmComputePermutationVector(const armnn::PermutationVector& vector);
41
42 /// Utility function used to setup an arm_compute::PadStrideInfo object from an armnn layer descriptor.
43 template <typename Descriptor>
44 arm_compute::PadStrideInfo BuildArmComputePadStrideInfo(const Descriptor &descriptor)
45 {
46     return arm_compute::PadStrideInfo(descriptor.m_StrideX,
47                                       descriptor.m_StrideY,
48                                       descriptor.m_PadLeft,
49                                       descriptor.m_PadRight,
50                                       descriptor.m_PadTop,
51                                       descriptor.m_PadBottom,
52                                       arm_compute::DimensionRoundingType::FLOOR);
53 }
54
55 /// Sets up the given ArmCompute tensor's dimensions based on the given ArmNN tensor.
56 template <typename Tensor>
57 void BuildArmComputeTensor(Tensor& tensor, const armnn::TensorInfo& tensorInfo)
58 {
59     tensor.allocator()->init(BuildArmComputeTensorInfo(tensorInfo));
60 }
61
/// Asks the given tensor's allocator to allocate its backing storage.
/// The contents of the allocated memory are not initialised.
template <typename Tensor>
void InitialiseArmComputeTensorEmpty(Tensor& tensor)
{
    auto* allocator = tensor.allocator();
    allocator->allocate();
}
67
/// Utility function to free unused tensors after a workload is configured and prepared.
/// Releases the tensor only when it is non-null and the ACL runtime reports it as no
/// longer used; a null pointer or a still-used tensor is left untouched.
template <typename Tensor>
void FreeTensorIfUnused(std::unique_ptr<Tensor>& tensor)
{
    if (tensor && !tensor->is_used())
    {
        // reset() is the idiomatic spelling of reset(nullptr).
        tensor.reset();
    }
}
77
78 // Helper function to obtain byte offset into tensor data
79 inline size_t GetTensorOffset(const arm_compute::ITensorInfo& info,
80                               uint32_t batchIndex,
81                               uint32_t channelIndex,
82                               uint32_t y,
83                               uint32_t x)
84 {
85     arm_compute::Coordinates coords;
86     coords.set(3, static_cast<int>(batchIndex));
87     coords.set(2, static_cast<int>(channelIndex));
88     coords.set(1, static_cast<int>(y));
89     coords.set(0, static_cast<int>(x));
90     return info.offset_element_in_bytes(coords);
91 }
92
93 // Helper function to obtain element offset into data buffer representing tensor data (assuming no strides).
94 inline size_t GetLinearBufferOffset(const arm_compute::ITensorInfo& info,
95                                     uint32_t batchIndex,
96                                     uint32_t channelIndex,
97                                     uint32_t y,
98                                     uint32_t x)
99 {
100     const arm_compute::TensorShape& shape = info.tensor_shape();
101     uint32_t width = static_cast<uint32_t>(shape[0]);
102     uint32_t height = static_cast<uint32_t>(shape[1]);
103     uint32_t numChannels = static_cast<uint32_t>(shape[2]);
104     return ((batchIndex * numChannels + channelIndex) * height + y) * width + x;
105 }
106
107 template <typename T>
108 void CopyArmComputeITensorData(const arm_compute::ITensor& srcTensor, T* dstData)
109 {
110     // If MaxNumOfTensorDimensions is increased, this loop will need fixing.
111     static_assert(MaxNumOfTensorDimensions == 4, "Please update CopyArmComputeITensorData");
112     {
113         const arm_compute::ITensorInfo& info = *srcTensor.info();
114         const arm_compute::TensorShape& shape = info.tensor_shape();
115         const uint8_t* const bufferPtr = srcTensor.buffer();
116         uint32_t width = static_cast<uint32_t>(shape[0]);
117         uint32_t height = static_cast<uint32_t>(shape[1]);
118         uint32_t numChannels = static_cast<uint32_t>(shape[2]);
119         uint32_t numBatches = static_cast<uint32_t>(shape[3]);
120
121         for (unsigned int batchIndex = 0; batchIndex < numBatches; ++batchIndex)
122         {
123             for (unsigned int channelIndex = 0; channelIndex < numChannels; ++channelIndex)
124             {
125                 for (unsigned int y = 0; y < height; ++y)
126                 {
127                     // Copies one row from arm_compute tensor buffer to linear memory buffer.
128                     // A row is the largest contiguous region we can copy, as the tensor data may be using strides.
129                     memcpy(dstData + GetLinearBufferOffset(info, batchIndex, channelIndex, y, 0),
130                            bufferPtr + GetTensorOffset(info, batchIndex, channelIndex, y, 0),
131                            width * sizeof(T));
132                 }
133             }
134         }
135     }
136 }
137
138 template <typename T>
139 void CopyArmComputeITensorData(const T* srcData, arm_compute::ITensor& dstTensor)
140 {
141     // If MaxNumOfTensorDimensions is increased, this loop will need fixing.
142     static_assert(MaxNumOfTensorDimensions == 4, "Please update CopyArmComputeITensorData");
143     {
144         const arm_compute::ITensorInfo& info = *dstTensor.info();
145         const arm_compute::TensorShape& shape = info.tensor_shape();
146         uint8_t* const bufferPtr = dstTensor.buffer();
147         uint32_t width = static_cast<uint32_t>(shape[0]);
148         uint32_t height = static_cast<uint32_t>(shape[1]);
149         uint32_t numChannels = static_cast<uint32_t>(shape[2]);
150         uint32_t numBatches = static_cast<uint32_t>(shape[3]);
151
152         for (unsigned int batchIndex = 0; batchIndex < numBatches; ++batchIndex)
153         {
154             for (unsigned int channelIndex = 0; channelIndex < numChannels; ++channelIndex)
155             {
156                 for (unsigned int y = 0; y < height; ++y)
157                 {
158                     // Copies one row from linear memory buffer to arm_compute tensor buffer.
159                     // A row is the largest contiguous region we can copy, as the tensor data may be using strides.
160                     memcpy(bufferPtr + GetTensorOffset(info, batchIndex, channelIndex, y, 0),
161                            srcData + GetLinearBufferOffset(info, batchIndex, channelIndex, y, 0),
162                            width * sizeof(T));
163                 }
164             }
165         }
166     }
167 }
168
169 /// Construct a TensorShape object from an ArmCompute object based on arm_compute::Dimensions.
170 /// \tparam ArmComputeType Any type that implements the Dimensions interface
171 /// \tparam T Shape value type
172 /// \param shapelike An ArmCompute object that implements the Dimensions interface
173 /// \param initial A default value to initialise the shape with
174 /// \return A TensorShape object filled from the Acl shapelike object.
175 template<typename ArmComputeType, typename T>
176 TensorShape GetTensorShape(const ArmComputeType& shapelike, T initial)
177 {
178     std::vector<unsigned int> s(MaxNumOfTensorDimensions, initial);
179     for (unsigned int i=0; i < shapelike.num_dimensions(); ++i)
180     {
181         s[(shapelike.num_dimensions()-1)-i] = boost::numeric_cast<unsigned int>(shapelike[i]);
182     }
183     return TensorShape(boost::numeric_cast<unsigned int>(shapelike.num_dimensions()), s.data());
184 };
185
186 /// Get the strides from an ACL strides object
187 inline TensorShape GetStrides(const arm_compute::Strides& strides)
188 {
189     return GetTensorShape(strides, 0U);
190 }
191
192 /// Get the shape from an ACL shape object
193 inline TensorShape GetShape(const arm_compute::TensorShape& shape)
194 {
195     return GetTensorShape(shape, 1U);
196 }
197
198 } // namespace armcomputetensorutils
199 } // namespace armnn