Add asserts to check memcpy addresses
[platform/upstream/armnn.git] / src/backends/backendsCommon/WorkloadUtils.hpp
//
// Copyright © 2017 Arm Ltd. All rights reserved.
// SPDX-License-Identifier: MIT
//

#pragma once

#include "CpuTensorHandle.hpp"
#include "ITensorHandle.hpp"

#include <armnn/Tensor.hpp>

#include <armnnUtils/Permute.hpp>

#include <Half.hpp>
#include <Profiling.hpp>

#include <boost/assert.hpp>
#include <boost/cast.hpp>
#include <boost/core/ignore_unused.hpp>
namespace armnn
{
namespace
{

// Assigns array[(num - 1) - idx] to arg and advances idx: values are
// consumed from the back of the array towards the front. Does nothing
// once idx reaches num.
template <typename ArrayType, typename Arg>
void AssignValues(unsigned int num, unsigned int& idx, const ArrayType& array, Arg& arg)
{
    if (idx >= num)
    {
        return;
    }

    arg = array[(num - 1) - idx];
    idx++;
}

// Unpacks the trailing elements of array into the given arguments, starting
// from the last element. Surplus arguments keep their initial values.
template <typename T, typename ArrayType, typename... Args>
void AssignValues(unsigned int num, unsigned int idx, const ArrayType& array, T& assignee, Args&... args)
{
    AssignValues(num, idx, array, assignee);

    AssignValues(num, idx, array, args...);
}

}    // anonymous namespace
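
// For example (illustrative only): given a 4D NHWC shape {N, H, W, C},
// AssignValues(4, 0, shape, channels, width, height, batches, depth) yields
// channels == C, width == W, height == H and batches == N, while depth keeps
// its initial value because only four dimensions are available.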

template <typename CopyFunc>
void CopyTensorContentsGeneric(const ITensorHandle* srcTensor, ITensorHandle* dstTensor, CopyFunc copy)
{
    // For ease of understanding, names are assigned to the dimensions of the
    // tensor as if it were NHWC; however, this routine works with any tensor
    // of up to 5 dimensions.
    static_assert(MaxNumOfTensorDimensions == 5, "Please update CopyTensorContents");

    // srcSize/dstSize are the total extents of each buffer in bytes
    // (outermost stride times outermost dimension); they bound the
    // addresses checked by the asserts in the copy loop below.
    TensorShape srcStrides      = srcTensor->GetStrides();
    const TensorShape& srcShape = srcTensor->GetShape();
    const auto srcSize          = srcTensor->GetStrides()[0] * srcShape[0];
    boost::ignore_unused(srcSize);  // Only used for asserts
    TensorShape dstStrides      = dstTensor->GetStrides();
    const TensorShape& dstShape = dstTensor->GetShape();
    const auto dstSize          = dstTensor->GetStrides()[0] * dstShape[0];
    boost::ignore_unused(dstSize);  // Only used for asserts

    size_t srcDepth    = 1;
    size_t srcBatches  = 1;
    size_t srcHeight   = 1;
    size_t srcWidth    = 1;
    size_t srcChannels = 1;
    AssignValues(srcShape.GetNumDimensions(),
                 0,
                 srcShape,
                 srcChannels,
                 srcWidth,
                 srcHeight,
                 srcBatches,
                 srcDepth);

    size_t srcDepthStride   = 0;
    size_t srcBatchStride   = 0;
    size_t srcHeightStride  = 0;
    size_t srcWidthStride   = 0;
    size_t srcChannelStride = 0;
    AssignValues(srcStrides.GetNumDimensions(),
                 0,
                 srcStrides,
                 srcChannelStride,
                 srcWidthStride,
                 srcHeightStride,
                 srcBatchStride,
                 srcDepthStride);

    size_t dstDepth    = 1;
    size_t dstBatches  = 1;
    size_t dstHeight   = 1;
    size_t dstWidth    = 1;
    size_t dstChannels = 1;
    AssignValues(dstShape.GetNumDimensions(),
                 0,
                 dstShape,
                 dstChannels,
                 dstWidth,
                 dstHeight,
                 dstBatches,
                 dstDepth);

    size_t dstDepthStride   = 0;
    size_t dstBatchStride   = 0;
    size_t dstHeightStride  = 0;
    size_t dstWidthStride   = 0;
    size_t dstChannelStride = 0;
    AssignValues(dstStrides.GetNumDimensions(),
                 0,
                 dstStrides,
                 dstChannelStride,
                 dstWidthStride,
                 dstHeightStride,
                 dstBatchStride,
                 dstDepthStride);

    const unsigned char* srcDataStart;
    unsigned char* dstDataStart;
    {
        ARMNN_SCOPED_PROFILING_EVENT(Compute::Undefined, "Synchronize buffers");
        srcDataStart = static_cast<const uint8_t*>(srcTensor->Map());
        dstDataStart = static_cast<uint8_t*>(dstTensor->Map());
    }

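    // Note: strides are expressed in bytes, so copyLength below is a byte
    // count; the remaining copy extents count elements per dimension.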
    size_t copyLength  = std::min(srcChannels * srcChannelStride, dstChannels * dstChannelStride);
    size_t copyWidth   = std::min(srcWidth, dstWidth);
    size_t copyHeight  = std::min(srcHeight, dstHeight);
    size_t copyBatches = std::min(srcBatches, dstBatches);
    size_t copyDepth   = std::min(srcDepth, dstDepth);

    // Coalesce inner dimensions where possible
    // to reduce the overhead of calling copy() and to
    // allow for memory bandwidth optimisations.
    if (copyLength == srcWidthStride &&
        copyLength == dstWidthStride)
    {
        // There is no special padding between rows,
        // and sizes are compatible, so copy whole rows.
        copyLength *= copyWidth;
        copyWidth = 1;

        if (copyLength == srcHeightStride &&
            copyLength == dstHeightStride)
        {
            // Whole rows are also contiguous and of compatible size,
            // so copy entire 2D planes at once.
            copyLength *= copyHeight;
            copyHeight = 1;
        }
    }
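
    // For example (illustrative only): for densely packed float32 NHWC
    // tensors of shape 2x4x4x3, copyLength starts at 3 * 4 = 12 bytes (one
    // pixel), grows to 48 bytes once rows are coalesced, then to 192 bytes
    // once planes are coalesced, leaving a single copy() per batch.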

    const unsigned char* srcData = srcDataStart;
    unsigned char* dstData = dstDataStart;
    for (unsigned int d = 0; d < copyDepth; ++d)
    {
        auto srcPtrDepth = srcData;
        auto dstPtrDepth = dstData;
        for (unsigned int b = 0; b < copyBatches; ++b)
        {
            auto srcPtrBatch = srcData;
            auto dstPtrBatch = dstData;
            for (unsigned int h = 0; h < copyHeight; ++h)
            {
                auto srcPtrChannel = srcData;
                auto dstPtrChannel = dstData;
                for (unsigned int w = 0; w < copyWidth; ++w)
                {
                    // Check that the copy does not read or write out of bounds.
                    BOOST_ASSERT(srcData >= srcDataStart && srcData + copyLength <= srcDataStart + srcSize);
                    BOOST_ASSERT(dstData >= dstDataStart && dstData + copyLength <= dstDataStart + dstSize);
                    copy(dstData, srcData, copyLength);
                    dstData += dstWidthStride;
                    srcData += srcWidthStride;
                }
                // Rewind to the start of the row, then advance by one height stride.
                dstData += (static_cast<long>(dstHeightStride) - (dstData - dstPtrChannel));
                srcData += (static_cast<long>(srcHeightStride) - (srcData - srcPtrChannel));
            }
            // Rewind to the start of the batch, then advance by one batch stride.
            dstData += (static_cast<long>(dstBatchStride) - (dstData - dstPtrBatch));
            srcData += (static_cast<long>(srcBatchStride) - (srcData - srcPtrBatch));
        }
        // Rewind to the start of the depth slice, then advance by one depth stride.
        dstData += (static_cast<long>(dstDepthStride) - (dstData - dstPtrDepth));
        srcData += (static_cast<long>(srcDepthStride) - (srcData - srcPtrDepth));
    }

    srcTensor->Unmap();
    dstTensor->Unmap();
}
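
// A typical invocation (a sketch; any callable with the signature
// copy(dst, src, numBytes) will do) uses a plain memcpy for the innermost
// transfers:
//
//     CopyTensorContentsGeneric(srcTensorHandle, dstTensorHandle,
//         [](void* dst, const void* src, size_t size) { memcpy(dst, src, size); });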

template <typename SrcTensorHandleType, typename DstTensorHandleType, typename DescriptorType>
void GatherTensorHandlePairs(const DescriptorType& descriptor,
                             std::vector<std::pair<SrcTensorHandleType*, DstTensorHandleType*>>& tensorHandlePairs)
{
    const unsigned int numInputs = static_cast<unsigned int>(descriptor.m_Inputs.size());
    tensorHandlePairs.reserve(numInputs);

    for (unsigned int i = 0; i < numInputs; ++i)
    {
        SrcTensorHandleType* const srcTensorHandle =
            boost::polymorphic_downcast<SrcTensorHandleType*>(descriptor.m_Inputs[i]);
        DstTensorHandleType* const dstTensorHandle =
            boost::polymorphic_downcast<DstTensorHandleType*>(descriptor.m_Outputs[i]);

        tensorHandlePairs.emplace_back(srcTensorHandle, dstTensorHandle);
    }
}
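
// Example (a sketch; the descriptor and handle types here are assumptions):
// pairing up the inputs and outputs of a mem-copy workload's queue descriptor:
//
//     std::vector<std::pair<ITensorHandle*, ITensorHandle*>> pairs;
//     GatherTensorHandlePairs(memCopyQueueDescriptor, pairs);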

int32_t ConvertMaskToACLFormat(int32_t mask, int32_t numDim);

armnn::ConstTensor PermuteTensor(const ConstCpuTensorHandle* tensor,
                                 const PermutationVector& permutationVector,
                                 void* permuteBuffer);

void ReshapeWeightsForAcl(TensorInfo& weightInfo, DataLayout dataLayout);

TensorInfo ConvertWeightTensorInfoFromArmnnToAcl(const TensorInfo& weightInfo, DataLayout dataLayout);

armnn::ConstTensor ConvertWeightTensorFromArmnnToAcl(const ConstCpuTensorHandle* weightTensor,
                                                     DataLayout dataLayout,
                                                     void* permuteBuffer);

}  // namespace armnn