From 019c4b1cd87849194cd30cb5bfd0e5d68089df08 Mon Sep 17 00:00:00 2001
From: Matthew Bentham
Date: Sun, 15 Sep 2019 00:06:05 +0100
Subject: [PATCH] In CopyTensorContentsGeneric coalesce inner dimensions where possible

This reduces the number of copy function calls made in the inner loop
and allows optimised implementations of memcpy to improve memory
bandwidth.

Signed-off-by: Matthew Bentham
Change-Id: I7458b45c075c87805242e92e54448b9dd762227f
---
 src/backends/backendsCommon/WorkloadUtils.hpp | 21 +++++++++++++++++++++
 1 file changed, 21 insertions(+)

diff --git a/src/backends/backendsCommon/WorkloadUtils.hpp b/src/backends/backendsCommon/WorkloadUtils.hpp
index 3e0c40d..cb614ea 100644
--- a/src/backends/backendsCommon/WorkloadUtils.hpp
+++ b/src/backends/backendsCommon/WorkloadUtils.hpp
@@ -125,6 +125,27 @@ void CopyTensorContentsGeneric(const ITensorHandle* srcTensor, ITensorHandle* ds
     size_t copyBatches = std::min(srcBatches, dstBatches);
     size_t copyDepth = std::min(srcDepth, dstDepth);
 
+    // Coalesce inner dimensions where possible
+    // to reduce the overhead of calling copy() and to
+    // allow for memory bandwidth optimisations
+    if (copyLength == srcWidthStride &&
+        copyLength == dstWidthStride)
+    {
+        // There is no special padding between rows,
+        // and sizes are compatible, so copy whole rows
+        copyLength *= copyWidth;
+        copyWidth = 1;
+
+        if (copyLength == srcHeightStride &&
+            copyLength == dstHeightStride)
+        {
+            // There is no special padding between batches,
+            // and sizes are compatible, so copy whole batches
+            copyLength *= copyHeight;
+            copyHeight = 1;
+        }
+    }
+
     for (unsigned int d = 0; d < copyDepth; ++d)
     {
         auto srcPtrDepth = srcData;
-- 
2.7.4
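
Note for reviewers (editorial addendum, not part of the patch): below is a
minimal, standalone C++ sketch of the coalescing condition used above. The
function CopyRowsCoalesced and its parameters are hypothetical and do not
exist in ArmNN; the point is only that when the byte stride between rows
equals the number of bytes actually copied per row, there is no padding
between rows, so the row count can be folded into the copy length and
memcpy runs once over the whole block instead of once per row.

// Standalone illustration of the coalescing idea in the patch.
// All names here are made up for illustration; only the stride check
// mirrors the logic added to CopyTensorContentsGeneric.
#include <cstddef>
#include <cstdint>
#include <cstring>
#include <iostream>
#include <vector>

// Copies 'height' rows of 'rowBytes' bytes each from src to dst.
// srcRowStride/dstRowStride are the byte distances between consecutive rows.
void CopyRowsCoalesced(const uint8_t* src, size_t srcRowStride,
                       uint8_t* dst, size_t dstRowStride,
                       size_t rowBytes, size_t height)
{
    // If neither buffer has padding between rows, the whole block is one
    // contiguous run of height * rowBytes bytes: fold the height dimension
    // into the copy length so memcpy is called once instead of 'height' times.
    if (rowBytes == srcRowStride && rowBytes == dstRowStride)
    {
        rowBytes *= height;
        height = 1;
    }

    for (size_t h = 0; h < height; ++h)
    {
        std::memcpy(dst + h * dstRowStride, src + h * srcRowStride, rowBytes);
    }
}

int main()
{
    constexpr size_t width = 8, height = 4;

    // Dense case: row stride equals row size, so one memcpy copies everything.
    std::vector<uint8_t> src(width * height), dst(width * height);
    for (size_t i = 0; i < src.size(); ++i) { src[i] = static_cast<uint8_t>(i); }
    CopyRowsCoalesced(src.data(), width, dst.data(), width, width, height);

    // Padded case: source rows are 16 bytes apart but only 8 bytes are valid,
    // so the copy cannot be coalesced and memcpy runs once per row.
    std::vector<uint8_t> paddedSrc(16 * height, 0xFF), dst2(width * height);
    CopyRowsCoalesced(paddedSrc.data(), 16, dst2.data(), width, width, height);

    std::cout << (dst == src ? "dense copy ok\n" : "dense copy failed\n");
    return 0;
}

The same reasoning applies one level up in the patch: once rows have been
coalesced, if the resulting copy length also matches the height stride on
both source and destination, whole batches are contiguous as well and can be
copied in a single call.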