From 89a8077f3d68f0f431f3657a8805f7751f5eac69 Mon Sep 17 00:00:00 2001 From: Johannes Doerfert Date: Mon, 27 Feb 2023 13:31:05 -0800 Subject: [PATCH] [OpenMP][FIX] Properly align firstprivate variables The old code didn't actually align the values, and it added padding even when none was necessary. This approach will pad entries if necessary and, similar to the struct case, use the host pointer as guidance. NOTE: This does still not align them as the host has, but it's unclear if the user really should use the alignment bits anyway. For now this is a reasonable compromise, only if we have host alignment information (explicitly not implicitly via the host pointer), we could do it completely right without wasting lots of resources for >99% of the cases. Fixes: https://github.com/llvm/llvm-project/issues/61034 --- openmp/libomptarget/src/omptarget.cpp | 62 +++++++++++++++++----- .../test/mapping/firstprivate_aligned.cpp | 37 +++++++++++++ 2 files changed, 86 insertions(+), 13 deletions(-) create mode 100644 openmp/libomptarget/test/mapping/firstprivate_aligned.cpp diff --git a/openmp/libomptarget/src/omptarget.cpp b/openmp/libomptarget/src/omptarget.cpp index 441da7c..9d800d7 100644 --- a/openmp/libomptarget/src/omptarget.cpp +++ b/openmp/libomptarget/src/omptarget.cpp @@ -16,6 +16,8 @@ #include "private.h" #include "rtl.h" +#include "llvm/ADT/bit.h" + #include #include #include @@ -105,7 +107,7 @@ static const int64_t MaxAlignment = 16; /// Return the alignment requirement of partially mapped structs, see /// MaxAlignment above. -static int64_t getPartialStructRequiredAlignment(void *HstPtrBase) { +static uint64_t getPartialStructRequiredAlignment(void *HstPtrBase) { auto BaseAlignment = reinterpret_cast(HstPtrBase) % MaxAlignment; return BaseAlignment == 0 ? MaxAlignment : BaseAlignment; } @@ -1289,22 +1291,27 @@ class PrivateArgumentManagerTy { /// use this information to optimize data transfer by packing all /// first-private arguments and transfer them all at once. struct FirstPrivateArgInfoTy { - /// The index of the element in \p TgtArgs corresponding to the argument - int Index; /// Host pointer begin char *HstPtrBegin; /// Host pointer end char *HstPtrEnd; - /// Aligned size - int64_t AlignedSize; + /// The index of the element in \p TgtArgs corresponding to the argument + int Index; + /// Alignment of the entry (base of the entry, not after the entry). + uint32_t Alignment; + /// Size (without alignment, see padding) + uint32_t Size; + /// Padding used to align this argument entry, if necessary. + uint32_t Padding; /// Host pointer name map_var_info_t HstPtrName = nullptr; - FirstPrivateArgInfoTy(int Index, void *HstPtr, int64_t Size, + FirstPrivateArgInfoTy(int Index, void *HstPtr, uint32_t Size, + uint32_t Alignment, uint32_t Padding, const map_var_info_t HstPtrName = nullptr) - : Index(Index), HstPtrBegin(reinterpret_cast(HstPtr)), - HstPtrEnd(HstPtrBegin + Size), - AlignedSize(Size + Size % MaxAlignment), HstPtrName(HstPtrName) {} + : HstPtrBegin(reinterpret_cast(HstPtr)), + HstPtrEnd(HstPtrBegin + Size), Index(Index), Alignment(Alignment), + Size(Size), Padding(Padding), HstPtrName(HstPtrName) {} }; /// A vector of target pointers for all private arguments @@ -1382,9 +1389,34 @@ public: // Placeholder value TgtPtr = nullptr; + auto *LastFPArgInfo = + FirstPrivateArgInfo.empty() ? nullptr : &FirstPrivateArgInfo.back(); + + // Compute the start alignment of this entry, add padding if necessary. + // TODO: Consider sorting instead. + uint32_t Padding = 0; + uint32_t StartAlignment = + LastFPArgInfo ? LastFPArgInfo->Alignment : MaxAlignment; + if (LastFPArgInfo) { + // Check if we keep the start alignment or if it is shrunk due to the + // size of the last element. + uint32_t Offset = LastFPArgInfo->Size % StartAlignment; + if (Offset) + StartAlignment = Offset; + // We only need as much alignment as the host pointer had (since we + // don't know the alignment information from the source we might end up + // overaligning accesses but not too much). + uint32_t RequiredAlignment = + llvm::bit_floor(getPartialStructRequiredAlignment(HstPtr)); + if (RequiredAlignment > StartAlignment) { + Padding = RequiredAlignment - StartAlignment; + StartAlignment = RequiredAlignment; + } + } + FirstPrivateArgInfo.emplace_back(TgtArgsIndex, HstPtr, ArgSize, - HstPtrName); - FirstPrivateArgSize += FirstPrivateArgInfo.back().AlignedSize; + StartAlignment, Padding, HstPtrName); + FirstPrivateArgSize += Padding + ArgSize; } return OFFLOAD_SUCCESS; @@ -1400,8 +1432,10 @@ public: auto Itr = FirstPrivateArgBuffer.begin(); // Copy all host data to this buffer for (FirstPrivateArgInfoTy &Info : FirstPrivateArgInfo) { + // First pad the pointer as we (have to) pad it on the device too. + Itr = std::next(Itr, Info.Padding); std::copy(Info.HstPtrBegin, Info.HstPtrEnd, Itr); - Itr = std::next(Itr, Info.AlignedSize); + Itr = std::next(Itr, Info.Size); } // Allocate target memory void *TgtPtr = @@ -1425,8 +1459,10 @@ public: for (FirstPrivateArgInfoTy &Info : FirstPrivateArgInfo) { void *&Ptr = TgtArgs[Info.Index]; assert(Ptr == nullptr && "Target pointer is already set by mistaken"); + // Pad the device pointer to get the right alignment. + TP += Info.Padding; Ptr = reinterpret_cast(TP); - TP += Info.AlignedSize; + TP += Info.Size; DP("Firstprivate array " DPxMOD " of size %" PRId64 " mapped to " DPxMOD "\n", DPxPTR(Info.HstPtrBegin), Info.HstPtrEnd - Info.HstPtrBegin, diff --git a/openmp/libomptarget/test/mapping/firstprivate_aligned.cpp b/openmp/libomptarget/test/mapping/firstprivate_aligned.cpp new file mode 100644 index 0000000..ae6be0f --- /dev/null +++ b/openmp/libomptarget/test/mapping/firstprivate_aligned.cpp @@ -0,0 +1,37 @@ +// RUN: %libomptarget-compilexx-generic -O3 && %libomptarget-run-generic + +#include + +// CHECK: rx: 16, ry: 16; +// CHECK: rx: 16, ry: 16; +// CHECK: rx: 16, ry: 16; +// CHECK: rx: 16, ry: 16; + +template void test() { + printf("Test %saligned firstprivate\n", Aligned ? "" : "non-"); + char z1[3 + Aligned], z2[3 + Aligned]; + int x[4]; + int y[4]; + y[0] = y[1] = y[2] = y[3] = 4; + x[0] = x[1] = x[2] = x[3] = 4; + int rx = -1, ry = -1; +#pragma omp target firstprivate(z1, y, z2) map(from : ry, rx) map(to : x) + { + ry = (y[0] + y[1] + y[2] + y[3]); + rx = (x[0] + x[1] + x[2] + x[3]); + } + printf(" rx:%i, ry:%i\n", rx, ry); +#pragma omp target firstprivate(z1, y, z2) map(from : ry, rx) map(to : x) + { + z1[2] += 5; + ry = (y[0] + y[1] + y[2] + y[3]); + rx = (x[0] + x[1] + x[2] + x[3]); + z2[2] += 7; + } + printf(" rx:%i, ry:%i\n", rx, ry); +} + +int main() { + test(); + test(); +} -- 2.7.4