#include "private.h"
#include "rtl.h"
+#include "llvm/ADT/bit.h"
+
#include <cassert>
#include <cstdint>
#include <vector>
/// Return the alignment requirement of partially mapped structs, see
/// MaxAlignment above.
-static int64_t getPartialStructRequiredAlignment(void *HstPtrBase) {
+static uint64_t getPartialStructRequiredAlignment(void *HstPtrBase) {
auto BaseAlignment = reinterpret_cast<uintptr_t>(HstPtrBase) % MaxAlignment;
return BaseAlignment == 0 ? MaxAlignment : BaseAlignment;
}
/// use this information to optimize data transfer by packing all
/// first-private arguments and transfer them all at once.
struct FirstPrivateArgInfoTy {
- /// The index of the element in \p TgtArgs corresponding to the argument
- int Index;
/// Host pointer begin
char *HstPtrBegin;
/// Host pointer end
char *HstPtrEnd;
- /// Aligned size
- int64_t AlignedSize;
+ /// The index of the element in \p TgtArgs corresponding to the argument
+ int Index;
+ /// Alignment of the entry (base of the entry, not after the entry).
+ uint32_t Alignment;
+ /// Size (without alignment, see padding)
+ uint32_t Size;
+ /// Padding used to align this argument entry, if necessary.
+ uint32_t Padding;
/// Host pointer name
map_var_info_t HstPtrName = nullptr;
- FirstPrivateArgInfoTy(int Index, void *HstPtr, int64_t Size,
+ FirstPrivateArgInfoTy(int Index, void *HstPtr, uint32_t Size,
+ uint32_t Alignment, uint32_t Padding,
const map_var_info_t HstPtrName = nullptr)
- : Index(Index), HstPtrBegin(reinterpret_cast<char *>(HstPtr)),
- HstPtrEnd(HstPtrBegin + Size),
- AlignedSize(Size + Size % MaxAlignment), HstPtrName(HstPtrName) {}
+ : HstPtrBegin(reinterpret_cast<char *>(HstPtr)),
+ HstPtrEnd(HstPtrBegin + Size), Index(Index), Alignment(Alignment),
+ Size(Size), Padding(Padding), HstPtrName(HstPtrName) {}
};
/// A vector of target pointers for all private arguments
// Placeholder value
TgtPtr = nullptr;
+ auto *LastFPArgInfo =
+ FirstPrivateArgInfo.empty() ? nullptr : &FirstPrivateArgInfo.back();
+
+ // Compute the start alignment of this entry, add padding if necessary.
+ // TODO: Consider sorting instead.
+ uint32_t Padding = 0;
+ uint32_t StartAlignment =
+ LastFPArgInfo ? LastFPArgInfo->Alignment : MaxAlignment;
+ if (LastFPArgInfo) {
+ // Check if we keep the start alignment or if it is shrunk due to the
+ // size of the last element.
+ uint32_t Offset = LastFPArgInfo->Size % StartAlignment;
+ if (Offset)
+ StartAlignment = Offset;
+ // We only need as much alignment as the host pointer had (since we
+ // don't know the alignment information from the source we might end up
+ // overaligning accesses but not too much).
+ uint32_t RequiredAlignment =
+ llvm::bit_floor(getPartialStructRequiredAlignment(HstPtr));
+ if (RequiredAlignment > StartAlignment) {
+ Padding = RequiredAlignment - StartAlignment;
+ StartAlignment = RequiredAlignment;
+ }
+ }
+
FirstPrivateArgInfo.emplace_back(TgtArgsIndex, HstPtr, ArgSize,
- HstPtrName);
- FirstPrivateArgSize += FirstPrivateArgInfo.back().AlignedSize;
+ StartAlignment, Padding, HstPtrName);
+ FirstPrivateArgSize += Padding + ArgSize;
}
return OFFLOAD_SUCCESS;
auto Itr = FirstPrivateArgBuffer.begin();
// Copy all host data to this buffer
for (FirstPrivateArgInfoTy &Info : FirstPrivateArgInfo) {
+ // First pad the pointer as we (have to) pad it on the device too.
+ Itr = std::next(Itr, Info.Padding);
std::copy(Info.HstPtrBegin, Info.HstPtrEnd, Itr);
- Itr = std::next(Itr, Info.AlignedSize);
+ Itr = std::next(Itr, Info.Size);
}
// Allocate target memory
void *TgtPtr =
for (FirstPrivateArgInfoTy &Info : FirstPrivateArgInfo) {
void *&Ptr = TgtArgs[Info.Index];
assert(Ptr == nullptr && "Target pointer is already set by mistaken");
+ // Pad the device pointer to get the right alignment.
+ TP += Info.Padding;
Ptr = reinterpret_cast<void *>(TP);
- TP += Info.AlignedSize;
+ TP += Info.Size;
DP("Firstprivate array " DPxMOD " of size %" PRId64 " mapped to " DPxMOD
"\n",
DPxPTR(Info.HstPtrBegin), Info.HstPtrEnd - Info.HstPtrBegin,
--- /dev/null
+// RUN: %libomptarget-compilexx-generic -O3 && %libomptarget-run-generic
+
+#include <stdio.h>
+
+// CHECK: rx: 16, ry: 16;
+// CHECK: rx: 16, ry: 16;
+// CHECK: rx: 16, ry: 16;
+// CHECK: rx: 16, ry: 16;
+
+template <bool Aligned> void test() {
+ printf("Test %saligned firstprivate\n", Aligned ? "" : "non-");
+ char z1[3 + Aligned], z2[3 + Aligned];
+ int x[4];
+ int y[4];
+ y[0] = y[1] = y[2] = y[3] = 4;
+ x[0] = x[1] = x[2] = x[3] = 4;
+ int rx = -1, ry = -1;
+#pragma omp target firstprivate(z1, y, z2) map(from : ry, rx) map(to : x)
+ {
+ ry = (y[0] + y[1] + y[2] + y[3]);
+ rx = (x[0] + x[1] + x[2] + x[3]);
+ }
+ printf(" rx:%i, ry:%i\n", rx, ry);
+#pragma omp target firstprivate(z1, y, z2) map(from : ry, rx) map(to : x)
+ {
+ z1[2] += 5;
+ ry = (y[0] + y[1] + y[2] + y[3]);
+ rx = (x[0] + x[1] + x[2] + x[3]);
+ z2[2] += 7;
+ }
+ printf(" rx:%i, ry:%i\n", rx, ry);
+}
+
+int main() {
+ test<true>();
+ test<false>();
+}