From 9fa5e3280d0bfdb90e3f2823f5bc63446628682d Mon Sep 17 00:00:00 2001 From: "Joel E. Denny" Date: Wed, 23 Jun 2021 09:39:04 -0400 Subject: [PATCH] [OpenMP] Fix delete map type in ref count debug messages For example, without this patch: ``` $ cat test.c int main() { int x; #pragma omp target enter data map(alloc: x) #pragma omp target enter data map(alloc: x) #pragma omp target enter data map(alloc: x) #pragma omp target exit data map(delete: x) ; return 0; } $ clang -fopenmp -fopenmp-targets=nvptx64-nvidia-cuda test.c $ LIBOMPTARGET_DEBUG=1 ./a.out |& grep 'Creating\|Mapping exists\|last' Libomptarget --> Creating new map entry with HstPtrBegin=0x00007ffddf1eaea8, TgtPtrBegin=0x00000000013bb040, Size=4, RefCount=1, Name=unknown Libomptarget --> Mapping exists with HstPtrBegin=0x00007ffddf1eaea8, TgtPtrBegin=0x00000000013bb040, Size=4, RefCount=2 (incremented), Name=unknown Libomptarget --> Mapping exists with HstPtrBegin=0x00007ffddf1eaea8, TgtPtrBegin=0x00000000013bb040, Size=4, RefCount=3 (incremented), Name=unknown Libomptarget --> Mapping exists with HstPtrBegin=0x00007ffddf1eaea8, TgtPtrBegin=0x00000000013bb040, Size=4, RefCount=2 (decremented) Libomptarget --> There are 4 bytes allocated at target address 0x00000000013bb040 - is not last ``` `RefCount` is reported as decremented to 2, but it ought to be reset because of the `delete` map type, and `is not last` is incorrect. This patch migrates the reset of reference counts from `DeviceTy::deallocTgtPtr` to `DeviceTy::getTgtPtrBegin`, which then correctly reports the reset. Based on the `IsLast` result from `DeviceTy::getTgtPtrBegin`, `targetDataEnd` then correctly reports `is last` for any deletion. `DeviceTy::deallocTgtPtr` is responsible only for the final reference count decrement and mapping removal. An obscure side effect of this patch is that a `delete` map type when the reference count is infinite yields `DelEntry=IsLast=false` in `targetDataEnd` and so no longer results in a `DeviceTy::deallocTgtPtr` call. Without this patch, that call is a no-op anyway besides some unnecessary locking and mapping table lookups. Reviewed By: grokos Differential Revision: https://reviews.llvm.org/D104560 --- openmp/libomptarget/src/device.cpp | 29 +++++++++++++++++------------ openmp/libomptarget/src/device.h | 12 ++++++++++-- openmp/libomptarget/src/omptarget.cpp | 19 ++++++++----------- 3 files changed, 35 insertions(+), 25 deletions(-) diff --git a/openmp/libomptarget/src/device.cpp b/openmp/libomptarget/src/device.cpp index 83e8bd5..36bf23d 100644 --- a/openmp/libomptarget/src/device.cpp +++ b/openmp/libomptarget/src/device.cpp @@ -289,7 +289,7 @@ void *DeviceTy::getOrAllocTgtPtr(void *HstPtrBegin, void *HstPtrBase, // Decrement the reference counter if called from targetDataEnd. void *DeviceTy::getTgtPtrBegin(void *HstPtrBegin, int64_t Size, bool &IsLast, bool UpdateRefCount, bool &IsHostPtr, - bool MustContain) { + bool MustContain, bool ForceDelete) { void *rc = NULL; IsHostPtr = false; IsLast = false; @@ -304,13 +304,21 @@ void *DeviceTy::getTgtPtrBegin(void *HstPtrBegin, int64_t Size, bool &IsLast, // removed the mapping in deallocTgtPtr, another thread could retrieve the // mapping, increment and decrement back to zero, and then both threads // would try to remove the mapping, resulting in a double free. - IsLast = HT.getRefCount() == 1; + IsLast = HT.decShouldRemove(ForceDelete); const char *RefCountAction; - if (!UpdateRefCount) + if (!UpdateRefCount) { RefCountAction = "update suppressed"; - else if (IsLast) + } else if (ForceDelete) { + HT.resetRefCount(); + assert(IsLast == HT.decShouldRemove() && + "expected correct IsLast prediction for reset"); + if (IsLast) + RefCountAction = "reset, deferred final decrement"; + else + RefCountAction = "reset"; + } else if (IsLast) { RefCountAction = "deferred final decrement"; - else { + } else { RefCountAction = "decremented"; HT.decRefCount(); } @@ -350,7 +358,7 @@ void *DeviceTy::getTgtPtrBegin(void *HstPtrBegin, int64_t Size) { return NULL; } -int DeviceTy::deallocTgtPtr(void *HstPtrBegin, int64_t Size, bool ForceDelete, +int DeviceTy::deallocTgtPtr(void *HstPtrBegin, int64_t Size, bool HasCloseModifier) { if (PM->RTLs.RequiresFlags & OMP_REQ_UNIFIED_SHARED_MEMORY && !HasCloseModifier) @@ -361,17 +369,14 @@ int DeviceTy::deallocTgtPtr(void *HstPtrBegin, int64_t Size, bool ForceDelete, LookupResult lr = lookupMapping(HstPtrBegin, Size); if (lr.Flags.IsContained || lr.Flags.ExtendsBefore || lr.Flags.ExtendsAfter) { auto &HT = *lr.Entry; - if (ForceDelete) - HT.resetRefCount(); if (HT.decRefCount() == 0) { DP("Deleting tgt data " DPxMOD " of size %" PRId64 "\n", DPxPTR(HT.TgtPtrBegin), Size); deleteData((void *)HT.TgtPtrBegin); INFO(OMP_INFOTYPE_MAPPING_CHANGED, DeviceID, - "Removing%s map entry with HstPtrBegin=" DPxMOD - ", TgtPtrBegin=" DPxMOD ", Size=%" PRId64 ", Name=%s\n", - (ForceDelete ? " (forced)" : ""), DPxPTR(HT.HstPtrBegin), - DPxPTR(HT.TgtPtrBegin), Size, + "Removing map entry with HstPtrBegin=" DPxMOD ", TgtPtrBegin=" DPxMOD + ", Size=%" PRId64 ", Name=%s\n", + DPxPTR(HT.HstPtrBegin), DPxPTR(HT.TgtPtrBegin), Size, (HT.HstPtrName) ? getNameFromMapping(HT.HstPtrName).c_str() : "unknown"); HostDataToTargetMap.erase(lr.Entry); diff --git a/openmp/libomptarget/src/device.h b/openmp/libomptarget/src/device.h index 9c9b2cd..69fc65d 100644 --- a/openmp/libomptarget/src/device.h +++ b/openmp/libomptarget/src/device.h @@ -92,6 +92,14 @@ public: std::string refCountToStr() const { return isRefCountInf() ? "INF" : std::to_string(getRefCount()); } + + /// Should one decrement of the reference count (after resetting it if + /// \c AfterReset) remove this mapping? + bool decShouldRemove(bool AfterReset = false) const { + if (AfterReset) + return !isRefCountInf(); + return getRefCount() == 1; + } }; typedef uintptr_t HstPtrBeginTy; @@ -178,8 +186,8 @@ struct DeviceTy { void *getTgtPtrBegin(void *HstPtrBegin, int64_t Size); void *getTgtPtrBegin(void *HstPtrBegin, int64_t Size, bool &IsLast, bool UpdateRefCount, bool &IsHostPtr, - bool MustContain = false); - int deallocTgtPtr(void *TgtPtrBegin, int64_t Size, bool ForceDelete, + bool MustContain = false, bool ForceDelete = false); + int deallocTgtPtr(void *TgtPtrBegin, int64_t Size, bool HasCloseModifier = false); int associatePtr(void *HstPtrBegin, void *TgtPtrBegin, int64_t Size); int disassociatePtr(void *HstPtrBegin); diff --git a/openmp/libomptarget/src/omptarget.cpp b/openmp/libomptarget/src/omptarget.cpp index 447ad73..dcc1f61 100644 --- a/openmp/libomptarget/src/omptarget.cpp +++ b/openmp/libomptarget/src/omptarget.cpp @@ -595,14 +595,11 @@ struct DeallocTgtPtrInfo { void *HstPtrBegin; /// Size of the data int64_t DataSize; - /// Whether it is forced to be removed from the map table - bool ForceDelete; /// Whether it has \p close modifier bool HasCloseModifier; - DeallocTgtPtrInfo(void *HstPtr, int64_t Size, bool ForceDelete, - bool HasCloseModifier) - : HstPtrBegin(HstPtr), DataSize(Size), ForceDelete(ForceDelete), + DeallocTgtPtrInfo(void *HstPtr, int64_t Size, bool HasCloseModifier) + : HstPtrBegin(HstPtr), DataSize(Size), HasCloseModifier(HasCloseModifier) {} }; } // namespace @@ -672,8 +669,9 @@ int targetDataEnd(ident_t *loc, DeviceTy &Device, int32_t ArgNum, bool HasPresentModifier = ArgTypes[I] & OMP_TGT_MAPTYPE_PRESENT; // If PTR_AND_OBJ, HstPtrBegin is address of pointee - void *TgtPtrBegin = Device.getTgtPtrBegin( - HstPtrBegin, DataSize, IsLast, UpdateRef, IsHostPtr, !IsImplicit); + void *TgtPtrBegin = + Device.getTgtPtrBegin(HstPtrBegin, DataSize, IsLast, UpdateRef, + IsHostPtr, !IsImplicit, ForceDelete); if (!TgtPtrBegin && (DataSize || HasPresentModifier)) { DP("Mapping does not exist (%s)\n", (HasPresentModifier ? "'present' map type modifier" : "ignored")); @@ -712,7 +710,7 @@ int targetDataEnd(ident_t *loc, DeviceTy &Device, int32_t ArgNum, if (!TgtPtrBegin) continue; - bool DelEntry = IsLast || ForceDelete; + bool DelEntry = IsLast; // If the last element from the mapper (for end transfer args comes in // reverse order), do not remove the partial entry, the parent struct still @@ -797,8 +795,7 @@ int targetDataEnd(ident_t *loc, DeviceTy &Device, int32_t ArgNum, // Add pointer to the buffer for later deallocation if (DelEntry) - DeallocTgtPtrs.emplace_back(HstPtrBegin, DataSize, ForceDelete, - HasCloseModifier); + DeallocTgtPtrs.emplace_back(HstPtrBegin, DataSize, HasCloseModifier); } } @@ -815,7 +812,7 @@ int targetDataEnd(ident_t *loc, DeviceTy &Device, int32_t ArgNum, if (FromMapperBase && FromMapperBase == Info.HstPtrBegin) continue; Ret = Device.deallocTgtPtr(Info.HstPtrBegin, Info.DataSize, - Info.ForceDelete, Info.HasCloseModifier); + Info.HasCloseModifier); if (Ret != OFFLOAD_SUCCESS) { REPORT("Deallocating data from device failed.\n"); return OFFLOAD_FAIL; -- 2.7.4