From f75dcbef200540dab900e34396a542619d762f0a Mon Sep 17 00:00:00 2001 From: Richard Smith Date: Tue, 11 Oct 2016 00:21:10 +0000 Subject: [PATCH] Aligned allocation versus CUDA: make deallocation function preference order match other CUDA preference orders, per discussion with jlebar. We now model this in an attempt to match overload resolution as closely as possible: - First, we throw out all non-callable (due to CUDA host/device mismatch) operator delete functions. - Then we apply sizedness / alignedness preferences based on whether the type is overaligned and whether the deallocation function is a member. - Finally, we use the CUDA callability preference as a tiebreaker. llvm-svn: 283830 --- clang/include/clang/Sema/Sema.h | 5 -- clang/lib/Sema/SemaCUDA.cpp | 73 ++++--------------------- clang/lib/Sema/SemaExprCXX.cpp | 60 +++++++++++--------- clang/test/SemaCUDA/call-host-fn-from-device.cu | 13 +++++ 4 files changed, 58 insertions(+), 93 deletions(-) diff --git a/clang/include/clang/Sema/Sema.h b/clang/include/clang/Sema/Sema.h index 6753d79..45f62e2 100644 --- a/clang/include/clang/Sema/Sema.h +++ b/clang/include/clang/Sema/Sema.h @@ -9329,14 +9329,9 @@ public: /// Finds a function in \p Matches with highest calling priority /// from \p Caller context and erases all functions with lower /// calling priority. - void EraseUnwantedCUDAMatches(const FunctionDecl *Caller, - SmallVectorImpl &Matches); - void EraseUnwantedCUDAMatches(const FunctionDecl *Caller, - SmallVectorImpl &Matches); void EraseUnwantedCUDAMatches( const FunctionDecl *Caller, SmallVectorImpl> &Matches); - void EraseUnwantedCUDAMatches(const FunctionDecl *Caller, LookupResult &R); /// Given a implicit special member, infer its CUDA target from the /// calls it needs to make to underlying base/field special members. diff --git a/clang/lib/Sema/SemaCUDA.cpp b/clang/lib/Sema/SemaCUDA.cpp index e448c3f..9e101d1 100644 --- a/clang/lib/Sema/SemaCUDA.cpp +++ b/clang/lib/Sema/SemaCUDA.cpp @@ -158,82 +158,31 @@ Sema::IdentifyCUDAPreference(const FunctionDecl *Caller, llvm_unreachable("All cases should've been handled by now."); } -void Sema::EraseUnwantedCUDAMatches(const FunctionDecl *Caller, - LookupResult &R) { - if (R.empty() || R.isSingleResult()) - return; - - // Gets the CUDA function preference for a call from Caller to Match. - auto GetCFP = [&](const NamedDecl *D) { - if (auto *Callee = dyn_cast(D->getUnderlyingDecl())) - return IdentifyCUDAPreference(Caller, Callee); - return CFP_Never; - }; - - // Find the best call preference among the functions in R. - CUDAFunctionPreference BestCFP = GetCFP(*std::max_element( - R.begin(), R.end(), [&](const NamedDecl *D1, const NamedDecl *D2) { - return GetCFP(D1) < GetCFP(D2); - })); - - // Erase all functions with lower priority. - auto Filter = R.makeFilter(); - while (Filter.hasNext()) { - auto *Callee = dyn_cast(Filter.next()->getUnderlyingDecl()); - if (Callee && GetCFP(Callee) < BestCFP) - Filter.erase(); - } - Filter.done(); -} - -template -static void EraseUnwantedCUDAMatchesImpl( - Sema &S, const FunctionDecl *Caller, llvm::SmallVectorImpl &Matches, - std::function FetchDecl) { +void Sema::EraseUnwantedCUDAMatches( + const FunctionDecl *Caller, + SmallVectorImpl> &Matches) { if (Matches.size() <= 1) return; + using Pair = std::pair; + // Gets the CUDA function preference for a call from Caller to Match. - auto GetCFP = [&](const T &Match) { - return S.IdentifyCUDAPreference(Caller, FetchDecl(Match)); + auto GetCFP = [&](const Pair &Match) { + return IdentifyCUDAPreference(Caller, Match.second); }; // Find the best call preference among the functions in Matches. - Sema::CUDAFunctionPreference BestCFP = GetCFP(*std::max_element( + CUDAFunctionPreference BestCFP = GetCFP(*std::max_element( Matches.begin(), Matches.end(), - [&](const T &M1, const T &M2) { return GetCFP(M1) < GetCFP(M2); })); + [&](const Pair &M1, const Pair &M2) { return GetCFP(M1) < GetCFP(M2); })); // Erase all functions with lower priority. Matches.erase( - llvm::remove_if(Matches, - [&](const T &Match) { return GetCFP(Match) < BestCFP; }), + llvm::remove_if( + Matches, [&](const Pair &Match) { return GetCFP(Match) < BestCFP; }), Matches.end()); } -void Sema::EraseUnwantedCUDAMatches(const FunctionDecl *Caller, - SmallVectorImpl &Matches){ - EraseUnwantedCUDAMatchesImpl( - *this, Caller, Matches, [](const FunctionDecl *item) { return item; }); -} - -void Sema::EraseUnwantedCUDAMatches(const FunctionDecl *Caller, - SmallVectorImpl &Matches) { - EraseUnwantedCUDAMatchesImpl( - *this, Caller, Matches, [](const DeclAccessPair &item) { - return dyn_cast(item.getDecl()); - }); -} - -void Sema::EraseUnwantedCUDAMatches( - const FunctionDecl *Caller, - SmallVectorImpl> &Matches){ - EraseUnwantedCUDAMatchesImpl>( - *this, Caller, Matches, - [](const std::pair &item) { - return dyn_cast(item.second); - }); -} - /// When an implicitly-declared special member has to invoke more than one /// base/field special member, conflicts may occur in the targets of these /// members. For example, if one base's member __host__ and another's is diff --git a/clang/lib/Sema/SemaExprCXX.cpp b/clang/lib/Sema/SemaExprCXX.cpp index 9c5dba5a..a87e3db 100644 --- a/clang/lib/Sema/SemaExprCXX.cpp +++ b/clang/lib/Sema/SemaExprCXX.cpp @@ -1354,9 +1354,9 @@ static bool isNonPlacementDeallocationFunction(Sema &S, FunctionDecl *FD) { namespace { struct UsualDeallocFnInfo { UsualDeallocFnInfo() : Found(), FD(nullptr) {} - UsualDeallocFnInfo(DeclAccessPair Found) + UsualDeallocFnInfo(Sema &S, DeclAccessPair Found) : Found(Found), FD(dyn_cast(Found->getUnderlyingDecl())), - HasSizeT(false), HasAlignValT(false) { + HasSizeT(false), HasAlignValT(false), CUDAPref(Sema::CFP_Native) { // A function template declaration is never a usual deallocation function. if (!FD) return; @@ -1366,13 +1366,35 @@ namespace { HasSizeT = FD->getParamDecl(1)->getType()->isIntegerType(); HasAlignValT = !HasSizeT; } + + // In CUDA, determine how much we'd like / dislike to call this. + if (S.getLangOpts().CUDA) + if (auto *Caller = dyn_cast(S.CurContext)) + CUDAPref = S.IdentifyCUDAPreference(Caller, FD); } operator bool() const { return FD; } + bool isBetterThan(const UsualDeallocFnInfo &Other, bool WantSize, + bool WantAlign) const { + // C++17 [expr.delete]p10: + // If the type has new-extended alignment, a function with a parameter + // of type std::align_val_t is preferred; otherwise a function without + // such a parameter is preferred + if (HasAlignValT != Other.HasAlignValT) + return HasAlignValT == WantAlign; + + if (HasSizeT != Other.HasSizeT) + return HasSizeT == WantSize; + + // Use CUDA call preference as a tiebreaker. + return CUDAPref > Other.CUDAPref; + } + DeclAccessPair Found; FunctionDecl *FD; bool HasSizeT, HasAlignValT; + Sema::CUDAFunctionPreference CUDAPref; }; } @@ -1393,16 +1415,10 @@ static UsualDeallocFnInfo resolveDeallocationOverload( llvm::SmallVectorImpl *BestFns = nullptr) { UsualDeallocFnInfo Best; - // For CUDA, rank callability above anything else when ordering usual - // deallocation functions. - // FIXME: We should probably instead rank this between alignment (which - // affects correctness) and size (which is just an optimization). - if (S.getLangOpts().CUDA) - S.EraseUnwantedCUDAMatches(dyn_cast(S.CurContext), R); - for (auto I = R.begin(), E = R.end(); I != E; ++I) { - UsualDeallocFnInfo Info(I.getPair()); - if (!Info || !isNonPlacementDeallocationFunction(S, Info.FD)) + UsualDeallocFnInfo Info(S, I.getPair()); + if (!Info || !isNonPlacementDeallocationFunction(S, Info.FD) || + Info.CUDAPref == Sema::CFP_Never) continue; if (!Best) { @@ -1412,21 +1428,12 @@ static UsualDeallocFnInfo resolveDeallocationOverload( continue; } - // C++17 [expr.delete]p10: - // If the type has new-extended alignment, a function with a parameter of - // type std::align_val_t is preferred; otherwise a function without such a - // parameter is preferred - if (Best.HasAlignValT == WantAlign && Info.HasAlignValT != WantAlign) - continue; - - if (Best.HasAlignValT == Info.HasAlignValT && - Best.HasSizeT == WantSize && Info.HasSizeT != WantSize) + if (Best.isBetterThan(Info, WantSize, WantAlign)) continue; // If more than one preferred function is found, all non-preferred // functions are eliminated from further consideration. - if (BestFns && (Best.HasAlignValT != Info.HasAlignValT || - Best.HasSizeT != Info.HasSizeT)) + if (BestFns && Info.isBetterThan(Best, WantSize, WantAlign)) BestFns->clear(); Best = Info; @@ -2373,7 +2380,8 @@ bool Sema::FindAllocationFunctions(SourceLocation StartLoc, SourceRange Range, // is ill-formed. if (getLangOpts().CPlusPlus11 && isPlacementNew && isNonPlacementDeallocationFunction(*this, OperatorDelete)) { - UsualDeallocFnInfo Info(DeclAccessPair::make(OperatorDelete, AS_public)); + UsualDeallocFnInfo Info(*this, + DeclAccessPair::make(OperatorDelete, AS_public)); // Core issue, per mail to core reflector, 2016-10-09: // If this is a member operator delete, and there is a corresponding // non-sized member operator delete, this isn't /really/ a sized @@ -3118,9 +3126,9 @@ Sema::ActOnCXXDelete(SourceLocation StartLoc, bool UseGlobal, // function we just found. else if (OperatorDelete && isa(OperatorDelete)) UsualArrayDeleteWantsSize = - UsualDeallocFnInfo( - DeclAccessPair::make(OperatorDelete, AS_public)) - .HasSizeT; + UsualDeallocFnInfo(*this, + DeclAccessPair::make(OperatorDelete, AS_public)) + .HasSizeT; } if (!PointeeRD->hasIrrelevantDestructor()) diff --git a/clang/test/SemaCUDA/call-host-fn-from-device.cu b/clang/test/SemaCUDA/call-host-fn-from-device.cu index 782c13d..b83beba 100644 --- a/clang/test/SemaCUDA/call-host-fn-from-device.cu +++ b/clang/test/SemaCUDA/call-host-fn-from-device.cu @@ -46,6 +46,14 @@ struct T { operator Dummy() { return Dummy(); } // expected-note@-1 {{'operator Dummy' declared here}} + + __host__ void operator delete(void*); + __device__ void operator delete(void*, size_t); +}; + +struct U { + __device__ void operator delete(void*, size_t) = delete; + __host__ __device__ void operator delete(void*); }; __host__ __device__ void T::hd3() { @@ -82,6 +90,11 @@ __host__ __device__ void explicit_destructor(S *s) { // expected-error@-1 {{reference to __host__ function '~S' in __host__ __device__ function}} } +__host__ __device__ void class_specific_delete(T *t, U *u) { + delete t; // ok, call sized device delete even though host has preferable non-sized version + delete u; // ok, call non-sized HD delete rather than sized D delete +} + __host__ __device__ void hd_member_fn() { T t; // Necessary to trigger an error on T::hd. It's (implicitly) inline, so -- 2.7.4