From f75dcbef200540dab900e34396a542619d762f0a Mon Sep 17 00:00:00 2001
From: Richard Smith <richard-llvm@metafoo.co.uk>
Date: Tue, 11 Oct 2016 00:21:10 +0000
Subject: [PATCH] Aligned allocation versus CUDA: make deallocation function
 preference order match other CUDA preference orders, per discussion with
 jlebar. We now model this in an attempt to match overload resolution as
 closely as possible:

- First, we throw out all non-callable (due to CUDA host/device mismatch)
  operator delete functions.
- Then we apply sizedness / alignedness preferences based on whether the type
  is overaligned and whether the deallocation function is a member.
- Finally, we use the CUDA callability preference as a tiebreaker.

llvm-svn: 283830
---
 clang/include/clang/Sema/Sema.h                 |  5 --
 clang/lib/Sema/SemaCUDA.cpp                     | 73 ++++---------------------
 clang/lib/Sema/SemaExprCXX.cpp                  | 60 +++++++++++---------
 clang/test/SemaCUDA/call-host-fn-from-device.cu | 13 +++++
 4 files changed, 58 insertions(+), 93 deletions(-)
diff --git a/clang/include/clang/Sema/Sema.h b/clang/include/clang/Sema/Sema.h
index 6753d79..45f62e2 100644
--- a/clang/include/clang/Sema/Sema.h
+++ b/clang/include/clang/Sema/Sema.h
@@ -9329,14 +9329,9 @@ public:
   /// Finds a function in \p Matches with highest calling priority
   /// from \p Caller context and erases all functions with lower
   /// calling priority.
-  void EraseUnwantedCUDAMatches(const FunctionDecl *Caller,
-                                SmallVectorImpl<FunctionDecl *> &Matches);
-  void EraseUnwantedCUDAMatches(const FunctionDecl *Caller,
-                                SmallVectorImpl<DeclAccessPair> &Matches);
   void EraseUnwantedCUDAMatches(
       const FunctionDecl *Caller,
       SmallVectorImpl<std::pair<DeclAccessPair, FunctionDecl *>> &Matches);
-  void EraseUnwantedCUDAMatches(const FunctionDecl *Caller, LookupResult &R);
 
   /// Given a implicit special member, infer its CUDA target from the
   /// calls it needs to make to underlying base/field special members.
diff --git a/clang/lib/Sema/SemaCUDA.cpp b/clang/lib/Sema/SemaCUDA.cpp
index e448c3f..9e101d1 100644
--- a/clang/lib/Sema/SemaCUDA.cpp
+++ b/clang/lib/Sema/SemaCUDA.cpp
@@ -158,82 +158,31 @@ Sema::IdentifyCUDAPreference(const FunctionDecl *Caller,
   llvm_unreachable("All cases should've been handled by now.");
 }
 
-void Sema::EraseUnwantedCUDAMatches(const FunctionDecl *Caller,
-                                    LookupResult &R) {
-  if (R.empty() || R.isSingleResult())
-    return;
-
-  // Gets the CUDA function preference for a call from Caller to Match.
-  auto GetCFP = [&](const NamedDecl *D) {
-    if (auto *Callee = dyn_cast<FunctionDecl>(D->getUnderlyingDecl()))
-      return IdentifyCUDAPreference(Caller, Callee);
-    return CFP_Never;
-  };
-
-  // Find the best call preference among the functions in R.
-  CUDAFunctionPreference BestCFP = GetCFP(*std::max_element(
-      R.begin(), R.end(), [&](const NamedDecl *D1, const NamedDecl *D2) {
-        return GetCFP(D1) < GetCFP(D2);
-      }));
-
-  // Erase all functions with lower priority.
-  auto Filter = R.makeFilter();
-  while (Filter.hasNext()) {
-    auto *Callee = dyn_cast<FunctionDecl>(Filter.next()->getUnderlyingDecl());
-    if (Callee && GetCFP(Callee) < BestCFP)
-      Filter.erase();
-  }
-  Filter.done();
-}
-
-template <typename T>
-static void EraseUnwantedCUDAMatchesImpl(
-    Sema &S, const FunctionDecl *Caller, llvm::SmallVectorImpl<T> &Matches,
-    std::function<const FunctionDecl *(const T &)> FetchDecl) {
+void Sema::EraseUnwantedCUDAMatches(
+    const FunctionDecl *Caller,
+    SmallVectorImpl<std::pair<DeclAccessPair, FunctionDecl *>> &Matches) {
   if (Matches.size() <= 1)
     return;
 
+  using Pair = std::pair<DeclAccessPair, FunctionDecl*>;
+
   // Gets the CUDA function preference for a call from Caller to Match.
-  auto GetCFP = [&](const T &Match) {
-    return S.IdentifyCUDAPreference(Caller, FetchDecl(Match));
+  auto GetCFP = [&](const Pair &Match) {
+    return IdentifyCUDAPreference(Caller, Match.second);
   };
 
   // Find the best call preference among the functions in Matches.
-  Sema::CUDAFunctionPreference BestCFP = GetCFP(*std::max_element(
+  CUDAFunctionPreference BestCFP = GetCFP(*std::max_element(
       Matches.begin(), Matches.end(),
-      [&](const T &M1, const T &M2) { return GetCFP(M1) < GetCFP(M2); }));
+      [&](const Pair &M1, const Pair &M2) { return GetCFP(M1) < GetCFP(M2); }));
 
   // Erase all functions with lower priority.
   Matches.erase(
-      llvm::remove_if(Matches,
-                      [&](const T &Match) { return GetCFP(Match) < BestCFP; }),
+      llvm::remove_if(
+          Matches, [&](const Pair &Match) { return GetCFP(Match) < BestCFP; }),
       Matches.end());
 }
 
-void Sema::EraseUnwantedCUDAMatches(const FunctionDecl *Caller,
-                                    SmallVectorImpl<FunctionDecl *> &Matches){
-  EraseUnwantedCUDAMatchesImpl<FunctionDecl *>(
-      *this, Caller, Matches, [](const FunctionDecl *item) { return item; });
-}
-
-void Sema::EraseUnwantedCUDAMatches(const FunctionDecl *Caller,
-                                    SmallVectorImpl<DeclAccessPair> &Matches) {
-  EraseUnwantedCUDAMatchesImpl<DeclAccessPair>(
-      *this, Caller, Matches, [](const DeclAccessPair &item) {
-        return dyn_cast<FunctionDecl>(item.getDecl());
-      });
-}
-
-void Sema::EraseUnwantedCUDAMatches(
-    const FunctionDecl *Caller,
-    SmallVectorImpl<std::pair<DeclAccessPair, FunctionDecl *>> &Matches){
-  EraseUnwantedCUDAMatchesImpl<std::pair<DeclAccessPair, FunctionDecl *>>(
-      *this, Caller, Matches,
-      [](const std::pair<DeclAccessPair, FunctionDecl *> &item) {
-        return dyn_cast<FunctionDecl>(item.second);
-      });
-}
-
 /// When an implicitly-declared special member has to invoke more than one
 /// base/field special member, conflicts may occur in the targets of these
 /// members. For example, if one base's member __host__ and another's is
diff --git a/clang/lib/Sema/SemaExprCXX.cpp b/clang/lib/Sema/SemaExprCXX.cpp
index 9c5dba5a..a87e3db 100644
--- a/clang/lib/Sema/SemaExprCXX.cpp
+++ b/clang/lib/Sema/SemaExprCXX.cpp
@@ -1354,9 +1354,9 @@ static bool isNonPlacementDeallocationFunction(Sema &S, FunctionDecl *FD) {
 namespace {
   struct UsualDeallocFnInfo {
     UsualDeallocFnInfo() : Found(), FD(nullptr) {}
-    UsualDeallocFnInfo(DeclAccessPair Found)
+    UsualDeallocFnInfo(Sema &S, DeclAccessPair Found)
         : Found(Found), FD(dyn_cast<FunctionDecl>(Found->getUnderlyingDecl())),
-          HasSizeT(false), HasAlignValT(false) {
+          HasSizeT(false), HasAlignValT(false), CUDAPref(Sema::CFP_Native) {
       // A function template declaration is never a usual deallocation function.
       if (!FD)
         return;
@@ -1366,13 +1366,35 @@ namespace {
         HasSizeT = FD->getParamDecl(1)->getType()->isIntegerType();
         HasAlignValT = !HasSizeT;
       }
+
+      // In CUDA, determine how much we'd like / dislike to call this.
+      if (S.getLangOpts().CUDA)
+        if (auto *Caller = dyn_cast<FunctionDecl>(S.CurContext))
+          CUDAPref = S.IdentifyCUDAPreference(Caller, FD);
     }
 
     operator bool() const { return FD; }
 
+    bool isBetterThan(const UsualDeallocFnInfo &Other, bool WantSize,
+                      bool WantAlign) const {
+      // C++17 [expr.delete]p10:
+      //   If the type has new-extended alignment, a function with a parameter
+      //   of type std::align_val_t is preferred; otherwise a function without
+      //   such a parameter is preferred
+      if (HasAlignValT != Other.HasAlignValT)
+        return HasAlignValT == WantAlign;
+
+      if (HasSizeT != Other.HasSizeT)
+        return HasSizeT == WantSize;
+
+      // Use CUDA call preference as a tiebreaker.
+      return CUDAPref > Other.CUDAPref;
+    }
+
     DeclAccessPair Found;
     FunctionDecl *FD;
     bool HasSizeT, HasAlignValT;
+    Sema::CUDAFunctionPreference CUDAPref;
   };
 }
 
@@ -1393,16 +1415,10 @@ static UsualDeallocFnInfo resolveDeallocationOverload(
     llvm::SmallVectorImpl<UsualDeallocFnInfo> *BestFns = nullptr) {
   UsualDeallocFnInfo Best;
 
-  // For CUDA, rank callability above anything else when ordering usual
-  // deallocation functions.
-  // FIXME: We should probably instead rank this between alignment (which
-  // affects correctness) and size (which is just an optimization).
-  if (S.getLangOpts().CUDA)
-    S.EraseUnwantedCUDAMatches(dyn_cast<FunctionDecl>(S.CurContext), R);
-
   for (auto I = R.begin(), E = R.end(); I != E; ++I) {
-    UsualDeallocFnInfo Info(I.getPair());
-    if (!Info || !isNonPlacementDeallocationFunction(S, Info.FD))
+    UsualDeallocFnInfo Info(S, I.getPair());
+    if (!Info || !isNonPlacementDeallocationFunction(S, Info.FD) ||
+        Info.CUDAPref == Sema::CFP_Never)
       continue;
 
     if (!Best) {
@@ -1412,21 +1428,12 @@ static UsualDeallocFnInfo resolveDeallocationOverload(
       continue;
     }
 
-    // C++17 [expr.delete]p10:
-    //   If the type has new-extended alignment, a function with a parameter of
-    //   type std::align_val_t is preferred; otherwise a function without such a
-    //   parameter is preferred
-    if (Best.HasAlignValT == WantAlign && Info.HasAlignValT != WantAlign)
-      continue;
-
-    if (Best.HasAlignValT == Info.HasAlignValT &&
-        Best.HasSizeT == WantSize && Info.HasSizeT != WantSize)
+    if (Best.isBetterThan(Info, WantSize, WantAlign))
       continue;
 
     //   If more than one preferred function is found, all non-preferred
     //   functions are eliminated from further consideration.
-    if (BestFns && (Best.HasAlignValT != Info.HasAlignValT ||
-        Best.HasSizeT != Info.HasSizeT))
+    if (BestFns && Info.isBetterThan(Best, WantSize, WantAlign))
       BestFns->clear();
 
     Best = Info;
@@ -2373,7 +2380,8 @@ bool Sema::FindAllocationFunctions(SourceLocation StartLoc, SourceRange Range,
     //   is ill-formed.
     if (getLangOpts().CPlusPlus11 && isPlacementNew &&
         isNonPlacementDeallocationFunction(*this, OperatorDelete)) {
-      UsualDeallocFnInfo Info(DeclAccessPair::make(OperatorDelete, AS_public));
+      UsualDeallocFnInfo Info(*this,
+                              DeclAccessPair::make(OperatorDelete, AS_public));
       // Core issue, per mail to core reflector, 2016-10-09:
       //   If this is a member operator delete, and there is a corresponding
       //   non-sized member operator delete, this isn't /really/ a sized
@@ -3118,9 +3126,9 @@ Sema::ActOnCXXDelete(SourceLocation StartLoc, bool UseGlobal,
         // function we just found.
         else if (OperatorDelete && isa<CXXMethodDecl>(OperatorDelete))
           UsualArrayDeleteWantsSize =
-              UsualDeallocFnInfo(
-                  DeclAccessPair::make(OperatorDelete, AS_public))
-                  .HasSizeT;
+            UsualDeallocFnInfo(*this,
+                               DeclAccessPair::make(OperatorDelete, AS_public))
+              .HasSizeT;
       }
 
       if (!PointeeRD->hasIrrelevantDestructor())
diff --git a/clang/test/SemaCUDA/call-host-fn-from-device.cu b/clang/test/SemaCUDA/call-host-fn-from-device.cu
index 782c13d..b83beba 100644
--- a/clang/test/SemaCUDA/call-host-fn-from-device.cu
+++ b/clang/test/SemaCUDA/call-host-fn-from-device.cu
@@ -46,6 +46,14 @@ struct T {
 
   operator Dummy() { return Dummy(); }
   // expected-note@-1 {{'operator Dummy' declared here}}
+
+  __host__ void operator delete(void*);
+  __device__ void operator delete(void*, size_t);
+};
+
+struct U {
+  __device__ void operator delete(void*, size_t) = delete;
+  __host__ __device__ void operator delete(void*);
 };
 
 __host__ __device__ void T::hd3() {
@@ -82,6 +90,11 @@ __host__ __device__ void explicit_destructor(S *s) {
   // expected-error@-1 {{reference to __host__ function '~S' in __host__ __device__ function}}
 }
 
+__host__ __device__ void class_specific_delete(T *t, U *u) {
+  delete t; // ok, call sized device delete even though host has preferable non-sized version
+  delete u; // ok, call non-sized HD delete rather than sized D delete
+}
+
 __host__ __device__ void hd_member_fn() {
   T t;
   // Necessary to trigger an error on T::hd.  It's (implicitly) inline, so
-- 
2.7.4