[IR] Require intrinsic struct return type to be anonymous

author Nikita Popov <npopov@redhat.com>

Fri, 25 Mar 2022 09:50:13 +0000 (10:50 +0100)

committer Nikita Popov <npopov@redhat.com>

Wed, 30 Mar 2022 07:51:24 +0000 (09:51 +0200)
author Nikita Popov <npopov@redhat.com>
Fri, 25 Mar 2022 09:50:13 +0000 (10:50 +0100)
committer Nikita Popov <npopov@redhat.com>
Wed, 30 Mar 2022 07:51:24 +0000 (09:51 +0200)
diff --git a/clang/test/CodeGenCUDA/texture.cu b/clang/test/CodeGenCUDA/texture.cu

index 031d238e507cce3f6817eb9bc4f86af14cac4cfe..f7ac5cc739d6f3a693c9f505355dcc5dcdb2f44f 100644 (file)
--- a/clang/test/CodeGenCUDA/texture.cu
+++ b/clang/test/CodeGenCUDA/texture.cu
@@ -5,6 +5,9 @@
  // RUN: echo "GPU binary would be here" > %t
  // RUN: %clang_cc1 -std=c++11 -triple x86_64-unknown-linux-gnu -target-sdk-version=8.0 -fcuda-include-gpubinary %t -emit-llvm -o - %s | FileCheck --check-prefix=HOST %s
  
+// Accessing nvvm intrinsics in this way no longer works.
+// XFAIL: *
+
  struct textureReference {
    int desc;
  };
diff --git a/llvm/lib/IR/AutoUpgrade.cpp b/llvm/lib/IR/AutoUpgrade.cpp

index da01227a4c4a44f4b4241e50a62f34f895f1c0c1..bf52b098d8d883088f304dc7246898aa820fd276 100644 (file)
--- a/llvm/lib/IR/AutoUpgrade.cpp
+++ b/llvm/lib/IR/AutoUpgrade.cpp
@@ -576,19 +576,6 @@ static bool UpgradeIntrinsicFunction1(Function *F, Function *&NewFn) {
                                          F->arg_begin()->getType());
        return true;
      }
-    static const Regex vldRegex("^arm\\.neon\\.vld([1234]|[234]lane)\\.v[a-z0-9]*$");
-    if (vldRegex.match(Name)) {
-      auto fArgs = F->getFunctionType()->params();
-      SmallVector<Type *, 4> Tys(fArgs.begin(), fArgs.end());
-      // Can't use Intrinsic::getDeclaration here as the return types might
-      // then only be structurally equal.
-      FunctionType* fType = FunctionType::get(F->getReturnType(), Tys, false);
-      StringRef Suffix =
-          F->getContext().supportsTypedPointers() ? "p0i8" : "p0";
-      NewFn = Function::Create(fType, F->getLinkage(), F->getAddressSpace(),
-                               "llvm." + Name + "." + Suffix, F->getParent());
-      return true;
-    }
      static const Regex vstRegex("^arm\\.neon\\.vst([1234]|[234]lane)\\.v[a-z0-9]*$");
      if (vstRegex.match(Name)) {
        static const Intrinsic::ID StoreInts[] = {Intrinsic::arm_neon_vst1,
@@ -1017,6 +1004,25 @@ static bool UpgradeIntrinsicFunction1(Function *F, Function *&NewFn) {
      if (UpgradeX86IntrinsicFunction(F, Name, NewFn))
        return true;
    }
+
+  if (auto *ST = dyn_cast<StructType>(F->getReturnType())) {
+    if (!ST->isLiteral() || ST->isPacked()) {
+      // Replace return type with literal non-packed struct.
+      auto *FT = F->getFunctionType();
+      auto *NewST = StructType::get(ST->getContext(), ST->elements());
+      auto *NewFT = FunctionType::get(NewST, FT->params(), FT->isVarArg());
+      std::string Name = F->getName().str();
+      rename(F);
+      NewFn = Function::Create(NewFT, F->getLinkage(), F->getAddressSpace(),
+                               Name, F->getParent());
+
+      // The new function may also need remangling.
+      if (auto Result = llvm::Intrinsic::remangleIntrinsicFunction(F))
+        NewFn = *Result;
+      return true;
+    }
+  }
+
    // Remangle our intrinsic since we upgrade the mangling
    auto Result = llvm::Intrinsic::remangleIntrinsicFunction(F);
    if (Result != None) {
@@ -3784,12 +3790,33 @@ void llvm::UpgradeIntrinsicCall(CallBase *CI, Function *NewFn) {
      return;
    }
  
-  const auto &DefaultCase = [&NewFn, &CI]() -> void {
-    // Handle generic mangling change, but nothing else
-    assert(
-        (CI->getCalledFunction()->getName() != NewFn->getName()) &&
-        "Unknown function for CallBase upgrade and isn't just a name change");
-    CI->setCalledFunction(NewFn);
+  const auto &DefaultCase = [&]() -> void {
+    if (CI->getFunctionType() == NewFn->getFunctionType()) {
+      // Handle generic mangling change.
+      assert(
+          (CI->getCalledFunction()->getName() != NewFn->getName()) &&
+          "Unknown function for CallBase upgrade and isn't just a name change");
+      CI->setCalledFunction(NewFn);
+      return;
+    }
+
+    // This must be an upgrade from a named to a literal struct.
+    auto *OldST = cast<StructType>(CI->getType());
+    auto *NewST = cast<StructType>(NewFn->getReturnType());
+    assert(OldST != NewST && "Return type must have changed");
+    assert(OldST->getNumElements() == NewST->getNumElements() &&
+           "Must have same number of elements");
+
+    SmallVector<Value *> Args(CI->args());
+    Value *NewCI = Builder.CreateCall(NewFn, Args);
+    Value *Res = PoisonValue::get(OldST);
+    for (unsigned Idx = 0; Idx < OldST->getNumElements(); ++Idx) {
+      Value *Elem = Builder.CreateExtractValue(NewCI, Idx);
+      Res = Builder.CreateInsertValue(Res, Elem, Idx);
+    }
+    CI->replaceAllUsesWith(Res);
+    CI->eraseFromParent();
+    return;
    };
    CallInst *NewCall = nullptr;
    switch (NewFn->getIntrinsicID()) {
@@ -3797,13 +3824,6 @@ void llvm::UpgradeIntrinsicCall(CallBase *CI, Function *NewFn) {
      DefaultCase();
      return;
    }
-  case Intrinsic::arm_neon_vld1:
-  case Intrinsic::arm_neon_vld2:
-  case Intrinsic::arm_neon_vld3:
-  case Intrinsic::arm_neon_vld4:
-  case Intrinsic::arm_neon_vld2lane:
-  case Intrinsic::arm_neon_vld3lane:
-  case Intrinsic::arm_neon_vld4lane:
    case Intrinsic::arm_neon_vst1:
    case Intrinsic::arm_neon_vst2:
    case Intrinsic::arm_neon_vst3:
diff --git a/llvm/lib/IR/Function.cpp b/llvm/lib/IR/Function.cpp

index 3298591be40a5cba17e2214994a39551a315872e..87a07702aa571de228da2331c2c2c9458a7ac21d 100644 (file)
--- a/llvm/lib/IR/Function.cpp
+++ b/llvm/lib/IR/Function.cpp
@@ -1475,9 +1475,19 @@ static bool matchIntrinsicType(
        PointerType *PT = dyn_cast<PointerType>(Ty);
        if (!PT || PT->getAddressSpace() != D.Pointer_AddressSpace)
          return true;
-      if (!PT->isOpaque())
+      if (!PT->isOpaque()) {
+        /* Manually consume a pointer to empty struct descriptor, which is
+         * used for externref. We don't want to enforce that the struct is
+         * anonymous in this case. (This renders externref intrinsics
+         * non-unique, but this will go away with opaque pointers anyway.) */
+        if (Infos.front().Kind == IITDescriptor::Struct &&
+            Infos.front().Struct_NumElements == 0) {
+          Infos = Infos.slice(1);
+          return false;
+        }
          return matchIntrinsicType(PT->getNonOpaquePointerElementType(), Infos,
                                    ArgTys, DeferredChecks, IsDeferredCheck);
+      }
        // Consume IIT descriptors relating to the pointer element type.
        // FIXME: Intrinsic type matching of nested single value types or even
        // aggregates doesn't work properly with opaque pointers but hopefully
@@ -1491,7 +1501,8 @@ static bool matchIntrinsicType(
  
      case IITDescriptor::Struct: {
        StructType *ST = dyn_cast<StructType>(Ty);
-      if (!ST || ST->getNumElements() != D.Struct_NumElements)
+      if (!ST || !ST->isLiteral() || ST->isPacked() ||
+          ST->getNumElements() != D.Struct_NumElements)
          return true;
  
        for (unsigned i = 0, e = D.Struct_NumElements; i != e; ++i)
diff --git a/llvm/test/Bitcode/intrinsics-struct-upgrade.ll b/llvm/test/Bitcode/intrinsics-struct-upgrade.ll

new file mode 100644 (file)

index 0000000..2d7229a
--- /dev/null
+++ b/llvm/test/Bitcode/intrinsics-struct-upgrade.ll
@@ -0,0 +1,18 @@
+; RUN: llvm-dis < %s.bc | FileCheck %s
+
+%struct.__neon_int8x8x2_t = type { <8 x i8>, <8 x i8> }
+
+declare %struct.__neon_int8x8x2_t @llvm.aarch64.neon.ld2.v8i8.p0i8(i8*)
+
+; CHECK-LABEL: define %struct.__neon_int8x8x2_t @test_named_struct_return(i8* %A) {
+; CHECK:  %1 = call { <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld2.v8i8.p0i8(i8* %A)
+; CHECK:  %2 = extractvalue { <8 x i8>, <8 x i8> } %1, 0
+; CHECK:  %3 = insertvalue %struct.__neon_int8x8x2_t poison, <8 x i8> %2, 0
+; CHECK:  %4 = extractvalue { <8 x i8>, <8 x i8> } %1, 1
+; CHECK:  %5 = insertvalue %struct.__neon_int8x8x2_t %3, <8 x i8> %4, 1
+; CHECK:  ret %struct.__neon_int8x8x2_t %5
+
+define %struct.__neon_int8x8x2_t @test_named_struct_return(i8* %A) {
+  %val = call %struct.__neon_int8x8x2_t @llvm.aarch64.neon.ld2.v8i8.p0i8(i8* %A)
+  ret %struct.__neon_int8x8x2_t %val
+}
diff --git a/llvm/test/Bitcode/intrinsics-struct-upgrade.ll.bc b/llvm/test/Bitcode/intrinsics-struct-upgrade.ll.bc

new file mode 100644 (file)

index 0000000..936fe70

Binary files /dev/null and b/llvm/test/Bitcode/intrinsics-struct-upgrade.ll.bc differ
diff --git a/llvm/test/CodeGen/X86/2009-04-12-FastIselOverflowCrash.ll b/llvm/test/CodeGen/X86/2009-04-12-FastIselOverflowCrash.ll

index 38a414a2d8f78d19ff3c2321cff94f276c733be1..74bffc1223462989d3899aeb234949eb4050450b 100644 (file)
--- a/llvm/test/CodeGen/X86/2009-04-12-FastIselOverflowCrash.ll
+++ b/llvm/test/CodeGen/X86/2009-04-12-FastIselOverflowCrash.ll
@@ -4,9 +4,8 @@
  ; RUN: llc < %s -O0 -mcpu=x86-64 -mattr=+avx512f | FileCheck %s
  target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128"
  target triple = "x86_64-apple-darwin10"
-       %0 = type { i32, i1 }           ; type %0
  
-declare %0 @llvm.sadd.with.overflow.i32(i32, i32) nounwind
+declare { i32, i1 } @llvm.sadd.with.overflow.i32(i32, i32) nounwind
  
  define fastcc i32 @test() nounwind {
  entry:
@@ -16,12 +15,12 @@ entry:
  ; CHECK-NEXT:    addl $0, [[REG]]
  ; CHECK-NEXT:    seto {{%[a-z]+l}}
  ; CHECK:         jo LBB0_2
-       %tmp1 = call %0 @llvm.sadd.with.overflow.i32(i32 1, i32 0)
-       %tmp2 = extractvalue %0 %tmp1, 1
+       %tmp1 = call { i32, i1 } @llvm.sadd.with.overflow.i32(i32 1, i32 0)
+       %tmp2 = extractvalue { i32, i1 } %tmp1, 1
         br i1 %tmp2, label %.backedge, label %BB3
  
  BB3:
-       %tmp4 = extractvalue %0 %tmp1, 0
+       %tmp4 = extractvalue { i32, i1 } %tmp1, 0
         br label %.backedge
  
  .backedge:
diff --git a/llvm/test/CodeGen/X86/fast-isel-extract.ll b/llvm/test/CodeGen/X86/fast-isel-extract.ll

index 62d5b440afac27cc390d2101e3d4d04cee5d1aec..946204652500be298eeb6a478fbe2389f3c77b66 100644 (file)
--- a/llvm/test/CodeGen/X86/fast-isel-extract.ll
+++ b/llvm/test/CodeGen/X86/fast-isel-extract.ll
@@ -1,7 +1,6 @@
  ; RUN: llc < %s -mtriple x86_64-apple-darwin11 -O0 -fast-isel-abort=1 | FileCheck %s
  
  %struct.x = type { i64, i64 }
-%addovf = type { i32, i1 }
  declare %struct.x @f()
  
  define void @test1(i64*) nounwind ssp {
@@ -28,13 +27,13 @@ define void @test2(i64*) nounwind ssp {
  ; CHECK: addq $10, %rdx
  }
  
-declare %addovf @llvm.sadd.with.overflow.i32(i32, i32) nounwind readnone
+declare { i32, i1 } @llvm.sadd.with.overflow.i32(i32, i32) nounwind readnone
  
  define void @test3(i32 %x, i32 %y, i32* %z) {
-  %r = call %addovf @llvm.sadd.with.overflow.i32(i32 %x, i32 %y)
-  %sum = extractvalue %addovf %r, 0
+  %r = call { i32, i1 } @llvm.sadd.with.overflow.i32(i32 %x, i32 %y)
+  %sum = extractvalue { i32, i1 } %r, 0
    %sum3 = mul i32 %sum, 3
-  %bit = extractvalue %addovf %r, 1
+  %bit = extractvalue { i32, i1 } %r, 1
    br i1 %bit, label %then, label %end
    
  then:
author	Nikita Popov <npopov@redhat.com>
	Fri, 25 Mar 2022 09:50:13 +0000 (10:50 +0100)
committer	Nikita Popov <npopov@redhat.com>
	Wed, 30 Mar 2022 07:51:24 +0000 (09:51 +0200)
clang/test/CodeGenCUDA/texture.cu		patch \| blob \| history
llvm/lib/IR/AutoUpgrade.cpp		patch \| blob \| history
llvm/lib/IR/Function.cpp		patch \| blob \| history
llvm/test/Bitcode/intrinsics-struct-upgrade.ll	[new file with mode: 0644]	patch \| blob
llvm/test/Bitcode/intrinsics-struct-upgrade.ll.bc	[new file with mode: 0644]	patch \| blob
llvm/test/CodeGen/X86/2009-04-12-FastIselOverflowCrash.ll		patch \| blob \| history
llvm/test/CodeGen/X86/fast-isel-extract.ll		patch \| blob \| history