// RUN: echo "GPU binary would be here" > %t
// RUN: %clang_cc1 -std=c++11 -triple x86_64-unknown-linux-gnu -target-sdk-version=8.0 -fcuda-include-gpubinary %t -emit-llvm -o - %s | FileCheck --check-prefix=HOST %s
+// Accessing nvvm intrinsics in this way no longer works.
+// XFAIL: *
+
struct textureReference {
int desc;
};
F->arg_begin()->getType());
return true;
}
- static const Regex vldRegex("^arm\\.neon\\.vld([1234]|[234]lane)\\.v[a-z0-9]*$");
- if (vldRegex.match(Name)) {
- auto fArgs = F->getFunctionType()->params();
- SmallVector<Type *, 4> Tys(fArgs.begin(), fArgs.end());
- // Can't use Intrinsic::getDeclaration here as the return types might
- // then only be structurally equal.
- FunctionType* fType = FunctionType::get(F->getReturnType(), Tys, false);
- StringRef Suffix =
- F->getContext().supportsTypedPointers() ? "p0i8" : "p0";
- NewFn = Function::Create(fType, F->getLinkage(), F->getAddressSpace(),
- "llvm." + Name + "." + Suffix, F->getParent());
- return true;
- }
static const Regex vstRegex("^arm\\.neon\\.vst([1234]|[234]lane)\\.v[a-z0-9]*$");
if (vstRegex.match(Name)) {
static const Intrinsic::ID StoreInts[] = {Intrinsic::arm_neon_vst1,
if (UpgradeX86IntrinsicFunction(F, Name, NewFn))
return true;
}
+
+ if (auto *ST = dyn_cast<StructType>(F->getReturnType())) {
+ if (!ST->isLiteral() || ST->isPacked()) {
+ // Replace return type with literal non-packed struct.
+ auto *FT = F->getFunctionType();
+ auto *NewST = StructType::get(ST->getContext(), ST->elements());
+ auto *NewFT = FunctionType::get(NewST, FT->params(), FT->isVarArg());
+ std::string Name = F->getName().str();
+ rename(F);
+ NewFn = Function::Create(NewFT, F->getLinkage(), F->getAddressSpace(),
+ Name, F->getParent());
+
+ // The new function may also need remangling.
+ if (auto Result = llvm::Intrinsic::remangleIntrinsicFunction(F))
+ NewFn = *Result;
+ return true;
+ }
+ }
+
// Remangle our intrinsic since we upgrade the mangling
auto Result = llvm::Intrinsic::remangleIntrinsicFunction(F);
if (Result != None) {
return;
}
- const auto &DefaultCase = [&NewFn, &CI]() -> void {
- // Handle generic mangling change, but nothing else
- assert(
- (CI->getCalledFunction()->getName() != NewFn->getName()) &&
- "Unknown function for CallBase upgrade and isn't just a name change");
- CI->setCalledFunction(NewFn);
+ const auto &DefaultCase = [&]() -> void {
+ if (CI->getFunctionType() == NewFn->getFunctionType()) {
+ // Handle generic mangling change.
+ assert(
+ (CI->getCalledFunction()->getName() != NewFn->getName()) &&
+ "Unknown function for CallBase upgrade and isn't just a name change");
+ CI->setCalledFunction(NewFn);
+ return;
+ }
+
+ // This must be an upgrade from a named to a literal struct.
+ auto *OldST = cast<StructType>(CI->getType());
+ auto *NewST = cast<StructType>(NewFn->getReturnType());
+ assert(OldST != NewST && "Return type must have changed");
+ assert(OldST->getNumElements() == NewST->getNumElements() &&
+ "Must have same number of elements");
+
+ SmallVector<Value *> Args(CI->args());
+ Value *NewCI = Builder.CreateCall(NewFn, Args);
+ Value *Res = PoisonValue::get(OldST);
+ for (unsigned Idx = 0; Idx < OldST->getNumElements(); ++Idx) {
+ Value *Elem = Builder.CreateExtractValue(NewCI, Idx);
+ Res = Builder.CreateInsertValue(Res, Elem, Idx);
+ }
+ CI->replaceAllUsesWith(Res);
+ CI->eraseFromParent();
+ return;
};
CallInst *NewCall = nullptr;
switch (NewFn->getIntrinsicID()) {
DefaultCase();
return;
}
- case Intrinsic::arm_neon_vld1:
- case Intrinsic::arm_neon_vld2:
- case Intrinsic::arm_neon_vld3:
- case Intrinsic::arm_neon_vld4:
- case Intrinsic::arm_neon_vld2lane:
- case Intrinsic::arm_neon_vld3lane:
- case Intrinsic::arm_neon_vld4lane:
case Intrinsic::arm_neon_vst1:
case Intrinsic::arm_neon_vst2:
case Intrinsic::arm_neon_vst3:
PointerType *PT = dyn_cast<PointerType>(Ty);
if (!PT || PT->getAddressSpace() != D.Pointer_AddressSpace)
return true;
- if (!PT->isOpaque())
+ if (!PT->isOpaque()) {
+ /* Manually consume a pointer to empty struct descriptor, which is
+ * used for externref. We don't want to enforce that the struct is
+ * anonymous in this case. (This renders externref intrinsics
+ * non-unique, but this will go away with opaque pointers anyway.) */
+ if (Infos.front().Kind == IITDescriptor::Struct &&
+ Infos.front().Struct_NumElements == 0) {
+ Infos = Infos.slice(1);
+ return false;
+ }
return matchIntrinsicType(PT->getNonOpaquePointerElementType(), Infos,
ArgTys, DeferredChecks, IsDeferredCheck);
+ }
// Consume IIT descriptors relating to the pointer element type.
// FIXME: Intrinsic type matching of nested single value types or even
// aggregates doesn't work properly with opaque pointers but hopefully
case IITDescriptor::Struct: {
StructType *ST = dyn_cast<StructType>(Ty);
- if (!ST || ST->getNumElements() != D.Struct_NumElements)
+ if (!ST || !ST->isLiteral() || ST->isPacked() ||
+ ST->getNumElements() != D.Struct_NumElements)
return true;
for (unsigned i = 0, e = D.Struct_NumElements; i != e; ++i)
--- /dev/null
+; RUN: llvm-dis < %s.bc | FileCheck %s
+
+%struct.__neon_int8x8x2_t = type { <8 x i8>, <8 x i8> }
+
+declare %struct.__neon_int8x8x2_t @llvm.aarch64.neon.ld2.v8i8.p0i8(i8*)
+
+; CHECK-LABEL: define %struct.__neon_int8x8x2_t @test_named_struct_return(i8* %A) {
+; CHECK: %1 = call { <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld2.v8i8.p0i8(i8* %A)
+; CHECK: %2 = extractvalue { <8 x i8>, <8 x i8> } %1, 0
+; CHECK: %3 = insertvalue %struct.__neon_int8x8x2_t poison, <8 x i8> %2, 0
+; CHECK: %4 = extractvalue { <8 x i8>, <8 x i8> } %1, 1
+; CHECK: %5 = insertvalue %struct.__neon_int8x8x2_t %3, <8 x i8> %4, 1
+; CHECK: ret %struct.__neon_int8x8x2_t %5
+
+define %struct.__neon_int8x8x2_t @test_named_struct_return(i8* %A) {
+ %val = call %struct.__neon_int8x8x2_t @llvm.aarch64.neon.ld2.v8i8.p0i8(i8* %A)
+ ret %struct.__neon_int8x8x2_t %val
+}
; RUN: llc < %s -O0 -mcpu=x86-64 -mattr=+avx512f | FileCheck %s
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128"
target triple = "x86_64-apple-darwin10"
- %0 = type { i32, i1 } ; type %0
-declare %0 @llvm.sadd.with.overflow.i32(i32, i32) nounwind
+declare { i32, i1 } @llvm.sadd.with.overflow.i32(i32, i32) nounwind
define fastcc i32 @test() nounwind {
entry:
; CHECK-NEXT: addl $0, [[REG]]
; CHECK-NEXT: seto {{%[a-z]+l}}
; CHECK: jo LBB0_2
- %tmp1 = call %0 @llvm.sadd.with.overflow.i32(i32 1, i32 0)
- %tmp2 = extractvalue %0 %tmp1, 1
+ %tmp1 = call { i32, i1 } @llvm.sadd.with.overflow.i32(i32 1, i32 0)
+ %tmp2 = extractvalue { i32, i1 } %tmp1, 1
br i1 %tmp2, label %.backedge, label %BB3
BB3:
- %tmp4 = extractvalue %0 %tmp1, 0
+ %tmp4 = extractvalue { i32, i1 } %tmp1, 0
br label %.backedge
.backedge:
; RUN: llc < %s -mtriple x86_64-apple-darwin11 -O0 -fast-isel-abort=1 | FileCheck %s
%struct.x = type { i64, i64 }
-%addovf = type { i32, i1 }
declare %struct.x @f()
define void @test1(i64*) nounwind ssp {
; CHECK: addq $10, %rdx
}
-declare %addovf @llvm.sadd.with.overflow.i32(i32, i32) nounwind readnone
+declare { i32, i1 } @llvm.sadd.with.overflow.i32(i32, i32) nounwind readnone
define void @test3(i32 %x, i32 %y, i32* %z) {
- %r = call %addovf @llvm.sadd.with.overflow.i32(i32 %x, i32 %y)
- %sum = extractvalue %addovf %r, 0
+ %r = call { i32, i1 } @llvm.sadd.with.overflow.i32(i32 %x, i32 %y)
+ %sum = extractvalue { i32, i1 } %r, 0
%sum3 = mul i32 %sum, 3
- %bit = extractvalue %addovf %r, 1
+ %bit = extractvalue { i32, i1 } %r, 1
br i1 %bit, label %then, label %end
then: