// CHECK-ASM: vsumqg
idx = vec_test_mask(vsc, vuc);
- // CHECK: call i32 @llvm.s390.vtm(<16 x i8> %{{.*}}, <16 x i8> %{{.*}})
+ // CHECK: call signext i32 @llvm.s390.vtm(<16 x i8> %{{.*}}, <16 x i8> %{{.*}})
// CHECK-ASM: vtm
idx = vec_test_mask(vuc, vuc);
- // CHECK: call i32 @llvm.s390.vtm(<16 x i8> %{{.*}}, <16 x i8> %{{.*}})
+ // CHECK: call signext i32 @llvm.s390.vtm(<16 x i8> %{{.*}}, <16 x i8> %{{.*}})
// CHECK-ASM: vtm
idx = vec_test_mask(vss, vus);
- // CHECK: call i32 @llvm.s390.vtm(<16 x i8> %{{.*}}, <16 x i8> %{{.*}})
+ // CHECK: call signext i32 @llvm.s390.vtm(<16 x i8> %{{.*}}, <16 x i8> %{{.*}})
// CHECK-ASM: vtm
idx = vec_test_mask(vus, vus);
- // CHECK: call i32 @llvm.s390.vtm(<16 x i8> %{{.*}}, <16 x i8> %{{.*}})
+ // CHECK: call signext i32 @llvm.s390.vtm(<16 x i8> %{{.*}}, <16 x i8> %{{.*}})
// CHECK-ASM: vtm
idx = vec_test_mask(vsi, vui);
- // CHECK: call i32 @llvm.s390.vtm(<16 x i8> %{{.*}}, <16 x i8> %{{.*}})
+ // CHECK: call signext i32 @llvm.s390.vtm(<16 x i8> %{{.*}}, <16 x i8> %{{.*}})
// CHECK-ASM: vtm
idx = vec_test_mask(vui, vui);
- // CHECK: call i32 @llvm.s390.vtm(<16 x i8> %{{.*}}, <16 x i8> %{{.*}})
+ // CHECK: call signext i32 @llvm.s390.vtm(<16 x i8> %{{.*}}, <16 x i8> %{{.*}})
// CHECK-ASM: vtm
idx = vec_test_mask(vsl, vul);
- // CHECK: call i32 @llvm.s390.vtm(<16 x i8> %{{.*}}, <16 x i8> %{{.*}})
+ // CHECK: call signext i32 @llvm.s390.vtm(<16 x i8> %{{.*}}, <16 x i8> %{{.*}})
// CHECK-ASM: vtm
idx = vec_test_mask(vul, vul);
- // CHECK: call i32 @llvm.s390.vtm(<16 x i8> %{{.*}}, <16 x i8> %{{.*}})
+ // CHECK: call signext i32 @llvm.s390.vtm(<16 x i8> %{{.*}}, <16 x i8> %{{.*}})
// CHECK-ASM: vtm
idx = vec_test_mask(vd, vul);
- // CHECK: call i32 @llvm.s390.vtm(<16 x i8> %{{.*}}, <16 x i8> %{{.*}})
+ // CHECK: call signext i32 @llvm.s390.vtm(<16 x i8> %{{.*}}, <16 x i8> %{{.*}})
// CHECK-ASM: vtm
}
// CHECK-ASM: vsrlb
idx = vec_test_mask(vf, vui);
- // CHECK: call i32 @llvm.s390.vtm(<16 x i8> %{{.*}}, <16 x i8> %{{.*}})
+ // CHECK: call signext i32 @llvm.s390.vtm(<16 x i8> %{{.*}}, <16 x i8> %{{.*}})
// CHECK-ASM: vtm
idx = vec_test_mask(vd, vul);
- // CHECK: call i32 @llvm.s390.vtm(<16 x i8> %{{.*}}, <16 x i8> %{{.*}})
+ // CHECK: call signext i32 @llvm.s390.vtm(<16 x i8> %{{.*}}, <16 x i8> %{{.*}})
// CHECK-ASM: vtm
vuc = vec_msum_u128(vul, vul, vuc, 0);
short test_loadbe_i16(const short *P) {
// CHECK-LABEL: @test_loadbe_i16
// CHECK: [[LOAD:%.*]] = load i16, i16* %{{.*}}, align 1
- // CHECK: call i16 @llvm.bswap.i16(i16 [[LOAD]])
+ // CHECK: call signext i16 @llvm.bswap.i16(i16 [[LOAD]])
return _loadbe_i16(P);
}
-// RUN: %clang_cc1 -ffreestanding -triple i686--linux -emit-llvm %s -o - | FileCheck %s --check-prefixes CHECK,CHECK-32BIT-LONG
-// RUN: %clang_cc1 -ffreestanding -triple x86_64--linux -emit-llvm %s -o - | FileCheck %s --check-prefixes CHECK,CHECK-64BIT-LONG
-// RUN: %clang_cc1 -fms-extensions -fms-compatibility -ffreestanding %s -triple=i686-windows-msvc -target-feature +sse2 -emit-llvm -o - -Wall -Werror | FileCheck %s --check-prefixes CHECK,CHECK-32BIT-LONG
-// RUN: %clang_cc1 -fms-extensions -fms-compatibility -ffreestanding %s -triple=x86_64-windows-msvc -target-feature +sse2 -emit-llvm -o - -Wall -Werror | FileCheck %s --check-prefixes CHECK,CHECK-32BIT-LONG
-// RUN: %clang_cc1 -fms-extensions -fms-compatibility -fms-compatibility-version=17.00 -ffreestanding %s -triple=i686-windows-msvc -target-feature +sse2 -emit-llvm -o - -Wall -Werror | FileCheck %s --check-prefixes CHECK,CHECK-32BIT-LONG
-// RUN: %clang_cc1 -fms-extensions -fms-compatibility -fms-compatibility-version=17.00 -ffreestanding %s -triple=x86_64-windows-msvc -target-feature +sse2 -emit-llvm -o - -Wall -Werror | FileCheck %s --check-prefixes CHECK,CHECK-32BIT-LONG
+// RUN: %clang_cc1 -ffreestanding -triple i686--linux -emit-llvm -mllvm -update-return-attrs=false %s -o - | FileCheck %s --check-prefixes CHECK,CHECK-32BIT-LONG
+// RUN: %clang_cc1 -ffreestanding -triple x86_64--linux -emit-llvm -mllvm -update-return-attrs=false %s -o - | FileCheck %s --check-prefixes CHECK,CHECK-64BIT-LONG
+// RUN: %clang_cc1 -fms-extensions -fms-compatibility -ffreestanding %s -triple=i686-windows-msvc -target-feature +sse2 -emit-llvm -mllvm -update-return-attrs=false -o - -Wall -Werror | FileCheck %s --check-prefixes CHECK,CHECK-32BIT-LONG
+// RUN: %clang_cc1 -fms-extensions -fms-compatibility -ffreestanding %s -triple=x86_64-windows-msvc -target-feature +sse2 -emit-llvm -mllvm -update-return-attrs=false -o - -Wall -Werror | FileCheck %s --check-prefixes CHECK,CHECK-32BIT-LONG
+// RUN: %clang_cc1 -fms-extensions -fms-compatibility -fms-compatibility-version=17.00 -ffreestanding %s -triple=i686-windows-msvc -target-feature +sse2 -emit-llvm -mllvm -update-return-attrs=false -o - -Wall -Werror | FileCheck %s --check-prefixes CHECK,CHECK-32BIT-LONG
+// RUN: %clang_cc1 -fms-extensions -fms-compatibility -fms-compatibility-version=17.00 -ffreestanding %s -triple=x86_64-windows-msvc -target-feature +sse2 -emit-llvm -mllvm -update-return-attrs=false -o - -Wall -Werror | FileCheck %s --check-prefixes CHECK,CHECK-32BIT-LONG
#include <x86intrin.h>
cl::Hidden,
cl::desc("Convert noalias attributes to metadata during inlining."));
+static cl::opt<bool> UpdateReturnAttributes(
+ "update-return-attrs", cl::init(true), cl::Hidden,
+ cl::desc("Update return attributes on calls within inlined body"));
+
static cl::opt<bool>
PreserveAlignmentAssumptions("preserve-alignment-assumptions-during-inlining",
cl::init(true), cl::Hidden,
cl::desc("Convert align attributes to assumptions during inlining."));
+static cl::opt<unsigned> InlinerAttributeWindow(
+ "inliner-attribute-window", cl::Hidden,
+ cl::desc("the maximum number of instructions analyzed for may throw during "
+ "attribute inference in inlined body"),
+ cl::init(4));
+
llvm::InlineResult llvm::InlineFunction(CallBase *CB, InlineFunctionInfo &IFI,
AAResults *CalleeAAR,
bool InsertLifetime) {
}
}
+static bool MayContainThrowingOrExitingCall(Instruction *Begin,
+ Instruction *End) {
+
+ assert(Begin->getParent() == End->getParent() &&
+ "Expected to be in same basic block!");
+ unsigned NumInstChecked = 0;
+ // Check that all instructions in the range [Begin, End) are guaranteed to
+ // transfer execution to successor.
+ for (auto &I : make_range(Begin->getIterator(), End->getIterator()))
+ if (NumInstChecked++ > InlinerAttributeWindow ||
+ !isGuaranteedToTransferExecutionToSuccessor(&I))
+ return true;
+ return false;
+}
+
+static void AddReturnAttributes(CallSite CS, ValueToValueMapTy &VMap) {
+ if (!UpdateReturnAttributes)
+ return;
+ AttrBuilder AB(CS.getAttributes(), AttributeList::ReturnIndex);
+ if (AB.empty())
+ return;
+
+ auto *CalledFunction = CS.getCalledFunction();
+ auto &Context = CalledFunction->getContext();
+
+ for (auto &BB : *CalledFunction) {
+ auto *RI = dyn_cast<ReturnInst>(BB.getTerminator());
+ if (!RI || !isa<CallBase>(RI->getOperand(0)))
+ continue;
+ // Sanity check that the cloned return instruction exists and is a return
+ // instruction itself.
+ auto *NewRI = dyn_cast_or_null<ReturnInst>(VMap.lookup(RI));
+ if (!NewRI)
+ continue;
+ auto *RetVal = cast<CallBase>(RI->getOperand(0));
+ // Sanity check that the cloned RetVal exists and is a call.
+ // Simplification during inlining could have transformed the cloned
+ // instruction.
+ auto *NewRetVal = dyn_cast_or_null<CallBase>(VMap.lookup(RetVal));
+ if (!NewRetVal)
+ continue;
+ // Backward propagation of attributes to the returned value may be incorrect
+ // if it is control flow dependent.
+ // Consider:
+ // @callee {
+ // %rv = call @foo()
+ // %rv2 = call @bar()
+ // if (%rv2 != null)
+ // return %rv2
+ // if (%rv == null)
+ // exit()
+ // return %rv
+ // }
+ // caller() {
+ // %val = call nonnull @callee()
+ // }
+ // Here we cannot add the nonnull attribute on either foo or bar. So, we
+ // limit the check to both NewRetVal and NewRI are in the same basic block
+ // and there are no throwing/exiting instructions between these
+ // instructions.
+ if (NewRI->getParent() != NewRetVal->getParent() ||
+ MayContainThrowingOrExitingCall(NewRetVal, NewRI))
+ continue;
+ // Add to the existing attributes of NewRetVal.
+ // NB! When we have the same attribute already existing on NewRetVal, but
+ // with a differing value, the AttributeList's merge API honours the already
+ // existing attribute value (i.e. attributes such as dereferenceable,
+ // dereferenceable_or_null etc). See AttrBuilder::merge for more details.
+ AttributeList AL = NewRetVal->getAttributes();
+ AttributeList NewAL =
+ AL.addAttributes(Context, AttributeList::ReturnIndex, AB);
+ NewRetVal->setAttributes(NewAL);
+ }
+}
+
/// If the inlined function has non-byval align arguments, then
/// add @llvm.assume-based alignment assumptions to preserve this information.
static void AddAlignmentAssumptions(CallSite CS, InlineFunctionInfo &IFI) {
// Add noalias metadata if necessary.
AddAliasScopeMetadata(CS, VMap, DL, CalleeAAR);
+ // Clone return attributes on the callsite into the calls within the inlined
+ // function which feed into its return value.
+ AddReturnAttributes(CS, VMap);
+
// Propagate llvm.mem.parallel_loop_access if necessary.
PropagateParallelLoopAccessMetadata(CS, VMap);
--- /dev/null
+; RUN: opt < %s -inline-threshold=0 -always-inline -S | FileCheck %s
+; RUN: opt < %s -passes=always-inline -S | FileCheck %s
+
+declare i8* @foo(i8*) argmemonly nounwind
+
+define i8* @callee(i8 *%p) alwaysinline {
+; CHECK: @callee(
+; CHECK: call i8* @foo(i8* noalias %p)
+ %r = call i8* @foo(i8* noalias %p)
+ ret i8* %r
+}
+
+define i8* @caller(i8* %ptr, i64 %x) {
+; CHECK-LABEL: @caller
+; CHECK: call nonnull i8* @foo(i8* noalias
+ %gep = getelementptr inbounds i8, i8* %ptr, i64 %x
+ %p = call nonnull i8* @callee(i8* %gep)
+ ret i8* %p
+}
+
+declare void @llvm.experimental.guard(i1,...)
+; Cannot add nonnull attribute to foo
+; because the guard is a throwing call
+define internal i8* @callee_with_throwable(i8* %p) alwaysinline {
+; CHECK-NOT: callee_with_throwable
+ %r = call i8* @foo(i8* %p)
+ %cond = icmp ne i8* %r, null
+ call void (i1, ...) @llvm.experimental.guard(i1 %cond) [ "deopt"() ]
+ ret i8* %r
+}
+
+declare i8* @bar(i8*) readonly nounwind
+; Here also we cannot add nonnull attribute to the call bar.
+define internal i8* @callee_with_explicit_control_flow(i8* %p) alwaysinline {
+; CHECK-NOT: callee_with_explicit_control_flow
+ %r = call i8* @bar(i8* %p)
+ %cond = icmp ne i8* %r, null
+ br i1 %cond, label %ret, label %orig
+
+ret:
+ ret i8* %r
+
+orig:
+ ret i8* %p
+}
+
+define i8* @caller2(i8* %ptr, i64 %x, i1 %cond) {
+; CHECK-LABEL: @caller2
+; CHECK: call i8* @foo
+; CHECK: call i8* @bar
+ %gep = getelementptr inbounds i8, i8* %ptr, i64 %x
+ %p = call nonnull i8* @callee_with_throwable(i8* %gep)
+ %q = call nonnull i8* @callee_with_explicit_control_flow(i8* %gep)
+ br i1 %cond, label %pret, label %qret
+
+pret:
+ ret i8* %p
+
+qret:
+ ret i8* %q
+}
+
+define internal i8* @callee3(i8 *%p) alwaysinline {
+; CHECK-NOT: callee3
+ %r = call noalias i8* @foo(i8* %p)
+ ret i8* %r
+}
+
+; add the deref attribute to the existing attributes on foo.
+define i8* @caller3(i8* %ptr, i64 %x) {
+; CHECK-LABEL: caller3
+; CHECK: call noalias dereferenceable_or_null(12) i8* @foo
+ %gep = getelementptr inbounds i8, i8* %ptr, i64 %x
+ %p = call dereferenceable_or_null(12) i8* @callee3(i8* %gep)
+ ret i8* %p
+}
+
+declare i8* @inf_loop_call(i8*) nounwind
+; We cannot propagate attributes to foo because we do not know whether inf_loop_call
+; will return execution.
+define internal i8* @callee_with_sideeffect_callsite(i8* %p) alwaysinline {
+; CHECK-NOT: callee_with_sideeffect_callsite
+ %r = call i8* @foo(i8* %p)
+ %v = call i8* @inf_loop_call(i8* %p)
+ ret i8* %r
+}
+
+; do not add deref attribute to foo
+define i8* @test4(i8* %ptr, i64 %x) {
+; CHECK-LABEL: test4
+; CHECK: call i8* @foo
+ %gep = getelementptr inbounds i8, i8* %ptr, i64 %x
+ %p = call dereferenceable_or_null(12) i8* @callee_with_sideeffect_callsite(i8* %gep)
+ ret i8* %p
+}
+
+declare i8* @baz(i8*) nounwind readonly
+define internal i8* @callee5(i8* %p) alwaysinline {
+; CHECK-NOT: callee5
+ %r = call i8* @foo(i8* %p)
+ %v = call i8* @baz(i8* %p)
+ ret i8* %r
+}
+
+; add the deref attribute to foo.
+define i8* @test5(i8* %ptr, i64 %x) {
+; CHECK-LABEL: test5
+; CHECK: call dereferenceable_or_null(12) i8* @foo
+ %gep = getelementptr inbounds i8, i8* %ptr, i64 %x
+ %s = call dereferenceable_or_null(12) i8* @callee5(i8* %gep)
+ ret i8* %s
+}
+
+; deref attributes have different values on the callee and the call feeding into
+; the return.
+; AttrBuilder chooses the already existing value and does not overwrite it.
+define internal i8* @callee6(i8* %p) alwaysinline {
+; CHECK-NOT: callee6
+ %r = call dereferenceable_or_null(16) i8* @foo(i8* %p)
+ %v = call i8* @baz(i8* %p)
+ ret i8* %r
+}
+
+
+define i8* @test6(i8* %ptr, i64 %x) {
+; CHECK-LABEL: test6
+; CHECK: call dereferenceable_or_null(16) i8* @foo
+ %gep = getelementptr inbounds i8, i8* %ptr, i64 %x
+ %s = call dereferenceable_or_null(12) i8* @callee6(i8* %gep)
+ ret i8* %s
+}
+
+; We add the attributes from the callee to both the calls below.
+define internal i8* @callee7(i8 *%ptr, i1 %cond) alwaysinline {
+; CHECK-NOT: @callee7(
+ br i1 %cond, label %pass, label %fail
+
+pass:
+ %r = call i8* @foo(i8* noalias %ptr)
+ ret i8* %r
+
+fail:
+ %s = call i8* @baz(i8* %ptr)
+ ret i8* %s
+}
+
+define void @test7(i8* %ptr, i64 %x, i1 %cond) {
+; CHECK-LABEL: @test7
+; CHECK: call nonnull i8* @foo(i8* noalias
+; CHECK: call nonnull i8* @baz
+; CHECK: phi i8*
+; CHECK: call void @snort
+
+ %gep = getelementptr inbounds i8, i8* %ptr, i64 %x
+ %t = call nonnull i8* @callee7(i8* %gep, i1 %cond)
+ call void @snort(i8* %t)
+ ret void
+}
+declare void @snort(i8*)