return false;
AllCallsAreTailCalls = true;
- // The local stack holds all alloca instructions and all byval arguments.
AllocaDerivedValueTracker Tracker;
- for (Argument &Arg : F.args()) {
- if (Arg.hasByValAttr())
- Tracker.walk(&Arg);
- }
for (auto &BB : F) {
for (auto &I : BB)
if (AllocaInst *AI = dyn_cast<AllocaInst>(&I))
for (auto &Arg : CI->arg_operands()) {
if (isa<Constant>(Arg.getUser()))
continue;
- if (Argument *A = dyn_cast<Argument>(Arg.getUser()))
- if (!A->hasByValAttr())
- continue;
+ if (isa<Argument>(Arg.getUser()))
+ continue;
SafeToTail = false;
break;
}
static Value *HandleByValArgument(Value *Arg, Instruction *TheCall,
const Function *CalledFunc,
InlineFunctionInfo &IFI,
- unsigned ByValAlignment) {
+ unsigned ByValAlignment,
+ bool &AddedNewAllocas) {
PointerType *ArgTy = cast<PointerType>(Arg->getType());
Type *AggTy = ArgTy->getElementType();
// Uses of the argument in the function should use our new alloca
// instead.
+ AddedNewAllocas = true;
return NewAlloca;
}
SmallVector<ReturnInst*, 8> Returns;
ClonedCodeInfo InlinedFunctionInfo;
Function::iterator FirstNewBlock;
+ bool AddedNewAllocas = false;
{ // Scope to destroy VMap after cloning.
ValueToValueMapTy VMap;
// modify the struct.
if (CS.isByValArgument(ArgNo)) {
ActualArg = HandleByValArgument(ActualArg, TheCall, CalledFunc, IFI,
- CalledFunc->getParamAlignment(ArgNo+1));
+ CalledFunc->getParamAlignment(ArgNo+1),
+ AddedNewAllocas);
if (ActualArg != *AI)
ByValInit.push_back(std::make_pair(ActualArg, (Value*) *AI));
}
// f -> musttail g -> tail f ==> f -> tail f
// f -> g -> musttail f ==> f -> f
// f -> g -> tail f ==> f -> f
+ //
+ // If an alloca was introduced in the frame due to a byval parameter
+ // being passed to a subsequent call, tail calls must have the tail
+ // stripped as they may not access variables in the caller's stack.
+ // A single alloca ripples throughout, as the alloca may be aliased by
+ // bitcasts or may escape and be mutated outside of the function.
CallInst::TailCallKind ChildTCK = CI->getTailCallKind();
ChildTCK = std::min(CallSiteTailKind, ChildTCK);
- CI->setTailCallKind(ChildTCK);
+ if (AddedNewAllocas)
+ CI->setTailCallKind(CallInst::TCK_None);
+ else
+ CI->setTailCallKind(ChildTCK);
InlinedMustTailCalls |= CI->isMustTailCall();
// Calls inlined through a 'nounwind' call site should be marked
; CHECK: %[[VAL:.*]] = load i32* %x
; CHECK: store i32 %[[VAL]], i32* %[[POS]]
; CHECK: {{^ *}}call void @ext(i32* %[[POS]]
-; CHECK: tail call void @ext(i32* null)
+; CHECK: {{^ *}}call void @ext(i32* null)
; CHECK: ret void
tail call void @qux(i32* byval %x)
ret void
ret void
}
-; Don't insert lifetime end markers here, the lifetime is trivially over due
-; the return.
-; CHECK: define void @test_byval_a(
-; CHECK: musttail call void @test_byval_c(
-; CHECK-NEXT: ret void
-
-declare void @test_byval_c(i32* byval %p)
-define internal void @test_byval_b(i32* byval %p) {
- musttail call void @test_byval_c(i32* byval %p)
- ret void
-}
-define void @test_byval_a(i32* byval %p) {
- musttail call void @test_byval_b(i32* byval %p)
- ret void
-}
-
-; Don't insert a stack restore, we're about to return.
-; CHECK: define void @test_dynalloca_a(
-; CHECK: call i8* @llvm.stacksave(
-; CHECK: alloca i8, i32 %n
-; CHECK: musttail call void @test_dynalloca_c(
-; CHECK-NEXT: ret void
-
-declare void @escape(i8* %buf)
-declare void @test_dynalloca_c(i32* byval %p, i32 %n)
-define internal void @test_dynalloca_b(i32* byval %p, i32 %n) alwaysinline {
- %buf = alloca i8, i32 %n ; dynamic alloca
- call void @escape(i8* %buf) ; escape it
- musttail call void @test_dynalloca_c(i32* byval %p, i32 %n)
- ret void
-}
-define void @test_dynalloca_a(i32* byval %p, i32 %n) {
- musttail call void @test_dynalloca_b(i32* byval %p, i32 %n)
- ret void
-}
-
; We can't merge the returns.
; CHECK: define void @test_multiret_a(
; CHECK: musttail call void @test_multiret_c(
--- /dev/null
+; RUN: opt -dse -inline -S %s | FileCheck %s
+
+declare void @external(i32* byval)
+declare i32 @identity(i32* byval)
+
+; An alloca in the inlinee should not force the tail to be stripped
+
+define void @inlinee_with_alloca() {
+ %local = alloca i32
+ store i32 42, i32* %local, align 4
+ tail call void @external(i32* byval %local)
+ ret void
+}
+
+define void @inliner_without_alloca() {
+ tail call void @inlinee_with_alloca()
+ ret void
+}
+
+; CHECK-LABEL: inliner_without_alloca
+; CHECK-NEXT: %local.i = alloca i32
+; CHECK: store i32 42, i32* %local.i
+; CHECK: tail call void @external
+; CHECK: ret
+
+; An alloca in the inliner should not force the tail to be stripped
+
+define i32 @inliner_with_alloca() {
+ %local = alloca i32
+ store i32 42, i32* %local, align 4
+ %1 = tail call i32 @identity(i32* byval %local)
+ ret i32 %1
+}
+
+; CHECK-LABEL: inliner_with_alloca
+; CHECK: %local = alloca i32
+; CHECK: store i32 42, i32* %local
+; CHECK: %1 = tail call i32 @identity
+; CHECK: ret i32 %1
+
+; Force the synthesis of the value through the byval parameter.
+; The alloca should force the tail to be stripped
+
+define void @inlinee_with_passthru(i32* byval %value) {
+ tail call void @external(i32* byval %value)
+ ret void
+}
+
+define void @strip_tail(i32* %value) {
+ tail call void @inlinee_with_passthru(i32* %value)
+ ret void
+}
+
+; CHECK-LABEL: strip_tail
+; CHECK: %value1 = alloca i32
+; CHECK: {{^ *}}call void @external
+; CHECK: ret void
+
; Don't tail call if a byval arg is captured.
define void @test9(i32* byval %a) {
; CHECK-LABEL: define void @test9(
-; CHECK: {{^ *}}call void @use(
+; CHECK: tail call void @use(
call void @use(i32* %a)
ret void
}
--- /dev/null
+; RUN: opt -mtriple i386 -Os -S %s -o - | FileCheck %s
+; RUN: opt -mtriple x86_64 -Os -S %s -o - | FileCheck %s
+; RUN: opt -mtriple armv7 -Os -S %s -o - | FileCheck %s
+
+%struct.D16 = type { [16 x double] }
+
+declare void @_Z2OpP3D16PKS_S2_(%struct.D16*, %struct.D16*, %struct.D16*)
+
+define void @_Z7TestRefRK3D16S1_(%struct.D16* noalias sret %agg.result, %struct.D16* %RHS, %struct.D16* %LHS) {
+ %1 = alloca %struct.D16*, align 8
+ %2 = alloca %struct.D16*, align 8
+ store %struct.D16* %RHS, %struct.D16** %1, align 8
+ store %struct.D16* %LHS, %struct.D16** %2, align 8
+ %3 = load %struct.D16** %1, align 8
+ %4 = load %struct.D16** %2, align 8
+ call void @_Z2OpP3D16PKS_S2_(%struct.D16* %agg.result, %struct.D16* %3, %struct.D16* %4)
+ ret void
+}
+
+; CHECK: define void @_Z7TestRefRK3D16S1_({{.*}}) {
+; CHECK: tail call void @_Z2OpP3D16PKS_S2_(%struct.D16* %agg.result, %struct.D16* %RHS, %struct.D16* %LHS)
+; CHECK: ret void
+; CHECK: }
+
+define void @_Z7TestVal3D16S_(%struct.D16* noalias sret %agg.result, %struct.D16* byval align 8 %RHS, %struct.D16* byval align 8 %LHS) {
+ call void @_Z2OpP3D16PKS_S2_(%struct.D16* %agg.result, %struct.D16* %RHS, %struct.D16* %LHS)
+ ret void
+}
+
+; CHECK: define void @_Z7TestVal3D16S_({{.*}}) {
+; CHECK: tail call void @_Z2OpP3D16PKS_S2_(%struct.D16* %agg.result, %struct.D16* %RHS, %struct.D16* %LHS)
+; CHECK: ret void
+; CHECK: }
+