From db77e57ea86d941a4262ef60261692f4cb6893e6 Mon Sep 17 00:00:00 2001 From: Nirav Dave Date: Mon, 27 Nov 2017 15:28:15 +0000 Subject: [PATCH] [DAG] Do MergeConsecutiveStores again before Instruction Selection Summary: Now that store-merge only generates type-safe stores, do a second pass just before instruction selection to allow lowered intrinsics to be merged as well. Reviewers: jyknight, hfinkel, RKSimon, efriedma, rnk, jmolloy Subscribers: javed.absar, llvm-commits Differential Revision: https://reviews.llvm.org/D33675 llvm-svn: 319036 --- llvm/include/llvm/CodeGen/TargetLowering.h | 2 +- llvm/lib/Target/AArch64/AArch64ISelLowering.cpp | 2 - llvm/test/CodeGen/AArch64/arm64-complex-ret.ll | 3 +- llvm/test/CodeGen/AArch64/arm64-narrow-st-merge.ll | 4 +- llvm/test/CodeGen/AArch64/arm64-variadic-aapcs.ll | 16 +++---- .../test/CodeGen/AArch64/tailcall-explicit-sret.ll | 14 +++--- .../test/CodeGen/AArch64/tailcall-implicit-sret.ll | 12 +++--- llvm/test/CodeGen/AMDGPU/amdgpu.private-memory.ll | 3 +- llvm/test/CodeGen/ARM/fp16-promote.ll | 50 ++++++++++++++-------- llvm/test/CodeGen/BPF/undef.ll | 11 ++--- llvm/test/CodeGen/Mips/cconv/vector.ll | 30 +++++++------ llvm/test/CodeGen/Mips/llvm-ir/extractelement.ll | 3 +- llvm/test/CodeGen/SystemZ/fp-move-13.ll | 6 +-- 13 files changed, 79 insertions(+), 77 deletions(-) diff --git a/llvm/include/llvm/CodeGen/TargetLowering.h b/llvm/include/llvm/CodeGen/TargetLowering.h index 78080ab..4210f58 100644 --- a/llvm/include/llvm/CodeGen/TargetLowering.h +++ b/llvm/include/llvm/CodeGen/TargetLowering.h @@ -413,7 +413,7 @@ public: /// Allow store merging after legalization in addition to before legalization. /// This may catch stores that do not exist earlier (eg, stores created from /// intrinsics). - virtual bool mergeStoresAfterLegalization() const { return false; } + virtual bool mergeStoresAfterLegalization() const { return true; } /// Returns if it's reasonable to merge stores to MemVT size. 
virtual bool canMergeStoresTo(unsigned AS, EVT MemVT, diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp index 4481e07..589abaa 100644 --- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp +++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp @@ -9562,8 +9562,6 @@ static SDValue replaceSplatVectorStore(SelectionDAG &DAG, StoreSDNode &St) { static SDValue splitStores(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, SelectionDAG &DAG, const AArch64Subtarget *Subtarget) { - if (!DCI.isBeforeLegalize()) - return SDValue(); StoreSDNode *S = cast(N); if (S->isVolatile() || S->isIndexed()) diff --git a/llvm/test/CodeGen/AArch64/arm64-complex-ret.ll b/llvm/test/CodeGen/AArch64/arm64-complex-ret.ll index 250edac..b4a3854 100644 --- a/llvm/test/CodeGen/AArch64/arm64-complex-ret.ll +++ b/llvm/test/CodeGen/AArch64/arm64-complex-ret.ll @@ -2,6 +2,7 @@ define { i192, i192, i21, i192 } @foo(i192) { ; CHECK-LABEL: foo: -; CHECK: stp xzr, xzr, [x8] +; CHECK-DAG: str xzr, [x8, #16] +; CHECK-DAG: str q0, [x8] ret { i192, i192, i21, i192 } {i192 0, i192 1, i21 2, i192 3} } diff --git a/llvm/test/CodeGen/AArch64/arm64-narrow-st-merge.ll b/llvm/test/CodeGen/AArch64/arm64-narrow-st-merge.ll index ec7c227..b48f3b4 100644 --- a/llvm/test/CodeGen/AArch64/arm64-narrow-st-merge.ll +++ b/llvm/test/CodeGen/AArch64/arm64-narrow-st-merge.ll @@ -19,7 +19,7 @@ entry: } ; CHECK-LABEL: Strh_zero_4 -; CHECK: stp wzr, wzr +; CHECK: str xzr ; CHECK-STRICT-LABEL: Strh_zero_4 ; CHECK-STRICT: strh wzr ; CHECK-STRICT: strh wzr @@ -137,7 +137,7 @@ entry: } ; CHECK-LABEL: Sturh_zero_4 -; CHECK: stp wzr, wzr +; CHECK: stur xzr ; CHECK-STRICT-LABEL: Sturh_zero_4 ; CHECK-STRICT: sturh wzr ; CHECK-STRICT: sturh wzr diff --git a/llvm/test/CodeGen/AArch64/arm64-variadic-aapcs.ll b/llvm/test/CodeGen/AArch64/arm64-variadic-aapcs.ll index 375877c..a09853a 100644 --- a/llvm/test/CodeGen/AArch64/arm64-variadic-aapcs.ll +++ 
b/llvm/test/CodeGen/AArch64/arm64-variadic-aapcs.ll @@ -32,11 +32,9 @@ define void @test_simple(i32 %n, ...) { ; CHECK: add [[VR_TOP:x[0-9]+]], [[VR_TOPTMP]], #128 ; CHECK: str [[VR_TOP]], [x[[VA_LIST]], #16] -; CHECK: mov [[GR_OFFS:w[0-9]+]], #-56 -; CHECK: str [[GR_OFFS]], [x[[VA_LIST]], #24] - -; CHECK: orr [[VR_OFFS:w[0-9]+]], wzr, #0xffffff80 -; CHECK: str [[VR_OFFS]], [x[[VA_LIST]], #28] +; CHECK: mov [[GRVR:x[0-9]+]], #-545460846720 +; CHECK: movk [[GRVR]], #65480 +; CHECK: str [[GRVR]], [x[[VA_LIST]], #24] %addr = bitcast %va_list* @var to i8* call void @llvm.va_start(i8* %addr) @@ -70,11 +68,9 @@ define void @test_fewargs(i32 %n, i32 %n1, i32 %n2, float %m, ...) { ; CHECK: add [[VR_TOP:x[0-9]+]], [[VR_TOPTMP]], #112 ; CHECK: str [[VR_TOP]], [x[[VA_LIST]], #16] -; CHECK: mov [[GR_OFFS:w[0-9]+]], #-40 -; CHECK: str [[GR_OFFS]], [x[[VA_LIST]], #24] - -; CHECK: mov [[VR_OFFS:w[0-9]+]], #-11 -; CHECK: str [[VR_OFFS]], [x[[VA_LIST]], #28] +; CHECK: mov [[GRVR_OFFS:x[0-9]+]], #-40 +; CHECK: movk [[GRVR_OFFS]], #65424, lsl #32 +; CHECK: str [[GRVR_OFFS]], [x[[VA_LIST]], #24] %addr = bitcast %va_list* @var to i8* call void @llvm.va_start(i8* %addr) diff --git a/llvm/test/CodeGen/AArch64/tailcall-explicit-sret.ll b/llvm/test/CodeGen/AArch64/tailcall-explicit-sret.ll index c157933..b60958b 100644 --- a/llvm/test/CodeGen/AArch64/tailcall-explicit-sret.ll +++ b/llvm/test/CodeGen/AArch64/tailcall-explicit-sret.ll @@ -35,7 +35,7 @@ define void @test_tailcall_explicit_sret_alloca_unused() #0 { } ; CHECK-LABEL: _test_tailcall_explicit_sret_alloca_dummyusers: -; CHECK: ldr [[PTRLOAD1:x[0-9]+]], [x0] +; CHECK: ldr [[PTRLOAD1:q[0-9]+]], [x0] ; CHECK: str [[PTRLOAD1]], [sp] ; CHECK: mov x8, sp ; CHECK-NEXT: bl _test_explicit_sret @@ -64,8 +64,8 @@ define void @test_tailcall_explicit_sret_gep(i1024* %ptr) #0 { ; CHECK: mov x[[CALLERX8NUM:[0-9]+]], x8 ; CHECK: mov x8, sp ; CHECK-NEXT: bl _test_explicit_sret -; CHECK-NEXT: ldr [[CALLERSRET1:x[0-9]+]], [sp] -; CHECK: str 
[[CALLERSRET1:x[0-9]+]], [x[[CALLERX8NUM]]] +; CHECK-NEXT: ldr [[CALLERSRET1:q[0-9]+]], [sp] +; CHECK: str [[CALLERSRET1:q[0-9]+]], [x[[CALLERX8NUM]]] ; CHECK: ret define i1024 @test_tailcall_explicit_sret_alloca_returned() #0 { %l = alloca i1024, align 8 @@ -79,8 +79,8 @@ define i1024 @test_tailcall_explicit_sret_alloca_returned() #0 { ; CHECK-DAG: mov [[FPTR:x[0-9]+]], x0 ; CHECK: mov x0, sp ; CHECK-NEXT: blr [[FPTR]] -; CHECK-NEXT: ldr [[CALLERSRET1:x[0-9]+]], [sp] -; CHECK: str [[CALLERSRET1:x[0-9]+]], [x[[CALLERX8NUM]]] +; CHECK: ldr [[CALLERSRET1:q[0-9]+]], [sp] +; CHECK: str [[CALLERSRET1:q[0-9]+]], [x[[CALLERX8NUM]]] ; CHECK: ret define void @test_indirect_tailcall_explicit_sret_nosret_arg(i1024* sret %arg, void (i1024*)* %f) #0 { %l = alloca i1024, align 8 @@ -94,8 +94,8 @@ define void @test_indirect_tailcall_explicit_sret_nosret_arg(i1024* sret %arg, v ; CHECK: mov x[[CALLERX8NUM:[0-9]+]], x8 ; CHECK: mov x8, sp ; CHECK-NEXT: blr x0 -; CHECK-NEXT: ldr [[CALLERSRET1:x[0-9]+]], [sp] -; CHECK: str [[CALLERSRET1:x[0-9]+]], [x[[CALLERX8NUM]]] +; CHECK: ldr [[CALLERSRET1:q[0-9]+]], [sp] +; CHECK: str [[CALLERSRET1:q[0-9]+]], [x[[CALLERX8NUM]]] ; CHECK: ret define void @test_indirect_tailcall_explicit_sret_(i1024* sret %arg, i1024 ()* %f) #0 { %ret = tail call i1024 %f() diff --git a/llvm/test/CodeGen/AArch64/tailcall-implicit-sret.ll b/llvm/test/CodeGen/AArch64/tailcall-implicit-sret.ll index 10c4ba4..f449a7e 100644 --- a/llvm/test/CodeGen/AArch64/tailcall-implicit-sret.ll +++ b/llvm/test/CodeGen/AArch64/tailcall-implicit-sret.ll @@ -11,8 +11,8 @@ declare i1024 @test_sret() #0 ; CHECK: mov x[[CALLERX8NUM:[0-9]+]], x8 ; CHECK: mov x8, sp ; CHECK-NEXT: bl _test_sret -; CHECK-NEXT: ldr [[CALLERSRET1:x[0-9]+]], [sp] -; CHECK: str [[CALLERSRET1:x[0-9]+]], [x[[CALLERX8NUM]]] +; CHECK: ldr [[CALLERSRET1:q[0-9]+]], [sp] +; CHECK: str [[CALLERSRET1:q[0-9]+]], [x[[CALLERX8NUM]]] ; CHECK: ret define i1024 @test_call_sret() #0 { %a = call i1024 @test_sret() @@ -23,8 +23,8 
@@ define i1024 @test_call_sret() #0 { ; CHECK: mov x[[CALLERX8NUM:[0-9]+]], x8 ; CHECK: mov x8, sp ; CHECK-NEXT: bl _test_sret -; CHECK-NEXT: ldr [[CALLERSRET1:x[0-9]+]], [sp] -; CHECK: str [[CALLERSRET1:x[0-9]+]], [x[[CALLERX8NUM]]] +; CHECK: ldr [[CALLERSRET1:q[0-9]+]], [sp] +; CHECK: str [[CALLERSRET1:q[0-9]+]], [x[[CALLERX8NUM]]] ; CHECK: ret define i1024 @test_tailcall_sret() #0 { %a = tail call i1024 @test_sret() @@ -35,8 +35,8 @@ define i1024 @test_tailcall_sret() #0 { ; CHECK: mov x[[CALLERX8NUM:[0-9]+]], x8 ; CHECK: mov x8, sp ; CHECK-NEXT: blr x0 -; CHECK-NEXT: ldr [[CALLERSRET1:x[0-9]+]], [sp] -; CHECK: str [[CALLERSRET1:x[0-9]+]], [x[[CALLERX8NUM]]] +; CHECK: ldr [[CALLERSRET1:q[0-9]+]], [sp] +; CHECK: str [[CALLERSRET1:q[0-9]+]], [x[[CALLERX8NUM]]] ; CHECK: ret define i1024 @test_indirect_tailcall_sret(i1024 ()* %f) #0 { %a = tail call i1024 %f() diff --git a/llvm/test/CodeGen/AMDGPU/amdgpu.private-memory.ll b/llvm/test/CodeGen/AMDGPU/amdgpu.private-memory.ll index 71c4c83..228d3c7 100644 --- a/llvm/test/CodeGen/AMDGPU/amdgpu.private-memory.ll +++ b/llvm/test/CodeGen/AMDGPU/amdgpu.private-memory.ll @@ -251,8 +251,7 @@ entry: ; R600: MOVA_INT -; SI-PROMOTE-DAG: buffer_store_byte v{{[0-9]+}}, off, s[{{[0-9]+:[0-9]+}}], s{{[0-9]+}} offset:4 ; encoding: -; SI-PROMOTE-DAG: buffer_store_byte v{{[0-9]+}}, off, s[{{[0-9]+:[0-9]+}}], s{{[0-9]+}} offset:5 ; encoding: +; SI-PROMOTE-DAG: buffer_store_short v{{[0-9]+}}, off, s[{{[0-9]+:[0-9]+}}], s{{[0-9]+}} offset:4 ; encoding: ; SI-ALLOCA-DAG: buffer_store_byte v{{[0-9]+}}, off, s[{{[0-9]+:[0-9]+}}], s{{[0-9]+}} offset:4 ; encoding: [0x04,0x00,0x60,0xe0 ; SI-ALLOCA-DAG: buffer_store_byte v{{[0-9]+}}, off, s[{{[0-9]+:[0-9]+}}], s{{[0-9]+}} offset:5 ; encoding: [0x05,0x00,0x60,0xe0 diff --git a/llvm/test/CodeGen/ARM/fp16-promote.ll b/llvm/test/CodeGen/ARM/fp16-promote.ll index 257d99d..da2a1df 100644 --- a/llvm/test/CodeGen/ARM/fp16-promote.ll +++ b/llvm/test/CodeGen/ARM/fp16-promote.ll @@ -817,25 +817,37 @@ 
define void @test_fmuladd(half* %p, half* %q, half* %r) #0 { ; CHECK-ALL-LABEL: test_insertelement: ; CHECK-ALL: sub sp, sp, #8 -; CHECK-ALL: ldrh -; CHECK-ALL: ldrh -; CHECK-ALL: ldrh -; CHECK-ALL: ldrh -; CHECK-ALL-DAG: strh -; CHECK-ALL-DAG: strh -; CHECK-ALL-DAG: mov -; CHECK-ALL-DAG: ldrh -; CHECK-ALL-DAG: orr -; CHECK-ALL-DAG: strh -; CHECK-ALL-DAG: strh -; CHECK-ALL-DAG: strh -; CHECK-ALL-DAG: ldrh -; CHECK-ALL-DAG: ldrh -; CHECK-ALL-DAG: ldrh -; CHECK-ALL-DAG: strh -; CHECK-ALL-DAG: strh -; CHECK-ALL-DAG: strh -; CHECK-ALL-DAG: strh + +; CHECK-VFP: and +; CHECK-VFP: mov +; CHECK-VFP: ldrd +; CHECK-VFP: orr +; CHECK-VFP: ldrh +; CHECK-VFP: stm +; CHECK-VFP: strh +; CHECK-VFP: ldm +; CHECK-VFP: stm + +; CHECK-NOVFP: ldrh +; CHECK-NOVFP: ldrh +; CHECK-NOVFP: ldrh +; CHECK-NOVFP: ldrh +; CHECK-NOVFP-DAG: strh +; CHECK-NOVFP-DAG: strh +; CHECK-NOVFP-DAG: mov +; CHECK-NOVFP-DAG: ldrh +; CHECK-NOVFP-DAG: orr +; CHECK-NOVFP-DAG: strh +; CHECK-NOVFP-DAG: strh +; CHECK-NOVFP-DAG: strh +; CHECK-NOVFP-DAG: ldrh +; CHECK-NOVFP-DAG: ldrh +; CHECK-NOVFP-DAG: ldrh +; CHECK-NOVFP-DAG: strh +; CHECK-NOVFP-DAG: strh +; CHECK-NOVFP-DAG: strh +; CHECK-NOVFP-DAG: strh + ; CHECK-ALL: add sp, sp, #8 define void @test_insertelement(half* %p, <4 x half>* %q, i32 %i) #0 { %a = load half, half* %p, align 2 diff --git a/llvm/test/CodeGen/BPF/undef.ll b/llvm/test/CodeGen/BPF/undef.ll index 11bc9eb..586a24d 100644 --- a/llvm/test/CodeGen/BPF/undef.ll +++ b/llvm/test/CodeGen/BPF/undef.ll @@ -14,12 +14,10 @@ ; Function Attrs: nounwind uwtable define i32 @ebpf_filter(%struct.__sk_buff* nocapture readnone %ebpf_packet) #0 section "socket1" { -; EL: r1 = 134678021 -; EB: r1 = 84281096 -; CHECK: *(u32 *)(r10 - 8) = r1 -; EL: r1 = 2569 -; EB: r1 = 2314 -; CHECK: *(u16 *)(r10 - 4) = r1 + +; EL: r1 = 11033905661445 ll +; EB: r1 = 361984551142686720 ll +; CHECK: *(u64 *)(r10 - 8) = r1 ; CHECK: r1 = 0 ; CHECK: *(u16 *)(r10 + 24) = r1 @@ -35,7 +33,6 @@ define i32 @ebpf_filter(%struct.__sk_buff* 
nocapture readnone %ebpf_packet) #0 s ; CHECK: *(u16 *)(r10 + 4) = r1 ; CHECK: *(u16 *)(r10 + 2) = r1 ; CHECK: *(u16 *)(r10 + 0) = r1 -; CHECK: *(u16 *)(r10 - 2) = r1 ; CHECK: *(u16 *)(r10 + 26) = r1 ; CHECK: r2 = r10 diff --git a/llvm/test/CodeGen/Mips/cconv/vector.ll b/llvm/test/CodeGen/Mips/cconv/vector.ll index 5a88d06..02d6272 100644 --- a/llvm/test/CodeGen/Mips/cconv/vector.ll +++ b/llvm/test/CodeGen/Mips/cconv/vector.ll @@ -821,8 +821,10 @@ entry: ; MIPS32R5: jal ; MIPS32R5: sw $2, {{[0-9]+}}($sp) -; MIPS32R5-DAG: sb ${{[0-9]+}}, 1(${{[0-9]+}}) -; MIPS32R5-DAG; sb ${{[0-9]+}}, %lo(gv2i8)(${{[0-9]+}}) +; MIPS32R5-DAG; sh ${{[0-9]+}}, %lo(gv2i8)(${{[0-9]+}}) + +; MIPS32R5-NOT: sb ${{[0-9]+}}, 1(${{[0-9]+}}) +; MIPS32R5-NOT; sb ${{[0-9]+}}, %lo(gv2i8)(${{[0-9]+}}) ; MIPS64EB: daddiu $4, $zero, 1543 ; MIPS64EB: daddiu $5, $zero, 3080 @@ -870,14 +872,14 @@ entry: ; MIPS32-NOT: ori $6 ; MIPS32-NOT: ori $7 -; MIPS32R5-DAG: lw $4, {{[0-9]+}}($sp) -; MIPS32R5-DAG: lw $5, {{[0-9]+}}($sp) +; MIPS32R5-NOT: lw $4, {{[0-9]+}}($sp) +; MIPS32R5-NOT: lw $5, {{[0-9]+}}($sp) ; MIPS64: ori $4 ; MIPS64: ori $5 -; MIPS64R5: lw $4 -; MIPS64R5: lw $5 +; MIPS64R5-NOT: lw $4 +; MIPS64R5-NOT: lw $5 ; MIPS32: jal i8_4 ; MIPS64: jalr $25 @@ -996,14 +998,14 @@ entry: ; MIPS32-DAG: ori $4 ; MIPS32-DAG: ori $5 -; MIPS32R5-DAG: lw $4 -; MIPS32R5-DAG: lw $5 +; MIPS32R5-NOT: lw $4 +; MIPS32R5-NOT: lw $5 ; MIPS64: ori $4 ; MIPS64: ori $5 -; MIPS64R5-DAG: lw $4 -; MIPS64R5-DAG: lw $5 +; MIPS64R5-NOT: lw $4 +; MIPS64R5-NOT: lw $5 ; MIPS32: jal i16_2 ; MIPS64: jalr $25 @@ -1037,8 +1039,8 @@ entry: ; MIPS64-DAG: daddiu $4 ; MIPS64-DAG: daddiu $5 -; MIPS64R5-DAG: ld $4 -; MIPS64R5-DAG: ld $5 +; MIPS64R5-NOT: ld $4 +; MIPS64R5-NOT: ld $5 ; MIPS32: jal i16_4 ; MIPS64: jalr $25 @@ -1133,8 +1135,8 @@ entry: ; MIPS64: daddiu $4 ; MIPS64: daddiu $5 -; MIPS64R5-DAG: ld $4 -; MIPS64R5-DAG: ld $5 +; MIPS64R5-NOT ld $4 +; MIPS64R5-NOT: ld $5 ; MIPS32: jal i32_2 ; MIPS64: jalr $25 diff --git 
a/llvm/test/CodeGen/Mips/llvm-ir/extractelement.ll b/llvm/test/CodeGen/Mips/llvm-ir/extractelement.ll index 3c7df4a..f7b8ea5 100644 --- a/llvm/test/CodeGen/Mips/llvm-ir/extractelement.ll +++ b/llvm/test/CodeGen/Mips/llvm-ir/extractelement.ll @@ -12,8 +12,7 @@ define i1 @via_stack_bug(i8 signext %idx) { ; ALL-LABEL: via_stack_bug: ; ALL-DAG: addiu [[ONE:\$[0-9]+]], $zero, 1 -; ALL-DAG: sb [[ONE]], 7($sp) -; ALL-DAG: sb $zero, 6($sp) +; ALL-DAG: sh [[ONE]], 6($sp) ; ALL-DAG: andi [[MASKED_IDX:\$[0-9]+]], $4, 1 ; ALL-DAG: addiu [[VPTR:\$[0-9]+]], $sp, 6 ; ALL-DAG: or [[EPTR:\$[0-9]+]], [[MASKED_IDX]], [[VPTR]] diff --git a/llvm/test/CodeGen/SystemZ/fp-move-13.ll b/llvm/test/CodeGen/SystemZ/fp-move-13.ll index d6c53ea..4ef9d11 100644 --- a/llvm/test/CodeGen/SystemZ/fp-move-13.ll +++ b/llvm/test/CodeGen/SystemZ/fp-move-13.ll @@ -22,10 +22,8 @@ define void @f1(fp128 *%x) { ; so this goes through memory. define void @f2(fp128 *%a, i128 *%b) { ; CHECK-LABEL: f2: -; CHECK: lg -; CHECK: lg -; CHECK: stg -; CHECK: stg +; CHECK: vl +; CHECK: vst ; CHECK: br %r14 %val = load i128 , i128 *%b %res = bitcast i128 %val to fp128 -- 2.7.4