From: Tim Northover
Date: Tue, 2 Dec 2014 23:13:39 +0000 (+0000)
Subject: AArch64: don't be too greedy when folding :lo12: accesses into mem ops.
X-Git-Url: http://review.tizen.org/git/?a=commitdiff_plain;h=ec7ebebe558d42924784dc290d35284864d0088a;p=platform%2Fupstream%2Fllvm.git

AArch64: don't be too greedy when folding :lo12: accesses into mem ops.

This frequently leads to cases like:

   ldr xD, [xN, :lo12:var]
   add xA, xN, :lo12:var
   ldr xD, [xA, #8]

where the ADD would have been needed anyway, and the two distinct
addressing modes can prevent the formation of an ldp. Because of how we
handle ADRP (aggressively forming an ADRP/ADD pseudo-inst at ISel time),
this pattern also results in duplicated ADRP instructions (one on its
own to cover the ldr, and one combined with the add).
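For comparison, the shape this patch aims for instead (a sketch
continuing the example above; the registers xN, xA, xD and xE are
illustrative) is:

   add xA, xN, :lo12:var
   ldr xD, [xA]
   ldr xE, [xA, #8]

Both accesses now share the xA base with simple immediate offsets, so
they can be merged into a single ldp (the updated tests below check for
exactly these ldp/stp sequences), and only one ADRP/ADD pair is needed
for var.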
llvm-svn: 223172
---

diff --git a/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp b/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp
index 87a6d80..4456d2a 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp
@@ -569,6 +569,27 @@ bool AArch64DAGToDAGISel::SelectArithExtendedRegister(SDValue N, SDValue &Reg,
   return isWorthFolding(N);
 }
 
+/// If there's a use of this ADDlow that's not itself a load/store then we'll
+/// need to create a real ADD instruction from it anyway and there's no point in
+/// folding it into the mem op. Theoretically, it shouldn't matter, but there's
+/// a single pseudo-instruction for an ADRP/ADD pair so over-aggressive folding
+/// leads to duplicated ADRP instructions.
+static bool isWorthFoldingADDlow(SDValue N) {
+  for (auto Use : N->uses()) {
+    if (Use->getOpcode() != ISD::LOAD && Use->getOpcode() != ISD::STORE &&
+        Use->getOpcode() != ISD::ATOMIC_LOAD &&
+        Use->getOpcode() != ISD::ATOMIC_STORE)
+      return false;
+
+    // ldar and stlr have much more restrictive addressing modes (just a
+    // register).
+    if (cast<MemSDNode>(Use)->getOrdering() > Monotonic)
+      return false;
+  }
+
+  return true;
+}
+
 /// SelectAddrModeIndexed - Select a "register plus scaled unsigned 12-bit
 /// immediate" address. The "Size" argument is the size in bytes of the memory
 /// reference, which determines the scale.
@@ -582,7 +603,7 @@ bool AArch64DAGToDAGISel::SelectAddrModeIndexed(SDValue N, unsigned Size,
     return true;
   }
 
-  if (N.getOpcode() == AArch64ISD::ADDlow) {
+  if (N.getOpcode() == AArch64ISD::ADDlow && isWorthFoldingADDlow(N)) {
     GlobalAddressSDNode *GAN =
         dyn_cast<GlobalAddressSDNode>(N.getOperand(1).getNode());
     Base = N.getOperand(0);
diff --git a/llvm/test/CodeGen/AArch64/arm64-atomic-128.ll b/llvm/test/CodeGen/AArch64/arm64-atomic-128.ll
index 3377849..642d72a 100644
--- a/llvm/test/CodeGen/AArch64/arm64-atomic-128.ll
+++ b/llvm/test/CodeGen/AArch64/arm64-atomic-128.ll
@@ -29,8 +29,7 @@ define void @fetch_and_nand(i128* %p, i128 %bits) {
 ; CHECK: stlxp [[SCRATCH_RES:w[0-9]+]], [[SCRATCH_REGLO]], [[SCRATCH_REGHI]], [x0]
 ; CHECK: cbnz [[SCRATCH_RES]], [[LABEL]]
 
-; CHECK-DAG: str [[DEST_REGHI]]
-; CHECK-DAG: str [[DEST_REGLO]]
+; CHECK-DAG: stp [[DEST_REGLO]], [[DEST_REGHI]]
   %val = atomicrmw nand i128* %p, i128 %bits release
   store i128 %val, i128* @var, align 16
   ret void
@@ -45,8 +44,7 @@ define void @fetch_and_or(i128* %p, i128 %bits) {
 ; CHECK: stlxp [[SCRATCH_RES:w[0-9]+]], [[SCRATCH_REGLO]], [[SCRATCH_REGHI]], [x0]
 ; CHECK: cbnz [[SCRATCH_RES]], [[LABEL]]
 
-; CHECK-DAG: str [[DEST_REGHI]]
-; CHECK-DAG: str [[DEST_REGLO]]
+; CHECK-DAG: stp [[DEST_REGLO]], [[DEST_REGHI]]
   %val = atomicrmw or i128* %p, i128 %bits seq_cst
   store i128 %val, i128* @var, align 16
   ret void
@@ -61,8 +59,7 @@ define void @fetch_and_add(i128* %p, i128 %bits) {
 ; CHECK: stlxp [[SCRATCH_RES:w[0-9]+]], [[SCRATCH_REGLO]], [[SCRATCH_REGHI]], [x0]
 ; CHECK: cbnz [[SCRATCH_RES]], [[LABEL]]
 
-; CHECK-DAG: str [[DEST_REGHI]]
-; CHECK-DAG: str [[DEST_REGLO]]
+; CHECK-DAG: stp [[DEST_REGLO]], [[DEST_REGHI]]
   %val = atomicrmw add i128* %p, i128 %bits seq_cst
   store i128 %val, i128* @var, align 16
   ret void
@@ -77,8 +74,7 @@ define void @fetch_and_sub(i128* %p, i128 %bits) {
 ; CHECK: stlxp [[SCRATCH_RES:w[0-9]+]], [[SCRATCH_REGLO]], [[SCRATCH_REGHI]], [x0]
 ; CHECK: cbnz [[SCRATCH_RES]], [[LABEL]]
 
-; CHECK-DAG: str [[DEST_REGHI]]
-; CHECK-DAG: str [[DEST_REGLO]]
+; CHECK-DAG: stp [[DEST_REGLO]], [[DEST_REGHI]]
   %val = atomicrmw sub i128* %p, i128 %bits seq_cst
   store i128 %val, i128* @var, align 16
   ret void
@@ -99,8 +95,7 @@ define void @fetch_and_min(i128* %p, i128 %bits) {
 ; CHECK: stlxp [[SCRATCH_RES:w[0-9]+]], [[SCRATCH_REGLO]], [[SCRATCH_REGHI]], [x0]
 ; CHECK: cbnz [[SCRATCH_RES]], [[LABEL]]
 
-; CHECK-DAG: str [[DEST_REGHI]]
-; CHECK-DAG: str [[DEST_REGLO]]
+; CHECK-DAG: stp [[DEST_REGLO]], [[DEST_REGHI]]
   %val = atomicrmw min i128* %p, i128 %bits seq_cst
   store i128 %val, i128* @var, align 16
   ret void
@@ -121,8 +116,7 @@ define void @fetch_and_max(i128* %p, i128 %bits) {
 ; CHECK: stlxp [[SCRATCH_RES:w[0-9]+]], [[SCRATCH_REGLO]], [[SCRATCH_REGHI]], [x0]
 ; CHECK: cbnz [[SCRATCH_RES]], [[LABEL]]
 
-; CHECK-DAG: str [[DEST_REGHI]]
-; CHECK-DAG: str [[DEST_REGLO]]
+; CHECK-DAG: stp [[DEST_REGLO]], [[DEST_REGHI]]
   %val = atomicrmw max i128* %p, i128 %bits seq_cst
   store i128 %val, i128* @var, align 16
   ret void
@@ -143,8 +137,7 @@ define void @fetch_and_umin(i128* %p, i128 %bits) {
 ; CHECK: stlxp [[SCRATCH_RES:w[0-9]+]], [[SCRATCH_REGLO]], [[SCRATCH_REGHI]], [x0]
 ; CHECK: cbnz [[SCRATCH_RES]], [[LABEL]]
 
-; CHECK-DAG: str [[DEST_REGHI]]
-; CHECK-DAG: str [[DEST_REGLO]]
+; CHECK-DAG: stp [[DEST_REGLO]], [[DEST_REGHI]]
   %val = atomicrmw umin i128* %p, i128 %bits seq_cst
   store i128 %val, i128* @var, align 16
   ret void
@@ -165,8 +158,7 @@ define void @fetch_and_umax(i128* %p, i128 %bits) {
 ; CHECK: stlxp [[SCRATCH_RES:w[0-9]+]], [[SCRATCH_REGLO]], [[SCRATCH_REGHI]], [x0]
 ; CHECK: cbnz [[SCRATCH_RES]], [[LABEL]]
 
-; CHECK-DAG: str [[DEST_REGHI]]
-; CHECK-DAG: str [[DEST_REGLO]]
+; CHECK-DAG: stp [[DEST_REGLO]], [[DEST_REGHI]]
   %val = atomicrmw umax i128* %p, i128 %bits seq_cst
   store i128 %val, i128* @var, align 16
   ret void
diff --git a/llvm/test/CodeGen/AArch64/arm64-variadic-aapcs.ll b/llvm/test/CodeGen/AArch64/arm64-variadic-aapcs.ll
index 8e41304..44f2af1 100644
--- a/llvm/test/CodeGen/AArch64/arm64-variadic-aapcs.ll
+++ b/llvm/test/CodeGen/AArch64/arm64-variadic-aapcs.ll
@@ -12,6 +12,7 @@ define void @test_simple(i32 %n, ...) {
 ; CHECK: add [[STACK_TOP:x[0-9]+]], sp, #[[STACKSIZE]]
 
 ; CHECK: adrp x[[VA_LIST_HI:[0-9]+]], var
+; CHECK: add x[[VA_LIST:[0-9]+]], {{x[0-9]+}}, :lo12:var
 
 ; CHECK: stp x1, x2, [sp, #[[GR_BASE:[0-9]+]]]
 ; ... omit middle ones ...
@@ -21,11 +22,10 @@ define void @test_simple(i32 %n, ...) {
 ; ... omit middle ones ...
 ; CHECK: stp q6, q7, [sp, #
 
-; CHECK: str [[STACK_TOP]], [x[[VA_LIST_HI]], :lo12:var]
+; CHECK: str [[STACK_TOP]], [x[[VA_LIST]]]
 
 ; CHECK: add [[GR_TOPTMP:x[0-9]+]], sp, #[[GR_BASE]]
 ; CHECK: add [[GR_TOP:x[0-9]+]], [[GR_TOPTMP]], #56
-; CHECK: add x[[VA_LIST:[0-9]+]], {{x[0-9]+}}, :lo12:var
 ; CHECK: str [[GR_TOP]], [x[[VA_LIST]], #8]
 
 ; CHECK: mov [[VR_TOPTMP:x[0-9]+]], sp
@@ -50,6 +50,7 @@ define void @test_fewargs(i32 %n, i32 %n1, i32 %n2, float %m, ...) {
 ; CHECK: add [[STACK_TOP:x[0-9]+]], sp, #[[STACKSIZE]]
 
 ; CHECK: adrp x[[VA_LIST_HI:[0-9]+]], var
+; CHECK: add x[[VA_LIST:[0-9]+]], {{x[0-9]+}}, :lo12:var
 
 ; CHECK: stp x3, x4, [sp, #[[GR_BASE:[0-9]+]]]
 ; ... omit middle ones ...
@@ -59,11 +60,10 @@ define void @test_fewargs(i32 %n, i32 %n1, i32 %n2, float %m, ...) {
 ; ... omit middle ones ...
 ; CHECK: str q7, [sp, #
 
-; CHECK: str [[STACK_TOP]], [x[[VA_LIST_HI]], :lo12:var]
+; CHECK: str [[STACK_TOP]], [x[[VA_LIST]]]
 
 ; CHECK: add [[GR_TOPTMP:x[0-9]+]], sp, #[[GR_BASE]]
 ; CHECK: add [[GR_TOP:x[0-9]+]], [[GR_TOPTMP]], #40
-; CHECK: add x[[VA_LIST:[0-9]+]], {{x[0-9]+}}, :lo12:var
 ; CHECK: str [[GR_TOP]], [x[[VA_LIST]], #8]
 
 ; CHECK: mov [[VR_TOPTMP:x[0-9]+]], sp
@@ -89,7 +89,8 @@ define void @test_nospare([8 x i64], [8 x float], ...) {
   call void @llvm.va_start(i8* %addr)
 ; CHECK-NOT: sub sp, sp
 ; CHECK: mov [[STACK:x[0-9]+]], sp
-; CHECK: str [[STACK]], [{{x[0-9]+}}, :lo12:var]
+; CHECK: add x[[VAR:[0-9]+]], {{x[0-9]+}}, :lo12:var
+; CHECK: str [[STACK]], [x[[VAR]]]
   ret void
 }
 
@@ -100,7 +101,8 @@ define void @test_offsetstack([8 x i64], [2 x i64], [3 x float], ...) {
 ; CHECK-LABEL: test_offsetstack:
 ; CHECK: sub sp, sp, #80
 ; CHECK: add [[STACK_TOP:x[0-9]+]], sp, #96
-; CHECK: str [[STACK_TOP]], [{{x[0-9]+}}, :lo12:var]
+; CHECK: add x[[VAR:[0-9]+]], {{x[0-9]+}}, :lo12:var
+; CHECK: str [[STACK_TOP]], [x[[VAR]]]
 
   %addr = bitcast %va_list* @var to i8*
   call void @llvm.va_start(i8* %addr)
diff --git a/llvm/test/CodeGen/AArch64/func-argpassing.ll b/llvm/test/CodeGen/AArch64/func-argpassing.ll
index abb732c..9fc9a5f 100644
--- a/llvm/test/CodeGen/AArch64/func-argpassing.ll
+++ b/llvm/test/CodeGen/AArch64/func-argpassing.ll
@@ -96,10 +96,8 @@ define [2 x i64] @return_struct() {
   %addr = bitcast %myStruct* @varstruct to [2 x i64]*
   %val = load [2 x i64]* %addr
   ret [2 x i64] %val
-; CHECK-DAG: ldr x0, [{{x[0-9]+}}, {{#?}}:lo12:varstruct]
-    ; Odd register regex below disallows x0 which we want to be live now.
-; CHECK-DAG: add {{x[1-9][0-9]*}}, {{x[1-9][0-9]*}}, {{#?}}:lo12:varstruct
-; CHECK: ldr x1, [{{x[1-9][0-9]*}}, #8]
+; CHECK: add x[[VARSTRUCT:[0-9]+]], {{x[0-9]+}}, :lo12:varstruct
+; CHECK: ldp x0, x1, [x[[VARSTRUCT]]]
 ; Make sure epilogue immediately follows
 ; CHECK-NEXT: ret
 }
@@ -166,8 +164,8 @@ define void @stacked_fpu(float %var0, double %var1, float %var2, float %var3,
 define i64 @check_i128_regalign(i32 %val0, i128 %val1, i64 %val2) {
 ; CHECK-LABEL: check_i128_regalign
     store i128 %val1, i128* @var128
-; CHECK-DAG: str x2, [{{x[0-9]+}}, {{#?}}:lo12:var128]
-; CHECK-DAG: str x3, [{{x[0-9]+}}, #8]
+; CHECK: add x[[VAR128:[0-9]+]], {{x[0-9]+}}, :lo12:var128
+; CHECK-DAG: stp x2, x3, [x[[VAR128]]]
 
   ret i64 %val2
 ; CHECK: mov x0, x4
diff --git a/llvm/test/CodeGen/AArch64/func-calls.ll b/llvm/test/CodeGen/AArch64/func-calls.ll
index 51979f0..16157f8 100644
--- a/llvm/test/CodeGen/AArch64/func-calls.ll
+++ b/llvm/test/CodeGen/AArch64/func-calls.ll
@@ -62,8 +62,8 @@ define void @simple_rets() {
   %arr = call [2 x i64] @return_smallstruct()
   store [2 x i64] %arr, [2 x i64]* @varsmallstruct
 ; CHECK: bl return_smallstruct
-; CHECK: str x1, [{{x[0-9]+}}, #8]
-; CHECK: str x0, [{{x[0-9]+}}, {{#?}}:lo12:varsmallstruct]
+; CHECK: add x[[VARSMALLSTRUCT:[0-9]+]], {{x[0-9]+}}, :lo12:varsmallstruct
+; CHECK: stp x0, x1, [x[[VARSMALLSTRUCT]]]
 
   call void @return_large_struct(%myStruct* sret @varstruct)
 ; CHECK: add x8, {{x[0-9]+}}, {{#?}}:lo12:varstruct
@@ -128,12 +128,12 @@ define void @check_i128_align() {
   call void @check_i128_stackalign(i32 0, i32 1, i32 2, i32 3,
                                    i32 4, i32 5, i32 6, i32 7,
                                    i32 42, i128 %val)
-; CHECK: ldr [[I128LO:x[0-9]+]], [{{x[0-9]+}}, {{#?}}:lo12:var128]
-; CHECK: ldr [[I128HI:x[0-9]+]], [{{x[0-9]+}}, #8]
+; CHECK: add x[[VAR128:[0-9]+]], {{x[0-9]+}}, :lo12:var128
+; CHECK: ldp [[I128LO:x[0-9]+]], [[I128HI:x[0-9]+]], [x[[VAR128]]]
 ; CHECK: stp [[I128LO]], [[I128HI]], [sp, #16]
 
-; CHECK-NONEON: ldr [[I128LO:x[0-9]+]], [{{x[0-9]+}}, :lo12:var128]
-; CHECK-NONEON: ldr [[I128HI:x[0-9]+]], [{{x[0-9]+}}, #8]
+; CHECK-NONEON: add x[[VAR128:[0-9]+]], {{x[0-9]+}}, :lo12:var128
+; CHECK-NONEON: ldp [[I128LO:x[0-9]+]], [[I128HI:x[0-9]+]], [x[[VAR128]]]
 ; CHECK-NONEON: stp [[I128LO]], [[I128HI]], [sp, #16]
 
 ; CHECK: bl check_i128_stackalign
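
A closing note on the ordering check in isWorthFoldingADDlow above:
loads and stores with ordering stronger than monotonic must be emitted
as ldar/stlr, which accept only a bare base register, so a :lo12:
offset could never be folded into them in the first place. A sketch of
the two addressing forms (the registers and the symbol var are
illustrative):

   ldr  x0, [x1, :lo12:var]   // plain ldr: base plus :lo12: offset is legal
   ldar x0, [x1]              // acquire load: base register only, no offset form

An ADDlow feeding such an atomic access therefore needs a real ADD
regardless, and folding it into the memory operation is never
worthwhile there.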