From 3870857226b68fe19e97969e256e31c5eb681c04 Mon Sep 17 00:00:00 2001 From: Paul Walker Date: Thu, 2 Mar 2023 13:23:53 +0000 Subject: [PATCH] [SVE] Restrict cmp+and->pred_cmp isel to instances where the and is the sole user of the compare. Without the single use restriction we may replace the and with a more costly duplicated compare. Differential Revision: https://reviews.llvm.org/D145755 --- llvm/lib/Target/AArch64/SVEInstrFormats.td | 16 ++++++++++------ llvm/test/CodeGen/AArch64/sve-fcmp.ll | 8 ++++---- llvm/test/CodeGen/AArch64/sve-fp-int-min-max.ll | 7 +++---- llvm/test/CodeGen/AArch64/sve-intrinsics-int-compares.ll | 8 ++++---- 4 files changed, 21 insertions(+), 18 deletions(-) diff --git a/llvm/lib/Target/AArch64/SVEInstrFormats.td b/llvm/lib/Target/AArch64/SVEInstrFormats.td index 71995fc..4c97ae8 100644 --- a/llvm/lib/Target/AArch64/SVEInstrFormats.td +++ b/llvm/lib/Target/AArch64/SVEInstrFormats.td @@ -47,6 +47,10 @@ def SDT_AArch64Setcc : SDTypeProfile<1, 4, [ ]>; def AArch64setcc_z : SDNode<"AArch64ISD::SETCC_MERGE_ZERO", SDT_AArch64Setcc>; +def AArch64setcc_z_oneuse : PatFrag<(ops node:$pg, node:$op1, node:$op2, node:$cc), + (AArch64setcc_z node:$pg, node:$op1, node:$op2, node:$cc), [{ + return N->hasOneUse(); +}]>; def SVEPatternOperand : AsmOperandClass { let Name = "SVEPattern"; @@ -5028,9 +5032,9 @@ multiclass SVE_SETCC_Pat; def : Pat<(predvt (AArch64setcc_z predvt:$Op1, intvt:$Op2, intvt:$Op3, invcc)), (cmp $Op1, $Op3, $Op2)>; - def : Pat<(predvt (and predvt:$Pg, (AArch64setcc_z (predvt (AArch64ptrue 31)), intvt:$Op2, intvt:$Op3, cc))), + def : Pat<(predvt (and predvt:$Pg, (AArch64setcc_z_oneuse (predvt (AArch64ptrue 31)), intvt:$Op2, intvt:$Op3, cc))), (cmp $Pg, $Op2, $Op3)>; - def : Pat<(predvt (and predvt:$Pg, (AArch64setcc_z (predvt (AArch64ptrue 31)), intvt:$Op2, intvt:$Op3, invcc))), + def : Pat<(predvt (and predvt:$Pg, (AArch64setcc_z_oneuse (predvt (AArch64ptrue 31)), intvt:$Op2, intvt:$Op3, invcc))), (cmp $Pg, $Op3, $Op2)>; } @@ -5040,9 +5044,9 @@ multiclass SVE_SETCC_Pat_With_Zero; def : Pat<(predvt (AArch64setcc_z predvt:$Op1, (SVEDup0), intvt:$Op2, invcc)), (cmp $Op1, $Op2)>; - def : Pat<(predvt (and predvt:$Pg, (AArch64setcc_z (predvt (AArch64ptrue 31)), intvt:$Op1, (SVEDup0), cc))), + def : Pat<(predvt (and predvt:$Pg, (AArch64setcc_z_oneuse (predvt (AArch64ptrue 31)), intvt:$Op1, (SVEDup0), cc))), (cmp $Pg, $Op1)>; - def : Pat<(predvt (and predvt:$Pg, (AArch64setcc_z (predvt (AArch64ptrue 31)), (SVEDup0), intvt:$Op1, invcc))), + def : Pat<(predvt (and predvt:$Pg, (AArch64setcc_z_oneuse (predvt (AArch64ptrue 31)), (SVEDup0), intvt:$Op1, invcc))), (cmp $Pg, $Op1)>; } @@ -5126,13 +5130,13 @@ multiclass SVE_SETCC_Imm_Pat; def : Pat<(predvt (and predvt:$Pg, - (AArch64setcc_z (predvt (AArch64ptrue 31)), + (AArch64setcc_z_oneuse (predvt (AArch64ptrue 31)), (intvt ZPR:$Zs1), (intvt (splat_vector (immtype:$imm))), cc))), (cmp $Pg, $Zs1, immtype:$imm)>; def : Pat<(predvt (and predvt:$Pg, - (AArch64setcc_z (predvt (AArch64ptrue 31)), + (AArch64setcc_z_oneuse (predvt (AArch64ptrue 31)), (intvt (splat_vector (immtype:$imm))), (intvt ZPR:$Zs1), commuted_cc))), diff --git a/llvm/test/CodeGen/AArch64/sve-fcmp.ll b/llvm/test/CodeGen/AArch64/sve-fcmp.ll index 5cb44b1..35cbe65c 100644 --- a/llvm/test/CodeGen/AArch64/sve-fcmp.ll +++ b/llvm/test/CodeGen/AArch64/sve-fcmp.ll @@ -493,8 +493,8 @@ define %svboolx2 @and_of_multiuse_fcmp_ogt( %pg, %x, %y %and = and %pg, %cmp @@ -507,8 +507,8 @@ define %svboolx2 @and_of_multiuse_fcmp_ogt_zero( %pg, %x, zeroinitializer %and = and %pg, %cmp @@ -521,8 +521,8 @@ define %svboolx2 @and_of_multiuse_fcmp_olt( %pg, %x, %y %and = and %pg, %cmp @@ -535,8 +535,8 @@ define %svboolx2 @and_of_multiuse_fcmp_olt_zero( %pg, %x, zeroinitializer %and = and %pg, %cmp diff --git a/llvm/test/CodeGen/AArch64/sve-fp-int-min-max.ll b/llvm/test/CodeGen/AArch64/sve-fp-int-min-max.ll index 2e6f704..2d4d0b7 100644 --- a/llvm/test/CodeGen/AArch64/sve-fp-int-min-max.ll +++ b/llvm/test/CodeGen/AArch64/sve-fp-int-min-max.ll @@ -23,12 +23,11 @@ define i64 @scalable_int_min_max(ptr %arg, ptr %arg1, %i37, < ; CHECK-NEXT: sxtw z5.d, p0/m, z6.d ; CHECK-NEXT: smin z4.d, p0/m, z4.d, z5.d ; CHECK-NEXT: cmpne p1.d, p0/z, z4.d, #0 -; CHECK-NEXT: ld1w { z5.d }, p1/z, [x1] +; CHECK-NEXT: ld1w { z4.d }, p1/z, [x1] ; CHECK-NEXT: ld1w { z0.d }, p1/z, [z0.d] -; CHECK-NEXT: fadd z0.s, p0/m, z0.s, z5.s +; CHECK-NEXT: fadd z0.s, p0/m, z0.s, z4.s ; CHECK-NEXT: fcmge p2.s, p0/z, z0.s, z3.s -; CHECK-NEXT: not p2.b, p0/z, p2.b -; CHECK-NEXT: cmpne p2.d, p2/z, z4.d, #0 +; CHECK-NEXT: bic p2.b, p1/z, p1.b, p2.b ; CHECK-NEXT: mov z1.d, p2/m, #0 // =0x0 ; CHECK-NEXT: add z2.d, p1/m, z2.d, z1.d ; CHECK-NEXT: uaddv d0, p0, z2.d diff --git a/llvm/test/CodeGen/AArch64/sve-intrinsics-int-compares.ll b/llvm/test/CodeGen/AArch64/sve-intrinsics-int-compares.ll index ee5969e..a995823 100644 --- a/llvm/test/CodeGen/AArch64/sve-intrinsics-int-compares.ll +++ b/llvm/test/CodeGen/AArch64/sve-intrinsics-int-compares.ll @@ -1223,8 +1223,8 @@ define %svboolx2 @and_of_multiuse_icmp_sle( %a, %b, %c %and = and %a, %cmp @@ -1237,8 +1237,8 @@ define %svboolx2 @and_of_multiuse_icmp_sle_imm( %a, insertelement ( undef, i32 1, i64 0), undef, zeroinitializer %cmp = icmp sle %b, %imm @@ -1252,8 +1252,8 @@ define %svboolx2 @and_of_multiuse_icmp_ugt( %a, %b, %c %and = and %a, %cmp @@ -1266,8 +1266,8 @@ define %svboolx2 @and_of_multiuse_icmp_ugt_imm( %a, insertelement ( undef, i32 1, i64 0), undef, zeroinitializer %cmp = icmp ugt %b, %imm -- 2.7.4