From dafd1f29da27c2bb9ed95cf4f3149c68492e4b19 Mon Sep 17 00:00:00 2001
From: Sander de Smalen
Date: Thu, 27 Jan 2022 12:15:12 +0000
Subject: [PATCH] [AArch64][SVE] Avoid using ptrue for unpredicated predicate AND.

Reviewed By: david-arm

Differential Revision: https://reviews.llvm.org/D118146
---
NOTE(review): this copy was rebuilt from a flattened extraction in which line
breaks and angle-bracketed spans (template arguments, IR vector types, the
author e-mail) had been stripped. Hunk line counts were re-checked against the
hunk headers and the diffstat below. Column alignment inside context lines is
a best-effort reconstruction; confirm against the upstream commit, or apply
with `git apply --ignore-whitespace`. The author e-mail on the From: line could
not be recovered from the flattened copy and must be restored before `git am`.

The new multiclass sve_int_pred_log_and selects an unpredicated predicate
`and a, b` as `AND_PPzPP a, a, b`, i.e. the first operand doubles as the
governing predicate, so no all-active `ptrue` has to be materialised.

 llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td          |  2 +-
 llvm/lib/Target/AArch64/SVEInstrFormats.td              | 12 ++++++++++++
 llvm/test/CodeGen/AArch64/sve-int-log.ll                | 12 ++++--------
 llvm/test/CodeGen/AArch64/sve-intrinsics-reinterpret.ll |  9 +++------
 llvm/test/CodeGen/AArch64/sve-split-int-pred-reduce.ll  | 12 ++++++------
 5 files changed, 26 insertions(+), 21 deletions(-)

diff --git a/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td b/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
index 3e7c46e..73a6804 100644
--- a/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
+++ b/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
@@ -734,7 +734,7 @@ let Predicates = [HasSVEorStreamingSVE] in {
   defm PFIRST  : sve_int_pfirst<0b00000, "pfirst", int_aarch64_sve_pfirst>;
   defm PNEXT   : sve_int_pnext<0b00110, "pnext", int_aarch64_sve_pnext>;
 
-  defm AND_PPzPP   : sve_int_pred_log<0b0000, "and", int_aarch64_sve_and_z, and>;
+  defm AND_PPzPP   : sve_int_pred_log_and<0b0000, "and", int_aarch64_sve_and_z>;
   defm BIC_PPzPP   : sve_int_pred_log<0b0001, "bic", int_aarch64_sve_bic_z>;
   defm EOR_PPzPP   : sve_int_pred_log<0b0010, "eor", int_aarch64_sve_eor_z, xor>;
   defm SEL_PPPP    : sve_int_pred_log<0b0011, "sel", vselect>;
diff --git a/llvm/lib/Target/AArch64/SVEInstrFormats.td b/llvm/lib/Target/AArch64/SVEInstrFormats.td
index eb96526..574b221 100644
--- a/llvm/lib/Target/AArch64/SVEInstrFormats.td
+++ b/llvm/lib/Target/AArch64/SVEInstrFormats.td
@@ -1633,6 +1633,18 @@ multiclass sve_int_pred_log<bits<4> opc, string asm, SDPatternOperator op,
                                !cast<Instruction>(NAME), PTRUE_D>;
 }
 
+multiclass sve_int_pred_log_and<bits<4> opc, string asm, SDPatternOperator op> :
+  sve_int_pred_log<opc, asm, op> {
+  def : Pat<(nxv16i1 (and nxv16i1:$Op1, nxv16i1:$Op2)),
+            (!cast<Instruction>(NAME) $Op1, $Op1, $Op2)>;
+  def : Pat<(nxv8i1 (and nxv8i1:$Op1, nxv8i1:$Op2)),
+            (!cast<Instruction>(NAME) $Op1, $Op1, $Op2)>;
+  def : Pat<(nxv4i1 (and nxv4i1:$Op1, nxv4i1:$Op2)),
+            (!cast<Instruction>(NAME) $Op1, $Op1, $Op2)>;
+  def : Pat<(nxv2i1 (and nxv2i1:$Op1, nxv2i1:$Op2)),
+            (!cast<Instruction>(NAME) $Op1, $Op1, $Op2)>;
+}
+
 //===----------------------------------------------------------------------===//
 // SVE Logical Mask Immediate Group
 //===----------------------------------------------------------------------===//
diff --git a/llvm/test/CodeGen/AArch64/sve-int-log.ll b/llvm/test/CodeGen/AArch64/sve-int-log.ll
index 2da05d3..e8bdf67 100644
--- a/llvm/test/CodeGen/AArch64/sve-int-log.ll
+++ b/llvm/test/CodeGen/AArch64/sve-int-log.ll
@@ -49,8 +49,7 @@ define <vscale x 16 x i8> @and_b_zero(<vscale x 16 x i8> %a) {
 define <vscale x 2 x i1> @and_pred_d(<vscale x 2 x i1> %a, <vscale x 2 x i1> %b) {
 ; CHECK-LABEL: and_pred_d:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    ptrue p2.d
-; CHECK-NEXT:    and p0.b, p2/z, p0.b, p1.b
+; CHECK-NEXT:    and p0.b, p0/z, p0.b, p1.b
 ; CHECK-NEXT:    ret
   %res = and <vscale x 2 x i1> %a, %b
   ret <vscale x 2 x i1> %res
@@ -59,8 +58,7 @@ define <vscale x 2 x i1> @and_pred_d(<vscale x 2 x i1> %a, <vscale x 2 x i1> %b)
 define <vscale x 4 x i1> @and_pred_s(<vscale x 4 x i1> %a, <vscale x 4 x i1> %b) {
 ; CHECK-LABEL: and_pred_s:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    ptrue p2.s
-; CHECK-NEXT:    and p0.b, p2/z, p0.b, p1.b
+; CHECK-NEXT:    and p0.b, p0/z, p0.b, p1.b
 ; CHECK-NEXT:    ret
   %res = and <vscale x 4 x i1> %a, %b
   ret <vscale x 4 x i1> %res
@@ -69,8 +67,7 @@ define <vscale x 4 x i1> @and_pred_s(<vscale x 4 x i1> %a, <vscale x 4 x i1> %b)
 define <vscale x 8 x i1> @and_pred_h(<vscale x 8 x i1> %a, <vscale x 8 x i1> %b) {
 ; CHECK-LABEL: and_pred_h:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    ptrue p2.h
-; CHECK-NEXT:    and p0.b, p2/z, p0.b, p1.b
+; CHECK-NEXT:    and p0.b, p0/z, p0.b, p1.b
 ; CHECK-NEXT:    ret
   %res = and <vscale x 8 x i1> %a, %b
   ret <vscale x 8 x i1> %res
@@ -79,8 +76,7 @@ define <vscale x 8 x i1> @and_pred_h(<vscale x 8 x i1> %a, <vscale x 8 x i1> %b)
 define <vscale x 16 x i1> @and_pred_b(<vscale x 16 x i1> %a, <vscale x 16 x i1> %b) {
 ; CHECK-LABEL: and_pred_b:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    ptrue p2.b
-; CHECK-NEXT:    and p0.b, p2/z, p0.b, p1.b
+; CHECK-NEXT:    and p0.b, p0/z, p0.b, p1.b
 ; CHECK-NEXT:    ret
   %res = and <vscale x 16 x i1> %a, %b
   ret <vscale x 16 x i1> %res
diff --git a/llvm/test/CodeGen/AArch64/sve-intrinsics-reinterpret.ll b/llvm/test/CodeGen/AArch64/sve-intrinsics-reinterpret.ll
index dac524a..a018c56 100644
--- a/llvm/test/CodeGen/AArch64/sve-intrinsics-reinterpret.ll
+++ b/llvm/test/CodeGen/AArch64/sve-intrinsics-reinterpret.ll
@@ -17,8 +17,7 @@ define <vscale x 16 x i1> @reinterpret_bool_from_h(<vscale x 8 x i1> %pg) {
 ; CHECK-LABEL: reinterpret_bool_from_h:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p1.h
-; CHECK-NEXT:    ptrue p2.b
-; CHECK-NEXT:    and p0.b, p2/z, p0.b, p1.b
+; CHECK-NEXT:    and p0.b, p0/z, p0.b, p1.b
 ; CHECK-NEXT:    ret
   %out = call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv8i1(<vscale x 8 x i1> %pg)
   ret <vscale x 16 x i1> %out
@@ -28,8 +27,7 @@ define <vscale x 16 x i1> @reinterpret_bool_from_s(<vscale x 4 x i1> %pg) {
 ; CHECK-LABEL: reinterpret_bool_from_s:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p1.s
-; CHECK-NEXT:    ptrue p2.b
-; CHECK-NEXT:    and p0.b, p2/z, p0.b, p1.b
+; CHECK-NEXT:    and p0.b, p0/z, p0.b, p1.b
 ; CHECK-NEXT:    ret
   %out = call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv4i1(<vscale x 4 x i1> %pg)
   ret <vscale x 16 x i1> %out
@@ -39,8 +37,7 @@ define <vscale x 16 x i1> @reinterpret_bool_from_d(<vscale x 2 x i1> %pg) {
 ; CHECK-LABEL: reinterpret_bool_from_d:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p1.d
-; CHECK-NEXT:    ptrue p2.b
-; CHECK-NEXT:    and p0.b, p2/z, p0.b, p1.b
+; CHECK-NEXT:    and p0.b, p0/z, p0.b, p1.b
 ; CHECK-NEXT:    ret
   %out = call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv2i1(<vscale x 2 x i1> %pg)
   ret <vscale x 16 x i1> %out
diff --git a/llvm/test/CodeGen/AArch64/sve-split-int-pred-reduce.ll b/llvm/test/CodeGen/AArch64/sve-split-int-pred-reduce.ll
index 2afcaf7..d24c540 100644
--- a/llvm/test/CodeGen/AArch64/sve-split-int-pred-reduce.ll
+++ b/llvm/test/CodeGen/AArch64/sve-split-int-pred-reduce.ll
@@ -7,7 +7,7 @@ define i1 @andv_nxv32i1(<vscale x 32 x i1> %a) {
 ; CHECK-LABEL: andv_nxv32i1:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p2.b
-; CHECK-NEXT:    and p0.b, p2/z, p0.b, p1.b
+; CHECK-NEXT:    and p0.b, p0/z, p0.b, p1.b
 ; CHECK-NEXT:    not p0.b, p2/z, p0.b
 ; CHECK-NEXT:    ptest p2, p0.b
 ; CHECK-NEXT:    cset w0, eq
@@ -24,10 +24,10 @@ define i1 @andv_nxv64i1(<vscale x 64 x i1> %a) {
 ; CHECK-NEXT:    str p4, [sp, #7, mul vl] // 2-byte Folded Spill
 ; CHECK-NEXT:    .cfi_escape 0x0f, 0x0c, 0x8f, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 16 + 8 * VG
 ; CHECK-NEXT:    .cfi_offset w29, -16
+; CHECK-NEXT:    and p1.b, p1/z, p1.b, p3.b
+; CHECK-NEXT:    and p0.b, p0/z, p0.b, p2.b
 ; CHECK-NEXT:    ptrue p4.b
-; CHECK-NEXT:    and p1.b, p4/z, p1.b, p3.b
-; CHECK-NEXT:    and p0.b, p4/z, p0.b, p2.b
-; CHECK-NEXT:    and p0.b, p4/z, p0.b, p1.b
+; CHECK-NEXT:    and p0.b, p0/z, p0.b, p1.b
 ; CHECK-NEXT:    not p0.b, p4/z, p0.b
 ; CHECK-NEXT:    ptest p4, p0.b
 ; CHECK-NEXT:    ldr p4, [sp, #7, mul vl] // 2-byte Folded Reload
@@ -73,7 +73,7 @@ define i1 @smaxv_nxv32i1(<vscale x 32 x i1> %a) {
 ; CHECK-LABEL: smaxv_nxv32i1:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p2.b
-; CHECK-NEXT:    and p0.b, p2/z, p0.b, p1.b
+; CHECK-NEXT:    and p0.b, p0/z, p0.b, p1.b
 ; CHECK-NEXT:    not p0.b, p2/z, p0.b
 ; CHECK-NEXT:    ptest p2, p0.b
 ; CHECK-NEXT:    cset w0, eq
@@ -116,7 +116,7 @@ define i1 @uminv_nxv32i1(<vscale x 32 x i1> %a) {
 ; CHECK-LABEL: uminv_nxv32i1:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p2.b
-; CHECK-NEXT:    and p0.b, p2/z, p0.b, p1.b
+; CHECK-NEXT:    and p0.b, p0/z, p0.b, p1.b
 ; CHECK-NEXT:    not p0.b, p2/z, p0.b
 ; CHECK-NEXT:    ptest p2, p0.b
 ; CHECK-NEXT:    cset w0, eq
-- 
2.7.4