From 8699efba6dc8d905d144727eb10ff6782376bb87 Mon Sep 17 00:00:00 2001 From: Cullen Rhodes Date: Tue, 15 Nov 2022 12:00:00 +0000 Subject: [PATCH] [AArch64][SVE] Fix bad PTEST(PTRUE_ALL, PTEST_LIKE) optimization AArch64InstrInfo::optimizePTestInstr attempts to remove a PTEST of a predicate generating operation that identically sets flags (implicitly). When the mask is an all active mask of matching element size the PTEST is currently removed. For while instructions this is correct since they perform an implicit PTEST with an all active mask. However, for other instructions such as compares the mask could be different. This patch fixes this bug by only removing the PTEST if the same all active mask is used by the predicate-generating instruction. Reviewed By: bsmith Differential Revision: https://reviews.llvm.org/D137718 --- llvm/lib/Target/AArch64/AArch64InstrInfo.cpp | 14 ++++++++++++-- llvm/test/CodeGen/AArch64/sve-ptest-removal-cmpeq.ll | 2 ++ llvm/test/CodeGen/AArch64/sve-ptest-removal-cmpeq.mir | 2 +- llvm/test/CodeGen/AArch64/sve-ptest-removal-cmpge.ll | 2 ++ llvm/test/CodeGen/AArch64/sve-ptest-removal-cmpgt.ll | 2 ++ llvm/test/CodeGen/AArch64/sve-ptest-removal-cmphi.ll | 2 ++ llvm/test/CodeGen/AArch64/sve-ptest-removal-cmphs.ll | 2 ++ llvm/test/CodeGen/AArch64/sve-ptest-removal-cmple.ll | 14 ++++++++++++++ llvm/test/CodeGen/AArch64/sve-ptest-removal-cmplo.ll | 2 ++ llvm/test/CodeGen/AArch64/sve-ptest-removal-cmpls.ll | 2 ++ llvm/test/CodeGen/AArch64/sve-ptest-removal-cmplt.ll | 2 ++ llvm/test/CodeGen/AArch64/sve-ptest-removal-cmpne.ll | 2 ++ llvm/test/CodeGen/AArch64/sve-ptest-removal-match.ll | 4 ++++ 13 files changed, 49 insertions(+), 3 deletions(-) diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp b/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp index b802358..35a4ea9 100644 --- a/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp +++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp @@ -1308,8 +1308,9 @@ bool AArch64InstrInfo::optimizePTestInstr( bool 
PredIsWhileLike = isWhileOpcode(PredOpcode); if (isPTrueOpcode(MaskOpcode) && (PredIsPTestLike || PredIsWhileLike)) { - // For PTEST(PTRUE, OTHER_INST), PTEST is redundant when PTRUE doesn't - // deactivate any lanes OTHER_INST might set. + // For PTEST(PTRUE_ALL, WHILE), if the element size matches the PTEST is + // redundant since WHILE performs an implicit PTEST with an all active + // mask. uint64_t MaskElementSize = getElementSizeForOpcode(MaskOpcode); uint64_t PredElementSize = getElementSizeForOpcode(PredOpcode); @@ -1318,6 +1319,15 @@ bool AArch64InstrInfo::optimizePTestInstr( (Mask->getOperand(1).getImm() != 31)) return false; + // For PTEST(PTRUE_ALL, PTEST_LIKE), the PTEST is redundant if the + // PTEST_LIKE instruction uses the same all active mask and the element + // size matches. + if (PredIsPTestLike) { + auto PTestLikeMask = MRI->getUniqueVRegDef(Pred->getOperand(1).getReg()); + if (Mask != PTestLikeMask) + return false; + } + // Fallthough to simply remove the PTEST. } else if (PredIsPTestLike) { // For PTEST(PG, PTEST_LIKE(PG, ...)), the PTEST is redundant since the diff --git a/llvm/test/CodeGen/AArch64/sve-ptest-removal-cmpeq.ll b/llvm/test/CodeGen/AArch64/sve-ptest-removal-cmpeq.ll index 2eae4f3..2e9c570 100644 --- a/llvm/test/CodeGen/AArch64/sve-ptest-removal-cmpeq.ll +++ b/llvm/test/CodeGen/AArch64/sve-ptest-removal-cmpeq.ll @@ -37,7 +37,9 @@ define i32 @cmpeq_nxv4i32( %pg, %a, %pg, %a) { ; CHECK-LABEL: cmpeq_imm_nxv16i8: ; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p1.b ; CHECK-NEXT: cmpeq p0.b, p0/z, z0.b, #0 +; CHECK-NEXT: ptest p1, p0.b ; CHECK-NEXT: cset w0, ne ; CHECK-NEXT: ret %1 = tail call @llvm.aarch64.sve.cmpeq.nxv16i8( %pg, %a, zeroinitializer) diff --git a/llvm/test/CodeGen/AArch64/sve-ptest-removal-cmpeq.mir b/llvm/test/CodeGen/AArch64/sve-ptest-removal-cmpeq.mir index 48ef72c..81318aa 100644 --- a/llvm/test/CodeGen/AArch64/sve-ptest-removal-cmpeq.mir +++ b/llvm/test/CodeGen/AArch64/sve-ptest-removal-cmpeq.mir @@ -170,7 +170,7 @@ 
body: | liveins: $p0, $z0 ; CHECK-LABEL: name: cmpeq_imm_nxv16i8 - ; CHECK-NOT: PTEST + ; CHECK: PTEST %1:zpr = COPY $z0 %0:ppr_3b = COPY $p0 %2:ppr = CMPEQ_PPzZI_B %0, %1, 0, implicit-def dead $nzcv diff --git a/llvm/test/CodeGen/AArch64/sve-ptest-removal-cmpge.ll b/llvm/test/CodeGen/AArch64/sve-ptest-removal-cmpge.ll index a99ded9..5cf6e20 100644 --- a/llvm/test/CodeGen/AArch64/sve-ptest-removal-cmpge.ll +++ b/llvm/test/CodeGen/AArch64/sve-ptest-removal-cmpge.ll @@ -37,7 +37,9 @@ define i32 @cmpge_nxv4i32( %pg, %a, %pg, %a) { ; CHECK-LABEL: cmpge_imm_nxv16i8: ; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p1.b ; CHECK-NEXT: cmpge p0.b, p0/z, z0.b, #0 +; CHECK-NEXT: ptest p1, p0.b ; CHECK-NEXT: cset w0, ne ; CHECK-NEXT: ret %1 = tail call @llvm.aarch64.sve.cmpge.nxv16i8( %pg, %a, zeroinitializer) diff --git a/llvm/test/CodeGen/AArch64/sve-ptest-removal-cmpgt.ll b/llvm/test/CodeGen/AArch64/sve-ptest-removal-cmpgt.ll index 0987891..927137c 100644 --- a/llvm/test/CodeGen/AArch64/sve-ptest-removal-cmpgt.ll +++ b/llvm/test/CodeGen/AArch64/sve-ptest-removal-cmpgt.ll @@ -37,7 +37,9 @@ define i32 @cmpgt_nxv4i32( %pg, %a, %pg, %a) { ; CHECK-LABEL: cmpgt_imm_nxv16i8: ; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p1.b ; CHECK-NEXT: cmpgt p0.b, p0/z, z0.b, #0 +; CHECK-NEXT: ptest p1, p0.b ; CHECK-NEXT: cset w0, ne ; CHECK-NEXT: ret %1 = tail call @llvm.aarch64.sve.cmpgt.nxv16i8( %pg, %a, zeroinitializer) diff --git a/llvm/test/CodeGen/AArch64/sve-ptest-removal-cmphi.ll b/llvm/test/CodeGen/AArch64/sve-ptest-removal-cmphi.ll index 6fca640..f8ff3e8 100644 --- a/llvm/test/CodeGen/AArch64/sve-ptest-removal-cmphi.ll +++ b/llvm/test/CodeGen/AArch64/sve-ptest-removal-cmphi.ll @@ -38,7 +38,9 @@ define i32 @cmphi_nxv4i32( %pg, %a, %pg, %a) { ; CHECK-LABEL: cmphi_imm_nxv16i8: ; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p1.b ; CHECK-NEXT: cmphi p0.b, p0/z, z0.b, #0 +; CHECK-NEXT: ptest p1, p0.b ; CHECK-NEXT: cset w0, ne ; CHECK-NEXT: ret %1 = tail call @llvm.aarch64.sve.cmphi.nxv16i8( %pg, %a, 
zeroinitializer) diff --git a/llvm/test/CodeGen/AArch64/sve-ptest-removal-cmphs.ll b/llvm/test/CodeGen/AArch64/sve-ptest-removal-cmphs.ll index 3b2f679..a53becc 100644 --- a/llvm/test/CodeGen/AArch64/sve-ptest-removal-cmphs.ll +++ b/llvm/test/CodeGen/AArch64/sve-ptest-removal-cmphs.ll @@ -37,7 +37,9 @@ define i32 @cmphs_nxv4i32( %pg, %a, %pg, %a) { ; CHECK-LABEL: cmphs_imm_nxv16i8: ; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p1.b ; CHECK-NEXT: cmphs p0.b, p0/z, z0.b, #0 +; CHECK-NEXT: ptest p1, p0.b ; CHECK-NEXT: cset w0, ne ; CHECK-NEXT: ret %1 = tail call @llvm.aarch64.sve.cmphs.nxv16i8( %pg, %a, zeroinitializer) diff --git a/llvm/test/CodeGen/AArch64/sve-ptest-removal-cmple.ll b/llvm/test/CodeGen/AArch64/sve-ptest-removal-cmple.ll index d2b0be2..651b6f0 100644 --- a/llvm/test/CodeGen/AArch64/sve-ptest-removal-cmple.ll +++ b/llvm/test/CodeGen/AArch64/sve-ptest-removal-cmple.ll @@ -8,7 +8,9 @@ define i32 @cmple_imm_nxv16i8( %pg, %a) { ; CHECK-LABEL: cmple_imm_nxv16i8: ; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p1.b ; CHECK-NEXT: cmple p0.b, p0/z, z0.b, #0 +; CHECK-NEXT: ptest p1, p0.b ; CHECK-NEXT: cset w0, ne ; CHECK-NEXT: ret %1 = tail call @llvm.aarch64.sve.cmpge.nxv16i8( %pg, zeroinitializer, %a) @@ -292,7 +294,9 @@ define i1 @cmp32_ptest_any_xx( %pg, %a, %pg, %a, %b) { ; CHECK-LABEL: cmp8_ptest_first_ax: ; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p1.b ; CHECK-NEXT: cmpge p0.b, p0/z, z0.b, z1.b +; CHECK-NEXT: ptest p1, p0.b ; CHECK-NEXT: cset w0, mi ; CHECK-NEXT: ret %1 = tail call @llvm.aarch64.sve.cmpge.nxv16i8( %pg, %a, %b) @@ -308,7 +312,9 @@ define i1 @cmp8_ptest_first_ax( %pg, %a, %pg, %a, %b) { ; CHECK-LABEL: cmp8_ptest_last_ax: ; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p1.b ; CHECK-NEXT: cmpge p0.b, p0/z, z0.b, z1.b +; CHECK-NEXT: ptest p1, p0.b ; CHECK-NEXT: cset w0, lo ; CHECK-NEXT: ret %1 = tail call @llvm.aarch64.sve.cmpge.nxv16i8( %pg, %a, %b) @@ -323,7 +329,9 @@ define i1 @cmp8_ptest_last_ax( %pg, %a, %pg, %a, %b) { ; CHECK-LABEL: cmp8_ptest_any_ax: ; CHECK: 
// %bb.0: +; CHECK-NEXT: ptrue p1.b ; CHECK-NEXT: cmpge p0.b, p0/z, z0.b, z1.b +; CHECK-NEXT: ptest p1, p0.b ; CHECK-NEXT: cset w0, ne ; CHECK-NEXT: ret %1 = tail call @llvm.aarch64.sve.cmpge.nxv16i8( %pg, %a, %b) @@ -339,7 +347,9 @@ define i1 @cmp8_ptest_any_ax( %pg, %a, %pg, %a, %b) { ; CHECK-LABEL: cmp32_ptest_first_ax: ; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p1.s ; CHECK-NEXT: cmpge p0.s, p0/z, z0.s, z1.s +; CHECK-NEXT: ptest p1, p0.b ; CHECK-NEXT: cset w0, mi ; CHECK-NEXT: ret %1 = tail call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( %pg) @@ -356,7 +366,9 @@ define i1 @cmp32_ptest_first_ax( %pg, %a, < define i1 @cmp32_ptest_last_ax( %pg, %a, %b) { ; CHECK-LABEL: cmp32_ptest_last_ax: ; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p1.s ; CHECK-NEXT: cmpge p0.s, p0/z, z0.s, z1.s +; CHECK-NEXT: ptest p1, p0.b ; CHECK-NEXT: cset w0, lo ; CHECK-NEXT: ret %1 = tail call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( %pg) @@ -372,7 +384,9 @@ define i1 @cmp32_ptest_last_ax( %pg, %a, %pg, %a, %b) { ; CHECK-LABEL: cmp32_ptest_any_ax: ; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p1.s ; CHECK-NEXT: cmpge p0.s, p0/z, z0.s, z1.s +; CHECK-NEXT: ptest p1, p0.b ; CHECK-NEXT: cset w0, ne ; CHECK-NEXT: ret %1 = tail call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( %pg) diff --git a/llvm/test/CodeGen/AArch64/sve-ptest-removal-cmplo.ll b/llvm/test/CodeGen/AArch64/sve-ptest-removal-cmplo.ll index c813acf..32d10b2 100644 --- a/llvm/test/CodeGen/AArch64/sve-ptest-removal-cmplo.ll +++ b/llvm/test/CodeGen/AArch64/sve-ptest-removal-cmplo.ll @@ -8,7 +8,9 @@ define i32 @cmplo_imm_nxv16i8( %pg, %a) { ; CHECK-LABEL: cmplo_imm_nxv16i8: ; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p1.b ; CHECK-NEXT: cmplo p0.b, p0/z, z0.b, #0 +; CHECK-NEXT: ptest p1, p0.b ; CHECK-NEXT: cset w0, ne ; CHECK-NEXT: ret %1 = tail call @llvm.aarch64.sve.cmphi.nxv16i8( %pg, zeroinitializer, %a) diff --git a/llvm/test/CodeGen/AArch64/sve-ptest-removal-cmpls.ll b/llvm/test/CodeGen/AArch64/sve-ptest-removal-cmpls.ll index 
53cd251..73f07e9 100644 --- a/llvm/test/CodeGen/AArch64/sve-ptest-removal-cmpls.ll +++ b/llvm/test/CodeGen/AArch64/sve-ptest-removal-cmpls.ll @@ -8,7 +8,9 @@ define i32 @cmpls_imm_nxv16i8( %pg, %a) { ; CHECK-LABEL: cmpls_imm_nxv16i8: ; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p1.b ; CHECK-NEXT: cmpls p0.b, p0/z, z0.b, #0 +; CHECK-NEXT: ptest p1, p0.b ; CHECK-NEXT: cset w0, ne ; CHECK-NEXT: ret %1 = tail call @llvm.aarch64.sve.cmphs.nxv16i8( %pg, zeroinitializer, %a) diff --git a/llvm/test/CodeGen/AArch64/sve-ptest-removal-cmplt.ll b/llvm/test/CodeGen/AArch64/sve-ptest-removal-cmplt.ll index 5b8e739..efcaddb 100644 --- a/llvm/test/CodeGen/AArch64/sve-ptest-removal-cmplt.ll +++ b/llvm/test/CodeGen/AArch64/sve-ptest-removal-cmplt.ll @@ -8,7 +8,9 @@ define i32 @cmplt_imm_nxv16i8( %pg, %a) { ; CHECK-LABEL: cmplt_imm_nxv16i8: ; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p1.b ; CHECK-NEXT: cmplt p0.b, p0/z, z0.b, #0 +; CHECK-NEXT: ptest p1, p0.b ; CHECK-NEXT: cset w0, ne ; CHECK-NEXT: ret %1 = tail call @llvm.aarch64.sve.cmpgt.nxv16i8( %pg, zeroinitializer, %a) diff --git a/llvm/test/CodeGen/AArch64/sve-ptest-removal-cmpne.ll b/llvm/test/CodeGen/AArch64/sve-ptest-removal-cmpne.ll index 12c82a0..11c828a 100644 --- a/llvm/test/CodeGen/AArch64/sve-ptest-removal-cmpne.ll +++ b/llvm/test/CodeGen/AArch64/sve-ptest-removal-cmpne.ll @@ -37,7 +37,9 @@ define i32 @cmpne_nxv4i32( %pg, %a, %pg, %a) { ; CHECK-LABEL: cmpne_imm_nxv16i8: ; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p1.b ; CHECK-NEXT: cmpne p0.b, p0/z, z0.b, #0 +; CHECK-NEXT: ptest p1, p0.b ; CHECK-NEXT: cset w0, ne ; CHECK-NEXT: ret %1 = tail call @llvm.aarch64.sve.cmpne.nxv16i8( %pg, %a, zeroinitializer) diff --git a/llvm/test/CodeGen/AArch64/sve-ptest-removal-match.ll b/llvm/test/CodeGen/AArch64/sve-ptest-removal-match.ll index edd87e6..636379c 100644 --- a/llvm/test/CodeGen/AArch64/sve-ptest-removal-match.ll +++ b/llvm/test/CodeGen/AArch64/sve-ptest-removal-match.ll @@ -20,7 +20,9 @@ define i32 @match_nxv16i8( %pg, %a, %pg, %a, 
%b) { ; CHECK-LABEL: match_imm_nxv16i8: ; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p1.b ; CHECK-NEXT: match p0.b, p0/z, z0.b, z1.b +; CHECK-NEXT: ptest p1, p0.b ; CHECK-NEXT: cset w0, ne ; CHECK-NEXT: ret %1 = tail call @llvm.aarch64.sve.match.nxv16i8( %pg, %a, %b) @@ -49,7 +51,9 @@ define i32 @nmatch_nxv16i8( %pg, %a, %pg, %a, %b) { ; CHECK-LABEL: nmatch_imm_nxv16i8: ; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p1.b ; CHECK-NEXT: nmatch p0.b, p0/z, z0.b, z1.b +; CHECK-NEXT: ptest p1, p0.b ; CHECK-NEXT: cset w0, ne ; CHECK-NEXT: ret %1 = tail call @llvm.aarch64.sve.nmatch.nxv16i8( %pg, %a, %b) -- 2.7.4