From ec864a537160288a9cf7aea965cf33b0851d6d55 Mon Sep 17 00:00:00 2001
From: Mingming Liu
Date: Thu, 23 Mar 2023 13:31:26 -0700
Subject: [PATCH] [AArch64][PeepholeOpt] Optimize ALU + compare to flag-setting
 ALU

The motivating example is in https://godbolt.org/z/45nbdYMK9
- For this example, `subs` is generated for the good case; `sub` followed by
  `cmp` is generated for the bad case. Since signed overflow is undefined
  behavior in C/C++ (indicated by the `nsw` flag in LLVM IR), `subs` should be
  generated for the good case as well.

This patch relaxes one restriction from "quit optimization when V is used" to
"continue if MI produces a poison value when signed overflow occurs". This is
not meant to be C/C++ specific since it looks at the 'NoSWrap' MachineInstr
flag.

Reviewed By: dmgreen

Differential Revision: https://reviews.llvm.org/D146820
---
 llvm/lib/Target/AArch64/AArch64InstrInfo.cpp  | 21 +++++++++++++++++++--
 llvm/test/CodeGen/AArch64/aarch64-icmp-opt.ll | 12 ++++--------
 llvm/test/CodeGen/AArch64/arm64-csel.ll       |  3 +--
 3 files changed, 24 insertions(+), 12 deletions(-)

diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp b/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp
index b1cfd68..69ada00 100644
--- a/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp
+++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp
@@ -1692,17 +1692,34 @@ static bool isSUBSRegImm(unsigned Opcode) {
 ///        MI and CmpInstr
 ///        or if MI opcode is not the S form there must be neither defs of flags
 ///        nor uses of flags between MI and CmpInstr.
-/// - and  C/V flags are not used after CmpInstr
+/// - and, if C/V flags are not used after CmpInstr
+///        or if N flag is used but MI produces poison value if signed overflow
+///        occurs.
 static bool canInstrSubstituteCmpInstr(MachineInstr &MI, MachineInstr &CmpInstr,
                                        const TargetRegisterInfo &TRI) {
+  // NOTE this assertion guarantees that MI.getOpcode() is add or subtraction
+  // that may or may not set flags.
   assert(sForm(MI) != AArch64::INSTRUCTION_LIST_END);
 
   const unsigned CmpOpcode = CmpInstr.getOpcode();
   if (!isADDSRegImm(CmpOpcode) && !isSUBSRegImm(CmpOpcode))
     return false;
 
+  assert((CmpInstr.getOperand(2).isImm() &&
+          CmpInstr.getOperand(2).getImm() == 0) &&
+         "Caller guarantees that CmpInstr compares with constant 0");
+
   std::optional<UsedNZCV> NZVCUsed = examineCFlagsUse(MI, CmpInstr, TRI);
-  if (!NZVCUsed || NZVCUsed->C || NZVCUsed->V)
+  if (!NZVCUsed || NZVCUsed->C)
+    return false;
+
+  // CmpInstr is either 'ADDS %vreg, 0' or 'SUBS %vreg, 0', and MI is either
+  // '%vreg = add ...' or '%vreg = sub ...'.
+  // Condition flag V is used to indicate signed overflow.
+  // 1) MI and CmpInstr set N and V to the same value.
+  // 2) If MI is add/sub with no-signed-wrap, it produces a poison value when
+  //    signed overflow occurs, so CmpInstr could still be simplified away.
+  if (NZVCUsed->V && !MI.getFlag(MachineInstr::NoSWrap))
     return false;
 
   AccessKind AccessToCheck = AK_Write;
diff --git a/llvm/test/CodeGen/AArch64/aarch64-icmp-opt.ll b/llvm/test/CodeGen/AArch64/aarch64-icmp-opt.ll
index bceec8d..c24ef37 100644
--- a/llvm/test/CodeGen/AArch64/aarch64-icmp-opt.ll
+++ b/llvm/test/CodeGen/AArch64/aarch64-icmp-opt.ll
@@ -7,8 +7,7 @@ target triple = "aarch64-unknown-linux-gnu"
 define i32 @sub_icmp_i32(i32 %0, i32 %1) {
 ; CHECK-LABEL: sub_icmp_i32:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    sub w0, w0, w1
-; CHECK-NEXT:    cmp w0, #0
+; CHECK-NEXT:    subs w0, w0, w1
 ; CHECK-NEXT:    b.le .LBB0_2
 ; CHECK-NEXT:  // %bb.1:
 ; CHECK-NEXT:    b _Z2f2i
@@ -36,8 +35,7 @@ define i32 @sub_icmp_i32(i32 %0, i32 %1) {
 define i64 @sub_icmp_i64(i64 %0, i64 %1) {
 ; CHECK-LABEL: sub_icmp_i64:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    sub x0, x0, x1
-; CHECK-NEXT:    cmp x0, #0
+; CHECK-NEXT:    subs x0, x0, x1
 ; CHECK-NEXT:    b.le .LBB1_2
 ; CHECK-NEXT:  // %bb.1:
 ; CHECK-NEXT:    b _Z2f4l
@@ -63,8 +61,7 @@ define i64 @sub_icmp_i64(i64 %0, i64 %1) {
 define i64 @add_i64(i64 %0, i64 %1) {
 ; CHECK-LABEL: add_i64:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    add x0, x1, x0
-; CHECK-NEXT:    cmp x0, #0
+; CHECK-NEXT:    adds x0, x1, x0
 ; CHECK-NEXT:    b.le .LBB2_2
 ; CHECK-NEXT:  // %bb.1:
 ; CHECK-NEXT:    b _Z2f4l
@@ -90,8 +87,7 @@ define i64 @add_i64(i64 %0, i64 %1) {
 define i32 @add_i32(i32 %0, i32 %1) {
 ; CHECK-LABEL: add_i32:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    add w0, w1, w0
-; CHECK-NEXT:    cmp w0, #0
+; CHECK-NEXT:    adds w0, w1, w0
 ; CHECK-NEXT:    b.le .LBB3_2
 ; CHECK-NEXT:  // %bb.1:
 ; CHECK-NEXT:    b _Z2f4l
diff --git a/llvm/test/CodeGen/AArch64/arm64-csel.ll b/llvm/test/CodeGen/AArch64/arm64-csel.ll
index 246d96f..dd721e7 100644
--- a/llvm/test/CodeGen/AArch64/arm64-csel.ll
+++ b/llvm/test/CodeGen/AArch64/arm64-csel.ll
@@ -79,8 +79,7 @@ entry:
 define i32@foo6(i32 %a, i32 %b) nounwind ssp {
 ; CHECK-LABEL: foo6:
 ; CHECK:       // %bb.0: // %common.ret
-; CHECK-NEXT:    sub w8, w0, w1
-; CHECK-NEXT:    cmp w8, #0
+; CHECK-NEXT:    subs w8, w0, w1
 ; CHECK-NEXT:    csinc w0, w8, wzr, le
 ; CHECK-NEXT:    ret
   %sub = sub nsw i32 %a, %b
-- 
2.7.4
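
For reference, a minimal IR sketch of the pattern this peephole now folds. This
is an illustrative reduction mirroring the updated foo6 test above, not the
exact reproducer from the godbolt link; the function name is hypothetical.

; Without this patch, the 'le'/'gt' condition reads the V flag, so the compare
; against zero was kept and 'sub' + 'cmp' were emitted. With 'nsw', signed
; overflow is poison, so the pair can be folded into a single 'subs'.
define i32 @fold_sub_cmp(i32 %a, i32 %b) {
  %sub = sub nsw i32 %a, %b
  %cmp = icmp sgt i32 %sub, 0
  %res = select i1 %cmp, i32 %sub, i32 0
  ret i32 %res
}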