From f6515b05205d4324d174dd1f7455c6c8e6671e6b Mon Sep 17 00:00:00 2001 From: Kai Luo Date: Tue, 29 Dec 2020 13:23:17 +0000 Subject: [PATCH] [PowerPC] Do not fold `cmp(d|w)` and `subf` instruction to `subf.` if `nsw` is not present In `PPCInstrInfo::optimizeCompareInstr` we seek opportunities to fold `cmp(d|w)` and `subf` into a single `subf.`. However, if `subf.` overflows, `cr0` does not reflect the correct ordering, violating the semantics of `cmp(d|w)`. Fixes https://bugs.llvm.org/show_bug.cgi?id=47830. Reviewed By: #powerpc, nemanjai Differential Revision: https://reviews.llvm.org/D90156 --- llvm/lib/Target/PowerPC/PPCInstrInfo.cpp | 8 ++++++++ llvm/test/CodeGen/PowerPC/pr47830.ll | 5 +++-- 2 files changed, 11 insertions(+), 2 deletions(-) diff --git a/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp b/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp index deac690a7611..75a498b807cd 100644 --- a/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp +++ b/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp @@ -2141,6 +2141,14 @@ bool PPCInstrInfo::optimizeCompareInstr(MachineInstr &CmpInstr, Register SrcReg, if (NewOpC == -1) return false; + // This transformation should not be performed if `nsw` is missing and is not + // `equalityOnly` comparison. Since if there is overflow, sub_lt, sub_gt in + // CRReg do not reflect correct order. If `equalityOnly` is true, sub_eq in + // CRReg can reflect if compared values are equal, this optz is still valid. + if (!equalityOnly && (NewOpC == PPC::SUBF_rec || NewOpC == PPC::SUBF8_rec) && + Sub && !Sub->getFlag(MachineInstr::NoSWrap)) + return false; + // If we have SUB(r1, r2) and CMP(r2, r1), the condition code based on CMP // needs to be updated to be based on SUB. Push the condition code // operands to OperandsToUpdate. 
If it is safe to remove CmpInstr, the diff --git a/llvm/test/CodeGen/PowerPC/pr47830.ll b/llvm/test/CodeGen/PowerPC/pr47830.ll index be61a81462c2..bd320907a341 100644 --- a/llvm/test/CodeGen/PowerPC/pr47830.ll +++ b/llvm/test/CodeGen/PowerPC/pr47830.ll @@ -5,8 +5,9 @@ define i64 @f(i64 %a, i64 %b) { ; CHECK-LABEL: f: ; CHECK: # %bb.0: -; CHECK-NEXT: sub. r3, r3, r4 -; CHECK-NEXT: isellt r3, 0, r3 +; CHECK-NEXT: sub r5, r3, r4 +; CHECK-NEXT: cmpd r3, r4 +; CHECK-NEXT: isellt r3, 0, r5 ; CHECK-NEXT: blr %c = icmp slt i64 %a, %b %d = sub i64 %a, %b -- 2.34.1