From a35add4c54be96a42fefc01ae1d74a23e3c16030 Mon Sep 17 00:00:00 2001
From: Simon Dardis <simon.dardis@gmail.com>
Date: Sun, 1 May 2022 19:25:03 +0100
Subject: [PATCH] [MIPS] Correct the implementation of the msub optimization

The MIPS backend attempts to combine integer multiply and addition or
subtraction into a madd or msub operation. This optimization is
heavily restricted due to its utility in many cases.

PR/51114 highlighted that the optimization was performed on an
associative basis which is correct in the `add` case but not in
the `sub` case.

Resolve this bug by performing an early exit in the case where the
multiply is the LHS operand of the subtraction.

This resolves PR/51114.

Thanks to digitalseraphim for reporting the issue!

Differential Revision: https://reviews.llvm.org/D124742
---
 llvm/lib/Target/Mips/MipsISelLowering.cpp |  8 ++++
 llvm/test/CodeGen/Mips/madd-msub.ll       | 76 +++++++++++++++++++++++++++++++
 2 files changed, 84 insertions(+)

diff --git a/llvm/lib/Target/Mips/MipsISelLowering.cpp b/llvm/lib/Target/Mips/MipsISelLowering.cpp
index e5dbc50..10b2df0 100644
--- a/llvm/lib/Target/Mips/MipsISelLowering.cpp
+++ b/llvm/lib/Target/Mips/MipsISelLowering.cpp
@@ -950,6 +950,14 @@ static SDValue performMADD_MSUBCombine(SDNode *ROOTNode, SelectionDAG &CurDAG,
       ROOTNode->getOperand(1).getOpcode() != ISD::MUL)
     return SDValue();
 
+  // In the case where we have a multiplication as the left operand of
+  // of a subtraction, we can't combine into a MipsISD::MSub node as the
+  // the instruction definition of msub(u) places the multiplication on
+  // on the right.
+  if (ROOTNode->getOpcode() == ISD::SUB &&
+      ROOTNode->getOperand(0).getOpcode() == ISD::MUL)
+    return SDValue();
+
   // We don't handle vector types here.
   if (ROOTNode->getValueType(0).isVector())
     return SDValue();
diff --git a/llvm/test/CodeGen/Mips/madd-msub.ll b/llvm/test/CodeGen/Mips/madd-msub.ll
index 8d261c1..3a6fd68 100644
--- a/llvm/test/CodeGen/Mips/madd-msub.ll
+++ b/llvm/test/CodeGen/Mips/madd-msub.ll
@@ -507,3 +507,79 @@ entry:
 
   ret i32 %sub
 }
+
+define i64 @msub5(i32 %a, i32 %b, i32 %c) {
+; 32-LABEL: msub5:
+; 32:       # %bb.0: # %entry
+; 32-NEXT:    mult $5, $4
+; 32-NEXT:    mfhi $1
+; 32-NEXT:    mflo $3
+; 32-NEXT:    sltu $2, $3, $6
+; 32-NEXT:    sra $4, $6, 31
+; 32-NEXT:    subu $1, $1, $4
+; 32-NEXT:    subu $2, $1, $2
+; 32-NEXT:    jr $ra
+; 32-NEXT:    subu $3, $3, $6
+;
+; 32R6-LABEL: msub5:
+; 32R6:       # %bb.0: # %entry
+; 32R6-NEXT:    mul $1, $5, $4
+; 32R6-NEXT:    sltu $2, $1, $6
+; 32R6-NEXT:    muh $3, $5, $4
+; 32R6-NEXT:    sra $4, $6, 31
+; 32R6-NEXT:    subu $3, $3, $4
+; 32R6-NEXT:    subu $2, $3, $2
+; 32R6-NEXT:    jr $ra
+; 32R6-NEXT:    subu $3, $1, $6
+;
+; DSP-LABEL: msub5:
+; DSP:       # %bb.0: # %entry
+; DSP-NEXT:    mult $ac0, $5, $4
+; DSP-NEXT:    mfhi $1, $ac0
+; DSP-NEXT:    mflo $3, $ac0
+; DSP-NEXT:    sltu $2, $3, $6
+; DSP-NEXT:    sra $4, $6, 31
+; DSP-NEXT:    subu $1, $1, $4
+; DSP-NEXT:    subu $2, $1, $2
+; DSP-NEXT:    jr $ra
+; DSP-NEXT:    subu $3, $3, $6
+;
+; 64-LABEL: msub5:
+; 64:       # %bb.0: # %entry
+; 64-NEXT:    sll $1, $4, 0
+; 64-NEXT:    sll $2, $5, 0
+; 64-NEXT:    dmult $2, $1
+; 64-NEXT:    mflo $1
+; 64-NEXT:    sll $2, $6, 0
+; 64-NEXT:    jr $ra
+; 64-NEXT:    dsubu $2, $1, $2
+;
+; 64R6-LABEL: msub5:
+; 64R6:       # %bb.0: # %entry
+; 64R6-NEXT:    sll $1, $4, 0
+; 64R6-NEXT:    sll $2, $5, 0
+; 64R6-NEXT:    dmul $1, $2, $1
+; 64R6-NEXT:    sll $2, $6, 0
+; 64R6-NEXT:    jr $ra
+; 64R6-NEXT:    dsubu $2, $1, $2
+;
+; 16-LABEL: msub5:
+; 16:       # %bb.0: # %entry
+; 16-NEXT:    mult $5, $4
+; 16-NEXT:    mflo $2
+; 16-NEXT:    mfhi $4
+; 16-NEXT:    subu $3, $2, $6
+; 16-NEXT:    sltu $2, $6
+; 16-NEXT:    move $2, $24
+; 16-NEXT:    sra $5, $6, 31
+; 16-NEXT:    subu $4, $4, $5
+; 16-NEXT:    subu $2, $4, $2
+; 16-NEXT:    jrc $ra
+entry:
+  %conv = sext i32 %a to i64
+  %conv3 = sext i32 %b to i64
+  %conv4 = sext i32 %c to i64
+  %mul = mul nsw i64 %conv3, %conv
+  %sub = sub nsw i64 %mul, %conv4
+  ret i64 %sub
+}
-- 
2.7.4