From 07ed62b7d5514937a50b4af4feaa1969911d142e Mon Sep 17 00:00:00 2001
From: Craig Topper
Date: Sat, 20 Mar 2021 15:14:46 -0700
Subject: [PATCH] [RISCV] Disable (mul (and X, 0xffffffff), (and Y, 0xffffffff)) optimization when Zba is enabled.

This optimization is trying to save the SRLI instructions needed to
implement the ANDs. If we have zext.w we won't save anything.
Because we don't check that the multiply is the only user of the
ANDs, we might even increase the instruction count.
---
 llvm/lib/Target/RISCV/RISCVInstrInfoM.td |  2 ++
 llvm/test/CodeGen/RISCV/xaluo.ll         | 24 ++++++++++++------------
 2 files changed, 14 insertions(+), 12 deletions(-)

diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoM.td b/llvm/lib/Target/RISCV/RISCVInstrInfoM.td
index d38b5a9..8d5f3e9 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfoM.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfoM.td
@@ -93,7 +93,9 @@ def : Pat<(and (riscv_remuw (assertzexti32 GPR:$rs1),
 // produce a result where res[63:32]=0 and res[31]=1.
 def : Pat<(srem (sexti32 (i64 GPR:$rs1)), (sexti32 (i64 GPR:$rs2))),
           (REMW GPR:$rs1, GPR:$rs2)>;
+} // Predicates = [HasStdExtM, IsRV64]
 
+let Predicates = [HasStdExtM, IsRV64, NotHasStdExtZba] in {
 // Special case for calculating the full 64-bit product of a 32x32 unsigned
 // multiply where the inputs aren't known to be zero extended. We can shift the
 // inputs left by 32 and use a MULHU. This saves two SRLIs needed to finish
diff --git a/llvm/test/CodeGen/RISCV/xaluo.ll b/llvm/test/CodeGen/RISCV/xaluo.ll
index 758cf4c..f34093e 100644
--- a/llvm/test/CodeGen/RISCV/xaluo.ll
+++ b/llvm/test/CodeGen/RISCV/xaluo.ll
@@ -967,9 +967,9 @@ define zeroext i1 @umulo.i32(i32 %v1, i32 %v2, i32* %res) {
 ;
 ; RV64ZBA-LABEL: umulo.i32:
 ; RV64ZBA:       # %bb.0: # %entry
-; RV64ZBA-NEXT:    slli a1, a1, 32
-; RV64ZBA-NEXT:    slli a0, a0, 32
-; RV64ZBA-NEXT:    mulhu a1, a0, a1
+; RV64ZBA-NEXT:    zext.w a1, a1
+; RV64ZBA-NEXT:    zext.w a0, a0
+; RV64ZBA-NEXT:    mul a1, a0, a1
 ; RV64ZBA-NEXT:    srli a0, a1, 32
 ; RV64ZBA-NEXT:    snez a0, a0
 ; RV64ZBA-NEXT:    sw a1, 0(a2)
@@ -2270,9 +2270,9 @@ define i32 @umulo.select.i32(i32 %v1, i32 %v2) {
 ;
 ; RV64ZBA-LABEL: umulo.select.i32:
 ; RV64ZBA:       # %bb.0: # %entry
-; RV64ZBA-NEXT:    slli a2, a1, 32
-; RV64ZBA-NEXT:    slli a3, a0, 32
-; RV64ZBA-NEXT:    mulhu a2, a3, a2
+; RV64ZBA-NEXT:    zext.w a2, a1
+; RV64ZBA-NEXT:    zext.w a3, a0
+; RV64ZBA-NEXT:    mul a2, a3, a2
 ; RV64ZBA-NEXT:    srli a2, a2, 32
 ; RV64ZBA-NEXT:    bnez a2, .LBB42_2
 ; RV64ZBA-NEXT:  # %bb.1: # %entry
@@ -2310,9 +2310,9 @@ define i1 @umulo.not.i32(i32 %v1, i32 %v2) {
 ;
 ; RV64ZBA-LABEL: umulo.not.i32:
 ; RV64ZBA:       # %bb.0: # %entry
-; RV64ZBA-NEXT:    slli a1, a1, 32
-; RV64ZBA-NEXT:    slli a0, a0, 32
-; RV64ZBA-NEXT:    mulhu a0, a0, a1
+; RV64ZBA-NEXT:    zext.w a1, a1
+; RV64ZBA-NEXT:    zext.w a0, a0
+; RV64ZBA-NEXT:    mul a0, a0, a1
 ; RV64ZBA-NEXT:    srli a0, a0, 32
 ; RV64ZBA-NEXT:    seqz a0, a0
 ; RV64ZBA-NEXT:    ret
@@ -3281,9 +3281,9 @@ define zeroext i1 @umulo.br.i32(i32 %v1, i32 %v2) {
 ;
 ; RV64ZBA-LABEL: umulo.br.i32:
 ; RV64ZBA:       # %bb.0: # %entry
-; RV64ZBA-NEXT:    slli a1, a1, 32
-; RV64ZBA-NEXT:    slli a0, a0, 32
-; RV64ZBA-NEXT:    mulhu a0, a0, a1
+; RV64ZBA-NEXT:    zext.w a1, a1
+; RV64ZBA-NEXT:    zext.w a0, a0
+; RV64ZBA-NEXT:    mul a0, a0, a1
 ; RV64ZBA-NEXT:    srli a0, a0, 32
 ; RV64ZBA-NEXT:    beqz a0, .LBB57_2
 ; RV64ZBA-NEXT:  # %bb.1: # %overflow
-- 
2.7.4
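
For reference, the tradeoff the commit message describes, sketched as
instruction sequences. These are taken from the RV64ZBA check lines above
and from the rationale in the TableGen comment; register choices and
ordering are illustrative, not exact output for any one test:

    # Without Zba: the naive lowering needs a slli+srli pair per input
    # to implement each AND, five instructions in all:
    #   slli a0, a0, 32 ; srli a0, a0, 32
    #   slli a1, a1, 32 ; srli a1, a1, 32
    #   mul  a1, a0, a1
    # Shifting both inputs left by 32 and using MULHU drops the two SRLIs:
    slli   a1, a1, 32
    slli   a0, a0, 32
    mulhu  a1, a0, a1       # upper 64 bits of (x<<32)*(y<<32) = x*y

    # With Zba: zext.w implements each AND in a single instruction, so
    # the plain MUL form is already three instructions and MULHU saves
    # nothing:
    zext.w a1, a1
    zext.w a0, a0
    mul    a1, a0, a1

Both forms are three instructions under Zba, and the zext.w form keeps
the zero extended values in registers, which matters when the ANDs have
users besides the multiply.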
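
One name the TableGen hunk relies on but does not show: NotHasStdExtZba.
It is not defined in this diff (the diffstat covers only the two files
above), so presumably it already exists elsewhere in the RISCV target as
a negated predicate along the lines of

    def NotHasStdExtZba : Predicate<"!Subtarget->hasStdExtZba()">;

mirroring the existing HasStdExtZba predicate; the exact spelling and
location are an assumption here.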