[RISCV] Add isel pattern to optimize (mul (and X, 0xffffffff), (and Y, 0xffffffff)) on RV64.

author Craig Topper <craig.topper@sifive.com>

Sat, 20 Mar 2021 19:34:06 +0000 (12:34 -0700)

committer Craig Topper <craig.topper@sifive.com>

Sat, 20 Mar 2021 21:55:46 +0000 (14:55 -0700)
author Craig Topper <craig.topper@sifive.com>
Sat, 20 Mar 2021 19:34:06 +0000 (12:34 -0700)
committer Craig Topper <craig.topper@sifive.com>
Sat, 20 Mar 2021 21:55:46 +0000 (14:55 -0700)
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoM.td b/llvm/lib/Target/RISCV/RISCVInstrInfoM.td

index 2bfdc93..d38b5a9 100644 (file)
--- a/llvm/lib/Target/RISCV/RISCVInstrInfoM.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfoM.td
@@ -93,4 +93,13 @@ def : Pat<(and (riscv_remuw (assertzexti32 GPR:$rs1),
  // produce a result where res[63:32]=0 and res[31]=1.
  def : Pat<(srem (sexti32 (i64 GPR:$rs1)), (sexti32 (i64 GPR:$rs2))),
            (REMW GPR:$rs1, GPR:$rs2)>;
+
+// Special case for calculating the full 64-bit product of a 32x32 unsigned
+// multiply where the inputs aren't known to be zero extended. Shifting both
+// inputs left by 32 scales the 128-bit product by 2^64, so MULHU returns the
+// 32x32 product directly, saving the two SRLIs that zero the upper 32 bits.
+// TODO: If one of the operands is zero extended and the other isn't, we might
+// still be better off shifting both left by 32.
+def : Pat<(i64 (mul (and GPR:$rs1, 0xffffffff), (and GPR:$rs2, 0xffffffff))),
+          (MULHU (SLLI GPR:$rs1, 32), (SLLI GPR:$rs2, 32))>;
  } // Predicates = [HasStdExtM, IsRV64]
diff --git a/llvm/test/CodeGen/RISCV/rv64i-w-insts-legalization.ll b/llvm/test/CodeGen/RISCV/rv64i-w-insts-legalization.ll

index c4a4de7..682f351 100644 (file)
--- a/llvm/test/CodeGen/RISCV/rv64i-w-insts-legalization.ll
+++ b/llvm/test/CodeGen/RISCV/rv64i-w-insts-legalization.ll
@@ -10,13 +10,11 @@ define signext i32 @addw(i32 signext %s, i32 signext %n, i32 signext %k) nounwin
  ; CHECK-NEXT:    add a2, a2, a1
  ; CHECK-NEXT:    addi a3, a0, 1
  ; CHECK-NEXT:    mul a3, a2, a3
-; CHECK-NEXT:    slli a2, a2, 32
-; CHECK-NEXT:    srli a2, a2, 32
  ; CHECK-NEXT:    sub a1, a1, a0
  ; CHECK-NEXT:    addi a1, a1, -2
  ; CHECK-NEXT:    slli a1, a1, 32
-; CHECK-NEXT:    srli a1, a1, 32
-; CHECK-NEXT:    mul a1, a2, a1
+; CHECK-NEXT:    slli a2, a2, 32
+; CHECK-NEXT:    mulhu a1, a2, a1
  ; CHECK-NEXT:    srli a1, a1, 1
  ; CHECK-NEXT:    add a0, a3, a0
  ; CHECK-NEXT:    addw a0, a0, a1
@@ -57,13 +55,11 @@ define signext i32 @subw(i32 signext %s, i32 signext %n, i32 signext %k) nounwin
  ; CHECK-NEXT:    not a2, a0
  ; CHECK-NEXT:    add a3, a2, a1
  ; CHECK-NEXT:    mul a2, a3, a2
-; CHECK-NEXT:    slli a3, a3, 32
-; CHECK-NEXT:    srli a3, a3, 32
  ; CHECK-NEXT:    sub a1, a1, a0
  ; CHECK-NEXT:    addi a1, a1, -2
  ; CHECK-NEXT:    slli a1, a1, 32
-; CHECK-NEXT:    srli a1, a1, 32
-; CHECK-NEXT:    mul a1, a3, a1
+; CHECK-NEXT:    slli a3, a3, 32
+; CHECK-NEXT:    mulhu a1, a3, a1
  ; CHECK-NEXT:    srli a1, a1, 1
  ; CHECK-NEXT:    sub a0, a2, a0
  ; CHECK-NEXT:    subw a0, a0, a1
diff --git a/llvm/test/CodeGen/RISCV/xaluo.ll b/llvm/test/CodeGen/RISCV/xaluo.ll

index e29cfc9..facc0f2 100644 (file)
--- a/llvm/test/CodeGen/RISCV/xaluo.ll
+++ b/llvm/test/CodeGen/RISCV/xaluo.ll
@@ -556,10 +556,8 @@ define zeroext i1 @umulo.i32(i32 %v1, i32 %v2, i32* %res) {
  ; RV64-LABEL: umulo.i32:
  ; RV64:       # %bb.0: # %entry
  ; RV64-NEXT:    slli a1, a1, 32
-; RV64-NEXT:    srli a1, a1, 32
  ; RV64-NEXT:    slli a0, a0, 32
-; RV64-NEXT:    srli a0, a0, 32
-; RV64-NEXT:    mul a1, a0, a1
+; RV64-NEXT:    mulhu a1, a0, a1
  ; RV64-NEXT:    srli a0, a1, 32
  ; RV64-NEXT:    snez a0, a0
  ; RV64-NEXT:    sw a1, 0(a2)
@@ -1297,10 +1295,8 @@ define i32 @umulo.select.i32(i32 %v1, i32 %v2) {
  ; RV64-LABEL: umulo.select.i32:
  ; RV64:       # %bb.0: # %entry
  ; RV64-NEXT:    slli a2, a1, 32
-; RV64-NEXT:    srli a2, a2, 32
  ; RV64-NEXT:    slli a3, a0, 32
-; RV64-NEXT:    srli a3, a3, 32
-; RV64-NEXT:    mul a2, a3, a2
+; RV64-NEXT:    mulhu a2, a3, a2
  ; RV64-NEXT:    srli a2, a2, 32
  ; RV64-NEXT:    bnez a2, .LBB42_2
  ; RV64-NEXT:  # %bb.1: # %entry
@@ -1324,10 +1320,8 @@ define i1 @umulo.not.i32(i32 %v1, i32 %v2) {
  ; RV64-LABEL: umulo.not.i32:
  ; RV64:       # %bb.0: # %entry
  ; RV64-NEXT:    slli a1, a1, 32
-; RV64-NEXT:    srli a1, a1, 32
  ; RV64-NEXT:    slli a0, a0, 32
-; RV64-NEXT:    srli a0, a0, 32
-; RV64-NEXT:    mul a0, a0, a1
+; RV64-NEXT:    mulhu a0, a0, a1
  ; RV64-NEXT:    srli a0, a0, 32
  ; RV64-NEXT:    seqz a0, a0
  ; RV64-NEXT:    ret
@@ -1893,10 +1887,8 @@ define zeroext i1 @umulo.br.i32(i32 %v1, i32 %v2) {
  ; RV64-LABEL: umulo.br.i32:
  ; RV64:       # %bb.0: # %entry
  ; RV64-NEXT:    slli a1, a1, 32
-; RV64-NEXT:    srli a1, a1, 32
  ; RV64-NEXT:    slli a0, a0, 32
-; RV64-NEXT:    srli a0, a0, 32
-; RV64-NEXT:    mul a0, a0, a1
+; RV64-NEXT:    mulhu a0, a0, a1
  ; RV64-NEXT:    srli a0, a0, 32
  ; RV64-NEXT:    beqz a0, .LBB57_2
  ; RV64-NEXT:  # %bb.1: # %overflow
author	Craig Topper <craig.topper@sifive.com>
	Sat, 20 Mar 2021 19:34:06 +0000 (12:34 -0700)
committer	Craig Topper <craig.topper@sifive.com>
	Sat, 20 Mar 2021 21:55:46 +0000 (14:55 -0700)
llvm/lib/Target/RISCV/RISCVInstrInfoM.td		patch \| blob \| history
llvm/test/CodeGen/RISCV/rv64i-w-insts-legalization.ll		patch \| blob \| history
llvm/test/CodeGen/RISCV/xaluo.ll		patch \| blob \| history