From c26e8697d71eea5fa08944a2db039a2187abf27c Mon Sep 17 00:00:00 2001
From: Craig Topper <craig.topper@sifive.com>
Date: Wed, 25 Nov 2020 09:43:16 -0800
Subject: [PATCH] [RISCV] Custom type legalize i32 fshl/fshr on RV64 with Zbt.

This adds custom opcodes for FSLW/FSRW so we can type legalize
fshl/fshr without needing to match a sign_extend_inreg.

I've used the operand order from fshl/fshr to make the isel
patterns similar to the non-W form. It was also hard to settle on
any other order, since the register instruction has the shift
amount as the second operand, but the immediate instruction has
it as the third operand.

Differential Revision: https://reviews.llvm.org/D91479
---
 llvm/lib/Target/RISCV/RISCVISelLowering.cpp | 47 +++++++++++++++++++++++++++++
 llvm/lib/Target/RISCV/RISCVISelLowering.h   |  4 +++
 llvm/lib/Target/RISCV/RISCVInstrInfoB.td    | 28 +++++++----------
 llvm/test/CodeGen/RISCV/rv64Zbt.ll          | 36 ++++++----------------
 4 files changed, 72 insertions(+), 43 deletions(-)

diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
index 15bd35e..854e0f4 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -181,6 +181,11 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
   if (Subtarget.hasStdExtZbt()) {
     setOperationAction(ISD::FSHL, XLenVT, Legal);
     setOperationAction(ISD::FSHR, XLenVT, Legal);
+
+    if (Subtarget.is64Bit()) {
+      setOperationAction(ISD::FSHL, MVT::i32, Custom);
+      setOperationAction(ISD::FSHR, MVT::i32, Custom);
+    }
   }
 
   ISD::CondCode FPCCToExtend[] = {
@@ -1091,6 +1096,26 @@ void RISCVTargetLowering::ReplaceNodeResults(SDNode *N,
     Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, GREVIW));
     break;
   }
+  case ISD::FSHL:
+  case ISD::FSHR: {
+    assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
+           Subtarget.hasStdExtZbt() && "Unexpected custom legalisation");
+    SDValue NewOp0 =
+        DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(0));
+    SDValue NewOp1 =
+        DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(1));
+    SDValue NewOp2 =
+        DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(2));
+    // FSLW/FSRW take a 6-bit shift amount but i32 FSHL/FSHR only use 5 bits.
+    // Mask the shift amount to 5 bits so FSLW/FSRW never see bit 5 set.
+    NewOp2 = DAG.getNode(ISD::AND, DL, MVT::i64, NewOp2,
+                         DAG.getConstant(0x1f, DL, MVT::i64));
+    unsigned Opc =
+        N->getOpcode() == ISD::FSHL ? RISCVISD::FSLW : RISCVISD::FSRW;
+    SDValue NewOp = DAG.getNode(Opc, DL, MVT::i64, NewOp0, NewOp1, NewOp2);
+    Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, NewOp));
+    break;
+  }
   }
 }
 
@@ -1322,6 +1347,24 @@ SDValue RISCVTargetLowering::PerformDAGCombine(SDNode *N,
     }
     break;
   }
+  case RISCVISD::FSLW:
+  case RISCVISD::FSRW: {
+    // Only the lower 32 bits of the value operands and the lower 6 bits of the
+    // shift amount are read.
+    SDValue Op0 = N->getOperand(0);
+    SDValue Op1 = N->getOperand(1);
+    SDValue ShAmt = N->getOperand(2);
+    APInt OpMask = APInt::getLowBitsSet(Op0.getValueSizeInBits(), 32);
+    APInt ShAmtMask = APInt::getLowBitsSet(ShAmt.getValueSizeInBits(), 6);
+    if (SimplifyDemandedBits(Op0, OpMask, DCI) ||
+        SimplifyDemandedBits(Op1, OpMask, DCI) ||
+        SimplifyDemandedBits(ShAmt, ShAmtMask, DCI)) {
+      if (N->getOpcode() != ISD::DELETED_NODE)
+        DCI.AddToWorklist(N);
+      return SDValue(N, 0);
+    }
+    break;
+  }
   case RISCVISD::GREVIW:
   case RISCVISD::GORCIW: {
     // Only the lower 32 bits of the first operand are read
@@ -1454,6 +1497,8 @@ unsigned RISCVTargetLowering::ComputeNumSignBitsForTargetNode(
   case RISCVISD::RORW:
   case RISCVISD::GREVIW:
   case RISCVISD::GORCIW:
+  case RISCVISD::FSLW:
+  case RISCVISD::FSRW:
     // TODO: As the result is sign-extended, this is conservatively correct. A
     // more precise answer could be calculated for SRAW depending on known
     // bits in the shift amount.
@@ -2951,6 +2996,8 @@ const char *RISCVTargetLowering::getTargetNodeName(unsigned Opcode) const {
   NODE_NAME_CASE(REMUW)
   NODE_NAME_CASE(ROLW)
   NODE_NAME_CASE(RORW)
+  NODE_NAME_CASE(FSLW)
+  NODE_NAME_CASE(FSRW)
   NODE_NAME_CASE(FMV_W_X_RV64)
   NODE_NAME_CASE(FMV_X_ANYEXTW_RV64)
   NODE_NAME_CASE(READ_CYCLE_WIDE)
diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.h b/llvm/lib/Target/RISCV/RISCVISelLowering.h
index 46c91ed..39aa360 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.h
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.h
@@ -46,6 +46,10 @@ enum NodeType : unsigned {
   // instructions.
   ROLW,
   RORW,
+  // RV64IB funnel shifts, with the semantics of the named RISC-V instructions,
+  // but the same operand order as fshl/fshr intrinsics.
+  FSRW,
+  FSLW,
   // FPR32<->GPR transfer operations for RV64. Needed as an i32<->f32 bitcast
   // is not legal on RV64. FMV_W_X_RV64 matches the semantics of the FMV.W.X.
   // FMV_X_ANYEXTW_RV64 is similar to FMV.X.W but has an any-extended result.
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoB.td b/llvm/lib/Target/RISCV/RISCVInstrInfoB.td
index 4e1164d..b97306c 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfoB.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfoB.td
@@ -17,8 +17,10 @@
 // Operand and SDNode transformation definitions.
 //===----------------------------------------------------------------------===//
 
-def riscv_rolw      : SDNode<"RISCVISD::ROLW", SDTIntShiftOp>;
-def riscv_rorw      : SDNode<"RISCVISD::RORW", SDTIntShiftOp>;
+def riscv_rolw : SDNode<"RISCVISD::ROLW", SDTIntShiftOp>;
+def riscv_rorw : SDNode<"RISCVISD::RORW", SDTIntShiftOp>;
+def riscv_fslw : SDNode<"RISCVISD::FSLW", SDTIntShiftDOp>;
+def riscv_fsrw : SDNode<"RISCVISD::FSRW", SDTIntShiftDOp>;
 
 def UImmLog2XLenHalfAsmOperand : AsmOperandClass {
   let Name = "UImmLog2XLenHalf";
@@ -920,21 +922,13 @@ def : Pat<(riscv_gorciw GPR:$rs1, timm:$shamt), (GORCIW GPR:$rs1, timm:$shamt)>;
 } // Predicates = [HasStdExtZbp, IsRV64]
 
 let Predicates = [HasStdExtZbt, IsRV64] in {
-def : Pat<(sext_inreg (fshl GPR:$rs1, (shl GPR:$rs3, (i64 32)),
-                            (and GPR:$rs2, (i64 31))),
-                      i32),
-          (FSLW GPR:$rs1, (ANDI GPR:$rs2, 31), GPR:$rs3)>;
-def : Pat<(sext_inreg (fshr GPR:$rs3, (shl GPR:$rs1, (i64 32)),
-                            (or GPR:$rs2, (i64 32))),
-                      i32),
-          (FSRW GPR:$rs1, (ANDI GPR:$rs2, 31), GPR:$rs3)>;
-def : Pat<(sext_inreg (fshr GPR:$rs3, (shl GPR:$rs1, (i64 32)),
-                            uimm6gt32:$shamt),
-                      i32),
-          (FSRIW GPR:$rs1, GPR:$rs3, (ImmSub32 uimm6gt32:$shamt))>;
-def : Pat<(sext_inreg (fshl GPR:$rs3, (shl GPR:$rs1, (i64 32)),
-                            uimm5:$shamt),
-                      i32),
+def : Pat<(riscv_fslw GPR:$rs1, GPR:$rs3, GPR:$rs2),
+          (FSLW GPR:$rs1, GPR:$rs2, GPR:$rs3)>;
+def : Pat<(riscv_fsrw GPR:$rs3, GPR:$rs1, GPR:$rs2),
+          (FSRW GPR:$rs1, GPR:$rs2, GPR:$rs3)>;
+def : Pat<(riscv_fsrw GPR:$rs3, GPR:$rs1, uimm5:$shamt),
+          (FSRIW GPR:$rs1, GPR:$rs3, uimm5:$shamt)>;
+def : Pat<(riscv_fslw GPR:$rs3, GPR:$rs1, uimm5:$shamt),
           (FSRIW GPR:$rs1, GPR:$rs3, (ImmROTL2RW uimm5:$shamt))>;
 } // Predicates = [HasStdExtZbt, IsRV64]
 
diff --git a/llvm/test/CodeGen/RISCV/rv64Zbt.ll b/llvm/test/CodeGen/RISCV/rv64Zbt.ll
index d88a2ad..cb8205b 100644
--- a/llvm/test/CodeGen/RISCV/rv64Zbt.ll
+++ b/llvm/test/CodeGen/RISCV/rv64Zbt.ll
@@ -134,7 +134,6 @@ define signext i32 @fshl_i32(i32 signext %a, i32 signext %b, i32 signext %c) nou
 }
 
 ; Similar to fshl_i32 but result is not sign extended.
-; FIXME: This should use fslw
 define void @fshl_i32_nosext(i32 signext %a, i32 signext %b, i32 signext %c, i32* %x) nounwind {
 ; RV64I-LABEL: fshl_i32_nosext:
 ; RV64I:       # %bb.0:
@@ -150,19 +149,15 @@ define void @fshl_i32_nosext(i32 signext %a, i32 signext %b, i32 signext %c, i32
 ;
 ; RV64IB-LABEL: fshl_i32_nosext:
 ; RV64IB:       # %bb.0:
-; RV64IB-NEXT:    slli a1, a1, 32
 ; RV64IB-NEXT:    andi a2, a2, 31
-; RV64IB-NEXT:    andi a2, a2, 63
-; RV64IB-NEXT:    fsl a0, a0, a1, a2
+; RV64IB-NEXT:    fslw a0, a0, a1, a2
 ; RV64IB-NEXT:    sw a0, 0(a3)
 ; RV64IB-NEXT:    ret
 ;
 ; RV64IBT-LABEL: fshl_i32_nosext:
 ; RV64IBT:       # %bb.0:
-; RV64IBT-NEXT:    slli a1, a1, 32
 ; RV64IBT-NEXT:    andi a2, a2, 31
-; RV64IBT-NEXT:    andi a2, a2, 63
-; RV64IBT-NEXT:    fsl a0, a0, a1, a2
+; RV64IBT-NEXT:    fslw a0, a0, a1, a2
 ; RV64IBT-NEXT:    sw a0, 0(a3)
 ; RV64IBT-NEXT:    ret
   %1 = tail call i32 @llvm.fshl.i32(i32 %a, i32 %b, i32 %c)
@@ -227,7 +222,6 @@ define signext i32 @fshr_i32(i32 signext %a, i32 signext %b, i32 signext %c) nou
 }
 
 ; Similar to fshr_i32 but result is not sign extended.
-; FIXME: This should use fsrw
 define void @fshr_i32_nosext(i32 signext %a, i32 signext %b, i32 signext %c, i32* %x) nounwind {
 ; RV64I-LABEL: fshr_i32_nosext:
 ; RV64I:       # %bb.0:
@@ -242,19 +236,15 @@ define void @fshr_i32_nosext(i32 signext %a, i32 signext %b, i32 signext %c, i32
 ;
 ; RV64IB-LABEL: fshr_i32_nosext:
 ; RV64IB:       # %bb.0:
-; RV64IB-NEXT:    slli a1, a1, 32
-; RV64IB-NEXT:    ori a2, a2, 32
-; RV64IB-NEXT:    andi a2, a2, 63
-; RV64IB-NEXT:    fsr a0, a1, a0, a2
+; RV64IB-NEXT:    andi a2, a2, 31
+; RV64IB-NEXT:    fsrw a0, a1, a0, a2
 ; RV64IB-NEXT:    sw a0, 0(a3)
 ; RV64IB-NEXT:    ret
 ;
 ; RV64IBT-LABEL: fshr_i32_nosext:
 ; RV64IBT:       # %bb.0:
-; RV64IBT-NEXT:    slli a1, a1, 32
-; RV64IBT-NEXT:    ori a2, a2, 32
-; RV64IBT-NEXT:    andi a2, a2, 63
-; RV64IBT-NEXT:    fsr a0, a1, a0, a2
+; RV64IBT-NEXT:    andi a2, a2, 31
+; RV64IBT-NEXT:    fsrw a0, a1, a0, a2
 ; RV64IBT-NEXT:    sw a0, 0(a3)
 ; RV64IBT-NEXT:    ret
   %1 = tail call i32 @llvm.fshr.i32(i32 %a, i32 %b, i32 %c)
@@ -312,7 +302,6 @@ define signext i32 @fshri_i32(i32 signext %a, i32 signext %b) nounwind {
 }
 
 ; Similar to fshr_i32 but result is not sign extended.
-; FIXME: This should use fsriw
 define void @fshri_i32_nosext(i32 signext %a, i32 signext %b, i32* %x) nounwind {
 ; RV64I-LABEL: fshri_i32_nosext:
 ; RV64I:       # %bb.0:
@@ -324,15 +313,13 @@ define void @fshri_i32_nosext(i32 signext %a, i32 signext %b, i32* %x) nounwind
 ;
 ; RV64IB-LABEL: fshri_i32_nosext:
 ; RV64IB:       # %bb.0:
-; RV64IB-NEXT:    slli a1, a1, 32
-; RV64IB-NEXT:    fsri a0, a1, a0, 37
+; RV64IB-NEXT:    fsriw a0, a1, a0, 5
 ; RV64IB-NEXT:    sw a0, 0(a2)
 ; RV64IB-NEXT:    ret
 ;
 ; RV64IBT-LABEL: fshri_i32_nosext:
 ; RV64IBT:       # %bb.0:
-; RV64IBT-NEXT:    slli a1, a1, 32
-; RV64IBT-NEXT:    fsri a0, a1, a0, 37
+; RV64IBT-NEXT:    fsriw a0, a1, a0, 5
 ; RV64IBT-NEXT:    sw a0, 0(a2)
 ; RV64IBT-NEXT:    ret
   %1 = tail call i32 @llvm.fshr.i32(i32 %a, i32 %b, i32 5)
@@ -384,7 +371,6 @@ define signext i32 @fshli_i32(i32 signext %a, i32 signext %b) nounwind {
 }
 
 ; Similar to fshl_i32 but result is not sign extended.
-; FIXME: This should use fsriw
 define void @fshli_i32_nosext(i32 signext %a, i32 signext %b, i32* %x) nounwind {
 ; RV64I-LABEL: fshli_i32_nosext:
 ; RV64I:       # %bb.0:
@@ -396,15 +382,13 @@ define void @fshli_i32_nosext(i32 signext %a, i32 signext %b, i32* %x) nounwind
 ;
 ; RV64IB-LABEL: fshli_i32_nosext:
 ; RV64IB:       # %bb.0:
-; RV64IB-NEXT:    slli a1, a1, 32
-; RV64IB-NEXT:    fsri a0, a1, a0, 59
+; RV64IB-NEXT:    fsriw a0, a1, a0, 27
 ; RV64IB-NEXT:    sw a0, 0(a2)
 ; RV64IB-NEXT:    ret
 ;
 ; RV64IBT-LABEL: fshli_i32_nosext:
 ; RV64IBT:       # %bb.0:
-; RV64IBT-NEXT:    slli a1, a1, 32
-; RV64IBT-NEXT:    fsri a0, a1, a0, 59
+; RV64IBT-NEXT:    fsriw a0, a1, a0, 27
 ; RV64IBT-NEXT:    sw a0, 0(a2)
 ; RV64IBT-NEXT:    ret
   %1 = tail call i32 @llvm.fshl.i32(i32 %a, i32 %b, i32 5)
-- 
2.7.4