From 132546d9397c062d4b2f50fac76392a11d3d6f27 Mon Sep 17 00:00:00 2001
From: Craig Topper <craig.topper@sifive.com>
Date: Wed, 21 Dec 2022 10:39:41 -0800
Subject: [PATCH] [RISCV] Add DAG combine to fold (select C, (add X, Y), Y) ->
 (add (select C, X, 0), Y).

Similar for sub, or, and xor. These are all operations that have 0
as a neutral value. This is based on a similar transform in InstCombine.

This allows us to remove some XVentanaCondOps patterns and
some code from DAGCombine for RISCVISD::SELECT_CC.

Reviewed By: asb

Differential Revision: https://reviews.llvm.org/D140465
---
 llvm/lib/Target/RISCV/RISCVISelLowering.cpp     | 108 ++++++----
 llvm/lib/Target/RISCV/RISCVInstrInfoXVentana.td |  13 --
 llvm/test/CodeGen/RISCV/select.ll               | 267 +++++++++---------------
 llvm/test/CodeGen/RISCV/sextw-removal.ll        | 119 +++++------
 4 files changed, 225 insertions(+), 282 deletions(-)
diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
index 7138273..d34786d 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -1019,7 +1019,7 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
   setJumpIsExpensive();
 
   setTargetDAGCombine({ISD::INTRINSIC_WO_CHAIN, ISD::ADD, ISD::SUB, ISD::AND,
-                       ISD::OR, ISD::XOR, ISD::SETCC});
+                       ISD::OR, ISD::XOR, ISD::SETCC, ISD::SELECT});
   if (Subtarget.is64Bit())
     setTargetDAGCombine(ISD::SRA);
 
@@ -9637,6 +9637,65 @@ static bool combine_CC(SDValue &LHS, SDValue &RHS, SDValue &CC, const SDLoc &DL,
   return false;
 }
 
+// Fold (select C, (op Y, X), Y) -> (op Y, (select C, X, 0)) where op is add,
+// sub, or, or xor; (op Y, 0) == Y, so the arm that was plain Y is unchanged.
+// Y may be either operand of a commutative op, but only operand 0 of a sub.
+// Requires the op to have one use and Y to be non-constant. If Swapped, the
+// caller passed the select's arms exchanged and we build (select C, 0, X).
+static SDValue tryFoldSelectIntoOp(SDNode *N, SelectionDAG &DAG,
+                                   SDValue TrueVal, SDValue FalseVal,
+                                   bool Swapped) {
+  bool Commutative = true;
+  switch (TrueVal.getOpcode()) {
+  default:
+    return SDValue();
+  case ISD::SUB:
+    Commutative = false;
+    break;
+  case ISD::ADD:
+  case ISD::OR:
+  case ISD::XOR:
+    break;
+  }
+
+  if (!TrueVal.hasOneUse() || isa<ConstantSDNode>(FalseVal))
+    return SDValue();
+
+  unsigned OpToFold;
+  if (FalseVal == TrueVal.getOperand(0))
+    OpToFold = 0;
+  else if (Commutative && FalseVal == TrueVal.getOperand(1))
+    OpToFold = 1;
+  else
+    return SDValue();
+
+  EVT VT = N->getValueType(0);
+  SDLoc DL(N);
+  SDValue Zero = DAG.getConstant(0, DL, VT);
+  SDValue OtherOp = TrueVal.getOperand(1 - OpToFold);
+
+  if (Swapped)
+    std::swap(OtherOp, Zero);
+  SDValue NewSel = DAG.getSelect(DL, VT, N->getOperand(0), OtherOp, Zero);
+  return DAG.getNode(TrueVal.getOpcode(), DL, VT, FalseVal, NewSel);
+}
+
+static SDValue performSELECTCombine(SDNode *N, SelectionDAG &DAG,
+                                    const RISCVSubtarget &Subtarget) {
+  if (Subtarget.hasShortForwardBranchOpt())
+    return SDValue();
+
+  // Only fold selects whose result type is XLenVT; other types are left alone.
+  if (N->getValueType(0) != Subtarget.getXLenVT())
+    return SDValue();
+
+  SDValue TrueVal = N->getOperand(1);
+  SDValue FalseVal = N->getOperand(2);
+  if (SDValue V = tryFoldSelectIntoOp(N, DAG, TrueVal, FalseVal, /*Swapped*/false))
+    return V;
+  return tryFoldSelectIntoOp(N, DAG, FalseVal, TrueVal, /*Swapped*/true);
+}
+
 SDValue RISCVTargetLowering::PerformDAGCombine(SDNode *N,
                                                DAGCombinerInfo &DCI) const {
   SelectionDAG &DAG = DCI.DAG;
@@ -9806,6 +9865,8 @@ SDValue RISCVTargetLowering::PerformDAGCombine(SDNode *N,
     return SDValue();
   case ISD::TRUNCATE:
     return performTRUNCATECombine(N, DAG, Subtarget);
+  case ISD::SELECT:
+    return performSELECTCombine(N, DAG, Subtarget);
   case RISCVISD::SELECT_CC: {
     // Transform
     SDValue LHS = N->getOperand(0);
@@ -9821,51 +9882,6 @@ SDValue RISCVTargetLowering::PerformDAGCombine(SDNode *N,
     if (TrueV == FalseV)
       return TrueV;
 
-    // (select (x in [0,1] == 0), y, (z ^ y) ) -> (-x & z ) ^ y
-    // (select (x in [0,1] != 0), (z ^ y), y ) -> (-x & z ) ^ y
-    // (select (x in [0,1] == 0), y, (z | y) ) -> (-x & z ) | y
-    // (select (x in [0,1] != 0), (z | y), y ) -> (-x & z ) | y
-    // NOTE: We only do this if the target does not have the short forward
-    // branch optimization.
-    APInt Mask = APInt::getBitsSetFrom(LHS.getValueSizeInBits(), 1);
-    if (!Subtarget.hasShortForwardBranchOpt() && isNullConstant(RHS) &&
-        ISD::isIntEqualitySetCC(CCVal) && DAG.MaskedValueIsZero(LHS, Mask)) {
-      unsigned Opcode;
-      SDValue Src1, Src2;
-      // true if FalseV is XOR or OR operator and one of its operands
-      // is equal to Op1
-      // ( a , a op b) || ( b , a op b)
-      auto isOrXorPattern = [&]() {
-        if (CCVal == ISD::SETEQ &&
-            (FalseV.getOpcode() == ISD::XOR || FalseV.getOpcode() == ISD::OR) &&
-            (FalseV.getOperand(0) == TrueV || FalseV.getOperand(1) == TrueV)) {
-          Src1 = FalseV.getOperand(0) == TrueV ?
-            FalseV.getOperand(1) : FalseV.getOperand(0);
-          Src2 = TrueV;
-          Opcode = FalseV.getOpcode();
-          return true;
-        }
-        if (CCVal == ISD::SETNE &&
-            (TrueV.getOpcode() == ISD::XOR || TrueV.getOpcode() == ISD::OR) &&
-            (TrueV.getOperand(0) == FalseV || TrueV.getOperand(1) == FalseV)) {
-          Src1 = TrueV.getOperand(0) == FalseV ?
-            TrueV.getOperand(1) : TrueV.getOperand(0);
-          Src2 = FalseV;
-          Opcode = TrueV.getOpcode();
-          return true;
-        }
-
-        return false;
-      };
-
-      if (isOrXorPattern()) {
-        assert(LHS.getValueType() == VT && "Unexpected VT!");
-        SDValue Mask = DAG.getNegative(LHS, DL, VT);             // -x
-        SDValue And = DAG.getNode(ISD::AND, DL, VT, Mask, Src1); // Mask & z
-        return DAG.getNode(Opcode, DL, VT, And, Src2);           // And Op y
-      }
-    }
-
     // (select (x < 0), y, z)  -> x >> (XLEN - 1) & (y - z) + z
     // (select (x >= 0), y, z) -> x >> (XLEN - 1) & (z - y) + y
     if (!Subtarget.hasShortForwardBranchOpt() && isa<ConstantSDNode>(TrueV) &&
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoXVentana.td b/llvm/lib/Target/RISCV/RISCVInstrInfoXVentana.td
index b4e28e6..18d6515 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfoXVentana.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfoXVentana.td
@@ -28,13 +28,6 @@ def VT_MASKC : VTMaskedMove<0b110, "vt.maskc">,
 def VT_MASKCN : VTMaskedMove<0b111, "vt.maskcn">,
            Sched<[WriteIALU, ReadIALU, ReadIALU]>;
 
-multiclass XVentanaCondops_pats<SDPatternOperator Op, RVInst MI> {
-  def : Pat<(i64 (select GPR:$rc, (Op GPR:$rs1, GPR:$rs2), GPR:$rs1)),
-            (MI $rs1, (VT_MASKC $rs2, $rc))>;
-  def : Pat<(i64 (select GPR:$rc, GPR:$rs1, (Op GPR:$rs1, GPR:$rs2))),
-            (MI $rs1, (VT_MASKCN $rs2, $rc))>;
-}
-
 let Predicates = [IsRV64, HasVendorXVentanaCondOps] in {
 // Directly use MASKC/MASKCN in case of any of the operands being 0.
 def : Pat<(select GPR:$rc, GPR:$rs1, (i64 0)),
@@ -42,12 +35,6 @@ def : Pat<(select GPR:$rc, GPR:$rs1, (i64 0)),
 def : Pat<(select GPR:$rc, (i64 0), GPR:$rs1),
           (VT_MASKCN $rs1, $rc)>;
 
-// Conditional operations patterns.
-defm : XVentanaCondops_pats<add, ADD>;
-defm : XVentanaCondops_pats<sub, SUB>;
-defm : XVentanaCondops_pats<or, OR>;
-defm : XVentanaCondops_pats<xor, XOR>;
-
 // Conditional AND operation patterns.
 def : Pat<(i64 (select GPR:$rc, (and GPR:$rs1, GPR:$rs2), GPR:$rs1)),
           (OR (AND $rs1, $rs2), (VT_MASKCN $rs1, $rc))>;
diff --git a/llvm/test/CodeGen/RISCV/select.ll b/llvm/test/CodeGen/RISCV/select.ll
index de54a46..2e9c4e7 100644
--- a/llvm/test/CodeGen/RISCV/select.ll
+++ b/llvm/test/CodeGen/RISCV/select.ll
@@ -9,25 +9,16 @@ define i16 @select_xor_1(i16 %A, i8 %cond) {
 ; RV32-NEXT:    slli a1, a1, 31
 ; RV32-NEXT:    srai a1, a1, 31
 ; RV32-NEXT:    andi a1, a1, 43
-; RV32-NEXT:    xor a0, a1, a0
+; RV32-NEXT:    xor a0, a0, a1
 ; RV32-NEXT:    ret
 ;
-; NOCONDOPS-LABEL: select_xor_1:
-; NOCONDOPS:       # %bb.0: # %entry
-; NOCONDOPS-NEXT:    slli a1, a1, 63
-; NOCONDOPS-NEXT:    srai a1, a1, 63
-; NOCONDOPS-NEXT:    andi a1, a1, 43
-; NOCONDOPS-NEXT:    xor a0, a1, a0
-; NOCONDOPS-NEXT:    ret
-;
-; CONDOPS-LABEL: select_xor_1:
-; CONDOPS:       # %bb.0: # %entry
-; CONDOPS-NEXT:    andi a1, a1, 1
-; CONDOPS-NEXT:    seqz a1, a1
-; CONDOPS-NEXT:    li a2, 43
-; CONDOPS-NEXT:    vt.maskcn a1, a2, a1
-; CONDOPS-NEXT:    xor a0, a0, a1
-; CONDOPS-NEXT:    ret
+; RV64-LABEL: select_xor_1:
+; RV64:       # %bb.0: # %entry
+; RV64-NEXT:    slli a1, a1, 63
+; RV64-NEXT:    srai a1, a1, 63
+; RV64-NEXT:    andi a1, a1, 43
+; RV64-NEXT:    xor a0, a0, a1
+; RV64-NEXT:    ret
 entry:
  %and = and i8 %cond, 1
  %cmp10 = icmp eq i8 %and, 0
@@ -44,7 +35,7 @@ define i16 @select_xor_1b(i16 %A, i8 %cond) {
 ; RV32-NEXT:    slli a1, a1, 31
 ; RV32-NEXT:    srai a1, a1, 31
 ; RV32-NEXT:    andi a1, a1, 43
-; RV32-NEXT:    xor a0, a1, a0
+; RV32-NEXT:    xor a0, a0, a1
 ; RV32-NEXT:    ret
 ;
 ; NOCONDOPS-LABEL: select_xor_1b:
@@ -52,7 +43,7 @@ define i16 @select_xor_1b(i16 %A, i8 %cond) {
 ; NOCONDOPS-NEXT:    slli a1, a1, 63
 ; NOCONDOPS-NEXT:    srai a1, a1, 63
 ; NOCONDOPS-NEXT:    andi a1, a1, 43
-; NOCONDOPS-NEXT:    xor a0, a1, a0
+; NOCONDOPS-NEXT:    xor a0, a0, a1
 ; NOCONDOPS-NEXT:    ret
 ;
 ; CONDOPS-LABEL: select_xor_1b:
@@ -76,24 +67,16 @@ define i32 @select_xor_2(i32 %A, i32 %B, i8 %cond) {
 ; RV32-NEXT:    slli a2, a2, 31
 ; RV32-NEXT:    srai a2, a2, 31
 ; RV32-NEXT:    and a1, a2, a1
-; RV32-NEXT:    xor a0, a1, a0
+; RV32-NEXT:    xor a0, a0, a1
 ; RV32-NEXT:    ret
 ;
-; NOCONDOPS-LABEL: select_xor_2:
-; NOCONDOPS:       # %bb.0: # %entry
-; NOCONDOPS-NEXT:    slli a2, a2, 63
-; NOCONDOPS-NEXT:    srai a2, a2, 63
-; NOCONDOPS-NEXT:    and a1, a2, a1
-; NOCONDOPS-NEXT:    xor a0, a1, a0
-; NOCONDOPS-NEXT:    ret
-;
-; CONDOPS-LABEL: select_xor_2:
-; CONDOPS:       # %bb.0: # %entry
-; CONDOPS-NEXT:    andi a2, a2, 1
-; CONDOPS-NEXT:    seqz a2, a2
-; CONDOPS-NEXT:    vt.maskcn a1, a1, a2
-; CONDOPS-NEXT:    xor a0, a0, a1
-; CONDOPS-NEXT:    ret
+; RV64-LABEL: select_xor_2:
+; RV64:       # %bb.0: # %entry
+; RV64-NEXT:    slli a2, a2, 63
+; RV64-NEXT:    srai a2, a2, 63
+; RV64-NEXT:    and a1, a2, a1
+; RV64-NEXT:    xor a0, a0, a1
+; RV64-NEXT:    ret
 entry:
  %and = and i8 %cond, 1
  %cmp10 = icmp eq i8 %and, 0
@@ -110,7 +93,7 @@ define i32 @select_xor_2b(i32 %A, i32 %B, i8 %cond) {
 ; RV32-NEXT:    slli a2, a2, 31
 ; RV32-NEXT:    srai a2, a2, 31
 ; RV32-NEXT:    and a1, a2, a1
-; RV32-NEXT:    xor a0, a1, a0
+; RV32-NEXT:    xor a0, a0, a1
 ; RV32-NEXT:    ret
 ;
 ; NOCONDOPS-LABEL: select_xor_2b:
@@ -118,7 +101,7 @@ define i32 @select_xor_2b(i32 %A, i32 %B, i8 %cond) {
 ; NOCONDOPS-NEXT:    slli a2, a2, 63
 ; NOCONDOPS-NEXT:    srai a2, a2, 63
 ; NOCONDOPS-NEXT:    and a1, a2, a1
-; NOCONDOPS-NEXT:    xor a0, a1, a0
+; NOCONDOPS-NEXT:    xor a0, a0, a1
 ; NOCONDOPS-NEXT:    ret
 ;
 ; CONDOPS-LABEL: select_xor_2b:
@@ -139,19 +122,17 @@ define i16 @select_xor_3(i16 %A, i8 %cond) {
 ; RV32-LABEL: select_xor_3:
 ; RV32:       # %bb.0: # %entry
 ; RV32-NEXT:    andi a1, a1, 1
-; RV32-NEXT:    bnez a1, .LBB4_2
-; RV32-NEXT:  # %bb.1:
-; RV32-NEXT:    xori a0, a0, 43
-; RV32-NEXT:  .LBB4_2: # %entry
+; RV32-NEXT:    addi a1, a1, -1
+; RV32-NEXT:    andi a1, a1, 43
+; RV32-NEXT:    xor a0, a0, a1
 ; RV32-NEXT:    ret
 ;
 ; NOCONDOPS-LABEL: select_xor_3:
 ; NOCONDOPS:       # %bb.0: # %entry
 ; NOCONDOPS-NEXT:    andi a1, a1, 1
-; NOCONDOPS-NEXT:    bnez a1, .LBB4_2
-; NOCONDOPS-NEXT:  # %bb.1:
-; NOCONDOPS-NEXT:    xori a0, a0, 43
-; NOCONDOPS-NEXT:  .LBB4_2: # %entry
+; NOCONDOPS-NEXT:    addiw a1, a1, -1
+; NOCONDOPS-NEXT:    andi a1, a1, 43
+; NOCONDOPS-NEXT:    xor a0, a0, a1
 ; NOCONDOPS-NEXT:    ret
 ;
 ; CONDOPS-LABEL: select_xor_3:
@@ -176,19 +157,17 @@ define i16 @select_xor_3b(i16 %A, i8 %cond) {
 ; RV32-LABEL: select_xor_3b:
 ; RV32:       # %bb.0: # %entry
 ; RV32-NEXT:    andi a1, a1, 1
-; RV32-NEXT:    bnez a1, .LBB5_2
-; RV32-NEXT:  # %bb.1: # %entry
-; RV32-NEXT:    xori a0, a0, 43
-; RV32-NEXT:  .LBB5_2: # %entry
+; RV32-NEXT:    addi a1, a1, -1
+; RV32-NEXT:    andi a1, a1, 43
+; RV32-NEXT:    xor a0, a0, a1
 ; RV32-NEXT:    ret
 ;
 ; NOCONDOPS-LABEL: select_xor_3b:
 ; NOCONDOPS:       # %bb.0: # %entry
 ; NOCONDOPS-NEXT:    andi a1, a1, 1
-; NOCONDOPS-NEXT:    bnez a1, .LBB5_2
-; NOCONDOPS-NEXT:  # %bb.1: # %entry
-; NOCONDOPS-NEXT:    xori a0, a0, 43
-; NOCONDOPS-NEXT:  .LBB5_2: # %entry
+; NOCONDOPS-NEXT:    addiw a1, a1, -1
+; NOCONDOPS-NEXT:    andi a1, a1, 43
+; NOCONDOPS-NEXT:    xor a0, a0, a1
 ; NOCONDOPS-NEXT:    ret
 ;
 ; CONDOPS-LABEL: select_xor_3b:
@@ -210,19 +189,17 @@ define i32 @select_xor_4(i32 %A, i32 %B, i8 %cond) {
 ; RV32-LABEL: select_xor_4:
 ; RV32:       # %bb.0: # %entry
 ; RV32-NEXT:    andi a2, a2, 1
-; RV32-NEXT:    bnez a2, .LBB6_2
-; RV32-NEXT:  # %bb.1:
-; RV32-NEXT:    xor a0, a1, a0
-; RV32-NEXT:  .LBB6_2: # %entry
+; RV32-NEXT:    addi a2, a2, -1
+; RV32-NEXT:    and a1, a2, a1
+; RV32-NEXT:    xor a0, a0, a1
 ; RV32-NEXT:    ret
 ;
 ; NOCONDOPS-LABEL: select_xor_4:
 ; NOCONDOPS:       # %bb.0: # %entry
 ; NOCONDOPS-NEXT:    andi a2, a2, 1
-; NOCONDOPS-NEXT:    bnez a2, .LBB6_2
-; NOCONDOPS-NEXT:  # %bb.1:
-; NOCONDOPS-NEXT:    xor a0, a1, a0
-; NOCONDOPS-NEXT:  .LBB6_2: # %entry
+; NOCONDOPS-NEXT:    addi a2, a2, -1
+; NOCONDOPS-NEXT:    and a1, a2, a1
+; NOCONDOPS-NEXT:    xor a0, a0, a1
 ; NOCONDOPS-NEXT:    ret
 ;
 ; CONDOPS-LABEL: select_xor_4:
@@ -246,19 +223,17 @@ define i32 @select_xor_4b(i32 %A, i32 %B, i8 %cond) {
 ; RV32-LABEL: select_xor_4b:
 ; RV32:       # %bb.0: # %entry
 ; RV32-NEXT:    andi a2, a2, 1
-; RV32-NEXT:    bnez a2, .LBB7_2
-; RV32-NEXT:  # %bb.1: # %entry
-; RV32-NEXT:    xor a0, a1, a0
-; RV32-NEXT:  .LBB7_2: # %entry
+; RV32-NEXT:    addi a2, a2, -1
+; RV32-NEXT:    and a1, a2, a1
+; RV32-NEXT:    xor a0, a0, a1
 ; RV32-NEXT:    ret
 ;
 ; NOCONDOPS-LABEL: select_xor_4b:
 ; NOCONDOPS:       # %bb.0: # %entry
 ; NOCONDOPS-NEXT:    andi a2, a2, 1
-; NOCONDOPS-NEXT:    bnez a2, .LBB7_2
-; NOCONDOPS-NEXT:  # %bb.1: # %entry
-; NOCONDOPS-NEXT:    xor a0, a1, a0
-; NOCONDOPS-NEXT:  .LBB7_2: # %entry
+; NOCONDOPS-NEXT:    addi a2, a2, -1
+; NOCONDOPS-NEXT:    and a1, a2, a1
+; NOCONDOPS-NEXT:    xor a0, a0, a1
 ; NOCONDOPS-NEXT:    ret
 ;
 ; CONDOPS-LABEL: select_xor_4b:
@@ -281,24 +256,16 @@ define i32 @select_or(i32 %A, i32 %B, i8 %cond) {
 ; RV32-NEXT:    slli a2, a2, 31
 ; RV32-NEXT:    srai a2, a2, 31
 ; RV32-NEXT:    and a1, a2, a1
-; RV32-NEXT:    or a0, a1, a0
+; RV32-NEXT:    or a0, a0, a1
 ; RV32-NEXT:    ret
 ;
-; NOCONDOPS-LABEL: select_or:
-; NOCONDOPS:       # %bb.0: # %entry
-; NOCONDOPS-NEXT:    slli a2, a2, 63
-; NOCONDOPS-NEXT:    srai a2, a2, 63
-; NOCONDOPS-NEXT:    and a1, a2, a1
-; NOCONDOPS-NEXT:    or a0, a1, a0
-; NOCONDOPS-NEXT:    ret
-;
-; CONDOPS-LABEL: select_or:
-; CONDOPS:       # %bb.0: # %entry
-; CONDOPS-NEXT:    andi a2, a2, 1
-; CONDOPS-NEXT:    seqz a2, a2
-; CONDOPS-NEXT:    vt.maskcn a1, a1, a2
-; CONDOPS-NEXT:    or a0, a0, a1
-; CONDOPS-NEXT:    ret
+; RV64-LABEL: select_or:
+; RV64:       # %bb.0: # %entry
+; RV64-NEXT:    slli a2, a2, 63
+; RV64-NEXT:    srai a2, a2, 63
+; RV64-NEXT:    and a1, a2, a1
+; RV64-NEXT:    or a0, a0, a1
+; RV64-NEXT:    ret
 entry:
  %and = and i8 %cond, 1
  %cmp10 = icmp eq i8 %and, 0
@@ -315,7 +282,7 @@ define i32 @select_or_b(i32 %A, i32 %B, i8 %cond) {
 ; RV32-NEXT:    slli a2, a2, 31
 ; RV32-NEXT:    srai a2, a2, 31
 ; RV32-NEXT:    and a1, a2, a1
-; RV32-NEXT:    or a0, a1, a0
+; RV32-NEXT:    or a0, a0, a1
 ; RV32-NEXT:    ret
 ;
 ; NOCONDOPS-LABEL: select_or_b:
@@ -323,7 +290,7 @@ define i32 @select_or_b(i32 %A, i32 %B, i8 %cond) {
 ; NOCONDOPS-NEXT:    slli a2, a2, 63
 ; NOCONDOPS-NEXT:    srai a2, a2, 63
 ; NOCONDOPS-NEXT:    and a1, a2, a1
-; NOCONDOPS-NEXT:    or a0, a1, a0
+; NOCONDOPS-NEXT:    or a0, a0, a1
 ; NOCONDOPS-NEXT:    ret
 ;
 ; CONDOPS-LABEL: select_or_b:
@@ -346,24 +313,16 @@ define i32 @select_or_1(i32 %A, i32 %B, i32 %cond) {
 ; RV32-NEXT:    slli a2, a2, 31
 ; RV32-NEXT:    srai a2, a2, 31
 ; RV32-NEXT:    and a1, a2, a1
-; RV32-NEXT:    or a0, a1, a0
+; RV32-NEXT:    or a0, a0, a1
 ; RV32-NEXT:    ret
 ;
-; NOCONDOPS-LABEL: select_or_1:
-; NOCONDOPS:       # %bb.0: # %entry
-; NOCONDOPS-NEXT:    slli a2, a2, 63
-; NOCONDOPS-NEXT:    srai a2, a2, 63
-; NOCONDOPS-NEXT:    and a1, a2, a1
-; NOCONDOPS-NEXT:    or a0, a1, a0
-; NOCONDOPS-NEXT:    ret
-;
-; CONDOPS-LABEL: select_or_1:
-; CONDOPS:       # %bb.0: # %entry
-; CONDOPS-NEXT:    andi a2, a2, 1
-; CONDOPS-NEXT:    seqz a2, a2
-; CONDOPS-NEXT:    vt.maskcn a1, a1, a2
-; CONDOPS-NEXT:    or a0, a0, a1
-; CONDOPS-NEXT:    ret
+; RV64-LABEL: select_or_1:
+; RV64:       # %bb.0: # %entry
+; RV64-NEXT:    slli a2, a2, 63
+; RV64-NEXT:    srai a2, a2, 63
+; RV64-NEXT:    and a1, a2, a1
+; RV64-NEXT:    or a0, a0, a1
+; RV64-NEXT:    ret
 entry:
  %and = and i32 %cond, 1
  %cmp10 = icmp eq i32 %and, 0
@@ -380,7 +339,7 @@ define i32 @select_or_1b(i32 %A, i32 %B, i32 %cond) {
 ; RV32-NEXT:    slli a2, a2, 31
 ; RV32-NEXT:    srai a2, a2, 31
 ; RV32-NEXT:    and a1, a2, a1
-; RV32-NEXT:    or a0, a1, a0
+; RV32-NEXT:    or a0, a0, a1
 ; RV32-NEXT:    ret
 ;
 ; NOCONDOPS-LABEL: select_or_1b:
@@ -388,7 +347,7 @@ define i32 @select_or_1b(i32 %A, i32 %B, i32 %cond) {
 ; NOCONDOPS-NEXT:    slli a2, a2, 63
 ; NOCONDOPS-NEXT:    srai a2, a2, 63
 ; NOCONDOPS-NEXT:    and a1, a2, a1
-; NOCONDOPS-NEXT:    or a0, a1, a0
+; NOCONDOPS-NEXT:    or a0, a0, a1
 ; NOCONDOPS-NEXT:    ret
 ;
 ; CONDOPS-LABEL: select_or_1b:
@@ -409,19 +368,17 @@ define i32 @select_or_2(i32 %A, i32 %B, i8 %cond) {
 ; RV32-LABEL: select_or_2:
 ; RV32:       # %bb.0: # %entry
 ; RV32-NEXT:    andi a2, a2, 1
-; RV32-NEXT:    bnez a2, .LBB12_2
-; RV32-NEXT:  # %bb.1:
-; RV32-NEXT:    or a0, a1, a0
-; RV32-NEXT:  .LBB12_2: # %entry
+; RV32-NEXT:    addi a2, a2, -1
+; RV32-NEXT:    and a1, a2, a1
+; RV32-NEXT:    or a0, a0, a1
 ; RV32-NEXT:    ret
 ;
 ; NOCONDOPS-LABEL: select_or_2:
 ; NOCONDOPS:       # %bb.0: # %entry
 ; NOCONDOPS-NEXT:    andi a2, a2, 1
-; NOCONDOPS-NEXT:    bnez a2, .LBB12_2
-; NOCONDOPS-NEXT:  # %bb.1:
-; NOCONDOPS-NEXT:    or a0, a1, a0
-; NOCONDOPS-NEXT:  .LBB12_2: # %entry
+; NOCONDOPS-NEXT:    addi a2, a2, -1
+; NOCONDOPS-NEXT:    and a1, a2, a1
+; NOCONDOPS-NEXT:    or a0, a0, a1
 ; NOCONDOPS-NEXT:    ret
 ;
 ; CONDOPS-LABEL: select_or_2:
@@ -445,19 +402,17 @@ define i32 @select_or_2b(i32 %A, i32 %B, i8 %cond) {
 ; RV32-LABEL: select_or_2b:
 ; RV32:       # %bb.0: # %entry
 ; RV32-NEXT:    andi a2, a2, 1
-; RV32-NEXT:    bnez a2, .LBB13_2
-; RV32-NEXT:  # %bb.1: # %entry
-; RV32-NEXT:    or a0, a1, a0
-; RV32-NEXT:  .LBB13_2: # %entry
+; RV32-NEXT:    addi a2, a2, -1
+; RV32-NEXT:    and a1, a2, a1
+; RV32-NEXT:    or a0, a0, a1
 ; RV32-NEXT:    ret
 ;
 ; NOCONDOPS-LABEL: select_or_2b:
 ; NOCONDOPS:       # %bb.0: # %entry
 ; NOCONDOPS-NEXT:    andi a2, a2, 1
-; NOCONDOPS-NEXT:    bnez a2, .LBB13_2
-; NOCONDOPS-NEXT:  # %bb.1: # %entry
-; NOCONDOPS-NEXT:    or a0, a1, a0
-; NOCONDOPS-NEXT:  .LBB13_2: # %entry
+; NOCONDOPS-NEXT:    addi a2, a2, -1
+; NOCONDOPS-NEXT:    and a1, a2, a1
+; NOCONDOPS-NEXT:    or a0, a0, a1
 ; NOCONDOPS-NEXT:    ret
 ;
 ; CONDOPS-LABEL: select_or_2b:
@@ -478,19 +433,17 @@ define i32 @select_or_3(i32 %A, i32 %B, i32 %cond) {
 ; RV32-LABEL: select_or_3:
 ; RV32:       # %bb.0: # %entry
 ; RV32-NEXT:    andi a2, a2, 1
-; RV32-NEXT:    bnez a2, .LBB14_2
-; RV32-NEXT:  # %bb.1:
-; RV32-NEXT:    or a0, a1, a0
-; RV32-NEXT:  .LBB14_2: # %entry
+; RV32-NEXT:    addi a2, a2, -1
+; RV32-NEXT:    and a1, a2, a1
+; RV32-NEXT:    or a0, a0, a1
 ; RV32-NEXT:    ret
 ;
 ; NOCONDOPS-LABEL: select_or_3:
 ; NOCONDOPS:       # %bb.0: # %entry
 ; NOCONDOPS-NEXT:    andi a2, a2, 1
-; NOCONDOPS-NEXT:    bnez a2, .LBB14_2
-; NOCONDOPS-NEXT:  # %bb.1:
-; NOCONDOPS-NEXT:    or a0, a1, a0
-; NOCONDOPS-NEXT:  .LBB14_2: # %entry
+; NOCONDOPS-NEXT:    addi a2, a2, -1
+; NOCONDOPS-NEXT:    and a1, a2, a1
+; NOCONDOPS-NEXT:    or a0, a0, a1
 ; NOCONDOPS-NEXT:    ret
 ;
 ; CONDOPS-LABEL: select_or_3:
@@ -514,19 +467,17 @@ define i32 @select_or_3b(i32 %A, i32 %B, i32 %cond) {
 ; RV32-LABEL: select_or_3b:
 ; RV32:       # %bb.0: # %entry
 ; RV32-NEXT:    andi a2, a2, 1
-; RV32-NEXT:    bnez a2, .LBB15_2
-; RV32-NEXT:  # %bb.1: # %entry
-; RV32-NEXT:    or a0, a1, a0
-; RV32-NEXT:  .LBB15_2: # %entry
+; RV32-NEXT:    addi a2, a2, -1
+; RV32-NEXT:    and a1, a2, a1
+; RV32-NEXT:    or a0, a0, a1
 ; RV32-NEXT:    ret
 ;
 ; NOCONDOPS-LABEL: select_or_3b:
 ; NOCONDOPS:       # %bb.0: # %entry
 ; NOCONDOPS-NEXT:    andi a2, a2, 1
-; NOCONDOPS-NEXT:    bnez a2, .LBB15_2
-; NOCONDOPS-NEXT:  # %bb.1: # %entry
-; NOCONDOPS-NEXT:    or a0, a1, a0
-; NOCONDOPS-NEXT:  .LBB15_2: # %entry
+; NOCONDOPS-NEXT:    addi a2, a2, -1
+; NOCONDOPS-NEXT:    and a1, a2, a1
+; NOCONDOPS-NEXT:    or a0, a0, a1
 ; NOCONDOPS-NEXT:    ret
 ;
 ; CONDOPS-LABEL: select_or_3b:
@@ -546,11 +497,9 @@ entry:
 define i32 @select_add_1(i1 zeroext %cond, i32 %a, i32 %b) {
 ; RV32-LABEL: select_add_1:
 ; RV32:       # %bb.0: # %entry
-; RV32-NEXT:    beqz a0, .LBB16_2
-; RV32-NEXT:  # %bb.1:
-; RV32-NEXT:    add a2, a1, a2
-; RV32-NEXT:  .LBB16_2: # %entry
-; RV32-NEXT:    mv a0, a2
+; RV32-NEXT:    neg a0, a0
+; RV32-NEXT:    and a0, a0, a1
+; RV32-NEXT:    add a0, a2, a0
 ; RV32-NEXT:    ret
 ;
 ; NOCONDOPS-LABEL: select_add_1:
@@ -578,11 +527,9 @@ entry:
 define i32 @select_add_2(i1 zeroext %cond, i32 %a, i32 %b) {
 ; RV32-LABEL: select_add_2:
 ; RV32:       # %bb.0: # %entry
-; RV32-NEXT:    bnez a0, .LBB17_2
-; RV32-NEXT:  # %bb.1: # %entry
-; RV32-NEXT:    add a1, a1, a2
-; RV32-NEXT:  .LBB17_2: # %entry
-; RV32-NEXT:    mv a0, a1
+; RV32-NEXT:    addi a0, a0, -1
+; RV32-NEXT:    and a0, a0, a2
+; RV32-NEXT:    add a0, a1, a0
 ; RV32-NEXT:    ret
 ;
 ; NOCONDOPS-LABEL: select_add_2:
@@ -610,11 +557,9 @@ entry:
 define i32 @select_add_3(i1 zeroext %cond, i32 %a) {
 ; RV32-LABEL: select_add_3:
 ; RV32:       # %bb.0: # %entry
-; RV32-NEXT:    bnez a0, .LBB18_2
-; RV32-NEXT:  # %bb.1: # %entry
-; RV32-NEXT:    addi a1, a1, 42
-; RV32-NEXT:  .LBB18_2: # %entry
-; RV32-NEXT:    mv a0, a1
+; RV32-NEXT:    addi a0, a0, -1
+; RV32-NEXT:    andi a0, a0, 42
+; RV32-NEXT:    add a0, a1, a0
 ; RV32-NEXT:    ret
 ;
 ; NOCONDOPS-LABEL: select_add_3:
@@ -674,11 +619,9 @@ entry:
 define i32 @select_sub_2(i1 zeroext %cond, i32 %a, i32 %b) {
 ; RV32-LABEL: select_sub_2:
 ; RV32:       # %bb.0: # %entry
-; RV32-NEXT:    bnez a0, .LBB20_2
-; RV32-NEXT:  # %bb.1: # %entry
-; RV32-NEXT:    sub a1, a1, a2
-; RV32-NEXT:  .LBB20_2: # %entry
-; RV32-NEXT:    mv a0, a1
+; RV32-NEXT:    addi a0, a0, -1
+; RV32-NEXT:    and a0, a0, a2
+; RV32-NEXT:    sub a0, a1, a0
 ; RV32-NEXT:    ret
 ;
 ; NOCONDOPS-LABEL: select_sub_2:
@@ -706,11 +649,9 @@ entry:
 define i32 @select_sub_3(i1 zeroext %cond, i32 %a) {
 ; RV32-LABEL: select_sub_3:
 ; RV32:       # %bb.0: # %entry
-; RV32-NEXT:    bnez a0, .LBB21_2
-; RV32-NEXT:  # %bb.1: # %entry
-; RV32-NEXT:    addi a1, a1, -42
-; RV32-NEXT:  .LBB21_2: # %entry
-; RV32-NEXT:    mv a0, a1
+; RV32-NEXT:    addi a0, a0, -1
+; RV32-NEXT:    andi a0, a0, 42
+; RV32-NEXT:    sub a0, a1, a0
 ; RV32-NEXT:    ret
 ;
 ; NOCONDOPS-LABEL: select_sub_3:
@@ -1161,5 +1102,3 @@ entry:
   %res = select i1 %cond, i32 %a, i32 %c
   ret i32 %res
 }
-;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
-; RV64: {{.*}}
diff --git a/llvm/test/CodeGen/RISCV/sextw-removal.ll b/llvm/test/CodeGen/RISCV/sextw-removal.ll
index 976837e..1d18837 100644
--- a/llvm/test/CodeGen/RISCV/sextw-removal.ll
+++ b/llvm/test/CodeGen/RISCV/sextw-removal.ll
@@ -1024,95 +1024,96 @@ bb7:                                              ; preds = %bb2
 define signext i32 @bug(i32 signext %x) {
 ; CHECK-LABEL: bug:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    beqz a0, .LBB18_10
+; CHECK-NEXT:    beqz a0, .LBB18_11
 ; CHECK-NEXT:  # %bb.1: # %if.end
 ; CHECK-NEXT:    srliw a1, a0, 16
 ; CHECK-NEXT:    beqz a1, .LBB18_3
 ; CHECK-NEXT:  # %bb.2: # %if.end
 ; CHECK-NEXT:    li a1, 32
-; CHECK-NEXT:    srliw a2, a0, 24
-; CHECK-NEXT:    beqz a2, .LBB18_4
-; CHECK-NEXT:    j .LBB18_5
+; CHECK-NEXT:    j .LBB18_4
 ; CHECK-NEXT:  .LBB18_3:
 ; CHECK-NEXT:    slliw a0, a0, 16
 ; CHECK-NEXT:    li a1, 16
-; CHECK-NEXT:    srliw a2, a0, 24
-; CHECK-NEXT:    bnez a2, .LBB18_5
-; CHECK-NEXT:  .LBB18_4:
+; CHECK-NEXT:  .LBB18_4: # %if.end
+; CHECK-NEXT:    srliw a3, a0, 24
+; CHECK-NEXT:    snez a2, a3
+; CHECK-NEXT:    bnez a3, .LBB18_6
+; CHECK-NEXT:  # %bb.5:
 ; CHECK-NEXT:    slliw a0, a0, 8
-; CHECK-NEXT:    addi a1, a1, -8
-; CHECK-NEXT:  .LBB18_5: # %if.end
-; CHECK-NEXT:    srliw a2, a0, 28
-; CHECK-NEXT:    beqz a2, .LBB18_11
-; CHECK-NEXT:  # %bb.6: # %if.end
-; CHECK-NEXT:    srliw a2, a0, 30
-; CHECK-NEXT:    beqz a2, .LBB18_12
-; CHECK-NEXT:  .LBB18_7: # %if.end
-; CHECK-NEXT:    bnez a2, .LBB18_9
-; CHECK-NEXT:  .LBB18_8:
-; CHECK-NEXT:    addi a1, a1, -2
-; CHECK-NEXT:  .LBB18_9: # %if.end
+; CHECK-NEXT:  .LBB18_6: # %if.end
+; CHECK-NEXT:    addiw a2, a2, -1
+; CHECK-NEXT:    andi a2, a2, -8
+; CHECK-NEXT:    addw a1, a1, a2
+; CHECK-NEXT:    srliw a3, a0, 28
+; CHECK-NEXT:    snez a2, a3
+; CHECK-NEXT:    bnez a3, .LBB18_8
+; CHECK-NEXT:  # %bb.7:
+; CHECK-NEXT:    slliw a0, a0, 4
+; CHECK-NEXT:  .LBB18_8: # %if.end
+; CHECK-NEXT:    addiw a2, a2, -1
+; CHECK-NEXT:    andi a2, a2, -4
+; CHECK-NEXT:    addw a1, a1, a2
+; CHECK-NEXT:    srliw a3, a0, 30
+; CHECK-NEXT:    snez a2, a3
+; CHECK-NEXT:    bnez a3, .LBB18_10
+; CHECK-NEXT:  # %bb.9:
+; CHECK-NEXT:    slliw a0, a0, 2
+; CHECK-NEXT:  .LBB18_10: # %if.end
+; CHECK-NEXT:    addiw a2, a2, -1
+; CHECK-NEXT:    andi a2, a2, -2
+; CHECK-NEXT:    addw a1, a1, a2
 ; CHECK-NEXT:    not a0, a0
 ; CHECK-NEXT:    srli a0, a0, 31
 ; CHECK-NEXT:    addw a0, a1, a0
-; CHECK-NEXT:  .LBB18_10: # %cleanup
+; CHECK-NEXT:  .LBB18_11: # %cleanup
 ; CHECK-NEXT:    ret
-; CHECK-NEXT:  .LBB18_11:
-; CHECK-NEXT:    slliw a0, a0, 4
-; CHECK-NEXT:    addi a1, a1, -4
-; CHECK-NEXT:    srliw a2, a0, 30
-; CHECK-NEXT:    bnez a2, .LBB18_7
-; CHECK-NEXT:  .LBB18_12:
-; CHECK-NEXT:    slliw a0, a0, 2
-; CHECK-NEXT:    beqz a2, .LBB18_8
-; CHECK-NEXT:    j .LBB18_9
 ;
 ; NOREMOVAL-LABEL: bug:
 ; NOREMOVAL:       # %bb.0: # %entry
-; NOREMOVAL-NEXT:    beqz a0, .LBB18_10
+; NOREMOVAL-NEXT:    beqz a0, .LBB18_11
 ; NOREMOVAL-NEXT:  # %bb.1: # %if.end
 ; NOREMOVAL-NEXT:    srliw a1, a0, 16
 ; NOREMOVAL-NEXT:    beqz a1, .LBB18_3
 ; NOREMOVAL-NEXT:  # %bb.2: # %if.end
 ; NOREMOVAL-NEXT:    li a1, 32
-; NOREMOVAL-NEXT:    srliw a2, a0, 24
-; NOREMOVAL-NEXT:    beqz a2, .LBB18_4
-; NOREMOVAL-NEXT:    j .LBB18_5
+; NOREMOVAL-NEXT:    j .LBB18_4
 ; NOREMOVAL-NEXT:  .LBB18_3:
 ; NOREMOVAL-NEXT:    slliw a0, a0, 16
 ; NOREMOVAL-NEXT:    li a1, 16
-; NOREMOVAL-NEXT:    srliw a2, a0, 24
-; NOREMOVAL-NEXT:    bnez a2, .LBB18_5
-; NOREMOVAL-NEXT:  .LBB18_4:
+; NOREMOVAL-NEXT:  .LBB18_4: # %if.end
+; NOREMOVAL-NEXT:    srliw a3, a0, 24
+; NOREMOVAL-NEXT:    snez a2, a3
+; NOREMOVAL-NEXT:    bnez a3, .LBB18_6
+; NOREMOVAL-NEXT:  # %bb.5:
 ; NOREMOVAL-NEXT:    slliw a0, a0, 8
-; NOREMOVAL-NEXT:    addi a1, a1, -8
-; NOREMOVAL-NEXT:  .LBB18_5: # %if.end
-; NOREMOVAL-NEXT:    srliw a2, a0, 28
-; NOREMOVAL-NEXT:    beqz a2, .LBB18_11
-; NOREMOVAL-NEXT:  # %bb.6: # %if.end
-; NOREMOVAL-NEXT:    srliw a2, a0, 30
-; NOREMOVAL-NEXT:    beqz a2, .LBB18_12
-; NOREMOVAL-NEXT:  .LBB18_7: # %if.end
+; NOREMOVAL-NEXT:  .LBB18_6: # %if.end
+; NOREMOVAL-NEXT:    addiw a2, a2, -1
+; NOREMOVAL-NEXT:    andi a2, a2, -8
+; NOREMOVAL-NEXT:    addw a1, a1, a2
+; NOREMOVAL-NEXT:    srliw a3, a0, 28
+; NOREMOVAL-NEXT:    snez a2, a3
+; NOREMOVAL-NEXT:    bnez a3, .LBB18_8
+; NOREMOVAL-NEXT:  # %bb.7:
+; NOREMOVAL-NEXT:    slliw a0, a0, 4
+; NOREMOVAL-NEXT:  .LBB18_8: # %if.end
+; NOREMOVAL-NEXT:    addiw a2, a2, -1
+; NOREMOVAL-NEXT:    andi a2, a2, -4
+; NOREMOVAL-NEXT:    addw a1, a1, a2
+; NOREMOVAL-NEXT:    srliw a3, a0, 30
+; NOREMOVAL-NEXT:    snez a2, a3
+; NOREMOVAL-NEXT:    bnez a3, .LBB18_10
+; NOREMOVAL-NEXT:  # %bb.9:
+; NOREMOVAL-NEXT:    slli a0, a0, 2
+; NOREMOVAL-NEXT:  .LBB18_10: # %if.end
 ; NOREMOVAL-NEXT:    sext.w a0, a0
-; NOREMOVAL-NEXT:    bnez a2, .LBB18_9
-; NOREMOVAL-NEXT:  .LBB18_8:
-; NOREMOVAL-NEXT:    addi a1, a1, -2
-; NOREMOVAL-NEXT:  .LBB18_9: # %if.end
+; NOREMOVAL-NEXT:    addiw a2, a2, -1
+; NOREMOVAL-NEXT:    andi a2, a2, -2
+; NOREMOVAL-NEXT:    addw a1, a1, a2
 ; NOREMOVAL-NEXT:    not a0, a0
 ; NOREMOVAL-NEXT:    srli a0, a0, 31
 ; NOREMOVAL-NEXT:    addw a0, a1, a0
-; NOREMOVAL-NEXT:  .LBB18_10: # %cleanup
+; NOREMOVAL-NEXT:  .LBB18_11: # %cleanup
 ; NOREMOVAL-NEXT:    ret
-; NOREMOVAL-NEXT:  .LBB18_11:
-; NOREMOVAL-NEXT:    slliw a0, a0, 4
-; NOREMOVAL-NEXT:    addi a1, a1, -4
-; NOREMOVAL-NEXT:    srliw a2, a0, 30
-; NOREMOVAL-NEXT:    bnez a2, .LBB18_7
-; NOREMOVAL-NEXT:  .LBB18_12:
-; NOREMOVAL-NEXT:    slli a0, a0, 2
-; NOREMOVAL-NEXT:    sext.w a0, a0
-; NOREMOVAL-NEXT:    beqz a2, .LBB18_8
-; NOREMOVAL-NEXT:    j .LBB18_9
 entry:
   %tobool.not = icmp eq i32 %x, 0
   br i1 %tobool.not, label %cleanup, label %if.end
-- 
2.7.4