We don’t have W versions of AND/OR/XOR/ANDN/ORN/XNOR, so we should recursively check their users. We should limit the recursion to SelectionDAG::MaxRecursionDepth levels.
We need to add a Depth argument; all existing callers should pass 0 for Depth, and the new recursive calls should increment it by 1. At the top of the function we should give up and return false if Depth >= SelectionDAG::MaxRecursionDepth.
Reviewed By: craig.topper
Differential Revision: https://reviews.llvm.org/D139462
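
As a rough, standalone sketch of the depth-limited user walk described above (this is not LLVM code: the Node type, its Kind values, and the MaxRecursionDepth constant are illustrative stand-ins for their SelectionDAG counterparts), the shape of the check is:

// Standalone sketch of the bounded-recursion pattern; Node and its Kind
// values are hypothetical stand-ins, not LLVM types.
#include <vector>

struct Node {
  enum Kind { AndOp, OrOp, XorOp, UseLow8, UseLow32, UseAll64 };
  Kind K;
  std::vector<Node *> Users;
};

// Stand-in for SelectionDAG::MaxRecursionDepth.
constexpr unsigned MaxRecursionDepth = 6;

// Returns true if every transitive user of N only reads its low 'Bits' bits.
// Existing callers rely on the default Depth of 0; recursive calls add 1.
bool hasAllNBitUsers(const Node *N, unsigned Bits, unsigned Depth = 0) {
  if (Depth >= MaxRecursionDepth)
    return false; // Give up rather than walk an arbitrarily deep chain.
  for (const Node *User : N->Users) {
    switch (User->K) {
    case Node::AndOp:
    case Node::OrOp:
    case Node::XorOp:
      // Bitwise ops don't move bits between positions, so the question is
      // simply whether *their* users only read the low bits.
      if (!hasAllNBitUsers(User, Bits, Depth + 1))
        return false;
      break;
    case Node::UseLow8:
      if (Bits < 8)
        return false;
      break;
    case Node::UseLow32:
      if (Bits < 32)
        return false;
      break;
    case Node::UseAll64:
      return false;
    }
  }
  return true;
}

In this sketch, hasAllNBitUsers(N, 32) plays roughly the role of hasAllWUsers in the patch: if it returns true, only the low 32 bits of the result are consumed, so a W-form instruction can be selected.
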
// may be able to use a W instruction and CSE with the other instruction if
// this has happened. We could try to detect that the CSE opportunity exists
// before doing this, but that would be more complicated.
-// TODO: Does this need to look through AND/OR/XOR to their users to find more
-// opportunities.
-bool RISCVDAGToDAGISel::hasAllNBitUsers(SDNode *Node, unsigned Bits) const {
+bool RISCVDAGToDAGISel::hasAllNBitUsers(SDNode *Node, unsigned Bits,
+ const unsigned Depth) const {
assert((Node->getOpcode() == ISD::ADD || Node->getOpcode() == ISD::SUB ||
Node->getOpcode() == ISD::MUL || Node->getOpcode() == ISD::SHL ||
Node->getOpcode() == ISD::SRL || Node->getOpcode() == ISD::AND ||
Node->getOpcode() == ISD::OR || Node->getOpcode() == ISD::XOR ||
Node->getOpcode() == ISD::SIGN_EXTEND_INREG ||
- isa<ConstantSDNode>(Node)) &&
+ isa<ConstantSDNode>(Node) || Depth != 0) &&
"Unexpected opcode");
+ if (Depth >= SelectionDAG::MaxRecursionDepth)
+ return false;
+
for (auto UI = Node->use_begin(), UE = Node->use_end(); UI != UE; ++UI) {
SDNode *User = *UI;
// Users of this node should have already been instruction selected
return false;
break;
case RISCV::ANDI:
- if (Bits < (64 - countLeadingZeros(User->getConstantOperandVal(1))))
- return false;
- break;
+ if (Bits >= (64 - countLeadingZeros(User->getConstantOperandVal(1))))
+ break;
+ goto RecCheck;
case RISCV::ORI: {
uint64_t Imm = cast<ConstantSDNode>(User->getOperand(1))->getSExtValue();
- if (Bits < (64 - countLeadingOnes(Imm)))
+ if (Bits >= (64 - countLeadingOnes(Imm)))
+ break;
+ [[fallthrough]];
+ }
+ case RISCV::AND:
+ case RISCV::OR:
+ case RISCV::XOR:
+ case RISCV::ANDN:
+ case RISCV::ORN:
+ case RISCV::XNOR:
+ RecCheck:
+ if (!hasAllNBitUsers(User, Bits, Depth + 1))
return false;
break;
- }
case RISCV::SEXT_B:
case RISCV::PACKH:
if (Bits < 8)
return selectSHXADD_UWOp(N, ShAmt, Val);
}
- bool hasAllNBitUsers(SDNode *Node, unsigned Bits) const;
+ bool hasAllNBitUsers(SDNode *Node, unsigned Bits,
+ const unsigned Depth = 0) const;
bool hasAllHUsers(SDNode *Node) const { return hasAllNBitUsers(Node, 16); }
bool hasAllWUsers(SDNode *Node) const { return hasAllNBitUsers(Node, 32); }
; RV64ZBB-LABEL: pr55484:
; RV64ZBB: # %bb.0:
; RV64ZBB-NEXT: srli a1, a0, 8
-; RV64ZBB-NEXT: slli a0, a0, 8
+; RV64ZBB-NEXT: slliw a0, a0, 8
; RV64ZBB-NEXT: or a0, a1, a0
; RV64ZBB-NEXT: sext.h a0, a0
; RV64ZBB-NEXT: ret
; RV64M-NEXT: sext.w a1, a0
; RV64M-NEXT: beqz a1, .LBB2_2
; RV64M-NEXT: # %bb.1: # %cond.false
-; RV64M-NEXT: neg a1, a0
+; RV64M-NEXT: negw a1, a0
; RV64M-NEXT: and a0, a0, a1
; RV64M-NEXT: lui a1, 30667
; RV64M-NEXT: addiw a1, a1, 1329
;
; RV64M-LABEL: test_cttz_i32_zero_undef:
; RV64M: # %bb.0:
-; RV64M-NEXT: neg a1, a0
+; RV64M-NEXT: negw a1, a0
; RV64M-NEXT: and a0, a0, a1
; RV64M-NEXT: lui a1, 30667
; RV64M-NEXT: addiw a1, a1, 1329
; RV64M-NEXT: and a0, a0, a1
; RV64M-NEXT: add a0, a2, a0
; RV64M-NEXT: srli a1, a0, 4
-; RV64M-NEXT: add a0, a0, a1
+; RV64M-NEXT: addw a0, a0, a1
; RV64M-NEXT: lui a1, 61681
; RV64M-NEXT: addiw a1, a1, -241
; RV64M-NEXT: and a0, a0, a1
; RV64M-NEXT: and a0, a0, a1
; RV64M-NEXT: add a0, a2, a0
; RV64M-NEXT: srli a1, a0, 4
-; RV64M-NEXT: add a0, a0, a1
+; RV64M-NEXT: addw a0, a0, a1
; RV64M-NEXT: lui a1, 61681
; RV64M-NEXT: addiw a1, a1, -241
; RV64M-NEXT: and a0, a0, a1
; RV64M-NEXT: and a0, a0, a1
; RV64M-NEXT: add a0, a2, a0
; RV64M-NEXT: srli a1, a0, 4
-; RV64M-NEXT: add a0, a0, a1
+; RV64M-NEXT: addw a0, a0, a1
; RV64M-NEXT: lui a1, 61681
; RV64M-NEXT: addiw a1, a1, -241
; RV64M-NEXT: and a0, a0, a1
; RV64IFD-NEXT: fcvt.wu.d a0, fa0, rtz
; RV64IFD-NEXT: feq.d a1, fa0, fa0
; RV64IFD-NEXT: seqz a1, a1
-; RV64IFD-NEXT: addi a1, a1, -1
+; RV64IFD-NEXT: addiw a1, a1, -1
; RV64IFD-NEXT: and a0, a0, a1
; RV64IFD-NEXT: slli a0, a0, 32
; RV64IFD-NEXT: srli a0, a0, 32
; RV64I-NEXT: mv a1, s0
; RV64I-NEXT: call __unorddf2@plt
; RV64I-NEXT: snez a0, a0
-; RV64I-NEXT: addi a0, a0, -1
+; RV64I-NEXT: addiw a0, a0, -1
; RV64I-NEXT: and a0, a0, s1
; RV64I-NEXT: slli a0, a0, 48
; RV64I-NEXT: srai a0, a0, 48
; RV64I-NEXT: mv a1, s0
; RV64I-NEXT: call __unorddf2@plt
; RV64I-NEXT: snez a0, a0
-; RV64I-NEXT: addi a0, a0, -1
+; RV64I-NEXT: addiw a0, a0, -1
; RV64I-NEXT: and a0, a0, s1
; RV64I-NEXT: slli a0, a0, 56
; RV64I-NEXT: srai a0, a0, 56
; RV64IFD-NEXT: fcvt.wu.d a0, fa0, rtz
; RV64IFD-NEXT: feq.d a1, fa0, fa0
; RV64IFD-NEXT: seqz a1, a1
-; RV64IFD-NEXT: addi a1, a1, -1
+; RV64IFD-NEXT: addiw a1, a1, -1
; RV64IFD-NEXT: and a0, a0, a1
; RV64IFD-NEXT: slli a0, a0, 32
; RV64IFD-NEXT: srli a0, a0, 32
; RV64I-NEXT: mv a1, s0
; RV64I-NEXT: call __unorddf2@plt
; RV64I-NEXT: snez a0, a0
-; RV64I-NEXT: addi a0, a0, -1
+; RV64I-NEXT: addiw a0, a0, -1
; RV64I-NEXT: and a0, a0, s1
; RV64I-NEXT: sext.w a0, a0
; RV64I-NEXT: ld ra, 40(sp) # 8-byte Folded Reload
; RV64IF-NEXT: fcvt.wu.s a0, fa0, rtz
; RV64IF-NEXT: feq.s a1, fa0, fa0
; RV64IF-NEXT: seqz a1, a1
-; RV64IF-NEXT: addi a1, a1, -1
+; RV64IF-NEXT: addiw a1, a1, -1
; RV64IF-NEXT: and a0, a0, a1
; RV64IF-NEXT: slli a0, a0, 32
; RV64IF-NEXT: srli a0, a0, 32
; RV64I-NEXT: mv a1, s0
; RV64I-NEXT: call __unordsf2@plt
; RV64I-NEXT: snez a0, a0
-; RV64I-NEXT: addi a0, a0, -1
+; RV64I-NEXT: addiw a0, a0, -1
; RV64I-NEXT: and a0, a0, s1
; RV64I-NEXT: slli a0, a0, 48
; RV64I-NEXT: srai a0, a0, 48
; RV64I-NEXT: mv a1, s0
; RV64I-NEXT: call __unordsf2@plt
; RV64I-NEXT: snez a0, a0
-; RV64I-NEXT: addi a0, a0, -1
+; RV64I-NEXT: addiw a0, a0, -1
; RV64I-NEXT: and a0, a0, s1
; RV64I-NEXT: slli a0, a0, 56
; RV64I-NEXT: srai a0, a0, 56
; RV64IF-NEXT: fcvt.wu.s a0, fa0, rtz
; RV64IF-NEXT: feq.s a1, fa0, fa0
; RV64IF-NEXT: seqz a1, a1
-; RV64IF-NEXT: addi a1, a1, -1
+; RV64IF-NEXT: addiw a1, a1, -1
; RV64IF-NEXT: and a0, a0, a1
; RV64IF-NEXT: slli a0, a0, 32
; RV64IF-NEXT: srli a0, a0, 32
; RV64I-NEXT: mv a1, s0
; RV64I-NEXT: call __unordsf2@plt
; RV64I-NEXT: snez a0, a0
-; RV64I-NEXT: addi a0, a0, -1
+; RV64I-NEXT: addiw a0, a0, -1
; RV64I-NEXT: and a0, a0, s1
; RV64I-NEXT: sext.w a0, a0
; RV64I-NEXT: ld ra, 40(sp) # 8-byte Folded Reload
; CHECK-NOV-NEXT: sgtz a6, a3
; CHECK-NOV-NEXT: sgtz a7, a2
; CHECK-NOV-NEXT: sgtz t0, a1
-; CHECK-NOV-NEXT: neg t0, t0
+; CHECK-NOV-NEXT: negw t0, t0
; CHECK-NOV-NEXT: and a1, t0, a1
-; CHECK-NOV-NEXT: neg a7, a7
+; CHECK-NOV-NEXT: negw a7, a7
; CHECK-NOV-NEXT: and a2, a7, a2
-; CHECK-NOV-NEXT: neg a6, a6
+; CHECK-NOV-NEXT: negw a6, a6
; CHECK-NOV-NEXT: and a3, a6, a3
-; CHECK-NOV-NEXT: neg a4, a4
+; CHECK-NOV-NEXT: negw a4, a4
; CHECK-NOV-NEXT: and a4, a4, a5
; CHECK-NOV-NEXT: sw a4, 12(a0)
; CHECK-NOV-NEXT: sw a3, 8(a0)
; CHECK-NOV-NEXT: sgtz a4, a1
; CHECK-NOV-NEXT: sgtz a5, s2
; CHECK-NOV-NEXT: sgtz a6, a0
-; CHECK-NOV-NEXT: neg a6, a6
+; CHECK-NOV-NEXT: negw a6, a6
; CHECK-NOV-NEXT: and a0, a6, a0
-; CHECK-NOV-NEXT: neg a5, a5
+; CHECK-NOV-NEXT: negw a5, a5
; CHECK-NOV-NEXT: and a5, a5, s2
-; CHECK-NOV-NEXT: neg a4, a4
+; CHECK-NOV-NEXT: negw a4, a4
; CHECK-NOV-NEXT: and a1, a4, a1
-; CHECK-NOV-NEXT: neg a2, a2
+; CHECK-NOV-NEXT: negw a2, a2
; CHECK-NOV-NEXT: and a2, a2, a3
; CHECK-NOV-NEXT: sw a2, 12(s0)
; CHECK-NOV-NEXT: sw a1, 8(s0)
; CHECK-NOV-NEXT: sgtz a6, a3
; CHECK-NOV-NEXT: sgtz a7, a2
; CHECK-NOV-NEXT: sgtz t0, a1
-; CHECK-NOV-NEXT: neg t0, t0
+; CHECK-NOV-NEXT: negw t0, t0
; CHECK-NOV-NEXT: and a1, t0, a1
-; CHECK-NOV-NEXT: neg a7, a7
+; CHECK-NOV-NEXT: negw a7, a7
; CHECK-NOV-NEXT: and a2, a7, a2
-; CHECK-NOV-NEXT: neg a6, a6
+; CHECK-NOV-NEXT: negw a6, a6
; CHECK-NOV-NEXT: and a3, a6, a3
-; CHECK-NOV-NEXT: neg a4, a4
+; CHECK-NOV-NEXT: negw a4, a4
; CHECK-NOV-NEXT: and a4, a4, a5
; CHECK-NOV-NEXT: sh a4, 6(a0)
; CHECK-NOV-NEXT: sh a3, 4(a0)
; CHECK-NOV-NEXT: sgtz t4, a1
; CHECK-NOV-NEXT: sgtz t5, s2
; CHECK-NOV-NEXT: sgtz t6, a0
-; CHECK-NOV-NEXT: neg t6, t6
+; CHECK-NOV-NEXT: negw t6, t6
; CHECK-NOV-NEXT: and a0, t6, a0
-; CHECK-NOV-NEXT: neg t5, t5
+; CHECK-NOV-NEXT: negw t5, t5
; CHECK-NOV-NEXT: and t5, t5, s2
-; CHECK-NOV-NEXT: neg t4, t4
+; CHECK-NOV-NEXT: negw t4, t4
; CHECK-NOV-NEXT: and a1, t4, a1
-; CHECK-NOV-NEXT: neg t3, t3
+; CHECK-NOV-NEXT: negw t3, t3
; CHECK-NOV-NEXT: and a2, t3, a2
-; CHECK-NOV-NEXT: neg t2, t2
+; CHECK-NOV-NEXT: negw t2, t2
; CHECK-NOV-NEXT: and a4, t2, a4
-; CHECK-NOV-NEXT: neg t1, t1
+; CHECK-NOV-NEXT: negw t1, t1
; CHECK-NOV-NEXT: and a5, t1, a5
-; CHECK-NOV-NEXT: neg t0, t0
+; CHECK-NOV-NEXT: negw t0, t0
; CHECK-NOV-NEXT: and a6, t0, a6
-; CHECK-NOV-NEXT: neg a3, a3
+; CHECK-NOV-NEXT: negw a3, a3
; CHECK-NOV-NEXT: and a3, a3, a7
; CHECK-NOV-NEXT: sh a3, 14(s0)
; CHECK-NOV-NEXT: sh a6, 12(s0)
; CHECK-NOV-NEXT: mv a5, a4
; CHECK-NOV-NEXT: .LBB32_5: # %entry
; CHECK-NOV-NEXT: sgtz a4, a5
-; CHECK-NOV-NEXT: neg a4, a4
+; CHECK-NOV-NEXT: negw a4, a4
; CHECK-NOV-NEXT: and a4, a4, a5
; CHECK-NOV-NEXT: sgtz a5, a3
-; CHECK-NOV-NEXT: neg a5, a5
+; CHECK-NOV-NEXT: negw a5, a5
; CHECK-NOV-NEXT: and a3, a5, a3
; CHECK-NOV-NEXT: sgtz a5, a2
-; CHECK-NOV-NEXT: neg a5, a5
+; CHECK-NOV-NEXT: negw a5, a5
; CHECK-NOV-NEXT: and a2, a5, a2
; CHECK-NOV-NEXT: sgtz a5, a1
-; CHECK-NOV-NEXT: neg a5, a5
+; CHECK-NOV-NEXT: negw a5, a5
; CHECK-NOV-NEXT: and a1, a5, a1
; CHECK-NOV-NEXT: sw a1, 12(a0)
; CHECK-NOV-NEXT: sw a2, 8(a0)
; CHECK-NOV-NEXT: mv a3, a2
; CHECK-NOV-NEXT: .LBB35_5: # %entry
; CHECK-NOV-NEXT: sgtz a2, a3
-; CHECK-NOV-NEXT: neg a2, a2
+; CHECK-NOV-NEXT: negw a2, a2
; CHECK-NOV-NEXT: and a2, a2, a3
; CHECK-NOV-NEXT: sgtz a3, a1
-; CHECK-NOV-NEXT: neg a3, a3
+; CHECK-NOV-NEXT: negw a3, a3
; CHECK-NOV-NEXT: and a1, a3, a1
; CHECK-NOV-NEXT: sgtz a3, s2
-; CHECK-NOV-NEXT: neg a3, a3
+; CHECK-NOV-NEXT: negw a3, a3
; CHECK-NOV-NEXT: and a3, a3, s2
; CHECK-NOV-NEXT: sgtz a4, a0
-; CHECK-NOV-NEXT: neg a4, a4
+; CHECK-NOV-NEXT: negw a4, a4
; CHECK-NOV-NEXT: and a0, a4, a0
; CHECK-NOV-NEXT: sw a0, 12(s0)
; CHECK-NOV-NEXT: sw a3, 8(s0)
; CHECK-NOV-NEXT: mv a5, a4
; CHECK-NOV-NEXT: .LBB41_5: # %entry
; CHECK-NOV-NEXT: sgtz a4, a5
-; CHECK-NOV-NEXT: neg a4, a4
+; CHECK-NOV-NEXT: negw a4, a4
; CHECK-NOV-NEXT: and a4, a4, a5
; CHECK-NOV-NEXT: sgtz a5, a3
-; CHECK-NOV-NEXT: neg a5, a5
+; CHECK-NOV-NEXT: negw a5, a5
; CHECK-NOV-NEXT: and a3, a5, a3
; CHECK-NOV-NEXT: sgtz a5, a2
-; CHECK-NOV-NEXT: neg a5, a5
+; CHECK-NOV-NEXT: negw a5, a5
; CHECK-NOV-NEXT: and a2, a5, a2
; CHECK-NOV-NEXT: sgtz a5, a1
-; CHECK-NOV-NEXT: neg a5, a5
+; CHECK-NOV-NEXT: negw a5, a5
; CHECK-NOV-NEXT: and a1, a5, a1
; CHECK-NOV-NEXT: sh a1, 6(a0)
; CHECK-NOV-NEXT: sh a2, 4(a0)
; CHECK-NOV-NEXT: mv a7, a3
; CHECK-NOV-NEXT: .LBB44_9: # %entry
; CHECK-NOV-NEXT: sgtz a3, a7
-; CHECK-NOV-NEXT: neg a3, a3
+; CHECK-NOV-NEXT: negw a3, a3
; CHECK-NOV-NEXT: and a3, a3, a7
; CHECK-NOV-NEXT: sgtz a7, a6
-; CHECK-NOV-NEXT: neg a7, a7
+; CHECK-NOV-NEXT: negw a7, a7
; CHECK-NOV-NEXT: and a6, a7, a6
; CHECK-NOV-NEXT: sgtz a7, a5
-; CHECK-NOV-NEXT: neg a7, a7
+; CHECK-NOV-NEXT: negw a7, a7
; CHECK-NOV-NEXT: and a5, a7, a5
; CHECK-NOV-NEXT: sgtz a7, a4
-; CHECK-NOV-NEXT: neg a7, a7
+; CHECK-NOV-NEXT: negw a7, a7
; CHECK-NOV-NEXT: and a4, a7, a4
; CHECK-NOV-NEXT: sgtz a7, a2
-; CHECK-NOV-NEXT: neg a7, a7
+; CHECK-NOV-NEXT: negw a7, a7
; CHECK-NOV-NEXT: and a2, a7, a2
; CHECK-NOV-NEXT: sgtz a7, a1
-; CHECK-NOV-NEXT: neg a7, a7
+; CHECK-NOV-NEXT: negw a7, a7
; CHECK-NOV-NEXT: and a1, a7, a1
; CHECK-NOV-NEXT: sgtz a7, s2
-; CHECK-NOV-NEXT: neg a7, a7
+; CHECK-NOV-NEXT: negw a7, a7
; CHECK-NOV-NEXT: and a7, a7, s2
; CHECK-NOV-NEXT: sgtz t0, a0
-; CHECK-NOV-NEXT: neg t0, t0
+; CHECK-NOV-NEXT: negw t0, t0
; CHECK-NOV-NEXT: and a0, t0, a0
; CHECK-NOV-NEXT: sh a0, 14(s0)
; CHECK-NOV-NEXT: sh a7, 12(s0)
; RV64IZFH-NEXT: fcvt.wu.h a0, fa0, rtz
; RV64IZFH-NEXT: feq.h a1, fa0, fa0
; RV64IZFH-NEXT: seqz a1, a1
-; RV64IZFH-NEXT: addi a1, a1, -1
+; RV64IZFH-NEXT: addiw a1, a1, -1
; RV64IZFH-NEXT: and a0, a0, a1
; RV64IZFH-NEXT: slli a0, a0, 32
; RV64IZFH-NEXT: srli a0, a0, 32
; RV64IDZFH-NEXT: fcvt.wu.h a0, fa0, rtz
; RV64IDZFH-NEXT: feq.h a1, fa0, fa0
; RV64IDZFH-NEXT: seqz a1, a1
-; RV64IDZFH-NEXT: addi a1, a1, -1
+; RV64IDZFH-NEXT: addiw a1, a1, -1
; RV64IDZFH-NEXT: and a0, a0, a1
; RV64IDZFH-NEXT: slli a0, a0, 32
; RV64IDZFH-NEXT: srli a0, a0, 32
; CHECK64-IZFHMIN-NEXT: fcvt.wu.s a0, ft0, rtz
; CHECK64-IZFHMIN-NEXT: feq.s a1, ft0, ft0
; CHECK64-IZFHMIN-NEXT: seqz a1, a1
-; CHECK64-IZFHMIN-NEXT: addi a1, a1, -1
+; CHECK64-IZFHMIN-NEXT: addiw a1, a1, -1
; CHECK64-IZFHMIN-NEXT: and a0, a0, a1
; CHECK64-IZFHMIN-NEXT: slli a0, a0, 32
; CHECK64-IZFHMIN-NEXT: srli a0, a0, 32
; RV64I-NEXT: mv a1, s0
; RV64I-NEXT: call __unordsf2@plt
; RV64I-NEXT: snez a0, a0
-; RV64I-NEXT: addi a0, a0, -1
+; RV64I-NEXT: addiw a0, a0, -1
; RV64I-NEXT: and a0, a0, s1
; RV64I-NEXT: slli a0, a0, 48
; RV64I-NEXT: srai a0, a0, 48
; RV64I-NEXT: mv a1, s0
; RV64I-NEXT: call __unordsf2@plt
; RV64I-NEXT: snez a0, a0
-; RV64I-NEXT: addi a0, a0, -1
+; RV64I-NEXT: addiw a0, a0, -1
; RV64I-NEXT: and a0, a0, s1
; RV64I-NEXT: slli a0, a0, 56
; RV64I-NEXT: srai a0, a0, 56
; RV64IZFH-NEXT: fcvt.wu.h a0, fa0, rtz
; RV64IZFH-NEXT: feq.h a1, fa0, fa0
; RV64IZFH-NEXT: seqz a1, a1
-; RV64IZFH-NEXT: addi a1, a1, -1
+; RV64IZFH-NEXT: addiw a1, a1, -1
; RV64IZFH-NEXT: and a0, a0, a1
; RV64IZFH-NEXT: slli a0, a0, 32
; RV64IZFH-NEXT: srli a0, a0, 32
; RV64IDZFH-NEXT: fcvt.wu.h a0, fa0, rtz
; RV64IDZFH-NEXT: feq.h a1, fa0, fa0
; RV64IDZFH-NEXT: seqz a1, a1
-; RV64IDZFH-NEXT: addi a1, a1, -1
+; RV64IDZFH-NEXT: addiw a1, a1, -1
; RV64IDZFH-NEXT: and a0, a0, a1
; RV64IDZFH-NEXT: slli a0, a0, 32
; RV64IDZFH-NEXT: srli a0, a0, 32
; CHECK64-IZFHMIN-NEXT: fcvt.wu.s a0, ft0, rtz
; CHECK64-IZFHMIN-NEXT: feq.s a1, ft0, ft0
; CHECK64-IZFHMIN-NEXT: seqz a1, a1
-; CHECK64-IZFHMIN-NEXT: addi a1, a1, -1
+; CHECK64-IZFHMIN-NEXT: addiw a1, a1, -1
; CHECK64-IZFHMIN-NEXT: and a0, a0, a1
; CHECK64-IZFHMIN-NEXT: slli a0, a0, 32
; CHECK64-IZFHMIN-NEXT: srli a0, a0, 32
; RV64I-NEXT: mv a1, s0
; RV64I-NEXT: call __unordsf2@plt
; RV64I-NEXT: snez a0, a0
-; RV64I-NEXT: addi a0, a0, -1
+; RV64I-NEXT: addiw a0, a0, -1
; RV64I-NEXT: and a0, a0, s1
; RV64I-NEXT: sext.w a0, a0
; RV64I-NEXT: ld ra, 40(sp) # 8-byte Folded Reload
; RV64I: # %bb.0:
; RV64I-NEXT: sraiw a1, a0, 31
; RV64I-NEXT: srliw a1, a1, 29
-; RV64I-NEXT: add a1, a0, a1
+; RV64I-NEXT: addw a1, a0, a1
; RV64I-NEXT: andi a1, a1, -8
; RV64I-NEXT: subw a0, a0, a1
; RV64I-NEXT: ret
; RV64IM: # %bb.0:
; RV64IM-NEXT: sraiw a1, a0, 31
; RV64IM-NEXT: srliw a1, a1, 29
-; RV64IM-NEXT: add a1, a0, a1
+; RV64IM-NEXT: addw a1, a0, a1
; RV64IM-NEXT: andi a1, a1, -8
; RV64IM-NEXT: subw a0, a0, a1
; RV64IM-NEXT: ret
; RV64I: # %bb.0:
; RV64I-NEXT: sraiw a1, a0, 31
; RV64I-NEXT: srliw a1, a1, 16
-; RV64I-NEXT: add a1, a0, a1
+; RV64I-NEXT: addw a1, a0, a1
; RV64I-NEXT: lui a2, 1048560
; RV64I-NEXT: and a1, a1, a2
; RV64I-NEXT: subw a0, a0, a1
; RV64IM: # %bb.0:
; RV64IM-NEXT: sraiw a1, a0, 31
; RV64IM-NEXT: srliw a1, a1, 16
-; RV64IM-NEXT: add a1, a0, a1
+; RV64IM-NEXT: addw a1, a0, a1
; RV64IM-NEXT: lui a2, 1048560
; RV64IM-NEXT: and a1, a1, a2
; RV64IM-NEXT: subw a0, a0, a1
; RV64I: # %bb.0:
; RV64I-NEXT: ori a2, a1, 128
; RV64I-NEXT: sll a2, a0, a2
-; RV64I-NEXT: neg a1, a1
+; RV64I-NEXT: negw a1, a1
; RV64I-NEXT: ori a1, a1, 64
; RV64I-NEXT: srl a0, a0, a1
; RV64I-NEXT: or a0, a2, a0
; RV64I: # %bb.0:
; RV64I-NEXT: ori a2, a1, 128
; RV64I-NEXT: srl a2, a0, a2
-; RV64I-NEXT: neg a1, a1
+; RV64I-NEXT: negw a1, a1
; RV64I-NEXT: ori a1, a1, 64
; RV64I-NEXT: sll a0, a0, a1
; RV64I-NEXT: or a0, a2, a0
; RV64I-LABEL: rori_i32_fshl_nosext:
; RV64I: # %bb.0:
; RV64I-NEXT: srliw a2, a0, 1
-; RV64I-NEXT: slli a0, a0, 31
+; RV64I-NEXT: slliw a0, a0, 31
; RV64I-NEXT: or a0, a0, a2
; RV64I-NEXT: sw a0, 0(a1)
; RV64I-NEXT: ret
define void @rori_i32_fshr_nosext(i32 signext %a, i32* %x) nounwind {
; RV64I-LABEL: rori_i32_fshr_nosext:
; RV64I: # %bb.0:
-; RV64I-NEXT: slli a2, a0, 1
+; RV64I-NEXT: slliw a2, a0, 1
; RV64I-NEXT: srliw a0, a0, 31
; RV64I-NEXT: or a0, a0, a2
; RV64I-NEXT: sw a0, 0(a1)
define i64 @roriw_bug(i64 %x) nounwind {
; CHECK-LABEL: roriw_bug:
; CHECK: # %bb.0:
-; CHECK-NEXT: slli a1, a0, 31
+; CHECK-NEXT: slliw a1, a0, 31
; CHECK-NEXT: andi a2, a0, -2
; CHECK-NEXT: srli a0, a0, 1
; CHECK-NEXT: or a0, a1, a0
; RV64I-NEXT: srliw a4, a0, 24
; RV64I-NEXT: or a2, a2, a4
; RV64I-NEXT: and a3, a0, a3
-; RV64I-NEXT: slli a3, a3, 8
-; RV64I-NEXT: slli a0, a0, 24
+; RV64I-NEXT: slliw a3, a3, 8
+; RV64I-NEXT: slliw a0, a0, 24
; RV64I-NEXT: or a0, a0, a3
; RV64I-NEXT: or a0, a0, a2
; RV64I-NEXT: sw a0, 0(a1)
define signext i32 @pack_i32_3(i16 zeroext %0, i16 zeroext %1, i32 signext %2) {
; RV64I-LABEL: pack_i32_3:
; RV64I: # %bb.0:
-; RV64I-NEXT: slli a0, a0, 16
+; RV64I-NEXT: slliw a0, a0, 16
; RV64I-NEXT: or a0, a0, a1
; RV64I-NEXT: addw a0, a0, a2
; RV64I-NEXT: ret
define zeroext i16 @packh_i16_2(i8 zeroext %0, i8 zeroext %1, i8 zeroext %2) {
; RV64I-LABEL: packh_i16_2:
; RV64I: # %bb.0:
-; RV64I-NEXT: add a0, a1, a0
-; RV64I-NEXT: slli a0, a0, 8
+; RV64I-NEXT: addw a0, a1, a0
+; RV64I-NEXT: slliw a0, a0, 8
; RV64I-NEXT: or a0, a0, a2
; RV64I-NEXT: slli a0, a0, 48
; RV64I-NEXT: srli a0, a0, 48
; RV64-NEXT: fcvt.l.d a0, ft3, rtz
; RV64-NEXT: feq.d a2, ft2, ft2
; RV64-NEXT: seqz a2, a2
-; RV64-NEXT: addi a2, a2, -1
+; RV64-NEXT: addiw a2, a2, -1
; RV64-NEXT: and a0, a2, a0
; RV64-NEXT: sb a0, 8(sp)
; RV64-NEXT: vsetivli zero, 1, e64, m4, ta, ma
; RV64-NEXT: fcvt.l.d a0, ft3, rtz
; RV64-NEXT: feq.d a2, ft2, ft2
; RV64-NEXT: seqz a2, a2
-; RV64-NEXT: addi a2, a2, -1
+; RV64-NEXT: addiw a2, a2, -1
; RV64-NEXT: and a0, a2, a0
; RV64-NEXT: sb a0, 15(sp)
; RV64-NEXT: vslidedown.vi v12, v8, 6
; RV64-NEXT: fcvt.l.d a0, ft3, rtz
; RV64-NEXT: feq.d a2, ft2, ft2
; RV64-NEXT: seqz a2, a2
-; RV64-NEXT: addi a2, a2, -1
+; RV64-NEXT: addiw a2, a2, -1
; RV64-NEXT: and a0, a2, a0
; RV64-NEXT: sb a0, 14(sp)
; RV64-NEXT: vslidedown.vi v12, v8, 5
; RV64-NEXT: fcvt.l.d a0, ft3, rtz
; RV64-NEXT: feq.d a2, ft2, ft2
; RV64-NEXT: seqz a2, a2
-; RV64-NEXT: addi a2, a2, -1
+; RV64-NEXT: addiw a2, a2, -1
; RV64-NEXT: and a0, a2, a0
; RV64-NEXT: sb a0, 13(sp)
; RV64-NEXT: vslidedown.vi v12, v8, 4
; RV64-NEXT: fcvt.l.d a0, ft3, rtz
; RV64-NEXT: feq.d a2, ft2, ft2
; RV64-NEXT: seqz a2, a2
-; RV64-NEXT: addi a2, a2, -1
+; RV64-NEXT: addiw a2, a2, -1
; RV64-NEXT: and a0, a2, a0
; RV64-NEXT: sb a0, 12(sp)
; RV64-NEXT: vslidedown.vi v12, v8, 3
; RV64-NEXT: fcvt.l.d a0, ft3, rtz
; RV64-NEXT: feq.d a2, ft2, ft2
; RV64-NEXT: seqz a2, a2
-; RV64-NEXT: addi a2, a2, -1
+; RV64-NEXT: addiw a2, a2, -1
; RV64-NEXT: and a0, a2, a0
; RV64-NEXT: sb a0, 11(sp)
; RV64-NEXT: vslidedown.vi v12, v8, 2
; RV64-NEXT: fcvt.l.d a0, ft3, rtz
; RV64-NEXT: feq.d a2, ft2, ft2
; RV64-NEXT: seqz a2, a2
-; RV64-NEXT: addi a2, a2, -1
+; RV64-NEXT: addiw a2, a2, -1
; RV64-NEXT: and a0, a2, a0
; RV64-NEXT: sb a0, 10(sp)
; RV64-NEXT: vslidedown.vi v8, v8, 1
; RV64-NEXT: fcvt.l.d a0, ft0, rtz
; RV64-NEXT: feq.d a2, ft2, ft2
; RV64-NEXT: seqz a2, a2
-; RV64-NEXT: addi a2, a2, -1
+; RV64-NEXT: addiw a2, a2, -1
; RV64-NEXT: and a0, a2, a0
; RV64-NEXT: sb a0, 9(sp)
; RV64-NEXT: addi a0, sp, 8
;
; RV64I-LABEL: add_select_all_zeros_i32:
; RV64I: # %bb.0:
-; RV64I-NEXT: addi a0, a0, -1
+; RV64I-NEXT: addiw a0, a0, -1
; RV64I-NEXT: and a0, a0, a1
; RV64I-NEXT: addw a0, a2, a0
; RV64I-NEXT: ret
;
; RV64I-LABEL: sub_select_all_zeros_i32:
; RV64I: # %bb.0:
-; RV64I-NEXT: addi a0, a0, -1
+; RV64I-NEXT: addiw a0, a0, -1
; RV64I-NEXT: and a0, a0, a1
; RV64I-NEXT: subw a0, a2, a0
; RV64I-NEXT: ret
; RV64I-NEXT: and a0, a0, s1
; RV64I-NEXT: add a0, a2, a0
; RV64I-NEXT: srli a2, a0, 4
-; RV64I-NEXT: add a0, a0, a2
+; RV64I-NEXT: addw a0, a0, a2
; RV64I-NEXT: and a0, a0, s2
; RV64I-NEXT: mulw a0, a0, s3
; RV64I-NEXT: srliw a0, a0, 24
; RV64I: # %bb.0:
; RV64I-NEXT: sraiw a1, a0, 31
; RV64I-NEXT: srliw a1, a1, 26
-; RV64I-NEXT: add a1, a0, a1
+; RV64I-NEXT: addw a1, a0, a1
; RV64I-NEXT: andi a1, a1, -64
; RV64I-NEXT: subw a0, a0, a1
; RV64I-NEXT: ret
; RV64IM: # %bb.0:
; RV64IM-NEXT: sraiw a1, a0, 31
; RV64IM-NEXT: srliw a1, a1, 26
-; RV64IM-NEXT: add a1, a0, a1
+; RV64IM-NEXT: addw a1, a0, a1
; RV64IM-NEXT: andi a1, a1, -64
; RV64IM-NEXT: subw a0, a0, a1
; RV64IM-NEXT: ret
; RV64I: # %bb.0:
; RV64I-NEXT: sraiw a1, a0, 31
; RV64I-NEXT: srliw a1, a1, 1
-; RV64I-NEXT: add a1, a0, a1
+; RV64I-NEXT: addw a1, a0, a1
; RV64I-NEXT: lui a2, 524288
; RV64I-NEXT: and a1, a1, a2
; RV64I-NEXT: addw a0, a0, a1
; RV64IM: # %bb.0:
; RV64IM-NEXT: sraiw a1, a0, 31
; RV64IM-NEXT: srliw a1, a1, 1
-; RV64IM-NEXT: add a1, a0, a1
+; RV64IM-NEXT: addw a1, a0, a1
; RV64IM-NEXT: lui a2, 524288
; RV64IM-NEXT: and a1, a1, a2
; RV64IM-NEXT: addw a0, a0, a1
; RV64-NEXT: neg a0, a0
; RV64-NEXT: addi a2, a2, -1
; RV64-NEXT: addi a1, a1, -1
-; RV64-NEXT: slli a3, a1, 2
+; RV64-NEXT: slliw a3, a1, 2
; RV64-NEXT: slli a4, a2, 31
; RV64-NEXT: srli a4, a4, 62
; RV64-NEXT: or a3, a4, a3
; RV64M-NEXT: srli a1, a1, 31
; RV64M-NEXT: or a1, a1, a4
; RV64M-NEXT: sd a1, 0(a0)
-; RV64M-NEXT: slli a1, a2, 2
+; RV64M-NEXT: slliw a1, a2, 2
; RV64M-NEXT: slli a3, a3, 31
; RV64M-NEXT: srli a3, a3, 62
; RV64M-NEXT: or a1, a3, a1
; RV64MV-NEXT: slli a4, a3, 33
; RV64MV-NEXT: or a1, a1, a4
; RV64MV-NEXT: sd a1, 0(a0)
-; RV64MV-NEXT: slli a2, a2, 2
+; RV64MV-NEXT: slliw a1, a2, 2
; RV64MV-NEXT: slli a3, a3, 31
; RV64MV-NEXT: srli a3, a3, 62
-; RV64MV-NEXT: or a2, a3, a2
-; RV64MV-NEXT: sw a2, 8(a0)
+; RV64MV-NEXT: or a1, a3, a1
+; RV64MV-NEXT: sw a1, 8(a0)
; RV64MV-NEXT: addi sp, s0, -64
; RV64MV-NEXT: ld ra, 56(sp) # 8-byte Folded Reload
; RV64MV-NEXT: ld s0, 48(sp) # 8-byte Folded Reload
; RV64I-NEXT: lh a3, 16(a1)
; RV64I-NEXT: lh a1, 8(a1)
; RV64I-NEXT: srli a4, a2, 58
-; RV64I-NEXT: add a4, a2, a4
+; RV64I-NEXT: addw a4, a2, a4
; RV64I-NEXT: andi a4, a4, -64
; RV64I-NEXT: subw s1, a2, a4
; RV64I-NEXT: srli a2, a1, 59
-; RV64I-NEXT: add a2, a1, a2
+; RV64I-NEXT: addw a2, a1, a2
; RV64I-NEXT: andi a2, a2, -32
; RV64I-NEXT: subw s2, a1, a2
; RV64I-NEXT: srli a1, a3, 61
-; RV64I-NEXT: add a1, a3, a1
+; RV64I-NEXT: addw a1, a3, a1
; RV64I-NEXT: andi a1, a1, -8
; RV64I-NEXT: subw s3, a3, a1
; RV64I-NEXT: li a1, 95
; RV64IM-NEXT: mulw a3, a3, a6
; RV64IM-NEXT: subw a2, a2, a3
; RV64IM-NEXT: srli a3, a1, 58
-; RV64IM-NEXT: add a3, a1, a3
+; RV64IM-NEXT: addw a3, a1, a3
; RV64IM-NEXT: andi a3, a3, -64
; RV64IM-NEXT: subw a1, a1, a3
; RV64IM-NEXT: srli a3, a5, 59
-; RV64IM-NEXT: add a3, a5, a3
+; RV64IM-NEXT: addw a3, a5, a3
; RV64IM-NEXT: andi a3, a3, -32
; RV64IM-NEXT: subw a5, a5, a3
; RV64IM-NEXT: srli a3, a4, 61
-; RV64IM-NEXT: add a3, a4, a3
+; RV64IM-NEXT: addw a3, a4, a3
; RV64IM-NEXT: andi a3, a3, -8
; RV64IM-NEXT: subw a4, a4, a3
; RV64IM-NEXT: sh a4, 4(a0)
; RV64I-NEXT: lh s1, 24(a1)
; RV64I-NEXT: lh a0, 16(a1)
; RV64I-NEXT: srli a1, a2, 49
-; RV64I-NEXT: add a1, a2, a1
+; RV64I-NEXT: addw a1, a2, a1
; RV64I-NEXT: lui a3, 8
; RV64I-NEXT: and a1, a1, a3
; RV64I-NEXT: subw s3, a2, a1
; RV64IM-NEXT: mulw a3, a3, a5
; RV64IM-NEXT: subw a4, a4, a3
; RV64IM-NEXT: srli a3, a1, 49
-; RV64IM-NEXT: add a3, a1, a3
+; RV64IM-NEXT: addw a3, a1, a3
; RV64IM-NEXT: lui a5, 8
; RV64IM-NEXT: and a3, a3, a5
; RV64IM-NEXT: subw a1, a1, a3
; RV64I-NEXT: lbu a3, 4(a0)
; RV64I-NEXT: lbu a4, 6(a0)
; RV64I-NEXT: lbu a0, 7(a0)
-; RV64I-NEXT: slli a2, a2, 8
+; RV64I-NEXT: slliw a2, a2, 8
; RV64I-NEXT: or a2, a2, a3
-; RV64I-NEXT: slli a4, a4, 16
-; RV64I-NEXT: slli a0, a0, 24
-; RV64I-NEXT: or a0, a0, a4
+; RV64I-NEXT: slliw a3, a4, 16
+; RV64I-NEXT: slliw a0, a0, 24
+; RV64I-NEXT: or a0, a0, a3
; RV64I-NEXT: or a0, a0, a2
; RV64I-NEXT: slli a0, a0, 32
; RV64I-NEXT: or a0, a0, a1
; RV64-NEXT: lui a1, 28087
; RV64-NEXT: addiw a1, a1, -585
; RV64-NEXT: call __muldi3@plt
-; RV64-NEXT: slli a1, a0, 26
+; RV64-NEXT: slliw a1, a0, 26
; RV64-NEXT: slli a0, a0, 37
; RV64-NEXT: srli a0, a0, 38
; RV64-NEXT: or a0, a0, a1
; RV64M: # %bb.0:
; RV64M-NEXT: lui a1, 28087
; RV64M-NEXT: addiw a1, a1, -585
-; RV64M-NEXT: mul a0, a0, a1
-; RV64M-NEXT: slli a1, a0, 26
+; RV64M-NEXT: mulw a0, a0, a1
+; RV64M-NEXT: slliw a1, a0, 26
; RV64M-NEXT: slli a0, a0, 37
; RV64M-NEXT: srli a0, a0, 38
; RV64M-NEXT: or a0, a0, a1
; RV64MV: # %bb.0:
; RV64MV-NEXT: lui a1, 28087
; RV64MV-NEXT: addiw a1, a1, -585
-; RV64MV-NEXT: mul a0, a0, a1
-; RV64MV-NEXT: slli a1, a0, 26
+; RV64MV-NEXT: mulw a0, a0, a1
+; RV64MV-NEXT: slliw a1, a0, 26
; RV64MV-NEXT: slli a0, a0, 37
; RV64MV-NEXT: srli a0, a0, 38
; RV64MV-NEXT: or a0, a0, a1
; RV64-NEXT: andi a0, a0, 2047
; RV64-NEXT: li a1, 683
; RV64-NEXT: call __muldi3@plt
-; RV64-NEXT: slli a1, a0, 10
+; RV64-NEXT: slliw a1, a0, 10
; RV64-NEXT: slli a0, a0, 53
; RV64-NEXT: srli a0, a0, 54
; RV64-NEXT: or a0, a0, a1
; RV64M-NEXT: srli a3, a1, 11
; RV64M-NEXT: andi a1, a1, 2047
; RV64M-NEXT: li a4, 683
-; RV64M-NEXT: mul a1, a1, a4
-; RV64M-NEXT: slli a4, a1, 10
+; RV64M-NEXT: mulw a1, a1, a4
+; RV64M-NEXT: slliw a4, a1, 10
; RV64M-NEXT: slli a1, a1, 53
; RV64M-NEXT: srli a1, a1, 54
; RV64M-NEXT: or a1, a1, a4
; RV64-NEXT: sgtz a6, a6
; RV64-NEXT: sgtz a5, a5
; RV64-NEXT: sgtz a4, a4
-; RV64-NEXT: neg a4, a4
+; RV64-NEXT: negw a4, a4
; RV64-NEXT: and a3, a4, a3
-; RV64-NEXT: slli a3, a3, 8
-; RV64-NEXT: neg a4, a5
+; RV64-NEXT: slliw a3, a3, 8
+; RV64-NEXT: negw a4, a5
; RV64-NEXT: and a0, a4, a0
; RV64-NEXT: andi a0, a0, 255
; RV64-NEXT: or a0, a0, a3
-; RV64-NEXT: neg a3, a6
+; RV64-NEXT: negw a3, a6
; RV64-NEXT: and a2, a3, a2
; RV64-NEXT: sb a2, 2(a1)
; RV64-NEXT: sh a0, 0(a1)