setOperationAction(ISD::SMAX, XLenVT, Legal);
setOperationAction(ISD::UMIN, XLenVT, Legal);
setOperationAction(ISD::UMAX, XLenVT, Legal);
+
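+ // On RV64, i32 CTTZ/CTLZ are custom legalized to the W-form nodes
+ // (see ReplaceNodeResults) instead of being promoted to a 64-bit
+ // count that then needs a fixup, e.g. the add of -32 after ctlz that
+ // the old isel patterns matched away.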
+ if (Subtarget.is64Bit()) {
+ setOperationAction(ISD::CTTZ, MVT::i32, Custom);
+ setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::i32, Custom);
+ setOperationAction(ISD::CTLZ, MVT::i32, Custom);
+ setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i32, Custom);
+ }
} else {
setOperationAction(ISD::CTTZ, XLenVT, Expand);
setOperationAction(ISD::CTLZ, XLenVT, Expand);
"Unexpected custom legalisation");
Results.push_back(customLegalizeToWOp(N, DAG));
break;
+ case ISD::CTTZ:
+ case ISD::CTTZ_ZERO_UNDEF:
+ case ISD::CTLZ:
+ case ISD::CTLZ_ZERO_UNDEF: {
+ assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
+ "Unexpected custom legalisation");
+
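+ // Any-extend the i32 operand to i64 and count with the W-form node;
+ // e.g. (i32 (cttz X)) becomes (trunc (CTZW (any_extend X))). The
+ // garbage in the upper 32 bits of the any_extend is harmless because
+ // CTZW/CLZW only read the lower 32 bits of their operand.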
+ SDValue NewOp0 =
+ DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(0));
+ bool IsCTZ =
+ N->getOpcode() == ISD::CTTZ || N->getOpcode() == ISD::CTTZ_ZERO_UNDEF;
+ unsigned Opc = IsCTZ ? RISCVISD::CTZW : RISCVISD::CLZW;
+ SDValue Res = DAG.getNode(Opc, DL, MVT::i64, NewOp0);
+ Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res));
+ return;
+ }
case ISD::SDIV:
case ISD::UDIV:
case ISD::UREM: {
}
break;
}
+ case RISCVISD::CLZW:
+ case RISCVISD::CTZW: {
+ // Only the lower 32 bits of the first operand are read.
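+ // This lets DAGCombine strip, e.g., an (and X, 0xFFFFFFFF) mask from
+ // the operand instead of pattern matching it away during isel.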
+ SDValue Op0 = N->getOperand(0);
+ APInt Mask = APInt::getLowBitsSet(Op0.getValueSizeInBits(), 32);
+ if (SimplifyDemandedBits(Op0, Mask, DCI)) {
+ if (N->getOpcode() != ISD::DELETED_NODE)
+ DCI.AddToWorklist(N);
+ return SDValue(N, 0);
+ }
+ break;
+ }
case RISCVISD::FSL:
case RISCVISD::FSR: {
// Only the lower log2(Bitwidth)+1 bits of the shift amount are read.
Known = Known.sext(BitWidth);
break;
}
+ case RISCVISD::CTZW: {
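+ // CTZW counts trailing zeros in the low 32 bits, so the result is at
+ // most 32 and fits in Log2_32(32) + 1 = 6 bits; tighten that bound
+ // using what is known about the operand.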
+ KnownBits Known2 = DAG.computeKnownBits(Op.getOperand(0), Depth + 1);
+ unsigned PossibleTZ = Known2.trunc(32).countMaxTrailingZeros();
+ unsigned LowBits = Log2_32(PossibleTZ) + 1;
+ Known.Zero.setBitsFrom(LowBits);
+ break;
+ }
+ case RISCVISD::CLZW: {
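+ // Likewise for CLZW: the result is at most 32, bounded further by
+ // the known leading zeros of the low 32 bits of the operand.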
+ KnownBits Known2 = DAG.computeKnownBits(Op.getOperand(0), Depth + 1);
+ unsigned PossibleLZ = Known2.trunc(32).countMaxLeadingZeros();
+ unsigned LowBits = Log2_32(PossibleLZ) + 1;
+ Known.Zero.setBitsFrom(LowBits);
+ break;
+ }
case RISCVISD::READ_VLENB:
// We assume VLENB is at least 8 bytes.
// FIXME: The 1.0 draft spec defines minimum VLEN as 128 bits.
NODE_NAME_CASE(REMUW)
NODE_NAME_CASE(ROLW)
NODE_NAME_CASE(RORW)
+ NODE_NAME_CASE(CLZW)
+ NODE_NAME_CASE(CTZW)
NODE_NAME_CASE(FSLW)
NODE_NAME_CASE(FSRW)
NODE_NAME_CASE(FSL)
// Operand and SDNode transformation definitions.
//===----------------------------------------------------------------------===//
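+// W-form leading/trailing zero count nodes, matching the semantics of
+// the clzw/ctzw instructions: only the lower 32 bits of the operand
+// are read, and the result is in the range [0, 32].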
+def riscv_clzw : SDNode<"RISCVISD::CLZW", SDTIntUnaryOp>;
+def riscv_ctzw : SDNode<"RISCVISD::CTZW", SDTIntUnaryOp>;
def riscv_rolw : SDNode<"RISCVISD::ROLW", SDTIntShiftOp>;
def riscv_rorw : SDNode<"RISCVISD::RORW", SDTIntShiftOp>;
def riscv_fslw : SDNode<"RISCVISD::FSLW", SDTIntShiftDOp>;
} // Predicates = [HasStdExtZbt, IsRV64]
let Predicates = [HasStdExtZbb, IsRV64] in {
-def : Pat<(i64 (add (ctlz (and GPR:$rs1, 0xFFFFFFFF)), -32)),
- (CLZW GPR:$rs1)>;
-// computeKnownBits can't figure out that the and mask on the add result is
-// unnecessary so we need to pattern match it away.
-def : Pat<(i64 (and (add (ctlz (and GPR:$rs1, 0xFFFFFFFF)), -32), 0xFFFFFFFF)),
- (CLZW GPR:$rs1)>;
-def : Pat<(i64 (cttz (or GPR:$rs1, 0x100000000))),
- (CTZW GPR:$rs1)>;
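+// i32 CTTZ/CTLZ are custom legalized to these nodes, so selection is a
+// simple 1:1 mapping and the mask-matching patterns above are no
+// longer needed.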
+def : Pat<(i64 (riscv_clzw GPR:$rs1)), (CLZW GPR:$rs1)>;
+def : Pat<(i64 (riscv_ctzw GPR:$rs1)), (CTZW GPR:$rs1)>;
def : Pat<(i64 (ctpop (and GPR:$rs1, 0xFFFFFFFF))), (CPOPW GPR:$rs1)>;
} // Predicates = [HasStdExtZbb, IsRV64]
;
; RV64IB-LABEL: log2_i32:
; RV64IB: # %bb.0:
-; RV64IB-NEXT: zext.w a0, a0
-; RV64IB-NEXT: clz a0, a0
-; RV64IB-NEXT: addi a1, zero, 63
+; RV64IB-NEXT: clzw a0, a0
+; RV64IB-NEXT: addi a1, zero, 31
; RV64IB-NEXT: sub a0, a1, a0
; RV64IB-NEXT: ret
;
; RV64IBB-LABEL: log2_i32:
; RV64IBB: # %bb.0:
-; RV64IBB-NEXT: slli a0, a0, 32
-; RV64IBB-NEXT: srli a0, a0, 32
-; RV64IBB-NEXT: clz a0, a0
-; RV64IBB-NEXT: addi a1, zero, 63
+; RV64IBB-NEXT: clzw a0, a0
+; RV64IBB-NEXT: addi a1, zero, 31
; RV64IBB-NEXT: sub a0, a1, a0
; RV64IBB-NEXT: ret
%1 = call i32 @llvm.ctlz.i32(i32 %a, i1 false)
; RV64IB-LABEL: log2_ceil_i32:
; RV64IB: # %bb.0:
; RV64IB-NEXT: addi a0, a0, -1
-; RV64IB-NEXT: zext.w a0, a0
-; RV64IB-NEXT: clz a0, a0
-; RV64IB-NEXT: addi a1, zero, 64
+; RV64IB-NEXT: clzw a0, a0
+; RV64IB-NEXT: addi a1, zero, 32
; RV64IB-NEXT: sub a0, a1, a0
; RV64IB-NEXT: ret
;
; RV64IBB-LABEL: log2_ceil_i32:
; RV64IBB: # %bb.0:
; RV64IBB-NEXT: addi a0, a0, -1
-; RV64IBB-NEXT: slli a0, a0, 32
-; RV64IBB-NEXT: srli a0, a0, 32
-; RV64IBB-NEXT: clz a0, a0
-; RV64IBB-NEXT: addi a1, zero, 64
+; RV64IBB-NEXT: clzw a0, a0
+; RV64IBB-NEXT: addi a1, zero, 32
; RV64IBB-NEXT: sub a0, a1, a0
; RV64IBB-NEXT: ret
%1 = sub i32 %a, 1
; RV64IB-LABEL: ctlz_lshr_i32:
; RV64IB: # %bb.0:
; RV64IB-NEXT: srliw a0, a0, 1
-; RV64IB-NEXT: clz a0, a0
-; RV64IB-NEXT: addi a0, a0, -32
+; RV64IB-NEXT: clzw a0, a0
; RV64IB-NEXT: ret
;
; RV64IBB-LABEL: ctlz_lshr_i32:
; RV64IBB: # %bb.0:
; RV64IBB-NEXT: srliw a0, a0, 1
-; RV64IBB-NEXT: clz a0, a0
-; RV64IBB-NEXT: addi a0, a0, -32
+; RV64IBB-NEXT: clzw a0, a0
; RV64IBB-NEXT: ret
%1 = lshr i32 %a, 1
%2 = call i32 @llvm.ctlz.i32(i32 %1, i1 false)
;
; RV64IB-LABEL: cttz_zero_undef_i32:
; RV64IB: # %bb.0:
-; RV64IB-NEXT: ctz a0, a0
+; RV64IB-NEXT: ctzw a0, a0
; RV64IB-NEXT: ret
;
; RV64IBB-LABEL: cttz_zero_undef_i32:
; RV64IBB: # %bb.0:
-; RV64IBB-NEXT: ctz a0, a0
+; RV64IBB-NEXT: ctzw a0, a0
; RV64IBB-NEXT: ret
%1 = call i32 @llvm.cttz.i32(i32 %a, i1 true)
ret i32 %1
;
; RV64IB-LABEL: findFirstSet_i32:
; RV64IB: # %bb.0:
-; RV64IB-NEXT: ctz a1, a0
+; RV64IB-NEXT: ctzw a1, a0
; RV64IB-NEXT: addi a2, zero, -1
; RV64IB-NEXT: cmov a0, a0, a1, a2
; RV64IB-NEXT: ret
; RV64IBB-NEXT: addi a0, zero, -1
; RV64IBB-NEXT: beqz a1, .LBB8_2
; RV64IBB-NEXT: # %bb.1:
-; RV64IBB-NEXT: ctz a0, a1
+; RV64IBB-NEXT: ctzw a0, a1
; RV64IBB-NEXT: .LBB8_2:
; RV64IBB-NEXT: ret
%1 = call i32 @llvm.cttz.i32(i32 %a, i1 true)
;
; RV64IB-LABEL: ffs_i32:
; RV64IB: # %bb.0:
-; RV64IB-NEXT: ctz a1, a0
+; RV64IB-NEXT: ctzw a1, a0
; RV64IB-NEXT: addi a1, a1, 1
; RV64IB-NEXT: cmov a0, a0, a1, zero
; RV64IB-NEXT: ret
; RV64IBB-NEXT: mv a0, zero
; RV64IBB-NEXT: beqz a1, .LBB9_2
; RV64IBB-NEXT: # %bb.1:
-; RV64IBB-NEXT: ctz a0, a1
+; RV64IBB-NEXT: ctzw a0, a1
; RV64IBB-NEXT: addi a0, a0, 1
; RV64IBB-NEXT: .LBB9_2:
; RV64IBB-NEXT: ret