return !isInt<12>(N->getSExtValue()) && isMask_64(N->getZExtValue());
}], XLenSubTrailingOnes>;
+// Similar to LeadingOnesMask, but only consider leading ones in the lower 32
+// bits.
+def LeadingOnesWMask : PatLeaf<(imm), [{
+  if (!N->hasOneUse())
+    return false;
+  // If the value is a uint32 but not an int32, it must have bit 31 set and
+  // bits 63:32 cleared. After that we're looking for a shifted mask but not
+  // an all ones mask.
+  int64_t Imm = N->getSExtValue();
+  return !isInt<32>(Imm) && isUInt<32>(Imm) && isShiftedMask_64(Imm) &&
+         Imm != UINT64_C(0xffffffff);
+}], TrailingZeros>;
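+// TrailingZeros rewrites the matched immediate to its trailing zero count,
+// which becomes the shift amount in the pattern below. For example,
+// 0x00000000fffffff8 matches and is rewritten to 3. 0xffffffff must be
+// rejected: its shift amount would be 0, and a srliw by 0 sign extends
+// rather than zero extends, so the shift pair would not implement the AND.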
+
//===----------------------------------------------------------------------===//
// Instruction Formats
//===----------------------------------------------------------------------===//
let Predicates = [IsRV64] in {
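+// An AND with a mask whose ones sit in bits 31:C of the low 32 bits can be
+// implemented as srliw+slli: the srliw discards bits C-1:0 and, since the
+// shift amount is nonzero, zero extends the 32-bit result; the slli then
+// shifts the value back into place. This avoids materializing the mask.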
+def : Pat<(i64 (and GPR:$rs, LeadingOnesWMask:$mask)),
+          (SLLI (SRLIW $rs, LeadingOnesWMask:$mask), LeadingOnesWMask:$mask)>;
+
/// sext and zext
// Sign extend is not needed if all users are W instructions.
%a = and i64 %x, -2147483648
ret i64 %a
}
+
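+; The mask 0xfffffff8 has bit 31 set and bits 63:32 clear, so on RV64 the
+; AND can be lowered to srliw+slli instead of materializing the constant.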
+define i64 @and64_0x00000000fffffff8(i64 %x) {
+; RV32I-LABEL: and64_0x00000000fffffff8:
+; RV32I: # %bb.0:
+; RV32I-NEXT: andi a0, a0, -8
+; RV32I-NEXT: li a1, 0
+; RV32I-NEXT: ret
+;
+; RV64I-LABEL: and64_0x00000000fffffff8:
+; RV64I: # %bb.0:
+; RV64I-NEXT: srliw a0, a0, 3
+; RV64I-NEXT: slli a0, a0, 3
+; RV64I-NEXT: ret
+  %a = and i64 %x, 4294967288
+  ret i64 %a
+}
;
; RV64I-LABEL: test_bswap_i64:
; RV64I: # %bb.0:
-; RV64I-NEXT: srli a1, a0, 24
-; RV64I-NEXT: lui a2, 4080
-; RV64I-NEXT: and a1, a1, a2
-; RV64I-NEXT: srli a2, a0, 8
-; RV64I-NEXT: li a3, 255
-; RV64I-NEXT: slli a4, a3, 24
-; RV64I-NEXT: and a2, a2, a4
+; RV64I-NEXT: slli a1, a0, 24
+; RV64I-NEXT: li a2, 255
+; RV64I-NEXT: slli a3, a2, 40
+; RV64I-NEXT: and a1, a1, a3
+; RV64I-NEXT: srliw a3, a0, 24
+; RV64I-NEXT: slli a3, a3, 32
+; RV64I-NEXT: or a1, a1, a3
+; RV64I-NEXT: slli a3, a0, 40
+; RV64I-NEXT: slli a2, a2, 48
+; RV64I-NEXT: and a2, a3, a2
+; RV64I-NEXT: slli a3, a0, 56
+; RV64I-NEXT: or a2, a3, a2
; RV64I-NEXT: or a1, a2, a1
; RV64I-NEXT: srli a2, a0, 40
-; RV64I-NEXT: lui a4, 16
-; RV64I-NEXT: addiw a4, a4, -256
-; RV64I-NEXT: and a2, a2, a4
-; RV64I-NEXT: srli a4, a0, 56
-; RV64I-NEXT: or a2, a2, a4
-; RV64I-NEXT: or a1, a1, a2
-; RV64I-NEXT: slli a2, a0, 24
-; RV64I-NEXT: slli a4, a3, 40
-; RV64I-NEXT: and a2, a2, a4
-; RV64I-NEXT: srliw a4, a0, 24
-; RV64I-NEXT: slli a4, a4, 32
-; RV64I-NEXT: or a2, a2, a4
-; RV64I-NEXT: slli a4, a0, 40
-; RV64I-NEXT: slli a3, a3, 48
-; RV64I-NEXT: and a3, a4, a3
-; RV64I-NEXT: slli a0, a0, 56
+; RV64I-NEXT: lui a3, 16
+; RV64I-NEXT: addiw a3, a3, -256
+; RV64I-NEXT: and a2, a2, a3
+; RV64I-NEXT: srli a3, a0, 56
+; RV64I-NEXT: or a2, a2, a3
+; RV64I-NEXT: srli a3, a0, 24
+; RV64I-NEXT: lui a4, 4080
+; RV64I-NEXT: and a3, a3, a4
+; RV64I-NEXT: srli a0, a0, 8
+; RV64I-NEXT: srliw a0, a0, 24
+; RV64I-NEXT: slli a0, a0, 24
; RV64I-NEXT: or a0, a0, a3
; RV64I-NEXT: or a0, a0, a2
-; RV64I-NEXT: or a0, a0, a1
+; RV64I-NEXT: or a0, a1, a0
; RV64I-NEXT: ret
;
; RV32ZB-LABEL: test_bswap_i64:
;
; RV64I-LABEL: test_bitreverse_i64:
; RV64I: # %bb.0:
-; RV64I-NEXT: srli a1, a0, 24
-; RV64I-NEXT: lui a2, 4080
-; RV64I-NEXT: and a1, a1, a2
-; RV64I-NEXT: srli a2, a0, 8
-; RV64I-NEXT: li a3, 255
-; RV64I-NEXT: slli a4, a3, 24
-; RV64I-NEXT: and a2, a2, a4
+; RV64I-NEXT: slli a1, a0, 24
+; RV64I-NEXT: li a2, 255
+; RV64I-NEXT: slli a3, a2, 40
+; RV64I-NEXT: and a1, a1, a3
+; RV64I-NEXT: srliw a3, a0, 24
+; RV64I-NEXT: slli a3, a3, 32
+; RV64I-NEXT: or a1, a1, a3
+; RV64I-NEXT: slli a3, a0, 40
+; RV64I-NEXT: slli a2, a2, 48
+; RV64I-NEXT: and a2, a3, a2
+; RV64I-NEXT: slli a3, a0, 56
+; RV64I-NEXT: or a2, a3, a2
; RV64I-NEXT: or a1, a2, a1
; RV64I-NEXT: srli a2, a0, 40
-; RV64I-NEXT: lui a4, 16
-; RV64I-NEXT: addiw a4, a4, -256
-; RV64I-NEXT: and a2, a2, a4
-; RV64I-NEXT: srli a4, a0, 56
-; RV64I-NEXT: or a2, a2, a4
-; RV64I-NEXT: or a1, a1, a2
-; RV64I-NEXT: slli a2, a0, 24
-; RV64I-NEXT: slli a4, a3, 40
-; RV64I-NEXT: and a2, a2, a4
-; RV64I-NEXT: srliw a4, a0, 24
-; RV64I-NEXT: slli a4, a4, 32
-; RV64I-NEXT: or a2, a2, a4
-; RV64I-NEXT: slli a4, a0, 40
-; RV64I-NEXT: slli a3, a3, 48
-; RV64I-NEXT: and a3, a4, a3
-; RV64I-NEXT: slli a0, a0, 56
+; RV64I-NEXT: lui a3, 16
+; RV64I-NEXT: addiw a3, a3, -256
+; RV64I-NEXT: and a2, a2, a3
+; RV64I-NEXT: srli a3, a0, 56
+; RV64I-NEXT: or a2, a2, a3
+; RV64I-NEXT: srli a3, a0, 24
+; RV64I-NEXT: lui a4, 4080
+; RV64I-NEXT: and a3, a3, a4
+; RV64I-NEXT: srli a0, a0, 8
+; RV64I-NEXT: srliw a0, a0, 24
+; RV64I-NEXT: slli a0, a0, 24
; RV64I-NEXT: or a0, a0, a3
; RV64I-NEXT: lui a3, %hi(.LCPI6_0)
; RV64I-NEXT: ld a3, %lo(.LCPI6_0)(a3)
; RV64I-NEXT: or a0, a0, a2
-; RV64I-NEXT: or a0, a0, a1
+; RV64I-NEXT: or a0, a1, a0
; RV64I-NEXT: srli a1, a0, 4
; RV64I-NEXT: and a1, a1, a3
; RV64I-NEXT: and a0, a0, a3
define signext i16 @srliw_1_sh1add(i16* %0, i32 signext %1) {
; RV64I-LABEL: srliw_1_sh1add:
; RV64I: # %bb.0:
-; RV64I-NEXT: li a2, 1
-; RV64I-NEXT: slli a2, a2, 32
-; RV64I-NEXT: addi a2, a2, -2
-; RV64I-NEXT: and a1, a1, a2
+; RV64I-NEXT: srliw a1, a1, 1
+; RV64I-NEXT: slli a1, a1, 1
; RV64I-NEXT: add a0, a0, a1
; RV64I-NEXT: lh a0, 0(a0)
; RV64I-NEXT: ret
define signext i32 @srliw_2_sh2add(i32* %0, i32 signext %1) {
; RV64I-LABEL: srliw_2_sh2add:
; RV64I: # %bb.0:
-; RV64I-NEXT: li a2, 1
-; RV64I-NEXT: slli a2, a2, 32
-; RV64I-NEXT: addi a2, a2, -4
-; RV64I-NEXT: and a1, a1, a2
+; RV64I-NEXT: srliw a1, a1, 2
+; RV64I-NEXT: slli a1, a1, 2
; RV64I-NEXT: add a0, a0, a1
; RV64I-NEXT: lw a0, 0(a0)
; RV64I-NEXT: ret
define i64 @srliw_3_sh3add(i64* %0, i32 signext %1) {
; RV64I-LABEL: srliw_3_sh3add:
; RV64I: # %bb.0:
-; RV64I-NEXT: li a2, 1
-; RV64I-NEXT: slli a2, a2, 32
-; RV64I-NEXT: addi a2, a2, -8
-; RV64I-NEXT: and a1, a1, a2
+; RV64I-NEXT: srliw a1, a1, 3
+; RV64I-NEXT: slli a1, a1, 3
; RV64I-NEXT: add a0, a0, a1
; RV64I-NEXT: ld a0, 0(a0)
; RV64I-NEXT: ret
define i64 @bswap_i64(i64 %a) {
; RV64I-LABEL: bswap_i64:
; RV64I: # %bb.0:
-; RV64I-NEXT: srli a1, a0, 24
-; RV64I-NEXT: lui a2, 4080
-; RV64I-NEXT: and a1, a1, a2
-; RV64I-NEXT: srli a2, a0, 8
-; RV64I-NEXT: li a3, 255
-; RV64I-NEXT: slli a4, a3, 24
-; RV64I-NEXT: and a2, a2, a4
+; RV64I-NEXT: slli a1, a0, 24
+; RV64I-NEXT: li a2, 255
+; RV64I-NEXT: slli a3, a2, 40
+; RV64I-NEXT: and a1, a1, a3
+; RV64I-NEXT: srliw a3, a0, 24
+; RV64I-NEXT: slli a3, a3, 32
+; RV64I-NEXT: or a1, a1, a3
+; RV64I-NEXT: slli a3, a0, 40
+; RV64I-NEXT: slli a2, a2, 48
+; RV64I-NEXT: and a2, a3, a2
+; RV64I-NEXT: slli a3, a0, 56
+; RV64I-NEXT: or a2, a3, a2
; RV64I-NEXT: or a1, a2, a1
; RV64I-NEXT: srli a2, a0, 40
-; RV64I-NEXT: lui a4, 16
-; RV64I-NEXT: addiw a4, a4, -256
-; RV64I-NEXT: and a2, a2, a4
-; RV64I-NEXT: srli a4, a0, 56
-; RV64I-NEXT: or a2, a2, a4
-; RV64I-NEXT: or a1, a1, a2
-; RV64I-NEXT: slli a2, a0, 24
-; RV64I-NEXT: slli a4, a3, 40
-; RV64I-NEXT: and a2, a2, a4
-; RV64I-NEXT: srliw a4, a0, 24
-; RV64I-NEXT: slli a4, a4, 32
-; RV64I-NEXT: or a2, a2, a4
-; RV64I-NEXT: slli a4, a0, 40
-; RV64I-NEXT: slli a3, a3, 48
-; RV64I-NEXT: and a3, a4, a3
-; RV64I-NEXT: slli a0, a0, 56
+; RV64I-NEXT: lui a3, 16
+; RV64I-NEXT: addiw a3, a3, -256
+; RV64I-NEXT: and a2, a2, a3
+; RV64I-NEXT: srli a3, a0, 56
+; RV64I-NEXT: or a2, a2, a3
+; RV64I-NEXT: srli a3, a0, 24
+; RV64I-NEXT: lui a4, 4080
+; RV64I-NEXT: and a3, a3, a4
+; RV64I-NEXT: srli a0, a0, 8
+; RV64I-NEXT: srliw a0, a0, 24
+; RV64I-NEXT: slli a0, a0, 24
; RV64I-NEXT: or a0, a0, a3
; RV64I-NEXT: or a0, a0, a2
-; RV64I-NEXT: or a0, a0, a1
+; RV64I-NEXT: or a0, a1, a0
; RV64I-NEXT: ret
;
; RV64ZBB-LABEL: bswap_i64:
define i64 @bswap_i64(i64 %a) {
; RV64I-LABEL: bswap_i64:
; RV64I: # %bb.0:
-; RV64I-NEXT: srli a1, a0, 24
-; RV64I-NEXT: lui a2, 4080
-; RV64I-NEXT: and a1, a1, a2
-; RV64I-NEXT: srli a2, a0, 8
-; RV64I-NEXT: li a3, 255
-; RV64I-NEXT: slli a4, a3, 24
-; RV64I-NEXT: and a2, a2, a4
+; RV64I-NEXT: slli a1, a0, 24
+; RV64I-NEXT: li a2, 255
+; RV64I-NEXT: slli a3, a2, 40
+; RV64I-NEXT: and a1, a1, a3
+; RV64I-NEXT: srliw a3, a0, 24
+; RV64I-NEXT: slli a3, a3, 32
+; RV64I-NEXT: or a1, a1, a3
+; RV64I-NEXT: slli a3, a0, 40
+; RV64I-NEXT: slli a2, a2, 48
+; RV64I-NEXT: and a2, a3, a2
+; RV64I-NEXT: slli a3, a0, 56
+; RV64I-NEXT: or a2, a3, a2
; RV64I-NEXT: or a1, a2, a1
; RV64I-NEXT: srli a2, a0, 40
-; RV64I-NEXT: lui a4, 16
-; RV64I-NEXT: addiw a4, a4, -256
-; RV64I-NEXT: and a2, a2, a4
-; RV64I-NEXT: srli a4, a0, 56
-; RV64I-NEXT: or a2, a2, a4
-; RV64I-NEXT: or a1, a1, a2
-; RV64I-NEXT: slli a2, a0, 24
-; RV64I-NEXT: slli a4, a3, 40
-; RV64I-NEXT: and a2, a2, a4
-; RV64I-NEXT: srliw a4, a0, 24
-; RV64I-NEXT: slli a4, a4, 32
-; RV64I-NEXT: or a2, a2, a4
-; RV64I-NEXT: slli a4, a0, 40
-; RV64I-NEXT: slli a3, a3, 48
-; RV64I-NEXT: and a3, a4, a3
-; RV64I-NEXT: slli a0, a0, 56
+; RV64I-NEXT: lui a3, 16
+; RV64I-NEXT: addiw a3, a3, -256
+; RV64I-NEXT: and a2, a2, a3
+; RV64I-NEXT: srli a3, a0, 56
+; RV64I-NEXT: or a2, a2, a3
+; RV64I-NEXT: srli a3, a0, 24
+; RV64I-NEXT: lui a4, 4080
+; RV64I-NEXT: and a3, a3, a4
+; RV64I-NEXT: srli a0, a0, 8
+; RV64I-NEXT: srliw a0, a0, 24
+; RV64I-NEXT: slli a0, a0, 24
; RV64I-NEXT: or a0, a0, a3
; RV64I-NEXT: or a0, a0, a2
-; RV64I-NEXT: or a0, a0, a1
+; RV64I-NEXT: or a0, a1, a0
; RV64I-NEXT: ret
;
; RV64ZBP-LABEL: bswap_i64:
define i64 @bitreverse_i64(i64 %a) nounwind {
; RV64I-LABEL: bitreverse_i64:
; RV64I: # %bb.0:
-; RV64I-NEXT: srli a1, a0, 24
-; RV64I-NEXT: lui a2, 4080
-; RV64I-NEXT: and a1, a1, a2
-; RV64I-NEXT: srli a2, a0, 8
-; RV64I-NEXT: li a3, 255
-; RV64I-NEXT: slli a4, a3, 24
-; RV64I-NEXT: and a2, a2, a4
+; RV64I-NEXT: slli a1, a0, 24
+; RV64I-NEXT: li a2, 255
+; RV64I-NEXT: slli a3, a2, 40
+; RV64I-NEXT: and a1, a1, a3
+; RV64I-NEXT: srliw a3, a0, 24
+; RV64I-NEXT: slli a3, a3, 32
+; RV64I-NEXT: or a1, a1, a3
+; RV64I-NEXT: slli a3, a0, 40
+; RV64I-NEXT: slli a2, a2, 48
+; RV64I-NEXT: and a2, a3, a2
+; RV64I-NEXT: slli a3, a0, 56
+; RV64I-NEXT: or a2, a3, a2
; RV64I-NEXT: or a1, a2, a1
; RV64I-NEXT: srli a2, a0, 40
-; RV64I-NEXT: lui a4, 16
-; RV64I-NEXT: addiw a4, a4, -256
-; RV64I-NEXT: and a2, a2, a4
-; RV64I-NEXT: srli a4, a0, 56
-; RV64I-NEXT: or a2, a2, a4
-; RV64I-NEXT: or a1, a1, a2
-; RV64I-NEXT: slli a2, a0, 24
-; RV64I-NEXT: slli a4, a3, 40
-; RV64I-NEXT: and a2, a2, a4
-; RV64I-NEXT: srliw a4, a0, 24
-; RV64I-NEXT: slli a4, a4, 32
-; RV64I-NEXT: or a2, a2, a4
-; RV64I-NEXT: slli a4, a0, 40
-; RV64I-NEXT: slli a3, a3, 48
-; RV64I-NEXT: and a3, a4, a3
-; RV64I-NEXT: slli a0, a0, 56
+; RV64I-NEXT: lui a3, 16
+; RV64I-NEXT: addiw a3, a3, -256
+; RV64I-NEXT: and a2, a2, a3
+; RV64I-NEXT: srli a3, a0, 56
+; RV64I-NEXT: or a2, a2, a3
+; RV64I-NEXT: srli a3, a0, 24
+; RV64I-NEXT: lui a4, 4080
+; RV64I-NEXT: and a3, a3, a4
+; RV64I-NEXT: srli a0, a0, 8
+; RV64I-NEXT: srliw a0, a0, 24
+; RV64I-NEXT: slli a0, a0, 24
; RV64I-NEXT: or a0, a0, a3
; RV64I-NEXT: lui a3, %hi(.LCPI73_0)
; RV64I-NEXT: ld a3, %lo(.LCPI73_0)(a3)
; RV64I-NEXT: or a0, a0, a2
-; RV64I-NEXT: or a0, a0, a1
+; RV64I-NEXT: or a0, a1, a0
; RV64I-NEXT: srli a1, a0, 4
; RV64I-NEXT: and a1, a1, a3
; RV64I-NEXT: and a0, a0, a3
; RV64I-NEXT: slli a2, a2, 32
; RV64I-NEXT: or a1, a2, a1
; RV64I-NEXT: srli a0, a0, 16
-; RV64I-NEXT: lui a2, 65535
-; RV64I-NEXT: slli a2, a2, 4
-; RV64I-NEXT: and a0, a0, a2
+; RV64I-NEXT: srliw a0, a0, 16
+; RV64I-NEXT: slli a0, a0, 16
; RV64I-NEXT: or a0, a1, a0
; RV64I-NEXT: ret
;
; LP64-LP64F-LP64D-FPELIM-NEXT: srli a1, a1, 32
; LP64-LP64F-LP64D-FPELIM-NEXT: addi a1, a1, 8
; LP64-LP64F-LP64D-FPELIM-NEXT: sd a1, 8(sp)
-; LP64-LP64F-LP64D-FPELIM-NEXT: li a1, 1
-; LP64-LP64F-LP64D-FPELIM-NEXT: slli a1, a1, 32
-; LP64-LP64F-LP64D-FPELIM-NEXT: addi a1, a1, -8
-; LP64-LP64F-LP64D-FPELIM-NEXT: and a0, a0, a1
+; LP64-LP64F-LP64D-FPELIM-NEXT: srliw a0, a0, 3
+; LP64-LP64F-LP64D-FPELIM-NEXT: slli a0, a0, 3
; LP64-LP64F-LP64D-FPELIM-NEXT: ld a0, 0(a0)
; LP64-LP64F-LP64D-FPELIM-NEXT: addi sp, sp, 80
; LP64-LP64F-LP64D-FPELIM-NEXT: ret
; LP64-LP64F-LP64D-WITHFP-NEXT: srli a1, a1, 32
; LP64-LP64F-LP64D-WITHFP-NEXT: addi a1, a1, 8
; LP64-LP64F-LP64D-WITHFP-NEXT: sd a1, -24(s0)
-; LP64-LP64F-LP64D-WITHFP-NEXT: li a1, 1
-; LP64-LP64F-LP64D-WITHFP-NEXT: slli a1, a1, 32
-; LP64-LP64F-LP64D-WITHFP-NEXT: addi a1, a1, -8
-; LP64-LP64F-LP64D-WITHFP-NEXT: and a0, a0, a1
+; LP64-LP64F-LP64D-WITHFP-NEXT: srliw a0, a0, 3
+; LP64-LP64F-LP64D-WITHFP-NEXT: slli a0, a0, 3
; LP64-LP64F-LP64D-WITHFP-NEXT: ld a0, 0(a0)
; LP64-LP64F-LP64D-WITHFP-NEXT: ld ra, 24(sp) # 8-byte Folded Reload
; LP64-LP64F-LP64D-WITHFP-NEXT: ld s0, 16(sp) # 8-byte Folded Reload
; LP64-LP64F-LP64D-FPELIM-NEXT: srli a2, a2, 32
; LP64-LP64F-LP64D-FPELIM-NEXT: addi a2, a2, 8
; LP64-LP64F-LP64D-FPELIM-NEXT: sd a2, 8(sp)
-; LP64-LP64F-LP64D-FPELIM-NEXT: li a2, 1
-; LP64-LP64F-LP64D-FPELIM-NEXT: slli a2, a2, 32
-; LP64-LP64F-LP64D-FPELIM-NEXT: addi a2, a2, -8
-; LP64-LP64F-LP64D-FPELIM-NEXT: and a0, a0, a2
+; LP64-LP64F-LP64D-FPELIM-NEXT: srliw a0, a0, 3
+; LP64-LP64F-LP64D-FPELIM-NEXT: slli a0, a0, 3
; LP64-LP64F-LP64D-FPELIM-NEXT: ld a0, 0(a0)
; LP64-LP64F-LP64D-FPELIM-NEXT: add a0, a1, a0
; LP64-LP64F-LP64D-FPELIM-NEXT: addi sp, sp, 64
; LP64-LP64F-LP64D-WITHFP-NEXT: srli a2, a2, 32
; LP64-LP64F-LP64D-WITHFP-NEXT: addi a2, a2, 8
; LP64-LP64F-LP64D-WITHFP-NEXT: sd a2, -24(s0)
-; LP64-LP64F-LP64D-WITHFP-NEXT: li a2, 1
-; LP64-LP64F-LP64D-WITHFP-NEXT: slli a2, a2, 32
-; LP64-LP64F-LP64D-WITHFP-NEXT: addi a2, a2, -8
-; LP64-LP64F-LP64D-WITHFP-NEXT: and a0, a0, a2
+; LP64-LP64F-LP64D-WITHFP-NEXT: srliw a0, a0, 3
+; LP64-LP64F-LP64D-WITHFP-NEXT: slli a0, a0, 3
; LP64-LP64F-LP64D-WITHFP-NEXT: ld a0, 0(a0)
; LP64-LP64F-LP64D-WITHFP-NEXT: add a0, a1, a0
; LP64-LP64F-LP64D-WITHFP-NEXT: ld ra, 24(sp) # 8-byte Folded Reload