ret i32 %2
}
+; bitreverse followed by bswap on i32: the two byte-order reversals cancel,
+; leaving only a bit-reversal within each byte. On the bitmanip targets this
+; folds to a single rev.b, while plain RV32I expands both ops in full.
+; NOTE(review): the RV32I/RV32IB/RV32IBP check lines look machine-generated
+; (update_llc_test_checks.py style); regenerate them rather than hand-editing.
+define i32 @bitreverse_bswap_i32(i32 %a) {
+; RV32I-LABEL: bitreverse_bswap_i32:
+; RV32I: # %bb.0:
+; RV32I-NEXT: srli a1, a0, 8
+; RV32I-NEXT: lui a2, 16
+; RV32I-NEXT: addi a2, a2, -256
+; RV32I-NEXT: and a1, a1, a2
+; RV32I-NEXT: srli a3, a0, 24
+; RV32I-NEXT: or a1, a1, a3
+; RV32I-NEXT: slli a3, a0, 8
+; RV32I-NEXT: lui a4, 4080
+; RV32I-NEXT: and a3, a3, a4
+; RV32I-NEXT: slli a0, a0, 24
+; RV32I-NEXT: or a0, a0, a3
+; RV32I-NEXT: or a0, a0, a1
+; RV32I-NEXT: lui a1, 61681
+; RV32I-NEXT: addi a1, a1, -241
+; RV32I-NEXT: and a1, a0, a1
+; RV32I-NEXT: slli a1, a1, 4
+; RV32I-NEXT: lui a3, 986895
+; RV32I-NEXT: addi a3, a3, 240
+; RV32I-NEXT: and a0, a0, a3
+; RV32I-NEXT: srli a0, a0, 4
+; RV32I-NEXT: or a0, a0, a1
+; RV32I-NEXT: lui a1, 209715
+; RV32I-NEXT: addi a1, a1, 819
+; RV32I-NEXT: and a1, a0, a1
+; RV32I-NEXT: slli a1, a1, 2
+; RV32I-NEXT: lui a3, 838861
+; RV32I-NEXT: addi a3, a3, -820
+; RV32I-NEXT: and a0, a0, a3
+; RV32I-NEXT: srli a0, a0, 2
+; RV32I-NEXT: or a0, a0, a1
+; RV32I-NEXT: lui a1, 349525
+; RV32I-NEXT: addi a1, a1, 1365
+; RV32I-NEXT: and a1, a0, a1
+; RV32I-NEXT: slli a1, a1, 1
+; RV32I-NEXT: lui a3, 699051
+; RV32I-NEXT: addi a3, a3, -1366
+; RV32I-NEXT: and a0, a0, a3
+; RV32I-NEXT: srli a0, a0, 1
+; RV32I-NEXT: or a0, a0, a1
+; RV32I-NEXT: srli a1, a0, 8
+; RV32I-NEXT: and a1, a1, a2
+; RV32I-NEXT: srli a2, a0, 24
+; RV32I-NEXT: or a1, a1, a2
+; RV32I-NEXT: slli a2, a0, 8
+; RV32I-NEXT: and a2, a2, a4
+; RV32I-NEXT: slli a0, a0, 24
+; RV32I-NEXT: or a0, a0, a2
+; RV32I-NEXT: or a0, a0, a1
+; RV32I-NEXT: ret
+;
+; RV32IB-LABEL: bitreverse_bswap_i32:
+; RV32IB: # %bb.0:
+; RV32IB-NEXT: rev.b a0, a0
+; RV32IB-NEXT: ret
+;
+; RV32IBP-LABEL: bitreverse_bswap_i32:
+; RV32IBP: # %bb.0:
+; RV32IBP-NEXT: rev.b a0, a0
+; RV32IBP-NEXT: ret
+ %1 = call i32 @llvm.bitreverse.i32(i32 %a)
+ %2 = call i32 @llvm.bswap.i32(i32 %1)
+ ret i32 %2
+}
+
+; i64 variant on RV32: the value lives in the a1:a0 register pair. With the
+; bitmanip extensions the half-swaps performed by bitreverse and bswap cancel,
+; so each half is independently byte-wise bit-reversed (rev.b on a0 and a1
+; with no cross-register swap); plain RV32I expands both ops in full.
+define i64 @bitreverse_bswap_i64(i64 %a) {
+; RV32I-LABEL: bitreverse_bswap_i64:
+; RV32I: # %bb.0:
+; RV32I-NEXT: srli a3, a1, 8
+; RV32I-NEXT: lui a2, 16
+; RV32I-NEXT: addi t1, a2, -256
+; RV32I-NEXT: and a3, a3, t1
+; RV32I-NEXT: srli a4, a1, 24
+; RV32I-NEXT: or a4, a3, a4
+; RV32I-NEXT: slli a5, a1, 8
+; RV32I-NEXT: lui a6, 4080
+; RV32I-NEXT: and a5, a5, a6
+; RV32I-NEXT: slli a1, a1, 24
+; RV32I-NEXT: or a1, a1, a5
+; RV32I-NEXT: or a1, a1, a4
+; RV32I-NEXT: lui a4, 61681
+; RV32I-NEXT: addi a7, a4, -241
+; RV32I-NEXT: and a5, a1, a7
+; RV32I-NEXT: slli a5, a5, 4
+; RV32I-NEXT: lui a3, 986895
+; RV32I-NEXT: addi t0, a3, 240
+; RV32I-NEXT: and a1, a1, t0
+; RV32I-NEXT: srli a1, a1, 4
+; RV32I-NEXT: or a1, a1, a5
+; RV32I-NEXT: lui a5, 209715
+; RV32I-NEXT: addi t2, a5, 819
+; RV32I-NEXT: and a4, a1, t2
+; RV32I-NEXT: slli a4, a4, 2
+; RV32I-NEXT: lui a3, 838861
+; RV32I-NEXT: addi t3, a3, -820
+; RV32I-NEXT: and a1, a1, t3
+; RV32I-NEXT: srli a1, a1, 2
+; RV32I-NEXT: or a1, a1, a4
+; RV32I-NEXT: lui a4, 349525
+; RV32I-NEXT: addi a4, a4, 1365
+; RV32I-NEXT: and a2, a1, a4
+; RV32I-NEXT: slli a2, a2, 1
+; RV32I-NEXT: lui a5, 699051
+; RV32I-NEXT: addi a5, a5, -1366
+; RV32I-NEXT: and a1, a1, a5
+; RV32I-NEXT: srli a1, a1, 1
+; RV32I-NEXT: or a1, a1, a2
+; RV32I-NEXT: srli a2, a0, 8
+; RV32I-NEXT: and a2, a2, t1
+; RV32I-NEXT: srli a3, a0, 24
+; RV32I-NEXT: or a2, a2, a3
+; RV32I-NEXT: slli a3, a0, 8
+; RV32I-NEXT: and a3, a3, a6
+; RV32I-NEXT: slli a0, a0, 24
+; RV32I-NEXT: or a0, a0, a3
+; RV32I-NEXT: or a0, a0, a2
+; RV32I-NEXT: and a2, a0, a7
+; RV32I-NEXT: slli a2, a2, 4
+; RV32I-NEXT: and a0, a0, t0
+; RV32I-NEXT: srli a0, a0, 4
+; RV32I-NEXT: or a0, a0, a2
+; RV32I-NEXT: and a2, a0, t2
+; RV32I-NEXT: slli a2, a2, 2
+; RV32I-NEXT: and a0, a0, t3
+; RV32I-NEXT: srli a0, a0, 2
+; RV32I-NEXT: or a0, a0, a2
+; RV32I-NEXT: and a2, a0, a4
+; RV32I-NEXT: slli a2, a2, 1
+; RV32I-NEXT: and a0, a0, a5
+; RV32I-NEXT: srli a0, a0, 1
+; RV32I-NEXT: or a0, a0, a2
+; RV32I-NEXT: srli a2, a0, 8
+; RV32I-NEXT: and a2, a2, t1
+; RV32I-NEXT: srli a3, a0, 24
+; RV32I-NEXT: or a2, a2, a3
+; RV32I-NEXT: slli a3, a0, 8
+; RV32I-NEXT: and a3, a3, a6
+; RV32I-NEXT: slli a0, a0, 24
+; RV32I-NEXT: or a0, a0, a3
+; RV32I-NEXT: or a0, a0, a2
+; RV32I-NEXT: srli a2, a1, 8
+; RV32I-NEXT: and a2, a2, t1
+; RV32I-NEXT: srli a3, a1, 24
+; RV32I-NEXT: or a2, a2, a3
+; RV32I-NEXT: slli a3, a1, 8
+; RV32I-NEXT: and a3, a3, a6
+; RV32I-NEXT: slli a1, a1, 24
+; RV32I-NEXT: or a1, a1, a3
+; RV32I-NEXT: or a1, a1, a2
+; RV32I-NEXT: ret
+;
+; RV32IB-LABEL: bitreverse_bswap_i64:
+; RV32IB: # %bb.0:
+; RV32IB-NEXT: rev.b a0, a0
+; RV32IB-NEXT: rev.b a1, a1
+; RV32IB-NEXT: ret
+;
+; RV32IBP-LABEL: bitreverse_bswap_i64:
+; RV32IBP: # %bb.0:
+; RV32IBP-NEXT: rev.b a0, a0
+; RV32IBP-NEXT: rev.b a1, a1
+; RV32IBP-NEXT: ret
+ %1 = call i64 @llvm.bitreverse.i64(i64 %a)
+ %2 = call i64 @llvm.bswap.i64(i64 %1)
+ ret i64 %2
+}
+
define i32 @shfl1_i32(i32 %a, i32 %b) nounwind {
; RV32I-LABEL: shfl1_i32:
; RV32I: # %bb.0:
ret i32 %2
}
+; bitreverse+bswap on i32 under RV64: reduces to a per-byte bit reversal,
+; emitted as a single greviw (shift amount 7, word form) on the bitmanip
+; targets; plain RV64I materializes every mask and expands both ops.
+; NOTE(review): the RV64I/RV64IB/RV64IBP check lines look machine-generated
+; (update_llc_test_checks.py style); regenerate them rather than hand-editing.
+define i32 @bitreverse_bswap_i32(i32 %a) {
+; RV64I-LABEL: bitreverse_bswap_i32:
+; RV64I: # %bb.0:
+; RV64I-NEXT: srli a1, a0, 24
+; RV64I-NEXT: lui a2, 4080
+; RV64I-NEXT: and a1, a1, a2
+; RV64I-NEXT: srli a2, a0, 8
+; RV64I-NEXT: addi a3, zero, 255
+; RV64I-NEXT: slli a4, a3, 24
+; RV64I-NEXT: and a2, a2, a4
+; RV64I-NEXT: or a1, a2, a1
+; RV64I-NEXT: srli a2, a0, 40
+; RV64I-NEXT: lui a4, 16
+; RV64I-NEXT: addiw a4, a4, -256
+; RV64I-NEXT: and a2, a2, a4
+; RV64I-NEXT: srli a4, a0, 56
+; RV64I-NEXT: or a2, a2, a4
+; RV64I-NEXT: or a4, a1, a2
+; RV64I-NEXT: slli a1, a0, 8
+; RV64I-NEXT: slli a2, a3, 32
+; RV64I-NEXT: and a2, a1, a2
+; RV64I-NEXT: slli a5, a0, 24
+; RV64I-NEXT: slli a6, a3, 40
+; RV64I-NEXT: and a5, a5, a6
+; RV64I-NEXT: or a5, a5, a2
+; RV64I-NEXT: slli a1, a0, 40
+; RV64I-NEXT: slli a2, a3, 48
+; RV64I-NEXT: and a1, a1, a2
+; RV64I-NEXT: slli a0, a0, 56
+; RV64I-NEXT: or a0, a0, a1
+; RV64I-NEXT: or a0, a0, a5
+; RV64I-NEXT: or a0, a0, a4
+; RV64I-NEXT: lui a1, 3855
+; RV64I-NEXT: addiw a1, a1, 241
+; RV64I-NEXT: slli a1, a1, 12
+; RV64I-NEXT: addi a1, a1, -241
+; RV64I-NEXT: slli a1, a1, 12
+; RV64I-NEXT: addi a1, a1, 241
+; RV64I-NEXT: slli a1, a1, 12
+; RV64I-NEXT: addi a1, a1, -241
+; RV64I-NEXT: and a1, a0, a1
+; RV64I-NEXT: slli a1, a1, 4
+; RV64I-NEXT: lui a3, 1044721
+; RV64I-NEXT: addiw a3, a3, -241
+; RV64I-NEXT: slli a3, a3, 12
+; RV64I-NEXT: addi a3, a3, 241
+; RV64I-NEXT: slli a3, a3, 12
+; RV64I-NEXT: addi a3, a3, -241
+; RV64I-NEXT: slli a3, a3, 12
+; RV64I-NEXT: addi a3, a3, 240
+; RV64I-NEXT: and a0, a0, a3
+; RV64I-NEXT: srli a0, a0, 4
+; RV64I-NEXT: or a0, a0, a1
+; RV64I-NEXT: lui a1, 13107
+; RV64I-NEXT: addiw a1, a1, 819
+; RV64I-NEXT: slli a1, a1, 12
+; RV64I-NEXT: addi a1, a1, 819
+; RV64I-NEXT: slli a1, a1, 12
+; RV64I-NEXT: addi a1, a1, 819
+; RV64I-NEXT: slli a1, a1, 12
+; RV64I-NEXT: addi a1, a1, 819
+; RV64I-NEXT: and a1, a0, a1
+; RV64I-NEXT: slli a1, a1, 2
+; RV64I-NEXT: lui a3, 1035469
+; RV64I-NEXT: addiw a3, a3, -819
+; RV64I-NEXT: slli a3, a3, 12
+; RV64I-NEXT: addi a3, a3, -819
+; RV64I-NEXT: slli a3, a3, 12
+; RV64I-NEXT: addi a3, a3, -819
+; RV64I-NEXT: slli a3, a3, 12
+; RV64I-NEXT: addi a3, a3, -820
+; RV64I-NEXT: and a0, a0, a3
+; RV64I-NEXT: srli a0, a0, 2
+; RV64I-NEXT: or a0, a0, a1
+; RV64I-NEXT: lui a1, 21845
+; RV64I-NEXT: addiw a1, a1, 1365
+; RV64I-NEXT: slli a1, a1, 12
+; RV64I-NEXT: addi a1, a1, 1365
+; RV64I-NEXT: slli a1, a1, 12
+; RV64I-NEXT: addi a1, a1, 1365
+; RV64I-NEXT: slli a1, a1, 12
+; RV64I-NEXT: addi a1, a1, 1365
+; RV64I-NEXT: and a1, a0, a1
+; RV64I-NEXT: slli a1, a1, 1
+; RV64I-NEXT: lui a3, 1026731
+; RV64I-NEXT: addiw a3, a3, -1365
+; RV64I-NEXT: slli a3, a3, 12
+; RV64I-NEXT: addi a3, a3, -1365
+; RV64I-NEXT: slli a3, a3, 12
+; RV64I-NEXT: addi a3, a3, -1365
+; RV64I-NEXT: slli a3, a3, 12
+; RV64I-NEXT: addi a3, a3, -1366
+; RV64I-NEXT: and a0, a0, a3
+; RV64I-NEXT: srli a0, a0, 1
+; RV64I-NEXT: or a0, a0, a1
+; RV64I-NEXT: slli a1, a0, 24
+; RV64I-NEXT: addi a3, zero, -1
+; RV64I-NEXT: slli a3, a3, 56
+; RV64I-NEXT: and a1, a1, a3
+; RV64I-NEXT: srli a3, a0, 8
+; RV64I-NEXT: and a3, a3, a6
+; RV64I-NEXT: srli a4, a0, 24
+; RV64I-NEXT: or a3, a3, a4
+; RV64I-NEXT: slli a0, a0, 8
+; RV64I-NEXT: and a0, a0, a2
+; RV64I-NEXT: or a0, a1, a0
+; RV64I-NEXT: or a0, a0, a3
+; RV64I-NEXT: srli a0, a0, 32
+; RV64I-NEXT: ret
+;
+; RV64IB-LABEL: bitreverse_bswap_i32:
+; RV64IB: # %bb.0:
+; RV64IB-NEXT: greviw a0, a0, 7
+; RV64IB-NEXT: ret
+;
+; RV64IBP-LABEL: bitreverse_bswap_i32:
+; RV64IBP: # %bb.0:
+; RV64IBP-NEXT: greviw a0, a0, 7
+; RV64IBP-NEXT: ret
+ %1 = call i32 @llvm.bitreverse.i32(i32 %a)
+ %2 = call i32 @llvm.bswap.i32(i32 %1)
+ ret i32 %2
+}
+
+; bitreverse+bswap on i64 under RV64: the byte-order reversals cancel,
+; leaving a per-byte bit reversal (a single rev.b on the bitmanip targets);
+; plain RV64I expands both ops with full 64-bit mask materialization.
+define i64 @bitreverse_bswap_i64(i64 %a) {
+; RV64I-LABEL: bitreverse_bswap_i64:
+; RV64I: # %bb.0:
+; RV64I-NEXT: srli a1, a0, 24
+; RV64I-NEXT: lui a6, 4080
+; RV64I-NEXT: and a1, a1, a6
+; RV64I-NEXT: srli a3, a0, 8
+; RV64I-NEXT: addi a5, zero, 255
+; RV64I-NEXT: slli a7, a5, 24
+; RV64I-NEXT: and a3, a3, a7
+; RV64I-NEXT: or a3, a3, a1
+; RV64I-NEXT: srli a4, a0, 40
+; RV64I-NEXT: lui a1, 16
+; RV64I-NEXT: addiw a1, a1, -256
+; RV64I-NEXT: and a4, a4, a1
+; RV64I-NEXT: srli a2, a0, 56
+; RV64I-NEXT: or a2, a4, a2
+; RV64I-NEXT: or a2, a3, a2
+; RV64I-NEXT: slli a4, a0, 8
+; RV64I-NEXT: slli t0, a5, 32
+; RV64I-NEXT: and a3, a4, t0
+; RV64I-NEXT: slli a4, a0, 24
+; RV64I-NEXT: slli t1, a5, 40
+; RV64I-NEXT: and a4, a4, t1
+; RV64I-NEXT: or a3, a4, a3
+; RV64I-NEXT: slli a4, a0, 40
+; RV64I-NEXT: slli a5, a5, 48
+; RV64I-NEXT: and a4, a4, a5
+; RV64I-NEXT: slli a0, a0, 56
+; RV64I-NEXT: or a0, a0, a4
+; RV64I-NEXT: or a0, a0, a3
+; RV64I-NEXT: or a0, a0, a2
+; RV64I-NEXT: lui a2, 3855
+; RV64I-NEXT: addiw a2, a2, 241
+; RV64I-NEXT: slli a2, a2, 12
+; RV64I-NEXT: addi a2, a2, -241
+; RV64I-NEXT: slli a2, a2, 12
+; RV64I-NEXT: addi a2, a2, 241
+; RV64I-NEXT: slli a2, a2, 12
+; RV64I-NEXT: addi a2, a2, -241
+; RV64I-NEXT: and a2, a0, a2
+; RV64I-NEXT: slli a2, a2, 4
+; RV64I-NEXT: lui a3, 1044721
+; RV64I-NEXT: addiw a3, a3, -241
+; RV64I-NEXT: slli a3, a3, 12
+; RV64I-NEXT: addi a3, a3, 241
+; RV64I-NEXT: slli a3, a3, 12
+; RV64I-NEXT: addi a3, a3, -241
+; RV64I-NEXT: slli a3, a3, 12
+; RV64I-NEXT: addi a3, a3, 240
+; RV64I-NEXT: and a0, a0, a3
+; RV64I-NEXT: srli a0, a0, 4
+; RV64I-NEXT: or a0, a0, a2
+; RV64I-NEXT: lui a2, 13107
+; RV64I-NEXT: addiw a2, a2, 819
+; RV64I-NEXT: slli a2, a2, 12
+; RV64I-NEXT: addi a2, a2, 819
+; RV64I-NEXT: slli a2, a2, 12
+; RV64I-NEXT: addi a2, a2, 819
+; RV64I-NEXT: slli a2, a2, 12
+; RV64I-NEXT: addi a2, a2, 819
+; RV64I-NEXT: and a2, a0, a2
+; RV64I-NEXT: slli a2, a2, 2
+; RV64I-NEXT: lui a3, 1035469
+; RV64I-NEXT: addiw a3, a3, -819
+; RV64I-NEXT: slli a3, a3, 12
+; RV64I-NEXT: addi a3, a3, -819
+; RV64I-NEXT: slli a3, a3, 12
+; RV64I-NEXT: addi a3, a3, -819
+; RV64I-NEXT: slli a3, a3, 12
+; RV64I-NEXT: addi a3, a3, -820
+; RV64I-NEXT: and a0, a0, a3
+; RV64I-NEXT: srli a0, a0, 2
+; RV64I-NEXT: or a0, a0, a2
+; RV64I-NEXT: lui a2, 21845
+; RV64I-NEXT: addiw a2, a2, 1365
+; RV64I-NEXT: slli a2, a2, 12
+; RV64I-NEXT: addi a2, a2, 1365
+; RV64I-NEXT: slli a2, a2, 12
+; RV64I-NEXT: addi a2, a2, 1365
+; RV64I-NEXT: slli a2, a2, 12
+; RV64I-NEXT: addi a2, a2, 1365
+; RV64I-NEXT: and a2, a0, a2
+; RV64I-NEXT: slli a2, a2, 1
+; RV64I-NEXT: lui a3, 1026731
+; RV64I-NEXT: addiw a3, a3, -1365
+; RV64I-NEXT: slli a3, a3, 12
+; RV64I-NEXT: addi a3, a3, -1365
+; RV64I-NEXT: slli a3, a3, 12
+; RV64I-NEXT: addi a3, a3, -1365
+; RV64I-NEXT: slli a3, a3, 12
+; RV64I-NEXT: addi a3, a3, -1366
+; RV64I-NEXT: and a0, a0, a3
+; RV64I-NEXT: srli a0, a0, 1
+; RV64I-NEXT: or a0, a0, a2
+; RV64I-NEXT: srli a2, a0, 40
+; RV64I-NEXT: and a1, a2, a1
+; RV64I-NEXT: srli a2, a0, 56
+; RV64I-NEXT: or a1, a1, a2
+; RV64I-NEXT: srli a2, a0, 24
+; RV64I-NEXT: and a2, a2, a6
+; RV64I-NEXT: srli a3, a0, 8
+; RV64I-NEXT: and a3, a3, a7
+; RV64I-NEXT: or a2, a3, a2
+; RV64I-NEXT: or a1, a2, a1
+; RV64I-NEXT: slli a2, a0, 8
+; RV64I-NEXT: and a2, a2, t0
+; RV64I-NEXT: slli a3, a0, 24
+; RV64I-NEXT: and a3, a3, t1
+; RV64I-NEXT: or a2, a3, a2
+; RV64I-NEXT: slli a3, a0, 40
+; RV64I-NEXT: and a3, a3, a5
+; RV64I-NEXT: slli a0, a0, 56
+; RV64I-NEXT: or a0, a0, a3
+; RV64I-NEXT: or a0, a0, a2
+; RV64I-NEXT: or a0, a0, a1
+; RV64I-NEXT: ret
+;
+; RV64IB-LABEL: bitreverse_bswap_i64:
+; RV64IB: # %bb.0:
+; RV64IB-NEXT: rev.b a0, a0
+; RV64IB-NEXT: ret
+;
+; RV64IBP-LABEL: bitreverse_bswap_i64:
+; RV64IBP: # %bb.0:
+; RV64IBP-NEXT: rev.b a0, a0
+; RV64IBP-NEXT: ret
+ %1 = call i64 @llvm.bitreverse.i64(i64 %a)
+ %2 = call i64 @llvm.bswap.i64(i64 %1)
+ ret i64 %2
+}
+
; There's no [un]shfliw instruction as slliu.w occupies the encoding slot that
; would be occupied by shfliw.