// Add any two-address hints after any copy hints.
SmallSet<unsigned, 4> TwoAddrHints;
- auto tryAddHint = [&](const MachineOperand &VRRegMO,
- const MachineOperand &MO) -> void {
+ auto tryAddHint = [&](const MachineOperand &VRRegMO, const MachineOperand &MO,
+ bool NeedGPRC) -> void {
Register Reg = MO.getReg();
Register PhysReg =
Register::isPhysicalRegister(Reg) ? Reg : Register(VRM->getPhys(Reg));
- if (PhysReg) {
+ if (PhysReg && (!NeedGPRC || RISCV::GPRCRegClass.contains(PhysReg))) {
assert(!MO.getSubReg() && !VRRegMO.getSubReg() && "Unexpected subreg!");
if (!MRI->isReserved(PhysReg) && !is_contained(Hints, PhysReg))
TwoAddrHints.insert(PhysReg);
- // For now we support the compressible instructions which can encode all
- // registers and have a single register source.
+ // For now we support compressible instructions which have a single register
+ // source. Instructions whose compressed form only accepts GPRC registers
+ // report that constraint through NeedGPRC.
// TODO: Add more compressed instructions.
- auto isCompressible = [](const MachineInstr &MI) {
+ auto isCompressible = [](const MachineInstr &MI, bool &NeedGPRC) {
+ NeedGPRC = false;
switch (MI.getOpcode()) {
default:
return false;
+ case RISCV::SRAI:
+ case RISCV::SRLI:
+ NeedGPRC = true;
+ return true;
case RISCV::ADD:
case RISCV::SLLI:
return true;
for (auto &MO : MRI->reg_nodbg_operands(VirtReg)) {
const MachineInstr &MI = *MO.getParent();
- if (isCompressible(MI)) {
+ bool NeedGPRC;
+ if (isCompressible(MI, NeedGPRC)) {
unsigned OpIdx = MI.getOperandNo(&MO);
if (OpIdx == 0 && MI.getOperand(1).isReg()) {
- tryAddHint(MO, MI.getOperand(1));
+ tryAddHint(MO, MI.getOperand(1), NeedGPRC);
if (MI.isCommutable() && MI.getOperand(2).isReg())
- tryAddHint(MO, MI.getOperand(2));
+ tryAddHint(MO, MI.getOperand(2), NeedGPRC);
} else if (OpIdx == 1) {
- tryAddHint(MO, MI.getOperand(0));
+ tryAddHint(MO, MI.getOperand(0), NeedGPRC);
} else if (MI.isCommutable() && OpIdx == 2) {
- tryAddHint(MO, MI.getOperand(0));
+ tryAddHint(MO, MI.getOperand(0), NeedGPRC);
}
}
}
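The GPRC requirement for SRAI and SRLI follows from the C extension encodings: c.srai and c.srli use a 3-bit register field, so the shared source/destination register must be one of x8-x15 (the GPRC class), while c.add and c.slli can name any GPR, which is why ADD and SLLI leave NeedGPRC false. A minimal sketch of the filter this hook applies (the helper name is illustrative, not from the patch; it assumes the same LLVM/RISC-V backend headers as RISCVRegisterInfo.cpp):

// Illustrative helper, not in the patch: a physical-register hint is only
// worth recording if the compressed form can actually encode that register.
static bool hintIsEncodable(Register PhysReg, bool NeedGPRC) {
  // c.srai/c.srli only encode x8-x15 (GPRC); c.add/c.slli accept any GPR.
  return PhysReg && (!NeedGPRC || RISCV::GPRCRegClass.contains(PhysReg));
}

The regenerated test checks below show the intended effect: the shift's destination is now allocated to the same register as its (shifted) source, so the slli/srli and slli/srai pairs become candidates for 16-bit c.slli/c.srli/c.srai forms.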
; RV64I-NEXT: # %bb.1: # %iffalse
; RV64I-NEXT: li a1, -2
; RV64I-NEXT: .LBB2_2: # %merge
-; RV64I-NEXT: slli a1, a1, 32
-; RV64I-NEXT: srli a0, a1, 32
+; RV64I-NEXT: slli a0, a1, 32
+; RV64I-NEXT: srli a0, a0, 32
; RV64I-NEXT: ret
%a = icmp ne i32 %c, 0
br i1 %a, label %iftrue, label %iffalse
; RV32I-NEXT: mv a2, s1
; RV32I-NEXT: j .LBB10_1
; RV32I-NEXT: .LBB10_4: # %atomicrmw.end
-; RV32I-NEXT: slli a3, a3, 24
-; RV32I-NEXT: srai a0, a3, 24
+; RV32I-NEXT: slli a0, a3, 24
+; RV32I-NEXT: srai a0, a0, 24
; RV32I-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
; RV32I-NEXT: lw s0, 24(sp) # 4-byte Folded Reload
; RV32I-NEXT: lw s1, 20(sp) # 4-byte Folded Reload
; RV64I-NEXT: mv a2, s1
; RV64I-NEXT: j .LBB10_1
; RV64I-NEXT: .LBB10_4: # %atomicrmw.end
-; RV64I-NEXT: slli a3, a3, 56
-; RV64I-NEXT: srai a0, a3, 56
+; RV64I-NEXT: slli a0, a3, 56
+; RV64I-NEXT: srai a0, a0, 56
; RV64I-NEXT: ld ra, 40(sp) # 8-byte Folded Reload
; RV64I-NEXT: ld s0, 32(sp) # 8-byte Folded Reload
; RV64I-NEXT: ld s1, 24(sp) # 8-byte Folded Reload
; RV32I-NEXT: mv a2, s1
; RV32I-NEXT: j .LBB11_1
; RV32I-NEXT: .LBB11_4: # %atomicrmw.end
-; RV32I-NEXT: slli a3, a3, 24
-; RV32I-NEXT: srai a0, a3, 24
+; RV32I-NEXT: slli a0, a3, 24
+; RV32I-NEXT: srai a0, a0, 24
; RV32I-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
; RV32I-NEXT: lw s0, 24(sp) # 4-byte Folded Reload
; RV32I-NEXT: lw s1, 20(sp) # 4-byte Folded Reload
; RV64I-NEXT: mv a2, s1
; RV64I-NEXT: j .LBB11_1
; RV64I-NEXT: .LBB11_4: # %atomicrmw.end
-; RV64I-NEXT: slli a3, a3, 56
-; RV64I-NEXT: srai a0, a3, 56
+; RV64I-NEXT: slli a0, a3, 56
+; RV64I-NEXT: srai a0, a0, 56
; RV64I-NEXT: ld ra, 40(sp) # 8-byte Folded Reload
; RV64I-NEXT: ld s0, 32(sp) # 8-byte Folded Reload
; RV64I-NEXT: ld s1, 24(sp) # 8-byte Folded Reload
; RV32I-NEXT: mv a2, s1
; RV32I-NEXT: j .LBB12_1
; RV32I-NEXT: .LBB12_4: # %atomicrmw.end
-; RV32I-NEXT: slli a3, a3, 24
-; RV32I-NEXT: srai a0, a3, 24
+; RV32I-NEXT: slli a0, a3, 24
+; RV32I-NEXT: srai a0, a0, 24
; RV32I-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
; RV32I-NEXT: lw s0, 24(sp) # 4-byte Folded Reload
; RV32I-NEXT: lw s1, 20(sp) # 4-byte Folded Reload
; RV64I-NEXT: mv a2, s1
; RV64I-NEXT: j .LBB12_1
; RV64I-NEXT: .LBB12_4: # %atomicrmw.end
-; RV64I-NEXT: slli a3, a3, 56
-; RV64I-NEXT: srai a0, a3, 56
+; RV64I-NEXT: slli a0, a3, 56
+; RV64I-NEXT: srai a0, a0, 56
; RV64I-NEXT: ld ra, 40(sp) # 8-byte Folded Reload
; RV64I-NEXT: ld s0, 32(sp) # 8-byte Folded Reload
; RV64I-NEXT: ld s1, 24(sp) # 8-byte Folded Reload
; RV32I-NEXT: mv a2, s1
; RV32I-NEXT: j .LBB13_1
; RV32I-NEXT: .LBB13_4: # %atomicrmw.end
-; RV32I-NEXT: slli a3, a3, 24
-; RV32I-NEXT: srai a0, a3, 24
+; RV32I-NEXT: slli a0, a3, 24
+; RV32I-NEXT: srai a0, a0, 24
; RV32I-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
; RV32I-NEXT: lw s0, 24(sp) # 4-byte Folded Reload
; RV32I-NEXT: lw s1, 20(sp) # 4-byte Folded Reload
; RV64I-NEXT: mv a2, s1
; RV64I-NEXT: j .LBB13_1
; RV64I-NEXT: .LBB13_4: # %atomicrmw.end
-; RV64I-NEXT: slli a3, a3, 56
-; RV64I-NEXT: srai a0, a3, 56
+; RV64I-NEXT: slli a0, a3, 56
+; RV64I-NEXT: srai a0, a0, 56
; RV64I-NEXT: ld ra, 40(sp) # 8-byte Folded Reload
; RV64I-NEXT: ld s0, 32(sp) # 8-byte Folded Reload
; RV64I-NEXT: ld s1, 24(sp) # 8-byte Folded Reload
; RV32I-NEXT: mv a2, s1
; RV32I-NEXT: j .LBB21_1
; RV32I-NEXT: .LBB21_4: # %atomicrmw.end
-; RV32I-NEXT: slli a3, a3, 16
-; RV32I-NEXT: srai a0, a3, 16
+; RV32I-NEXT: slli a0, a3, 16
+; RV32I-NEXT: srai a0, a0, 16
; RV32I-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
; RV32I-NEXT: lw s0, 24(sp) # 4-byte Folded Reload
; RV32I-NEXT: lw s1, 20(sp) # 4-byte Folded Reload
; RV64I-NEXT: mv a2, s1
; RV64I-NEXT: j .LBB21_1
; RV64I-NEXT: .LBB21_4: # %atomicrmw.end
-; RV64I-NEXT: slli a3, a3, 48
-; RV64I-NEXT: srai a0, a3, 48
+; RV64I-NEXT: slli a0, a3, 48
+; RV64I-NEXT: srai a0, a0, 48
; RV64I-NEXT: ld ra, 40(sp) # 8-byte Folded Reload
; RV64I-NEXT: ld s0, 32(sp) # 8-byte Folded Reload
; RV64I-NEXT: ld s1, 24(sp) # 8-byte Folded Reload
; RV32I-NEXT: mv a2, s1
; RV32I-NEXT: j .LBB22_1
; RV32I-NEXT: .LBB22_4: # %atomicrmw.end
-; RV32I-NEXT: slli a3, a3, 16
-; RV32I-NEXT: srai a0, a3, 16
+; RV32I-NEXT: slli a0, a3, 16
+; RV32I-NEXT: srai a0, a0, 16
; RV32I-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
; RV32I-NEXT: lw s0, 24(sp) # 4-byte Folded Reload
; RV32I-NEXT: lw s1, 20(sp) # 4-byte Folded Reload
; RV64I-NEXT: mv a2, s1
; RV64I-NEXT: j .LBB22_1
; RV64I-NEXT: .LBB22_4: # %atomicrmw.end
-; RV64I-NEXT: slli a3, a3, 48
-; RV64I-NEXT: srai a0, a3, 48
+; RV64I-NEXT: slli a0, a3, 48
+; RV64I-NEXT: srai a0, a0, 48
; RV64I-NEXT: ld ra, 40(sp) # 8-byte Folded Reload
; RV64I-NEXT: ld s0, 32(sp) # 8-byte Folded Reload
; RV64I-NEXT: ld s1, 24(sp) # 8-byte Folded Reload
; RV32I-NEXT: mv a2, s0
; RV32I-NEXT: j .LBB23_1
; RV32I-NEXT: .LBB23_4: # %atomicrmw.end
-; RV32I-NEXT: slli a1, a1, 16
-; RV32I-NEXT: srai a0, a1, 16
+; RV32I-NEXT: slli a0, a1, 16
+; RV32I-NEXT: srai a0, a0, 16
; RV32I-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
; RV32I-NEXT: lw s0, 24(sp) # 4-byte Folded Reload
; RV32I-NEXT: lw s1, 20(sp) # 4-byte Folded Reload
; RV64I-NEXT: mv a2, s0
; RV64I-NEXT: j .LBB23_1
; RV64I-NEXT: .LBB23_4: # %atomicrmw.end
-; RV64I-NEXT: slli a1, a1, 48
-; RV64I-NEXT: srai a0, a1, 48
+; RV64I-NEXT: slli a0, a1, 48
+; RV64I-NEXT: srai a0, a0, 48
; RV64I-NEXT: ld ra, 40(sp) # 8-byte Folded Reload
; RV64I-NEXT: ld s0, 32(sp) # 8-byte Folded Reload
; RV64I-NEXT: ld s1, 24(sp) # 8-byte Folded Reload
; RV32I-NEXT: mv a2, s0
; RV32I-NEXT: j .LBB24_1
; RV32I-NEXT: .LBB24_4: # %atomicrmw.end
-; RV32I-NEXT: slli a1, a1, 16
-; RV32I-NEXT: srai a0, a1, 16
+; RV32I-NEXT: slli a0, a1, 16
+; RV32I-NEXT: srai a0, a0, 16
; RV32I-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
; RV32I-NEXT: lw s0, 24(sp) # 4-byte Folded Reload
; RV32I-NEXT: lw s1, 20(sp) # 4-byte Folded Reload
; RV64I-NEXT: mv a2, s0
; RV64I-NEXT: j .LBB24_1
; RV64I-NEXT: .LBB24_4: # %atomicrmw.end
-; RV64I-NEXT: slli a1, a1, 48
-; RV64I-NEXT: srai a0, a1, 48
+; RV64I-NEXT: slli a0, a1, 48
+; RV64I-NEXT: srai a0, a0, 48
; RV64I-NEXT: ld ra, 40(sp) # 8-byte Folded Reload
; RV64I-NEXT: ld s0, 32(sp) # 8-byte Folded Reload
; RV64I-NEXT: ld s1, 24(sp) # 8-byte Folded Reload
; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
; RV32I-NEXT: sw s0, 8(sp) # 4-byte Folded Spill
; RV32I-NEXT: mv s0, a0
-; RV32I-NEXT: slli a1, a1, 16
-; RV32I-NEXT: srli a0, a1, 16
+; RV32I-NEXT: slli a0, a1, 16
+; RV32I-NEXT: srli a0, a0, 16
; RV32I-NEXT: call __extendhfsf2@plt
; RV32I-NEXT: call __fixsfsi@plt
; RV32I-NEXT: add a0, s0, a0
; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
; RV64I-NEXT: sd s0, 0(sp) # 8-byte Folded Spill
; RV64I-NEXT: mv s0, a0
-; RV64I-NEXT: slli a1, a1, 48
-; RV64I-NEXT: srli a0, a1, 48
+; RV64I-NEXT: slli a0, a1, 48
+; RV64I-NEXT: srli a0, a0, 48
; RV64I-NEXT: call __extendhfsf2@plt
; RV64I-NEXT: call __fixsfdi@plt
; RV64I-NEXT: addw a0, s0, a0
;
; RV32IFZFH-LABEL: fold_demote_h_d:
; RV32IFZFH: # %bb.0:
-; RV32IFZFH-NEXT: srli a0, a1, 16
-; RV32IFZFH-NEXT: fmv.h.x ft0, a0
+; RV32IFZFH-NEXT: srli a1, a1, 16
+; RV32IFZFH-NEXT: fmv.h.x ft0, a1
; RV32IFZFH-NEXT: fsgnj.h fa0, fa0, ft0
; RV32IFZFH-NEXT: ret
;
; RV32I-NEXT: li a0, 32
; RV32I-NEXT: beqz s0, .LBB3_4
; RV32I-NEXT: .LBB3_2:
-; RV32I-NEXT: srli a0, s1, 27
-; RV32I-NEXT: add a0, s4, a0
-; RV32I-NEXT: lbu a0, 0(a0)
+; RV32I-NEXT: srli s1, s1, 27
+; RV32I-NEXT: add s1, s4, s1
+; RV32I-NEXT: lbu a0, 0(s1)
; RV32I-NEXT: j .LBB3_5
; RV32I-NEXT: .LBB3_3:
; RV32I-NEXT: srli a0, a0, 27
; RV32I-NEXT: addi a0, a0, 32
; RV32I-NEXT: j .LBB7_3
; RV32I-NEXT: .LBB7_2:
-; RV32I-NEXT: srli a0, s0, 27
-; RV32I-NEXT: add a0, s4, a0
-; RV32I-NEXT: lbu a0, 0(a0)
+; RV32I-NEXT: srli s0, s0, 27
+; RV32I-NEXT: add s0, s4, s0
+; RV32I-NEXT: lbu a0, 0(s0)
; RV32I-NEXT: .LBB7_3:
; RV32I-NEXT: li a1, 0
; RV32I-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
; RV32I-NEXT: srai a1, a1, 31
; RV32I-NEXT: add a1, a0, a1
; RV32I-NEXT: sltu a0, a1, a0
-; RV32I-NEXT: add a2, a2, a0
-; RV32I-NEXT: srai a0, a2, 1
-; RV32I-NEXT: srai a1, a2, 31
+; RV32I-NEXT: add a1, a2, a0
+; RV32I-NEXT: srai a0, a1, 1
+; RV32I-NEXT: srai a1, a1, 31
; RV32I-NEXT: ret
;
; RV64I-LABEL: sdiv64_pow2_8589934592:
; RV32I-NEXT: lui a1, 524288
; RV32I-NEXT: and a0, a0, a1
; RV32I-NEXT: slli s0, s0, 1
-; RV32I-NEXT: srli a1, s0, 1
-; RV32I-NEXT: or a0, a1, a0
+; RV32I-NEXT: srli s0, s0, 1
+; RV32I-NEXT: or a0, s0, a0
; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
; RV32I-NEXT: lw s0, 8(sp) # 4-byte Folded Reload
; RV32I-NEXT: addi sp, sp, 16
; RV64I-NEXT: lui a1, 524288
; RV64I-NEXT: and a0, a0, a1
; RV64I-NEXT: slli s0, s0, 33
-; RV64I-NEXT: srli a1, s0, 33
-; RV64I-NEXT: or a0, a1, a0
+; RV64I-NEXT: srli s0, s0, 33
+; RV64I-NEXT: or a0, s0, a0
; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
; RV64I-NEXT: ld s0, 0(sp) # 8-byte Folded Reload
; RV64I-NEXT: addi sp, sp, 16
; CHECK-NOV-LABEL: utest_f64i32:
; CHECK-NOV: # %bb.0: # %entry
; CHECK-NOV-NEXT: fcvt.lu.d a0, fa0, rtz
-; CHECK-NOV-NEXT: li a1, -1
-; CHECK-NOV-NEXT: srli a2, a1, 32
+; CHECK-NOV-NEXT: li a2, -1
+; CHECK-NOV-NEXT: srli a2, a2, 32
; CHECK-NOV-NEXT: fcvt.lu.d a1, fa1, rtz
; CHECK-NOV-NEXT: bgeu a0, a2, .LBB1_3
; CHECK-NOV-NEXT: # %bb.1: # %entry
; CHECK-NOV-LABEL: ustest_f64i32:
; CHECK-NOV: # %bb.0: # %entry
; CHECK-NOV-NEXT: fcvt.l.d a0, fa0, rtz
-; CHECK-NOV-NEXT: li a1, -1
-; CHECK-NOV-NEXT: srli a2, a1, 32
+; CHECK-NOV-NEXT: li a2, -1
+; CHECK-NOV-NEXT: srli a2, a2, 32
; CHECK-NOV-NEXT: fcvt.l.d a1, fa1, rtz
; CHECK-NOV-NEXT: blt a0, a2, .LBB2_2
; CHECK-NOV-NEXT: # %bb.1: # %entry
; CHECK-NOV-LABEL: utest_f32i32:
; CHECK-NOV: # %bb.0: # %entry
; CHECK-NOV-NEXT: fcvt.lu.s a1, fa0, rtz
-; CHECK-NOV-NEXT: li a2, -1
-; CHECK-NOV-NEXT: srli a3, a2, 32
+; CHECK-NOV-NEXT: li a3, -1
+; CHECK-NOV-NEXT: srli a3, a3, 32
; CHECK-NOV-NEXT: fcvt.lu.s a2, fa1, rtz
; CHECK-NOV-NEXT: bgeu a1, a3, .LBB4_6
; CHECK-NOV-NEXT: # %bb.1: # %entry
; CHECK-NOV-LABEL: ustest_f32i32:
; CHECK-NOV: # %bb.0: # %entry
; CHECK-NOV-NEXT: fcvt.l.s a1, fa0, rtz
-; CHECK-NOV-NEXT: li a2, -1
-; CHECK-NOV-NEXT: srli a4, a2, 32
+; CHECK-NOV-NEXT: li a4, -1
+; CHECK-NOV-NEXT: srli a4, a4, 32
; CHECK-NOV-NEXT: fcvt.l.s a2, fa1, rtz
; CHECK-NOV-NEXT: bge a1, a4, .LBB5_6
; CHECK-NOV-NEXT: # %bb.1: # %entry
; CHECK-NOV-NEXT: mv a0, s1
; CHECK-NOV-NEXT: call __extendhfsf2@plt
; CHECK-NOV-NEXT: fcvt.l.s a0, fa0, rtz
-; CHECK-NOV-NEXT: li a1, -1
-; CHECK-NOV-NEXT: srli a2, a1, 32
+; CHECK-NOV-NEXT: li a2, -1
+; CHECK-NOV-NEXT: srli a2, a2, 32
; CHECK-NOV-NEXT: bge a0, a2, .LBB8_6
; CHECK-NOV-NEXT: # %bb.1: # %entry
; CHECK-NOV-NEXT: fcvt.l.s a1, fs1, rtz
; CHECK-NOV-LABEL: utest_f64i32_mm:
; CHECK-NOV: # %bb.0: # %entry
; CHECK-NOV-NEXT: fcvt.lu.d a0, fa0, rtz
-; CHECK-NOV-NEXT: li a1, -1
-; CHECK-NOV-NEXT: srli a2, a1, 32
+; CHECK-NOV-NEXT: li a2, -1
+; CHECK-NOV-NEXT: srli a2, a2, 32
; CHECK-NOV-NEXT: fcvt.lu.d a1, fa1, rtz
; CHECK-NOV-NEXT: bgeu a0, a2, .LBB28_3
; CHECK-NOV-NEXT: # %bb.1: # %entry
; CHECK-NOV-LABEL: ustest_f64i32_mm:
; CHECK-NOV: # %bb.0: # %entry
; CHECK-NOV-NEXT: fcvt.l.d a1, fa1, rtz
-; CHECK-NOV-NEXT: li a0, -1
-; CHECK-NOV-NEXT: srli a2, a0, 32
+; CHECK-NOV-NEXT: li a2, -1
+; CHECK-NOV-NEXT: srli a2, a2, 32
; CHECK-NOV-NEXT: fcvt.l.d a0, fa0, rtz
; CHECK-NOV-NEXT: blt a1, a2, .LBB29_2
; CHECK-NOV-NEXT: # %bb.1: # %entry
; CHECK-NOV-LABEL: utest_f32i32_mm:
; CHECK-NOV: # %bb.0: # %entry
; CHECK-NOV-NEXT: fcvt.lu.s a1, fa0, rtz
-; CHECK-NOV-NEXT: li a2, -1
-; CHECK-NOV-NEXT: srli a3, a2, 32
+; CHECK-NOV-NEXT: li a3, -1
+; CHECK-NOV-NEXT: srli a3, a3, 32
; CHECK-NOV-NEXT: fcvt.lu.s a2, fa1, rtz
; CHECK-NOV-NEXT: bgeu a1, a3, .LBB31_6
; CHECK-NOV-NEXT: # %bb.1: # %entry
; CHECK-NOV-LABEL: ustest_f32i32_mm:
; CHECK-NOV: # %bb.0: # %entry
; CHECK-NOV-NEXT: fcvt.l.s a1, fa3, rtz
-; CHECK-NOV-NEXT: li a2, -1
-; CHECK-NOV-NEXT: srli a4, a2, 32
+; CHECK-NOV-NEXT: li a4, -1
+; CHECK-NOV-NEXT: srli a4, a4, 32
; CHECK-NOV-NEXT: fcvt.l.s a2, fa2, rtz
; CHECK-NOV-NEXT: bge a1, a4, .LBB32_6
; CHECK-NOV-NEXT: # %bb.1: # %entry
; CHECK-NOV-NEXT: mv a0, s1
; CHECK-NOV-NEXT: call __extendhfsf2@plt
; CHECK-NOV-NEXT: fcvt.l.s a0, fa0, rtz
-; CHECK-NOV-NEXT: li a1, -1
-; CHECK-NOV-NEXT: srli a2, a1, 32
+; CHECK-NOV-NEXT: li a2, -1
+; CHECK-NOV-NEXT: srli a2, a2, 32
; CHECK-NOV-NEXT: bge a0, a2, .LBB35_6
; CHECK-NOV-NEXT: # %bb.1: # %entry
; CHECK-NOV-NEXT: fcvt.l.s a1, fs1, rtz
; RV32I-NEXT: lui a1, 1048568
; RV32I-NEXT: and a0, a0, a1
; RV32I-NEXT: slli s1, s1, 17
-; RV32I-NEXT: srli a1, s1, 17
-; RV32I-NEXT: or a0, a1, a0
+; RV32I-NEXT: srli s1, s1, 17
+; RV32I-NEXT: or a0, s1, a0
; RV32I-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
; RV32I-NEXT: lw s0, 24(sp) # 4-byte Folded Reload
; RV32I-NEXT: lw s1, 20(sp) # 4-byte Folded Reload
; RV64I-NEXT: lui a1, 1048568
; RV64I-NEXT: and a0, a0, a1
; RV64I-NEXT: slli s1, s1, 49
-; RV64I-NEXT: srli a1, s1, 49
-; RV64I-NEXT: or a0, a1, a0
+; RV64I-NEXT: srli s1, s1, 49
+; RV64I-NEXT: or a0, s1, a0
; RV64I-NEXT: ld ra, 40(sp) # 8-byte Folded Reload
; RV64I-NEXT: ld s0, 32(sp) # 8-byte Folded Reload
; RV64I-NEXT: ld s1, 24(sp) # 8-byte Folded Reload
define i64 @rol_i64(i64 %a, i64 %b) nounwind {
; CHECK-LABEL: rol_i64:
; CHECK: # %bb.0:
-; CHECK-NEXT: slli a3, a2, 26
-; CHECK-NEXT: srli a5, a3, 31
+; CHECK-NEXT: slli a5, a2, 26
+; CHECK-NEXT: srli a5, a5, 31
; CHECK-NEXT: mv a4, a1
; CHECK-NEXT: bnez a5, .LBB7_2
; CHECK-NEXT: # %bb.1:
; CHECK-NEXT: srl a1, a1, a5
; CHECK-NEXT: or a3, a3, a1
; CHECK-NEXT: sll a0, a0, a2
-; CHECK-NEXT: srli a1, a4, 1
-; CHECK-NEXT: srl a1, a1, a5
+; CHECK-NEXT: srli a4, a4, 1
+; CHECK-NEXT: srl a1, a4, a5
; CHECK-NEXT: or a1, a0, a1
; CHECK-NEXT: mv a0, a3
; CHECK-NEXT: ret
; RV32I-NEXT: li a0, 32
; RV32I-NEXT: beqz s0, .LBB3_4
; RV32I-NEXT: .LBB3_2:
-; RV32I-NEXT: srli a0, s1, 27
-; RV32I-NEXT: add a0, s4, a0
-; RV32I-NEXT: lbu a0, 0(a0)
+; RV32I-NEXT: srli s1, s1, 27
+; RV32I-NEXT: add s1, s4, s1
+; RV32I-NEXT: lbu a0, 0(s1)
; RV32I-NEXT: j .LBB3_5
; RV32I-NEXT: .LBB3_3:
; RV32I-NEXT: srli a0, a0, 27
; RV32I: # %bb.0:
; RV32I-NEXT: andi a0, a0, 255
; RV32I-NEXT: slli a2, a2, 24
-; RV32I-NEXT: srli a1, a2, 16
-; RV32I-NEXT: or a0, a1, a0
+; RV32I-NEXT: srli a2, a2, 16
+; RV32I-NEXT: or a0, a2, a0
; RV32I-NEXT: li a1, 0
; RV32I-NEXT: ret
;
; RV64I-NEXT: addw a0, a1, a0
; RV64I-NEXT: slli a0, a0, 32
; RV64I-NEXT: slli a2, a2, 32
-; RV64I-NEXT: srli a1, a2, 32
-; RV64I-NEXT: or a0, a0, a1
+; RV64I-NEXT: srli a2, a2, 32
+; RV64I-NEXT: or a0, a0, a2
; RV64I-NEXT: ret
;
; RV64ZBKB-LABEL: pack_i64_allWUsers:
; RV64-NEXT: vle16.v v8, (a0)
; RV64-NEXT: vmv.s.x v12, a1
; RV64-NEXT: slli a2, a2, 32
-; RV64-NEXT: srli a1, a2, 32
-; RV64-NEXT: addi a2, a1, 1
-; RV64-NEXT: vsetvli zero, a2, e16, m4, tu, ma
-; RV64-NEXT: vslideup.vx v8, v12, a1
+; RV64-NEXT: srli a2, a2, 32
+; RV64-NEXT: addi a1, a2, 1
+; RV64-NEXT: vsetvli zero, a1, e16, m4, tu, ma
+; RV64-NEXT: vslideup.vx v8, v12, a2
; RV64-NEXT: vsetvli zero, a3, e16, m4, ta, ma
; RV64-NEXT: vse16.v v8, (a0)
; RV64-NEXT: ret
; CHECK-NEXT: vsetvli a2, zero, e64, m1, ta, ma
; CHECK-NEXT: vmv.s.x v9, a0
; CHECK-NEXT: slli a1, a1, 32
-; CHECK-NEXT: srli a0, a1, 32
-; CHECK-NEXT: addi a1, a0, 1
-; CHECK-NEXT: vsetvli zero, a1, e64, m1, tu, ma
-; CHECK-NEXT: vslideup.vx v8, v9, a0
+; CHECK-NEXT: srli a1, a1, 32
+; CHECK-NEXT: addi a0, a1, 1
+; CHECK-NEXT: vsetvli zero, a0, e64, m1, tu, ma
+; CHECK-NEXT: vslideup.vx v8, v9, a1
; CHECK-NEXT: ret
%r = insertelement <vscale x 1 x i64> %v, i64 %elt, i32 %idx
ret <vscale x 1 x i64> %r
; CHECK-NEXT: vsetvli a2, zero, e64, m2, ta, ma
; CHECK-NEXT: vmv.s.x v10, a0
; CHECK-NEXT: slli a1, a1, 32
-; CHECK-NEXT: srli a0, a1, 32
-; CHECK-NEXT: addi a1, a0, 1
-; CHECK-NEXT: vsetvli zero, a1, e64, m2, tu, ma
-; CHECK-NEXT: vslideup.vx v8, v10, a0
+; CHECK-NEXT: srli a1, a1, 32
+; CHECK-NEXT: addi a0, a1, 1
+; CHECK-NEXT: vsetvli zero, a0, e64, m2, tu, ma
+; CHECK-NEXT: vslideup.vx v8, v10, a1
; CHECK-NEXT: ret
%r = insertelement <vscale x 2 x i64> %v, i64 %elt, i32 %idx
ret <vscale x 2 x i64> %r
; CHECK-NEXT: vsetvli a2, zero, e64, m4, ta, ma
; CHECK-NEXT: vmv.s.x v12, a0
; CHECK-NEXT: slli a1, a1, 32
-; CHECK-NEXT: srli a0, a1, 32
-; CHECK-NEXT: addi a1, a0, 1
-; CHECK-NEXT: vsetvli zero, a1, e64, m4, tu, ma
-; CHECK-NEXT: vslideup.vx v8, v12, a0
+; CHECK-NEXT: srli a1, a1, 32
+; CHECK-NEXT: addi a0, a1, 1
+; CHECK-NEXT: vsetvli zero, a0, e64, m4, tu, ma
+; CHECK-NEXT: vslideup.vx v8, v12, a1
; CHECK-NEXT: ret
%r = insertelement <vscale x 4 x i64> %v, i64 %elt, i32 %idx
ret <vscale x 4 x i64> %r
; CHECK-NEXT: vsetvli a2, zero, e64, m8, ta, ma
; CHECK-NEXT: vmv.s.x v16, a0
; CHECK-NEXT: slli a1, a1, 32
-; CHECK-NEXT: srli a0, a1, 32
-; CHECK-NEXT: addi a1, a0, 1
-; CHECK-NEXT: vsetvli zero, a1, e64, m8, tu, ma
-; CHECK-NEXT: vslideup.vx v8, v16, a0
+; CHECK-NEXT: srli a1, a1, 32
+; CHECK-NEXT: addi a0, a1, 1
+; CHECK-NEXT: vsetvli zero, a0, e64, m8, tu, ma
+; CHECK-NEXT: vslideup.vx v8, v16, a1
; CHECK-NEXT: ret
%r = insertelement <vscale x 8 x i64> %v, i64 %elt, i32 %idx
ret <vscale x 8 x i64> %r
; CHECK-RV32-NEXT: # %bb.5:
; CHECK-RV32-NEXT: mv a0, a4
; CHECK-RV32-NEXT: .LBB36_6:
-; CHECK-RV32-NEXT: srli a3, a4, 2
-; CHECK-RV32-NEXT: vsetvli a4, zero, e8, mf2, ta, ma
-; CHECK-RV32-NEXT: vslidedown.vx v0, v24, a3
+; CHECK-RV32-NEXT: srli a4, a4, 2
+; CHECK-RV32-NEXT: vsetvli a3, zero, e8, mf2, ta, ma
+; CHECK-RV32-NEXT: vslidedown.vx v0, v24, a4
; CHECK-RV32-NEXT: mul a3, a5, a2
; CHECK-RV32-NEXT: add a1, a1, a3
; CHECK-RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma
; CHECK-RV64-NEXT: # %bb.5:
; CHECK-RV64-NEXT: mv a0, a4
; CHECK-RV64-NEXT: .LBB36_6:
-; CHECK-RV64-NEXT: srli a3, a4, 2
-; CHECK-RV64-NEXT: vsetvli a4, zero, e8, mf2, ta, ma
-; CHECK-RV64-NEXT: vslidedown.vx v0, v24, a3
+; CHECK-RV64-NEXT: srli a4, a4, 2
+; CHECK-RV64-NEXT: vsetvli a3, zero, e8, mf2, ta, ma
+; CHECK-RV64-NEXT: vslidedown.vx v0, v24, a4
; CHECK-RV64-NEXT: mul a3, a5, a2
; CHECK-RV64-NEXT: add a1, a1, a3
; CHECK-RV64-NEXT: vsetvli zero, a0, e64, m8, ta, ma
define i32 @xori64i32(i64 %a) {
; RV32-LABEL: xori64i32:
; RV32: # %bb.0:
-; RV32-NEXT: srai a0, a1, 31
-; RV32-NEXT: lui a1, 524288
-; RV32-NEXT: addi a1, a1, -1
-; RV32-NEXT: xor a0, a0, a1
+; RV32-NEXT: srai a1, a1, 31
+; RV32-NEXT: lui a0, 524288
+; RV32-NEXT: addi a0, a0, -1
+; RV32-NEXT: xor a0, a1, a0
; RV32-NEXT: ret
;
; RV64-LABEL: xori64i32:
; RV32: # %bb.0: # %entry
; RV32-NEXT: bnez a0, .LBB19_2
; RV32-NEXT: # %bb.1: # %entry
-; RV32-NEXT: srli a0, a1, 1
-; RV32-NEXT: lui a1, 199729
-; RV32-NEXT: addi a1, a1, -975
-; RV32-NEXT: mulhu a0, a0, a1
-; RV32-NEXT: srli a1, a0, 2
+; RV32-NEXT: srli a1, a1, 1
+; RV32-NEXT: lui a0, 199729
+; RV32-NEXT: addi a0, a0, -975
+; RV32-NEXT: mulhu a1, a1, a0
+; RV32-NEXT: srli a1, a1, 2
; RV32-NEXT: .LBB19_2: # %entry
; RV32-NEXT: mv a0, a1
; RV32-NEXT: ret
; RV64-NEXT: srliw a0, a1, 1
; RV64-NEXT: lui a1, 199729
; RV64-NEXT: addiw a1, a1, -975
-; RV64-NEXT: mul a0, a0, a1
-; RV64-NEXT: srli a1, a0, 34
+; RV64-NEXT: mul a1, a0, a1
+; RV64-NEXT: srli a1, a1, 34
; RV64-NEXT: .LBB19_2: # %entry
; RV64-NEXT: mv a0, a1
; RV64-NEXT: ret
; RV32-NEXT: or a0, a0, a1
; RV32-NEXT: lw s4, 0(s0)
; RV32-NEXT: slli a2, a2, 30
-; RV32-NEXT: srli a1, a2, 31
-; RV32-NEXT: neg a1, a1
+; RV32-NEXT: srli a2, a2, 31
+; RV32-NEXT: neg a1, a2
; RV32-NEXT: li a2, 7
; RV32-NEXT: li a3, 0
; RV32-NEXT: call __moddi3@plt
; RV32M-NEXT: or a0, a0, a1
; RV32M-NEXT: lw s4, 0(s0)
; RV32M-NEXT: slli a2, a2, 30
-; RV32M-NEXT: srli a1, a2, 31
-; RV32M-NEXT: neg a1, a1
+; RV32M-NEXT: srli a2, a2, 31
+; RV32M-NEXT: neg a1, a2
; RV32M-NEXT: li a2, 7
; RV32M-NEXT: li a3, 0
; RV32M-NEXT: call __moddi3@plt
; RV64M-NEXT: or a1, a3, a1
; RV64M-NEXT: sw a1, 8(a0)
; RV64M-NEXT: slli a2, a2, 29
-; RV64M-NEXT: srli a1, a2, 61
-; RV64M-NEXT: sb a1, 12(a0)
+; RV64M-NEXT: srli a2, a2, 61
+; RV64M-NEXT: sb a2, 12(a0)
; RV64M-NEXT: ret
;
; RV32MV-LABEL: test_srem_vec:
; RV64MV-NEXT: sd a1, 0(a0)
; RV64MV-NEXT: slli a2, a2, 2
; RV64MV-NEXT: slli a3, a3, 31
-; RV64MV-NEXT: srli a1, a3, 62
-; RV64MV-NEXT: or a1, a1, a2
+; RV64MV-NEXT: srli a3, a3, 62
+; RV64MV-NEXT: or a1, a3, a2
; RV64MV-NEXT: sw a1, 8(a0)
; RV64MV-NEXT: addi sp, s0, -64
; RV64MV-NEXT: ld ra, 56(sp) # 8-byte Folded Reload
; RV32-NEXT: add a0, a0, a1
; RV32-NEXT: add a0, a5, a0
; RV32-NEXT: add a0, a0, s0
-; RV32-NEXT: srai a1, a4, 31
-; RV32-NEXT: xor a0, a0, a1
-; RV32-NEXT: xor a1, t6, a1
+; RV32-NEXT: srai a4, a4, 31
+; RV32-NEXT: xor a0, a0, a4
+; RV32-NEXT: xor a1, t6, a4
; RV32-NEXT: or a0, a1, a0
; RV32-NEXT: seqz a0, a0
; RV32-NEXT: lw s0, 12(sp) # 4-byte Folded Reload
; RV32ZBA-NEXT: add a0, a0, a1
; RV32ZBA-NEXT: add a0, a5, a0
; RV32ZBA-NEXT: add a0, a0, s0
-; RV32ZBA-NEXT: srai a1, a4, 31
-; RV32ZBA-NEXT: xor a0, a0, a1
-; RV32ZBA-NEXT: xor a1, t6, a1
+; RV32ZBA-NEXT: srai a4, a4, 31
+; RV32ZBA-NEXT: xor a0, a0, a4
+; RV32ZBA-NEXT: xor a1, t6, a4
; RV32ZBA-NEXT: or a0, a1, a0
; RV32ZBA-NEXT: seqz a0, a0
; RV32ZBA-NEXT: lw s0, 12(sp) # 4-byte Folded Reload
; RV32-NEXT: add a0, a0, a1
; RV32-NEXT: add a0, a5, a0
; RV32-NEXT: add a0, a0, s0
-; RV32-NEXT: srai a1, a4, 31
-; RV32-NEXT: xor a0, a0, a1
-; RV32-NEXT: xor a1, t6, a1
+; RV32-NEXT: srai a4, a4, 31
+; RV32-NEXT: xor a0, a0, a4
+; RV32-NEXT: xor a1, t6, a4
; RV32-NEXT: or a0, a1, a0
; RV32-NEXT: beqz a0, .LBB61_2
; RV32-NEXT: # %bb.1: # %overflow
; RV32ZBA-NEXT: add a0, a0, a1
; RV32ZBA-NEXT: add a0, a5, a0
; RV32ZBA-NEXT: add a0, a0, s0
-; RV32ZBA-NEXT: srai a1, a4, 31
-; RV32ZBA-NEXT: xor a0, a0, a1
-; RV32ZBA-NEXT: xor a1, t6, a1
+; RV32ZBA-NEXT: srai a4, a4, 31
+; RV32ZBA-NEXT: xor a0, a0, a4
+; RV32ZBA-NEXT: xor a1, t6, a4
; RV32ZBA-NEXT: or a0, a1, a0
; RV32ZBA-NEXT: beqz a0, .LBB61_2
; RV32ZBA-NEXT: # %bb.1: # %overflow
; RV32-NEXT: add a0, a0, a5
; RV32-NEXT: add a0, a4, a0
; RV32-NEXT: add a0, a0, t5
-; RV32-NEXT: srai a1, a3, 31
-; RV32-NEXT: xor a0, a0, a1
-; RV32-NEXT: xor a1, t4, a1
+; RV32-NEXT: srai a3, a3, 31
+; RV32-NEXT: xor a0, a0, a3
+; RV32-NEXT: xor a1, t4, a3
; RV32-NEXT: or a0, a1, a0
; RV32-NEXT: beqz a0, .LBB62_2
; RV32-NEXT: # %bb.1: # %overflow
; RV32ZBA-NEXT: add a0, a0, a5
; RV32ZBA-NEXT: add a0, a4, a0
; RV32ZBA-NEXT: add a0, a0, t5
-; RV32ZBA-NEXT: srai a1, a3, 31
-; RV32ZBA-NEXT: xor a0, a0, a1
-; RV32ZBA-NEXT: xor a1, t4, a1
+; RV32ZBA-NEXT: srai a3, a3, 31
+; RV32ZBA-NEXT: xor a0, a0, a3
+; RV32ZBA-NEXT: xor a1, t4, a3
; RV32ZBA-NEXT: or a0, a1, a0
; RV32ZBA-NEXT: beqz a0, .LBB62_2
; RV32ZBA-NEXT: # %bb.1: # %overflow