If the GPR destination register of a VSETVLI instruction is unused, we can replace it with X0. This discards the result and thus reduces register pressure.
After the core insertion/lowering algorithm has run, many user-written VSETVLIs have an unused GPR result (VL and VTYPE are now read directly instead), so this kicks in for most tests which involve a vsetvli intrinsic for fixed-length vectorization. (vscale vectorization generally uses the GPR result to know how far to, e.g., advance pointers in a loop, and those uses are not removed.) When inserting VSETVLIs to lower pseudos, we already prefer the X0 form.
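For example (an illustrative sketch mirroring the test updates below), a vsetvli whose GPR result has no remaining uses:

  vsetvli a0, a0, e64, m1, ta, mu    # writes the new VL to a0, but a0 is dead
  vfadd.vv v8, v8, v9                # reads VL/VTYPE implicitly, not a0

can instead write its result to x0:

  vsetvli zero, a0, e64, m1, ta, mu  # same VL/VTYPE update; a0 is not clobbered
  vfadd.vv v8, v8, v9

The VL/VTYPE state the vector instructions depend on is unchanged; only the dead GPR write is dropped.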
Differential Revision: https://reviews.llvm.org/D124961
// predecessors.
for (MachineBasicBlock &MBB : MF)
emitVSETVLIs(MBB);
+
+ // Once we're fully done rewriting all the instructions, do a final pass
+ // through to check for VSETVLIs which write to an unused destination.
+ // For the non-X0,X0 variant (one that computes a new VL), we can
+ // replace an unused destination register with X0 to reduce register
+ // pressure. This is really a generic optimization which can be
+ // applied to any dead def (TODO: generalize).
+ for (MachineBasicBlock &MBB : MF) {
+ for (MachineInstr &MI : MBB) {
+ if (MI.getOpcode() == RISCV::PseudoVSETVLI ||
+ MI.getOpcode() == RISCV::PseudoVSETIVLI) {
+ Register VRegDef = MI.getOperand(0).getReg();
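+ // A def of the physical register X0 is already the discard form; for
+ // a virtual register def with no remaining non-debug uses, the result
+ // is dead and can be redirected to X0.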
+ if (VRegDef != RISCV::X0 && MRI->use_nodbg_empty(VRegDef))
+ MI.getOperand(0).setReg(RISCV::X0);
+ }
+ }
+ }
}
BlockInfo.clear();
define void @test_vsetvli_e64mf8(i32 %avl) nounwind {
; CHECK-LABEL: test_vsetvli_e64mf8:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetvli a0, a0, e64, mf8, ta, mu
+; CHECK-NEXT: vsetvli zero, a0, e64, mf8, ta, mu
; CHECK-NEXT: ret
call i32 @llvm.riscv.vsetvli.i32(i32 %avl, i32 3, i32 5)
ret void
define void @test_vsetvli_e8mf2_zero_avl() nounwind {
; CHECK-LABEL: test_vsetvli_e8mf2_zero_avl:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetivli a0, 0, e8, mf2, ta, mu
+; CHECK-NEXT: vsetivli zero, 0, e8, mf2, ta, mu
; CHECK-NEXT: ret
call i32 @llvm.riscv.vsetvli.i32(i32 0, i32 0, i32 7)
ret void
define <vscale x 4 x i32> @redundant_vsetvli(i32 %avl, <vscale x 4 x i32>* %ptr) nounwind {
; CHECK-LABEL: redundant_vsetvli:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetvli a0, a0, e32, m2, ta, mu
+; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, mu
; CHECK-NEXT: vle32.v v8, (a1)
; CHECK-NEXT: ret
%vl = call i32 @llvm.riscv.vsetvli.i32(i32 %avl, i32 2, i32 1)
; CHECK-LABEL: repeated_vsetvli:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli a0, a0, e32, m2, ta, mu
-; CHECK-NEXT: vsetvli a0, a0, e32, m2, ta, mu
+; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, mu
; CHECK-NEXT: vle32.v v8, (a1)
; CHECK-NEXT: ret
%vl0 = call i32 @llvm.riscv.vsetvli.i32(i32 %avl, i32 2, i32 1)
define void @test_vsetvli_e8m1(i64 %avl) nounwind {
; CHECK-LABEL: test_vsetvli_e8m1:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetvli a0, a0, e8, m1, ta, mu
+; CHECK-NEXT: vsetvli zero, a0, e8, m1, ta, mu
; CHECK-NEXT: ret
call i64 @llvm.riscv.vsetvli.i64(i64 %avl, i64 0, i64 0)
ret void
define void @test_vsetvli_e16mf4(i64 %avl) nounwind {
; CHECK-LABEL: test_vsetvli_e16mf4:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetvli a0, a0, e16, mf4, ta, mu
+; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, mu
; CHECK-NEXT: ret
call i64 @llvm.riscv.vsetvli.i64(i64 %avl, i64 1, i64 6)
ret void
define void @test_vsetvli_e32mf8_zero_avl() nounwind {
; CHECK-LABEL: test_vsetvli_e32mf8_zero_avl:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetivli a0, 0, e16, mf4, ta, mu
+; CHECK-NEXT: vsetivli zero, 0, e16, mf4, ta, mu
; CHECK-NEXT: ret
call i64 @llvm.riscv.vsetvli.i64(i64 0, i64 1, i64 6)
ret void
define <vscale x 4 x i32> @redundant_vsetvli(i64 %avl, <vscale x 4 x i32>* %ptr) nounwind {
; CHECK-LABEL: redundant_vsetvli:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetvli a0, a0, e32, m2, ta, mu
+; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, mu
; CHECK-NEXT: vle32.v v8, (a1)
; CHECK-NEXT: ret
%vl = call i64 @llvm.riscv.vsetvli.i64(i64 %avl, i64 2, i64 1)
; CHECK-LABEL: repeated_vsetvli:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli a0, a0, e32, m2, ta, mu
-; CHECK-NEXT: vsetvli a0, a0, e32, m2, ta, mu
+; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, mu
; CHECK-NEXT: vle32.v v8, (a1)
; CHECK-NEXT: ret
%vl0 = call i64 @llvm.riscv.vsetvli.i64(i64 %avl, i64 2, i64 1)
define <vscale x 1 x double> @test1(i64 %avl, i8 zeroext %cond, <vscale x 1 x double> %a, <vscale x 1 x double> %b) nounwind {
; CHECK-LABEL: test1:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: vsetvli a0, a0, e64, m1, ta, mu
+; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, mu
; CHECK-NEXT: beqz a1, .LBB0_2
; CHECK-NEXT: # %bb.1: # %if.then
; CHECK-NEXT: vfadd.vv v8, v8, v9
define <vscale x 1 x double> @test2(i64 %avl, i8 zeroext %cond, <vscale x 1 x double> %a, <vscale x 1 x double> %b) nounwind {
; CHECK-LABEL: test2:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: vsetvli a0, a0, e64, m1, ta, mu
+; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, mu
; CHECK-NEXT: beqz a1, .LBB1_2
; CHECK-NEXT: # %bb.1: # %if.then
; CHECK-NEXT: vfadd.vv v9, v8, v9
; CHECK-LABEL: test5:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: andi a2, a1, 1
-; CHECK-NEXT: vsetvli a0, a0, e64, m1, ta, mu
+; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, mu
; CHECK-NEXT: bnez a2, .LBB4_3
; CHECK-NEXT: # %bb.1: # %if.else
; CHECK-NEXT: vfsub.vv v9, v8, v9
; CHECK-NEXT: andi a1, a1, 2
; CHECK-NEXT: beqz a1, .LBB5_4
; CHECK-NEXT: .LBB5_2: # %if.then4
-; CHECK-NEXT: vsetvli a0, a0, e64, m1, ta, mu
+; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, mu
; CHECK-NEXT: lui a0, %hi(.LCPI5_0)
; CHECK-NEXT: addi a0, a0, %lo(.LCPI5_0)
; CHECK-NEXT: vlse64.v v9, (a0), zero
; CHECK-NEXT: andi a1, a1, 2
; CHECK-NEXT: bnez a1, .LBB5_2
; CHECK-NEXT: .LBB5_4: # %if.else5
-; CHECK-NEXT: vsetvli a0, a0, e32, m1, ta, mu
+; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, mu
; CHECK-NEXT: lui a0, %hi(.LCPI5_2)
; CHECK-NEXT: addi a0, a0, %lo(.LCPI5_2)
; CHECK-NEXT: vlse32.v v9, (a0), zero
; CHECK-NEXT: [[COPY1:%[0-9]+]]:vr = COPY $v9
; CHECK-NEXT: [[COPY2:%[0-9]+]]:vr = COPY $v8
; CHECK-NEXT: [[COPY3:%[0-9]+]]:gpr = COPY $x10
- ; CHECK-NEXT: [[PseudoVSETVLI:%[0-9]+]]:gprnox0 = PseudoVSETVLI [[COPY]], 88 /* e64, m1, ta, mu */, implicit-def $vl, implicit-def $vtype
+ ; CHECK-NEXT: $x0 = PseudoVSETVLI [[COPY]], 88 /* e64, m1, ta, mu */, implicit-def $vl, implicit-def $vtype
; CHECK-NEXT: [[COPY4:%[0-9]+]]:gpr = COPY $x0
; CHECK-NEXT: BEQ [[COPY3]], [[COPY4]], %bb.2
; CHECK-NEXT: PseudoBR %bb.1
define <vscale x 1 x double> @test1(i64 %avl, <vscale x 1 x double> %a, <vscale x 1 x double> %b) nounwind {
; CHECK-LABEL: test1:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: vsetvli a0, a0, e64, m1, ta, mu
+; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, mu
; CHECK-NEXT: vfadd.vv v8, v8, v9
; CHECK-NEXT: ret
entry:
define <vscale x 1 x double> @test2(i64 %avl, <vscale x 1 x double> %a, <vscale x 1 x double> %b) nounwind {
; CHECK-LABEL: test2:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: vsetvli a0, a0, e64, m1, ta, mu
+; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, mu
; CHECK-NEXT: vfadd.vv v8, v8, v9
; CHECK-NEXT: ret
entry:
define <vscale x 1 x i64> @test3(i64 %avl, <vscale x 1 x i64> %a, <vscale x 1 x i64>* %b, <vscale x 1 x i1> %c) nounwind {
; CHECK-LABEL: test3:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: vsetvli a0, a0, e64, m1, ta, mu
+; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, mu
; CHECK-NEXT: vle64.v v8, (a1), v0.t
; CHECK-NEXT: ret
entry:
define <vscale x 1 x i64> @test4(i64 %avl, <vscale x 1 x i64> %a, <vscale x 1 x i64>* %b, <vscale x 1 x i1> %c) nounwind {
; CHECK-LABEL: test4:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: vsetvli a0, a0, e64, m1, ta, mu
+; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, mu
; CHECK-NEXT: vle64.v v8, (a1), v0.t
; CHECK-NEXT: ret
entry:
define <vscale x 1 x i1> @test5(<vscale x 1 x i64> %0, <vscale x 1 x i64> %1, <vscale x 1 x i1> %2, i64 %avl) nounwind {
; CHECK-LABEL: test5:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: vsetvli a0, a0, e64, m1, ta, mu
+; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, mu
; CHECK-NEXT: vmseq.vv v8, v8, v9
; CHECK-NEXT: vmand.mm v0, v8, v0
; CHECK-NEXT: ret
define <vscale x 1 x i64> @test8(<vscale x 1 x i64> %a, i64 %b, <vscale x 1 x i1> %mask) nounwind {
; CHECK-LABEL: test8:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: vsetivli a1, 6, e64, m1, tu, mu
+; CHECK-NEXT: vsetivli zero, 6, e64, m1, tu, mu
; CHECK-NEXT: vmv.s.x v8, a0
; CHECK-NEXT: ret
entry:
define <vscale x 1 x double> @test11(<vscale x 1 x double> %a, double %b) nounwind {
; CHECK-LABEL: test11:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: vsetivli a0, 6, e64, m1, tu, mu
+; CHECK-NEXT: vsetivli zero, 6, e64, m1, tu, mu
; CHECK-NEXT: vfmv.s.f v8, fa0
; CHECK-NEXT: ret
entry:
; CHECK-NEXT: [[COPY:%[0-9]+]]:gprnox0 = COPY $x10
; CHECK-NEXT: [[COPY1:%[0-9]+]]:vr = COPY $v9
; CHECK-NEXT: [[COPY2:%[0-9]+]]:vr = COPY $v8
- ; CHECK-NEXT: [[PseudoVSETVLI:%[0-9]+]]:gprnox0 = PseudoVSETVLI [[COPY]], 88 /* e64, m1, ta, mu */, implicit-def $vl, implicit-def $vtype
+ ; CHECK-NEXT: $x0 = PseudoVSETVLI [[COPY]], 88 /* e64, m1, ta, mu */, implicit-def $vl, implicit-def $vtype
; CHECK-NEXT: [[PseudoVADD_VV_M1_:%[0-9]+]]:vr = PseudoVADD_VV_M1 [[COPY2]], [[COPY1]], $noreg, 6 /* e64 */, implicit $vl, implicit $vtype
; CHECK-NEXT: $v8 = COPY [[PseudoVADD_VV_M1_]]
; CHECK-NEXT: PseudoRET implicit $v8