This extends the backwards walk to allow mutating the previous vsetvli's AVL value if it was not used by any instructions in between. In practice, this mostly benefits vmv.x.s and vfmv.f.s patterns since vector instructions which ignore VL are rare.
Differential Revision: https://reviews.llvm.org/D140048
A.MaskPolicy |= B.MaskPolicy;
}
-// Return true if we can mutate PrevMI's VTYPE to match MI's
-// without changing any the fields which have been used.
-// TODO: Restructure code to allow code reuse between this and isCompatible
-// above.
+// Return true if we can mutate PrevMI to match MI without changing any of the
+// fields which would be observed.
static bool canMutatePriorConfig(const MachineInstr &PrevMI,
const MachineInstr &MI,
const DemandedFields &Used) {
- // TODO: Extend this to handle cases where VL does change, but VL
- // has not been used. (e.g. over a vmv.x.s)
- if (!isVLPreservingConfig(MI))
- // Note: `vsetvli x0, x0, vtype' is the canonical instruction
- // for this case. If you find yourself wanting to add other forms
- // to this "unused VTYPE" case, we're probably missing a
- // canonicalization earlier.
- return false;
+ // If the VL values aren't equal, return false if either a) the former is
+ // demanded, or b) we can't rewrite the former to be the latter for
+ // implementation reasons.
+ if (!isVLPreservingConfig(MI)) {
+ if (Used.VL)
+ return false;
+
+ // TODO: Requires more care in the mutation...
+ if (isVLPreservingConfig(PrevMI))
+ return false;
+
+ // TODO: Track whether the register is defined between
+ // PrevMI and MI.
+ if (MI.getOperand(1).isReg() &&
+ RISCV::X0 != MI.getOperand(1).getReg())
+ return false;
+ }
if (!PrevMI.getOperand(2).isImm() || !MI.getOperand(2).isImm())
return false;
// Leave NextMI unchanged
continue;
} else if (canMutatePriorConfig(MI, *NextMI, Used)) {
+ if (!isVLPreservingConfig(*NextMI)) {
+ if (NextMI->getOperand(1).isImm())
+ MI.getOperand(1).ChangeToImmediate(NextMI->getOperand(1).getImm());
+ else
+ MI.getOperand(1).ChangeToRegister(NextMI->getOperand(1).getReg(), false);
+ }
MI.getOperand(2).setImm(NextMI->getOperand(2).getImm());
ToDelete.push_back(NextMI);
// fallthrough
; RV32-BITS-UNKNOWN-NEXT: addi s0, sp, 128
; RV32-BITS-UNKNOWN-NEXT: .cfi_def_cfa s0, 0
; RV32-BITS-UNKNOWN-NEXT: andi sp, sp, -64
-; RV32-BITS-UNKNOWN-NEXT: vsetivli zero, 0, e32, mf2, ta, ma
+; RV32-BITS-UNKNOWN-NEXT: vsetivli zero, 1, e32, mf2, ta, ma
; RV32-BITS-UNKNOWN-NEXT: vmv.x.s a0, v0
; RV32-BITS-UNKNOWN-NEXT: andi a1, a0, 1
; RV32-BITS-UNKNOWN-NEXT: sb a1, 63(sp)
; RV32-BITS-UNKNOWN-NEXT: slli a0, a0, 1
; RV32-BITS-UNKNOWN-NEXT: srli a0, a0, 31
; RV32-BITS-UNKNOWN-NEXT: sb a0, 33(sp)
-; RV32-BITS-UNKNOWN-NEXT: vsetivli zero, 1, e32, mf2, ta, ma
; RV32-BITS-UNKNOWN-NEXT: vslidedown.vi v8, v0, 1
; RV32-BITS-UNKNOWN-NEXT: vmv.x.s a0, v8
; RV32-BITS-UNKNOWN-NEXT: andi a1, a0, 1
; RV32-BITS-256-NEXT: addi s0, sp, 128
; RV32-BITS-256-NEXT: .cfi_def_cfa s0, 0
; RV32-BITS-256-NEXT: andi sp, sp, -64
-; RV32-BITS-256-NEXT: vsetivli zero, 0, e32, mf2, ta, ma
+; RV32-BITS-256-NEXT: vsetivli zero, 1, e32, mf2, ta, ma
; RV32-BITS-256-NEXT: vmv.x.s a0, v0
; RV32-BITS-256-NEXT: andi a1, a0, 1
; RV32-BITS-256-NEXT: sb a1, 63(sp)
; RV32-BITS-256-NEXT: slli a0, a0, 1
; RV32-BITS-256-NEXT: srli a0, a0, 31
; RV32-BITS-256-NEXT: sb a0, 33(sp)
-; RV32-BITS-256-NEXT: vsetivli zero, 1, e32, mf2, ta, ma
; RV32-BITS-256-NEXT: vslidedown.vi v8, v0, 1
; RV32-BITS-256-NEXT: vmv.x.s a0, v8
; RV32-BITS-256-NEXT: andi a1, a0, 1
; RV32-BITS-512-NEXT: addi s0, sp, 128
; RV32-BITS-512-NEXT: .cfi_def_cfa s0, 0
; RV32-BITS-512-NEXT: andi sp, sp, -64
-; RV32-BITS-512-NEXT: vsetivli zero, 0, e32, mf2, ta, ma
+; RV32-BITS-512-NEXT: vsetivli zero, 1, e32, mf2, ta, ma
; RV32-BITS-512-NEXT: vmv.x.s a0, v0
; RV32-BITS-512-NEXT: andi a1, a0, 1
; RV32-BITS-512-NEXT: sb a1, 63(sp)
; RV32-BITS-512-NEXT: slli a0, a0, 1
; RV32-BITS-512-NEXT: srli a0, a0, 31
; RV32-BITS-512-NEXT: sb a0, 33(sp)
-; RV32-BITS-512-NEXT: vsetivli zero, 1, e32, mf2, ta, ma
; RV32-BITS-512-NEXT: vslidedown.vi v8, v0, 1
; RV32-BITS-512-NEXT: vmv.x.s a0, v8
; RV32-BITS-512-NEXT: andi a1, a0, 1
; RV64ZVE32F-NEXT: vsetvli zero, zero, e16, mf2, tu, ma
; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 1
; RV64ZVE32F-NEXT: .LBB18_4: # %else2
-; RV64ZVE32F-NEXT: vsetivli zero, 0, e16, mf2, ta, ma
+; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
; RV64ZVE32F-NEXT: vmv.x.s a0, v8
; RV64ZVE32F-NEXT: lui a1, 16
; RV64ZVE32F-NEXT: addiw a1, a1, -1
; RV64ZVE32F-NEXT: and a0, a0, a1
-; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1
; RV64ZVE32F-NEXT: vmv.x.s a2, v8
; RV64ZVE32F-NEXT: and a1, a2, a1
; RV64ZVE32F-NEXT: andi a2, a1, 16
; RV64ZVE32F-NEXT: beqz a2, .LBB41_7
; RV64ZVE32F-NEXT: .LBB41_14: # %cond.load10
-; RV64ZVE32F-NEXT: vsetivli zero, 0, e32, m1, ta, ma
+; RV64ZVE32F-NEXT: vsetivli zero, 8, e32, m1, ta, ma
; RV64ZVE32F-NEXT: vmv.x.s a2, v12
; RV64ZVE32F-NEXT: slli a2, a2, 2
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: lw a2, 0(a2)
-; RV64ZVE32F-NEXT: vsetivli zero, 8, e32, m1, ta, ma
; RV64ZVE32F-NEXT: vmv.s.x v8, a2
; RV64ZVE32F-NEXT: vsetivli zero, 5, e32, m2, tu, ma
; RV64ZVE32F-NEXT: vslideup.vi v10, v8, 4
; RV64ZVE32F-NEXT: andi a2, a1, 16
; RV64ZVE32F-NEXT: beqz a2, .LBB80_7
; RV64ZVE32F-NEXT: .LBB80_14: # %cond.load10
-; RV64ZVE32F-NEXT: vsetivli zero, 0, e32, m1, ta, ma
+; RV64ZVE32F-NEXT: vsetivli zero, 8, e32, m1, ta, ma
; RV64ZVE32F-NEXT: vmv.x.s a2, v12
; RV64ZVE32F-NEXT: slli a2, a2, 2
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: flw ft0, 0(a2)
-; RV64ZVE32F-NEXT: vsetivli zero, 8, e32, m1, ta, ma
; RV64ZVE32F-NEXT: vfmv.s.f v8, ft0
; RV64ZVE32F-NEXT: vsetivli zero, 5, e32, m2, tu, ma
; RV64ZVE32F-NEXT: vslideup.vi v10, v8, 4
; CHECK-NEXT: vfmv.s.f v24, fa0
; CHECK-NEXT: vsetivli zero, 16, e32, m4, ta, ma
; CHECK-NEXT: vfwredosum.vs v16, v16, v24
-; CHECK-NEXT: vsetivli zero, 0, e64, m1, ta, ma
-; CHECK-NEXT: vfmv.f.s ft0, v16
; CHECK-NEXT: vsetivli zero, 16, e64, m1, ta, ma
+; CHECK-NEXT: vfmv.f.s ft0, v16
; CHECK-NEXT: vfmv.s.f v16, ft0
; CHECK-NEXT: vsetivli zero, 16, e32, m4, ta, ma
; CHECK-NEXT: vfwredosum.vs v8, v8, v16
; RV32-NEXT: vmv.s.x v9, zero
; RV32-NEXT: vsetivli zero, 4, e32, m1, ta, ma
; RV32-NEXT: vwredsum.vs v8, v8, v9
-; RV32-NEXT: vsetivli zero, 0, e64, m1, ta, ma
+; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma
; RV32-NEXT: vmv.x.s a0, v8
; RV32-NEXT: li a1, 32
-; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma
; RV32-NEXT: vsrl.vx v8, v8, a1
; RV32-NEXT: vmv.x.s a1, v8
; RV32-NEXT: ret
; RV32-NEXT: vmv.s.x v9, zero
; RV32-NEXT: vsetivli zero, 4, e32, m1, ta, ma
; RV32-NEXT: vwredsumu.vs v8, v8, v9
-; RV32-NEXT: vsetivli zero, 0, e64, m1, ta, ma
+; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma
; RV32-NEXT: vmv.x.s a0, v8
; RV32-NEXT: li a1, 32
-; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma
; RV32-NEXT: vsrl.vx v8, v8, a1
; RV32-NEXT: vmv.x.s a1, v8
; RV32-NEXT: ret
; RV32-NEXT: vmv.s.x v10, zero
; RV32-NEXT: vsetivli zero, 8, e32, m2, ta, ma
; RV32-NEXT: vwredsum.vs v8, v8, v10
-; RV32-NEXT: vsetivli zero, 0, e64, m1, ta, ma
+; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma
; RV32-NEXT: vmv.x.s a0, v8
; RV32-NEXT: li a1, 32
-; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma
; RV32-NEXT: vsrl.vx v8, v8, a1
; RV32-NEXT: vmv.x.s a1, v8
; RV32-NEXT: ret
; RV32-NEXT: vmv.s.x v10, zero
; RV32-NEXT: vsetivli zero, 8, e32, m2, ta, ma
; RV32-NEXT: vwredsumu.vs v8, v8, v10
-; RV32-NEXT: vsetivli zero, 0, e64, m1, ta, ma
+; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma
; RV32-NEXT: vmv.x.s a0, v8
; RV32-NEXT: li a1, 32
-; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma
; RV32-NEXT: vsrl.vx v8, v8, a1
; RV32-NEXT: vmv.x.s a1, v8
; RV32-NEXT: ret
; RV32-NEXT: vmv.s.x v12, zero
; RV32-NEXT: vsetivli zero, 16, e32, m4, ta, ma
; RV32-NEXT: vwredsum.vs v8, v8, v12
-; RV32-NEXT: vsetivli zero, 0, e64, m1, ta, ma
+; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma
; RV32-NEXT: vmv.x.s a0, v8
; RV32-NEXT: li a1, 32
-; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma
; RV32-NEXT: vsrl.vx v8, v8, a1
; RV32-NEXT: vmv.x.s a1, v8
; RV32-NEXT: ret
; RV32-NEXT: vmv.s.x v12, zero
; RV32-NEXT: vsetivli zero, 16, e32, m4, ta, ma
; RV32-NEXT: vwredsumu.vs v8, v8, v12
-; RV32-NEXT: vsetivli zero, 0, e64, m1, ta, ma
+; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma
; RV32-NEXT: vmv.x.s a0, v8
; RV32-NEXT: li a1, 32
-; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma
; RV32-NEXT: vsrl.vx v8, v8, a1
; RV32-NEXT: vmv.x.s a1, v8
; RV32-NEXT: ret
; RV32-NEXT: vmul.vv v8, v8, v16
; RV32-NEXT: vrgather.vi v16, v8, 1
; RV32-NEXT: vmul.vv v8, v8, v16
-; RV32-NEXT: vsetivli zero, 0, e32, m8, ta, ma
-; RV32-NEXT: vmv.x.s a0, v8
; RV32-NEXT: vsetivli zero, 1, e32, m8, ta, ma
+; RV32-NEXT: vmv.x.s a0, v8
; RV32-NEXT: vslidedown.vi v8, v8, 1
; RV32-NEXT: vmv.x.s a1, v8
; RV32-NEXT: ret
; RV32-NEXT: vmul.vv v8, v8, v16
; RV32-NEXT: vrgather.vi v16, v8, 1
; RV32-NEXT: vmul.vv v8, v8, v16
-; RV32-NEXT: vsetivli zero, 0, e32, m8, ta, ma
-; RV32-NEXT: vmv.x.s a0, v8
; RV32-NEXT: vsetivli zero, 1, e32, m8, ta, ma
+; RV32-NEXT: vmv.x.s a0, v8
; RV32-NEXT: vslidedown.vi v8, v8, 1
; RV32-NEXT: vmv.x.s a1, v8
; RV32-NEXT: ret
; RV32-NEXT: vlse64.v v9, (a0), zero
; RV32-NEXT: vsetvli zero, a2, e32, m1, tu, ma
; RV32-NEXT: vwredsum.vs v9, v8, v9, v0.t
-; RV32-NEXT: vsetivli zero, 0, e64, m1, ta, ma
+; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma
; RV32-NEXT: vmv.x.s a0, v9
; RV32-NEXT: li a1, 32
-; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma
; RV32-NEXT: vsrl.vx v8, v9, a1
; RV32-NEXT: vmv.x.s a1, v8
; RV32-NEXT: addi sp, sp, 16
; RV32-NEXT: vlse64.v v9, (a0), zero
; RV32-NEXT: vsetvli zero, a2, e32, m1, tu, ma
; RV32-NEXT: vwredsum.vs v9, v8, v9, v0.t
-; RV32-NEXT: vsetivli zero, 0, e64, m1, ta, ma
+; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma
; RV32-NEXT: vmv.x.s a0, v9
; RV32-NEXT: li a1, 32
-; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma
; RV32-NEXT: vsrl.vx v8, v9, a1
; RV32-NEXT: vmv.x.s a1, v8
; RV32-NEXT: addi sp, sp, 16
; RV32-NEXT: vlse64.v v10, (a0), zero
; RV32-NEXT: vsetvli zero, a2, e32, m2, tu, ma
; RV32-NEXT: vwredsum.vs v10, v8, v10, v0.t
-; RV32-NEXT: vsetivli zero, 0, e64, m1, ta, ma
+; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma
; RV32-NEXT: vmv.x.s a0, v10
; RV32-NEXT: li a1, 32
-; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma
; RV32-NEXT: vsrl.vx v8, v10, a1
; RV32-NEXT: vmv.x.s a1, v8
; RV32-NEXT: addi sp, sp, 16
; RV32-NEXT: vlse64.v v10, (a0), zero
; RV32-NEXT: vsetvli zero, a2, e32, m2, tu, ma
; RV32-NEXT: vwredsumu.vs v10, v8, v10, v0.t
-; RV32-NEXT: vsetivli zero, 0, e64, m1, ta, ma
+; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma
; RV32-NEXT: vmv.x.s a0, v10
; RV32-NEXT: li a1, 32
-; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma
; RV32-NEXT: vsrl.vx v8, v10, a1
; RV32-NEXT: vmv.x.s a1, v8
; RV32-NEXT: addi sp, sp, 16
; RV32-NEXT: vmv.s.x v9, zero
; RV32-NEXT: vsetvli a0, zero, e32, m1, ta, ma
; RV32-NEXT: vwredsum.vs v8, v8, v9
-; RV32-NEXT: vsetivli zero, 0, e64, m1, ta, ma
+; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma
; RV32-NEXT: vmv.x.s a0, v8
; RV32-NEXT: li a1, 32
-; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma
; RV32-NEXT: vsrl.vx v8, v8, a1
; RV32-NEXT: vmv.x.s a1, v8
; RV32-NEXT: ret
; RV32-NEXT: vmv.s.x v9, zero
; RV32-NEXT: vsetvli a0, zero, e32, m1, ta, ma
; RV32-NEXT: vwredsumu.vs v8, v8, v9
-; RV32-NEXT: vsetivli zero, 0, e64, m1, ta, ma
+; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma
; RV32-NEXT: vmv.x.s a0, v8
; RV32-NEXT: li a1, 32
-; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma
; RV32-NEXT: vsrl.vx v8, v8, a1
; RV32-NEXT: vmv.x.s a1, v8
; RV32-NEXT: ret
; RV32-NEXT: vmv.s.x v10, zero
; RV32-NEXT: vsetvli a0, zero, e32, m2, ta, ma
; RV32-NEXT: vwredsum.vs v8, v8, v10
-; RV32-NEXT: vsetivli zero, 0, e64, m1, ta, ma
+; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma
; RV32-NEXT: vmv.x.s a0, v8
; RV32-NEXT: li a1, 32
-; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma
; RV32-NEXT: vsrl.vx v8, v8, a1
; RV32-NEXT: vmv.x.s a1, v8
; RV32-NEXT: ret
; RV32-NEXT: vmv.s.x v10, zero
; RV32-NEXT: vsetvli a0, zero, e32, m2, ta, ma
; RV32-NEXT: vwredsumu.vs v8, v8, v10
-; RV32-NEXT: vsetivli zero, 0, e64, m1, ta, ma
+; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma
; RV32-NEXT: vmv.x.s a0, v8
; RV32-NEXT: li a1, 32
-; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma
; RV32-NEXT: vsrl.vx v8, v8, a1
; RV32-NEXT: vmv.x.s a1, v8
; RV32-NEXT: ret