#define DEBUG_TYPE "riscv-insert-vsetvli"
#define RISCV_INSERT_VSETVLI_NAME "RISCV Insert VSETVLI pass"
+static cl::opt<bool> DisableInsertVSETVLPHIOpt(
+ "riscv-disable-insert-vsetvl-phi-opt", cl::init(false), cl::Hidden,
+ cl::desc("Disable looking through phis when inserting vsetvlis."));
+
namespace {
class VSETVLIInfo {
private:
bool needVSETVLI(const VSETVLIInfo &Require, const VSETVLIInfo &CurInfo);
+ bool needVSETVLIPHI(const VSETVLIInfo &Require, const MachineBasicBlock &MBB);
void insertVSETVLI(MachineBasicBlock &MBB, MachineInstr &MI,
const VSETVLIInfo &Info, const VSETVLIInfo &PrevInfo);
WorkList.push(S);
}
+// If we weren't able to prove a vsetvli was directly unneeded, it might still
+// be unneeded if the AVL is a phi node where all incoming values are VL
+// outputs from the last VSETVLI in their respective basic blocks. Returns true if a vsetvli is still needed for Require in MBB, false if the phi proves it redundant.
+bool RISCVInsertVSETVLI::needVSETVLIPHI(const VSETVLIInfo &Require,
+ const MachineBasicBlock &MBB) {
+ if (DisableInsertVSETVLPHIOpt)
+ return true; // Phi look-through was turned off on the command line.
+
+ if (!Require.hasAVLReg())
+ return true; // No AVL register, so there is no defining phi to inspect.
+
+ Register AVLReg = Require.getAVLReg();
+ if (!AVLReg.isVirtual())
+ return true; // The definition lookup below is only done for virtual registers.
+
+ // We need the AVL to be produced by a PHI node in this basic block.
+ MachineInstr *PHI = MRI->getVRegDef(AVLReg);
+ if (!PHI || PHI->getOpcode() != RISCV::PHI || PHI->getParent() != &MBB)
+ return true;
+
+ for (unsigned PHIOp = 1, NumOps = PHI->getNumOperands(); PHIOp != NumOps;
+ PHIOp += 2) { // Operands from index 1 are read as (incoming register, predecessor block) pairs.
+ Register InReg = PHI->getOperand(PHIOp).getReg();
+ MachineBasicBlock *PBB = PHI->getOperand(PHIOp + 1).getMBB();
+ const BlockData &PBBInfo = BlockInfo[PBB->getNumber()];
+ // If the exit from the predecessor has the VTYPE we are looking for
+ // we might be able to avoid a VSETVLI. An unknown exit state proves nothing.
+ if (PBBInfo.Exit.isUnknown() || !PBBInfo.Exit.hasSameVTYPE(Require))
+ return true;
+
+ // We need the PHI input to be the output of a VSET(I)VLI.
+ MachineInstr *DefMI = MRI->getVRegDef(InReg);
+ if (!DefMI || (DefMI->getOpcode() != RISCV::PseudoVSETVLI &&
+ DefMI->getOpcode() != RISCV::PseudoVSETIVLI))
+ return true;
+
+ // We found a VSET(I)VLI; make sure it matches the output of the
+ // predecessor block (both its AVL and its VTYPE must equal the exit state).
+ VSETVLIInfo DefInfo = getInfoForVSETVLI(*DefMI);
+ if (!DefInfo.hasSameAVL(PBBInfo.Exit) ||
+ !DefInfo.hasSameVTYPE(PBBInfo.Exit))
+ return true;
+ }
+
+ // If all the incoming values to the PHI checked out, we don't need
+ // to insert a VSETVLI.
+ return false;
+}
+
void RISCVInsertVSETVLI::emitVSETVLIs(MachineBasicBlock &MBB) {
VSETVLIInfo CurInfo;
// use the predecessor information.
assert(BlockInfo[MBB.getNumber()].Pred.isValid() &&
"Expected a valid predecessor state.");
- if (needVSETVLI(NewInfo, BlockInfo[MBB.getNumber()].Pred)) {
+ if (needVSETVLI(NewInfo, BlockInfo[MBB.getNumber()].Pred) &&
+ needVSETVLIPHI(NewInfo, MBB)) {
insertVSETVLI(MBB, MI, NewInfo, BlockInfo[MBB.getNumber()].Pred);
CurInfo = NewInfo;
}
ret <vscale x 1 x double> %3
}
-; FIXME: The last vsetvli is redundant, but we need to look through a phi to
-; prove it.
define <vscale x 1 x double> @test3(i64 %avl, i8 zeroext %cond, <vscale x 1 x double> %a, <vscale x 1 x double> %b) nounwind {
; CHECK-LABEL: test3:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: # %bb.1: # %if.then
; CHECK-NEXT: vsetvli a0, a0, e64,m1,ta,mu
; CHECK-NEXT: vfadd.vv v25, v8, v9
-; CHECK-NEXT: j .LBB2_3
+; CHECK-NEXT: vfmul.vv v8, v25, v8
+; CHECK-NEXT: ret
; CHECK-NEXT: .LBB2_2: # %if.else
; CHECK-NEXT: vsetvli a0, a0, e64,m1,ta,mu
; CHECK-NEXT: vfsub.vv v25, v8, v9
-; CHECK-NEXT: .LBB2_3: # %if.end
-; CHECK-NEXT: vsetvli zero, a0, e64,m1,ta,mu
; CHECK-NEXT: vfmul.vv v8, v25, v8
; CHECK-NEXT: ret
entry:
ret <vscale x 1 x double> %3
}
-; FIXME: The vsetvli in for.body can be removed, it's redundant by its
-; predecessors, but we need to look through a PHI to prove it.
define void @saxpy_vec(i64 %n, float %a, float* nocapture readonly %x, float* nocapture %y) {
; CHECK-LABEL: saxpy_vec:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: fmv.w.x ft0, a1
; CHECK-NEXT: .LBB8_2: # %for.body
; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
-; CHECK-NEXT: vsetvli zero, a4, e32,m8,ta,mu
; CHECK-NEXT: vle32.v v8, (a2)
; CHECK-NEXT: vle32.v v16, (a3)
; CHECK-NEXT: slli a1, a4, 2
; CHECK-NEXT: add a2, a2, a1
-; CHECK-NEXT: vsetvli zero, zero, e32,m8,tu,mu
+; CHECK-NEXT: vsetvli zero, a4, e32,m8,tu,mu
; CHECK-NEXT: vfmacc.vf v16, ft0, v8
; CHECK-NEXT: vsetvli zero, zero, e32,m8,ta,mu
; CHECK-NEXT: vse32.v v16, (a3)