for (MachineBasicBlock::iterator I = MBB.begin(); I != MBB.end();) {
MachineBasicBlock::iterator MI = I;
+ // Check whether MI uses a vector length operand. If so, we prepare the VL
+ // register. We would like to reuse the VL register as much as possible. We
+ // would also like to keep the number of LEA instructions as low as
+ // possible. Therefore, we use a regular scalar register to hold immediate
+ // values used to load the VL register, and try to reuse identical scalar
+ // registers to avoid new LVLr instructions as much as possible.
unsigned Reg = getVL(*MI);
if (Reg != VE::NoRegister) {
LLVM_DEBUG(dbgs() << "Vector instruction found: ");
<< ". ");
if (!HasRegForVL || RegForVL != Reg) {
+ // MI uses VL, but with a different value held in a different scalar
+ // register, so generate a new LVL instruction just before MI.
LLVM_DEBUG(dbgs() << "Generate a LVL instruction to load "
<< RegName(Reg) << ".\n");
BuildMI(MBB, I, MI->getDebugLoc(), TII->get(VE::LVLr)).addReg(Reg);
} else {
LLVM_DEBUG(dbgs() << "Reuse current VL.\n");
}
- } else if (HasRegForVL) {
- // Old VL is overwritten, so disable HasRegForVL.
- if (MI->findRegisterDefOperandIdx(RegForVL, false, false, TRI) != -1) {
- LLVM_DEBUG(dbgs() << RegName(RegForVL) << " is killed: ");
- LLVM_DEBUG(MI->dump());
- HasRegForVL = false;
- }
}
+ // Check whether the scalar register holding the immediate value for the
+ // VL register is updated. Also, a call doesn't preserve the VL register.
if (HasRegForVL) {
- // The latest VL is killed, so disable HasRegForVL.
- if (MI->killsRegister(RegForVL, TRI)) {
- LLVM_DEBUG(dbgs() << RegName(RegForVL) << " is killed: ");
+ if (MI->definesRegister(RegForVL, TRI) ||
+ MI->modifiesRegister(RegForVL, TRI) ||
+ MI->killsRegister(RegForVL, TRI) || MI->isCall()) {
+ // The latest VL needs to be updated, so disable HasRegForVL.
+ LLVM_DEBUG(dbgs() << RegName(RegForVL) << " is needed to be updated: ");
LLVM_DEBUG(MI->dump());
HasRegForVL = false;
}
; Check that no redundant 'lvl' is inserted when vector length does not change
; in a basic block.
-
; Function Attrs: nounwind
define void @stable_vl(i32 %evl, i8* %P, i8* %Q) {
; CHECK-LABEL: stable_vl:
tail call void @llvm.ve.vl.vst.vssl(<256 x double> %l2, i64 16, i8* %Q, i32 %evl)
ret void
}
+
+;;; Check the case where we have a call in the middle of vector instructions.
+
+; Function Attrs: nounwind
+define void @call_invl(i32 %evl, i8* %P, i8* %Q) {
+; CHECK-LABEL: call_invl:
+; CHECK: .LBB{{[0-9]+}}_2:
+; CHECK-NEXT: st %s18, 288(, %s11) # 8-byte Folded Spill
+; CHECK-NEXT: st %s19, 296(, %s11) # 8-byte Folded Spill
+; CHECK-NEXT: st %s20, 304(, %s11) # 8-byte Folded Spill
+; CHECK-NEXT: or %s18, 0, %s1
+; CHECK-NEXT: and %s20, %s0, (32)0
+; CHECK-NEXT: lvl %s20
+; CHECK-NEXT: vld %v0, 8, %s1
+; CHECK-NEXT: or %s19, 0, %s2
+; CHECK-NEXT: vst %v0, 16, %s2
+; CHECK-NEXT: lea %s0, fun@lo
+; CHECK-NEXT: and %s0, %s0, (32)0
+; CHECK-NEXT: lea.sl %s12, fun@hi(, %s0)
+; CHECK-NEXT: bsic %s10, (, %s12)
+; CHECK-NEXT: lvl %s20
+; CHECK-NEXT: vld %v0, 16, %s18
+; CHECK-NEXT: vst %v0, 16, %s19
+; CHECK-NEXT: vld %v0, 8, %s18
+; CHECK-NEXT: vst %v0, 16, %s19
+; CHECK-NEXT: ld %s20, 304(, %s11) # 8-byte Folded Reload
+; CHECK-NEXT: ld %s19, 296(, %s11) # 8-byte Folded Reload
+; CHECK-NEXT: ld %s18, 288(, %s11) # 8-byte Folded Reload
+; CHECK-NEXT: or %s11, 0, %s9
+ %l0 = tail call <256 x double> @llvm.ve.vl.vld.vssl(i64 8, i8* %P, i32 %evl)
+ tail call void @llvm.ve.vl.vst.vssl(<256 x double> %l0, i64 16, i8* %Q, i32 %evl)
+ call void @fun()
+ %l1 = tail call <256 x double> @llvm.ve.vl.vld.vssl(i64 16, i8* %P, i32 %evl)
+ tail call void @llvm.ve.vl.vst.vssl(<256 x double> %l1, i64 16, i8* %Q, i32 %evl)
+ %l2 = tail call <256 x double> @llvm.ve.vl.vld.vssl(i64 8, i8* %P, i32 %evl)
+ tail call void @llvm.ve.vl.vst.vssl(<256 x double> %l2, i64 16, i8* %Q, i32 %evl)
+ ret void
+}
+
+declare void @fun()