[VE] Correct LVLGen (LVL instruction insert pass)

author Kazushi (Jam) Marukawa <marukawa@nec.com>

Sat, 5 Dec 2020 07:53:39 +0000 (16:53 +0900)

committer Kazushi (Jam) Marukawa <marukawa@nec.com>

Tue, 8 Dec 2020 21:33:53 +0000 (06:33 +0900)
author Kazushi (Jam) Marukawa <marukawa@nec.com>
Sat, 5 Dec 2020 07:53:39 +0000 (16:53 +0900)
committer Kazushi (Jam) Marukawa <marukawa@nec.com>
Tue, 8 Dec 2020 21:33:53 +0000 (06:33 +0900)
diff --git a/llvm/lib/Target/VE/LVLGen.cpp b/llvm/lib/Target/VE/LVLGen.cpp

index 08b350a..c458892 100644 (file)
--- a/llvm/lib/Target/VE/LVLGen.cpp
+++ b/llvm/lib/Target/VE/LVLGen.cpp
@@ -68,6 +68,12 @@ bool LVLGen::runOnMachineBasicBlock(MachineBasicBlock &MBB) {
    for (MachineBasicBlock::iterator I = MBB.begin(); I != MBB.end();) {
      MachineBasicBlock::iterator MI = I;
  
+    // Check whether MI uses a vector length operand.  If so, we prepare the VL
+    // register for it.  We would like to reuse the VL register as much as
+    // possible, and we would also like to keep the number of LEA instructions
+    // as small as possible.  Therefore, we use a regular scalar register to
+    // hold the immediate value loaded into the VL register, and we try to reuse
+    // identical scalar registers to avoid new LVLr instructions where possible.
      unsigned Reg = getVL(*MI);
      if (Reg != VE::NoRegister) {
        LLVM_DEBUG(dbgs() << "Vector instruction found: ");
@@ -78,6 +84,8 @@ bool LVLGen::runOnMachineBasicBlock(MachineBasicBlock &MBB) {
                          << ". ");
  
        if (!HasRegForVL || RegForVL != Reg) {
+        // VL is not tracked yet, or MI needs a value held in a different scalar
+        // register.  So, generate a new LVL instruction just before MI.
          LLVM_DEBUG(dbgs() << "Generate a LVL instruction to load "
                            << RegName(Reg) << ".\n");
          BuildMI(MBB, I, MI->getDebugLoc(), TII->get(VE::LVLr)).addReg(Reg);
@@ -87,18 +95,15 @@ bool LVLGen::runOnMachineBasicBlock(MachineBasicBlock &MBB) {
        } else {
          LLVM_DEBUG(dbgs() << "Reuse current VL.\n");
        }
-    } else if (HasRegForVL) {
-      // Old VL is overwritten, so disable HasRegForVL.
-      if (MI->findRegisterDefOperandIdx(RegForVL, false, false, TRI) != -1) {
-        LLVM_DEBUG(dbgs() << RegName(RegForVL) << " is killed: ");
-        LLVM_DEBUG(MI->dump());
-        HasRegForVL = false;
-      }
      }
+    // Check whether MI updates the scalar register holding the value for the
+    // VL register.  Also, a call doesn't preserve the VL register.
      if (HasRegForVL) {
-      // The latest VL is killed, so disable HasRegForVL.
-      if (MI->killsRegister(RegForVL, TRI)) {
-        LLVM_DEBUG(dbgs() << RegName(RegForVL) << " is killed: ");
+      if (MI->definesRegister(RegForVL, TRI) ||
+          MI->modifiesRegister(RegForVL, TRI) ||
+          MI->killsRegister(RegForVL, TRI) || MI->isCall()) {
+        // The latest VL needs to be updated, so disable HasRegForVL.
+        LLVM_DEBUG(dbgs() << RegName(RegForVL) << " is needed to be updated: ");
          LLVM_DEBUG(MI->dump());
          HasRegForVL = false;
        }
diff --git a/llvm/test/CodeGen/VE/VELIntrinsics/lvlgen.ll b/llvm/test/CodeGen/VE/VELIntrinsics/lvlgen.ll

index ac889e7..c4db624 100644 (file)
--- a/llvm/test/CodeGen/VE/VELIntrinsics/lvlgen.ll
+++ b/llvm/test/CodeGen/VE/VELIntrinsics/lvlgen.ll
@@ -42,7 +42,6 @@ define void @switching_vl(i32 %evl, i32 %evl2, i8* %P, i8* %Q) {
  ; Check that no redundant 'lvl' is inserted when vector length does not change
  ; in a basic block.
  
-
  ; Function Attrs: nounwind
  define void @stable_vl(i32 %evl, i8* %P, i8* %Q) {
  ; CHECK-LABEL: stable_vl:
@@ -64,3 +63,43 @@ define void @stable_vl(i32 %evl, i8* %P, i8* %Q) {
    tail call void @llvm.ve.vl.vst.vssl(<256 x double> %l2, i64 16, i8* %Q, i32 %evl)
    ret void
  }
+
+;;; Check the case where we have a call in the middle of vector instructions.
+
+; Function Attrs: nounwind
+define void @call_invl(i32 %evl, i8* %P, i8* %Q) {
+; CHECK-LABEL: call_invl:
+; CHECK:       .LBB{{[0-9]+}}_2:
+; CHECK-NEXT:    st %s18, 288(, %s11) # 8-byte Folded Spill
+; CHECK-NEXT:    st %s19, 296(, %s11) # 8-byte Folded Spill
+; CHECK-NEXT:    st %s20, 304(, %s11) # 8-byte Folded Spill
+; CHECK-NEXT:    or %s18, 0, %s1
+; CHECK-NEXT:    and %s20, %s0, (32)0
+; CHECK-NEXT:    lvl %s20
+; CHECK-NEXT:    vld %v0, 8, %s1
+; CHECK-NEXT:    or %s19, 0, %s2
+; CHECK-NEXT:    vst %v0, 16, %s2
+; CHECK-NEXT:    lea %s0, fun@lo
+; CHECK-NEXT:    and %s0, %s0, (32)0
+; CHECK-NEXT:    lea.sl %s12, fun@hi(, %s0)
+; CHECK-NEXT:    bsic %s10, (, %s12)
+; CHECK-NEXT:    lvl %s20
+; CHECK-NEXT:    vld %v0, 16, %s18
+; CHECK-NEXT:    vst %v0, 16, %s19
+; CHECK-NEXT:    vld %v0, 8, %s18
+; CHECK-NEXT:    vst %v0, 16, %s19
+; CHECK-NEXT:    ld %s20, 304(, %s11) # 8-byte Folded Reload
+; CHECK-NEXT:    ld %s19, 296(, %s11) # 8-byte Folded Reload
+; CHECK-NEXT:    ld %s18, 288(, %s11) # 8-byte Folded Reload
+; CHECK-NEXT:    or %s11, 0, %s9
+  %l0 = tail call <256 x double> @llvm.ve.vl.vld.vssl(i64 8, i8* %P, i32 %evl)
+  tail call void @llvm.ve.vl.vst.vssl(<256 x double> %l0, i64 16, i8* %Q, i32 %evl)
+  call void @fun()
+  %l1 = tail call <256 x double> @llvm.ve.vl.vld.vssl(i64 16, i8* %P, i32 %evl)
+  tail call void @llvm.ve.vl.vst.vssl(<256 x double> %l1, i64 16, i8* %Q, i32 %evl)
+  %l2 = tail call <256 x double> @llvm.ve.vl.vld.vssl(i64 8, i8* %P, i32 %evl)
+  tail call void @llvm.ve.vl.vst.vssl(<256 x double> %l2, i64 16, i8* %Q, i32 %evl)
+  ret void
+}
+
+declare void @fun()
author	Kazushi (Jam) Marukawa <marukawa@nec.com>
	Sat, 5 Dec 2020 07:53:39 +0000 (16:53 +0900)
committer	Kazushi (Jam) Marukawa <marukawa@nec.com>
	Tue, 8 Dec 2020 21:33:53 +0000 (06:33 +0900)
llvm/lib/Target/VE/LVLGen.cpp		patch \| blob \| history
llvm/test/CodeGen/VE/VELIntrinsics/lvlgen.ll		patch \| blob \| history