/// Given a Count of resource usage and a Latency value, return true if a
/// SchedBoundary becomes resource limited.
+/// If we are checking after scheduling a node, return true as soon as the
+/// resource limit is reached exactly; otherwise it must be strictly exceeded.
static bool checkResourceLimit(unsigned LFactor, unsigned Count,
- unsigned Latency) {
- return (int)(Count - (Latency * LFactor)) > (int)LFactor;
+ unsigned Latency, bool AfterSchedNode) {
+ int ResCntFactor = (int)(Count - (Latency * LFactor));
+ if (AfterSchedNode)
+ return ResCntFactor >= (int)LFactor;
+ else
+ return ResCntFactor > (int)LFactor;
}
void SchedBoundary::reset() {
CheckPending = true;
IsResourceLimited =
checkResourceLimit(SchedModel->getLatencyFactor(), getCriticalCount(),
- getScheduledLatency());
+ getScheduledLatency(), true);
LLVM_DEBUG(dbgs() << "Cycle: " << CurrCycle << ' ' << Available.getName()
<< '\n');
// resource limited. If a stall occurred, bumpCycle does this.
IsResourceLimited =
checkResourceLimit(SchedModel->getLatencyFactor(), getCriticalCount(),
- getScheduledLatency());
+ getScheduledLatency(), true);
// Update CurrMOps after calling bumpCycle to handle stalls, since bumpCycle
// resets CurrMOps. Loop to handle instructions with more MOps than issue in
RemLatency = computeRemLatency(CurrZone);
RemLatencyComputed = true;
OtherResLimited = checkResourceLimit(SchedModel->getLatencyFactor(),
- OtherCount, RemLatency);
+ OtherCount, RemLatency, false);
}
// Schedule aggressively for latency in PostRA mode. We don't check for
; P9LE: # %bb.0: # %entry
; P9LE-NEXT: lfd f0, 24(r3)
; P9LE-NEXT: lfd f2, 8(r3)
-; P9LE-NEXT: lfd f1, 16(r3)
; P9LE-NEXT: xxmrghd vs0, vs2, vs0
+; P9LE-NEXT: lfd f1, 16(r3)
; P9LE-NEXT: lfd f3, 0(r3)
; P9LE-NEXT: xvcvdpsxws v2, vs0
; P9LE-NEXT: xxmrghd vs0, vs3, vs1
; P9LE: # %bb.0: # %entry
; P9LE-NEXT: lfd f0, 24(r3)
; P9LE-NEXT: lfd f2, 8(r3)
-; P9LE-NEXT: lfd f1, 16(r3)
; P9LE-NEXT: xxmrghd vs0, vs2, vs0
+; P9LE-NEXT: lfd f1, 16(r3)
; P9LE-NEXT: lfd f3, 0(r3)
; P9LE-NEXT: xvcvdpuxws v2, vs0
; P9LE-NEXT: xxmrghd vs0, vs3, vs1
; CHECK-PWR9-NEXT: .cfi_offset v31, -304
; CHECK-PWR9-NEXT: std r14, 240(r1) # 8-byte Folded Spill
; CHECK-PWR9-NEXT: std r15, 248(r1) # 8-byte Folded Spill
-; CHECK-PWR9-NEXT: std r16, 256(r1) # 8-byte Folded Spill
; CHECK-PWR9-NEXT: stxv v20, 48(r1) # 16-byte Folded Spill
; CHECK-PWR9-NEXT: stxv v21, 64(r1) # 16-byte Folded Spill
+; CHECK-PWR9-NEXT: std r16, 256(r1) # 8-byte Folded Spill
; CHECK-PWR9-NEXT: stxv v22, 80(r1) # 16-byte Folded Spill
; CHECK-PWR9-NEXT: std r17, 264(r1) # 8-byte Folded Spill
; CHECK-PWR9-NEXT: stxv v23, 96(r1) # 16-byte Folded Spill
; CHECK-PWR9-NEXT: ld r4, 32(r1) # 8-byte Folded Reload
; CHECK-PWR9-NEXT: lxv v31, 224(r1) # 16-byte Folded Reload
; CHECK-PWR9-NEXT: lxv v30, 208(r1) # 16-byte Folded Reload
-; CHECK-PWR9-NEXT: lxv v29, 192(r1) # 16-byte Folded Reload
; CHECK-PWR9-NEXT: add r3, r4, r3
+; CHECK-PWR9-NEXT: lxv v29, 192(r1) # 16-byte Folded Reload
; CHECK-PWR9-NEXT: lxv v28, 176(r1) # 16-byte Folded Reload
; CHECK-PWR9-NEXT: lxv v27, 160(r1) # 16-byte Folded Reload
; CHECK-PWR9-NEXT: lxv v26, 144(r1) # 16-byte Folded Reload
; CHECK-PWR9-NEXT: lfd f29, 504(r1) # 8-byte Folded Reload
; CHECK-PWR9-NEXT: lfd f28, 496(r1) # 8-byte Folded Reload
; CHECK-PWR9-NEXT: lfd f27, 488(r1) # 8-byte Folded Reload
-; CHECK-PWR9-NEXT: lfd f26, 480(r1) # 8-byte Folded Reload
; CHECK-PWR9-NEXT: ld r31, 376(r1) # 8-byte Folded Reload
; CHECK-PWR9-NEXT: ld r30, 368(r1) # 8-byte Folded Reload
; CHECK-PWR9-NEXT: ld r29, 360(r1) # 8-byte Folded Reload
; CHECK-PWR9-NEXT: ld r28, 352(r1) # 8-byte Folded Reload
+; CHECK-PWR9-NEXT: lfd f26, 480(r1) # 8-byte Folded Reload
; CHECK-PWR9-NEXT: ld r27, 344(r1) # 8-byte Folded Reload
; CHECK-PWR9-NEXT: ld r26, 336(r1) # 8-byte Folded Reload
; CHECK-PWR9-NEXT: ld r25, 328(r1) # 8-byte Folded Reload
; CHECK-LABEL: maxVecParam:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: xsaddqp v2, v2, v3
+; CHECK-NEXT: lxv v[[REG0:[0-9]+]], 224(r1)
; CHECK-NEXT: xsaddqp v2, v2, v4
; CHECK-NEXT: xsaddqp v2, v2, v5
; CHECK-NEXT: xsaddqp v2, v2, v6
; CHECK-NEXT: xsaddqp v2, v2, v11
; CHECK-NEXT: xsaddqp v2, v2, v12
; CHECK-NEXT: xsaddqp v2, v2, v13
-; CHECK-NEXT: lxv v[[REG0:[0-9]+]], 224(r1)
; CHECK-NEXT: xssubqp v2, v2, v[[REG0]]
; CHECK-NEXT: blr
fp128 %p6, fp128 %p7, fp128 %p8, fp128 %p9, fp128 %p10,
; CHECK-P9-NEXT: lxv vs1, 96(r4)
; CHECK-P9-NEXT: lxv vs2, 80(r4)
; CHECK-P9-NEXT: lxv vs3, 64(r4)
+; CHECK-P9-NEXT: xvcvdpuxds vs3, vs3
; CHECK-P9-NEXT: lxv vs4, 48(r4)
-; CHECK-P9-NEXT: xvcvdpuxds vs4, vs4
; CHECK-P9-NEXT: lxv vs5, 32(r4)
; CHECK-P9-NEXT: lxv vs6, 16(r4)
; CHECK-P9-NEXT: lxv vs7, 0(r4)
; CHECK-P9-NEXT: xvcvdpuxds vs7, vs7
; CHECK-P9-NEXT: xvcvdpuxds vs6, vs6
; CHECK-P9-NEXT: xvcvdpuxds vs5, vs5
-; CHECK-P9-NEXT: xvcvdpuxds vs3, vs3
+; CHECK-P9-NEXT: xvcvdpuxds vs4, vs4
; CHECK-P9-NEXT: xvcvdpuxds vs2, vs2
; CHECK-P9-NEXT: xvcvdpuxds vs1, vs1
; CHECK-P9-NEXT: xvcvdpuxds vs0, vs0
; CHECK-BE-NEXT: lxv vs1, 96(r4)
; CHECK-BE-NEXT: lxv vs2, 80(r4)
; CHECK-BE-NEXT: lxv vs3, 64(r4)
+; CHECK-BE-NEXT: xvcvdpuxds vs3, vs3
; CHECK-BE-NEXT: lxv vs4, 48(r4)
-; CHECK-BE-NEXT: xvcvdpuxds vs4, vs4
; CHECK-BE-NEXT: lxv vs5, 32(r4)
; CHECK-BE-NEXT: lxv vs6, 16(r4)
; CHECK-BE-NEXT: lxv vs7, 0(r4)
; CHECK-BE-NEXT: xvcvdpuxds vs7, vs7
; CHECK-BE-NEXT: xvcvdpuxds vs6, vs6
; CHECK-BE-NEXT: xvcvdpuxds vs5, vs5
-; CHECK-BE-NEXT: xvcvdpuxds vs3, vs3
+; CHECK-BE-NEXT: xvcvdpuxds vs4, vs4
; CHECK-BE-NEXT: xvcvdpuxds vs2, vs2
; CHECK-BE-NEXT: xvcvdpuxds vs1, vs1
; CHECK-BE-NEXT: xvcvdpuxds vs0, vs0
; CHECK-P9-NEXT: lxv vs1, 96(r4)
; CHECK-P9-NEXT: lxv vs2, 80(r4)
; CHECK-P9-NEXT: lxv vs3, 64(r4)
+; CHECK-P9-NEXT: xvcvdpsxds vs3, vs3
; CHECK-P9-NEXT: lxv vs4, 48(r4)
-; CHECK-P9-NEXT: xvcvdpsxds vs4, vs4
; CHECK-P9-NEXT: lxv vs5, 32(r4)
; CHECK-P9-NEXT: lxv vs6, 16(r4)
; CHECK-P9-NEXT: lxv vs7, 0(r4)
; CHECK-P9-NEXT: xvcvdpsxds vs7, vs7
; CHECK-P9-NEXT: xvcvdpsxds vs6, vs6
; CHECK-P9-NEXT: xvcvdpsxds vs5, vs5
-; CHECK-P9-NEXT: xvcvdpsxds vs3, vs3
+; CHECK-P9-NEXT: xvcvdpsxds vs4, vs4
; CHECK-P9-NEXT: xvcvdpsxds vs2, vs2
; CHECK-P9-NEXT: xvcvdpsxds vs1, vs1
; CHECK-P9-NEXT: xvcvdpsxds vs0, vs0
; CHECK-BE-NEXT: lxv vs1, 96(r4)
; CHECK-BE-NEXT: lxv vs2, 80(r4)
; CHECK-BE-NEXT: lxv vs3, 64(r4)
+; CHECK-BE-NEXT: xvcvdpsxds vs3, vs3
; CHECK-BE-NEXT: lxv vs4, 48(r4)
-; CHECK-BE-NEXT: xvcvdpsxds vs4, vs4
; CHECK-BE-NEXT: lxv vs5, 32(r4)
; CHECK-BE-NEXT: lxv vs6, 16(r4)
; CHECK-BE-NEXT: lxv vs7, 0(r4)
; CHECK-BE-NEXT: xvcvdpsxds vs7, vs7
; CHECK-BE-NEXT: xvcvdpsxds vs6, vs6
; CHECK-BE-NEXT: xvcvdpsxds vs5, vs5
-; CHECK-BE-NEXT: xvcvdpsxds vs3, vs3
+; CHECK-BE-NEXT: xvcvdpsxds vs4, vs4
; CHECK-BE-NEXT: xvcvdpsxds vs2, vs2
; CHECK-BE-NEXT: xvcvdpsxds vs1, vs1
; CHECK-BE-NEXT: xvcvdpsxds vs0, vs0
; CHECK-P9-NEXT: lxv v3, 96(r4)
; CHECK-P9-NEXT: lxv v4, 80(r4)
; CHECK-P9-NEXT: lxv v5, 64(r4)
+; CHECK-P9-NEXT: xvcvuxddp vs4, v5
; CHECK-P9-NEXT: lxv v0, 48(r4)
-; CHECK-P9-NEXT: xvcvuxddp vs3, v0
; CHECK-P9-NEXT: lxv v1, 32(r4)
; CHECK-P9-NEXT: lxv v6, 16(r4)
; CHECK-P9-NEXT: lxv v7, 0(r4)
; CHECK-P9-NEXT: xvcvuxddp vs0, v7
; CHECK-P9-NEXT: xvcvuxddp vs1, v6
; CHECK-P9-NEXT: xvcvuxddp vs2, v1
-; CHECK-P9-NEXT: xvcvuxddp vs4, v5
+; CHECK-P9-NEXT: xvcvuxddp vs3, v0
; CHECK-P9-NEXT: xvcvuxddp vs5, v4
; CHECK-P9-NEXT: xvcvuxddp vs6, v3
; CHECK-P9-NEXT: xvcvuxddp vs7, v2
; CHECK-BE-NEXT: lxv v3, 96(r4)
; CHECK-BE-NEXT: lxv v4, 80(r4)
; CHECK-BE-NEXT: lxv v5, 64(r4)
+; CHECK-BE-NEXT: xvcvuxddp vs4, v5
; CHECK-BE-NEXT: lxv v0, 48(r4)
-; CHECK-BE-NEXT: xvcvuxddp vs3, v0
; CHECK-BE-NEXT: lxv v1, 32(r4)
; CHECK-BE-NEXT: lxv v6, 16(r4)
; CHECK-BE-NEXT: lxv v7, 0(r4)
; CHECK-BE-NEXT: xvcvuxddp vs0, v7
; CHECK-BE-NEXT: xvcvuxddp vs1, v6
; CHECK-BE-NEXT: xvcvuxddp vs2, v1
-; CHECK-BE-NEXT: xvcvuxddp vs4, v5
+; CHECK-BE-NEXT: xvcvuxddp vs3, v0
; CHECK-BE-NEXT: xvcvuxddp vs5, v4
; CHECK-BE-NEXT: xvcvuxddp vs6, v3
; CHECK-BE-NEXT: xvcvuxddp vs7, v2
; CHECK-P9-NEXT: lxv v3, 96(r4)
; CHECK-P9-NEXT: lxv v4, 80(r4)
; CHECK-P9-NEXT: lxv v5, 64(r4)
+; CHECK-P9-NEXT: xvcvsxddp vs4, v5
; CHECK-P9-NEXT: lxv v0, 48(r4)
-; CHECK-P9-NEXT: xvcvsxddp vs3, v0
; CHECK-P9-NEXT: lxv v1, 32(r4)
; CHECK-P9-NEXT: lxv v6, 16(r4)
; CHECK-P9-NEXT: lxv v7, 0(r4)
; CHECK-P9-NEXT: xvcvsxddp vs0, v7
; CHECK-P9-NEXT: xvcvsxddp vs1, v6
; CHECK-P9-NEXT: xvcvsxddp vs2, v1
-; CHECK-P9-NEXT: xvcvsxddp vs4, v5
+; CHECK-P9-NEXT: xvcvsxddp vs3, v0
; CHECK-P9-NEXT: xvcvsxddp vs5, v4
; CHECK-P9-NEXT: xvcvsxddp vs6, v3
; CHECK-P9-NEXT: xvcvsxddp vs7, v2
; CHECK-BE-NEXT: lxv v3, 96(r4)
; CHECK-BE-NEXT: lxv v4, 80(r4)
; CHECK-BE-NEXT: lxv v5, 64(r4)
+; CHECK-BE-NEXT: xvcvsxddp vs4, v5
; CHECK-BE-NEXT: lxv v0, 48(r4)
-; CHECK-BE-NEXT: xvcvsxddp vs3, v0
; CHECK-BE-NEXT: lxv v1, 32(r4)
; CHECK-BE-NEXT: lxv v6, 16(r4)
; CHECK-BE-NEXT: lxv v7, 0(r4)
; CHECK-BE-NEXT: xvcvsxddp vs0, v7
; CHECK-BE-NEXT: xvcvsxddp vs1, v6
; CHECK-BE-NEXT: xvcvsxddp vs2, v1
-; CHECK-BE-NEXT: xvcvsxddp vs4, v5
+; CHECK-BE-NEXT: xvcvsxddp vs3, v0
; CHECK-BE-NEXT: xvcvsxddp vs5, v4
; CHECK-BE-NEXT: xvcvsxddp vs6, v3
; CHECK-BE-NEXT: xvcvsxddp vs7, v2