// SLMWriteResPair - defines the scheduling resources for both forms of a
// foldable SchedWrite: the register form (SchedRW) and the load-folded form
// (SchedRW.Folded).
//   ExePorts - resources used by the register form; the folded form prepends
//              SLM_MEC_RSV to this list.
//   Lat      - latency of the register form.
//   Res      - resource cycles for ExePorts (default [1]); the folded form
//              prepends one cycle for SLM_MEC_RSV.
//   UOps     - base micro-op count (default 1).
//   LoadLat  - latency added on top of Lat for the folded form (default 3).
//   LoadUOps - micro-ops added on top of UOps for the folded form (default 0,
//              so instantiations that omit it keep NumMicroOps = UOps).
multiclass SLMWriteResPair<X86FoldableSchedWrite SchedRW,
list<ProcResourceKind> ExePorts,
int Lat, list<int> Res = [1], int UOps = 1,
- int LoadLat = 3> {
+ int LoadLat = 3, int LoadUOps = 0> {
// Register variant is using a single cycle on ExePort.
def : WriteRes<SchedRW, ExePorts> {
let Latency = Lat;
// Load-folded variant: occupies SLM_MEC_RSV for one cycle in addition to the
// ExePorts cycles, and adds LoadLat / LoadUOps to the register form's latency
// and micro-op count.
def : WriteRes<SchedRW.Folded, !listconcat([SLM_MEC_RSV], ExePorts)> {
let Latency = !add(Lat, LoadLat);
let ResourceCycles = !listconcat([1], Res);
- let NumMicroOps = UOps;
+ let NumMicroOps = !add(UOps, LoadUOps);
}
}
// Blend-related scheduling classes. Arguments after the port list follow the
// SLMWriteResPair parameter order: Lat, Res, UOps, LoadLat, LoadUOps.
defm : SLMWriteResPair<WriteFBlend, [SLM_FPC_RSV0], 1>;
defm : X86WriteResPairUnsupported<WriteFBlendY>;
defm : X86WriteResPairUnsupported<WriteFBlendZ>;
// WriteFVarBlend: latency 4, 4 cycles on SLM_FPC_RSV0, 2 uops; the folded form
// adds 3 cycles of latency and 1 uop for the load.
-defm : SLMWriteResPair<WriteFVarBlend, [SLM_FPC_RSV0], 4, [4], 3>;
+defm : SLMWriteResPair<WriteFVarBlend, [SLM_FPC_RSV0], 4, [4], 2, 3, 1>;
defm : X86WriteResPairUnsupported<WriteFVarBlendY>;
defm : X86WriteResPairUnsupported<WriteFVarBlendZ>;
defm : X86WriteResPairUnsupported<WriteFShuffle256>;
defm : SLMWriteResPair<WriteBlend, [SLM_FPC_RSV0], 1>;
defm : SLMWriteResPair<WriteBlendY, [SLM_FPC_RSV0], 1>;
defm : X86WriteResPairUnsupported<WriteBlendZ>;
// WriteVarBlend is given the same numbers as WriteFVarBlend rather than the
// 1-cycle defaults.
-defm : SLMWriteResPair<WriteVarBlend, [SLM_FPC_RSV0], 1>;
+defm : SLMWriteResPair<WriteVarBlend, [SLM_FPC_RSV0], 4, [4], 2, 3, 1>;
defm : X86WriteResPairUnsupported<WriteVarBlendY>;
defm : X86WriteResPairUnsupported<WriteVarBlendZ>;
defm : SLMWriteResPair<WriteMPSAD, [SLM_FPC_RSV0], 7>;
# CHECK-NEXT: 1 4 1.00 * blendpd $11, (%rax), %xmm2
# CHECK-NEXT: 1 1 1.00 blendps $11, %xmm0, %xmm2
# CHECK-NEXT: 1 4 1.00 * blendps $11, (%rax), %xmm2
-# CHECK-NEXT: 3 4 4.00 blendvpd %xmm0, %xmm0, %xmm2
+# CHECK-NEXT: 2 4 4.00 blendvpd %xmm0, %xmm0, %xmm2
# CHECK-NEXT: 3 7 4.00 * blendvpd %xmm0, (%rax), %xmm2
-# CHECK-NEXT: 3 4 4.00 blendvps %xmm0, %xmm0, %xmm2
+# CHECK-NEXT: 2 4 4.00 blendvps %xmm0, %xmm0, %xmm2
# CHECK-NEXT: 3 7 4.00 * blendvps %xmm0, (%rax), %xmm2
# CHECK-NEXT: 1 3 1.00 dppd $22, %xmm0, %xmm2
# CHECK-NEXT: 1 6 1.00 * dppd $22, (%rax), %xmm2
# CHECK-NEXT: 1 10 1.00 * mpsadbw $1, (%rax), %xmm2
# CHECK-NEXT: 1 1 1.00 packusdw %xmm0, %xmm2
# CHECK-NEXT: 1 4 1.00 * packusdw (%rax), %xmm2
-# CHECK-NEXT: 1 1 1.00 pblendvb %xmm0, %xmm0, %xmm2
-# CHECK-NEXT: 1 4 1.00 * pblendvb %xmm0, (%rax), %xmm2
+# CHECK-NEXT: 2 4 4.00 pblendvb %xmm0, %xmm0, %xmm2
+# CHECK-NEXT: 3 7 4.00 * pblendvb %xmm0, (%rax), %xmm2
# CHECK-NEXT: 1 1 1.00 pblendw $11, %xmm0, %xmm2
# CHECK-NEXT: 1 4 1.00 * pblendw $11, (%rax), %xmm2
# CHECK-NEXT: 2 4 2.00 pcmpeqq %xmm0, %xmm2
# CHECK: Resource pressure per iteration:
# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7]
-# CHECK-NEXT: - - - 90.00 25.00 - - 54.00
+# CHECK-NEXT: - - - 96.00 25.00 - - 54.00
# CHECK: Resource pressure by instruction:
# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] Instructions:
# CHECK-NEXT: - - - 1.00 - - - 1.00 mpsadbw $1, (%rax), %xmm2
# CHECK-NEXT: - - - 1.00 - - - - packusdw %xmm0, %xmm2
# CHECK-NEXT: - - - 1.00 - - - 1.00 packusdw (%rax), %xmm2
-# CHECK-NEXT: - - - 1.00 - - - - pblendvb %xmm0, %xmm0, %xmm2
-# CHECK-NEXT: - - - 1.00 - - - 1.00 pblendvb %xmm0, (%rax), %xmm2
+# CHECK-NEXT: - - - 4.00 - - - - pblendvb %xmm0, %xmm0, %xmm2
+# CHECK-NEXT: - - - 4.00 - - - 1.00 pblendvb %xmm0, (%rax), %xmm2
# CHECK-NEXT: - - - 1.00 - - - - pblendw $11, %xmm0, %xmm2
# CHECK-NEXT: - - - 1.00 - - - 1.00 pblendw $11, (%rax), %xmm2
# CHECK-NEXT: - - - 2.00 2.00 - - - pcmpeqq %xmm0, %xmm2