The Znver1/Znver2 scheduling models were using the vector load latency (8 cycles) for WriteLoad, which is what WriteFLoad*/WriteVecLoad* are for, instead of the scalar load latency (4 cycles).
It is not clear that the clflush/clzero/prefetch ops should be tagged as WriteLoad at all, but this change at least makes the models more consistent.
def : WriteRes<WriteStore, [ZnAGU]>;
def : WriteRes<WriteStoreNT, [ZnAGU]>;
def : WriteRes<WriteMove, [ZnALU]>;
-def : WriteRes<WriteLoad, [ZnAGU]> { let Latency = 8; }
+def : WriteRes<WriteLoad, [ZnAGU]> { let Latency = 4; }
// Model the effect of clobbering the read-write mask operand of the GATHER operation.
// Does not cost anything by itself, only has latency, matching that of the WriteLoad,
def : WriteRes<WriteStore, [Zn2AGU]>;
def : WriteRes<WriteStoreNT, [Zn2AGU]>;
def : WriteRes<WriteMove, [Zn2ALU]>;
-def : WriteRes<WriteLoad, [Zn2AGU]> { let Latency = 8; }
+def : WriteRes<WriteLoad, [Zn2AGU]> { let Latency = 4; }
// Model the effect of clobbering the read-write mask operand of the GATHER operation.
// Does not cost anything by itself, only has latency, matching that of the WriteLoad,
# CHECK-NEXT: [6]: HasSideEffects (U)
# CHECK: [1] [2] [3] [4] [5] [6] Instructions:
-# CHECK-NEXT: 1 8 0.50 * * U clflushopt (%rax)
+# CHECK-NEXT: 1 4 0.50 * * U clflushopt (%rax)
# CHECK: Resources:
# CHECK-NEXT: [0] - ZnAGU0
# CHECK-NEXT: [6]: HasSideEffects (U)
# CHECK: [1] [2] [3] [4] [5] [6] Instructions:
-# CHECK-NEXT: 1 8 0.50 U clzero
+# CHECK-NEXT: 1 4 0.50 U clzero
# CHECK: Resources:
# CHECK-NEXT: [0] - ZnAGU0
# CHECK-NEXT: [6]: HasSideEffects (U)
# CHECK: [1] [2] [3] [4] [5] [6] Instructions:
-# CHECK-NEXT: 1 8 0.50 * * prefetch (%rax)
-# CHECK-NEXT: 1 8 0.50 * * prefetchw (%rax)
+# CHECK-NEXT: 1 4 0.50 * * prefetch (%rax)
+# CHECK-NEXT: 1 4 0.50 * * prefetchw (%rax)
# CHECK: Resources:
# CHECK-NEXT: [0] - ZnAGU0
# CHECK-NEXT: 1 1 1.00 pmovmskb %mm0, %ecx
# CHECK-NEXT: 1 4 1.00 pmulhuw %mm0, %mm2
# CHECK-NEXT: 1 11 1.00 * pmulhuw (%rax), %mm2
-# CHECK-NEXT: 1 8 0.50 * * prefetcht0 (%rax)
-# CHECK-NEXT: 1 8 0.50 * * prefetcht1 (%rax)
-# CHECK-NEXT: 1 8 0.50 * * prefetcht2 (%rax)
-# CHECK-NEXT: 1 8 0.50 * * prefetchnta (%rax)
+# CHECK-NEXT: 1 4 0.50 * * prefetcht0 (%rax)
+# CHECK-NEXT: 1 4 0.50 * * prefetcht1 (%rax)
+# CHECK-NEXT: 1 4 0.50 * * prefetcht2 (%rax)
+# CHECK-NEXT: 1 4 0.50 * * prefetchnta (%rax)
# CHECK-NEXT: 1 3 1.00 psadbw %mm0, %mm2
# CHECK-NEXT: 1 10 1.00 * psadbw (%rax), %mm2
# CHECK-NEXT: 1 1 0.50 pshufw $1, %mm0, %mm2
# CHECK-NEXT: 1 8 0.50 * andnpd (%rax), %xmm2
# CHECK-NEXT: 1 1 0.25 andpd %xmm0, %xmm2
# CHECK-NEXT: 1 8 0.50 * andpd (%rax), %xmm2
-# CHECK-NEXT: 1 8 0.50 * * U clflush (%rax)
+# CHECK-NEXT: 1 4 0.50 * * U clflush (%rax)
# CHECK-NEXT: 1 1 0.50 cmpeqpd %xmm0, %xmm2
# CHECK-NEXT: 1 8 0.50 * cmpeqpd (%rax), %xmm2
# CHECK-NEXT: 1 1 0.50 cmpeqsd %xmm0, %xmm2
# CHECK-NEXT: 2 5 0.50 * movzbw (%rax), %di
# CHECK-NEXT: 1 1 0.25 movsbl %al, %edi
# CHECK-NEXT: 1 1 0.25 movzbl %al, %edi
-# CHECK-NEXT: 1 8 0.50 * movsbl (%rax), %edi
-# CHECK-NEXT: 1 8 0.50 * movzbl (%rax), %edi
+# CHECK-NEXT: 1 4 0.50 * movsbl (%rax), %edi
+# CHECK-NEXT: 1 4 0.50 * movzbl (%rax), %edi
# CHECK-NEXT: 1 1 0.25 movsbq %al, %rdi
# CHECK-NEXT: 1 1 0.25 movzbq %al, %rdi
# CHECK-NEXT: 2 5 0.50 * movsbq (%rax), %rdi
# CHECK-NEXT: 2 5 0.50 * movzbq (%rax), %rdi
# CHECK-NEXT: 1 1 0.25 movswl %ax, %edi
# CHECK-NEXT: 1 1 0.25 movzwl %ax, %edi
-# CHECK-NEXT: 1 8 0.50 * movswl (%rax), %edi
-# CHECK-NEXT: 1 8 0.50 * movzwl (%rax), %edi
+# CHECK-NEXT: 1 4 0.50 * movswl (%rax), %edi
+# CHECK-NEXT: 1 4 0.50 * movzwl (%rax), %edi
# CHECK-NEXT: 1 1 0.25 movswq %ax, %rdi
# CHECK-NEXT: 1 1 0.25 movzwq %ax, %rdi
# CHECK-NEXT: 2 5 0.50 * movswq (%rax), %rdi
# CHECK-NEXT: 1 12 0.50 * U fisttpl (%ecx)
# CHECK-NEXT: 1 12 0.50 * U fisttpll (%eax)
# CHECK-NEXT: 1 1 0.50 U fld %st(0)
-# CHECK-NEXT: 1 8 0.50 * U flds (%edx)
-# CHECK-NEXT: 1 8 0.50 * U fldl (%ecx)
+# CHECK-NEXT: 1 4 0.50 * U flds (%edx)
+# CHECK-NEXT: 1 4 0.50 * U fldl (%ecx)
# CHECK-NEXT: 2 1 0.50 * U fldt (%eax)
# CHECK-NEXT: 1 100 0.25 * U fldcw (%eax)
# CHECK-NEXT: 1 100 0.25 * U fldenv (%eax)
# CHECK-NEXT: [6]: HasSideEffects (U)
# CHECK: [1] [2] [3] [4] [5] [6] Instructions:
-# CHECK-NEXT: 1 8 0.33 * * U clflushopt (%rax)
+# CHECK-NEXT: 1 4 0.33 * * U clflushopt (%rax)
# CHECK: Resources:
# CHECK-NEXT: [0] - Zn2AGU0
# CHECK-NEXT: [6]: HasSideEffects (U)
# CHECK: [1] [2] [3] [4] [5] [6] Instructions:
-# CHECK-NEXT: 1 8 0.33 U clzero
+# CHECK-NEXT: 1 4 0.33 U clzero
# CHECK: Resources:
# CHECK-NEXT: [0] - Zn2AGU0
# CHECK-NEXT: [6]: HasSideEffects (U)
# CHECK: [1] [2] [3] [4] [5] [6] Instructions:
-# CHECK-NEXT: 1 8 0.33 * * prefetch (%rax)
-# CHECK-NEXT: 1 8 0.33 * * prefetchw (%rax)
+# CHECK-NEXT: 1 4 0.33 * * prefetch (%rax)
+# CHECK-NEXT: 1 4 0.33 * * prefetchw (%rax)
# CHECK: Resources:
# CHECK-NEXT: [0] - Zn2AGU0
# CHECK-NEXT: 1 1 1.00 pmovmskb %mm0, %ecx
# CHECK-NEXT: 1 4 1.00 pmulhuw %mm0, %mm2
# CHECK-NEXT: 1 11 1.00 * pmulhuw (%rax), %mm2
-# CHECK-NEXT: 1 8 0.33 * * prefetcht0 (%rax)
-# CHECK-NEXT: 1 8 0.33 * * prefetcht1 (%rax)
-# CHECK-NEXT: 1 8 0.33 * * prefetcht2 (%rax)
-# CHECK-NEXT: 1 8 0.33 * * prefetchnta (%rax)
+# CHECK-NEXT: 1 4 0.33 * * prefetcht0 (%rax)
+# CHECK-NEXT: 1 4 0.33 * * prefetcht1 (%rax)
+# CHECK-NEXT: 1 4 0.33 * * prefetcht2 (%rax)
+# CHECK-NEXT: 1 4 0.33 * * prefetchnta (%rax)
# CHECK-NEXT: 1 3 1.00 psadbw %mm0, %mm2
# CHECK-NEXT: 1 10 1.00 * psadbw (%rax), %mm2
# CHECK-NEXT: 1 1 0.50 pshufw $1, %mm0, %mm2
# CHECK-NEXT: 1 8 0.33 * andnpd (%rax), %xmm2
# CHECK-NEXT: 1 1 0.25 andpd %xmm0, %xmm2
# CHECK-NEXT: 1 8 0.33 * andpd (%rax), %xmm2
-# CHECK-NEXT: 1 8 0.33 * * U clflush (%rax)
+# CHECK-NEXT: 1 4 0.33 * * U clflush (%rax)
# CHECK-NEXT: 1 1 0.50 cmpeqpd %xmm0, %xmm2
# CHECK-NEXT: 1 8 0.50 * cmpeqpd (%rax), %xmm2
# CHECK-NEXT: 1 1 0.50 cmpeqsd %xmm0, %xmm2
# CHECK-NEXT: 2 5 0.33 * movzbw (%rax), %di
# CHECK-NEXT: 1 1 0.25 movsbl %al, %edi
# CHECK-NEXT: 1 1 0.25 movzbl %al, %edi
-# CHECK-NEXT: 1 8 0.33 * movsbl (%rax), %edi
-# CHECK-NEXT: 1 8 0.33 * movzbl (%rax), %edi
+# CHECK-NEXT: 1 4 0.33 * movsbl (%rax), %edi
+# CHECK-NEXT: 1 4 0.33 * movzbl (%rax), %edi
# CHECK-NEXT: 1 1 0.25 movsbq %al, %rdi
# CHECK-NEXT: 1 1 0.25 movzbq %al, %rdi
# CHECK-NEXT: 2 5 0.33 * movsbq (%rax), %rdi
# CHECK-NEXT: 2 5 0.33 * movzbq (%rax), %rdi
# CHECK-NEXT: 1 1 0.25 movswl %ax, %edi
# CHECK-NEXT: 1 1 0.25 movzwl %ax, %edi
-# CHECK-NEXT: 1 8 0.33 * movswl (%rax), %edi
-# CHECK-NEXT: 1 8 0.33 * movzwl (%rax), %edi
+# CHECK-NEXT: 1 4 0.33 * movswl (%rax), %edi
+# CHECK-NEXT: 1 4 0.33 * movzwl (%rax), %edi
# CHECK-NEXT: 1 1 0.25 movswq %ax, %rdi
# CHECK-NEXT: 1 1 0.25 movzwq %ax, %rdi
# CHECK-NEXT: 2 5 0.33 * movswq (%rax), %rdi
# CHECK-NEXT: 1 12 0.50 * U fisttpl (%ecx)
# CHECK-NEXT: 1 12 0.50 * U fisttpll (%eax)
# CHECK-NEXT: 1 1 0.50 U fld %st(0)
-# CHECK-NEXT: 1 8 0.33 * U flds (%edx)
-# CHECK-NEXT: 1 8 0.33 * U fldl (%ecx)
+# CHECK-NEXT: 1 4 0.33 * U flds (%edx)
+# CHECK-NEXT: 1 4 0.33 * U fldl (%ecx)
# CHECK-NEXT: 2 1 0.50 * U fldt (%eax)
# CHECK-NEXT: 1 100 0.25 * U fldcw (%eax)
# CHECK-NEXT: 1 100 0.25 * U fldenv (%eax)