From: Simon Pilgrim Date: Mon, 7 Nov 2022 15:17:23 +0000 (+0000) Subject: [X86] Folded MOVDDUPrm has the same sched behaviour as MOVSHDUPrm/MOVSLDUPrm on Haswe... X-Git-Tag: upstream/17.0.6~28336 X-Git-Url: http://review.tizen.org/git/?a=commitdiff_plain;h=5c0cb75787b9a8a7fd488fb05b8350dc798aee9a;p=platform%2Fupstream%2Fllvm.git [X86] Folded MOVDDUPrm has the same sched behaviour as MOVSHDUPrm/MOVSLDUPrm on Haswell/IceLake There can be a difference for MOVDDUPrr, but not for the load-folded broadcast, which runs purely on Port23. Fixes an old TODO (inherited from SkylakeServer, where it was fixed at c7662dc3e52801ec824d8473278fb976107d3e57). Confirmed against Agner + uops.info. --- diff --git a/llvm/lib/Target/X86/X86SchedHaswell.td b/llvm/lib/Target/X86/X86SchedHaswell.td index 44fc1ac..bd4cbe2 100644 --- a/llvm/lib/Target/X86/X86SchedHaswell.td +++ b/llvm/lib/Target/X86/X86SchedHaswell.td @@ -864,6 +864,7 @@ def HWWriteResGroup0 : SchedWriteRes<[HWPort23]> { def: InstRW<[HWWriteResGroup0], (instrs VBROADCASTSSrm)>; def: InstRW<[HWWriteResGroup0], (instregex "(V?)MOVSHDUPrm", "(V?)MOVSLDUPrm", + "(V?)MOVDDUPrm", "VPBROADCAST(D|Q)rm")>; def HWWriteResGroup0_1 : SchedWriteRes<[HWPort23]> { @@ -881,13 +882,6 @@ def: InstRW<[HWWriteResGroup0_1], (instrs VBROADCASTF128, def: InstRW<[HWWriteResGroup0_1], (instregex "LD_F(32|64|80)m", "VPBROADCAST(D|Q)Yrm")>; -def HWWriteResGroup0_2 : SchedWriteRes<[HWPort23]> { - let Latency = 5; - let NumMicroOps = 1; - let ResourceCycles = [1]; -} -def: InstRW<[HWWriteResGroup0_2], (instregex "(V?)MOVDDUPrm")>; - def HWWriteResGroup1 : SchedWriteRes<[HWPort4,HWPort237]> { let Latency = 1; let NumMicroOps = 2; diff --git a/llvm/lib/Target/X86/X86SchedIceLake.td b/llvm/lib/Target/X86/X86SchedIceLake.td index 43fb6ee..331fafa 100644 --- a/llvm/lib/Target/X86/X86SchedIceLake.td +++ b/llvm/lib/Target/X86/X86SchedIceLake.td @@ -1066,13 +1066,6 @@ def ICXWriteResGroup57 : SchedWriteRes<[ICXPort1,ICXPort6,ICXPort0156]> { } def: InstRW<[ICXWriteResGroup57], 
(instregex "LAR(16|32|64)rr")>; -def ICXWriteResGroup58 : SchedWriteRes<[ICXPort23]> { - let Latency = 5; - let NumMicroOps = 1; - let ResourceCycles = [1]; -} -def: InstRW<[ICXWriteResGroup58], (instregex "(V?)MOVDDUPrm")>; // TODO: Should this be ICXWriteResGroup71? - def ICXWriteResGroup61 : SchedWriteRes<[ICXPort5,ICXPort015]> { let Latency = 5; let NumMicroOps = 2; @@ -1174,8 +1167,10 @@ def: InstRW<[ICXWriteResGroup71], (instrs VBROADCASTSSrm, VPBROADCASTQrm, VMOVSHDUPrm, VMOVSLDUPrm, + VMOVDDUPrm, MOVSHDUPrm, - MOVSLDUPrm)>; + MOVSLDUPrm, + MOVDDUPrm)>; def ICXWriteResGroup72 : SchedWriteRes<[ICXPort5]> { let Latency = 6; diff --git a/llvm/test/tools/llvm-mca/X86/Haswell/resources-avx1.s b/llvm/test/tools/llvm-mca/X86/Haswell/resources-avx1.s index ec5c773..ea7d251 100644 --- a/llvm/test/tools/llvm-mca/X86/Haswell/resources-avx1.s +++ b/llvm/test/tools/llvm-mca/X86/Haswell/resources-avx1.s @@ -1269,7 +1269,7 @@ vzeroupper # CHECK-NEXT: 1 1 1.00 vmovd %xmm0, %ecx # CHECK-NEXT: 2 1 1.00 * vmovd %xmm0, (%rax) # CHECK-NEXT: 1 1 1.00 vmovddup %xmm0, %xmm2 -# CHECK-NEXT: 1 5 0.50 * vmovddup (%rax), %xmm2 +# CHECK-NEXT: 1 6 0.50 * vmovddup (%rax), %xmm2 # CHECK-NEXT: 1 1 1.00 vmovddup %ymm0, %ymm2 # CHECK-NEXT: 1 7 0.50 * vmovddup (%rax), %ymm2 # CHECK-NEXT: 1 1 0.33 vmovdqa %xmm0, %xmm2 diff --git a/llvm/test/tools/llvm-mca/X86/Haswell/resources-sse3.s b/llvm/test/tools/llvm-mca/X86/Haswell/resources-sse3.s index 6832def..7085718 100644 --- a/llvm/test/tools/llvm-mca/X86/Haswell/resources-sse3.s +++ b/llvm/test/tools/llvm-mca/X86/Haswell/resources-sse3.s @@ -58,7 +58,7 @@ mwait # CHECK-NEXT: 1 6 0.50 * lddqu (%rax), %xmm2 # CHECK-NEXT: 1 100 0.25 U monitor # CHECK-NEXT: 1 1 1.00 movddup %xmm0, %xmm2 -# CHECK-NEXT: 1 5 0.50 * movddup (%rax), %xmm2 +# CHECK-NEXT: 1 6 0.50 * movddup (%rax), %xmm2 # CHECK-NEXT: 1 1 1.00 movshdup %xmm0, %xmm2 # CHECK-NEXT: 1 6 0.50 * movshdup (%rax), %xmm2 # CHECK-NEXT: 1 1 1.00 movsldup %xmm0, %xmm2 diff --git 
a/llvm/test/tools/llvm-mca/X86/IceLakeServer/resources-avx1.s b/llvm/test/tools/llvm-mca/X86/IceLakeServer/resources-avx1.s index fa0720f..383ddac 100644 --- a/llvm/test/tools/llvm-mca/X86/IceLakeServer/resources-avx1.s +++ b/llvm/test/tools/llvm-mca/X86/IceLakeServer/resources-avx1.s @@ -1269,7 +1269,7 @@ vzeroupper # CHECK-NEXT: 1 2 1.00 vmovd %xmm0, %ecx # CHECK-NEXT: 2 1 1.00 * vmovd %xmm0, (%rax) # CHECK-NEXT: 1 1 1.00 vmovddup %xmm0, %xmm2 -# CHECK-NEXT: 1 5 0.50 * vmovddup (%rax), %xmm2 +# CHECK-NEXT: 1 6 0.50 * vmovddup (%rax), %xmm2 # CHECK-NEXT: 1 1 1.00 vmovddup %ymm0, %ymm2 # CHECK-NEXT: 1 7 0.50 * vmovddup (%rax), %ymm2 # CHECK-NEXT: 1 1 0.33 vmovdqa %xmm0, %xmm2 diff --git a/llvm/test/tools/llvm-mca/X86/IceLakeServer/resources-sse3.s b/llvm/test/tools/llvm-mca/X86/IceLakeServer/resources-sse3.s index e09b9e0..4d19424 100644 --- a/llvm/test/tools/llvm-mca/X86/IceLakeServer/resources-sse3.s +++ b/llvm/test/tools/llvm-mca/X86/IceLakeServer/resources-sse3.s @@ -58,7 +58,7 @@ mwait # CHECK-NEXT: 1 6 0.50 * lddqu (%rax), %xmm2 # CHECK-NEXT: 1 100 0.25 U monitor # CHECK-NEXT: 1 1 1.00 movddup %xmm0, %xmm2 -# CHECK-NEXT: 1 5 0.50 * movddup (%rax), %xmm2 +# CHECK-NEXT: 1 6 0.50 * movddup (%rax), %xmm2 # CHECK-NEXT: 1 1 0.50 movshdup %xmm0, %xmm2 # CHECK-NEXT: 1 6 0.50 * movshdup (%rax), %xmm2 # CHECK-NEXT: 1 1 0.50 movsldup %xmm0, %xmm2