From 8a83f16ccd089224e7e84c0a1524296fccf61419 Mon Sep 17 00:00:00 2001 From: Simon Pilgrim Date: Fri, 6 Apr 2018 11:00:51 +0000 Subject: [PATCH] [X86][SandyBridge] SBWriteResPair +5cy Memory Folds As mentioned on D44647, this patch increases the default memory latency to +5cy (from +4cy), which more closely matches what most custom cases are doing for reg-mem instructions. I've bumped LoadLatency, ReadAfterLd and WriteLoad values to 5cy to be consistent. As Sandy Bridge is currently our default generic model, this affects a lot of scheduling tests... Differential Revision: https://reviews.llvm.org/D44654 llvm-svn: 329388 --- llvm/lib/Target/X86/X86SchedSandyBridge.td | 12 +- llvm/test/CodeGen/X86/3dnow-schedule.ll | 48 +- llvm/test/CodeGen/X86/adx-schedule.ll | 8 +- llvm/test/CodeGen/X86/avx-schedule.ll | 4 +- llvm/test/CodeGen/X86/avx2-schedule.ll | 290 ++++---- llvm/test/CodeGen/X86/avx512-schedule.ll | 224 +++--- llvm/test/CodeGen/X86/avx512-shuffle-schedule.ll | 852 +++++++++++----------- llvm/test/CodeGen/X86/avx512vpopcntdq-schedule.ll | 24 +- llvm/test/CodeGen/X86/bmi-schedule.ll | 20 +- llvm/test/CodeGen/X86/bmi2-schedule.ll | 32 +- llvm/test/CodeGen/X86/clwb-schedule.ll | 2 +- llvm/test/CodeGen/X86/f16c-schedule.ll | 20 +- llvm/test/CodeGen/X86/fma-schedule.ll | 192 ++--- llvm/test/CodeGen/X86/fma4-schedule.ll | 128 ++-- llvm/test/CodeGen/X86/mmx-schedule.ll | 204 +++--- llvm/test/CodeGen/X86/movbe-schedule.ll | 6 +- llvm/test/CodeGen/X86/schedule-x86_32.ll | 58 +- llvm/test/CodeGen/X86/schedule-x86_64.ll | 224 +++--- llvm/test/CodeGen/X86/sha-schedule.ll | 14 +- llvm/test/CodeGen/X86/sse41-schedule.ll | 10 +- llvm/test/CodeGen/X86/tbm-schedule.ll | 40 +- llvm/test/CodeGen/X86/x87-schedule.ll | 118 +-- llvm/test/CodeGen/X86/xop-schedule.ll | 166 ++--- 23 files changed, 1348 insertions(+), 1348 deletions(-) diff --git a/llvm/lib/Target/X86/X86SchedSandyBridge.td b/llvm/lib/Target/X86/X86SchedSandyBridge.td index 827e727..a1df50c 100644 --- 
a/llvm/lib/Target/X86/X86SchedSandyBridge.td +++ b/llvm/lib/Target/X86/X86SchedSandyBridge.td @@ -18,7 +18,7 @@ def SandyBridgeModel : SchedMachineModel { // FIXME: Identify instructions that aren't a single fused micro-op. let IssueWidth = 4; let MicroOpBufferSize = 168; // Based on the reorder buffer. - let LoadLatency = 4; + let LoadLatency = 5; let MispredictPenalty = 16; // Based on the LSD (loop-stream detector) queue size. @@ -63,9 +63,9 @@ def SBDivider : ProcResource<1>; // FP division and sqrt on port 0. def SBFPDivider : ProcResource<1>; -// Loads are 4 cycles, so ReadAfterLd registers needn't be available until 4 +// Loads are 5 cycles, so ReadAfterLd registers needn't be available until 5 // cycles after the memory operand. -def : ReadAdvance; +def : ReadAdvance; // Many SchedWrites are defined in pairs with and without a folded load. // Instructions with folded loads are usually micro-fused, so they only appear @@ -75,7 +75,7 @@ def : ReadAdvance; multiclass SBWriteResPair ExePorts, int Lat, list Res = [1], int UOps = 1, - int LoadLat = 4> { + int LoadLat = 5> { // Register variant is using a single cycle on ExePort. 
def : WriteRes { let Latency = Lat; @@ -84,7 +84,7 @@ multiclass SBWriteResPair { let Latency = !add(Lat, LoadLat); let ResourceCycles = !listconcat([1], Res); @@ -97,7 +97,7 @@ multiclass SBWriteResPair; def : WriteRes; -def : WriteRes { let Latency = 4; } +def : WriteRes { let Latency = 5; } def : WriteRes; def : WriteRes; diff --git a/llvm/test/CodeGen/X86/3dnow-schedule.ll b/llvm/test/CodeGen/X86/3dnow-schedule.ll index d454339..9369713 100644 --- a/llvm/test/CodeGen/X86/3dnow-schedule.ll +++ b/llvm/test/CodeGen/X86/3dnow-schedule.ll @@ -15,7 +15,7 @@ define i64 @test_pavgusb(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize { ; CHECK-LABEL: test_pavgusb: ; CHECK: # %bb.0: ; CHECK-NEXT: pavgusb %mm1, %mm0 # sched: [5:1.00] -; CHECK-NEXT: pavgusb (%rdi), %mm0 # sched: [9:1.00] +; CHECK-NEXT: pavgusb (%rdi), %mm0 # sched: [10:1.00] ; CHECK-NEXT: movq %mm0, %rax # sched: [1:0.33] ; CHECK-NEXT: retq # sched: [1:1.00] %1 = call x86_mmx @llvm.x86.3dnow.pavgusb(x86_mmx %a0, x86_mmx %a1) @@ -29,7 +29,7 @@ declare x86_mmx @llvm.x86.3dnow.pavgusb(x86_mmx, x86_mmx) nounwind readnone define i64 @test_pf2id(x86_mmx* %a0) optsize { ; CHECK-LABEL: test_pf2id: ; CHECK: # %bb.0: -; CHECK-NEXT: pf2id (%rdi), %mm0 # sched: [7:1.00] +; CHECK-NEXT: pf2id (%rdi), %mm0 # sched: [8:1.00] ; CHECK-NEXT: pf2id %mm0, %mm0 # sched: [3:1.00] ; CHECK-NEXT: movq %mm0, %rax # sched: [1:0.33] ; CHECK-NEXT: retq # sched: [1:1.00] @@ -44,7 +44,7 @@ declare x86_mmx @llvm.x86.3dnow.pf2id(x86_mmx) nounwind readnone define i64 @test_pf2iw(x86_mmx* %a0) optsize { ; CHECK-LABEL: test_pf2iw: ; CHECK: # %bb.0: -; CHECK-NEXT: pf2iw (%rdi), %mm0 # sched: [7:1.00] +; CHECK-NEXT: pf2iw (%rdi), %mm0 # sched: [8:1.00] ; CHECK-NEXT: pf2iw %mm0, %mm0 # sched: [3:1.00] ; CHECK-NEXT: movq %mm0, %rax # sched: [1:0.33] ; CHECK-NEXT: retq # sched: [1:1.00] @@ -60,7 +60,7 @@ define i64 @test_pfacc(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize { ; CHECK-LABEL: test_pfacc: ; CHECK: # %bb.0: ; CHECK-NEXT: pfacc %mm1, 
%mm0 # sched: [3:1.00] -; CHECK-NEXT: pfacc (%rdi), %mm0 # sched: [7:1.00] +; CHECK-NEXT: pfacc (%rdi), %mm0 # sched: [8:1.00] ; CHECK-NEXT: movq %mm0, %rax # sched: [1:0.33] ; CHECK-NEXT: retq # sched: [1:1.00] %1 = call x86_mmx @llvm.x86.3dnow.pfacc(x86_mmx %a0, x86_mmx %a1) @@ -75,7 +75,7 @@ define i64 @test_pfadd(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize { ; CHECK-LABEL: test_pfadd: ; CHECK: # %bb.0: ; CHECK-NEXT: pfadd %mm1, %mm0 # sched: [3:1.00] -; CHECK-NEXT: pfadd (%rdi), %mm0 # sched: [7:1.00] +; CHECK-NEXT: pfadd (%rdi), %mm0 # sched: [8:1.00] ; CHECK-NEXT: movq %mm0, %rax # sched: [1:0.33] ; CHECK-NEXT: retq # sched: [1:1.00] %1 = call x86_mmx @llvm.x86.3dnow.pfadd(x86_mmx %a0, x86_mmx %a1) @@ -90,7 +90,7 @@ define i64 @test_pfcmpeq(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize { ; CHECK-LABEL: test_pfcmpeq: ; CHECK: # %bb.0: ; CHECK-NEXT: pfcmpeq %mm1, %mm0 # sched: [3:1.00] -; CHECK-NEXT: pfcmpeq (%rdi), %mm0 # sched: [7:1.00] +; CHECK-NEXT: pfcmpeq (%rdi), %mm0 # sched: [8:1.00] ; CHECK-NEXT: movq %mm0, %rax # sched: [1:0.33] ; CHECK-NEXT: retq # sched: [1:1.00] %1 = call x86_mmx @llvm.x86.3dnow.pfcmpeq(x86_mmx %a0, x86_mmx %a1) @@ -105,7 +105,7 @@ define i64 @test_pfcmpge(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize { ; CHECK-LABEL: test_pfcmpge: ; CHECK: # %bb.0: ; CHECK-NEXT: pfcmpge %mm1, %mm0 # sched: [3:1.00] -; CHECK-NEXT: pfcmpge (%rdi), %mm0 # sched: [7:1.00] +; CHECK-NEXT: pfcmpge (%rdi), %mm0 # sched: [8:1.00] ; CHECK-NEXT: movq %mm0, %rax # sched: [1:0.33] ; CHECK-NEXT: retq # sched: [1:1.00] %1 = call x86_mmx @llvm.x86.3dnow.pfcmpge(x86_mmx %a0, x86_mmx %a1) @@ -120,7 +120,7 @@ define i64 @test_pfcmpgt(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize { ; CHECK-LABEL: test_pfcmpgt: ; CHECK: # %bb.0: ; CHECK-NEXT: pfcmpgt %mm1, %mm0 # sched: [3:1.00] -; CHECK-NEXT: pfcmpgt (%rdi), %mm0 # sched: [7:1.00] +; CHECK-NEXT: pfcmpgt (%rdi), %mm0 # sched: [8:1.00] ; CHECK-NEXT: movq %mm0, %rax # sched: [1:0.33] ; CHECK-NEXT: retq # 
sched: [1:1.00] %1 = call x86_mmx @llvm.x86.3dnow.pfcmpgt(x86_mmx %a0, x86_mmx %a1) @@ -135,7 +135,7 @@ define i64 @test_pfmax(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize { ; CHECK-LABEL: test_pfmax: ; CHECK: # %bb.0: ; CHECK-NEXT: pfmax %mm1, %mm0 # sched: [3:1.00] -; CHECK-NEXT: pfmax (%rdi), %mm0 # sched: [7:1.00] +; CHECK-NEXT: pfmax (%rdi), %mm0 # sched: [8:1.00] ; CHECK-NEXT: movq %mm0, %rax # sched: [1:0.33] ; CHECK-NEXT: retq # sched: [1:1.00] %1 = call x86_mmx @llvm.x86.3dnow.pfmax(x86_mmx %a0, x86_mmx %a1) @@ -150,7 +150,7 @@ define i64 @test_pfmin(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize { ; CHECK-LABEL: test_pfmin: ; CHECK: # %bb.0: ; CHECK-NEXT: pfmin %mm1, %mm0 # sched: [3:1.00] -; CHECK-NEXT: pfmin (%rdi), %mm0 # sched: [7:1.00] +; CHECK-NEXT: pfmin (%rdi), %mm0 # sched: [8:1.00] ; CHECK-NEXT: movq %mm0, %rax # sched: [1:0.33] ; CHECK-NEXT: retq # sched: [1:1.00] %1 = call x86_mmx @llvm.x86.3dnow.pfmin(x86_mmx %a0, x86_mmx %a1) @@ -165,7 +165,7 @@ define i64 @test_pfmul(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize { ; CHECK-LABEL: test_pfmul: ; CHECK: # %bb.0: ; CHECK-NEXT: pfmul %mm1, %mm0 # sched: [3:1.00] -; CHECK-NEXT: pfmul (%rdi), %mm0 # sched: [7:1.00] +; CHECK-NEXT: pfmul (%rdi), %mm0 # sched: [8:1.00] ; CHECK-NEXT: movq %mm0, %rax # sched: [1:0.33] ; CHECK-NEXT: retq # sched: [1:1.00] %1 = call x86_mmx @llvm.x86.3dnow.pfmul(x86_mmx %a0, x86_mmx %a1) @@ -180,7 +180,7 @@ define i64 @test_pfnacc(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize { ; CHECK-LABEL: test_pfnacc: ; CHECK: # %bb.0: ; CHECK-NEXT: pfnacc %mm1, %mm0 # sched: [3:1.00] -; CHECK-NEXT: pfnacc (%rdi), %mm0 # sched: [7:1.00] +; CHECK-NEXT: pfnacc (%rdi), %mm0 # sched: [8:1.00] ; CHECK-NEXT: movq %mm0, %rax # sched: [1:0.33] ; CHECK-NEXT: retq # sched: [1:1.00] %1 = call x86_mmx @llvm.x86.3dnowa.pfnacc(x86_mmx %a0, x86_mmx %a1) @@ -195,7 +195,7 @@ define i64 @test_pfpnacc(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize { ; CHECK-LABEL: test_pfpnacc: ; CHECK: # 
%bb.0: ; CHECK-NEXT: pfpnacc %mm1, %mm0 # sched: [3:1.00] -; CHECK-NEXT: pfpnacc (%rdi), %mm0 # sched: [7:1.00] +; CHECK-NEXT: pfpnacc (%rdi), %mm0 # sched: [8:1.00] ; CHECK-NEXT: movq %mm0, %rax # sched: [1:0.33] ; CHECK-NEXT: retq # sched: [1:1.00] %1 = call x86_mmx @llvm.x86.3dnowa.pfpnacc(x86_mmx %a0, x86_mmx %a1) @@ -209,7 +209,7 @@ declare x86_mmx @llvm.x86.3dnowa.pfpnacc(x86_mmx, x86_mmx) nounwind readnone define i64 @test_pfrcp(x86_mmx* %a0) optsize { ; CHECK-LABEL: test_pfrcp: ; CHECK: # %bb.0: -; CHECK-NEXT: pfrcp (%rdi), %mm0 # sched: [7:1.00] +; CHECK-NEXT: pfrcp (%rdi), %mm0 # sched: [8:1.00] ; CHECK-NEXT: pfrcp %mm0, %mm0 # sched: [3:1.00] ; CHECK-NEXT: movq %mm0, %rax # sched: [1:0.33] ; CHECK-NEXT: retq # sched: [1:1.00] @@ -225,7 +225,7 @@ define i64 @test_pfrcpit1(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize { ; CHECK-LABEL: test_pfrcpit1: ; CHECK: # %bb.0: ; CHECK-NEXT: pfrcpit1 %mm1, %mm0 # sched: [3:1.00] -; CHECK-NEXT: pfrcpit1 (%rdi), %mm0 # sched: [7:1.00] +; CHECK-NEXT: pfrcpit1 (%rdi), %mm0 # sched: [8:1.00] ; CHECK-NEXT: movq %mm0, %rax # sched: [1:0.33] ; CHECK-NEXT: retq # sched: [1:1.00] %1 = call x86_mmx @llvm.x86.3dnow.pfrcpit1(x86_mmx %a0, x86_mmx %a1) @@ -240,7 +240,7 @@ define i64 @test_pfrcpit2(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize { ; CHECK-LABEL: test_pfrcpit2: ; CHECK: # %bb.0: ; CHECK-NEXT: pfrcpit2 %mm1, %mm0 # sched: [3:1.00] -; CHECK-NEXT: pfrcpit2 (%rdi), %mm0 # sched: [7:1.00] +; CHECK-NEXT: pfrcpit2 (%rdi), %mm0 # sched: [8:1.00] ; CHECK-NEXT: movq %mm0, %rax # sched: [1:0.33] ; CHECK-NEXT: retq # sched: [1:1.00] %1 = call x86_mmx @llvm.x86.3dnow.pfrcpit2(x86_mmx %a0, x86_mmx %a1) @@ -255,7 +255,7 @@ define i64 @test_pfrsqit1(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize { ; CHECK-LABEL: test_pfrsqit1: ; CHECK: # %bb.0: ; CHECK-NEXT: pfrsqit1 %mm1, %mm0 # sched: [3:1.00] -; CHECK-NEXT: pfrsqit1 (%rdi), %mm0 # sched: [7:1.00] +; CHECK-NEXT: pfrsqit1 (%rdi), %mm0 # sched: [8:1.00] ; CHECK-NEXT: movq %mm0, 
%rax # sched: [1:0.33] ; CHECK-NEXT: retq # sched: [1:1.00] %1 = call x86_mmx @llvm.x86.3dnow.pfrsqit1(x86_mmx %a0, x86_mmx %a1) @@ -269,7 +269,7 @@ declare x86_mmx @llvm.x86.3dnow.pfrsqit1(x86_mmx, x86_mmx) nounwind readnone define i64 @test_pfrsqrt(x86_mmx* %a0) optsize { ; CHECK-LABEL: test_pfrsqrt: ; CHECK: # %bb.0: -; CHECK-NEXT: pfrsqrt (%rdi), %mm0 # sched: [7:1.00] +; CHECK-NEXT: pfrsqrt (%rdi), %mm0 # sched: [8:1.00] ; CHECK-NEXT: pfrsqrt %mm0, %mm0 # sched: [3:1.00] ; CHECK-NEXT: movq %mm0, %rax # sched: [1:0.33] ; CHECK-NEXT: retq # sched: [1:1.00] @@ -285,7 +285,7 @@ define i64 @test_pfsub(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize { ; CHECK-LABEL: test_pfsub: ; CHECK: # %bb.0: ; CHECK-NEXT: pfsub %mm1, %mm0 # sched: [3:1.00] -; CHECK-NEXT: pfsub (%rdi), %mm0 # sched: [7:1.00] +; CHECK-NEXT: pfsub (%rdi), %mm0 # sched: [8:1.00] ; CHECK-NEXT: movq %mm0, %rax # sched: [1:0.33] ; CHECK-NEXT: retq # sched: [1:1.00] %1 = call x86_mmx @llvm.x86.3dnow.pfsub(x86_mmx %a0, x86_mmx %a1) @@ -300,7 +300,7 @@ define i64 @test_pfsubr(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize { ; CHECK-LABEL: test_pfsubr: ; CHECK: # %bb.0: ; CHECK-NEXT: pfsubr %mm1, %mm0 # sched: [3:1.00] -; CHECK-NEXT: pfsubr (%rdi), %mm0 # sched: [7:1.00] +; CHECK-NEXT: pfsubr (%rdi), %mm0 # sched: [8:1.00] ; CHECK-NEXT: movq %mm0, %rax # sched: [1:0.33] ; CHECK-NEXT: retq # sched: [1:1.00] %1 = call x86_mmx @llvm.x86.3dnow.pfsubr(x86_mmx %a0, x86_mmx %a1) @@ -314,7 +314,7 @@ declare x86_mmx @llvm.x86.3dnow.pfsubr(x86_mmx, x86_mmx) nounwind readnone define i64 @test_pi2fd(x86_mmx* %a0) optsize { ; CHECK-LABEL: test_pi2fd: ; CHECK: # %bb.0: -; CHECK-NEXT: pi2fd (%rdi), %mm0 # sched: [8:1.00] +; CHECK-NEXT: pi2fd (%rdi), %mm0 # sched: [9:1.00] ; CHECK-NEXT: pi2fd %mm0, %mm0 # sched: [4:1.00] ; CHECK-NEXT: movq %mm0, %rax # sched: [1:0.33] ; CHECK-NEXT: retq # sched: [1:1.00] @@ -329,7 +329,7 @@ declare x86_mmx @llvm.x86.3dnow.pi2fd(x86_mmx) nounwind readnone define i64 @test_pi2fw(x86_mmx* 
%a0) optsize { ; CHECK-LABEL: test_pi2fw: ; CHECK: # %bb.0: -; CHECK-NEXT: pi2fw (%rdi), %mm0 # sched: [8:1.00] +; CHECK-NEXT: pi2fw (%rdi), %mm0 # sched: [9:1.00] ; CHECK-NEXT: pi2fw %mm0, %mm0 # sched: [4:1.00] ; CHECK-NEXT: movq %mm0, %rax # sched: [1:0.33] ; CHECK-NEXT: retq # sched: [1:1.00] @@ -345,7 +345,7 @@ define i64 @test_pmulhrw(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize { ; CHECK-LABEL: test_pmulhrw: ; CHECK: # %bb.0: ; CHECK-NEXT: pmulhrw %mm1, %mm0 # sched: [5:1.00] -; CHECK-NEXT: pmulhrw (%rdi), %mm0 # sched: [9:1.00] +; CHECK-NEXT: pmulhrw (%rdi), %mm0 # sched: [10:1.00] ; CHECK-NEXT: movq %mm0, %rax # sched: [1:0.33] ; CHECK-NEXT: retq # sched: [1:1.00] %1 = call x86_mmx @llvm.x86.3dnow.pmulhrw(x86_mmx %a0, x86_mmx %a1) @@ -381,7 +381,7 @@ define void @test_prefetchw(i8* %a0) optsize { define i64 @test_pswapd(x86_mmx* %a0) optsize { ; CHECK-LABEL: test_pswapd: ; CHECK: # %bb.0: -; CHECK-NEXT: pswapd (%rdi), %mm0 # mm0 = mem[1,0] sched: [5:1.00] +; CHECK-NEXT: pswapd (%rdi), %mm0 # mm0 = mem[1,0] sched: [6:1.00] ; CHECK-NEXT: pswapd %mm0, %mm0 # mm0 = mm0[1,0] sched: [1:1.00] ; CHECK-NEXT: movq %mm0, %rax # sched: [1:0.33] ; CHECK-NEXT: retq # sched: [1:1.00] diff --git a/llvm/test/CodeGen/X86/adx-schedule.ll b/llvm/test/CodeGen/X86/adx-schedule.ll index 13166f7..7741071 100644 --- a/llvm/test/CodeGen/X86/adx-schedule.ll +++ b/llvm/test/CodeGen/X86/adx-schedule.ll @@ -12,8 +12,8 @@ define void @test_adcx(i32 %a0, i32* %a1, i64 %a2, i64* %a3) optsize { ; GENERIC-NEXT: #APP ; GENERIC-NEXT: adcxl %edi, %edi # sched: [1:0.33] ; GENERIC-NEXT: adcxq %rdx, %rdx # sched: [1:0.33] -; GENERIC-NEXT: adcxl (%rsi), %edi # sched: [5:0.50] -; GENERIC-NEXT: adcxq (%rcx), %rdx # sched: [5:0.50] +; GENERIC-NEXT: adcxl (%rsi), %edi # sched: [6:0.50] +; GENERIC-NEXT: adcxq (%rcx), %rdx # sched: [6:0.50] ; GENERIC-NEXT: #NO_APP ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -65,8 +65,8 @@ define void @test_adox(i32 %a0, i32* %a1, i64 %a2, i64* %a3) optsize { ; 
GENERIC-NEXT: #APP ; GENERIC-NEXT: adoxl %edi, %edi # sched: [1:0.33] ; GENERIC-NEXT: adoxq %rdx, %rdx # sched: [1:0.33] -; GENERIC-NEXT: adoxl (%rsi), %edi # sched: [5:0.50] -; GENERIC-NEXT: adoxq (%rcx), %rdx # sched: [5:0.50] +; GENERIC-NEXT: adoxl (%rsi), %edi # sched: [6:0.50] +; GENERIC-NEXT: adoxq (%rcx), %rdx # sched: [6:0.50] ; GENERIC-NEXT: #NO_APP ; GENERIC-NEXT: retq # sched: [1:1.00] ; diff --git a/llvm/test/CodeGen/X86/avx-schedule.ll b/llvm/test/CodeGen/X86/avx-schedule.ll index eff1d6c..26af928 100644 --- a/llvm/test/CodeGen/X86/avx-schedule.ll +++ b/llvm/test/CodeGen/X86/avx-schedule.ll @@ -1607,13 +1607,13 @@ define <8 x float> @test_dpps(<8 x float> %a0, <8 x float> %a1, <8 x float> *%a2 ; GENERIC-LABEL: test_dpps: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vdpps $7, %ymm1, %ymm0, %ymm0 # sched: [12:2.00] -; GENERIC-NEXT: vdpps $7, (%rdi), %ymm0, %ymm0 # sched: [7:1.00] +; GENERIC-NEXT: vdpps $7, (%rdi), %ymm0, %ymm0 # sched: [8:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SANDY-LABEL: test_dpps: ; SANDY: # %bb.0: ; SANDY-NEXT: vdpps $7, %ymm1, %ymm0, %ymm0 # sched: [12:2.00] -; SANDY-NEXT: vdpps $7, (%rdi), %ymm0, %ymm0 # sched: [7:1.00] +; SANDY-NEXT: vdpps $7, (%rdi), %ymm0, %ymm0 # sched: [8:1.00] ; SANDY-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: test_dpps: diff --git a/llvm/test/CodeGen/X86/avx2-schedule.ll b/llvm/test/CodeGen/X86/avx2-schedule.ll index 1d1bf6e..0612c01 100644 --- a/llvm/test/CodeGen/X86/avx2-schedule.ll +++ b/llvm/test/CodeGen/X86/avx2-schedule.ll @@ -9,7 +9,7 @@ define <8 x i32> @test_broadcasti128(<8 x i32> %a0, <4 x i32> *%a1) { ; GENERIC-LABEL: test_broadcasti128: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vbroadcasti128 {{.*#+}} ymm1 = mem[0,1,0,1] sched: [5:1.00] +; GENERIC-NEXT: vbroadcasti128 {{.*#+}} ymm1 = mem[0,1,0,1] sched: [6:1.00] ; GENERIC-NEXT: vpaddd %ymm0, %ymm1, %ymm0 # sched: [3:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -236,7 +236,7 @@ define <4 x i32> @test_extracti128(<8 x i32> %a0, <8 x i32> 
%a1, <4 x i32> *%a2) define <2 x double> @test_gatherdpd(<2 x double> %a0, i8* %a1, <4 x i32> %a2, <2 x double> %a3) { ; GENERIC-LABEL: test_gatherdpd: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vgatherdpd %xmm2, (%rdi,%xmm1,2), %xmm0 # sched: [4:0.50] +; GENERIC-NEXT: vgatherdpd %xmm2, (%rdi,%xmm1,2), %xmm0 # sched: [5:0.50] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: test_gatherdpd: @@ -271,7 +271,7 @@ declare <2 x double> @llvm.x86.avx2.gather.d.pd(<2 x double>, i8*, <4 x i32>, <2 define <4 x double> @test_gatherdpd_ymm(<4 x double> %a0, i8* %a1, <4 x i32> %a2, <4 x double> %a3) { ; GENERIC-LABEL: test_gatherdpd_ymm: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vgatherdpd %ymm2, (%rdi,%xmm1,8), %ymm0 # sched: [4:0.50] +; GENERIC-NEXT: vgatherdpd %ymm2, (%rdi,%xmm1,8), %ymm0 # sched: [5:0.50] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: test_gatherdpd_ymm: @@ -306,7 +306,7 @@ declare <4 x double> @llvm.x86.avx2.gather.d.pd.256(<4 x double>, i8*, <4 x i32> define <4 x float> @test_gatherdps(<4 x float> %a0, i8* %a1, <4 x i32> %a2, <4 x float> %a3) { ; GENERIC-LABEL: test_gatherdps: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vgatherdps %xmm2, (%rdi,%xmm1,2), %xmm0 # sched: [4:0.50] +; GENERIC-NEXT: vgatherdps %xmm2, (%rdi,%xmm1,2), %xmm0 # sched: [5:0.50] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: test_gatherdps: @@ -341,7 +341,7 @@ declare <4 x float> @llvm.x86.avx2.gather.d.ps(<4 x float>, i8*, <4 x i32>, <4 x define <8 x float> @test_gatherdps_ymm(<8 x float> %a0, i8* %a1, <8 x i32> %a2, <8 x float> %a3) { ; GENERIC-LABEL: test_gatherdps_ymm: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vgatherdps %ymm2, (%rdi,%ymm1,4), %ymm0 # sched: [4:0.50] +; GENERIC-NEXT: vgatherdps %ymm2, (%rdi,%ymm1,4), %ymm0 # sched: [5:0.50] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: test_gatherdps_ymm: @@ -376,7 +376,7 @@ declare <8 x float> @llvm.x86.avx2.gather.d.ps.256(<8 x float>, i8*, <8 x i32>, define <2 x double> @test_gatherqpd(<2 x double> %a0, i8* 
%a1, <2 x i64> %a2, <2 x double> %a3) { ; GENERIC-LABEL: test_gatherqpd: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vgatherqpd %xmm2, (%rdi,%xmm1,2), %xmm0 # sched: [4:0.50] +; GENERIC-NEXT: vgatherqpd %xmm2, (%rdi,%xmm1,2), %xmm0 # sched: [5:0.50] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: test_gatherqpd: @@ -411,7 +411,7 @@ declare <2 x double> @llvm.x86.avx2.gather.q.pd(<2 x double>, i8*, <2 x i64>, <2 define <4 x double> @test_gatherqpd_ymm(<4 x double> %a0, i8* %a1, <4 x i64> %a2, <4 x double> %a3) { ; GENERIC-LABEL: test_gatherqpd_ymm: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vgatherqpd %ymm2, (%rdi,%ymm1,8), %ymm0 # sched: [4:0.50] +; GENERIC-NEXT: vgatherqpd %ymm2, (%rdi,%ymm1,8), %ymm0 # sched: [5:0.50] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: test_gatherqpd_ymm: @@ -446,7 +446,7 @@ declare <4 x double> @llvm.x86.avx2.gather.q.pd.256(<4 x double>, i8*, <4 x i64> define <4 x float> @test_gatherqps(<4 x float> %a0, i8* %a1, <2 x i64> %a2, <4 x float> %a3) { ; GENERIC-LABEL: test_gatherqps: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vgatherqps %xmm2, (%rdi,%xmm1,2), %xmm0 # sched: [4:0.50] +; GENERIC-NEXT: vgatherqps %xmm2, (%rdi,%xmm1,2), %xmm0 # sched: [5:0.50] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: test_gatherqps: @@ -481,7 +481,7 @@ declare <4 x float> @llvm.x86.avx2.gather.q.ps(<4 x float>, i8*, <2 x i64>, <4 x define <4 x float> @test_gatherqps_ymm(<4 x float> %a0, i8* %a1, <4 x i64> %a2, <4 x float> %a3) { ; GENERIC-LABEL: test_gatherqps_ymm: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vgatherqps %xmm2, (%rdi,%ymm1,4), %xmm0 # sched: [4:0.50] +; GENERIC-NEXT: vgatherqps %xmm2, (%rdi,%ymm1,4), %xmm0 # sched: [5:0.50] ; GENERIC-NEXT: vzeroupper # sched: [100:0.33] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -523,7 +523,7 @@ define <8 x i32> @test_inserti128(<8 x i32> %a0, <4 x i32> %a1, <4 x i32> *%a2) ; GENERIC-LABEL: test_inserti128: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm1 # sched: [1:1.00] -; 
GENERIC-NEXT: vinserti128 $1, (%rdi), %ymm0, %ymm0 # sched: [5:1.00] +; GENERIC-NEXT: vinserti128 $1, (%rdi), %ymm0, %ymm0 # sched: [6:1.00] ; GENERIC-NEXT: vpaddd %ymm0, %ymm1, %ymm0 # sched: [3:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -609,7 +609,7 @@ define <16 x i16> @test_mpsadbw(<32 x i8> %a0, <32 x i8> %a1, <32 x i8> *%a2) { ; GENERIC-LABEL: test_mpsadbw: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vmpsadbw $7, %ymm1, %ymm0, %ymm0 # sched: [5:1.00] -; GENERIC-NEXT: vmpsadbw $7, (%rdi), %ymm0, %ymm0 # sched: [9:1.00] +; GENERIC-NEXT: vmpsadbw $7, (%rdi), %ymm0, %ymm0 # sched: [10:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: test_mpsadbw: @@ -653,7 +653,7 @@ define <32 x i8> @test_pabsb(<32 x i8> %a0, <32 x i8> *%a1) { ; GENERIC-LABEL: test_pabsb: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vpabsb %ymm0, %ymm0 # sched: [3:1.00] -; GENERIC-NEXT: vpabsb (%rdi), %ymm1 # sched: [7:1.00] +; GENERIC-NEXT: vpabsb (%rdi), %ymm1 # sched: [8:1.00] ; GENERIC-NEXT: vpor %ymm1, %ymm0, %ymm0 # sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -703,7 +703,7 @@ define <8 x i32> @test_pabsd(<8 x i32> %a0, <8 x i32> *%a1) { ; GENERIC-LABEL: test_pabsd: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vpabsd %ymm0, %ymm0 # sched: [3:1.00] -; GENERIC-NEXT: vpabsd (%rdi), %ymm1 # sched: [7:1.00] +; GENERIC-NEXT: vpabsd (%rdi), %ymm1 # sched: [8:1.00] ; GENERIC-NEXT: vpor %ymm1, %ymm0, %ymm0 # sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -753,7 +753,7 @@ define <16 x i16> @test_pabsw(<16 x i16> %a0, <16 x i16> *%a1) { ; GENERIC-LABEL: test_pabsw: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vpabsw %ymm0, %ymm0 # sched: [3:1.00] -; GENERIC-NEXT: vpabsw (%rdi), %ymm1 # sched: [7:1.00] +; GENERIC-NEXT: vpabsw (%rdi), %ymm1 # sched: [8:1.00] ; GENERIC-NEXT: vpor %ymm1, %ymm0, %ymm0 # sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -803,7 +803,7 @@ define <16 x i16> @test_packssdw(<8 x i32> %a0, <8 x i32> %a1, <8 x i32> *%a2) { ; GENERIC-LABEL: test_packssdw: 
; GENERIC: # %bb.0: ; GENERIC-NEXT: vpackssdw %ymm1, %ymm0, %ymm0 # sched: [1:1.00] -; GENERIC-NEXT: vpackssdw (%rdi), %ymm0, %ymm0 # sched: [5:1.00] +; GENERIC-NEXT: vpackssdw (%rdi), %ymm0, %ymm0 # sched: [6:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: test_packssdw: @@ -847,7 +847,7 @@ define <32 x i8> @test_packsswb(<16 x i16> %a0, <16 x i16> %a1, <16 x i16> *%a2) ; GENERIC-LABEL: test_packsswb: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vpacksswb %ymm1, %ymm0, %ymm0 # sched: [1:1.00] -; GENERIC-NEXT: vpacksswb (%rdi), %ymm0, %ymm0 # sched: [5:1.00] +; GENERIC-NEXT: vpacksswb (%rdi), %ymm0, %ymm0 # sched: [6:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: test_packsswb: @@ -891,7 +891,7 @@ define <16 x i16> @test_packusdw(<8 x i32> %a0, <8 x i32> %a1, <8 x i32> *%a2) { ; GENERIC-LABEL: test_packusdw: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vpackusdw %ymm1, %ymm0, %ymm0 # sched: [1:1.00] -; GENERIC-NEXT: vpackusdw (%rdi), %ymm0, %ymm0 # sched: [5:1.00] +; GENERIC-NEXT: vpackusdw (%rdi), %ymm0, %ymm0 # sched: [6:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: test_packusdw: @@ -935,7 +935,7 @@ define <32 x i8> @test_packuswb(<16 x i16> %a0, <16 x i16> %a1, <16 x i16> *%a2) ; GENERIC-LABEL: test_packuswb: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vpackuswb %ymm1, %ymm0, %ymm0 # sched: [1:1.00] -; GENERIC-NEXT: vpackuswb (%rdi), %ymm0, %ymm0 # sched: [5:1.00] +; GENERIC-NEXT: vpackuswb (%rdi), %ymm0, %ymm0 # sched: [6:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: test_packuswb: @@ -979,7 +979,7 @@ define <32 x i8> @test_paddb(<32 x i8> %a0, <32 x i8> %a1, <32 x i8> *%a2) { ; GENERIC-LABEL: test_paddb: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vpaddb %ymm1, %ymm0, %ymm0 # sched: [3:1.00] -; GENERIC-NEXT: vpaddb (%rdi), %ymm0, %ymm0 # sched: [7:1.00] +; GENERIC-NEXT: vpaddb (%rdi), %ymm0, %ymm0 # sched: [8:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: test_paddb: @@ -1021,7 +1021,7 @@ define <8 x i32> 
@test_paddd(<8 x i32> %a0, <8 x i32> %a1, <8 x i32> *%a2) { ; GENERIC-LABEL: test_paddd: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vpaddd %ymm1, %ymm0, %ymm0 # sched: [3:1.00] -; GENERIC-NEXT: vpaddd (%rdi), %ymm0, %ymm0 # sched: [7:1.00] +; GENERIC-NEXT: vpaddd (%rdi), %ymm0, %ymm0 # sched: [8:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: test_paddd: @@ -1063,7 +1063,7 @@ define <4 x i64> @test_paddq(<4 x i64> %a0, <4 x i64> %a1, <4 x i64> *%a2) { ; GENERIC-LABEL: test_paddq: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [3:1.00] -; GENERIC-NEXT: vpaddq (%rdi), %ymm0, %ymm0 # sched: [7:1.00] +; GENERIC-NEXT: vpaddq (%rdi), %ymm0, %ymm0 # sched: [8:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: test_paddq: @@ -1105,7 +1105,7 @@ define <32 x i8> @test_paddsb(<32 x i8> %a0, <32 x i8> %a1, <32 x i8> *%a2) { ; GENERIC-LABEL: test_paddsb: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vpaddsb %ymm1, %ymm0, %ymm0 # sched: [3:1.00] -; GENERIC-NEXT: vpaddsb (%rdi), %ymm0, %ymm0 # sched: [7:1.00] +; GENERIC-NEXT: vpaddsb (%rdi), %ymm0, %ymm0 # sched: [8:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: test_paddsb: @@ -1148,7 +1148,7 @@ define <16 x i16> @test_paddsw(<16 x i16> %a0, <16 x i16> %a1, <16 x i16> *%a2) ; GENERIC-LABEL: test_paddsw: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vpaddsw %ymm1, %ymm0, %ymm0 # sched: [3:1.00] -; GENERIC-NEXT: vpaddsw (%rdi), %ymm0, %ymm0 # sched: [7:1.00] +; GENERIC-NEXT: vpaddsw (%rdi), %ymm0, %ymm0 # sched: [8:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: test_paddsw: @@ -1191,7 +1191,7 @@ define <32 x i8> @test_paddusb(<32 x i8> %a0, <32 x i8> %a1, <32 x i8> *%a2) { ; GENERIC-LABEL: test_paddusb: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vpaddusb %ymm1, %ymm0, %ymm0 # sched: [3:1.00] -; GENERIC-NEXT: vpaddusb (%rdi), %ymm0, %ymm0 # sched: [7:1.00] +; GENERIC-NEXT: vpaddusb (%rdi), %ymm0, %ymm0 # sched: [8:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: 
test_paddusb: @@ -1234,7 +1234,7 @@ define <16 x i16> @test_paddusw(<16 x i16> %a0, <16 x i16> %a1, <16 x i16> *%a2) ; GENERIC-LABEL: test_paddusw: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vpaddusw %ymm1, %ymm0, %ymm0 # sched: [3:1.00] -; GENERIC-NEXT: vpaddusw (%rdi), %ymm0, %ymm0 # sched: [7:1.00] +; GENERIC-NEXT: vpaddusw (%rdi), %ymm0, %ymm0 # sched: [8:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: test_paddusw: @@ -1277,7 +1277,7 @@ define <16 x i16> @test_paddw(<16 x i16> %a0, <16 x i16> %a1, <16 x i16> *%a2) { ; GENERIC-LABEL: test_paddw: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vpaddw %ymm1, %ymm0, %ymm0 # sched: [3:1.00] -; GENERIC-NEXT: vpaddw (%rdi), %ymm0, %ymm0 # sched: [7:1.00] +; GENERIC-NEXT: vpaddw (%rdi), %ymm0, %ymm0 # sched: [8:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: test_paddw: @@ -1368,7 +1368,7 @@ define <4 x i64> @test_pand(<4 x i64> %a0, <4 x i64> %a1, <4 x i64> *%a2) { ; GENERIC-LABEL: test_pand: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vpand %ymm1, %ymm0, %ymm0 # sched: [1:1.00] -; GENERIC-NEXT: vpand (%rdi), %ymm0, %ymm0 # sched: [5:1.00] +; GENERIC-NEXT: vpand (%rdi), %ymm0, %ymm0 # sched: [6:1.00] ; GENERIC-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [3:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -1417,7 +1417,7 @@ define <4 x i64> @test_pandn(<4 x i64> %a0, <4 x i64> %a1, <4 x i64> *%a2) { ; GENERIC-LABEL: test_pandn: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vpandn %ymm1, %ymm0, %ymm0 # sched: [1:1.00] -; GENERIC-NEXT: vpandn (%rdi), %ymm0, %ymm1 # sched: [5:1.00] +; GENERIC-NEXT: vpandn (%rdi), %ymm0, %ymm1 # sched: [6:1.00] ; GENERIC-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [3:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -1468,7 +1468,7 @@ define <32 x i8> @test_pavgb(<32 x i8> %a0, <32 x i8> %a1, <32 x i8> *%a2) { ; GENERIC-LABEL: test_pavgb: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vpavgb %ymm1, %ymm0, %ymm0 # sched: [3:1.00] -; GENERIC-NEXT: vpavgb (%rdi), %ymm0, %ymm0 # sched: [7:1.00] +; GENERIC-NEXT: 
vpavgb (%rdi), %ymm0, %ymm0 # sched: [8:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: test_pavgb: @@ -1520,7 +1520,7 @@ define <16 x i16> @test_pavgw(<16 x i16> %a0, <16 x i16> %a1, <16 x i16> *%a2) { ; GENERIC-LABEL: test_pavgw: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vpavgw %ymm1, %ymm0, %ymm0 # sched: [3:1.00] -; GENERIC-NEXT: vpavgw (%rdi), %ymm0, %ymm0 # sched: [7:1.00] +; GENERIC-NEXT: vpavgw (%rdi), %ymm0, %ymm0 # sched: [8:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: test_pavgw: @@ -1572,7 +1572,7 @@ define <4 x i32> @test_pblendd(<4 x i32> %a0, <4 x i32> %a1, <4 x i32> *%a2) { ; GENERIC-LABEL: test_pblendd: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vpblendd {{.*#+}} xmm0 = xmm1[0,1,2],xmm0[3] sched: [1:0.50] -; GENERIC-NEXT: vpblendd {{.*#+}} xmm1 = mem[0],xmm1[1],mem[2],xmm1[3] sched: [5:0.50] +; GENERIC-NEXT: vpblendd {{.*#+}} xmm1 = mem[0],xmm1[1],mem[2],xmm1[3] sched: [6:0.50] ; GENERIC-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -1621,7 +1621,7 @@ define <8 x i32> @test_pblendd_ymm(<8 x i32> %a0, <8 x i32> %a1, <8 x i32> *%a2) ; GENERIC-LABEL: test_pblendd_ymm: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vpblendd {{.*#+}} ymm0 = ymm1[0,1,2],ymm0[3,4,5,6],ymm1[7] sched: [1:0.50] -; GENERIC-NEXT: vpblendd {{.*#+}} ymm1 = ymm1[0],mem[1,2],ymm1[3,4,5,6,7] sched: [5:0.50] +; GENERIC-NEXT: vpblendd {{.*#+}} ymm1 = ymm1[0],mem[1,2],ymm1[3,4,5,6,7] sched: [6:0.50] ; GENERIC-NEXT: vpaddd %ymm1, %ymm0, %ymm0 # sched: [3:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -1670,7 +1670,7 @@ define <32 x i8> @test_pblendvb(<32 x i8> %a0, <32 x i8> %a1, <32 x i8> %a2, <32 ; GENERIC-LABEL: test_pblendvb: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vpblendvb %ymm2, %ymm1, %ymm0, %ymm0 # sched: [2:1.00] -; GENERIC-NEXT: vpblendvb %ymm3, (%rdi), %ymm0, %ymm0 # sched: [6:1.00] +; GENERIC-NEXT: vpblendvb %ymm3, (%rdi), %ymm0, %ymm0 # sched: [7:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: 
test_pblendvb: @@ -1713,7 +1713,7 @@ define <16 x i16> @test_pblendw(<16 x i16> %a0, <16 x i16> %a1, <16 x i16> *%a2) ; GENERIC-LABEL: test_pblendw: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vpblendw {{.*#+}} ymm0 = ymm0[0,1],ymm1[2,3,4],ymm0[5,6,7,8,9],ymm1[10,11,12],ymm0[13,14,15] sched: [1:0.50] -; GENERIC-NEXT: vpblendw {{.*#+}} ymm1 = mem[0],ymm1[1],mem[2],ymm1[3],mem[4],ymm1[5],mem[6],ymm1[7],mem[8],ymm1[9],mem[10],ymm1[11],mem[12],ymm1[13],mem[14],ymm1[15] sched: [5:0.50] +; GENERIC-NEXT: vpblendw {{.*#+}} ymm1 = mem[0],ymm1[1],mem[2],ymm1[3],mem[4],ymm1[5],mem[6],ymm1[7],mem[8],ymm1[9],mem[10],ymm1[11],mem[12],ymm1[13],mem[14],ymm1[15] sched: [6:0.50] ; GENERIC-NEXT: vpaddw %ymm1, %ymm0, %ymm0 # sched: [3:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -1762,7 +1762,7 @@ define <16 x i8> @test_pbroadcastb(<16 x i8> %a0, <16 x i8> *%a1) { ; GENERIC-LABEL: test_pbroadcastb: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vpbroadcastb %xmm0, %xmm0 # sched: [1:1.00] -; GENERIC-NEXT: vpbroadcastb (%rdi), %xmm1 # sched: [5:1.00] +; GENERIC-NEXT: vpbroadcastb (%rdi), %xmm1 # sched: [6:1.00] ; GENERIC-NEXT: vpaddb %xmm1, %xmm0, %xmm0 # sched: [1:0.50] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -1811,7 +1811,7 @@ define <32 x i8> @test_pbroadcastb_ymm(<32 x i8> %a0, <32 x i8> *%a1) { ; GENERIC-LABEL: test_pbroadcastb_ymm: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vpbroadcastb %xmm0, %ymm0 # sched: [1:1.00] -; GENERIC-NEXT: vpbroadcastb (%rdi), %ymm1 # sched: [5:1.00] +; GENERIC-NEXT: vpbroadcastb (%rdi), %ymm1 # sched: [6:1.00] ; GENERIC-NEXT: vpaddb %ymm1, %ymm0, %ymm0 # sched: [3:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -1860,7 +1860,7 @@ define <4 x i32> @test_pbroadcastd(<4 x i32> %a0, <4 x i32> *%a1) { ; GENERIC-LABEL: test_pbroadcastd: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vpbroadcastd %xmm0, %xmm0 # sched: [1:1.00] -; GENERIC-NEXT: vpbroadcastd (%rdi), %xmm1 # sched: [5:1.00] +; GENERIC-NEXT: vpbroadcastd (%rdi), %xmm1 # sched: [6:1.00] ; GENERIC-NEXT: vpaddd %xmm1, %xmm0, 
%xmm0 # sched: [1:0.50] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -1909,7 +1909,7 @@ define <8 x i32> @test_pbroadcastd_ymm(<8 x i32> %a0, <8 x i32> *%a1) { ; GENERIC-LABEL: test_pbroadcastd_ymm: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vpbroadcastd %xmm0, %ymm0 # sched: [1:1.00] -; GENERIC-NEXT: vpbroadcastd (%rdi), %ymm1 # sched: [5:1.00] +; GENERIC-NEXT: vpbroadcastd (%rdi), %ymm1 # sched: [6:1.00] ; GENERIC-NEXT: vpaddd %ymm1, %ymm0, %ymm0 # sched: [3:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -1958,7 +1958,7 @@ define <2 x i64> @test_pbroadcastq(<2 x i64> %a0, <2 x i64> *%a1) { ; GENERIC-LABEL: test_pbroadcastq: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vpbroadcastq %xmm0, %xmm0 # sched: [1:1.00] -; GENERIC-NEXT: vpbroadcastq (%rdi), %xmm1 # sched: [5:1.00] +; GENERIC-NEXT: vpbroadcastq (%rdi), %xmm1 # sched: [6:1.00] ; GENERIC-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -2007,7 +2007,7 @@ define <4 x i64> @test_pbroadcastq_ymm(<4 x i64> %a0, <4 x i64> *%a1) { ; GENERIC-LABEL: test_pbroadcastq_ymm: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vpbroadcastq %xmm0, %ymm0 # sched: [1:1.00] -; GENERIC-NEXT: vpbroadcastq (%rdi), %ymm1 # sched: [5:1.00] +; GENERIC-NEXT: vpbroadcastq (%rdi), %ymm1 # sched: [6:1.00] ; GENERIC-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [3:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -2056,7 +2056,7 @@ define <8 x i16> @test_pbroadcastw(<8 x i16> %a0, <8 x i16> *%a1) { ; GENERIC-LABEL: test_pbroadcastw: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vpbroadcastw %xmm0, %xmm0 # sched: [1:1.00] -; GENERIC-NEXT: vpbroadcastw (%rdi), %xmm1 # sched: [5:1.00] +; GENERIC-NEXT: vpbroadcastw (%rdi), %xmm1 # sched: [6:1.00] ; GENERIC-NEXT: vpaddw %xmm1, %xmm0, %xmm0 # sched: [1:0.50] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -2105,7 +2105,7 @@ define <16 x i16> @test_pbroadcastw_ymm(<16 x i16> %a0, <16 x i16> *%a1) { ; GENERIC-LABEL: test_pbroadcastw_ymm: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vpbroadcastw %xmm0, 
%ymm0 # sched: [1:1.00] -; GENERIC-NEXT: vpbroadcastw (%rdi), %ymm1 # sched: [5:1.00] +; GENERIC-NEXT: vpbroadcastw (%rdi), %ymm1 # sched: [6:1.00] ; GENERIC-NEXT: vpaddw %ymm1, %ymm0, %ymm0 # sched: [3:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -2154,7 +2154,7 @@ define <32 x i8> @test_pcmpeqb(<32 x i8> %a0, <32 x i8> %a1, <32 x i8> *%a2) { ; GENERIC-LABEL: test_pcmpeqb: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vpcmpeqb %ymm1, %ymm0, %ymm0 # sched: [3:1.00] -; GENERIC-NEXT: vpcmpeqb (%rdi), %ymm0, %ymm0 # sched: [7:1.00] +; GENERIC-NEXT: vpcmpeqb (%rdi), %ymm0, %ymm0 # sched: [8:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: test_pcmpeqb: @@ -2198,7 +2198,7 @@ define <8 x i32> @test_pcmpeqd(<8 x i32> %a0, <8 x i32> %a1, <8 x i32> *%a2) { ; GENERIC-LABEL: test_pcmpeqd: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vpcmpeqd %ymm1, %ymm0, %ymm0 # sched: [3:1.00] -; GENERIC-NEXT: vpcmpeqd (%rdi), %ymm0, %ymm0 # sched: [7:1.00] +; GENERIC-NEXT: vpcmpeqd (%rdi), %ymm0, %ymm0 # sched: [8:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: test_pcmpeqd: @@ -2242,7 +2242,7 @@ define <4 x i64> @test_pcmpeqq(<4 x i64> %a0, <4 x i64> %a1, <4 x i64> *%a2) { ; GENERIC-LABEL: test_pcmpeqq: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vpcmpeqq %ymm1, %ymm0, %ymm0 # sched: [3:1.00] -; GENERIC-NEXT: vpcmpeqq (%rdi), %ymm0, %ymm0 # sched: [7:1.00] +; GENERIC-NEXT: vpcmpeqq (%rdi), %ymm0, %ymm0 # sched: [8:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: test_pcmpeqq: @@ -2286,7 +2286,7 @@ define <16 x i16> @test_pcmpeqw(<16 x i16> %a0, <16 x i16> %a1, <16 x i16> *%a2) ; GENERIC-LABEL: test_pcmpeqw: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vpcmpeqw %ymm1, %ymm0, %ymm0 # sched: [3:1.00] -; GENERIC-NEXT: vpcmpeqw (%rdi), %ymm0, %ymm0 # sched: [7:1.00] +; GENERIC-NEXT: vpcmpeqw (%rdi), %ymm0, %ymm0 # sched: [8:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: test_pcmpeqw: @@ -2330,7 +2330,7 @@ define <32 x i8> @test_pcmpgtb(<32 x i8> %a0, <32 x i8> 
%a1, <32 x i8> *%a2) { ; GENERIC-LABEL: test_pcmpgtb: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vpcmpgtb %ymm1, %ymm0, %ymm0 # sched: [3:1.00] -; GENERIC-NEXT: vpcmpgtb (%rdi), %ymm0, %ymm0 # sched: [7:1.00] +; GENERIC-NEXT: vpcmpgtb (%rdi), %ymm0, %ymm0 # sched: [8:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: test_pcmpgtb: @@ -2374,7 +2374,7 @@ define <8 x i32> @test_pcmpgtd(<8 x i32> %a0, <8 x i32> %a1, <8 x i32> *%a2) { ; GENERIC-LABEL: test_pcmpgtd: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vpcmpgtd %ymm1, %ymm0, %ymm0 # sched: [3:1.00] -; GENERIC-NEXT: vpcmpgtd (%rdi), %ymm0, %ymm0 # sched: [7:1.00] +; GENERIC-NEXT: vpcmpgtd (%rdi), %ymm0, %ymm0 # sched: [8:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: test_pcmpgtd: @@ -2418,7 +2418,7 @@ define <4 x i64> @test_pcmpgtq(<4 x i64> %a0, <4 x i64> %a1, <4 x i64> *%a2) { ; GENERIC-LABEL: test_pcmpgtq: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vpcmpgtq %ymm1, %ymm0, %ymm0 # sched: [3:1.00] -; GENERIC-NEXT: vpcmpgtq (%rdi), %ymm0, %ymm0 # sched: [7:1.00] +; GENERIC-NEXT: vpcmpgtq (%rdi), %ymm0, %ymm0 # sched: [8:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: test_pcmpgtq: @@ -2462,7 +2462,7 @@ define <16 x i16> @test_pcmpgtw(<16 x i16> %a0, <16 x i16> %a1, <16 x i16> *%a2) ; GENERIC-LABEL: test_pcmpgtw: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vpcmpgtw %ymm1, %ymm0, %ymm0 # sched: [3:1.00] -; GENERIC-NEXT: vpcmpgtw (%rdi), %ymm0, %ymm0 # sched: [7:1.00] +; GENERIC-NEXT: vpcmpgtw (%rdi), %ymm0, %ymm0 # sched: [8:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: test_pcmpgtw: @@ -2506,7 +2506,7 @@ define <4 x i64> @test_perm2i128(<4 x i64> %a0, <4 x i64> %a1, <4 x i64> *%a2) { ; GENERIC-LABEL: test_perm2i128: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vperm2i128 {{.*#+}} ymm1 = ymm0[2,3],ymm1[0,1] sched: [1:1.00] -; GENERIC-NEXT: vperm2i128 {{.*#+}} ymm0 = ymm0[2,3],mem[0,1] sched: [5:1.00] +; GENERIC-NEXT: vperm2i128 {{.*#+}} ymm0 = ymm0[2,3],mem[0,1] sched: [6:1.00] ; 
GENERIC-NEXT: vpaddq %ymm0, %ymm1, %ymm0 # sched: [3:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -2555,7 +2555,7 @@ define <8 x i32> @test_permd(<8 x i32> %a0, <8 x i32> %a1, <8 x i32> *%a2) { ; GENERIC-LABEL: test_permd: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vpermd %ymm1, %ymm0, %ymm1 # sched: [1:1.00] -; GENERIC-NEXT: vpermd (%rdi), %ymm0, %ymm0 # sched: [5:1.00] +; GENERIC-NEXT: vpermd (%rdi), %ymm0, %ymm0 # sched: [6:1.00] ; GENERIC-NEXT: vpaddd %ymm0, %ymm1, %ymm0 # sched: [3:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -2605,7 +2605,7 @@ define <4 x double> @test_permpd(<4 x double> %a0, <4 x double> *%a1) { ; GENERIC-LABEL: test_permpd: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vpermpd {{.*#+}} ymm0 = ymm0[3,2,2,3] sched: [1:1.00] -; GENERIC-NEXT: vpermpd {{.*#+}} ymm1 = mem[0,2,2,3] sched: [5:1.00] +; GENERIC-NEXT: vpermpd {{.*#+}} ymm1 = mem[0,2,2,3] sched: [6:1.00] ; GENERIC-NEXT: vaddpd %ymm1, %ymm0, %ymm0 # sched: [3:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -2654,7 +2654,7 @@ define <8 x float> @test_permps(<8 x i32> %a0, <8 x float> %a1, <8 x float> *%a2 ; GENERIC-LABEL: test_permps: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vpermps %ymm1, %ymm0, %ymm1 # sched: [1:1.00] -; GENERIC-NEXT: vpermps (%rdi), %ymm0, %ymm0 # sched: [5:1.00] +; GENERIC-NEXT: vpermps (%rdi), %ymm0, %ymm0 # sched: [6:1.00] ; GENERIC-NEXT: vaddps %ymm0, %ymm1, %ymm0 # sched: [3:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -2704,7 +2704,7 @@ define <4 x i64> @test_permq(<4 x i64> %a0, <4 x i64> *%a1) { ; GENERIC-LABEL: test_permq: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vpermq {{.*#+}} ymm0 = ymm0[3,2,2,3] sched: [1:1.00] -; GENERIC-NEXT: vpermq {{.*#+}} ymm1 = mem[0,2,2,3] sched: [5:1.00] +; GENERIC-NEXT: vpermq {{.*#+}} ymm1 = mem[0,2,2,3] sched: [6:1.00] ; GENERIC-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [3:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -2752,7 +2752,7 @@ define <4 x i64> @test_permq(<4 x i64> %a0, <4 x i64> *%a1) { define <4 x i32> @test_pgatherdd(<4 
x i32> %a0, i8* %a1, <4 x i32> %a2, <4 x i32> %a3) { ; GENERIC-LABEL: test_pgatherdd: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vpgatherdd %xmm2, (%rdi,%xmm1,2), %xmm0 # sched: [4:0.50] +; GENERIC-NEXT: vpgatherdd %xmm2, (%rdi,%xmm1,2), %xmm0 # sched: [5:0.50] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: test_pgatherdd: @@ -2787,7 +2787,7 @@ declare <4 x i32> @llvm.x86.avx2.gather.d.d(<4 x i32>, i8*, <4 x i32>, <4 x i32> define <8 x i32> @test_pgatherdd_ymm(<8 x i32> %a0, i8* %a1, <8 x i32> %a2, <8 x i32> %a3) { ; GENERIC-LABEL: test_pgatherdd_ymm: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vpgatherdd %ymm2, (%rdi,%ymm1,2), %ymm0 # sched: [4:0.50] +; GENERIC-NEXT: vpgatherdd %ymm2, (%rdi,%ymm1,2), %ymm0 # sched: [5:0.50] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: test_pgatherdd_ymm: @@ -2822,7 +2822,7 @@ declare <8 x i32> @llvm.x86.avx2.gather.d.d.256(<8 x i32>, i8*, <8 x i32>, <8 x define <2 x i64> @test_pgatherdq(<2 x i64> %a0, i8* %a1, <4 x i32> %a2, <2 x i64> %a3) { ; GENERIC-LABEL: test_pgatherdq: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vpgatherdq %xmm2, (%rdi,%xmm1,2), %xmm0 # sched: [4:0.50] +; GENERIC-NEXT: vpgatherdq %xmm2, (%rdi,%xmm1,2), %xmm0 # sched: [5:0.50] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: test_pgatherdq: @@ -2857,7 +2857,7 @@ declare <2 x i64> @llvm.x86.avx2.gather.d.q(<2 x i64>, i8*, <4 x i32>, <2 x i64> define <4 x i64> @test_pgatherdq_ymm(<4 x i64> %a0, i8* %a1, <4 x i32> %a2, <4 x i64> %a3) { ; GENERIC-LABEL: test_pgatherdq_ymm: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vpgatherdq %ymm2, (%rdi,%xmm1,2), %ymm0 # sched: [4:0.50] +; GENERIC-NEXT: vpgatherdq %ymm2, (%rdi,%xmm1,2), %ymm0 # sched: [5:0.50] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: test_pgatherdq_ymm: @@ -2892,7 +2892,7 @@ declare <4 x i64> @llvm.x86.avx2.gather.d.q.256(<4 x i64>, i8*, <4 x i32>, <4 x define <4 x i32> @test_pgatherqd(<4 x i32> %a0, i8* %a1, <2 x i64> %a2, <4 x i32> %a3) { ; GENERIC-LABEL: test_pgatherqd: ; GENERIC: # 
%bb.0: -; GENERIC-NEXT: vpgatherqd %xmm2, (%rdi,%xmm1,2), %xmm0 # sched: [4:0.50] +; GENERIC-NEXT: vpgatherqd %xmm2, (%rdi,%xmm1,2), %xmm0 # sched: [5:0.50] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: test_pgatherqd: @@ -2927,7 +2927,7 @@ declare <4 x i32> @llvm.x86.avx2.gather.q.d(<4 x i32>, i8*, <2 x i64>, <4 x i32> define <4 x i32> @test_pgatherqd_ymm(<4 x i32> %a0, i8* %a1, <4 x i64> %a2, <4 x i32> %a3) { ; GENERIC-LABEL: test_pgatherqd_ymm: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vpgatherqd %xmm2, (%rdi,%ymm1,2), %xmm0 # sched: [4:0.50] +; GENERIC-NEXT: vpgatherqd %xmm2, (%rdi,%ymm1,2), %xmm0 # sched: [5:0.50] ; GENERIC-NEXT: vzeroupper # sched: [100:0.33] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -2968,7 +2968,7 @@ declare <4 x i32> @llvm.x86.avx2.gather.q.d.256(<4 x i32>, i8*, <4 x i64>, <4 x define <2 x i64> @test_pgatherqq(<2 x i64> %a0, i8 *%a1, <2 x i64> %a2, <2 x i64> %a3) { ; GENERIC-LABEL: test_pgatherqq: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vpgatherqq %xmm2, (%rdi,%xmm1,2), %xmm0 # sched: [4:0.50] +; GENERIC-NEXT: vpgatherqq %xmm2, (%rdi,%xmm1,2), %xmm0 # sched: [5:0.50] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: test_pgatherqq: @@ -3003,7 +3003,7 @@ declare <2 x i64> @llvm.x86.avx2.gather.q.q(<2 x i64>, i8*, <2 x i64>, <2 x i64> define <4 x i64> @test_pgatherqq_ymm(<4 x i64> %a0, i8 *%a1, <4 x i64> %a2, <4 x i64> %a3) { ; GENERIC-LABEL: test_pgatherqq_ymm: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vpgatherqq %ymm2, (%rdi,%ymm1,2), %ymm0 # sched: [4:0.50] +; GENERIC-NEXT: vpgatherqq %ymm2, (%rdi,%ymm1,2), %ymm0 # sched: [5:0.50] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: test_pgatherqq_ymm: @@ -3039,7 +3039,7 @@ define <8 x i32> @test_phaddd(<8 x i32> %a0, <8 x i32> %a1, <8 x i32> *%a2) { ; GENERIC-LABEL: test_phaddd: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vphaddd %ymm1, %ymm0, %ymm0 # sched: [1:0.50] -; GENERIC-NEXT: vphaddd (%rdi), %ymm0, %ymm0 # sched: [5:0.50] +; GENERIC-NEXT: vphaddd (%rdi), %ymm0, %ymm0 # 
sched: [6:0.50] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: test_phaddd: @@ -3082,7 +3082,7 @@ define <16 x i16> @test_phaddsw(<16 x i16> %a0, <16 x i16> %a1, <16 x i16> *%a2) ; GENERIC-LABEL: test_phaddsw: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vphaddsw %ymm1, %ymm0, %ymm0 # sched: [3:1.00] -; GENERIC-NEXT: vphaddsw (%rdi), %ymm0, %ymm0 # sched: [7:1.00] +; GENERIC-NEXT: vphaddsw (%rdi), %ymm0, %ymm0 # sched: [8:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: test_phaddsw: @@ -3125,7 +3125,7 @@ define <16 x i16> @test_phaddw(<16 x i16> %a0, <16 x i16> %a1, <16 x i16> *%a2) ; GENERIC-LABEL: test_phaddw: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vphaddw %ymm1, %ymm0, %ymm0 # sched: [1:0.50] -; GENERIC-NEXT: vphaddw (%rdi), %ymm0, %ymm0 # sched: [5:0.50] +; GENERIC-NEXT: vphaddw (%rdi), %ymm0, %ymm0 # sched: [6:0.50] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: test_phaddw: @@ -3168,7 +3168,7 @@ define <8 x i32> @test_phsubd(<8 x i32> %a0, <8 x i32> %a1, <8 x i32> *%a2) { ; GENERIC-LABEL: test_phsubd: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vphsubd %ymm1, %ymm0, %ymm0 # sched: [1:0.50] -; GENERIC-NEXT: vphsubd (%rdi), %ymm0, %ymm0 # sched: [5:0.50] +; GENERIC-NEXT: vphsubd (%rdi), %ymm0, %ymm0 # sched: [6:0.50] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: test_phsubd: @@ -3211,7 +3211,7 @@ define <16 x i16> @test_phsubsw(<16 x i16> %a0, <16 x i16> %a1, <16 x i16> *%a2) ; GENERIC-LABEL: test_phsubsw: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vphsubsw %ymm1, %ymm0, %ymm0 # sched: [3:1.00] -; GENERIC-NEXT: vphsubsw (%rdi), %ymm0, %ymm0 # sched: [7:1.00] +; GENERIC-NEXT: vphsubsw (%rdi), %ymm0, %ymm0 # sched: [8:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: test_phsubsw: @@ -3254,7 +3254,7 @@ define <16 x i16> @test_phsubw(<16 x i16> %a0, <16 x i16> %a1, <16 x i16> *%a2) ; GENERIC-LABEL: test_phsubw: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vphsubw %ymm1, %ymm0, %ymm0 # sched: [1:0.50] -; GENERIC-NEXT: vphsubw (%rdi), 
%ymm0, %ymm0 # sched: [5:0.50] +; GENERIC-NEXT: vphsubw (%rdi), %ymm0, %ymm0 # sched: [6:0.50] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: test_phsubw: @@ -3297,7 +3297,7 @@ define <16 x i16> @test_pmaddubsw(<32 x i8> %a0, <32 x i8> %a1, <32 x i8> *%a2) ; GENERIC-LABEL: test_pmaddubsw: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vpmaddubsw %ymm1, %ymm0, %ymm0 # sched: [5:1.00] -; GENERIC-NEXT: vpmaddubsw (%rdi), %ymm0, %ymm0 # sched: [9:1.00] +; GENERIC-NEXT: vpmaddubsw (%rdi), %ymm0, %ymm0 # sched: [10:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: test_pmaddubsw: @@ -3341,7 +3341,7 @@ define <8 x i32> @test_pmaddwd(<16 x i16> %a0, <16 x i16> %a1, <16 x i16> *%a2) ; GENERIC-LABEL: test_pmaddwd: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vpmaddwd %ymm1, %ymm0, %ymm0 # sched: [5:1.00] -; GENERIC-NEXT: vpmaddwd (%rdi), %ymm0, %ymm0 # sched: [9:1.00] +; GENERIC-NEXT: vpmaddwd (%rdi), %ymm0, %ymm0 # sched: [10:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: test_pmaddwd: @@ -3581,7 +3581,7 @@ define <32 x i8> @test_pmaxsb(<32 x i8> %a0, <32 x i8> %a1, <32 x i8> *%a2) { ; GENERIC-LABEL: test_pmaxsb: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vpmaxsb %ymm1, %ymm0, %ymm0 # sched: [3:1.00] -; GENERIC-NEXT: vpmaxsb (%rdi), %ymm0, %ymm0 # sched: [7:1.00] +; GENERIC-NEXT: vpmaxsb (%rdi), %ymm0, %ymm0 # sched: [8:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: test_pmaxsb: @@ -3624,7 +3624,7 @@ define <8 x i32> @test_pmaxsd(<8 x i32> %a0, <8 x i32> %a1, <8 x i32> *%a2) { ; GENERIC-LABEL: test_pmaxsd: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vpmaxsd %ymm1, %ymm0, %ymm0 # sched: [3:1.00] -; GENERIC-NEXT: vpmaxsd (%rdi), %ymm0, %ymm0 # sched: [7:1.00] +; GENERIC-NEXT: vpmaxsd (%rdi), %ymm0, %ymm0 # sched: [8:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: test_pmaxsd: @@ -3667,7 +3667,7 @@ define <16 x i16> @test_pmaxsw(<16 x i16> %a0, <16 x i16> %a1, <16 x i16> *%a2) ; GENERIC-LABEL: test_pmaxsw: ; GENERIC: # %bb.0: ; 
GENERIC-NEXT: vpmaxsw %ymm1, %ymm0, %ymm0 # sched: [3:1.00] -; GENERIC-NEXT: vpmaxsw (%rdi), %ymm0, %ymm0 # sched: [7:1.00] +; GENERIC-NEXT: vpmaxsw (%rdi), %ymm0, %ymm0 # sched: [8:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: test_pmaxsw: @@ -3710,7 +3710,7 @@ define <32 x i8> @test_pmaxub(<32 x i8> %a0, <32 x i8> %a1, <32 x i8> *%a2) { ; GENERIC-LABEL: test_pmaxub: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vpmaxub %ymm1, %ymm0, %ymm0 # sched: [3:1.00] -; GENERIC-NEXT: vpmaxub (%rdi), %ymm0, %ymm0 # sched: [7:1.00] +; GENERIC-NEXT: vpmaxub (%rdi), %ymm0, %ymm0 # sched: [8:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: test_pmaxub: @@ -3753,7 +3753,7 @@ define <8 x i32> @test_pmaxud(<8 x i32> %a0, <8 x i32> %a1, <8 x i32> *%a2) { ; GENERIC-LABEL: test_pmaxud: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vpmaxud %ymm1, %ymm0, %ymm0 # sched: [3:1.00] -; GENERIC-NEXT: vpmaxud (%rdi), %ymm0, %ymm0 # sched: [7:1.00] +; GENERIC-NEXT: vpmaxud (%rdi), %ymm0, %ymm0 # sched: [8:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: test_pmaxud: @@ -3796,7 +3796,7 @@ define <16 x i16> @test_pmaxuw(<16 x i16> %a0, <16 x i16> %a1, <16 x i16> *%a2) ; GENERIC-LABEL: test_pmaxuw: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vpmaxuw %ymm1, %ymm0, %ymm0 # sched: [3:1.00] -; GENERIC-NEXT: vpmaxuw (%rdi), %ymm0, %ymm0 # sched: [7:1.00] +; GENERIC-NEXT: vpmaxuw (%rdi), %ymm0, %ymm0 # sched: [8:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: test_pmaxuw: @@ -3839,7 +3839,7 @@ define <32 x i8> @test_pminsb(<32 x i8> %a0, <32 x i8> %a1, <32 x i8> *%a2) { ; GENERIC-LABEL: test_pminsb: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vpminsb %ymm1, %ymm0, %ymm0 # sched: [3:1.00] -; GENERIC-NEXT: vpminsb (%rdi), %ymm0, %ymm0 # sched: [7:1.00] +; GENERIC-NEXT: vpminsb (%rdi), %ymm0, %ymm0 # sched: [8:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: test_pminsb: @@ -3882,7 +3882,7 @@ define <8 x i32> @test_pminsd(<8 x i32> %a0, <8 x i32> %a1, <8 x i32> 
*%a2) { ; GENERIC-LABEL: test_pminsd: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vpminsd %ymm1, %ymm0, %ymm0 # sched: [3:1.00] -; GENERIC-NEXT: vpminsd (%rdi), %ymm0, %ymm0 # sched: [7:1.00] +; GENERIC-NEXT: vpminsd (%rdi), %ymm0, %ymm0 # sched: [8:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: test_pminsd: @@ -3925,7 +3925,7 @@ define <16 x i16> @test_pminsw(<16 x i16> %a0, <16 x i16> %a1, <16 x i16> *%a2) ; GENERIC-LABEL: test_pminsw: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vpminsw %ymm1, %ymm0, %ymm0 # sched: [3:1.00] -; GENERIC-NEXT: vpminsw (%rdi), %ymm0, %ymm0 # sched: [7:1.00] +; GENERIC-NEXT: vpminsw (%rdi), %ymm0, %ymm0 # sched: [8:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: test_pminsw: @@ -3968,7 +3968,7 @@ define <32 x i8> @test_pminub(<32 x i8> %a0, <32 x i8> %a1, <32 x i8> *%a2) { ; GENERIC-LABEL: test_pminub: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vpminub %ymm1, %ymm0, %ymm0 # sched: [3:1.00] -; GENERIC-NEXT: vpminub (%rdi), %ymm0, %ymm0 # sched: [7:1.00] +; GENERIC-NEXT: vpminub (%rdi), %ymm0, %ymm0 # sched: [8:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: test_pminub: @@ -4011,7 +4011,7 @@ define <8 x i32> @test_pminud(<8 x i32> %a0, <8 x i32> %a1, <8 x i32> *%a2) { ; GENERIC-LABEL: test_pminud: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vpminud %ymm1, %ymm0, %ymm0 # sched: [3:1.00] -; GENERIC-NEXT: vpminud (%rdi), %ymm0, %ymm0 # sched: [7:1.00] +; GENERIC-NEXT: vpminud (%rdi), %ymm0, %ymm0 # sched: [8:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: test_pminud: @@ -4054,7 +4054,7 @@ define <16 x i16> @test_pminuw(<16 x i16> %a0, <16 x i16> %a1, <16 x i16> *%a2) ; GENERIC-LABEL: test_pminuw: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vpminuw %ymm1, %ymm0, %ymm0 # sched: [3:1.00] -; GENERIC-NEXT: vpminuw (%rdi), %ymm0, %ymm0 # sched: [7:1.00] +; GENERIC-NEXT: vpminuw (%rdi), %ymm0, %ymm0 # sched: [8:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: test_pminuw: @@ -4138,7 +4138,7 @@ define <8 
x i32> @test_pmovsxbd(<16 x i8> %a0, <16 x i8> *%a1) { ; GENERIC-LABEL: test_pmovsxbd: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vpmovsxbd %xmm0, %ymm0 # sched: [1:1.00] -; GENERIC-NEXT: vpmovsxbd (%rdi), %ymm1 # sched: [5:1.00] +; GENERIC-NEXT: vpmovsxbd (%rdi), %ymm1 # sched: [6:1.00] ; GENERIC-NEXT: vpaddd %ymm1, %ymm0, %ymm0 # sched: [3:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -4189,7 +4189,7 @@ define <4 x i64> @test_pmovsxbq(<16 x i8> %a0, <16 x i8> *%a1) { ; GENERIC-LABEL: test_pmovsxbq: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vpmovsxbq %xmm0, %ymm0 # sched: [1:1.00] -; GENERIC-NEXT: vpmovsxbq (%rdi), %ymm1 # sched: [5:1.00] +; GENERIC-NEXT: vpmovsxbq (%rdi), %ymm1 # sched: [6:1.00] ; GENERIC-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [3:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -4240,7 +4240,7 @@ define <16 x i16> @test_pmovsxbw(<16 x i8> %a0, <16 x i8> *%a1) { ; GENERIC-LABEL: test_pmovsxbw: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vpmovsxbw %xmm0, %ymm0 # sched: [1:1.00] -; GENERIC-NEXT: vpmovsxbw (%rdi), %ymm1 # sched: [5:1.00] +; GENERIC-NEXT: vpmovsxbw (%rdi), %ymm1 # sched: [6:1.00] ; GENERIC-NEXT: vpaddw %ymm1, %ymm0, %ymm0 # sched: [3:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -4289,7 +4289,7 @@ define <4 x i64> @test_pmovsxdq(<4 x i32> %a0, <4 x i32> *%a1) { ; GENERIC-LABEL: test_pmovsxdq: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vpmovsxdq %xmm0, %ymm0 # sched: [1:1.00] -; GENERIC-NEXT: vpmovsxdq (%rdi), %ymm1 # sched: [5:1.00] +; GENERIC-NEXT: vpmovsxdq (%rdi), %ymm1 # sched: [6:1.00] ; GENERIC-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [3:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -4338,7 +4338,7 @@ define <8 x i32> @test_pmovsxwd(<8 x i16> %a0, <8 x i16> *%a1) { ; GENERIC-LABEL: test_pmovsxwd: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vpmovsxwd %xmm0, %ymm0 # sched: [1:1.00] -; GENERIC-NEXT: vpmovsxwd (%rdi), %ymm1 # sched: [5:1.00] +; GENERIC-NEXT: vpmovsxwd (%rdi), %ymm1 # sched: [6:1.00] ; GENERIC-NEXT: vpaddd %ymm1, %ymm0, %ymm0 # sched: 
[3:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -4387,7 +4387,7 @@ define <4 x i64> @test_pmovsxwq(<8 x i16> %a0, <8 x i16> *%a1) { ; GENERIC-LABEL: test_pmovsxwq: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vpmovsxwq %xmm0, %ymm0 # sched: [1:1.00] -; GENERIC-NEXT: vpmovsxwq (%rdi), %ymm1 # sched: [5:1.00] +; GENERIC-NEXT: vpmovsxwq (%rdi), %ymm1 # sched: [6:1.00] ; GENERIC-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [3:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -4438,7 +4438,7 @@ define <8 x i32> @test_pmovzxbd(<16 x i8> %a0, <16 x i8> *%a1) { ; GENERIC-LABEL: test_pmovzxbd: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vpmovzxbd {{.*#+}} ymm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero sched: [1:1.00] -; GENERIC-NEXT: vpmovzxbd {{.*#+}} ymm1 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero,mem[4],zero,zero,zero,mem[5],zero,zero,zero,mem[6],zero,zero,zero,mem[7],zero,zero,zero sched: [5:1.00] +; GENERIC-NEXT: vpmovzxbd {{.*#+}} ymm1 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero,mem[4],zero,zero,zero,mem[5],zero,zero,zero,mem[6],zero,zero,zero,mem[7],zero,zero,zero sched: [6:1.00] ; GENERIC-NEXT: vpaddd %ymm1, %ymm0, %ymm0 # sched: [3:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -4489,7 +4489,7 @@ define <4 x i64> @test_pmovzxbq(<16 x i8> %a0, <16 x i8> *%a1) { ; GENERIC-LABEL: test_pmovzxbq: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vpmovzxbq {{.*#+}} ymm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero,xmm0[2],zero,zero,zero,zero,zero,zero,zero,xmm0[3],zero,zero,zero,zero,zero,zero,zero sched: [1:1.00] -; GENERIC-NEXT: vpmovzxbq {{.*#+}} ymm1 = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero,mem[2],zero,zero,zero,zero,zero,zero,zero,mem[3],zero,zero,zero,zero,zero,zero,zero 
sched: [5:1.00] +; GENERIC-NEXT: vpmovzxbq {{.*#+}} ymm1 = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero,mem[2],zero,zero,zero,zero,zero,zero,zero,mem[3],zero,zero,zero,zero,zero,zero,zero sched: [6:1.00] ; GENERIC-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [3:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -4540,7 +4540,7 @@ define <16 x i16> @test_pmovzxbw(<16 x i8> %a0, <16 x i8> *%a1) { ; GENERIC-LABEL: test_pmovzxbw: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vpmovzxbw {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero sched: [1:1.00] -; GENERIC-NEXT: vpmovzxbw {{.*#+}} ymm1 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero,mem[8],zero,mem[9],zero,mem[10],zero,mem[11],zero,mem[12],zero,mem[13],zero,mem[14],zero,mem[15],zero sched: [5:1.00] +; GENERIC-NEXT: vpmovzxbw {{.*#+}} ymm1 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero,mem[8],zero,mem[9],zero,mem[10],zero,mem[11],zero,mem[12],zero,mem[13],zero,mem[14],zero,mem[15],zero sched: [6:1.00] ; GENERIC-NEXT: vpaddw %ymm1, %ymm0, %ymm0 # sched: [3:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -4589,7 +4589,7 @@ define <4 x i64> @test_pmovzxdq(<4 x i32> %a0, <4 x i32> *%a1) { ; GENERIC-LABEL: test_pmovzxdq: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vpmovzxdq {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero sched: [1:1.00] -; GENERIC-NEXT: vpmovzxdq {{.*#+}} ymm1 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero sched: [5:1.00] +; GENERIC-NEXT: vpmovzxdq {{.*#+}} ymm1 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero sched: [6:1.00] ; GENERIC-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [3:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -4638,7 +4638,7 @@ define <8 x i32> @test_pmovzxwd(<8 x 
i16> %a0, <8 x i16> *%a1) { ; GENERIC-LABEL: test_pmovzxwd: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero sched: [1:1.00] -; GENERIC-NEXT: vpmovzxwd {{.*#+}} ymm1 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero sched: [5:1.00] +; GENERIC-NEXT: vpmovzxwd {{.*#+}} ymm1 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero sched: [6:1.00] ; GENERIC-NEXT: vpaddd %ymm1, %ymm0, %ymm0 # sched: [3:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -4687,7 +4687,7 @@ define <4 x i64> @test_pmovzxwq(<8 x i16> %a0, <8 x i16> *%a1) { ; GENERIC-LABEL: test_pmovzxwq: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vpmovzxwq {{.*#+}} ymm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero sched: [1:1.00] -; GENERIC-NEXT: vpmovzxwq {{.*#+}} ymm1 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero sched: [5:1.00] +; GENERIC-NEXT: vpmovzxwq {{.*#+}} ymm1 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero sched: [6:1.00] ; GENERIC-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [3:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -4738,7 +4738,7 @@ define <4 x i64> @test_pmuldq(<8 x i32> %a0, <8 x i32> %a1, <8 x i32> *%a2) { ; GENERIC-LABEL: test_pmuldq: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vpmuldq %ymm1, %ymm0, %ymm0 # sched: [5:1.00] -; GENERIC-NEXT: vpmuldq (%rdi), %ymm0, %ymm0 # sched: [9:1.00] +; GENERIC-NEXT: vpmuldq (%rdi), %ymm0, %ymm0 # sched: [10:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: test_pmuldq: @@ -4782,7 +4782,7 @@ define <16 x i16> @test_pmulhrsw(<16 x i16> %a0, <16 x i16> %a1, <16 x i16> *%a2 ; GENERIC-LABEL: test_pmulhrsw: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vpmulhrsw %ymm1, %ymm0, %ymm0 # sched: [5:1.00] -; GENERIC-NEXT: vpmulhrsw 
(%rdi), %ymm0, %ymm0 # sched: [9:1.00] +; GENERIC-NEXT: vpmulhrsw (%rdi), %ymm0, %ymm0 # sched: [10:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: test_pmulhrsw: @@ -4825,7 +4825,7 @@ define <16 x i16> @test_pmulhuw(<16 x i16> %a0, <16 x i16> %a1, <16 x i16> *%a2) ; GENERIC-LABEL: test_pmulhuw: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vpmulhuw %ymm1, %ymm0, %ymm0 # sched: [5:1.00] -; GENERIC-NEXT: vpmulhuw (%rdi), %ymm0, %ymm0 # sched: [9:1.00] +; GENERIC-NEXT: vpmulhuw (%rdi), %ymm0, %ymm0 # sched: [10:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: test_pmulhuw: @@ -4868,7 +4868,7 @@ define <16 x i16> @test_pmulhw(<16 x i16> %a0, <16 x i16> %a1, <16 x i16> *%a2) ; GENERIC-LABEL: test_pmulhw: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vpmulhw %ymm1, %ymm0, %ymm0 # sched: [5:1.00] -; GENERIC-NEXT: vpmulhw (%rdi), %ymm0, %ymm0 # sched: [9:1.00] +; GENERIC-NEXT: vpmulhw (%rdi), %ymm0, %ymm0 # sched: [10:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: test_pmulhw: @@ -4953,7 +4953,7 @@ define <16 x i16> @test_pmullw(<16 x i16> %a0, <16 x i16> %a1, <16 x i16> *%a2) ; GENERIC-LABEL: test_pmullw: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vpmullw %ymm1, %ymm0, %ymm0 # sched: [5:1.00] -; GENERIC-NEXT: vpmullw (%rdi), %ymm0, %ymm0 # sched: [9:1.00] +; GENERIC-NEXT: vpmullw (%rdi), %ymm0, %ymm0 # sched: [10:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: test_pmullw: @@ -4995,7 +4995,7 @@ define <4 x i64> @test_pmuludq(<8 x i32> %a0, <8 x i32> %a1, <8 x i32> *%a2) { ; GENERIC-LABEL: test_pmuludq: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vpmuludq %ymm1, %ymm0, %ymm0 # sched: [5:1.00] -; GENERIC-NEXT: vpmuludq (%rdi), %ymm0, %ymm0 # sched: [9:1.00] +; GENERIC-NEXT: vpmuludq (%rdi), %ymm0, %ymm0 # sched: [10:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: test_pmuludq: @@ -5039,7 +5039,7 @@ define <4 x i64> @test_por(<4 x i64> %a0, <4 x i64> %a1, <4 x i64> *%a2) { ; GENERIC-LABEL: test_por: ; GENERIC: # %bb.0: ; 
GENERIC-NEXT: vpor %ymm1, %ymm0, %ymm0 # sched: [1:1.00] -; GENERIC-NEXT: vpor (%rdi), %ymm0, %ymm0 # sched: [5:1.00] +; GENERIC-NEXT: vpor (%rdi), %ymm0, %ymm0 # sched: [6:1.00] ; GENERIC-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [3:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -5088,7 +5088,7 @@ define <4 x i64> @test_psadbw(<32 x i8> %a0, <32 x i8> %a1, <32 x i8> *%a2) { ; GENERIC-LABEL: test_psadbw: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vpsadbw %ymm1, %ymm0, %ymm0 # sched: [3:1.00] -; GENERIC-NEXT: vpsadbw (%rdi), %ymm0, %ymm0 # sched: [7:1.00] +; GENERIC-NEXT: vpsadbw (%rdi), %ymm0, %ymm0 # sched: [8:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: test_psadbw: @@ -5132,7 +5132,7 @@ define <32 x i8> @test_pshufb(<32 x i8> %a0, <32 x i8> %a1, <32 x i8> *%a2) { ; GENERIC-LABEL: test_pshufb: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vpshufb %ymm1, %ymm0, %ymm0 # sched: [1:1.00] -; GENERIC-NEXT: vpshufb (%rdi), %ymm0, %ymm0 # sched: [5:1.00] +; GENERIC-NEXT: vpshufb (%rdi), %ymm0, %ymm0 # sched: [6:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: test_pshufb: @@ -5175,7 +5175,7 @@ define <8 x i32> @test_pshufd(<8 x i32> %a0, <8 x i32> *%a1) { ; GENERIC-LABEL: test_pshufd: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4] sched: [1:1.00] -; GENERIC-NEXT: vpshufd {{.*#+}} ymm1 = mem[1,0,3,2,5,4,7,6] sched: [5:1.00] +; GENERIC-NEXT: vpshufd {{.*#+}} ymm1 = mem[1,0,3,2,5,4,7,6] sched: [6:1.00] ; GENERIC-NEXT: vpaddd %ymm1, %ymm0, %ymm0 # sched: [3:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -5224,7 +5224,7 @@ define <16 x i16> @test_pshufhw(<16 x i16> %a0, <16 x i16> *%a1) { ; GENERIC-LABEL: test_pshufhw: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vpshufhw {{.*#+}} ymm0 = ymm0[0,1,2,3,7,6,5,4,8,9,10,11,15,14,13,12] sched: [1:1.00] -; GENERIC-NEXT: vpshufhw {{.*#+}} ymm1 = mem[0,1,2,3,5,4,7,6,8,9,10,11,13,12,15,14] sched: [5:1.00] +; GENERIC-NEXT: vpshufhw {{.*#+}} ymm1 = 
mem[0,1,2,3,5,4,7,6,8,9,10,11,13,12,15,14] sched: [6:1.00] ; GENERIC-NEXT: vpor %ymm1, %ymm0, %ymm0 # sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -5273,7 +5273,7 @@ define <16 x i16> @test_pshuflw(<16 x i16> %a0, <16 x i16> *%a1) { ; GENERIC-LABEL: test_pshuflw: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vpshuflw {{.*#+}} ymm0 = ymm0[3,2,1,0,4,5,6,7,11,10,9,8,12,13,14,15] sched: [1:1.00] -; GENERIC-NEXT: vpshuflw {{.*#+}} ymm1 = mem[1,0,3,2,4,5,6,7,9,8,11,10,12,13,14,15] sched: [5:1.00] +; GENERIC-NEXT: vpshuflw {{.*#+}} ymm1 = mem[1,0,3,2,4,5,6,7,9,8,11,10,12,13,14,15] sched: [6:1.00] ; GENERIC-NEXT: vpor %ymm1, %ymm0, %ymm0 # sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -5322,7 +5322,7 @@ define <32 x i8> @test_psignb(<32 x i8> %a0, <32 x i8> %a1, <32 x i8> *%a2) { ; GENERIC-LABEL: test_psignb: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vpsignb %ymm1, %ymm0, %ymm0 # sched: [3:1.00] -; GENERIC-NEXT: vpsignb (%rdi), %ymm0, %ymm0 # sched: [7:1.00] +; GENERIC-NEXT: vpsignb (%rdi), %ymm0, %ymm0 # sched: [8:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: test_psignb: @@ -5365,7 +5365,7 @@ define <8 x i32> @test_psignd(<8 x i32> %a0, <8 x i32> %a1, <8 x i32> *%a2) { ; GENERIC-LABEL: test_psignd: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vpsignd %ymm1, %ymm0, %ymm0 # sched: [3:1.00] -; GENERIC-NEXT: vpsignd (%rdi), %ymm0, %ymm0 # sched: [7:1.00] +; GENERIC-NEXT: vpsignd (%rdi), %ymm0, %ymm0 # sched: [8:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: test_psignd: @@ -5408,7 +5408,7 @@ define <16 x i16> @test_psignw(<16 x i16> %a0, <16 x i16> %a1, <16 x i16> *%a2) ; GENERIC-LABEL: test_psignw: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vpsignw %ymm1, %ymm0, %ymm0 # sched: [3:1.00] -; GENERIC-NEXT: vpsignw (%rdi), %ymm0, %ymm0 # sched: [7:1.00] +; GENERIC-NEXT: vpsignw (%rdi), %ymm0, %ymm0 # sched: [8:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: test_psignw: @@ -5451,7 +5451,7 @@ define <8 x i32> @test_pslld(<8 x i32> 
%a0, <4 x i32> %a1, <4 x i32> *%a2) { ; GENERIC-LABEL: test_pslld: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vpslld %xmm1, %ymm0, %ymm0 # sched: [1:1.00] -; GENERIC-NEXT: vpslld (%rdi), %ymm0, %ymm0 # sched: [5:1.00] +; GENERIC-NEXT: vpslld (%rdi), %ymm0, %ymm0 # sched: [6:1.00] ; GENERIC-NEXT: vpslld $2, %ymm0, %ymm0 # sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -5535,7 +5535,7 @@ define <4 x i64> @test_psllq(<4 x i64> %a0, <2 x i64> %a1, <2 x i64> *%a2) { ; GENERIC-LABEL: test_psllq: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vpsllq %xmm1, %ymm0, %ymm0 # sched: [1:1.00] -; GENERIC-NEXT: vpsllq (%rdi), %ymm0, %ymm0 # sched: [5:1.00] +; GENERIC-NEXT: vpsllq (%rdi), %ymm0, %ymm0 # sched: [6:1.00] ; GENERIC-NEXT: vpsllq $2, %ymm0, %ymm0 # sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -5585,7 +5585,7 @@ define <4 x i32> @test_psllvd(<4 x i32> %a0, <4 x i32> %a1, <4 x i32> *%a2) { ; GENERIC-LABEL: test_psllvd: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vpsllvd %xmm1, %xmm0, %xmm0 # sched: [1:1.00] -; GENERIC-NEXT: vpsllvd (%rdi), %xmm0, %xmm0 # sched: [5:1.00] +; GENERIC-NEXT: vpsllvd (%rdi), %xmm0, %xmm0 # sched: [6:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: test_psllvd: @@ -5628,7 +5628,7 @@ define <8 x i32> @test_psllvd_ymm(<8 x i32> %a0, <8 x i32> %a1, <8 x i32> *%a2) ; GENERIC-LABEL: test_psllvd_ymm: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vpsllvd %ymm1, %ymm0, %ymm0 # sched: [1:1.00] -; GENERIC-NEXT: vpsllvd (%rdi), %ymm0, %ymm0 # sched: [5:1.00] +; GENERIC-NEXT: vpsllvd (%rdi), %ymm0, %ymm0 # sched: [6:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: test_psllvd_ymm: @@ -5671,7 +5671,7 @@ define <2 x i64> @test_psllvq(<2 x i64> %a0, <2 x i64> %a1, <2 x i64> *%a2) { ; GENERIC-LABEL: test_psllvq: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vpsllvq %xmm1, %xmm0, %xmm0 # sched: [1:1.00] -; GENERIC-NEXT: vpsllvq (%rdi), %xmm0, %xmm0 # sched: [5:1.00] +; GENERIC-NEXT: vpsllvq (%rdi), %xmm0, %xmm0 # sched: [6:1.00] ; GENERIC-NEXT: retq 
# sched: [1:1.00] ; ; HASWELL-LABEL: test_psllvq: @@ -5714,7 +5714,7 @@ define <4 x i64> @test_psllvq_ymm(<4 x i64> %a0, <4 x i64> %a1, <4 x i64> *%a2) ; GENERIC-LABEL: test_psllvq_ymm: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vpsllvq %ymm1, %ymm0, %ymm0 # sched: [1:1.00] -; GENERIC-NEXT: vpsllvq (%rdi), %ymm0, %ymm0 # sched: [5:1.00] +; GENERIC-NEXT: vpsllvq (%rdi), %ymm0, %ymm0 # sched: [6:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: test_psllvq_ymm: @@ -5757,7 +5757,7 @@ define <16 x i16> @test_psllw(<16 x i16> %a0, <8 x i16> %a1, <8 x i16> *%a2) { ; GENERIC-LABEL: test_psllw: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vpsllw %xmm1, %ymm0, %ymm0 # sched: [1:1.00] -; GENERIC-NEXT: vpsllw (%rdi), %ymm0, %ymm0 # sched: [5:1.00] +; GENERIC-NEXT: vpsllw (%rdi), %ymm0, %ymm0 # sched: [6:1.00] ; GENERIC-NEXT: vpsllw $2, %ymm0, %ymm0 # sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -5807,7 +5807,7 @@ define <8 x i32> @test_psrad(<8 x i32> %a0, <4 x i32> %a1, <4 x i32> *%a2) { ; GENERIC-LABEL: test_psrad: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vpsrad %xmm1, %ymm0, %ymm0 # sched: [1:1.00] -; GENERIC-NEXT: vpsrad (%rdi), %ymm0, %ymm0 # sched: [5:1.00] +; GENERIC-NEXT: vpsrad (%rdi), %ymm0, %ymm0 # sched: [6:1.00] ; GENERIC-NEXT: vpsrad $2, %ymm0, %ymm0 # sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -5857,7 +5857,7 @@ define <4 x i32> @test_psravd(<4 x i32> %a0, <4 x i32> %a1, <4 x i32> *%a2) { ; GENERIC-LABEL: test_psravd: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vpsravd %xmm1, %xmm0, %xmm0 # sched: [1:1.00] -; GENERIC-NEXT: vpsravd (%rdi), %xmm0, %xmm0 # sched: [5:1.00] +; GENERIC-NEXT: vpsravd (%rdi), %xmm0, %xmm0 # sched: [6:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: test_psravd: @@ -5900,7 +5900,7 @@ define <8 x i32> @test_psravd_ymm(<8 x i32> %a0, <8 x i32> %a1, <8 x i32> *%a2) ; GENERIC-LABEL: test_psravd_ymm: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vpsravd %ymm1, %ymm0, %ymm0 # sched: [1:1.00] -; GENERIC-NEXT: vpsravd 
(%rdi), %ymm0, %ymm0 # sched: [5:1.00] +; GENERIC-NEXT: vpsravd (%rdi), %ymm0, %ymm0 # sched: [6:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: test_psravd_ymm: @@ -5943,7 +5943,7 @@ define <16 x i16> @test_psraw(<16 x i16> %a0, <8 x i16> %a1, <8 x i16> *%a2) { ; GENERIC-LABEL: test_psraw: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vpsraw %xmm1, %ymm0, %ymm0 # sched: [1:1.00] -; GENERIC-NEXT: vpsraw (%rdi), %ymm0, %ymm0 # sched: [5:1.00] +; GENERIC-NEXT: vpsraw (%rdi), %ymm0, %ymm0 # sched: [6:1.00] ; GENERIC-NEXT: vpsraw $2, %ymm0, %ymm0 # sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -5993,7 +5993,7 @@ define <8 x i32> @test_psrld(<8 x i32> %a0, <4 x i32> %a1, <4 x i32> *%a2) { ; GENERIC-LABEL: test_psrld: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vpsrld %xmm1, %ymm0, %ymm0 # sched: [1:1.00] -; GENERIC-NEXT: vpsrld (%rdi), %ymm0, %ymm0 # sched: [5:1.00] +; GENERIC-NEXT: vpsrld (%rdi), %ymm0, %ymm0 # sched: [6:1.00] ; GENERIC-NEXT: vpsrld $2, %ymm0, %ymm0 # sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -6077,7 +6077,7 @@ define <4 x i64> @test_psrlq(<4 x i64> %a0, <2 x i64> %a1, <2 x i64> *%a2) { ; GENERIC-LABEL: test_psrlq: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vpsrlq %xmm1, %ymm0, %ymm0 # sched: [1:1.00] -; GENERIC-NEXT: vpsrlq (%rdi), %ymm0, %ymm0 # sched: [5:1.00] +; GENERIC-NEXT: vpsrlq (%rdi), %ymm0, %ymm0 # sched: [6:1.00] ; GENERIC-NEXT: vpsrlq $2, %ymm0, %ymm0 # sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -6127,7 +6127,7 @@ define <4 x i32> @test_psrlvd(<4 x i32> %a0, <4 x i32> %a1, <4 x i32> *%a2) { ; GENERIC-LABEL: test_psrlvd: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vpsrlvd %xmm1, %xmm0, %xmm0 # sched: [1:1.00] -; GENERIC-NEXT: vpsrlvd (%rdi), %xmm0, %xmm0 # sched: [5:1.00] +; GENERIC-NEXT: vpsrlvd (%rdi), %xmm0, %xmm0 # sched: [6:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: test_psrlvd: @@ -6170,7 +6170,7 @@ define <8 x i32> @test_psrlvd_ymm(<8 x i32> %a0, <8 x i32> %a1, <8 x i32> *%a2) ; 
GENERIC-LABEL: test_psrlvd_ymm: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vpsrlvd %ymm1, %ymm0, %ymm0 # sched: [1:1.00] -; GENERIC-NEXT: vpsrlvd (%rdi), %ymm0, %ymm0 # sched: [5:1.00] +; GENERIC-NEXT: vpsrlvd (%rdi), %ymm0, %ymm0 # sched: [6:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: test_psrlvd_ymm: @@ -6213,7 +6213,7 @@ define <2 x i64> @test_psrlvq(<2 x i64> %a0, <2 x i64> %a1, <2 x i64> *%a2) { ; GENERIC-LABEL: test_psrlvq: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vpsrlvq %xmm1, %xmm0, %xmm0 # sched: [1:1.00] -; GENERIC-NEXT: vpsrlvq (%rdi), %xmm0, %xmm0 # sched: [5:1.00] +; GENERIC-NEXT: vpsrlvq (%rdi), %xmm0, %xmm0 # sched: [6:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: test_psrlvq: @@ -6256,7 +6256,7 @@ define <4 x i64> @test_psrlvq_ymm(<4 x i64> %a0, <4 x i64> %a1, <4 x i64> *%a2) ; GENERIC-LABEL: test_psrlvq_ymm: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vpsrlvq %ymm1, %ymm0, %ymm0 # sched: [1:1.00] -; GENERIC-NEXT: vpsrlvq (%rdi), %ymm0, %ymm0 # sched: [5:1.00] +; GENERIC-NEXT: vpsrlvq (%rdi), %ymm0, %ymm0 # sched: [6:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: test_psrlvq_ymm: @@ -6299,7 +6299,7 @@ define <16 x i16> @test_psrlw(<16 x i16> %a0, <8 x i16> %a1, <8 x i16> *%a2) { ; GENERIC-LABEL: test_psrlw: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vpsrlw %xmm1, %ymm0, %ymm0 # sched: [1:1.00] -; GENERIC-NEXT: vpsrlw (%rdi), %ymm0, %ymm0 # sched: [5:1.00] +; GENERIC-NEXT: vpsrlw (%rdi), %ymm0, %ymm0 # sched: [6:1.00] ; GENERIC-NEXT: vpsrlw $2, %ymm0, %ymm0 # sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -6349,7 +6349,7 @@ define <32 x i8> @test_psubb(<32 x i8> %a0, <32 x i8> %a1, <32 x i8> *%a2) { ; GENERIC-LABEL: test_psubb: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vpsubb %ymm1, %ymm0, %ymm0 # sched: [3:1.00] -; GENERIC-NEXT: vpsubb (%rdi), %ymm0, %ymm0 # sched: [7:1.00] +; GENERIC-NEXT: vpsubb (%rdi), %ymm0, %ymm0 # sched: [8:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: test_psubb: @@ 
-6391,7 +6391,7 @@ define <8 x i32> @test_psubd(<8 x i32> %a0, <8 x i32> %a1, <8 x i32> *%a2) { ; GENERIC-LABEL: test_psubd: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vpsubd %ymm1, %ymm0, %ymm0 # sched: [3:1.00] -; GENERIC-NEXT: vpsubd (%rdi), %ymm0, %ymm0 # sched: [7:1.00] +; GENERIC-NEXT: vpsubd (%rdi), %ymm0, %ymm0 # sched: [8:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: test_psubd: @@ -6433,7 +6433,7 @@ define <4 x i64> @test_psubq(<4 x i64> %a0, <4 x i64> %a1, <4 x i64> *%a2) { ; GENERIC-LABEL: test_psubq: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vpsubq %ymm1, %ymm0, %ymm0 # sched: [3:1.00] -; GENERIC-NEXT: vpsubq (%rdi), %ymm0, %ymm0 # sched: [7:1.00] +; GENERIC-NEXT: vpsubq (%rdi), %ymm0, %ymm0 # sched: [8:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: test_psubq: @@ -6475,7 +6475,7 @@ define <32 x i8> @test_psubsb(<32 x i8> %a0, <32 x i8> %a1, <32 x i8> *%a2) { ; GENERIC-LABEL: test_psubsb: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vpsubsb %ymm1, %ymm0, %ymm0 # sched: [3:1.00] -; GENERIC-NEXT: vpsubsb (%rdi), %ymm0, %ymm0 # sched: [7:1.00] +; GENERIC-NEXT: vpsubsb (%rdi), %ymm0, %ymm0 # sched: [8:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: test_psubsb: @@ -6518,7 +6518,7 @@ define <16 x i16> @test_psubsw(<16 x i16> %a0, <16 x i16> %a1, <16 x i16> *%a2) ; GENERIC-LABEL: test_psubsw: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vpsubsw %ymm1, %ymm0, %ymm0 # sched: [3:1.00] -; GENERIC-NEXT: vpsubsw (%rdi), %ymm0, %ymm0 # sched: [7:1.00] +; GENERIC-NEXT: vpsubsw (%rdi), %ymm0, %ymm0 # sched: [8:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: test_psubsw: @@ -6561,7 +6561,7 @@ define <32 x i8> @test_psubusb(<32 x i8> %a0, <32 x i8> %a1, <32 x i8> *%a2) { ; GENERIC-LABEL: test_psubusb: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vpsubusb %ymm1, %ymm0, %ymm0 # sched: [3:1.00] -; GENERIC-NEXT: vpsubusb (%rdi), %ymm0, %ymm0 # sched: [7:1.00] +; GENERIC-NEXT: vpsubusb (%rdi), %ymm0, %ymm0 # sched: [8:1.00] ; GENERIC-NEXT: retq # 
sched: [1:1.00] ; ; HASWELL-LABEL: test_psubusb: @@ -6604,7 +6604,7 @@ define <16 x i16> @test_psubusw(<16 x i16> %a0, <16 x i16> %a1, <16 x i16> *%a2) ; GENERIC-LABEL: test_psubusw: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vpsubusw %ymm1, %ymm0, %ymm0 # sched: [3:1.00] -; GENERIC-NEXT: vpsubusw (%rdi), %ymm0, %ymm0 # sched: [7:1.00] +; GENERIC-NEXT: vpsubusw (%rdi), %ymm0, %ymm0 # sched: [8:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: test_psubusw: @@ -6647,7 +6647,7 @@ define <16 x i16> @test_psubw(<16 x i16> %a0, <16 x i16> %a1, <16 x i16> *%a2) { ; GENERIC-LABEL: test_psubw: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vpsubw %ymm1, %ymm0, %ymm0 # sched: [3:1.00] -; GENERIC-NEXT: vpsubw (%rdi), %ymm0, %ymm0 # sched: [7:1.00] +; GENERIC-NEXT: vpsubw (%rdi), %ymm0, %ymm0 # sched: [8:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: test_psubw: @@ -6689,7 +6689,7 @@ define <32 x i8> @test_punpckhbw(<32 x i8> %a0, <32 x i8> %a1, <32 x i8> *%a2) { ; GENERIC-LABEL: test_punpckhbw: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vpunpckhbw {{.*#+}} ymm0 = ymm0[8],ymm1[8],ymm0[9],ymm1[9],ymm0[10],ymm1[10],ymm0[11],ymm1[11],ymm0[12],ymm1[12],ymm0[13],ymm1[13],ymm0[14],ymm1[14],ymm0[15],ymm1[15],ymm0[24],ymm1[24],ymm0[25],ymm1[25],ymm0[26],ymm1[26],ymm0[27],ymm1[27],ymm0[28],ymm1[28],ymm0[29],ymm1[29],ymm0[30],ymm1[30],ymm0[31],ymm1[31] sched: [1:1.00] -; GENERIC-NEXT: vpunpckhbw {{.*#+}} ymm0 = ymm0[8],mem[8],ymm0[9],mem[9],ymm0[10],mem[10],ymm0[11],mem[11],ymm0[12],mem[12],ymm0[13],mem[13],ymm0[14],mem[14],ymm0[15],mem[15],ymm0[24],mem[24],ymm0[25],mem[25],ymm0[26],mem[26],ymm0[27],mem[27],ymm0[28],mem[28],ymm0[29],mem[29],ymm0[30],mem[30],ymm0[31],mem[31] sched: [5:1.00] +; GENERIC-NEXT: vpunpckhbw {{.*#+}} ymm0 = 
ymm0[8],mem[8],ymm0[9],mem[9],ymm0[10],mem[10],ymm0[11],mem[11],ymm0[12],mem[12],ymm0[13],mem[13],ymm0[14],mem[14],ymm0[15],mem[15],ymm0[24],mem[24],ymm0[25],mem[25],ymm0[26],mem[26],ymm0[27],mem[27],ymm0[28],mem[28],ymm0[29],mem[29],ymm0[30],mem[30],ymm0[31],mem[31] sched: [6:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: test_punpckhbw: @@ -6731,7 +6731,7 @@ define <8 x i32> @test_punpckhdq(<8 x i32> %a0, <8 x i32> %a1, <8 x i32> *%a2) { ; GENERIC-LABEL: test_punpckhdq: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vpunpckhdq {{.*#+}} ymm0 = ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[6],ymm1[6],ymm0[7],ymm1[7] sched: [1:1.00] -; GENERIC-NEXT: vpunpckhdq {{.*#+}} ymm0 = ymm0[2],mem[2],ymm0[3],mem[3],ymm0[6],mem[6],ymm0[7],mem[7] sched: [5:1.00] +; GENERIC-NEXT: vpunpckhdq {{.*#+}} ymm0 = ymm0[2],mem[2],ymm0[3],mem[3],ymm0[6],mem[6],ymm0[7],mem[7] sched: [6:1.00] ; GENERIC-NEXT: vpcmpeqd %ymm1, %ymm1, %ymm1 # sched: [3:1.00] ; GENERIC-NEXT: vpsubd %ymm1, %ymm0, %ymm0 # sched: [3:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] @@ -6786,7 +6786,7 @@ define <4 x i64> @test_punpckhqdq(<4 x i64> %a0, <4 x i64> %a1, <4 x i64> *%a2) ; GENERIC-LABEL: test_punpckhqdq: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vpunpckhqdq {{.*#+}} ymm1 = ymm0[1],ymm1[1],ymm0[3],ymm1[3] sched: [1:1.00] -; GENERIC-NEXT: vpunpckhqdq {{.*#+}} ymm0 = ymm0[1],mem[1],ymm0[3],mem[3] sched: [5:1.00] +; GENERIC-NEXT: vpunpckhqdq {{.*#+}} ymm0 = ymm0[1],mem[1],ymm0[3],mem[3] sched: [6:1.00] ; GENERIC-NEXT: vpaddq %ymm0, %ymm1, %ymm0 # sched: [3:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -6835,7 +6835,7 @@ define <16 x i16> @test_punpckhwd(<16 x i16> %a0, <16 x i16> %a1, <16 x i16> *%a ; GENERIC-LABEL: test_punpckhwd: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vpunpckhwd {{.*#+}} ymm0 = ymm0[4],ymm1[4],ymm0[5],ymm1[5],ymm0[6],ymm1[6],ymm0[7],ymm1[7],ymm0[12],ymm1[12],ymm0[13],ymm1[13],ymm0[14],ymm1[14],ymm0[15],ymm1[15] sched: [1:1.00] -; GENERIC-NEXT: vpunpckhwd {{.*#+}} ymm0 = 
ymm0[4],mem[4],ymm0[5],mem[5],ymm0[6],mem[6],ymm0[7],mem[7],ymm0[12],mem[12],ymm0[13],mem[13],ymm0[14],mem[14],ymm0[15],mem[15] sched: [5:1.00] +; GENERIC-NEXT: vpunpckhwd {{.*#+}} ymm0 = ymm0[4],mem[4],ymm0[5],mem[5],ymm0[6],mem[6],ymm0[7],mem[7],ymm0[12],mem[12],ymm0[13],mem[13],ymm0[14],mem[14],ymm0[15],mem[15] sched: [6:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: test_punpckhwd: @@ -6877,7 +6877,7 @@ define <32 x i8> @test_punpcklbw(<32 x i8> %a0, <32 x i8> %a1, <32 x i8> *%a2) { ; GENERIC-LABEL: test_punpcklbw: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vpunpcklbw {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[4],ymm1[4],ymm0[5],ymm1[5],ymm0[6],ymm1[6],ymm0[7],ymm1[7],ymm0[16],ymm1[16],ymm0[17],ymm1[17],ymm0[18],ymm1[18],ymm0[19],ymm1[19],ymm0[20],ymm1[20],ymm0[21],ymm1[21],ymm0[22],ymm1[22],ymm0[23],ymm1[23] sched: [1:1.00] -; GENERIC-NEXT: vpunpcklbw {{.*#+}} ymm0 = ymm0[0],mem[0],ymm0[1],mem[1],ymm0[2],mem[2],ymm0[3],mem[3],ymm0[4],mem[4],ymm0[5],mem[5],ymm0[6],mem[6],ymm0[7],mem[7],ymm0[16],mem[16],ymm0[17],mem[17],ymm0[18],mem[18],ymm0[19],mem[19],ymm0[20],mem[20],ymm0[21],mem[21],ymm0[22],mem[22],ymm0[23],mem[23] sched: [5:1.00] +; GENERIC-NEXT: vpunpcklbw {{.*#+}} ymm0 = ymm0[0],mem[0],ymm0[1],mem[1],ymm0[2],mem[2],ymm0[3],mem[3],ymm0[4],mem[4],ymm0[5],mem[5],ymm0[6],mem[6],ymm0[7],mem[7],ymm0[16],mem[16],ymm0[17],mem[17],ymm0[18],mem[18],ymm0[19],mem[19],ymm0[20],mem[20],ymm0[21],mem[21],ymm0[22],mem[22],ymm0[23],mem[23] sched: [6:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: test_punpcklbw: @@ -6919,7 +6919,7 @@ define <8 x i32> @test_punpckldq(<8 x i32> %a0, <8 x i32> %a1, <8 x i32> *%a2) { ; GENERIC-LABEL: test_punpckldq: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vpunpckldq {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[4],ymm1[4],ymm0[5],ymm1[5] sched: [1:1.00] -; GENERIC-NEXT: vpunpckldq {{.*#+}} ymm0 = ymm0[0],mem[0],ymm0[1],mem[1],ymm0[4],mem[4],ymm0[5],mem[5] sched: [5:1.00] 
+; GENERIC-NEXT: vpunpckldq {{.*#+}} ymm0 = ymm0[0],mem[0],ymm0[1],mem[1],ymm0[4],mem[4],ymm0[5],mem[5] sched: [6:1.00] ; GENERIC-NEXT: vpcmpeqd %ymm1, %ymm1, %ymm1 # sched: [3:1.00] ; GENERIC-NEXT: vpsubd %ymm1, %ymm0, %ymm0 # sched: [3:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] @@ -6974,7 +6974,7 @@ define <4 x i64> @test_punpcklqdq(<4 x i64> %a0, <4 x i64> %a1, <4 x i64> *%a2) ; GENERIC-LABEL: test_punpcklqdq: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vpunpcklqdq {{.*#+}} ymm1 = ymm0[0],ymm1[0],ymm0[2],ymm1[2] sched: [1:1.00] -; GENERIC-NEXT: vpunpcklqdq {{.*#+}} ymm0 = ymm0[0],mem[0],ymm0[2],mem[2] sched: [5:1.00] +; GENERIC-NEXT: vpunpcklqdq {{.*#+}} ymm0 = ymm0[0],mem[0],ymm0[2],mem[2] sched: [6:1.00] ; GENERIC-NEXT: vpaddq %ymm0, %ymm1, %ymm0 # sched: [3:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -7023,7 +7023,7 @@ define <16 x i16> @test_punpcklwd(<16 x i16> %a0, <16 x i16> %a1, <16 x i16> *%a ; GENERIC-LABEL: test_punpcklwd: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vpunpcklwd {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[8],ymm1[8],ymm0[9],ymm1[9],ymm0[10],ymm1[10],ymm0[11],ymm1[11] sched: [1:1.00] -; GENERIC-NEXT: vpunpcklwd {{.*#+}} ymm0 = ymm0[0],mem[0],ymm0[1],mem[1],ymm0[2],mem[2],ymm0[3],mem[3],ymm0[8],mem[8],ymm0[9],mem[9],ymm0[10],mem[10],ymm0[11],mem[11] sched: [5:1.00] +; GENERIC-NEXT: vpunpcklwd {{.*#+}} ymm0 = ymm0[0],mem[0],ymm0[1],mem[1],ymm0[2],mem[2],ymm0[3],mem[3],ymm0[8],mem[8],ymm0[9],mem[9],ymm0[10],mem[10],ymm0[11],mem[11] sched: [6:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: test_punpcklwd: @@ -7065,7 +7065,7 @@ define <4 x i64> @test_pxor(<4 x i64> %a0, <4 x i64> %a1, <4 x i64> *%a2) { ; GENERIC-LABEL: test_pxor: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vpxor %ymm1, %ymm0, %ymm0 # sched: [1:1.00] -; GENERIC-NEXT: vpxor (%rdi), %ymm0, %ymm0 # sched: [5:1.00] +; GENERIC-NEXT: vpxor (%rdi), %ymm0, %ymm0 # sched: [6:1.00] ; GENERIC-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [3:1.00] ; 
GENERIC-NEXT: retq # sched: [1:1.00] ; diff --git a/llvm/test/CodeGen/X86/avx512-schedule.ll b/llvm/test/CodeGen/X86/avx512-schedule.ll index 41ea3c3..7b7651c 100755 --- a/llvm/test/CodeGen/X86/avx512-schedule.ll +++ b/llvm/test/CodeGen/X86/avx512-schedule.ll @@ -22,7 +22,7 @@ entry: define <8 x double> @addpd512fold(<8 x double> %y) { ; GENERIC-LABEL: addpd512fold: ; GENERIC: # %bb.0: # %entry -; GENERIC-NEXT: vaddpd {{.*}}(%rip), %zmm0, %zmm0 # sched: [7:1.00] +; GENERIC-NEXT: vaddpd {{.*}}(%rip), %zmm0, %zmm0 # sched: [8:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: addpd512fold: @@ -52,7 +52,7 @@ entry: define <16 x float> @addps512fold(<16 x float> %y) { ; GENERIC-LABEL: addps512fold: ; GENERIC: # %bb.0: # %entry -; GENERIC-NEXT: vaddps {{.*}}(%rip), %zmm0, %zmm0 # sched: [7:1.00] +; GENERIC-NEXT: vaddps {{.*}}(%rip), %zmm0, %zmm0 # sched: [8:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: addps512fold: @@ -82,7 +82,7 @@ entry: define <8 x double> @subpd512fold(<8 x double> %y, <8 x double>* %x) { ; GENERIC-LABEL: subpd512fold: ; GENERIC: # %bb.0: # %entry -; GENERIC-NEXT: vsubpd (%rdi), %zmm0, %zmm0 # sched: [7:1.00] +; GENERIC-NEXT: vsubpd (%rdi), %zmm0, %zmm0 # sched: [8:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: subpd512fold: @@ -113,7 +113,7 @@ entry: define <16 x float> @subps512fold(<16 x float> %y, <16 x float>* %x) { ; GENERIC-LABEL: subps512fold: ; GENERIC: # %bb.0: # %entry -; GENERIC-NEXT: vsubps (%rdi), %zmm0, %zmm0 # sched: [7:1.00] +; GENERIC-NEXT: vsubps (%rdi), %zmm0, %zmm0 # sched: [8:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: subps512fold: @@ -186,7 +186,7 @@ entry: define <8 x double> @mulpd512fold(<8 x double> %y) { ; GENERIC-LABEL: mulpd512fold: ; GENERIC: # %bb.0: # %entry -; GENERIC-NEXT: vmulpd {{.*}}(%rip), %zmm0, %zmm0 # sched: [9:1.00] +; GENERIC-NEXT: vmulpd {{.*}}(%rip), %zmm0, %zmm0 # sched: [10:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: mulpd512fold: @@ 
-216,7 +216,7 @@ entry: define <16 x float> @mulps512fold(<16 x float> %y) { ; GENERIC-LABEL: mulps512fold: ; GENERIC: # %bb.0: # %entry -; GENERIC-NEXT: vmulps {{.*}}(%rip), %zmm0, %zmm0 # sched: [9:1.00] +; GENERIC-NEXT: vmulps {{.*}}(%rip), %zmm0, %zmm0 # sched: [10:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: mulps512fold: @@ -246,7 +246,7 @@ entry: define <8 x double> @divpd512fold(<8 x double> %y) { ; GENERIC-LABEL: divpd512fold: ; GENERIC: # %bb.0: # %entry -; GENERIC-NEXT: vdivpd {{.*}}(%rip), %zmm0, %zmm0 # sched: [28:1.00] +; GENERIC-NEXT: vdivpd {{.*}}(%rip), %zmm0, %zmm0 # sched: [29:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: divpd512fold: @@ -276,7 +276,7 @@ entry: define <16 x float> @divps512fold(<16 x float> %y) { ; GENERIC-LABEL: divps512fold: ; GENERIC: # %bb.0: # %entry -; GENERIC-NEXT: vdivps {{.*}}(%rip), %zmm0, %zmm0 # sched: [28:1.00] +; GENERIC-NEXT: vdivps {{.*}}(%rip), %zmm0, %zmm0 # sched: [29:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: divps512fold: @@ -305,7 +305,7 @@ define <8 x i64> @vpaddq_test(<8 x i64> %i, <8 x i64> %j) nounwind readnone { define <8 x i64> @vpaddq_fold_test(<8 x i64> %i, <8 x i64>* %j) nounwind { ; GENERIC-LABEL: vpaddq_fold_test: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vpaddq (%rdi), %zmm0, %zmm0 # sched: [7:1.00] +; GENERIC-NEXT: vpaddq (%rdi), %zmm0, %zmm0 # sched: [8:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: vpaddq_fold_test: @@ -320,7 +320,7 @@ define <8 x i64> @vpaddq_fold_test(<8 x i64> %i, <8 x i64>* %j) nounwind { define <8 x i64> @vpaddq_broadcast_test(<8 x i64> %i) nounwind { ; GENERIC-LABEL: vpaddq_broadcast_test: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vpaddq {{.*}}(%rip){1to8}, %zmm0, %zmm0 # sched: [7:1.00] +; GENERIC-NEXT: vpaddq {{.*}}(%rip){1to8}, %zmm0, %zmm0 # sched: [8:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: vpaddq_broadcast_test: @@ -334,7 +334,7 @@ define <8 x i64> @vpaddq_broadcast_test(<8 x i64> %i) nounwind { 
define <8 x i64> @vpaddq_broadcast2_test(<8 x i64> %i, i64* %j) nounwind { ; GENERIC-LABEL: vpaddq_broadcast2_test: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vpaddq (%rdi){1to8}, %zmm0, %zmm0 # sched: [7:1.00] +; GENERIC-NEXT: vpaddq (%rdi){1to8}, %zmm0, %zmm0 # sched: [8:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: vpaddq_broadcast2_test: @@ -371,7 +371,7 @@ define <16 x i32> @vpaddd_test(<16 x i32> %i, <16 x i32> %j) nounwind readnone { define <16 x i32> @vpaddd_fold_test(<16 x i32> %i, <16 x i32>* %j) nounwind { ; GENERIC-LABEL: vpaddd_fold_test: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vpaddd (%rdi), %zmm0, %zmm0 # sched: [7:1.00] +; GENERIC-NEXT: vpaddd (%rdi), %zmm0, %zmm0 # sched: [8:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: vpaddd_fold_test: @@ -386,7 +386,7 @@ define <16 x i32> @vpaddd_fold_test(<16 x i32> %i, <16 x i32>* %j) nounwind { define <16 x i32> @vpaddd_broadcast_test(<16 x i32> %i) nounwind { ; GENERIC-LABEL: vpaddd_broadcast_test: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vpaddd {{.*}}(%rip){1to16}, %zmm0, %zmm0 # sched: [7:1.00] +; GENERIC-NEXT: vpaddd {{.*}}(%rip){1to16}, %zmm0, %zmm0 # sched: [8:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: vpaddd_broadcast_test: @@ -437,7 +437,7 @@ define <16 x i32> @vpaddd_mask_fold_test(<16 x i32> %i, <16 x i32>* %j.ptr, <16 ; GENERIC-LABEL: vpaddd_mask_fold_test: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vptestmd %zmm1, %zmm1, %k1 # sched: [1:1.00] -; GENERIC-NEXT: vpaddd (%rdi), %zmm0, %zmm0 {%k1} # sched: [7:1.00] +; GENERIC-NEXT: vpaddd (%rdi), %zmm0, %zmm0 {%k1} # sched: [8:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: vpaddd_mask_fold_test: @@ -456,7 +456,7 @@ define <16 x i32> @vpaddd_mask_broadcast_test(<16 x i32> %i, <16 x i32> %mask1) ; GENERIC-LABEL: vpaddd_mask_broadcast_test: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vptestmd %zmm1, %zmm1, %k1 # sched: [1:1.00] -; GENERIC-NEXT: vpaddd {{.*}}(%rip){1to16}, %zmm0, %zmm0 {%k1} # sched: [7:1.00] +; GENERIC-NEXT: 
vpaddd {{.*}}(%rip){1to16}, %zmm0, %zmm0 {%k1} # sched: [8:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: vpaddd_mask_broadcast_test: @@ -474,7 +474,7 @@ define <16 x i32> @vpaddd_maskz_fold_test(<16 x i32> %i, <16 x i32>* %j.ptr, <16 ; GENERIC-LABEL: vpaddd_maskz_fold_test: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vptestmd %zmm1, %zmm1, %k1 # sched: [1:1.00] -; GENERIC-NEXT: vpaddd (%rdi), %zmm0, %zmm0 {%k1} {z} # sched: [7:1.00] +; GENERIC-NEXT: vpaddd (%rdi), %zmm0, %zmm0 {%k1} {z} # sched: [8:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: vpaddd_maskz_fold_test: @@ -493,7 +493,7 @@ define <16 x i32> @vpaddd_maskz_broadcast_test(<16 x i32> %i, <16 x i32> %mask1) ; GENERIC-LABEL: vpaddd_maskz_broadcast_test: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vptestmd %zmm1, %zmm1, %k1 # sched: [1:1.00] -; GENERIC-NEXT: vpaddd {{.*}}(%rip){1to16}, %zmm0, %zmm0 {%k1} {z} # sched: [7:1.00] +; GENERIC-NEXT: vpaddd {{.*}}(%rip){1to16}, %zmm0, %zmm0 {%k1} {z} # sched: [8:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: vpaddd_maskz_broadcast_test: @@ -629,7 +629,7 @@ define <8 x double> @sqrtE(<8 x double> %a) nounwind { define <16 x float> @fadd_broadcast(<16 x float> %a) nounwind { ; GENERIC-LABEL: fadd_broadcast: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vaddps {{.*}}(%rip){1to16}, %zmm0, %zmm0 # sched: [7:1.00] +; GENERIC-NEXT: vaddps {{.*}}(%rip){1to16}, %zmm0, %zmm0 # sched: [8:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: fadd_broadcast: @@ -643,7 +643,7 @@ define <16 x float> @fadd_broadcast(<16 x float> %a) nounwind { define <8 x i64> @addq_broadcast(<8 x i64> %a) nounwind { ; GENERIC-LABEL: addq_broadcast: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vpaddq {{.*}}(%rip){1to8}, %zmm0, %zmm0 # sched: [7:1.00] +; GENERIC-NEXT: vpaddq {{.*}}(%rip){1to8}, %zmm0, %zmm0 # sched: [8:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: addq_broadcast: @@ -657,7 +657,7 @@ define <8 x i64> @addq_broadcast(<8 x i64> %a) nounwind { define <8 x 
i64> @orq_broadcast(<8 x i64> %a) nounwind { ; GENERIC-LABEL: orq_broadcast: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vorpd {{.*}}(%rip){1to8}, %zmm0, %zmm0 # sched: [7:1.00] +; GENERIC-NEXT: vorpd {{.*}}(%rip){1to8}, %zmm0, %zmm0 # sched: [8:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: orq_broadcast: @@ -671,7 +671,7 @@ define <8 x i64> @orq_broadcast(<8 x i64> %a) nounwind { define <16 x i32> @andd512fold(<16 x i32> %y, <16 x i32>* %x) { ; GENERIC-LABEL: andd512fold: ; GENERIC: # %bb.0: # %entry -; GENERIC-NEXT: vandps (%rdi), %zmm0, %zmm0 # sched: [7:1.00] +; GENERIC-NEXT: vandps (%rdi), %zmm0, %zmm0 # sched: [8:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: andd512fold: @@ -687,7 +687,7 @@ entry: define <8 x i64> @andqbrst(<8 x i64> %p1, i64* %ap) { ; GENERIC-LABEL: andqbrst: ; GENERIC: # %bb.0: # %entry -; GENERIC-NEXT: vandpd (%rdi){1to8}, %zmm0, %zmm0 # sched: [7:1.00] +; GENERIC-NEXT: vandpd (%rdi){1to8}, %zmm0, %zmm0 # sched: [8:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: andqbrst: @@ -892,7 +892,7 @@ define <8 x double> @test_mask_fold_vaddpd(<8 x double> %dst, <8 x double> %i, < ; GENERIC-LABEL: test_mask_fold_vaddpd: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vptestmq %zmm2, %zmm2, %k1 # sched: [1:1.00] -; GENERIC-NEXT: vaddpd (%rdi), %zmm1, %zmm0 {%k1} # sched: [7:1.00] +; GENERIC-NEXT: vaddpd (%rdi), %zmm1, %zmm0 {%k1} # sched: [8:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: test_mask_fold_vaddpd: @@ -911,7 +911,7 @@ define <8 x double> @test_maskz_fold_vaddpd(<8 x double> %i, <8 x double>* %j, < ; GENERIC-LABEL: test_maskz_fold_vaddpd: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vptestmq %zmm1, %zmm1, %k1 # sched: [1:1.00] -; GENERIC-NEXT: vaddpd (%rdi), %zmm0, %zmm0 {%k1} {z} # sched: [7:1.00] +; GENERIC-NEXT: vaddpd (%rdi), %zmm0, %zmm0 {%k1} {z} # sched: [8:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: test_maskz_fold_vaddpd: @@ -929,7 +929,7 @@ define <8 x double> @test_maskz_fold_vaddpd(<8 x 
double> %i, <8 x double>* %j, < define <8 x double> @test_broadcast_vaddpd(<8 x double> %i, double* %j) nounwind { ; GENERIC-LABEL: test_broadcast_vaddpd: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vaddpd (%rdi){1to8}, %zmm0, %zmm0 # sched: [7:1.00] +; GENERIC-NEXT: vaddpd (%rdi){1to8}, %zmm0, %zmm0 # sched: [8:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: test_broadcast_vaddpd: @@ -948,7 +948,7 @@ define <8 x double> @test_mask_broadcast_vaddpd(<8 x double> %dst, <8 x double> ; GENERIC-LABEL: test_mask_broadcast_vaddpd: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vptestmq %zmm2, %zmm2, %k1 # sched: [1:1.00] -; GENERIC-NEXT: vaddpd (%rdi){1to8}, %zmm1, %zmm1 {%k1} # sched: [7:1.00] +; GENERIC-NEXT: vaddpd (%rdi){1to8}, %zmm1, %zmm1 {%k1} # sched: [8:1.00] ; GENERIC-NEXT: vmovapd %zmm1, %zmm0 # sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -972,7 +972,7 @@ define <8 x double> @test_maskz_broadcast_vaddpd(<8 x double> %i, double* %j, ; GENERIC-LABEL: test_maskz_broadcast_vaddpd: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vptestmq %zmm1, %zmm1, %k1 # sched: [1:1.00] -; GENERIC-NEXT: vaddpd (%rdi){1to8}, %zmm0, %zmm0 {%k1} {z} # sched: [7:1.00] +; GENERIC-NEXT: vaddpd (%rdi){1to8}, %zmm0, %zmm0 {%k1} {z} # sched: [8:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: test_maskz_broadcast_vaddpd: @@ -994,7 +994,7 @@ define <8 x double> @test_maskz_broadcast_vaddpd(<8 x double> %i, double* %j, define <16 x float> @test_fxor(<16 x float> %a) { ; GENERIC-LABEL: test_fxor: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vxorps {{.*}}(%rip){1to16}, %zmm0, %zmm0 # sched: [7:1.00] +; GENERIC-NEXT: vxorps {{.*}}(%rip){1to16}, %zmm0, %zmm0 # sched: [8:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: test_fxor: @@ -1009,7 +1009,7 @@ define <16 x float> @test_fxor(<16 x float> %a) { define <8 x float> @test_fxor_8f32(<8 x float> %a) { ; GENERIC-LABEL: test_fxor_8f32: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vxorps {{.*}}(%rip){1to8}, %ymm0, %ymm0 # sched: [7:1.00] +; 
GENERIC-NEXT: vxorps {{.*}}(%rip){1to8}, %ymm0, %ymm0 # sched: [8:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: test_fxor_8f32: @@ -1023,7 +1023,7 @@ define <8 x float> @test_fxor_8f32(<8 x float> %a) { define <8 x double> @fabs_v8f64(<8 x double> %p) ; GENERIC-LABEL: fabs_v8f64: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vandpd {{.*}}(%rip){1to8}, %zmm0, %zmm0 # sched: [7:1.00] +; GENERIC-NEXT: vandpd {{.*}}(%rip){1to8}, %zmm0, %zmm0 # sched: [8:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: fabs_v8f64: @@ -1039,7 +1039,7 @@ declare <8 x double> @llvm.fabs.v8f64(<8 x double> %p) define <16 x float> @fabs_v16f32(<16 x float> %p) ; GENERIC-LABEL: fabs_v16f32: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vandps {{.*}}(%rip){1to16}, %zmm0, %zmm0 # sched: [7:1.00] +; GENERIC-NEXT: vandps {{.*}}(%rip){1to16}, %zmm0, %zmm0 # sched: [8:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: fabs_v16f32: @@ -1403,7 +1403,7 @@ define <2 x float> @sltof2f32(<2 x i64> %a) { define <4 x float> @slto4f32_mem(<4 x i64>* %a) { ; GENERIC-LABEL: slto4f32_mem: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vcvtqq2psy (%rdi), %xmm0 # sched: [8:1.00] +; GENERIC-NEXT: vcvtqq2psy (%rdi), %xmm0 # sched: [9:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: slto4f32_mem: @@ -2829,7 +2829,7 @@ define <8 x float> @ubto8f32(<8 x i32> %a) { ; GENERIC: # %bb.0: ; GENERIC-NEXT: vpxor %xmm1, %xmm1, %xmm1 # sched: [1:0.33] ; GENERIC-NEXT: vpcmpgtd %ymm0, %ymm1, %ymm0 # sched: [3:1.00] -; GENERIC-NEXT: vpandd {{.*}}(%rip){1to8}, %ymm0, %ymm0 # sched: [5:1.00] +; GENERIC-NEXT: vpandd {{.*}}(%rip){1to8}, %ymm0, %ymm0 # sched: [6:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: ubto8f32: @@ -2869,7 +2869,7 @@ define <4 x float> @ubto4f32(<4 x i32> %a) { ; GENERIC: # %bb.0: ; GENERIC-NEXT: vpxor %xmm1, %xmm1, %xmm1 # sched: [1:0.33] ; GENERIC-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0 # sched: [1:0.50] -; GENERIC-NEXT: vpandd {{.*}}(%rip){1to4}, %xmm0, %xmm0 # sched: [5:1.00] +; 
GENERIC-NEXT: vpandd {{.*}}(%rip){1to4}, %xmm0, %xmm0 # sched: [6:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: ubto4f32: @@ -2957,7 +2957,7 @@ define <8 x i16> @zext_8x8mem_to_8x16(<8 x i8> *%i , <8 x i1> %mask) nounwind re ; GENERIC: # %bb.0: ; GENERIC-NEXT: vpsllw $15, %xmm0, %xmm0 # sched: [1:1.00] ; GENERIC-NEXT: vpmovw2m %xmm0, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vpmovzxbw {{.*#+}} xmm0 {%k1} {z} = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero sched: [5:1.00] +; GENERIC-NEXT: vpmovzxbw {{.*#+}} xmm0 {%k1} {z} = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero sched: [6:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: zext_8x8mem_to_8x16: @@ -2977,7 +2977,7 @@ define <8 x i16> @sext_8x8mem_to_8x16(<8 x i8> *%i , <8 x i1> %mask) nounwind re ; GENERIC: # %bb.0: ; GENERIC-NEXT: vpsllw $15, %xmm0, %xmm0 # sched: [1:1.00] ; GENERIC-NEXT: vpmovw2m %xmm0, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vpmovsxbw (%rdi), %xmm0 {%k1} {z} # sched: [5:1.00] +; GENERIC-NEXT: vpmovsxbw (%rdi), %xmm0 {%k1} {z} # sched: [6:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: sext_8x8mem_to_8x16: @@ -2998,7 +2998,7 @@ define <16 x i16> @zext_16x8mem_to_16x16(<16 x i8> *%i , <16 x i1> %mask) nounwi ; GENERIC: # %bb.0: ; GENERIC-NEXT: vpsllw $7, %xmm0, %xmm0 # sched: [1:1.00] ; GENERIC-NEXT: vpmovb2m %xmm0, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vpmovzxbw {{.*#+}} ymm0 {%k1} {z} = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero,mem[8],zero,mem[9],zero,mem[10],zero,mem[11],zero,mem[12],zero,mem[13],zero,mem[14],zero,mem[15],zero sched: [5:1.00] +; GENERIC-NEXT: vpmovzxbw {{.*#+}} ymm0 {%k1} {z} = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero,mem[8],zero,mem[9],zero,mem[10],zero,mem[11],zero,mem[12],zero,mem[13],zero,mem[14],zero,mem[15],zero sched: [6:1.00] ; 
GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: zext_16x8mem_to_16x16: @@ -3018,7 +3018,7 @@ define <16 x i16> @sext_16x8mem_to_16x16(<16 x i8> *%i , <16 x i1> %mask) nounwi ; GENERIC: # %bb.0: ; GENERIC-NEXT: vpsllw $7, %xmm0, %xmm0 # sched: [1:1.00] ; GENERIC-NEXT: vpmovb2m %xmm0, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vpmovsxbw (%rdi), %ymm0 {%k1} {z} # sched: [5:1.00] +; GENERIC-NEXT: vpmovsxbw (%rdi), %ymm0 {%k1} {z} # sched: [6:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: sext_16x8mem_to_16x16: @@ -3104,7 +3104,7 @@ define <32 x i16> @zext_32x8mem_to_32x16(<32 x i8> *%i , <32 x i1> %mask) nounwi ; GENERIC: # %bb.0: ; GENERIC-NEXT: vpsllw $7, %ymm0, %ymm0 # sched: [1:1.00] ; GENERIC-NEXT: vpmovb2m %ymm0, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vpmovzxbw {{.*#+}} zmm0 {%k1} {z} = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero,mem[8],zero,mem[9],zero,mem[10],zero,mem[11],zero,mem[12],zero,mem[13],zero,mem[14],zero,mem[15],zero,mem[16],zero,mem[17],zero,mem[18],zero,mem[19],zero,mem[20],zero,mem[21],zero,mem[22],zero,mem[23],zero,mem[24],zero,mem[25],zero,mem[26],zero,mem[27],zero,mem[28],zero,mem[29],zero,mem[30],zero,mem[31],zero sched: [5:1.00] +; GENERIC-NEXT: vpmovzxbw {{.*#+}} zmm0 {%k1} {z} = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero,mem[8],zero,mem[9],zero,mem[10],zero,mem[11],zero,mem[12],zero,mem[13],zero,mem[14],zero,mem[15],zero,mem[16],zero,mem[17],zero,mem[18],zero,mem[19],zero,mem[20],zero,mem[21],zero,mem[22],zero,mem[23],zero,mem[24],zero,mem[25],zero,mem[26],zero,mem[27],zero,mem[28],zero,mem[29],zero,mem[30],zero,mem[31],zero sched: [6:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: zext_32x8mem_to_32x16: @@ -3124,7 +3124,7 @@ define <32 x i16> @sext_32x8mem_to_32x16(<32 x i8> *%i , <32 x i1> %mask) nounwi ; GENERIC: # %bb.0: ; GENERIC-NEXT: vpsllw $7, %ymm0, %ymm0 # sched: [1:1.00] ; GENERIC-NEXT: vpmovb2m %ymm0, 
%k1 # sched: [1:0.33] -; GENERIC-NEXT: vpmovsxbw (%rdi), %zmm0 {%k1} {z} # sched: [5:1.00] +; GENERIC-NEXT: vpmovsxbw (%rdi), %zmm0 {%k1} {z} # sched: [6:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: sext_32x8mem_to_32x16: @@ -3210,7 +3210,7 @@ define <4 x i32> @zext_4x8mem_to_4x32(<4 x i8> *%i , <4 x i1> %mask) nounwind re ; GENERIC: # %bb.0: ; GENERIC-NEXT: vpslld $31, %xmm0, %xmm0 # sched: [1:1.00] ; GENERIC-NEXT: vpmovd2m %xmm0, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vpmovzxbd {{.*#+}} xmm0 {%k1} {z} = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero sched: [5:1.00] +; GENERIC-NEXT: vpmovzxbd {{.*#+}} xmm0 {%k1} {z} = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero sched: [6:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: zext_4x8mem_to_4x32: @@ -3230,7 +3230,7 @@ define <4 x i32> @sext_4x8mem_to_4x32(<4 x i8> *%i , <4 x i1> %mask) nounwind re ; GENERIC: # %bb.0: ; GENERIC-NEXT: vpslld $31, %xmm0, %xmm0 # sched: [1:1.00] ; GENERIC-NEXT: vpmovd2m %xmm0, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vpmovsxbd (%rdi), %xmm0 {%k1} {z} # sched: [5:1.00] +; GENERIC-NEXT: vpmovsxbd (%rdi), %xmm0 {%k1} {z} # sched: [6:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: sext_4x8mem_to_4x32: @@ -3250,7 +3250,7 @@ define <8 x i32> @zext_8x8mem_to_8x32(<8 x i8> *%i , <8 x i1> %mask) nounwind re ; GENERIC: # %bb.0: ; GENERIC-NEXT: vpsllw $15, %xmm0, %xmm0 # sched: [1:1.00] ; GENERIC-NEXT: vpmovw2m %xmm0, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vpmovzxbd {{.*#+}} ymm0 {%k1} {z} = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero,mem[4],zero,zero,zero,mem[5],zero,zero,zero,mem[6],zero,zero,zero,mem[7],zero,zero,zero sched: [5:1.00] +; GENERIC-NEXT: vpmovzxbd {{.*#+}} ymm0 {%k1} {z} = 
mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero,mem[4],zero,zero,zero,mem[5],zero,zero,zero,mem[6],zero,zero,zero,mem[7],zero,zero,zero sched: [6:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: zext_8x8mem_to_8x32: @@ -3270,7 +3270,7 @@ define <8 x i32> @sext_8x8mem_to_8x32(<8 x i8> *%i , <8 x i1> %mask) nounwind re ; GENERIC: # %bb.0: ; GENERIC-NEXT: vpsllw $15, %xmm0, %xmm0 # sched: [1:1.00] ; GENERIC-NEXT: vpmovw2m %xmm0, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vpmovsxbd (%rdi), %ymm0 {%k1} {z} # sched: [5:1.00] +; GENERIC-NEXT: vpmovsxbd (%rdi), %ymm0 {%k1} {z} # sched: [6:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: sext_8x8mem_to_8x32: @@ -3290,7 +3290,7 @@ define <16 x i32> @zext_16x8mem_to_16x32(<16 x i8> *%i , <16 x i1> %mask) nounwi ; GENERIC: # %bb.0: ; GENERIC-NEXT: vpsllw $7, %xmm0, %xmm0 # sched: [1:1.00] ; GENERIC-NEXT: vpmovb2m %xmm0, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vpmovzxbd {{.*#+}} zmm0 {%k1} {z} = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero,mem[4],zero,zero,zero,mem[5],zero,zero,zero,mem[6],zero,zero,zero,mem[7],zero,zero,zero,mem[8],zero,zero,zero,mem[9],zero,zero,zero,mem[10],zero,zero,zero,mem[11],zero,zero,zero,mem[12],zero,zero,zero,mem[13],zero,zero,zero,mem[14],zero,zero,zero,mem[15],zero,zero,zero sched: [5:1.00] +; GENERIC-NEXT: vpmovzxbd {{.*#+}} zmm0 {%k1} {z} = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero,mem[4],zero,zero,zero,mem[5],zero,zero,zero,mem[6],zero,zero,zero,mem[7],zero,zero,zero,mem[8],zero,zero,zero,mem[9],zero,zero,zero,mem[10],zero,zero,zero,mem[11],zero,zero,zero,mem[12],zero,zero,zero,mem[13],zero,zero,zero,mem[14],zero,zero,zero,mem[15],zero,zero,zero sched: [6:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: zext_16x8mem_to_16x32: @@ -3310,7 +3310,7 @@ define <16 x i32> @sext_16x8mem_to_16x32(<16 x i8> *%i , <16 x i1> %mask) nounwi ; GENERIC: # 
%bb.0: ; GENERIC-NEXT: vpsllw $7, %xmm0, %xmm0 # sched: [1:1.00] ; GENERIC-NEXT: vpmovb2m %xmm0, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vpmovsxbd (%rdi), %zmm0 {%k1} {z} # sched: [5:1.00] +; GENERIC-NEXT: vpmovsxbd (%rdi), %zmm0 {%k1} {z} # sched: [6:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: sext_16x8mem_to_16x32: @@ -3396,7 +3396,7 @@ define <2 x i64> @zext_2x8mem_to_2x64(<2 x i8> *%i , <2 x i1> %mask) nounwind re ; GENERIC: # %bb.0: ; GENERIC-NEXT: vpsllq $63, %xmm0, %xmm0 # sched: [1:1.00] ; GENERIC-NEXT: vpmovq2m %xmm0, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vpmovzxbq {{.*#+}} xmm0 {%k1} {z} = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero sched: [5:1.00] +; GENERIC-NEXT: vpmovzxbq {{.*#+}} xmm0 {%k1} {z} = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero sched: [6:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: zext_2x8mem_to_2x64: @@ -3415,7 +3415,7 @@ define <2 x i64> @sext_2x8mem_to_2x64mask(<2 x i8> *%i , <2 x i1> %mask) nounwin ; GENERIC: # %bb.0: ; GENERIC-NEXT: vpsllq $63, %xmm0, %xmm0 # sched: [1:1.00] ; GENERIC-NEXT: vpmovq2m %xmm0, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vpmovsxbq (%rdi), %xmm0 {%k1} {z} # sched: [5:1.00] +; GENERIC-NEXT: vpmovsxbq (%rdi), %xmm0 {%k1} {z} # sched: [6:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: sext_2x8mem_to_2x64mask: @@ -3449,7 +3449,7 @@ define <4 x i64> @zext_4x8mem_to_4x64(<4 x i8> *%i , <4 x i1> %mask) nounwind re ; GENERIC: # %bb.0: ; GENERIC-NEXT: vpslld $31, %xmm0, %xmm0 # sched: [1:1.00] ; GENERIC-NEXT: vpmovd2m %xmm0, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vpmovzxbq {{.*#+}} ymm0 {%k1} {z} = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero,mem[2],zero,zero,zero,zero,zero,zero,zero,mem[3],zero,zero,zero,zero,zero,zero,zero sched: [5:1.00] +; GENERIC-NEXT: vpmovzxbq {{.*#+}} ymm0 {%k1} {z} = 
mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero,mem[2],zero,zero,zero,zero,zero,zero,zero,mem[3],zero,zero,zero,zero,zero,zero,zero sched: [6:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: zext_4x8mem_to_4x64: @@ -3469,7 +3469,7 @@ define <4 x i64> @sext_4x8mem_to_4x64mask(<4 x i8> *%i , <4 x i1> %mask) nounwin ; GENERIC: # %bb.0: ; GENERIC-NEXT: vpslld $31, %xmm0, %xmm0 # sched: [1:1.00] ; GENERIC-NEXT: vpmovd2m %xmm0, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vpmovsxbq (%rdi), %ymm0 {%k1} {z} # sched: [5:1.00] +; GENERIC-NEXT: vpmovsxbq (%rdi), %ymm0 {%k1} {z} # sched: [6:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: sext_4x8mem_to_4x64mask: @@ -3487,7 +3487,7 @@ define <4 x i64> @sext_4x8mem_to_4x64mask(<4 x i8> *%i , <4 x i1> %mask) nounwin define <4 x i64> @sext_4x8mem_to_4x64(<4 x i8> *%i) nounwind readnone { ; GENERIC-LABEL: sext_4x8mem_to_4x64: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vpmovsxbq (%rdi), %ymm0 # sched: [5:1.00] +; GENERIC-NEXT: vpmovsxbq (%rdi), %ymm0 # sched: [6:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: sext_4x8mem_to_4x64: @@ -3504,7 +3504,7 @@ define <8 x i64> @zext_8x8mem_to_8x64(<8 x i8> *%i , <8 x i1> %mask) nounwind re ; GENERIC: # %bb.0: ; GENERIC-NEXT: vpsllw $15, %xmm0, %xmm0 # sched: [1:1.00] ; GENERIC-NEXT: vpmovw2m %xmm0, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vpmovzxbq {{.*#+}} zmm0 {%k1} {z} = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero,mem[2],zero,zero,zero,zero,zero,zero,zero,mem[3],zero,zero,zero,zero,zero,zero,zero,mem[4],zero,zero,zero,zero,zero,zero,zero,mem[5],zero,zero,zero,zero,zero,zero,zero,mem[6],zero,zero,zero,zero,zero,zero,zero,mem[7],zero,zero,zero,zero,zero,zero,zero sched: [5:1.00] +; GENERIC-NEXT: vpmovzxbq {{.*#+}} zmm0 {%k1} {z} = 
mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero,mem[2],zero,zero,zero,zero,zero,zero,zero,mem[3],zero,zero,zero,zero,zero,zero,zero,mem[4],zero,zero,zero,zero,zero,zero,zero,mem[5],zero,zero,zero,zero,zero,zero,zero,mem[6],zero,zero,zero,zero,zero,zero,zero,mem[7],zero,zero,zero,zero,zero,zero,zero sched: [6:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: zext_8x8mem_to_8x64: @@ -3524,7 +3524,7 @@ define <8 x i64> @sext_8x8mem_to_8x64mask(<8 x i8> *%i , <8 x i1> %mask) nounwin ; GENERIC: # %bb.0: ; GENERIC-NEXT: vpsllw $15, %xmm0, %xmm0 # sched: [1:1.00] ; GENERIC-NEXT: vpmovw2m %xmm0, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vpmovsxbq (%rdi), %zmm0 {%k1} {z} # sched: [5:1.00] +; GENERIC-NEXT: vpmovsxbq (%rdi), %zmm0 {%k1} {z} # sched: [6:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: sext_8x8mem_to_8x64mask: @@ -3542,7 +3542,7 @@ define <8 x i64> @sext_8x8mem_to_8x64mask(<8 x i8> *%i , <8 x i1> %mask) nounwin define <8 x i64> @sext_8x8mem_to_8x64(<8 x i8> *%i) nounwind readnone { ; GENERIC-LABEL: sext_8x8mem_to_8x64: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vpmovsxbq (%rdi), %zmm0 # sched: [5:1.00] +; GENERIC-NEXT: vpmovsxbq (%rdi), %zmm0 # sched: [6:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: sext_8x8mem_to_8x64: @@ -3559,7 +3559,7 @@ define <4 x i32> @zext_4x16mem_to_4x32(<4 x i16> *%i , <4 x i1> %mask) nounwind ; GENERIC: # %bb.0: ; GENERIC-NEXT: vpslld $31, %xmm0, %xmm0 # sched: [1:1.00] ; GENERIC-NEXT: vpmovd2m %xmm0, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vpmovzxwd {{.*#+}} xmm0 {%k1} {z} = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero sched: [5:1.00] +; GENERIC-NEXT: vpmovzxwd {{.*#+}} xmm0 {%k1} {z} = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero sched: [6:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: zext_4x16mem_to_4x32: @@ -3579,7 +3579,7 @@ define <4 x i32> @sext_4x16mem_to_4x32mask(<4 x i16> *%i , <4 x i1> %mask) nounw ; GENERIC: # %bb.0: ; GENERIC-NEXT: vpslld 
$31, %xmm0, %xmm0 # sched: [1:1.00] ; GENERIC-NEXT: vpmovd2m %xmm0, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vpmovsxwd (%rdi), %xmm0 {%k1} {z} # sched: [5:1.00] +; GENERIC-NEXT: vpmovsxwd (%rdi), %xmm0 {%k1} {z} # sched: [6:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: sext_4x16mem_to_4x32mask: @@ -3615,7 +3615,7 @@ define <8 x i32> @zext_8x16mem_to_8x32(<8 x i16> *%i , <8 x i1> %mask) nounwind ; GENERIC: # %bb.0: ; GENERIC-NEXT: vpsllw $15, %xmm0, %xmm0 # sched: [1:1.00] ; GENERIC-NEXT: vpmovw2m %xmm0, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vpmovzxwd {{.*#+}} ymm0 {%k1} {z} = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero sched: [5:1.00] +; GENERIC-NEXT: vpmovzxwd {{.*#+}} ymm0 {%k1} {z} = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero sched: [6:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: zext_8x16mem_to_8x32: @@ -3635,7 +3635,7 @@ define <8 x i32> @sext_8x16mem_to_8x32mask(<8 x i16> *%i , <8 x i1> %mask) nounw ; GENERIC: # %bb.0: ; GENERIC-NEXT: vpsllw $15, %xmm0, %xmm0 # sched: [1:1.00] ; GENERIC-NEXT: vpmovw2m %xmm0, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vpmovsxwd (%rdi), %ymm0 {%k1} {z} # sched: [5:1.00] +; GENERIC-NEXT: vpmovsxwd (%rdi), %ymm0 {%k1} {z} # sched: [6:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: sext_8x16mem_to_8x32mask: @@ -3653,7 +3653,7 @@ define <8 x i32> @sext_8x16mem_to_8x32mask(<8 x i16> *%i , <8 x i1> %mask) nounw define <8 x i32> @sext_8x16mem_to_8x32(<8 x i16> *%i) nounwind readnone { ; GENERIC-LABEL: sext_8x16mem_to_8x32: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vpmovsxwd (%rdi), %ymm0 # sched: [5:1.00] +; GENERIC-NEXT: vpmovsxwd (%rdi), %ymm0 # sched: [6:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: sext_8x16mem_to_8x32: @@ -3703,7 +3703,7 @@ define <16 x i32> @zext_16x16mem_to_16x32(<16 x i16> *%i , <16 x i1> %mask) noun ; GENERIC: # %bb.0: ; GENERIC-NEXT: vpsllw $7, %xmm0, %xmm0 # 
sched: [1:1.00] ; GENERIC-NEXT: vpmovb2m %xmm0, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vpmovzxwd {{.*#+}} zmm0 {%k1} {z} = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero,mem[8],zero,mem[9],zero,mem[10],zero,mem[11],zero,mem[12],zero,mem[13],zero,mem[14],zero,mem[15],zero sched: [5:1.00] +; GENERIC-NEXT: vpmovzxwd {{.*#+}} zmm0 {%k1} {z} = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero,mem[8],zero,mem[9],zero,mem[10],zero,mem[11],zero,mem[12],zero,mem[13],zero,mem[14],zero,mem[15],zero sched: [6:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: zext_16x16mem_to_16x32: @@ -3723,7 +3723,7 @@ define <16 x i32> @sext_16x16mem_to_16x32mask(<16 x i16> *%i , <16 x i1> %mask) ; GENERIC: # %bb.0: ; GENERIC-NEXT: vpsllw $7, %xmm0, %xmm0 # sched: [1:1.00] ; GENERIC-NEXT: vpmovb2m %xmm0, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vpmovsxwd (%rdi), %zmm0 {%k1} {z} # sched: [5:1.00] +; GENERIC-NEXT: vpmovsxwd (%rdi), %zmm0 {%k1} {z} # sched: [6:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: sext_16x16mem_to_16x32mask: @@ -3741,7 +3741,7 @@ define <16 x i32> @sext_16x16mem_to_16x32mask(<16 x i16> *%i , <16 x i1> %mask) define <16 x i32> @sext_16x16mem_to_16x32(<16 x i16> *%i) nounwind readnone { ; GENERIC-LABEL: sext_16x16mem_to_16x32: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vpmovsxwd (%rdi), %zmm0 # sched: [5:1.00] +; GENERIC-NEXT: vpmovsxwd (%rdi), %zmm0 # sched: [6:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: sext_16x16mem_to_16x32: @@ -3790,7 +3790,7 @@ define <2 x i64> @zext_2x16mem_to_2x64(<2 x i16> *%i , <2 x i1> %mask) nounwind ; GENERIC: # %bb.0: ; GENERIC-NEXT: vpsllq $63, %xmm0, %xmm0 # sched: [1:1.00] ; GENERIC-NEXT: vpmovq2m %xmm0, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vpmovzxwq {{.*#+}} xmm0 {%k1} {z} = mem[0],zero,zero,zero,mem[1],zero,zero,zero sched: [5:1.00] +; GENERIC-NEXT: vpmovzxwq {{.*#+}} xmm0 {%k1} {z} = 
mem[0],zero,zero,zero,mem[1],zero,zero,zero sched: [6:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: zext_2x16mem_to_2x64: @@ -3810,7 +3810,7 @@ define <2 x i64> @sext_2x16mem_to_2x64mask(<2 x i16> *%i , <2 x i1> %mask) nounw ; GENERIC: # %bb.0: ; GENERIC-NEXT: vpsllq $63, %xmm0, %xmm0 # sched: [1:1.00] ; GENERIC-NEXT: vpmovq2m %xmm0, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vpmovsxwq (%rdi), %xmm0 {%k1} {z} # sched: [5:1.00] +; GENERIC-NEXT: vpmovsxwq (%rdi), %xmm0 {%k1} {z} # sched: [6:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: sext_2x16mem_to_2x64mask: @@ -3845,7 +3845,7 @@ define <4 x i64> @zext_4x16mem_to_4x64(<4 x i16> *%i , <4 x i1> %mask) nounwind ; GENERIC: # %bb.0: ; GENERIC-NEXT: vpslld $31, %xmm0, %xmm0 # sched: [1:1.00] ; GENERIC-NEXT: vpmovd2m %xmm0, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vpmovzxwq {{.*#+}} ymm0 {%k1} {z} = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero sched: [5:1.00] +; GENERIC-NEXT: vpmovzxwq {{.*#+}} ymm0 {%k1} {z} = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero sched: [6:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: zext_4x16mem_to_4x64: @@ -3865,7 +3865,7 @@ define <4 x i64> @sext_4x16mem_to_4x64mask(<4 x i16> *%i , <4 x i1> %mask) nounw ; GENERIC: # %bb.0: ; GENERIC-NEXT: vpslld $31, %xmm0, %xmm0 # sched: [1:1.00] ; GENERIC-NEXT: vpmovd2m %xmm0, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vpmovsxwq (%rdi), %ymm0 {%k1} {z} # sched: [5:1.00] +; GENERIC-NEXT: vpmovsxwq (%rdi), %ymm0 {%k1} {z} # sched: [6:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: sext_4x16mem_to_4x64mask: @@ -3883,7 +3883,7 @@ define <4 x i64> @sext_4x16mem_to_4x64mask(<4 x i16> *%i , <4 x i1> %mask) nounw define <4 x i64> @sext_4x16mem_to_4x64(<4 x i16> *%i) nounwind readnone { ; GENERIC-LABEL: sext_4x16mem_to_4x64: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vpmovsxwq (%rdi), %ymm0 # sched: [5:1.00] +; GENERIC-NEXT: vpmovsxwq (%rdi), 
%ymm0 # sched: [6:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: sext_4x16mem_to_4x64: @@ -3900,7 +3900,7 @@ define <8 x i64> @zext_8x16mem_to_8x64(<8 x i16> *%i , <8 x i1> %mask) nounwind ; GENERIC: # %bb.0: ; GENERIC-NEXT: vpsllw $15, %xmm0, %xmm0 # sched: [1:1.00] ; GENERIC-NEXT: vpmovw2m %xmm0, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vpmovzxwq {{.*#+}} zmm0 {%k1} {z} = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero,mem[4],zero,zero,zero,mem[5],zero,zero,zero,mem[6],zero,zero,zero,mem[7],zero,zero,zero sched: [5:1.00] +; GENERIC-NEXT: vpmovzxwq {{.*#+}} zmm0 {%k1} {z} = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero,mem[4],zero,zero,zero,mem[5],zero,zero,zero,mem[6],zero,zero,zero,mem[7],zero,zero,zero sched: [6:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: zext_8x16mem_to_8x64: @@ -3920,7 +3920,7 @@ define <8 x i64> @sext_8x16mem_to_8x64mask(<8 x i16> *%i , <8 x i1> %mask) nounw ; GENERIC: # %bb.0: ; GENERIC-NEXT: vpsllw $15, %xmm0, %xmm0 # sched: [1:1.00] ; GENERIC-NEXT: vpmovw2m %xmm0, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vpmovsxwq (%rdi), %zmm0 {%k1} {z} # sched: [5:1.00] +; GENERIC-NEXT: vpmovsxwq (%rdi), %zmm0 {%k1} {z} # sched: [6:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: sext_8x16mem_to_8x64mask: @@ -3938,7 +3938,7 @@ define <8 x i64> @sext_8x16mem_to_8x64mask(<8 x i16> *%i , <8 x i1> %mask) nounw define <8 x i64> @sext_8x16mem_to_8x64(<8 x i16> *%i) nounwind readnone { ; GENERIC-LABEL: sext_8x16mem_to_8x64: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vpmovsxwq (%rdi), %zmm0 # sched: [5:1.00] +; GENERIC-NEXT: vpmovsxwq (%rdi), %zmm0 # sched: [6:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: sext_8x16mem_to_8x64: @@ -3988,7 +3988,7 @@ define <2 x i64> @zext_2x32mem_to_2x64(<2 x i32> *%i , <2 x i1> %mask) nounwind ; GENERIC: # %bb.0: ; GENERIC-NEXT: vpsllq $63, %xmm0, %xmm0 # sched: [1:1.00] ; GENERIC-NEXT: vpmovq2m %xmm0, 
%k1 # sched: [1:0.33] -; GENERIC-NEXT: vpmovzxdq {{.*#+}} xmm0 {%k1} {z} = mem[0],zero,mem[1],zero sched: [5:1.00] +; GENERIC-NEXT: vpmovzxdq {{.*#+}} xmm0 {%k1} {z} = mem[0],zero,mem[1],zero sched: [6:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: zext_2x32mem_to_2x64: @@ -4008,7 +4008,7 @@ define <2 x i64> @sext_2x32mem_to_2x64mask(<2 x i32> *%i , <2 x i1> %mask) nounw ; GENERIC: # %bb.0: ; GENERIC-NEXT: vpsllq $63, %xmm0, %xmm0 # sched: [1:1.00] ; GENERIC-NEXT: vpmovq2m %xmm0, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vpmovsxdq (%rdi), %xmm0 {%k1} {z} # sched: [5:1.00] +; GENERIC-NEXT: vpmovsxdq (%rdi), %xmm0 {%k1} {z} # sched: [6:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: sext_2x32mem_to_2x64mask: @@ -4043,7 +4043,7 @@ define <4 x i64> @zext_4x32mem_to_4x64(<4 x i32> *%i , <4 x i1> %mask) nounwind ; GENERIC: # %bb.0: ; GENERIC-NEXT: vpslld $31, %xmm0, %xmm0 # sched: [1:1.00] ; GENERIC-NEXT: vpmovd2m %xmm0, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vpmovzxdq {{.*#+}} ymm0 {%k1} {z} = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero sched: [5:1.00] +; GENERIC-NEXT: vpmovzxdq {{.*#+}} ymm0 {%k1} {z} = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero sched: [6:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: zext_4x32mem_to_4x64: @@ -4063,7 +4063,7 @@ define <4 x i64> @sext_4x32mem_to_4x64mask(<4 x i32> *%i , <4 x i1> %mask) nounw ; GENERIC: # %bb.0: ; GENERIC-NEXT: vpslld $31, %xmm0, %xmm0 # sched: [1:1.00] ; GENERIC-NEXT: vpmovd2m %xmm0, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vpmovsxdq (%rdi), %ymm0 {%k1} {z} # sched: [5:1.00] +; GENERIC-NEXT: vpmovsxdq (%rdi), %ymm0 {%k1} {z} # sched: [6:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: sext_4x32mem_to_4x64mask: @@ -4081,7 +4081,7 @@ define <4 x i64> @sext_4x32mem_to_4x64mask(<4 x i32> *%i , <4 x i1> %mask) nounw define <4 x i64> @sext_4x32mem_to_4x64(<4 x i32> *%i) nounwind readnone { ; GENERIC-LABEL: sext_4x32mem_to_4x64: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vpmovsxdq 
(%rdi), %ymm0 # sched: [5:1.00] +; GENERIC-NEXT: vpmovsxdq (%rdi), %ymm0 # sched: [6:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: sext_4x32mem_to_4x64: @@ -4131,7 +4131,7 @@ define <8 x i64> @zext_8x32mem_to_8x64(<8 x i32> *%i , <8 x i1> %mask) nounwind ; GENERIC: # %bb.0: ; GENERIC-NEXT: vpsllw $15, %xmm0, %xmm0 # sched: [1:1.00] ; GENERIC-NEXT: vpmovw2m %xmm0, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vpmovzxdq {{.*#+}} zmm0 {%k1} {z} = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero sched: [5:1.00] +; GENERIC-NEXT: vpmovzxdq {{.*#+}} zmm0 {%k1} {z} = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero sched: [6:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: zext_8x32mem_to_8x64: @@ -4151,7 +4151,7 @@ define <8 x i64> @sext_8x32mem_to_8x64mask(<8 x i32> *%i , <8 x i1> %mask) nounw ; GENERIC: # %bb.0: ; GENERIC-NEXT: vpsllw $15, %xmm0, %xmm0 # sched: [1:1.00] ; GENERIC-NEXT: vpmovw2m %xmm0, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vpmovsxdq (%rdi), %zmm0 {%k1} {z} # sched: [5:1.00] +; GENERIC-NEXT: vpmovsxdq (%rdi), %zmm0 {%k1} {z} # sched: [6:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: sext_8x32mem_to_8x64mask: @@ -4169,7 +4169,7 @@ define <8 x i64> @sext_8x32mem_to_8x64mask(<8 x i32> *%i , <8 x i1> %mask) nounw define <8 x i64> @sext_8x32mem_to_8x64(<8 x i32> *%i) nounwind readnone { ; GENERIC-LABEL: sext_8x32mem_to_8x64: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vpmovsxdq (%rdi), %zmm0 # sched: [5:1.00] +; GENERIC-NEXT: vpmovsxdq (%rdi), %zmm0 # sched: [6:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: sext_8x32mem_to_8x64: @@ -4473,7 +4473,7 @@ define <8 x i64> @sext_8i1_8i64(<8 x i32> %a1, <8 x i32> %a2) nounwind { define void @extload_v8i64(<8 x i8>* %a, <8 x i64>* %res) { ; GENERIC-LABEL: extload_v8i64: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vpmovsxbq (%rdi), %zmm0 # sched: [5:1.00] +; GENERIC-NEXT: vpmovsxbq (%rdi), %zmm0 
# sched: [6:1.00] ; GENERIC-NEXT: vmovdqa64 %zmm0, (%rsi) # sched: [1:1.00] ; GENERIC-NEXT: vzeroupper # sched: [100:0.33] ; GENERIC-NEXT: retq # sched: [1:1.00] @@ -4809,7 +4809,7 @@ define <16 x float> @test_x86_fnmsub_ps_z(<16 x float> %a0, <16 x float> %a1, <1 ; GENERIC-LABEL: test_x86_fnmsub_ps_z: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vmulps %zmm1, %zmm0, %zmm0 # sched: [5:1.00] -; GENERIC-NEXT: vxorps {{.*}}(%rip){1to16}, %zmm0, %zmm0 # sched: [7:1.00] +; GENERIC-NEXT: vxorps {{.*}}(%rip){1to16}, %zmm0, %zmm0 # sched: [8:1.00] ; GENERIC-NEXT: vsubps %zmm2, %zmm0, %zmm0 # sched: [3:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -4918,7 +4918,7 @@ define double @test_x86_fmsub_231_m(double %a0, double %a1, double * %a2_ptr) { define <16 x float> @test231_br(<16 x float> %a1, <16 x float> %a2) nounwind { ; GENERIC-LABEL: test231_br: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vmulps {{.*}}(%rip){1to16}, %zmm0, %zmm0 # sched: [9:1.00] +; GENERIC-NEXT: vmulps {{.*}}(%rip){1to16}, %zmm0, %zmm0 # sched: [10:1.00] ; GENERIC-NEXT: vaddps %zmm1, %zmm0, %zmm0 # sched: [3:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -4936,7 +4936,7 @@ define <16 x float> @test213_br(<16 x float> %a1, <16 x float> %a2) nounwind { ; GENERIC-LABEL: test213_br: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vmulps %zmm1, %zmm0, %zmm0 # sched: [5:1.00] -; GENERIC-NEXT: vaddps {{.*}}(%rip){1to16}, %zmm0, %zmm0 # sched: [7:1.00] +; GENERIC-NEXT: vaddps {{.*}}(%rip){1to16}, %zmm0, %zmm0 # sched: [8:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: test213_br: @@ -4955,7 +4955,7 @@ define <16 x float> @test_x86_fmadd132_ps(<16 x float> %a0, <16 x float> %a1, <1 ; GENERIC: # %bb.0: ; GENERIC-NEXT: vpsllw $7, %xmm2, %xmm2 # sched: [1:1.00] ; GENERIC-NEXT: vpmovb2m %xmm2, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vmulps (%rdi), %zmm0, %zmm2 # sched: [9:1.00] +; GENERIC-NEXT: vmulps (%rdi), %zmm0, %zmm2 # sched: [10:1.00] ; GENERIC-NEXT: vaddps %zmm1, %zmm2, %zmm0 {%k1} # sched: [3:1.00] ; GENERIC-NEXT: 
retq # sched: [1:1.00] ; @@ -4979,7 +4979,7 @@ define <16 x float> @test_x86_fmadd231_ps(<16 x float> %a0, <16 x float> %a1, <1 ; GENERIC: # %bb.0: ; GENERIC-NEXT: vpsllw $7, %xmm2, %xmm2 # sched: [1:1.00] ; GENERIC-NEXT: vpmovb2m %xmm2, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vmulps (%rdi), %zmm0, %zmm0 # sched: [9:1.00] +; GENERIC-NEXT: vmulps (%rdi), %zmm0, %zmm0 # sched: [10:1.00] ; GENERIC-NEXT: vaddps %zmm1, %zmm0, %zmm1 {%k1} # sched: [3:1.00] ; GENERIC-NEXT: vmovaps %zmm1, %zmm0 # sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] @@ -5006,7 +5006,7 @@ define <16 x float> @test_x86_fmadd213_ps(<16 x float> %a0, <16 x float> %a1, <1 ; GENERIC-NEXT: vpsllw $7, %xmm2, %xmm2 # sched: [1:1.00] ; GENERIC-NEXT: vpmovb2m %xmm2, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vmulps %zmm0, %zmm1, %zmm0 # sched: [5:1.00] -; GENERIC-NEXT: vaddps (%rdi), %zmm0, %zmm1 {%k1} # sched: [7:1.00] +; GENERIC-NEXT: vaddps (%rdi), %zmm0, %zmm1 {%k1} # sched: [8:1.00] ; GENERIC-NEXT: vmovaps %zmm1, %zmm0 # sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -5028,7 +5028,7 @@ define <16 x float> @test_x86_fmadd213_ps(<16 x float> %a0, <16 x float> %a1, <1 define <16 x i32> @vpandd(<16 x i32> %a, <16 x i32> %b) nounwind uwtable readnone ssp { ; GENERIC-LABEL: vpandd: ; GENERIC: # %bb.0: # %entry -; GENERIC-NEXT: vpaddd {{.*}}(%rip){1to16}, %zmm0, %zmm0 # sched: [7:1.00] +; GENERIC-NEXT: vpaddd {{.*}}(%rip){1to16}, %zmm0, %zmm0 # sched: [8:1.00] ; GENERIC-NEXT: vpandq %zmm1, %zmm0, %zmm0 # sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -5048,7 +5048,7 @@ entry: define <16 x i32> @vpandnd(<16 x i32> %a, <16 x i32> %b) nounwind uwtable readnone ssp { ; GENERIC-LABEL: vpandnd: ; GENERIC: # %bb.0: # %entry -; GENERIC-NEXT: vpaddd {{.*}}(%rip){1to16}, %zmm0, %zmm0 # sched: [7:1.00] +; GENERIC-NEXT: vpaddd {{.*}}(%rip){1to16}, %zmm0, %zmm0 # sched: [8:1.00] ; GENERIC-NEXT: vpandnq %zmm0, %zmm1, %zmm0 # sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -5070,7 
+5070,7 @@ entry: define <16 x i32> @vpord(<16 x i32> %a, <16 x i32> %b) nounwind uwtable readnone ssp { ; GENERIC-LABEL: vpord: ; GENERIC: # %bb.0: # %entry -; GENERIC-NEXT: vpaddd {{.*}}(%rip){1to16}, %zmm0, %zmm0 # sched: [7:1.00] +; GENERIC-NEXT: vpaddd {{.*}}(%rip){1to16}, %zmm0, %zmm0 # sched: [8:1.00] ; GENERIC-NEXT: vporq %zmm1, %zmm0, %zmm0 # sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -5090,7 +5090,7 @@ entry: define <16 x i32> @vpxord(<16 x i32> %a, <16 x i32> %b) nounwind uwtable readnone ssp { ; GENERIC-LABEL: vpxord: ; GENERIC: # %bb.0: # %entry -; GENERIC-NEXT: vpaddd {{.*}}(%rip){1to16}, %zmm0, %zmm0 # sched: [7:1.00] +; GENERIC-NEXT: vpaddd {{.*}}(%rip){1to16}, %zmm0, %zmm0 # sched: [8:1.00] ; GENERIC-NEXT: vpxorq %zmm1, %zmm0, %zmm0 # sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -5110,7 +5110,7 @@ entry: define <8 x i64> @vpandq(<8 x i64> %a, <8 x i64> %b) nounwind uwtable readnone ssp { ; GENERIC-LABEL: vpandq: ; GENERIC: # %bb.0: # %entry -; GENERIC-NEXT: vpaddq {{.*}}(%rip){1to8}, %zmm0, %zmm0 # sched: [7:1.00] +; GENERIC-NEXT: vpaddq {{.*}}(%rip){1to8}, %zmm0, %zmm0 # sched: [8:1.00] ; GENERIC-NEXT: vpandq %zmm1, %zmm0, %zmm0 # sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -5129,7 +5129,7 @@ entry: define <8 x i64> @vpandnq(<8 x i64> %a, <8 x i64> %b) nounwind uwtable readnone ssp { ; GENERIC-LABEL: vpandnq: ; GENERIC: # %bb.0: # %entry -; GENERIC-NEXT: vpaddq {{.*}}(%rip){1to8}, %zmm0, %zmm0 # sched: [7:1.00] +; GENERIC-NEXT: vpaddq {{.*}}(%rip){1to8}, %zmm0, %zmm0 # sched: [8:1.00] ; GENERIC-NEXT: vpandnq %zmm0, %zmm1, %zmm0 # sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -5149,7 +5149,7 @@ entry: define <8 x i64> @vporq(<8 x i64> %a, <8 x i64> %b) nounwind uwtable readnone ssp { ; GENERIC-LABEL: vporq: ; GENERIC: # %bb.0: # %entry -; GENERIC-NEXT: vpaddq {{.*}}(%rip){1to8}, %zmm0, %zmm0 # sched: [7:1.00] +; GENERIC-NEXT: vpaddq {{.*}}(%rip){1to8}, %zmm0, %zmm0 # sched: [8:1.00] ; 
GENERIC-NEXT: vporq %zmm1, %zmm0, %zmm0 # sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -5168,7 +5168,7 @@ entry: define <8 x i64> @vpxorq(<8 x i64> %a, <8 x i64> %b) nounwind uwtable readnone ssp { ; GENERIC-LABEL: vpxorq: ; GENERIC: # %bb.0: # %entry -; GENERIC-NEXT: vpaddq {{.*}}(%rip){1to8}, %zmm0, %zmm0 # sched: [7:1.00] +; GENERIC-NEXT: vpaddq {{.*}}(%rip){1to8}, %zmm0, %zmm0 # sched: [8:1.00] ; GENERIC-NEXT: vpxorq %zmm1, %zmm0, %zmm0 # sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -6743,7 +6743,7 @@ define i32 @mask8_zext(i8 %x) { define void @mask16_mem(i16* %ptr) { ; GENERIC-LABEL: mask16_mem: ; GENERIC: # %bb.0: -; GENERIC-NEXT: kmovw (%rdi), %k0 # sched: [4:0.50] +; GENERIC-NEXT: kmovw (%rdi), %k0 # sched: [5:0.50] ; GENERIC-NEXT: knotw %k0, %k0 # sched: [1:1.00] ; GENERIC-NEXT: kmovw %k0, (%rdi) # sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] @@ -6765,7 +6765,7 @@ define void @mask16_mem(i16* %ptr) { define void @mask8_mem(i8* %ptr) { ; GENERIC-LABEL: mask8_mem: ; GENERIC: # %bb.0: -; GENERIC-NEXT: kmovb (%rdi), %k0 # sched: [4:0.50] +; GENERIC-NEXT: kmovb (%rdi), %k0 # sched: [5:0.50] ; GENERIC-NEXT: knotb %k0, %k0 # sched: [1:1.00] ; GENERIC-NEXT: kmovb %k0, (%rdi) # sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] @@ -6814,8 +6814,8 @@ define i16 @mand16(i16 %x, i16 %y) { define i16 @mand16_mem(<16 x i1>* %x, <16 x i1>* %y) { ; GENERIC-LABEL: mand16_mem: ; GENERIC: # %bb.0: -; GENERIC-NEXT: kmovw (%rdi), %k0 # sched: [4:0.50] -; GENERIC-NEXT: kmovw (%rsi), %k1 # sched: [4:0.50] +; GENERIC-NEXT: kmovw (%rdi), %k0 # sched: [5:0.50] +; GENERIC-NEXT: kmovw (%rsi), %k1 # sched: [5:0.50] ; GENERIC-NEXT: kandw %k1, %k0, %k2 # sched: [1:1.00] ; GENERIC-NEXT: kxorw %k1, %k0, %k0 # sched: [1:1.00] ; GENERIC-NEXT: korw %k0, %k2, %k0 # sched: [1:1.00] @@ -7589,7 +7589,7 @@ define <32 x i16> @test_build_vec_v32i1(<32 x i16> %x) { define <64 x i8> @test_build_vec_v64i1(<64 x i8> %x) { ; GENERIC-LABEL: 
test_build_vec_v64i1: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vpshufb {{.*#+}} zmm0 = zero,zero,zmm0[2],zero,zero,zero,zmm0[6],zero,zmm0[8],zero,zmm0[10],zero,zmm0[12],zero,zero,zmm0[15],zero,zero,zmm0[18],zero,zmm0[20],zero,zmm0[22],zero,zmm0[24],zero,zero,zmm0[27],zero,zero,zmm0[30],zero,zmm0[32],zero,zmm0[34],zero,zero,zero,zmm0[38],zero,zmm0[40],zero,zero,zmm0[43,44],zero,zmm0[46],zero,zmm0[48],zero,zmm0[50],zero,zero,zero,zmm0[54],zero,zmm0[56],zero,zero,zmm0[59,60],zero,zmm0[62],zero sched: [5:1.00] +; GENERIC-NEXT: vpshufb {{.*#+}} zmm0 = zero,zero,zmm0[2],zero,zero,zero,zmm0[6],zero,zmm0[8],zero,zmm0[10],zero,zmm0[12],zero,zero,zmm0[15],zero,zero,zmm0[18],zero,zmm0[20],zero,zmm0[22],zero,zmm0[24],zero,zero,zmm0[27],zero,zero,zmm0[30],zero,zmm0[32],zero,zmm0[34],zero,zero,zero,zmm0[38],zero,zmm0[40],zero,zero,zmm0[43,44],zero,zmm0[46],zero,zmm0[48],zero,zmm0[50],zero,zero,zero,zmm0[54],zero,zmm0[56],zero,zero,zmm0[59,60],zero,zmm0[62],zero sched: [6:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: test_build_vec_v64i1: @@ -7742,7 +7742,7 @@ End: define <8 x i64> @load_8i1(<8 x i1>* %a) { ; GENERIC-LABEL: load_8i1: ; GENERIC: # %bb.0: -; GENERIC-NEXT: kmovb (%rdi), %k0 # sched: [4:0.50] +; GENERIC-NEXT: kmovb (%rdi), %k0 # sched: [5:0.50] ; GENERIC-NEXT: vpmovm2q %k0, %zmm0 # sched: [1:0.33] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -7759,7 +7759,7 @@ define <8 x i64> @load_8i1(<8 x i1>* %a) { define <16 x i32> @load_16i1(<16 x i1>* %a) { ; GENERIC-LABEL: load_16i1: ; GENERIC: # %bb.0: -; GENERIC-NEXT: kmovw (%rdi), %k0 # sched: [4:0.50] +; GENERIC-NEXT: kmovw (%rdi), %k0 # sched: [5:0.50] ; GENERIC-NEXT: vpmovm2d %k0, %zmm0 # sched: [1:0.33] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -7776,7 +7776,7 @@ define <16 x i32> @load_16i1(<16 x i1>* %a) { define <2 x i16> @load_2i1(<2 x i1>* %a) { ; GENERIC-LABEL: load_2i1: ; GENERIC: # %bb.0: -; GENERIC-NEXT: kmovb (%rdi), %k0 # sched: [4:0.50] +; GENERIC-NEXT: kmovb (%rdi), %k0 # sched: [5:0.50] ; 
GENERIC-NEXT: vpmovm2q %k0, %xmm0 # sched: [1:0.33] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -7793,7 +7793,7 @@ define <2 x i16> @load_2i1(<2 x i1>* %a) { define <4 x i16> @load_4i1(<4 x i1>* %a) { ; GENERIC-LABEL: load_4i1: ; GENERIC: # %bb.0: -; GENERIC-NEXT: kmovb (%rdi), %k0 # sched: [4:0.50] +; GENERIC-NEXT: kmovb (%rdi), %k0 # sched: [5:0.50] ; GENERIC-NEXT: vpmovm2d %k0, %xmm0 # sched: [1:0.33] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -7810,7 +7810,7 @@ define <4 x i16> @load_4i1(<4 x i1>* %a) { define <32 x i16> @load_32i1(<32 x i1>* %a) { ; GENERIC-LABEL: load_32i1: ; GENERIC: # %bb.0: -; GENERIC-NEXT: kmovd (%rdi), %k0 # sched: [4:0.50] +; GENERIC-NEXT: kmovd (%rdi), %k0 # sched: [5:0.50] ; GENERIC-NEXT: vpmovm2w %k0, %zmm0 # sched: [1:0.33] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -7827,7 +7827,7 @@ define <32 x i16> @load_32i1(<32 x i1>* %a) { define <64 x i8> @load_64i1(<64 x i1>* %a) { ; GENERIC-LABEL: load_64i1: ; GENERIC: # %bb.0: -; GENERIC-NEXT: kmovq (%rdi), %k0 # sched: [4:0.50] +; GENERIC-NEXT: kmovq (%rdi), %k0 # sched: [5:0.50] ; GENERIC-NEXT: vpmovm2b %k0, %zmm0 # sched: [1:0.33] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -8258,7 +8258,7 @@ define <16 x float> @_ss16xfloat_maskz(float %a, <16 x i32> %mask1) { define <16 x float> @_ss16xfloat_load(float* %a.ptr) { ; GENERIC-LABEL: _ss16xfloat_load: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vbroadcastss (%rdi), %zmm0 # sched: [5:1.00] +; GENERIC-NEXT: vbroadcastss (%rdi), %zmm0 # sched: [6:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: _ss16xfloat_load: @@ -8275,7 +8275,7 @@ define <16 x float> @_ss16xfloat_mask_load(float* %a.ptr, <16 x float> %i, <16 ; GENERIC-LABEL: _ss16xfloat_mask_load: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vptestmd %zmm1, %zmm1, %k1 # sched: [1:1.00] -; GENERIC-NEXT: vbroadcastss (%rdi), %zmm0 {%k1} # sched: [5:1.00] +; GENERIC-NEXT: vbroadcastss (%rdi), %zmm0 {%k1} # sched: [6:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: 
_ss16xfloat_mask_load: @@ -8295,7 +8295,7 @@ define <16 x float> @_ss16xfloat_maskz_load(float* %a.ptr, <16 x i32> %mask1) ; GENERIC-LABEL: _ss16xfloat_maskz_load: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vptestmd %zmm0, %zmm0, %k1 # sched: [1:1.00] -; GENERIC-NEXT: vbroadcastss (%rdi), %zmm0 {%k1} {z} # sched: [5:1.00] +; GENERIC-NEXT: vbroadcastss (%rdi), %zmm0 {%k1} {z} # sched: [6:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: _ss16xfloat_maskz_load: @@ -8369,7 +8369,7 @@ define <8 x double> @_sd8xdouble_maskz(double %a, <8 x i32> %mask1) { define <8 x double> @_sd8xdouble_load(double* %a.ptr) { ; GENERIC-LABEL: _sd8xdouble_load: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vbroadcastsd (%rdi), %zmm0 # sched: [5:1.00] +; GENERIC-NEXT: vbroadcastsd (%rdi), %zmm0 # sched: [6:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: _sd8xdouble_load: @@ -8386,7 +8386,7 @@ define <8 x double> @_sd8xdouble_mask_load(double* %a.ptr, <8 x double> %i, <8 ; GENERIC-LABEL: _sd8xdouble_mask_load: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vptestmd %ymm1, %ymm1, %k1 # sched: [1:1.00] -; GENERIC-NEXT: vbroadcastsd (%rdi), %zmm0 {%k1} # sched: [5:1.00] +; GENERIC-NEXT: vbroadcastsd (%rdi), %zmm0 {%k1} # sched: [6:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: _sd8xdouble_mask_load: @@ -8406,7 +8406,7 @@ define <8 x double> @_sd8xdouble_maskz_load(double* %a.ptr, <8 x i32> %mask1) ; GENERIC-LABEL: _sd8xdouble_maskz_load: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vptestmd %ymm0, %ymm0, %k1 # sched: [1:1.00] -; GENERIC-NEXT: vbroadcastsd (%rdi), %zmm0 {%k1} {z} # sched: [5:1.00] +; GENERIC-NEXT: vbroadcastsd (%rdi), %zmm0 {%k1} {z} # sched: [6:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: _sd8xdouble_maskz_load: @@ -8700,7 +8700,7 @@ define <16 x float> @broadcast_ss_spill(float %x) { ; GENERIC-NEXT: vaddss %xmm0, %xmm0, %xmm0 # sched: [3:1.00] ; GENERIC-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill sched: [1:1.00] ; GENERIC-NEXT: callq func_f32 -; GENERIC-NEXT: 
vbroadcastss (%rsp), %zmm0 # 16-byte Folded Reload sched: [5:1.00] +; GENERIC-NEXT: vbroadcastss (%rsp), %zmm0 # 16-byte Folded Reload sched: [6:1.00] ; GENERIC-NEXT: addq $24, %rsp # sched: [1:0.33] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -8730,7 +8730,7 @@ define <8 x double> @broadcast_sd_spill(double %x) { ; GENERIC-NEXT: vaddsd %xmm0, %xmm0, %xmm0 # sched: [3:1.00] ; GENERIC-NEXT: vmovapd %xmm0, (%rsp) # 16-byte Spill sched: [1:1.00] ; GENERIC-NEXT: callq func_f64 -; GENERIC-NEXT: vbroadcastsd (%rsp), %zmm0 # 16-byte Folded Reload sched: [5:1.00] +; GENERIC-NEXT: vbroadcastsd (%rsp), %zmm0 # 16-byte Folded Reload sched: [6:1.00] ; GENERIC-NEXT: addq $24, %rsp # sched: [1:0.33] ; GENERIC-NEXT: retq # sched: [1:1.00] ; diff --git a/llvm/test/CodeGen/X86/avx512-shuffle-schedule.ll b/llvm/test/CodeGen/X86/avx512-shuffle-schedule.ll index dc4fde8..2dd1bfc 100755 --- a/llvm/test/CodeGen/X86/avx512-shuffle-schedule.ll +++ b/llvm/test/CodeGen/X86/avx512-shuffle-schedule.ll @@ -202,7 +202,7 @@ define <16 x i16> @test_16xi16_perm_mem_mask0(<16 x i16>* %vp) { ; GENERIC-LABEL: test_16xi16_perm_mem_mask0: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vmovdqa {{.*#+}} ymm0 = [9,10,7,1,12,14,14,13,14,14,8,6,11,4,12,13] sched: [7:0.50] -; GENERIC-NEXT: vpermw (%rdi), %ymm0, %ymm0 # sched: [5:1.00] +; GENERIC-NEXT: vpermw (%rdi), %ymm0, %ymm0 # sched: [6:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: test_16xi16_perm_mem_mask0: @@ -219,7 +219,7 @@ define <16 x i16> @test_masked_16xi16_perm_mem_mask0(<16 x i16>* %vp, <16 x i16> ; GENERIC: # %bb.0: ; GENERIC-NEXT: vmovdqa {{.*#+}} ymm2 = [9,10,7,1,12,14,14,13,14,14,8,6,11,4,12,13] sched: [7:0.50] ; GENERIC-NEXT: vptestnmw %ymm1, %ymm1, %k1 # sched: [1:1.00] -; GENERIC-NEXT: vpermw (%rdi), %ymm2, %ymm0 {%k1} # sched: [5:1.00] +; GENERIC-NEXT: vpermw (%rdi), %ymm2, %ymm0 {%k1} # sched: [6:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: test_masked_16xi16_perm_mem_mask0: @@ -240,7 +240,7 @@ define <16 x i16> 
@test_masked_z_16xi16_perm_mem_mask0(<16 x i16>* %vp, <16 x i1 ; GENERIC: # %bb.0: ; GENERIC-NEXT: vmovdqa {{.*#+}} ymm1 = [9,10,7,1,12,14,14,13,14,14,8,6,11,4,12,13] sched: [7:0.50] ; GENERIC-NEXT: vptestnmw %ymm0, %ymm0, %k1 # sched: [1:1.00] -; GENERIC-NEXT: vpermw (%rdi), %ymm1, %ymm0 {%k1} {z} # sched: [5:1.00] +; GENERIC-NEXT: vpermw (%rdi), %ymm1, %ymm0 {%k1} {z} # sched: [6:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: test_masked_z_16xi16_perm_mem_mask0: @@ -261,7 +261,7 @@ define <16 x i16> @test_masked_16xi16_perm_mem_mask1(<16 x i16>* %vp, <16 x i16> ; GENERIC: # %bb.0: ; GENERIC-NEXT: vmovdqa {{.*#+}} ymm2 = [14,9,15,9,7,10,15,14,12,1,9,7,10,13,3,11] sched: [7:0.50] ; GENERIC-NEXT: vptestnmw %ymm1, %ymm1, %k1 # sched: [1:1.00] -; GENERIC-NEXT: vpermw (%rdi), %ymm2, %ymm0 {%k1} # sched: [5:1.00] +; GENERIC-NEXT: vpermw (%rdi), %ymm2, %ymm0 {%k1} # sched: [6:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: test_masked_16xi16_perm_mem_mask1: @@ -282,7 +282,7 @@ define <16 x i16> @test_masked_z_16xi16_perm_mem_mask1(<16 x i16>* %vp, <16 x i1 ; GENERIC: # %bb.0: ; GENERIC-NEXT: vmovdqa {{.*#+}} ymm1 = [14,9,15,9,7,10,15,14,12,1,9,7,10,13,3,11] sched: [7:0.50] ; GENERIC-NEXT: vptestnmw %ymm0, %ymm0, %k1 # sched: [1:1.00] -; GENERIC-NEXT: vpermw (%rdi), %ymm1, %ymm0 {%k1} {z} # sched: [5:1.00] +; GENERIC-NEXT: vpermw (%rdi), %ymm1, %ymm0 {%k1} {z} # sched: [6:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: test_masked_z_16xi16_perm_mem_mask1: @@ -303,7 +303,7 @@ define <16 x i16> @test_masked_16xi16_perm_mem_mask2(<16 x i16>* %vp, <16 x i16> ; GENERIC: # %bb.0: ; GENERIC-NEXT: vmovdqa {{.*#+}} ymm2 = [1,3,12,5,13,1,2,11,0,9,14,8,10,0,10,9] sched: [7:0.50] ; GENERIC-NEXT: vptestnmw %ymm1, %ymm1, %k1 # sched: [1:1.00] -; GENERIC-NEXT: vpermw (%rdi), %ymm2, %ymm0 {%k1} # sched: [5:1.00] +; GENERIC-NEXT: vpermw (%rdi), %ymm2, %ymm0 {%k1} # sched: [6:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: 
test_masked_16xi16_perm_mem_mask2: @@ -324,7 +324,7 @@ define <16 x i16> @test_masked_z_16xi16_perm_mem_mask2(<16 x i16>* %vp, <16 x i1 ; GENERIC: # %bb.0: ; GENERIC-NEXT: vmovdqa {{.*#+}} ymm1 = [1,3,12,5,13,1,2,11,0,9,14,8,10,0,10,9] sched: [7:0.50] ; GENERIC-NEXT: vptestnmw %ymm0, %ymm0, %k1 # sched: [1:1.00] -; GENERIC-NEXT: vpermw (%rdi), %ymm1, %ymm0 {%k1} {z} # sched: [5:1.00] +; GENERIC-NEXT: vpermw (%rdi), %ymm1, %ymm0 {%k1} {z} # sched: [6:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: test_masked_z_16xi16_perm_mem_mask2: @@ -344,7 +344,7 @@ define <16 x i16> @test_16xi16_perm_mem_mask3(<16 x i16>* %vp) { ; GENERIC-LABEL: test_16xi16_perm_mem_mask3: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vmovdqa {{.*#+}} ymm0 = [9,6,5,15,0,0,15,2,1,3,12,14,0,6,1,4] sched: [7:0.50] -; GENERIC-NEXT: vpermw (%rdi), %ymm0, %ymm0 # sched: [5:1.00] +; GENERIC-NEXT: vpermw (%rdi), %ymm0, %ymm0 # sched: [6:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: test_16xi16_perm_mem_mask3: @@ -361,7 +361,7 @@ define <16 x i16> @test_masked_16xi16_perm_mem_mask3(<16 x i16>* %vp, <16 x i16> ; GENERIC: # %bb.0: ; GENERIC-NEXT: vmovdqa {{.*#+}} ymm2 = [9,6,5,15,0,0,15,2,1,3,12,14,0,6,1,4] sched: [7:0.50] ; GENERIC-NEXT: vptestnmw %ymm1, %ymm1, %k1 # sched: [1:1.00] -; GENERIC-NEXT: vpermw (%rdi), %ymm2, %ymm0 {%k1} # sched: [5:1.00] +; GENERIC-NEXT: vpermw (%rdi), %ymm2, %ymm0 {%k1} # sched: [6:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: test_masked_16xi16_perm_mem_mask3: @@ -382,7 +382,7 @@ define <16 x i16> @test_masked_z_16xi16_perm_mem_mask3(<16 x i16>* %vp, <16 x i1 ; GENERIC: # %bb.0: ; GENERIC-NEXT: vmovdqa {{.*#+}} ymm1 = [9,6,5,15,0,0,15,2,1,3,12,14,0,6,1,4] sched: [7:0.50] ; GENERIC-NEXT: vptestnmw %ymm0, %ymm0, %k1 # sched: [1:1.00] -; GENERIC-NEXT: vpermw (%rdi), %ymm1, %ymm0 {%k1} {z} # sched: [5:1.00] +; GENERIC-NEXT: vpermw (%rdi), %ymm1, %ymm0 {%k1} {z} # sched: [6:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: 
test_masked_z_16xi16_perm_mem_mask3: @@ -596,7 +596,7 @@ define <32 x i16> @test_32xi16_perm_mem_mask0(<32 x i16>* %vp) { ; GENERIC-LABEL: test_32xi16_perm_mem_mask0: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vmovdqa64 {{.*#+}} zmm0 = [19,1,5,31,9,12,17,9,15,7,1,5,16,2,12,10,13,3,29,15,26,31,10,15,22,13,9,23,28,29,20,12] sched: [6:0.50] -; GENERIC-NEXT: vpermw (%rdi), %zmm0, %zmm0 # sched: [5:1.00] +; GENERIC-NEXT: vpermw (%rdi), %zmm0, %zmm0 # sched: [6:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: test_32xi16_perm_mem_mask0: @@ -613,7 +613,7 @@ define <32 x i16> @test_masked_32xi16_perm_mem_mask0(<32 x i16>* %vp, <32 x i16> ; GENERIC: # %bb.0: ; GENERIC-NEXT: vmovdqa64 {{.*#+}} zmm2 = [19,1,5,31,9,12,17,9,15,7,1,5,16,2,12,10,13,3,29,15,26,31,10,15,22,13,9,23,28,29,20,12] sched: [6:0.50] ; GENERIC-NEXT: vptestnmw %zmm1, %zmm1, %k1 # sched: [1:1.00] -; GENERIC-NEXT: vpermw (%rdi), %zmm2, %zmm0 {%k1} # sched: [5:1.00] +; GENERIC-NEXT: vpermw (%rdi), %zmm2, %zmm0 {%k1} # sched: [6:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: test_masked_32xi16_perm_mem_mask0: @@ -634,7 +634,7 @@ define <32 x i16> @test_masked_z_32xi16_perm_mem_mask0(<32 x i16>* %vp, <32 x i1 ; GENERIC: # %bb.0: ; GENERIC-NEXT: vmovdqa64 {{.*#+}} zmm1 = [19,1,5,31,9,12,17,9,15,7,1,5,16,2,12,10,13,3,29,15,26,31,10,15,22,13,9,23,28,29,20,12] sched: [6:0.50] ; GENERIC-NEXT: vptestnmw %zmm0, %zmm0, %k1 # sched: [1:1.00] -; GENERIC-NEXT: vpermw (%rdi), %zmm1, %zmm0 {%k1} {z} # sched: [5:1.00] +; GENERIC-NEXT: vpermw (%rdi), %zmm1, %zmm0 {%k1} {z} # sched: [6:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: test_masked_z_32xi16_perm_mem_mask0: @@ -655,7 +655,7 @@ define <32 x i16> @test_masked_32xi16_perm_mem_mask1(<32 x i16>* %vp, <32 x i16> ; GENERIC: # %bb.0: ; GENERIC-NEXT: vmovdqa64 {{.*#+}} zmm2 = [31,20,2,2,23,1,0,12,16,14,15,18,21,13,11,31,8,24,13,11,2,27,22,28,14,21,3,12,6,1,30,6] sched: [6:0.50] ; GENERIC-NEXT: vptestnmw %zmm1, %zmm1, %k1 # sched: [1:1.00] -; 
GENERIC-NEXT: vpermw (%rdi), %zmm2, %zmm0 {%k1} # sched: [5:1.00] +; GENERIC-NEXT: vpermw (%rdi), %zmm2, %zmm0 {%k1} # sched: [6:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: test_masked_32xi16_perm_mem_mask1: @@ -676,7 +676,7 @@ define <32 x i16> @test_masked_z_32xi16_perm_mem_mask1(<32 x i16>* %vp, <32 x i1 ; GENERIC: # %bb.0: ; GENERIC-NEXT: vmovdqa64 {{.*#+}} zmm1 = [31,20,2,2,23,1,0,12,16,14,15,18,21,13,11,31,8,24,13,11,2,27,22,28,14,21,3,12,6,1,30,6] sched: [6:0.50] ; GENERIC-NEXT: vptestnmw %zmm0, %zmm0, %k1 # sched: [1:1.00] -; GENERIC-NEXT: vpermw (%rdi), %zmm1, %zmm0 {%k1} {z} # sched: [5:1.00] +; GENERIC-NEXT: vpermw (%rdi), %zmm1, %zmm0 {%k1} {z} # sched: [6:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: test_masked_z_32xi16_perm_mem_mask1: @@ -697,7 +697,7 @@ define <32 x i16> @test_masked_32xi16_perm_mem_mask2(<32 x i16>* %vp, <32 x i16> ; GENERIC: # %bb.0: ; GENERIC-NEXT: vmovdqa64 {{.*#+}} zmm2 = [4,6,12,17,4,31,31,4,12,21,28,15,29,10,15,15,21,6,19,7,10,30,28,26,1,4,8,25,26,18,22,25] sched: [6:0.50] ; GENERIC-NEXT: vptestnmw %zmm1, %zmm1, %k1 # sched: [1:1.00] -; GENERIC-NEXT: vpermw (%rdi), %zmm2, %zmm0 {%k1} # sched: [5:1.00] +; GENERIC-NEXT: vpermw (%rdi), %zmm2, %zmm0 {%k1} # sched: [6:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: test_masked_32xi16_perm_mem_mask2: @@ -718,7 +718,7 @@ define <32 x i16> @test_masked_z_32xi16_perm_mem_mask2(<32 x i16>* %vp, <32 x i1 ; GENERIC: # %bb.0: ; GENERIC-NEXT: vmovdqa64 {{.*#+}} zmm1 = [4,6,12,17,4,31,31,4,12,21,28,15,29,10,15,15,21,6,19,7,10,30,28,26,1,4,8,25,26,18,22,25] sched: [6:0.50] ; GENERIC-NEXT: vptestnmw %zmm0, %zmm0, %k1 # sched: [1:1.00] -; GENERIC-NEXT: vpermw (%rdi), %zmm1, %zmm0 {%k1} {z} # sched: [5:1.00] +; GENERIC-NEXT: vpermw (%rdi), %zmm1, %zmm0 {%k1} {z} # sched: [6:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: test_masked_z_32xi16_perm_mem_mask2: @@ -738,7 +738,7 @@ define <32 x i16> @test_32xi16_perm_mem_mask3(<32 x i16>* %vp) { 
; GENERIC-LABEL: test_32xi16_perm_mem_mask3: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vmovdqa64 {{.*#+}} zmm0 = [2,2,27,1,7,1,0,27,10,5,4,20,30,16,28,16,18,21,25,24,31,23,28,6,17,19,26,15,25,12,18,27] sched: [6:0.50] -; GENERIC-NEXT: vpermw (%rdi), %zmm0, %zmm0 # sched: [5:1.00] +; GENERIC-NEXT: vpermw (%rdi), %zmm0, %zmm0 # sched: [6:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: test_32xi16_perm_mem_mask3: @@ -755,7 +755,7 @@ define <32 x i16> @test_masked_32xi16_perm_mem_mask3(<32 x i16>* %vp, <32 x i16> ; GENERIC: # %bb.0: ; GENERIC-NEXT: vmovdqa64 {{.*#+}} zmm2 = [2,2,27,1,7,1,0,27,10,5,4,20,30,16,28,16,18,21,25,24,31,23,28,6,17,19,26,15,25,12,18,27] sched: [6:0.50] ; GENERIC-NEXT: vptestnmw %zmm1, %zmm1, %k1 # sched: [1:1.00] -; GENERIC-NEXT: vpermw (%rdi), %zmm2, %zmm0 {%k1} # sched: [5:1.00] +; GENERIC-NEXT: vpermw (%rdi), %zmm2, %zmm0 {%k1} # sched: [6:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: test_masked_32xi16_perm_mem_mask3: @@ -776,7 +776,7 @@ define <32 x i16> @test_masked_z_32xi16_perm_mem_mask3(<32 x i16>* %vp, <32 x i1 ; GENERIC: # %bb.0: ; GENERIC-NEXT: vmovdqa64 {{.*#+}} zmm1 = [2,2,27,1,7,1,0,27,10,5,4,20,30,16,28,16,18,21,25,24,31,23,28,6,17,19,26,15,25,12,18,27] sched: [6:0.50] ; GENERIC-NEXT: vptestnmw %zmm0, %zmm0, %k1 # sched: [1:1.00] -; GENERIC-NEXT: vpermw (%rdi), %zmm1, %zmm0 {%k1} {z} # sched: [5:1.00] +; GENERIC-NEXT: vpermw (%rdi), %zmm1, %zmm0 {%k1} {z} # sched: [6:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: test_masked_z_32xi16_perm_mem_mask3: @@ -990,7 +990,7 @@ define <8 x i32> @test_8xi32_perm_mem_mask0(<8 x i32>* %vp) { ; GENERIC-LABEL: test_8xi32_perm_mem_mask0: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vmovaps {{.*#+}} ymm0 = [3,7,4,3,5,2,0,5] sched: [7:0.50] -; GENERIC-NEXT: vpermps (%rdi), %ymm0, %ymm0 # sched: [5:1.00] +; GENERIC-NEXT: vpermps (%rdi), %ymm0, %ymm0 # sched: [6:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: test_8xi32_perm_mem_mask0: @@ -1007,7 +1007,7 @@ 
define <8 x i32> @test_masked_8xi32_perm_mem_mask0(<8 x i32>* %vp, <8 x i32> %ve ; GENERIC: # %bb.0: ; GENERIC-NEXT: vmovdqa {{.*#+}} ymm2 = [3,7,4,3,5,2,0,5] sched: [7:0.50] ; GENERIC-NEXT: vptestnmd %ymm1, %ymm1, %k1 # sched: [1:1.00] -; GENERIC-NEXT: vpermd (%rdi), %ymm2, %ymm0 {%k1} # sched: [5:1.00] +; GENERIC-NEXT: vpermd (%rdi), %ymm2, %ymm0 {%k1} # sched: [6:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: test_masked_8xi32_perm_mem_mask0: @@ -1028,7 +1028,7 @@ define <8 x i32> @test_masked_z_8xi32_perm_mem_mask0(<8 x i32>* %vp, <8 x i32> % ; GENERIC: # %bb.0: ; GENERIC-NEXT: vmovdqa {{.*#+}} ymm1 = [3,7,4,3,5,2,0,5] sched: [7:0.50] ; GENERIC-NEXT: vptestnmd %ymm0, %ymm0, %k1 # sched: [1:1.00] -; GENERIC-NEXT: vpermd (%rdi), %ymm1, %ymm0 {%k1} {z} # sched: [5:1.00] +; GENERIC-NEXT: vpermd (%rdi), %ymm1, %ymm0 {%k1} {z} # sched: [6:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: test_masked_z_8xi32_perm_mem_mask0: @@ -1049,7 +1049,7 @@ define <8 x i32> @test_masked_8xi32_perm_mem_mask1(<8 x i32>* %vp, <8 x i32> %ve ; GENERIC: # %bb.0: ; GENERIC-NEXT: vmovdqa {{.*#+}} ymm2 = [4,6,1,7,6,7,6,5] sched: [7:0.50] ; GENERIC-NEXT: vptestnmd %ymm1, %ymm1, %k1 # sched: [1:1.00] -; GENERIC-NEXT: vpermd (%rdi), %ymm2, %ymm0 {%k1} # sched: [5:1.00] +; GENERIC-NEXT: vpermd (%rdi), %ymm2, %ymm0 {%k1} # sched: [6:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: test_masked_8xi32_perm_mem_mask1: @@ -1070,7 +1070,7 @@ define <8 x i32> @test_masked_z_8xi32_perm_mem_mask1(<8 x i32>* %vp, <8 x i32> % ; GENERIC: # %bb.0: ; GENERIC-NEXT: vmovdqa {{.*#+}} ymm1 = [4,6,1,7,6,7,6,5] sched: [7:0.50] ; GENERIC-NEXT: vptestnmd %ymm0, %ymm0, %k1 # sched: [1:1.00] -; GENERIC-NEXT: vpermd (%rdi), %ymm1, %ymm0 {%k1} {z} # sched: [5:1.00] +; GENERIC-NEXT: vpermd (%rdi), %ymm1, %ymm0 {%k1} {z} # sched: [6:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: test_masked_z_8xi32_perm_mem_mask1: @@ -1091,7 +1091,7 @@ define <8 x i32> 
@test_masked_8xi32_perm_mem_mask2(<8 x i32>* %vp, <8 x i32> %ve ; GENERIC: # %bb.0: ; GENERIC-NEXT: vmovdqa {{.*#+}} ymm2 = [6,4,6,1,6,3,6,3] sched: [7:0.50] ; GENERIC-NEXT: vptestnmd %ymm1, %ymm1, %k1 # sched: [1:1.00] -; GENERIC-NEXT: vpermd (%rdi), %ymm2, %ymm0 {%k1} # sched: [5:1.00] +; GENERIC-NEXT: vpermd (%rdi), %ymm2, %ymm0 {%k1} # sched: [6:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: test_masked_8xi32_perm_mem_mask2: @@ -1112,7 +1112,7 @@ define <8 x i32> @test_masked_z_8xi32_perm_mem_mask2(<8 x i32>* %vp, <8 x i32> % ; GENERIC: # %bb.0: ; GENERIC-NEXT: vmovdqa {{.*#+}} ymm1 = [6,4,6,1,6,3,6,3] sched: [7:0.50] ; GENERIC-NEXT: vptestnmd %ymm0, %ymm0, %k1 # sched: [1:1.00] -; GENERIC-NEXT: vpermd (%rdi), %ymm1, %ymm0 {%k1} {z} # sched: [5:1.00] +; GENERIC-NEXT: vpermd (%rdi), %ymm1, %ymm0 {%k1} {z} # sched: [6:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: test_masked_z_8xi32_perm_mem_mask2: @@ -1132,7 +1132,7 @@ define <8 x i32> @test_8xi32_perm_mem_mask3(<8 x i32>* %vp) { ; GENERIC-LABEL: test_8xi32_perm_mem_mask3: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vmovaps {{.*#+}} ymm0 = [6,0,0,7,3,7,7,5] sched: [7:0.50] -; GENERIC-NEXT: vpermps (%rdi), %ymm0, %ymm0 # sched: [5:1.00] +; GENERIC-NEXT: vpermps (%rdi), %ymm0, %ymm0 # sched: [6:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: test_8xi32_perm_mem_mask3: @@ -1149,7 +1149,7 @@ define <8 x i32> @test_masked_8xi32_perm_mem_mask3(<8 x i32>* %vp, <8 x i32> %ve ; GENERIC: # %bb.0: ; GENERIC-NEXT: vmovdqa {{.*#+}} ymm2 = [6,0,0,7,3,7,7,5] sched: [7:0.50] ; GENERIC-NEXT: vptestnmd %ymm1, %ymm1, %k1 # sched: [1:1.00] -; GENERIC-NEXT: vpermd (%rdi), %ymm2, %ymm0 {%k1} # sched: [5:1.00] +; GENERIC-NEXT: vpermd (%rdi), %ymm2, %ymm0 {%k1} # sched: [6:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: test_masked_8xi32_perm_mem_mask3: @@ -1170,7 +1170,7 @@ define <8 x i32> @test_masked_z_8xi32_perm_mem_mask3(<8 x i32>* %vp, <8 x i32> % ; GENERIC: # %bb.0: ; GENERIC-NEXT: 
vmovdqa {{.*#+}} ymm1 = [6,0,0,7,3,7,7,5] sched: [7:0.50] ; GENERIC-NEXT: vptestnmd %ymm0, %ymm0, %k1 # sched: [1:1.00] -; GENERIC-NEXT: vpermd (%rdi), %ymm1, %ymm0 {%k1} {z} # sched: [5:1.00] +; GENERIC-NEXT: vpermd (%rdi), %ymm1, %ymm0 {%k1} {z} # sched: [6:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: test_masked_z_8xi32_perm_mem_mask3: @@ -1384,7 +1384,7 @@ define <16 x i32> @test_16xi32_perm_mem_mask0(<16 x i32>* %vp) { ; GENERIC-LABEL: test_16xi32_perm_mem_mask0: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vmovaps {{.*#+}} zmm0 = [0,1,1,6,8,11,2,6,10,1,7,5,15,0,6,6] sched: [6:0.50] -; GENERIC-NEXT: vpermps (%rdi), %zmm0, %zmm0 # sched: [5:1.00] +; GENERIC-NEXT: vpermps (%rdi), %zmm0, %zmm0 # sched: [6:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: test_16xi32_perm_mem_mask0: @@ -1401,7 +1401,7 @@ define <16 x i32> @test_masked_16xi32_perm_mem_mask0(<16 x i32>* %vp, <16 x i32> ; GENERIC: # %bb.0: ; GENERIC-NEXT: vmovdqa64 {{.*#+}} zmm2 = [0,1,1,6,8,11,2,6,10,1,7,5,15,0,6,6] sched: [6:0.50] ; GENERIC-NEXT: vptestnmd %zmm1, %zmm1, %k1 # sched: [1:1.00] -; GENERIC-NEXT: vpermd (%rdi), %zmm2, %zmm0 {%k1} # sched: [5:1.00] +; GENERIC-NEXT: vpermd (%rdi), %zmm2, %zmm0 {%k1} # sched: [6:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: test_masked_16xi32_perm_mem_mask0: @@ -1422,7 +1422,7 @@ define <16 x i32> @test_masked_z_16xi32_perm_mem_mask0(<16 x i32>* %vp, <16 x i3 ; GENERIC: # %bb.0: ; GENERIC-NEXT: vmovdqa64 {{.*#+}} zmm1 = [0,1,1,6,8,11,2,6,10,1,7,5,15,0,6,6] sched: [6:0.50] ; GENERIC-NEXT: vptestnmd %zmm0, %zmm0, %k1 # sched: [1:1.00] -; GENERIC-NEXT: vpermd (%rdi), %zmm1, %zmm0 {%k1} {z} # sched: [5:1.00] +; GENERIC-NEXT: vpermd (%rdi), %zmm1, %zmm0 {%k1} {z} # sched: [6:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: test_masked_z_16xi32_perm_mem_mask0: @@ -1443,7 +1443,7 @@ define <16 x i32> @test_masked_16xi32_perm_mem_mask1(<16 x i32>* %vp, <16 x i32> ; GENERIC: # %bb.0: ; GENERIC-NEXT: vmovdqa64 {{.*#+}} zmm2 = 
[11,5,3,4,7,15,12,4,8,11,12,7,6,12,6,3] sched: [6:0.50] ; GENERIC-NEXT: vptestnmd %zmm1, %zmm1, %k1 # sched: [1:1.00] -; GENERIC-NEXT: vpermd (%rdi), %zmm2, %zmm0 {%k1} # sched: [5:1.00] +; GENERIC-NEXT: vpermd (%rdi), %zmm2, %zmm0 {%k1} # sched: [6:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: test_masked_16xi32_perm_mem_mask1: @@ -1464,7 +1464,7 @@ define <16 x i32> @test_masked_z_16xi32_perm_mem_mask1(<16 x i32>* %vp, <16 x i3 ; GENERIC: # %bb.0: ; GENERIC-NEXT: vmovdqa64 {{.*#+}} zmm1 = [11,5,3,4,7,15,12,4,8,11,12,7,6,12,6,3] sched: [6:0.50] ; GENERIC-NEXT: vptestnmd %zmm0, %zmm0, %k1 # sched: [1:1.00] -; GENERIC-NEXT: vpermd (%rdi), %zmm1, %zmm0 {%k1} {z} # sched: [5:1.00] +; GENERIC-NEXT: vpermd (%rdi), %zmm1, %zmm0 {%k1} {z} # sched: [6:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: test_masked_z_16xi32_perm_mem_mask1: @@ -1485,7 +1485,7 @@ define <16 x i32> @test_masked_16xi32_perm_mem_mask2(<16 x i32>* %vp, <16 x i32> ; GENERIC: # %bb.0: ; GENERIC-NEXT: vmovdqa64 {{.*#+}} zmm2 = [7,14,2,7,10,7,3,0,11,9,0,4,12,10,8,2] sched: [6:0.50] ; GENERIC-NEXT: vptestnmd %zmm1, %zmm1, %k1 # sched: [1:1.00] -; GENERIC-NEXT: vpermd (%rdi), %zmm2, %zmm0 {%k1} # sched: [5:1.00] +; GENERIC-NEXT: vpermd (%rdi), %zmm2, %zmm0 {%k1} # sched: [6:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: test_masked_16xi32_perm_mem_mask2: @@ -1506,7 +1506,7 @@ define <16 x i32> @test_masked_z_16xi32_perm_mem_mask2(<16 x i32>* %vp, <16 x i3 ; GENERIC: # %bb.0: ; GENERIC-NEXT: vmovdqa64 {{.*#+}} zmm1 = [7,14,2,7,10,7,3,0,11,9,0,4,12,10,8,2] sched: [6:0.50] ; GENERIC-NEXT: vptestnmd %zmm0, %zmm0, %k1 # sched: [1:1.00] -; GENERIC-NEXT: vpermd (%rdi), %zmm1, %zmm0 {%k1} {z} # sched: [5:1.00] +; GENERIC-NEXT: vpermd (%rdi), %zmm1, %zmm0 {%k1} {z} # sched: [6:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: test_masked_z_16xi32_perm_mem_mask2: @@ -1526,7 +1526,7 @@ define <16 x i32> @test_16xi32_perm_mem_mask3(<16 x i32>* %vp) { ; GENERIC-LABEL: 
test_16xi32_perm_mem_mask3: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vmovaps {{.*#+}} zmm0 = [11,7,10,12,3,12,4,15,1,14,0,4,8,9,6,1] sched: [6:0.50] -; GENERIC-NEXT: vpermps (%rdi), %zmm0, %zmm0 # sched: [5:1.00] +; GENERIC-NEXT: vpermps (%rdi), %zmm0, %zmm0 # sched: [6:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: test_16xi32_perm_mem_mask3: @@ -1543,7 +1543,7 @@ define <16 x i32> @test_masked_16xi32_perm_mem_mask3(<16 x i32>* %vp, <16 x i32> ; GENERIC: # %bb.0: ; GENERIC-NEXT: vmovdqa64 {{.*#+}} zmm2 = [11,7,10,12,3,12,4,15,1,14,0,4,8,9,6,1] sched: [6:0.50] ; GENERIC-NEXT: vptestnmd %zmm1, %zmm1, %k1 # sched: [1:1.00] -; GENERIC-NEXT: vpermd (%rdi), %zmm2, %zmm0 {%k1} # sched: [5:1.00] +; GENERIC-NEXT: vpermd (%rdi), %zmm2, %zmm0 {%k1} # sched: [6:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: test_masked_16xi32_perm_mem_mask3: @@ -1564,7 +1564,7 @@ define <16 x i32> @test_masked_z_16xi32_perm_mem_mask3(<16 x i32>* %vp, <16 x i3 ; GENERIC: # %bb.0: ; GENERIC-NEXT: vmovdqa64 {{.*#+}} zmm1 = [11,7,10,12,3,12,4,15,1,14,0,4,8,9,6,1] sched: [6:0.50] ; GENERIC-NEXT: vptestnmd %zmm0, %zmm0, %k1 # sched: [1:1.00] -; GENERIC-NEXT: vpermd (%rdi), %zmm1, %zmm0 {%k1} {z} # sched: [5:1.00] +; GENERIC-NEXT: vpermd (%rdi), %zmm1, %zmm0 {%k1} {z} # sched: [6:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: test_masked_z_16xi32_perm_mem_mask3: @@ -1757,7 +1757,7 @@ define <4 x i64> @test_masked_z_4xi64_perm_mask3(<4 x i64> %vec, <4 x i64> %mask define <4 x i64> @test_4xi64_perm_mem_mask0(<4 x i64>* %vp) { ; GENERIC-LABEL: test_4xi64_perm_mem_mask0: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vpermpd {{.*#+}} ymm0 = mem[2,1,2,0] sched: [5:1.00] +; GENERIC-NEXT: vpermpd {{.*#+}} ymm0 = mem[2,1,2,0] sched: [6:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: test_4xi64_perm_mem_mask0: @@ -1772,7 +1772,7 @@ define <4 x i64> @test_masked_4xi64_perm_mem_mask0(<4 x i64>* %vp, <4 x i64> %ve ; GENERIC-LABEL: test_masked_4xi64_perm_mem_mask0: ; GENERIC: 
# %bb.0: ; GENERIC-NEXT: vptestnmq %ymm1, %ymm1, %k1 # sched: [1:1.00] -; GENERIC-NEXT: vpermq {{.*#+}} ymm0 {%k1} = mem[2,1,2,0] sched: [5:1.00] +; GENERIC-NEXT: vpermq {{.*#+}} ymm0 {%k1} = mem[2,1,2,0] sched: [6:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: test_masked_4xi64_perm_mem_mask0: @@ -1791,7 +1791,7 @@ define <4 x i64> @test_masked_z_4xi64_perm_mem_mask0(<4 x i64>* %vp, <4 x i64> % ; GENERIC-LABEL: test_masked_z_4xi64_perm_mem_mask0: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vptestnmq %ymm0, %ymm0, %k1 # sched: [1:1.00] -; GENERIC-NEXT: vpermq {{.*#+}} ymm0 {%k1} {z} = mem[2,1,2,0] sched: [5:1.00] +; GENERIC-NEXT: vpermq {{.*#+}} ymm0 {%k1} {z} = mem[2,1,2,0] sched: [6:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: test_masked_z_4xi64_perm_mem_mask0: @@ -1810,7 +1810,7 @@ define <4 x i64> @test_masked_4xi64_perm_mem_mask1(<4 x i64>* %vp, <4 x i64> %ve ; GENERIC-LABEL: test_masked_4xi64_perm_mem_mask1: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vptestnmq %ymm1, %ymm1, %k1 # sched: [1:1.00] -; GENERIC-NEXT: vpermq {{.*#+}} ymm0 {%k1} = mem[2,1,1,1] sched: [5:1.00] +; GENERIC-NEXT: vpermq {{.*#+}} ymm0 {%k1} = mem[2,1,1,1] sched: [6:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: test_masked_4xi64_perm_mem_mask1: @@ -1829,7 +1829,7 @@ define <4 x i64> @test_masked_z_4xi64_perm_mem_mask1(<4 x i64>* %vp, <4 x i64> % ; GENERIC-LABEL: test_masked_z_4xi64_perm_mem_mask1: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vptestnmq %ymm0, %ymm0, %k1 # sched: [1:1.00] -; GENERIC-NEXT: vpermq {{.*#+}} ymm0 {%k1} {z} = mem[2,1,1,1] sched: [5:1.00] +; GENERIC-NEXT: vpermq {{.*#+}} ymm0 {%k1} {z} = mem[2,1,1,1] sched: [6:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: test_masked_z_4xi64_perm_mem_mask1: @@ -1848,7 +1848,7 @@ define <4 x i64> @test_masked_4xi64_perm_mem_mask2(<4 x i64>* %vp, <4 x i64> %ve ; GENERIC-LABEL: test_masked_4xi64_perm_mem_mask2: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vptestnmq %ymm1, %ymm1, %k1 # sched: [1:1.00] -; 
GENERIC-NEXT: vpermq {{.*#+}} ymm0 {%k1} = mem[0,1,2,0] sched: [5:1.00] +; GENERIC-NEXT: vpermq {{.*#+}} ymm0 {%k1} = mem[0,1,2,0] sched: [6:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: test_masked_4xi64_perm_mem_mask2: @@ -1867,7 +1867,7 @@ define <4 x i64> @test_masked_z_4xi64_perm_mem_mask2(<4 x i64>* %vp, <4 x i64> % ; GENERIC-LABEL: test_masked_z_4xi64_perm_mem_mask2: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vptestnmq %ymm0, %ymm0, %k1 # sched: [1:1.00] -; GENERIC-NEXT: vpermq {{.*#+}} ymm0 {%k1} {z} = mem[0,1,2,0] sched: [5:1.00] +; GENERIC-NEXT: vpermq {{.*#+}} ymm0 {%k1} {z} = mem[0,1,2,0] sched: [6:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: test_masked_z_4xi64_perm_mem_mask2: @@ -1885,7 +1885,7 @@ define <4 x i64> @test_masked_z_4xi64_perm_mem_mask2(<4 x i64>* %vp, <4 x i64> % define <4 x i64> @test_4xi64_perm_mem_mask3(<4 x i64>* %vp) { ; GENERIC-LABEL: test_4xi64_perm_mem_mask3: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vpermpd {{.*#+}} ymm0 = mem[2,0,1,3] sched: [5:1.00] +; GENERIC-NEXT: vpermpd {{.*#+}} ymm0 = mem[2,0,1,3] sched: [6:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: test_4xi64_perm_mem_mask3: @@ -1900,7 +1900,7 @@ define <4 x i64> @test_masked_4xi64_perm_mem_mask3(<4 x i64>* %vp, <4 x i64> %ve ; GENERIC-LABEL: test_masked_4xi64_perm_mem_mask3: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vptestnmq %ymm1, %ymm1, %k1 # sched: [1:1.00] -; GENERIC-NEXT: vpermq {{.*#+}} ymm0 {%k1} = mem[2,0,1,3] sched: [5:1.00] +; GENERIC-NEXT: vpermq {{.*#+}} ymm0 {%k1} = mem[2,0,1,3] sched: [6:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: test_masked_4xi64_perm_mem_mask3: @@ -1919,7 +1919,7 @@ define <4 x i64> @test_masked_z_4xi64_perm_mem_mask3(<4 x i64>* %vp, <4 x i64> % ; GENERIC-LABEL: test_masked_z_4xi64_perm_mem_mask3: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vptestnmq %ymm0, %ymm0, %k1 # sched: [1:1.00] -; GENERIC-NEXT: vpermq {{.*#+}} ymm0 {%k1} {z} = mem[2,0,1,3] sched: [5:1.00] +; GENERIC-NEXT: vpermq {{.*#+}} ymm0 
{%k1} {z} = mem[2,0,1,3] sched: [6:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: test_masked_z_4xi64_perm_mem_mask3: @@ -2293,7 +2293,7 @@ define <8 x i64> @test_8xi64_perm_mem_mask0(<8 x i64>* %vp) { ; GENERIC-LABEL: test_8xi64_perm_mem_mask0: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vmovaps {{.*#+}} zmm0 = [5,1,6,5,7,3,7,3] sched: [6:0.50] -; GENERIC-NEXT: vpermpd (%rdi), %zmm0, %zmm0 # sched: [5:1.00] +; GENERIC-NEXT: vpermpd (%rdi), %zmm0, %zmm0 # sched: [6:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: test_8xi64_perm_mem_mask0: @@ -2310,7 +2310,7 @@ define <8 x i64> @test_masked_8xi64_perm_mem_mask0(<8 x i64>* %vp, <8 x i64> %ve ; GENERIC: # %bb.0: ; GENERIC-NEXT: vmovdqa64 {{.*#+}} zmm2 = [5,1,6,5,7,3,7,3] sched: [6:0.50] ; GENERIC-NEXT: vptestnmq %zmm1, %zmm1, %k1 # sched: [1:1.00] -; GENERIC-NEXT: vpermq (%rdi), %zmm2, %zmm0 {%k1} # sched: [5:1.00] +; GENERIC-NEXT: vpermq (%rdi), %zmm2, %zmm0 {%k1} # sched: [6:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: test_masked_8xi64_perm_mem_mask0: @@ -2331,7 +2331,7 @@ define <8 x i64> @test_masked_z_8xi64_perm_mem_mask0(<8 x i64>* %vp, <8 x i64> % ; GENERIC: # %bb.0: ; GENERIC-NEXT: vmovdqa64 {{.*#+}} zmm1 = [5,1,6,5,7,3,7,3] sched: [6:0.50] ; GENERIC-NEXT: vptestnmq %zmm0, %zmm0, %k1 # sched: [1:1.00] -; GENERIC-NEXT: vpermq (%rdi), %zmm1, %zmm0 {%k1} {z} # sched: [5:1.00] +; GENERIC-NEXT: vpermq (%rdi), %zmm1, %zmm0 {%k1} {z} # sched: [6:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: test_masked_z_8xi64_perm_mem_mask0: @@ -2351,7 +2351,7 @@ define <8 x i64> @test_masked_8xi64_perm_imm_mem_mask1(<8 x i64>* %vp, <8 x i64> ; GENERIC-LABEL: test_masked_8xi64_perm_imm_mem_mask1: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vptestnmq %zmm1, %zmm1, %k1 # sched: [1:1.00] -; GENERIC-NEXT: vpermq {{.*#+}} zmm0 {%k1} = mem[1,1,1,0,5,5,5,4] sched: [5:1.00] +; GENERIC-NEXT: vpermq {{.*#+}} zmm0 {%k1} = mem[1,1,1,0,5,5,5,4] sched: [6:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; 
SKX-LABEL: test_masked_8xi64_perm_imm_mem_mask1: @@ -2370,7 +2370,7 @@ define <8 x i64> @test_masked_z_8xi64_perm_imm_mem_mask1(<8 x i64>* %vp, <8 x i6 ; GENERIC-LABEL: test_masked_z_8xi64_perm_imm_mem_mask1: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vptestnmq %zmm0, %zmm0, %k1 # sched: [1:1.00] -; GENERIC-NEXT: vpermq {{.*#+}} zmm0 {%k1} {z} = mem[1,1,1,0,5,5,5,4] sched: [5:1.00] +; GENERIC-NEXT: vpermq {{.*#+}} zmm0 {%k1} {z} = mem[1,1,1,0,5,5,5,4] sched: [6:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: test_masked_z_8xi64_perm_imm_mem_mask1: @@ -2390,7 +2390,7 @@ define <8 x i64> @test_masked_8xi64_perm_mem_mask2(<8 x i64>* %vp, <8 x i64> %ve ; GENERIC: # %bb.0: ; GENERIC-NEXT: vmovdqa64 {{.*#+}} zmm2 = [0,2,1,4,1,1,5,5] sched: [6:0.50] ; GENERIC-NEXT: vptestnmq %zmm1, %zmm1, %k1 # sched: [1:1.00] -; GENERIC-NEXT: vpermq (%rdi), %zmm2, %zmm0 {%k1} # sched: [5:1.00] +; GENERIC-NEXT: vpermq (%rdi), %zmm2, %zmm0 {%k1} # sched: [6:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: test_masked_8xi64_perm_mem_mask2: @@ -2411,7 +2411,7 @@ define <8 x i64> @test_masked_z_8xi64_perm_mem_mask2(<8 x i64>* %vp, <8 x i64> % ; GENERIC: # %bb.0: ; GENERIC-NEXT: vmovdqa64 {{.*#+}} zmm1 = [0,2,1,4,1,1,5,5] sched: [6:0.50] ; GENERIC-NEXT: vptestnmq %zmm0, %zmm0, %k1 # sched: [1:1.00] -; GENERIC-NEXT: vpermq (%rdi), %zmm1, %zmm0 {%k1} {z} # sched: [5:1.00] +; GENERIC-NEXT: vpermq (%rdi), %zmm1, %zmm0 {%k1} {z} # sched: [6:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: test_masked_z_8xi64_perm_mem_mask2: @@ -2430,7 +2430,7 @@ define <8 x i64> @test_masked_z_8xi64_perm_mem_mask2(<8 x i64>* %vp, <8 x i64> % define <8 x i64> @test_8xi64_perm_imm_mem_mask3(<8 x i64>* %vp) { ; GENERIC-LABEL: test_8xi64_perm_imm_mem_mask3: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vpermpd {{.*#+}} zmm0 = mem[1,3,1,1,5,7,5,5] sched: [5:1.00] +; GENERIC-NEXT: vpermpd {{.*#+}} zmm0 = mem[1,3,1,1,5,7,5,5] sched: [6:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: 
test_8xi64_perm_imm_mem_mask3: @@ -2445,7 +2445,7 @@ define <8 x i64> @test_masked_8xi64_perm_imm_mem_mask3(<8 x i64>* %vp, <8 x i64> ; GENERIC-LABEL: test_masked_8xi64_perm_imm_mem_mask3: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vptestnmq %zmm1, %zmm1, %k1 # sched: [1:1.00] -; GENERIC-NEXT: vpermq {{.*#+}} zmm0 {%k1} = mem[1,3,1,1,5,7,5,5] sched: [5:1.00] +; GENERIC-NEXT: vpermq {{.*#+}} zmm0 {%k1} = mem[1,3,1,1,5,7,5,5] sched: [6:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: test_masked_8xi64_perm_imm_mem_mask3: @@ -2464,7 +2464,7 @@ define <8 x i64> @test_masked_z_8xi64_perm_imm_mem_mask3(<8 x i64>* %vp, <8 x i6 ; GENERIC-LABEL: test_masked_z_8xi64_perm_imm_mem_mask3: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vptestnmq %zmm0, %zmm0, %k1 # sched: [1:1.00] -; GENERIC-NEXT: vpermq {{.*#+}} zmm0 {%k1} {z} = mem[1,3,1,1,5,7,5,5] sched: [5:1.00] +; GENERIC-NEXT: vpermq {{.*#+}} zmm0 {%k1} {z} = mem[1,3,1,1,5,7,5,5] sched: [6:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: test_masked_z_8xi64_perm_imm_mem_mask3: @@ -2484,7 +2484,7 @@ define <8 x i64> @test_masked_8xi64_perm_mem_mask4(<8 x i64>* %vp, <8 x i64> %ve ; GENERIC: # %bb.0: ; GENERIC-NEXT: vmovdqa64 {{.*#+}} zmm2 = [5,0,7,0,3,5,0,6] sched: [6:0.50] ; GENERIC-NEXT: vptestnmq %zmm1, %zmm1, %k1 # sched: [1:1.00] -; GENERIC-NEXT: vpermq (%rdi), %zmm2, %zmm0 {%k1} # sched: [5:1.00] +; GENERIC-NEXT: vpermq (%rdi), %zmm2, %zmm0 {%k1} # sched: [6:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: test_masked_8xi64_perm_mem_mask4: @@ -2505,7 +2505,7 @@ define <8 x i64> @test_masked_z_8xi64_perm_mem_mask4(<8 x i64>* %vp, <8 x i64> % ; GENERIC: # %bb.0: ; GENERIC-NEXT: vmovdqa64 {{.*#+}} zmm1 = [5,0,7,0,3,5,0,6] sched: [6:0.50] ; GENERIC-NEXT: vptestnmq %zmm0, %zmm0, %k1 # sched: [1:1.00] -; GENERIC-NEXT: vpermq (%rdi), %zmm1, %zmm0 {%k1} {z} # sched: [5:1.00] +; GENERIC-NEXT: vpermq (%rdi), %zmm1, %zmm0 {%k1} {z} # sched: [6:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: 
test_masked_z_8xi64_perm_mem_mask4: @@ -2525,7 +2525,7 @@ define <8 x i64> @test_masked_8xi64_perm_imm_mem_mask5(<8 x i64>* %vp, <8 x i64> ; GENERIC-LABEL: test_masked_8xi64_perm_imm_mem_mask5: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vptestnmq %zmm1, %zmm1, %k1 # sched: [1:1.00] -; GENERIC-NEXT: vpermq {{.*#+}} zmm0 {%k1} = mem[3,1,0,0,7,5,4,4] sched: [5:1.00] +; GENERIC-NEXT: vpermq {{.*#+}} zmm0 {%k1} = mem[3,1,0,0,7,5,4,4] sched: [6:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: test_masked_8xi64_perm_imm_mem_mask5: @@ -2544,7 +2544,7 @@ define <8 x i64> @test_masked_z_8xi64_perm_imm_mem_mask5(<8 x i64>* %vp, <8 x i6 ; GENERIC-LABEL: test_masked_z_8xi64_perm_imm_mem_mask5: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vptestnmq %zmm0, %zmm0, %k1 # sched: [1:1.00] -; GENERIC-NEXT: vpermq {{.*#+}} zmm0 {%k1} {z} = mem[3,1,0,0,7,5,4,4] sched: [5:1.00] +; GENERIC-NEXT: vpermq {{.*#+}} zmm0 {%k1} {z} = mem[3,1,0,0,7,5,4,4] sched: [6:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: test_masked_z_8xi64_perm_imm_mem_mask5: @@ -2563,7 +2563,7 @@ define <8 x i64> @test_8xi64_perm_mem_mask6(<8 x i64>* %vp) { ; GENERIC-LABEL: test_8xi64_perm_mem_mask6: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vmovaps {{.*#+}} zmm0 = [0,6,3,7,3,0,3,6] sched: [6:0.50] -; GENERIC-NEXT: vpermpd (%rdi), %zmm0, %zmm0 # sched: [5:1.00] +; GENERIC-NEXT: vpermpd (%rdi), %zmm0, %zmm0 # sched: [6:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: test_8xi64_perm_mem_mask6: @@ -2580,7 +2580,7 @@ define <8 x i64> @test_masked_8xi64_perm_mem_mask6(<8 x i64>* %vp, <8 x i64> %ve ; GENERIC: # %bb.0: ; GENERIC-NEXT: vmovdqa64 {{.*#+}} zmm2 = [0,6,3,7,3,0,3,6] sched: [6:0.50] ; GENERIC-NEXT: vptestnmq %zmm1, %zmm1, %k1 # sched: [1:1.00] -; GENERIC-NEXT: vpermq (%rdi), %zmm2, %zmm0 {%k1} # sched: [5:1.00] +; GENERIC-NEXT: vpermq (%rdi), %zmm2, %zmm0 {%k1} # sched: [6:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: test_masked_8xi64_perm_mem_mask6: @@ -2601,7 +2601,7 @@ define <8 x 
i64> @test_masked_z_8xi64_perm_mem_mask6(<8 x i64>* %vp, <8 x i64> % ; GENERIC: # %bb.0: ; GENERIC-NEXT: vmovdqa64 {{.*#+}} zmm1 = [0,6,3,7,3,0,3,6] sched: [6:0.50] ; GENERIC-NEXT: vptestnmq %zmm0, %zmm0, %k1 # sched: [1:1.00] -; GENERIC-NEXT: vpermq (%rdi), %zmm1, %zmm0 {%k1} {z} # sched: [5:1.00] +; GENERIC-NEXT: vpermq (%rdi), %zmm1, %zmm0 {%k1} {z} # sched: [6:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: test_masked_z_8xi64_perm_mem_mask6: @@ -2621,7 +2621,7 @@ define <8 x i64> @test_masked_8xi64_perm_imm_mem_mask7(<8 x i64>* %vp, <8 x i64> ; GENERIC-LABEL: test_masked_8xi64_perm_imm_mem_mask7: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vptestnmq %zmm1, %zmm1, %k1 # sched: [1:1.00] -; GENERIC-NEXT: vpermq {{.*#+}} zmm0 {%k1} = mem[3,0,0,1,7,4,4,5] sched: [5:1.00] +; GENERIC-NEXT: vpermq {{.*#+}} zmm0 {%k1} = mem[3,0,0,1,7,4,4,5] sched: [6:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: test_masked_8xi64_perm_imm_mem_mask7: @@ -2640,7 +2640,7 @@ define <8 x i64> @test_masked_z_8xi64_perm_imm_mem_mask7(<8 x i64>* %vp, <8 x i6 ; GENERIC-LABEL: test_masked_z_8xi64_perm_imm_mem_mask7: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vptestnmq %zmm0, %zmm0, %k1 # sched: [1:1.00] -; GENERIC-NEXT: vpermq {{.*#+}} zmm0 {%k1} {z} = mem[3,0,0,1,7,4,4,5] sched: [5:1.00] +; GENERIC-NEXT: vpermq {{.*#+}} zmm0 {%k1} {z} = mem[3,0,0,1,7,4,4,5] sched: [6:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: test_masked_z_8xi64_perm_imm_mem_mask7: @@ -2853,7 +2853,7 @@ define <8 x float> @test_8xfloat_perm_mem_mask0(<8 x float>* %vp) { ; GENERIC-LABEL: test_8xfloat_perm_mem_mask0: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vmovaps {{.*#+}} ymm0 = [5,2,1,6,4,2,4,0] sched: [7:0.50] -; GENERIC-NEXT: vpermps (%rdi), %ymm0, %ymm0 # sched: [5:1.00] +; GENERIC-NEXT: vpermps (%rdi), %ymm0, %ymm0 # sched: [6:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: test_8xfloat_perm_mem_mask0: @@ -2870,7 +2870,7 @@ define <8 x float> @test_masked_8xfloat_perm_mem_mask0(<8 x 
float>* %vp, <8 x fl ; GENERIC: # %bb.0: ; GENERIC-NEXT: vmovaps {{.*#+}} ymm2 = [5,2,1,6,4,2,4,0] sched: [7:0.50] ; GENERIC-NEXT: vptestnmd %ymm1, %ymm1, %k1 # sched: [1:1.00] -; GENERIC-NEXT: vpermps (%rdi), %ymm2, %ymm0 {%k1} # sched: [5:1.00] +; GENERIC-NEXT: vpermps (%rdi), %ymm2, %ymm0 {%k1} # sched: [6:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: test_masked_8xfloat_perm_mem_mask0: @@ -2891,7 +2891,7 @@ define <8 x float> @test_masked_z_8xfloat_perm_mem_mask0(<8 x float>* %vp, <8 x ; GENERIC: # %bb.0: ; GENERIC-NEXT: vmovaps {{.*#+}} ymm1 = [5,2,1,6,4,2,4,0] sched: [7:0.50] ; GENERIC-NEXT: vptestnmd %ymm0, %ymm0, %k1 # sched: [1:1.00] -; GENERIC-NEXT: vpermps (%rdi), %ymm1, %ymm0 {%k1} {z} # sched: [5:1.00] +; GENERIC-NEXT: vpermps (%rdi), %ymm1, %ymm0 {%k1} {z} # sched: [6:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: test_masked_z_8xfloat_perm_mem_mask0: @@ -2912,7 +2912,7 @@ define <8 x float> @test_masked_8xfloat_perm_mem_mask1(<8 x float>* %vp, <8 x fl ; GENERIC: # %bb.0: ; GENERIC-NEXT: vmovaps {{.*#+}} ymm2 = [1,3,7,4,0,6,6,6] sched: [7:0.50] ; GENERIC-NEXT: vptestnmd %ymm1, %ymm1, %k1 # sched: [1:1.00] -; GENERIC-NEXT: vpermps (%rdi), %ymm2, %ymm0 {%k1} # sched: [5:1.00] +; GENERIC-NEXT: vpermps (%rdi), %ymm2, %ymm0 {%k1} # sched: [6:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: test_masked_8xfloat_perm_mem_mask1: @@ -2933,7 +2933,7 @@ define <8 x float> @test_masked_z_8xfloat_perm_mem_mask1(<8 x float>* %vp, <8 x ; GENERIC: # %bb.0: ; GENERIC-NEXT: vmovaps {{.*#+}} ymm1 = [1,3,7,4,0,6,6,6] sched: [7:0.50] ; GENERIC-NEXT: vptestnmd %ymm0, %ymm0, %k1 # sched: [1:1.00] -; GENERIC-NEXT: vpermps (%rdi), %ymm1, %ymm0 {%k1} {z} # sched: [5:1.00] +; GENERIC-NEXT: vpermps (%rdi), %ymm1, %ymm0 {%k1} {z} # sched: [6:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: test_masked_z_8xfloat_perm_mem_mask1: @@ -2954,7 +2954,7 @@ define <8 x float> @test_masked_8xfloat_perm_mem_mask2(<8 x float>* %vp, <8 x fl ; 
GENERIC: # %bb.0: ; GENERIC-NEXT: vmovaps {{.*#+}} ymm2 = [4,5,1,5,6,6,2,4] sched: [7:0.50] ; GENERIC-NEXT: vptestnmd %ymm1, %ymm1, %k1 # sched: [1:1.00] -; GENERIC-NEXT: vpermps (%rdi), %ymm2, %ymm0 {%k1} # sched: [5:1.00] +; GENERIC-NEXT: vpermps (%rdi), %ymm2, %ymm0 {%k1} # sched: [6:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: test_masked_8xfloat_perm_mem_mask2: @@ -2975,7 +2975,7 @@ define <8 x float> @test_masked_z_8xfloat_perm_mem_mask2(<8 x float>* %vp, <8 x ; GENERIC: # %bb.0: ; GENERIC-NEXT: vmovaps {{.*#+}} ymm1 = [4,5,1,5,6,6,2,4] sched: [7:0.50] ; GENERIC-NEXT: vptestnmd %ymm0, %ymm0, %k1 # sched: [1:1.00] -; GENERIC-NEXT: vpermps (%rdi), %ymm1, %ymm0 {%k1} {z} # sched: [5:1.00] +; GENERIC-NEXT: vpermps (%rdi), %ymm1, %ymm0 {%k1} {z} # sched: [6:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: test_masked_z_8xfloat_perm_mem_mask2: @@ -2995,7 +2995,7 @@ define <8 x float> @test_8xfloat_perm_mem_mask3(<8 x float>* %vp, <8 x i32> %mas ; GENERIC-LABEL: test_8xfloat_perm_mem_mask3: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vmovaps {{.*#+}} ymm0 = [5,7,0,6,4,2,3,0] sched: [7:0.50] -; GENERIC-NEXT: vpermps (%rdi), %ymm0, %ymm0 # sched: [5:1.00] +; GENERIC-NEXT: vpermps (%rdi), %ymm0, %ymm0 # sched: [6:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: test_8xfloat_perm_mem_mask3: @@ -3012,7 +3012,7 @@ define <8 x float> @test_masked_8xfloat_perm_mem_mask3(<8 x float>* %vp, <8 x fl ; GENERIC: # %bb.0: ; GENERIC-NEXT: vmovaps {{.*#+}} ymm2 = [5,7,0,6,4,2,3,0] sched: [7:0.50] ; GENERIC-NEXT: vptestnmd %ymm1, %ymm1, %k1 # sched: [1:1.00] -; GENERIC-NEXT: vpermps (%rdi), %ymm2, %ymm0 {%k1} # sched: [5:1.00] +; GENERIC-NEXT: vpermps (%rdi), %ymm2, %ymm0 {%k1} # sched: [6:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: test_masked_8xfloat_perm_mem_mask3: @@ -3033,7 +3033,7 @@ define <8 x float> @test_masked_z_8xfloat_perm_mem_mask3(<8 x float>* %vp, <8 x ; GENERIC: # %bb.0: ; GENERIC-NEXT: vmovaps {{.*#+}} ymm1 = 
[5,7,0,6,4,2,3,0] sched: [7:0.50] ; GENERIC-NEXT: vptestnmd %ymm0, %ymm0, %k1 # sched: [1:1.00] -; GENERIC-NEXT: vpermps (%rdi), %ymm1, %ymm0 {%k1} {z} # sched: [5:1.00] +; GENERIC-NEXT: vpermps (%rdi), %ymm1, %ymm0 {%k1} {z} # sched: [6:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: test_masked_z_8xfloat_perm_mem_mask3: @@ -3247,7 +3247,7 @@ define <16 x float> @test_16xfloat_perm_mem_mask0(<16 x float>* %vp) { ; GENERIC-LABEL: test_16xfloat_perm_mem_mask0: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vmovaps {{.*#+}} zmm0 = [10,2,1,14,9,9,7,2,9,4,12,11,0,14,0,1] sched: [6:0.50] -; GENERIC-NEXT: vpermps (%rdi), %zmm0, %zmm0 # sched: [5:1.00] +; GENERIC-NEXT: vpermps (%rdi), %zmm0, %zmm0 # sched: [6:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: test_16xfloat_perm_mem_mask0: @@ -3264,7 +3264,7 @@ define <16 x float> @test_masked_16xfloat_perm_mem_mask0(<16 x float>* %vp, <16 ; GENERIC: # %bb.0: ; GENERIC-NEXT: vmovaps {{.*#+}} zmm2 = [10,2,1,14,9,9,7,2,9,4,12,11,0,14,0,1] sched: [6:0.50] ; GENERIC-NEXT: vptestnmd %zmm1, %zmm1, %k1 # sched: [1:1.00] -; GENERIC-NEXT: vpermps (%rdi), %zmm2, %zmm0 {%k1} # sched: [5:1.00] +; GENERIC-NEXT: vpermps (%rdi), %zmm2, %zmm0 {%k1} # sched: [6:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: test_masked_16xfloat_perm_mem_mask0: @@ -3285,7 +3285,7 @@ define <16 x float> @test_masked_z_16xfloat_perm_mem_mask0(<16 x float>* %vp, <1 ; GENERIC: # %bb.0: ; GENERIC-NEXT: vmovaps {{.*#+}} zmm1 = [10,2,1,14,9,9,7,2,9,4,12,11,0,14,0,1] sched: [6:0.50] ; GENERIC-NEXT: vptestnmd %zmm0, %zmm0, %k1 # sched: [1:1.00] -; GENERIC-NEXT: vpermps (%rdi), %zmm1, %zmm0 {%k1} {z} # sched: [5:1.00] +; GENERIC-NEXT: vpermps (%rdi), %zmm1, %zmm0 {%k1} {z} # sched: [6:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: test_masked_z_16xfloat_perm_mem_mask0: @@ -3306,7 +3306,7 @@ define <16 x float> @test_masked_16xfloat_perm_mem_mask1(<16 x float>* %vp, <16 ; GENERIC: # %bb.0: ; GENERIC-NEXT: vmovaps {{.*#+}} zmm2 = 
[4,2,3,5,11,6,4,7,6,4,14,8,15,12,9,4] sched: [6:0.50] ; GENERIC-NEXT: vptestnmd %zmm1, %zmm1, %k1 # sched: [1:1.00] -; GENERIC-NEXT: vpermps (%rdi), %zmm2, %zmm0 {%k1} # sched: [5:1.00] +; GENERIC-NEXT: vpermps (%rdi), %zmm2, %zmm0 {%k1} # sched: [6:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: test_masked_16xfloat_perm_mem_mask1: @@ -3327,7 +3327,7 @@ define <16 x float> @test_masked_z_16xfloat_perm_mem_mask1(<16 x float>* %vp, <1 ; GENERIC: # %bb.0: ; GENERIC-NEXT: vmovaps {{.*#+}} zmm1 = [4,2,3,5,11,6,4,7,6,4,14,8,15,12,9,4] sched: [6:0.50] ; GENERIC-NEXT: vptestnmd %zmm0, %zmm0, %k1 # sched: [1:1.00] -; GENERIC-NEXT: vpermps (%rdi), %zmm1, %zmm0 {%k1} {z} # sched: [5:1.00] +; GENERIC-NEXT: vpermps (%rdi), %zmm1, %zmm0 {%k1} {z} # sched: [6:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: test_masked_z_16xfloat_perm_mem_mask1: @@ -3348,7 +3348,7 @@ define <16 x float> @test_masked_16xfloat_perm_mem_mask2(<16 x float>* %vp, <16 ; GENERIC: # %bb.0: ; GENERIC-NEXT: vmovaps {{.*#+}} zmm2 = [10,7,11,6,7,0,11,0,10,9,12,4,10,3,8,5] sched: [6:0.50] ; GENERIC-NEXT: vptestnmd %zmm1, %zmm1, %k1 # sched: [1:1.00] -; GENERIC-NEXT: vpermps (%rdi), %zmm2, %zmm0 {%k1} # sched: [5:1.00] +; GENERIC-NEXT: vpermps (%rdi), %zmm2, %zmm0 {%k1} # sched: [6:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: test_masked_16xfloat_perm_mem_mask2: @@ -3369,7 +3369,7 @@ define <16 x float> @test_masked_z_16xfloat_perm_mem_mask2(<16 x float>* %vp, <1 ; GENERIC: # %bb.0: ; GENERIC-NEXT: vmovaps {{.*#+}} zmm1 = [10,7,11,6,7,0,11,0,10,9,12,4,10,3,8,5] sched: [6:0.50] ; GENERIC-NEXT: vptestnmd %zmm0, %zmm0, %k1 # sched: [1:1.00] -; GENERIC-NEXT: vpermps (%rdi), %zmm1, %zmm0 {%k1} {z} # sched: [5:1.00] +; GENERIC-NEXT: vpermps (%rdi), %zmm1, %zmm0 {%k1} {z} # sched: [6:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: test_masked_z_16xfloat_perm_mem_mask2: @@ -3389,7 +3389,7 @@ define <16 x float> @test_16xfloat_perm_mem_mask3(<16 x float>* %vp) { ; 
GENERIC-LABEL: test_16xfloat_perm_mem_mask3: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vmovaps {{.*#+}} zmm0 = [15,15,3,9,5,15,14,9,11,10,5,14,14,5,11,0] sched: [6:0.50] -; GENERIC-NEXT: vpermps (%rdi), %zmm0, %zmm0 # sched: [5:1.00] +; GENERIC-NEXT: vpermps (%rdi), %zmm0, %zmm0 # sched: [6:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: test_16xfloat_perm_mem_mask3: @@ -3406,7 +3406,7 @@ define <16 x float> @test_masked_16xfloat_perm_mem_mask3(<16 x float>* %vp, <16 ; GENERIC: # %bb.0: ; GENERIC-NEXT: vmovaps {{.*#+}} zmm2 = [15,15,3,9,5,15,14,9,11,10,5,14,14,5,11,0] sched: [6:0.50] ; GENERIC-NEXT: vptestnmd %zmm1, %zmm1, %k1 # sched: [1:1.00] -; GENERIC-NEXT: vpermps (%rdi), %zmm2, %zmm0 {%k1} # sched: [5:1.00] +; GENERIC-NEXT: vpermps (%rdi), %zmm2, %zmm0 {%k1} # sched: [6:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: test_masked_16xfloat_perm_mem_mask3: @@ -3427,7 +3427,7 @@ define <16 x float> @test_masked_z_16xfloat_perm_mem_mask3(<16 x float>* %vp, <1 ; GENERIC: # %bb.0: ; GENERIC-NEXT: vmovaps {{.*#+}} zmm1 = [15,15,3,9,5,15,14,9,11,10,5,14,14,5,11,0] sched: [6:0.50] ; GENERIC-NEXT: vptestnmd %zmm0, %zmm0, %k1 # sched: [1:1.00] -; GENERIC-NEXT: vpermps (%rdi), %zmm1, %zmm0 {%k1} {z} # sched: [5:1.00] +; GENERIC-NEXT: vpermps (%rdi), %zmm1, %zmm0 {%k1} {z} # sched: [6:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: test_masked_z_16xfloat_perm_mem_mask3: @@ -3620,7 +3620,7 @@ define <4 x double> @test_masked_z_4xdouble_perm_mask3(<4 x double> %vec, <4 x i define <4 x double> @test_4xdouble_perm_mem_mask0(<4 x double>* %vp) { ; GENERIC-LABEL: test_4xdouble_perm_mem_mask0: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vpermpd {{.*#+}} ymm0 = mem[0,0,2,0] sched: [5:1.00] +; GENERIC-NEXT: vpermpd {{.*#+}} ymm0 = mem[0,0,2,0] sched: [6:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: test_4xdouble_perm_mem_mask0: @@ -3635,7 +3635,7 @@ define <4 x double> @test_masked_4xdouble_perm_mem_mask0(<4 x double>* %vp, <4 x ; GENERIC-LABEL: 
test_masked_4xdouble_perm_mem_mask0: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vptestnmq %ymm1, %ymm1, %k1 # sched: [1:1.00] -; GENERIC-NEXT: vpermpd {{.*#+}} ymm0 {%k1} = mem[0,0,2,0] sched: [5:1.00] +; GENERIC-NEXT: vpermpd {{.*#+}} ymm0 {%k1} = mem[0,0,2,0] sched: [6:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: test_masked_4xdouble_perm_mem_mask0: @@ -3654,7 +3654,7 @@ define <4 x double> @test_masked_z_4xdouble_perm_mem_mask0(<4 x double>* %vp, <4 ; GENERIC-LABEL: test_masked_z_4xdouble_perm_mem_mask0: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vptestnmq %ymm0, %ymm0, %k1 # sched: [1:1.00] -; GENERIC-NEXT: vpermpd {{.*#+}} ymm0 {%k1} {z} = mem[0,0,2,0] sched: [5:1.00] +; GENERIC-NEXT: vpermpd {{.*#+}} ymm0 {%k1} {z} = mem[0,0,2,0] sched: [6:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: test_masked_z_4xdouble_perm_mem_mask0: @@ -3673,7 +3673,7 @@ define <4 x double> @test_masked_4xdouble_perm_mem_mask1(<4 x double>* %vp, <4 x ; GENERIC-LABEL: test_masked_4xdouble_perm_mem_mask1: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vptestnmq %ymm1, %ymm1, %k1 # sched: [1:1.00] -; GENERIC-NEXT: vpermpd {{.*#+}} ymm0 {%k1} = mem[0,2,3,2] sched: [5:1.00] +; GENERIC-NEXT: vpermpd {{.*#+}} ymm0 {%k1} = mem[0,2,3,2] sched: [6:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: test_masked_4xdouble_perm_mem_mask1: @@ -3692,7 +3692,7 @@ define <4 x double> @test_masked_z_4xdouble_perm_mem_mask1(<4 x double>* %vp, <4 ; GENERIC-LABEL: test_masked_z_4xdouble_perm_mem_mask1: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vptestnmq %ymm0, %ymm0, %k1 # sched: [1:1.00] -; GENERIC-NEXT: vpermpd {{.*#+}} ymm0 {%k1} {z} = mem[0,2,3,2] sched: [5:1.00] +; GENERIC-NEXT: vpermpd {{.*#+}} ymm0 {%k1} {z} = mem[0,2,3,2] sched: [6:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: test_masked_z_4xdouble_perm_mem_mask1: @@ -3711,7 +3711,7 @@ define <4 x double> @test_masked_4xdouble_perm_mem_mask2(<4 x double>* %vp, <4 x ; GENERIC-LABEL: test_masked_4xdouble_perm_mem_mask2: ; GENERIC: 
# %bb.0: ; GENERIC-NEXT: vptestnmq %ymm1, %ymm1, %k1 # sched: [1:1.00] -; GENERIC-NEXT: vpermpd {{.*#+}} ymm0 {%k1} = mem[3,1,1,1] sched: [5:1.00] +; GENERIC-NEXT: vpermpd {{.*#+}} ymm0 {%k1} = mem[3,1,1,1] sched: [6:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: test_masked_4xdouble_perm_mem_mask2: @@ -3730,7 +3730,7 @@ define <4 x double> @test_masked_z_4xdouble_perm_mem_mask2(<4 x double>* %vp, <4 ; GENERIC-LABEL: test_masked_z_4xdouble_perm_mem_mask2: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vptestnmq %ymm0, %ymm0, %k1 # sched: [1:1.00] -; GENERIC-NEXT: vpermpd {{.*#+}} ymm0 {%k1} {z} = mem[3,1,1,1] sched: [5:1.00] +; GENERIC-NEXT: vpermpd {{.*#+}} ymm0 {%k1} {z} = mem[3,1,1,1] sched: [6:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: test_masked_z_4xdouble_perm_mem_mask2: @@ -3748,7 +3748,7 @@ define <4 x double> @test_masked_z_4xdouble_perm_mem_mask2(<4 x double>* %vp, <4 define <4 x double> @test_4xdouble_perm_mem_mask3(<4 x double>* %vp) { ; GENERIC-LABEL: test_4xdouble_perm_mem_mask3: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vpermpd {{.*#+}} ymm0 = mem[3,2,3,2] sched: [5:1.00] +; GENERIC-NEXT: vpermpd {{.*#+}} ymm0 = mem[3,2,3,2] sched: [6:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: test_4xdouble_perm_mem_mask3: @@ -3763,7 +3763,7 @@ define <4 x double> @test_masked_4xdouble_perm_mem_mask3(<4 x double>* %vp, <4 x ; GENERIC-LABEL: test_masked_4xdouble_perm_mem_mask3: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vptestnmq %ymm1, %ymm1, %k1 # sched: [1:1.00] -; GENERIC-NEXT: vpermpd {{.*#+}} ymm0 {%k1} = mem[3,2,3,2] sched: [5:1.00] +; GENERIC-NEXT: vpermpd {{.*#+}} ymm0 {%k1} = mem[3,2,3,2] sched: [6:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: test_masked_4xdouble_perm_mem_mask3: @@ -3782,7 +3782,7 @@ define <4 x double> @test_masked_z_4xdouble_perm_mem_mask3(<4 x double>* %vp, <4 ; GENERIC-LABEL: test_masked_z_4xdouble_perm_mem_mask3: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vptestnmq %ymm0, %ymm0, %k1 # sched: [1:1.00] -; 
GENERIC-NEXT: vpermpd {{.*#+}} ymm0 {%k1} {z} = mem[3,2,3,2] sched: [5:1.00] +; GENERIC-NEXT: vpermpd {{.*#+}} ymm0 {%k1} {z} = mem[3,2,3,2] sched: [6:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: test_masked_z_4xdouble_perm_mem_mask3: @@ -4156,7 +4156,7 @@ define <8 x double> @test_8xdouble_perm_mem_mask0(<8 x double>* %vp) { ; GENERIC-LABEL: test_8xdouble_perm_mem_mask0: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vmovaps {{.*#+}} zmm0 = [0,3,4,0,4,2,0,1] sched: [6:0.50] -; GENERIC-NEXT: vpermpd (%rdi), %zmm0, %zmm0 # sched: [5:1.00] +; GENERIC-NEXT: vpermpd (%rdi), %zmm0, %zmm0 # sched: [6:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: test_8xdouble_perm_mem_mask0: @@ -4173,7 +4173,7 @@ define <8 x double> @test_masked_8xdouble_perm_mem_mask0(<8 x double>* %vp, <8 x ; GENERIC: # %bb.0: ; GENERIC-NEXT: vmovapd {{.*#+}} zmm2 = [0,3,4,0,4,2,0,1] sched: [6:0.50] ; GENERIC-NEXT: vptestnmq %zmm1, %zmm1, %k1 # sched: [1:1.00] -; GENERIC-NEXT: vpermpd (%rdi), %zmm2, %zmm0 {%k1} # sched: [5:1.00] +; GENERIC-NEXT: vpermpd (%rdi), %zmm2, %zmm0 {%k1} # sched: [6:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: test_masked_8xdouble_perm_mem_mask0: @@ -4194,7 +4194,7 @@ define <8 x double> @test_masked_z_8xdouble_perm_mem_mask0(<8 x double>* %vp, <8 ; GENERIC: # %bb.0: ; GENERIC-NEXT: vmovapd {{.*#+}} zmm1 = [0,3,4,0,4,2,0,1] sched: [6:0.50] ; GENERIC-NEXT: vptestnmq %zmm0, %zmm0, %k1 # sched: [1:1.00] -; GENERIC-NEXT: vpermpd (%rdi), %zmm1, %zmm0 {%k1} {z} # sched: [5:1.00] +; GENERIC-NEXT: vpermpd (%rdi), %zmm1, %zmm0 {%k1} {z} # sched: [6:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: test_masked_z_8xdouble_perm_mem_mask0: @@ -4214,7 +4214,7 @@ define <8 x double> @test_masked_8xdouble_perm_imm_mem_mask1(<8 x double>* %vp, ; GENERIC-LABEL: test_masked_8xdouble_perm_imm_mem_mask1: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vptestnmq %zmm1, %zmm1, %k1 # sched: [1:1.00] -; GENERIC-NEXT: vpermpd {{.*#+}} zmm0 {%k1} = mem[0,2,0,3,4,6,4,7] 
sched: [5:1.00] +; GENERIC-NEXT: vpermpd {{.*#+}} zmm0 {%k1} = mem[0,2,0,3,4,6,4,7] sched: [6:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: test_masked_8xdouble_perm_imm_mem_mask1: @@ -4233,7 +4233,7 @@ define <8 x double> @test_masked_z_8xdouble_perm_imm_mem_mask1(<8 x double>* %vp ; GENERIC-LABEL: test_masked_z_8xdouble_perm_imm_mem_mask1: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vptestnmq %zmm0, %zmm0, %k1 # sched: [1:1.00] -; GENERIC-NEXT: vpermpd {{.*#+}} zmm0 {%k1} {z} = mem[0,2,0,3,4,6,4,7] sched: [5:1.00] +; GENERIC-NEXT: vpermpd {{.*#+}} zmm0 {%k1} {z} = mem[0,2,0,3,4,6,4,7] sched: [6:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: test_masked_z_8xdouble_perm_imm_mem_mask1: @@ -4253,7 +4253,7 @@ define <8 x double> @test_masked_8xdouble_perm_mem_mask2(<8 x double>* %vp, <8 x ; GENERIC: # %bb.0: ; GENERIC-NEXT: vmovapd {{.*#+}} zmm2 = [6,7,2,7,7,6,2,5] sched: [6:0.50] ; GENERIC-NEXT: vptestnmq %zmm1, %zmm1, %k1 # sched: [1:1.00] -; GENERIC-NEXT: vpermpd (%rdi), %zmm2, %zmm0 {%k1} # sched: [5:1.00] +; GENERIC-NEXT: vpermpd (%rdi), %zmm2, %zmm0 {%k1} # sched: [6:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: test_masked_8xdouble_perm_mem_mask2: @@ -4274,7 +4274,7 @@ define <8 x double> @test_masked_z_8xdouble_perm_mem_mask2(<8 x double>* %vp, <8 ; GENERIC: # %bb.0: ; GENERIC-NEXT: vmovapd {{.*#+}} zmm1 = [6,7,2,7,7,6,2,5] sched: [6:0.50] ; GENERIC-NEXT: vptestnmq %zmm0, %zmm0, %k1 # sched: [1:1.00] -; GENERIC-NEXT: vpermpd (%rdi), %zmm1, %zmm0 {%k1} {z} # sched: [5:1.00] +; GENERIC-NEXT: vpermpd (%rdi), %zmm1, %zmm0 {%k1} {z} # sched: [6:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: test_masked_z_8xdouble_perm_mem_mask2: @@ -4293,7 +4293,7 @@ define <8 x double> @test_masked_z_8xdouble_perm_mem_mask2(<8 x double>* %vp, <8 define <8 x double> @test_8xdouble_perm_imm_mem_mask3(<8 x double>* %vp) { ; GENERIC-LABEL: test_8xdouble_perm_imm_mem_mask3: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vpermpd {{.*#+}} zmm0 = 
mem[2,1,1,0,6,5,5,4] sched: [5:1.00] +; GENERIC-NEXT: vpermpd {{.*#+}} zmm0 = mem[2,1,1,0,6,5,5,4] sched: [6:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: test_8xdouble_perm_imm_mem_mask3: @@ -4308,7 +4308,7 @@ define <8 x double> @test_masked_8xdouble_perm_imm_mem_mask3(<8 x double>* %vp, ; GENERIC-LABEL: test_masked_8xdouble_perm_imm_mem_mask3: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vptestnmq %zmm1, %zmm1, %k1 # sched: [1:1.00] -; GENERIC-NEXT: vpermpd {{.*#+}} zmm0 {%k1} = mem[2,1,1,0,6,5,5,4] sched: [5:1.00] +; GENERIC-NEXT: vpermpd {{.*#+}} zmm0 {%k1} = mem[2,1,1,0,6,5,5,4] sched: [6:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: test_masked_8xdouble_perm_imm_mem_mask3: @@ -4327,7 +4327,7 @@ define <8 x double> @test_masked_z_8xdouble_perm_imm_mem_mask3(<8 x double>* %vp ; GENERIC-LABEL: test_masked_z_8xdouble_perm_imm_mem_mask3: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vptestnmq %zmm0, %zmm0, %k1 # sched: [1:1.00] -; GENERIC-NEXT: vpermpd {{.*#+}} zmm0 {%k1} {z} = mem[2,1,1,0,6,5,5,4] sched: [5:1.00] +; GENERIC-NEXT: vpermpd {{.*#+}} zmm0 {%k1} {z} = mem[2,1,1,0,6,5,5,4] sched: [6:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: test_masked_z_8xdouble_perm_imm_mem_mask3: @@ -4347,7 +4347,7 @@ define <8 x double> @test_masked_8xdouble_perm_mem_mask4(<8 x double>* %vp, <8 x ; GENERIC: # %bb.0: ; GENERIC-NEXT: vmovapd {{.*#+}} zmm2 = [1,1,3,5,6,0,6,0] sched: [6:0.50] ; GENERIC-NEXT: vptestnmq %zmm1, %zmm1, %k1 # sched: [1:1.00] -; GENERIC-NEXT: vpermpd (%rdi), %zmm2, %zmm0 {%k1} # sched: [5:1.00] +; GENERIC-NEXT: vpermpd (%rdi), %zmm2, %zmm0 {%k1} # sched: [6:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: test_masked_8xdouble_perm_mem_mask4: @@ -4368,7 +4368,7 @@ define <8 x double> @test_masked_z_8xdouble_perm_mem_mask4(<8 x double>* %vp, <8 ; GENERIC: # %bb.0: ; GENERIC-NEXT: vmovapd {{.*#+}} zmm1 = [1,1,3,5,6,0,6,0] sched: [6:0.50] ; GENERIC-NEXT: vptestnmq %zmm0, %zmm0, %k1 # sched: [1:1.00] -; GENERIC-NEXT: vpermpd 
(%rdi), %zmm1, %zmm0 {%k1} {z} # sched: [5:1.00] +; GENERIC-NEXT: vpermpd (%rdi), %zmm1, %zmm0 {%k1} {z} # sched: [6:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: test_masked_z_8xdouble_perm_mem_mask4: @@ -4388,7 +4388,7 @@ define <8 x double> @test_masked_8xdouble_perm_imm_mem_mask5(<8 x double>* %vp, ; GENERIC-LABEL: test_masked_8xdouble_perm_imm_mem_mask5: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vptestnmq %zmm1, %zmm1, %k1 # sched: [1:1.00] -; GENERIC-NEXT: vpermpd {{.*#+}} zmm0 {%k1} = mem[2,2,2,3,6,6,6,7] sched: [5:1.00] +; GENERIC-NEXT: vpermpd {{.*#+}} zmm0 {%k1} = mem[2,2,2,3,6,6,6,7] sched: [6:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: test_masked_8xdouble_perm_imm_mem_mask5: @@ -4407,7 +4407,7 @@ define <8 x double> @test_masked_z_8xdouble_perm_imm_mem_mask5(<8 x double>* %vp ; GENERIC-LABEL: test_masked_z_8xdouble_perm_imm_mem_mask5: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vptestnmq %zmm0, %zmm0, %k1 # sched: [1:1.00] -; GENERIC-NEXT: vpermpd {{.*#+}} zmm0 {%k1} {z} = mem[2,2,2,3,6,6,6,7] sched: [5:1.00] +; GENERIC-NEXT: vpermpd {{.*#+}} zmm0 {%k1} {z} = mem[2,2,2,3,6,6,6,7] sched: [6:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: test_masked_z_8xdouble_perm_imm_mem_mask5: @@ -4426,7 +4426,7 @@ define <8 x double> @test_8xdouble_perm_mem_mask6(<8 x double>* %vp) { ; GENERIC-LABEL: test_8xdouble_perm_mem_mask6: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vmovaps {{.*#+}} zmm0 = [2,4,0,4,6,1,2,5] sched: [6:0.50] -; GENERIC-NEXT: vpermpd (%rdi), %zmm0, %zmm0 # sched: [5:1.00] +; GENERIC-NEXT: vpermpd (%rdi), %zmm0, %zmm0 # sched: [6:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: test_8xdouble_perm_mem_mask6: @@ -4443,7 +4443,7 @@ define <8 x double> @test_masked_8xdouble_perm_mem_mask6(<8 x double>* %vp, <8 x ; GENERIC: # %bb.0: ; GENERIC-NEXT: vmovapd {{.*#+}} zmm2 = [2,4,0,4,6,1,2,5] sched: [6:0.50] ; GENERIC-NEXT: vptestnmq %zmm1, %zmm1, %k1 # sched: [1:1.00] -; GENERIC-NEXT: vpermpd (%rdi), %zmm2, %zmm0 {%k1} # 
sched: [5:1.00] +; GENERIC-NEXT: vpermpd (%rdi), %zmm2, %zmm0 {%k1} # sched: [6:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: test_masked_8xdouble_perm_mem_mask6: @@ -4464,7 +4464,7 @@ define <8 x double> @test_masked_z_8xdouble_perm_mem_mask6(<8 x double>* %vp, <8 ; GENERIC: # %bb.0: ; GENERIC-NEXT: vmovapd {{.*#+}} zmm1 = [2,4,0,4,6,1,2,5] sched: [6:0.50] ; GENERIC-NEXT: vptestnmq %zmm0, %zmm0, %k1 # sched: [1:1.00] -; GENERIC-NEXT: vpermpd (%rdi), %zmm1, %zmm0 {%k1} {z} # sched: [5:1.00] +; GENERIC-NEXT: vpermpd (%rdi), %zmm1, %zmm0 {%k1} {z} # sched: [6:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: test_masked_z_8xdouble_perm_mem_mask6: @@ -4484,7 +4484,7 @@ define <8 x double> @test_masked_8xdouble_perm_imm_mem_mask7(<8 x double>* %vp, ; GENERIC-LABEL: test_masked_8xdouble_perm_imm_mem_mask7: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vptestnmq %zmm1, %zmm1, %k1 # sched: [1:1.00] -; GENERIC-NEXT: vpermpd {{.*#+}} zmm0 {%k1} = mem[0,3,2,0,4,7,6,4] sched: [5:1.00] +; GENERIC-NEXT: vpermpd {{.*#+}} zmm0 {%k1} = mem[0,3,2,0,4,7,6,4] sched: [6:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: test_masked_8xdouble_perm_imm_mem_mask7: @@ -4503,7 +4503,7 @@ define <8 x double> @test_masked_z_8xdouble_perm_imm_mem_mask7(<8 x double>* %vp ; GENERIC-LABEL: test_masked_z_8xdouble_perm_imm_mem_mask7: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vptestnmq %zmm0, %zmm0, %k1 # sched: [1:1.00] -; GENERIC-NEXT: vpermpd {{.*#+}} zmm0 {%k1} {z} = mem[0,3,2,0,4,7,6,4] sched: [5:1.00] +; GENERIC-NEXT: vpermpd {{.*#+}} zmm0 {%k1} {z} = mem[0,3,2,0,4,7,6,4] sched: [6:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: test_masked_z_8xdouble_perm_imm_mem_mask7: @@ -4535,7 +4535,7 @@ define <16 x i8> @test_masked_16xi8_perm_mask0(<16 x i8> %vec, <16 x i8> %vec2, ; GENERIC-LABEL: test_masked_16xi8_perm_mask0: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vptestnmb %xmm2, %xmm2, %k1 # sched: [1:1.00] -; GENERIC-NEXT: vpshufb {{.*#+}} xmm1 {%k1} = 
xmm0[8,6,12,4,7,9,14,8,4,12,9,4,14,15,12,14] sched: [5:1.00] +; GENERIC-NEXT: vpshufb {{.*#+}} xmm1 {%k1} = xmm0[8,6,12,4,7,9,14,8,4,12,9,4,14,15,12,14] sched: [6:1.00] ; GENERIC-NEXT: vmovdqa %xmm1, %xmm0 # sched: [1:0.50] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -4555,7 +4555,7 @@ define <16 x i8> @test_masked_z_16xi8_perm_mask0(<16 x i8> %vec, <16 x i8> %mask ; GENERIC-LABEL: test_masked_z_16xi8_perm_mask0: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vptestnmb %xmm1, %xmm1, %k1 # sched: [1:1.00] -; GENERIC-NEXT: vpshufb {{.*#+}} xmm0 {%k1} {z} = xmm0[8,6,12,4,7,9,14,8,4,12,9,4,14,15,12,14] sched: [5:1.00] +; GENERIC-NEXT: vpshufb {{.*#+}} xmm0 {%k1} {z} = xmm0[8,6,12,4,7,9,14,8,4,12,9,4,14,15,12,14] sched: [6:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: test_masked_z_16xi8_perm_mask0: @@ -4572,7 +4572,7 @@ define <16 x i8> @test_masked_16xi8_perm_mask1(<16 x i8> %vec, <16 x i8> %vec2, ; GENERIC-LABEL: test_masked_16xi8_perm_mask1: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vptestnmb %xmm2, %xmm2, %k1 # sched: [1:1.00] -; GENERIC-NEXT: vpshufb {{.*#+}} xmm1 {%k1} = xmm0[4,11,14,10,7,1,6,9,14,15,7,13,4,12,8,0] sched: [5:1.00] +; GENERIC-NEXT: vpshufb {{.*#+}} xmm1 {%k1} = xmm0[4,11,14,10,7,1,6,9,14,15,7,13,4,12,8,0] sched: [6:1.00] ; GENERIC-NEXT: vmovdqa %xmm1, %xmm0 # sched: [1:0.50] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -4592,7 +4592,7 @@ define <16 x i8> @test_masked_z_16xi8_perm_mask1(<16 x i8> %vec, <16 x i8> %mask ; GENERIC-LABEL: test_masked_z_16xi8_perm_mask1: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vptestnmb %xmm1, %xmm1, %k1 # sched: [1:1.00] -; GENERIC-NEXT: vpshufb {{.*#+}} xmm0 {%k1} {z} = xmm0[4,11,14,10,7,1,6,9,14,15,7,13,4,12,8,0] sched: [5:1.00] +; GENERIC-NEXT: vpshufb {{.*#+}} xmm0 {%k1} {z} = xmm0[4,11,14,10,7,1,6,9,14,15,7,13,4,12,8,0] sched: [6:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: test_masked_z_16xi8_perm_mask1: @@ -4609,7 +4609,7 @@ define <16 x i8> @test_masked_16xi8_perm_mask2(<16 x i8> %vec, <16 x i8> %vec2, ; 
GENERIC-LABEL: test_masked_16xi8_perm_mask2: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vptestnmb %xmm2, %xmm2, %k1 # sched: [1:1.00] -; GENERIC-NEXT: vpshufb {{.*#+}} xmm1 {%k1} = xmm0[11,6,13,10,0,7,13,3,5,13,3,9,3,15,12,7] sched: [5:1.00] +; GENERIC-NEXT: vpshufb {{.*#+}} xmm1 {%k1} = xmm0[11,6,13,10,0,7,13,3,5,13,3,9,3,15,12,7] sched: [6:1.00] ; GENERIC-NEXT: vmovdqa %xmm1, %xmm0 # sched: [1:0.50] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -4629,7 +4629,7 @@ define <16 x i8> @test_masked_z_16xi8_perm_mask2(<16 x i8> %vec, <16 x i8> %mask ; GENERIC-LABEL: test_masked_z_16xi8_perm_mask2: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vptestnmb %xmm1, %xmm1, %k1 # sched: [1:1.00] -; GENERIC-NEXT: vpshufb {{.*#+}} xmm0 {%k1} {z} = xmm0[11,6,13,10,0,7,13,3,5,13,3,9,3,15,12,7] sched: [5:1.00] +; GENERIC-NEXT: vpshufb {{.*#+}} xmm0 {%k1} {z} = xmm0[11,6,13,10,0,7,13,3,5,13,3,9,3,15,12,7] sched: [6:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: test_masked_z_16xi8_perm_mask2: @@ -4659,7 +4659,7 @@ define <16 x i8> @test_masked_16xi8_perm_mask3(<16 x i8> %vec, <16 x i8> %vec2, ; GENERIC-LABEL: test_masked_16xi8_perm_mask3: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vptestnmb %xmm2, %xmm2, %k1 # sched: [1:1.00] -; GENERIC-NEXT: vpshufb {{.*#+}} xmm1 {%k1} = xmm0[1,5,8,14,1,8,11,8,13,8,15,9,9,7,9,6] sched: [5:1.00] +; GENERIC-NEXT: vpshufb {{.*#+}} xmm1 {%k1} = xmm0[1,5,8,14,1,8,11,8,13,8,15,9,9,7,9,6] sched: [6:1.00] ; GENERIC-NEXT: vmovdqa %xmm1, %xmm0 # sched: [1:0.50] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -4679,7 +4679,7 @@ define <16 x i8> @test_masked_z_16xi8_perm_mask3(<16 x i8> %vec, <16 x i8> %mask ; GENERIC-LABEL: test_masked_z_16xi8_perm_mask3: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vptestnmb %xmm1, %xmm1, %k1 # sched: [1:1.00] -; GENERIC-NEXT: vpshufb {{.*#+}} xmm0 {%k1} {z} = xmm0[1,5,8,14,1,8,11,8,13,8,15,9,9,7,9,6] sched: [5:1.00] +; GENERIC-NEXT: vpshufb {{.*#+}} xmm0 {%k1} {z} = xmm0[1,5,8,14,1,8,11,8,13,8,15,9,9,7,9,6] sched: [6:1.00] ; GENERIC-NEXT: retq # 
sched: [1:1.00] ; ; SKX-LABEL: test_masked_z_16xi8_perm_mask3: @@ -4713,7 +4713,7 @@ define <16 x i8> @test_masked_16xi8_perm_mem_mask0(<16 x i8>* %vp, <16 x i8> %ve ; GENERIC: # %bb.0: ; GENERIC-NEXT: vmovdqa (%rdi), %xmm2 # sched: [6:0.50] ; GENERIC-NEXT: vptestnmb %xmm1, %xmm1, %k1 # sched: [1:1.00] -; GENERIC-NEXT: vpshufb {{.*#+}} xmm0 {%k1} = xmm2[9,10,7,1,12,14,14,13,14,14,8,6,11,4,12,13] sched: [5:1.00] +; GENERIC-NEXT: vpshufb {{.*#+}} xmm0 {%k1} = xmm2[9,10,7,1,12,14,14,13,14,14,8,6,11,4,12,13] sched: [6:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: test_masked_16xi8_perm_mem_mask0: @@ -4734,7 +4734,7 @@ define <16 x i8> @test_masked_z_16xi8_perm_mem_mask0(<16 x i8>* %vp, <16 x i8> % ; GENERIC: # %bb.0: ; GENERIC-NEXT: vmovdqa (%rdi), %xmm1 # sched: [6:0.50] ; GENERIC-NEXT: vptestnmb %xmm0, %xmm0, %k1 # sched: [1:1.00] -; GENERIC-NEXT: vpshufb {{.*#+}} xmm0 {%k1} {z} = xmm1[9,10,7,1,12,14,14,13,14,14,8,6,11,4,12,13] sched: [5:1.00] +; GENERIC-NEXT: vpshufb {{.*#+}} xmm0 {%k1} {z} = xmm1[9,10,7,1,12,14,14,13,14,14,8,6,11,4,12,13] sched: [6:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: test_masked_z_16xi8_perm_mem_mask0: @@ -4755,7 +4755,7 @@ define <16 x i8> @test_masked_16xi8_perm_mem_mask1(<16 x i8>* %vp, <16 x i8> %ve ; GENERIC: # %bb.0: ; GENERIC-NEXT: vmovdqa (%rdi), %xmm2 # sched: [6:0.50] ; GENERIC-NEXT: vptestnmb %xmm1, %xmm1, %k1 # sched: [1:1.00] -; GENERIC-NEXT: vpshufb {{.*#+}} xmm0 {%k1} = xmm2[14,9,15,9,7,10,15,14,12,1,9,7,10,13,3,11] sched: [5:1.00] +; GENERIC-NEXT: vpshufb {{.*#+}} xmm0 {%k1} = xmm2[14,9,15,9,7,10,15,14,12,1,9,7,10,13,3,11] sched: [6:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: test_masked_16xi8_perm_mem_mask1: @@ -4776,7 +4776,7 @@ define <16 x i8> @test_masked_z_16xi8_perm_mem_mask1(<16 x i8>* %vp, <16 x i8> % ; GENERIC: # %bb.0: ; GENERIC-NEXT: vmovdqa (%rdi), %xmm1 # sched: [6:0.50] ; GENERIC-NEXT: vptestnmb %xmm0, %xmm0, %k1 # sched: [1:1.00] -; GENERIC-NEXT: vpshufb {{.*#+}} 
xmm0 {%k1} {z} = xmm1[14,9,15,9,7,10,15,14,12,1,9,7,10,13,3,11] sched: [5:1.00] +; GENERIC-NEXT: vpshufb {{.*#+}} xmm0 {%k1} {z} = xmm1[14,9,15,9,7,10,15,14,12,1,9,7,10,13,3,11] sched: [6:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: test_masked_z_16xi8_perm_mem_mask1: @@ -4797,7 +4797,7 @@ define <16 x i8> @test_masked_16xi8_perm_mem_mask2(<16 x i8>* %vp, <16 x i8> %ve ; GENERIC: # %bb.0: ; GENERIC-NEXT: vmovdqa (%rdi), %xmm2 # sched: [6:0.50] ; GENERIC-NEXT: vptestnmb %xmm1, %xmm1, %k1 # sched: [1:1.00] -; GENERIC-NEXT: vpshufb {{.*#+}} xmm0 {%k1} = xmm2[1,3,12,5,13,1,2,11,0,9,14,8,10,0,10,9] sched: [5:1.00] +; GENERIC-NEXT: vpshufb {{.*#+}} xmm0 {%k1} = xmm2[1,3,12,5,13,1,2,11,0,9,14,8,10,0,10,9] sched: [6:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: test_masked_16xi8_perm_mem_mask2: @@ -4818,7 +4818,7 @@ define <16 x i8> @test_masked_z_16xi8_perm_mem_mask2(<16 x i8>* %vp, <16 x i8> % ; GENERIC: # %bb.0: ; GENERIC-NEXT: vmovdqa (%rdi), %xmm1 # sched: [6:0.50] ; GENERIC-NEXT: vptestnmb %xmm0, %xmm0, %k1 # sched: [1:1.00] -; GENERIC-NEXT: vpshufb {{.*#+}} xmm0 {%k1} {z} = xmm1[1,3,12,5,13,1,2,11,0,9,14,8,10,0,10,9] sched: [5:1.00] +; GENERIC-NEXT: vpshufb {{.*#+}} xmm0 {%k1} {z} = xmm1[1,3,12,5,13,1,2,11,0,9,14,8,10,0,10,9] sched: [6:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: test_masked_z_16xi8_perm_mem_mask2: @@ -4855,7 +4855,7 @@ define <16 x i8> @test_masked_16xi8_perm_mem_mask3(<16 x i8>* %vp, <16 x i8> %ve ; GENERIC: # %bb.0: ; GENERIC-NEXT: vmovdqa (%rdi), %xmm2 # sched: [6:0.50] ; GENERIC-NEXT: vptestnmb %xmm1, %xmm1, %k1 # sched: [1:1.00] -; GENERIC-NEXT: vpshufb {{.*#+}} xmm0 {%k1} = xmm2[9,6,5,15,0,0,15,2,1,3,12,14,0,6,1,4] sched: [5:1.00] +; GENERIC-NEXT: vpshufb {{.*#+}} xmm0 {%k1} = xmm2[9,6,5,15,0,0,15,2,1,3,12,14,0,6,1,4] sched: [6:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: test_masked_16xi8_perm_mem_mask3: @@ -4876,7 +4876,7 @@ define <16 x i8> @test_masked_z_16xi8_perm_mem_mask3(<16 x 
i8>* %vp, <16 x i8> % ; GENERIC: # %bb.0: ; GENERIC-NEXT: vmovdqa (%rdi), %xmm1 # sched: [6:0.50] ; GENERIC-NEXT: vptestnmb %xmm0, %xmm0, %k1 # sched: [1:1.00] -; GENERIC-NEXT: vpshufb {{.*#+}} xmm0 {%k1} {z} = xmm1[9,6,5,15,0,0,15,2,1,3,12,14,0,6,1,4] sched: [5:1.00] +; GENERIC-NEXT: vpshufb {{.*#+}} xmm0 {%k1} {z} = xmm1[9,6,5,15,0,0,15,2,1,3,12,14,0,6,1,4] sched: [6:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: test_masked_z_16xi8_perm_mem_mask3: @@ -4895,7 +4895,7 @@ define <16 x i8> @test_masked_z_16xi8_perm_mem_mask3(<16 x i8>* %vp, <16 x i8> % define <32 x i8> @test_32xi8_perm_mask0(<32 x i8> %vec) { ; GENERIC-LABEL: test_32xi8_perm_mask0: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[8,0,1,15,3,5,11,13,14,2,10,15,0,10,13,5,20,25,23,18,23,22,25,24,20,21,29,20,24,16,27,21] sched: [5:1.00] +; GENERIC-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[8,0,1,15,3,5,11,13,14,2,10,15,0,10,13,5,20,25,23,18,23,22,25,24,20,21,29,20,24,16,27,21] sched: [6:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: test_32xi8_perm_mask0: @@ -4909,7 +4909,7 @@ define <32 x i8> @test_masked_32xi8_perm_mask0(<32 x i8> %vec, <32 x i8> %vec2, ; GENERIC-LABEL: test_masked_32xi8_perm_mask0: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vptestnmb %ymm2, %ymm2, %k1 # sched: [1:1.00] -; GENERIC-NEXT: vpshufb {{.*#+}} ymm1 {%k1} = ymm0[8,0,1,15,3,5,11,13,14,2,10,15,0,10,13,5,20,25,23,18,23,22,25,24,20,21,29,20,24,16,27,21] sched: [5:1.00] +; GENERIC-NEXT: vpshufb {{.*#+}} ymm1 {%k1} = ymm0[8,0,1,15,3,5,11,13,14,2,10,15,0,10,13,5,20,25,23,18,23,22,25,24,20,21,29,20,24,16,27,21] sched: [6:1.00] ; GENERIC-NEXT: vmovdqa %ymm1, %ymm0 # sched: [1:0.50] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -4929,7 +4929,7 @@ define <32 x i8> @test_masked_z_32xi8_perm_mask0(<32 x i8> %vec, <32 x i8> %mask ; GENERIC-LABEL: test_masked_z_32xi8_perm_mask0: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vptestnmb %ymm1, %ymm1, %k1 # sched: [1:1.00] -; GENERIC-NEXT: vpshufb {{.*#+}} ymm0 {%k1} {z} = 
ymm0[8,0,1,15,3,5,11,13,14,2,10,15,0,10,13,5,20,25,23,18,23,22,25,24,20,21,29,20,24,16,27,21] sched: [5:1.00] +; GENERIC-NEXT: vpshufb {{.*#+}} ymm0 {%k1} {z} = ymm0[8,0,1,15,3,5,11,13,14,2,10,15,0,10,13,5,20,25,23,18,23,22,25,24,20,21,29,20,24,16,27,21] sched: [6:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: test_masked_z_32xi8_perm_mask0: @@ -4946,7 +4946,7 @@ define <32 x i8> @test_masked_32xi8_perm_mask1(<32 x i8> %vec, <32 x i8> %vec2, ; GENERIC-LABEL: test_masked_32xi8_perm_mask1: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vptestnmb %ymm2, %ymm2, %k1 # sched: [1:1.00] -; GENERIC-NEXT: vpshufb {{.*#+}} ymm1 {%k1} = ymm0[0,4,3,15,5,4,5,15,10,9,11,6,6,10,0,3,21,19,26,22,30,25,22,22,27,22,26,16,23,20,18,24] sched: [5:1.00] +; GENERIC-NEXT: vpshufb {{.*#+}} ymm1 {%k1} = ymm0[0,4,3,15,5,4,5,15,10,9,11,6,6,10,0,3,21,19,26,22,30,25,22,22,27,22,26,16,23,20,18,24] sched: [6:1.00] ; GENERIC-NEXT: vmovdqa %ymm1, %ymm0 # sched: [1:0.50] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -4966,7 +4966,7 @@ define <32 x i8> @test_masked_z_32xi8_perm_mask1(<32 x i8> %vec, <32 x i8> %mask ; GENERIC-LABEL: test_masked_z_32xi8_perm_mask1: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vptestnmb %ymm1, %ymm1, %k1 # sched: [1:1.00] -; GENERIC-NEXT: vpshufb {{.*#+}} ymm0 {%k1} {z} = ymm0[0,4,3,15,5,4,5,15,10,9,11,6,6,10,0,3,21,19,26,22,30,25,22,22,27,22,26,16,23,20,18,24] sched: [5:1.00] +; GENERIC-NEXT: vpshufb {{.*#+}} ymm0 {%k1} {z} = ymm0[0,4,3,15,5,4,5,15,10,9,11,6,6,10,0,3,21,19,26,22,30,25,22,22,27,22,26,16,23,20,18,24] sched: [6:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: test_masked_z_32xi8_perm_mask1: @@ -4983,7 +4983,7 @@ define <32 x i8> @test_masked_32xi8_perm_mask2(<32 x i8> %vec, <32 x i8> %vec2, ; GENERIC-LABEL: test_masked_32xi8_perm_mask2: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vptestnmb %ymm2, %ymm2, %k1 # sched: [1:1.00] -; GENERIC-NEXT: vpshufb {{.*#+}} ymm1 {%k1} = 
ymm0[7,8,12,14,7,4,7,12,14,12,3,15,10,1,11,15,22,26,21,19,27,16,29,24,17,17,26,29,20,31,17,29] sched: [5:1.00] +; GENERIC-NEXT: vpshufb {{.*#+}} ymm1 {%k1} = ymm0[7,8,12,14,7,4,7,12,14,12,3,15,10,1,11,15,22,26,21,19,27,16,29,24,17,17,26,29,20,31,17,29] sched: [6:1.00] ; GENERIC-NEXT: vmovdqa %ymm1, %ymm0 # sched: [1:0.50] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -5003,7 +5003,7 @@ define <32 x i8> @test_masked_z_32xi8_perm_mask2(<32 x i8> %vec, <32 x i8> %mask ; GENERIC-LABEL: test_masked_z_32xi8_perm_mask2: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vptestnmb %ymm1, %ymm1, %k1 # sched: [1:1.00] -; GENERIC-NEXT: vpshufb {{.*#+}} ymm0 {%k1} {z} = ymm0[7,8,12,14,7,4,7,12,14,12,3,15,10,1,11,15,22,26,21,19,27,16,29,24,17,17,26,29,20,31,17,29] sched: [5:1.00] +; GENERIC-NEXT: vpshufb {{.*#+}} ymm0 {%k1} {z} = ymm0[7,8,12,14,7,4,7,12,14,12,3,15,10,1,11,15,22,26,21,19,27,16,29,24,17,17,26,29,20,31,17,29] sched: [6:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: test_masked_z_32xi8_perm_mask2: @@ -5019,7 +5019,7 @@ define <32 x i8> @test_masked_z_32xi8_perm_mask2(<32 x i8> %vec, <32 x i8> %mask define <32 x i8> @test_32xi8_perm_mask3(<32 x i8> %vec) { ; GENERIC-LABEL: test_32xi8_perm_mask3: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[6,1,4,7,12,13,2,8,10,5,13,4,0,0,10,8,31,31,30,16,27,27,26,27,30,26,21,24,19,25,16,18] sched: [5:1.00] +; GENERIC-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[6,1,4,7,12,13,2,8,10,5,13,4,0,0,10,8,31,31,30,16,27,27,26,27,30,26,21,24,19,25,16,18] sched: [6:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: test_32xi8_perm_mask3: @@ -5033,7 +5033,7 @@ define <32 x i8> @test_masked_32xi8_perm_mask3(<32 x i8> %vec, <32 x i8> %vec2, ; GENERIC-LABEL: test_masked_32xi8_perm_mask3: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vptestnmb %ymm2, %ymm2, %k1 # sched: [1:1.00] -; GENERIC-NEXT: vpshufb {{.*#+}} ymm1 {%k1} = ymm0[6,1,4,7,12,13,2,8,10,5,13,4,0,0,10,8,31,31,30,16,27,27,26,27,30,26,21,24,19,25,16,18] sched: [5:1.00] +; 
GENERIC-NEXT: vpshufb {{.*#+}} ymm1 {%k1} = ymm0[6,1,4,7,12,13,2,8,10,5,13,4,0,0,10,8,31,31,30,16,27,27,26,27,30,26,21,24,19,25,16,18] sched: [6:1.00] ; GENERIC-NEXT: vmovdqa %ymm1, %ymm0 # sched: [1:0.50] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -5053,7 +5053,7 @@ define <32 x i8> @test_masked_z_32xi8_perm_mask3(<32 x i8> %vec, <32 x i8> %mask ; GENERIC-LABEL: test_masked_z_32xi8_perm_mask3: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vptestnmb %ymm1, %ymm1, %k1 # sched: [1:1.00] -; GENERIC-NEXT: vpshufb {{.*#+}} ymm0 {%k1} {z} = ymm0[6,1,4,7,12,13,2,8,10,5,13,4,0,0,10,8,31,31,30,16,27,27,26,27,30,26,21,24,19,25,16,18] sched: [5:1.00] +; GENERIC-NEXT: vpshufb {{.*#+}} ymm0 {%k1} {z} = ymm0[6,1,4,7,12,13,2,8,10,5,13,4,0,0,10,8,31,31,30,16,27,27,26,27,30,26,21,24,19,25,16,18] sched: [6:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: test_masked_z_32xi8_perm_mask3: @@ -5070,7 +5070,7 @@ define <32 x i8> @test_32xi8_perm_mem_mask0(<32 x i8>* %vp) { ; GENERIC-LABEL: test_32xi8_perm_mem_mask0: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vmovdqa (%rdi), %ymm0 # sched: [7:0.50] -; GENERIC-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[9,0,2,15,4,6,8,4,7,3,0,2,8,1,6,5,22,17,30,23,29,31,21,23,27,22,20,27,30,30,26,22] sched: [5:1.00] +; GENERIC-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[9,0,2,15,4,6,8,4,7,3,0,2,8,1,6,5,22,17,30,23,29,31,21,23,27,22,20,27,30,30,26,22] sched: [6:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: test_32xi8_perm_mem_mask0: @@ -5087,7 +5087,7 @@ define <32 x i8> @test_masked_32xi8_perm_mem_mask0(<32 x i8>* %vp, <32 x i8> %ve ; GENERIC: # %bb.0: ; GENERIC-NEXT: vmovdqa (%rdi), %ymm2 # sched: [7:0.50] ; GENERIC-NEXT: vptestnmb %ymm1, %ymm1, %k1 # sched: [1:1.00] -; GENERIC-NEXT: vpshufb {{.*#+}} ymm0 {%k1} = ymm2[9,0,2,15,4,6,8,4,7,3,0,2,8,1,6,5,22,17,30,23,29,31,21,23,27,22,20,27,30,30,26,22] sched: [5:1.00] +; GENERIC-NEXT: vpshufb {{.*#+}} ymm0 {%k1} = ymm2[9,0,2,15,4,6,8,4,7,3,0,2,8,1,6,5,22,17,30,23,29,31,21,23,27,22,20,27,30,30,26,22] sched: [6:1.00] ; 
GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: test_masked_32xi8_perm_mem_mask0: @@ -5108,7 +5108,7 @@ define <32 x i8> @test_masked_z_32xi8_perm_mem_mask0(<32 x i8>* %vp, <32 x i8> % ; GENERIC: # %bb.0: ; GENERIC-NEXT: vmovdqa (%rdi), %ymm1 # sched: [7:0.50] ; GENERIC-NEXT: vptestnmb %ymm0, %ymm0, %k1 # sched: [1:1.00] -; GENERIC-NEXT: vpshufb {{.*#+}} ymm0 {%k1} {z} = ymm1[9,0,2,15,4,6,8,4,7,3,0,2,8,1,6,5,22,17,30,23,29,31,21,23,27,22,20,27,30,30,26,22] sched: [5:1.00] +; GENERIC-NEXT: vpshufb {{.*#+}} ymm0 {%k1} {z} = ymm1[9,0,2,15,4,6,8,4,7,3,0,2,8,1,6,5,22,17,30,23,29,31,21,23,27,22,20,27,30,30,26,22] sched: [6:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: test_masked_z_32xi8_perm_mem_mask0: @@ -5129,7 +5129,7 @@ define <32 x i8> @test_masked_32xi8_perm_mem_mask1(<32 x i8>* %vp, <32 x i8> %ve ; GENERIC: # %bb.0: ; GENERIC-NEXT: vmovdqa (%rdi), %ymm2 # sched: [7:0.50] ; GENERIC-NEXT: vptestnmb %ymm1, %ymm1, %k1 # sched: [1:1.00] -; GENERIC-NEXT: vpshufb {{.*#+}} ymm0 {%k1} = ymm2[15,10,1,1,11,0,0,6,8,7,7,9,10,6,5,15,20,28,22,21,17,29,27,30,23,26,17,22,19,16,31,19] sched: [5:1.00] +; GENERIC-NEXT: vpshufb {{.*#+}} ymm0 {%k1} = ymm2[15,10,1,1,11,0,0,6,8,7,7,9,10,6,5,15,20,28,22,21,17,29,27,30,23,26,17,22,19,16,31,19] sched: [6:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: test_masked_32xi8_perm_mem_mask1: @@ -5150,7 +5150,7 @@ define <32 x i8> @test_masked_z_32xi8_perm_mem_mask1(<32 x i8>* %vp, <32 x i8> % ; GENERIC: # %bb.0: ; GENERIC-NEXT: vmovdqa (%rdi), %ymm1 # sched: [7:0.50] ; GENERIC-NEXT: vptestnmb %ymm0, %ymm0, %k1 # sched: [1:1.00] -; GENERIC-NEXT: vpshufb {{.*#+}} ymm0 {%k1} {z} = ymm1[15,10,1,1,11,0,0,6,8,7,7,9,10,6,5,15,20,28,22,21,17,29,27,30,23,26,17,22,19,16,31,19] sched: [5:1.00] +; GENERIC-NEXT: vpshufb {{.*#+}} ymm0 {%k1} {z} = ymm1[15,10,1,1,11,0,0,6,8,7,7,9,10,6,5,15,20,28,22,21,17,29,27,30,23,26,17,22,19,16,31,19] sched: [6:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: 
test_masked_z_32xi8_perm_mem_mask1: @@ -5171,7 +5171,7 @@ define <32 x i8> @test_masked_32xi8_perm_mem_mask2(<32 x i8>* %vp, <32 x i8> %ve ; GENERIC: # %bb.0: ; GENERIC-NEXT: vmovdqa (%rdi), %ymm2 # sched: [7:0.50] ; GENERIC-NEXT: vptestnmb %ymm1, %ymm1, %k1 # sched: [1:1.00] -; GENERIC-NEXT: vpshufb {{.*#+}} ymm0 {%k1} = ymm2[2,3,6,8,2,15,15,2,6,10,14,7,14,5,7,7,26,19,25,19,21,31,30,29,16,18,20,28,29,25,27,28] sched: [5:1.00] +; GENERIC-NEXT: vpshufb {{.*#+}} ymm0 {%k1} = ymm2[2,3,6,8,2,15,15,2,6,10,14,7,14,5,7,7,26,19,25,19,21,31,30,29,16,18,20,28,29,25,27,28] sched: [6:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: test_masked_32xi8_perm_mem_mask2: @@ -5192,7 +5192,7 @@ define <32 x i8> @test_masked_z_32xi8_perm_mem_mask2(<32 x i8>* %vp, <32 x i8> % ; GENERIC: # %bb.0: ; GENERIC-NEXT: vmovdqa (%rdi), %ymm1 # sched: [7:0.50] ; GENERIC-NEXT: vptestnmb %ymm0, %ymm0, %k1 # sched: [1:1.00] -; GENERIC-NEXT: vpshufb {{.*#+}} ymm0 {%k1} {z} = ymm1[2,3,6,8,2,15,15,2,6,10,14,7,14,5,7,7,26,19,25,19,21,31,30,29,16,18,20,28,29,25,27,28] sched: [5:1.00] +; GENERIC-NEXT: vpshufb {{.*#+}} ymm0 {%k1} {z} = ymm1[2,3,6,8,2,15,15,2,6,10,14,7,14,5,7,7,26,19,25,19,21,31,30,29,16,18,20,28,29,25,27,28] sched: [6:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: test_masked_z_32xi8_perm_mem_mask2: @@ -5212,7 +5212,7 @@ define <32 x i8> @test_32xi8_perm_mem_mask3(<32 x i8>* %vp) { ; GENERIC-LABEL: test_32xi8_perm_mem_mask3: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vmovdqa (%rdi), %ymm0 # sched: [7:0.50] -; GENERIC-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[1,1,13,0,3,0,0,13,5,2,2,10,15,8,14,8,25,26,28,28,31,27,30,19,24,25,29,23,28,22,25,29] sched: [5:1.00] +; GENERIC-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[1,1,13,0,3,0,0,13,5,2,2,10,15,8,14,8,25,26,28,28,31,27,30,19,24,25,29,23,28,22,25,29] sched: [6:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: test_32xi8_perm_mem_mask3: @@ -5229,7 +5229,7 @@ define <32 x i8> @test_masked_32xi8_perm_mem_mask3(<32 x i8>* %vp, <32 x i8> 
%ve ; GENERIC: # %bb.0: ; GENERIC-NEXT: vmovdqa (%rdi), %ymm2 # sched: [7:0.50] ; GENERIC-NEXT: vptestnmb %ymm1, %ymm1, %k1 # sched: [1:1.00] -; GENERIC-NEXT: vpshufb {{.*#+}} ymm0 {%k1} = ymm2[1,1,13,0,3,0,0,13,5,2,2,10,15,8,14,8,25,26,28,28,31,27,30,19,24,25,29,23,28,22,25,29] sched: [5:1.00] +; GENERIC-NEXT: vpshufb {{.*#+}} ymm0 {%k1} = ymm2[1,1,13,0,3,0,0,13,5,2,2,10,15,8,14,8,25,26,28,28,31,27,30,19,24,25,29,23,28,22,25,29] sched: [6:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: test_masked_32xi8_perm_mem_mask3: @@ -5250,7 +5250,7 @@ define <32 x i8> @test_masked_z_32xi8_perm_mem_mask3(<32 x i8>* %vp, <32 x i8> % ; GENERIC: # %bb.0: ; GENERIC-NEXT: vmovdqa (%rdi), %ymm1 # sched: [7:0.50] ; GENERIC-NEXT: vptestnmb %ymm0, %ymm0, %k1 # sched: [1:1.00] -; GENERIC-NEXT: vpshufb {{.*#+}} ymm0 {%k1} {z} = ymm1[1,1,13,0,3,0,0,13,5,2,2,10,15,8,14,8,25,26,28,28,31,27,30,19,24,25,29,23,28,22,25,29] sched: [5:1.00] +; GENERIC-NEXT: vpshufb {{.*#+}} ymm0 {%k1} {z} = ymm1[1,1,13,0,3,0,0,13,5,2,2,10,15,8,14,8,25,26,28,28,31,27,30,19,24,25,29,23,28,22,25,29] sched: [6:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: test_masked_z_32xi8_perm_mem_mask3: @@ -5269,7 +5269,7 @@ define <32 x i8> @test_masked_z_32xi8_perm_mem_mask3(<32 x i8>* %vp, <32 x i8> % define <64 x i8> @test_64xi8_perm_mask0(<64 x i8> %vec) { ; GENERIC-LABEL: test_64xi8_perm_mask0: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vpshufb {{.*#+}} zmm0 = zmm0[8,4,1,13,15,4,6,12,0,10,2,4,13,0,0,6,23,29,27,26,18,31,22,25,22,16,23,18,16,25,26,17,40,37,38,44,39,46,41,39,42,37,33,42,41,44,34,46,60,62,61,58,60,56,60,51,60,55,60,55,60,49,48,62] sched: [5:1.00] +; GENERIC-NEXT: vpshufb {{.*#+}} zmm0 = zmm0[8,4,1,13,15,4,6,12,0,10,2,4,13,0,0,6,23,29,27,26,18,31,22,25,22,16,23,18,16,25,26,17,40,37,38,44,39,46,41,39,42,37,33,42,41,44,34,46,60,62,61,58,60,56,60,51,60,55,60,55,60,49,48,62] sched: [6:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: test_64xi8_perm_mask0: @@ -5283,7 +5283,7 @@ define 
<64 x i8> @test_masked_64xi8_perm_mask0(<64 x i8> %vec, <64 x i8> %vec2, ; GENERIC-LABEL: test_masked_64xi8_perm_mask0: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vptestnmb %zmm2, %zmm2, %k1 # sched: [1:1.00] -; GENERIC-NEXT: vpshufb {{.*#+}} zmm1 {%k1} = zmm0[8,4,1,13,15,4,6,12,0,10,2,4,13,0,0,6,23,29,27,26,18,31,22,25,22,16,23,18,16,25,26,17,40,37,38,44,39,46,41,39,42,37,33,42,41,44,34,46,60,62,61,58,60,56,60,51,60,55,60,55,60,49,48,62] sched: [5:1.00] +; GENERIC-NEXT: vpshufb {{.*#+}} zmm1 {%k1} = zmm0[8,4,1,13,15,4,6,12,0,10,2,4,13,0,0,6,23,29,27,26,18,31,22,25,22,16,23,18,16,25,26,17,40,37,38,44,39,46,41,39,42,37,33,42,41,44,34,46,60,62,61,58,60,56,60,51,60,55,60,55,60,49,48,62] sched: [6:1.00] ; GENERIC-NEXT: vmovdqa64 %zmm1, %zmm0 # sched: [1:0.50] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -5303,7 +5303,7 @@ define <64 x i8> @test_masked_z_64xi8_perm_mask0(<64 x i8> %vec, <64 x i8> %mask ; GENERIC-LABEL: test_masked_z_64xi8_perm_mask0: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vptestnmb %zmm1, %zmm1, %k1 # sched: [1:1.00] -; GENERIC-NEXT: vpshufb {{.*#+}} zmm0 {%k1} {z} = zmm0[8,4,1,13,15,4,6,12,0,10,2,4,13,0,0,6,23,29,27,26,18,31,22,25,22,16,23,18,16,25,26,17,40,37,38,44,39,46,41,39,42,37,33,42,41,44,34,46,60,62,61,58,60,56,60,51,60,55,60,55,60,49,48,62] sched: [5:1.00] +; GENERIC-NEXT: vpshufb {{.*#+}} zmm0 {%k1} {z} = zmm0[8,4,1,13,15,4,6,12,0,10,2,4,13,0,0,6,23,29,27,26,18,31,22,25,22,16,23,18,16,25,26,17,40,37,38,44,39,46,41,39,42,37,33,42,41,44,34,46,60,62,61,58,60,56,60,51,60,55,60,55,60,49,48,62] sched: [6:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: test_masked_z_64xi8_perm_mask0: @@ -5320,7 +5320,7 @@ define <64 x i8> @test_masked_64xi8_perm_mask1(<64 x i8> %vec, <64 x i8> %vec2, ; GENERIC-LABEL: test_masked_64xi8_perm_mask1: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vptestnmb %zmm2, %zmm2, %k1 # sched: [1:1.00] -; GENERIC-NEXT: vpshufb {{.*#+}} zmm1 {%k1} = 
zmm0[7,14,15,10,9,3,1,13,14,12,11,6,4,1,6,9,30,30,22,17,28,27,16,23,26,16,30,31,27,17,17,21,32,37,32,47,45,33,46,35,35,42,47,33,32,37,32,41,61,50,49,53,63,50,63,53,55,52,62,63,58,50,63,49] sched: [5:1.00] +; GENERIC-NEXT: vpshufb {{.*#+}} zmm1 {%k1} = zmm0[7,14,15,10,9,3,1,13,14,12,11,6,4,1,6,9,30,30,22,17,28,27,16,23,26,16,30,31,27,17,17,21,32,37,32,47,45,33,46,35,35,42,47,33,32,37,32,41,61,50,49,53,63,50,63,53,55,52,62,63,58,50,63,49] sched: [6:1.00] ; GENERIC-NEXT: vmovdqa64 %zmm1, %zmm0 # sched: [1:0.50] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -5340,7 +5340,7 @@ define <64 x i8> @test_masked_z_64xi8_perm_mask1(<64 x i8> %vec, <64 x i8> %mask ; GENERIC-LABEL: test_masked_z_64xi8_perm_mask1: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vptestnmb %zmm1, %zmm1, %k1 # sched: [1:1.00] -; GENERIC-NEXT: vpshufb {{.*#+}} zmm0 {%k1} {z} = zmm0[7,14,15,10,9,3,1,13,14,12,11,6,4,1,6,9,30,30,22,17,28,27,16,23,26,16,30,31,27,17,17,21,32,37,32,47,45,33,46,35,35,42,47,33,32,37,32,41,61,50,49,53,63,50,63,53,55,52,62,63,58,50,63,49] sched: [5:1.00] +; GENERIC-NEXT: vpshufb {{.*#+}} zmm0 {%k1} {z} = zmm0[7,14,15,10,9,3,1,13,14,12,11,6,4,1,6,9,30,30,22,17,28,27,16,23,26,16,30,31,27,17,17,21,32,37,32,47,45,33,46,35,35,42,47,33,32,37,32,41,61,50,49,53,63,50,63,53,55,52,62,63,58,50,63,49] sched: [6:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: test_masked_z_64xi8_perm_mask1: @@ -5357,7 +5357,7 @@ define <64 x i8> @test_masked_64xi8_perm_mask2(<64 x i8> %vec, <64 x i8> %vec2, ; GENERIC-LABEL: test_masked_64xi8_perm_mask2: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vptestnmb %zmm2, %zmm2, %k1 # sched: [1:1.00] -; GENERIC-NEXT: vpshufb {{.*#+}} zmm1 {%k1} = zmm0[9,2,14,15,12,5,3,12,4,6,0,2,0,1,1,6,24,27,18,22,26,17,23,21,31,16,22,22,27,21,19,20,39,47,44,36,40,43,44,39,38,44,38,35,39,46,34,39,58,55,51,48,59,57,48,52,60,58,56,50,59,55,58,60] sched: [5:1.00] +; GENERIC-NEXT: vpshufb {{.*#+}} zmm1 {%k1} = 
zmm0[9,2,14,15,12,5,3,12,4,6,0,2,0,1,1,6,24,27,18,22,26,17,23,21,31,16,22,22,27,21,19,20,39,47,44,36,40,43,44,39,38,44,38,35,39,46,34,39,58,55,51,48,59,57,48,52,60,58,56,50,59,55,58,60] sched: [6:1.00] ; GENERIC-NEXT: vmovdqa64 %zmm1, %zmm0 # sched: [1:0.50] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -5377,7 +5377,7 @@ define <64 x i8> @test_masked_z_64xi8_perm_mask2(<64 x i8> %vec, <64 x i8> %mask ; GENERIC-LABEL: test_masked_z_64xi8_perm_mask2: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vptestnmb %zmm1, %zmm1, %k1 # sched: [1:1.00] -; GENERIC-NEXT: vpshufb {{.*#+}} zmm0 {%k1} {z} = zmm0[9,2,14,15,12,5,3,12,4,6,0,2,0,1,1,6,24,27,18,22,26,17,23,21,31,16,22,22,27,21,19,20,39,47,44,36,40,43,44,39,38,44,38,35,39,46,34,39,58,55,51,48,59,57,48,52,60,58,56,50,59,55,58,60] sched: [5:1.00] +; GENERIC-NEXT: vpshufb {{.*#+}} zmm0 {%k1} {z} = zmm0[9,2,14,15,12,5,3,12,4,6,0,2,0,1,1,6,24,27,18,22,26,17,23,21,31,16,22,22,27,21,19,20,39,47,44,36,40,43,44,39,38,44,38,35,39,46,34,39,58,55,51,48,59,57,48,52,60,58,56,50,59,55,58,60] sched: [6:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: test_masked_z_64xi8_perm_mask2: @@ -5393,7 +5393,7 @@ define <64 x i8> @test_masked_z_64xi8_perm_mask2(<64 x i8> %vec, <64 x i8> %mask define <64 x i8> @test_64xi8_perm_mask3(<64 x i8> %vec) { ; GENERIC-LABEL: test_64xi8_perm_mask3: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vpshufb {{.*#+}} zmm0 = zmm0[3,12,4,15,1,14,0,4,8,9,6,1,4,4,12,14,25,16,28,20,21,24,19,30,18,22,20,24,25,26,24,22,42,38,44,44,36,37,42,34,43,38,41,34,42,37,39,38,55,59,53,58,48,52,59,48,57,48,55,62,48,56,49,61] sched: [5:1.00] +; GENERIC-NEXT: vpshufb {{.*#+}} zmm0 = zmm0[3,12,4,15,1,14,0,4,8,9,6,1,4,4,12,14,25,16,28,20,21,24,19,30,18,22,20,24,25,26,24,22,42,38,44,44,36,37,42,34,43,38,41,34,42,37,39,38,55,59,53,58,48,52,59,48,57,48,55,62,48,56,49,61] sched: [6:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: test_64xi8_perm_mask3: @@ -5407,7 +5407,7 @@ define <64 x i8> @test_masked_64xi8_perm_mask3(<64 x i8> %vec, 
<64 x i8> %vec2, ; GENERIC-LABEL: test_masked_64xi8_perm_mask3: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vptestnmb %zmm2, %zmm2, %k1 # sched: [1:1.00] -; GENERIC-NEXT: vpshufb {{.*#+}} zmm1 {%k1} = zmm0[3,12,4,15,1,14,0,4,8,9,6,1,4,4,12,14,25,16,28,20,21,24,19,30,18,22,20,24,25,26,24,22,42,38,44,44,36,37,42,34,43,38,41,34,42,37,39,38,55,59,53,58,48,52,59,48,57,48,55,62,48,56,49,61] sched: [5:1.00] +; GENERIC-NEXT: vpshufb {{.*#+}} zmm1 {%k1} = zmm0[3,12,4,15,1,14,0,4,8,9,6,1,4,4,12,14,25,16,28,20,21,24,19,30,18,22,20,24,25,26,24,22,42,38,44,44,36,37,42,34,43,38,41,34,42,37,39,38,55,59,53,58,48,52,59,48,57,48,55,62,48,56,49,61] sched: [6:1.00] ; GENERIC-NEXT: vmovdqa64 %zmm1, %zmm0 # sched: [1:0.50] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -5427,7 +5427,7 @@ define <64 x i8> @test_masked_z_64xi8_perm_mask3(<64 x i8> %vec, <64 x i8> %mask ; GENERIC-LABEL: test_masked_z_64xi8_perm_mask3: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vptestnmb %zmm1, %zmm1, %k1 # sched: [1:1.00] -; GENERIC-NEXT: vpshufb {{.*#+}} zmm0 {%k1} {z} = zmm0[3,12,4,15,1,14,0,4,8,9,6,1,4,4,12,14,25,16,28,20,21,24,19,30,18,22,20,24,25,26,24,22,42,38,44,44,36,37,42,34,43,38,41,34,42,37,39,38,55,59,53,58,48,52,59,48,57,48,55,62,48,56,49,61] sched: [5:1.00] +; GENERIC-NEXT: vpshufb {{.*#+}} zmm0 {%k1} {z} = zmm0[3,12,4,15,1,14,0,4,8,9,6,1,4,4,12,14,25,16,28,20,21,24,19,30,18,22,20,24,25,26,24,22,42,38,44,44,36,37,42,34,43,38,41,34,42,37,39,38,55,59,53,58,48,52,59,48,57,48,55,62,48,56,49,61] sched: [6:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: test_masked_z_64xi8_perm_mask3: @@ -5444,7 +5444,7 @@ define <64 x i8> @test_64xi8_perm_mem_mask0(<64 x i8>* %vp) { ; GENERIC-LABEL: test_64xi8_perm_mem_mask0: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vmovdqa64 (%rdi), %zmm0 # sched: [6:0.50] -; GENERIC-NEXT: vpshufb {{.*#+}} zmm0 = 
zmm0[0,9,15,13,11,11,3,12,4,1,7,5,2,6,14,6,23,27,24,18,30,23,28,22,28,22,19,19,31,25,16,22,35,33,34,32,42,34,41,41,43,40,36,46,37,39,42,40,63,63,62,62,57,55,59,51,52,48,50,48,58,50,60,58] sched: [5:1.00] +; GENERIC-NEXT: vpshufb {{.*#+}} zmm0 = zmm0[0,9,15,13,11,11,3,12,4,1,7,5,2,6,14,6,23,27,24,18,30,23,28,22,28,22,19,19,31,25,16,22,35,33,34,32,42,34,41,41,43,40,36,46,37,39,42,40,63,63,62,62,57,55,59,51,52,48,50,48,58,50,60,58] sched: [6:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: test_64xi8_perm_mem_mask0: @@ -5461,7 +5461,7 @@ define <64 x i8> @test_masked_64xi8_perm_mem_mask0(<64 x i8>* %vp, <64 x i8> %ve ; GENERIC: # %bb.0: ; GENERIC-NEXT: vmovdqa64 (%rdi), %zmm2 # sched: [6:0.50] ; GENERIC-NEXT: vptestnmb %zmm1, %zmm1, %k1 # sched: [1:1.00] -; GENERIC-NEXT: vpshufb {{.*#+}} zmm0 {%k1} = zmm2[0,9,15,13,11,11,3,12,4,1,7,5,2,6,14,6,23,27,24,18,30,23,28,22,28,22,19,19,31,25,16,22,35,33,34,32,42,34,41,41,43,40,36,46,37,39,42,40,63,63,62,62,57,55,59,51,52,48,50,48,58,50,60,58] sched: [5:1.00] +; GENERIC-NEXT: vpshufb {{.*#+}} zmm0 {%k1} = zmm2[0,9,15,13,11,11,3,12,4,1,7,5,2,6,14,6,23,27,24,18,30,23,28,22,28,22,19,19,31,25,16,22,35,33,34,32,42,34,41,41,43,40,36,46,37,39,42,40,63,63,62,62,57,55,59,51,52,48,50,48,58,50,60,58] sched: [6:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: test_masked_64xi8_perm_mem_mask0: @@ -5482,7 +5482,7 @@ define <64 x i8> @test_masked_z_64xi8_perm_mem_mask0(<64 x i8>* %vp, <64 x i8> % ; GENERIC: # %bb.0: ; GENERIC-NEXT: vmovdqa64 (%rdi), %zmm1 # sched: [6:0.50] ; GENERIC-NEXT: vptestnmb %zmm0, %zmm0, %k1 # sched: [1:1.00] -; GENERIC-NEXT: vpshufb {{.*#+}} zmm0 {%k1} {z} = zmm1[0,9,15,13,11,11,3,12,4,1,7,5,2,6,14,6,23,27,24,18,30,23,28,22,28,22,19,19,31,25,16,22,35,33,34,32,42,34,41,41,43,40,36,46,37,39,42,40,63,63,62,62,57,55,59,51,52,48,50,48,58,50,60,58] sched: [5:1.00] +; GENERIC-NEXT: vpshufb {{.*#+}} zmm0 {%k1} {z} = 
zmm1[0,9,15,13,11,11,3,12,4,1,7,5,2,6,14,6,23,27,24,18,30,23,28,22,28,22,19,19,31,25,16,22,35,33,34,32,42,34,41,41,43,40,36,46,37,39,42,40,63,63,62,62,57,55,59,51,52,48,50,48,58,50,60,58] sched: [6:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: test_masked_z_64xi8_perm_mem_mask0: @@ -5503,7 +5503,7 @@ define <64 x i8> @test_masked_64xi8_perm_mem_mask1(<64 x i8>* %vp, <64 x i8> %ve ; GENERIC: # %bb.0: ; GENERIC-NEXT: vmovdqa64 (%rdi), %zmm2 # sched: [6:0.50] ; GENERIC-NEXT: vptestnmb %zmm1, %zmm1, %k1 # sched: [1:1.00] -; GENERIC-NEXT: vpshufb {{.*#+}} zmm0 {%k1} = zmm2[15,6,14,7,5,1,14,12,5,7,5,0,0,5,3,8,19,19,26,27,20,29,20,21,27,16,30,17,23,27,16,28,47,39,33,33,33,44,38,46,39,33,38,44,45,32,34,39,50,61,62,53,54,56,52,56,51,52,55,57,56,52,51,49] sched: [5:1.00] +; GENERIC-NEXT: vpshufb {{.*#+}} zmm0 {%k1} = zmm2[15,6,14,7,5,1,14,12,5,7,5,0,0,5,3,8,19,19,26,27,20,29,20,21,27,16,30,17,23,27,16,28,47,39,33,33,33,44,38,46,39,33,38,44,45,32,34,39,50,61,62,53,54,56,52,56,51,52,55,57,56,52,51,49] sched: [6:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: test_masked_64xi8_perm_mem_mask1: @@ -5524,7 +5524,7 @@ define <64 x i8> @test_masked_z_64xi8_perm_mem_mask1(<64 x i8>* %vp, <64 x i8> % ; GENERIC: # %bb.0: ; GENERIC-NEXT: vmovdqa64 (%rdi), %zmm1 # sched: [6:0.50] ; GENERIC-NEXT: vptestnmb %zmm0, %zmm0, %k1 # sched: [1:1.00] -; GENERIC-NEXT: vpshufb {{.*#+}} zmm0 {%k1} {z} = zmm1[15,6,14,7,5,1,14,12,5,7,5,0,0,5,3,8,19,19,26,27,20,29,20,21,27,16,30,17,23,27,16,28,47,39,33,33,33,44,38,46,39,33,38,44,45,32,34,39,50,61,62,53,54,56,52,56,51,52,55,57,56,52,51,49] sched: [5:1.00] +; GENERIC-NEXT: vpshufb {{.*#+}} zmm0 {%k1} {z} = zmm1[15,6,14,7,5,1,14,12,5,7,5,0,0,5,3,8,19,19,26,27,20,29,20,21,27,16,30,17,23,27,16,28,47,39,33,33,33,44,38,46,39,33,38,44,45,32,34,39,50,61,62,53,54,56,52,56,51,52,55,57,56,52,51,49] sched: [6:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: test_masked_z_64xi8_perm_mem_mask1: @@ -5545,7 +5545,7 @@ define <64 x 
i8> @test_masked_64xi8_perm_mem_mask2(<64 x i8>* %vp, <64 x i8> %ve ; GENERIC: # %bb.0: ; GENERIC-NEXT: vmovdqa64 (%rdi), %zmm2 # sched: [6:0.50] ; GENERIC-NEXT: vptestnmb %zmm1, %zmm1, %k1 # sched: [1:1.00] -; GENERIC-NEXT: vpshufb {{.*#+}} zmm0 {%k1} = zmm2[12,1,11,3,4,11,10,11,8,13,1,10,1,11,5,10,27,26,19,29,19,24,26,19,26,20,18,28,24,21,25,16,34,38,47,40,33,44,44,44,41,43,35,43,45,44,37,41,58,62,49,61,56,53,55,48,51,58,58,55,63,55,53,61] sched: [5:1.00] +; GENERIC-NEXT: vpshufb {{.*#+}} zmm0 {%k1} = zmm2[12,1,11,3,4,11,10,11,8,13,1,10,1,11,5,10,27,26,19,29,19,24,26,19,26,20,18,28,24,21,25,16,34,38,47,40,33,44,44,44,41,43,35,43,45,44,37,41,58,62,49,61,56,53,55,48,51,58,58,55,63,55,53,61] sched: [6:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: test_masked_64xi8_perm_mem_mask2: @@ -5566,7 +5566,7 @@ define <64 x i8> @test_masked_z_64xi8_perm_mem_mask2(<64 x i8>* %vp, <64 x i8> % ; GENERIC: # %bb.0: ; GENERIC-NEXT: vmovdqa64 (%rdi), %zmm1 # sched: [6:0.50] ; GENERIC-NEXT: vptestnmb %zmm0, %zmm0, %k1 # sched: [1:1.00] -; GENERIC-NEXT: vpshufb {{.*#+}} zmm0 {%k1} {z} = zmm1[12,1,11,3,4,11,10,11,8,13,1,10,1,11,5,10,27,26,19,29,19,24,26,19,26,20,18,28,24,21,25,16,34,38,47,40,33,44,44,44,41,43,35,43,45,44,37,41,58,62,49,61,56,53,55,48,51,58,58,55,63,55,53,61] sched: [5:1.00] +; GENERIC-NEXT: vpshufb {{.*#+}} zmm0 {%k1} {z} = zmm1[12,1,11,3,4,11,10,11,8,13,1,10,1,11,5,10,27,26,19,29,19,24,26,19,26,20,18,28,24,21,25,16,34,38,47,40,33,44,44,44,41,43,35,43,45,44,37,41,58,62,49,61,56,53,55,48,51,58,58,55,63,55,53,61] sched: [6:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: test_masked_z_64xi8_perm_mem_mask2: @@ -5586,7 +5586,7 @@ define <64 x i8> @test_64xi8_perm_mem_mask3(<64 x i8>* %vp) { ; GENERIC-LABEL: test_64xi8_perm_mem_mask3: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vmovdqa64 (%rdi), %zmm0 # sched: [6:0.50] -; GENERIC-NEXT: vpshufb {{.*#+}} zmm0 = 
zmm0[4,9,11,13,12,6,0,0,11,15,5,7,11,10,4,10,20,21,24,27,18,16,26,16,16,19,26,17,16,31,22,30,35,38,37,34,37,47,43,38,38,36,40,43,42,39,32,46,54,54,48,50,61,56,59,50,53,61,61,51,48,60,50,60] sched: [5:1.00] +; GENERIC-NEXT: vpshufb {{.*#+}} zmm0 = zmm0[4,9,11,13,12,6,0,0,11,15,5,7,11,10,4,10,20,21,24,27,18,16,26,16,16,19,26,17,16,31,22,30,35,38,37,34,37,47,43,38,38,36,40,43,42,39,32,46,54,54,48,50,61,56,59,50,53,61,61,51,48,60,50,60] sched: [6:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: test_64xi8_perm_mem_mask3: @@ -5603,7 +5603,7 @@ define <64 x i8> @test_masked_64xi8_perm_mem_mask3(<64 x i8>* %vp, <64 x i8> %ve ; GENERIC: # %bb.0: ; GENERIC-NEXT: vmovdqa64 (%rdi), %zmm2 # sched: [6:0.50] ; GENERIC-NEXT: vptestnmb %zmm1, %zmm1, %k1 # sched: [1:1.00] -; GENERIC-NEXT: vpshufb {{.*#+}} zmm0 {%k1} = zmm2[4,9,11,13,12,6,0,0,11,15,5,7,11,10,4,10,20,21,24,27,18,16,26,16,16,19,26,17,16,31,22,30,35,38,37,34,37,47,43,38,38,36,40,43,42,39,32,46,54,54,48,50,61,56,59,50,53,61,61,51,48,60,50,60] sched: [5:1.00] +; GENERIC-NEXT: vpshufb {{.*#+}} zmm0 {%k1} = zmm2[4,9,11,13,12,6,0,0,11,15,5,7,11,10,4,10,20,21,24,27,18,16,26,16,16,19,26,17,16,31,22,30,35,38,37,34,37,47,43,38,38,36,40,43,42,39,32,46,54,54,48,50,61,56,59,50,53,61,61,51,48,60,50,60] sched: [6:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: test_masked_64xi8_perm_mem_mask3: @@ -5624,7 +5624,7 @@ define <64 x i8> @test_masked_z_64xi8_perm_mem_mask3(<64 x i8>* %vp, <64 x i8> % ; GENERIC: # %bb.0: ; GENERIC-NEXT: vmovdqa64 (%rdi), %zmm1 # sched: [6:0.50] ; GENERIC-NEXT: vptestnmb %zmm0, %zmm0, %k1 # sched: [1:1.00] -; GENERIC-NEXT: vpshufb {{.*#+}} zmm0 {%k1} {z} = zmm1[4,9,11,13,12,6,0,0,11,15,5,7,11,10,4,10,20,21,24,27,18,16,26,16,16,19,26,17,16,31,22,30,35,38,37,34,37,47,43,38,38,36,40,43,42,39,32,46,54,54,48,50,61,56,59,50,53,61,61,51,48,60,50,60] sched: [5:1.00] +; GENERIC-NEXT: vpshufb {{.*#+}} zmm0 {%k1} {z} = 
zmm1[4,9,11,13,12,6,0,0,11,15,5,7,11,10,4,10,20,21,24,27,18,16,26,16,16,19,26,17,16,31,22,30,35,38,37,34,37,47,43,38,38,36,40,43,42,39,32,46,54,54,48,50,61,56,59,50,53,61,61,51,48,60,50,60] sched: [6:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: test_masked_z_64xi8_perm_mem_mask3: @@ -5993,7 +5993,7 @@ define <8 x i16> @test_masked_8xi16_perm_high_mem_mask0(<8 x i16>* %vp, <8 x i16 ; GENERIC-LABEL: test_masked_8xi16_perm_high_mem_mask0: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vptestnmw %xmm1, %xmm1, %k1 # sched: [1:1.00] -; GENERIC-NEXT: vpshufhw {{.*#+}} xmm0 {%k1} = mem[0,1,2,3,7,7,4,6] sched: [5:1.00] +; GENERIC-NEXT: vpshufhw {{.*#+}} xmm0 {%k1} = mem[0,1,2,3,7,7,4,6] sched: [6:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: test_masked_8xi16_perm_high_mem_mask0: @@ -6012,7 +6012,7 @@ define <8 x i16> @test_masked_z_8xi16_perm_high_mem_mask0(<8 x i16>* %vp, <8 x i ; GENERIC-LABEL: test_masked_z_8xi16_perm_high_mem_mask0: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vptestnmw %xmm0, %xmm0, %k1 # sched: [1:1.00] -; GENERIC-NEXT: vpshufhw {{.*#+}} xmm0 {%k1} {z} = mem[0,1,2,3,7,7,4,6] sched: [5:1.00] +; GENERIC-NEXT: vpshufhw {{.*#+}} xmm0 {%k1} {z} = mem[0,1,2,3,7,7,4,6] sched: [6:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: test_masked_z_8xi16_perm_high_mem_mask0: @@ -6031,7 +6031,7 @@ define <8 x i16> @test_masked_8xi16_perm_low_mem_mask1(<8 x i16>* %vp, <8 x i16> ; GENERIC-LABEL: test_masked_8xi16_perm_low_mem_mask1: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vptestnmw %xmm1, %xmm1, %k1 # sched: [1:1.00] -; GENERIC-NEXT: vpshuflw {{.*#+}} xmm0 {%k1} = mem[1,3,3,2,4,5,6,7] sched: [5:1.00] +; GENERIC-NEXT: vpshuflw {{.*#+}} xmm0 {%k1} = mem[1,3,3,2,4,5,6,7] sched: [6:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: test_masked_8xi16_perm_low_mem_mask1: @@ -6050,7 +6050,7 @@ define <8 x i16> @test_masked_z_8xi16_perm_low_mem_mask1(<8 x i16>* %vp, <8 x i1 ; GENERIC-LABEL: test_masked_z_8xi16_perm_low_mem_mask1: ; GENERIC: # %bb.0: ; 
GENERIC-NEXT: vptestnmw %xmm0, %xmm0, %k1 # sched: [1:1.00] -; GENERIC-NEXT: vpshuflw {{.*#+}} xmm0 {%k1} {z} = mem[1,3,3,2,4,5,6,7] sched: [5:1.00] +; GENERIC-NEXT: vpshuflw {{.*#+}} xmm0 {%k1} {z} = mem[1,3,3,2,4,5,6,7] sched: [6:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: test_masked_z_8xi16_perm_low_mem_mask1: @@ -6069,7 +6069,7 @@ define <8 x i16> @test_masked_8xi16_perm_high_mem_mask2(<8 x i16>* %vp, <8 x i16 ; GENERIC-LABEL: test_masked_8xi16_perm_high_mem_mask2: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vptestnmw %xmm1, %xmm1, %k1 # sched: [1:1.00] -; GENERIC-NEXT: vpshufhw {{.*#+}} xmm0 {%k1} = mem[0,1,2,3,6,6,5,7] sched: [5:1.00] +; GENERIC-NEXT: vpshufhw {{.*#+}} xmm0 {%k1} = mem[0,1,2,3,6,6,5,7] sched: [6:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: test_masked_8xi16_perm_high_mem_mask2: @@ -6088,7 +6088,7 @@ define <8 x i16> @test_masked_z_8xi16_perm_high_mem_mask2(<8 x i16>* %vp, <8 x i ; GENERIC-LABEL: test_masked_z_8xi16_perm_high_mem_mask2: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vptestnmw %xmm0, %xmm0, %k1 # sched: [1:1.00] -; GENERIC-NEXT: vpshufhw {{.*#+}} xmm0 {%k1} {z} = mem[0,1,2,3,6,6,5,7] sched: [5:1.00] +; GENERIC-NEXT: vpshufhw {{.*#+}} xmm0 {%k1} {z} = mem[0,1,2,3,6,6,5,7] sched: [6:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: test_masked_z_8xi16_perm_high_mem_mask2: @@ -6121,7 +6121,7 @@ define <8 x i16> @test_masked_8xi16_perm_low_mem_mask3(<8 x i16>* %vp, <8 x i16> ; GENERIC-LABEL: test_masked_8xi16_perm_low_mem_mask3: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vptestnmw %xmm1, %xmm1, %k1 # sched: [1:1.00] -; GENERIC-NEXT: vpshuflw {{.*#+}} xmm0 {%k1} = mem[3,1,2,0,4,5,6,7] sched: [5:1.00] +; GENERIC-NEXT: vpshuflw {{.*#+}} xmm0 {%k1} = mem[3,1,2,0,4,5,6,7] sched: [6:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: test_masked_8xi16_perm_low_mem_mask3: @@ -6140,7 +6140,7 @@ define <8 x i16> @test_masked_z_8xi16_perm_low_mem_mask3(<8 x i16>* %vp, <8 x i1 ; GENERIC-LABEL: 
test_masked_z_8xi16_perm_low_mem_mask3: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vptestnmw %xmm0, %xmm0, %k1 # sched: [1:1.00] -; GENERIC-NEXT: vpshuflw {{.*#+}} xmm0 {%k1} {z} = mem[3,1,2,0,4,5,6,7] sched: [5:1.00] +; GENERIC-NEXT: vpshuflw {{.*#+}} xmm0 {%k1} {z} = mem[3,1,2,0,4,5,6,7] sched: [6:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: test_masked_z_8xi16_perm_low_mem_mask3: @@ -6159,7 +6159,7 @@ define <8 x i16> @test_masked_8xi16_perm_high_mem_mask4(<8 x i16>* %vp, <8 x i16 ; GENERIC-LABEL: test_masked_8xi16_perm_high_mem_mask4: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vptestnmw %xmm1, %xmm1, %k1 # sched: [1:1.00] -; GENERIC-NEXT: vpshufhw {{.*#+}} xmm0 {%k1} = mem[0,1,2,3,7,6,7,5] sched: [5:1.00] +; GENERIC-NEXT: vpshufhw {{.*#+}} xmm0 {%k1} = mem[0,1,2,3,7,6,7,5] sched: [6:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: test_masked_8xi16_perm_high_mem_mask4: @@ -6178,7 +6178,7 @@ define <8 x i16> @test_masked_z_8xi16_perm_high_mem_mask4(<8 x i16>* %vp, <8 x i ; GENERIC-LABEL: test_masked_z_8xi16_perm_high_mem_mask4: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vptestnmw %xmm0, %xmm0, %k1 # sched: [1:1.00] -; GENERIC-NEXT: vpshufhw {{.*#+}} xmm0 {%k1} {z} = mem[0,1,2,3,7,6,7,5] sched: [5:1.00] +; GENERIC-NEXT: vpshufhw {{.*#+}} xmm0 {%k1} {z} = mem[0,1,2,3,7,6,7,5] sched: [6:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: test_masked_z_8xi16_perm_high_mem_mask4: @@ -6197,7 +6197,7 @@ define <8 x i16> @test_masked_8xi16_perm_low_mem_mask5(<8 x i16>* %vp, <8 x i16> ; GENERIC-LABEL: test_masked_8xi16_perm_low_mem_mask5: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vptestnmw %xmm1, %xmm1, %k1 # sched: [1:1.00] -; GENERIC-NEXT: vpshuflw {{.*#+}} xmm0 {%k1} = mem[2,1,3,2,4,5,6,7] sched: [5:1.00] +; GENERIC-NEXT: vpshuflw {{.*#+}} xmm0 {%k1} = mem[2,1,3,2,4,5,6,7] sched: [6:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: test_masked_8xi16_perm_low_mem_mask5: @@ -6216,7 +6216,7 @@ define <8 x i16> @test_masked_z_8xi16_perm_low_mem_mask5(<8 x 
i16>* %vp, <8 x i1 ; GENERIC-LABEL: test_masked_z_8xi16_perm_low_mem_mask5: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vptestnmw %xmm0, %xmm0, %k1 # sched: [1:1.00] -; GENERIC-NEXT: vpshuflw {{.*#+}} xmm0 {%k1} {z} = mem[2,1,3,2,4,5,6,7] sched: [5:1.00] +; GENERIC-NEXT: vpshuflw {{.*#+}} xmm0 {%k1} {z} = mem[2,1,3,2,4,5,6,7] sched: [6:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: test_masked_z_8xi16_perm_low_mem_mask5: @@ -6249,7 +6249,7 @@ define <8 x i16> @test_masked_8xi16_perm_high_mem_mask6(<8 x i16>* %vp, <8 x i16 ; GENERIC-LABEL: test_masked_8xi16_perm_high_mem_mask6: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vptestnmw %xmm1, %xmm1, %k1 # sched: [1:1.00] -; GENERIC-NEXT: vpshufhw {{.*#+}} xmm0 {%k1} = mem[0,1,2,3,7,4,4,4] sched: [5:1.00] +; GENERIC-NEXT: vpshufhw {{.*#+}} xmm0 {%k1} = mem[0,1,2,3,7,4,4,4] sched: [6:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: test_masked_8xi16_perm_high_mem_mask6: @@ -6268,7 +6268,7 @@ define <8 x i16> @test_masked_z_8xi16_perm_high_mem_mask6(<8 x i16>* %vp, <8 x i ; GENERIC-LABEL: test_masked_z_8xi16_perm_high_mem_mask6: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vptestnmw %xmm0, %xmm0, %k1 # sched: [1:1.00] -; GENERIC-NEXT: vpshufhw {{.*#+}} xmm0 {%k1} {z} = mem[0,1,2,3,7,4,4,4] sched: [5:1.00] +; GENERIC-NEXT: vpshufhw {{.*#+}} xmm0 {%k1} {z} = mem[0,1,2,3,7,4,4,4] sched: [6:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: test_masked_z_8xi16_perm_high_mem_mask6: @@ -6287,7 +6287,7 @@ define <8 x i16> @test_masked_8xi16_perm_low_mem_mask7(<8 x i16>* %vp, <8 x i16> ; GENERIC-LABEL: test_masked_8xi16_perm_low_mem_mask7: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vptestnmw %xmm1, %xmm1, %k1 # sched: [1:1.00] -; GENERIC-NEXT: vpshuflw {{.*#+}} xmm0 {%k1} = mem[0,3,3,1,4,5,6,7] sched: [5:1.00] +; GENERIC-NEXT: vpshuflw {{.*#+}} xmm0 {%k1} = mem[0,3,3,1,4,5,6,7] sched: [6:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: test_masked_8xi16_perm_low_mem_mask7: @@ -6306,7 +6306,7 @@ define <8 x i16> 
@test_masked_z_8xi16_perm_low_mem_mask7(<8 x i16>* %vp, <8 x i1 ; GENERIC-LABEL: test_masked_z_8xi16_perm_low_mem_mask7: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vptestnmw %xmm0, %xmm0, %k1 # sched: [1:1.00] -; GENERIC-NEXT: vpshuflw {{.*#+}} xmm0 {%k1} {z} = mem[0,3,3,1,4,5,6,7] sched: [5:1.00] +; GENERIC-NEXT: vpshuflw {{.*#+}} xmm0 {%k1} {z} = mem[0,3,3,1,4,5,6,7] sched: [6:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: test_masked_z_8xi16_perm_low_mem_mask7: @@ -6659,7 +6659,7 @@ define <16 x i16> @test_masked_z_16xi16_perm_low_mask7(<16 x i16> %vec, <16 x i1 define <16 x i16> @test_16xi16_perm_high_mem_mask0(<16 x i16>* %vp) { ; GENERIC-LABEL: test_16xi16_perm_high_mem_mask0: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vpshufhw {{.*#+}} ymm0 = mem[0,1,2,3,5,6,4,7,8,9,10,11,13,14,12,15] sched: [5:1.00] +; GENERIC-NEXT: vpshufhw {{.*#+}} ymm0 = mem[0,1,2,3,5,6,4,7,8,9,10,11,13,14,12,15] sched: [6:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: test_16xi16_perm_high_mem_mask0: @@ -6674,7 +6674,7 @@ define <16 x i16> @test_masked_16xi16_perm_high_mem_mask0(<16 x i16>* %vp, <16 x ; GENERIC-LABEL: test_masked_16xi16_perm_high_mem_mask0: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vptestnmw %ymm1, %ymm1, %k1 # sched: [1:1.00] -; GENERIC-NEXT: vpshufhw {{.*#+}} ymm0 {%k1} = mem[0,1,2,3,5,6,4,7,8,9,10,11,13,14,12,15] sched: [5:1.00] +; GENERIC-NEXT: vpshufhw {{.*#+}} ymm0 {%k1} = mem[0,1,2,3,5,6,4,7,8,9,10,11,13,14,12,15] sched: [6:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: test_masked_16xi16_perm_high_mem_mask0: @@ -6693,7 +6693,7 @@ define <16 x i16> @test_masked_z_16xi16_perm_high_mem_mask0(<16 x i16>* %vp, <16 ; GENERIC-LABEL: test_masked_z_16xi16_perm_high_mem_mask0: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vptestnmw %ymm0, %ymm0, %k1 # sched: [1:1.00] -; GENERIC-NEXT: vpshufhw {{.*#+}} ymm0 {%k1} {z} = mem[0,1,2,3,5,6,4,7,8,9,10,11,13,14,12,15] sched: [5:1.00] +; GENERIC-NEXT: vpshufhw {{.*#+}} ymm0 {%k1} {z} = 
mem[0,1,2,3,5,6,4,7,8,9,10,11,13,14,12,15] sched: [6:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: test_masked_z_16xi16_perm_high_mem_mask0: @@ -6712,7 +6712,7 @@ define <16 x i16> @test_masked_16xi16_perm_low_mem_mask1(<16 x i16>* %vp, <16 x ; GENERIC-LABEL: test_masked_16xi16_perm_low_mem_mask1: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vptestnmw %ymm1, %ymm1, %k1 # sched: [1:1.00] -; GENERIC-NEXT: vpshuflw {{.*#+}} ymm0 {%k1} = mem[1,3,3,0,4,5,6,7,9,11,11,8,12,13,14,15] sched: [5:1.00] +; GENERIC-NEXT: vpshuflw {{.*#+}} ymm0 {%k1} = mem[1,3,3,0,4,5,6,7,9,11,11,8,12,13,14,15] sched: [6:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: test_masked_16xi16_perm_low_mem_mask1: @@ -6731,7 +6731,7 @@ define <16 x i16> @test_masked_z_16xi16_perm_low_mem_mask1(<16 x i16>* %vp, <16 ; GENERIC-LABEL: test_masked_z_16xi16_perm_low_mem_mask1: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vptestnmw %ymm0, %ymm0, %k1 # sched: [1:1.00] -; GENERIC-NEXT: vpshuflw {{.*#+}} ymm0 {%k1} {z} = mem[1,3,3,0,4,5,6,7,9,11,11,8,12,13,14,15] sched: [5:1.00] +; GENERIC-NEXT: vpshuflw {{.*#+}} ymm0 {%k1} {z} = mem[1,3,3,0,4,5,6,7,9,11,11,8,12,13,14,15] sched: [6:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: test_masked_z_16xi16_perm_low_mem_mask1: @@ -6750,7 +6750,7 @@ define <16 x i16> @test_masked_16xi16_perm_high_mem_mask2(<16 x i16>* %vp, <16 x ; GENERIC-LABEL: test_masked_16xi16_perm_high_mem_mask2: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vptestnmw %ymm1, %ymm1, %k1 # sched: [1:1.00] -; GENERIC-NEXT: vpshufhw {{.*#+}} ymm0 {%k1} = mem[0,1,2,3,5,6,5,6,8,9,10,11,13,14,13,14] sched: [5:1.00] +; GENERIC-NEXT: vpshufhw {{.*#+}} ymm0 {%k1} = mem[0,1,2,3,5,6,5,6,8,9,10,11,13,14,13,14] sched: [6:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: test_masked_16xi16_perm_high_mem_mask2: @@ -6769,7 +6769,7 @@ define <16 x i16> @test_masked_z_16xi16_perm_high_mem_mask2(<16 x i16>* %vp, <16 ; GENERIC-LABEL: test_masked_z_16xi16_perm_high_mem_mask2: ; GENERIC: # %bb.0: ; 
GENERIC-NEXT: vptestnmw %ymm0, %ymm0, %k1 # sched: [1:1.00] -; GENERIC-NEXT: vpshufhw {{.*#+}} ymm0 {%k1} {z} = mem[0,1,2,3,5,6,5,6,8,9,10,11,13,14,13,14] sched: [5:1.00] +; GENERIC-NEXT: vpshufhw {{.*#+}} ymm0 {%k1} {z} = mem[0,1,2,3,5,6,5,6,8,9,10,11,13,14,13,14] sched: [6:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: test_masked_z_16xi16_perm_high_mem_mask2: @@ -6787,7 +6787,7 @@ define <16 x i16> @test_masked_z_16xi16_perm_high_mem_mask2(<16 x i16>* %vp, <16 define <16 x i16> @test_16xi16_perm_low_mem_mask3(<16 x i16>* %vp) { ; GENERIC-LABEL: test_16xi16_perm_low_mem_mask3: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vpshuflw {{.*#+}} ymm0 = mem[3,2,3,0,4,5,6,7,11,10,11,8,12,13,14,15] sched: [5:1.00] +; GENERIC-NEXT: vpshuflw {{.*#+}} ymm0 = mem[3,2,3,0,4,5,6,7,11,10,11,8,12,13,14,15] sched: [6:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: test_16xi16_perm_low_mem_mask3: @@ -6802,7 +6802,7 @@ define <16 x i16> @test_masked_16xi16_perm_low_mem_mask3(<16 x i16>* %vp, <16 x ; GENERIC-LABEL: test_masked_16xi16_perm_low_mem_mask3: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vptestnmw %ymm1, %ymm1, %k1 # sched: [1:1.00] -; GENERIC-NEXT: vpshuflw {{.*#+}} ymm0 {%k1} = mem[3,2,3,0,4,5,6,7,11,10,11,8,12,13,14,15] sched: [5:1.00] +; GENERIC-NEXT: vpshuflw {{.*#+}} ymm0 {%k1} = mem[3,2,3,0,4,5,6,7,11,10,11,8,12,13,14,15] sched: [6:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: test_masked_16xi16_perm_low_mem_mask3: @@ -6821,7 +6821,7 @@ define <16 x i16> @test_masked_z_16xi16_perm_low_mem_mask3(<16 x i16>* %vp, <16 ; GENERIC-LABEL: test_masked_z_16xi16_perm_low_mem_mask3: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vptestnmw %ymm0, %ymm0, %k1 # sched: [1:1.00] -; GENERIC-NEXT: vpshuflw {{.*#+}} ymm0 {%k1} {z} = mem[3,2,3,0,4,5,6,7,11,10,11,8,12,13,14,15] sched: [5:1.00] +; GENERIC-NEXT: vpshuflw {{.*#+}} ymm0 {%k1} {z} = mem[3,2,3,0,4,5,6,7,11,10,11,8,12,13,14,15] sched: [6:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: 
test_masked_z_16xi16_perm_low_mem_mask3: @@ -6840,7 +6840,7 @@ define <16 x i16> @test_masked_16xi16_perm_high_mem_mask4(<16 x i16>* %vp, <16 x ; GENERIC-LABEL: test_masked_16xi16_perm_high_mem_mask4: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vptestnmw %ymm1, %ymm1, %k1 # sched: [1:1.00] -; GENERIC-NEXT: vpshufhw {{.*#+}} ymm0 {%k1} = mem[0,1,2,3,7,7,6,7,8,9,10,11,15,15,14,15] sched: [5:1.00] +; GENERIC-NEXT: vpshufhw {{.*#+}} ymm0 {%k1} = mem[0,1,2,3,7,7,6,7,8,9,10,11,15,15,14,15] sched: [6:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: test_masked_16xi16_perm_high_mem_mask4: @@ -6859,7 +6859,7 @@ define <16 x i16> @test_masked_z_16xi16_perm_high_mem_mask4(<16 x i16>* %vp, <16 ; GENERIC-LABEL: test_masked_z_16xi16_perm_high_mem_mask4: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vptestnmw %ymm0, %ymm0, %k1 # sched: [1:1.00] -; GENERIC-NEXT: vpshufhw {{.*#+}} ymm0 {%k1} {z} = mem[0,1,2,3,7,7,6,7,8,9,10,11,15,15,14,15] sched: [5:1.00] +; GENERIC-NEXT: vpshufhw {{.*#+}} ymm0 {%k1} {z} = mem[0,1,2,3,7,7,6,7,8,9,10,11,15,15,14,15] sched: [6:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: test_masked_z_16xi16_perm_high_mem_mask4: @@ -6878,7 +6878,7 @@ define <16 x i16> @test_masked_16xi16_perm_low_mem_mask5(<16 x i16>* %vp, <16 x ; GENERIC-LABEL: test_masked_16xi16_perm_low_mem_mask5: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vptestnmw %ymm1, %ymm1, %k1 # sched: [1:1.00] -; GENERIC-NEXT: vpshuflw {{.*#+}} ymm0 {%k1} = mem[1,3,3,2,4,5,6,7,9,11,11,10,12,13,14,15] sched: [5:1.00] +; GENERIC-NEXT: vpshuflw {{.*#+}} ymm0 {%k1} = mem[1,3,3,2,4,5,6,7,9,11,11,10,12,13,14,15] sched: [6:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: test_masked_16xi16_perm_low_mem_mask5: @@ -6897,7 +6897,7 @@ define <16 x i16> @test_masked_z_16xi16_perm_low_mem_mask5(<16 x i16>* %vp, <16 ; GENERIC-LABEL: test_masked_z_16xi16_perm_low_mem_mask5: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vptestnmw %ymm0, %ymm0, %k1 # sched: [1:1.00] -; GENERIC-NEXT: vpshuflw {{.*#+}} ymm0 {%k1} {z} = 
mem[1,3,3,2,4,5,6,7,9,11,11,10,12,13,14,15] sched: [5:1.00] +; GENERIC-NEXT: vpshuflw {{.*#+}} ymm0 {%k1} {z} = mem[1,3,3,2,4,5,6,7,9,11,11,10,12,13,14,15] sched: [6:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: test_masked_z_16xi16_perm_low_mem_mask5: @@ -6915,7 +6915,7 @@ define <16 x i16> @test_masked_z_16xi16_perm_low_mem_mask5(<16 x i16>* %vp, <16 define <16 x i16> @test_16xi16_perm_high_mem_mask6(<16 x i16>* %vp) { ; GENERIC-LABEL: test_16xi16_perm_high_mem_mask6: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vpshufhw {{.*#+}} ymm0 = mem[0,1,2,3,4,4,4,5,8,9,10,11,12,12,12,13] sched: [5:1.00] +; GENERIC-NEXT: vpshufhw {{.*#+}} ymm0 = mem[0,1,2,3,4,4,4,5,8,9,10,11,12,12,12,13] sched: [6:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: test_16xi16_perm_high_mem_mask6: @@ -6930,7 +6930,7 @@ define <16 x i16> @test_masked_16xi16_perm_high_mem_mask6(<16 x i16>* %vp, <16 x ; GENERIC-LABEL: test_masked_16xi16_perm_high_mem_mask6: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vptestnmw %ymm1, %ymm1, %k1 # sched: [1:1.00] -; GENERIC-NEXT: vpshufhw {{.*#+}} ymm0 {%k1} = mem[0,1,2,3,4,4,4,5,8,9,10,11,12,12,12,13] sched: [5:1.00] +; GENERIC-NEXT: vpshufhw {{.*#+}} ymm0 {%k1} = mem[0,1,2,3,4,4,4,5,8,9,10,11,12,12,12,13] sched: [6:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: test_masked_16xi16_perm_high_mem_mask6: @@ -6949,7 +6949,7 @@ define <16 x i16> @test_masked_z_16xi16_perm_high_mem_mask6(<16 x i16>* %vp, <16 ; GENERIC-LABEL: test_masked_z_16xi16_perm_high_mem_mask6: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vptestnmw %ymm0, %ymm0, %k1 # sched: [1:1.00] -; GENERIC-NEXT: vpshufhw {{.*#+}} ymm0 {%k1} {z} = mem[0,1,2,3,4,4,4,5,8,9,10,11,12,12,12,13] sched: [5:1.00] +; GENERIC-NEXT: vpshufhw {{.*#+}} ymm0 {%k1} {z} = mem[0,1,2,3,4,4,4,5,8,9,10,11,12,12,12,13] sched: [6:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: test_masked_z_16xi16_perm_high_mem_mask6: @@ -6968,7 +6968,7 @@ define <16 x i16> @test_masked_16xi16_perm_low_mem_mask7(<16 x 
i16>* %vp, <16 x ; GENERIC-LABEL: test_masked_16xi16_perm_low_mem_mask7: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vptestnmw %ymm1, %ymm1, %k1 # sched: [1:1.00] -; GENERIC-NEXT: vpshuflw {{.*#+}} ymm0 {%k1} = mem[3,1,3,2,4,5,6,7,11,9,11,10,12,13,14,15] sched: [5:1.00] +; GENERIC-NEXT: vpshuflw {{.*#+}} ymm0 {%k1} = mem[3,1,3,2,4,5,6,7,11,9,11,10,12,13,14,15] sched: [6:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: test_masked_16xi16_perm_low_mem_mask7: @@ -6987,7 +6987,7 @@ define <16 x i16> @test_masked_z_16xi16_perm_low_mem_mask7(<16 x i16>* %vp, <16 ; GENERIC-LABEL: test_masked_z_16xi16_perm_low_mem_mask7: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vptestnmw %ymm0, %ymm0, %k1 # sched: [1:1.00] -; GENERIC-NEXT: vpshuflw {{.*#+}} ymm0 {%k1} {z} = mem[3,1,3,2,4,5,6,7,11,9,11,10,12,13,14,15] sched: [5:1.00] +; GENERIC-NEXT: vpshuflw {{.*#+}} ymm0 {%k1} {z} = mem[3,1,3,2,4,5,6,7,11,9,11,10,12,13,14,15] sched: [6:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: test_masked_z_16xi16_perm_low_mem_mask7: @@ -7340,7 +7340,7 @@ define <32 x i16> @test_masked_z_32xi16_perm_low_mask7(<32 x i16> %vec, <32 x i1 define <32 x i16> @test_32xi16_perm_high_mem_mask0(<32 x i16>* %vp) { ; GENERIC-LABEL: test_32xi16_perm_high_mem_mask0: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vpshufhw {{.*#+}} zmm0 = mem[0,1,2,3,7,4,5,6,8,9,10,11,15,12,13,14,16,17,18,19,23,20,21,22,24,25,26,27,31,28,29,30] sched: [5:1.00] +; GENERIC-NEXT: vpshufhw {{.*#+}} zmm0 = mem[0,1,2,3,7,4,5,6,8,9,10,11,15,12,13,14,16,17,18,19,23,20,21,22,24,25,26,27,31,28,29,30] sched: [6:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: test_32xi16_perm_high_mem_mask0: @@ -7355,7 +7355,7 @@ define <32 x i16> @test_masked_32xi16_perm_high_mem_mask0(<32 x i16>* %vp, <32 x ; GENERIC-LABEL: test_masked_32xi16_perm_high_mem_mask0: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vptestnmw %zmm1, %zmm1, %k1 # sched: [1:1.00] -; GENERIC-NEXT: vpshufhw {{.*#+}} zmm0 {%k1} = 
mem[0,1,2,3,7,4,5,6,8,9,10,11,15,12,13,14,16,17,18,19,23,20,21,22,24,25,26,27,31,28,29,30] sched: [5:1.00] +; GENERIC-NEXT: vpshufhw {{.*#+}} zmm0 {%k1} = mem[0,1,2,3,7,4,5,6,8,9,10,11,15,12,13,14,16,17,18,19,23,20,21,22,24,25,26,27,31,28,29,30] sched: [6:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: test_masked_32xi16_perm_high_mem_mask0: @@ -7374,7 +7374,7 @@ define <32 x i16> @test_masked_z_32xi16_perm_high_mem_mask0(<32 x i16>* %vp, <32 ; GENERIC-LABEL: test_masked_z_32xi16_perm_high_mem_mask0: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vptestnmw %zmm0, %zmm0, %k1 # sched: [1:1.00] -; GENERIC-NEXT: vpshufhw {{.*#+}} zmm0 {%k1} {z} = mem[0,1,2,3,7,4,5,6,8,9,10,11,15,12,13,14,16,17,18,19,23,20,21,22,24,25,26,27,31,28,29,30] sched: [5:1.00] +; GENERIC-NEXT: vpshufhw {{.*#+}} zmm0 {%k1} {z} = mem[0,1,2,3,7,4,5,6,8,9,10,11,15,12,13,14,16,17,18,19,23,20,21,22,24,25,26,27,31,28,29,30] sched: [6:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: test_masked_z_32xi16_perm_high_mem_mask0: @@ -7393,7 +7393,7 @@ define <32 x i16> @test_masked_32xi16_perm_low_mem_mask1(<32 x i16>* %vp, <32 x ; GENERIC-LABEL: test_masked_32xi16_perm_low_mem_mask1: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vptestnmw %zmm1, %zmm1, %k1 # sched: [1:1.00] -; GENERIC-NEXT: vpshuflw {{.*#+}} zmm0 {%k1} = mem[1,1,3,3,4,5,6,7,9,9,11,11,12,13,14,15,17,17,19,19,20,21,22,23,25,25,27,27,28,29,30,31] sched: [5:1.00] +; GENERIC-NEXT: vpshuflw {{.*#+}} zmm0 {%k1} = mem[1,1,3,3,4,5,6,7,9,9,11,11,12,13,14,15,17,17,19,19,20,21,22,23,25,25,27,27,28,29,30,31] sched: [6:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: test_masked_32xi16_perm_low_mem_mask1: @@ -7412,7 +7412,7 @@ define <32 x i16> @test_masked_z_32xi16_perm_low_mem_mask1(<32 x i16>* %vp, <32 ; GENERIC-LABEL: test_masked_z_32xi16_perm_low_mem_mask1: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vptestnmw %zmm0, %zmm0, %k1 # sched: [1:1.00] -; GENERIC-NEXT: vpshuflw {{.*#+}} zmm0 {%k1} {z} = 
mem[1,1,3,3,4,5,6,7,9,9,11,11,12,13,14,15,17,17,19,19,20,21,22,23,25,25,27,27,28,29,30,31] sched: [5:1.00] +; GENERIC-NEXT: vpshuflw {{.*#+}} zmm0 {%k1} {z} = mem[1,1,3,3,4,5,6,7,9,9,11,11,12,13,14,15,17,17,19,19,20,21,22,23,25,25,27,27,28,29,30,31] sched: [6:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: test_masked_z_32xi16_perm_low_mem_mask1: @@ -7431,7 +7431,7 @@ define <32 x i16> @test_masked_32xi16_perm_high_mem_mask2(<32 x i16>* %vp, <32 x ; GENERIC-LABEL: test_masked_32xi16_perm_high_mem_mask2: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vptestnmw %zmm1, %zmm1, %k1 # sched: [1:1.00] -; GENERIC-NEXT: vpshufhw {{.*#+}} zmm0 {%k1} = mem[0,1,2,3,4,7,6,4,8,9,10,11,12,15,14,12,16,17,18,19,20,23,22,20,24,25,26,27,28,31,30,28] sched: [5:1.00] +; GENERIC-NEXT: vpshufhw {{.*#+}} zmm0 {%k1} = mem[0,1,2,3,4,7,6,4,8,9,10,11,12,15,14,12,16,17,18,19,20,23,22,20,24,25,26,27,28,31,30,28] sched: [6:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: test_masked_32xi16_perm_high_mem_mask2: @@ -7450,7 +7450,7 @@ define <32 x i16> @test_masked_z_32xi16_perm_high_mem_mask2(<32 x i16>* %vp, <32 ; GENERIC-LABEL: test_masked_z_32xi16_perm_high_mem_mask2: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vptestnmw %zmm0, %zmm0, %k1 # sched: [1:1.00] -; GENERIC-NEXT: vpshufhw {{.*#+}} zmm0 {%k1} {z} = mem[0,1,2,3,4,7,6,4,8,9,10,11,12,15,14,12,16,17,18,19,20,23,22,20,24,25,26,27,28,31,30,28] sched: [5:1.00] +; GENERIC-NEXT: vpshufhw {{.*#+}} zmm0 {%k1} {z} = mem[0,1,2,3,4,7,6,4,8,9,10,11,12,15,14,12,16,17,18,19,20,23,22,20,24,25,26,27,28,31,30,28] sched: [6:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: test_masked_z_32xi16_perm_high_mem_mask2: @@ -7468,7 +7468,7 @@ define <32 x i16> @test_masked_z_32xi16_perm_high_mem_mask2(<32 x i16>* %vp, <32 define <32 x i16> @test_32xi16_perm_low_mem_mask3(<32 x i16>* %vp) { ; GENERIC-LABEL: test_32xi16_perm_low_mem_mask3: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vpshuflw {{.*#+}} zmm0 = 
mem[2,2,0,3,4,5,6,7,10,10,8,11,12,13,14,15,18,18,16,19,20,21,22,23,26,26,24,27,28,29,30,31] sched: [5:1.00] +; GENERIC-NEXT: vpshuflw {{.*#+}} zmm0 = mem[2,2,0,3,4,5,6,7,10,10,8,11,12,13,14,15,18,18,16,19,20,21,22,23,26,26,24,27,28,29,30,31] sched: [6:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: test_32xi16_perm_low_mem_mask3: @@ -7483,7 +7483,7 @@ define <32 x i16> @test_masked_32xi16_perm_low_mem_mask3(<32 x i16>* %vp, <32 x ; GENERIC-LABEL: test_masked_32xi16_perm_low_mem_mask3: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vptestnmw %zmm1, %zmm1, %k1 # sched: [1:1.00] -; GENERIC-NEXT: vpshuflw {{.*#+}} zmm0 {%k1} = mem[2,2,0,3,4,5,6,7,10,10,8,11,12,13,14,15,18,18,16,19,20,21,22,23,26,26,24,27,28,29,30,31] sched: [5:1.00] +; GENERIC-NEXT: vpshuflw {{.*#+}} zmm0 {%k1} = mem[2,2,0,3,4,5,6,7,10,10,8,11,12,13,14,15,18,18,16,19,20,21,22,23,26,26,24,27,28,29,30,31] sched: [6:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: test_masked_32xi16_perm_low_mem_mask3: @@ -7502,7 +7502,7 @@ define <32 x i16> @test_masked_z_32xi16_perm_low_mem_mask3(<32 x i16>* %vp, <32 ; GENERIC-LABEL: test_masked_z_32xi16_perm_low_mem_mask3: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vptestnmw %zmm0, %zmm0, %k1 # sched: [1:1.00] -; GENERIC-NEXT: vpshuflw {{.*#+}} zmm0 {%k1} {z} = mem[2,2,0,3,4,5,6,7,10,10,8,11,12,13,14,15,18,18,16,19,20,21,22,23,26,26,24,27,28,29,30,31] sched: [5:1.00] +; GENERIC-NEXT: vpshuflw {{.*#+}} zmm0 {%k1} {z} = mem[2,2,0,3,4,5,6,7,10,10,8,11,12,13,14,15,18,18,16,19,20,21,22,23,26,26,24,27,28,29,30,31] sched: [6:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: test_masked_z_32xi16_perm_low_mem_mask3: @@ -7521,7 +7521,7 @@ define <32 x i16> @test_masked_32xi16_perm_high_mem_mask4(<32 x i16>* %vp, <32 x ; GENERIC-LABEL: test_masked_32xi16_perm_high_mem_mask4: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vptestnmw %zmm1, %zmm1, %k1 # sched: [1:1.00] -; GENERIC-NEXT: vpshufhw {{.*#+}} zmm0 {%k1} = 
mem[0,1,2,3,7,4,6,5,8,9,10,11,15,12,14,13,16,17,18,19,23,20,22,21,24,25,26,27,31,28,30,29] sched: [5:1.00] +; GENERIC-NEXT: vpshufhw {{.*#+}} zmm0 {%k1} = mem[0,1,2,3,7,4,6,5,8,9,10,11,15,12,14,13,16,17,18,19,23,20,22,21,24,25,26,27,31,28,30,29] sched: [6:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: test_masked_32xi16_perm_high_mem_mask4: @@ -7540,7 +7540,7 @@ define <32 x i16> @test_masked_z_32xi16_perm_high_mem_mask4(<32 x i16>* %vp, <32 ; GENERIC-LABEL: test_masked_z_32xi16_perm_high_mem_mask4: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vptestnmw %zmm0, %zmm0, %k1 # sched: [1:1.00] -; GENERIC-NEXT: vpshufhw {{.*#+}} zmm0 {%k1} {z} = mem[0,1,2,3,7,4,6,5,8,9,10,11,15,12,14,13,16,17,18,19,23,20,22,21,24,25,26,27,31,28,30,29] sched: [5:1.00] +; GENERIC-NEXT: vpshufhw {{.*#+}} zmm0 {%k1} {z} = mem[0,1,2,3,7,4,6,5,8,9,10,11,15,12,14,13,16,17,18,19,23,20,22,21,24,25,26,27,31,28,30,29] sched: [6:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: test_masked_z_32xi16_perm_high_mem_mask4: @@ -7558,7 +7558,7 @@ define <32 x i16> @test_masked_z_32xi16_perm_high_mem_mask4(<32 x i16>* %vp, <32 define <32 x i16> @test_masked_32xi16_perm_low_mem_mask5(<32 x i16>* %vp, <32 x i16> %vec2, <32 x i16> %mask) { ; GENERIC-LABEL: test_masked_32xi16_perm_low_mem_mask5: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vpshufd {{.*#+}} zmm2 = mem[0,0,2,3,4,4,6,7,8,8,10,11,12,12,14,15] sched: [5:1.00] +; GENERIC-NEXT: vpshufd {{.*#+}} zmm2 = mem[0,0,2,3,4,4,6,7,8,8,10,11,12,12,14,15] sched: [6:1.00] ; GENERIC-NEXT: vptestnmw %zmm1, %zmm1, %k1 # sched: [1:1.00] ; GENERIC-NEXT: vmovdqu16 %zmm2, %zmm0 {%k1} # sched: [1:0.50] ; GENERIC-NEXT: retq # sched: [1:1.00] @@ -7579,7 +7579,7 @@ define <32 x i16> @test_masked_32xi16_perm_low_mem_mask5(<32 x i16>* %vp, <32 x define <32 x i16> @test_masked_z_32xi16_perm_low_mem_mask5(<32 x i16>* %vp, <32 x i16> %mask) { ; GENERIC-LABEL: test_masked_z_32xi16_perm_low_mem_mask5: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vpshufd {{.*#+}} zmm1 = 
mem[0,0,2,3,4,4,6,7,8,8,10,11,12,12,14,15] sched: [5:1.00] +; GENERIC-NEXT: vpshufd {{.*#+}} zmm1 = mem[0,0,2,3,4,4,6,7,8,8,10,11,12,12,14,15] sched: [6:1.00] ; GENERIC-NEXT: vptestnmw %zmm0, %zmm0, %k1 # sched: [1:1.00] ; GENERIC-NEXT: vmovdqu16 %zmm1, %zmm0 {%k1} {z} # sched: [1:0.50] ; GENERIC-NEXT: retq # sched: [1:1.00] @@ -7600,7 +7600,7 @@ define <32 x i16> @test_masked_z_32xi16_perm_low_mem_mask5(<32 x i16>* %vp, <32 define <32 x i16> @test_32xi16_perm_high_mem_mask6(<32 x i16>* %vp) { ; GENERIC-LABEL: test_32xi16_perm_high_mem_mask6: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vpshufhw {{.*#+}} zmm0 = mem[0,1,2,3,6,5,6,6,8,9,10,11,14,13,14,14,16,17,18,19,22,21,22,22,24,25,26,27,30,29,30,30] sched: [5:1.00] +; GENERIC-NEXT: vpshufhw {{.*#+}} zmm0 = mem[0,1,2,3,6,5,6,6,8,9,10,11,14,13,14,14,16,17,18,19,22,21,22,22,24,25,26,27,30,29,30,30] sched: [6:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: test_32xi16_perm_high_mem_mask6: @@ -7615,7 +7615,7 @@ define <32 x i16> @test_masked_32xi16_perm_high_mem_mask6(<32 x i16>* %vp, <32 x ; GENERIC-LABEL: test_masked_32xi16_perm_high_mem_mask6: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vptestnmw %zmm1, %zmm1, %k1 # sched: [1:1.00] -; GENERIC-NEXT: vpshufhw {{.*#+}} zmm0 {%k1} = mem[0,1,2,3,6,5,6,6,8,9,10,11,14,13,14,14,16,17,18,19,22,21,22,22,24,25,26,27,30,29,30,30] sched: [5:1.00] +; GENERIC-NEXT: vpshufhw {{.*#+}} zmm0 {%k1} = mem[0,1,2,3,6,5,6,6,8,9,10,11,14,13,14,14,16,17,18,19,22,21,22,22,24,25,26,27,30,29,30,30] sched: [6:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: test_masked_32xi16_perm_high_mem_mask6: @@ -7634,7 +7634,7 @@ define <32 x i16> @test_masked_z_32xi16_perm_high_mem_mask6(<32 x i16>* %vp, <32 ; GENERIC-LABEL: test_masked_z_32xi16_perm_high_mem_mask6: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vptestnmw %zmm0, %zmm0, %k1 # sched: [1:1.00] -; GENERIC-NEXT: vpshufhw {{.*#+}} zmm0 {%k1} {z} = mem[0,1,2,3,6,5,6,6,8,9,10,11,14,13,14,14,16,17,18,19,22,21,22,22,24,25,26,27,30,29,30,30] sched: 
[5:1.00] +; GENERIC-NEXT: vpshufhw {{.*#+}} zmm0 {%k1} {z} = mem[0,1,2,3,6,5,6,6,8,9,10,11,14,13,14,14,16,17,18,19,22,21,22,22,24,25,26,27,30,29,30,30] sched: [6:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: test_masked_z_32xi16_perm_high_mem_mask6: @@ -7653,7 +7653,7 @@ define <32 x i16> @test_masked_32xi16_perm_low_mem_mask7(<32 x i16>* %vp, <32 x ; GENERIC-LABEL: test_masked_32xi16_perm_low_mem_mask7: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vptestnmw %zmm1, %zmm1, %k1 # sched: [1:1.00] -; GENERIC-NEXT: vpshuflw {{.*#+}} zmm0 {%k1} = mem[3,1,3,0,4,5,6,7,11,9,11,8,12,13,14,15,19,17,19,16,20,21,22,23,27,25,27,24,28,29,30,31] sched: [5:1.00] +; GENERIC-NEXT: vpshuflw {{.*#+}} zmm0 {%k1} = mem[3,1,3,0,4,5,6,7,11,9,11,8,12,13,14,15,19,17,19,16,20,21,22,23,27,25,27,24,28,29,30,31] sched: [6:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: test_masked_32xi16_perm_low_mem_mask7: @@ -7672,7 +7672,7 @@ define <32 x i16> @test_masked_z_32xi16_perm_low_mem_mask7(<32 x i16>* %vp, <32 ; GENERIC-LABEL: test_masked_z_32xi16_perm_low_mem_mask7: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vptestnmw %zmm0, %zmm0, %k1 # sched: [1:1.00] -; GENERIC-NEXT: vpshuflw {{.*#+}} zmm0 {%k1} {z} = mem[3,1,3,0,4,5,6,7,11,9,11,8,12,13,14,15,19,17,19,16,20,21,22,23,27,25,27,24,28,29,30,31] sched: [5:1.00] +; GENERIC-NEXT: vpshuflw {{.*#+}} zmm0 {%k1} {z} = mem[3,1,3,0,4,5,6,7,11,9,11,8,12,13,14,15,19,17,19,16,20,21,22,23,27,25,27,24,28,29,30,31] sched: [6:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: test_masked_z_32xi16_perm_low_mem_mask7: @@ -7879,7 +7879,7 @@ define <4 x i32> @test_masked_4xi32_perm_mem_mask0(<4 x i32>* %vp, <4 x i32> %ve ; GENERIC-LABEL: test_masked_4xi32_perm_mem_mask0: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vptestnmd %xmm1, %xmm1, %k1 # sched: [1:1.00] -; GENERIC-NEXT: vpshufd {{.*#+}} xmm0 {%k1} = mem[0,1,3,3] sched: [5:1.00] +; GENERIC-NEXT: vpshufd {{.*#+}} xmm0 {%k1} = mem[0,1,3,3] sched: [6:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; 
SKX-LABEL: test_masked_4xi32_perm_mem_mask0: @@ -7898,7 +7898,7 @@ define <4 x i32> @test_masked_z_4xi32_perm_mem_mask0(<4 x i32>* %vp, <4 x i32> % ; GENERIC-LABEL: test_masked_z_4xi32_perm_mem_mask0: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vptestnmd %xmm0, %xmm0, %k1 # sched: [1:1.00] -; GENERIC-NEXT: vpshufd {{.*#+}} xmm0 {%k1} {z} = mem[0,1,3,3] sched: [5:1.00] +; GENERIC-NEXT: vpshufd {{.*#+}} xmm0 {%k1} {z} = mem[0,1,3,3] sched: [6:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: test_masked_z_4xi32_perm_mem_mask0: @@ -7917,7 +7917,7 @@ define <4 x i32> @test_masked_4xi32_perm_mem_mask1(<4 x i32>* %vp, <4 x i32> %ve ; GENERIC-LABEL: test_masked_4xi32_perm_mem_mask1: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vptestnmd %xmm1, %xmm1, %k1 # sched: [1:1.00] -; GENERIC-NEXT: vpshufd {{.*#+}} xmm0 {%k1} = mem[2,2,3,1] sched: [5:1.00] +; GENERIC-NEXT: vpshufd {{.*#+}} xmm0 {%k1} = mem[2,2,3,1] sched: [6:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: test_masked_4xi32_perm_mem_mask1: @@ -7936,7 +7936,7 @@ define <4 x i32> @test_masked_z_4xi32_perm_mem_mask1(<4 x i32>* %vp, <4 x i32> % ; GENERIC-LABEL: test_masked_z_4xi32_perm_mem_mask1: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vptestnmd %xmm0, %xmm0, %k1 # sched: [1:1.00] -; GENERIC-NEXT: vpshufd {{.*#+}} xmm0 {%k1} {z} = mem[2,2,3,1] sched: [5:1.00] +; GENERIC-NEXT: vpshufd {{.*#+}} xmm0 {%k1} {z} = mem[2,2,3,1] sched: [6:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: test_masked_z_4xi32_perm_mem_mask1: @@ -7955,7 +7955,7 @@ define <4 x i32> @test_masked_4xi32_perm_mem_mask2(<4 x i32>* %vp, <4 x i32> %ve ; GENERIC-LABEL: test_masked_4xi32_perm_mem_mask2: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vptestnmd %xmm1, %xmm1, %k1 # sched: [1:1.00] -; GENERIC-NEXT: vpshufd {{.*#+}} xmm0 {%k1} = mem[0,3,0,1] sched: [5:1.00] +; GENERIC-NEXT: vpshufd {{.*#+}} xmm0 {%k1} = mem[0,3,0,1] sched: [6:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: test_masked_4xi32_perm_mem_mask2: @@ -7974,7 +7974,7 @@ 
define <4 x i32> @test_masked_z_4xi32_perm_mem_mask2(<4 x i32>* %vp, <4 x i32> % ; GENERIC-LABEL: test_masked_z_4xi32_perm_mem_mask2: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vptestnmd %xmm0, %xmm0, %k1 # sched: [1:1.00] -; GENERIC-NEXT: vpshufd {{.*#+}} xmm0 {%k1} {z} = mem[0,3,0,1] sched: [5:1.00] +; GENERIC-NEXT: vpshufd {{.*#+}} xmm0 {%k1} {z} = mem[0,3,0,1] sched: [6:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: test_masked_z_4xi32_perm_mem_mask2: @@ -8007,7 +8007,7 @@ define <4 x i32> @test_masked_4xi32_perm_mem_mask3(<4 x i32>* %vp, <4 x i32> %ve ; GENERIC-LABEL: test_masked_4xi32_perm_mem_mask3: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vptestnmd %xmm1, %xmm1, %k1 # sched: [1:1.00] -; GENERIC-NEXT: vpshufd {{.*#+}} xmm0 {%k1} = mem[1,0,1,0] sched: [5:1.00] +; GENERIC-NEXT: vpshufd {{.*#+}} xmm0 {%k1} = mem[1,0,1,0] sched: [6:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: test_masked_4xi32_perm_mem_mask3: @@ -8026,7 +8026,7 @@ define <4 x i32> @test_masked_z_4xi32_perm_mem_mask3(<4 x i32>* %vp, <4 x i32> % ; GENERIC-LABEL: test_masked_z_4xi32_perm_mem_mask3: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vptestnmd %xmm0, %xmm0, %k1 # sched: [1:1.00] -; GENERIC-NEXT: vpshufd {{.*#+}} xmm0 {%k1} {z} = mem[1,0,1,0] sched: [5:1.00] +; GENERIC-NEXT: vpshufd {{.*#+}} xmm0 {%k1} {z} = mem[1,0,1,0] sched: [6:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: test_masked_z_4xi32_perm_mem_mask3: @@ -8233,7 +8233,7 @@ define <8 x i32> @test2_masked_8xi32_perm_mem_mask0(<8 x i32>* %vp, <8 x i32> %v ; GENERIC-LABEL: test2_masked_8xi32_perm_mem_mask0: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vptestnmd %ymm1, %ymm1, %k1 # sched: [1:1.00] -; GENERIC-NEXT: vpshufd {{.*#+}} ymm0 {%k1} = mem[1,0,2,0,5,4,6,4] sched: [5:1.00] +; GENERIC-NEXT: vpshufd {{.*#+}} ymm0 {%k1} = mem[1,0,2,0,5,4,6,4] sched: [6:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: test2_masked_8xi32_perm_mem_mask0: @@ -8252,7 +8252,7 @@ define <8 x i32> 
@test2_masked_z_8xi32_perm_mem_mask0(<8 x i32>* %vp, <8 x i32> ; GENERIC-LABEL: test2_masked_z_8xi32_perm_mem_mask0: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vptestnmd %ymm0, %ymm0, %k1 # sched: [1:1.00] -; GENERIC-NEXT: vpshufd {{.*#+}} ymm0 {%k1} {z} = mem[1,0,2,0,5,4,6,4] sched: [5:1.00] +; GENERIC-NEXT: vpshufd {{.*#+}} ymm0 {%k1} {z} = mem[1,0,2,0,5,4,6,4] sched: [6:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: test2_masked_z_8xi32_perm_mem_mask0: @@ -8271,7 +8271,7 @@ define <8 x i32> @test2_masked_8xi32_perm_mem_mask1(<8 x i32>* %vp, <8 x i32> %v ; GENERIC-LABEL: test2_masked_8xi32_perm_mem_mask1: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vptestnmd %ymm1, %ymm1, %k1 # sched: [1:1.00] -; GENERIC-NEXT: vpshufd {{.*#+}} ymm0 {%k1} = mem[0,3,2,0,4,7,6,4] sched: [5:1.00] +; GENERIC-NEXT: vpshufd {{.*#+}} ymm0 {%k1} = mem[0,3,2,0,4,7,6,4] sched: [6:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: test2_masked_8xi32_perm_mem_mask1: @@ -8290,7 +8290,7 @@ define <8 x i32> @test2_masked_z_8xi32_perm_mem_mask1(<8 x i32>* %vp, <8 x i32> ; GENERIC-LABEL: test2_masked_z_8xi32_perm_mem_mask1: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vptestnmd %ymm0, %ymm0, %k1 # sched: [1:1.00] -; GENERIC-NEXT: vpshufd {{.*#+}} ymm0 {%k1} {z} = mem[0,3,2,0,4,7,6,4] sched: [5:1.00] +; GENERIC-NEXT: vpshufd {{.*#+}} ymm0 {%k1} {z} = mem[0,3,2,0,4,7,6,4] sched: [6:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: test2_masked_z_8xi32_perm_mem_mask1: @@ -8309,7 +8309,7 @@ define <8 x i32> @test2_masked_8xi32_perm_mem_mask2(<8 x i32>* %vp, <8 x i32> %v ; GENERIC-LABEL: test2_masked_8xi32_perm_mem_mask2: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vptestnmd %ymm1, %ymm1, %k1 # sched: [1:1.00] -; GENERIC-NEXT: vpshufd {{.*#+}} ymm0 {%k1} = mem[3,2,3,1,7,6,7,5] sched: [5:1.00] +; GENERIC-NEXT: vpshufd {{.*#+}} ymm0 {%k1} = mem[3,2,3,1,7,6,7,5] sched: [6:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: test2_masked_8xi32_perm_mem_mask2: @@ -8328,7 +8328,7 @@ define <8 x i32> 
@test2_masked_z_8xi32_perm_mem_mask2(<8 x i32>* %vp, <8 x i32> ; GENERIC-LABEL: test2_masked_z_8xi32_perm_mem_mask2: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vptestnmd %ymm0, %ymm0, %k1 # sched: [1:1.00] -; GENERIC-NEXT: vpshufd {{.*#+}} ymm0 {%k1} {z} = mem[3,2,3,1,7,6,7,5] sched: [5:1.00] +; GENERIC-NEXT: vpshufd {{.*#+}} ymm0 {%k1} {z} = mem[3,2,3,1,7,6,7,5] sched: [6:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: test2_masked_z_8xi32_perm_mem_mask2: @@ -8361,7 +8361,7 @@ define <8 x i32> @test2_masked_8xi32_perm_mem_mask3(<8 x i32>* %vp, <8 x i32> %v ; GENERIC-LABEL: test2_masked_8xi32_perm_mem_mask3: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vptestnmd %ymm1, %ymm1, %k1 # sched: [1:1.00] -; GENERIC-NEXT: vpshufd {{.*#+}} ymm0 {%k1} = mem[3,2,0,0,7,6,4,4] sched: [5:1.00] +; GENERIC-NEXT: vpshufd {{.*#+}} ymm0 {%k1} = mem[3,2,0,0,7,6,4,4] sched: [6:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: test2_masked_8xi32_perm_mem_mask3: @@ -8380,7 +8380,7 @@ define <8 x i32> @test2_masked_z_8xi32_perm_mem_mask3(<8 x i32>* %vp, <8 x i32> ; GENERIC-LABEL: test2_masked_z_8xi32_perm_mem_mask3: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vptestnmd %ymm0, %ymm0, %k1 # sched: [1:1.00] -; GENERIC-NEXT: vpshufd {{.*#+}} ymm0 {%k1} {z} = mem[3,2,0,0,7,6,4,4] sched: [5:1.00] +; GENERIC-NEXT: vpshufd {{.*#+}} ymm0 {%k1} {z} = mem[3,2,0,0,7,6,4,4] sched: [6:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: test2_masked_z_8xi32_perm_mem_mask3: @@ -8572,7 +8572,7 @@ define <16 x i32> @test2_masked_z_16xi32_perm_mask3(<16 x i32> %vec, <16 x i32> define <16 x i32> @test2_16xi32_perm_mem_mask0(<16 x i32>* %vp) { ; GENERIC-LABEL: test2_16xi32_perm_mem_mask0: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vpermilps {{.*#+}} zmm0 = mem[1,0,1,3,5,4,5,7,9,8,9,11,13,12,13,15] sched: [5:1.00] +; GENERIC-NEXT: vpermilps {{.*#+}} zmm0 = mem[1,0,1,3,5,4,5,7,9,8,9,11,13,12,13,15] sched: [6:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: test2_16xi32_perm_mem_mask0: @@ -8587,7 
+8587,7 @@ define <16 x i32> @test2_masked_16xi32_perm_mem_mask0(<16 x i32>* %vp, <16 x i32 ; GENERIC-LABEL: test2_masked_16xi32_perm_mem_mask0: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vptestnmd %zmm1, %zmm1, %k1 # sched: [1:1.00] -; GENERIC-NEXT: vpshufd {{.*#+}} zmm0 {%k1} = mem[1,0,1,3,5,4,5,7,9,8,9,11,13,12,13,15] sched: [5:1.00] +; GENERIC-NEXT: vpshufd {{.*#+}} zmm0 {%k1} = mem[1,0,1,3,5,4,5,7,9,8,9,11,13,12,13,15] sched: [6:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: test2_masked_16xi32_perm_mem_mask0: @@ -8606,7 +8606,7 @@ define <16 x i32> @test2_masked_z_16xi32_perm_mem_mask0(<16 x i32>* %vp, <16 x i ; GENERIC-LABEL: test2_masked_z_16xi32_perm_mem_mask0: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vptestnmd %zmm0, %zmm0, %k1 # sched: [1:1.00] -; GENERIC-NEXT: vpshufd {{.*#+}} zmm0 {%k1} {z} = mem[1,0,1,3,5,4,5,7,9,8,9,11,13,12,13,15] sched: [5:1.00] +; GENERIC-NEXT: vpshufd {{.*#+}} zmm0 {%k1} {z} = mem[1,0,1,3,5,4,5,7,9,8,9,11,13,12,13,15] sched: [6:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: test2_masked_z_16xi32_perm_mem_mask0: @@ -8625,7 +8625,7 @@ define <16 x i32> @test2_masked_16xi32_perm_mem_mask1(<16 x i32>* %vp, <16 x i32 ; GENERIC-LABEL: test2_masked_16xi32_perm_mem_mask1: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vptestnmd %zmm1, %zmm1, %k1 # sched: [1:1.00] -; GENERIC-NEXT: vpshufd {{.*#+}} zmm0 {%k1} = mem[1,0,0,2,5,4,4,6,9,8,8,10,13,12,12,14] sched: [5:1.00] +; GENERIC-NEXT: vpshufd {{.*#+}} zmm0 {%k1} = mem[1,0,0,2,5,4,4,6,9,8,8,10,13,12,12,14] sched: [6:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: test2_masked_16xi32_perm_mem_mask1: @@ -8644,7 +8644,7 @@ define <16 x i32> @test2_masked_z_16xi32_perm_mem_mask1(<16 x i32>* %vp, <16 x i ; GENERIC-LABEL: test2_masked_z_16xi32_perm_mem_mask1: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vptestnmd %zmm0, %zmm0, %k1 # sched: [1:1.00] -; GENERIC-NEXT: vpshufd {{.*#+}} zmm0 {%k1} {z} = mem[1,0,0,2,5,4,4,6,9,8,8,10,13,12,12,14] sched: [5:1.00] +; GENERIC-NEXT: vpshufd {{.*#+}} 
zmm0 {%k1} {z} = mem[1,0,0,2,5,4,4,6,9,8,8,10,13,12,12,14] sched: [6:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: test2_masked_z_16xi32_perm_mem_mask1: @@ -8663,7 +8663,7 @@ define <16 x i32> @test2_masked_16xi32_perm_mem_mask2(<16 x i32>* %vp, <16 x i32 ; GENERIC-LABEL: test2_masked_16xi32_perm_mem_mask2: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vptestnmd %zmm1, %zmm1, %k1 # sched: [1:1.00] -; GENERIC-NEXT: vpshufd {{.*#+}} zmm0 {%k1} = mem[2,0,1,2,6,4,5,6,10,8,9,10,14,12,13,14] sched: [5:1.00] +; GENERIC-NEXT: vpshufd {{.*#+}} zmm0 {%k1} = mem[2,0,1,2,6,4,5,6,10,8,9,10,14,12,13,14] sched: [6:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: test2_masked_16xi32_perm_mem_mask2: @@ -8682,7 +8682,7 @@ define <16 x i32> @test2_masked_z_16xi32_perm_mem_mask2(<16 x i32>* %vp, <16 x i ; GENERIC-LABEL: test2_masked_z_16xi32_perm_mem_mask2: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vptestnmd %zmm0, %zmm0, %k1 # sched: [1:1.00] -; GENERIC-NEXT: vpshufd {{.*#+}} zmm0 {%k1} {z} = mem[2,0,1,2,6,4,5,6,10,8,9,10,14,12,13,14] sched: [5:1.00] +; GENERIC-NEXT: vpshufd {{.*#+}} zmm0 {%k1} {z} = mem[2,0,1,2,6,4,5,6,10,8,9,10,14,12,13,14] sched: [6:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: test2_masked_z_16xi32_perm_mem_mask2: @@ -8700,7 +8700,7 @@ define <16 x i32> @test2_masked_z_16xi32_perm_mem_mask2(<16 x i32>* %vp, <16 x i define <16 x i32> @test2_16xi32_perm_mem_mask3(<16 x i32>* %vp) { ; GENERIC-LABEL: test2_16xi32_perm_mem_mask3: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vpermilps {{.*#+}} zmm0 = mem[3,1,1,1,7,5,5,5,11,9,9,9,15,13,13,13] sched: [5:1.00] +; GENERIC-NEXT: vpermilps {{.*#+}} zmm0 = mem[3,1,1,1,7,5,5,5,11,9,9,9,15,13,13,13] sched: [6:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: test2_16xi32_perm_mem_mask3: @@ -8715,7 +8715,7 @@ define <16 x i32> @test2_masked_16xi32_perm_mem_mask3(<16 x i32>* %vp, <16 x i32 ; GENERIC-LABEL: test2_masked_16xi32_perm_mem_mask3: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vptestnmd %zmm1, %zmm1, %k1 
# sched: [1:1.00] -; GENERIC-NEXT: vpshufd {{.*#+}} zmm0 {%k1} = mem[3,1,1,1,7,5,5,5,11,9,9,9,15,13,13,13] sched: [5:1.00] +; GENERIC-NEXT: vpshufd {{.*#+}} zmm0 {%k1} = mem[3,1,1,1,7,5,5,5,11,9,9,9,15,13,13,13] sched: [6:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: test2_masked_16xi32_perm_mem_mask3: @@ -8734,7 +8734,7 @@ define <16 x i32> @test2_masked_z_16xi32_perm_mem_mask3(<16 x i32>* %vp, <16 x i ; GENERIC-LABEL: test2_masked_z_16xi32_perm_mem_mask3: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vptestnmd %zmm0, %zmm0, %k1 # sched: [1:1.00] -; GENERIC-NEXT: vpshufd {{.*#+}} zmm0 {%k1} {z} = mem[3,1,1,1,7,5,5,5,11,9,9,9,15,13,13,13] sched: [5:1.00] +; GENERIC-NEXT: vpshufd {{.*#+}} zmm0 {%k1} {z} = mem[3,1,1,1,7,5,5,5,11,9,9,9,15,13,13,13] sched: [6:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: test2_masked_z_16xi32_perm_mem_mask3: @@ -8941,7 +8941,7 @@ define <8 x float> @test_8xfloat_masked_shuff_mem_mask0(<8 x float> %vec1, <8 x ; GENERIC-LABEL: test_8xfloat_masked_shuff_mem_mask0: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vptestnmd %ymm2, %ymm2, %k1 # sched: [1:1.00] -; GENERIC-NEXT: vshuff32x4 {{.*#+}} ymm1 {%k1} = ymm0[4,5,6,7],mem[4,5,6,7] sched: [5:1.00] +; GENERIC-NEXT: vshuff32x4 {{.*#+}} ymm1 {%k1} = ymm0[4,5,6,7],mem[4,5,6,7] sched: [6:1.00] ; GENERIC-NEXT: vmovaps %ymm1, %ymm0 # sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -8962,7 +8962,7 @@ define <8 x float> @test_8xfloat_zero_masked_shuff_mem_mask0(<8 x float> %vec1, ; GENERIC-LABEL: test_8xfloat_zero_masked_shuff_mem_mask0: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vptestnmd %ymm1, %ymm1, %k1 # sched: [1:1.00] -; GENERIC-NEXT: vshuff32x4 {{.*#+}} ymm0 {%k1} {z} = ymm0[4,5,6,7],mem[4,5,6,7] sched: [5:1.00] +; GENERIC-NEXT: vshuff32x4 {{.*#+}} ymm0 {%k1} {z} = ymm0[4,5,6,7],mem[4,5,6,7] sched: [6:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: test_8xfloat_zero_masked_shuff_mem_mask0: @@ -8981,7 +8981,7 @@ define <8 x float> 
@test_8xfloat_masked_shuff_mem_mask1(<8 x float> %vec1, <8 x ; GENERIC-LABEL: test_8xfloat_masked_shuff_mem_mask1: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vptestnmd %ymm2, %ymm2, %k1 # sched: [1:1.00] -; GENERIC-NEXT: vshuff32x4 {{.*#+}} ymm1 {%k1} = ymm0[4,5,6,7],mem[4,5,6,7] sched: [5:1.00] +; GENERIC-NEXT: vshuff32x4 {{.*#+}} ymm1 {%k1} = ymm0[4,5,6,7],mem[4,5,6,7] sched: [6:1.00] ; GENERIC-NEXT: vmovaps %ymm1, %ymm0 # sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -9002,7 +9002,7 @@ define <8 x float> @test_8xfloat_zero_masked_shuff_mem_mask1(<8 x float> %vec1, ; GENERIC-LABEL: test_8xfloat_zero_masked_shuff_mem_mask1: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vptestnmd %ymm1, %ymm1, %k1 # sched: [1:1.00] -; GENERIC-NEXT: vshuff32x4 {{.*#+}} ymm0 {%k1} {z} = ymm0[4,5,6,7],mem[4,5,6,7] sched: [5:1.00] +; GENERIC-NEXT: vshuff32x4 {{.*#+}} ymm0 {%k1} {z} = ymm0[4,5,6,7],mem[4,5,6,7] sched: [6:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: test_8xfloat_zero_masked_shuff_mem_mask1: @@ -9021,7 +9021,7 @@ define <8 x float> @test_8xfloat_masked_shuff_mem_mask2(<8 x float> %vec1, <8 x ; GENERIC-LABEL: test_8xfloat_masked_shuff_mem_mask2: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vptestnmd %ymm2, %ymm2, %k1 # sched: [1:1.00] -; GENERIC-NEXT: vshuff32x4 {{.*#+}} ymm1 {%k1} = ymm0[4,5,6,7],mem[0,1,2,3] sched: [5:1.00] +; GENERIC-NEXT: vshuff32x4 {{.*#+}} ymm1 {%k1} = ymm0[4,5,6,7],mem[0,1,2,3] sched: [6:1.00] ; GENERIC-NEXT: vmovaps %ymm1, %ymm0 # sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -9042,7 +9042,7 @@ define <8 x float> @test_8xfloat_zero_masked_shuff_mem_mask2(<8 x float> %vec1, ; GENERIC-LABEL: test_8xfloat_zero_masked_shuff_mem_mask2: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vptestnmd %ymm1, %ymm1, %k1 # sched: [1:1.00] -; GENERIC-NEXT: vshuff32x4 {{.*#+}} ymm0 {%k1} {z} = ymm0[4,5,6,7],mem[0,1,2,3] sched: [5:1.00] +; GENERIC-NEXT: vshuff32x4 {{.*#+}} ymm0 {%k1} {z} = ymm0[4,5,6,7],mem[0,1,2,3] sched: [6:1.00] ; GENERIC-NEXT: retq # sched: 
[1:1.00] ; ; SKX-LABEL: test_8xfloat_zero_masked_shuff_mem_mask2: @@ -9075,7 +9075,7 @@ define <8 x float> @test_8xfloat_masked_shuff_mem_mask3(<8 x float> %vec1, <8 x ; GENERIC-LABEL: test_8xfloat_masked_shuff_mem_mask3: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vptestnmd %ymm2, %ymm2, %k1 # sched: [1:1.00] -; GENERIC-NEXT: vshuff32x4 {{.*#+}} ymm1 {%k1} = ymm0[4,5,6,7],mem[0,1,2,3] sched: [5:1.00] +; GENERIC-NEXT: vshuff32x4 {{.*#+}} ymm1 {%k1} = ymm0[4,5,6,7],mem[0,1,2,3] sched: [6:1.00] ; GENERIC-NEXT: vmovaps %ymm1, %ymm0 # sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -9096,7 +9096,7 @@ define <8 x float> @test_8xfloat_zero_masked_shuff_mem_mask3(<8 x float> %vec1, ; GENERIC-LABEL: test_8xfloat_zero_masked_shuff_mem_mask3: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vptestnmd %ymm1, %ymm1, %k1 # sched: [1:1.00] -; GENERIC-NEXT: vshuff32x4 {{.*#+}} ymm0 {%k1} {z} = ymm0[4,5,6,7],mem[0,1,2,3] sched: [5:1.00] +; GENERIC-NEXT: vshuff32x4 {{.*#+}} ymm0 {%k1} {z} = ymm0[4,5,6,7],mem[0,1,2,3] sched: [6:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: test_8xfloat_zero_masked_shuff_mem_mask3: @@ -9288,7 +9288,7 @@ define <16 x float> @test_16xfloat_zero_masked_shuff_mask3(<16 x float> %vec1, < define <16 x float> @test_16xfloat_shuff_mem_mask0(<16 x float> %vec1, <16 x float>* %vec2p) { ; GENERIC-LABEL: test_16xfloat_shuff_mem_mask0: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vshuff32x4 {{.*#+}} zmm0 = zmm0[12,13,14,15,8,9,10,11],mem[8,9,10,11,4,5,6,7] sched: [5:1.00] +; GENERIC-NEXT: vshuff32x4 {{.*#+}} zmm0 = zmm0[12,13,14,15,8,9,10,11],mem[8,9,10,11,4,5,6,7] sched: [6:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: test_16xfloat_shuff_mem_mask0: @@ -9303,7 +9303,7 @@ define <16 x float> @test_16xfloat_masked_shuff_mem_mask0(<16 x float> %vec1, <1 ; GENERIC-LABEL: test_16xfloat_masked_shuff_mem_mask0: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vptestnmd %zmm2, %zmm2, %k1 # sched: [1:1.00] -; GENERIC-NEXT: vshuff32x4 {{.*#+}} zmm1 {%k1} = 
zmm0[12,13,14,15,8,9,10,11],mem[8,9,10,11,4,5,6,7] sched: [5:1.00] +; GENERIC-NEXT: vshuff32x4 {{.*#+}} zmm1 {%k1} = zmm0[12,13,14,15,8,9,10,11],mem[8,9,10,11,4,5,6,7] sched: [6:1.00] ; GENERIC-NEXT: vmovaps %zmm1, %zmm0 # sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -9324,7 +9324,7 @@ define <16 x float> @test_16xfloat_zero_masked_shuff_mem_mask0(<16 x float> %vec ; GENERIC-LABEL: test_16xfloat_zero_masked_shuff_mem_mask0: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vptestnmd %zmm1, %zmm1, %k1 # sched: [1:1.00] -; GENERIC-NEXT: vshuff32x4 {{.*#+}} zmm0 {%k1} {z} = zmm0[12,13,14,15,8,9,10,11],mem[8,9,10,11,4,5,6,7] sched: [5:1.00] +; GENERIC-NEXT: vshuff32x4 {{.*#+}} zmm0 {%k1} {z} = zmm0[12,13,14,15,8,9,10,11],mem[8,9,10,11,4,5,6,7] sched: [6:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: test_16xfloat_zero_masked_shuff_mem_mask0: @@ -9343,7 +9343,7 @@ define <16 x float> @test_16xfloat_masked_shuff_mem_mask1(<16 x float> %vec1, <1 ; GENERIC-LABEL: test_16xfloat_masked_shuff_mem_mask1: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vptestnmd %zmm2, %zmm2, %k1 # sched: [1:1.00] -; GENERIC-NEXT: vshuff32x4 {{.*#+}} zmm1 {%k1} = zmm0[8,9,10,11,4,5,6,7],mem[8,9,10,11,4,5,6,7] sched: [5:1.00] +; GENERIC-NEXT: vshuff32x4 {{.*#+}} zmm1 {%k1} = zmm0[8,9,10,11,4,5,6,7],mem[8,9,10,11,4,5,6,7] sched: [6:1.00] ; GENERIC-NEXT: vmovaps %zmm1, %zmm0 # sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -9364,7 +9364,7 @@ define <16 x float> @test_16xfloat_zero_masked_shuff_mem_mask1(<16 x float> %vec ; GENERIC-LABEL: test_16xfloat_zero_masked_shuff_mem_mask1: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vptestnmd %zmm1, %zmm1, %k1 # sched: [1:1.00] -; GENERIC-NEXT: vshuff32x4 {{.*#+}} zmm0 {%k1} {z} = zmm0[8,9,10,11,4,5,6,7],mem[8,9,10,11,4,5,6,7] sched: [5:1.00] +; GENERIC-NEXT: vshuff32x4 {{.*#+}} zmm0 {%k1} {z} = zmm0[8,9,10,11,4,5,6,7],mem[8,9,10,11,4,5,6,7] sched: [6:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: 
test_16xfloat_zero_masked_shuff_mem_mask1: @@ -9383,7 +9383,7 @@ define <16 x float> @test_16xfloat_masked_shuff_mem_mask2(<16 x float> %vec1, <1 ; GENERIC-LABEL: test_16xfloat_masked_shuff_mem_mask2: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vptestnmd %zmm2, %zmm2, %k1 # sched: [1:1.00] -; GENERIC-NEXT: vshuff32x4 {{.*#+}} zmm1 {%k1} = zmm0[0,1,2,3,0,1,2,3],mem[8,9,10,11,8,9,10,11] sched: [5:1.00] +; GENERIC-NEXT: vshuff32x4 {{.*#+}} zmm1 {%k1} = zmm0[0,1,2,3,0,1,2,3],mem[8,9,10,11,8,9,10,11] sched: [6:1.00] ; GENERIC-NEXT: vmovaps %zmm1, %zmm0 # sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -9404,7 +9404,7 @@ define <16 x float> @test_16xfloat_zero_masked_shuff_mem_mask2(<16 x float> %vec ; GENERIC-LABEL: test_16xfloat_zero_masked_shuff_mem_mask2: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vptestnmd %zmm1, %zmm1, %k1 # sched: [1:1.00] -; GENERIC-NEXT: vshuff32x4 {{.*#+}} zmm0 {%k1} {z} = zmm0[0,1,2,3,0,1,2,3],mem[8,9,10,11,8,9,10,11] sched: [5:1.00] +; GENERIC-NEXT: vshuff32x4 {{.*#+}} zmm0 {%k1} {z} = zmm0[0,1,2,3,0,1,2,3],mem[8,9,10,11,8,9,10,11] sched: [6:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: test_16xfloat_zero_masked_shuff_mem_mask2: @@ -9422,7 +9422,7 @@ define <16 x float> @test_16xfloat_zero_masked_shuff_mem_mask2(<16 x float> %vec define <16 x float> @test_16xfloat_shuff_mem_mask3(<16 x float> %vec1, <16 x float>* %vec2p) { ; GENERIC-LABEL: test_16xfloat_shuff_mem_mask3: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vshuff32x4 {{.*#+}} zmm0 = zmm0[4,5,6,7,0,1,2,3],mem[12,13,14,15,12,13,14,15] sched: [5:1.00] +; GENERIC-NEXT: vshuff32x4 {{.*#+}} zmm0 = zmm0[4,5,6,7,0,1,2,3],mem[12,13,14,15,12,13,14,15] sched: [6:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: test_16xfloat_shuff_mem_mask3: @@ -9437,7 +9437,7 @@ define <16 x float> @test_16xfloat_masked_shuff_mem_mask3(<16 x float> %vec1, <1 ; GENERIC-LABEL: test_16xfloat_masked_shuff_mem_mask3: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vptestnmd %zmm2, %zmm2, %k1 # sched: [1:1.00] -; 
GENERIC-NEXT: vshuff32x4 {{.*#+}} zmm1 {%k1} = zmm0[4,5,6,7,0,1,2,3],mem[12,13,14,15,12,13,14,15] sched: [5:1.00] +; GENERIC-NEXT: vshuff32x4 {{.*#+}} zmm1 {%k1} = zmm0[4,5,6,7,0,1,2,3],mem[12,13,14,15,12,13,14,15] sched: [6:1.00] ; GENERIC-NEXT: vmovaps %zmm1, %zmm0 # sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -9458,7 +9458,7 @@ define <16 x float> @test_16xfloat_zero_masked_shuff_mem_mask3(<16 x float> %vec ; GENERIC-LABEL: test_16xfloat_zero_masked_shuff_mem_mask3: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vptestnmd %zmm1, %zmm1, %k1 # sched: [1:1.00] -; GENERIC-NEXT: vshuff32x4 {{.*#+}} zmm0 {%k1} {z} = zmm0[4,5,6,7,0,1,2,3],mem[12,13,14,15,12,13,14,15] sched: [5:1.00] +; GENERIC-NEXT: vshuff32x4 {{.*#+}} zmm0 {%k1} {z} = zmm0[4,5,6,7,0,1,2,3],mem[12,13,14,15,12,13,14,15] sched: [6:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: test_16xfloat_zero_masked_shuff_mem_mask3: @@ -9665,7 +9665,7 @@ define <4 x double> @test_4xdouble_masked_shuff_mem_mask0(<4 x double> %vec1, <4 ; GENERIC-LABEL: test_4xdouble_masked_shuff_mem_mask0: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vptestnmq %ymm2, %ymm2, %k1 # sched: [1:1.00] -; GENERIC-NEXT: vshuff64x2 {{.*#+}} ymm1 {%k1} = ymm0[2,3],mem[2,3] sched: [5:1.00] +; GENERIC-NEXT: vshuff64x2 {{.*#+}} ymm1 {%k1} = ymm0[2,3],mem[2,3] sched: [6:1.00] ; GENERIC-NEXT: vmovapd %ymm1, %ymm0 # sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -9686,7 +9686,7 @@ define <4 x double> @test_4xdouble_zero_masked_shuff_mem_mask0(<4 x double> %vec ; GENERIC-LABEL: test_4xdouble_zero_masked_shuff_mem_mask0: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vptestnmq %ymm1, %ymm1, %k1 # sched: [1:1.00] -; GENERIC-NEXT: vshuff64x2 {{.*#+}} ymm0 {%k1} {z} = ymm0[2,3],mem[2,3] sched: [5:1.00] +; GENERIC-NEXT: vshuff64x2 {{.*#+}} ymm0 {%k1} {z} = ymm0[2,3],mem[2,3] sched: [6:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: test_4xdouble_zero_masked_shuff_mem_mask0: @@ -9705,7 +9705,7 @@ define <4 x double> 
@test_4xdouble_masked_shuff_mem_mask1(<4 x double> %vec1, <4 ; GENERIC-LABEL: test_4xdouble_masked_shuff_mem_mask1: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vptestnmq %ymm2, %ymm2, %k1 # sched: [1:1.00] -; GENERIC-NEXT: vshuff64x2 {{.*#+}} ymm1 {%k1} = ymm0[2,3],mem[0,1] sched: [5:1.00] +; GENERIC-NEXT: vshuff64x2 {{.*#+}} ymm1 {%k1} = ymm0[2,3],mem[0,1] sched: [6:1.00] ; GENERIC-NEXT: vmovapd %ymm1, %ymm0 # sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -9726,7 +9726,7 @@ define <4 x double> @test_4xdouble_zero_masked_shuff_mem_mask1(<4 x double> %vec ; GENERIC-LABEL: test_4xdouble_zero_masked_shuff_mem_mask1: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vptestnmq %ymm1, %ymm1, %k1 # sched: [1:1.00] -; GENERIC-NEXT: vshuff64x2 {{.*#+}} ymm0 {%k1} {z} = ymm0[2,3],mem[0,1] sched: [5:1.00] +; GENERIC-NEXT: vshuff64x2 {{.*#+}} ymm0 {%k1} {z} = ymm0[2,3],mem[0,1] sched: [6:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: test_4xdouble_zero_masked_shuff_mem_mask1: @@ -9745,7 +9745,7 @@ define <4 x double> @test_4xdouble_masked_shuff_mem_mask2(<4 x double> %vec1, <4 ; GENERIC-LABEL: test_4xdouble_masked_shuff_mem_mask2: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vptestnmq %ymm2, %ymm2, %k1 # sched: [1:1.00] -; GENERIC-NEXT: vshuff64x2 {{.*#+}} ymm1 {%k1} = ymm0[2,3],mem[0,1] sched: [5:1.00] +; GENERIC-NEXT: vshuff64x2 {{.*#+}} ymm1 {%k1} = ymm0[2,3],mem[0,1] sched: [6:1.00] ; GENERIC-NEXT: vmovapd %ymm1, %ymm0 # sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -9766,7 +9766,7 @@ define <4 x double> @test_4xdouble_zero_masked_shuff_mem_mask2(<4 x double> %vec ; GENERIC-LABEL: test_4xdouble_zero_masked_shuff_mem_mask2: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vptestnmq %ymm1, %ymm1, %k1 # sched: [1:1.00] -; GENERIC-NEXT: vshuff64x2 {{.*#+}} ymm0 {%k1} {z} = ymm0[2,3],mem[0,1] sched: [5:1.00] +; GENERIC-NEXT: vshuff64x2 {{.*#+}} ymm0 {%k1} {z} = ymm0[2,3],mem[0,1] sched: [6:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: 
test_4xdouble_zero_masked_shuff_mem_mask2: @@ -9799,7 +9799,7 @@ define <4 x double> @test_4xdouble_masked_shuff_mem_mask3(<4 x double> %vec1, <4 ; GENERIC-LABEL: test_4xdouble_masked_shuff_mem_mask3: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vptestnmq %ymm2, %ymm2, %k1 # sched: [1:1.00] -; GENERIC-NEXT: vshuff64x2 {{.*#+}} ymm1 {%k1} = ymm0[2,3],mem[2,3] sched: [5:1.00] +; GENERIC-NEXT: vshuff64x2 {{.*#+}} ymm1 {%k1} = ymm0[2,3],mem[2,3] sched: [6:1.00] ; GENERIC-NEXT: vmovapd %ymm1, %ymm0 # sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -9820,7 +9820,7 @@ define <4 x double> @test_4xdouble_zero_masked_shuff_mem_mask3(<4 x double> %vec ; GENERIC-LABEL: test_4xdouble_zero_masked_shuff_mem_mask3: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vptestnmq %ymm1, %ymm1, %k1 # sched: [1:1.00] -; GENERIC-NEXT: vshuff64x2 {{.*#+}} ymm0 {%k1} {z} = ymm0[2,3],mem[2,3] sched: [5:1.00] +; GENERIC-NEXT: vshuff64x2 {{.*#+}} ymm0 {%k1} {z} = ymm0[2,3],mem[2,3] sched: [6:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: test_4xdouble_zero_masked_shuff_mem_mask3: @@ -10012,7 +10012,7 @@ define <8 x double> @test_8xdouble_zero_masked_shuff_mask3(<8 x double> %vec1, < define <8 x double> @test_8xdouble_shuff_mem_mask0(<8 x double> %vec1, <8 x double>* %vec2p) { ; GENERIC-LABEL: test_8xdouble_shuff_mem_mask0: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vshuff64x2 {{.*#+}} zmm0 = zmm0[6,7,0,1],mem[0,1,0,1] sched: [5:1.00] +; GENERIC-NEXT: vshuff64x2 {{.*#+}} zmm0 = zmm0[6,7,0,1],mem[0,1,0,1] sched: [6:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: test_8xdouble_shuff_mem_mask0: @@ -10027,7 +10027,7 @@ define <8 x double> @test_8xdouble_masked_shuff_mem_mask0(<8 x double> %vec1, <8 ; GENERIC-LABEL: test_8xdouble_masked_shuff_mem_mask0: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vptestnmq %zmm2, %zmm2, %k1 # sched: [1:1.00] -; GENERIC-NEXT: vshuff64x2 {{.*#+}} zmm1 {%k1} = zmm0[6,7,0,1],mem[0,1,0,1] sched: [5:1.00] +; GENERIC-NEXT: vshuff64x2 {{.*#+}} zmm1 {%k1} = 
zmm0[6,7,0,1],mem[0,1,0,1] sched: [6:1.00] ; GENERIC-NEXT: vmovapd %zmm1, %zmm0 # sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -10048,7 +10048,7 @@ define <8 x double> @test_8xdouble_zero_masked_shuff_mem_mask0(<8 x double> %vec ; GENERIC-LABEL: test_8xdouble_zero_masked_shuff_mem_mask0: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vptestnmq %zmm1, %zmm1, %k1 # sched: [1:1.00] -; GENERIC-NEXT: vshuff64x2 {{.*#+}} zmm0 {%k1} {z} = zmm0[6,7,0,1],mem[0,1,0,1] sched: [5:1.00] +; GENERIC-NEXT: vshuff64x2 {{.*#+}} zmm0 {%k1} {z} = zmm0[6,7,0,1],mem[0,1,0,1] sched: [6:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: test_8xdouble_zero_masked_shuff_mem_mask0: @@ -10067,7 +10067,7 @@ define <8 x double> @test_8xdouble_masked_shuff_mem_mask1(<8 x double> %vec1, <8 ; GENERIC-LABEL: test_8xdouble_masked_shuff_mem_mask1: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vptestnmq %zmm2, %zmm2, %k1 # sched: [1:1.00] -; GENERIC-NEXT: vshuff64x2 {{.*#+}} zmm1 {%k1} = zmm0[6,7,6,7],mem[0,1,2,3] sched: [5:1.00] +; GENERIC-NEXT: vshuff64x2 {{.*#+}} zmm1 {%k1} = zmm0[6,7,6,7],mem[0,1,2,3] sched: [6:1.00] ; GENERIC-NEXT: vmovapd %zmm1, %zmm0 # sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -10088,7 +10088,7 @@ define <8 x double> @test_8xdouble_zero_masked_shuff_mem_mask1(<8 x double> %vec ; GENERIC-LABEL: test_8xdouble_zero_masked_shuff_mem_mask1: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vptestnmq %zmm1, %zmm1, %k1 # sched: [1:1.00] -; GENERIC-NEXT: vshuff64x2 {{.*#+}} zmm0 {%k1} {z} = zmm0[6,7,6,7],mem[0,1,2,3] sched: [5:1.00] +; GENERIC-NEXT: vshuff64x2 {{.*#+}} zmm0 {%k1} {z} = zmm0[6,7,6,7],mem[0,1,2,3] sched: [6:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: test_8xdouble_zero_masked_shuff_mem_mask1: @@ -10107,7 +10107,7 @@ define <8 x double> @test_8xdouble_masked_shuff_mem_mask2(<8 x double> %vec1, <8 ; GENERIC-LABEL: test_8xdouble_masked_shuff_mem_mask2: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vptestnmq %zmm2, %zmm2, %k1 # sched: [1:1.00] -; GENERIC-NEXT: 
vshuff64x2 {{.*#+}} zmm1 {%k1} = zmm0[0,1,2,3],mem[0,1,4,5] sched: [5:1.00] +; GENERIC-NEXT: vshuff64x2 {{.*#+}} zmm1 {%k1} = zmm0[0,1,2,3],mem[0,1,4,5] sched: [6:1.00] ; GENERIC-NEXT: vmovapd %zmm1, %zmm0 # sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -10128,7 +10128,7 @@ define <8 x double> @test_8xdouble_zero_masked_shuff_mem_mask2(<8 x double> %vec ; GENERIC-LABEL: test_8xdouble_zero_masked_shuff_mem_mask2: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vptestnmq %zmm1, %zmm1, %k1 # sched: [1:1.00] -; GENERIC-NEXT: vshuff64x2 {{.*#+}} zmm0 {%k1} {z} = zmm0[0,1,2,3],mem[0,1,4,5] sched: [5:1.00] +; GENERIC-NEXT: vshuff64x2 {{.*#+}} zmm0 {%k1} {z} = zmm0[0,1,2,3],mem[0,1,4,5] sched: [6:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: test_8xdouble_zero_masked_shuff_mem_mask2: @@ -10146,7 +10146,7 @@ define <8 x double> @test_8xdouble_zero_masked_shuff_mem_mask2(<8 x double> %vec define <8 x double> @test_8xdouble_shuff_mem_mask3(<8 x double> %vec1, <8 x double>* %vec2p) { ; GENERIC-LABEL: test_8xdouble_shuff_mem_mask3: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vshuff64x2 {{.*#+}} zmm0 = zmm0[2,3,0,1],mem[4,5,0,1] sched: [5:1.00] +; GENERIC-NEXT: vshuff64x2 {{.*#+}} zmm0 = zmm0[2,3,0,1],mem[4,5,0,1] sched: [6:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: test_8xdouble_shuff_mem_mask3: @@ -10161,7 +10161,7 @@ define <8 x double> @test_8xdouble_masked_shuff_mem_mask3(<8 x double> %vec1, <8 ; GENERIC-LABEL: test_8xdouble_masked_shuff_mem_mask3: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vptestnmq %zmm2, %zmm2, %k1 # sched: [1:1.00] -; GENERIC-NEXT: vshuff64x2 {{.*#+}} zmm1 {%k1} = zmm0[2,3,0,1],mem[4,5,0,1] sched: [5:1.00] +; GENERIC-NEXT: vshuff64x2 {{.*#+}} zmm1 {%k1} = zmm0[2,3,0,1],mem[4,5,0,1] sched: [6:1.00] ; GENERIC-NEXT: vmovapd %zmm1, %zmm0 # sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -10182,7 +10182,7 @@ define <8 x double> @test_8xdouble_zero_masked_shuff_mem_mask3(<8 x double> %vec ; GENERIC-LABEL: 
test_8xdouble_zero_masked_shuff_mem_mask3: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vptestnmq %zmm1, %zmm1, %k1 # sched: [1:1.00] -; GENERIC-NEXT: vshuff64x2 {{.*#+}} zmm0 {%k1} {z} = zmm0[2,3,0,1],mem[4,5,0,1] sched: [5:1.00] +; GENERIC-NEXT: vshuff64x2 {{.*#+}} zmm0 {%k1} {z} = zmm0[2,3,0,1],mem[4,5,0,1] sched: [6:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: test_8xdouble_zero_masked_shuff_mem_mask3: @@ -10374,7 +10374,7 @@ define <8 x i32> @test_8xi32_zero_masked_shuff_mask3(<8 x i32> %vec1, <8 x i32> define <8 x i32> @test_8xi32_shuff_mem_mask0(<8 x i32> %vec1, <8 x i32>* %vec2p) { ; GENERIC-LABEL: test_8xi32_shuff_mem_mask0: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vperm2i128 {{.*#+}} ymm0 = ymm0[2,3],mem[2,3] sched: [5:1.00] +; GENERIC-NEXT: vperm2i128 {{.*#+}} ymm0 = ymm0[2,3],mem[2,3] sched: [6:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: test_8xi32_shuff_mem_mask0: @@ -10389,7 +10389,7 @@ define <8 x i32> @test_8xi32_masked_shuff_mem_mask0(<8 x i32> %vec1, <8 x i32>* ; GENERIC-LABEL: test_8xi32_masked_shuff_mem_mask0: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vptestnmd %ymm2, %ymm2, %k1 # sched: [1:1.00] -; GENERIC-NEXT: vshufi32x4 {{.*#+}} ymm1 {%k1} = ymm0[4,5,6,7],mem[4,5,6,7] sched: [5:1.00] +; GENERIC-NEXT: vshufi32x4 {{.*#+}} ymm1 {%k1} = ymm0[4,5,6,7],mem[4,5,6,7] sched: [6:1.00] ; GENERIC-NEXT: vmovdqa %ymm1, %ymm0 # sched: [1:0.50] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -10410,7 +10410,7 @@ define <8 x i32> @test_8xi32_zero_masked_shuff_mem_mask0(<8 x i32> %vec1, <8 x i ; GENERIC-LABEL: test_8xi32_zero_masked_shuff_mem_mask0: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vptestnmd %ymm1, %ymm1, %k1 # sched: [1:1.00] -; GENERIC-NEXT: vshufi32x4 {{.*#+}} ymm0 {%k1} {z} = ymm0[4,5,6,7],mem[4,5,6,7] sched: [5:1.00] +; GENERIC-NEXT: vshufi32x4 {{.*#+}} ymm0 {%k1} {z} = ymm0[4,5,6,7],mem[4,5,6,7] sched: [6:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: test_8xi32_zero_masked_shuff_mem_mask0: @@ -10429,7 +10429,7 @@ define <8 x 
i32> @test_8xi32_masked_shuff_mem_mask1(<8 x i32> %vec1, <8 x i32>* ; GENERIC-LABEL: test_8xi32_masked_shuff_mem_mask1: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vptestnmd %ymm2, %ymm2, %k1 # sched: [1:1.00] -; GENERIC-NEXT: vshufi32x4 {{.*#+}} ymm1 {%k1} = ymm0[4,5,6,7],mem[0,1,2,3] sched: [5:1.00] +; GENERIC-NEXT: vshufi32x4 {{.*#+}} ymm1 {%k1} = ymm0[4,5,6,7],mem[0,1,2,3] sched: [6:1.00] ; GENERIC-NEXT: vmovdqa %ymm1, %ymm0 # sched: [1:0.50] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -10450,7 +10450,7 @@ define <8 x i32> @test_8xi32_zero_masked_shuff_mem_mask1(<8 x i32> %vec1, <8 x i ; GENERIC-LABEL: test_8xi32_zero_masked_shuff_mem_mask1: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vptestnmd %ymm1, %ymm1, %k1 # sched: [1:1.00] -; GENERIC-NEXT: vshufi32x4 {{.*#+}} ymm0 {%k1} {z} = ymm0[4,5,6,7],mem[0,1,2,3] sched: [5:1.00] +; GENERIC-NEXT: vshufi32x4 {{.*#+}} ymm0 {%k1} {z} = ymm0[4,5,6,7],mem[0,1,2,3] sched: [6:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: test_8xi32_zero_masked_shuff_mem_mask1: @@ -10469,7 +10469,7 @@ define <8 x i32> @test_8xi32_masked_shuff_mem_mask2(<8 x i32> %vec1, <8 x i32>* ; GENERIC-LABEL: test_8xi32_masked_shuff_mem_mask2: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vptestnmd %ymm2, %ymm2, %k1 # sched: [1:1.00] -; GENERIC-NEXT: vshufi32x4 {{.*#+}} ymm1 {%k1} = ymm0[4,5,6,7],mem[0,1,2,3] sched: [5:1.00] +; GENERIC-NEXT: vshufi32x4 {{.*#+}} ymm1 {%k1} = ymm0[4,5,6,7],mem[0,1,2,3] sched: [6:1.00] ; GENERIC-NEXT: vmovdqa %ymm1, %ymm0 # sched: [1:0.50] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -10490,7 +10490,7 @@ define <8 x i32> @test_8xi32_zero_masked_shuff_mem_mask2(<8 x i32> %vec1, <8 x i ; GENERIC-LABEL: test_8xi32_zero_masked_shuff_mem_mask2: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vptestnmd %ymm1, %ymm1, %k1 # sched: [1:1.00] -; GENERIC-NEXT: vshufi32x4 {{.*#+}} ymm0 {%k1} {z} = ymm0[4,5,6,7],mem[0,1,2,3] sched: [5:1.00] +; GENERIC-NEXT: vshufi32x4 {{.*#+}} ymm0 {%k1} {z} = ymm0[4,5,6,7],mem[0,1,2,3] sched: [6:1.00] ; GENERIC-NEXT: retq # 
sched: [1:1.00] ; ; SKX-LABEL: test_8xi32_zero_masked_shuff_mem_mask2: @@ -10508,7 +10508,7 @@ define <8 x i32> @test_8xi32_zero_masked_shuff_mem_mask2(<8 x i32> %vec1, <8 x i define <8 x i32> @test_8xi32_shuff_mem_mask3(<8 x i32> %vec1, <8 x i32>* %vec2p) { ; GENERIC-LABEL: test_8xi32_shuff_mem_mask3: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vperm2i128 {{.*#+}} ymm0 = ymm0[2,3],mem[0,1] sched: [5:1.00] +; GENERIC-NEXT: vperm2i128 {{.*#+}} ymm0 = ymm0[2,3],mem[0,1] sched: [6:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: test_8xi32_shuff_mem_mask3: @@ -10523,7 +10523,7 @@ define <8 x i32> @test_8xi32_masked_shuff_mem_mask3(<8 x i32> %vec1, <8 x i32>* ; GENERIC-LABEL: test_8xi32_masked_shuff_mem_mask3: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vptestnmd %ymm2, %ymm2, %k1 # sched: [1:1.00] -; GENERIC-NEXT: vshufi32x4 {{.*#+}} ymm1 {%k1} = ymm0[4,5,6,7],mem[0,1,2,3] sched: [5:1.00] +; GENERIC-NEXT: vshufi32x4 {{.*#+}} ymm1 {%k1} = ymm0[4,5,6,7],mem[0,1,2,3] sched: [6:1.00] ; GENERIC-NEXT: vmovdqa %ymm1, %ymm0 # sched: [1:0.50] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -10544,7 +10544,7 @@ define <8 x i32> @test_8xi32_zero_masked_shuff_mem_mask3(<8 x i32> %vec1, <8 x i ; GENERIC-LABEL: test_8xi32_zero_masked_shuff_mem_mask3: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vptestnmd %ymm1, %ymm1, %k1 # sched: [1:1.00] -; GENERIC-NEXT: vshufi32x4 {{.*#+}} ymm0 {%k1} {z} = ymm0[4,5,6,7],mem[0,1,2,3] sched: [5:1.00] +; GENERIC-NEXT: vshufi32x4 {{.*#+}} ymm0 {%k1} {z} = ymm0[4,5,6,7],mem[0,1,2,3] sched: [6:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: test_8xi32_zero_masked_shuff_mem_mask3: @@ -10736,7 +10736,7 @@ define <16 x i32> @test_16xi32_zero_masked_shuff_mask3(<16 x i32> %vec1, <16 x i define <16 x i32> @test_16xi32_shuff_mem_mask0(<16 x i32> %vec1, <16 x i32>* %vec2p) { ; GENERIC-LABEL: test_16xi32_shuff_mem_mask0: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vshufi32x4 {{.*#+}} zmm0 = zmm0[8,9,10,11,4,5,6,7],mem[8,9,10,11,0,1,2,3] sched: [5:1.00] +; GENERIC-NEXT: 
vshufi32x4 {{.*#+}} zmm0 = zmm0[8,9,10,11,4,5,6,7],mem[8,9,10,11,0,1,2,3] sched: [6:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: test_16xi32_shuff_mem_mask0: @@ -10751,7 +10751,7 @@ define <16 x i32> @test_16xi32_masked_shuff_mem_mask0(<16 x i32> %vec1, <16 x i3 ; GENERIC-LABEL: test_16xi32_masked_shuff_mem_mask0: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vptestnmd %zmm2, %zmm2, %k1 # sched: [1:1.00] -; GENERIC-NEXT: vshufi32x4 {{.*#+}} zmm1 {%k1} = zmm0[8,9,10,11,4,5,6,7],mem[8,9,10,11,0,1,2,3] sched: [5:1.00] +; GENERIC-NEXT: vshufi32x4 {{.*#+}} zmm1 {%k1} = zmm0[8,9,10,11,4,5,6,7],mem[8,9,10,11,0,1,2,3] sched: [6:1.00] ; GENERIC-NEXT: vmovdqa64 %zmm1, %zmm0 # sched: [1:0.50] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -10772,7 +10772,7 @@ define <16 x i32> @test_16xi32_zero_masked_shuff_mem_mask0(<16 x i32> %vec1, <16 ; GENERIC-LABEL: test_16xi32_zero_masked_shuff_mem_mask0: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vptestnmd %zmm1, %zmm1, %k1 # sched: [1:1.00] -; GENERIC-NEXT: vshufi32x4 {{.*#+}} zmm0 {%k1} {z} = zmm0[8,9,10,11,4,5,6,7],mem[8,9,10,11,0,1,2,3] sched: [5:1.00] +; GENERIC-NEXT: vshufi32x4 {{.*#+}} zmm0 {%k1} {z} = zmm0[8,9,10,11,4,5,6,7],mem[8,9,10,11,0,1,2,3] sched: [6:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: test_16xi32_zero_masked_shuff_mem_mask0: @@ -10791,7 +10791,7 @@ define <16 x i32> @test_16xi32_masked_shuff_mem_mask1(<16 x i32> %vec1, <16 x i3 ; GENERIC-LABEL: test_16xi32_masked_shuff_mem_mask1: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vptestnmd %zmm2, %zmm2, %k1 # sched: [1:1.00] -; GENERIC-NEXT: vshufi32x4 {{.*#+}} zmm1 {%k1} = zmm0[4,5,6,7,4,5,6,7],mem[0,1,2,3,8,9,10,11] sched: [5:1.00] +; GENERIC-NEXT: vshufi32x4 {{.*#+}} zmm1 {%k1} = zmm0[4,5,6,7,4,5,6,7],mem[0,1,2,3,8,9,10,11] sched: [6:1.00] ; GENERIC-NEXT: vmovdqa64 %zmm1, %zmm0 # sched: [1:0.50] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -10812,7 +10812,7 @@ define <16 x i32> @test_16xi32_zero_masked_shuff_mem_mask1(<16 x i32> %vec1, <16 ; GENERIC-LABEL: 
test_16xi32_zero_masked_shuff_mem_mask1: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vptestnmd %zmm1, %zmm1, %k1 # sched: [1:1.00] -; GENERIC-NEXT: vshufi32x4 {{.*#+}} zmm0 {%k1} {z} = zmm0[4,5,6,7,4,5,6,7],mem[0,1,2,3,8,9,10,11] sched: [5:1.00] +; GENERIC-NEXT: vshufi32x4 {{.*#+}} zmm0 {%k1} {z} = zmm0[4,5,6,7,4,5,6,7],mem[0,1,2,3,8,9,10,11] sched: [6:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: test_16xi32_zero_masked_shuff_mem_mask1: @@ -10831,7 +10831,7 @@ define <16 x i32> @test_16xi32_masked_shuff_mem_mask2(<16 x i32> %vec1, <16 x i3 ; GENERIC-LABEL: test_16xi32_masked_shuff_mem_mask2: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vptestnmd %zmm2, %zmm2, %k1 # sched: [1:1.00] -; GENERIC-NEXT: vshufi32x4 {{.*#+}} zmm1 {%k1} = zmm0[4,5,6,7,8,9,10,11],mem[12,13,14,15,12,13,14,15] sched: [5:1.00] +; GENERIC-NEXT: vshufi32x4 {{.*#+}} zmm1 {%k1} = zmm0[4,5,6,7,8,9,10,11],mem[12,13,14,15,12,13,14,15] sched: [6:1.00] ; GENERIC-NEXT: vmovdqa64 %zmm1, %zmm0 # sched: [1:0.50] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -10852,7 +10852,7 @@ define <16 x i32> @test_16xi32_zero_masked_shuff_mem_mask2(<16 x i32> %vec1, <16 ; GENERIC-LABEL: test_16xi32_zero_masked_shuff_mem_mask2: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vptestnmd %zmm1, %zmm1, %k1 # sched: [1:1.00] -; GENERIC-NEXT: vshufi32x4 {{.*#+}} zmm0 {%k1} {z} = zmm0[4,5,6,7,8,9,10,11],mem[12,13,14,15,12,13,14,15] sched: [5:1.00] +; GENERIC-NEXT: vshufi32x4 {{.*#+}} zmm0 {%k1} {z} = zmm0[4,5,6,7,8,9,10,11],mem[12,13,14,15,12,13,14,15] sched: [6:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: test_16xi32_zero_masked_shuff_mem_mask2: @@ -10870,7 +10870,7 @@ define <16 x i32> @test_16xi32_zero_masked_shuff_mem_mask2(<16 x i32> %vec1, <16 define <16 x i32> @test_16xi32_shuff_mem_mask3(<16 x i32> %vec1, <16 x i32>* %vec2p) { ; GENERIC-LABEL: test_16xi32_shuff_mem_mask3: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vshufi32x4 {{.*#+}} zmm0 = zmm0[4,5,6,7,4,5,6,7],mem[4,5,6,7,12,13,14,15] sched: [5:1.00] +; GENERIC-NEXT: 
vshufi32x4 {{.*#+}} zmm0 = zmm0[4,5,6,7,4,5,6,7],mem[4,5,6,7,12,13,14,15] sched: [6:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: test_16xi32_shuff_mem_mask3: @@ -10885,7 +10885,7 @@ define <16 x i32> @test_16xi32_masked_shuff_mem_mask3(<16 x i32> %vec1, <16 x i3 ; GENERIC-LABEL: test_16xi32_masked_shuff_mem_mask3: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vptestnmd %zmm2, %zmm2, %k1 # sched: [1:1.00] -; GENERIC-NEXT: vshufi32x4 {{.*#+}} zmm1 {%k1} = zmm0[4,5,6,7,4,5,6,7],mem[4,5,6,7,12,13,14,15] sched: [5:1.00] +; GENERIC-NEXT: vshufi32x4 {{.*#+}} zmm1 {%k1} = zmm0[4,5,6,7,4,5,6,7],mem[4,5,6,7,12,13,14,15] sched: [6:1.00] ; GENERIC-NEXT: vmovdqa64 %zmm1, %zmm0 # sched: [1:0.50] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -10906,7 +10906,7 @@ define <16 x i32> @test_16xi32_zero_masked_shuff_mem_mask3(<16 x i32> %vec1, <16 ; GENERIC-LABEL: test_16xi32_zero_masked_shuff_mem_mask3: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vptestnmd %zmm1, %zmm1, %k1 # sched: [1:1.00] -; GENERIC-NEXT: vshufi32x4 {{.*#+}} zmm0 {%k1} {z} = zmm0[4,5,6,7,4,5,6,7],mem[4,5,6,7,12,13,14,15] sched: [5:1.00] +; GENERIC-NEXT: vshufi32x4 {{.*#+}} zmm0 {%k1} {z} = zmm0[4,5,6,7,4,5,6,7],mem[4,5,6,7,12,13,14,15] sched: [6:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: test_16xi32_zero_masked_shuff_mem_mask3: @@ -11098,7 +11098,7 @@ define <4 x i64> @test_4xi64_zero_masked_shuff_mask3(<4 x i64> %vec1, <4 x i64> define <4 x i64> @test_4xi64_shuff_mem_mask0(<4 x i64> %vec1, <4 x i64>* %vec2p) { ; GENERIC-LABEL: test_4xi64_shuff_mem_mask0: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vperm2i128 {{.*#+}} ymm0 = ymm0[2,3],mem[2,3] sched: [5:1.00] +; GENERIC-NEXT: vperm2i128 {{.*#+}} ymm0 = ymm0[2,3],mem[2,3] sched: [6:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: test_4xi64_shuff_mem_mask0: @@ -11113,7 +11113,7 @@ define <4 x i64> @test_4xi64_masked_shuff_mem_mask0(<4 x i64> %vec1, <4 x i64>* ; GENERIC-LABEL: test_4xi64_masked_shuff_mem_mask0: ; GENERIC: # %bb.0: ; GENERIC-NEXT: 
vptestnmq %ymm2, %ymm2, %k1 # sched: [1:1.00] -; GENERIC-NEXT: vshufi64x2 {{.*#+}} ymm1 {%k1} = ymm0[2,3],mem[2,3] sched: [5:1.00] +; GENERIC-NEXT: vshufi64x2 {{.*#+}} ymm1 {%k1} = ymm0[2,3],mem[2,3] sched: [6:1.00] ; GENERIC-NEXT: vmovdqa %ymm1, %ymm0 # sched: [1:0.50] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -11134,7 +11134,7 @@ define <4 x i64> @test_4xi64_zero_masked_shuff_mem_mask0(<4 x i64> %vec1, <4 x i ; GENERIC-LABEL: test_4xi64_zero_masked_shuff_mem_mask0: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vptestnmq %ymm1, %ymm1, %k1 # sched: [1:1.00] -; GENERIC-NEXT: vshufi64x2 {{.*#+}} ymm0 {%k1} {z} = ymm0[2,3],mem[2,3] sched: [5:1.00] +; GENERIC-NEXT: vshufi64x2 {{.*#+}} ymm0 {%k1} {z} = ymm0[2,3],mem[2,3] sched: [6:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: test_4xi64_zero_masked_shuff_mem_mask0: @@ -11153,7 +11153,7 @@ define <4 x i64> @test_4xi64_masked_shuff_mem_mask1(<4 x i64> %vec1, <4 x i64>* ; GENERIC-LABEL: test_4xi64_masked_shuff_mem_mask1: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vptestnmq %ymm2, %ymm2, %k1 # sched: [1:1.00] -; GENERIC-NEXT: vshufi64x2 {{.*#+}} ymm1 {%k1} = ymm0[2,3],mem[0,1] sched: [5:1.00] +; GENERIC-NEXT: vshufi64x2 {{.*#+}} ymm1 {%k1} = ymm0[2,3],mem[0,1] sched: [6:1.00] ; GENERIC-NEXT: vmovdqa %ymm1, %ymm0 # sched: [1:0.50] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -11174,7 +11174,7 @@ define <4 x i64> @test_4xi64_zero_masked_shuff_mem_mask1(<4 x i64> %vec1, <4 x i ; GENERIC-LABEL: test_4xi64_zero_masked_shuff_mem_mask1: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vptestnmq %ymm1, %ymm1, %k1 # sched: [1:1.00] -; GENERIC-NEXT: vshufi64x2 {{.*#+}} ymm0 {%k1} {z} = ymm0[2,3],mem[0,1] sched: [5:1.00] +; GENERIC-NEXT: vshufi64x2 {{.*#+}} ymm0 {%k1} {z} = ymm0[2,3],mem[0,1] sched: [6:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: test_4xi64_zero_masked_shuff_mem_mask1: @@ -11193,7 +11193,7 @@ define <4 x i64> @test_4xi64_masked_shuff_mem_mask2(<4 x i64> %vec1, <4 x i64>* ; GENERIC-LABEL: 
test_4xi64_masked_shuff_mem_mask2: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vptestnmq %ymm2, %ymm2, %k1 # sched: [1:1.00] -; GENERIC-NEXT: vshufi64x2 {{.*#+}} ymm1 {%k1} = ymm0[2,3],mem[0,1] sched: [5:1.00] +; GENERIC-NEXT: vshufi64x2 {{.*#+}} ymm1 {%k1} = ymm0[2,3],mem[0,1] sched: [6:1.00] ; GENERIC-NEXT: vmovdqa %ymm1, %ymm0 # sched: [1:0.50] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -11214,7 +11214,7 @@ define <4 x i64> @test_4xi64_zero_masked_shuff_mem_mask2(<4 x i64> %vec1, <4 x i ; GENERIC-LABEL: test_4xi64_zero_masked_shuff_mem_mask2: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vptestnmq %ymm1, %ymm1, %k1 # sched: [1:1.00] -; GENERIC-NEXT: vshufi64x2 {{.*#+}} ymm0 {%k1} {z} = ymm0[2,3],mem[0,1] sched: [5:1.00] +; GENERIC-NEXT: vshufi64x2 {{.*#+}} ymm0 {%k1} {z} = ymm0[2,3],mem[0,1] sched: [6:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: test_4xi64_zero_masked_shuff_mem_mask2: @@ -11232,7 +11232,7 @@ define <4 x i64> @test_4xi64_zero_masked_shuff_mem_mask2(<4 x i64> %vec1, <4 x i define <4 x i64> @test_4xi64_shuff_mem_mask3(<4 x i64> %vec1, <4 x i64>* %vec2p) { ; GENERIC-LABEL: test_4xi64_shuff_mem_mask3: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vperm2i128 {{.*#+}} ymm0 = ymm0[2,3],mem[2,3] sched: [5:1.00] +; GENERIC-NEXT: vperm2i128 {{.*#+}} ymm0 = ymm0[2,3],mem[2,3] sched: [6:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: test_4xi64_shuff_mem_mask3: @@ -11247,7 +11247,7 @@ define <4 x i64> @test_4xi64_masked_shuff_mem_mask3(<4 x i64> %vec1, <4 x i64>* ; GENERIC-LABEL: test_4xi64_masked_shuff_mem_mask3: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vptestnmq %ymm2, %ymm2, %k1 # sched: [1:1.00] -; GENERIC-NEXT: vshufi64x2 {{.*#+}} ymm1 {%k1} = ymm0[2,3],mem[2,3] sched: [5:1.00] +; GENERIC-NEXT: vshufi64x2 {{.*#+}} ymm1 {%k1} = ymm0[2,3],mem[2,3] sched: [6:1.00] ; GENERIC-NEXT: vmovdqa %ymm1, %ymm0 # sched: [1:0.50] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -11268,7 +11268,7 @@ define <4 x i64> @test_4xi64_zero_masked_shuff_mem_mask3(<4 x i64> %vec1, <4 x 
i ; GENERIC-LABEL: test_4xi64_zero_masked_shuff_mem_mask3: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vptestnmq %ymm1, %ymm1, %k1 # sched: [1:1.00] -; GENERIC-NEXT: vshufi64x2 {{.*#+}} ymm0 {%k1} {z} = ymm0[2,3],mem[2,3] sched: [5:1.00] +; GENERIC-NEXT: vshufi64x2 {{.*#+}} ymm0 {%k1} {z} = ymm0[2,3],mem[2,3] sched: [6:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: test_4xi64_zero_masked_shuff_mem_mask3: @@ -11460,7 +11460,7 @@ define <8 x i64> @test_8xi64_zero_masked_shuff_mask3(<8 x i64> %vec1, <8 x i64> define <8 x i64> @test_8xi64_shuff_mem_mask0(<8 x i64> %vec1, <8 x i64>* %vec2p) { ; GENERIC-LABEL: test_8xi64_shuff_mem_mask0: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vshufi64x2 {{.*#+}} zmm0 = zmm0[2,3,2,3],mem[4,5,2,3] sched: [5:1.00] +; GENERIC-NEXT: vshufi64x2 {{.*#+}} zmm0 = zmm0[2,3,2,3],mem[4,5,2,3] sched: [6:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: test_8xi64_shuff_mem_mask0: @@ -11475,7 +11475,7 @@ define <8 x i64> @test_8xi64_masked_shuff_mem_mask0(<8 x i64> %vec1, <8 x i64>* ; GENERIC-LABEL: test_8xi64_masked_shuff_mem_mask0: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vptestnmq %zmm2, %zmm2, %k1 # sched: [1:1.00] -; GENERIC-NEXT: vshufi64x2 {{.*#+}} zmm1 {%k1} = zmm0[2,3,2,3],mem[4,5,2,3] sched: [5:1.00] +; GENERIC-NEXT: vshufi64x2 {{.*#+}} zmm1 {%k1} = zmm0[2,3,2,3],mem[4,5,2,3] sched: [6:1.00] ; GENERIC-NEXT: vmovdqa64 %zmm1, %zmm0 # sched: [1:0.50] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -11496,7 +11496,7 @@ define <8 x i64> @test_8xi64_zero_masked_shuff_mem_mask0(<8 x i64> %vec1, <8 x i ; GENERIC-LABEL: test_8xi64_zero_masked_shuff_mem_mask0: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vptestnmq %zmm1, %zmm1, %k1 # sched: [1:1.00] -; GENERIC-NEXT: vshufi64x2 {{.*#+}} zmm0 {%k1} {z} = zmm0[2,3,2,3],mem[4,5,2,3] sched: [5:1.00] +; GENERIC-NEXT: vshufi64x2 {{.*#+}} zmm0 {%k1} {z} = zmm0[2,3,2,3],mem[4,5,2,3] sched: [6:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: test_8xi64_zero_masked_shuff_mem_mask0: @@ -11515,7 +11515,7 @@ 
define <8 x i64> @test_8xi64_masked_shuff_mem_mask1(<8 x i64> %vec1, <8 x i64>* ; GENERIC-LABEL: test_8xi64_masked_shuff_mem_mask1: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vptestnmq %zmm2, %zmm2, %k1 # sched: [1:1.00] -; GENERIC-NEXT: vshufi64x2 {{.*#+}} zmm1 {%k1} = zmm0[2,3,0,1],mem[0,1,0,1] sched: [5:1.00] +; GENERIC-NEXT: vshufi64x2 {{.*#+}} zmm1 {%k1} = zmm0[2,3,0,1],mem[0,1,0,1] sched: [6:1.00] ; GENERIC-NEXT: vmovdqa64 %zmm1, %zmm0 # sched: [1:0.50] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -11536,7 +11536,7 @@ define <8 x i64> @test_8xi64_zero_masked_shuff_mem_mask1(<8 x i64> %vec1, <8 x i ; GENERIC-LABEL: test_8xi64_zero_masked_shuff_mem_mask1: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vptestnmq %zmm1, %zmm1, %k1 # sched: [1:1.00] -; GENERIC-NEXT: vshufi64x2 {{.*#+}} zmm0 {%k1} {z} = zmm0[2,3,0,1],mem[0,1,0,1] sched: [5:1.00] +; GENERIC-NEXT: vshufi64x2 {{.*#+}} zmm0 {%k1} {z} = zmm0[2,3,0,1],mem[0,1,0,1] sched: [6:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: test_8xi64_zero_masked_shuff_mem_mask1: @@ -11555,7 +11555,7 @@ define <8 x i64> @test_8xi64_masked_shuff_mem_mask2(<8 x i64> %vec1, <8 x i64>* ; GENERIC-LABEL: test_8xi64_masked_shuff_mem_mask2: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vptestnmq %zmm2, %zmm2, %k1 # sched: [1:1.00] -; GENERIC-NEXT: vshufi64x2 {{.*#+}} zmm1 {%k1} = zmm0[4,5,0,1],mem[2,3,2,3] sched: [5:1.00] +; GENERIC-NEXT: vshufi64x2 {{.*#+}} zmm1 {%k1} = zmm0[4,5,0,1],mem[2,3,2,3] sched: [6:1.00] ; GENERIC-NEXT: vmovdqa64 %zmm1, %zmm0 # sched: [1:0.50] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -11576,7 +11576,7 @@ define <8 x i64> @test_8xi64_zero_masked_shuff_mem_mask2(<8 x i64> %vec1, <8 x i ; GENERIC-LABEL: test_8xi64_zero_masked_shuff_mem_mask2: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vptestnmq %zmm1, %zmm1, %k1 # sched: [1:1.00] -; GENERIC-NEXT: vshufi64x2 {{.*#+}} zmm0 {%k1} {z} = zmm0[4,5,0,1],mem[2,3,2,3] sched: [5:1.00] +; GENERIC-NEXT: vshufi64x2 {{.*#+}} zmm0 {%k1} {z} = zmm0[4,5,0,1],mem[2,3,2,3] sched: [6:1.00] ; 
GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: test_8xi64_zero_masked_shuff_mem_mask2: @@ -11594,7 +11594,7 @@ define <8 x i64> @test_8xi64_zero_masked_shuff_mem_mask2(<8 x i64> %vec1, <8 x i define <8 x i64> @test_8xi64_shuff_mem_mask3(<8 x i64> %vec1, <8 x i64>* %vec2p) { ; GENERIC-LABEL: test_8xi64_shuff_mem_mask3: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vshufi64x2 {{.*#+}} zmm0 = zmm0[2,3,0,1],mem[6,7,2,3] sched: [5:1.00] +; GENERIC-NEXT: vshufi64x2 {{.*#+}} zmm0 = zmm0[2,3,0,1],mem[6,7,2,3] sched: [6:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: test_8xi64_shuff_mem_mask3: @@ -11609,7 +11609,7 @@ define <8 x i64> @test_8xi64_masked_shuff_mem_mask3(<8 x i64> %vec1, <8 x i64>* ; GENERIC-LABEL: test_8xi64_masked_shuff_mem_mask3: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vptestnmq %zmm2, %zmm2, %k1 # sched: [1:1.00] -; GENERIC-NEXT: vshufi64x2 {{.*#+}} zmm1 {%k1} = zmm0[2,3,0,1],mem[6,7,2,3] sched: [5:1.00] +; GENERIC-NEXT: vshufi64x2 {{.*#+}} zmm1 {%k1} = zmm0[2,3,0,1],mem[6,7,2,3] sched: [6:1.00] ; GENERIC-NEXT: vmovdqa64 %zmm1, %zmm0 # sched: [1:0.50] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -11630,7 +11630,7 @@ define <8 x i64> @test_8xi64_zero_masked_shuff_mem_mask3(<8 x i64> %vec1, <8 x i ; GENERIC-LABEL: test_8xi64_zero_masked_shuff_mem_mask3: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vptestnmq %zmm1, %zmm1, %k1 # sched: [1:1.00] -; GENERIC-NEXT: vshufi64x2 {{.*#+}} zmm0 {%k1} {z} = zmm0[2,3,0,1],mem[6,7,2,3] sched: [5:1.00] +; GENERIC-NEXT: vshufi64x2 {{.*#+}} zmm0 {%k1} {z} = zmm0[2,3,0,1],mem[6,7,2,3] sched: [6:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: test_8xi64_zero_masked_shuff_mem_mask3: @@ -11837,7 +11837,7 @@ define <4 x float> @test_4xfloat_masked_unpack_low_mem_mask0(<4 x float> %vec1, ; GENERIC-LABEL: test_4xfloat_masked_unpack_low_mem_mask0: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vptestnmd %xmm2, %xmm2, %k1 # sched: [1:1.00] -; GENERIC-NEXT: vunpcklps {{.*#+}} xmm1 {%k1} = xmm0[0],mem[0],xmm0[1],mem[1] sched: [7:1.00] +; 
GENERIC-NEXT: vunpcklps {{.*#+}} xmm1 {%k1} = xmm0[0],mem[0],xmm0[1],mem[1] sched: [8:1.00] ; GENERIC-NEXT: vmovaps %xmm1, %xmm0 # sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -11858,7 +11858,7 @@ define <4 x float> @test_4xfloat_zero_masked_unpack_low_mem_mask0(<4 x float> %v ; GENERIC-LABEL: test_4xfloat_zero_masked_unpack_low_mem_mask0: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vptestnmd %xmm1, %xmm1, %k1 # sched: [1:1.00] -; GENERIC-NEXT: vunpcklps {{.*#+}} xmm0 {%k1} {z} = xmm0[0],mem[0],xmm0[1],mem[1] sched: [7:1.00] +; GENERIC-NEXT: vunpcklps {{.*#+}} xmm0 {%k1} {z} = xmm0[0],mem[0],xmm0[1],mem[1] sched: [8:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: test_4xfloat_zero_masked_unpack_low_mem_mask0: @@ -11877,7 +11877,7 @@ define <4 x float> @test_4xfloat_masked_unpack_low_mem_mask1(<4 x float> %vec1, ; GENERIC-LABEL: test_4xfloat_masked_unpack_low_mem_mask1: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vptestnmd %xmm2, %xmm2, %k1 # sched: [1:1.00] -; GENERIC-NEXT: vunpcklps {{.*#+}} xmm1 {%k1} = xmm0[0],mem[0],xmm0[1],mem[1] sched: [7:1.00] +; GENERIC-NEXT: vunpcklps {{.*#+}} xmm1 {%k1} = xmm0[0],mem[0],xmm0[1],mem[1] sched: [8:1.00] ; GENERIC-NEXT: vmovaps %xmm1, %xmm0 # sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -11898,7 +11898,7 @@ define <4 x float> @test_4xfloat_zero_masked_unpack_low_mem_mask1(<4 x float> %v ; GENERIC-LABEL: test_4xfloat_zero_masked_unpack_low_mem_mask1: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vptestnmd %xmm1, %xmm1, %k1 # sched: [1:1.00] -; GENERIC-NEXT: vunpcklps {{.*#+}} xmm0 {%k1} {z} = xmm0[0],mem[0],xmm0[1],mem[1] sched: [7:1.00] +; GENERIC-NEXT: vunpcklps {{.*#+}} xmm0 {%k1} {z} = xmm0[0],mem[0],xmm0[1],mem[1] sched: [8:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: test_4xfloat_zero_masked_unpack_low_mem_mask1: @@ -11917,7 +11917,7 @@ define <4 x float> @test_4xfloat_masked_unpack_low_mem_mask2(<4 x float> %vec1, ; GENERIC-LABEL: test_4xfloat_masked_unpack_low_mem_mask2: ; GENERIC: # 
%bb.0: ; GENERIC-NEXT: vptestnmd %xmm2, %xmm2, %k1 # sched: [1:1.00] -; GENERIC-NEXT: vunpcklps {{.*#+}} xmm1 {%k1} = xmm0[0],mem[0],xmm0[1],mem[1] sched: [7:1.00] +; GENERIC-NEXT: vunpcklps {{.*#+}} xmm1 {%k1} = xmm0[0],mem[0],xmm0[1],mem[1] sched: [8:1.00] ; GENERIC-NEXT: vmovaps %xmm1, %xmm0 # sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -11938,7 +11938,7 @@ define <4 x float> @test_4xfloat_zero_masked_unpack_low_mem_mask2(<4 x float> %v ; GENERIC-LABEL: test_4xfloat_zero_masked_unpack_low_mem_mask2: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vptestnmd %xmm1, %xmm1, %k1 # sched: [1:1.00] -; GENERIC-NEXT: vunpcklps {{.*#+}} xmm0 {%k1} {z} = xmm0[0],mem[0],xmm0[1],mem[1] sched: [7:1.00] +; GENERIC-NEXT: vunpcklps {{.*#+}} xmm0 {%k1} {z} = xmm0[0],mem[0],xmm0[1],mem[1] sched: [8:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: test_4xfloat_zero_masked_unpack_low_mem_mask2: @@ -11971,7 +11971,7 @@ define <4 x float> @test_4xfloat_masked_unpack_low_mem_mask3(<4 x float> %vec1, ; GENERIC-LABEL: test_4xfloat_masked_unpack_low_mem_mask3: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vptestnmd %xmm2, %xmm2, %k1 # sched: [1:1.00] -; GENERIC-NEXT: vunpcklps {{.*#+}} xmm1 {%k1} = xmm0[0],mem[0],xmm0[1],mem[1] sched: [7:1.00] +; GENERIC-NEXT: vunpcklps {{.*#+}} xmm1 {%k1} = xmm0[0],mem[0],xmm0[1],mem[1] sched: [8:1.00] ; GENERIC-NEXT: vmovaps %xmm1, %xmm0 # sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -11992,7 +11992,7 @@ define <4 x float> @test_4xfloat_zero_masked_unpack_low_mem_mask3(<4 x float> %v ; GENERIC-LABEL: test_4xfloat_zero_masked_unpack_low_mem_mask3: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vptestnmd %xmm1, %xmm1, %k1 # sched: [1:1.00] -; GENERIC-NEXT: vunpcklps {{.*#+}} xmm0 {%k1} {z} = xmm0[0],mem[0],xmm0[1],mem[1] sched: [7:1.00] +; GENERIC-NEXT: vunpcklps {{.*#+}} xmm0 {%k1} {z} = xmm0[0],mem[0],xmm0[1],mem[1] sched: [8:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: test_4xfloat_zero_masked_unpack_low_mem_mask3: @@ -12199,7 
+12199,7 @@ define <8 x float> @test_8xfloat_masked_unpack_low_mem_mask0(<8 x float> %vec1, ; GENERIC-LABEL: test_8xfloat_masked_unpack_low_mem_mask0: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vptestnmd %ymm2, %ymm2, %k1 # sched: [1:1.00] -; GENERIC-NEXT: vunpcklps {{.*#+}} ymm1 {%k1} = ymm0[0],mem[0],ymm0[1],mem[1],ymm0[4],mem[4],ymm0[5],mem[5] sched: [7:1.00] +; GENERIC-NEXT: vunpcklps {{.*#+}} ymm1 {%k1} = ymm0[0],mem[0],ymm0[1],mem[1],ymm0[4],mem[4],ymm0[5],mem[5] sched: [8:1.00] ; GENERIC-NEXT: vmovaps %ymm1, %ymm0 # sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -12220,7 +12220,7 @@ define <8 x float> @test_8xfloat_zero_masked_unpack_low_mem_mask0(<8 x float> %v ; GENERIC-LABEL: test_8xfloat_zero_masked_unpack_low_mem_mask0: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vptestnmd %ymm1, %ymm1, %k1 # sched: [1:1.00] -; GENERIC-NEXT: vunpcklps {{.*#+}} ymm0 {%k1} {z} = ymm0[0],mem[0],ymm0[1],mem[1],ymm0[4],mem[4],ymm0[5],mem[5] sched: [7:1.00] +; GENERIC-NEXT: vunpcklps {{.*#+}} ymm0 {%k1} {z} = ymm0[0],mem[0],ymm0[1],mem[1],ymm0[4],mem[4],ymm0[5],mem[5] sched: [8:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: test_8xfloat_zero_masked_unpack_low_mem_mask0: @@ -12239,7 +12239,7 @@ define <8 x float> @test_8xfloat_masked_unpack_low_mem_mask1(<8 x float> %vec1, ; GENERIC-LABEL: test_8xfloat_masked_unpack_low_mem_mask1: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vptestnmd %ymm2, %ymm2, %k1 # sched: [1:1.00] -; GENERIC-NEXT: vunpcklps {{.*#+}} ymm1 {%k1} = ymm0[0],mem[0],ymm0[1],mem[1],ymm0[4],mem[4],ymm0[5],mem[5] sched: [7:1.00] +; GENERIC-NEXT: vunpcklps {{.*#+}} ymm1 {%k1} = ymm0[0],mem[0],ymm0[1],mem[1],ymm0[4],mem[4],ymm0[5],mem[5] sched: [8:1.00] ; GENERIC-NEXT: vmovaps %ymm1, %ymm0 # sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -12260,7 +12260,7 @@ define <8 x float> @test_8xfloat_zero_masked_unpack_low_mem_mask1(<8 x float> %v ; GENERIC-LABEL: test_8xfloat_zero_masked_unpack_low_mem_mask1: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vptestnmd 
%ymm1, %ymm1, %k1 # sched: [1:1.00] -; GENERIC-NEXT: vunpcklps {{.*#+}} ymm0 {%k1} {z} = ymm0[0],mem[0],ymm0[1],mem[1],ymm0[4],mem[4],ymm0[5],mem[5] sched: [7:1.00] +; GENERIC-NEXT: vunpcklps {{.*#+}} ymm0 {%k1} {z} = ymm0[0],mem[0],ymm0[1],mem[1],ymm0[4],mem[4],ymm0[5],mem[5] sched: [8:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: test_8xfloat_zero_masked_unpack_low_mem_mask1: @@ -12279,7 +12279,7 @@ define <8 x float> @test_8xfloat_masked_unpack_low_mem_mask2(<8 x float> %vec1, ; GENERIC-LABEL: test_8xfloat_masked_unpack_low_mem_mask2: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vptestnmd %ymm2, %ymm2, %k1 # sched: [1:1.00] -; GENERIC-NEXT: vunpcklps {{.*#+}} ymm1 {%k1} = ymm0[0],mem[0],ymm0[1],mem[1],ymm0[4],mem[4],ymm0[5],mem[5] sched: [7:1.00] +; GENERIC-NEXT: vunpcklps {{.*#+}} ymm1 {%k1} = ymm0[0],mem[0],ymm0[1],mem[1],ymm0[4],mem[4],ymm0[5],mem[5] sched: [8:1.00] ; GENERIC-NEXT: vmovaps %ymm1, %ymm0 # sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -12300,7 +12300,7 @@ define <8 x float> @test_8xfloat_zero_masked_unpack_low_mem_mask2(<8 x float> %v ; GENERIC-LABEL: test_8xfloat_zero_masked_unpack_low_mem_mask2: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vptestnmd %ymm1, %ymm1, %k1 # sched: [1:1.00] -; GENERIC-NEXT: vunpcklps {{.*#+}} ymm0 {%k1} {z} = ymm0[0],mem[0],ymm0[1],mem[1],ymm0[4],mem[4],ymm0[5],mem[5] sched: [7:1.00] +; GENERIC-NEXT: vunpcklps {{.*#+}} ymm0 {%k1} {z} = ymm0[0],mem[0],ymm0[1],mem[1],ymm0[4],mem[4],ymm0[5],mem[5] sched: [8:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: test_8xfloat_zero_masked_unpack_low_mem_mask2: @@ -12333,7 +12333,7 @@ define <8 x float> @test_8xfloat_masked_unpack_low_mem_mask3(<8 x float> %vec1, ; GENERIC-LABEL: test_8xfloat_masked_unpack_low_mem_mask3: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vptestnmd %ymm2, %ymm2, %k1 # sched: [1:1.00] -; GENERIC-NEXT: vunpcklps {{.*#+}} ymm1 {%k1} = ymm0[0],mem[0],ymm0[1],mem[1],ymm0[4],mem[4],ymm0[5],mem[5] sched: [7:1.00] +; GENERIC-NEXT: vunpcklps 
{{.*#+}} ymm1 {%k1} = ymm0[0],mem[0],ymm0[1],mem[1],ymm0[4],mem[4],ymm0[5],mem[5] sched: [8:1.00] ; GENERIC-NEXT: vmovaps %ymm1, %ymm0 # sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -12354,7 +12354,7 @@ define <8 x float> @test_8xfloat_zero_masked_unpack_low_mem_mask3(<8 x float> %v ; GENERIC-LABEL: test_8xfloat_zero_masked_unpack_low_mem_mask3: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vptestnmd %ymm1, %ymm1, %k1 # sched: [1:1.00] -; GENERIC-NEXT: vunpcklps {{.*#+}} ymm0 {%k1} {z} = ymm0[0],mem[0],ymm0[1],mem[1],ymm0[4],mem[4],ymm0[5],mem[5] sched: [7:1.00] +; GENERIC-NEXT: vunpcklps {{.*#+}} ymm0 {%k1} {z} = ymm0[0],mem[0],ymm0[1],mem[1],ymm0[4],mem[4],ymm0[5],mem[5] sched: [8:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: test_8xfloat_zero_masked_unpack_low_mem_mask3: @@ -12546,7 +12546,7 @@ define <16 x float> @test_16xfloat_zero_masked_unpack_low_mask3(<16 x float> %ve define <16 x float> @test_16xfloat_unpack_low_mem_mask0(<16 x float> %vec1, <16 x float>* %vec2p) { ; GENERIC-LABEL: test_16xfloat_unpack_low_mem_mask0: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vunpcklps {{.*#+}} zmm0 = zmm0[0],mem[0],zmm0[1],mem[1],zmm0[4],mem[4],zmm0[5],mem[5],zmm0[8],mem[8],zmm0[9],mem[9],zmm0[12],mem[12],zmm0[13],mem[13] sched: [7:1.00] +; GENERIC-NEXT: vunpcklps {{.*#+}} zmm0 = zmm0[0],mem[0],zmm0[1],mem[1],zmm0[4],mem[4],zmm0[5],mem[5],zmm0[8],mem[8],zmm0[9],mem[9],zmm0[12],mem[12],zmm0[13],mem[13] sched: [8:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: test_16xfloat_unpack_low_mem_mask0: @@ -12561,7 +12561,7 @@ define <16 x float> @test_16xfloat_masked_unpack_low_mem_mask0(<16 x float> %vec ; GENERIC-LABEL: test_16xfloat_masked_unpack_low_mem_mask0: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vptestnmd %zmm2, %zmm2, %k1 # sched: [1:1.00] -; GENERIC-NEXT: vunpcklps {{.*#+}} zmm1 {%k1} = zmm0[0],mem[0],zmm0[1],mem[1],zmm0[4],mem[4],zmm0[5],mem[5],zmm0[8],mem[8],zmm0[9],mem[9],zmm0[12],mem[12],zmm0[13],mem[13] sched: [7:1.00] +; GENERIC-NEXT: 
vunpcklps {{.*#+}} zmm1 {%k1} = zmm0[0],mem[0],zmm0[1],mem[1],zmm0[4],mem[4],zmm0[5],mem[5],zmm0[8],mem[8],zmm0[9],mem[9],zmm0[12],mem[12],zmm0[13],mem[13] sched: [8:1.00] ; GENERIC-NEXT: vmovaps %zmm1, %zmm0 # sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -12582,7 +12582,7 @@ define <16 x float> @test_16xfloat_zero_masked_unpack_low_mem_mask0(<16 x float> ; GENERIC-LABEL: test_16xfloat_zero_masked_unpack_low_mem_mask0: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vptestnmd %zmm1, %zmm1, %k1 # sched: [1:1.00] -; GENERIC-NEXT: vunpcklps {{.*#+}} zmm0 {%k1} {z} = zmm0[0],mem[0],zmm0[1],mem[1],zmm0[4],mem[4],zmm0[5],mem[5],zmm0[8],mem[8],zmm0[9],mem[9],zmm0[12],mem[12],zmm0[13],mem[13] sched: [7:1.00] +; GENERIC-NEXT: vunpcklps {{.*#+}} zmm0 {%k1} {z} = zmm0[0],mem[0],zmm0[1],mem[1],zmm0[4],mem[4],zmm0[5],mem[5],zmm0[8],mem[8],zmm0[9],mem[9],zmm0[12],mem[12],zmm0[13],mem[13] sched: [8:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: test_16xfloat_zero_masked_unpack_low_mem_mask0: @@ -12601,7 +12601,7 @@ define <16 x float> @test_16xfloat_masked_unpack_low_mem_mask1(<16 x float> %vec ; GENERIC-LABEL: test_16xfloat_masked_unpack_low_mem_mask1: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vptestnmd %zmm2, %zmm2, %k1 # sched: [1:1.00] -; GENERIC-NEXT: vunpcklps {{.*#+}} zmm1 {%k1} = zmm0[0],mem[0],zmm0[1],mem[1],zmm0[4],mem[4],zmm0[5],mem[5],zmm0[8],mem[8],zmm0[9],mem[9],zmm0[12],mem[12],zmm0[13],mem[13] sched: [7:1.00] +; GENERIC-NEXT: vunpcklps {{.*#+}} zmm1 {%k1} = zmm0[0],mem[0],zmm0[1],mem[1],zmm0[4],mem[4],zmm0[5],mem[5],zmm0[8],mem[8],zmm0[9],mem[9],zmm0[12],mem[12],zmm0[13],mem[13] sched: [8:1.00] ; GENERIC-NEXT: vmovaps %zmm1, %zmm0 # sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -12622,7 +12622,7 @@ define <16 x float> @test_16xfloat_zero_masked_unpack_low_mem_mask1(<16 x float> ; GENERIC-LABEL: test_16xfloat_zero_masked_unpack_low_mem_mask1: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vptestnmd %zmm1, %zmm1, %k1 # sched: [1:1.00] -; GENERIC-NEXT: 
vunpcklps {{.*#+}} zmm0 {%k1} {z} = zmm0[0],mem[0],zmm0[1],mem[1],zmm0[4],mem[4],zmm0[5],mem[5],zmm0[8],mem[8],zmm0[9],mem[9],zmm0[12],mem[12],zmm0[13],mem[13] sched: [7:1.00] +; GENERIC-NEXT: vunpcklps {{.*#+}} zmm0 {%k1} {z} = zmm0[0],mem[0],zmm0[1],mem[1],zmm0[4],mem[4],zmm0[5],mem[5],zmm0[8],mem[8],zmm0[9],mem[9],zmm0[12],mem[12],zmm0[13],mem[13] sched: [8:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: test_16xfloat_zero_masked_unpack_low_mem_mask1: @@ -12641,7 +12641,7 @@ define <16 x float> @test_16xfloat_masked_unpack_low_mem_mask2(<16 x float> %vec ; GENERIC-LABEL: test_16xfloat_masked_unpack_low_mem_mask2: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vptestnmd %zmm2, %zmm2, %k1 # sched: [1:1.00] -; GENERIC-NEXT: vunpcklps {{.*#+}} zmm1 {%k1} = zmm0[0],mem[0],zmm0[1],mem[1],zmm0[4],mem[4],zmm0[5],mem[5],zmm0[8],mem[8],zmm0[9],mem[9],zmm0[12],mem[12],zmm0[13],mem[13] sched: [7:1.00] +; GENERIC-NEXT: vunpcklps {{.*#+}} zmm1 {%k1} = zmm0[0],mem[0],zmm0[1],mem[1],zmm0[4],mem[4],zmm0[5],mem[5],zmm0[8],mem[8],zmm0[9],mem[9],zmm0[12],mem[12],zmm0[13],mem[13] sched: [8:1.00] ; GENERIC-NEXT: vmovaps %zmm1, %zmm0 # sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -12662,7 +12662,7 @@ define <16 x float> @test_16xfloat_zero_masked_unpack_low_mem_mask2(<16 x float> ; GENERIC-LABEL: test_16xfloat_zero_masked_unpack_low_mem_mask2: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vptestnmd %zmm1, %zmm1, %k1 # sched: [1:1.00] -; GENERIC-NEXT: vunpcklps {{.*#+}} zmm0 {%k1} {z} = zmm0[0],mem[0],zmm0[1],mem[1],zmm0[4],mem[4],zmm0[5],mem[5],zmm0[8],mem[8],zmm0[9],mem[9],zmm0[12],mem[12],zmm0[13],mem[13] sched: [7:1.00] +; GENERIC-NEXT: vunpcklps {{.*#+}} zmm0 {%k1} {z} = zmm0[0],mem[0],zmm0[1],mem[1],zmm0[4],mem[4],zmm0[5],mem[5],zmm0[8],mem[8],zmm0[9],mem[9],zmm0[12],mem[12],zmm0[13],mem[13] sched: [8:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: test_16xfloat_zero_masked_unpack_low_mem_mask2: @@ -12680,7 +12680,7 @@ define <16 x float> 
@test_16xfloat_zero_masked_unpack_low_mem_mask2(<16 x float> define <16 x float> @test_16xfloat_unpack_low_mem_mask3(<16 x float> %vec1, <16 x float>* %vec2p) { ; GENERIC-LABEL: test_16xfloat_unpack_low_mem_mask3: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vunpcklps {{.*#+}} zmm0 = zmm0[0],mem[0],zmm0[1],mem[1],zmm0[4],mem[4],zmm0[5],mem[5],zmm0[8],mem[8],zmm0[9],mem[9],zmm0[12],mem[12],zmm0[13],mem[13] sched: [7:1.00] +; GENERIC-NEXT: vunpcklps {{.*#+}} zmm0 = zmm0[0],mem[0],zmm0[1],mem[1],zmm0[4],mem[4],zmm0[5],mem[5],zmm0[8],mem[8],zmm0[9],mem[9],zmm0[12],mem[12],zmm0[13],mem[13] sched: [8:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: test_16xfloat_unpack_low_mem_mask3: @@ -12695,7 +12695,7 @@ define <16 x float> @test_16xfloat_masked_unpack_low_mem_mask3(<16 x float> %vec ; GENERIC-LABEL: test_16xfloat_masked_unpack_low_mem_mask3: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vptestnmd %zmm2, %zmm2, %k1 # sched: [1:1.00] -; GENERIC-NEXT: vunpcklps {{.*#+}} zmm1 {%k1} = zmm0[0],mem[0],zmm0[1],mem[1],zmm0[4],mem[4],zmm0[5],mem[5],zmm0[8],mem[8],zmm0[9],mem[9],zmm0[12],mem[12],zmm0[13],mem[13] sched: [7:1.00] +; GENERIC-NEXT: vunpcklps {{.*#+}} zmm1 {%k1} = zmm0[0],mem[0],zmm0[1],mem[1],zmm0[4],mem[4],zmm0[5],mem[5],zmm0[8],mem[8],zmm0[9],mem[9],zmm0[12],mem[12],zmm0[13],mem[13] sched: [8:1.00] ; GENERIC-NEXT: vmovaps %zmm1, %zmm0 # sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -12716,7 +12716,7 @@ define <16 x float> @test_16xfloat_zero_masked_unpack_low_mem_mask3(<16 x float> ; GENERIC-LABEL: test_16xfloat_zero_masked_unpack_low_mem_mask3: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vptestnmd %zmm1, %zmm1, %k1 # sched: [1:1.00] -; GENERIC-NEXT: vunpcklps {{.*#+}} zmm0 {%k1} {z} = zmm0[0],mem[0],zmm0[1],mem[1],zmm0[4],mem[4],zmm0[5],mem[5],zmm0[8],mem[8],zmm0[9],mem[9],zmm0[12],mem[12],zmm0[13],mem[13] sched: [7:1.00] +; GENERIC-NEXT: vunpcklps {{.*#+}} zmm0 {%k1} {z} = 
zmm0[0],mem[0],zmm0[1],mem[1],zmm0[4],mem[4],zmm0[5],mem[5],zmm0[8],mem[8],zmm0[9],mem[9],zmm0[12],mem[12],zmm0[13],mem[13] sched: [8:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: test_16xfloat_zero_masked_unpack_low_mem_mask3: @@ -12836,7 +12836,7 @@ define <2 x double> @test_2xdouble_masked_unpack_low_mem_mask0(<2 x double> %vec ; GENERIC-LABEL: test_2xdouble_masked_unpack_low_mem_mask0: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vptestnmq %xmm2, %xmm2, %k1 # sched: [1:1.00] -; GENERIC-NEXT: vunpcklpd {{.*#+}} xmm1 {%k1} = xmm0[0],mem[0] sched: [7:1.00] +; GENERIC-NEXT: vunpcklpd {{.*#+}} xmm1 {%k1} = xmm0[0],mem[0] sched: [8:1.00] ; GENERIC-NEXT: vmovapd %xmm1, %xmm0 # sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -12857,7 +12857,7 @@ define <2 x double> @test_2xdouble_zero_masked_unpack_low_mem_mask0(<2 x double> ; GENERIC-LABEL: test_2xdouble_zero_masked_unpack_low_mem_mask0: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vptestnmq %xmm1, %xmm1, %k1 # sched: [1:1.00] -; GENERIC-NEXT: vunpcklpd {{.*#+}} xmm0 {%k1} {z} = xmm0[0],mem[0] sched: [7:1.00] +; GENERIC-NEXT: vunpcklpd {{.*#+}} xmm0 {%k1} {z} = xmm0[0],mem[0] sched: [8:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: test_2xdouble_zero_masked_unpack_low_mem_mask0: @@ -12876,7 +12876,7 @@ define <2 x double> @test_2xdouble_masked_unpack_low_mem_mask1(<2 x double> %vec ; GENERIC-LABEL: test_2xdouble_masked_unpack_low_mem_mask1: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vptestnmq %xmm2, %xmm2, %k1 # sched: [1:1.00] -; GENERIC-NEXT: vunpcklpd {{.*#+}} xmm1 {%k1} = xmm0[0],mem[0] sched: [7:1.00] +; GENERIC-NEXT: vunpcklpd {{.*#+}} xmm1 {%k1} = xmm0[0],mem[0] sched: [8:1.00] ; GENERIC-NEXT: vmovapd %xmm1, %xmm0 # sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -12897,7 +12897,7 @@ define <2 x double> @test_2xdouble_zero_masked_unpack_low_mem_mask1(<2 x double> ; GENERIC-LABEL: test_2xdouble_zero_masked_unpack_low_mem_mask1: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vptestnmq %xmm1, %xmm1, 
%k1 # sched: [1:1.00] -; GENERIC-NEXT: vunpcklpd {{.*#+}} xmm0 {%k1} {z} = xmm0[0],mem[0] sched: [7:1.00] +; GENERIC-NEXT: vunpcklpd {{.*#+}} xmm0 {%k1} {z} = xmm0[0],mem[0] sched: [8:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: test_2xdouble_zero_masked_unpack_low_mem_mask1: @@ -13104,7 +13104,7 @@ define <4 x double> @test_4xdouble_masked_unpack_low_mem_mask0(<4 x double> %vec ; GENERIC-LABEL: test_4xdouble_masked_unpack_low_mem_mask0: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vptestnmq %ymm2, %ymm2, %k1 # sched: [1:1.00] -; GENERIC-NEXT: vunpcklpd {{.*#+}} ymm1 {%k1} = ymm0[0],mem[0],ymm0[2],mem[2] sched: [7:1.00] +; GENERIC-NEXT: vunpcklpd {{.*#+}} ymm1 {%k1} = ymm0[0],mem[0],ymm0[2],mem[2] sched: [8:1.00] ; GENERIC-NEXT: vmovapd %ymm1, %ymm0 # sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -13125,7 +13125,7 @@ define <4 x double> @test_4xdouble_zero_masked_unpack_low_mem_mask0(<4 x double> ; GENERIC-LABEL: test_4xdouble_zero_masked_unpack_low_mem_mask0: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vptestnmq %ymm1, %ymm1, %k1 # sched: [1:1.00] -; GENERIC-NEXT: vunpcklpd {{.*#+}} ymm0 {%k1} {z} = ymm0[0],mem[0],ymm0[2],mem[2] sched: [7:1.00] +; GENERIC-NEXT: vunpcklpd {{.*#+}} ymm0 {%k1} {z} = ymm0[0],mem[0],ymm0[2],mem[2] sched: [8:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: test_4xdouble_zero_masked_unpack_low_mem_mask0: @@ -13144,7 +13144,7 @@ define <4 x double> @test_4xdouble_masked_unpack_low_mem_mask1(<4 x double> %vec ; GENERIC-LABEL: test_4xdouble_masked_unpack_low_mem_mask1: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vptestnmq %ymm2, %ymm2, %k1 # sched: [1:1.00] -; GENERIC-NEXT: vunpcklpd {{.*#+}} ymm1 {%k1} = ymm0[0],mem[0],ymm0[2],mem[2] sched: [7:1.00] +; GENERIC-NEXT: vunpcklpd {{.*#+}} ymm1 {%k1} = ymm0[0],mem[0],ymm0[2],mem[2] sched: [8:1.00] ; GENERIC-NEXT: vmovapd %ymm1, %ymm0 # sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -13165,7 +13165,7 @@ define <4 x double> 
@test_4xdouble_zero_masked_unpack_low_mem_mask1(<4 x double> ; GENERIC-LABEL: test_4xdouble_zero_masked_unpack_low_mem_mask1: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vptestnmq %ymm1, %ymm1, %k1 # sched: [1:1.00] -; GENERIC-NEXT: vunpcklpd {{.*#+}} ymm0 {%k1} {z} = ymm0[0],mem[0],ymm0[2],mem[2] sched: [7:1.00] +; GENERIC-NEXT: vunpcklpd {{.*#+}} ymm0 {%k1} {z} = ymm0[0],mem[0],ymm0[2],mem[2] sched: [8:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: test_4xdouble_zero_masked_unpack_low_mem_mask1: @@ -13184,7 +13184,7 @@ define <4 x double> @test_4xdouble_masked_unpack_low_mem_mask2(<4 x double> %vec ; GENERIC-LABEL: test_4xdouble_masked_unpack_low_mem_mask2: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vptestnmq %ymm2, %ymm2, %k1 # sched: [1:1.00] -; GENERIC-NEXT: vunpcklpd {{.*#+}} ymm1 {%k1} = ymm0[0],mem[0],ymm0[2],mem[2] sched: [7:1.00] +; GENERIC-NEXT: vunpcklpd {{.*#+}} ymm1 {%k1} = ymm0[0],mem[0],ymm0[2],mem[2] sched: [8:1.00] ; GENERIC-NEXT: vmovapd %ymm1, %ymm0 # sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -13205,7 +13205,7 @@ define <4 x double> @test_4xdouble_zero_masked_unpack_low_mem_mask2(<4 x double> ; GENERIC-LABEL: test_4xdouble_zero_masked_unpack_low_mem_mask2: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vptestnmq %ymm1, %ymm1, %k1 # sched: [1:1.00] -; GENERIC-NEXT: vunpcklpd {{.*#+}} ymm0 {%k1} {z} = ymm0[0],mem[0],ymm0[2],mem[2] sched: [7:1.00] +; GENERIC-NEXT: vunpcklpd {{.*#+}} ymm0 {%k1} {z} = ymm0[0],mem[0],ymm0[2],mem[2] sched: [8:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: test_4xdouble_zero_masked_unpack_low_mem_mask2: @@ -13238,7 +13238,7 @@ define <4 x double> @test_4xdouble_masked_unpack_low_mem_mask3(<4 x double> %vec ; GENERIC-LABEL: test_4xdouble_masked_unpack_low_mem_mask3: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vptestnmq %ymm2, %ymm2, %k1 # sched: [1:1.00] -; GENERIC-NEXT: vunpcklpd {{.*#+}} ymm1 {%k1} = ymm0[0],mem[0],ymm0[2],mem[2] sched: [7:1.00] +; GENERIC-NEXT: vunpcklpd {{.*#+}} ymm1 {%k1} = 
ymm0[0],mem[0],ymm0[2],mem[2] sched: [8:1.00] ; GENERIC-NEXT: vmovapd %ymm1, %ymm0 # sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -13259,7 +13259,7 @@ define <4 x double> @test_4xdouble_zero_masked_unpack_low_mem_mask3(<4 x double> ; GENERIC-LABEL: test_4xdouble_zero_masked_unpack_low_mem_mask3: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vptestnmq %ymm1, %ymm1, %k1 # sched: [1:1.00] -; GENERIC-NEXT: vunpcklpd {{.*#+}} ymm0 {%k1} {z} = ymm0[0],mem[0],ymm0[2],mem[2] sched: [7:1.00] +; GENERIC-NEXT: vunpcklpd {{.*#+}} ymm0 {%k1} {z} = ymm0[0],mem[0],ymm0[2],mem[2] sched: [8:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: test_4xdouble_zero_masked_unpack_low_mem_mask3: @@ -13451,7 +13451,7 @@ define <8 x double> @test_8xdouble_zero_masked_unpack_low_mask3(<8 x double> %ve define <8 x double> @test_8xdouble_unpack_low_mem_mask0(<8 x double> %vec1, <8 x double>* %vec2p) { ; GENERIC-LABEL: test_8xdouble_unpack_low_mem_mask0: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vunpcklpd {{.*#+}} zmm0 = zmm0[0],mem[0],zmm0[2],mem[2],zmm0[4],mem[4],zmm0[6],mem[6] sched: [7:1.00] +; GENERIC-NEXT: vunpcklpd {{.*#+}} zmm0 = zmm0[0],mem[0],zmm0[2],mem[2],zmm0[4],mem[4],zmm0[6],mem[6] sched: [8:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: test_8xdouble_unpack_low_mem_mask0: @@ -13466,7 +13466,7 @@ define <8 x double> @test_8xdouble_masked_unpack_low_mem_mask0(<8 x double> %vec ; GENERIC-LABEL: test_8xdouble_masked_unpack_low_mem_mask0: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vptestnmq %zmm2, %zmm2, %k1 # sched: [1:1.00] -; GENERIC-NEXT: vunpcklpd {{.*#+}} zmm1 {%k1} = zmm0[0],mem[0],zmm0[2],mem[2],zmm0[4],mem[4],zmm0[6],mem[6] sched: [7:1.00] +; GENERIC-NEXT: vunpcklpd {{.*#+}} zmm1 {%k1} = zmm0[0],mem[0],zmm0[2],mem[2],zmm0[4],mem[4],zmm0[6],mem[6] sched: [8:1.00] ; GENERIC-NEXT: vmovapd %zmm1, %zmm0 # sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -13487,7 +13487,7 @@ define <8 x double> @test_8xdouble_zero_masked_unpack_low_mem_mask0(<8 x double> ; 
GENERIC-LABEL: test_8xdouble_zero_masked_unpack_low_mem_mask0: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vptestnmq %zmm1, %zmm1, %k1 # sched: [1:1.00] -; GENERIC-NEXT: vunpcklpd {{.*#+}} zmm0 {%k1} {z} = zmm0[0],mem[0],zmm0[2],mem[2],zmm0[4],mem[4],zmm0[6],mem[6] sched: [7:1.00] +; GENERIC-NEXT: vunpcklpd {{.*#+}} zmm0 {%k1} {z} = zmm0[0],mem[0],zmm0[2],mem[2],zmm0[4],mem[4],zmm0[6],mem[6] sched: [8:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: test_8xdouble_zero_masked_unpack_low_mem_mask0: @@ -13506,7 +13506,7 @@ define <8 x double> @test_8xdouble_masked_unpack_low_mem_mask1(<8 x double> %vec ; GENERIC-LABEL: test_8xdouble_masked_unpack_low_mem_mask1: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vptestnmq %zmm2, %zmm2, %k1 # sched: [1:1.00] -; GENERIC-NEXT: vunpcklpd {{.*#+}} zmm1 {%k1} = zmm0[0],mem[0],zmm0[2],mem[2],zmm0[4],mem[4],zmm0[6],mem[6] sched: [7:1.00] +; GENERIC-NEXT: vunpcklpd {{.*#+}} zmm1 {%k1} = zmm0[0],mem[0],zmm0[2],mem[2],zmm0[4],mem[4],zmm0[6],mem[6] sched: [8:1.00] ; GENERIC-NEXT: vmovapd %zmm1, %zmm0 # sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -13527,7 +13527,7 @@ define <8 x double> @test_8xdouble_zero_masked_unpack_low_mem_mask1(<8 x double> ; GENERIC-LABEL: test_8xdouble_zero_masked_unpack_low_mem_mask1: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vptestnmq %zmm1, %zmm1, %k1 # sched: [1:1.00] -; GENERIC-NEXT: vunpcklpd {{.*#+}} zmm0 {%k1} {z} = zmm0[0],mem[0],zmm0[2],mem[2],zmm0[4],mem[4],zmm0[6],mem[6] sched: [7:1.00] +; GENERIC-NEXT: vunpcklpd {{.*#+}} zmm0 {%k1} {z} = zmm0[0],mem[0],zmm0[2],mem[2],zmm0[4],mem[4],zmm0[6],mem[6] sched: [8:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: test_8xdouble_zero_masked_unpack_low_mem_mask1: @@ -13546,7 +13546,7 @@ define <8 x double> @test_8xdouble_masked_unpack_low_mem_mask2(<8 x double> %vec ; GENERIC-LABEL: test_8xdouble_masked_unpack_low_mem_mask2: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vptestnmq %zmm2, %zmm2, %k1 # sched: [1:1.00] -; GENERIC-NEXT: vunpcklpd {{.*#+}} zmm1 
{%k1} = zmm0[0],mem[0],zmm0[2],mem[2],zmm0[4],mem[4],zmm0[6],mem[6] sched: [7:1.00] +; GENERIC-NEXT: vunpcklpd {{.*#+}} zmm1 {%k1} = zmm0[0],mem[0],zmm0[2],mem[2],zmm0[4],mem[4],zmm0[6],mem[6] sched: [8:1.00] ; GENERIC-NEXT: vmovapd %zmm1, %zmm0 # sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -13567,7 +13567,7 @@ define <8 x double> @test_8xdouble_zero_masked_unpack_low_mem_mask2(<8 x double> ; GENERIC-LABEL: test_8xdouble_zero_masked_unpack_low_mem_mask2: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vptestnmq %zmm1, %zmm1, %k1 # sched: [1:1.00] -; GENERIC-NEXT: vunpcklpd {{.*#+}} zmm0 {%k1} {z} = zmm0[0],mem[0],zmm0[2],mem[2],zmm0[4],mem[4],zmm0[6],mem[6] sched: [7:1.00] +; GENERIC-NEXT: vunpcklpd {{.*#+}} zmm0 {%k1} {z} = zmm0[0],mem[0],zmm0[2],mem[2],zmm0[4],mem[4],zmm0[6],mem[6] sched: [8:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: test_8xdouble_zero_masked_unpack_low_mem_mask2: @@ -13585,7 +13585,7 @@ define <8 x double> @test_8xdouble_zero_masked_unpack_low_mem_mask2(<8 x double> define <8 x double> @test_8xdouble_unpack_low_mem_mask3(<8 x double> %vec1, <8 x double>* %vec2p) { ; GENERIC-LABEL: test_8xdouble_unpack_low_mem_mask3: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vunpcklpd {{.*#+}} zmm0 = zmm0[0],mem[0],zmm0[2],mem[2],zmm0[4],mem[4],zmm0[6],mem[6] sched: [7:1.00] +; GENERIC-NEXT: vunpcklpd {{.*#+}} zmm0 = zmm0[0],mem[0],zmm0[2],mem[2],zmm0[4],mem[4],zmm0[6],mem[6] sched: [8:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: test_8xdouble_unpack_low_mem_mask3: @@ -13600,7 +13600,7 @@ define <8 x double> @test_8xdouble_masked_unpack_low_mem_mask3(<8 x double> %vec ; GENERIC-LABEL: test_8xdouble_masked_unpack_low_mem_mask3: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vptestnmq %zmm2, %zmm2, %k1 # sched: [1:1.00] -; GENERIC-NEXT: vunpcklpd {{.*#+}} zmm1 {%k1} = zmm0[0],mem[0],zmm0[2],mem[2],zmm0[4],mem[4],zmm0[6],mem[6] sched: [7:1.00] +; GENERIC-NEXT: vunpcklpd {{.*#+}} zmm1 {%k1} = 
zmm0[0],mem[0],zmm0[2],mem[2],zmm0[4],mem[4],zmm0[6],mem[6] sched: [8:1.00] ; GENERIC-NEXT: vmovapd %zmm1, %zmm0 # sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -13621,7 +13621,7 @@ define <8 x double> @test_8xdouble_zero_masked_unpack_low_mem_mask3(<8 x double> ; GENERIC-LABEL: test_8xdouble_zero_masked_unpack_low_mem_mask3: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vptestnmq %zmm1, %zmm1, %k1 # sched: [1:1.00] -; GENERIC-NEXT: vunpcklpd {{.*#+}} zmm0 {%k1} {z} = zmm0[0],mem[0],zmm0[2],mem[2],zmm0[4],mem[4],zmm0[6],mem[6] sched: [7:1.00] +; GENERIC-NEXT: vunpcklpd {{.*#+}} zmm0 {%k1} {z} = zmm0[0],mem[0],zmm0[2],mem[2],zmm0[4],mem[4],zmm0[6],mem[6] sched: [8:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: test_8xdouble_zero_masked_unpack_low_mem_mask3: @@ -13828,7 +13828,7 @@ define <4 x float> @test_4xfloat_masked_unpack_high_mem_mask0(<4 x float> %vec1, ; GENERIC-LABEL: test_4xfloat_masked_unpack_high_mem_mask0: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vptestnmd %xmm2, %xmm2, %k1 # sched: [1:1.00] -; GENERIC-NEXT: vunpckhps {{.*#+}} xmm1 {%k1} = xmm0[2],mem[2],xmm0[3],mem[3] sched: [7:1.00] +; GENERIC-NEXT: vunpckhps {{.*#+}} xmm1 {%k1} = xmm0[2],mem[2],xmm0[3],mem[3] sched: [8:1.00] ; GENERIC-NEXT: vmovaps %xmm1, %xmm0 # sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -13849,7 +13849,7 @@ define <4 x float> @test_4xfloat_zero_masked_unpack_high_mem_mask0(<4 x float> % ; GENERIC-LABEL: test_4xfloat_zero_masked_unpack_high_mem_mask0: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vptestnmd %xmm1, %xmm1, %k1 # sched: [1:1.00] -; GENERIC-NEXT: vunpckhps {{.*#+}} xmm0 {%k1} {z} = xmm0[2],mem[2],xmm0[3],mem[3] sched: [7:1.00] +; GENERIC-NEXT: vunpckhps {{.*#+}} xmm0 {%k1} {z} = xmm0[2],mem[2],xmm0[3],mem[3] sched: [8:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: test_4xfloat_zero_masked_unpack_high_mem_mask0: @@ -13868,7 +13868,7 @@ define <4 x float> @test_4xfloat_masked_unpack_high_mem_mask1(<4 x float> %vec1, ; GENERIC-LABEL: 
test_4xfloat_masked_unpack_high_mem_mask1: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vptestnmd %xmm2, %xmm2, %k1 # sched: [1:1.00] -; GENERIC-NEXT: vunpckhps {{.*#+}} xmm1 {%k1} = xmm0[2],mem[2],xmm0[3],mem[3] sched: [7:1.00] +; GENERIC-NEXT: vunpckhps {{.*#+}} xmm1 {%k1} = xmm0[2],mem[2],xmm0[3],mem[3] sched: [8:1.00] ; GENERIC-NEXT: vmovaps %xmm1, %xmm0 # sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -13889,7 +13889,7 @@ define <4 x float> @test_4xfloat_zero_masked_unpack_high_mem_mask1(<4 x float> % ; GENERIC-LABEL: test_4xfloat_zero_masked_unpack_high_mem_mask1: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vptestnmd %xmm1, %xmm1, %k1 # sched: [1:1.00] -; GENERIC-NEXT: vunpckhps {{.*#+}} xmm0 {%k1} {z} = xmm0[2],mem[2],xmm0[3],mem[3] sched: [7:1.00] +; GENERIC-NEXT: vunpckhps {{.*#+}} xmm0 {%k1} {z} = xmm0[2],mem[2],xmm0[3],mem[3] sched: [8:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: test_4xfloat_zero_masked_unpack_high_mem_mask1: @@ -13908,7 +13908,7 @@ define <4 x float> @test_4xfloat_masked_unpack_high_mem_mask2(<4 x float> %vec1, ; GENERIC-LABEL: test_4xfloat_masked_unpack_high_mem_mask2: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vptestnmd %xmm2, %xmm2, %k1 # sched: [1:1.00] -; GENERIC-NEXT: vunpckhps {{.*#+}} xmm1 {%k1} = xmm0[2],mem[2],xmm0[3],mem[3] sched: [7:1.00] +; GENERIC-NEXT: vunpckhps {{.*#+}} xmm1 {%k1} = xmm0[2],mem[2],xmm0[3],mem[3] sched: [8:1.00] ; GENERIC-NEXT: vmovaps %xmm1, %xmm0 # sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -13929,7 +13929,7 @@ define <4 x float> @test_4xfloat_zero_masked_unpack_high_mem_mask2(<4 x float> % ; GENERIC-LABEL: test_4xfloat_zero_masked_unpack_high_mem_mask2: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vptestnmd %xmm1, %xmm1, %k1 # sched: [1:1.00] -; GENERIC-NEXT: vunpckhps {{.*#+}} xmm0 {%k1} {z} = xmm0[2],mem[2],xmm0[3],mem[3] sched: [7:1.00] +; GENERIC-NEXT: vunpckhps {{.*#+}} xmm0 {%k1} {z} = xmm0[2],mem[2],xmm0[3],mem[3] sched: [8:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: 
test_4xfloat_zero_masked_unpack_high_mem_mask2: @@ -13962,7 +13962,7 @@ define <4 x float> @test_4xfloat_masked_unpack_high_mem_mask3(<4 x float> %vec1, ; GENERIC-LABEL: test_4xfloat_masked_unpack_high_mem_mask3: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vptestnmd %xmm2, %xmm2, %k1 # sched: [1:1.00] -; GENERIC-NEXT: vunpckhps {{.*#+}} xmm1 {%k1} = xmm0[2],mem[2],xmm0[3],mem[3] sched: [7:1.00] +; GENERIC-NEXT: vunpckhps {{.*#+}} xmm1 {%k1} = xmm0[2],mem[2],xmm0[3],mem[3] sched: [8:1.00] ; GENERIC-NEXT: vmovaps %xmm1, %xmm0 # sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -13983,7 +13983,7 @@ define <4 x float> @test_4xfloat_zero_masked_unpack_high_mem_mask3(<4 x float> % ; GENERIC-LABEL: test_4xfloat_zero_masked_unpack_high_mem_mask3: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vptestnmd %xmm1, %xmm1, %k1 # sched: [1:1.00] -; GENERIC-NEXT: vunpckhps {{.*#+}} xmm0 {%k1} {z} = xmm0[2],mem[2],xmm0[3],mem[3] sched: [7:1.00] +; GENERIC-NEXT: vunpckhps {{.*#+}} xmm0 {%k1} {z} = xmm0[2],mem[2],xmm0[3],mem[3] sched: [8:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: test_4xfloat_zero_masked_unpack_high_mem_mask3: @@ -14190,7 +14190,7 @@ define <8 x float> @test_8xfloat_masked_unpack_high_mem_mask0(<8 x float> %vec1, ; GENERIC-LABEL: test_8xfloat_masked_unpack_high_mem_mask0: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vptestnmd %ymm2, %ymm2, %k1 # sched: [1:1.00] -; GENERIC-NEXT: vunpckhps {{.*#+}} ymm1 {%k1} = ymm0[2],mem[2],ymm0[3],mem[3],ymm0[6],mem[6],ymm0[7],mem[7] sched: [7:1.00] +; GENERIC-NEXT: vunpckhps {{.*#+}} ymm1 {%k1} = ymm0[2],mem[2],ymm0[3],mem[3],ymm0[6],mem[6],ymm0[7],mem[7] sched: [8:1.00] ; GENERIC-NEXT: vmovaps %ymm1, %ymm0 # sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -14211,7 +14211,7 @@ define <8 x float> @test_8xfloat_zero_masked_unpack_high_mem_mask0(<8 x float> % ; GENERIC-LABEL: test_8xfloat_zero_masked_unpack_high_mem_mask0: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vptestnmd %ymm1, %ymm1, %k1 # sched: [1:1.00] -; GENERIC-NEXT: 
vunpckhps {{.*#+}} ymm0 {%k1} {z} = ymm0[2],mem[2],ymm0[3],mem[3],ymm0[6],mem[6],ymm0[7],mem[7] sched: [7:1.00] +; GENERIC-NEXT: vunpckhps {{.*#+}} ymm0 {%k1} {z} = ymm0[2],mem[2],ymm0[3],mem[3],ymm0[6],mem[6],ymm0[7],mem[7] sched: [8:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: test_8xfloat_zero_masked_unpack_high_mem_mask0: @@ -14230,7 +14230,7 @@ define <8 x float> @test_8xfloat_masked_unpack_high_mem_mask1(<8 x float> %vec1, ; GENERIC-LABEL: test_8xfloat_masked_unpack_high_mem_mask1: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vptestnmd %ymm2, %ymm2, %k1 # sched: [1:1.00] -; GENERIC-NEXT: vunpckhps {{.*#+}} ymm1 {%k1} = ymm0[2],mem[2],ymm0[3],mem[3],ymm0[6],mem[6],ymm0[7],mem[7] sched: [7:1.00] +; GENERIC-NEXT: vunpckhps {{.*#+}} ymm1 {%k1} = ymm0[2],mem[2],ymm0[3],mem[3],ymm0[6],mem[6],ymm0[7],mem[7] sched: [8:1.00] ; GENERIC-NEXT: vmovaps %ymm1, %ymm0 # sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -14251,7 +14251,7 @@ define <8 x float> @test_8xfloat_zero_masked_unpack_high_mem_mask1(<8 x float> % ; GENERIC-LABEL: test_8xfloat_zero_masked_unpack_high_mem_mask1: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vptestnmd %ymm1, %ymm1, %k1 # sched: [1:1.00] -; GENERIC-NEXT: vunpckhps {{.*#+}} ymm0 {%k1} {z} = ymm0[2],mem[2],ymm0[3],mem[3],ymm0[6],mem[6],ymm0[7],mem[7] sched: [7:1.00] +; GENERIC-NEXT: vunpckhps {{.*#+}} ymm0 {%k1} {z} = ymm0[2],mem[2],ymm0[3],mem[3],ymm0[6],mem[6],ymm0[7],mem[7] sched: [8:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: test_8xfloat_zero_masked_unpack_high_mem_mask1: @@ -14270,7 +14270,7 @@ define <8 x float> @test_8xfloat_masked_unpack_high_mem_mask2(<8 x float> %vec1, ; GENERIC-LABEL: test_8xfloat_masked_unpack_high_mem_mask2: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vptestnmd %ymm2, %ymm2, %k1 # sched: [1:1.00] -; GENERIC-NEXT: vunpckhps {{.*#+}} ymm1 {%k1} = ymm0[2],mem[2],ymm0[3],mem[3],ymm0[6],mem[6],ymm0[7],mem[7] sched: [7:1.00] +; GENERIC-NEXT: vunpckhps {{.*#+}} ymm1 {%k1} = 
ymm0[2],mem[2],ymm0[3],mem[3],ymm0[6],mem[6],ymm0[7],mem[7] sched: [8:1.00] ; GENERIC-NEXT: vmovaps %ymm1, %ymm0 # sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -14291,7 +14291,7 @@ define <8 x float> @test_8xfloat_zero_masked_unpack_high_mem_mask2(<8 x float> % ; GENERIC-LABEL: test_8xfloat_zero_masked_unpack_high_mem_mask2: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vptestnmd %ymm1, %ymm1, %k1 # sched: [1:1.00] -; GENERIC-NEXT: vunpckhps {{.*#+}} ymm0 {%k1} {z} = ymm0[2],mem[2],ymm0[3],mem[3],ymm0[6],mem[6],ymm0[7],mem[7] sched: [7:1.00] +; GENERIC-NEXT: vunpckhps {{.*#+}} ymm0 {%k1} {z} = ymm0[2],mem[2],ymm0[3],mem[3],ymm0[6],mem[6],ymm0[7],mem[7] sched: [8:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: test_8xfloat_zero_masked_unpack_high_mem_mask2: @@ -14324,7 +14324,7 @@ define <8 x float> @test_8xfloat_masked_unpack_high_mem_mask3(<8 x float> %vec1, ; GENERIC-LABEL: test_8xfloat_masked_unpack_high_mem_mask3: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vptestnmd %ymm2, %ymm2, %k1 # sched: [1:1.00] -; GENERIC-NEXT: vunpckhps {{.*#+}} ymm1 {%k1} = ymm0[2],mem[2],ymm0[3],mem[3],ymm0[6],mem[6],ymm0[7],mem[7] sched: [7:1.00] +; GENERIC-NEXT: vunpckhps {{.*#+}} ymm1 {%k1} = ymm0[2],mem[2],ymm0[3],mem[3],ymm0[6],mem[6],ymm0[7],mem[7] sched: [8:1.00] ; GENERIC-NEXT: vmovaps %ymm1, %ymm0 # sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -14345,7 +14345,7 @@ define <8 x float> @test_8xfloat_zero_masked_unpack_high_mem_mask3(<8 x float> % ; GENERIC-LABEL: test_8xfloat_zero_masked_unpack_high_mem_mask3: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vptestnmd %ymm1, %ymm1, %k1 # sched: [1:1.00] -; GENERIC-NEXT: vunpckhps {{.*#+}} ymm0 {%k1} {z} = ymm0[2],mem[2],ymm0[3],mem[3],ymm0[6],mem[6],ymm0[7],mem[7] sched: [7:1.00] +; GENERIC-NEXT: vunpckhps {{.*#+}} ymm0 {%k1} {z} = ymm0[2],mem[2],ymm0[3],mem[3],ymm0[6],mem[6],ymm0[7],mem[7] sched: [8:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: test_8xfloat_zero_masked_unpack_high_mem_mask3: @@ 
-14537,7 +14537,7 @@ define <16 x float> @test_16xfloat_zero_masked_unpack_high_mask3(<16 x float> %v define <16 x float> @test_16xfloat_unpack_high_mem_mask0(<16 x float> %vec1, <16 x float>* %vec2p) { ; GENERIC-LABEL: test_16xfloat_unpack_high_mem_mask0: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vunpckhps {{.*#+}} zmm0 = zmm0[2],mem[2],zmm0[3],mem[3],zmm0[6],mem[6],zmm0[7],mem[7],zmm0[10],mem[10],zmm0[11],mem[11],zmm0[14],mem[14],zmm0[15],mem[15] sched: [7:1.00] +; GENERIC-NEXT: vunpckhps {{.*#+}} zmm0 = zmm0[2],mem[2],zmm0[3],mem[3],zmm0[6],mem[6],zmm0[7],mem[7],zmm0[10],mem[10],zmm0[11],mem[11],zmm0[14],mem[14],zmm0[15],mem[15] sched: [8:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: test_16xfloat_unpack_high_mem_mask0: @@ -14552,7 +14552,7 @@ define <16 x float> @test_16xfloat_masked_unpack_high_mem_mask0(<16 x float> %ve ; GENERIC-LABEL: test_16xfloat_masked_unpack_high_mem_mask0: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vptestnmd %zmm2, %zmm2, %k1 # sched: [1:1.00] -; GENERIC-NEXT: vunpckhps {{.*#+}} zmm1 {%k1} = zmm0[2],mem[2],zmm0[3],mem[3],zmm0[6],mem[6],zmm0[7],mem[7],zmm0[10],mem[10],zmm0[11],mem[11],zmm0[14],mem[14],zmm0[15],mem[15] sched: [7:1.00] +; GENERIC-NEXT: vunpckhps {{.*#+}} zmm1 {%k1} = zmm0[2],mem[2],zmm0[3],mem[3],zmm0[6],mem[6],zmm0[7],mem[7],zmm0[10],mem[10],zmm0[11],mem[11],zmm0[14],mem[14],zmm0[15],mem[15] sched: [8:1.00] ; GENERIC-NEXT: vmovaps %zmm1, %zmm0 # sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -14573,7 +14573,7 @@ define <16 x float> @test_16xfloat_zero_masked_unpack_high_mem_mask0(<16 x float ; GENERIC-LABEL: test_16xfloat_zero_masked_unpack_high_mem_mask0: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vptestnmd %zmm1, %zmm1, %k1 # sched: [1:1.00] -; GENERIC-NEXT: vunpckhps {{.*#+}} zmm0 {%k1} {z} = zmm0[2],mem[2],zmm0[3],mem[3],zmm0[6],mem[6],zmm0[7],mem[7],zmm0[10],mem[10],zmm0[11],mem[11],zmm0[14],mem[14],zmm0[15],mem[15] sched: [7:1.00] +; GENERIC-NEXT: vunpckhps {{.*#+}} zmm0 {%k1} {z} = 
zmm0[2],mem[2],zmm0[3],mem[3],zmm0[6],mem[6],zmm0[7],mem[7],zmm0[10],mem[10],zmm0[11],mem[11],zmm0[14],mem[14],zmm0[15],mem[15] sched: [8:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: test_16xfloat_zero_masked_unpack_high_mem_mask0: @@ -14592,7 +14592,7 @@ define <16 x float> @test_16xfloat_masked_unpack_high_mem_mask1(<16 x float> %ve ; GENERIC-LABEL: test_16xfloat_masked_unpack_high_mem_mask1: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vptestnmd %zmm2, %zmm2, %k1 # sched: [1:1.00] -; GENERIC-NEXT: vunpckhps {{.*#+}} zmm1 {%k1} = zmm0[2],mem[2],zmm0[3],mem[3],zmm0[6],mem[6],zmm0[7],mem[7],zmm0[10],mem[10],zmm0[11],mem[11],zmm0[14],mem[14],zmm0[15],mem[15] sched: [7:1.00] +; GENERIC-NEXT: vunpckhps {{.*#+}} zmm1 {%k1} = zmm0[2],mem[2],zmm0[3],mem[3],zmm0[6],mem[6],zmm0[7],mem[7],zmm0[10],mem[10],zmm0[11],mem[11],zmm0[14],mem[14],zmm0[15],mem[15] sched: [8:1.00] ; GENERIC-NEXT: vmovaps %zmm1, %zmm0 # sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -14613,7 +14613,7 @@ define <16 x float> @test_16xfloat_zero_masked_unpack_high_mem_mask1(<16 x float ; GENERIC-LABEL: test_16xfloat_zero_masked_unpack_high_mem_mask1: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vptestnmd %zmm1, %zmm1, %k1 # sched: [1:1.00] -; GENERIC-NEXT: vunpckhps {{.*#+}} zmm0 {%k1} {z} = zmm0[2],mem[2],zmm0[3],mem[3],zmm0[6],mem[6],zmm0[7],mem[7],zmm0[10],mem[10],zmm0[11],mem[11],zmm0[14],mem[14],zmm0[15],mem[15] sched: [7:1.00] +; GENERIC-NEXT: vunpckhps {{.*#+}} zmm0 {%k1} {z} = zmm0[2],mem[2],zmm0[3],mem[3],zmm0[6],mem[6],zmm0[7],mem[7],zmm0[10],mem[10],zmm0[11],mem[11],zmm0[14],mem[14],zmm0[15],mem[15] sched: [8:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: test_16xfloat_zero_masked_unpack_high_mem_mask1: @@ -14632,7 +14632,7 @@ define <16 x float> @test_16xfloat_masked_unpack_high_mem_mask2(<16 x float> %ve ; GENERIC-LABEL: test_16xfloat_masked_unpack_high_mem_mask2: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vptestnmd %zmm2, %zmm2, %k1 # sched: [1:1.00] -; GENERIC-NEXT: 
vunpckhps {{.*#+}} zmm1 {%k1} = zmm0[2],mem[2],zmm0[3],mem[3],zmm0[6],mem[6],zmm0[7],mem[7],zmm0[10],mem[10],zmm0[11],mem[11],zmm0[14],mem[14],zmm0[15],mem[15] sched: [7:1.00] +; GENERIC-NEXT: vunpckhps {{.*#+}} zmm1 {%k1} = zmm0[2],mem[2],zmm0[3],mem[3],zmm0[6],mem[6],zmm0[7],mem[7],zmm0[10],mem[10],zmm0[11],mem[11],zmm0[14],mem[14],zmm0[15],mem[15] sched: [8:1.00] ; GENERIC-NEXT: vmovaps %zmm1, %zmm0 # sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -14653,7 +14653,7 @@ define <16 x float> @test_16xfloat_zero_masked_unpack_high_mem_mask2(<16 x float ; GENERIC-LABEL: test_16xfloat_zero_masked_unpack_high_mem_mask2: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vptestnmd %zmm1, %zmm1, %k1 # sched: [1:1.00] -; GENERIC-NEXT: vunpckhps {{.*#+}} zmm0 {%k1} {z} = zmm0[2],mem[2],zmm0[3],mem[3],zmm0[6],mem[6],zmm0[7],mem[7],zmm0[10],mem[10],zmm0[11],mem[11],zmm0[14],mem[14],zmm0[15],mem[15] sched: [7:1.00] +; GENERIC-NEXT: vunpckhps {{.*#+}} zmm0 {%k1} {z} = zmm0[2],mem[2],zmm0[3],mem[3],zmm0[6],mem[6],zmm0[7],mem[7],zmm0[10],mem[10],zmm0[11],mem[11],zmm0[14],mem[14],zmm0[15],mem[15] sched: [8:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: test_16xfloat_zero_masked_unpack_high_mem_mask2: @@ -14671,7 +14671,7 @@ define <16 x float> @test_16xfloat_zero_masked_unpack_high_mem_mask2(<16 x float define <16 x float> @test_16xfloat_unpack_high_mem_mask3(<16 x float> %vec1, <16 x float>* %vec2p) { ; GENERIC-LABEL: test_16xfloat_unpack_high_mem_mask3: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vunpckhps {{.*#+}} zmm0 = zmm0[2],mem[2],zmm0[3],mem[3],zmm0[6],mem[6],zmm0[7],mem[7],zmm0[10],mem[10],zmm0[11],mem[11],zmm0[14],mem[14],zmm0[15],mem[15] sched: [7:1.00] +; GENERIC-NEXT: vunpckhps {{.*#+}} zmm0 = zmm0[2],mem[2],zmm0[3],mem[3],zmm0[6],mem[6],zmm0[7],mem[7],zmm0[10],mem[10],zmm0[11],mem[11],zmm0[14],mem[14],zmm0[15],mem[15] sched: [8:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: test_16xfloat_unpack_high_mem_mask3: @@ -14686,7 +14686,7 @@ define <16 x 
float> @test_16xfloat_masked_unpack_high_mem_mask3(<16 x float> %ve ; GENERIC-LABEL: test_16xfloat_masked_unpack_high_mem_mask3: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vptestnmd %zmm2, %zmm2, %k1 # sched: [1:1.00] -; GENERIC-NEXT: vunpckhps {{.*#+}} zmm1 {%k1} = zmm0[2],mem[2],zmm0[3],mem[3],zmm0[6],mem[6],zmm0[7],mem[7],zmm0[10],mem[10],zmm0[11],mem[11],zmm0[14],mem[14],zmm0[15],mem[15] sched: [7:1.00] +; GENERIC-NEXT: vunpckhps {{.*#+}} zmm1 {%k1} = zmm0[2],mem[2],zmm0[3],mem[3],zmm0[6],mem[6],zmm0[7],mem[7],zmm0[10],mem[10],zmm0[11],mem[11],zmm0[14],mem[14],zmm0[15],mem[15] sched: [8:1.00] ; GENERIC-NEXT: vmovaps %zmm1, %zmm0 # sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -14707,7 +14707,7 @@ define <16 x float> @test_16xfloat_zero_masked_unpack_high_mem_mask3(<16 x float ; GENERIC-LABEL: test_16xfloat_zero_masked_unpack_high_mem_mask3: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vptestnmd %zmm1, %zmm1, %k1 # sched: [1:1.00] -; GENERIC-NEXT: vunpckhps {{.*#+}} zmm0 {%k1} {z} = zmm0[2],mem[2],zmm0[3],mem[3],zmm0[6],mem[6],zmm0[7],mem[7],zmm0[10],mem[10],zmm0[11],mem[11],zmm0[14],mem[14],zmm0[15],mem[15] sched: [7:1.00] +; GENERIC-NEXT: vunpckhps {{.*#+}} zmm0 {%k1} {z} = zmm0[2],mem[2],zmm0[3],mem[3],zmm0[6],mem[6],zmm0[7],mem[7],zmm0[10],mem[10],zmm0[11],mem[11],zmm0[14],mem[14],zmm0[15],mem[15] sched: [8:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: test_16xfloat_zero_masked_unpack_high_mem_mask3: @@ -14827,7 +14827,7 @@ define <2 x double> @test_2xdouble_masked_unpack_high_mem_mask0(<2 x double> %ve ; GENERIC-LABEL: test_2xdouble_masked_unpack_high_mem_mask0: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vptestnmq %xmm2, %xmm2, %k1 # sched: [1:1.00] -; GENERIC-NEXT: vunpckhpd {{.*#+}} xmm1 {%k1} = xmm0[1],mem[1] sched: [7:1.00] +; GENERIC-NEXT: vunpckhpd {{.*#+}} xmm1 {%k1} = xmm0[1],mem[1] sched: [8:1.00] ; GENERIC-NEXT: vmovapd %xmm1, %xmm0 # sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -14848,7 +14848,7 @@ define <2 x double> 
@test_2xdouble_zero_masked_unpack_high_mem_mask0(<2 x double ; GENERIC-LABEL: test_2xdouble_zero_masked_unpack_high_mem_mask0: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vptestnmq %xmm1, %xmm1, %k1 # sched: [1:1.00] -; GENERIC-NEXT: vunpckhpd {{.*#+}} xmm0 {%k1} {z} = xmm0[1],mem[1] sched: [7:1.00] +; GENERIC-NEXT: vunpckhpd {{.*#+}} xmm0 {%k1} {z} = xmm0[1],mem[1] sched: [8:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: test_2xdouble_zero_masked_unpack_high_mem_mask0: @@ -14867,7 +14867,7 @@ define <2 x double> @test_2xdouble_masked_unpack_high_mem_mask1(<2 x double> %ve ; GENERIC-LABEL: test_2xdouble_masked_unpack_high_mem_mask1: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vptestnmq %xmm2, %xmm2, %k1 # sched: [1:1.00] -; GENERIC-NEXT: vunpckhpd {{.*#+}} xmm1 {%k1} = xmm0[1],mem[1] sched: [7:1.00] +; GENERIC-NEXT: vunpckhpd {{.*#+}} xmm1 {%k1} = xmm0[1],mem[1] sched: [8:1.00] ; GENERIC-NEXT: vmovapd %xmm1, %xmm0 # sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -14888,7 +14888,7 @@ define <2 x double> @test_2xdouble_zero_masked_unpack_high_mem_mask1(<2 x double ; GENERIC-LABEL: test_2xdouble_zero_masked_unpack_high_mem_mask1: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vptestnmq %xmm1, %xmm1, %k1 # sched: [1:1.00] -; GENERIC-NEXT: vunpckhpd {{.*#+}} xmm0 {%k1} {z} = xmm0[1],mem[1] sched: [7:1.00] +; GENERIC-NEXT: vunpckhpd {{.*#+}} xmm0 {%k1} {z} = xmm0[1],mem[1] sched: [8:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: test_2xdouble_zero_masked_unpack_high_mem_mask1: @@ -15095,7 +15095,7 @@ define <4 x double> @test_4xdouble_masked_unpack_high_mem_mask0(<4 x double> %ve ; GENERIC-LABEL: test_4xdouble_masked_unpack_high_mem_mask0: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vptestnmq %ymm2, %ymm2, %k1 # sched: [1:1.00] -; GENERIC-NEXT: vunpckhpd {{.*#+}} ymm1 {%k1} = ymm0[1],mem[1],ymm0[3],mem[3] sched: [7:1.00] +; GENERIC-NEXT: vunpckhpd {{.*#+}} ymm1 {%k1} = ymm0[1],mem[1],ymm0[3],mem[3] sched: [8:1.00] ; GENERIC-NEXT: vmovapd %ymm1, %ymm0 # sched: 
[1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -15116,7 +15116,7 @@ define <4 x double> @test_4xdouble_zero_masked_unpack_high_mem_mask0(<4 x double ; GENERIC-LABEL: test_4xdouble_zero_masked_unpack_high_mem_mask0: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vptestnmq %ymm1, %ymm1, %k1 # sched: [1:1.00] -; GENERIC-NEXT: vunpckhpd {{.*#+}} ymm0 {%k1} {z} = ymm0[1],mem[1],ymm0[3],mem[3] sched: [7:1.00] +; GENERIC-NEXT: vunpckhpd {{.*#+}} ymm0 {%k1} {z} = ymm0[1],mem[1],ymm0[3],mem[3] sched: [8:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: test_4xdouble_zero_masked_unpack_high_mem_mask0: @@ -15135,7 +15135,7 @@ define <4 x double> @test_4xdouble_masked_unpack_high_mem_mask1(<4 x double> %ve ; GENERIC-LABEL: test_4xdouble_masked_unpack_high_mem_mask1: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vptestnmq %ymm2, %ymm2, %k1 # sched: [1:1.00] -; GENERIC-NEXT: vunpckhpd {{.*#+}} ymm1 {%k1} = ymm0[1],mem[1],ymm0[3],mem[3] sched: [7:1.00] +; GENERIC-NEXT: vunpckhpd {{.*#+}} ymm1 {%k1} = ymm0[1],mem[1],ymm0[3],mem[3] sched: [8:1.00] ; GENERIC-NEXT: vmovapd %ymm1, %ymm0 # sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -15156,7 +15156,7 @@ define <4 x double> @test_4xdouble_zero_masked_unpack_high_mem_mask1(<4 x double ; GENERIC-LABEL: test_4xdouble_zero_masked_unpack_high_mem_mask1: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vptestnmq %ymm1, %ymm1, %k1 # sched: [1:1.00] -; GENERIC-NEXT: vunpckhpd {{.*#+}} ymm0 {%k1} {z} = ymm0[1],mem[1],ymm0[3],mem[3] sched: [7:1.00] +; GENERIC-NEXT: vunpckhpd {{.*#+}} ymm0 {%k1} {z} = ymm0[1],mem[1],ymm0[3],mem[3] sched: [8:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: test_4xdouble_zero_masked_unpack_high_mem_mask1: @@ -15175,7 +15175,7 @@ define <4 x double> @test_4xdouble_masked_unpack_high_mem_mask2(<4 x double> %ve ; GENERIC-LABEL: test_4xdouble_masked_unpack_high_mem_mask2: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vptestnmq %ymm2, %ymm2, %k1 # sched: [1:1.00] -; GENERIC-NEXT: vunpckhpd {{.*#+}} ymm1 {%k1} = 
ymm0[1],mem[1],ymm0[3],mem[3] sched: [7:1.00] +; GENERIC-NEXT: vunpckhpd {{.*#+}} ymm1 {%k1} = ymm0[1],mem[1],ymm0[3],mem[3] sched: [8:1.00] ; GENERIC-NEXT: vmovapd %ymm1, %ymm0 # sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -15196,7 +15196,7 @@ define <4 x double> @test_4xdouble_zero_masked_unpack_high_mem_mask2(<4 x double ; GENERIC-LABEL: test_4xdouble_zero_masked_unpack_high_mem_mask2: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vptestnmq %ymm1, %ymm1, %k1 # sched: [1:1.00] -; GENERIC-NEXT: vunpckhpd {{.*#+}} ymm0 {%k1} {z} = ymm0[1],mem[1],ymm0[3],mem[3] sched: [7:1.00] +; GENERIC-NEXT: vunpckhpd {{.*#+}} ymm0 {%k1} {z} = ymm0[1],mem[1],ymm0[3],mem[3] sched: [8:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: test_4xdouble_zero_masked_unpack_high_mem_mask2: @@ -15229,7 +15229,7 @@ define <4 x double> @test_4xdouble_masked_unpack_high_mem_mask3(<4 x double> %ve ; GENERIC-LABEL: test_4xdouble_masked_unpack_high_mem_mask3: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vptestnmq %ymm2, %ymm2, %k1 # sched: [1:1.00] -; GENERIC-NEXT: vunpckhpd {{.*#+}} ymm1 {%k1} = ymm0[1],mem[1],ymm0[3],mem[3] sched: [7:1.00] +; GENERIC-NEXT: vunpckhpd {{.*#+}} ymm1 {%k1} = ymm0[1],mem[1],ymm0[3],mem[3] sched: [8:1.00] ; GENERIC-NEXT: vmovapd %ymm1, %ymm0 # sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -15250,7 +15250,7 @@ define <4 x double> @test_4xdouble_zero_masked_unpack_high_mem_mask3(<4 x double ; GENERIC-LABEL: test_4xdouble_zero_masked_unpack_high_mem_mask3: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vptestnmq %ymm1, %ymm1, %k1 # sched: [1:1.00] -; GENERIC-NEXT: vunpckhpd {{.*#+}} ymm0 {%k1} {z} = ymm0[1],mem[1],ymm0[3],mem[3] sched: [7:1.00] +; GENERIC-NEXT: vunpckhpd {{.*#+}} ymm0 {%k1} {z} = ymm0[1],mem[1],ymm0[3],mem[3] sched: [8:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: test_4xdouble_zero_masked_unpack_high_mem_mask3: @@ -15442,7 +15442,7 @@ define <8 x double> @test_8xdouble_zero_masked_unpack_high_mask3(<8 x double> %v define <8 x 
double> @test_8xdouble_unpack_high_mem_mask0(<8 x double> %vec1, <8 x double>* %vec2p) { ; GENERIC-LABEL: test_8xdouble_unpack_high_mem_mask0: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vunpckhpd {{.*#+}} zmm0 = zmm0[1],mem[1],zmm0[3],mem[3],zmm0[5],mem[5],zmm0[7],mem[7] sched: [7:1.00] +; GENERIC-NEXT: vunpckhpd {{.*#+}} zmm0 = zmm0[1],mem[1],zmm0[3],mem[3],zmm0[5],mem[5],zmm0[7],mem[7] sched: [8:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: test_8xdouble_unpack_high_mem_mask0: @@ -15457,7 +15457,7 @@ define <8 x double> @test_8xdouble_masked_unpack_high_mem_mask0(<8 x double> %ve ; GENERIC-LABEL: test_8xdouble_masked_unpack_high_mem_mask0: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vptestnmq %zmm2, %zmm2, %k1 # sched: [1:1.00] -; GENERIC-NEXT: vunpckhpd {{.*#+}} zmm1 {%k1} = zmm0[1],mem[1],zmm0[3],mem[3],zmm0[5],mem[5],zmm0[7],mem[7] sched: [7:1.00] +; GENERIC-NEXT: vunpckhpd {{.*#+}} zmm1 {%k1} = zmm0[1],mem[1],zmm0[3],mem[3],zmm0[5],mem[5],zmm0[7],mem[7] sched: [8:1.00] ; GENERIC-NEXT: vmovapd %zmm1, %zmm0 # sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -15478,7 +15478,7 @@ define <8 x double> @test_8xdouble_zero_masked_unpack_high_mem_mask0(<8 x double ; GENERIC-LABEL: test_8xdouble_zero_masked_unpack_high_mem_mask0: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vptestnmq %zmm1, %zmm1, %k1 # sched: [1:1.00] -; GENERIC-NEXT: vunpckhpd {{.*#+}} zmm0 {%k1} {z} = zmm0[1],mem[1],zmm0[3],mem[3],zmm0[5],mem[5],zmm0[7],mem[7] sched: [7:1.00] +; GENERIC-NEXT: vunpckhpd {{.*#+}} zmm0 {%k1} {z} = zmm0[1],mem[1],zmm0[3],mem[3],zmm0[5],mem[5],zmm0[7],mem[7] sched: [8:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: test_8xdouble_zero_masked_unpack_high_mem_mask0: @@ -15497,7 +15497,7 @@ define <8 x double> @test_8xdouble_masked_unpack_high_mem_mask1(<8 x double> %ve ; GENERIC-LABEL: test_8xdouble_masked_unpack_high_mem_mask1: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vptestnmq %zmm2, %zmm2, %k1 # sched: [1:1.00] -; GENERIC-NEXT: vunpckhpd {{.*#+}} zmm1 {%k1} = 
zmm0[1],mem[1],zmm0[3],mem[3],zmm0[5],mem[5],zmm0[7],mem[7] sched: [7:1.00] +; GENERIC-NEXT: vunpckhpd {{.*#+}} zmm1 {%k1} = zmm0[1],mem[1],zmm0[3],mem[3],zmm0[5],mem[5],zmm0[7],mem[7] sched: [8:1.00] ; GENERIC-NEXT: vmovapd %zmm1, %zmm0 # sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -15518,7 +15518,7 @@ define <8 x double> @test_8xdouble_zero_masked_unpack_high_mem_mask1(<8 x double ; GENERIC-LABEL: test_8xdouble_zero_masked_unpack_high_mem_mask1: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vptestnmq %zmm1, %zmm1, %k1 # sched: [1:1.00] -; GENERIC-NEXT: vunpckhpd {{.*#+}} zmm0 {%k1} {z} = zmm0[1],mem[1],zmm0[3],mem[3],zmm0[5],mem[5],zmm0[7],mem[7] sched: [7:1.00] +; GENERIC-NEXT: vunpckhpd {{.*#+}} zmm0 {%k1} {z} = zmm0[1],mem[1],zmm0[3],mem[3],zmm0[5],mem[5],zmm0[7],mem[7] sched: [8:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: test_8xdouble_zero_masked_unpack_high_mem_mask1: @@ -15537,7 +15537,7 @@ define <8 x double> @test_8xdouble_masked_unpack_high_mem_mask2(<8 x double> %ve ; GENERIC-LABEL: test_8xdouble_masked_unpack_high_mem_mask2: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vptestnmq %zmm2, %zmm2, %k1 # sched: [1:1.00] -; GENERIC-NEXT: vunpckhpd {{.*#+}} zmm1 {%k1} = zmm0[1],mem[1],zmm0[3],mem[3],zmm0[5],mem[5],zmm0[7],mem[7] sched: [7:1.00] +; GENERIC-NEXT: vunpckhpd {{.*#+}} zmm1 {%k1} = zmm0[1],mem[1],zmm0[3],mem[3],zmm0[5],mem[5],zmm0[7],mem[7] sched: [8:1.00] ; GENERIC-NEXT: vmovapd %zmm1, %zmm0 # sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -15558,7 +15558,7 @@ define <8 x double> @test_8xdouble_zero_masked_unpack_high_mem_mask2(<8 x double ; GENERIC-LABEL: test_8xdouble_zero_masked_unpack_high_mem_mask2: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vptestnmq %zmm1, %zmm1, %k1 # sched: [1:1.00] -; GENERIC-NEXT: vunpckhpd {{.*#+}} zmm0 {%k1} {z} = zmm0[1],mem[1],zmm0[3],mem[3],zmm0[5],mem[5],zmm0[7],mem[7] sched: [7:1.00] +; GENERIC-NEXT: vunpckhpd {{.*#+}} zmm0 {%k1} {z} = zmm0[1],mem[1],zmm0[3],mem[3],zmm0[5],mem[5],zmm0[7],mem[7] 
sched: [8:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: test_8xdouble_zero_masked_unpack_high_mem_mask2: @@ -15576,7 +15576,7 @@ define <8 x double> @test_8xdouble_zero_masked_unpack_high_mem_mask2(<8 x double define <8 x double> @test_8xdouble_unpack_high_mem_mask3(<8 x double> %vec1, <8 x double>* %vec2p) { ; GENERIC-LABEL: test_8xdouble_unpack_high_mem_mask3: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vunpckhpd {{.*#+}} zmm0 = zmm0[1],mem[1],zmm0[3],mem[3],zmm0[5],mem[5],zmm0[7],mem[7] sched: [7:1.00] +; GENERIC-NEXT: vunpckhpd {{.*#+}} zmm0 = zmm0[1],mem[1],zmm0[3],mem[3],zmm0[5],mem[5],zmm0[7],mem[7] sched: [8:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: test_8xdouble_unpack_high_mem_mask3: @@ -15591,7 +15591,7 @@ define <8 x double> @test_8xdouble_masked_unpack_high_mem_mask3(<8 x double> %ve ; GENERIC-LABEL: test_8xdouble_masked_unpack_high_mem_mask3: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vptestnmq %zmm2, %zmm2, %k1 # sched: [1:1.00] -; GENERIC-NEXT: vunpckhpd {{.*#+}} zmm1 {%k1} = zmm0[1],mem[1],zmm0[3],mem[3],zmm0[5],mem[5],zmm0[7],mem[7] sched: [7:1.00] +; GENERIC-NEXT: vunpckhpd {{.*#+}} zmm1 {%k1} = zmm0[1],mem[1],zmm0[3],mem[3],zmm0[5],mem[5],zmm0[7],mem[7] sched: [8:1.00] ; GENERIC-NEXT: vmovapd %zmm1, %zmm0 # sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -15612,7 +15612,7 @@ define <8 x double> @test_8xdouble_zero_masked_unpack_high_mem_mask3(<8 x double ; GENERIC-LABEL: test_8xdouble_zero_masked_unpack_high_mem_mask3: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vptestnmq %zmm1, %zmm1, %k1 # sched: [1:1.00] -; GENERIC-NEXT: vunpckhpd {{.*#+}} zmm0 {%k1} {z} = zmm0[1],mem[1],zmm0[3],mem[3],zmm0[5],mem[5],zmm0[7],mem[7] sched: [7:1.00] +; GENERIC-NEXT: vunpckhpd {{.*#+}} zmm0 {%k1} {z} = zmm0[1],mem[1],zmm0[3],mem[3],zmm0[5],mem[5],zmm0[7],mem[7] sched: [8:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: test_8xdouble_zero_masked_unpack_high_mem_mask3: diff --git a/llvm/test/CodeGen/X86/avx512vpopcntdq-schedule.ll 
b/llvm/test/CodeGen/X86/avx512vpopcntdq-schedule.ll index d44038a..e1cde71 100644 --- a/llvm/test/CodeGen/X86/avx512vpopcntdq-schedule.ll +++ b/llvm/test/CodeGen/X86/avx512vpopcntdq-schedule.ll @@ -10,12 +10,12 @@ define void @test_vpopcntd(<16 x i32> %a0, <16 x i32> %a1, <16 x i32> *%a2, i16 ; GENERIC-NEXT: vpopcntd %zmm1, %zmm0 # sched: [3:1.00] ; GENERIC-NEXT: vpopcntd %zmm1, %zmm0 {%k1} # sched: [3:1.00] ; GENERIC-NEXT: vpopcntd %zmm1, %zmm0 {%k1} {z} # sched: [3:1.00] -; GENERIC-NEXT: vpopcntd (%rdi), %zmm0 # sched: [7:1.00] -; GENERIC-NEXT: vpopcntd (%rdi), %zmm0 {%k1} # sched: [7:1.00] -; GENERIC-NEXT: vpopcntd (%rdi), %zmm0 {%k1} {z} # sched: [7:1.00] -; GENERIC-NEXT: vpopcntd (%rdi){1to16}, %zmm0 # sched: [7:1.00] -; GENERIC-NEXT: vpopcntd (%rdi){1to16}, %zmm0 {%k1} # sched: [7:1.00] -; GENERIC-NEXT: vpopcntd (%rdi){1to16}, %zmm0 {%k1} {z} # sched: [7:1.00] +; GENERIC-NEXT: vpopcntd (%rdi), %zmm0 # sched: [8:1.00] +; GENERIC-NEXT: vpopcntd (%rdi), %zmm0 {%k1} # sched: [8:1.00] +; GENERIC-NEXT: vpopcntd (%rdi), %zmm0 {%k1} {z} # sched: [8:1.00] +; GENERIC-NEXT: vpopcntd (%rdi){1to16}, %zmm0 # sched: [8:1.00] +; GENERIC-NEXT: vpopcntd (%rdi){1to16}, %zmm0 {%k1} # sched: [8:1.00] +; GENERIC-NEXT: vpopcntd (%rdi){1to16}, %zmm0 {%k1} {z} # sched: [8:1.00] ; GENERIC-NEXT: #NO_APP ; GENERIC-NEXT: vzeroupper # sched: [100:0.33] ; GENERIC-NEXT: retq # sched: [1:1.00] @@ -48,12 +48,12 @@ define void @test_vpopcntq(<8 x i64> %a0, <8 x i64> %a1, <8 x i64> *%a2, i8 %a3) ; GENERIC-NEXT: vpopcntq %zmm1, %zmm0 # sched: [3:1.00] ; GENERIC-NEXT: vpopcntq %zmm1, %zmm0 {%k1} # sched: [3:1.00] ; GENERIC-NEXT: vpopcntq %zmm1, %zmm0 {%k1} {z} # sched: [3:1.00] -; GENERIC-NEXT: vpopcntq (%rdi), %zmm0 # sched: [7:1.00] -; GENERIC-NEXT: vpopcntq (%rdi), %zmm0 {%k1} # sched: [7:1.00] -; GENERIC-NEXT: vpopcntq (%rdi), %zmm0 {%k1} {z} # sched: [7:1.00] -; GENERIC-NEXT: vpopcntq (%rdi){1to8}, %zmm0 # sched: [7:1.00] -; GENERIC-NEXT: vpopcntq (%rdi){1to8}, %zmm0 {%k1} # sched: [7:1.00] 
-; GENERIC-NEXT: vpopcntq (%rdi){1to8}, %zmm0 {%k1} {z} # sched: [7:1.00] +; GENERIC-NEXT: vpopcntq (%rdi), %zmm0 # sched: [8:1.00] +; GENERIC-NEXT: vpopcntq (%rdi), %zmm0 {%k1} # sched: [8:1.00] +; GENERIC-NEXT: vpopcntq (%rdi), %zmm0 {%k1} {z} # sched: [8:1.00] +; GENERIC-NEXT: vpopcntq (%rdi){1to8}, %zmm0 # sched: [8:1.00] +; GENERIC-NEXT: vpopcntq (%rdi){1to8}, %zmm0 {%k1} # sched: [8:1.00] +; GENERIC-NEXT: vpopcntq (%rdi){1to8}, %zmm0 {%k1} {z} # sched: [8:1.00] ; GENERIC-NEXT: #NO_APP ; GENERIC-NEXT: vzeroupper # sched: [100:0.33] ; GENERIC-NEXT: retq # sched: [1:1.00] diff --git a/llvm/test/CodeGen/X86/bmi-schedule.ll b/llvm/test/CodeGen/X86/bmi-schedule.ll index d5ab1cf..fd5c51b 100644 --- a/llvm/test/CodeGen/X86/bmi-schedule.ll +++ b/llvm/test/CodeGen/X86/bmi-schedule.ll @@ -73,7 +73,7 @@ define i32 @test_andn_i32(i32 %a0, i32 %a1, i32 *%a2) { ; GENERIC-LABEL: test_andn_i32: ; GENERIC: # %bb.0: ; GENERIC-NEXT: andnl %esi, %edi, %ecx # sched: [1:0.33] -; GENERIC-NEXT: andnl (%rdx), %edi, %eax # sched: [5:0.50] +; GENERIC-NEXT: andnl (%rdx), %edi, %eax # sched: [6:0.50] ; GENERIC-NEXT: addl %ecx, %eax # sched: [1:0.33] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -123,7 +123,7 @@ define i64 @test_andn_i64(i64 %a0, i64 %a1, i64 *%a2) { ; GENERIC-LABEL: test_andn_i64: ; GENERIC: # %bb.0: ; GENERIC-NEXT: andnq %rsi, %rdi, %rcx # sched: [1:0.33] -; GENERIC-NEXT: andnq (%rdx), %rdi, %rax # sched: [5:0.50] +; GENERIC-NEXT: andnq (%rdx), %rdi, %rax # sched: [6:0.50] ; GENERIC-NEXT: addq %rcx, %rax # sched: [1:0.33] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -172,7 +172,7 @@ define i64 @test_andn_i64(i64 %a0, i64 %a1, i64 *%a2) { define i32 @test_bextr_i32(i32 %a0, i32 %a1, i32 *%a2) { ; GENERIC-LABEL: test_bextr_i32: ; GENERIC: # %bb.0: -; GENERIC-NEXT: bextrl %edi, (%rdx), %ecx # sched: [6:1.00] +; GENERIC-NEXT: bextrl %edi, (%rdx), %ecx # sched: [7:1.00] ; GENERIC-NEXT: bextrl %edi, %esi, %eax # sched: [2:1.00] ; GENERIC-NEXT: addl %ecx, %eax # sched: [1:0.33] ; 
GENERIC-NEXT: retq # sched: [1:1.00] @@ -222,7 +222,7 @@ declare i32 @llvm.x86.bmi.bextr.32(i32, i32) define i64 @test_bextr_i64(i64 %a0, i64 %a1, i64 *%a2) { ; GENERIC-LABEL: test_bextr_i64: ; GENERIC: # %bb.0: -; GENERIC-NEXT: bextrq %rdi, (%rdx), %rcx # sched: [6:1.00] +; GENERIC-NEXT: bextrq %rdi, (%rdx), %rcx # sched: [7:1.00] ; GENERIC-NEXT: bextrq %rdi, %rsi, %rax # sched: [2:1.00] ; GENERIC-NEXT: addq %rcx, %rax # sched: [1:0.33] ; GENERIC-NEXT: retq # sched: [1:1.00] @@ -272,7 +272,7 @@ declare i64 @llvm.x86.bmi.bextr.64(i64, i64) define i32 @test_blsi_i32(i32 %a0, i32 *%a1) { ; GENERIC-LABEL: test_blsi_i32: ; GENERIC: # %bb.0: -; GENERIC-NEXT: blsil (%rsi), %ecx # sched: [5:0.50] +; GENERIC-NEXT: blsil (%rsi), %ecx # sched: [6:0.50] ; GENERIC-NEXT: blsil %edi, %eax # sched: [1:0.33] ; GENERIC-NEXT: addl %ecx, %eax # sched: [1:0.33] ; GENERIC-NEXT: retq # sched: [1:1.00] @@ -323,7 +323,7 @@ define i32 @test_blsi_i32(i32 %a0, i32 *%a1) { define i64 @test_blsi_i64(i64 %a0, i64 *%a1) { ; GENERIC-LABEL: test_blsi_i64: ; GENERIC: # %bb.0: -; GENERIC-NEXT: blsiq (%rsi), %rcx # sched: [5:0.50] +; GENERIC-NEXT: blsiq (%rsi), %rcx # sched: [6:0.50] ; GENERIC-NEXT: blsiq %rdi, %rax # sched: [1:0.33] ; GENERIC-NEXT: addq %rcx, %rax # sched: [1:0.33] ; GENERIC-NEXT: retq # sched: [1:1.00] @@ -374,7 +374,7 @@ define i64 @test_blsi_i64(i64 %a0, i64 *%a1) { define i32 @test_blsmsk_i32(i32 %a0, i32 *%a1) { ; GENERIC-LABEL: test_blsmsk_i32: ; GENERIC: # %bb.0: -; GENERIC-NEXT: blsmskl (%rsi), %ecx # sched: [5:0.50] +; GENERIC-NEXT: blsmskl (%rsi), %ecx # sched: [6:0.50] ; GENERIC-NEXT: blsmskl %edi, %eax # sched: [1:0.33] ; GENERIC-NEXT: addl %ecx, %eax # sched: [1:0.33] ; GENERIC-NEXT: retq # sched: [1:1.00] @@ -425,7 +425,7 @@ define i32 @test_blsmsk_i32(i32 %a0, i32 *%a1) { define i64 @test_blsmsk_i64(i64 %a0, i64 *%a1) { ; GENERIC-LABEL: test_blsmsk_i64: ; GENERIC: # %bb.0: -; GENERIC-NEXT: blsmskq (%rsi), %rcx # sched: [5:0.50] +; GENERIC-NEXT: blsmskq (%rsi), %rcx # 
sched: [6:0.50] ; GENERIC-NEXT: blsmskq %rdi, %rax # sched: [1:0.33] ; GENERIC-NEXT: addq %rcx, %rax # sched: [1:0.33] ; GENERIC-NEXT: retq # sched: [1:1.00] @@ -476,7 +476,7 @@ define i64 @test_blsmsk_i64(i64 %a0, i64 *%a1) { define i32 @test_blsr_i32(i32 %a0, i32 *%a1) { ; GENERIC-LABEL: test_blsr_i32: ; GENERIC: # %bb.0: -; GENERIC-NEXT: blsrl (%rsi), %ecx # sched: [5:0.50] +; GENERIC-NEXT: blsrl (%rsi), %ecx # sched: [6:0.50] ; GENERIC-NEXT: blsrl %edi, %eax # sched: [1:0.33] ; GENERIC-NEXT: addl %ecx, %eax # sched: [1:0.33] ; GENERIC-NEXT: retq # sched: [1:1.00] @@ -527,7 +527,7 @@ define i32 @test_blsr_i32(i32 %a0, i32 *%a1) { define i64 @test_blsr_i64(i64 %a0, i64 *%a1) { ; GENERIC-LABEL: test_blsr_i64: ; GENERIC: # %bb.0: -; GENERIC-NEXT: blsrq (%rsi), %rcx # sched: [5:0.50] +; GENERIC-NEXT: blsrq (%rsi), %rcx # sched: [6:0.50] ; GENERIC-NEXT: blsrq %rdi, %rax # sched: [1:0.33] ; GENERIC-NEXT: addq %rcx, %rax # sched: [1:0.33] ; GENERIC-NEXT: retq # sched: [1:1.00] diff --git a/llvm/test/CodeGen/X86/bmi2-schedule.ll b/llvm/test/CodeGen/X86/bmi2-schedule.ll index 0a79b66..01d8291 100644 --- a/llvm/test/CodeGen/X86/bmi2-schedule.ll +++ b/llvm/test/CodeGen/X86/bmi2-schedule.ll @@ -9,7 +9,7 @@ define i32 @test_bzhi_i32(i32 %a0, i32 %a1, i32 *%a2) { ; GENERIC-LABEL: test_bzhi_i32: ; GENERIC: # %bb.0: -; GENERIC-NEXT: bzhil %edi, (%rdx), %ecx # sched: [5:1.00] +; GENERIC-NEXT: bzhil %edi, (%rdx), %ecx # sched: [6:1.00] ; GENERIC-NEXT: bzhil %edi, %esi, %eax # sched: [1:1.00] ; GENERIC-NEXT: addl %ecx, %eax # sched: [1:0.33] ; GENERIC-NEXT: retq # sched: [1:1.00] @@ -59,7 +59,7 @@ declare i32 @llvm.x86.bmi.bzhi.32(i32, i32) define i64 @test_bzhi_i64(i64 %a0, i64 %a1, i64 *%a2) { ; GENERIC-LABEL: test_bzhi_i64: ; GENERIC: # %bb.0: -; GENERIC-NEXT: bzhiq %rdi, (%rdx), %rcx # sched: [5:1.00] +; GENERIC-NEXT: bzhiq %rdi, (%rdx), %rcx # sched: [6:1.00] ; GENERIC-NEXT: bzhiq %rdi, %rsi, %rax # sched: [1:1.00] ; GENERIC-NEXT: addq %rcx, %rax # sched: [1:0.33] ; 
GENERIC-NEXT: retq # sched: [1:1.00] @@ -111,7 +111,7 @@ define void @test_mulx_i32(i32 %a0, i32 %a1, i32* %a2) optsize { ; GENERIC: # %bb.0: ; GENERIC-NEXT: #APP ; GENERIC-NEXT: mulxl %esi, %esi, %edi # sched: [3:1.00] -; GENERIC-NEXT: mulxl (%rdx), %esi, %edi # sched: [7:1.00] +; GENERIC-NEXT: mulxl (%rdx), %esi, %edi # sched: [8:1.00] ; GENERIC-NEXT: #NO_APP ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -164,7 +164,7 @@ define i64 @test_mulx_i64(i64 %a0, i64 %a1, i64 *%a2) { ; GENERIC-NEXT: movq %rdx, %rax # sched: [1:0.33] ; GENERIC-NEXT: movq %rdi, %rdx # sched: [1:0.33] ; GENERIC-NEXT: mulxq %rsi, %rsi, %rcx # sched: [3:1.00] -; GENERIC-NEXT: mulxq (%rax), %rdx, %rax # sched: [7:1.00] +; GENERIC-NEXT: mulxq (%rax), %rdx, %rax # sched: [8:1.00] ; GENERIC-NEXT: orq %rcx, %rax # sched: [1:0.33] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -229,7 +229,7 @@ define i64 @test_mulx_i64(i64 %a0, i64 %a1, i64 *%a2) { define i32 @test_pdep_i32(i32 %a0, i32 %a1, i32 *%a2) { ; GENERIC-LABEL: test_pdep_i32: ; GENERIC: # %bb.0: -; GENERIC-NEXT: pdepl (%rdx), %edi, %ecx # sched: [5:0.50] +; GENERIC-NEXT: pdepl (%rdx), %edi, %ecx # sched: [6:0.50] ; GENERIC-NEXT: pdepl %esi, %edi, %eax # sched: [1:0.33] ; GENERIC-NEXT: addl %ecx, %eax # sched: [1:0.33] ; GENERIC-NEXT: retq # sched: [1:1.00] @@ -279,7 +279,7 @@ declare i32 @llvm.x86.bmi.pdep.32(i32, i32) define i64 @test_pdep_i64(i64 %a0, i64 %a1, i64 *%a2) { ; GENERIC-LABEL: test_pdep_i64: ; GENERIC: # %bb.0: -; GENERIC-NEXT: pdepq (%rdx), %rdi, %rcx # sched: [5:0.50] +; GENERIC-NEXT: pdepq (%rdx), %rdi, %rcx # sched: [6:0.50] ; GENERIC-NEXT: pdepq %rsi, %rdi, %rax # sched: [1:0.33] ; GENERIC-NEXT: addq %rcx, %rax # sched: [1:0.33] ; GENERIC-NEXT: retq # sched: [1:1.00] @@ -329,7 +329,7 @@ declare i64 @llvm.x86.bmi.pdep.64(i64, i64) define i32 @test_pext_i32(i32 %a0, i32 %a1, i32 *%a2) { ; GENERIC-LABEL: test_pext_i32: ; GENERIC: # %bb.0: -; GENERIC-NEXT: pextl (%rdx), %edi, %ecx # sched: [5:0.50] +; GENERIC-NEXT: pextl (%rdx), 
%edi, %ecx # sched: [6:0.50] ; GENERIC-NEXT: pextl %esi, %edi, %eax # sched: [1:0.33] ; GENERIC-NEXT: addl %ecx, %eax # sched: [1:0.33] ; GENERIC-NEXT: retq # sched: [1:1.00] @@ -379,7 +379,7 @@ declare i32 @llvm.x86.bmi.pext.32(i32, i32) define i64 @test_pext_i64(i64 %a0, i64 %a1, i64 *%a2) { ; GENERIC-LABEL: test_pext_i64: ; GENERIC: # %bb.0: -; GENERIC-NEXT: pextq (%rdx), %rdi, %rcx # sched: [5:0.50] +; GENERIC-NEXT: pextq (%rdx), %rdi, %rcx # sched: [6:0.50] ; GENERIC-NEXT: pextq %rsi, %rdi, %rax # sched: [1:0.33] ; GENERIC-NEXT: addq %rcx, %rax # sched: [1:0.33] ; GENERIC-NEXT: retq # sched: [1:1.00] @@ -430,7 +430,7 @@ define i32 @test_rorx_i32(i32 %a0, i32 %a1, i32 *%a2) { ; GENERIC-LABEL: test_rorx_i32: ; GENERIC: # %bb.0: ; GENERIC-NEXT: rorxl $5, %edi, %ecx # sched: [1:0.50] -; GENERIC-NEXT: rorxl $5, (%rdx), %eax # sched: [5:0.50] +; GENERIC-NEXT: rorxl $5, (%rdx), %eax # sched: [6:0.50] ; GENERIC-NEXT: addl %ecx, %eax # sched: [1:0.33] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -483,7 +483,7 @@ define i64 @test_rorx_i64(i64 %a0, i64 %a1, i64 *%a2) { ; GENERIC-LABEL: test_rorx_i64: ; GENERIC: # %bb.0: ; GENERIC-NEXT: rorxq $5, %rdi, %rcx # sched: [1:0.50] -; GENERIC-NEXT: rorxq $5, (%rdx), %rax # sched: [5:0.50] +; GENERIC-NEXT: rorxq $5, (%rdx), %rax # sched: [6:0.50] ; GENERIC-NEXT: addq %rcx, %rax # sched: [1:0.33] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -536,7 +536,7 @@ define i32 @test_sarx_i32(i32 %a0, i32 %a1, i32 *%a2) { ; GENERIC-LABEL: test_sarx_i32: ; GENERIC: # %bb.0: ; GENERIC-NEXT: sarxl %esi, %edi, %ecx # sched: [1:0.50] -; GENERIC-NEXT: sarxl %esi, (%rdx), %eax # sched: [5:0.50] +; GENERIC-NEXT: sarxl %esi, (%rdx), %eax # sched: [6:0.50] ; GENERIC-NEXT: addl %ecx, %eax # sched: [1:0.33] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -585,7 +585,7 @@ define i64 @test_sarx_i64(i64 %a0, i64 %a1, i64 *%a2) { ; GENERIC-LABEL: test_sarx_i64: ; GENERIC: # %bb.0: ; GENERIC-NEXT: sarxq %rsi, %rdi, %rcx # sched: [1:0.50] -; GENERIC-NEXT: sarxq 
%rsi, (%rdx), %rax # sched: [5:0.50] +; GENERIC-NEXT: sarxq %rsi, (%rdx), %rax # sched: [6:0.50] ; GENERIC-NEXT: addq %rcx, %rax # sched: [1:0.33] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -634,7 +634,7 @@ define i32 @test_shlx_i32(i32 %a0, i32 %a1, i32 *%a2) { ; GENERIC-LABEL: test_shlx_i32: ; GENERIC: # %bb.0: ; GENERIC-NEXT: shlxl %esi, %edi, %ecx # sched: [1:0.50] -; GENERIC-NEXT: shlxl %esi, (%rdx), %eax # sched: [5:0.50] +; GENERIC-NEXT: shlxl %esi, (%rdx), %eax # sched: [6:0.50] ; GENERIC-NEXT: addl %ecx, %eax # sched: [1:0.33] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -683,7 +683,7 @@ define i64 @test_shlx_i64(i64 %a0, i64 %a1, i64 *%a2) { ; GENERIC-LABEL: test_shlx_i64: ; GENERIC: # %bb.0: ; GENERIC-NEXT: shlxq %rsi, %rdi, %rcx # sched: [1:0.50] -; GENERIC-NEXT: shlxq %rsi, (%rdx), %rax # sched: [5:0.50] +; GENERIC-NEXT: shlxq %rsi, (%rdx), %rax # sched: [6:0.50] ; GENERIC-NEXT: addq %rcx, %rax # sched: [1:0.33] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -732,7 +732,7 @@ define i32 @test_shrx_i32(i32 %a0, i32 %a1, i32 *%a2) { ; GENERIC-LABEL: test_shrx_i32: ; GENERIC: # %bb.0: ; GENERIC-NEXT: shrxl %esi, %edi, %ecx # sched: [1:0.50] -; GENERIC-NEXT: shrxl %esi, (%rdx), %eax # sched: [5:0.50] +; GENERIC-NEXT: shrxl %esi, (%rdx), %eax # sched: [6:0.50] ; GENERIC-NEXT: addl %ecx, %eax # sched: [1:0.33] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -781,7 +781,7 @@ define i64 @test_shrx_i64(i64 %a0, i64 %a1, i64 *%a2) { ; GENERIC-LABEL: test_shrx_i64: ; GENERIC: # %bb.0: ; GENERIC-NEXT: shrxq %rsi, %rdi, %rcx # sched: [1:0.50] -; GENERIC-NEXT: shrxq %rsi, (%rdx), %rax # sched: [5:0.50] +; GENERIC-NEXT: shrxq %rsi, (%rdx), %rax # sched: [6:0.50] ; GENERIC-NEXT: addq %rcx, %rax # sched: [1:0.33] ; GENERIC-NEXT: retq # sched: [1:1.00] ; diff --git a/llvm/test/CodeGen/X86/clwb-schedule.ll b/llvm/test/CodeGen/X86/clwb-schedule.ll index 24931ad..4d8e3e0 100644 --- a/llvm/test/CodeGen/X86/clwb-schedule.ll +++ b/llvm/test/CodeGen/X86/clwb-schedule.ll @@ -5,7 +5,7 
@@ define void @clwb(i8* %a0) nounwind { ; GENERIC-LABEL: clwb: ; GENERIC: # %bb.0: -; GENERIC-NEXT: clwb (%rdi) # sched: [4:0.50] +; GENERIC-NEXT: clwb (%rdi) # sched: [5:0.50] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: clwb: diff --git a/llvm/test/CodeGen/X86/f16c-schedule.ll b/llvm/test/CodeGen/X86/f16c-schedule.ll index 0f03ed6..5b67836 100644 --- a/llvm/test/CodeGen/X86/f16c-schedule.ll +++ b/llvm/test/CodeGen/X86/f16c-schedule.ll @@ -10,14 +10,14 @@ define <4 x float> @test_vcvtph2ps_128(<8 x i16> %a0, <8 x i16> *%a1) { ; GENERIC-LABEL: test_vcvtph2ps_128: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vcvtph2ps (%rdi), %xmm1 # sched: [7:1.00] +; GENERIC-NEXT: vcvtph2ps (%rdi), %xmm1 # sched: [8:1.00] ; GENERIC-NEXT: vcvtph2ps %xmm0, %xmm0 # sched: [3:1.00] ; GENERIC-NEXT: vaddps %xmm0, %xmm1, %xmm0 # sched: [3:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; IVY-LABEL: test_vcvtph2ps_128: ; IVY: # %bb.0: -; IVY-NEXT: vcvtph2ps (%rdi), %xmm1 # sched: [7:1.00] +; IVY-NEXT: vcvtph2ps (%rdi), %xmm1 # sched: [8:1.00] ; IVY-NEXT: vcvtph2ps %xmm0, %xmm0 # sched: [3:1.00] ; IVY-NEXT: vaddps %xmm0, %xmm1, %xmm0 # sched: [3:1.00] ; IVY-NEXT: retq # sched: [1:1.00] @@ -67,14 +67,14 @@ declare <4 x float> @llvm.x86.vcvtph2ps.128(<8 x i16>) define <8 x float> @test_vcvtph2ps_256(<8 x i16> %a0, <8 x i16> *%a1) { ; GENERIC-LABEL: test_vcvtph2ps_256: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vcvtph2ps (%rdi), %ymm1 # sched: [7:1.00] +; GENERIC-NEXT: vcvtph2ps (%rdi), %ymm1 # sched: [8:1.00] ; GENERIC-NEXT: vcvtph2ps %xmm0, %ymm0 # sched: [3:1.00] ; GENERIC-NEXT: vaddps %ymm0, %ymm1, %ymm0 # sched: [3:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; IVY-LABEL: test_vcvtph2ps_256: ; IVY: # %bb.0: -; IVY-NEXT: vcvtph2ps (%rdi), %ymm1 # sched: [7:1.00] +; IVY-NEXT: vcvtph2ps (%rdi), %ymm1 # sched: [8:1.00] ; IVY-NEXT: vcvtph2ps %xmm0, %ymm0 # sched: [3:1.00] ; IVY-NEXT: vaddps %ymm0, %ymm1, %ymm0 # sched: [3:1.00] ; IVY-NEXT: retq # sched: [1:1.00] @@ -125,13 +125,13 @@ define <8 x 
i16> @test_vcvtps2ph_128(<4 x float> %a0, <4 x float> %a1, <4 x i16> ; GENERIC-LABEL: test_vcvtps2ph_128: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vcvtps2ph $0, %xmm0, %xmm0 # sched: [3:1.00] -; GENERIC-NEXT: vcvtps2ph $0, %xmm1, (%rdi) # sched: [7:1.00] +; GENERIC-NEXT: vcvtps2ph $0, %xmm1, (%rdi) # sched: [8:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; IVY-LABEL: test_vcvtps2ph_128: ; IVY: # %bb.0: ; IVY-NEXT: vcvtps2ph $0, %xmm0, %xmm0 # sched: [3:1.00] -; IVY-NEXT: vcvtps2ph $0, %xmm1, (%rdi) # sched: [7:1.00] +; IVY-NEXT: vcvtps2ph $0, %xmm1, (%rdi) # sched: [8:1.00] ; IVY-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: test_vcvtps2ph_128: @@ -175,15 +175,15 @@ define <8 x i16> @test_vcvtps2ph_256(<8 x float> %a0, <8 x float> %a1, <8 x i16> ; GENERIC-LABEL: test_vcvtps2ph_256: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vcvtps2ph $0, %ymm0, %xmm0 # sched: [3:1.00] -; GENERIC-NEXT: vcvtps2ph $0, %ymm1, (%rdi) # sched: [7:1.00] -; GENERIC-NEXT: vzeroupper +; GENERIC-NEXT: vcvtps2ph $0, %ymm1, (%rdi) # sched: [8:1.00] +; GENERIC-NEXT: vzeroupper # sched: [100:0.33] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; IVY-LABEL: test_vcvtps2ph_256: ; IVY: # %bb.0: ; IVY-NEXT: vcvtps2ph $0, %ymm0, %xmm0 # sched: [3:1.00] -; IVY-NEXT: vcvtps2ph $0, %ymm1, (%rdi) # sched: [7:1.00] -; IVY-NEXT: vzeroupper +; IVY-NEXT: vcvtps2ph $0, %ymm1, (%rdi) # sched: [8:1.00] +; IVY-NEXT: vzeroupper # sched: [100:0.33] ; IVY-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: test_vcvtps2ph_256: diff --git a/llvm/test/CodeGen/X86/fma-schedule.ll b/llvm/test/CodeGen/X86/fma-schedule.ll index 050f3d6..9976de3 100644 --- a/llvm/test/CodeGen/X86/fma-schedule.ll +++ b/llvm/test/CodeGen/X86/fma-schedule.ll @@ -18,9 +18,9 @@ define void @test_vfmaddpd_128(<2 x double> %a0, <2 x double> %a1, <2 x double> ; GENERIC-NEXT: vfmadd132pd {{.*#+}} xmm0 = (xmm0 * xmm2) + xmm1 sched: [5:0.50] ; GENERIC-NEXT: vfmadd213pd {{.*#+}} xmm0 = (xmm1 * xmm0) + xmm2 sched: [5:0.50] ; GENERIC-NEXT: vfmadd231pd {{.*#+}} xmm0 = 
(xmm1 * xmm2) + xmm0 sched: [5:0.50] -; GENERIC-NEXT: vfmadd132pd {{.*#+}} xmm0 = (xmm0 * mem) + xmm1 sched: [9:0.50] -; GENERIC-NEXT: vfmadd213pd {{.*#+}} xmm0 = (xmm1 * xmm0) + mem sched: [9:0.50] -; GENERIC-NEXT: vfmadd231pd {{.*#+}} xmm0 = (xmm1 * mem) + xmm0 sched: [9:0.50] +; GENERIC-NEXT: vfmadd132pd {{.*#+}} xmm0 = (xmm0 * mem) + xmm1 sched: [10:0.50] +; GENERIC-NEXT: vfmadd213pd {{.*#+}} xmm0 = (xmm1 * xmm0) + mem sched: [10:0.50] +; GENERIC-NEXT: vfmadd231pd {{.*#+}} xmm0 = (xmm1 * mem) + xmm0 sched: [10:0.50] ; GENERIC-NEXT: #NO_APP ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -106,9 +106,9 @@ define void @test_vfmaddpd_256(<4 x double> %a0, <4 x double> %a1, <4 x double> ; GENERIC-NEXT: vfmadd132pd {{.*#+}} ymm0 = (ymm0 * ymm2) + ymm1 sched: [5:0.50] ; GENERIC-NEXT: vfmadd213pd {{.*#+}} ymm0 = (ymm1 * ymm0) + ymm2 sched: [5:0.50] ; GENERIC-NEXT: vfmadd231pd {{.*#+}} ymm0 = (ymm1 * ymm2) + ymm0 sched: [5:0.50] -; GENERIC-NEXT: vfmadd132pd {{.*#+}} ymm0 = (ymm0 * mem) + ymm1 sched: [9:0.50] -; GENERIC-NEXT: vfmadd213pd {{.*#+}} ymm0 = (ymm1 * ymm0) + mem sched: [9:0.50] -; GENERIC-NEXT: vfmadd231pd {{.*#+}} ymm0 = (ymm1 * mem) + ymm0 sched: [9:0.50] +; GENERIC-NEXT: vfmadd132pd {{.*#+}} ymm0 = (ymm0 * mem) + ymm1 sched: [10:0.50] +; GENERIC-NEXT: vfmadd213pd {{.*#+}} ymm0 = (ymm1 * ymm0) + mem sched: [10:0.50] +; GENERIC-NEXT: vfmadd231pd {{.*#+}} ymm0 = (ymm1 * mem) + ymm0 sched: [10:0.50] ; GENERIC-NEXT: #NO_APP ; GENERIC-NEXT: vzeroupper # sched: [100:0.33] ; GENERIC-NEXT: retq # sched: [1:1.00] @@ -200,9 +200,9 @@ define void @test_vfmaddps_128(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2 ; GENERIC-NEXT: vfmadd132ps {{.*#+}} xmm0 = (xmm0 * xmm2) + xmm1 sched: [5:0.50] ; GENERIC-NEXT: vfmadd213ps {{.*#+}} xmm0 = (xmm1 * xmm0) + xmm2 sched: [5:0.50] ; GENERIC-NEXT: vfmadd231ps {{.*#+}} xmm0 = (xmm1 * xmm2) + xmm0 sched: [5:0.50] -; GENERIC-NEXT: vfmadd132ps {{.*#+}} xmm0 = (xmm0 * mem) + xmm1 sched: [9:0.50] -; GENERIC-NEXT: vfmadd213ps {{.*#+}} 
xmm0 = (xmm1 * xmm0) + mem sched: [9:0.50] -; GENERIC-NEXT: vfmadd231ps {{.*#+}} xmm0 = (xmm1 * mem) + xmm0 sched: [9:0.50] +; GENERIC-NEXT: vfmadd132ps {{.*#+}} xmm0 = (xmm0 * mem) + xmm1 sched: [10:0.50] +; GENERIC-NEXT: vfmadd213ps {{.*#+}} xmm0 = (xmm1 * xmm0) + mem sched: [10:0.50] +; GENERIC-NEXT: vfmadd231ps {{.*#+}} xmm0 = (xmm1 * mem) + xmm0 sched: [10:0.50] ; GENERIC-NEXT: #NO_APP ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -288,9 +288,9 @@ define void @test_vfmaddps_256(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2 ; GENERIC-NEXT: vfmadd132ps {{.*#+}} ymm0 = (ymm0 * ymm2) + ymm1 sched: [5:0.50] ; GENERIC-NEXT: vfmadd213ps {{.*#+}} ymm0 = (ymm1 * ymm0) + ymm2 sched: [5:0.50] ; GENERIC-NEXT: vfmadd231ps {{.*#+}} ymm0 = (ymm1 * ymm2) + ymm0 sched: [5:0.50] -; GENERIC-NEXT: vfmadd132ps {{.*#+}} ymm0 = (ymm0 * mem) + ymm1 sched: [9:0.50] -; GENERIC-NEXT: vfmadd213ps {{.*#+}} ymm0 = (ymm1 * ymm0) + mem sched: [9:0.50] -; GENERIC-NEXT: vfmadd231ps {{.*#+}} ymm0 = (ymm1 * mem) + ymm0 sched: [9:0.50] +; GENERIC-NEXT: vfmadd132ps {{.*#+}} ymm0 = (ymm0 * mem) + ymm1 sched: [10:0.50] +; GENERIC-NEXT: vfmadd213ps {{.*#+}} ymm0 = (ymm1 * ymm0) + mem sched: [10:0.50] +; GENERIC-NEXT: vfmadd231ps {{.*#+}} ymm0 = (ymm1 * mem) + ymm0 sched: [10:0.50] ; GENERIC-NEXT: #NO_APP ; GENERIC-NEXT: vzeroupper # sched: [100:0.33] ; GENERIC-NEXT: retq # sched: [1:1.00] @@ -382,9 +382,9 @@ define void @test_vfmaddsd_128(<2 x double> %a0, <2 x double> %a1, <2 x double> ; GENERIC-NEXT: vfmadd132sd {{.*#+}} xmm0 = (xmm0 * xmm2) + xmm1 sched: [5:0.50] ; GENERIC-NEXT: vfmadd213sd {{.*#+}} xmm0 = (xmm1 * xmm0) + xmm2 sched: [5:0.50] ; GENERIC-NEXT: vfmadd231sd {{.*#+}} xmm0 = (xmm1 * xmm2) + xmm0 sched: [5:0.50] -; GENERIC-NEXT: vfmadd132sd {{.*#+}} xmm0 = (xmm0 * mem) + xmm1 sched: [9:0.50] -; GENERIC-NEXT: vfmadd213sd {{.*#+}} xmm0 = (xmm1 * xmm0) + mem sched: [9:0.50] -; GENERIC-NEXT: vfmadd231sd {{.*#+}} xmm0 = (xmm1 * mem) + xmm0 sched: [9:0.50] +; GENERIC-NEXT: vfmadd132sd 
{{.*#+}} xmm0 = (xmm0 * mem) + xmm1 sched: [10:0.50] +; GENERIC-NEXT: vfmadd213sd {{.*#+}} xmm0 = (xmm1 * xmm0) + mem sched: [10:0.50] +; GENERIC-NEXT: vfmadd231sd {{.*#+}} xmm0 = (xmm1 * mem) + xmm0 sched: [10:0.50] ; GENERIC-NEXT: #NO_APP ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -470,9 +470,9 @@ define void @test_vfmaddss_128(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2 ; GENERIC-NEXT: vfmadd132ss {{.*#+}} xmm0 = (xmm0 * xmm2) + xmm1 sched: [5:0.50] ; GENERIC-NEXT: vfmadd213ss {{.*#+}} xmm0 = (xmm1 * xmm0) + xmm2 sched: [5:0.50] ; GENERIC-NEXT: vfmadd231ss {{.*#+}} xmm0 = (xmm1 * xmm2) + xmm0 sched: [5:0.50] -; GENERIC-NEXT: vfmadd132ss {{.*#+}} xmm0 = (xmm0 * mem) + xmm1 sched: [9:0.50] -; GENERIC-NEXT: vfmadd213ss {{.*#+}} xmm0 = (xmm1 * xmm0) + mem sched: [9:0.50] -; GENERIC-NEXT: vfmadd231ss {{.*#+}} xmm0 = (xmm1 * mem) + xmm0 sched: [9:0.50] +; GENERIC-NEXT: vfmadd132ss {{.*#+}} xmm0 = (xmm0 * mem) + xmm1 sched: [10:0.50] +; GENERIC-NEXT: vfmadd213ss {{.*#+}} xmm0 = (xmm1 * xmm0) + mem sched: [10:0.50] +; GENERIC-NEXT: vfmadd231ss {{.*#+}} xmm0 = (xmm1 * mem) + xmm0 sched: [10:0.50] ; GENERIC-NEXT: #NO_APP ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -562,9 +562,9 @@ define void @test_vfmaddsubpd_128(<2 x double> %a0, <2 x double> %a1, <2 x doubl ; GENERIC-NEXT: vfmaddsub132pd {{.*#+}} xmm0 = (xmm0 * xmm2) +/- xmm1 sched: [5:0.50] ; GENERIC-NEXT: vfmaddsub213pd {{.*#+}} xmm0 = (xmm1 * xmm0) +/- xmm2 sched: [5:0.50] ; GENERIC-NEXT: vfmaddsub231pd {{.*#+}} xmm0 = (xmm1 * xmm2) +/- xmm0 sched: [5:0.50] -; GENERIC-NEXT: vfmaddsub132pd {{.*#+}} xmm0 = (xmm0 * mem) +/- xmm1 sched: [9:0.50] -; GENERIC-NEXT: vfmaddsub213pd {{.*#+}} xmm0 = (xmm1 * xmm0) +/- mem sched: [9:0.50] -; GENERIC-NEXT: vfmaddsub231pd {{.*#+}} xmm0 = (xmm1 * mem) +/- xmm0 sched: [9:0.50] +; GENERIC-NEXT: vfmaddsub132pd {{.*#+}} xmm0 = (xmm0 * mem) +/- xmm1 sched: [10:0.50] +; GENERIC-NEXT: vfmaddsub213pd {{.*#+}} xmm0 = (xmm1 * xmm0) +/- mem sched: [10:0.50] +; GENERIC-NEXT: 
vfmaddsub231pd {{.*#+}} xmm0 = (xmm1 * mem) +/- xmm0 sched: [10:0.50] ; GENERIC-NEXT: #NO_APP ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -650,9 +650,9 @@ define void @test_vfmaddsubpd_256(<4 x double> %a0, <4 x double> %a1, <4 x doubl ; GENERIC-NEXT: vfmaddsub132pd {{.*#+}} ymm0 = (ymm0 * ymm2) +/- ymm1 sched: [5:0.50] ; GENERIC-NEXT: vfmaddsub213pd {{.*#+}} ymm0 = (ymm1 * ymm0) +/- ymm2 sched: [5:0.50] ; GENERIC-NEXT: vfmaddsub231pd {{.*#+}} ymm0 = (ymm1 * ymm2) +/- ymm0 sched: [5:0.50] -; GENERIC-NEXT: vfmaddsub132pd {{.*#+}} ymm0 = (ymm0 * mem) +/- ymm1 sched: [9:0.50] -; GENERIC-NEXT: vfmaddsub213pd {{.*#+}} ymm0 = (ymm1 * ymm0) +/- mem sched: [9:0.50] -; GENERIC-NEXT: vfmaddsub231pd {{.*#+}} ymm0 = (ymm1 * mem) +/- ymm0 sched: [9:0.50] +; GENERIC-NEXT: vfmaddsub132pd {{.*#+}} ymm0 = (ymm0 * mem) +/- ymm1 sched: [10:0.50] +; GENERIC-NEXT: vfmaddsub213pd {{.*#+}} ymm0 = (ymm1 * ymm0) +/- mem sched: [10:0.50] +; GENERIC-NEXT: vfmaddsub231pd {{.*#+}} ymm0 = (ymm1 * mem) +/- ymm0 sched: [10:0.50] ; GENERIC-NEXT: #NO_APP ; GENERIC-NEXT: vzeroupper # sched: [100:0.33] ; GENERIC-NEXT: retq # sched: [1:1.00] @@ -744,9 +744,9 @@ define void @test_vfmaddsubps_128(<4 x float> %a0, <4 x float> %a1, <4 x float> ; GENERIC-NEXT: vfmaddsub132ps {{.*#+}} xmm0 = (xmm0 * xmm2) +/- xmm1 sched: [5:0.50] ; GENERIC-NEXT: vfmaddsub213ps {{.*#+}} xmm0 = (xmm1 * xmm0) +/- xmm2 sched: [5:0.50] ; GENERIC-NEXT: vfmaddsub231ps {{.*#+}} xmm0 = (xmm1 * xmm2) +/- xmm0 sched: [5:0.50] -; GENERIC-NEXT: vfmaddsub132ps {{.*#+}} xmm0 = (xmm0 * mem) +/- xmm1 sched: [9:0.50] -; GENERIC-NEXT: vfmaddsub213ps {{.*#+}} xmm0 = (xmm1 * xmm0) +/- mem sched: [9:0.50] -; GENERIC-NEXT: vfmaddsub231ps {{.*#+}} xmm0 = (xmm1 * mem) +/- xmm0 sched: [9:0.50] +; GENERIC-NEXT: vfmaddsub132ps {{.*#+}} xmm0 = (xmm0 * mem) +/- xmm1 sched: [10:0.50] +; GENERIC-NEXT: vfmaddsub213ps {{.*#+}} xmm0 = (xmm1 * xmm0) +/- mem sched: [10:0.50] +; GENERIC-NEXT: vfmaddsub231ps {{.*#+}} xmm0 = (xmm1 * mem) +/- xmm0 sched: [10:0.50] 
; GENERIC-NEXT: #NO_APP ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -832,9 +832,9 @@ define void @test_vfmaddsubps_256(<8 x float> %a0, <8 x float> %a1, <8 x float> ; GENERIC-NEXT: vfmaddsub132ps {{.*#+}} ymm0 = (ymm0 * ymm2) +/- ymm1 sched: [5:0.50] ; GENERIC-NEXT: vfmaddsub213ps {{.*#+}} ymm0 = (ymm1 * ymm0) +/- ymm2 sched: [5:0.50] ; GENERIC-NEXT: vfmaddsub231ps {{.*#+}} ymm0 = (ymm1 * ymm2) +/- ymm0 sched: [5:0.50] -; GENERIC-NEXT: vfmaddsub132ps {{.*#+}} ymm0 = (ymm0 * mem) +/- ymm1 sched: [9:0.50] -; GENERIC-NEXT: vfmaddsub213ps {{.*#+}} ymm0 = (ymm1 * ymm0) +/- mem sched: [9:0.50] -; GENERIC-NEXT: vfmaddsub231ps {{.*#+}} ymm0 = (ymm1 * mem) +/- ymm0 sched: [9:0.50] +; GENERIC-NEXT: vfmaddsub132ps {{.*#+}} ymm0 = (ymm0 * mem) +/- ymm1 sched: [10:0.50] +; GENERIC-NEXT: vfmaddsub213ps {{.*#+}} ymm0 = (ymm1 * ymm0) +/- mem sched: [10:0.50] +; GENERIC-NEXT: vfmaddsub231ps {{.*#+}} ymm0 = (ymm1 * mem) +/- ymm0 sched: [10:0.50] ; GENERIC-NEXT: #NO_APP ; GENERIC-NEXT: vzeroupper # sched: [100:0.33] ; GENERIC-NEXT: retq # sched: [1:1.00] @@ -930,9 +930,9 @@ define void @test_vfmsubaddpd_128(<2 x double> %a0, <2 x double> %a1, <2 x doubl ; GENERIC-NEXT: vfmsubadd132pd {{.*#+}} xmm0 = (xmm0 * xmm2) -/+ xmm1 sched: [5:0.50] ; GENERIC-NEXT: vfmsubadd213pd {{.*#+}} xmm0 = (xmm1 * xmm0) -/+ xmm2 sched: [5:0.50] ; GENERIC-NEXT: vfmsubadd231pd {{.*#+}} xmm0 = (xmm1 * xmm2) -/+ xmm0 sched: [5:0.50] -; GENERIC-NEXT: vfmsubadd132pd {{.*#+}} xmm0 = (xmm0 * mem) -/+ xmm1 sched: [9:0.50] -; GENERIC-NEXT: vfmsubadd213pd {{.*#+}} xmm0 = (xmm1 * xmm0) -/+ mem sched: [9:0.50] -; GENERIC-NEXT: vfmsubadd231pd {{.*#+}} xmm0 = (xmm1 * mem) -/+ xmm0 sched: [9:0.50] +; GENERIC-NEXT: vfmsubadd132pd {{.*#+}} xmm0 = (xmm0 * mem) -/+ xmm1 sched: [10:0.50] +; GENERIC-NEXT: vfmsubadd213pd {{.*#+}} xmm0 = (xmm1 * xmm0) -/+ mem sched: [10:0.50] +; GENERIC-NEXT: vfmsubadd231pd {{.*#+}} xmm0 = (xmm1 * mem) -/+ xmm0 sched: [10:0.50] ; GENERIC-NEXT: #NO_APP ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ 
-1018,9 +1018,9 @@ define void @test_vfmsubaddpd_256(<4 x double> %a0, <4 x double> %a1, <4 x doubl ; GENERIC-NEXT: vfmsubadd132pd {{.*#+}} ymm0 = (ymm0 * ymm2) -/+ ymm1 sched: [5:0.50] ; GENERIC-NEXT: vfmsubadd213pd {{.*#+}} ymm0 = (ymm1 * ymm0) -/+ ymm2 sched: [5:0.50] ; GENERIC-NEXT: vfmsubadd231pd {{.*#+}} ymm0 = (ymm1 * ymm2) -/+ ymm0 sched: [5:0.50] -; GENERIC-NEXT: vfmsubadd132pd {{.*#+}} ymm0 = (ymm0 * mem) -/+ ymm1 sched: [9:0.50] -; GENERIC-NEXT: vfmsubadd213pd {{.*#+}} ymm0 = (ymm1 * ymm0) -/+ mem sched: [9:0.50] -; GENERIC-NEXT: vfmsubadd231pd {{.*#+}} ymm0 = (ymm1 * mem) -/+ ymm0 sched: [9:0.50] +; GENERIC-NEXT: vfmsubadd132pd {{.*#+}} ymm0 = (ymm0 * mem) -/+ ymm1 sched: [10:0.50] +; GENERIC-NEXT: vfmsubadd213pd {{.*#+}} ymm0 = (ymm1 * ymm0) -/+ mem sched: [10:0.50] +; GENERIC-NEXT: vfmsubadd231pd {{.*#+}} ymm0 = (ymm1 * mem) -/+ ymm0 sched: [10:0.50] ; GENERIC-NEXT: #NO_APP ; GENERIC-NEXT: vzeroupper # sched: [100:0.33] ; GENERIC-NEXT: retq # sched: [1:1.00] @@ -1112,9 +1112,9 @@ define void @test_vfmsubaddps_128(<4 x float> %a0, <4 x float> %a1, <4 x float> ; GENERIC-NEXT: vfmsubadd132ps {{.*#+}} xmm0 = (xmm0 * xmm2) -/+ xmm1 sched: [5:0.50] ; GENERIC-NEXT: vfmsubadd213ps {{.*#+}} xmm0 = (xmm1 * xmm0) -/+ xmm2 sched: [5:0.50] ; GENERIC-NEXT: vfmsubadd231ps {{.*#+}} xmm0 = (xmm1 * xmm2) -/+ xmm0 sched: [5:0.50] -; GENERIC-NEXT: vfmsubadd132ps {{.*#+}} xmm0 = (xmm0 * mem) -/+ xmm1 sched: [9:0.50] -; GENERIC-NEXT: vfmsubadd213ps {{.*#+}} xmm0 = (xmm1 * xmm0) -/+ mem sched: [9:0.50] -; GENERIC-NEXT: vfmsubadd231ps {{.*#+}} xmm0 = (xmm1 * mem) -/+ xmm0 sched: [9:0.50] +; GENERIC-NEXT: vfmsubadd132ps {{.*#+}} xmm0 = (xmm0 * mem) -/+ xmm1 sched: [10:0.50] +; GENERIC-NEXT: vfmsubadd213ps {{.*#+}} xmm0 = (xmm1 * xmm0) -/+ mem sched: [10:0.50] +; GENERIC-NEXT: vfmsubadd231ps {{.*#+}} xmm0 = (xmm1 * mem) -/+ xmm0 sched: [10:0.50] ; GENERIC-NEXT: #NO_APP ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -1200,9 +1200,9 @@ define void @test_vfmsubaddps_256(<8 x float> 
%a0, <8 x float> %a1, <8 x float> ; GENERIC-NEXT: vfmsubadd132ps {{.*#+}} ymm0 = (ymm0 * ymm2) -/+ ymm1 sched: [5:0.50] ; GENERIC-NEXT: vfmsubadd213ps {{.*#+}} ymm0 = (ymm1 * ymm0) -/+ ymm2 sched: [5:0.50] ; GENERIC-NEXT: vfmsubadd231ps {{.*#+}} ymm0 = (ymm1 * ymm2) -/+ ymm0 sched: [5:0.50] -; GENERIC-NEXT: vfmsubadd132ps {{.*#+}} ymm0 = (ymm0 * mem) -/+ ymm1 sched: [9:0.50] -; GENERIC-NEXT: vfmsubadd213ps {{.*#+}} ymm0 = (ymm1 * ymm0) -/+ mem sched: [9:0.50] -; GENERIC-NEXT: vfmsubadd231ps {{.*#+}} ymm0 = (ymm1 * mem) -/+ ymm0 sched: [9:0.50] +; GENERIC-NEXT: vfmsubadd132ps {{.*#+}} ymm0 = (ymm0 * mem) -/+ ymm1 sched: [10:0.50] +; GENERIC-NEXT: vfmsubadd213ps {{.*#+}} ymm0 = (ymm1 * ymm0) -/+ mem sched: [10:0.50] +; GENERIC-NEXT: vfmsubadd231ps {{.*#+}} ymm0 = (ymm1 * mem) -/+ ymm0 sched: [10:0.50] ; GENERIC-NEXT: #NO_APP ; GENERIC-NEXT: vzeroupper # sched: [100:0.33] ; GENERIC-NEXT: retq # sched: [1:1.00] @@ -1298,9 +1298,9 @@ define void @test_vfmsubpd_128(<2 x double> %a0, <2 x double> %a1, <2 x double> ; GENERIC-NEXT: vfmsub132pd {{.*#+}} xmm0 = (xmm0 * xmm2) - xmm1 sched: [5:0.50] ; GENERIC-NEXT: vfmsub213pd {{.*#+}} xmm0 = (xmm1 * xmm0) - xmm2 sched: [5:0.50] ; GENERIC-NEXT: vfmsub231pd {{.*#+}} xmm0 = (xmm1 * xmm2) - xmm0 sched: [5:0.50] -; GENERIC-NEXT: vfmsub132pd {{.*#+}} xmm0 = (xmm0 * mem) - xmm1 sched: [9:0.50] -; GENERIC-NEXT: vfmsub213pd {{.*#+}} xmm0 = (xmm1 * xmm0) - mem sched: [9:0.50] -; GENERIC-NEXT: vfmsub231pd {{.*#+}} xmm0 = (xmm1 * mem) - xmm0 sched: [9:0.50] +; GENERIC-NEXT: vfmsub132pd {{.*#+}} xmm0 = (xmm0 * mem) - xmm1 sched: [10:0.50] +; GENERIC-NEXT: vfmsub213pd {{.*#+}} xmm0 = (xmm1 * xmm0) - mem sched: [10:0.50] +; GENERIC-NEXT: vfmsub231pd {{.*#+}} xmm0 = (xmm1 * mem) - xmm0 sched: [10:0.50] ; GENERIC-NEXT: #NO_APP ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -1386,9 +1386,9 @@ define void @test_vfmsubpd_256(<4 x double> %a0, <4 x double> %a1, <4 x double> ; GENERIC-NEXT: vfmsub132pd {{.*#+}} ymm0 = (ymm0 * ymm2) - ymm1 sched: 
[5:0.50] ; GENERIC-NEXT: vfmsub213pd {{.*#+}} ymm0 = (ymm1 * ymm0) - ymm2 sched: [5:0.50] ; GENERIC-NEXT: vfmsub231pd {{.*#+}} ymm0 = (ymm1 * ymm2) - ymm0 sched: [5:0.50] -; GENERIC-NEXT: vfmsub132pd {{.*#+}} ymm0 = (ymm0 * mem) - ymm1 sched: [9:0.50] -; GENERIC-NEXT: vfmsub213pd {{.*#+}} ymm0 = (ymm1 * ymm0) - mem sched: [9:0.50] -; GENERIC-NEXT: vfmsub231pd {{.*#+}} ymm0 = (ymm1 * mem) - ymm0 sched: [9:0.50] +; GENERIC-NEXT: vfmsub132pd {{.*#+}} ymm0 = (ymm0 * mem) - ymm1 sched: [10:0.50] +; GENERIC-NEXT: vfmsub213pd {{.*#+}} ymm0 = (ymm1 * ymm0) - mem sched: [10:0.50] +; GENERIC-NEXT: vfmsub231pd {{.*#+}} ymm0 = (ymm1 * mem) - ymm0 sched: [10:0.50] ; GENERIC-NEXT: #NO_APP ; GENERIC-NEXT: vzeroupper # sched: [100:0.33] ; GENERIC-NEXT: retq # sched: [1:1.00] @@ -1480,9 +1480,9 @@ define void @test_vfmsubps_128(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2 ; GENERIC-NEXT: vfmsub132ps {{.*#+}} xmm0 = (xmm0 * xmm2) - xmm1 sched: [5:0.50] ; GENERIC-NEXT: vfmsub213ps {{.*#+}} xmm0 = (xmm1 * xmm0) - xmm2 sched: [5:0.50] ; GENERIC-NEXT: vfmsub231ps {{.*#+}} xmm0 = (xmm1 * xmm2) - xmm0 sched: [5:0.50] -; GENERIC-NEXT: vfmsub132ps {{.*#+}} xmm0 = (xmm0 * mem) - xmm1 sched: [9:0.50] -; GENERIC-NEXT: vfmsub213ps {{.*#+}} xmm0 = (xmm1 * xmm0) - mem sched: [9:0.50] -; GENERIC-NEXT: vfmsub231ps {{.*#+}} xmm0 = (xmm1 * mem) - xmm0 sched: [9:0.50] +; GENERIC-NEXT: vfmsub132ps {{.*#+}} xmm0 = (xmm0 * mem) - xmm1 sched: [10:0.50] +; GENERIC-NEXT: vfmsub213ps {{.*#+}} xmm0 = (xmm1 * xmm0) - mem sched: [10:0.50] +; GENERIC-NEXT: vfmsub231ps {{.*#+}} xmm0 = (xmm1 * mem) - xmm0 sched: [10:0.50] ; GENERIC-NEXT: #NO_APP ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -1568,9 +1568,9 @@ define void @test_vfmsubps_256(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2 ; GENERIC-NEXT: vfmsub132ps {{.*#+}} ymm0 = (ymm0 * ymm2) - ymm1 sched: [5:0.50] ; GENERIC-NEXT: vfmsub213ps {{.*#+}} ymm0 = (ymm1 * ymm0) - ymm2 sched: [5:0.50] ; GENERIC-NEXT: vfmsub231ps {{.*#+}} ymm0 = (ymm1 * ymm2) - ymm0 
sched: [5:0.50] -; GENERIC-NEXT: vfmsub132ps {{.*#+}} ymm0 = (ymm0 * mem) - ymm1 sched: [9:0.50] -; GENERIC-NEXT: vfmsub213ps {{.*#+}} ymm0 = (ymm1 * ymm0) - mem sched: [9:0.50] -; GENERIC-NEXT: vfmsub231ps {{.*#+}} ymm0 = (ymm1 * mem) - ymm0 sched: [9:0.50] +; GENERIC-NEXT: vfmsub132ps {{.*#+}} ymm0 = (ymm0 * mem) - ymm1 sched: [10:0.50] +; GENERIC-NEXT: vfmsub213ps {{.*#+}} ymm0 = (ymm1 * ymm0) - mem sched: [10:0.50] +; GENERIC-NEXT: vfmsub231ps {{.*#+}} ymm0 = (ymm1 * mem) - ymm0 sched: [10:0.50] ; GENERIC-NEXT: #NO_APP ; GENERIC-NEXT: vzeroupper # sched: [100:0.33] ; GENERIC-NEXT: retq # sched: [1:1.00] @@ -1662,9 +1662,9 @@ define void @test_vfmsubsd_128(<2 x double> %a0, <2 x double> %a1, <2 x double> ; GENERIC-NEXT: vfmsub132sd {{.*#+}} xmm0 = (xmm0 * xmm2) - xmm1 sched: [5:0.50] ; GENERIC-NEXT: vfmsub213sd {{.*#+}} xmm0 = (xmm1 * xmm0) - xmm2 sched: [5:0.50] ; GENERIC-NEXT: vfmsub231sd {{.*#+}} xmm0 = (xmm1 * xmm2) - xmm0 sched: [5:0.50] -; GENERIC-NEXT: vfmsub132sd {{.*#+}} xmm0 = (xmm0 * mem) - xmm1 sched: [9:0.50] -; GENERIC-NEXT: vfmsub213sd {{.*#+}} xmm0 = (xmm1 * xmm0) - mem sched: [9:0.50] -; GENERIC-NEXT: vfmsub231sd {{.*#+}} xmm0 = (xmm1 * mem) - xmm0 sched: [9:0.50] +; GENERIC-NEXT: vfmsub132sd {{.*#+}} xmm0 = (xmm0 * mem) - xmm1 sched: [10:0.50] +; GENERIC-NEXT: vfmsub213sd {{.*#+}} xmm0 = (xmm1 * xmm0) - mem sched: [10:0.50] +; GENERIC-NEXT: vfmsub231sd {{.*#+}} xmm0 = (xmm1 * mem) - xmm0 sched: [10:0.50] ; GENERIC-NEXT: #NO_APP ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -1750,9 +1750,9 @@ define void @test_vfmsubss_128(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2 ; GENERIC-NEXT: vfmsub132ss {{.*#+}} xmm0 = (xmm0 * xmm2) - xmm1 sched: [5:0.50] ; GENERIC-NEXT: vfmsub213ss {{.*#+}} xmm0 = (xmm1 * xmm0) - xmm2 sched: [5:0.50] ; GENERIC-NEXT: vfmsub231ss {{.*#+}} xmm0 = (xmm1 * xmm2) - xmm0 sched: [5:0.50] -; GENERIC-NEXT: vfmsub132ss {{.*#+}} xmm0 = (xmm0 * mem) - xmm1 sched: [9:0.50] -; GENERIC-NEXT: vfmsub213ss {{.*#+}} xmm0 = (xmm1 * xmm0) 
- mem sched: [9:0.50] -; GENERIC-NEXT: vfmsub231ss {{.*#+}} xmm0 = (xmm1 * mem) - xmm0 sched: [9:0.50] +; GENERIC-NEXT: vfmsub132ss {{.*#+}} xmm0 = (xmm0 * mem) - xmm1 sched: [10:0.50] +; GENERIC-NEXT: vfmsub213ss {{.*#+}} xmm0 = (xmm1 * xmm0) - mem sched: [10:0.50] +; GENERIC-NEXT: vfmsub231ss {{.*#+}} xmm0 = (xmm1 * mem) - xmm0 sched: [10:0.50] ; GENERIC-NEXT: #NO_APP ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -1842,9 +1842,9 @@ define void @test_vfnmaddpd_128(<2 x double> %a0, <2 x double> %a1, <2 x double> ; GENERIC-NEXT: vfnmadd132pd {{.*#+}} xmm0 = -(xmm0 * xmm2) + xmm1 sched: [5:0.50] ; GENERIC-NEXT: vfnmadd213pd {{.*#+}} xmm0 = -(xmm1 * xmm0) + xmm2 sched: [5:0.50] ; GENERIC-NEXT: vfnmadd231pd {{.*#+}} xmm0 = -(xmm1 * xmm2) + xmm0 sched: [5:0.50] -; GENERIC-NEXT: vfnmadd132pd {{.*#+}} xmm0 = -(xmm0 * mem) + xmm1 sched: [9:0.50] -; GENERIC-NEXT: vfnmadd213pd {{.*#+}} xmm0 = -(xmm1 * xmm0) + mem sched: [9:0.50] -; GENERIC-NEXT: vfnmadd231pd {{.*#+}} xmm0 = -(xmm1 * mem) + xmm0 sched: [9:0.50] +; GENERIC-NEXT: vfnmadd132pd {{.*#+}} xmm0 = -(xmm0 * mem) + xmm1 sched: [10:0.50] +; GENERIC-NEXT: vfnmadd213pd {{.*#+}} xmm0 = -(xmm1 * xmm0) + mem sched: [10:0.50] +; GENERIC-NEXT: vfnmadd231pd {{.*#+}} xmm0 = -(xmm1 * mem) + xmm0 sched: [10:0.50] ; GENERIC-NEXT: #NO_APP ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -1930,9 +1930,9 @@ define void @test_vfnmaddpd_256(<4 x double> %a0, <4 x double> %a1, <4 x double> ; GENERIC-NEXT: vfnmadd132pd {{.*#+}} ymm0 = -(ymm0 * ymm2) + ymm1 sched: [5:0.50] ; GENERIC-NEXT: vfnmadd213pd {{.*#+}} ymm0 = -(ymm1 * ymm0) + ymm2 sched: [5:0.50] ; GENERIC-NEXT: vfnmadd231pd {{.*#+}} ymm0 = -(ymm1 * ymm2) + ymm0 sched: [5:0.50] -; GENERIC-NEXT: vfnmadd132pd {{.*#+}} ymm0 = -(ymm0 * mem) + ymm1 sched: [9:0.50] -; GENERIC-NEXT: vfnmadd213pd {{.*#+}} ymm0 = -(ymm1 * ymm0) + mem sched: [9:0.50] -; GENERIC-NEXT: vfnmadd231pd {{.*#+}} ymm0 = -(ymm1 * mem) + ymm0 sched: [9:0.50] +; GENERIC-NEXT: vfnmadd132pd {{.*#+}} ymm0 = -(ymm0 * mem) + ymm1 
sched: [10:0.50] +; GENERIC-NEXT: vfnmadd213pd {{.*#+}} ymm0 = -(ymm1 * ymm0) + mem sched: [10:0.50] +; GENERIC-NEXT: vfnmadd231pd {{.*#+}} ymm0 = -(ymm1 * mem) + ymm0 sched: [10:0.50] ; GENERIC-NEXT: #NO_APP ; GENERIC-NEXT: vzeroupper # sched: [100:0.33] ; GENERIC-NEXT: retq # sched: [1:1.00] @@ -2024,9 +2024,9 @@ define void @test_vfnmaddps_128(<4 x float> %a0, <4 x float> %a1, <4 x float> %a ; GENERIC-NEXT: vfnmadd132ps {{.*#+}} xmm0 = -(xmm0 * xmm2) + xmm1 sched: [5:0.50] ; GENERIC-NEXT: vfnmadd213ps {{.*#+}} xmm0 = -(xmm1 * xmm0) + xmm2 sched: [5:0.50] ; GENERIC-NEXT: vfnmadd231ps {{.*#+}} xmm0 = -(xmm1 * xmm2) + xmm0 sched: [5:0.50] -; GENERIC-NEXT: vfnmadd132ps {{.*#+}} xmm0 = -(xmm0 * mem) + xmm1 sched: [9:0.50] -; GENERIC-NEXT: vfnmadd213ps {{.*#+}} xmm0 = -(xmm1 * xmm0) + mem sched: [9:0.50] -; GENERIC-NEXT: vfnmadd231ps {{.*#+}} xmm0 = -(xmm1 * mem) + xmm0 sched: [9:0.50] +; GENERIC-NEXT: vfnmadd132ps {{.*#+}} xmm0 = -(xmm0 * mem) + xmm1 sched: [10:0.50] +; GENERIC-NEXT: vfnmadd213ps {{.*#+}} xmm0 = -(xmm1 * xmm0) + mem sched: [10:0.50] +; GENERIC-NEXT: vfnmadd231ps {{.*#+}} xmm0 = -(xmm1 * mem) + xmm0 sched: [10:0.50] ; GENERIC-NEXT: #NO_APP ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -2112,9 +2112,9 @@ define void @test_vfnmaddps_256(<8 x float> %a0, <8 x float> %a1, <8 x float> %a ; GENERIC-NEXT: vfnmadd132ps {{.*#+}} ymm0 = -(ymm0 * ymm2) + ymm1 sched: [5:0.50] ; GENERIC-NEXT: vfnmadd213ps {{.*#+}} ymm0 = -(ymm1 * ymm0) + ymm2 sched: [5:0.50] ; GENERIC-NEXT: vfnmadd231ps {{.*#+}} ymm0 = -(ymm1 * ymm2) + ymm0 sched: [5:0.50] -; GENERIC-NEXT: vfnmadd132ps {{.*#+}} ymm0 = -(ymm0 * mem) + ymm1 sched: [9:0.50] -; GENERIC-NEXT: vfnmadd213ps {{.*#+}} ymm0 = -(ymm1 * ymm0) + mem sched: [9:0.50] -; GENERIC-NEXT: vfnmadd231ps {{.*#+}} ymm0 = -(ymm1 * mem) + ymm0 sched: [9:0.50] +; GENERIC-NEXT: vfnmadd132ps {{.*#+}} ymm0 = -(ymm0 * mem) + ymm1 sched: [10:0.50] +; GENERIC-NEXT: vfnmadd213ps {{.*#+}} ymm0 = -(ymm1 * ymm0) + mem sched: [10:0.50] +; GENERIC-NEXT: 
vfnmadd231ps {{.*#+}} ymm0 = -(ymm1 * mem) + ymm0 sched: [10:0.50] ; GENERIC-NEXT: #NO_APP ; GENERIC-NEXT: vzeroupper # sched: [100:0.33] ; GENERIC-NEXT: retq # sched: [1:1.00] @@ -2206,9 +2206,9 @@ define void @test_vfnmaddsd_128(<2 x double> %a0, <2 x double> %a1, <2 x double> ; GENERIC-NEXT: vfnmadd132sd {{.*#+}} xmm0 = -(xmm0 * xmm2) + xmm1 sched: [5:0.50] ; GENERIC-NEXT: vfnmadd213sd {{.*#+}} xmm0 = -(xmm1 * xmm0) + xmm2 sched: [5:0.50] ; GENERIC-NEXT: vfnmadd231sd {{.*#+}} xmm0 = -(xmm1 * xmm2) + xmm0 sched: [5:0.50] -; GENERIC-NEXT: vfnmadd132sd {{.*#+}} xmm0 = -(xmm0 * mem) + xmm1 sched: [9:0.50] -; GENERIC-NEXT: vfnmadd213sd {{.*#+}} xmm0 = -(xmm1 * xmm0) + mem sched: [9:0.50] -; GENERIC-NEXT: vfnmadd231sd {{.*#+}} xmm0 = -(xmm1 * mem) + xmm0 sched: [9:0.50] +; GENERIC-NEXT: vfnmadd132sd {{.*#+}} xmm0 = -(xmm0 * mem) + xmm1 sched: [10:0.50] +; GENERIC-NEXT: vfnmadd213sd {{.*#+}} xmm0 = -(xmm1 * xmm0) + mem sched: [10:0.50] +; GENERIC-NEXT: vfnmadd231sd {{.*#+}} xmm0 = -(xmm1 * mem) + xmm0 sched: [10:0.50] ; GENERIC-NEXT: #NO_APP ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -2294,9 +2294,9 @@ define void @test_vfnmaddss_128(<4 x float> %a0, <4 x float> %a1, <4 x float> %a ; GENERIC-NEXT: vfnmadd132ss {{.*#+}} xmm0 = -(xmm0 * xmm2) + xmm1 sched: [5:0.50] ; GENERIC-NEXT: vfnmadd213ss {{.*#+}} xmm0 = -(xmm1 * xmm0) + xmm2 sched: [5:0.50] ; GENERIC-NEXT: vfnmadd231ss {{.*#+}} xmm0 = -(xmm1 * xmm2) + xmm0 sched: [5:0.50] -; GENERIC-NEXT: vfnmadd132ss {{.*#+}} xmm0 = -(xmm0 * mem) + xmm1 sched: [9:0.50] -; GENERIC-NEXT: vfnmadd213ss {{.*#+}} xmm0 = -(xmm1 * xmm0) + mem sched: [9:0.50] -; GENERIC-NEXT: vfnmadd231ss {{.*#+}} xmm0 = -(xmm1 * mem) + xmm0 sched: [9:0.50] +; GENERIC-NEXT: vfnmadd132ss {{.*#+}} xmm0 = -(xmm0 * mem) + xmm1 sched: [10:0.50] +; GENERIC-NEXT: vfnmadd213ss {{.*#+}} xmm0 = -(xmm1 * xmm0) + mem sched: [10:0.50] +; GENERIC-NEXT: vfnmadd231ss {{.*#+}} xmm0 = -(xmm1 * mem) + xmm0 sched: [10:0.50] ; GENERIC-NEXT: #NO_APP ; GENERIC-NEXT: retq # 
sched: [1:1.00] ; @@ -2386,9 +2386,9 @@ define void @test_vfnmsubpd_128(<2 x double> %a0, <2 x double> %a1, <2 x double> ; GENERIC-NEXT: vfnmsub132pd {{.*#+}} xmm0 = -(xmm0 * xmm2) - xmm1 sched: [5:0.50] ; GENERIC-NEXT: vfnmsub213pd {{.*#+}} xmm0 = -(xmm1 * xmm0) - xmm2 sched: [5:0.50] ; GENERIC-NEXT: vfnmsub231pd {{.*#+}} xmm0 = -(xmm1 * xmm2) - xmm0 sched: [5:0.50] -; GENERIC-NEXT: vfnmsub132pd {{.*#+}} xmm0 = -(xmm0 * mem) - xmm1 sched: [9:0.50] -; GENERIC-NEXT: vfnmsub213pd {{.*#+}} xmm0 = -(xmm1 * xmm0) - mem sched: [9:0.50] -; GENERIC-NEXT: vfnmsub231pd {{.*#+}} xmm0 = -(xmm1 * mem) - xmm0 sched: [9:0.50] +; GENERIC-NEXT: vfnmsub132pd {{.*#+}} xmm0 = -(xmm0 * mem) - xmm1 sched: [10:0.50] +; GENERIC-NEXT: vfnmsub213pd {{.*#+}} xmm0 = -(xmm1 * xmm0) - mem sched: [10:0.50] +; GENERIC-NEXT: vfnmsub231pd {{.*#+}} xmm0 = -(xmm1 * mem) - xmm0 sched: [10:0.50] ; GENERIC-NEXT: #NO_APP ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -2474,9 +2474,9 @@ define void @test_vfnmsubpd_256(<4 x double> %a0, <4 x double> %a1, <4 x double> ; GENERIC-NEXT: vfnmsub132pd {{.*#+}} ymm0 = -(ymm0 * ymm2) - ymm1 sched: [5:0.50] ; GENERIC-NEXT: vfnmsub213pd {{.*#+}} ymm0 = -(ymm1 * ymm0) - ymm2 sched: [5:0.50] ; GENERIC-NEXT: vfnmsub231pd {{.*#+}} ymm0 = -(ymm1 * ymm2) - ymm0 sched: [5:0.50] -; GENERIC-NEXT: vfnmsub132pd {{.*#+}} ymm0 = -(ymm0 * mem) - ymm1 sched: [9:0.50] -; GENERIC-NEXT: vfnmsub213pd {{.*#+}} ymm0 = -(ymm1 * ymm0) - mem sched: [9:0.50] -; GENERIC-NEXT: vfnmsub231pd {{.*#+}} ymm0 = -(ymm1 * mem) - ymm0 sched: [9:0.50] +; GENERIC-NEXT: vfnmsub132pd {{.*#+}} ymm0 = -(ymm0 * mem) - ymm1 sched: [10:0.50] +; GENERIC-NEXT: vfnmsub213pd {{.*#+}} ymm0 = -(ymm1 * ymm0) - mem sched: [10:0.50] +; GENERIC-NEXT: vfnmsub231pd {{.*#+}} ymm0 = -(ymm1 * mem) - ymm0 sched: [10:0.50] ; GENERIC-NEXT: #NO_APP ; GENERIC-NEXT: vzeroupper # sched: [100:0.33] ; GENERIC-NEXT: retq # sched: [1:1.00] @@ -2568,9 +2568,9 @@ define void @test_vfnmsubps_128(<4 x float> %a0, <4 x float> %a1, <4 x float> 
%a ; GENERIC-NEXT: vfnmsub132ps {{.*#+}} xmm0 = -(xmm0 * xmm2) - xmm1 sched: [5:0.50] ; GENERIC-NEXT: vfnmsub213ps {{.*#+}} xmm0 = -(xmm1 * xmm0) - xmm2 sched: [5:0.50] ; GENERIC-NEXT: vfnmsub231ps {{.*#+}} xmm0 = -(xmm1 * xmm2) - xmm0 sched: [5:0.50] -; GENERIC-NEXT: vfnmsub132ps {{.*#+}} xmm0 = -(xmm0 * mem) - xmm1 sched: [9:0.50] -; GENERIC-NEXT: vfnmsub213ps {{.*#+}} xmm0 = -(xmm1 * xmm0) - mem sched: [9:0.50] -; GENERIC-NEXT: vfnmsub231ps {{.*#+}} xmm0 = -(xmm1 * mem) - xmm0 sched: [9:0.50] +; GENERIC-NEXT: vfnmsub132ps {{.*#+}} xmm0 = -(xmm0 * mem) - xmm1 sched: [10:0.50] +; GENERIC-NEXT: vfnmsub213ps {{.*#+}} xmm0 = -(xmm1 * xmm0) - mem sched: [10:0.50] +; GENERIC-NEXT: vfnmsub231ps {{.*#+}} xmm0 = -(xmm1 * mem) - xmm0 sched: [10:0.50] ; GENERIC-NEXT: #NO_APP ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -2656,9 +2656,9 @@ define void @test_vfnmsubps_256(<8 x float> %a0, <8 x float> %a1, <8 x float> %a ; GENERIC-NEXT: vfnmsub132ps {{.*#+}} ymm0 = -(ymm0 * ymm2) - ymm1 sched: [5:0.50] ; GENERIC-NEXT: vfnmsub213ps {{.*#+}} ymm0 = -(ymm1 * ymm0) - ymm2 sched: [5:0.50] ; GENERIC-NEXT: vfnmsub231ps {{.*#+}} ymm0 = -(ymm1 * ymm2) - ymm0 sched: [5:0.50] -; GENERIC-NEXT: vfnmsub132ps {{.*#+}} ymm0 = -(ymm0 * mem) - ymm1 sched: [9:0.50] -; GENERIC-NEXT: vfnmsub213ps {{.*#+}} ymm0 = -(ymm1 * ymm0) - mem sched: [9:0.50] -; GENERIC-NEXT: vfnmsub231ps {{.*#+}} ymm0 = -(ymm1 * mem) - ymm0 sched: [9:0.50] +; GENERIC-NEXT: vfnmsub132ps {{.*#+}} ymm0 = -(ymm0 * mem) - ymm1 sched: [10:0.50] +; GENERIC-NEXT: vfnmsub213ps {{.*#+}} ymm0 = -(ymm1 * ymm0) - mem sched: [10:0.50] +; GENERIC-NEXT: vfnmsub231ps {{.*#+}} ymm0 = -(ymm1 * mem) - ymm0 sched: [10:0.50] ; GENERIC-NEXT: #NO_APP ; GENERIC-NEXT: vzeroupper # sched: [100:0.33] ; GENERIC-NEXT: retq # sched: [1:1.00] @@ -2750,9 +2750,9 @@ define void @test_vfnmsubsd_128(<2 x double> %a0, <2 x double> %a1, <2 x double> ; GENERIC-NEXT: vfnmsub132sd {{.*#+}} xmm0 = -(xmm0 * xmm2) - xmm1 sched: [5:0.50] ; GENERIC-NEXT: vfnmsub213sd 
{{.*#+}} xmm0 = -(xmm1 * xmm0) - xmm2 sched: [5:0.50] ; GENERIC-NEXT: vfnmsub231sd {{.*#+}} xmm0 = -(xmm1 * xmm2) - xmm0 sched: [5:0.50] -; GENERIC-NEXT: vfnmsub132sd {{.*#+}} xmm0 = -(xmm0 * mem) - xmm1 sched: [9:0.50] -; GENERIC-NEXT: vfnmsub213sd {{.*#+}} xmm0 = -(xmm1 * xmm0) - mem sched: [9:0.50] -; GENERIC-NEXT: vfnmsub231sd {{.*#+}} xmm0 = -(xmm1 * mem) - xmm0 sched: [9:0.50] +; GENERIC-NEXT: vfnmsub132sd {{.*#+}} xmm0 = -(xmm0 * mem) - xmm1 sched: [10:0.50] +; GENERIC-NEXT: vfnmsub213sd {{.*#+}} xmm0 = -(xmm1 * xmm0) - mem sched: [10:0.50] +; GENERIC-NEXT: vfnmsub231sd {{.*#+}} xmm0 = -(xmm1 * mem) - xmm0 sched: [10:0.50] ; GENERIC-NEXT: #NO_APP ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -2838,9 +2838,9 @@ define void @test_vfnmsubss_128(<4 x float> %a0, <4 x float> %a1, <4 x float> %a ; GENERIC-NEXT: vfnmsub132ss {{.*#+}} xmm0 = -(xmm0 * xmm2) - xmm1 sched: [5:0.50] ; GENERIC-NEXT: vfnmsub213ss {{.*#+}} xmm0 = -(xmm1 * xmm0) - xmm2 sched: [5:0.50] ; GENERIC-NEXT: vfnmsub231ss {{.*#+}} xmm0 = -(xmm1 * xmm2) - xmm0 sched: [5:0.50] -; GENERIC-NEXT: vfnmsub132ss {{.*#+}} xmm0 = -(xmm0 * mem) - xmm1 sched: [9:0.50] -; GENERIC-NEXT: vfnmsub213ss {{.*#+}} xmm0 = -(xmm1 * xmm0) - mem sched: [9:0.50] -; GENERIC-NEXT: vfnmsub231ss {{.*#+}} xmm0 = -(xmm1 * mem) - xmm0 sched: [9:0.50] +; GENERIC-NEXT: vfnmsub132ss {{.*#+}} xmm0 = -(xmm0 * mem) - xmm1 sched: [10:0.50] +; GENERIC-NEXT: vfnmsub213ss {{.*#+}} xmm0 = -(xmm1 * xmm0) - mem sched: [10:0.50] +; GENERIC-NEXT: vfnmsub231ss {{.*#+}} xmm0 = -(xmm1 * mem) - xmm0 sched: [10:0.50] ; GENERIC-NEXT: #NO_APP ; GENERIC-NEXT: retq # sched: [1:1.00] ; diff --git a/llvm/test/CodeGen/X86/fma4-schedule.ll b/llvm/test/CodeGen/X86/fma4-schedule.ll index 468b708..65d5273 100644 --- a/llvm/test/CodeGen/X86/fma4-schedule.ll +++ b/llvm/test/CodeGen/X86/fma4-schedule.ll @@ -14,8 +14,8 @@ define void @test_vfmaddpd_128(<2 x double> %a0, <2 x double> %a1, <2 x double> ; GENERIC: # %bb.0: ; GENERIC-NEXT: #APP ; GENERIC-NEXT: vfmaddpd 
%xmm2, %xmm1, %xmm0, %xmm0 # sched: [5:0.50] -; GENERIC-NEXT: vfmaddpd (%rdi), %xmm1, %xmm0, %xmm0 # sched: [9:0.50] -; GENERIC-NEXT: vfmaddpd %xmm1, (%rdi), %xmm0, %xmm0 # sched: [9:0.50] +; GENERIC-NEXT: vfmaddpd (%rdi), %xmm1, %xmm0, %xmm0 # sched: [10:0.50] +; GENERIC-NEXT: vfmaddpd %xmm1, (%rdi), %xmm0, %xmm0 # sched: [10:0.50] ; GENERIC-NEXT: #NO_APP ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -36,8 +36,8 @@ define void @test_vfmaddpd_256(<4 x double> %a0, <4 x double> %a1, <4 x double> ; GENERIC: # %bb.0: ; GENERIC-NEXT: #APP ; GENERIC-NEXT: vfmaddpd %ymm2, %ymm1, %ymm0, %ymm0 # sched: [5:0.50] -; GENERIC-NEXT: vfmaddpd (%rdi), %ymm1, %ymm0, %ymm0 # sched: [9:0.50] -; GENERIC-NEXT: vfmaddpd %ymm1, (%rdi), %ymm0, %ymm0 # sched: [9:0.50] +; GENERIC-NEXT: vfmaddpd (%rdi), %ymm1, %ymm0, %ymm0 # sched: [10:0.50] +; GENERIC-NEXT: vfmaddpd %ymm1, (%rdi), %ymm0, %ymm0 # sched: [10:0.50] ; GENERIC-NEXT: #NO_APP ; GENERIC-NEXT: vzeroupper # sched: [100:0.33] ; GENERIC-NEXT: retq # sched: [1:1.00] @@ -60,8 +60,8 @@ define void @test_vfmaddps_128(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2 ; GENERIC: # %bb.0: ; GENERIC-NEXT: #APP ; GENERIC-NEXT: vfmaddps %xmm2, %xmm1, %xmm0, %xmm0 # sched: [5:0.50] -; GENERIC-NEXT: vfmaddps (%rdi), %xmm1, %xmm0, %xmm0 # sched: [9:0.50] -; GENERIC-NEXT: vfmaddps %xmm1, (%rdi), %xmm0, %xmm0 # sched: [9:0.50] +; GENERIC-NEXT: vfmaddps (%rdi), %xmm1, %xmm0, %xmm0 # sched: [10:0.50] +; GENERIC-NEXT: vfmaddps %xmm1, (%rdi), %xmm0, %xmm0 # sched: [10:0.50] ; GENERIC-NEXT: #NO_APP ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -82,8 +82,8 @@ define void @test_vfmaddps_256(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2 ; GENERIC: # %bb.0: ; GENERIC-NEXT: #APP ; GENERIC-NEXT: vfmaddps %ymm2, %ymm1, %ymm0, %ymm0 # sched: [5:0.50] -; GENERIC-NEXT: vfmaddps (%rdi), %ymm1, %ymm0, %ymm0 # sched: [9:0.50] -; GENERIC-NEXT: vfmaddps %ymm1, (%rdi), %ymm0, %ymm0 # sched: [9:0.50] +; GENERIC-NEXT: vfmaddps (%rdi), %ymm1, %ymm0, %ymm0 # sched: 
[10:0.50] +; GENERIC-NEXT: vfmaddps %ymm1, (%rdi), %ymm0, %ymm0 # sched: [10:0.50] ; GENERIC-NEXT: #NO_APP ; GENERIC-NEXT: vzeroupper # sched: [100:0.33] ; GENERIC-NEXT: retq # sched: [1:1.00] @@ -106,8 +106,8 @@ define void @test_vfmaddsd_128(<2 x double> %a0, <2 x double> %a1, <2 x double> ; GENERIC: # %bb.0: ; GENERIC-NEXT: #APP ; GENERIC-NEXT: vfmaddsd %xmm2, %xmm1, %xmm0, %xmm0 # sched: [5:0.50] -; GENERIC-NEXT: vfmaddsd (%rdi), %xmm1, %xmm0, %xmm0 # sched: [9:0.50] -; GENERIC-NEXT: vfmaddsd %xmm1, (%rdi), %xmm0, %xmm0 # sched: [9:0.50] +; GENERIC-NEXT: vfmaddsd (%rdi), %xmm1, %xmm0, %xmm0 # sched: [10:0.50] +; GENERIC-NEXT: vfmaddsd %xmm1, (%rdi), %xmm0, %xmm0 # sched: [10:0.50] ; GENERIC-NEXT: #NO_APP ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -128,8 +128,8 @@ define void @test_vfmaddss_128(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2 ; GENERIC: # %bb.0: ; GENERIC-NEXT: #APP ; GENERIC-NEXT: vfmaddss %xmm2, %xmm1, %xmm0, %xmm0 # sched: [5:0.50] -; GENERIC-NEXT: vfmaddss (%rdi), %xmm1, %xmm0, %xmm0 # sched: [9:0.50] -; GENERIC-NEXT: vfmaddss %xmm1, (%rdi), %xmm0, %xmm0 # sched: [9:0.50] +; GENERIC-NEXT: vfmaddss (%rdi), %xmm1, %xmm0, %xmm0 # sched: [10:0.50] +; GENERIC-NEXT: vfmaddss %xmm1, (%rdi), %xmm0, %xmm0 # sched: [10:0.50] ; GENERIC-NEXT: #NO_APP ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -154,8 +154,8 @@ define void @test_vfmaddsubpd_128(<2 x double> %a0, <2 x double> %a1, <2 x doubl ; GENERIC: # %bb.0: ; GENERIC-NEXT: #APP ; GENERIC-NEXT: vfmaddsubpd %xmm2, %xmm1, %xmm0, %xmm0 # sched: [5:0.50] -; GENERIC-NEXT: vfmaddsubpd (%rdi), %xmm1, %xmm0, %xmm0 # sched: [9:0.50] -; GENERIC-NEXT: vfmaddsubpd %xmm1, (%rdi), %xmm0, %xmm0 # sched: [9:0.50] +; GENERIC-NEXT: vfmaddsubpd (%rdi), %xmm1, %xmm0, %xmm0 # sched: [10:0.50] +; GENERIC-NEXT: vfmaddsubpd %xmm1, (%rdi), %xmm0, %xmm0 # sched: [10:0.50] ; GENERIC-NEXT: #NO_APP ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -176,8 +176,8 @@ define void @test_vfmaddsubpd_256(<4 x double> %a0, <4 x double> %a1, 
<4 x doubl ; GENERIC: # %bb.0: ; GENERIC-NEXT: #APP ; GENERIC-NEXT: vfmaddsubpd %ymm2, %ymm1, %ymm0, %ymm0 # sched: [5:0.50] -; GENERIC-NEXT: vfmaddsubpd (%rdi), %ymm1, %ymm0, %ymm0 # sched: [9:0.50] -; GENERIC-NEXT: vfmaddsubpd %ymm1, (%rdi), %ymm0, %ymm0 # sched: [9:0.50] +; GENERIC-NEXT: vfmaddsubpd (%rdi), %ymm1, %ymm0, %ymm0 # sched: [10:0.50] +; GENERIC-NEXT: vfmaddsubpd %ymm1, (%rdi), %ymm0, %ymm0 # sched: [10:0.50] ; GENERIC-NEXT: #NO_APP ; GENERIC-NEXT: vzeroupper # sched: [100:0.33] ; GENERIC-NEXT: retq # sched: [1:1.00] @@ -200,8 +200,8 @@ define void @test_vfmaddsubps_128(<4 x float> %a0, <4 x float> %a1, <4 x float> ; GENERIC: # %bb.0: ; GENERIC-NEXT: #APP ; GENERIC-NEXT: vfmaddsubps %xmm2, %xmm1, %xmm0, %xmm0 # sched: [5:0.50] -; GENERIC-NEXT: vfmaddsubps (%rdi), %xmm1, %xmm0, %xmm0 # sched: [9:0.50] -; GENERIC-NEXT: vfmaddsubps %xmm1, (%rdi), %xmm0, %xmm0 # sched: [9:0.50] +; GENERIC-NEXT: vfmaddsubps (%rdi), %xmm1, %xmm0, %xmm0 # sched: [10:0.50] +; GENERIC-NEXT: vfmaddsubps %xmm1, (%rdi), %xmm0, %xmm0 # sched: [10:0.50] ; GENERIC-NEXT: #NO_APP ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -222,8 +222,8 @@ define void @test_vfmaddsubps_256(<8 x float> %a0, <8 x float> %a1, <8 x float> ; GENERIC: # %bb.0: ; GENERIC-NEXT: #APP ; GENERIC-NEXT: vfmaddsubps %ymm2, %ymm1, %ymm0, %ymm0 # sched: [5:0.50] -; GENERIC-NEXT: vfmaddsubps (%rdi), %ymm1, %ymm0, %ymm0 # sched: [9:0.50] -; GENERIC-NEXT: vfmaddsubps %ymm1, (%rdi), %ymm0, %ymm0 # sched: [9:0.50] +; GENERIC-NEXT: vfmaddsubps (%rdi), %ymm1, %ymm0, %ymm0 # sched: [10:0.50] +; GENERIC-NEXT: vfmaddsubps %ymm1, (%rdi), %ymm0, %ymm0 # sched: [10:0.50] ; GENERIC-NEXT: #NO_APP ; GENERIC-NEXT: vzeroupper # sched: [100:0.33] ; GENERIC-NEXT: retq # sched: [1:1.00] @@ -250,8 +250,8 @@ define void @test_vfmsubaddpd_128(<2 x double> %a0, <2 x double> %a1, <2 x doubl ; GENERIC: # %bb.0: ; GENERIC-NEXT: #APP ; GENERIC-NEXT: vfmsubaddpd %xmm2, %xmm1, %xmm0, %xmm0 # sched: [5:0.50] -; GENERIC-NEXT: vfmsubaddpd (%rdi), 
%xmm1, %xmm0, %xmm0 # sched: [9:0.50] -; GENERIC-NEXT: vfmsubaddpd %xmm1, (%rdi), %xmm0, %xmm0 # sched: [9:0.50] +; GENERIC-NEXT: vfmsubaddpd (%rdi), %xmm1, %xmm0, %xmm0 # sched: [10:0.50] +; GENERIC-NEXT: vfmsubaddpd %xmm1, (%rdi), %xmm0, %xmm0 # sched: [10:0.50] ; GENERIC-NEXT: #NO_APP ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -272,8 +272,8 @@ define void @test_vfmsubaddpd_256(<4 x double> %a0, <4 x double> %a1, <4 x doubl ; GENERIC: # %bb.0: ; GENERIC-NEXT: #APP ; GENERIC-NEXT: vfmsubaddpd %ymm2, %ymm1, %ymm0, %ymm0 # sched: [5:0.50] -; GENERIC-NEXT: vfmsubaddpd (%rdi), %ymm1, %ymm0, %ymm0 # sched: [9:0.50] -; GENERIC-NEXT: vfmsubaddpd %ymm1, (%rdi), %ymm0, %ymm0 # sched: [9:0.50] +; GENERIC-NEXT: vfmsubaddpd (%rdi), %ymm1, %ymm0, %ymm0 # sched: [10:0.50] +; GENERIC-NEXT: vfmsubaddpd %ymm1, (%rdi), %ymm0, %ymm0 # sched: [10:0.50] ; GENERIC-NEXT: #NO_APP ; GENERIC-NEXT: vzeroupper # sched: [100:0.33] ; GENERIC-NEXT: retq # sched: [1:1.00] @@ -296,8 +296,8 @@ define void @test_vfmsubaddps_128(<4 x float> %a0, <4 x float> %a1, <4 x float> ; GENERIC: # %bb.0: ; GENERIC-NEXT: #APP ; GENERIC-NEXT: vfmsubaddps %xmm2, %xmm1, %xmm0, %xmm0 # sched: [5:0.50] -; GENERIC-NEXT: vfmsubaddps (%rdi), %xmm1, %xmm0, %xmm0 # sched: [9:0.50] -; GENERIC-NEXT: vfmsubaddps %xmm1, (%rdi), %xmm0, %xmm0 # sched: [9:0.50] +; GENERIC-NEXT: vfmsubaddps (%rdi), %xmm1, %xmm0, %xmm0 # sched: [10:0.50] +; GENERIC-NEXT: vfmsubaddps %xmm1, (%rdi), %xmm0, %xmm0 # sched: [10:0.50] ; GENERIC-NEXT: #NO_APP ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -318,8 +318,8 @@ define void @test_vfmsubaddps_256(<8 x float> %a0, <8 x float> %a1, <8 x float> ; GENERIC: # %bb.0: ; GENERIC-NEXT: #APP ; GENERIC-NEXT: vfmsubaddps %ymm2, %ymm1, %ymm0, %ymm0 # sched: [5:0.50] -; GENERIC-NEXT: vfmsubaddps (%rdi), %ymm1, %ymm0, %ymm0 # sched: [9:0.50] -; GENERIC-NEXT: vfmsubaddps %ymm1, (%rdi), %ymm0, %ymm0 # sched: [9:0.50] +; GENERIC-NEXT: vfmsubaddps (%rdi), %ymm1, %ymm0, %ymm0 # sched: [10:0.50] +; GENERIC-NEXT: 
vfmsubaddps %ymm1, (%rdi), %ymm0, %ymm0 # sched: [10:0.50] ; GENERIC-NEXT: #NO_APP ; GENERIC-NEXT: vzeroupper # sched: [100:0.33] ; GENERIC-NEXT: retq # sched: [1:1.00] @@ -346,8 +346,8 @@ define void @test_vfmsubpd_128(<2 x double> %a0, <2 x double> %a1, <2 x double> ; GENERIC: # %bb.0: ; GENERIC-NEXT: #APP ; GENERIC-NEXT: vfmsubpd %xmm2, %xmm1, %xmm0, %xmm0 # sched: [5:0.50] -; GENERIC-NEXT: vfmsubpd (%rdi), %xmm1, %xmm0, %xmm0 # sched: [9:0.50] -; GENERIC-NEXT: vfmsubpd %xmm1, (%rdi), %xmm0, %xmm0 # sched: [9:0.50] +; GENERIC-NEXT: vfmsubpd (%rdi), %xmm1, %xmm0, %xmm0 # sched: [10:0.50] +; GENERIC-NEXT: vfmsubpd %xmm1, (%rdi), %xmm0, %xmm0 # sched: [10:0.50] ; GENERIC-NEXT: #NO_APP ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -368,8 +368,8 @@ define void @test_vfmsubpd_256(<4 x double> %a0, <4 x double> %a1, <4 x double> ; GENERIC: # %bb.0: ; GENERIC-NEXT: #APP ; GENERIC-NEXT: vfmsubpd %ymm2, %ymm1, %ymm0, %ymm0 # sched: [5:0.50] -; GENERIC-NEXT: vfmsubpd (%rdi), %ymm1, %ymm0, %ymm0 # sched: [9:0.50] -; GENERIC-NEXT: vfmsubpd %ymm1, (%rdi), %ymm0, %ymm0 # sched: [9:0.50] +; GENERIC-NEXT: vfmsubpd (%rdi), %ymm1, %ymm0, %ymm0 # sched: [10:0.50] +; GENERIC-NEXT: vfmsubpd %ymm1, (%rdi), %ymm0, %ymm0 # sched: [10:0.50] ; GENERIC-NEXT: #NO_APP ; GENERIC-NEXT: vzeroupper # sched: [100:0.33] ; GENERIC-NEXT: retq # sched: [1:1.00] @@ -392,8 +392,8 @@ define void @test_vfmsubps_128(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2 ; GENERIC: # %bb.0: ; GENERIC-NEXT: #APP ; GENERIC-NEXT: vfmsubps %xmm2, %xmm1, %xmm0, %xmm0 # sched: [5:0.50] -; GENERIC-NEXT: vfmsubps (%rdi), %xmm1, %xmm0, %xmm0 # sched: [9:0.50] -; GENERIC-NEXT: vfmsubps %xmm1, (%rdi), %xmm0, %xmm0 # sched: [9:0.50] +; GENERIC-NEXT: vfmsubps (%rdi), %xmm1, %xmm0, %xmm0 # sched: [10:0.50] +; GENERIC-NEXT: vfmsubps %xmm1, (%rdi), %xmm0, %xmm0 # sched: [10:0.50] ; GENERIC-NEXT: #NO_APP ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -414,8 +414,8 @@ define void @test_vfmsubps_256(<8 x float> %a0, <8 x float> %a1, 
<8 x float> %a2 ; GENERIC: # %bb.0: ; GENERIC-NEXT: #APP ; GENERIC-NEXT: vfmsubps %ymm2, %ymm1, %ymm0, %ymm0 # sched: [5:0.50] -; GENERIC-NEXT: vfmsubps (%rdi), %ymm1, %ymm0, %ymm0 # sched: [9:0.50] -; GENERIC-NEXT: vfmsubps %ymm1, (%rdi), %ymm0, %ymm0 # sched: [9:0.50] +; GENERIC-NEXT: vfmsubps (%rdi), %ymm1, %ymm0, %ymm0 # sched: [10:0.50] +; GENERIC-NEXT: vfmsubps %ymm1, (%rdi), %ymm0, %ymm0 # sched: [10:0.50] ; GENERIC-NEXT: #NO_APP ; GENERIC-NEXT: vzeroupper # sched: [100:0.33] ; GENERIC-NEXT: retq # sched: [1:1.00] @@ -438,8 +438,8 @@ define void @test_vfmsubsd_128(<2 x double> %a0, <2 x double> %a1, <2 x double> ; GENERIC: # %bb.0: ; GENERIC-NEXT: #APP ; GENERIC-NEXT: vfmsubsd %xmm2, %xmm1, %xmm0, %xmm0 # sched: [5:0.50] -; GENERIC-NEXT: vfmsubsd (%rdi), %xmm1, %xmm0, %xmm0 # sched: [9:0.50] -; GENERIC-NEXT: vfmsubsd %xmm1, (%rdi), %xmm0, %xmm0 # sched: [9:0.50] +; GENERIC-NEXT: vfmsubsd (%rdi), %xmm1, %xmm0, %xmm0 # sched: [10:0.50] +; GENERIC-NEXT: vfmsubsd %xmm1, (%rdi), %xmm0, %xmm0 # sched: [10:0.50] ; GENERIC-NEXT: #NO_APP ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -460,8 +460,8 @@ define void @test_vfmsubss_128(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2 ; GENERIC: # %bb.0: ; GENERIC-NEXT: #APP ; GENERIC-NEXT: vfmsubss %xmm2, %xmm1, %xmm0, %xmm0 # sched: [5:0.50] -; GENERIC-NEXT: vfmsubss (%rdi), %xmm1, %xmm0, %xmm0 # sched: [9:0.50] -; GENERIC-NEXT: vfmsubss %xmm1, (%rdi), %xmm0, %xmm0 # sched: [9:0.50] +; GENERIC-NEXT: vfmsubss (%rdi), %xmm1, %xmm0, %xmm0 # sched: [10:0.50] +; GENERIC-NEXT: vfmsubss %xmm1, (%rdi), %xmm0, %xmm0 # sched: [10:0.50] ; GENERIC-NEXT: #NO_APP ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -486,8 +486,8 @@ define void @test_vfnmaddpd_128(<2 x double> %a0, <2 x double> %a1, <2 x double> ; GENERIC: # %bb.0: ; GENERIC-NEXT: #APP ; GENERIC-NEXT: vfnmaddpd %xmm2, %xmm1, %xmm0, %xmm0 # sched: [5:0.50] -; GENERIC-NEXT: vfnmaddpd (%rdi), %xmm1, %xmm0, %xmm0 # sched: [9:0.50] -; GENERIC-NEXT: vfnmaddpd %xmm1, (%rdi), %xmm0, 
%xmm0 # sched: [9:0.50] +; GENERIC-NEXT: vfnmaddpd (%rdi), %xmm1, %xmm0, %xmm0 # sched: [10:0.50] +; GENERIC-NEXT: vfnmaddpd %xmm1, (%rdi), %xmm0, %xmm0 # sched: [10:0.50] ; GENERIC-NEXT: #NO_APP ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -508,8 +508,8 @@ define void @test_vfnmaddpd_256(<4 x double> %a0, <4 x double> %a1, <4 x double> ; GENERIC: # %bb.0: ; GENERIC-NEXT: #APP ; GENERIC-NEXT: vfnmaddpd %ymm2, %ymm1, %ymm0, %ymm0 # sched: [5:0.50] -; GENERIC-NEXT: vfnmaddpd (%rdi), %ymm1, %ymm0, %ymm0 # sched: [9:0.50] -; GENERIC-NEXT: vfnmaddpd %ymm1, (%rdi), %ymm0, %ymm0 # sched: [9:0.50] +; GENERIC-NEXT: vfnmaddpd (%rdi), %ymm1, %ymm0, %ymm0 # sched: [10:0.50] +; GENERIC-NEXT: vfnmaddpd %ymm1, (%rdi), %ymm0, %ymm0 # sched: [10:0.50] ; GENERIC-NEXT: #NO_APP ; GENERIC-NEXT: vzeroupper # sched: [100:0.33] ; GENERIC-NEXT: retq # sched: [1:1.00] @@ -532,8 +532,8 @@ define void @test_vfnmaddps_128(<4 x float> %a0, <4 x float> %a1, <4 x float> %a ; GENERIC: # %bb.0: ; GENERIC-NEXT: #APP ; GENERIC-NEXT: vfnmaddps %xmm2, %xmm1, %xmm0, %xmm0 # sched: [5:0.50] -; GENERIC-NEXT: vfnmaddps (%rdi), %xmm1, %xmm0, %xmm0 # sched: [9:0.50] -; GENERIC-NEXT: vfnmaddps %xmm1, (%rdi), %xmm0, %xmm0 # sched: [9:0.50] +; GENERIC-NEXT: vfnmaddps (%rdi), %xmm1, %xmm0, %xmm0 # sched: [10:0.50] +; GENERIC-NEXT: vfnmaddps %xmm1, (%rdi), %xmm0, %xmm0 # sched: [10:0.50] ; GENERIC-NEXT: #NO_APP ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -554,8 +554,8 @@ define void @test_vfnmaddps_256(<8 x float> %a0, <8 x float> %a1, <8 x float> %a ; GENERIC: # %bb.0: ; GENERIC-NEXT: #APP ; GENERIC-NEXT: vfnmaddps %ymm2, %ymm1, %ymm0, %ymm0 # sched: [5:0.50] -; GENERIC-NEXT: vfnmaddps (%rdi), %ymm1, %ymm0, %ymm0 # sched: [9:0.50] -; GENERIC-NEXT: vfnmaddps %ymm1, (%rdi), %ymm0, %ymm0 # sched: [9:0.50] +; GENERIC-NEXT: vfnmaddps (%rdi), %ymm1, %ymm0, %ymm0 # sched: [10:0.50] +; GENERIC-NEXT: vfnmaddps %ymm1, (%rdi), %ymm0, %ymm0 # sched: [10:0.50] ; GENERIC-NEXT: #NO_APP ; GENERIC-NEXT: vzeroupper # sched: 
[100:0.33] ; GENERIC-NEXT: retq # sched: [1:1.00] @@ -578,8 +578,8 @@ define void @test_vfnmaddsd_128(<2 x double> %a0, <2 x double> %a1, <2 x double> ; GENERIC: # %bb.0: ; GENERIC-NEXT: #APP ; GENERIC-NEXT: vfnmaddsd %xmm2, %xmm1, %xmm0, %xmm0 # sched: [5:0.50] -; GENERIC-NEXT: vfnmaddsd (%rdi), %xmm1, %xmm0, %xmm0 # sched: [9:0.50] -; GENERIC-NEXT: vfnmaddsd %xmm1, (%rdi), %xmm0, %xmm0 # sched: [9:0.50] +; GENERIC-NEXT: vfnmaddsd (%rdi), %xmm1, %xmm0, %xmm0 # sched: [10:0.50] +; GENERIC-NEXT: vfnmaddsd %xmm1, (%rdi), %xmm0, %xmm0 # sched: [10:0.50] ; GENERIC-NEXT: #NO_APP ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -600,8 +600,8 @@ define void @test_vfnmaddss_128(<4 x float> %a0, <4 x float> %a1, <4 x float> %a ; GENERIC: # %bb.0: ; GENERIC-NEXT: #APP ; GENERIC-NEXT: vfnmaddss %xmm2, %xmm1, %xmm0, %xmm0 # sched: [5:0.50] -; GENERIC-NEXT: vfnmaddss (%rdi), %xmm1, %xmm0, %xmm0 # sched: [9:0.50] -; GENERIC-NEXT: vfnmaddss %xmm1, (%rdi), %xmm0, %xmm0 # sched: [9:0.50] +; GENERIC-NEXT: vfnmaddss (%rdi), %xmm1, %xmm0, %xmm0 # sched: [10:0.50] +; GENERIC-NEXT: vfnmaddss %xmm1, (%rdi), %xmm0, %xmm0 # sched: [10:0.50] ; GENERIC-NEXT: #NO_APP ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -626,8 +626,8 @@ define void @test_vfnmsubpd_128(<2 x double> %a0, <2 x double> %a1, <2 x double> ; GENERIC: # %bb.0: ; GENERIC-NEXT: #APP ; GENERIC-NEXT: vfnmsubpd %xmm2, %xmm1, %xmm0, %xmm0 # sched: [5:0.50] -; GENERIC-NEXT: vfnmsubpd (%rdi), %xmm1, %xmm0, %xmm0 # sched: [9:0.50] -; GENERIC-NEXT: vfnmsubpd %xmm1, (%rdi), %xmm0, %xmm0 # sched: [9:0.50] +; GENERIC-NEXT: vfnmsubpd (%rdi), %xmm1, %xmm0, %xmm0 # sched: [10:0.50] +; GENERIC-NEXT: vfnmsubpd %xmm1, (%rdi), %xmm0, %xmm0 # sched: [10:0.50] ; GENERIC-NEXT: #NO_APP ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -648,8 +648,8 @@ define void @test_vfnmsubpd_256(<4 x double> %a0, <4 x double> %a1, <4 x double> ; GENERIC: # %bb.0: ; GENERIC-NEXT: #APP ; GENERIC-NEXT: vfnmsubpd %ymm2, %ymm1, %ymm0, %ymm0 # sched: [5:0.50] -; GENERIC-NEXT: 
vfnmsubpd (%rdi), %ymm1, %ymm0, %ymm0 # sched: [9:0.50] -; GENERIC-NEXT: vfnmsubpd %ymm1, (%rdi), %ymm0, %ymm0 # sched: [9:0.50] +; GENERIC-NEXT: vfnmsubpd (%rdi), %ymm1, %ymm0, %ymm0 # sched: [10:0.50] +; GENERIC-NEXT: vfnmsubpd %ymm1, (%rdi), %ymm0, %ymm0 # sched: [10:0.50] ; GENERIC-NEXT: #NO_APP ; GENERIC-NEXT: vzeroupper # sched: [100:0.33] ; GENERIC-NEXT: retq # sched: [1:1.00] @@ -672,8 +672,8 @@ define void @test_vfnmsubps_128(<4 x float> %a0, <4 x float> %a1, <4 x float> %a ; GENERIC: # %bb.0: ; GENERIC-NEXT: #APP ; GENERIC-NEXT: vfnmsubps %xmm2, %xmm1, %xmm0, %xmm0 # sched: [5:0.50] -; GENERIC-NEXT: vfnmsubps (%rdi), %xmm1, %xmm0, %xmm0 # sched: [9:0.50] -; GENERIC-NEXT: vfnmsubps %xmm1, (%rdi), %xmm0, %xmm0 # sched: [9:0.50] +; GENERIC-NEXT: vfnmsubps (%rdi), %xmm1, %xmm0, %xmm0 # sched: [10:0.50] +; GENERIC-NEXT: vfnmsubps %xmm1, (%rdi), %xmm0, %xmm0 # sched: [10:0.50] ; GENERIC-NEXT: #NO_APP ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -694,8 +694,8 @@ define void @test_vfnmsubps_256(<8 x float> %a0, <8 x float> %a1, <8 x float> %a ; GENERIC: # %bb.0: ; GENERIC-NEXT: #APP ; GENERIC-NEXT: vfnmsubps %ymm2, %ymm1, %ymm0, %ymm0 # sched: [5:0.50] -; GENERIC-NEXT: vfnmsubps (%rdi), %ymm1, %ymm0, %ymm0 # sched: [9:0.50] -; GENERIC-NEXT: vfnmsubps %ymm1, (%rdi), %ymm0, %ymm0 # sched: [9:0.50] +; GENERIC-NEXT: vfnmsubps (%rdi), %ymm1, %ymm0, %ymm0 # sched: [10:0.50] +; GENERIC-NEXT: vfnmsubps %ymm1, (%rdi), %ymm0, %ymm0 # sched: [10:0.50] ; GENERIC-NEXT: #NO_APP ; GENERIC-NEXT: vzeroupper # sched: [100:0.33] ; GENERIC-NEXT: retq # sched: [1:1.00] @@ -718,8 +718,8 @@ define void @test_vfnmsubsd_128(<2 x double> %a0, <2 x double> %a1, <2 x double> ; GENERIC: # %bb.0: ; GENERIC-NEXT: #APP ; GENERIC-NEXT: vfnmsubsd %xmm2, %xmm1, %xmm0, %xmm0 # sched: [5:0.50] -; GENERIC-NEXT: vfnmsubsd (%rdi), %xmm1, %xmm0, %xmm0 # sched: [9:0.50] -; GENERIC-NEXT: vfnmsubsd %xmm1, (%rdi), %xmm0, %xmm0 # sched: [9:0.50] +; GENERIC-NEXT: vfnmsubsd (%rdi), %xmm1, %xmm0, %xmm0 # sched: 
[10:0.50] +; GENERIC-NEXT: vfnmsubsd %xmm1, (%rdi), %xmm0, %xmm0 # sched: [10:0.50] ; GENERIC-NEXT: #NO_APP ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -740,8 +740,8 @@ define void @test_vfnmsubss_128(<4 x float> %a0, <4 x float> %a1, <4 x float> %a ; GENERIC: # %bb.0: ; GENERIC-NEXT: #APP ; GENERIC-NEXT: vfnmsubss %xmm2, %xmm1, %xmm0, %xmm0 # sched: [5:0.50] -; GENERIC-NEXT: vfnmsubss (%rdi), %xmm1, %xmm0, %xmm0 # sched: [9:0.50] -; GENERIC-NEXT: vfnmsubss %xmm1, (%rdi), %xmm0, %xmm0 # sched: [9:0.50] +; GENERIC-NEXT: vfnmsubss (%rdi), %xmm1, %xmm0, %xmm0 # sched: [10:0.50] +; GENERIC-NEXT: vfnmsubss %xmm1, (%rdi), %xmm0, %xmm0 # sched: [10:0.50] ; GENERIC-NEXT: #NO_APP ; GENERIC-NEXT: retq # sched: [1:1.00] ; diff --git a/llvm/test/CodeGen/X86/mmx-schedule.ll b/llvm/test/CodeGen/X86/mmx-schedule.ll index 2309468..42d4024 100644 --- a/llvm/test/CodeGen/X86/mmx-schedule.ll +++ b/llvm/test/CodeGen/X86/mmx-schedule.ll @@ -637,7 +637,7 @@ define i32 @test_movd(x86_mmx %a0, i32 %a1, i32 *%a2) { ; GENERIC-LABEL: test_movd: ; GENERIC: # %bb.0: ; GENERIC-NEXT: movd %edi, %mm1 # sched: [1:0.33] -; GENERIC-NEXT: movd (%rsi), %mm2 # sched: [4:0.50] +; GENERIC-NEXT: movd (%rsi), %mm2 # sched: [5:0.50] ; GENERIC-NEXT: paddd %mm1, %mm2 # sched: [3:1.00] ; GENERIC-NEXT: paddd %mm2, %mm0 # sched: [3:1.00] ; GENERIC-NEXT: movd %mm2, %ecx # sched: [1:0.33] @@ -670,7 +670,7 @@ define i32 @test_movd(x86_mmx %a0, i32 %a1, i32 *%a2) { ; SANDY-LABEL: test_movd: ; SANDY: # %bb.0: ; SANDY-NEXT: movd %edi, %mm1 # sched: [1:0.33] -; SANDY-NEXT: movd (%rsi), %mm2 # sched: [4:0.50] +; SANDY-NEXT: movd (%rsi), %mm2 # sched: [5:0.50] ; SANDY-NEXT: paddd %mm1, %mm2 # sched: [3:1.00] ; SANDY-NEXT: paddd %mm2, %mm0 # sched: [3:1.00] ; SANDY-NEXT: movd %mm2, %ecx # sched: [1:0.33] @@ -893,7 +893,7 @@ declare void @llvm.x86.mmx.movnt.dq(x86_mmx*, x86_mmx) nounwind define void @test_movq(i64 *%a0) { ; GENERIC-LABEL: test_movq: ; GENERIC: # %bb.0: -; GENERIC-NEXT: movq (%rdi), %mm0 # sched: [4:0.50] 
+; GENERIC-NEXT: movq (%rdi), %mm0 # sched: [5:0.50] ; GENERIC-NEXT: paddd %mm0, %mm0 # sched: [3:1.00] ; GENERIC-NEXT: movq %mm0, (%rdi) # sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] @@ -916,7 +916,7 @@ define void @test_movq(i64 *%a0) { ; ; SANDY-LABEL: test_movq: ; SANDY: # %bb.0: -; SANDY-NEXT: movq (%rdi), %mm0 # sched: [4:0.50] +; SANDY-NEXT: movq (%rdi), %mm0 # sched: [5:0.50] ; SANDY-NEXT: paddd %mm0, %mm0 # sched: [3:1.00] ; SANDY-NEXT: movq %mm0, (%rdi) # sched: [1:1.00] ; SANDY-NEXT: retq # sched: [1:1.00] @@ -1263,7 +1263,7 @@ define i64 @test_packssdw(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize { ; GENERIC-LABEL: test_packssdw: ; GENERIC: # %bb.0: ; GENERIC-NEXT: packssdw %mm1, %mm0 # sched: [1:1.00] -; GENERIC-NEXT: packssdw (%rdi), %mm0 # sched: [5:1.00] +; GENERIC-NEXT: packssdw (%rdi), %mm0 # sched: [6:1.00] ; GENERIC-NEXT: movq %mm0, %rax # sched: [1:0.33] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -1284,7 +1284,7 @@ define i64 @test_packssdw(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize { ; SANDY-LABEL: test_packssdw: ; SANDY: # %bb.0: ; SANDY-NEXT: packssdw %mm1, %mm0 # sched: [1:1.00] -; SANDY-NEXT: packssdw (%rdi), %mm0 # sched: [5:1.00] +; SANDY-NEXT: packssdw (%rdi), %mm0 # sched: [6:1.00] ; SANDY-NEXT: movq %mm0, %rax # sched: [1:0.33] ; SANDY-NEXT: retq # sched: [1:1.00] ; @@ -1341,7 +1341,7 @@ define i64 @test_packsswb(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize { ; GENERIC-LABEL: test_packsswb: ; GENERIC: # %bb.0: ; GENERIC-NEXT: packsswb %mm1, %mm0 # sched: [1:1.00] -; GENERIC-NEXT: packsswb (%rdi), %mm0 # sched: [5:1.00] +; GENERIC-NEXT: packsswb (%rdi), %mm0 # sched: [6:1.00] ; GENERIC-NEXT: movq %mm0, %rax # sched: [1:0.33] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -1362,7 +1362,7 @@ define i64 @test_packsswb(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize { ; SANDY-LABEL: test_packsswb: ; SANDY: # %bb.0: ; SANDY-NEXT: packsswb %mm1, %mm0 # sched: [1:1.00] -; SANDY-NEXT: packsswb (%rdi), %mm0 # sched: [5:1.00] 
+; SANDY-NEXT: packsswb (%rdi), %mm0 # sched: [6:1.00] ; SANDY-NEXT: movq %mm0, %rax # sched: [1:0.33] ; SANDY-NEXT: retq # sched: [1:1.00] ; @@ -1419,7 +1419,7 @@ define i64 @test_packuswb(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize { ; GENERIC-LABEL: test_packuswb: ; GENERIC: # %bb.0: ; GENERIC-NEXT: packuswb %mm1, %mm0 # sched: [1:1.00] -; GENERIC-NEXT: packuswb (%rdi), %mm0 # sched: [5:1.00] +; GENERIC-NEXT: packuswb (%rdi), %mm0 # sched: [6:1.00] ; GENERIC-NEXT: movq %mm0, %rax # sched: [1:0.33] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -1440,7 +1440,7 @@ define i64 @test_packuswb(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize { ; SANDY-LABEL: test_packuswb: ; SANDY: # %bb.0: ; SANDY-NEXT: packuswb %mm1, %mm0 # sched: [1:1.00] -; SANDY-NEXT: packuswb (%rdi), %mm0 # sched: [5:1.00] +; SANDY-NEXT: packuswb (%rdi), %mm0 # sched: [6:1.00] ; SANDY-NEXT: movq %mm0, %rax # sched: [1:0.33] ; SANDY-NEXT: retq # sched: [1:1.00] ; @@ -1497,7 +1497,7 @@ define i64 @test_paddb(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize { ; GENERIC-LABEL: test_paddb: ; GENERIC: # %bb.0: ; GENERIC-NEXT: paddb %mm1, %mm0 # sched: [3:1.00] -; GENERIC-NEXT: paddb (%rdi), %mm0 # sched: [7:1.00] +; GENERIC-NEXT: paddb (%rdi), %mm0 # sched: [8:1.00] ; GENERIC-NEXT: movq %mm0, %rax # sched: [1:0.33] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -1518,7 +1518,7 @@ define i64 @test_paddb(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize { ; SANDY-LABEL: test_paddb: ; SANDY: # %bb.0: ; SANDY-NEXT: paddb %mm1, %mm0 # sched: [3:1.00] -; SANDY-NEXT: paddb (%rdi), %mm0 # sched: [7:1.00] +; SANDY-NEXT: paddb (%rdi), %mm0 # sched: [8:1.00] ; SANDY-NEXT: movq %mm0, %rax # sched: [1:0.33] ; SANDY-NEXT: retq # sched: [1:1.00] ; @@ -1575,7 +1575,7 @@ define i64 @test_paddd(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize { ; GENERIC-LABEL: test_paddd: ; GENERIC: # %bb.0: ; GENERIC-NEXT: paddd %mm1, %mm0 # sched: [3:1.00] -; GENERIC-NEXT: paddd (%rdi), %mm0 # sched: [7:1.00] +; GENERIC-NEXT: paddd (%rdi), 
%mm0 # sched: [8:1.00] ; GENERIC-NEXT: movq %mm0, %rax # sched: [1:0.33] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -1596,7 +1596,7 @@ define i64 @test_paddd(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize { ; SANDY-LABEL: test_paddd: ; SANDY: # %bb.0: ; SANDY-NEXT: paddd %mm1, %mm0 # sched: [3:1.00] -; SANDY-NEXT: paddd (%rdi), %mm0 # sched: [7:1.00] +; SANDY-NEXT: paddd (%rdi), %mm0 # sched: [8:1.00] ; SANDY-NEXT: movq %mm0, %rax # sched: [1:0.33] ; SANDY-NEXT: retq # sched: [1:1.00] ; @@ -1731,7 +1731,7 @@ define i64 @test_paddsb(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize { ; GENERIC-LABEL: test_paddsb: ; GENERIC: # %bb.0: ; GENERIC-NEXT: paddsb %mm1, %mm0 # sched: [3:1.00] -; GENERIC-NEXT: paddsb (%rdi), %mm0 # sched: [7:1.00] +; GENERIC-NEXT: paddsb (%rdi), %mm0 # sched: [8:1.00] ; GENERIC-NEXT: movq %mm0, %rax # sched: [1:0.33] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -1752,7 +1752,7 @@ define i64 @test_paddsb(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize { ; SANDY-LABEL: test_paddsb: ; SANDY: # %bb.0: ; SANDY-NEXT: paddsb %mm1, %mm0 # sched: [3:1.00] -; SANDY-NEXT: paddsb (%rdi), %mm0 # sched: [7:1.00] +; SANDY-NEXT: paddsb (%rdi), %mm0 # sched: [8:1.00] ; SANDY-NEXT: movq %mm0, %rax # sched: [1:0.33] ; SANDY-NEXT: retq # sched: [1:1.00] ; @@ -1809,7 +1809,7 @@ define i64 @test_paddsw(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize { ; GENERIC-LABEL: test_paddsw: ; GENERIC: # %bb.0: ; GENERIC-NEXT: paddsw %mm1, %mm0 # sched: [3:1.00] -; GENERIC-NEXT: paddsw (%rdi), %mm0 # sched: [7:1.00] +; GENERIC-NEXT: paddsw (%rdi), %mm0 # sched: [8:1.00] ; GENERIC-NEXT: movq %mm0, %rax # sched: [1:0.33] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -1830,7 +1830,7 @@ define i64 @test_paddsw(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize { ; SANDY-LABEL: test_paddsw: ; SANDY: # %bb.0: ; SANDY-NEXT: paddsw %mm1, %mm0 # sched: [3:1.00] -; SANDY-NEXT: paddsw (%rdi), %mm0 # sched: [7:1.00] +; SANDY-NEXT: paddsw (%rdi), %mm0 # sched: [8:1.00] ; SANDY-NEXT: movq %mm0, 
%rax # sched: [1:0.33] ; SANDY-NEXT: retq # sched: [1:1.00] ; @@ -1887,7 +1887,7 @@ define i64 @test_paddusb(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize { ; GENERIC-LABEL: test_paddusb: ; GENERIC: # %bb.0: ; GENERIC-NEXT: paddusb %mm1, %mm0 # sched: [3:1.00] -; GENERIC-NEXT: paddusb (%rdi), %mm0 # sched: [7:1.00] +; GENERIC-NEXT: paddusb (%rdi), %mm0 # sched: [8:1.00] ; GENERIC-NEXT: movq %mm0, %rax # sched: [1:0.33] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -1908,7 +1908,7 @@ define i64 @test_paddusb(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize { ; SANDY-LABEL: test_paddusb: ; SANDY: # %bb.0: ; SANDY-NEXT: paddusb %mm1, %mm0 # sched: [3:1.00] -; SANDY-NEXT: paddusb (%rdi), %mm0 # sched: [7:1.00] +; SANDY-NEXT: paddusb (%rdi), %mm0 # sched: [8:1.00] ; SANDY-NEXT: movq %mm0, %rax # sched: [1:0.33] ; SANDY-NEXT: retq # sched: [1:1.00] ; @@ -1965,7 +1965,7 @@ define i64 @test_paddusw(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize { ; GENERIC-LABEL: test_paddusw: ; GENERIC: # %bb.0: ; GENERIC-NEXT: paddusw %mm1, %mm0 # sched: [3:1.00] -; GENERIC-NEXT: paddusw (%rdi), %mm0 # sched: [7:1.00] +; GENERIC-NEXT: paddusw (%rdi), %mm0 # sched: [8:1.00] ; GENERIC-NEXT: movq %mm0, %rax # sched: [1:0.33] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -1986,7 +1986,7 @@ define i64 @test_paddusw(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize { ; SANDY-LABEL: test_paddusw: ; SANDY: # %bb.0: ; SANDY-NEXT: paddusw %mm1, %mm0 # sched: [3:1.00] -; SANDY-NEXT: paddusw (%rdi), %mm0 # sched: [7:1.00] +; SANDY-NEXT: paddusw (%rdi), %mm0 # sched: [8:1.00] ; SANDY-NEXT: movq %mm0, %rax # sched: [1:0.33] ; SANDY-NEXT: retq # sched: [1:1.00] ; @@ -2043,7 +2043,7 @@ define i64 @test_paddw(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize { ; GENERIC-LABEL: test_paddw: ; GENERIC: # %bb.0: ; GENERIC-NEXT: paddw %mm1, %mm0 # sched: [3:1.00] -; GENERIC-NEXT: paddw (%rdi), %mm0 # sched: [7:1.00] +; GENERIC-NEXT: paddw (%rdi), %mm0 # sched: [8:1.00] ; GENERIC-NEXT: movq %mm0, %rax # sched: 
[1:0.33] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -2064,7 +2064,7 @@ define i64 @test_paddw(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize { ; SANDY-LABEL: test_paddw: ; SANDY: # %bb.0: ; SANDY-NEXT: paddw %mm1, %mm0 # sched: [3:1.00] -; SANDY-NEXT: paddw (%rdi), %mm0 # sched: [7:1.00] +; SANDY-NEXT: paddw (%rdi), %mm0 # sched: [8:1.00] ; SANDY-NEXT: movq %mm0, %rax # sched: [1:0.33] ; SANDY-NEXT: retq # sched: [1:1.00] ; @@ -2199,7 +2199,7 @@ define i64 @test_pand(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize { ; GENERIC-LABEL: test_pand: ; GENERIC: # %bb.0: ; GENERIC-NEXT: pand %mm1, %mm0 # sched: [1:1.00] -; GENERIC-NEXT: pand (%rdi), %mm0 # sched: [5:1.00] +; GENERIC-NEXT: pand (%rdi), %mm0 # sched: [6:1.00] ; GENERIC-NEXT: movq %mm0, %rax # sched: [1:0.33] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -2220,7 +2220,7 @@ define i64 @test_pand(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize { ; SANDY-LABEL: test_pand: ; SANDY: # %bb.0: ; SANDY-NEXT: pand %mm1, %mm0 # sched: [1:1.00] -; SANDY-NEXT: pand (%rdi), %mm0 # sched: [5:1.00] +; SANDY-NEXT: pand (%rdi), %mm0 # sched: [6:1.00] ; SANDY-NEXT: movq %mm0, %rax # sched: [1:0.33] ; SANDY-NEXT: retq # sched: [1:1.00] ; @@ -2277,7 +2277,7 @@ define i64 @test_pandn(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize { ; GENERIC-LABEL: test_pandn: ; GENERIC: # %bb.0: ; GENERIC-NEXT: pandn %mm1, %mm0 # sched: [1:1.00] -; GENERIC-NEXT: pandn (%rdi), %mm0 # sched: [5:1.00] +; GENERIC-NEXT: pandn (%rdi), %mm0 # sched: [6:1.00] ; GENERIC-NEXT: movq %mm0, %rax # sched: [1:0.33] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -2298,7 +2298,7 @@ define i64 @test_pandn(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize { ; SANDY-LABEL: test_pandn: ; SANDY: # %bb.0: ; SANDY-NEXT: pandn %mm1, %mm0 # sched: [1:1.00] -; SANDY-NEXT: pandn (%rdi), %mm0 # sched: [5:1.00] +; SANDY-NEXT: pandn (%rdi), %mm0 # sched: [6:1.00] ; SANDY-NEXT: movq %mm0, %rax # sched: [1:0.33] ; SANDY-NEXT: retq # sched: [1:1.00] ; @@ -2355,7 +2355,7 @@ define 
i64 @test_pavgb(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize { ; GENERIC-LABEL: test_pavgb: ; GENERIC: # %bb.0: ; GENERIC-NEXT: pavgb %mm1, %mm0 # sched: [5:1.00] -; GENERIC-NEXT: pavgb (%rdi), %mm0 # sched: [9:1.00] +; GENERIC-NEXT: pavgb (%rdi), %mm0 # sched: [10:1.00] ; GENERIC-NEXT: movq %mm0, %rax # sched: [1:0.33] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -2376,7 +2376,7 @@ define i64 @test_pavgb(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize { ; SANDY-LABEL: test_pavgb: ; SANDY: # %bb.0: ; SANDY-NEXT: pavgb %mm1, %mm0 # sched: [5:1.00] -; SANDY-NEXT: pavgb (%rdi), %mm0 # sched: [9:1.00] +; SANDY-NEXT: pavgb (%rdi), %mm0 # sched: [10:1.00] ; SANDY-NEXT: movq %mm0, %rax # sched: [1:0.33] ; SANDY-NEXT: retq # sched: [1:1.00] ; @@ -2433,7 +2433,7 @@ define i64 @test_pavgw(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize { ; GENERIC-LABEL: test_pavgw: ; GENERIC: # %bb.0: ; GENERIC-NEXT: pavgw %mm1, %mm0 # sched: [5:1.00] -; GENERIC-NEXT: pavgw (%rdi), %mm0 # sched: [9:1.00] +; GENERIC-NEXT: pavgw (%rdi), %mm0 # sched: [10:1.00] ; GENERIC-NEXT: movq %mm0, %rax # sched: [1:0.33] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -2454,7 +2454,7 @@ define i64 @test_pavgw(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize { ; SANDY-LABEL: test_pavgw: ; SANDY: # %bb.0: ; SANDY-NEXT: pavgw %mm1, %mm0 # sched: [5:1.00] -; SANDY-NEXT: pavgw (%rdi), %mm0 # sched: [9:1.00] +; SANDY-NEXT: pavgw (%rdi), %mm0 # sched: [10:1.00] ; SANDY-NEXT: movq %mm0, %rax # sched: [1:0.33] ; SANDY-NEXT: retq # sched: [1:1.00] ; @@ -2511,7 +2511,7 @@ define i64 @test_pcmpeqb(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize { ; GENERIC-LABEL: test_pcmpeqb: ; GENERIC: # %bb.0: ; GENERIC-NEXT: pcmpeqb %mm1, %mm0 # sched: [3:1.00] -; GENERIC-NEXT: pcmpeqb (%rdi), %mm0 # sched: [7:1.00] +; GENERIC-NEXT: pcmpeqb (%rdi), %mm0 # sched: [8:1.00] ; GENERIC-NEXT: movq %mm0, %rax # sched: [1:0.33] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -2532,7 +2532,7 @@ define i64 @test_pcmpeqb(x86_mmx %a0, x86_mmx %a1, 
x86_mmx* %a2) optsize { ; SANDY-LABEL: test_pcmpeqb: ; SANDY: # %bb.0: ; SANDY-NEXT: pcmpeqb %mm1, %mm0 # sched: [3:1.00] -; SANDY-NEXT: pcmpeqb (%rdi), %mm0 # sched: [7:1.00] +; SANDY-NEXT: pcmpeqb (%rdi), %mm0 # sched: [8:1.00] ; SANDY-NEXT: movq %mm0, %rax # sched: [1:0.33] ; SANDY-NEXT: retq # sched: [1:1.00] ; @@ -2589,7 +2589,7 @@ define i64 @test_pcmpeqd(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize { ; GENERIC-LABEL: test_pcmpeqd: ; GENERIC: # %bb.0: ; GENERIC-NEXT: pcmpeqd %mm1, %mm0 # sched: [3:1.00] -; GENERIC-NEXT: pcmpeqd (%rdi), %mm0 # sched: [7:1.00] +; GENERIC-NEXT: pcmpeqd (%rdi), %mm0 # sched: [8:1.00] ; GENERIC-NEXT: movq %mm0, %rax # sched: [1:0.33] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -2610,7 +2610,7 @@ define i64 @test_pcmpeqd(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize { ; SANDY-LABEL: test_pcmpeqd: ; SANDY: # %bb.0: ; SANDY-NEXT: pcmpeqd %mm1, %mm0 # sched: [3:1.00] -; SANDY-NEXT: pcmpeqd (%rdi), %mm0 # sched: [7:1.00] +; SANDY-NEXT: pcmpeqd (%rdi), %mm0 # sched: [8:1.00] ; SANDY-NEXT: movq %mm0, %rax # sched: [1:0.33] ; SANDY-NEXT: retq # sched: [1:1.00] ; @@ -2667,7 +2667,7 @@ define i64 @test_pcmpeqw(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize { ; GENERIC-LABEL: test_pcmpeqw: ; GENERIC: # %bb.0: ; GENERIC-NEXT: pcmpeqw %mm1, %mm0 # sched: [3:1.00] -; GENERIC-NEXT: pcmpeqw (%rdi), %mm0 # sched: [7:1.00] +; GENERIC-NEXT: pcmpeqw (%rdi), %mm0 # sched: [8:1.00] ; GENERIC-NEXT: movq %mm0, %rax # sched: [1:0.33] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -2688,7 +2688,7 @@ define i64 @test_pcmpeqw(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize { ; SANDY-LABEL: test_pcmpeqw: ; SANDY: # %bb.0: ; SANDY-NEXT: pcmpeqw %mm1, %mm0 # sched: [3:1.00] -; SANDY-NEXT: pcmpeqw (%rdi), %mm0 # sched: [7:1.00] +; SANDY-NEXT: pcmpeqw (%rdi), %mm0 # sched: [8:1.00] ; SANDY-NEXT: movq %mm0, %rax # sched: [1:0.33] ; SANDY-NEXT: retq # sched: [1:1.00] ; @@ -2745,7 +2745,7 @@ define i64 @test_pcmpgtb(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize 
{ ; GENERIC-LABEL: test_pcmpgtb: ; GENERIC: # %bb.0: ; GENERIC-NEXT: pcmpgtb %mm1, %mm0 # sched: [3:1.00] -; GENERIC-NEXT: pcmpgtb (%rdi), %mm0 # sched: [7:1.00] +; GENERIC-NEXT: pcmpgtb (%rdi), %mm0 # sched: [8:1.00] ; GENERIC-NEXT: movq %mm0, %rax # sched: [1:0.33] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -2766,7 +2766,7 @@ define i64 @test_pcmpgtb(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize { ; SANDY-LABEL: test_pcmpgtb: ; SANDY: # %bb.0: ; SANDY-NEXT: pcmpgtb %mm1, %mm0 # sched: [3:1.00] -; SANDY-NEXT: pcmpgtb (%rdi), %mm0 # sched: [7:1.00] +; SANDY-NEXT: pcmpgtb (%rdi), %mm0 # sched: [8:1.00] ; SANDY-NEXT: movq %mm0, %rax # sched: [1:0.33] ; SANDY-NEXT: retq # sched: [1:1.00] ; @@ -2823,7 +2823,7 @@ define i64 @test_pcmpgtd(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize { ; GENERIC-LABEL: test_pcmpgtd: ; GENERIC: # %bb.0: ; GENERIC-NEXT: pcmpgtd %mm1, %mm0 # sched: [3:1.00] -; GENERIC-NEXT: pcmpgtd (%rdi), %mm0 # sched: [7:1.00] +; GENERIC-NEXT: pcmpgtd (%rdi), %mm0 # sched: [8:1.00] ; GENERIC-NEXT: movq %mm0, %rax # sched: [1:0.33] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -2844,7 +2844,7 @@ define i64 @test_pcmpgtd(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize { ; SANDY-LABEL: test_pcmpgtd: ; SANDY: # %bb.0: ; SANDY-NEXT: pcmpgtd %mm1, %mm0 # sched: [3:1.00] -; SANDY-NEXT: pcmpgtd (%rdi), %mm0 # sched: [7:1.00] +; SANDY-NEXT: pcmpgtd (%rdi), %mm0 # sched: [8:1.00] ; SANDY-NEXT: movq %mm0, %rax # sched: [1:0.33] ; SANDY-NEXT: retq # sched: [1:1.00] ; @@ -2901,7 +2901,7 @@ define i64 @test_pcmpgtw(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize { ; GENERIC-LABEL: test_pcmpgtw: ; GENERIC: # %bb.0: ; GENERIC-NEXT: pcmpgtw %mm1, %mm0 # sched: [3:1.00] -; GENERIC-NEXT: pcmpgtw (%rdi), %mm0 # sched: [7:1.00] +; GENERIC-NEXT: pcmpgtw (%rdi), %mm0 # sched: [8:1.00] ; GENERIC-NEXT: movq %mm0, %rax # sched: [1:0.33] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -2922,7 +2922,7 @@ define i64 @test_pcmpgtw(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize { ; 
SANDY-LABEL: test_pcmpgtw: ; SANDY: # %bb.0: ; SANDY-NEXT: pcmpgtw %mm1, %mm0 # sched: [3:1.00] -; SANDY-NEXT: pcmpgtw (%rdi), %mm0 # sched: [7:1.00] +; SANDY-NEXT: pcmpgtw (%rdi), %mm0 # sched: [8:1.00] ; SANDY-NEXT: movq %mm0, %rax # sched: [1:0.33] ; SANDY-NEXT: retq # sched: [1:1.00] ; @@ -3747,7 +3747,7 @@ define i64 @test_pmaxsw(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize { ; GENERIC-LABEL: test_pmaxsw: ; GENERIC: # %bb.0: ; GENERIC-NEXT: pmaxsw %mm1, %mm0 # sched: [5:1.00] -; GENERIC-NEXT: pmaxsw (%rdi), %mm0 # sched: [9:1.00] +; GENERIC-NEXT: pmaxsw (%rdi), %mm0 # sched: [10:1.00] ; GENERIC-NEXT: movq %mm0, %rax # sched: [1:0.33] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -3768,7 +3768,7 @@ define i64 @test_pmaxsw(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize { ; SANDY-LABEL: test_pmaxsw: ; SANDY: # %bb.0: ; SANDY-NEXT: pmaxsw %mm1, %mm0 # sched: [5:1.00] -; SANDY-NEXT: pmaxsw (%rdi), %mm0 # sched: [9:1.00] +; SANDY-NEXT: pmaxsw (%rdi), %mm0 # sched: [10:1.00] ; SANDY-NEXT: movq %mm0, %rax # sched: [1:0.33] ; SANDY-NEXT: retq # sched: [1:1.00] ; @@ -3825,7 +3825,7 @@ define i64 @test_pmaxub(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize { ; GENERIC-LABEL: test_pmaxub: ; GENERIC: # %bb.0: ; GENERIC-NEXT: pmaxub %mm1, %mm0 # sched: [5:1.00] -; GENERIC-NEXT: pmaxub (%rdi), %mm0 # sched: [9:1.00] +; GENERIC-NEXT: pmaxub (%rdi), %mm0 # sched: [10:1.00] ; GENERIC-NEXT: movq %mm0, %rax # sched: [1:0.33] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -3846,7 +3846,7 @@ define i64 @test_pmaxub(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize { ; SANDY-LABEL: test_pmaxub: ; SANDY: # %bb.0: ; SANDY-NEXT: pmaxub %mm1, %mm0 # sched: [5:1.00] -; SANDY-NEXT: pmaxub (%rdi), %mm0 # sched: [9:1.00] +; SANDY-NEXT: pmaxub (%rdi), %mm0 # sched: [10:1.00] ; SANDY-NEXT: movq %mm0, %rax # sched: [1:0.33] ; SANDY-NEXT: retq # sched: [1:1.00] ; @@ -3903,7 +3903,7 @@ define i64 @test_pminsw(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize { ; GENERIC-LABEL: test_pminsw: ; GENERIC: 
# %bb.0: ; GENERIC-NEXT: pminsw %mm1, %mm0 # sched: [5:1.00] -; GENERIC-NEXT: pminsw (%rdi), %mm0 # sched: [9:1.00] +; GENERIC-NEXT: pminsw (%rdi), %mm0 # sched: [10:1.00] ; GENERIC-NEXT: movq %mm0, %rax # sched: [1:0.33] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -3924,7 +3924,7 @@ define i64 @test_pminsw(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize { ; SANDY-LABEL: test_pminsw: ; SANDY: # %bb.0: ; SANDY-NEXT: pminsw %mm1, %mm0 # sched: [5:1.00] -; SANDY-NEXT: pminsw (%rdi), %mm0 # sched: [9:1.00] +; SANDY-NEXT: pminsw (%rdi), %mm0 # sched: [10:1.00] ; SANDY-NEXT: movq %mm0, %rax # sched: [1:0.33] ; SANDY-NEXT: retq # sched: [1:1.00] ; @@ -3981,7 +3981,7 @@ define i64 @test_pminub(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize { ; GENERIC-LABEL: test_pminub: ; GENERIC: # %bb.0: ; GENERIC-NEXT: pminub %mm1, %mm0 # sched: [5:1.00] -; GENERIC-NEXT: pminub (%rdi), %mm0 # sched: [9:1.00] +; GENERIC-NEXT: pminub (%rdi), %mm0 # sched: [10:1.00] ; GENERIC-NEXT: movq %mm0, %rax # sched: [1:0.33] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -4002,7 +4002,7 @@ define i64 @test_pminub(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize { ; SANDY-LABEL: test_pminub: ; SANDY: # %bb.0: ; SANDY-NEXT: pminub %mm1, %mm0 # sched: [5:1.00] -; SANDY-NEXT: pminub (%rdi), %mm0 # sched: [9:1.00] +; SANDY-NEXT: pminub (%rdi), %mm0 # sched: [10:1.00] ; SANDY-NEXT: movq %mm0, %rax # sched: [1:0.33] ; SANDY-NEXT: retq # sched: [1:1.00] ; @@ -4504,7 +4504,7 @@ define i64 @test_por(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize { ; GENERIC-LABEL: test_por: ; GENERIC: # %bb.0: ; GENERIC-NEXT: por %mm1, %mm0 # sched: [1:1.00] -; GENERIC-NEXT: por (%rdi), %mm0 # sched: [5:1.00] +; GENERIC-NEXT: por (%rdi), %mm0 # sched: [6:1.00] ; GENERIC-NEXT: movq %mm0, %rax # sched: [1:0.33] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -4525,7 +4525,7 @@ define i64 @test_por(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize { ; SANDY-LABEL: test_por: ; SANDY: # %bb.0: ; SANDY-NEXT: por %mm1, %mm0 # sched: 
[1:1.00] -; SANDY-NEXT: por (%rdi), %mm0 # sched: [5:1.00] +; SANDY-NEXT: por (%rdi), %mm0 # sched: [6:1.00] ; SANDY-NEXT: movq %mm0, %rax # sched: [1:0.33] ; SANDY-NEXT: retq # sched: [1:1.00] ; @@ -4737,7 +4737,7 @@ declare x86_mmx @llvm.x86.ssse3.pshuf.b(x86_mmx, x86_mmx) nounwind readnone define i64 @test_pshufw(x86_mmx *%a0) optsize { ; GENERIC-LABEL: test_pshufw: ; GENERIC: # %bb.0: -; GENERIC-NEXT: pshufw $0, (%rdi), %mm0 # mm0 = mem[0,0,0,0] sched: [5:1.00] +; GENERIC-NEXT: pshufw $0, (%rdi), %mm0 # mm0 = mem[0,0,0,0] sched: [6:1.00] ; GENERIC-NEXT: pshufw $0, %mm0, %mm0 # mm0 = mm0[0,0,0,0] sched: [1:1.00] ; GENERIC-NEXT: movq %mm0, %rax # sched: [1:0.33] ; GENERIC-NEXT: retq # sched: [1:1.00] @@ -4758,7 +4758,7 @@ define i64 @test_pshufw(x86_mmx *%a0) optsize { ; ; SANDY-LABEL: test_pshufw: ; SANDY: # %bb.0: -; SANDY-NEXT: pshufw $0, (%rdi), %mm0 # mm0 = mem[0,0,0,0] sched: [5:1.00] +; SANDY-NEXT: pshufw $0, (%rdi), %mm0 # mm0 = mem[0,0,0,0] sched: [6:1.00] ; SANDY-NEXT: pshufw $0, %mm0, %mm0 # mm0 = mm0[0,0,0,0] sched: [1:1.00] ; SANDY-NEXT: movq %mm0, %rax # sched: [1:0.33] ; SANDY-NEXT: retq # sched: [1:1.00] @@ -5050,7 +5050,7 @@ define i64 @test_pslld(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize { ; GENERIC-LABEL: test_pslld: ; GENERIC: # %bb.0: ; GENERIC-NEXT: pslld %mm1, %mm0 # sched: [1:1.00] -; GENERIC-NEXT: pslld (%rdi), %mm0 # sched: [5:1.00] +; GENERIC-NEXT: pslld (%rdi), %mm0 # sched: [6:1.00] ; GENERIC-NEXT: pslld $7, %mm0 # sched: [1:1.00] ; GENERIC-NEXT: movq %mm0, %rax # sched: [1:0.33] ; GENERIC-NEXT: retq # sched: [1:1.00] @@ -5074,7 +5074,7 @@ define i64 @test_pslld(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize { ; SANDY-LABEL: test_pslld: ; SANDY: # %bb.0: ; SANDY-NEXT: pslld %mm1, %mm0 # sched: [1:1.00] -; SANDY-NEXT: pslld (%rdi), %mm0 # sched: [5:1.00] +; SANDY-NEXT: pslld (%rdi), %mm0 # sched: [6:1.00] ; SANDY-NEXT: pslld $7, %mm0 # sched: [1:1.00] ; SANDY-NEXT: movq %mm0, %rax # sched: [1:0.33] ; SANDY-NEXT: retq # sched: 
[1:1.00] @@ -5140,7 +5140,7 @@ define i64 @test_psllq(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize { ; GENERIC-LABEL: test_psllq: ; GENERIC: # %bb.0: ; GENERIC-NEXT: psllq %mm1, %mm0 # sched: [1:1.00] -; GENERIC-NEXT: psllq (%rdi), %mm0 # sched: [5:1.00] +; GENERIC-NEXT: psllq (%rdi), %mm0 # sched: [6:1.00] ; GENERIC-NEXT: psllq $7, %mm0 # sched: [1:1.00] ; GENERIC-NEXT: movq %mm0, %rax # sched: [1:0.33] ; GENERIC-NEXT: retq # sched: [1:1.00] @@ -5164,7 +5164,7 @@ define i64 @test_psllq(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize { ; SANDY-LABEL: test_psllq: ; SANDY: # %bb.0: ; SANDY-NEXT: psllq %mm1, %mm0 # sched: [1:1.00] -; SANDY-NEXT: psllq (%rdi), %mm0 # sched: [5:1.00] +; SANDY-NEXT: psllq (%rdi), %mm0 # sched: [6:1.00] ; SANDY-NEXT: psllq $7, %mm0 # sched: [1:1.00] ; SANDY-NEXT: movq %mm0, %rax # sched: [1:0.33] ; SANDY-NEXT: retq # sched: [1:1.00] @@ -5230,7 +5230,7 @@ define i64 @test_psllw(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize { ; GENERIC-LABEL: test_psllw: ; GENERIC: # %bb.0: ; GENERIC-NEXT: psllw %mm1, %mm0 # sched: [1:1.00] -; GENERIC-NEXT: psllw (%rdi), %mm0 # sched: [5:1.00] +; GENERIC-NEXT: psllw (%rdi), %mm0 # sched: [6:1.00] ; GENERIC-NEXT: psllw $7, %mm0 # sched: [1:1.00] ; GENERIC-NEXT: movq %mm0, %rax # sched: [1:0.33] ; GENERIC-NEXT: retq # sched: [1:1.00] @@ -5254,7 +5254,7 @@ define i64 @test_psllw(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize { ; SANDY-LABEL: test_psllw: ; SANDY: # %bb.0: ; SANDY-NEXT: psllw %mm1, %mm0 # sched: [1:1.00] -; SANDY-NEXT: psllw (%rdi), %mm0 # sched: [5:1.00] +; SANDY-NEXT: psllw (%rdi), %mm0 # sched: [6:1.00] ; SANDY-NEXT: psllw $7, %mm0 # sched: [1:1.00] ; SANDY-NEXT: movq %mm0, %rax # sched: [1:0.33] ; SANDY-NEXT: retq # sched: [1:1.00] @@ -5320,7 +5320,7 @@ define i64 @test_psrad(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize { ; GENERIC-LABEL: test_psrad: ; GENERIC: # %bb.0: ; GENERIC-NEXT: psrad %mm1, %mm0 # sched: [1:1.00] -; GENERIC-NEXT: psrad (%rdi), %mm0 # sched: [5:1.00] +; 
GENERIC-NEXT: psrad (%rdi), %mm0 # sched: [6:1.00] ; GENERIC-NEXT: psrad $7, %mm0 # sched: [1:1.00] ; GENERIC-NEXT: movq %mm0, %rax # sched: [1:0.33] ; GENERIC-NEXT: retq # sched: [1:1.00] @@ -5344,7 +5344,7 @@ define i64 @test_psrad(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize { ; SANDY-LABEL: test_psrad: ; SANDY: # %bb.0: ; SANDY-NEXT: psrad %mm1, %mm0 # sched: [1:1.00] -; SANDY-NEXT: psrad (%rdi), %mm0 # sched: [5:1.00] +; SANDY-NEXT: psrad (%rdi), %mm0 # sched: [6:1.00] ; SANDY-NEXT: psrad $7, %mm0 # sched: [1:1.00] ; SANDY-NEXT: movq %mm0, %rax # sched: [1:0.33] ; SANDY-NEXT: retq # sched: [1:1.00] @@ -5410,7 +5410,7 @@ define i64 @test_psraw(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize { ; GENERIC-LABEL: test_psraw: ; GENERIC: # %bb.0: ; GENERIC-NEXT: psraw %mm1, %mm0 # sched: [1:1.00] -; GENERIC-NEXT: psraw (%rdi), %mm0 # sched: [5:1.00] +; GENERIC-NEXT: psraw (%rdi), %mm0 # sched: [6:1.00] ; GENERIC-NEXT: psraw $7, %mm0 # sched: [1:1.00] ; GENERIC-NEXT: movq %mm0, %rax # sched: [1:0.33] ; GENERIC-NEXT: retq # sched: [1:1.00] @@ -5434,7 +5434,7 @@ define i64 @test_psraw(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize { ; SANDY-LABEL: test_psraw: ; SANDY: # %bb.0: ; SANDY-NEXT: psraw %mm1, %mm0 # sched: [1:1.00] -; SANDY-NEXT: psraw (%rdi), %mm0 # sched: [5:1.00] +; SANDY-NEXT: psraw (%rdi), %mm0 # sched: [6:1.00] ; SANDY-NEXT: psraw $7, %mm0 # sched: [1:1.00] ; SANDY-NEXT: movq %mm0, %rax # sched: [1:0.33] ; SANDY-NEXT: retq # sched: [1:1.00] @@ -5500,7 +5500,7 @@ define i64 @test_psrld(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize { ; GENERIC-LABEL: test_psrld: ; GENERIC: # %bb.0: ; GENERIC-NEXT: psrld %mm1, %mm0 # sched: [1:1.00] -; GENERIC-NEXT: psrld (%rdi), %mm0 # sched: [5:1.00] +; GENERIC-NEXT: psrld (%rdi), %mm0 # sched: [6:1.00] ; GENERIC-NEXT: psrld $7, %mm0 # sched: [1:1.00] ; GENERIC-NEXT: movq %mm0, %rax # sched: [1:0.33] ; GENERIC-NEXT: retq # sched: [1:1.00] @@ -5524,7 +5524,7 @@ define i64 @test_psrld(x86_mmx %a0, x86_mmx %a1, 
x86_mmx* %a2) optsize { ; SANDY-LABEL: test_psrld: ; SANDY: # %bb.0: ; SANDY-NEXT: psrld %mm1, %mm0 # sched: [1:1.00] -; SANDY-NEXT: psrld (%rdi), %mm0 # sched: [5:1.00] +; SANDY-NEXT: psrld (%rdi), %mm0 # sched: [6:1.00] ; SANDY-NEXT: psrld $7, %mm0 # sched: [1:1.00] ; SANDY-NEXT: movq %mm0, %rax # sched: [1:0.33] ; SANDY-NEXT: retq # sched: [1:1.00] @@ -5590,7 +5590,7 @@ define i64 @test_psrlq(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize { ; GENERIC-LABEL: test_psrlq: ; GENERIC: # %bb.0: ; GENERIC-NEXT: psrlq %mm1, %mm0 # sched: [1:1.00] -; GENERIC-NEXT: psrlq (%rdi), %mm0 # sched: [5:1.00] +; GENERIC-NEXT: psrlq (%rdi), %mm0 # sched: [6:1.00] ; GENERIC-NEXT: psrlq $7, %mm0 # sched: [1:1.00] ; GENERIC-NEXT: movq %mm0, %rax # sched: [1:0.33] ; GENERIC-NEXT: retq # sched: [1:1.00] @@ -5614,7 +5614,7 @@ define i64 @test_psrlq(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize { ; SANDY-LABEL: test_psrlq: ; SANDY: # %bb.0: ; SANDY-NEXT: psrlq %mm1, %mm0 # sched: [1:1.00] -; SANDY-NEXT: psrlq (%rdi), %mm0 # sched: [5:1.00] +; SANDY-NEXT: psrlq (%rdi), %mm0 # sched: [6:1.00] ; SANDY-NEXT: psrlq $7, %mm0 # sched: [1:1.00] ; SANDY-NEXT: movq %mm0, %rax # sched: [1:0.33] ; SANDY-NEXT: retq # sched: [1:1.00] @@ -5680,7 +5680,7 @@ define i64 @test_psrlw(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize { ; GENERIC-LABEL: test_psrlw: ; GENERIC: # %bb.0: ; GENERIC-NEXT: psrlw %mm1, %mm0 # sched: [1:1.00] -; GENERIC-NEXT: psrlw (%rdi), %mm0 # sched: [5:1.00] +; GENERIC-NEXT: psrlw (%rdi), %mm0 # sched: [6:1.00] ; GENERIC-NEXT: psrlw $7, %mm0 # sched: [1:1.00] ; GENERIC-NEXT: movq %mm0, %rax # sched: [1:0.33] ; GENERIC-NEXT: retq # sched: [1:1.00] @@ -5704,7 +5704,7 @@ define i64 @test_psrlw(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize { ; SANDY-LABEL: test_psrlw: ; SANDY: # %bb.0: ; SANDY-NEXT: psrlw %mm1, %mm0 # sched: [1:1.00] -; SANDY-NEXT: psrlw (%rdi), %mm0 # sched: [5:1.00] +; SANDY-NEXT: psrlw (%rdi), %mm0 # sched: [6:1.00] ; SANDY-NEXT: psrlw $7, %mm0 # sched: 
[1:1.00] ; SANDY-NEXT: movq %mm0, %rax # sched: [1:0.33] ; SANDY-NEXT: retq # sched: [1:1.00] @@ -5770,7 +5770,7 @@ define i64 @test_psubb(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize { ; GENERIC-LABEL: test_psubb: ; GENERIC: # %bb.0: ; GENERIC-NEXT: psubb %mm1, %mm0 # sched: [3:1.00] -; GENERIC-NEXT: psubb (%rdi), %mm0 # sched: [7:1.00] +; GENERIC-NEXT: psubb (%rdi), %mm0 # sched: [8:1.00] ; GENERIC-NEXT: movq %mm0, %rax # sched: [1:0.33] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -5791,7 +5791,7 @@ define i64 @test_psubb(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize { ; SANDY-LABEL: test_psubb: ; SANDY: # %bb.0: ; SANDY-NEXT: psubb %mm1, %mm0 # sched: [3:1.00] -; SANDY-NEXT: psubb (%rdi), %mm0 # sched: [7:1.00] +; SANDY-NEXT: psubb (%rdi), %mm0 # sched: [8:1.00] ; SANDY-NEXT: movq %mm0, %rax # sched: [1:0.33] ; SANDY-NEXT: retq # sched: [1:1.00] ; @@ -5848,7 +5848,7 @@ define i64 @test_psubd(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize { ; GENERIC-LABEL: test_psubd: ; GENERIC: # %bb.0: ; GENERIC-NEXT: psubd %mm1, %mm0 # sched: [3:1.00] -; GENERIC-NEXT: psubd (%rdi), %mm0 # sched: [7:1.00] +; GENERIC-NEXT: psubd (%rdi), %mm0 # sched: [8:1.00] ; GENERIC-NEXT: movq %mm0, %rax # sched: [1:0.33] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -5869,7 +5869,7 @@ define i64 @test_psubd(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize { ; SANDY-LABEL: test_psubd: ; SANDY: # %bb.0: ; SANDY-NEXT: psubd %mm1, %mm0 # sched: [3:1.00] -; SANDY-NEXT: psubd (%rdi), %mm0 # sched: [7:1.00] +; SANDY-NEXT: psubd (%rdi), %mm0 # sched: [8:1.00] ; SANDY-NEXT: movq %mm0, %rax # sched: [1:0.33] ; SANDY-NEXT: retq # sched: [1:1.00] ; @@ -5926,7 +5926,7 @@ define i64 @test_psubq(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize { ; GENERIC-LABEL: test_psubq: ; GENERIC: # %bb.0: ; GENERIC-NEXT: psubq %mm1, %mm0 # sched: [3:1.00] -; GENERIC-NEXT: psubq (%rdi), %mm0 # sched: [7:1.00] +; GENERIC-NEXT: psubq (%rdi), %mm0 # sched: [8:1.00] ; GENERIC-NEXT: movq %mm0, %rax # sched: [1:0.33] ; 
GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -5947,7 +5947,7 @@ define i64 @test_psubq(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize { ; SANDY-LABEL: test_psubq: ; SANDY: # %bb.0: ; SANDY-NEXT: psubq %mm1, %mm0 # sched: [3:1.00] -; SANDY-NEXT: psubq (%rdi), %mm0 # sched: [7:1.00] +; SANDY-NEXT: psubq (%rdi), %mm0 # sched: [8:1.00] ; SANDY-NEXT: movq %mm0, %rax # sched: [1:0.33] ; SANDY-NEXT: retq # sched: [1:1.00] ; @@ -6004,7 +6004,7 @@ define i64 @test_psubsb(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize { ; GENERIC-LABEL: test_psubsb: ; GENERIC: # %bb.0: ; GENERIC-NEXT: psubsb %mm1, %mm0 # sched: [3:1.00] -; GENERIC-NEXT: psubsb (%rdi), %mm0 # sched: [7:1.00] +; GENERIC-NEXT: psubsb (%rdi), %mm0 # sched: [8:1.00] ; GENERIC-NEXT: movq %mm0, %rax # sched: [1:0.33] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -6025,7 +6025,7 @@ define i64 @test_psubsb(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize { ; SANDY-LABEL: test_psubsb: ; SANDY: # %bb.0: ; SANDY-NEXT: psubsb %mm1, %mm0 # sched: [3:1.00] -; SANDY-NEXT: psubsb (%rdi), %mm0 # sched: [7:1.00] +; SANDY-NEXT: psubsb (%rdi), %mm0 # sched: [8:1.00] ; SANDY-NEXT: movq %mm0, %rax # sched: [1:0.33] ; SANDY-NEXT: retq # sched: [1:1.00] ; @@ -6082,7 +6082,7 @@ define i64 @test_psubsw(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize { ; GENERIC-LABEL: test_psubsw: ; GENERIC: # %bb.0: ; GENERIC-NEXT: psubsw %mm1, %mm0 # sched: [3:1.00] -; GENERIC-NEXT: psubsw (%rdi), %mm0 # sched: [7:1.00] +; GENERIC-NEXT: psubsw (%rdi), %mm0 # sched: [8:1.00] ; GENERIC-NEXT: movq %mm0, %rax # sched: [1:0.33] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -6103,7 +6103,7 @@ define i64 @test_psubsw(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize { ; SANDY-LABEL: test_psubsw: ; SANDY: # %bb.0: ; SANDY-NEXT: psubsw %mm1, %mm0 # sched: [3:1.00] -; SANDY-NEXT: psubsw (%rdi), %mm0 # sched: [7:1.00] +; SANDY-NEXT: psubsw (%rdi), %mm0 # sched: [8:1.00] ; SANDY-NEXT: movq %mm0, %rax # sched: [1:0.33] ; SANDY-NEXT: retq # sched: [1:1.00] ; @@ -6160,7 
+6160,7 @@ define i64 @test_psubusb(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize { ; GENERIC-LABEL: test_psubusb: ; GENERIC: # %bb.0: ; GENERIC-NEXT: psubusb %mm1, %mm0 # sched: [3:1.00] -; GENERIC-NEXT: psubusb (%rdi), %mm0 # sched: [7:1.00] +; GENERIC-NEXT: psubusb (%rdi), %mm0 # sched: [8:1.00] ; GENERIC-NEXT: movq %mm0, %rax # sched: [1:0.33] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -6181,7 +6181,7 @@ define i64 @test_psubusb(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize { ; SANDY-LABEL: test_psubusb: ; SANDY: # %bb.0: ; SANDY-NEXT: psubusb %mm1, %mm0 # sched: [3:1.00] -; SANDY-NEXT: psubusb (%rdi), %mm0 # sched: [7:1.00] +; SANDY-NEXT: psubusb (%rdi), %mm0 # sched: [8:1.00] ; SANDY-NEXT: movq %mm0, %rax # sched: [1:0.33] ; SANDY-NEXT: retq # sched: [1:1.00] ; @@ -6238,7 +6238,7 @@ define i64 @test_psubusw(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize { ; GENERIC-LABEL: test_psubusw: ; GENERIC: # %bb.0: ; GENERIC-NEXT: psubusw %mm1, %mm0 # sched: [3:1.00] -; GENERIC-NEXT: psubusw (%rdi), %mm0 # sched: [7:1.00] +; GENERIC-NEXT: psubusw (%rdi), %mm0 # sched: [8:1.00] ; GENERIC-NEXT: movq %mm0, %rax # sched: [1:0.33] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -6259,7 +6259,7 @@ define i64 @test_psubusw(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize { ; SANDY-LABEL: test_psubusw: ; SANDY: # %bb.0: ; SANDY-NEXT: psubusw %mm1, %mm0 # sched: [3:1.00] -; SANDY-NEXT: psubusw (%rdi), %mm0 # sched: [7:1.00] +; SANDY-NEXT: psubusw (%rdi), %mm0 # sched: [8:1.00] ; SANDY-NEXT: movq %mm0, %rax # sched: [1:0.33] ; SANDY-NEXT: retq # sched: [1:1.00] ; @@ -6316,7 +6316,7 @@ define i64 @test_psubw(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize { ; GENERIC-LABEL: test_psubw: ; GENERIC: # %bb.0: ; GENERIC-NEXT: psubw %mm1, %mm0 # sched: [3:1.00] -; GENERIC-NEXT: psubw (%rdi), %mm0 # sched: [7:1.00] +; GENERIC-NEXT: psubw (%rdi), %mm0 # sched: [8:1.00] ; GENERIC-NEXT: movq %mm0, %rax # sched: [1:0.33] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -6337,7 +6337,7 @@ define 
i64 @test_psubw(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize { ; SANDY-LABEL: test_psubw: ; SANDY: # %bb.0: ; SANDY-NEXT: psubw %mm1, %mm0 # sched: [3:1.00] -; SANDY-NEXT: psubw (%rdi), %mm0 # sched: [7:1.00] +; SANDY-NEXT: psubw (%rdi), %mm0 # sched: [8:1.00] ; SANDY-NEXT: movq %mm0, %rax # sched: [1:0.33] ; SANDY-NEXT: retq # sched: [1:1.00] ; @@ -6394,7 +6394,7 @@ define i64 @test_punpckhbw(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize { ; GENERIC-LABEL: test_punpckhbw: ; GENERIC: # %bb.0: ; GENERIC-NEXT: punpckhbw %mm1, %mm0 # mm0 = mm0[4],mm1[4],mm0[5],mm1[5],mm0[6],mm1[6],mm0[7],mm1[7] sched: [1:1.00] -; GENERIC-NEXT: punpckhbw (%rdi), %mm0 # mm0 = mm0[4],mem[4],mm0[5],mem[5],mm0[6],mem[6],mm0[7],mem[7] sched: [5:1.00] +; GENERIC-NEXT: punpckhbw (%rdi), %mm0 # mm0 = mm0[4],mem[4],mm0[5],mem[5],mm0[6],mem[6],mm0[7],mem[7] sched: [6:1.00] ; GENERIC-NEXT: movq %mm0, %rax # sched: [1:0.33] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -6415,7 +6415,7 @@ define i64 @test_punpckhbw(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize { ; SANDY-LABEL: test_punpckhbw: ; SANDY: # %bb.0: ; SANDY-NEXT: punpckhbw %mm1, %mm0 # mm0 = mm0[4],mm1[4],mm0[5],mm1[5],mm0[6],mm1[6],mm0[7],mm1[7] sched: [1:1.00] -; SANDY-NEXT: punpckhbw (%rdi), %mm0 # mm0 = mm0[4],mem[4],mm0[5],mem[5],mm0[6],mem[6],mm0[7],mem[7] sched: [5:1.00] +; SANDY-NEXT: punpckhbw (%rdi), %mm0 # mm0 = mm0[4],mem[4],mm0[5],mem[5],mm0[6],mem[6],mm0[7],mem[7] sched: [6:1.00] ; SANDY-NEXT: movq %mm0, %rax # sched: [1:0.33] ; SANDY-NEXT: retq # sched: [1:1.00] ; @@ -6472,7 +6472,7 @@ define i64 @test_punpckhdq(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize { ; GENERIC-LABEL: test_punpckhdq: ; GENERIC: # %bb.0: ; GENERIC-NEXT: punpckhdq %mm1, %mm0 # mm0 = mm0[1],mm1[1] sched: [1:1.00] -; GENERIC-NEXT: punpckhdq (%rdi), %mm0 # mm0 = mm0[1],mem[1] sched: [5:1.00] +; GENERIC-NEXT: punpckhdq (%rdi), %mm0 # mm0 = mm0[1],mem[1] sched: [6:1.00] ; GENERIC-NEXT: movq %mm0, %rax # sched: [1:0.33] ; GENERIC-NEXT: retq # sched: 
[1:1.00] ; @@ -6493,7 +6493,7 @@ define i64 @test_punpckhdq(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize { ; SANDY-LABEL: test_punpckhdq: ; SANDY: # %bb.0: ; SANDY-NEXT: punpckhdq %mm1, %mm0 # mm0 = mm0[1],mm1[1] sched: [1:1.00] -; SANDY-NEXT: punpckhdq (%rdi), %mm0 # mm0 = mm0[1],mem[1] sched: [5:1.00] +; SANDY-NEXT: punpckhdq (%rdi), %mm0 # mm0 = mm0[1],mem[1] sched: [6:1.00] ; SANDY-NEXT: movq %mm0, %rax # sched: [1:0.33] ; SANDY-NEXT: retq # sched: [1:1.00] ; @@ -6550,7 +6550,7 @@ define i64 @test_punpckhwd(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize { ; GENERIC-LABEL: test_punpckhwd: ; GENERIC: # %bb.0: ; GENERIC-NEXT: punpckhwd %mm1, %mm0 # mm0 = mm0[2],mm1[2],mm0[3],mm1[3] sched: [1:1.00] -; GENERIC-NEXT: punpckhwd (%rdi), %mm0 # mm0 = mm0[2],mem[2],mm0[3],mem[3] sched: [5:1.00] +; GENERIC-NEXT: punpckhwd (%rdi), %mm0 # mm0 = mm0[2],mem[2],mm0[3],mem[3] sched: [6:1.00] ; GENERIC-NEXT: movq %mm0, %rax # sched: [1:0.33] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -6571,7 +6571,7 @@ define i64 @test_punpckhwd(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize { ; SANDY-LABEL: test_punpckhwd: ; SANDY: # %bb.0: ; SANDY-NEXT: punpckhwd %mm1, %mm0 # mm0 = mm0[2],mm1[2],mm0[3],mm1[3] sched: [1:1.00] -; SANDY-NEXT: punpckhwd (%rdi), %mm0 # mm0 = mm0[2],mem[2],mm0[3],mem[3] sched: [5:1.00] +; SANDY-NEXT: punpckhwd (%rdi), %mm0 # mm0 = mm0[2],mem[2],mm0[3],mem[3] sched: [6:1.00] ; SANDY-NEXT: movq %mm0, %rax # sched: [1:0.33] ; SANDY-NEXT: retq # sched: [1:1.00] ; @@ -6628,7 +6628,7 @@ define i64 @test_punpcklbw(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize { ; GENERIC-LABEL: test_punpcklbw: ; GENERIC: # %bb.0: ; GENERIC-NEXT: punpcklbw %mm1, %mm0 # mm0 = mm0[0],mm1[0],mm0[1],mm1[1],mm0[2],mm1[2],mm0[3],mm1[3] sched: [1:1.00] -; GENERIC-NEXT: punpcklbw (%rdi), %mm0 # mm0 = mm0[0],mem[0],mm0[1],mem[1],mm0[2],mem[2],mm0[3],mem[3] sched: [5:1.00] +; GENERIC-NEXT: punpcklbw (%rdi), %mm0 # mm0 = mm0[0],mem[0],mm0[1],mem[1],mm0[2],mem[2],mm0[3],mem[3] sched: [6:1.00] ; 
GENERIC-NEXT: movq %mm0, %rax # sched: [1:0.33] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -6649,7 +6649,7 @@ define i64 @test_punpcklbw(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize { ; SANDY-LABEL: test_punpcklbw: ; SANDY: # %bb.0: ; SANDY-NEXT: punpcklbw %mm1, %mm0 # mm0 = mm0[0],mm1[0],mm0[1],mm1[1],mm0[2],mm1[2],mm0[3],mm1[3] sched: [1:1.00] -; SANDY-NEXT: punpcklbw (%rdi), %mm0 # mm0 = mm0[0],mem[0],mm0[1],mem[1],mm0[2],mem[2],mm0[3],mem[3] sched: [5:1.00] +; SANDY-NEXT: punpcklbw (%rdi), %mm0 # mm0 = mm0[0],mem[0],mm0[1],mem[1],mm0[2],mem[2],mm0[3],mem[3] sched: [6:1.00] ; SANDY-NEXT: movq %mm0, %rax # sched: [1:0.33] ; SANDY-NEXT: retq # sched: [1:1.00] ; @@ -6706,7 +6706,7 @@ define i64 @test_punpckldq(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize { ; GENERIC-LABEL: test_punpckldq: ; GENERIC: # %bb.0: ; GENERIC-NEXT: punpckldq %mm1, %mm0 # mm0 = mm0[0],mm1[0] sched: [1:1.00] -; GENERIC-NEXT: punpckldq (%rdi), %mm0 # mm0 = mm0[0],mem[0] sched: [5:1.00] +; GENERIC-NEXT: punpckldq (%rdi), %mm0 # mm0 = mm0[0],mem[0] sched: [6:1.00] ; GENERIC-NEXT: movq %mm0, %rax # sched: [1:0.33] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -6727,7 +6727,7 @@ define i64 @test_punpckldq(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize { ; SANDY-LABEL: test_punpckldq: ; SANDY: # %bb.0: ; SANDY-NEXT: punpckldq %mm1, %mm0 # mm0 = mm0[0],mm1[0] sched: [1:1.00] -; SANDY-NEXT: punpckldq (%rdi), %mm0 # mm0 = mm0[0],mem[0] sched: [5:1.00] +; SANDY-NEXT: punpckldq (%rdi), %mm0 # mm0 = mm0[0],mem[0] sched: [6:1.00] ; SANDY-NEXT: movq %mm0, %rax # sched: [1:0.33] ; SANDY-NEXT: retq # sched: [1:1.00] ; @@ -6784,7 +6784,7 @@ define i64 @test_punpcklwd(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize { ; GENERIC-LABEL: test_punpcklwd: ; GENERIC: # %bb.0: ; GENERIC-NEXT: punpcklwd %mm1, %mm0 # mm0 = mm0[0],mm1[0],mm0[1],mm1[1] sched: [1:1.00] -; GENERIC-NEXT: punpcklwd (%rdi), %mm0 # mm0 = mm0[0],mem[0],mm0[1],mem[1] sched: [5:1.00] +; GENERIC-NEXT: punpcklwd (%rdi), %mm0 # mm0 = 
mm0[0],mem[0],mm0[1],mem[1] sched: [6:1.00] ; GENERIC-NEXT: movq %mm0, %rax # sched: [1:0.33] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -6805,7 +6805,7 @@ define i64 @test_punpcklwd(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize { ; SANDY-LABEL: test_punpcklwd: ; SANDY: # %bb.0: ; SANDY-NEXT: punpcklwd %mm1, %mm0 # mm0 = mm0[0],mm1[0],mm0[1],mm1[1] sched: [1:1.00] -; SANDY-NEXT: punpcklwd (%rdi), %mm0 # mm0 = mm0[0],mem[0],mm0[1],mem[1] sched: [5:1.00] +; SANDY-NEXT: punpcklwd (%rdi), %mm0 # mm0 = mm0[0],mem[0],mm0[1],mem[1] sched: [6:1.00] ; SANDY-NEXT: movq %mm0, %rax # sched: [1:0.33] ; SANDY-NEXT: retq # sched: [1:1.00] ; @@ -6862,7 +6862,7 @@ define i64 @test_pxor(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize { ; GENERIC-LABEL: test_pxor: ; GENERIC: # %bb.0: ; GENERIC-NEXT: pxor %mm1, %mm0 # sched: [1:1.00] -; GENERIC-NEXT: pxor (%rdi), %mm0 # sched: [5:1.00] +; GENERIC-NEXT: pxor (%rdi), %mm0 # sched: [6:1.00] ; GENERIC-NEXT: movq %mm0, %rax # sched: [1:0.33] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -6883,7 +6883,7 @@ define i64 @test_pxor(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize { ; SANDY-LABEL: test_pxor: ; SANDY: # %bb.0: ; SANDY-NEXT: pxor %mm1, %mm0 # sched: [1:1.00] -; SANDY-NEXT: pxor (%rdi), %mm0 # sched: [5:1.00] +; SANDY-NEXT: pxor (%rdi), %mm0 # sched: [6:1.00] ; SANDY-NEXT: movq %mm0, %rax # sched: [1:0.33] ; SANDY-NEXT: retq # sched: [1:1.00] ; diff --git a/llvm/test/CodeGen/X86/movbe-schedule.ll b/llvm/test/CodeGen/X86/movbe-schedule.ll index 667b0d8..c0b65f3 100644 --- a/llvm/test/CodeGen/X86/movbe-schedule.ll +++ b/llvm/test/CodeGen/X86/movbe-schedule.ll @@ -12,7 +12,7 @@ define i16 @test_movbe_i16(i16 *%a0, i16 %a1, i16 *%a2) { ; GENERIC-LABEL: test_movbe_i16: ; GENERIC: # %bb.0: -; GENERIC-NEXT: movbew (%rdi), %ax # sched: [5:0.50] +; GENERIC-NEXT: movbew (%rdi), %ax # sched: [6:0.50] ; GENERIC-NEXT: movbew %si, (%rdx) # sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -72,7 +72,7 @@ declare i16 @llvm.bswap.i16(i16) 
define i32 @test_movbe_i32(i32 *%a0, i32 %a1, i32 *%a2) { ; GENERIC-LABEL: test_movbe_i32: ; GENERIC: # %bb.0: -; GENERIC-NEXT: movbel (%rdi), %eax # sched: [5:0.50] +; GENERIC-NEXT: movbel (%rdi), %eax # sched: [6:0.50] ; GENERIC-NEXT: movbel %esi, (%rdx) # sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -132,7 +132,7 @@ declare i32 @llvm.bswap.i32(i32) define i64 @test_movbe_i64(i64 *%a0, i64 %a1, i64 *%a2) { ; GENERIC-LABEL: test_movbe_i64: ; GENERIC: # %bb.0: -; GENERIC-NEXT: movbeq (%rdi), %rax # sched: [5:0.50] +; GENERIC-NEXT: movbeq (%rdi), %rax # sched: [6:0.50] ; GENERIC-NEXT: movbeq %rsi, (%rdx) # sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; diff --git a/llvm/test/CodeGen/X86/schedule-x86_32.ll b/llvm/test/CodeGen/X86/schedule-x86_32.ll index 81f4477..282016e 100644 --- a/llvm/test/CodeGen/X86/schedule-x86_32.ll +++ b/llvm/test/CodeGen/X86/schedule-x86_32.ll @@ -42,7 +42,7 @@ define i8 @test_aaa(i8 %a0) optsize { ; SANDY-NEXT: #APP ; SANDY-NEXT: aaa # sched: [100:0.33] ; SANDY-NEXT: #NO_APP -; SANDY-NEXT: retl # sched: [5:1.00] +; SANDY-NEXT: retl # sched: [6:1.00] ; ; HASWELL-LABEL: test_aaa: ; HASWELL: # %bb.0: @@ -126,7 +126,7 @@ define i8 @test_aad(i16 %a0) optsize { ; SANDY-NEXT: #APP ; SANDY-NEXT: aad # sched: [100:0.33] ; SANDY-NEXT: #NO_APP -; SANDY-NEXT: retl # sched: [5:1.00] +; SANDY-NEXT: retl # sched: [6:1.00] ; ; HASWELL-LABEL: test_aad: ; HASWELL: # %bb.0: @@ -210,7 +210,7 @@ define i16 @test_aam(i8 %a0) optsize { ; SANDY-NEXT: #APP ; SANDY-NEXT: aam # sched: [100:0.33] ; SANDY-NEXT: #NO_APP -; SANDY-NEXT: retl # sched: [5:1.00] +; SANDY-NEXT: retl # sched: [6:1.00] ; ; HASWELL-LABEL: test_aam: ; HASWELL: # %bb.0: @@ -294,7 +294,7 @@ define i8 @test_aas(i8 %a0) optsize { ; SANDY-NEXT: #APP ; SANDY-NEXT: aas # sched: [100:0.33] ; SANDY-NEXT: #NO_APP -; SANDY-NEXT: retl # sched: [5:1.00] +; SANDY-NEXT: retl # sched: [6:1.00] ; ; HASWELL-LABEL: test_aas: ; HASWELL: # %bb.0: @@ -382,7 +382,7 @@ define void 
@test_arpl(i16 %a0, i16 *%a1) optsize { ; SANDY-NEXT: #APP ; SANDY-NEXT: arpl %ax, (%ecx) # sched: [100:0.33] ; SANDY-NEXT: #NO_APP -; SANDY-NEXT: retl # sched: [5:1.00] +; SANDY-NEXT: retl # sched: [6:1.00] ; ; HASWELL-LABEL: test_arpl: ; HASWELL: # %bb.0: @@ -504,7 +504,7 @@ define void @test_bound(i16 %a0, i16 *%a1, i32 %a2, i32 *%a3) optsize { ; SANDY-NEXT: bound %ecx, (%edx) # sched: [100:0.33] ; SANDY-NEXT: #NO_APP ; SANDY-NEXT: popl %esi # sched: [6:0.50] -; SANDY-NEXT: retl # sched: [5:1.00] +; SANDY-NEXT: retl # sched: [6:1.00] ; ; HASWELL-LABEL: test_bound: ; HASWELL: # %bb.0: @@ -636,7 +636,7 @@ define i8 @test_daa(i8 %a0) optsize { ; SANDY-NEXT: #APP ; SANDY-NEXT: daa # sched: [100:0.33] ; SANDY-NEXT: #NO_APP -; SANDY-NEXT: retl # sched: [5:1.00] +; SANDY-NEXT: retl # sched: [6:1.00] ; ; HASWELL-LABEL: test_daa: ; HASWELL: # %bb.0: @@ -720,7 +720,7 @@ define i8 @test_das(i8 %a0) optsize { ; SANDY-NEXT: #APP ; SANDY-NEXT: das # sched: [100:0.33] ; SANDY-NEXT: #NO_APP -; SANDY-NEXT: retl # sched: [5:1.00] +; SANDY-NEXT: retl # sched: [6:1.00] ; ; HASWELL-LABEL: test_das: ; HASWELL: # %bb.0: @@ -812,7 +812,7 @@ define void @test_dec16(i16 %a0, i16* %a1) optsize { ; SANDY-NEXT: decw %ax # sched: [1:0.33] ; SANDY-NEXT: decw (%ecx) # sched: [7:1.00] ; SANDY-NEXT: #NO_APP -; SANDY-NEXT: retl # sched: [5:1.00] +; SANDY-NEXT: retl # sched: [6:1.00] ; ; HASWELL-LABEL: test_dec16: ; HASWELL: # %bb.0: @@ -915,7 +915,7 @@ define void @test_dec32(i32 %a0, i32* %a1) optsize { ; SANDY-NEXT: decl %eax # sched: [1:0.33] ; SANDY-NEXT: decl (%ecx) # sched: [7:1.00] ; SANDY-NEXT: #NO_APP -; SANDY-NEXT: retl # sched: [5:1.00] +; SANDY-NEXT: retl # sched: [6:1.00] ; ; HASWELL-LABEL: test_dec32: ; HASWELL: # %bb.0: @@ -1019,7 +1019,7 @@ define void @test_inc16(i16 %a0, i16* %a1) optsize { ; SANDY-NEXT: incw %ax # sched: [1:0.33] ; SANDY-NEXT: incw (%ecx) # sched: [7:1.00] ; SANDY-NEXT: #NO_APP -; SANDY-NEXT: retl # sched: [5:1.00] +; SANDY-NEXT: retl # sched: [6:1.00] ; ; 
HASWELL-LABEL: test_inc16: ; HASWELL: # %bb.0: @@ -1122,7 +1122,7 @@ define void @test_inc32(i32 %a0, i32* %a1) optsize { ; SANDY-NEXT: incl %eax # sched: [1:0.33] ; SANDY-NEXT: incl (%ecx) # sched: [7:1.00] ; SANDY-NEXT: #NO_APP -; SANDY-NEXT: retl # sched: [5:1.00] +; SANDY-NEXT: retl # sched: [6:1.00] ; ; HASWELL-LABEL: test_inc32: ; HASWELL: # %bb.0: @@ -1214,7 +1214,7 @@ define void @test_into() optsize { ; SANDY-NEXT: #APP ; SANDY-NEXT: into # sched: [100:0.33] ; SANDY-NEXT: #NO_APP -; SANDY-NEXT: retl # sched: [5:1.00] +; SANDY-NEXT: retl # sched: [6:1.00] ; ; HASWELL-LABEL: test_into: ; HASWELL: # %bb.0: @@ -1296,7 +1296,7 @@ define void @test_jcxz_jecxz() optsize { ; SANDY-NEXT: jcxz JXTGT # sched: [2:1.00] ; SANDY-NEXT: jecxz JXTGT # sched: [2:1.00] ; SANDY-NEXT: #NO_APP -; SANDY-NEXT: retl # sched: [5:1.00] +; SANDY-NEXT: retl # sched: [6:1.00] ; ; HASWELL-LABEL: test_jcxz_jecxz: ; HASWELL: # %bb.0: @@ -1382,7 +1382,7 @@ define void @test_leave() optsize { ; SANDY-NEXT: #APP ; SANDY-NEXT: leave # sched: [7:0.67] ; SANDY-NEXT: #NO_APP -; SANDY-NEXT: retl # sched: [5:1.00] +; SANDY-NEXT: retl # sched: [6:1.00] ; ; HASWELL-LABEL: test_leave: ; HASWELL: # %bb.0: @@ -1496,7 +1496,7 @@ define void @test_pop_push() optsize { ; SANDY-NEXT: pushl %fs # sched: [100:0.33] ; SANDY-NEXT: pushl %gs # sched: [100:0.33] ; SANDY-NEXT: #NO_APP -; SANDY-NEXT: retl # sched: [5:1.00] +; SANDY-NEXT: retl # sched: [6:1.00] ; ; HASWELL-LABEL: test_pop_push: ; HASWELL: # %bb.0: @@ -1660,7 +1660,7 @@ define i16 @test_pop_push_16(i16 %a0, i16 *%a1) optsize { ; SANDY-NEXT: # sched: [1:1.00] ; SANDY-NEXT: pushw $7 # sched: [1:1.00] ; SANDY-NEXT: #NO_APP -; SANDY-NEXT: retl # sched: [5:1.00] +; SANDY-NEXT: retl # sched: [6:1.00] ; ; HASWELL-LABEL: test_pop_push_16: ; HASWELL: # %bb.0: @@ -1812,7 +1812,7 @@ define i32 @test_pop_push_32(i32 %a0, i32 *%a1) optsize { ; SANDY-NEXT: # sched: [1:1.00] ; SANDY-NEXT: pushl $7 # sched: [1:1.00] ; SANDY-NEXT: #NO_APP -; SANDY-NEXT: retl # 
sched: [5:1.00] +; SANDY-NEXT: retl # sched: [6:1.00] ; ; HASWELL-LABEL: test_pop_push_32: ; HASWELL: # %bb.0: @@ -1941,12 +1941,12 @@ define void @test_popa_popf_pusha_pushf() optsize { ; SANDY-LABEL: test_popa_popf_pusha_pushf: ; SANDY: # %bb.0: ; SANDY-NEXT: #APP -; SANDY-NEXT: popal # sched: [4:0.50] -; SANDY-NEXT: popfl # sched: [4:0.50] +; SANDY-NEXT: popal # sched: [5:0.50] +; SANDY-NEXT: popfl # sched: [5:0.50] ; SANDY-NEXT: pushal # sched: [1:1.00] ; SANDY-NEXT: pushfl # sched: [1:1.00] ; SANDY-NEXT: #NO_APP -; SANDY-NEXT: retl # sched: [5:1.00] +; SANDY-NEXT: retl # sched: [6:1.00] ; ; HASWELL-LABEL: test_popa_popf_pusha_pushf: ; HASWELL: # %bb.0: @@ -2049,14 +2049,14 @@ define void @test_ret() optsize { ; SANDY-LABEL: test_ret: ; SANDY: # %bb.0: ; SANDY-NEXT: #APP -; SANDY-NEXT: retl # sched: [5:1.00] +; SANDY-NEXT: retl # sched: [6:1.00] ; SANDY-NEXT: retl $4095 # imm = 0xFFF -; SANDY-NEXT: # sched: [5:1.00] -; SANDY-NEXT: lretl # sched: [5:1.00] +; SANDY-NEXT: # sched: [6:1.00] +; SANDY-NEXT: lretl # sched: [6:1.00] ; SANDY-NEXT: lretl $4095 # imm = 0xFFF -; SANDY-NEXT: # sched: [5:1.00] +; SANDY-NEXT: # sched: [6:1.00] ; SANDY-NEXT: #NO_APP -; SANDY-NEXT: retl # sched: [5:1.00] +; SANDY-NEXT: retl # sched: [6:1.00] ; ; HASWELL-LABEL: test_ret: ; HASWELL: # %bb.0: @@ -2160,7 +2160,7 @@ define i8 @test_salc() optsize { ; SANDY-NEXT: #APP ; SANDY-NEXT: salc # sched: [1:0.33] ; SANDY-NEXT: #NO_APP -; SANDY-NEXT: retl # sched: [5:1.00] +; SANDY-NEXT: retl # sched: [6:1.00] ; ; HASWELL-LABEL: test_salc: ; HASWELL: # %bb.0: @@ -2252,9 +2252,9 @@ define void @test_xchg_32(i32 %a0, i32 %a1, i32 *%a2) optsize { ; SANDY-NEXT: #APP ; SANDY-NEXT: xchgl %eax, %eax # sched: [1:0.33] ; SANDY-NEXT: xchgl %ecx, %eax # sched: [1:0.33] -; SANDY-NEXT: xchgl %eax, (%edx) # sched: [5:1.00] +; SANDY-NEXT: xchgl %eax, (%edx) # sched: [6:1.00] ; SANDY-NEXT: #NO_APP -; SANDY-NEXT: retl # sched: [5:1.00] +; SANDY-NEXT: retl # sched: [6:1.00] ; ; HASWELL-LABEL: test_xchg_32: ; 
HASWELL: # %bb.0: diff --git a/llvm/test/CodeGen/X86/schedule-x86_64.ll b/llvm/test/CodeGen/X86/schedule-x86_64.ll index f036b18..5a86ac6 100644 --- a/llvm/test/CodeGen/X86/schedule-x86_64.ll +++ b/llvm/test/CodeGen/X86/schedule-x86_64.ll @@ -4638,8 +4638,8 @@ define void @test_cmpxchg8b_cmpxchg16b(i8 *%a0) optsize { ; GENERIC-LABEL: test_cmpxchg8b_cmpxchg16b: ; GENERIC: # %bb.0: ; GENERIC-NEXT: #APP -; GENERIC-NEXT: cmpxchg8b (%rdi) # sched: [5:1.00] -; GENERIC-NEXT: cmpxchg16b (%rdi) # sched: [5:1.00] +; GENERIC-NEXT: cmpxchg8b (%rdi) # sched: [6:1.00] +; GENERIC-NEXT: cmpxchg16b (%rdi) # sched: [6:1.00] ; GENERIC-NEXT: #NO_APP ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -4662,8 +4662,8 @@ define void @test_cmpxchg8b_cmpxchg16b(i8 *%a0) optsize { ; SANDY-LABEL: test_cmpxchg8b_cmpxchg16b: ; SANDY: # %bb.0: ; SANDY-NEXT: #APP -; SANDY-NEXT: cmpxchg8b (%rdi) # sched: [5:1.00] -; SANDY-NEXT: cmpxchg16b (%rdi) # sched: [5:1.00] +; SANDY-NEXT: cmpxchg8b (%rdi) # sched: [6:1.00] +; SANDY-NEXT: cmpxchg16b (%rdi) # sched: [6:1.00] ; SANDY-NEXT: #NO_APP ; SANDY-NEXT: retq # sched: [1:1.00] ; @@ -5132,13 +5132,13 @@ define void @test_div(i8 %a0, i16 %a1, i32 %a2, i64 %a3, i8 *%p0, i16 *%p1, i32 ; GENERIC-NEXT: movq {{[0-9]+}}(%rsp), %rax # sched: [5:0.50] ; GENERIC-NEXT: #APP ; GENERIC-NEXT: divb %dil # sched: [25:10.00] -; GENERIC-NEXT: divb (%r8) # sched: [29:10.00] +; GENERIC-NEXT: divb (%r8) # sched: [30:10.00] ; GENERIC-NEXT: divw %si # sched: [25:10.00] -; GENERIC-NEXT: divw (%r9) # sched: [29:10.00] +; GENERIC-NEXT: divw (%r9) # sched: [30:10.00] ; GENERIC-NEXT: divl %edx # sched: [25:10.00] -; GENERIC-NEXT: divl (%rax) # sched: [29:10.00] +; GENERIC-NEXT: divl (%rax) # sched: [30:10.00] ; GENERIC-NEXT: divq %rcx # sched: [25:10.00] -; GENERIC-NEXT: divq (%r10) # sched: [29:10.00] +; GENERIC-NEXT: divq (%r10) # sched: [30:10.00] ; GENERIC-NEXT: #NO_APP ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -5180,13 +5180,13 @@ define void @test_div(i8 %a0, i16 %a1, i32 %a2, i64 
%a3, i8 *%p0, i16 *%p1, i32 ; SANDY-NEXT: movq {{[0-9]+}}(%rsp), %rax # sched: [5:0.50] ; SANDY-NEXT: #APP ; SANDY-NEXT: divb %dil # sched: [25:10.00] -; SANDY-NEXT: divb (%r8) # sched: [29:10.00] +; SANDY-NEXT: divb (%r8) # sched: [30:10.00] ; SANDY-NEXT: divw %si # sched: [25:10.00] -; SANDY-NEXT: divw (%r9) # sched: [29:10.00] +; SANDY-NEXT: divw (%r9) # sched: [30:10.00] ; SANDY-NEXT: divl %edx # sched: [25:10.00] -; SANDY-NEXT: divl (%rax) # sched: [29:10.00] +; SANDY-NEXT: divl (%rax) # sched: [30:10.00] ; SANDY-NEXT: divq %rcx # sched: [25:10.00] -; SANDY-NEXT: divq (%r10) # sched: [29:10.00] +; SANDY-NEXT: divq (%r10) # sched: [30:10.00] ; SANDY-NEXT: #NO_APP ; SANDY-NEXT: retq # sched: [1:1.00] ; @@ -5380,13 +5380,13 @@ define void @test_idiv(i8 %a0, i16 %a1, i32 %a2, i64 %a3, i8 *%p0, i16 *%p1, i32 ; GENERIC-NEXT: movq {{[0-9]+}}(%rsp), %rax # sched: [5:0.50] ; GENERIC-NEXT: #APP ; GENERIC-NEXT: idivb %dil # sched: [25:10.00] -; GENERIC-NEXT: idivb (%r8) # sched: [29:10.00] +; GENERIC-NEXT: idivb (%r8) # sched: [30:10.00] ; GENERIC-NEXT: idivw %si # sched: [25:10.00] -; GENERIC-NEXT: idivw (%r9) # sched: [29:10.00] +; GENERIC-NEXT: idivw (%r9) # sched: [30:10.00] ; GENERIC-NEXT: idivl %edx # sched: [25:10.00] -; GENERIC-NEXT: idivl (%rax) # sched: [29:10.00] +; GENERIC-NEXT: idivl (%rax) # sched: [30:10.00] ; GENERIC-NEXT: idivq %rcx # sched: [25:10.00] -; GENERIC-NEXT: idivq (%r10) # sched: [29:10.00] +; GENERIC-NEXT: idivq (%r10) # sched: [30:10.00] ; GENERIC-NEXT: #NO_APP ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -5428,13 +5428,13 @@ define void @test_idiv(i8 %a0, i16 %a1, i32 %a2, i64 %a3, i8 *%p0, i16 *%p1, i32 ; SANDY-NEXT: movq {{[0-9]+}}(%rsp), %rax # sched: [5:0.50] ; SANDY-NEXT: #APP ; SANDY-NEXT: idivb %dil # sched: [25:10.00] -; SANDY-NEXT: idivb (%r8) # sched: [29:10.00] +; SANDY-NEXT: idivb (%r8) # sched: [30:10.00] ; SANDY-NEXT: idivw %si # sched: [25:10.00] -; SANDY-NEXT: idivw (%r9) # sched: [29:10.00] +; SANDY-NEXT: idivw (%r9) # 
sched: [30:10.00] ; SANDY-NEXT: idivl %edx # sched: [25:10.00] -; SANDY-NEXT: idivl (%rax) # sched: [29:10.00] +; SANDY-NEXT: idivl (%rax) # sched: [30:10.00] ; SANDY-NEXT: idivq %rcx # sched: [25:10.00] -; SANDY-NEXT: idivq (%r10) # sched: [29:10.00] +; SANDY-NEXT: idivq (%r10) # sched: [30:10.00] ; SANDY-NEXT: #NO_APP ; SANDY-NEXT: retq # sched: [1:1.00] ; @@ -5542,7 +5542,7 @@ define void @test_imul_8(i8 %a0, i8* %a1) optsize { ; GENERIC: # %bb.0: ; GENERIC-NEXT: #APP ; GENERIC-NEXT: imulb %dil # sched: [3:1.00] -; GENERIC-NEXT: imulb (%rsi) # sched: [7:1.00] +; GENERIC-NEXT: imulb (%rsi) # sched: [8:1.00] ; GENERIC-NEXT: #NO_APP ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -5566,7 +5566,7 @@ define void @test_imul_8(i8 %a0, i8* %a1) optsize { ; SANDY: # %bb.0: ; SANDY-NEXT: #APP ; SANDY-NEXT: imulb %dil # sched: [3:1.00] -; SANDY-NEXT: imulb (%rsi) # sched: [7:1.00] +; SANDY-NEXT: imulb (%rsi) # sched: [8:1.00] ; SANDY-NEXT: #NO_APP ; SANDY-NEXT: retq # sched: [1:1.00] ; @@ -5625,15 +5625,15 @@ define void @test_imul_16(i16 %a0, i16* %a1) optsize { ; GENERIC: # %bb.0: ; GENERIC-NEXT: #APP ; GENERIC-NEXT: imulw %di # sched: [3:1.00] -; GENERIC-NEXT: imulw (%rsi) # sched: [7:1.00] +; GENERIC-NEXT: imulw (%rsi) # sched: [8:1.00] ; GENERIC-NEXT: imulw %di, %di # sched: [3:1.00] -; GENERIC-NEXT: imulw (%rsi), %di # sched: [7:1.00] +; GENERIC-NEXT: imulw (%rsi), %di # sched: [8:1.00] ; GENERIC-NEXT: imulw $511, %di, %di # imm = 0x1FF ; GENERIC-NEXT: # sched: [3:1.00] ; GENERIC-NEXT: imulw $511, (%rsi), %di # imm = 0x1FF -; GENERIC-NEXT: # sched: [7:1.00] +; GENERIC-NEXT: # sched: [8:1.00] ; GENERIC-NEXT: imulw $7, %di, %di # sched: [3:1.00] -; GENERIC-NEXT: imulw $7, (%rsi), %di # sched: [7:1.00] +; GENERIC-NEXT: imulw $7, (%rsi), %di # sched: [8:1.00] ; GENERIC-NEXT: #NO_APP ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -5673,15 +5673,15 @@ define void @test_imul_16(i16 %a0, i16* %a1) optsize { ; SANDY: # %bb.0: ; SANDY-NEXT: #APP ; SANDY-NEXT: imulw %di # sched: 
[3:1.00] -; SANDY-NEXT: imulw (%rsi) # sched: [7:1.00] +; SANDY-NEXT: imulw (%rsi) # sched: [8:1.00] ; SANDY-NEXT: imulw %di, %di # sched: [3:1.00] -; SANDY-NEXT: imulw (%rsi), %di # sched: [7:1.00] +; SANDY-NEXT: imulw (%rsi), %di # sched: [8:1.00] ; SANDY-NEXT: imulw $511, %di, %di # imm = 0x1FF ; SANDY-NEXT: # sched: [3:1.00] ; SANDY-NEXT: imulw $511, (%rsi), %di # imm = 0x1FF -; SANDY-NEXT: # sched: [7:1.00] +; SANDY-NEXT: # sched: [8:1.00] ; SANDY-NEXT: imulw $7, %di, %di # sched: [3:1.00] -; SANDY-NEXT: imulw $7, (%rsi), %di # sched: [7:1.00] +; SANDY-NEXT: imulw $7, (%rsi), %di # sched: [8:1.00] ; SANDY-NEXT: #NO_APP ; SANDY-NEXT: retq # sched: [1:1.00] ; @@ -5788,15 +5788,15 @@ define void @test_imul_32(i32 %a0, i32* %a1) optsize { ; GENERIC: # %bb.0: ; GENERIC-NEXT: #APP ; GENERIC-NEXT: imull %edi # sched: [3:1.00] -; GENERIC-NEXT: imull (%rsi) # sched: [7:1.00] +; GENERIC-NEXT: imull (%rsi) # sched: [8:1.00] ; GENERIC-NEXT: imull %edi, %edi # sched: [3:1.00] -; GENERIC-NEXT: imull (%rsi), %edi # sched: [7:1.00] +; GENERIC-NEXT: imull (%rsi), %edi # sched: [8:1.00] ; GENERIC-NEXT: imull $665536, %edi, %edi # imm = 0xA27C0 ; GENERIC-NEXT: # sched: [3:1.00] ; GENERIC-NEXT: imull $665536, (%rsi), %edi # imm = 0xA27C0 -; GENERIC-NEXT: # sched: [7:1.00] +; GENERIC-NEXT: # sched: [8:1.00] ; GENERIC-NEXT: imull $7, %edi, %edi # sched: [3:1.00] -; GENERIC-NEXT: imull $7, (%rsi), %edi # sched: [7:1.00] +; GENERIC-NEXT: imull $7, (%rsi), %edi # sched: [8:1.00] ; GENERIC-NEXT: #NO_APP ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -5836,15 +5836,15 @@ define void @test_imul_32(i32 %a0, i32* %a1) optsize { ; SANDY: # %bb.0: ; SANDY-NEXT: #APP ; SANDY-NEXT: imull %edi # sched: [3:1.00] -; SANDY-NEXT: imull (%rsi) # sched: [7:1.00] +; SANDY-NEXT: imull (%rsi) # sched: [8:1.00] ; SANDY-NEXT: imull %edi, %edi # sched: [3:1.00] -; SANDY-NEXT: imull (%rsi), %edi # sched: [7:1.00] +; SANDY-NEXT: imull (%rsi), %edi # sched: [8:1.00] ; SANDY-NEXT: imull $665536, %edi, %edi # imm 
= 0xA27C0 ; SANDY-NEXT: # sched: [3:1.00] ; SANDY-NEXT: imull $665536, (%rsi), %edi # imm = 0xA27C0 -; SANDY-NEXT: # sched: [7:1.00] +; SANDY-NEXT: # sched: [8:1.00] ; SANDY-NEXT: imull $7, %edi, %edi # sched: [3:1.00] -; SANDY-NEXT: imull $7, (%rsi), %edi # sched: [7:1.00] +; SANDY-NEXT: imull $7, (%rsi), %edi # sched: [8:1.00] ; SANDY-NEXT: #NO_APP ; SANDY-NEXT: retq # sched: [1:1.00] ; @@ -5951,15 +5951,15 @@ define void @test_imul_64(i64 %a0, i64* %a1) optsize { ; GENERIC: # %bb.0: ; GENERIC-NEXT: #APP ; GENERIC-NEXT: imulq %rdi # sched: [3:1.00] -; GENERIC-NEXT: imulq (%rsi) # sched: [7:1.00] +; GENERIC-NEXT: imulq (%rsi) # sched: [8:1.00] ; GENERIC-NEXT: imulq %rdi, %rdi # sched: [3:1.00] -; GENERIC-NEXT: imulq (%rsi), %rdi # sched: [7:1.00] +; GENERIC-NEXT: imulq (%rsi), %rdi # sched: [8:1.00] ; GENERIC-NEXT: imulq $665536, %rdi, %rdi # imm = 0xA27C0 ; GENERIC-NEXT: # sched: [3:1.00] ; GENERIC-NEXT: imulq $665536, (%rsi), %rdi # imm = 0xA27C0 -; GENERIC-NEXT: # sched: [7:1.00] +; GENERIC-NEXT: # sched: [8:1.00] ; GENERIC-NEXT: imulq $7, %rdi, %rdi # sched: [3:1.00] -; GENERIC-NEXT: imulq $7, (%rsi), %rdi # sched: [7:1.00] +; GENERIC-NEXT: imulq $7, (%rsi), %rdi # sched: [8:1.00] ; GENERIC-NEXT: #NO_APP ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -5999,15 +5999,15 @@ define void @test_imul_64(i64 %a0, i64* %a1) optsize { ; SANDY: # %bb.0: ; SANDY-NEXT: #APP ; SANDY-NEXT: imulq %rdi # sched: [3:1.00] -; SANDY-NEXT: imulq (%rsi) # sched: [7:1.00] +; SANDY-NEXT: imulq (%rsi) # sched: [8:1.00] ; SANDY-NEXT: imulq %rdi, %rdi # sched: [3:1.00] -; SANDY-NEXT: imulq (%rsi), %rdi # sched: [7:1.00] +; SANDY-NEXT: imulq (%rsi), %rdi # sched: [8:1.00] ; SANDY-NEXT: imulq $665536, %rdi, %rdi # imm = 0xA27C0 ; SANDY-NEXT: # sched: [3:1.00] ; SANDY-NEXT: imulq $665536, (%rsi), %rdi # imm = 0xA27C0 -; SANDY-NEXT: # sched: [7:1.00] +; SANDY-NEXT: # sched: [8:1.00] ; SANDY-NEXT: imulq $7, %rdi, %rdi # sched: [3:1.00] -; SANDY-NEXT: imulq $7, (%rsi), %rdi # sched: [7:1.00] +; 
SANDY-NEXT: imulq $7, (%rsi), %rdi # sched: [8:1.00] ; SANDY-NEXT: #NO_APP ; SANDY-NEXT: retq # sched: [1:1.00] ; @@ -9888,7 +9888,7 @@ define void @test_popf_pushf() optsize { ; GENERIC-LABEL: test_popf_pushf: ; GENERIC: # %bb.0: ; GENERIC-NEXT: #APP -; GENERIC-NEXT: popfq # sched: [4:0.50] +; GENERIC-NEXT: popfq # sched: [5:0.50] ; GENERIC-NEXT: pushfq # sched: [5:1.00] ; GENERIC-NEXT: #NO_APP ; GENERIC-NEXT: retq # sched: [1:1.00] @@ -9912,7 +9912,7 @@ define void @test_popf_pushf() optsize { ; SANDY-LABEL: test_popf_pushf: ; SANDY: # %bb.0: ; SANDY-NEXT: #APP -; SANDY-NEXT: popfq # sched: [4:0.50] +; SANDY-NEXT: popfq # sched: [5:0.50] ; SANDY-NEXT: pushfq # sched: [5:1.00] ; SANDY-NEXT: #NO_APP ; SANDY-NEXT: retq # sched: [1:1.00] @@ -9974,16 +9974,16 @@ define void @test_rcl_rcr_8(i8 %a0, i8 %a1, i8 *%a2) optsize { ; GENERIC-NEXT: #APP ; GENERIC-NEXT: rclb %dil # sched: [1:0.50] ; GENERIC-NEXT: rcrb %dil # sched: [1:0.50] -; GENERIC-NEXT: rclb (%rdx) # sched: [5:1.00] -; GENERIC-NEXT: rcrb (%rdx) # sched: [5:1.00] +; GENERIC-NEXT: rclb (%rdx) # sched: [6:1.00] +; GENERIC-NEXT: rcrb (%rdx) # sched: [6:1.00] ; GENERIC-NEXT: rclb $7, %dil # sched: [1:0.50] ; GENERIC-NEXT: rcrb $7, %dil # sched: [1:0.50] -; GENERIC-NEXT: rclb $7, (%rdx) # sched: [5:1.00] -; GENERIC-NEXT: rcrb $7, (%rdx) # sched: [5:1.00] +; GENERIC-NEXT: rclb $7, (%rdx) # sched: [6:1.00] +; GENERIC-NEXT: rcrb $7, (%rdx) # sched: [6:1.00] ; GENERIC-NEXT: rclb %cl, %dil # sched: [1:0.50] ; GENERIC-NEXT: rcrb %cl, %dil # sched: [1:0.50] -; GENERIC-NEXT: rclb %cl, (%rdx) # sched: [5:1.00] -; GENERIC-NEXT: rcrb %cl, (%rdx) # sched: [5:1.00] +; GENERIC-NEXT: rclb %cl, (%rdx) # sched: [6:1.00] +; GENERIC-NEXT: rcrb %cl, (%rdx) # sched: [6:1.00] ; GENERIC-NEXT: #NO_APP ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -10028,16 +10028,16 @@ define void @test_rcl_rcr_8(i8 %a0, i8 %a1, i8 *%a2) optsize { ; SANDY-NEXT: #APP ; SANDY-NEXT: rclb %dil # sched: [1:0.50] ; SANDY-NEXT: rcrb %dil # sched: [1:0.50] -; 
SANDY-NEXT: rclb (%rdx) # sched: [5:1.00] -; SANDY-NEXT: rcrb (%rdx) # sched: [5:1.00] +; SANDY-NEXT: rclb (%rdx) # sched: [6:1.00] +; SANDY-NEXT: rcrb (%rdx) # sched: [6:1.00] ; SANDY-NEXT: rclb $7, %dil # sched: [1:0.50] ; SANDY-NEXT: rcrb $7, %dil # sched: [1:0.50] -; SANDY-NEXT: rclb $7, (%rdx) # sched: [5:1.00] -; SANDY-NEXT: rcrb $7, (%rdx) # sched: [5:1.00] +; SANDY-NEXT: rclb $7, (%rdx) # sched: [6:1.00] +; SANDY-NEXT: rcrb $7, (%rdx) # sched: [6:1.00] ; SANDY-NEXT: rclb %cl, %dil # sched: [1:0.50] ; SANDY-NEXT: rcrb %cl, %dil # sched: [1:0.50] -; SANDY-NEXT: rclb %cl, (%rdx) # sched: [5:1.00] -; SANDY-NEXT: rcrb %cl, (%rdx) # sched: [5:1.00] +; SANDY-NEXT: rclb %cl, (%rdx) # sched: [6:1.00] +; SANDY-NEXT: rcrb %cl, (%rdx) # sched: [6:1.00] ; SANDY-NEXT: #NO_APP ; SANDY-NEXT: retq # sched: [1:1.00] ; @@ -10157,16 +10157,16 @@ define void @test_rcl_rcr_16(i16 %a0, i16 %a1, i16 *%a2) optsize { ; GENERIC-NEXT: #APP ; GENERIC-NEXT: rclw %di # sched: [1:0.50] ; GENERIC-NEXT: rcrw %di # sched: [1:0.50] -; GENERIC-NEXT: rclw (%rdx) # sched: [5:1.00] -; GENERIC-NEXT: rcrw (%rdx) # sched: [5:1.00] +; GENERIC-NEXT: rclw (%rdx) # sched: [6:1.00] +; GENERIC-NEXT: rcrw (%rdx) # sched: [6:1.00] ; GENERIC-NEXT: rclw $7, %di # sched: [1:0.50] ; GENERIC-NEXT: rcrw $7, %di # sched: [1:0.50] -; GENERIC-NEXT: rclw $7, (%rdx) # sched: [5:1.00] -; GENERIC-NEXT: rcrw $7, (%rdx) # sched: [5:1.00] +; GENERIC-NEXT: rclw $7, (%rdx) # sched: [6:1.00] +; GENERIC-NEXT: rcrw $7, (%rdx) # sched: [6:1.00] ; GENERIC-NEXT: rclw %cl, %di # sched: [1:0.50] ; GENERIC-NEXT: rcrw %cl, %di # sched: [1:0.50] -; GENERIC-NEXT: rclw %cl, (%rdx) # sched: [5:1.00] -; GENERIC-NEXT: rcrw %cl, (%rdx) # sched: [5:1.00] +; GENERIC-NEXT: rclw %cl, (%rdx) # sched: [6:1.00] +; GENERIC-NEXT: rcrw %cl, (%rdx) # sched: [6:1.00] ; GENERIC-NEXT: #NO_APP ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -10211,16 +10211,16 @@ define void @test_rcl_rcr_16(i16 %a0, i16 %a1, i16 *%a2) optsize { ; SANDY-NEXT: #APP ; 
SANDY-NEXT: rclw %di # sched: [1:0.50] ; SANDY-NEXT: rcrw %di # sched: [1:0.50] -; SANDY-NEXT: rclw (%rdx) # sched: [5:1.00] -; SANDY-NEXT: rcrw (%rdx) # sched: [5:1.00] +; SANDY-NEXT: rclw (%rdx) # sched: [6:1.00] +; SANDY-NEXT: rcrw (%rdx) # sched: [6:1.00] ; SANDY-NEXT: rclw $7, %di # sched: [1:0.50] ; SANDY-NEXT: rcrw $7, %di # sched: [1:0.50] -; SANDY-NEXT: rclw $7, (%rdx) # sched: [5:1.00] -; SANDY-NEXT: rcrw $7, (%rdx) # sched: [5:1.00] +; SANDY-NEXT: rclw $7, (%rdx) # sched: [6:1.00] +; SANDY-NEXT: rcrw $7, (%rdx) # sched: [6:1.00] ; SANDY-NEXT: rclw %cl, %di # sched: [1:0.50] ; SANDY-NEXT: rcrw %cl, %di # sched: [1:0.50] -; SANDY-NEXT: rclw %cl, (%rdx) # sched: [5:1.00] -; SANDY-NEXT: rcrw %cl, (%rdx) # sched: [5:1.00] +; SANDY-NEXT: rclw %cl, (%rdx) # sched: [6:1.00] +; SANDY-NEXT: rcrw %cl, (%rdx) # sched: [6:1.00] ; SANDY-NEXT: #NO_APP ; SANDY-NEXT: retq # sched: [1:1.00] ; @@ -10340,16 +10340,16 @@ define void @test_rcl_rcr_32(i32 %a0, i32 %a1, i32 *%a2) optsize { ; GENERIC-NEXT: #APP ; GENERIC-NEXT: rcll %edi # sched: [1:0.50] ; GENERIC-NEXT: rcrl %edi # sched: [1:0.50] -; GENERIC-NEXT: rcll (%rdx) # sched: [5:1.00] -; GENERIC-NEXT: rcrl (%rdx) # sched: [5:1.00] +; GENERIC-NEXT: rcll (%rdx) # sched: [6:1.00] +; GENERIC-NEXT: rcrl (%rdx) # sched: [6:1.00] ; GENERIC-NEXT: rcll $7, %edi # sched: [1:0.50] ; GENERIC-NEXT: rcrl $7, %edi # sched: [1:0.50] -; GENERIC-NEXT: rcll $7, (%rdx) # sched: [5:1.00] -; GENERIC-NEXT: rcrl $7, (%rdx) # sched: [5:1.00] +; GENERIC-NEXT: rcll $7, (%rdx) # sched: [6:1.00] +; GENERIC-NEXT: rcrl $7, (%rdx) # sched: [6:1.00] ; GENERIC-NEXT: rcll %cl, %edi # sched: [1:0.50] ; GENERIC-NEXT: rcrl %cl, %edi # sched: [1:0.50] -; GENERIC-NEXT: rcll %cl, (%rdx) # sched: [5:1.00] -; GENERIC-NEXT: rcrl %cl, (%rdx) # sched: [5:1.00] +; GENERIC-NEXT: rcll %cl, (%rdx) # sched: [6:1.00] +; GENERIC-NEXT: rcrl %cl, (%rdx) # sched: [6:1.00] ; GENERIC-NEXT: #NO_APP ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -10394,16 +10394,16 @@ define void 
@test_rcl_rcr_32(i32 %a0, i32 %a1, i32 *%a2) optsize { ; SANDY-NEXT: #APP ; SANDY-NEXT: rcll %edi # sched: [1:0.50] ; SANDY-NEXT: rcrl %edi # sched: [1:0.50] -; SANDY-NEXT: rcll (%rdx) # sched: [5:1.00] -; SANDY-NEXT: rcrl (%rdx) # sched: [5:1.00] +; SANDY-NEXT: rcll (%rdx) # sched: [6:1.00] +; SANDY-NEXT: rcrl (%rdx) # sched: [6:1.00] ; SANDY-NEXT: rcll $7, %edi # sched: [1:0.50] ; SANDY-NEXT: rcrl $7, %edi # sched: [1:0.50] -; SANDY-NEXT: rcll $7, (%rdx) # sched: [5:1.00] -; SANDY-NEXT: rcrl $7, (%rdx) # sched: [5:1.00] +; SANDY-NEXT: rcll $7, (%rdx) # sched: [6:1.00] +; SANDY-NEXT: rcrl $7, (%rdx) # sched: [6:1.00] ; SANDY-NEXT: rcll %cl, %edi # sched: [1:0.50] ; SANDY-NEXT: rcrl %cl, %edi # sched: [1:0.50] -; SANDY-NEXT: rcll %cl, (%rdx) # sched: [5:1.00] -; SANDY-NEXT: rcrl %cl, (%rdx) # sched: [5:1.00] +; SANDY-NEXT: rcll %cl, (%rdx) # sched: [6:1.00] +; SANDY-NEXT: rcrl %cl, (%rdx) # sched: [6:1.00] ; SANDY-NEXT: #NO_APP ; SANDY-NEXT: retq # sched: [1:1.00] ; @@ -10523,16 +10523,16 @@ define void @test_rcl_rcr_64(i64 %a0, i64 %a1, i64 *%a2) optsize { ; GENERIC-NEXT: #APP ; GENERIC-NEXT: rclq %rdi # sched: [1:0.50] ; GENERIC-NEXT: rcrq %rdi # sched: [1:0.50] -; GENERIC-NEXT: rclq (%rdx) # sched: [5:1.00] -; GENERIC-NEXT: rcrq (%rdx) # sched: [5:1.00] +; GENERIC-NEXT: rclq (%rdx) # sched: [6:1.00] +; GENERIC-NEXT: rcrq (%rdx) # sched: [6:1.00] ; GENERIC-NEXT: rclq $7, %rdi # sched: [1:0.50] ; GENERIC-NEXT: rcrq $7, %rdi # sched: [1:0.50] -; GENERIC-NEXT: rclq $7, (%rdx) # sched: [5:1.00] -; GENERIC-NEXT: rcrq $7, (%rdx) # sched: [5:1.00] +; GENERIC-NEXT: rclq $7, (%rdx) # sched: [6:1.00] +; GENERIC-NEXT: rcrq $7, (%rdx) # sched: [6:1.00] ; GENERIC-NEXT: rclq %cl, %rdi # sched: [1:0.50] ; GENERIC-NEXT: rcrq %cl, %rdi # sched: [1:0.50] -; GENERIC-NEXT: rclq %cl, (%rdx) # sched: [5:1.00] -; GENERIC-NEXT: rcrq %cl, (%rdx) # sched: [5:1.00] +; GENERIC-NEXT: rclq %cl, (%rdx) # sched: [6:1.00] +; GENERIC-NEXT: rcrq %cl, (%rdx) # sched: [6:1.00] ; GENERIC-NEXT: 
#NO_APP ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -10577,16 +10577,16 @@ define void @test_rcl_rcr_64(i64 %a0, i64 %a1, i64 *%a2) optsize { ; SANDY-NEXT: #APP ; SANDY-NEXT: rclq %rdi # sched: [1:0.50] ; SANDY-NEXT: rcrq %rdi # sched: [1:0.50] -; SANDY-NEXT: rclq (%rdx) # sched: [5:1.00] -; SANDY-NEXT: rcrq (%rdx) # sched: [5:1.00] +; SANDY-NEXT: rclq (%rdx) # sched: [6:1.00] +; SANDY-NEXT: rcrq (%rdx) # sched: [6:1.00] ; SANDY-NEXT: rclq $7, %rdi # sched: [1:0.50] ; SANDY-NEXT: rcrq $7, %rdi # sched: [1:0.50] -; SANDY-NEXT: rclq $7, (%rdx) # sched: [5:1.00] -; SANDY-NEXT: rcrq $7, (%rdx) # sched: [5:1.00] +; SANDY-NEXT: rclq $7, (%rdx) # sched: [6:1.00] +; SANDY-NEXT: rcrq $7, (%rdx) # sched: [6:1.00] ; SANDY-NEXT: rclq %cl, %rdi # sched: [1:0.50] ; SANDY-NEXT: rcrq %cl, %rdi # sched: [1:0.50] -; SANDY-NEXT: rclq %cl, (%rdx) # sched: [5:1.00] -; SANDY-NEXT: rcrq %cl, (%rdx) # sched: [5:1.00] +; SANDY-NEXT: rclq %cl, (%rdx) # sched: [6:1.00] +; SANDY-NEXT: rcrq %cl, (%rdx) # sched: [6:1.00] ; SANDY-NEXT: #NO_APP ; SANDY-NEXT: retq # sched: [1:1.00] ; @@ -10949,10 +10949,10 @@ define void @test_ret() optsize { ; GENERIC-NEXT: #APP ; GENERIC-NEXT: retq # sched: [1:1.00] ; GENERIC-NEXT: retq $4095 # imm = 0xFFF -; GENERIC-NEXT: # sched: [5:1.00] -; GENERIC-NEXT: lretl # sched: [5:1.00] +; GENERIC-NEXT: # sched: [6:1.00] +; GENERIC-NEXT: lretl # sched: [6:1.00] ; GENERIC-NEXT: lretl $4095 # imm = 0xFFF -; GENERIC-NEXT: # sched: [5:1.00] +; GENERIC-NEXT: # sched: [6:1.00] ; GENERIC-NEXT: #NO_APP ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -10985,10 +10985,10 @@ define void @test_ret() optsize { ; SANDY-NEXT: #APP ; SANDY-NEXT: retq # sched: [1:1.00] ; SANDY-NEXT: retq $4095 # imm = 0xFFF -; SANDY-NEXT: # sched: [5:1.00] -; SANDY-NEXT: lretl # sched: [5:1.00] +; SANDY-NEXT: # sched: [6:1.00] +; SANDY-NEXT: lretl # sched: [6:1.00] ; SANDY-NEXT: lretl $4095 # imm = 0xFFF -; SANDY-NEXT: # sched: [5:1.00] +; SANDY-NEXT: # sched: [6:1.00] ; SANDY-NEXT: #NO_APP ; SANDY-NEXT: 
retq # sched: [1:1.00] ; @@ -16121,7 +16121,7 @@ define void @test_xchg_8(i8 %a0, i8 %a1, i8 *%a2) optsize { ; GENERIC: # %bb.0: ; GENERIC-NEXT: #APP ; GENERIC-NEXT: xchgb %sil, %dil # sched: [1:0.33] -; GENERIC-NEXT: xchgb %dil, (%rdx) # sched: [5:1.00] +; GENERIC-NEXT: xchgb %dil, (%rdx) # sched: [6:1.00] ; GENERIC-NEXT: #NO_APP ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -16145,7 +16145,7 @@ define void @test_xchg_8(i8 %a0, i8 %a1, i8 *%a2) optsize { ; SANDY: # %bb.0: ; SANDY-NEXT: #APP ; SANDY-NEXT: xchgb %sil, %dil # sched: [1:0.33] -; SANDY-NEXT: xchgb %dil, (%rdx) # sched: [5:1.00] +; SANDY-NEXT: xchgb %dil, (%rdx) # sched: [6:1.00] ; SANDY-NEXT: #NO_APP ; SANDY-NEXT: retq # sched: [1:1.00] ; @@ -16205,7 +16205,7 @@ define void @test_xchg_16(i16 %a0, i16 %a1, i16 *%a2) optsize { ; GENERIC-NEXT: #APP ; GENERIC-NEXT: xchgw %di, %ax # sched: [1:0.33] ; GENERIC-NEXT: xchgw %si, %di # sched: [1:0.33] -; GENERIC-NEXT: xchgw %di, (%rdx) # sched: [5:1.00] +; GENERIC-NEXT: xchgw %di, (%rdx) # sched: [6:1.00] ; GENERIC-NEXT: #NO_APP ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -16232,7 +16232,7 @@ define void @test_xchg_16(i16 %a0, i16 %a1, i16 *%a2) optsize { ; SANDY-NEXT: #APP ; SANDY-NEXT: xchgw %di, %ax # sched: [1:0.33] ; SANDY-NEXT: xchgw %si, %di # sched: [1:0.33] -; SANDY-NEXT: xchgw %di, (%rdx) # sched: [5:1.00] +; SANDY-NEXT: xchgw %di, (%rdx) # sched: [6:1.00] ; SANDY-NEXT: #NO_APP ; SANDY-NEXT: retq # sched: [1:1.00] ; @@ -16298,7 +16298,7 @@ define void @test_xchg_32(i32 %a0, i32 %a1, i32 *%a2) optsize { ; GENERIC-NEXT: #APP ; GENERIC-NEXT: xchgl %edi, %eax # sched: [1:0.33] ; GENERIC-NEXT: xchgl %esi, %edi # sched: [1:0.33] -; GENERIC-NEXT: xchgl %edi, (%rdx) # sched: [5:1.00] +; GENERIC-NEXT: xchgl %edi, (%rdx) # sched: [6:1.00] ; GENERIC-NEXT: #NO_APP ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -16325,7 +16325,7 @@ define void @test_xchg_32(i32 %a0, i32 %a1, i32 *%a2) optsize { ; SANDY-NEXT: #APP ; SANDY-NEXT: xchgl %edi, %eax # sched: [1:0.33] ; 
SANDY-NEXT: xchgl %esi, %edi # sched: [1:0.33] -; SANDY-NEXT: xchgl %edi, (%rdx) # sched: [5:1.00] +; SANDY-NEXT: xchgl %edi, (%rdx) # sched: [6:1.00] ; SANDY-NEXT: #NO_APP ; SANDY-NEXT: retq # sched: [1:1.00] ; @@ -16391,7 +16391,7 @@ define void @test_xchg_64(i64 %a0, i64 %a1, i64 *%a2) optsize { ; GENERIC-NEXT: #APP ; GENERIC-NEXT: xchgq %rdi, %rax # sched: [1:0.33] ; GENERIC-NEXT: xchgq %rsi, %rdi # sched: [1:0.33] -; GENERIC-NEXT: xchgq %rdi, (%rdx) # sched: [5:1.00] +; GENERIC-NEXT: xchgq %rdi, (%rdx) # sched: [6:1.00] ; GENERIC-NEXT: #NO_APP ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -16418,7 +16418,7 @@ define void @test_xchg_64(i64 %a0, i64 %a1, i64 *%a2) optsize { ; SANDY-NEXT: #APP ; SANDY-NEXT: xchgq %rdi, %rax # sched: [1:0.33] ; SANDY-NEXT: xchgq %rsi, %rdi # sched: [1:0.33] -; SANDY-NEXT: xchgq %rdi, (%rdx) # sched: [5:1.00] +; SANDY-NEXT: xchgq %rdi, (%rdx) # sched: [6:1.00] ; SANDY-NEXT: #NO_APP ; SANDY-NEXT: retq # sched: [1:1.00] ; @@ -16483,7 +16483,7 @@ define void @test_xlat() optsize { ; GENERIC-LABEL: test_xlat: ; GENERIC: # %bb.0: ; GENERIC-NEXT: #APP -; GENERIC-NEXT: xlatb # sched: [4:0.50] +; GENERIC-NEXT: xlatb # sched: [5:0.50] ; GENERIC-NEXT: #NO_APP ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -16504,7 +16504,7 @@ define void @test_xlat() optsize { ; SANDY-LABEL: test_xlat: ; SANDY: # %bb.0: ; SANDY-NEXT: #APP -; SANDY-NEXT: xlatb # sched: [4:0.50] +; SANDY-NEXT: xlatb # sched: [5:0.50] ; SANDY-NEXT: #NO_APP ; SANDY-NEXT: retq # sched: [1:1.00] ; diff --git a/llvm/test/CodeGen/X86/sha-schedule.ll b/llvm/test/CodeGen/X86/sha-schedule.ll index cecdc1c..8be750a 100644 --- a/llvm/test/CodeGen/X86/sha-schedule.ll +++ b/llvm/test/CodeGen/X86/sha-schedule.ll @@ -12,7 +12,7 @@ define <4 x i32> @test_sha1msg1(<4 x i32> %a0, <4 x i32> %a1, <4 x i32> *%a2) { ; GENERIC-LABEL: test_sha1msg1: ; GENERIC: # %bb.0: ; GENERIC-NEXT: sha1msg1 %xmm1, %xmm0 # sched: [5:1.00] -; GENERIC-NEXT: sha1msg1 (%rdi), %xmm0 # sched: [9:1.00] +; GENERIC-NEXT: 
sha1msg1 (%rdi), %xmm0 # sched: [10:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; GOLDMONT-LABEL: test_sha1msg1: @@ -43,7 +43,7 @@ define <4 x i32> @test_sha1msg2(<4 x i32> %a0, <4 x i32> %a1, <4 x i32> *%a2) { ; GENERIC-LABEL: test_sha1msg2: ; GENERIC: # %bb.0: ; GENERIC-NEXT: sha1msg2 %xmm1, %xmm0 # sched: [5:1.00] -; GENERIC-NEXT: sha1msg2 (%rdi), %xmm0 # sched: [9:1.00] +; GENERIC-NEXT: sha1msg2 (%rdi), %xmm0 # sched: [10:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; GOLDMONT-LABEL: test_sha1msg2: @@ -74,7 +74,7 @@ define <4 x i32> @test_sha1nexte(<4 x i32> %a0, <4 x i32> %a1, <4 x i32> *%a2) { ; GENERIC-LABEL: test_sha1nexte: ; GENERIC: # %bb.0: ; GENERIC-NEXT: sha1nexte %xmm1, %xmm0 # sched: [5:1.00] -; GENERIC-NEXT: sha1nexte (%rdi), %xmm0 # sched: [9:1.00] +; GENERIC-NEXT: sha1nexte (%rdi), %xmm0 # sched: [10:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; GOLDMONT-LABEL: test_sha1nexte: @@ -105,7 +105,7 @@ define <4 x i32> @test_sha1rnds4(<4 x i32> %a0, <4 x i32> %a1, <4 x i32> *%a2) { ; GENERIC-LABEL: test_sha1rnds4: ; GENERIC: # %bb.0: ; GENERIC-NEXT: sha1rnds4 $3, %xmm1, %xmm0 # sched: [5:1.00] -; GENERIC-NEXT: sha1rnds4 $3, (%rdi), %xmm0 # sched: [9:1.00] +; GENERIC-NEXT: sha1rnds4 $3, (%rdi), %xmm0 # sched: [10:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; GOLDMONT-LABEL: test_sha1rnds4: @@ -140,7 +140,7 @@ define <4 x i32> @test_sha256msg1(<4 x i32> %a0, <4 x i32> %a1, <4 x i32> *%a2) ; GENERIC-LABEL: test_sha256msg1: ; GENERIC: # %bb.0: ; GENERIC-NEXT: sha256msg1 %xmm1, %xmm0 # sched: [5:1.00] -; GENERIC-NEXT: sha256msg1 (%rdi), %xmm0 # sched: [9:1.00] +; GENERIC-NEXT: sha256msg1 (%rdi), %xmm0 # sched: [10:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; GOLDMONT-LABEL: test_sha256msg1: @@ -171,7 +171,7 @@ define <4 x i32> @test_sha256msg2(<4 x i32> %a0, <4 x i32> %a1, <4 x i32> *%a2) ; GENERIC-LABEL: test_sha256msg2: ; GENERIC: # %bb.0: ; GENERIC-NEXT: sha256msg2 %xmm1, %xmm0 # sched: [5:1.00] -; GENERIC-NEXT: sha256msg2 (%rdi), %xmm0 # 
sched: [9:1.00] +; GENERIC-NEXT: sha256msg2 (%rdi), %xmm0 # sched: [10:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; GOLDMONT-LABEL: test_sha256msg2: @@ -204,7 +204,7 @@ define <4 x i32> @test_sha256rnds2(<4 x i32> %a0, <4 x i32> %a1, <4 x i32> %a2, ; GENERIC-NEXT: movaps %xmm0, %xmm3 # sched: [1:1.00] ; GENERIC-NEXT: movaps %xmm2, %xmm0 # sched: [1:1.00] ; GENERIC-NEXT: sha256rnds2 %xmm0, %xmm1, %xmm3 # sched: [5:1.00] -; GENERIC-NEXT: sha256rnds2 %xmm0, (%rdi), %xmm3 # sched: [9:1.00] +; GENERIC-NEXT: sha256rnds2 %xmm0, (%rdi), %xmm3 # sched: [10:1.00] ; GENERIC-NEXT: movaps %xmm3, %xmm0 # sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; diff --git a/llvm/test/CodeGen/X86/sse41-schedule.ll b/llvm/test/CodeGen/X86/sse41-schedule.ll index 6462fe4..b5a6e36 100644 --- a/llvm/test/CodeGen/X86/sse41-schedule.ll +++ b/llvm/test/CodeGen/X86/sse41-schedule.ll @@ -623,7 +623,7 @@ define <4 x float> @test_dpps(<4 x float> %a0, <4 x float> %a1, <4 x float> *%a2 ; GENERIC-LABEL: test_dpps: ; GENERIC: # %bb.0: ; GENERIC-NEXT: dpps $7, %xmm1, %xmm0 # sched: [12:2.00] -; GENERIC-NEXT: dpps $7, (%rdi), %xmm0 # sched: [7:1.00] +; GENERIC-NEXT: dpps $7, (%rdi), %xmm0 # sched: [8:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SLM-LABEL: test_dpps: @@ -635,13 +635,13 @@ define <4 x float> @test_dpps(<4 x float> %a0, <4 x float> %a1, <4 x float> *%a2 ; SANDY-SSE-LABEL: test_dpps: ; SANDY-SSE: # %bb.0: ; SANDY-SSE-NEXT: dpps $7, %xmm1, %xmm0 # sched: [12:2.00] -; SANDY-SSE-NEXT: dpps $7, (%rdi), %xmm0 # sched: [7:1.00] +; SANDY-SSE-NEXT: dpps $7, (%rdi), %xmm0 # sched: [8:1.00] ; SANDY-SSE-NEXT: retq # sched: [1:1.00] ; ; SANDY-LABEL: test_dpps: ; SANDY: # %bb.0: ; SANDY-NEXT: vdpps $7, %xmm1, %xmm0, %xmm0 # sched: [12:2.00] -; SANDY-NEXT: vdpps $7, (%rdi), %xmm0, %xmm0 # sched: [7:1.00] +; SANDY-NEXT: vdpps $7, (%rdi), %xmm0, %xmm0 # sched: [8:1.00] ; SANDY-NEXT: retq # sched: [1:1.00] ; ; HASWELL-SSE-LABEL: test_dpps: @@ -1903,7 +1903,7 @@ define i32 @test_pextrw(<8 x 
i16> %a0, i16 *%a1) { ; GENERIC-LABEL: test_pextrw: ; GENERIC: # %bb.0: ; GENERIC-NEXT: pextrw $3, %xmm0, %eax # sched: [3:1.00] -; GENERIC-NEXT: pextrw $1, %xmm0, (%rdi) # sched: [5:1.00] +; GENERIC-NEXT: pextrw $1, %xmm0, (%rdi) # sched: [6:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SLM-LABEL: test_pextrw: @@ -1915,7 +1915,7 @@ define i32 @test_pextrw(<8 x i16> %a0, i16 *%a1) { ; SANDY-SSE-LABEL: test_pextrw: ; SANDY-SSE: # %bb.0: ; SANDY-SSE-NEXT: pextrw $3, %xmm0, %eax # sched: [3:1.00] -; SANDY-SSE-NEXT: pextrw $1, %xmm0, (%rdi) # sched: [5:1.00] +; SANDY-SSE-NEXT: pextrw $1, %xmm0, (%rdi) # sched: [6:1.00] ; SANDY-SSE-NEXT: retq # sched: [1:1.00] ; ; SANDY-LABEL: test_pextrw: diff --git a/llvm/test/CodeGen/X86/tbm-schedule.ll b/llvm/test/CodeGen/X86/tbm-schedule.ll index 1310b12..ac335a6 100644 --- a/llvm/test/CodeGen/X86/tbm-schedule.ll +++ b/llvm/test/CodeGen/X86/tbm-schedule.ll @@ -10,7 +10,7 @@ define i32 @test_x86_tbm_bextri_u32(i32 %a0, i32* nocapture %p1) nounwind { ; GENERIC-NEXT: bextrl $3076, %edi, %ecx # imm = 0xC04 ; GENERIC-NEXT: # sched: [1:0.33] ; GENERIC-NEXT: bextrl $3076, (%rsi), %eax # imm = 0xC04 -; GENERIC-NEXT: # sched: [5:0.50] +; GENERIC-NEXT: # sched: [6:0.50] ; GENERIC-NEXT: addl %ecx, %eax # sched: [1:0.33] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -35,7 +35,7 @@ define i64 @test_x86_tbm_bextri_u64(i64 %a0, i64* nocapture %p1) nounwind { ; GENERIC-NEXT: bextrl $3076, %edi, %ecx # imm = 0xC04 ; GENERIC-NEXT: # sched: [1:0.33] ; GENERIC-NEXT: bextrl $3076, (%rsi), %eax # imm = 0xC04 -; GENERIC-NEXT: # sched: [5:0.50] +; GENERIC-NEXT: # sched: [6:0.50] ; GENERIC-NEXT: addq %rcx, %rax # sched: [1:0.33] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -58,7 +58,7 @@ define i32 @test_x86_tbm_blcfill_u32(i32 %a0, i32* nocapture %p1) nounwind { ; GENERIC-LABEL: test_x86_tbm_blcfill_u32: ; GENERIC: # %bb.0: ; GENERIC-NEXT: blcfilll %edi, %ecx # sched: [1:0.33] -; GENERIC-NEXT: blcfilll (%rsi), %eax # sched: [5:0.50] +; GENERIC-NEXT: 
blcfilll (%rsi), %eax # sched: [6:0.50] ; GENERIC-NEXT: addl %ecx, %eax # sched: [1:0.33] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -81,7 +81,7 @@ define i64 @test_x86_tbm_blcfill_u64(i64 %a0, i64* nocapture %p1) nounwind { ; GENERIC-LABEL: test_x86_tbm_blcfill_u64: ; GENERIC: # %bb.0: ; GENERIC-NEXT: blcfillq %rdi, %rcx # sched: [1:0.33] -; GENERIC-NEXT: blcfillq (%rsi), %rax # sched: [5:0.50] +; GENERIC-NEXT: blcfillq (%rsi), %rax # sched: [6:0.50] ; GENERIC-NEXT: addq %rcx, %rax # sched: [1:0.33] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -104,7 +104,7 @@ define i32 @test_x86_tbm_blci_u32(i32 %a0, i32* nocapture %p1) nounwind { ; GENERIC-LABEL: test_x86_tbm_blci_u32: ; GENERIC: # %bb.0: ; GENERIC-NEXT: blcil %edi, %ecx # sched: [1:0.33] -; GENERIC-NEXT: blcil (%rsi), %eax # sched: [5:0.50] +; GENERIC-NEXT: blcil (%rsi), %eax # sched: [6:0.50] ; GENERIC-NEXT: addl %ecx, %eax # sched: [1:0.33] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -129,7 +129,7 @@ define i64 @test_x86_tbm_blci_u64(i64 %a0, i64* nocapture %p1) nounwind { ; GENERIC-LABEL: test_x86_tbm_blci_u64: ; GENERIC: # %bb.0: ; GENERIC-NEXT: blciq %rdi, %rcx # sched: [1:0.33] -; GENERIC-NEXT: blciq (%rsi), %rax # sched: [5:0.50] +; GENERIC-NEXT: blciq (%rsi), %rax # sched: [6:0.50] ; GENERIC-NEXT: addq %rcx, %rax # sched: [1:0.33] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -154,7 +154,7 @@ define i32 @test_x86_tbm_blcic_u32(i32 %a0, i32* nocapture %p1) nounwind { ; GENERIC-LABEL: test_x86_tbm_blcic_u32: ; GENERIC: # %bb.0: ; GENERIC-NEXT: blcicl %edi, %ecx # sched: [1:0.33] -; GENERIC-NEXT: blcicl (%rsi), %eax # sched: [5:0.50] +; GENERIC-NEXT: blcicl (%rsi), %eax # sched: [6:0.50] ; GENERIC-NEXT: addl %ecx, %eax # sched: [1:0.33] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -179,7 +179,7 @@ define i64 @test_x86_tbm_blcic_u64(i64 %a0, i64* nocapture %p1) nounwind { ; GENERIC-LABEL: test_x86_tbm_blcic_u64: ; GENERIC: # %bb.0: ; GENERIC-NEXT: blcicq %rdi, %rcx # sched: [1:0.33] -; GENERIC-NEXT: blcicq 
(%rsi), %rax # sched: [5:0.50] +; GENERIC-NEXT: blcicq (%rsi), %rax # sched: [6:0.50] ; GENERIC-NEXT: addq %rcx, %rax # sched: [1:0.33] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -204,7 +204,7 @@ define i32 @test_x86_tbm_blcmsk_u32(i32 %a0, i32* nocapture %p1) nounwind { ; GENERIC-LABEL: test_x86_tbm_blcmsk_u32: ; GENERIC: # %bb.0: ; GENERIC-NEXT: blcmskl %edi, %ecx # sched: [1:0.33] -; GENERIC-NEXT: blcmskl (%rsi), %eax # sched: [5:0.50] +; GENERIC-NEXT: blcmskl (%rsi), %eax # sched: [6:0.50] ; GENERIC-NEXT: addl %ecx, %eax # sched: [1:0.33] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -227,7 +227,7 @@ define i64 @test_x86_tbm_blcmsk_u64(i64 %a0, i64* nocapture %p1) nounwind { ; GENERIC-LABEL: test_x86_tbm_blcmsk_u64: ; GENERIC: # %bb.0: ; GENERIC-NEXT: blcmskq %rdi, %rcx # sched: [1:0.33] -; GENERIC-NEXT: blcmskq (%rsi), %rax # sched: [5:0.50] +; GENERIC-NEXT: blcmskq (%rsi), %rax # sched: [6:0.50] ; GENERIC-NEXT: addq %rcx, %rax # sched: [1:0.33] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -250,7 +250,7 @@ define i32 @test_x86_tbm_blcs_u32(i32 %a0, i32* nocapture %p1) nounwind { ; GENERIC-LABEL: test_x86_tbm_blcs_u32: ; GENERIC: # %bb.0: ; GENERIC-NEXT: blcsl %edi, %ecx # sched: [1:0.33] -; GENERIC-NEXT: blcsl (%rsi), %eax # sched: [5:0.50] +; GENERIC-NEXT: blcsl (%rsi), %eax # sched: [6:0.50] ; GENERIC-NEXT: addl %ecx, %eax # sched: [1:0.33] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -273,7 +273,7 @@ define i64 @test_x86_tbm_blcs_u64(i64 %a0, i64* nocapture %p1) nounwind { ; GENERIC-LABEL: test_x86_tbm_blcs_u64: ; GENERIC: # %bb.0: ; GENERIC-NEXT: blcsq %rdi, %rcx # sched: [1:0.33] -; GENERIC-NEXT: blcsq (%rsi), %rax # sched: [5:0.50] +; GENERIC-NEXT: blcsq (%rsi), %rax # sched: [6:0.50] ; GENERIC-NEXT: addq %rcx, %rax # sched: [1:0.33] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -296,7 +296,7 @@ define i32 @test_x86_tbm_blsfill_u32(i32 %a0, i32* nocapture %p1) nounwind { ; GENERIC-LABEL: test_x86_tbm_blsfill_u32: ; GENERIC: # %bb.0: ; GENERIC-NEXT: blsfilll 
%edi, %ecx # sched: [1:0.33] -; GENERIC-NEXT: blsfilll (%rsi), %eax # sched: [5:0.50] +; GENERIC-NEXT: blsfilll (%rsi), %eax # sched: [6:0.50] ; GENERIC-NEXT: addl %ecx, %eax # sched: [1:0.33] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -319,7 +319,7 @@ define i64 @test_x86_tbm_blsfill_u64(i64 %a0, i64* nocapture %p1) nounwind { ; GENERIC-LABEL: test_x86_tbm_blsfill_u64: ; GENERIC: # %bb.0: ; GENERIC-NEXT: blsfillq %rdi, %rcx # sched: [1:0.33] -; GENERIC-NEXT: blsfillq (%rsi), %rax # sched: [5:0.50] +; GENERIC-NEXT: blsfillq (%rsi), %rax # sched: [6:0.50] ; GENERIC-NEXT: addq %rcx, %rax # sched: [1:0.33] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -342,7 +342,7 @@ define i32 @test_x86_tbm_blsic_u32(i32 %a0, i32* nocapture %p1) nounwind { ; GENERIC-LABEL: test_x86_tbm_blsic_u32: ; GENERIC: # %bb.0: ; GENERIC-NEXT: blsicl %edi, %ecx # sched: [1:0.33] -; GENERIC-NEXT: blsicl (%rsi), %eax # sched: [5:0.50] +; GENERIC-NEXT: blsicl (%rsi), %eax # sched: [6:0.50] ; GENERIC-NEXT: addl %ecx, %eax # sched: [1:0.33] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -367,7 +367,7 @@ define i64 @test_x86_tbm_blsic_u64(i64 %a0, i64* nocapture %p1) nounwind { ; GENERIC-LABEL: test_x86_tbm_blsic_u64: ; GENERIC: # %bb.0: ; GENERIC-NEXT: blsicq %rdi, %rcx # sched: [1:0.33] -; GENERIC-NEXT: blsicq (%rsi), %rax # sched: [5:0.50] +; GENERIC-NEXT: blsicq (%rsi), %rax # sched: [6:0.50] ; GENERIC-NEXT: addq %rcx, %rax # sched: [1:0.33] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -392,7 +392,7 @@ define i32 @test_x86_tbm_t1mskc_u32(i32 %a0, i32* nocapture %p1) nounwind { ; GENERIC-LABEL: test_x86_tbm_t1mskc_u32: ; GENERIC: # %bb.0: ; GENERIC-NEXT: t1mskcl %edi, %ecx # sched: [1:0.33] -; GENERIC-NEXT: t1mskcl (%rsi), %eax # sched: [5:0.50] +; GENERIC-NEXT: t1mskcl (%rsi), %eax # sched: [6:0.50] ; GENERIC-NEXT: addl %ecx, %eax # sched: [1:0.33] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -417,7 +417,7 @@ define i64 @test_x86_tbm_t1mskc_u64(i64 %a0, i64* nocapture %p1) nounwind { ; GENERIC-LABEL: 
test_x86_tbm_t1mskc_u64: ; GENERIC: # %bb.0: ; GENERIC-NEXT: t1mskcq %rdi, %rcx # sched: [1:0.33] -; GENERIC-NEXT: t1mskcq (%rsi), %rax # sched: [5:0.50] +; GENERIC-NEXT: t1mskcq (%rsi), %rax # sched: [6:0.50] ; GENERIC-NEXT: addq %rcx, %rax # sched: [1:0.33] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -442,7 +442,7 @@ define i32 @test_x86_tbm_tzmsk_u32(i32 %a0, i32* nocapture %p1) nounwind { ; GENERIC-LABEL: test_x86_tbm_tzmsk_u32: ; GENERIC: # %bb.0: ; GENERIC-NEXT: tzmskl %edi, %ecx # sched: [1:0.33] -; GENERIC-NEXT: tzmskl (%rsi), %eax # sched: [5:0.50] +; GENERIC-NEXT: tzmskl (%rsi), %eax # sched: [6:0.50] ; GENERIC-NEXT: addl %ecx, %eax # sched: [1:0.33] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -467,7 +467,7 @@ define i64 @test_x86_tbm_tzmsk_u64(i64 %a0, i64* nocapture %p1) nounwind { ; GENERIC-LABEL: test_x86_tbm_tzmsk_u64: ; GENERIC: # %bb.0: ; GENERIC-NEXT: tzmskq %rdi, %rcx # sched: [1:0.33] -; GENERIC-NEXT: tzmskq (%rsi), %rax # sched: [5:0.50] +; GENERIC-NEXT: tzmskq (%rsi), %rax # sched: [6:0.50] ; GENERIC-NEXT: addq %rcx, %rax # sched: [1:0.33] ; GENERIC-NEXT: retq # sched: [1:1.00] ; diff --git a/llvm/test/CodeGen/X86/x87-schedule.ll b/llvm/test/CodeGen/X86/x87-schedule.ll index 1d44408..41b62e8 100644 --- a/llvm/test/CodeGen/X86/x87-schedule.ll +++ b/llvm/test/CodeGen/X86/x87-schedule.ll @@ -38,7 +38,7 @@ define void @test_f2xm1() optsize { ; SANDY-NEXT: #APP ; SANDY-NEXT: f2xm1 # sched: [100:0.33] ; SANDY-NEXT: #NO_APP -; SANDY-NEXT: retl # sched: [5:1.00] +; SANDY-NEXT: retl # sched: [6:1.00] ; ; HASWELL-LABEL: test_f2xm1: ; HASWELL: # %bb.0: @@ -112,7 +112,7 @@ define void @test_fabs() optsize { ; SANDY-NEXT: #APP ; SANDY-NEXT: fabs # sched: [1:1.00] ; SANDY-NEXT: #NO_APP -; SANDY-NEXT: retl # sched: [5:1.00] +; SANDY-NEXT: retl # sched: [6:1.00] ; ; HASWELL-LABEL: test_fabs: ; HASWELL: # %bb.0: @@ -206,7 +206,7 @@ define void @test_fadd(float *%a0, double *%a1) optsize { ; SANDY-NEXT: fadds (%ecx) # sched: [10:1.00] ; SANDY-NEXT: faddl (%eax) 
# sched: [10:1.00] ; SANDY-NEXT: #NO_APP -; SANDY-NEXT: retl # sched: [5:1.00] +; SANDY-NEXT: retl # sched: [6:1.00] ; ; HASWELL-LABEL: test_fadd: ; HASWELL: # %bb.0: @@ -330,7 +330,7 @@ define void @test_faddp_fiadd(i16 *%a0, i32 *%a1) optsize { ; SANDY-NEXT: fiadds (%ecx) # sched: [13:2.00] ; SANDY-NEXT: fiaddl (%eax) # sched: [13:2.00] ; SANDY-NEXT: #NO_APP -; SANDY-NEXT: retl # sched: [5:1.00] +; SANDY-NEXT: retl # sched: [6:1.00] ; ; HASWELL-LABEL: test_faddp_fiadd: ; HASWELL: # %bb.0: @@ -442,7 +442,7 @@ define void @test_fbld_fbstp(i8* %a0) optsize { ; SANDY-NEXT: fbld (%eax) # sched: [100:0.33] ; SANDY-NEXT: fbstp (%eax) # sched: [100:0.33] ; SANDY-NEXT: #NO_APP -; SANDY-NEXT: retl # sched: [5:1.00] +; SANDY-NEXT: retl # sched: [6:1.00] ; ; HASWELL-LABEL: test_fbld_fbstp: ; HASWELL: # %bb.0: @@ -528,7 +528,7 @@ define void @test_fchs() optsize { ; SANDY-NEXT: #APP ; SANDY-NEXT: fchs # sched: [1:1.00] ; SANDY-NEXT: #NO_APP -; SANDY-NEXT: retl # sched: [5:1.00] +; SANDY-NEXT: retl # sched: [6:1.00] ; ; HASWELL-LABEL: test_fchs: ; HASWELL: # %bb.0: @@ -606,7 +606,7 @@ define void @test_fclex() optsize { ; SANDY-NEXT: wait # sched: [100:0.33] ; SANDY-NEXT: fnclex # sched: [100:0.33] ; SANDY-NEXT: #NO_APP -; SANDY-NEXT: retl # sched: [5:1.00] +; SANDY-NEXT: retl # sched: [6:1.00] ; ; HASWELL-LABEL: test_fclex: ; HASWELL: # %bb.0: @@ -686,7 +686,7 @@ define void @test_fnclex() optsize { ; SANDY-NEXT: #APP ; SANDY-NEXT: fnclex # sched: [100:0.33] ; SANDY-NEXT: #NO_APP -; SANDY-NEXT: retl # sched: [5:1.00] +; SANDY-NEXT: retl # sched: [6:1.00] ; ; HASWELL-LABEL: test_fnclex: ; HASWELL: # %bb.0: @@ -788,7 +788,7 @@ define void @test_fcmov() optsize { ; SANDY-NEXT: fcmovnu %st(1), %st(0) # sched: [3:2.00] ; SANDY-NEXT: fcmovu %st(1), %st(0) # sched: [3:2.00] ; SANDY-NEXT: #NO_APP -; SANDY-NEXT: retl # sched: [5:1.00] +; SANDY-NEXT: retl # sched: [6:1.00] ; ; HASWELL-LABEL: test_fcmov: ; HASWELL: # %bb.0: @@ -924,7 +924,7 @@ define void @test_fcom(float *%a0, double 
*%a1) optsize { ; SANDY-NEXT: fcoms (%ecx) # sched: [8:1.00] ; SANDY-NEXT: fcoml (%eax) # sched: [8:1.00] ; SANDY-NEXT: #NO_APP -; SANDY-NEXT: retl # sched: [5:1.00] +; SANDY-NEXT: retl # sched: [6:1.00] ; ; HASWELL-LABEL: test_fcom: ; HASWELL: # %bb.0: @@ -1052,7 +1052,7 @@ define void @test_fcomp_fcompp(float *%a0, double *%a1) optsize { ; SANDY-NEXT: fcompl (%eax) # sched: [8:1.00] ; SANDY-NEXT: fcompp # sched: [100:0.33] ; SANDY-NEXT: #NO_APP -; SANDY-NEXT: retl # sched: [5:1.00] +; SANDY-NEXT: retl # sched: [6:1.00] ; ; HASWELL-LABEL: test_fcomp_fcompp: ; HASWELL: # %bb.0: @@ -1166,7 +1166,7 @@ define void @test_fcomi_fcomip() optsize { ; SANDY-NEXT: fcomi %st(3) # sched: [3:1.00] ; SANDY-NEXT: fcompi %st(3) # sched: [3:1.00] ; SANDY-NEXT: #NO_APP -; SANDY-NEXT: retl # sched: [5:1.00] +; SANDY-NEXT: retl # sched: [6:1.00] ; ; HASWELL-LABEL: test_fcomi_fcomip: ; HASWELL: # %bb.0: @@ -1246,7 +1246,7 @@ define void @test_fcos() optsize { ; SANDY-NEXT: #APP ; SANDY-NEXT: fcos # sched: [100:0.33] ; SANDY-NEXT: #NO_APP -; SANDY-NEXT: retl # sched: [5:1.00] +; SANDY-NEXT: retl # sched: [6:1.00] ; ; HASWELL-LABEL: test_fcos: ; HASWELL: # %bb.0: @@ -1320,7 +1320,7 @@ define void @test_fdecstp() optsize { ; SANDY-NEXT: #APP ; SANDY-NEXT: fdecstp # sched: [1:1.00] ; SANDY-NEXT: #NO_APP -; SANDY-NEXT: retl # sched: [5:1.00] +; SANDY-NEXT: retl # sched: [6:1.00] ; ; HASWELL-LABEL: test_fdecstp: ; HASWELL: # %bb.0: @@ -1414,7 +1414,7 @@ define void @test_fdiv(float *%a0, double *%a1) optsize { ; SANDY-NEXT: fdivs (%ecx) # sched: [31:1.00] ; SANDY-NEXT: fdivl (%eax) # sched: [31:1.00] ; SANDY-NEXT: #NO_APP -; SANDY-NEXT: retl # sched: [5:1.00] +; SANDY-NEXT: retl # sched: [6:1.00] ; ; HASWELL-LABEL: test_fdiv: ; HASWELL: # %bb.0: @@ -1538,7 +1538,7 @@ define void @test_fdivp_fidiv(i16 *%a0, i32 *%a1) optsize { ; SANDY-NEXT: fidivs (%ecx) # sched: [34:1.00] ; SANDY-NEXT: fidivl (%eax) # sched: [34:1.00] ; SANDY-NEXT: #NO_APP -; SANDY-NEXT: retl # sched: [5:1.00] +; 
SANDY-NEXT: retl # sched: [6:1.00] ; ; HASWELL-LABEL: test_fdivp_fidiv: ; HASWELL: # %bb.0: @@ -1662,7 +1662,7 @@ define void @test_fdivr(float *%a0, double *%a1) optsize { ; SANDY-NEXT: fdivrs (%ecx) # sched: [31:1.00] ; SANDY-NEXT: fdivrl (%eax) # sched: [31:1.00] ; SANDY-NEXT: #NO_APP -; SANDY-NEXT: retl # sched: [5:1.00] +; SANDY-NEXT: retl # sched: [6:1.00] ; ; HASWELL-LABEL: test_fdivr: ; HASWELL: # %bb.0: @@ -1786,7 +1786,7 @@ define void @test_fdivrp_fidivr(i16 *%a0, i32 *%a1) optsize { ; SANDY-NEXT: fidivrs (%ecx) # sched: [34:1.00] ; SANDY-NEXT: fidivrl (%eax) # sched: [34:1.00] ; SANDY-NEXT: #NO_APP -; SANDY-NEXT: retl # sched: [5:1.00] +; SANDY-NEXT: retl # sched: [6:1.00] ; ; HASWELL-LABEL: test_fdivrp_fidivr: ; HASWELL: # %bb.0: @@ -1890,7 +1890,7 @@ define void @test_ffree() optsize { ; SANDY-NEXT: #APP ; SANDY-NEXT: ffree %st(0) # sched: [1:1.00] ; SANDY-NEXT: #NO_APP -; SANDY-NEXT: retl # sched: [5:1.00] +; SANDY-NEXT: retl # sched: [6:1.00] ; ; HASWELL-LABEL: test_ffree: ; HASWELL: # %bb.0: @@ -1984,7 +1984,7 @@ define void @test_ficom(i16 *%a0, i32 *%a1) optsize { ; SANDY-NEXT: ficomps (%ecx) # sched: [11:2.00] ; SANDY-NEXT: ficompl (%eax) # sched: [11:2.00] ; SANDY-NEXT: #NO_APP -; SANDY-NEXT: retl # sched: [5:1.00] +; SANDY-NEXT: retl # sched: [6:1.00] ; ; HASWELL-LABEL: test_ficom: ; HASWELL: # %bb.0: @@ -2108,7 +2108,7 @@ define void @test_fild(i16 *%a0, i32 *%a1, i64 *%a2) optsize { ; SANDY-NEXT: fildl (%ecx) # sched: [10:1.00] ; SANDY-NEXT: fildll (%eax) # sched: [10:1.00] ; SANDY-NEXT: #NO_APP -; SANDY-NEXT: retl # sched: [5:1.00] +; SANDY-NEXT: retl # sched: [6:1.00] ; ; HASWELL-LABEL: test_fild: ; HASWELL: # %bb.0: @@ -2212,7 +2212,7 @@ define void @test_fincstp() optsize { ; SANDY-NEXT: #APP ; SANDY-NEXT: fincstp # sched: [1:1.00] ; SANDY-NEXT: #NO_APP -; SANDY-NEXT: retl # sched: [5:1.00] +; SANDY-NEXT: retl # sched: [6:1.00] ; ; HASWELL-LABEL: test_fincstp: ; HASWELL: # %bb.0: @@ -2290,7 +2290,7 @@ define void @test_finit() optsize { 
; SANDY-NEXT: wait # sched: [100:0.33] ; SANDY-NEXT: fninit # sched: [5:1.33] ; SANDY-NEXT: #NO_APP -; SANDY-NEXT: retl # sched: [5:1.00] +; SANDY-NEXT: retl # sched: [6:1.00] ; ; HASWELL-LABEL: test_finit: ; HASWELL: # %bb.0: @@ -2370,7 +2370,7 @@ define void @test_fninit() optsize { ; SANDY-NEXT: #APP ; SANDY-NEXT: fninit # sched: [5:1.33] ; SANDY-NEXT: #NO_APP -; SANDY-NEXT: retl # sched: [5:1.00] +; SANDY-NEXT: retl # sched: [6:1.00] ; ; HASWELL-LABEL: test_fninit: ; HASWELL: # %bb.0: @@ -2484,7 +2484,7 @@ define void @test_fist_fistp_fisttp(i16* %a0, i32* %a1, i64 *%a2) optsize { ; SANDY-NEXT: fisttpl (%ecx) # sched: [5:1.00] ; SANDY-NEXT: fisttpll (%eax) # sched: [5:1.00] ; SANDY-NEXT: #NO_APP -; SANDY-NEXT: retl # sched: [5:1.00] +; SANDY-NEXT: retl # sched: [6:1.00] ; ; HASWELL-LABEL: test_fist_fistp_fisttp: ; HASWELL: # %bb.0: @@ -2642,7 +2642,7 @@ define void @test_fld(i16* %a0, i32* %a1, i64 *%a2) optsize { ; SANDY-NEXT: fldl (%ecx) # sched: [9:1.00] ; SANDY-NEXT: fldt (%eax) # sched: [9:1.00] ; SANDY-NEXT: #NO_APP -; SANDY-NEXT: retl # sched: [5:1.00] +; SANDY-NEXT: retl # sched: [6:1.00] ; ; HASWELL-LABEL: test_fld: ; HASWELL: # %bb.0: @@ -2760,7 +2760,7 @@ define void @test_fldcw_fldenv(i8* %a0) optsize { ; SANDY-NEXT: fldcw (%eax) # sched: [8:2.00] ; SANDY-NEXT: fldenv (%eax) # sched: [100:0.33] ; SANDY-NEXT: #NO_APP -; SANDY-NEXT: retl # sched: [5:1.00] +; SANDY-NEXT: retl # sched: [6:1.00] ; ; HASWELL-LABEL: test_fldcw_fldenv: ; HASWELL: # %bb.0: @@ -2866,7 +2866,7 @@ define void @test_fld1_fldl2e_fldl2t_fldlg2_fldln2_fldpi_fldz() optsize { ; SANDY-NEXT: fldpi # sched: [100:0.33] ; SANDY-NEXT: fldz # sched: [1:?] 
; SANDY-NEXT: #NO_APP -; SANDY-NEXT: retl # sched: [5:1.00] +; SANDY-NEXT: retl # sched: [6:1.00] ; ; HASWELL-LABEL: test_fld1_fldl2e_fldl2t_fldlg2_fldln2_fldpi_fldz: ; HASWELL: # %bb.0: @@ -2990,7 +2990,7 @@ define void @test_fmul(float *%a0, double *%a1) optsize { ; SANDY-NEXT: fmuls (%ecx) # sched: [12:1.00] ; SANDY-NEXT: fmull (%eax) # sched: [12:1.00] ; SANDY-NEXT: #NO_APP -; SANDY-NEXT: retl # sched: [5:1.00] +; SANDY-NEXT: retl # sched: [6:1.00] ; ; HASWELL-LABEL: test_fmul: ; HASWELL: # %bb.0: @@ -3114,7 +3114,7 @@ define void @test_fmulp_fimul(i16 *%a0, i32 *%a1) optsize { ; SANDY-NEXT: fimuls (%ecx) # sched: [15:1.00] ; SANDY-NEXT: fimull (%eax) # sched: [15:1.00] ; SANDY-NEXT: #NO_APP -; SANDY-NEXT: retl # sched: [5:1.00] +; SANDY-NEXT: retl # sched: [6:1.00] ; ; HASWELL-LABEL: test_fmulp_fimul: ; HASWELL: # %bb.0: @@ -3218,7 +3218,7 @@ define void @test_fnop() optsize { ; SANDY-NEXT: #APP ; SANDY-NEXT: fnop # sched: [1:1.00] ; SANDY-NEXT: #NO_APP -; SANDY-NEXT: retl # sched: [5:1.00] +; SANDY-NEXT: retl # sched: [6:1.00] ; ; HASWELL-LABEL: test_fnop: ; HASWELL: # %bb.0: @@ -3292,7 +3292,7 @@ define void @test_fpatan() optsize { ; SANDY-NEXT: #APP ; SANDY-NEXT: fpatan # sched: [100:0.33] ; SANDY-NEXT: #NO_APP -; SANDY-NEXT: retl # sched: [5:1.00] +; SANDY-NEXT: retl # sched: [6:1.00] ; ; HASWELL-LABEL: test_fpatan: ; HASWELL: # %bb.0: @@ -3370,7 +3370,7 @@ define void @test_fprem_fprem1() optsize { ; SANDY-NEXT: fprem # sched: [100:0.33] ; SANDY-NEXT: fprem1 # sched: [100:0.33] ; SANDY-NEXT: #NO_APP -; SANDY-NEXT: retl # sched: [5:1.00] +; SANDY-NEXT: retl # sched: [6:1.00] ; ; HASWELL-LABEL: test_fprem_fprem1: ; HASWELL: # %bb.0: @@ -3450,7 +3450,7 @@ define void @test_fptan() optsize { ; SANDY-NEXT: #APP ; SANDY-NEXT: fptan # sched: [100:0.33] ; SANDY-NEXT: #NO_APP -; SANDY-NEXT: retl # sched: [5:1.00] +; SANDY-NEXT: retl # sched: [6:1.00] ; ; HASWELL-LABEL: test_fptan: ; HASWELL: # %bb.0: @@ -3524,7 +3524,7 @@ define void @test_frndint() optsize { ; 
SANDY-NEXT: #APP ; SANDY-NEXT: frndint # sched: [100:0.33] ; SANDY-NEXT: #NO_APP -; SANDY-NEXT: retl # sched: [5:1.00] +; SANDY-NEXT: retl # sched: [6:1.00] ; ; HASWELL-LABEL: test_frndint: ; HASWELL: # %bb.0: @@ -3602,7 +3602,7 @@ define void @test_frstor(i8* %a0) optsize { ; SANDY-NEXT: #APP ; SANDY-NEXT: frstor (%eax) # sched: [100:0.33] ; SANDY-NEXT: #NO_APP -; SANDY-NEXT: retl # sched: [5:1.00] +; SANDY-NEXT: retl # sched: [6:1.00] ; ; HASWELL-LABEL: test_frstor: ; HASWELL: # %bb.0: @@ -3690,7 +3690,7 @@ define void @test_fsave(i8* %a0) optsize { ; SANDY-NEXT: wait # sched: [100:0.33] ; SANDY-NEXT: fnsave (%eax) # sched: [100:0.33] ; SANDY-NEXT: #NO_APP -; SANDY-NEXT: retl # sched: [5:1.00] +; SANDY-NEXT: retl # sched: [6:1.00] ; ; HASWELL-LABEL: test_fsave: ; HASWELL: # %bb.0: @@ -3780,7 +3780,7 @@ define void @test_fnsave(i8* %a0) optsize { ; SANDY-NEXT: #APP ; SANDY-NEXT: fnsave (%eax) # sched: [100:0.33] ; SANDY-NEXT: #NO_APP -; SANDY-NEXT: retl # sched: [5:1.00] +; SANDY-NEXT: retl # sched: [6:1.00] ; ; HASWELL-LABEL: test_fnsave: ; HASWELL: # %bb.0: @@ -3860,7 +3860,7 @@ define void @test_fscale() optsize { ; SANDY-NEXT: #APP ; SANDY-NEXT: fscale # sched: [100:0.33] ; SANDY-NEXT: #NO_APP -; SANDY-NEXT: retl # sched: [5:1.00] +; SANDY-NEXT: retl # sched: [6:1.00] ; ; HASWELL-LABEL: test_fscale: ; HASWELL: # %bb.0: @@ -3934,7 +3934,7 @@ define void @test_fsin() optsize { ; SANDY-NEXT: #APP ; SANDY-NEXT: fsin # sched: [100:0.33] ; SANDY-NEXT: #NO_APP -; SANDY-NEXT: retl # sched: [5:1.00] +; SANDY-NEXT: retl # sched: [6:1.00] ; ; HASWELL-LABEL: test_fsin: ; HASWELL: # %bb.0: @@ -4008,7 +4008,7 @@ define void @test_fsincos() optsize { ; SANDY-NEXT: #APP ; SANDY-NEXT: fsincos # sched: [100:0.33] ; SANDY-NEXT: #NO_APP -; SANDY-NEXT: retl # sched: [5:1.00] +; SANDY-NEXT: retl # sched: [6:1.00] ; ; HASWELL-LABEL: test_fsincos: ; HASWELL: # %bb.0: @@ -4082,7 +4082,7 @@ define void @test_fsqrt() optsize { ; SANDY-NEXT: #APP ; SANDY-NEXT: fsqrt # sched: [14:1.00] ; 
SANDY-NEXT: #NO_APP -; SANDY-NEXT: retl # sched: [5:1.00] +; SANDY-NEXT: retl # sched: [6:1.00] ; ; HASWELL-LABEL: test_fsqrt: ; HASWELL: # %bb.0: @@ -4192,7 +4192,7 @@ define void @test_fst_fstp(i16* %a0, i32* %a1, i64 *%a2) optsize { ; SANDY-NEXT: fstpl (%ecx) # sched: [6:1.00] ; SANDY-NEXT: fstpt (%eax) # sched: [6:1.00] ; SANDY-NEXT: #NO_APP -; SANDY-NEXT: retl # sched: [5:1.00] +; SANDY-NEXT: retl # sched: [6:1.00] ; ; HASWELL-LABEL: test_fst_fstp: ; HASWELL: # %bb.0: @@ -4344,7 +4344,7 @@ define void @test_fstcw_fstenv_fstsw(i8* %a0) optsize { ; SANDY-NEXT: wait # sched: [100:0.33] ; SANDY-NEXT: fnstsw (%eax) # sched: [7:1.00] ; SANDY-NEXT: #NO_APP -; SANDY-NEXT: retl # sched: [5:1.00] +; SANDY-NEXT: retl # sched: [6:1.00] ; ; HASWELL-LABEL: test_fstcw_fstenv_fstsw: ; HASWELL: # %bb.0: @@ -4466,7 +4466,7 @@ define void @test_fnstcw_fnstenv_fnstsw(i8* %a0) optsize { ; SANDY-NEXT: fnstenv (%eax) # sched: [100:0.33] ; SANDY-NEXT: fnstsw (%eax) # sched: [7:1.00] ; SANDY-NEXT: #NO_APP -; SANDY-NEXT: retl # sched: [5:1.00] +; SANDY-NEXT: retl # sched: [6:1.00] ; ; HASWELL-LABEL: test_fnstcw_fnstenv_fnstsw: ; HASWELL: # %bb.0: @@ -4578,7 +4578,7 @@ define void @test_fsub(float *%a0, double *%a1) optsize { ; SANDY-NEXT: fsubs (%ecx) # sched: [10:1.00] ; SANDY-NEXT: fsubl (%eax) # sched: [10:1.00] ; SANDY-NEXT: #NO_APP -; SANDY-NEXT: retl # sched: [5:1.00] +; SANDY-NEXT: retl # sched: [6:1.00] ; ; HASWELL-LABEL: test_fsub: ; HASWELL: # %bb.0: @@ -4702,7 +4702,7 @@ define void @test_fsubp_fisub(i16 *%a0, i32 *%a1) optsize { ; SANDY-NEXT: fisubs (%ecx) # sched: [13:2.00] ; SANDY-NEXT: fisubl (%eax) # sched: [13:2.00] ; SANDY-NEXT: #NO_APP -; SANDY-NEXT: retl # sched: [5:1.00] +; SANDY-NEXT: retl # sched: [6:1.00] ; ; HASWELL-LABEL: test_fsubp_fisub: ; HASWELL: # %bb.0: @@ -4826,7 +4826,7 @@ define void @test_fsubr(float *%a0, double *%a1) optsize { ; SANDY-NEXT: fsubrs (%ecx) # sched: [10:1.00] ; SANDY-NEXT: fsubrl (%eax) # sched: [10:1.00] ; SANDY-NEXT: #NO_APP -; 
SANDY-NEXT: retl # sched: [5:1.00] +; SANDY-NEXT: retl # sched: [6:1.00] ; ; HASWELL-LABEL: test_fsubr: ; HASWELL: # %bb.0: @@ -4950,7 +4950,7 @@ define void @test_fsubrp_fisubr(i16 *%a0, i32 *%a1) optsize { ; SANDY-NEXT: fisubrs (%ecx) # sched: [13:2.00] ; SANDY-NEXT: fisubrl (%eax) # sched: [13:2.00] ; SANDY-NEXT: #NO_APP -; SANDY-NEXT: retl # sched: [5:1.00] +; SANDY-NEXT: retl # sched: [6:1.00] ; ; HASWELL-LABEL: test_fsubrp_fisubr: ; HASWELL: # %bb.0: @@ -5054,7 +5054,7 @@ define void @test_ftst() optsize { ; SANDY-NEXT: #APP ; SANDY-NEXT: ftst # sched: [3:1.00] ; SANDY-NEXT: #NO_APP -; SANDY-NEXT: retl # sched: [5:1.00] +; SANDY-NEXT: retl # sched: [6:1.00] ; ; HASWELL-LABEL: test_ftst: ; HASWELL: # %bb.0: @@ -5144,7 +5144,7 @@ define void @test_fucom_fucomp_fucompp() optsize { ; SANDY-NEXT: fucomp %st(3) # sched: [1:1.00] ; SANDY-NEXT: fucompp # sched: [3:1.00] ; SANDY-NEXT: #NO_APP -; SANDY-NEXT: retl # sched: [5:1.00] +; SANDY-NEXT: retl # sched: [6:1.00] ; ; HASWELL-LABEL: test_fucom_fucomp_fucompp: ; HASWELL: # %bb.0: @@ -5246,7 +5246,7 @@ define void @test_fucomi_fucomip() optsize { ; SANDY-NEXT: fucomi %st(3) # sched: [3:1.00] ; SANDY-NEXT: fucompi %st(3) # sched: [3:1.00] ; SANDY-NEXT: #NO_APP -; SANDY-NEXT: retl # sched: [5:1.00] +; SANDY-NEXT: retl # sched: [6:1.00] ; ; HASWELL-LABEL: test_fucomi_fucomip: ; HASWELL: # %bb.0: @@ -5326,7 +5326,7 @@ define void @test_fwait() optsize { ; SANDY-NEXT: #APP ; SANDY-NEXT: wait # sched: [100:0.33] ; SANDY-NEXT: #NO_APP -; SANDY-NEXT: retl # sched: [5:1.00] +; SANDY-NEXT: retl # sched: [6:1.00] ; ; HASWELL-LABEL: test_fwait: ; HASWELL: # %bb.0: @@ -5400,7 +5400,7 @@ define void @test_fxam() optsize { ; SANDY-NEXT: #APP ; SANDY-NEXT: fxam # sched: [100:0.33] ; SANDY-NEXT: #NO_APP -; SANDY-NEXT: retl # sched: [5:1.00] +; SANDY-NEXT: retl # sched: [6:1.00] ; ; HASWELL-LABEL: test_fxam: ; HASWELL: # %bb.0: @@ -5478,7 +5478,7 @@ define void @test_fxch() optsize { ; SANDY-NEXT: fxch %st(1) # sched: [1:0.33] ; 
SANDY-NEXT: fxch %st(3) # sched: [1:0.33] ; SANDY-NEXT: #NO_APP -; SANDY-NEXT: retl # sched: [5:1.00] +; SANDY-NEXT: retl # sched: [6:1.00] ; ; HASWELL-LABEL: test_fxch: ; HASWELL: # %bb.0: @@ -5566,7 +5566,7 @@ define void @test_fxrstor_fxsave(i8* %a0) optsize { ; SANDY-NEXT: fxrstor (%eax) # sched: [5:2.00] ; SANDY-NEXT: fxsave (%eax) # sched: [100:0.33] ; SANDY-NEXT: #NO_APP -; SANDY-NEXT: retl # sched: [5:1.00] +; SANDY-NEXT: retl # sched: [6:1.00] ; ; HASWELL-LABEL: test_fxrstor_fxsave: ; HASWELL: # %bb.0: @@ -5652,7 +5652,7 @@ define void @test_fxtract() optsize { ; SANDY-NEXT: #APP ; SANDY-NEXT: fxtract # sched: [100:0.33] ; SANDY-NEXT: #NO_APP -; SANDY-NEXT: retl # sched: [5:1.00] +; SANDY-NEXT: retl # sched: [6:1.00] ; ; HASWELL-LABEL: test_fxtract: ; HASWELL: # %bb.0: @@ -5726,7 +5726,7 @@ define void @test_fyl2x() optsize { ; SANDY-NEXT: #APP ; SANDY-NEXT: fyl2x # sched: [100:0.33] ; SANDY-NEXT: #NO_APP -; SANDY-NEXT: retl # sched: [5:1.00] +; SANDY-NEXT: retl # sched: [6:1.00] ; ; HASWELL-LABEL: test_fyl2x: ; HASWELL: # %bb.0: @@ -5800,7 +5800,7 @@ define void @test_fyl2xp1() optsize { ; SANDY-NEXT: #APP ; SANDY-NEXT: fyl2xp1 # sched: [100:0.33] ; SANDY-NEXT: #NO_APP -; SANDY-NEXT: retl # sched: [5:1.00] +; SANDY-NEXT: retl # sched: [6:1.00] ; ; HASWELL-LABEL: test_fyl2xp1: ; HASWELL: # %bb.0: diff --git a/llvm/test/CodeGen/X86/xop-schedule.ll b/llvm/test/CodeGen/X86/xop-schedule.ll index 16f87b2..07bad6a 100644 --- a/llvm/test/CodeGen/X86/xop-schedule.ll +++ b/llvm/test/CodeGen/X86/xop-schedule.ll @@ -11,8 +11,8 @@ define void @test_vfrczpd(<2 x double> %a0, <4 x double> %a1, <2 x double> *%a2, ; GENERIC-NEXT: #APP ; GENERIC-NEXT: vfrczpd %xmm0, %xmm0 # sched: [3:1.00] ; GENERIC-NEXT: vfrczpd %ymm1, %ymm1 # sched: [3:1.00] -; GENERIC-NEXT: vfrczpd (%rdi), %xmm0 # sched: [7:1.00] -; GENERIC-NEXT: vfrczpd (%rsi), %ymm1 # sched: [7:1.00] +; GENERIC-NEXT: vfrczpd (%rdi), %xmm0 # sched: [8:1.00] +; GENERIC-NEXT: vfrczpd (%rsi), %ymm1 # sched: [8:1.00] ; 
GENERIC-NEXT: #NO_APP ; GENERIC-NEXT: vzeroupper # sched: [100:0.33] ; GENERIC-NEXT: retq # sched: [1:1.00] @@ -37,8 +37,8 @@ define void @test_vfrczps(<4 x float> %a0, <4 x double> %a1, <4 x float> *%a2, < ; GENERIC-NEXT: #APP ; GENERIC-NEXT: vfrczps %xmm0, %xmm0 # sched: [3:1.00] ; GENERIC-NEXT: vfrczps %ymm1, %ymm1 # sched: [3:1.00] -; GENERIC-NEXT: vfrczps (%rdi), %xmm0 # sched: [7:1.00] -; GENERIC-NEXT: vfrczps (%rsi), %ymm1 # sched: [7:1.00] +; GENERIC-NEXT: vfrczps (%rdi), %xmm0 # sched: [8:1.00] +; GENERIC-NEXT: vfrczps (%rsi), %ymm1 # sched: [8:1.00] ; GENERIC-NEXT: #NO_APP ; GENERIC-NEXT: vzeroupper # sched: [100:0.33] ; GENERIC-NEXT: retq # sched: [1:1.00] @@ -62,7 +62,7 @@ define void @test_vfrczsd(<2 x double> %a0, <2 x double> *%a1) { ; GENERIC: # %bb.0: ; GENERIC-NEXT: #APP ; GENERIC-NEXT: vfrczsd %xmm0, %xmm0 # sched: [3:1.00] -; GENERIC-NEXT: vfrczsd (%rdi), %xmm0 # sched: [7:1.00] +; GENERIC-NEXT: vfrczsd (%rdi), %xmm0 # sched: [8:1.00] ; GENERIC-NEXT: #NO_APP ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -82,7 +82,7 @@ define void @test_vfrczss(<4 x float> %a0, <4 x double> *%a1) { ; GENERIC: # %bb.0: ; GENERIC-NEXT: #APP ; GENERIC-NEXT: vfrczss %xmm0, %xmm0 # sched: [3:1.00] -; GENERIC-NEXT: vfrczss (%rdi), %xmm0 # sched: [7:1.00] +; GENERIC-NEXT: vfrczss (%rdi), %xmm0 # sched: [8:1.00] ; GENERIC-NEXT: #NO_APP ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -102,8 +102,8 @@ define void @test_vpcmov_128(<2 x i64> %a0, <2 x i64> %a1, <2 x i64> %a2, <2 x i ; GENERIC: # %bb.0: ; GENERIC-NEXT: #APP ; GENERIC-NEXT: vpcmov %xmm2, %xmm1, %xmm0, %xmm0 # sched: [1:1.00] -; GENERIC-NEXT: vpcmov (%rdi), %xmm1, %xmm0, %xmm0 # sched: [5:1.00] -; GENERIC-NEXT: vpcmov %xmm2, (%rdi), %xmm0, %xmm0 # sched: [5:1.00] +; GENERIC-NEXT: vpcmov (%rdi), %xmm1, %xmm0, %xmm0 # sched: [6:1.00] +; GENERIC-NEXT: vpcmov %xmm2, (%rdi), %xmm0, %xmm0 # sched: [6:1.00] ; GENERIC-NEXT: #NO_APP ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -124,8 +124,8 @@ define void @test_vpcmov_256(<4 x 
i64> %a0, <4 x i64> %a1, <4 x i64> %a2, <4 x i ; GENERIC: # %bb.0: ; GENERIC-NEXT: #APP ; GENERIC-NEXT: vpcmov %ymm2, %ymm1, %ymm0, %ymm0 # sched: [1:1.00] -; GENERIC-NEXT: vpcmov (%rdi), %ymm1, %ymm0, %ymm0 # sched: [5:1.00] -; GENERIC-NEXT: vpcmov %ymm2, (%rdi), %ymm0, %ymm0 # sched: [5:1.00] +; GENERIC-NEXT: vpcmov (%rdi), %ymm1, %ymm0, %ymm0 # sched: [6:1.00] +; GENERIC-NEXT: vpcmov %ymm2, (%rdi), %ymm0, %ymm0 # sched: [6:1.00] ; GENERIC-NEXT: #NO_APP ; GENERIC-NEXT: vzeroupper # sched: [100:0.33] ; GENERIC-NEXT: retq # sched: [1:1.00] @@ -151,10 +151,10 @@ define void @test_vpcom(<2 x i64> %a0, <2 x i64> %a1, <2 x i64> *%a2) { ; GENERIC-NEXT: vpcomd $3, %xmm1, %xmm0, %xmm0 # sched: [3:1.00] ; GENERIC-NEXT: vpcomq $3, %xmm1, %xmm0, %xmm0 # sched: [3:1.00] ; GENERIC-NEXT: vpcomw $3, %xmm1, %xmm0, %xmm0 # sched: [3:1.00] -; GENERIC-NEXT: vpcomb $3, (%rdi), %xmm0, %xmm0 # sched: [7:1.00] -; GENERIC-NEXT: vpcomd $3, (%rdi), %xmm0, %xmm0 # sched: [7:1.00] -; GENERIC-NEXT: vpcomq $3, (%rdi), %xmm0, %xmm0 # sched: [7:1.00] -; GENERIC-NEXT: vpcomw $3, (%rdi), %xmm0, %xmm0 # sched: [7:1.00] +; GENERIC-NEXT: vpcomb $3, (%rdi), %xmm0, %xmm0 # sched: [8:1.00] +; GENERIC-NEXT: vpcomd $3, (%rdi), %xmm0, %xmm0 # sched: [8:1.00] +; GENERIC-NEXT: vpcomq $3, (%rdi), %xmm0, %xmm0 # sched: [8:1.00] +; GENERIC-NEXT: vpcomw $3, (%rdi), %xmm0, %xmm0 # sched: [8:1.00] ; GENERIC-NEXT: #NO_APP ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -183,10 +183,10 @@ define void @test_vpcomu(<2 x i64> %a0, <2 x i64> %a1, <2 x i64> *%a2) { ; GENERIC-NEXT: vpcomud $3, %xmm1, %xmm0, %xmm0 # sched: [3:1.00] ; GENERIC-NEXT: vpcomuq $3, %xmm1, %xmm0, %xmm0 # sched: [3:1.00] ; GENERIC-NEXT: vpcomuw $3, %xmm1, %xmm0, %xmm0 # sched: [3:1.00] -; GENERIC-NEXT: vpcomub $3, (%rdi), %xmm0, %xmm0 # sched: [7:1.00] -; GENERIC-NEXT: vpcomud $3, (%rdi), %xmm0, %xmm0 # sched: [7:1.00] -; GENERIC-NEXT: vpcomuq $3, (%rdi), %xmm0, %xmm0 # sched: [7:1.00] -; GENERIC-NEXT: vpcomuw $3, (%rdi), %xmm0, %xmm0 # sched: 
[7:1.00] +; GENERIC-NEXT: vpcomub $3, (%rdi), %xmm0, %xmm0 # sched: [8:1.00] +; GENERIC-NEXT: vpcomud $3, (%rdi), %xmm0, %xmm0 # sched: [8:1.00] +; GENERIC-NEXT: vpcomuq $3, (%rdi), %xmm0, %xmm0 # sched: [8:1.00] +; GENERIC-NEXT: vpcomuw $3, (%rdi), %xmm0, %xmm0 # sched: [8:1.00] ; GENERIC-NEXT: #NO_APP ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -212,8 +212,8 @@ define void @test_vpermil2pd_128(<2 x double> %a0, <2 x double> %a1, <2 x double ; GENERIC: # %bb.0: ; GENERIC-NEXT: #APP ; GENERIC-NEXT: vpermil2pd $3, %xmm2, %xmm1, %xmm0, %xmm0 # sched: [1:1.00] -; GENERIC-NEXT: vpermil2pd $3, %xmm2, (%rdi), %xmm0, %xmm0 # sched: [5:1.00] -; GENERIC-NEXT: vpermil2pd $3, (%rdi), %xmm1, %xmm0, %xmm0 # sched: [5:1.00] +; GENERIC-NEXT: vpermil2pd $3, %xmm2, (%rdi), %xmm0, %xmm0 # sched: [6:1.00] +; GENERIC-NEXT: vpermil2pd $3, (%rdi), %xmm1, %xmm0, %xmm0 # sched: [6:1.00] ; GENERIC-NEXT: #NO_APP ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -234,8 +234,8 @@ define void @test_vpermil2pd_256(<4 x double> %a0, <4 x double> %a1, <4 x double ; GENERIC: # %bb.0: ; GENERIC-NEXT: #APP ; GENERIC-NEXT: vpermil2pd $3, %ymm2, %ymm1, %ymm0, %ymm0 # sched: [1:1.00] -; GENERIC-NEXT: vpermil2pd $3, %ymm2, (%rdi), %ymm0, %ymm0 # sched: [5:1.00] -; GENERIC-NEXT: vpermil2pd $3, (%rdi), %ymm1, %ymm0, %ymm0 # sched: [5:1.00] +; GENERIC-NEXT: vpermil2pd $3, %ymm2, (%rdi), %ymm0, %ymm0 # sched: [6:1.00] +; GENERIC-NEXT: vpermil2pd $3, (%rdi), %ymm1, %ymm0, %ymm0 # sched: [6:1.00] ; GENERIC-NEXT: #NO_APP ; GENERIC-NEXT: vzeroupper # sched: [100:0.33] ; GENERIC-NEXT: retq # sched: [1:1.00] @@ -258,8 +258,8 @@ define void @test_vpermil2ps_128(<4 x float> %a0, <4 x float> %a1, <4 x float> % ; GENERIC: # %bb.0: ; GENERIC-NEXT: #APP ; GENERIC-NEXT: vpermil2ps $3, %xmm2, %xmm1, %xmm0, %xmm0 # sched: [1:1.00] -; GENERIC-NEXT: vpermil2ps $3, %xmm2, (%rdi), %xmm0, %xmm0 # sched: [5:1.00] -; GENERIC-NEXT: vpermil2ps $3, (%rdi), %xmm1, %xmm0, %xmm0 # sched: [5:1.00] +; GENERIC-NEXT: vpermil2ps $3, %xmm2, 
(%rdi), %xmm0, %xmm0 # sched: [6:1.00] +; GENERIC-NEXT: vpermil2ps $3, (%rdi), %xmm1, %xmm0, %xmm0 # sched: [6:1.00] ; GENERIC-NEXT: #NO_APP ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -280,8 +280,8 @@ define void @test_vpermil2ps_256(<8 x float> %a0, <8 x float> %a1, <8 x float> % ; GENERIC: # %bb.0: ; GENERIC-NEXT: #APP ; GENERIC-NEXT: vpermil2ps $3, %ymm2, %ymm1, %ymm0, %ymm0 # sched: [1:1.00] -; GENERIC-NEXT: vpermil2ps $3, %ymm2, (%rdi), %ymm0, %ymm0 # sched: [5:1.00] -; GENERIC-NEXT: vpermil2ps $3, (%rdi), %ymm1, %ymm0, %ymm0 # sched: [5:1.00] +; GENERIC-NEXT: vpermil2ps $3, %ymm2, (%rdi), %ymm0, %ymm0 # sched: [6:1.00] +; GENERIC-NEXT: vpermil2ps $3, (%rdi), %ymm1, %ymm0, %ymm0 # sched: [6:1.00] ; GENERIC-NEXT: #NO_APP ; GENERIC-NEXT: vzeroupper # sched: [100:0.33] ; GENERIC-NEXT: retq # sched: [1:1.00] @@ -304,7 +304,7 @@ define void @test_vphaddbd(<2 x i64> %a0, <2 x i64> *%a1) { ; GENERIC: # %bb.0: ; GENERIC-NEXT: #APP ; GENERIC-NEXT: vphaddbd %xmm0, %xmm0 # sched: [1:0.50] -; GENERIC-NEXT: vphaddbd (%rdi), %xmm0 # sched: [5:0.50] +; GENERIC-NEXT: vphaddbd (%rdi), %xmm0 # sched: [6:0.50] ; GENERIC-NEXT: #NO_APP ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -324,7 +324,7 @@ define void @test_vphaddbq(<2 x i64> %a0, <2 x i64> *%a1) { ; GENERIC: # %bb.0: ; GENERIC-NEXT: #APP ; GENERIC-NEXT: vphaddbq %xmm0, %xmm0 # sched: [1:0.50] -; GENERIC-NEXT: vphaddbq (%rdi), %xmm0 # sched: [5:0.50] +; GENERIC-NEXT: vphaddbq (%rdi), %xmm0 # sched: [6:0.50] ; GENERIC-NEXT: #NO_APP ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -344,7 +344,7 @@ define void @test_vphaddbw(<2 x i64> %a0, <2 x i64> *%a1) { ; GENERIC: # %bb.0: ; GENERIC-NEXT: #APP ; GENERIC-NEXT: vphaddbw %xmm0, %xmm0 # sched: [1:0.50] -; GENERIC-NEXT: vphaddbw (%rdi), %xmm0 # sched: [5:0.50] +; GENERIC-NEXT: vphaddbw (%rdi), %xmm0 # sched: [6:0.50] ; GENERIC-NEXT: #NO_APP ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -364,7 +364,7 @@ define void @test_vphadddq(<2 x i64> %a0, <2 x i64> *%a1) { ; GENERIC: # %bb.0: ; 
GENERIC-NEXT: #APP ; GENERIC-NEXT: vphadddq %xmm0, %xmm0 # sched: [1:0.50] -; GENERIC-NEXT: vphadddq (%rdi), %xmm0 # sched: [5:0.50] +; GENERIC-NEXT: vphadddq (%rdi), %xmm0 # sched: [6:0.50] ; GENERIC-NEXT: #NO_APP ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -384,7 +384,7 @@ define void @test_vphaddubd(<2 x i64> %a0, <2 x i64> *%a1) { ; GENERIC: # %bb.0: ; GENERIC-NEXT: #APP ; GENERIC-NEXT: vphaddubd %xmm0, %xmm0 # sched: [1:0.50] -; GENERIC-NEXT: vphaddubd (%rdi), %xmm0 # sched: [5:0.50] +; GENERIC-NEXT: vphaddubd (%rdi), %xmm0 # sched: [6:0.50] ; GENERIC-NEXT: #NO_APP ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -404,7 +404,7 @@ define void @test_vphaddubq(<2 x i64> %a0, <2 x i64> *%a1) { ; GENERIC: # %bb.0: ; GENERIC-NEXT: #APP ; GENERIC-NEXT: vphaddubq %xmm0, %xmm0 # sched: [1:0.50] -; GENERIC-NEXT: vphaddubq (%rdi), %xmm0 # sched: [5:0.50] +; GENERIC-NEXT: vphaddubq (%rdi), %xmm0 # sched: [6:0.50] ; GENERIC-NEXT: #NO_APP ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -424,7 +424,7 @@ define void @test_vphaddubw(<2 x i64> %a0, <2 x i64> *%a1) { ; GENERIC: # %bb.0: ; GENERIC-NEXT: #APP ; GENERIC-NEXT: vphaddubw %xmm0, %xmm0 # sched: [1:0.50] -; GENERIC-NEXT: vphaddubw (%rdi), %xmm0 # sched: [5:0.50] +; GENERIC-NEXT: vphaddubw (%rdi), %xmm0 # sched: [6:0.50] ; GENERIC-NEXT: #NO_APP ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -444,7 +444,7 @@ define void @test_vphaddudq(<2 x i64> %a0, <2 x i64> *%a1) { ; GENERIC: # %bb.0: ; GENERIC-NEXT: #APP ; GENERIC-NEXT: vphaddudq %xmm0, %xmm0 # sched: [1:0.50] -; GENERIC-NEXT: vphaddudq (%rdi), %xmm0 # sched: [5:0.50] +; GENERIC-NEXT: vphaddudq (%rdi), %xmm0 # sched: [6:0.50] ; GENERIC-NEXT: #NO_APP ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -464,7 +464,7 @@ define void @test_vphadduwd(<2 x i64> %a0, <2 x i64> *%a1) { ; GENERIC: # %bb.0: ; GENERIC-NEXT: #APP ; GENERIC-NEXT: vphadduwd %xmm0, %xmm0 # sched: [1:0.50] -; GENERIC-NEXT: vphadduwd (%rdi), %xmm0 # sched: [5:0.50] +; GENERIC-NEXT: vphadduwd (%rdi), %xmm0 # sched: 
[6:0.50] ; GENERIC-NEXT: #NO_APP ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -484,7 +484,7 @@ define void @test_vphadduwq(<2 x i64> %a0, <2 x i64> *%a1) { ; GENERIC: # %bb.0: ; GENERIC-NEXT: #APP ; GENERIC-NEXT: vphadduwq %xmm0, %xmm0 # sched: [1:0.50] -; GENERIC-NEXT: vphadduwq (%rdi), %xmm0 # sched: [5:0.50] +; GENERIC-NEXT: vphadduwq (%rdi), %xmm0 # sched: [6:0.50] ; GENERIC-NEXT: #NO_APP ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -504,7 +504,7 @@ define void @test_vphaddwd(<2 x i64> %a0, <2 x i64> *%a1) { ; GENERIC: # %bb.0: ; GENERIC-NEXT: #APP ; GENERIC-NEXT: vphaddwd %xmm0, %xmm0 # sched: [1:0.50] -; GENERIC-NEXT: vphaddwd (%rdi), %xmm0 # sched: [5:0.50] +; GENERIC-NEXT: vphaddwd (%rdi), %xmm0 # sched: [6:0.50] ; GENERIC-NEXT: #NO_APP ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -524,7 +524,7 @@ define void @test_vphaddwq(<2 x i64> %a0, <2 x i64> *%a1) { ; GENERIC: # %bb.0: ; GENERIC-NEXT: #APP ; GENERIC-NEXT: vphaddwq %xmm0, %xmm0 # sched: [1:0.50] -; GENERIC-NEXT: vphaddwq (%rdi), %xmm0 # sched: [5:0.50] +; GENERIC-NEXT: vphaddwq (%rdi), %xmm0 # sched: [6:0.50] ; GENERIC-NEXT: #NO_APP ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -544,7 +544,7 @@ define void @test_vphsubbw(<2 x i64> %a0, <2 x i64> *%a1) { ; GENERIC: # %bb.0: ; GENERIC-NEXT: #APP ; GENERIC-NEXT: vphsubbw %xmm0, %xmm0 # sched: [1:0.50] -; GENERIC-NEXT: vphsubbw (%rdi), %xmm0 # sched: [5:0.50] +; GENERIC-NEXT: vphsubbw (%rdi), %xmm0 # sched: [6:0.50] ; GENERIC-NEXT: #NO_APP ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -564,7 +564,7 @@ define void @test_vphsubdq(<2 x i64> %a0, <2 x i64> *%a1) { ; GENERIC: # %bb.0: ; GENERIC-NEXT: #APP ; GENERIC-NEXT: vphsubdq %xmm0, %xmm0 # sched: [1:0.50] -; GENERIC-NEXT: vphsubdq (%rdi), %xmm0 # sched: [5:0.50] +; GENERIC-NEXT: vphsubdq (%rdi), %xmm0 # sched: [6:0.50] ; GENERIC-NEXT: #NO_APP ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -584,7 +584,7 @@ define void @test_vphsubwd(<2 x i64> %a0, <2 x i64> *%a1) { ; GENERIC: # %bb.0: ; GENERIC-NEXT: #APP ; 
GENERIC-NEXT: vphsubwd %xmm0, %xmm0 # sched: [1:0.50] -; GENERIC-NEXT: vphsubwd (%rdi), %xmm0 # sched: [5:0.50] +; GENERIC-NEXT: vphsubwd (%rdi), %xmm0 # sched: [6:0.50] ; GENERIC-NEXT: #NO_APP ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -604,7 +604,7 @@ define void @test_vpmacsdd(<2 x i64> %a0, <2 x i64> %a1, <2 x i64> %a2, <2 x i64 ; GENERIC: # %bb.0: ; GENERIC-NEXT: #APP ; GENERIC-NEXT: vpmacsdd %xmm2, %xmm1, %xmm0, %xmm0 # sched: [5:1.00] -; GENERIC-NEXT: vpmacsdd %xmm2, (%rdi), %xmm0, %xmm0 # sched: [9:1.00] +; GENERIC-NEXT: vpmacsdd %xmm2, (%rdi), %xmm0, %xmm0 # sched: [10:1.00] ; GENERIC-NEXT: #NO_APP ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -624,7 +624,7 @@ define void @test_vpmacsdqh(<2 x i64> %a0, <2 x i64> %a1, <2 x i64> %a2, <2 x i6 ; GENERIC: # %bb.0: ; GENERIC-NEXT: #APP ; GENERIC-NEXT: vpmacsdqh %xmm2, %xmm1, %xmm0, %xmm0 # sched: [5:1.00] -; GENERIC-NEXT: vpmacsdqh %xmm2, (%rdi), %xmm0, %xmm0 # sched: [9:1.00] +; GENERIC-NEXT: vpmacsdqh %xmm2, (%rdi), %xmm0, %xmm0 # sched: [10:1.00] ; GENERIC-NEXT: #NO_APP ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -644,7 +644,7 @@ define void @test_vpmacsdql(<2 x i64> %a0, <2 x i64> %a1, <2 x i64> %a2, <2 x i6 ; GENERIC: # %bb.0: ; GENERIC-NEXT: #APP ; GENERIC-NEXT: vpmacsdql %xmm2, %xmm1, %xmm0, %xmm0 # sched: [5:1.00] -; GENERIC-NEXT: vpmacsdql %xmm2, (%rdi), %xmm0, %xmm0 # sched: [9:1.00] +; GENERIC-NEXT: vpmacsdql %xmm2, (%rdi), %xmm0, %xmm0 # sched: [10:1.00] ; GENERIC-NEXT: #NO_APP ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -664,7 +664,7 @@ define void @test_vpmacssdd(<2 x i64> %a0, <2 x i64> %a1, <2 x i64> %a2, <2 x i6 ; GENERIC: # %bb.0: ; GENERIC-NEXT: #APP ; GENERIC-NEXT: vpmacssdd %xmm2, %xmm1, %xmm0, %xmm0 # sched: [5:1.00] -; GENERIC-NEXT: vpmacssdd %xmm2, (%rdi), %xmm0, %xmm0 # sched: [9:1.00] +; GENERIC-NEXT: vpmacssdd %xmm2, (%rdi), %xmm0, %xmm0 # sched: [10:1.00] ; GENERIC-NEXT: #NO_APP ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -684,7 +684,7 @@ define void @test_vpmacssdqh(<2 x i64> %a0, <2 
x i64> %a1, <2 x i64> %a2, <2 x i ; GENERIC: # %bb.0: ; GENERIC-NEXT: #APP ; GENERIC-NEXT: vpmacssdqh %xmm2, %xmm1, %xmm0, %xmm0 # sched: [5:1.00] -; GENERIC-NEXT: vpmacssdqh %xmm2, (%rdi), %xmm0, %xmm0 # sched: [9:1.00] +; GENERIC-NEXT: vpmacssdqh %xmm2, (%rdi), %xmm0, %xmm0 # sched: [10:1.00] ; GENERIC-NEXT: #NO_APP ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -704,7 +704,7 @@ define void @test_vpmacssdql(<2 x i64> %a0, <2 x i64> %a1, <2 x i64> %a2, <2 x i ; GENERIC: # %bb.0: ; GENERIC-NEXT: #APP ; GENERIC-NEXT: vpmacssdql %xmm2, %xmm1, %xmm0, %xmm0 # sched: [5:1.00] -; GENERIC-NEXT: vpmacssdql %xmm2, (%rdi), %xmm0, %xmm0 # sched: [9:1.00] +; GENERIC-NEXT: vpmacssdql %xmm2, (%rdi), %xmm0, %xmm0 # sched: [10:1.00] ; GENERIC-NEXT: #NO_APP ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -724,7 +724,7 @@ define void @test_vpmacsswd(<2 x i64> %a0, <2 x i64> %a1, <2 x i64> %a2, <2 x i6 ; GENERIC: # %bb.0: ; GENERIC-NEXT: #APP ; GENERIC-NEXT: vpmacsswd %xmm2, %xmm1, %xmm0, %xmm0 # sched: [5:1.00] -; GENERIC-NEXT: vpmacsswd %xmm2, (%rdi), %xmm0, %xmm0 # sched: [9:1.00] +; GENERIC-NEXT: vpmacsswd %xmm2, (%rdi), %xmm0, %xmm0 # sched: [10:1.00] ; GENERIC-NEXT: #NO_APP ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -744,7 +744,7 @@ define void @test_vpmacssww(<2 x i64> %a0, <2 x i64> %a1, <2 x i64> %a2, <2 x i6 ; GENERIC: # %bb.0: ; GENERIC-NEXT: #APP ; GENERIC-NEXT: vpmacssww %xmm2, %xmm1, %xmm0, %xmm0 # sched: [5:1.00] -; GENERIC-NEXT: vpmacssww %xmm2, (%rdi), %xmm0, %xmm0 # sched: [9:1.00] +; GENERIC-NEXT: vpmacssww %xmm2, (%rdi), %xmm0, %xmm0 # sched: [10:1.00] ; GENERIC-NEXT: #NO_APP ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -764,7 +764,7 @@ define void @test_vpmacswd(<2 x i64> %a0, <2 x i64> %a1, <2 x i64> %a2, <2 x i64 ; GENERIC: # %bb.0: ; GENERIC-NEXT: #APP ; GENERIC-NEXT: vpmacswd %xmm2, %xmm1, %xmm0, %xmm0 # sched: [5:1.00] -; GENERIC-NEXT: vpmacswd %xmm2, (%rdi), %xmm0, %xmm0 # sched: [9:1.00] +; GENERIC-NEXT: vpmacswd %xmm2, (%rdi), %xmm0, %xmm0 # sched: [10:1.00] ; 
GENERIC-NEXT: #NO_APP ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -784,7 +784,7 @@ define void @test_vpmacsww(<2 x i64> %a0, <2 x i64> %a1, <2 x i64> %a2, <2 x i64 ; GENERIC: # %bb.0: ; GENERIC-NEXT: #APP ; GENERIC-NEXT: vpmacsww %xmm2, %xmm1, %xmm0, %xmm0 # sched: [5:1.00] -; GENERIC-NEXT: vpmacsww %xmm2, (%rdi), %xmm0, %xmm0 # sched: [9:1.00] +; GENERIC-NEXT: vpmacsww %xmm2, (%rdi), %xmm0, %xmm0 # sched: [10:1.00] ; GENERIC-NEXT: #NO_APP ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -804,7 +804,7 @@ define void @test_vpmadcsswd(<2 x i64> %a0, <2 x i64> %a1, <2 x i64> %a2, <2 x i ; GENERIC: # %bb.0: ; GENERIC-NEXT: #APP ; GENERIC-NEXT: vpmadcsswd %xmm2, %xmm1, %xmm0, %xmm0 # sched: [5:1.00] -; GENERIC-NEXT: vpmadcsswd %xmm2, (%rdi), %xmm0, %xmm0 # sched: [9:1.00] +; GENERIC-NEXT: vpmadcsswd %xmm2, (%rdi), %xmm0, %xmm0 # sched: [10:1.00] ; GENERIC-NEXT: #NO_APP ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -824,7 +824,7 @@ define void @test_vpmadcswd(<2 x i64> %a0, <2 x i64> %a1, <2 x i64> %a2, <2 x i6 ; GENERIC: # %bb.0: ; GENERIC-NEXT: #APP ; GENERIC-NEXT: vpmadcswd %xmm2, %xmm1, %xmm0, %xmm0 # sched: [5:1.00] -; GENERIC-NEXT: vpmadcswd %xmm2, (%rdi), %xmm0, %xmm0 # sched: [9:1.00] +; GENERIC-NEXT: vpmadcswd %xmm2, (%rdi), %xmm0, %xmm0 # sched: [10:1.00] ; GENERIC-NEXT: #NO_APP ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -844,8 +844,8 @@ define void @test_vpperm(<2 x i64> %a0, <2 x i64> %a1, <2 x i64> %a2, <2 x i64> ; GENERIC: # %bb.0: ; GENERIC-NEXT: #APP ; GENERIC-NEXT: vpperm %xmm2, %xmm1, %xmm0, %xmm0 # sched: [1:1.00] -; GENERIC-NEXT: vpperm (%rdi), %xmm1, %xmm0, %xmm0 # sched: [5:1.00] -; GENERIC-NEXT: vpperm %xmm2, (%rdi), %xmm0, %xmm0 # sched: [5:1.00] +; GENERIC-NEXT: vpperm (%rdi), %xmm1, %xmm0, %xmm0 # sched: [6:1.00] +; GENERIC-NEXT: vpperm %xmm2, (%rdi), %xmm0, %xmm0 # sched: [6:1.00] ; GENERIC-NEXT: #NO_APP ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -869,22 +869,22 @@ define void @test_vprot(<2 x i64> %a0, <2 x i64> %a1, <2 x i64> *%a2) { ; 
GENERIC-NEXT: vprotd %xmm1, %xmm0, %xmm0 # sched: [1:1.00] ; GENERIC-NEXT: vprotq %xmm1, %xmm0, %xmm0 # sched: [1:1.00] ; GENERIC-NEXT: vprotw %xmm1, %xmm0, %xmm0 # sched: [1:1.00] -; GENERIC-NEXT: vprotb (%rdi), %xmm0, %xmm0 # sched: [5:1.00] -; GENERIC-NEXT: vprotd (%rdi), %xmm0, %xmm0 # sched: [5:1.00] -; GENERIC-NEXT: vprotq (%rdi), %xmm0, %xmm0 # sched: [5:1.00] -; GENERIC-NEXT: vprotw (%rdi), %xmm0, %xmm0 # sched: [5:1.00] -; GENERIC-NEXT: vprotb %xmm0, (%rdi), %xmm0 # sched: [5:1.00] -; GENERIC-NEXT: vprotd %xmm0, (%rdi), %xmm0 # sched: [5:1.00] -; GENERIC-NEXT: vprotq %xmm0, (%rdi), %xmm0 # sched: [5:1.00] -; GENERIC-NEXT: vprotw %xmm0, (%rdi), %xmm0 # sched: [5:1.00] +; GENERIC-NEXT: vprotb (%rdi), %xmm0, %xmm0 # sched: [6:1.00] +; GENERIC-NEXT: vprotd (%rdi), %xmm0, %xmm0 # sched: [6:1.00] +; GENERIC-NEXT: vprotq (%rdi), %xmm0, %xmm0 # sched: [6:1.00] +; GENERIC-NEXT: vprotw (%rdi), %xmm0, %xmm0 # sched: [6:1.00] +; GENERIC-NEXT: vprotb %xmm0, (%rdi), %xmm0 # sched: [6:1.00] +; GENERIC-NEXT: vprotd %xmm0, (%rdi), %xmm0 # sched: [6:1.00] +; GENERIC-NEXT: vprotq %xmm0, (%rdi), %xmm0 # sched: [6:1.00] +; GENERIC-NEXT: vprotw %xmm0, (%rdi), %xmm0 # sched: [6:1.00] ; GENERIC-NEXT: vprotb $7, %xmm0, %xmm0 # sched: [1:1.00] ; GENERIC-NEXT: vprotd $7, %xmm0, %xmm0 # sched: [1:1.00] ; GENERIC-NEXT: vprotq $7, %xmm0, %xmm0 # sched: [1:1.00] ; GENERIC-NEXT: vprotw $7, %xmm0, %xmm0 # sched: [1:1.00] -; GENERIC-NEXT: vprotb $7, (%rdi), %xmm0 # sched: [5:1.00] -; GENERIC-NEXT: vprotd $7, (%rdi), %xmm0 # sched: [5:1.00] -; GENERIC-NEXT: vprotq $7, (%rdi), %xmm0 # sched: [5:1.00] -; GENERIC-NEXT: vprotw $7, (%rdi), %xmm0 # sched: [5:1.00] +; GENERIC-NEXT: vprotb $7, (%rdi), %xmm0 # sched: [6:1.00] +; GENERIC-NEXT: vprotd $7, (%rdi), %xmm0 # sched: [6:1.00] +; GENERIC-NEXT: vprotq $7, (%rdi), %xmm0 # sched: [6:1.00] +; GENERIC-NEXT: vprotw $7, (%rdi), %xmm0 # sched: [6:1.00] ; GENERIC-NEXT: #NO_APP ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -925,14 +925,14 @@ define void 
@test_vpsha(<2 x i64> %a0, <2 x i64> %a1, <2 x i64> *%a2) { ; GENERIC-NEXT: vpshad %xmm1, %xmm0, %xmm0 # sched: [1:1.00] ; GENERIC-NEXT: vpshaq %xmm1, %xmm0, %xmm0 # sched: [1:1.00] ; GENERIC-NEXT: vpshaw %xmm1, %xmm0, %xmm0 # sched: [1:1.00] -; GENERIC-NEXT: vpshab (%rdi), %xmm0, %xmm0 # sched: [5:1.00] -; GENERIC-NEXT: vpshad (%rdi), %xmm0, %xmm0 # sched: [5:1.00] -; GENERIC-NEXT: vpshaq (%rdi), %xmm0, %xmm0 # sched: [5:1.00] -; GENERIC-NEXT: vpshaw (%rdi), %xmm0, %xmm0 # sched: [5:1.00] -; GENERIC-NEXT: vpshab %xmm0, (%rdi), %xmm0 # sched: [5:1.00] -; GENERIC-NEXT: vpshad %xmm0, (%rdi), %xmm0 # sched: [5:1.00] -; GENERIC-NEXT: vpshaq %xmm0, (%rdi), %xmm0 # sched: [5:1.00] -; GENERIC-NEXT: vpshaw %xmm0, (%rdi), %xmm0 # sched: [5:1.00] +; GENERIC-NEXT: vpshab (%rdi), %xmm0, %xmm0 # sched: [6:1.00] +; GENERIC-NEXT: vpshad (%rdi), %xmm0, %xmm0 # sched: [6:1.00] +; GENERIC-NEXT: vpshaq (%rdi), %xmm0, %xmm0 # sched: [6:1.00] +; GENERIC-NEXT: vpshaw (%rdi), %xmm0, %xmm0 # sched: [6:1.00] +; GENERIC-NEXT: vpshab %xmm0, (%rdi), %xmm0 # sched: [6:1.00] +; GENERIC-NEXT: vpshad %xmm0, (%rdi), %xmm0 # sched: [6:1.00] +; GENERIC-NEXT: vpshaq %xmm0, (%rdi), %xmm0 # sched: [6:1.00] +; GENERIC-NEXT: vpshaw %xmm0, (%rdi), %xmm0 # sched: [6:1.00] ; GENERIC-NEXT: #NO_APP ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -965,14 +965,14 @@ define void @test_vpshl(<2 x i64> %a0, <2 x i64> %a1, <2 x i64> *%a2) { ; GENERIC-NEXT: vpshld %xmm1, %xmm0, %xmm0 # sched: [1:1.00] ; GENERIC-NEXT: vpshlq %xmm1, %xmm0, %xmm0 # sched: [1:1.00] ; GENERIC-NEXT: vpshlw %xmm1, %xmm0, %xmm0 # sched: [1:1.00] -; GENERIC-NEXT: vpshlb (%rdi), %xmm0, %xmm0 # sched: [5:1.00] -; GENERIC-NEXT: vpshld (%rdi), %xmm0, %xmm0 # sched: [5:1.00] -; GENERIC-NEXT: vpshlq (%rdi), %xmm0, %xmm0 # sched: [5:1.00] -; GENERIC-NEXT: vpshlw (%rdi), %xmm0, %xmm0 # sched: [5:1.00] -; GENERIC-NEXT: vpshlb %xmm0, (%rdi), %xmm0 # sched: [5:1.00] -; GENERIC-NEXT: vpshld %xmm0, (%rdi), %xmm0 # sched: [5:1.00] -; GENERIC-NEXT: vpshlq 
%xmm0, (%rdi), %xmm0 # sched: [5:1.00] -; GENERIC-NEXT: vpshlw %xmm0, (%rdi), %xmm0 # sched: [5:1.00] +; GENERIC-NEXT: vpshlb (%rdi), %xmm0, %xmm0 # sched: [6:1.00] +; GENERIC-NEXT: vpshld (%rdi), %xmm0, %xmm0 # sched: [6:1.00] +; GENERIC-NEXT: vpshlq (%rdi), %xmm0, %xmm0 # sched: [6:1.00] +; GENERIC-NEXT: vpshlw (%rdi), %xmm0, %xmm0 # sched: [6:1.00] +; GENERIC-NEXT: vpshlb %xmm0, (%rdi), %xmm0 # sched: [6:1.00] +; GENERIC-NEXT: vpshld %xmm0, (%rdi), %xmm0 # sched: [6:1.00] +; GENERIC-NEXT: vpshlq %xmm0, (%rdi), %xmm0 # sched: [6:1.00] +; GENERIC-NEXT: vpshlw %xmm0, (%rdi), %xmm0 # sched: [6:1.00] ; GENERIC-NEXT: #NO_APP ; GENERIC-NEXT: retq # sched: [1:1.00] ; -- 2.7.4