From caa203aed51cc93674755676fa430da20ec58504 Mon Sep 17 00:00:00 2001 From: Simon Pilgrim Date: Mon, 26 Mar 2018 13:15:20 +0000 Subject: [PATCH] [X86][Btver2] Double the AGU and schedule pipe resources for YMM Both the AGUs and schedule pipes are double pumped for 256-bit instructions as well as the functional units which we already model. llvm-svn: 328491 --- llvm/lib/Target/X86/X86ScheduleBtVer2.td | 62 +++---- llvm/test/CodeGen/X86/avx-schedule.ll | 30 +-- llvm/test/tools/llvm-mca/X86/BtVer2/pipes-fpu.s | 4 +- .../tools/llvm-mca/X86/BtVer2/resources-avx1.s | 206 ++++++++++----------- 4 files changed, 151 insertions(+), 151 deletions(-) diff --git a/llvm/lib/Target/X86/X86ScheduleBtVer2.td b/llvm/lib/Target/X86/X86ScheduleBtVer2.td index 4d0cebd5..bee7b5b 100644 --- a/llvm/lib/Target/X86/X86ScheduleBtVer2.td +++ b/llvm/lib/Target/X86/X86ScheduleBtVer2.td @@ -374,7 +374,7 @@ def : InstRW<[JWriteFHAddY], (instrs VHADDPDYrr, VHADDPSYrr, VHSUBPDYrr, VHSUBPS def JWriteFHAddYLd: SchedWriteRes<[JLAGU, JFPU0, JFPA]> { let Latency = 8; - let ResourceCycles = [1, 2, 2]; + let ResourceCycles = [2, 2, 2]; } def : InstRW<[JWriteFHAddYLd], (instrs VHADDPDYrm, VHADDPSYrm, VHSUBPDYrm, VHSUBPSYrm)>; @@ -507,7 +507,7 @@ def : InstRW<[JWriteFLogicY], (instrs VORPDYrr, VORPSYrr, def JWriteFLogicYLd: SchedWriteRes<[JLAGU, JFPU01, JFPX]> { let Latency = 6; - let ResourceCycles = [1, 2, 2]; + let ResourceCycles = [2, 2, 2]; let NumMicroOps = 2; } def : InstRW<[JWriteFLogicYLd], (instrs VORPDYrm, VORPSYrm, @@ -517,14 +517,14 @@ def : InstRW<[JWriteFLogicYLd], (instrs VORPDYrm, VORPSYrm, def JWriteVDPPSY: SchedWriteRes<[JFPU1, JFPM, JFPA]> { let Latency = 12; - let ResourceCycles = [1, 6, 6]; + let ResourceCycles = [2, 6, 6]; let NumMicroOps = 10; } def : InstRW<[JWriteVDPPSY], (instrs VDPPSYrri)>; def JWriteVDPPSYLd: SchedWriteRes<[JLAGU, JFPU1, JFPM, JFPA]> { let Latency = 17; - let ResourceCycles = [1, 1, 6, 6]; + let ResourceCycles = [2, 2, 6, 6]; let NumMicroOps = 10; } def : InstRW<[JWriteVDPPSYLd, ReadAfterLd], (instrs VDPPSYrmi)>; @@ -539,7 +539,7 @@ def : InstRW<[JWriteFAddY], (instrs VADDPDYrr, VADDPSYrr, def JWriteFAddYLd: SchedWriteRes<[JLAGU, JFPU0, JFPA]> { let Latency = 8; - let ResourceCycles = [1, 2, 2]; + let ResourceCycles = [2, 2, 2]; } def : InstRW<[JWriteFAddYLd, ReadAfterLd], (instrs VADDPDYrm, VADDPSYrm, VSUBPDYrm, VSUBPSYrm, @@ -547,37 +547,37 @@ def : InstRW<[JWriteFAddYLd, ReadAfterLd], (instrs VADDPDYrm, VADDPSYrm, def JWriteFDivY: SchedWriteRes<[JFPU1, JFPM]> { let Latency = 38; - let ResourceCycles = [1, 38]; + let ResourceCycles = [2, 38]; } def : InstRW<[JWriteFDivY], (instrs VDIVPDYrr, VDIVPSYrr)>; def JWriteFDivYLd: SchedWriteRes<[JLAGU, JFPU1, JFPM]> { let Latency = 43; - let ResourceCycles = [1, 1, 38]; + let ResourceCycles = [2, 2, 38]; } def : InstRW<[JWriteFDivYLd, ReadAfterLd], (instrs VDIVPDYrm, VDIVPSYrm)>; def JWriteVMULYPD: SchedWriteRes<[JFPU1, JFPM]> { let Latency = 4; - let ResourceCycles = [1, 4]; + let ResourceCycles = [2, 4]; } def : InstRW<[JWriteVMULYPD], (instrs VMULPDYrr)>; def JWriteVMULYPDLd: SchedWriteRes<[JLAGU, JFPU1, JFPM]> { let Latency = 9; - let ResourceCycles = [1, 1, 4]; + let ResourceCycles = [2, 2, 4]; } def : InstRW<[JWriteVMULYPDLd, ReadAfterLd], (instrs VMULPDYrm)>; def JWriteVMULYPS: SchedWriteRes<[JFPU1, JFPM]> { let Latency = 2; - let ResourceCycles = [1, 2]; + let ResourceCycles = [2, 2]; } def : InstRW<[JWriteVMULYPS], (instrs VMULPSYrr, VRCPPSYr, VRSQRTPSYr)>; def JWriteVMULYPSLd: SchedWriteRes<[JLAGU, JFPU1, JFPM]> { let Latency = 7; - let ResourceCycles = [1, 1, 2]; + let ResourceCycles = [2, 2, 2]; } def : InstRW<[JWriteVMULYPSLd, ReadAfterLd], (instrs VMULPSYrm, VRCPPSYm, VRSQRTPSYm)>; @@ -595,7 +595,7 @@ def : InstRW<[JWriteVMULPDLd], (instrs MULPDrm, MULSDrm, VMULPDrm, VMULSDrm)>; def JWriteVCVTY: SchedWriteRes<[JFPU1, JSTC]> { let Latency = 3; - let ResourceCycles = [1, 2]; + let ResourceCycles = [2, 2]; } def : InstRW<[JWriteVCVTY], (instrs VCVTDQ2PDYrr, VCVTDQ2PSYrr, VCVTPS2DQYrr, VCVTTPS2DQYrr, @@ -603,7 +603,7 @@ def : InstRW<[JWriteVCVTY], (instrs VCVTDQ2PDYrr, VCVTDQ2PSYrr, def JWriteVCVTYLd: SchedWriteRes<[JLAGU, JFPU1, JSTC]> { let Latency = 8; - let ResourceCycles = [1, 1, 2]; + let ResourceCycles = [2, 2, 2]; } def : InstRW<[JWriteVCVTYLd, ReadAfterLd], (instrs VCVTDQ2PDYrm, VCVTDQ2PSYrm, VCVTPS2DQYrm, VCVTTPS2DQYrm, @@ -621,7 +621,7 @@ def : InstRW<[JWriteMOVNTSt], (instrs MOVNTPDmr, MOVNTPSmr, MOVNTSD, MOVNTSS, VM def JWriteVMOVNTPYSt: SchedWriteRes<[JFPU1, JSTC, JSAGU]> { let Latency = 3; - let ResourceCycles = [1, 2, 1]; + let ResourceCycles = [2, 2, 2]; } def : InstRW<[JWriteVMOVNTPYSt], (instrs VMOVNTDQYmr, VMOVNTPDYmr, VMOVNTPSYmr)>; @@ -639,13 +639,13 @@ def : InstRW<[JWriteFCmpLd], (instregex "(V)?M(AX|IN)(P|S)(D|S)rm", def JWriteVCVTPDY: SchedWriteRes<[JFPU1, JSTC, JFPX]> { let Latency = 6; - let ResourceCycles = [1, 2, 4]; + let ResourceCycles = [2, 2, 4]; } def : InstRW<[JWriteVCVTPDY], (instrs VCVTPD2DQYrr, VCVTTPD2DQYrr, VCVTPD2PSYrr)>; def JWriteVCVTPDYLd: SchedWriteRes<[JLAGU, JFPU1, JSTC, JFPX]> { let Latency = 11; - let ResourceCycles = [1, 1, 2, 4]; + let ResourceCycles = [2, 2, 2, 4]; } def : InstRW<[JWriteVCVTPDYLd, ReadAfterLd], (instrs VCVTPD2DQYrm, VCVTTPD2DQYrm, VCVTPD2PSYrm)>; @@ -679,20 +679,20 @@ def : InstRW<[JWriteVPERMLd, ReadAfterLd], (instrs VPERMILPDrm, VPERMILPSrm)>; def JWriteVPERMY: SchedWriteRes<[JFPU01, JFPX]> { let Latency = 3; - let ResourceCycles = [1, 6]; + let ResourceCycles = [2, 6]; let NumMicroOps = 6; } def : InstRW<[JWriteVPERMY], (instrs VBLENDVPDYrr, VBLENDVPSYrr, VPERMILPDYrr, VPERMILPSYrr)>; def JWriteVPERMYLd: SchedWriteRes<[JLAGU, JFPU01, JFPX]> { let Latency = 8; - let ResourceCycles = [1, 1, 6]; + let ResourceCycles = [2, 2, 6]; let NumMicroOps = 6; } def : InstRW<[JWriteVPERMYLd, ReadAfterLd], (instrs VBLENDVPDYrm, VBLENDVPSYrm, VPERMILPDYrm, VPERMILPSYrm)>; def JWriteShuffleY: SchedWriteRes<[JFPU01, JFPX]> { - let ResourceCycles = [1, 2]; + let ResourceCycles = [2, 2]; let NumMicroOps = 2; } def : InstRW<[JWriteShuffleY], (instrs VMOVDDUPYrr, VMOVSHDUPYrr, VMOVSLDUPYrr, @@ -700,7 +700,7 @@ def : InstRW<[JWriteShuffleY], (instrs VMOVDDUPYrr, VMOVSHDUPYrr, VMOVSLDUPYrr, def JWriteShuffleYLd: SchedWriteRes<[JLAGU, JFPU01, JFPX]> { let Latency = 6; - let ResourceCycles = [1, 1, 2]; + let ResourceCycles = [2, 2, 2]; let NumMicroOps = 2; } def : InstRW<[JWriteShuffleYLd, ReadAfterLd], (instrs VMOVDDUPYrm, VMOVSHDUPYrm, VMOVSLDUPYrm, @@ -708,19 +708,19 @@ def : InstRW<[JWriteShuffleYLd, ReadAfterLd], (instrs VMOVDDUPYrm, VMOVSHDUPYrm, def JWriteVBROADCASTYLd: SchedWriteRes<[JLAGU, JFPU01, JFPX]> { let Latency = 6; - let ResourceCycles = [1, 1, 4]; + let ResourceCycles = [1, 2, 4]; } def : InstRW<[JWriteVBROADCASTYLd, ReadAfterLd], (instrs VBROADCASTSDYrm, VBROADCASTSSYrm)>; def JWriteFPAY22: SchedWriteRes<[JFPU0, JFPA]> { let Latency = 2; - let ResourceCycles = [1, 2]; + let ResourceCycles = [2, 2]; } def : InstRW<[JWriteFPAY22], (instregex "VCMPP(S|D)Yrri", "VM(AX|IN)P(D|S)Yrr")>; def JWriteFPAY22Ld: SchedWriteRes<[JLAGU, JFPU0, JFPA]> { let Latency = 7; - let ResourceCycles = [1, 1, 2]; + let ResourceCycles = [2, 2, 2]; } def : InstRW<[JWriteFPAY22Ld, ReadAfterLd], (instregex "VCMPP(S|D)Yrmi", "VM(AX|IN)P(D|S)Yrm")>; @@ -732,7 +732,7 @@ def : InstRW<[JWriteVMaskMovLd], (instrs VMASKMOVPDrm, VMASKMOVPSrm)>; def JWriteVMaskMovYLd: SchedWriteRes<[JLAGU, JFPU01, JFPX]> { let Latency = 6; - let ResourceCycles = [1, 1, 4]; + let ResourceCycles = [2, 2, 4]; let NumMicroOps = 2; } def : InstRW<[JWriteVMaskMovYLd], (instrs VMASKMOVPDYrm, VMASKMOVPSYrm)>; @@ -745,7 +745,7 @@ def : InstRW<[JWriteVMaskMovSt], (instrs VMASKMOVPDmr, VMASKMOVPSmr)>; def JWriteVMaskMovYSt: SchedWriteRes<[JFPU01, JFPX, JSAGU]> { let Latency = 6; - let ResourceCycles = [1, 4, 1]; + let ResourceCycles = [2, 4, 2]; let NumMicroOps = 2; } def : InstRW<[JWriteVMaskMovYSt], (instrs VMASKMOVPDYmr, VMASKMOVPSYmr)>; @@ -767,14 +767,14 @@ def : InstRW<[JWriteVMOVMSK], (instrs MOVMSKPDrr, VMOVMSKPDrr, VMOVMSKPDYrr, // and ALU0 in the integer unit is occupied instead. def JWriteVTESTY: SchedWriteRes<[JFPU01, JFPX, JFPA]> { let Latency = 4; - let ResourceCycles = [1, 2, 2]; + let ResourceCycles = [2, 2, 2]; let NumMicroOps = 3; } def : InstRW<[JWriteVTESTY], (instrs VPTESTYrr, VTESTPDYrr, VTESTPSYrr)>; def JWriteVTESTYLd: SchedWriteRes<[JLAGU, JFPU01, JFPX, JFPA]> { let Latency = 9; - let ResourceCycles = [1, 1, 2, 2]; + let ResourceCycles = [2, 2, 2, 2]; let NumMicroOps = 3; } def : InstRW<[JWriteVTESTYLd], (instrs VPTESTYrm, VTESTPDYrm, VTESTPSYrm)>; @@ -791,25 +791,25 @@ def : InstRW<[JWriteVTESTLd], (instrs PTESTrm, VPTESTrm, VTESTPDrm, VTESTPSrm)>; def JWriteVSQRTYPD: SchedWriteRes<[JFPU1, JFPM]> { let Latency = 54; - let ResourceCycles = [1, 54]; + let ResourceCycles = [2, 54]; } def : InstRW<[JWriteVSQRTYPD], (instrs VSQRTPDYr)>; def JWriteVSQRTYPDLd: SchedWriteRes<[JLAGU, JFPU1, JFPM]> { let Latency = 59; - let ResourceCycles = [1, 1, 54]; + let ResourceCycles = [2, 2, 54]; } def : InstRW<[JWriteVSQRTYPDLd], (instrs VSQRTPDYm)>; def JWriteVSQRTYPS: SchedWriteRes<[JFPU1, JFPM]> { let Latency = 42; - let ResourceCycles = [1, 42]; + let ResourceCycles = [2, 42]; } def : InstRW<[JWriteVSQRTYPS], (instrs VSQRTPSYr)>; def JWriteVSQRTYPSLd: SchedWriteRes<[JLAGU, JFPU1, JFPM]> { let Latency = 47; - let ResourceCycles = [1, 1, 42]; + let ResourceCycles = [2, 2, 42]; } def : InstRW<[JWriteVSQRTYPSLd], (instrs VSQRTPSYm)>; diff --git a/llvm/test/CodeGen/X86/avx-schedule.ll b/llvm/test/CodeGen/X86/avx-schedule.ll index 1cee7b5..263d071 100644 --- a/llvm/test/CodeGen/X86/avx-schedule.ll +++ b/llvm/test/CodeGen/X86/avx-schedule.ll @@ -273,7 +273,7 @@ define <4 x double> @test_andnotpd(<4 x double> %a0, <4 x double> %a1, <4 x doub ; BTVER2-LABEL: test_andnotpd: ; BTVER2: # %bb.0: ; BTVER2-NEXT: vandnpd %ymm1, %ymm0, %ymm0 # sched: [1:1.00] -; BTVER2-NEXT: vandnpd (%rdi), %ymm0, %ymm0 # sched: [6:1.00] +; BTVER2-NEXT: vandnpd (%rdi), %ymm0, %ymm0 # sched: [6:2.00] ; BTVER2-NEXT: vaddpd %ymm0, %ymm1, %ymm0 # sched: [3:2.00] ; BTVER2-NEXT: retq # sched: [4:1.00] ; @@ -342,7 +342,7 @@ define <8 x float> @test_andnotps(<8 x float> %a0, <8 x float> %a1, <8 x float> ; BTVER2-LABEL: test_andnotps: ; BTVER2: # %bb.0: ; BTVER2-NEXT: vandnps %ymm1, %ymm0, %ymm0 # sched: [1:1.00] -; BTVER2-NEXT: vandnps (%rdi), %ymm0, %ymm0 # sched: [6:1.00] +; BTVER2-NEXT: vandnps (%rdi), %ymm0, %ymm0 # sched: [6:2.00] ; BTVER2-NEXT: vaddps %ymm0, %ymm1, %ymm0 # sched: [3:2.00] ; BTVER2-NEXT: retq # sched: [4:1.00] ; @@ -411,7 +411,7 @@ define <4 x double> @test_andpd(<4 x double> %a0, <4 x double> %a1, <4 x double> ; BTVER2-LABEL: test_andpd: ; BTVER2: # %bb.0: ; BTVER2-NEXT: vandpd %ymm1, %ymm0, %ymm0 # sched: [1:1.00] -; BTVER2-NEXT: vandpd (%rdi), %ymm0, %ymm0 # sched: [6:1.00] +; BTVER2-NEXT: vandpd (%rdi), %ymm0, %ymm0 # sched: [6:2.00] ; BTVER2-NEXT: vaddpd %ymm0, %ymm1, %ymm0 # sched: [3:2.00] ; BTVER2-NEXT: retq # sched: [4:1.00] ; @@ -478,7 +478,7 @@ define <8 x float> @test_andps(<8 x float> %a0, <8 x float> %a1, <8 x float> *%a ; BTVER2-LABEL: test_andps: ; BTVER2: # %bb.0: ; BTVER2-NEXT: vandps %ymm1, %ymm0, %ymm0 # sched: [1:1.00] -; BTVER2-NEXT: vandps (%rdi), %ymm0, %ymm0 # sched: [6:1.00] +; BTVER2-NEXT: vandps (%rdi), %ymm0, %ymm0 # sched: [6:2.00] ; BTVER2-NEXT: vaddps %ymm0, %ymm1, %ymm0 # sched: [3:2.00] ; BTVER2-NEXT: retq # sched: [4:1.00] ; @@ -2690,7 +2690,7 @@ define <4 x double> @test_movddup(<4 x double> %a0, <4 x double> *%a1) { ; ; BTVER2-LABEL: test_movddup: ; BTVER2: # %bb.0: -; BTVER2-NEXT: vmovddup {{.*#+}} ymm1 = mem[0,0,2,2] sched: [6:1.00] +; BTVER2-NEXT: vmovddup {{.*#+}} ymm1 = mem[0,0,2,2] sched: [6:2.00] ; BTVER2-NEXT: vmovddup {{.*#+}} ymm0 = ymm0[0,0,2,2] sched: [1:1.00] ; BTVER2-NEXT: vaddpd %ymm1, %ymm0, %ymm0 # sched: [3:2.00] ; BTVER2-NEXT: retq # sched: [4:1.00] @@ -3030,7 +3030,7 @@ define <8 x float> @test_movshdup(<8 x float> %a0, <8 x float> *%a1) { ; ; BTVER2-LABEL: test_movshdup: ; BTVER2: # %bb.0: -; BTVER2-NEXT: vmovshdup {{.*#+}} ymm1 = mem[1,1,3,3,5,5,7,7] sched: [6:1.00] +; BTVER2-NEXT: vmovshdup {{.*#+}} ymm1 = mem[1,1,3,3,5,5,7,7] sched: [6:2.00] ; BTVER2-NEXT: vmovshdup {{.*#+}} ymm0 = ymm0[1,1,3,3,5,5,7,7] sched: [1:1.00] ; BTVER2-NEXT: vaddps %ymm1, %ymm0, %ymm0 # sched: [3:2.00] ; BTVER2-NEXT: retq # sched: [4:1.00] @@ -3093,7 +3093,7 @@ define <8 x float> @test_movsldup(<8 x float> %a0, <8 x float> *%a1) { ; ; BTVER2-LABEL: test_movsldup: ; BTVER2: # %bb.0: -; BTVER2-NEXT: vmovsldup {{.*#+}} ymm1 = mem[0,0,2,2,4,4,6,6] sched: [6:1.00] +; BTVER2-NEXT: vmovsldup {{.*#+}} ymm1 = mem[0,0,2,2,4,4,6,6] sched: [6:2.00] ; BTVER2-NEXT: vmovsldup {{.*#+}} ymm0 = ymm0[0,0,2,2,4,4,6,6] sched: [1:1.00] ; BTVER2-NEXT: vaddps %ymm1, %ymm0, %ymm0 # sched: [3:2.00] ; BTVER2-NEXT: retq # sched: [4:1.00] @@ -3393,7 +3393,7 @@ define <4 x double> @orpd(<4 x double> %a0, <4 x double> %a1, <4 x double> *%a2) ; BTVER2-LABEL: orpd: ; BTVER2: # %bb.0: ; BTVER2-NEXT: vorpd %ymm1, %ymm0, %ymm0 # sched: [1:1.00] -; BTVER2-NEXT: vorpd (%rdi), %ymm0, %ymm0 # sched: [6:1.00] +; BTVER2-NEXT: vorpd (%rdi), %ymm0, %ymm0 # sched: [6:2.00] ; BTVER2-NEXT: vaddpd %ymm0, %ymm1, %ymm0 # sched: [3:2.00] ; BTVER2-NEXT: retq # sched: [4:1.00] ; @@ -3460,7 +3460,7 @@ define <8 x float> @test_orps(<8 x float> %a0, <8 x float> %a1, <8 x float> *%a2 ; BTVER2-LABEL: test_orps: ; BTVER2: # %bb.0: ; BTVER2-NEXT: vorps %ymm1, %ymm0, %ymm0 # sched: [1:1.00] -; BTVER2-NEXT: vorps (%rdi), %ymm0, %ymm0 # sched: [6:1.00] +; BTVER2-NEXT: vorps (%rdi), %ymm0, %ymm0 # sched: [6:2.00] ; BTVER2-NEXT: vaddps %ymm0, %ymm1, %ymm0 # sched: [3:2.00] ; BTVER2-NEXT: retq # sched: [4:1.00] ; @@ -3652,7 +3652,7 @@ define <4 x double> @test_permilpd_ymm(<4 x double> %a0, <4 x double> *%a1) { ; ; BTVER2-LABEL: test_permilpd_ymm: ; BTVER2: # %bb.0: -; BTVER2-NEXT: vpermilpd {{.*#+}} ymm1 = mem[1,0,2,3] sched: [6:1.00] +; BTVER2-NEXT: vpermilpd {{.*#+}} ymm1 = mem[1,0,2,3] sched: [6:2.00] ; BTVER2-NEXT: vpermilpd {{.*#+}} ymm0 = ymm0[1,0,2,3] sched: [1:1.00] ; BTVER2-NEXT: vaddpd %ymm1, %ymm0, %ymm0 # sched: [3:2.00] ; BTVER2-NEXT: retq # sched: [4:1.00] @@ -3778,7 +3778,7 @@ define <8 x float> @test_permilps_ymm(<8 x float> %a0, <8 x float> *%a1) { ; ; BTVER2-LABEL: test_permilps_ymm: ; BTVER2: # %bb.0: -; BTVER2-NEXT: vpermilps {{.*#+}} ymm1 = mem[3,2,1,0,7,6,5,4] sched: [6:1.00] +; BTVER2-NEXT: vpermilps {{.*#+}} ymm1 = mem[3,2,1,0,7,6,5,4] sched: [6:2.00] ; BTVER2-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4] sched: [1:1.00] ; BTVER2-NEXT: vaddps %ymm1, %ymm0, %ymm0 # sched: [3:2.00] ; BTVER2-NEXT: retq # sched: [4:1.00] @@ -4318,7 +4318,7 @@ define <4 x double> @test_shufpd(<4 x double> %a0, <4 x double> %a1, <4 x double ; BTVER2-LABEL: test_shufpd: ; BTVER2: # %bb.0: ; BTVER2-NEXT: vshufpd {{.*#+}} ymm0 = ymm0[1],ymm1[0],ymm0[2],ymm1[3] sched: [1:1.00] -; BTVER2-NEXT: vshufpd {{.*#+}} ymm1 = ymm1[1],mem[0],ymm1[2],mem[3] sched: [6:1.00] +; BTVER2-NEXT: vshufpd {{.*#+}} ymm1 = ymm1[1],mem[0],ymm1[2],mem[3] sched: [6:2.00] ; BTVER2-NEXT: vaddpd %ymm1, %ymm0, %ymm0 # sched: [3:2.00] ; BTVER2-NEXT: retq # sched: [4:1.00] ; @@ -4381,7 +4381,7 @@ define <8 x float> @test_shufps(<8 x float> %a0, <8 x float> %a1, <8 x float> *% ; BTVER2-LABEL: test_shufps: ; BTVER2: # %bb.0: ; BTVER2-NEXT: vshufps {{.*#+}} ymm0 = ymm0[0,0],ymm1[0,0],ymm0[4,4],ymm1[4,4] sched: [1:1.00] -; BTVER2-NEXT: vshufps {{.*#+}} ymm1 = ymm1[0,3],mem[0,0],ymm1[4,7],mem[4,4] sched: [6:1.00] +; BTVER2-NEXT: vshufps {{.*#+}} ymm1 = ymm1[0,3],mem[0,0],ymm1[4,7],mem[4,4] sched: [6:2.00] ; BTVER2-NEXT: vaddps %ymm1, %ymm0, %ymm0 # sched: [3:2.00] ; BTVER2-NEXT: retq # sched: [4:1.00] ; @@ -5248,7 +5248,7 @@ define <4 x double> @test_xorpd(<4 x double> %a0, <4 x double> %a1, <4 x double> ; BTVER2-LABEL: test_xorpd: ; BTVER2: # %bb.0: ; BTVER2-NEXT: vxorpd %ymm1, %ymm0, %ymm0 # sched: [1:1.00] -; BTVER2-NEXT: vxorpd (%rdi), %ymm0, %ymm0 # sched: [6:1.00] +; BTVER2-NEXT: vxorpd (%rdi), %ymm0, %ymm0 # sched: [6:2.00] ; BTVER2-NEXT: vaddpd %ymm0, %ymm1, %ymm0 # sched: [3:2.00] ; BTVER2-NEXT: retq # sched: [4:1.00] ; @@ -5315,7 +5315,7 @@ define <8 x float> @test_xorps(<8 x float> %a0, <8 x float> %a1, <8 x float> *%a ; BTVER2-LABEL: test_xorps: ; BTVER2: # %bb.0: ; BTVER2-NEXT: vxorps %ymm1, %ymm0, %ymm0 # sched: [1:1.00] -; BTVER2-NEXT: vxorps (%rdi), %ymm0, %ymm0 # sched: [6:1.00] +; BTVER2-NEXT: vxorps (%rdi), %ymm0, %ymm0 # sched: [6:2.00] ; BTVER2-NEXT: vaddps %ymm0, %ymm1, %ymm0 # sched: [3:2.00] ; BTVER2-NEXT: retq # sched: [4:1.00] ; diff --git a/llvm/test/tools/llvm-mca/X86/BtVer2/pipes-fpu.s b/llvm/test/tools/llvm-mca/X86/BtVer2/pipes-fpu.s index aa5f3ef..2fd3efe 100644 --- a/llvm/test/tools/llvm-mca/X86/BtVer2/pipes-fpu.s +++ b/llvm/test/tools/llvm-mca/X86/BtVer2/pipes-fpu.s @@ -62,7 +62,7 @@ vsqrtps %ymm0, %ymm2 # CHECK: Resource pressure per iteration: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12] [13] -# CHECK-NEXT: - - - 3.00 63.00 5.00 4.00 - - - 1.00 0.50 0.50 2.00 +# CHECK-NEXT: - - - 3.00 63.00 5.00 5.00 - - - 1.00 0.50 0.50 2.00 # CHECK: Resource pressure by instruction: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12] [13] Instructions: @@ -73,7 +73,7 @@ vsqrtps %ymm0, %ymm2 # CHECK-NEXT: - - - 1.00 - 1.00 - - - - - - - - vaddps %xmm0, %xmm1, %xmm2 # CHECK-NEXT: - - - - 21.00 - 1.00 - - - - - - - vsqrtps %xmm0, %xmm2 # CHECK-NEXT: - - - 2.00 - 2.00 - - - - - - - - vaddps %ymm0, %ymm1, %ymm2 -# CHECK-NEXT: - - - - 42.00 - 1.00 - - - - - - - vsqrtps %ymm0, %ymm2 +# CHECK-NEXT: - - - - 42.00 - 2.00 - - - - - - - vsqrtps %ymm0, %ymm2 # CHECK: Timeline view: diff --git a/llvm/test/tools/llvm-mca/X86/BtVer2/resources-avx1.s b/llvm/test/tools/llvm-mca/X86/BtVer2/resources-avx1.s index f9987f4..c9b43a5 100644 --- a/llvm/test/tools/llvm-mca/X86/BtVer2/resources-avx1.s +++ b/llvm/test/tools/llvm-mca/X86/BtVer2/resources-avx1.s @@ -1024,11 +1024,11 @@ vzeroupper # CHECK-NEXT: - - - 1.00 - 1.00 - - - - - - - - vaddpd %xmm0, %xmm1, %xmm2 # CHECK-NEXT: - - - 1.00 - 1.00 - 1.00 - - - - - - vaddpd (%rax), %xmm1, %xmm2 # CHECK-NEXT: - - - 2.00 - 2.00 - - - - - - - - vaddpd %ymm0, %ymm1, %ymm2 -# CHECK-NEXT: - - - 2.00 - 2.00 - 1.00 - - - - - - vaddpd (%rax), %ymm1, %ymm2 +# CHECK-NEXT: - - - 2.00 - 2.00 - 2.00 - - - - - - vaddpd (%rax), %ymm1, %ymm2 # CHECK-NEXT: - - - 1.00 - 1.00 - - - - - - - - vaddps %xmm0, %xmm1, %xmm2 # CHECK-NEXT: - - - 1.00 - 1.00 - 1.00 - - - - - - vaddps (%rax), %xmm1, %xmm2 # CHECK-NEXT: - - - 2.00 - 2.00 - - - - - - - - vaddps %ymm0, %ymm1, %ymm2 -# CHECK-NEXT: - - - 2.00 - 2.00 - 1.00 - - - - - - vaddps (%rax), %ymm1, %ymm2 +# CHECK-NEXT: - - - 2.00 - 2.00 - 2.00 - - - - - - vaddps (%rax), %ymm1, %ymm2 # CHECK-NEXT: - - - 1.00 - 1.00 - - - - - - - - vaddsd %xmm0, %xmm1, %xmm2 # CHECK-NEXT: - - - 1.00 - 1.00 - 1.00 - - - - - - vaddsd (%rax), %xmm1, %xmm2 # CHECK-NEXT: - - - 1.00 - 1.00 - - - - - - - - vaddss %xmm0, %xmm1, %xmm2 @@ -1036,11 +1036,11 @@ vzeroupper # CHECK-NEXT: - - - 1.00 - 1.00 - - - - - - - - vaddsubpd %xmm0, %xmm1, %xmm2 # CHECK-NEXT: - - - 1.00 - 1.00 - 1.00 - - - - - - vaddsubpd (%rax), %xmm1, %xmm2 # CHECK-NEXT: - - - 2.00 - 2.00 - - - - - - - - vaddsubpd %ymm0, %ymm1, %ymm2 -# CHECK-NEXT: - - - 2.00 - 2.00 - 1.00 - - - - - - vaddsubpd (%rax), %ymm1, %ymm2 +# CHECK-NEXT: - - - 2.00 - 2.00 - 2.00 - - - - - - vaddsubpd (%rax), %ymm1, %ymm2 # CHECK-NEXT: - - - 1.00 - 1.00 - - - - - - - - vaddsubps %xmm0, %xmm1, %xmm2 # CHECK-NEXT: - - - 1.00 - 1.00 - 1.00 - - - - - - vaddsubps (%rax), %xmm1, %xmm2 # CHECK-NEXT: - - - 2.00 - 2.00 - - - - - - - - vaddsubps %ymm0, %ymm1, %ymm2 -# CHECK-NEXT: - - - 2.00 - 2.00 - 1.00 - - - - - - vaddsubps (%rax), %ymm1, %ymm2 +# CHECK-NEXT: - - - 2.00 - 2.00 - 2.00 - - - - - - vaddsubps (%rax), %ymm1, %ymm2 # CHECK-NEXT: - - - - - 1.00 - - - - - - - 1.00 vaesdec %xmm0, %xmm1, %xmm2 # CHECK-NEXT: - - - - - 1.00 - 1.00 - - - - - 1.00 vaesdec (%rax), %xmm1, %xmm2 # CHECK-NEXT: - - - - - 1.00 - - - - - - - 1.00 vaesdeclast %xmm0, %xmm1, %xmm2 @@ -1056,19 +1056,19 @@ vzeroupper # CHECK-NEXT: - - - 0.50 0.50 0.50 0.50 - - - - - - - vandnpd %xmm0, %xmm1, %xmm2 # CHECK-NEXT: - - - 0.50 0.50 0.50 0.50 1.00 - - - - - - vandnpd (%rax), %xmm1, %xmm2 # CHECK-NEXT: - - - 1.00 1.00 1.00 1.00 - - - - - - - vandnpd %ymm0, %ymm1, %ymm2 -# CHECK-NEXT: - - - 1.00 1.00 1.00 1.00 1.00 - - - - - - vandnpd (%rax), %ymm1, %ymm2 +# CHECK-NEXT: - - - 1.00 1.00 1.00 1.00 2.00 - - - - - - vandnpd (%rax), %ymm1, %ymm2 # CHECK-NEXT: - - - 0.50 0.50 0.50 0.50 - - - - - - - vandnps %xmm0, %xmm1, %xmm2 # CHECK-NEXT: - - - 0.50 0.50 0.50 0.50 1.00 - - - - - - vandnps (%rax), %xmm1, %xmm2 # CHECK-NEXT: - - - 1.00 1.00 1.00 1.00 - - - - - - - vandnps %ymm0, %ymm1, %ymm2 -# CHECK-NEXT: - - - 1.00 1.00 1.00 1.00 1.00 - - - - - - vandnps (%rax), %ymm1, %ymm2 +# CHECK-NEXT: - - - 1.00 1.00 1.00 1.00 2.00 - - - - - - vandnps (%rax), %ymm1, %ymm2 # CHECK-NEXT: - - - 0.50 0.50 0.50 0.50 - - - - - - - vandpd %xmm0, %xmm1, %xmm2 # CHECK-NEXT: - - - 0.50 0.50 0.50 0.50 1.00 - - - - - - vandpd (%rax), %xmm1, %xmm2 # CHECK-NEXT: - - - 1.00 1.00 1.00 1.00 - - - - - - - vandpd %ymm0, %ymm1, %ymm2 -# CHECK-NEXT: - - - 1.00 1.00 1.00 1.00 1.00 - - - - - - vandpd (%rax), %ymm1, %ymm2 +# CHECK-NEXT: - - - 1.00 1.00 1.00 1.00 2.00 - - - - - - vandpd (%rax), %ymm1, %ymm2 # CHECK-NEXT: - - - 0.50 0.50 0.50 0.50 - - - - - - - vandps %xmm0, %xmm1, %xmm2 # CHECK-NEXT: - - - 0.50 0.50 0.50 0.50 1.00 - - - - - - vandps (%rax), %xmm1, %xmm2 # CHECK-NEXT: - - - 1.00 1.00 1.00 1.00 - - - - - - - vandps %ymm0, %ymm1, %ymm2 -# CHECK-NEXT: - - - 1.00 1.00 1.00 1.00 1.00 - - - - - - vandps (%rax), %ymm1, %ymm2 +# CHECK-NEXT: - - - 1.00 1.00 1.00 1.00 2.00 - - - - - - vandps (%rax), %ymm1, %ymm2 # CHECK-NEXT: - - - 0.50 0.50 0.50 0.50 - - - - - - - vblendpd $11, %xmm0, %xmm1, %xmm2 # CHECK-NEXT: - - - 0.50 0.50 0.50 0.50 1.00 - - - - - - vblendpd $11, (%rax), %xmm1, %xmm2 # CHECK-NEXT: - - - 0.50 0.50 0.50 0.50 - - - - - - - vblendpd $11, %ymm0, %ymm1, %ymm2 @@ -1079,24 +1079,24 @@ vzeroupper # CHECK-NEXT: - - - 0.50 0.50 0.50 0.50 1.00 - - - - - - vblendps $11, (%rax), %ymm1, %ymm2 # CHECK-NEXT: - - - 2.00 2.00 0.50 0.50 - - - - - - - vblendvpd %xmm3, %xmm0, %xmm1, %xmm2 # CHECK-NEXT: - - - 2.00 2.00 0.50 0.50 1.00 - - - - - - vblendvpd %xmm3, (%rax), %xmm1, %xmm2 -# CHECK-NEXT: - - - 3.00 3.00 0.50 0.50 - - - - - - - vblendvpd %ymm3, %ymm0, %ymm1, %ymm2 -# CHECK-NEXT: - - - 3.00 3.00 0.50 0.50 1.00 - - - - - - vblendvpd %ymm3, (%rax), %ymm1, %ymm2 +# CHECK-NEXT: - - - 3.00 3.00 1.00 1.00 - - - - - - - vblendvpd %ymm3, %ymm0, %ymm1, %ymm2 +# CHECK-NEXT: - - - 3.00 3.00 1.00 1.00 2.00 - - - - - - vblendvpd %ymm3, (%rax), %ymm1, %ymm2 # CHECK-NEXT: - - - 2.00 2.00 0.50 0.50 - - - - - - - vblendvps %xmm3, %xmm0, %xmm1, %xmm2 # CHECK-NEXT: - - - 2.00 2.00 0.50 0.50 1.00 - - - - - - vblendvps %xmm3, (%rax), %xmm1, %xmm2 -# CHECK-NEXT: - - - 3.00 3.00 0.50 0.50 - - - - - - - vblendvps %ymm3, %ymm0, %ymm1, %ymm2 -# CHECK-NEXT: - - - 3.00 3.00 0.50 0.50 1.00 - - - - - - vblendvps %ymm3, (%rax), %ymm1, %ymm2 +# CHECK-NEXT: - - - 3.00 3.00 1.00 1.00 - - - - - - - vblendvps %ymm3, %ymm0, %ymm1, %ymm2 +# CHECK-NEXT: - - - 3.00 3.00 1.00 1.00 2.00 - - - - - - vblendvps %ymm3, (%rax), %ymm1, %ymm2 # CHECK-NEXT: - - - 0.50 0.50 0.50 0.50 1.00 - - - - - - vbroadcastf128 (%rax), %ymm2 -# CHECK-NEXT: - - - 2.00 2.00 0.50 0.50 1.00 - - - - - - vbroadcastsd (%rax), %ymm2 +# CHECK-NEXT: - - - 2.00 2.00 1.00 1.00 1.00 - - - - - - vbroadcastsd (%rax), %ymm2 # CHECK-NEXT: - - - 0.50 0.50 0.50 0.50 1.00 - - - - - - vbroadcastss (%rax), %xmm2 -# CHECK-NEXT: - - - 2.00 2.00 0.50 0.50 1.00 - - - - - - vbroadcastss (%rax), %ymm2 +# CHECK-NEXT: - - - 2.00 2.00 1.00 1.00 1.00 - - - - - - vbroadcastss (%rax), %ymm2 # CHECK-NEXT: - - - 1.00 - 1.00 - - - - - - - - vcmppd $0, %xmm0, %xmm1, %xmm2 # CHECK-NEXT: - - - 1.00 - 1.00 - 1.00 - - - - - - vcmppd $0, (%rax), %xmm1, %xmm2 -# CHECK-NEXT: - - - 2.00 - 1.00 - - - - - - - - vcmppd $0, %ymm0, %ymm1, %ymm2 -# CHECK-NEXT: - - - 2.00 - 1.00 - 1.00 - - - - - - vcmppd $0, (%rax), %ymm1, %ymm2 +# CHECK-NEXT: - - - 2.00 - 2.00 - - - - - - - - vcmppd $0, %ymm0, %ymm1, %ymm2 +# CHECK-NEXT: - - - 2.00 - 2.00 - 2.00 - - - - - - vcmppd $0, (%rax), %ymm1, %ymm2 # CHECK-NEXT: - - - 1.00 - 1.00 - - - - - - - - vcmpps $0, %xmm0, %xmm1, %xmm2 # CHECK-NEXT: - - - 1.00 - 1.00 - 1.00 - - - - - - vcmpps $0, (%rax), %xmm1, %xmm2 -# CHECK-NEXT: - - - 2.00 - 1.00 - - - - - - - - vcmpps $0, %ymm0, %ymm1, %ymm2 -# CHECK-NEXT: - - - 2.00 - 1.00 - 1.00 - - - - - - vcmpps $0, (%rax), %ymm1, %ymm2 +# CHECK-NEXT: - - - 2.00 - 2.00 - - - - - - - - vcmpps $0, %ymm0, %ymm1, %ymm2 +# CHECK-NEXT: - - - 2.00 - 2.00 - 2.00 - - - - - - vcmpps $0, (%rax), %ymm1, %ymm2 # CHECK-NEXT: - - - 1.00 - 1.00 - - - - - - - - vcmpsd $0, %xmm0, %xmm1, %xmm2 # CHECK-NEXT: - - - 1.00 - 1.00 - 1.00 - - - - - - vcmpsd $0, (%rax), %xmm1, %xmm2 # CHECK-NEXT: - - - 1.00 - 1.00 - - - - - - - - vcmpss $0, %xmm0, %xmm1, %xmm2 @@ -1107,24 +1107,24 @@ vzeroupper # CHECK-NEXT: - - - 1.00 - 1.00 - 1.00 - - - - - - vcomiss (%rax), %xmm1 # CHECK-NEXT: - - - - - - 1.00 - - - 1.00 - - - vcvtdq2pd %xmm0, %xmm2 # CHECK-NEXT: - - - - - - 1.00 1.00 - - 1.00 - - - vcvtdq2pd (%rax), %xmm2 -# CHECK-NEXT: - - - - - - 1.00 - - - 2.00 - - - vcvtdq2pd %xmm0, %ymm2 -# CHECK-NEXT: - - - - - - 1.00 1.00 - - 2.00 - - - vcvtdq2pd (%rax), %ymm2 +# CHECK-NEXT: - - - - - - 2.00 - - - 2.00 - - - vcvtdq2pd %xmm0, %ymm2 +# CHECK-NEXT: - - - - - - 2.00 2.00 - - 2.00 - - - vcvtdq2pd (%rax), %ymm2 # CHECK-NEXT: - - - - - - 1.00 - - - 1.00 - - - vcvtdq2ps %xmm0, %xmm2 # CHECK-NEXT: - - - - - - 1.00 1.00 - - 1.00 - - - vcvtdq2ps (%rax), %xmm2 -# CHECK-NEXT: - - - - - - 1.00 - - - 2.00 - - - vcvtdq2ps %ymm0, %ymm2 -# CHECK-NEXT: - - - - - - 1.00 1.00 - - 2.00 - - - vcvtdq2ps (%rax), %ymm2 +# CHECK-NEXT: - - - - - - 2.00 - - - 2.00 - - - vcvtdq2ps %ymm0, %ymm2 +# CHECK-NEXT: - - - - - - 2.00 2.00 - - 2.00 - - - vcvtdq2ps (%rax), %ymm2 # CHECK-NEXT: - - - - - - 1.00 - - - 1.00 - - - vcvtpd2dq %xmm0, %xmm2 # CHECK-NEXT: - - - - - - 1.00 1.00 - - 1.00 - - - vcvtpd2dqx (%rax), %xmm2 -# CHECK-NEXT: - - - 2.00 2.00 - 1.00 - - - 2.00 - - - vcvtpd2dq %ymm0, %xmm2 -# CHECK-NEXT: - - - 2.00 2.00 - 1.00 1.00 - - 2.00 - - - vcvtpd2dqy (%rax), %xmm2 +# CHECK-NEXT: - - - 2.00 2.00 - 2.00 - - - 2.00 - - - vcvtpd2dq %ymm0, %xmm2 +# CHECK-NEXT: - - - 2.00 2.00 - 2.00 2.00 - - 2.00 - - - vcvtpd2dqy (%rax), %xmm2 # CHECK-NEXT: - - - - - - 1.00 - - - 1.00 - - - vcvtpd2ps %xmm0, %xmm2 # CHECK-NEXT: - - - - - - 1.00 1.00 - - 1.00 - - - vcvtpd2psx (%rax), %xmm2 -# CHECK-NEXT: - - - 2.00 2.00 - 1.00 - - - 2.00 - - - vcvtpd2ps %ymm0, %xmm2 -# CHECK-NEXT: - - - 2.00 2.00 - 1.00 1.00 - - 2.00 - - - vcvtpd2psy (%rax), %xmm2 +# CHECK-NEXT: - - - 2.00 2.00 - 2.00 - - - 2.00 - - - vcvtpd2ps %ymm0, %xmm2 +# CHECK-NEXT: - - - 2.00 2.00 - 2.00 2.00 - - 2.00 - - - vcvtpd2psy (%rax), %xmm2 # CHECK-NEXT: - - - - - - 1.00 - - - 1.00 - - - vcvtps2dq %xmm0, %xmm2 # CHECK-NEXT: - - - - - - 1.00 1.00 - - 1.00 - - - vcvtps2dq (%rax), %xmm2 -# CHECK-NEXT: - - - - - - 1.00 - - - 2.00 - - - vcvtps2dq %ymm0, %ymm2 -# CHECK-NEXT: - - - - - - 1.00 1.00 - - 2.00 - - - vcvtps2dq (%rax), %ymm2 +# CHECK-NEXT: - - - - - - 2.00 - - - 2.00 - - - vcvtps2dq %ymm0, %ymm2 +# CHECK-NEXT: - - - - - - 2.00 2.00 - - 2.00 - - - vcvtps2dq (%rax), %ymm2 # CHECK-NEXT: - - - - - - 1.00 - - - 1.00 - - - vcvtps2pd %xmm0, %xmm2 # CHECK-NEXT: - - - - - - 1.00 1.00 - - 1.00 - - - vcvtps2pd (%rax), %xmm2 # CHECK-NEXT: - - - - - - 1.00 - - - 1.00 - - - vcvtps2pd %xmm0, %ymm2 @@ -1151,24 +1151,24 @@ vzeroupper # CHECK-NEXT: - - - - - - 1.00 1.00 - - 1.00 - - - vcvtss2si (%rax), %rcx # CHECK-NEXT: - - - - - - 1.00 - - - 1.00 - - - vcvttpd2dq %xmm0, %xmm2 # CHECK-NEXT: - - - - - - 1.00 1.00 - - 1.00 - - - vcvttpd2dqx (%rax), %xmm2 -# CHECK-NEXT: - - - 2.00 2.00 - 1.00 - - - 2.00 - - - vcvttpd2dq %ymm0, %xmm2 -# CHECK-NEXT: - - - 2.00 2.00 - 1.00 1.00 - - 2.00 - - - vcvttpd2dqy (%rax), %xmm2 +# CHECK-NEXT: - - - 2.00 2.00 - 2.00 - - - 2.00 - - - vcvttpd2dq %ymm0, %xmm2 +# CHECK-NEXT: - - - 2.00 2.00 - 2.00 2.00 - - 2.00 - - - vcvttpd2dqy (%rax), %xmm2 # CHECK-NEXT: - - - - - - 1.00 - - - 1.00 - - - vcvttps2dq %xmm0, %xmm2 # CHECK-NEXT: - - - - - - 1.00 1.00 - - 1.00 - - - vcvttps2dq (%rax), %xmm2 -# CHECK-NEXT: - - - - - - 1.00 - - - 2.00 - - - vcvttps2dq %ymm0, %ymm2 -# CHECK-NEXT: - - - - - - 1.00 1.00 - - 2.00 - - - vcvttps2dq (%rax), %ymm2 +# CHECK-NEXT: - - - - - - 2.00 - - - 2.00 - - - vcvttps2dq %ymm0, %ymm2 +# CHECK-NEXT: - - - - - - 2.00 2.00 - - 2.00 - - - vcvttps2dq (%rax), %ymm2 # CHECK-NEXT: - - - - - - 1.00 - - - 1.00 - - - vcvttsd2si %xmm0, %ecx # CHECK-NEXT: - - - - - - 1.00 - - - 1.00 - - - vcvttsd2si %xmm0, %rcx # CHECK-NEXT: - - - - - - 1.00 1.00 - - 1.00 - - - vcvttsd2si (%rax), %ecx # CHECK-NEXT: - - - - - - 1.00 1.00 - - 1.00 - - - vcvttsd2si (%rax), %rcx # CHECK-NEXT: - - - - 19.00 - 1.00 - - - - - - - vdivpd %xmm0, %xmm1, %xmm2 # CHECK-NEXT: - - - - 19.00 - 1.00 1.00 - - - - - - vdivpd (%rax), %xmm1, %xmm2 -# CHECK-NEXT: - - - - 38.00 - 1.00 - - - - - - - vdivpd %ymm0, %ymm1, %ymm2 -# CHECK-NEXT: - - - - 38.00 - 1.00 1.00 - - - - - - vdivpd (%rax), %ymm1, %ymm2 +# CHECK-NEXT: - - - - 38.00 - 2.00 - - - - - - - vdivpd %ymm0, %ymm1, %ymm2 +# CHECK-NEXT: - - - - 38.00 - 2.00 2.00 - - - - - - vdivpd (%rax), %ymm1, %ymm2 # CHECK-NEXT: - - - - 19.00 - 1.00 - - - - - - - vdivps %xmm0, %xmm1, %xmm2 # CHECK-NEXT: - - - - 19.00 - 1.00 1.00 - - - - - - vdivps (%rax), %xmm1, %xmm2 -# CHECK-NEXT: - - - - 38.00 - 1.00 - - - - - - - vdivps %ymm0, %ymm1, %ymm2 -# CHECK-NEXT: - - - - 38.00 - 1.00 1.00 - - - - - - vdivps (%rax), %ymm1, %ymm2 +# CHECK-NEXT: - - - - 38.00 - 2.00 - - - - - - - vdivps %ymm0, %ymm1, %ymm2 +# CHECK-NEXT: - - - - 38.00 - 2.00 2.00 - - - - - - vdivps (%rax), %ymm1, %ymm2 # CHECK-NEXT: - - - - 19.00 - 1.00 - - - - - - - vdivsd %xmm0, %xmm1, %xmm2 # CHECK-NEXT: - - - - 19.00 - 1.00 1.00 - - - - - - vdivsd (%rax), %xmm1, %xmm2 # CHECK-NEXT: - - - - 19.00 - 1.00 - - - - - - - vdivss %xmm0, %xmm1, %xmm2 @@ -1177,8 +1177,8 @@ vzeroupper # CHECK-NEXT: - - - 3.00 3.00 - 1.00 1.00 - - - - - - vdppd $22, (%rax), %xmm1, %xmm2 # CHECK-NEXT: - - - 3.00 3.00 - 1.00 - - - - - - - vdpps $22, %xmm0, %xmm1, %xmm2 # CHECK-NEXT: - - - 3.00 3.00 - 1.00 1.00 - - - - - - vdpps $22, (%rax), %xmm1, %xmm2 -# CHECK-NEXT: - - - 6.00 6.00 - 1.00 - - - - - - - vdpps $22, %ymm0, %ymm1, %ymm2 -# CHECK-NEXT: - - - 6.00 6.00 - 1.00 1.00 - - - - - - vdpps $22, (%rax), %ymm1, %ymm2 +# CHECK-NEXT: - - - 6.00 6.00 - 2.00 - - - - - - - vdpps $22, %ymm0, %ymm1, %ymm2 +# CHECK-NEXT: - - - 6.00 6.00 - 2.00 2.00 - - - - - - vdpps $22, (%rax), %ymm1, %ymm2 # CHECK-NEXT: - - - 0.50 0.50 0.50 0.50 - - - - - - - vextractf128 $1, %ymm0, %xmm2 # CHECK-NEXT: - - - - - - 1.00 - - 1.00 1.00 - - - vextractf128 $1, %ymm0, (%rax) # CHECK-NEXT: - - - 0.50 0.50 0.50 0.50 - - - - - - - vextractps $1, %xmm0, %ecx @@ -1186,19 +1186,19 @@ vzeroupper # CHECK-NEXT: - - - 1.00 - 1.00 - - - - - - - - vhaddpd %xmm0, %xmm1, %xmm2 # CHECK-NEXT: - - - 1.00 - 1.00 - 1.00 - - - - - - vhaddpd (%rax), %xmm1, %xmm2 # CHECK-NEXT: - - - 2.00 - 2.00 - - - - - - - - vhaddpd %ymm0, %ymm1, %ymm2 -# CHECK-NEXT: - - - 2.00 - 2.00 - 1.00 - - - - - - vhaddpd (%rax), %ymm1, %ymm2 +# CHECK-NEXT: - - - 2.00 - 2.00 - 2.00 - - - - - - vhaddpd (%rax), %ymm1, %ymm2 # CHECK-NEXT: - - - 1.00 - 1.00 - - - - - - - - vhaddps %xmm0, %xmm1, %xmm2 # CHECK-NEXT: - - - 1.00 - 1.00 - 1.00 - - - - - - vhaddps (%rax), %xmm1, %xmm2 # CHECK-NEXT: - - - 2.00 - 2.00 - - - - - - - - vhaddps %ymm0, %ymm1, %ymm2 -# CHECK-NEXT: - - - 2.00 - 2.00 - 1.00 - - - - - - vhaddps (%rax), %ymm1, %ymm2 +# CHECK-NEXT: - - - 2.00 - 2.00 - 2.00 - - - - - - vhaddps (%rax), %ymm1, %ymm2 # CHECK-NEXT: - - - 1.00 - 1.00 - - - - - - - - vhsubpd %xmm0, %xmm1, %xmm2 # CHECK-NEXT: - - - 1.00 - 1.00 - 1.00 - - - - - - vhsubpd (%rax), %xmm1, %xmm2 # CHECK-NEXT: - - - 2.00 - 2.00 - - - - - - - - vhsubpd %ymm0, %ymm1, %ymm2 -# CHECK-NEXT: - - - 2.00 - 2.00 - 1.00 - - - - - - vhsubpd (%rax), %ymm1, %ymm2 +# CHECK-NEXT: - - - 2.00 - 2.00 - 2.00 - - - - - - vhsubpd (%rax), %ymm1, %ymm2 # CHECK-NEXT: - - - 1.00 - 1.00 - - - - - - - - vhsubps %xmm0, %xmm1, %xmm2 # CHECK-NEXT: - - - 1.00 - 1.00 - 1.00 - - - - - - vhsubps (%rax), %xmm1, %xmm2 # CHECK-NEXT: - - - 2.00 - 2.00 - - - - - - - - vhsubps %ymm0, %ymm1, %ymm2 -# CHECK-NEXT: - - - 2.00 - 2.00 - 1.00 - - - - - - vhsubps (%rax), %ymm1, %ymm2 +# CHECK-NEXT: - - - 2.00 - 2.00 - 2.00 - - - - - - vhsubps (%rax), %ymm1, %ymm2 # CHECK-NEXT: - - - 0.50 0.50 0.50 0.50 - - - - - - - vinsertf128 $1, %xmm0, %ymm1, %ymm2 # CHECK-NEXT: - - - 0.50 0.50 0.50 0.50 1.00 - - - - - - vinsertf128 $1, (%rax), %ymm1, %ymm2 # CHECK-NEXT: - - - 0.50 0.50 0.50 0.50 - - - - - - - vinsertps $1, %xmm0, %xmm1, %xmm2 @@ -1208,33 +1208,33 @@ vzeroupper # CHECK-NEXT: - - - - - - - 1.00 - - - - - - vldmxcsr (%rax) # CHECK-NEXT: - - - - - - 1.00 - - 1.00 1.00 - - - vmaskmovdqu %xmm0, %xmm1 # CHECK-NEXT: - - - 1.00 1.00 0.50 0.50 1.00 - - - - - - vmaskmovpd (%rax), %xmm0, %xmm2 -# CHECK-NEXT: - - - 2.00 2.00 0.50 0.50 1.00 - - - - - - vmaskmovpd (%rax), %ymm0, %ymm2 +# CHECK-NEXT: - - - 2.00 2.00 1.00 1.00 2.00 - - - - - - vmaskmovpd (%rax), %ymm0, %ymm2 # CHECK-NEXT: - - - 2.00 2.00 0.50 0.50 - - 1.00 - - - - vmaskmovpd %xmm0, %xmm1, (%rax) -# CHECK-NEXT: - - - 2.00 2.00 0.50 0.50 - - 1.00 - - - - vmaskmovpd %ymm0, %ymm1, (%rax) +# CHECK-NEXT: - - - 2.00 2.00 1.00 1.00 - - 2.00 - - - - vmaskmovpd %ymm0, %ymm1, (%rax) # CHECK-NEXT: - - - 1.00 1.00 0.50 0.50 1.00 - - - - - - vmaskmovps (%rax), %xmm0, %xmm2 -# CHECK-NEXT: - - - 2.00 2.00 0.50 0.50 1.00 - - - - - - vmaskmovps (%rax), %ymm0, %ymm2 +# CHECK-NEXT: - - - 2.00 2.00 1.00 1.00 2.00 - - - - - - vmaskmovps (%rax), %ymm0, %ymm2 # CHECK-NEXT: - - - 2.00 2.00 0.50 0.50 - - 1.00 - - - - vmaskmovps %xmm0, %xmm1, (%rax) -# CHECK-NEXT: - - - 2.00 2.00 0.50 0.50 - - 1.00 - - - - vmaskmovps %ymm0, %ymm1, (%rax) +# CHECK-NEXT: - - - 2.00 2.00 1.00 1.00 - - 2.00 - - - - vmaskmovps %ymm0, %ymm1, (%rax) # CHECK-NEXT: - - - 1.00 - 1.00 - - - - - - - - vmaxpd %xmm0, %xmm1, %xmm2 # CHECK-NEXT: - - - 1.00 - 1.00 - 1.00 - - - - - - vmaxpd (%rax), %xmm1, %xmm2 -# CHECK-NEXT: - - - 2.00 - 1.00 - - - - - - - - vmaxpd %ymm0, %ymm1, %ymm2 -# CHECK-NEXT: - - - 2.00 - 1.00 - 1.00 - - - - - - vmaxpd (%rax), %ymm1, %ymm2 +# CHECK-NEXT: - - - 2.00 - 2.00 - - - - - - - - vmaxpd %ymm0, %ymm1, %ymm2 +# CHECK-NEXT: - - - 2.00 - 2.00 - 2.00 - - - - - - vmaxpd (%rax), %ymm1, %ymm2 # CHECK-NEXT: - - - 1.00 - 1.00 - - - - - - - - vmaxps %xmm0, %xmm1, %xmm2 # CHECK-NEXT: - - - 1.00 - 1.00 - 1.00 - - - - - - vmaxps (%rax), %xmm1, %xmm2 -# CHECK-NEXT: - - - 2.00 - 1.00 - - - - - - - - vmaxps %ymm0, %ymm1, %ymm2 -# CHECK-NEXT: - - - 2.00 - 1.00 - 1.00 - - - - - - vmaxps (%rax), %ymm1, %ymm2 +# CHECK-NEXT: - - - 2.00 - 2.00 - - - - - - - - vmaxps %ymm0, %ymm1, %ymm2 +# CHECK-NEXT: - - - 2.00 - 2.00 - 2.00 - - - - - - vmaxps (%rax), %ymm1, %ymm2 # CHECK-NEXT: - - - 1.00 - 1.00 - - - - - - - - vmaxsd %xmm0, %xmm1, %xmm2 # CHECK-NEXT: - - - 1.00 - 1.00 - 1.00 - - - - - - vmaxsd (%rax), %xmm1, %xmm2 # CHECK-NEXT: - - - 1.00 - 1.00 - - - - - - - - vmaxss %xmm0, %xmm1, %xmm2 # CHECK-NEXT: - - - 1.00 - 1.00 - 1.00 - - - - - - vmaxss (%rax), %xmm1, %xmm2 # CHECK-NEXT: - - - 1.00 - 1.00 - - - - - - - - vminpd %xmm0, %xmm1, %xmm2 # CHECK-NEXT: - - - 1.00 - 1.00 - 1.00 - - - - - - vminpd (%rax), %xmm1, %xmm2 -# CHECK-NEXT: - - - 2.00 - 1.00 - - - - - - - - vminpd %ymm0, %ymm1, %ymm2 -# CHECK-NEXT: - - - 2.00 - 1.00 - 1.00 - - - - - - vminpd (%rax), %ymm1, %ymm2 +# CHECK-NEXT: - - - 2.00 - 2.00 - - - - - - - - vminpd %ymm0, %ymm1, %ymm2 +# CHECK-NEXT: - - - 2.00 - 2.00 - 2.00 - - - - - - vminpd (%rax), %ymm1, %ymm2 # CHECK-NEXT: - - - 1.00 - 1.00 - - - - - - - - vminps %xmm0, %xmm1, %xmm2 # CHECK-NEXT: - - - 1.00 - 1.00 - 1.00 - - - - - - vminps (%rax), %xmm1, %xmm2 -# CHECK-NEXT: - - - 2.00 - 1.00 - - - - - - - - vminps %ymm0, %ymm1, %ymm2 -# CHECK-NEXT: - - - 2.00 - 1.00 - 1.00 - - - - - - vminps (%rax), %ymm1, %ymm2 +# CHECK-NEXT: - - - 2.00 - 2.00 - - - - - - - - vminps %ymm0, %ymm1, %ymm2 +# CHECK-NEXT: - - - 2.00 - 2.00 - 2.00 - - - - - - vminps (%rax), %ymm1, %ymm2 # CHECK-NEXT: - - - 1.00 - 1.00 - - - - - - - - vminsd %xmm0, %xmm1, %xmm2 # CHECK-NEXT: - - - 1.00 - 1.00 - 1.00 - - - - - - vminsd (%rax), %xmm1, %xmm2 # CHECK-NEXT: - - - 1.00 - 1.00 - - - - - - - - vminss %xmm0, %xmm1, %xmm2 @@ -1257,8 +1257,8 @@ vzeroupper # CHECK-NEXT: - - - - - - - - - 1.00 - - - - vmovd %xmm0, (%rax) # CHECK-NEXT: - - - 0.50 0.50 0.50 0.50 - - - - - - - vmovddup %xmm0, %xmm2 # CHECK-NEXT: - - - 0.50 0.50 0.50 0.50 1.00 - - - - - - vmovddup (%rax), %xmm2 -# CHECK-NEXT: - - - 1.00 1.00 0.50 0.50 - - - - - - - vmovddup %ymm0, %ymm2 -# CHECK-NEXT: - - - 1.00 1.00 0.50 0.50 1.00 - - - - - - vmovddup (%rax), %ymm2 +# CHECK-NEXT: - - - 1.00 1.00 1.00 1.00 - - - - - - - vmovddup %ymm0, %ymm2 +# CHECK-NEXT: - - - 1.00 1.00 1.00 1.00 2.00 - - - - - - vmovddup (%rax), %ymm2 # CHECK-NEXT: - - - - - 0.50 0.50 - - - - 0.50 0.50 - vmovdqa %xmm0, %xmm2 # CHECK-NEXT: - - - - - - 1.00 - - 1.00 1.00 - - - vmovdqa %xmm0, (%rax) # CHECK-NEXT: - - - - - 0.50 0.50 1.00 - - - 0.50 0.50 - vmovdqa (%rax), %xmm2 @@ -1286,13 +1286,13 @@ vzeroupper # CHECK-NEXT: - - - 1.00 - 1.00 - - - - - - - - vmovmskps %xmm0, %ecx # CHECK-NEXT: - - - 1.00 - 1.00 - - - - - - - - vmovmskps %ymm0, %ecx # CHECK-NEXT: - - - - - - 1.00 - - 1.00 1.00 - - - vmovntdq %xmm0, (%rax) -# CHECK-NEXT: - - - - - - 1.00 - - 1.00 2.00 - - - vmovntdq %ymm0, (%rax) +# CHECK-NEXT: - - - - - - 2.00 - - 2.00 2.00 - - - vmovntdq %ymm0, (%rax) # CHECK-NEXT: - - - - - 0.50 0.50 1.00 - - - 0.50 0.50 - vmovntdqa (%rax), %xmm2 # CHECK-NEXT: - - - - - 0.50 0.50 1.00 - - - 0.50 0.50 - vmovntdqa (%rax), %ymm2 # CHECK-NEXT: - - - - - - 1.00 - - 1.00 1.00 - - - vmovntpd %xmm0, (%rax) -# CHECK-NEXT: - - - - - - 1.00 - - 1.00 2.00 - - - vmovntpd %ymm0, (%rax) +# CHECK-NEXT: - - - - - - 2.00 - - 2.00 2.00 - - - vmovntpd %ymm0, (%rax) # CHECK-NEXT: - - - - - - 1.00 - - 1.00 1.00 - - - vmovntps %xmm0, (%rax) -# CHECK-NEXT: - - - - - - 1.00 - - 1.00 2.00 - - - vmovntps %ymm0, (%rax) +# CHECK-NEXT: - - - - - - 2.00 - - 2.00 2.00 - - - vmovntps %ymm0, (%rax) # CHECK-NEXT: - - - - - 0.50 0.50 - - - - 0.50 0.50 - vmovq %xmm0, %xmm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - - vmovq %rax, %xmm2 # CHECK-NEXT: - - - - - - - 1.00 - - - - - - vmovq (%rax), %xmm2 @@ -1303,12 +1303,12 @@ vzeroupper # CHECK-NEXT: - - - - - - - 1.00 - - - - - - vmovsd (%rax), %xmm2 # CHECK-NEXT: - - - 0.50 0.50 0.50 0.50 - - - - - - - vmovshdup %xmm0, %xmm2 # CHECK-NEXT: - - - 0.50 0.50 0.50 0.50 1.00 - - - - - - vmovshdup (%rax), %xmm2 -# CHECK-NEXT: - - - 1.00 1.00 0.50 0.50 - - - - - - - vmovshdup %ymm0, %ymm2 -# CHECK-NEXT: - - - 1.00 1.00 0.50 0.50 1.00 - - - - - - vmovshdup (%rax), %ymm2 +# CHECK-NEXT: - - - 1.00 1.00 1.00 1.00 - - - - - - - vmovshdup %ymm0, %ymm2 +# CHECK-NEXT: - - - 1.00 1.00 1.00 1.00 2.00 - - - - - - vmovshdup (%rax), %ymm2 # CHECK-NEXT: - - - 0.50 0.50 0.50 0.50 - - - - - - - vmovsldup %xmm0, %xmm2 # CHECK-NEXT: - - - 0.50 0.50 0.50 0.50 1.00 - - - - - - vmovsldup (%rax), %xmm2 -# CHECK-NEXT: - - - 1.00 1.00 0.50 0.50 - - - - - - - vmovsldup %ymm0, %ymm2 -# CHECK-NEXT: - - - 1.00 1.00 0.50 0.50 1.00 - - - - - - vmovsldup (%rax), %ymm2 +# CHECK-NEXT: - - - 1.00 1.00 1.00 1.00 - - - - - - - vmovsldup %ymm0, %ymm2 +# CHECK-NEXT: - - - 1.00 1.00 1.00 1.00 2.00 - - - - - - vmovsldup (%rax), %ymm2 # CHECK-NEXT: - - - 0.50 0.50 0.50 0.50 - - - - - - - vmovss %xmm0, %xmm1, %xmm2 # CHECK-NEXT: - - - - - - - - - 1.00 - - - - vmovss %xmm0, (%rax) # CHECK-NEXT: - - - - - - - 1.00 - - - - - - vmovss (%rax), %xmm2 @@ -1328,12 +1328,12 @@ vzeroupper # CHECK-NEXT: - - - - - 1.00 - 1.00 - - - - - 2.00 vmpsadbw $1, (%rax), %xmm1, %xmm2 # CHECK-NEXT: - - - - 2.00 - 1.00 - - - - - - - vmulpd %xmm0, %xmm1, %xmm2 # CHECK-NEXT: - - - - 2.00 - 1.00 1.00 - - - - - - vmulpd (%rax), %xmm1, %xmm2 -# CHECK-NEXT: - - - - 4.00 - 1.00 - - - - - - - vmulpd %ymm0, %ymm1, %ymm2 -# CHECK-NEXT: - - - - 4.00 - 1.00 1.00 - - - - - - vmulpd (%rax), %ymm1, %ymm2 +# CHECK-NEXT: - - - - 4.00 - 2.00 - - - - - - - vmulpd %ymm0, %ymm1, %ymm2 +# CHECK-NEXT: - - - - 4.00 - 2.00 2.00 - - - - - - vmulpd (%rax), %ymm1, %ymm2 # CHECK-NEXT: - - - - 1.00 - 1.00 - - - - - - - vmulps %xmm0, %xmm1, %xmm2 # CHECK-NEXT: - - - - 1.00 - 1.00 1.00 - - - - - - vmulps (%rax), %xmm1, %xmm2 -# CHECK-NEXT: - - - - 2.00 - 1.00 - - - - - - - vmulps %ymm0, %ymm1, %ymm2 -# CHECK-NEXT: - - - - 2.00 - 1.00 1.00 - - - - - - vmulps (%rax), %ymm1, %ymm2 +# CHECK-NEXT: - - - - 2.00 - 2.00 - - - - - - - vmulps %ymm0, %ymm1, %ymm2 +# CHECK-NEXT: - - - - 2.00 - 2.00 2.00 - - - - - - vmulps (%rax), %ymm1, %ymm2 # CHECK-NEXT: - - - - 2.00 - 1.00 - - - - - - - vmulsd %xmm0, %xmm1, %xmm2 # CHECK-NEXT: - - - - 2.00 - 1.00 1.00 - - - - - - vmulsd (%rax), %xmm1, %xmm2 # CHECK-NEXT: - - - - 1.00 - 1.00 - - - - - - - vmulss %xmm0, %xmm1, %xmm2 @@ -1341,11 +1341,11 @@ vzeroupper # CHECK-NEXT: - - - 0.50 0.50 0.50 0.50 - - - - - - - vorpd %xmm0, %xmm1, %xmm2 # CHECK-NEXT: - - - 0.50 0.50 0.50 0.50 1.00 - - - - - - vorpd (%rax), %xmm1, %xmm2 # CHECK-NEXT: - - - 1.00 1.00 1.00 1.00 - - - - - - - vorpd %ymm0, %ymm1, %ymm2 -# CHECK-NEXT: - - - 1.00 1.00 1.00 1.00 1.00 - - - - - - vorpd (%rax), %ymm1, %ymm2 +# CHECK-NEXT: - - - 1.00 1.00 1.00 1.00 2.00 - - - - - - vorpd (%rax), %ymm1, %ymm2 # CHECK-NEXT: - - - 0.50 0.50 0.50 0.50 - - - - - - - vorps %xmm0, %xmm1, %xmm2 # CHECK-NEXT: - - - 0.50 0.50 0.50 0.50 1.00 - - - - - - vorps (%rax), %xmm1, %xmm2 # CHECK-NEXT: - - - 1.00 1.00 1.00 1.00 - - - - - - - vorps %ymm0, %ymm1, %ymm2 -# CHECK-NEXT: - - - 1.00 1.00 1.00 1.00 1.00 - - - - - - vorps (%rax), %ymm1, %ymm2 +# CHECK-NEXT: - - - 1.00 1.00 1.00 1.00 2.00 - - - - - - vorps (%rax), %ymm1, %ymm2 # CHECK-NEXT: - - - - - 0.50 0.50 - - - - 0.50 0.50 - vpabsb %xmm0, %xmm2 # CHECK-NEXT: - - - - - 0.50 0.50 1.00 - - - 0.50 0.50 - vpabsb (%rax), %xmm2 # CHECK-NEXT: - - - - - 0.50 0.50 - - - - 0.50 0.50 - vpabsd %xmm0, %xmm2 @@ -1414,18 +1414,18 @@ vzeroupper # CHECK-NEXT: - - - 0.50 0.50 0.50 0.50 1.00 - - - - - - vpermilpd $1, (%rax), %xmm2 # CHECK-NEXT: - - - 2.00 2.00 0.50 0.50 - - - - - - - vpermilpd %xmm0, %xmm1, %xmm2 # CHECK-NEXT: - - - 2.00 2.00 0.50 0.50 1.00 - - - - - - vpermilpd (%rax), %xmm1, %xmm2 -# CHECK-NEXT: - - - 1.00 1.00 0.50 0.50 - - - - - - - vpermilpd $1, %ymm0, %ymm2 -# CHECK-NEXT: - - - 1.00 1.00 0.50 0.50 1.00 - - - - - - vpermilpd $1, (%rax), %ymm2 -# CHECK-NEXT: - - - 3.00 3.00 0.50 0.50 - - - - - - - vpermilpd %ymm0, %ymm1, %ymm2 -# CHECK-NEXT: - - - 3.00 3.00 0.50 0.50 1.00 - - - - - - vpermilpd (%rax), %ymm1, %ymm2 +# CHECK-NEXT: - - - 1.00 1.00 1.00 1.00 - - - - - - - vpermilpd $1, %ymm0, %ymm2 +# CHECK-NEXT: - - - 1.00 1.00 1.00 1.00 2.00 - - - - - - vpermilpd $1, (%rax), %ymm2 +# CHECK-NEXT: - - - 3.00 3.00 1.00 1.00 - - - - - - - vpermilpd %ymm0, %ymm1, %ymm2 +# CHECK-NEXT: - - - 3.00 3.00 1.00 1.00 2.00 - - - - - - vpermilpd (%rax), %ymm1, %ymm2 # CHECK-NEXT: - - - 0.50 0.50 0.50 0.50 - - - - - - - vpermilps $1, %xmm0, %xmm2 # CHECK-NEXT: - - - 0.50 0.50 0.50 0.50 1.00 - - - - - - vpermilps $1, (%rax), %xmm2 # CHECK-NEXT: - - - 2.00 2.00 0.50 0.50 - - - - - - - vpermilps %xmm0, %xmm1, %xmm2 # CHECK-NEXT: - - - 2.00 2.00 0.50 0.50 1.00 - - - - - - vpermilps (%rax), %xmm1, %xmm2 -# CHECK-NEXT: - - - 1.00 1.00 0.50 0.50 - - - - - - - vpermilps $1, %ymm0, %ymm2 -# CHECK-NEXT: - - - 1.00 1.00 0.50 0.50 1.00 - - - - - - vpermilps $1, (%rax), %ymm2 -# CHECK-NEXT: - - - 3.00 3.00 0.50 0.50 - - - - - - - vpermilps %ymm0, %ymm1, %ymm2 -# CHECK-NEXT: - - - 3.00 3.00 0.50 0.50 1.00 - - - - - - vpermilps (%rax), %ymm1, %ymm2 +# CHECK-NEXT: - - - 1.00 1.00 1.00 1.00 - - - - - - - vpermilps $1, %ymm0, %ymm2 +# CHECK-NEXT: - - - 1.00 1.00 1.00 1.00 2.00 - - - - - - vpermilps $1, (%rax), %ymm2 +# CHECK-NEXT: - - - 3.00 3.00 1.00 1.00 - - - - - - - vpermilps %ymm0, %ymm1, %ymm2 +# CHECK-NEXT: - - - 3.00 3.00 1.00 1.00 2.00 - - - - - - vpermilps (%rax), %ymm1, %ymm2 # CHECK-NEXT: - - - - - 0.50 0.50 - - - - 0.50 0.50 - vpextrb $1, %xmm0, %ecx # CHECK-NEXT: - - - - - 0.50 0.50 1.00 - 1.00 - 0.50 0.50 - vpextrb $1, %xmm0, (%rax) # CHECK-NEXT: - - - - - 0.50 0.50 - - - - 0.50 0.50 - vpextrd $1, %xmm0, %ecx @@ -1585,8 +1585,8 @@ vzeroupper # CHECK-NEXT: - - - - - 0.50 0.50 1.00 - - - 0.50 0.50 - vpsubw (%rax), %xmm1, %xmm2 # CHECK-NEXT: - - - 1.00 - 1.00 - - - - - - - - vptest %xmm0, %xmm1 # CHECK-NEXT: - - - 1.00 - 1.00 - 1.00 - - - - - - vptest (%rax), %xmm1 -# CHECK-NEXT: - - - 3.00 1.00 0.50 0.50 - - - - - - - vptest %ymm0, %ymm1 -# CHECK-NEXT: - - - 3.00 1.00 0.50 0.50 1.00 - - - - - - vptest (%rax), %ymm1 +# CHECK-NEXT: - - - 3.00 1.00 1.00 1.00 - - - - - - - vptest %ymm0, %ymm1 +# CHECK-NEXT: - - - 3.00 1.00 1.00 1.00 2.00 - - - - - - vptest (%rax), %ymm1 # CHECK-NEXT: - - - - - 0.50 0.50 - - - - 0.50 0.50 - vpunpckhbw %xmm0, %xmm1, %xmm2 # CHECK-NEXT: - - - - - 0.50 0.50 1.00 - - - 0.50 0.50 - vpunpckhbw (%rax), %xmm1, %xmm2 # CHECK-NEXT: - - - - - 0.50 0.50 - - - - 0.50 0.50 - vpunpckhdq %xmm0, %xmm1, %xmm2 @@ -1607,44 +1607,44 @@ vzeroupper # CHECK-NEXT: - - - - - 0.50 0.50 1.00 - - - 0.50 0.50 - vpxor (%rax), %xmm1, %xmm2 # CHECK-NEXT: - - - - 1.00 - 1.00 - - - - - - - vrcpps %xmm0, %xmm2 # CHECK-NEXT: - - - - 1.00 - 1.00 1.00 - - - - - - vrcpps (%rax), %xmm2 -# CHECK-NEXT: - - - - 2.00 - 1.00 - - - - - - - vrcpps %ymm0, %ymm2 -# CHECK-NEXT: - - - - 2.00 - 1.00 1.00 - - - - - - vrcpps (%rax), %ymm2 +# CHECK-NEXT: - - - - 2.00 - 2.00 - - - - - - - vrcpps %ymm0, %ymm2 +# CHECK-NEXT: - - - - 2.00 - 2.00 2.00 - - - - - - vrcpps (%rax), %ymm2 # CHECK-NEXT: - - - - 1.00 - 1.00 - - - - - - - vrcpss %xmm0, %xmm1, %xmm2 # CHECK-NEXT: - - - - 1.00 - 1.00 1.00 - - - - - - vrcpss (%rax), %xmm1, %xmm2 # CHECK-NEXT: - - - 1.00 - 1.00 - - - - - - - - vroundpd $1, %xmm0, %xmm2 # CHECK-NEXT: - - - 1.00 - 1.00 - 1.00 - - - - - - vroundpd $1, (%rax), %xmm2 -# CHECK-NEXT: - - - - - - 1.00 - - - 2.00 - - - vroundpd $1, %ymm0, %ymm2 -# CHECK-NEXT: - - - - - - 1.00 1.00 - - 2.00 - - - vroundpd $1, (%rax), %ymm2 +# CHECK-NEXT: - - - - - - 2.00 - - - 2.00 - - - vroundpd $1, %ymm0, %ymm2 +# CHECK-NEXT: - - - - - - 2.00 2.00 - - 2.00 - - - vroundpd $1, (%rax), %ymm2 # CHECK-NEXT: - - - 1.00 - 1.00 - - - - - - - - vroundps $1, %xmm0, %xmm2 # CHECK-NEXT: - - - 1.00 - 1.00 - 1.00 - - - - - - vroundps $1, (%rax), %xmm2 -# CHECK-NEXT: - - - - - - 1.00 - - - 2.00 - - - vroundps $1, %ymm0, %ymm2 -# CHECK-NEXT: - - - - - - 1.00 1.00 - - 2.00 - - - vroundps $1, (%rax), %ymm2 +# CHECK-NEXT: - - - - - - 2.00 - - - 2.00 - - - vroundps $1, %ymm0, %ymm2 +# CHECK-NEXT: - - - - - - 2.00 2.00 - - 2.00 - - - vroundps $1, (%rax), %ymm2 # CHECK-NEXT: - - - 1.00 - 1.00 - - - - - - - - vroundsd $1, %xmm0, %xmm1, %xmm2 # CHECK-NEXT: - - - 1.00 - 1.00 - 1.00 - - - - - - vroundsd $1, (%rax), %xmm1, %xmm2 # CHECK-NEXT: - - - 1.00 - 1.00 - - - - - - - - vroundss $1, %xmm0, %xmm1, %xmm2 # CHECK-NEXT: - - - 1.00 - 1.00 - 1.00 - - - - - - vroundss $1, (%rax), %xmm1, %xmm2 # CHECK-NEXT: - - - - 1.00 - 1.00 - - - - - - - vrsqrtps %xmm0, %xmm2 # CHECK-NEXT: - - - - 1.00 - 1.00 1.00 - - - - - - vrsqrtps (%rax), %xmm2 -# CHECK-NEXT: - - - - 2.00 - 1.00 - - - - - - - vrsqrtps %ymm0, %ymm2 -# CHECK-NEXT: - - - - 2.00 - 1.00 1.00 - - - - - - vrsqrtps (%rax), %ymm2 +# CHECK-NEXT: - - - - 2.00 - 2.00 - - - - - - - vrsqrtps %ymm0, %ymm2 +# CHECK-NEXT: - - - - 2.00 - 2.00 2.00 - - - - - - vrsqrtps (%rax), %ymm2 # CHECK-NEXT: - - - - 1.00 - 1.00 - - - - - - - vrsqrtss %xmm0, %xmm1, %xmm2 # CHECK-NEXT: - - - - 1.00 - 1.00 1.00 - - - - - - vrsqrtss (%rax), %xmm1, %xmm2 # CHECK-NEXT: - - - 0.50 0.50 0.50 0.50 - - - - - - - vshufpd $1, %xmm0, %xmm1, %xmm2 # CHECK-NEXT: - - - 0.50 0.50 0.50 0.50 1.00 - - - - - - vshufpd $1, (%rax), %xmm1, %xmm2 -# CHECK-NEXT: - - - 1.00 1.00 0.50 0.50 - - - - - - - vshufpd $1, %ymm0, %ymm1, %ymm2 -# CHECK-NEXT: - - - 1.00 1.00 0.50 0.50 1.00 - - - - - - vshufpd $1, (%rax), %ymm1, %ymm2 +# CHECK-NEXT: - - - 1.00 1.00 1.00 1.00 - - - - - - - vshufpd $1, %ymm0, %ymm1, %ymm2 +# CHECK-NEXT: - - - 1.00 1.00 1.00 1.00 2.00 - - - - - - vshufpd $1, (%rax), %ymm1, %ymm2 # CHECK-NEXT: - - - 0.50 0.50 0.50 0.50 - - - - - - - vshufps $1, %xmm0, %xmm1, %xmm2 # CHECK-NEXT: - - - 0.50 0.50 0.50 0.50 1.00 - - - - - - vshufps $1, (%rax), %xmm1, %xmm2 -# CHECK-NEXT: - - - 1.00 1.00 0.50 0.50 - - - - - - - vshufps $1, %ymm0, %ymm1, %ymm2 -# CHECK-NEXT: - - - 1.00 1.00 0.50 0.50 1.00 - - - - - - vshufps $1, (%rax), %ymm1, %ymm2 +# CHECK-NEXT: - - - 1.00 1.00 1.00 1.00 - - - - - - - vshufps $1, %ymm0, %ymm1, %ymm2 +# CHECK-NEXT: - - - 1.00 1.00 1.00 1.00 2.00 - - - - - - vshufps $1, (%rax), %ymm1, %ymm2 # CHECK-NEXT: - - - - 21.00 - 1.00 - - - - - - - vsqrtpd %xmm0, %xmm2 # CHECK-NEXT: - - - - 21.00 - 1.00 1.00 - - - - - - vsqrtpd (%rax), %xmm2 -# CHECK-NEXT: - - - - 54.00 - 1.00 - - - - - - - vsqrtpd %ymm0, %ymm2 -# CHECK-NEXT: - - - - 54.00 - 1.00 1.00 - - - - - - vsqrtpd (%rax), %ymm2 +# CHECK-NEXT: - - - - 54.00 - 2.00 - - - - - - - vsqrtpd %ymm0, %ymm2 +# CHECK-NEXT: - - - - 54.00 - 2.00 2.00 - - - - - - vsqrtpd (%rax), %ymm2 # CHECK-NEXT: - - - - 21.00 - 1.00 - - - - - - - vsqrtps %xmm0, %xmm2 # CHECK-NEXT: - - - - 21.00 - 1.00 1.00 - - - - - - vsqrtps (%rax), %xmm2 -# CHECK-NEXT: - - - - 42.00 - 1.00 - - - - - - - vsqrtps %ymm0, %ymm2 -# CHECK-NEXT: - - - - 42.00 - 1.00 1.00 - - - - - - vsqrtps (%rax), %ymm2 +# CHECK-NEXT: - - - - 42.00 - 2.00 - - - - - - - vsqrtps %ymm0, %ymm2 +# CHECK-NEXT: - - - - 42.00 - 2.00 2.00 - - - - - - vsqrtps (%rax), %ymm2 # CHECK-NEXT: - - - - 21.00 - 1.00 - - - - - - - vsqrtsd %xmm0, %xmm1, %xmm2 # CHECK-NEXT: - - - - 21.00 - 1.00 1.00 - - - - - - vsqrtsd (%rax), %xmm1, %xmm2 # CHECK-NEXT: - - - - 21.00 - 1.00 - - - - - - - vsqrtss %xmm0, %xmm1, %xmm2 @@ -1653,23 +1653,23 @@ vzeroupper # CHECK-NEXT: - - - 1.00 - 1.00 - - - - - - - - vsubpd %xmm0, %xmm1, %xmm2 # CHECK-NEXT: - - - 1.00 - 1.00 - 1.00 - - - - - - vsubpd (%rax), %xmm1, %xmm2 # CHECK-NEXT: - - - 2.00 - 2.00 - - - - - - - - vsubpd %ymm0, %ymm1, %ymm2 -# CHECK-NEXT: - - - 2.00 - 2.00 - 1.00 - - - - - - vsubpd (%rax), %ymm1, %ymm2 +# CHECK-NEXT: - - - 2.00 - 2.00 - 2.00 - - - - - - vsubpd (%rax), %ymm1, %ymm2 # CHECK-NEXT: - - - 1.00 - 1.00 - - - - - - - - vsubps %xmm0, %xmm1, %xmm2 # CHECK-NEXT: - - - 1.00 - 1.00 - 1.00 - - - - - - vsubps (%rax), %xmm1, %xmm2 # CHECK-NEXT: - - - 2.00 - 2.00 - - - - - - - - vsubps %ymm0, %ymm1, %ymm2 -# CHECK-NEXT: - - - 2.00 - 2.00 - 1.00 - - - - - - vsubps (%rax), %ymm1, %ymm2 +# CHECK-NEXT: - - - 2.00 - 2.00 - 2.00 - - - - - - vsubps (%rax), %ymm1, %ymm2 # CHECK-NEXT: - - - 1.00 - 1.00 - - - - - - - - vsubsd %xmm0, %xmm1, %xmm2 # CHECK-NEXT: - - - 1.00 - 1.00 - 1.00 - - - - - - vsubsd (%rax), %xmm1, %xmm2 # CHECK-NEXT: - - - 1.00 - 1.00 - - - - - - - - vsubss %xmm0, %xmm1, %xmm2 # CHECK-NEXT: - - - 1.00 - 1.00 - 1.00 - - - - - - vsubss (%rax), %xmm1, %xmm2 # CHECK-NEXT: - - - 1.00 - 1.00 - - - - - - - - vtestpd %xmm0, %xmm1 # CHECK-NEXT: - - - 1.00 - 1.00 - 1.00 - - - - - - vtestpd (%rax), %xmm1 -# CHECK-NEXT: - - - 3.00 1.00 0.50 0.50 - - - - - - - vtestpd %ymm0, %ymm1 -# CHECK-NEXT: - - - 3.00 1.00 0.50 0.50 1.00 - - - - - - vtestpd (%rax), %ymm1 +# CHECK-NEXT: - - - 3.00 1.00 1.00 1.00 - - - - - - - vtestpd %ymm0, %ymm1 +# CHECK-NEXT: - - - 3.00 1.00 1.00 1.00 2.00 - - - - - - vtestpd (%rax), %ymm1 # CHECK-NEXT: - - - 1.00 - 1.00 - - - - - - - - vtestps %xmm0, %xmm1 # CHECK-NEXT: - - - 1.00 - 1.00 - 1.00 - - - - - - vtestps (%rax), %xmm1 -# CHECK-NEXT: - - - 3.00 1.00 0.50 0.50 - - - - - - - vtestps %ymm0, %ymm1 -# CHECK-NEXT: - - - 3.00 1.00 0.50 0.50 1.00 - - - - - - vtestps (%rax), %ymm1 +# CHECK-NEXT: - - - 3.00 1.00 1.00 1.00 - - - - - - - vtestps %ymm0, %ymm1 +# CHECK-NEXT: - - - 3.00 1.00 1.00 1.00 2.00 - - - - - - vtestps (%rax), %ymm1 # CHECK-NEXT: - - - 1.00 - 1.00 - - - - - - - - vucomisd %xmm0, %xmm1 # CHECK-NEXT: - - - 1.00 - 1.00 - 1.00 - - - - - - vucomisd (%rax), %xmm1 # CHECK-NEXT: - - - 1.00 - 1.00 - - - - - - - - vucomiss %xmm0, %xmm1 @@ -1693,10 +1693,10 @@ vzeroupper # CHECK-NEXT: - - - 0.50 0.50 0.50 0.50 - - - - - - - vxorpd %xmm0, %xmm1, %xmm2 # CHECK-NEXT: - - - 0.50 0.50 0.50 0.50 1.00 - - - - - - vxorpd (%rax), %xmm1, %xmm2 # CHECK-NEXT: - - - 1.00 1.00 1.00 1.00 - - - - - - - vxorpd %ymm0, %ymm1, %ymm2 -# CHECK-NEXT: - - - 1.00 1.00 1.00 1.00 1.00 - - - - - - vxorpd (%rax), %ymm1, %ymm2 +# CHECK-NEXT: - - - 1.00 1.00 1.00 1.00 2.00 - - - - - - vxorpd (%rax), %ymm1, %ymm2 # CHECK-NEXT: - - - 0.50 0.50 0.50 0.50 - - - - - - - vxorps %xmm0, %xmm1, %xmm2 # CHECK-NEXT: - - - 0.50 0.50 0.50 0.50 1.00 - - - - - - vxorps (%rax), %xmm1, %xmm2 # CHECK-NEXT: - - - 1.00 1.00 1.00 1.00 - - - - - - - vxorps %ymm0, %ymm1, %ymm2 -# CHECK-NEXT: - - - 1.00 1.00 1.00 1.00 1.00 - - - - - - vxorps (%rax), %ymm1, %ymm2 +# CHECK-NEXT: - - - 1.00 1.00 1.00 1.00 2.00 - - - - - - vxorps (%rax), %ymm1, %ymm2 # CHECK-NEXT: - - - - - - - - - - - - - - vzeroall # CHECK-NEXT: - - - - - - - - - - - - - - vzeroupper -- 2.7.4