; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=skylake -mattr=+aes,-avx2 | FileCheck %s --check-prefixes=CHECK,SKYLAKE
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=skx -mattr=+aes,-avx | FileCheck %s --check-prefixes=CHECK,SKX-SSE
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=skx -mattr=+aes,-avx2 | FileCheck %s --check-prefixes=CHECK,SKX
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=x86-64 -mattr=+aes,-avx | FileCheck %s --check-prefixes=CHECK,BDVER2-SSE
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=x86-64 -mattr=+avx -mattr=+aes,-avx2 | FileCheck %s --check-prefixes=CHECK,BDVER2
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=btver2 -mattr=+aes,-avx | FileCheck %s --check-prefixes=CHECK,BTVER2-SSE
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=btver2 -mattr=+aes,-avx2 | FileCheck %s --check-prefixes=CHECK,BTVER2
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=znver1 -mattr=+aes,-avx | FileCheck %s --check-prefixes=CHECK,ZNVER1-SSE
; SKX-NEXT: vaesdec (%rdi), %xmm0, %xmm0 # sched: [10:1.00]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-SSE-LABEL: test_aesdec:
+; BDVER2-SSE: # %bb.0:
+; BDVER2-SSE-NEXT: aesdec %xmm1, %xmm0 # sched: [7:1.00]
+; BDVER2-SSE-NEXT: aesdec (%rdi), %xmm0 # sched: [13:1.00]
+; BDVER2-SSE-NEXT: retq # sched: [1:1.00]
+;
+; BDVER2-LABEL: test_aesdec:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: vaesdec %xmm1, %xmm0, %xmm0 # sched: [7:1.00]
+; BDVER2-NEXT: vaesdec (%rdi), %xmm0, %xmm0 # sched: [13:1.00]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-SSE-LABEL: test_aesdec:
; BTVER2-SSE: # %bb.0:
; BTVER2-SSE-NEXT: aesdec %xmm1, %xmm0 # sched: [3:1.00]
; SKX-NEXT: vaesdeclast (%rdi), %xmm0, %xmm0 # sched: [10:1.00]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-SSE-LABEL: test_aesdeclast:
+; BDVER2-SSE: # %bb.0:
+; BDVER2-SSE-NEXT: aesdeclast %xmm1, %xmm0 # sched: [7:1.00]
+; BDVER2-SSE-NEXT: aesdeclast (%rdi), %xmm0 # sched: [13:1.00]
+; BDVER2-SSE-NEXT: retq # sched: [1:1.00]
+;
+; BDVER2-LABEL: test_aesdeclast:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: vaesdeclast %xmm1, %xmm0, %xmm0 # sched: [7:1.00]
+; BDVER2-NEXT: vaesdeclast (%rdi), %xmm0, %xmm0 # sched: [13:1.00]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-SSE-LABEL: test_aesdeclast:
; BTVER2-SSE: # %bb.0:
; BTVER2-SSE-NEXT: aesdeclast %xmm1, %xmm0 # sched: [3:1.00]
; SKX-NEXT: vaesenc (%rdi), %xmm0, %xmm0 # sched: [10:1.00]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-SSE-LABEL: test_aesenc:
+; BDVER2-SSE: # %bb.0:
+; BDVER2-SSE-NEXT: aesenc %xmm1, %xmm0 # sched: [7:1.00]
+; BDVER2-SSE-NEXT: aesenc (%rdi), %xmm0 # sched: [13:1.00]
+; BDVER2-SSE-NEXT: retq # sched: [1:1.00]
+;
+; BDVER2-LABEL: test_aesenc:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: vaesenc %xmm1, %xmm0, %xmm0 # sched: [7:1.00]
+; BDVER2-NEXT: vaesenc (%rdi), %xmm0, %xmm0 # sched: [13:1.00]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-SSE-LABEL: test_aesenc:
; BTVER2-SSE: # %bb.0:
; BTVER2-SSE-NEXT: aesenc %xmm1, %xmm0 # sched: [3:1.00]
; SKX-NEXT: vaesenclast (%rdi), %xmm0, %xmm0 # sched: [10:1.00]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-SSE-LABEL: test_aesenclast:
+; BDVER2-SSE: # %bb.0:
+; BDVER2-SSE-NEXT: aesenclast %xmm1, %xmm0 # sched: [7:1.00]
+; BDVER2-SSE-NEXT: aesenclast (%rdi), %xmm0 # sched: [13:1.00]
+; BDVER2-SSE-NEXT: retq # sched: [1:1.00]
+;
+; BDVER2-LABEL: test_aesenclast:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: vaesenclast %xmm1, %xmm0, %xmm0 # sched: [7:1.00]
+; BDVER2-NEXT: vaesenclast (%rdi), %xmm0, %xmm0 # sched: [13:1.00]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-SSE-LABEL: test_aesenclast:
; BTVER2-SSE: # %bb.0:
; BTVER2-SSE-NEXT: aesenclast %xmm1, %xmm0 # sched: [3:1.00]
; SKX-NEXT: vpor %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-SSE-LABEL: test_aesimc:
+; BDVER2-SSE: # %bb.0:
+; BDVER2-SSE-NEXT: aesimc %xmm0, %xmm1 # sched: [12:2.00]
+; BDVER2-SSE-NEXT: aesimc (%rdi), %xmm0 # sched: [18:2.00]
+; BDVER2-SSE-NEXT: por %xmm1, %xmm0 # sched: [1:0.33]
+; BDVER2-SSE-NEXT: retq # sched: [1:1.00]
+;
+; BDVER2-LABEL: test_aesimc:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: vaesimc %xmm0, %xmm0 # sched: [12:2.00]
+; BDVER2-NEXT: vaesimc (%rdi), %xmm1 # sched: [18:2.00]
+; BDVER2-NEXT: vpor %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-SSE-LABEL: test_aesimc:
; BTVER2-SSE: # %bb.0:
; BTVER2-SSE-NEXT: aesimc %xmm0, %xmm1 # sched: [2:1.00]
; SKX-NEXT: vpor %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-SSE-LABEL: test_aeskeygenassist:
+; BDVER2-SSE: # %bb.0:
+; BDVER2-SSE-NEXT: aeskeygenassist $7, %xmm0, %xmm1 # sched: [8:3.67]
+; BDVER2-SSE-NEXT: aeskeygenassist $7, (%rdi), %xmm0 # sched: [8:3.33]
+; BDVER2-SSE-NEXT: por %xmm1, %xmm0 # sched: [1:0.33]
+; BDVER2-SSE-NEXT: retq # sched: [1:1.00]
+;
+; BDVER2-LABEL: test_aeskeygenassist:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: vaeskeygenassist $7, %xmm0, %xmm0 # sched: [8:3.67]
+; BDVER2-NEXT: vaeskeygenassist $7, (%rdi), %xmm1 # sched: [8:3.33]
+; BDVER2-NEXT: vpor %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-SSE-LABEL: test_aeskeygenassist:
; BTVER2-SSE: # %bb.0:
; BTVER2-SSE-NEXT: aeskeygenassist $7, %xmm0, %xmm1 # sched: [2:1.00]
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=broadwell -mattr=-avx2 | FileCheck %s --check-prefix=CHECK --check-prefix=BROADWELL
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=skylake -mattr=-avx2 | FileCheck %s --check-prefix=CHECK --check-prefix=SKYLAKE
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=skx -mattr=-avx2 | FileCheck %s --check-prefix=CHECK --check-prefix=SKX
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=x86-64 -mattr=+avx -mattr=-avx2 | FileCheck %s --check-prefix=CHECK --check-prefix=BDVER2
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=btver2 -mattr=-avx2 | FileCheck %s --check-prefix=CHECK --check-prefix=BTVER2
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=znver1 -mattr=-avx2 | FileCheck %s --check-prefix=CHECK --check-prefix=ZNVER1
; SKX-NEXT: vaddpd (%rdi), %ymm0, %ymm0 # sched: [11:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-LABEL: test_addpd:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: vaddpd %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
+; BDVER2-NEXT: vaddpd (%rdi), %ymm0, %ymm0 # sched: [10:1.00]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-LABEL: test_addpd:
; BTVER2: # %bb.0:
; BTVER2-NEXT: vaddpd %ymm1, %ymm0, %ymm0 # sched: [3:2.00]
; SKX-NEXT: vaddps (%rdi), %ymm0, %ymm0 # sched: [11:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-LABEL: test_addps:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: vaddps %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
+; BDVER2-NEXT: vaddps (%rdi), %ymm0, %ymm0 # sched: [10:1.00]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-LABEL: test_addps:
; BTVER2: # %bb.0:
; BTVER2-NEXT: vaddps %ymm1, %ymm0, %ymm0 # sched: [3:2.00]
; SKX-NEXT: vaddsubpd (%rdi), %ymm0, %ymm0 # sched: [11:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-LABEL: test_addsubpd:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: vaddsubpd %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
+; BDVER2-NEXT: vaddsubpd (%rdi), %ymm0, %ymm0 # sched: [10:1.00]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-LABEL: test_addsubpd:
; BTVER2: # %bb.0:
; BTVER2-NEXT: vaddsubpd %ymm1, %ymm0, %ymm0 # sched: [3:2.00]
; SKX-NEXT: vaddsubps (%rdi), %ymm0, %ymm0 # sched: [11:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-LABEL: test_addsubps:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: vaddsubps %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
+; BDVER2-NEXT: vaddsubps (%rdi), %ymm0, %ymm0 # sched: [10:1.00]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-LABEL: test_addsubps:
; BTVER2: # %bb.0:
; BTVER2-NEXT: vaddsubps %ymm1, %ymm0, %ymm0 # sched: [3:2.00]
; SKX-NEXT: vaddpd %ymm0, %ymm1, %ymm0 # sched: [4:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-LABEL: test_andnotpd:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: vandnpd %ymm1, %ymm0, %ymm0 # sched: [1:1.00]
+; BDVER2-NEXT: vandnpd (%rdi), %ymm0, %ymm0 # sched: [8:1.00]
+; BDVER2-NEXT: vaddpd %ymm0, %ymm1, %ymm0 # sched: [3:1.00]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-LABEL: test_andnotpd:
; BTVER2: # %bb.0:
; BTVER2-NEXT: vandnpd %ymm1, %ymm0, %ymm0 # sched: [1:1.00]
; SKX-NEXT: vaddps %ymm0, %ymm1, %ymm0 # sched: [4:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-LABEL: test_andnotps:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: vandnps %ymm1, %ymm0, %ymm0 # sched: [1:1.00]
+; BDVER2-NEXT: vandnps (%rdi), %ymm0, %ymm0 # sched: [8:1.00]
+; BDVER2-NEXT: vaddps %ymm0, %ymm1, %ymm0 # sched: [3:1.00]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-LABEL: test_andnotps:
; BTVER2: # %bb.0:
; BTVER2-NEXT: vandnps %ymm1, %ymm0, %ymm0 # sched: [1:1.00]
; SKX-NEXT: vaddpd %ymm0, %ymm1, %ymm0 # sched: [4:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-LABEL: test_andpd:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: vandpd %ymm1, %ymm0, %ymm0 # sched: [1:1.00]
+; BDVER2-NEXT: vandpd (%rdi), %ymm0, %ymm0 # sched: [8:1.00]
+; BDVER2-NEXT: vaddpd %ymm0, %ymm1, %ymm0 # sched: [3:1.00]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-LABEL: test_andpd:
; BTVER2: # %bb.0:
; BTVER2-NEXT: vandpd %ymm1, %ymm0, %ymm0 # sched: [1:1.00]
; SKX-NEXT: vaddps %ymm0, %ymm1, %ymm0 # sched: [4:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-LABEL: test_andps:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: vandps %ymm1, %ymm0, %ymm0 # sched: [1:1.00]
+; BDVER2-NEXT: vandps (%rdi), %ymm0, %ymm0 # sched: [8:1.00]
+; BDVER2-NEXT: vaddps %ymm0, %ymm1, %ymm0 # sched: [3:1.00]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-LABEL: test_andps:
; BTVER2: # %bb.0:
; BTVER2-NEXT: vandps %ymm1, %ymm0, %ymm0 # sched: [1:1.00]
; SKX-NEXT: vaddpd %ymm1, %ymm0, %ymm0 # sched: [4:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-LABEL: test_blendpd:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: vblendpd {{.*#+}} ymm0 = ymm0[0],ymm1[1,2],ymm0[3] sched: [1:0.50]
+; BDVER2-NEXT: vblendpd {{.*#+}} ymm1 = ymm0[0,1],mem[2,3] sched: [8:0.50]
+; BDVER2-NEXT: vaddpd %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-LABEL: test_blendpd:
; BTVER2: # %bb.0:
; BTVER2-NEXT: vblendpd {{.*#+}} ymm0 = ymm0[0],ymm1[1,2],ymm0[3] sched: [1:1.00]
; SKX-NEXT: vaddps %ymm1, %ymm0, %ymm0 # sched: [4:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-LABEL: test_blendps:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0],ymm1[1,2],ymm0[3,4,5,6,7] sched: [1:0.50]
+; BDVER2-NEXT: vblendps {{.*#+}} ymm1 = ymm1[0,1],mem[2],ymm1[3],mem[4,5,6],ymm1[7] sched: [8:0.50]
+; BDVER2-NEXT: vaddps %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-LABEL: test_blendps:
; BTVER2: # %bb.0:
; BTVER2-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0],ymm1[1,2],ymm0[3,4,5,6,7] sched: [1:1.00]
; SKX-NEXT: vblendvpd %ymm2, (%rdi), %ymm0, %ymm0 # sched: [9:0.67]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-LABEL: test_blendvpd:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: vblendvpd %ymm2, %ymm1, %ymm0, %ymm0 # sched: [2:1.00]
+; BDVER2-NEXT: vblendvpd %ymm2, (%rdi), %ymm0, %ymm0 # sched: [9:1.00]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-LABEL: test_blendvpd:
; BTVER2: # %bb.0:
; BTVER2-NEXT: vblendvpd %ymm2, %ymm1, %ymm0, %ymm0 # sched: [3:3.00]
; SKX-NEXT: vblendvps %ymm2, (%rdi), %ymm0, %ymm0 # sched: [9:0.67]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-LABEL: test_blendvps:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: vblendvps %ymm2, %ymm1, %ymm0, %ymm0 # sched: [2:1.00]
+; BDVER2-NEXT: vblendvps %ymm2, (%rdi), %ymm0, %ymm0 # sched: [9:1.00]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-LABEL: test_blendvps:
; BTVER2: # %bb.0:
; BTVER2-NEXT: vblendvps %ymm2, %ymm1, %ymm0, %ymm0 # sched: [3:3.00]
; SKX-NEXT: vbroadcastf128 {{.*#+}} ymm0 = mem[0,1,0,1] sched: [7:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-LABEL: test_broadcastf128:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: vbroadcastf128 {{.*#+}} ymm0 = mem[0,1,0,1] sched: [7:1.00]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-LABEL: test_broadcastf128:
; BTVER2: # %bb.0:
; BTVER2-NEXT: vbroadcastf128 {{.*#+}} ymm0 = mem[0,1,0,1] sched: [6:2.00]
; SKX-NEXT: vbroadcastsd (%rdi), %ymm0 # sched: [7:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-LABEL: test_broadcastsd_ymm:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: vbroadcastsd (%rdi), %ymm0 # sched: [7:0.50]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-LABEL: test_broadcastsd_ymm:
; BTVER2: # %bb.0:
; BTVER2-NEXT: vbroadcastsd (%rdi), %ymm0 # sched: [6:2.00]
; SKX-NEXT: vbroadcastss (%rdi), %xmm0 # sched: [6:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-LABEL: test_broadcastss:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: vbroadcastss (%rdi), %xmm0 # sched: [6:0.50]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-LABEL: test_broadcastss:
; BTVER2: # %bb.0:
; BTVER2-NEXT: vbroadcastss (%rdi), %xmm0 # sched: [6:1.00]
; SKX-NEXT: vbroadcastss (%rdi), %ymm0 # sched: [7:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-LABEL: test_broadcastss_ymm:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: vbroadcastss (%rdi), %ymm0 # sched: [7:0.50]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-LABEL: test_broadcastss_ymm:
; BTVER2: # %bb.0:
; BTVER2-NEXT: vbroadcastss (%rdi), %ymm0 # sched: [6:2.00]
; SKX-NEXT: vorpd %ymm0, %ymm1, %ymm0 # sched: [1:0.33]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-LABEL: test_cmppd:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: vcmpeqpd %ymm1, %ymm0, %ymm1 # sched: [3:1.00]
+; BDVER2-NEXT: vcmpeqpd (%rdi), %ymm0, %ymm0 # sched: [10:1.00]
+; BDVER2-NEXT: vorpd %ymm0, %ymm1, %ymm0 # sched: [1:1.00]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-LABEL: test_cmppd:
; BTVER2: # %bb.0:
; BTVER2-NEXT: vcmpeqpd %ymm1, %ymm0, %ymm1 # sched: [2:2.00]
; SKX-NEXT: vorps %ymm0, %ymm1, %ymm0 # sched: [1:0.33]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-LABEL: test_cmpps:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: vcmpeqps %ymm1, %ymm0, %ymm1 # sched: [3:1.00]
+; BDVER2-NEXT: vcmpeqps (%rdi), %ymm0, %ymm0 # sched: [10:1.00]
+; BDVER2-NEXT: vorps %ymm0, %ymm1, %ymm0 # sched: [1:1.00]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-LABEL: test_cmpps:
; BTVER2: # %bb.0:
; BTVER2-NEXT: vcmpeqps %ymm1, %ymm0, %ymm1 # sched: [2:2.00]
; SKX-NEXT: vaddpd %ymm1, %ymm0, %ymm0 # sched: [4:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-LABEL: test_cvtdq2pd:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: vcvtdq2pd %xmm0, %ymm0 # sched: [4:1.00]
+; BDVER2-NEXT: vcvtdq2pd (%rdi), %ymm1 # sched: [10:1.00]
+; BDVER2-NEXT: vaddpd %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-LABEL: test_cvtdq2pd:
; BTVER2: # %bb.0:
; BTVER2-NEXT: vcvtdq2pd (%rdi), %ymm1 # sched: [8:2.00]
; SKX-NEXT: vaddps %ymm1, %ymm0, %ymm0 # sched: [4:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-LABEL: test_cvtdq2ps:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: vcvtdq2ps %ymm0, %ymm0 # sched: [3:1.00]
+; BDVER2-NEXT: vcvtdq2ps (%rdi), %ymm1 # sched: [10:1.00]
+; BDVER2-NEXT: vaddps %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-LABEL: test_cvtdq2ps:
; BTVER2: # %bb.0:
; BTVER2-NEXT: vcvtdq2ps (%rdi), %ymm1 # sched: [8:2.00]
; SKX-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 # sched: [3:1.00]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-LABEL: test_cvtpd2dq:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: vcvtpd2dq %ymm0, %xmm0 # sched: [4:1.00]
+; BDVER2-NEXT: vcvtpd2dqy (%rdi), %xmm1 # sched: [11:1.00]
+; BDVER2-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 # sched: [1:1.00]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-LABEL: test_cvtpd2dq:
; BTVER2: # %bb.0:
; BTVER2-NEXT: vcvtpd2dqy (%rdi), %xmm1 # sched: [11:2.00]
; SKX-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 # sched: [3:1.00]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-LABEL: test_cvttpd2dq:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: vcvttpd2dq %ymm0, %xmm0 # sched: [4:1.00]
+; BDVER2-NEXT: vcvttpd2dqy (%rdi), %xmm1 # sched: [11:1.00]
+; BDVER2-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 # sched: [1:1.00]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-LABEL: test_cvttpd2dq:
; BTVER2: # %bb.0:
; BTVER2-NEXT: vcvttpd2dqy (%rdi), %xmm1 # sched: [11:2.00]
; SKX-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 # sched: [3:1.00]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-LABEL: test_cvtpd2ps:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: vcvtpd2ps %ymm0, %xmm0 # sched: [4:1.00]
+; BDVER2-NEXT: vcvtpd2psy (%rdi), %xmm1 # sched: [11:1.00]
+; BDVER2-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 # sched: [1:1.00]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-LABEL: test_cvtpd2ps:
; BTVER2: # %bb.0:
; BTVER2-NEXT: vcvtpd2psy (%rdi), %xmm1 # sched: [11:2.00]
; SKX-NEXT: vorpd %ymm1, %ymm0, %ymm0 # sched: [1:0.33]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-LABEL: test_cvtps2dq:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: vcvtps2dq %ymm0, %ymm0 # sched: [3:1.00]
+; BDVER2-NEXT: vcvtps2dq (%rdi), %ymm1 # sched: [10:1.00]
+; BDVER2-NEXT: vorpd %ymm1, %ymm0, %ymm0 # sched: [1:1.00]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-LABEL: test_cvtps2dq:
; BTVER2: # %bb.0:
; BTVER2-NEXT: vcvtps2dq (%rdi), %ymm1 # sched: [8:2.00]
; SKX-NEXT: vorps %ymm1, %ymm0, %ymm0 # sched: [1:0.33]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-LABEL: test_cvttps2dq:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: vcvttps2dq %ymm0, %ymm0 # sched: [3:1.00]
+; BDVER2-NEXT: vcvttps2dq (%rdi), %ymm1 # sched: [10:1.00]
+; BDVER2-NEXT: vorps %ymm1, %ymm0, %ymm0 # sched: [1:1.00]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-LABEL: test_cvttps2dq:
; BTVER2: # %bb.0:
; BTVER2-NEXT: vcvttps2dq (%rdi), %ymm1 # sched: [8:2.00]
; SKX-NEXT: vdivpd (%rdi), %ymm0, %ymm0 # sched: [21:8.00]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-LABEL: test_divpd:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: vdivpd %ymm1, %ymm0, %ymm0 # sched: [45:44.00]
+; BDVER2-NEXT: vdivpd (%rdi), %ymm0, %ymm0 # sched: [52:44.00]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-LABEL: test_divpd:
; BTVER2: # %bb.0:
; BTVER2-NEXT: vdivpd %ymm1, %ymm0, %ymm0 # sched: [38:38.00]
; SKX-NEXT: vdivps (%rdi), %ymm0, %ymm0 # sched: [18:5.00]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-LABEL: test_divps:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: vdivps %ymm1, %ymm0, %ymm0 # sched: [29:28.00]
+; BDVER2-NEXT: vdivps (%rdi), %ymm0, %ymm0 # sched: [36:28.00]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-LABEL: test_divps:
; BTVER2: # %bb.0:
; BTVER2-NEXT: vdivps %ymm1, %ymm0, %ymm0 # sched: [38:38.00]
; SKX-NEXT: vdpps $7, (%rdi), %ymm0, %ymm0 # sched: [20:1.33]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-LABEL: test_dpps:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: vdpps $7, %ymm1, %ymm0, %ymm0 # sched: [12:2.00]
+; BDVER2-NEXT: vdpps $7, (%rdi), %ymm0, %ymm0 # sched: [19:2.00]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-LABEL: test_dpps:
; BTVER2: # %bb.0:
; BTVER2-NEXT: vdpps $7, %ymm1, %ymm0, %ymm0 # sched: [12:6.00]
; SKX-NEXT: vzeroupper # sched: [4:1.00]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-LABEL: test_extractf128:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: vextractf128 $1, %ymm0, %xmm0 # sched: [1:1.00]
+; BDVER2-NEXT: vextractf128 $1, %ymm1, (%rdi) # sched: [1:1.00]
+; BDVER2-NEXT: vzeroupper # sched: [100:0.33]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-LABEL: test_extractf128:
; BTVER2: # %bb.0:
; BTVER2-NEXT: vextractf128 $1, %ymm0, %xmm0 # sched: [1:0.50]
; SKX-NEXT: vhaddpd (%rdi), %ymm0, %ymm0 # sched: [13:2.00]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-LABEL: test_haddpd:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: vhaddpd %ymm1, %ymm0, %ymm0 # sched: [5:2.00]
+; BDVER2-NEXT: vhaddpd (%rdi), %ymm0, %ymm0 # sched: [12:2.00]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-LABEL: test_haddpd:
; BTVER2: # %bb.0:
; BTVER2-NEXT: vhaddpd %ymm1, %ymm0, %ymm0 # sched: [3:2.00]
; SKX-NEXT: vhaddps (%rdi), %ymm0, %ymm0 # sched: [13:2.00]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-LABEL: test_haddps:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: vhaddps %ymm1, %ymm0, %ymm0 # sched: [5:2.00]
+; BDVER2-NEXT: vhaddps (%rdi), %ymm0, %ymm0 # sched: [12:2.00]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-LABEL: test_haddps:
; BTVER2: # %bb.0:
; BTVER2-NEXT: vhaddps %ymm1, %ymm0, %ymm0 # sched: [3:2.00]
; SKX-NEXT: vhsubpd (%rdi), %ymm0, %ymm0 # sched: [13:2.00]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-LABEL: test_hsubpd:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: vhsubpd %ymm1, %ymm0, %ymm0 # sched: [5:2.00]
+; BDVER2-NEXT: vhsubpd (%rdi), %ymm0, %ymm0 # sched: [12:2.00]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-LABEL: test_hsubpd:
; BTVER2: # %bb.0:
; BTVER2-NEXT: vhsubpd %ymm1, %ymm0, %ymm0 # sched: [3:2.00]
; SKX-NEXT: vhsubps (%rdi), %ymm0, %ymm0 # sched: [13:2.00]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-LABEL: test_hsubps:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: vhsubps %ymm1, %ymm0, %ymm0 # sched: [5:2.00]
+; BDVER2-NEXT: vhsubps (%rdi), %ymm0, %ymm0 # sched: [12:2.00]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-LABEL: test_hsubps:
; BTVER2: # %bb.0:
; BTVER2-NEXT: vhsubps %ymm1, %ymm0, %ymm0 # sched: [3:2.00]
; SKX-NEXT: vaddps %ymm0, %ymm1, %ymm0 # sched: [4:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-LABEL: test_insertf128:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm1 # sched: [1:1.00]
+; BDVER2-NEXT: vinsertf128 $1, (%rdi), %ymm0, %ymm0 # sched: [7:0.50]
+; BDVER2-NEXT: vaddps %ymm0, %ymm1, %ymm0 # sched: [3:1.00]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-LABEL: test_insertf128:
; BTVER2: # %bb.0:
; BTVER2-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm1 # sched: [1:1.00]
; SKX-NEXT: vlddqu (%rdi), %ymm0 # sched: [7:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-LABEL: test_lddqu:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: vlddqu (%rdi), %ymm0 # sched: [7:0.50]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-LABEL: test_lddqu:
; BTVER2: # %bb.0:
; BTVER2-NEXT: vlddqu (%rdi), %ymm0 # sched: [5:1.00]
; SKX-NEXT: vmovapd %xmm2, %xmm0 # sched: [1:0.33]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-LABEL: test_maskmovpd:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: vmaskmovpd (%rdi), %xmm0, %xmm2 # sched: [8:1.00]
+; BDVER2-NEXT: vmaskmovpd %xmm1, %xmm0, (%rdi) # sched: [5:1.00]
+; BDVER2-NEXT: vmovapd %xmm2, %xmm0 # sched: [1:1.00]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-LABEL: test_maskmovpd:
; BTVER2: # %bb.0:
; BTVER2-NEXT: vmaskmovpd (%rdi), %xmm0, %xmm2 # sched: [6:1.00]
; SKX-NEXT: vmovapd %ymm2, %ymm0 # sched: [1:0.33]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-LABEL: test_maskmovpd_ymm:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: vmaskmovpd (%rdi), %ymm0, %ymm2 # sched: [9:1.00]
+; BDVER2-NEXT: vmaskmovpd %ymm1, %ymm0, (%rdi) # sched: [5:1.00]
+; BDVER2-NEXT: vmovapd %ymm2, %ymm0 # sched: [1:1.00]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-LABEL: test_maskmovpd_ymm:
; BTVER2: # %bb.0:
; BTVER2-NEXT: vmaskmovpd (%rdi), %ymm0, %ymm2 # sched: [6:2.00]
; SKX-NEXT: vmovaps %xmm2, %xmm0 # sched: [1:0.33]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-LABEL: test_maskmovps:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: vmaskmovps (%rdi), %xmm0, %xmm2 # sched: [8:1.00]
+; BDVER2-NEXT: vmaskmovps %xmm1, %xmm0, (%rdi) # sched: [5:1.00]
+; BDVER2-NEXT: vmovaps %xmm2, %xmm0 # sched: [1:1.00]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-LABEL: test_maskmovps:
; BTVER2: # %bb.0:
; BTVER2-NEXT: vmaskmovps (%rdi), %xmm0, %xmm2 # sched: [6:1.00]
; SKX-NEXT: vmovaps %ymm2, %ymm0 # sched: [1:0.33]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-LABEL: test_maskmovps_ymm:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: vmaskmovps (%rdi), %ymm0, %ymm2 # sched: [9:1.00]
+; BDVER2-NEXT: vmaskmovps %ymm1, %ymm0, (%rdi) # sched: [5:1.00]
+; BDVER2-NEXT: vmovaps %ymm2, %ymm0 # sched: [1:1.00]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-LABEL: test_maskmovps_ymm:
; BTVER2: # %bb.0:
; BTVER2-NEXT: vmaskmovps (%rdi), %ymm0, %ymm2 # sched: [6:2.00]
; SKX-NEXT: vmaxpd (%rdi), %ymm0, %ymm0 # sched: [11:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-LABEL: test_maxpd:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: vmaxpd %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
+; BDVER2-NEXT: vmaxpd (%rdi), %ymm0, %ymm0 # sched: [10:1.00]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-LABEL: test_maxpd:
; BTVER2: # %bb.0:
; BTVER2-NEXT: vmaxpd %ymm1, %ymm0, %ymm0 # sched: [2:2.00]
; SKX-NEXT: vmaxps (%rdi), %ymm0, %ymm0 # sched: [11:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-LABEL: test_maxps:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: vmaxps %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
+; BDVER2-NEXT: vmaxps (%rdi), %ymm0, %ymm0 # sched: [10:1.00]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-LABEL: test_maxps:
; BTVER2: # %bb.0:
; BTVER2-NEXT: vmaxps %ymm1, %ymm0, %ymm0 # sched: [2:2.00]
; SKX-NEXT: vminpd (%rdi), %ymm0, %ymm0 # sched: [11:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-LABEL: test_minpd:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: vminpd %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
+; BDVER2-NEXT: vminpd (%rdi), %ymm0, %ymm0 # sched: [10:1.00]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-LABEL: test_minpd:
; BTVER2: # %bb.0:
; BTVER2-NEXT: vminpd %ymm1, %ymm0, %ymm0 # sched: [2:2.00]
; SKX-NEXT: vminps (%rdi), %ymm0, %ymm0 # sched: [11:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-LABEL: test_minps:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: vminps %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
+; BDVER2-NEXT: vminps (%rdi), %ymm0, %ymm0 # sched: [10:1.00]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-LABEL: test_minps:
; BTVER2: # %bb.0:
; BTVER2-NEXT: vminps %ymm1, %ymm0, %ymm0 # sched: [2:2.00]
; SKX-NEXT: vmovapd %ymm0, (%rsi) # sched: [1:1.00]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-LABEL: test_movapd:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: vmovapd (%rdi), %ymm0 # sched: [7:0.50]
+; BDVER2-NEXT: vaddpd %ymm0, %ymm0, %ymm0 # sched: [3:1.00]
+; BDVER2-NEXT: vmovapd %ymm0, (%rsi) # sched: [1:1.00]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-LABEL: test_movapd:
; BTVER2: # %bb.0:
; BTVER2-NEXT: vmovapd (%rdi), %ymm0 # sched: [5:1.00]
; SKX-NEXT: vmovaps %ymm0, (%rsi) # sched: [1:1.00]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-LABEL: test_movaps:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: vmovaps (%rdi), %ymm0 # sched: [7:0.50]
+; BDVER2-NEXT: vaddps %ymm0, %ymm0, %ymm0 # sched: [3:1.00]
+; BDVER2-NEXT: vmovaps %ymm0, (%rsi) # sched: [1:1.00]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-LABEL: test_movaps:
; BTVER2: # %bb.0:
; BTVER2-NEXT: vmovaps (%rdi), %ymm0 # sched: [5:1.00]
; SKX-NEXT: vaddpd %ymm1, %ymm0, %ymm0 # sched: [4:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-LABEL: test_movddup:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: vmovddup {{.*#+}} ymm0 = ymm0[0,0,2,2] sched: [1:1.00]
+; BDVER2-NEXT: vmovddup {{.*#+}} ymm1 = mem[0,0,2,2] sched: [7:0.50]
+; BDVER2-NEXT: vaddpd %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-LABEL: test_movddup:
; BTVER2: # %bb.0:
; BTVER2-NEXT: vmovddup {{.*#+}} ymm1 = mem[0,0,2,2] sched: [6:2.00]
; SKX-NEXT: vzeroupper # sched: [4:1.00]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-LABEL: test_movmskpd:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: vmovmskpd %ymm0, %eax # sched: [2:1.00]
+; BDVER2-NEXT: vzeroupper # sched: [100:0.33]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-LABEL: test_movmskpd:
; BTVER2: # %bb.0:
; BTVER2-NEXT: vmovmskpd %ymm0, %eax # sched: [3:1.00]
; SKX-NEXT: vzeroupper # sched: [4:1.00]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-LABEL: test_movmskps:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: vmovmskps %ymm0, %eax # sched: [2:1.00]
+; BDVER2-NEXT: vzeroupper # sched: [100:0.33]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-LABEL: test_movmskps:
; BTVER2: # %bb.0:
; BTVER2-NEXT: vmovmskps %ymm0, %eax # sched: [3:1.00]
; SKX-NEXT: vzeroupper # sched: [4:1.00]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-LABEL: test_movntdq:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: #APP
+; BDVER2-NEXT: vmovntdq %ymm0, (%rdi) # sched: [1:1.00]
+; BDVER2-NEXT: #NO_APP
+; BDVER2-NEXT: vzeroupper # sched: [100:0.33]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-LABEL: test_movntdq:
; BTVER2: # %bb.0:
; BTVER2-NEXT: #APP
; SKX-NEXT: vmovntpd %ymm0, (%rdi) # sched: [1:1.00]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-LABEL: test_movntpd:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: vaddpd %ymm0, %ymm0, %ymm0 # sched: [3:1.00]
+; BDVER2-NEXT: vmovntpd %ymm0, (%rdi) # sched: [1:1.00]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-LABEL: test_movntpd:
; BTVER2: # %bb.0:
; BTVER2-NEXT: vaddpd %ymm0, %ymm0, %ymm0 # sched: [3:2.00]
; SKX-NEXT: vmovntps %ymm0, (%rdi) # sched: [1:1.00]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-LABEL: test_movntps:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: vaddps %ymm0, %ymm0, %ymm0 # sched: [3:1.00]
+; BDVER2-NEXT: vmovntps %ymm0, (%rdi) # sched: [1:1.00]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-LABEL: test_movntps:
; BTVER2: # %bb.0:
; BTVER2-NEXT: vaddps %ymm0, %ymm0, %ymm0 # sched: [3:2.00]
; SKX-NEXT: vaddps %ymm1, %ymm0, %ymm0 # sched: [4:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-LABEL: test_movshdup:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: vmovshdup {{.*#+}} ymm0 = ymm0[1,1,3,3,5,5,7,7] sched: [1:1.00]
+; BDVER2-NEXT: vmovshdup {{.*#+}} ymm1 = mem[1,1,3,3,5,5,7,7] sched: [7:0.50]
+; BDVER2-NEXT: vaddps %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-LABEL: test_movshdup:
; BTVER2: # %bb.0:
; BTVER2-NEXT: vmovshdup {{.*#+}} ymm1 = mem[1,1,3,3,5,5,7,7] sched: [6:2.00]
; SKX-NEXT: vaddps %ymm1, %ymm0, %ymm0 # sched: [4:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-LABEL: test_movsldup:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: vmovsldup {{.*#+}} ymm0 = ymm0[0,0,2,2,4,4,6,6] sched: [1:1.00]
+; BDVER2-NEXT: vmovsldup {{.*#+}} ymm1 = mem[0,0,2,2,4,4,6,6] sched: [7:0.50]
+; BDVER2-NEXT: vaddps %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-LABEL: test_movsldup:
; BTVER2: # %bb.0:
; BTVER2-NEXT: vmovsldup {{.*#+}} ymm1 = mem[0,0,2,2,4,4,6,6] sched: [6:2.00]
; SKX-NEXT: vmovupd %ymm0, (%rsi) # sched: [1:1.00]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-LABEL: test_movupd:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: vmovupd (%rdi), %ymm0 # sched: [7:0.50]
+; BDVER2-NEXT: vaddpd %ymm0, %ymm0, %ymm0 # sched: [3:1.00]
+; BDVER2-NEXT: vmovupd %ymm0, (%rsi) # sched: [1:1.00]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-LABEL: test_movupd:
; BTVER2: # %bb.0:
; BTVER2-NEXT: vmovupd (%rdi), %ymm0 # sched: [5:1.00]
; SKX-NEXT: vmovups %ymm0, (%rsi) # sched: [1:1.00]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-LABEL: test_movups:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: vmovups (%rdi), %ymm0 # sched: [7:0.50]
+; BDVER2-NEXT: vaddps %ymm0, %ymm0, %ymm0 # sched: [3:1.00]
+; BDVER2-NEXT: vmovups %ymm0, (%rsi) # sched: [1:1.00]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-LABEL: test_movups:
; BTVER2: # %bb.0:
; BTVER2-NEXT: vmovups (%rdi), %ymm0 # sched: [5:1.00]
; SKX-NEXT: vmulpd (%rdi), %ymm0, %ymm0 # sched: [11:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-LABEL: test_mulpd:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: vmulpd %ymm1, %ymm0, %ymm0 # sched: [5:1.00]
+; BDVER2-NEXT: vmulpd (%rdi), %ymm0, %ymm0 # sched: [12:1.00]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-LABEL: test_mulpd:
; BTVER2: # %bb.0:
; BTVER2-NEXT: vmulpd %ymm1, %ymm0, %ymm0 # sched: [4:4.00]
; SKX-NEXT: vmulps (%rdi), %ymm0, %ymm0 # sched: [11:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-LABEL: test_mulps:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: vmulps %ymm1, %ymm0, %ymm0 # sched: [5:1.00]
+; BDVER2-NEXT: vmulps (%rdi), %ymm0, %ymm0 # sched: [12:1.00]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-LABEL: test_mulps:
; BTVER2: # %bb.0:
; BTVER2-NEXT: vmulps %ymm1, %ymm0, %ymm0 # sched: [2:2.00]
; SKX-NEXT: vaddpd %ymm0, %ymm1, %ymm0 # sched: [4:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-LABEL: orpd:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: vorpd %ymm1, %ymm0, %ymm0 # sched: [1:1.00]
+; BDVER2-NEXT: vorpd (%rdi), %ymm0, %ymm0 # sched: [8:1.00]
+; BDVER2-NEXT: vaddpd %ymm0, %ymm1, %ymm0 # sched: [3:1.00]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-LABEL: orpd:
; BTVER2: # %bb.0:
; BTVER2-NEXT: vorpd %ymm1, %ymm0, %ymm0 # sched: [1:1.00]
; SKX-NEXT: vaddps %ymm0, %ymm1, %ymm0 # sched: [4:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-LABEL: test_orps:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: vorps %ymm1, %ymm0, %ymm0 # sched: [1:1.00]
+; BDVER2-NEXT: vorps (%rdi), %ymm0, %ymm0 # sched: [8:1.00]
+; BDVER2-NEXT: vaddps %ymm0, %ymm1, %ymm0 # sched: [3:1.00]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-LABEL: test_orps:
; BTVER2: # %bb.0:
; BTVER2-NEXT: vorps %ymm1, %ymm0, %ymm0 # sched: [1:1.00]
; SKX-NEXT: vaddpd %ymm0, %ymm1, %ymm0 # sched: [4:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-LABEL: test_perm2f128:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: vperm2f128 {{.*#+}} ymm1 = ymm0[2,3],ymm1[0,1] sched: [1:1.00]
+; BDVER2-NEXT: vperm2f128 {{.*#+}} ymm0 = ymm0[2,3],mem[0,1] sched: [8:1.00]
+; BDVER2-NEXT: vaddpd %ymm0, %ymm1, %ymm0 # sched: [3:1.00]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-LABEL: test_perm2f128:
; BTVER2: # %bb.0:
; BTVER2-NEXT: vperm2f128 {{.*#+}} ymm1 = ymm0[2,3],ymm1[0,1] sched: [1:1.00]
; SKX-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-LABEL: test_permilpd:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0] sched: [1:1.00]
+; BDVER2-NEXT: vpermilpd {{.*#+}} xmm1 = mem[1,0] sched: [7:1.00]
+; BDVER2-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-LABEL: test_permilpd:
; BTVER2: # %bb.0:
; BTVER2-NEXT: vpermilpd {{.*#+}} xmm1 = mem[1,0] sched: [6:1.00]
; SKX-NEXT: vaddpd %ymm1, %ymm0, %ymm0 # sched: [4:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-LABEL: test_permilpd_ymm:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: vpermilpd {{.*#+}} ymm0 = ymm0[1,0,2,3] sched: [1:1.00]
+; BDVER2-NEXT: vpermilpd {{.*#+}} ymm1 = mem[1,0,2,3] sched: [8:1.00]
+; BDVER2-NEXT: vaddpd %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-LABEL: test_permilpd_ymm:
; BTVER2: # %bb.0:
; BTVER2-NEXT: vpermilpd {{.*#+}} ymm1 = mem[1,0,2,3] sched: [6:2.00]
; SKX-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-LABEL: test_permilps:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[3,2,1,0] sched: [1:1.00]
+; BDVER2-NEXT: vpermilps {{.*#+}} xmm1 = mem[3,2,1,0] sched: [7:1.00]
+; BDVER2-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-LABEL: test_permilps:
; BTVER2: # %bb.0:
; BTVER2-NEXT: vpermilps {{.*#+}} xmm1 = mem[3,2,1,0] sched: [6:1.00]
; SKX-NEXT: vaddps %ymm1, %ymm0, %ymm0 # sched: [4:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-LABEL: test_permilps_ymm:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4] sched: [1:1.00]
+; BDVER2-NEXT: vpermilps {{.*#+}} ymm1 = mem[3,2,1,0,7,6,5,4] sched: [8:1.00]
+; BDVER2-NEXT: vaddps %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-LABEL: test_permilps_ymm:
; BTVER2: # %bb.0:
; BTVER2-NEXT: vpermilps {{.*#+}} ymm1 = mem[3,2,1,0,7,6,5,4] sched: [6:2.00]
; SKX-NEXT: vpermilpd (%rdi), %xmm0, %xmm0 # sched: [7:1.00]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-LABEL: test_permilvarpd:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: vpermilpd %xmm1, %xmm0, %xmm0 # sched: [1:1.00]
+; BDVER2-NEXT: vpermilpd (%rdi), %xmm0, %xmm0 # sched: [7:1.00]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-LABEL: test_permilvarpd:
; BTVER2: # %bb.0:
; BTVER2-NEXT: vpermilpd %xmm1, %xmm0, %xmm0 # sched: [2:2.00]
; SKX-NEXT: vpermilpd (%rdi), %ymm0, %ymm0 # sched: [8:1.00]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-LABEL: test_permilvarpd_ymm:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: vpermilpd %ymm1, %ymm0, %ymm0 # sched: [1:1.00]
+; BDVER2-NEXT: vpermilpd (%rdi), %ymm0, %ymm0 # sched: [8:1.00]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-LABEL: test_permilvarpd_ymm:
; BTVER2: # %bb.0:
; BTVER2-NEXT: vpermilpd %ymm1, %ymm0, %ymm0 # sched: [3:3.00]
; SKX-NEXT: vpermilps (%rdi), %xmm0, %xmm0 # sched: [7:1.00]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-LABEL: test_permilvarps:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: vpermilps %xmm1, %xmm0, %xmm0 # sched: [1:1.00]
+; BDVER2-NEXT: vpermilps (%rdi), %xmm0, %xmm0 # sched: [7:1.00]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-LABEL: test_permilvarps:
; BTVER2: # %bb.0:
; BTVER2-NEXT: vpermilps %xmm1, %xmm0, %xmm0 # sched: [2:2.00]
; SKX-NEXT: vpermilps (%rdi), %ymm0, %ymm0 # sched: [8:1.00]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-LABEL: test_permilvarps_ymm:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: vpermilps %ymm1, %ymm0, %ymm0 # sched: [1:1.00]
+; BDVER2-NEXT: vpermilps (%rdi), %ymm0, %ymm0 # sched: [8:1.00]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-LABEL: test_permilvarps_ymm:
; BTVER2: # %bb.0:
; BTVER2-NEXT: vpermilps %ymm1, %ymm0, %ymm0 # sched: [3:3.00]
; SKX-NEXT: vaddps %ymm1, %ymm0, %ymm0 # sched: [4:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-LABEL: test_rcpps:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: vrcpps (%rdi), %ymm1 # sched: [14:2.00]
+; BDVER2-NEXT: vrcpps %ymm0, %ymm0 # sched: [7:2.00]
+; BDVER2-NEXT: vaddps %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-LABEL: test_rcpps:
; BTVER2: # %bb.0:
; BTVER2-NEXT: vrcpps (%rdi), %ymm1 # sched: [7:2.00]
; SKX-NEXT: vaddpd %ymm1, %ymm0, %ymm0 # sched: [4:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-LABEL: test_roundpd:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: vroundpd $7, %ymm0, %ymm0 # sched: [3:1.00]
+; BDVER2-NEXT: vroundpd $7, (%rdi), %ymm1 # sched: [10:1.00]
+; BDVER2-NEXT: vaddpd %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-LABEL: test_roundpd:
; BTVER2: # %bb.0:
; BTVER2-NEXT: vroundpd $7, (%rdi), %ymm1 # sched: [8:2.00]
; SKX-NEXT: vaddps %ymm1, %ymm0, %ymm0 # sched: [4:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-LABEL: test_roundps:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: vroundps $7, %ymm0, %ymm0 # sched: [3:1.00]
+; BDVER2-NEXT: vroundps $7, (%rdi), %ymm1 # sched: [10:1.00]
+; BDVER2-NEXT: vaddps %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-LABEL: test_roundps:
; BTVER2: # %bb.0:
; BTVER2-NEXT: vroundps $7, (%rdi), %ymm1 # sched: [8:2.00]
; SKX-NEXT: vaddps %ymm1, %ymm0, %ymm0 # sched: [4:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-LABEL: test_rsqrtps:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: vrsqrtps (%rdi), %ymm1 # sched: [14:2.00]
+; BDVER2-NEXT: vrsqrtps %ymm0, %ymm0 # sched: [7:2.00]
+; BDVER2-NEXT: vaddps %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-LABEL: test_rsqrtps:
; BTVER2: # %bb.0:
; BTVER2-NEXT: vrsqrtps (%rdi), %ymm1 # sched: [7:2.00]
; SKX-NEXT: vaddpd %ymm1, %ymm0, %ymm0 # sched: [4:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-LABEL: test_shufpd:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: vshufpd {{.*#+}} ymm0 = ymm0[1],ymm1[0],ymm0[2],ymm1[3] sched: [1:1.00]
+; BDVER2-NEXT: vshufpd {{.*#+}} ymm1 = ymm1[1],mem[0],ymm1[2],mem[3] sched: [8:1.00]
+; BDVER2-NEXT: vaddpd %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-LABEL: test_shufpd:
; BTVER2: # %bb.0:
; BTVER2-NEXT: vshufpd {{.*#+}} ymm0 = ymm0[1],ymm1[0],ymm0[2],ymm1[3] sched: [1:1.00]
; SKX-NEXT: vaddps %ymm1, %ymm0, %ymm0 # sched: [4:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-LABEL: test_shufps:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: vshufps {{.*#+}} ymm0 = ymm0[0,0],ymm1[0,0],ymm0[4,4],ymm1[4,4] sched: [1:1.00]
+; BDVER2-NEXT: vshufps {{.*#+}} ymm1 = ymm1[0,3],mem[0,0],ymm1[4,7],mem[4,4] sched: [8:1.00]
+; BDVER2-NEXT: vaddps %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-LABEL: test_shufps:
; BTVER2: # %bb.0:
; BTVER2-NEXT: vshufps {{.*#+}} ymm0 = ymm0[0,0],ymm1[0,0],ymm0[4,4],ymm1[4,4] sched: [1:1.00]
; SKX-NEXT: vaddpd %ymm1, %ymm0, %ymm0 # sched: [4:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-LABEL: test_sqrtpd:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: vsqrtpd (%rdi), %ymm1 # sched: [52:44.00]
+; BDVER2-NEXT: vsqrtpd %ymm0, %ymm0 # sched: [45:44.00]
+; BDVER2-NEXT: vaddpd %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-LABEL: test_sqrtpd:
; BTVER2: # %bb.0:
; BTVER2-NEXT: vsqrtpd (%rdi), %ymm1 # sched: [59:54.00]
; SKX-NEXT: vaddps %ymm1, %ymm0, %ymm0 # sched: [4:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-LABEL: test_sqrtps:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: vsqrtps (%rdi), %ymm1 # sched: [36:28.00]
+; BDVER2-NEXT: vsqrtps %ymm0, %ymm0 # sched: [29:28.00]
+; BDVER2-NEXT: vaddps %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-LABEL: test_sqrtps:
; BTVER2: # %bb.0:
; BTVER2-NEXT: vsqrtps (%rdi), %ymm1 # sched: [47:42.00]
; SKX-NEXT: vsubpd (%rdi), %ymm0, %ymm0 # sched: [11:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-LABEL: test_subpd:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: vsubpd %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
+; BDVER2-NEXT: vsubpd (%rdi), %ymm0, %ymm0 # sched: [10:1.00]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-LABEL: test_subpd:
; BTVER2: # %bb.0:
; BTVER2-NEXT: vsubpd %ymm1, %ymm0, %ymm0 # sched: [3:2.00]
; SKX-NEXT: vsubps (%rdi), %ymm0, %ymm0 # sched: [11:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-LABEL: test_subps:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: vsubps %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
+; BDVER2-NEXT: vsubps (%rdi), %ymm0, %ymm0 # sched: [10:1.00]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-LABEL: test_subps:
; BTVER2: # %bb.0:
; BTVER2-NEXT: vsubps %ymm1, %ymm0, %ymm0 # sched: [3:2.00]
; SKX-NEXT: adcl $0, %eax # sched: [1:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-LABEL: test_testpd:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: xorl %eax, %eax # sched: [0:0.25]
+; BDVER2-NEXT: vtestpd %xmm1, %xmm0 # sched: [1:1.00]
+; BDVER2-NEXT: setb %al # sched: [1:0.50]
+; BDVER2-NEXT: vtestpd (%rdi), %xmm0 # sched: [7:1.00]
+; BDVER2-NEXT: adcl $0, %eax # sched: [2:0.67]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-LABEL: test_testpd:
; BTVER2: # %bb.0:
; BTVER2-NEXT: xorl %eax, %eax # sched: [0:0.50]
; SKX-NEXT: vzeroupper # sched: [4:1.00]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-LABEL: test_testpd_ymm:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: xorl %eax, %eax # sched: [0:0.25]
+; BDVER2-NEXT: vtestpd %ymm1, %ymm0 # sched: [1:1.00]
+; BDVER2-NEXT: setb %al # sched: [1:0.50]
+; BDVER2-NEXT: vtestpd (%rdi), %ymm0 # sched: [8:1.00]
+; BDVER2-NEXT: adcl $0, %eax # sched: [2:0.67]
+; BDVER2-NEXT: vzeroupper # sched: [100:0.33]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-LABEL: test_testpd_ymm:
; BTVER2: # %bb.0:
; BTVER2-NEXT: xorl %eax, %eax # sched: [0:0.50]
; SKX-NEXT: adcl $0, %eax # sched: [1:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-LABEL: test_testps:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: xorl %eax, %eax # sched: [0:0.25]
+; BDVER2-NEXT: vtestps %xmm1, %xmm0 # sched: [1:1.00]
+; BDVER2-NEXT: setb %al # sched: [1:0.50]
+; BDVER2-NEXT: vtestps (%rdi), %xmm0 # sched: [7:1.00]
+; BDVER2-NEXT: adcl $0, %eax # sched: [2:0.67]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-LABEL: test_testps:
; BTVER2: # %bb.0:
; BTVER2-NEXT: xorl %eax, %eax # sched: [0:0.50]
; SKX-NEXT: vzeroupper # sched: [4:1.00]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-LABEL: test_testps_ymm:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: xorl %eax, %eax # sched: [0:0.25]
+; BDVER2-NEXT: vtestps %ymm1, %ymm0 # sched: [1:1.00]
+; BDVER2-NEXT: setb %al # sched: [1:0.50]
+; BDVER2-NEXT: vtestps (%rdi), %ymm0 # sched: [8:1.00]
+; BDVER2-NEXT: adcl $0, %eax # sched: [2:0.67]
+; BDVER2-NEXT: vzeroupper # sched: [100:0.33]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-LABEL: test_testps_ymm:
; BTVER2: # %bb.0:
; BTVER2-NEXT: xorl %eax, %eax # sched: [0:0.50]
; SKX-NEXT: vaddpd %ymm1, %ymm0, %ymm0 # sched: [4:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-LABEL: test_unpckhpd:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: vunpckhpd {{.*#+}} ymm0 = ymm0[1],ymm1[1],ymm0[3],ymm1[3] sched: [1:1.00]
+; BDVER2-NEXT: vunpckhpd {{.*#+}} ymm1 = ymm1[1],mem[1],ymm1[3],mem[3] sched: [8:1.00]
+; BDVER2-NEXT: vaddpd %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-LABEL: test_unpckhpd:
; BTVER2: # %bb.0:
; BTVER2-NEXT: vunpckhpd {{.*#+}} ymm0 = ymm0[1],ymm1[1],ymm0[3],ymm1[3] sched: [1:1.00]
; SKX-NEXT: vunpckhps {{.*#+}} ymm0 = ymm0[2],mem[2],ymm0[3],mem[3],ymm0[6],mem[6],ymm0[7],mem[7] sched: [8:1.00]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-LABEL: test_unpckhps:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: vunpckhps {{.*#+}} ymm0 = ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[6],ymm1[6],ymm0[7],ymm1[7] sched: [1:1.00]
+; BDVER2-NEXT: vunpckhps {{.*#+}} ymm0 = ymm0[2],mem[2],ymm0[3],mem[3],ymm0[6],mem[6],ymm0[7],mem[7] sched: [8:1.00]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-LABEL: test_unpckhps:
; BTVER2: # %bb.0:
; BTVER2-NEXT: vunpckhps {{.*#+}} ymm0 = ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[6],ymm1[6],ymm0[7],ymm1[7] sched: [1:1.00]
; SKX-NEXT: vaddpd %ymm1, %ymm0, %ymm0 # sched: [4:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-LABEL: test_unpcklpd:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: vunpcklpd {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[2],ymm1[2] sched: [1:1.00]
+; BDVER2-NEXT: vunpcklpd {{.*#+}} ymm1 = ymm1[0],mem[0],ymm1[2],mem[2] sched: [8:1.00]
+; BDVER2-NEXT: vaddpd %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-LABEL: test_unpcklpd:
; BTVER2: # %bb.0:
; BTVER2-NEXT: vunpcklpd {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[2],ymm1[2] sched: [1:1.00]
; SKX-NEXT: vunpcklps {{.*#+}} ymm0 = ymm0[0],mem[0],ymm0[1],mem[1],ymm0[4],mem[4],ymm0[5],mem[5] sched: [8:1.00]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-LABEL: test_unpcklps:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: vunpcklps {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[4],ymm1[4],ymm0[5],ymm1[5] sched: [1:1.00]
+; BDVER2-NEXT: vunpcklps {{.*#+}} ymm0 = ymm0[0],mem[0],ymm0[1],mem[1],ymm0[4],mem[4],ymm0[5],mem[5] sched: [8:1.00]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-LABEL: test_unpcklps:
; BTVER2: # %bb.0:
; BTVER2-NEXT: vunpcklps {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[4],ymm1[4],ymm0[5],ymm1[5] sched: [1:1.00]
; SKX-NEXT: vaddpd %ymm0, %ymm1, %ymm0 # sched: [4:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-LABEL: test_xorpd:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: vxorpd %ymm1, %ymm0, %ymm0 # sched: [1:1.00]
+; BDVER2-NEXT: vxorpd (%rdi), %ymm0, %ymm0 # sched: [8:1.00]
+; BDVER2-NEXT: vaddpd %ymm0, %ymm1, %ymm0 # sched: [3:1.00]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-LABEL: test_xorpd:
; BTVER2: # %bb.0:
; BTVER2-NEXT: vxorpd %ymm1, %ymm0, %ymm0 # sched: [1:1.00]
; SKX-NEXT: vaddps %ymm0, %ymm1, %ymm0 # sched: [4:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-LABEL: test_xorps:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: vxorps %ymm1, %ymm0, %ymm0 # sched: [1:1.00]
+; BDVER2-NEXT: vxorps (%rdi), %ymm0, %ymm0 # sched: [8:1.00]
+; BDVER2-NEXT: vaddps %ymm0, %ymm1, %ymm0 # sched: [3:1.00]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-LABEL: test_xorps:
; BTVER2: # %bb.0:
; BTVER2-NEXT: vxorps %ymm1, %ymm0, %ymm0 # sched: [1:1.00]
; SKX-NEXT: vzeroall # sched: [12:5.00]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-LABEL: test_zeroall:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: vzeroall # sched: [9:2.00]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-LABEL: test_zeroall:
; BTVER2: # %bb.0:
; BTVER2-NEXT: vzeroall # sched: [90:36.50]
; SKX-NEXT: vzeroupper # sched: [4:1.00]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-LABEL: test_zeroupper:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: vzeroupper # sched: [100:0.33]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-LABEL: test_zeroupper:
; BTVER2: # %bb.0:
; BTVER2-NEXT: vzeroupper # sched: [46:18.50]
; SKX-NEXT: #NO_APP
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-LABEL: test_avx256_zero_idioms:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: #APP
+; BDVER2-NEXT: vxorps %ymm0, %ymm0, %ymm0 # sched: [1:1.00]
+; BDVER2-NEXT: vxorpd %ymm1, %ymm1, %ymm1 # sched: [1:1.00]
+; BDVER2-NEXT: vandnps %ymm2, %ymm2, %ymm2 # sched: [1:1.00]
+; BDVER2-NEXT: vandnpd %ymm3, %ymm3, %ymm3 # sched: [1:1.00]
+; BDVER2-NEXT: #NO_APP
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-LABEL: test_avx256_zero_idioms:
; BTVER2: # %bb.0:
; BTVER2-NEXT: #APP
; RUN: llc < %s -x86-use-vzeroupper -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s --check-prefix=ALL --check-prefix=VZ --check-prefix=AVX
; RUN: llc < %s -x86-use-vzeroupper -mtriple=x86_64-unknown-unknown -mattr=+avx512f | FileCheck %s --check-prefix=ALL --check-prefix=VZ --check-prefix=AVX512
; RUN: llc < %s -x86-use-vzeroupper -mtriple=x86_64-unknown-unknown -mattr=+avx,+fast-partial-ymm-or-zmm-write | FileCheck %s --check-prefix=ALL --check-prefix=NO-VZ --check-prefix=FAST-ymm-zmm
+; RUN: llc < %s -x86-use-vzeroupper -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -mattr=+avx | FileCheck %s --check-prefix=ALL --check-prefix=NO-VZ --check-prefix=BDVER2
; RUN: llc < %s -x86-use-vzeroupper -mtriple=x86_64-unknown-unknown -mcpu=btver2 | FileCheck %s --check-prefix=ALL --check-prefix=NO-VZ --check-prefix=BTVER2
declare i32 @foo()
; FAST-ymm-zmm-NEXT: addq $56, %rsp
; FAST-ymm-zmm-NEXT: retq
;
+; BDVER2-LABEL: test01:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: subq $56, %rsp
+; BDVER2-NEXT: vmovups %ymm2, (%rsp) # 32-byte Spill
+; BDVER2-NEXT: vmovaps {{.*}}(%rip), %xmm0
+; BDVER2-NEXT: vzeroupper
+; BDVER2-NEXT: callq do_sse
+; BDVER2-NEXT: vmovaps %xmm0, {{.*}}(%rip)
+; BDVER2-NEXT: callq do_sse
+; BDVER2-NEXT: vmovaps %xmm0, {{.*}}(%rip)
+; BDVER2-NEXT: vmovups (%rsp), %ymm0 # 32-byte Reload
+; BDVER2-NEXT: addq $56, %rsp
+; BDVER2-NEXT: retq
+;
; BTVER2-LABEL: test01:
; BTVER2: # %bb.0:
; BTVER2-NEXT: subq $56, %rsp
; VZ-NEXT: vzeroupper
; VZ-NEXT: jmp do_sse # TAILCALL
;
-; NO-VZ-LABEL: test02:
-; NO-VZ: # %bb.0:
-; NO-VZ-NEXT: vaddps %ymm1, %ymm0, %ymm0
-; NO-VZ-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0
-; NO-VZ-NEXT: jmp do_sse # TAILCALL
+; FAST-ymm-zmm-LABEL: test02:
+; FAST-ymm-zmm: # %bb.0:
+; FAST-ymm-zmm-NEXT: vaddps %ymm1, %ymm0, %ymm0
+; FAST-ymm-zmm-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0
+; FAST-ymm-zmm-NEXT: jmp do_sse # TAILCALL
+;
+; BDVER2-LABEL: test02:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: vaddps %ymm1, %ymm0, %ymm0
+; BDVER2-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0
+; BDVER2-NEXT: vzeroupper
+; BDVER2-NEXT: jmp do_sse # TAILCALL
+;
+; BTVER2-LABEL: test02:
+; BTVER2: # %bb.0:
+; BTVER2-NEXT: vaddps %ymm1, %ymm0, %ymm0
+; BTVER2-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0
+; BTVER2-NEXT: jmp do_sse # TAILCALL
%add.i = fadd <8 x float> %a, %b
%add.low = call <4 x float> @llvm.x86.avx.vextractf128.ps.256(<8 x float> %add.i, i8 0)
%call3 = tail call <4 x float> @do_sse(<4 x float> %add.low) nounwind
; FAST-ymm-zmm-NEXT: popq %rbx
; FAST-ymm-zmm-NEXT: retq
;
+; BDVER2-LABEL: test03:
+; BDVER2: # %bb.0: # %entry
+; BDVER2-NEXT: pushq %rbx
+; BDVER2-NEXT: subq $16, %rsp
+; BDVER2-NEXT: vaddps %xmm1, %xmm0, %xmm0
+; BDVER2-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill
+; BDVER2-NEXT: .p2align 4, 0x90
+; BDVER2-NEXT: .LBB3_1: # %while.cond
+; BDVER2-NEXT: # =>This Inner Loop Header: Depth=1
+; BDVER2-NEXT: callq foo
+; BDVER2-NEXT: testl %eax, %eax
+; BDVER2-NEXT: jne .LBB3_1
+; BDVER2-NEXT: # %bb.2: # %for.body.preheader
+; BDVER2-NEXT: movl $4, %ebx
+; BDVER2-NEXT: vmovaps (%rsp), %xmm0 # 16-byte Reload
+; BDVER2-NEXT: .p2align 4, 0x90
+; BDVER2-NEXT: .LBB3_3: # %for.body
+; BDVER2-NEXT: # =>This Inner Loop Header: Depth=1
+; BDVER2-NEXT: callq do_sse
+; BDVER2-NEXT: callq do_sse
+; BDVER2-NEXT: vmovaps {{.*}}(%rip), %ymm0
+; BDVER2-NEXT: vextractf128 $1, %ymm0, %xmm0
+; BDVER2-NEXT: vzeroupper
+; BDVER2-NEXT: callq do_sse
+; BDVER2-NEXT: addl $-1, %ebx
+; BDVER2-NEXT: jne .LBB3_3
+; BDVER2-NEXT: # %bb.4: # %for.end
+; BDVER2-NEXT: addq $16, %rsp
+; BDVER2-NEXT: popq %rbx
+; BDVER2-NEXT: retq
+;
; BTVER2-LABEL: test03:
; BTVER2: # %bb.0: # %entry
; BTVER2-NEXT: pushq %rbx
; VZ-NEXT: vzeroupper
; VZ-NEXT: retq
;
-; NO-VZ-LABEL: test04:
-; NO-VZ: # %bb.0:
-; NO-VZ-NEXT: pushq %rax
-; NO-VZ-NEXT: # kill: def $xmm0 killed $xmm0 def $ymm0
-; NO-VZ-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
-; NO-VZ-NEXT: callq do_avx
-; NO-VZ-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0
-; NO-VZ-NEXT: popq %rax
-; NO-VZ-NEXT: retq
+; FAST-ymm-zmm-LABEL: test04:
+; FAST-ymm-zmm: # %bb.0:
+; FAST-ymm-zmm-NEXT: pushq %rax
+; FAST-ymm-zmm-NEXT: # kill: def $xmm0 killed $xmm0 def $ymm0
+; FAST-ymm-zmm-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
+; FAST-ymm-zmm-NEXT: callq do_avx
+; FAST-ymm-zmm-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0
+; FAST-ymm-zmm-NEXT: popq %rax
+; FAST-ymm-zmm-NEXT: retq
+;
+; BDVER2-LABEL: test04:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: pushq %rax
+; BDVER2-NEXT: # kill: def $xmm0 killed $xmm0 def $ymm0
+; BDVER2-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
+; BDVER2-NEXT: callq do_avx
+; BDVER2-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0
+; BDVER2-NEXT: popq %rax
+; BDVER2-NEXT: vzeroupper
+; BDVER2-NEXT: retq
+;
+; BTVER2-LABEL: test04:
+; BTVER2: # %bb.0:
+; BTVER2-NEXT: pushq %rax
+; BTVER2-NEXT: # kill: def $xmm0 killed $xmm0 def $ymm0
+; BTVER2-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
+; BTVER2-NEXT: callq do_avx
+; BTVER2-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0
+; BTVER2-NEXT: popq %rax
+; BTVER2-NEXT: retq
%shuf = shufflevector <4 x float> %a, <4 x float> %b, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
%call = call <8 x float> @do_avx(<8 x float> %shuf) nounwind
%shuf2 = shufflevector <8 x float> %call, <8 x float> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=broadwell | FileCheck %s --check-prefix=CHECK --check-prefix=BROADWELL
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=skylake | FileCheck %s --check-prefix=CHECK --check-prefix=SKYLAKE
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=knl | FileCheck %s --check-prefix=CHECK --check-prefix=HASWELL
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=x86-64 -mattr=+bmi | FileCheck %s --check-prefix=CHECK --check-prefix=BDVER2
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=btver2 | FileCheck %s --check-prefix=CHECK --check-prefix=BTVER2
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=znver1 | FileCheck %s --check-prefix=CHECK --check-prefix=ZNVER1
; SKYLAKE-NEXT: addl %ecx, %eax # sched: [1:0.25]
; SKYLAKE-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-LABEL: test_andn_i32:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: andnl %esi, %edi, %ecx # sched: [1:0.33]
+; BDVER2-NEXT: andnl (%rdx), %edi, %eax # sched: [6:0.50]
+; BDVER2-NEXT: addl %ecx, %eax # sched: [1:0.33]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-LABEL: test_andn_i32:
; BTVER2: # %bb.0:
; BTVER2-NEXT: andnl (%rdx), %edi, %eax # sched: [4:1.00]
; SKYLAKE-NEXT: addq %rcx, %rax # sched: [1:0.25]
; SKYLAKE-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-LABEL: test_andn_i64:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: andnq %rsi, %rdi, %rcx # sched: [1:0.33]
+; BDVER2-NEXT: andnq (%rdx), %rdi, %rax # sched: [6:0.50]
+; BDVER2-NEXT: addq %rcx, %rax # sched: [1:0.33]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-LABEL: test_andn_i64:
; BTVER2: # %bb.0:
; BTVER2-NEXT: andnq (%rdx), %rdi, %rax # sched: [4:1.00]
; SKYLAKE-NEXT: addl %ecx, %eax # sched: [1:0.25]
; SKYLAKE-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-LABEL: test_bextr_i32:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: bextrl %edi, (%rdx), %ecx # sched: [7:1.00]
+; BDVER2-NEXT: bextrl %edi, %esi, %eax # sched: [2:1.00]
+; BDVER2-NEXT: addl %ecx, %eax # sched: [1:0.33]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-LABEL: test_bextr_i32:
; BTVER2: # %bb.0:
; BTVER2-NEXT: bextrl %edi, (%rdx), %ecx # sched: [4:1.00]
; SKYLAKE-NEXT: addq %rcx, %rax # sched: [1:0.25]
; SKYLAKE-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-LABEL: test_bextr_i64:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: bextrq %rdi, (%rdx), %rcx # sched: [7:1.00]
+; BDVER2-NEXT: bextrq %rdi, %rsi, %rax # sched: [2:1.00]
+; BDVER2-NEXT: addq %rcx, %rax # sched: [1:0.33]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-LABEL: test_bextr_i64:
; BTVER2: # %bb.0:
; BTVER2-NEXT: bextrq %rdi, (%rdx), %rcx # sched: [4:1.00]
; SKYLAKE-NEXT: addl %ecx, %eax # sched: [1:0.25]
; SKYLAKE-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-LABEL: test_blsi_i32:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: blsil (%rsi), %ecx # sched: [6:0.50]
+; BDVER2-NEXT: blsil %edi, %eax # sched: [1:0.33]
+; BDVER2-NEXT: addl %ecx, %eax # sched: [1:0.33]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-LABEL: test_blsi_i32:
; BTVER2: # %bb.0:
; BTVER2-NEXT: blsil (%rsi), %ecx # sched: [5:1.00]
; SKYLAKE-NEXT: addq %rcx, %rax # sched: [1:0.25]
; SKYLAKE-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-LABEL: test_blsi_i64:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: blsiq (%rsi), %rcx # sched: [6:0.50]
+; BDVER2-NEXT: blsiq %rdi, %rax # sched: [1:0.33]
+; BDVER2-NEXT: addq %rcx, %rax # sched: [1:0.33]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-LABEL: test_blsi_i64:
; BTVER2: # %bb.0:
; BTVER2-NEXT: blsiq (%rsi), %rcx # sched: [5:1.00]
; SKYLAKE-NEXT: addl %ecx, %eax # sched: [1:0.25]
; SKYLAKE-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-LABEL: test_blsmsk_i32:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: blsmskl (%rsi), %ecx # sched: [6:0.50]
+; BDVER2-NEXT: blsmskl %edi, %eax # sched: [1:0.33]
+; BDVER2-NEXT: addl %ecx, %eax # sched: [1:0.33]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-LABEL: test_blsmsk_i32:
; BTVER2: # %bb.0:
; BTVER2-NEXT: blsmskl (%rsi), %ecx # sched: [5:1.00]
; SKYLAKE-NEXT: addq %rcx, %rax # sched: [1:0.25]
; SKYLAKE-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-LABEL: test_blsmsk_i64:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: blsmskq (%rsi), %rcx # sched: [6:0.50]
+; BDVER2-NEXT: blsmskq %rdi, %rax # sched: [1:0.33]
+; BDVER2-NEXT: addq %rcx, %rax # sched: [1:0.33]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-LABEL: test_blsmsk_i64:
; BTVER2: # %bb.0:
; BTVER2-NEXT: blsmskq (%rsi), %rcx # sched: [5:1.00]
; SKYLAKE-NEXT: addl %ecx, %eax # sched: [1:0.25]
; SKYLAKE-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-LABEL: test_blsr_i32:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: blsrl (%rsi), %ecx # sched: [6:0.50]
+; BDVER2-NEXT: blsrl %edi, %eax # sched: [1:0.33]
+; BDVER2-NEXT: addl %ecx, %eax # sched: [1:0.33]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-LABEL: test_blsr_i32:
; BTVER2: # %bb.0:
; BTVER2-NEXT: blsrl (%rsi), %ecx # sched: [5:1.00]
; SKYLAKE-NEXT: addq %rcx, %rax # sched: [1:0.25]
; SKYLAKE-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-LABEL: test_blsr_i64:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: blsrq (%rsi), %rcx # sched: [6:0.50]
+; BDVER2-NEXT: blsrq %rdi, %rax # sched: [1:0.33]
+; BDVER2-NEXT: addq %rcx, %rax # sched: [1:0.33]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-LABEL: test_blsr_i64:
; BTVER2: # %bb.0:
; BTVER2-NEXT: blsrq (%rsi), %rcx # sched: [5:1.00]
; SKYLAKE-NEXT: # kill: def $ax killed $ax killed $eax
; SKYLAKE-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-LABEL: test_cttz_i16:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: tzcntw (%rsi), %cx # sched: [8:1.00]
+; BDVER2-NEXT: tzcntw %di, %ax # sched: [3:1.00]
+; BDVER2-NEXT: orl %ecx, %eax # sched: [1:0.33]
+; BDVER2-NEXT: # kill: def $ax killed $ax killed $eax
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-LABEL: test_cttz_i16:
; BTVER2: # %bb.0:
; BTVER2-NEXT: tzcntw (%rsi), %cx # sched: [5:1.00]
; SKYLAKE-NEXT: orl %ecx, %eax # sched: [1:0.25]
; SKYLAKE-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-LABEL: test_cttz_i32:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: tzcntl (%rsi), %ecx # sched: [8:1.00]
+; BDVER2-NEXT: tzcntl %edi, %eax # sched: [3:1.00]
+; BDVER2-NEXT: orl %ecx, %eax # sched: [1:0.33]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-LABEL: test_cttz_i32:
; BTVER2: # %bb.0:
; BTVER2-NEXT: tzcntl (%rsi), %ecx # sched: [5:1.00]
; SKYLAKE-NEXT: orq %rcx, %rax # sched: [1:0.25]
; SKYLAKE-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-LABEL: test_cttz_i64:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: tzcntq (%rsi), %rcx # sched: [8:1.00]
+; BDVER2-NEXT: tzcntq %rdi, %rax # sched: [3:1.00]
+; BDVER2-NEXT: orq %rcx, %rax # sched: [1:0.33]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-LABEL: test_cttz_i64:
; BTVER2: # %bb.0:
; BTVER2-NEXT: tzcntq (%rsi), %rcx # sched: [5:1.00]
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=broadwell | FileCheck %s --check-prefix=CHECK --check-prefix=BROADWELL
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=skylake | FileCheck %s --check-prefix=CHECK --check-prefix=SKYLAKE
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=skx | FileCheck %s --check-prefix=CHECK --check-prefix=SKX
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=x86-64 | FileCheck %s --check-prefix=CHECK --check-prefix=BDVER2
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=btver2 | FileCheck %s --check-prefix=CHECK --check-prefix=BTVER2
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=znver1 | FileCheck %s --check-prefix=CHECK --check-prefix=ZNVER1
; SKX-NEXT: #NO_APP
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-LABEL: test_cmov_16:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: #APP
+; BDVER2-NEXT: cmovow %si, %di # sched: [2:0.67]
+; BDVER2-NEXT: cmovnow %si, %di # sched: [2:0.67]
+; BDVER2-NEXT: cmovbw %si, %di # sched: [2:0.67]
+; BDVER2-NEXT: cmovbw %si, %di # sched: [2:0.67]
+; BDVER2-NEXT: cmovbw %si, %di # sched: [2:0.67]
+; BDVER2-NEXT: cmovaew %si, %di # sched: [2:0.67]
+; BDVER2-NEXT: cmovaew %si, %di # sched: [2:0.67]
+; BDVER2-NEXT: cmovaew %si, %di # sched: [2:0.67]
+; BDVER2-NEXT: cmovew %si, %di # sched: [2:0.67]
+; BDVER2-NEXT: cmovew %si, %di # sched: [2:0.67]
+; BDVER2-NEXT: cmovnew %si, %di # sched: [2:0.67]
+; BDVER2-NEXT: cmovnew %si, %di # sched: [2:0.67]
+; BDVER2-NEXT: cmovbew %si, %di # sched: [3:1.00]
+; BDVER2-NEXT: cmovbew %si, %di # sched: [3:1.00]
+; BDVER2-NEXT: cmovaw %si, %di # sched: [3:1.00]
+; BDVER2-NEXT: cmovaw %si, %di # sched: [3:1.00]
+; BDVER2-NEXT: cmovsw %si, %di # sched: [2:0.67]
+; BDVER2-NEXT: cmovnsw %si, %di # sched: [2:0.67]
+; BDVER2-NEXT: cmovpw %si, %di # sched: [2:0.67]
+; BDVER2-NEXT: cmovpw %si, %di # sched: [2:0.67]
+; BDVER2-NEXT: cmovnpw %si, %di # sched: [2:0.67]
+; BDVER2-NEXT: cmovnpw %si, %di # sched: [2:0.67]
+; BDVER2-NEXT: cmovlw %si, %di # sched: [2:0.67]
+; BDVER2-NEXT: cmovlw %si, %di # sched: [2:0.67]
+; BDVER2-NEXT: cmovgew %si, %di # sched: [2:0.67]
+; BDVER2-NEXT: cmovgew %si, %di # sched: [2:0.67]
+; BDVER2-NEXT: cmovlew %si, %di # sched: [2:0.67]
+; BDVER2-NEXT: cmovlew %si, %di # sched: [2:0.67]
+; BDVER2-NEXT: cmovgw %si, %di # sched: [2:0.67]
+; BDVER2-NEXT: cmovgw %si, %di # sched: [2:0.67]
+; BDVER2-NEXT: cmovow (%rdx), %di # sched: [7:0.67]
+; BDVER2-NEXT: cmovnow (%rdx), %di # sched: [7:0.67]
+; BDVER2-NEXT: cmovbw (%rdx), %di # sched: [7:0.67]
+; BDVER2-NEXT: cmovbw (%rdx), %di # sched: [7:0.67]
+; BDVER2-NEXT: cmovbw (%rdx), %di # sched: [7:0.67]
+; BDVER2-NEXT: cmovaew (%rdx), %di # sched: [7:0.67]
+; BDVER2-NEXT: cmovaew (%rdx), %di # sched: [7:0.67]
+; BDVER2-NEXT: cmovaew (%rdx), %di # sched: [7:0.67]
+; BDVER2-NEXT: cmovew (%rdx), %di # sched: [7:0.67]
+; BDVER2-NEXT: cmovew (%rdx), %di # sched: [7:0.67]
+; BDVER2-NEXT: cmovnew (%rdx), %di # sched: [7:0.67]
+; BDVER2-NEXT: cmovnew (%rdx), %di # sched: [7:0.67]
+; BDVER2-NEXT: cmovbew (%rdx), %di # sched: [8:1.00]
+; BDVER2-NEXT: cmovbew (%rdx), %di # sched: [8:1.00]
+; BDVER2-NEXT: cmovaw (%rdx), %di # sched: [8:1.00]
+; BDVER2-NEXT: cmovaw (%rdx), %di # sched: [8:1.00]
+; BDVER2-NEXT: cmovsw (%rdx), %di # sched: [7:0.67]
+; BDVER2-NEXT: cmovnsw (%rdx), %di # sched: [7:0.67]
+; BDVER2-NEXT: cmovpw (%rdx), %di # sched: [7:0.67]
+; BDVER2-NEXT: cmovpw (%rdx), %di # sched: [7:0.67]
+; BDVER2-NEXT: cmovnpw (%rdx), %di # sched: [7:0.67]
+; BDVER2-NEXT: cmovnpw (%rdx), %di # sched: [7:0.67]
+; BDVER2-NEXT: cmovlw (%rdx), %di # sched: [7:0.67]
+; BDVER2-NEXT: cmovlw (%rdx), %di # sched: [7:0.67]
+; BDVER2-NEXT: cmovgew (%rdx), %di # sched: [7:0.67]
+; BDVER2-NEXT: cmovgew (%rdx), %di # sched: [7:0.67]
+; BDVER2-NEXT: cmovlew (%rdx), %di # sched: [7:0.67]
+; BDVER2-NEXT: cmovlew (%rdx), %di # sched: [7:0.67]
+; BDVER2-NEXT: cmovgw (%rdx), %di # sched: [7:0.67]
+; BDVER2-NEXT: cmovgw (%rdx), %di # sched: [7:0.67]
+; BDVER2-NEXT: #NO_APP
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-LABEL: test_cmov_16:
; BTVER2: # %bb.0:
; BTVER2-NEXT: #APP
; SKX-NEXT: #NO_APP
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-LABEL: test_cmov_32:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: #APP
+; BDVER2-NEXT: cmovol %esi, %edi # sched: [2:0.67]
+; BDVER2-NEXT: cmovnol %esi, %edi # sched: [2:0.67]
+; BDVER2-NEXT: cmovbl %esi, %edi # sched: [2:0.67]
+; BDVER2-NEXT: cmovbl %esi, %edi # sched: [2:0.67]
+; BDVER2-NEXT: cmovbl %esi, %edi # sched: [2:0.67]
+; BDVER2-NEXT: cmovael %esi, %edi # sched: [2:0.67]
+; BDVER2-NEXT: cmovael %esi, %edi # sched: [2:0.67]
+; BDVER2-NEXT: cmovael %esi, %edi # sched: [2:0.67]
+; BDVER2-NEXT: cmovel %esi, %edi # sched: [2:0.67]
+; BDVER2-NEXT: cmovel %esi, %edi # sched: [2:0.67]
+; BDVER2-NEXT: cmovnel %esi, %edi # sched: [2:0.67]
+; BDVER2-NEXT: cmovnel %esi, %edi # sched: [2:0.67]
+; BDVER2-NEXT: cmovbel %esi, %edi # sched: [3:1.00]
+; BDVER2-NEXT: cmovbel %esi, %edi # sched: [3:1.00]
+; BDVER2-NEXT: cmoval %esi, %edi # sched: [3:1.00]
+; BDVER2-NEXT: cmoval %esi, %edi # sched: [3:1.00]
+; BDVER2-NEXT: cmovsl %esi, %edi # sched: [2:0.67]
+; BDVER2-NEXT: cmovnsl %esi, %edi # sched: [2:0.67]
+; BDVER2-NEXT: cmovpl %esi, %edi # sched: [2:0.67]
+; BDVER2-NEXT: cmovpl %esi, %edi # sched: [2:0.67]
+; BDVER2-NEXT: cmovnpl %esi, %edi # sched: [2:0.67]
+; BDVER2-NEXT: cmovnpl %esi, %edi # sched: [2:0.67]
+; BDVER2-NEXT: cmovll %esi, %edi # sched: [2:0.67]
+; BDVER2-NEXT: cmovll %esi, %edi # sched: [2:0.67]
+; BDVER2-NEXT: cmovgel %esi, %edi # sched: [2:0.67]
+; BDVER2-NEXT: cmovgel %esi, %edi # sched: [2:0.67]
+; BDVER2-NEXT: cmovlel %esi, %edi # sched: [2:0.67]
+; BDVER2-NEXT: cmovlel %esi, %edi # sched: [2:0.67]
+; BDVER2-NEXT: cmovgl %esi, %edi # sched: [2:0.67]
+; BDVER2-NEXT: cmovgl %esi, %edi # sched: [2:0.67]
+; BDVER2-NEXT: cmovol (%rdx), %edi # sched: [7:0.67]
+; BDVER2-NEXT: cmovnol (%rdx), %edi # sched: [7:0.67]
+; BDVER2-NEXT: cmovbl (%rdx), %edi # sched: [7:0.67]
+; BDVER2-NEXT: cmovbl (%rdx), %edi # sched: [7:0.67]
+; BDVER2-NEXT: cmovbl (%rdx), %edi # sched: [7:0.67]
+; BDVER2-NEXT: cmovael (%rdx), %edi # sched: [7:0.67]
+; BDVER2-NEXT: cmovael (%rdx), %edi # sched: [7:0.67]
+; BDVER2-NEXT: cmovael (%rdx), %edi # sched: [7:0.67]
+; BDVER2-NEXT: cmovel (%rdx), %edi # sched: [7:0.67]
+; BDVER2-NEXT: cmovel (%rdx), %edi # sched: [7:0.67]
+; BDVER2-NEXT: cmovnel (%rdx), %edi # sched: [7:0.67]
+; BDVER2-NEXT: cmovnel (%rdx), %edi # sched: [7:0.67]
+; BDVER2-NEXT: cmovbel (%rdx), %edi # sched: [8:1.00]
+; BDVER2-NEXT: cmovbel (%rdx), %edi # sched: [8:1.00]
+; BDVER2-NEXT: cmoval (%rdx), %edi # sched: [8:1.00]
+; BDVER2-NEXT: cmoval (%rdx), %edi # sched: [8:1.00]
+; BDVER2-NEXT: cmovsl (%rdx), %edi # sched: [7:0.67]
+; BDVER2-NEXT: cmovnsl (%rdx), %edi # sched: [7:0.67]
+; BDVER2-NEXT: cmovpl (%rdx), %edi # sched: [7:0.67]
+; BDVER2-NEXT: cmovpl (%rdx), %edi # sched: [7:0.67]
+; BDVER2-NEXT: cmovnpl (%rdx), %edi # sched: [7:0.67]
+; BDVER2-NEXT: cmovnpl (%rdx), %edi # sched: [7:0.67]
+; BDVER2-NEXT: cmovll (%rdx), %edi # sched: [7:0.67]
+; BDVER2-NEXT: cmovll (%rdx), %edi # sched: [7:0.67]
+; BDVER2-NEXT: cmovgel (%rdx), %edi # sched: [7:0.67]
+; BDVER2-NEXT: cmovgel (%rdx), %edi # sched: [7:0.67]
+; BDVER2-NEXT: cmovlel (%rdx), %edi # sched: [7:0.67]
+; BDVER2-NEXT: cmovlel (%rdx), %edi # sched: [7:0.67]
+; BDVER2-NEXT: cmovgl (%rdx), %edi # sched: [7:0.67]
+; BDVER2-NEXT: cmovgl (%rdx), %edi # sched: [7:0.67]
+; BDVER2-NEXT: #NO_APP
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-LABEL: test_cmov_32:
; BTVER2: # %bb.0:
; BTVER2-NEXT: #APP
; SKX-NEXT: #NO_APP
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-LABEL: test_cmov_64:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: #APP
+; BDVER2-NEXT: cmovoq %rsi, %rdi # sched: [2:0.67]
+; BDVER2-NEXT: cmovnoq %rsi, %rdi # sched: [2:0.67]
+; BDVER2-NEXT: cmovbq %rsi, %rdi # sched: [2:0.67]
+; BDVER2-NEXT: cmovbq %rsi, %rdi # sched: [2:0.67]
+; BDVER2-NEXT: cmovbq %rsi, %rdi # sched: [2:0.67]
+; BDVER2-NEXT: cmovaeq %rsi, %rdi # sched: [2:0.67]
+; BDVER2-NEXT: cmovaeq %rsi, %rdi # sched: [2:0.67]
+; BDVER2-NEXT: cmovaeq %rsi, %rdi # sched: [2:0.67]
+; BDVER2-NEXT: cmoveq %rsi, %rdi # sched: [2:0.67]
+; BDVER2-NEXT: cmoveq %rsi, %rdi # sched: [2:0.67]
+; BDVER2-NEXT: cmovneq %rsi, %rdi # sched: [2:0.67]
+; BDVER2-NEXT: cmovneq %rsi, %rdi # sched: [2:0.67]
+; BDVER2-NEXT: cmovbeq %rsi, %rdi # sched: [3:1.00]
+; BDVER2-NEXT: cmovbeq %rsi, %rdi # sched: [3:1.00]
+; BDVER2-NEXT: cmovaq %rsi, %rdi # sched: [3:1.00]
+; BDVER2-NEXT: cmovaq %rsi, %rdi # sched: [3:1.00]
+; BDVER2-NEXT: cmovsq %rsi, %rdi # sched: [2:0.67]
+; BDVER2-NEXT: cmovnsq %rsi, %rdi # sched: [2:0.67]
+; BDVER2-NEXT: cmovpq %rsi, %rdi # sched: [2:0.67]
+; BDVER2-NEXT: cmovpq %rsi, %rdi # sched: [2:0.67]
+; BDVER2-NEXT: cmovnpq %rsi, %rdi # sched: [2:0.67]
+; BDVER2-NEXT: cmovnpq %rsi, %rdi # sched: [2:0.67]
+; BDVER2-NEXT: cmovlq %rsi, %rdi # sched: [2:0.67]
+; BDVER2-NEXT: cmovlq %rsi, %rdi # sched: [2:0.67]
+; BDVER2-NEXT: cmovgeq %rsi, %rdi # sched: [2:0.67]
+; BDVER2-NEXT: cmovgeq %rsi, %rdi # sched: [2:0.67]
+; BDVER2-NEXT: cmovleq %rsi, %rdi # sched: [2:0.67]
+; BDVER2-NEXT: cmovleq %rsi, %rdi # sched: [2:0.67]
+; BDVER2-NEXT: cmovgq %rsi, %rdi # sched: [2:0.67]
+; BDVER2-NEXT: cmovgq %rsi, %rdi # sched: [2:0.67]
+; BDVER2-NEXT: cmovoq (%rdx), %rdi # sched: [7:0.67]
+; BDVER2-NEXT: cmovnoq (%rdx), %rdi # sched: [7:0.67]
+; BDVER2-NEXT: cmovbq (%rdx), %rdi # sched: [7:0.67]
+; BDVER2-NEXT: cmovbq (%rdx), %rdi # sched: [7:0.67]
+; BDVER2-NEXT: cmovbq (%rdx), %rdi # sched: [7:0.67]
+; BDVER2-NEXT: cmovaeq (%rdx), %rdi # sched: [7:0.67]
+; BDVER2-NEXT: cmovaeq (%rdx), %rdi # sched: [7:0.67]
+; BDVER2-NEXT: cmovaeq (%rdx), %rdi # sched: [7:0.67]
+; BDVER2-NEXT: cmoveq (%rdx), %rdi # sched: [7:0.67]
+; BDVER2-NEXT: cmoveq (%rdx), %rdi # sched: [7:0.67]
+; BDVER2-NEXT: cmovneq (%rdx), %rdi # sched: [7:0.67]
+; BDVER2-NEXT: cmovneq (%rdx), %rdi # sched: [7:0.67]
+; BDVER2-NEXT: cmovbeq (%rdx), %rdi # sched: [8:1.00]
+; BDVER2-NEXT: cmovbeq (%rdx), %rdi # sched: [8:1.00]
+; BDVER2-NEXT: cmovaq (%rdx), %rdi # sched: [8:1.00]
+; BDVER2-NEXT: cmovaq (%rdx), %rdi # sched: [8:1.00]
+; BDVER2-NEXT: cmovsq (%rdx), %rdi # sched: [7:0.67]
+; BDVER2-NEXT: cmovnsq (%rdx), %rdi # sched: [7:0.67]
+; BDVER2-NEXT: cmovpq (%rdx), %rdi # sched: [7:0.67]
+; BDVER2-NEXT: cmovpq (%rdx), %rdi # sched: [7:0.67]
+; BDVER2-NEXT: cmovnpq (%rdx), %rdi # sched: [7:0.67]
+; BDVER2-NEXT: cmovnpq (%rdx), %rdi # sched: [7:0.67]
+; BDVER2-NEXT: cmovlq (%rdx), %rdi # sched: [7:0.67]
+; BDVER2-NEXT: cmovlq (%rdx), %rdi # sched: [7:0.67]
+; BDVER2-NEXT: cmovgeq (%rdx), %rdi # sched: [7:0.67]
+; BDVER2-NEXT: cmovgeq (%rdx), %rdi # sched: [7:0.67]
+; BDVER2-NEXT: cmovleq (%rdx), %rdi # sched: [7:0.67]
+; BDVER2-NEXT: cmovleq (%rdx), %rdi # sched: [7:0.67]
+; BDVER2-NEXT: cmovgq (%rdx), %rdi # sched: [7:0.67]
+; BDVER2-NEXT: cmovgq (%rdx), %rdi # sched: [7:0.67]
+; BDVER2-NEXT: #NO_APP
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-LABEL: test_cmov_64:
; BTVER2: # %bb.0:
; BTVER2-NEXT: #APP
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=haswell | FileCheck %s --check-prefix=CHECK --check-prefix=HASWELL
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=broadwell | FileCheck %s --check-prefix=CHECK --check-prefix=BROADWELL
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=skylake | FileCheck %s --check-prefix=CHECK --check-prefix=SKYLAKE
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=x86-64 -mattr=+f16c | FileCheck %s --check-prefix=CHECK --check-prefix=BDVER2
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=btver2 | FileCheck %s --check-prefix=CHECK --check-prefix=BTVER2
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=znver1 | FileCheck %s --check-prefix=CHECK --check-prefix=ZNVER1
; SKYLAKE-NEXT: vaddps %xmm0, %xmm1, %xmm0 # sched: [4:0.50]
; SKYLAKE-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-LABEL: test_vcvtph2ps_128:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: vcvtph2ps (%rdi), %xmm1 # sched: [8:1.00]
+; BDVER2-NEXT: vcvtph2ps %xmm0, %xmm0 # sched: [3:1.00]
+; BDVER2-NEXT: vaddps %xmm0, %xmm1, %xmm0 # sched: [3:1.00]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-LABEL: test_vcvtph2ps_128:
; BTVER2: # %bb.0:
; BTVER2-NEXT: vcvtph2ps (%rdi), %xmm1 # sched: [8:1.00]
; SKYLAKE-NEXT: vaddps %ymm0, %ymm1, %ymm0 # sched: [4:0.50]
; SKYLAKE-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-LABEL: test_vcvtph2ps_256:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: vcvtph2ps (%rdi), %ymm1 # sched: [8:1.00]
+; BDVER2-NEXT: vcvtph2ps %xmm0, %ymm0 # sched: [3:1.00]
+; BDVER2-NEXT: vaddps %ymm0, %ymm1, %ymm0 # sched: [3:1.00]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-LABEL: test_vcvtph2ps_256:
; BTVER2: # %bb.0:
; BTVER2-NEXT: vcvtph2ps (%rdi), %ymm1 # sched: [8:2.00]
; SKYLAKE-NEXT: vcvtps2ph $0, %xmm1, (%rdi) # sched: [6:1.00]
; SKYLAKE-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-LABEL: test_vcvtps2ph_128:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: vcvtps2ph $0, %xmm0, %xmm0 # sched: [3:1.00]
+; BDVER2-NEXT: vcvtps2ph $0, %xmm1, (%rdi) # sched: [4:1.00]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-LABEL: test_vcvtps2ph_128:
; BTVER2: # %bb.0:
; BTVER2-NEXT: vcvtps2ph $0, %xmm0, %xmm0 # sched: [3:1.00]
; SKYLAKE-NEXT: vzeroupper # sched: [4:1.00]
; SKYLAKE-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-LABEL: test_vcvtps2ph_256:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: vcvtps2ph $0, %ymm0, %xmm0 # sched: [3:1.00]
+; BDVER2-NEXT: vcvtps2ph $0, %ymm1, (%rdi) # sched: [4:1.00]
+; BDVER2-NEXT: vzeroupper # sched: [100:0.33]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-LABEL: test_vcvtps2ph_256:
; BTVER2: # %bb.0:
; BTVER2-NEXT: vcvtps2ph $0, %ymm0, %xmm0 # sched: [6:2.00]
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=x86-64 -mattr=+fma | FileCheck %s --check-prefix=CHECK --check-prefix=GENERIC
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=x86-64 -mattr=+fma | FileCheck %s --check-prefix=CHECK --check-prefix=BDVER2
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=haswell | FileCheck %s --check-prefix=CHECK --check-prefix=HASWELL
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=broadwell | FileCheck %s --check-prefix=CHECK --check-prefix=BROADWELL
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=skylake | FileCheck %s --check-prefix=CHECK --check-prefix=SKYLAKE
; GENERIC-NEXT: #NO_APP
; GENERIC-NEXT: retq # sched: [1:1.00]
;
+; BDVER2-LABEL: test_vfmaddpd_128:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: #APP
+; BDVER2-NEXT: vfmadd132pd {{.*#+}} xmm0 = (xmm0 * xmm2) + xmm1 sched: [5:0.50]
+; BDVER2-NEXT: vfmadd213pd {{.*#+}} xmm0 = (xmm1 * xmm0) + xmm2 sched: [5:0.50]
+; BDVER2-NEXT: vfmadd231pd {{.*#+}} xmm0 = (xmm1 * xmm2) + xmm0 sched: [5:0.50]
+; BDVER2-NEXT: vfmadd132pd {{.*#+}} xmm0 = (xmm0 * mem) + xmm1 sched: [10:0.50]
+; BDVER2-NEXT: vfmadd213pd {{.*#+}} xmm0 = (xmm1 * xmm0) + mem sched: [10:0.50]
+; BDVER2-NEXT: vfmadd231pd {{.*#+}} xmm0 = (xmm1 * mem) + xmm0 sched: [10:0.50]
+; BDVER2-NEXT: #NO_APP
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; HASWELL-LABEL: test_vfmaddpd_128:
; HASWELL: # %bb.0:
; HASWELL-NEXT: #APP
; GENERIC-NEXT: vzeroupper # sched: [100:0.33]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
+; BDVER2-LABEL: test_vfmaddpd_256:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: #APP
+; BDVER2-NEXT: vfmadd132pd {{.*#+}} ymm0 = (ymm0 * ymm2) + ymm1 sched: [5:0.50]
+; BDVER2-NEXT: vfmadd213pd {{.*#+}} ymm0 = (ymm1 * ymm0) + ymm2 sched: [5:0.50]
+; BDVER2-NEXT: vfmadd231pd {{.*#+}} ymm0 = (ymm1 * ymm2) + ymm0 sched: [5:0.50]
+; BDVER2-NEXT: vfmadd132pd {{.*#+}} ymm0 = (ymm0 * mem) + ymm1 sched: [10:0.50]
+; BDVER2-NEXT: vfmadd213pd {{.*#+}} ymm0 = (ymm1 * ymm0) + mem sched: [10:0.50]
+; BDVER2-NEXT: vfmadd231pd {{.*#+}} ymm0 = (ymm1 * mem) + ymm0 sched: [10:0.50]
+; BDVER2-NEXT: #NO_APP
+; BDVER2-NEXT: vzeroupper # sched: [100:0.33]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; HASWELL-LABEL: test_vfmaddpd_256:
; HASWELL: # %bb.0:
; HASWELL-NEXT: #APP
; GENERIC-NEXT: #NO_APP
; GENERIC-NEXT: retq # sched: [1:1.00]
;
+; BDVER2-LABEL: test_vfmaddps_128:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: #APP
+; BDVER2-NEXT: vfmadd132ps {{.*#+}} xmm0 = (xmm0 * xmm2) + xmm1 sched: [5:0.50]
+; BDVER2-NEXT: vfmadd213ps {{.*#+}} xmm0 = (xmm1 * xmm0) + xmm2 sched: [5:0.50]
+; BDVER2-NEXT: vfmadd231ps {{.*#+}} xmm0 = (xmm1 * xmm2) + xmm0 sched: [5:0.50]
+; BDVER2-NEXT: vfmadd132ps {{.*#+}} xmm0 = (xmm0 * mem) + xmm1 sched: [10:0.50]
+; BDVER2-NEXT: vfmadd213ps {{.*#+}} xmm0 = (xmm1 * xmm0) + mem sched: [10:0.50]
+; BDVER2-NEXT: vfmadd231ps {{.*#+}} xmm0 = (xmm1 * mem) + xmm0 sched: [10:0.50]
+; BDVER2-NEXT: #NO_APP
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; HASWELL-LABEL: test_vfmaddps_128:
; HASWELL: # %bb.0:
; HASWELL-NEXT: #APP
; GENERIC-NEXT: vzeroupper # sched: [100:0.33]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
+; BDVER2-LABEL: test_vfmaddps_256:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: #APP
+; BDVER2-NEXT: vfmadd132ps {{.*#+}} ymm0 = (ymm0 * ymm2) + ymm1 sched: [5:0.50]
+; BDVER2-NEXT: vfmadd213ps {{.*#+}} ymm0 = (ymm1 * ymm0) + ymm2 sched: [5:0.50]
+; BDVER2-NEXT: vfmadd231ps {{.*#+}} ymm0 = (ymm1 * ymm2) + ymm0 sched: [5:0.50]
+; BDVER2-NEXT: vfmadd132ps {{.*#+}} ymm0 = (ymm0 * mem) + ymm1 sched: [10:0.50]
+; BDVER2-NEXT: vfmadd213ps {{.*#+}} ymm0 = (ymm1 * ymm0) + mem sched: [10:0.50]
+; BDVER2-NEXT: vfmadd231ps {{.*#+}} ymm0 = (ymm1 * mem) + ymm0 sched: [10:0.50]
+; BDVER2-NEXT: #NO_APP
+; BDVER2-NEXT: vzeroupper # sched: [100:0.33]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; HASWELL-LABEL: test_vfmaddps_256:
; HASWELL: # %bb.0:
; HASWELL-NEXT: #APP
; GENERIC-NEXT: #NO_APP
; GENERIC-NEXT: retq # sched: [1:1.00]
;
+; BDVER2-LABEL: test_vfmaddsd_128:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: #APP
+; BDVER2-NEXT: vfmadd132sd {{.*#+}} xmm0 = (xmm0 * xmm2) + xmm1 sched: [5:0.50]
+; BDVER2-NEXT: vfmadd213sd {{.*#+}} xmm0 = (xmm1 * xmm0) + xmm2 sched: [5:0.50]
+; BDVER2-NEXT: vfmadd231sd {{.*#+}} xmm0 = (xmm1 * xmm2) + xmm0 sched: [5:0.50]
+; BDVER2-NEXT: vfmadd132sd {{.*#+}} xmm0 = (xmm0 * mem) + xmm1 sched: [10:0.50]
+; BDVER2-NEXT: vfmadd213sd {{.*#+}} xmm0 = (xmm1 * xmm0) + mem sched: [10:0.50]
+; BDVER2-NEXT: vfmadd231sd {{.*#+}} xmm0 = (xmm1 * mem) + xmm0 sched: [10:0.50]
+; BDVER2-NEXT: #NO_APP
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; HASWELL-LABEL: test_vfmaddsd_128:
; HASWELL: # %bb.0:
; HASWELL-NEXT: #APP
; GENERIC-NEXT: #NO_APP
; GENERIC-NEXT: retq # sched: [1:1.00]
;
+; BDVER2-LABEL: test_vfmaddss_128:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: #APP
+; BDVER2-NEXT: vfmadd132ss {{.*#+}} xmm0 = (xmm0 * xmm2) + xmm1 sched: [5:0.50]
+; BDVER2-NEXT: vfmadd213ss {{.*#+}} xmm0 = (xmm1 * xmm0) + xmm2 sched: [5:0.50]
+; BDVER2-NEXT: vfmadd231ss {{.*#+}} xmm0 = (xmm1 * xmm2) + xmm0 sched: [5:0.50]
+; BDVER2-NEXT: vfmadd132ss {{.*#+}} xmm0 = (xmm0 * mem) + xmm1 sched: [10:0.50]
+; BDVER2-NEXT: vfmadd213ss {{.*#+}} xmm0 = (xmm1 * xmm0) + mem sched: [10:0.50]
+; BDVER2-NEXT: vfmadd231ss {{.*#+}} xmm0 = (xmm1 * mem) + xmm0 sched: [10:0.50]
+; BDVER2-NEXT: #NO_APP
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; HASWELL-LABEL: test_vfmaddss_128:
; HASWELL: # %bb.0:
; HASWELL-NEXT: #APP
; GENERIC-NEXT: #NO_APP
; GENERIC-NEXT: retq # sched: [1:1.00]
;
+; BDVER2-LABEL: test_vfmaddsubpd_128:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: #APP
+; BDVER2-NEXT: vfmaddsub132pd {{.*#+}} xmm0 = (xmm0 * xmm2) +/- xmm1 sched: [5:0.50]
+; BDVER2-NEXT: vfmaddsub213pd {{.*#+}} xmm0 = (xmm1 * xmm0) +/- xmm2 sched: [5:0.50]
+; BDVER2-NEXT: vfmaddsub231pd {{.*#+}} xmm0 = (xmm1 * xmm2) +/- xmm0 sched: [5:0.50]
+; BDVER2-NEXT: vfmaddsub132pd {{.*#+}} xmm0 = (xmm0 * mem) +/- xmm1 sched: [10:0.50]
+; BDVER2-NEXT: vfmaddsub213pd {{.*#+}} xmm0 = (xmm1 * xmm0) +/- mem sched: [10:0.50]
+; BDVER2-NEXT: vfmaddsub231pd {{.*#+}} xmm0 = (xmm1 * mem) +/- xmm0 sched: [10:0.50]
+; BDVER2-NEXT: #NO_APP
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; HASWELL-LABEL: test_vfmaddsubpd_128:
; HASWELL: # %bb.0:
; HASWELL-NEXT: #APP
; GENERIC-NEXT: vzeroupper # sched: [100:0.33]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
+; BDVER2-LABEL: test_vfmaddsubpd_256:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: #APP
+; BDVER2-NEXT: vfmaddsub132pd {{.*#+}} ymm0 = (ymm0 * ymm2) +/- ymm1 sched: [5:0.50]
+; BDVER2-NEXT: vfmaddsub213pd {{.*#+}} ymm0 = (ymm1 * ymm0) +/- ymm2 sched: [5:0.50]
+; BDVER2-NEXT: vfmaddsub231pd {{.*#+}} ymm0 = (ymm1 * ymm2) +/- ymm0 sched: [5:0.50]
+; BDVER2-NEXT: vfmaddsub132pd {{.*#+}} ymm0 = (ymm0 * mem) +/- ymm1 sched: [10:0.50]
+; BDVER2-NEXT: vfmaddsub213pd {{.*#+}} ymm0 = (ymm1 * ymm0) +/- mem sched: [10:0.50]
+; BDVER2-NEXT: vfmaddsub231pd {{.*#+}} ymm0 = (ymm1 * mem) +/- ymm0 sched: [10:0.50]
+; BDVER2-NEXT: #NO_APP
+; BDVER2-NEXT: vzeroupper # sched: [100:0.33]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; HASWELL-LABEL: test_vfmaddsubpd_256:
; HASWELL: # %bb.0:
; HASWELL-NEXT: #APP
; GENERIC-NEXT: #NO_APP
; GENERIC-NEXT: retq # sched: [1:1.00]
;
+; BDVER2-LABEL: test_vfmaddsubps_128:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: #APP
+; BDVER2-NEXT: vfmaddsub132ps {{.*#+}} xmm0 = (xmm0 * xmm2) +/- xmm1 sched: [5:0.50]
+; BDVER2-NEXT: vfmaddsub213ps {{.*#+}} xmm0 = (xmm1 * xmm0) +/- xmm2 sched: [5:0.50]
+; BDVER2-NEXT: vfmaddsub231ps {{.*#+}} xmm0 = (xmm1 * xmm2) +/- xmm0 sched: [5:0.50]
+; BDVER2-NEXT: vfmaddsub132ps {{.*#+}} xmm0 = (xmm0 * mem) +/- xmm1 sched: [10:0.50]
+; BDVER2-NEXT: vfmaddsub213ps {{.*#+}} xmm0 = (xmm1 * xmm0) +/- mem sched: [10:0.50]
+; BDVER2-NEXT: vfmaddsub231ps {{.*#+}} xmm0 = (xmm1 * mem) +/- xmm0 sched: [10:0.50]
+; BDVER2-NEXT: #NO_APP
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; HASWELL-LABEL: test_vfmaddsubps_128:
; HASWELL: # %bb.0:
; HASWELL-NEXT: #APP
; GENERIC-NEXT: vzeroupper # sched: [100:0.33]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
+; BDVER2-LABEL: test_vfmaddsubps_256:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: #APP
+; BDVER2-NEXT: vfmaddsub132ps {{.*#+}} ymm0 = (ymm0 * ymm2) +/- ymm1 sched: [5:0.50]
+; BDVER2-NEXT: vfmaddsub213ps {{.*#+}} ymm0 = (ymm1 * ymm0) +/- ymm2 sched: [5:0.50]
+; BDVER2-NEXT: vfmaddsub231ps {{.*#+}} ymm0 = (ymm1 * ymm2) +/- ymm0 sched: [5:0.50]
+; BDVER2-NEXT: vfmaddsub132ps {{.*#+}} ymm0 = (ymm0 * mem) +/- ymm1 sched: [10:0.50]
+; BDVER2-NEXT: vfmaddsub213ps {{.*#+}} ymm0 = (ymm1 * ymm0) +/- mem sched: [10:0.50]
+; BDVER2-NEXT: vfmaddsub231ps {{.*#+}} ymm0 = (ymm1 * mem) +/- ymm0 sched: [10:0.50]
+; BDVER2-NEXT: #NO_APP
+; BDVER2-NEXT: vzeroupper # sched: [100:0.33]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; HASWELL-LABEL: test_vfmaddsubps_256:
; HASWELL: # %bb.0:
; HASWELL-NEXT: #APP
; GENERIC-NEXT: #NO_APP
; GENERIC-NEXT: retq # sched: [1:1.00]
;
+; BDVER2-LABEL: test_vfmsubaddpd_128:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: #APP
+; BDVER2-NEXT: vfmsubadd132pd {{.*#+}} xmm0 = (xmm0 * xmm2) -/+ xmm1 sched: [5:0.50]
+; BDVER2-NEXT: vfmsubadd213pd {{.*#+}} xmm0 = (xmm1 * xmm0) -/+ xmm2 sched: [5:0.50]
+; BDVER2-NEXT: vfmsubadd231pd {{.*#+}} xmm0 = (xmm1 * xmm2) -/+ xmm0 sched: [5:0.50]
+; BDVER2-NEXT: vfmsubadd132pd {{.*#+}} xmm0 = (xmm0 * mem) -/+ xmm1 sched: [10:0.50]
+; BDVER2-NEXT: vfmsubadd213pd {{.*#+}} xmm0 = (xmm1 * xmm0) -/+ mem sched: [10:0.50]
+; BDVER2-NEXT: vfmsubadd231pd {{.*#+}} xmm0 = (xmm1 * mem) -/+ xmm0 sched: [10:0.50]
+; BDVER2-NEXT: #NO_APP
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; HASWELL-LABEL: test_vfmsubaddpd_128:
; HASWELL: # %bb.0:
; HASWELL-NEXT: #APP
; GENERIC-NEXT: vzeroupper # sched: [100:0.33]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
+; BDVER2-LABEL: test_vfmsubaddpd_256:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: #APP
+; BDVER2-NEXT: vfmsubadd132pd {{.*#+}} ymm0 = (ymm0 * ymm2) -/+ ymm1 sched: [5:0.50]
+; BDVER2-NEXT: vfmsubadd213pd {{.*#+}} ymm0 = (ymm1 * ymm0) -/+ ymm2 sched: [5:0.50]
+; BDVER2-NEXT: vfmsubadd231pd {{.*#+}} ymm0 = (ymm1 * ymm2) -/+ ymm0 sched: [5:0.50]
+; BDVER2-NEXT: vfmsubadd132pd {{.*#+}} ymm0 = (ymm0 * mem) -/+ ymm1 sched: [10:0.50]
+; BDVER2-NEXT: vfmsubadd213pd {{.*#+}} ymm0 = (ymm1 * ymm0) -/+ mem sched: [10:0.50]
+; BDVER2-NEXT: vfmsubadd231pd {{.*#+}} ymm0 = (ymm1 * mem) -/+ ymm0 sched: [10:0.50]
+; BDVER2-NEXT: #NO_APP
+; BDVER2-NEXT: vzeroupper # sched: [100:0.33]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; HASWELL-LABEL: test_vfmsubaddpd_256:
; HASWELL: # %bb.0:
; HASWELL-NEXT: #APP
; GENERIC-NEXT: #NO_APP
; GENERIC-NEXT: retq # sched: [1:1.00]
;
+; BDVER2-LABEL: test_vfmsubaddps_128:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: #APP
+; BDVER2-NEXT: vfmsubadd132ps {{.*#+}} xmm0 = (xmm0 * xmm2) -/+ xmm1 sched: [5:0.50]
+; BDVER2-NEXT: vfmsubadd213ps {{.*#+}} xmm0 = (xmm1 * xmm0) -/+ xmm2 sched: [5:0.50]
+; BDVER2-NEXT: vfmsubadd231ps {{.*#+}} xmm0 = (xmm1 * xmm2) -/+ xmm0 sched: [5:0.50]
+; BDVER2-NEXT: vfmsubadd132ps {{.*#+}} xmm0 = (xmm0 * mem) -/+ xmm1 sched: [10:0.50]
+; BDVER2-NEXT: vfmsubadd213ps {{.*#+}} xmm0 = (xmm1 * xmm0) -/+ mem sched: [10:0.50]
+; BDVER2-NEXT: vfmsubadd231ps {{.*#+}} xmm0 = (xmm1 * mem) -/+ xmm0 sched: [10:0.50]
+; BDVER2-NEXT: #NO_APP
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; HASWELL-LABEL: test_vfmsubaddps_128:
; HASWELL: # %bb.0:
; HASWELL-NEXT: #APP
; GENERIC-NEXT: vzeroupper # sched: [100:0.33]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
+; BDVER2-LABEL: test_vfmsubaddps_256:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: #APP
+; BDVER2-NEXT: vfmsubadd132ps {{.*#+}} ymm0 = (ymm0 * ymm2) -/+ ymm1 sched: [5:0.50]
+; BDVER2-NEXT: vfmsubadd213ps {{.*#+}} ymm0 = (ymm1 * ymm0) -/+ ymm2 sched: [5:0.50]
+; BDVER2-NEXT: vfmsubadd231ps {{.*#+}} ymm0 = (ymm1 * ymm2) -/+ ymm0 sched: [5:0.50]
+; BDVER2-NEXT: vfmsubadd132ps {{.*#+}} ymm0 = (ymm0 * mem) -/+ ymm1 sched: [10:0.50]
+; BDVER2-NEXT: vfmsubadd213ps {{.*#+}} ymm0 = (ymm1 * ymm0) -/+ mem sched: [10:0.50]
+; BDVER2-NEXT: vfmsubadd231ps {{.*#+}} ymm0 = (ymm1 * mem) -/+ ymm0 sched: [10:0.50]
+; BDVER2-NEXT: #NO_APP
+; BDVER2-NEXT: vzeroupper # sched: [100:0.33]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; HASWELL-LABEL: test_vfmsubaddps_256:
; HASWELL: # %bb.0:
; HASWELL-NEXT: #APP
; GENERIC-NEXT: #NO_APP
; GENERIC-NEXT: retq # sched: [1:1.00]
;
+; BDVER2-LABEL: test_vfmsubpd_128:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: #APP
+; BDVER2-NEXT: vfmsub132pd {{.*#+}} xmm0 = (xmm0 * xmm2) - xmm1 sched: [5:0.50]
+; BDVER2-NEXT: vfmsub213pd {{.*#+}} xmm0 = (xmm1 * xmm0) - xmm2 sched: [5:0.50]
+; BDVER2-NEXT: vfmsub231pd {{.*#+}} xmm0 = (xmm1 * xmm2) - xmm0 sched: [5:0.50]
+; BDVER2-NEXT: vfmsub132pd {{.*#+}} xmm0 = (xmm0 * mem) - xmm1 sched: [10:0.50]
+; BDVER2-NEXT: vfmsub213pd {{.*#+}} xmm0 = (xmm1 * xmm0) - mem sched: [10:0.50]
+; BDVER2-NEXT: vfmsub231pd {{.*#+}} xmm0 = (xmm1 * mem) - xmm0 sched: [10:0.50]
+; BDVER2-NEXT: #NO_APP
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; HASWELL-LABEL: test_vfmsubpd_128:
; HASWELL: # %bb.0:
; HASWELL-NEXT: #APP
; GENERIC-NEXT: vzeroupper # sched: [100:0.33]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
+; BDVER2-LABEL: test_vfmsubpd_256:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: #APP
+; BDVER2-NEXT: vfmsub132pd {{.*#+}} ymm0 = (ymm0 * ymm2) - ymm1 sched: [5:0.50]
+; BDVER2-NEXT: vfmsub213pd {{.*#+}} ymm0 = (ymm1 * ymm0) - ymm2 sched: [5:0.50]
+; BDVER2-NEXT: vfmsub231pd {{.*#+}} ymm0 = (ymm1 * ymm2) - ymm0 sched: [5:0.50]
+; BDVER2-NEXT: vfmsub132pd {{.*#+}} ymm0 = (ymm0 * mem) - ymm1 sched: [10:0.50]
+; BDVER2-NEXT: vfmsub213pd {{.*#+}} ymm0 = (ymm1 * ymm0) - mem sched: [10:0.50]
+; BDVER2-NEXT: vfmsub231pd {{.*#+}} ymm0 = (ymm1 * mem) - ymm0 sched: [10:0.50]
+; BDVER2-NEXT: #NO_APP
+; BDVER2-NEXT: vzeroupper # sched: [100:0.33]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; HASWELL-LABEL: test_vfmsubpd_256:
; HASWELL: # %bb.0:
; HASWELL-NEXT: #APP
; GENERIC-NEXT: #NO_APP
; GENERIC-NEXT: retq # sched: [1:1.00]
;
+; BDVER2-LABEL: test_vfmsubps_128:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: #APP
+; BDVER2-NEXT: vfmsub132ps {{.*#+}} xmm0 = (xmm0 * xmm2) - xmm1 sched: [5:0.50]
+; BDVER2-NEXT: vfmsub213ps {{.*#+}} xmm0 = (xmm1 * xmm0) - xmm2 sched: [5:0.50]
+; BDVER2-NEXT: vfmsub231ps {{.*#+}} xmm0 = (xmm1 * xmm2) - xmm0 sched: [5:0.50]
+; BDVER2-NEXT: vfmsub132ps {{.*#+}} xmm0 = (xmm0 * mem) - xmm1 sched: [10:0.50]
+; BDVER2-NEXT: vfmsub213ps {{.*#+}} xmm0 = (xmm1 * xmm0) - mem sched: [10:0.50]
+; BDVER2-NEXT: vfmsub231ps {{.*#+}} xmm0 = (xmm1 * mem) - xmm0 sched: [10:0.50]
+; BDVER2-NEXT: #NO_APP
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; HASWELL-LABEL: test_vfmsubps_128:
; HASWELL: # %bb.0:
; HASWELL-NEXT: #APP
; GENERIC-NEXT: vzeroupper # sched: [100:0.33]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
+; BDVER2-LABEL: test_vfmsubps_256:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: #APP
+; BDVER2-NEXT: vfmsub132ps {{.*#+}} ymm0 = (ymm0 * ymm2) - ymm1 sched: [5:0.50]
+; BDVER2-NEXT: vfmsub213ps {{.*#+}} ymm0 = (ymm1 * ymm0) - ymm2 sched: [5:0.50]
+; BDVER2-NEXT: vfmsub231ps {{.*#+}} ymm0 = (ymm1 * ymm2) - ymm0 sched: [5:0.50]
+; BDVER2-NEXT: vfmsub132ps {{.*#+}} ymm0 = (ymm0 * mem) - ymm1 sched: [10:0.50]
+; BDVER2-NEXT: vfmsub213ps {{.*#+}} ymm0 = (ymm1 * ymm0) - mem sched: [10:0.50]
+; BDVER2-NEXT: vfmsub231ps {{.*#+}} ymm0 = (ymm1 * mem) - ymm0 sched: [10:0.50]
+; BDVER2-NEXT: #NO_APP
+; BDVER2-NEXT: vzeroupper # sched: [100:0.33]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; HASWELL-LABEL: test_vfmsubps_256:
; HASWELL: # %bb.0:
; HASWELL-NEXT: #APP
; GENERIC-NEXT: #NO_APP
; GENERIC-NEXT: retq # sched: [1:1.00]
;
+; BDVER2-LABEL: test_vfmsubsd_128:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: #APP
+; BDVER2-NEXT: vfmsub132sd {{.*#+}} xmm0 = (xmm0 * xmm2) - xmm1 sched: [5:0.50]
+; BDVER2-NEXT: vfmsub213sd {{.*#+}} xmm0 = (xmm1 * xmm0) - xmm2 sched: [5:0.50]
+; BDVER2-NEXT: vfmsub231sd {{.*#+}} xmm0 = (xmm1 * xmm2) - xmm0 sched: [5:0.50]
+; BDVER2-NEXT: vfmsub132sd {{.*#+}} xmm0 = (xmm0 * mem) - xmm1 sched: [10:0.50]
+; BDVER2-NEXT: vfmsub213sd {{.*#+}} xmm0 = (xmm1 * xmm0) - mem sched: [10:0.50]
+; BDVER2-NEXT: vfmsub231sd {{.*#+}} xmm0 = (xmm1 * mem) - xmm0 sched: [10:0.50]
+; BDVER2-NEXT: #NO_APP
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; HASWELL-LABEL: test_vfmsubsd_128:
; HASWELL: # %bb.0:
; HASWELL-NEXT: #APP
; GENERIC-NEXT: #NO_APP
; GENERIC-NEXT: retq # sched: [1:1.00]
;
+; BDVER2-LABEL: test_vfmsubss_128:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: #APP
+; BDVER2-NEXT: vfmsub132ss {{.*#+}} xmm0 = (xmm0 * xmm2) - xmm1 sched: [5:0.50]
+; BDVER2-NEXT: vfmsub213ss {{.*#+}} xmm0 = (xmm1 * xmm0) - xmm2 sched: [5:0.50]
+; BDVER2-NEXT: vfmsub231ss {{.*#+}} xmm0 = (xmm1 * xmm2) - xmm0 sched: [5:0.50]
+; BDVER2-NEXT: vfmsub132ss {{.*#+}} xmm0 = (xmm0 * mem) - xmm1 sched: [10:0.50]
+; BDVER2-NEXT: vfmsub213ss {{.*#+}} xmm0 = (xmm1 * xmm0) - mem sched: [10:0.50]
+; BDVER2-NEXT: vfmsub231ss {{.*#+}} xmm0 = (xmm1 * mem) - xmm0 sched: [10:0.50]
+; BDVER2-NEXT: #NO_APP
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; HASWELL-LABEL: test_vfmsubss_128:
; HASWELL: # %bb.0:
; HASWELL-NEXT: #APP
; GENERIC-NEXT: #NO_APP
; GENERIC-NEXT: retq # sched: [1:1.00]
;
+; BDVER2-LABEL: test_vfnmaddpd_128:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: #APP
+; BDVER2-NEXT: vfnmadd132pd {{.*#+}} xmm0 = -(xmm0 * xmm2) + xmm1 sched: [5:0.50]
+; BDVER2-NEXT: vfnmadd213pd {{.*#+}} xmm0 = -(xmm1 * xmm0) + xmm2 sched: [5:0.50]
+; BDVER2-NEXT: vfnmadd231pd {{.*#+}} xmm0 = -(xmm1 * xmm2) + xmm0 sched: [5:0.50]
+; BDVER2-NEXT: vfnmadd132pd {{.*#+}} xmm0 = -(xmm0 * mem) + xmm1 sched: [10:0.50]
+; BDVER2-NEXT: vfnmadd213pd {{.*#+}} xmm0 = -(xmm1 * xmm0) + mem sched: [10:0.50]
+; BDVER2-NEXT: vfnmadd231pd {{.*#+}} xmm0 = -(xmm1 * mem) + xmm0 sched: [10:0.50]
+; BDVER2-NEXT: #NO_APP
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; HASWELL-LABEL: test_vfnmaddpd_128:
; HASWELL: # %bb.0:
; HASWELL-NEXT: #APP
; GENERIC-NEXT: vzeroupper # sched: [100:0.33]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
+; BDVER2-LABEL: test_vfnmaddpd_256:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: #APP
+; BDVER2-NEXT: vfnmadd132pd {{.*#+}} ymm0 = -(ymm0 * ymm2) + ymm1 sched: [5:0.50]
+; BDVER2-NEXT: vfnmadd213pd {{.*#+}} ymm0 = -(ymm1 * ymm0) + ymm2 sched: [5:0.50]
+; BDVER2-NEXT: vfnmadd231pd {{.*#+}} ymm0 = -(ymm1 * ymm2) + ymm0 sched: [5:0.50]
+; BDVER2-NEXT: vfnmadd132pd {{.*#+}} ymm0 = -(ymm0 * mem) + ymm1 sched: [10:0.50]
+; BDVER2-NEXT: vfnmadd213pd {{.*#+}} ymm0 = -(ymm1 * ymm0) + mem sched: [10:0.50]
+; BDVER2-NEXT: vfnmadd231pd {{.*#+}} ymm0 = -(ymm1 * mem) + ymm0 sched: [10:0.50]
+; BDVER2-NEXT: #NO_APP
+; BDVER2-NEXT: vzeroupper # sched: [100:0.33]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; HASWELL-LABEL: test_vfnmaddpd_256:
; HASWELL: # %bb.0:
; HASWELL-NEXT: #APP
; GENERIC-NEXT: #NO_APP
; GENERIC-NEXT: retq # sched: [1:1.00]
;
+; BDVER2-LABEL: test_vfnmaddps_128:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: #APP
+; BDVER2-NEXT: vfnmadd132ps {{.*#+}} xmm0 = -(xmm0 * xmm2) + xmm1 sched: [5:0.50]
+; BDVER2-NEXT: vfnmadd213ps {{.*#+}} xmm0 = -(xmm1 * xmm0) + xmm2 sched: [5:0.50]
+; BDVER2-NEXT: vfnmadd231ps {{.*#+}} xmm0 = -(xmm1 * xmm2) + xmm0 sched: [5:0.50]
+; BDVER2-NEXT: vfnmadd132ps {{.*#+}} xmm0 = -(xmm0 * mem) + xmm1 sched: [10:0.50]
+; BDVER2-NEXT: vfnmadd213ps {{.*#+}} xmm0 = -(xmm1 * xmm0) + mem sched: [10:0.50]
+; BDVER2-NEXT: vfnmadd231ps {{.*#+}} xmm0 = -(xmm1 * mem) + xmm0 sched: [10:0.50]
+; BDVER2-NEXT: #NO_APP
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; HASWELL-LABEL: test_vfnmaddps_128:
; HASWELL: # %bb.0:
; HASWELL-NEXT: #APP
; GENERIC-NEXT: vzeroupper # sched: [100:0.33]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
+; BDVER2-LABEL: test_vfnmaddps_256:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: #APP
+; BDVER2-NEXT: vfnmadd132ps {{.*#+}} ymm0 = -(ymm0 * ymm2) + ymm1 sched: [5:0.50]
+; BDVER2-NEXT: vfnmadd213ps {{.*#+}} ymm0 = -(ymm1 * ymm0) + ymm2 sched: [5:0.50]
+; BDVER2-NEXT: vfnmadd231ps {{.*#+}} ymm0 = -(ymm1 * ymm2) + ymm0 sched: [5:0.50]
+; BDVER2-NEXT: vfnmadd132ps {{.*#+}} ymm0 = -(ymm0 * mem) + ymm1 sched: [10:0.50]
+; BDVER2-NEXT: vfnmadd213ps {{.*#+}} ymm0 = -(ymm1 * ymm0) + mem sched: [10:0.50]
+; BDVER2-NEXT: vfnmadd231ps {{.*#+}} ymm0 = -(ymm1 * mem) + ymm0 sched: [10:0.50]
+; BDVER2-NEXT: #NO_APP
+; BDVER2-NEXT: vzeroupper # sched: [100:0.33]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; HASWELL-LABEL: test_vfnmaddps_256:
; HASWELL: # %bb.0:
; HASWELL-NEXT: #APP
; GENERIC-NEXT: #NO_APP
; GENERIC-NEXT: retq # sched: [1:1.00]
;
+; BDVER2-LABEL: test_vfnmaddsd_128:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: #APP
+; BDVER2-NEXT: vfnmadd132sd {{.*#+}} xmm0 = -(xmm0 * xmm2) + xmm1 sched: [5:0.50]
+; BDVER2-NEXT: vfnmadd213sd {{.*#+}} xmm0 = -(xmm1 * xmm0) + xmm2 sched: [5:0.50]
+; BDVER2-NEXT: vfnmadd231sd {{.*#+}} xmm0 = -(xmm1 * xmm2) + xmm0 sched: [5:0.50]
+; BDVER2-NEXT: vfnmadd132sd {{.*#+}} xmm0 = -(xmm0 * mem) + xmm1 sched: [10:0.50]
+; BDVER2-NEXT: vfnmadd213sd {{.*#+}} xmm0 = -(xmm1 * xmm0) + mem sched: [10:0.50]
+; BDVER2-NEXT: vfnmadd231sd {{.*#+}} xmm0 = -(xmm1 * mem) + xmm0 sched: [10:0.50]
+; BDVER2-NEXT: #NO_APP
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; HASWELL-LABEL: test_vfnmaddsd_128:
; HASWELL: # %bb.0:
; HASWELL-NEXT: #APP
; GENERIC-NEXT: #NO_APP
; GENERIC-NEXT: retq # sched: [1:1.00]
;
+; BDVER2-LABEL: test_vfnmaddss_128:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: #APP
+; BDVER2-NEXT: vfnmadd132ss {{.*#+}} xmm0 = -(xmm0 * xmm2) + xmm1 sched: [5:0.50]
+; BDVER2-NEXT: vfnmadd213ss {{.*#+}} xmm0 = -(xmm1 * xmm0) + xmm2 sched: [5:0.50]
+; BDVER2-NEXT: vfnmadd231ss {{.*#+}} xmm0 = -(xmm1 * xmm2) + xmm0 sched: [5:0.50]
+; BDVER2-NEXT: vfnmadd132ss {{.*#+}} xmm0 = -(xmm0 * mem) + xmm1 sched: [10:0.50]
+; BDVER2-NEXT: vfnmadd213ss {{.*#+}} xmm0 = -(xmm1 * xmm0) + mem sched: [10:0.50]
+; BDVER2-NEXT: vfnmadd231ss {{.*#+}} xmm0 = -(xmm1 * mem) + xmm0 sched: [10:0.50]
+; BDVER2-NEXT: #NO_APP
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; HASWELL-LABEL: test_vfnmaddss_128:
; HASWELL: # %bb.0:
; HASWELL-NEXT: #APP
; GENERIC-NEXT: #NO_APP
; GENERIC-NEXT: retq # sched: [1:1.00]
;
+; BDVER2-LABEL: test_vfnmsubpd_128:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: #APP
+; BDVER2-NEXT: vfnmsub132pd {{.*#+}} xmm0 = -(xmm0 * xmm2) - xmm1 sched: [5:0.50]
+; BDVER2-NEXT: vfnmsub213pd {{.*#+}} xmm0 = -(xmm1 * xmm0) - xmm2 sched: [5:0.50]
+; BDVER2-NEXT: vfnmsub231pd {{.*#+}} xmm0 = -(xmm1 * xmm2) - xmm0 sched: [5:0.50]
+; BDVER2-NEXT: vfnmsub132pd {{.*#+}} xmm0 = -(xmm0 * mem) - xmm1 sched: [10:0.50]
+; BDVER2-NEXT: vfnmsub213pd {{.*#+}} xmm0 = -(xmm1 * xmm0) - mem sched: [10:0.50]
+; BDVER2-NEXT: vfnmsub231pd {{.*#+}} xmm0 = -(xmm1 * mem) - xmm0 sched: [10:0.50]
+; BDVER2-NEXT: #NO_APP
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; HASWELL-LABEL: test_vfnmsubpd_128:
; HASWELL: # %bb.0:
; HASWELL-NEXT: #APP
; GENERIC-NEXT: vzeroupper # sched: [100:0.33]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
+; BDVER2-LABEL: test_vfnmsubpd_256:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: #APP
+; BDVER2-NEXT: vfnmsub132pd {{.*#+}} ymm0 = -(ymm0 * ymm2) - ymm1 sched: [5:0.50]
+; BDVER2-NEXT: vfnmsub213pd {{.*#+}} ymm0 = -(ymm1 * ymm0) - ymm2 sched: [5:0.50]
+; BDVER2-NEXT: vfnmsub231pd {{.*#+}} ymm0 = -(ymm1 * ymm2) - ymm0 sched: [5:0.50]
+; BDVER2-NEXT: vfnmsub132pd {{.*#+}} ymm0 = -(ymm0 * mem) - ymm1 sched: [10:0.50]
+; BDVER2-NEXT: vfnmsub213pd {{.*#+}} ymm0 = -(ymm1 * ymm0) - mem sched: [10:0.50]
+; BDVER2-NEXT: vfnmsub231pd {{.*#+}} ymm0 = -(ymm1 * mem) - ymm0 sched: [10:0.50]
+; BDVER2-NEXT: #NO_APP
+; BDVER2-NEXT: vzeroupper # sched: [100:0.33]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; HASWELL-LABEL: test_vfnmsubpd_256:
; HASWELL: # %bb.0:
; HASWELL-NEXT: #APP
; GENERIC-NEXT: #NO_APP
; GENERIC-NEXT: retq # sched: [1:1.00]
;
+; BDVER2-LABEL: test_vfnmsubps_128:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: #APP
+; BDVER2-NEXT: vfnmsub132ps {{.*#+}} xmm0 = -(xmm0 * xmm2) - xmm1 sched: [5:0.50]
+; BDVER2-NEXT: vfnmsub213ps {{.*#+}} xmm0 = -(xmm1 * xmm0) - xmm2 sched: [5:0.50]
+; BDVER2-NEXT: vfnmsub231ps {{.*#+}} xmm0 = -(xmm1 * xmm2) - xmm0 sched: [5:0.50]
+; BDVER2-NEXT: vfnmsub132ps {{.*#+}} xmm0 = -(xmm0 * mem) - xmm1 sched: [10:0.50]
+; BDVER2-NEXT: vfnmsub213ps {{.*#+}} xmm0 = -(xmm1 * xmm0) - mem sched: [10:0.50]
+; BDVER2-NEXT: vfnmsub231ps {{.*#+}} xmm0 = -(xmm1 * mem) - xmm0 sched: [10:0.50]
+; BDVER2-NEXT: #NO_APP
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; HASWELL-LABEL: test_vfnmsubps_128:
; HASWELL: # %bb.0:
; HASWELL-NEXT: #APP
; GENERIC-NEXT: vzeroupper # sched: [100:0.33]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
+; BDVER2-LABEL: test_vfnmsubps_256:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: #APP
+; BDVER2-NEXT: vfnmsub132ps {{.*#+}} ymm0 = -(ymm0 * ymm2) - ymm1 sched: [5:0.50]
+; BDVER2-NEXT: vfnmsub213ps {{.*#+}} ymm0 = -(ymm1 * ymm0) - ymm2 sched: [5:0.50]
+; BDVER2-NEXT: vfnmsub231ps {{.*#+}} ymm0 = -(ymm1 * ymm2) - ymm0 sched: [5:0.50]
+; BDVER2-NEXT: vfnmsub132ps {{.*#+}} ymm0 = -(ymm0 * mem) - ymm1 sched: [10:0.50]
+; BDVER2-NEXT: vfnmsub213ps {{.*#+}} ymm0 = -(ymm1 * ymm0) - mem sched: [10:0.50]
+; BDVER2-NEXT: vfnmsub231ps {{.*#+}} ymm0 = -(ymm1 * mem) - ymm0 sched: [10:0.50]
+; BDVER2-NEXT: #NO_APP
+; BDVER2-NEXT: vzeroupper # sched: [100:0.33]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; HASWELL-LABEL: test_vfnmsubps_256:
; HASWELL: # %bb.0:
; HASWELL-NEXT: #APP
; GENERIC-NEXT: #NO_APP
; GENERIC-NEXT: retq # sched: [1:1.00]
;
+; BDVER2-LABEL: test_vfnmsubsd_128:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: #APP
+; BDVER2-NEXT: vfnmsub132sd {{.*#+}} xmm0 = -(xmm0 * xmm2) - xmm1 sched: [5:0.50]
+; BDVER2-NEXT: vfnmsub213sd {{.*#+}} xmm0 = -(xmm1 * xmm0) - xmm2 sched: [5:0.50]
+; BDVER2-NEXT: vfnmsub231sd {{.*#+}} xmm0 = -(xmm1 * xmm2) - xmm0 sched: [5:0.50]
+; BDVER2-NEXT: vfnmsub132sd {{.*#+}} xmm0 = -(xmm0 * mem) - xmm1 sched: [10:0.50]
+; BDVER2-NEXT: vfnmsub213sd {{.*#+}} xmm0 = -(xmm1 * xmm0) - mem sched: [10:0.50]
+; BDVER2-NEXT: vfnmsub231sd {{.*#+}} xmm0 = -(xmm1 * mem) - xmm0 sched: [10:0.50]
+; BDVER2-NEXT: #NO_APP
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; HASWELL-LABEL: test_vfnmsubsd_128:
; HASWELL: # %bb.0:
; HASWELL-NEXT: #APP
; GENERIC-NEXT: #NO_APP
; GENERIC-NEXT: retq # sched: [1:1.00]
;
+; BDVER2-LABEL: test_vfnmsubss_128:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: #APP
+; BDVER2-NEXT: vfnmsub132ss {{.*#+}} xmm0 = -(xmm0 * xmm2) - xmm1 sched: [5:0.50]
+; BDVER2-NEXT: vfnmsub213ss {{.*#+}} xmm0 = -(xmm1 * xmm0) - xmm2 sched: [5:0.50]
+; BDVER2-NEXT: vfnmsub231ss {{.*#+}} xmm0 = -(xmm1 * xmm2) - xmm0 sched: [5:0.50]
+; BDVER2-NEXT: vfnmsub132ss {{.*#+}} xmm0 = -(xmm0 * mem) - xmm1 sched: [10:0.50]
+; BDVER2-NEXT: vfnmsub213ss {{.*#+}} xmm0 = -(xmm1 * xmm0) - mem sched: [10:0.50]
+; BDVER2-NEXT: vfnmsub231ss {{.*#+}} xmm0 = -(xmm1 * mem) - xmm0 sched: [10:0.50]
+; BDVER2-NEXT: #NO_APP
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; HASWELL-LABEL: test_vfnmsubss_128:
; HASWELL: # %bb.0:
; HASWELL-NEXT: #APP
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=x86-64 -mattr=+fma4 | FileCheck %s --check-prefix=CHECK --check-prefix=GENERIC
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=bdver1 | FileCheck %s --check-prefix=CHECK --check-prefix=BDVER --check-prefix=BDVER1
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=bdver2 -mattr=-fma | FileCheck %s --check-prefix=CHECK --check-prefix=BDVER --check-prefix=BDVER1
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=bdver3 -mattr=-fma | FileCheck %s --check-prefix=CHECK --check-prefix=BDVER --check-prefix=BDVER1
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=bdver4 -mattr=-fma | FileCheck %s --check-prefix=CHECK --check-prefix=BDVER --check-prefix=BDVER1
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=x86-64 -mattr=+fma4 | FileCheck %s --check-prefixes=CHECK,GENERIC
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=x86-64 -mattr=+fma4 | FileCheck %s --check-prefixes=CHECK,BDVER,BDVER12,BDVER1
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=x86-64 -mattr=+fma4 -mattr=-fma | FileCheck %s --check-prefixes=CHECK,BDVER,BDVER12,BDVER2
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=bdver3 -mattr=-fma | FileCheck %s --check-prefixes=CHECK,BDVER,BDVER34,BDVER3
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=bdver4 -mattr=-fma | FileCheck %s --check-prefixes=CHECK,BDVER,BDVER34,BDVER4
;
; VFMADD
; GENERIC-NEXT: #NO_APP
; GENERIC-NEXT: retq # sched: [1:1.00]
;
-; BDVER-LABEL: test_vfmaddpd_128:
-; BDVER: # %bb.0:
-; BDVER-NEXT: #APP
-; BDVER-NEXT: vfmaddpd %xmm2, %xmm1, %xmm0, %xmm0
-; BDVER-NEXT: vfmaddpd (%rdi), %xmm1, %xmm0, %xmm0
-; BDVER-NEXT: vfmaddpd %xmm1, (%rdi), %xmm0, %xmm0
-; BDVER-NEXT: #NO_APP
-; BDVER-NEXT: retq
+; BDVER12-LABEL: test_vfmaddpd_128:
+; BDVER12: # %bb.0:
+; BDVER12-NEXT: #APP
+; BDVER12-NEXT: vfmaddpd %xmm2, %xmm1, %xmm0, %xmm0 # sched: [5:0.50]
+; BDVER12-NEXT: vfmaddpd (%rdi), %xmm1, %xmm0, %xmm0 # sched: [10:0.50]
+; BDVER12-NEXT: vfmaddpd %xmm1, (%rdi), %xmm0, %xmm0 # sched: [10:0.50]
+; BDVER12-NEXT: #NO_APP
+; BDVER12-NEXT: retq # sched: [1:1.00]
+;
+; BDVER34-LABEL: test_vfmaddpd_128:
+; BDVER34: # %bb.0:
+; BDVER34-NEXT: #APP
+; BDVER34-NEXT: vfmaddpd %xmm2, %xmm1, %xmm0, %xmm0
+; BDVER34-NEXT: vfmaddpd (%rdi), %xmm1, %xmm0, %xmm0
+; BDVER34-NEXT: vfmaddpd %xmm1, (%rdi), %xmm0, %xmm0
+; BDVER34-NEXT: #NO_APP
+; BDVER34-NEXT: retq
tail call void asm "vfmaddpd $2, $1, $0, $0 \0A\09 vfmaddpd $3, $1, $0, $0 \0A\09 vfmaddpd $1, $3, $0, $0", "x,x,x,*m"(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2, <2 x double> *%a3) nounwind
ret void
}
; GENERIC-NEXT: vzeroupper # sched: [100:0.33]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
-; BDVER-LABEL: test_vfmaddpd_256:
-; BDVER: # %bb.0:
-; BDVER-NEXT: #APP
-; BDVER-NEXT: vfmaddpd %ymm2, %ymm1, %ymm0, %ymm0
-; BDVER-NEXT: vfmaddpd (%rdi), %ymm1, %ymm0, %ymm0
-; BDVER-NEXT: vfmaddpd %ymm1, (%rdi), %ymm0, %ymm0
-; BDVER-NEXT: #NO_APP
-; BDVER-NEXT: vzeroupper
-; BDVER-NEXT: retq
+; BDVER12-LABEL: test_vfmaddpd_256:
+; BDVER12: # %bb.0:
+; BDVER12-NEXT: #APP
+; BDVER12-NEXT: vfmaddpd %ymm2, %ymm1, %ymm0, %ymm0 # sched: [5:0.50]
+; BDVER12-NEXT: vfmaddpd (%rdi), %ymm1, %ymm0, %ymm0 # sched: [10:0.50]
+; BDVER12-NEXT: vfmaddpd %ymm1, (%rdi), %ymm0, %ymm0 # sched: [10:0.50]
+; BDVER12-NEXT: #NO_APP
+; BDVER12-NEXT: vzeroupper # sched: [100:0.33]
+; BDVER12-NEXT: retq # sched: [1:1.00]
+;
+; BDVER34-LABEL: test_vfmaddpd_256:
+; BDVER34: # %bb.0:
+; BDVER34-NEXT: #APP
+; BDVER34-NEXT: vfmaddpd %ymm2, %ymm1, %ymm0, %ymm0
+; BDVER34-NEXT: vfmaddpd (%rdi), %ymm1, %ymm0, %ymm0
+; BDVER34-NEXT: vfmaddpd %ymm1, (%rdi), %ymm0, %ymm0
+; BDVER34-NEXT: #NO_APP
+; BDVER34-NEXT: vzeroupper
+; BDVER34-NEXT: retq
tail call void asm "vfmaddpd $2, $1, $0, $0 \0A\09 vfmaddpd $3, $1, $0, $0 \0A\09 vfmaddpd $1, $3, $0, $0", "x,x,x,*m"(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2, <4 x double> *%a3) nounwind
ret void
}
; GENERIC-NEXT: #NO_APP
; GENERIC-NEXT: retq # sched: [1:1.00]
;
-; BDVER-LABEL: test_vfmaddps_128:
-; BDVER: # %bb.0:
-; BDVER-NEXT: #APP
-; BDVER-NEXT: vfmaddps %xmm2, %xmm1, %xmm0, %xmm0
-; BDVER-NEXT: vfmaddps (%rdi), %xmm1, %xmm0, %xmm0
-; BDVER-NEXT: vfmaddps %xmm1, (%rdi), %xmm0, %xmm0
-; BDVER-NEXT: #NO_APP
-; BDVER-NEXT: retq
+; BDVER12-LABEL: test_vfmaddps_128:
+; BDVER12: # %bb.0:
+; BDVER12-NEXT: #APP
+; BDVER12-NEXT: vfmaddps %xmm2, %xmm1, %xmm0, %xmm0 # sched: [5:0.50]
+; BDVER12-NEXT: vfmaddps (%rdi), %xmm1, %xmm0, %xmm0 # sched: [10:0.50]
+; BDVER12-NEXT: vfmaddps %xmm1, (%rdi), %xmm0, %xmm0 # sched: [10:0.50]
+; BDVER12-NEXT: #NO_APP
+; BDVER12-NEXT: retq # sched: [1:1.00]
+;
+; BDVER34-LABEL: test_vfmaddps_128:
+; BDVER34: # %bb.0:
+; BDVER34-NEXT: #APP
+; BDVER34-NEXT: vfmaddps %xmm2, %xmm1, %xmm0, %xmm0
+; BDVER34-NEXT: vfmaddps (%rdi), %xmm1, %xmm0, %xmm0
+; BDVER34-NEXT: vfmaddps %xmm1, (%rdi), %xmm0, %xmm0
+; BDVER34-NEXT: #NO_APP
+; BDVER34-NEXT: retq
tail call void asm "vfmaddps $2, $1, $0, $0 \0A\09 vfmaddps $3, $1, $0, $0 \0A\09 vfmaddps $1, $3, $0, $0", "x,x,x,*m"(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2, <4 x float> *%a3) nounwind
ret void
}
; GENERIC-NEXT: vzeroupper # sched: [100:0.33]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
-; BDVER-LABEL: test_vfmaddps_256:
-; BDVER: # %bb.0:
-; BDVER-NEXT: #APP
-; BDVER-NEXT: vfmaddps %ymm2, %ymm1, %ymm0, %ymm0
-; BDVER-NEXT: vfmaddps (%rdi), %ymm1, %ymm0, %ymm0
-; BDVER-NEXT: vfmaddps %ymm1, (%rdi), %ymm0, %ymm0
-; BDVER-NEXT: #NO_APP
-; BDVER-NEXT: vzeroupper
-; BDVER-NEXT: retq
+; BDVER12-LABEL: test_vfmaddps_256:
+; BDVER12: # %bb.0:
+; BDVER12-NEXT: #APP
+; BDVER12-NEXT: vfmaddps %ymm2, %ymm1, %ymm0, %ymm0 # sched: [5:0.50]
+; BDVER12-NEXT: vfmaddps (%rdi), %ymm1, %ymm0, %ymm0 # sched: [10:0.50]
+; BDVER12-NEXT: vfmaddps %ymm1, (%rdi), %ymm0, %ymm0 # sched: [10:0.50]
+; BDVER12-NEXT: #NO_APP
+; BDVER12-NEXT: vzeroupper # sched: [100:0.33]
+; BDVER12-NEXT: retq # sched: [1:1.00]
+;
+; BDVER34-LABEL: test_vfmaddps_256:
+; BDVER34: # %bb.0:
+; BDVER34-NEXT: #APP
+; BDVER34-NEXT: vfmaddps %ymm2, %ymm1, %ymm0, %ymm0
+; BDVER34-NEXT: vfmaddps (%rdi), %ymm1, %ymm0, %ymm0
+; BDVER34-NEXT: vfmaddps %ymm1, (%rdi), %ymm0, %ymm0
+; BDVER34-NEXT: #NO_APP
+; BDVER34-NEXT: vzeroupper
+; BDVER34-NEXT: retq
tail call void asm "vfmaddps $2, $1, $0, $0 \0A\09 vfmaddps $3, $1, $0, $0 \0A\09 vfmaddps $1, $3, $0, $0", "x,x,x,*m"(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2, <8 x float> *%a3) nounwind
ret void
}
; GENERIC-NEXT: #NO_APP
; GENERIC-NEXT: retq # sched: [1:1.00]
;
-; BDVER-LABEL: test_vfmaddsd_128:
-; BDVER: # %bb.0:
-; BDVER-NEXT: #APP
-; BDVER-NEXT: vfmaddsd %xmm2, %xmm1, %xmm0, %xmm0
-; BDVER-NEXT: vfmaddsd (%rdi), %xmm1, %xmm0, %xmm0
-; BDVER-NEXT: vfmaddsd %xmm1, (%rdi), %xmm0, %xmm0
-; BDVER-NEXT: #NO_APP
-; BDVER-NEXT: retq
+; BDVER12-LABEL: test_vfmaddsd_128:
+; BDVER12: # %bb.0:
+; BDVER12-NEXT: #APP
+; BDVER12-NEXT: vfmaddsd %xmm2, %xmm1, %xmm0, %xmm0 # sched: [5:0.50]
+; BDVER12-NEXT: vfmaddsd (%rdi), %xmm1, %xmm0, %xmm0 # sched: [10:0.50]
+; BDVER12-NEXT: vfmaddsd %xmm1, (%rdi), %xmm0, %xmm0 # sched: [10:0.50]
+; BDVER12-NEXT: #NO_APP
+; BDVER12-NEXT: retq # sched: [1:1.00]
+;
+; BDVER34-LABEL: test_vfmaddsd_128:
+; BDVER34: # %bb.0:
+; BDVER34-NEXT: #APP
+; BDVER34-NEXT: vfmaddsd %xmm2, %xmm1, %xmm0, %xmm0
+; BDVER34-NEXT: vfmaddsd (%rdi), %xmm1, %xmm0, %xmm0
+; BDVER34-NEXT: vfmaddsd %xmm1, (%rdi), %xmm0, %xmm0
+; BDVER34-NEXT: #NO_APP
+; BDVER34-NEXT: retq
tail call void asm "vfmaddsd $2, $1, $0, $0 \0A\09 vfmaddsd $3, $1, $0, $0 \0A\09 vfmaddsd $1, $3, $0, $0", "x,x,x,*m"(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2, <2 x double> *%a3) nounwind
ret void
}
; GENERIC-NEXT: #NO_APP
; GENERIC-NEXT: retq # sched: [1:1.00]
;
-; BDVER-LABEL: test_vfmaddss_128:
-; BDVER: # %bb.0:
-; BDVER-NEXT: #APP
-; BDVER-NEXT: vfmaddss %xmm2, %xmm1, %xmm0, %xmm0
-; BDVER-NEXT: vfmaddss (%rdi), %xmm1, %xmm0, %xmm0
-; BDVER-NEXT: vfmaddss %xmm1, (%rdi), %xmm0, %xmm0
-; BDVER-NEXT: #NO_APP
-; BDVER-NEXT: retq
+; BDVER12-LABEL: test_vfmaddss_128:
+; BDVER12: # %bb.0:
+; BDVER12-NEXT: #APP
+; BDVER12-NEXT: vfmaddss %xmm2, %xmm1, %xmm0, %xmm0 # sched: [5:0.50]
+; BDVER12-NEXT: vfmaddss (%rdi), %xmm1, %xmm0, %xmm0 # sched: [10:0.50]
+; BDVER12-NEXT: vfmaddss %xmm1, (%rdi), %xmm0, %xmm0 # sched: [10:0.50]
+; BDVER12-NEXT: #NO_APP
+; BDVER12-NEXT: retq # sched: [1:1.00]
+;
+; BDVER34-LABEL: test_vfmaddss_128:
+; BDVER34: # %bb.0:
+; BDVER34-NEXT: #APP
+; BDVER34-NEXT: vfmaddss %xmm2, %xmm1, %xmm0, %xmm0
+; BDVER34-NEXT: vfmaddss (%rdi), %xmm1, %xmm0, %xmm0
+; BDVER34-NEXT: vfmaddss %xmm1, (%rdi), %xmm0, %xmm0
+; BDVER34-NEXT: #NO_APP
+; BDVER34-NEXT: retq
tail call void asm "vfmaddss $2, $1, $0, $0 \0A\09 vfmaddss $3, $1, $0, $0 \0A\09 vfmaddss $1, $3, $0, $0", "x,x,x,*m"(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2, <4 x float> *%a3) nounwind
ret void
}
; GENERIC-NEXT: #NO_APP
; GENERIC-NEXT: retq # sched: [1:1.00]
;
-; BDVER-LABEL: test_vfmaddsubpd_128:
-; BDVER: # %bb.0:
-; BDVER-NEXT: #APP
-; BDVER-NEXT: vfmaddsubpd %xmm2, %xmm1, %xmm0, %xmm0
-; BDVER-NEXT: vfmaddsubpd (%rdi), %xmm1, %xmm0, %xmm0
-; BDVER-NEXT: vfmaddsubpd %xmm1, (%rdi), %xmm0, %xmm0
-; BDVER-NEXT: #NO_APP
-; BDVER-NEXT: retq
+; BDVER12-LABEL: test_vfmaddsubpd_128:
+; BDVER12: # %bb.0:
+; BDVER12-NEXT: #APP
+; BDVER12-NEXT: vfmaddsubpd %xmm2, %xmm1, %xmm0, %xmm0 # sched: [5:0.50]
+; BDVER12-NEXT: vfmaddsubpd (%rdi), %xmm1, %xmm0, %xmm0 # sched: [10:0.50]
+; BDVER12-NEXT: vfmaddsubpd %xmm1, (%rdi), %xmm0, %xmm0 # sched: [10:0.50]
+; BDVER12-NEXT: #NO_APP
+; BDVER12-NEXT: retq # sched: [1:1.00]
+;
+; BDVER34-LABEL: test_vfmaddsubpd_128:
+; BDVER34: # %bb.0:
+; BDVER34-NEXT: #APP
+; BDVER34-NEXT: vfmaddsubpd %xmm2, %xmm1, %xmm0, %xmm0
+; BDVER34-NEXT: vfmaddsubpd (%rdi), %xmm1, %xmm0, %xmm0
+; BDVER34-NEXT: vfmaddsubpd %xmm1, (%rdi), %xmm0, %xmm0
+; BDVER34-NEXT: #NO_APP
+; BDVER34-NEXT: retq
tail call void asm "vfmaddsubpd $2, $1, $0, $0 \0A\09 vfmaddsubpd $3, $1, $0, $0 \0A\09 vfmaddsubpd $1, $3, $0, $0", "x,x,x,*m"(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2, <2 x double> *%a3) nounwind
ret void
}
; GENERIC-NEXT: vzeroupper # sched: [100:0.33]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
-; BDVER-LABEL: test_vfmaddsubpd_256:
-; BDVER: # %bb.0:
-; BDVER-NEXT: #APP
-; BDVER-NEXT: vfmaddsubpd %ymm2, %ymm1, %ymm0, %ymm0
-; BDVER-NEXT: vfmaddsubpd (%rdi), %ymm1, %ymm0, %ymm0
-; BDVER-NEXT: vfmaddsubpd %ymm1, (%rdi), %ymm0, %ymm0
-; BDVER-NEXT: #NO_APP
-; BDVER-NEXT: vzeroupper
-; BDVER-NEXT: retq
+; BDVER12-LABEL: test_vfmaddsubpd_256:
+; BDVER12: # %bb.0:
+; BDVER12-NEXT: #APP
+; BDVER12-NEXT: vfmaddsubpd %ymm2, %ymm1, %ymm0, %ymm0 # sched: [5:0.50]
+; BDVER12-NEXT: vfmaddsubpd (%rdi), %ymm1, %ymm0, %ymm0 # sched: [10:0.50]
+; BDVER12-NEXT: vfmaddsubpd %ymm1, (%rdi), %ymm0, %ymm0 # sched: [10:0.50]
+; BDVER12-NEXT: #NO_APP
+; BDVER12-NEXT: vzeroupper # sched: [100:0.33]
+; BDVER12-NEXT: retq # sched: [1:1.00]
+;
+; BDVER34-LABEL: test_vfmaddsubpd_256:
+; BDVER34: # %bb.0:
+; BDVER34-NEXT: #APP
+; BDVER34-NEXT: vfmaddsubpd %ymm2, %ymm1, %ymm0, %ymm0
+; BDVER34-NEXT: vfmaddsubpd (%rdi), %ymm1, %ymm0, %ymm0
+; BDVER34-NEXT: vfmaddsubpd %ymm1, (%rdi), %ymm0, %ymm0
+; BDVER34-NEXT: #NO_APP
+; BDVER34-NEXT: vzeroupper
+; BDVER34-NEXT: retq
tail call void asm "vfmaddsubpd $2, $1, $0, $0 \0A\09 vfmaddsubpd $3, $1, $0, $0 \0A\09 vfmaddsubpd $1, $3, $0, $0", "x,x,x,*m"(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2, <4 x double> *%a3) nounwind
ret void
}
; GENERIC-NEXT: #NO_APP
; GENERIC-NEXT: retq # sched: [1:1.00]
;
-; BDVER-LABEL: test_vfmaddsubps_128:
-; BDVER: # %bb.0:
-; BDVER-NEXT: #APP
-; BDVER-NEXT: vfmaddsubps %xmm2, %xmm1, %xmm0, %xmm0
-; BDVER-NEXT: vfmaddsubps (%rdi), %xmm1, %xmm0, %xmm0
-; BDVER-NEXT: vfmaddsubps %xmm1, (%rdi), %xmm0, %xmm0
-; BDVER-NEXT: #NO_APP
-; BDVER-NEXT: retq
+; BDVER12-LABEL: test_vfmaddsubps_128:
+; BDVER12: # %bb.0:
+; BDVER12-NEXT: #APP
+; BDVER12-NEXT: vfmaddsubps %xmm2, %xmm1, %xmm0, %xmm0 # sched: [5:0.50]
+; BDVER12-NEXT: vfmaddsubps (%rdi), %xmm1, %xmm0, %xmm0 # sched: [10:0.50]
+; BDVER12-NEXT: vfmaddsubps %xmm1, (%rdi), %xmm0, %xmm0 # sched: [10:0.50]
+; BDVER12-NEXT: #NO_APP
+; BDVER12-NEXT: retq # sched: [1:1.00]
+;
+; BDVER34-LABEL: test_vfmaddsubps_128:
+; BDVER34: # %bb.0:
+; BDVER34-NEXT: #APP
+; BDVER34-NEXT: vfmaddsubps %xmm2, %xmm1, %xmm0, %xmm0
+; BDVER34-NEXT: vfmaddsubps (%rdi), %xmm1, %xmm0, %xmm0
+; BDVER34-NEXT: vfmaddsubps %xmm1, (%rdi), %xmm0, %xmm0
+; BDVER34-NEXT: #NO_APP
+; BDVER34-NEXT: retq
tail call void asm "vfmaddsubps $2, $1, $0, $0 \0A\09 vfmaddsubps $3, $1, $0, $0 \0A\09 vfmaddsubps $1, $3, $0, $0", "x,x,x,*m"(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2, <4 x float> *%a3) nounwind
ret void
}
; GENERIC-NEXT: vzeroupper # sched: [100:0.33]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
-; BDVER-LABEL: test_vfmaddsubps_256:
-; BDVER: # %bb.0:
-; BDVER-NEXT: #APP
-; BDVER-NEXT: vfmaddsubps %ymm2, %ymm1, %ymm0, %ymm0
-; BDVER-NEXT: vfmaddsubps (%rdi), %ymm1, %ymm0, %ymm0
-; BDVER-NEXT: vfmaddsubps %ymm1, (%rdi), %ymm0, %ymm0
-; BDVER-NEXT: #NO_APP
-; BDVER-NEXT: vzeroupper
-; BDVER-NEXT: retq
+; BDVER12-LABEL: test_vfmaddsubps_256:
+; BDVER12: # %bb.0:
+; BDVER12-NEXT: #APP
+; BDVER12-NEXT: vfmaddsubps %ymm2, %ymm1, %ymm0, %ymm0 # sched: [5:0.50]
+; BDVER12-NEXT: vfmaddsubps (%rdi), %ymm1, %ymm0, %ymm0 # sched: [10:0.50]
+; BDVER12-NEXT: vfmaddsubps %ymm1, (%rdi), %ymm0, %ymm0 # sched: [10:0.50]
+; BDVER12-NEXT: #NO_APP
+; BDVER12-NEXT: vzeroupper # sched: [100:0.33]
+; BDVER12-NEXT: retq # sched: [1:1.00]
+;
+; BDVER34-LABEL: test_vfmaddsubps_256:
+; BDVER34: # %bb.0:
+; BDVER34-NEXT: #APP
+; BDVER34-NEXT: vfmaddsubps %ymm2, %ymm1, %ymm0, %ymm0
+; BDVER34-NEXT: vfmaddsubps (%rdi), %ymm1, %ymm0, %ymm0
+; BDVER34-NEXT: vfmaddsubps %ymm1, (%rdi), %ymm0, %ymm0
+; BDVER34-NEXT: #NO_APP
+; BDVER34-NEXT: vzeroupper
+; BDVER34-NEXT: retq
tail call void asm "vfmaddsubps $2, $1, $0, $0 \0A\09 vfmaddsubps $3, $1, $0, $0 \0A\09 vfmaddsubps $1, $3, $0, $0", "x,x,x,*m"(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2, <8 x float> *%a3) nounwind
ret void
}
; GENERIC-NEXT: #NO_APP
; GENERIC-NEXT: retq # sched: [1:1.00]
;
-; BDVER-LABEL: test_vfmsubaddpd_128:
-; BDVER: # %bb.0:
-; BDVER-NEXT: #APP
-; BDVER-NEXT: vfmsubaddpd %xmm2, %xmm1, %xmm0, %xmm0
-; BDVER-NEXT: vfmsubaddpd (%rdi), %xmm1, %xmm0, %xmm0
-; BDVER-NEXT: vfmsubaddpd %xmm1, (%rdi), %xmm0, %xmm0
-; BDVER-NEXT: #NO_APP
-; BDVER-NEXT: retq
+; BDVER12-LABEL: test_vfmsubaddpd_128:
+; BDVER12: # %bb.0:
+; BDVER12-NEXT: #APP
+; BDVER12-NEXT: vfmsubaddpd %xmm2, %xmm1, %xmm0, %xmm0 # sched: [5:0.50]
+; BDVER12-NEXT: vfmsubaddpd (%rdi), %xmm1, %xmm0, %xmm0 # sched: [10:0.50]
+; BDVER12-NEXT: vfmsubaddpd %xmm1, (%rdi), %xmm0, %xmm0 # sched: [10:0.50]
+; BDVER12-NEXT: #NO_APP
+; BDVER12-NEXT: retq # sched: [1:1.00]
+;
+; BDVER34-LABEL: test_vfmsubaddpd_128:
+; BDVER34: # %bb.0:
+; BDVER34-NEXT: #APP
+; BDVER34-NEXT: vfmsubaddpd %xmm2, %xmm1, %xmm0, %xmm0
+; BDVER34-NEXT: vfmsubaddpd (%rdi), %xmm1, %xmm0, %xmm0
+; BDVER34-NEXT: vfmsubaddpd %xmm1, (%rdi), %xmm0, %xmm0
+; BDVER34-NEXT: #NO_APP
+; BDVER34-NEXT: retq
tail call void asm "vfmsubaddpd $2, $1, $0, $0 \0A\09 vfmsubaddpd $3, $1, $0, $0 \0A\09 vfmsubaddpd $1, $3, $0, $0", "x,x,x,*m"(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2, <2 x double> *%a3) nounwind
ret void
}
; GENERIC-NEXT: vzeroupper # sched: [100:0.33]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
-; BDVER-LABEL: test_vfmsubaddpd_256:
-; BDVER: # %bb.0:
-; BDVER-NEXT: #APP
-; BDVER-NEXT: vfmsubaddpd %ymm2, %ymm1, %ymm0, %ymm0
-; BDVER-NEXT: vfmsubaddpd (%rdi), %ymm1, %ymm0, %ymm0
-; BDVER-NEXT: vfmsubaddpd %ymm1, (%rdi), %ymm0, %ymm0
-; BDVER-NEXT: #NO_APP
-; BDVER-NEXT: vzeroupper
-; BDVER-NEXT: retq
+; BDVER12-LABEL: test_vfmsubaddpd_256:
+; BDVER12: # %bb.0:
+; BDVER12-NEXT: #APP
+; BDVER12-NEXT: vfmsubaddpd %ymm2, %ymm1, %ymm0, %ymm0 # sched: [5:0.50]
+; BDVER12-NEXT: vfmsubaddpd (%rdi), %ymm1, %ymm0, %ymm0 # sched: [10:0.50]
+; BDVER12-NEXT: vfmsubaddpd %ymm1, (%rdi), %ymm0, %ymm0 # sched: [10:0.50]
+; BDVER12-NEXT: #NO_APP
+; BDVER12-NEXT: vzeroupper # sched: [100:0.33]
+; BDVER12-NEXT: retq # sched: [1:1.00]
+;
+; BDVER34-LABEL: test_vfmsubaddpd_256:
+; BDVER34: # %bb.0:
+; BDVER34-NEXT: #APP
+; BDVER34-NEXT: vfmsubaddpd %ymm2, %ymm1, %ymm0, %ymm0
+; BDVER34-NEXT: vfmsubaddpd (%rdi), %ymm1, %ymm0, %ymm0
+; BDVER34-NEXT: vfmsubaddpd %ymm1, (%rdi), %ymm0, %ymm0
+; BDVER34-NEXT: #NO_APP
+; BDVER34-NEXT: vzeroupper
+; BDVER34-NEXT: retq
tail call void asm "vfmsubaddpd $2, $1, $0, $0 \0A\09 vfmsubaddpd $3, $1, $0, $0 \0A\09 vfmsubaddpd $1, $3, $0, $0", "x,x,x,*m"(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2, <4 x double> *%a3) nounwind
ret void
}
; GENERIC-NEXT: #NO_APP
; GENERIC-NEXT: retq # sched: [1:1.00]
;
-; BDVER-LABEL: test_vfmsubaddps_128:
-; BDVER: # %bb.0:
-; BDVER-NEXT: #APP
-; BDVER-NEXT: vfmsubaddps %xmm2, %xmm1, %xmm0, %xmm0
-; BDVER-NEXT: vfmsubaddps (%rdi), %xmm1, %xmm0, %xmm0
-; BDVER-NEXT: vfmsubaddps %xmm1, (%rdi), %xmm0, %xmm0
-; BDVER-NEXT: #NO_APP
-; BDVER-NEXT: retq
+; BDVER12-LABEL: test_vfmsubaddps_128:
+; BDVER12: # %bb.0:
+; BDVER12-NEXT: #APP
+; BDVER12-NEXT: vfmsubaddps %xmm2, %xmm1, %xmm0, %xmm0 # sched: [5:0.50]
+; BDVER12-NEXT: vfmsubaddps (%rdi), %xmm1, %xmm0, %xmm0 # sched: [10:0.50]
+; BDVER12-NEXT: vfmsubaddps %xmm1, (%rdi), %xmm0, %xmm0 # sched: [10:0.50]
+; BDVER12-NEXT: #NO_APP
+; BDVER12-NEXT: retq # sched: [1:1.00]
+;
+; BDVER34-LABEL: test_vfmsubaddps_128:
+; BDVER34: # %bb.0:
+; BDVER34-NEXT: #APP
+; BDVER34-NEXT: vfmsubaddps %xmm2, %xmm1, %xmm0, %xmm0
+; BDVER34-NEXT: vfmsubaddps (%rdi), %xmm1, %xmm0, %xmm0
+; BDVER34-NEXT: vfmsubaddps %xmm1, (%rdi), %xmm0, %xmm0
+; BDVER34-NEXT: #NO_APP
+; BDVER34-NEXT: retq
tail call void asm "vfmsubaddps $2, $1, $0, $0 \0A\09 vfmsubaddps $3, $1, $0, $0 \0A\09 vfmsubaddps $1, $3, $0, $0", "x,x,x,*m"(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2, <4 x float> *%a3) nounwind
ret void
}
; GENERIC-NEXT: vzeroupper # sched: [100:0.33]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
-; BDVER-LABEL: test_vfmsubaddps_256:
-; BDVER: # %bb.0:
-; BDVER-NEXT: #APP
-; BDVER-NEXT: vfmsubaddps %ymm2, %ymm1, %ymm0, %ymm0
-; BDVER-NEXT: vfmsubaddps (%rdi), %ymm1, %ymm0, %ymm0
-; BDVER-NEXT: vfmsubaddps %ymm1, (%rdi), %ymm0, %ymm0
-; BDVER-NEXT: #NO_APP
-; BDVER-NEXT: vzeroupper
-; BDVER-NEXT: retq
+; BDVER12-LABEL: test_vfmsubaddps_256:
+; BDVER12: # %bb.0:
+; BDVER12-NEXT: #APP
+; BDVER12-NEXT: vfmsubaddps %ymm2, %ymm1, %ymm0, %ymm0 # sched: [5:0.50]
+; BDVER12-NEXT: vfmsubaddps (%rdi), %ymm1, %ymm0, %ymm0 # sched: [10:0.50]
+; BDVER12-NEXT: vfmsubaddps %ymm1, (%rdi), %ymm0, %ymm0 # sched: [10:0.50]
+; BDVER12-NEXT: #NO_APP
+; BDVER12-NEXT: vzeroupper # sched: [100:0.33]
+; BDVER12-NEXT: retq # sched: [1:1.00]
+;
+; BDVER34-LABEL: test_vfmsubaddps_256:
+; BDVER34: # %bb.0:
+; BDVER34-NEXT: #APP
+; BDVER34-NEXT: vfmsubaddps %ymm2, %ymm1, %ymm0, %ymm0
+; BDVER34-NEXT: vfmsubaddps (%rdi), %ymm1, %ymm0, %ymm0
+; BDVER34-NEXT: vfmsubaddps %ymm1, (%rdi), %ymm0, %ymm0
+; BDVER34-NEXT: #NO_APP
+; BDVER34-NEXT: vzeroupper
+; BDVER34-NEXT: retq
tail call void asm "vfmsubaddps $2, $1, $0, $0 \0A\09 vfmsubaddps $3, $1, $0, $0 \0A\09 vfmsubaddps $1, $3, $0, $0", "x,x,x,*m"(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2, <8 x float> *%a3) nounwind
ret void
}
; GENERIC-NEXT: #NO_APP
; GENERIC-NEXT: retq # sched: [1:1.00]
;
-; BDVER-LABEL: test_vfmsubpd_128:
-; BDVER: # %bb.0:
-; BDVER-NEXT: #APP
-; BDVER-NEXT: vfmsubpd %xmm2, %xmm1, %xmm0, %xmm0
-; BDVER-NEXT: vfmsubpd (%rdi), %xmm1, %xmm0, %xmm0
-; BDVER-NEXT: vfmsubpd %xmm1, (%rdi), %xmm0, %xmm0
-; BDVER-NEXT: #NO_APP
-; BDVER-NEXT: retq
+; BDVER12-LABEL: test_vfmsubpd_128:
+; BDVER12: # %bb.0:
+; BDVER12-NEXT: #APP
+; BDVER12-NEXT: vfmsubpd %xmm2, %xmm1, %xmm0, %xmm0 # sched: [5:0.50]
+; BDVER12-NEXT: vfmsubpd (%rdi), %xmm1, %xmm0, %xmm0 # sched: [10:0.50]
+; BDVER12-NEXT: vfmsubpd %xmm1, (%rdi), %xmm0, %xmm0 # sched: [10:0.50]
+; BDVER12-NEXT: #NO_APP
+; BDVER12-NEXT: retq # sched: [1:1.00]
+;
+; BDVER34-LABEL: test_vfmsubpd_128:
+; BDVER34: # %bb.0:
+; BDVER34-NEXT: #APP
+; BDVER34-NEXT: vfmsubpd %xmm2, %xmm1, %xmm0, %xmm0
+; BDVER34-NEXT: vfmsubpd (%rdi), %xmm1, %xmm0, %xmm0
+; BDVER34-NEXT: vfmsubpd %xmm1, (%rdi), %xmm0, %xmm0
+; BDVER34-NEXT: #NO_APP
+; BDVER34-NEXT: retq
tail call void asm "vfmsubpd $2, $1, $0, $0 \0A\09 vfmsubpd $3, $1, $0, $0 \0A\09 vfmsubpd $1, $3, $0, $0", "x,x,x,*m"(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2, <2 x double> *%a3) nounwind
ret void
}
; GENERIC-NEXT: vzeroupper # sched: [100:0.33]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
-; BDVER-LABEL: test_vfmsubpd_256:
-; BDVER: # %bb.0:
-; BDVER-NEXT: #APP
-; BDVER-NEXT: vfmsubpd %ymm2, %ymm1, %ymm0, %ymm0
-; BDVER-NEXT: vfmsubpd (%rdi), %ymm1, %ymm0, %ymm0
-; BDVER-NEXT: vfmsubpd %ymm1, (%rdi), %ymm0, %ymm0
-; BDVER-NEXT: #NO_APP
-; BDVER-NEXT: vzeroupper
-; BDVER-NEXT: retq
+; BDVER12-LABEL: test_vfmsubpd_256:
+; BDVER12: # %bb.0:
+; BDVER12-NEXT: #APP
+; BDVER12-NEXT: vfmsubpd %ymm2, %ymm1, %ymm0, %ymm0 # sched: [5:0.50]
+; BDVER12-NEXT: vfmsubpd (%rdi), %ymm1, %ymm0, %ymm0 # sched: [10:0.50]
+; BDVER12-NEXT: vfmsubpd %ymm1, (%rdi), %ymm0, %ymm0 # sched: [10:0.50]
+; BDVER12-NEXT: #NO_APP
+; BDVER12-NEXT: vzeroupper # sched: [100:0.33]
+; BDVER12-NEXT: retq # sched: [1:1.00]
+;
+; BDVER34-LABEL: test_vfmsubpd_256:
+; BDVER34: # %bb.0:
+; BDVER34-NEXT: #APP
+; BDVER34-NEXT: vfmsubpd %ymm2, %ymm1, %ymm0, %ymm0
+; BDVER34-NEXT: vfmsubpd (%rdi), %ymm1, %ymm0, %ymm0
+; BDVER34-NEXT: vfmsubpd %ymm1, (%rdi), %ymm0, %ymm0
+; BDVER34-NEXT: #NO_APP
+; BDVER34-NEXT: vzeroupper
+; BDVER34-NEXT: retq
tail call void asm "vfmsubpd $2, $1, $0, $0 \0A\09 vfmsubpd $3, $1, $0, $0 \0A\09 vfmsubpd $1, $3, $0, $0", "x,x,x,*m"(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2, <4 x double> *%a3) nounwind
ret void
}
; GENERIC-NEXT: #NO_APP
; GENERIC-NEXT: retq # sched: [1:1.00]
;
-; BDVER-LABEL: test_vfmsubps_128:
-; BDVER: # %bb.0:
-; BDVER-NEXT: #APP
-; BDVER-NEXT: vfmsubps %xmm2, %xmm1, %xmm0, %xmm0
-; BDVER-NEXT: vfmsubps (%rdi), %xmm1, %xmm0, %xmm0
-; BDVER-NEXT: vfmsubps %xmm1, (%rdi), %xmm0, %xmm0
-; BDVER-NEXT: #NO_APP
-; BDVER-NEXT: retq
+; BDVER12-LABEL: test_vfmsubps_128:
+; BDVER12: # %bb.0:
+; BDVER12-NEXT: #APP
+; BDVER12-NEXT: vfmsubps %xmm2, %xmm1, %xmm0, %xmm0 # sched: [5:0.50]
+; BDVER12-NEXT: vfmsubps (%rdi), %xmm1, %xmm0, %xmm0 # sched: [10:0.50]
+; BDVER12-NEXT: vfmsubps %xmm1, (%rdi), %xmm0, %xmm0 # sched: [10:0.50]
+; BDVER12-NEXT: #NO_APP
+; BDVER12-NEXT: retq # sched: [1:1.00]
+;
+; BDVER34-LABEL: test_vfmsubps_128:
+; BDVER34: # %bb.0:
+; BDVER34-NEXT: #APP
+; BDVER34-NEXT: vfmsubps %xmm2, %xmm1, %xmm0, %xmm0
+; BDVER34-NEXT: vfmsubps (%rdi), %xmm1, %xmm0, %xmm0
+; BDVER34-NEXT: vfmsubps %xmm1, (%rdi), %xmm0, %xmm0
+; BDVER34-NEXT: #NO_APP
+; BDVER34-NEXT: retq
tail call void asm "vfmsubps $2, $1, $0, $0 \0A\09 vfmsubps $3, $1, $0, $0 \0A\09 vfmsubps $1, $3, $0, $0", "x,x,x,*m"(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2, <4 x float> *%a3) nounwind
ret void
}
; GENERIC-NEXT: vzeroupper # sched: [100:0.33]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
-; BDVER-LABEL: test_vfmsubps_256:
-; BDVER: # %bb.0:
-; BDVER-NEXT: #APP
-; BDVER-NEXT: vfmsubps %ymm2, %ymm1, %ymm0, %ymm0
-; BDVER-NEXT: vfmsubps (%rdi), %ymm1, %ymm0, %ymm0
-; BDVER-NEXT: vfmsubps %ymm1, (%rdi), %ymm0, %ymm0
-; BDVER-NEXT: #NO_APP
-; BDVER-NEXT: vzeroupper
-; BDVER-NEXT: retq
+; BDVER12-LABEL: test_vfmsubps_256:
+; BDVER12: # %bb.0:
+; BDVER12-NEXT: #APP
+; BDVER12-NEXT: vfmsubps %ymm2, %ymm1, %ymm0, %ymm0 # sched: [5:0.50]
+; BDVER12-NEXT: vfmsubps (%rdi), %ymm1, %ymm0, %ymm0 # sched: [10:0.50]
+; BDVER12-NEXT: vfmsubps %ymm1, (%rdi), %ymm0, %ymm0 # sched: [10:0.50]
+; BDVER12-NEXT: #NO_APP
+; BDVER12-NEXT: vzeroupper # sched: [100:0.33]
+; BDVER12-NEXT: retq # sched: [1:1.00]
+;
+; BDVER34-LABEL: test_vfmsubps_256:
+; BDVER34: # %bb.0:
+; BDVER34-NEXT: #APP
+; BDVER34-NEXT: vfmsubps %ymm2, %ymm1, %ymm0, %ymm0
+; BDVER34-NEXT: vfmsubps (%rdi), %ymm1, %ymm0, %ymm0
+; BDVER34-NEXT: vfmsubps %ymm1, (%rdi), %ymm0, %ymm0
+; BDVER34-NEXT: #NO_APP
+; BDVER34-NEXT: vzeroupper
+; BDVER34-NEXT: retq
tail call void asm "vfmsubps $2, $1, $0, $0 \0A\09 vfmsubps $3, $1, $0, $0 \0A\09 vfmsubps $1, $3, $0, $0", "x,x,x,*m"(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2, <8 x float> *%a3) nounwind
ret void
}
; GENERIC-NEXT: #NO_APP
; GENERIC-NEXT: retq # sched: [1:1.00]
;
-; BDVER-LABEL: test_vfmsubsd_128:
-; BDVER: # %bb.0:
-; BDVER-NEXT: #APP
-; BDVER-NEXT: vfmsubsd %xmm2, %xmm1, %xmm0, %xmm0
-; BDVER-NEXT: vfmsubsd (%rdi), %xmm1, %xmm0, %xmm0
-; BDVER-NEXT: vfmsubsd %xmm1, (%rdi), %xmm0, %xmm0
-; BDVER-NEXT: #NO_APP
-; BDVER-NEXT: retq
+; BDVER12-LABEL: test_vfmsubsd_128:
+; BDVER12: # %bb.0:
+; BDVER12-NEXT: #APP
+; BDVER12-NEXT: vfmsubsd %xmm2, %xmm1, %xmm0, %xmm0 # sched: [5:0.50]
+; BDVER12-NEXT: vfmsubsd (%rdi), %xmm1, %xmm0, %xmm0 # sched: [10:0.50]
+; BDVER12-NEXT: vfmsubsd %xmm1, (%rdi), %xmm0, %xmm0 # sched: [10:0.50]
+; BDVER12-NEXT: #NO_APP
+; BDVER12-NEXT: retq # sched: [1:1.00]
+;
+; BDVER34-LABEL: test_vfmsubsd_128:
+; BDVER34: # %bb.0:
+; BDVER34-NEXT: #APP
+; BDVER34-NEXT: vfmsubsd %xmm2, %xmm1, %xmm0, %xmm0
+; BDVER34-NEXT: vfmsubsd (%rdi), %xmm1, %xmm0, %xmm0
+; BDVER34-NEXT: vfmsubsd %xmm1, (%rdi), %xmm0, %xmm0
+; BDVER34-NEXT: #NO_APP
+; BDVER34-NEXT: retq
tail call void asm "vfmsubsd $2, $1, $0, $0 \0A\09 vfmsubsd $3, $1, $0, $0 \0A\09 vfmsubsd $1, $3, $0, $0", "x,x,x,*m"(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2, <2 x double> *%a3) nounwind
ret void
}
; GENERIC-NEXT: #NO_APP
; GENERIC-NEXT: retq # sched: [1:1.00]
;
-; BDVER-LABEL: test_vfmsubss_128:
-; BDVER: # %bb.0:
-; BDVER-NEXT: #APP
-; BDVER-NEXT: vfmsubss %xmm2, %xmm1, %xmm0, %xmm0
-; BDVER-NEXT: vfmsubss (%rdi), %xmm1, %xmm0, %xmm0
-; BDVER-NEXT: vfmsubss %xmm1, (%rdi), %xmm0, %xmm0
-; BDVER-NEXT: #NO_APP
-; BDVER-NEXT: retq
+; BDVER12-LABEL: test_vfmsubss_128:
+; BDVER12: # %bb.0:
+; BDVER12-NEXT: #APP
+; BDVER12-NEXT: vfmsubss %xmm2, %xmm1, %xmm0, %xmm0 # sched: [5:0.50]
+; BDVER12-NEXT: vfmsubss (%rdi), %xmm1, %xmm0, %xmm0 # sched: [10:0.50]
+; BDVER12-NEXT: vfmsubss %xmm1, (%rdi), %xmm0, %xmm0 # sched: [10:0.50]
+; BDVER12-NEXT: #NO_APP
+; BDVER12-NEXT: retq # sched: [1:1.00]
+;
+; BDVER34-LABEL: test_vfmsubss_128:
+; BDVER34: # %bb.0:
+; BDVER34-NEXT: #APP
+; BDVER34-NEXT: vfmsubss %xmm2, %xmm1, %xmm0, %xmm0
+; BDVER34-NEXT: vfmsubss (%rdi), %xmm1, %xmm0, %xmm0
+; BDVER34-NEXT: vfmsubss %xmm1, (%rdi), %xmm0, %xmm0
+; BDVER34-NEXT: #NO_APP
+; BDVER34-NEXT: retq
tail call void asm "vfmsubss $2, $1, $0, $0 \0A\09 vfmsubss $3, $1, $0, $0 \0A\09 vfmsubss $1, $3, $0, $0", "x,x,x,*m"(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2, <4 x float> *%a3) nounwind
ret void
}
; GENERIC-NEXT: #NO_APP
; GENERIC-NEXT: retq # sched: [1:1.00]
;
-; BDVER-LABEL: test_vfnmaddpd_128:
-; BDVER: # %bb.0:
-; BDVER-NEXT: #APP
-; BDVER-NEXT: vfnmaddpd %xmm2, %xmm1, %xmm0, %xmm0
-; BDVER-NEXT: vfnmaddpd (%rdi), %xmm1, %xmm0, %xmm0
-; BDVER-NEXT: vfnmaddpd %xmm1, (%rdi), %xmm0, %xmm0
-; BDVER-NEXT: #NO_APP
-; BDVER-NEXT: retq
+; BDVER12-LABEL: test_vfnmaddpd_128:
+; BDVER12: # %bb.0:
+; BDVER12-NEXT: #APP
+; BDVER12-NEXT: vfnmaddpd %xmm2, %xmm1, %xmm0, %xmm0 # sched: [5:0.50]
+; BDVER12-NEXT: vfnmaddpd (%rdi), %xmm1, %xmm0, %xmm0 # sched: [10:0.50]
+; BDVER12-NEXT: vfnmaddpd %xmm1, (%rdi), %xmm0, %xmm0 # sched: [10:0.50]
+; BDVER12-NEXT: #NO_APP
+; BDVER12-NEXT: retq # sched: [1:1.00]
+;
+; BDVER34-LABEL: test_vfnmaddpd_128:
+; BDVER34: # %bb.0:
+; BDVER34-NEXT: #APP
+; BDVER34-NEXT: vfnmaddpd %xmm2, %xmm1, %xmm0, %xmm0
+; BDVER34-NEXT: vfnmaddpd (%rdi), %xmm1, %xmm0, %xmm0
+; BDVER34-NEXT: vfnmaddpd %xmm1, (%rdi), %xmm0, %xmm0
+; BDVER34-NEXT: #NO_APP
+; BDVER34-NEXT: retq
tail call void asm "vfnmaddpd $2, $1, $0, $0 \0A\09 vfnmaddpd $3, $1, $0, $0 \0A\09 vfnmaddpd $1, $3, $0, $0", "x,x,x,*m"(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2, <2 x double> *%a3) nounwind
ret void
}
; GENERIC-NEXT: vzeroupper # sched: [100:0.33]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
-; BDVER-LABEL: test_vfnmaddpd_256:
-; BDVER: # %bb.0:
-; BDVER-NEXT: #APP
-; BDVER-NEXT: vfnmaddpd %ymm2, %ymm1, %ymm0, %ymm0
-; BDVER-NEXT: vfnmaddpd (%rdi), %ymm1, %ymm0, %ymm0
-; BDVER-NEXT: vfnmaddpd %ymm1, (%rdi), %ymm0, %ymm0
-; BDVER-NEXT: #NO_APP
-; BDVER-NEXT: vzeroupper
-; BDVER-NEXT: retq
+; BDVER12-LABEL: test_vfnmaddpd_256:
+; BDVER12: # %bb.0:
+; BDVER12-NEXT: #APP
+; BDVER12-NEXT: vfnmaddpd %ymm2, %ymm1, %ymm0, %ymm0 # sched: [5:0.50]
+; BDVER12-NEXT: vfnmaddpd (%rdi), %ymm1, %ymm0, %ymm0 # sched: [10:0.50]
+; BDVER12-NEXT: vfnmaddpd %ymm1, (%rdi), %ymm0, %ymm0 # sched: [10:0.50]
+; BDVER12-NEXT: #NO_APP
+; BDVER12-NEXT: vzeroupper # sched: [100:0.33]
+; BDVER12-NEXT: retq # sched: [1:1.00]
+;
+; BDVER34-LABEL: test_vfnmaddpd_256:
+; BDVER34: # %bb.0:
+; BDVER34-NEXT: #APP
+; BDVER34-NEXT: vfnmaddpd %ymm2, %ymm1, %ymm0, %ymm0
+; BDVER34-NEXT: vfnmaddpd (%rdi), %ymm1, %ymm0, %ymm0
+; BDVER34-NEXT: vfnmaddpd %ymm1, (%rdi), %ymm0, %ymm0
+; BDVER34-NEXT: #NO_APP
+; BDVER34-NEXT: vzeroupper
+; BDVER34-NEXT: retq
tail call void asm "vfnmaddpd $2, $1, $0, $0 \0A\09 vfnmaddpd $3, $1, $0, $0 \0A\09 vfnmaddpd $1, $3, $0, $0", "x,x,x,*m"(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2, <4 x double> *%a3) nounwind
ret void
}
; GENERIC-NEXT: #NO_APP
; GENERIC-NEXT: retq # sched: [1:1.00]
;
-; BDVER-LABEL: test_vfnmaddps_128:
-; BDVER: # %bb.0:
-; BDVER-NEXT: #APP
-; BDVER-NEXT: vfnmaddps %xmm2, %xmm1, %xmm0, %xmm0
-; BDVER-NEXT: vfnmaddps (%rdi), %xmm1, %xmm0, %xmm0
-; BDVER-NEXT: vfnmaddps %xmm1, (%rdi), %xmm0, %xmm0
-; BDVER-NEXT: #NO_APP
-; BDVER-NEXT: retq
+; BDVER12-LABEL: test_vfnmaddps_128:
+; BDVER12: # %bb.0:
+; BDVER12-NEXT: #APP
+; BDVER12-NEXT: vfnmaddps %xmm2, %xmm1, %xmm0, %xmm0 # sched: [5:0.50]
+; BDVER12-NEXT: vfnmaddps (%rdi), %xmm1, %xmm0, %xmm0 # sched: [10:0.50]
+; BDVER12-NEXT: vfnmaddps %xmm1, (%rdi), %xmm0, %xmm0 # sched: [10:0.50]
+; BDVER12-NEXT: #NO_APP
+; BDVER12-NEXT: retq # sched: [1:1.00]
+;
+; BDVER34-LABEL: test_vfnmaddps_128:
+; BDVER34: # %bb.0:
+; BDVER34-NEXT: #APP
+; BDVER34-NEXT: vfnmaddps %xmm2, %xmm1, %xmm0, %xmm0
+; BDVER34-NEXT: vfnmaddps (%rdi), %xmm1, %xmm0, %xmm0
+; BDVER34-NEXT: vfnmaddps %xmm1, (%rdi), %xmm0, %xmm0
+; BDVER34-NEXT: #NO_APP
+; BDVER34-NEXT: retq
tail call void asm "vfnmaddps $2, $1, $0, $0 \0A\09 vfnmaddps $3, $1, $0, $0 \0A\09 vfnmaddps $1, $3, $0, $0", "x,x,x,*m"(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2, <4 x float> *%a3) nounwind
ret void
}
; GENERIC-NEXT: vzeroupper # sched: [100:0.33]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
-; BDVER-LABEL: test_vfnmaddps_256:
-; BDVER: # %bb.0:
-; BDVER-NEXT: #APP
-; BDVER-NEXT: vfnmaddps %ymm2, %ymm1, %ymm0, %ymm0
-; BDVER-NEXT: vfnmaddps (%rdi), %ymm1, %ymm0, %ymm0
-; BDVER-NEXT: vfnmaddps %ymm1, (%rdi), %ymm0, %ymm0
-; BDVER-NEXT: #NO_APP
-; BDVER-NEXT: vzeroupper
-; BDVER-NEXT: retq
+; BDVER12-LABEL: test_vfnmaddps_256:
+; BDVER12: # %bb.0:
+; BDVER12-NEXT: #APP
+; BDVER12-NEXT: vfnmaddps %ymm2, %ymm1, %ymm0, %ymm0 # sched: [5:0.50]
+; BDVER12-NEXT: vfnmaddps (%rdi), %ymm1, %ymm0, %ymm0 # sched: [10:0.50]
+; BDVER12-NEXT: vfnmaddps %ymm1, (%rdi), %ymm0, %ymm0 # sched: [10:0.50]
+; BDVER12-NEXT: #NO_APP
+; BDVER12-NEXT: vzeroupper # sched: [100:0.33]
+; BDVER12-NEXT: retq # sched: [1:1.00]
+;
+; BDVER34-LABEL: test_vfnmaddps_256:
+; BDVER34: # %bb.0:
+; BDVER34-NEXT: #APP
+; BDVER34-NEXT: vfnmaddps %ymm2, %ymm1, %ymm0, %ymm0
+; BDVER34-NEXT: vfnmaddps (%rdi), %ymm1, %ymm0, %ymm0
+; BDVER34-NEXT: vfnmaddps %ymm1, (%rdi), %ymm0, %ymm0
+; BDVER34-NEXT: #NO_APP
+; BDVER34-NEXT: vzeroupper
+; BDVER34-NEXT: retq
tail call void asm "vfnmaddps $2, $1, $0, $0 \0A\09 vfnmaddps $3, $1, $0, $0 \0A\09 vfnmaddps $1, $3, $0, $0", "x,x,x,*m"(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2, <8 x float> *%a3) nounwind
ret void
}
; GENERIC-NEXT: #NO_APP
; GENERIC-NEXT: retq # sched: [1:1.00]
;
-; BDVER-LABEL: test_vfnmaddsd_128:
-; BDVER: # %bb.0:
-; BDVER-NEXT: #APP
-; BDVER-NEXT: vfnmaddsd %xmm2, %xmm1, %xmm0, %xmm0
-; BDVER-NEXT: vfnmaddsd (%rdi), %xmm1, %xmm0, %xmm0
-; BDVER-NEXT: vfnmaddsd %xmm1, (%rdi), %xmm0, %xmm0
-; BDVER-NEXT: #NO_APP
-; BDVER-NEXT: retq
+; BDVER12-LABEL: test_vfnmaddsd_128:
+; BDVER12: # %bb.0:
+; BDVER12-NEXT: #APP
+; BDVER12-NEXT: vfnmaddsd %xmm2, %xmm1, %xmm0, %xmm0 # sched: [5:0.50]
+; BDVER12-NEXT: vfnmaddsd (%rdi), %xmm1, %xmm0, %xmm0 # sched: [10:0.50]
+; BDVER12-NEXT: vfnmaddsd %xmm1, (%rdi), %xmm0, %xmm0 # sched: [10:0.50]
+; BDVER12-NEXT: #NO_APP
+; BDVER12-NEXT: retq # sched: [1:1.00]
+;
+; BDVER34-LABEL: test_vfnmaddsd_128:
+; BDVER34: # %bb.0:
+; BDVER34-NEXT: #APP
+; BDVER34-NEXT: vfnmaddsd %xmm2, %xmm1, %xmm0, %xmm0
+; BDVER34-NEXT: vfnmaddsd (%rdi), %xmm1, %xmm0, %xmm0
+; BDVER34-NEXT: vfnmaddsd %xmm1, (%rdi), %xmm0, %xmm0
+; BDVER34-NEXT: #NO_APP
+; BDVER34-NEXT: retq
tail call void asm "vfnmaddsd $2, $1, $0, $0 \0A\09 vfnmaddsd $3, $1, $0, $0 \0A\09 vfnmaddsd $1, $3, $0, $0", "x,x,x,*m"(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2, <2 x double> *%a3) nounwind
ret void
}
; GENERIC-NEXT: #NO_APP
; GENERIC-NEXT: retq # sched: [1:1.00]
;
-; BDVER-LABEL: test_vfnmaddss_128:
-; BDVER: # %bb.0:
-; BDVER-NEXT: #APP
-; BDVER-NEXT: vfnmaddss %xmm2, %xmm1, %xmm0, %xmm0
-; BDVER-NEXT: vfnmaddss (%rdi), %xmm1, %xmm0, %xmm0
-; BDVER-NEXT: vfnmaddss %xmm1, (%rdi), %xmm0, %xmm0
-; BDVER-NEXT: #NO_APP
-; BDVER-NEXT: retq
+; BDVER12-LABEL: test_vfnmaddss_128:
+; BDVER12: # %bb.0:
+; BDVER12-NEXT: #APP
+; BDVER12-NEXT: vfnmaddss %xmm2, %xmm1, %xmm0, %xmm0 # sched: [5:0.50]
+; BDVER12-NEXT: vfnmaddss (%rdi), %xmm1, %xmm0, %xmm0 # sched: [10:0.50]
+; BDVER12-NEXT: vfnmaddss %xmm1, (%rdi), %xmm0, %xmm0 # sched: [10:0.50]
+; BDVER12-NEXT: #NO_APP
+; BDVER12-NEXT: retq # sched: [1:1.00]
+;
+; BDVER34-LABEL: test_vfnmaddss_128:
+; BDVER34: # %bb.0:
+; BDVER34-NEXT: #APP
+; BDVER34-NEXT: vfnmaddss %xmm2, %xmm1, %xmm0, %xmm0
+; BDVER34-NEXT: vfnmaddss (%rdi), %xmm1, %xmm0, %xmm0
+; BDVER34-NEXT: vfnmaddss %xmm1, (%rdi), %xmm0, %xmm0
+; BDVER34-NEXT: #NO_APP
+; BDVER34-NEXT: retq
tail call void asm "vfnmaddss $2, $1, $0, $0 \0A\09 vfnmaddss $3, $1, $0, $0 \0A\09 vfnmaddss $1, $3, $0, $0", "x,x,x,*m"(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2, <4 x float> *%a3) nounwind
ret void
}
; GENERIC-NEXT: #NO_APP
; GENERIC-NEXT: retq # sched: [1:1.00]
;
-; BDVER-LABEL: test_vfnmsubpd_128:
-; BDVER: # %bb.0:
-; BDVER-NEXT: #APP
-; BDVER-NEXT: vfnmsubpd %xmm2, %xmm1, %xmm0, %xmm0
-; BDVER-NEXT: vfnmsubpd (%rdi), %xmm1, %xmm0, %xmm0
-; BDVER-NEXT: vfnmsubpd %xmm1, (%rdi), %xmm0, %xmm0
-; BDVER-NEXT: #NO_APP
-; BDVER-NEXT: retq
+; BDVER12-LABEL: test_vfnmsubpd_128:
+; BDVER12: # %bb.0:
+; BDVER12-NEXT: #APP
+; BDVER12-NEXT: vfnmsubpd %xmm2, %xmm1, %xmm0, %xmm0 # sched: [5:0.50]
+; BDVER12-NEXT: vfnmsubpd (%rdi), %xmm1, %xmm0, %xmm0 # sched: [10:0.50]
+; BDVER12-NEXT: vfnmsubpd %xmm1, (%rdi), %xmm0, %xmm0 # sched: [10:0.50]
+; BDVER12-NEXT: #NO_APP
+; BDVER12-NEXT: retq # sched: [1:1.00]
+;
+; BDVER34-LABEL: test_vfnmsubpd_128:
+; BDVER34: # %bb.0:
+; BDVER34-NEXT: #APP
+; BDVER34-NEXT: vfnmsubpd %xmm2, %xmm1, %xmm0, %xmm0
+; BDVER34-NEXT: vfnmsubpd (%rdi), %xmm1, %xmm0, %xmm0
+; BDVER34-NEXT: vfnmsubpd %xmm1, (%rdi), %xmm0, %xmm0
+; BDVER34-NEXT: #NO_APP
+; BDVER34-NEXT: retq
tail call void asm "vfnmsubpd $2, $1, $0, $0 \0A\09 vfnmsubpd $3, $1, $0, $0 \0A\09 vfnmsubpd $1, $3, $0, $0", "x,x,x,*m"(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2, <2 x double> *%a3) nounwind
ret void
}
; GENERIC-NEXT: vzeroupper # sched: [100:0.33]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
-; BDVER-LABEL: test_vfnmsubpd_256:
-; BDVER: # %bb.0:
-; BDVER-NEXT: #APP
-; BDVER-NEXT: vfnmsubpd %ymm2, %ymm1, %ymm0, %ymm0
-; BDVER-NEXT: vfnmsubpd (%rdi), %ymm1, %ymm0, %ymm0
-; BDVER-NEXT: vfnmsubpd %ymm1, (%rdi), %ymm0, %ymm0
-; BDVER-NEXT: #NO_APP
-; BDVER-NEXT: vzeroupper
-; BDVER-NEXT: retq
+; BDVER12-LABEL: test_vfnmsubpd_256:
+; BDVER12: # %bb.0:
+; BDVER12-NEXT: #APP
+; BDVER12-NEXT: vfnmsubpd %ymm2, %ymm1, %ymm0, %ymm0 # sched: [5:0.50]
+; BDVER12-NEXT: vfnmsubpd (%rdi), %ymm1, %ymm0, %ymm0 # sched: [10:0.50]
+; BDVER12-NEXT: vfnmsubpd %ymm1, (%rdi), %ymm0, %ymm0 # sched: [10:0.50]
+; BDVER12-NEXT: #NO_APP
+; BDVER12-NEXT: vzeroupper # sched: [100:0.33]
+; BDVER12-NEXT: retq # sched: [1:1.00]
+;
+; BDVER34-LABEL: test_vfnmsubpd_256:
+; BDVER34: # %bb.0:
+; BDVER34-NEXT: #APP
+; BDVER34-NEXT: vfnmsubpd %ymm2, %ymm1, %ymm0, %ymm0
+; BDVER34-NEXT: vfnmsubpd (%rdi), %ymm1, %ymm0, %ymm0
+; BDVER34-NEXT: vfnmsubpd %ymm1, (%rdi), %ymm0, %ymm0
+; BDVER34-NEXT: #NO_APP
+; BDVER34-NEXT: vzeroupper
+; BDVER34-NEXT: retq
tail call void asm "vfnmsubpd $2, $1, $0, $0 \0A\09 vfnmsubpd $3, $1, $0, $0 \0A\09 vfnmsubpd $1, $3, $0, $0", "x,x,x,*m"(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2, <4 x double> *%a3) nounwind
ret void
}
; GENERIC-NEXT: #NO_APP
; GENERIC-NEXT: retq # sched: [1:1.00]
;
-; BDVER-LABEL: test_vfnmsubps_128:
-; BDVER: # %bb.0:
-; BDVER-NEXT: #APP
-; BDVER-NEXT: vfnmsubps %xmm2, %xmm1, %xmm0, %xmm0
-; BDVER-NEXT: vfnmsubps (%rdi), %xmm1, %xmm0, %xmm0
-; BDVER-NEXT: vfnmsubps %xmm1, (%rdi), %xmm0, %xmm0
-; BDVER-NEXT: #NO_APP
-; BDVER-NEXT: retq
+; BDVER12-LABEL: test_vfnmsubps_128:
+; BDVER12: # %bb.0:
+; BDVER12-NEXT: #APP
+; BDVER12-NEXT: vfnmsubps %xmm2, %xmm1, %xmm0, %xmm0 # sched: [5:0.50]
+; BDVER12-NEXT: vfnmsubps (%rdi), %xmm1, %xmm0, %xmm0 # sched: [10:0.50]
+; BDVER12-NEXT: vfnmsubps %xmm1, (%rdi), %xmm0, %xmm0 # sched: [10:0.50]
+; BDVER12-NEXT: #NO_APP
+; BDVER12-NEXT: retq # sched: [1:1.00]
+;
+; BDVER34-LABEL: test_vfnmsubps_128:
+; BDVER34: # %bb.0:
+; BDVER34-NEXT: #APP
+; BDVER34-NEXT: vfnmsubps %xmm2, %xmm1, %xmm0, %xmm0
+; BDVER34-NEXT: vfnmsubps (%rdi), %xmm1, %xmm0, %xmm0
+; BDVER34-NEXT: vfnmsubps %xmm1, (%rdi), %xmm0, %xmm0
+; BDVER34-NEXT: #NO_APP
+; BDVER34-NEXT: retq
tail call void asm "vfnmsubps $2, $1, $0, $0 \0A\09 vfnmsubps $3, $1, $0, $0 \0A\09 vfnmsubps $1, $3, $0, $0", "x,x,x,*m"(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2, <4 x float> *%a3) nounwind
ret void
}
; GENERIC-NEXT: vzeroupper # sched: [100:0.33]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
-; BDVER-LABEL: test_vfnmsubps_256:
-; BDVER: # %bb.0:
-; BDVER-NEXT: #APP
-; BDVER-NEXT: vfnmsubps %ymm2, %ymm1, %ymm0, %ymm0
-; BDVER-NEXT: vfnmsubps (%rdi), %ymm1, %ymm0, %ymm0
-; BDVER-NEXT: vfnmsubps %ymm1, (%rdi), %ymm0, %ymm0
-; BDVER-NEXT: #NO_APP
-; BDVER-NEXT: vzeroupper
-; BDVER-NEXT: retq
+; BDVER12-LABEL: test_vfnmsubps_256:
+; BDVER12: # %bb.0:
+; BDVER12-NEXT: #APP
+; BDVER12-NEXT: vfnmsubps %ymm2, %ymm1, %ymm0, %ymm0 # sched: [5:0.50]
+; BDVER12-NEXT: vfnmsubps (%rdi), %ymm1, %ymm0, %ymm0 # sched: [10:0.50]
+; BDVER12-NEXT: vfnmsubps %ymm1, (%rdi), %ymm0, %ymm0 # sched: [10:0.50]
+; BDVER12-NEXT: #NO_APP
+; BDVER12-NEXT: vzeroupper # sched: [100:0.33]
+; BDVER12-NEXT: retq # sched: [1:1.00]
+;
+; BDVER34-LABEL: test_vfnmsubps_256:
+; BDVER34: # %bb.0:
+; BDVER34-NEXT: #APP
+; BDVER34-NEXT: vfnmsubps %ymm2, %ymm1, %ymm0, %ymm0
+; BDVER34-NEXT: vfnmsubps (%rdi), %ymm1, %ymm0, %ymm0
+; BDVER34-NEXT: vfnmsubps %ymm1, (%rdi), %ymm0, %ymm0
+; BDVER34-NEXT: #NO_APP
+; BDVER34-NEXT: vzeroupper
+; BDVER34-NEXT: retq
tail call void asm "vfnmsubps $2, $1, $0, $0 \0A\09 vfnmsubps $3, $1, $0, $0 \0A\09 vfnmsubps $1, $3, $0, $0", "x,x,x,*m"(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2, <8 x float> *%a3) nounwind
ret void
}
; GENERIC-NEXT: #NO_APP
; GENERIC-NEXT: retq # sched: [1:1.00]
;
-; BDVER-LABEL: test_vfnmsubsd_128:
-; BDVER: # %bb.0:
-; BDVER-NEXT: #APP
-; BDVER-NEXT: vfnmsubsd %xmm2, %xmm1, %xmm0, %xmm0
-; BDVER-NEXT: vfnmsubsd (%rdi), %xmm1, %xmm0, %xmm0
-; BDVER-NEXT: vfnmsubsd %xmm1, (%rdi), %xmm0, %xmm0
-; BDVER-NEXT: #NO_APP
-; BDVER-NEXT: retq
+; BDVER12-LABEL: test_vfnmsubsd_128:
+; BDVER12: # %bb.0:
+; BDVER12-NEXT: #APP
+; BDVER12-NEXT: vfnmsubsd %xmm2, %xmm1, %xmm0, %xmm0 # sched: [5:0.50]
+; BDVER12-NEXT: vfnmsubsd (%rdi), %xmm1, %xmm0, %xmm0 # sched: [10:0.50]
+; BDVER12-NEXT: vfnmsubsd %xmm1, (%rdi), %xmm0, %xmm0 # sched: [10:0.50]
+; BDVER12-NEXT: #NO_APP
+; BDVER12-NEXT: retq # sched: [1:1.00]
+;
+; BDVER34-LABEL: test_vfnmsubsd_128:
+; BDVER34: # %bb.0:
+; BDVER34-NEXT: #APP
+; BDVER34-NEXT: vfnmsubsd %xmm2, %xmm1, %xmm0, %xmm0
+; BDVER34-NEXT: vfnmsubsd (%rdi), %xmm1, %xmm0, %xmm0
+; BDVER34-NEXT: vfnmsubsd %xmm1, (%rdi), %xmm0, %xmm0
+; BDVER34-NEXT: #NO_APP
+; BDVER34-NEXT: retq
tail call void asm "vfnmsubsd $2, $1, $0, $0 \0A\09 vfnmsubsd $3, $1, $0, $0 \0A\09 vfnmsubsd $1, $3, $0, $0", "x,x,x,*m"(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2, <2 x double> *%a3) nounwind
ret void
}
; GENERIC-NEXT: #NO_APP
; GENERIC-NEXT: retq # sched: [1:1.00]
;
-; BDVER-LABEL: test_vfnmsubss_128:
-; BDVER: # %bb.0:
-; BDVER-NEXT: #APP
-; BDVER-NEXT: vfnmsubss %xmm2, %xmm1, %xmm0, %xmm0
-; BDVER-NEXT: vfnmsubss (%rdi), %xmm1, %xmm0, %xmm0
-; BDVER-NEXT: vfnmsubss %xmm1, (%rdi), %xmm0, %xmm0
-; BDVER-NEXT: #NO_APP
-; BDVER-NEXT: retq
+; BDVER12-LABEL: test_vfnmsubss_128:
+; BDVER12: # %bb.0:
+; BDVER12-NEXT: #APP
+; BDVER12-NEXT: vfnmsubss %xmm2, %xmm1, %xmm0, %xmm0 # sched: [5:0.50]
+; BDVER12-NEXT: vfnmsubss (%rdi), %xmm1, %xmm0, %xmm0 # sched: [10:0.50]
+; BDVER12-NEXT: vfnmsubss %xmm1, (%rdi), %xmm0, %xmm0 # sched: [10:0.50]
+; BDVER12-NEXT: #NO_APP
+; BDVER12-NEXT: retq # sched: [1:1.00]
+;
+; BDVER34-LABEL: test_vfnmsubss_128:
+; BDVER34: # %bb.0:
+; BDVER34-NEXT: #APP
+; BDVER34-NEXT: vfnmsubss %xmm2, %xmm1, %xmm0, %xmm0
+; BDVER34-NEXT: vfnmsubss (%rdi), %xmm1, %xmm0, %xmm0
+; BDVER34-NEXT: vfnmsubss %xmm1, (%rdi), %xmm0, %xmm0
+; BDVER34-NEXT: #NO_APP
+; BDVER34-NEXT: retq
tail call void asm "vfnmsubss $2, $1, $0, $0 \0A\09 vfnmsubss $3, $1, $0, $0 \0A\09 vfnmsubss $1, $3, $0, $0", "x,x,x,*m"(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2, <4 x float> *%a3) nounwind
ret void
}
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=broadwell | FileCheck %s --check-prefix=CHECK --check-prefix=BROADWELL
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=skylake | FileCheck %s --check-prefix=CHECK --check-prefix=SKYLAKE
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=knl | FileCheck %s --check-prefix=CHECK --check-prefix=HASWELL
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=x86-64 -mattr=-slow-3ops-lea | FileCheck %s --check-prefix=CHECK --check-prefix=BDVER2
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=btver2 | FileCheck %s --check-prefix=CHECK --check-prefix=BTVER2
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=znver1 | FileCheck %s --check-prefix=CHECK --check-prefix=ZNVER1
; SKYLAKE-NEXT: leal -24(%rdi), %eax # sched: [1:0.50]
; SKYLAKE-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-LABEL: test_lea_offset:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: # kill: def $edi killed $edi def $rdi
+; BDVER2-NEXT: leal -24(%rdi), %eax # sched: [1:0.50]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-LABEL: test_lea_offset:
; BTVER2: # %bb.0:
; BTVER2-NEXT: # kill: def $edi killed $edi def $rdi
; SKYLAKE-NEXT: leal 1024(%rdi), %eax # sched: [1:0.50]
; SKYLAKE-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-LABEL: test_lea_offset_big:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: # kill: def $edi killed $edi def $rdi
+; BDVER2-NEXT: leal 1024(%rdi), %eax # sched: [1:0.50]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-LABEL: test_lea_offset_big:
; BTVER2: # %bb.0:
; BTVER2-NEXT: # kill: def $edi killed $edi def $rdi
; SKYLAKE-NEXT: leal (%rdi,%rsi), %eax # sched: [1:0.50]
; SKYLAKE-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-LABEL: test_lea_add:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: # kill: def $esi killed $esi def $rsi
+; BDVER2-NEXT: # kill: def $edi killed $edi def $rdi
+; BDVER2-NEXT: leal (%rdi,%rsi), %eax # sched: [1:0.50]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-LABEL: test_lea_add:
; BTVER2: # %bb.0:
; BTVER2-NEXT: # kill: def $esi killed $esi def $rsi
; SKYLAKE-NEXT: addl $16, %eax # sched: [1:0.25]
; SKYLAKE-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-LABEL: test_lea_add_offset:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: # kill: def $esi killed $esi def $rsi
+; BDVER2-NEXT: # kill: def $edi killed $edi def $rdi
+; BDVER2-NEXT: leal 16(%rdi,%rsi), %eax # sched: [1:0.50]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-LABEL: test_lea_add_offset:
; BTVER2: # %bb.0:
; BTVER2-NEXT: # kill: def $esi killed $esi def $rsi
; SKYLAKE-NEXT: # sched: [1:0.25]
; SKYLAKE-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-LABEL: test_lea_add_offset_big:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: # kill: def $esi killed $esi def $rsi
+; BDVER2-NEXT: # kill: def $edi killed $edi def $rdi
+; BDVER2-NEXT: leal -4096(%rdi,%rsi), %eax # sched: [1:0.50]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-LABEL: test_lea_add_offset_big:
; BTVER2: # %bb.0:
; BTVER2-NEXT: # kill: def $esi killed $esi def $rsi
; SKYLAKE-NEXT: leal (%rdi,%rdi,2), %eax # sched: [1:0.50]
; SKYLAKE-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-LABEL: test_lea_mul:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: # kill: def $edi killed $edi def $rdi
+; BDVER2-NEXT: leal (%rdi,%rdi,2), %eax # sched: [1:0.50]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-LABEL: test_lea_mul:
; BTVER2: # %bb.0:
; BTVER2-NEXT: # kill: def $edi killed $edi def $rdi
; SKYLAKE-NEXT: addl $-32, %eax # sched: [1:0.25]
; SKYLAKE-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-LABEL: test_lea_mul_offset:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: # kill: def $edi killed $edi def $rdi
+; BDVER2-NEXT: leal -32(%rdi,%rdi,2), %eax # sched: [1:0.50]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-LABEL: test_lea_mul_offset:
; BTVER2: # %bb.0:
; BTVER2-NEXT: # kill: def $edi killed $edi def $rdi
; SKYLAKE-NEXT: # sched: [1:0.25]
; SKYLAKE-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-LABEL: test_lea_mul_offset_big:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: # kill: def $edi killed $edi def $rdi
+; BDVER2-NEXT: leal 10000(%rdi,%rdi,8), %eax # sched: [1:0.50]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-LABEL: test_lea_mul_offset_big:
; BTVER2: # %bb.0:
; BTVER2-NEXT: # kill: def $edi killed $edi def $rdi
; SKYLAKE-NEXT: leal (%rdi,%rsi,2), %eax # sched: [1:0.50]
; SKYLAKE-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-LABEL: test_lea_add_scale:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: # kill: def $esi killed $esi def $rsi
+; BDVER2-NEXT: # kill: def $edi killed $edi def $rdi
+; BDVER2-NEXT: leal (%rdi,%rsi,2), %eax # sched: [1:0.50]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-LABEL: test_lea_add_scale:
; BTVER2: # %bb.0:
; BTVER2-NEXT: # kill: def $esi killed $esi def $rsi
; SKYLAKE-NEXT: addl $96, %eax # sched: [1:0.25]
; SKYLAKE-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-LABEL: test_lea_add_scale_offset:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: # kill: def $esi killed $esi def $rsi
+; BDVER2-NEXT: # kill: def $edi killed $edi def $rdi
+; BDVER2-NEXT: leal 96(%rdi,%rsi,4), %eax # sched: [1:0.50]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-LABEL: test_lea_add_scale_offset:
; BTVER2: # %bb.0:
; BTVER2-NEXT: # kill: def $esi killed $esi def $rsi
; SKYLAKE-NEXT: # sched: [1:0.25]
; SKYLAKE-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-LABEL: test_lea_add_scale_offset_big:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: # kill: def $esi killed $esi def $rsi
+; BDVER2-NEXT: # kill: def $edi killed $edi def $rdi
+; BDVER2-NEXT: leal -1200(%rdi,%rsi,8), %eax # sched: [1:0.50]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-LABEL: test_lea_add_scale_offset_big:
; BTVER2: # %bb.0:
; BTVER2-NEXT: # kill: def $esi killed $esi def $rsi
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=broadwell | FileCheck %s --check-prefix=CHECK --check-prefix=BROADWELL
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=skylake | FileCheck %s --check-prefix=CHECK --check-prefix=SKYLAKE
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=knl | FileCheck %s --check-prefix=CHECK --check-prefix=HASWELL
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=x86-64 -mattr=-slow-3ops-lea | FileCheck %s --check-prefix=CHECK --check-prefix=BDVER2
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=btver2 | FileCheck %s --check-prefix=CHECK --check-prefix=BTVER2
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=znver1 | FileCheck %s --check-prefix=CHECK --check-prefix=ZNVER1
; SKYLAKE-NEXT: leaq -24(%rdi), %rax # sched: [1:0.50]
; SKYLAKE-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-LABEL: test_lea_offset:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: leaq -24(%rdi), %rax # sched: [1:0.50]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-LABEL: test_lea_offset:
; BTVER2: # %bb.0:
; BTVER2-NEXT: leaq -24(%rdi), %rax # sched: [1:0.50]
; SKYLAKE-NEXT: leaq 1024(%rdi), %rax # sched: [1:0.50]
; SKYLAKE-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-LABEL: test_lea_offset_big:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: leaq 1024(%rdi), %rax # sched: [1:0.50]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-LABEL: test_lea_offset_big:
; BTVER2: # %bb.0:
; BTVER2-NEXT: leaq 1024(%rdi), %rax # sched: [1:0.50]
; SKYLAKE-NEXT: leaq (%rdi,%rsi), %rax # sched: [1:0.50]
; SKYLAKE-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-LABEL: test_lea_add:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: leaq (%rdi,%rsi), %rax # sched: [1:0.50]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-LABEL: test_lea_add:
; BTVER2: # %bb.0:
; BTVER2-NEXT: leaq (%rdi,%rsi), %rax # sched: [1:0.50]
; SKYLAKE-NEXT: addq $16, %rax # sched: [1:0.25]
; SKYLAKE-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-LABEL: test_lea_add_offset:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: leaq 16(%rdi,%rsi), %rax # sched: [1:0.50]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-LABEL: test_lea_add_offset:
; BTVER2: # %bb.0:
; BTVER2-NEXT: leaq 16(%rdi,%rsi), %rax # sched: [2:1.00]
; SKYLAKE-NEXT: # sched: [1:0.25]
; SKYLAKE-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-LABEL: test_lea_add_offset_big:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: leaq -4096(%rdi,%rsi), %rax # sched: [1:0.50]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-LABEL: test_lea_add_offset_big:
; BTVER2: # %bb.0:
; BTVER2-NEXT: leaq -4096(%rdi,%rsi), %rax # sched: [2:1.00]
; SKYLAKE-NEXT: leaq (%rdi,%rdi,2), %rax # sched: [1:0.50]
; SKYLAKE-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-LABEL: test_lea_mul:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: leaq (%rdi,%rdi,2), %rax # sched: [1:0.50]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-LABEL: test_lea_mul:
; BTVER2: # %bb.0:
; BTVER2-NEXT: leaq (%rdi,%rdi,2), %rax # sched: [2:1.00]
; SKYLAKE-NEXT: addq $-32, %rax # sched: [1:0.25]
; SKYLAKE-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-LABEL: test_lea_mul_offset:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: leaq -32(%rdi,%rdi,2), %rax # sched: [1:0.50]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-LABEL: test_lea_mul_offset:
; BTVER2: # %bb.0:
; BTVER2-NEXT: leaq -32(%rdi,%rdi,2), %rax # sched: [2:1.00]
; SKYLAKE-NEXT: # sched: [1:0.25]
; SKYLAKE-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-LABEL: test_lea_mul_offset_big:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: leaq 10000(%rdi,%rdi,8), %rax # sched: [1:0.50]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-LABEL: test_lea_mul_offset_big:
; BTVER2: # %bb.0:
; BTVER2-NEXT: leaq 10000(%rdi,%rdi,8), %rax # sched: [2:1.00]
; SKYLAKE-NEXT: leaq (%rdi,%rsi,2), %rax # sched: [1:0.50]
; SKYLAKE-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-LABEL: test_lea_add_scale:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: leaq (%rdi,%rsi,2), %rax # sched: [1:0.50]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-LABEL: test_lea_add_scale:
; BTVER2: # %bb.0:
; BTVER2-NEXT: leaq (%rdi,%rsi,2), %rax # sched: [2:1.00]
; SKYLAKE-NEXT: addq $96, %rax # sched: [1:0.25]
; SKYLAKE-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-LABEL: test_lea_add_scale_offset:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: leaq 96(%rdi,%rsi,4), %rax # sched: [1:0.50]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-LABEL: test_lea_add_scale_offset:
; BTVER2: # %bb.0:
; BTVER2-NEXT: leaq 96(%rdi,%rsi,4), %rax # sched: [2:1.00]
; SKYLAKE-NEXT: # sched: [1:0.25]
; SKYLAKE-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-LABEL: test_lea_add_scale_offset_big:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: leaq -1200(%rdi,%rsi,8), %rax # sched: [1:0.50]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-LABEL: test_lea_add_scale_offset_big:
; BTVER2: # %bb.0:
; BTVER2-NEXT: leaq -1200(%rdi,%rsi,8), %rax # sched: [2:1.00]
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-unknown -print-schedule -mcpu=x86-64 -mattr=+lwp | FileCheck %s --check-prefix=GENERIC
-; RUN: llc < %s -mtriple=x86_64-unknown -print-schedule -mcpu=bdver1 | FileCheck %s --check-prefix=BDVER --check-prefix=BDVER1
-; RUN: llc < %s -mtriple=x86_64-unknown -print-schedule -mcpu=bdver2 | FileCheck %s --check-prefix=BDVER --check-prefix=BDVER2
+; RUN: llc < %s -mtriple=x86_64-unknown -print-schedule -mcpu=x86-64 -mattr=+lwp | FileCheck %s --check-prefix=BDVER --check-prefix=BDVER12 --check-prefix=BDVER1
+; RUN: llc < %s -mtriple=x86_64-unknown -print-schedule -mcpu=x86-64 -mattr=+lwp | FileCheck %s --check-prefix=BDVER --check-prefix=BDVER12 --check-prefix=BDVER2
; RUN: llc < %s -mtriple=x86_64-unknown -print-schedule -mcpu=bdver3 | FileCheck %s --check-prefix=BDVER --check-prefix=BDVER3
; RUN: llc < %s -mtriple=x86_64-unknown -print-schedule -mcpu=bdver4 | FileCheck %s --check-prefix=BDVER --check-prefix=BDVER4
; GENERIC-NEXT: llwpcb %rdi # sched: [100:0.33]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
-; BDVER-LABEL: test_llwpcb:
-; BDVER: # %bb.0:
-; BDVER-NEXT: llwpcb %rdi
-; BDVER-NEXT: retq
+; BDVER12-LABEL: test_llwpcb:
+; BDVER12: # %bb.0:
+; BDVER12-NEXT: llwpcb %rdi # sched: [100:0.33]
+; BDVER12-NEXT: retq # sched: [1:1.00]
+;
+; BDVER3-LABEL: test_llwpcb:
+; BDVER3: # %bb.0:
+; BDVER3-NEXT: llwpcb %rdi
+; BDVER3-NEXT: retq
+;
+; BDVER4-LABEL: test_llwpcb:
+; BDVER4: # %bb.0:
+; BDVER4-NEXT: llwpcb %rdi
+; BDVER4-NEXT: retq
tail call void @llvm.x86.llwpcb(i8 *%a0)
ret void
}
; GENERIC-NEXT: slwpcb %rax # sched: [100:0.33]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
-; BDVER-LABEL: test_slwpcb:
-; BDVER: # %bb.0:
-; BDVER-NEXT: slwpcb %rax
-; BDVER-NEXT: retq
+; BDVER12-LABEL: test_slwpcb:
+; BDVER12: # %bb.0:
+; BDVER12-NEXT: slwpcb %rax # sched: [100:0.33]
+; BDVER12-NEXT: retq # sched: [1:1.00]
+;
+; BDVER3-LABEL: test_slwpcb:
+; BDVER3: # %bb.0:
+; BDVER3-NEXT: slwpcb %rax
+; BDVER3-NEXT: retq
+;
+; BDVER4-LABEL: test_slwpcb:
+; BDVER4: # %bb.0:
+; BDVER4-NEXT: slwpcb %rax
+; BDVER4-NEXT: retq
%1 = tail call i8* @llvm.x86.slwpcb()
ret i8 *%1
}
; GENERIC-NEXT: setb %al # sched: [1:0.50]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
-; BDVER-LABEL: test_lwpins32_rri:
-; BDVER: # %bb.0:
-; BDVER-NEXT: addl %esi, %esi
-; BDVER-NEXT: lwpins $-1985229329, %esi, %edi # imm = 0x89ABCDEF
-; BDVER-NEXT: setb %al
-; BDVER-NEXT: retq
+; BDVER12-LABEL: test_lwpins32_rri:
+; BDVER12: # %bb.0:
+; BDVER12-NEXT: addl %esi, %esi # sched: [1:0.33]
+; BDVER12-NEXT: lwpins $-1985229329, %esi, %edi # imm = 0x89ABCDEF
+; BDVER12-NEXT: # sched: [100:0.33]
+; BDVER12-NEXT: setb %al # sched: [1:0.50]
+; BDVER12-NEXT: retq # sched: [1:1.00]
+;
+; BDVER3-LABEL: test_lwpins32_rri:
+; BDVER3: # %bb.0:
+; BDVER3-NEXT: addl %esi, %esi
+; BDVER3-NEXT: lwpins $-1985229329, %esi, %edi # imm = 0x89ABCDEF
+; BDVER3-NEXT: setb %al
+; BDVER3-NEXT: retq
+;
+; BDVER4-LABEL: test_lwpins32_rri:
+; BDVER4: # %bb.0:
+; BDVER4-NEXT: addl %esi, %esi
+; BDVER4-NEXT: lwpins $-1985229329, %esi, %edi # imm = 0x89ABCDEF
+; BDVER4-NEXT: setb %al
+; BDVER4-NEXT: retq
%1 = add i32 %a1, %a1
%2 = tail call i8 @llvm.x86.lwpins32(i32 %a0, i32 %1, i32 2309737967)
ret i8 %2
; GENERIC-NEXT: setb %al # sched: [1:0.50]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
-; BDVER-LABEL: test_lwpins32_rmi:
-; BDVER: # %bb.0:
-; BDVER-NEXT: lwpins $1985229328, (%rsi), %edi # imm = 0x76543210
-; BDVER-NEXT: setb %al
-; BDVER-NEXT: retq
+; BDVER12-LABEL: test_lwpins32_rmi:
+; BDVER12: # %bb.0:
+; BDVER12-NEXT: lwpins $1985229328, (%rsi), %edi # imm = 0x76543210
+; BDVER12-NEXT: # sched: [100:0.33]
+; BDVER12-NEXT: setb %al # sched: [1:0.50]
+; BDVER12-NEXT: retq # sched: [1:1.00]
+;
+; BDVER3-LABEL: test_lwpins32_rmi:
+; BDVER3: # %bb.0:
+; BDVER3-NEXT: lwpins $1985229328, (%rsi), %edi # imm = 0x76543210
+; BDVER3-NEXT: setb %al
+; BDVER3-NEXT: retq
+;
+; BDVER4-LABEL: test_lwpins32_rmi:
+; BDVER4: # %bb.0:
+; BDVER4-NEXT: lwpins $1985229328, (%rsi), %edi # imm = 0x76543210
+; BDVER4-NEXT: setb %al
+; BDVER4-NEXT: retq
%a1 = load i32, i32 *%p1
%1 = tail call i8 @llvm.x86.lwpins32(i32 %a0, i32 %a1, i32 1985229328)
ret i8 %1
; GENERIC-NEXT: setb %al # sched: [1:0.50]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
-; BDVER-LABEL: test_lwpins64_rri:
-; BDVER: # %bb.0:
-; BDVER-NEXT: lwpins $-1985229329, %esi, %rdi # imm = 0x89ABCDEF
-; BDVER-NEXT: setb %al
-; BDVER-NEXT: retq
+; BDVER12-LABEL: test_lwpins64_rri:
+; BDVER12: # %bb.0:
+; BDVER12-NEXT: lwpins $-1985229329, %esi, %rdi # imm = 0x89ABCDEF
+; BDVER12-NEXT: # sched: [100:0.33]
+; BDVER12-NEXT: setb %al # sched: [1:0.50]
+; BDVER12-NEXT: retq # sched: [1:1.00]
+;
+; BDVER3-LABEL: test_lwpins64_rri:
+; BDVER3: # %bb.0:
+; BDVER3-NEXT: lwpins $-1985229329, %esi, %rdi # imm = 0x89ABCDEF
+; BDVER3-NEXT: setb %al
+; BDVER3-NEXT: retq
+;
+; BDVER4-LABEL: test_lwpins64_rri:
+; BDVER4: # %bb.0:
+; BDVER4-NEXT: lwpins $-1985229329, %esi, %rdi # imm = 0x89ABCDEF
+; BDVER4-NEXT: setb %al
+; BDVER4-NEXT: retq
%1 = tail call i8 @llvm.x86.lwpins64(i64 %a0, i32 %a1, i32 2309737967)
ret i8 %1
}
; GENERIC-NEXT: setb %al # sched: [1:0.50]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
-; BDVER-LABEL: test_lwpins64_rmi:
-; BDVER: # %bb.0:
-; BDVER-NEXT: lwpins $1985229328, (%rsi), %rdi # imm = 0x76543210
-; BDVER-NEXT: setb %al
-; BDVER-NEXT: retq
+; BDVER12-LABEL: test_lwpins64_rmi:
+; BDVER12: # %bb.0:
+; BDVER12-NEXT: lwpins $1985229328, (%rsi), %rdi # imm = 0x76543210
+; BDVER12-NEXT: # sched: [100:0.33]
+; BDVER12-NEXT: setb %al # sched: [1:0.50]
+; BDVER12-NEXT: retq # sched: [1:1.00]
+;
+; BDVER3-LABEL: test_lwpins64_rmi:
+; BDVER3: # %bb.0:
+; BDVER3-NEXT: lwpins $1985229328, (%rsi), %rdi # imm = 0x76543210
+; BDVER3-NEXT: setb %al
+; BDVER3-NEXT: retq
+;
+; BDVER4-LABEL: test_lwpins64_rmi:
+; BDVER4: # %bb.0:
+; BDVER4-NEXT: lwpins $1985229328, (%rsi), %rdi # imm = 0x76543210
+; BDVER4-NEXT: setb %al
+; BDVER4-NEXT: retq
%a1 = load i32, i32 *%p1
%1 = tail call i8 @llvm.x86.lwpins64(i64 %a0, i32 %a1, i32 1985229328)
ret i8 %1
; GENERIC-NEXT: # sched: [100:0.33]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
-; BDVER-LABEL: test_lwpval32_rri:
-; BDVER: # %bb.0:
-; BDVER-NEXT: addl %esi, %esi
-; BDVER-NEXT: lwpval $-19088744, %esi, %edi # imm = 0xFEDCBA98
-; BDVER-NEXT: retq
+; BDVER12-LABEL: test_lwpval32_rri:
+; BDVER12: # %bb.0:
+; BDVER12-NEXT: addl %esi, %esi # sched: [1:0.33]
+; BDVER12-NEXT: lwpval $-19088744, %esi, %edi # imm = 0xFEDCBA98
+; BDVER12-NEXT: # sched: [100:0.33]
+; BDVER12-NEXT: retq # sched: [1:1.00]
+;
+; BDVER3-LABEL: test_lwpval32_rri:
+; BDVER3: # %bb.0:
+; BDVER3-NEXT: addl %esi, %esi
+; BDVER3-NEXT: lwpval $-19088744, %esi, %edi # imm = 0xFEDCBA98
+; BDVER3-NEXT: retq
+;
+; BDVER4-LABEL: test_lwpval32_rri:
+; BDVER4: # %bb.0:
+; BDVER4-NEXT: addl %esi, %esi
+; BDVER4-NEXT: lwpval $-19088744, %esi, %edi # imm = 0xFEDCBA98
+; BDVER4-NEXT: retq
%1 = add i32 %a1, %a1
tail call void @llvm.x86.lwpval32(i32 %a0, i32 %1, i32 4275878552)
ret void
; GENERIC-NEXT: # sched: [100:0.33]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
-; BDVER-LABEL: test_lwpval32_rmi:
-; BDVER: # %bb.0:
-; BDVER-NEXT: lwpval $305419896, (%rsi), %edi # imm = 0x12345678
-; BDVER-NEXT: retq
+; BDVER12-LABEL: test_lwpval32_rmi:
+; BDVER12: # %bb.0:
+; BDVER12-NEXT: lwpval $305419896, (%rsi), %edi # imm = 0x12345678
+; BDVER12-NEXT: # sched: [100:0.33]
+; BDVER12-NEXT: retq # sched: [1:1.00]
+;
+; BDVER3-LABEL: test_lwpval32_rmi:
+; BDVER3: # %bb.0:
+; BDVER3-NEXT: lwpval $305419896, (%rsi), %edi # imm = 0x12345678
+; BDVER3-NEXT: retq
+;
+; BDVER4-LABEL: test_lwpval32_rmi:
+; BDVER4: # %bb.0:
+; BDVER4-NEXT: lwpval $305419896, (%rsi), %edi # imm = 0x12345678
+; BDVER4-NEXT: retq
%a1 = load i32, i32 *%p1
tail call void @llvm.x86.lwpval32(i32 %a0, i32 %a1, i32 305419896)
ret void
; GENERIC-NEXT: # sched: [100:0.33]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
-; BDVER-LABEL: test_lwpval64_rri:
-; BDVER: # %bb.0:
-; BDVER-NEXT: lwpval $-19088744, %esi, %rdi # imm = 0xFEDCBA98
-; BDVER-NEXT: retq
+; BDVER12-LABEL: test_lwpval64_rri:
+; BDVER12: # %bb.0:
+; BDVER12-NEXT: lwpval $-19088744, %esi, %rdi # imm = 0xFEDCBA98
+; BDVER12-NEXT: # sched: [100:0.33]
+; BDVER12-NEXT: retq # sched: [1:1.00]
+;
+; BDVER3-LABEL: test_lwpval64_rri:
+; BDVER3: # %bb.0:
+; BDVER3-NEXT: lwpval $-19088744, %esi, %rdi # imm = 0xFEDCBA98
+; BDVER3-NEXT: retq
+;
+; BDVER4-LABEL: test_lwpval64_rri:
+; BDVER4: # %bb.0:
+; BDVER4-NEXT: lwpval $-19088744, %esi, %rdi # imm = 0xFEDCBA98
+; BDVER4-NEXT: retq
tail call void @llvm.x86.lwpval64(i64 %a0, i32 %a1, i32 4275878552)
ret void
}
; GENERIC-NEXT: # sched: [100:0.33]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
-; BDVER-LABEL: test_lwpval64_rmi:
-; BDVER: # %bb.0:
-; BDVER-NEXT: lwpval $305419896, (%rsi), %rdi # imm = 0x12345678
-; BDVER-NEXT: retq
+; BDVER12-LABEL: test_lwpval64_rmi:
+; BDVER12: # %bb.0:
+; BDVER12-NEXT: lwpval $305419896, (%rsi), %rdi # imm = 0x12345678
+; BDVER12-NEXT: # sched: [100:0.33]
+; BDVER12-NEXT: retq # sched: [1:1.00]
+;
+; BDVER3-LABEL: test_lwpval64_rmi:
+; BDVER3: # %bb.0:
+; BDVER3-NEXT: lwpval $305419896, (%rsi), %rdi # imm = 0x12345678
+; BDVER3-NEXT: retq
+;
+; BDVER4-LABEL: test_lwpval64_rmi:
+; BDVER4: # %bb.0:
+; BDVER4-NEXT: lwpval $305419896, (%rsi), %rdi # imm = 0x12345678
+; BDVER4-NEXT: retq
%a1 = load i32, i32 *%p1
tail call void @llvm.x86.lwpval64(i64 %a0, i32 %a1, i32 305419896)
ret void
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=broadwell | FileCheck %s --check-prefix=CHECK --check-prefix=BROADWELL
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=skylake | FileCheck %s --check-prefix=CHECK --check-prefix=SKYLAKE
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=knl | FileCheck %s --check-prefix=CHECK --check-prefix=HASWELL
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=x86-64 -mattr=+lzcnt | FileCheck %s --check-prefix=CHECK --check-prefix=BDVER2
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=btver2 | FileCheck %s --check-prefix=CHECK --check-prefix=BTVER2
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=znver1 | FileCheck %s --check-prefix=CHECK --check-prefix=ZNVER1
; SKYLAKE-NEXT: # kill: def $ax killed $ax killed $eax
; SKYLAKE-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-LABEL: test_ctlz_i16:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: lzcntw (%rsi), %cx # sched: [8:1.00]
+; BDVER2-NEXT: lzcntw %di, %ax # sched: [3:1.00]
+; BDVER2-NEXT: orl %ecx, %eax # sched: [1:0.33]
+; BDVER2-NEXT: # kill: def $ax killed $ax killed $eax
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-LABEL: test_ctlz_i16:
; BTVER2: # %bb.0:
; BTVER2-NEXT: lzcntw (%rsi), %cx # sched: [4:1.00]
; SKYLAKE-NEXT: orl %ecx, %eax # sched: [1:0.25]
; SKYLAKE-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-LABEL: test_ctlz_i32:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: lzcntl (%rsi), %ecx # sched: [8:1.00]
+; BDVER2-NEXT: lzcntl %edi, %eax # sched: [3:1.00]
+; BDVER2-NEXT: orl %ecx, %eax # sched: [1:0.33]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-LABEL: test_ctlz_i32:
; BTVER2: # %bb.0:
; BTVER2-NEXT: lzcntl (%rsi), %ecx # sched: [4:1.00]
; SKYLAKE-NEXT: orq %rcx, %rax # sched: [1:0.25]
; SKYLAKE-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-LABEL: test_ctlz_i64:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: lzcntq (%rsi), %rcx # sched: [8:1.00]
+; BDVER2-NEXT: lzcntq %rdi, %rax # sched: [3:1.00]
+; BDVER2-NEXT: orq %rcx, %rax # sched: [1:0.33]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-LABEL: test_ctlz_i64:
; BTVER2: # %bb.0:
; BTVER2-NEXT: lzcntq (%rsi), %rcx # sched: [4:1.00]
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=broadwell | FileCheck %s --check-prefix=CHECK --check-prefix=BROADWELL
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=skylake | FileCheck %s --check-prefix=CHECK --check-prefix=SKYLAKE
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=skx | FileCheck %s --check-prefix=CHECK --check-prefix=SKX
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=x86-64 -mattr=+avx -mattr=+ssse3 | FileCheck %s --check-prefix=CHECK --check-prefix=BDVER2
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=btver2 | FileCheck %s --check-prefix=CHECK --check-prefix=BTVER2
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=znver1 | FileCheck %s --check-prefix=CHECK --check-prefix=ZNVER1
; SKX-NEXT: movq %mm1, %rax # sched: [2:1.00]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-LABEL: test_cvtpd2pi:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: cvtpd2pi (%rdi), %mm0 # sched: [10:1.00]
+; BDVER2-NEXT: cvtpd2pi %xmm0, %mm1 # sched: [4:1.00]
+; BDVER2-NEXT: por %mm1, %mm0 # sched: [1:0.33]
+; BDVER2-NEXT: movq %mm0, %rax # sched: [2:1.00]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-LABEL: test_cvtpd2pi:
; BTVER2: # %bb.0:
; BTVER2-NEXT: cvtpd2pi (%rdi), %mm1 # sched: [8:1.00]
; SKX-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-LABEL: test_cvtpi2pd:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: cvtpi2pd %mm0, %xmm0 # sched: [4:1.00]
+; BDVER2-NEXT: cvtpi2pd (%rdi), %xmm1 # sched: [10:1.00]
+; BDVER2-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-LABEL: test_cvtpi2pd:
; BTVER2: # %bb.0:
; BTVER2-NEXT: cvtpi2pd (%rdi), %xmm1 # sched: [8:1.00]
; SKX-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-LABEL: test_cvtpi2ps:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: cvtpi2ps %mm0, %xmm0 # sched: [3:1.00]
+; BDVER2-NEXT: cvtpi2ps (%rdi), %xmm1 # sched: [9:1.00]
+; BDVER2-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-LABEL: test_cvtpi2ps:
; BTVER2: # %bb.0:
; BTVER2-NEXT: cvtpi2ps (%rdi), %xmm1 # sched: [8:1.00]
; SKX-NEXT: movq %mm1, %rax # sched: [2:1.00]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-LABEL: test_cvtps2pi:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: cvtps2pi %xmm0, %mm0 # sched: [3:1.00]
+; BDVER2-NEXT: cvtps2pi (%rdi), %mm1 # sched: [9:1.00]
+; BDVER2-NEXT: por %mm0, %mm1 # sched: [1:0.33]
+; BDVER2-NEXT: movq %mm1, %rax # sched: [2:1.00]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-LABEL: test_cvtps2pi:
; BTVER2: # %bb.0:
; BTVER2-NEXT: cvtps2pi (%rdi), %mm1 # sched: [8:1.00]
; SKX-NEXT: movq %mm1, %rax # sched: [2:1.00]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-LABEL: test_cvttpd2pi:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: cvttpd2pi (%rdi), %mm0 # sched: [10:1.00]
+; BDVER2-NEXT: cvttpd2pi %xmm0, %mm1 # sched: [4:1.00]
+; BDVER2-NEXT: por %mm1, %mm0 # sched: [1:0.33]
+; BDVER2-NEXT: movq %mm0, %rax # sched: [2:1.00]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-LABEL: test_cvttpd2pi:
; BTVER2: # %bb.0:
; BTVER2-NEXT: cvttpd2pi (%rdi), %mm1 # sched: [8:1.00]
; SKX-NEXT: movq %mm1, %rax # sched: [2:1.00]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-LABEL: test_cvttps2pi:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: cvttps2pi %xmm0, %mm0 # sched: [3:1.00]
+; BDVER2-NEXT: cvttps2pi (%rdi), %mm1 # sched: [9:1.00]
+; BDVER2-NEXT: por %mm0, %mm1 # sched: [1:0.33]
+; BDVER2-NEXT: movq %mm1, %rax # sched: [2:1.00]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-LABEL: test_cvttps2pi:
; BTVER2: # %bb.0:
; BTVER2-NEXT: cvttps2pi (%rdi), %mm1 # sched: [8:1.00]
; SKX-NEXT: emms # sched: [10:4.50]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-LABEL: test_emms:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: emms # sched: [31:10.33]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-LABEL: test_emms:
; BTVER2: # %bb.0:
; BTVER2-NEXT: emms # sched: [2:0.50]
; SKX-NEXT: maskmovq %mm1, %mm0 # sched: [1:1.00]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-LABEL: test_maskmovq:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: maskmovq %mm1, %mm0 # sched: [1:1.00]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-LABEL: test_maskmovq:
; BTVER2: # %bb.0:
; BTVER2-NEXT: maskmovq %mm1, %mm0 # sched: [1:0.50]
; SKX-NEXT: movl %ecx, (%rsi) # sched: [1:1.00]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-LABEL: test_movd:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: movd %edi, %mm1 # sched: [1:1.00]
+; BDVER2-NEXT: movd (%rsi), %mm2 # sched: [5:0.50]
+; BDVER2-NEXT: paddd %mm1, %mm2 # sched: [3:1.00]
+; BDVER2-NEXT: paddd %mm2, %mm0 # sched: [3:1.00]
+; BDVER2-NEXT: movd %mm2, %ecx # sched: [2:1.00]
+; BDVER2-NEXT: movd %mm0, %eax # sched: [2:1.00]
+; BDVER2-NEXT: movl %ecx, (%rsi) # sched: [1:1.00]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-LABEL: test_movd:
; BTVER2: # %bb.0:
; BTVER2-NEXT: movd %edi, %mm1 # sched: [8:0.50]
; SKX-NEXT: movq %mm0, %rax # sched: [2:1.00]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-LABEL: test_movdq2q:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: movdq2q %xmm0, %mm0 # sched: [2:1.00]
+; BDVER2-NEXT: paddd %mm0, %mm0 # sched: [3:1.00]
+; BDVER2-NEXT: movq %mm0, %rax # sched: [2:1.00]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-LABEL: test_movdq2q:
; BTVER2: # %bb.0:
; BTVER2-NEXT: movdq2q %xmm0, %mm0 # sched: [1:0.50]
; SKX-NEXT: movntq %mm0, (%rdi) # sched: [1:1.00]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-LABEL: test_movntq:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: movntq %mm0, (%rdi) # sched: [1:1.00]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-LABEL: test_movntq:
; BTVER2: # %bb.0:
; BTVER2-NEXT: movntq %mm0, (%rdi) # sched: [2:1.00]
; SKX-NEXT: movq %mm0, (%rdi) # sched: [1:1.00]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-LABEL: test_movq:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: movq (%rdi), %mm0 # sched: [5:0.50]
+; BDVER2-NEXT: paddd %mm0, %mm0 # sched: [3:1.00]
+; BDVER2-NEXT: movq %mm0, (%rdi) # sched: [1:1.00]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-LABEL: test_movq:
; BTVER2: # %bb.0:
; BTVER2-NEXT: movq (%rdi), %mm0 # sched: [5:1.00]
; SKX-NEXT: movq2dq %mm0, %xmm0 # sched: [2:2.00]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-LABEL: test_movq2dq:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: movq2dq %mm0, %xmm0 # sched: [1:0.33]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-LABEL: test_movq2dq:
; BTVER2: # %bb.0:
; BTVER2-NEXT: movq2dq %mm0, %xmm0 # sched: [1:0.50]
; SKX-NEXT: movq %mm0, %rax # sched: [2:1.00]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-LABEL: test_pabsb:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: pabsb (%rdi), %mm0 # sched: [6:0.50]
+; BDVER2-NEXT: pabsb %mm0, %mm0 # sched: [1:0.50]
+; BDVER2-NEXT: movq %mm0, %rax # sched: [2:1.00]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-LABEL: test_pabsb:
; BTVER2: # %bb.0:
; BTVER2-NEXT: pabsb (%rdi), %mm0 # sched: [6:1.00]
; SKX-NEXT: movq %mm0, %rax # sched: [2:1.00]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-LABEL: test_pabsd:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: pabsd (%rdi), %mm0 # sched: [6:0.50]
+; BDVER2-NEXT: pabsd %mm0, %mm0 # sched: [1:0.50]
+; BDVER2-NEXT: movq %mm0, %rax # sched: [2:1.00]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-LABEL: test_pabsd:
; BTVER2: # %bb.0:
; BTVER2-NEXT: pabsd (%rdi), %mm0 # sched: [6:1.00]
; SKX-NEXT: movq %mm0, %rax # sched: [2:1.00]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-LABEL: test_pabsw:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: pabsw (%rdi), %mm0 # sched: [6:0.50]
+; BDVER2-NEXT: pabsw %mm0, %mm0 # sched: [1:0.50]
+; BDVER2-NEXT: movq %mm0, %rax # sched: [2:1.00]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-LABEL: test_pabsw:
; BTVER2: # %bb.0:
; BTVER2-NEXT: pabsw (%rdi), %mm0 # sched: [6:1.00]
; SKX-NEXT: movq %mm0, %rax # sched: [2:1.00]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-LABEL: test_packssdw:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: packssdw %mm1, %mm0 # sched: [1:1.00]
+; BDVER2-NEXT: packssdw (%rdi), %mm0 # sched: [6:1.00]
+; BDVER2-NEXT: movq %mm0, %rax # sched: [2:1.00]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-LABEL: test_packssdw:
; BTVER2: # %bb.0:
; BTVER2-NEXT: packssdw %mm1, %mm0 # sched: [1:0.50]
; SKX-NEXT: movq %mm0, %rax # sched: [2:1.00]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-LABEL: test_packsswb:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: packsswb %mm1, %mm0 # sched: [1:1.00]
+; BDVER2-NEXT: packsswb (%rdi), %mm0 # sched: [6:1.00]
+; BDVER2-NEXT: movq %mm0, %rax # sched: [2:1.00]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-LABEL: test_packsswb:
; BTVER2: # %bb.0:
; BTVER2-NEXT: packsswb %mm1, %mm0 # sched: [1:0.50]
; SKX-NEXT: movq %mm0, %rax # sched: [2:1.00]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-LABEL: test_packuswb:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: packuswb %mm1, %mm0 # sched: [1:1.00]
+; BDVER2-NEXT: packuswb (%rdi), %mm0 # sched: [6:1.00]
+; BDVER2-NEXT: movq %mm0, %rax # sched: [2:1.00]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-LABEL: test_packuswb:
; BTVER2: # %bb.0:
; BTVER2-NEXT: packuswb %mm1, %mm0 # sched: [1:0.50]
; SKX-NEXT: movq %mm0, %rax # sched: [2:1.00]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-LABEL: test_paddb:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: paddb %mm1, %mm0 # sched: [3:1.00]
+; BDVER2-NEXT: paddb (%rdi), %mm0 # sched: [8:1.00]
+; BDVER2-NEXT: movq %mm0, %rax # sched: [2:1.00]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-LABEL: test_paddb:
; BTVER2: # %bb.0:
; BTVER2-NEXT: paddb %mm1, %mm0 # sched: [1:0.50]
; SKX-NEXT: movq %mm0, %rax # sched: [2:1.00]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-LABEL: test_paddd:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: paddd %mm1, %mm0 # sched: [3:1.00]
+; BDVER2-NEXT: paddd (%rdi), %mm0 # sched: [8:1.00]
+; BDVER2-NEXT: movq %mm0, %rax # sched: [2:1.00]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-LABEL: test_paddd:
; BTVER2: # %bb.0:
; BTVER2-NEXT: paddd %mm1, %mm0 # sched: [1:0.50]
; SKX-NEXT: movq %mm0, %rax # sched: [2:1.00]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-LABEL: test_paddq:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: paddq %mm1, %mm0 # sched: [1:0.50]
+; BDVER2-NEXT: paddq (%rdi), %mm0 # sched: [7:0.50]
+; BDVER2-NEXT: movq %mm0, %rax # sched: [2:1.00]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-LABEL: test_paddq:
; BTVER2: # %bb.0:
; BTVER2-NEXT: paddq %mm1, %mm0 # sched: [1:0.50]
; SKX-NEXT: movq %mm0, %rax # sched: [2:1.00]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-LABEL: test_paddsb:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: paddsb %mm1, %mm0 # sched: [3:1.00]
+; BDVER2-NEXT: paddsb (%rdi), %mm0 # sched: [8:1.00]
+; BDVER2-NEXT: movq %mm0, %rax # sched: [2:1.00]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-LABEL: test_paddsb:
; BTVER2: # %bb.0:
; BTVER2-NEXT: paddsb %mm1, %mm0 # sched: [1:0.50]
; SKX-NEXT: movq %mm0, %rax # sched: [2:1.00]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-LABEL: test_paddsw:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: paddsw %mm1, %mm0 # sched: [3:1.00]
+; BDVER2-NEXT: paddsw (%rdi), %mm0 # sched: [8:1.00]
+; BDVER2-NEXT: movq %mm0, %rax # sched: [2:1.00]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-LABEL: test_paddsw:
; BTVER2: # %bb.0:
; BTVER2-NEXT: paddsw %mm1, %mm0 # sched: [1:0.50]
; SKX-NEXT: movq %mm0, %rax # sched: [2:1.00]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-LABEL: test_paddusb:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: paddusb %mm1, %mm0 # sched: [3:1.00]
+; BDVER2-NEXT: paddusb (%rdi), %mm0 # sched: [8:1.00]
+; BDVER2-NEXT: movq %mm0, %rax # sched: [2:1.00]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-LABEL: test_paddusb:
; BTVER2: # %bb.0:
; BTVER2-NEXT: paddusb %mm1, %mm0 # sched: [1:0.50]
; SKX-NEXT: movq %mm0, %rax # sched: [2:1.00]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-LABEL: test_paddusw:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: paddusw %mm1, %mm0 # sched: [3:1.00]
+; BDVER2-NEXT: paddusw (%rdi), %mm0 # sched: [8:1.00]
+; BDVER2-NEXT: movq %mm0, %rax # sched: [2:1.00]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-LABEL: test_paddusw:
; BTVER2: # %bb.0:
; BTVER2-NEXT: paddusw %mm1, %mm0 # sched: [1:0.50]
; SKX-NEXT: movq %mm0, %rax # sched: [2:1.00]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-LABEL: test_paddw:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: paddw %mm1, %mm0 # sched: [3:1.00]
+; BDVER2-NEXT: paddw (%rdi), %mm0 # sched: [8:1.00]
+; BDVER2-NEXT: movq %mm0, %rax # sched: [2:1.00]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-LABEL: test_paddw:
; BTVER2: # %bb.0:
; BTVER2-NEXT: paddw %mm1, %mm0 # sched: [1:0.50]
; SKX-NEXT: movq %mm0, %rax # sched: [2:1.00]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-LABEL: test_palignr:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: palignr $1, %mm1, %mm0 # sched: [1:0.50]
+; BDVER2-NEXT: palignr $1, (%rdi), %mm0 # sched: [6:0.50]
+; BDVER2-NEXT: movq %mm0, %rax # sched: [2:1.00]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-LABEL: test_palignr:
; BTVER2: # %bb.0:
; BTVER2-NEXT: palignr $1, %mm1, %mm0 # sched: [1:0.50]
; SKX-NEXT: movq %mm0, %rax # sched: [2:1.00]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-LABEL: test_pand:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: pand %mm1, %mm0 # sched: [1:0.33]
+; BDVER2-NEXT: pand (%rdi), %mm0 # sched: [6:0.50]
+; BDVER2-NEXT: movq %mm0, %rax # sched: [2:1.00]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-LABEL: test_pand:
; BTVER2: # %bb.0:
; BTVER2-NEXT: pand %mm1, %mm0 # sched: [1:0.50]
; SKX-NEXT: movq %mm0, %rax # sched: [2:1.00]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-LABEL: test_pandn:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: pandn %mm1, %mm0 # sched: [1:0.33]
+; BDVER2-NEXT: pandn (%rdi), %mm0 # sched: [6:0.50]
+; BDVER2-NEXT: movq %mm0, %rax # sched: [2:1.00]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-LABEL: test_pandn:
; BTVER2: # %bb.0:
; BTVER2-NEXT: pandn %mm1, %mm0 # sched: [1:0.50]
; SKX-NEXT: movq %mm0, %rax # sched: [2:1.00]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-LABEL: test_pavgb:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: pavgb %mm1, %mm0 # sched: [3:1.00]
+; BDVER2-NEXT: pavgb (%rdi), %mm0 # sched: [8:1.00]
+; BDVER2-NEXT: movq %mm0, %rax # sched: [2:1.00]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-LABEL: test_pavgb:
; BTVER2: # %bb.0:
; BTVER2-NEXT: pavgb %mm1, %mm0 # sched: [1:0.50]
; SKX-NEXT: movq %mm0, %rax # sched: [2:1.00]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-LABEL: test_pavgw:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: pavgw %mm1, %mm0 # sched: [3:1.00]
+; BDVER2-NEXT: pavgw (%rdi), %mm0 # sched: [8:1.00]
+; BDVER2-NEXT: movq %mm0, %rax # sched: [2:1.00]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-LABEL: test_pavgw:
; BTVER2: # %bb.0:
; BTVER2-NEXT: pavgw %mm1, %mm0 # sched: [1:0.50]
; SKX-NEXT: movq %mm0, %rax # sched: [2:1.00]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-LABEL: test_pcmpeqb:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: pcmpeqb %mm1, %mm0 # sched: [3:1.00]
+; BDVER2-NEXT: pcmpeqb (%rdi), %mm0 # sched: [8:1.00]
+; BDVER2-NEXT: movq %mm0, %rax # sched: [2:1.00]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-LABEL: test_pcmpeqb:
; BTVER2: # %bb.0:
; BTVER2-NEXT: pcmpeqb %mm1, %mm0 # sched: [1:0.50]
; SKX-NEXT: movq %mm0, %rax # sched: [2:1.00]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-LABEL: test_pcmpeqd:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: pcmpeqd %mm1, %mm0 # sched: [3:1.00]
+; BDVER2-NEXT: pcmpeqd (%rdi), %mm0 # sched: [8:1.00]
+; BDVER2-NEXT: movq %mm0, %rax # sched: [2:1.00]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-LABEL: test_pcmpeqd:
; BTVER2: # %bb.0:
; BTVER2-NEXT: pcmpeqd %mm1, %mm0 # sched: [1:0.50]
; SKX-NEXT: movq %mm0, %rax # sched: [2:1.00]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-LABEL: test_pcmpeqw:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: pcmpeqw %mm1, %mm0 # sched: [3:1.00]
+; BDVER2-NEXT: pcmpeqw (%rdi), %mm0 # sched: [8:1.00]
+; BDVER2-NEXT: movq %mm0, %rax # sched: [2:1.00]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-LABEL: test_pcmpeqw:
; BTVER2: # %bb.0:
; BTVER2-NEXT: pcmpeqw %mm1, %mm0 # sched: [1:0.50]
; SKX-NEXT: movq %mm0, %rax # sched: [2:1.00]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-LABEL: test_pcmpgtb:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: pcmpgtb %mm1, %mm0 # sched: [3:1.00]
+; BDVER2-NEXT: pcmpgtb (%rdi), %mm0 # sched: [8:1.00]
+; BDVER2-NEXT: movq %mm0, %rax # sched: [2:1.00]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-LABEL: test_pcmpgtb:
; BTVER2: # %bb.0:
; BTVER2-NEXT: pcmpgtb %mm1, %mm0 # sched: [1:0.50]
; SKX-NEXT: movq %mm0, %rax # sched: [2:1.00]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-LABEL: test_pcmpgtd:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: pcmpgtd %mm1, %mm0 # sched: [3:1.00]
+; BDVER2-NEXT: pcmpgtd (%rdi), %mm0 # sched: [8:1.00]
+; BDVER2-NEXT: movq %mm0, %rax # sched: [2:1.00]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-LABEL: test_pcmpgtd:
; BTVER2: # %bb.0:
; BTVER2-NEXT: pcmpgtd %mm1, %mm0 # sched: [1:0.50]
; SKX-NEXT: movq %mm0, %rax # sched: [2:1.00]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-LABEL: test_pcmpgtw:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: pcmpgtw %mm1, %mm0 # sched: [3:1.00]
+; BDVER2-NEXT: pcmpgtw (%rdi), %mm0 # sched: [8:1.00]
+; BDVER2-NEXT: movq %mm0, %rax # sched: [2:1.00]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-LABEL: test_pcmpgtw:
; BTVER2: # %bb.0:
; BTVER2-NEXT: pcmpgtw %mm1, %mm0 # sched: [1:0.50]
; SKX-NEXT: pextrw $0, %mm0, %eax # sched: [3:1.00]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-LABEL: test_pextrw:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: pextrw $0, %mm0, %eax # sched: [3:1.00]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-LABEL: test_pextrw:
; BTVER2: # %bb.0:
; BTVER2-NEXT: pextrw $0, %mm0, %eax # sched: [3:1.00]
; SKX-NEXT: movq %mm0, %rax # sched: [2:1.00]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-LABEL: test_phaddd:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: phaddd %mm1, %mm0 # sched: [3:1.50]
+; BDVER2-NEXT: phaddd (%rdi), %mm0 # sched: [8:1.50]
+; BDVER2-NEXT: movq %mm0, %rax # sched: [2:1.00]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-LABEL: test_phaddd:
; BTVER2: # %bb.0:
; BTVER2-NEXT: phaddd %mm1, %mm0 # sched: [1:0.50]
; SKX-NEXT: movq %mm0, %rax # sched: [2:1.00]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-LABEL: test_phaddsw:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: phaddsw %mm1, %mm0 # sched: [3:1.50]
+; BDVER2-NEXT: phaddsw (%rdi), %mm0 # sched: [8:1.50]
+; BDVER2-NEXT: movq %mm0, %rax # sched: [2:1.00]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-LABEL: test_phaddsw:
; BTVER2: # %bb.0:
; BTVER2-NEXT: phaddsw %mm1, %mm0 # sched: [1:0.50]
; SKX-NEXT: movq %mm0, %rax # sched: [2:1.00]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-LABEL: test_phaddw:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: phaddw %mm1, %mm0 # sched: [3:1.50]
+; BDVER2-NEXT: phaddw (%rdi), %mm0 # sched: [8:1.50]
+; BDVER2-NEXT: movq %mm0, %rax # sched: [2:1.00]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-LABEL: test_phaddw:
; BTVER2: # %bb.0:
; BTVER2-NEXT: phaddw %mm1, %mm0 # sched: [1:0.50]
; SKX-NEXT: movq %mm0, %rax # sched: [2:1.00]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-LABEL: test_phsubd:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: phsubd %mm1, %mm0 # sched: [3:1.50]
+; BDVER2-NEXT: phsubd (%rdi), %mm0 # sched: [8:1.50]
+; BDVER2-NEXT: movq %mm0, %rax # sched: [2:1.00]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-LABEL: test_phsubd:
; BTVER2: # %bb.0:
; BTVER2-NEXT: phsubd %mm1, %mm0 # sched: [1:0.50]
; SKX-NEXT: movq %mm0, %rax # sched: [2:1.00]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-LABEL: test_phsubsw:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: phsubsw %mm1, %mm0 # sched: [3:1.50]
+; BDVER2-NEXT: phsubsw (%rdi), %mm0 # sched: [8:1.50]
+; BDVER2-NEXT: movq %mm0, %rax # sched: [2:1.00]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-LABEL: test_phsubsw:
; BTVER2: # %bb.0:
; BTVER2-NEXT: phsubsw %mm1, %mm0 # sched: [1:0.50]
; SKX-NEXT: movq %mm0, %rax # sched: [2:1.00]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-LABEL: test_phsubw:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: phsubw %mm1, %mm0 # sched: [3:1.50]
+; BDVER2-NEXT: phsubw (%rdi), %mm0 # sched: [8:1.50]
+; BDVER2-NEXT: movq %mm0, %rax # sched: [2:1.00]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-LABEL: test_phsubw:
; BTVER2: # %bb.0:
; BTVER2-NEXT: phsubw %mm1, %mm0 # sched: [1:0.50]
; SKX-NEXT: movq %mm0, %rax # sched: [2:1.00]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-LABEL: test_pinsrw:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: pinsrw $0, %edi, %mm0 # sched: [2:1.00]
+; BDVER2-NEXT: movswl (%rsi), %eax # sched: [5:0.50]
+; BDVER2-NEXT: pinsrw $1, %eax, %mm0 # sched: [2:1.00]
+; BDVER2-NEXT: movq %mm0, %rax # sched: [2:1.00]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-LABEL: test_pinsrw:
; BTVER2: # %bb.0:
; BTVER2-NEXT: pinsrw $0, %edi, %mm0 # sched: [7:0.50]
; SKX-NEXT: movq %mm0, %rax # sched: [2:1.00]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-LABEL: test_pmaddwd:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: pmaddwd %mm1, %mm0 # sched: [5:1.00]
+; BDVER2-NEXT: pmaddwd (%rdi), %mm0 # sched: [10:1.00]
+; BDVER2-NEXT: movq %mm0, %rax # sched: [2:1.00]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-LABEL: test_pmaddwd:
; BTVER2: # %bb.0:
; BTVER2-NEXT: pmaddwd %mm1, %mm0 # sched: [2:1.00]
; SKX-NEXT: movq %mm0, %rax # sched: [2:1.00]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-LABEL: test_pmaddubsw:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: pmaddubsw %mm1, %mm0 # sched: [5:1.00]
+; BDVER2-NEXT: pmaddubsw (%rdi), %mm0 # sched: [10:1.00]
+; BDVER2-NEXT: movq %mm0, %rax # sched: [2:1.00]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-LABEL: test_pmaddubsw:
; BTVER2: # %bb.0:
; BTVER2-NEXT: pmaddubsw %mm1, %mm0 # sched: [2:1.00]
; SKX-NEXT: movq %mm0, %rax # sched: [2:1.00]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-LABEL: test_pmaxsw:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: pmaxsw %mm1, %mm0 # sched: [3:1.00]
+; BDVER2-NEXT: pmaxsw (%rdi), %mm0 # sched: [8:1.00]
+; BDVER2-NEXT: movq %mm0, %rax # sched: [2:1.00]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-LABEL: test_pmaxsw:
; BTVER2: # %bb.0:
; BTVER2-NEXT: pmaxsw %mm1, %mm0 # sched: [1:0.50]
; SKX-NEXT: movq %mm0, %rax # sched: [2:1.00]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-LABEL: test_pmaxub:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: pmaxub %mm1, %mm0 # sched: [3:1.00]
+; BDVER2-NEXT: pmaxub (%rdi), %mm0 # sched: [8:1.00]
+; BDVER2-NEXT: movq %mm0, %rax # sched: [2:1.00]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-LABEL: test_pmaxub:
; BTVER2: # %bb.0:
; BTVER2-NEXT: pmaxub %mm1, %mm0 # sched: [1:0.50]
; SKX-NEXT: movq %mm0, %rax # sched: [2:1.00]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-LABEL: test_pminsw:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: pminsw %mm1, %mm0 # sched: [3:1.00]
+; BDVER2-NEXT: pminsw (%rdi), %mm0 # sched: [8:1.00]
+; BDVER2-NEXT: movq %mm0, %rax # sched: [2:1.00]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-LABEL: test_pminsw:
; BTVER2: # %bb.0:
; BTVER2-NEXT: pminsw %mm1, %mm0 # sched: [1:0.50]
; SKX-NEXT: movq %mm0, %rax # sched: [2:1.00]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-LABEL: test_pminub:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: pminub %mm1, %mm0 # sched: [3:1.00]
+; BDVER2-NEXT: pminub (%rdi), %mm0 # sched: [8:1.00]
+; BDVER2-NEXT: movq %mm0, %rax # sched: [2:1.00]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-LABEL: test_pminub:
; BTVER2: # %bb.0:
; BTVER2-NEXT: pminub %mm1, %mm0 # sched: [1:0.50]
; SKX-NEXT: pmovmskb %mm0, %eax # sched: [2:1.00]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-LABEL: test_pmovmskb:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: pmovmskb %mm0, %eax # sched: [1:1.00]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-LABEL: test_pmovmskb:
; BTVER2: # %bb.0:
; BTVER2-NEXT: pmovmskb %mm0, %eax # sched: [3:1.00]
; SKX-NEXT: movq %mm0, %rax # sched: [2:1.00]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-LABEL: test_pmulhrsw:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: pmulhrsw %mm1, %mm0 # sched: [5:1.00]
+; BDVER2-NEXT: pmulhrsw (%rdi), %mm0 # sched: [10:1.00]
+; BDVER2-NEXT: movq %mm0, %rax # sched: [2:1.00]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-LABEL: test_pmulhrsw:
; BTVER2: # %bb.0:
; BTVER2-NEXT: pmulhrsw %mm1, %mm0 # sched: [2:1.00]
; SKX-NEXT: movq %mm0, %rax # sched: [2:1.00]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-LABEL: test_pmulhw:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: pmulhw %mm1, %mm0 # sched: [5:1.00]
+; BDVER2-NEXT: pmulhw (%rdi), %mm0 # sched: [10:1.00]
+; BDVER2-NEXT: movq %mm0, %rax # sched: [2:1.00]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-LABEL: test_pmulhw:
; BTVER2: # %bb.0:
; BTVER2-NEXT: pmulhw %mm1, %mm0 # sched: [2:1.00]
; SKX-NEXT: movq %mm0, %rax # sched: [2:1.00]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-LABEL: test_pmulhuw:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: pmulhuw %mm1, %mm0 # sched: [5:1.00]
+; BDVER2-NEXT: pmulhuw (%rdi), %mm0 # sched: [10:1.00]
+; BDVER2-NEXT: movq %mm0, %rax # sched: [2:1.00]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-LABEL: test_pmulhuw:
; BTVER2: # %bb.0:
; BTVER2-NEXT: pmulhuw %mm1, %mm0 # sched: [2:1.00]
; SKX-NEXT: movq %mm0, %rax # sched: [2:1.00]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-LABEL: test_pmullw:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: pmullw %mm1, %mm0 # sched: [5:1.00]
+; BDVER2-NEXT: pmullw (%rdi), %mm0 # sched: [10:1.00]
+; BDVER2-NEXT: movq %mm0, %rax # sched: [2:1.00]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-LABEL: test_pmullw:
; BTVER2: # %bb.0:
; BTVER2-NEXT: pmullw %mm1, %mm0 # sched: [2:1.00]
; SKX-NEXT: movq %mm0, %rax # sched: [2:1.00]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-LABEL: test_pmuludq:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: pmuludq %mm1, %mm0 # sched: [5:1.00]
+; BDVER2-NEXT: pmuludq (%rdi), %mm0 # sched: [10:1.00]
+; BDVER2-NEXT: movq %mm0, %rax # sched: [2:1.00]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-LABEL: test_pmuludq:
; BTVER2: # %bb.0:
; BTVER2-NEXT: pmuludq %mm1, %mm0 # sched: [2:1.00]
; SKX-NEXT: movq %mm0, %rax # sched: [2:1.00]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-LABEL: test_por:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: por %mm1, %mm0 # sched: [1:0.33]
+; BDVER2-NEXT: por (%rdi), %mm0 # sched: [6:0.50]
+; BDVER2-NEXT: movq %mm0, %rax # sched: [2:1.00]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-LABEL: test_por:
; BTVER2: # %bb.0:
; BTVER2-NEXT: por %mm1, %mm0 # sched: [1:0.50]
; SKX-NEXT: movq %mm0, %rax # sched: [2:1.00]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-LABEL: test_psadbw:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: psadbw %mm1, %mm0 # sched: [5:1.00]
+; BDVER2-NEXT: psadbw (%rdi), %mm0 # sched: [10:1.00]
+; BDVER2-NEXT: movq %mm0, %rax # sched: [2:1.00]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-LABEL: test_psadbw:
; BTVER2: # %bb.0:
; BTVER2-NEXT: psadbw %mm1, %mm0 # sched: [2:0.50]
; SKX-NEXT: movq %mm0, %rax # sched: [2:1.00]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-LABEL: test_pshufb:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: pshufb %mm1, %mm0 # sched: [1:0.50]
+; BDVER2-NEXT: pshufb (%rdi), %mm0 # sched: [6:0.50]
+; BDVER2-NEXT: movq %mm0, %rax # sched: [2:1.00]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-LABEL: test_pshufb:
; BTVER2: # %bb.0:
; BTVER2-NEXT: pshufb %mm1, %mm0 # sched: [2:0.50]
; SKX-NEXT: movq %mm0, %rax # sched: [2:1.00]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-LABEL: test_pshufw:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: pshufw $0, (%rdi), %mm0 # mm0 = mem[0,0,0,0] sched: [6:1.00]
+; BDVER2-NEXT: pshufw $0, %mm0, %mm0 # mm0 = mm0[0,0,0,0] sched: [1:1.00]
+; BDVER2-NEXT: movq %mm0, %rax # sched: [2:1.00]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-LABEL: test_pshufw:
; BTVER2: # %bb.0:
; BTVER2-NEXT: pshufw $0, (%rdi), %mm0 # mm0 = mem[0,0,0,0] sched: [6:1.00]
; SKX-NEXT: movq %mm0, %rax # sched: [2:1.00]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-LABEL: test_psignb:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: psignb %mm1, %mm0 # sched: [1:0.50]
+; BDVER2-NEXT: psignb (%rdi), %mm0 # sched: [6:0.50]
+; BDVER2-NEXT: movq %mm0, %rax # sched: [2:1.00]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-LABEL: test_psignb:
; BTVER2: # %bb.0:
; BTVER2-NEXT: psignb %mm1, %mm0 # sched: [1:0.50]
; SKX-NEXT: movq %mm0, %rax # sched: [2:1.00]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-LABEL: test_psignd:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: psignd %mm1, %mm0 # sched: [1:0.50]
+; BDVER2-NEXT: psignd (%rdi), %mm0 # sched: [6:0.50]
+; BDVER2-NEXT: movq %mm0, %rax # sched: [2:1.00]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-LABEL: test_psignd:
; BTVER2: # %bb.0:
; BTVER2-NEXT: psignd %mm1, %mm0 # sched: [1:0.50]
; SKX-NEXT: movq %mm0, %rax # sched: [2:1.00]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-LABEL: test_psignw:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: psignw %mm1, %mm0 # sched: [1:0.50]
+; BDVER2-NEXT: psignw (%rdi), %mm0 # sched: [6:0.50]
+; BDVER2-NEXT: movq %mm0, %rax # sched: [2:1.00]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-LABEL: test_psignw:
; BTVER2: # %bb.0:
; BTVER2-NEXT: psignw %mm1, %mm0 # sched: [1:0.50]
; SKX-NEXT: movq %mm0, %rax # sched: [2:1.00]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-LABEL: test_pslld:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: pslld %mm1, %mm0 # sched: [1:1.00]
+; BDVER2-NEXT: pslld (%rdi), %mm0 # sched: [6:1.00]
+; BDVER2-NEXT: pslld $7, %mm0 # sched: [1:1.00]
+; BDVER2-NEXT: movq %mm0, %rax # sched: [2:1.00]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-LABEL: test_pslld:
; BTVER2: # %bb.0:
; BTVER2-NEXT: pslld %mm1, %mm0 # sched: [1:0.50]
; SKX-NEXT: movq %mm0, %rax # sched: [2:1.00]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-LABEL: test_psllq:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: psllq %mm1, %mm0 # sched: [1:1.00]
+; BDVER2-NEXT: psllq (%rdi), %mm0 # sched: [6:1.00]
+; BDVER2-NEXT: psllq $7, %mm0 # sched: [1:1.00]
+; BDVER2-NEXT: movq %mm0, %rax # sched: [2:1.00]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-LABEL: test_psllq:
; BTVER2: # %bb.0:
; BTVER2-NEXT: psllq %mm1, %mm0 # sched: [1:0.50]
; SKX-NEXT: movq %mm0, %rax # sched: [2:1.00]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-LABEL: test_psllw:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: psllw %mm1, %mm0 # sched: [1:1.00]
+; BDVER2-NEXT: psllw (%rdi), %mm0 # sched: [6:1.00]
+; BDVER2-NEXT: psllw $7, %mm0 # sched: [1:1.00]
+; BDVER2-NEXT: movq %mm0, %rax # sched: [2:1.00]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-LABEL: test_psllw:
; BTVER2: # %bb.0:
; BTVER2-NEXT: psllw %mm1, %mm0 # sched: [1:0.50]
; SKX-NEXT: movq %mm0, %rax # sched: [2:1.00]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-LABEL: test_psrad:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: psrad %mm1, %mm0 # sched: [1:1.00]
+; BDVER2-NEXT: psrad (%rdi), %mm0 # sched: [6:1.00]
+; BDVER2-NEXT: psrad $7, %mm0 # sched: [1:1.00]
+; BDVER2-NEXT: movq %mm0, %rax # sched: [2:1.00]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-LABEL: test_psrad:
; BTVER2: # %bb.0:
; BTVER2-NEXT: psrad %mm1, %mm0 # sched: [1:0.50]
; SKX-NEXT: movq %mm0, %rax # sched: [2:1.00]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-LABEL: test_psraw:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: psraw %mm1, %mm0 # sched: [1:1.00]
+; BDVER2-NEXT: psraw (%rdi), %mm0 # sched: [6:1.00]
+; BDVER2-NEXT: psraw $7, %mm0 # sched: [1:1.00]
+; BDVER2-NEXT: movq %mm0, %rax # sched: [2:1.00]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-LABEL: test_psraw:
; BTVER2: # %bb.0:
; BTVER2-NEXT: psraw %mm1, %mm0 # sched: [1:0.50]
; SKX-NEXT: movq %mm0, %rax # sched: [2:1.00]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-LABEL: test_psrld:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: psrld %mm1, %mm0 # sched: [1:1.00]
+; BDVER2-NEXT: psrld (%rdi), %mm0 # sched: [6:1.00]
+; BDVER2-NEXT: psrld $7, %mm0 # sched: [1:1.00]
+; BDVER2-NEXT: movq %mm0, %rax # sched: [2:1.00]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-LABEL: test_psrld:
; BTVER2: # %bb.0:
; BTVER2-NEXT: psrld %mm1, %mm0 # sched: [1:0.50]
; SKX-NEXT: movq %mm0, %rax # sched: [2:1.00]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-LABEL: test_psrlq:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: psrlq %mm1, %mm0 # sched: [1:1.00]
+; BDVER2-NEXT: psrlq (%rdi), %mm0 # sched: [6:1.00]
+; BDVER2-NEXT: psrlq $7, %mm0 # sched: [1:1.00]
+; BDVER2-NEXT: movq %mm0, %rax # sched: [2:1.00]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-LABEL: test_psrlq:
; BTVER2: # %bb.0:
; BTVER2-NEXT: psrlq %mm1, %mm0 # sched: [1:0.50]
; SKX-NEXT: movq %mm0, %rax # sched: [2:1.00]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-LABEL: test_psrlw:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: psrlw %mm1, %mm0 # sched: [1:1.00]
+; BDVER2-NEXT: psrlw (%rdi), %mm0 # sched: [6:1.00]
+; BDVER2-NEXT: psrlw $7, %mm0 # sched: [1:1.00]
+; BDVER2-NEXT: movq %mm0, %rax # sched: [2:1.00]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-LABEL: test_psrlw:
; BTVER2: # %bb.0:
; BTVER2-NEXT: psrlw %mm1, %mm0 # sched: [1:0.50]
; SKX-NEXT: movq %mm0, %rax # sched: [2:1.00]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-LABEL: test_psubb:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: psubb %mm1, %mm0 # sched: [3:1.00]
+; BDVER2-NEXT: psubb (%rdi), %mm0 # sched: [8:1.00]
+; BDVER2-NEXT: movq %mm0, %rax # sched: [2:1.00]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-LABEL: test_psubb:
; BTVER2: # %bb.0:
; BTVER2-NEXT: psubb %mm1, %mm0 # sched: [1:0.50]
; SKX-NEXT: movq %mm0, %rax # sched: [2:1.00]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-LABEL: test_psubd:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: psubd %mm1, %mm0 # sched: [3:1.00]
+; BDVER2-NEXT: psubd (%rdi), %mm0 # sched: [8:1.00]
+; BDVER2-NEXT: movq %mm0, %rax # sched: [2:1.00]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-LABEL: test_psubd:
; BTVER2: # %bb.0:
; BTVER2-NEXT: psubd %mm1, %mm0 # sched: [1:0.50]
; SKX-NEXT: movq %mm0, %rax # sched: [2:1.00]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-LABEL: test_psubq:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: psubq %mm1, %mm0 # sched: [3:1.00]
+; BDVER2-NEXT: psubq (%rdi), %mm0 # sched: [8:1.00]
+; BDVER2-NEXT: movq %mm0, %rax # sched: [2:1.00]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-LABEL: test_psubq:
; BTVER2: # %bb.0:
; BTVER2-NEXT: psubq %mm1, %mm0 # sched: [1:0.50]
; SKX-NEXT: movq %mm0, %rax # sched: [2:1.00]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-LABEL: test_psubsb:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: psubsb %mm1, %mm0 # sched: [3:1.00]
+; BDVER2-NEXT: psubsb (%rdi), %mm0 # sched: [8:1.00]
+; BDVER2-NEXT: movq %mm0, %rax # sched: [2:1.00]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-LABEL: test_psubsb:
; BTVER2: # %bb.0:
; BTVER2-NEXT: psubsb %mm1, %mm0 # sched: [1:0.50]
; SKX-NEXT: movq %mm0, %rax # sched: [2:1.00]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-LABEL: test_psubsw:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: psubsw %mm1, %mm0 # sched: [3:1.00]
+; BDVER2-NEXT: psubsw (%rdi), %mm0 # sched: [8:1.00]
+; BDVER2-NEXT: movq %mm0, %rax # sched: [2:1.00]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-LABEL: test_psubsw:
; BTVER2: # %bb.0:
; BTVER2-NEXT: psubsw %mm1, %mm0 # sched: [1:0.50]
; SKX-NEXT: movq %mm0, %rax # sched: [2:1.00]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-LABEL: test_psubusb:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: psubusb %mm1, %mm0 # sched: [3:1.00]
+; BDVER2-NEXT: psubusb (%rdi), %mm0 # sched: [8:1.00]
+; BDVER2-NEXT: movq %mm0, %rax # sched: [2:1.00]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-LABEL: test_psubusb:
; BTVER2: # %bb.0:
; BTVER2-NEXT: psubusb %mm1, %mm0 # sched: [1:0.50]
; SKX-NEXT: movq %mm0, %rax # sched: [2:1.00]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-LABEL: test_psubusw:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: psubusw %mm1, %mm0 # sched: [3:1.00]
+; BDVER2-NEXT: psubusw (%rdi), %mm0 # sched: [8:1.00]
+; BDVER2-NEXT: movq %mm0, %rax # sched: [2:1.00]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-LABEL: test_psubusw:
; BTVER2: # %bb.0:
; BTVER2-NEXT: psubusw %mm1, %mm0 # sched: [1:0.50]
; SKX-NEXT: movq %mm0, %rax # sched: [2:1.00]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-LABEL: test_psubw:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: psubw %mm1, %mm0 # sched: [3:1.00]
+; BDVER2-NEXT: psubw (%rdi), %mm0 # sched: [8:1.00]
+; BDVER2-NEXT: movq %mm0, %rax # sched: [2:1.00]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-LABEL: test_psubw:
; BTVER2: # %bb.0:
; BTVER2-NEXT: psubw %mm1, %mm0 # sched: [1:0.50]
; SKX-NEXT: movq %mm0, %rax # sched: [2:1.00]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-LABEL: test_punpckhbw:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: punpckhbw %mm1, %mm0 # mm0 = mm0[4],mm1[4],mm0[5],mm1[5],mm0[6],mm1[6],mm0[7],mm1[7] sched: [1:1.00]
+; BDVER2-NEXT: punpckhbw (%rdi), %mm0 # mm0 = mm0[4],mem[4],mm0[5],mem[5],mm0[6],mem[6],mm0[7],mem[7] sched: [6:1.00]
+; BDVER2-NEXT: movq %mm0, %rax # sched: [2:1.00]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-LABEL: test_punpckhbw:
; BTVER2: # %bb.0:
; BTVER2-NEXT: punpckhbw %mm1, %mm0 # mm0 = mm0[4],mm1[4],mm0[5],mm1[5],mm0[6],mm1[6],mm0[7],mm1[7] sched: [1:0.50]
; SKX-NEXT: movq %mm0, %rax # sched: [2:1.00]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-LABEL: test_punpckhdq:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: punpckhdq %mm1, %mm0 # mm0 = mm0[1],mm1[1] sched: [1:1.00]
+; BDVER2-NEXT: punpckhdq (%rdi), %mm0 # mm0 = mm0[1],mem[1] sched: [6:1.00]
+; BDVER2-NEXT: movq %mm0, %rax # sched: [2:1.00]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-LABEL: test_punpckhdq:
; BTVER2: # %bb.0:
; BTVER2-NEXT: punpckhdq %mm1, %mm0 # mm0 = mm0[1],mm1[1] sched: [1:0.50]
; SKX-NEXT: movq %mm0, %rax # sched: [2:1.00]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-LABEL: test_punpckhwd:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: punpckhwd %mm1, %mm0 # mm0 = mm0[2],mm1[2],mm0[3],mm1[3] sched: [1:1.00]
+; BDVER2-NEXT: punpckhwd (%rdi), %mm0 # mm0 = mm0[2],mem[2],mm0[3],mem[3] sched: [6:1.00]
+; BDVER2-NEXT: movq %mm0, %rax # sched: [2:1.00]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-LABEL: test_punpckhwd:
; BTVER2: # %bb.0:
; BTVER2-NEXT: punpckhwd %mm1, %mm0 # mm0 = mm0[2],mm1[2],mm0[3],mm1[3] sched: [1:0.50]
; SKX-NEXT: movq %mm0, %rax # sched: [2:1.00]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-LABEL: test_punpcklbw:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: punpcklbw %mm1, %mm0 # mm0 = mm0[0],mm1[0],mm0[1],mm1[1],mm0[2],mm1[2],mm0[3],mm1[3] sched: [1:1.00]
+; BDVER2-NEXT: punpcklbw (%rdi), %mm0 # mm0 = mm0[0],mem[0],mm0[1],mem[1],mm0[2],mem[2],mm0[3],mem[3] sched: [6:1.00]
+; BDVER2-NEXT: movq %mm0, %rax # sched: [2:1.00]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-LABEL: test_punpcklbw:
; BTVER2: # %bb.0:
; BTVER2-NEXT: punpcklbw %mm1, %mm0 # mm0 = mm0[0],mm1[0],mm0[1],mm1[1],mm0[2],mm1[2],mm0[3],mm1[3] sched: [1:0.50]
; SKX-NEXT: movq %mm0, %rax # sched: [2:1.00]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-LABEL: test_punpckldq:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: punpckldq %mm1, %mm0 # mm0 = mm0[0],mm1[0] sched: [1:1.00]
+; BDVER2-NEXT: punpckldq (%rdi), %mm0 # mm0 = mm0[0],mem[0] sched: [6:1.00]
+; BDVER2-NEXT: movq %mm0, %rax # sched: [2:1.00]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-LABEL: test_punpckldq:
; BTVER2: # %bb.0:
; BTVER2-NEXT: punpckldq %mm1, %mm0 # mm0 = mm0[0],mm1[0] sched: [1:0.50]
; SKX-NEXT: movq %mm0, %rax # sched: [2:1.00]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-LABEL: test_punpcklwd:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: punpcklwd %mm1, %mm0 # mm0 = mm0[0],mm1[0],mm0[1],mm1[1] sched: [1:1.00]
+; BDVER2-NEXT: punpcklwd (%rdi), %mm0 # mm0 = mm0[0],mem[0],mm0[1],mem[1] sched: [6:1.00]
+; BDVER2-NEXT: movq %mm0, %rax # sched: [2:1.00]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-LABEL: test_punpcklwd:
; BTVER2: # %bb.0:
; BTVER2-NEXT: punpcklwd %mm1, %mm0 # mm0 = mm0[0],mm1[0],mm0[1],mm1[1] sched: [1:0.50]
; SKX-NEXT: movq %mm0, %rax # sched: [2:1.00]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-LABEL: test_pxor:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: pxor %mm1, %mm0 # sched: [1:0.33]
+; BDVER2-NEXT: pxor (%rdi), %mm0 # sched: [6:0.50]
+; BDVER2-NEXT: movq %mm0, %rax # sched: [2:1.00]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-LABEL: test_pxor:
; BTVER2: # %bb.0:
; BTVER2-NEXT: pxor %mm1, %mm0 # sched: [1:0.50]
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=broadwell | FileCheck %s --check-prefix=CHECK --check-prefix=BROADWELL
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=skylake | FileCheck %s --check-prefix=CHECK --check-prefix=SKYLAKE
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=knl | FileCheck %s --check-prefix=CHECK --check-prefix=HASWELL
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=x86-64 -mattr=+popcnt | FileCheck %s --check-prefix=CHECK --check-prefix=BDVER2
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=btver2 | FileCheck %s --check-prefix=CHECK --check-prefix=BTVER2
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=znver1 | FileCheck %s --check-prefix=CHECK --check-prefix=ZNVER1
; SKYLAKE-NEXT: # kill: def $ax killed $ax killed $eax
; SKYLAKE-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-LABEL: test_ctpop_i16:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: popcntw (%rsi), %cx # sched: [9:1.00]
+; BDVER2-NEXT: popcntw %di, %ax # sched: [3:1.00]
+; BDVER2-NEXT: orl %ecx, %eax # sched: [1:0.33]
+; BDVER2-NEXT: # kill: def $ax killed $ax killed $eax
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-LABEL: test_ctpop_i16:
; BTVER2: # %bb.0:
; BTVER2-NEXT: popcntw (%rsi), %cx # sched: [4:1.00]
; SKYLAKE-NEXT: orl %ecx, %eax # sched: [1:0.25]
; SKYLAKE-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-LABEL: test_ctpop_i32:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: popcntl (%rsi), %ecx # sched: [9:1.00]
+; BDVER2-NEXT: popcntl %edi, %eax # sched: [3:1.00]
+; BDVER2-NEXT: orl %ecx, %eax # sched: [1:0.33]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-LABEL: test_ctpop_i32:
; BTVER2: # %bb.0:
; BTVER2-NEXT: popcntl (%rsi), %ecx # sched: [4:1.00]
; SKYLAKE-NEXT: orq %rcx, %rax # sched: [1:0.25]
; SKYLAKE-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-LABEL: test_ctpop_i64:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: popcntq (%rsi), %rcx # sched: [9:1.00]
+; BDVER2-NEXT: popcntq %rdi, %rax # sched: [3:1.00]
+; BDVER2-NEXT: orq %rcx, %rax # sched: [1:0.33]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-LABEL: test_ctpop_i64:
; BTVER2: # %bb.0:
; BTVER2-NEXT: popcntq (%rsi), %rcx # sched: [4:1.00]
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse2 | FileCheck %s --check-prefix=CHECK --check-prefix=SSE --check-prefix=SSE-RECIP
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s --check-prefix=CHECK --check-prefix=AVX --check-prefix=AVX-RECIP
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx,+fma | FileCheck %s --check-prefix=CHECK --check-prefix=AVX --check-prefix=FMA-RECIP
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -mattr=+fma4 -mattr=+avx -print-schedule | FileCheck %s --check-prefix=CHECK --check-prefix=AVX --check-prefix=BDVER2
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=btver2 -print-schedule | FileCheck %s --check-prefix=CHECK --check-prefix=AVX --check-prefix=BTVER2
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=sandybridge -print-schedule | FileCheck %s --check-prefix=CHECK --check-prefix=AVX --check-prefix=SANDY
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=haswell -print-schedule | FileCheck %s --check-prefix=CHECK --check-prefix=AVX --check-prefix=HASWELL
; FMA-RECIP-NEXT: vdivss %xmm0, %xmm1, %xmm0
; FMA-RECIP-NEXT: retq
;
+; BDVER2-LABEL: f32_no_estimate:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero sched: [6:0.50]
+; BDVER2-NEXT: vdivss %xmm0, %xmm1, %xmm0 # sched: [14:14.00]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-LABEL: f32_no_estimate:
; BTVER2: # %bb.0:
; BTVER2-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero sched: [5:1.00]
; FMA-RECIP-NEXT: vfmadd132ss {{.*#+}} xmm0 = (xmm0 * xmm1) + xmm1
; FMA-RECIP-NEXT: retq
;
+; BDVER2-LABEL: f32_one_step:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: vrcpss %xmm0, %xmm0, %xmm1 # sched: [5:1.00]
+; BDVER2-NEXT: vfnmaddss {{.*}}(%rip), %xmm1, %xmm0, %xmm0 # sched: [10:0.50]
+; BDVER2-NEXT: vfmaddss %xmm1, %xmm0, %xmm1, %xmm0 # sched: [5:0.50]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-LABEL: f32_one_step:
; BTVER2: # %bb.0:
; BTVER2-NEXT: vmovss {{.*#+}} xmm2 = mem[0],zero,zero,zero sched: [5:1.00]
; FMA-RECIP-NEXT: vfmadd132ss {{.*#+}} xmm0 = (xmm0 * xmm3) + xmm3
; FMA-RECIP-NEXT: retq
;
+; BDVER2-LABEL: f32_two_step:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: vrcpss %xmm0, %xmm0, %xmm1 # sched: [5:1.00]
+; BDVER2-NEXT: vmovss {{.*#+}} xmm2 = mem[0],zero,zero,zero sched: [6:0.50]
+; BDVER2-NEXT: vfnmaddss %xmm2, %xmm1, %xmm0, %xmm3 # sched: [5:0.50]
+; BDVER2-NEXT: vfmaddss %xmm1, %xmm3, %xmm1, %xmm1 # sched: [5:0.50]
+; BDVER2-NEXT: vfnmaddss %xmm2, %xmm1, %xmm0, %xmm0 # sched: [5:0.50]
+; BDVER2-NEXT: vfmaddss %xmm1, %xmm0, %xmm1, %xmm0 # sched: [5:0.50]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-LABEL: f32_two_step:
; BTVER2: # %bb.0:
; BTVER2-NEXT: vmovss {{.*#+}} xmm3 = mem[0],zero,zero,zero sched: [5:1.00]
; FMA-RECIP-NEXT: vdivps %xmm0, %xmm1, %xmm0
; FMA-RECIP-NEXT: retq
;
+; BDVER2-LABEL: v4f32_no_estimate:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: vmovaps {{.*#+}} xmm1 = [1,1,1,1] sched: [6:0.50]
+; BDVER2-NEXT: vdivps %xmm0, %xmm1, %xmm0 # sched: [14:14.00]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-LABEL: v4f32_no_estimate:
; BTVER2: # %bb.0:
; BTVER2-NEXT: vmovaps {{.*#+}} xmm1 = [1,1,1,1] sched: [5:1.00]
; FMA-RECIP-NEXT: vfmadd132ps {{.*#+}} xmm0 = (xmm0 * xmm1) + xmm1
; FMA-RECIP-NEXT: retq
;
+; BDVER2-LABEL: v4f32_one_step:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: vrcpps %xmm0, %xmm1 # sched: [5:1.00]
+; BDVER2-NEXT: vfnmaddps {{.*}}(%rip), %xmm1, %xmm0, %xmm0 # sched: [10:0.50]
+; BDVER2-NEXT: vfmaddps %xmm1, %xmm0, %xmm1, %xmm0 # sched: [5:0.50]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-LABEL: v4f32_one_step:
; BTVER2: # %bb.0:
; BTVER2-NEXT: vmovaps {{.*#+}} xmm2 = [1,1,1,1] sched: [5:1.00]
; FMA-RECIP-NEXT: vfmadd132ps {{.*#+}} xmm0 = (xmm0 * xmm3) + xmm3
; FMA-RECIP-NEXT: retq
;
+; BDVER2-LABEL: v4f32_two_step:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: vrcpps %xmm0, %xmm1 # sched: [5:1.00]
+; BDVER2-NEXT: vmovaps {{.*#+}} xmm2 = [1,1,1,1] sched: [6:0.50]
+; BDVER2-NEXT: vfnmaddps %xmm2, %xmm1, %xmm0, %xmm3 # sched: [5:0.50]
+; BDVER2-NEXT: vfmaddps %xmm1, %xmm3, %xmm1, %xmm1 # sched: [5:0.50]
+; BDVER2-NEXT: vfnmaddps %xmm2, %xmm1, %xmm0, %xmm0 # sched: [5:0.50]
+; BDVER2-NEXT: vfmaddps %xmm1, %xmm0, %xmm1, %xmm0 # sched: [5:0.50]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-LABEL: v4f32_two_step:
; BTVER2: # %bb.0:
; BTVER2-NEXT: vmovaps {{.*#+}} xmm3 = [1,1,1,1] sched: [5:1.00]
; FMA-RECIP-NEXT: vdivps %ymm0, %ymm1, %ymm0
; FMA-RECIP-NEXT: retq
;
+; BDVER2-LABEL: v8f32_no_estimate:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: vmovaps {{.*#+}} ymm1 = [1,1,1,1,1,1,1,1] sched: [7:0.50]
+; BDVER2-NEXT: vdivps %ymm0, %ymm1, %ymm0 # sched: [29:28.00]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-LABEL: v8f32_no_estimate:
; BTVER2: # %bb.0:
; BTVER2-NEXT: vmovaps {{.*#+}} ymm1 = [1,1,1,1,1,1,1,1] sched: [5:1.00]
; FMA-RECIP-NEXT: vfmadd132ps {{.*#+}} ymm0 = (ymm0 * ymm1) + ymm1
; FMA-RECIP-NEXT: retq
;
+; BDVER2-LABEL: v8f32_one_step:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: vrcpps %ymm0, %ymm1 # sched: [7:2.00]
+; BDVER2-NEXT: vfnmaddps {{.*}}(%rip), %ymm1, %ymm0, %ymm0 # sched: [10:0.50]
+; BDVER2-NEXT: vfmaddps %ymm1, %ymm0, %ymm1, %ymm0 # sched: [5:0.50]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-LABEL: v8f32_one_step:
; BTVER2: # %bb.0:
; BTVER2-NEXT: vmovaps {{.*#+}} ymm2 = [1,1,1,1,1,1,1,1] sched: [5:1.00]
; FMA-RECIP-NEXT: vfmadd132ps {{.*#+}} ymm0 = (ymm0 * ymm3) + ymm3
; FMA-RECIP-NEXT: retq
;
+; BDVER2-LABEL: v8f32_two_step:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: vrcpps %ymm0, %ymm1 # sched: [7:2.00]
+; BDVER2-NEXT: vmovaps {{.*#+}} ymm2 = [1,1,1,1,1,1,1,1] sched: [7:0.50]
+; BDVER2-NEXT: vfnmaddps %ymm2, %ymm1, %ymm0, %ymm3 # sched: [5:0.50]
+; BDVER2-NEXT: vfmaddps %ymm1, %ymm3, %ymm1, %ymm1 # sched: [5:0.50]
+; BDVER2-NEXT: vfnmaddps %ymm2, %ymm1, %ymm0, %ymm0 # sched: [5:0.50]
+; BDVER2-NEXT: vfmaddps %ymm1, %ymm0, %ymm1, %ymm0 # sched: [5:0.50]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-LABEL: v8f32_two_step:
; BTVER2: # %bb.0:
; BTVER2-NEXT: vmovaps {{.*#+}} ymm3 = [1,1,1,1,1,1,1,1] sched: [5:1.00]
; FMA-RECIP-NEXT: vdivps %ymm1, %ymm2, %ymm1
; FMA-RECIP-NEXT: retq
;
+; BDVER2-LABEL: v16f32_no_estimate:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: vmovaps {{.*#+}} ymm2 = [1,1,1,1,1,1,1,1] sched: [7:0.50]
+; BDVER2-NEXT: vdivps %ymm0, %ymm2, %ymm0 # sched: [29:28.00]
+; BDVER2-NEXT: vdivps %ymm1, %ymm2, %ymm1 # sched: [29:28.00]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-LABEL: v16f32_no_estimate:
; BTVER2: # %bb.0:
; BTVER2-NEXT: vmovaps {{.*#+}} ymm2 = [1,1,1,1,1,1,1,1] sched: [5:1.00]
; FMA-RECIP-NEXT: vfmadd132ps {{.*#+}} ymm1 = (ymm1 * ymm2) + ymm2
; FMA-RECIP-NEXT: retq
;
+; BDVER2-LABEL: v16f32_one_step:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: vrcpps %ymm0, %ymm2 # sched: [7:2.00]
+; BDVER2-NEXT: vmovaps {{.*#+}} ymm3 = [1,1,1,1,1,1,1,1] sched: [7:0.50]
+; BDVER2-NEXT: vrcpps %ymm1, %ymm4 # sched: [7:2.00]
+; BDVER2-NEXT: vfnmaddps %ymm3, %ymm2, %ymm0, %ymm0 # sched: [5:0.50]
+; BDVER2-NEXT: vfmaddps %ymm2, %ymm0, %ymm2, %ymm0 # sched: [5:0.50]
+; BDVER2-NEXT: vfnmaddps %ymm3, %ymm4, %ymm1, %ymm1 # sched: [5:0.50]
+; BDVER2-NEXT: vfmaddps %ymm4, %ymm1, %ymm4, %ymm1 # sched: [5:0.50]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-LABEL: v16f32_one_step:
; BTVER2: # %bb.0:
; BTVER2-NEXT: vmovaps {{.*#+}} ymm3 = [1,1,1,1,1,1,1,1] sched: [5:1.00]
; FMA-RECIP-NEXT: vfmadd132ps {{.*#+}} ymm1 = (ymm1 * ymm4) + ymm4
; FMA-RECIP-NEXT: retq
;
+; BDVER2-LABEL: v16f32_two_step:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: vrcpps %ymm0, %ymm2 # sched: [7:2.00]
+; BDVER2-NEXT: vmovaps {{.*#+}} ymm3 = [1,1,1,1,1,1,1,1] sched: [7:0.50]
+; BDVER2-NEXT: vfnmaddps %ymm3, %ymm2, %ymm0, %ymm4 # sched: [5:0.50]
+; BDVER2-NEXT: vfmaddps %ymm2, %ymm4, %ymm2, %ymm2 # sched: [5:0.50]
+; BDVER2-NEXT: vfnmaddps %ymm3, %ymm2, %ymm0, %ymm0 # sched: [5:0.50]
+; BDVER2-NEXT: vfmaddps %ymm2, %ymm0, %ymm2, %ymm0 # sched: [5:0.50]
+; BDVER2-NEXT: vrcpps %ymm1, %ymm2 # sched: [7:2.00]
+; BDVER2-NEXT: vfnmaddps %ymm3, %ymm2, %ymm1, %ymm4 # sched: [5:0.50]
+; BDVER2-NEXT: vfmaddps %ymm2, %ymm4, %ymm2, %ymm2 # sched: [5:0.50]
+; BDVER2-NEXT: vfnmaddps %ymm3, %ymm2, %ymm1, %ymm1 # sched: [5:0.50]
+; BDVER2-NEXT: vfmaddps %ymm2, %ymm1, %ymm2, %ymm1 # sched: [5:0.50]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-LABEL: v16f32_two_step:
; BTVER2: # %bb.0:
; BTVER2-NEXT: vmovaps {{.*#+}} ymm4 = [1,1,1,1,1,1,1,1] sched: [5:1.00]
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse2 -print-schedule | FileCheck %s --check-prefix=CHECK --check-prefix=SSE --check-prefix=SSE-RECIP
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx -print-schedule | FileCheck %s --check-prefix=CHECK --check-prefix=AVX --check-prefix=AVX-RECIP
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx,+fma -print-schedule | FileCheck %s --check-prefix=CHECK --check-prefix=AVX --check-prefix=FMA-RECIP
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -mattr=+fma4 -mattr=+avx -print-schedule | FileCheck %s --check-prefix=CHECK --check-prefix=AVX --check-prefix=BDVER2
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=btver2 -print-schedule | FileCheck %s --check-prefix=CHECK --check-prefix=AVX --check-prefix=BTVER2
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=sandybridge -print-schedule | FileCheck %s --check-prefix=CHECK --check-prefix=AVX --check-prefix=SANDY
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=haswell -print-schedule | FileCheck %s --check-prefix=CHECK --check-prefix=AVX --check-prefix=HASWELL
; FMA-RECIP-NEXT: vmulss {{.*}}(%rip), %xmm0, %xmm0
; FMA-RECIP-NEXT: retq
;
+; BDVER2-LABEL: f32_no_step_2:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: vrcpss %xmm0, %xmm0, %xmm0 # sched: [5:1.00]
+; BDVER2-NEXT: vmulss {{.*}}(%rip), %xmm0, %xmm0 # sched: [11:1.00]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-LABEL: f32_no_step_2:
; BTVER2: # %bb.0:
; BTVER2-NEXT: vrcpss %xmm0, %xmm0, %xmm0 # sched: [2:1.00]
; FMA-RECIP-NEXT: vmulss {{.*}}(%rip), %xmm0, %xmm0
; FMA-RECIP-NEXT: retq
;
+; BDVER2-LABEL: f32_one_step_2:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: vrcpss %xmm0, %xmm0, %xmm1 # sched: [5:1.00]
+; BDVER2-NEXT: vfnmaddss {{.*}}(%rip), %xmm1, %xmm0, %xmm0 # sched: [10:0.50]
+; BDVER2-NEXT: vfmaddss %xmm1, %xmm0, %xmm1, %xmm0 # sched: [5:0.50]
+; BDVER2-NEXT: vmulss {{.*}}(%rip), %xmm0, %xmm0 # sched: [11:1.00]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-LABEL: f32_one_step_2:
; BTVER2: # %bb.0:
; BTVER2-NEXT: vmovss {{.*#+}} xmm2 = mem[0],zero,zero,zero sched: [5:1.00]
; FMA-RECIP-NEXT: vmulss %xmm0, %xmm1, %xmm0
; FMA-RECIP-NEXT: retq
;
+; BDVER2-LABEL: f32_one_step_2_divs:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: vrcpss %xmm0, %xmm0, %xmm1 # sched: [5:1.00]
+; BDVER2-NEXT: vfnmaddss {{.*}}(%rip), %xmm1, %xmm0, %xmm0 # sched: [10:0.50]
+; BDVER2-NEXT: vfmaddss %xmm1, %xmm0, %xmm1, %xmm0 # sched: [5:0.50]
+; BDVER2-NEXT: vmulss {{.*}}(%rip), %xmm0, %xmm1 # sched: [11:1.00]
+; BDVER2-NEXT: vmulss %xmm0, %xmm1, %xmm0 # sched: [5:1.00]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-LABEL: f32_one_step_2_divs:
; BTVER2: # %bb.0:
; BTVER2-NEXT: vmovss {{.*#+}} xmm2 = mem[0],zero,zero,zero sched: [5:1.00]
; FMA-RECIP-NEXT: vmulss {{.*}}(%rip), %xmm0, %xmm0
; FMA-RECIP-NEXT: retq
;
+; BDVER2-LABEL: f32_two_step_2:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: vrcpss %xmm0, %xmm0, %xmm1 # sched: [5:1.00]
+; BDVER2-NEXT: vmovss {{.*#+}} xmm2 = mem[0],zero,zero,zero sched: [6:0.50]
+; BDVER2-NEXT: vfnmaddss %xmm2, %xmm1, %xmm0, %xmm3 # sched: [5:0.50]
+; BDVER2-NEXT: vfmaddss %xmm1, %xmm3, %xmm1, %xmm1 # sched: [5:0.50]
+; BDVER2-NEXT: vfnmaddss %xmm2, %xmm1, %xmm0, %xmm0 # sched: [5:0.50]
+; BDVER2-NEXT: vfmaddss %xmm1, %xmm0, %xmm1, %xmm0 # sched: [5:0.50]
+; BDVER2-NEXT: vmulss {{.*}}(%rip), %xmm0, %xmm0 # sched: [11:1.00]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-LABEL: f32_two_step_2:
; BTVER2: # %bb.0:
; BTVER2-NEXT: vmovss {{.*#+}} xmm3 = mem[0],zero,zero,zero sched: [5:1.00]
; FMA-RECIP-NEXT: vmulps {{.*}}(%rip), %xmm0, %xmm0
; FMA-RECIP-NEXT: retq
;
+; BDVER2-LABEL: v4f32_one_step2:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: vrcpps %xmm0, %xmm1 # sched: [5:1.00]
+; BDVER2-NEXT: vfnmaddps {{.*}}(%rip), %xmm1, %xmm0, %xmm0 # sched: [10:0.50]
+; BDVER2-NEXT: vfmaddps %xmm1, %xmm0, %xmm1, %xmm0 # sched: [5:0.50]
+; BDVER2-NEXT: vmulps {{.*}}(%rip), %xmm0, %xmm0 # sched: [11:1.00]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-LABEL: v4f32_one_step2:
; BTVER2: # %bb.0:
; BTVER2-NEXT: vmovaps {{.*#+}} xmm2 = [1,1,1,1] sched: [5:1.00]
; FMA-RECIP-NEXT: vmulps %xmm0, %xmm1, %xmm0
; FMA-RECIP-NEXT: retq
;
+; BDVER2-LABEL: v4f32_one_step_2_divs:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: vrcpps %xmm0, %xmm1 # sched: [5:1.00]
+; BDVER2-NEXT: vfnmaddps {{.*}}(%rip), %xmm1, %xmm0, %xmm0 # sched: [10:0.50]
+; BDVER2-NEXT: vfmaddps %xmm1, %xmm0, %xmm1, %xmm0 # sched: [5:0.50]
+; BDVER2-NEXT: vmulps {{.*}}(%rip), %xmm0, %xmm1 # sched: [11:1.00]
+; BDVER2-NEXT: vmulps %xmm0, %xmm1, %xmm0 # sched: [5:1.00]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-LABEL: v4f32_one_step_2_divs:
; BTVER2: # %bb.0:
; BTVER2-NEXT: vmovaps {{.*#+}} xmm2 = [1,1,1,1] sched: [5:1.00]
; FMA-RECIP-NEXT: vmulps {{.*}}(%rip), %xmm0, %xmm0
; FMA-RECIP-NEXT: retq
;
+; BDVER2-LABEL: v4f32_two_step2:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: vrcpps %xmm0, %xmm1 # sched: [5:1.00]
+; BDVER2-NEXT: vmovaps {{.*#+}} xmm2 = [1,1,1,1] sched: [6:0.50]
+; BDVER2-NEXT: vfnmaddps %xmm2, %xmm1, %xmm0, %xmm3 # sched: [5:0.50]
+; BDVER2-NEXT: vfmaddps %xmm1, %xmm3, %xmm1, %xmm1 # sched: [5:0.50]
+; BDVER2-NEXT: vfnmaddps %xmm2, %xmm1, %xmm0, %xmm0 # sched: [5:0.50]
+; BDVER2-NEXT: vfmaddps %xmm1, %xmm0, %xmm1, %xmm0 # sched: [5:0.50]
+; BDVER2-NEXT: vmulps {{.*}}(%rip), %xmm0, %xmm0 # sched: [11:1.00]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-LABEL: v4f32_two_step2:
; BTVER2: # %bb.0:
; BTVER2-NEXT: vmovaps {{.*#+}} xmm3 = [1,1,1,1] sched: [5:1.00]
; FMA-RECIP-NEXT: vmulps {{.*}}(%rip), %ymm0, %ymm0
; FMA-RECIP-NEXT: retq
;
+; BDVER2-LABEL: v8f32_one_step2:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: vrcpps %ymm0, %ymm1 # sched: [7:2.00]
+; BDVER2-NEXT: vfnmaddps {{.*}}(%rip), %ymm1, %ymm0, %ymm0 # sched: [10:0.50]
+; BDVER2-NEXT: vfmaddps %ymm1, %ymm0, %ymm1, %ymm0 # sched: [5:0.50]
+; BDVER2-NEXT: vmulps {{.*}}(%rip), %ymm0, %ymm0 # sched: [12:1.00]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-LABEL: v8f32_one_step2:
; BTVER2: # %bb.0:
; BTVER2-NEXT: vmovaps {{.*#+}} ymm2 = [1,1,1,1,1,1,1,1] sched: [5:1.00]
; FMA-RECIP-NEXT: vmulps %ymm0, %ymm1, %ymm0
; FMA-RECIP-NEXT: retq
;
+; BDVER2-LABEL: v8f32_one_step_2_divs:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: vrcpps %ymm0, %ymm1 # sched: [7:2.00]
+; BDVER2-NEXT: vfnmaddps {{.*}}(%rip), %ymm1, %ymm0, %ymm0 # sched: [10:0.50]
+; BDVER2-NEXT: vfmaddps %ymm1, %ymm0, %ymm1, %ymm0 # sched: [5:0.50]
+; BDVER2-NEXT: vmulps {{.*}}(%rip), %ymm0, %ymm1 # sched: [12:1.00]
+; BDVER2-NEXT: vmulps %ymm0, %ymm1, %ymm0 # sched: [5:1.00]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-LABEL: v8f32_one_step_2_divs:
; BTVER2: # %bb.0:
; BTVER2-NEXT: vmovaps {{.*#+}} ymm2 = [1,1,1,1,1,1,1,1] sched: [5:1.00]
; FMA-RECIP-NEXT: vmulps {{.*}}(%rip), %ymm0, %ymm0
; FMA-RECIP-NEXT: retq
;
+; BDVER2-LABEL: v8f32_two_step2:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: vrcpps %ymm0, %ymm1 # sched: [7:2.00]
+; BDVER2-NEXT: vmovaps {{.*#+}} ymm2 = [1,1,1,1,1,1,1,1] sched: [7:0.50]
+; BDVER2-NEXT: vfnmaddps %ymm2, %ymm1, %ymm0, %ymm3 # sched: [5:0.50]
+; BDVER2-NEXT: vfmaddps %ymm1, %ymm3, %ymm1, %ymm1 # sched: [5:0.50]
+; BDVER2-NEXT: vfnmaddps %ymm2, %ymm1, %ymm0, %ymm0 # sched: [5:0.50]
+; BDVER2-NEXT: vfmaddps %ymm1, %ymm0, %ymm1, %ymm0 # sched: [5:0.50]
+; BDVER2-NEXT: vmulps {{.*}}(%rip), %ymm0, %ymm0 # sched: [12:1.00]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-LABEL: v8f32_two_step2:
; BTVER2: # %bb.0:
; BTVER2-NEXT: vmovaps {{.*#+}} ymm3 = [1,1,1,1,1,1,1,1] sched: [5:1.00]
; FMA-RECIP-NEXT: vrcpps %ymm0, %ymm0
; FMA-RECIP-NEXT: retq
;
+; BDVER2-LABEL: v8f32_no_step:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: vrcpps %ymm0, %ymm0 # sched: [7:2.00]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-LABEL: v8f32_no_step:
; BTVER2: # %bb.0:
; BTVER2-NEXT: vrcpps %ymm0, %ymm0 # sched: [2:2.00]
; FMA-RECIP-NEXT: vmulps {{.*}}(%rip), %ymm0, %ymm0
; FMA-RECIP-NEXT: retq
;
+; BDVER2-LABEL: v8f32_no_step2:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: vrcpps %ymm0, %ymm0 # sched: [7:2.00]
+; BDVER2-NEXT: vmulps {{.*}}(%rip), %ymm0, %ymm0 # sched: [12:1.00]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-LABEL: v8f32_no_step2:
; BTVER2: # %bb.0:
; BTVER2-NEXT: vrcpps %ymm0, %ymm0 # sched: [2:2.00]
; FMA-RECIP-NEXT: vmulps {{.*}}(%rip), %ymm1, %ymm1
; FMA-RECIP-NEXT: retq
;
+; BDVER2-LABEL: v16f32_one_step2:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: vrcpps %ymm1, %ymm2 # sched: [7:2.00]
+; BDVER2-NEXT: vmovaps {{.*#+}} ymm3 = [1,1,1,1,1,1,1,1] sched: [7:0.50]
+; BDVER2-NEXT: vrcpps %ymm0, %ymm4 # sched: [7:2.00]
+; BDVER2-NEXT: vfnmaddps %ymm3, %ymm2, %ymm1, %ymm1 # sched: [5:0.50]
+; BDVER2-NEXT: vfmaddps %ymm2, %ymm1, %ymm2, %ymm1 # sched: [5:0.50]
+; BDVER2-NEXT: vfnmaddps %ymm3, %ymm4, %ymm0, %ymm0 # sched: [5:0.50]
+; BDVER2-NEXT: vfmaddps %ymm4, %ymm0, %ymm4, %ymm0 # sched: [5:0.50]
+; BDVER2-NEXT: vmulps {{.*}}(%rip), %ymm0, %ymm0 # sched: [12:1.00]
+; BDVER2-NEXT: vmulps {{.*}}(%rip), %ymm1, %ymm1 # sched: [12:1.00]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-LABEL: v16f32_one_step2:
; BTVER2: # %bb.0:
; BTVER2-NEXT: vmovaps {{.*#+}} ymm3 = [1,1,1,1,1,1,1,1] sched: [5:1.00]
; FMA-RECIP-NEXT: vmulps %ymm1, %ymm2, %ymm1
; FMA-RECIP-NEXT: retq
;
+; BDVER2-LABEL: v16f32_one_step_2_divs:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: vrcpps %ymm0, %ymm2 # sched: [7:2.00]
+; BDVER2-NEXT: vmovaps {{.*#+}} ymm3 = [1,1,1,1,1,1,1,1] sched: [7:0.50]
+; BDVER2-NEXT: vfnmaddps %ymm3, %ymm2, %ymm0, %ymm0 # sched: [5:0.50]
+; BDVER2-NEXT: vfmaddps %ymm2, %ymm0, %ymm2, %ymm0 # sched: [5:0.50]
+; BDVER2-NEXT: vrcpps %ymm1, %ymm2 # sched: [7:2.00]
+; BDVER2-NEXT: vfnmaddps %ymm3, %ymm2, %ymm1, %ymm1 # sched: [5:0.50]
+; BDVER2-NEXT: vfmaddps %ymm2, %ymm1, %ymm2, %ymm1 # sched: [5:0.50]
+; BDVER2-NEXT: vmulps {{.*}}(%rip), %ymm1, %ymm2 # sched: [12:1.00]
+; BDVER2-NEXT: vmulps {{.*}}(%rip), %ymm0, %ymm3 # sched: [12:1.00]
+; BDVER2-NEXT: vmulps %ymm0, %ymm3, %ymm0 # sched: [5:1.00]
+; BDVER2-NEXT: vmulps %ymm1, %ymm2, %ymm1 # sched: [5:1.00]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-LABEL: v16f32_one_step_2_divs:
; BTVER2: # %bb.0:
; BTVER2-NEXT: vmovaps {{.*#+}} ymm3 = [1,1,1,1,1,1,1,1] sched: [5:1.00]
; FMA-RECIP-NEXT: vmulps {{.*}}(%rip), %ymm1, %ymm1
; FMA-RECIP-NEXT: retq
;
+; BDVER2-LABEL: v16f32_two_step2:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: vrcpps %ymm1, %ymm2 # sched: [7:2.00]
+; BDVER2-NEXT: vmovaps {{.*#+}} ymm3 = [1,1,1,1,1,1,1,1] sched: [7:0.50]
+; BDVER2-NEXT: vfnmaddps %ymm3, %ymm2, %ymm1, %ymm4 # sched: [5:0.50]
+; BDVER2-NEXT: vfmaddps %ymm2, %ymm4, %ymm2, %ymm2 # sched: [5:0.50]
+; BDVER2-NEXT: vfnmaddps %ymm3, %ymm2, %ymm1, %ymm1 # sched: [5:0.50]
+; BDVER2-NEXT: vfmaddps %ymm2, %ymm1, %ymm2, %ymm1 # sched: [5:0.50]
+; BDVER2-NEXT: vrcpps %ymm0, %ymm2 # sched: [7:2.00]
+; BDVER2-NEXT: vfnmaddps %ymm3, %ymm2, %ymm0, %ymm4 # sched: [5:0.50]
+; BDVER2-NEXT: vfmaddps %ymm2, %ymm4, %ymm2, %ymm2 # sched: [5:0.50]
+; BDVER2-NEXT: vfnmaddps %ymm3, %ymm2, %ymm0, %ymm0 # sched: [5:0.50]
+; BDVER2-NEXT: vfmaddps %ymm2, %ymm0, %ymm2, %ymm0 # sched: [5:0.50]
+; BDVER2-NEXT: vmulps {{.*}}(%rip), %ymm0, %ymm0 # sched: [12:1.00]
+; BDVER2-NEXT: vmulps {{.*}}(%rip), %ymm1, %ymm1 # sched: [12:1.00]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-LABEL: v16f32_two_step2:
; BTVER2: # %bb.0:
; BTVER2-NEXT: vmovaps {{.*#+}} ymm4 = [1,1,1,1,1,1,1,1] sched: [5:1.00]
; FMA-RECIP-NEXT: vrcpps %ymm1, %ymm1
; FMA-RECIP-NEXT: retq
;
+; BDVER2-LABEL: v16f32_no_step:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: vrcpps %ymm0, %ymm0 # sched: [7:2.00]
+; BDVER2-NEXT: vrcpps %ymm1, %ymm1 # sched: [7:2.00]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-LABEL: v16f32_no_step:
; BTVER2: # %bb.0:
; BTVER2-NEXT: vrcpps %ymm0, %ymm0 # sched: [2:2.00]
; FMA-RECIP-NEXT: vmulps {{.*}}(%rip), %ymm1, %ymm1
; FMA-RECIP-NEXT: retq
;
+; BDVER2-LABEL: v16f32_no_step2:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: vrcpps %ymm1, %ymm1 # sched: [7:2.00]
+; BDVER2-NEXT: vrcpps %ymm0, %ymm0 # sched: [7:2.00]
+; BDVER2-NEXT: vmulps {{.*}}(%rip), %ymm0, %ymm0 # sched: [12:1.00]
+; BDVER2-NEXT: vmulps {{.*}}(%rip), %ymm1, %ymm1 # sched: [12:1.00]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-LABEL: v16f32_no_step2:
; BTVER2: # %bb.0:
; BTVER2-NEXT: vrcpps %ymm1, %ymm1 # sched: [2:2.00]
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=x86-64 | FileCheck %s --check-prefix=CHECK --check-prefix=GENERIC
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=x86-64 -mattr=+avx -mattr=+slow-shld | FileCheck %s --check-prefix=CHECK --check-prefix=BDVER12 --check-prefix=BDVER1
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=x86-64 -mattr=+avx -mattr=+slow-shld | FileCheck %s --check-prefix=CHECK --check-prefix=BDVER12 --check-prefix=BDVER2
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=btver2 | FileCheck %s --check-prefix=CHECK --check-prefix=BTVER2
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=bdver1 | FileCheck %s --check-prefix=CHECK --check-prefix=BDVER1
; uint64_t lshift10(uint64_t a, uint64_t b)
; GENERIC-NEXT: shldq $10, %rsi, %rax # sched: [2:0.67]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
+; BDVER12-LABEL: lshift10_optsize:
+; BDVER12: # %bb.0: # %entry
+; BDVER12-NEXT: movq %rdi, %rax # sched: [1:0.33]
+; BDVER12-NEXT: shldq $10, %rsi, %rax # sched: [2:0.67]
+; BDVER12-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-LABEL: lshift10_optsize:
; BTVER2: # %bb.0: # %entry
; BTVER2-NEXT: movq %rdi, %rax # sched: [1:0.50]
; BTVER2-NEXT: shldq $10, %rsi, %rax # sched: [3:3.00]
; BTVER2-NEXT: retq # sched: [4:1.00]
-;
-; BDVER1-LABEL: lshift10_optsize:
-; BDVER1: # %bb.0: # %entry
-; BDVER1-NEXT: movq %rdi, %rax
-; BDVER1-NEXT: shldq $10, %rsi, %rax
-; BDVER1-NEXT: retq
entry:
%shl = shl i64 %a, 10
%shr = lshr i64 %b, 54
; GENERIC-NEXT: shldq $10, %rsi, %rax # sched: [2:0.67]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
+; BDVER12-LABEL: lshift10:
+; BDVER12: # %bb.0: # %entry
+; BDVER12-NEXT: shlq $10, %rdi # sched: [1:0.50]
+; BDVER12-NEXT: shrq $54, %rsi # sched: [1:0.50]
+; BDVER12-NEXT: leaq (%rsi,%rdi), %rax # sched: [1:0.50]
+; BDVER12-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-LABEL: lshift10:
; BTVER2: # %bb.0: # %entry
; BTVER2-NEXT: shlq $10, %rdi # sched: [1:0.50]
; BTVER2-NEXT: shrq $54, %rsi # sched: [1:0.50]
; BTVER2-NEXT: leaq (%rsi,%rdi), %rax # sched: [1:0.50]
; BTVER2-NEXT: retq # sched: [4:1.00]
-;
-; BDVER1-LABEL: lshift10:
-; BDVER1: # %bb.0: # %entry
-; BDVER1-NEXT: shlq $10, %rdi
-; BDVER1-NEXT: shrq $54, %rsi
-; BDVER1-NEXT: leaq (%rsi,%rdi), %rax
-; BDVER1-NEXT: retq
entry:
%shl = shl i64 %a, 10
%shr = lshr i64 %b, 54
; GENERIC-NEXT: shrdq $62, %rsi, %rax # sched: [2:0.67]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
+; BDVER12-LABEL: rshift10_optsize:
+; BDVER12: # %bb.0: # %entry
+; BDVER12-NEXT: movq %rdi, %rax # sched: [1:0.33]
+; BDVER12-NEXT: shrdq $62, %rsi, %rax # sched: [2:0.67]
+; BDVER12-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-LABEL: rshift10_optsize:
; BTVER2: # %bb.0: # %entry
; BTVER2-NEXT: movq %rdi, %rax # sched: [1:0.50]
; BTVER2-NEXT: shrdq $62, %rsi, %rax # sched: [3:3.00]
; BTVER2-NEXT: retq # sched: [4:1.00]
-;
-; BDVER1-LABEL: rshift10_optsize:
-; BDVER1: # %bb.0: # %entry
-; BDVER1-NEXT: movq %rdi, %rax
-; BDVER1-NEXT: shrdq $62, %rsi, %rax
-; BDVER1-NEXT: retq
entry:
%shl = lshr i64 %a, 62
%shr = shl i64 %b, 2
; GENERIC-NEXT: shrdq $62, %rsi, %rax # sched: [2:0.67]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
+; BDVER12-LABEL: rshift10:
+; BDVER12: # %bb.0: # %entry
+; BDVER12-NEXT: shrq $62, %rdi # sched: [1:0.50]
+; BDVER12-NEXT: leaq (%rdi,%rsi,4), %rax # sched: [1:0.50]
+; BDVER12-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-LABEL: rshift10:
; BTVER2: # %bb.0: # %entry
; BTVER2-NEXT: shrq $62, %rdi # sched: [1:0.50]
; BTVER2-NEXT: leaq (%rdi,%rsi,4), %rax # sched: [2:1.00]
; BTVER2-NEXT: retq # sched: [4:1.00]
-;
-; BDVER1-LABEL: rshift10:
-; BDVER1: # %bb.0: # %entry
-; BDVER1-NEXT: shrq $62, %rdi
-; BDVER1-NEXT: leaq (%rdi,%rsi,4), %rax
-; BDVER1-NEXT: retq
entry:
%shl = lshr i64 %a, 62
%shr = shl i64 %b, 2
; GENERIC-NEXT: shldq %cl, %rsi, %rax # sched: [4:1.50]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
+; BDVER12-LABEL: lshift_cl_optsize:
+; BDVER12: # %bb.0: # %entry
+; BDVER12-NEXT: movq %rdx, %rcx # sched: [1:0.33]
+; BDVER12-NEXT: movq %rdi, %rax # sched: [1:0.33]
+; BDVER12-NEXT: # kill: def $cl killed $cl killed $rcx
+; BDVER12-NEXT: shldq %cl, %rsi, %rax # sched: [4:1.50]
+; BDVER12-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-LABEL: lshift_cl_optsize:
; BTVER2: # %bb.0: # %entry
; BTVER2-NEXT: movq %rdx, %rcx # sched: [1:0.50]
; BTVER2-NEXT: # kill: def $cl killed $cl killed $rcx
; BTVER2-NEXT: shldq %cl, %rsi, %rax # sched: [4:4.00]
; BTVER2-NEXT: retq # sched: [4:1.00]
-;
-; BDVER1-LABEL: lshift_cl_optsize:
-; BDVER1: # %bb.0: # %entry
-; BDVER1-NEXT: movq %rdx, %rcx
-; BDVER1-NEXT: movq %rdi, %rax
-; BDVER1-NEXT: # kill: def $cl killed $cl killed $rcx
-; BDVER1-NEXT: shldq %cl, %rsi, %rax
-; BDVER1-NEXT: retq
entry:
%shl = shl i64 %a, %c
%sub = sub nsw i64 64, %c
; GENERIC-NEXT: shldq %cl, %rsi, %rax # sched: [4:1.50]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
+; BDVER12-LABEL: lshift_cl:
+; BDVER12: # %bb.0: # %entry
+; BDVER12-NEXT: movq %rdx, %rcx # sched: [1:0.33]
+; BDVER12-NEXT: movq %rsi, %rax # sched: [1:0.33]
+; BDVER12-NEXT: shlq %cl, %rdi # sched: [3:1.50]
+; BDVER12-NEXT: negl %ecx # sched: [1:0.33]
+; BDVER12-NEXT: # kill: def $cl killed $cl killed $rcx
+; BDVER12-NEXT: shrq %cl, %rax # sched: [3:1.50]
+; BDVER12-NEXT: orq %rdi, %rax # sched: [1:0.33]
+; BDVER12-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-LABEL: lshift_cl:
; BTVER2: # %bb.0: # %entry
; BTVER2-NEXT: movq %rdx, %rcx # sched: [1:0.50]
; BTVER2-NEXT: shrq %cl, %rax # sched: [1:0.50]
; BTVER2-NEXT: orq %rdi, %rax # sched: [1:0.50]
; BTVER2-NEXT: retq # sched: [4:1.00]
-;
-; BDVER1-LABEL: lshift_cl:
-; BDVER1: # %bb.0: # %entry
-; BDVER1-NEXT: movq %rdx, %rcx
-; BDVER1-NEXT: movq %rsi, %rax
-; BDVER1-NEXT: shlq %cl, %rdi
-; BDVER1-NEXT: negl %ecx
-; BDVER1-NEXT: # kill: def $cl killed $cl killed $rcx
-; BDVER1-NEXT: shrq %cl, %rax
-; BDVER1-NEXT: orq %rdi, %rax
-; BDVER1-NEXT: retq
entry:
%shl = shl i64 %a, %c
%sub = sub nsw i64 64, %c
; GENERIC-NEXT: shrdq %cl, %rsi, %rax # sched: [4:1.50]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
+; BDVER12-LABEL: rshift_cl_optsize:
+; BDVER12: # %bb.0: # %entry
+; BDVER12-NEXT: movq %rdx, %rcx # sched: [1:0.33]
+; BDVER12-NEXT: movq %rdi, %rax # sched: [1:0.33]
+; BDVER12-NEXT: # kill: def $cl killed $cl killed $rcx
+; BDVER12-NEXT: shrdq %cl, %rsi, %rax # sched: [4:1.50]
+; BDVER12-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-LABEL: rshift_cl_optsize:
; BTVER2: # %bb.0: # %entry
; BTVER2-NEXT: movq %rdx, %rcx # sched: [1:0.50]
; BTVER2-NEXT: # kill: def $cl killed $cl killed $rcx
; BTVER2-NEXT: shrdq %cl, %rsi, %rax # sched: [4:4.00]
; BTVER2-NEXT: retq # sched: [4:1.00]
-;
-; BDVER1-LABEL: rshift_cl_optsize:
-; BDVER1: # %bb.0: # %entry
-; BDVER1-NEXT: movq %rdx, %rcx
-; BDVER1-NEXT: movq %rdi, %rax
-; BDVER1-NEXT: # kill: def $cl killed $cl killed $rcx
-; BDVER1-NEXT: shrdq %cl, %rsi, %rax
-; BDVER1-NEXT: retq
entry:
%shr = lshr i64 %a, %c
%sub = sub nsw i64 64, %c
; GENERIC-NEXT: shrdq %cl, %rsi, %rax # sched: [4:1.50]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
+; BDVER12-LABEL: rshift_cl:
+; BDVER12: # %bb.0: # %entry
+; BDVER12-NEXT: movq %rdx, %rcx # sched: [1:0.33]
+; BDVER12-NEXT: movq %rsi, %rax # sched: [1:0.33]
+; BDVER12-NEXT: shrq %cl, %rdi # sched: [3:1.50]
+; BDVER12-NEXT: negl %ecx # sched: [1:0.33]
+; BDVER12-NEXT: # kill: def $cl killed $cl killed $rcx
+; BDVER12-NEXT: shlq %cl, %rax # sched: [3:1.50]
+; BDVER12-NEXT: orq %rdi, %rax # sched: [1:0.33]
+; BDVER12-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-LABEL: rshift_cl:
; BTVER2: # %bb.0: # %entry
; BTVER2-NEXT: movq %rdx, %rcx # sched: [1:0.50]
; BTVER2-NEXT: shlq %cl, %rax # sched: [1:0.50]
; BTVER2-NEXT: orq %rdi, %rax # sched: [1:0.50]
; BTVER2-NEXT: retq # sched: [4:1.00]
-;
-; BDVER1-LABEL: rshift_cl:
-; BDVER1: # %bb.0: # %entry
-; BDVER1-NEXT: movq %rdx, %rcx
-; BDVER1-NEXT: movq %rsi, %rax
-; BDVER1-NEXT: shrq %cl, %rdi
-; BDVER1-NEXT: negl %ecx
-; BDVER1-NEXT: # kill: def $cl killed $cl killed $rcx
-; BDVER1-NEXT: shlq %cl, %rax
-; BDVER1-NEXT: orq %rdi, %rax
-; BDVER1-NEXT: retq
entry:
%shr = lshr i64 %a, %c
%sub = sub nsw i64 64, %c
; GENERIC-NEXT: shldq %cl, %rdi, {{.*}}(%rip) # sched: [10:1.50]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
+; BDVER12-LABEL: lshift_mem_cl_optsize:
+; BDVER12: # %bb.0: # %entry
+; BDVER12-NEXT: movq %rsi, %rcx # sched: [1:0.33]
+; BDVER12-NEXT: # kill: def $cl killed $cl killed $rcx
+; BDVER12-NEXT: shldq %cl, %rdi, {{.*}}(%rip) # sched: [10:1.50]
+; BDVER12-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-LABEL: lshift_mem_cl_optsize:
; BTVER2: # %bb.0: # %entry
; BTVER2-NEXT: movq %rsi, %rcx # sched: [1:0.50]
; BTVER2-NEXT: # kill: def $cl killed $cl killed $rcx
; BTVER2-NEXT: shldq %cl, %rdi, {{.*}}(%rip) # sched: [9:11.00]
; BTVER2-NEXT: retq # sched: [4:1.00]
-;
-; BDVER1-LABEL: lshift_mem_cl_optsize:
-; BDVER1: # %bb.0: # %entry
-; BDVER1-NEXT: movq %rsi, %rcx
-; BDVER1-NEXT: # kill: def $cl killed $cl killed $rcx
-; BDVER1-NEXT: shldq %cl, %rdi, {{.*}}(%rip)
-; BDVER1-NEXT: retq
entry:
%b = load i64, i64* @x
%shl = shl i64 %b, %c
; GENERIC-NEXT: shldq %cl, %rdi, {{.*}}(%rip) # sched: [10:1.50]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
+; BDVER12-LABEL: lshift_mem_cl:
+; BDVER12: # %bb.0: # %entry
+; BDVER12-NEXT: movq %rsi, %rcx # sched: [1:0.33]
+; BDVER12-NEXT: movq {{.*}}(%rip), %rax # sched: [5:0.50]
+; BDVER12-NEXT: shlq %cl, %rax # sched: [3:1.50]
+; BDVER12-NEXT: negl %ecx # sched: [1:0.33]
+; BDVER12-NEXT: # kill: def $cl killed $cl killed $rcx
+; BDVER12-NEXT: shrq %cl, %rdi # sched: [3:1.50]
+; BDVER12-NEXT: orq %rax, %rdi # sched: [1:0.33]
+; BDVER12-NEXT: movq %rdi, {{.*}}(%rip) # sched: [1:1.00]
+; BDVER12-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-LABEL: lshift_mem_cl:
; BTVER2: # %bb.0: # %entry
; BTVER2-NEXT: movq {{.*}}(%rip), %rax # sched: [5:1.00]
; BTVER2-NEXT: orq %rax, %rdi # sched: [1:0.50]
; BTVER2-NEXT: movq %rdi, {{.*}}(%rip) # sched: [1:1.00]
; BTVER2-NEXT: retq # sched: [4:1.00]
-;
-; BDVER1-LABEL: lshift_mem_cl:
-; BDVER1: # %bb.0: # %entry
-; BDVER1-NEXT: movq %rsi, %rcx
-; BDVER1-NEXT: movq {{.*}}(%rip), %rax
-; BDVER1-NEXT: shlq %cl, %rax
-; BDVER1-NEXT: negl %ecx
-; BDVER1-NEXT: # kill: def $cl killed $cl killed $rcx
-; BDVER1-NEXT: shrq %cl, %rdi
-; BDVER1-NEXT: orq %rax, %rdi
-; BDVER1-NEXT: movq %rdi, {{.*}}(%rip)
-; BDVER1-NEXT: retq
entry:
%b = load i64, i64* @x
%shl = shl i64 %b, %c
; GENERIC-NEXT: shldq $10, %rdi, {{.*}}(%rip) # sched: [8:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
+; BDVER12-LABEL: lshift_mem:
+; BDVER12: # %bb.0: # %entry
+; BDVER12-NEXT: movq {{.*}}(%rip), %rax # sched: [5:0.50]
+; BDVER12-NEXT: shlq $10, %rax # sched: [1:0.50]
+; BDVER12-NEXT: shrq $54, %rdi # sched: [1:0.50]
+; BDVER12-NEXT: orq %rax, %rdi # sched: [1:0.33]
+; BDVER12-NEXT: movq %rdi, {{.*}}(%rip) # sched: [1:1.00]
+; BDVER12-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-LABEL: lshift_mem:
; BTVER2: # %bb.0: # %entry
; BTVER2-NEXT: movq {{.*}}(%rip), %rax # sched: [5:1.00]
; BTVER2-NEXT: orq %rax, %rdi # sched: [1:0.50]
; BTVER2-NEXT: movq %rdi, {{.*}}(%rip) # sched: [1:1.00]
; BTVER2-NEXT: retq # sched: [4:1.00]
-;
-; BDVER1-LABEL: lshift_mem:
-; BDVER1: # %bb.0: # %entry
-; BDVER1-NEXT: movq {{.*}}(%rip), %rax
-; BDVER1-NEXT: shlq $10, %rax
-; BDVER1-NEXT: shrq $54, %rdi
-; BDVER1-NEXT: orq %rax, %rdi
-; BDVER1-NEXT: movq %rdi, {{.*}}(%rip)
-; BDVER1-NEXT: retq
entry:
%b = load i64, i64* @x
%shl = shl i64 %b, 10
; GENERIC-NEXT: shldq $10, %rdi, {{.*}}(%rip) # sched: [8:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
+; BDVER12-LABEL: lshift_mem_optsize:
+; BDVER12: # %bb.0: # %entry
+; BDVER12-NEXT: shldq $10, %rdi, {{.*}}(%rip) # sched: [8:1.00]
+; BDVER12-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-LABEL: lshift_mem_optsize:
; BTVER2: # %bb.0: # %entry
; BTVER2-NEXT: shldq $10, %rdi, {{.*}}(%rip) # sched: [9:11.00]
; BTVER2-NEXT: retq # sched: [4:1.00]
-;
-; BDVER1-LABEL: lshift_mem_optsize:
-; BDVER1: # %bb.0: # %entry
-; BDVER1-NEXT: shldq $10, %rdi, {{.*}}(%rip)
-; BDVER1-NEXT: retq
entry:
%b = load i64, i64* @x
%shl = shl i64 %b, 10
; GENERIC-NEXT: movq %rax, {{.*}}(%rip) # sched: [1:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
+; BDVER12-LABEL: lshift_mem_b:
+; BDVER12: # %bb.0: # %entry
+; BDVER12-NEXT: movq {{.*}}(%rip), %rax # sched: [5:0.50]
+; BDVER12-NEXT: shlq $10, %rdi # sched: [1:0.50]
+; BDVER12-NEXT: shrq $54, %rax # sched: [1:0.50]
+; BDVER12-NEXT: orq %rdi, %rax # sched: [1:0.33]
+; BDVER12-NEXT: movq %rax, {{.*}}(%rip) # sched: [1:1.00]
+; BDVER12-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-LABEL: lshift_mem_b:
; BTVER2: # %bb.0: # %entry
; BTVER2-NEXT: movq {{.*}}(%rip), %rax # sched: [5:1.00]
; BTVER2-NEXT: orq %rdi, %rax # sched: [1:0.50]
; BTVER2-NEXT: movq %rax, {{.*}}(%rip) # sched: [1:1.00]
; BTVER2-NEXT: retq # sched: [4:1.00]
-;
-; BDVER1-LABEL: lshift_mem_b:
-; BDVER1: # %bb.0: # %entry
-; BDVER1-NEXT: movq {{.*}}(%rip), %rax
-; BDVER1-NEXT: shlq $10, %rdi
-; BDVER1-NEXT: shrq $54, %rax
-; BDVER1-NEXT: orq %rdi, %rax
-; BDVER1-NEXT: movq %rax, {{.*}}(%rip)
-; BDVER1-NEXT: retq
entry:
%a = load i64, i64* @x
%shl = shl i64 %b, 10
; GENERIC-NEXT: movq %rax, {{.*}}(%rip) # sched: [1:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
+; BDVER12-LABEL: lshift_mem_b_optsize:
+; BDVER12: # %bb.0: # %entry
+; BDVER12-NEXT: movq {{.*}}(%rip), %rax # sched: [5:0.50]
+; BDVER12-NEXT: shrdq $54, %rdi, %rax # sched: [2:0.67]
+; BDVER12-NEXT: movq %rax, {{.*}}(%rip) # sched: [1:1.00]
+; BDVER12-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-LABEL: lshift_mem_b_optsize:
; BTVER2: # %bb.0: # %entry
; BTVER2-NEXT: movq {{.*}}(%rip), %rax # sched: [5:1.00]
; BTVER2-NEXT: shrdq $54, %rdi, %rax # sched: [3:3.00]
; BTVER2-NEXT: movq %rax, {{.*}}(%rip) # sched: [1:1.00]
; BTVER2-NEXT: retq # sched: [4:1.00]
-;
-; BDVER1-LABEL: lshift_mem_b_optsize:
-; BDVER1: # %bb.0: # %entry
-; BDVER1-NEXT: movq {{.*}}(%rip), %rax
-; BDVER1-NEXT: shrdq $54, %rdi, %rax
-; BDVER1-NEXT: movq %rax, {{.*}}(%rip)
-; BDVER1-NEXT: retq
entry:
%a = load i64, i64* @x
%shl = shl i64 %b, 10
; RUN: llc < %s -mtriple=i686-unknown-unknown -print-schedule -mcpu=broadwell | FileCheck %s --check-prefix=CHECK --check-prefix=BROADWELL
; RUN: llc < %s -mtriple=i686-unknown-unknown -print-schedule -mcpu=skylake | FileCheck %s --check-prefix=CHECK --check-prefix=SKYLAKE
; RUN: llc < %s -mtriple=i686-unknown-unknown -print-schedule -mcpu=skx | FileCheck %s --check-prefix=CHECK --check-prefix=SKX
+; RUN: llc < %s -mtriple=i686-unknown-unknown -print-schedule -mcpu=x86-64 | FileCheck %s --check-prefix=CHECK --check-prefix=BDVER2
; RUN: llc < %s -mtriple=i686-unknown-unknown -print-schedule -mcpu=btver2 | FileCheck %s --check-prefix=CHECK --check-prefix=BTVER2
; RUN: llc < %s -mtriple=i686-unknown-unknown -print-schedule -mcpu=znver1 | FileCheck %s --check-prefix=CHECK --check-prefix=ZNVER1
; SKX-NEXT: #NO_APP
; SKX-NEXT: retl # sched: [6:0.50]
;
+; BDVER2-LABEL: test_aaa:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: movb {{[0-9]+}}(%esp), %al # sched: [5:0.50]
+; BDVER2-NEXT: #APP
+; BDVER2-NEXT: aaa # sched: [100:0.33]
+; BDVER2-NEXT: #NO_APP
+; BDVER2-NEXT: retl # sched: [6:1.00]
+;
; BTVER2-LABEL: test_aaa:
; BTVER2: # %bb.0:
; BTVER2-NEXT: movb {{[0-9]+}}(%esp), %al # sched: [5:1.00]
; SKX-NEXT: #NO_APP
; SKX-NEXT: retl # sched: [6:0.50]
;
+; BDVER2-LABEL: test_aad:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: movzwl {{[0-9]+}}(%esp), %eax # sched: [5:0.50]
+; BDVER2-NEXT: #APP
+; BDVER2-NEXT: aad # sched: [100:0.33]
+; BDVER2-NEXT: aad $16 # sched: [100:0.33]
+; BDVER2-NEXT: #NO_APP
+; BDVER2-NEXT: retl # sched: [6:1.00]
+;
; BTVER2-LABEL: test_aad:
; BTVER2: # %bb.0:
; BTVER2-NEXT: movzwl {{[0-9]+}}(%esp), %eax # sched: [4:1.00]
; SKX-NEXT: #NO_APP
; SKX-NEXT: retl # sched: [6:0.50]
;
+; BDVER2-LABEL: test_aam:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: movb {{[0-9]+}}(%esp), %al # sched: [5:0.50]
+; BDVER2-NEXT: #APP
+; BDVER2-NEXT: aam # sched: [100:0.33]
+; BDVER2-NEXT: aam $16 # sched: [100:0.33]
+; BDVER2-NEXT: #NO_APP
+; BDVER2-NEXT: retl # sched: [6:1.00]
+;
; BTVER2-LABEL: test_aam:
; BTVER2: # %bb.0:
; BTVER2-NEXT: movb {{[0-9]+}}(%esp), %al # sched: [5:1.00]
; SKX-NEXT: #NO_APP
; SKX-NEXT: retl # sched: [6:0.50]
;
+; BDVER2-LABEL: test_aas:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: movb {{[0-9]+}}(%esp), %al # sched: [5:0.50]
+; BDVER2-NEXT: #APP
+; BDVER2-NEXT: aas # sched: [100:0.33]
+; BDVER2-NEXT: #NO_APP
+; BDVER2-NEXT: retl # sched: [6:1.00]
+;
; BTVER2-LABEL: test_aas:
; BTVER2: # %bb.0:
; BTVER2-NEXT: movb {{[0-9]+}}(%esp), %al # sched: [5:1.00]
; SKX-NEXT: #NO_APP
; SKX-NEXT: retl # sched: [6:0.50]
;
+; BDVER2-LABEL: test_arpl:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: movzwl {{[0-9]+}}(%esp), %eax # sched: [5:0.50]
+; BDVER2-NEXT: movl {{[0-9]+}}(%esp), %ecx # sched: [5:0.50]
+; BDVER2-NEXT: #APP
+; BDVER2-NEXT: arpl %ax, (%ecx) # sched: [100:0.33]
+; BDVER2-NEXT: #NO_APP
+; BDVER2-NEXT: retl # sched: [6:1.00]
+;
; BTVER2-LABEL: test_arpl:
; BTVER2: # %bb.0:
; BTVER2-NEXT: movzwl {{[0-9]+}}(%esp), %eax # sched: [4:1.00]
; SKX-NEXT: .cfi_def_cfa_offset 4
; SKX-NEXT: retl # sched: [6:0.50]
;
+; BDVER2-LABEL: test_bound:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: pushl %esi # sched: [5:1.00]
+; BDVER2-NEXT: .cfi_def_cfa_offset 8
+; BDVER2-NEXT: .cfi_offset %esi, -8
+; BDVER2-NEXT: movzwl {{[0-9]+}}(%esp), %eax # sched: [5:0.50]
+; BDVER2-NEXT: movl {{[0-9]+}}(%esp), %ecx # sched: [5:0.50]
+; BDVER2-NEXT: movl {{[0-9]+}}(%esp), %edx # sched: [5:0.50]
+; BDVER2-NEXT: movl {{[0-9]+}}(%esp), %esi # sched: [5:0.50]
+; BDVER2-NEXT: #APP
+; BDVER2-NEXT: bound %ax, (%esi) # sched: [100:0.33]
+; BDVER2-NEXT: bound %ecx, (%edx) # sched: [100:0.33]
+; BDVER2-NEXT: #NO_APP
+; BDVER2-NEXT: popl %esi # sched: [6:0.50]
+; BDVER2-NEXT: .cfi_def_cfa_offset 4
+; BDVER2-NEXT: retl # sched: [6:1.00]
+;
; BTVER2-LABEL: test_bound:
; BTVER2: # %bb.0:
; BTVER2-NEXT: pushl %esi # sched: [1:1.00]
; SKX-NEXT: #NO_APP
; SKX-NEXT: retl # sched: [6:0.50]
;
+; BDVER2-LABEL: test_daa:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: movb {{[0-9]+}}(%esp), %al # sched: [5:0.50]
+; BDVER2-NEXT: #APP
+; BDVER2-NEXT: daa # sched: [100:0.33]
+; BDVER2-NEXT: #NO_APP
+; BDVER2-NEXT: retl # sched: [6:1.00]
+;
; BTVER2-LABEL: test_daa:
; BTVER2: # %bb.0:
; BTVER2-NEXT: movb {{[0-9]+}}(%esp), %al # sched: [5:1.00]
; SKX-NEXT: #NO_APP
; SKX-NEXT: retl # sched: [6:0.50]
;
+; BDVER2-LABEL: test_das:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: movb {{[0-9]+}}(%esp), %al # sched: [5:0.50]
+; BDVER2-NEXT: #APP
+; BDVER2-NEXT: das # sched: [100:0.33]
+; BDVER2-NEXT: #NO_APP
+; BDVER2-NEXT: retl # sched: [6:1.00]
+;
; BTVER2-LABEL: test_das:
; BTVER2: # %bb.0:
; BTVER2-NEXT: movb {{[0-9]+}}(%esp), %al # sched: [5:1.00]
; SKX-NEXT: #NO_APP
; SKX-NEXT: retl # sched: [6:0.50]
;
+; BDVER2-LABEL: test_dec16:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: movzwl {{[0-9]+}}(%esp), %eax # sched: [5:0.50]
+; BDVER2-NEXT: movl {{[0-9]+}}(%esp), %ecx # sched: [5:0.50]
+; BDVER2-NEXT: #APP
+; BDVER2-NEXT: decw %ax # sched: [1:0.33]
+; BDVER2-NEXT: decw (%ecx) # sched: [7:1.00]
+; BDVER2-NEXT: #NO_APP
+; BDVER2-NEXT: retl # sched: [6:1.00]
+;
; BTVER2-LABEL: test_dec16:
; BTVER2: # %bb.0:
; BTVER2-NEXT: movzwl {{[0-9]+}}(%esp), %eax # sched: [4:1.00]
; SKX-NEXT: #NO_APP
; SKX-NEXT: retl # sched: [6:0.50]
;
+; BDVER2-LABEL: test_dec32:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [5:0.50]
+; BDVER2-NEXT: movl {{[0-9]+}}(%esp), %ecx # sched: [5:0.50]
+; BDVER2-NEXT: #APP
+; BDVER2-NEXT: decl %eax # sched: [1:0.33]
+; BDVER2-NEXT: decl (%ecx) # sched: [7:1.00]
+; BDVER2-NEXT: #NO_APP
+; BDVER2-NEXT: retl # sched: [6:1.00]
+;
; BTVER2-LABEL: test_dec32:
; BTVER2: # %bb.0:
; BTVER2-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [5:1.00]
; SKX-NEXT: #NO_APP
; SKX-NEXT: retl # sched: [6:0.50]
;
+; BDVER2-LABEL: test_inc16:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: movzwl {{[0-9]+}}(%esp), %eax # sched: [5:0.50]
+; BDVER2-NEXT: movl {{[0-9]+}}(%esp), %ecx # sched: [5:0.50]
+; BDVER2-NEXT: #APP
+; BDVER2-NEXT: incw %ax # sched: [1:0.33]
+; BDVER2-NEXT: incw (%ecx) # sched: [7:1.00]
+; BDVER2-NEXT: #NO_APP
+; BDVER2-NEXT: retl # sched: [6:1.00]
+;
; BTVER2-LABEL: test_inc16:
; BTVER2: # %bb.0:
; BTVER2-NEXT: movzwl {{[0-9]+}}(%esp), %eax # sched: [4:1.00]
; SKX-NEXT: #NO_APP
; SKX-NEXT: retl # sched: [6:0.50]
;
+; BDVER2-LABEL: test_inc32:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [5:0.50]
+; BDVER2-NEXT: movl {{[0-9]+}}(%esp), %ecx # sched: [5:0.50]
+; BDVER2-NEXT: #APP
+; BDVER2-NEXT: incl %eax # sched: [1:0.33]
+; BDVER2-NEXT: incl (%ecx) # sched: [7:1.00]
+; BDVER2-NEXT: #NO_APP
+; BDVER2-NEXT: retl # sched: [6:1.00]
+;
; BTVER2-LABEL: test_inc32:
; BTVER2: # %bb.0:
; BTVER2-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [5:1.00]
; SKX-NEXT: #NO_APP
; SKX-NEXT: retl # sched: [6:0.50]
;
+; BDVER2-LABEL: test_into:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: #APP
+; BDVER2-NEXT: into # sched: [100:0.33]
+; BDVER2-NEXT: #NO_APP
+; BDVER2-NEXT: retl # sched: [6:1.00]
+;
; BTVER2-LABEL: test_into:
; BTVER2: # %bb.0:
; BTVER2-NEXT: #APP
; SKX-NEXT: #NO_APP
; SKX-NEXT: retl # sched: [6:0.50]
;
+; BDVER2-LABEL: test_jcxz_jecxz:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: #APP
+; BDVER2-NEXT: JXTGT:
+; BDVER2-NEXT: jcxz JXTGT # sched: [2:1.00]
+; BDVER2-NEXT: jecxz JXTGT # sched: [2:1.00]
+; BDVER2-NEXT: #NO_APP
+; BDVER2-NEXT: retl # sched: [6:1.00]
+;
; BTVER2-LABEL: test_jcxz_jecxz:
; BTVER2: # %bb.0:
; BTVER2-NEXT: #APP
; SKX-NEXT: #NO_APP
; SKX-NEXT: retl # sched: [6:0.50]
;
+; BDVER2-LABEL: test_leave:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: #APP
+; BDVER2-NEXT: leave # sched: [7:0.67]
+; BDVER2-NEXT: #NO_APP
+; BDVER2-NEXT: retl # sched: [6:1.00]
+;
; BTVER2-LABEL: test_leave:
; BTVER2: # %bb.0:
; BTVER2-NEXT: #APP
; SKX-NEXT: #NO_APP
; SKX-NEXT: retl # sched: [6:0.50]
;
+; BDVER2-LABEL: test_pop_push:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: #APP
+; BDVER2-NEXT: popl %ds # sched: [100:0.33]
+; BDVER2-NEXT: popl %es # sched: [100:0.33]
+; BDVER2-NEXT: popl %ss # sched: [100:0.33]
+; BDVER2-NEXT: popl %fs # sched: [100:0.33]
+; BDVER2-NEXT: popl %gs # sched: [100:0.33]
+; BDVER2-NEXT: pushl %cs # sched: [100:0.33]
+; BDVER2-NEXT: pushl %ds # sched: [100:0.33]
+; BDVER2-NEXT: pushl %es # sched: [100:0.33]
+; BDVER2-NEXT: pushl %ss # sched: [100:0.33]
+; BDVER2-NEXT: pushl %fs # sched: [100:0.33]
+; BDVER2-NEXT: pushl %gs # sched: [100:0.33]
+; BDVER2-NEXT: #NO_APP
+; BDVER2-NEXT: retl # sched: [6:1.00]
+;
; BTVER2-LABEL: test_pop_push:
; BTVER2: # %bb.0:
; BTVER2-NEXT: #APP
; SKX-NEXT: #NO_APP
; SKX-NEXT: retl # sched: [6:0.50]
;
+; BDVER2-LABEL: test_pop_push_16:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: movzwl {{[0-9]+}}(%esp), %eax # sched: [5:0.50]
+; BDVER2-NEXT: movl {{[0-9]+}}(%esp), %ecx # sched: [5:0.50]
+; BDVER2-NEXT: #APP
+; BDVER2-NEXT: popw %ax # sched: [6:0.50]
+; BDVER2-NEXT: popw (%ecx) # sched: [6:0.50]
+; BDVER2-NEXT: pushw %ax # sched: [5:1.00]
+; BDVER2-NEXT: pushw (%ecx) # sched: [5:1.00]
+; BDVER2-NEXT: pushw $4095 # imm = 0xFFF
+; BDVER2-NEXT: # sched: [1:1.00]
+; BDVER2-NEXT: pushw $7 # sched: [1:1.00]
+; BDVER2-NEXT: #NO_APP
+; BDVER2-NEXT: retl # sched: [6:1.00]
+;
; BTVER2-LABEL: test_pop_push_16:
; BTVER2: # %bb.0:
; BTVER2-NEXT: movzwl {{[0-9]+}}(%esp), %eax # sched: [4:1.00]
; SKX-NEXT: #NO_APP
; SKX-NEXT: retl # sched: [6:0.50]
;
+; BDVER2-LABEL: test_pop_push_32:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [5:0.50]
+; BDVER2-NEXT: movl {{[0-9]+}}(%esp), %ecx # sched: [5:0.50]
+; BDVER2-NEXT: #APP
+; BDVER2-NEXT: popl %eax # sched: [6:0.50]
+; BDVER2-NEXT: popl (%ecx) # sched: [6:0.50]
+; BDVER2-NEXT: pushl %eax # sched: [5:1.00]
+; BDVER2-NEXT: pushl (%ecx) # sched: [5:1.00]
+; BDVER2-NEXT: pushl $4095 # imm = 0xFFF
+; BDVER2-NEXT: # sched: [1:1.00]
+; BDVER2-NEXT: pushl $7 # sched: [1:1.00]
+; BDVER2-NEXT: #NO_APP
+; BDVER2-NEXT: retl # sched: [6:1.00]
+;
; BTVER2-LABEL: test_pop_push_32:
; BTVER2: # %bb.0:
; BTVER2-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [5:1.00]
; SKX-NEXT: #NO_APP
; SKX-NEXT: retl # sched: [6:0.50]
;
+; BDVER2-LABEL: test_popa_popf_pusha_pushf:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: #APP
+; BDVER2-NEXT: popal # sched: [5:0.50]
+; BDVER2-NEXT: popfl # sched: [5:0.50]
+; BDVER2-NEXT: pushal # sched: [1:1.00]
+; BDVER2-NEXT: pushfl # sched: [1:1.00]
+; BDVER2-NEXT: #NO_APP
+; BDVER2-NEXT: retl # sched: [6:1.00]
+;
; BTVER2-LABEL: test_popa_popf_pusha_pushf:
; BTVER2: # %bb.0:
; BTVER2-NEXT: #APP
; SKX-NEXT: #NO_APP
; SKX-NEXT: retl # sched: [6:0.50]
;
+; BDVER2-LABEL: test_ret:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: #APP
+; BDVER2-NEXT: retl # sched: [6:1.00]
+; BDVER2-NEXT: retl $4095 # imm = 0xFFF
+; BDVER2-NEXT: # sched: [6:1.00]
+; BDVER2-NEXT: lretl # sched: [6:1.00]
+; BDVER2-NEXT: lretl $4095 # imm = 0xFFF
+; BDVER2-NEXT: # sched: [6:1.00]
+; BDVER2-NEXT: #NO_APP
+; BDVER2-NEXT: retl # sched: [6:1.00]
+;
; BTVER2-LABEL: test_ret:
; BTVER2: # %bb.0:
; BTVER2-NEXT: #APP
; SKX-NEXT: #NO_APP
; SKX-NEXT: retl # sched: [6:0.50]
;
+; BDVER2-LABEL: test_salc:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: #APP
+; BDVER2-NEXT: salc # sched: [1:0.33]
+; BDVER2-NEXT: #NO_APP
+; BDVER2-NEXT: retl # sched: [6:1.00]
+;
; BTVER2-LABEL: test_salc:
; BTVER2: # %bb.0:
; BTVER2-NEXT: #APP
; SKX-NEXT: #NO_APP
; SKX-NEXT: retl # sched: [6:0.50]
;
+; BDVER2-LABEL: test_xchg_32:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [5:0.50]
+; BDVER2-NEXT: movl {{[0-9]+}}(%esp), %ecx # sched: [5:0.50]
+; BDVER2-NEXT: movl {{[0-9]+}}(%esp), %edx # sched: [5:0.50]
+; BDVER2-NEXT: #APP
+; BDVER2-NEXT: xchgl %eax, %eax # sched: [2:1.00]
+; BDVER2-NEXT: xchgl %ecx, %eax # sched: [2:1.00]
+; BDVER2-NEXT: xchgl %eax, (%edx) # sched: [6:1.00]
+; BDVER2-NEXT: #NO_APP
+; BDVER2-NEXT: retl # sched: [6:1.00]
+;
; BTVER2-LABEL: test_xchg_32:
; BTVER2: # %bb.0:
; BTVER2-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [5:1.00]
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=broadwell | FileCheck %s --check-prefix=CHECK --check-prefix=BROADWELL
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=skylake | FileCheck %s --check-prefix=CHECK --check-prefix=SKYLAKE
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=skx | FileCheck %s --check-prefix=CHECK --check-prefix=SKX
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=x86-64 | FileCheck %s --check-prefix=CHECK --check-prefix=BDVER2
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=btver2 | FileCheck %s --check-prefix=CHECK --check-prefix=BTVER2
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=znver1 | FileCheck %s --check-prefix=CHECK --check-prefix=ZNVER1
; SKX-NEXT: #NO_APP
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-LABEL: test_adc_8:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: #APP
+; BDVER2-NEXT: adcb $7, %al # sched: [2:0.67]
+; BDVER2-NEXT: adcb $7, %dil # sched: [2:0.67]
+; BDVER2-NEXT: adcb $7, (%rsi) # sched: [9:1.00]
+; BDVER2-NEXT: adcb %dl, %dil # sched: [2:0.67]
+; BDVER2-NEXT: adcb %dil, (%rsi) # sched: [9:1.00]
+; BDVER2-NEXT: adcb (%rsi), %dil # sched: [7:0.67]
+; BDVER2-NEXT: #NO_APP
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-LABEL: test_adc_8:
; BTVER2: # %bb.0:
; BTVER2-NEXT: #APP
; SKX-NEXT: #NO_APP
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-LABEL: test_adc_16:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: #APP
+; BDVER2-NEXT: adcw $511, %ax # imm = 0x1FF
+; BDVER2-NEXT: # sched: [2:0.67]
+; BDVER2-NEXT: adcw $511, %di # imm = 0x1FF
+; BDVER2-NEXT: # sched: [2:0.67]
+; BDVER2-NEXT: adcw $511, (%rsi) # imm = 0x1FF
+; BDVER2-NEXT: # sched: [9:1.00]
+; BDVER2-NEXT: adcw $7, %di # sched: [2:0.67]
+; BDVER2-NEXT: adcw $7, (%rsi) # sched: [9:1.00]
+; BDVER2-NEXT: adcw %dx, %di # sched: [2:0.67]
+; BDVER2-NEXT: adcw %di, (%rsi) # sched: [9:1.00]
+; BDVER2-NEXT: adcw (%rsi), %di # sched: [7:0.67]
+; BDVER2-NEXT: #NO_APP
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-LABEL: test_adc_16:
; BTVER2: # %bb.0:
; BTVER2-NEXT: #APP
; SKX-NEXT: #NO_APP
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-LABEL: test_adc_32:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: #APP
+; BDVER2-NEXT: adcl $665536, %eax # imm = 0xA27C0
+; BDVER2-NEXT: # sched: [2:0.67]
+; BDVER2-NEXT: adcl $665536, %edi # imm = 0xA27C0
+; BDVER2-NEXT: # sched: [2:0.67]
+; BDVER2-NEXT: adcl $665536, (%rsi) # imm = 0xA27C0
+; BDVER2-NEXT: # sched: [9:1.00]
+; BDVER2-NEXT: adcl $7, %edi # sched: [2:0.67]
+; BDVER2-NEXT: adcl $7, (%rsi) # sched: [9:1.00]
+; BDVER2-NEXT: adcl %edx, %edi # sched: [2:0.67]
+; BDVER2-NEXT: adcl %edi, (%rsi) # sched: [9:1.00]
+; BDVER2-NEXT: adcl (%rsi), %edi # sched: [7:0.67]
+; BDVER2-NEXT: #NO_APP
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-LABEL: test_adc_32:
; BTVER2: # %bb.0:
; BTVER2-NEXT: #APP
; SKX-NEXT: #NO_APP
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-LABEL: test_adc_64:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: #APP
+; BDVER2-NEXT: adcq $665536, %rax # imm = 0xA27C0
+; BDVER2-NEXT: # sched: [2:0.67]
+; BDVER2-NEXT: adcq $665536, %rdi # imm = 0xA27C0
+; BDVER2-NEXT: # sched: [2:0.67]
+; BDVER2-NEXT: adcq $665536, (%rsi) # imm = 0xA27C0
+; BDVER2-NEXT: # sched: [9:1.00]
+; BDVER2-NEXT: adcq $7, %rdi # sched: [2:0.67]
+; BDVER2-NEXT: adcq $7, (%rsi) # sched: [9:1.00]
+; BDVER2-NEXT: adcq %rdx, %rdi # sched: [2:0.67]
+; BDVER2-NEXT: adcq %rdi, (%rsi) # sched: [9:1.00]
+; BDVER2-NEXT: adcq (%rsi), %rdi # sched: [7:0.67]
+; BDVER2-NEXT: #NO_APP
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-LABEL: test_adc_64:
; BTVER2: # %bb.0:
; BTVER2-NEXT: #APP
; SKX-NEXT: #NO_APP
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-LABEL: test_add_8:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: #APP
+; BDVER2-NEXT: addb $7, %al # sched: [1:0.33]
+; BDVER2-NEXT: addb $7, %dil # sched: [1:0.33]
+; BDVER2-NEXT: addb $7, (%rsi) # sched: [7:1.00]
+; BDVER2-NEXT: addb %dl, %dil # sched: [1:0.33]
+; BDVER2-NEXT: addb %dil, (%rsi) # sched: [7:1.00]
+; BDVER2-NEXT: addb (%rsi), %dil # sched: [6:0.50]
+; BDVER2-NEXT: #NO_APP
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-LABEL: test_add_8:
; BTVER2: # %bb.0:
; BTVER2-NEXT: #APP
; SKX-NEXT: #NO_APP
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-LABEL: test_add_16:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: #APP
+; BDVER2-NEXT: addw $511, %ax # imm = 0x1FF
+; BDVER2-NEXT: # sched: [1:0.33]
+; BDVER2-NEXT: addw $511, %di # imm = 0x1FF
+; BDVER2-NEXT: # sched: [1:0.33]
+; BDVER2-NEXT: addw $511, (%rsi) # imm = 0x1FF
+; BDVER2-NEXT: # sched: [7:1.00]
+; BDVER2-NEXT: addw $7, %di # sched: [1:0.33]
+; BDVER2-NEXT: addw $7, (%rsi) # sched: [7:1.00]
+; BDVER2-NEXT: addw %dx, %di # sched: [1:0.33]
+; BDVER2-NEXT: addw %di, (%rsi) # sched: [7:1.00]
+; BDVER2-NEXT: addw (%rsi), %di # sched: [6:0.50]
+; BDVER2-NEXT: #NO_APP
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-LABEL: test_add_16:
; BTVER2: # %bb.0:
; BTVER2-NEXT: #APP
; SKX-NEXT: #NO_APP
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-LABEL: test_add_32:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: #APP
+; BDVER2-NEXT: addl $665536, %eax # imm = 0xA27C0
+; BDVER2-NEXT: # sched: [1:0.33]
+; BDVER2-NEXT: addl $665536, %edi # imm = 0xA27C0
+; BDVER2-NEXT: # sched: [1:0.33]
+; BDVER2-NEXT: addl $665536, (%rsi) # imm = 0xA27C0
+; BDVER2-NEXT: # sched: [7:1.00]
+; BDVER2-NEXT: addl $7, %edi # sched: [1:0.33]
+; BDVER2-NEXT: addl $7, (%rsi) # sched: [7:1.00]
+; BDVER2-NEXT: addl %edx, %edi # sched: [1:0.33]
+; BDVER2-NEXT: addl %edi, (%rsi) # sched: [7:1.00]
+; BDVER2-NEXT: addl (%rsi), %edi # sched: [6:0.50]
+; BDVER2-NEXT: #NO_APP
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-LABEL: test_add_32:
; BTVER2: # %bb.0:
; BTVER2-NEXT: #APP
; SKX-NEXT: #NO_APP
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-LABEL: test_add_64:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: #APP
+; BDVER2-NEXT: addq $665536, %rax # imm = 0xA27C0
+; BDVER2-NEXT: # sched: [1:0.33]
+; BDVER2-NEXT: addq $665536, %rdi # imm = 0xA27C0
+; BDVER2-NEXT: # sched: [1:0.33]
+; BDVER2-NEXT: addq $665536, (%rsi) # imm = 0xA27C0
+; BDVER2-NEXT: # sched: [7:1.00]
+; BDVER2-NEXT: addq $7, %rdi # sched: [1:0.33]
+; BDVER2-NEXT: addq $7, (%rsi) # sched: [7:1.00]
+; BDVER2-NEXT: addq %rdx, %rdi # sched: [1:0.33]
+; BDVER2-NEXT: addq %rdi, (%rsi) # sched: [7:1.00]
+; BDVER2-NEXT: addq (%rsi), %rdi # sched: [6:0.50]
+; BDVER2-NEXT: #NO_APP
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-LABEL: test_add_64:
; BTVER2: # %bb.0:
; BTVER2-NEXT: #APP
; SKX-NEXT: #NO_APP
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-LABEL: test_and_8:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: #APP
+; BDVER2-NEXT: andb $7, %al # sched: [1:0.33]
+; BDVER2-NEXT: andb $7, %dil # sched: [1:0.33]
+; BDVER2-NEXT: andb $7, (%rsi) # sched: [7:1.00]
+; BDVER2-NEXT: andb %dl, %dil # sched: [1:0.33]
+; BDVER2-NEXT: andb %dil, (%rsi) # sched: [7:1.00]
+; BDVER2-NEXT: andb (%rsi), %dil # sched: [6:0.50]
+; BDVER2-NEXT: #NO_APP
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-LABEL: test_and_8:
; BTVER2: # %bb.0:
; BTVER2-NEXT: #APP
; SKX-NEXT: #NO_APP
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-LABEL: test_and_16:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: #APP
+; BDVER2-NEXT: andw $511, %ax # imm = 0x1FF
+; BDVER2-NEXT: # sched: [1:0.33]
+; BDVER2-NEXT: andw $511, %di # imm = 0x1FF
+; BDVER2-NEXT: # sched: [1:0.33]
+; BDVER2-NEXT: andw $511, (%rsi) # imm = 0x1FF
+; BDVER2-NEXT: # sched: [7:1.00]
+; BDVER2-NEXT: andw $7, %di # sched: [1:0.33]
+; BDVER2-NEXT: andw $7, (%rsi) # sched: [7:1.00]
+; BDVER2-NEXT: andw %dx, %di # sched: [1:0.33]
+; BDVER2-NEXT: andw %di, (%rsi) # sched: [7:1.00]
+; BDVER2-NEXT: andw (%rsi), %di # sched: [6:0.50]
+; BDVER2-NEXT: #NO_APP
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-LABEL: test_and_16:
; BTVER2: # %bb.0:
; BTVER2-NEXT: #APP
; SKX-NEXT: #NO_APP
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-LABEL: test_and_32:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: #APP
+; BDVER2-NEXT: andl $665536, %eax # imm = 0xA27C0
+; BDVER2-NEXT: # sched: [1:0.33]
+; BDVER2-NEXT: andl $665536, %edi # imm = 0xA27C0
+; BDVER2-NEXT: # sched: [1:0.33]
+; BDVER2-NEXT: andl $665536, (%rsi) # imm = 0xA27C0
+; BDVER2-NEXT: # sched: [7:1.00]
+; BDVER2-NEXT: andl $7, %edi # sched: [1:0.33]
+; BDVER2-NEXT: andl $7, (%rsi) # sched: [7:1.00]
+; BDVER2-NEXT: andl %edx, %edi # sched: [1:0.33]
+; BDVER2-NEXT: andl %edi, (%rsi) # sched: [7:1.00]
+; BDVER2-NEXT: andl (%rsi), %edi # sched: [6:0.50]
+; BDVER2-NEXT: #NO_APP
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-LABEL: test_and_32:
; BTVER2: # %bb.0:
; BTVER2-NEXT: #APP
; SKX-NEXT: #NO_APP
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-LABEL: test_and_64:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: #APP
+; BDVER2-NEXT: andq $665536, %rax # imm = 0xA27C0
+; BDVER2-NEXT: # sched: [1:0.33]
+; BDVER2-NEXT: andq $665536, %rdi # imm = 0xA27C0
+; BDVER2-NEXT: # sched: [1:0.33]
+; BDVER2-NEXT: andq $665536, (%rsi) # imm = 0xA27C0
+; BDVER2-NEXT: # sched: [7:1.00]
+; BDVER2-NEXT: andq $7, %rdi # sched: [1:0.33]
+; BDVER2-NEXT: andq $7, (%rsi) # sched: [7:1.00]
+; BDVER2-NEXT: andq %rdx, %rdi # sched: [1:0.33]
+; BDVER2-NEXT: andq %rdi, (%rsi) # sched: [7:1.00]
+; BDVER2-NEXT: andq (%rsi), %rdi # sched: [6:0.50]
+; BDVER2-NEXT: #NO_APP
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-LABEL: test_and_64:
; BTVER2: # %bb.0:
; BTVER2-NEXT: #APP
; SKX-NEXT: # kill: def $ax killed $ax killed $eax
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-LABEL: test_bsf16:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: #APP
+; BDVER2-NEXT: bsfw %di, %ax # sched: [3:1.00]
+; BDVER2-NEXT: bsfw (%rsi), %cx # sched: [8:1.00]
+; BDVER2-NEXT: #NO_APP
+; BDVER2-NEXT: orl %ecx, %eax # sched: [1:0.33]
+; BDVER2-NEXT: # kill: def $ax killed $ax killed $eax
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-LABEL: test_bsf16:
; BTVER2: # %bb.0:
; BTVER2-NEXT: #APP
; SKX-NEXT: orl %ecx, %eax # sched: [1:0.25]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-LABEL: test_bsf32:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: #APP
+; BDVER2-NEXT: bsfl %edi, %eax # sched: [3:1.00]
+; BDVER2-NEXT: bsfl (%rsi), %ecx # sched: [8:1.00]
+; BDVER2-NEXT: #NO_APP
+; BDVER2-NEXT: orl %ecx, %eax # sched: [1:0.33]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-LABEL: test_bsf32:
; BTVER2: # %bb.0:
; BTVER2-NEXT: #APP
; SKX-NEXT: orq %rcx, %rax # sched: [1:0.25]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-LABEL: test_bsf64:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: #APP
+; BDVER2-NEXT: bsfq %rdi, %rax # sched: [3:1.00]
+; BDVER2-NEXT: bsfq (%rsi), %rcx # sched: [8:1.00]
+; BDVER2-NEXT: #NO_APP
+; BDVER2-NEXT: orq %rcx, %rax # sched: [1:0.33]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-LABEL: test_bsf64:
; BTVER2: # %bb.0:
; BTVER2-NEXT: #APP
; SKX-NEXT: # kill: def $ax killed $ax killed $eax
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-LABEL: test_bsr16:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: #APP
+; BDVER2-NEXT: bsrw %di, %ax # sched: [3:1.00]
+; BDVER2-NEXT: bsrw (%rsi), %cx # sched: [8:1.00]
+; BDVER2-NEXT: #NO_APP
+; BDVER2-NEXT: orl %ecx, %eax # sched: [1:0.33]
+; BDVER2-NEXT: # kill: def $ax killed $ax killed $eax
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-LABEL: test_bsr16:
; BTVER2: # %bb.0:
; BTVER2-NEXT: #APP
; SKX-NEXT: orl %ecx, %eax # sched: [1:0.25]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-LABEL: test_bsr32:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: #APP
+; BDVER2-NEXT: bsrl %edi, %eax # sched: [3:1.00]
+; BDVER2-NEXT: bsrl (%rsi), %ecx # sched: [8:1.00]
+; BDVER2-NEXT: #NO_APP
+; BDVER2-NEXT: orl %ecx, %eax # sched: [1:0.33]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-LABEL: test_bsr32:
; BTVER2: # %bb.0:
; BTVER2-NEXT: #APP
; SKX-NEXT: orq %rcx, %rax # sched: [1:0.25]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-LABEL: test_bsr64:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: #APP
+; BDVER2-NEXT: bsrq %rdi, %rax # sched: [3:1.00]
+; BDVER2-NEXT: bsrq (%rsi), %rcx # sched: [8:1.00]
+; BDVER2-NEXT: #NO_APP
+; BDVER2-NEXT: orq %rcx, %rax # sched: [1:0.33]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-LABEL: test_bsr64:
; BTVER2: # %bb.0:
; BTVER2-NEXT: #APP
; SKX-NEXT: bswapl %eax # sched: [1:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-LABEL: test_bswap32:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: movl %edi, %eax # sched: [1:0.33]
+; BDVER2-NEXT: bswapl %eax # sched: [1:1.00]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-LABEL: test_bswap32:
; BTVER2: # %bb.0:
; BTVER2-NEXT: movl %edi, %eax # sched: [1:0.50]
; SKX-NEXT: bswapq %rax # sched: [2:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-LABEL: test_bswap64:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: movq %rdi, %rax # sched: [1:0.33]
+; BDVER2-NEXT: bswapq %rax # sched: [2:1.00]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-LABEL: test_bswap64:
; BTVER2: # %bb.0:
; BTVER2-NEXT: movq %rdi, %rax # sched: [1:0.50]
; SKX-NEXT: #NO_APP
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-LABEL: test_bt_btc_btr_bts_16:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: #APP
+; BDVER2-NEXT: btw %si, %di # sched: [1:0.50]
+; BDVER2-NEXT: btcw %si, %di # sched: [1:0.50]
+; BDVER2-NEXT: btrw %si, %di # sched: [1:0.50]
+; BDVER2-NEXT: btsw %si, %di # sched: [1:0.50]
+; BDVER2-NEXT: btw %si, (%rdx) # sched: [9:1.00]
+; BDVER2-NEXT: btcw %si, (%rdx) # sched: [9:1.00]
+; BDVER2-NEXT: btrw %si, (%rdx) # sched: [9:1.00]
+; BDVER2-NEXT: btsw %si, (%rdx) # sched: [9:1.00]
+; BDVER2-NEXT: btw $7, %di # sched: [1:0.50]
+; BDVER2-NEXT: btcw $7, %di # sched: [1:0.50]
+; BDVER2-NEXT: btrw $7, %di # sched: [1:0.50]
+; BDVER2-NEXT: btsw $7, %di # sched: [1:0.50]
+; BDVER2-NEXT: btw $7, (%rdx) # sched: [6:0.50]
+; BDVER2-NEXT: btcw $7, (%rdx) # sched: [7:1.00]
+; BDVER2-NEXT: btrw $7, (%rdx) # sched: [7:1.00]
+; BDVER2-NEXT: btsw $7, (%rdx) # sched: [7:1.00]
+; BDVER2-NEXT: #NO_APP
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-LABEL: test_bt_btc_btr_bts_16:
; BTVER2: # %bb.0:
; BTVER2-NEXT: #APP
; SKX-NEXT: #NO_APP
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-LABEL: test_bt_btc_btr_bts_32:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: #APP
+; BDVER2-NEXT: btl %esi, %edi # sched: [1:0.50]
+; BDVER2-NEXT: btcl %esi, %edi # sched: [1:0.50]
+; BDVER2-NEXT: btrl %esi, %edi # sched: [1:0.50]
+; BDVER2-NEXT: btsl %esi, %edi # sched: [1:0.50]
+; BDVER2-NEXT: btl %esi, (%rdx) # sched: [9:1.00]
+; BDVER2-NEXT: btcl %esi, (%rdx) # sched: [9:1.00]
+; BDVER2-NEXT: btrl %esi, (%rdx) # sched: [9:1.00]
+; BDVER2-NEXT: btsl %esi, (%rdx) # sched: [9:1.00]
+; BDVER2-NEXT: btl $7, %edi # sched: [1:0.50]
+; BDVER2-NEXT: btcl $7, %edi # sched: [1:0.50]
+; BDVER2-NEXT: btrl $7, %edi # sched: [1:0.50]
+; BDVER2-NEXT: btsl $7, %edi # sched: [1:0.50]
+; BDVER2-NEXT: btl $7, (%rdx) # sched: [6:0.50]
+; BDVER2-NEXT: btcl $7, (%rdx) # sched: [7:1.00]
+; BDVER2-NEXT: btrl $7, (%rdx) # sched: [7:1.00]
+; BDVER2-NEXT: btsl $7, (%rdx) # sched: [7:1.00]
+; BDVER2-NEXT: #NO_APP
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-LABEL: test_bt_btc_btr_bts_32:
; BTVER2: # %bb.0:
; BTVER2-NEXT: #APP
; SKX-NEXT: #NO_APP
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-LABEL: test_bt_btc_btr_bts_64:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: #APP
+; BDVER2-NEXT: btq %rsi, %rdi # sched: [1:0.50]
+; BDVER2-NEXT: btcq %rsi, %rdi # sched: [1:0.50]
+; BDVER2-NEXT: btrq %rsi, %rdi # sched: [1:0.50]
+; BDVER2-NEXT: btsq %rsi, %rdi # sched: [1:0.50]
+; BDVER2-NEXT: btq %rsi, (%rdx) # sched: [9:1.00]
+; BDVER2-NEXT: btcq %rsi, (%rdx) # sched: [9:1.00]
+; BDVER2-NEXT: btrq %rsi, (%rdx) # sched: [9:1.00]
+; BDVER2-NEXT: btsq %rsi, (%rdx) # sched: [9:1.00]
+; BDVER2-NEXT: btq $7, %rdi # sched: [1:0.50]
+; BDVER2-NEXT: btcq $7, %rdi # sched: [1:0.50]
+; BDVER2-NEXT: btrq $7, %rdi # sched: [1:0.50]
+; BDVER2-NEXT: btsq $7, %rdi # sched: [1:0.50]
+; BDVER2-NEXT: btq $7, (%rdx) # sched: [6:0.50]
+; BDVER2-NEXT: btcq $7, (%rdx) # sched: [7:1.00]
+; BDVER2-NEXT: btrq $7, (%rdx) # sched: [7:1.00]
+; BDVER2-NEXT: btsq $7, (%rdx) # sched: [7:1.00]
+; BDVER2-NEXT: #NO_APP
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-LABEL: test_bt_btc_btr_bts_64:
; BTVER2: # %bb.0:
; BTVER2-NEXT: #APP
; SKX-NEXT: #NO_APP
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-LABEL: test_cbw_cdq_cdqe_cqo_cwd_cwde:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: #APP
+; BDVER2-NEXT: cbtw # sched: [1:0.33]
+; BDVER2-NEXT: cltd # sched: [1:0.50]
+; BDVER2-NEXT: cltq # sched: [1:0.33]
+; BDVER2-NEXT: cqto # sched: [1:0.50]
+; BDVER2-NEXT: cwtd # sched: [2:1.00]
+; BDVER2-NEXT: cwtl # sched: [1:0.33]
+; BDVER2-NEXT: #NO_APP
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-LABEL: test_cbw_cdq_cdqe_cqo_cwd_cwde:
; BTVER2: # %bb.0:
; BTVER2-NEXT: #APP
; SKX-NEXT: #NO_APP
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-LABEL: test_clc_cld_cmc:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: #APP
+; BDVER2-NEXT: clc # sched: [1:0.25]
+; BDVER2-NEXT: cld # sched: [1:0.33]
+; BDVER2-NEXT: cmc # sched: [1:0.33]
+; BDVER2-NEXT: #NO_APP
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-LABEL: test_clc_cld_cmc:
; BTVER2: # %bb.0:
; BTVER2-NEXT: #APP
; SKX-NEXT: #NO_APP
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-LABEL: test_cmp_8:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: #APP
+; BDVER2-NEXT: cmpb $7, %al # sched: [1:0.33]
+; BDVER2-NEXT: cmpb $7, %dil # sched: [1:0.33]
+; BDVER2-NEXT: cmpb $7, (%rsi) # sched: [6:0.50]
+; BDVER2-NEXT: cmpb %dil, %dil # sched: [1:0.33]
+; BDVER2-NEXT: cmpb %dil, (%rsi) # sched: [6:0.50]
+; BDVER2-NEXT: cmpb (%rsi), %dil # sched: [6:0.50]
+; BDVER2-NEXT: #NO_APP
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-LABEL: test_cmp_8:
; BTVER2: # %bb.0:
; BTVER2-NEXT: #APP
; SKX-NEXT: #NO_APP
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-LABEL: test_cmp_16:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: #APP
+; BDVER2-NEXT: cmpw $511, %ax # imm = 0x1FF
+; BDVER2-NEXT: # sched: [1:0.33]
+; BDVER2-NEXT: cmpw $511, %di # imm = 0x1FF
+; BDVER2-NEXT: # sched: [1:0.33]
+; BDVER2-NEXT: cmpw $511, (%rsi) # imm = 0x1FF
+; BDVER2-NEXT: # sched: [6:0.50]
+; BDVER2-NEXT: cmpw $7, %di # sched: [1:0.33]
+; BDVER2-NEXT: cmpw $7, (%rsi) # sched: [6:0.50]
+; BDVER2-NEXT: cmpw %di, %di # sched: [1:0.33]
+; BDVER2-NEXT: cmpw %di, (%rsi) # sched: [6:0.50]
+; BDVER2-NEXT: cmpw (%rsi), %di # sched: [6:0.50]
+; BDVER2-NEXT: #NO_APP
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-LABEL: test_cmp_16:
; BTVER2: # %bb.0:
; BTVER2-NEXT: #APP
; SKX-NEXT: #NO_APP
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-LABEL: test_cmp_32:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: #APP
+; BDVER2-NEXT: cmpl $665536, %eax # imm = 0xA27C0
+; BDVER2-NEXT: # sched: [1:0.33]
+; BDVER2-NEXT: cmpl $665536, %edi # imm = 0xA27C0
+; BDVER2-NEXT: # sched: [1:0.33]
+; BDVER2-NEXT: cmpl $665536, (%rsi) # imm = 0xA27C0
+; BDVER2-NEXT: # sched: [6:0.50]
+; BDVER2-NEXT: cmpl $7, %edi # sched: [1:0.33]
+; BDVER2-NEXT: cmpl $7, (%rsi) # sched: [6:0.50]
+; BDVER2-NEXT: cmpl %edi, %edi # sched: [1:0.33]
+; BDVER2-NEXT: cmpl %edi, (%rsi) # sched: [6:0.50]
+; BDVER2-NEXT: cmpl (%rsi), %edi # sched: [6:0.50]
+; BDVER2-NEXT: #NO_APP
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-LABEL: test_cmp_32:
; BTVER2: # %bb.0:
; BTVER2-NEXT: #APP
; SKX-NEXT: #NO_APP
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-LABEL: test_cmp_64:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: #APP
+; BDVER2-NEXT: cmpq $665536, %rax # imm = 0xA27C0
+; BDVER2-NEXT: # sched: [1:0.33]
+; BDVER2-NEXT: cmpq $665536, %rdi # imm = 0xA27C0
+; BDVER2-NEXT: # sched: [1:0.33]
+; BDVER2-NEXT: cmpq $665536, (%rsi) # imm = 0xA27C0
+; BDVER2-NEXT: # sched: [6:0.50]
+; BDVER2-NEXT: cmpq $7, %rdi # sched: [1:0.33]
+; BDVER2-NEXT: cmpq $7, (%rsi) # sched: [6:0.50]
+; BDVER2-NEXT: cmpq %rdi, %rdi # sched: [1:0.33]
+; BDVER2-NEXT: cmpq %rdi, (%rsi) # sched: [6:0.50]
+; BDVER2-NEXT: cmpq (%rsi), %rdi # sched: [6:0.50]
+; BDVER2-NEXT: #NO_APP
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-LABEL: test_cmp_64:
; BTVER2: # %bb.0:
; BTVER2-NEXT: #APP
; SKX-NEXT: #NO_APP
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-LABEL: test_cmps:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: #APP
+; BDVER2-NEXT: cmpsb %es:(%rdi), (%rsi) # sched: [8:1.00]
+; BDVER2-NEXT: cmpsw %es:(%rdi), (%rsi) # sched: [8:1.00]
+; BDVER2-NEXT: cmpsl %es:(%rdi), (%rsi) # sched: [8:1.00]
+; BDVER2-NEXT: cmpsq %es:(%rdi), (%rsi) # sched: [8:1.00]
+; BDVER2-NEXT: #NO_APP
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-LABEL: test_cmps:
; BTVER2: # %bb.0:
; BTVER2-NEXT: #APP
; SKX-NEXT: #NO_APP
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-LABEL: test_cmpxchg_8:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: #APP
+; BDVER2-NEXT: cmpxchgb %dil, %sil # sched: [5:1.33]
+; BDVER2-NEXT: cmpxchgb %dil, (%rdx) # sched: [8:2.00]
+; BDVER2-NEXT: #NO_APP
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-LABEL: test_cmpxchg_8:
; BTVER2: # %bb.0:
; BTVER2-NEXT: #APP
; SKX-NEXT: #NO_APP
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-LABEL: test_cmpxchg_16:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: #APP
+; BDVER2-NEXT: cmpxchgw %di, %si # sched: [5:1.33]
+; BDVER2-NEXT: cmpxchgw %di, (%rdx) # sched: [8:2.00]
+; BDVER2-NEXT: #NO_APP
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-LABEL: test_cmpxchg_16:
; BTVER2: # %bb.0:
; BTVER2-NEXT: #APP
; SKX-NEXT: #NO_APP
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-LABEL: test_cmpxchg_32:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: #APP
+; BDVER2-NEXT: cmpxchgl %edi, %esi # sched: [5:1.33]
+; BDVER2-NEXT: cmpxchgl %edi, (%rdx) # sched: [8:2.00]
+; BDVER2-NEXT: #NO_APP
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-LABEL: test_cmpxchg_32:
; BTVER2: # %bb.0:
; BTVER2-NEXT: #APP
; SKX-NEXT: #NO_APP
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-LABEL: test_cmpxchg_64:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: #APP
+; BDVER2-NEXT: cmpxchgq %rdi, %rsi # sched: [5:1.33]
+; BDVER2-NEXT: cmpxchgq %rdi, (%rdx) # sched: [8:2.00]
+; BDVER2-NEXT: #NO_APP
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-LABEL: test_cmpxchg_64:
; BTVER2: # %bb.0:
; BTVER2-NEXT: #APP
; SKX-NEXT: #NO_APP
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-LABEL: test_cmpxchg8b_cmpxchg16b:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: #APP
+; BDVER2-NEXT: cmpxchg8b (%rdi) # sched: [6:1.00]
+; BDVER2-NEXT: cmpxchg16b (%rdi) # sched: [6:1.00]
+; BDVER2-NEXT: #NO_APP
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-LABEL: test_cmpxchg8b_cmpxchg16b:
; BTVER2: # %bb.0:
; BTVER2-NEXT: #APP
; SKX-NEXT: #NO_APP
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-LABEL: test_cpuid:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: #APP
+; BDVER2-NEXT: cpuid # sched: [100:0.33]
+; BDVER2-NEXT: #NO_APP
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-LABEL: test_cpuid:
; BTVER2: # %bb.0:
; BTVER2-NEXT: #APP
; SKX-NEXT: #NO_APP
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-LABEL: test_dec8:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: #APP
+; BDVER2-NEXT: decb %dil # sched: [1:0.33]
+; BDVER2-NEXT: decb (%rsi) # sched: [7:1.00]
+; BDVER2-NEXT: #NO_APP
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-LABEL: test_dec8:
; BTVER2: # %bb.0:
; BTVER2-NEXT: #APP
; SKX-NEXT: #NO_APP
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-LABEL: test_dec16:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: #APP
+; BDVER2-NEXT: decw %di # sched: [1:0.33]
+; BDVER2-NEXT: decw (%rsi) # sched: [7:1.00]
+; BDVER2-NEXT: #NO_APP
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-LABEL: test_dec16:
; BTVER2: # %bb.0:
; BTVER2-NEXT: #APP
; SKX-NEXT: #NO_APP
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-LABEL: test_dec32:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: #APP
+; BDVER2-NEXT: decl %edi # sched: [1:0.33]
+; BDVER2-NEXT: decl (%rsi) # sched: [7:1.00]
+; BDVER2-NEXT: #NO_APP
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-LABEL: test_dec32:
; BTVER2: # %bb.0:
; BTVER2-NEXT: #APP
; SKX-NEXT: #NO_APP
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-LABEL: test_dec64:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: #APP
+; BDVER2-NEXT: decq %rdi # sched: [1:0.33]
+; BDVER2-NEXT: decq (%rsi) # sched: [7:1.00]
+; BDVER2-NEXT: #NO_APP
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-LABEL: test_dec64:
; BTVER2: # %bb.0:
; BTVER2-NEXT: #APP
; SKX-NEXT: #NO_APP
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-LABEL: test_div:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: movq {{[0-9]+}}(%rsp), %r10 # sched: [5:0.50]
+; BDVER2-NEXT: movq {{[0-9]+}}(%rsp), %rax # sched: [5:0.50]
+; BDVER2-NEXT: #APP
+; BDVER2-NEXT: divb %dil # sched: [25:10.00]
+; BDVER2-NEXT: divb (%r8) # sched: [30:10.00]
+; BDVER2-NEXT: divw %si # sched: [25:10.00]
+; BDVER2-NEXT: divw (%r9) # sched: [30:10.00]
+; BDVER2-NEXT: divl %edx # sched: [25:10.00]
+; BDVER2-NEXT: divl (%rax) # sched: [30:10.00]
+; BDVER2-NEXT: divq %rcx # sched: [25:10.00]
+; BDVER2-NEXT: divq (%r10) # sched: [30:10.00]
+; BDVER2-NEXT: #NO_APP
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-LABEL: test_div:
; BTVER2: # %bb.0:
; BTVER2-NEXT: movq {{[0-9]+}}(%rsp), %r10 # sched: [5:1.00]
; SKX-NEXT: #NO_APP
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-LABEL: test_enter:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: #APP
+; BDVER2-NEXT: enter $7, $4095 # imm = 0xFFF
+; BDVER2-NEXT: # sched: [100:0.33]
+; BDVER2-NEXT: #NO_APP
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-LABEL: test_enter:
; BTVER2: # %bb.0:
; BTVER2-NEXT: #APP
; SKX-NEXT: #NO_APP
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-LABEL: test_idiv:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: movq {{[0-9]+}}(%rsp), %r10 # sched: [5:0.50]
+; BDVER2-NEXT: movq {{[0-9]+}}(%rsp), %rax # sched: [5:0.50]
+; BDVER2-NEXT: #APP
+; BDVER2-NEXT: idivb %dil # sched: [25:10.00]
+; BDVER2-NEXT: idivb (%r8) # sched: [30:10.00]
+; BDVER2-NEXT: idivw %si # sched: [25:10.00]
+; BDVER2-NEXT: idivw (%r9) # sched: [30:10.00]
+; BDVER2-NEXT: idivl %edx # sched: [25:10.00]
+; BDVER2-NEXT: idivl (%rax) # sched: [30:10.00]
+; BDVER2-NEXT: idivq %rcx # sched: [25:10.00]
+; BDVER2-NEXT: idivq (%r10) # sched: [30:10.00]
+; BDVER2-NEXT: #NO_APP
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-LABEL: test_idiv:
; BTVER2: # %bb.0:
; BTVER2-NEXT: movq {{[0-9]+}}(%rsp), %r10 # sched: [5:1.00]
; SKX-NEXT: #NO_APP
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-LABEL: test_imul_8:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: #APP
+; BDVER2-NEXT: imulb %dil # sched: [3:1.00]
+; BDVER2-NEXT: imulb (%rsi) # sched: [8:1.00]
+; BDVER2-NEXT: #NO_APP
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-LABEL: test_imul_8:
; BTVER2: # %bb.0:
; BTVER2-NEXT: #APP
; SKX-NEXT: #NO_APP
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-LABEL: test_imul_16:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: #APP
+; BDVER2-NEXT: imulw %di # sched: [4:1.33]
+; BDVER2-NEXT: imulw (%rsi) # sched: [9:1.33]
+; BDVER2-NEXT: imulw %dx, %di # sched: [3:1.00]
+; BDVER2-NEXT: imulw (%rsi), %di # sched: [8:1.00]
+; BDVER2-NEXT: imulw $511, %di, %di # imm = 0x1FF
+; BDVER2-NEXT: # sched: [4:1.00]
+; BDVER2-NEXT: imulw $511, (%rsi), %di # imm = 0x1FF
+; BDVER2-NEXT: # sched: [8:1.00]
+; BDVER2-NEXT: imulw $7, %di, %di # sched: [4:1.00]
+; BDVER2-NEXT: imulw $7, (%rsi), %di # sched: [8:1.00]
+; BDVER2-NEXT: #NO_APP
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-LABEL: test_imul_16:
; BTVER2: # %bb.0:
; BTVER2-NEXT: #APP
; SKX-NEXT: #NO_APP
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-LABEL: test_imul_32:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: #APP
+; BDVER2-NEXT: imull %edi # sched: [4:1.00]
+; BDVER2-NEXT: imull (%rsi) # sched: [9:1.00]
+; BDVER2-NEXT: imull %edx, %edi # sched: [3:1.00]
+; BDVER2-NEXT: imull (%rsi), %edi # sched: [8:1.00]
+; BDVER2-NEXT: imull $665536, %edi, %edi # imm = 0xA27C0
+; BDVER2-NEXT: # sched: [3:1.00]
+; BDVER2-NEXT: imull $665536, (%rsi), %edi # imm = 0xA27C0
+; BDVER2-NEXT: # sched: [8:1.00]
+; BDVER2-NEXT: imull $7, %edi, %edi # sched: [3:1.00]
+; BDVER2-NEXT: imull $7, (%rsi), %edi # sched: [8:1.00]
+; BDVER2-NEXT: #NO_APP
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-LABEL: test_imul_32:
; BTVER2: # %bb.0:
; BTVER2-NEXT: #APP
; SKX-NEXT: #NO_APP
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-LABEL: test_imul_64:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: #APP
+; BDVER2-NEXT: imulq %rdi # sched: [4:1.00]
+; BDVER2-NEXT: imulq (%rsi) # sched: [9:1.00]
+; BDVER2-NEXT: imulq %rdx, %rdi # sched: [3:1.00]
+; BDVER2-NEXT: imulq (%rsi), %rdi # sched: [8:1.00]
+; BDVER2-NEXT: imulq $665536, %rdi, %rdi # imm = 0xA27C0
+; BDVER2-NEXT: # sched: [3:1.00]
+; BDVER2-NEXT: imulq $665536, (%rsi), %rdi # imm = 0xA27C0
+; BDVER2-NEXT: # sched: [8:1.00]
+; BDVER2-NEXT: imulq $7, %rdi, %rdi # sched: [3:1.00]
+; BDVER2-NEXT: imulq $7, (%rsi), %rdi # sched: [8:1.00]
+; BDVER2-NEXT: #NO_APP
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-LABEL: test_imul_64:
; BTVER2: # %bb.0:
; BTVER2-NEXT: #APP
; SKX-NEXT: #NO_APP
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-LABEL: test_in:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: #APP
+; BDVER2-NEXT: inb $7, %al # sched: [100:0.33]
+; BDVER2-NEXT: inw $7, %ax # sched: [100:0.33]
+; BDVER2-NEXT: inl $7, %eax # sched: [100:0.33]
+; BDVER2-NEXT: inb %dx, %al # sched: [100:0.33]
+; BDVER2-NEXT: inw %dx, %ax # sched: [100:0.33]
+; BDVER2-NEXT: inl %dx, %eax # sched: [100:0.33]
+; BDVER2-NEXT: #NO_APP
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-LABEL: test_in:
; BTVER2: # %bb.0:
; BTVER2-NEXT: #APP
; SKX-NEXT: #NO_APP
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-LABEL: test_inc8:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: #APP
+; BDVER2-NEXT: incb %dil # sched: [1:0.33]
+; BDVER2-NEXT: incb (%rsi) # sched: [7:1.00]
+; BDVER2-NEXT: #NO_APP
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-LABEL: test_inc8:
; BTVER2: # %bb.0:
; BTVER2-NEXT: #APP
; SKX-NEXT: #NO_APP
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-LABEL: test_inc16:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: #APP
+; BDVER2-NEXT: incw %di # sched: [1:0.33]
+; BDVER2-NEXT: incw (%rsi) # sched: [7:1.00]
+; BDVER2-NEXT: #NO_APP
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-LABEL: test_inc16:
; BTVER2: # %bb.0:
; BTVER2-NEXT: #APP
; SKX-NEXT: #NO_APP
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-LABEL: test_inc32:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: #APP
+; BDVER2-NEXT: incl %edi # sched: [1:0.33]
+; BDVER2-NEXT: incl (%rsi) # sched: [7:1.00]
+; BDVER2-NEXT: #NO_APP
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-LABEL: test_inc32:
; BTVER2: # %bb.0:
; BTVER2-NEXT: #APP
; SKX-NEXT: #NO_APP
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-LABEL: test_inc64:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: #APP
+; BDVER2-NEXT: incq %rdi # sched: [1:0.33]
+; BDVER2-NEXT: incq (%rsi) # sched: [7:1.00]
+; BDVER2-NEXT: #NO_APP
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-LABEL: test_inc64:
; BTVER2: # %bb.0:
; BTVER2-NEXT: #APP
; SKX-NEXT: #NO_APP
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-LABEL: test_ins:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: #APP
+; BDVER2-NEXT: insb %dx, %es:(%rdi) # sched: [100:0.33]
+; BDVER2-NEXT: insw %dx, %es:(%rdi) # sched: [100:0.33]
+; BDVER2-NEXT: insl %dx, %es:(%rdi) # sched: [100:0.33]
+; BDVER2-NEXT: #NO_APP
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-LABEL: test_ins:
; BTVER2: # %bb.0:
; BTVER2-NEXT: #APP
; SKX-NEXT: #NO_APP
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-LABEL: test_int:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: #APP
+; BDVER2-NEXT: int $7 # sched: [100:0.33]
+; BDVER2-NEXT: #NO_APP
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-LABEL: test_int:
; BTVER2: # %bb.0:
; BTVER2-NEXT: #APP
; SKX-NEXT: #NO_APP
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-LABEL: test_invlpg_invlpga:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: #APP
+; BDVER2-NEXT: invlpg (%rdi) # sched: [100:0.33]
+; BDVER2-NEXT: invlpga %rax, %ecx # sched: [100:0.33]
+; BDVER2-NEXT: #NO_APP
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-LABEL: test_invlpg_invlpga:
; BTVER2: # %bb.0:
; BTVER2-NEXT: #APP
; SKX-NEXT: #NO_APP
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-LABEL: test_jcc:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: #APP
+; BDVER2-NEXT: JCCTGT:
+; BDVER2-NEXT: jo JCCTGT # sched: [1:1.00]
+; BDVER2-NEXT: jno JCCTGT # sched: [1:1.00]
+; BDVER2-NEXT: jb JCCTGT # sched: [1:1.00]
+; BDVER2-NEXT: jb JCCTGT # sched: [1:1.00]
+; BDVER2-NEXT: jb JCCTGT # sched: [1:1.00]
+; BDVER2-NEXT: jae JCCTGT # sched: [1:1.00]
+; BDVER2-NEXT: jae JCCTGT # sched: [1:1.00]
+; BDVER2-NEXT: jae JCCTGT # sched: [1:1.00]
+; BDVER2-NEXT: je JCCTGT # sched: [1:1.00]
+; BDVER2-NEXT: je JCCTGT # sched: [1:1.00]
+; BDVER2-NEXT: jne JCCTGT # sched: [1:1.00]
+; BDVER2-NEXT: jne JCCTGT # sched: [1:1.00]
+; BDVER2-NEXT: jbe JCCTGT # sched: [1:1.00]
+; BDVER2-NEXT: jbe JCCTGT # sched: [1:1.00]
+; BDVER2-NEXT: ja JCCTGT # sched: [1:1.00]
+; BDVER2-NEXT: ja JCCTGT # sched: [1:1.00]
+; BDVER2-NEXT: js JCCTGT # sched: [1:1.00]
+; BDVER2-NEXT: jns JCCTGT # sched: [1:1.00]
+; BDVER2-NEXT: jp JCCTGT # sched: [1:1.00]
+; BDVER2-NEXT: jp JCCTGT # sched: [1:1.00]
+; BDVER2-NEXT: jnp JCCTGT # sched: [1:1.00]
+; BDVER2-NEXT: jnp JCCTGT # sched: [1:1.00]
+; BDVER2-NEXT: jl JCCTGT # sched: [1:1.00]
+; BDVER2-NEXT: jl JCCTGT # sched: [1:1.00]
+; BDVER2-NEXT: jge JCCTGT # sched: [1:1.00]
+; BDVER2-NEXT: jge JCCTGT # sched: [1:1.00]
+; BDVER2-NEXT: jle JCCTGT # sched: [1:1.00]
+; BDVER2-NEXT: jle JCCTGT # sched: [1:1.00]
+; BDVER2-NEXT: jg JCCTGT # sched: [1:1.00]
+; BDVER2-NEXT: jg JCCTGT # sched: [1:1.00]
+; BDVER2-NEXT: #NO_APP
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-LABEL: test_jcc:
; BTVER2: # %bb.0:
; BTVER2-NEXT: #APP
; SKX-NEXT: #NO_APP
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-LABEL: test_jecxz_jrcxz:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: #APP
+; BDVER2-NEXT: JXTGT:
+; BDVER2-NEXT: jecxz JXTGT # sched: [2:1.00]
+; BDVER2-NEXT: jrcxz JXTGT # sched: [2:1.00]
+; BDVER2-NEXT: #NO_APP
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-LABEL: test_jecxz_jrcxz:
; BTVER2: # %bb.0:
; BTVER2-NEXT: #APP
; SKX-NEXT: #NO_APP
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-LABEL: test_lahf_sahf:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: #APP
+; BDVER2-NEXT: lahf # sched: [1:0.50]
+; BDVER2-NEXT: sahf # sched: [1:0.50]
+; BDVER2-NEXT: #NO_APP
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-LABEL: test_lahf_sahf:
; BTVER2: # %bb.0:
; BTVER2-NEXT: #APP
; SKX-NEXT: #NO_APP
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-LABEL: test_leave:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: #APP
+; BDVER2-NEXT: leave # sched: [7:0.67]
+; BDVER2-NEXT: #NO_APP
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-LABEL: test_leave:
; BTVER2: # %bb.0:
; BTVER2-NEXT: #APP
; SKX-NEXT: #NO_APP
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-LABEL: test_lods:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: #APP
+; BDVER2-NEXT: lodsb (%rsi), %al # sched: [7:0.67]
+; BDVER2-NEXT: lodsw (%rsi), %ax # sched: [7:0.67]
+; BDVER2-NEXT: lodsl (%rsi), %eax # sched: [6:0.50]
+; BDVER2-NEXT: lodsq (%rsi), %rax # sched: [6:0.50]
+; BDVER2-NEXT: #NO_APP
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-LABEL: test_lods:
; BTVER2: # %bb.0:
; BTVER2-NEXT: #APP
; SKX-NEXT: #NO_APP
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-LABEL: test_loop:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: #APP
+; BDVER2-NEXT: LTGT:
+; BDVER2-NEXT: loop LTGT # sched: [1:1.00]
+; BDVER2-NEXT: loope LTGT # sched: [1:1.00]
+; BDVER2-NEXT: loopne LTGT # sched: [1:1.00]
+; BDVER2-NEXT: #NO_APP
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-LABEL: test_loop:
; BTVER2: # %bb.0:
; BTVER2-NEXT: #APP
; SKX-NEXT: #NO_APP
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-LABEL: test_movnti:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: #APP
+; BDVER2-NEXT: movntil %edi, (%rsi) # sched: [1:1.00]
+; BDVER2-NEXT: movntiq %rdx, (%rcx) # sched: [1:1.00]
+; BDVER2-NEXT: #NO_APP
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-LABEL: test_movnti:
; BTVER2: # %bb.0:
; BTVER2-NEXT: #APP
; SKX-NEXT: #NO_APP
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-LABEL: test_movs:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: #APP
+; BDVER2-NEXT: movsb (%rsi), %es:(%rdi) # sched: [8:1.00]
+; BDVER2-NEXT: movsw (%rsi), %es:(%rdi) # sched: [8:1.00]
+; BDVER2-NEXT: movsl (%rsi), %es:(%rdi) # sched: [8:1.00]
+; BDVER2-NEXT: movsq (%rsi), %es:(%rdi) # sched: [8:1.00]
+; BDVER2-NEXT: #NO_APP
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-LABEL: test_movs:
; BTVER2: # %bb.0:
; BTVER2-NEXT: #APP
; SKX-NEXT: orq %rcx, %rax # sched: [1:0.25]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-LABEL: test_movslq:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: #APP
+; BDVER2-NEXT: movslq %edi, %rax # sched: [1:0.33]
+; BDVER2-NEXT: movslq (%rsi), %rcx # sched: [5:0.50]
+; BDVER2-NEXT: #NO_APP
+; BDVER2-NEXT: orq %rcx, %rax # sched: [1:0.33]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-LABEL: test_movslq:
; BTVER2: # %bb.0:
; BTVER2-NEXT: #APP
; SKX-NEXT: #NO_APP
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-LABEL: test_mul:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: movq {{[0-9]+}}(%rsp), %r10 # sched: [5:0.50]
+; BDVER2-NEXT: movq {{[0-9]+}}(%rsp), %rax # sched: [5:0.50]
+; BDVER2-NEXT: #APP
+; BDVER2-NEXT: mulb %dil # sched: [3:1.00]
+; BDVER2-NEXT: mulb (%r8) # sched: [8:1.00]
+; BDVER2-NEXT: mulw %si # sched: [4:1.33]
+; BDVER2-NEXT: mulw (%r9) # sched: [9:1.33]
+; BDVER2-NEXT: mull %edx # sched: [4:1.00]
+; BDVER2-NEXT: mull (%rax) # sched: [9:1.00]
+; BDVER2-NEXT: mulq %rcx # sched: [4:1.00]
+; BDVER2-NEXT: mulq (%r10) # sched: [9:1.00]
+; BDVER2-NEXT: #NO_APP
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-LABEL: test_mul:
; BTVER2: # %bb.0:
; BTVER2-NEXT: movq {{[0-9]+}}(%rsp), %r10 # sched: [5:1.00]
; SKX-NEXT: #NO_APP
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-LABEL: test_neg:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: movq {{[0-9]+}}(%rsp), %r10 # sched: [5:0.50]
+; BDVER2-NEXT: movq {{[0-9]+}}(%rsp), %rax # sched: [5:0.50]
+; BDVER2-NEXT: #APP
+; BDVER2-NEXT: negb %dil # sched: [1:0.33]
+; BDVER2-NEXT: negb (%r8) # sched: [7:1.00]
+; BDVER2-NEXT: negw %si # sched: [1:0.33]
+; BDVER2-NEXT: negw (%r9) # sched: [7:1.00]
+; BDVER2-NEXT: negl %edx # sched: [1:0.33]
+; BDVER2-NEXT: negl (%rax) # sched: [7:1.00]
+; BDVER2-NEXT: negq %rcx # sched: [1:0.33]
+; BDVER2-NEXT: negq (%r10) # sched: [7:1.00]
+; BDVER2-NEXT: #NO_APP
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-LABEL: test_neg:
; BTVER2: # %bb.0:
; BTVER2-NEXT: movq {{[0-9]+}}(%rsp), %r10 # sched: [5:1.00]
; SKX-NEXT: #NO_APP
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-LABEL: test_nop:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: #APP
+; BDVER2-NEXT: nop # sched: [1:0.25]
+; BDVER2-NEXT: nopw %di # sched: [1:0.25]
+; BDVER2-NEXT: nopw (%rcx) # sched: [1:0.25]
+; BDVER2-NEXT: nopl %esi # sched: [1:0.25]
+; BDVER2-NEXT: nopl (%r8) # sched: [1:0.25]
+; BDVER2-NEXT: nopq %rdx # sched: [1:0.25]
+; BDVER2-NEXT: nopq (%r9) # sched: [1:0.25]
+; BDVER2-NEXT: #NO_APP
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-LABEL: test_nop:
; BTVER2: # %bb.0:
; BTVER2-NEXT: #APP
; SKX-NEXT: #NO_APP
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-LABEL: test_not:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: movq {{[0-9]+}}(%rsp), %r10 # sched: [5:0.50]
+; BDVER2-NEXT: movq {{[0-9]+}}(%rsp), %rax # sched: [5:0.50]
+; BDVER2-NEXT: #APP
+; BDVER2-NEXT: notb %dil # sched: [1:0.33]
+; BDVER2-NEXT: notb (%r8) # sched: [7:1.00]
+; BDVER2-NEXT: notw %si # sched: [1:0.33]
+; BDVER2-NEXT: notw (%r9) # sched: [7:1.00]
+; BDVER2-NEXT: notl %edx # sched: [1:0.33]
+; BDVER2-NEXT: notl (%rax) # sched: [7:1.00]
+; BDVER2-NEXT: notq %rcx # sched: [1:0.33]
+; BDVER2-NEXT: notq (%r10) # sched: [7:1.00]
+; BDVER2-NEXT: #NO_APP
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-LABEL: test_not:
; BTVER2: # %bb.0:
; BTVER2-NEXT: movq {{[0-9]+}}(%rsp), %r10 # sched: [5:1.00]
; SKX-NEXT: #NO_APP
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-LABEL: test_or_8:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: #APP
+; BDVER2-NEXT: orb $7, %al # sched: [1:0.33]
+; BDVER2-NEXT: orb $7, %dil # sched: [1:0.33]
+; BDVER2-NEXT: orb $7, (%rsi) # sched: [7:1.00]
+; BDVER2-NEXT: orb %dl, %dil # sched: [1:0.33]
+; BDVER2-NEXT: orb %dil, (%rsi) # sched: [7:1.00]
+; BDVER2-NEXT: orb (%rsi), %dil # sched: [6:0.50]
+; BDVER2-NEXT: #NO_APP
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-LABEL: test_or_8:
; BTVER2: # %bb.0:
; BTVER2-NEXT: #APP
; SKX-NEXT: #NO_APP
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-LABEL: test_or_16:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: #APP
+; BDVER2-NEXT: orw $511, %ax # imm = 0x1FF
+; BDVER2-NEXT: # sched: [1:0.33]
+; BDVER2-NEXT: orw $511, %di # imm = 0x1FF
+; BDVER2-NEXT: # sched: [1:0.33]
+; BDVER2-NEXT: orw $511, (%rsi) # imm = 0x1FF
+; BDVER2-NEXT: # sched: [7:1.00]
+; BDVER2-NEXT: orw $7, %di # sched: [1:0.33]
+; BDVER2-NEXT: orw $7, (%rsi) # sched: [7:1.00]
+; BDVER2-NEXT: orw %dx, %di # sched: [1:0.33]
+; BDVER2-NEXT: orw %di, (%rsi) # sched: [7:1.00]
+; BDVER2-NEXT: orw (%rsi), %di # sched: [6:0.50]
+; BDVER2-NEXT: #NO_APP
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-LABEL: test_or_16:
; BTVER2: # %bb.0:
; BTVER2-NEXT: #APP
; SKX-NEXT: #NO_APP
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-LABEL: test_or_32:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: #APP
+; BDVER2-NEXT: orl $665536, %eax # imm = 0xA27C0
+; BDVER2-NEXT: # sched: [1:0.33]
+; BDVER2-NEXT: orl $665536, %edi # imm = 0xA27C0
+; BDVER2-NEXT: # sched: [1:0.33]
+; BDVER2-NEXT: orl $665536, (%rsi) # imm = 0xA27C0
+; BDVER2-NEXT: # sched: [7:1.00]
+; BDVER2-NEXT: orl $7, %edi # sched: [1:0.33]
+; BDVER2-NEXT: orl $7, (%rsi) # sched: [7:1.00]
+; BDVER2-NEXT: orl %edx, %edi # sched: [1:0.33]
+; BDVER2-NEXT: orl %edi, (%rsi) # sched: [7:1.00]
+; BDVER2-NEXT: orl (%rsi), %edi # sched: [6:0.50]
+; BDVER2-NEXT: #NO_APP
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-LABEL: test_or_32:
; BTVER2: # %bb.0:
; BTVER2-NEXT: #APP
; SKX-NEXT: #NO_APP
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-LABEL: test_or_64:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: #APP
+; BDVER2-NEXT: orq $665536, %rax # imm = 0xA27C0
+; BDVER2-NEXT: # sched: [1:0.33]
+; BDVER2-NEXT: orq $665536, %rdi # imm = 0xA27C0
+; BDVER2-NEXT: # sched: [1:0.33]
+; BDVER2-NEXT: orq $665536, (%rsi) # imm = 0xA27C0
+; BDVER2-NEXT: # sched: [7:1.00]
+; BDVER2-NEXT: orq $7, %rdi # sched: [1:0.33]
+; BDVER2-NEXT: orq $7, (%rsi) # sched: [7:1.00]
+; BDVER2-NEXT: orq %rdx, %rdi # sched: [1:0.33]
+; BDVER2-NEXT: orq %rdi, (%rsi) # sched: [7:1.00]
+; BDVER2-NEXT: orq (%rsi), %rdi # sched: [6:0.50]
+; BDVER2-NEXT: #NO_APP
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-LABEL: test_or_64:
; BTVER2: # %bb.0:
; BTVER2-NEXT: #APP
; SKX-NEXT: #NO_APP
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-LABEL: test_out:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: #APP
+; BDVER2-NEXT: outb %al, $7 # sched: [100:0.33]
+; BDVER2-NEXT: outw %ax, $7 # sched: [100:0.33]
+; BDVER2-NEXT: outl %eax, $7 # sched: [100:0.33]
+; BDVER2-NEXT: outb %al, %dx # sched: [100:0.33]
+; BDVER2-NEXT: outw %ax, %dx # sched: [100:0.33]
+; BDVER2-NEXT: outl %eax, %dx # sched: [100:0.33]
+; BDVER2-NEXT: #NO_APP
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-LABEL: test_out:
; BTVER2: # %bb.0:
; BTVER2-NEXT: #APP
; SKX-NEXT: #NO_APP
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-LABEL: test_outs:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: #APP
+; BDVER2-NEXT: outsb (%rsi), %dx # sched: [100:0.33]
+; BDVER2-NEXT: outsw (%rsi), %dx # sched: [100:0.33]
+; BDVER2-NEXT: outsl (%rsi), %dx # sched: [100:0.33]
+; BDVER2-NEXT: #NO_APP
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-LABEL: test_outs:
; BTVER2: # %bb.0:
; BTVER2-NEXT: #APP
; SKX-NEXT: #NO_APP
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-LABEL: test_pause:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: #APP
+; BDVER2-NEXT: pause # sched: [4:1.33]
+; BDVER2-NEXT: #NO_APP
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-LABEL: test_pause:
; BTVER2: # %bb.0:
; BTVER2-NEXT: #APP
; SKX-NEXT: #NO_APP
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-LABEL: test_pop_push:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: #APP
+; BDVER2-NEXT: popq %fs # sched: [100:0.33]
+; BDVER2-NEXT: popq %gs # sched: [100:0.33]
+; BDVER2-NEXT: pushq %fs # sched: [3:1.00]
+; BDVER2-NEXT: pushq %gs # sched: [5:1.00]
+; BDVER2-NEXT: #NO_APP
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-LABEL: test_pop_push:
; BTVER2: # %bb.0:
; BTVER2-NEXT: #APP
; SKX-NEXT: #NO_APP
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-LABEL: test_pop_push_16:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: #APP
+; BDVER2-NEXT: popw %ax # sched: [6:0.50]
+; BDVER2-NEXT: popw (%rsi) # sched: [6:0.50]
+; BDVER2-NEXT: pushw %di # sched: [5:1.00]
+; BDVER2-NEXT: pushw (%rsi) # sched: [5:1.00]
+; BDVER2-NEXT: pushw $4095 # imm = 0xFFF
+; BDVER2-NEXT: # sched: [1:1.00]
+; BDVER2-NEXT: pushw $7 # sched: [1:1.00]
+; BDVER2-NEXT: #NO_APP
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-LABEL: test_pop_push_16:
; BTVER2: # %bb.0:
; BTVER2-NEXT: #APP
; SKX-NEXT: #NO_APP
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-LABEL: test_pop_push_64:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: #APP
+; BDVER2-NEXT: popq %rax # sched: [6:0.50]
+; BDVER2-NEXT: popq (%rsi) # sched: [6:0.50]
+; BDVER2-NEXT: pushq %rdi # sched: [5:1.00]
+; BDVER2-NEXT: pushq (%rsi) # sched: [5:1.00]
+; BDVER2-NEXT: pushq $4095 # imm = 0xFFF
+; BDVER2-NEXT: # sched: [1:1.00]
+; BDVER2-NEXT: pushq $7 # sched: [5:1.00]
+; BDVER2-NEXT: #NO_APP
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-LABEL: test_pop_push_64:
; BTVER2: # %bb.0:
; BTVER2-NEXT: #APP
; SKX-NEXT: #NO_APP
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-LABEL: test_popf_pushf:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: #APP
+; BDVER2-NEXT: popfq # sched: [5:0.50]
+; BDVER2-NEXT: pushfq # sched: [5:1.00]
+; BDVER2-NEXT: #NO_APP
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-LABEL: test_popf_pushf:
; BTVER2: # %bb.0:
; BTVER2-NEXT: #APP
; SKX-NEXT: #NO_APP
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-LABEL: test_rcl_rcr_8:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: #APP
+; BDVER2-NEXT: rclb %dil # sched: [2:1.50]
+; BDVER2-NEXT: rcrb %dil # sched: [2:1.50]
+; BDVER2-NEXT: rclb (%rdx) # sched: [11:3.50]
+; BDVER2-NEXT: rcrb (%rdx) # sched: [11:3.50]
+; BDVER2-NEXT: rclb $7, %dil # sched: [5:4.00]
+; BDVER2-NEXT: rcrb $7, %dil # sched: [5:4.00]
+; BDVER2-NEXT: rclb $7, (%rdx) # sched: [11:3.50]
+; BDVER2-NEXT: rcrb $7, (%rdx) # sched: [11:3.50]
+; BDVER2-NEXT: rclb %cl, %dil # sched: [5:4.00]
+; BDVER2-NEXT: rcrb %cl, %dil # sched: [5:4.00]
+; BDVER2-NEXT: rclb %cl, (%rdx) # sched: [11:3.50]
+; BDVER2-NEXT: rcrb %cl, (%rdx) # sched: [11:3.50]
+; BDVER2-NEXT: #NO_APP
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-LABEL: test_rcl_rcr_8:
; BTVER2: # %bb.0:
; BTVER2-NEXT: #APP
; SKX-NEXT: #NO_APP
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-LABEL: test_rcl_rcr_16:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: #APP
+; BDVER2-NEXT: rclw %di # sched: [2:1.50]
+; BDVER2-NEXT: rcrw %di # sched: [2:1.50]
+; BDVER2-NEXT: rclw (%rdx) # sched: [11:3.50]
+; BDVER2-NEXT: rcrw (%rdx) # sched: [11:3.50]
+; BDVER2-NEXT: rclw $7, %di # sched: [5:4.00]
+; BDVER2-NEXT: rcrw $7, %di # sched: [5:4.00]
+; BDVER2-NEXT: rclw $7, (%rdx) # sched: [11:3.50]
+; BDVER2-NEXT: rcrw $7, (%rdx) # sched: [11:3.50]
+; BDVER2-NEXT: rclw %cl, %di # sched: [5:4.00]
+; BDVER2-NEXT: rcrw %cl, %di # sched: [5:4.00]
+; BDVER2-NEXT: rclw %cl, (%rdx) # sched: [11:3.50]
+; BDVER2-NEXT: rcrw %cl, (%rdx) # sched: [11:3.50]
+; BDVER2-NEXT: #NO_APP
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-LABEL: test_rcl_rcr_16:
; BTVER2: # %bb.0:
; BTVER2-NEXT: #APP
; SKX-NEXT: #NO_APP
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-LABEL: test_rcl_rcr_32:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: #APP
+; BDVER2-NEXT: rcll %edi # sched: [2:1.50]
+; BDVER2-NEXT: rcrl %edi # sched: [2:1.50]
+; BDVER2-NEXT: rcll (%rdx) # sched: [11:3.50]
+; BDVER2-NEXT: rcrl (%rdx) # sched: [11:3.50]
+; BDVER2-NEXT: rcll $7, %edi # sched: [5:4.00]
+; BDVER2-NEXT: rcrl $7, %edi # sched: [5:4.00]
+; BDVER2-NEXT: rcll $7, (%rdx) # sched: [11:3.50]
+; BDVER2-NEXT: rcrl $7, (%rdx) # sched: [11:3.50]
+; BDVER2-NEXT: rcll %cl, %edi # sched: [5:4.00]
+; BDVER2-NEXT: rcrl %cl, %edi # sched: [5:4.00]
+; BDVER2-NEXT: rcll %cl, (%rdx) # sched: [11:3.50]
+; BDVER2-NEXT: rcrl %cl, (%rdx) # sched: [11:3.50]
+; BDVER2-NEXT: #NO_APP
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-LABEL: test_rcl_rcr_32:
; BTVER2: # %bb.0:
; BTVER2-NEXT: #APP
; SKX-NEXT: #NO_APP
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-LABEL: test_rcl_rcr_64:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: #APP
+; BDVER2-NEXT: rclq %rdi # sched: [2:1.50]
+; BDVER2-NEXT: rcrq %rdi # sched: [2:1.50]
+; BDVER2-NEXT: rclq (%rdx) # sched: [11:3.50]
+; BDVER2-NEXT: rcrq (%rdx) # sched: [11:3.50]
+; BDVER2-NEXT: rclq $7, %rdi # sched: [5:4.00]
+; BDVER2-NEXT: rcrq $7, %rdi # sched: [5:4.00]
+; BDVER2-NEXT: rclq $7, (%rdx) # sched: [11:3.50]
+; BDVER2-NEXT: rcrq $7, (%rdx) # sched: [11:3.50]
+; BDVER2-NEXT: rclq %cl, %rdi # sched: [5:4.00]
+; BDVER2-NEXT: rcrq %cl, %rdi # sched: [5:4.00]
+; BDVER2-NEXT: rclq %cl, (%rdx) # sched: [11:3.50]
+; BDVER2-NEXT: rcrq %cl, (%rdx) # sched: [11:3.50]
+; BDVER2-NEXT: #NO_APP
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-LABEL: test_rcl_rcr_64:
; BTVER2: # %bb.0:
; BTVER2-NEXT: #APP
; SKX-NEXT: #NO_APP
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-LABEL: test_rdmsr_wrmsr:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: #APP
+; BDVER2-NEXT: rdmsr # sched: [100:0.33]
+; BDVER2-NEXT: wrmsr # sched: [100:0.33]
+; BDVER2-NEXT: #NO_APP
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-LABEL: test_rdmsr_wrmsr:
; BTVER2: # %bb.0:
; BTVER2-NEXT: #APP
; SKX-NEXT: #NO_APP
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-LABEL: test_rdpmc:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: #APP
+; BDVER2-NEXT: rdpmc # sched: [100:0.33]
+; BDVER2-NEXT: #NO_APP
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-LABEL: test_rdpmc:
; BTVER2: # %bb.0:
; BTVER2-NEXT: #APP
; SKX-NEXT: #NO_APP
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-LABEL: test_rdtsc_rdtscp:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: #APP
+; BDVER2-NEXT: rdtsc # sched: [100:0.33]
+; BDVER2-NEXT: rdtscp # sched: [100:0.33]
+; BDVER2-NEXT: #NO_APP
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-LABEL: test_rdtsc_rdtscp:
; BTVER2: # %bb.0:
; BTVER2-NEXT: #APP
; SKX-NEXT: #NO_APP
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-LABEL: test_ret:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: #APP
+; BDVER2-NEXT: retq # sched: [1:1.00]
+; BDVER2-NEXT: retq $4095 # imm = 0xFFF
+; BDVER2-NEXT: # sched: [6:1.00]
+; BDVER2-NEXT: lretl # sched: [6:1.00]
+; BDVER2-NEXT: lretl $4095 # imm = 0xFFF
+; BDVER2-NEXT: # sched: [6:1.00]
+; BDVER2-NEXT: #NO_APP
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-LABEL: test_ret:
; BTVER2: # %bb.0:
; BTVER2-NEXT: #APP
; SKX-NEXT: #NO_APP
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-LABEL: test_rol_ror_8:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: #APP
+; BDVER2-NEXT: rolb %dil # sched: [2:1.00]
+; BDVER2-NEXT: rorb %dil # sched: [2:1.00]
+; BDVER2-NEXT: rolb (%rdx) # sched: [8:1.00]
+; BDVER2-NEXT: rorb (%rdx) # sched: [8:1.00]
+; BDVER2-NEXT: rolb $7, %dil # sched: [2:1.00]
+; BDVER2-NEXT: rorb $7, %dil # sched: [2:1.00]
+; BDVER2-NEXT: rolb $7, (%rdx) # sched: [8:1.00]
+; BDVER2-NEXT: rorb $7, (%rdx) # sched: [8:1.00]
+; BDVER2-NEXT: rolb %cl, %dil # sched: [3:1.50]
+; BDVER2-NEXT: rorb %cl, %dil # sched: [3:1.50]
+; BDVER2-NEXT: rolb %cl, (%rdx) # sched: [9:1.50]
+; BDVER2-NEXT: rorb %cl, (%rdx) # sched: [9:1.50]
+; BDVER2-NEXT: #NO_APP
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-LABEL: test_rol_ror_8:
; BTVER2: # %bb.0:
; BTVER2-NEXT: #APP
; SKX-NEXT: #NO_APP
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-LABEL: test_rol_ror_16:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: #APP
+; BDVER2-NEXT: rolw %di # sched: [2:1.00]
+; BDVER2-NEXT: rorw %di # sched: [2:1.00]
+; BDVER2-NEXT: rolw (%rdx) # sched: [8:1.00]
+; BDVER2-NEXT: rorw (%rdx) # sched: [8:1.00]
+; BDVER2-NEXT: rolw $7, %di # sched: [2:1.00]
+; BDVER2-NEXT: rorw $7, %di # sched: [2:1.00]
+; BDVER2-NEXT: rolw $7, (%rdx) # sched: [8:1.00]
+; BDVER2-NEXT: rorw $7, (%rdx) # sched: [8:1.00]
+; BDVER2-NEXT: rolw %cl, %di # sched: [3:1.50]
+; BDVER2-NEXT: rorw %cl, %di # sched: [3:1.50]
+; BDVER2-NEXT: rolw %cl, (%rdx) # sched: [9:1.50]
+; BDVER2-NEXT: rorw %cl, (%rdx) # sched: [9:1.50]
+; BDVER2-NEXT: #NO_APP
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-LABEL: test_rol_ror_16:
; BTVER2: # %bb.0:
; BTVER2-NEXT: #APP
; SKX-NEXT: #NO_APP
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-LABEL: test_rol_ror_32:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: #APP
+; BDVER2-NEXT: roll %edi # sched: [2:1.00]
+; BDVER2-NEXT: rorl %edi # sched: [2:1.00]
+; BDVER2-NEXT: roll (%rdx) # sched: [8:1.00]
+; BDVER2-NEXT: rorl (%rdx) # sched: [8:1.00]
+; BDVER2-NEXT: roll $7, %edi # sched: [2:1.00]
+; BDVER2-NEXT: rorl $7, %edi # sched: [2:1.00]
+; BDVER2-NEXT: roll $7, (%rdx) # sched: [8:1.00]
+; BDVER2-NEXT: rorl $7, (%rdx) # sched: [8:1.00]
+; BDVER2-NEXT: roll %cl, %edi # sched: [3:1.50]
+; BDVER2-NEXT: rorl %cl, %edi # sched: [3:1.50]
+; BDVER2-NEXT: roll %cl, (%rdx) # sched: [9:1.50]
+; BDVER2-NEXT: rorl %cl, (%rdx) # sched: [9:1.50]
+; BDVER2-NEXT: #NO_APP
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-LABEL: test_rol_ror_32:
; BTVER2: # %bb.0:
; BTVER2-NEXT: #APP
; SKX-NEXT: #NO_APP
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-LABEL: test_rol_ror_64:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: #APP
+; BDVER2-NEXT: rolq %rdi # sched: [2:1.00]
+; BDVER2-NEXT: rorq %rdi # sched: [2:1.00]
+; BDVER2-NEXT: rolq (%rdx) # sched: [8:1.00]
+; BDVER2-NEXT: rorq (%rdx) # sched: [8:1.00]
+; BDVER2-NEXT: rolq $7, %rdi # sched: [2:1.00]
+; BDVER2-NEXT: rorq $7, %rdi # sched: [2:1.00]
+; BDVER2-NEXT: rolq $7, (%rdx) # sched: [8:1.00]
+; BDVER2-NEXT: rorq $7, (%rdx) # sched: [8:1.00]
+; BDVER2-NEXT: rolq %cl, %rdi # sched: [3:1.50]
+; BDVER2-NEXT: rorq %cl, %rdi # sched: [3:1.50]
+; BDVER2-NEXT: rolq %cl, (%rdx) # sched: [9:1.50]
+; BDVER2-NEXT: rorq %cl, (%rdx) # sched: [9:1.50]
+; BDVER2-NEXT: #NO_APP
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-LABEL: test_rol_ror_64:
; BTVER2: # %bb.0:
; BTVER2-NEXT: #APP
; SKX-NEXT: #NO_APP
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-LABEL: test_sar_shl_shr_8:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: #APP
+; BDVER2-NEXT: sarb %dil # sched: [1:0.50]
+; BDVER2-NEXT: shlb %dil # sched: [1:0.50]
+; BDVER2-NEXT: shrb %dil # sched: [1:0.50]
+; BDVER2-NEXT: sarb (%rdx) # sched: [7:1.00]
+; BDVER2-NEXT: shlb (%rdx) # sched: [7:1.00]
+; BDVER2-NEXT: shrb (%rdx) # sched: [7:1.00]
+; BDVER2-NEXT: sarb $7, %dil # sched: [1:0.50]
+; BDVER2-NEXT: shlb $7, %dil # sched: [1:0.50]
+; BDVER2-NEXT: shrb $7, %dil # sched: [1:0.50]
+; BDVER2-NEXT: sarb $7, (%rdx) # sched: [7:1.00]
+; BDVER2-NEXT: shlb $7, (%rdx) # sched: [7:1.00]
+; BDVER2-NEXT: shrb $7, (%rdx) # sched: [7:1.00]
+; BDVER2-NEXT: sarb %cl, %dil # sched: [3:1.50]
+; BDVER2-NEXT: shlb %cl, %dil # sched: [3:1.50]
+; BDVER2-NEXT: shrb %cl, %dil # sched: [3:1.50]
+; BDVER2-NEXT: sarb %cl, (%rdx) # sched: [9:1.50]
+; BDVER2-NEXT: shlb %cl, (%rdx) # sched: [9:1.50]
+; BDVER2-NEXT: shrb %cl, (%rdx) # sched: [9:1.50]
+; BDVER2-NEXT: #NO_APP
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-LABEL: test_sar_shl_shr_8:
; BTVER2: # %bb.0:
; BTVER2-NEXT: #APP
; SKX-NEXT: #NO_APP
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-LABEL: test_sar_shl_shr_16:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: #APP
+; BDVER2-NEXT: sarw %di # sched: [1:0.50]
+; BDVER2-NEXT: shlw %di # sched: [1:0.50]
+; BDVER2-NEXT: shrw %di # sched: [1:0.50]
+; BDVER2-NEXT: sarw (%rdx) # sched: [7:1.00]
+; BDVER2-NEXT: shlw (%rdx) # sched: [7:1.00]
+; BDVER2-NEXT: shrw (%rdx) # sched: [7:1.00]
+; BDVER2-NEXT: sarw $7, %di # sched: [1:0.50]
+; BDVER2-NEXT: shlw $7, %di # sched: [1:0.50]
+; BDVER2-NEXT: shrw $7, %di # sched: [1:0.50]
+; BDVER2-NEXT: sarw $7, (%rdx) # sched: [7:1.00]
+; BDVER2-NEXT: shlw $7, (%rdx) # sched: [7:1.00]
+; BDVER2-NEXT: shrw $7, (%rdx) # sched: [7:1.00]
+; BDVER2-NEXT: sarw %cl, %di # sched: [3:1.50]
+; BDVER2-NEXT: shlw %cl, %di # sched: [3:1.50]
+; BDVER2-NEXT: shrw %cl, %di # sched: [3:1.50]
+; BDVER2-NEXT: sarw %cl, (%rdx) # sched: [9:1.50]
+; BDVER2-NEXT: shlw %cl, (%rdx) # sched: [9:1.50]
+; BDVER2-NEXT: shrw %cl, (%rdx) # sched: [9:1.50]
+; BDVER2-NEXT: #NO_APP
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-LABEL: test_sar_shl_shr_16:
; BTVER2: # %bb.0:
; BTVER2-NEXT: #APP
; SKX-NEXT: #NO_APP
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-LABEL: test_sar_shl_shr_32:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: #APP
+; BDVER2-NEXT: sarl %edi # sched: [1:0.50]
+; BDVER2-NEXT: shll %edi # sched: [1:0.50]
+; BDVER2-NEXT: shrl %edi # sched: [1:0.50]
+; BDVER2-NEXT: sarl (%rdx) # sched: [7:1.00]
+; BDVER2-NEXT: shll (%rdx) # sched: [7:1.00]
+; BDVER2-NEXT: shrl (%rdx) # sched: [7:1.00]
+; BDVER2-NEXT: sarl $7, %edi # sched: [1:0.50]
+; BDVER2-NEXT: shll $7, %edi # sched: [1:0.50]
+; BDVER2-NEXT: shrl $7, %edi # sched: [1:0.50]
+; BDVER2-NEXT: sarl $7, (%rdx) # sched: [7:1.00]
+; BDVER2-NEXT: shll $7, (%rdx) # sched: [7:1.00]
+; BDVER2-NEXT: shrl $7, (%rdx) # sched: [7:1.00]
+; BDVER2-NEXT: sarl %cl, %edi # sched: [3:1.50]
+; BDVER2-NEXT: shll %cl, %edi # sched: [3:1.50]
+; BDVER2-NEXT: shrl %cl, %edi # sched: [3:1.50]
+; BDVER2-NEXT: sarl %cl, (%rdx) # sched: [9:1.50]
+; BDVER2-NEXT: shll %cl, (%rdx) # sched: [9:1.50]
+; BDVER2-NEXT: shrl %cl, (%rdx) # sched: [9:1.50]
+; BDVER2-NEXT: #NO_APP
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-LABEL: test_sar_shl_shr_32:
; BTVER2: # %bb.0:
; BTVER2-NEXT: #APP
; SKX-NEXT: #NO_APP
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-LABEL: test_sar_shl_shr_64:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: #APP
+; BDVER2-NEXT: sarq %rdi # sched: [1:0.50]
+; BDVER2-NEXT: shlq %rdi # sched: [1:0.50]
+; BDVER2-NEXT: shrq %rdi # sched: [1:0.50]
+; BDVER2-NEXT: sarq (%rdx) # sched: [7:1.00]
+; BDVER2-NEXT: shlq (%rdx) # sched: [7:1.00]
+; BDVER2-NEXT: shrq (%rdx) # sched: [7:1.00]
+; BDVER2-NEXT: sarq $7, %rdi # sched: [1:0.50]
+; BDVER2-NEXT: shlq $7, %rdi # sched: [1:0.50]
+; BDVER2-NEXT: shrq $7, %rdi # sched: [1:0.50]
+; BDVER2-NEXT: sarq $7, (%rdx) # sched: [7:1.00]
+; BDVER2-NEXT: shlq $7, (%rdx) # sched: [7:1.00]
+; BDVER2-NEXT: shrq $7, (%rdx) # sched: [7:1.00]
+; BDVER2-NEXT: sarq %cl, %rdi # sched: [3:1.50]
+; BDVER2-NEXT: shlq %cl, %rdi # sched: [3:1.50]
+; BDVER2-NEXT: shrq %cl, %rdi # sched: [3:1.50]
+; BDVER2-NEXT: sarq %cl, (%rdx) # sched: [9:1.50]
+; BDVER2-NEXT: shlq %cl, (%rdx) # sched: [9:1.50]
+; BDVER2-NEXT: shrq %cl, (%rdx) # sched: [9:1.50]
+; BDVER2-NEXT: #NO_APP
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-LABEL: test_sar_shl_shr_64:
; BTVER2: # %bb.0:
; BTVER2-NEXT: #APP
; SKX-NEXT: #NO_APP
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-LABEL: test_sbb_8:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: #APP
+; BDVER2-NEXT: sbbb $7, %al # sched: [2:0.67]
+; BDVER2-NEXT: sbbb $7, %dil # sched: [2:0.67]
+; BDVER2-NEXT: sbbb $7, (%rsi) # sched: [9:1.00]
+; BDVER2-NEXT: sbbb %dl, %dil # sched: [2:0.67]
+; BDVER2-NEXT: sbbb %dil, (%rsi) # sched: [9:1.00]
+; BDVER2-NEXT: sbbb (%rsi), %dil # sched: [7:0.67]
+; BDVER2-NEXT: #NO_APP
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-LABEL: test_sbb_8:
; BTVER2: # %bb.0:
; BTVER2-NEXT: #APP
; SKX-NEXT: #NO_APP
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-LABEL: test_sbb_16:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: #APP
+; BDVER2-NEXT: sbbw $511, %ax # imm = 0x1FF
+; BDVER2-NEXT: # sched: [2:0.67]
+; BDVER2-NEXT: sbbw $511, %di # imm = 0x1FF
+; BDVER2-NEXT: # sched: [2:0.67]
+; BDVER2-NEXT: sbbw $511, (%rsi) # imm = 0x1FF
+; BDVER2-NEXT: # sched: [9:1.00]
+; BDVER2-NEXT: sbbw $7, %di # sched: [2:0.67]
+; BDVER2-NEXT: sbbw $7, (%rsi) # sched: [9:1.00]
+; BDVER2-NEXT: sbbw %dx, %di # sched: [2:0.67]
+; BDVER2-NEXT: sbbw %di, (%rsi) # sched: [9:1.00]
+; BDVER2-NEXT: sbbw (%rsi), %di # sched: [7:0.67]
+; BDVER2-NEXT: #NO_APP
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-LABEL: test_sbb_16:
; BTVER2: # %bb.0:
; BTVER2-NEXT: #APP
; SKX-NEXT: #NO_APP
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-LABEL: test_sbb_32:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: #APP
+; BDVER2-NEXT: sbbl $665536, %eax # imm = 0xA27C0
+; BDVER2-NEXT: # sched: [2:0.67]
+; BDVER2-NEXT: sbbl $665536, %edi # imm = 0xA27C0
+; BDVER2-NEXT: # sched: [2:0.67]
+; BDVER2-NEXT: sbbl $665536, (%rsi) # imm = 0xA27C0
+; BDVER2-NEXT: # sched: [9:1.00]
+; BDVER2-NEXT: sbbl $7, %edi # sched: [2:0.67]
+; BDVER2-NEXT: sbbl $7, (%rsi) # sched: [9:1.00]
+; BDVER2-NEXT: sbbl %edx, %edi # sched: [2:0.67]
+; BDVER2-NEXT: sbbl %edi, (%rsi) # sched: [9:1.00]
+; BDVER2-NEXT: sbbl (%rsi), %edi # sched: [7:0.67]
+; BDVER2-NEXT: #NO_APP
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-LABEL: test_sbb_32:
; BTVER2: # %bb.0:
; BTVER2-NEXT: #APP
; SKX-NEXT: #NO_APP
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-LABEL: test_sbb_64:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: #APP
+; BDVER2-NEXT: sbbq $665536, %rax # imm = 0xA27C0
+; BDVER2-NEXT: # sched: [2:0.67]
+; BDVER2-NEXT: sbbq $665536, %rdi # imm = 0xA27C0
+; BDVER2-NEXT: # sched: [2:0.67]
+; BDVER2-NEXT: sbbq $665536, (%rsi) # imm = 0xA27C0
+; BDVER2-NEXT: # sched: [9:1.00]
+; BDVER2-NEXT: sbbq $7, %rdi # sched: [2:0.67]
+; BDVER2-NEXT: sbbq $7, (%rsi) # sched: [9:1.00]
+; BDVER2-NEXT: sbbq %rdx, %rdi # sched: [2:0.67]
+; BDVER2-NEXT: sbbq %rdi, (%rsi) # sched: [9:1.00]
+; BDVER2-NEXT: sbbq (%rsi), %rdi # sched: [7:0.67]
+; BDVER2-NEXT: #NO_APP
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-LABEL: test_sbb_64:
; BTVER2: # %bb.0:
; BTVER2-NEXT: #APP
; SKX-NEXT: #NO_APP
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-LABEL: test_scas:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: #APP
+; BDVER2-NEXT: scasb %es:(%rdi), %al # sched: [2:0.67]
+; BDVER2-NEXT: scasw %es:(%rdi), %ax # sched: [2:0.67]
+; BDVER2-NEXT: scasl %es:(%rdi), %eax # sched: [2:0.67]
+; BDVER2-NEXT: scasq %es:(%rdi), %rax # sched: [2:0.67]
+; BDVER2-NEXT: #NO_APP
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-LABEL: test_scas:
; BTVER2: # %bb.0:
; BTVER2-NEXT: #APP
; SKX-NEXT: #NO_APP
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-LABEL: test_setcc:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: #APP
+; BDVER2-NEXT: seto %dil # sched: [1:0.50]
+; BDVER2-NEXT: setno %dil # sched: [1:0.50]
+; BDVER2-NEXT: setb %dil # sched: [1:0.50]
+; BDVER2-NEXT: setae %dil # sched: [1:0.50]
+; BDVER2-NEXT: sete %dil # sched: [1:0.50]
+; BDVER2-NEXT: setne %dil # sched: [1:0.50]
+; BDVER2-NEXT: setbe %dil # sched: [2:1.00]
+; BDVER2-NEXT: seta %dil # sched: [2:1.00]
+; BDVER2-NEXT: sets %dil # sched: [1:0.50]
+; BDVER2-NEXT: setns %dil # sched: [1:0.50]
+; BDVER2-NEXT: setp %dil # sched: [1:0.50]
+; BDVER2-NEXT: setnp %dil # sched: [1:0.50]
+; BDVER2-NEXT: setl %dil # sched: [1:0.50]
+; BDVER2-NEXT: setge %dil # sched: [1:0.50]
+; BDVER2-NEXT: setle %dil # sched: [1:0.50]
+; BDVER2-NEXT: setg %dil # sched: [1:0.50]
+; BDVER2-NEXT: seto (%rsi) # sched: [2:1.00]
+; BDVER2-NEXT: setno (%rsi) # sched: [2:1.00]
+; BDVER2-NEXT: setb (%rsi) # sched: [2:1.00]
+; BDVER2-NEXT: setae (%rsi) # sched: [2:1.00]
+; BDVER2-NEXT: sete (%rsi) # sched: [2:1.00]
+; BDVER2-NEXT: setne (%rsi) # sched: [2:1.00]
+; BDVER2-NEXT: setbe (%rsi) # sched: [3:1.00]
+; BDVER2-NEXT: seta (%rsi) # sched: [3:1.00]
+; BDVER2-NEXT: sets (%rsi) # sched: [2:1.00]
+; BDVER2-NEXT: setns (%rsi) # sched: [2:1.00]
+; BDVER2-NEXT: setp (%rsi) # sched: [2:1.00]
+; BDVER2-NEXT: setnp (%rsi) # sched: [2:1.00]
+; BDVER2-NEXT: setl (%rsi) # sched: [2:1.00]
+; BDVER2-NEXT: setge (%rsi) # sched: [2:1.00]
+; BDVER2-NEXT: setle (%rsi) # sched: [2:1.00]
+; BDVER2-NEXT: setg (%rsi) # sched: [2:1.00]
+; BDVER2-NEXT: #NO_APP
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-LABEL: test_setcc:
; BTVER2: # %bb.0:
; BTVER2-NEXT: #APP
; SKX-NEXT: #NO_APP
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-LABEL: test_shld_shrd_16:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: #APP
+; BDVER2-NEXT: shldw %cl, %si, %di # sched: [4:1.50]
+; BDVER2-NEXT: shrdw %cl, %si, %di # sched: [4:1.50]
+; BDVER2-NEXT: shldw %cl, %si, (%rdx) # sched: [10:1.50]
+; BDVER2-NEXT: shrdw %cl, %si, (%rdx) # sched: [10:1.50]
+; BDVER2-NEXT: shldw $7, %si, %di # sched: [2:0.67]
+; BDVER2-NEXT: shrdw $7, %si, %di # sched: [2:0.67]
+; BDVER2-NEXT: shldw $7, %si, (%rdx) # sched: [8:1.00]
+; BDVER2-NEXT: shrdw $7, %si, (%rdx) # sched: [8:1.00]
+; BDVER2-NEXT: #NO_APP
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-LABEL: test_shld_shrd_16:
; BTVER2: # %bb.0:
; BTVER2-NEXT: #APP
; SKX-NEXT: #NO_APP
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-LABEL: test_shld_shrd_32:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: #APP
+; BDVER2-NEXT: shldl %cl, %esi, %edi # sched: [4:1.50]
+; BDVER2-NEXT: shrdl %cl, %esi, %edi # sched: [4:1.50]
+; BDVER2-NEXT: shldl %cl, %esi, (%rdx) # sched: [10:1.50]
+; BDVER2-NEXT: shrdl %cl, %esi, (%rdx) # sched: [10:1.50]
+; BDVER2-NEXT: shldl $7, %esi, %edi # sched: [2:0.67]
+; BDVER2-NEXT: shrdl $7, %esi, %edi # sched: [2:0.67]
+; BDVER2-NEXT: shldl $7, %esi, (%rdx) # sched: [8:1.00]
+; BDVER2-NEXT: shrdl $7, %esi, (%rdx) # sched: [8:1.00]
+; BDVER2-NEXT: #NO_APP
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-LABEL: test_shld_shrd_32:
; BTVER2: # %bb.0:
; BTVER2-NEXT: #APP
; SKX-NEXT: #NO_APP
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-LABEL: test_shld_shrd_64:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: #APP
+; BDVER2-NEXT: shldq %cl, %rsi, %rdi # sched: [4:1.50]
+; BDVER2-NEXT: shrdq %cl, %rsi, %rdi # sched: [4:1.50]
+; BDVER2-NEXT: shldq %cl, %rsi, (%rdx) # sched: [10:1.50]
+; BDVER2-NEXT: shrdq %cl, %rsi, (%rdx) # sched: [10:1.50]
+; BDVER2-NEXT: shldq $7, %rsi, %rdi # sched: [2:0.67]
+; BDVER2-NEXT: shrdq $7, %rsi, %rdi # sched: [2:0.67]
+; BDVER2-NEXT: shldq $7, %rsi, (%rdx) # sched: [8:1.00]
+; BDVER2-NEXT: shrdq $7, %rsi, (%rdx) # sched: [8:1.00]
+; BDVER2-NEXT: #NO_APP
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-LABEL: test_shld_shrd_64:
; BTVER2: # %bb.0:
; BTVER2-NEXT: #APP
; SKX-NEXT: #NO_APP
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-LABEL: test_stc_std:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: #APP
+; BDVER2-NEXT: stc # sched: [1:0.33]
+; BDVER2-NEXT: std # sched: [1:0.33]
+; BDVER2-NEXT: #NO_APP
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-LABEL: test_stc_std:
; BTVER2: # %bb.0:
; BTVER2-NEXT: #APP
; SKX-NEXT: #NO_APP
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-LABEL: test_stos:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: #APP
+; BDVER2-NEXT: stosb %al, %es:(%rdi) # sched: [5:1.00]
+; BDVER2-NEXT: stosw %ax, %es:(%rdi) # sched: [5:1.00]
+; BDVER2-NEXT: stosl %eax, %es:(%rdi) # sched: [5:1.00]
+; BDVER2-NEXT: stosq %rax, %es:(%rdi) # sched: [5:1.00]
+; BDVER2-NEXT: #NO_APP
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-LABEL: test_stos:
; BTVER2: # %bb.0:
; BTVER2-NEXT: #APP
; SKX-NEXT: #NO_APP
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-LABEL: test_sub_8:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: #APP
+; BDVER2-NEXT: subb $7, %al # sched: [1:0.33]
+; BDVER2-NEXT: subb $7, %dil # sched: [1:0.33]
+; BDVER2-NEXT: subb $7, (%rsi) # sched: [7:1.00]
+; BDVER2-NEXT: subb %dl, %dil # sched: [1:0.33]
+; BDVER2-NEXT: subb %dil, (%rsi) # sched: [7:1.00]
+; BDVER2-NEXT: subb (%rsi), %dil # sched: [6:0.50]
+; BDVER2-NEXT: #NO_APP
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-LABEL: test_sub_8:
; BTVER2: # %bb.0:
; BTVER2-NEXT: #APP
; SKX-NEXT: #NO_APP
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-LABEL: test_sub_16:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: #APP
+; BDVER2-NEXT: subw $511, %ax # imm = 0x1FF
+; BDVER2-NEXT: # sched: [1:0.33]
+; BDVER2-NEXT: subw $511, %di # imm = 0x1FF
+; BDVER2-NEXT: # sched: [1:0.33]
+; BDVER2-NEXT: subw $511, (%rsi) # imm = 0x1FF
+; BDVER2-NEXT: # sched: [7:1.00]
+; BDVER2-NEXT: subw $7, %di # sched: [1:0.33]
+; BDVER2-NEXT: subw $7, (%rsi) # sched: [7:1.00]
+; BDVER2-NEXT: subw %dx, %di # sched: [1:0.33]
+; BDVER2-NEXT: subw %di, (%rsi) # sched: [7:1.00]
+; BDVER2-NEXT: subw (%rsi), %di # sched: [6:0.50]
+; BDVER2-NEXT: #NO_APP
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-LABEL: test_sub_16:
; BTVER2: # %bb.0:
; BTVER2-NEXT: #APP
; SKX-NEXT: #NO_APP
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-LABEL: test_sub_32:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: #APP
+; BDVER2-NEXT: subl $665536, %eax # imm = 0xA27C0
+; BDVER2-NEXT: # sched: [1:0.33]
+; BDVER2-NEXT: subl $665536, %edi # imm = 0xA27C0
+; BDVER2-NEXT: # sched: [1:0.33]
+; BDVER2-NEXT: subl $665536, (%rsi) # imm = 0xA27C0
+; BDVER2-NEXT: # sched: [7:1.00]
+; BDVER2-NEXT: subl $7, %edi # sched: [1:0.33]
+; BDVER2-NEXT: subl $7, (%rsi) # sched: [7:1.00]
+; BDVER2-NEXT: subl %edx, %edi # sched: [1:0.33]
+; BDVER2-NEXT: subl %edi, (%rsi) # sched: [7:1.00]
+; BDVER2-NEXT: subl (%rsi), %edi # sched: [6:0.50]
+; BDVER2-NEXT: #NO_APP
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-LABEL: test_sub_32:
; BTVER2: # %bb.0:
; BTVER2-NEXT: #APP
; SKX-NEXT: #NO_APP
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-LABEL: test_sub_64:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: #APP
+; BDVER2-NEXT: subq $665536, %rax # imm = 0xA27C0
+; BDVER2-NEXT: # sched: [1:0.33]
+; BDVER2-NEXT: subq $665536, %rdi # imm = 0xA27C0
+; BDVER2-NEXT: # sched: [1:0.33]
+; BDVER2-NEXT: subq $665536, (%rsi) # imm = 0xA27C0
+; BDVER2-NEXT: # sched: [7:1.00]
+; BDVER2-NEXT: subq $7, %rdi # sched: [1:0.33]
+; BDVER2-NEXT: subq $7, (%rsi) # sched: [7:1.00]
+; BDVER2-NEXT: subq %rdx, %rdi # sched: [1:0.33]
+; BDVER2-NEXT: subq %rdi, (%rsi) # sched: [7:1.00]
+; BDVER2-NEXT: subq (%rsi), %rdi # sched: [6:0.50]
+; BDVER2-NEXT: #NO_APP
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-LABEL: test_sub_64:
; BTVER2: # %bb.0:
; BTVER2-NEXT: #APP
; SKX-NEXT: #NO_APP
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-LABEL: test_test_8:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: #APP
+; BDVER2-NEXT: testb $7, %al # sched: [1:0.33]
+; BDVER2-NEXT: testb $7, %dil # sched: [1:0.33]
+; BDVER2-NEXT: testb $7, (%rsi) # sched: [6:0.50]
+; BDVER2-NEXT: testb %dil, %dil # sched: [1:0.33]
+; BDVER2-NEXT: testb %dil, (%rsi) # sched: [6:0.50]
+; BDVER2-NEXT: #NO_APP
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-LABEL: test_test_8:
; BTVER2: # %bb.0:
; BTVER2-NEXT: #APP
; SKX-NEXT: #NO_APP
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-LABEL: test_test_16:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: #APP
+; BDVER2-NEXT: testw $511, %ax # imm = 0x1FF
+; BDVER2-NEXT: # sched: [1:0.33]
+; BDVER2-NEXT: testw $511, %di # imm = 0x1FF
+; BDVER2-NEXT: # sched: [1:0.33]
+; BDVER2-NEXT: testw $511, (%rsi) # imm = 0x1FF
+; BDVER2-NEXT: # sched: [6:0.50]
+; BDVER2-NEXT: testw %di, %di # sched: [1:0.33]
+; BDVER2-NEXT: testw %di, (%rsi) # sched: [6:0.50]
+; BDVER2-NEXT: #NO_APP
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-LABEL: test_test_16:
; BTVER2: # %bb.0:
; BTVER2-NEXT: #APP
; SKX-NEXT: #NO_APP
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-LABEL: test_test_32:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: #APP
+; BDVER2-NEXT: testl $665536, %eax # imm = 0xA27C0
+; BDVER2-NEXT: # sched: [1:0.33]
+; BDVER2-NEXT: testl $665536, %edi # imm = 0xA27C0
+; BDVER2-NEXT: # sched: [1:0.33]
+; BDVER2-NEXT: testl $665536, (%rsi) # imm = 0xA27C0
+; BDVER2-NEXT: # sched: [6:0.50]
+; BDVER2-NEXT: testl %edi, %edi # sched: [1:0.33]
+; BDVER2-NEXT: testl %edi, (%rsi) # sched: [6:0.50]
+; BDVER2-NEXT: #NO_APP
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-LABEL: test_test_32:
; BTVER2: # %bb.0:
; BTVER2-NEXT: #APP
; SKX-NEXT: #NO_APP
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-LABEL: test_test_64:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: #APP
+; BDVER2-NEXT: testq $665536, %rax # imm = 0xA27C0
+; BDVER2-NEXT: # sched: [1:0.33]
+; BDVER2-NEXT: testq $665536, %rdi # imm = 0xA27C0
+; BDVER2-NEXT: # sched: [1:0.33]
+; BDVER2-NEXT: testq $665536, (%rsi) # imm = 0xA27C0
+; BDVER2-NEXT: # sched: [6:0.50]
+; BDVER2-NEXT: testq %rdi, %rdi # sched: [1:0.33]
+; BDVER2-NEXT: testq %rdi, (%rsi) # sched: [6:0.50]
+; BDVER2-NEXT: #NO_APP
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-LABEL: test_test_64:
; BTVER2: # %bb.0:
; BTVER2-NEXT: #APP
; SKX-NEXT: #NO_APP
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-LABEL: test_ud2:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: #APP
+; BDVER2-NEXT: ud2 # sched: [100:0.33]
+; BDVER2-NEXT: #NO_APP
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-LABEL: test_ud2:
; BTVER2: # %bb.0:
; BTVER2-NEXT: #APP
; SKX-NEXT: #NO_APP
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-LABEL: test_xadd_8:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: #APP
+; BDVER2-NEXT: xaddb %dil, %sil # sched: [2:1.00]
+; BDVER2-NEXT: xaddb %dil, (%rdx) # sched: [8:1.00]
+; BDVER2-NEXT: #NO_APP
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-LABEL: test_xadd_8:
; BTVER2: # %bb.0:
; BTVER2-NEXT: #APP
; SKX-NEXT: #NO_APP
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-LABEL: test_xadd_16:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: #APP
+; BDVER2-NEXT: xaddw %di, %si # sched: [2:1.00]
+; BDVER2-NEXT: xaddw %di, (%rdx) # sched: [8:1.00]
+; BDVER2-NEXT: #NO_APP
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-LABEL: test_xadd_16:
; BTVER2: # %bb.0:
; BTVER2-NEXT: #APP
; SKX-NEXT: #NO_APP
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-LABEL: test_xadd_32:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: #APP
+; BDVER2-NEXT: xaddl %edi, %esi # sched: [2:1.00]
+; BDVER2-NEXT: xaddl %edi, (%rdx) # sched: [8:1.00]
+; BDVER2-NEXT: #NO_APP
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-LABEL: test_xadd_32:
; BTVER2: # %bb.0:
; BTVER2-NEXT: #APP
; SKX-NEXT: #NO_APP
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-LABEL: test_xadd_64:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: #APP
+; BDVER2-NEXT: xaddq %rdi, %rsi # sched: [2:1.00]
+; BDVER2-NEXT: xaddq %rdi, (%rdx) # sched: [8:1.00]
+; BDVER2-NEXT: #NO_APP
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-LABEL: test_xadd_64:
; BTVER2: # %bb.0:
; BTVER2-NEXT: #APP
; SKX-NEXT: #NO_APP
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-LABEL: test_xchg_8:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: #APP
+; BDVER2-NEXT: xchgb %sil, %dil # sched: [2:1.00]
+; BDVER2-NEXT: xchgb %dil, (%rdx) # sched: [6:1.00]
+; BDVER2-NEXT: #NO_APP
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-LABEL: test_xchg_8:
; BTVER2: # %bb.0:
; BTVER2-NEXT: #APP
; SKX-NEXT: #NO_APP
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-LABEL: test_xchg_16:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: #APP
+; BDVER2-NEXT: xchgw %di, %ax # sched: [2:1.00]
+; BDVER2-NEXT: xchgw %si, %di # sched: [2:1.00]
+; BDVER2-NEXT: xchgw %di, (%rdx) # sched: [6:1.00]
+; BDVER2-NEXT: #NO_APP
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-LABEL: test_xchg_16:
; BTVER2: # %bb.0:
; BTVER2-NEXT: #APP
; SKX-NEXT: #NO_APP
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-LABEL: test_xchg_32:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: #APP
+; BDVER2-NEXT: xchgl %edi, %eax # sched: [2:1.00]
+; BDVER2-NEXT: xchgl %esi, %edi # sched: [2:1.00]
+; BDVER2-NEXT: xchgl %edi, (%rdx) # sched: [6:1.00]
+; BDVER2-NEXT: #NO_APP
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-LABEL: test_xchg_32:
; BTVER2: # %bb.0:
; BTVER2-NEXT: #APP
; SKX-NEXT: #NO_APP
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-LABEL: test_xchg_64:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: #APP
+; BDVER2-NEXT: xchgq %rdi, %rax # sched: [2:1.00]
+; BDVER2-NEXT: xchgq %rsi, %rdi # sched: [2:1.00]
+; BDVER2-NEXT: xchgq %rdi, (%rdx) # sched: [6:1.00]
+; BDVER2-NEXT: #NO_APP
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-LABEL: test_xchg_64:
; BTVER2: # %bb.0:
; BTVER2-NEXT: #APP
; SKX-NEXT: #NO_APP
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-LABEL: test_xlat:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: #APP
+; BDVER2-NEXT: xlatb # sched: [5:0.50]
+; BDVER2-NEXT: #NO_APP
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-LABEL: test_xlat:
; BTVER2: # %bb.0:
; BTVER2-NEXT: #APP
; SKX-NEXT: #NO_APP
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-LABEL: test_xor_8:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: #APP
+; BDVER2-NEXT: xorb $7, %al # sched: [1:0.33]
+; BDVER2-NEXT: xorb $7, %dil # sched: [1:0.33]
+; BDVER2-NEXT: xorb $7, (%rsi) # sched: [7:1.00]
+; BDVER2-NEXT: xorb %dl, %dil # sched: [1:0.33]
+; BDVER2-NEXT: xorb %dil, (%rsi) # sched: [7:1.00]
+; BDVER2-NEXT: xorb (%rsi), %dil # sched: [6:0.50]
+; BDVER2-NEXT: #NO_APP
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-LABEL: test_xor_8:
; BTVER2: # %bb.0:
; BTVER2-NEXT: #APP
; SKX-NEXT: #NO_APP
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-LABEL: test_xor_16:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: #APP
+; BDVER2-NEXT: xorw $511, %ax # imm = 0x1FF
+; BDVER2-NEXT: # sched: [1:0.33]
+; BDVER2-NEXT: xorw $511, %di # imm = 0x1FF
+; BDVER2-NEXT: # sched: [1:0.33]
+; BDVER2-NEXT: xorw $511, (%rsi) # imm = 0x1FF
+; BDVER2-NEXT: # sched: [7:1.00]
+; BDVER2-NEXT: xorw $7, %di # sched: [1:0.33]
+; BDVER2-NEXT: xorw $7, (%rsi) # sched: [7:1.00]
+; BDVER2-NEXT: xorw %dx, %di # sched: [1:0.33]
+; BDVER2-NEXT: xorw %di, (%rsi) # sched: [7:1.00]
+; BDVER2-NEXT: xorw (%rsi), %di # sched: [6:0.50]
+; BDVER2-NEXT: #NO_APP
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-LABEL: test_xor_16:
; BTVER2: # %bb.0:
; BTVER2-NEXT: #APP
; SKX-NEXT: #NO_APP
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-LABEL: test_xor_32:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: #APP
+; BDVER2-NEXT: xorl $665536, %eax # imm = 0xA27C0
+; BDVER2-NEXT: # sched: [1:0.33]
+; BDVER2-NEXT: xorl $665536, %edi # imm = 0xA27C0
+; BDVER2-NEXT: # sched: [1:0.33]
+; BDVER2-NEXT: xorl $665536, (%rsi) # imm = 0xA27C0
+; BDVER2-NEXT: # sched: [7:1.00]
+; BDVER2-NEXT: xorl $7, %edi # sched: [1:0.33]
+; BDVER2-NEXT: xorl $7, (%rsi) # sched: [7:1.00]
+; BDVER2-NEXT: xorl %edx, %edi # sched: [1:0.33]
+; BDVER2-NEXT: xorl %edi, (%rsi) # sched: [7:1.00]
+; BDVER2-NEXT: xorl (%rsi), %edi # sched: [6:0.50]
+; BDVER2-NEXT: #NO_APP
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-LABEL: test_xor_32:
; BTVER2: # %bb.0:
; BTVER2-NEXT: #APP
; SKX-NEXT: #NO_APP
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-LABEL: test_xor_64:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: #APP
+; BDVER2-NEXT: xorq $665536, %rax # imm = 0xA27C0
+; BDVER2-NEXT: # sched: [1:0.33]
+; BDVER2-NEXT: xorq $665536, %rdi # imm = 0xA27C0
+; BDVER2-NEXT: # sched: [1:0.33]
+; BDVER2-NEXT: xorq $665536, (%rsi) # imm = 0xA27C0
+; BDVER2-NEXT: # sched: [7:1.00]
+; BDVER2-NEXT: xorq $7, %rdi # sched: [1:0.33]
+; BDVER2-NEXT: xorq $7, (%rsi) # sched: [7:1.00]
+; BDVER2-NEXT: xorq %rdx, %rdi # sched: [1:0.33]
+; BDVER2-NEXT: xorq %rdi, (%rsi) # sched: [7:1.00]
+; BDVER2-NEXT: xorq (%rsi), %rdi # sched: [6:0.50]
+; BDVER2-NEXT: #NO_APP
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-LABEL: test_xor_64:
; BTVER2: # %bb.0:
; BTVER2-NEXT: #APP
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=core2 | FileCheck %s --check-prefix=CORE2
; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=nehalem | FileCheck %s --check-prefix=NEHALEM
+; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=x86-64 | FileCheck %s --check-prefix=BDVER2
; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=btver2 | FileCheck %s --check-prefix=BTVER2
declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture, i8* nocapture readonly, i64, i1)
define void @copy16bytes(i8* nocapture %a, i8* nocapture readonly %b) {
+; CORE2-LABEL: copy16bytes:
+; CORE2: ## %bb.0:
+; CORE2-NEXT: movq (%rsi), %rax
+; CORE2-NEXT: movq 8(%rsi), %rcx
+; CORE2-NEXT: movq %rcx, 8(%rdi)
+; CORE2-NEXT: movq %rax, (%rdi)
+; CORE2-NEXT: retq
+;
+; NEHALEM-LABEL: copy16bytes:
+; NEHALEM: ## %bb.0:
+; NEHALEM-NEXT: movups (%rsi), %xmm0
+; NEHALEM-NEXT: movups %xmm0, (%rdi)
+; NEHALEM-NEXT: retq
+;
+; BDVER2-LABEL: copy16bytes:
+; BDVER2: ## %bb.0:
+; BDVER2-NEXT: movups (%rsi), %xmm0
+; BDVER2-NEXT: movups %xmm0, (%rdi)
+; BDVER2-NEXT: retq
+;
+; BTVER2-LABEL: copy16bytes:
+; BTVER2: ## %bb.0:
+; BTVER2-NEXT: vmovups (%rsi), %xmm0
+; BTVER2-NEXT: vmovups %xmm0, (%rdi)
+; BTVER2-NEXT: retq
call void @llvm.memcpy.p0i8.p0i8.i64(i8* %a, i8* %b, i64 16, i1 false)
ret void
; CHECK-LABEL: copy16bytes
- ; CORE2: movq
- ; CORE2-NEXT: movq
- ; CORE2-NEXT: movq
- ; CORE2-NEXT: movq
- ; CORE2-NEXT: retq
- ; NEHALEM: movups
- ; NEHALEM-NEXT: movups
- ; NEHALEM-NEXT: retq
- ; BTVER2: movups
- ; BTVER2-NEXT: movups
- ; BTVER2-NEXT: retq
}
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=skylake -mattr=-avx2 | FileCheck %s --check-prefixes=CHECK,SKYLAKE
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=skx -mattr=-sse3 | FileCheck %s --check-prefixes=CHECK,SKX-SSE
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=skx -mattr=-avx2 | FileCheck %s --check-prefixes=CHECK,SKX
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=x86-64 -mattr=-sse3 | FileCheck %s --check-prefixes=CHECK,BDVER2-SSE
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=x86-64 -mattr=+avx -mattr=-avx2 | FileCheck %s --check-prefixes=CHECK,BDVER2
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=btver2 -mattr=-sse3 | FileCheck %s --check-prefixes=CHECK,BTVER2-SSE
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=btver2 -mattr=-avx2 | FileCheck %s --check-prefixes=CHECK,BTVER2
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=znver1 -mattr=-sse3 | FileCheck %s --check-prefixes=CHECK,ZNVER1-SSE
; SKX-NEXT: vaddps (%rdi), %xmm0, %xmm0 # sched: [10:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-SSE-LABEL: test_addps:
+; BDVER2-SSE: # %bb.0:
+; BDVER2-SSE-NEXT: addps %xmm1, %xmm0 # sched: [3:1.00]
+; BDVER2-SSE-NEXT: addps (%rdi), %xmm0 # sched: [9:1.00]
+; BDVER2-SSE-NEXT: retq # sched: [1:1.00]
+;
+; BDVER2-LABEL: test_addps:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
+; BDVER2-NEXT: vaddps (%rdi), %xmm0, %xmm0 # sched: [9:1.00]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-SSE-LABEL: test_addps:
; BTVER2-SSE: # %bb.0:
; BTVER2-SSE-NEXT: addps %xmm1, %xmm0 # sched: [3:1.00]
; SKX-NEXT: vaddss (%rdi), %xmm0, %xmm0 # sched: [9:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-SSE-LABEL: test_addss:
+; BDVER2-SSE: # %bb.0:
+; BDVER2-SSE-NEXT: addss %xmm1, %xmm0 # sched: [3:1.00]
+; BDVER2-SSE-NEXT: addss (%rdi), %xmm0 # sched: [9:1.00]
+; BDVER2-SSE-NEXT: retq # sched: [1:1.00]
+;
+; BDVER2-LABEL: test_addss:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: vaddss %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
+; BDVER2-NEXT: vaddss (%rdi), %xmm0, %xmm0 # sched: [9:1.00]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-SSE-LABEL: test_addss:
; BTVER2-SSE: # %bb.0:
; BTVER2-SSE-NEXT: addss %xmm1, %xmm0 # sched: [3:1.00]
; SKX-NEXT: vandps (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-SSE-LABEL: test_andps:
+; BDVER2-SSE: # %bb.0:
+; BDVER2-SSE-NEXT: andps %xmm1, %xmm0 # sched: [1:1.00]
+; BDVER2-SSE-NEXT: andps (%rdi), %xmm0 # sched: [7:1.00]
+; BDVER2-SSE-NEXT: retq # sched: [1:1.00]
+;
+; BDVER2-LABEL: test_andps:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: vandps %xmm1, %xmm0, %xmm0 # sched: [1:1.00]
+; BDVER2-NEXT: vandps (%rdi), %xmm0, %xmm0 # sched: [7:1.00]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-SSE-LABEL: test_andps:
; BTVER2-SSE: # %bb.0:
; BTVER2-SSE-NEXT: andps %xmm1, %xmm0 # sched: [1:0.50]
; SKX-NEXT: vandnps (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-SSE-LABEL: test_andnotps:
+; BDVER2-SSE: # %bb.0:
+; BDVER2-SSE-NEXT: andnps %xmm1, %xmm0 # sched: [1:1.00]
+; BDVER2-SSE-NEXT: andnps (%rdi), %xmm0 # sched: [7:1.00]
+; BDVER2-SSE-NEXT: retq # sched: [1:1.00]
+;
+; BDVER2-LABEL: test_andnotps:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: vandnps %xmm1, %xmm0, %xmm0 # sched: [1:1.00]
+; BDVER2-NEXT: vandnps (%rdi), %xmm0, %xmm0 # sched: [7:1.00]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-SSE-LABEL: test_andnotps:
; BTVER2-SSE: # %bb.0:
; BTVER2-SSE-NEXT: andnps %xmm1, %xmm0 # sched: [1:0.50]
; SKX-NEXT: vorps %xmm0, %xmm1, %xmm0 # sched: [1:0.33]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-SSE-LABEL: test_cmpps:
+; BDVER2-SSE: # %bb.0:
+; BDVER2-SSE-NEXT: cmpeqps %xmm0, %xmm1 # sched: [3:1.00]
+; BDVER2-SSE-NEXT: cmpeqps (%rdi), %xmm0 # sched: [9:1.00]
+; BDVER2-SSE-NEXT: orps %xmm1, %xmm0 # sched: [1:1.00]
+; BDVER2-SSE-NEXT: retq # sched: [1:1.00]
+;
+; BDVER2-LABEL: test_cmpps:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: vcmpeqps %xmm1, %xmm0, %xmm1 # sched: [3:1.00]
+; BDVER2-NEXT: vcmpeqps (%rdi), %xmm0, %xmm0 # sched: [9:1.00]
+; BDVER2-NEXT: vorps %xmm0, %xmm1, %xmm0 # sched: [1:1.00]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-SSE-LABEL: test_cmpps:
; BTVER2-SSE: # %bb.0:
; BTVER2-SSE-NEXT: cmpeqps %xmm0, %xmm1 # sched: [2:1.00]
; SKX-NEXT: vcmpeqss (%rdi), %xmm0, %xmm0 # sched: [9:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-SSE-LABEL: test_cmpss:
+; BDVER2-SSE: # %bb.0:
+; BDVER2-SSE-NEXT: cmpeqss %xmm1, %xmm0 # sched: [3:1.00]
+; BDVER2-SSE-NEXT: cmpeqss (%rdi), %xmm0 # sched: [9:1.00]
+; BDVER2-SSE-NEXT: retq # sched: [1:1.00]
+;
+; BDVER2-LABEL: test_cmpss:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: vcmpeqss %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
+; BDVER2-NEXT: vcmpeqss (%rdi), %xmm0, %xmm0 # sched: [9:1.00]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-SSE-LABEL: test_cmpss:
; BTVER2-SSE: # %bb.0:
; BTVER2-SSE-NEXT: cmpeqss %xmm1, %xmm0 # sched: [2:1.00]
; SKX-NEXT: movzbl %dl, %eax # sched: [1:0.25]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-SSE-LABEL: test_comiss:
+; BDVER2-SSE: # %bb.0:
+; BDVER2-SSE-NEXT: comiss %xmm1, %xmm0 # sched: [2:1.00]
+; BDVER2-SSE-NEXT: setnp %al # sched: [1:0.50]
+; BDVER2-SSE-NEXT: sete %cl # sched: [1:0.50]
+; BDVER2-SSE-NEXT: andb %al, %cl # sched: [1:0.33]
+; BDVER2-SSE-NEXT: comiss (%rdi), %xmm0 # sched: [8:1.00]
+; BDVER2-SSE-NEXT: setnp %al # sched: [1:0.50]
+; BDVER2-SSE-NEXT: sete %dl # sched: [1:0.50]
+; BDVER2-SSE-NEXT: andb %al, %dl # sched: [1:0.33]
+; BDVER2-SSE-NEXT: orb %cl, %dl # sched: [1:0.33]
+; BDVER2-SSE-NEXT: movzbl %dl, %eax # sched: [1:0.33]
+; BDVER2-SSE-NEXT: retq # sched: [1:1.00]
+;
+; BDVER2-LABEL: test_comiss:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: vcomiss %xmm1, %xmm0 # sched: [2:1.00]
+; BDVER2-NEXT: setnp %al # sched: [1:0.50]
+; BDVER2-NEXT: sete %cl # sched: [1:0.50]
+; BDVER2-NEXT: andb %al, %cl # sched: [1:0.33]
+; BDVER2-NEXT: vcomiss (%rdi), %xmm0 # sched: [8:1.00]
+; BDVER2-NEXT: setnp %al # sched: [1:0.50]
+; BDVER2-NEXT: sete %dl # sched: [1:0.50]
+; BDVER2-NEXT: andb %al, %dl # sched: [1:0.33]
+; BDVER2-NEXT: orb %cl, %dl # sched: [1:0.33]
+; BDVER2-NEXT: movzbl %dl, %eax # sched: [1:0.33]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-SSE-LABEL: test_comiss:
; BTVER2-SSE: # %bb.0:
; BTVER2-SSE-NEXT: comiss %xmm1, %xmm0 # sched: [3:1.00]
; SKX-NEXT: vaddss %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-SSE-LABEL: test_cvtsi2ss:
+; BDVER2-SSE: # %bb.0:
+; BDVER2-SSE-NEXT: cvtsi2ssl %edi, %xmm1 # sched: [5:2.00]
+; BDVER2-SSE-NEXT: cvtsi2ssl (%rsi), %xmm0 # sched: [10:1.00]
+; BDVER2-SSE-NEXT: addss %xmm1, %xmm0 # sched: [3:1.00]
+; BDVER2-SSE-NEXT: retq # sched: [1:1.00]
+;
+; BDVER2-LABEL: test_cvtsi2ss:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: vcvtsi2ssl %edi, %xmm0, %xmm0 # sched: [5:2.00]
+; BDVER2-NEXT: vcvtsi2ssl (%rsi), %xmm1, %xmm1 # sched: [10:1.00]
+; BDVER2-NEXT: vaddss %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-SSE-LABEL: test_cvtsi2ss:
; BTVER2-SSE: # %bb.0:
; BTVER2-SSE-NEXT: cvtsi2ssl (%rsi), %xmm0 # sched: [14:1.00]
; SKX-NEXT: vaddss %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-SSE-LABEL: test_cvtsi2ssq:
+; BDVER2-SSE: # %bb.0:
+; BDVER2-SSE-NEXT: cvtsi2ssq %rdi, %xmm1 # sched: [5:2.00]
+; BDVER2-SSE-NEXT: cvtsi2ssq (%rsi), %xmm0 # sched: [10:1.00]
+; BDVER2-SSE-NEXT: addss %xmm1, %xmm0 # sched: [3:1.00]
+; BDVER2-SSE-NEXT: retq # sched: [1:1.00]
+;
+; BDVER2-LABEL: test_cvtsi2ssq:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: vcvtsi2ssq %rdi, %xmm0, %xmm0 # sched: [5:2.00]
+; BDVER2-NEXT: vcvtsi2ssq (%rsi), %xmm1, %xmm1 # sched: [10:1.00]
+; BDVER2-NEXT: vaddss %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-SSE-LABEL: test_cvtsi2ssq:
; BTVER2-SSE: # %bb.0:
; BTVER2-SSE-NEXT: cvtsi2ssq (%rsi), %xmm0 # sched: [14:1.00]
; SKX-NEXT: addl %ecx, %eax # sched: [1:0.25]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-SSE-LABEL: test_cvtss2si:
+; BDVER2-SSE: # %bb.0:
+; BDVER2-SSE-NEXT: cvtss2si %xmm0, %ecx # sched: [5:1.00]
+; BDVER2-SSE-NEXT: cvtss2si (%rdi), %eax # sched: [9:1.00]
+; BDVER2-SSE-NEXT: addl %ecx, %eax # sched: [1:0.33]
+; BDVER2-SSE-NEXT: retq # sched: [1:1.00]
+;
+; BDVER2-LABEL: test_cvtss2si:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: vcvtss2si %xmm0, %ecx # sched: [5:1.00]
+; BDVER2-NEXT: vcvtss2si (%rdi), %eax # sched: [10:1.00]
+; BDVER2-NEXT: addl %ecx, %eax # sched: [1:0.33]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-SSE-LABEL: test_cvtss2si:
; BTVER2-SSE: # %bb.0:
; BTVER2-SSE-NEXT: cvtss2si (%rdi), %eax # sched: [12:1.00]
; SKX-NEXT: addq %rcx, %rax # sched: [1:0.25]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-SSE-LABEL: test_cvtss2siq:
+; BDVER2-SSE: # %bb.0:
+; BDVER2-SSE-NEXT: cvtss2si %xmm0, %rcx # sched: [5:1.00]
+; BDVER2-SSE-NEXT: cvtss2si (%rdi), %rax # sched: [9:1.00]
+; BDVER2-SSE-NEXT: addq %rcx, %rax # sched: [1:0.33]
+; BDVER2-SSE-NEXT: retq # sched: [1:1.00]
+;
+; BDVER2-LABEL: test_cvtss2siq:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: vcvtss2si %xmm0, %rcx # sched: [5:1.00]
+; BDVER2-NEXT: vcvtss2si (%rdi), %rax # sched: [10:1.00]
+; BDVER2-NEXT: addq %rcx, %rax # sched: [1:0.33]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-SSE-LABEL: test_cvtss2siq:
; BTVER2-SSE: # %bb.0:
; BTVER2-SSE-NEXT: cvtss2si (%rdi), %rax # sched: [12:1.00]
; SKX-NEXT: addl %ecx, %eax # sched: [1:0.25]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-SSE-LABEL: test_cvttss2si:
+; BDVER2-SSE: # %bb.0:
+; BDVER2-SSE-NEXT: cvttss2si %xmm0, %ecx # sched: [5:1.00]
+; BDVER2-SSE-NEXT: cvttss2si (%rdi), %eax # sched: [9:1.00]
+; BDVER2-SSE-NEXT: addl %ecx, %eax # sched: [1:0.33]
+; BDVER2-SSE-NEXT: retq # sched: [1:1.00]
+;
+; BDVER2-LABEL: test_cvttss2si:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: vcvttss2si %xmm0, %ecx # sched: [5:1.00]
+; BDVER2-NEXT: vcvttss2si (%rdi), %eax # sched: [10:1.00]
+; BDVER2-NEXT: addl %ecx, %eax # sched: [1:0.33]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-SSE-LABEL: test_cvttss2si:
; BTVER2-SSE: # %bb.0:
; BTVER2-SSE-NEXT: cvttss2si (%rdi), %eax # sched: [12:1.00]
; SKX-NEXT: addq %rcx, %rax # sched: [1:0.25]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-SSE-LABEL: test_cvttss2siq:
+; BDVER2-SSE: # %bb.0:
+; BDVER2-SSE-NEXT: cvttss2si %xmm0, %rcx # sched: [5:1.00]
+; BDVER2-SSE-NEXT: cvttss2si (%rdi), %rax # sched: [9:1.00]
+; BDVER2-SSE-NEXT: addq %rcx, %rax # sched: [1:0.33]
+; BDVER2-SSE-NEXT: retq # sched: [1:1.00]
+;
+; BDVER2-LABEL: test_cvttss2siq:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: vcvttss2si %xmm0, %rcx # sched: [5:1.00]
+; BDVER2-NEXT: vcvttss2si (%rdi), %rax # sched: [10:1.00]
+; BDVER2-NEXT: addq %rcx, %rax # sched: [1:0.33]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-SSE-LABEL: test_cvttss2siq:
; BTVER2-SSE: # %bb.0:
; BTVER2-SSE-NEXT: cvttss2si (%rdi), %rax # sched: [12:1.00]
; SKX-NEXT: vdivps (%rdi), %xmm0, %xmm0 # sched: [17:5.00]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-SSE-LABEL: test_divps:
+; BDVER2-SSE: # %bb.0:
+; BDVER2-SSE-NEXT: divps %xmm1, %xmm0 # sched: [14:14.00]
+; BDVER2-SSE-NEXT: divps (%rdi), %xmm0 # sched: [20:14.00]
+; BDVER2-SSE-NEXT: retq # sched: [1:1.00]
+;
+; BDVER2-LABEL: test_divps:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: vdivps %xmm1, %xmm0, %xmm0 # sched: [14:14.00]
+; BDVER2-NEXT: vdivps (%rdi), %xmm0, %xmm0 # sched: [20:14.00]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-SSE-LABEL: test_divps:
; BTVER2-SSE: # %bb.0:
; BTVER2-SSE-NEXT: divps %xmm1, %xmm0 # sched: [19:19.00]
; SKX-NEXT: vdivss (%rdi), %xmm0, %xmm0 # sched: [16:3.00]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-SSE-LABEL: test_divss:
+; BDVER2-SSE: # %bb.0:
+; BDVER2-SSE-NEXT: divss %xmm1, %xmm0 # sched: [14:14.00]
+; BDVER2-SSE-NEXT: divss (%rdi), %xmm0 # sched: [20:14.00]
+; BDVER2-SSE-NEXT: retq # sched: [1:1.00]
+;
+; BDVER2-LABEL: test_divss:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: vdivss %xmm1, %xmm0, %xmm0 # sched: [14:14.00]
+; BDVER2-NEXT: vdivss (%rdi), %xmm0, %xmm0 # sched: [20:14.00]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-SSE-LABEL: test_divss:
; BTVER2-SSE: # %bb.0:
; BTVER2-SSE-NEXT: divss %xmm1, %xmm0 # sched: [19:19.00]
; SKX-NEXT: vldmxcsr -{{[0-9]+}}(%rsp) # sched: [7:1.00]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-SSE-LABEL: test_ldmxcsr:
+; BDVER2-SSE: # %bb.0:
+; BDVER2-SSE-NEXT: movl %edi, -{{[0-9]+}}(%rsp) # sched: [1:1.00]
+; BDVER2-SSE-NEXT: ldmxcsr -{{[0-9]+}}(%rsp) # sched: [5:1.00]
+; BDVER2-SSE-NEXT: retq # sched: [1:1.00]
+;
+; BDVER2-LABEL: test_ldmxcsr:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: movl %edi, -{{[0-9]+}}(%rsp) # sched: [1:1.00]
+; BDVER2-NEXT: vldmxcsr -{{[0-9]+}}(%rsp) # sched: [5:1.00]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-SSE-LABEL: test_ldmxcsr:
; BTVER2-SSE: # %bb.0:
; BTVER2-SSE-NEXT: movl %edi, -{{[0-9]+}}(%rsp) # sched: [1:1.00]
; SKX-NEXT: vmaxps (%rdi), %xmm0, %xmm0 # sched: [10:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-SSE-LABEL: test_maxps:
+; BDVER2-SSE: # %bb.0:
+; BDVER2-SSE-NEXT: maxps %xmm1, %xmm0 # sched: [3:1.00]
+; BDVER2-SSE-NEXT: maxps (%rdi), %xmm0 # sched: [9:1.00]
+; BDVER2-SSE-NEXT: retq # sched: [1:1.00]
+;
+; BDVER2-LABEL: test_maxps:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: vmaxps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
+; BDVER2-NEXT: vmaxps (%rdi), %xmm0, %xmm0 # sched: [9:1.00]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-SSE-LABEL: test_maxps:
; BTVER2-SSE: # %bb.0:
; BTVER2-SSE-NEXT: maxps %xmm1, %xmm0 # sched: [2:1.00]
; SKX-NEXT: vmaxss (%rdi), %xmm0, %xmm0 # sched: [9:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-SSE-LABEL: test_maxss:
+; BDVER2-SSE: # %bb.0:
+; BDVER2-SSE-NEXT: maxss %xmm1, %xmm0 # sched: [3:1.00]
+; BDVER2-SSE-NEXT: maxss (%rdi), %xmm0 # sched: [9:1.00]
+; BDVER2-SSE-NEXT: retq # sched: [1:1.00]
+;
+; BDVER2-LABEL: test_maxss:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: vmaxss %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
+; BDVER2-NEXT: vmaxss (%rdi), %xmm0, %xmm0 # sched: [9:1.00]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-SSE-LABEL: test_maxss:
; BTVER2-SSE: # %bb.0:
; BTVER2-SSE-NEXT: maxss %xmm1, %xmm0 # sched: [2:1.00]
; SKX-NEXT: vminps (%rdi), %xmm0, %xmm0 # sched: [10:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-SSE-LABEL: test_minps:
+; BDVER2-SSE: # %bb.0:
+; BDVER2-SSE-NEXT: minps %xmm1, %xmm0 # sched: [3:1.00]
+; BDVER2-SSE-NEXT: minps (%rdi), %xmm0 # sched: [9:1.00]
+; BDVER2-SSE-NEXT: retq # sched: [1:1.00]
+;
+; BDVER2-LABEL: test_minps:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: vminps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
+; BDVER2-NEXT: vminps (%rdi), %xmm0, %xmm0 # sched: [9:1.00]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-SSE-LABEL: test_minps:
; BTVER2-SSE: # %bb.0:
; BTVER2-SSE-NEXT: minps %xmm1, %xmm0 # sched: [2:1.00]
; SKX-NEXT: vminss (%rdi), %xmm0, %xmm0 # sched: [9:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-SSE-LABEL: test_minss:
+; BDVER2-SSE: # %bb.0:
+; BDVER2-SSE-NEXT: minss %xmm1, %xmm0 # sched: [3:1.00]
+; BDVER2-SSE-NEXT: minss (%rdi), %xmm0 # sched: [9:1.00]
+; BDVER2-SSE-NEXT: retq # sched: [1:1.00]
+;
+; BDVER2-LABEL: test_minss:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: vminss %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
+; BDVER2-NEXT: vminss (%rdi), %xmm0, %xmm0 # sched: [9:1.00]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-SSE-LABEL: test_minss:
; BTVER2-SSE: # %bb.0:
; BTVER2-SSE-NEXT: minss %xmm1, %xmm0 # sched: [2:1.00]
; SKX-NEXT: vmovaps %xmm0, (%rsi) # sched: [1:1.00]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-SSE-LABEL: test_movaps:
+; BDVER2-SSE: # %bb.0:
+; BDVER2-SSE-NEXT: movaps (%rdi), %xmm0 # sched: [6:0.50]
+; BDVER2-SSE-NEXT: addps %xmm0, %xmm0 # sched: [3:1.00]
+; BDVER2-SSE-NEXT: movaps %xmm0, (%rsi) # sched: [1:1.00]
+; BDVER2-SSE-NEXT: retq # sched: [1:1.00]
+;
+; BDVER2-LABEL: test_movaps:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: vmovaps (%rdi), %xmm0 # sched: [6:0.50]
+; BDVER2-NEXT: vaddps %xmm0, %xmm0, %xmm0 # sched: [3:1.00]
+; BDVER2-NEXT: vmovaps %xmm0, (%rsi) # sched: [1:1.00]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-SSE-LABEL: test_movaps:
; BTVER2-SSE: # %bb.0:
; BTVER2-SSE-NEXT: movaps (%rdi), %xmm0 # sched: [5:1.00]
; SKX-NEXT: vunpckhpd {{.*#+}} xmm0 = xmm1[1],xmm0[1] sched: [1:1.00]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-SSE-LABEL: test_movhlps:
+; BDVER2-SSE: # %bb.0:
+; BDVER2-SSE-NEXT: movhlps {{.*#+}} xmm0 = xmm1[1],xmm0[1] sched: [1:1.00]
+; BDVER2-SSE-NEXT: retq # sched: [1:1.00]
+;
+; BDVER2-LABEL: test_movhlps:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: vunpckhpd {{.*#+}} xmm0 = xmm1[1],xmm0[1] sched: [1:1.00]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-SSE-LABEL: test_movhlps:
; BTVER2-SSE: # %bb.0:
; BTVER2-SSE-NEXT: movhlps {{.*#+}} xmm0 = xmm1[1],xmm0[1] sched: [1:0.50]
; SKX-NEXT: vmovaps %xmm1, %xmm0 # sched: [1:0.33]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-SSE-LABEL: test_movhps:
+; BDVER2-SSE: # %bb.0:
+; BDVER2-SSE-NEXT: movhpd {{.*#+}} xmm1 = xmm1[0],mem[0] sched: [7:1.00]
+; BDVER2-SSE-NEXT: addps %xmm1, %xmm0 # sched: [3:1.00]
+; BDVER2-SSE-NEXT: movhps %xmm0, (%rdi) # sched: [1:1.00]
+; BDVER2-SSE-NEXT: movaps %xmm1, %xmm0 # sched: [1:1.00]
+; BDVER2-SSE-NEXT: retq # sched: [1:1.00]
+;
+; BDVER2-LABEL: test_movhps:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: vmovhpd {{.*#+}} xmm1 = xmm1[0],mem[0] sched: [7:1.00]
+; BDVER2-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
+; BDVER2-NEXT: vmovhpd %xmm0, (%rdi) # sched: [1:1.00]
+; BDVER2-NEXT: vmovaps %xmm1, %xmm0 # sched: [1:1.00]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-SSE-LABEL: test_movhps:
; BTVER2-SSE: # %bb.0:
; BTVER2-SSE-NEXT: movhpd {{.*#+}} xmm1 = xmm1[0],mem[0] sched: [6:1.00]
; SKX-NEXT: vaddps %xmm0, %xmm1, %xmm0 # sched: [4:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-SSE-LABEL: test_movlhps:
+; BDVER2-SSE: # %bb.0:
+; BDVER2-SSE-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0] sched: [1:1.00]
+; BDVER2-SSE-NEXT: addps %xmm1, %xmm0 # sched: [3:1.00]
+; BDVER2-SSE-NEXT: retq # sched: [1:1.00]
+;
+; BDVER2-LABEL: test_movlhps:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: vmovlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0] sched: [1:1.00]
+; BDVER2-NEXT: vaddps %xmm0, %xmm1, %xmm0 # sched: [3:1.00]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-SSE-LABEL: test_movlhps:
; BTVER2-SSE: # %bb.0:
; BTVER2-SSE-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0] sched: [1:0.50]
; SKX-NEXT: vmovaps %xmm1, %xmm0 # sched: [1:0.33]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-SSE-LABEL: test_movlps:
+; BDVER2-SSE: # %bb.0:
+; BDVER2-SSE-NEXT: movlpd {{.*#+}} xmm1 = mem[0],xmm1[1] sched: [7:1.00]
+; BDVER2-SSE-NEXT: addps %xmm1, %xmm0 # sched: [3:1.00]
+; BDVER2-SSE-NEXT: movlps %xmm0, (%rdi) # sched: [1:1.00]
+; BDVER2-SSE-NEXT: movaps %xmm1, %xmm0 # sched: [1:1.00]
+; BDVER2-SSE-NEXT: retq # sched: [1:1.00]
+;
+; BDVER2-LABEL: test_movlps:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: vmovlpd {{.*#+}} xmm1 = mem[0],xmm1[1] sched: [7:1.00]
+; BDVER2-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
+; BDVER2-NEXT: vmovlps %xmm0, (%rdi) # sched: [1:1.00]
+; BDVER2-NEXT: vmovaps %xmm1, %xmm0 # sched: [1:1.00]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-SSE-LABEL: test_movlps:
; BTVER2-SSE: # %bb.0:
; BTVER2-SSE-NEXT: movlpd {{.*#+}} xmm1 = mem[0],xmm1[1] sched: [6:1.00]
; SKX-NEXT: vmovmskps %xmm0, %eax # sched: [2:1.00]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-SSE-LABEL: test_movmskps:
+; BDVER2-SSE: # %bb.0:
+; BDVER2-SSE-NEXT: movmskps %xmm0, %eax # sched: [2:1.00]
+; BDVER2-SSE-NEXT: retq # sched: [1:1.00]
+;
+; BDVER2-LABEL: test_movmskps:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: vmovmskps %xmm0, %eax # sched: [2:1.00]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-SSE-LABEL: test_movmskps:
; BTVER2-SSE: # %bb.0:
; BTVER2-SSE-NEXT: movmskps %xmm0, %eax # sched: [3:1.00]
; SKX-NEXT: vmovntps %xmm0, (%rdi) # sched: [1:1.00]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-SSE-LABEL: test_movntps:
+; BDVER2-SSE: # %bb.0:
+; BDVER2-SSE-NEXT: movntps %xmm0, (%rdi) # sched: [1:1.00]
+; BDVER2-SSE-NEXT: retq # sched: [1:1.00]
+;
+; BDVER2-LABEL: test_movntps:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: vmovntps %xmm0, (%rdi) # sched: [1:1.00]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-SSE-LABEL: test_movntps:
; BTVER2-SSE: # %bb.0:
; BTVER2-SSE-NEXT: movntps %xmm0, (%rdi) # sched: [3:1.00]
; SKX-NEXT: vmovss %xmm0, (%rsi) # sched: [1:1.00]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-SSE-LABEL: test_movss_mem:
+; BDVER2-SSE: # %bb.0:
+; BDVER2-SSE-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero sched: [6:0.50]
+; BDVER2-SSE-NEXT: addss %xmm0, %xmm0 # sched: [3:1.00]
+; BDVER2-SSE-NEXT: movss %xmm0, (%rsi) # sched: [1:1.00]
+; BDVER2-SSE-NEXT: retq # sched: [1:1.00]
+;
+; BDVER2-LABEL: test_movss_mem:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero sched: [6:0.50]
+; BDVER2-NEXT: vaddss %xmm0, %xmm0, %xmm0 # sched: [3:1.00]
+; BDVER2-NEXT: vmovss %xmm0, (%rsi) # sched: [1:1.00]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-SSE-LABEL: test_movss_mem:
; BTVER2-SSE: # %bb.0:
; BTVER2-SSE-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero sched: [5:1.00]
; SKX-NEXT: vblendps {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3] sched: [1:0.33]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-SSE-LABEL: test_movss_reg:
+; BDVER2-SSE: # %bb.0:
+; BDVER2-SSE-NEXT: movss {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3] sched: [1:1.00]
+; BDVER2-SSE-NEXT: retq # sched: [1:1.00]
+;
+; BDVER2-LABEL: test_movss_reg:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: vblendps {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3] sched: [1:0.50]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-SSE-LABEL: test_movss_reg:
; BTVER2-SSE: # %bb.0:
; BTVER2-SSE-NEXT: movss {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3] sched: [1:0.50]
; SKX-NEXT: vmovups %xmm0, (%rsi) # sched: [1:1.00]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-SSE-LABEL: test_movups:
+; BDVER2-SSE: # %bb.0:
+; BDVER2-SSE-NEXT: movups (%rdi), %xmm0 # sched: [6:0.50]
+; BDVER2-SSE-NEXT: addps %xmm0, %xmm0 # sched: [3:1.00]
+; BDVER2-SSE-NEXT: movups %xmm0, (%rsi) # sched: [1:1.00]
+; BDVER2-SSE-NEXT: retq # sched: [1:1.00]
+;
+; BDVER2-LABEL: test_movups:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: vmovups (%rdi), %xmm0 # sched: [6:0.50]
+; BDVER2-NEXT: vaddps %xmm0, %xmm0, %xmm0 # sched: [3:1.00]
+; BDVER2-NEXT: vmovups %xmm0, (%rsi) # sched: [1:1.00]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-SSE-LABEL: test_movups:
; BTVER2-SSE: # %bb.0:
; BTVER2-SSE-NEXT: movups (%rdi), %xmm0 # sched: [5:1.00]
; SKX-NEXT: vmulps (%rdi), %xmm0, %xmm0 # sched: [10:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-SSE-LABEL: test_mulps:
+; BDVER2-SSE: # %bb.0:
+; BDVER2-SSE-NEXT: mulps %xmm1, %xmm0 # sched: [5:1.00]
+; BDVER2-SSE-NEXT: mulps (%rdi), %xmm0 # sched: [11:1.00]
+; BDVER2-SSE-NEXT: retq # sched: [1:1.00]
+;
+; BDVER2-LABEL: test_mulps:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: vmulps %xmm1, %xmm0, %xmm0 # sched: [5:1.00]
+; BDVER2-NEXT: vmulps (%rdi), %xmm0, %xmm0 # sched: [11:1.00]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-SSE-LABEL: test_mulps:
; BTVER2-SSE: # %bb.0:
; BTVER2-SSE-NEXT: mulps %xmm1, %xmm0 # sched: [2:1.00]
; SKX-NEXT: vmulss (%rdi), %xmm0, %xmm0 # sched: [9:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-SSE-LABEL: test_mulss:
+; BDVER2-SSE: # %bb.0:
+; BDVER2-SSE-NEXT: mulss %xmm1, %xmm0 # sched: [5:1.00]
+; BDVER2-SSE-NEXT: mulss (%rdi), %xmm0 # sched: [11:1.00]
+; BDVER2-SSE-NEXT: retq # sched: [1:1.00]
+;
+; BDVER2-LABEL: test_mulss:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: vmulss %xmm1, %xmm0, %xmm0 # sched: [5:1.00]
+; BDVER2-NEXT: vmulss (%rdi), %xmm0, %xmm0 # sched: [11:1.00]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-SSE-LABEL: test_mulss:
; BTVER2-SSE: # %bb.0:
; BTVER2-SSE-NEXT: mulss %xmm1, %xmm0 # sched: [2:1.00]
; SKX-NEXT: vorps (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-SSE-LABEL: test_orps:
+; BDVER2-SSE: # %bb.0:
+; BDVER2-SSE-NEXT: orps %xmm1, %xmm0 # sched: [1:1.00]
+; BDVER2-SSE-NEXT: orps (%rdi), %xmm0 # sched: [7:1.00]
+; BDVER2-SSE-NEXT: retq # sched: [1:1.00]
+;
+; BDVER2-LABEL: test_orps:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: vorps %xmm1, %xmm0, %xmm0 # sched: [1:1.00]
+; BDVER2-NEXT: vorps (%rdi), %xmm0, %xmm0 # sched: [7:1.00]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-SSE-LABEL: test_orps:
; BTVER2-SSE: # %bb.0:
; BTVER2-SSE-NEXT: orps %xmm1, %xmm0 # sched: [1:0.50]
; SKX-NEXT: #NO_APP
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-SSE-LABEL: test_prefetch:
+; BDVER2-SSE: # %bb.0:
+; BDVER2-SSE-NEXT: #APP
+; BDVER2-SSE-NEXT: prefetchnta (%rdi) # sched: [5:0.50]
+; BDVER2-SSE-NEXT: prefetcht0 (%rdi) # sched: [5:0.50]
+; BDVER2-SSE-NEXT: prefetcht1 (%rdi) # sched: [5:0.50]
+; BDVER2-SSE-NEXT: prefetcht2 (%rdi) # sched: [5:0.50]
+; BDVER2-SSE-NEXT: #NO_APP
+; BDVER2-SSE-NEXT: retq # sched: [1:1.00]
+;
+; BDVER2-LABEL: test_prefetch:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: #APP
+; BDVER2-NEXT: prefetchnta (%rdi) # sched: [5:0.50]
+; BDVER2-NEXT: prefetcht0 (%rdi) # sched: [5:0.50]
+; BDVER2-NEXT: prefetcht1 (%rdi) # sched: [5:0.50]
+; BDVER2-NEXT: prefetcht2 (%rdi) # sched: [5:0.50]
+; BDVER2-NEXT: #NO_APP
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-SSE-LABEL: test_prefetch:
; BTVER2-SSE: # %bb.0:
; BTVER2-SSE-NEXT: #APP
; SKX-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-SSE-LABEL: test_rcpps:
+; BDVER2-SSE: # %bb.0:
+; BDVER2-SSE-NEXT: rcpps %xmm0, %xmm1 # sched: [5:1.00]
+; BDVER2-SSE-NEXT: rcpps (%rdi), %xmm0 # sched: [11:1.00]
+; BDVER2-SSE-NEXT: addps %xmm1, %xmm0 # sched: [3:1.00]
+; BDVER2-SSE-NEXT: retq # sched: [1:1.00]
+;
+; BDVER2-LABEL: test_rcpps:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: vrcpps %xmm0, %xmm0 # sched: [5:1.00]
+; BDVER2-NEXT: vrcpps (%rdi), %xmm1 # sched: [11:1.00]
+; BDVER2-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-SSE-LABEL: test_rcpps:
; BTVER2-SSE: # %bb.0:
; BTVER2-SSE-NEXT: rcpps %xmm0, %xmm1 # sched: [2:1.00]
; SKX-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-SSE-LABEL: test_rcpss:
+; BDVER2-SSE: # %bb.0:
+; BDVER2-SSE-NEXT: rcpss %xmm0, %xmm0 # sched: [5:1.00]
+; BDVER2-SSE-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero sched: [6:0.50]
+; BDVER2-SSE-NEXT: rcpss %xmm1, %xmm1 # sched: [5:1.00]
+; BDVER2-SSE-NEXT: addps %xmm1, %xmm0 # sched: [3:1.00]
+; BDVER2-SSE-NEXT: retq # sched: [1:1.00]
+;
+; BDVER2-LABEL: test_rcpss:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: vrcpss %xmm0, %xmm0, %xmm0 # sched: [5:1.00]
+; BDVER2-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero sched: [6:0.50]
+; BDVER2-NEXT: vrcpss %xmm1, %xmm1, %xmm1 # sched: [5:1.00]
+; BDVER2-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-SSE-LABEL: test_rcpss:
; BTVER2-SSE: # %bb.0:
; BTVER2-SSE-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero sched: [5:1.00]
; SKX-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-SSE-LABEL: test_rsqrtps:
+; BDVER2-SSE: # %bb.0:
+; BDVER2-SSE-NEXT: rsqrtps %xmm0, %xmm1 # sched: [5:1.00]
+; BDVER2-SSE-NEXT: rsqrtps (%rdi), %xmm0 # sched: [11:1.00]
+; BDVER2-SSE-NEXT: addps %xmm1, %xmm0 # sched: [3:1.00]
+; BDVER2-SSE-NEXT: retq # sched: [1:1.00]
+;
+; BDVER2-LABEL: test_rsqrtps:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: vrsqrtps %xmm0, %xmm0 # sched: [5:1.00]
+; BDVER2-NEXT: vrsqrtps (%rdi), %xmm1 # sched: [11:1.00]
+; BDVER2-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-SSE-LABEL: test_rsqrtps:
; BTVER2-SSE: # %bb.0:
; BTVER2-SSE-NEXT: rsqrtps %xmm0, %xmm1 # sched: [2:1.00]
; SKX-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-SSE-LABEL: test_rsqrtss:
+; BDVER2-SSE: # %bb.0:
+; BDVER2-SSE-NEXT: rsqrtss %xmm0, %xmm0 # sched: [5:1.00]
+; BDVER2-SSE-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero sched: [6:0.50]
+; BDVER2-SSE-NEXT: rsqrtss %xmm1, %xmm1 # sched: [5:1.00]
+; BDVER2-SSE-NEXT: addps %xmm1, %xmm0 # sched: [3:1.00]
+; BDVER2-SSE-NEXT: retq # sched: [1:1.00]
+;
+; BDVER2-LABEL: test_rsqrtss:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: vrsqrtss %xmm0, %xmm0, %xmm0 # sched: [5:1.00]
+; BDVER2-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero sched: [6:0.50]
+; BDVER2-NEXT: vrsqrtss %xmm1, %xmm1, %xmm1 # sched: [5:1.00]
+; BDVER2-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-SSE-LABEL: test_rsqrtss:
; BTVER2-SSE: # %bb.0:
; BTVER2-SSE-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero sched: [5:1.00]
; SKX-NEXT: sfence # sched: [2:0.33]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-SSE-LABEL: test_sfence:
+; BDVER2-SSE: # %bb.0:
+; BDVER2-SSE-NEXT: sfence # sched: [1:1.00]
+; BDVER2-SSE-NEXT: retq # sched: [1:1.00]
+;
+; BDVER2-LABEL: test_sfence:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: sfence # sched: [1:1.00]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-SSE-LABEL: test_sfence:
; BTVER2-SSE: # %bb.0:
; BTVER2-SSE-NEXT: sfence # sched: [1:1.00]
; SKX-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-SSE-LABEL: test_shufps:
+; BDVER2-SSE: # %bb.0:
+; BDVER2-SSE-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,0],xmm1[0,0] sched: [1:1.00]
+; BDVER2-SSE-NEXT: shufps {{.*#+}} xmm1 = xmm1[0,3],mem[0,0] sched: [7:1.00]
+; BDVER2-SSE-NEXT: addps %xmm1, %xmm0 # sched: [3:1.00]
+; BDVER2-SSE-NEXT: retq # sched: [1:1.00]
+;
+; BDVER2-LABEL: test_shufps:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: vshufps {{.*#+}} xmm0 = xmm0[0,0],xmm1[0,0] sched: [1:1.00]
+; BDVER2-NEXT: vshufps {{.*#+}} xmm1 = xmm1[0,3],mem[0,0] sched: [7:1.00]
+; BDVER2-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-SSE-LABEL: test_shufps:
; BTVER2-SSE: # %bb.0:
; BTVER2-SSE-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,0],xmm1[0,0] sched: [1:0.50]
; SKX-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-SSE-LABEL: test_sqrtps:
+; BDVER2-SSE: # %bb.0:
+; BDVER2-SSE-NEXT: sqrtps %xmm0, %xmm1 # sched: [14:14.00]
+; BDVER2-SSE-NEXT: sqrtps (%rdi), %xmm0 # sched: [20:14.00]
+; BDVER2-SSE-NEXT: addps %xmm1, %xmm0 # sched: [3:1.00]
+; BDVER2-SSE-NEXT: retq # sched: [1:1.00]
+;
+; BDVER2-LABEL: test_sqrtps:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: vsqrtps %xmm0, %xmm0 # sched: [14:14.00]
+; BDVER2-NEXT: vsqrtps (%rdi), %xmm1 # sched: [20:14.00]
+; BDVER2-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-SSE-LABEL: test_sqrtps:
; BTVER2-SSE: # %bb.0:
; BTVER2-SSE-NEXT: sqrtps %xmm0, %xmm1 # sched: [21:21.00]
; SKX-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-SSE-LABEL: test_sqrtss:
+; BDVER2-SSE: # %bb.0:
+; BDVER2-SSE-NEXT: sqrtss %xmm0, %xmm0 # sched: [14:14.00]
+; BDVER2-SSE-NEXT: movaps (%rdi), %xmm1 # sched: [6:0.50]
+; BDVER2-SSE-NEXT: sqrtss %xmm1, %xmm1 # sched: [14:14.00]
+; BDVER2-SSE-NEXT: addps %xmm1, %xmm0 # sched: [3:1.00]
+; BDVER2-SSE-NEXT: retq # sched: [1:1.00]
+;
+; BDVER2-LABEL: test_sqrtss:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: vsqrtss %xmm0, %xmm0, %xmm0 # sched: [14:14.00]
+; BDVER2-NEXT: vmovaps (%rdi), %xmm1 # sched: [6:0.50]
+; BDVER2-NEXT: vsqrtss %xmm1, %xmm1, %xmm1 # sched: [14:14.00]
+; BDVER2-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-SSE-LABEL: test_sqrtss:
; BTVER2-SSE: # %bb.0:
; BTVER2-SSE-NEXT: movaps (%rdi), %xmm1 # sched: [5:1.00]
; SKX-NEXT: movl -{{[0-9]+}}(%rsp), %eax # sched: [5:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-SSE-LABEL: test_stmxcsr:
+; BDVER2-SSE: # %bb.0:
+; BDVER2-SSE-NEXT: stmxcsr -{{[0-9]+}}(%rsp) # sched: [5:1.00]
+; BDVER2-SSE-NEXT: movl -{{[0-9]+}}(%rsp), %eax # sched: [5:0.50]
+; BDVER2-SSE-NEXT: retq # sched: [1:1.00]
+;
+; BDVER2-LABEL: test_stmxcsr:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: vstmxcsr -{{[0-9]+}}(%rsp) # sched: [5:1.00]
+; BDVER2-NEXT: movl -{{[0-9]+}}(%rsp), %eax # sched: [5:0.50]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-SSE-LABEL: test_stmxcsr:
; BTVER2-SSE: # %bb.0:
; BTVER2-SSE-NEXT: stmxcsr -{{[0-9]+}}(%rsp) # sched: [1:1.00]
; SKX-NEXT: vsubps (%rdi), %xmm0, %xmm0 # sched: [10:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-SSE-LABEL: test_subps:
+; BDVER2-SSE: # %bb.0:
+; BDVER2-SSE-NEXT: subps %xmm1, %xmm0 # sched: [3:1.00]
+; BDVER2-SSE-NEXT: subps (%rdi), %xmm0 # sched: [9:1.00]
+; BDVER2-SSE-NEXT: retq # sched: [1:1.00]
+;
+; BDVER2-LABEL: test_subps:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: vsubps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
+; BDVER2-NEXT: vsubps (%rdi), %xmm0, %xmm0 # sched: [9:1.00]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-SSE-LABEL: test_subps:
; BTVER2-SSE: # %bb.0:
; BTVER2-SSE-NEXT: subps %xmm1, %xmm0 # sched: [3:1.00]
; SKX-NEXT: vsubss (%rdi), %xmm0, %xmm0 # sched: [9:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-SSE-LABEL: test_subss:
+; BDVER2-SSE: # %bb.0:
+; BDVER2-SSE-NEXT: subss %xmm1, %xmm0 # sched: [3:1.00]
+; BDVER2-SSE-NEXT: subss (%rdi), %xmm0 # sched: [9:1.00]
+; BDVER2-SSE-NEXT: retq # sched: [1:1.00]
+;
+; BDVER2-LABEL: test_subss:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: vsubss %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
+; BDVER2-NEXT: vsubss (%rdi), %xmm0, %xmm0 # sched: [9:1.00]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-SSE-LABEL: test_subss:
; BTVER2-SSE: # %bb.0:
; BTVER2-SSE-NEXT: subss %xmm1, %xmm0 # sched: [3:1.00]
; SKX-NEXT: movzbl %dl, %eax # sched: [1:0.25]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-SSE-LABEL: test_ucomiss:
+; BDVER2-SSE: # %bb.0:
+; BDVER2-SSE-NEXT: ucomiss %xmm1, %xmm0 # sched: [2:1.00]
+; BDVER2-SSE-NEXT: setnp %al # sched: [1:0.50]
+; BDVER2-SSE-NEXT: sete %cl # sched: [1:0.50]
+; BDVER2-SSE-NEXT: andb %al, %cl # sched: [1:0.33]
+; BDVER2-SSE-NEXT: ucomiss (%rdi), %xmm0 # sched: [8:1.00]
+; BDVER2-SSE-NEXT: setnp %al # sched: [1:0.50]
+; BDVER2-SSE-NEXT: sete %dl # sched: [1:0.50]
+; BDVER2-SSE-NEXT: andb %al, %dl # sched: [1:0.33]
+; BDVER2-SSE-NEXT: orb %cl, %dl # sched: [1:0.33]
+; BDVER2-SSE-NEXT: movzbl %dl, %eax # sched: [1:0.33]
+; BDVER2-SSE-NEXT: retq # sched: [1:1.00]
+;
+; BDVER2-LABEL: test_ucomiss:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: vucomiss %xmm1, %xmm0 # sched: [2:1.00]
+; BDVER2-NEXT: setnp %al # sched: [1:0.50]
+; BDVER2-NEXT: sete %cl # sched: [1:0.50]
+; BDVER2-NEXT: andb %al, %cl # sched: [1:0.33]
+; BDVER2-NEXT: vucomiss (%rdi), %xmm0 # sched: [8:1.00]
+; BDVER2-NEXT: setnp %al # sched: [1:0.50]
+; BDVER2-NEXT: sete %dl # sched: [1:0.50]
+; BDVER2-NEXT: andb %al, %dl # sched: [1:0.33]
+; BDVER2-NEXT: orb %cl, %dl # sched: [1:0.33]
+; BDVER2-NEXT: movzbl %dl, %eax # sched: [1:0.33]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-SSE-LABEL: test_ucomiss:
; BTVER2-SSE: # %bb.0:
; BTVER2-SSE-NEXT: ucomiss %xmm1, %xmm0 # sched: [3:1.00]
; SKX-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-SSE-LABEL: test_unpckhps:
+; BDVER2-SSE: # %bb.0:
+; BDVER2-SSE-NEXT: unpckhps {{.*#+}} xmm0 = xmm0[2],xmm1[2],xmm0[3],xmm1[3] sched: [1:1.00]
+; BDVER2-SSE-NEXT: unpckhps {{.*#+}} xmm1 = xmm1[2],mem[2],xmm1[3],mem[3] sched: [7:1.00]
+; BDVER2-SSE-NEXT: addps %xmm1, %xmm0 # sched: [3:1.00]
+; BDVER2-SSE-NEXT: retq # sched: [1:1.00]
+;
+; BDVER2-LABEL: test_unpckhps:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: vunpckhps {{.*#+}} xmm0 = xmm0[2],xmm1[2],xmm0[3],xmm1[3] sched: [1:1.00]
+; BDVER2-NEXT: vunpckhps {{.*#+}} xmm1 = xmm1[2],mem[2],xmm1[3],mem[3] sched: [7:1.00]
+; BDVER2-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-SSE-LABEL: test_unpckhps:
; BTVER2-SSE: # %bb.0:
; BTVER2-SSE-NEXT: unpckhps {{.*#+}} xmm0 = xmm0[2],xmm1[2],xmm0[3],xmm1[3] sched: [1:0.50]
; SKX-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-SSE-LABEL: test_unpcklps:
+; BDVER2-SSE: # %bb.0:
+; BDVER2-SSE-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] sched: [1:1.00]
+; BDVER2-SSE-NEXT: unpcklps {{.*#+}} xmm1 = xmm1[0],mem[0],xmm1[1],mem[1] sched: [7:1.00]
+; BDVER2-SSE-NEXT: addps %xmm1, %xmm0 # sched: [3:1.00]
+; BDVER2-SSE-NEXT: retq # sched: [1:1.00]
+;
+; BDVER2-LABEL: test_unpcklps:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: vunpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] sched: [1:1.00]
+; BDVER2-NEXT: vunpcklps {{.*#+}} xmm1 = xmm1[0],mem[0],xmm1[1],mem[1] sched: [7:1.00]
+; BDVER2-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-SSE-LABEL: test_unpcklps:
; BTVER2-SSE: # %bb.0:
; BTVER2-SSE-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] sched: [1:0.50]
; SKX-NEXT: vxorps (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-SSE-LABEL: test_xorps:
+; BDVER2-SSE: # %bb.0:
+; BDVER2-SSE-NEXT: xorps %xmm1, %xmm0 # sched: [1:1.00]
+; BDVER2-SSE-NEXT: xorps (%rdi), %xmm0 # sched: [7:1.00]
+; BDVER2-SSE-NEXT: retq # sched: [1:1.00]
+;
+; BDVER2-LABEL: test_xorps:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: vxorps %xmm1, %xmm0, %xmm0 # sched: [1:1.00]
+; BDVER2-NEXT: vxorps (%rdi), %xmm0, %xmm0 # sched: [7:1.00]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-SSE-LABEL: test_xorps:
; BTVER2-SSE: # %bb.0:
; BTVER2-SSE-NEXT: xorps %xmm1, %xmm0 # sched: [1:0.50]
; SKX-NEXT: vxorps %xmm0, %xmm0, %xmm0 # sched: [1:0.33]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-SSE-LABEL: test_fnop:
+; BDVER2-SSE: # %bb.0:
+; BDVER2-SSE-NEXT: #APP
+; BDVER2-SSE-NEXT: nop # sched: [1:0.25]
+; BDVER2-SSE-NEXT: #NO_APP
+; BDVER2-SSE-NEXT: xorps %xmm0, %xmm0 # sched: [0:0.25]
+; BDVER2-SSE-NEXT: retq # sched: [1:1.00]
+;
+; BDVER2-LABEL: test_fnop:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: #APP
+; BDVER2-NEXT: nop # sched: [1:0.25]
+; BDVER2-NEXT: #NO_APP
+; BDVER2-NEXT: vxorps %xmm0, %xmm0, %xmm0 # sched: [0:0.25]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-SSE-LABEL: test_fnop:
; BTVER2-SSE: # %bb.0:
; BTVER2-SSE-NEXT: xorps %xmm0, %xmm0 # sched: [0:0.50]
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=skylake -mattr=-avx2 | FileCheck %s --check-prefixes=CHECK,SKYLAKE
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=skx -mattr=-sse3 | FileCheck %s --check-prefixes=CHECK,SKX-SSE
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=skx -mattr=-avx2 | FileCheck %s --check-prefixes=CHECK,SKX
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=x86-64 -mattr=-sse3 | FileCheck %s --check-prefixes=CHECK,BDVER2-SSE
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=x86-64 -mattr=+avx,+xop -mattr=-avx2 | FileCheck %s --check-prefixes=CHECK,BDVER2
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=btver2 -mattr=-sse3 | FileCheck %s --check-prefixes=CHECK,BTVER2-SSE
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=btver2 -mattr=-avx2 | FileCheck %s --check-prefixes=CHECK,BTVER2
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=znver1 -mattr=-sse3 | FileCheck %s --check-prefixes=CHECK,ZNVER1-SSE
; SKX-NEXT: vaddpd (%rdi), %xmm0, %xmm0 # sched: [10:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-SSE-LABEL: test_addpd:
+; BDVER2-SSE: # %bb.0:
+; BDVER2-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [3:1.00]
+; BDVER2-SSE-NEXT: addpd (%rdi), %xmm0 # sched: [9:1.00]
+; BDVER2-SSE-NEXT: retq # sched: [1:1.00]
+;
+; BDVER2-LABEL: test_addpd:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
+; BDVER2-NEXT: vaddpd (%rdi), %xmm0, %xmm0 # sched: [9:1.00]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-SSE-LABEL: test_addpd:
; BTVER2-SSE: # %bb.0:
; BTVER2-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [3:1.00]
; SKX-NEXT: vaddsd (%rdi), %xmm0, %xmm0 # sched: [9:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-SSE-LABEL: test_addsd:
+; BDVER2-SSE: # %bb.0:
+; BDVER2-SSE-NEXT: addsd %xmm1, %xmm0 # sched: [3:1.00]
+; BDVER2-SSE-NEXT: addsd (%rdi), %xmm0 # sched: [9:1.00]
+; BDVER2-SSE-NEXT: retq # sched: [1:1.00]
+;
+; BDVER2-LABEL: test_addsd:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: vaddsd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
+; BDVER2-NEXT: vaddsd (%rdi), %xmm0, %xmm0 # sched: [9:1.00]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-SSE-LABEL: test_addsd:
; BTVER2-SSE: # %bb.0:
; BTVER2-SSE-NEXT: addsd %xmm1, %xmm0 # sched: [3:1.00]
; SKX-NEXT: vaddpd %xmm0, %xmm1, %xmm0 # sched: [4:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-SSE-LABEL: test_andpd:
+; BDVER2-SSE: # %bb.0:
+; BDVER2-SSE-NEXT: andpd %xmm1, %xmm0 # sched: [1:1.00]
+; BDVER2-SSE-NEXT: andpd (%rdi), %xmm0 # sched: [7:1.00]
+; BDVER2-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [3:1.00]
+; BDVER2-SSE-NEXT: retq # sched: [1:1.00]
+;
+; BDVER2-LABEL: test_andpd:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: vandpd %xmm1, %xmm0, %xmm0 # sched: [1:1.00]
+; BDVER2-NEXT: vandpd (%rdi), %xmm0, %xmm0 # sched: [7:1.00]
+; BDVER2-NEXT: vaddpd %xmm0, %xmm1, %xmm0 # sched: [3:1.00]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-SSE-LABEL: test_andpd:
; BTVER2-SSE: # %bb.0:
; BTVER2-SSE-NEXT: andpd %xmm1, %xmm0 # sched: [1:0.50]
; SKX-NEXT: vaddpd %xmm0, %xmm1, %xmm0 # sched: [4:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-SSE-LABEL: test_andnotpd:
+; BDVER2-SSE: # %bb.0:
+; BDVER2-SSE-NEXT: andnpd %xmm1, %xmm0 # sched: [1:1.00]
+; BDVER2-SSE-NEXT: andnpd (%rdi), %xmm0 # sched: [7:1.00]
+; BDVER2-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [3:1.00]
+; BDVER2-SSE-NEXT: retq # sched: [1:1.00]
+;
+; BDVER2-LABEL: test_andnotpd:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: vandnpd %xmm1, %xmm0, %xmm0 # sched: [1:1.00]
+; BDVER2-NEXT: vandnpd (%rdi), %xmm0, %xmm0 # sched: [7:1.00]
+; BDVER2-NEXT: vaddpd %xmm0, %xmm1, %xmm0 # sched: [3:1.00]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-SSE-LABEL: test_andnotpd:
; BTVER2-SSE: # %bb.0:
; BTVER2-SSE-NEXT: andnpd %xmm1, %xmm0 # sched: [1:0.50]
; SKX-NEXT: clflush (%rdi) # sched: [2:1.00]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-SSE-LABEL: test_clflush:
+; BDVER2-SSE: # %bb.0:
+; BDVER2-SSE-NEXT: clflush (%rdi) # sched: [5:1.00]
+; BDVER2-SSE-NEXT: retq # sched: [1:1.00]
+;
+; BDVER2-LABEL: test_clflush:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: clflush (%rdi) # sched: [5:1.00]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-SSE-LABEL: test_clflush:
; BTVER2-SSE: # %bb.0:
; BTVER2-SSE-NEXT: clflush (%rdi) # sched: [5:1.00]
; SKX-NEXT: vorpd %xmm0, %xmm1, %xmm0 # sched: [1:0.33]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-SSE-LABEL: test_cmppd:
+; BDVER2-SSE: # %bb.0:
+; BDVER2-SSE-NEXT: cmpeqpd %xmm0, %xmm1 # sched: [3:1.00]
+; BDVER2-SSE-NEXT: cmpeqpd (%rdi), %xmm0 # sched: [9:1.00]
+; BDVER2-SSE-NEXT: orpd %xmm1, %xmm0 # sched: [1:1.00]
+; BDVER2-SSE-NEXT: retq # sched: [1:1.00]
+;
+; BDVER2-LABEL: test_cmppd:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: vcmpeqpd %xmm1, %xmm0, %xmm1 # sched: [3:1.00]
+; BDVER2-NEXT: vcmpeqpd (%rdi), %xmm0, %xmm0 # sched: [9:1.00]
+; BDVER2-NEXT: vorpd %xmm0, %xmm1, %xmm0 # sched: [1:1.00]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-SSE-LABEL: test_cmppd:
; BTVER2-SSE: # %bb.0:
; BTVER2-SSE-NEXT: cmpeqpd %xmm0, %xmm1 # sched: [2:1.00]
; SKX-NEXT: vcmpeqsd (%rdi), %xmm0, %xmm0 # sched: [9:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-SSE-LABEL: test_cmpsd:
+; BDVER2-SSE: # %bb.0:
+; BDVER2-SSE-NEXT: cmpeqsd %xmm1, %xmm0 # sched: [3:1.00]
+; BDVER2-SSE-NEXT: cmpeqsd (%rdi), %xmm0 # sched: [9:1.00]
+; BDVER2-SSE-NEXT: retq # sched: [1:1.00]
+;
+; BDVER2-LABEL: test_cmpsd:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: vcmpeqsd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
+; BDVER2-NEXT: vcmpeqsd (%rdi), %xmm0, %xmm0 # sched: [9:1.00]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-SSE-LABEL: test_cmpsd:
; BTVER2-SSE: # %bb.0:
; BTVER2-SSE-NEXT: cmpeqsd %xmm1, %xmm0 # sched: [2:1.00]
; SKX-NEXT: movzbl %dl, %eax # sched: [1:0.25]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-SSE-LABEL: test_comisd:
+; BDVER2-SSE: # %bb.0:
+; BDVER2-SSE-NEXT: comisd %xmm1, %xmm0 # sched: [2:1.00]
+; BDVER2-SSE-NEXT: setnp %al # sched: [1:0.50]
+; BDVER2-SSE-NEXT: sete %cl # sched: [1:0.50]
+; BDVER2-SSE-NEXT: andb %al, %cl # sched: [1:0.33]
+; BDVER2-SSE-NEXT: comisd (%rdi), %xmm0 # sched: [8:1.00]
+; BDVER2-SSE-NEXT: setnp %al # sched: [1:0.50]
+; BDVER2-SSE-NEXT: sete %dl # sched: [1:0.50]
+; BDVER2-SSE-NEXT: andb %al, %dl # sched: [1:0.33]
+; BDVER2-SSE-NEXT: orb %cl, %dl # sched: [1:0.33]
+; BDVER2-SSE-NEXT: movzbl %dl, %eax # sched: [1:0.33]
+; BDVER2-SSE-NEXT: retq # sched: [1:1.00]
+;
+; BDVER2-LABEL: test_comisd:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: vcomisd %xmm1, %xmm0 # sched: [2:1.00]
+; BDVER2-NEXT: setnp %al # sched: [1:0.50]
+; BDVER2-NEXT: sete %cl # sched: [1:0.50]
+; BDVER2-NEXT: andb %al, %cl # sched: [1:0.33]
+; BDVER2-NEXT: vcomisd (%rdi), %xmm0 # sched: [8:1.00]
+; BDVER2-NEXT: setnp %al # sched: [1:0.50]
+; BDVER2-NEXT: sete %dl # sched: [1:0.50]
+; BDVER2-NEXT: andb %al, %dl # sched: [1:0.33]
+; BDVER2-NEXT: orb %cl, %dl # sched: [1:0.33]
+; BDVER2-NEXT: movzbl %dl, %eax # sched: [1:0.33]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-SSE-LABEL: test_comisd:
; BTVER2-SSE: # %bb.0:
; BTVER2-SSE-NEXT: comisd %xmm1, %xmm0 # sched: [3:1.00]
; SKX-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-SSE-LABEL: test_cvtdq2pd:
+; BDVER2-SSE: # %bb.0:
+; BDVER2-SSE-NEXT: cvtdq2pd %xmm0, %xmm1 # sched: [4:1.00]
+; BDVER2-SSE-NEXT: cvtdq2pd (%rdi), %xmm0 # sched: [10:1.00]
+; BDVER2-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [3:1.00]
+; BDVER2-SSE-NEXT: retq # sched: [1:1.00]
+;
+; BDVER2-LABEL: test_cvtdq2pd:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: vcvtdq2pd %xmm0, %xmm0 # sched: [4:1.00]
+; BDVER2-NEXT: vcvtdq2pd (%rdi), %xmm1 # sched: [10:1.00]
+; BDVER2-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-SSE-LABEL: test_cvtdq2pd:
; BTVER2-SSE: # %bb.0:
; BTVER2-SSE-NEXT: cvtdq2pd %xmm0, %xmm1 # sched: [3:1.00]
; SKX-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-SSE-LABEL: test_cvtdq2ps:
+; BDVER2-SSE: # %bb.0:
+; BDVER2-SSE-NEXT: cvtdq2ps %xmm0, %xmm1 # sched: [3:1.00]
+; BDVER2-SSE-NEXT: cvtdq2ps (%rdi), %xmm0 # sched: [9:1.00]
+; BDVER2-SSE-NEXT: addps %xmm1, %xmm0 # sched: [3:1.00]
+; BDVER2-SSE-NEXT: retq # sched: [1:1.00]
+;
+; BDVER2-LABEL: test_cvtdq2ps:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: vcvtdq2ps %xmm0, %xmm0 # sched: [3:1.00]
+; BDVER2-NEXT: vcvtdq2ps (%rdi), %xmm1 # sched: [9:1.00]
+; BDVER2-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-SSE-LABEL: test_cvtdq2ps:
; BTVER2-SSE: # %bb.0:
; BTVER2-SSE-NEXT: cvtdq2ps %xmm0, %xmm1 # sched: [3:1.00]
; SKX-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-SSE-LABEL: test_cvtpd2dq:
+; BDVER2-SSE: # %bb.0:
+; BDVER2-SSE-NEXT: cvtpd2dq %xmm0, %xmm1 # sched: [4:1.00]
+; BDVER2-SSE-NEXT: cvtpd2dq (%rdi), %xmm0 # sched: [10:1.00]
+; BDVER2-SSE-NEXT: paddd %xmm1, %xmm0 # sched: [1:0.50]
+; BDVER2-SSE-NEXT: retq # sched: [1:1.00]
+;
+; BDVER2-LABEL: test_cvtpd2dq:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: vcvtpd2dq %xmm0, %xmm0 # sched: [4:1.00]
+; BDVER2-NEXT: vcvtpd2dqx (%rdi), %xmm1 # sched: [10:1.00]
+; BDVER2-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-SSE-LABEL: test_cvtpd2dq:
; BTVER2-SSE: # %bb.0:
; BTVER2-SSE-NEXT: cvtpd2dq %xmm0, %xmm1 # sched: [3:1.00]
; SKX-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-SSE-LABEL: test_cvtpd2ps:
+; BDVER2-SSE: # %bb.0:
+; BDVER2-SSE-NEXT: cvtpd2ps %xmm0, %xmm1 # sched: [4:1.00]
+; BDVER2-SSE-NEXT: cvtpd2ps (%rdi), %xmm0 # sched: [10:1.00]
+; BDVER2-SSE-NEXT: addps %xmm1, %xmm0 # sched: [3:1.00]
+; BDVER2-SSE-NEXT: retq # sched: [1:1.00]
+;
+; BDVER2-LABEL: test_cvtpd2ps:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: vcvtpd2ps %xmm0, %xmm0 # sched: [4:1.00]
+; BDVER2-NEXT: vcvtpd2psx (%rdi), %xmm1 # sched: [10:1.00]
+; BDVER2-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-SSE-LABEL: test_cvtpd2ps:
; BTVER2-SSE: # %bb.0:
; BTVER2-SSE-NEXT: cvtpd2ps %xmm0, %xmm1 # sched: [3:1.00]
; SKX-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-SSE-LABEL: test_cvtps2dq:
+; BDVER2-SSE: # %bb.0:
+; BDVER2-SSE-NEXT: cvtps2dq %xmm0, %xmm1 # sched: [3:1.00]
+; BDVER2-SSE-NEXT: cvtps2dq (%rdi), %xmm0 # sched: [9:1.00]
+; BDVER2-SSE-NEXT: paddd %xmm1, %xmm0 # sched: [1:0.50]
+; BDVER2-SSE-NEXT: retq # sched: [1:1.00]
+;
+; BDVER2-LABEL: test_cvtps2dq:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: vcvtps2dq %xmm0, %xmm0 # sched: [3:1.00]
+; BDVER2-NEXT: vcvtps2dq (%rdi), %xmm1 # sched: [9:1.00]
+; BDVER2-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-SSE-LABEL: test_cvtps2dq:
; BTVER2-SSE: # %bb.0:
; BTVER2-SSE-NEXT: cvtps2dq %xmm0, %xmm1 # sched: [3:1.00]
; SKX-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-SSE-LABEL: test_cvtps2pd:
+; BDVER2-SSE: # %bb.0:
+; BDVER2-SSE-NEXT: cvtps2pd %xmm0, %xmm1 # sched: [2:1.00]
+; BDVER2-SSE-NEXT: cvtps2pd (%rdi), %xmm0 # sched: [7:1.00]
+; BDVER2-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [3:1.00]
+; BDVER2-SSE-NEXT: retq # sched: [1:1.00]
+;
+; BDVER2-LABEL: test_cvtps2pd:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: vcvtps2pd %xmm0, %xmm0 # sched: [2:1.00]
+; BDVER2-NEXT: vcvtps2pd (%rdi), %xmm1 # sched: [7:1.00]
+; BDVER2-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-SSE-LABEL: test_cvtps2pd:
; BTVER2-SSE: # %bb.0:
; BTVER2-SSE-NEXT: cvtps2pd %xmm0, %xmm1 # sched: [2:1.00]
; SKX-NEXT: addl %ecx, %eax # sched: [1:0.25]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-SSE-LABEL: test_cvtsd2si:
+; BDVER2-SSE: # %bb.0:
+; BDVER2-SSE-NEXT: cvtsd2si %xmm0, %ecx # sched: [5:1.00]
+; BDVER2-SSE-NEXT: cvtsd2si (%rdi), %eax # sched: [9:1.00]
+; BDVER2-SSE-NEXT: addl %ecx, %eax # sched: [1:0.33]
+; BDVER2-SSE-NEXT: retq # sched: [1:1.00]
+;
+; BDVER2-LABEL: test_cvtsd2si:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: vcvtsd2si %xmm0, %ecx # sched: [5:1.00]
+; BDVER2-NEXT: vcvtsd2si (%rdi), %eax # sched: [10:1.00]
+; BDVER2-NEXT: addl %ecx, %eax # sched: [1:0.33]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-SSE-LABEL: test_cvtsd2si:
; BTVER2-SSE: # %bb.0:
; BTVER2-SSE-NEXT: cvtsd2si (%rdi), %eax # sched: [12:1.00]
; SKX-NEXT: addq %rcx, %rax # sched: [1:0.25]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-SSE-LABEL: test_cvtsd2siq:
+; BDVER2-SSE: # %bb.0:
+; BDVER2-SSE-NEXT: cvtsd2si %xmm0, %rcx # sched: [5:1.00]
+; BDVER2-SSE-NEXT: cvtsd2si (%rdi), %rax # sched: [9:1.00]
+; BDVER2-SSE-NEXT: addq %rcx, %rax # sched: [1:0.33]
+; BDVER2-SSE-NEXT: retq # sched: [1:1.00]
+;
+; BDVER2-LABEL: test_cvtsd2siq:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: vcvtsd2si %xmm0, %rcx # sched: [5:1.00]
+; BDVER2-NEXT: vcvtsd2si (%rdi), %rax # sched: [10:1.00]
+; BDVER2-NEXT: addq %rcx, %rax # sched: [1:0.33]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-SSE-LABEL: test_cvtsd2siq:
; BTVER2-SSE: # %bb.0:
; BTVER2-SSE-NEXT: cvtsd2si (%rdi), %rax # sched: [12:1.00]
; SKX-NEXT: vaddss %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-SSE-LABEL: test_cvtsd2ss:
+; BDVER2-SSE: # %bb.0:
+; BDVER2-SSE-NEXT: cvtsd2ss %xmm0, %xmm1 # sched: [4:1.00]
+; BDVER2-SSE-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero sched: [6:0.50]
+; BDVER2-SSE-NEXT: cvtsd2ss %xmm0, %xmm0 # sched: [4:1.00]
+; BDVER2-SSE-NEXT: addss %xmm1, %xmm0 # sched: [3:1.00]
+; BDVER2-SSE-NEXT: retq # sched: [1:1.00]
+;
+; BDVER2-LABEL: test_cvtsd2ss:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: vcvtsd2ss %xmm0, %xmm0, %xmm0 # sched: [4:1.00]
+; BDVER2-NEXT: vmovsd {{.*#+}} xmm1 = mem[0],zero sched: [6:0.50]
+; BDVER2-NEXT: vcvtsd2ss %xmm1, %xmm1, %xmm1 # sched: [4:1.00]
+; BDVER2-NEXT: vaddss %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-SSE-LABEL: test_cvtsd2ss:
; BTVER2-SSE: # %bb.0:
; BTVER2-SSE-NEXT: cvtsd2ss %xmm0, %xmm1 # sched: [7:2.00]
; SKX-NEXT: vaddsd %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-SSE-LABEL: test_cvtsi2sd:
+; BDVER2-SSE: # %bb.0:
+; BDVER2-SSE-NEXT: cvtsi2sdl %edi, %xmm1 # sched: [4:1.00]
+; BDVER2-SSE-NEXT: cvtsi2sdl (%rsi), %xmm0 # sched: [9:1.00]
+; BDVER2-SSE-NEXT: addsd %xmm1, %xmm0 # sched: [3:1.00]
+; BDVER2-SSE-NEXT: retq # sched: [1:1.00]
+;
+; BDVER2-LABEL: test_cvtsi2sd:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: vcvtsi2sdl %edi, %xmm0, %xmm0 # sched: [4:1.00]
+; BDVER2-NEXT: vcvtsi2sdl (%rsi), %xmm1, %xmm1 # sched: [9:1.00]
+; BDVER2-NEXT: vaddsd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-SSE-LABEL: test_cvtsi2sd:
; BTVER2-SSE: # %bb.0:
; BTVER2-SSE-NEXT: cvtsi2sdl (%rsi), %xmm0 # sched: [14:1.00]
; SKX-NEXT: vaddsd %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-SSE-LABEL: test_cvtsi2sdq:
+; BDVER2-SSE: # %bb.0:
+; BDVER2-SSE-NEXT: cvtsi2sdq %rdi, %xmm1 # sched: [4:1.00]
+; BDVER2-SSE-NEXT: cvtsi2sdq (%rsi), %xmm0 # sched: [9:1.00]
+; BDVER2-SSE-NEXT: addsd %xmm1, %xmm0 # sched: [3:1.00]
+; BDVER2-SSE-NEXT: retq # sched: [1:1.00]
+;
+; BDVER2-LABEL: test_cvtsi2sdq:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: vcvtsi2sdq %rdi, %xmm0, %xmm0 # sched: [4:1.00]
+; BDVER2-NEXT: vcvtsi2sdq (%rsi), %xmm1, %xmm1 # sched: [9:1.00]
+; BDVER2-NEXT: vaddsd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-SSE-LABEL: test_cvtsi2sdq:
; BTVER2-SSE: # %bb.0:
; BTVER2-SSE-NEXT: cvtsi2sdq (%rsi), %xmm0 # sched: [14:1.00]
; SKX-NEXT: vaddsd %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-SSE-LABEL: test_cvtss2sd:
+; BDVER2-SSE: # %bb.0:
+; BDVER2-SSE-NEXT: cvtss2sd %xmm0, %xmm1 # sched: [1:1.00]
+; BDVER2-SSE-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero sched: [6:0.50]
+; BDVER2-SSE-NEXT: cvtss2sd %xmm0, %xmm0 # sched: [1:1.00]
+; BDVER2-SSE-NEXT: addsd %xmm1, %xmm0 # sched: [3:1.00]
+; BDVER2-SSE-NEXT: retq # sched: [1:1.00]
+;
+; BDVER2-LABEL: test_cvtss2sd:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: vcvtss2sd %xmm0, %xmm0, %xmm0 # sched: [1:1.00]
+; BDVER2-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero sched: [6:0.50]
+; BDVER2-NEXT: vcvtss2sd %xmm1, %xmm1, %xmm1 # sched: [1:1.00]
+; BDVER2-NEXT: vaddsd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-SSE-LABEL: test_cvtss2sd:
; BTVER2-SSE: # %bb.0:
; BTVER2-SSE-NEXT: cvtss2sd %xmm0, %xmm1 # sched: [7:2.00]
; SKX-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-SSE-LABEL: test_cvttpd2dq:
+; BDVER2-SSE: # %bb.0:
+; BDVER2-SSE-NEXT: cvttpd2dq %xmm0, %xmm1 # sched: [4:1.00]
+; BDVER2-SSE-NEXT: cvttpd2dq (%rdi), %xmm0 # sched: [10:1.00]
+; BDVER2-SSE-NEXT: paddd %xmm1, %xmm0 # sched: [1:0.50]
+; BDVER2-SSE-NEXT: retq # sched: [1:1.00]
+;
+; BDVER2-LABEL: test_cvttpd2dq:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: vcvttpd2dq %xmm0, %xmm0 # sched: [4:1.00]
+; BDVER2-NEXT: vcvttpd2dqx (%rdi), %xmm1 # sched: [10:1.00]
+; BDVER2-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-SSE-LABEL: test_cvttpd2dq:
; BTVER2-SSE: # %bb.0:
; BTVER2-SSE-NEXT: cvttpd2dq %xmm0, %xmm1 # sched: [3:1.00]
; SKX-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-SSE-LABEL: test_cvttps2dq:
+; BDVER2-SSE: # %bb.0:
+; BDVER2-SSE-NEXT: cvttps2dq %xmm0, %xmm1 # sched: [3:1.00]
+; BDVER2-SSE-NEXT: cvttps2dq (%rdi), %xmm0 # sched: [9:1.00]
+; BDVER2-SSE-NEXT: paddd %xmm1, %xmm0 # sched: [1:0.50]
+; BDVER2-SSE-NEXT: retq # sched: [1:1.00]
+;
+; BDVER2-LABEL: test_cvttps2dq:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: vcvttps2dq %xmm0, %xmm0 # sched: [3:1.00]
+; BDVER2-NEXT: vcvttps2dq (%rdi), %xmm1 # sched: [9:1.00]
+; BDVER2-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-SSE-LABEL: test_cvttps2dq:
; BTVER2-SSE: # %bb.0:
; BTVER2-SSE-NEXT: cvttps2dq %xmm0, %xmm1 # sched: [3:1.00]
; SKX-NEXT: addl %ecx, %eax # sched: [1:0.25]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-SSE-LABEL: test_cvttsd2si:
+; BDVER2-SSE: # %bb.0:
+; BDVER2-SSE-NEXT: cvttsd2si %xmm0, %ecx # sched: [5:1.00]
+; BDVER2-SSE-NEXT: cvttsd2si (%rdi), %eax # sched: [9:1.00]
+; BDVER2-SSE-NEXT: addl %ecx, %eax # sched: [1:0.33]
+; BDVER2-SSE-NEXT: retq # sched: [1:1.00]
+;
+; BDVER2-LABEL: test_cvttsd2si:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: vcvttsd2si %xmm0, %ecx # sched: [5:1.00]
+; BDVER2-NEXT: vcvttsd2si (%rdi), %eax # sched: [10:1.00]
+; BDVER2-NEXT: addl %ecx, %eax # sched: [1:0.33]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-SSE-LABEL: test_cvttsd2si:
; BTVER2-SSE: # %bb.0:
; BTVER2-SSE-NEXT: cvttsd2si (%rdi), %eax # sched: [12:1.00]
; SKX-NEXT: addq %rcx, %rax # sched: [1:0.25]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-SSE-LABEL: test_cvttsd2siq:
+; BDVER2-SSE: # %bb.0:
+; BDVER2-SSE-NEXT: cvttsd2si %xmm0, %rcx # sched: [5:1.00]
+; BDVER2-SSE-NEXT: cvttsd2si (%rdi), %rax # sched: [9:1.00]
+; BDVER2-SSE-NEXT: addq %rcx, %rax # sched: [1:0.33]
+; BDVER2-SSE-NEXT: retq # sched: [1:1.00]
+;
+; BDVER2-LABEL: test_cvttsd2siq:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: vcvttsd2si %xmm0, %rcx # sched: [5:1.00]
+; BDVER2-NEXT: vcvttsd2si (%rdi), %rax # sched: [10:1.00]
+; BDVER2-NEXT: addq %rcx, %rax # sched: [1:0.33]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-SSE-LABEL: test_cvttsd2siq:
; BTVER2-SSE: # %bb.0:
; BTVER2-SSE-NEXT: cvttsd2si (%rdi), %rax # sched: [12:1.00]
; SKX-NEXT: vdivpd (%rdi), %xmm0, %xmm0 # sched: [20:4.00]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-SSE-LABEL: test_divpd:
+; BDVER2-SSE: # %bb.0:
+; BDVER2-SSE-NEXT: divpd %xmm1, %xmm0 # sched: [22:22.00]
+; BDVER2-SSE-NEXT: divpd (%rdi), %xmm0 # sched: [28:22.00]
+; BDVER2-SSE-NEXT: retq # sched: [1:1.00]
+;
+; BDVER2-LABEL: test_divpd:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: vdivpd %xmm1, %xmm0, %xmm0 # sched: [22:22.00]
+; BDVER2-NEXT: vdivpd (%rdi), %xmm0, %xmm0 # sched: [28:22.00]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-SSE-LABEL: test_divpd:
; BTVER2-SSE: # %bb.0:
; BTVER2-SSE-NEXT: divpd %xmm1, %xmm0 # sched: [19:19.00]
; SKX-NEXT: vdivsd (%rdi), %xmm0, %xmm0 # sched: [19:4.00]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-SSE-LABEL: test_divsd:
+; BDVER2-SSE: # %bb.0:
+; BDVER2-SSE-NEXT: divsd %xmm1, %xmm0 # sched: [22:22.00]
+; BDVER2-SSE-NEXT: divsd (%rdi), %xmm0 # sched: [28:22.00]
+; BDVER2-SSE-NEXT: retq # sched: [1:1.00]
+;
+; BDVER2-LABEL: test_divsd:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: vdivsd %xmm1, %xmm0, %xmm0 # sched: [22:22.00]
+; BDVER2-NEXT: vdivsd (%rdi), %xmm0, %xmm0 # sched: [28:22.00]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-SSE-LABEL: test_divsd:
; BTVER2-SSE: # %bb.0:
; BTVER2-SSE-NEXT: divsd %xmm1, %xmm0 # sched: [19:19.00]
; SKX-NEXT: lfence # sched: [2:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-SSE-LABEL: test_lfence:
+; BDVER2-SSE: # %bb.0:
+; BDVER2-SSE-NEXT: lfence # sched: [1:1.00]
+; BDVER2-SSE-NEXT: retq # sched: [1:1.00]
+;
+; BDVER2-LABEL: test_lfence:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: lfence # sched: [1:1.00]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-SSE-LABEL: test_lfence:
; BTVER2-SSE: # %bb.0:
; BTVER2-SSE-NEXT: lfence # sched: [1:1.00]
; SKX-NEXT: mfence # sched: [3:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-SSE-LABEL: test_mfence:
+; BDVER2-SSE: # %bb.0:
+; BDVER2-SSE-NEXT: mfence # sched: [1:1.00]
+; BDVER2-SSE-NEXT: retq # sched: [1:1.00]
+;
+; BDVER2-LABEL: test_mfence:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: mfence # sched: [1:1.00]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-SSE-LABEL: test_mfence:
; BTVER2-SSE: # %bb.0:
; BTVER2-SSE-NEXT: mfence # sched: [1:1.00]
; SKX-NEXT: vmaskmovdqu %xmm1, %xmm0 # sched: [1:1.00]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-SSE-LABEL: test_maskmovdqu:
+; BDVER2-SSE: # %bb.0:
+; BDVER2-SSE-NEXT: maskmovdqu %xmm1, %xmm0 # sched: [1:1.00]
+; BDVER2-SSE-NEXT: retq # sched: [1:1.00]
+;
+; BDVER2-LABEL: test_maskmovdqu:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: vmaskmovdqu %xmm1, %xmm0 # sched: [1:1.00]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-SSE-LABEL: test_maskmovdqu:
; BTVER2-SSE: # %bb.0:
; BTVER2-SSE-NEXT: maskmovdqu %xmm1, %xmm0 # sched: [1:1.00]
; SKX-NEXT: vmaxpd (%rdi), %xmm0, %xmm0 # sched: [10:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-SSE-LABEL: test_maxpd:
+; BDVER2-SSE: # %bb.0:
+; BDVER2-SSE-NEXT: maxpd %xmm1, %xmm0 # sched: [3:1.00]
+; BDVER2-SSE-NEXT: maxpd (%rdi), %xmm0 # sched: [9:1.00]
+; BDVER2-SSE-NEXT: retq # sched: [1:1.00]
+;
+; BDVER2-LABEL: test_maxpd:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: vmaxpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
+; BDVER2-NEXT: vmaxpd (%rdi), %xmm0, %xmm0 # sched: [9:1.00]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-SSE-LABEL: test_maxpd:
; BTVER2-SSE: # %bb.0:
; BTVER2-SSE-NEXT: maxpd %xmm1, %xmm0 # sched: [2:1.00]
; SKX-NEXT: vmaxsd (%rdi), %xmm0, %xmm0 # sched: [9:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-SSE-LABEL: test_maxsd:
+; BDVER2-SSE: # %bb.0:
+; BDVER2-SSE-NEXT: maxsd %xmm1, %xmm0 # sched: [3:1.00]
+; BDVER2-SSE-NEXT: maxsd (%rdi), %xmm0 # sched: [9:1.00]
+; BDVER2-SSE-NEXT: retq # sched: [1:1.00]
+;
+; BDVER2-LABEL: test_maxsd:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: vmaxsd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
+; BDVER2-NEXT: vmaxsd (%rdi), %xmm0, %xmm0 # sched: [9:1.00]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-SSE-LABEL: test_maxsd:
; BTVER2-SSE: # %bb.0:
; BTVER2-SSE-NEXT: maxsd %xmm1, %xmm0 # sched: [2:1.00]
; SKX-NEXT: vminpd (%rdi), %xmm0, %xmm0 # sched: [10:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-SSE-LABEL: test_minpd:
+; BDVER2-SSE: # %bb.0:
+; BDVER2-SSE-NEXT: minpd %xmm1, %xmm0 # sched: [3:1.00]
+; BDVER2-SSE-NEXT: minpd (%rdi), %xmm0 # sched: [9:1.00]
+; BDVER2-SSE-NEXT: retq # sched: [1:1.00]
+;
+; BDVER2-LABEL: test_minpd:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: vminpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
+; BDVER2-NEXT: vminpd (%rdi), %xmm0, %xmm0 # sched: [9:1.00]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-SSE-LABEL: test_minpd:
; BTVER2-SSE: # %bb.0:
; BTVER2-SSE-NEXT: minpd %xmm1, %xmm0 # sched: [2:1.00]
; SKX-NEXT: vminsd (%rdi), %xmm0, %xmm0 # sched: [9:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-SSE-LABEL: test_minsd:
+; BDVER2-SSE: # %bb.0:
+; BDVER2-SSE-NEXT: minsd %xmm1, %xmm0 # sched: [3:1.00]
+; BDVER2-SSE-NEXT: minsd (%rdi), %xmm0 # sched: [9:1.00]
+; BDVER2-SSE-NEXT: retq # sched: [1:1.00]
+;
+; BDVER2-LABEL: test_minsd:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: vminsd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
+; BDVER2-NEXT: vminsd (%rdi), %xmm0, %xmm0 # sched: [9:1.00]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-SSE-LABEL: test_minsd:
; BTVER2-SSE: # %bb.0:
; BTVER2-SSE-NEXT: minsd %xmm1, %xmm0 # sched: [2:1.00]
; SKX-NEXT: vmovapd %xmm0, (%rsi) # sched: [1:1.00]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-SSE-LABEL: test_movapd:
+; BDVER2-SSE: # %bb.0:
+; BDVER2-SSE-NEXT: movapd (%rdi), %xmm0 # sched: [6:0.50]
+; BDVER2-SSE-NEXT: addpd %xmm0, %xmm0 # sched: [3:1.00]
+; BDVER2-SSE-NEXT: movapd %xmm0, (%rsi) # sched: [1:1.00]
+; BDVER2-SSE-NEXT: retq # sched: [1:1.00]
+;
+; BDVER2-LABEL: test_movapd:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: vmovapd (%rdi), %xmm0 # sched: [6:0.50]
+; BDVER2-NEXT: vaddpd %xmm0, %xmm0, %xmm0 # sched: [3:1.00]
+; BDVER2-NEXT: vmovapd %xmm0, (%rsi) # sched: [1:1.00]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-SSE-LABEL: test_movapd:
; BTVER2-SSE: # %bb.0:
; BTVER2-SSE-NEXT: movapd (%rdi), %xmm0 # sched: [5:1.00]
; SKX-NEXT: vmovdqa %xmm0, (%rsi) # sched: [1:1.00]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-SSE-LABEL: test_movdqa:
+; BDVER2-SSE: # %bb.0:
+; BDVER2-SSE-NEXT: movdqa (%rdi), %xmm0 # sched: [6:0.50]
+; BDVER2-SSE-NEXT: paddq %xmm0, %xmm0 # sched: [1:0.50]
+; BDVER2-SSE-NEXT: movdqa %xmm0, (%rsi) # sched: [1:1.00]
+; BDVER2-SSE-NEXT: retq # sched: [1:1.00]
+;
+; BDVER2-LABEL: test_movdqa:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: vmovdqa (%rdi), %xmm0 # sched: [6:0.50]
+; BDVER2-NEXT: vpaddq %xmm0, %xmm0, %xmm0 # sched: [1:0.50]
+; BDVER2-NEXT: vmovdqa %xmm0, (%rsi) # sched: [1:1.00]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-SSE-LABEL: test_movdqa:
; BTVER2-SSE: # %bb.0:
; BTVER2-SSE-NEXT: movdqa (%rdi), %xmm0 # sched: [5:1.00]
; SKX-NEXT: vmovdqu %xmm0, (%rsi) # sched: [1:1.00]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-SSE-LABEL: test_movdqu:
+; BDVER2-SSE: # %bb.0:
+; BDVER2-SSE-NEXT: movdqu (%rdi), %xmm0 # sched: [6:0.50]
+; BDVER2-SSE-NEXT: paddq %xmm0, %xmm0 # sched: [1:0.50]
+; BDVER2-SSE-NEXT: movdqu %xmm0, (%rsi) # sched: [1:1.00]
+; BDVER2-SSE-NEXT: retq # sched: [1:1.00]
+;
+; BDVER2-LABEL: test_movdqu:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: vmovdqu (%rdi), %xmm0 # sched: [6:0.50]
+; BDVER2-NEXT: vpaddq %xmm0, %xmm0, %xmm0 # sched: [1:0.50]
+; BDVER2-NEXT: vmovdqu %xmm0, (%rsi) # sched: [1:1.00]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-SSE-LABEL: test_movdqu:
; BTVER2-SSE: # %bb.0:
; BTVER2-SSE-NEXT: movdqu (%rdi), %xmm0 # sched: [5:1.00]
; SKX-NEXT: vmovd %xmm1, (%rsi) # sched: [1:1.00]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-SSE-LABEL: test_movd:
+; BDVER2-SSE: # %bb.0:
+; BDVER2-SSE-NEXT: movd %edi, %xmm1 # sched: [1:1.00]
+; BDVER2-SSE-NEXT: movd {{.*#+}} xmm2 = mem[0],zero,zero,zero sched: [6:0.50]
+; BDVER2-SSE-NEXT: paddd %xmm0, %xmm1 # sched: [1:0.50]
+; BDVER2-SSE-NEXT: paddd %xmm0, %xmm2 # sched: [1:0.50]
+; BDVER2-SSE-NEXT: movd %xmm2, %eax # sched: [2:1.00]
+; BDVER2-SSE-NEXT: movd %xmm1, (%rsi) # sched: [1:1.00]
+; BDVER2-SSE-NEXT: retq # sched: [1:1.00]
+;
+; BDVER2-LABEL: test_movd:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: vmovd %edi, %xmm1 # sched: [1:1.00]
+; BDVER2-NEXT: vmovd {{.*#+}} xmm2 = mem[0],zero,zero,zero sched: [6:0.50]
+; BDVER2-NEXT: vpaddd %xmm1, %xmm0, %xmm1 # sched: [1:0.50]
+; BDVER2-NEXT: vpaddd %xmm2, %xmm0, %xmm0 # sched: [1:0.50]
+; BDVER2-NEXT: vmovd %xmm0, %eax # sched: [2:1.00]
+; BDVER2-NEXT: vmovd %xmm1, (%rsi) # sched: [1:1.00]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-SSE-LABEL: test_movd:
; BTVER2-SSE: # %bb.0:
; BTVER2-SSE-NEXT: movd {{.*#+}} xmm2 = mem[0],zero,zero,zero sched: [5:1.00]
; SKX-NEXT: vmovq %xmm1, (%rsi) # sched: [1:1.00]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-SSE-LABEL: test_movd_64:
+; BDVER2-SSE: # %bb.0:
+; BDVER2-SSE-NEXT: movq %rdi, %xmm1 # sched: [1:1.00]
+; BDVER2-SSE-NEXT: movq {{.*#+}} xmm2 = mem[0],zero sched: [6:0.50]
+; BDVER2-SSE-NEXT: paddq %xmm0, %xmm1 # sched: [1:0.50]
+; BDVER2-SSE-NEXT: paddq %xmm0, %xmm2 # sched: [1:0.50]
+; BDVER2-SSE-NEXT: movq %xmm2, %rax # sched: [2:1.00]
+; BDVER2-SSE-NEXT: movq %xmm1, (%rsi) # sched: [1:1.00]
+; BDVER2-SSE-NEXT: retq # sched: [1:1.00]
+;
+; BDVER2-LABEL: test_movd_64:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: vmovq %rdi, %xmm1 # sched: [1:1.00]
+; BDVER2-NEXT: vmovq {{.*#+}} xmm2 = mem[0],zero sched: [6:0.50]
+; BDVER2-NEXT: vpaddq %xmm1, %xmm0, %xmm1 # sched: [1:0.50]
+; BDVER2-NEXT: vpaddq %xmm2, %xmm0, %xmm0 # sched: [1:0.50]
+; BDVER2-NEXT: vmovq %xmm0, %rax # sched: [2:1.00]
+; BDVER2-NEXT: vmovq %xmm1, (%rsi) # sched: [1:1.00]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-SSE-LABEL: test_movd_64:
; BTVER2-SSE: # %bb.0:
; BTVER2-SSE-NEXT: movq {{.*#+}} xmm2 = mem[0],zero sched: [5:1.00]
; SKX-NEXT: vmovapd %xmm1, %xmm0 # sched: [1:0.33]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-SSE-LABEL: test_movhpd:
+; BDVER2-SSE: # %bb.0:
+; BDVER2-SSE-NEXT: movhpd {{.*#+}} xmm1 = xmm1[0],mem[0] sched: [7:1.00]
+; BDVER2-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [3:1.00]
+; BDVER2-SSE-NEXT: movhpd %xmm0, (%rdi) # sched: [1:1.00]
+; BDVER2-SSE-NEXT: movapd %xmm1, %xmm0 # sched: [1:1.00]
+; BDVER2-SSE-NEXT: retq # sched: [1:1.00]
+;
+; BDVER2-LABEL: test_movhpd:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: vmovhpd {{.*#+}} xmm1 = xmm1[0],mem[0] sched: [7:1.00]
+; BDVER2-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
+; BDVER2-NEXT: vmovhpd %xmm0, (%rdi) # sched: [1:1.00]
+; BDVER2-NEXT: vmovapd %xmm1, %xmm0 # sched: [1:1.00]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-SSE-LABEL: test_movhpd:
; BTVER2-SSE: # %bb.0:
; BTVER2-SSE-NEXT: movhpd {{.*#+}} xmm1 = xmm1[0],mem[0] sched: [6:1.00]
; SKX-NEXT: vmovapd %xmm1, %xmm0 # sched: [1:0.33]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-SSE-LABEL: test_movlpd:
+; BDVER2-SSE: # %bb.0:
+; BDVER2-SSE-NEXT: movlpd {{.*#+}} xmm1 = mem[0],xmm1[1] sched: [7:1.00]
+; BDVER2-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [3:1.00]
+; BDVER2-SSE-NEXT: movlpd %xmm0, (%rdi) # sched: [1:1.00]
+; BDVER2-SSE-NEXT: movapd %xmm1, %xmm0 # sched: [1:1.00]
+; BDVER2-SSE-NEXT: retq # sched: [1:1.00]
+;
+; BDVER2-LABEL: test_movlpd:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: vmovlpd {{.*#+}} xmm1 = mem[0],xmm1[1] sched: [7:1.00]
+; BDVER2-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
+; BDVER2-NEXT: vmovlpd %xmm0, (%rdi) # sched: [1:1.00]
+; BDVER2-NEXT: vmovapd %xmm1, %xmm0 # sched: [1:1.00]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-SSE-LABEL: test_movlpd:
; BTVER2-SSE: # %bb.0:
; BTVER2-SSE-NEXT: movlpd {{.*#+}} xmm1 = mem[0],xmm1[1] sched: [6:1.00]
; SKX-NEXT: vmovmskpd %xmm0, %eax # sched: [2:1.00]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-SSE-LABEL: test_movmskpd:
+; BDVER2-SSE: # %bb.0:
+; BDVER2-SSE-NEXT: movmskpd %xmm0, %eax # sched: [2:1.00]
+; BDVER2-SSE-NEXT: retq # sched: [1:1.00]
+;
+; BDVER2-LABEL: test_movmskpd:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: vmovmskpd %xmm0, %eax # sched: [2:1.00]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-SSE-LABEL: test_movmskpd:
; BTVER2-SSE: # %bb.0:
; BTVER2-SSE-NEXT: movmskpd %xmm0, %eax # sched: [3:1.00]
; SKX-NEXT: vmovntdq %xmm0, (%rdi) # sched: [1:1.00]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-SSE-LABEL: test_movntdqa:
+; BDVER2-SSE: # %bb.0:
+; BDVER2-SSE-NEXT: paddq %xmm0, %xmm0 # sched: [1:0.50]
+; BDVER2-SSE-NEXT: movntdq %xmm0, (%rdi) # sched: [1:1.00]
+; BDVER2-SSE-NEXT: retq # sched: [1:1.00]
+;
+; BDVER2-LABEL: test_movntdqa:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: vpaddq %xmm0, %xmm0, %xmm0 # sched: [1:0.50]
+; BDVER2-NEXT: vmovntdq %xmm0, (%rdi) # sched: [1:1.00]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-SSE-LABEL: test_movntdqa:
; BTVER2-SSE: # %bb.0:
; BTVER2-SSE-NEXT: paddq %xmm0, %xmm0 # sched: [1:0.50]
; SKX-NEXT: vmovntpd %xmm0, (%rdi) # sched: [1:1.00]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-SSE-LABEL: test_movntpd:
+; BDVER2-SSE: # %bb.0:
+; BDVER2-SSE-NEXT: addpd %xmm0, %xmm0 # sched: [3:1.00]
+; BDVER2-SSE-NEXT: movntpd %xmm0, (%rdi) # sched: [1:1.00]
+; BDVER2-SSE-NEXT: retq # sched: [1:1.00]
+;
+; BDVER2-LABEL: test_movntpd:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: vaddpd %xmm0, %xmm0, %xmm0 # sched: [3:1.00]
+; BDVER2-NEXT: vmovntpd %xmm0, (%rdi) # sched: [1:1.00]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-SSE-LABEL: test_movntpd:
; BTVER2-SSE: # %bb.0:
; BTVER2-SSE-NEXT: addpd %xmm0, %xmm0 # sched: [3:1.00]
; SKX-NEXT: vmovq %xmm0, (%rdi) # sched: [1:1.00]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-SSE-LABEL: test_movq_mem:
+; BDVER2-SSE: # %bb.0:
+; BDVER2-SSE-NEXT: movq {{.*#+}} xmm1 = mem[0],zero sched: [6:0.50]
+; BDVER2-SSE-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.50]
+; BDVER2-SSE-NEXT: movq %xmm0, (%rdi) # sched: [1:1.00]
+; BDVER2-SSE-NEXT: retq # sched: [1:1.00]
+;
+; BDVER2-LABEL: test_movq_mem:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: vmovq {{.*#+}} xmm1 = mem[0],zero sched: [6:0.50]
+; BDVER2-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
+; BDVER2-NEXT: vmovq %xmm0, (%rdi) # sched: [1:1.00]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-SSE-LABEL: test_movq_mem:
; BTVER2-SSE: # %bb.0:
; BTVER2-SSE-NEXT: movq {{.*#+}} xmm1 = mem[0],zero sched: [5:1.00]
; SKX-NEXT: vpaddq %xmm0, %xmm1, %xmm0 # sched: [1:0.33]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-SSE-LABEL: test_movq_reg:
+; BDVER2-SSE: # %bb.0:
+; BDVER2-SSE-NEXT: movq {{.*#+}} xmm0 = xmm0[0],zero sched: [1:0.33]
+; BDVER2-SSE-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.50]
+; BDVER2-SSE-NEXT: retq # sched: [1:1.00]
+;
+; BDVER2-LABEL: test_movq_reg:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: vmovq {{.*#+}} xmm0 = xmm0[0],zero sched: [1:0.33]
+; BDVER2-NEXT: vpaddq %xmm0, %xmm1, %xmm0 # sched: [1:0.50]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-SSE-LABEL: test_movq_reg:
; BTVER2-SSE: # %bb.0:
; BTVER2-SSE-NEXT: movq {{.*#+}} xmm0 = xmm0[0],zero sched: [1:0.50]
; SKX-NEXT: vmovsd %xmm0, (%rsi) # sched: [1:1.00]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-SSE-LABEL: test_movsd_mem:
+; BDVER2-SSE: # %bb.0:
+; BDVER2-SSE-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero sched: [6:0.50]
+; BDVER2-SSE-NEXT: addsd %xmm0, %xmm0 # sched: [3:1.00]
+; BDVER2-SSE-NEXT: movsd %xmm0, (%rsi) # sched: [1:1.00]
+; BDVER2-SSE-NEXT: retq # sched: [1:1.00]
+;
+; BDVER2-LABEL: test_movsd_mem:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero sched: [6:0.50]
+; BDVER2-NEXT: vaddsd %xmm0, %xmm0, %xmm0 # sched: [3:1.00]
+; BDVER2-NEXT: vmovsd %xmm0, (%rsi) # sched: [1:1.00]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-SSE-LABEL: test_movsd_mem:
; BTVER2-SSE: # %bb.0:
; BTVER2-SSE-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero sched: [5:1.00]
; SKX-NEXT: vmovlhps {{.*#+}} xmm0 = xmm1[0],xmm0[0] sched: [1:1.00]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-SSE-LABEL: test_movsd_reg:
+; BDVER2-SSE: # %bb.0:
+; BDVER2-SSE-NEXT: movlhps {{.*#+}} xmm1 = xmm1[0],xmm0[0] sched: [1:1.00]
+; BDVER2-SSE-NEXT: movaps %xmm1, %xmm0 # sched: [1:1.00]
+; BDVER2-SSE-NEXT: retq # sched: [1:1.00]
+;
+; BDVER2-LABEL: test_movsd_reg:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: vmovlhps {{.*#+}} xmm0 = xmm1[0],xmm0[0] sched: [1:1.00]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-SSE-LABEL: test_movsd_reg:
; BTVER2-SSE: # %bb.0:
; BTVER2-SSE-NEXT: movlhps {{.*#+}} xmm1 = xmm1[0],xmm0[0] sched: [1:0.50]
; SKX-NEXT: vmovupd %xmm0, (%rsi) # sched: [1:1.00]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-SSE-LABEL: test_movupd:
+; BDVER2-SSE: # %bb.0:
+; BDVER2-SSE-NEXT: movupd (%rdi), %xmm0 # sched: [6:0.50]
+; BDVER2-SSE-NEXT: addpd %xmm0, %xmm0 # sched: [3:1.00]
+; BDVER2-SSE-NEXT: movupd %xmm0, (%rsi) # sched: [1:1.00]
+; BDVER2-SSE-NEXT: retq # sched: [1:1.00]
+;
+; BDVER2-LABEL: test_movupd:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: vmovupd (%rdi), %xmm0 # sched: [6:0.50]
+; BDVER2-NEXT: vaddpd %xmm0, %xmm0, %xmm0 # sched: [3:1.00]
+; BDVER2-NEXT: vmovupd %xmm0, (%rsi) # sched: [1:1.00]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-SSE-LABEL: test_movupd:
; BTVER2-SSE: # %bb.0:
; BTVER2-SSE-NEXT: movupd (%rdi), %xmm0 # sched: [5:1.00]
; SKX-NEXT: vmulpd (%rdi), %xmm0, %xmm0 # sched: [10:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-SSE-LABEL: test_mulpd:
+; BDVER2-SSE: # %bb.0:
+; BDVER2-SSE-NEXT: mulpd %xmm1, %xmm0 # sched: [5:1.00]
+; BDVER2-SSE-NEXT: mulpd (%rdi), %xmm0 # sched: [11:1.00]
+; BDVER2-SSE-NEXT: retq # sched: [1:1.00]
+;
+; BDVER2-LABEL: test_mulpd:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: vmulpd %xmm1, %xmm0, %xmm0 # sched: [5:1.00]
+; BDVER2-NEXT: vmulpd (%rdi), %xmm0, %xmm0 # sched: [11:1.00]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-SSE-LABEL: test_mulpd:
; BTVER2-SSE: # %bb.0:
; BTVER2-SSE-NEXT: mulpd %xmm1, %xmm0 # sched: [4:2.00]
; SKX-NEXT: vmulsd (%rdi), %xmm0, %xmm0 # sched: [9:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-SSE-LABEL: test_mulsd:
+; BDVER2-SSE: # %bb.0:
+; BDVER2-SSE-NEXT: mulsd %xmm1, %xmm0 # sched: [5:1.00]
+; BDVER2-SSE-NEXT: mulsd (%rdi), %xmm0 # sched: [11:1.00]
+; BDVER2-SSE-NEXT: retq # sched: [1:1.00]
+;
+; BDVER2-LABEL: test_mulsd:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: vmulsd %xmm1, %xmm0, %xmm0 # sched: [5:1.00]
+; BDVER2-NEXT: vmulsd (%rdi), %xmm0, %xmm0 # sched: [11:1.00]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-SSE-LABEL: test_mulsd:
; BTVER2-SSE: # %bb.0:
; BTVER2-SSE-NEXT: mulsd %xmm1, %xmm0 # sched: [4:2.00]
; SKX-NEXT: vaddpd %xmm0, %xmm1, %xmm0 # sched: [4:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-SSE-LABEL: test_orpd:
+; BDVER2-SSE: # %bb.0:
+; BDVER2-SSE-NEXT: orpd %xmm1, %xmm0 # sched: [1:1.00]
+; BDVER2-SSE-NEXT: orpd (%rdi), %xmm0 # sched: [7:1.00]
+; BDVER2-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [3:1.00]
+; BDVER2-SSE-NEXT: retq # sched: [1:1.00]
+;
+; BDVER2-LABEL: test_orpd:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: vorpd %xmm1, %xmm0, %xmm0 # sched: [1:1.00]
+; BDVER2-NEXT: vorpd (%rdi), %xmm0, %xmm0 # sched: [7:1.00]
+; BDVER2-NEXT: vaddpd %xmm0, %xmm1, %xmm0 # sched: [3:1.00]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-SSE-LABEL: test_orpd:
; BTVER2-SSE: # %bb.0:
; BTVER2-SSE-NEXT: orpd %xmm1, %xmm0 # sched: [1:0.50]
; SKX-NEXT: vpackssdw (%rdi), %xmm0, %xmm0 # sched: [7:1.00]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-SSE-LABEL: test_packssdw:
+; BDVER2-SSE: # %bb.0:
+; BDVER2-SSE-NEXT: packssdw %xmm1, %xmm0 # sched: [1:0.50]
+; BDVER2-SSE-NEXT: packssdw (%rdi), %xmm0 # sched: [7:0.50]
+; BDVER2-SSE-NEXT: retq # sched: [1:1.00]
+;
+; BDVER2-LABEL: test_packssdw:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: vpackssdw %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
+; BDVER2-NEXT: vpackssdw (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-SSE-LABEL: test_packssdw:
; BTVER2-SSE: # %bb.0:
; BTVER2-SSE-NEXT: packssdw %xmm1, %xmm0 # sched: [1:0.50]
; SKX-NEXT: vpacksswb (%rdi), %xmm0, %xmm0 # sched: [7:1.00]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-SSE-LABEL: test_packsswb:
+; BDVER2-SSE: # %bb.0:
+; BDVER2-SSE-NEXT: packsswb %xmm1, %xmm0 # sched: [1:0.50]
+; BDVER2-SSE-NEXT: packsswb (%rdi), %xmm0 # sched: [7:0.50]
+; BDVER2-SSE-NEXT: retq # sched: [1:1.00]
+;
+; BDVER2-LABEL: test_packsswb:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: vpacksswb %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
+; BDVER2-NEXT: vpacksswb (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-SSE-LABEL: test_packsswb:
; BTVER2-SSE: # %bb.0:
; BTVER2-SSE-NEXT: packsswb %xmm1, %xmm0 # sched: [1:0.50]
; SKX-NEXT: vpackuswb (%rdi), %xmm0, %xmm0 # sched: [7:1.00]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-SSE-LABEL: test_packuswb:
+; BDVER2-SSE: # %bb.0:
+; BDVER2-SSE-NEXT: packuswb %xmm1, %xmm0 # sched: [1:0.50]
+; BDVER2-SSE-NEXT: packuswb (%rdi), %xmm0 # sched: [7:0.50]
+; BDVER2-SSE-NEXT: retq # sched: [1:1.00]
+;
+; BDVER2-LABEL: test_packuswb:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: vpackuswb %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
+; BDVER2-NEXT: vpackuswb (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-SSE-LABEL: test_packuswb:
; BTVER2-SSE: # %bb.0:
; BTVER2-SSE-NEXT: packuswb %xmm1, %xmm0 # sched: [1:0.50]
; SKX-NEXT: vpaddb (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-SSE-LABEL: test_paddb:
+; BDVER2-SSE: # %bb.0:
+; BDVER2-SSE-NEXT: paddb %xmm1, %xmm0 # sched: [1:0.50]
+; BDVER2-SSE-NEXT: paddb (%rdi), %xmm0 # sched: [7:0.50]
+; BDVER2-SSE-NEXT: retq # sched: [1:1.00]
+;
+; BDVER2-LABEL: test_paddb:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: vpaddb %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
+; BDVER2-NEXT: vpaddb (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-SSE-LABEL: test_paddb:
; BTVER2-SSE: # %bb.0:
; BTVER2-SSE-NEXT: paddb %xmm1, %xmm0 # sched: [1:0.50]
; SKX-NEXT: vpaddd (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-SSE-LABEL: test_paddd:
+; BDVER2-SSE: # %bb.0:
+; BDVER2-SSE-NEXT: paddd %xmm1, %xmm0 # sched: [1:0.50]
+; BDVER2-SSE-NEXT: paddd (%rdi), %xmm0 # sched: [7:0.50]
+; BDVER2-SSE-NEXT: retq # sched: [1:1.00]
+;
+; BDVER2-LABEL: test_paddd:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
+; BDVER2-NEXT: vpaddd (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-SSE-LABEL: test_paddd:
; BTVER2-SSE: # %bb.0:
; BTVER2-SSE-NEXT: paddd %xmm1, %xmm0 # sched: [1:0.50]
; SKX-NEXT: vpaddq (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-SSE-LABEL: test_paddq:
+; BDVER2-SSE: # %bb.0:
+; BDVER2-SSE-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.50]
+; BDVER2-SSE-NEXT: paddq (%rdi), %xmm0 # sched: [7:0.50]
+; BDVER2-SSE-NEXT: retq # sched: [1:1.00]
+;
+; BDVER2-LABEL: test_paddq:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
+; BDVER2-NEXT: vpaddq (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-SSE-LABEL: test_paddq:
; BTVER2-SSE: # %bb.0:
; BTVER2-SSE-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.50]
; SKX-NEXT: vpaddsb (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-SSE-LABEL: test_paddsb:
+; BDVER2-SSE: # %bb.0:
+; BDVER2-SSE-NEXT: paddsb %xmm1, %xmm0 # sched: [1:0.50]
+; BDVER2-SSE-NEXT: paddsb (%rdi), %xmm0 # sched: [7:0.50]
+; BDVER2-SSE-NEXT: retq # sched: [1:1.00]
+;
+; BDVER2-LABEL: test_paddsb:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: vpaddsb %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
+; BDVER2-NEXT: vpaddsb (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-SSE-LABEL: test_paddsb:
; BTVER2-SSE: # %bb.0:
; BTVER2-SSE-NEXT: paddsb %xmm1, %xmm0 # sched: [1:0.50]
; SKX-NEXT: vpaddsw (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-SSE-LABEL: test_paddsw:
+; BDVER2-SSE: # %bb.0:
+; BDVER2-SSE-NEXT: paddsw %xmm1, %xmm0 # sched: [1:0.50]
+; BDVER2-SSE-NEXT: paddsw (%rdi), %xmm0 # sched: [7:0.50]
+; BDVER2-SSE-NEXT: retq # sched: [1:1.00]
+;
+; BDVER2-LABEL: test_paddsw:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: vpaddsw %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
+; BDVER2-NEXT: vpaddsw (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-SSE-LABEL: test_paddsw:
; BTVER2-SSE: # %bb.0:
; BTVER2-SSE-NEXT: paddsw %xmm1, %xmm0 # sched: [1:0.50]
; SKX-NEXT: vpaddusb (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-SSE-LABEL: test_paddusb:
+; BDVER2-SSE: # %bb.0:
+; BDVER2-SSE-NEXT: paddusb %xmm1, %xmm0 # sched: [1:0.50]
+; BDVER2-SSE-NEXT: paddusb (%rdi), %xmm0 # sched: [7:0.50]
+; BDVER2-SSE-NEXT: retq # sched: [1:1.00]
+;
+; BDVER2-LABEL: test_paddusb:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: vpaddusb %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
+; BDVER2-NEXT: vpaddusb (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-SSE-LABEL: test_paddusb:
; BTVER2-SSE: # %bb.0:
; BTVER2-SSE-NEXT: paddusb %xmm1, %xmm0 # sched: [1:0.50]
; SKX-NEXT: vpaddusw (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-SSE-LABEL: test_paddusw:
+; BDVER2-SSE: # %bb.0:
+; BDVER2-SSE-NEXT: paddusw %xmm1, %xmm0 # sched: [1:0.50]
+; BDVER2-SSE-NEXT: paddusw (%rdi), %xmm0 # sched: [7:0.50]
+; BDVER2-SSE-NEXT: retq # sched: [1:1.00]
+;
+; BDVER2-LABEL: test_paddusw:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: vpaddusw %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
+; BDVER2-NEXT: vpaddusw (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-SSE-LABEL: test_paddusw:
; BTVER2-SSE: # %bb.0:
; BTVER2-SSE-NEXT: paddusw %xmm1, %xmm0 # sched: [1:0.50]
; SKX-NEXT: vpaddw (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-SSE-LABEL: test_paddw:
+; BDVER2-SSE: # %bb.0:
+; BDVER2-SSE-NEXT: paddw %xmm1, %xmm0 # sched: [1:0.50]
+; BDVER2-SSE-NEXT: paddw (%rdi), %xmm0 # sched: [7:0.50]
+; BDVER2-SSE-NEXT: retq # sched: [1:1.00]
+;
+; BDVER2-LABEL: test_paddw:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: vpaddw %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
+; BDVER2-NEXT: vpaddw (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-SSE-LABEL: test_paddw:
; BTVER2-SSE: # %bb.0:
; BTVER2-SSE-NEXT: paddw %xmm1, %xmm0 # sched: [1:0.50]
; SKX-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-SSE-LABEL: test_pand:
+; BDVER2-SSE: # %bb.0:
+; BDVER2-SSE-NEXT: pand %xmm1, %xmm0 # sched: [1:0.33]
+; BDVER2-SSE-NEXT: pand (%rdi), %xmm0 # sched: [7:0.50]
+; BDVER2-SSE-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.50]
+; BDVER2-SSE-NEXT: retq # sched: [1:1.00]
+;
+; BDVER2-LABEL: test_pand:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: vpand %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
+; BDVER2-NEXT: vpand (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
+; BDVER2-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-SSE-LABEL: test_pand:
; BTVER2-SSE: # %bb.0:
; BTVER2-SSE-NEXT: pand %xmm1, %xmm0 # sched: [1:0.50]
; SKX-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-SSE-LABEL: test_pandn:
+; BDVER2-SSE: # %bb.0:
+; BDVER2-SSE-NEXT: pandn %xmm1, %xmm0 # sched: [1:0.33]
+; BDVER2-SSE-NEXT: movdqa %xmm0, %xmm1 # sched: [1:0.33]
+; BDVER2-SSE-NEXT: pandn (%rdi), %xmm1 # sched: [7:0.50]
+; BDVER2-SSE-NEXT: paddq %xmm0, %xmm1 # sched: [1:0.50]
+; BDVER2-SSE-NEXT: movdqa %xmm1, %xmm0 # sched: [1:0.33]
+; BDVER2-SSE-NEXT: retq # sched: [1:1.00]
+;
+; BDVER2-LABEL: test_pandn:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: vpandn %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
+; BDVER2-NEXT: vpandn (%rdi), %xmm0, %xmm1 # sched: [7:0.50]
+; BDVER2-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-SSE-LABEL: test_pandn:
; BTVER2-SSE: # %bb.0:
; BTVER2-SSE-NEXT: pandn %xmm1, %xmm0 # sched: [1:0.50]
; SKX-NEXT: vpavgb (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-SSE-LABEL: test_pavgb:
+; BDVER2-SSE: # %bb.0:
+; BDVER2-SSE-NEXT: pavgb %xmm1, %xmm0 # sched: [1:0.50]
+; BDVER2-SSE-NEXT: pavgb (%rdi), %xmm0 # sched: [7:0.50]
+; BDVER2-SSE-NEXT: retq # sched: [1:1.00]
+;
+; BDVER2-LABEL: test_pavgb:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: vpavgb %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
+; BDVER2-NEXT: vpavgb (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-SSE-LABEL: test_pavgb:
; BTVER2-SSE: # %bb.0:
; BTVER2-SSE-NEXT: pavgb %xmm1, %xmm0 # sched: [1:0.50]
; SKX-NEXT: vpavgw (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-SSE-LABEL: test_pavgw:
+; BDVER2-SSE: # %bb.0:
+; BDVER2-SSE-NEXT: pavgw %xmm1, %xmm0 # sched: [1:0.50]
+; BDVER2-SSE-NEXT: pavgw (%rdi), %xmm0 # sched: [7:0.50]
+; BDVER2-SSE-NEXT: retq # sched: [1:1.00]
+;
+; BDVER2-LABEL: test_pavgw:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: vpavgw %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
+; BDVER2-NEXT: vpavgw (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-SSE-LABEL: test_pavgw:
; BTVER2-SSE: # %bb.0:
; BTVER2-SSE-NEXT: pavgw %xmm1, %xmm0 # sched: [1:0.50]
; SKX-NEXT: vpor %xmm0, %xmm1, %xmm0 # sched: [1:0.33]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-SSE-LABEL: test_pcmpeqb:
+; BDVER2-SSE: # %bb.0:
+; BDVER2-SSE-NEXT: pcmpeqb %xmm0, %xmm1 # sched: [1:0.50]
+; BDVER2-SSE-NEXT: pcmpeqb (%rdi), %xmm0 # sched: [7:0.50]
+; BDVER2-SSE-NEXT: por %xmm1, %xmm0 # sched: [1:0.33]
+; BDVER2-SSE-NEXT: retq # sched: [1:1.00]
+;
+; BDVER2-LABEL: test_pcmpeqb:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: vpcomeqb %xmm1, %xmm0, %xmm1 # sched: [1:0.50]
+; BDVER2-NEXT: vpcomeqb (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
+; BDVER2-NEXT: vpor %xmm0, %xmm1, %xmm0 # sched: [1:0.33]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-SSE-LABEL: test_pcmpeqb:
; BTVER2-SSE: # %bb.0:
; BTVER2-SSE-NEXT: pcmpeqb %xmm0, %xmm1 # sched: [1:0.50]
; SKX-NEXT: vpor %xmm0, %xmm1, %xmm0 # sched: [1:0.33]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-SSE-LABEL: test_pcmpeqd:
+; BDVER2-SSE: # %bb.0:
+; BDVER2-SSE-NEXT: pcmpeqd %xmm0, %xmm1 # sched: [1:0.50]
+; BDVER2-SSE-NEXT: pcmpeqd (%rdi), %xmm0 # sched: [7:0.50]
+; BDVER2-SSE-NEXT: por %xmm1, %xmm0 # sched: [1:0.33]
+; BDVER2-SSE-NEXT: retq # sched: [1:1.00]
+;
+; BDVER2-LABEL: test_pcmpeqd:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: vpcomeqd %xmm1, %xmm0, %xmm1 # sched: [1:0.50]
+; BDVER2-NEXT: vpcomeqd (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
+; BDVER2-NEXT: vpor %xmm0, %xmm1, %xmm0 # sched: [1:0.33]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-SSE-LABEL: test_pcmpeqd:
; BTVER2-SSE: # %bb.0:
; BTVER2-SSE-NEXT: pcmpeqd %xmm0, %xmm1 # sched: [1:0.50]
; SKX-NEXT: vpor %xmm0, %xmm1, %xmm0 # sched: [1:0.33]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-SSE-LABEL: test_pcmpeqw:
+; BDVER2-SSE: # %bb.0:
+; BDVER2-SSE-NEXT: pcmpeqw %xmm0, %xmm1 # sched: [1:0.50]
+; BDVER2-SSE-NEXT: pcmpeqw (%rdi), %xmm0 # sched: [7:0.50]
+; BDVER2-SSE-NEXT: por %xmm1, %xmm0 # sched: [1:0.33]
+; BDVER2-SSE-NEXT: retq # sched: [1:1.00]
+;
+; BDVER2-LABEL: test_pcmpeqw:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: vpcomeqw %xmm1, %xmm0, %xmm1 # sched: [1:0.50]
+; BDVER2-NEXT: vpcomeqw (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
+; BDVER2-NEXT: vpor %xmm0, %xmm1, %xmm0 # sched: [1:0.33]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-SSE-LABEL: test_pcmpeqw:
; BTVER2-SSE: # %bb.0:
; BTVER2-SSE-NEXT: pcmpeqw %xmm0, %xmm1 # sched: [1:0.50]
; SKX-NEXT: vpor %xmm0, %xmm1, %xmm0 # sched: [1:0.33]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-SSE-LABEL: test_pcmpgtb:
+; BDVER2-SSE: # %bb.0:
+; BDVER2-SSE-NEXT: movdqa %xmm0, %xmm2 # sched: [1:0.33]
+; BDVER2-SSE-NEXT: pcmpgtb %xmm1, %xmm2 # sched: [1:0.50]
+; BDVER2-SSE-NEXT: pcmpgtb (%rdi), %xmm0 # sched: [7:0.50]
+; BDVER2-SSE-NEXT: por %xmm2, %xmm0 # sched: [1:0.33]
+; BDVER2-SSE-NEXT: retq # sched: [1:1.00]
+;
+; BDVER2-LABEL: test_pcmpgtb:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: vpcomgtb %xmm1, %xmm0, %xmm1 # sched: [1:0.50]
+; BDVER2-NEXT: vpcomgtb (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
+; BDVER2-NEXT: vpor %xmm0, %xmm1, %xmm0 # sched: [1:0.33]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-SSE-LABEL: test_pcmpgtb:
; BTVER2-SSE: # %bb.0:
; BTVER2-SSE-NEXT: movdqa %xmm0, %xmm2 # sched: [1:0.50]
; SKX-NEXT: vpor %xmm0, %xmm1, %xmm0 # sched: [1:0.33]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-SSE-LABEL: test_pcmpgtd:
+; BDVER2-SSE: # %bb.0:
+; BDVER2-SSE-NEXT: movdqa %xmm0, %xmm2 # sched: [1:0.33]
+; BDVER2-SSE-NEXT: pcmpgtd %xmm1, %xmm2 # sched: [1:0.50]
+; BDVER2-SSE-NEXT: pcmpeqd (%rdi), %xmm0 # sched: [7:0.50]
+; BDVER2-SSE-NEXT: por %xmm2, %xmm0 # sched: [1:0.33]
+; BDVER2-SSE-NEXT: retq # sched: [1:1.00]
+;
+; BDVER2-LABEL: test_pcmpgtd:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: vpcomgtd %xmm1, %xmm0, %xmm1 # sched: [1:0.50]
+; BDVER2-NEXT: vpcomeqd (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
+; BDVER2-NEXT: vpor %xmm0, %xmm1, %xmm0 # sched: [1:0.33]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-SSE-LABEL: test_pcmpgtd:
; BTVER2-SSE: # %bb.0:
; BTVER2-SSE-NEXT: movdqa %xmm0, %xmm2 # sched: [1:0.50]
; SKX-NEXT: vpor %xmm0, %xmm1, %xmm0 # sched: [1:0.33]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-SSE-LABEL: test_pcmpgtw:
+; BDVER2-SSE: # %bb.0:
+; BDVER2-SSE-NEXT: movdqa %xmm0, %xmm2 # sched: [1:0.33]
+; BDVER2-SSE-NEXT: pcmpgtw %xmm1, %xmm2 # sched: [1:0.50]
+; BDVER2-SSE-NEXT: pcmpgtw (%rdi), %xmm0 # sched: [7:0.50]
+; BDVER2-SSE-NEXT: por %xmm2, %xmm0 # sched: [1:0.33]
+; BDVER2-SSE-NEXT: retq # sched: [1:1.00]
+;
+; BDVER2-LABEL: test_pcmpgtw:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: vpcomgtw %xmm1, %xmm0, %xmm1 # sched: [1:0.50]
+; BDVER2-NEXT: vpcomgtw (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
+; BDVER2-NEXT: vpor %xmm0, %xmm1, %xmm0 # sched: [1:0.33]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-SSE-LABEL: test_pcmpgtw:
; BTVER2-SSE: # %bb.0:
; BTVER2-SSE-NEXT: movdqa %xmm0, %xmm2 # sched: [1:0.50]
; SKX-NEXT: # kill: def $ax killed $ax killed $eax
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-SSE-LABEL: test_pextrw:
+; BDVER2-SSE: # %bb.0:
+; BDVER2-SSE-NEXT: pextrw $6, %xmm0, %eax # sched: [3:1.00]
+; BDVER2-SSE-NEXT: # kill: def $ax killed $ax killed $eax
+; BDVER2-SSE-NEXT: retq # sched: [1:1.00]
+;
+; BDVER2-LABEL: test_pextrw:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: vpextrw $6, %xmm0, %eax # sched: [3:1.00]
+; BDVER2-NEXT: # kill: def $ax killed $ax killed $eax
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-SSE-LABEL: test_pextrw:
; BTVER2-SSE: # %bb.0:
; BTVER2-SSE-NEXT: pextrw $6, %xmm0, %eax # sched: [3:1.00]
; SKX-NEXT: vpinsrw $3, (%rsi), %xmm0, %xmm0 # sched: [6:1.00]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-SSE-LABEL: test_pinsrw:
+; BDVER2-SSE: # %bb.0:
+; BDVER2-SSE-NEXT: pinsrw $1, %edi, %xmm0 # sched: [2:1.00]
+; BDVER2-SSE-NEXT: pinsrw $3, (%rsi), %xmm0 # sched: [7:0.50]
+; BDVER2-SSE-NEXT: retq # sched: [1:1.00]
+;
+; BDVER2-LABEL: test_pinsrw:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: vpinsrw $1, %edi, %xmm0, %xmm0 # sched: [2:1.00]
+; BDVER2-NEXT: vpinsrw $3, (%rsi), %xmm0, %xmm0 # sched: [7:0.50]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-SSE-LABEL: test_pinsrw:
; BTVER2-SSE: # %bb.0:
; BTVER2-SSE-NEXT: pinsrw $1, %edi, %xmm0 # sched: [7:0.50]
; SKX-NEXT: vpmaddwd (%rdi), %xmm0, %xmm0 # sched: [10:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-SSE-LABEL: test_pmaddwd:
+; BDVER2-SSE: # %bb.0:
+; BDVER2-SSE-NEXT: pmaddwd %xmm1, %xmm0 # sched: [5:1.00]
+; BDVER2-SSE-NEXT: pmaddwd (%rdi), %xmm0 # sched: [11:1.00]
+; BDVER2-SSE-NEXT: retq # sched: [1:1.00]
+;
+; BDVER2-LABEL: test_pmaddwd:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: vpmaddwd %xmm1, %xmm0, %xmm0 # sched: [5:1.00]
+; BDVER2-NEXT: vpmaddwd (%rdi), %xmm0, %xmm0 # sched: [11:1.00]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-SSE-LABEL: test_pmaddwd:
; BTVER2-SSE: # %bb.0:
; BTVER2-SSE-NEXT: pmaddwd %xmm1, %xmm0 # sched: [2:1.00]
; SKX-NEXT: vpmaxsw (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-SSE-LABEL: test_pmaxsw:
+; BDVER2-SSE: # %bb.0:
+; BDVER2-SSE-NEXT: pmaxsw %xmm1, %xmm0 # sched: [1:0.50]
+; BDVER2-SSE-NEXT: pmaxsw (%rdi), %xmm0 # sched: [7:0.50]
+; BDVER2-SSE-NEXT: retq # sched: [1:1.00]
+;
+; BDVER2-LABEL: test_pmaxsw:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: vpmaxsw %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
+; BDVER2-NEXT: vpmaxsw (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-SSE-LABEL: test_pmaxsw:
; BTVER2-SSE: # %bb.0:
; BTVER2-SSE-NEXT: pmaxsw %xmm1, %xmm0 # sched: [1:0.50]
; SKX-NEXT: vpmaxub (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-SSE-LABEL: test_pmaxub:
+; BDVER2-SSE: # %bb.0:
+; BDVER2-SSE-NEXT: pmaxub %xmm1, %xmm0 # sched: [1:0.50]
+; BDVER2-SSE-NEXT: pmaxub (%rdi), %xmm0 # sched: [7:0.50]
+; BDVER2-SSE-NEXT: retq # sched: [1:1.00]
+;
+; BDVER2-LABEL: test_pmaxub:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: vpmaxub %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
+; BDVER2-NEXT: vpmaxub (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-SSE-LABEL: test_pmaxub:
; BTVER2-SSE: # %bb.0:
; BTVER2-SSE-NEXT: pmaxub %xmm1, %xmm0 # sched: [1:0.50]
; SKX-NEXT: vpminsw (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-SSE-LABEL: test_pminsw:
+; BDVER2-SSE: # %bb.0:
+; BDVER2-SSE-NEXT: pminsw %xmm1, %xmm0 # sched: [1:0.50]
+; BDVER2-SSE-NEXT: pminsw (%rdi), %xmm0 # sched: [7:0.50]
+; BDVER2-SSE-NEXT: retq # sched: [1:1.00]
+;
+; BDVER2-LABEL: test_pminsw:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: vpminsw %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
+; BDVER2-NEXT: vpminsw (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-SSE-LABEL: test_pminsw:
; BTVER2-SSE: # %bb.0:
; BTVER2-SSE-NEXT: pminsw %xmm1, %xmm0 # sched: [1:0.50]
; SKX-NEXT: vpminub (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-SSE-LABEL: test_pminub:
+; BDVER2-SSE: # %bb.0:
+; BDVER2-SSE-NEXT: pminub %xmm1, %xmm0 # sched: [1:0.50]
+; BDVER2-SSE-NEXT: pminub (%rdi), %xmm0 # sched: [7:0.50]
+; BDVER2-SSE-NEXT: retq # sched: [1:1.00]
+;
+; BDVER2-LABEL: test_pminub:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: vpminub %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
+; BDVER2-NEXT: vpminub (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-SSE-LABEL: test_pminub:
; BTVER2-SSE: # %bb.0:
; BTVER2-SSE-NEXT: pminub %xmm1, %xmm0 # sched: [1:0.50]
; SKX-NEXT: vpmovmskb %xmm0, %eax # sched: [2:1.00]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-SSE-LABEL: test_pmovmskb:
+; BDVER2-SSE: # %bb.0:
+; BDVER2-SSE-NEXT: pmovmskb %xmm0, %eax # sched: [2:1.00]
+; BDVER2-SSE-NEXT: retq # sched: [1:1.00]
+;
+; BDVER2-LABEL: test_pmovmskb:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: vpmovmskb %xmm0, %eax # sched: [2:1.00]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-SSE-LABEL: test_pmovmskb:
; BTVER2-SSE: # %bb.0:
; BTVER2-SSE-NEXT: pmovmskb %xmm0, %eax # sched: [3:1.00]
; SKX-NEXT: vpmulhuw (%rdi), %xmm0, %xmm0 # sched: [10:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-SSE-LABEL: test_pmulhuw:
+; BDVER2-SSE: # %bb.0:
+; BDVER2-SSE-NEXT: pmulhuw %xmm1, %xmm0 # sched: [5:1.00]
+; BDVER2-SSE-NEXT: pmulhuw (%rdi), %xmm0 # sched: [11:1.00]
+; BDVER2-SSE-NEXT: retq # sched: [1:1.00]
+;
+; BDVER2-LABEL: test_pmulhuw:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: vpmulhuw %xmm1, %xmm0, %xmm0 # sched: [5:1.00]
+; BDVER2-NEXT: vpmulhuw (%rdi), %xmm0, %xmm0 # sched: [11:1.00]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-SSE-LABEL: test_pmulhuw:
; BTVER2-SSE: # %bb.0:
; BTVER2-SSE-NEXT: pmulhuw %xmm1, %xmm0 # sched: [2:1.00]
; SKX-NEXT: vpmulhw (%rdi), %xmm0, %xmm0 # sched: [10:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-SSE-LABEL: test_pmulhw:
+; BDVER2-SSE: # %bb.0:
+; BDVER2-SSE-NEXT: pmulhw %xmm1, %xmm0 # sched: [5:1.00]
+; BDVER2-SSE-NEXT: pmulhw (%rdi), %xmm0 # sched: [11:1.00]
+; BDVER2-SSE-NEXT: retq # sched: [1:1.00]
+;
+; BDVER2-LABEL: test_pmulhw:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: vpmulhw %xmm1, %xmm0, %xmm0 # sched: [5:1.00]
+; BDVER2-NEXT: vpmulhw (%rdi), %xmm0, %xmm0 # sched: [11:1.00]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-SSE-LABEL: test_pmulhw:
; BTVER2-SSE: # %bb.0:
; BTVER2-SSE-NEXT: pmulhw %xmm1, %xmm0 # sched: [2:1.00]
; SKX-NEXT: vpmullw (%rdi), %xmm0, %xmm0 # sched: [10:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-SSE-LABEL: test_pmullw:
+; BDVER2-SSE: # %bb.0:
+; BDVER2-SSE-NEXT: pmullw %xmm1, %xmm0 # sched: [5:1.00]
+; BDVER2-SSE-NEXT: pmullw (%rdi), %xmm0 # sched: [11:1.00]
+; BDVER2-SSE-NEXT: retq # sched: [1:1.00]
+;
+; BDVER2-LABEL: test_pmullw:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: vpmullw %xmm1, %xmm0, %xmm0 # sched: [5:1.00]
+; BDVER2-NEXT: vpmullw (%rdi), %xmm0, %xmm0 # sched: [11:1.00]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-SSE-LABEL: test_pmullw:
; BTVER2-SSE: # %bb.0:
; BTVER2-SSE-NEXT: pmullw %xmm1, %xmm0 # sched: [2:1.00]
; SKX-NEXT: vpmuludq (%rdi), %xmm0, %xmm0 # sched: [10:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-SSE-LABEL: test_pmuludq:
+; BDVER2-SSE: # %bb.0:
+; BDVER2-SSE-NEXT: pmuludq %xmm1, %xmm0 # sched: [5:1.00]
+; BDVER2-SSE-NEXT: pmuludq (%rdi), %xmm0 # sched: [11:1.00]
+; BDVER2-SSE-NEXT: retq # sched: [1:1.00]
+;
+; BDVER2-LABEL: test_pmuludq:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: vpmuludq %xmm1, %xmm0, %xmm0 # sched: [5:1.00]
+; BDVER2-NEXT: vpmuludq (%rdi), %xmm0, %xmm0 # sched: [11:1.00]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-SSE-LABEL: test_pmuludq:
; BTVER2-SSE: # %bb.0:
; BTVER2-SSE-NEXT: pmuludq %xmm1, %xmm0 # sched: [2:1.00]
; SKX-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-SSE-LABEL: test_por:
+; BDVER2-SSE: # %bb.0:
+; BDVER2-SSE-NEXT: por %xmm1, %xmm0 # sched: [1:0.33]
+; BDVER2-SSE-NEXT: por (%rdi), %xmm0 # sched: [7:0.50]
+; BDVER2-SSE-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.50]
+; BDVER2-SSE-NEXT: retq # sched: [1:1.00]
+;
+; BDVER2-LABEL: test_por:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: vpor %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
+; BDVER2-NEXT: vpor (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
+; BDVER2-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-SSE-LABEL: test_por:
; BTVER2-SSE: # %bb.0:
; BTVER2-SSE-NEXT: por %xmm1, %xmm0 # sched: [1:0.50]
; SKX-NEXT: vpsadbw (%rdi), %xmm0, %xmm0 # sched: [9:1.00]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-SSE-LABEL: test_psadbw:
+; BDVER2-SSE: # %bb.0:
+; BDVER2-SSE-NEXT: psadbw %xmm1, %xmm0 # sched: [5:1.00]
+; BDVER2-SSE-NEXT: psadbw (%rdi), %xmm0 # sched: [11:1.00]
+; BDVER2-SSE-NEXT: retq # sched: [1:1.00]
+;
+; BDVER2-LABEL: test_psadbw:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 # sched: [5:1.00]
+; BDVER2-NEXT: vpsadbw (%rdi), %xmm0, %xmm0 # sched: [11:1.00]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-SSE-LABEL: test_psadbw:
; BTVER2-SSE: # %bb.0:
; BTVER2-SSE-NEXT: psadbw %xmm1, %xmm0 # sched: [2:0.50]
; SKX-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-SSE-LABEL: test_pshufd:
+; BDVER2-SSE: # %bb.0:
+; BDVER2-SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,0,3,2] sched: [1:0.50]
+; BDVER2-SSE-NEXT: pshufd {{.*#+}} xmm0 = mem[3,2,1,0] sched: [7:0.50]
+; BDVER2-SSE-NEXT: paddd %xmm1, %xmm0 # sched: [1:0.50]
+; BDVER2-SSE-NEXT: retq # sched: [1:1.00]
+;
+; BDVER2-LABEL: test_pshufd:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[1,0,3,2] sched: [1:0.50]
+; BDVER2-NEXT: vpshufd {{.*#+}} xmm1 = mem[3,2,1,0] sched: [7:0.50]
+; BDVER2-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-SSE-LABEL: test_pshufd:
; BTVER2-SSE: # %bb.0:
; BTVER2-SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,0,3,2] sched: [1:0.50]
; SKX-NEXT: vpaddw %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-SSE-LABEL: test_pshufhw:
+; BDVER2-SSE: # %bb.0:
+; BDVER2-SSE-NEXT: pshufhw {{.*#+}} xmm1 = xmm0[0,1,2,3,5,4,7,6] sched: [1:0.50]
+; BDVER2-SSE-NEXT: pshufhw {{.*#+}} xmm0 = mem[0,1,2,3,7,6,5,4] sched: [7:0.50]
+; BDVER2-SSE-NEXT: paddw %xmm1, %xmm0 # sched: [1:0.50]
+; BDVER2-SSE-NEXT: retq # sched: [1:1.00]
+;
+; BDVER2-LABEL: test_pshufhw:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: vpshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,5,4,7,6] sched: [1:0.50]
+; BDVER2-NEXT: vpshufhw {{.*#+}} xmm1 = mem[0,1,2,3,7,6,5,4] sched: [7:0.50]
+; BDVER2-NEXT: vpaddw %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-SSE-LABEL: test_pshufhw:
; BTVER2-SSE: # %bb.0:
; BTVER2-SSE-NEXT: pshufhw {{.*#+}} xmm1 = xmm0[0,1,2,3,5,4,7,6] sched: [1:0.50]
; SKX-NEXT: vpaddw %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-SSE-LABEL: test_pshuflw:
+; BDVER2-SSE: # %bb.0:
+; BDVER2-SSE-NEXT: pshuflw {{.*#+}} xmm1 = xmm0[1,0,3,2,4,5,6,7] sched: [1:0.50]
+; BDVER2-SSE-NEXT: pshuflw {{.*#+}} xmm0 = mem[3,2,1,0,4,5,6,7] sched: [7:0.50]
+; BDVER2-SSE-NEXT: paddw %xmm1, %xmm0 # sched: [1:0.50]
+; BDVER2-SSE-NEXT: retq # sched: [1:1.00]
+;
+; BDVER2-LABEL: test_pshuflw:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[1,0,3,2,4,5,6,7] sched: [1:0.50]
+; BDVER2-NEXT: vpshuflw {{.*#+}} xmm1 = mem[3,2,1,0,4,5,6,7] sched: [7:0.50]
+; BDVER2-NEXT: vpaddw %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-SSE-LABEL: test_pshuflw:
; BTVER2-SSE: # %bb.0:
; BTVER2-SSE-NEXT: pshuflw {{.*#+}} xmm1 = xmm0[1,0,3,2,4,5,6,7] sched: [1:0.50]
; SKX-NEXT: vpslld $2, %xmm0, %xmm0 # sched: [1:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-SSE-LABEL: test_pslld:
+; BDVER2-SSE: # %bb.0:
+; BDVER2-SSE-NEXT: pslld %xmm1, %xmm0 # sched: [2:1.00]
+; BDVER2-SSE-NEXT: pslld (%rdi), %xmm0 # sched: [8:1.00]
+; BDVER2-SSE-NEXT: pslld $2, %xmm0 # sched: [1:1.00]
+; BDVER2-SSE-NEXT: retq # sched: [1:1.00]
+;
+; BDVER2-LABEL: test_pslld:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: vpslld %xmm1, %xmm0, %xmm0 # sched: [2:1.00]
+; BDVER2-NEXT: vpslld (%rdi), %xmm0, %xmm0 # sched: [8:1.00]
+; BDVER2-NEXT: vpslld $2, %xmm0, %xmm0 # sched: [1:1.00]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-SSE-LABEL: test_pslld:
; BTVER2-SSE: # %bb.0:
; BTVER2-SSE-NEXT: pslld %xmm1, %xmm0 # sched: [1:0.50]
; SKX-NEXT: vpslldq {{.*#+}} xmm0 = zero,zero,zero,zero,xmm0[0,1,2,3,4,5,6,7,8,9,10,11] sched: [1:1.00]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-SSE-LABEL: test_pslldq:
+; BDVER2-SSE: # %bb.0:
+; BDVER2-SSE-NEXT: pslldq {{.*#+}} xmm0 = zero,zero,zero,zero,xmm0[0,1,2,3,4,5,6,7,8,9,10,11] sched: [1:0.50]
+; BDVER2-SSE-NEXT: retq # sched: [1:1.00]
+;
+; BDVER2-LABEL: test_pslldq:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: vpslldq {{.*#+}} xmm0 = zero,zero,zero,zero,xmm0[0,1,2,3,4,5,6,7,8,9,10,11] sched: [1:0.50]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-SSE-LABEL: test_pslldq:
; BTVER2-SSE: # %bb.0:
; BTVER2-SSE-NEXT: pslldq {{.*#+}} xmm0 = zero,zero,zero,zero,xmm0[0,1,2,3,4,5,6,7,8,9,10,11] sched: [1:0.50]
; SKX-NEXT: vpsllq $2, %xmm0, %xmm0 # sched: [1:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-SSE-LABEL: test_psllq:
+; BDVER2-SSE: # %bb.0:
+; BDVER2-SSE-NEXT: psllq %xmm1, %xmm0 # sched: [2:1.00]
+; BDVER2-SSE-NEXT: psllq (%rdi), %xmm0 # sched: [8:1.00]
+; BDVER2-SSE-NEXT: psllq $2, %xmm0 # sched: [1:1.00]
+; BDVER2-SSE-NEXT: retq # sched: [1:1.00]
+;
+; BDVER2-LABEL: test_psllq:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: vpsllq %xmm1, %xmm0, %xmm0 # sched: [2:1.00]
+; BDVER2-NEXT: vpsllq (%rdi), %xmm0, %xmm0 # sched: [8:1.00]
+; BDVER2-NEXT: vpsllq $2, %xmm0, %xmm0 # sched: [1:1.00]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-SSE-LABEL: test_psllq:
; BTVER2-SSE: # %bb.0:
; BTVER2-SSE-NEXT: psllq %xmm1, %xmm0 # sched: [1:0.50]
; SKX-NEXT: vpsllw $2, %xmm0, %xmm0 # sched: [1:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-SSE-LABEL: test_psllw:
+; BDVER2-SSE: # %bb.0:
+; BDVER2-SSE-NEXT: psllw %xmm1, %xmm0 # sched: [2:1.00]
+; BDVER2-SSE-NEXT: psllw (%rdi), %xmm0 # sched: [8:1.00]
+; BDVER2-SSE-NEXT: psllw $2, %xmm0 # sched: [1:1.00]
+; BDVER2-SSE-NEXT: retq # sched: [1:1.00]
+;
+; BDVER2-LABEL: test_psllw:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: vpsllw %xmm1, %xmm0, %xmm0 # sched: [2:1.00]
+; BDVER2-NEXT: vpsllw (%rdi), %xmm0, %xmm0 # sched: [8:1.00]
+; BDVER2-NEXT: vpsllw $2, %xmm0, %xmm0 # sched: [1:1.00]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-SSE-LABEL: test_psllw:
; BTVER2-SSE: # %bb.0:
; BTVER2-SSE-NEXT: psllw %xmm1, %xmm0 # sched: [1:0.50]
; SKX-NEXT: vpsrad $2, %xmm0, %xmm0 # sched: [1:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-SSE-LABEL: test_psrad:
+; BDVER2-SSE: # %bb.0:
+; BDVER2-SSE-NEXT: psrad %xmm1, %xmm0 # sched: [2:1.00]
+; BDVER2-SSE-NEXT: psrad (%rdi), %xmm0 # sched: [8:1.00]
+; BDVER2-SSE-NEXT: psrad $2, %xmm0 # sched: [1:1.00]
+; BDVER2-SSE-NEXT: retq # sched: [1:1.00]
+;
+; BDVER2-LABEL: test_psrad:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: vpsrad %xmm1, %xmm0, %xmm0 # sched: [2:1.00]
+; BDVER2-NEXT: vpsrad (%rdi), %xmm0, %xmm0 # sched: [8:1.00]
+; BDVER2-NEXT: vpsrad $2, %xmm0, %xmm0 # sched: [1:1.00]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-SSE-LABEL: test_psrad:
; BTVER2-SSE: # %bb.0:
; BTVER2-SSE-NEXT: psrad %xmm1, %xmm0 # sched: [1:0.50]
; SKX-NEXT: vpsraw $2, %xmm0, %xmm0 # sched: [1:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-SSE-LABEL: test_psraw:
+; BDVER2-SSE: # %bb.0:
+; BDVER2-SSE-NEXT: psraw %xmm1, %xmm0 # sched: [2:1.00]
+; BDVER2-SSE-NEXT: psraw (%rdi), %xmm0 # sched: [8:1.00]
+; BDVER2-SSE-NEXT: psraw $2, %xmm0 # sched: [1:1.00]
+; BDVER2-SSE-NEXT: retq # sched: [1:1.00]
+;
+; BDVER2-LABEL: test_psraw:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: vpsraw %xmm1, %xmm0, %xmm0 # sched: [2:1.00]
+; BDVER2-NEXT: vpsraw (%rdi), %xmm0, %xmm0 # sched: [8:1.00]
+; BDVER2-NEXT: vpsraw $2, %xmm0, %xmm0 # sched: [1:1.00]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-SSE-LABEL: test_psraw:
; BTVER2-SSE: # %bb.0:
; BTVER2-SSE-NEXT: psraw %xmm1, %xmm0 # sched: [1:0.50]
; SKX-NEXT: vpsrld $2, %xmm0, %xmm0 # sched: [1:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-SSE-LABEL: test_psrld:
+; BDVER2-SSE: # %bb.0:
+; BDVER2-SSE-NEXT: psrld %xmm1, %xmm0 # sched: [2:1.00]
+; BDVER2-SSE-NEXT: psrld (%rdi), %xmm0 # sched: [8:1.00]
+; BDVER2-SSE-NEXT: psrld $2, %xmm0 # sched: [1:1.00]
+; BDVER2-SSE-NEXT: retq # sched: [1:1.00]
+;
+; BDVER2-LABEL: test_psrld:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: vpsrld %xmm1, %xmm0, %xmm0 # sched: [2:1.00]
+; BDVER2-NEXT: vpsrld (%rdi), %xmm0, %xmm0 # sched: [8:1.00]
+; BDVER2-NEXT: vpsrld $2, %xmm0, %xmm0 # sched: [1:1.00]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-SSE-LABEL: test_psrld:
; BTVER2-SSE: # %bb.0:
; BTVER2-SSE-NEXT: psrld %xmm1, %xmm0 # sched: [1:0.50]
; SKX-NEXT: vpsrldq {{.*#+}} xmm0 = xmm0[4,5,6,7,8,9,10,11,12,13,14,15],zero,zero,zero,zero sched: [1:1.00]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-SSE-LABEL: test_psrldq:
+; BDVER2-SSE: # %bb.0:
+; BDVER2-SSE-NEXT: psrldq {{.*#+}} xmm0 = xmm0[4,5,6,7,8,9,10,11,12,13,14,15],zero,zero,zero,zero sched: [1:0.50]
+; BDVER2-SSE-NEXT: retq # sched: [1:1.00]
+;
+; BDVER2-LABEL: test_psrldq:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: vpsrldq {{.*#+}} xmm0 = xmm0[4,5,6,7,8,9,10,11,12,13,14,15],zero,zero,zero,zero sched: [1:0.50]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-SSE-LABEL: test_psrldq:
; BTVER2-SSE: # %bb.0:
; BTVER2-SSE-NEXT: psrldq {{.*#+}} xmm0 = xmm0[4,5,6,7,8,9,10,11,12,13,14,15],zero,zero,zero,zero sched: [1:0.50]
; SKX-NEXT: vpsrlq $2, %xmm0, %xmm0 # sched: [1:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-SSE-LABEL: test_psrlq:
+; BDVER2-SSE: # %bb.0:
+; BDVER2-SSE-NEXT: psrlq %xmm1, %xmm0 # sched: [2:1.00]
+; BDVER2-SSE-NEXT: psrlq (%rdi), %xmm0 # sched: [8:1.00]
+; BDVER2-SSE-NEXT: psrlq $2, %xmm0 # sched: [1:1.00]
+; BDVER2-SSE-NEXT: retq # sched: [1:1.00]
+;
+; BDVER2-LABEL: test_psrlq:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: vpsrlq %xmm1, %xmm0, %xmm0 # sched: [2:1.00]
+; BDVER2-NEXT: vpsrlq (%rdi), %xmm0, %xmm0 # sched: [8:1.00]
+; BDVER2-NEXT: vpsrlq $2, %xmm0, %xmm0 # sched: [1:1.00]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-SSE-LABEL: test_psrlq:
; BTVER2-SSE: # %bb.0:
; BTVER2-SSE-NEXT: psrlq %xmm1, %xmm0 # sched: [1:0.50]
; SKX-NEXT: vpsrlw $2, %xmm0, %xmm0 # sched: [1:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-SSE-LABEL: test_psrlw:
+; BDVER2-SSE: # %bb.0:
+; BDVER2-SSE-NEXT: psrlw %xmm1, %xmm0 # sched: [2:1.00]
+; BDVER2-SSE-NEXT: psrlw (%rdi), %xmm0 # sched: [8:1.00]
+; BDVER2-SSE-NEXT: psrlw $2, %xmm0 # sched: [1:1.00]
+; BDVER2-SSE-NEXT: retq # sched: [1:1.00]
+;
+; BDVER2-LABEL: test_psrlw:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: vpsrlw %xmm1, %xmm0, %xmm0 # sched: [2:1.00]
+; BDVER2-NEXT: vpsrlw (%rdi), %xmm0, %xmm0 # sched: [8:1.00]
+; BDVER2-NEXT: vpsrlw $2, %xmm0, %xmm0 # sched: [1:1.00]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-SSE-LABEL: test_psrlw:
; BTVER2-SSE: # %bb.0:
; BTVER2-SSE-NEXT: psrlw %xmm1, %xmm0 # sched: [1:0.50]
; SKX-NEXT: vpsubb (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-SSE-LABEL: test_psubb:
+; BDVER2-SSE: # %bb.0:
+; BDVER2-SSE-NEXT: psubb %xmm1, %xmm0 # sched: [1:0.50]
+; BDVER2-SSE-NEXT: psubb (%rdi), %xmm0 # sched: [7:0.50]
+; BDVER2-SSE-NEXT: retq # sched: [1:1.00]
+;
+; BDVER2-LABEL: test_psubb:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: vpsubb %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
+; BDVER2-NEXT: vpsubb (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-SSE-LABEL: test_psubb:
; BTVER2-SSE: # %bb.0:
; BTVER2-SSE-NEXT: psubb %xmm1, %xmm0 # sched: [1:0.50]
; SKX-NEXT: vpsubd (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-SSE-LABEL: test_psubd:
+; BDVER2-SSE: # %bb.0:
+; BDVER2-SSE-NEXT: psubd %xmm1, %xmm0 # sched: [1:0.50]
+; BDVER2-SSE-NEXT: psubd (%rdi), %xmm0 # sched: [7:0.50]
+; BDVER2-SSE-NEXT: retq # sched: [1:1.00]
+;
+; BDVER2-LABEL: test_psubd:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: vpsubd %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
+; BDVER2-NEXT: vpsubd (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-SSE-LABEL: test_psubd:
; BTVER2-SSE: # %bb.0:
; BTVER2-SSE-NEXT: psubd %xmm1, %xmm0 # sched: [1:0.50]
; SKX-NEXT: vpsubq (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-SSE-LABEL: test_psubq:
+; BDVER2-SSE: # %bb.0:
+; BDVER2-SSE-NEXT: psubq %xmm1, %xmm0 # sched: [1:0.50]
+; BDVER2-SSE-NEXT: psubq (%rdi), %xmm0 # sched: [7:0.50]
+; BDVER2-SSE-NEXT: retq # sched: [1:1.00]
+;
+; BDVER2-LABEL: test_psubq:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: vpsubq %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
+; BDVER2-NEXT: vpsubq (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-SSE-LABEL: test_psubq:
; BTVER2-SSE: # %bb.0:
; BTVER2-SSE-NEXT: psubq %xmm1, %xmm0 # sched: [1:0.50]
; SKX-NEXT: vpsubsb (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-SSE-LABEL: test_psubsb:
+; BDVER2-SSE: # %bb.0:
+; BDVER2-SSE-NEXT: psubsb %xmm1, %xmm0 # sched: [1:0.50]
+; BDVER2-SSE-NEXT: psubsb (%rdi), %xmm0 # sched: [7:0.50]
+; BDVER2-SSE-NEXT: retq # sched: [1:1.00]
+;
+; BDVER2-LABEL: test_psubsb:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: vpsubsb %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
+; BDVER2-NEXT: vpsubsb (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-SSE-LABEL: test_psubsb:
; BTVER2-SSE: # %bb.0:
; BTVER2-SSE-NEXT: psubsb %xmm1, %xmm0 # sched: [1:0.50]
; SKX-NEXT: vpsubsw (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-SSE-LABEL: test_psubsw:
+; BDVER2-SSE: # %bb.0:
+; BDVER2-SSE-NEXT: psubsw %xmm1, %xmm0 # sched: [1:0.50]
+; BDVER2-SSE-NEXT: psubsw (%rdi), %xmm0 # sched: [7:0.50]
+; BDVER2-SSE-NEXT: retq # sched: [1:1.00]
+;
+; BDVER2-LABEL: test_psubsw:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: vpsubsw %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
+; BDVER2-NEXT: vpsubsw (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-SSE-LABEL: test_psubsw:
; BTVER2-SSE: # %bb.0:
; BTVER2-SSE-NEXT: psubsw %xmm1, %xmm0 # sched: [1:0.50]
; SKX-NEXT: vpsubusb (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-SSE-LABEL: test_psubusb:
+; BDVER2-SSE: # %bb.0:
+; BDVER2-SSE-NEXT: psubusb %xmm1, %xmm0 # sched: [1:0.50]
+; BDVER2-SSE-NEXT: psubusb (%rdi), %xmm0 # sched: [7:0.50]
+; BDVER2-SSE-NEXT: retq # sched: [1:1.00]
+;
+; BDVER2-LABEL: test_psubusb:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: vpsubusb %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
+; BDVER2-NEXT: vpsubusb (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-SSE-LABEL: test_psubusb:
; BTVER2-SSE: # %bb.0:
; BTVER2-SSE-NEXT: psubusb %xmm1, %xmm0 # sched: [1:0.50]
; SKX-NEXT: vpsubusw (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-SSE-LABEL: test_psubusw:
+; BDVER2-SSE: # %bb.0:
+; BDVER2-SSE-NEXT: psubusw %xmm1, %xmm0 # sched: [1:0.50]
+; BDVER2-SSE-NEXT: psubusw (%rdi), %xmm0 # sched: [7:0.50]
+; BDVER2-SSE-NEXT: retq # sched: [1:1.00]
+;
+; BDVER2-LABEL: test_psubusw:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: vpsubusw %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
+; BDVER2-NEXT: vpsubusw (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-SSE-LABEL: test_psubusw:
; BTVER2-SSE: # %bb.0:
; BTVER2-SSE-NEXT: psubusw %xmm1, %xmm0 # sched: [1:0.50]
; SKX-NEXT: vpsubw (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-SSE-LABEL: test_psubw:
+; BDVER2-SSE: # %bb.0:
+; BDVER2-SSE-NEXT: psubw %xmm1, %xmm0 # sched: [1:0.50]
+; BDVER2-SSE-NEXT: psubw (%rdi), %xmm0 # sched: [7:0.50]
+; BDVER2-SSE-NEXT: retq # sched: [1:1.00]
+;
+; BDVER2-LABEL: test_psubw:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: vpsubw %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
+; BDVER2-NEXT: vpsubw (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-SSE-LABEL: test_psubw:
; BTVER2-SSE: # %bb.0:
; BTVER2-SSE-NEXT: psubw %xmm1, %xmm0 # sched: [1:0.50]
; SKX-NEXT: vpunpckhbw {{.*#+}} xmm0 = xmm0[8],mem[8],xmm0[9],mem[9],xmm0[10],mem[10],xmm0[11],mem[11],xmm0[12],mem[12],xmm0[13],mem[13],xmm0[14],mem[14],xmm0[15],mem[15] sched: [7:1.00]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-SSE-LABEL: test_punpckhbw:
+; BDVER2-SSE: # %bb.0:
+; BDVER2-SSE-NEXT: punpckhbw {{.*#+}} xmm0 = xmm0[8],xmm1[8],xmm0[9],xmm1[9],xmm0[10],xmm1[10],xmm0[11],xmm1[11],xmm0[12],xmm1[12],xmm0[13],xmm1[13],xmm0[14],xmm1[14],xmm0[15],xmm1[15] sched: [1:0.50]
+; BDVER2-SSE-NEXT: punpckhbw {{.*#+}} xmm0 = xmm0[8],mem[8],xmm0[9],mem[9],xmm0[10],mem[10],xmm0[11],mem[11],xmm0[12],mem[12],xmm0[13],mem[13],xmm0[14],mem[14],xmm0[15],mem[15] sched: [7:0.50]
+; BDVER2-SSE-NEXT: retq # sched: [1:1.00]
+;
+; BDVER2-LABEL: test_punpckhbw:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: vpunpckhbw {{.*#+}} xmm0 = xmm0[8],xmm1[8],xmm0[9],xmm1[9],xmm0[10],xmm1[10],xmm0[11],xmm1[11],xmm0[12],xmm1[12],xmm0[13],xmm1[13],xmm0[14],xmm1[14],xmm0[15],xmm1[15] sched: [1:0.50]
+; BDVER2-NEXT: vpunpckhbw {{.*#+}} xmm0 = xmm0[8],mem[8],xmm0[9],mem[9],xmm0[10],mem[10],xmm0[11],mem[11],xmm0[12],mem[12],xmm0[13],mem[13],xmm0[14],mem[14],xmm0[15],mem[15] sched: [7:0.50]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-SSE-LABEL: test_punpckhbw:
; BTVER2-SSE: # %bb.0:
; BTVER2-SSE-NEXT: punpckhbw {{.*#+}} xmm0 = xmm0[8],xmm1[8],xmm0[9],xmm1[9],xmm0[10],xmm1[10],xmm0[11],xmm1[11],xmm0[12],xmm1[12],xmm0[13],xmm1[13],xmm0[14],xmm1[14],xmm0[15],xmm1[15] sched: [1:0.50]
; SKX-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-SSE-LABEL: test_punpckhdq:
+; BDVER2-SSE: # %bb.0:
+; BDVER2-SSE-NEXT: punpckhdq {{.*#+}} xmm0 = xmm0[2],xmm1[2],xmm0[3],xmm1[3] sched: [1:0.50]
+; BDVER2-SSE-NEXT: punpckhdq {{.*#+}} xmm1 = xmm1[2],mem[2],xmm1[3],mem[3] sched: [7:0.50]
+; BDVER2-SSE-NEXT: paddd %xmm1, %xmm0 # sched: [1:0.50]
+; BDVER2-SSE-NEXT: retq # sched: [1:1.00]
+;
+; BDVER2-LABEL: test_punpckhdq:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: vpunpckhdq {{.*#+}} xmm0 = xmm0[2],xmm1[2],xmm0[3],xmm1[3] sched: [1:0.50]
+; BDVER2-NEXT: vpunpckhdq {{.*#+}} xmm1 = xmm1[2],mem[2],xmm1[3],mem[3] sched: [7:0.50]
+; BDVER2-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-SSE-LABEL: test_punpckhdq:
; BTVER2-SSE: # %bb.0:
; BTVER2-SSE-NEXT: punpckhdq {{.*#+}} xmm0 = xmm0[2],xmm1[2],xmm0[3],xmm1[3] sched: [1:0.50]
; SKX-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-SSE-LABEL: test_punpckhqdq:
+; BDVER2-SSE: # %bb.0:
+; BDVER2-SSE-NEXT: punpckhqdq {{.*#+}} xmm0 = xmm0[1],xmm1[1] sched: [1:0.50]
+; BDVER2-SSE-NEXT: punpckhqdq {{.*#+}} xmm1 = xmm1[1],mem[1] sched: [7:0.50]
+; BDVER2-SSE-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.50]
+; BDVER2-SSE-NEXT: retq # sched: [1:1.00]
+;
+; BDVER2-LABEL: test_punpckhqdq:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: vpunpckhqdq {{.*#+}} xmm0 = xmm0[1],xmm1[1] sched: [1:0.50]
+; BDVER2-NEXT: vpunpckhqdq {{.*#+}} xmm1 = xmm1[1],mem[1] sched: [7:0.50]
+; BDVER2-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-SSE-LABEL: test_punpckhqdq:
; BTVER2-SSE: # %bb.0:
; BTVER2-SSE-NEXT: punpckhqdq {{.*#+}} xmm0 = xmm0[1],xmm1[1] sched: [1:0.50]
; SKX-NEXT: vpunpckhwd {{.*#+}} xmm0 = xmm0[4],mem[4],xmm0[5],mem[5],xmm0[6],mem[6],xmm0[7],mem[7] sched: [7:1.00]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-SSE-LABEL: test_punpckhwd:
+; BDVER2-SSE: # %bb.0:
+; BDVER2-SSE-NEXT: punpckhwd {{.*#+}} xmm0 = xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7] sched: [1:0.50]
+; BDVER2-SSE-NEXT: punpckhwd {{.*#+}} xmm0 = xmm0[4],mem[4],xmm0[5],mem[5],xmm0[6],mem[6],xmm0[7],mem[7] sched: [7:0.50]
+; BDVER2-SSE-NEXT: retq # sched: [1:1.00]
+;
+; BDVER2-LABEL: test_punpckhwd:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: vpunpckhwd {{.*#+}} xmm0 = xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7] sched: [1:0.50]
+; BDVER2-NEXT: vpunpckhwd {{.*#+}} xmm0 = xmm0[4],mem[4],xmm0[5],mem[5],xmm0[6],mem[6],xmm0[7],mem[7] sched: [7:0.50]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-SSE-LABEL: test_punpckhwd:
; BTVER2-SSE: # %bb.0:
; BTVER2-SSE-NEXT: punpckhwd {{.*#+}} xmm0 = xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7] sched: [1:0.50]
; SKX-NEXT: vpunpcklbw {{.*#+}} xmm0 = xmm0[0],mem[0],xmm0[1],mem[1],xmm0[2],mem[2],xmm0[3],mem[3],xmm0[4],mem[4],xmm0[5],mem[5],xmm0[6],mem[6],xmm0[7],mem[7] sched: [7:1.00]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-SSE-LABEL: test_punpcklbw:
+; BDVER2-SSE: # %bb.0:
+; BDVER2-SSE-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7] sched: [1:0.50]
+; BDVER2-SSE-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],mem[0],xmm0[1],mem[1],xmm0[2],mem[2],xmm0[3],mem[3],xmm0[4],mem[4],xmm0[5],mem[5],xmm0[6],mem[6],xmm0[7],mem[7] sched: [7:0.50]
+; BDVER2-SSE-NEXT: retq # sched: [1:1.00]
+;
+; BDVER2-LABEL: test_punpcklbw:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: vpunpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7] sched: [1:0.50]
+; BDVER2-NEXT: vpunpcklbw {{.*#+}} xmm0 = xmm0[0],mem[0],xmm0[1],mem[1],xmm0[2],mem[2],xmm0[3],mem[3],xmm0[4],mem[4],xmm0[5],mem[5],xmm0[6],mem[6],xmm0[7],mem[7] sched: [7:0.50]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-SSE-LABEL: test_punpcklbw:
; BTVER2-SSE: # %bb.0:
; BTVER2-SSE-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7] sched: [1:0.50]
; SKX-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-SSE-LABEL: test_punpckldq:
+; BDVER2-SSE: # %bb.0:
+; BDVER2-SSE-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] sched: [1:0.50]
+; BDVER2-SSE-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],mem[0],xmm1[1],mem[1] sched: [7:0.50]
+; BDVER2-SSE-NEXT: paddd %xmm1, %xmm0 # sched: [1:0.50]
+; BDVER2-SSE-NEXT: retq # sched: [1:1.00]
+;
+; BDVER2-LABEL: test_punpckldq:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: vpunpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] sched: [1:0.50]
+; BDVER2-NEXT: vpunpckldq {{.*#+}} xmm1 = xmm1[0],mem[0],xmm1[1],mem[1] sched: [7:0.50]
+; BDVER2-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-SSE-LABEL: test_punpckldq:
; BTVER2-SSE: # %bb.0:
; BTVER2-SSE-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] sched: [1:0.50]
; SKX-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-SSE-LABEL: test_punpcklqdq:
+; BDVER2-SSE: # %bb.0:
+; BDVER2-SSE-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0] sched: [1:0.50]
+; BDVER2-SSE-NEXT: punpcklqdq {{.*#+}} xmm1 = xmm1[0],mem[0] sched: [7:0.50]
+; BDVER2-SSE-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.50]
+; BDVER2-SSE-NEXT: retq # sched: [1:1.00]
+;
+; BDVER2-LABEL: test_punpcklqdq:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0] sched: [1:0.50]
+; BDVER2-NEXT: vpunpcklqdq {{.*#+}} xmm1 = xmm1[0],mem[0] sched: [7:0.50]
+; BDVER2-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-SSE-LABEL: test_punpcklqdq:
; BTVER2-SSE: # %bb.0:
; BTVER2-SSE-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0] sched: [1:0.50]
; SKX-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm0[0],mem[0],xmm0[1],mem[1],xmm0[2],mem[2],xmm0[3],mem[3] sched: [7:1.00]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-SSE-LABEL: test_punpcklwd:
+; BDVER2-SSE: # %bb.0:
+; BDVER2-SSE-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3] sched: [1:0.50]
+; BDVER2-SSE-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],mem[0],xmm0[1],mem[1],xmm0[2],mem[2],xmm0[3],mem[3] sched: [7:0.50]
+; BDVER2-SSE-NEXT: retq # sched: [1:1.00]
+;
+; BDVER2-LABEL: test_punpcklwd:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3] sched: [1:0.50]
+; BDVER2-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm0[0],mem[0],xmm0[1],mem[1],xmm0[2],mem[2],xmm0[3],mem[3] sched: [7:0.50]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-SSE-LABEL: test_punpcklwd:
; BTVER2-SSE: # %bb.0:
; BTVER2-SSE-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3] sched: [1:0.50]
; SKX-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-SSE-LABEL: test_pxor:
+; BDVER2-SSE: # %bb.0:
+; BDVER2-SSE-NEXT: pxor %xmm1, %xmm0 # sched: [1:0.33]
+; BDVER2-SSE-NEXT: pxor (%rdi), %xmm0 # sched: [7:0.50]
+; BDVER2-SSE-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.50]
+; BDVER2-SSE-NEXT: retq # sched: [1:1.00]
+;
+; BDVER2-LABEL: test_pxor:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: vpxor %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
+; BDVER2-NEXT: vpxor (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
+; BDVER2-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-SSE-LABEL: test_pxor:
; BTVER2-SSE: # %bb.0:
; BTVER2-SSE-NEXT: pxor %xmm1, %xmm0 # sched: [1:0.50]
; SKX-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-SSE-LABEL: test_shufpd:
+; BDVER2-SSE: # %bb.0:
+; BDVER2-SSE-NEXT: shufpd {{.*#+}} xmm0 = xmm0[1],xmm1[0] sched: [1:1.00]
+; BDVER2-SSE-NEXT: shufpd {{.*#+}} xmm1 = xmm1[1],mem[0] sched: [7:1.00]
+; BDVER2-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [3:1.00]
+; BDVER2-SSE-NEXT: retq # sched: [1:1.00]
+;
+; BDVER2-LABEL: test_shufpd:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: vshufpd {{.*#+}} xmm0 = xmm0[1],xmm1[0] sched: [1:1.00]
+; BDVER2-NEXT: vshufpd {{.*#+}} xmm1 = xmm1[1],mem[0] sched: [7:1.00]
+; BDVER2-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-SSE-LABEL: test_shufpd:
; BTVER2-SSE: # %bb.0:
; BTVER2-SSE-NEXT: shufpd {{.*#+}} xmm0 = xmm0[1],xmm1[0] sched: [1:0.50]
; SKX-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-SSE-LABEL: test_sqrtpd:
+; BDVER2-SSE: # %bb.0:
+; BDVER2-SSE-NEXT: sqrtpd %xmm0, %xmm1 # sched: [21:21.00]
+; BDVER2-SSE-NEXT: sqrtpd (%rdi), %xmm0 # sched: [27:21.00]
+; BDVER2-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [3:1.00]
+; BDVER2-SSE-NEXT: retq # sched: [1:1.00]
+;
+; BDVER2-LABEL: test_sqrtpd:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: vsqrtpd %xmm0, %xmm0 # sched: [21:21.00]
+; BDVER2-NEXT: vsqrtpd (%rdi), %xmm1 # sched: [27:21.00]
+; BDVER2-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-SSE-LABEL: test_sqrtpd:
; BTVER2-SSE: # %bb.0:
; BTVER2-SSE-NEXT: sqrtpd %xmm0, %xmm1 # sched: [27:27.00]
; SKX-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-SSE-LABEL: test_sqrtsd:
+; BDVER2-SSE: # %bb.0:
+; BDVER2-SSE-NEXT: sqrtsd %xmm0, %xmm0 # sched: [21:21.00]
+; BDVER2-SSE-NEXT: movapd (%rdi), %xmm1 # sched: [6:0.50]
+; BDVER2-SSE-NEXT: sqrtsd %xmm1, %xmm1 # sched: [21:21.00]
+; BDVER2-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [3:1.00]
+; BDVER2-SSE-NEXT: retq # sched: [1:1.00]
+;
+; BDVER2-LABEL: test_sqrtsd:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: vsqrtsd %xmm0, %xmm0, %xmm0 # sched: [21:21.00]
+; BDVER2-NEXT: vmovapd (%rdi), %xmm1 # sched: [6:0.50]
+; BDVER2-NEXT: vsqrtsd %xmm1, %xmm1, %xmm1 # sched: [21:21.00]
+; BDVER2-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-SSE-LABEL: test_sqrtsd:
; BTVER2-SSE: # %bb.0:
; BTVER2-SSE-NEXT: movapd (%rdi), %xmm1 # sched: [5:1.00]
; SKX-NEXT: vsubpd (%rdi), %xmm0, %xmm0 # sched: [10:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-SSE-LABEL: test_subpd:
+; BDVER2-SSE: # %bb.0:
+; BDVER2-SSE-NEXT: subpd %xmm1, %xmm0 # sched: [3:1.00]
+; BDVER2-SSE-NEXT: subpd (%rdi), %xmm0 # sched: [9:1.00]
+; BDVER2-SSE-NEXT: retq # sched: [1:1.00]
+;
+; BDVER2-LABEL: test_subpd:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: vsubpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
+; BDVER2-NEXT: vsubpd (%rdi), %xmm0, %xmm0 # sched: [9:1.00]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-SSE-LABEL: test_subpd:
; BTVER2-SSE: # %bb.0:
; BTVER2-SSE-NEXT: subpd %xmm1, %xmm0 # sched: [3:1.00]
; SKX-NEXT: vsubsd (%rdi), %xmm0, %xmm0 # sched: [9:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-SSE-LABEL: test_subsd:
+; BDVER2-SSE: # %bb.0:
+; BDVER2-SSE-NEXT: subsd %xmm1, %xmm0 # sched: [3:1.00]
+; BDVER2-SSE-NEXT: subsd (%rdi), %xmm0 # sched: [9:1.00]
+; BDVER2-SSE-NEXT: retq # sched: [1:1.00]
+;
+; BDVER2-LABEL: test_subsd:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: vsubsd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
+; BDVER2-NEXT: vsubsd (%rdi), %xmm0, %xmm0 # sched: [9:1.00]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-SSE-LABEL: test_subsd:
; BTVER2-SSE: # %bb.0:
; BTVER2-SSE-NEXT: subsd %xmm1, %xmm0 # sched: [3:1.00]
; SKX-NEXT: movzbl %dl, %eax # sched: [1:0.25]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-SSE-LABEL: test_ucomisd:
+; BDVER2-SSE: # %bb.0:
+; BDVER2-SSE-NEXT: ucomisd %xmm1, %xmm0 # sched: [2:1.00]
+; BDVER2-SSE-NEXT: setnp %al # sched: [1:0.50]
+; BDVER2-SSE-NEXT: sete %cl # sched: [1:0.50]
+; BDVER2-SSE-NEXT: andb %al, %cl # sched: [1:0.33]
+; BDVER2-SSE-NEXT: ucomisd (%rdi), %xmm0 # sched: [8:1.00]
+; BDVER2-SSE-NEXT: setnp %al # sched: [1:0.50]
+; BDVER2-SSE-NEXT: sete %dl # sched: [1:0.50]
+; BDVER2-SSE-NEXT: andb %al, %dl # sched: [1:0.33]
+; BDVER2-SSE-NEXT: orb %cl, %dl # sched: [1:0.33]
+; BDVER2-SSE-NEXT: movzbl %dl, %eax # sched: [1:0.33]
+; BDVER2-SSE-NEXT: retq # sched: [1:1.00]
+;
+; BDVER2-LABEL: test_ucomisd:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: vucomisd %xmm1, %xmm0 # sched: [2:1.00]
+; BDVER2-NEXT: setnp %al # sched: [1:0.50]
+; BDVER2-NEXT: sete %cl # sched: [1:0.50]
+; BDVER2-NEXT: andb %al, %cl # sched: [1:0.33]
+; BDVER2-NEXT: vucomisd (%rdi), %xmm0 # sched: [8:1.00]
+; BDVER2-NEXT: setnp %al # sched: [1:0.50]
+; BDVER2-NEXT: sete %dl # sched: [1:0.50]
+; BDVER2-NEXT: andb %al, %dl # sched: [1:0.33]
+; BDVER2-NEXT: orb %cl, %dl # sched: [1:0.33]
+; BDVER2-NEXT: movzbl %dl, %eax # sched: [1:0.33]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-SSE-LABEL: test_ucomisd:
; BTVER2-SSE: # %bb.0:
; BTVER2-SSE-NEXT: ucomisd %xmm1, %xmm0 # sched: [3:1.00]
; SKX-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-SSE-LABEL: test_unpckhpd:
+; BDVER2-SSE: # %bb.0:
+; BDVER2-SSE-NEXT: unpckhpd {{.*#+}} xmm0 = xmm0[1],xmm1[1] sched: [1:1.00]
+; BDVER2-SSE-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],mem[1] sched: [7:1.00]
+; BDVER2-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [3:1.00]
+; BDVER2-SSE-NEXT: retq # sched: [1:1.00]
+;
+; BDVER2-LABEL: test_unpckhpd:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: vunpckhpd {{.*#+}} xmm0 = xmm0[1],xmm1[1] sched: [1:1.00]
+; BDVER2-NEXT: vunpckhpd {{.*#+}} xmm1 = xmm1[1],mem[1] sched: [7:1.00]
+; BDVER2-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-SSE-LABEL: test_unpckhpd:
; BTVER2-SSE: # %bb.0:
; BTVER2-SSE-NEXT: unpckhpd {{.*#+}} xmm0 = xmm0[1],xmm1[1] sched: [1:0.50]
; SKX-NEXT: vaddpd %xmm0, %xmm1, %xmm0 # sched: [4:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-SSE-LABEL: test_unpcklpd:
+; BDVER2-SSE: # %bb.0:
+; BDVER2-SSE-NEXT: movapd %xmm0, %xmm2 # sched: [1:1.00]
+; BDVER2-SSE-NEXT: unpcklpd {{.*#+}} xmm2 = xmm2[0],xmm1[0] sched: [1:1.00]
+; BDVER2-SSE-NEXT: unpcklpd {{.*#+}} xmm0 = xmm0[0],mem[0] sched: [7:1.00]
+; BDVER2-SSE-NEXT: addpd %xmm2, %xmm0 # sched: [3:1.00]
+; BDVER2-SSE-NEXT: retq # sched: [1:1.00]
+;
+; BDVER2-LABEL: test_unpcklpd:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: vunpcklpd {{.*#+}} xmm1 = xmm0[0],xmm1[0] sched: [1:1.00]
+; BDVER2-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm0[0],mem[0] sched: [7:1.00]
+; BDVER2-NEXT: vaddpd %xmm0, %xmm1, %xmm0 # sched: [3:1.00]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-SSE-LABEL: test_unpcklpd:
; BTVER2-SSE: # %bb.0:
; BTVER2-SSE-NEXT: movapd %xmm0, %xmm2 # sched: [1:0.50]
; SKX-NEXT: vaddpd %xmm0, %xmm1, %xmm0 # sched: [4:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-SSE-LABEL: test_xorpd:
+; BDVER2-SSE: # %bb.0:
+; BDVER2-SSE-NEXT: xorpd %xmm1, %xmm0 # sched: [1:1.00]
+; BDVER2-SSE-NEXT: xorpd (%rdi), %xmm0 # sched: [7:1.00]
+; BDVER2-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [3:1.00]
+; BDVER2-SSE-NEXT: retq # sched: [1:1.00]
+;
+; BDVER2-LABEL: test_xorpd:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: vxorpd %xmm1, %xmm0, %xmm0 # sched: [1:1.00]
+; BDVER2-NEXT: vxorpd (%rdi), %xmm0, %xmm0 # sched: [7:1.00]
+; BDVER2-NEXT: vaddpd %xmm0, %xmm1, %xmm0 # sched: [3:1.00]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-SSE-LABEL: test_xorpd:
; BTVER2-SSE: # %bb.0:
; BTVER2-SSE-NEXT: xorpd %xmm1, %xmm0 # sched: [1:0.50]
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=skylake -mattr=-avx2 | FileCheck %s --check-prefixes=CHECK,SKYLAKE
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=skx -mattr=-ssse3 | FileCheck %s --check-prefixes=CHECK,SKX-SSE
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=skx -mattr=-avx2 | FileCheck %s --check-prefixes=CHECK,SKX
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=btver2 -mattr=-ssse3 | FileCheck %s --check-prefixes=CHECK,BTVER2-SSE
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=x86-64 -mattr=+sse3 -mattr=-ssse3 | FileCheck %s --check-prefixes=CHECK,BDVER2-SSE
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=x86-64 -mattr=+avx -mattr=+sse3 -mattr=-avx2 | FileCheck %s --check-prefixes=CHECK,BDVER2
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=btver2 -mattr=+sse3 -mattr=-ssse3 | FileCheck %s --check-prefixes=CHECK,BTVER2-SSE
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=btver2 -mattr=-avx2 | FileCheck %s --check-prefixes=CHECK,BTVER2
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=znver1 -mattr=-ssse3 | FileCheck %s --check-prefixes=CHECK,ZNVER1-SSE
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=znver1 -mattr=-avx2 | FileCheck %s --check-prefixes=CHECK,ZNVER1
; SKX-NEXT: vaddsubpd (%rdi), %xmm0, %xmm0 # sched: [10:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-SSE-LABEL: test_addsubpd:
+; BDVER2-SSE: # %bb.0:
+; BDVER2-SSE-NEXT: addsubpd %xmm1, %xmm0 # sched: [3:1.00]
+; BDVER2-SSE-NEXT: addsubpd (%rdi), %xmm0 # sched: [9:1.00]
+; BDVER2-SSE-NEXT: retq # sched: [1:1.00]
+;
+; BDVER2-LABEL: test_addsubpd:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: vaddsubpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
+; BDVER2-NEXT: vaddsubpd (%rdi), %xmm0, %xmm0 # sched: [9:1.00]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-SSE-LABEL: test_addsubpd:
; BTVER2-SSE: # %bb.0:
; BTVER2-SSE-NEXT: addsubpd %xmm1, %xmm0 # sched: [3:1.00]
; SKX-NEXT: vaddsubps (%rdi), %xmm0, %xmm0 # sched: [10:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-SSE-LABEL: test_addsubps:
+; BDVER2-SSE: # %bb.0:
+; BDVER2-SSE-NEXT: addsubps %xmm1, %xmm0 # sched: [3:1.00]
+; BDVER2-SSE-NEXT: addsubps (%rdi), %xmm0 # sched: [9:1.00]
+; BDVER2-SSE-NEXT: retq # sched: [1:1.00]
+;
+; BDVER2-LABEL: test_addsubps:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: vaddsubps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
+; BDVER2-NEXT: vaddsubps (%rdi), %xmm0, %xmm0 # sched: [9:1.00]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-SSE-LABEL: test_addsubps:
; BTVER2-SSE: # %bb.0:
; BTVER2-SSE-NEXT: addsubps %xmm1, %xmm0 # sched: [3:1.00]
; SKX-NEXT: vhaddpd (%rdi), %xmm0, %xmm0 # sched: [12:2.00]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-SSE-LABEL: test_haddpd:
+; BDVER2-SSE: # %bb.0:
+; BDVER2-SSE-NEXT: haddpd %xmm1, %xmm0 # sched: [5:2.00]
+; BDVER2-SSE-NEXT: haddpd (%rdi), %xmm0 # sched: [11:2.00]
+; BDVER2-SSE-NEXT: retq # sched: [1:1.00]
+;
+; BDVER2-LABEL: test_haddpd:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: vhaddpd %xmm1, %xmm0, %xmm0 # sched: [5:2.00]
+; BDVER2-NEXT: vhaddpd (%rdi), %xmm0, %xmm0 # sched: [11:2.00]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-SSE-LABEL: test_haddpd:
; BTVER2-SSE: # %bb.0:
; BTVER2-SSE-NEXT: haddpd %xmm1, %xmm0 # sched: [3:1.00]
; SKX-NEXT: vhaddps (%rdi), %xmm0, %xmm0 # sched: [12:2.00]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-SSE-LABEL: test_haddps:
+; BDVER2-SSE: # %bb.0:
+; BDVER2-SSE-NEXT: haddps %xmm1, %xmm0 # sched: [5:2.00]
+; BDVER2-SSE-NEXT: haddps (%rdi), %xmm0 # sched: [11:2.00]
+; BDVER2-SSE-NEXT: retq # sched: [1:1.00]
+;
+; BDVER2-LABEL: test_haddps:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: vhaddps %xmm1, %xmm0, %xmm0 # sched: [5:2.00]
+; BDVER2-NEXT: vhaddps (%rdi), %xmm0, %xmm0 # sched: [11:2.00]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-SSE-LABEL: test_haddps:
; BTVER2-SSE: # %bb.0:
; BTVER2-SSE-NEXT: haddps %xmm1, %xmm0 # sched: [3:1.00]
; SKX-NEXT: vhsubpd (%rdi), %xmm0, %xmm0 # sched: [12:2.00]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-SSE-LABEL: test_hsubpd:
+; BDVER2-SSE: # %bb.0:
+; BDVER2-SSE-NEXT: hsubpd %xmm1, %xmm0 # sched: [5:2.00]
+; BDVER2-SSE-NEXT: hsubpd (%rdi), %xmm0 # sched: [11:2.00]
+; BDVER2-SSE-NEXT: retq # sched: [1:1.00]
+;
+; BDVER2-LABEL: test_hsubpd:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: vhsubpd %xmm1, %xmm0, %xmm0 # sched: [5:2.00]
+; BDVER2-NEXT: vhsubpd (%rdi), %xmm0, %xmm0 # sched: [11:2.00]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-SSE-LABEL: test_hsubpd:
; BTVER2-SSE: # %bb.0:
; BTVER2-SSE-NEXT: hsubpd %xmm1, %xmm0 # sched: [3:1.00]
; SKX-NEXT: vhsubps (%rdi), %xmm0, %xmm0 # sched: [12:2.00]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-SSE-LABEL: test_hsubps:
+; BDVER2-SSE: # %bb.0:
+; BDVER2-SSE-NEXT: hsubps %xmm1, %xmm0 # sched: [5:2.00]
+; BDVER2-SSE-NEXT: hsubps (%rdi), %xmm0 # sched: [11:2.00]
+; BDVER2-SSE-NEXT: retq # sched: [1:1.00]
+;
+; BDVER2-LABEL: test_hsubps:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: vhsubps %xmm1, %xmm0, %xmm0 # sched: [5:2.00]
+; BDVER2-NEXT: vhsubps (%rdi), %xmm0, %xmm0 # sched: [11:2.00]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-SSE-LABEL: test_hsubps:
; BTVER2-SSE: # %bb.0:
; BTVER2-SSE-NEXT: hsubps %xmm1, %xmm0 # sched: [3:1.00]
; SKX-NEXT: vlddqu (%rdi), %xmm0 # sched: [6:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-SSE-LABEL: test_lddqu:
+; BDVER2-SSE: # %bb.0:
+; BDVER2-SSE-NEXT: lddqu (%rdi), %xmm0 # sched: [6:0.50]
+; BDVER2-SSE-NEXT: retq # sched: [1:1.00]
+;
+; BDVER2-LABEL: test_lddqu:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: vlddqu (%rdi), %xmm0 # sched: [6:0.50]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-SSE-LABEL: test_lddqu:
; BTVER2-SSE: # %bb.0:
; BTVER2-SSE-NEXT: lddqu (%rdi), %xmm0 # sched: [5:1.00]
; SKX-NEXT: monitor # sched: [100:0.25]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-SSE-LABEL: test_monitor:
+; BDVER2-SSE: # %bb.0:
+; BDVER2-SSE-NEXT: movl %esi, %ecx # sched: [1:0.33]
+; BDVER2-SSE-NEXT: leaq (%rdi), %rax # sched: [1:0.50]
+; BDVER2-SSE-NEXT: monitor # sched: [100:0.33]
+; BDVER2-SSE-NEXT: retq # sched: [1:1.00]
+;
+; BDVER2-LABEL: test_monitor:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: movl %esi, %ecx # sched: [1:0.33]
+; BDVER2-NEXT: leaq (%rdi), %rax # sched: [1:0.50]
+; BDVER2-NEXT: monitor # sched: [100:0.33]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-SSE-LABEL: test_monitor:
; BTVER2-SSE: # %bb.0:
; BTVER2-SSE-NEXT: movl %esi, %ecx # sched: [1:0.50]
; SKX-NEXT: vsubpd %xmm0, %xmm1, %xmm0 # sched: [4:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-SSE-LABEL: test_movddup:
+; BDVER2-SSE: # %bb.0:
+; BDVER2-SSE-NEXT: movddup {{.*#+}} xmm1 = xmm0[0,0] sched: [1:1.00]
+; BDVER2-SSE-NEXT: movddup {{.*#+}} xmm0 = mem[0,0] sched: [6:0.50]
+; BDVER2-SSE-NEXT: subpd %xmm1, %xmm0 # sched: [3:1.00]
+; BDVER2-SSE-NEXT: retq # sched: [1:1.00]
+;
+; BDVER2-LABEL: test_movddup:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: vmovddup {{.*#+}} xmm0 = xmm0[0,0] sched: [1:1.00]
+; BDVER2-NEXT: vmovddup {{.*#+}} xmm1 = mem[0,0] sched: [6:0.50]
+; BDVER2-NEXT: vsubpd %xmm0, %xmm1, %xmm0 # sched: [3:1.00]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-SSE-LABEL: test_movddup:
; BTVER2-SSE: # %bb.0:
; BTVER2-SSE-NEXT: movddup {{.*#+}} xmm1 = xmm0[0,0] sched: [1:0.50]
; SKX-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-SSE-LABEL: test_movshdup:
+; BDVER2-SSE: # %bb.0:
+; BDVER2-SSE-NEXT: movshdup {{.*#+}} xmm1 = xmm0[1,1,3,3] sched: [1:1.00]
+; BDVER2-SSE-NEXT: movshdup {{.*#+}} xmm0 = mem[1,1,3,3] sched: [6:0.50]
+; BDVER2-SSE-NEXT: addps %xmm1, %xmm0 # sched: [3:1.00]
+; BDVER2-SSE-NEXT: retq # sched: [1:1.00]
+;
+; BDVER2-LABEL: test_movshdup:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: vmovshdup {{.*#+}} xmm0 = xmm0[1,1,3,3] sched: [1:1.00]
+; BDVER2-NEXT: vmovshdup {{.*#+}} xmm1 = mem[1,1,3,3] sched: [6:0.50]
+; BDVER2-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-SSE-LABEL: test_movshdup:
; BTVER2-SSE: # %bb.0:
; BTVER2-SSE-NEXT: movshdup {{.*#+}} xmm1 = xmm0[1,1,3,3] sched: [1:0.50]
; SKX-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-SSE-LABEL: test_movsldup:
+; BDVER2-SSE: # %bb.0:
+; BDVER2-SSE-NEXT: movsldup {{.*#+}} xmm1 = xmm0[0,0,2,2] sched: [1:1.00]
+; BDVER2-SSE-NEXT: movsldup {{.*#+}} xmm0 = mem[0,0,2,2] sched: [6:0.50]
+; BDVER2-SSE-NEXT: addps %xmm1, %xmm0 # sched: [3:1.00]
+; BDVER2-SSE-NEXT: retq # sched: [1:1.00]
+;
+; BDVER2-LABEL: test_movsldup:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: vmovsldup {{.*#+}} xmm0 = xmm0[0,0,2,2] sched: [1:1.00]
+; BDVER2-NEXT: vmovsldup {{.*#+}} xmm1 = mem[0,0,2,2] sched: [6:0.50]
+; BDVER2-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-SSE-LABEL: test_movsldup:
; BTVER2-SSE: # %bb.0:
; BTVER2-SSE-NEXT: movsldup {{.*#+}} xmm1 = xmm0[0,0,2,2] sched: [1:0.50]
; SKX-NEXT: mwait # sched: [20:2.50]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-SSE-LABEL: test_mwait:
+; BDVER2-SSE: # %bb.0:
+; BDVER2-SSE-NEXT: movl %esi, %eax # sched: [1:0.33]
+; BDVER2-SSE-NEXT: movl %edi, %ecx # sched: [1:0.33]
+; BDVER2-SSE-NEXT: mwait # sched: [100:0.33]
+; BDVER2-SSE-NEXT: retq # sched: [1:1.00]
+;
+; BDVER2-LABEL: test_mwait:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: movl %esi, %eax # sched: [1:0.33]
+; BDVER2-NEXT: movl %edi, %ecx # sched: [1:0.33]
+; BDVER2-NEXT: mwait # sched: [100:0.33]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-SSE-LABEL: test_mwait:
; BTVER2-SSE: # %bb.0:
; BTVER2-SSE-NEXT: movl %esi, %eax # sched: [1:0.50]
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=skylake -mattr=-avx2 | FileCheck %s --check-prefixes=CHECK,SKYLAKE
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=skx -mattr=-sse4.2 | FileCheck %s --check-prefixes=CHECK,SKX-SSE
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=skx -mattr=-avx2 | FileCheck %s --check-prefixes=CHECK,SKX
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=x86-64 -mattr=+sse4.1 -mattr=-sse4.2 | FileCheck %s --check-prefixes=CHECK,BDVER2-SSE
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=x86-64 -mattr=+avx,+xop -mattr=+sse4.1 -mattr=-avx2 | FileCheck %s --check-prefixes=CHECK,BDVER2
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=btver2 -mattr=-sse4.2 | FileCheck %s --check-prefixes=CHECK,BTVER2-SSE
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=btver2 -mattr=-avx2 | FileCheck %s --check-prefixes=CHECK,BTVER2
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=znver1 -mattr=-sse4.2 | FileCheck %s --check-prefixes=CHECK,ZNVER1-SSE
; SKX-NEXT: vaddpd %xmm0, %xmm1, %xmm0 # sched: [4:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-SSE-LABEL: test_blendpd:
+; BDVER2-SSE: # %bb.0:
+; BDVER2-SSE-NEXT: blendpd {{.*#+}} xmm1 = xmm0[0],xmm1[1] sched: [1:0.50]
+; BDVER2-SSE-NEXT: blendpd {{.*#+}} xmm0 = xmm0[0],mem[1] sched: [7:0.50]
+; BDVER2-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [3:1.00]
+; BDVER2-SSE-NEXT: retq # sched: [1:1.00]
+;
+; BDVER2-LABEL: test_blendpd:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: vblendpd {{.*#+}} xmm1 = xmm0[0],xmm1[1] sched: [1:0.50]
+; BDVER2-NEXT: vblendpd {{.*#+}} xmm0 = xmm0[0],mem[1] sched: [7:0.50]
+; BDVER2-NEXT: vaddpd %xmm0, %xmm1, %xmm0 # sched: [3:1.00]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-SSE-LABEL: test_blendpd:
; BTVER2-SSE: # %bb.0:
; BTVER2-SSE-NEXT: blendpd {{.*#+}} xmm1 = xmm0[0],xmm1[1] sched: [1:0.50]
; SKX-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-SSE-LABEL: test_blendps:
+; BDVER2-SSE: # %bb.0:
+; BDVER2-SSE-NEXT: blendps {{.*#+}} xmm0 = xmm0[0],xmm1[1,2],xmm0[3] sched: [1:0.50]
+; BDVER2-SSE-NEXT: blendps {{.*#+}} xmm1 = xmm1[0],mem[1],xmm1[2,3] sched: [7:0.50]
+; BDVER2-SSE-NEXT: addps %xmm1, %xmm0 # sched: [3:1.00]
+; BDVER2-SSE-NEXT: retq # sched: [1:1.00]
+;
+; BDVER2-LABEL: test_blendps:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: vblendps {{.*#+}} xmm0 = xmm0[0],xmm1[1,2],xmm0[3] sched: [1:0.50]
+; BDVER2-NEXT: vblendps {{.*#+}} xmm1 = xmm1[0],mem[1],xmm1[2,3] sched: [7:0.50]
+; BDVER2-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-SSE-LABEL: test_blendps:
; BTVER2-SSE: # %bb.0:
; BTVER2-SSE-NEXT: blendps {{.*#+}} xmm0 = xmm0[0],xmm1[1,2],xmm0[3] sched: [1:0.50]
; SKX-NEXT: vblendvpd %xmm2, (%rdi), %xmm0, %xmm0 # sched: [8:0.67]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-SSE-LABEL: test_blendvpd:
+; BDVER2-SSE: # %bb.0:
+; BDVER2-SSE-NEXT: movapd %xmm0, %xmm3 # sched: [1:1.00]
+; BDVER2-SSE-NEXT: movaps %xmm2, %xmm0 # sched: [1:1.00]
+; BDVER2-SSE-NEXT: blendvpd %xmm0, %xmm1, %xmm3 # sched: [2:1.00]
+; BDVER2-SSE-NEXT: blendvpd %xmm0, (%rdi), %xmm3 # sched: [8:1.00]
+; BDVER2-SSE-NEXT: movapd %xmm3, %xmm0 # sched: [1:1.00]
+; BDVER2-SSE-NEXT: retq # sched: [1:1.00]
+;
+; BDVER2-LABEL: test_blendvpd:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: vblendvpd %xmm2, %xmm1, %xmm0, %xmm0 # sched: [2:1.00]
+; BDVER2-NEXT: vblendvpd %xmm2, (%rdi), %xmm0, %xmm0 # sched: [8:1.00]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-SSE-LABEL: test_blendvpd:
; BTVER2-SSE: # %bb.0:
; BTVER2-SSE-NEXT: movapd %xmm0, %xmm3 # sched: [1:0.50]
; SKX-NEXT: vblendvps %xmm2, (%rdi), %xmm0, %xmm0 # sched: [8:0.67]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-SSE-LABEL: test_blendvps:
+; BDVER2-SSE: # %bb.0:
+; BDVER2-SSE-NEXT: movaps %xmm0, %xmm3 # sched: [1:1.00]
+; BDVER2-SSE-NEXT: movaps %xmm2, %xmm0 # sched: [1:1.00]
+; BDVER2-SSE-NEXT: blendvps %xmm0, %xmm1, %xmm3 # sched: [2:1.00]
+; BDVER2-SSE-NEXT: blendvps %xmm0, (%rdi), %xmm3 # sched: [8:1.00]
+; BDVER2-SSE-NEXT: movaps %xmm3, %xmm0 # sched: [1:1.00]
+; BDVER2-SSE-NEXT: retq # sched: [1:1.00]
+;
+; BDVER2-LABEL: test_blendvps:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: vblendvps %xmm2, %xmm1, %xmm0, %xmm0 # sched: [2:1.00]
+; BDVER2-NEXT: vblendvps %xmm2, (%rdi), %xmm0, %xmm0 # sched: [8:1.00]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-SSE-LABEL: test_blendvps:
; BTVER2-SSE: # %bb.0:
; BTVER2-SSE-NEXT: movaps %xmm0, %xmm3 # sched: [1:0.50]
; SKX-NEXT: vdppd $7, (%rdi), %xmm0, %xmm0 # sched: [15:1.00]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-SSE-LABEL: test_dppd:
+; BDVER2-SSE: # %bb.0:
+; BDVER2-SSE-NEXT: dppd $7, %xmm1, %xmm0 # sched: [9:1.00]
+; BDVER2-SSE-NEXT: dppd $7, (%rdi), %xmm0 # sched: [15:1.00]
+; BDVER2-SSE-NEXT: retq # sched: [1:1.00]
+;
+; BDVER2-LABEL: test_dppd:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: vdppd $7, %xmm1, %xmm0, %xmm0 # sched: [9:1.00]
+; BDVER2-NEXT: vdppd $7, (%rdi), %xmm0, %xmm0 # sched: [15:1.00]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-SSE-LABEL: test_dppd:
; BTVER2-SSE: # %bb.0:
; BTVER2-SSE-NEXT: dppd $7, %xmm1, %xmm0 # sched: [9:3.00]
; SKX-NEXT: vdpps $7, (%rdi), %xmm0, %xmm0 # sched: [19:1.33]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-SSE-LABEL: test_dpps:
+; BDVER2-SSE: # %bb.0:
+; BDVER2-SSE-NEXT: dpps $7, %xmm1, %xmm0 # sched: [12:2.00]
+; BDVER2-SSE-NEXT: dpps $7, (%rdi), %xmm0 # sched: [18:2.00]
+; BDVER2-SSE-NEXT: retq # sched: [1:1.00]
+;
+; BDVER2-LABEL: test_dpps:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: vdpps $7, %xmm1, %xmm0, %xmm0 # sched: [12:2.00]
+; BDVER2-NEXT: vdpps $7, (%rdi), %xmm0, %xmm0 # sched: [18:2.00]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-SSE-LABEL: test_dpps:
; BTVER2-SSE: # %bb.0:
; BTVER2-SSE-NEXT: dpps $7, %xmm1, %xmm0 # sched: [11:3.00]
; SKX-NEXT: vextractps $1, %xmm0, (%rdi) # sched: [2:1.00]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-SSE-LABEL: test_extractps:
+; BDVER2-SSE: # %bb.0:
+; BDVER2-SSE-NEXT: extractps $3, %xmm0, %eax # sched: [3:1.00]
+; BDVER2-SSE-NEXT: extractps $1, %xmm0, (%rdi) # sched: [5:1.00]
+; BDVER2-SSE-NEXT: retq # sched: [1:1.00]
+;
+; BDVER2-LABEL: test_extractps:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: vextractps $3, %xmm0, %eax # sched: [3:1.00]
+; BDVER2-NEXT: vextractps $1, %xmm0, (%rdi) # sched: [5:1.00]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-SSE-LABEL: test_extractps:
; BTVER2-SSE: # %bb.0:
; BTVER2-SSE-NEXT: extractps $3, %xmm0, %eax # sched: [3:1.00]
; SKX-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,1,2],mem[0] sched: [7:1.00]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-SSE-LABEL: test_insertps:
+; BDVER2-SSE: # %bb.0:
+; BDVER2-SSE-NEXT: insertps {{.*#+}} xmm0 = zero,xmm1[0],xmm0[2,3] sched: [1:1.00]
+; BDVER2-SSE-NEXT: insertps {{.*#+}} xmm0 = xmm0[0,1,2],mem[0] sched: [7:1.00]
+; BDVER2-SSE-NEXT: retq # sched: [1:1.00]
+;
+; BDVER2-LABEL: test_insertps:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: vinsertps {{.*#+}} xmm0 = zero,xmm1[0],xmm0[2,3] sched: [1:1.00]
+; BDVER2-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,1,2],mem[0] sched: [7:1.00]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-SSE-LABEL: test_insertps:
; BTVER2-SSE: # %bb.0:
; BTVER2-SSE-NEXT: insertps {{.*#+}} xmm0 = zero,xmm1[0],xmm0[2,3] sched: [1:0.50]
; SKX-NEXT: vmovntdqa (%rdi), %xmm0 # sched: [6:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-SSE-LABEL: test_movntdqa:
+; BDVER2-SSE: # %bb.0:
+; BDVER2-SSE-NEXT: movntdqa (%rdi), %xmm0 # sched: [6:0.50]
+; BDVER2-SSE-NEXT: retq # sched: [1:1.00]
+;
+; BDVER2-LABEL: test_movntdqa:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: vmovntdqa (%rdi), %xmm0 # sched: [6:0.50]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-SSE-LABEL: test_movntdqa:
; BTVER2-SSE: # %bb.0:
; BTVER2-SSE-NEXT: movntdqa (%rdi), %xmm0 # sched: [5:1.00]
; SKX-NEXT: vmpsadbw $7, (%rdi), %xmm0, %xmm0 # sched: [10:2.00]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-SSE-LABEL: test_mpsadbw:
+; BDVER2-SSE: # %bb.0:
+; BDVER2-SSE-NEXT: mpsadbw $7, %xmm1, %xmm0 # sched: [7:1.00]
+; BDVER2-SSE-NEXT: mpsadbw $7, (%rdi), %xmm0 # sched: [13:1.00]
+; BDVER2-SSE-NEXT: retq # sched: [1:1.00]
+;
+; BDVER2-LABEL: test_mpsadbw:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: vmpsadbw $7, %xmm1, %xmm0, %xmm0 # sched: [7:1.00]
+; BDVER2-NEXT: vmpsadbw $7, (%rdi), %xmm0, %xmm0 # sched: [13:1.00]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-SSE-LABEL: test_mpsadbw:
; BTVER2-SSE: # %bb.0:
; BTVER2-SSE-NEXT: mpsadbw $7, %xmm1, %xmm0 # sched: [3:2.00]
; SKX-NEXT: vpackusdw (%rdi), %xmm0, %xmm0 # sched: [7:1.00]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-SSE-LABEL: test_packusdw:
+; BDVER2-SSE: # %bb.0:
+; BDVER2-SSE-NEXT: packusdw %xmm1, %xmm0 # sched: [1:0.50]
+; BDVER2-SSE-NEXT: packusdw (%rdi), %xmm0 # sched: [7:0.50]
+; BDVER2-SSE-NEXT: retq # sched: [1:1.00]
+;
+; BDVER2-LABEL: test_packusdw:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: vpackusdw %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
+; BDVER2-NEXT: vpackusdw (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-SSE-LABEL: test_packusdw:
; BTVER2-SSE: # %bb.0:
; BTVER2-SSE-NEXT: packusdw %xmm1, %xmm0 # sched: [1:0.50]
; SKX-NEXT: vpblendvb %xmm2, (%rdi), %xmm0, %xmm0 # sched: [8:0.67]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-SSE-LABEL: test_pblendvb:
+; BDVER2-SSE: # %bb.0:
+; BDVER2-SSE-NEXT: movdqa %xmm0, %xmm3 # sched: [1:0.33]
+; BDVER2-SSE-NEXT: movaps %xmm2, %xmm0 # sched: [1:1.00]
+; BDVER2-SSE-NEXT: pblendvb %xmm0, %xmm1, %xmm3 # sched: [2:1.00]
+; BDVER2-SSE-NEXT: pblendvb %xmm0, (%rdi), %xmm3 # sched: [8:1.00]
+; BDVER2-SSE-NEXT: movdqa %xmm3, %xmm0 # sched: [1:0.33]
+; BDVER2-SSE-NEXT: retq # sched: [1:1.00]
+;
+; BDVER2-LABEL: test_pblendvb:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: vpblendvb %xmm2, %xmm1, %xmm0, %xmm0 # sched: [2:1.00]
+; BDVER2-NEXT: vpblendvb %xmm2, (%rdi), %xmm0, %xmm0 # sched: [8:1.00]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-SSE-LABEL: test_pblendvb:
; BTVER2-SSE: # %bb.0:
; BTVER2-SSE-NEXT: movdqa %xmm0, %xmm3 # sched: [1:0.50]
; SKX-NEXT: vpaddw %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-SSE-LABEL: test_pblendw:
+; BDVER2-SSE: # %bb.0:
+; BDVER2-SSE-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2],xmm1[3],xmm0[4],xmm1[5],xmm0[6],xmm1[7] sched: [1:0.50]
+; BDVER2-SSE-NEXT: pblendw {{.*#+}} xmm1 = xmm1[0,1],mem[2,3],xmm1[4,5,6],mem[7] sched: [7:0.50]
+; BDVER2-SSE-NEXT: paddw %xmm1, %xmm0 # sched: [1:0.50]
+; BDVER2-SSE-NEXT: retq # sched: [1:1.00]
+;
+; BDVER2-LABEL: test_pblendw:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2],xmm1[3],xmm0[4],xmm1[5],xmm0[6],xmm1[7] sched: [1:0.50]
+; BDVER2-NEXT: vpblendw {{.*#+}} xmm1 = xmm1[0,1],mem[2,3],xmm1[4,5,6],mem[7] sched: [7:0.50]
+; BDVER2-NEXT: vpaddw %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-SSE-LABEL: test_pblendw:
; BTVER2-SSE: # %bb.0:
; BTVER2-SSE-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2],xmm1[3],xmm0[4],xmm1[5],xmm0[6],xmm1[7] sched: [1:0.50]
; SKX-NEXT: vpcmpeqq (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-SSE-LABEL: test_pcmpeqq:
+; BDVER2-SSE: # %bb.0:
+; BDVER2-SSE-NEXT: pcmpeqq %xmm1, %xmm0 # sched: [1:0.50]
+; BDVER2-SSE-NEXT: pcmpeqq (%rdi), %xmm0 # sched: [7:0.50]
+; BDVER2-SSE-NEXT: retq # sched: [1:1.00]
+;
+; BDVER2-LABEL: test_pcmpeqq:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: vpcomeqq %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
+; BDVER2-NEXT: vpcomeqq (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-SSE-LABEL: test_pcmpeqq:
; BTVER2-SSE: # %bb.0:
; BTVER2-SSE-NEXT: pcmpeqq %xmm1, %xmm0 # sched: [1:0.50]
; SKX-NEXT: vpextrb $1, %xmm0, (%rdi) # sched: [2:1.00]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-SSE-LABEL: test_pextrb:
+; BDVER2-SSE: # %bb.0:
+; BDVER2-SSE-NEXT: pextrb $3, %xmm0, %eax # sched: [3:1.00]
+; BDVER2-SSE-NEXT: pextrb $1, %xmm0, (%rdi) # sched: [5:1.00]
+; BDVER2-SSE-NEXT: retq # sched: [1:1.00]
+;
+; BDVER2-LABEL: test_pextrb:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: vpextrb $3, %xmm0, %eax # sched: [3:1.00]
+; BDVER2-NEXT: vpextrb $1, %xmm0, (%rdi) # sched: [5:1.00]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-SSE-LABEL: test_pextrb:
; BTVER2-SSE: # %bb.0:
; BTVER2-SSE-NEXT: pextrb $3, %xmm0, %eax # sched: [3:1.00]
; SKX-NEXT: vpextrd $1, %xmm0, (%rdi) # sched: [2:1.00]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-SSE-LABEL: test_pextrd:
+; BDVER2-SSE: # %bb.0:
+; BDVER2-SSE-NEXT: paddd %xmm0, %xmm0 # sched: [1:0.50]
+; BDVER2-SSE-NEXT: pextrd $3, %xmm0, %eax # sched: [3:1.00]
+; BDVER2-SSE-NEXT: pextrd $1, %xmm0, (%rdi) # sched: [5:1.00]
+; BDVER2-SSE-NEXT: retq # sched: [1:1.00]
+;
+; BDVER2-LABEL: test_pextrd:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: vpaddd %xmm0, %xmm0, %xmm0 # sched: [1:0.50]
+; BDVER2-NEXT: vpextrd $3, %xmm0, %eax # sched: [3:1.00]
+; BDVER2-NEXT: vpextrd $1, %xmm0, (%rdi) # sched: [5:1.00]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-SSE-LABEL: test_pextrd:
; BTVER2-SSE: # %bb.0:
; BTVER2-SSE-NEXT: paddd %xmm0, %xmm0 # sched: [1:0.50]
; SKX-NEXT: vpextrq $1, %xmm0, (%rdi) # sched: [2:1.00]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-SSE-LABEL: test_pextrq:
+; BDVER2-SSE: # %bb.0:
+; BDVER2-SSE-NEXT: pextrq $1, %xmm0, %rax # sched: [3:1.00]
+; BDVER2-SSE-NEXT: pextrq $1, %xmm0, (%rdi) # sched: [5:1.00]
+; BDVER2-SSE-NEXT: retq # sched: [1:1.00]
+;
+; BDVER2-LABEL: test_pextrq:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: vpextrq $1, %xmm0, %rax # sched: [3:1.00]
+; BDVER2-NEXT: vpextrq $1, %xmm0, (%rdi) # sched: [5:1.00]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-SSE-LABEL: test_pextrq:
; BTVER2-SSE: # %bb.0:
; BTVER2-SSE-NEXT: pextrq $1, %xmm0, %rax # sched: [3:1.00]
; SKX-NEXT: vpextrw $1, %xmm0, (%rdi) # sched: [2:1.00]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-SSE-LABEL: test_pextrw:
+; BDVER2-SSE: # %bb.0:
+; BDVER2-SSE-NEXT: pextrw $3, %xmm0, %eax # sched: [3:1.00]
+; BDVER2-SSE-NEXT: pextrw $1, %xmm0, (%rdi) # sched: [5:1.00]
+; BDVER2-SSE-NEXT: retq # sched: [1:1.00]
+;
+; BDVER2-LABEL: test_pextrw:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: vpextrw $3, %xmm0, %eax # sched: [3:1.00]
+; BDVER2-NEXT: vpextrw $1, %xmm0, (%rdi) # sched: [5:1.00]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-SSE-LABEL: test_pextrw:
; BTVER2-SSE: # %bb.0:
; BTVER2-SSE-NEXT: pextrw $3, %xmm0, %eax # sched: [3:1.00]
; SKX-NEXT: vphminposuw %xmm0, %xmm0 # sched: [4:1.00]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-SSE-LABEL: test_phminposuw:
+; BDVER2-SSE: # %bb.0:
+; BDVER2-SSE-NEXT: phminposuw (%rdi), %xmm0 # sched: [11:1.00]
+; BDVER2-SSE-NEXT: phminposuw %xmm0, %xmm0 # sched: [5:1.00]
+; BDVER2-SSE-NEXT: retq # sched: [1:1.00]
+;
+; BDVER2-LABEL: test_phminposuw:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: vphminposuw (%rdi), %xmm0 # sched: [11:1.00]
+; BDVER2-NEXT: vphminposuw %xmm0, %xmm0 # sched: [5:1.00]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-SSE-LABEL: test_phminposuw:
; BTVER2-SSE: # %bb.0:
; BTVER2-SSE-NEXT: phminposuw (%rdi), %xmm0 # sched: [7:1.00]
; SKX-NEXT: vpinsrb $3, (%rsi), %xmm0, %xmm0 # sched: [6:1.00]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-SSE-LABEL: test_pinsrb:
+; BDVER2-SSE: # %bb.0:
+; BDVER2-SSE-NEXT: pinsrb $1, %edi, %xmm0 # sched: [2:1.00]
+; BDVER2-SSE-NEXT: pinsrb $3, (%rsi), %xmm0 # sched: [7:0.50]
+; BDVER2-SSE-NEXT: retq # sched: [1:1.00]
+;
+; BDVER2-LABEL: test_pinsrb:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: vpinsrb $1, %edi, %xmm0, %xmm0 # sched: [2:1.00]
+; BDVER2-NEXT: vpinsrb $3, (%rsi), %xmm0, %xmm0 # sched: [7:0.50]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-SSE-LABEL: test_pinsrb:
; BTVER2-SSE: # %bb.0:
; BTVER2-SSE-NEXT: pinsrb $1, %edi, %xmm0 # sched: [7:0.50]
; SKX-NEXT: vpinsrd $3, (%rsi), %xmm0, %xmm0 # sched: [6:1.00]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-SSE-LABEL: test_pinsrd:
+; BDVER2-SSE: # %bb.0:
+; BDVER2-SSE-NEXT: pinsrd $1, %edi, %xmm0 # sched: [2:1.00]
+; BDVER2-SSE-NEXT: pinsrd $3, (%rsi), %xmm0 # sched: [7:0.50]
+; BDVER2-SSE-NEXT: retq # sched: [1:1.00]
+;
+; BDVER2-LABEL: test_pinsrd:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: vpinsrd $1, %edi, %xmm0, %xmm0 # sched: [2:1.00]
+; BDVER2-NEXT: vpinsrd $3, (%rsi), %xmm0, %xmm0 # sched: [7:0.50]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-SSE-LABEL: test_pinsrd:
; BTVER2-SSE: # %bb.0:
; BTVER2-SSE-NEXT: pinsrd $1, %edi, %xmm0 # sched: [7:0.50]
; SKX-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-SSE-LABEL: test_pinsrq:
+; BDVER2-SSE: # %bb.0:
+; BDVER2-SSE-NEXT: pinsrq $1, %rdi, %xmm0 # sched: [2:1.00]
+; BDVER2-SSE-NEXT: pinsrq $1, (%rsi), %xmm1 # sched: [7:0.50]
+; BDVER2-SSE-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.50]
+; BDVER2-SSE-NEXT: retq # sched: [1:1.00]
+;
+; BDVER2-LABEL: test_pinsrq:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: vpinsrq $1, %rdi, %xmm0, %xmm0 # sched: [2:1.00]
+; BDVER2-NEXT: vpinsrq $1, (%rsi), %xmm1, %xmm1 # sched: [7:0.50]
+; BDVER2-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-SSE-LABEL: test_pinsrq:
; BTVER2-SSE: # %bb.0:
; BTVER2-SSE-NEXT: pinsrq $1, %rdi, %xmm0 # sched: [7:0.50]
; SKX-NEXT: vpmaxsb (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-SSE-LABEL: test_pmaxsb:
+; BDVER2-SSE: # %bb.0:
+; BDVER2-SSE-NEXT: pmaxsb %xmm1, %xmm0 # sched: [1:0.50]
+; BDVER2-SSE-NEXT: pmaxsb (%rdi), %xmm0 # sched: [7:0.50]
+; BDVER2-SSE-NEXT: retq # sched: [1:1.00]
+;
+; BDVER2-LABEL: test_pmaxsb:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: vpmaxsb %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
+; BDVER2-NEXT: vpmaxsb (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-SSE-LABEL: test_pmaxsb:
; BTVER2-SSE: # %bb.0:
; BTVER2-SSE-NEXT: pmaxsb %xmm1, %xmm0 # sched: [1:0.50]
; SKX-NEXT: vpmaxsd (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-SSE-LABEL: test_pmaxsd:
+; BDVER2-SSE: # %bb.0:
+; BDVER2-SSE-NEXT: pmaxsd %xmm1, %xmm0 # sched: [1:0.50]
+; BDVER2-SSE-NEXT: pmaxsd (%rdi), %xmm0 # sched: [7:0.50]
+; BDVER2-SSE-NEXT: retq # sched: [1:1.00]
+;
+; BDVER2-LABEL: test_pmaxsd:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: vpmaxsd %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
+; BDVER2-NEXT: vpmaxsd (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-SSE-LABEL: test_pmaxsd:
; BTVER2-SSE: # %bb.0:
; BTVER2-SSE-NEXT: pmaxsd %xmm1, %xmm0 # sched: [1:0.50]
; SKX-NEXT: vpmaxud (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-SSE-LABEL: test_pmaxud:
+; BDVER2-SSE: # %bb.0:
+; BDVER2-SSE-NEXT: pmaxud %xmm1, %xmm0 # sched: [1:0.50]
+; BDVER2-SSE-NEXT: pmaxud (%rdi), %xmm0 # sched: [7:0.50]
+; BDVER2-SSE-NEXT: retq # sched: [1:1.00]
+;
+; BDVER2-LABEL: test_pmaxud:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: vpmaxud %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
+; BDVER2-NEXT: vpmaxud (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-SSE-LABEL: test_pmaxud:
; BTVER2-SSE: # %bb.0:
; BTVER2-SSE-NEXT: pmaxud %xmm1, %xmm0 # sched: [1:0.50]
; SKX-NEXT: vpmaxuw (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-SSE-LABEL: test_pmaxuw:
+; BDVER2-SSE: # %bb.0:
+; BDVER2-SSE-NEXT: pmaxuw %xmm1, %xmm0 # sched: [1:0.50]
+; BDVER2-SSE-NEXT: pmaxuw (%rdi), %xmm0 # sched: [7:0.50]
+; BDVER2-SSE-NEXT: retq # sched: [1:1.00]
+;
+; BDVER2-LABEL: test_pmaxuw:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: vpmaxuw %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
+; BDVER2-NEXT: vpmaxuw (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-SSE-LABEL: test_pmaxuw:
; BTVER2-SSE: # %bb.0:
; BTVER2-SSE-NEXT: pmaxuw %xmm1, %xmm0 # sched: [1:0.50]
; SKX-NEXT: vpminsb (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-SSE-LABEL: test_pminsb:
+; BDVER2-SSE: # %bb.0:
+; BDVER2-SSE-NEXT: pminsb %xmm1, %xmm0 # sched: [1:0.50]
+; BDVER2-SSE-NEXT: pminsb (%rdi), %xmm0 # sched: [7:0.50]
+; BDVER2-SSE-NEXT: retq # sched: [1:1.00]
+;
+; BDVER2-LABEL: test_pminsb:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: vpminsb %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
+; BDVER2-NEXT: vpminsb (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-SSE-LABEL: test_pminsb:
; BTVER2-SSE: # %bb.0:
; BTVER2-SSE-NEXT: pminsb %xmm1, %xmm0 # sched: [1:0.50]
; SKX-NEXT: vpminsd (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-SSE-LABEL: test_pminsd:
+; BDVER2-SSE: # %bb.0:
+; BDVER2-SSE-NEXT: pminsd %xmm1, %xmm0 # sched: [1:0.50]
+; BDVER2-SSE-NEXT: pminsd (%rdi), %xmm0 # sched: [7:0.50]
+; BDVER2-SSE-NEXT: retq # sched: [1:1.00]
+;
+; BDVER2-LABEL: test_pminsd:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: vpminsd %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
+; BDVER2-NEXT: vpminsd (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-SSE-LABEL: test_pminsd:
; BTVER2-SSE: # %bb.0:
; BTVER2-SSE-NEXT: pminsd %xmm1, %xmm0 # sched: [1:0.50]
; SKX-NEXT: vpminud (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-SSE-LABEL: test_pminud:
+; BDVER2-SSE: # %bb.0:
+; BDVER2-SSE-NEXT: pminud %xmm1, %xmm0 # sched: [1:0.50]
+; BDVER2-SSE-NEXT: pminud (%rdi), %xmm0 # sched: [7:0.50]
+; BDVER2-SSE-NEXT: retq # sched: [1:1.00]
+;
+; BDVER2-LABEL: test_pminud:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: vpminud %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
+; BDVER2-NEXT: vpminud (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-SSE-LABEL: test_pminud:
; BTVER2-SSE: # %bb.0:
; BTVER2-SSE-NEXT: pminud %xmm1, %xmm0 # sched: [1:0.50]
; SKX-NEXT: vpminuw (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-SSE-LABEL: test_pminuw:
+; BDVER2-SSE: # %bb.0:
+; BDVER2-SSE-NEXT: pminuw %xmm1, %xmm0 # sched: [1:0.50]
+; BDVER2-SSE-NEXT: pminuw (%rdi), %xmm0 # sched: [7:0.50]
+; BDVER2-SSE-NEXT: retq # sched: [1:1.00]
+;
+; BDVER2-LABEL: test_pminuw:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: vpminuw %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
+; BDVER2-NEXT: vpminuw (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-SSE-LABEL: test_pminuw:
; BTVER2-SSE: # %bb.0:
; BTVER2-SSE-NEXT: pminuw %xmm1, %xmm0 # sched: [1:0.50]
; SKX-NEXT: vpaddw %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-SSE-LABEL: test_pmovsxbw:
+; BDVER2-SSE: # %bb.0:
+; BDVER2-SSE-NEXT: pmovsxbw %xmm0, %xmm1 # sched: [1:0.50]
+; BDVER2-SSE-NEXT: pmovsxbw (%rdi), %xmm0 # sched: [7:0.50]
+; BDVER2-SSE-NEXT: paddw %xmm1, %xmm0 # sched: [1:0.50]
+; BDVER2-SSE-NEXT: retq # sched: [1:1.00]
+;
+; BDVER2-LABEL: test_pmovsxbw:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: vpmovsxbw %xmm0, %xmm0 # sched: [1:0.50]
+; BDVER2-NEXT: vpmovsxbw (%rdi), %xmm1 # sched: [7:0.50]
+; BDVER2-NEXT: vpaddw %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-SSE-LABEL: test_pmovsxbw:
; BTVER2-SSE: # %bb.0:
; BTVER2-SSE-NEXT: pmovsxbw %xmm0, %xmm1 # sched: [1:0.50]
; SKX-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-SSE-LABEL: test_pmovsxbd:
+; BDVER2-SSE: # %bb.0:
+; BDVER2-SSE-NEXT: pmovsxbd %xmm0, %xmm1 # sched: [1:0.50]
+; BDVER2-SSE-NEXT: pmovsxbd (%rdi), %xmm0 # sched: [7:0.50]
+; BDVER2-SSE-NEXT: paddd %xmm1, %xmm0 # sched: [1:0.50]
+; BDVER2-SSE-NEXT: retq # sched: [1:1.00]
+;
+; BDVER2-LABEL: test_pmovsxbd:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: vpmovsxbd %xmm0, %xmm0 # sched: [1:0.50]
+; BDVER2-NEXT: vpmovsxbd (%rdi), %xmm1 # sched: [7:0.50]
+; BDVER2-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-SSE-LABEL: test_pmovsxbd:
; BTVER2-SSE: # %bb.0:
; BTVER2-SSE-NEXT: pmovsxbd %xmm0, %xmm1 # sched: [1:0.50]
; SKX-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-SSE-LABEL: test_pmovsxbq:
+; BDVER2-SSE: # %bb.0:
+; BDVER2-SSE-NEXT: pmovsxbq %xmm0, %xmm1 # sched: [1:0.50]
+; BDVER2-SSE-NEXT: pmovsxbq (%rdi), %xmm0 # sched: [7:0.50]
+; BDVER2-SSE-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.50]
+; BDVER2-SSE-NEXT: retq # sched: [1:1.00]
+;
+; BDVER2-LABEL: test_pmovsxbq:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: vpmovsxbq %xmm0, %xmm0 # sched: [1:0.50]
+; BDVER2-NEXT: vpmovsxbq (%rdi), %xmm1 # sched: [7:0.50]
+; BDVER2-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-SSE-LABEL: test_pmovsxbq:
; BTVER2-SSE: # %bb.0:
; BTVER2-SSE-NEXT: pmovsxbq %xmm0, %xmm1 # sched: [1:0.50]
; SKX-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-SSE-LABEL: test_pmovsxdq:
+; BDVER2-SSE: # %bb.0:
+; BDVER2-SSE-NEXT: pmovsxdq %xmm0, %xmm1 # sched: [1:0.50]
+; BDVER2-SSE-NEXT: pmovsxdq (%rdi), %xmm0 # sched: [7:0.50]
+; BDVER2-SSE-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.50]
+; BDVER2-SSE-NEXT: retq # sched: [1:1.00]
+;
+; BDVER2-LABEL: test_pmovsxdq:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: vpmovsxdq %xmm0, %xmm0 # sched: [1:0.50]
+; BDVER2-NEXT: vpmovsxdq (%rdi), %xmm1 # sched: [7:0.50]
+; BDVER2-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-SSE-LABEL: test_pmovsxdq:
; BTVER2-SSE: # %bb.0:
; BTVER2-SSE-NEXT: pmovsxdq %xmm0, %xmm1 # sched: [1:0.50]
; SKX-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-SSE-LABEL: test_pmovsxwd:
+; BDVER2-SSE: # %bb.0:
+; BDVER2-SSE-NEXT: pmovsxwd %xmm0, %xmm1 # sched: [1:0.50]
+; BDVER2-SSE-NEXT: pmovsxwd (%rdi), %xmm0 # sched: [7:0.50]
+; BDVER2-SSE-NEXT: paddd %xmm1, %xmm0 # sched: [1:0.50]
+; BDVER2-SSE-NEXT: retq # sched: [1:1.00]
+;
+; BDVER2-LABEL: test_pmovsxwd:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: vpmovsxwd %xmm0, %xmm0 # sched: [1:0.50]
+; BDVER2-NEXT: vpmovsxwd (%rdi), %xmm1 # sched: [7:0.50]
+; BDVER2-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-SSE-LABEL: test_pmovsxwd:
; BTVER2-SSE: # %bb.0:
; BTVER2-SSE-NEXT: pmovsxwd %xmm0, %xmm1 # sched: [1:0.50]
; SKX-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-SSE-LABEL: test_pmovsxwq:
+; BDVER2-SSE: # %bb.0:
+; BDVER2-SSE-NEXT: pmovsxwq %xmm0, %xmm1 # sched: [1:0.50]
+; BDVER2-SSE-NEXT: pmovsxwq (%rdi), %xmm0 # sched: [7:0.50]
+; BDVER2-SSE-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.50]
+; BDVER2-SSE-NEXT: retq # sched: [1:1.00]
+;
+; BDVER2-LABEL: test_pmovsxwq:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: vpmovsxwq %xmm0, %xmm0 # sched: [1:0.50]
+; BDVER2-NEXT: vpmovsxwq (%rdi), %xmm1 # sched: [7:0.50]
+; BDVER2-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-SSE-LABEL: test_pmovsxwq:
; BTVER2-SSE: # %bb.0:
; BTVER2-SSE-NEXT: pmovsxwq %xmm0, %xmm1 # sched: [1:0.50]
; SKX-NEXT: vpaddw %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-SSE-LABEL: test_pmovzxbw:
+; BDVER2-SSE: # %bb.0:
+; BDVER2-SSE-NEXT: pmovzxbw {{.*#+}} xmm1 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero sched: [1:0.50]
+; BDVER2-SSE-NEXT: pmovzxbw {{.*#+}} xmm0 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero sched: [7:0.50]
+; BDVER2-SSE-NEXT: paddw %xmm1, %xmm0 # sched: [1:0.50]
+; BDVER2-SSE-NEXT: retq # sched: [1:1.00]
+;
+; BDVER2-LABEL: test_pmovzxbw:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: vpmovzxbw {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero sched: [1:0.50]
+; BDVER2-NEXT: vpmovzxbw {{.*#+}} xmm1 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero sched: [7:0.50]
+; BDVER2-NEXT: vpaddw %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-SSE-LABEL: test_pmovzxbw:
; BTVER2-SSE: # %bb.0:
; BTVER2-SSE-NEXT: pmovzxbw {{.*#+}} xmm1 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero sched: [1:0.50]
; SKX-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-SSE-LABEL: test_pmovzxbd:
+; BDVER2-SSE: # %bb.0:
+; BDVER2-SSE-NEXT: pmovzxbd {{.*#+}} xmm1 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero sched: [1:0.50]
+; BDVER2-SSE-NEXT: pmovzxbd {{.*#+}} xmm0 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero sched: [7:0.50]
+; BDVER2-SSE-NEXT: paddd %xmm1, %xmm0 # sched: [1:0.50]
+; BDVER2-SSE-NEXT: retq # sched: [1:1.00]
+;
+; BDVER2-LABEL: test_pmovzxbd:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: vpmovzxbd {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero sched: [1:0.50]
+; BDVER2-NEXT: vpmovzxbd {{.*#+}} xmm1 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero sched: [7:0.50]
+; BDVER2-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-SSE-LABEL: test_pmovzxbd:
; BTVER2-SSE: # %bb.0:
; BTVER2-SSE-NEXT: pmovzxbd {{.*#+}} xmm1 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero sched: [1:0.50]
; SKX-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-SSE-LABEL: test_pmovzxbq:
+; BDVER2-SSE: # %bb.0:
+; BDVER2-SSE-NEXT: pmovzxbq {{.*#+}} xmm1 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero sched: [1:0.50]
+; BDVER2-SSE-NEXT: pmovzxbq {{.*#+}} xmm0 = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero sched: [7:0.50]
+; BDVER2-SSE-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.50]
+; BDVER2-SSE-NEXT: retq # sched: [1:1.00]
+;
+; BDVER2-LABEL: test_pmovzxbq:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: vpmovzxbq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero sched: [1:0.50]
+; BDVER2-NEXT: vpmovzxbq {{.*#+}} xmm1 = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero sched: [7:0.50]
+; BDVER2-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-SSE-LABEL: test_pmovzxbq:
; BTVER2-SSE: # %bb.0:
; BTVER2-SSE-NEXT: pmovzxbq {{.*#+}} xmm1 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero sched: [1:0.50]
; SKX-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-SSE-LABEL: test_pmovzxdq:
+; BDVER2-SSE: # %bb.0:
+; BDVER2-SSE-NEXT: pmovzxdq {{.*#+}} xmm1 = xmm0[0],zero,xmm0[1],zero sched: [1:0.50]
+; BDVER2-SSE-NEXT: pmovzxdq {{.*#+}} xmm0 = mem[0],zero,mem[1],zero sched: [7:0.50]
+; BDVER2-SSE-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.50]
+; BDVER2-SSE-NEXT: retq # sched: [1:1.00]
+;
+; BDVER2-LABEL: test_pmovzxdq:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero sched: [1:0.50]
+; BDVER2-NEXT: vpmovzxdq {{.*#+}} xmm1 = mem[0],zero,mem[1],zero sched: [7:0.50]
+; BDVER2-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-SSE-LABEL: test_pmovzxdq:
; BTVER2-SSE: # %bb.0:
; BTVER2-SSE-NEXT: pmovzxdq {{.*#+}} xmm1 = xmm0[0],zero,xmm0[1],zero sched: [1:0.50]
; SKX-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-SSE-LABEL: test_pmovzxwd:
+; BDVER2-SSE: # %bb.0:
+; BDVER2-SSE-NEXT: pmovzxwd {{.*#+}} xmm1 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero sched: [1:0.50]
+; BDVER2-SSE-NEXT: pmovzxwd {{.*#+}} xmm0 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero sched: [7:0.50]
+; BDVER2-SSE-NEXT: paddd %xmm1, %xmm0 # sched: [1:0.50]
+; BDVER2-SSE-NEXT: retq # sched: [1:1.00]
+;
+; BDVER2-LABEL: test_pmovzxwd:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: vpmovzxwd {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero sched: [1:0.50]
+; BDVER2-NEXT: vpmovzxwd {{.*#+}} xmm1 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero sched: [7:0.50]
+; BDVER2-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-SSE-LABEL: test_pmovzxwd:
; BTVER2-SSE: # %bb.0:
; BTVER2-SSE-NEXT: pmovzxwd {{.*#+}} xmm1 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero sched: [1:0.50]
; SKX-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-SSE-LABEL: test_pmovzxwq:
+; BDVER2-SSE: # %bb.0:
+; BDVER2-SSE-NEXT: pmovzxwq {{.*#+}} xmm1 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero sched: [1:0.50]
+; BDVER2-SSE-NEXT: pmovzxwq {{.*#+}} xmm0 = mem[0],zero,zero,zero,mem[1],zero,zero,zero sched: [7:0.50]
+; BDVER2-SSE-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.50]
+; BDVER2-SSE-NEXT: retq # sched: [1:1.00]
+;
+; BDVER2-LABEL: test_pmovzxwq:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: vpmovzxwq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero sched: [1:0.50]
+; BDVER2-NEXT: vpmovzxwq {{.*#+}} xmm1 = mem[0],zero,zero,zero,mem[1],zero,zero,zero sched: [7:0.50]
+; BDVER2-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-SSE-LABEL: test_pmovzxwq:
; BTVER2-SSE: # %bb.0:
; BTVER2-SSE-NEXT: pmovzxwq {{.*#+}} xmm1 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero sched: [1:0.50]
; SKX-NEXT: vpor %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-SSE-LABEL: test_pmuldq:
+; BDVER2-SSE: # %bb.0:
+; BDVER2-SSE-NEXT: pmuldq %xmm1, %xmm0 # sched: [5:1.00]
+; BDVER2-SSE-NEXT: pmuldq (%rdi), %xmm2 # sched: [11:1.00]
+; BDVER2-SSE-NEXT: por %xmm2, %xmm0 # sched: [1:0.33]
+; BDVER2-SSE-NEXT: retq # sched: [1:1.00]
+;
+; BDVER2-LABEL: test_pmuldq:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: vpmuldq %xmm1, %xmm0, %xmm0 # sched: [5:1.00]
+; BDVER2-NEXT: vpmuldq (%rdi), %xmm2, %xmm1 # sched: [11:1.00]
+; BDVER2-NEXT: vpor %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-SSE-LABEL: test_pmuldq:
; BTVER2-SSE: # %bb.0:
; BTVER2-SSE-NEXT: pmuldq (%rdi), %xmm2 # sched: [7:1.00]
; SKX-NEXT: vpmulld (%rdi), %xmm0, %xmm0 # sched: [16:1.00]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-SSE-LABEL: test_pmulld:
+; BDVER2-SSE: # %bb.0:
+; BDVER2-SSE-NEXT: pmulld %xmm1, %xmm0 # sched: [5:1.00]
+; BDVER2-SSE-NEXT: pmulld (%rdi), %xmm0 # sched: [11:1.00]
+; BDVER2-SSE-NEXT: retq # sched: [1:1.00]
+;
+; BDVER2-LABEL: test_pmulld:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: vpmulld %xmm1, %xmm0, %xmm0 # sched: [5:1.00]
+; BDVER2-NEXT: vpmulld (%rdi), %xmm0, %xmm0 # sched: [11:1.00]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-SSE-LABEL: test_pmulld:
; BTVER2-SSE: # %bb.0:
; BTVER2-SSE-NEXT: pmulld %xmm1, %xmm0 # sched: [4:2.00]
; SKX-NEXT: movzbl %cl, %eax # sched: [1:0.25]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-SSE-LABEL: test_ptest:
+; BDVER2-SSE: # %bb.0:
+; BDVER2-SSE-NEXT: ptest %xmm1, %xmm0 # sched: [2:1.00]
+; BDVER2-SSE-NEXT: setb %al # sched: [1:0.50]
+; BDVER2-SSE-NEXT: ptest (%rdi), %xmm0 # sched: [8:1.00]
+; BDVER2-SSE-NEXT: setb %cl # sched: [1:0.50]
+; BDVER2-SSE-NEXT: andb %al, %cl # sched: [1:0.33]
+; BDVER2-SSE-NEXT: movzbl %cl, %eax # sched: [1:0.33]
+; BDVER2-SSE-NEXT: retq # sched: [1:1.00]
+;
+; BDVER2-LABEL: test_ptest:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: vptest %xmm1, %xmm0 # sched: [2:1.00]
+; BDVER2-NEXT: setb %al # sched: [1:0.50]
+; BDVER2-NEXT: vptest (%rdi), %xmm0 # sched: [8:1.00]
+; BDVER2-NEXT: setb %cl # sched: [1:0.50]
+; BDVER2-NEXT: andb %al, %cl # sched: [1:0.33]
+; BDVER2-NEXT: movzbl %cl, %eax # sched: [1:0.33]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-SSE-LABEL: test_ptest:
; BTVER2-SSE: # %bb.0:
; BTVER2-SSE-NEXT: ptest %xmm1, %xmm0 # sched: [3:1.00]
; SKX-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-SSE-LABEL: test_roundpd:
+; BDVER2-SSE: # %bb.0:
+; BDVER2-SSE-NEXT: roundpd $7, %xmm0, %xmm1 # sched: [3:1.00]
+; BDVER2-SSE-NEXT: roundpd $7, (%rdi), %xmm0 # sched: [9:1.00]
+; BDVER2-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [3:1.00]
+; BDVER2-SSE-NEXT: retq # sched: [1:1.00]
+;
+; BDVER2-LABEL: test_roundpd:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: vroundpd $7, %xmm0, %xmm0 # sched: [3:1.00]
+; BDVER2-NEXT: vroundpd $7, (%rdi), %xmm1 # sched: [9:1.00]
+; BDVER2-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-SSE-LABEL: test_roundpd:
; BTVER2-SSE: # %bb.0:
; BTVER2-SSE-NEXT: roundpd $7, %xmm0, %xmm1 # sched: [3:1.00]
; SKX-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-SSE-LABEL: test_roundps:
+; BDVER2-SSE: # %bb.0:
+; BDVER2-SSE-NEXT: roundps $7, %xmm0, %xmm1 # sched: [3:1.00]
+; BDVER2-SSE-NEXT: roundps $7, (%rdi), %xmm0 # sched: [9:1.00]
+; BDVER2-SSE-NEXT: addps %xmm1, %xmm0 # sched: [3:1.00]
+; BDVER2-SSE-NEXT: retq # sched: [1:1.00]
+;
+; BDVER2-LABEL: test_roundps:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: vroundps $7, %xmm0, %xmm0 # sched: [3:1.00]
+; BDVER2-NEXT: vroundps $7, (%rdi), %xmm1 # sched: [9:1.00]
+; BDVER2-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-SSE-LABEL: test_roundps:
; BTVER2-SSE: # %bb.0:
; BTVER2-SSE-NEXT: roundps $7, %xmm0, %xmm1 # sched: [3:1.00]
; SKX-NEXT: vaddpd %xmm0, %xmm1, %xmm0 # sched: [4:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-SSE-LABEL: test_roundsd:
+; BDVER2-SSE: # %bb.0:
+; BDVER2-SSE-NEXT: movapd %xmm0, %xmm2 # sched: [1:1.00]
+; BDVER2-SSE-NEXT: roundsd $7, %xmm1, %xmm2 # sched: [3:1.00]
+; BDVER2-SSE-NEXT: roundsd $7, (%rdi), %xmm0 # sched: [9:1.00]
+; BDVER2-SSE-NEXT: addpd %xmm2, %xmm0 # sched: [3:1.00]
+; BDVER2-SSE-NEXT: retq # sched: [1:1.00]
+;
+; BDVER2-LABEL: test_roundsd:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: vroundsd $7, %xmm1, %xmm0, %xmm1 # sched: [3:1.00]
+; BDVER2-NEXT: vroundsd $7, (%rdi), %xmm0, %xmm0 # sched: [9:1.00]
+; BDVER2-NEXT: vaddpd %xmm0, %xmm1, %xmm0 # sched: [3:1.00]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-SSE-LABEL: test_roundsd:
; BTVER2-SSE: # %bb.0:
; BTVER2-SSE-NEXT: movapd %xmm0, %xmm2 # sched: [1:0.50]
; SKX-NEXT: vaddps %xmm0, %xmm1, %xmm0 # sched: [4:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-SSE-LABEL: test_roundss:
+; BDVER2-SSE: # %bb.0:
+; BDVER2-SSE-NEXT: movaps %xmm0, %xmm2 # sched: [1:1.00]
+; BDVER2-SSE-NEXT: roundss $7, %xmm1, %xmm2 # sched: [3:1.00]
+; BDVER2-SSE-NEXT: roundss $7, (%rdi), %xmm0 # sched: [9:1.00]
+; BDVER2-SSE-NEXT: addps %xmm2, %xmm0 # sched: [3:1.00]
+; BDVER2-SSE-NEXT: retq # sched: [1:1.00]
+;
+; BDVER2-LABEL: test_roundss:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: vroundss $7, %xmm1, %xmm0, %xmm1 # sched: [3:1.00]
+; BDVER2-NEXT: vroundss $7, (%rdi), %xmm0, %xmm0 # sched: [9:1.00]
+; BDVER2-NEXT: vaddps %xmm0, %xmm1, %xmm0 # sched: [3:1.00]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-SSE-LABEL: test_roundss:
; BTVER2-SSE: # %bb.0:
; BTVER2-SSE-NEXT: movaps %xmm0, %xmm2 # sched: [1:0.50]
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=skylake -mattr=-avx2 | FileCheck %s --check-prefixes=CHECK,SKYLAKE
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=skx -mattr=-avx | FileCheck %s --check-prefixes=CHECK,SKX-SSE
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=skx -mattr=-avx2 | FileCheck %s --check-prefixes=CHECK,SKX
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=x86-64 -mattr=+sse4.2,+pclmul -mattr=-avx | FileCheck %s --check-prefixes=CHECK,BDVER2-SSE
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=x86-64 -mattr=+avx,+xop -mattr=+sse4.2,+pclmul -mattr=-avx2 | FileCheck %s --check-prefixes=CHECK,BDVER2
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=btver2 -mattr=-avx | FileCheck %s --check-prefixes=CHECK,BTVER2-SSE
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=btver2 -mattr=-avx2 | FileCheck %s --check-prefixes=CHECK,BTVER2
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=znver1 -mattr=-avx | FileCheck %s --check-prefixes=CHECK,ZNVER1-SSE
; SKX-NEXT: crc32b (%rdx), %eax # sched: [8:1.00]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-SSE-LABEL: crc32_32_8:
+; BDVER2-SSE: # %bb.0:
+; BDVER2-SSE-NEXT: movl %edi, %eax # sched: [1:0.33]
+; BDVER2-SSE-NEXT: crc32b %sil, %eax # sched: [3:1.00]
+; BDVER2-SSE-NEXT: crc32b (%rdx), %eax # sched: [8:1.00]
+; BDVER2-SSE-NEXT: retq # sched: [1:1.00]
+;
+; BDVER2-LABEL: crc32_32_8:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: movl %edi, %eax # sched: [1:0.33]
+; BDVER2-NEXT: crc32b %sil, %eax # sched: [3:1.00]
+; BDVER2-NEXT: crc32b (%rdx), %eax # sched: [8:1.00]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-SSE-LABEL: crc32_32_8:
; BTVER2-SSE: # %bb.0:
; BTVER2-SSE-NEXT: movl %edi, %eax # sched: [1:0.50]
; SKX-NEXT: crc32w (%rdx), %eax # sched: [8:1.00]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-SSE-LABEL: crc32_32_16:
+; BDVER2-SSE: # %bb.0:
+; BDVER2-SSE-NEXT: movl %edi, %eax # sched: [1:0.33]
+; BDVER2-SSE-NEXT: crc32w %si, %eax # sched: [3:1.00]
+; BDVER2-SSE-NEXT: crc32w (%rdx), %eax # sched: [8:1.00]
+; BDVER2-SSE-NEXT: retq # sched: [1:1.00]
+;
+; BDVER2-LABEL: crc32_32_16:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: movl %edi, %eax # sched: [1:0.33]
+; BDVER2-NEXT: crc32w %si, %eax # sched: [3:1.00]
+; BDVER2-NEXT: crc32w (%rdx), %eax # sched: [8:1.00]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-SSE-LABEL: crc32_32_16:
; BTVER2-SSE: # %bb.0:
; BTVER2-SSE-NEXT: movl %edi, %eax # sched: [1:0.50]
; SKX-NEXT: crc32l (%rdx), %eax # sched: [8:1.00]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-SSE-LABEL: crc32_32_32:
+; BDVER2-SSE: # %bb.0:
+; BDVER2-SSE-NEXT: movl %edi, %eax # sched: [1:0.33]
+; BDVER2-SSE-NEXT: crc32l %esi, %eax # sched: [3:1.00]
+; BDVER2-SSE-NEXT: crc32l (%rdx), %eax # sched: [8:1.00]
+; BDVER2-SSE-NEXT: retq # sched: [1:1.00]
+;
+; BDVER2-LABEL: crc32_32_32:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: movl %edi, %eax # sched: [1:0.33]
+; BDVER2-NEXT: crc32l %esi, %eax # sched: [3:1.00]
+; BDVER2-NEXT: crc32l (%rdx), %eax # sched: [8:1.00]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-SSE-LABEL: crc32_32_32:
; BTVER2-SSE: # %bb.0:
; BTVER2-SSE-NEXT: movl %edi, %eax # sched: [1:0.50]
; SKX-NEXT: crc32b (%rdx), %eax # sched: [8:1.00]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-SSE-LABEL: crc32_64_8:
+; BDVER2-SSE: # %bb.0:
+; BDVER2-SSE-NEXT: movq %rdi, %rax # sched: [1:0.33]
+; BDVER2-SSE-NEXT: crc32b %sil, %eax # sched: [3:1.00]
+; BDVER2-SSE-NEXT: crc32b (%rdx), %eax # sched: [8:1.00]
+; BDVER2-SSE-NEXT: retq # sched: [1:1.00]
+;
+; BDVER2-LABEL: crc32_64_8:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: movq %rdi, %rax # sched: [1:0.33]
+; BDVER2-NEXT: crc32b %sil, %eax # sched: [3:1.00]
+; BDVER2-NEXT: crc32b (%rdx), %eax # sched: [8:1.00]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-SSE-LABEL: crc32_64_8:
; BTVER2-SSE: # %bb.0:
; BTVER2-SSE-NEXT: movq %rdi, %rax # sched: [1:0.50]
; SKX-NEXT: crc32q (%rdx), %rax # sched: [8:1.00]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-SSE-LABEL: crc32_64_64:
+; BDVER2-SSE: # %bb.0:
+; BDVER2-SSE-NEXT: movq %rdi, %rax # sched: [1:0.33]
+; BDVER2-SSE-NEXT: crc32q %rsi, %rax # sched: [3:1.00]
+; BDVER2-SSE-NEXT: crc32q (%rdx), %rax # sched: [8:1.00]
+; BDVER2-SSE-NEXT: retq # sched: [1:1.00]
+;
+; BDVER2-LABEL: crc32_64_64:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: movq %rdi, %rax # sched: [1:0.33]
+; BDVER2-NEXT: crc32q %rsi, %rax # sched: [3:1.00]
+; BDVER2-NEXT: crc32q (%rdx), %rax # sched: [8:1.00]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-SSE-LABEL: crc32_64_64:
; BTVER2-SSE: # %bb.0:
; BTVER2-SSE-NEXT: movq %rdi, %rax # sched: [1:0.50]
; SKX-NEXT: leal (%rcx,%rsi), %eax # sched: [1:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-SSE-LABEL: test_pcmpestri:
+; BDVER2-SSE: # %bb.0:
+; BDVER2-SSE-NEXT: movl $7, %eax # sched: [1:0.33]
+; BDVER2-SSE-NEXT: movl $7, %edx # sched: [1:0.33]
+; BDVER2-SSE-NEXT: pcmpestri $7, %xmm1, %xmm0 # sched: [4:2.67]
+; BDVER2-SSE-NEXT: movl %ecx, %esi # sched: [1:0.33]
+; BDVER2-SSE-NEXT: movl $7, %eax # sched: [1:0.33]
+; BDVER2-SSE-NEXT: movl $7, %edx # sched: [1:0.33]
+; BDVER2-SSE-NEXT: pcmpestri $7, (%rdi), %xmm0 # sched: [4:2.33]
+; BDVER2-SSE-NEXT: # kill: def $ecx killed $ecx def $rcx
+; BDVER2-SSE-NEXT: leal (%rcx,%rsi), %eax # sched: [1:0.50]
+; BDVER2-SSE-NEXT: retq # sched: [1:1.00]
+;
+; BDVER2-LABEL: test_pcmpestri:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: movl $7, %eax # sched: [1:0.33]
+; BDVER2-NEXT: movl $7, %edx # sched: [1:0.33]
+; BDVER2-NEXT: vpcmpestri $7, %xmm1, %xmm0 # sched: [4:2.67]
+; BDVER2-NEXT: movl %ecx, %esi # sched: [1:0.33]
+; BDVER2-NEXT: movl $7, %eax # sched: [1:0.33]
+; BDVER2-NEXT: movl $7, %edx # sched: [1:0.33]
+; BDVER2-NEXT: vpcmpestri $7, (%rdi), %xmm0 # sched: [4:2.33]
+; BDVER2-NEXT: # kill: def $ecx killed $ecx def $rcx
+; BDVER2-NEXT: leal (%rcx,%rsi), %eax # sched: [1:0.50]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-SSE-LABEL: test_pcmpestri:
; BTVER2-SSE: # %bb.0:
; BTVER2-SSE-NEXT: movl $7, %eax # sched: [1:0.50]
; SKX-NEXT: vpcmpestrm $7, (%rdi), %xmm0 # sched: [25:4.00]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-SSE-LABEL: test_pcmpestrm:
+; BDVER2-SSE: # %bb.0:
+; BDVER2-SSE-NEXT: movl $7, %eax # sched: [1:0.33]
+; BDVER2-SSE-NEXT: movl $7, %edx # sched: [1:0.33]
+; BDVER2-SSE-NEXT: pcmpestrm $7, %xmm1, %xmm0 # sched: [11:2.67]
+; BDVER2-SSE-NEXT: movl $7, %eax # sched: [1:0.33]
+; BDVER2-SSE-NEXT: movl $7, %edx # sched: [1:0.33]
+; BDVER2-SSE-NEXT: pcmpestrm $7, (%rdi), %xmm0 # sched: [11:2.33]
+; BDVER2-SSE-NEXT: retq # sched: [1:1.00]
+;
+; BDVER2-LABEL: test_pcmpestrm:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: movl $7, %eax # sched: [1:0.33]
+; BDVER2-NEXT: movl $7, %edx # sched: [1:0.33]
+; BDVER2-NEXT: vpcmpestrm $7, %xmm1, %xmm0 # sched: [11:2.67]
+; BDVER2-NEXT: movl $7, %eax # sched: [1:0.33]
+; BDVER2-NEXT: movl $7, %edx # sched: [1:0.33]
+; BDVER2-NEXT: vpcmpestrm $7, (%rdi), %xmm0 # sched: [11:2.33]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-SSE-LABEL: test_pcmpestrm:
; BTVER2-SSE: # %bb.0:
; BTVER2-SSE-NEXT: movl $7, %eax # sched: [1:0.50]
; SKX-NEXT: leal (%rcx,%rax), %eax # sched: [1:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-SSE-LABEL: test_pcmpistri:
+; BDVER2-SSE: # %bb.0:
+; BDVER2-SSE-NEXT: pcmpistri $7, %xmm1, %xmm0 # sched: [11:3.00]
+; BDVER2-SSE-NEXT: movl %ecx, %eax # sched: [1:0.33]
+; BDVER2-SSE-NEXT: pcmpistri $7, (%rdi), %xmm0 # sched: [17:3.00]
+; BDVER2-SSE-NEXT: # kill: def $ecx killed $ecx def $rcx
+; BDVER2-SSE-NEXT: leal (%rcx,%rax), %eax # sched: [1:0.50]
+; BDVER2-SSE-NEXT: retq # sched: [1:1.00]
+;
+; BDVER2-LABEL: test_pcmpistri:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: vpcmpistri $7, %xmm1, %xmm0 # sched: [11:3.00]
+; BDVER2-NEXT: movl %ecx, %eax # sched: [1:0.33]
+; BDVER2-NEXT: vpcmpistri $7, (%rdi), %xmm0 # sched: [17:3.00]
+; BDVER2-NEXT: # kill: def $ecx killed $ecx def $rcx
+; BDVER2-NEXT: leal (%rcx,%rax), %eax # sched: [1:0.50]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-SSE-LABEL: test_pcmpistri:
; BTVER2-SSE: # %bb.0:
; BTVER2-SSE-NEXT: pcmpistri $7, %xmm1, %xmm0 # sched: [7:2.00]
; SKX-NEXT: vpcmpistrm $7, (%rdi), %xmm0 # sched: [16:3.00]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-SSE-LABEL: test_pcmpistrm:
+; BDVER2-SSE: # %bb.0:
+; BDVER2-SSE-NEXT: pcmpistrm $7, %xmm1, %xmm0 # sched: [11:3.00]
+; BDVER2-SSE-NEXT: pcmpistrm $7, (%rdi), %xmm0 # sched: [17:3.00]
+; BDVER2-SSE-NEXT: retq # sched: [1:1.00]
+;
+; BDVER2-LABEL: test_pcmpistrm:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: vpcmpistrm $7, %xmm1, %xmm0 # sched: [11:3.00]
+; BDVER2-NEXT: vpcmpistrm $7, (%rdi), %xmm0 # sched: [17:3.00]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-SSE-LABEL: test_pcmpistrm:
; BTVER2-SSE: # %bb.0:
; BTVER2-SSE-NEXT: pcmpistrm $7, %xmm1, %xmm0 # sched: [8:2.00]
; SKX-NEXT: vpcmpgtq (%rdi), %xmm0, %xmm0 # sched: [9:1.00]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-SSE-LABEL: test_pcmpgtq:
+; BDVER2-SSE: # %bb.0:
+; BDVER2-SSE-NEXT: pcmpgtq %xmm1, %xmm0 # sched: [5:1.00]
+; BDVER2-SSE-NEXT: pcmpgtq (%rdi), %xmm0 # sched: [11:1.00]
+; BDVER2-SSE-NEXT: retq # sched: [1:1.00]
+;
+; BDVER2-LABEL: test_pcmpgtq:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: vpcomgtq %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
+; BDVER2-NEXT: vpcomgtq (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-SSE-LABEL: test_pcmpgtq:
; BTVER2-SSE: # %bb.0:
; BTVER2-SSE-NEXT: pcmpgtq %xmm1, %xmm0 # sched: [1:0.50]
; SKX-NEXT: vpclmulqdq $0, (%rdi), %xmm0, %xmm0 # sched: [12:1.00]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-SSE-LABEL: test_pclmulqdq:
+; BDVER2-SSE: # %bb.0:
+; BDVER2-SSE-NEXT: pclmulqdq $0, %xmm1, %xmm0 # sched: [14:6.00]
+; BDVER2-SSE-NEXT: pclmulqdq $0, (%rdi), %xmm0 # sched: [14:5.67]
+; BDVER2-SSE-NEXT: retq # sched: [1:1.00]
+;
+; BDVER2-LABEL: test_pclmulqdq:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: vpclmulqdq $0, %xmm1, %xmm0, %xmm0 # sched: [14:6.00]
+; BDVER2-NEXT: vpclmulqdq $0, (%rdi), %xmm0, %xmm0 # sched: [14:5.67]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-SSE-LABEL: test_pclmulqdq:
; BTVER2-SSE: # %bb.0:
; BTVER2-SSE-NEXT: pclmulqdq $0, %xmm1, %xmm0 # sched: [2:1.00]
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=x86-64 -mattr=+sse4a | FileCheck %s --check-prefix=GENERIC
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=x86-64 -mattr=+sse4a | FileCheck %s --check-prefix=BDVER2
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=btver2 | FileCheck %s --check-prefix=BTVER2
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=znver1 | FileCheck %s --check-prefix=ZNVER1
; GENERIC-NEXT: extrq %xmm1, %xmm0 # sched: [1:0.50]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
+; BDVER2-LABEL: test_extrq:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: extrq %xmm1, %xmm0 # sched: [1:0.50]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-LABEL: test_extrq:
; BTVER2: # %bb.0:
; BTVER2-NEXT: extrq %xmm1, %xmm0 # sched: [1:0.50]
; GENERIC-NEXT: extrq $2, $3, %xmm0 # sched: [1:0.50]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
+; BDVER2-LABEL: test_extrqi:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: extrq $2, $3, %xmm0 # sched: [1:0.50]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-LABEL: test_extrqi:
; BTVER2: # %bb.0:
; BTVER2-NEXT: extrq $2, $3, %xmm0 # sched: [1:0.50]
; GENERIC-NEXT: insertq %xmm1, %xmm0 # sched: [1:0.50]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
+; BDVER2-LABEL: test_insertq:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: insertq %xmm1, %xmm0 # sched: [1:0.50]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-LABEL: test_insertq:
; BTVER2: # %bb.0:
; BTVER2-NEXT: insertq %xmm1, %xmm0 # sched: [2:2.00]
; GENERIC-NEXT: insertq $6, $5, %xmm1, %xmm0 # sched: [1:0.50]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
+; BDVER2-LABEL: test_insertqi:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: insertq $6, $5, %xmm1, %xmm0 # sched: [1:0.50]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-LABEL: test_insertqi:
; BTVER2: # %bb.0:
; BTVER2-NEXT: insertq $6, $5, %xmm1, %xmm0 # sched: [2:2.00]
; GENERIC-NEXT: movntsd %xmm0, (%rdi) # sched: [1:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
+; BDVER2-LABEL: test_movntsd:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: movntsd %xmm0, (%rdi) # sched: [1:1.00]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-LABEL: test_movntsd:
; BTVER2: # %bb.0:
; BTVER2-NEXT: movntsd %xmm0, (%rdi) # sched: [3:1.00]
; GENERIC-NEXT: movntss %xmm0, (%rdi) # sched: [1:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
+; BDVER2-LABEL: test_movntss:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: movntss %xmm0, (%rdi) # sched: [1:1.00]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-LABEL: test_movntss:
; BTVER2: # %bb.0:
; BTVER2-NEXT: movntss %xmm0, (%rdi) # sched: [3:1.00]
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=skylake -mattr=-avx2 | FileCheck %s --check-prefixes=CHECK,SKYLAKE
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=skx -mattr=-sse4.1 | FileCheck %s --check-prefixes=CHECK,SKX-SSE
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=skx -mattr=-avx2 | FileCheck %s --check-prefixes=CHECK,SKX
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=x86-64 -mattr=+ssse3 -mattr=-sse4.1 | FileCheck %s --check-prefixes=CHECK,BDVER2-SSE
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=x86-64 -mattr=+avx -mattr=+ssse3 -mattr=-avx2 | FileCheck %s --check-prefixes=CHECK,BDVER2
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=btver2 -mattr=-sse4.1 | FileCheck %s --check-prefixes=CHECK,BTVER2-SSE
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=btver2 -mattr=-avx2 | FileCheck %s --check-prefixes=CHECK,BTVER2
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=znver1 -mattr=-sse4.1 | FileCheck %s --check-prefixes=CHECK,ZNVER1-SSE
; SKX-NEXT: vpor %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-SSE-LABEL: test_pabsb:
+; BDVER2-SSE: # %bb.0:
+; BDVER2-SSE-NEXT: pabsb %xmm0, %xmm1 # sched: [1:0.50]
+; BDVER2-SSE-NEXT: pabsb (%rdi), %xmm0 # sched: [7:0.50]
+; BDVER2-SSE-NEXT: por %xmm1, %xmm0 # sched: [1:0.33]
+; BDVER2-SSE-NEXT: retq # sched: [1:1.00]
+;
+; BDVER2-LABEL: test_pabsb:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: vpabsb %xmm0, %xmm0 # sched: [1:0.50]
+; BDVER2-NEXT: vpabsb (%rdi), %xmm1 # sched: [7:0.50]
+; BDVER2-NEXT: vpor %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-SSE-LABEL: test_pabsb:
; BTVER2-SSE: # %bb.0:
; BTVER2-SSE-NEXT: pabsb %xmm0, %xmm1 # sched: [1:0.50]
; SKX-NEXT: vpor %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-SSE-LABEL: test_pabsd:
+; BDVER2-SSE: # %bb.0:
+; BDVER2-SSE-NEXT: pabsd %xmm0, %xmm1 # sched: [1:0.50]
+; BDVER2-SSE-NEXT: pabsd (%rdi), %xmm0 # sched: [7:0.50]
+; BDVER2-SSE-NEXT: por %xmm1, %xmm0 # sched: [1:0.33]
+; BDVER2-SSE-NEXT: retq # sched: [1:1.00]
+;
+; BDVER2-LABEL: test_pabsd:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: vpabsd %xmm0, %xmm0 # sched: [1:0.50]
+; BDVER2-NEXT: vpabsd (%rdi), %xmm1 # sched: [7:0.50]
+; BDVER2-NEXT: vpor %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-SSE-LABEL: test_pabsd:
; BTVER2-SSE: # %bb.0:
; BTVER2-SSE-NEXT: pabsd %xmm0, %xmm1 # sched: [1:0.50]
; SKX-NEXT: vpor %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-SSE-LABEL: test_pabsw:
+; BDVER2-SSE: # %bb.0:
+; BDVER2-SSE-NEXT: pabsw %xmm0, %xmm1 # sched: [1:0.50]
+; BDVER2-SSE-NEXT: pabsw (%rdi), %xmm0 # sched: [7:0.50]
+; BDVER2-SSE-NEXT: por %xmm1, %xmm0 # sched: [1:0.33]
+; BDVER2-SSE-NEXT: retq # sched: [1:1.00]
+;
+; BDVER2-LABEL: test_pabsw:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: vpabsw %xmm0, %xmm0 # sched: [1:0.50]
+; BDVER2-NEXT: vpabsw (%rdi), %xmm1 # sched: [7:0.50]
+; BDVER2-NEXT: vpor %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-SSE-LABEL: test_pabsw:
; BTVER2-SSE: # %bb.0:
; BTVER2-SSE-NEXT: pabsw %xmm0, %xmm1 # sched: [1:0.50]
; SKX-NEXT: vpalignr {{.*#+}} xmm0 = mem[14,15],xmm0[0,1,2,3,4,5,6,7,8,9,10,11,12,13] sched: [7:1.00]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-SSE-LABEL: test_palignr:
+; BDVER2-SSE: # %bb.0:
+; BDVER2-SSE-NEXT: palignr {{.*#+}} xmm1 = xmm0[6,7,8,9,10,11,12,13,14,15],xmm1[0,1,2,3,4,5] sched: [1:0.50]
+; BDVER2-SSE-NEXT: palignr {{.*#+}} xmm1 = mem[14,15],xmm1[0,1,2,3,4,5,6,7,8,9,10,11,12,13] sched: [7:0.50]
+; BDVER2-SSE-NEXT: movdqa %xmm1, %xmm0 # sched: [1:0.33]
+; BDVER2-SSE-NEXT: retq # sched: [1:1.00]
+;
+; BDVER2-LABEL: test_palignr:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: vpalignr {{.*#+}} xmm0 = xmm0[6,7,8,9,10,11,12,13,14,15],xmm1[0,1,2,3,4,5] sched: [1:0.50]
+; BDVER2-NEXT: vpalignr {{.*#+}} xmm0 = mem[14,15],xmm0[0,1,2,3,4,5,6,7,8,9,10,11,12,13] sched: [7:0.50]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-SSE-LABEL: test_palignr:
; BTVER2-SSE: # %bb.0:
; BTVER2-SSE-NEXT: palignr {{.*#+}} xmm1 = xmm0[6,7,8,9,10,11,12,13,14,15],xmm1[0,1,2,3,4,5] sched: [1:0.50]
; SKX-NEXT: vphaddd (%rdi), %xmm0, %xmm0 # sched: [9:2.00]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-SSE-LABEL: test_phaddd:
+; BDVER2-SSE: # %bb.0:
+; BDVER2-SSE-NEXT: phaddd %xmm1, %xmm0 # sched: [3:1.50]
+; BDVER2-SSE-NEXT: phaddd (%rdi), %xmm0 # sched: [9:1.50]
+; BDVER2-SSE-NEXT: retq # sched: [1:1.00]
+;
+; BDVER2-LABEL: test_phaddd:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: vphaddd %xmm1, %xmm0, %xmm0 # sched: [3:1.50]
+; BDVER2-NEXT: vphaddd (%rdi), %xmm0, %xmm0 # sched: [9:1.50]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-SSE-LABEL: test_phaddd:
; BTVER2-SSE: # %bb.0:
; BTVER2-SSE-NEXT: phaddd %xmm1, %xmm0 # sched: [1:0.50]
; SKX-NEXT: vphaddsw (%rdi), %xmm0, %xmm0 # sched: [9:2.00]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-SSE-LABEL: test_phaddsw:
+; BDVER2-SSE: # %bb.0:
+; BDVER2-SSE-NEXT: phaddsw %xmm1, %xmm0 # sched: [3:1.50]
+; BDVER2-SSE-NEXT: phaddsw (%rdi), %xmm0 # sched: [9:1.50]
+; BDVER2-SSE-NEXT: retq # sched: [1:1.00]
+;
+; BDVER2-LABEL: test_phaddsw:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: vphaddsw %xmm1, %xmm0, %xmm0 # sched: [3:1.50]
+; BDVER2-NEXT: vphaddsw (%rdi), %xmm0, %xmm0 # sched: [9:1.50]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-SSE-LABEL: test_phaddsw:
; BTVER2-SSE: # %bb.0:
; BTVER2-SSE-NEXT: phaddsw %xmm1, %xmm0 # sched: [1:0.50]
; SKX-NEXT: vphaddw (%rdi), %xmm0, %xmm0 # sched: [9:2.00]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-SSE-LABEL: test_phaddw:
+; BDVER2-SSE: # %bb.0:
+; BDVER2-SSE-NEXT: phaddw %xmm1, %xmm0 # sched: [3:1.50]
+; BDVER2-SSE-NEXT: phaddw (%rdi), %xmm0 # sched: [9:1.50]
+; BDVER2-SSE-NEXT: retq # sched: [1:1.00]
+;
+; BDVER2-LABEL: test_phaddw:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: vphaddw %xmm1, %xmm0, %xmm0 # sched: [3:1.50]
+; BDVER2-NEXT: vphaddw (%rdi), %xmm0, %xmm0 # sched: [9:1.50]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-SSE-LABEL: test_phaddw:
; BTVER2-SSE: # %bb.0:
; BTVER2-SSE-NEXT: phaddw %xmm1, %xmm0 # sched: [1:0.50]
; SKX-NEXT: vphsubd (%rdi), %xmm0, %xmm0 # sched: [9:2.00]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-SSE-LABEL: test_phsubd:
+; BDVER2-SSE: # %bb.0:
+; BDVER2-SSE-NEXT: phsubd %xmm1, %xmm0 # sched: [3:1.50]
+; BDVER2-SSE-NEXT: phsubd (%rdi), %xmm0 # sched: [9:1.50]
+; BDVER2-SSE-NEXT: retq # sched: [1:1.00]
+;
+; BDVER2-LABEL: test_phsubd:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: vphsubd %xmm1, %xmm0, %xmm0 # sched: [3:1.50]
+; BDVER2-NEXT: vphsubd (%rdi), %xmm0, %xmm0 # sched: [9:1.50]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-SSE-LABEL: test_phsubd:
; BTVER2-SSE: # %bb.0:
; BTVER2-SSE-NEXT: phsubd %xmm1, %xmm0 # sched: [1:0.50]
; SKX-NEXT: vphsubsw (%rdi), %xmm0, %xmm0 # sched: [9:2.00]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-SSE-LABEL: test_phsubsw:
+; BDVER2-SSE: # %bb.0:
+; BDVER2-SSE-NEXT: phsubsw %xmm1, %xmm0 # sched: [3:1.50]
+; BDVER2-SSE-NEXT: phsubsw (%rdi), %xmm0 # sched: [9:1.50]
+; BDVER2-SSE-NEXT: retq # sched: [1:1.00]
+;
+; BDVER2-LABEL: test_phsubsw:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: vphsubsw %xmm1, %xmm0, %xmm0 # sched: [3:1.50]
+; BDVER2-NEXT: vphsubsw (%rdi), %xmm0, %xmm0 # sched: [9:1.50]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-SSE-LABEL: test_phsubsw:
; BTVER2-SSE: # %bb.0:
; BTVER2-SSE-NEXT: phsubsw %xmm1, %xmm0 # sched: [1:0.50]
; SKX-NEXT: vphsubw (%rdi), %xmm0, %xmm0 # sched: [9:2.00]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-SSE-LABEL: test_phsubw:
+; BDVER2-SSE: # %bb.0:
+; BDVER2-SSE-NEXT: phsubw %xmm1, %xmm0 # sched: [3:1.50]
+; BDVER2-SSE-NEXT: phsubw (%rdi), %xmm0 # sched: [9:1.50]
+; BDVER2-SSE-NEXT: retq # sched: [1:1.00]
+;
+; BDVER2-LABEL: test_phsubw:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: vphsubw %xmm1, %xmm0, %xmm0 # sched: [3:1.50]
+; BDVER2-NEXT: vphsubw (%rdi), %xmm0, %xmm0 # sched: [9:1.50]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-SSE-LABEL: test_phsubw:
; BTVER2-SSE: # %bb.0:
; BTVER2-SSE-NEXT: phsubw %xmm1, %xmm0 # sched: [1:0.50]
; SKX-NEXT: vpmaddubsw (%rdi), %xmm0, %xmm0 # sched: [10:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-SSE-LABEL: test_pmaddubsw:
+; BDVER2-SSE: # %bb.0:
+; BDVER2-SSE-NEXT: pmaddubsw %xmm1, %xmm0 # sched: [5:1.00]
+; BDVER2-SSE-NEXT: pmaddubsw (%rdi), %xmm0 # sched: [11:1.00]
+; BDVER2-SSE-NEXT: retq # sched: [1:1.00]
+;
+; BDVER2-LABEL: test_pmaddubsw:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: vpmaddubsw %xmm1, %xmm0, %xmm0 # sched: [5:1.00]
+; BDVER2-NEXT: vpmaddubsw (%rdi), %xmm0, %xmm0 # sched: [11:1.00]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-SSE-LABEL: test_pmaddubsw:
; BTVER2-SSE: # %bb.0:
; BTVER2-SSE-NEXT: pmaddubsw %xmm1, %xmm0 # sched: [2:1.00]
; SKX-NEXT: vpmulhrsw (%rdi), %xmm0, %xmm0 # sched: [10:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-SSE-LABEL: test_pmulhrsw:
+; BDVER2-SSE: # %bb.0:
+; BDVER2-SSE-NEXT: pmulhrsw %xmm1, %xmm0 # sched: [5:1.00]
+; BDVER2-SSE-NEXT: pmulhrsw (%rdi), %xmm0 # sched: [11:1.00]
+; BDVER2-SSE-NEXT: retq # sched: [1:1.00]
+;
+; BDVER2-LABEL: test_pmulhrsw:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: vpmulhrsw %xmm1, %xmm0, %xmm0 # sched: [5:1.00]
+; BDVER2-NEXT: vpmulhrsw (%rdi), %xmm0, %xmm0 # sched: [11:1.00]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-SSE-LABEL: test_pmulhrsw:
; BTVER2-SSE: # %bb.0:
; BTVER2-SSE-NEXT: pmulhrsw %xmm1, %xmm0 # sched: [2:1.00]
; SKX-NEXT: vpshufb (%rdi), %xmm0, %xmm0 # sched: [7:1.00]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-SSE-LABEL: test_pshufb:
+; BDVER2-SSE: # %bb.0:
+; BDVER2-SSE-NEXT: pshufb %xmm1, %xmm0 # sched: [1:0.50]
+; BDVER2-SSE-NEXT: pshufb (%rdi), %xmm0 # sched: [7:0.50]
+; BDVER2-SSE-NEXT: retq # sched: [1:1.00]
+;
+; BDVER2-LABEL: test_pshufb:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: vpshufb %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
+; BDVER2-NEXT: vpshufb (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-SSE-LABEL: test_pshufb:
; BTVER2-SSE: # %bb.0:
; BTVER2-SSE-NEXT: pshufb %xmm1, %xmm0 # sched: [2:2.00]
; SKX-NEXT: vpsignb (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-SSE-LABEL: test_psignb:
+; BDVER2-SSE: # %bb.0:
+; BDVER2-SSE-NEXT: psignb %xmm1, %xmm0 # sched: [1:0.50]
+; BDVER2-SSE-NEXT: psignb (%rdi), %xmm0 # sched: [7:0.50]
+; BDVER2-SSE-NEXT: retq # sched: [1:1.00]
+;
+; BDVER2-LABEL: test_psignb:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: vpsignb %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
+; BDVER2-NEXT: vpsignb (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-SSE-LABEL: test_psignb:
; BTVER2-SSE: # %bb.0:
; BTVER2-SSE-NEXT: psignb %xmm1, %xmm0 # sched: [1:0.50]
; SKX-NEXT: vpsignd (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-SSE-LABEL: test_psignd:
+; BDVER2-SSE: # %bb.0:
+; BDVER2-SSE-NEXT: psignd %xmm1, %xmm0 # sched: [1:0.50]
+; BDVER2-SSE-NEXT: psignd (%rdi), %xmm0 # sched: [7:0.50]
+; BDVER2-SSE-NEXT: retq # sched: [1:1.00]
+;
+; BDVER2-LABEL: test_psignd:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: vpsignd %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
+; BDVER2-NEXT: vpsignd (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-SSE-LABEL: test_psignd:
; BTVER2-SSE: # %bb.0:
; BTVER2-SSE-NEXT: psignd %xmm1, %xmm0 # sched: [1:0.50]
; SKX-NEXT: vpsignw (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-SSE-LABEL: test_psignw:
+; BDVER2-SSE: # %bb.0:
+; BDVER2-SSE-NEXT: psignw %xmm1, %xmm0 # sched: [1:0.50]
+; BDVER2-SSE-NEXT: psignw (%rdi), %xmm0 # sched: [7:0.50]
+; BDVER2-SSE-NEXT: retq # sched: [1:1.00]
+;
+; BDVER2-LABEL: test_psignw:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: vpsignw %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
+; BDVER2-NEXT: vpsignw (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-SSE-LABEL: test_psignw:
; BTVER2-SSE: # %bb.0:
; BTVER2-SSE-NEXT: psignw %xmm1, %xmm0 # sched: [1:0.50]
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=x86-64 -mattr=+tbm | FileCheck %s --check-prefix=GENERIC
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=bdver2 | FileCheck %s --check-prefix=BDVER --check-prefix=BDVER2
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=x86-64 -mattr=+tbm | FileCheck %s --check-prefix=BDVER --check-prefix=BDVER2
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=bdver3 | FileCheck %s --check-prefix=BDVER --check-prefix=BDVER3
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=bdver4 | FileCheck %s --check-prefix=BDVER --check-prefix=BDVER4
; GENERIC-NEXT: addl %ecx, %eax # sched: [1:0.33]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
-; BDVER-LABEL: test_x86_tbm_bextri_u32:
-; BDVER: # %bb.0:
-; BDVER-NEXT: bextrl $3076, %edi, %ecx # imm = 0xC04
-; BDVER-NEXT: bextrl $3076, (%rsi), %eax # imm = 0xC04
-; BDVER-NEXT: addl %ecx, %eax
-; BDVER-NEXT: retq
+; BDVER2-LABEL: test_x86_tbm_bextri_u32:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: bextrl $3076, %edi, %ecx # imm = 0xC04
+; BDVER2-NEXT: # sched: [2:1.00]
+; BDVER2-NEXT: bextrl $3076, (%rsi), %eax # imm = 0xC04
+; BDVER2-NEXT: # sched: [7:1.00]
+; BDVER2-NEXT: addl %ecx, %eax # sched: [1:0.33]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
+; BDVER3-LABEL: test_x86_tbm_bextri_u32:
+; BDVER3: # %bb.0:
+; BDVER3-NEXT: bextrl $3076, %edi, %ecx # imm = 0xC04
+; BDVER3-NEXT: bextrl $3076, (%rsi), %eax # imm = 0xC04
+; BDVER3-NEXT: addl %ecx, %eax
+; BDVER3-NEXT: retq
+;
+; BDVER4-LABEL: test_x86_tbm_bextri_u32:
+; BDVER4: # %bb.0:
+; BDVER4-NEXT: bextrl $3076, %edi, %ecx # imm = 0xC04
+; BDVER4-NEXT: bextrl $3076, (%rsi), %eax # imm = 0xC04
+; BDVER4-NEXT: addl %ecx, %eax
+; BDVER4-NEXT: retq
%a1 = load i32, i32* %p1
%r0 = lshr i32 %a0, 4
%m0 = lshr i32 %a1, 4
; GENERIC-NEXT: addq %rcx, %rax # sched: [1:0.33]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
-; BDVER-LABEL: test_x86_tbm_bextri_u64:
-; BDVER: # %bb.0:
-; BDVER-NEXT: bextrl $3076, %edi, %ecx # imm = 0xC04
-; BDVER-NEXT: bextrl $3076, (%rsi), %eax # imm = 0xC04
-; BDVER-NEXT: addq %rcx, %rax
-; BDVER-NEXT: retq
+; BDVER2-LABEL: test_x86_tbm_bextri_u64:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: bextrl $3076, %edi, %ecx # imm = 0xC04
+; BDVER2-NEXT: # sched: [2:1.00]
+; BDVER2-NEXT: bextrl $3076, (%rsi), %eax # imm = 0xC04
+; BDVER2-NEXT: # sched: [7:1.00]
+; BDVER2-NEXT: addq %rcx, %rax # sched: [1:0.33]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
+; BDVER3-LABEL: test_x86_tbm_bextri_u64:
+; BDVER3: # %bb.0:
+; BDVER3-NEXT: bextrl $3076, %edi, %ecx # imm = 0xC04
+; BDVER3-NEXT: bextrl $3076, (%rsi), %eax # imm = 0xC04
+; BDVER3-NEXT: addq %rcx, %rax
+; BDVER3-NEXT: retq
+;
+; BDVER4-LABEL: test_x86_tbm_bextri_u64:
+; BDVER4: # %bb.0:
+; BDVER4-NEXT: bextrl $3076, %edi, %ecx # imm = 0xC04
+; BDVER4-NEXT: bextrl $3076, (%rsi), %eax # imm = 0xC04
+; BDVER4-NEXT: addq %rcx, %rax
+; BDVER4-NEXT: retq
%a1 = load i64, i64* %p1
%r0 = lshr i64 %a0, 4
%m0 = lshr i64 %a1, 4
; GENERIC-NEXT: addl %ecx, %eax # sched: [1:0.33]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
-; BDVER-LABEL: test_x86_tbm_blcfill_u32:
-; BDVER: # %bb.0:
-; BDVER-NEXT: blcfilll %edi, %ecx
-; BDVER-NEXT: blcfilll (%rsi), %eax
-; BDVER-NEXT: addl %ecx, %eax
-; BDVER-NEXT: retq
+; BDVER2-LABEL: test_x86_tbm_blcfill_u32:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: blcfilll %edi, %ecx # sched: [1:0.33]
+; BDVER2-NEXT: blcfilll (%rsi), %eax # sched: [6:0.50]
+; BDVER2-NEXT: addl %ecx, %eax # sched: [1:0.33]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
+; BDVER3-LABEL: test_x86_tbm_blcfill_u32:
+; BDVER3: # %bb.0:
+; BDVER3-NEXT: blcfilll %edi, %ecx
+; BDVER3-NEXT: blcfilll (%rsi), %eax
+; BDVER3-NEXT: addl %ecx, %eax
+; BDVER3-NEXT: retq
+;
+; BDVER4-LABEL: test_x86_tbm_blcfill_u32:
+; BDVER4: # %bb.0:
+; BDVER4-NEXT: blcfilll %edi, %ecx
+; BDVER4-NEXT: blcfilll (%rsi), %eax
+; BDVER4-NEXT: addl %ecx, %eax
+; BDVER4-NEXT: retq
%a1 = load i32, i32* %p1
%r0 = add i32 %a0, 1
%m0 = add i32 %a1, 1
; GENERIC-NEXT: addq %rcx, %rax # sched: [1:0.33]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
-; BDVER-LABEL: test_x86_tbm_blcfill_u64:
-; BDVER: # %bb.0:
-; BDVER-NEXT: blcfillq %rdi, %rcx
-; BDVER-NEXT: blcfillq (%rsi), %rax
-; BDVER-NEXT: addq %rcx, %rax
-; BDVER-NEXT: retq
+; BDVER2-LABEL: test_x86_tbm_blcfill_u64:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: blcfillq %rdi, %rcx # sched: [1:0.33]
+; BDVER2-NEXT: blcfillq (%rsi), %rax # sched: [6:0.50]
+; BDVER2-NEXT: addq %rcx, %rax # sched: [1:0.33]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
+; BDVER3-LABEL: test_x86_tbm_blcfill_u64:
+; BDVER3: # %bb.0:
+; BDVER3-NEXT: blcfillq %rdi, %rcx
+; BDVER3-NEXT: blcfillq (%rsi), %rax
+; BDVER3-NEXT: addq %rcx, %rax
+; BDVER3-NEXT: retq
+;
+; BDVER4-LABEL: test_x86_tbm_blcfill_u64:
+; BDVER4: # %bb.0:
+; BDVER4-NEXT: blcfillq %rdi, %rcx
+; BDVER4-NEXT: blcfillq (%rsi), %rax
+; BDVER4-NEXT: addq %rcx, %rax
+; BDVER4-NEXT: retq
%a1 = load i64, i64* %p1
%r0 = add i64 %a0, 1
%m0 = add i64 %a1, 1
; GENERIC-NEXT: addl %ecx, %eax # sched: [1:0.33]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
-; BDVER-LABEL: test_x86_tbm_blci_u32:
-; BDVER: # %bb.0:
-; BDVER-NEXT: blcil %edi, %ecx
-; BDVER-NEXT: blcil (%rsi), %eax
-; BDVER-NEXT: addl %ecx, %eax
-; BDVER-NEXT: retq
+; BDVER2-LABEL: test_x86_tbm_blci_u32:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: blcil %edi, %ecx # sched: [1:0.33]
+; BDVER2-NEXT: blcil (%rsi), %eax # sched: [6:0.50]
+; BDVER2-NEXT: addl %ecx, %eax # sched: [1:0.33]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
+; BDVER3-LABEL: test_x86_tbm_blci_u32:
+; BDVER3: # %bb.0:
+; BDVER3-NEXT: blcil %edi, %ecx
+; BDVER3-NEXT: blcil (%rsi), %eax
+; BDVER3-NEXT: addl %ecx, %eax
+; BDVER3-NEXT: retq
+;
+; BDVER4-LABEL: test_x86_tbm_blci_u32:
+; BDVER4: # %bb.0:
+; BDVER4-NEXT: blcil %edi, %ecx
+; BDVER4-NEXT: blcil (%rsi), %eax
+; BDVER4-NEXT: addl %ecx, %eax
+; BDVER4-NEXT: retq
%a1 = load i32, i32* %p1
%r0 = add i32 1, %a0
%m0 = add i32 1, %a1
; GENERIC-NEXT: addq %rcx, %rax # sched: [1:0.33]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
-; BDVER-LABEL: test_x86_tbm_blci_u64:
-; BDVER: # %bb.0:
-; BDVER-NEXT: blciq %rdi, %rcx
-; BDVER-NEXT: blciq (%rsi), %rax
-; BDVER-NEXT: addq %rcx, %rax
-; BDVER-NEXT: retq
+; BDVER2-LABEL: test_x86_tbm_blci_u64:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: blciq %rdi, %rcx # sched: [1:0.33]
+; BDVER2-NEXT: blciq (%rsi), %rax # sched: [6:0.50]
+; BDVER2-NEXT: addq %rcx, %rax # sched: [1:0.33]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
+; BDVER3-LABEL: test_x86_tbm_blci_u64:
+; BDVER3: # %bb.0:
+; BDVER3-NEXT: blciq %rdi, %rcx
+; BDVER3-NEXT: blciq (%rsi), %rax
+; BDVER3-NEXT: addq %rcx, %rax
+; BDVER3-NEXT: retq
+;
+; BDVER4-LABEL: test_x86_tbm_blci_u64:
+; BDVER4: # %bb.0:
+; BDVER4-NEXT: blciq %rdi, %rcx
+; BDVER4-NEXT: blciq (%rsi), %rax
+; BDVER4-NEXT: addq %rcx, %rax
+; BDVER4-NEXT: retq
%a1 = load i64, i64* %p1
%r0 = add i64 1, %a0
%m0 = add i64 1, %a1
; GENERIC-NEXT: addl %ecx, %eax # sched: [1:0.33]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
-; BDVER-LABEL: test_x86_tbm_blcic_u32:
-; BDVER: # %bb.0:
-; BDVER-NEXT: blcicl %edi, %ecx
-; BDVER-NEXT: blcicl (%rsi), %eax
-; BDVER-NEXT: addl %ecx, %eax
-; BDVER-NEXT: retq
+; BDVER2-LABEL: test_x86_tbm_blcic_u32:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: blcicl %edi, %ecx # sched: [1:0.33]
+; BDVER2-NEXT: blcicl (%rsi), %eax # sched: [6:0.50]
+; BDVER2-NEXT: addl %ecx, %eax # sched: [1:0.33]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
+; BDVER3-LABEL: test_x86_tbm_blcic_u32:
+; BDVER3: # %bb.0:
+; BDVER3-NEXT: blcicl %edi, %ecx
+; BDVER3-NEXT: blcicl (%rsi), %eax
+; BDVER3-NEXT: addl %ecx, %eax
+; BDVER3-NEXT: retq
+;
+; BDVER4-LABEL: test_x86_tbm_blcic_u32:
+; BDVER4: # %bb.0:
+; BDVER4-NEXT: blcicl %edi, %ecx
+; BDVER4-NEXT: blcicl (%rsi), %eax
+; BDVER4-NEXT: addl %ecx, %eax
+; BDVER4-NEXT: retq
%a1 = load i32, i32* %p1
%r0 = xor i32 %a0, -1
%m0 = xor i32 %a1, -1
; GENERIC-NEXT: addq %rcx, %rax # sched: [1:0.33]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
-; BDVER-LABEL: test_x86_tbm_blcic_u64:
-; BDVER: # %bb.0:
-; BDVER-NEXT: blcicq %rdi, %rcx
-; BDVER-NEXT: blcicq (%rsi), %rax
-; BDVER-NEXT: addq %rcx, %rax
-; BDVER-NEXT: retq
+; BDVER2-LABEL: test_x86_tbm_blcic_u64:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: blcicq %rdi, %rcx # sched: [1:0.33]
+; BDVER2-NEXT: blcicq (%rsi), %rax # sched: [6:0.50]
+; BDVER2-NEXT: addq %rcx, %rax # sched: [1:0.33]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
+; BDVER3-LABEL: test_x86_tbm_blcic_u64:
+; BDVER3: # %bb.0:
+; BDVER3-NEXT: blcicq %rdi, %rcx
+; BDVER3-NEXT: blcicq (%rsi), %rax
+; BDVER3-NEXT: addq %rcx, %rax
+; BDVER3-NEXT: retq
+;
+; BDVER4-LABEL: test_x86_tbm_blcic_u64:
+; BDVER4: # %bb.0:
+; BDVER4-NEXT: blcicq %rdi, %rcx
+; BDVER4-NEXT: blcicq (%rsi), %rax
+; BDVER4-NEXT: addq %rcx, %rax
+; BDVER4-NEXT: retq
%a1 = load i64, i64* %p1
%r0 = xor i64 %a0, -1
%m0 = xor i64 %a1, -1
; GENERIC-NEXT: addl %ecx, %eax # sched: [1:0.33]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
-; BDVER-LABEL: test_x86_tbm_blcmsk_u32:
-; BDVER: # %bb.0:
-; BDVER-NEXT: blcmskl %edi, %ecx
-; BDVER-NEXT: blcmskl (%rsi), %eax
-; BDVER-NEXT: addl %ecx, %eax
-; BDVER-NEXT: retq
+; BDVER2-LABEL: test_x86_tbm_blcmsk_u32:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: blcmskl %edi, %ecx # sched: [1:0.33]
+; BDVER2-NEXT: blcmskl (%rsi), %eax # sched: [6:0.50]
+; BDVER2-NEXT: addl %ecx, %eax # sched: [1:0.33]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
+; BDVER3-LABEL: test_x86_tbm_blcmsk_u32:
+; BDVER3: # %bb.0:
+; BDVER3-NEXT: blcmskl %edi, %ecx
+; BDVER3-NEXT: blcmskl (%rsi), %eax
+; BDVER3-NEXT: addl %ecx, %eax
+; BDVER3-NEXT: retq
+;
+; BDVER4-LABEL: test_x86_tbm_blcmsk_u32:
+; BDVER4: # %bb.0:
+; BDVER4-NEXT: blcmskl %edi, %ecx
+; BDVER4-NEXT: blcmskl (%rsi), %eax
+; BDVER4-NEXT: addl %ecx, %eax
+; BDVER4-NEXT: retq
%a1 = load i32, i32* %p1
%r0 = add i32 %a0, 1
%m0 = add i32 %a1, 1
; GENERIC-NEXT: addq %rcx, %rax # sched: [1:0.33]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
-; BDVER-LABEL: test_x86_tbm_blcmsk_u64:
-; BDVER: # %bb.0:
-; BDVER-NEXT: blcmskq %rdi, %rcx
-; BDVER-NEXT: blcmskq (%rsi), %rax
-; BDVER-NEXT: addq %rcx, %rax
-; BDVER-NEXT: retq
+; BDVER2-LABEL: test_x86_tbm_blcmsk_u64:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: blcmskq %rdi, %rcx # sched: [1:0.33]
+; BDVER2-NEXT: blcmskq (%rsi), %rax # sched: [6:0.50]
+; BDVER2-NEXT: addq %rcx, %rax # sched: [1:0.33]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
+; BDVER3-LABEL: test_x86_tbm_blcmsk_u64:
+; BDVER3: # %bb.0:
+; BDVER3-NEXT: blcmskq %rdi, %rcx
+; BDVER3-NEXT: blcmskq (%rsi), %rax
+; BDVER3-NEXT: addq %rcx, %rax
+; BDVER3-NEXT: retq
+;
+; BDVER4-LABEL: test_x86_tbm_blcmsk_u64:
+; BDVER4: # %bb.0:
+; BDVER4-NEXT: blcmskq %rdi, %rcx
+; BDVER4-NEXT: blcmskq (%rsi), %rax
+; BDVER4-NEXT: addq %rcx, %rax
+; BDVER4-NEXT: retq
%a1 = load i64, i64* %p1
%r0 = add i64 %a0, 1
%m0 = add i64 %a1, 1
; GENERIC-NEXT: addl %ecx, %eax # sched: [1:0.33]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
-; BDVER-LABEL: test_x86_tbm_blcs_u32:
-; BDVER: # %bb.0:
-; BDVER-NEXT: blcsl %edi, %ecx
-; BDVER-NEXT: blcsl (%rsi), %eax
-; BDVER-NEXT: addl %ecx, %eax
-; BDVER-NEXT: retq
+; BDVER2-LABEL: test_x86_tbm_blcs_u32:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: blcsl %edi, %ecx # sched: [1:0.33]
+; BDVER2-NEXT: blcsl (%rsi), %eax # sched: [6:0.50]
+; BDVER2-NEXT: addl %ecx, %eax # sched: [1:0.33]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
+; BDVER3-LABEL: test_x86_tbm_blcs_u32:
+; BDVER3: # %bb.0:
+; BDVER3-NEXT: blcsl %edi, %ecx
+; BDVER3-NEXT: blcsl (%rsi), %eax
+; BDVER3-NEXT: addl %ecx, %eax
+; BDVER3-NEXT: retq
+;
+; BDVER4-LABEL: test_x86_tbm_blcs_u32:
+; BDVER4: # %bb.0:
+; BDVER4-NEXT: blcsl %edi, %ecx
+; BDVER4-NEXT: blcsl (%rsi), %eax
+; BDVER4-NEXT: addl %ecx, %eax
+; BDVER4-NEXT: retq
%a1 = load i32, i32* %p1
%r0 = add i32 %a0, 1
%m0 = add i32 %a1, 1
; GENERIC-NEXT: addq %rcx, %rax # sched: [1:0.33]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
-; BDVER-LABEL: test_x86_tbm_blcs_u64:
-; BDVER: # %bb.0:
-; BDVER-NEXT: blcsq %rdi, %rcx
-; BDVER-NEXT: blcsq (%rsi), %rax
-; BDVER-NEXT: addq %rcx, %rax
-; BDVER-NEXT: retq
+; BDVER2-LABEL: test_x86_tbm_blcs_u64:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: blcsq %rdi, %rcx # sched: [1:0.33]
+; BDVER2-NEXT: blcsq (%rsi), %rax # sched: [6:0.50]
+; BDVER2-NEXT: addq %rcx, %rax # sched: [1:0.33]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
+; BDVER3-LABEL: test_x86_tbm_blcs_u64:
+; BDVER3: # %bb.0:
+; BDVER3-NEXT: blcsq %rdi, %rcx
+; BDVER3-NEXT: blcsq (%rsi), %rax
+; BDVER3-NEXT: addq %rcx, %rax
+; BDVER3-NEXT: retq
+;
+; BDVER4-LABEL: test_x86_tbm_blcs_u64:
+; BDVER4: # %bb.0:
+; BDVER4-NEXT: blcsq %rdi, %rcx
+; BDVER4-NEXT: blcsq (%rsi), %rax
+; BDVER4-NEXT: addq %rcx, %rax
+; BDVER4-NEXT: retq
%a1 = load i64, i64* %p1
%r0 = add i64 %a0, 1
%m0 = add i64 %a1, 1
; GENERIC-NEXT: addl %ecx, %eax # sched: [1:0.33]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
-; BDVER-LABEL: test_x86_tbm_blsfill_u32:
-; BDVER: # %bb.0:
-; BDVER-NEXT: blsfilll %edi, %ecx
-; BDVER-NEXT: blsfilll (%rsi), %eax
-; BDVER-NEXT: addl %ecx, %eax
-; BDVER-NEXT: retq
+; BDVER2-LABEL: test_x86_tbm_blsfill_u32:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: blsfilll %edi, %ecx # sched: [1:0.33]
+; BDVER2-NEXT: blsfilll (%rsi), %eax # sched: [6:0.50]
+; BDVER2-NEXT: addl %ecx, %eax # sched: [1:0.33]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
+; BDVER3-LABEL: test_x86_tbm_blsfill_u32:
+; BDVER3: # %bb.0:
+; BDVER3-NEXT: blsfilll %edi, %ecx
+; BDVER3-NEXT: blsfilll (%rsi), %eax
+; BDVER3-NEXT: addl %ecx, %eax
+; BDVER3-NEXT: retq
+;
+; BDVER4-LABEL: test_x86_tbm_blsfill_u32:
+; BDVER4: # %bb.0:
+; BDVER4-NEXT: blsfilll %edi, %ecx
+; BDVER4-NEXT: blsfilll (%rsi), %eax
+; BDVER4-NEXT: addl %ecx, %eax
+; BDVER4-NEXT: retq
%a1 = load i32, i32* %p1
%r0 = add i32 %a0, -1
%m0 = add i32 %a1, -1
; GENERIC-NEXT: addq %rcx, %rax # sched: [1:0.33]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
-; BDVER-LABEL: test_x86_tbm_blsfill_u64:
-; BDVER: # %bb.0:
-; BDVER-NEXT: blsfillq %rdi, %rcx
-; BDVER-NEXT: blsfillq (%rsi), %rax
-; BDVER-NEXT: addq %rcx, %rax
-; BDVER-NEXT: retq
+; BDVER2-LABEL: test_x86_tbm_blsfill_u64:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: blsfillq %rdi, %rcx # sched: [1:0.33]
+; BDVER2-NEXT: blsfillq (%rsi), %rax # sched: [6:0.50]
+; BDVER2-NEXT: addq %rcx, %rax # sched: [1:0.33]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
+; BDVER3-LABEL: test_x86_tbm_blsfill_u64:
+; BDVER3: # %bb.0:
+; BDVER3-NEXT: blsfillq %rdi, %rcx
+; BDVER3-NEXT: blsfillq (%rsi), %rax
+; BDVER3-NEXT: addq %rcx, %rax
+; BDVER3-NEXT: retq
+;
+; BDVER4-LABEL: test_x86_tbm_blsfill_u64:
+; BDVER4: # %bb.0:
+; BDVER4-NEXT: blsfillq %rdi, %rcx
+; BDVER4-NEXT: blsfillq (%rsi), %rax
+; BDVER4-NEXT: addq %rcx, %rax
+; BDVER4-NEXT: retq
%a1 = load i64, i64* %p1
%r0 = add i64 %a0, -1
%m0 = add i64 %a1, -1
; GENERIC-NEXT: addl %ecx, %eax # sched: [1:0.33]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
-; BDVER-LABEL: test_x86_tbm_blsic_u32:
-; BDVER: # %bb.0:
-; BDVER-NEXT: blsicl %edi, %ecx
-; BDVER-NEXT: blsicl (%rsi), %eax
-; BDVER-NEXT: addl %ecx, %eax
-; BDVER-NEXT: retq
+; BDVER2-LABEL: test_x86_tbm_blsic_u32:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: blsicl %edi, %ecx # sched: [1:0.33]
+; BDVER2-NEXT: blsicl (%rsi), %eax # sched: [6:0.50]
+; BDVER2-NEXT: addl %ecx, %eax # sched: [1:0.33]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
+; BDVER3-LABEL: test_x86_tbm_blsic_u32:
+; BDVER3: # %bb.0:
+; BDVER3-NEXT: blsicl %edi, %ecx
+; BDVER3-NEXT: blsicl (%rsi), %eax
+; BDVER3-NEXT: addl %ecx, %eax
+; BDVER3-NEXT: retq
+;
+; BDVER4-LABEL: test_x86_tbm_blsic_u32:
+; BDVER4: # %bb.0:
+; BDVER4-NEXT: blsicl %edi, %ecx
+; BDVER4-NEXT: blsicl (%rsi), %eax
+; BDVER4-NEXT: addl %ecx, %eax
+; BDVER4-NEXT: retq
%a1 = load i32, i32* %p1
%r0 = xor i32 %a0, -1
%m0 = xor i32 %a1, -1
; GENERIC-NEXT: addq %rcx, %rax # sched: [1:0.33]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
-; BDVER-LABEL: test_x86_tbm_blsic_u64:
-; BDVER: # %bb.0:
-; BDVER-NEXT: blsicq %rdi, %rcx
-; BDVER-NEXT: blsicq (%rsi), %rax
-; BDVER-NEXT: addq %rcx, %rax
-; BDVER-NEXT: retq
+; BDVER2-LABEL: test_x86_tbm_blsic_u64:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: blsicq %rdi, %rcx # sched: [1:0.33]
+; BDVER2-NEXT: blsicq (%rsi), %rax # sched: [6:0.50]
+; BDVER2-NEXT: addq %rcx, %rax # sched: [1:0.33]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
+; BDVER3-LABEL: test_x86_tbm_blsic_u64:
+; BDVER3: # %bb.0:
+; BDVER3-NEXT: blsicq %rdi, %rcx
+; BDVER3-NEXT: blsicq (%rsi), %rax
+; BDVER3-NEXT: addq %rcx, %rax
+; BDVER3-NEXT: retq
+;
+; BDVER4-LABEL: test_x86_tbm_blsic_u64:
+; BDVER4: # %bb.0:
+; BDVER4-NEXT: blsicq %rdi, %rcx
+; BDVER4-NEXT: blsicq (%rsi), %rax
+; BDVER4-NEXT: addq %rcx, %rax
+; BDVER4-NEXT: retq
%a1 = load i64, i64* %p1
%r0 = xor i64 %a0, -1
%m0 = xor i64 %a1, -1
; GENERIC-NEXT: addl %ecx, %eax # sched: [1:0.33]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
-; BDVER-LABEL: test_x86_tbm_t1mskc_u32:
-; BDVER: # %bb.0:
-; BDVER-NEXT: t1mskcl %edi, %ecx
-; BDVER-NEXT: t1mskcl (%rsi), %eax
-; BDVER-NEXT: addl %ecx, %eax
-; BDVER-NEXT: retq
+; BDVER2-LABEL: test_x86_tbm_t1mskc_u32:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: t1mskcl %edi, %ecx # sched: [1:0.33]
+; BDVER2-NEXT: t1mskcl (%rsi), %eax # sched: [6:0.50]
+; BDVER2-NEXT: addl %ecx, %eax # sched: [1:0.33]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
+; BDVER3-LABEL: test_x86_tbm_t1mskc_u32:
+; BDVER3: # %bb.0:
+; BDVER3-NEXT: t1mskcl %edi, %ecx
+; BDVER3-NEXT: t1mskcl (%rsi), %eax
+; BDVER3-NEXT: addl %ecx, %eax
+; BDVER3-NEXT: retq
+;
+; BDVER4-LABEL: test_x86_tbm_t1mskc_u32:
+; BDVER4: # %bb.0:
+; BDVER4-NEXT: t1mskcl %edi, %ecx
+; BDVER4-NEXT: t1mskcl (%rsi), %eax
+; BDVER4-NEXT: addl %ecx, %eax
+; BDVER4-NEXT: retq
%a1 = load i32, i32* %p1
%r0 = xor i32 %a0, -1
%m0 = xor i32 %a1, -1
; GENERIC-NEXT: addq %rcx, %rax # sched: [1:0.33]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
-; BDVER-LABEL: test_x86_tbm_t1mskc_u64:
-; BDVER: # %bb.0:
-; BDVER-NEXT: t1mskcq %rdi, %rcx
-; BDVER-NEXT: t1mskcq (%rsi), %rax
-; BDVER-NEXT: addq %rcx, %rax
-; BDVER-NEXT: retq
+; BDVER2-LABEL: test_x86_tbm_t1mskc_u64:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: t1mskcq %rdi, %rcx # sched: [1:0.33]
+; BDVER2-NEXT: t1mskcq (%rsi), %rax # sched: [6:0.50]
+; BDVER2-NEXT: addq %rcx, %rax # sched: [1:0.33]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
+; BDVER3-LABEL: test_x86_tbm_t1mskc_u64:
+; BDVER3: # %bb.0:
+; BDVER3-NEXT: t1mskcq %rdi, %rcx
+; BDVER3-NEXT: t1mskcq (%rsi), %rax
+; BDVER3-NEXT: addq %rcx, %rax
+; BDVER3-NEXT: retq
+;
+; BDVER4-LABEL: test_x86_tbm_t1mskc_u64:
+; BDVER4: # %bb.0:
+; BDVER4-NEXT: t1mskcq %rdi, %rcx
+; BDVER4-NEXT: t1mskcq (%rsi), %rax
+; BDVER4-NEXT: addq %rcx, %rax
+; BDVER4-NEXT: retq
%a1 = load i64, i64* %p1
%r0 = xor i64 %a0, -1
%m0 = xor i64 %a1, -1
; GENERIC-NEXT: addl %ecx, %eax # sched: [1:0.33]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
-; BDVER-LABEL: test_x86_tbm_tzmsk_u32:
-; BDVER: # %bb.0:
-; BDVER-NEXT: tzmskl %edi, %ecx
-; BDVER-NEXT: tzmskl (%rsi), %eax
-; BDVER-NEXT: addl %ecx, %eax
-; BDVER-NEXT: retq
+; BDVER2-LABEL: test_x86_tbm_tzmsk_u32:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: tzmskl %edi, %ecx # sched: [1:0.33]
+; BDVER2-NEXT: tzmskl (%rsi), %eax # sched: [6:0.50]
+; BDVER2-NEXT: addl %ecx, %eax # sched: [1:0.33]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
+; BDVER3-LABEL: test_x86_tbm_tzmsk_u32:
+; BDVER3: # %bb.0:
+; BDVER3-NEXT: tzmskl %edi, %ecx
+; BDVER3-NEXT: tzmskl (%rsi), %eax
+; BDVER3-NEXT: addl %ecx, %eax
+; BDVER3-NEXT: retq
+;
+; BDVER4-LABEL: test_x86_tbm_tzmsk_u32:
+; BDVER4: # %bb.0:
+; BDVER4-NEXT: tzmskl %edi, %ecx
+; BDVER4-NEXT: tzmskl (%rsi), %eax
+; BDVER4-NEXT: addl %ecx, %eax
+; BDVER4-NEXT: retq
%a1 = load i32, i32* %p1
%r0 = xor i32 %a0, -1
%m0 = xor i32 %a1, -1
; GENERIC-NEXT: addq %rcx, %rax # sched: [1:0.33]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
-; BDVER-LABEL: test_x86_tbm_tzmsk_u64:
-; BDVER: # %bb.0:
-; BDVER-NEXT: tzmskq %rdi, %rcx
-; BDVER-NEXT: tzmskq (%rsi), %rax
-; BDVER-NEXT: addq %rcx, %rax
-; BDVER-NEXT: retq
+; BDVER2-LABEL: test_x86_tbm_tzmsk_u64:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: tzmskq %rdi, %rcx # sched: [1:0.33]
+; BDVER2-NEXT: tzmskq (%rsi), %rax # sched: [6:0.50]
+; BDVER2-NEXT: addq %rcx, %rax # sched: [1:0.33]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
+; BDVER3-LABEL: test_x86_tbm_tzmsk_u64:
+; BDVER3: # %bb.0:
+; BDVER3-NEXT: tzmskq %rdi, %rcx
+; BDVER3-NEXT: tzmskq (%rsi), %rax
+; BDVER3-NEXT: addq %rcx, %rax
+; BDVER3-NEXT: retq
+;
+; BDVER4-LABEL: test_x86_tbm_tzmsk_u64:
+; BDVER4: # %bb.0:
+; BDVER4-NEXT: tzmskq %rdi, %rcx
+; BDVER4-NEXT: tzmskq (%rsi), %rax
+; BDVER4-NEXT: addq %rcx, %rax
+; BDVER4-NEXT: retq
%a1 = load i64, i64* %p1
%r0 = xor i64 %a0, -1
%m0 = xor i64 %a1, -1
; RUN: llc < %s -mtriple=i686-unknown-unknown -print-schedule -mcpu=broadwell | FileCheck %s --check-prefix=CHECK --check-prefix=BROADWELL
; RUN: llc < %s -mtriple=i686-unknown-unknown -print-schedule -mcpu=skylake | FileCheck %s --check-prefix=CHECK --check-prefix=SKYLAKE
; RUN: llc < %s -mtriple=i686-unknown-unknown -print-schedule -mcpu=skx | FileCheck %s --check-prefix=CHECK --check-prefix=SKX
+; RUN: llc < %s -mtriple=i686-unknown-unknown -print-schedule -mcpu=x86-64 | FileCheck %s --check-prefix=CHECK --check-prefix=BDVER2
; RUN: llc < %s -mtriple=i686-unknown-unknown -print-schedule -mcpu=btver2 | FileCheck %s --check-prefix=CHECK --check-prefix=BTVER2
; RUN: llc < %s -mtriple=i686-unknown-unknown -print-schedule -mcpu=znver1 | FileCheck %s --check-prefix=CHECK --check-prefix=ZNVER1
; SKX-NEXT: #NO_APP
; SKX-NEXT: retl # sched: [6:0.50]
;
+; BDVER2-LABEL: test_f2xm1:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: #APP
+; BDVER2-NEXT: f2xm1 # sched: [100:0.33]
+; BDVER2-NEXT: #NO_APP
+; BDVER2-NEXT: retl # sched: [6:1.00]
+;
; BTVER2-LABEL: test_f2xm1:
; BTVER2: # %bb.0:
; BTVER2-NEXT: #APP
; SKX-NEXT: #NO_APP
; SKX-NEXT: retl # sched: [6:0.50]
;
+; BDVER2-LABEL: test_fabs:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: #APP
+; BDVER2-NEXT: fabs # sched: [1:1.00]
+; BDVER2-NEXT: #NO_APP
+; BDVER2-NEXT: retl # sched: [6:1.00]
+;
; BTVER2-LABEL: test_fabs:
; BTVER2: # %bb.0:
; BTVER2-NEXT: #APP
; SKX-NEXT: #NO_APP
; SKX-NEXT: retl # sched: [6:0.50]
;
+; BDVER2-LABEL: test_fadd:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [5:0.50]
+; BDVER2-NEXT: movl {{[0-9]+}}(%esp), %ecx # sched: [5:0.50]
+; BDVER2-NEXT: #APP
+; BDVER2-NEXT: fadd %st(0), %st(1) # sched: [3:1.00]
+; BDVER2-NEXT: fadd %st(2) # sched: [3:1.00]
+; BDVER2-NEXT: fadds (%ecx) # sched: [10:1.00]
+; BDVER2-NEXT: faddl (%eax) # sched: [10:1.00]
+; BDVER2-NEXT: #NO_APP
+; BDVER2-NEXT: retl # sched: [6:1.00]
+;
; BTVER2-LABEL: test_fadd:
; BTVER2: # %bb.0:
; BTVER2-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [5:1.00]
; SKX-NEXT: #NO_APP
; SKX-NEXT: retl # sched: [6:0.50]
;
+; BDVER2-LABEL: test_faddp_fiadd:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [5:0.50]
+; BDVER2-NEXT: movl {{[0-9]+}}(%esp), %ecx # sched: [5:0.50]
+; BDVER2-NEXT: #APP
+; BDVER2-NEXT: faddp %st(1) # sched: [3:1.00]
+; BDVER2-NEXT: faddp %st(2) # sched: [3:1.00]
+; BDVER2-NEXT: fiadds (%ecx) # sched: [13:2.00]
+; BDVER2-NEXT: fiaddl (%eax) # sched: [13:2.00]
+; BDVER2-NEXT: #NO_APP
+; BDVER2-NEXT: retl # sched: [6:1.00]
+;
; BTVER2-LABEL: test_faddp_fiadd:
; BTVER2: # %bb.0:
; BTVER2-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [5:1.00]
; SKX-NEXT: #NO_APP
; SKX-NEXT: retl # sched: [6:0.50]
;
+; BDVER2-LABEL: test_fbld_fbstp:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [5:0.50]
+; BDVER2-NEXT: #APP
+; BDVER2-NEXT: fbld (%eax) # sched: [100:0.33]
+; BDVER2-NEXT: fbstp (%eax) # sched: [100:0.33]
+; BDVER2-NEXT: #NO_APP
+; BDVER2-NEXT: retl # sched: [6:1.00]
+;
; BTVER2-LABEL: test_fbld_fbstp:
; BTVER2: # %bb.0:
; BTVER2-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [5:1.00]
; SKX-NEXT: #NO_APP
; SKX-NEXT: retl # sched: [6:0.50]
;
+; BDVER2-LABEL: test_fchs:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: #APP
+; BDVER2-NEXT: fchs # sched: [1:1.00]
+; BDVER2-NEXT: #NO_APP
+; BDVER2-NEXT: retl # sched: [6:1.00]
+;
; BTVER2-LABEL: test_fchs:
; BTVER2: # %bb.0:
; BTVER2-NEXT: #APP
; SKX-NEXT: #NO_APP
; SKX-NEXT: retl # sched: [6:0.50]
;
+; BDVER2-LABEL: test_fclex:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: #APP
+; BDVER2-NEXT: wait # sched: [100:0.33]
+; BDVER2-NEXT: fnclex # sched: [100:0.33]
+; BDVER2-NEXT: #NO_APP
+; BDVER2-NEXT: retl # sched: [6:1.00]
+;
; BTVER2-LABEL: test_fclex:
; BTVER2: # %bb.0:
; BTVER2-NEXT: #APP
; SKX-NEXT: #NO_APP
; SKX-NEXT: retl # sched: [6:0.50]
;
+; BDVER2-LABEL: test_fnclex:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: #APP
+; BDVER2-NEXT: fnclex # sched: [100:0.33]
+; BDVER2-NEXT: #NO_APP
+; BDVER2-NEXT: retl # sched: [6:1.00]
+;
; BTVER2-LABEL: test_fnclex:
; BTVER2: # %bb.0:
; BTVER2-NEXT: #APP
; SKX-NEXT: #NO_APP
; SKX-NEXT: retl # sched: [6:0.50]
;
+; BDVER2-LABEL: test_fcmov:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: #APP
+; BDVER2-NEXT: fcmovb %st(1), %st(0) # sched: [3:2.00]
+; BDVER2-NEXT: fcmovbe %st(1), %st(0) # sched: [3:2.00]
+; BDVER2-NEXT: fcmove %st(1), %st(0) # sched: [3:2.00]
+; BDVER2-NEXT: fcmovnb %st(1), %st(0) # sched: [3:2.00]
+; BDVER2-NEXT: fcmovnbe %st(1), %st(0) # sched: [3:2.00]
+; BDVER2-NEXT: fcmovne %st(1), %st(0) # sched: [3:2.00]
+; BDVER2-NEXT: fcmovnu %st(1), %st(0) # sched: [3:2.00]
+; BDVER2-NEXT: fcmovu %st(1), %st(0) # sched: [3:2.00]
+; BDVER2-NEXT: #NO_APP
+; BDVER2-NEXT: retl # sched: [6:1.00]
+;
; BTVER2-LABEL: test_fcmov:
; BTVER2: # %bb.0:
; BTVER2-NEXT: #APP
; SKX-NEXT: #NO_APP
; SKX-NEXT: retl # sched: [6:0.50]
;
+; BDVER2-LABEL: test_fcom:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [5:0.50]
+; BDVER2-NEXT: movl {{[0-9]+}}(%esp), %ecx # sched: [5:0.50]
+; BDVER2-NEXT: #APP
+; BDVER2-NEXT: fcom %st(1) # sched: [1:1.00]
+; BDVER2-NEXT: fcom %st(3) # sched: [1:1.00]
+; BDVER2-NEXT: fcoms (%ecx) # sched: [8:1.00]
+; BDVER2-NEXT: fcoml (%eax) # sched: [8:1.00]
+; BDVER2-NEXT: #NO_APP
+; BDVER2-NEXT: retl # sched: [6:1.00]
+;
; BTVER2-LABEL: test_fcom:
; BTVER2: # %bb.0:
; BTVER2-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [5:1.00]
; SKX-NEXT: #NO_APP
; SKX-NEXT: retl # sched: [6:0.50]
;
+; BDVER2-LABEL: test_fcomp_fcompp:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [5:0.50]
+; BDVER2-NEXT: movl {{[0-9]+}}(%esp), %ecx # sched: [5:0.50]
+; BDVER2-NEXT: #APP
+; BDVER2-NEXT: fcomp %st(1) # sched: [1:1.00]
+; BDVER2-NEXT: fcomp %st(3) # sched: [1:1.00]
+; BDVER2-NEXT: fcomps (%ecx) # sched: [8:1.00]
+; BDVER2-NEXT: fcompl (%eax) # sched: [8:1.00]
+; BDVER2-NEXT: fcompp # sched: [100:0.33]
+; BDVER2-NEXT: #NO_APP
+; BDVER2-NEXT: retl # sched: [6:1.00]
+;
; BTVER2-LABEL: test_fcomp_fcompp:
; BTVER2: # %bb.0:
; BTVER2-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [5:1.00]
; SKX-NEXT: #NO_APP
; SKX-NEXT: retl # sched: [6:0.50]
;
+; BDVER2-LABEL: test_fcomi_fcomip:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: #APP
+; BDVER2-NEXT: fcomi %st(3) # sched: [3:1.00]
+; BDVER2-NEXT: fcompi %st(3) # sched: [3:1.00]
+; BDVER2-NEXT: #NO_APP
+; BDVER2-NEXT: retl # sched: [6:1.00]
+;
; BTVER2-LABEL: test_fcomi_fcomip:
; BTVER2: # %bb.0:
; BTVER2-NEXT: #APP
; SKX-NEXT: #NO_APP
; SKX-NEXT: retl # sched: [6:0.50]
;
+; BDVER2-LABEL: test_fcos:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: #APP
+; BDVER2-NEXT: fcos # sched: [100:0.33]
+; BDVER2-NEXT: #NO_APP
+; BDVER2-NEXT: retl # sched: [6:1.00]
+;
; BTVER2-LABEL: test_fcos:
; BTVER2: # %bb.0:
; BTVER2-NEXT: #APP
; SKX-NEXT: #NO_APP
; SKX-NEXT: retl # sched: [6:0.50]
;
+; BDVER2-LABEL: test_fdecstp:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: #APP
+; BDVER2-NEXT: fdecstp # sched: [1:1.00]
+; BDVER2-NEXT: #NO_APP
+; BDVER2-NEXT: retl # sched: [6:1.00]
+;
; BTVER2-LABEL: test_fdecstp:
; BTVER2: # %bb.0:
; BTVER2-NEXT: #APP
; SKX-NEXT: #NO_APP
; SKX-NEXT: retl # sched: [6:0.50]
;
+; BDVER2-LABEL: test_fdiv:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [5:0.50]
+; BDVER2-NEXT: movl {{[0-9]+}}(%esp), %ecx # sched: [5:0.50]
+; BDVER2-NEXT: #APP
+; BDVER2-NEXT: fdiv %st(0), %st(1) # sched: [14:14.00]
+; BDVER2-NEXT: fdiv %st(2) # sched: [14:14.00]
+; BDVER2-NEXT: fdivs (%ecx) # sched: [31:1.00]
+; BDVER2-NEXT: fdivl (%eax) # sched: [31:1.00]
+; BDVER2-NEXT: #NO_APP
+; BDVER2-NEXT: retl # sched: [6:1.00]
+;
; BTVER2-LABEL: test_fdiv:
; BTVER2: # %bb.0:
; BTVER2-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [5:1.00]
; SKX-NEXT: #NO_APP
; SKX-NEXT: retl # sched: [6:0.50]
;
+; BDVER2-LABEL: test_fdivp_fidiv:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [5:0.50]
+; BDVER2-NEXT: movl {{[0-9]+}}(%esp), %ecx # sched: [5:0.50]
+; BDVER2-NEXT: #APP
+; BDVER2-NEXT: fdivp %st(1) # sched: [14:14.00]
+; BDVER2-NEXT: fdivp %st(2) # sched: [14:14.00]
+; BDVER2-NEXT: fidivs (%ecx) # sched: [34:1.00]
+; BDVER2-NEXT: fidivl (%eax) # sched: [34:1.00]
+; BDVER2-NEXT: #NO_APP
+; BDVER2-NEXT: retl # sched: [6:1.00]
+;
; BTVER2-LABEL: test_fdivp_fidiv:
; BTVER2: # %bb.0:
; BTVER2-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [5:1.00]
; SKX-NEXT: #NO_APP
; SKX-NEXT: retl # sched: [6:0.50]
;
+; BDVER2-LABEL: test_fdivr:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [5:0.50]
+; BDVER2-NEXT: movl {{[0-9]+}}(%esp), %ecx # sched: [5:0.50]
+; BDVER2-NEXT: #APP
+; BDVER2-NEXT: fdivr %st(0), %st(1) # sched: [14:14.00]
+; BDVER2-NEXT: fdivr %st(2) # sched: [14:14.00]
+; BDVER2-NEXT: fdivrs (%ecx) # sched: [31:1.00]
+; BDVER2-NEXT: fdivrl (%eax) # sched: [31:1.00]
+; BDVER2-NEXT: #NO_APP
+; BDVER2-NEXT: retl # sched: [6:1.00]
+;
; BTVER2-LABEL: test_fdivr:
; BTVER2: # %bb.0:
; BTVER2-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [5:1.00]
; SKX-NEXT: #NO_APP
; SKX-NEXT: retl # sched: [6:0.50]
;
+; BDVER2-LABEL: test_fdivrp_fidivr:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [5:0.50]
+; BDVER2-NEXT: movl {{[0-9]+}}(%esp), %ecx # sched: [5:0.50]
+; BDVER2-NEXT: #APP
+; BDVER2-NEXT: fdivrp %st(1) # sched: [14:14.00]
+; BDVER2-NEXT: fdivrp %st(2) # sched: [14:14.00]
+; BDVER2-NEXT: fidivrs (%ecx) # sched: [34:1.00]
+; BDVER2-NEXT: fidivrl (%eax) # sched: [34:1.00]
+; BDVER2-NEXT: #NO_APP
+; BDVER2-NEXT: retl # sched: [6:1.00]
+;
; BTVER2-LABEL: test_fdivrp_fidivr:
; BTVER2: # %bb.0:
; BTVER2-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [5:1.00]
; SKX-NEXT: #NO_APP
; SKX-NEXT: retl # sched: [6:0.50]
;
+; BDVER2-LABEL: test_ffree:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: #APP
+; BDVER2-NEXT: ffree %st(0) # sched: [1:1.00]
+; BDVER2-NEXT: #NO_APP
+; BDVER2-NEXT: retl # sched: [6:1.00]
+;
; BTVER2-LABEL: test_ffree:
; BTVER2: # %bb.0:
; BTVER2-NEXT: #APP
; SKX-NEXT: #NO_APP
; SKX-NEXT: retl # sched: [6:0.50]
;
+; BDVER2-LABEL: test_ficom:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [5:0.50]
+; BDVER2-NEXT: movl {{[0-9]+}}(%esp), %ecx # sched: [5:0.50]
+; BDVER2-NEXT: #APP
+; BDVER2-NEXT: ficoms (%ecx) # sched: [11:2.00]
+; BDVER2-NEXT: ficoml (%eax) # sched: [11:2.00]
+; BDVER2-NEXT: ficomps (%ecx) # sched: [11:2.00]
+; BDVER2-NEXT: ficompl (%eax) # sched: [11:2.00]
+; BDVER2-NEXT: #NO_APP
+; BDVER2-NEXT: retl # sched: [6:1.00]
+;
; BTVER2-LABEL: test_ficom:
; BTVER2: # %bb.0:
; BTVER2-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [5:1.00]
; SKX-NEXT: #NO_APP
; SKX-NEXT: retl # sched: [6:0.50]
;
+; BDVER2-LABEL: test_fild:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [5:0.50]
+; BDVER2-NEXT: movl {{[0-9]+}}(%esp), %ecx # sched: [5:0.50]
+; BDVER2-NEXT: movl {{[0-9]+}}(%esp), %edx # sched: [5:0.50]
+; BDVER2-NEXT: #APP
+; BDVER2-NEXT: filds (%edx) # sched: [10:1.00]
+; BDVER2-NEXT: fildl (%ecx) # sched: [10:1.00]
+; BDVER2-NEXT: fildll (%eax) # sched: [10:1.00]
+; BDVER2-NEXT: #NO_APP
+; BDVER2-NEXT: retl # sched: [6:1.00]
+;
; BTVER2-LABEL: test_fild:
; BTVER2: # %bb.0:
; BTVER2-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [5:1.00]
; SKX-NEXT: #NO_APP
; SKX-NEXT: retl # sched: [6:0.50]
;
+; BDVER2-LABEL: test_fincstp:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: #APP
+; BDVER2-NEXT: fincstp # sched: [1:1.00]
+; BDVER2-NEXT: #NO_APP
+; BDVER2-NEXT: retl # sched: [6:1.00]
+;
; BTVER2-LABEL: test_fincstp:
; BTVER2: # %bb.0:
; BTVER2-NEXT: #APP
; SKX-NEXT: #NO_APP
; SKX-NEXT: retl # sched: [6:0.50]
;
+; BDVER2-LABEL: test_finit:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: #APP
+; BDVER2-NEXT: wait # sched: [100:0.33]
+; BDVER2-NEXT: fninit # sched: [5:1.33]
+; BDVER2-NEXT: #NO_APP
+; BDVER2-NEXT: retl # sched: [6:1.00]
+;
; BTVER2-LABEL: test_finit:
; BTVER2: # %bb.0:
; BTVER2-NEXT: #APP
; SKX-NEXT: #NO_APP
; SKX-NEXT: retl # sched: [6:0.50]
;
+; BDVER2-LABEL: test_fninit:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: #APP
+; BDVER2-NEXT: fninit # sched: [5:1.33]
+; BDVER2-NEXT: #NO_APP
+; BDVER2-NEXT: retl # sched: [6:1.00]
+;
; BTVER2-LABEL: test_fninit:
; BTVER2: # %bb.0:
; BTVER2-NEXT: #APP
; SKX-NEXT: #NO_APP
; SKX-NEXT: retl # sched: [6:0.50]
;
+; BDVER2-LABEL: test_fist_fistp_fisttp:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [5:0.50]
+; BDVER2-NEXT: movl {{[0-9]+}}(%esp), %ecx # sched: [5:0.50]
+; BDVER2-NEXT: movl {{[0-9]+}}(%esp), %edx # sched: [5:0.50]
+; BDVER2-NEXT: #APP
+; BDVER2-NEXT: fists (%edx) # sched: [9:1.00]
+; BDVER2-NEXT: fistl (%ecx) # sched: [9:1.00]
+; BDVER2-NEXT: fistps (%edx) # sched: [9:1.00]
+; BDVER2-NEXT: fistpl (%ecx) # sched: [9:1.00]
+; BDVER2-NEXT: fistpll (%eax) # sched: [9:1.00]
+; BDVER2-NEXT: fisttps (%edx) # sched: [5:1.00]
+; BDVER2-NEXT: fisttpl (%ecx) # sched: [5:1.00]
+; BDVER2-NEXT: fisttpll (%eax) # sched: [5:1.00]
+; BDVER2-NEXT: #NO_APP
+; BDVER2-NEXT: retl # sched: [6:1.00]
+;
; BTVER2-LABEL: test_fist_fistp_fisttp:
; BTVER2: # %bb.0:
; BTVER2-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [5:1.00]
; SKX-NEXT: #NO_APP
; SKX-NEXT: retl # sched: [6:0.50]
;
+; BDVER2-LABEL: test_fld:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [5:0.50]
+; BDVER2-NEXT: movl {{[0-9]+}}(%esp), %ecx # sched: [5:0.50]
+; BDVER2-NEXT: movl {{[0-9]+}}(%esp), %edx # sched: [5:0.50]
+; BDVER2-NEXT: #APP
+; BDVER2-NEXT: fld %st(0) # sched: [1:1.00]
+; BDVER2-NEXT: flds (%edx) # sched: [9:1.00]
+; BDVER2-NEXT: fldl (%ecx) # sched: [9:1.00]
+; BDVER2-NEXT: fldt (%eax) # sched: [9:1.00]
+; BDVER2-NEXT: #NO_APP
+; BDVER2-NEXT: retl # sched: [6:1.00]
+;
; BTVER2-LABEL: test_fld:
; BTVER2: # %bb.0:
; BTVER2-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [5:1.00]
; SKX-NEXT: #NO_APP
; SKX-NEXT: retl # sched: [6:0.50]
;
+; BDVER2-LABEL: test_fldcw_fldenv:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [5:0.50]
+; BDVER2-NEXT: #APP
+; BDVER2-NEXT: fldcw (%eax) # sched: [8:2.00]
+; BDVER2-NEXT: fldenv (%eax) # sched: [100:0.33]
+; BDVER2-NEXT: #NO_APP
+; BDVER2-NEXT: retl # sched: [6:1.00]
+;
; BTVER2-LABEL: test_fldcw_fldenv:
; BTVER2: # %bb.0:
; BTVER2-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [5:1.00]
; SKX-NEXT: #NO_APP
; SKX-NEXT: retl # sched: [6:0.50]
;
+; BDVER2-LABEL: test_fld1_fldl2e_fldl2t_fldlg2_fldln2_fldpi_fldz:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: #APP
+; BDVER2-NEXT: fld1 # sched: [1:1.00]
+; BDVER2-NEXT: fldl2e # sched: [1:1.00]
+; BDVER2-NEXT: fldl2t # sched: [1:1.00]
+; BDVER2-NEXT: fldlg2 # sched: [1:1.00]
+; BDVER2-NEXT: fldln2 # sched: [1:1.00]
+; BDVER2-NEXT: fldpi # sched: [1:1.00]
+; BDVER2-NEXT: fldz # sched: [1:1.00]
+; BDVER2-NEXT: #NO_APP
+; BDVER2-NEXT: retl # sched: [6:1.00]
+;
; BTVER2-LABEL: test_fld1_fldl2e_fldl2t_fldlg2_fldln2_fldpi_fldz:
; BTVER2: # %bb.0:
; BTVER2-NEXT: #APP
; SKX-NEXT: #NO_APP
; SKX-NEXT: retl # sched: [6:0.50]
;
+; BDVER2-LABEL: test_fmul:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [5:0.50]
+; BDVER2-NEXT: movl {{[0-9]+}}(%esp), %ecx # sched: [5:0.50]
+; BDVER2-NEXT: #APP
+; BDVER2-NEXT: fmul %st(0), %st(1) # sched: [5:1.00]
+; BDVER2-NEXT: fmul %st(2) # sched: [5:1.00]
+; BDVER2-NEXT: fmuls (%ecx) # sched: [12:1.00]
+; BDVER2-NEXT: fmull (%eax) # sched: [12:1.00]
+; BDVER2-NEXT: #NO_APP
+; BDVER2-NEXT: retl # sched: [6:1.00]
+;
; BTVER2-LABEL: test_fmul:
; BTVER2: # %bb.0:
; BTVER2-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [5:1.00]
; SKX-NEXT: #NO_APP
; SKX-NEXT: retl # sched: [6:0.50]
;
+; BDVER2-LABEL: test_fmulp_fimul:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [5:0.50]
+; BDVER2-NEXT: movl {{[0-9]+}}(%esp), %ecx # sched: [5:0.50]
+; BDVER2-NEXT: #APP
+; BDVER2-NEXT: fmulp %st(1) # sched: [5:1.00]
+; BDVER2-NEXT: fmulp %st(2) # sched: [5:1.00]
+; BDVER2-NEXT: fimuls (%ecx) # sched: [15:1.00]
+; BDVER2-NEXT: fimull (%eax) # sched: [15:1.00]
+; BDVER2-NEXT: #NO_APP
+; BDVER2-NEXT: retl # sched: [6:1.00]
+;
; BTVER2-LABEL: test_fmulp_fimul:
; BTVER2: # %bb.0:
; BTVER2-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [5:1.00]
; SKX-NEXT: #NO_APP
; SKX-NEXT: retl # sched: [6:0.50]
;
+; BDVER2-LABEL: test_fnop:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: #APP
+; BDVER2-NEXT: fnop # sched: [1:1.00]
+; BDVER2-NEXT: #NO_APP
+; BDVER2-NEXT: retl # sched: [6:1.00]
+;
; BTVER2-LABEL: test_fnop:
; BTVER2: # %bb.0:
; BTVER2-NEXT: #APP
; SKX-NEXT: #NO_APP
; SKX-NEXT: retl # sched: [6:0.50]
;
+; BDVER2-LABEL: test_fpatan:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: #APP
+; BDVER2-NEXT: fpatan # sched: [100:0.33]
+; BDVER2-NEXT: #NO_APP
+; BDVER2-NEXT: retl # sched: [6:1.00]
+;
; BTVER2-LABEL: test_fpatan:
; BTVER2: # %bb.0:
; BTVER2-NEXT: #APP
; SKX-NEXT: #NO_APP
; SKX-NEXT: retl # sched: [6:0.50]
;
+; BDVER2-LABEL: test_fprem_fprem1:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: #APP
+; BDVER2-NEXT: fprem # sched: [100:0.33]
+; BDVER2-NEXT: fprem1 # sched: [100:0.33]
+; BDVER2-NEXT: #NO_APP
+; BDVER2-NEXT: retl # sched: [6:1.00]
+;
; BTVER2-LABEL: test_fprem_fprem1:
; BTVER2: # %bb.0:
; BTVER2-NEXT: #APP
; SKX-NEXT: #NO_APP
; SKX-NEXT: retl # sched: [6:0.50]
;
+; BDVER2-LABEL: test_fptan:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: #APP
+; BDVER2-NEXT: fptan # sched: [100:0.33]
+; BDVER2-NEXT: #NO_APP
+; BDVER2-NEXT: retl # sched: [6:1.00]
+;
; BTVER2-LABEL: test_fptan:
; BTVER2: # %bb.0:
; BTVER2-NEXT: #APP
; SKX-NEXT: #NO_APP
; SKX-NEXT: retl # sched: [6:0.50]
;
+; BDVER2-LABEL: test_frndint:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: #APP
+; BDVER2-NEXT: frndint # sched: [100:0.33]
+; BDVER2-NEXT: #NO_APP
+; BDVER2-NEXT: retl # sched: [6:1.00]
+;
; BTVER2-LABEL: test_frndint:
; BTVER2: # %bb.0:
; BTVER2-NEXT: #APP
; SKX-NEXT: #NO_APP
; SKX-NEXT: retl # sched: [6:0.50]
;
+; BDVER2-LABEL: test_frstor:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [5:0.50]
+; BDVER2-NEXT: #APP
+; BDVER2-NEXT: frstor (%eax) # sched: [100:0.33]
+; BDVER2-NEXT: #NO_APP
+; BDVER2-NEXT: retl # sched: [6:1.00]
+;
; BTVER2-LABEL: test_frstor:
; BTVER2: # %bb.0:
; BTVER2-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [5:1.00]
; SKX-NEXT: #NO_APP
; SKX-NEXT: retl # sched: [6:0.50]
;
+; BDVER2-LABEL: test_fsave:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [5:0.50]
+; BDVER2-NEXT: #APP
+; BDVER2-NEXT: wait # sched: [100:0.33]
+; BDVER2-NEXT: fnsave (%eax) # sched: [100:0.33]
+; BDVER2-NEXT: #NO_APP
+; BDVER2-NEXT: retl # sched: [6:1.00]
+;
; BTVER2-LABEL: test_fsave:
; BTVER2: # %bb.0:
; BTVER2-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [5:1.00]
; SKX-NEXT: #NO_APP
; SKX-NEXT: retl # sched: [6:0.50]
;
+; BDVER2-LABEL: test_fnsave:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [5:0.50]
+; BDVER2-NEXT: #APP
+; BDVER2-NEXT: fnsave (%eax) # sched: [100:0.33]
+; BDVER2-NEXT: #NO_APP
+; BDVER2-NEXT: retl # sched: [6:1.00]
+;
; BTVER2-LABEL: test_fnsave:
; BTVER2: # %bb.0:
; BTVER2-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [5:1.00]
; SKX-NEXT: #NO_APP
; SKX-NEXT: retl # sched: [6:0.50]
;
+; BDVER2-LABEL: test_fscale:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: #APP
+; BDVER2-NEXT: fscale # sched: [100:0.33]
+; BDVER2-NEXT: #NO_APP
+; BDVER2-NEXT: retl # sched: [6:1.00]
+;
; BTVER2-LABEL: test_fscale:
; BTVER2: # %bb.0:
; BTVER2-NEXT: #APP
; SKX-NEXT: #NO_APP
; SKX-NEXT: retl # sched: [6:0.50]
;
+; BDVER2-LABEL: test_fsin:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: #APP
+; BDVER2-NEXT: fsin # sched: [100:0.33]
+; BDVER2-NEXT: #NO_APP
+; BDVER2-NEXT: retl # sched: [6:1.00]
+;
; BTVER2-LABEL: test_fsin:
; BTVER2: # %bb.0:
; BTVER2-NEXT: #APP
; SKX-NEXT: #NO_APP
; SKX-NEXT: retl # sched: [6:0.50]
;
+; BDVER2-LABEL: test_fsincos:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: #APP
+; BDVER2-NEXT: fsincos # sched: [100:0.33]
+; BDVER2-NEXT: #NO_APP
+; BDVER2-NEXT: retl # sched: [6:1.00]
+;
; BTVER2-LABEL: test_fsincos:
; BTVER2: # %bb.0:
; BTVER2-NEXT: #APP
; SKX-NEXT: #NO_APP
; SKX-NEXT: retl # sched: [6:0.50]
;
+; BDVER2-LABEL: test_fsqrt:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: #APP
+; BDVER2-NEXT: fsqrt # sched: [24:24.00]
+; BDVER2-NEXT: #NO_APP
+; BDVER2-NEXT: retl # sched: [6:1.00]
+;
; BTVER2-LABEL: test_fsqrt:
; BTVER2: # %bb.0:
; BTVER2-NEXT: #APP
; SKX-NEXT: #NO_APP
; SKX-NEXT: retl # sched: [6:0.50]
;
+; BDVER2-LABEL: test_fst_fstp:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [5:0.50]
+; BDVER2-NEXT: movl {{[0-9]+}}(%esp), %ecx # sched: [5:0.50]
+; BDVER2-NEXT: movl {{[0-9]+}}(%esp), %edx # sched: [5:0.50]
+; BDVER2-NEXT: #APP
+; BDVER2-NEXT: fst %st(0) # sched: [1:1.00]
+; BDVER2-NEXT: fsts (%edx) # sched: [6:1.00]
+; BDVER2-NEXT: fstl (%ecx) # sched: [6:1.00]
+; BDVER2-NEXT: fstp %st(0) # sched: [1:1.00]
+; BDVER2-NEXT: fstpl (%edx) # sched: [6:1.00]
+; BDVER2-NEXT: fstpl (%ecx) # sched: [6:1.00]
+; BDVER2-NEXT: fstpt (%eax) # sched: [6:1.00]
+; BDVER2-NEXT: #NO_APP
+; BDVER2-NEXT: retl # sched: [6:1.00]
+;
; BTVER2-LABEL: test_fst_fstp:
; BTVER2: # %bb.0:
; BTVER2-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [5:1.00]
; SKX-NEXT: #NO_APP
; SKX-NEXT: retl # sched: [6:0.50]
;
+; BDVER2-LABEL: test_fstcw_fstenv_fstsw:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [5:0.50]
+; BDVER2-NEXT: #APP
+; BDVER2-NEXT: wait # sched: [100:0.33]
+; BDVER2-NEXT: fnstcw (%eax) # sched: [7:1.00]
+; BDVER2-NEXT: wait # sched: [100:0.33]
+; BDVER2-NEXT: fnstenv (%eax) # sched: [100:0.33]
+; BDVER2-NEXT: wait # sched: [100:0.33]
+; BDVER2-NEXT: fnstsw (%eax) # sched: [7:1.00]
+; BDVER2-NEXT: #NO_APP
+; BDVER2-NEXT: retl # sched: [6:1.00]
+;
; BTVER2-LABEL: test_fstcw_fstenv_fstsw:
; BTVER2: # %bb.0:
; BTVER2-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [5:1.00]
; SKX-NEXT: #NO_APP
; SKX-NEXT: retl # sched: [6:0.50]
;
+; BDVER2-LABEL: test_fnstcw_fnstenv_fnstsw:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [5:0.50]
+; BDVER2-NEXT: #APP
+; BDVER2-NEXT: fnstcw (%eax) # sched: [7:1.00]
+; BDVER2-NEXT: fnstenv (%eax) # sched: [100:0.33]
+; BDVER2-NEXT: fnstsw (%eax) # sched: [7:1.00]
+; BDVER2-NEXT: #NO_APP
+; BDVER2-NEXT: retl # sched: [6:1.00]
+;
; BTVER2-LABEL: test_fnstcw_fnstenv_fnstsw:
; BTVER2: # %bb.0:
; BTVER2-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [5:1.00]
; SKX-NEXT: #NO_APP
; SKX-NEXT: retl # sched: [6:0.50]
;
+; BDVER2-LABEL: test_fsub:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [5:0.50]
+; BDVER2-NEXT: movl {{[0-9]+}}(%esp), %ecx # sched: [5:0.50]
+; BDVER2-NEXT: #APP
+; BDVER2-NEXT: fsub %st(0), %st(1) # sched: [3:1.00]
+; BDVER2-NEXT: fsub %st(2) # sched: [3:1.00]
+; BDVER2-NEXT: fsubs (%ecx) # sched: [10:1.00]
+; BDVER2-NEXT: fsubl (%eax) # sched: [10:1.00]
+; BDVER2-NEXT: #NO_APP
+; BDVER2-NEXT: retl # sched: [6:1.00]
+;
; BTVER2-LABEL: test_fsub:
; BTVER2: # %bb.0:
; BTVER2-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [5:1.00]
; SKX-NEXT: #NO_APP
; SKX-NEXT: retl # sched: [6:0.50]
;
+; BDVER2-LABEL: test_fsubp_fisub:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [5:0.50]
+; BDVER2-NEXT: movl {{[0-9]+}}(%esp), %ecx # sched: [5:0.50]
+; BDVER2-NEXT: #APP
+; BDVER2-NEXT: fsubp %st(1) # sched: [3:1.00]
+; BDVER2-NEXT: fsubp %st(2) # sched: [3:1.00]
+; BDVER2-NEXT: fisubs (%ecx) # sched: [13:2.00]
+; BDVER2-NEXT: fisubl (%eax) # sched: [13:2.00]
+; BDVER2-NEXT: #NO_APP
+; BDVER2-NEXT: retl # sched: [6:1.00]
+;
; BTVER2-LABEL: test_fsubp_fisub:
; BTVER2: # %bb.0:
; BTVER2-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [5:1.00]
; SKX-NEXT: #NO_APP
; SKX-NEXT: retl # sched: [6:0.50]
;
+; BDVER2-LABEL: test_fsubr:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [5:0.50]
+; BDVER2-NEXT: movl {{[0-9]+}}(%esp), %ecx # sched: [5:0.50]
+; BDVER2-NEXT: #APP
+; BDVER2-NEXT: fsubr %st(0), %st(1) # sched: [3:1.00]
+; BDVER2-NEXT: fsubr %st(2) # sched: [3:1.00]
+; BDVER2-NEXT: fsubrs (%ecx) # sched: [10:1.00]
+; BDVER2-NEXT: fsubrl (%eax) # sched: [10:1.00]
+; BDVER2-NEXT: #NO_APP
+; BDVER2-NEXT: retl # sched: [6:1.00]
+;
; BTVER2-LABEL: test_fsubr:
; BTVER2: # %bb.0:
; BTVER2-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [5:1.00]
; SKX-NEXT: #NO_APP
; SKX-NEXT: retl # sched: [6:0.50]
;
+; BDVER2-LABEL: test_fsubrp_fisubr:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [5:0.50]
+; BDVER2-NEXT: movl {{[0-9]+}}(%esp), %ecx # sched: [5:0.50]
+; BDVER2-NEXT: #APP
+; BDVER2-NEXT: fsubrp %st(1) # sched: [3:1.00]
+; BDVER2-NEXT: fsubrp %st(2) # sched: [3:1.00]
+; BDVER2-NEXT: fisubrs (%ecx) # sched: [13:2.00]
+; BDVER2-NEXT: fisubrl (%eax) # sched: [13:2.00]
+; BDVER2-NEXT: #NO_APP
+; BDVER2-NEXT: retl # sched: [6:1.00]
+;
; BTVER2-LABEL: test_fsubrp_fisubr:
; BTVER2: # %bb.0:
; BTVER2-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [5:1.00]
; SKX-NEXT: #NO_APP
; SKX-NEXT: retl # sched: [6:0.50]
;
+; BDVER2-LABEL: test_ftst:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: #APP
+; BDVER2-NEXT: ftst # sched: [3:1.00]
+; BDVER2-NEXT: #NO_APP
+; BDVER2-NEXT: retl # sched: [6:1.00]
+;
; BTVER2-LABEL: test_ftst:
; BTVER2: # %bb.0:
; BTVER2-NEXT: #APP
; SKX-NEXT: #NO_APP
; SKX-NEXT: retl # sched: [6:0.50]
;
+; BDVER2-LABEL: test_fucom_fucomp_fucompp:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: #APP
+; BDVER2-NEXT: fucom %st(1) # sched: [1:1.00]
+; BDVER2-NEXT: fucom %st(3) # sched: [1:1.00]
+; BDVER2-NEXT: fucomp %st(1) # sched: [1:1.00]
+; BDVER2-NEXT: fucomp %st(3) # sched: [1:1.00]
+; BDVER2-NEXT: fucompp # sched: [3:1.00]
+; BDVER2-NEXT: #NO_APP
+; BDVER2-NEXT: retl # sched: [6:1.00]
+;
; BTVER2-LABEL: test_fucom_fucomp_fucompp:
; BTVER2: # %bb.0:
; BTVER2-NEXT: #APP
; SKX-NEXT: #NO_APP
; SKX-NEXT: retl # sched: [6:0.50]
;
+; BDVER2-LABEL: test_fucomi_fucomip:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: #APP
+; BDVER2-NEXT: fucomi %st(3) # sched: [3:1.00]
+; BDVER2-NEXT: fucompi %st(3) # sched: [3:1.00]
+; BDVER2-NEXT: #NO_APP
+; BDVER2-NEXT: retl # sched: [6:1.00]
+;
; BTVER2-LABEL: test_fucomi_fucomip:
; BTVER2: # %bb.0:
; BTVER2-NEXT: #APP
; SKX-NEXT: #NO_APP
; SKX-NEXT: retl # sched: [6:0.50]
;
+; BDVER2-LABEL: test_fwait:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: #APP
+; BDVER2-NEXT: wait # sched: [100:0.33]
+; BDVER2-NEXT: #NO_APP
+; BDVER2-NEXT: retl # sched: [6:1.00]
+;
; BTVER2-LABEL: test_fwait:
; BTVER2: # %bb.0:
; BTVER2-NEXT: #APP
; SKX-NEXT: #NO_APP
; SKX-NEXT: retl # sched: [6:0.50]
;
+; BDVER2-LABEL: test_fxam:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: #APP
+; BDVER2-NEXT: fxam # sched: [100:0.33]
+; BDVER2-NEXT: #NO_APP
+; BDVER2-NEXT: retl # sched: [6:1.00]
+;
; BTVER2-LABEL: test_fxam:
; BTVER2: # %bb.0:
; BTVER2-NEXT: #APP
; SKX-NEXT: #NO_APP
; SKX-NEXT: retl # sched: [6:0.50]
;
+; BDVER2-LABEL: test_fxch:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: #APP
+; BDVER2-NEXT: fxch %st(1) # sched: [1:0.33]
+; BDVER2-NEXT: fxch %st(3) # sched: [1:0.33]
+; BDVER2-NEXT: #NO_APP
+; BDVER2-NEXT: retl # sched: [6:1.00]
+;
; BTVER2-LABEL: test_fxch:
; BTVER2: # %bb.0:
; BTVER2-NEXT: #APP
; SKX-NEXT: #NO_APP
; SKX-NEXT: retl # sched: [6:0.50]
;
+; BDVER2-LABEL: test_fxrstor_fxsave:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [5:0.50]
+; BDVER2-NEXT: #APP
+; BDVER2-NEXT: fxrstor (%eax) # sched: [5:2.00]
+; BDVER2-NEXT: fxsave (%eax) # sched: [100:0.33]
+; BDVER2-NEXT: #NO_APP
+; BDVER2-NEXT: retl # sched: [6:1.00]
+;
; BTVER2-LABEL: test_fxrstor_fxsave:
; BTVER2: # %bb.0:
; BTVER2-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [5:1.00]
; SKX-NEXT: #NO_APP
; SKX-NEXT: retl # sched: [6:0.50]
;
+; BDVER2-LABEL: test_fxtract:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: #APP
+; BDVER2-NEXT: fxtract # sched: [100:0.33]
+; BDVER2-NEXT: #NO_APP
+; BDVER2-NEXT: retl # sched: [6:1.00]
+;
; BTVER2-LABEL: test_fxtract:
; BTVER2: # %bb.0:
; BTVER2-NEXT: #APP
; SKX-NEXT: #NO_APP
; SKX-NEXT: retl # sched: [6:0.50]
;
+; BDVER2-LABEL: test_fyl2x:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: #APP
+; BDVER2-NEXT: fyl2x # sched: [100:0.33]
+; BDVER2-NEXT: #NO_APP
+; BDVER2-NEXT: retl # sched: [6:1.00]
+;
; BTVER2-LABEL: test_fyl2x:
; BTVER2: # %bb.0:
; BTVER2-NEXT: #APP
; SKX-NEXT: #NO_APP
; SKX-NEXT: retl # sched: [6:0.50]
;
+; BDVER2-LABEL: test_fyl2xp1:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: #APP
+; BDVER2-NEXT: fyl2xp1 # sched: [100:0.33]
+; BDVER2-NEXT: #NO_APP
+; BDVER2-NEXT: retl # sched: [6:1.00]
+;
; BTVER2-LABEL: test_fyl2xp1:
; BTVER2: # %bb.0:
; BTVER2-NEXT: #APP
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=x86-64 -mattr=+xop | FileCheck %s --check-prefix=GENERIC
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=bdver1 | FileCheck %s --check-prefix=BDVER --check-prefix=BDVER1
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=bdver2 | FileCheck %s --check-prefix=BDVER --check-prefix=BDVER2
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=x86-64 -mattr=+xop | FileCheck %s --check-prefix=BDVER --check-prefix=BDVER12 --check-prefix=BDVER1
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=x86-64 -mattr=+xop | FileCheck %s --check-prefix=BDVER --check-prefix=BDVER12 --check-prefix=BDVER2
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=bdver3 | FileCheck %s --check-prefix=BDVER --check-prefix=BDVER3
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=bdver4 | FileCheck %s --check-prefix=BDVER --check-prefix=BDVER4
; GENERIC-NEXT: vzeroupper # sched: [100:0.33]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
-; BDVER-LABEL: test_vfrczpd:
-; BDVER: # %bb.0:
-; BDVER-NEXT: #APP
-; BDVER-NEXT: vfrczpd %xmm0, %xmm0
-; BDVER-NEXT: vfrczpd %ymm1, %ymm1
-; BDVER-NEXT: vfrczpd (%rdi), %xmm0
-; BDVER-NEXT: vfrczpd (%rsi), %ymm1
-; BDVER-NEXT: #NO_APP
-; BDVER-NEXT: vzeroupper
-; BDVER-NEXT: retq
+; BDVER12-LABEL: test_vfrczpd:
+; BDVER12: # %bb.0:
+; BDVER12-NEXT: #APP
+; BDVER12-NEXT: vfrczpd %xmm0, %xmm0 # sched: [3:1.00]
+; BDVER12-NEXT: vfrczpd %ymm1, %ymm1 # sched: [3:1.00]
+; BDVER12-NEXT: vfrczpd (%rdi), %xmm0 # sched: [9:1.00]
+; BDVER12-NEXT: vfrczpd (%rsi), %ymm1 # sched: [10:1.00]
+; BDVER12-NEXT: #NO_APP
+; BDVER12-NEXT: vzeroupper # sched: [100:0.33]
+; BDVER12-NEXT: retq # sched: [1:1.00]
+;
+; BDVER3-LABEL: test_vfrczpd:
+; BDVER3: # %bb.0:
+; BDVER3-NEXT: #APP
+; BDVER3-NEXT: vfrczpd %xmm0, %xmm0
+; BDVER3-NEXT: vfrczpd %ymm1, %ymm1
+; BDVER3-NEXT: vfrczpd (%rdi), %xmm0
+; BDVER3-NEXT: vfrczpd (%rsi), %ymm1
+; BDVER3-NEXT: #NO_APP
+; BDVER3-NEXT: vzeroupper
+; BDVER3-NEXT: retq
+;
+; BDVER4-LABEL: test_vfrczpd:
+; BDVER4: # %bb.0:
+; BDVER4-NEXT: #APP
+; BDVER4-NEXT: vfrczpd %xmm0, %xmm0
+; BDVER4-NEXT: vfrczpd %ymm1, %ymm1
+; BDVER4-NEXT: vfrczpd (%rdi), %xmm0
+; BDVER4-NEXT: vfrczpd (%rsi), %ymm1
+; BDVER4-NEXT: #NO_APP
+; BDVER4-NEXT: vzeroupper
+; BDVER4-NEXT: retq
call void asm sideeffect "vfrczpd $0, $0 \0a\09 vfrczpd $1, $1 \0a\09 vfrczpd $2, $0 \0a\09 vfrczpd $3, $1", "x,x,*m,*m"(<2 x double> %a0, <4 x double> %a1, <2 x double> *%a2, <4 x double> *%a3)
ret void
}
; GENERIC-NEXT: vzeroupper # sched: [100:0.33]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
-; BDVER-LABEL: test_vfrczps:
-; BDVER: # %bb.0:
-; BDVER-NEXT: #APP
-; BDVER-NEXT: vfrczps %xmm0, %xmm0
-; BDVER-NEXT: vfrczps %ymm1, %ymm1
-; BDVER-NEXT: vfrczps (%rdi), %xmm0
-; BDVER-NEXT: vfrczps (%rsi), %ymm1
-; BDVER-NEXT: #NO_APP
-; BDVER-NEXT: vzeroupper
-; BDVER-NEXT: retq
+; BDVER12-LABEL: test_vfrczps:
+; BDVER12: # %bb.0:
+; BDVER12-NEXT: #APP
+; BDVER12-NEXT: vfrczps %xmm0, %xmm0 # sched: [3:1.00]
+; BDVER12-NEXT: vfrczps %ymm1, %ymm1 # sched: [3:1.00]
+; BDVER12-NEXT: vfrczps (%rdi), %xmm0 # sched: [9:1.00]
+; BDVER12-NEXT: vfrczps (%rsi), %ymm1 # sched: [10:1.00]
+; BDVER12-NEXT: #NO_APP
+; BDVER12-NEXT: vzeroupper # sched: [100:0.33]
+; BDVER12-NEXT: retq # sched: [1:1.00]
+;
+; BDVER3-LABEL: test_vfrczps:
+; BDVER3: # %bb.0:
+; BDVER3-NEXT: #APP
+; BDVER3-NEXT: vfrczps %xmm0, %xmm0
+; BDVER3-NEXT: vfrczps %ymm1, %ymm1
+; BDVER3-NEXT: vfrczps (%rdi), %xmm0
+; BDVER3-NEXT: vfrczps (%rsi), %ymm1
+; BDVER3-NEXT: #NO_APP
+; BDVER3-NEXT: vzeroupper
+; BDVER3-NEXT: retq
+;
+; BDVER4-LABEL: test_vfrczps:
+; BDVER4: # %bb.0:
+; BDVER4-NEXT: #APP
+; BDVER4-NEXT: vfrczps %xmm0, %xmm0
+; BDVER4-NEXT: vfrczps %ymm1, %ymm1
+; BDVER4-NEXT: vfrczps (%rdi), %xmm0
+; BDVER4-NEXT: vfrczps (%rsi), %ymm1
+; BDVER4-NEXT: #NO_APP
+; BDVER4-NEXT: vzeroupper
+; BDVER4-NEXT: retq
call void asm sideeffect "vfrczps $0, $0 \0a\09 vfrczps $1, $1 \0a\09 vfrczps $2, $0 \0a\09 vfrczps $3, $1", "x,x,*m,*m"(<4 x float> %a0, <4 x double> %a1, <4 x float> *%a2, <4 x double> *%a3)
ret void
}
; GENERIC-NEXT: #NO_APP
; GENERIC-NEXT: retq # sched: [1:1.00]
;
-; BDVER-LABEL: test_vfrczsd:
-; BDVER: # %bb.0:
-; BDVER-NEXT: #APP
-; BDVER-NEXT: vfrczsd %xmm0, %xmm0
-; BDVER-NEXT: vfrczsd (%rdi), %xmm0
-; BDVER-NEXT: #NO_APP
-; BDVER-NEXT: retq
+; BDVER12-LABEL: test_vfrczsd:
+; BDVER12: # %bb.0:
+; BDVER12-NEXT: #APP
+; BDVER12-NEXT: vfrczsd %xmm0, %xmm0 # sched: [3:1.00]
+; BDVER12-NEXT: vfrczsd (%rdi), %xmm0 # sched: [9:1.00]
+; BDVER12-NEXT: #NO_APP
+; BDVER12-NEXT: retq # sched: [1:1.00]
+;
+; BDVER3-LABEL: test_vfrczsd:
+; BDVER3: # %bb.0:
+; BDVER3-NEXT: #APP
+; BDVER3-NEXT: vfrczsd %xmm0, %xmm0
+; BDVER3-NEXT: vfrczsd (%rdi), %xmm0
+; BDVER3-NEXT: #NO_APP
+; BDVER3-NEXT: retq
+;
+; BDVER4-LABEL: test_vfrczsd:
+; BDVER4: # %bb.0:
+; BDVER4-NEXT: #APP
+; BDVER4-NEXT: vfrczsd %xmm0, %xmm0
+; BDVER4-NEXT: vfrczsd (%rdi), %xmm0
+; BDVER4-NEXT: #NO_APP
+; BDVER4-NEXT: retq
call void asm sideeffect "vfrczsd $0, $0 \0a\09 vfrczsd $1, $0", "x,*m"(<2 x double> %a0, <2 x double> *%a1)
ret void
}
; GENERIC-NEXT: #NO_APP
; GENERIC-NEXT: retq # sched: [1:1.00]
;
-; BDVER-LABEL: test_vfrczss:
-; BDVER: # %bb.0:
-; BDVER-NEXT: #APP
-; BDVER-NEXT: vfrczss %xmm0, %xmm0
-; BDVER-NEXT: vfrczss (%rdi), %xmm0
-; BDVER-NEXT: #NO_APP
-; BDVER-NEXT: retq
+; BDVER12-LABEL: test_vfrczss:
+; BDVER12: # %bb.0:
+; BDVER12-NEXT: #APP
+; BDVER12-NEXT: vfrczss %xmm0, %xmm0 # sched: [3:1.00]
+; BDVER12-NEXT: vfrczss (%rdi), %xmm0 # sched: [9:1.00]
+; BDVER12-NEXT: #NO_APP
+; BDVER12-NEXT: retq # sched: [1:1.00]
+;
+; BDVER3-LABEL: test_vfrczss:
+; BDVER3: # %bb.0:
+; BDVER3-NEXT: #APP
+; BDVER3-NEXT: vfrczss %xmm0, %xmm0
+; BDVER3-NEXT: vfrczss (%rdi), %xmm0
+; BDVER3-NEXT: #NO_APP
+; BDVER3-NEXT: retq
+;
+; BDVER4-LABEL: test_vfrczss:
+; BDVER4: # %bb.0:
+; BDVER4-NEXT: #APP
+; BDVER4-NEXT: vfrczss %xmm0, %xmm0
+; BDVER4-NEXT: vfrczss (%rdi), %xmm0
+; BDVER4-NEXT: #NO_APP
+; BDVER4-NEXT: retq
call void asm sideeffect "vfrczss $0, $0 \0a\09 vfrczss $1, $0", "x,*m"(<4 x float> %a0, <4 x double> *%a1)
ret void
}
; GENERIC-NEXT: #NO_APP
; GENERIC-NEXT: retq # sched: [1:1.00]
;
-; BDVER-LABEL: test_vpcmov_128:
-; BDVER: # %bb.0:
-; BDVER-NEXT: #APP
-; BDVER-NEXT: vpcmov %xmm2, %xmm1, %xmm0, %xmm0
-; BDVER-NEXT: vpcmov (%rdi), %xmm1, %xmm0, %xmm0
-; BDVER-NEXT: vpcmov %xmm2, (%rdi), %xmm0, %xmm0
-; BDVER-NEXT: #NO_APP
-; BDVER-NEXT: retq
+; BDVER12-LABEL: test_vpcmov_128:
+; BDVER12: # %bb.0:
+; BDVER12-NEXT: #APP
+; BDVER12-NEXT: vpcmov %xmm2, %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
+; BDVER12-NEXT: vpcmov (%rdi), %xmm1, %xmm0, %xmm0 # sched: [7:0.50]
+; BDVER12-NEXT: vpcmov %xmm2, (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
+; BDVER12-NEXT: #NO_APP
+; BDVER12-NEXT: retq # sched: [1:1.00]
+;
+; BDVER3-LABEL: test_vpcmov_128:
+; BDVER3: # %bb.0:
+; BDVER3-NEXT: #APP
+; BDVER3-NEXT: vpcmov %xmm2, %xmm1, %xmm0, %xmm0
+; BDVER3-NEXT: vpcmov (%rdi), %xmm1, %xmm0, %xmm0
+; BDVER3-NEXT: vpcmov %xmm2, (%rdi), %xmm0, %xmm0
+; BDVER3-NEXT: #NO_APP
+; BDVER3-NEXT: retq
+;
+; BDVER4-LABEL: test_vpcmov_128:
+; BDVER4: # %bb.0:
+; BDVER4-NEXT: #APP
+; BDVER4-NEXT: vpcmov %xmm2, %xmm1, %xmm0, %xmm0
+; BDVER4-NEXT: vpcmov (%rdi), %xmm1, %xmm0, %xmm0
+; BDVER4-NEXT: vpcmov %xmm2, (%rdi), %xmm0, %xmm0
+; BDVER4-NEXT: #NO_APP
+; BDVER4-NEXT: retq
call void asm sideeffect "vpcmov $2, $1, $0, $0 \0a\09 vpcmov $3, $1, $0, $0 \0a\09 vpcmov $2, $3, $0, $0", "x,x,x,*m"(<2 x i64> %a0, <2 x i64> %a1, <2 x i64> %a2, <2 x i64> *%a3)
ret void
}
; GENERIC-NEXT: vzeroupper # sched: [100:0.33]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
-; BDVER-LABEL: test_vpcmov_256:
-; BDVER: # %bb.0:
-; BDVER-NEXT: #APP
-; BDVER-NEXT: vpcmov %ymm2, %ymm1, %ymm0, %ymm0
-; BDVER-NEXT: vpcmov (%rdi), %ymm1, %ymm0, %ymm0
-; BDVER-NEXT: vpcmov %ymm2, (%rdi), %ymm0, %ymm0
-; BDVER-NEXT: #NO_APP
-; BDVER-NEXT: vzeroupper
-; BDVER-NEXT: retq
+; BDVER12-LABEL: test_vpcmov_256:
+; BDVER12: # %bb.0:
+; BDVER12-NEXT: #APP
+; BDVER12-NEXT: vpcmov %ymm2, %ymm1, %ymm0, %ymm0 # sched: [1:1.00]
+; BDVER12-NEXT: vpcmov (%rdi), %ymm1, %ymm0, %ymm0 # sched: [8:1.00]
+; BDVER12-NEXT: vpcmov %ymm2, (%rdi), %ymm0, %ymm0 # sched: [8:1.00]
+; BDVER12-NEXT: #NO_APP
+; BDVER12-NEXT: vzeroupper # sched: [100:0.33]
+; BDVER12-NEXT: retq # sched: [1:1.00]
+;
+; BDVER3-LABEL: test_vpcmov_256:
+; BDVER3: # %bb.0:
+; BDVER3-NEXT: #APP
+; BDVER3-NEXT: vpcmov %ymm2, %ymm1, %ymm0, %ymm0
+; BDVER3-NEXT: vpcmov (%rdi), %ymm1, %ymm0, %ymm0
+; BDVER3-NEXT: vpcmov %ymm2, (%rdi), %ymm0, %ymm0
+; BDVER3-NEXT: #NO_APP
+; BDVER3-NEXT: vzeroupper
+; BDVER3-NEXT: retq
+;
+; BDVER4-LABEL: test_vpcmov_256:
+; BDVER4: # %bb.0:
+; BDVER4-NEXT: #APP
+; BDVER4-NEXT: vpcmov %ymm2, %ymm1, %ymm0, %ymm0
+; BDVER4-NEXT: vpcmov (%rdi), %ymm1, %ymm0, %ymm0
+; BDVER4-NEXT: vpcmov %ymm2, (%rdi), %ymm0, %ymm0
+; BDVER4-NEXT: #NO_APP
+; BDVER4-NEXT: vzeroupper
+; BDVER4-NEXT: retq
call void asm sideeffect "vpcmov $2, $1, $0, $0 \0a\09 vpcmov $3, $1, $0, $0 \0a\09 vpcmov $2, $3, $0, $0", "x,x,x,*m"(<4 x i64> %a0, <4 x i64> %a1, <4 x i64> %a2, <4 x i64> *%a3)
ret void
}
; GENERIC-NEXT: #NO_APP
; GENERIC-NEXT: retq # sched: [1:1.00]
;
-; BDVER-LABEL: test_vpcom:
-; BDVER: # %bb.0:
-; BDVER-NEXT: #APP
-; BDVER-NEXT: vpcomb $3, %xmm1, %xmm0, %xmm0
-; BDVER-NEXT: vpcomd $3, %xmm1, %xmm0, %xmm0
-; BDVER-NEXT: vpcomq $3, %xmm1, %xmm0, %xmm0
-; BDVER-NEXT: vpcomw $3, %xmm1, %xmm0, %xmm0
-; BDVER-NEXT: vpcomb $3, (%rdi), %xmm0, %xmm0
-; BDVER-NEXT: vpcomd $3, (%rdi), %xmm0, %xmm0
-; BDVER-NEXT: vpcomq $3, (%rdi), %xmm0, %xmm0
-; BDVER-NEXT: vpcomw $3, (%rdi), %xmm0, %xmm0
-; BDVER-NEXT: #NO_APP
-; BDVER-NEXT: retq
+; BDVER12-LABEL: test_vpcom:
+; BDVER12: # %bb.0:
+; BDVER12-NEXT: #APP
+; BDVER12-NEXT: vpcomb $3, %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
+; BDVER12-NEXT: vpcomd $3, %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
+; BDVER12-NEXT: vpcomq $3, %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
+; BDVER12-NEXT: vpcomw $3, %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
+; BDVER12-NEXT: vpcomb $3, (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
+; BDVER12-NEXT: vpcomd $3, (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
+; BDVER12-NEXT: vpcomq $3, (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
+; BDVER12-NEXT: vpcomw $3, (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
+; BDVER12-NEXT: #NO_APP
+; BDVER12-NEXT: retq # sched: [1:1.00]
+;
+; BDVER3-LABEL: test_vpcom:
+; BDVER3: # %bb.0:
+; BDVER3-NEXT: #APP
+; BDVER3-NEXT: vpcomb $3, %xmm1, %xmm0, %xmm0
+; BDVER3-NEXT: vpcomd $3, %xmm1, %xmm0, %xmm0
+; BDVER3-NEXT: vpcomq $3, %xmm1, %xmm0, %xmm0
+; BDVER3-NEXT: vpcomw $3, %xmm1, %xmm0, %xmm0
+; BDVER3-NEXT: vpcomb $3, (%rdi), %xmm0, %xmm0
+; BDVER3-NEXT: vpcomd $3, (%rdi), %xmm0, %xmm0
+; BDVER3-NEXT: vpcomq $3, (%rdi), %xmm0, %xmm0
+; BDVER3-NEXT: vpcomw $3, (%rdi), %xmm0, %xmm0
+; BDVER3-NEXT: #NO_APP
+; BDVER3-NEXT: retq
+;
+; BDVER4-LABEL: test_vpcom:
+; BDVER4: # %bb.0:
+; BDVER4-NEXT: #APP
+; BDVER4-NEXT: vpcomb $3, %xmm1, %xmm0, %xmm0
+; BDVER4-NEXT: vpcomd $3, %xmm1, %xmm0, %xmm0
+; BDVER4-NEXT: vpcomq $3, %xmm1, %xmm0, %xmm0
+; BDVER4-NEXT: vpcomw $3, %xmm1, %xmm0, %xmm0
+; BDVER4-NEXT: vpcomb $3, (%rdi), %xmm0, %xmm0
+; BDVER4-NEXT: vpcomd $3, (%rdi), %xmm0, %xmm0
+; BDVER4-NEXT: vpcomq $3, (%rdi), %xmm0, %xmm0
+; BDVER4-NEXT: vpcomw $3, (%rdi), %xmm0, %xmm0
+; BDVER4-NEXT: #NO_APP
+; BDVER4-NEXT: retq
call void asm sideeffect "vpcomb $3, $1, $0, $0 \0a\09 vpcomd $3, $1, $0, $0 \0a\09 vpcomq $3, $1, $0, $0 \0a\09 vpcomw $3, $1, $0, $0 \0a\09 vpcomb $3, $2, $0, $0 \0a\09 vpcomd $3, $2, $0, $0 \0a\09 vpcomq $3, $2, $0, $0 \0a\09 vpcomw $3, $2, $0, $0", "x,x,*m,i"(<2 x i64> %a0, <2 x i64> %a1, <2 x i64> *%a2, i8 3)
ret void
}
; GENERIC-NEXT: #NO_APP
; GENERIC-NEXT: retq # sched: [1:1.00]
;
-; BDVER-LABEL: test_vpcomu:
-; BDVER: # %bb.0:
-; BDVER-NEXT: #APP
-; BDVER-NEXT: vpcomub $3, %xmm1, %xmm0, %xmm0
-; BDVER-NEXT: vpcomud $3, %xmm1, %xmm0, %xmm0
-; BDVER-NEXT: vpcomuq $3, %xmm1, %xmm0, %xmm0
-; BDVER-NEXT: vpcomuw $3, %xmm1, %xmm0, %xmm0
-; BDVER-NEXT: vpcomub $3, (%rdi), %xmm0, %xmm0
-; BDVER-NEXT: vpcomud $3, (%rdi), %xmm0, %xmm0
-; BDVER-NEXT: vpcomuq $3, (%rdi), %xmm0, %xmm0
-; BDVER-NEXT: vpcomuw $3, (%rdi), %xmm0, %xmm0
-; BDVER-NEXT: #NO_APP
-; BDVER-NEXT: retq
+; BDVER12-LABEL: test_vpcomu:
+; BDVER12: # %bb.0:
+; BDVER12-NEXT: #APP
+; BDVER12-NEXT: vpcomub $3, %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
+; BDVER12-NEXT: vpcomud $3, %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
+; BDVER12-NEXT: vpcomuq $3, %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
+; BDVER12-NEXT: vpcomuw $3, %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
+; BDVER12-NEXT: vpcomub $3, (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
+; BDVER12-NEXT: vpcomud $3, (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
+; BDVER12-NEXT: vpcomuq $3, (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
+; BDVER12-NEXT: vpcomuw $3, (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
+; BDVER12-NEXT: #NO_APP
+; BDVER12-NEXT: retq # sched: [1:1.00]
+;
+; BDVER3-LABEL: test_vpcomu:
+; BDVER3: # %bb.0:
+; BDVER3-NEXT: #APP
+; BDVER3-NEXT: vpcomub $3, %xmm1, %xmm0, %xmm0
+; BDVER3-NEXT: vpcomud $3, %xmm1, %xmm0, %xmm0
+; BDVER3-NEXT: vpcomuq $3, %xmm1, %xmm0, %xmm0
+; BDVER3-NEXT: vpcomuw $3, %xmm1, %xmm0, %xmm0
+; BDVER3-NEXT: vpcomub $3, (%rdi), %xmm0, %xmm0
+; BDVER3-NEXT: vpcomud $3, (%rdi), %xmm0, %xmm0
+; BDVER3-NEXT: vpcomuq $3, (%rdi), %xmm0, %xmm0
+; BDVER3-NEXT: vpcomuw $3, (%rdi), %xmm0, %xmm0
+; BDVER3-NEXT: #NO_APP
+; BDVER3-NEXT: retq
+;
+; BDVER4-LABEL: test_vpcomu:
+; BDVER4: # %bb.0:
+; BDVER4-NEXT: #APP
+; BDVER4-NEXT: vpcomub $3, %xmm1, %xmm0, %xmm0
+; BDVER4-NEXT: vpcomud $3, %xmm1, %xmm0, %xmm0
+; BDVER4-NEXT: vpcomuq $3, %xmm1, %xmm0, %xmm0
+; BDVER4-NEXT: vpcomuw $3, %xmm1, %xmm0, %xmm0
+; BDVER4-NEXT: vpcomub $3, (%rdi), %xmm0, %xmm0
+; BDVER4-NEXT: vpcomud $3, (%rdi), %xmm0, %xmm0
+; BDVER4-NEXT: vpcomuq $3, (%rdi), %xmm0, %xmm0
+; BDVER4-NEXT: vpcomuw $3, (%rdi), %xmm0, %xmm0
+; BDVER4-NEXT: #NO_APP
+; BDVER4-NEXT: retq
call void asm sideeffect "vpcomub $3, $1, $0, $0 \0a\09 vpcomud $3, $1, $0, $0 \0a\09 vpcomuq $3, $1, $0, $0 \0a\09 vpcomuw $3, $1, $0, $0 \0a\09 vpcomub $3, $2, $0, $0 \0a\09 vpcomud $3, $2, $0, $0 \0a\09 vpcomuq $3, $2, $0, $0 \0a\09 vpcomuw $3, $2, $0, $0", "x,x,*m,i"(<2 x i64> %a0, <2 x i64> %a1, <2 x i64> *%a2, i8 3)
ret void
}
; GENERIC-NEXT: #NO_APP
; GENERIC-NEXT: retq # sched: [1:1.00]
;
-; BDVER-LABEL: test_vpermil2pd_128:
-; BDVER: # %bb.0:
-; BDVER-NEXT: #APP
-; BDVER-NEXT: vpermil2pd $3, %xmm2, %xmm1, %xmm0, %xmm0
-; BDVER-NEXT: vpermil2pd $3, %xmm2, (%rdi), %xmm0, %xmm0
-; BDVER-NEXT: vpermil2pd $3, (%rdi), %xmm1, %xmm0, %xmm0
-; BDVER-NEXT: #NO_APP
-; BDVER-NEXT: retq
+; BDVER12-LABEL: test_vpermil2pd_128:
+; BDVER12: # %bb.0:
+; BDVER12-NEXT: #APP
+; BDVER12-NEXT: vpermil2pd $3, %xmm2, %xmm1, %xmm0, %xmm0 # sched: [1:1.00]
+; BDVER12-NEXT: vpermil2pd $3, %xmm2, (%rdi), %xmm0, %xmm0 # sched: [7:1.00]
+; BDVER12-NEXT: vpermil2pd $3, (%rdi), %xmm1, %xmm0, %xmm0 # sched: [7:1.00]
+; BDVER12-NEXT: #NO_APP
+; BDVER12-NEXT: retq # sched: [1:1.00]
+;
+; BDVER3-LABEL: test_vpermil2pd_128:
+; BDVER3: # %bb.0:
+; BDVER3-NEXT: #APP
+; BDVER3-NEXT: vpermil2pd $3, %xmm2, %xmm1, %xmm0, %xmm0
+; BDVER3-NEXT: vpermil2pd $3, %xmm2, (%rdi), %xmm0, %xmm0
+; BDVER3-NEXT: vpermil2pd $3, (%rdi), %xmm1, %xmm0, %xmm0
+; BDVER3-NEXT: #NO_APP
+; BDVER3-NEXT: retq
+;
+; BDVER4-LABEL: test_vpermil2pd_128:
+; BDVER4: # %bb.0:
+; BDVER4-NEXT: #APP
+; BDVER4-NEXT: vpermil2pd $3, %xmm2, %xmm1, %xmm0, %xmm0
+; BDVER4-NEXT: vpermil2pd $3, %xmm2, (%rdi), %xmm0, %xmm0
+; BDVER4-NEXT: vpermil2pd $3, (%rdi), %xmm1, %xmm0, %xmm0
+; BDVER4-NEXT: #NO_APP
+; BDVER4-NEXT: retq
call void asm sideeffect "vpermil2pd $4, $2, $1, $0, $0 \0a\09 vpermil2pd $4, $2, $3, $0, $0 \0a\09 vpermil2pd $4, $3, $1, $0, $0", "x,x,x,*m,i"(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2, <2 x double> *%a3, i8 3)
ret void
}
; GENERIC-NEXT: vzeroupper # sched: [100:0.33]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
-; BDVER-LABEL: test_vpermil2pd_256:
-; BDVER: # %bb.0:
-; BDVER-NEXT: #APP
-; BDVER-NEXT: vpermil2pd $3, %ymm2, %ymm1, %ymm0, %ymm0
-; BDVER-NEXT: vpermil2pd $3, %ymm2, (%rdi), %ymm0, %ymm0
-; BDVER-NEXT: vpermil2pd $3, (%rdi), %ymm1, %ymm0, %ymm0
-; BDVER-NEXT: #NO_APP
-; BDVER-NEXT: vzeroupper
-; BDVER-NEXT: retq
+; BDVER12-LABEL: test_vpermil2pd_256:
+; BDVER12: # %bb.0:
+; BDVER12-NEXT: #APP
+; BDVER12-NEXT: vpermil2pd $3, %ymm2, %ymm1, %ymm0, %ymm0 # sched: [1:1.00]
+; BDVER12-NEXT: vpermil2pd $3, %ymm2, (%rdi), %ymm0, %ymm0 # sched: [8:1.00]
+; BDVER12-NEXT: vpermil2pd $3, (%rdi), %ymm1, %ymm0, %ymm0 # sched: [8:1.00]
+; BDVER12-NEXT: #NO_APP
+; BDVER12-NEXT: vzeroupper # sched: [100:0.33]
+; BDVER12-NEXT: retq # sched: [1:1.00]
+;
+; BDVER3-LABEL: test_vpermil2pd_256:
+; BDVER3: # %bb.0:
+; BDVER3-NEXT: #APP
+; BDVER3-NEXT: vpermil2pd $3, %ymm2, %ymm1, %ymm0, %ymm0
+; BDVER3-NEXT: vpermil2pd $3, %ymm2, (%rdi), %ymm0, %ymm0
+; BDVER3-NEXT: vpermil2pd $3, (%rdi), %ymm1, %ymm0, %ymm0
+; BDVER3-NEXT: #NO_APP
+; BDVER3-NEXT: vzeroupper
+; BDVER3-NEXT: retq
+;
+; BDVER4-LABEL: test_vpermil2pd_256:
+; BDVER4: # %bb.0:
+; BDVER4-NEXT: #APP
+; BDVER4-NEXT: vpermil2pd $3, %ymm2, %ymm1, %ymm0, %ymm0
+; BDVER4-NEXT: vpermil2pd $3, %ymm2, (%rdi), %ymm0, %ymm0
+; BDVER4-NEXT: vpermil2pd $3, (%rdi), %ymm1, %ymm0, %ymm0
+; BDVER4-NEXT: #NO_APP
+; BDVER4-NEXT: vzeroupper
+; BDVER4-NEXT: retq
call void asm sideeffect "vpermil2pd $4, $2, $1, $0, $0 \0a\09 vpermil2pd $4, $2, $3, $0, $0 \0a\09 vpermil2pd $4, $3, $1, $0, $0", "x,x,x,*m,i"(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2, <4 x double> *%a3, i8 3)
ret void
}
; GENERIC-NEXT: #NO_APP
; GENERIC-NEXT: retq # sched: [1:1.00]
;
-; BDVER-LABEL: test_vpermil2ps_128:
-; BDVER: # %bb.0:
-; BDVER-NEXT: #APP
-; BDVER-NEXT: vpermil2ps $3, %xmm2, %xmm1, %xmm0, %xmm0
-; BDVER-NEXT: vpermil2ps $3, %xmm2, (%rdi), %xmm0, %xmm0
-; BDVER-NEXT: vpermil2ps $3, (%rdi), %xmm1, %xmm0, %xmm0
-; BDVER-NEXT: #NO_APP
-; BDVER-NEXT: retq
+; BDVER12-LABEL: test_vpermil2ps_128:
+; BDVER12: # %bb.0:
+; BDVER12-NEXT: #APP
+; BDVER12-NEXT: vpermil2ps $3, %xmm2, %xmm1, %xmm0, %xmm0 # sched: [1:1.00]
+; BDVER12-NEXT: vpermil2ps $3, %xmm2, (%rdi), %xmm0, %xmm0 # sched: [7:1.00]
+; BDVER12-NEXT: vpermil2ps $3, (%rdi), %xmm1, %xmm0, %xmm0 # sched: [7:1.00]
+; BDVER12-NEXT: #NO_APP
+; BDVER12-NEXT: retq # sched: [1:1.00]
+;
+; BDVER3-LABEL: test_vpermil2ps_128:
+; BDVER3: # %bb.0:
+; BDVER3-NEXT: #APP
+; BDVER3-NEXT: vpermil2ps $3, %xmm2, %xmm1, %xmm0, %xmm0
+; BDVER3-NEXT: vpermil2ps $3, %xmm2, (%rdi), %xmm0, %xmm0
+; BDVER3-NEXT: vpermil2ps $3, (%rdi), %xmm1, %xmm0, %xmm0
+; BDVER3-NEXT: #NO_APP
+; BDVER3-NEXT: retq
+;
+; BDVER4-LABEL: test_vpermil2ps_128:
+; BDVER4: # %bb.0:
+; BDVER4-NEXT: #APP
+; BDVER4-NEXT: vpermil2ps $3, %xmm2, %xmm1, %xmm0, %xmm0
+; BDVER4-NEXT: vpermil2ps $3, %xmm2, (%rdi), %xmm0, %xmm0
+; BDVER4-NEXT: vpermil2ps $3, (%rdi), %xmm1, %xmm0, %xmm0
+; BDVER4-NEXT: #NO_APP
+; BDVER4-NEXT: retq
call void asm sideeffect "vpermil2ps $4, $2, $1, $0, $0 \0a\09 vpermil2ps $4, $2, $3, $0, $0 \0a\09 vpermil2ps $4, $3, $1, $0, $0", "x,x,x,*m,i"(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2, <4 x float> *%a3, i8 3)
ret void
}
; GENERIC-NEXT: vzeroupper # sched: [100:0.33]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
-; BDVER-LABEL: test_vpermil2ps_256:
-; BDVER: # %bb.0:
-; BDVER-NEXT: #APP
-; BDVER-NEXT: vpermil2ps $3, %ymm2, %ymm1, %ymm0, %ymm0
-; BDVER-NEXT: vpermil2ps $3, %ymm2, (%rdi), %ymm0, %ymm0
-; BDVER-NEXT: vpermil2ps $3, (%rdi), %ymm1, %ymm0, %ymm0
-; BDVER-NEXT: #NO_APP
-; BDVER-NEXT: vzeroupper
-; BDVER-NEXT: retq
+; BDVER12-LABEL: test_vpermil2ps_256:
+; BDVER12: # %bb.0:
+; BDVER12-NEXT: #APP
+; BDVER12-NEXT: vpermil2ps $3, %ymm2, %ymm1, %ymm0, %ymm0 # sched: [1:1.00]
+; BDVER12-NEXT: vpermil2ps $3, %ymm2, (%rdi), %ymm0, %ymm0 # sched: [8:1.00]
+; BDVER12-NEXT: vpermil2ps $3, (%rdi), %ymm1, %ymm0, %ymm0 # sched: [8:1.00]
+; BDVER12-NEXT: #NO_APP
+; BDVER12-NEXT: vzeroupper # sched: [100:0.33]
+; BDVER12-NEXT: retq # sched: [1:1.00]
+;
+; BDVER3-LABEL: test_vpermil2ps_256:
+; BDVER3: # %bb.0:
+; BDVER3-NEXT: #APP
+; BDVER3-NEXT: vpermil2ps $3, %ymm2, %ymm1, %ymm0, %ymm0
+; BDVER3-NEXT: vpermil2ps $3, %ymm2, (%rdi), %ymm0, %ymm0
+; BDVER3-NEXT: vpermil2ps $3, (%rdi), %ymm1, %ymm0, %ymm0
+; BDVER3-NEXT: #NO_APP
+; BDVER3-NEXT: vzeroupper
+; BDVER3-NEXT: retq
+;
+; BDVER4-LABEL: test_vpermil2ps_256:
+; BDVER4: # %bb.0:
+; BDVER4-NEXT: #APP
+; BDVER4-NEXT: vpermil2ps $3, %ymm2, %ymm1, %ymm0, %ymm0
+; BDVER4-NEXT: vpermil2ps $3, %ymm2, (%rdi), %ymm0, %ymm0
+; BDVER4-NEXT: vpermil2ps $3, (%rdi), %ymm1, %ymm0, %ymm0
+; BDVER4-NEXT: #NO_APP
+; BDVER4-NEXT: vzeroupper
+; BDVER4-NEXT: retq
call void asm sideeffect "vpermil2ps $4, $2, $1, $0, $0 \0a\09 vpermil2ps $4, $2, $3, $0, $0 \0a\09 vpermil2ps $4, $3, $1, $0, $0", "x,x,x,*m,i"(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2, <8 x float> *%a3, i8 3)
ret void
}
; GENERIC-NEXT: #NO_APP
; GENERIC-NEXT: retq # sched: [1:1.00]
;
-; BDVER-LABEL: test_vphaddbd:
-; BDVER: # %bb.0:
-; BDVER-NEXT: #APP
-; BDVER-NEXT: vphaddbd %xmm0, %xmm0
-; BDVER-NEXT: vphaddbd (%rdi), %xmm0
-; BDVER-NEXT: #NO_APP
-; BDVER-NEXT: retq
+; BDVER12-LABEL: test_vphaddbd:
+; BDVER12: # %bb.0:
+; BDVER12-NEXT: #APP
+; BDVER12-NEXT: vphaddbd %xmm0, %xmm0 # sched: [3:1.50]
+; BDVER12-NEXT: vphaddbd (%rdi), %xmm0 # sched: [9:1.50]
+; BDVER12-NEXT: #NO_APP
+; BDVER12-NEXT: retq # sched: [1:1.00]
+;
+; BDVER3-LABEL: test_vphaddbd:
+; BDVER3: # %bb.0:
+; BDVER3-NEXT: #APP
+; BDVER3-NEXT: vphaddbd %xmm0, %xmm0
+; BDVER3-NEXT: vphaddbd (%rdi), %xmm0
+; BDVER3-NEXT: #NO_APP
+; BDVER3-NEXT: retq
+;
+; BDVER4-LABEL: test_vphaddbd:
+; BDVER4: # %bb.0:
+; BDVER4-NEXT: #APP
+; BDVER4-NEXT: vphaddbd %xmm0, %xmm0
+; BDVER4-NEXT: vphaddbd (%rdi), %xmm0
+; BDVER4-NEXT: #NO_APP
+; BDVER4-NEXT: retq
call void asm sideeffect "vphaddbd $0, $0 \0a\09 vphaddbd $1, $0", "x,*m"(<2 x i64> %a0, <2 x i64> *%a1)
ret void
}
; GENERIC-NEXT: #NO_APP
; GENERIC-NEXT: retq # sched: [1:1.00]
;
-; BDVER-LABEL: test_vphaddbq:
-; BDVER: # %bb.0:
-; BDVER-NEXT: #APP
-; BDVER-NEXT: vphaddbq %xmm0, %xmm0
-; BDVER-NEXT: vphaddbq (%rdi), %xmm0
-; BDVER-NEXT: #NO_APP
-; BDVER-NEXT: retq
+; BDVER12-LABEL: test_vphaddbq:
+; BDVER12: # %bb.0:
+; BDVER12-NEXT: #APP
+; BDVER12-NEXT: vphaddbq %xmm0, %xmm0 # sched: [3:1.50]
+; BDVER12-NEXT: vphaddbq (%rdi), %xmm0 # sched: [9:1.50]
+; BDVER12-NEXT: #NO_APP
+; BDVER12-NEXT: retq # sched: [1:1.00]
+;
+; BDVER3-LABEL: test_vphaddbq:
+; BDVER3: # %bb.0:
+; BDVER3-NEXT: #APP
+; BDVER3-NEXT: vphaddbq %xmm0, %xmm0
+; BDVER3-NEXT: vphaddbq (%rdi), %xmm0
+; BDVER3-NEXT: #NO_APP
+; BDVER3-NEXT: retq
+;
+; BDVER4-LABEL: test_vphaddbq:
+; BDVER4: # %bb.0:
+; BDVER4-NEXT: #APP
+; BDVER4-NEXT: vphaddbq %xmm0, %xmm0
+; BDVER4-NEXT: vphaddbq (%rdi), %xmm0
+; BDVER4-NEXT: #NO_APP
+; BDVER4-NEXT: retq
call void asm sideeffect "vphaddbq $0, $0 \0a\09 vphaddbq $1, $0", "x,*m"(<2 x i64> %a0, <2 x i64> *%a1)
ret void
}
; GENERIC-NEXT: #NO_APP
; GENERIC-NEXT: retq # sched: [1:1.00]
;
-; BDVER-LABEL: test_vphaddbw:
-; BDVER: # %bb.0:
-; BDVER-NEXT: #APP
-; BDVER-NEXT: vphaddbw %xmm0, %xmm0
-; BDVER-NEXT: vphaddbw (%rdi), %xmm0
-; BDVER-NEXT: #NO_APP
-; BDVER-NEXT: retq
+; BDVER12-LABEL: test_vphaddbw:
+; BDVER12: # %bb.0:
+; BDVER12-NEXT: #APP
+; BDVER12-NEXT: vphaddbw %xmm0, %xmm0 # sched: [3:1.50]
+; BDVER12-NEXT: vphaddbw (%rdi), %xmm0 # sched: [9:1.50]
+; BDVER12-NEXT: #NO_APP
+; BDVER12-NEXT: retq # sched: [1:1.00]
+;
+; BDVER3-LABEL: test_vphaddbw:
+; BDVER3: # %bb.0:
+; BDVER3-NEXT: #APP
+; BDVER3-NEXT: vphaddbw %xmm0, %xmm0
+; BDVER3-NEXT: vphaddbw (%rdi), %xmm0
+; BDVER3-NEXT: #NO_APP
+; BDVER3-NEXT: retq
+;
+; BDVER4-LABEL: test_vphaddbw:
+; BDVER4: # %bb.0:
+; BDVER4-NEXT: #APP
+; BDVER4-NEXT: vphaddbw %xmm0, %xmm0
+; BDVER4-NEXT: vphaddbw (%rdi), %xmm0
+; BDVER4-NEXT: #NO_APP
+; BDVER4-NEXT: retq
call void asm sideeffect "vphaddbw $0, $0 \0a\09 vphaddbw $1, $0", "x,*m"(<2 x i64> %a0, <2 x i64> *%a1)
ret void
}
; GENERIC-NEXT: #NO_APP
; GENERIC-NEXT: retq # sched: [1:1.00]
;
-; BDVER-LABEL: test_vphadddq:
-; BDVER: # %bb.0:
-; BDVER-NEXT: #APP
-; BDVER-NEXT: vphadddq %xmm0, %xmm0
-; BDVER-NEXT: vphadddq (%rdi), %xmm0
-; BDVER-NEXT: #NO_APP
-; BDVER-NEXT: retq
+; BDVER12-LABEL: test_vphadddq:
+; BDVER12: # %bb.0:
+; BDVER12-NEXT: #APP
+; BDVER12-NEXT: vphadddq %xmm0, %xmm0 # sched: [3:1.50]
+; BDVER12-NEXT: vphadddq (%rdi), %xmm0 # sched: [9:1.50]
+; BDVER12-NEXT: #NO_APP
+; BDVER12-NEXT: retq # sched: [1:1.00]
+;
+; BDVER3-LABEL: test_vphadddq:
+; BDVER3: # %bb.0:
+; BDVER3-NEXT: #APP
+; BDVER3-NEXT: vphadddq %xmm0, %xmm0
+; BDVER3-NEXT: vphadddq (%rdi), %xmm0
+; BDVER3-NEXT: #NO_APP
+; BDVER3-NEXT: retq
+;
+; BDVER4-LABEL: test_vphadddq:
+; BDVER4: # %bb.0:
+; BDVER4-NEXT: #APP
+; BDVER4-NEXT: vphadddq %xmm0, %xmm0
+; BDVER4-NEXT: vphadddq (%rdi), %xmm0
+; BDVER4-NEXT: #NO_APP
+; BDVER4-NEXT: retq
call void asm sideeffect "vphadddq $0, $0 \0a\09 vphadddq $1, $0", "x,*m"(<2 x i64> %a0, <2 x i64> *%a1)
ret void
}
; GENERIC-NEXT: #NO_APP
; GENERIC-NEXT: retq # sched: [1:1.00]
;
-; BDVER-LABEL: test_vphaddubd:
-; BDVER: # %bb.0:
-; BDVER-NEXT: #APP
-; BDVER-NEXT: vphaddubd %xmm0, %xmm0
-; BDVER-NEXT: vphaddubd (%rdi), %xmm0
-; BDVER-NEXT: #NO_APP
-; BDVER-NEXT: retq
+; BDVER12-LABEL: test_vphaddubd:
+; BDVER12: # %bb.0:
+; BDVER12-NEXT: #APP
+; BDVER12-NEXT: vphaddubd %xmm0, %xmm0 # sched: [3:1.50]
+; BDVER12-NEXT: vphaddubd (%rdi), %xmm0 # sched: [9:1.50]
+; BDVER12-NEXT: #NO_APP
+; BDVER12-NEXT: retq # sched: [1:1.00]
+;
+; BDVER3-LABEL: test_vphaddubd:
+; BDVER3: # %bb.0:
+; BDVER3-NEXT: #APP
+; BDVER3-NEXT: vphaddubd %xmm0, %xmm0
+; BDVER3-NEXT: vphaddubd (%rdi), %xmm0
+; BDVER3-NEXT: #NO_APP
+; BDVER3-NEXT: retq
+;
+; BDVER4-LABEL: test_vphaddubd:
+; BDVER4: # %bb.0:
+; BDVER4-NEXT: #APP
+; BDVER4-NEXT: vphaddubd %xmm0, %xmm0
+; BDVER4-NEXT: vphaddubd (%rdi), %xmm0
+; BDVER4-NEXT: #NO_APP
+; BDVER4-NEXT: retq
call void asm sideeffect "vphaddubd $0, $0 \0a\09 vphaddubd $1, $0", "x,*m"(<2 x i64> %a0, <2 x i64> *%a1)
ret void
}
; GENERIC-NEXT: #NO_APP
; GENERIC-NEXT: retq # sched: [1:1.00]
;
-; BDVER-LABEL: test_vphaddubq:
-; BDVER: # %bb.0:
-; BDVER-NEXT: #APP
-; BDVER-NEXT: vphaddubq %xmm0, %xmm0
-; BDVER-NEXT: vphaddubq (%rdi), %xmm0
-; BDVER-NEXT: #NO_APP
-; BDVER-NEXT: retq
+; BDVER12-LABEL: test_vphaddubq:
+; BDVER12: # %bb.0:
+; BDVER12-NEXT: #APP
+; BDVER12-NEXT: vphaddubq %xmm0, %xmm0 # sched: [3:1.50]
+; BDVER12-NEXT: vphaddubq (%rdi), %xmm0 # sched: [9:1.50]
+; BDVER12-NEXT: #NO_APP
+; BDVER12-NEXT: retq # sched: [1:1.00]
+;
+; BDVER3-LABEL: test_vphaddubq:
+; BDVER3: # %bb.0:
+; BDVER3-NEXT: #APP
+; BDVER3-NEXT: vphaddubq %xmm0, %xmm0
+; BDVER3-NEXT: vphaddubq (%rdi), %xmm0
+; BDVER3-NEXT: #NO_APP
+; BDVER3-NEXT: retq
+;
+; BDVER4-LABEL: test_vphaddubq:
+; BDVER4: # %bb.0:
+; BDVER4-NEXT: #APP
+; BDVER4-NEXT: vphaddubq %xmm0, %xmm0
+; BDVER4-NEXT: vphaddubq (%rdi), %xmm0
+; BDVER4-NEXT: #NO_APP
+; BDVER4-NEXT: retq
call void asm sideeffect "vphaddubq $0, $0 \0a\09 vphaddubq $1, $0", "x,*m"(<2 x i64> %a0, <2 x i64> *%a1)
ret void
}
; GENERIC-NEXT: #NO_APP
; GENERIC-NEXT: retq # sched: [1:1.00]
;
-; BDVER-LABEL: test_vphaddubw:
-; BDVER: # %bb.0:
-; BDVER-NEXT: #APP
-; BDVER-NEXT: vphaddubw %xmm0, %xmm0
-; BDVER-NEXT: vphaddubw (%rdi), %xmm0
-; BDVER-NEXT: #NO_APP
-; BDVER-NEXT: retq
+; BDVER12-LABEL: test_vphaddubw:
+; BDVER12: # %bb.0:
+; BDVER12-NEXT: #APP
+; BDVER12-NEXT: vphaddubw %xmm0, %xmm0 # sched: [3:1.50]
+; BDVER12-NEXT: vphaddubw (%rdi), %xmm0 # sched: [9:1.50]
+; BDVER12-NEXT: #NO_APP
+; BDVER12-NEXT: retq # sched: [1:1.00]
+;
+; BDVER3-LABEL: test_vphaddubw:
+; BDVER3: # %bb.0:
+; BDVER3-NEXT: #APP
+; BDVER3-NEXT: vphaddubw %xmm0, %xmm0
+; BDVER3-NEXT: vphaddubw (%rdi), %xmm0
+; BDVER3-NEXT: #NO_APP
+; BDVER3-NEXT: retq
+;
+; BDVER4-LABEL: test_vphaddubw:
+; BDVER4: # %bb.0:
+; BDVER4-NEXT: #APP
+; BDVER4-NEXT: vphaddubw %xmm0, %xmm0
+; BDVER4-NEXT: vphaddubw (%rdi), %xmm0
+; BDVER4-NEXT: #NO_APP
+; BDVER4-NEXT: retq
call void asm sideeffect "vphaddubw $0, $0 \0a\09 vphaddubw $1, $0", "x,*m"(<2 x i64> %a0, <2 x i64> *%a1)
ret void
}
; GENERIC-NEXT: #NO_APP
; GENERIC-NEXT: retq # sched: [1:1.00]
;
-; BDVER-LABEL: test_vphaddudq:
-; BDVER: # %bb.0:
-; BDVER-NEXT: #APP
-; BDVER-NEXT: vphaddudq %xmm0, %xmm0
-; BDVER-NEXT: vphaddudq (%rdi), %xmm0
-; BDVER-NEXT: #NO_APP
-; BDVER-NEXT: retq
+; BDVER12-LABEL: test_vphaddudq:
+; BDVER12: # %bb.0:
+; BDVER12-NEXT: #APP
+; BDVER12-NEXT: vphaddudq %xmm0, %xmm0 # sched: [3:1.50]
+; BDVER12-NEXT: vphaddudq (%rdi), %xmm0 # sched: [9:1.50]
+; BDVER12-NEXT: #NO_APP
+; BDVER12-NEXT: retq # sched: [1:1.00]
+;
+; BDVER3-LABEL: test_vphaddudq:
+; BDVER3: # %bb.0:
+; BDVER3-NEXT: #APP
+; BDVER3-NEXT: vphaddudq %xmm0, %xmm0
+; BDVER3-NEXT: vphaddudq (%rdi), %xmm0
+; BDVER3-NEXT: #NO_APP
+; BDVER3-NEXT: retq
+;
+; BDVER4-LABEL: test_vphaddudq:
+; BDVER4: # %bb.0:
+; BDVER4-NEXT: #APP
+; BDVER4-NEXT: vphaddudq %xmm0, %xmm0
+; BDVER4-NEXT: vphaddudq (%rdi), %xmm0
+; BDVER4-NEXT: #NO_APP
+; BDVER4-NEXT: retq
call void asm sideeffect "vphaddudq $0, $0 \0a\09 vphaddudq $1, $0", "x,*m"(<2 x i64> %a0, <2 x i64> *%a1)
ret void
}
; GENERIC-NEXT: #NO_APP
; GENERIC-NEXT: retq # sched: [1:1.00]
;
-; BDVER-LABEL: test_vphadduwd:
-; BDVER: # %bb.0:
-; BDVER-NEXT: #APP
-; BDVER-NEXT: vphadduwd %xmm0, %xmm0
-; BDVER-NEXT: vphadduwd (%rdi), %xmm0
-; BDVER-NEXT: #NO_APP
-; BDVER-NEXT: retq
+; BDVER12-LABEL: test_vphadduwd:
+; BDVER12: # %bb.0:
+; BDVER12-NEXT: #APP
+; BDVER12-NEXT: vphadduwd %xmm0, %xmm0 # sched: [3:1.50]
+; BDVER12-NEXT: vphadduwd (%rdi), %xmm0 # sched: [9:1.50]
+; BDVER12-NEXT: #NO_APP
+; BDVER12-NEXT: retq # sched: [1:1.00]
+;
+; BDVER3-LABEL: test_vphadduwd:
+; BDVER3: # %bb.0:
+; BDVER3-NEXT: #APP
+; BDVER3-NEXT: vphadduwd %xmm0, %xmm0
+; BDVER3-NEXT: vphadduwd (%rdi), %xmm0
+; BDVER3-NEXT: #NO_APP
+; BDVER3-NEXT: retq
+;
+; BDVER4-LABEL: test_vphadduwd:
+; BDVER4: # %bb.0:
+; BDVER4-NEXT: #APP
+; BDVER4-NEXT: vphadduwd %xmm0, %xmm0
+; BDVER4-NEXT: vphadduwd (%rdi), %xmm0
+; BDVER4-NEXT: #NO_APP
+; BDVER4-NEXT: retq
call void asm sideeffect "vphadduwd $0, $0 \0a\09 vphadduwd $1, $0", "x,*m"(<2 x i64> %a0, <2 x i64> *%a1)
ret void
}
; GENERIC-NEXT: #NO_APP
; GENERIC-NEXT: retq # sched: [1:1.00]
;
-; BDVER-LABEL: test_vphadduwq:
-; BDVER: # %bb.0:
-; BDVER-NEXT: #APP
-; BDVER-NEXT: vphadduwq %xmm0, %xmm0
-; BDVER-NEXT: vphadduwq (%rdi), %xmm0
-; BDVER-NEXT: #NO_APP
-; BDVER-NEXT: retq
+; BDVER12-LABEL: test_vphadduwq:
+; BDVER12: # %bb.0:
+; BDVER12-NEXT: #APP
+; BDVER12-NEXT: vphadduwq %xmm0, %xmm0 # sched: [3:1.50]
+; BDVER12-NEXT: vphadduwq (%rdi), %xmm0 # sched: [9:1.50]
+; BDVER12-NEXT: #NO_APP
+; BDVER12-NEXT: retq # sched: [1:1.00]
+;
+; BDVER3-LABEL: test_vphadduwq:
+; BDVER3: # %bb.0:
+; BDVER3-NEXT: #APP
+; BDVER3-NEXT: vphadduwq %xmm0, %xmm0
+; BDVER3-NEXT: vphadduwq (%rdi), %xmm0
+; BDVER3-NEXT: #NO_APP
+; BDVER3-NEXT: retq
+;
+; BDVER4-LABEL: test_vphadduwq:
+; BDVER4: # %bb.0:
+; BDVER4-NEXT: #APP
+; BDVER4-NEXT: vphadduwq %xmm0, %xmm0
+; BDVER4-NEXT: vphadduwq (%rdi), %xmm0
+; BDVER4-NEXT: #NO_APP
+; BDVER4-NEXT: retq
call void asm sideeffect "vphadduwq $0, $0 \0a\09 vphadduwq $1, $0", "x,*m"(<2 x i64> %a0, <2 x i64> *%a1)
ret void
}
; GENERIC-NEXT: #NO_APP
; GENERIC-NEXT: retq # sched: [1:1.00]
;
-; BDVER-LABEL: test_vphaddwd:
-; BDVER: # %bb.0:
-; BDVER-NEXT: #APP
-; BDVER-NEXT: vphaddwd %xmm0, %xmm0
-; BDVER-NEXT: vphaddwd (%rdi), %xmm0
-; BDVER-NEXT: #NO_APP
-; BDVER-NEXT: retq
+; BDVER12-LABEL: test_vphaddwd:
+; BDVER12: # %bb.0:
+; BDVER12-NEXT: #APP
+; BDVER12-NEXT: vphaddwd %xmm0, %xmm0 # sched: [3:1.50]
+; BDVER12-NEXT: vphaddwd (%rdi), %xmm0 # sched: [9:1.50]
+; BDVER12-NEXT: #NO_APP
+; BDVER12-NEXT: retq # sched: [1:1.00]
+;
+; BDVER3-LABEL: test_vphaddwd:
+; BDVER3: # %bb.0:
+; BDVER3-NEXT: #APP
+; BDVER3-NEXT: vphaddwd %xmm0, %xmm0
+; BDVER3-NEXT: vphaddwd (%rdi), %xmm0
+; BDVER3-NEXT: #NO_APP
+; BDVER3-NEXT: retq
+;
+; BDVER4-LABEL: test_vphaddwd:
+; BDVER4: # %bb.0:
+; BDVER4-NEXT: #APP
+; BDVER4-NEXT: vphaddwd %xmm0, %xmm0
+; BDVER4-NEXT: vphaddwd (%rdi), %xmm0
+; BDVER4-NEXT: #NO_APP
+; BDVER4-NEXT: retq
call void asm sideeffect "vphaddwd $0, $0 \0a\09 vphaddwd $1, $0", "x,*m"(<2 x i64> %a0, <2 x i64> *%a1)
ret void
}
; GENERIC-NEXT: #NO_APP
; GENERIC-NEXT: retq # sched: [1:1.00]
;
-; BDVER-LABEL: test_vphaddwq:
-; BDVER: # %bb.0:
-; BDVER-NEXT: #APP
-; BDVER-NEXT: vphaddwq %xmm0, %xmm0
-; BDVER-NEXT: vphaddwq (%rdi), %xmm0
-; BDVER-NEXT: #NO_APP
-; BDVER-NEXT: retq
+; BDVER12-LABEL: test_vphaddwq:
+; BDVER12: # %bb.0:
+; BDVER12-NEXT: #APP
+; BDVER12-NEXT: vphaddwq %xmm0, %xmm0 # sched: [3:1.50]
+; BDVER12-NEXT: vphaddwq (%rdi), %xmm0 # sched: [9:1.50]
+; BDVER12-NEXT: #NO_APP
+; BDVER12-NEXT: retq # sched: [1:1.00]
+;
+; BDVER3-LABEL: test_vphaddwq:
+; BDVER3: # %bb.0:
+; BDVER3-NEXT: #APP
+; BDVER3-NEXT: vphaddwq %xmm0, %xmm0
+; BDVER3-NEXT: vphaddwq (%rdi), %xmm0
+; BDVER3-NEXT: #NO_APP
+; BDVER3-NEXT: retq
+;
+; BDVER4-LABEL: test_vphaddwq:
+; BDVER4: # %bb.0:
+; BDVER4-NEXT: #APP
+; BDVER4-NEXT: vphaddwq %xmm0, %xmm0
+; BDVER4-NEXT: vphaddwq (%rdi), %xmm0
+; BDVER4-NEXT: #NO_APP
+; BDVER4-NEXT: retq
call void asm sideeffect "vphaddwq $0, $0 \0a\09 vphaddwq $1, $0", "x,*m"(<2 x i64> %a0, <2 x i64> *%a1)
ret void
}
; GENERIC-NEXT: #NO_APP
; GENERIC-NEXT: retq # sched: [1:1.00]
;
-; BDVER-LABEL: test_vphsubbw:
-; BDVER: # %bb.0:
-; BDVER-NEXT: #APP
-; BDVER-NEXT: vphsubbw %xmm0, %xmm0
-; BDVER-NEXT: vphsubbw (%rdi), %xmm0
-; BDVER-NEXT: #NO_APP
-; BDVER-NEXT: retq
+; BDVER12-LABEL: test_vphsubbw:
+; BDVER12: # %bb.0:
+; BDVER12-NEXT: #APP
+; BDVER12-NEXT: vphsubbw %xmm0, %xmm0 # sched: [3:1.50]
+; BDVER12-NEXT: vphsubbw (%rdi), %xmm0 # sched: [9:1.50]
+; BDVER12-NEXT: #NO_APP
+; BDVER12-NEXT: retq # sched: [1:1.00]
+;
+; BDVER3-LABEL: test_vphsubbw:
+; BDVER3: # %bb.0:
+; BDVER3-NEXT: #APP
+; BDVER3-NEXT: vphsubbw %xmm0, %xmm0
+; BDVER3-NEXT: vphsubbw (%rdi), %xmm0
+; BDVER3-NEXT: #NO_APP
+; BDVER3-NEXT: retq
+;
+; BDVER4-LABEL: test_vphsubbw:
+; BDVER4: # %bb.0:
+; BDVER4-NEXT: #APP
+; BDVER4-NEXT: vphsubbw %xmm0, %xmm0
+; BDVER4-NEXT: vphsubbw (%rdi), %xmm0
+; BDVER4-NEXT: #NO_APP
+; BDVER4-NEXT: retq
call void asm sideeffect "vphsubbw $0, $0 \0a\09 vphsubbw $1, $0", "x,*m"(<2 x i64> %a0, <2 x i64> *%a1)
ret void
}
; GENERIC-NEXT: #NO_APP
; GENERIC-NEXT: retq # sched: [1:1.00]
;
-; BDVER-LABEL: test_vphsubdq:
-; BDVER: # %bb.0:
-; BDVER-NEXT: #APP
-; BDVER-NEXT: vphsubdq %xmm0, %xmm0
-; BDVER-NEXT: vphsubdq (%rdi), %xmm0
-; BDVER-NEXT: #NO_APP
-; BDVER-NEXT: retq
+; BDVER12-LABEL: test_vphsubdq:
+; BDVER12: # %bb.0:
+; BDVER12-NEXT: #APP
+; BDVER12-NEXT: vphsubdq %xmm0, %xmm0 # sched: [3:1.50]
+; BDVER12-NEXT: vphsubdq (%rdi), %xmm0 # sched: [9:1.50]
+; BDVER12-NEXT: #NO_APP
+; BDVER12-NEXT: retq # sched: [1:1.00]
+;
+; BDVER3-LABEL: test_vphsubdq:
+; BDVER3: # %bb.0:
+; BDVER3-NEXT: #APP
+; BDVER3-NEXT: vphsubdq %xmm0, %xmm0
+; BDVER3-NEXT: vphsubdq (%rdi), %xmm0
+; BDVER3-NEXT: #NO_APP
+; BDVER3-NEXT: retq
+;
+; BDVER4-LABEL: test_vphsubdq:
+; BDVER4: # %bb.0:
+; BDVER4-NEXT: #APP
+; BDVER4-NEXT: vphsubdq %xmm0, %xmm0
+; BDVER4-NEXT: vphsubdq (%rdi), %xmm0
+; BDVER4-NEXT: #NO_APP
+; BDVER4-NEXT: retq
call void asm sideeffect "vphsubdq $0, $0 \0a\09 vphsubdq $1, $0", "x,*m"(<2 x i64> %a0, <2 x i64> *%a1)
ret void
}
; GENERIC-NEXT: #NO_APP
; GENERIC-NEXT: retq # sched: [1:1.00]
;
-; BDVER-LABEL: test_vphsubwd:
-; BDVER: # %bb.0:
-; BDVER-NEXT: #APP
-; BDVER-NEXT: vphsubwd %xmm0, %xmm0
-; BDVER-NEXT: vphsubwd (%rdi), %xmm0
-; BDVER-NEXT: #NO_APP
-; BDVER-NEXT: retq
+; BDVER12-LABEL: test_vphsubwd:
+; BDVER12: # %bb.0:
+; BDVER12-NEXT: #APP
+; BDVER12-NEXT: vphsubwd %xmm0, %xmm0 # sched: [3:1.50]
+; BDVER12-NEXT: vphsubwd (%rdi), %xmm0 # sched: [9:1.50]
+; BDVER12-NEXT: #NO_APP
+; BDVER12-NEXT: retq # sched: [1:1.00]
+;
+; BDVER3-LABEL: test_vphsubwd:
+; BDVER3: # %bb.0:
+; BDVER3-NEXT: #APP
+; BDVER3-NEXT: vphsubwd %xmm0, %xmm0
+; BDVER3-NEXT: vphsubwd (%rdi), %xmm0
+; BDVER3-NEXT: #NO_APP
+; BDVER3-NEXT: retq
+;
+; BDVER4-LABEL: test_vphsubwd:
+; BDVER4: # %bb.0:
+; BDVER4-NEXT: #APP
+; BDVER4-NEXT: vphsubwd %xmm0, %xmm0
+; BDVER4-NEXT: vphsubwd (%rdi), %xmm0
+; BDVER4-NEXT: #NO_APP
+; BDVER4-NEXT: retq
call void asm sideeffect "vphsubwd $0, $0 \0a\09 vphsubwd $1, $0", "x,*m"(<2 x i64> %a0, <2 x i64> *%a1)
ret void
}
; GENERIC-NEXT: #NO_APP
; GENERIC-NEXT: retq # sched: [1:1.00]
;
-; BDVER-LABEL: test_vpmacsdd:
-; BDVER: # %bb.0:
-; BDVER-NEXT: #APP
-; BDVER-NEXT: vpmacsdd %xmm2, %xmm1, %xmm0, %xmm0
-; BDVER-NEXT: vpmacsdd %xmm2, (%rdi), %xmm0, %xmm0
-; BDVER-NEXT: #NO_APP
-; BDVER-NEXT: retq
+; BDVER12-LABEL: test_vpmacsdd:
+; BDVER12: # %bb.0:
+; BDVER12-NEXT: #APP
+; BDVER12-NEXT: vpmacsdd %xmm2, %xmm1, %xmm0, %xmm0 # sched: [5:1.00]
+; BDVER12-NEXT: vpmacsdd %xmm2, (%rdi), %xmm0, %xmm0 # sched: [11:1.00]
+; BDVER12-NEXT: #NO_APP
+; BDVER12-NEXT: retq # sched: [1:1.00]
+;
+; BDVER3-LABEL: test_vpmacsdd:
+; BDVER3: # %bb.0:
+; BDVER3-NEXT: #APP
+; BDVER3-NEXT: vpmacsdd %xmm2, %xmm1, %xmm0, %xmm0
+; BDVER3-NEXT: vpmacsdd %xmm2, (%rdi), %xmm0, %xmm0
+; BDVER3-NEXT: #NO_APP
+; BDVER3-NEXT: retq
+;
+; BDVER4-LABEL: test_vpmacsdd:
+; BDVER4: # %bb.0:
+; BDVER4-NEXT: #APP
+; BDVER4-NEXT: vpmacsdd %xmm2, %xmm1, %xmm0, %xmm0
+; BDVER4-NEXT: vpmacsdd %xmm2, (%rdi), %xmm0, %xmm0
+; BDVER4-NEXT: #NO_APP
+; BDVER4-NEXT: retq
call void asm sideeffect "vpmacsdd $2, $1, $0, $0 \0a\09 vpmacsdd $2, $3, $0, $0", "x,x,x,*m"(<2 x i64> %a0, <2 x i64> %a1, <2 x i64> %a2, <2 x i64> *%a3)
ret void
}
; GENERIC-NEXT: #NO_APP
; GENERIC-NEXT: retq # sched: [1:1.00]
;
-; BDVER-LABEL: test_vpmacsdqh:
-; BDVER: # %bb.0:
-; BDVER-NEXT: #APP
-; BDVER-NEXT: vpmacsdqh %xmm2, %xmm1, %xmm0, %xmm0
-; BDVER-NEXT: vpmacsdqh %xmm2, (%rdi), %xmm0, %xmm0
-; BDVER-NEXT: #NO_APP
-; BDVER-NEXT: retq
+; BDVER12-LABEL: test_vpmacsdqh:
+; BDVER12: # %bb.0:
+; BDVER12-NEXT: #APP
+; BDVER12-NEXT: vpmacsdqh %xmm2, %xmm1, %xmm0, %xmm0 # sched: [5:1.00]
+; BDVER12-NEXT: vpmacsdqh %xmm2, (%rdi), %xmm0, %xmm0 # sched: [11:1.00]
+; BDVER12-NEXT: #NO_APP
+; BDVER12-NEXT: retq # sched: [1:1.00]
+;
+; BDVER3-LABEL: test_vpmacsdqh:
+; BDVER3: # %bb.0:
+; BDVER3-NEXT: #APP
+; BDVER3-NEXT: vpmacsdqh %xmm2, %xmm1, %xmm0, %xmm0
+; BDVER3-NEXT: vpmacsdqh %xmm2, (%rdi), %xmm0, %xmm0
+; BDVER3-NEXT: #NO_APP
+; BDVER3-NEXT: retq
+;
+; BDVER4-LABEL: test_vpmacsdqh:
+; BDVER4: # %bb.0:
+; BDVER4-NEXT: #APP
+; BDVER4-NEXT: vpmacsdqh %xmm2, %xmm1, %xmm0, %xmm0
+; BDVER4-NEXT: vpmacsdqh %xmm2, (%rdi), %xmm0, %xmm0
+; BDVER4-NEXT: #NO_APP
+; BDVER4-NEXT: retq
call void asm sideeffect "vpmacsdqh $2, $1, $0, $0 \0a\09 vpmacsdqh $2, $3, $0, $0", "x,x,x,*m"(<2 x i64> %a0, <2 x i64> %a1, <2 x i64> %a2, <2 x i64> *%a3)
ret void
}
; GENERIC-NEXT: #NO_APP
; GENERIC-NEXT: retq # sched: [1:1.00]
;
-; BDVER-LABEL: test_vpmacsdql:
-; BDVER: # %bb.0:
-; BDVER-NEXT: #APP
-; BDVER-NEXT: vpmacsdql %xmm2, %xmm1, %xmm0, %xmm0
-; BDVER-NEXT: vpmacsdql %xmm2, (%rdi), %xmm0, %xmm0
-; BDVER-NEXT: #NO_APP
-; BDVER-NEXT: retq
+; BDVER12-LABEL: test_vpmacsdql:
+; BDVER12: # %bb.0:
+; BDVER12-NEXT: #APP
+; BDVER12-NEXT: vpmacsdql %xmm2, %xmm1, %xmm0, %xmm0 # sched: [5:1.00]
+; BDVER12-NEXT: vpmacsdql %xmm2, (%rdi), %xmm0, %xmm0 # sched: [11:1.00]
+; BDVER12-NEXT: #NO_APP
+; BDVER12-NEXT: retq # sched: [1:1.00]
+;
+; BDVER3-LABEL: test_vpmacsdql:
+; BDVER3: # %bb.0:
+; BDVER3-NEXT: #APP
+; BDVER3-NEXT: vpmacsdql %xmm2, %xmm1, %xmm0, %xmm0
+; BDVER3-NEXT: vpmacsdql %xmm2, (%rdi), %xmm0, %xmm0
+; BDVER3-NEXT: #NO_APP
+; BDVER3-NEXT: retq
+;
+; BDVER4-LABEL: test_vpmacsdql:
+; BDVER4: # %bb.0:
+; BDVER4-NEXT: #APP
+; BDVER4-NEXT: vpmacsdql %xmm2, %xmm1, %xmm0, %xmm0
+; BDVER4-NEXT: vpmacsdql %xmm2, (%rdi), %xmm0, %xmm0
+; BDVER4-NEXT: #NO_APP
+; BDVER4-NEXT: retq
call void asm sideeffect "vpmacsdql $2, $1, $0, $0 \0a\09 vpmacsdql $2, $3, $0, $0", "x,x,x,*m"(<2 x i64> %a0, <2 x i64> %a1, <2 x i64> %a2, <2 x i64> *%a3)
ret void
}
; GENERIC-NEXT: #NO_APP
; GENERIC-NEXT: retq # sched: [1:1.00]
;
-; BDVER-LABEL: test_vpmacssdd:
-; BDVER: # %bb.0:
-; BDVER-NEXT: #APP
-; BDVER-NEXT: vpmacssdd %xmm2, %xmm1, %xmm0, %xmm0
-; BDVER-NEXT: vpmacssdd %xmm2, (%rdi), %xmm0, %xmm0
-; BDVER-NEXT: #NO_APP
-; BDVER-NEXT: retq
+; BDVER12-LABEL: test_vpmacssdd:
+; BDVER12: # %bb.0:
+; BDVER12-NEXT: #APP
+; BDVER12-NEXT: vpmacssdd %xmm2, %xmm1, %xmm0, %xmm0 # sched: [5:1.00]
+; BDVER12-NEXT: vpmacssdd %xmm2, (%rdi), %xmm0, %xmm0 # sched: [11:1.00]
+; BDVER12-NEXT: #NO_APP
+; BDVER12-NEXT: retq # sched: [1:1.00]
+;
+; BDVER3-LABEL: test_vpmacssdd:
+; BDVER3: # %bb.0:
+; BDVER3-NEXT: #APP
+; BDVER3-NEXT: vpmacssdd %xmm2, %xmm1, %xmm0, %xmm0
+; BDVER3-NEXT: vpmacssdd %xmm2, (%rdi), %xmm0, %xmm0
+; BDVER3-NEXT: #NO_APP
+; BDVER3-NEXT: retq
+;
+; BDVER4-LABEL: test_vpmacssdd:
+; BDVER4: # %bb.0:
+; BDVER4-NEXT: #APP
+; BDVER4-NEXT: vpmacssdd %xmm2, %xmm1, %xmm0, %xmm0
+; BDVER4-NEXT: vpmacssdd %xmm2, (%rdi), %xmm0, %xmm0
+; BDVER4-NEXT: #NO_APP
+; BDVER4-NEXT: retq
call void asm sideeffect "vpmacssdd $2, $1, $0, $0 \0a\09 vpmacssdd $2, $3, $0, $0", "x,x,x,*m"(<2 x i64> %a0, <2 x i64> %a1, <2 x i64> %a2, <2 x i64> *%a3)
ret void
}
; GENERIC-NEXT: #NO_APP
; GENERIC-NEXT: retq # sched: [1:1.00]
;
-; BDVER-LABEL: test_vpmacssdqh:
-; BDVER: # %bb.0:
-; BDVER-NEXT: #APP
-; BDVER-NEXT: vpmacssdqh %xmm2, %xmm1, %xmm0, %xmm0
-; BDVER-NEXT: vpmacssdqh %xmm2, (%rdi), %xmm0, %xmm0
-; BDVER-NEXT: #NO_APP
-; BDVER-NEXT: retq
+; BDVER12-LABEL: test_vpmacssdqh:
+; BDVER12: # %bb.0:
+; BDVER12-NEXT: #APP
+; BDVER12-NEXT: vpmacssdqh %xmm2, %xmm1, %xmm0, %xmm0 # sched: [5:1.00]
+; BDVER12-NEXT: vpmacssdqh %xmm2, (%rdi), %xmm0, %xmm0 # sched: [11:1.00]
+; BDVER12-NEXT: #NO_APP
+; BDVER12-NEXT: retq # sched: [1:1.00]
+;
+; BDVER3-LABEL: test_vpmacssdqh:
+; BDVER3: # %bb.0:
+; BDVER3-NEXT: #APP
+; BDVER3-NEXT: vpmacssdqh %xmm2, %xmm1, %xmm0, %xmm0
+; BDVER3-NEXT: vpmacssdqh %xmm2, (%rdi), %xmm0, %xmm0
+; BDVER3-NEXT: #NO_APP
+; BDVER3-NEXT: retq
+;
+; BDVER4-LABEL: test_vpmacssdqh:
+; BDVER4: # %bb.0:
+; BDVER4-NEXT: #APP
+; BDVER4-NEXT: vpmacssdqh %xmm2, %xmm1, %xmm0, %xmm0
+; BDVER4-NEXT: vpmacssdqh %xmm2, (%rdi), %xmm0, %xmm0
+; BDVER4-NEXT: #NO_APP
+; BDVER4-NEXT: retq
call void asm sideeffect "vpmacssdqh $2, $1, $0, $0 \0a\09 vpmacssdqh $2, $3, $0, $0", "x,x,x,*m"(<2 x i64> %a0, <2 x i64> %a1, <2 x i64> %a2, <2 x i64> *%a3)
ret void
}
; GENERIC-NEXT: #NO_APP
; GENERIC-NEXT: retq # sched: [1:1.00]
;
-; BDVER-LABEL: test_vpmacssdql:
-; BDVER: # %bb.0:
-; BDVER-NEXT: #APP
-; BDVER-NEXT: vpmacssdql %xmm2, %xmm1, %xmm0, %xmm0
-; BDVER-NEXT: vpmacssdql %xmm2, (%rdi), %xmm0, %xmm0
-; BDVER-NEXT: #NO_APP
-; BDVER-NEXT: retq
+; BDVER12-LABEL: test_vpmacssdql:
+; BDVER12: # %bb.0:
+; BDVER12-NEXT: #APP
+; BDVER12-NEXT: vpmacssdql %xmm2, %xmm1, %xmm0, %xmm0 # sched: [5:1.00]
+; BDVER12-NEXT: vpmacssdql %xmm2, (%rdi), %xmm0, %xmm0 # sched: [11:1.00]
+; BDVER12-NEXT: #NO_APP
+; BDVER12-NEXT: retq # sched: [1:1.00]
+;
+; BDVER3-LABEL: test_vpmacssdql:
+; BDVER3: # %bb.0:
+; BDVER3-NEXT: #APP
+; BDVER3-NEXT: vpmacssdql %xmm2, %xmm1, %xmm0, %xmm0
+; BDVER3-NEXT: vpmacssdql %xmm2, (%rdi), %xmm0, %xmm0
+; BDVER3-NEXT: #NO_APP
+; BDVER3-NEXT: retq
+;
+; BDVER4-LABEL: test_vpmacssdql:
+; BDVER4: # %bb.0:
+; BDVER4-NEXT: #APP
+; BDVER4-NEXT: vpmacssdql %xmm2, %xmm1, %xmm0, %xmm0
+; BDVER4-NEXT: vpmacssdql %xmm2, (%rdi), %xmm0, %xmm0
+; BDVER4-NEXT: #NO_APP
+; BDVER4-NEXT: retq
call void asm sideeffect "vpmacssdql $2, $1, $0, $0 \0a\09 vpmacssdql $2, $3, $0, $0", "x,x,x,*m"(<2 x i64> %a0, <2 x i64> %a1, <2 x i64> %a2, <2 x i64> *%a3)
ret void
}
; GENERIC-NEXT: #NO_APP
; GENERIC-NEXT: retq # sched: [1:1.00]
;
-; BDVER-LABEL: test_vpmacsswd:
-; BDVER: # %bb.0:
-; BDVER-NEXT: #APP
-; BDVER-NEXT: vpmacsswd %xmm2, %xmm1, %xmm0, %xmm0
-; BDVER-NEXT: vpmacsswd %xmm2, (%rdi), %xmm0, %xmm0
-; BDVER-NEXT: #NO_APP
-; BDVER-NEXT: retq
+; BDVER12-LABEL: test_vpmacsswd:
+; BDVER12: # %bb.0:
+; BDVER12-NEXT: #APP
+; BDVER12-NEXT: vpmacsswd %xmm2, %xmm1, %xmm0, %xmm0 # sched: [5:1.00]
+; BDVER12-NEXT: vpmacsswd %xmm2, (%rdi), %xmm0, %xmm0 # sched: [11:1.00]
+; BDVER12-NEXT: #NO_APP
+; BDVER12-NEXT: retq # sched: [1:1.00]
+;
+; BDVER3-LABEL: test_vpmacsswd:
+; BDVER3: # %bb.0:
+; BDVER3-NEXT: #APP
+; BDVER3-NEXT: vpmacsswd %xmm2, %xmm1, %xmm0, %xmm0
+; BDVER3-NEXT: vpmacsswd %xmm2, (%rdi), %xmm0, %xmm0
+; BDVER3-NEXT: #NO_APP
+; BDVER3-NEXT: retq
+;
+; BDVER4-LABEL: test_vpmacsswd:
+; BDVER4: # %bb.0:
+; BDVER4-NEXT: #APP
+; BDVER4-NEXT: vpmacsswd %xmm2, %xmm1, %xmm0, %xmm0
+; BDVER4-NEXT: vpmacsswd %xmm2, (%rdi), %xmm0, %xmm0
+; BDVER4-NEXT: #NO_APP
+; BDVER4-NEXT: retq
call void asm sideeffect "vpmacsswd $2, $1, $0, $0 \0a\09 vpmacsswd $2, $3, $0, $0", "x,x,x,*m"(<2 x i64> %a0, <2 x i64> %a1, <2 x i64> %a2, <2 x i64> *%a3)
ret void
}
; GENERIC-NEXT: #NO_APP
; GENERIC-NEXT: retq # sched: [1:1.00]
;
-; BDVER-LABEL: test_vpmacssww:
-; BDVER: # %bb.0:
-; BDVER-NEXT: #APP
-; BDVER-NEXT: vpmacssww %xmm2, %xmm1, %xmm0, %xmm0
-; BDVER-NEXT: vpmacssww %xmm2, (%rdi), %xmm0, %xmm0
-; BDVER-NEXT: #NO_APP
-; BDVER-NEXT: retq
+; BDVER12-LABEL: test_vpmacssww:
+; BDVER12: # %bb.0:
+; BDVER12-NEXT: #APP
+; BDVER12-NEXT: vpmacssww %xmm2, %xmm1, %xmm0, %xmm0 # sched: [5:1.00]
+; BDVER12-NEXT: vpmacssww %xmm2, (%rdi), %xmm0, %xmm0 # sched: [11:1.00]
+; BDVER12-NEXT: #NO_APP
+; BDVER12-NEXT: retq # sched: [1:1.00]
+;
+; BDVER3-LABEL: test_vpmacssww:
+; BDVER3: # %bb.0:
+; BDVER3-NEXT: #APP
+; BDVER3-NEXT: vpmacssww %xmm2, %xmm1, %xmm0, %xmm0
+; BDVER3-NEXT: vpmacssww %xmm2, (%rdi), %xmm0, %xmm0
+; BDVER3-NEXT: #NO_APP
+; BDVER3-NEXT: retq
+;
+; BDVER4-LABEL: test_vpmacssww:
+; BDVER4: # %bb.0:
+; BDVER4-NEXT: #APP
+; BDVER4-NEXT: vpmacssww %xmm2, %xmm1, %xmm0, %xmm0
+; BDVER4-NEXT: vpmacssww %xmm2, (%rdi), %xmm0, %xmm0
+; BDVER4-NEXT: #NO_APP
+; BDVER4-NEXT: retq
call void asm sideeffect "vpmacssww $2, $1, $0, $0 \0a\09 vpmacssww $2, $3, $0, $0", "x,x,x,*m"(<2 x i64> %a0, <2 x i64> %a1, <2 x i64> %a2, <2 x i64> *%a3)
ret void
}
; GENERIC-NEXT: #NO_APP
; GENERIC-NEXT: retq # sched: [1:1.00]
;
-; BDVER-LABEL: test_vpmacswd:
-; BDVER: # %bb.0:
-; BDVER-NEXT: #APP
-; BDVER-NEXT: vpmacswd %xmm2, %xmm1, %xmm0, %xmm0
-; BDVER-NEXT: vpmacswd %xmm2, (%rdi), %xmm0, %xmm0
-; BDVER-NEXT: #NO_APP
-; BDVER-NEXT: retq
+; BDVER12-LABEL: test_vpmacswd:
+; BDVER12: # %bb.0:
+; BDVER12-NEXT: #APP
+; BDVER12-NEXT: vpmacswd %xmm2, %xmm1, %xmm0, %xmm0 # sched: [5:1.00]
+; BDVER12-NEXT: vpmacswd %xmm2, (%rdi), %xmm0, %xmm0 # sched: [11:1.00]
+; BDVER12-NEXT: #NO_APP
+; BDVER12-NEXT: retq # sched: [1:1.00]
+;
+; BDVER3-LABEL: test_vpmacswd:
+; BDVER3: # %bb.0:
+; BDVER3-NEXT: #APP
+; BDVER3-NEXT: vpmacswd %xmm2, %xmm1, %xmm0, %xmm0
+; BDVER3-NEXT: vpmacswd %xmm2, (%rdi), %xmm0, %xmm0
+; BDVER3-NEXT: #NO_APP
+; BDVER3-NEXT: retq
+;
+; BDVER4-LABEL: test_vpmacswd:
+; BDVER4: # %bb.0:
+; BDVER4-NEXT: #APP
+; BDVER4-NEXT: vpmacswd %xmm2, %xmm1, %xmm0, %xmm0
+; BDVER4-NEXT: vpmacswd %xmm2, (%rdi), %xmm0, %xmm0
+; BDVER4-NEXT: #NO_APP
+; BDVER4-NEXT: retq
call void asm sideeffect "vpmacswd $2, $1, $0, $0 \0a\09 vpmacswd $2, $3, $0, $0", "x,x,x,*m"(<2 x i64> %a0, <2 x i64> %a1, <2 x i64> %a2, <2 x i64> *%a3)
ret void
}
; GENERIC-NEXT: #NO_APP
; GENERIC-NEXT: retq # sched: [1:1.00]
;
-; BDVER-LABEL: test_vpmacsww:
-; BDVER: # %bb.0:
-; BDVER-NEXT: #APP
-; BDVER-NEXT: vpmacsww %xmm2, %xmm1, %xmm0, %xmm0
-; BDVER-NEXT: vpmacsww %xmm2, (%rdi), %xmm0, %xmm0
-; BDVER-NEXT: #NO_APP
-; BDVER-NEXT: retq
+; BDVER12-LABEL: test_vpmacsww:
+; BDVER12: # %bb.0:
+; BDVER12-NEXT: #APP
+; BDVER12-NEXT: vpmacsww %xmm2, %xmm1, %xmm0, %xmm0 # sched: [5:1.00]
+; BDVER12-NEXT: vpmacsww %xmm2, (%rdi), %xmm0, %xmm0 # sched: [11:1.00]
+; BDVER12-NEXT: #NO_APP
+; BDVER12-NEXT: retq # sched: [1:1.00]
+;
+; BDVER3-LABEL: test_vpmacsww:
+; BDVER3: # %bb.0:
+; BDVER3-NEXT: #APP
+; BDVER3-NEXT: vpmacsww %xmm2, %xmm1, %xmm0, %xmm0
+; BDVER3-NEXT: vpmacsww %xmm2, (%rdi), %xmm0, %xmm0
+; BDVER3-NEXT: #NO_APP
+; BDVER3-NEXT: retq
+;
+; BDVER4-LABEL: test_vpmacsww:
+; BDVER4: # %bb.0:
+; BDVER4-NEXT: #APP
+; BDVER4-NEXT: vpmacsww %xmm2, %xmm1, %xmm0, %xmm0
+; BDVER4-NEXT: vpmacsww %xmm2, (%rdi), %xmm0, %xmm0
+; BDVER4-NEXT: #NO_APP
+; BDVER4-NEXT: retq
call void asm sideeffect "vpmacsww $2, $1, $0, $0 \0a\09 vpmacsww $2, $3, $0, $0", "x,x,x,*m"(<2 x i64> %a0, <2 x i64> %a1, <2 x i64> %a2, <2 x i64> *%a3)
ret void
}
; GENERIC-NEXT: #NO_APP
; GENERIC-NEXT: retq # sched: [1:1.00]
;
-; BDVER-LABEL: test_vpmadcsswd:
-; BDVER: # %bb.0:
-; BDVER-NEXT: #APP
-; BDVER-NEXT: vpmadcsswd %xmm2, %xmm1, %xmm0, %xmm0
-; BDVER-NEXT: vpmadcsswd %xmm2, (%rdi), %xmm0, %xmm0
-; BDVER-NEXT: #NO_APP
-; BDVER-NEXT: retq
+; BDVER12-LABEL: test_vpmadcsswd:
+; BDVER12: # %bb.0:
+; BDVER12-NEXT: #APP
+; BDVER12-NEXT: vpmadcsswd %xmm2, %xmm1, %xmm0, %xmm0 # sched: [5:1.00]
+; BDVER12-NEXT: vpmadcsswd %xmm2, (%rdi), %xmm0, %xmm0 # sched: [11:1.00]
+; BDVER12-NEXT: #NO_APP
+; BDVER12-NEXT: retq # sched: [1:1.00]
+;
+; BDVER3-LABEL: test_vpmadcsswd:
+; BDVER3: # %bb.0:
+; BDVER3-NEXT: #APP
+; BDVER3-NEXT: vpmadcsswd %xmm2, %xmm1, %xmm0, %xmm0
+; BDVER3-NEXT: vpmadcsswd %xmm2, (%rdi), %xmm0, %xmm0
+; BDVER3-NEXT: #NO_APP
+; BDVER3-NEXT: retq
+;
+; BDVER4-LABEL: test_vpmadcsswd:
+; BDVER4: # %bb.0:
+; BDVER4-NEXT: #APP
+; BDVER4-NEXT: vpmadcsswd %xmm2, %xmm1, %xmm0, %xmm0
+; BDVER4-NEXT: vpmadcsswd %xmm2, (%rdi), %xmm0, %xmm0
+; BDVER4-NEXT: #NO_APP
+; BDVER4-NEXT: retq
call void asm sideeffect "vpmadcsswd $2, $1, $0, $0 \0a\09 vpmadcsswd $2, $3, $0, $0", "x,x,x,*m"(<2 x i64> %a0, <2 x i64> %a1, <2 x i64> %a2, <2 x i64> *%a3)
ret void
}
; GENERIC-NEXT: #NO_APP
; GENERIC-NEXT: retq # sched: [1:1.00]
;
-; BDVER-LABEL: test_vpmadcswd:
-; BDVER: # %bb.0:
-; BDVER-NEXT: #APP
-; BDVER-NEXT: vpmadcswd %xmm2, %xmm1, %xmm0, %xmm0
-; BDVER-NEXT: vpmadcswd %xmm2, (%rdi), %xmm0, %xmm0
-; BDVER-NEXT: #NO_APP
-; BDVER-NEXT: retq
+; BDVER12-LABEL: test_vpmadcswd:
+; BDVER12: # %bb.0:
+; BDVER12-NEXT: #APP
+; BDVER12-NEXT: vpmadcswd %xmm2, %xmm1, %xmm0, %xmm0 # sched: [5:1.00]
+; BDVER12-NEXT: vpmadcswd %xmm2, (%rdi), %xmm0, %xmm0 # sched: [11:1.00]
+; BDVER12-NEXT: #NO_APP
+; BDVER12-NEXT: retq # sched: [1:1.00]
+;
+; BDVER3-LABEL: test_vpmadcswd:
+; BDVER3: # %bb.0:
+; BDVER3-NEXT: #APP
+; BDVER3-NEXT: vpmadcswd %xmm2, %xmm1, %xmm0, %xmm0
+; BDVER3-NEXT: vpmadcswd %xmm2, (%rdi), %xmm0, %xmm0
+; BDVER3-NEXT: #NO_APP
+; BDVER3-NEXT: retq
+;
+; BDVER4-LABEL: test_vpmadcswd:
+; BDVER4: # %bb.0:
+; BDVER4-NEXT: #APP
+; BDVER4-NEXT: vpmadcswd %xmm2, %xmm1, %xmm0, %xmm0
+; BDVER4-NEXT: vpmadcswd %xmm2, (%rdi), %xmm0, %xmm0
+; BDVER4-NEXT: #NO_APP
+; BDVER4-NEXT: retq
call void asm sideeffect "vpmadcswd $2, $1, $0, $0 \0a\09 vpmadcswd $2, $3, $0, $0", "x,x,x,*m"(<2 x i64> %a0, <2 x i64> %a1, <2 x i64> %a2, <2 x i64> *%a3)
ret void
}
; GENERIC-NEXT: #NO_APP
; GENERIC-NEXT: retq # sched: [1:1.00]
;
-; BDVER-LABEL: test_vpperm:
-; BDVER: # %bb.0:
-; BDVER-NEXT: #APP
-; BDVER-NEXT: vpperm %xmm2, %xmm1, %xmm0, %xmm0
-; BDVER-NEXT: vpperm (%rdi), %xmm1, %xmm0, %xmm0
-; BDVER-NEXT: vpperm %xmm2, (%rdi), %xmm0, %xmm0
-; BDVER-NEXT: #NO_APP
-; BDVER-NEXT: retq
+; BDVER12-LABEL: test_vpperm:
+; BDVER12: # %bb.0:
+; BDVER12-NEXT: #APP
+; BDVER12-NEXT: vpperm %xmm2, %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
+; BDVER12-NEXT: vpperm (%rdi), %xmm1, %xmm0, %xmm0 # sched: [7:0.50]
+; BDVER12-NEXT: vpperm %xmm2, (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
+; BDVER12-NEXT: #NO_APP
+; BDVER12-NEXT: retq # sched: [1:1.00]
+;
+; BDVER3-LABEL: test_vpperm:
+; BDVER3: # %bb.0:
+; BDVER3-NEXT: #APP
+; BDVER3-NEXT: vpperm %xmm2, %xmm1, %xmm0, %xmm0
+; BDVER3-NEXT: vpperm (%rdi), %xmm1, %xmm0, %xmm0
+; BDVER3-NEXT: vpperm %xmm2, (%rdi), %xmm0, %xmm0
+; BDVER3-NEXT: #NO_APP
+; BDVER3-NEXT: retq
+;
+; BDVER4-LABEL: test_vpperm:
+; BDVER4: # %bb.0:
+; BDVER4-NEXT: #APP
+; BDVER4-NEXT: vpperm %xmm2, %xmm1, %xmm0, %xmm0
+; BDVER4-NEXT: vpperm (%rdi), %xmm1, %xmm0, %xmm0
+; BDVER4-NEXT: vpperm %xmm2, (%rdi), %xmm0, %xmm0
+; BDVER4-NEXT: #NO_APP
+; BDVER4-NEXT: retq
call void asm sideeffect "vpperm $2, $1, $0, $0 \0A\09 vpperm $3, $1, $0, $0 \0A\09 vpperm $2, $3, $0, $0", "x,x,x,*m"(<2 x i64> %a0, <2 x i64> %a1, <2 x i64> %a2, <2 x i64> *%a3)
ret void
}
; GENERIC-NEXT: #NO_APP
; GENERIC-NEXT: retq # sched: [1:1.00]
;
-; BDVER-LABEL: test_vprot:
-; BDVER: # %bb.0:
-; BDVER-NEXT: #APP
-; BDVER-NEXT: vprotb %xmm1, %xmm0, %xmm0
-; BDVER-NEXT: vprotd %xmm1, %xmm0, %xmm0
-; BDVER-NEXT: vprotq %xmm1, %xmm0, %xmm0
-; BDVER-NEXT: vprotw %xmm1, %xmm0, %xmm0
-; BDVER-NEXT: vprotb (%rdi), %xmm0, %xmm0
-; BDVER-NEXT: vprotd (%rdi), %xmm0, %xmm0
-; BDVER-NEXT: vprotq (%rdi), %xmm0, %xmm0
-; BDVER-NEXT: vprotw (%rdi), %xmm0, %xmm0
-; BDVER-NEXT: vprotb %xmm0, (%rdi), %xmm0
-; BDVER-NEXT: vprotd %xmm0, (%rdi), %xmm0
-; BDVER-NEXT: vprotq %xmm0, (%rdi), %xmm0
-; BDVER-NEXT: vprotw %xmm0, (%rdi), %xmm0
-; BDVER-NEXT: vprotb $7, %xmm0, %xmm0
-; BDVER-NEXT: vprotd $7, %xmm0, %xmm0
-; BDVER-NEXT: vprotq $7, %xmm0, %xmm0
-; BDVER-NEXT: vprotw $7, %xmm0, %xmm0
-; BDVER-NEXT: vprotb $7, (%rdi), %xmm0
-; BDVER-NEXT: vprotd $7, (%rdi), %xmm0
-; BDVER-NEXT: vprotq $7, (%rdi), %xmm0
-; BDVER-NEXT: vprotw $7, (%rdi), %xmm0
-; BDVER-NEXT: #NO_APP
-; BDVER-NEXT: retq
+; BDVER12-LABEL: test_vprot:
+; BDVER12: # %bb.0:
+; BDVER12-NEXT: #APP
+; BDVER12-NEXT: vprotb %xmm1, %xmm0, %xmm0 # sched: [1:1.00]
+; BDVER12-NEXT: vprotd %xmm1, %xmm0, %xmm0 # sched: [1:1.00]
+; BDVER12-NEXT: vprotq %xmm1, %xmm0, %xmm0 # sched: [1:1.00]
+; BDVER12-NEXT: vprotw %xmm1, %xmm0, %xmm0 # sched: [1:1.00]
+; BDVER12-NEXT: vprotb (%rdi), %xmm0, %xmm0 # sched: [7:1.00]
+; BDVER12-NEXT: vprotd (%rdi), %xmm0, %xmm0 # sched: [7:1.00]
+; BDVER12-NEXT: vprotq (%rdi), %xmm0, %xmm0 # sched: [7:1.00]
+; BDVER12-NEXT: vprotw (%rdi), %xmm0, %xmm0 # sched: [7:1.00]
+; BDVER12-NEXT: vprotb %xmm0, (%rdi), %xmm0 # sched: [7:1.00]
+; BDVER12-NEXT: vprotd %xmm0, (%rdi), %xmm0 # sched: [7:1.00]
+; BDVER12-NEXT: vprotq %xmm0, (%rdi), %xmm0 # sched: [7:1.00]
+; BDVER12-NEXT: vprotw %xmm0, (%rdi), %xmm0 # sched: [7:1.00]
+; BDVER12-NEXT: vprotb $7, %xmm0, %xmm0 # sched: [1:1.00]
+; BDVER12-NEXT: vprotd $7, %xmm0, %xmm0 # sched: [1:1.00]
+; BDVER12-NEXT: vprotq $7, %xmm0, %xmm0 # sched: [1:1.00]
+; BDVER12-NEXT: vprotw $7, %xmm0, %xmm0 # sched: [1:1.00]
+; BDVER12-NEXT: vprotb $7, (%rdi), %xmm0 # sched: [7:1.00]
+; BDVER12-NEXT: vprotd $7, (%rdi), %xmm0 # sched: [7:1.00]
+; BDVER12-NEXT: vprotq $7, (%rdi), %xmm0 # sched: [7:1.00]
+; BDVER12-NEXT: vprotw $7, (%rdi), %xmm0 # sched: [7:1.00]
+; BDVER12-NEXT: #NO_APP
+; BDVER12-NEXT: retq # sched: [1:1.00]
+;
+; BDVER3-LABEL: test_vprot:
+; BDVER3: # %bb.0:
+; BDVER3-NEXT: #APP
+; BDVER3-NEXT: vprotb %xmm1, %xmm0, %xmm0
+; BDVER3-NEXT: vprotd %xmm1, %xmm0, %xmm0
+; BDVER3-NEXT: vprotq %xmm1, %xmm0, %xmm0
+; BDVER3-NEXT: vprotw %xmm1, %xmm0, %xmm0
+; BDVER3-NEXT: vprotb (%rdi), %xmm0, %xmm0
+; BDVER3-NEXT: vprotd (%rdi), %xmm0, %xmm0
+; BDVER3-NEXT: vprotq (%rdi), %xmm0, %xmm0
+; BDVER3-NEXT: vprotw (%rdi), %xmm0, %xmm0
+; BDVER3-NEXT: vprotb %xmm0, (%rdi), %xmm0
+; BDVER3-NEXT: vprotd %xmm0, (%rdi), %xmm0
+; BDVER3-NEXT: vprotq %xmm0, (%rdi), %xmm0
+; BDVER3-NEXT: vprotw %xmm0, (%rdi), %xmm0
+; BDVER3-NEXT: vprotb $7, %xmm0, %xmm0
+; BDVER3-NEXT: vprotd $7, %xmm0, %xmm0
+; BDVER3-NEXT: vprotq $7, %xmm0, %xmm0
+; BDVER3-NEXT: vprotw $7, %xmm0, %xmm0
+; BDVER3-NEXT: vprotb $7, (%rdi), %xmm0
+; BDVER3-NEXT: vprotd $7, (%rdi), %xmm0
+; BDVER3-NEXT: vprotq $7, (%rdi), %xmm0
+; BDVER3-NEXT: vprotw $7, (%rdi), %xmm0
+; BDVER3-NEXT: #NO_APP
+; BDVER3-NEXT: retq
+;
+; BDVER4-LABEL: test_vprot:
+; BDVER4: # %bb.0:
+; BDVER4-NEXT: #APP
+; BDVER4-NEXT: vprotb %xmm1, %xmm0, %xmm0
+; BDVER4-NEXT: vprotd %xmm1, %xmm0, %xmm0
+; BDVER4-NEXT: vprotq %xmm1, %xmm0, %xmm0
+; BDVER4-NEXT: vprotw %xmm1, %xmm0, %xmm0
+; BDVER4-NEXT: vprotb (%rdi), %xmm0, %xmm0
+; BDVER4-NEXT: vprotd (%rdi), %xmm0, %xmm0
+; BDVER4-NEXT: vprotq (%rdi), %xmm0, %xmm0
+; BDVER4-NEXT: vprotw (%rdi), %xmm0, %xmm0
+; BDVER4-NEXT: vprotb %xmm0, (%rdi), %xmm0
+; BDVER4-NEXT: vprotd %xmm0, (%rdi), %xmm0
+; BDVER4-NEXT: vprotq %xmm0, (%rdi), %xmm0
+; BDVER4-NEXT: vprotw %xmm0, (%rdi), %xmm0
+; BDVER4-NEXT: vprotb $7, %xmm0, %xmm0
+; BDVER4-NEXT: vprotd $7, %xmm0, %xmm0
+; BDVER4-NEXT: vprotq $7, %xmm0, %xmm0
+; BDVER4-NEXT: vprotw $7, %xmm0, %xmm0
+; BDVER4-NEXT: vprotb $7, (%rdi), %xmm0
+; BDVER4-NEXT: vprotd $7, (%rdi), %xmm0
+; BDVER4-NEXT: vprotq $7, (%rdi), %xmm0
+; BDVER4-NEXT: vprotw $7, (%rdi), %xmm0
+; BDVER4-NEXT: #NO_APP
+; BDVER4-NEXT: retq
call void asm sideeffect "vprotb $1, $0, $0 \0A\09 vprotd $1, $0, $0 \0A\09 vprotq $1, $0, $0 \0A\09 vprotw $1, $0, $0 \0A\09 vprotb $2, $0, $0 \0A\09 vprotd $2, $0, $0 \0A\09 vprotq $2, $0, $0 \0A\09 vprotw $2, $0, $0 \0A\09 vprotb $0, $2, $0 \0A\09 vprotd $0, $2, $0 \0A\09 vprotq $0, $2, $0 \0A\09 vprotw $0, $2, $0 \0A\09 vprotb $3, $0, $0 \0A\09 vprotd $3, $0, $0 \0A\09 vprotq $3, $0, $0 \0A\09 vprotw $3, $0, $0 \0A\09 vprotb $3, $2, $0 \0A\09 vprotd $3, $2, $0 \0A\09 vprotq $3, $2, $0 \0A\09 vprotw $3, $2, $0", "x,x,*m,i"(<2 x i64> %a0, <2 x i64> %a1, <2 x i64> *%a2, i8 7)
ret void
}
; GENERIC-NEXT: #NO_APP
; GENERIC-NEXT: retq # sched: [1:1.00]
;
-; BDVER-LABEL: test_vpsha:
-; BDVER: # %bb.0:
-; BDVER-NEXT: #APP
-; BDVER-NEXT: vpshab %xmm1, %xmm0, %xmm0
-; BDVER-NEXT: vpshad %xmm1, %xmm0, %xmm0
-; BDVER-NEXT: vpshaq %xmm1, %xmm0, %xmm0
-; BDVER-NEXT: vpshaw %xmm1, %xmm0, %xmm0
-; BDVER-NEXT: vpshab (%rdi), %xmm0, %xmm0
-; BDVER-NEXT: vpshad (%rdi), %xmm0, %xmm0
-; BDVER-NEXT: vpshaq (%rdi), %xmm0, %xmm0
-; BDVER-NEXT: vpshaw (%rdi), %xmm0, %xmm0
-; BDVER-NEXT: vpshab %xmm0, (%rdi), %xmm0
-; BDVER-NEXT: vpshad %xmm0, (%rdi), %xmm0
-; BDVER-NEXT: vpshaq %xmm0, (%rdi), %xmm0
-; BDVER-NEXT: vpshaw %xmm0, (%rdi), %xmm0
-; BDVER-NEXT: #NO_APP
-; BDVER-NEXT: retq
+; BDVER12-LABEL: test_vpsha:
+; BDVER12: # %bb.0:
+; BDVER12-NEXT: #APP
+; BDVER12-NEXT: vpshab %xmm1, %xmm0, %xmm0 # sched: [1:1.00]
+; BDVER12-NEXT: vpshad %xmm1, %xmm0, %xmm0 # sched: [1:1.00]
+; BDVER12-NEXT: vpshaq %xmm1, %xmm0, %xmm0 # sched: [1:1.00]
+; BDVER12-NEXT: vpshaw %xmm1, %xmm0, %xmm0 # sched: [1:1.00]
+; BDVER12-NEXT: vpshab (%rdi), %xmm0, %xmm0 # sched: [7:1.00]
+; BDVER12-NEXT: vpshad (%rdi), %xmm0, %xmm0 # sched: [7:1.00]
+; BDVER12-NEXT: vpshaq (%rdi), %xmm0, %xmm0 # sched: [7:1.00]
+; BDVER12-NEXT: vpshaw (%rdi), %xmm0, %xmm0 # sched: [7:1.00]
+; BDVER12-NEXT: vpshab %xmm0, (%rdi), %xmm0 # sched: [7:1.00]
+; BDVER12-NEXT: vpshad %xmm0, (%rdi), %xmm0 # sched: [7:1.00]
+; BDVER12-NEXT: vpshaq %xmm0, (%rdi), %xmm0 # sched: [7:1.00]
+; BDVER12-NEXT: vpshaw %xmm0, (%rdi), %xmm0 # sched: [7:1.00]
+; BDVER12-NEXT: #NO_APP
+; BDVER12-NEXT: retq # sched: [1:1.00]
+;
+; BDVER3-LABEL: test_vpsha:
+; BDVER3: # %bb.0:
+; BDVER3-NEXT: #APP
+; BDVER3-NEXT: vpshab %xmm1, %xmm0, %xmm0
+; BDVER3-NEXT: vpshad %xmm1, %xmm0, %xmm0
+; BDVER3-NEXT: vpshaq %xmm1, %xmm0, %xmm0
+; BDVER3-NEXT: vpshaw %xmm1, %xmm0, %xmm0
+; BDVER3-NEXT: vpshab (%rdi), %xmm0, %xmm0
+; BDVER3-NEXT: vpshad (%rdi), %xmm0, %xmm0
+; BDVER3-NEXT: vpshaq (%rdi), %xmm0, %xmm0
+; BDVER3-NEXT: vpshaw (%rdi), %xmm0, %xmm0
+; BDVER3-NEXT: vpshab %xmm0, (%rdi), %xmm0
+; BDVER3-NEXT: vpshad %xmm0, (%rdi), %xmm0
+; BDVER3-NEXT: vpshaq %xmm0, (%rdi), %xmm0
+; BDVER3-NEXT: vpshaw %xmm0, (%rdi), %xmm0
+; BDVER3-NEXT: #NO_APP
+; BDVER3-NEXT: retq
+;
+; BDVER4-LABEL: test_vpsha:
+; BDVER4: # %bb.0:
+; BDVER4-NEXT: #APP
+; BDVER4-NEXT: vpshab %xmm1, %xmm0, %xmm0
+; BDVER4-NEXT: vpshad %xmm1, %xmm0, %xmm0
+; BDVER4-NEXT: vpshaq %xmm1, %xmm0, %xmm0
+; BDVER4-NEXT: vpshaw %xmm1, %xmm0, %xmm0
+; BDVER4-NEXT: vpshab (%rdi), %xmm0, %xmm0
+; BDVER4-NEXT: vpshad (%rdi), %xmm0, %xmm0
+; BDVER4-NEXT: vpshaq (%rdi), %xmm0, %xmm0
+; BDVER4-NEXT: vpshaw (%rdi), %xmm0, %xmm0
+; BDVER4-NEXT: vpshab %xmm0, (%rdi), %xmm0
+; BDVER4-NEXT: vpshad %xmm0, (%rdi), %xmm0
+; BDVER4-NEXT: vpshaq %xmm0, (%rdi), %xmm0
+; BDVER4-NEXT: vpshaw %xmm0, (%rdi), %xmm0
+; BDVER4-NEXT: #NO_APP
+; BDVER4-NEXT: retq
call void asm sideeffect "vpshab $1, $0, $0 \0A\09 vpshad $1, $0, $0 \0A\09 vpshaq $1, $0, $0 \0A\09 vpshaw $1, $0, $0 \0A\09 vpshab $2, $0, $0 \0A\09 vpshad $2, $0, $0 \0A\09 vpshaq $2, $0, $0 \0A\09 vpshaw $2, $0, $0 \0A\09 vpshab $0, $2, $0 \0A\09 vpshad $0, $2, $0 \0A\09 vpshaq $0, $2, $0 \0A\09 vpshaw $0, $2, $0", "x,x,*m"(<2 x i64> %a0, <2 x i64> %a1, <2 x i64> *%a2)
ret void
}
; GENERIC-NEXT: #NO_APP
; GENERIC-NEXT: retq # sched: [1:1.00]
;
-; BDVER-LABEL: test_vpshl:
-; BDVER: # %bb.0:
-; BDVER-NEXT: #APP
-; BDVER-NEXT: vpshlb %xmm1, %xmm0, %xmm0
-; BDVER-NEXT: vpshld %xmm1, %xmm0, %xmm0
-; BDVER-NEXT: vpshlq %xmm1, %xmm0, %xmm0
-; BDVER-NEXT: vpshlw %xmm1, %xmm0, %xmm0
-; BDVER-NEXT: vpshlb (%rdi), %xmm0, %xmm0
-; BDVER-NEXT: vpshld (%rdi), %xmm0, %xmm0
-; BDVER-NEXT: vpshlq (%rdi), %xmm0, %xmm0
-; BDVER-NEXT: vpshlw (%rdi), %xmm0, %xmm0
-; BDVER-NEXT: vpshlb %xmm0, (%rdi), %xmm0
-; BDVER-NEXT: vpshld %xmm0, (%rdi), %xmm0
-; BDVER-NEXT: vpshlq %xmm0, (%rdi), %xmm0
-; BDVER-NEXT: vpshlw %xmm0, (%rdi), %xmm0
-; BDVER-NEXT: #NO_APP
-; BDVER-NEXT: retq
+; BDVER12-LABEL: test_vpshl:
+; BDVER12: # %bb.0:
+; BDVER12-NEXT: #APP
+; BDVER12-NEXT: vpshlb %xmm1, %xmm0, %xmm0 # sched: [1:1.00]
+; BDVER12-NEXT: vpshld %xmm1, %xmm0, %xmm0 # sched: [1:1.00]
+; BDVER12-NEXT: vpshlq %xmm1, %xmm0, %xmm0 # sched: [1:1.00]
+; BDVER12-NEXT: vpshlw %xmm1, %xmm0, %xmm0 # sched: [1:1.00]
+; BDVER12-NEXT: vpshlb (%rdi), %xmm0, %xmm0 # sched: [7:1.00]
+; BDVER12-NEXT: vpshld (%rdi), %xmm0, %xmm0 # sched: [7:1.00]
+; BDVER12-NEXT: vpshlq (%rdi), %xmm0, %xmm0 # sched: [7:1.00]
+; BDVER12-NEXT: vpshlw (%rdi), %xmm0, %xmm0 # sched: [7:1.00]
+; BDVER12-NEXT: vpshlb %xmm0, (%rdi), %xmm0 # sched: [7:1.00]
+; BDVER12-NEXT: vpshld %xmm0, (%rdi), %xmm0 # sched: [7:1.00]
+; BDVER12-NEXT: vpshlq %xmm0, (%rdi), %xmm0 # sched: [7:1.00]
+; BDVER12-NEXT: vpshlw %xmm0, (%rdi), %xmm0 # sched: [7:1.00]
+; BDVER12-NEXT: #NO_APP
+; BDVER12-NEXT: retq # sched: [1:1.00]
+;
+; BDVER3-LABEL: test_vpshl:
+; BDVER3: # %bb.0:
+; BDVER3-NEXT: #APP
+; BDVER3-NEXT: vpshlb %xmm1, %xmm0, %xmm0
+; BDVER3-NEXT: vpshld %xmm1, %xmm0, %xmm0
+; BDVER3-NEXT: vpshlq %xmm1, %xmm0, %xmm0
+; BDVER3-NEXT: vpshlw %xmm1, %xmm0, %xmm0
+; BDVER3-NEXT: vpshlb (%rdi), %xmm0, %xmm0
+; BDVER3-NEXT: vpshld (%rdi), %xmm0, %xmm0
+; BDVER3-NEXT: vpshlq (%rdi), %xmm0, %xmm0
+; BDVER3-NEXT: vpshlw (%rdi), %xmm0, %xmm0
+; BDVER3-NEXT: vpshlb %xmm0, (%rdi), %xmm0
+; BDVER3-NEXT: vpshld %xmm0, (%rdi), %xmm0
+; BDVER3-NEXT: vpshlq %xmm0, (%rdi), %xmm0
+; BDVER3-NEXT: vpshlw %xmm0, (%rdi), %xmm0
+; BDVER3-NEXT: #NO_APP
+; BDVER3-NEXT: retq
+;
+; BDVER4-LABEL: test_vpshl:
+; BDVER4: # %bb.0:
+; BDVER4-NEXT: #APP
+; BDVER4-NEXT: vpshlb %xmm1, %xmm0, %xmm0
+; BDVER4-NEXT: vpshld %xmm1, %xmm0, %xmm0
+; BDVER4-NEXT: vpshlq %xmm1, %xmm0, %xmm0
+; BDVER4-NEXT: vpshlw %xmm1, %xmm0, %xmm0
+; BDVER4-NEXT: vpshlb (%rdi), %xmm0, %xmm0
+; BDVER4-NEXT: vpshld (%rdi), %xmm0, %xmm0
+; BDVER4-NEXT: vpshlq (%rdi), %xmm0, %xmm0
+; BDVER4-NEXT: vpshlw (%rdi), %xmm0, %xmm0
+; BDVER4-NEXT: vpshlb %xmm0, (%rdi), %xmm0
+; BDVER4-NEXT: vpshld %xmm0, (%rdi), %xmm0
+; BDVER4-NEXT: vpshlq %xmm0, (%rdi), %xmm0
+; BDVER4-NEXT: vpshlw %xmm0, (%rdi), %xmm0
+; BDVER4-NEXT: #NO_APP
+; BDVER4-NEXT: retq
call void asm sideeffect "vpshlb $1, $0, $0 \0A\09 vpshld $1, $0, $0 \0A\09 vpshlq $1, $0, $0 \0A\09 vpshlw $1, $0, $0 \0A\09 vpshlb $2, $0, $0 \0A\09 vpshld $2, $0, $0 \0A\09 vpshlq $2, $0, $0 \0A\09 vpshlw $2, $0, $0 \0A\09 vpshlb $0, $2, $0 \0A\09 vpshld $0, $2, $0 \0A\09 vpshlq $0, $2, $0 \0A\09 vpshlw $0, $2, $0", "x,x,*m"(<2 x i64> %a0, <2 x i64> %a1, <2 x i64> *%a2)
ret void
}
--- /dev/null
+# NOTE: Assertions have been autogenerated by utils/update_mca_test_checks.py
+# RUN: llvm-mca -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -iterations=1000 -timeline < %s | FileCheck %s
+
+add %eax, %ecx
+add %esi, %eax
+add %eax, %edx
+
+# CHECK: Iterations: 1000
+# CHECK-NEXT: Instructions: 3000
+# CHECK-NEXT: Total Cycles: 1004
+# CHECK-NEXT: Total uOps: 3000
+
+# CHECK: Dispatch Width: 4
+# CHECK-NEXT: uOps Per Cycle: 2.99
+# CHECK-NEXT: IPC: 2.99
+# CHECK-NEXT: Block RThroughput: 1.0
+
+# CHECK: Instruction Info:
+# CHECK-NEXT: [1]: #uOps
+# CHECK-NEXT: [2]: Latency
+# CHECK-NEXT: [3]: RThroughput
+# CHECK-NEXT: [4]: MayLoad
+# CHECK-NEXT: [5]: MayStore
+# CHECK-NEXT: [6]: HasSideEffects (U)
+
+# CHECK: [1] [2] [3] [4] [5] [6] Instructions:
+# CHECK-NEXT: 1 1 0.33 addl %eax, %ecx
+# CHECK-NEXT: 1 1 0.33 addl %esi, %eax
+# CHECK-NEXT: 1 1 0.33 addl %eax, %edx
+
+# CHECK: Resources:
+# CHECK-NEXT: [0] - SBDivider
+# CHECK-NEXT: [1] - SBFPDivider
+# CHECK-NEXT: [2] - SBPort0
+# CHECK-NEXT: [3] - SBPort1
+# CHECK-NEXT: [4] - SBPort4
+# CHECK-NEXT: [5] - SBPort5
+# CHECK-NEXT: [6.0] - SBPort23
+# CHECK-NEXT: [6.1] - SBPort23
+
+# CHECK: Resource pressure per iteration:
+# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6.0] [6.1]
+# CHECK-NEXT: - - 1.00 1.00 - 1.00 - -
+
+# CHECK: Resource pressure by instruction:
+# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6.0] [6.1] Instructions:
+# CHECK-NEXT: - - - 1.00 - - - - addl %eax, %ecx
+# CHECK-NEXT: - - - - - 1.00 - - addl %esi, %eax
+# CHECK-NEXT: - - 1.00 - - - - - addl %eax, %edx
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 0123
+# CHECK-NEXT: Index 0123456789
+
+# CHECK: [0,0] DeER . . . addl %eax, %ecx
+# CHECK-NEXT: [0,1] DeER . . . addl %esi, %eax
+# CHECK-NEXT: [0,2] D=eER. . . addl %eax, %edx
+# CHECK-NEXT: [1,0] D=eER. . . addl %eax, %ecx
+# CHECK-NEXT: [1,1] .DeER. . . addl %esi, %eax
+# CHECK-NEXT: [1,2] .D=eER . . addl %eax, %edx
+# CHECK-NEXT: [2,0] .D=eER . . addl %eax, %ecx
+# CHECK-NEXT: [2,1] .D=eER . . addl %esi, %eax
+# CHECK-NEXT: [2,2] . D=eER . . addl %eax, %edx
+# CHECK-NEXT: [3,0] . D=eER . . addl %eax, %ecx
+# CHECK-NEXT: [3,1] . D=eER . . addl %esi, %eax
+# CHECK-NEXT: [3,2] . D==eER . . addl %eax, %edx
+# CHECK-NEXT: [4,0] . D=eER . . addl %eax, %ecx
+# CHECK-NEXT: [4,1] . D=eER . . addl %esi, %eax
+# CHECK-NEXT: [4,2] . D==eER . . addl %eax, %edx
+# CHECK-NEXT: [5,0] . D==eER . . addl %eax, %ecx
+# CHECK-NEXT: [5,1] . D=eER . . addl %esi, %eax
+# CHECK-NEXT: [5,2] . D==eER. . addl %eax, %edx
+# CHECK-NEXT: [6,0] . D==eER. . addl %eax, %ecx
+# CHECK-NEXT: [6,1] . D==eER. . addl %esi, %eax
+# CHECK-NEXT: [6,2] . D==eER . addl %eax, %edx
+# CHECK-NEXT: [7,0] . D==eER . addl %eax, %ecx
+# CHECK-NEXT: [7,1] . D==eER . addl %esi, %eax
+# CHECK-NEXT: [7,2] . D===eER . addl %eax, %edx
+# CHECK-NEXT: [8,0] . .D==eER . addl %eax, %ecx
+# CHECK-NEXT: [8,1] . .D==eER . addl %esi, %eax
+# CHECK-NEXT: [8,2] . .D===eER. addl %eax, %edx
+# CHECK-NEXT: [9,0] . .D===eER. addl %eax, %ecx
+# CHECK-NEXT: [9,1] . . D==eER. addl %esi, %eax
+# CHECK-NEXT: [9,2] . . D===eER addl %eax, %edx
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 10 2.5 0.1 0.0 addl %eax, %ecx
+# CHECK-NEXT: 1. 10 2.2 0.1 0.0 addl %esi, %eax
+# CHECK-NEXT: 2. 10 3.0 0.0 0.0 addl %eax, %edx
--- /dev/null
+# NOTE: Assertions have been autogenerated by utils/update_mca_test_checks.py
+# RUN: llvm-mca -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -iterations=100 -resource-pressure=false -timeline -timeline-max-iterations=2 < %s | FileCheck %s
+
+## Sets register RAX.
+imulq $5, %rcx, %rax
+
+## Kills the previous definition of RAX.
+## The upper portion of RAX is cleared.
+lzcnt %ecx, %eax
+
+## The AND can start immediately after the LZCNT.
+## It doesn't need to wait for the IMUL.
+and %rcx, %rax
+bsf %rax, %rcx
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 400
+# CHECK-NEXT: Total Cycles: 803
+# CHECK-NEXT: Total uOps: 400
+
+# CHECK: Dispatch Width: 4
+# CHECK-NEXT: uOps Per Cycle: 0.50
+# CHECK-NEXT: IPC: 0.50
+# CHECK-NEXT: Block RThroughput: 3.0
+
+# CHECK: Instruction Info:
+# CHECK-NEXT: [1]: #uOps
+# CHECK-NEXT: [2]: Latency
+# CHECK-NEXT: [3]: RThroughput
+# CHECK-NEXT: [4]: MayLoad
+# CHECK-NEXT: [5]: MayStore
+# CHECK-NEXT: [6]: HasSideEffects (U)
+
+# CHECK: [1] [2] [3] [4] [5] [6] Instructions:
+# CHECK-NEXT: 1 3 1.00 imulq $5, %rcx, %rax
+# CHECK-NEXT: 1 3 1.00 lzcntl %ecx, %eax
+# CHECK-NEXT: 1 1 0.33 andq %rcx, %rax
+# CHECK-NEXT: 1 3 1.00 bsfq %rax, %rcx
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 012345678
+# CHECK-NEXT: Index 0123456789
+
+# CHECK: [0,0] DeeeER . . . imulq $5, %rcx, %rax
+# CHECK-NEXT: [0,1] D=eeeER . . . lzcntl %ecx, %eax
+# CHECK-NEXT: [0,2] D====eER . . . andq %rcx, %rax
+# CHECK-NEXT: [0,3] D=====eeeER . . bsfq %rax, %rcx
+# CHECK-NEXT: [1,0] .D=======eeeER . . imulq $5, %rcx, %rax
+# CHECK-NEXT: [1,1] .D========eeeER. . lzcntl %ecx, %eax
+# CHECK-NEXT: [1,2] .D===========eER . andq %rcx, %rax
+# CHECK-NEXT: [1,3] .D============eeeER bsfq %rax, %rcx
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 2 4.5 0.5 0.0 imulq $5, %rcx, %rax
+# CHECK-NEXT: 1. 2 5.5 1.5 0.0 lzcntl %ecx, %eax
+# CHECK-NEXT: 2. 2 8.5 0.0 0.0 andq %rcx, %rax
+# CHECK-NEXT: 3. 2 9.5 0.0 0.0 bsfq %rax, %rcx
--- /dev/null
+# NOTE: Assertions have been autogenerated by utils/update_mca_test_checks.py
+# RUN: llvm-mca -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -iterations=100 -resource-pressure=false -timeline -timeline-max-iterations=2 < %s | FileCheck %s
+
+# In this test, the VDIVPS takes 38 cycles to write to register YMM3. The first
+# VADDPS does not depend on the VDIVPS (the WAW dependency is eliminated at
+# register renaming stage). So the first VADDPS can be executed in parallel to
+# the VDIVPS. That VADDPS also writes to register XMM3, and the upper half of
+# YMM3 is implicitly cleared. As a consequence, the definition of YMM3 from the
+# VDIVPS is killed, and the subsequent VADDPS instructions don't need to wait
+# for the VDIVPS to complete.
+# The block reciprocal throughput is limited by the VDIVPS reciprocal throughput
+# (which is 38 cycles). The sequence of VADDPS can be executed in parallel on
+# the FPA unit; their latency is "hidden" by the long latency of the VDIVPS.
+
+vdivps %ymm0, %ymm1, %ymm3
+vaddps %xmm0, %xmm1, %xmm3
+vaddps %ymm3, %ymm1, %ymm4
+vaddps %ymm3, %ymm1, %ymm4
+vaddps %ymm3, %ymm1, %ymm4
+vaddps %ymm3, %ymm1, %ymm4
+vaddps %ymm3, %ymm1, %ymm4
+vaddps %ymm3, %ymm1, %ymm4
+vaddps %ymm3, %ymm1, %ymm4
+vaddps %ymm3, %ymm1, %ymm4
+vaddps %ymm3, %ymm1, %ymm4
+vaddps %ymm3, %ymm1, %ymm4
+vaddps %ymm3, %ymm1, %ymm4
+vaddps %ymm3, %ymm1, %ymm4
+vaddps %ymm3, %ymm1, %ymm4
+vaddps %ymm3, %ymm1, %ymm4
+vaddps %ymm3, %ymm1, %ymm4
+vandps %xmm4, %xmm1, %xmm0
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 1800
+# CHECK-NEXT: Total Cycles: 2804
+# CHECK-NEXT: Total uOps: 2000
+
+# CHECK: Dispatch Width: 4
+# CHECK-NEXT: uOps Per Cycle: 0.71
+# CHECK-NEXT: IPC: 0.64
+# CHECK-NEXT: Block RThroughput: 28.0
+
+# CHECK: Instruction Info:
+# CHECK-NEXT: [1]: #uOps
+# CHECK-NEXT: [2]: Latency
+# CHECK-NEXT: [3]: RThroughput
+# CHECK-NEXT: [4]: MayLoad
+# CHECK-NEXT: [5]: MayStore
+# CHECK-NEXT: [6]: HasSideEffects (U)
+
+# CHECK: [1] [2] [3] [4] [5] [6] Instructions:
+# CHECK-NEXT: 3 29 28.00 vdivps %ymm0, %ymm1, %ymm3
+# CHECK-NEXT: 1 3 1.00 vaddps %xmm0, %xmm1, %xmm3
+# CHECK-NEXT: 1 3 1.00 vaddps %ymm3, %ymm1, %ymm4
+# CHECK-NEXT: 1 3 1.00 vaddps %ymm3, %ymm1, %ymm4
+# CHECK-NEXT: 1 3 1.00 vaddps %ymm3, %ymm1, %ymm4
+# CHECK-NEXT: 1 3 1.00 vaddps %ymm3, %ymm1, %ymm4
+# CHECK-NEXT: 1 3 1.00 vaddps %ymm3, %ymm1, %ymm4
+# CHECK-NEXT: 1 3 1.00 vaddps %ymm3, %ymm1, %ymm4
+# CHECK-NEXT: 1 3 1.00 vaddps %ymm3, %ymm1, %ymm4
+# CHECK-NEXT: 1 3 1.00 vaddps %ymm3, %ymm1, %ymm4
+# CHECK-NEXT: 1 3 1.00 vaddps %ymm3, %ymm1, %ymm4
+# CHECK-NEXT: 1 3 1.00 vaddps %ymm3, %ymm1, %ymm4
+# CHECK-NEXT: 1 3 1.00 vaddps %ymm3, %ymm1, %ymm4
+# CHECK-NEXT: 1 3 1.00 vaddps %ymm3, %ymm1, %ymm4
+# CHECK-NEXT: 1 3 1.00 vaddps %ymm3, %ymm1, %ymm4
+# CHECK-NEXT: 1 3 1.00 vaddps %ymm3, %ymm1, %ymm4
+# CHECK-NEXT: 1 3 1.00 vaddps %ymm3, %ymm1, %ymm4
+# CHECK-NEXT: 1 1 1.00 vandps %xmm4, %xmm1, %xmm0
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 0123456789 0123456789 0123456789
+# CHECK-NEXT: Index 0123456789 0123456789 0123456789
+
+# CHECK: [0,0] DeeeeeeeeeeeeeeeeeeeeeeeeeeeeeER . . . . . . vdivps %ymm0, %ymm1, %ymm3
+# CHECK-NEXT: [0,1] DeeeE--------------------------R . . . . . . vaddps %xmm0, %xmm1, %xmm3
+# CHECK-NEXT: [0,2] .D==eeeE-----------------------R . . . . . . vaddps %ymm3, %ymm1, %ymm4
+# CHECK-NEXT: [0,3] .D===eeeE----------------------R . . . . . . vaddps %ymm3, %ymm1, %ymm4
+# CHECK-NEXT: [0,4] .D====eeeE---------------------R . . . . . . vaddps %ymm3, %ymm1, %ymm4
+# CHECK-NEXT: [0,5] .D=====eeeE--------------------R . . . . . . vaddps %ymm3, %ymm1, %ymm4
+# CHECK-NEXT: [0,6] . D=====eeeE-------------------R . . . . . . vaddps %ymm3, %ymm1, %ymm4
+# CHECK-NEXT: [0,7] . D======eeeE------------------R . . . . . . vaddps %ymm3, %ymm1, %ymm4
+# CHECK-NEXT: [0,8] . D=======eeeE-----------------R . . . . . . vaddps %ymm3, %ymm1, %ymm4
+# CHECK-NEXT: [0,9] . D========eeeE----------------R . . . . . . vaddps %ymm3, %ymm1, %ymm4
+# CHECK-NEXT: [0,10] . D========eeeE---------------R . . . . . . vaddps %ymm3, %ymm1, %ymm4
+# CHECK-NEXT: [0,11] . D=========eeeE--------------R . . . . . . vaddps %ymm3, %ymm1, %ymm4
+# CHECK-NEXT: [0,12] . D==========eeeE-------------R . . . . . . vaddps %ymm3, %ymm1, %ymm4
+# CHECK-NEXT: [0,13] . D===========eeeE------------R . . . . . . vaddps %ymm3, %ymm1, %ymm4
+# CHECK-NEXT: [0,14] . D===========eeeE-----------R . . . . . . vaddps %ymm3, %ymm1, %ymm4
+# CHECK-NEXT: [0,15] . D============eeeE----------R . . . . . . vaddps %ymm3, %ymm1, %ymm4
+# CHECK-NEXT: [0,16] . D=============eeeE---------R . . . . . . vaddps %ymm3, %ymm1, %ymm4
+# CHECK-NEXT: [0,17] . D================eE--------R . . . . . . vandps %xmm4, %xmm1, %xmm0
+# CHECK-NEXT: [1,0] . D=======================eeeeeeeeeeeeeeeeeeeeeeeeeeeeeER vdivps %ymm0, %ymm1, %ymm3
+# CHECK-NEXT: [1,1] . D================eeeE---------------------------------R vaddps %xmm0, %xmm1, %xmm3
+# CHECK-NEXT: [1,2] . .D==================eeeE------------------------------R vaddps %ymm3, %ymm1, %ymm4
+# CHECK-NEXT: [1,3] . .D===================eeeE-----------------------------R vaddps %ymm3, %ymm1, %ymm4
+# CHECK-NEXT: [1,4] . .D====================eeeE----------------------------R vaddps %ymm3, %ymm1, %ymm4
+# CHECK-NEXT: [1,5] . .D=====================eeeE---------------------------R vaddps %ymm3, %ymm1, %ymm4
+# CHECK-NEXT: [1,6] . . D=====================eeeE--------------------------R vaddps %ymm3, %ymm1, %ymm4
+# CHECK-NEXT: [1,7] . . D======================eeeE-------------------------R vaddps %ymm3, %ymm1, %ymm4
+# CHECK-NEXT: [1,8] . . D=======================eeeE------------------------R vaddps %ymm3, %ymm1, %ymm4
+# CHECK-NEXT: [1,9] . . D========================eeeE-----------------------R vaddps %ymm3, %ymm1, %ymm4
+# CHECK-NEXT: [1,10] . . D========================eeeE----------------------R vaddps %ymm3, %ymm1, %ymm4
+# CHECK-NEXT: [1,11] . . D=========================eeeE---------------------R vaddps %ymm3, %ymm1, %ymm4
+# CHECK-NEXT: [1,12] . . D==========================eeeE--------------------R vaddps %ymm3, %ymm1, %ymm4
+# CHECK-NEXT: [1,13] . . D===========================eeeE-------------------R vaddps %ymm3, %ymm1, %ymm4
+# CHECK-NEXT: [1,14] . . D===========================eeeE------------------R vaddps %ymm3, %ymm1, %ymm4
+# CHECK-NEXT: [1,15] . . D============================eeeE-----------------R vaddps %ymm3, %ymm1, %ymm4
+# CHECK-NEXT: [1,16] . . D=============================eeeE----------------R vaddps %ymm3, %ymm1, %ymm4
+# CHECK-NEXT: [1,17] . . D================================eE---------------R vandps %xmm4, %xmm1, %xmm0
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 2 12.5 4.0 0.0 vdivps %ymm0, %ymm1, %ymm3
+# CHECK-NEXT: 1. 2 9.0 0.5 29.5 vaddps %xmm0, %xmm1, %xmm3
+# CHECK-NEXT: 2. 2 11.0 0.0 26.5 vaddps %ymm3, %ymm1, %ymm4
+# CHECK-NEXT: 3. 2 12.0 1.0 25.5 vaddps %ymm3, %ymm1, %ymm4
+# CHECK-NEXT: 4. 2 13.0 2.0 24.5 vaddps %ymm3, %ymm1, %ymm4
+# CHECK-NEXT: 5. 2 14.0 3.0 23.5 vaddps %ymm3, %ymm1, %ymm4
+# CHECK-NEXT: 6. 2 14.0 4.0 22.5 vaddps %ymm3, %ymm1, %ymm4
+# CHECK-NEXT: 7. 2 15.0 5.0 21.5 vaddps %ymm3, %ymm1, %ymm4
+# CHECK-NEXT: 8. 2 16.0 6.0 20.5 vaddps %ymm3, %ymm1, %ymm4
+# CHECK-NEXT: 9. 2 17.0 7.0 19.5 vaddps %ymm3, %ymm1, %ymm4
+# CHECK-NEXT: 10. 2 17.0 8.0 18.5 vaddps %ymm3, %ymm1, %ymm4
+# CHECK-NEXT: 11. 2 18.0 9.0 17.5 vaddps %ymm3, %ymm1, %ymm4
+# CHECK-NEXT: 12. 2 19.0 10.0 16.5 vaddps %ymm3, %ymm1, %ymm4
+# CHECK-NEXT: 13. 2 20.0 11.0 15.5 vaddps %ymm3, %ymm1, %ymm4
+# CHECK-NEXT: 14. 2 20.0 12.0 14.5 vaddps %ymm3, %ymm1, %ymm4
+# CHECK-NEXT: 15. 2 21.0 13.0 13.5 vaddps %ymm3, %ymm1, %ymm4
+# CHECK-NEXT: 16. 2 22.0 14.0 12.5 vaddps %ymm3, %ymm1, %ymm4
+# CHECK-NEXT: 17. 2 25.0 0.0 11.5 vandps %xmm4, %xmm1, %xmm0
--- /dev/null
+# NOTE: Assertions have been autogenerated by utils/update_mca_test_checks.py
+# RUN: llvm-mca -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -timeline -timeline-max-iterations=3 -iterations=1500 < %s | FileCheck %s
+
+# Perf stat reports an IPC of 1.97 for this block of code.
+
+# The CMP instruction doesn't depend on the value of EAX. It can set the flags
+# without having to read the inputs.
+
+cmp %eax, %eax
+cmovae %ebx, %eax
+
+# CHECK: Iterations: 1500
+# CHECK-NEXT: Instructions: 3000
+# CHECK-NEXT: Total Cycles: 4503
+# CHECK-NEXT: Total uOps: 4500
+
+# CHECK: Dispatch Width: 4
+# CHECK-NEXT: uOps Per Cycle: 1.00
+# CHECK-NEXT: IPC: 0.67
+# CHECK-NEXT: Block RThroughput: 0.8
+
+# CHECK: Instruction Info:
+# CHECK-NEXT: [1]: #uOps
+# CHECK-NEXT: [2]: Latency
+# CHECK-NEXT: [3]: RThroughput
+# CHECK-NEXT: [4]: MayLoad
+# CHECK-NEXT: [5]: MayStore
+# CHECK-NEXT: [6]: HasSideEffects (U)
+
+# CHECK: [1] [2] [3] [4] [5] [6] Instructions:
+# CHECK-NEXT: 1 1 0.33 cmpl %eax, %eax
+# CHECK-NEXT: 2 2 0.67 cmovael %ebx, %eax
+
+# CHECK: Resources:
+# CHECK-NEXT: [0] - SBDivider
+# CHECK-NEXT: [1] - SBFPDivider
+# CHECK-NEXT: [2] - SBPort0
+# CHECK-NEXT: [3] - SBPort1
+# CHECK-NEXT: [4] - SBPort4
+# CHECK-NEXT: [5] - SBPort5
+# CHECK-NEXT: [6.0] - SBPort23
+# CHECK-NEXT: [6.1] - SBPort23
+
+# CHECK: Resource pressure per iteration:
+# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6.0] [6.1]
+# CHECK-NEXT: - - 1.00 1.00 - 1.00 - -
+
+# CHECK: Resource pressure by instruction:
+# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6.0] [6.1] Instructions:
+# CHECK-NEXT: - - - - - 1.00 - - cmpl %eax, %eax
+# CHECK-NEXT: - - 1.00 1.00 - - - - cmovael %ebx, %eax
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 01
+# CHECK-NEXT: Index 0123456789
+
+# CHECK: [0,0] DeER . .. cmpl %eax, %eax
+# CHECK-NEXT: [0,1] D=eeER .. cmovael %ebx, %eax
+# CHECK-NEXT: [1,0] D===eER .. cmpl %eax, %eax
+# CHECK-NEXT: [1,1] .D===eeER .. cmovael %ebx, %eax
+# CHECK-NEXT: [2,0] .D=====eER.. cmpl %eax, %eax
+# CHECK-NEXT: [2,1] . D=====eeER cmovael %ebx, %eax
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 3 3.7 0.3 0.0 cmpl %eax, %eax
+# CHECK-NEXT: 1. 3 4.0 0.0 0.0 cmovael %ebx, %eax
--- /dev/null
+# NOTE: Assertions have been autogenerated by utils/update_mca_test_checks.py
+# RUN: llvm-mca -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -timeline -timeline-max-iterations=3 -iterations=1500 < %s | FileCheck %s
+
+# perf stat reports an IPC of 2.00 for this block of code.
+
+# All of the vector packed compares from this test are dependency breaking
+# instructions. That means, there is no RAW dependency between any of the
+# instructions, and the code can be fully parallelized in hardware.
+
+vpcmpeqb %xmm0, %xmm0, %xmm1
+vpcmpeqw %xmm1, %xmm1, %xmm2
+vpcmpeqd %xmm2, %xmm2, %xmm3
+vpcmpeqq %xmm3, %xmm3, %xmm0
+
+# CHECK: Iterations: 1500
+# CHECK-NEXT: Instructions: 6000
+# CHECK-NEXT: Total Cycles: 6003
+# CHECK-NEXT: Total uOps: 6000
+
+# CHECK: Dispatch Width: 4
+# CHECK-NEXT: uOps Per Cycle: 1.00
+# CHECK-NEXT: IPC: 1.00
+# CHECK-NEXT: Block RThroughput: 2.0
+
+# CHECK: Instruction Info:
+# CHECK-NEXT: [1]: #uOps
+# CHECK-NEXT: [2]: Latency
+# CHECK-NEXT: [3]: RThroughput
+# CHECK-NEXT: [4]: MayLoad
+# CHECK-NEXT: [5]: MayStore
+# CHECK-NEXT: [6]: HasSideEffects (U)
+
+# CHECK: [1] [2] [3] [4] [5] [6] Instructions:
+# CHECK-NEXT: 1 1 0.50 vpcmpeqb %xmm0, %xmm0, %xmm1
+# CHECK-NEXT: 1 1 0.50 vpcmpeqw %xmm1, %xmm1, %xmm2
+# CHECK-NEXT: 1 1 0.50 vpcmpeqd %xmm2, %xmm2, %xmm3
+# CHECK-NEXT: 1 1 0.50 vpcmpeqq %xmm3, %xmm3, %xmm0
+
+# CHECK: Resources:
+# CHECK-NEXT: [0] - SBDivider
+# CHECK-NEXT: [1] - SBFPDivider
+# CHECK-NEXT: [2] - SBPort0
+# CHECK-NEXT: [3] - SBPort1
+# CHECK-NEXT: [4] - SBPort4
+# CHECK-NEXT: [5] - SBPort5
+# CHECK-NEXT: [6.0] - SBPort23
+# CHECK-NEXT: [6.1] - SBPort23
+
+# CHECK: Resource pressure per iteration:
+# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6.0] [6.1]
+# CHECK-NEXT: - - - 2.00 - 2.00 - -
+
+# CHECK: Resource pressure by instruction:
+# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6.0] [6.1] Instructions:
+# CHECK-NEXT: - - - - - 1.00 - - vpcmpeqb %xmm0, %xmm0, %xmm1
+# CHECK-NEXT: - - - 1.00 - - - - vpcmpeqw %xmm1, %xmm1, %xmm2
+# CHECK-NEXT: - - - - - 1.00 - - vpcmpeqd %xmm2, %xmm2, %xmm3
+# CHECK-NEXT: - - - 1.00 - - - - vpcmpeqq %xmm3, %xmm3, %xmm0
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 01234
+# CHECK-NEXT: Index 0123456789
+
+# CHECK: [0,0] DeER . . . vpcmpeqb %xmm0, %xmm0, %xmm1
+# CHECK-NEXT: [0,1] D=eER. . . vpcmpeqw %xmm1, %xmm1, %xmm2
+# CHECK-NEXT: [0,2] D==eER . . vpcmpeqd %xmm2, %xmm2, %xmm3
+# CHECK-NEXT: [0,3] D===eER . . vpcmpeqq %xmm3, %xmm3, %xmm0
+# CHECK-NEXT: [1,0] .D===eER . . vpcmpeqb %xmm0, %xmm0, %xmm1
+# CHECK-NEXT: [1,1] .D====eER . . vpcmpeqw %xmm1, %xmm1, %xmm2
+# CHECK-NEXT: [1,2] .D=====eER. . vpcmpeqd %xmm2, %xmm2, %xmm3
+# CHECK-NEXT: [1,3] .D======eER . vpcmpeqq %xmm3, %xmm3, %xmm0
+# CHECK-NEXT: [2,0] . D======eER . vpcmpeqb %xmm0, %xmm0, %xmm1
+# CHECK-NEXT: [2,1] . D=======eER . vpcmpeqw %xmm1, %xmm1, %xmm2
+# CHECK-NEXT: [2,2] . D========eER. vpcmpeqd %xmm2, %xmm2, %xmm3
+# CHECK-NEXT: [2,3] . D=========eER vpcmpeqq %xmm3, %xmm3, %xmm0
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 3 4.0 0.3 0.0 vpcmpeqb %xmm0, %xmm0, %xmm1
+# CHECK-NEXT: 1. 3 5.0 0.0 0.0 vpcmpeqw %xmm1, %xmm1, %xmm2
+# CHECK-NEXT: 2. 3 6.0 0.0 0.0 vpcmpeqd %xmm2, %xmm2, %xmm3
+# CHECK-NEXT: 3. 3 7.0 0.0 0.0 vpcmpeqq %xmm3, %xmm3, %xmm0
--- /dev/null
+# NOTE: Assertions have been autogenerated by utils/update_mca_test_checks.py
+# RUN: llvm-mca -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -timeline -timeline-max-iterations=3 -iterations=1500 < %s | FileCheck %s
+
+# perf stat reports an IPC of 2.00 for this block of code.
+
+# All of the vector packed compares from this test are zero idioms. These zero
+# idioms are all detected and removed by the register renamer. That means, no
+# uOp is executed, and there is no RAW dependency for any of the packed
+# compares.
+
+vpcmpgtb %xmm0, %xmm0, %xmm1
+vpcmpgtw %xmm1, %xmm1, %xmm2
+vpcmpgtd %xmm2, %xmm2, %xmm3
+vpcmpgtq %xmm3, %xmm3, %xmm0
+
+# CHECK: Iterations: 1500
+# CHECK-NEXT: Instructions: 6000
+# CHECK-NEXT: Total Cycles: 1501
+# CHECK-NEXT: Total uOps: 6000
+
+# CHECK: Dispatch Width: 4
+# CHECK-NEXT: uOps Per Cycle: 4.00
+# CHECK-NEXT: IPC: 4.00
+# CHECK-NEXT: Block RThroughput: 1.0
+
+# CHECK: Instruction Info:
+# CHECK-NEXT: [1]: #uOps
+# CHECK-NEXT: [2]: Latency
+# CHECK-NEXT: [3]: RThroughput
+# CHECK-NEXT: [4]: MayLoad
+# CHECK-NEXT: [5]: MayStore
+# CHECK-NEXT: [6]: HasSideEffects (U)
+
+# CHECK: [1] [2] [3] [4] [5] [6] Instructions:
+# CHECK-NEXT: 1 0 0.25 vpcmpgtb %xmm0, %xmm0, %xmm1
+# CHECK-NEXT: 1 0 0.25 vpcmpgtw %xmm1, %xmm1, %xmm2
+# CHECK-NEXT: 1 0 0.25 vpcmpgtd %xmm2, %xmm2, %xmm3
+# CHECK-NEXT: 1 0 0.25 vpcmpgtq %xmm3, %xmm3, %xmm0
+
+# CHECK: Resources:
+# CHECK-NEXT: [0] - SBDivider
+# CHECK-NEXT: [1] - SBFPDivider
+# CHECK-NEXT: [2] - SBPort0
+# CHECK-NEXT: [3] - SBPort1
+# CHECK-NEXT: [4] - SBPort4
+# CHECK-NEXT: [5] - SBPort5
+# CHECK-NEXT: [6.0] - SBPort23
+# CHECK-NEXT: [6.1] - SBPort23
+
+# CHECK: Resource pressure per iteration:
+# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6.0] [6.1]
+# CHECK-NEXT: - - - - - - - -
+
+# CHECK: Resource pressure by instruction:
+# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6.0] [6.1] Instructions:
+# CHECK-NEXT: - - - - - - - - vpcmpgtb %xmm0, %xmm0, %xmm1
+# CHECK-NEXT: - - - - - - - - vpcmpgtw %xmm1, %xmm1, %xmm2
+# CHECK-NEXT: - - - - - - - - vpcmpgtd %xmm2, %xmm2, %xmm3
+# CHECK-NEXT: - - - - - - - - vpcmpgtq %xmm3, %xmm3, %xmm0
+
+# CHECK: Timeline view:
+# CHECK-NEXT: Index 0123
+
+# CHECK: [0,0] DR . vpcmpgtb %xmm0, %xmm0, %xmm1
+# CHECK-NEXT: [0,1] DR . vpcmpgtw %xmm1, %xmm1, %xmm2
+# CHECK-NEXT: [0,2] DR . vpcmpgtd %xmm2, %xmm2, %xmm3
+# CHECK-NEXT: [0,3] DR . vpcmpgtq %xmm3, %xmm3, %xmm0
+# CHECK-NEXT: [1,0] .DR. vpcmpgtb %xmm0, %xmm0, %xmm1
+# CHECK-NEXT: [1,1] .DR. vpcmpgtw %xmm1, %xmm1, %xmm2
+# CHECK-NEXT: [1,2] .DR. vpcmpgtd %xmm2, %xmm2, %xmm3
+# CHECK-NEXT: [1,3] .DR. vpcmpgtq %xmm3, %xmm3, %xmm0
+# CHECK-NEXT: [2,0] . DR vpcmpgtb %xmm0, %xmm0, %xmm1
+# CHECK-NEXT: [2,1] . DR vpcmpgtw %xmm1, %xmm1, %xmm2
+# CHECK-NEXT: [2,2] . DR vpcmpgtd %xmm2, %xmm2, %xmm3
+# CHECK-NEXT: [2,3] . DR vpcmpgtq %xmm3, %xmm3, %xmm0
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 3 0.0 0.0 0.0 vpcmpgtb %xmm0, %xmm0, %xmm1
+# CHECK-NEXT: 1. 3 0.0 0.0 0.0 vpcmpgtw %xmm1, %xmm1, %xmm2
+# CHECK-NEXT: 2. 3 0.0 0.0 0.0 vpcmpgtd %xmm2, %xmm2, %xmm3
+# CHECK-NEXT: 3. 3 0.0 0.0 0.0 vpcmpgtq %xmm3, %xmm3, %xmm0
--- /dev/null
+# NOTE: Assertions have been autogenerated by utils/update_mca_test_checks.py
+# RUN: llvm-mca -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -timeline -timeline-max-iterations=3 -iterations=1500 < %s | FileCheck %s
+
+# perf stat reports an IPC of 1.00 for this code block.
+
+# Although both SBB are dependency breaking instructions, there is still an
+# implicit dependency on EFLAGS which limits the ILP. So, the hardware backend
+# can only execute one instruction per cycle.
+
+sbb %edx, %edx
+sbb %eax, %eax
+
+# CHECK: Iterations: 1500
+# CHECK-NEXT: Instructions: 3000
+# CHECK-NEXT: Total Cycles: 6003
+# CHECK-NEXT: Total uOps: 6000
+
+# CHECK: Dispatch Width: 4
+# CHECK-NEXT: uOps Per Cycle: 1.00
+# CHECK-NEXT: IPC: 0.50
+# CHECK-NEXT: Block RThroughput: 1.0
+
+# CHECK: Instruction Info:
+# CHECK-NEXT: [1]: #uOps
+# CHECK-NEXT: [2]: Latency
+# CHECK-NEXT: [3]: RThroughput
+# CHECK-NEXT: [4]: MayLoad
+# CHECK-NEXT: [5]: MayStore
+# CHECK-NEXT: [6]: HasSideEffects (U)
+
+# CHECK: [1] [2] [3] [4] [5] [6] Instructions:
+# CHECK-NEXT: 2 2 0.67 sbbl %edx, %edx
+# CHECK-NEXT: 2 2 0.67 sbbl %eax, %eax
+
+# CHECK: Resources:
+# CHECK-NEXT: [0] - SBDivider
+# CHECK-NEXT: [1] - SBFPDivider
+# CHECK-NEXT: [2] - SBPort0
+# CHECK-NEXT: [3] - SBPort1
+# CHECK-NEXT: [4] - SBPort4
+# CHECK-NEXT: [5] - SBPort5
+# CHECK-NEXT: [6.0] - SBPort23
+# CHECK-NEXT: [6.1] - SBPort23
+
+# CHECK: Resource pressure per iteration:
+# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6.0] [6.1]
+# CHECK-NEXT: - - 1.33 1.33 - 1.33 - -
+
+# CHECK: Resource pressure by instruction:
+# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6.0] [6.1] Instructions:
+# CHECK-NEXT: - - 0.67 0.67 - 0.67 - - sbbl %edx, %edx
+# CHECK-NEXT: - - 0.67 0.67 - 0.67 - - sbbl %eax, %eax
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 01234
+# CHECK-NEXT: Index 0123456789
+
+# CHECK: [0,0] DeeER. . . sbbl %edx, %edx
+# CHECK-NEXT: [0,1] D==eeER . . sbbl %eax, %eax
+# CHECK-NEXT: [1,0] .D===eeER . . sbbl %edx, %edx
+# CHECK-NEXT: [1,1] .D=====eeER . sbbl %eax, %eax
+# CHECK-NEXT: [2,0] . D======eeER . sbbl %edx, %edx
+# CHECK-NEXT: [2,1] . D========eeER sbbl %eax, %eax
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 3 4.0 0.3 0.0 sbbl %edx, %edx
+# CHECK-NEXT: 1. 3 6.0 0.0 0.0 sbbl %eax, %eax
--- /dev/null
+# NOTE: Assertions have been autogenerated by utils/update_mca_test_checks.py
+# RUN: llvm-mca -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -timeline -timeline-max-iterations=3 -iterations=1500 < %s | FileCheck %s
+
+# perf stat reports a throughput of 1.51 IPC for this block of code.
+
+# The SBB does not depend on the value of register EAX. That means, it doesn't
+# have to wait for the IMUL to write-back on EAX. However, it still depends on
+# the ADD for EFLAGS.
+
+imul %edx, %eax
+add %edx, %edx
+sbb %eax, %eax
+
+# CHECK: Iterations: 1500
+# CHECK-NEXT: Instructions: 4500
+# CHECK-NEXT: Total Cycles: 7503
+# CHECK-NEXT: Total uOps: 6000
+
+# CHECK: Dispatch Width: 4
+# CHECK-NEXT: uOps Per Cycle: 0.80
+# CHECK-NEXT: IPC: 0.60
+# CHECK-NEXT: Block RThroughput: 1.0
+
+# CHECK: Instruction Info:
+# CHECK-NEXT: [1]: #uOps
+# CHECK-NEXT: [2]: Latency
+# CHECK-NEXT: [3]: RThroughput
+# CHECK-NEXT: [4]: MayLoad
+# CHECK-NEXT: [5]: MayStore
+# CHECK-NEXT: [6]: HasSideEffects (U)
+
+# CHECK: [1] [2] [3] [4] [5] [6] Instructions:
+# CHECK-NEXT: 1 3 1.00 imull %edx, %eax
+# CHECK-NEXT: 1 1 0.33 addl %edx, %edx
+# CHECK-NEXT: 2 2 0.67 sbbl %eax, %eax
+
+# CHECK: Resources:
+# CHECK-NEXT: [0] - SBDivider
+# CHECK-NEXT: [1] - SBFPDivider
+# CHECK-NEXT: [2] - SBPort0
+# CHECK-NEXT: [3] - SBPort1
+# CHECK-NEXT: [4] - SBPort4
+# CHECK-NEXT: [5] - SBPort5
+# CHECK-NEXT: [6.0] - SBPort23
+# CHECK-NEXT: [6.1] - SBPort23
+
+# CHECK: Resource pressure per iteration:
+# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6.0] [6.1]
+# CHECK-NEXT: - - 1.33 1.33 - 1.33 - -
+
+# CHECK: Resource pressure by instruction:
+# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6.0] [6.1] Instructions:
+# CHECK-NEXT: - - - 1.00 - - - - imull %edx, %eax
+# CHECK-NEXT: - - 0.33 0.33 - 0.34 - - addl %edx, %edx
+# CHECK-NEXT: - - 1.00 - - 1.00 - - sbbl %eax, %eax
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 01234567
+# CHECK-NEXT: Index 0123456789
+
+# CHECK: [0,0] DeeeER . . . imull %edx, %eax
+# CHECK-NEXT: [0,1] DeE--R . . . addl %edx, %edx
+# CHECK-NEXT: [0,2] D===eeER . . . sbbl %eax, %eax
+# CHECK-NEXT: [1,0] .D====eeeER . . imull %edx, %eax
+# CHECK-NEXT: [1,1] .DeE------R . . addl %edx, %edx
+# CHECK-NEXT: [1,2] .D=======eeER . . sbbl %eax, %eax
+# CHECK-NEXT: [2,0] . D========eeeER . imull %edx, %eax
+# CHECK-NEXT: [2,1] . DeE----------R . addl %edx, %edx
+# CHECK-NEXT: [2,2] . D===========eeER sbbl %eax, %eax
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 3 5.0 0.3 0.0 imull %edx, %eax
+# CHECK-NEXT: 1. 3 1.0 0.3 6.0 addl %edx, %edx
+# CHECK-NEXT: 2. 3 8.0 0.0 0.0 sbbl %eax, %eax
--- /dev/null
+# NOTE: Assertions have been autogenerated by utils/update_mca_test_checks.py
+# RUN: llvm-mca -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -iterations=500 -timeline < %s | FileCheck %s
+
+vpmuld %xmm0, %xmm0, %xmm1
+vpaddd %xmm1, %xmm1, %xmm0
+vpaddd %xmm0, %xmm0, %xmm3
+
+# CHECK: Iterations: 500
+# CHECK-NEXT: Instructions: 1500
+# CHECK-NEXT: Total Cycles: 3004
+# CHECK-NEXT: Total uOps: 1500
+
+# CHECK: Dispatch Width: 4
+# CHECK-NEXT: uOps Per Cycle: 0.50
+# CHECK-NEXT: IPC: 0.50
+# CHECK-NEXT: Block RThroughput: 1.0
+
+# CHECK: Instruction Info:
+# CHECK-NEXT: [1]: #uOps
+# CHECK-NEXT: [2]: Latency
+# CHECK-NEXT: [3]: RThroughput
+# CHECK-NEXT: [4]: MayLoad
+# CHECK-NEXT: [5]: MayStore
+# CHECK-NEXT: [6]: HasSideEffects (U)
+
+# CHECK: [1] [2] [3] [4] [5] [6] Instructions:
+# CHECK-NEXT: 1 5 1.00 vpmuldq %xmm0, %xmm0, %xmm1
+# CHECK-NEXT: 1 1 0.50 vpaddd %xmm1, %xmm1, %xmm0
+# CHECK-NEXT: 1 1 0.50 vpaddd %xmm0, %xmm0, %xmm3
+
+# CHECK: Resources:
+# CHECK-NEXT: [0] - SBDivider
+# CHECK-NEXT: [1] - SBFPDivider
+# CHECK-NEXT: [2] - SBPort0
+# CHECK-NEXT: [3] - SBPort1
+# CHECK-NEXT: [4] - SBPort4
+# CHECK-NEXT: [5] - SBPort5
+# CHECK-NEXT: [6.0] - SBPort23
+# CHECK-NEXT: [6.1] - SBPort23
+
+# CHECK: Resource pressure per iteration:
+# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6.0] [6.1]
+# CHECK-NEXT: - - 1.00 1.00 - 1.00 - -
+
+# CHECK: Resource pressure by instruction:
+# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6.0] [6.1] Instructions:
+# CHECK-NEXT: - - 1.00 - - - - - vpmuldq %xmm0, %xmm0, %xmm1
+# CHECK-NEXT: - - - - - 1.00 - - vpaddd %xmm1, %xmm1, %xmm0
+# CHECK-NEXT: - - - 1.00 - - - - vpaddd %xmm0, %xmm0, %xmm3
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 0123456789 0123456789 0123456789
+# CHECK-NEXT: Index 0123456789 0123456789 0123456789 0123
+
+# CHECK: [0,0] DeeeeeER . . . . . . . . . . . . vpmuldq %xmm0, %xmm0, %xmm1
+# CHECK-NEXT: [0,1] D=====eER . . . . . . . . . . . . vpaddd %xmm1, %xmm1, %xmm0
+# CHECK-NEXT: [0,2] D======eER. . . . . . . . . . . . vpaddd %xmm0, %xmm0, %xmm3
+# CHECK-NEXT: [1,0] D======eeeeeER . . . . . . . . . . . vpmuldq %xmm0, %xmm0, %xmm1
+# CHECK-NEXT: [1,1] .D==========eER. . . . . . . . . . . vpaddd %xmm1, %xmm1, %xmm0
+# CHECK-NEXT: [1,2] .D===========eER . . . . . . . . . . vpaddd %xmm0, %xmm0, %xmm3
+# CHECK-NEXT: [2,0] .D===========eeeeeER. . . . . . . . . . vpmuldq %xmm0, %xmm0, %xmm1
+# CHECK-NEXT: [2,1] .D================eER . . . . . . . . . vpaddd %xmm1, %xmm1, %xmm0
+# CHECK-NEXT: [2,2] . D================eER . . . . . . . . . vpaddd %xmm0, %xmm0, %xmm3
+# CHECK-NEXT: [3,0] . D================eeeeeER . . . . . . . . vpmuldq %xmm0, %xmm0, %xmm1
+# CHECK-NEXT: [3,1] . D=====================eER . . . . . . . . vpaddd %xmm1, %xmm1, %xmm0
+# CHECK-NEXT: [3,2] . D======================eER . . . . . . . . vpaddd %xmm0, %xmm0, %xmm3
+# CHECK-NEXT: [4,0] . D=====================eeeeeER . . . . . . . vpmuldq %xmm0, %xmm0, %xmm1
+# CHECK-NEXT: [4,1] . D==========================eER . . . . . . . vpaddd %xmm1, %xmm1, %xmm0
+# CHECK-NEXT: [4,2] . D===========================eER . . . . . . . vpaddd %xmm0, %xmm0, %xmm3
+# CHECK-NEXT: [5,0] . D===========================eeeeeER . . . . . . vpmuldq %xmm0, %xmm0, %xmm1
+# CHECK-NEXT: [5,1] . D===============================eER . . . . . . vpaddd %xmm1, %xmm1, %xmm0
+# CHECK-NEXT: [5,2] . D================================eER. . . . . . vpaddd %xmm0, %xmm0, %xmm3
+# CHECK-NEXT: [6,0] . D================================eeeeeER . . . . . vpmuldq %xmm0, %xmm0, %xmm1
+# CHECK-NEXT: [6,1] . D=====================================eER. . . . . vpaddd %xmm1, %xmm1, %xmm0
+# CHECK-NEXT: [6,2] . D=====================================eER . . . . vpaddd %xmm0, %xmm0, %xmm3
+# CHECK-NEXT: [7,0] . D=====================================eeeeeER. . . . vpmuldq %xmm0, %xmm0, %xmm1
+# CHECK-NEXT: [7,1] . D==========================================eER . . . vpaddd %xmm1, %xmm1, %xmm0
+# CHECK-NEXT: [7,2] . D===========================================eER . . . vpaddd %xmm0, %xmm0, %xmm3
+# CHECK-NEXT: [8,0] . .D==========================================eeeeeER . . vpmuldq %xmm0, %xmm0, %xmm1
+# CHECK-NEXT: [8,1] . .D===============================================eER . . vpaddd %xmm1, %xmm1, %xmm0
+# CHECK-NEXT: [8,2] . .D================================================eER . . vpaddd %xmm0, %xmm0, %xmm3
+# CHECK-NEXT: [9,0] . .D================================================eeeeeER . vpmuldq %xmm0, %xmm0, %xmm1
+# CHECK-NEXT: [9,1] . . D====================================================eER. vpaddd %xmm1, %xmm1, %xmm0
+# CHECK-NEXT: [9,2] . . D=====================================================eER vpaddd %xmm0, %xmm0, %xmm3
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 10 25.0 0.1 0.0 vpmuldq %xmm0, %xmm0, %xmm1
+# CHECK-NEXT: 1. 10 29.7 0.0 0.0 vpaddd %xmm1, %xmm1, %xmm0
+# CHECK-NEXT: 2. 10 30.5 0.0 0.0 vpaddd %xmm0, %xmm0, %xmm3
--- /dev/null
+# NOTE: Assertions have been autogenerated by utils/update_mca_test_checks.py
+# RUN: llvm-mca -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -iterations=300 -timeline -timeline-max-iterations=3 < %s | FileCheck %s
+
+vmulps %xmm0, %xmm1, %xmm2
+vhaddps %xmm2, %xmm2, %xmm3
+vhaddps %xmm3, %xmm3, %xmm4
+
+# CHECK: Iterations: 300
+# CHECK-NEXT: Instructions: 900
+# CHECK-NEXT: Total Cycles: 1211
+# CHECK-NEXT: Total uOps: 2100
+
+# CHECK: Dispatch Width: 4
+# CHECK-NEXT: uOps Per Cycle: 1.73
+# CHECK-NEXT: IPC: 0.74
+# CHECK-NEXT: Block RThroughput: 4.0
+
+# CHECK: Instruction Info:
+# CHECK-NEXT: [1]: #uOps
+# CHECK-NEXT: [2]: Latency
+# CHECK-NEXT: [3]: RThroughput
+# CHECK-NEXT: [4]: MayLoad
+# CHECK-NEXT: [5]: MayStore
+# CHECK-NEXT: [6]: HasSideEffects (U)
+
+# CHECK: [1] [2] [3] [4] [5] [6] Instructions:
+# CHECK-NEXT: 1 5 1.00 vmulps %xmm0, %xmm1, %xmm2
+# CHECK-NEXT: 3 5 2.00 vhaddps %xmm2, %xmm2, %xmm3
+# CHECK-NEXT: 3 5 2.00 vhaddps %xmm3, %xmm3, %xmm4
+
+# CHECK: Resources:
+# CHECK-NEXT: [0] - SBDivider
+# CHECK-NEXT: [1] - SBFPDivider
+# CHECK-NEXT: [2] - SBPort0
+# CHECK-NEXT: [3] - SBPort1
+# CHECK-NEXT: [4] - SBPort4
+# CHECK-NEXT: [5] - SBPort5
+# CHECK-NEXT: [6.0] - SBPort23
+# CHECK-NEXT: [6.1] - SBPort23
+
+# CHECK: Resource pressure per iteration:
+# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6.0] [6.1]
+# CHECK-NEXT: - - 1.00 2.00 - 4.00 - -
+
+# CHECK: Resource pressure by instruction:
+# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6.0] [6.1] Instructions:
+# CHECK-NEXT: - - 1.00 - - - - - vmulps %xmm0, %xmm1, %xmm2
+# CHECK-NEXT: - - - 1.00 - 2.00 - - vhaddps %xmm2, %xmm2, %xmm3
+# CHECK-NEXT: - - - 1.00 - 2.00 - - vhaddps %xmm3, %xmm3, %xmm4
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 0123456789
+# CHECK-NEXT: Index 0123456789 012
+
+# CHECK: [0,0] DeeeeeER . . . . vmulps %xmm0, %xmm1, %xmm2
+# CHECK-NEXT: [0,1] D=====eeeeeER . . . vhaddps %xmm2, %xmm2, %xmm3
+# CHECK-NEXT: [0,2] .D==========eeeeeER . . vhaddps %xmm3, %xmm3, %xmm4
+# CHECK-NEXT: [1,0] .DeeeeeE----------R . . vmulps %xmm0, %xmm1, %xmm2
+# CHECK-NEXT: [1,1] . D=====eeeeeE----R . . vhaddps %xmm2, %xmm2, %xmm3
+# CHECK-NEXT: [1,2] . D==========eeeeeER . vhaddps %xmm3, %xmm3, %xmm4
+# CHECK-NEXT: [2,0] . DeeeeeE----------R . vmulps %xmm0, %xmm1, %xmm2
+# CHECK-NEXT: [2,1] . D=====eeeeeE----R . vhaddps %xmm2, %xmm2, %xmm3
+# CHECK-NEXT: [2,2] . D==========eeeeeER vhaddps %xmm3, %xmm3, %xmm4
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 3 1.0 1.0 6.7 vmulps %xmm0, %xmm1, %xmm2
+# CHECK-NEXT: 1. 3 6.0 0.7 2.7 vhaddps %xmm2, %xmm2, %xmm3
+# CHECK-NEXT: 2. 3 11.0 1.0 0.0 vhaddps %xmm3, %xmm3, %xmm4
--- /dev/null
+# NOTE: Assertions have been autogenerated by utils/update_mca_test_checks.py
+# RUN: llvm-mca -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -iterations=1 -timeline -resource-pressure=false < %s | FileCheck %s
+
+vshufps $0, %xmm0, %xmm1, %xmm1
+vhaddps (%rdi), %xmm1, %xmm2
+
+# CHECK: Iterations: 1
+# CHECK-NEXT: Instructions: 2
+# CHECK-NEXT: Total Cycles: 15
+# CHECK-NEXT: Total uOps: 5
+
+# CHECK: Dispatch Width: 4
+# CHECK-NEXT: uOps Per Cycle: 0.33
+# CHECK-NEXT: IPC: 0.13
+# CHECK-NEXT: Block RThroughput: 3.0
+
+# CHECK: Instruction Info:
+# CHECK-NEXT: [1]: #uOps
+# CHECK-NEXT: [2]: Latency
+# CHECK-NEXT: [3]: RThroughput
+# CHECK-NEXT: [4]: MayLoad
+# CHECK-NEXT: [5]: MayStore
+# CHECK-NEXT: [6]: HasSideEffects (U)
+
+# CHECK: [1] [2] [3] [4] [5] [6] Instructions:
+# CHECK-NEXT: 1 1 1.00 vshufps $0, %xmm0, %xmm1, %xmm1
+# CHECK-NEXT: 4 11 2.00 * vhaddps (%rdi), %xmm1, %xmm2
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 01234
+# CHECK-NEXT: Index 0123456789
+
+# CHECK: [0,0] DeER . . . vshufps $0, %xmm0, %xmm1, %xmm1
+# CHECK-NEXT: [0,1] .DeeeeeeeeeeeER vhaddps (%rdi), %xmm1, %xmm2
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 1.0 1.0 0.0 vshufps $0, %xmm0, %xmm1, %xmm1
+# CHECK-NEXT: 1. 1 1.0 1.0 0.0 vhaddps (%rdi), %xmm1, %xmm2
--- /dev/null
+# NOTE: Assertions have been autogenerated by utils/update_mca_test_checks.py
+# RUN: llvm-mca -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -iterations=1 -timeline -resource-pressure=false < %s | FileCheck %s
+
+vshufps $0, %xmm0, %xmm1, %xmm1
+vhaddps (%rdi), %ymm1, %ymm2
+
+# CHECK: Iterations: 1
+# CHECK-NEXT: Instructions: 2
+# CHECK-NEXT: Total Cycles: 16
+# CHECK-NEXT: Total uOps: 5
+
+# CHECK: Dispatch Width: 4
+# CHECK-NEXT: uOps Per Cycle: 0.31
+# CHECK-NEXT: IPC: 0.13
+# CHECK-NEXT: Block RThroughput: 3.0
+
+# CHECK: Instruction Info:
+# CHECK-NEXT: [1]: #uOps
+# CHECK-NEXT: [2]: Latency
+# CHECK-NEXT: [3]: RThroughput
+# CHECK-NEXT: [4]: MayLoad
+# CHECK-NEXT: [5]: MayStore
+# CHECK-NEXT: [6]: HasSideEffects (U)
+
+# CHECK: [1] [2] [3] [4] [5] [6] Instructions:
+# CHECK-NEXT: 1 1 1.00 vshufps $0, %xmm0, %xmm1, %xmm1
+# CHECK-NEXT: 4 12 2.00 * vhaddps (%rdi), %ymm1, %ymm2
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 012345
+# CHECK-NEXT: Index 0123456789
+
+# CHECK: [0,0] DeER . . . vshufps $0, %xmm0, %xmm1, %xmm1
+# CHECK-NEXT: [0,1] .DeeeeeeeeeeeeER vhaddps (%rdi), %ymm1, %ymm2
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 1.0 1.0 0.0 vshufps $0, %xmm0, %xmm1, %xmm1
+# CHECK-NEXT: 1. 1 1.0 1.0 0.0 vhaddps (%rdi), %ymm1, %ymm2
--- /dev/null
+# NOTE: Assertions have been autogenerated by utils/update_mca_test_checks.py
+# RUN: llvm-mca -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -resource-pressure=false -instruction-info=true < %s | FileCheck %s --check-prefix=ENABLED
+# RUN: llvm-mca -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -resource-pressure=false -instruction-info=false < %s | FileCheck %s -check-prefix=DISABLED
+# RUN: llvm-mca -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -resource-pressure=false -instruction-info < %s | FileCheck %s -check-prefix=ENABLED
+# RUN: llvm-mca -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -resource-pressure=false < %s | FileCheck %s -check-prefix=ENABLED
+
+vmulps %xmm0, %xmm1, %xmm2
+vhaddps %xmm2, %xmm2, %xmm3
+vhaddps %xmm3, %xmm3, %xmm4
+
+# DISABLED-NOT: Instruction Info:
+
+
+# ENABLED: Iterations: 100
+# ENABLED-NEXT: Instructions: 300
+# ENABLED-NEXT: Total Cycles: 414
+# ENABLED-NEXT: Total uOps: 700
+
+
+# ENABLED: Dispatch Width: 4
+# ENABLED-NEXT: uOps Per Cycle: 1.69
+# ENABLED-NEXT: IPC: 0.72
+# ENABLED-NEXT: Block RThroughput: 4.0
+
+# ENABLED: Instruction Info:
+# ENABLED-NEXT: [1]: #uOps
+# ENABLED-NEXT: [2]: Latency
+# ENABLED-NEXT: [3]: RThroughput
+# ENABLED-NEXT: [4]: MayLoad
+# ENABLED-NEXT: [5]: MayStore
+# ENABLED-NEXT: [6]: HasSideEffects (U)
+
+# ENABLED: [1] [2] [3] [4] [5] [6] Instructions:
+# ENABLED-NEXT: 1 5 1.00 vmulps %xmm0, %xmm1, %xmm2
+# ENABLED-NEXT: 3 5 2.00 vhaddps %xmm2, %xmm2, %xmm3
+# ENABLED-NEXT: 3 5 2.00 vhaddps %xmm3, %xmm3, %xmm4
--- /dev/null
+# NOTE: Assertions have been autogenerated by utils/update_mca_test_checks.py
+# RUN: llvm-mca -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -iterations=100 -timeline -timeline-max-iterations=1 -noalias=false < %s | FileCheck %s
+
+vmovaps (%rsi), %xmm0
+vmovaps %xmm0, (%rdi)
+vmovaps 16(%rsi), %xmm0
+vmovaps %xmm0, 16(%rdi)
+vmovaps 32(%rsi), %xmm0
+vmovaps %xmm0, 32(%rdi)
+vmovaps 48(%rsi), %xmm0
+vmovaps %xmm0, 48(%rdi)
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 800
+# CHECK-NEXT: Total Cycles: 2803
+# CHECK-NEXT: Total uOps: 800
+
+# CHECK: Dispatch Width: 4
+# CHECK-NEXT: uOps Per Cycle: 0.29
+# CHECK-NEXT: IPC: 0.29
+# CHECK-NEXT: Block RThroughput: 4.0
+
+# CHECK: Instruction Info:
+# CHECK-NEXT: [1]: #uOps
+# CHECK-NEXT: [2]: Latency
+# CHECK-NEXT: [3]: RThroughput
+# CHECK-NEXT: [4]: MayLoad
+# CHECK-NEXT: [5]: MayStore
+# CHECK-NEXT: [6]: HasSideEffects (U)
+
+# CHECK: [1] [2] [3] [4] [5] [6] Instructions:
+# CHECK-NEXT: 1 6 0.50 * vmovaps (%rsi), %xmm0
+# CHECK-NEXT: 1 1 1.00 * vmovaps %xmm0, (%rdi)
+# CHECK-NEXT: 1 6 0.50 * vmovaps 16(%rsi), %xmm0
+# CHECK-NEXT: 1 1 1.00 * vmovaps %xmm0, 16(%rdi)
+# CHECK-NEXT: 1 6 0.50 * vmovaps 32(%rsi), %xmm0
+# CHECK-NEXT: 1 1 1.00 * vmovaps %xmm0, 32(%rdi)
+# CHECK-NEXT: 1 6 0.50 * vmovaps 48(%rsi), %xmm0
+# CHECK-NEXT: 1 1 1.00 * vmovaps %xmm0, 48(%rdi)
+
+# CHECK: Resources:
+# CHECK-NEXT: [0] - SBDivider
+# CHECK-NEXT: [1] - SBFPDivider
+# CHECK-NEXT: [2] - SBPort0
+# CHECK-NEXT: [3] - SBPort1
+# CHECK-NEXT: [4] - SBPort4
+# CHECK-NEXT: [5] - SBPort5
+# CHECK-NEXT: [6.0] - SBPort23
+# CHECK-NEXT: [6.1] - SBPort23
+
+# CHECK: Resource pressure per iteration:
+# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6.0] [6.1]
+# CHECK-NEXT: - - - - 4.00 - - 8.00
+
+# CHECK: Resource pressure by instruction:
+# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6.0] [6.1] Instructions:
+# CHECK-NEXT: - - - - - - - 1.00 vmovaps (%rsi), %xmm0
+# CHECK-NEXT: - - - - 1.00 - - 1.00 vmovaps %xmm0, (%rdi)
+# CHECK-NEXT: - - - - - - - 1.00 vmovaps 16(%rsi), %xmm0
+# CHECK-NEXT: - - - - 1.00 - - 1.00 vmovaps %xmm0, 16(%rdi)
+# CHECK-NEXT: - - - - - - - 1.00 vmovaps 32(%rsi), %xmm0
+# CHECK-NEXT: - - - - 1.00 - - 1.00 vmovaps %xmm0, 32(%rdi)
+# CHECK-NEXT: - - - - - - - 1.00 vmovaps 48(%rsi), %xmm0
+# CHECK-NEXT: - - - - 1.00 - - 1.00 vmovaps %xmm0, 48(%rdi)
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 0123456789 0
+# CHECK-NEXT: Index 0123456789 0123456789
+
+# CHECK: [0,0] DeeeeeeER . . . . . vmovaps (%rsi), %xmm0
+# CHECK-NEXT: [0,1] D======eER. . . . . vmovaps %xmm0, (%rdi)
+# CHECK-NEXT: [0,2] D=======eeeeeeER . . . vmovaps 16(%rsi), %xmm0
+# CHECK-NEXT: [0,3] D=============eER . . . vmovaps %xmm0, 16(%rdi)
+# CHECK-NEXT: [0,4] .D=============eeeeeeER . . vmovaps 32(%rsi), %xmm0
+# CHECK-NEXT: [0,5] .D===================eER . . vmovaps %xmm0, 32(%rdi)
+# CHECK-NEXT: [0,6] .D====================eeeeeeER. vmovaps 48(%rsi), %xmm0
+# CHECK-NEXT: [0,7] .D==========================eER vmovaps %xmm0, 48(%rdi)
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 1.0 1.0 0.0 vmovaps (%rsi), %xmm0
+# CHECK-NEXT: 1. 1 7.0 0.0 0.0 vmovaps %xmm0, (%rdi)
+# CHECK-NEXT: 2. 1 8.0 0.0 0.0 vmovaps 16(%rsi), %xmm0
+# CHECK-NEXT: 3. 1 14.0 0.0 0.0 vmovaps %xmm0, 16(%rdi)
+# CHECK-NEXT: 4. 1 14.0 0.0 0.0 vmovaps 32(%rsi), %xmm0
+# CHECK-NEXT: 5. 1 20.0 0.0 0.0 vmovaps %xmm0, 32(%rdi)
+# CHECK-NEXT: 6. 1 21.0 0.0 0.0 vmovaps 48(%rsi), %xmm0
+# CHECK-NEXT: 7. 1 27.0 0.0 0.0 vmovaps %xmm0, 48(%rdi)
--- /dev/null
+# NOTE: Assertions have been autogenerated by utils/update_mca_test_checks.py
+# RUN: llvm-mca -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -iterations=100 -timeline -timeline-max-iterations=1 < %s | FileCheck %s
+
+vmovaps (%rsi), %xmm0
+vmovaps %xmm0, (%rdi)
+vmovaps 16(%rsi), %xmm0
+vmovaps %xmm0, 16(%rdi)
+vmovaps 32(%rsi), %xmm0
+vmovaps %xmm0, 32(%rdi)
+vmovaps 48(%rsi), %xmm0
+vmovaps %xmm0, 48(%rdi)
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 800
+# CHECK-NEXT: Total Cycles: 409
+# CHECK-NEXT: Total uOps: 800
+
+# CHECK: Dispatch Width: 4
+# CHECK-NEXT: uOps Per Cycle: 1.96
+# CHECK-NEXT: IPC: 1.96
+# CHECK-NEXT: Block RThroughput: 4.0
+
+# CHECK: Instruction Info:
+# CHECK-NEXT: [1]: #uOps
+# CHECK-NEXT: [2]: Latency
+# CHECK-NEXT: [3]: RThroughput
+# CHECK-NEXT: [4]: MayLoad
+# CHECK-NEXT: [5]: MayStore
+# CHECK-NEXT: [6]: HasSideEffects (U)
+
+# CHECK: [1] [2] [3] [4] [5] [6] Instructions:
+# CHECK-NEXT: 1 6 0.50 * vmovaps (%rsi), %xmm0
+# CHECK-NEXT: 1 1 1.00 * vmovaps %xmm0, (%rdi)
+# CHECK-NEXT: 1 6 0.50 * vmovaps 16(%rsi), %xmm0
+# CHECK-NEXT: 1 1 1.00 * vmovaps %xmm0, 16(%rdi)
+# CHECK-NEXT: 1 6 0.50 * vmovaps 32(%rsi), %xmm0
+# CHECK-NEXT: 1 1 1.00 * vmovaps %xmm0, 32(%rdi)
+# CHECK-NEXT: 1 6 0.50 * vmovaps 48(%rsi), %xmm0
+# CHECK-NEXT: 1 1 1.00 * vmovaps %xmm0, 48(%rdi)
+
+# CHECK: Resources:
+# CHECK-NEXT: [0] - SBDivider
+# CHECK-NEXT: [1] - SBFPDivider
+# CHECK-NEXT: [2] - SBPort0
+# CHECK-NEXT: [3] - SBPort1
+# CHECK-NEXT: [4] - SBPort4
+# CHECK-NEXT: [5] - SBPort5
+# CHECK-NEXT: [6.0] - SBPort23
+# CHECK-NEXT: [6.1] - SBPort23
+
+# CHECK: Resource pressure per iteration:
+# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6.0] [6.1]
+# CHECK-NEXT: - - - - 4.00 - 3.94 4.06
+
+# CHECK: Resource pressure by instruction:
+# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6.0] [6.1] Instructions:
+# CHECK-NEXT: - - - - - - 0.97 0.03 vmovaps (%rsi), %xmm0
+# CHECK-NEXT: - - - - 1.00 - - 1.00 vmovaps %xmm0, (%rdi)
+# CHECK-NEXT: - - - - - - 0.03 0.97 vmovaps 16(%rsi), %xmm0
+# CHECK-NEXT: - - - - 1.00 - 0.97 0.03 vmovaps %xmm0, 16(%rdi)
+# CHECK-NEXT: - - - - - - 1.00 - vmovaps 32(%rsi), %xmm0
+# CHECK-NEXT: - - - - 1.00 - - 1.00 vmovaps %xmm0, 32(%rdi)
+# CHECK-NEXT: - - - - - - - 1.00 vmovaps 48(%rsi), %xmm0
+# CHECK-NEXT: - - - - 1.00 - 0.97 0.03 vmovaps %xmm0, 48(%rdi)
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 012
+# CHECK-NEXT: Index 0123456789
+
+# CHECK: [0,0] DeeeeeeER . . vmovaps (%rsi), %xmm0
+# CHECK-NEXT: [0,1] D======eER. . vmovaps %xmm0, (%rdi)
+# CHECK-NEXT: [0,2] DeeeeeeE-R. . vmovaps 16(%rsi), %xmm0
+# CHECK-NEXT: [0,3] D=======eER . vmovaps %xmm0, 16(%rdi)
+# CHECK-NEXT: [0,4] .DeeeeeeE-R . vmovaps 32(%rsi), %xmm0
+# CHECK-NEXT: [0,5] .D=======eER. vmovaps %xmm0, 32(%rdi)
+# CHECK-NEXT: [0,6] .DeeeeeeE--R. vmovaps 48(%rsi), %xmm0
+# CHECK-NEXT: [0,7] .D========eER vmovaps %xmm0, 48(%rdi)
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 1.0 1.0 0.0 vmovaps (%rsi), %xmm0
+# CHECK-NEXT: 1. 1 7.0 0.0 0.0 vmovaps %xmm0, (%rdi)
+# CHECK-NEXT: 2. 1 1.0 1.0 1.0 vmovaps 16(%rsi), %xmm0
+# CHECK-NEXT: 3. 1 8.0 0.0 0.0 vmovaps %xmm0, 16(%rdi)
+# CHECK-NEXT: 4. 1 1.0 1.0 1.0 vmovaps 32(%rsi), %xmm0
+# CHECK-NEXT: 5. 1 8.0 0.0 0.0 vmovaps %xmm0, 32(%rdi)
+# CHECK-NEXT: 6. 1 1.0 1.0 2.0 vmovaps 48(%rsi), %xmm0
+# CHECK-NEXT: 7. 1 9.0 0.0 0.0 vmovaps %xmm0, 48(%rdi)
--- /dev/null
+# NOTE: Assertions have been autogenerated by utils/update_mca_test_checks.py
+# RUN: llvm-mca -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -timeline -timeline-max-iterations=1 -register-file-stats < %s | FileCheck %s
+
+# These are dependency-breaking one-idioms.
+# Much like zero-idioms, but they produce ones, and do consume resources.
+
+# perf stats reports a throughput of 2.00 IPC.
+
+pcmpeqb %mm2, %mm2
+pcmpeqd %mm2, %mm2
+pcmpeqw %mm2, %mm2
+
+pcmpeqb %xmm2, %xmm2
+pcmpeqd %xmm2, %xmm2
+pcmpeqq %xmm2, %xmm2
+pcmpeqw %xmm2, %xmm2
+
+vpcmpeqb %xmm3, %xmm3, %xmm3
+vpcmpeqd %xmm3, %xmm3, %xmm3
+vpcmpeqq %xmm3, %xmm3, %xmm3
+vpcmpeqw %xmm3, %xmm3, %xmm3
+
+vpcmpeqb %xmm3, %xmm3, %xmm5
+vpcmpeqd %xmm3, %xmm3, %xmm5
+vpcmpeqq %xmm3, %xmm3, %xmm5
+vpcmpeqw %xmm3, %xmm3, %xmm5
+
+# FIXME: their handling is broken in llvm-mca.
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 1500
+# CHECK-NEXT: Total Cycles: 903
+# CHECK-NEXT: Total uOps: 1500
+
+# CHECK: Dispatch Width: 4
+# CHECK-NEXT: uOps Per Cycle: 1.66
+# CHECK-NEXT: IPC: 1.66
+# CHECK-NEXT: Block RThroughput: 6.0
+
+# CHECK: Instruction Info:
+# CHECK-NEXT: [1]: #uOps
+# CHECK-NEXT: [2]: Latency
+# CHECK-NEXT: [3]: RThroughput
+# CHECK-NEXT: [4]: MayLoad
+# CHECK-NEXT: [5]: MayStore
+# CHECK-NEXT: [6]: HasSideEffects (U)
+
+# CHECK: [1] [2] [3] [4] [5] [6] Instructions:
+# CHECK-NEXT: 1 3 1.00 pcmpeqb %mm2, %mm2
+# CHECK-NEXT: 1 3 1.00 pcmpeqd %mm2, %mm2
+# CHECK-NEXT: 1 3 1.00 pcmpeqw %mm2, %mm2
+# CHECK-NEXT: 1 1 0.50 pcmpeqb %xmm2, %xmm2
+# CHECK-NEXT: 1 1 0.50 pcmpeqd %xmm2, %xmm2
+# CHECK-NEXT: 1 1 0.50 pcmpeqq %xmm2, %xmm2
+# CHECK-NEXT: 1 1 0.50 pcmpeqw %xmm2, %xmm2
+# CHECK-NEXT: 1 1 0.50 vpcmpeqb %xmm3, %xmm3, %xmm3
+# CHECK-NEXT: 1 1 0.50 vpcmpeqd %xmm3, %xmm3, %xmm3
+# CHECK-NEXT: 1 1 0.50 vpcmpeqq %xmm3, %xmm3, %xmm3
+# CHECK-NEXT: 1 1 0.50 vpcmpeqw %xmm3, %xmm3, %xmm3
+# CHECK-NEXT: 1 1 0.50 vpcmpeqb %xmm3, %xmm3, %xmm5
+# CHECK-NEXT: 1 1 0.50 vpcmpeqd %xmm3, %xmm3, %xmm5
+# CHECK-NEXT: 1 1 0.50 vpcmpeqq %xmm3, %xmm3, %xmm5
+# CHECK-NEXT: 1 1 0.50 vpcmpeqw %xmm3, %xmm3, %xmm5
+
+# CHECK: Register File statistics:
+# CHECK-NEXT: Total number of mappings created: 1500
+# CHECK-NEXT: Max number of mappings used: 168
+
+# CHECK: Resources:
+# CHECK-NEXT: [0] - SBDivider
+# CHECK-NEXT: [1] - SBFPDivider
+# CHECK-NEXT: [2] - SBPort0
+# CHECK-NEXT: [3] - SBPort1
+# CHECK-NEXT: [4] - SBPort4
+# CHECK-NEXT: [5] - SBPort5
+# CHECK-NEXT: [6.0] - SBPort23
+# CHECK-NEXT: [6.1] - SBPort23
+
+# CHECK: Resource pressure per iteration:
+# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6.0] [6.1]
+# CHECK-NEXT: - - - 7.65 - 7.35 - -
+
+# CHECK: Resource pressure by instruction:
+# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6.0] [6.1] Instructions:
+# CHECK-NEXT: - - - 1.00 - - - - pcmpeqb %mm2, %mm2
+# CHECK-NEXT: - - - 1.00 - - - - pcmpeqd %mm2, %mm2
+# CHECK-NEXT: - - - 1.00 - - - - pcmpeqw %mm2, %mm2
+# CHECK-NEXT: - - - 0.75 - 0.25 - - pcmpeqb %xmm2, %xmm2
+# CHECK-NEXT: - - - 0.49 - 0.51 - - pcmpeqd %xmm2, %xmm2
+# CHECK-NEXT: - - - 0.64 - 0.36 - - pcmpeqq %xmm2, %xmm2
+# CHECK-NEXT: - - - 0.21 - 0.79 - - pcmpeqw %xmm2, %xmm2
+# CHECK-NEXT: - - - 0.44 - 0.56 - - vpcmpeqb %xmm3, %xmm3, %xmm3
+# CHECK-NEXT: - - - 0.26 - 0.74 - - vpcmpeqd %xmm3, %xmm3, %xmm3
+# CHECK-NEXT: - - - 0.25 - 0.75 - - vpcmpeqq %xmm3, %xmm3, %xmm3
+# CHECK-NEXT: - - - - - 1.00 - - vpcmpeqw %xmm3, %xmm3, %xmm3
+# CHECK-NEXT: - - - 0.25 - 0.75 - - vpcmpeqb %xmm3, %xmm3, %xmm5
+# CHECK-NEXT: - - - 0.55 - 0.45 - - vpcmpeqd %xmm3, %xmm3, %xmm5
+# CHECK-NEXT: - - - 0.44 - 0.56 - - vpcmpeqq %xmm3, %xmm3, %xmm5
+# CHECK-NEXT: - - - 0.37 - 0.63 - - vpcmpeqw %xmm3, %xmm3, %xmm5
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 01
+# CHECK-NEXT: Index 0123456789
+
+# CHECK: [0,0] DeeeER .. pcmpeqb %mm2, %mm2
+# CHECK-NEXT: [0,1] D===eeeER .. pcmpeqd %mm2, %mm2
+# CHECK-NEXT: [0,2] D======eeeER pcmpeqw %mm2, %mm2
+# CHECK-NEXT: [0,3] DeE--------R pcmpeqb %xmm2, %xmm2
+# CHECK-NEXT: [0,4] .DeE-------R pcmpeqd %xmm2, %xmm2
+# CHECK-NEXT: [0,5] .D=eE------R pcmpeqq %xmm2, %xmm2
+# CHECK-NEXT: [0,6] .D==eE-----R pcmpeqw %xmm2, %xmm2
+# CHECK-NEXT: [0,7] .DeE-------R vpcmpeqb %xmm3, %xmm3, %xmm3
+# CHECK-NEXT: [0,8] . DeE------R vpcmpeqd %xmm3, %xmm3, %xmm3
+# CHECK-NEXT: [0,9] . D==eE----R vpcmpeqq %xmm3, %xmm3, %xmm3
+# CHECK-NEXT: [0,10] . D===eE---R vpcmpeqw %xmm3, %xmm3, %xmm3
+# CHECK-NEXT: [0,11] . D====eE--R vpcmpeqb %xmm3, %xmm3, %xmm5
+# CHECK-NEXT: [0,12] . D====eE-R vpcmpeqd %xmm3, %xmm3, %xmm5
+# CHECK-NEXT: [0,13] . D====eE-R vpcmpeqq %xmm3, %xmm3, %xmm5
+# CHECK-NEXT: [0,14] . D=====eER vpcmpeqw %xmm3, %xmm3, %xmm5
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 1.0 1.0 0.0 pcmpeqb %mm2, %mm2
+# CHECK-NEXT: 1. 1 4.0 0.0 0.0 pcmpeqd %mm2, %mm2
+# CHECK-NEXT: 2. 1 7.0 0.0 0.0 pcmpeqw %mm2, %mm2
+# CHECK-NEXT: 3. 1 1.0 1.0 8.0 pcmpeqb %xmm2, %xmm2
+# CHECK-NEXT: 4. 1 1.0 0.0 7.0 pcmpeqd %xmm2, %xmm2
+# CHECK-NEXT: 5. 1 2.0 0.0 6.0 pcmpeqq %xmm2, %xmm2
+# CHECK-NEXT: 6. 1 3.0 0.0 5.0 pcmpeqw %xmm2, %xmm2
+# CHECK-NEXT: 7. 1 1.0 1.0 7.0 vpcmpeqb %xmm3, %xmm3, %xmm3
+# CHECK-NEXT: 8. 1 1.0 0.0 6.0 vpcmpeqd %xmm3, %xmm3, %xmm3
+# CHECK-NEXT: 9. 1 3.0 1.0 4.0 vpcmpeqq %xmm3, %xmm3, %xmm3
+# CHECK-NEXT: 10. 1 4.0 0.0 3.0 vpcmpeqw %xmm3, %xmm3, %xmm3
+# CHECK-NEXT: 11. 1 5.0 0.0 2.0 vpcmpeqb %xmm3, %xmm3, %xmm5
+# CHECK-NEXT: 12. 1 5.0 1.0 1.0 vpcmpeqd %xmm3, %xmm3, %xmm5
+# CHECK-NEXT: 13. 1 5.0 1.0 1.0 vpcmpeqq %xmm3, %xmm3, %xmm5
+# CHECK-NEXT: 14. 1 6.0 2.0 0.0 vpcmpeqw %xmm3, %xmm3, %xmm5
--- /dev/null
+# NOTE: Assertions have been autogenerated by utils/update_mca_test_checks.py
+# RUN: llvm-mca -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -iterations=1 -resource-pressure=false -timeline < %s | FileCheck %s
+
+imul %rax, %rbx
+lzcnt %ax, %bx
+add %ecx, %ebx
+
+# CHECK: Iterations: 1
+# CHECK-NEXT: Instructions: 3
+# CHECK-NEXT: Total Cycles: 8
+# CHECK-NEXT: Total uOps: 3
+
+# CHECK: Dispatch Width: 4
+# CHECK-NEXT: uOps Per Cycle: 0.38
+# CHECK-NEXT: IPC: 0.38
+# CHECK-NEXT: Block RThroughput: 2.0
+
+# CHECK: Instruction Info:
+# CHECK-NEXT: [1]: #uOps
+# CHECK-NEXT: [2]: Latency
+# CHECK-NEXT: [3]: RThroughput
+# CHECK-NEXT: [4]: MayLoad
+# CHECK-NEXT: [5]: MayStore
+# CHECK-NEXT: [6]: HasSideEffects (U)
+
+# CHECK: [1] [2] [3] [4] [5] [6] Instructions:
+# CHECK-NEXT: 1 3 1.00 imulq %rax, %rbx
+# CHECK-NEXT: 1 3 1.00 lzcntw %ax, %bx
+# CHECK-NEXT: 1 1 0.33 addl %ecx, %ebx
+
+# CHECK: Timeline view:
+# CHECK-NEXT: Index 01234567
+
+# CHECK: [0,0] DeeeER . imulq %rax, %rbx
+# CHECK-NEXT: [0,1] D=eeeER. lzcntw %ax, %bx
+# CHECK-NEXT: [0,2] D====eER addl %ecx, %ebx
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 1.0 1.0 0.0 imulq %rax, %rbx
+# CHECK-NEXT: 1. 1 2.0 2.0 0.0 lzcntw %ax, %bx
+# CHECK-NEXT: 2. 1 5.0 0.0 0.0 addl %ecx, %ebx
--- /dev/null
+# NOTE: Assertions have been autogenerated by utils/update_mca_test_checks.py
+# RUN: llvm-mca -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -iterations=1500 -timeline -timeline-max-iterations=3 < %s | FileCheck %s
+
+# perf stat reports a throughput of 1.00 IPC for this code snippet.
+
+# The ILP is limited by the false dependency on %dx. So, the mov cannot execute
+# in parallel with the add.
+
+add %cx, %dx
+mov %ax, %dx
+xor %bx, %dx
+
+# CHECK: Iterations: 1500
+# CHECK-NEXT: Instructions: 4500
+# CHECK-NEXT: Total Cycles: 1504
+# CHECK-NEXT: Total uOps: 4500
+
+# CHECK: Dispatch Width: 4
+# CHECK-NEXT: uOps Per Cycle: 2.99
+# CHECK-NEXT: IPC: 2.99
+# CHECK-NEXT: Block RThroughput: 1.0
+
+# CHECK: Instruction Info:
+# CHECK-NEXT: [1]: #uOps
+# CHECK-NEXT: [2]: Latency
+# CHECK-NEXT: [3]: RThroughput
+# CHECK-NEXT: [4]: MayLoad
+# CHECK-NEXT: [5]: MayStore
+# CHECK-NEXT: [6]: HasSideEffects (U)
+
+# CHECK: [1] [2] [3] [4] [5] [6] Instructions:
+# CHECK-NEXT: 1 1 0.33 addw %cx, %dx
+# CHECK-NEXT: 1 1 0.33 movw %ax, %dx
+# CHECK-NEXT: 1 1 0.33 xorw %bx, %dx
+
+# CHECK: Resources:
+# CHECK-NEXT: [0] - SBDivider
+# CHECK-NEXT: [1] - SBFPDivider
+# CHECK-NEXT: [2] - SBPort0
+# CHECK-NEXT: [3] - SBPort1
+# CHECK-NEXT: [4] - SBPort4
+# CHECK-NEXT: [5] - SBPort5
+# CHECK-NEXT: [6.0] - SBPort23
+# CHECK-NEXT: [6.1] - SBPort23
+
+# CHECK: Resource pressure per iteration:
+# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6.0] [6.1]
+# CHECK-NEXT: - - 1.00 1.00 - 1.00 - -
+
+# CHECK: Resource pressure by instruction:
+# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6.0] [6.1] Instructions:
+# CHECK-NEXT: - - 0.67 - - 0.33 - - addw %cx, %dx
+# CHECK-NEXT: - - - 0.67 - 0.33 - - movw %ax, %dx
+# CHECK-NEXT: - - 0.33 0.33 - 0.33 - - xorw %bx, %dx
+
+# CHECK: Timeline view:
+# CHECK-NEXT: Index 0123456
+
+# CHECK: [0,0] DeER .. addw %cx, %dx
+# CHECK-NEXT: [0,1] DeER .. movw %ax, %dx
+# CHECK-NEXT: [0,2] D=eER.. xorw %bx, %dx
+# CHECK-NEXT: [1,0] D==eER. addw %cx, %dx
+# CHECK-NEXT: [1,1] .DeE-R. movw %ax, %dx
+# CHECK-NEXT: [1,2] .D=eER. xorw %bx, %dx
+# CHECK-NEXT: [2,0] .D==eER addw %cx, %dx
+# CHECK-NEXT: [2,1] .DeE--R movw %ax, %dx
+# CHECK-NEXT: [2,2] . DeE-R xorw %bx, %dx
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 3 2.3 0.3 0.0 addw %cx, %dx
+# CHECK-NEXT: 1. 3 1.0 1.0 1.0 movw %ax, %dx
+# CHECK-NEXT: 2. 3 1.7 0.0 0.3 xorw %bx, %dx
--- /dev/null
+# NOTE: Assertions have been autogenerated by utils/update_mca_test_checks.py
+# RUN: llvm-mca -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -iterations=1500 -timeline -timeline-max-iterations=3 < %s | FileCheck %s
+
+# perf stat reports a throughput of 0.60 IPC for this code snippet.
+
+# The lzcnt cannot execute in parallel with the imul because there is a false
+# dependency on %bx.
+
+imul %ax, %bx
+lzcnt %ax, %bx
+add %cx, %bx
+
+# CHECK: Iterations: 1500
+# CHECK-NEXT: Instructions: 4500
+# CHECK-NEXT: Total Cycles: 3005
+# CHECK-NEXT: Total uOps: 4500
+
+# CHECK: Dispatch Width: 4
+# CHECK-NEXT: uOps Per Cycle: 1.50
+# CHECK-NEXT: IPC: 1.50
+# CHECK-NEXT: Block RThroughput: 2.0
+
+# CHECK: Instruction Info:
+# CHECK-NEXT: [1]: #uOps
+# CHECK-NEXT: [2]: Latency
+# CHECK-NEXT: [3]: RThroughput
+# CHECK-NEXT: [4]: MayLoad
+# CHECK-NEXT: [5]: MayStore
+# CHECK-NEXT: [6]: HasSideEffects (U)
+
+# CHECK: [1] [2] [3] [4] [5] [6] Instructions:
+# CHECK-NEXT: 1 3 1.00 imulw %ax, %bx
+# CHECK-NEXT: 1 3 1.00 lzcntw %ax, %bx
+# CHECK-NEXT: 1 1 0.33 addw %cx, %bx
+
+# CHECK: Resources:
+# CHECK-NEXT: [0] - SBDivider
+# CHECK-NEXT: [1] - SBFPDivider
+# CHECK-NEXT: [2] - SBPort0
+# CHECK-NEXT: [3] - SBPort1
+# CHECK-NEXT: [4] - SBPort4
+# CHECK-NEXT: [5] - SBPort5
+# CHECK-NEXT: [6.0] - SBPort23
+# CHECK-NEXT: [6.1] - SBPort23
+
+# CHECK: Resource pressure per iteration:
+# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6.0] [6.1]
+# CHECK-NEXT: - - 0.50 2.00 - 0.50 - -
+
+# CHECK: Resource pressure by instruction:
+# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6.0] [6.1] Instructions:
+# CHECK-NEXT: - - - 1.00 - - - - imulw %ax, %bx
+# CHECK-NEXT: - - - 1.00 - - - - lzcntw %ax, %bx
+# CHECK-NEXT: - - 0.50 - - 0.50 - - addw %cx, %bx
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 01
+# CHECK-NEXT: Index 0123456789
+
+# CHECK: [0,0] DeeeER .. imulw %ax, %bx
+# CHECK-NEXT: [0,1] D=eeeER .. lzcntw %ax, %bx
+# CHECK-NEXT: [0,2] D====eER .. addw %cx, %bx
+# CHECK-NEXT: [1,0] D=====eeeER. imulw %ax, %bx
+# CHECK-NEXT: [1,1] .D=eeeE---R. lzcntw %ax, %bx
+# CHECK-NEXT: [1,2] .D====eE--R. addw %cx, %bx
+# CHECK-NEXT: [2,0] .D=====eeeER imulw %ax, %bx
+# CHECK-NEXT: [2,1] .D==eeeE---R lzcntw %ax, %bx
+# CHECK-NEXT: [2,2] . D====eE--R addw %cx, %bx
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 3 4.3 0.3 0.0 imulw %ax, %bx
+# CHECK-NEXT: 1. 3 2.3 2.3 2.0 lzcntw %ax, %bx
+# CHECK-NEXT: 2. 3 5.0 0.0 1.3 addw %cx, %bx
--- /dev/null
+# NOTE: Assertions have been autogenerated by utils/update_mca_test_checks.py
+# RUN: llvm-mca -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -iterations=1500 -timeline -timeline-max-iterations=3 < %s | FileCheck %s
+
+# perf stat reports a throughput of 1.00 IPC for this code snippet.
+
+lzcnt %ax, %bx ## partial register stall.
+
+# CHECK: Iterations: 1500
+# CHECK-NEXT: Instructions: 1500
+# CHECK-NEXT: Total Cycles: 1505
+# CHECK-NEXT: Total uOps: 1500
+
+# CHECK: Dispatch Width: 4
+# CHECK-NEXT: uOps Per Cycle: 1.00
+# CHECK-NEXT: IPC: 1.00
+# CHECK-NEXT: Block RThroughput: 1.0
+
+# CHECK: Instruction Info:
+# CHECK-NEXT: [1]: #uOps
+# CHECK-NEXT: [2]: Latency
+# CHECK-NEXT: [3]: RThroughput
+# CHECK-NEXT: [4]: MayLoad
+# CHECK-NEXT: [5]: MayStore
+# CHECK-NEXT: [6]: HasSideEffects (U)
+
+# CHECK: [1] [2] [3] [4] [5] [6] Instructions:
+# CHECK-NEXT: 1 3 1.00 lzcntw %ax, %bx
+
+# CHECK: Resources:
+# CHECK-NEXT: [0] - SBDivider
+# CHECK-NEXT: [1] - SBFPDivider
+# CHECK-NEXT: [2] - SBPort0
+# CHECK-NEXT: [3] - SBPort1
+# CHECK-NEXT: [4] - SBPort4
+# CHECK-NEXT: [5] - SBPort5
+# CHECK-NEXT: [6.0] - SBPort23
+# CHECK-NEXT: [6.1] - SBPort23
+
+# CHECK: Resource pressure per iteration:
+# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6.0] [6.1]
+# CHECK-NEXT: - - - 1.00 - - - -
+
+# CHECK: Resource pressure by instruction:
+# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6.0] [6.1] Instructions:
+# CHECK-NEXT: - - - 1.00 - - - - lzcntw %ax, %bx
+
+# CHECK: Timeline view:
+# CHECK-NEXT: Index 01234567
+
+# CHECK: [0,0] DeeeER . lzcntw %ax, %bx
+# CHECK-NEXT: [1,0] D=eeeER. lzcntw %ax, %bx
+# CHECK-NEXT: [2,0] D==eeeER lzcntw %ax, %bx
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 3 2.0 2.0 0.0 lzcntw %ax, %bx
--- /dev/null
+# NOTE: Assertions have been autogenerated by utils/update_mca_test_checks.py
+# RUN: llvm-mca -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -iterations=1500 -timeline -timeline-max-iterations=3 < %s | FileCheck %s
+
+# perf stat reports a throughput of 0.60 IPC for this code snippet.
+# Each lzcnt has a false dependency on %ecx; the first lzcnt has to wait on the
+# imul. However, the folded load can start immediately.
+# The last lzcnt has a false dependency on %cx. However, even in this case, the
+# folded load can start immediately.
+
+imul %edx, %ecx
+lzcnt (%rsp), %cx
+lzcnt 2(%rsp), %cx
+
+# CHECK: Iterations: 1500
+# CHECK-NEXT: Instructions: 4500
+# CHECK-NEXT: Total Cycles: 4510
+# CHECK-NEXT: Total uOps: 7500
+
+# CHECK: Dispatch Width: 4
+# CHECK-NEXT: uOps Per Cycle: 1.66
+# CHECK-NEXT: IPC: 1.00
+# CHECK-NEXT: Block RThroughput: 3.0
+
+# CHECK: Instruction Info:
+# CHECK-NEXT: [1]: #uOps
+# CHECK-NEXT: [2]: Latency
+# CHECK-NEXT: [3]: RThroughput
+# CHECK-NEXT: [4]: MayLoad
+# CHECK-NEXT: [5]: MayStore
+# CHECK-NEXT: [6]: HasSideEffects (U)
+
+# CHECK: [1] [2] [3] [4] [5] [6] Instructions:
+# CHECK-NEXT: 1 3 1.00 imull %edx, %ecx
+# CHECK-NEXT: 2 8 1.00 * lzcntw (%rsp), %cx
+# CHECK-NEXT: 2 8 1.00 * lzcntw 2(%rsp), %cx
+
+# CHECK: Resources:
+# CHECK-NEXT: [0] - SBDivider
+# CHECK-NEXT: [1] - SBFPDivider
+# CHECK-NEXT: [2] - SBPort0
+# CHECK-NEXT: [3] - SBPort1
+# CHECK-NEXT: [4] - SBPort4
+# CHECK-NEXT: [5] - SBPort5
+# CHECK-NEXT: [6.0] - SBPort23
+# CHECK-NEXT: [6.1] - SBPort23
+
+# CHECK: Resource pressure per iteration:
+# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6.0] [6.1]
+# CHECK-NEXT: - - - 3.00 - - - 2.00
+
+# CHECK: Resource pressure by instruction:
+# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6.0] [6.1] Instructions:
+# CHECK-NEXT: - - - 1.00 - - - - imull %edx, %ecx
+# CHECK-NEXT: - - - 1.00 - - - 1.00 lzcntw (%rsp), %cx
+# CHECK-NEXT: - - - 1.00 - - - 1.00 lzcntw 2(%rsp), %cx
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 012345678
+# CHECK-NEXT: Index 0123456789
+
+# CHECK: [0,0] DeeeER . . . imull %edx, %ecx
+# CHECK-NEXT: [0,1] D=eeeeeeeeER . . lzcntw (%rsp), %cx
+# CHECK-NEXT: [0,2] .D=eeeeeeeeER . . lzcntw 2(%rsp), %cx
+# CHECK-NEXT: [1,0] .D=========eeeER . imull %edx, %ecx
+# CHECK-NEXT: [1,1] . D=eeeeeeeeE--R . lzcntw (%rsp), %cx
+# CHECK-NEXT: [1,2] . D==eeeeeeeeE-R . lzcntw 2(%rsp), %cx
+# CHECK-NEXT: [2,0] . D==========eeeER imull %edx, %ecx
+# CHECK-NEXT: [2,1] . D==eeeeeeeeE---R lzcntw (%rsp), %cx
+# CHECK-NEXT: [2,2] . D==eeeeeeeeE--R lzcntw 2(%rsp), %cx
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 3 7.3 0.3 0.0 imull %edx, %ecx
+# CHECK-NEXT: 1. 3 2.3 2.3 1.7 lzcntw (%rsp), %cx
+# CHECK-NEXT: 2. 3 2.7 2.7 1.0 lzcntw 2(%rsp), %cx
--- /dev/null
+# NOTE: Assertions have been autogenerated by utils/update_mca_test_checks.py
+# RUN: llvm-mca -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -iterations=1 -resource-pressure=false -timeline < %s | FileCheck %s
+
+imul %ax, %cx
+add %al, %cl
+add %ecx, %ebx
+
+# CHECK: Iterations: 1
+# CHECK-NEXT: Instructions: 3
+# CHECK-NEXT: Total Cycles: 8
+# CHECK-NEXT: Total uOps: 3
+
+# CHECK: Dispatch Width: 4
+# CHECK-NEXT: uOps Per Cycle: 0.38
+# CHECK-NEXT: IPC: 0.38
+# CHECK-NEXT: Block RThroughput: 1.0
+
+# CHECK: Instruction Info:
+# CHECK-NEXT: [1]: #uOps
+# CHECK-NEXT: [2]: Latency
+# CHECK-NEXT: [3]: RThroughput
+# CHECK-NEXT: [4]: MayLoad
+# CHECK-NEXT: [5]: MayStore
+# CHECK-NEXT: [6]: HasSideEffects (U)
+
+# CHECK: [1] [2] [3] [4] [5] [6] Instructions:
+# CHECK-NEXT: 1 3 1.00 imulw %ax, %cx
+# CHECK-NEXT: 1 1 0.33 addb %al, %cl
+# CHECK-NEXT: 1 1 0.33 addl %ecx, %ebx
+
+# CHECK: Timeline view:
+# CHECK-NEXT: Index 01234567
+
+# CHECK: [0,0] DeeeER . imulw %ax, %cx
+# CHECK-NEXT: [0,1] D===eER. addb %al, %cl
+# CHECK-NEXT: [0,2] D====eER addl %ecx, %ebx
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 1.0 1.0 0.0 imulw %ax, %cx
+# CHECK-NEXT: 1. 1 4.0 0.0 0.0 addb %al, %cl
+# CHECK-NEXT: 2. 1 5.0 0.0 0.0 addl %ecx, %ebx
--- /dev/null
+# NOTE: Assertions have been autogenerated by utils/update_mca_test_checks.py
+# RUN: llvm-mca -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -timeline -timeline-max-iterations=2 < %s | FileCheck %s
+
+# VALU0/VALU1
+vpmulld %xmm0, %xmm1, %xmm2
+vpand %xmm0, %xmm1, %xmm2
+
+# VIMUL/STC
+vcvttps2dq %xmm0, %xmm2
+vpclmulqdq $0, %xmm0, %xmm1, %xmm2
+
+# FPA/FPM
+vaddps %xmm0, %xmm1, %xmm2
+vsqrtps %xmm0, %xmm2
+
+# FPA/FPM YMM
+vaddps %ymm0, %ymm1, %ymm2
+vsqrtps %ymm0, %ymm2
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 800
+# CHECK-NEXT: Total Cycles: 4256
+# CHECK-NEXT: Total uOps: 1000
+
+# CHECK: Dispatch Width: 4
+# CHECK-NEXT: uOps Per Cycle: 0.23
+# CHECK-NEXT: IPC: 0.19
+# CHECK-NEXT: Block RThroughput: 42.0
+
+# CHECK: Instruction Info:
+# CHECK-NEXT: [1]: #uOps
+# CHECK-NEXT: [2]: Latency
+# CHECK-NEXT: [3]: RThroughput
+# CHECK-NEXT: [4]: MayLoad
+# CHECK-NEXT: [5]: MayStore
+# CHECK-NEXT: [6]: HasSideEffects (U)
+
+# CHECK: [1] [2] [3] [4] [5] [6] Instructions:
+# CHECK-NEXT: 1 5 1.00 vpmulld %xmm0, %xmm1, %xmm2
+# CHECK-NEXT: 1 1 0.33 vpand %xmm0, %xmm1, %xmm2
+# CHECK-NEXT: 1 3 1.00 vcvttps2dq %xmm0, %xmm2
+# CHECK-NEXT: 1 14 6.00 vpclmulqdq $0, %xmm0, %xmm1, %xmm2
+# CHECK-NEXT: 1 3 1.00 vaddps %xmm0, %xmm1, %xmm2
+# CHECK-NEXT: 1 14 14.00 vsqrtps %xmm0, %xmm2
+# CHECK-NEXT: 1 3 1.00 vaddps %ymm0, %ymm1, %ymm2
+# CHECK-NEXT: 3 29 28.00 vsqrtps %ymm0, %ymm2
+
+# CHECK: Resources:
+# CHECK-NEXT: [0] - SBDivider
+# CHECK-NEXT: [1] - SBFPDivider
+# CHECK-NEXT: [2] - SBPort0
+# CHECK-NEXT: [3] - SBPort1
+# CHECK-NEXT: [4] - SBPort4
+# CHECK-NEXT: [5] - SBPort5
+# CHECK-NEXT: [6.0] - SBPort23
+# CHECK-NEXT: [6.1] - SBPort23
+
+# CHECK: Resource pressure per iteration:
+# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6.0] [6.1]
+# CHECK-NEXT: - 42.00 6.03 3.96 - 17.01 - -
+
+# CHECK: Resource pressure by instruction:
+# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6.0] [6.1] Instructions:
+# CHECK-NEXT: - - 1.00 - - - - - vpmulld %xmm0, %xmm1, %xmm2
+# CHECK-NEXT: - - 0.05 0.06 - 0.89 - - vpand %xmm0, %xmm1, %xmm2
+# CHECK-NEXT: - - - 1.00 - - - - vcvttps2dq %xmm0, %xmm2
+# CHECK-NEXT: - - 1.98 0.90 - 15.12 - - vpclmulqdq $0, %xmm0, %xmm1, %xmm2
+# CHECK-NEXT: - - - 1.00 - - - - vaddps %xmm0, %xmm1, %xmm2
+# CHECK-NEXT: - 14.00 1.00 - - - - - vsqrtps %xmm0, %xmm2
+# CHECK-NEXT: - - - 1.00 - - - - vaddps %ymm0, %ymm1, %ymm2
+# CHECK-NEXT: - 28.00 2.00 - - 1.00 - - vsqrtps %ymm0, %ymm2
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 0123456789 0123456
+# CHECK-NEXT: Index 0123456789 0123456789
+
+# CHECK: [0,0] DeeeeeER . . . . . .. vpmulld %xmm0, %xmm1, %xmm2
+# CHECK-NEXT: [0,1] DeE----R . . . . . .. vpand %xmm0, %xmm1, %xmm2
+# CHECK-NEXT: [0,2] DeeeE--R . . . . . .. vcvttps2dq %xmm0, %xmm2
+# CHECK-NEXT: [0,3] D=eeeeeeeeeeeeeeER . . . .. vpclmulqdq $0, %xmm0, %xmm1, %xmm2
+# CHECK-NEXT: [0,4] .DeeeE-----------R . . . .. vaddps %xmm0, %xmm1, %xmm2
+# CHECK-NEXT: [0,5] .DeeeeeeeeeeeeeeER . . . .. vsqrtps %xmm0, %xmm2
+# CHECK-NEXT: [0,6] .D=eeeE----------R . . . .. vaddps %ymm0, %ymm1, %ymm2
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 2 1.0 1.0 79.0 vpmulld %xmm0, %xmm1, %xmm2
+# CHECK-NEXT: 1. 2 1.0 1.0 82.5 vpand %xmm0, %xmm1, %xmm2
+# CHECK-NEXT: 2. 2 1.5 1.5 80.0 vcvttps2dq %xmm0, %xmm2
+# CHECK-NEXT: 3. 2 1.5 1.5 74.0 vpclmulqdq $0, %xmm0, %xmm1, %xmm2
+# CHECK-NEXT: 4. 2 2.0 2.0 84.0 vaddps %xmm0, %xmm1, %xmm2
+# CHECK-NEXT: 5. 2 9.5 9.5 65.0 vsqrtps %xmm0, %xmm2
+# CHECK-NEXT: 6. 2 2.5 2.5 83.0 vaddps %ymm0, %ymm1, %ymm2
+# CHECK-NEXT: 7. 2 147.5 147.5 0.0 vsqrtps %ymm0, %ymm2
--- /dev/null
+# NOTE: Assertions have been autogenerated by utils/update_mca_test_checks.py
+# RUN: llvm-mca -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -lqueue=2 -iterations=2 -resource-pressure=false -timeline -timeline-max-cycles=104 < %s | FileCheck %s
+
+int3
+stmxcsr (%rsp)
+
+# CHECK: Iterations: 2
+# CHECK-NEXT: Instructions: 4
+# CHECK-NEXT: Total Cycles: 213
+# CHECK-NEXT: Total uOps: 10
+
+# CHECK: Dispatch Width: 4
+# CHECK-NEXT: uOps Per Cycle: 0.05
+# CHECK-NEXT: IPC: 0.02
+# CHECK-NEXT: Block RThroughput: 1.3
+
+# CHECK: Instruction Info:
+# CHECK-NEXT: [1]: #uOps
+# CHECK-NEXT: [2]: Latency
+# CHECK-NEXT: [3]: RThroughput
+# CHECK-NEXT: [4]: MayLoad
+# CHECK-NEXT: [5]: MayStore
+# CHECK-NEXT: [6]: HasSideEffects (U)
+
+# CHECK: [1] [2] [3] [4] [5] [6] Instructions:
+# CHECK-NEXT: 1 100 0.33 * * U int3
+# CHECK-NEXT: 4 5 1.00 * * U stmxcsr (%rsp)
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 0123456789 0123456789 0123456789 0123456789 0123456789
+# CHECK-NEXT: Index 0123456789 0123456789 0123456789 0123456789 0123456789 012
+
+# CHECK: [0,0] DeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeER int3
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 2 3.0 0.5 0.0 int3
+# CHECK-NEXT: 1. 2 100.0 0.0 0.0 stmxcsr (%rsp)
--- /dev/null
+# NOTE: Assertions have been autogenerated by utils/update_mca_test_checks.py
+# RUN: llvm-mca -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -timeline -timeline-max-iterations=3 < %s | FileCheck %s
+
+add %eax, %ecx
+add %eax, %edx
+add %eax, %ebx
+add %edx, %esi
+add %ebx, %eax
+add %edx, %esi
+add %ebx, %eax
+add %ebx, %eax
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 800
+# CHECK-NEXT: Total Cycles: 403
+# CHECK-NEXT: Total uOps: 800
+
+# CHECK: Dispatch Width: 4
+# CHECK-NEXT: uOps Per Cycle: 1.99
+# CHECK-NEXT: IPC: 1.99
+# CHECK-NEXT: Block RThroughput: 2.7
+
+# CHECK: Instruction Info:
+# CHECK-NEXT: [1]: #uOps
+# CHECK-NEXT: [2]: Latency
+# CHECK-NEXT: [3]: RThroughput
+# CHECK-NEXT: [4]: MayLoad
+# CHECK-NEXT: [5]: MayStore
+# CHECK-NEXT: [6]: HasSideEffects (U)
+
+# CHECK: [1] [2] [3] [4] [5] [6] Instructions:
+# CHECK-NEXT: 1 1 0.33 addl %eax, %ecx
+# CHECK-NEXT: 1 1 0.33 addl %eax, %edx
+# CHECK-NEXT: 1 1 0.33 addl %eax, %ebx
+# CHECK-NEXT: 1 1 0.33 addl %edx, %esi
+# CHECK-NEXT: 1 1 0.33 addl %ebx, %eax
+# CHECK-NEXT: 1 1 0.33 addl %edx, %esi
+# CHECK-NEXT: 1 1 0.33 addl %ebx, %eax
+# CHECK-NEXT: 1 1 0.33 addl %ebx, %eax
+
+# CHECK: Resources:
+# CHECK-NEXT: [0] - SBDivider
+# CHECK-NEXT: [1] - SBFPDivider
+# CHECK-NEXT: [2] - SBPort0
+# CHECK-NEXT: [3] - SBPort1
+# CHECK-NEXT: [4] - SBPort4
+# CHECK-NEXT: [5] - SBPort5
+# CHECK-NEXT: [6.0] - SBPort23
+# CHECK-NEXT: [6.1] - SBPort23
+
+# CHECK: Resource pressure per iteration:
+# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6.0] [6.1]
+# CHECK-NEXT: - - 2.66 2.67 - 2.67 - -
+
+# CHECK: Resource pressure by instruction:
+# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6.0] [6.1] Instructions:
+# CHECK-NEXT: - - 0.33 0.33 - 0.34 - - addl %eax, %ecx
+# CHECK-NEXT: - - 0.33 0.34 - 0.33 - - addl %eax, %edx
+# CHECK-NEXT: - - 0.34 0.33 - 0.33 - - addl %eax, %ebx
+# CHECK-NEXT: - - 0.33 0.33 - 0.34 - - addl %edx, %esi
+# CHECK-NEXT: - - 0.33 0.34 - 0.33 - - addl %ebx, %eax
+# CHECK-NEXT: - - 0.34 0.33 - 0.33 - - addl %edx, %esi
+# CHECK-NEXT: - - 0.33 0.33 - 0.34 - - addl %ebx, %eax
+# CHECK-NEXT: - - 0.33 0.34 - 0.33 - - addl %ebx, %eax
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 01234
+# CHECK-NEXT: Index 0123456789
+
+# CHECK: [0,0] DeER . . . addl %eax, %ecx
+# CHECK-NEXT: [0,1] DeER . . . addl %eax, %edx
+# CHECK-NEXT: [0,2] DeER . . . addl %eax, %ebx
+# CHECK-NEXT: [0,3] D=eER. . . addl %edx, %esi
+# CHECK-NEXT: [0,4] .DeER. . . addl %ebx, %eax
+# CHECK-NEXT: [0,5] .D=eER . . addl %edx, %esi
+# CHECK-NEXT: [0,6] .D=eER . . addl %ebx, %eax
+# CHECK-NEXT: [0,7] .D==eER . . addl %ebx, %eax
+# CHECK-NEXT: [1,0] . D==eER . . addl %eax, %ecx
+# CHECK-NEXT: [1,1] . D==eER . . addl %eax, %edx
+# CHECK-NEXT: [1,2] . D==eER . . addl %eax, %ebx
+# CHECK-NEXT: [1,3] . D===eER . . addl %edx, %esi
+# CHECK-NEXT: [1,4] . D==eER . . addl %ebx, %eax
+# CHECK-NEXT: [1,5] . D===eER. . addl %edx, %esi
+# CHECK-NEXT: [1,6] . D===eER. . addl %ebx, %eax
+# CHECK-NEXT: [1,7] . D====eER . addl %ebx, %eax
+# CHECK-NEXT: [2,0] . D====eER . addl %eax, %ecx
+# CHECK-NEXT: [2,1] . D====eER . addl %eax, %edx
+# CHECK-NEXT: [2,2] . D====eER . addl %eax, %ebx
+# CHECK-NEXT: [2,3] . D=====eER . addl %edx, %esi
+# CHECK-NEXT: [2,4] . D====eER . addl %ebx, %eax
+# CHECK-NEXT: [2,5] . D=====eER. addl %edx, %esi
+# CHECK-NEXT: [2,6] . D=====eER. addl %ebx, %eax
+# CHECK-NEXT: [2,7] . D======eER addl %ebx, %eax
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 3 3.0 0.3 0.0 addl %eax, %ecx
+# CHECK-NEXT: 1. 3 3.0 0.3 0.0 addl %eax, %edx
+# CHECK-NEXT: 2. 3 3.0 0.3 0.0 addl %eax, %ebx
+# CHECK-NEXT: 3. 3 4.0 0.0 0.0 addl %edx, %esi
+# CHECK-NEXT: 4. 3 3.0 0.0 0.0 addl %ebx, %eax
+# CHECK-NEXT: 5. 3 4.0 0.0 0.0 addl %edx, %esi
+# CHECK-NEXT: 6. 3 4.0 0.0 0.0 addl %ebx, %eax
+# CHECK-NEXT: 7. 3 5.0 0.0 0.0 addl %ebx, %eax
--- /dev/null
+# NOTE: Assertions have been autogenerated by utils/update_mca_test_checks.py
+# RUN: llvm-mca -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -resource-pressure=false -retire-stats -iterations=1 < %s | FileCheck %s
+
+ vsqrtps %xmm0, %xmm2
+ vaddps %xmm0, %xmm1, %xmm2
+ vaddps %xmm0, %xmm1, %xmm2
+ vaddps %xmm0, %xmm1, %xmm2
+ vaddps %xmm0, %xmm1, %xmm2
+ vaddps %xmm0, %xmm1, %xmm2
+ vaddps %xmm0, %xmm1, %xmm2
+ vaddps %xmm0, %xmm1, %xmm2
+ vaddps %xmm0, %xmm1, %xmm2
+ vaddps %xmm0, %xmm1, %xmm2
+ vaddps %xmm0, %xmm1, %xmm2
+ vaddps %xmm0, %xmm1, %xmm2
+ vaddps %xmm0, %xmm1, %xmm2
+ vaddps %xmm0, %xmm1, %xmm2
+ vaddps %xmm0, %xmm1, %xmm2
+ vaddps %xmm0, %xmm1, %xmm2
+
+# CHECK: Iterations: 1
+# CHECK-NEXT: Instructions: 16
+# CHECK-NEXT: Total Cycles: 20
+# CHECK-NEXT: Total uOps: 16
+
+# CHECK: Dispatch Width: 4
+# CHECK-NEXT: uOps Per Cycle: 0.80
+# CHECK-NEXT: IPC: 0.80
+# CHECK-NEXT: Block RThroughput: 15.0
+
+# CHECK: Instruction Info:
+# CHECK-NEXT: [1]: #uOps
+# CHECK-NEXT: [2]: Latency
+# CHECK-NEXT: [3]: RThroughput
+# CHECK-NEXT: [4]: MayLoad
+# CHECK-NEXT: [5]: MayStore
+# CHECK-NEXT: [6]: HasSideEffects (U)
+
+# CHECK: [1] [2] [3] [4] [5] [6] Instructions:
+# CHECK-NEXT: 1 14 14.00 vsqrtps %xmm0, %xmm2
+# CHECK-NEXT: 1 3 1.00 vaddps %xmm0, %xmm1, %xmm2
+# CHECK-NEXT: 1 3 1.00 vaddps %xmm0, %xmm1, %xmm2
+# CHECK-NEXT: 1 3 1.00 vaddps %xmm0, %xmm1, %xmm2
+# CHECK-NEXT: 1 3 1.00 vaddps %xmm0, %xmm1, %xmm2
+# CHECK-NEXT: 1 3 1.00 vaddps %xmm0, %xmm1, %xmm2
+# CHECK-NEXT: 1 3 1.00 vaddps %xmm0, %xmm1, %xmm2
+# CHECK-NEXT: 1 3 1.00 vaddps %xmm0, %xmm1, %xmm2
+# CHECK-NEXT: 1 3 1.00 vaddps %xmm0, %xmm1, %xmm2
+# CHECK-NEXT: 1 3 1.00 vaddps %xmm0, %xmm1, %xmm2
+# CHECK-NEXT: 1 3 1.00 vaddps %xmm0, %xmm1, %xmm2
+# CHECK-NEXT: 1 3 1.00 vaddps %xmm0, %xmm1, %xmm2
+# CHECK-NEXT: 1 3 1.00 vaddps %xmm0, %xmm1, %xmm2
+# CHECK-NEXT: 1 3 1.00 vaddps %xmm0, %xmm1, %xmm2
+# CHECK-NEXT: 1 3 1.00 vaddps %xmm0, %xmm1, %xmm2
+# CHECK-NEXT: 1 3 1.00 vaddps %xmm0, %xmm1, %xmm2
+
+# CHECK: Retire Control Unit - number of cycles where we saw N instructions retired:
+# CHECK-NEXT: [# retired], [# cycles]
+# CHECK-NEXT: 0, 16 (80.0%)
+# CHECK-NEXT: 1, 3 (15.0%)
+# CHECK-NEXT: 13, 1 (5.0%)
--- /dev/null
+# NOTE: Assertions have been autogenerated by utils/update_mca_test_checks.py
+# RUN: llvm-mca -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -iterations=1 -timeline -resource-pressure=false < %s | FileCheck %s
+
+# The vmul can start executing 3cy in advance. That is beause the first use
+# operand (i.e. %xmm1) is a ReadAfterLd. That means, the memory operand is
+# evaluated before %xmm1.
+
+vaddps %xmm0, %xmm0, %xmm1
+vmulps (%rdi), %xmm1, %xmm2
+
+# CHECK: Iterations: 1
+# CHECK-NEXT: Instructions: 2
+# CHECK-NEXT: Total Cycles: 14
+# CHECK-NEXT: Total uOps: 3
+
+# CHECK: Dispatch Width: 4
+# CHECK-NEXT: uOps Per Cycle: 0.21
+# CHECK-NEXT: IPC: 0.14
+# CHECK-NEXT: Block RThroughput: 1.0
+
+# CHECK: Instruction Info:
+# CHECK-NEXT: [1]: #uOps
+# CHECK-NEXT: [2]: Latency
+# CHECK-NEXT: [3]: RThroughput
+# CHECK-NEXT: [4]: MayLoad
+# CHECK-NEXT: [5]: MayStore
+# CHECK-NEXT: [6]: HasSideEffects (U)
+
+# CHECK: [1] [2] [3] [4] [5] [6] Instructions:
+# CHECK-NEXT: 1 3 1.00 vaddps %xmm0, %xmm0, %xmm1
+# CHECK-NEXT: 2 11 1.00 * vmulps (%rdi), %xmm1, %xmm2
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 0123
+# CHECK-NEXT: Index 0123456789
+
+# CHECK: [0,0] DeeeER . . vaddps %xmm0, %xmm0, %xmm1
+# CHECK-NEXT: [0,1] DeeeeeeeeeeeER vmulps (%rdi), %xmm1, %xmm2
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 1.0 1.0 0.0 vaddps %xmm0, %xmm0, %xmm1
+# CHECK-NEXT: 1. 1 1.0 0.0 0.0 vmulps (%rdi), %xmm1, %xmm2
--- /dev/null
+# NOTE: Assertions have been autogenerated by utils/update_mca_test_checks.py
+# RUN: llvm-mca -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -iterations=1 -resource-pressure=0 -timeline < %s | FileCheck %s
+
+ imull %esi
+ imull (%rdi)
+
+# The second integer multiply can start at cycle 2 because the implicit reads
+# can start after the load operand is evaluated.
+
+# CHECK: Iterations: 1
+# CHECK-NEXT: Instructions: 2
+# CHECK-NEXT: Total Cycles: 13
+# CHECK-NEXT: Total uOps: 7
+
+# CHECK: Dispatch Width: 4
+# CHECK-NEXT: uOps Per Cycle: 0.54
+# CHECK-NEXT: IPC: 0.15
+# CHECK-NEXT: Block RThroughput: 2.0
+
+# CHECK: Instruction Info:
+# CHECK-NEXT: [1]: #uOps
+# CHECK-NEXT: [2]: Latency
+# CHECK-NEXT: [3]: RThroughput
+# CHECK-NEXT: [4]: MayLoad
+# CHECK-NEXT: [5]: MayStore
+# CHECK-NEXT: [6]: HasSideEffects (U)
+
+# CHECK: [1] [2] [3] [4] [5] [6] Instructions:
+# CHECK-NEXT: 3 4 1.00 imull %esi
+# CHECK-NEXT: 4 9 1.00 * imull (%rdi)
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 012
+# CHECK-NEXT: Index 0123456789
+
+# CHECK: [0,0] DeeeeER . . imull %esi
+# CHECK-NEXT: [0,1] .DeeeeeeeeeER imull (%rdi)
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 1.0 1.0 0.0 imull %esi
+# CHECK-NEXT: 1. 1 1.0 1.0 0.0 imull (%rdi)
--- /dev/null
+# NOTE: Assertions have been autogenerated by utils/update_mca_test_checks.py
+# RUN: llvm-mca -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -iterations=1 -resource-pressure=0 -timeline -dispatch=3 < %s | FileCheck %s
+
+ add %rdi, %rsi
+ add (%rsp), %rsi
+ add %rdx, %r8
+
+# CHECK: Iterations: 1
+# CHECK-NEXT: Instructions: 3
+# CHECK-NEXT: Total Cycles: 9
+# CHECK-NEXT: Total uOps: 4
+
+# CHECK: Dispatch Width: 3
+# CHECK-NEXT: uOps Per Cycle: 0.44
+# CHECK-NEXT: IPC: 0.33
+# CHECK-NEXT: Block RThroughput: 1.3
+
+# CHECK: Instruction Info:
+# CHECK-NEXT: [1]: #uOps
+# CHECK-NEXT: [2]: Latency
+# CHECK-NEXT: [3]: RThroughput
+# CHECK-NEXT: [4]: MayLoad
+# CHECK-NEXT: [5]: MayStore
+# CHECK-NEXT: [6]: HasSideEffects (U)
+
+# CHECK: [1] [2] [3] [4] [5] [6] Instructions:
+# CHECK-NEXT: 1 1 0.33 addq %rdi, %rsi
+# CHECK-NEXT: 2 6 0.50 * addq (%rsp), %rsi
+# CHECK-NEXT: 1 1 0.33 addq %rdx, %r8
+
+# CHECK: Timeline view:
+# CHECK-NEXT: Index 012345678
+
+# CHECK: [0,0] DeER . . addq %rdi, %rsi
+# CHECK-NEXT: [0,1] DeeeeeeER addq (%rsp), %rsi
+# CHECK-NEXT: [0,2] .DeE----R addq %rdx, %r8
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 1.0 1.0 0.0 addq %rdi, %rsi
+# CHECK-NEXT: 1. 1 1.0 0.0 0.0 addq (%rsp), %rsi
+# CHECK-NEXT: 2. 1 1.0 1.0 4.0 addq %rdx, %r8
--- /dev/null
+# NOTE: Assertions have been autogenerated by utils/update_mca_test_checks.py
+# RUN: llvm-mca -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -iterations=3 -timeline -register-file-stats < %s | FileCheck %s
+
+# The register move from XMM0 to XMM1 can be eliminated at register renaming
+# stage. So, it should not consume pipeline resources.
+
+vxorps %xmm0, %xmm0, %xmm0
+vmovaps %xmm0, %xmm1
+vaddps %xmm1, %xmm1, %xmm2
+
+# CHECK: Iterations: 3
+# CHECK-NEXT: Instructions: 9
+# CHECK-NEXT: Total Cycles: 9
+# CHECK-NEXT: Total uOps: 9
+
+# CHECK: Dispatch Width: 4
+# CHECK-NEXT: uOps Per Cycle: 1.00
+# CHECK-NEXT: IPC: 1.00
+# CHECK-NEXT: Block RThroughput: 1.0
+
+# CHECK: Instruction Info:
+# CHECK-NEXT: [1]: #uOps
+# CHECK-NEXT: [2]: Latency
+# CHECK-NEXT: [3]: RThroughput
+# CHECK-NEXT: [4]: MayLoad
+# CHECK-NEXT: [5]: MayStore
+# CHECK-NEXT: [6]: HasSideEffects (U)
+
+# CHECK: [1] [2] [3] [4] [5] [6] Instructions:
+# CHECK-NEXT: 1 0 0.25 vxorps %xmm0, %xmm0, %xmm0
+# CHECK-NEXT: 1 1 1.00 vmovaps %xmm0, %xmm1
+# CHECK-NEXT: 1 3 1.00 vaddps %xmm1, %xmm1, %xmm2
+
+# CHECK: Register File statistics:
+# CHECK-NEXT: Total number of mappings created: 9
+# CHECK-NEXT: Max number of mappings used: 8
+
+# CHECK: Resources:
+# CHECK-NEXT: [0] - SBDivider
+# CHECK-NEXT: [1] - SBFPDivider
+# CHECK-NEXT: [2] - SBPort0
+# CHECK-NEXT: [3] - SBPort1
+# CHECK-NEXT: [4] - SBPort4
+# CHECK-NEXT: [5] - SBPort5
+# CHECK-NEXT: [6.0] - SBPort23
+# CHECK-NEXT: [6.1] - SBPort23
+
+# CHECK: Resource pressure per iteration:
+# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6.0] [6.1]
+# CHECK-NEXT: - - - 1.00 - 1.00 - -
+
+# CHECK: Resource pressure by instruction:
+# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6.0] [6.1] Instructions:
+# CHECK-NEXT: - - - - - - - - vxorps %xmm0, %xmm0, %xmm0
+# CHECK-NEXT: - - - - - 1.00 - - vmovaps %xmm0, %xmm1
+# CHECK-NEXT: - - - 1.00 - - - - vaddps %xmm1, %xmm1, %xmm2
+
+# CHECK: Timeline view:
+# CHECK-NEXT: Index 012345678
+
+# CHECK: [0,0] DR . . vxorps %xmm0, %xmm0, %xmm0
+# CHECK-NEXT: [0,1] DeER . . vmovaps %xmm0, %xmm1
+# CHECK-NEXT: [0,2] D=eeeER . vaddps %xmm1, %xmm1, %xmm2
+# CHECK-NEXT: [1,0] D-----R . vxorps %xmm0, %xmm0, %xmm0
+# CHECK-NEXT: [1,1] .DeE--R . vmovaps %xmm0, %xmm1
+# CHECK-NEXT: [1,2] .D=eeeER. vaddps %xmm1, %xmm1, %xmm2
+# CHECK-NEXT: [2,0] .D-----R. vxorps %xmm0, %xmm0, %xmm0
+# CHECK-NEXT: [2,1] .D=eE--R. vmovaps %xmm0, %xmm1
+# CHECK-NEXT: [2,2] . D=eeeER vaddps %xmm1, %xmm1, %xmm2
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 3 0.0 0.0 3.3 vxorps %xmm0, %xmm0, %xmm0
+# CHECK-NEXT: 1. 3 1.3 1.3 1.3 vmovaps %xmm0, %xmm1
+# CHECK-NEXT: 2. 3 2.0 0.0 0.0 vaddps %xmm1, %xmm1, %xmm2
--- /dev/null
+# NOTE: Assertions have been autogenerated by utils/update_mca_test_checks.py
+# RUN: llvm-mca -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -iterations=3 -timeline -register-file-stats < %s | FileCheck %s
+
+pxor %mm0, %mm0
+movq %mm0, %mm1
+
+xorps %xmm0, %xmm0
+movaps %xmm0, %xmm1
+movups %xmm1, %xmm2
+movapd %xmm2, %xmm3
+movupd %xmm3, %xmm4
+movdqa %xmm4, %xmm5
+movdqu %xmm5, %xmm0
+
+# CHECK: Iterations: 3
+# CHECK-NEXT: Instructions: 27
+# CHECK-NEXT: Total Cycles: 22
+# CHECK-NEXT: Total uOps: 27
+
+# CHECK: Dispatch Width: 4
+# CHECK-NEXT: uOps Per Cycle: 1.23
+# CHECK-NEXT: IPC: 1.23
+# CHECK-NEXT: Block RThroughput: 4.0
+
+# CHECK: Instruction Info:
+# CHECK-NEXT: [1]: #uOps
+# CHECK-NEXT: [2]: Latency
+# CHECK-NEXT: [3]: RThroughput
+# CHECK-NEXT: [4]: MayLoad
+# CHECK-NEXT: [5]: MayStore
+# CHECK-NEXT: [6]: HasSideEffects (U)
+
+# CHECK: [1] [2] [3] [4] [5] [6] Instructions:
+# CHECK-NEXT: 1 1 0.33 pxor %mm0, %mm0
+# CHECK-NEXT: 1 1 0.50 movq %mm0, %mm1
+# CHECK-NEXT: 1 0 0.25 xorps %xmm0, %xmm0
+# CHECK-NEXT: 1 1 1.00 movaps %xmm0, %xmm1
+# CHECK-NEXT: 1 1 1.00 movups %xmm1, %xmm2
+# CHECK-NEXT: 1 1 1.00 movapd %xmm2, %xmm3
+# CHECK-NEXT: 1 1 1.00 movupd %xmm3, %xmm4
+# CHECK-NEXT: 1 1 0.33 movdqa %xmm4, %xmm5
+# CHECK-NEXT: 1 1 0.33 movdqu %xmm5, %xmm0
+
+# CHECK: Register File statistics:
+# CHECK-NEXT: Total number of mappings created: 27
+# CHECK-NEXT: Max number of mappings used: 21
+
+# CHECK: Resources:
+# CHECK-NEXT: [0] - SBDivider
+# CHECK-NEXT: [1] - SBFPDivider
+# CHECK-NEXT: [2] - SBPort0
+# CHECK-NEXT: [3] - SBPort1
+# CHECK-NEXT: [4] - SBPort4
+# CHECK-NEXT: [5] - SBPort5
+# CHECK-NEXT: [6.0] - SBPort23
+# CHECK-NEXT: [6.1] - SBPort23
+
+# CHECK: Resource pressure per iteration:
+# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6.0] [6.1]
+# CHECK-NEXT: - - 1.67 1.67 - 4.67 - -
+
+# CHECK: Resource pressure by instruction:
+# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6.0] [6.1] Instructions:
+# CHECK-NEXT: - - - 0.67 - 0.33 - - pxor %mm0, %mm0
+# CHECK-NEXT: - - 1.00 - - - - - movq %mm0, %mm1
+# CHECK-NEXT: - - - - - - - - xorps %xmm0, %xmm0
+# CHECK-NEXT: - - - - - 1.00 - - movaps %xmm0, %xmm1
+# CHECK-NEXT: - - - - - 1.00 - - movups %xmm1, %xmm2
+# CHECK-NEXT: - - - - - 1.00 - - movapd %xmm2, %xmm3
+# CHECK-NEXT: - - - - - 1.00 - - movupd %xmm3, %xmm4
+# CHECK-NEXT: - - - 1.00 - - - - movdqa %xmm4, %xmm5
+# CHECK-NEXT: - - 0.67 - - 0.33 - - movdqu %xmm5, %xmm0
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 0123456789
+# CHECK-NEXT: Index 0123456789 01
+
+# CHECK: [0,0] DeER . . . .. pxor %mm0, %mm0
+# CHECK-NEXT: [0,1] D=eER. . . .. movq %mm0, %mm1
+# CHECK-NEXT: [0,2] D---R. . . .. xorps %xmm0, %xmm0
+# CHECK-NEXT: [0,3] D=eER. . . .. movaps %xmm0, %xmm1
+# CHECK-NEXT: [0,4] .D=eER . . .. movups %xmm1, %xmm2
+# CHECK-NEXT: [0,5] .D==eER . . .. movapd %xmm2, %xmm3
+# CHECK-NEXT: [0,6] .D===eER . . .. movupd %xmm3, %xmm4
+# CHECK-NEXT: [0,7] .D====eER . . .. movdqa %xmm4, %xmm5
+# CHECK-NEXT: [0,8] . D====eER. . .. movdqu %xmm5, %xmm0
+# CHECK-NEXT: [1,0] . DeE----R. . .. pxor %mm0, %mm0
+# CHECK-NEXT: [1,1] . D=eE---R. . .. movq %mm0, %mm1
+# CHECK-NEXT: [1,2] . D=====ER. . .. xorps %xmm0, %xmm0
+# CHECK-NEXT: [1,3] . D====eER . .. movaps %xmm0, %xmm1
+# CHECK-NEXT: [1,4] . D=====eER . .. movups %xmm1, %xmm2
+# CHECK-NEXT: [1,5] . D======eER . .. movapd %xmm2, %xmm3
+# CHECK-NEXT: [1,6] . D=======eER . .. movupd %xmm3, %xmm4
+# CHECK-NEXT: [1,7] . D=======eER. .. movdqa %xmm4, %xmm5
+# CHECK-NEXT: [1,8] . D========eER .. movdqu %xmm5, %xmm0
+# CHECK-NEXT: [2,0] . DeE--------R .. pxor %mm0, %mm0
+# CHECK-NEXT: [2,1] . D=eE-------R .. movq %mm0, %mm1
+# CHECK-NEXT: [2,2] . D========ER .. xorps %xmm0, %xmm0
+# CHECK-NEXT: [2,3] . D========eER .. movaps %xmm0, %xmm1
+# CHECK-NEXT: [2,4] . D=========eER .. movups %xmm1, %xmm2
+# CHECK-NEXT: [2,5] . D==========eER .. movapd %xmm2, %xmm3
+# CHECK-NEXT: [2,6] . .D==========eER.. movupd %xmm3, %xmm4
+# CHECK-NEXT: [2,7] . .D===========eER. movdqa %xmm4, %xmm5
+# CHECK-NEXT: [2,8] . .D============eER movdqu %xmm5, %xmm0
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 3 1.0 1.0 4.0 pxor %mm0, %mm0
+# CHECK-NEXT: 1. 3 2.0 0.0 3.3 movq %mm0, %mm1
+# CHECK-NEXT: 2. 3 5.0 0.0 1.0 xorps %xmm0, %xmm0
+# CHECK-NEXT: 3. 3 5.3 0.7 0.0 movaps %xmm0, %xmm1
+# CHECK-NEXT: 4. 3 6.0 0.0 0.0 movups %xmm1, %xmm2
+# CHECK-NEXT: 5. 3 7.0 0.0 0.0 movapd %xmm2, %xmm3
+# CHECK-NEXT: 6. 3 7.7 0.0 0.0 movupd %xmm3, %xmm4
+# CHECK-NEXT: 7. 3 8.3 0.0 0.0 movdqa %xmm4, %xmm5
+# CHECK-NEXT: 8. 3 9.0 0.0 0.0 movdqu %xmm5, %xmm0
--- /dev/null
+# NOTE: Assertions have been autogenerated by utils/update_mca_test_checks.py
+# RUN: llvm-mca -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -iterations=3 -timeline -register-file-stats < %s | FileCheck %s
+
+vxorps %xmm0, %xmm0, %xmm0
+vmovaps %xmm0, %xmm1
+vmovups %xmm1, %xmm2
+vmovapd %xmm2, %xmm3
+vmovupd %xmm3, %xmm4
+vmovdqa %xmm4, %xmm5
+vmovdqu %xmm5, %xmm0
+
+# CHECK: Iterations: 3
+# CHECK-NEXT: Instructions: 21
+# CHECK-NEXT: Total Cycles: 21
+# CHECK-NEXT: Total uOps: 21
+
+# CHECK: Dispatch Width: 4
+# CHECK-NEXT: uOps Per Cycle: 1.00
+# CHECK-NEXT: IPC: 1.00
+# CHECK-NEXT: Block RThroughput: 4.0
+
+# CHECK: Instruction Info:
+# CHECK-NEXT: [1]: #uOps
+# CHECK-NEXT: [2]: Latency
+# CHECK-NEXT: [3]: RThroughput
+# CHECK-NEXT: [4]: MayLoad
+# CHECK-NEXT: [5]: MayStore
+# CHECK-NEXT: [6]: HasSideEffects (U)
+
+# CHECK: [1] [2] [3] [4] [5] [6] Instructions:
+# CHECK-NEXT: 1 0 0.25 vxorps %xmm0, %xmm0, %xmm0
+# CHECK-NEXT: 1 1 1.00 vmovaps %xmm0, %xmm1
+# CHECK-NEXT: 1 1 1.00 vmovups %xmm1, %xmm2
+# CHECK-NEXT: 1 1 1.00 vmovapd %xmm2, %xmm3
+# CHECK-NEXT: 1 1 1.00 vmovupd %xmm3, %xmm4
+# CHECK-NEXT: 1 1 0.33 vmovdqa %xmm4, %xmm5
+# CHECK-NEXT: 1 1 0.33 vmovdqu %xmm5, %xmm0
+
+# CHECK: Register File statistics:
+# CHECK-NEXT: Total number of mappings created: 21
+# CHECK-NEXT: Max number of mappings used: 17
+
+# CHECK: Resources:
+# CHECK-NEXT: [0] - SBDivider
+# CHECK-NEXT: [1] - SBFPDivider
+# CHECK-NEXT: [2] - SBPort0
+# CHECK-NEXT: [3] - SBPort1
+# CHECK-NEXT: [4] - SBPort4
+# CHECK-NEXT: [5] - SBPort5
+# CHECK-NEXT: [6.0] - SBPort23
+# CHECK-NEXT: [6.1] - SBPort23
+
+# CHECK: Resource pressure per iteration:
+# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6.0] [6.1]
+# CHECK-NEXT: - - 1.00 1.00 - 4.00 - -
+
+# CHECK: Resource pressure by instruction:
+# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6.0] [6.1] Instructions:
+# CHECK-NEXT: - - - - - - - - vxorps %xmm0, %xmm0, %xmm0
+# CHECK-NEXT: - - - - - 1.00 - - vmovaps %xmm0, %xmm1
+# CHECK-NEXT: - - - - - 1.00 - - vmovups %xmm1, %xmm2
+# CHECK-NEXT: - - - - - 1.00 - - vmovapd %xmm2, %xmm3
+# CHECK-NEXT: - - - - - 1.00 - - vmovupd %xmm3, %xmm4
+# CHECK-NEXT: - - - 1.00 - - - - vmovdqa %xmm4, %xmm5
+# CHECK-NEXT: - - 1.00 - - - - - vmovdqu %xmm5, %xmm0
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 0123456789
+# CHECK-NEXT: Index 0123456789 0
+
+# CHECK: [0,0] DR . . . . vxorps %xmm0, %xmm0, %xmm0
+# CHECK-NEXT: [0,1] DeER . . . . vmovaps %xmm0, %xmm1
+# CHECK-NEXT: [0,2] D=eER. . . . vmovups %xmm1, %xmm2
+# CHECK-NEXT: [0,3] D==eER . . . vmovapd %xmm2, %xmm3
+# CHECK-NEXT: [0,4] .D==eER . . . vmovupd %xmm3, %xmm4
+# CHECK-NEXT: [0,5] .D===eER . . . vmovdqa %xmm4, %xmm5
+# CHECK-NEXT: [0,6] .D====eER . . . vmovdqu %xmm5, %xmm0
+# CHECK-NEXT: [1,0] .D=====ER . . . vxorps %xmm0, %xmm0, %xmm0
+# CHECK-NEXT: [1,1] . D====eER. . . vmovaps %xmm0, %xmm1
+# CHECK-NEXT: [1,2] . D=====eER . . vmovups %xmm1, %xmm2
+# CHECK-NEXT: [1,3] . D======eER . . vmovapd %xmm2, %xmm3
+# CHECK-NEXT: [1,4] . D=======eER . . vmovupd %xmm3, %xmm4
+# CHECK-NEXT: [1,5] . D=======eER . . vmovdqa %xmm4, %xmm5
+# CHECK-NEXT: [1,6] . D========eER. . vmovdqu %xmm5, %xmm0
+# CHECK-NEXT: [2,0] . D=========ER. . vxorps %xmm0, %xmm0, %xmm0
+# CHECK-NEXT: [2,1] . D=========eER . vmovaps %xmm0, %xmm1
+# CHECK-NEXT: [2,2] . D=========eER . vmovups %xmm1, %xmm2
+# CHECK-NEXT: [2,3] . D==========eER . vmovapd %xmm2, %xmm3
+# CHECK-NEXT: [2,4] . D===========eER . vmovupd %xmm3, %xmm4
+# CHECK-NEXT: [2,5] . D============eER. vmovdqa %xmm4, %xmm5
+# CHECK-NEXT: [2,6] . D============eER vmovdqu %xmm5, %xmm0
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 3 5.3 0.0 0.0 vxorps %xmm0, %xmm0, %xmm0
+# CHECK-NEXT: 1. 3 5.3 0.3 0.0 vmovaps %xmm0, %xmm1
+# CHECK-NEXT: 2. 3 6.0 0.0 0.0 vmovups %xmm1, %xmm2
+# CHECK-NEXT: 3. 3 7.0 0.0 0.0 vmovapd %xmm2, %xmm3
+# CHECK-NEXT: 4. 3 7.7 0.0 0.0 vmovupd %xmm3, %xmm4
+# CHECK-NEXT: 5. 3 8.3 0.0 0.0 vmovdqa %xmm4, %xmm5
+# CHECK-NEXT: 6. 3 9.0 0.0 0.0 vmovdqu %xmm5, %xmm0
--- /dev/null
+# NOTE: Assertions have been autogenerated by utils/update_mca_test_checks.py
+# RUN: llvm-mca -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -iterations=3 -timeline -register-file-stats < %s | FileCheck %s
+
+xor %eax, %eax
+mov %eax, %ebx
+mov %ebx, %ecx
+mov %ecx, %edx
+mov %edx, %eax
+
+# CHECK: Iterations: 3
+# CHECK-NEXT: Instructions: 15
+# CHECK-NEXT: Total Cycles: 15
+# CHECK-NEXT: Total uOps: 15
+
+# CHECK: Dispatch Width: 4
+# CHECK-NEXT: uOps Per Cycle: 1.00
+# CHECK-NEXT: IPC: 1.00
+# CHECK-NEXT: Block RThroughput: 1.3
+
+# CHECK: Instruction Info:
+# CHECK-NEXT: [1]: #uOps
+# CHECK-NEXT: [2]: Latency
+# CHECK-NEXT: [3]: RThroughput
+# CHECK-NEXT: [4]: MayLoad
+# CHECK-NEXT: [5]: MayStore
+# CHECK-NEXT: [6]: HasSideEffects (U)
+
+# CHECK: [1] [2] [3] [4] [5] [6] Instructions:
+# CHECK-NEXT: 1 0 0.25 xorl %eax, %eax
+# CHECK-NEXT: 1 1 0.33 movl %eax, %ebx
+# CHECK-NEXT: 1 1 0.33 movl %ebx, %ecx
+# CHECK-NEXT: 1 1 0.33 movl %ecx, %edx
+# CHECK-NEXT: 1 1 0.33 movl %edx, %eax
+
+# CHECK: Register File statistics:
+# CHECK-NEXT: Total number of mappings created: 18
+# CHECK-NEXT: Max number of mappings used: 15
+
+# CHECK: Resources:
+# CHECK-NEXT: [0] - SBDivider
+# CHECK-NEXT: [1] - SBFPDivider
+# CHECK-NEXT: [2] - SBPort0
+# CHECK-NEXT: [3] - SBPort1
+# CHECK-NEXT: [4] - SBPort4
+# CHECK-NEXT: [5] - SBPort5
+# CHECK-NEXT: [6.0] - SBPort23
+# CHECK-NEXT: [6.1] - SBPort23
+
+# CHECK: Resource pressure per iteration:
+# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6.0] [6.1]
+# CHECK-NEXT: - - 1.33 1.33 - 1.33 - -
+
+# CHECK: Resource pressure by instruction:
+# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6.0] [6.1] Instructions:
+# CHECK-NEXT: - - - - - - - - xorl %eax, %eax
+# CHECK-NEXT: - - 0.33 0.33 - 0.33 - - movl %eax, %ebx
+# CHECK-NEXT: - - 0.33 0.33 - 0.33 - - movl %ebx, %ecx
+# CHECK-NEXT: - - 0.33 0.33 - 0.33 - - movl %ecx, %edx
+# CHECK-NEXT: - - 0.33 0.33 - 0.33 - - movl %edx, %eax
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 01234
+# CHECK-NEXT: Index 0123456789
+
+# CHECK: [0,0] DR . . . xorl %eax, %eax
+# CHECK-NEXT: [0,1] DeER . . . movl %eax, %ebx
+# CHECK-NEXT: [0,2] D=eER. . . movl %ebx, %ecx
+# CHECK-NEXT: [0,3] D==eER . . movl %ecx, %edx
+# CHECK-NEXT: [0,4] .D==eER . . movl %edx, %eax
+# CHECK-NEXT: [1,0] .D===ER . . xorl %eax, %eax
+# CHECK-NEXT: [1,1] .D===eER . . movl %eax, %ebx
+# CHECK-NEXT: [1,2] .D====eER . . movl %ebx, %ecx
+# CHECK-NEXT: [1,3] . D====eER. . movl %ecx, %edx
+# CHECK-NEXT: [1,4] . D=====eER . movl %edx, %eax
+# CHECK-NEXT: [2,0] . D======ER . xorl %eax, %eax
+# CHECK-NEXT: [2,1] . D======eER . movl %eax, %ebx
+# CHECK-NEXT: [2,2] . D======eER . movl %ebx, %ecx
+# CHECK-NEXT: [2,3] . D=======eER. movl %ecx, %edx
+# CHECK-NEXT: [2,4] . D========eER movl %edx, %eax
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 3 3.7 0.0 0.0 xorl %eax, %eax
+# CHECK-NEXT: 1. 3 4.0 0.3 0.0 movl %eax, %ebx
+# CHECK-NEXT: 2. 3 4.7 0.0 0.0 movl %ebx, %ecx
+# CHECK-NEXT: 3. 3 5.3 0.0 0.0 movl %ecx, %edx
+# CHECK-NEXT: 4. 3 6.0 0.0 0.0 movl %edx, %eax
--- /dev/null
+# NOTE: Assertions have been autogenerated by utils/update_mca_test_checks.py
+# RUN: llvm-mca -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -iterations=3 -timeline -register-file-stats < %s | FileCheck %s
+
+xor %rax, %rax
+mov %rax, %rbx
+mov %rbx, %rcx
+mov %rcx, %rdx
+mov %rdx, %rax
+
+# CHECK: Iterations: 3
+# CHECK-NEXT: Instructions: 15
+# CHECK-NEXT: Total Cycles: 15
+# CHECK-NEXT: Total uOps: 15
+
+# CHECK: Dispatch Width: 4
+# CHECK-NEXT: uOps Per Cycle: 1.00
+# CHECK-NEXT: IPC: 1.00
+# CHECK-NEXT: Block RThroughput: 1.3
+
+# CHECK: Instruction Info:
+# CHECK-NEXT: [1]: #uOps
+# CHECK-NEXT: [2]: Latency
+# CHECK-NEXT: [3]: RThroughput
+# CHECK-NEXT: [4]: MayLoad
+# CHECK-NEXT: [5]: MayStore
+# CHECK-NEXT: [6]: HasSideEffects (U)
+
+# CHECK: [1] [2] [3] [4] [5] [6] Instructions:
+# CHECK-NEXT: 1 0 0.25 xorq %rax, %rax
+# CHECK-NEXT: 1 1 0.33 movq %rax, %rbx
+# CHECK-NEXT: 1 1 0.33 movq %rbx, %rcx
+# CHECK-NEXT: 1 1 0.33 movq %rcx, %rdx
+# CHECK-NEXT: 1 1 0.33 movq %rdx, %rax
+
+# CHECK: Register File statistics:
+# CHECK-NEXT: Total number of mappings created: 18
+# CHECK-NEXT: Max number of mappings used: 15
+
+# CHECK: Resources:
+# CHECK-NEXT: [0] - SBDivider
+# CHECK-NEXT: [1] - SBFPDivider
+# CHECK-NEXT: [2] - SBPort0
+# CHECK-NEXT: [3] - SBPort1
+# CHECK-NEXT: [4] - SBPort4
+# CHECK-NEXT: [5] - SBPort5
+# CHECK-NEXT: [6.0] - SBPort23
+# CHECK-NEXT: [6.1] - SBPort23
+
+# CHECK: Resource pressure per iteration:
+# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6.0] [6.1]
+# CHECK-NEXT: - - 1.33 1.33 - 1.33 - -
+
+# CHECK: Resource pressure by instruction:
+# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6.0] [6.1] Instructions:
+# CHECK-NEXT: - - - - - - - - xorq %rax, %rax
+# CHECK-NEXT: - - 0.33 0.33 - 0.33 - - movq %rax, %rbx
+# CHECK-NEXT: - - 0.33 0.33 - 0.33 - - movq %rbx, %rcx
+# CHECK-NEXT: - - 0.33 0.33 - 0.33 - - movq %rcx, %rdx
+# CHECK-NEXT: - - 0.33 0.33 - 0.33 - - movq %rdx, %rax
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 01234
+# CHECK-NEXT: Index 0123456789
+
+# CHECK: [0,0] DR . . . xorq %rax, %rax
+# CHECK-NEXT: [0,1] DeER . . . movq %rax, %rbx
+# CHECK-NEXT: [0,2] D=eER. . . movq %rbx, %rcx
+# CHECK-NEXT: [0,3] D==eER . . movq %rcx, %rdx
+# CHECK-NEXT: [0,4] .D==eER . . movq %rdx, %rax
+# CHECK-NEXT: [1,0] .D===ER . . xorq %rax, %rax
+# CHECK-NEXT: [1,1] .D===eER . . movq %rax, %rbx
+# CHECK-NEXT: [1,2] .D====eER . . movq %rbx, %rcx
+# CHECK-NEXT: [1,3] . D====eER. . movq %rcx, %rdx
+# CHECK-NEXT: [1,4] . D=====eER . movq %rdx, %rax
+# CHECK-NEXT: [2,0] . D======ER . xorq %rax, %rax
+# CHECK-NEXT: [2,1] . D======eER . movq %rax, %rbx
+# CHECK-NEXT: [2,2] . D======eER . movq %rbx, %rcx
+# CHECK-NEXT: [2,3] . D=======eER. movq %rcx, %rdx
+# CHECK-NEXT: [2,4] . D========eER movq %rdx, %rax
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 3 3.7 0.0 0.0 xorq %rax, %rax
+# CHECK-NEXT: 1. 3 4.0 0.3 0.0 movq %rax, %rbx
+# CHECK-NEXT: 2. 3 4.7 0.0 0.0 movq %rbx, %rcx
+# CHECK-NEXT: 3. 3 5.3 0.0 0.0 movq %rcx, %rdx
+# CHECK-NEXT: 4. 3 6.0 0.0 0.0 movq %rdx, %rax
--- /dev/null
+# NOTE: Assertions have been autogenerated by utils/update_mca_test_checks.py
+# RUN: llvm-mca -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -iterations=5 -instruction-info=false -dispatch-stats -register-file-stats -timeline < %s | FileCheck %s
+
+vaddps %xmm0, %xmm0, %xmm0
+vmulps %xmm0, %xmm0, %xmm0
+
+# CHECK: Iterations: 5
+# CHECK-NEXT: Instructions: 10
+# CHECK-NEXT: Total Cycles: 43
+# CHECK-NEXT: Total uOps: 10
+
+# CHECK: Dispatch Width: 4
+# CHECK-NEXT: uOps Per Cycle: 0.23
+# CHECK-NEXT: IPC: 0.23
+# CHECK-NEXT: Block RThroughput: 1.0
+
+# CHECK: Dynamic Dispatch Stall Cycles:
+# CHECK-NEXT: RAT - Register unavailable: 0
+# CHECK-NEXT: RCU - Retire tokens unavailable: 0
+# CHECK-NEXT: SCHEDQ - Scheduler full: 0
+# CHECK-NEXT: LQ - Load queue full: 0
+# CHECK-NEXT: SQ - Store queue full: 0
+# CHECK-NEXT: GROUP - Static restrictions on the dispatch group: 0
+
+# CHECK: Dispatch Logic - number of cycles where we saw N micro opcodes dispatched:
+# CHECK-NEXT: [# dispatched], [# cycles]
+# CHECK-NEXT: 0, 40 (93.0%)
+# CHECK-NEXT: 2, 1 (2.3%)
+# CHECK-NEXT: 4, 2 (4.7%)
+
+# CHECK: Register File statistics:
+# CHECK-NEXT: Total number of mappings created: 10
+# CHECK-NEXT: Max number of mappings used: 10
+
+# CHECK: Resources:
+# CHECK-NEXT: [0] - SBDivider
+# CHECK-NEXT: [1] - SBFPDivider
+# CHECK-NEXT: [2] - SBPort0
+# CHECK-NEXT: [3] - SBPort1
+# CHECK-NEXT: [4] - SBPort4
+# CHECK-NEXT: [5] - SBPort5
+# CHECK-NEXT: [6.0] - SBPort23
+# CHECK-NEXT: [6.1] - SBPort23
+
+# CHECK: Resource pressure per iteration:
+# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6.0] [6.1]
+# CHECK-NEXT: - - 1.00 1.00 - - - -
+
+# CHECK: Resource pressure by instruction:
+# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6.0] [6.1] Instructions:
+# CHECK-NEXT: - - - 1.00 - - - - vaddps %xmm0, %xmm0, %xmm0
+# CHECK-NEXT: - - 1.00 - - - - - vmulps %xmm0, %xmm0, %xmm0
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 0123456789 0123456789
+# CHECK-NEXT: Index 0123456789 0123456789 012
+
+# CHECK: [0,0] DeeeER . . . . . . . . vaddps %xmm0, %xmm0, %xmm0
+# CHECK-NEXT: [0,1] D===eeeeeER . . . . . . . vmulps %xmm0, %xmm0, %xmm0
+# CHECK-NEXT: [1,0] D========eeeER . . . . . . . vaddps %xmm0, %xmm0, %xmm0
+# CHECK-NEXT: [1,1] D===========eeeeeER . . . . . . vmulps %xmm0, %xmm0, %xmm0
+# CHECK-NEXT: [2,0] .D===============eeeER . . . . . vaddps %xmm0, %xmm0, %xmm0
+# CHECK-NEXT: [2,1] .D==================eeeeeER . . . . vmulps %xmm0, %xmm0, %xmm0
+# CHECK-NEXT: [3,0] .D=======================eeeER. . . . vaddps %xmm0, %xmm0, %xmm0
+# CHECK-NEXT: [3,1] .D==========================eeeeeER. . . vmulps %xmm0, %xmm0, %xmm0
+# CHECK-NEXT: [4,0] . D==============================eeeER . . vaddps %xmm0, %xmm0, %xmm0
+# CHECK-NEXT: [4,1] . D=================================eeeeeER vmulps %xmm0, %xmm0, %xmm0
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 5 16.2 0.2 0.0 vaddps %xmm0, %xmm0, %xmm0
+# CHECK-NEXT: 1. 5 19.2 0.0 0.0 vmulps %xmm0, %xmm0, %xmm0
--- /dev/null
+# NOTE: Assertions have been autogenerated by utils/update_mca_test_checks.py
+# RUN: llvm-mca -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -register-file-size=5 -iterations=5 -instruction-info=false -dispatch-stats -register-file-stats -timeline < %s | FileCheck %s
+
+vaddps %xmm0, %xmm0, %xmm0
+vmulps %xmm0, %xmm0, %xmm0
+
+# CHECK: Iterations: 5
+# CHECK-NEXT: Instructions: 10
+# CHECK-NEXT: Total Cycles: 43
+# CHECK-NEXT: Total uOps: 10
+
+# CHECK: Dispatch Width: 4
+# CHECK-NEXT: uOps Per Cycle: 0.23
+# CHECK-NEXT: IPC: 0.23
+# CHECK-NEXT: Block RThroughput: 1.0
+
+# CHECK: Dynamic Dispatch Stall Cycles:
+# CHECK-NEXT: RAT - Register unavailable: 20 (46.5%)
+# CHECK-NEXT: RCU - Retire tokens unavailable: 0
+# CHECK-NEXT: SCHEDQ - Scheduler full: 0
+# CHECK-NEXT: LQ - Load queue full: 0
+# CHECK-NEXT: SQ - Store queue full: 0
+# CHECK-NEXT: GROUP - Static restrictions on the dispatch group: 0
+
+# CHECK: Dispatch Logic - number of cycles where we saw N micro opcodes dispatched:
+# CHECK-NEXT: [# dispatched], [# cycles]
+# CHECK-NEXT: 0, 36 (83.7%)
+# CHECK-NEXT: 1, 6 (14.0%)
+# CHECK-NEXT: 4, 1 (2.3%)
+
+# CHECK: Register File statistics:
+# CHECK-NEXT: Total number of mappings created: 10
+# CHECK-NEXT: Max number of mappings used: 5
+
+# CHECK: Resources:
+# CHECK-NEXT: [0] - SBDivider
+# CHECK-NEXT: [1] - SBFPDivider
+# CHECK-NEXT: [2] - SBPort0
+# CHECK-NEXT: [3] - SBPort1
+# CHECK-NEXT: [4] - SBPort4
+# CHECK-NEXT: [5] - SBPort5
+# CHECK-NEXT: [6.0] - SBPort23
+# CHECK-NEXT: [6.1] - SBPort23
+
+# CHECK: Resource pressure per iteration:
+# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6.0] [6.1]
+# CHECK-NEXT: - - 1.00 1.00 - - - -
+
+# CHECK: Resource pressure by instruction:
+# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6.0] [6.1] Instructions:
+# CHECK-NEXT: - - - 1.00 - - - - vaddps %xmm0, %xmm0, %xmm0
+# CHECK-NEXT: - - 1.00 - - - - - vmulps %xmm0, %xmm0, %xmm0
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 0123456789 0123456789
+# CHECK-NEXT: Index 0123456789 0123456789 012
+
+# CHECK: [0,0] DeeeER . . . . . . . . vaddps %xmm0, %xmm0, %xmm0
+# CHECK-NEXT: [0,1] D===eeeeeER . . . . . . . vmulps %xmm0, %xmm0, %xmm0
+# CHECK-NEXT: [1,0] D========eeeER . . . . . . . vaddps %xmm0, %xmm0, %xmm0
+# CHECK-NEXT: [1,1] D===========eeeeeER . . . . . . vmulps %xmm0, %xmm0, %xmm0
+# CHECK-NEXT: [2,0] .D===============eeeER . . . . . vaddps %xmm0, %xmm0, %xmm0
+# CHECK-NEXT: [2,1] . D==============eeeeeER . . . . vmulps %xmm0, %xmm0, %xmm0
+# CHECK-NEXT: [3,0] . . D==============eeeER. . . . vaddps %xmm0, %xmm0, %xmm0
+# CHECK-NEXT: [3,1] . . . D==============eeeeeER. . . vmulps %xmm0, %xmm0, %xmm0
+# CHECK-NEXT: [4,0] . . . . D==============eeeER . . vaddps %xmm0, %xmm0, %xmm0
+# CHECK-NEXT: [4,1] . . . . .D==============eeeeeER vmulps %xmm0, %xmm0, %xmm0
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 5 11.2 0.2 0.0 vaddps %xmm0, %xmm0, %xmm0
+# CHECK-NEXT: 1. 5 12.2 0.0 0.0 vmulps %xmm0, %xmm0, %xmm0
--- /dev/null
+# NOTE: Assertions have been autogenerated by utils/update_mca_test_checks.py
+# RUN: llvm-mca -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -register-file-size=5 -iterations=2 -dispatch-stats -register-file-stats -timeline < %s | FileCheck %s
+
+idiv %eax
+
+# CHECK: Iterations: 2
+# CHECK-NEXT: Instructions: 2
+# CHECK-NEXT: Total Cycles: 55
+# CHECK-NEXT: Total uOps: 2
+
+# CHECK: Dispatch Width: 4
+# CHECK-NEXT: uOps Per Cycle: 0.04
+# CHECK-NEXT: IPC: 0.04
+# CHECK-NEXT: Block RThroughput: 10.0
+
+# CHECK: Instruction Info:
+# CHECK-NEXT: [1]: #uOps
+# CHECK-NEXT: [2]: Latency
+# CHECK-NEXT: [3]: RThroughput
+# CHECK-NEXT: [4]: MayLoad
+# CHECK-NEXT: [5]: MayStore
+# CHECK-NEXT: [6]: HasSideEffects (U)
+
+# CHECK: [1] [2] [3] [4] [5] [6] Instructions:
+# CHECK-NEXT: 1 25 10.00 U idivl %eax
+
+# CHECK: Dynamic Dispatch Stall Cycles:
+# CHECK-NEXT: RAT - Register unavailable: 27 (49.1%)
+# CHECK-NEXT: RCU - Retire tokens unavailable: 0
+# CHECK-NEXT: SCHEDQ - Scheduler full: 0
+# CHECK-NEXT: LQ - Load queue full: 0
+# CHECK-NEXT: SQ - Store queue full: 0
+# CHECK-NEXT: GROUP - Static restrictions on the dispatch group: 0
+
+# CHECK: Dispatch Logic - number of cycles where we saw N micro opcodes dispatched:
+# CHECK-NEXT: [# dispatched], [# cycles]
+# CHECK-NEXT: 0, 53 (96.4%)
+# CHECK-NEXT: 1, 2 (3.6%)
+
+# CHECK: Register File statistics:
+# CHECK-NEXT: Total number of mappings created: 6
+# CHECK-NEXT: Max number of mappings used: 3
+
+# CHECK: Resources:
+# CHECK-NEXT: [0] - SBDivider
+# CHECK-NEXT: [1] - SBFPDivider
+# CHECK-NEXT: [2] - SBPort0
+# CHECK-NEXT: [3] - SBPort1
+# CHECK-NEXT: [4] - SBPort4
+# CHECK-NEXT: [5] - SBPort5
+# CHECK-NEXT: [6.0] - SBPort23
+# CHECK-NEXT: [6.1] - SBPort23
+
+# CHECK: Resource pressure per iteration:
+# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6.0] [6.1]
+# CHECK-NEXT: 10.00 - 1.00 - - - - -
+
+# CHECK: Resource pressure by instruction:
+# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6.0] [6.1] Instructions:
+# CHECK-NEXT: 10.00 - 1.00 - - - - - idivl %eax
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 0123456789 0123456789 01234
+# CHECK-NEXT: Index 0123456789 0123456789 0123456789
+
+# CHECK: [0,0] DeeeeeeeeeeeeeeeeeeeeeeeeeER . . . . . . idivl %eax
+# CHECK-NEXT: [1,0] . . . . . . DeeeeeeeeeeeeeeeeeeeeeeeeeER idivl %eax
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 2 1.0 1.0 0.0 idivl %eax
--- /dev/null
+# NOTE: Assertions have been autogenerated by utils/update_mca_test_checks.py
+# RUN: llvm-mca -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -iterations=22 -dispatch-stats -register-file-stats -resource-pressure=false -timeline -timeline-max-iterations=3 < %s | FileCheck %s
+
+idiv %eax
+
+# CHECK: Iterations: 22
+# CHECK-NEXT: Instructions: 22
+# CHECK-NEXT: Total Cycles: 553
+# CHECK-NEXT: Total uOps: 22
+
+# CHECK: Dispatch Width: 4
+# CHECK-NEXT: uOps Per Cycle: 0.04
+# CHECK-NEXT: IPC: 0.04
+# CHECK-NEXT: Block RThroughput: 10.0
+
+# CHECK: Instruction Info:
+# CHECK-NEXT: [1]: #uOps
+# CHECK-NEXT: [2]: Latency
+# CHECK-NEXT: [3]: RThroughput
+# CHECK-NEXT: [4]: MayLoad
+# CHECK-NEXT: [5]: MayStore
+# CHECK-NEXT: [6]: HasSideEffects (U)
+
+# CHECK: [1] [2] [3] [4] [5] [6] Instructions:
+# CHECK-NEXT: 1 25 10.00 U idivl %eax
+
+# CHECK: Dynamic Dispatch Stall Cycles:
+# CHECK-NEXT: RAT - Register unavailable: 0
+# CHECK-NEXT: RCU - Retire tokens unavailable: 0
+# CHECK-NEXT: SCHEDQ - Scheduler full: 0
+# CHECK-NEXT: LQ - Load queue full: 0
+# CHECK-NEXT: SQ - Store queue full: 0
+# CHECK-NEXT: GROUP - Static restrictions on the dispatch group: 0
+
+# CHECK: Dispatch Logic - number of cycles where we saw N micro opcodes dispatched:
+# CHECK-NEXT: [# dispatched], [# cycles]
+# CHECK-NEXT: 0, 547 (98.9%)
+# CHECK-NEXT: 2, 1 (0.2%)
+# CHECK-NEXT: 4, 5 (0.9%)
+
+# CHECK: Register File statistics:
+# CHECK-NEXT: Total number of mappings created: 66
+# CHECK-NEXT: Max number of mappings used: 66
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 0123456789 0123456789 0123456789 01234567
+# CHECK-NEXT: Index 0123456789 0123456789 0123456789 0123456789
+
+# CHECK: [0,0] DeeeeeeeeeeeeeeeeeeeeeeeeeER . . . . . . . . . . . idivl %eax
+# CHECK-NEXT: [1,0] D=========================eeeeeeeeeeeeeeeeeeeeeeeeeER . . . . . . idivl %eax
+# CHECK-NEXT: [2,0] D==================================================eeeeeeeeeeeeeeeeeeeeeeeeeER idivl %eax
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 3 26.0 0.3 0.0 idivl %eax
--- /dev/null
+# NOTE: Assertions have been autogenerated by utils/update_mca_test_checks.py
+# RUN: llvm-mca -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -iterations=1 -resource-pressure=false -instruction-info=false -dispatch-stats -register-file-stats -timeline < %s | FileCheck %s
+
+ vdivps %ymm0, %ymm0, %ymm1
+ vaddps %ymm0, %ymm0, %ymm2
+ vaddps %ymm0, %ymm0, %ymm3
+ vaddps %ymm0, %ymm0, %ymm4
+ vaddps %ymm0, %ymm0, %ymm5
+ vaddps %ymm0, %ymm0, %ymm6
+ vaddps %ymm0, %ymm0, %ymm7
+ vaddps %ymm0, %ymm0, %ymm8
+ vaddps %ymm0, %ymm0, %ymm9
+ vaddps %ymm0, %ymm0, %ymm10
+ vaddps %ymm0, %ymm0, %ymm11
+ vaddps %ymm0, %ymm0, %ymm12
+ vaddps %ymm0, %ymm0, %ymm13
+ vaddps %ymm0, %ymm0, %ymm14
+ vaddps %ymm0, %ymm0, %ymm15
+ vaddps %ymm2, %ymm0, %ymm0
+ vaddps %ymm2, %ymm0, %ymm3
+ vaddps %ymm2, %ymm0, %ymm4
+ vaddps %ymm2, %ymm0, %ymm5
+ vaddps %ymm2, %ymm0, %ymm6
+ vaddps %ymm2, %ymm0, %ymm7
+ vaddps %ymm2, %ymm0, %ymm8
+ vaddps %ymm2, %ymm0, %ymm9
+ vaddps %ymm2, %ymm0, %ymm10
+ vaddps %ymm2, %ymm0, %ymm11
+ vaddps %ymm2, %ymm0, %ymm12
+ vaddps %ymm2, %ymm0, %ymm13
+ vaddps %ymm2, %ymm0, %ymm14
+ vaddps %ymm2, %ymm0, %ymm15
+ vaddps %ymm3, %ymm0, %ymm2
+ vaddps %ymm3, %ymm0, %ymm4
+ vaddps %ymm3, %ymm0, %ymm5
+ vaddps %ymm3, %ymm0, %ymm6
+
+# CHECK: Iterations: 1
+# CHECK-NEXT: Instructions: 33
+# CHECK-NEXT: Total Cycles: 37
+# CHECK-NEXT: Total uOps: 35
+
+# CHECK: Dispatch Width: 4
+# CHECK-NEXT: uOps Per Cycle: 0.95
+# CHECK-NEXT: IPC: 0.89
+# CHECK-NEXT: Block RThroughput: 32.0
+
+# CHECK: Dynamic Dispatch Stall Cycles:
+# CHECK-NEXT: RAT - Register unavailable: 0
+# CHECK-NEXT: RCU - Retire tokens unavailable: 0
+# CHECK-NEXT: SCHEDQ - Scheduler full: 0
+# CHECK-NEXT: LQ - Load queue full: 0
+# CHECK-NEXT: SQ - Store queue full: 0
+# CHECK-NEXT: GROUP - Static restrictions on the dispatch group: 0
+
+# CHECK: Dispatch Logic - number of cycles where we saw N micro opcodes dispatched:
+# CHECK-NEXT: [# dispatched], [# cycles]
+# CHECK-NEXT: 0, 28 (75.7%)
+# CHECK-NEXT: 3, 1 (2.7%)
+# CHECK-NEXT: 4, 8 (21.6%)
+
+# CHECK: Register File statistics:
+# CHECK-NEXT: Total number of mappings created: 33
+# CHECK-NEXT: Max number of mappings used: 33
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 0123456789 0123456
+# CHECK-NEXT: Index 0123456789 0123456789
+
+# CHECK: [0,0] DeeeeeeeeeeeeeeeeeeeeeeeeeeeeeER .. vdivps %ymm0, %ymm0, %ymm1
+# CHECK-NEXT: [0,1] DeeeE--------------------------R .. vaddps %ymm0, %ymm0, %ymm2
+# CHECK-NEXT: [0,2] .DeeeE-------------------------R .. vaddps %ymm0, %ymm0, %ymm3
+# CHECK-NEXT: [0,3] .D=eeeE------------------------R .. vaddps %ymm0, %ymm0, %ymm4
+# CHECK-NEXT: [0,4] .D==eeeE-----------------------R .. vaddps %ymm0, %ymm0, %ymm5
+# CHECK-NEXT: [0,5] .D===eeeE----------------------R .. vaddps %ymm0, %ymm0, %ymm6
+# CHECK-NEXT: [0,6] . D===eeeE---------------------R .. vaddps %ymm0, %ymm0, %ymm7
+# CHECK-NEXT: [0,7] . D=====eeeE-------------------R .. vaddps %ymm0, %ymm0, %ymm8
+# CHECK-NEXT: [0,8] . D======eeeE------------------R .. vaddps %ymm0, %ymm0, %ymm9
+# CHECK-NEXT: [0,9] . D=======eeeE-----------------R .. vaddps %ymm0, %ymm0, %ymm10
+# CHECK-NEXT: [0,10] . D=======eeeE----------------R .. vaddps %ymm0, %ymm0, %ymm11
+# CHECK-NEXT: [0,11] . D========eeeE---------------R .. vaddps %ymm0, %ymm0, %ymm12
+# CHECK-NEXT: [0,12] . D=========eeeE--------------R .. vaddps %ymm0, %ymm0, %ymm13
+# CHECK-NEXT: [0,13] . D===========eeeE------------R .. vaddps %ymm0, %ymm0, %ymm14
+# CHECK-NEXT: [0,14] . D===========eeeE-----------R .. vaddps %ymm0, %ymm0, %ymm15
+# CHECK-NEXT: [0,15] . D==eeeE--------------------R .. vaddps %ymm2, %ymm0, %ymm0
+# CHECK-NEXT: [0,16] . D=========eeeE-------------R .. vaddps %ymm2, %ymm0, %ymm3
+# CHECK-NEXT: [0,17] . D============eeeE----------R .. vaddps %ymm2, %ymm0, %ymm4
+# CHECK-NEXT: [0,18] . D============eeeE---------R .. vaddps %ymm2, %ymm0, %ymm5
+# CHECK-NEXT: [0,19] . D=============eeeE--------R .. vaddps %ymm2, %ymm0, %ymm6
+# CHECK-NEXT: [0,20] . D==============eeeE-------R .. vaddps %ymm2, %ymm0, %ymm7
+# CHECK-NEXT: [0,21] . D===============eeeE------R .. vaddps %ymm2, %ymm0, %ymm8
+# CHECK-NEXT: [0,22] . .D===============eeeE-----R .. vaddps %ymm2, %ymm0, %ymm9
+# CHECK-NEXT: [0,23] . .D================eeeE----R .. vaddps %ymm2, %ymm0, %ymm10
+# CHECK-NEXT: [0,24] . .D=================eeeE---R .. vaddps %ymm2, %ymm0, %ymm11
+# CHECK-NEXT: [0,25] . .D==================eeeE--R .. vaddps %ymm2, %ymm0, %ymm12
+# CHECK-NEXT: [0,26] . . D==================eeeE-R .. vaddps %ymm2, %ymm0, %ymm13
+# CHECK-NEXT: [0,27] . . D===================eeeER .. vaddps %ymm2, %ymm0, %ymm14
+# CHECK-NEXT: [0,28] . . D====================eeeER .. vaddps %ymm2, %ymm0, %ymm15
+# CHECK-NEXT: [0,29] . . D=====================eeeER .. vaddps %ymm3, %ymm0, %ymm2
+# CHECK-NEXT: [0,30] . . D=====================eeeER.. vaddps %ymm3, %ymm0, %ymm4
+# CHECK-NEXT: [0,31] . . D======================eeeER. vaddps %ymm3, %ymm0, %ymm5
+# CHECK-NEXT: [0,32] . . D=======================eeeER vaddps %ymm3, %ymm0, %ymm6
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 1.0 1.0 0.0 vdivps %ymm0, %ymm0, %ymm1
+# CHECK-NEXT: 1. 1 1.0 1.0 26.0 vaddps %ymm0, %ymm0, %ymm2
+# CHECK-NEXT: 2. 1 1.0 1.0 25.0 vaddps %ymm0, %ymm0, %ymm3
+# CHECK-NEXT: 3. 1 2.0 2.0 24.0 vaddps %ymm0, %ymm0, %ymm4
+# CHECK-NEXT: 4. 1 3.0 3.0 23.0 vaddps %ymm0, %ymm0, %ymm5
+# CHECK-NEXT: 5. 1 4.0 4.0 22.0 vaddps %ymm0, %ymm0, %ymm6
+# CHECK-NEXT: 6. 1 4.0 4.0 21.0 vaddps %ymm0, %ymm0, %ymm7
+# CHECK-NEXT: 7. 1 6.0 6.0 19.0 vaddps %ymm0, %ymm0, %ymm8
+# CHECK-NEXT: 8. 1 7.0 7.0 18.0 vaddps %ymm0, %ymm0, %ymm9
+# CHECK-NEXT: 9. 1 8.0 8.0 17.0 vaddps %ymm0, %ymm0, %ymm10
+# CHECK-NEXT: 10. 1 8.0 8.0 16.0 vaddps %ymm0, %ymm0, %ymm11
+# CHECK-NEXT: 11. 1 9.0 9.0 15.0 vaddps %ymm0, %ymm0, %ymm12
+# CHECK-NEXT: 12. 1 10.0 10.0 14.0 vaddps %ymm0, %ymm0, %ymm13
+# CHECK-NEXT: 13. 1 12.0 12.0 12.0 vaddps %ymm0, %ymm0, %ymm14
+# CHECK-NEXT: 14. 1 12.0 12.0 11.0 vaddps %ymm0, %ymm0, %ymm15
+# CHECK-NEXT: 15. 1 3.0 3.0 20.0 vaddps %ymm2, %ymm0, %ymm0
+# CHECK-NEXT: 16. 1 10.0 4.0 13.0 vaddps %ymm2, %ymm0, %ymm3
+# CHECK-NEXT: 17. 1 13.0 7.0 10.0 vaddps %ymm2, %ymm0, %ymm4
+# CHECK-NEXT: 18. 1 13.0 8.0 9.0 vaddps %ymm2, %ymm0, %ymm5
+# CHECK-NEXT: 19. 1 14.0 9.0 8.0 vaddps %ymm2, %ymm0, %ymm6
+# CHECK-NEXT: 20. 1 15.0 10.0 7.0 vaddps %ymm2, %ymm0, %ymm7
+# CHECK-NEXT: 21. 1 16.0 11.0 6.0 vaddps %ymm2, %ymm0, %ymm8
+# CHECK-NEXT: 22. 1 16.0 12.0 5.0 vaddps %ymm2, %ymm0, %ymm9
+# CHECK-NEXT: 23. 1 17.0 13.0 4.0 vaddps %ymm2, %ymm0, %ymm10
+# CHECK-NEXT: 24. 1 18.0 14.0 3.0 vaddps %ymm2, %ymm0, %ymm11
+# CHECK-NEXT: 25. 1 19.0 15.0 2.0 vaddps %ymm2, %ymm0, %ymm12
+# CHECK-NEXT: 26. 1 19.0 16.0 1.0 vaddps %ymm2, %ymm0, %ymm13
+# CHECK-NEXT: 27. 1 20.0 17.0 0.0 vaddps %ymm2, %ymm0, %ymm14
+# CHECK-NEXT: 28. 1 21.0 18.0 0.0 vaddps %ymm2, %ymm0, %ymm15
+# CHECK-NEXT: 29. 1 22.0 12.0 0.0 vaddps %ymm3, %ymm0, %ymm2
+# CHECK-NEXT: 30. 1 22.0 13.0 0.0 vaddps %ymm3, %ymm0, %ymm4
+# CHECK-NEXT: 31. 1 23.0 14.0 0.0 vaddps %ymm3, %ymm0, %ymm5
+# CHECK-NEXT: 32. 1 24.0 15.0 0.0 vaddps %ymm3, %ymm0, %ymm6
--- /dev/null
+# NOTE: Assertions have been autogenerated by utils/update_mca_test_checks.py
+# RUN: llvm-mca -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -instruction-tables < %s | FileCheck %s
+
+femms
+
+pavgusb %mm0, %mm2
+pavgusb (%rax), %mm2
+
+pf2id %mm0, %mm2
+pf2id (%rax), %mm2
+
+pf2iw %mm0, %mm2
+pf2iw (%rax), %mm2
+
+pfacc %mm0, %mm2
+pfacc (%rax), %mm2
+
+pfadd %mm0, %mm2
+pfadd (%rax), %mm2
+
+pfcmpeq %mm0, %mm2
+pfcmpeq (%rax), %mm2
+
+pfcmpge %mm0, %mm2
+pfcmpge (%rax), %mm2
+
+pfcmpgt %mm0, %mm2
+pfcmpgt (%rax), %mm2
+
+pfmax %mm0, %mm2
+pfmax (%rax), %mm2
+
+pfmin %mm0, %mm2
+pfmin (%rax), %mm2
+
+pfmul %mm0, %mm2
+pfmul (%rax), %mm2
+
+pfnacc %mm0, %mm2
+pfnacc (%rax), %mm2
+
+pfpnacc %mm0, %mm2
+pfpnacc (%rax), %mm2
+
+pfrcp %mm0, %mm2
+pfrcp (%rax), %mm2
+
+pfrcpit1 %mm0, %mm2
+pfrcpit1 (%rax), %mm2
+
+pfrcpit2 %mm0, %mm2
+pfrcpit2 (%rax), %mm2
+
+pfrsqit1 %mm0, %mm2
+pfrsqit1 (%rax), %mm2
+
+pfrsqrt %mm0, %mm2
+pfrsqrt (%rax), %mm2
+
+pfsub %mm0, %mm2
+pfsub (%rax), %mm2
+
+pfsubr %mm0, %mm2
+pfsubr (%rax), %mm2
+
+pi2fd %mm0, %mm2
+pi2fd (%rax), %mm2
+
+pi2fw %mm0, %mm2
+pi2fw (%rax), %mm2
+
+pmulhrw %mm0, %mm2
+pmulhrw (%rax), %mm2
+
+prefetch (%rax)
+prefetchw (%rax)
+
+pswapd %mm0, %mm2
+pswapd (%rax), %mm2
+
+# CHECK: Instruction Info:
+# CHECK-NEXT: [1]: #uOps
+# CHECK-NEXT: [2]: Latency
+# CHECK-NEXT: [3]: RThroughput
+# CHECK-NEXT: [4]: MayLoad
+# CHECK-NEXT: [5]: MayStore
+# CHECK-NEXT: [6]: HasSideEffects (U)
+
+# CHECK: [1] [2] [3] [4] [5] [6] Instructions:
+# CHECK-NEXT: 31 31 10.33 * * U femms
+# CHECK-NEXT: 1 3 1.00 pavgusb %mm0, %mm2
+# CHECK-NEXT: 2 8 1.00 * pavgusb (%rax), %mm2
+# CHECK-NEXT: 1 3 1.00 pf2id %mm0, %mm2
+# CHECK-NEXT: 2 9 1.00 * pf2id (%rax), %mm2
+# CHECK-NEXT: 1 3 1.00 pf2iw %mm0, %mm2
+# CHECK-NEXT: 2 9 1.00 * pf2iw (%rax), %mm2
+# CHECK-NEXT: 1 3 1.00 pfacc %mm0, %mm2
+# CHECK-NEXT: 2 9 1.00 * pfacc (%rax), %mm2
+# CHECK-NEXT: 1 3 1.00 pfadd %mm0, %mm2
+# CHECK-NEXT: 2 9 1.00 * pfadd (%rax), %mm2
+# CHECK-NEXT: 1 3 1.00 pfcmpeq %mm0, %mm2
+# CHECK-NEXT: 2 9 1.00 * pfcmpeq (%rax), %mm2
+# CHECK-NEXT: 1 3 1.00 pfcmpge %mm0, %mm2
+# CHECK-NEXT: 2 9 1.00 * pfcmpge (%rax), %mm2
+# CHECK-NEXT: 1 3 1.00 pfcmpgt %mm0, %mm2
+# CHECK-NEXT: 2 9 1.00 * pfcmpgt (%rax), %mm2
+# CHECK-NEXT: 1 3 1.00 pfmax %mm0, %mm2
+# CHECK-NEXT: 2 9 1.00 * pfmax (%rax), %mm2
+# CHECK-NEXT: 1 3 1.00 pfmin %mm0, %mm2
+# CHECK-NEXT: 2 9 1.00 * pfmin (%rax), %mm2
+# CHECK-NEXT: 1 3 1.00 pfmul %mm0, %mm2
+# CHECK-NEXT: 2 9 1.00 * pfmul (%rax), %mm2
+# CHECK-NEXT: 1 3 1.00 pfnacc %mm0, %mm2
+# CHECK-NEXT: 2 9 1.00 * pfnacc (%rax), %mm2
+# CHECK-NEXT: 1 3 1.00 pfpnacc %mm0, %mm2
+# CHECK-NEXT: 2 9 1.00 * pfpnacc (%rax), %mm2
+# CHECK-NEXT: 1 3 1.00 pfrcp %mm0, %mm2
+# CHECK-NEXT: 2 9 1.00 * pfrcp (%rax), %mm2
+# CHECK-NEXT: 1 3 1.00 pfrcpit1 %mm0, %mm2
+# CHECK-NEXT: 2 9 1.00 * pfrcpit1 (%rax), %mm2
+# CHECK-NEXT: 1 3 1.00 pfrcpit2 %mm0, %mm2
+# CHECK-NEXT: 2 9 1.00 * pfrcpit2 (%rax), %mm2
+# CHECK-NEXT: 1 3 1.00 pfrsqit1 %mm0, %mm2
+# CHECK-NEXT: 2 9 1.00 * pfrsqit1 (%rax), %mm2
+# CHECK-NEXT: 1 3 1.00 pfrsqrt %mm0, %mm2
+# CHECK-NEXT: 2 9 1.00 * pfrsqrt (%rax), %mm2
+# CHECK-NEXT: 1 3 1.00 pfsub %mm0, %mm2
+# CHECK-NEXT: 2 9 1.00 * pfsub (%rax), %mm2
+# CHECK-NEXT: 1 3 1.00 pfsubr %mm0, %mm2
+# CHECK-NEXT: 2 9 1.00 * pfsubr (%rax), %mm2
+# CHECK-NEXT: 1 3 1.00 pi2fd %mm0, %mm2
+# CHECK-NEXT: 2 9 1.00 * pi2fd (%rax), %mm2
+# CHECK-NEXT: 1 3 1.00 pi2fw %mm0, %mm2
+# CHECK-NEXT: 2 9 1.00 * pi2fw (%rax), %mm2
+# CHECK-NEXT: 1 5 1.00 pmulhrw %mm0, %mm2
+# CHECK-NEXT: 2 10 1.00 * pmulhrw (%rax), %mm2
+# CHECK-NEXT: 1 5 0.50 * * prefetch (%rax)
+# CHECK-NEXT: 1 5 0.50 * * prefetchw (%rax)
+# CHECK-NEXT: 1 1 1.00 pswapd %mm0, %mm2
+# CHECK-NEXT: 2 6 1.00 * pswapd (%rax), %mm2
+
+# CHECK: Resources:
+# CHECK-NEXT: [0] - SBDivider
+# CHECK-NEXT: [1] - SBFPDivider
+# CHECK-NEXT: [2] - SBPort0
+# CHECK-NEXT: [3] - SBPort1
+# CHECK-NEXT: [4] - SBPort4
+# CHECK-NEXT: [5] - SBPort5
+# CHECK-NEXT: [6.0] - SBPort23
+# CHECK-NEXT: [6.1] - SBPort23
+
+# CHECK: Resource pressure per iteration:
+# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6.0] [6.1]
+# CHECK-NEXT: - - 12.33 54.33 - 12.33 13.00 13.00
+
+# CHECK: Resource pressure by instruction:
+# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6.0] [6.1] Instructions:
+# CHECK-NEXT: - - 10.33 10.33 - 10.33 - - femms
+# CHECK-NEXT: - - - 1.00 - - - - pavgusb %mm0, %mm2
+# CHECK-NEXT: - - - 1.00 - - 0.50 0.50 pavgusb (%rax), %mm2
+# CHECK-NEXT: - - - 1.00 - - - - pf2id %mm0, %mm2
+# CHECK-NEXT: - - - 1.00 - - 0.50 0.50 pf2id (%rax), %mm2
+# CHECK-NEXT: - - - 1.00 - - - - pf2iw %mm0, %mm2
+# CHECK-NEXT: - - - 1.00 - - 0.50 0.50 pf2iw (%rax), %mm2
+# CHECK-NEXT: - - - 1.00 - - - - pfacc %mm0, %mm2
+# CHECK-NEXT: - - - 1.00 - - 0.50 0.50 pfacc (%rax), %mm2
+# CHECK-NEXT: - - - 1.00 - - - - pfadd %mm0, %mm2
+# CHECK-NEXT: - - - 1.00 - - 0.50 0.50 pfadd (%rax), %mm2
+# CHECK-NEXT: - - - 1.00 - - - - pfcmpeq %mm0, %mm2
+# CHECK-NEXT: - - - 1.00 - - 0.50 0.50 pfcmpeq (%rax), %mm2
+# CHECK-NEXT: - - - 1.00 - - - - pfcmpge %mm0, %mm2
+# CHECK-NEXT: - - - 1.00 - - 0.50 0.50 pfcmpge (%rax), %mm2
+# CHECK-NEXT: - - - 1.00 - - - - pfcmpgt %mm0, %mm2
+# CHECK-NEXT: - - - 1.00 - - 0.50 0.50 pfcmpgt (%rax), %mm2
+# CHECK-NEXT: - - - 1.00 - - - - pfmax %mm0, %mm2
+# CHECK-NEXT: - - - 1.00 - - 0.50 0.50 pfmax (%rax), %mm2
+# CHECK-NEXT: - - - 1.00 - - - - pfmin %mm0, %mm2
+# CHECK-NEXT: - - - 1.00 - - 0.50 0.50 pfmin (%rax), %mm2
+# CHECK-NEXT: - - - 1.00 - - - - pfmul %mm0, %mm2
+# CHECK-NEXT: - - - 1.00 - - 0.50 0.50 pfmul (%rax), %mm2
+# CHECK-NEXT: - - - 1.00 - - - - pfnacc %mm0, %mm2
+# CHECK-NEXT: - - - 1.00 - - 0.50 0.50 pfnacc (%rax), %mm2
+# CHECK-NEXT: - - - 1.00 - - - - pfpnacc %mm0, %mm2
+# CHECK-NEXT: - - - 1.00 - - 0.50 0.50 pfpnacc (%rax), %mm2
+# CHECK-NEXT: - - - 1.00 - - - - pfrcp %mm0, %mm2
+# CHECK-NEXT: - - - 1.00 - - 0.50 0.50 pfrcp (%rax), %mm2
+# CHECK-NEXT: - - - 1.00 - - - - pfrcpit1 %mm0, %mm2
+# CHECK-NEXT: - - - 1.00 - - 0.50 0.50 pfrcpit1 (%rax), %mm2
+# CHECK-NEXT: - - - 1.00 - - - - pfrcpit2 %mm0, %mm2
+# CHECK-NEXT: - - - 1.00 - - 0.50 0.50 pfrcpit2 (%rax), %mm2
+# CHECK-NEXT: - - - 1.00 - - - - pfrsqit1 %mm0, %mm2
+# CHECK-NEXT: - - - 1.00 - - 0.50 0.50 pfrsqit1 (%rax), %mm2
+# CHECK-NEXT: - - - 1.00 - - - - pfrsqrt %mm0, %mm2
+# CHECK-NEXT: - - - 1.00 - - 0.50 0.50 pfrsqrt (%rax), %mm2
+# CHECK-NEXT: - - - 1.00 - - - - pfsub %mm0, %mm2
+# CHECK-NEXT: - - - 1.00 - - 0.50 0.50 pfsub (%rax), %mm2
+# CHECK-NEXT: - - - 1.00 - - - - pfsubr %mm0, %mm2
+# CHECK-NEXT: - - - 1.00 - - 0.50 0.50 pfsubr (%rax), %mm2
+# CHECK-NEXT: - - - 1.00 - - - - pi2fd %mm0, %mm2
+# CHECK-NEXT: - - - 1.00 - - 0.50 0.50 pi2fd (%rax), %mm2
+# CHECK-NEXT: - - - 1.00 - - - - pi2fw %mm0, %mm2
+# CHECK-NEXT: - - - 1.00 - - 0.50 0.50 pi2fw (%rax), %mm2
+# CHECK-NEXT: - - 1.00 - - - - - pmulhrw %mm0, %mm2
+# CHECK-NEXT: - - 1.00 - - - 0.50 0.50 pmulhrw (%rax), %mm2
+# CHECK-NEXT: - - - - - - 0.50 0.50 prefetch (%rax)
+# CHECK-NEXT: - - - - - - 0.50 0.50 prefetchw (%rax)
+# CHECK-NEXT: - - - - - 1.00 - - pswapd %mm0, %mm2
+# CHECK-NEXT: - - - - - 1.00 0.50 0.50 pswapd (%rax), %mm2
--- /dev/null
+# NOTE: Assertions have been autogenerated by utils/update_mca_test_checks.py
+# RUN: llvm-mca -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -instruction-tables < %s | FileCheck %s
+
+adcx %ebx, %ecx
+adcx (%rbx), %ecx
+adcx %rbx, %rcx
+adcx (%rbx), %rcx
+
+adox %ebx, %ecx
+adox (%rbx), %ecx
+adox %rbx, %rcx
+adox (%rbx), %rcx
+
+# CHECK: Instruction Info:
+# CHECK-NEXT: [1]: #uOps
+# CHECK-NEXT: [2]: Latency
+# CHECK-NEXT: [3]: RThroughput
+# CHECK-NEXT: [4]: MayLoad
+# CHECK-NEXT: [5]: MayStore
+# CHECK-NEXT: [6]: HasSideEffects (U)
+
+# CHECK: [1] [2] [3] [4] [5] [6] Instructions:
+# CHECK-NEXT: 2 2 0.67 adcxl %ebx, %ecx
+# CHECK-NEXT: 3 7 0.67 * adcxl (%rbx), %ecx
+# CHECK-NEXT: 2 2 0.67 adcxq %rbx, %rcx
+# CHECK-NEXT: 3 7 0.67 * adcxq (%rbx), %rcx
+# CHECK-NEXT: 2 2 0.67 adoxl %ebx, %ecx
+# CHECK-NEXT: 3 7 0.67 * adoxl (%rbx), %ecx
+# CHECK-NEXT: 2 2 0.67 adoxq %rbx, %rcx
+# CHECK-NEXT: 3 7 0.67 * adoxq (%rbx), %rcx
+
+# CHECK: Resources:
+# CHECK-NEXT: [0] - SBDivider
+# CHECK-NEXT: [1] - SBFPDivider
+# CHECK-NEXT: [2] - SBPort0
+# CHECK-NEXT: [3] - SBPort1
+# CHECK-NEXT: [4] - SBPort4
+# CHECK-NEXT: [5] - SBPort5
+# CHECK-NEXT: [6.0] - SBPort23
+# CHECK-NEXT: [6.1] - SBPort23
+
+# CHECK: Resource pressure per iteration:
+# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6.0] [6.1]
+# CHECK-NEXT: - - 6.67 2.67 - 6.67 2.00 2.00
+
+# CHECK: Resource pressure by instruction:
+# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6.0] [6.1] Instructions:
+# CHECK-NEXT: - - 0.83 0.33 - 0.83 - - adcxl %ebx, %ecx
+# CHECK-NEXT: - - 0.83 0.33 - 0.83 0.50 0.50 adcxl (%rbx), %ecx
+# CHECK-NEXT: - - 0.83 0.33 - 0.83 - - adcxq %rbx, %rcx
+# CHECK-NEXT: - - 0.83 0.33 - 0.83 0.50 0.50 adcxq (%rbx), %rcx
+# CHECK-NEXT: - - 0.83 0.33 - 0.83 - - adoxl %ebx, %ecx
+# CHECK-NEXT: - - 0.83 0.33 - 0.83 0.50 0.50 adoxl (%rbx), %ecx
+# CHECK-NEXT: - - 0.83 0.33 - 0.83 - - adoxq %rbx, %rcx
+# CHECK-NEXT: - - 0.83 0.33 - 0.83 0.50 0.50 adoxq (%rbx), %rcx
--- /dev/null
+# NOTE: Assertions have been autogenerated by utils/update_mca_test_checks.py
+# RUN: llvm-mca -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -instruction-tables < %s | FileCheck %s
+
+aesdec %xmm0, %xmm2
+aesdec (%rax), %xmm2
+
+aesdeclast %xmm0, %xmm2
+aesdeclast (%rax), %xmm2
+
+aesenc %xmm0, %xmm2
+aesenc (%rax), %xmm2
+
+aesenclast %xmm0, %xmm2
+aesenclast (%rax), %xmm2
+
+aesimc %xmm0, %xmm2
+aesimc (%rax), %xmm2
+
+aeskeygenassist $22, %xmm0, %xmm2
+aeskeygenassist $22, (%rax), %xmm2
+
+# CHECK: Instruction Info:
+# CHECK-NEXT: [1]: #uOps
+# CHECK-NEXT: [2]: Latency
+# CHECK-NEXT: [3]: RThroughput
+# CHECK-NEXT: [4]: MayLoad
+# CHECK-NEXT: [5]: MayStore
+# CHECK-NEXT: [6]: HasSideEffects (U)
+
+# CHECK: [1] [2] [3] [4] [5] [6] Instructions:
+# CHECK-NEXT: 2 7 1.00 aesdec %xmm0, %xmm2
+# CHECK-NEXT: 3 13 1.00 * aesdec (%rax), %xmm2
+# CHECK-NEXT: 2 7 1.00 aesdeclast %xmm0, %xmm2
+# CHECK-NEXT: 3 13 1.00 * aesdeclast (%rax), %xmm2
+# CHECK-NEXT: 2 7 1.00 aesenc %xmm0, %xmm2
+# CHECK-NEXT: 3 13 1.00 * aesenc (%rax), %xmm2
+# CHECK-NEXT: 2 7 1.00 aesenclast %xmm0, %xmm2
+# CHECK-NEXT: 3 13 1.00 * aesenclast (%rax), %xmm2
+# CHECK-NEXT: 2 12 2.00 aesimc %xmm0, %xmm2
+# CHECK-NEXT: 3 18 2.00 * aesimc (%rax), %xmm2
+# CHECK-NEXT: 1 8 3.67 aeskeygenassist $22, %xmm0, %xmm2
+# CHECK-NEXT: 1 8 3.33 * aeskeygenassist $22, (%rax), %xmm2
+
+# CHECK: Resources:
+# CHECK-NEXT: [0] - SBDivider
+# CHECK-NEXT: [1] - SBFPDivider
+# CHECK-NEXT: [2] - SBPort0
+# CHECK-NEXT: [3] - SBPort1
+# CHECK-NEXT: [4] - SBPort4
+# CHECK-NEXT: [5] - SBPort5
+# CHECK-NEXT: [6.0] - SBPort23
+# CHECK-NEXT: [6.1] - SBPort23
+
+# CHECK: Resource pressure per iteration:
+# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6.0] [6.1]
+# CHECK-NEXT: - - 9.67 9.67 - 21.67 3.00 3.00
+
+# CHECK: Resource pressure by instruction:
+# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6.0] [6.1] Instructions:
+# CHECK-NEXT: - - 0.33 0.33 - 1.33 - - aesdec %xmm0, %xmm2
+# CHECK-NEXT: - - 0.33 0.33 - 1.33 0.50 0.50 aesdec (%rax), %xmm2
+# CHECK-NEXT: - - 0.33 0.33 - 1.33 - - aesdeclast %xmm0, %xmm2
+# CHECK-NEXT: - - 0.33 0.33 - 1.33 0.50 0.50 aesdeclast (%rax), %xmm2
+# CHECK-NEXT: - - 0.33 0.33 - 1.33 - - aesenc %xmm0, %xmm2
+# CHECK-NEXT: - - 0.33 0.33 - 1.33 0.50 0.50 aesenc (%rax), %xmm2
+# CHECK-NEXT: - - 0.33 0.33 - 1.33 - - aesenclast %xmm0, %xmm2
+# CHECK-NEXT: - - 0.33 0.33 - 1.33 0.50 0.50 aesenclast (%rax), %xmm2
+# CHECK-NEXT: - - - - - 2.00 - - aesimc %xmm0, %xmm2
+# CHECK-NEXT: - - - - - 2.00 0.50 0.50 aesimc (%rax), %xmm2
+# CHECK-NEXT: - - 3.67 3.67 - 3.67 - - aeskeygenassist $22, %xmm0, %xmm2
+# CHECK-NEXT: - - 3.33 3.33 - 3.33 0.50 0.50 aeskeygenassist $22, (%rax), %xmm2
--- /dev/null
+# NOTE: Assertions have been autogenerated by utils/update_mca_test_checks.py
+# RUN: llvm-mca -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -instruction-tables < %s | FileCheck %s
+
+vaddpd %xmm0, %xmm1, %xmm2
+vaddpd (%rax), %xmm1, %xmm2
+
+vaddpd %ymm0, %ymm1, %ymm2
+vaddpd (%rax), %ymm1, %ymm2
+
+vaddps %xmm0, %xmm1, %xmm2
+vaddps (%rax), %xmm1, %xmm2
+
+vaddps %ymm0, %ymm1, %ymm2
+vaddps (%rax), %ymm1, %ymm2
+
+vaddsd %xmm0, %xmm1, %xmm2
+vaddsd (%rax), %xmm1, %xmm2
+
+vaddss %xmm0, %xmm1, %xmm2
+vaddss (%rax), %xmm1, %xmm2
+
+vaddsubpd %xmm0, %xmm1, %xmm2
+vaddsubpd (%rax), %xmm1, %xmm2
+
+vaddsubpd %ymm0, %ymm1, %ymm2
+vaddsubpd (%rax), %ymm1, %ymm2
+
+vaddsubps %xmm0, %xmm1, %xmm2
+vaddsubps (%rax), %xmm1, %xmm2
+
+vaddsubps %ymm0, %ymm1, %ymm2
+vaddsubps (%rax), %ymm1, %ymm2
+
+vaesdec %xmm0, %xmm1, %xmm2
+vaesdec (%rax), %xmm1, %xmm2
+
+vaesdeclast %xmm0, %xmm1, %xmm2
+vaesdeclast (%rax), %xmm1, %xmm2
+
+vaesenc %xmm0, %xmm1, %xmm2
+vaesenc (%rax), %xmm1, %xmm2
+
+vaesenclast %xmm0, %xmm1, %xmm2
+vaesenclast (%rax), %xmm1, %xmm2
+
+vaesimc %xmm0, %xmm2
+vaesimc (%rax), %xmm2
+
+vaeskeygenassist $22, %xmm0, %xmm2
+vaeskeygenassist $22, (%rax), %xmm2
+
+vandnpd %xmm0, %xmm1, %xmm2
+vandnpd (%rax), %xmm1, %xmm2
+
+vandnpd %ymm0, %ymm1, %ymm2
+vandnpd (%rax), %ymm1, %ymm2
+
+vandnps %xmm0, %xmm1, %xmm2
+vandnps (%rax), %xmm1, %xmm2
+
+vandnps %ymm0, %ymm1, %ymm2
+vandnps (%rax), %ymm1, %ymm2
+
+vandpd %xmm0, %xmm1, %xmm2
+vandpd (%rax), %xmm1, %xmm2
+
+vandpd %ymm0, %ymm1, %ymm2
+vandpd (%rax), %ymm1, %ymm2
+
+vandps %xmm0, %xmm1, %xmm2
+vandps (%rax), %xmm1, %xmm2
+
+vandps %ymm0, %ymm1, %ymm2
+vandps (%rax), %ymm1, %ymm2
+
+vblendpd $11, %xmm0, %xmm1, %xmm2
+vblendpd $11, (%rax), %xmm1, %xmm2
+
+vblendpd $11, %ymm0, %ymm1, %ymm2
+vblendpd $11, (%rax), %ymm1, %ymm2
+
+vblendps $11, %xmm0, %xmm1, %xmm2
+vblendps $11, (%rax), %xmm1, %xmm2
+
+vblendps $11, %ymm0, %ymm1, %ymm2
+vblendps $11, (%rax), %ymm1, %ymm2
+
+vblendvpd %xmm3, %xmm0, %xmm1, %xmm2
+vblendvpd %xmm3, (%rax), %xmm1, %xmm2
+
+vblendvpd %ymm3, %ymm0, %ymm1, %ymm2
+vblendvpd %ymm3, (%rax), %ymm1, %ymm2
+
+vblendvps %xmm3, %xmm0, %xmm1, %xmm2
+vblendvps %xmm3, (%rax), %xmm1, %xmm2
+
+vblendvps %ymm3, %ymm0, %ymm1, %ymm2
+vblendvps %ymm3, (%rax), %ymm1, %ymm2
+
+vbroadcastf128 (%rax), %ymm2
+
+vbroadcastsd (%rax), %ymm2
+
+vbroadcastss (%rax), %xmm2
+vbroadcastss (%rax), %ymm2
+
+vcmppd $0, %xmm0, %xmm1, %xmm2
+vcmppd $0, (%rax), %xmm1, %xmm2
+
+vcmppd $0, %ymm0, %ymm1, %ymm2
+vcmppd $0, (%rax), %ymm1, %ymm2
+
+vcmpps $0, %xmm0, %xmm1, %xmm2
+vcmpps $0, (%rax), %xmm1, %xmm2
+
+vcmpps $0, %ymm0, %ymm1, %ymm2
+vcmpps $0, (%rax), %ymm1, %ymm2
+
+vcmpsd $0, %xmm0, %xmm1, %xmm2
+vcmpsd $0, (%rax), %xmm1, %xmm2
+
+vcmpss $0, %xmm0, %xmm1, %xmm2
+vcmpss $0, (%rax), %xmm1, %xmm2
+
+vcomisd %xmm0, %xmm1
+vcomisd (%rax), %xmm1
+
+vcomiss %xmm0, %xmm1
+vcomiss (%rax), %xmm1
+
+vcvtdq2pd %xmm0, %xmm2
+vcvtdq2pd (%rax), %xmm2
+
+vcvtdq2pd %xmm0, %ymm2
+vcvtdq2pd (%rax), %ymm2
+
+vcvtdq2ps %xmm0, %xmm2
+vcvtdq2ps (%rax), %xmm2
+
+vcvtdq2ps %ymm0, %ymm2
+vcvtdq2ps (%rax), %ymm2
+
+vcvtpd2dqx %xmm0, %xmm2
+vcvtpd2dqx (%rax), %xmm2
+
+vcvtpd2dqy %ymm0, %xmm2
+vcvtpd2dqy (%rax), %xmm2
+
+vcvtpd2psx %xmm0, %xmm2
+vcvtpd2psx (%rax), %xmm2
+
+vcvtpd2psy %ymm0, %xmm2
+vcvtpd2psy (%rax), %xmm2
+
+vcvtps2dq %xmm0, %xmm2
+vcvtps2dq (%rax), %xmm2
+
+vcvtps2dq %ymm0, %ymm2
+vcvtps2dq (%rax), %ymm2
+
+vcvtps2pd %xmm0, %xmm2
+vcvtps2pd (%rax), %xmm2
+
+vcvtps2pd %xmm0, %ymm2
+vcvtps2pd (%rax), %ymm2
+
+vcvtsd2si %xmm0, %ecx
+vcvtsd2si %xmm0, %rcx
+vcvtsd2si (%rax), %ecx
+vcvtsd2si (%rax), %rcx
+
+vcvtsd2ss %xmm0, %xmm1, %xmm2
+vcvtsd2ss (%rax), %xmm1, %xmm2
+
+vcvtsi2sdl %ecx, %xmm0, %xmm2
+vcvtsi2sdq %rcx, %xmm0, %xmm2
+vcvtsi2sdl (%rax), %xmm0, %xmm2
+vcvtsi2sdq (%rax), %xmm0, %xmm2
+
+vcvtsi2ssl %ecx, %xmm0, %xmm2
+vcvtsi2ssq %rcx, %xmm0, %xmm2
+vcvtsi2ssl (%rax), %xmm0, %xmm2
+vcvtsi2ssq (%rax), %xmm0, %xmm2
+
+vcvtss2sd %xmm0, %xmm1, %xmm2
+vcvtss2sd (%rax), %xmm1, %xmm2
+
+vcvtss2si %xmm0, %ecx
+vcvtss2si %xmm0, %rcx
+vcvtss2si (%rax), %ecx
+vcvtss2si (%rax), %rcx
+
+vcvttpd2dqx %xmm0, %xmm2
+vcvttpd2dqx (%rax), %xmm2
+
+vcvttpd2dqy %ymm0, %xmm2
+vcvttpd2dqy (%rax), %xmm2
+
+vcvttps2dq %xmm0, %xmm2
+vcvttps2dq (%rax), %xmm2
+
+vcvttps2dq %ymm0, %ymm2
+vcvttps2dq (%rax), %ymm2
+
+vcvttsd2si %xmm0, %ecx
+vcvttsd2si %xmm0, %rcx
+vcvttsd2si (%rax), %ecx
+vcvttsd2si (%rax), %rcx
+
+vcvttss2si %xmm0, %ecx
+vcvttss2si %xmm0, %rcx
+vcvttss2si (%rax), %ecx
+vcvttss2si (%rax), %rcx
+
+vdivpd %xmm0, %xmm1, %xmm2
+vdivpd (%rax), %xmm1, %xmm2
+
+vdivpd %ymm0, %ymm1, %ymm2
+vdivpd (%rax), %ymm1, %ymm2
+
+vdivps %xmm0, %xmm1, %xmm2
+vdivps (%rax), %xmm1, %xmm2
+
+vdivps %ymm0, %ymm1, %ymm2
+vdivps (%rax), %ymm1, %ymm2
+
+vdivsd %xmm0, %xmm1, %xmm2
+vdivsd (%rax), %xmm1, %xmm2
+
+vdivss %xmm0, %xmm1, %xmm2
+vdivss (%rax), %xmm1, %xmm2
+
+vdppd $22, %xmm0, %xmm1, %xmm2
+vdppd $22, (%rax), %xmm1, %xmm2
+
+vdpps $22, %xmm0, %xmm1, %xmm2
+vdpps $22, (%rax), %xmm1, %xmm2
+
+vdpps $22, %ymm0, %ymm1, %ymm2
+vdpps $22, (%rax), %ymm1, %ymm2
+
+vextractf128 $1, %ymm0, %xmm2
+vextractf128 $1, %ymm0, (%rax)
+
+vextractps $1, %xmm0, %rcx
+vextractps $1, %xmm0, (%rax)
+
+vhaddpd %xmm0, %xmm1, %xmm2
+vhaddpd (%rax), %xmm1, %xmm2
+
+vhaddpd %ymm0, %ymm1, %ymm2
+vhaddpd (%rax), %ymm1, %ymm2
+
+vhaddps %xmm0, %xmm1, %xmm2
+vhaddps (%rax), %xmm1, %xmm2
+
+vhaddps %ymm0, %ymm1, %ymm2
+vhaddps (%rax), %ymm1, %ymm2
+
+vhsubpd %xmm0, %xmm1, %xmm2
+vhsubpd (%rax), %xmm1, %xmm2
+
+vhsubpd %ymm0, %ymm1, %ymm2
+vhsubpd (%rax), %ymm1, %ymm2
+
+vhsubps %xmm0, %xmm1, %xmm2
+vhsubps (%rax), %xmm1, %xmm2
+
+vhsubps %ymm0, %ymm1, %ymm2
+vhsubps (%rax), %ymm1, %ymm2
+
+vinsertf128 $1, %xmm0, %ymm1, %ymm2
+vinsertf128 $1, (%rax), %ymm1, %ymm2
+
+vinsertps $1, %xmm0, %xmm1, %xmm2
+vinsertps $1, (%rax), %xmm1, %xmm2
+
+vlddqu (%rax), %xmm2
+vlddqu (%rax), %ymm2
+
+vldmxcsr (%rax)
+
+vmaskmovdqu %xmm0, %xmm1
+
+vmaskmovpd (%rax), %xmm0, %xmm2
+vmaskmovpd (%rax), %ymm0, %ymm2
+
+vmaskmovpd %xmm0, %xmm1, (%rax)
+vmaskmovpd %ymm0, %ymm1, (%rax)
+
+vmaskmovps (%rax), %xmm0, %xmm2
+vmaskmovps (%rax), %ymm0, %ymm2
+
+vmaskmovps %xmm0, %xmm1, (%rax)
+vmaskmovps %ymm0, %ymm1, (%rax)
+
+vmaxpd %xmm0, %xmm1, %xmm2
+vmaxpd (%rax), %xmm1, %xmm2
+
+vmaxpd %ymm0, %ymm1, %ymm2
+vmaxpd (%rax), %ymm1, %ymm2
+
+vmaxps %xmm0, %xmm1, %xmm2
+vmaxps (%rax), %xmm1, %xmm2
+
+vmaxps %ymm0, %ymm1, %ymm2
+vmaxps (%rax), %ymm1, %ymm2
+
+vmaxsd %xmm0, %xmm1, %xmm2
+vmaxsd (%rax), %xmm1, %xmm2
+
+vmaxss %xmm0, %xmm1, %xmm2
+vmaxss (%rax), %xmm1, %xmm2
+
+vminpd %xmm0, %xmm1, %xmm2
+vminpd (%rax), %xmm1, %xmm2
+
+vminpd %ymm0, %ymm1, %ymm2
+vminpd (%rax), %ymm1, %ymm2
+
+vminps %xmm0, %xmm1, %xmm2
+vminps (%rax), %xmm1, %xmm2
+
+vminps %ymm0, %ymm1, %ymm2
+vminps (%rax), %ymm1, %ymm2
+
+vminsd %xmm0, %xmm1, %xmm2
+vminsd (%rax), %xmm1, %xmm2
+
+vminss %xmm0, %xmm1, %xmm2
+vminss (%rax), %xmm1, %xmm2
+
+vmovapd %xmm0, %xmm2
+vmovapd %xmm0, (%rax)
+vmovapd (%rax), %xmm2
+
+vmovapd %ymm0, %ymm2
+vmovapd %ymm0, (%rax)
+vmovapd (%rax), %ymm2
+
+vmovaps %xmm0, %xmm2
+vmovaps %xmm0, (%rax)
+vmovaps (%rax), %xmm2
+
+vmovaps %ymm0, %ymm2
+vmovaps %ymm0, (%rax)
+vmovaps (%rax), %ymm2
+
+vmovd %eax, %xmm2
+vmovd (%rax), %xmm2
+
+vmovd %xmm0, %ecx
+vmovd %xmm0, (%rax)
+
+vmovddup %xmm0, %xmm2
+vmovddup (%rax), %xmm2
+
+vmovddup %ymm0, %ymm2
+vmovddup (%rax), %ymm2
+
+vmovdqa %xmm0, %xmm2
+vmovdqa %xmm0, (%rax)
+vmovdqa (%rax), %xmm2
+
+vmovdqa %ymm0, %ymm2
+vmovdqa %ymm0, (%rax)
+vmovdqa (%rax), %ymm2
+
+vmovdqu %xmm0, %xmm2
+vmovdqu %xmm0, (%rax)
+vmovdqu (%rax), %xmm2
+
+vmovdqu %ymm0, %ymm2
+vmovdqu %ymm0, (%rax)
+vmovdqu (%rax), %ymm2
+
+vmovhlps %xmm0, %xmm1, %xmm2
+vmovlhps %xmm0, %xmm1, %xmm2
+
+vmovhpd %xmm0, (%rax)
+vmovhpd (%rax), %xmm1, %xmm2
+
+vmovhps %xmm0, (%rax)
+vmovhps (%rax), %xmm1, %xmm2
+
+vmovlpd %xmm0, (%rax)
+vmovlpd (%rax), %xmm1, %xmm2
+
+vmovlps %xmm0, (%rax)
+vmovlps (%rax), %xmm1, %xmm2
+
+vmovmskpd %xmm0, %rcx
+vmovmskpd %ymm0, %rcx
+
+vmovmskps %xmm0, %rcx
+vmovmskps %ymm0, %rcx
+
+vmovntdq %xmm0, (%rax)
+vmovntdq %ymm0, (%rax)
+
+vmovntdqa (%rax), %xmm2
+vmovntdqa (%rax), %ymm2
+
+vmovntpd %xmm0, (%rax)
+vmovntpd %ymm0, (%rax)
+
+vmovntps %xmm0, (%rax)
+vmovntps %ymm0, (%rax)
+
+vmovq %xmm0, %xmm2
+
+vmovq %rax, %xmm2
+vmovq (%rax), %xmm2
+
+vmovq %xmm0, %rcx
+vmovq %xmm0, (%rax)
+
+vmovsd %xmm0, %xmm1, %xmm2
+vmovsd %xmm0, (%rax)
+vmovsd (%rax), %xmm2
+
+vmovshdup %xmm0, %xmm2
+vmovshdup (%rax), %xmm2
+
+vmovshdup %ymm0, %ymm2
+vmovshdup (%rax), %ymm2
+
+vmovsldup %xmm0, %xmm2
+vmovsldup (%rax), %xmm2
+
+vmovsldup %ymm0, %ymm2
+vmovsldup (%rax), %ymm2
+
+vmovss %xmm0, %xmm1, %xmm2
+vmovss %xmm0, (%rax)
+vmovss (%rax), %xmm2
+
+vmovupd %xmm0, %xmm2
+vmovupd %xmm0, (%rax)
+vmovupd (%rax), %xmm2
+
+vmovupd %ymm0, %ymm2
+vmovupd %ymm0, (%rax)
+vmovupd (%rax), %ymm2
+
+vmovups %xmm0, %xmm2
+vmovups %xmm0, (%rax)
+vmovups (%rax), %xmm2
+
+vmovups %ymm0, %ymm2
+vmovups %ymm0, (%rax)
+vmovups (%rax), %ymm2
+
+vmpsadbw $1, %xmm0, %xmm1, %xmm2
+vmpsadbw $1, (%rax), %xmm1, %xmm2
+
+vmulpd %xmm0, %xmm1, %xmm2
+vmulpd (%rax), %xmm1, %xmm2
+
+vmulpd %ymm0, %ymm1, %ymm2
+vmulpd (%rax), %ymm1, %ymm2
+
+vmulps %xmm0, %xmm1, %xmm2
+vmulps (%rax), %xmm1, %xmm2
+
+vmulps %ymm0, %ymm1, %ymm2
+vmulps (%rax), %ymm1, %ymm2
+
+vmulsd %xmm0, %xmm1, %xmm2
+vmulsd (%rax), %xmm1, %xmm2
+
+vmulss %xmm0, %xmm1, %xmm2
+vmulss (%rax), %xmm1, %xmm2
+
+vorpd %xmm0, %xmm1, %xmm2
+vorpd (%rax), %xmm1, %xmm2
+
+vorpd %ymm0, %ymm1, %ymm2
+vorpd (%rax), %ymm1, %ymm2
+
+vorps %xmm0, %xmm1, %xmm2
+vorps (%rax), %xmm1, %xmm2
+
+vorps %ymm0, %ymm1, %ymm2
+vorps (%rax), %ymm1, %ymm2
+
+vpabsb %xmm0, %xmm2
+vpabsb (%rax), %xmm2
+
+vpabsd %xmm0, %xmm2
+vpabsd (%rax), %xmm2
+
+vpabsw %xmm0, %xmm2
+vpabsw (%rax), %xmm2
+
+vpackssdw %xmm0, %xmm1, %xmm2
+vpackssdw (%rax), %xmm1, %xmm2
+
+vpacksswb %xmm0, %xmm1, %xmm2
+vpacksswb (%rax), %xmm1, %xmm2
+
+vpackusdw %xmm0, %xmm1, %xmm2
+vpackusdw (%rax), %xmm1, %xmm2
+
+vpackuswb %xmm0, %xmm1, %xmm2
+vpackuswb (%rax), %xmm1, %xmm2
+
+vpaddb %xmm0, %xmm1, %xmm2
+vpaddb (%rax), %xmm1, %xmm2
+
+vpaddd %xmm0, %xmm1, %xmm2
+vpaddd (%rax), %xmm1, %xmm2
+
+vpaddq %xmm0, %xmm1, %xmm2
+vpaddq (%rax), %xmm1, %xmm2
+
+vpaddsb %xmm0, %xmm1, %xmm2
+vpaddsb (%rax), %xmm1, %xmm2
+
+vpaddsw %xmm0, %xmm1, %xmm2
+vpaddsw (%rax), %xmm1, %xmm2
+
+vpaddusb %xmm0, %xmm1, %xmm2
+vpaddusb (%rax), %xmm1, %xmm2
+
+vpaddusw %xmm0, %xmm1, %xmm2
+vpaddusw (%rax), %xmm1, %xmm2
+
+vpaddw %xmm0, %xmm1, %xmm2
+vpaddw (%rax), %xmm1, %xmm2
+
+vpalignr $1, %xmm0, %xmm1, %xmm2
+vpalignr $1, (%rax), %xmm1, %xmm2
+
+vpand %xmm0, %xmm1, %xmm2
+vpand (%rax), %xmm1, %xmm2
+
+vpandn %xmm0, %xmm1, %xmm2
+vpandn (%rax), %xmm1, %xmm2
+
+vpavgb %xmm0, %xmm1, %xmm2
+vpavgb (%rax), %xmm1, %xmm2
+
+vpavgw %xmm0, %xmm1, %xmm2
+vpavgw (%rax), %xmm1, %xmm2
+
+vpblendvb %xmm3, %xmm0, %xmm1, %xmm2
+vpblendvb %xmm3, (%rax), %xmm1, %xmm2
+
+vpblendw $11, %xmm0, %xmm1, %xmm2
+vpblendw $11, (%rax), %xmm1, %xmm2
+
+vpclmulqdq $11, %xmm0, %xmm1, %xmm2
+vpclmulqdq $11, (%rax), %xmm1, %xmm2
+
+vpcmpeqb %xmm0, %xmm1, %xmm2
+vpcmpeqb (%rax), %xmm1, %xmm2
+
+vpcmpeqd %xmm0, %xmm1, %xmm2
+vpcmpeqd (%rax), %xmm1, %xmm2
+
+vpcmpeqq %xmm0, %xmm1, %xmm2
+vpcmpeqq (%rax), %xmm1, %xmm2
+
+vpcmpeqw %xmm0, %xmm1, %xmm2
+vpcmpeqw (%rax), %xmm1, %xmm2
+
+vpcmpestri $1, %xmm0, %xmm2
+vpcmpestri $1, (%rax), %xmm2
+
+vpcmpestrm $1, %xmm0, %xmm2
+vpcmpestrm $1, (%rax), %xmm2
+
+vpcmpgtb %xmm0, %xmm1, %xmm2
+vpcmpgtb (%rax), %xmm1, %xmm2
+
+vpcmpgtd %xmm0, %xmm1, %xmm2
+vpcmpgtd (%rax), %xmm1, %xmm2
+
+vpcmpgtq %xmm0, %xmm1, %xmm2
+vpcmpgtq (%rax), %xmm1, %xmm2
+
+vpcmpgtw %xmm0, %xmm1, %xmm2
+vpcmpgtw (%rax), %xmm1, %xmm2
+
+vpcmpistri $1, %xmm0, %xmm2
+vpcmpistri $1, (%rax), %xmm2
+
+vpcmpistrm $1, %xmm0, %xmm2
+vpcmpistrm $1, (%rax), %xmm2
+
+vperm2f128 $1, %ymm0, %ymm1, %ymm2
+vperm2f128 $1, (%rax), %ymm1, %ymm2
+
+vpermilpd $1, %xmm0, %xmm2
+vpermilpd $1, (%rax), %xmm2
+vpermilpd %xmm0, %xmm1, %xmm2
+vpermilpd (%rax), %xmm1, %xmm2
+
+vpermilpd $1, %ymm0, %ymm2
+vpermilpd $1, (%rax), %ymm2
+vpermilpd %ymm0, %ymm1, %ymm2
+vpermilpd (%rax), %ymm1, %ymm2
+
+vpermilps $1, %xmm0, %xmm2
+vpermilps $1, (%rax), %xmm2
+vpermilps %xmm0, %xmm1, %xmm2
+vpermilps (%rax), %xmm1, %xmm2
+
+vpermilps $1, %ymm0, %ymm2
+vpermilps $1, (%rax), %ymm2
+vpermilps %ymm0, %ymm1, %ymm2
+vpermilps (%rax), %ymm1, %ymm2
+
+vpextrb $1, %xmm0, %ecx
+vpextrb $1, %xmm0, (%rax)
+
+vpextrd $1, %xmm0, %ecx
+vpextrd $1, %xmm0, (%rax)
+
+vpextrq $1, %xmm0, %rcx
+vpextrq $1, %xmm0, (%rax)
+
+vpextrw $1, %xmm0, %ecx
+vpextrw $1, %xmm0, (%rax)
+
+vphaddd %xmm0, %xmm1, %xmm2
+vphaddd (%rax), %xmm1, %xmm2
+
+vphaddsw %xmm0, %xmm1, %xmm2
+vphaddsw (%rax), %xmm1, %xmm2
+
+vphaddw %xmm0, %xmm1, %xmm2
+vphaddw (%rax), %xmm1, %xmm2
+
+vphminposuw %xmm0, %xmm2
+vphminposuw (%rax), %xmm2
+
+vphsubd %xmm0, %xmm1, %xmm2
+vphsubd (%rax), %xmm1, %xmm2
+
+vphsubsw %xmm0, %xmm1, %xmm2
+vphsubsw (%rax), %xmm1, %xmm2
+
+vphsubw %xmm0, %xmm1, %xmm2
+vphsubw (%rax), %xmm1, %xmm2
+
+vpinsrb $1, %eax, %xmm1, %xmm2
+vpinsrb $1, (%rax), %xmm1, %xmm2
+
+vpinsrd $1, %eax, %xmm1, %xmm2
+vpinsrd $1, (%rax), %xmm1, %xmm2
+
+vpinsrq $1, %rax, %xmm1, %xmm2
+vpinsrq $1, (%rax), %xmm1, %xmm2
+
+vpinsrw $1, %eax, %xmm1, %xmm2
+vpinsrw $1, (%rax), %xmm1, %xmm2
+
+vpmaddubsw %xmm0, %xmm1, %xmm2
+vpmaddubsw (%rax), %xmm1, %xmm2
+
+vpmaddwd %xmm0, %xmm1, %xmm2
+vpmaddwd (%rax), %xmm1, %xmm2
+
+vpmaxsb %xmm0, %xmm1, %xmm2
+vpmaxsb (%rax), %xmm1, %xmm2
+
+vpmaxsd %xmm0, %xmm1, %xmm2
+vpmaxsd (%rax), %xmm1, %xmm2
+
+vpmaxsw %xmm0, %xmm1, %xmm2
+vpmaxsw (%rax), %xmm1, %xmm2
+
+vpmaxub %xmm0, %xmm1, %xmm2
+vpmaxub (%rax), %xmm1, %xmm2
+
+vpmaxud %xmm0, %xmm1, %xmm2
+vpmaxud (%rax), %xmm1, %xmm2
+
+vpmaxuw %xmm0, %xmm1, %xmm2
+vpmaxuw (%rax), %xmm1, %xmm2
+
+vpminsb %xmm0, %xmm1, %xmm2
+vpminsb (%rax), %xmm1, %xmm2
+
+vpminsd %xmm0, %xmm1, %xmm2
+vpminsd (%rax), %xmm1, %xmm2
+
+vpminsw %xmm0, %xmm1, %xmm2
+vpminsw (%rax), %xmm1, %xmm2
+
+vpminub %xmm0, %xmm1, %xmm2
+vpminub (%rax), %xmm1, %xmm2
+
+vpminud %xmm0, %xmm1, %xmm2
+vpminud (%rax), %xmm1, %xmm2
+
+vpminuw %xmm0, %xmm1, %xmm2
+vpminuw (%rax), %xmm1, %xmm2
+
+vpmovmskb %xmm0, %rcx
+
+vpmovsxbd %xmm0, %xmm2
+vpmovsxbd (%rax), %xmm2
+
+vpmovsxbq %xmm0, %xmm2
+vpmovsxbq (%rax), %xmm2
+
+vpmovsxbw %xmm0, %xmm2
+vpmovsxbw (%rax), %xmm2
+
+vpmovsxdq %xmm0, %xmm2
+vpmovsxdq (%rax), %xmm2
+
+vpmovsxwd %xmm0, %xmm2
+vpmovsxwd (%rax), %xmm2
+
+vpmovsxwq %xmm0, %xmm2
+vpmovsxwq (%rax), %xmm2
+
+vpmovzxbd %xmm0, %xmm2
+vpmovzxbd (%rax), %xmm2
+
+vpmovzxbq %xmm0, %xmm2
+vpmovzxbq (%rax), %xmm2
+
+vpmovzxbw %xmm0, %xmm2
+vpmovzxbw (%rax), %xmm2
+
+vpmovzxdq %xmm0, %xmm2
+vpmovzxdq (%rax), %xmm2
+
+vpmovzxwd %xmm0, %xmm2
+vpmovzxwd (%rax), %xmm2
+
+vpmovzxwq %xmm0, %xmm2
+vpmovzxwq (%rax), %xmm2
+
+vpmuldq %xmm0, %xmm1, %xmm2
+vpmuldq (%rax), %xmm1, %xmm2
+
+vpmulhrsw %xmm0, %xmm1, %xmm2
+vpmulhrsw (%rax), %xmm1, %xmm2
+
+vpmulhuw %xmm0, %xmm1, %xmm2
+vpmulhuw (%rax), %xmm1, %xmm2
+
+vpmulhw %xmm0, %xmm1, %xmm2
+vpmulhw (%rax), %xmm1, %xmm2
+
+vpmulld %xmm0, %xmm1, %xmm2
+vpmulld (%rax), %xmm1, %xmm2
+
+vpmullw %xmm0, %xmm1, %xmm2
+vpmullw (%rax), %xmm1, %xmm2
+
+vpmuludq %xmm0, %xmm1, %xmm2
+vpmuludq (%rax), %xmm1, %xmm2
+
+vpor %xmm0, %xmm1, %xmm2
+vpor (%rax), %xmm1, %xmm2
+
+vpsadbw %xmm0, %xmm1, %xmm2
+vpsadbw (%rax), %xmm1, %xmm2
+
+vpshufb %xmm0, %xmm1, %xmm2
+vpshufb (%rax), %xmm1, %xmm2
+
+vpshufd $1, %xmm0, %xmm2
+vpshufd $1, (%rax), %xmm2
+
+vpshufhw $1, %xmm0, %xmm2
+vpshufhw $1, (%rax), %xmm2
+
+vpshuflw $1, %xmm0, %xmm2
+vpshuflw $1, (%rax), %xmm2
+
+vpsignb %xmm0, %xmm1, %xmm2
+vpsignb (%rax), %xmm1, %xmm2
+
+vpsignd %xmm0, %xmm1, %xmm2
+vpsignd (%rax), %xmm1, %xmm2
+
+vpsignw %xmm0, %xmm1, %xmm2
+vpsignw (%rax), %xmm1, %xmm2
+
+vpslld $1, %xmm0, %xmm2
+vpslld %xmm0, %xmm1, %xmm2
+vpslld (%rax), %xmm1, %xmm2
+
+vpslldq $1, %xmm1, %xmm2
+
+vpsllq $1, %xmm0, %xmm2
+vpsllq %xmm0, %xmm1, %xmm2
+vpsllq (%rax), %xmm1, %xmm2
+
+vpsllw $1, %xmm0, %xmm2
+vpsllw %xmm0, %xmm1, %xmm2
+vpsllw (%rax), %xmm1, %xmm2
+
+vpsrad $1, %xmm0, %xmm2
+vpsrad %xmm0, %xmm1, %xmm2
+vpsrad (%rax), %xmm1, %xmm2
+
+vpsraw $1, %xmm0, %xmm2
+vpsraw %xmm0, %xmm1, %xmm2
+vpsraw (%rax), %xmm1, %xmm2
+
+vpsrld $1, %xmm0, %xmm2
+vpsrld %xmm0, %xmm1, %xmm2
+vpsrld (%rax), %xmm1, %xmm2
+
+vpsrldq $1, %xmm1, %xmm2
+
+vpsrlq $1, %xmm0, %xmm2
+vpsrlq %xmm0, %xmm1, %xmm2
+vpsrlq (%rax), %xmm1, %xmm2
+
+vpsrlw $1, %xmm0, %xmm2
+vpsrlw %xmm0, %xmm1, %xmm2
+vpsrlw (%rax), %xmm1, %xmm2
+
+vpsubb %xmm0, %xmm1, %xmm2
+vpsubb (%rax), %xmm1, %xmm2
+
+vpsubd %xmm0, %xmm1, %xmm2
+vpsubd (%rax), %xmm1, %xmm2
+
+vpsubq %xmm0, %xmm1, %xmm2
+vpsubq (%rax), %xmm1, %xmm2
+
+vpsubsb %xmm0, %xmm1, %xmm2
+vpsubsb (%rax), %xmm1, %xmm2
+
+vpsubsw %xmm0, %xmm1, %xmm2
+vpsubsw (%rax), %xmm1, %xmm2
+
+vpsubusb %xmm0, %xmm1, %xmm2
+vpsubusb (%rax), %xmm1, %xmm2
+
+vpsubusw %xmm0, %xmm1, %xmm2
+vpsubusw (%rax), %xmm1, %xmm2
+
+vpsubw %xmm0, %xmm1, %xmm2
+vpsubw (%rax), %xmm1, %xmm2
+
+vptest %xmm0, %xmm1
+vptest (%rax), %xmm1
+
+vptest %ymm0, %ymm1
+vptest (%rax), %ymm1
+
+vpunpckhbw %xmm0, %xmm1, %xmm2
+vpunpckhbw (%rax), %xmm1, %xmm2
+
+vpunpckhdq %xmm0, %xmm1, %xmm2
+vpunpckhdq (%rax), %xmm1, %xmm2
+
+vpunpckhqdq %xmm0, %xmm1, %xmm2
+vpunpckhqdq (%rax), %xmm1, %xmm2
+
+vpunpckhwd %xmm0, %xmm1, %xmm2
+vpunpckhwd (%rax), %xmm1, %xmm2
+
+vpunpcklbw %xmm0, %xmm1, %xmm2
+vpunpcklbw (%rax), %xmm1, %xmm2
+
+vpunpckldq %xmm0, %xmm1, %xmm2
+vpunpckldq (%rax), %xmm1, %xmm2
+
+vpunpcklqdq %xmm0, %xmm1, %xmm2
+vpunpcklqdq (%rax), %xmm1, %xmm2
+
+vpunpcklwd %xmm0, %xmm1, %xmm2
+vpunpcklwd (%rax), %xmm1, %xmm2
+
+vpxor %xmm0, %xmm1, %xmm2
+vpxor (%rax), %xmm1, %xmm2
+
+vrcpps %xmm0, %xmm2
+vrcpps (%rax), %xmm2
+
+vrcpps %ymm0, %ymm2
+vrcpps (%rax), %ymm2
+
+vrcpss %xmm0, %xmm1, %xmm2
+vrcpss (%rax), %xmm1, %xmm2
+
+vroundpd $1, %xmm0, %xmm2
+vroundpd $1, (%rax), %xmm2
+
+vroundpd $1, %ymm0, %ymm2
+vroundpd $1, (%rax), %ymm2
+
+vroundps $1, %xmm0, %xmm2
+vroundps $1, (%rax), %xmm2
+
+vroundps $1, %ymm0, %ymm2
+vroundps $1, (%rax), %ymm2
+
+vroundsd $1, %xmm0, %xmm1, %xmm2
+vroundsd $1, (%rax), %xmm1, %xmm2
+
+vroundss $1, %xmm0, %xmm1, %xmm2
+vroundss $1, (%rax), %xmm1, %xmm2
+
+vrsqrtps %xmm0, %xmm2
+vrsqrtps (%rax), %xmm2
+
+vrsqrtps %ymm0, %ymm2
+vrsqrtps (%rax), %ymm2
+
+vrsqrtss %xmm0, %xmm1, %xmm2
+vrsqrtss (%rax), %xmm1, %xmm2
+
+vshufpd $1, %xmm0, %xmm1, %xmm2
+vshufpd $1, (%rax), %xmm1, %xmm2
+
+vshufpd $1, %ymm0, %ymm1, %ymm2
+vshufpd $1, (%rax), %ymm1, %ymm2
+
+vshufps $1, %xmm0, %xmm1, %xmm2
+vshufps $1, (%rax), %xmm1, %xmm2
+
+vshufps $1, %ymm0, %ymm1, %ymm2
+vshufps $1, (%rax), %ymm1, %ymm2
+
+vsqrtpd %xmm0, %xmm2
+vsqrtpd (%rax), %xmm2
+
+vsqrtpd %ymm0, %ymm2
+vsqrtpd (%rax), %ymm2
+
+vsqrtps %xmm0, %xmm2
+vsqrtps (%rax), %xmm2
+
+vsqrtps %ymm0, %ymm2
+vsqrtps (%rax), %ymm2
+
+vsqrtsd %xmm0, %xmm1, %xmm2
+vsqrtsd (%rax), %xmm1, %xmm2
+
+vsqrtss %xmm0, %xmm1, %xmm2
+vsqrtss (%rax), %xmm1, %xmm2
+
+vstmxcsr (%rax)
+
+vsubpd %xmm0, %xmm1, %xmm2
+vsubpd (%rax), %xmm1, %xmm2
+
+vsubpd %ymm0, %ymm1, %ymm2
+vsubpd (%rax), %ymm1, %ymm2
+
+vsubps %xmm0, %xmm1, %xmm2
+vsubps (%rax), %xmm1, %xmm2
+
+vsubps %ymm0, %ymm1, %ymm2
+vsubps (%rax), %ymm1, %ymm2
+
+vsubsd %xmm0, %xmm1, %xmm2
+vsubsd (%rax), %xmm1, %xmm2
+
+vsubss %xmm0, %xmm1, %xmm2
+vsubss (%rax), %xmm1, %xmm2
+
+vtestpd %xmm0, %xmm1
+vtestpd (%rax), %xmm1
+
+vtestpd %ymm0, %ymm1
+vtestpd (%rax), %ymm1
+
+vtestps %xmm0, %xmm1
+vtestps (%rax), %xmm1
+
+vtestps %ymm0, %ymm1
+vtestps (%rax), %ymm1
+
+vucomisd %xmm0, %xmm1
+vucomisd (%rax), %xmm1
+
+vucomiss %xmm0, %xmm1
+vucomiss (%rax), %xmm1
+
+vunpckhpd %xmm0, %xmm1, %xmm2
+vunpckhpd (%rax), %xmm1, %xmm2
+
+vunpckhpd %ymm0, %ymm1, %ymm2
+vunpckhpd (%rax), %ymm1, %ymm2
+
+vunpckhps %xmm0, %xmm1, %xmm2
+vunpckhps (%rax), %xmm1, %xmm2
+
+vunpckhps %ymm0, %ymm1, %ymm2
+vunpckhps (%rax), %ymm1, %ymm2
+
+vunpcklpd %xmm0, %xmm1, %xmm2
+vunpcklpd (%rax), %xmm1, %xmm2
+
+vunpcklpd %ymm0, %ymm1, %ymm2
+vunpcklpd (%rax), %ymm1, %ymm2
+
+vunpcklps %xmm0, %xmm1, %xmm2
+vunpcklps (%rax), %xmm1, %xmm2
+
+vunpcklps %ymm0, %ymm1, %ymm2
+vunpcklps (%rax), %ymm1, %ymm2
+
+vxorpd %xmm0, %xmm1, %xmm2
+vxorpd (%rax), %xmm1, %xmm2
+
+vxorpd %ymm0, %ymm1, %ymm2
+vxorpd (%rax), %ymm1, %ymm2
+
+vxorps %xmm0, %xmm1, %xmm2
+vxorps (%rax), %xmm1, %xmm2
+
+vxorps %ymm0, %ymm1, %ymm2
+vxorps (%rax), %ymm1, %ymm2
+
+vzeroall
+vzeroupper
+
+# CHECK: Instruction Info:
+# CHECK-NEXT: [1]: #uOps
+# CHECK-NEXT: [2]: Latency
+# CHECK-NEXT: [3]: RThroughput
+# CHECK-NEXT: [4]: MayLoad
+# CHECK-NEXT: [5]: MayStore
+# CHECK-NEXT: [6]: HasSideEffects (U)
+
+# CHECK: [1] [2] [3] [4] [5] [6] Instructions:
+# CHECK-NEXT: 1 3 1.00 vaddpd %xmm0, %xmm1, %xmm2
+# CHECK-NEXT: 2 9 1.00 * vaddpd (%rax), %xmm1, %xmm2
+# CHECK-NEXT: 1 3 1.00 vaddpd %ymm0, %ymm1, %ymm2
+# CHECK-NEXT: 2 10 1.00 * vaddpd (%rax), %ymm1, %ymm2
+# CHECK-NEXT: 1 3 1.00 vaddps %xmm0, %xmm1, %xmm2
+# CHECK-NEXT: 2 9 1.00 * vaddps (%rax), %xmm1, %xmm2
+# CHECK-NEXT: 1 3 1.00 vaddps %ymm0, %ymm1, %ymm2
+# CHECK-NEXT: 2 10 1.00 * vaddps (%rax), %ymm1, %ymm2
+# CHECK-NEXT: 1 3 1.00 vaddsd %xmm0, %xmm1, %xmm2
+# CHECK-NEXT: 2 9 1.00 * vaddsd (%rax), %xmm1, %xmm2
+# CHECK-NEXT: 1 3 1.00 vaddss %xmm0, %xmm1, %xmm2
+# CHECK-NEXT: 2 9 1.00 * vaddss (%rax), %xmm1, %xmm2
+# CHECK-NEXT: 1 3 1.00 vaddsubpd %xmm0, %xmm1, %xmm2
+# CHECK-NEXT: 2 9 1.00 * vaddsubpd (%rax), %xmm1, %xmm2
+# CHECK-NEXT: 1 3 1.00 vaddsubpd %ymm0, %ymm1, %ymm2
+# CHECK-NEXT: 2 10 1.00 * vaddsubpd (%rax), %ymm1, %ymm2
+# CHECK-NEXT: 1 3 1.00 vaddsubps %xmm0, %xmm1, %xmm2
+# CHECK-NEXT: 2 9 1.00 * vaddsubps (%rax), %xmm1, %xmm2
+# CHECK-NEXT: 1 3 1.00 vaddsubps %ymm0, %ymm1, %ymm2
+# CHECK-NEXT: 2 10 1.00 * vaddsubps (%rax), %ymm1, %ymm2
+# CHECK-NEXT: 2 7 1.00 vaesdec %xmm0, %xmm1, %xmm2
+# CHECK-NEXT: 3 13 1.00 * vaesdec (%rax), %xmm1, %xmm2
+# CHECK-NEXT: 2 7 1.00 vaesdeclast %xmm0, %xmm1, %xmm2
+# CHECK-NEXT: 3 13 1.00 * vaesdeclast (%rax), %xmm1, %xmm2
+# CHECK-NEXT: 2 7 1.00 vaesenc %xmm0, %xmm1, %xmm2
+# CHECK-NEXT: 3 13 1.00 * vaesenc (%rax), %xmm1, %xmm2
+# CHECK-NEXT: 2 7 1.00 vaesenclast %xmm0, %xmm1, %xmm2
+# CHECK-NEXT: 3 13 1.00 * vaesenclast (%rax), %xmm1, %xmm2
+# CHECK-NEXT: 2 12 2.00 vaesimc %xmm0, %xmm2
+# CHECK-NEXT: 3 18 2.00 * vaesimc (%rax), %xmm2
+# CHECK-NEXT: 1 8 3.67 vaeskeygenassist $22, %xmm0, %xmm2
+# CHECK-NEXT: 1 8 3.33 * vaeskeygenassist $22, (%rax), %xmm2
+# CHECK-NEXT: 1 1 1.00 vandnpd %xmm0, %xmm1, %xmm2
+# CHECK-NEXT: 2 7 1.00 * vandnpd (%rax), %xmm1, %xmm2
+# CHECK-NEXT: 1 1 1.00 vandnpd %ymm0, %ymm1, %ymm2
+# CHECK-NEXT: 2 8 1.00 * vandnpd (%rax), %ymm1, %ymm2
+# CHECK-NEXT: 1 1 1.00 vandnps %xmm0, %xmm1, %xmm2
+# CHECK-NEXT: 2 7 1.00 * vandnps (%rax), %xmm1, %xmm2
+# CHECK-NEXT: 1 1 1.00 vandnps %ymm0, %ymm1, %ymm2
+# CHECK-NEXT: 2 8 1.00 * vandnps (%rax), %ymm1, %ymm2
+# CHECK-NEXT: 1 1 1.00 vandpd %xmm0, %xmm1, %xmm2
+# CHECK-NEXT: 2 7 1.00 * vandpd (%rax), %xmm1, %xmm2
+# CHECK-NEXT: 1 1 1.00 vandpd %ymm0, %ymm1, %ymm2
+# CHECK-NEXT: 2 8 1.00 * vandpd (%rax), %ymm1, %ymm2
+# CHECK-NEXT: 1 1 1.00 vandps %xmm0, %xmm1, %xmm2
+# CHECK-NEXT: 2 7 1.00 * vandps (%rax), %xmm1, %xmm2
+# CHECK-NEXT: 1 1 1.00 vandps %ymm0, %ymm1, %ymm2
+# CHECK-NEXT: 2 8 1.00 * vandps (%rax), %ymm1, %ymm2
+# CHECK-NEXT: 1 1 0.50 vblendpd $11, %xmm0, %xmm1, %xmm2
+# CHECK-NEXT: 2 7 0.50 * vblendpd $11, (%rax), %xmm1, %xmm2
+# CHECK-NEXT: 1 1 0.50 vblendpd $11, %ymm0, %ymm1, %ymm2
+# CHECK-NEXT: 2 8 0.50 * vblendpd $11, (%rax), %ymm1, %ymm2
+# CHECK-NEXT: 1 1 0.50 vblendps $11, %xmm0, %xmm1, %xmm2
+# CHECK-NEXT: 2 7 0.50 * vblendps $11, (%rax), %xmm1, %xmm2
+# CHECK-NEXT: 1 1 0.50 vblendps $11, %ymm0, %ymm1, %ymm2
+# CHECK-NEXT: 2 8 0.50 * vblendps $11, (%rax), %ymm1, %ymm2
+# CHECK-NEXT: 2 2 1.00 vblendvpd %xmm3, %xmm0, %xmm1, %xmm2
+# CHECK-NEXT: 3 8 1.00 * vblendvpd %xmm3, (%rax), %xmm1, %xmm2
+# CHECK-NEXT: 2 2 1.00 vblendvpd %ymm3, %ymm0, %ymm1, %ymm2
+# CHECK-NEXT: 3 9 1.00 * vblendvpd %ymm3, (%rax), %ymm1, %ymm2
+# CHECK-NEXT: 2 2 1.00 vblendvps %xmm3, %xmm0, %xmm1, %xmm2
+# CHECK-NEXT: 3 8 1.00 * vblendvps %xmm3, (%rax), %xmm1, %xmm2
+# CHECK-NEXT: 2 2 1.00 vblendvps %ymm3, %ymm0, %ymm1, %ymm2
+# CHECK-NEXT: 3 9 1.00 * vblendvps %ymm3, (%rax), %ymm1, %ymm2
+# CHECK-NEXT: 2 7 1.00 * vbroadcastf128 (%rax), %ymm2
+# CHECK-NEXT: 1 7 0.50 * vbroadcastsd (%rax), %ymm2
+# CHECK-NEXT: 1 6 0.50 * vbroadcastss (%rax), %xmm2
+# CHECK-NEXT: 1 7 0.50 * vbroadcastss (%rax), %ymm2
+# CHECK-NEXT: 1 3 1.00 vcmppd $0, %xmm0, %xmm1, %xmm2
+# CHECK-NEXT: 2 9 1.00 * vcmppd $0, (%rax), %xmm1, %xmm2
+# CHECK-NEXT: 1 3 1.00 vcmppd $0, %ymm0, %ymm1, %ymm2
+# CHECK-NEXT: 2 10 1.00 * vcmppd $0, (%rax), %ymm1, %ymm2
+# CHECK-NEXT: 1 3 1.00 vcmpps $0, %xmm0, %xmm1, %xmm2
+# CHECK-NEXT: 2 9 1.00 * vcmpps $0, (%rax), %xmm1, %xmm2
+# CHECK-NEXT: 1 3 1.00 vcmpps $0, %ymm0, %ymm1, %ymm2
+# CHECK-NEXT: 2 10 1.00 * vcmpps $0, (%rax), %ymm1, %ymm2
+# CHECK-NEXT: 1 3 1.00 vcmpsd $0, %xmm0, %xmm1, %xmm2
+# CHECK-NEXT: 2 9 1.00 * vcmpsd $0, (%rax), %xmm1, %xmm2
+# CHECK-NEXT: 1 3 1.00 vcmpss $0, %xmm0, %xmm1, %xmm2
+# CHECK-NEXT: 2 9 1.00 * vcmpss $0, (%rax), %xmm1, %xmm2
+# CHECK-NEXT: 2 2 1.00 vcomisd %xmm0, %xmm1
+# CHECK-NEXT: 3 8 1.00 * vcomisd (%rax), %xmm1
+# CHECK-NEXT: 2 2 1.00 vcomiss %xmm0, %xmm1
+# CHECK-NEXT: 3 8 1.00 * vcomiss (%rax), %xmm1
+# CHECK-NEXT: 2 4 1.00 vcvtdq2pd %xmm0, %xmm2
+# CHECK-NEXT: 3 10 1.00 * vcvtdq2pd (%rax), %xmm2
+# CHECK-NEXT: 2 4 1.00 vcvtdq2pd %xmm0, %ymm2
+# CHECK-NEXT: 3 10 1.00 * vcvtdq2pd (%rax), %ymm2
+# CHECK-NEXT: 1 3 1.00 vcvtdq2ps %xmm0, %xmm2
+# CHECK-NEXT: 2 9 1.00 * vcvtdq2ps (%rax), %xmm2
+# CHECK-NEXT: 1 3 1.00 vcvtdq2ps %ymm0, %ymm2
+# CHECK-NEXT: 2 10 1.00 * vcvtdq2ps (%rax), %ymm2
+# CHECK-NEXT: 2 4 1.00 vcvtpd2dq %xmm0, %xmm2
+# CHECK-NEXT: 3 10 1.00 * vcvtpd2dqx (%rax), %xmm2
+# CHECK-NEXT: 2 4 1.00 vcvtpd2dq %ymm0, %xmm2
+# CHECK-NEXT: 3 11 1.00 * vcvtpd2dqy (%rax), %xmm2
+# CHECK-NEXT: 2 4 1.00 vcvtpd2ps %xmm0, %xmm2
+# CHECK-NEXT: 3 10 1.00 * vcvtpd2psx (%rax), %xmm2
+# CHECK-NEXT: 2 4 1.00 vcvtpd2ps %ymm0, %xmm2
+# CHECK-NEXT: 3 11 1.00 * vcvtpd2psy (%rax), %xmm2
+# CHECK-NEXT: 1 3 1.00 vcvtps2dq %xmm0, %xmm2
+# CHECK-NEXT: 2 9 1.00 * vcvtps2dq (%rax), %xmm2
+# CHECK-NEXT: 1 3 1.00 vcvtps2dq %ymm0, %ymm2
+# CHECK-NEXT: 2 10 1.00 * vcvtps2dq (%rax), %ymm2
+# CHECK-NEXT: 2 2 1.00 vcvtps2pd %xmm0, %xmm2
+# CHECK-NEXT: 2 7 1.00 * vcvtps2pd (%rax), %xmm2
+# CHECK-NEXT: 2 2 1.00 vcvtps2pd %xmm0, %ymm2
+# CHECK-NEXT: 2 7 1.00 * vcvtps2pd (%rax), %ymm2
+# CHECK-NEXT: 2 5 1.00 vcvtsd2si %xmm0, %ecx
+# CHECK-NEXT: 2 5 1.00 vcvtsd2si %xmm0, %rcx
+# CHECK-NEXT: 3 10 1.00 * vcvtsd2si (%rax), %ecx
+# CHECK-NEXT: 3 10 1.00 * vcvtsd2si (%rax), %rcx
+# CHECK-NEXT: 2 4 1.00 vcvtsd2ss %xmm0, %xmm1, %xmm2
+# CHECK-NEXT: 3 10 1.00 * vcvtsd2ss (%rax), %xmm1, %xmm2
+# CHECK-NEXT: 2 4 1.00 vcvtsi2sdl %ecx, %xmm0, %xmm2
+# CHECK-NEXT: 2 4 1.00 vcvtsi2sdq %rcx, %xmm0, %xmm2
+# CHECK-NEXT: 2 9 1.00 * vcvtsi2sdl (%rax), %xmm0, %xmm2
+# CHECK-NEXT: 2 9 1.00 * vcvtsi2sdq (%rax), %xmm0, %xmm2
+# CHECK-NEXT: 3 5 2.00 vcvtsi2ssl %ecx, %xmm0, %xmm2
+# CHECK-NEXT: 3 5 2.00 vcvtsi2ssq %rcx, %xmm0, %xmm2
+# CHECK-NEXT: 3 10 1.00 * vcvtsi2ssl (%rax), %xmm0, %xmm2
+# CHECK-NEXT: 3 10 1.00 * vcvtsi2ssq (%rax), %xmm0, %xmm2
+# CHECK-NEXT: 1 1 1.00 vcvtss2sd %xmm0, %xmm1, %xmm2
+# CHECK-NEXT: 2 7 1.00 * vcvtss2sd (%rax), %xmm1, %xmm2
+# CHECK-NEXT: 2 5 1.00 vcvtss2si %xmm0, %ecx
+# CHECK-NEXT: 2 5 1.00 vcvtss2si %xmm0, %rcx
+# CHECK-NEXT: 3 10 1.00 * vcvtss2si (%rax), %ecx
+# CHECK-NEXT: 3 10 1.00 * vcvtss2si (%rax), %rcx
+# CHECK-NEXT: 2 4 1.00 vcvttpd2dq %xmm0, %xmm2
+# CHECK-NEXT: 3 10 1.00 * vcvttpd2dqx (%rax), %xmm2
+# CHECK-NEXT: 2 4 1.00 vcvttpd2dq %ymm0, %xmm2
+# CHECK-NEXT: 3 11 1.00 * vcvttpd2dqy (%rax), %xmm2
+# CHECK-NEXT: 1 3 1.00 vcvttps2dq %xmm0, %xmm2
+# CHECK-NEXT: 2 9 1.00 * vcvttps2dq (%rax), %xmm2
+# CHECK-NEXT: 1 3 1.00 vcvttps2dq %ymm0, %ymm2
+# CHECK-NEXT: 2 10 1.00 * vcvttps2dq (%rax), %ymm2
+# CHECK-NEXT: 2 5 1.00 vcvttsd2si %xmm0, %ecx
+# CHECK-NEXT: 2 5 1.00 vcvttsd2si %xmm0, %rcx
+# CHECK-NEXT: 3 10 1.00 * vcvttsd2si (%rax), %ecx
+# CHECK-NEXT: 3 10 1.00 * vcvttsd2si (%rax), %rcx
+# CHECK-NEXT: 2 5 1.00 vcvttss2si %xmm0, %ecx
+# CHECK-NEXT: 2 5 1.00 vcvttss2si %xmm0, %rcx
+# CHECK-NEXT: 3 10 1.00 * vcvttss2si (%rax), %ecx
+# CHECK-NEXT: 3 10 1.00 * vcvttss2si (%rax), %rcx
+# CHECK-NEXT: 1 22 22.00 vdivpd %xmm0, %xmm1, %xmm2
+# CHECK-NEXT: 2 28 22.00 * vdivpd (%rax), %xmm1, %xmm2
+# CHECK-NEXT: 3 45 44.00 vdivpd %ymm0, %ymm1, %ymm2
+# CHECK-NEXT: 4 52 44.00 * vdivpd (%rax), %ymm1, %ymm2
+# CHECK-NEXT: 1 14 14.00 vdivps %xmm0, %xmm1, %xmm2
+# CHECK-NEXT: 2 20 14.00 * vdivps (%rax), %xmm1, %xmm2
+# CHECK-NEXT: 3 29 28.00 vdivps %ymm0, %ymm1, %ymm2
+# CHECK-NEXT: 4 36 28.00 * vdivps (%rax), %ymm1, %ymm2
+# CHECK-NEXT: 1 22 22.00 vdivsd %xmm0, %xmm1, %xmm2
+# CHECK-NEXT: 2 28 22.00 * vdivsd (%rax), %xmm1, %xmm2
+# CHECK-NEXT: 1 14 14.00 vdivss %xmm0, %xmm1, %xmm2
+# CHECK-NEXT: 2 20 14.00 * vdivss (%rax), %xmm1, %xmm2
+# CHECK-NEXT: 3 9 1.00 vdppd $22, %xmm0, %xmm1, %xmm2
+# CHECK-NEXT: 4 15 1.00 * vdppd $22, (%rax), %xmm1, %xmm2
+# CHECK-NEXT: 4 12 2.00 vdpps $22, %xmm0, %xmm1, %xmm2
+# CHECK-NEXT: 5 18 2.00 * vdpps $22, (%rax), %xmm1, %xmm2
+# CHECK-NEXT: 4 12 2.00 vdpps $22, %ymm0, %ymm1, %ymm2
+# CHECK-NEXT: 5 19 2.00 * vdpps $22, (%rax), %ymm1, %ymm2
+# CHECK-NEXT: 1 1 1.00 vextractf128 $1, %ymm0, %xmm2
+# CHECK-NEXT: 1 1 1.00 * vextractf128 $1, %ymm0, (%rax)
+# CHECK-NEXT: 2 3 1.00 vextractps $1, %xmm0, %ecx
+# CHECK-NEXT: 3 5 1.00 * vextractps $1, %xmm0, (%rax)
+# CHECK-NEXT: 3 5 2.00 vhaddpd %xmm0, %xmm1, %xmm2
+# CHECK-NEXT: 4 11 2.00 * vhaddpd (%rax), %xmm1, %xmm2
+# CHECK-NEXT: 3 5 2.00 vhaddpd %ymm0, %ymm1, %ymm2
+# CHECK-NEXT: 4 12 2.00 * vhaddpd (%rax), %ymm1, %ymm2
+# CHECK-NEXT: 3 5 2.00 vhaddps %xmm0, %xmm1, %xmm2
+# CHECK-NEXT: 4 11 2.00 * vhaddps (%rax), %xmm1, %xmm2
+# CHECK-NEXT: 3 5 2.00 vhaddps %ymm0, %ymm1, %ymm2
+# CHECK-NEXT: 4 12 2.00 * vhaddps (%rax), %ymm1, %ymm2
+# CHECK-NEXT: 3 5 2.00 vhsubpd %xmm0, %xmm1, %xmm2
+# CHECK-NEXT: 4 11 2.00 * vhsubpd (%rax), %xmm1, %xmm2
+# CHECK-NEXT: 3 5 2.00 vhsubpd %ymm0, %ymm1, %ymm2
+# CHECK-NEXT: 4 12 2.00 * vhsubpd (%rax), %ymm1, %ymm2
+# CHECK-NEXT: 3 5 2.00 vhsubps %xmm0, %xmm1, %xmm2
+# CHECK-NEXT: 4 11 2.00 * vhsubps (%rax), %xmm1, %xmm2
+# CHECK-NEXT: 3 5 2.00 vhsubps %ymm0, %ymm1, %ymm2
+# CHECK-NEXT: 4 12 2.00 * vhsubps (%rax), %ymm1, %ymm2
+# CHECK-NEXT: 1 1 1.00 vinsertf128 $1, %xmm0, %ymm1, %ymm2
+# CHECK-NEXT: 2 7 0.50 * vinsertf128 $1, (%rax), %ymm1, %ymm2
+# CHECK-NEXT: 1 1 1.00 vinsertps $1, %xmm0, %xmm1, %xmm2
+# CHECK-NEXT: 2 7 1.00 * vinsertps $1, (%rax), %xmm1, %xmm2
+# CHECK-NEXT: 1 6 0.50 * vlddqu (%rax), %xmm2
+# CHECK-NEXT: 1 7 0.50 * vlddqu (%rax), %ymm2
+# CHECK-NEXT: 4 5 1.00 * * U vldmxcsr (%rax)
+# CHECK-NEXT: 1 1 1.00 * * U vmaskmovdqu %xmm0, %xmm1
+# CHECK-NEXT: 3 8 1.00 * vmaskmovpd (%rax), %xmm0, %xmm2
+# CHECK-NEXT: 3 9 1.00 * vmaskmovpd (%rax), %ymm0, %ymm2
+# CHECK-NEXT: 3 5 1.00 * * vmaskmovpd %xmm0, %xmm1, (%rax)
+# CHECK-NEXT: 3 5 1.00 * * vmaskmovpd %ymm0, %ymm1, (%rax)
+# CHECK-NEXT: 3 8 1.00 * vmaskmovps (%rax), %xmm0, %xmm2
+# CHECK-NEXT: 3 9 1.00 * vmaskmovps (%rax), %ymm0, %ymm2
+# CHECK-NEXT: 3 5 1.00 * * vmaskmovps %xmm0, %xmm1, (%rax)
+# CHECK-NEXT: 3 5 1.00 * * vmaskmovps %ymm0, %ymm1, (%rax)
+# CHECK-NEXT: 1 3 1.00 vmaxpd %xmm0, %xmm1, %xmm2
+# CHECK-NEXT: 2 9 1.00 * vmaxpd (%rax), %xmm1, %xmm2
+# CHECK-NEXT: 1 3 1.00 vmaxpd %ymm0, %ymm1, %ymm2
+# CHECK-NEXT: 2 10 1.00 * vmaxpd (%rax), %ymm1, %ymm2
+# CHECK-NEXT: 1 3 1.00 vmaxps %xmm0, %xmm1, %xmm2
+# CHECK-NEXT: 2 9 1.00 * vmaxps (%rax), %xmm1, %xmm2
+# CHECK-NEXT: 1 3 1.00 vmaxps %ymm0, %ymm1, %ymm2
+# CHECK-NEXT: 2 10 1.00 * vmaxps (%rax), %ymm1, %ymm2
+# CHECK-NEXT: 1 3 1.00 vmaxsd %xmm0, %xmm1, %xmm2
+# CHECK-NEXT: 2 9 1.00 * vmaxsd (%rax), %xmm1, %xmm2
+# CHECK-NEXT: 1 3 1.00 vmaxss %xmm0, %xmm1, %xmm2
+# CHECK-NEXT: 2 9 1.00 * vmaxss (%rax), %xmm1, %xmm2
+# CHECK-NEXT: 1 3 1.00 vminpd %xmm0, %xmm1, %xmm2
+# CHECK-NEXT: 2 9 1.00 * vminpd (%rax), %xmm1, %xmm2
+# CHECK-NEXT: 1 3 1.00 vminpd %ymm0, %ymm1, %ymm2
+# CHECK-NEXT: 2 10 1.00 * vminpd (%rax), %ymm1, %ymm2
+# CHECK-NEXT: 1 3 1.00 vminps %xmm0, %xmm1, %xmm2
+# CHECK-NEXT: 2 9 1.00 * vminps (%rax), %xmm1, %xmm2
+# CHECK-NEXT: 1 3 1.00 vminps %ymm0, %ymm1, %ymm2
+# CHECK-NEXT: 2 10 1.00 * vminps (%rax), %ymm1, %ymm2
+# CHECK-NEXT: 1 3 1.00 vminsd %xmm0, %xmm1, %xmm2
+# CHECK-NEXT: 2 9 1.00 * vminsd (%rax), %xmm1, %xmm2
+# CHECK-NEXT: 1 3 1.00 vminss %xmm0, %xmm1, %xmm2
+# CHECK-NEXT: 2 9 1.00 * vminss (%rax), %xmm1, %xmm2
+# CHECK-NEXT: 1 1 1.00 vmovapd %xmm0, %xmm2
+# CHECK-NEXT: 1 1 1.00 * vmovapd %xmm0, (%rax)
+# CHECK-NEXT: 1 6 0.50 * vmovapd (%rax), %xmm2
+# CHECK-NEXT: 1 1 1.00 vmovapd %ymm0, %ymm2
+# CHECK-NEXT: 1 1 1.00 * vmovapd %ymm0, (%rax)
+# CHECK-NEXT: 1 7 0.50 * vmovapd (%rax), %ymm2
+# CHECK-NEXT: 1 1 1.00 vmovaps %xmm0, %xmm2
+# CHECK-NEXT: 1 1 1.00 * vmovaps %xmm0, (%rax)
+# CHECK-NEXT: 1 6 0.50 * vmovaps (%rax), %xmm2
+# CHECK-NEXT: 1 1 1.00 vmovaps %ymm0, %ymm2
+# CHECK-NEXT: 1 1 1.00 * vmovaps %ymm0, (%rax)
+# CHECK-NEXT: 1 7 0.50 * vmovaps (%rax), %ymm2
+# CHECK-NEXT: 1 1 1.00 vmovd %eax, %xmm2
+# CHECK-NEXT: 1 6 0.50 * vmovd (%rax), %xmm2
+# CHECK-NEXT: 1 2 1.00 vmovd %xmm0, %ecx
+# CHECK-NEXT: 1 1 1.00 * vmovd %xmm0, (%rax)
+# CHECK-NEXT: 1 1 1.00 vmovddup %xmm0, %xmm2
+# CHECK-NEXT: 1 6 0.50 * vmovddup (%rax), %xmm2
+# CHECK-NEXT: 1 1 1.00 vmovddup %ymm0, %ymm2
+# CHECK-NEXT: 1 7 0.50 * vmovddup (%rax), %ymm2
+# CHECK-NEXT: 1 1 0.33 vmovdqa %xmm0, %xmm2
+# CHECK-NEXT: 1 1 1.00 * vmovdqa %xmm0, (%rax)
+# CHECK-NEXT: 1 6 0.50 * vmovdqa (%rax), %xmm2
+# CHECK-NEXT: 1 1 0.50 vmovdqa %ymm0, %ymm2
+# CHECK-NEXT: 1 1 1.00 * vmovdqa %ymm0, (%rax)
+# CHECK-NEXT: 1 7 0.50 * vmovdqa (%rax), %ymm2
+# CHECK-NEXT: 1 1 0.33 vmovdqu %xmm0, %xmm2
+# CHECK-NEXT: 1 1 1.00 * vmovdqu %xmm0, (%rax)
+# CHECK-NEXT: 1 6 0.50 * vmovdqu (%rax), %xmm2
+# CHECK-NEXT: 1 1 0.50 vmovdqu %ymm0, %ymm2
+# CHECK-NEXT: 1 1 1.00 * vmovdqu %ymm0, (%rax)
+# CHECK-NEXT: 1 7 0.50 * vmovdqu (%rax), %ymm2
+# CHECK-NEXT: 1 1 1.00 vmovhlps %xmm0, %xmm1, %xmm2
+# CHECK-NEXT: 1 1 1.00 vmovlhps %xmm0, %xmm1, %xmm2
+# CHECK-NEXT: 1 1 1.00 * vmovhpd %xmm0, (%rax)
+# CHECK-NEXT: 2 7 1.00 * vmovhpd (%rax), %xmm1, %xmm2
+# CHECK-NEXT: 1 1 1.00 * vmovhps %xmm0, (%rax)
+# CHECK-NEXT: 2 7 1.00 * vmovhps (%rax), %xmm1, %xmm2
+# CHECK-NEXT: 1 1 1.00 * vmovlpd %xmm0, (%rax)
+# CHECK-NEXT: 2 7 1.00 * vmovlpd (%rax), %xmm1, %xmm2
+# CHECK-NEXT: 1 1 1.00 * vmovlps %xmm0, (%rax)
+# CHECK-NEXT: 2 7 1.00 * vmovlps (%rax), %xmm1, %xmm2
+# CHECK-NEXT: 1 2 1.00 vmovmskpd %xmm0, %ecx
+# CHECK-NEXT: 1 2 1.00 vmovmskpd %ymm0, %ecx
+# CHECK-NEXT: 1 2 1.00 vmovmskps %xmm0, %ecx
+# CHECK-NEXT: 1 2 1.00 vmovmskps %ymm0, %ecx
+# CHECK-NEXT: 1 1 1.00 * vmovntdq %xmm0, (%rax)
+# CHECK-NEXT: 1 1 1.00 * vmovntdq %ymm0, (%rax)
+# CHECK-NEXT: 1 6 0.50 * vmovntdqa (%rax), %xmm2
+# CHECK-NEXT: 1 7 0.50 * vmovntdqa (%rax), %ymm2
+# CHECK-NEXT: 1 1 1.00 * vmovntpd %xmm0, (%rax)
+# CHECK-NEXT: 1 1 1.00 * vmovntpd %ymm0, (%rax)
+# CHECK-NEXT: 1 1 1.00 * vmovntps %xmm0, (%rax)
+# CHECK-NEXT: 1 1 1.00 * vmovntps %ymm0, (%rax)
+# CHECK-NEXT: 1 1 0.33 vmovq %xmm0, %xmm2
+# CHECK-NEXT: 1 1 1.00 vmovq %rax, %xmm2
+# CHECK-NEXT: 1 6 0.50 * vmovq (%rax), %xmm2
+# CHECK-NEXT: 1 2 1.00 vmovq %xmm0, %rcx
+# CHECK-NEXT: 1 1 1.00 * vmovq %xmm0, (%rax)
+# CHECK-NEXT: 1 1 1.00 vmovsd %xmm0, %xmm1, %xmm2
+# CHECK-NEXT: 1 1 1.00 * vmovsd %xmm0, (%rax)
+# CHECK-NEXT: 1 6 0.50 * vmovsd (%rax), %xmm2
+# CHECK-NEXT: 1 1 1.00 vmovshdup %xmm0, %xmm2
+# CHECK-NEXT: 1 6 0.50 * vmovshdup (%rax), %xmm2
+# CHECK-NEXT: 1 1 1.00 vmovshdup %ymm0, %ymm2
+# CHECK-NEXT: 1 7 0.50 * vmovshdup (%rax), %ymm2
+# CHECK-NEXT: 1 1 1.00 vmovsldup %xmm0, %xmm2
+# CHECK-NEXT: 1 6 0.50 * vmovsldup (%rax), %xmm2
+# CHECK-NEXT: 1 1 1.00 vmovsldup %ymm0, %ymm2
+# CHECK-NEXT: 1 7 0.50 * vmovsldup (%rax), %ymm2
+# CHECK-NEXT: 1 1 1.00 vmovss %xmm0, %xmm1, %xmm2
+# CHECK-NEXT: 1 1 1.00 * vmovss %xmm0, (%rax)
+# CHECK-NEXT: 1 6 0.50 * vmovss (%rax), %xmm2
+# CHECK-NEXT: 1 1 1.00 vmovupd %xmm0, %xmm2
+# CHECK-NEXT: 1 1 1.00 * vmovupd %xmm0, (%rax)
+# CHECK-NEXT: 1 6 0.50 * vmovupd (%rax), %xmm2
+# CHECK-NEXT: 1 1 1.00 vmovupd %ymm0, %ymm2
+# CHECK-NEXT: 1 1 1.00 * vmovupd %ymm0, (%rax)
+# CHECK-NEXT: 1 7 0.50 * vmovupd (%rax), %ymm2
+# CHECK-NEXT: 1 1 1.00 vmovups %xmm0, %xmm2
+# CHECK-NEXT: 1 1 1.00 * vmovups %xmm0, (%rax)
+# CHECK-NEXT: 1 6 0.50 * vmovups (%rax), %xmm2
+# CHECK-NEXT: 1 1 1.00 vmovups %ymm0, %ymm2
+# CHECK-NEXT: 1 1 1.00 * vmovups %ymm0, (%rax)
+# CHECK-NEXT: 1 7 0.50 * vmovups (%rax), %ymm2
+# CHECK-NEXT: 3 7 1.00 vmpsadbw $1, %xmm0, %xmm1, %xmm2
+# CHECK-NEXT: 4 13 1.00 * vmpsadbw $1, (%rax), %xmm1, %xmm2
+# CHECK-NEXT: 1 5 1.00 vmulpd %xmm0, %xmm1, %xmm2
+# CHECK-NEXT: 2 11 1.00 * vmulpd (%rax), %xmm1, %xmm2
+# CHECK-NEXT: 1 5 1.00 vmulpd %ymm0, %ymm1, %ymm2
+# CHECK-NEXT: 2 12 1.00 * vmulpd (%rax), %ymm1, %ymm2
+# CHECK-NEXT: 1 5 1.00 vmulps %xmm0, %xmm1, %xmm2
+# CHECK-NEXT: 2 11 1.00 * vmulps (%rax), %xmm1, %xmm2
+# CHECK-NEXT: 1 5 1.00 vmulps %ymm0, %ymm1, %ymm2
+# CHECK-NEXT: 2 12 1.00 * vmulps (%rax), %ymm1, %ymm2
+# CHECK-NEXT: 1 5 1.00 vmulsd %xmm0, %xmm1, %xmm2
+# CHECK-NEXT: 2 11 1.00 * vmulsd (%rax), %xmm1, %xmm2
+# CHECK-NEXT: 1 5 1.00 vmulss %xmm0, %xmm1, %xmm2
+# CHECK-NEXT: 2 11 1.00 * vmulss (%rax), %xmm1, %xmm2
+# CHECK-NEXT: 1 1 1.00 vorpd %xmm0, %xmm1, %xmm2
+# CHECK-NEXT: 2 7 1.00 * vorpd (%rax), %xmm1, %xmm2
+# CHECK-NEXT: 1 1 1.00 vorpd %ymm0, %ymm1, %ymm2
+# CHECK-NEXT: 2 8 1.00 * vorpd (%rax), %ymm1, %ymm2
+# CHECK-NEXT: 1 1 1.00 vorps %xmm0, %xmm1, %xmm2
+# CHECK-NEXT: 2 7 1.00 * vorps (%rax), %xmm1, %xmm2
+# CHECK-NEXT: 1 1 1.00 vorps %ymm0, %ymm1, %ymm2
+# CHECK-NEXT: 2 8 1.00 * vorps (%rax), %ymm1, %ymm2
+# CHECK-NEXT: 1 1 0.50 vpabsb %xmm0, %xmm2
+# CHECK-NEXT: 2 7 0.50 * vpabsb (%rax), %xmm2
+# CHECK-NEXT: 1 1 0.50 vpabsd %xmm0, %xmm2
+# CHECK-NEXT: 2 7 0.50 * vpabsd (%rax), %xmm2
+# CHECK-NEXT: 1 1 0.50 vpabsw %xmm0, %xmm2
+# CHECK-NEXT: 2 7 0.50 * vpabsw (%rax), %xmm2
+# CHECK-NEXT: 1 1 0.50 vpackssdw %xmm0, %xmm1, %xmm2
+# CHECK-NEXT: 2 7 0.50 * vpackssdw (%rax), %xmm1, %xmm2
+# CHECK-NEXT: 1 1 0.50 vpacksswb %xmm0, %xmm1, %xmm2
+# CHECK-NEXT: 2 7 0.50 * vpacksswb (%rax), %xmm1, %xmm2
+# CHECK-NEXT: 1 1 0.50 vpackusdw %xmm0, %xmm1, %xmm2
+# CHECK-NEXT: 2 7 0.50 * vpackusdw (%rax), %xmm1, %xmm2
+# CHECK-NEXT: 1 1 0.50 vpackuswb %xmm0, %xmm1, %xmm2
+# CHECK-NEXT: 2 7 0.50 * vpackuswb (%rax), %xmm1, %xmm2
+# CHECK-NEXT: 1 1 0.50 vpaddb %xmm0, %xmm1, %xmm2
+# CHECK-NEXT: 2 7 0.50 * vpaddb (%rax), %xmm1, %xmm2
+# CHECK-NEXT: 1 1 0.50 vpaddd %xmm0, %xmm1, %xmm2
+# CHECK-NEXT: 2 7 0.50 * vpaddd (%rax), %xmm1, %xmm2
+# CHECK-NEXT: 1 1 0.50 vpaddq %xmm0, %xmm1, %xmm2
+# CHECK-NEXT: 2 7 0.50 * vpaddq (%rax), %xmm1, %xmm2
+# CHECK-NEXT: 1 1 0.50 vpaddsb %xmm0, %xmm1, %xmm2
+# CHECK-NEXT: 2 7 0.50 * vpaddsb (%rax), %xmm1, %xmm2
+# CHECK-NEXT: 1 1 0.50 vpaddsw %xmm0, %xmm1, %xmm2
+# CHECK-NEXT: 2 7 0.50 * vpaddsw (%rax), %xmm1, %xmm2
+# CHECK-NEXT: 1 1 0.50 vpaddusb %xmm0, %xmm1, %xmm2
+# CHECK-NEXT: 2 7 0.50 * vpaddusb (%rax), %xmm1, %xmm2
+# CHECK-NEXT: 1 1 0.50 vpaddusw %xmm0, %xmm1, %xmm2
+# CHECK-NEXT: 2 7 0.50 * vpaddusw (%rax), %xmm1, %xmm2
+# CHECK-NEXT: 1 1 0.50 vpaddw %xmm0, %xmm1, %xmm2
+# CHECK-NEXT: 2 7 0.50 * vpaddw (%rax), %xmm1, %xmm2
+# CHECK-NEXT: 1 1 0.50 vpalignr $1, %xmm0, %xmm1, %xmm2
+# CHECK-NEXT: 2 7 0.50 * vpalignr $1, (%rax), %xmm1, %xmm2
+# CHECK-NEXT: 1 1 0.33 vpand %xmm0, %xmm1, %xmm2
+# CHECK-NEXT: 2 7 0.50 * vpand (%rax), %xmm1, %xmm2
+# CHECK-NEXT: 1 1 0.33 vpandn %xmm0, %xmm1, %xmm2
+# CHECK-NEXT: 2 7 0.50 * vpandn (%rax), %xmm1, %xmm2
+# CHECK-NEXT: 1 1 0.50 vpavgb %xmm0, %xmm1, %xmm2
+# CHECK-NEXT: 2 7 0.50 * vpavgb (%rax), %xmm1, %xmm2
+# CHECK-NEXT: 1 1 0.50 vpavgw %xmm0, %xmm1, %xmm2
+# CHECK-NEXT: 2 7 0.50 * vpavgw (%rax), %xmm1, %xmm2
+# CHECK-NEXT: 2 2 1.00 vpblendvb %xmm3, %xmm0, %xmm1, %xmm2
+# CHECK-NEXT: 3 8 1.00 * vpblendvb %xmm3, (%rax), %xmm1, %xmm2
+# CHECK-NEXT: 1 1 0.50 vpblendw $11, %xmm0, %xmm1, %xmm2
+# CHECK-NEXT: 2 7 0.50 * vpblendw $11, (%rax), %xmm1, %xmm2
+# CHECK-NEXT: 1 14 6.00 vpclmulqdq $11, %xmm0, %xmm1, %xmm2
+# CHECK-NEXT: 1 14 5.67 * vpclmulqdq $11, (%rax), %xmm1, %xmm2
+# CHECK-NEXT: 1 1 0.50 vpcmpeqb %xmm0, %xmm1, %xmm2
+# CHECK-NEXT: 2 7 0.50 * vpcmpeqb (%rax), %xmm1, %xmm2
+# CHECK-NEXT: 1 1 0.50 vpcmpeqd %xmm0, %xmm1, %xmm2
+# CHECK-NEXT: 2 7 0.50 * vpcmpeqd (%rax), %xmm1, %xmm2
+# CHECK-NEXT: 1 1 0.50 vpcmpeqq %xmm0, %xmm1, %xmm2
+# CHECK-NEXT: 2 7 0.50 * vpcmpeqq (%rax), %xmm1, %xmm2
+# CHECK-NEXT: 1 1 0.50 vpcmpeqw %xmm0, %xmm1, %xmm2
+# CHECK-NEXT: 2 7 0.50 * vpcmpeqw (%rax), %xmm1, %xmm2
+# CHECK-NEXT: 1 4 2.67 vpcmpestri $1, %xmm0, %xmm2
+# CHECK-NEXT: 1 4 2.33 * vpcmpestri $1, (%rax), %xmm2
+# CHECK-NEXT: 1 11 2.67 vpcmpestrm $1, %xmm0, %xmm2
+# CHECK-NEXT: 1 11 2.33 * vpcmpestrm $1, (%rax), %xmm2
+# CHECK-NEXT: 1 1 0.50 vpcmpgtb %xmm0, %xmm1, %xmm2
+# CHECK-NEXT: 2 7 0.50 * vpcmpgtb (%rax), %xmm1, %xmm2
+# CHECK-NEXT: 1 1 0.50 vpcmpgtd %xmm0, %xmm1, %xmm2
+# CHECK-NEXT: 2 7 0.50 * vpcmpgtd (%rax), %xmm1, %xmm2
+# CHECK-NEXT: 1 5 1.00 vpcmpgtq %xmm0, %xmm1, %xmm2
+# CHECK-NEXT: 2 11 1.00 * vpcmpgtq (%rax), %xmm1, %xmm2
+# CHECK-NEXT: 1 1 0.50 vpcmpgtw %xmm0, %xmm1, %xmm2
+# CHECK-NEXT: 2 7 0.50 * vpcmpgtw (%rax), %xmm1, %xmm2
+# CHECK-NEXT: 3 11 3.00 vpcmpistri $1, %xmm0, %xmm2
+# CHECK-NEXT: 4 17 3.00 * vpcmpistri $1, (%rax), %xmm2
+# CHECK-NEXT: 3 11 3.00 vpcmpistrm $1, %xmm0, %xmm2
+# CHECK-NEXT: 4 17 3.00 * vpcmpistrm $1, (%rax), %xmm2
+# CHECK-NEXT: 1 1 1.00 vperm2f128 $1, %ymm0, %ymm1, %ymm2
+# CHECK-NEXT: 2 8 1.00 * vperm2f128 $1, (%rax), %ymm1, %ymm2
+# CHECK-NEXT: 1 1 1.00 vpermilpd $1, %xmm0, %xmm2
+# CHECK-NEXT: 2 7 1.00 * vpermilpd $1, (%rax), %xmm2
+# CHECK-NEXT: 1 1 1.00 vpermilpd %xmm0, %xmm1, %xmm2
+# CHECK-NEXT: 2 7 1.00 * vpermilpd (%rax), %xmm1, %xmm2
+# CHECK-NEXT: 1 1 1.00 vpermilpd $1, %ymm0, %ymm2
+# CHECK-NEXT: 2 8 1.00 * vpermilpd $1, (%rax), %ymm2
+# CHECK-NEXT: 1 1 1.00 vpermilpd %ymm0, %ymm1, %ymm2
+# CHECK-NEXT: 2 8 1.00 * vpermilpd (%rax), %ymm1, %ymm2
+# CHECK-NEXT: 1 1 1.00 vpermilps $1, %xmm0, %xmm2
+# CHECK-NEXT: 2 7 1.00 * vpermilps $1, (%rax), %xmm2
+# CHECK-NEXT: 1 1 1.00 vpermilps %xmm0, %xmm1, %xmm2
+# CHECK-NEXT: 2 7 1.00 * vpermilps (%rax), %xmm1, %xmm2
+# CHECK-NEXT: 1 1 1.00 vpermilps $1, %ymm0, %ymm2
+# CHECK-NEXT: 2 8 1.00 * vpermilps $1, (%rax), %ymm2
+# CHECK-NEXT: 1 1 1.00 vpermilps %ymm0, %ymm1, %ymm2
+# CHECK-NEXT: 2 8 1.00 * vpermilps (%rax), %ymm1, %ymm2
+# CHECK-NEXT: 2 3 1.00 vpextrb $1, %xmm0, %ecx
+# CHECK-NEXT: 3 5 1.00 * vpextrb $1, %xmm0, (%rax)
+# CHECK-NEXT: 2 3 1.00 vpextrd $1, %xmm0, %ecx
+# CHECK-NEXT: 4 5 1.00 * vpextrd $1, %xmm0, (%rax)
+# CHECK-NEXT: 2 3 1.00 vpextrq $1, %xmm0, %rcx
+# CHECK-NEXT: 4 5 1.00 * vpextrq $1, %xmm0, (%rax)
+# CHECK-NEXT: 2 3 1.00 vpextrw $1, %xmm0, %ecx
+# CHECK-NEXT: 3 5 1.00 * vpextrw $1, %xmm0, (%rax)
+# CHECK-NEXT: 3 3 1.50 vphaddd %xmm0, %xmm1, %xmm2
+# CHECK-NEXT: 4 9 1.50 * vphaddd (%rax), %xmm1, %xmm2
+# CHECK-NEXT: 3 3 1.50 vphaddsw %xmm0, %xmm1, %xmm2
+# CHECK-NEXT: 4 9 1.50 * vphaddsw (%rax), %xmm1, %xmm2
+# CHECK-NEXT: 3 3 1.50 vphaddw %xmm0, %xmm1, %xmm2
+# CHECK-NEXT: 4 9 1.50 * vphaddw (%rax), %xmm1, %xmm2
+# CHECK-NEXT: 1 5 1.00 vphminposuw %xmm0, %xmm2
+# CHECK-NEXT: 2 11 1.00 * vphminposuw (%rax), %xmm2
+# CHECK-NEXT: 3 3 1.50 vphsubd %xmm0, %xmm1, %xmm2
+# CHECK-NEXT: 4 9 1.50 * vphsubd (%rax), %xmm1, %xmm2
+# CHECK-NEXT: 3 3 1.50 vphsubsw %xmm0, %xmm1, %xmm2
+# CHECK-NEXT: 4 9 1.50 * vphsubsw (%rax), %xmm1, %xmm2
+# CHECK-NEXT: 3 3 1.50 vphsubw %xmm0, %xmm1, %xmm2
+# CHECK-NEXT: 4 9 1.50 * vphsubw (%rax), %xmm1, %xmm2
+# CHECK-NEXT: 2 2 1.00 vpinsrb $1, %eax, %xmm1, %xmm2
+# CHECK-NEXT: 2 7 0.50 * vpinsrb $1, (%rax), %xmm1, %xmm2
+# CHECK-NEXT: 2 2 1.00 vpinsrd $1, %eax, %xmm1, %xmm2
+# CHECK-NEXT: 2 7 0.50 * vpinsrd $1, (%rax), %xmm1, %xmm2
+# CHECK-NEXT: 2 2 1.00 vpinsrq $1, %rax, %xmm1, %xmm2
+# CHECK-NEXT: 2 7 0.50 * vpinsrq $1, (%rax), %xmm1, %xmm2
+# CHECK-NEXT: 2 2 1.00 vpinsrw $1, %eax, %xmm1, %xmm2
+# CHECK-NEXT: 2 7 0.50 * vpinsrw $1, (%rax), %xmm1, %xmm2
+# CHECK-NEXT: 1 5 1.00 vpmaddubsw %xmm0, %xmm1, %xmm2
+# CHECK-NEXT: 2 11 1.00 * vpmaddubsw (%rax), %xmm1, %xmm2
+# CHECK-NEXT: 1 5 1.00 vpmaddwd %xmm0, %xmm1, %xmm2
+# CHECK-NEXT: 2 11 1.00 * vpmaddwd (%rax), %xmm1, %xmm2
+# CHECK-NEXT: 1 1 0.50 vpmaxsb %xmm0, %xmm1, %xmm2
+# CHECK-NEXT: 2 7 0.50 * vpmaxsb (%rax), %xmm1, %xmm2
+# CHECK-NEXT: 1 1 0.50 vpmaxsd %xmm0, %xmm1, %xmm2
+# CHECK-NEXT: 2 7 0.50 * vpmaxsd (%rax), %xmm1, %xmm2
+# CHECK-NEXT: 1 1 0.50 vpmaxsw %xmm0, %xmm1, %xmm2
+# CHECK-NEXT: 2 7 0.50 * vpmaxsw (%rax), %xmm1, %xmm2
+# CHECK-NEXT: 1 1 0.50 vpmaxub %xmm0, %xmm1, %xmm2
+# CHECK-NEXT: 2 7 0.50 * vpmaxub (%rax), %xmm1, %xmm2
+# CHECK-NEXT: 1 1 0.50 vpmaxud %xmm0, %xmm1, %xmm2
+# CHECK-NEXT: 2 7 0.50 * vpmaxud (%rax), %xmm1, %xmm2
+# CHECK-NEXT: 1 1 0.50 vpmaxuw %xmm0, %xmm1, %xmm2
+# CHECK-NEXT: 2 7 0.50 * vpmaxuw (%rax), %xmm1, %xmm2
+# CHECK-NEXT: 1 1 0.50 vpminsb %xmm0, %xmm1, %xmm2
+# CHECK-NEXT: 2 7 0.50 * vpminsb (%rax), %xmm1, %xmm2
+# CHECK-NEXT: 1 1 0.50 vpminsd %xmm0, %xmm1, %xmm2
+# CHECK-NEXT: 2 7 0.50 * vpminsd (%rax), %xmm1, %xmm2
+# CHECK-NEXT: 1 1 0.50 vpminsw %xmm0, %xmm1, %xmm2
+# CHECK-NEXT: 2 7 0.50 * vpminsw (%rax), %xmm1, %xmm2
+# CHECK-NEXT: 1 1 0.50 vpminub %xmm0, %xmm1, %xmm2
+# CHECK-NEXT: 2 7 0.50 * vpminub (%rax), %xmm1, %xmm2
+# CHECK-NEXT: 1 1 0.50 vpminud %xmm0, %xmm1, %xmm2
+# CHECK-NEXT: 2 7 0.50 * vpminud (%rax), %xmm1, %xmm2
+# CHECK-NEXT: 1 1 0.50 vpminuw %xmm0, %xmm1, %xmm2
+# CHECK-NEXT: 2 7 0.50 * vpminuw (%rax), %xmm1, %xmm2
+# CHECK-NEXT: 1 2 1.00 vpmovmskb %xmm0, %ecx
+# CHECK-NEXT: 1 1 0.50 vpmovsxbd %xmm0, %xmm2
+# CHECK-NEXT: 2 7 0.50 * vpmovsxbd (%rax), %xmm2
+# CHECK-NEXT: 1 1 0.50 vpmovsxbq %xmm0, %xmm2
+# CHECK-NEXT: 2 7 0.50 * vpmovsxbq (%rax), %xmm2
+# CHECK-NEXT: 1 1 0.50 vpmovsxbw %xmm0, %xmm2
+# CHECK-NEXT: 2 7 0.50 * vpmovsxbw (%rax), %xmm2
+# CHECK-NEXT: 1 1 0.50 vpmovsxdq %xmm0, %xmm2
+# CHECK-NEXT: 2 7 0.50 * vpmovsxdq (%rax), %xmm2
+# CHECK-NEXT: 1 1 0.50 vpmovsxwd %xmm0, %xmm2
+# CHECK-NEXT: 2 7 0.50 * vpmovsxwd (%rax), %xmm2
+# CHECK-NEXT: 1 1 0.50 vpmovsxwq %xmm0, %xmm2
+# CHECK-NEXT: 2 7 0.50 * vpmovsxwq (%rax), %xmm2
+# CHECK-NEXT: 1 1 0.50 vpmovzxbd %xmm0, %xmm2
+# CHECK-NEXT: 2 7 0.50 * vpmovzxbd (%rax), %xmm2
+# CHECK-NEXT: 1 1 0.50 vpmovzxbq %xmm0, %xmm2
+# CHECK-NEXT: 2 7 0.50 * vpmovzxbq (%rax), %xmm2
+# CHECK-NEXT: 1 1 0.50 vpmovzxbw %xmm0, %xmm2
+# CHECK-NEXT: 2 7 0.50 * vpmovzxbw (%rax), %xmm2
+# CHECK-NEXT: 1 1 0.50 vpmovzxdq %xmm0, %xmm2
+# CHECK-NEXT: 2 7 0.50 * vpmovzxdq (%rax), %xmm2
+# CHECK-NEXT: 1 1 0.50 vpmovzxwd %xmm0, %xmm2
+# CHECK-NEXT: 2 7 0.50 * vpmovzxwd (%rax), %xmm2
+# CHECK-NEXT: 1 1 0.50 vpmovzxwq %xmm0, %xmm2
+# CHECK-NEXT: 2 7 0.50 * vpmovzxwq (%rax), %xmm2
+# CHECK-NEXT: 1 5 1.00 vpmuldq %xmm0, %xmm1, %xmm2
+# CHECK-NEXT: 2 11 1.00 * vpmuldq (%rax), %xmm1, %xmm2
+# CHECK-NEXT: 1 5 1.00 vpmulhrsw %xmm0, %xmm1, %xmm2
+# CHECK-NEXT: 2 11 1.00 * vpmulhrsw (%rax), %xmm1, %xmm2
+# CHECK-NEXT: 1 5 1.00 vpmulhuw %xmm0, %xmm1, %xmm2
+# CHECK-NEXT: 2 11 1.00 * vpmulhuw (%rax), %xmm1, %xmm2
+# CHECK-NEXT: 1 5 1.00 vpmulhw %xmm0, %xmm1, %xmm2
+# CHECK-NEXT: 2 11 1.00 * vpmulhw (%rax), %xmm1, %xmm2
+# CHECK-NEXT: 1 5 1.00 vpmulld %xmm0, %xmm1, %xmm2
+# CHECK-NEXT: 2 11 1.00 * vpmulld (%rax), %xmm1, %xmm2
+# CHECK-NEXT: 1 5 1.00 vpmullw %xmm0, %xmm1, %xmm2
+# CHECK-NEXT: 2 11 1.00 * vpmullw (%rax), %xmm1, %xmm2
+# CHECK-NEXT: 1 5 1.00 vpmuludq %xmm0, %xmm1, %xmm2
+# CHECK-NEXT: 2 11 1.00 * vpmuludq (%rax), %xmm1, %xmm2
+# CHECK-NEXT: 1 1 0.33 vpor %xmm0, %xmm1, %xmm2
+# CHECK-NEXT: 2 7 0.50 * vpor (%rax), %xmm1, %xmm2
+# CHECK-NEXT: 1 5 1.00 vpsadbw %xmm0, %xmm1, %xmm2
+# CHECK-NEXT: 2 11 1.00 * vpsadbw (%rax), %xmm1, %xmm2
+# CHECK-NEXT: 1 1 0.50 vpshufb %xmm0, %xmm1, %xmm2
+# CHECK-NEXT: 2 7 0.50 * vpshufb (%rax), %xmm1, %xmm2
+# CHECK-NEXT: 1 1 0.50 vpshufd $1, %xmm0, %xmm2
+# CHECK-NEXT: 2 7 0.50 * vpshufd $1, (%rax), %xmm2
+# CHECK-NEXT: 1 1 0.50 vpshufhw $1, %xmm0, %xmm2
+# CHECK-NEXT: 2 7 0.50 * vpshufhw $1, (%rax), %xmm2
+# CHECK-NEXT: 1 1 0.50 vpshuflw $1, %xmm0, %xmm2
+# CHECK-NEXT: 2 7 0.50 * vpshuflw $1, (%rax), %xmm2
+# CHECK-NEXT: 1 1 0.50 vpsignb %xmm0, %xmm1, %xmm2
+# CHECK-NEXT: 2 7 0.50 * vpsignb (%rax), %xmm1, %xmm2
+# CHECK-NEXT: 1 1 0.50 vpsignd %xmm0, %xmm1, %xmm2
+# CHECK-NEXT: 2 7 0.50 * vpsignd (%rax), %xmm1, %xmm2
+# CHECK-NEXT: 1 1 0.50 vpsignw %xmm0, %xmm1, %xmm2
+# CHECK-NEXT: 2 7 0.50 * vpsignw (%rax), %xmm1, %xmm2
+# CHECK-NEXT: 1 1 1.00 vpslld $1, %xmm0, %xmm2
+# CHECK-NEXT: 2 2 1.00 vpslld %xmm0, %xmm1, %xmm2
+# CHECK-NEXT: 3 8 1.00 * vpslld (%rax), %xmm1, %xmm2
+# CHECK-NEXT: 1 1 0.50 vpslldq $1, %xmm1, %xmm2
+# CHECK-NEXT: 1 1 1.00 vpsllq $1, %xmm0, %xmm2
+# CHECK-NEXT: 2 2 1.00 vpsllq %xmm0, %xmm1, %xmm2
+# CHECK-NEXT: 3 8 1.00 * vpsllq (%rax), %xmm1, %xmm2
+# CHECK-NEXT: 1 1 1.00 vpsllw $1, %xmm0, %xmm2
+# CHECK-NEXT: 2 2 1.00 vpsllw %xmm0, %xmm1, %xmm2
+# CHECK-NEXT: 3 8 1.00 * vpsllw (%rax), %xmm1, %xmm2
+# CHECK-NEXT: 1 1 1.00 vpsrad $1, %xmm0, %xmm2
+# CHECK-NEXT: 2 2 1.00 vpsrad %xmm0, %xmm1, %xmm2
+# CHECK-NEXT: 3 8 1.00 * vpsrad (%rax), %xmm1, %xmm2
+# CHECK-NEXT: 1 1 1.00 vpsraw $1, %xmm0, %xmm2
+# CHECK-NEXT: 2 2 1.00 vpsraw %xmm0, %xmm1, %xmm2
+# CHECK-NEXT: 3 8 1.00 * vpsraw (%rax), %xmm1, %xmm2
+# CHECK-NEXT: 1 1 1.00 vpsrld $1, %xmm0, %xmm2
+# CHECK-NEXT: 2 2 1.00 vpsrld %xmm0, %xmm1, %xmm2
+# CHECK-NEXT: 3 8 1.00 * vpsrld (%rax), %xmm1, %xmm2
+# CHECK-NEXT: 1 1 0.50 vpsrldq $1, %xmm1, %xmm2
+# CHECK-NEXT: 1 1 1.00 vpsrlq $1, %xmm0, %xmm2
+# CHECK-NEXT: 2 2 1.00 vpsrlq %xmm0, %xmm1, %xmm2
+# CHECK-NEXT: 3 8 1.00 * vpsrlq (%rax), %xmm1, %xmm2
+# CHECK-NEXT: 1 1 1.00 vpsrlw $1, %xmm0, %xmm2
+# CHECK-NEXT: 2 2 1.00 vpsrlw %xmm0, %xmm1, %xmm2
+# CHECK-NEXT: 3 8 1.00 * vpsrlw (%rax), %xmm1, %xmm2
+# CHECK-NEXT: 1 1 0.50 vpsubb %xmm0, %xmm1, %xmm2
+# CHECK-NEXT: 2 7 0.50 * vpsubb (%rax), %xmm1, %xmm2
+# CHECK-NEXT: 1 1 0.50 vpsubd %xmm0, %xmm1, %xmm2
+# CHECK-NEXT: 2 7 0.50 * vpsubd (%rax), %xmm1, %xmm2
+# CHECK-NEXT: 1 1 0.50 vpsubq %xmm0, %xmm1, %xmm2
+# CHECK-NEXT: 2 7 0.50 * vpsubq (%rax), %xmm1, %xmm2
+# CHECK-NEXT: 1 1 0.50 vpsubsb %xmm0, %xmm1, %xmm2
+# CHECK-NEXT: 2 7 0.50 * vpsubsb (%rax), %xmm1, %xmm2
+# CHECK-NEXT: 1 1 0.50 vpsubsw %xmm0, %xmm1, %xmm2
+# CHECK-NEXT: 2 7 0.50 * vpsubsw (%rax), %xmm1, %xmm2
+# CHECK-NEXT: 1 1 0.50 vpsubusb %xmm0, %xmm1, %xmm2
+# CHECK-NEXT: 2 7 0.50 * vpsubusb (%rax), %xmm1, %xmm2
+# CHECK-NEXT: 1 1 0.50 vpsubusw %xmm0, %xmm1, %xmm2
+# CHECK-NEXT: 2 7 0.50 * vpsubusw (%rax), %xmm1, %xmm2
+# CHECK-NEXT: 1 1 0.50 vpsubw %xmm0, %xmm1, %xmm2
+# CHECK-NEXT: 2 7 0.50 * vpsubw (%rax), %xmm1, %xmm2
+# CHECK-NEXT: 2 2 1.00 vptest %xmm0, %xmm1
+# CHECK-NEXT: 3 8 1.00 * vptest (%rax), %xmm1
+# CHECK-NEXT: 2 2 1.00 vptest %ymm0, %ymm1
+# CHECK-NEXT: 3 9 1.00 * vptest (%rax), %ymm1
+# CHECK-NEXT: 1 1 0.50 vpunpckhbw %xmm0, %xmm1, %xmm2
+# CHECK-NEXT: 2 7 0.50 * vpunpckhbw (%rax), %xmm1, %xmm2
+# CHECK-NEXT: 1 1 0.50 vpunpckhdq %xmm0, %xmm1, %xmm2
+# CHECK-NEXT: 2 7 0.50 * vpunpckhdq (%rax), %xmm1, %xmm2
+# CHECK-NEXT: 1 1 0.50 vpunpckhqdq %xmm0, %xmm1, %xmm2
+# CHECK-NEXT: 2 7 0.50 * vpunpckhqdq (%rax), %xmm1, %xmm2
+# CHECK-NEXT: 1 1 0.50 vpunpckhwd %xmm0, %xmm1, %xmm2
+# CHECK-NEXT: 2 7 0.50 * vpunpckhwd (%rax), %xmm1, %xmm2
+# CHECK-NEXT: 1 1 0.50 vpunpcklbw %xmm0, %xmm1, %xmm2
+# CHECK-NEXT: 2 7 0.50 * vpunpcklbw (%rax), %xmm1, %xmm2
+# CHECK-NEXT: 1 1 0.50 vpunpckldq %xmm0, %xmm1, %xmm2
+# CHECK-NEXT: 2 7 0.50 * vpunpckldq (%rax), %xmm1, %xmm2
+# CHECK-NEXT: 1 1 0.50 vpunpcklqdq %xmm0, %xmm1, %xmm2
+# CHECK-NEXT: 2 7 0.50 * vpunpcklqdq (%rax), %xmm1, %xmm2
+# CHECK-NEXT: 1 1 0.50 vpunpcklwd %xmm0, %xmm1, %xmm2
+# CHECK-NEXT: 2 7 0.50 * vpunpcklwd (%rax), %xmm1, %xmm2
+# CHECK-NEXT: 1 1 0.33 vpxor %xmm0, %xmm1, %xmm2
+# CHECK-NEXT: 2 7 0.50 * vpxor (%rax), %xmm1, %xmm2
+# CHECK-NEXT: 1 5 1.00 vrcpps %xmm0, %xmm2
+# CHECK-NEXT: 2 11 1.00 * vrcpps (%rax), %xmm2
+# CHECK-NEXT: 3 7 2.00 vrcpps %ymm0, %ymm2
+# CHECK-NEXT: 4 14 2.00 * vrcpps (%rax), %ymm2
+# CHECK-NEXT: 1 5 1.00 vrcpss %xmm0, %xmm1, %xmm2
+# CHECK-NEXT: 2 11 1.00 * vrcpss (%rax), %xmm1, %xmm2
+# CHECK-NEXT: 1 3 1.00 vroundpd $1, %xmm0, %xmm2
+# CHECK-NEXT: 2 9 1.00 * vroundpd $1, (%rax), %xmm2
+# CHECK-NEXT: 1 3 1.00 vroundpd $1, %ymm0, %ymm2
+# CHECK-NEXT: 2 10 1.00 * vroundpd $1, (%rax), %ymm2
+# CHECK-NEXT: 1 3 1.00 vroundps $1, %xmm0, %xmm2
+# CHECK-NEXT: 2 9 1.00 * vroundps $1, (%rax), %xmm2
+# CHECK-NEXT: 1 3 1.00 vroundps $1, %ymm0, %ymm2
+# CHECK-NEXT: 2 10 1.00 * vroundps $1, (%rax), %ymm2
+# CHECK-NEXT: 1 3 1.00 vroundsd $1, %xmm0, %xmm1, %xmm2
+# CHECK-NEXT: 2 9 1.00 * vroundsd $1, (%rax), %xmm1, %xmm2
+# CHECK-NEXT: 1 3 1.00 vroundss $1, %xmm0, %xmm1, %xmm2
+# CHECK-NEXT: 2 9 1.00 * vroundss $1, (%rax), %xmm1, %xmm2
+# CHECK-NEXT: 1 5 1.00 vrsqrtps %xmm0, %xmm2
+# CHECK-NEXT: 2 11 1.00 * vrsqrtps (%rax), %xmm2
+# CHECK-NEXT: 3 7 2.00 vrsqrtps %ymm0, %ymm2
+# CHECK-NEXT: 4 14 2.00 * vrsqrtps (%rax), %ymm2
+# CHECK-NEXT: 1 5 1.00 vrsqrtss %xmm0, %xmm1, %xmm2
+# CHECK-NEXT: 2 11 1.00 * vrsqrtss (%rax), %xmm1, %xmm2
+# CHECK-NEXT: 1 1 1.00 vshufpd $1, %xmm0, %xmm1, %xmm2
+# CHECK-NEXT: 2 7 1.00 * vshufpd $1, (%rax), %xmm1, %xmm2
+# CHECK-NEXT: 1 1 1.00 vshufpd $1, %ymm0, %ymm1, %ymm2
+# CHECK-NEXT: 2 8 1.00 * vshufpd $1, (%rax), %ymm1, %ymm2
+# CHECK-NEXT: 1 1 1.00 vshufps $1, %xmm0, %xmm1, %xmm2
+# CHECK-NEXT: 2 7 1.00 * vshufps $1, (%rax), %xmm1, %xmm2
+# CHECK-NEXT: 1 1 1.00 vshufps $1, %ymm0, %ymm1, %ymm2
+# CHECK-NEXT: 2 8 1.00 * vshufps $1, (%rax), %ymm1, %ymm2
+# CHECK-NEXT: 1 21 21.00 vsqrtpd %xmm0, %xmm2
+# CHECK-NEXT: 2 27 21.00 * vsqrtpd (%rax), %xmm2
+# CHECK-NEXT: 3 45 44.00 vsqrtpd %ymm0, %ymm2
+# CHECK-NEXT: 4 52 44.00 * vsqrtpd (%rax), %ymm2
+# CHECK-NEXT: 1 14 14.00 vsqrtps %xmm0, %xmm2
+# CHECK-NEXT: 2 20 14.00 * vsqrtps (%rax), %xmm2
+# CHECK-NEXT: 3 29 28.00 vsqrtps %ymm0, %ymm2
+# CHECK-NEXT: 4 36 28.00 * vsqrtps (%rax), %ymm2
+# CHECK-NEXT: 1 21 21.00 vsqrtsd %xmm0, %xmm1, %xmm2
+# CHECK-NEXT: 2 27 21.00 * vsqrtsd (%rax), %xmm1, %xmm2
+# CHECK-NEXT: 1 14 14.00 vsqrtss %xmm0, %xmm1, %xmm2
+# CHECK-NEXT: 2 20 14.00 * vsqrtss (%rax), %xmm1, %xmm2
+# CHECK-NEXT: 4 5 1.00 * * U vstmxcsr (%rax)
+# CHECK-NEXT: 1 3 1.00 vsubpd %xmm0, %xmm1, %xmm2
+# CHECK-NEXT: 2 9 1.00 * vsubpd (%rax), %xmm1, %xmm2
+# CHECK-NEXT: 1 3 1.00 vsubpd %ymm0, %ymm1, %ymm2
+# CHECK-NEXT: 2 10 1.00 * vsubpd (%rax), %ymm1, %ymm2
+# CHECK-NEXT: 1 3 1.00 vsubps %xmm0, %xmm1, %xmm2
+# CHECK-NEXT: 2 9 1.00 * vsubps (%rax), %xmm1, %xmm2
+# CHECK-NEXT: 1 3 1.00 vsubps %ymm0, %ymm1, %ymm2
+# CHECK-NEXT: 2 10 1.00 * vsubps (%rax), %ymm1, %ymm2
+# CHECK-NEXT: 1 3 1.00 vsubsd %xmm0, %xmm1, %xmm2
+# CHECK-NEXT: 2 9 1.00 * vsubsd (%rax), %xmm1, %xmm2
+# CHECK-NEXT: 1 3 1.00 vsubss %xmm0, %xmm1, %xmm2
+# CHECK-NEXT: 2 9 1.00 * vsubss (%rax), %xmm1, %xmm2
+# CHECK-NEXT: 1 1 1.00 vtestpd %xmm0, %xmm1
+# CHECK-NEXT: 2 7 1.00 * vtestpd (%rax), %xmm1
+# CHECK-NEXT: 1 1 1.00 vtestpd %ymm0, %ymm1
+# CHECK-NEXT: 2 8 1.00 * vtestpd (%rax), %ymm1
+# CHECK-NEXT: 1 1 1.00 vtestps %xmm0, %xmm1
+# CHECK-NEXT: 2 7 1.00 * vtestps (%rax), %xmm1
+# CHECK-NEXT: 1 1 1.00 vtestps %ymm0, %ymm1
+# CHECK-NEXT: 2 8 1.00 * vtestps (%rax), %ymm1
+# CHECK-NEXT: 2 2 1.00 vucomisd %xmm0, %xmm1
+# CHECK-NEXT: 3 8 1.00 * vucomisd (%rax), %xmm1
+# CHECK-NEXT: 2 2 1.00 vucomiss %xmm0, %xmm1
+# CHECK-NEXT: 3 8 1.00 * vucomiss (%rax), %xmm1
+# CHECK-NEXT: 1 1 1.00 vunpckhpd %xmm0, %xmm1, %xmm2
+# CHECK-NEXT: 2 7 1.00 * vunpckhpd (%rax), %xmm1, %xmm2
+# CHECK-NEXT: 1 1 1.00 vunpckhpd %ymm0, %ymm1, %ymm2
+# CHECK-NEXT: 2 8 1.00 * vunpckhpd (%rax), %ymm1, %ymm2
+# CHECK-NEXT: 1 1 1.00 vunpckhps %xmm0, %xmm1, %xmm2
+# CHECK-NEXT: 2 7 1.00 * vunpckhps (%rax), %xmm1, %xmm2
+# CHECK-NEXT: 1 1 1.00 vunpckhps %ymm0, %ymm1, %ymm2
+# CHECK-NEXT: 2 8 1.00 * vunpckhps (%rax), %ymm1, %ymm2
+# CHECK-NEXT: 1 1 1.00 vunpcklpd %xmm0, %xmm1, %xmm2
+# CHECK-NEXT: 2 7 1.00 * vunpcklpd (%rax), %xmm1, %xmm2
+# CHECK-NEXT: 1 1 1.00 vunpcklpd %ymm0, %ymm1, %ymm2
+# CHECK-NEXT: 2 8 1.00 * vunpcklpd (%rax), %ymm1, %ymm2
+# CHECK-NEXT: 1 1 1.00 vunpcklps %xmm0, %xmm1, %xmm2
+# CHECK-NEXT: 2 7 1.00 * vunpcklps (%rax), %xmm1, %xmm2
+# CHECK-NEXT: 1 1 1.00 vunpcklps %ymm0, %ymm1, %ymm2
+# CHECK-NEXT: 2 8 1.00 * vunpcklps (%rax), %ymm1, %ymm2
+# CHECK-NEXT: 1 1 1.00 vxorpd %xmm0, %xmm1, %xmm2
+# CHECK-NEXT: 2 7 1.00 * vxorpd (%rax), %xmm1, %xmm2
+# CHECK-NEXT: 1 1 1.00 vxorpd %ymm0, %ymm1, %ymm2
+# CHECK-NEXT: 2 8 1.00 * vxorpd (%rax), %ymm1, %ymm2
+# CHECK-NEXT: 1 1 1.00 vxorps %xmm0, %xmm1, %xmm2
+# CHECK-NEXT: 2 7 1.00 * vxorps (%rax), %xmm1, %xmm2
+# CHECK-NEXT: 1 1 1.00 vxorps %ymm0, %ymm1, %ymm2
+# CHECK-NEXT: 2 8 1.00 * vxorps (%rax), %ymm1, %ymm2
+# CHECK-NEXT: 20 9 2.00 * * U vzeroall
+# CHECK-NEXT: 1 100 0.33 * * U vzeroupper
+
+# CHECK: Resources:
+# CHECK-NEXT: [0] - SBDivider
+# CHECK-NEXT: [1] - SBFPDivider
+# CHECK-NEXT: [2] - SBPort0
+# CHECK-NEXT: [3] - SBPort1
+# CHECK-NEXT: [4] - SBPort4
+# CHECK-NEXT: [5] - SBPort5
+# CHECK-NEXT: [6.0] - SBPort23
+# CHECK-NEXT: [6.1] - SBPort23
+
+# CHECK: Resource pressure per iteration:
+# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6.0] [6.1]
+# CHECK-NEXT: - 572.00 246.83 317.33 39.00 365.83 179.50 179.50
+
+# CHECK: Resource pressure by instruction:
+# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6.0] [6.1] Instructions:
+# CHECK-NEXT: - - - 1.00 - - - - vaddpd %xmm0, %xmm1, %xmm2
+# CHECK-NEXT: - - - 1.00 - - 0.50 0.50 vaddpd (%rax), %xmm1, %xmm2
+# CHECK-NEXT: - - - 1.00 - - - - vaddpd %ymm0, %ymm1, %ymm2
+# CHECK-NEXT: - - - 1.00 - - 0.50 0.50 vaddpd (%rax), %ymm1, %ymm2
+# CHECK-NEXT: - - - 1.00 - - - - vaddps %xmm0, %xmm1, %xmm2
+# CHECK-NEXT: - - - 1.00 - - 0.50 0.50 vaddps (%rax), %xmm1, %xmm2
+# CHECK-NEXT: - - - 1.00 - - - - vaddps %ymm0, %ymm1, %ymm2
+# CHECK-NEXT: - - - 1.00 - - 0.50 0.50 vaddps (%rax), %ymm1, %ymm2
+# CHECK-NEXT: - - - 1.00 - - - - vaddsd %xmm0, %xmm1, %xmm2
+# CHECK-NEXT: - - - 1.00 - - 0.50 0.50 vaddsd (%rax), %xmm1, %xmm2
+# CHECK-NEXT: - - - 1.00 - - - - vaddss %xmm0, %xmm1, %xmm2
+# CHECK-NEXT: - - - 1.00 - - 0.50 0.50 vaddss (%rax), %xmm1, %xmm2
+# CHECK-NEXT: - - - 1.00 - - - - vaddsubpd %xmm0, %xmm1, %xmm2
+# CHECK-NEXT: - - - 1.00 - - 0.50 0.50 vaddsubpd (%rax), %xmm1, %xmm2
+# CHECK-NEXT: - - - 1.00 - - - - vaddsubpd %ymm0, %ymm1, %ymm2
+# CHECK-NEXT: - - - 1.00 - - 0.50 0.50 vaddsubpd (%rax), %ymm1, %ymm2
+# CHECK-NEXT: - - - 1.00 - - - - vaddsubps %xmm0, %xmm1, %xmm2
+# CHECK-NEXT: - - - 1.00 - - 0.50 0.50 vaddsubps (%rax), %xmm1, %xmm2
+# CHECK-NEXT: - - - 1.00 - - - - vaddsubps %ymm0, %ymm1, %ymm2
+# CHECK-NEXT: - - - 1.00 - - 0.50 0.50 vaddsubps (%rax), %ymm1, %ymm2
+# CHECK-NEXT: - - 0.33 0.33 - 1.33 - - vaesdec %xmm0, %xmm1, %xmm2
+# CHECK-NEXT: - - 0.33 0.33 - 1.33 0.50 0.50 vaesdec (%rax), %xmm1, %xmm2
+# CHECK-NEXT: - - 0.33 0.33 - 1.33 - - vaesdeclast %xmm0, %xmm1, %xmm2
+# CHECK-NEXT: - - 0.33 0.33 - 1.33 0.50 0.50 vaesdeclast (%rax), %xmm1, %xmm2
+# CHECK-NEXT: - - 0.33 0.33 - 1.33 - - vaesenc %xmm0, %xmm1, %xmm2
+# CHECK-NEXT: - - 0.33 0.33 - 1.33 0.50 0.50 vaesenc (%rax), %xmm1, %xmm2
+# CHECK-NEXT: - - 0.33 0.33 - 1.33 - - vaesenclast %xmm0, %xmm1, %xmm2
+# CHECK-NEXT: - - 0.33 0.33 - 1.33 0.50 0.50 vaesenclast (%rax), %xmm1, %xmm2
+# CHECK-NEXT: - - - - - 2.00 - - vaesimc %xmm0, %xmm2
+# CHECK-NEXT: - - - - - 2.00 0.50 0.50 vaesimc (%rax), %xmm2
+# CHECK-NEXT: - - 3.67 3.67 - 3.67 - - vaeskeygenassist $22, %xmm0, %xmm2
+# CHECK-NEXT: - - 3.33 3.33 - 3.33 0.50 0.50 vaeskeygenassist $22, (%rax), %xmm2
+# CHECK-NEXT: - - - - - 1.00 - - vandnpd %xmm0, %xmm1, %xmm2
+# CHECK-NEXT: - - - - - 1.00 0.50 0.50 vandnpd (%rax), %xmm1, %xmm2
+# CHECK-NEXT: - - - - - 1.00 - - vandnpd %ymm0, %ymm1, %ymm2
+# CHECK-NEXT: - - - - - 1.00 0.50 0.50 vandnpd (%rax), %ymm1, %ymm2
+# CHECK-NEXT: - - - - - 1.00 - - vandnps %xmm0, %xmm1, %xmm2
+# CHECK-NEXT: - - - - - 1.00 0.50 0.50 vandnps (%rax), %xmm1, %xmm2
+# CHECK-NEXT: - - - - - 1.00 - - vandnps %ymm0, %ymm1, %ymm2
+# CHECK-NEXT: - - - - - 1.00 0.50 0.50 vandnps (%rax), %ymm1, %ymm2
+# CHECK-NEXT: - - - - - 1.00 - - vandpd %xmm0, %xmm1, %xmm2
+# CHECK-NEXT: - - - - - 1.00 0.50 0.50 vandpd (%rax), %xmm1, %xmm2
+# CHECK-NEXT: - - - - - 1.00 - - vandpd %ymm0, %ymm1, %ymm2
+# CHECK-NEXT: - - - - - 1.00 0.50 0.50 vandpd (%rax), %ymm1, %ymm2
+# CHECK-NEXT: - - - - - 1.00 - - vandps %xmm0, %xmm1, %xmm2
+# CHECK-NEXT: - - - - - 1.00 0.50 0.50 vandps (%rax), %xmm1, %xmm2
+# CHECK-NEXT: - - - - - 1.00 - - vandps %ymm0, %ymm1, %ymm2
+# CHECK-NEXT: - - - - - 1.00 0.50 0.50 vandps (%rax), %ymm1, %ymm2
+# CHECK-NEXT: - - 0.50 - - 0.50 - - vblendpd $11, %xmm0, %xmm1, %xmm2
+# CHECK-NEXT: - - 0.50 - - 0.50 0.50 0.50 vblendpd $11, (%rax), %xmm1, %xmm2
+# CHECK-NEXT: - - 0.50 - - 0.50 - - vblendpd $11, %ymm0, %ymm1, %ymm2
+# CHECK-NEXT: - - 0.50 - - 0.50 0.50 0.50 vblendpd $11, (%rax), %ymm1, %ymm2
+# CHECK-NEXT: - - 0.50 - - 0.50 - - vblendps $11, %xmm0, %xmm1, %xmm2
+# CHECK-NEXT: - - 0.50 - - 0.50 0.50 0.50 vblendps $11, (%rax), %xmm1, %xmm2
+# CHECK-NEXT: - - 0.50 - - 0.50 - - vblendps $11, %ymm0, %ymm1, %ymm2
+# CHECK-NEXT: - - 0.50 - - 0.50 0.50 0.50 vblendps $11, (%rax), %ymm1, %ymm2
+# CHECK-NEXT: - - 1.00 - - 1.00 - - vblendvpd %xmm3, %xmm0, %xmm1, %xmm2
+# CHECK-NEXT: - - 1.00 - - 1.00 0.50 0.50 vblendvpd %xmm3, (%rax), %xmm1, %xmm2
+# CHECK-NEXT: - - 1.00 - - 1.00 - - vblendvpd %ymm3, %ymm0, %ymm1, %ymm2
+# CHECK-NEXT: - - 1.00 - - 1.00 0.50 0.50 vblendvpd %ymm3, (%rax), %ymm1, %ymm2
+# CHECK-NEXT: - - 1.00 - - 1.00 - - vblendvps %xmm3, %xmm0, %xmm1, %xmm2
+# CHECK-NEXT: - - 1.00 - - 1.00 0.50 0.50 vblendvps %xmm3, (%rax), %xmm1, %xmm2
+# CHECK-NEXT: - - 1.00 - - 1.00 - - vblendvps %ymm3, %ymm0, %ymm1, %ymm2
+# CHECK-NEXT: - - 1.00 - - 1.00 0.50 0.50 vblendvps %ymm3, (%rax), %ymm1, %ymm2
+# CHECK-NEXT: - - - - - 1.00 0.50 0.50 vbroadcastf128 (%rax), %ymm2
+# CHECK-NEXT: - - - - - - 0.50 0.50 vbroadcastsd (%rax), %ymm2
+# CHECK-NEXT: - - - - - - 0.50 0.50 vbroadcastss (%rax), %xmm2
+# CHECK-NEXT: - - - - - - 0.50 0.50 vbroadcastss (%rax), %ymm2
+# CHECK-NEXT: - - - 1.00 - - - - vcmppd $0, %xmm0, %xmm1, %xmm2
+# CHECK-NEXT: - - - 1.00 - - 0.50 0.50 vcmppd $0, (%rax), %xmm1, %xmm2
+# CHECK-NEXT: - - - 1.00 - - - - vcmppd $0, %ymm0, %ymm1, %ymm2
+# CHECK-NEXT: - - - 1.00 - - 0.50 0.50 vcmppd $0, (%rax), %ymm1, %ymm2
+# CHECK-NEXT: - - - 1.00 - - - - vcmpps $0, %xmm0, %xmm1, %xmm2
+# CHECK-NEXT: - - - 1.00 - - 0.50 0.50 vcmpps $0, (%rax), %xmm1, %xmm2
+# CHECK-NEXT: - - - 1.00 - - - - vcmpps $0, %ymm0, %ymm1, %ymm2
+# CHECK-NEXT: - - - 1.00 - - 0.50 0.50 vcmpps $0, (%rax), %ymm1, %ymm2
+# CHECK-NEXT: - - - 1.00 - - - - vcmpsd $0, %xmm0, %xmm1, %xmm2
+# CHECK-NEXT: - - - 1.00 - - 0.50 0.50 vcmpsd $0, (%rax), %xmm1, %xmm2
+# CHECK-NEXT: - - - 1.00 - - - - vcmpss $0, %xmm0, %xmm1, %xmm2
+# CHECK-NEXT: - - - 1.00 - - 0.50 0.50 vcmpss $0, (%rax), %xmm1, %xmm2
+# CHECK-NEXT: - - 1.00 1.00 - - - - vcomisd %xmm0, %xmm1
+# CHECK-NEXT: - - 1.00 1.00 - - 0.50 0.50 vcomisd (%rax), %xmm1
+# CHECK-NEXT: - - 1.00 1.00 - - - - vcomiss %xmm0, %xmm1
+# CHECK-NEXT: - - 1.00 1.00 - - 0.50 0.50 vcomiss (%rax), %xmm1
+# CHECK-NEXT: - - - 1.00 - 1.00 - - vcvtdq2pd %xmm0, %xmm2
+# CHECK-NEXT: - - - 1.00 - 1.00 0.50 0.50 vcvtdq2pd (%rax), %xmm2
+# CHECK-NEXT: - - - 1.00 - 1.00 - - vcvtdq2pd %xmm0, %ymm2
+# CHECK-NEXT: - - - 1.00 - 1.00 0.50 0.50 vcvtdq2pd (%rax), %ymm2
+# CHECK-NEXT: - - - 1.00 - - - - vcvtdq2ps %xmm0, %xmm2
+# CHECK-NEXT: - - - 1.00 - - 0.50 0.50 vcvtdq2ps (%rax), %xmm2
+# CHECK-NEXT: - - - 1.00 - - - - vcvtdq2ps %ymm0, %ymm2
+# CHECK-NEXT: - - - 1.00 - - 0.50 0.50 vcvtdq2ps (%rax), %ymm2
+# CHECK-NEXT: - - - 1.00 - 1.00 - - vcvtpd2dq %xmm0, %xmm2
+# CHECK-NEXT: - - - 1.00 - 1.00 0.50 0.50 vcvtpd2dqx (%rax), %xmm2
+# CHECK-NEXT: - - - 1.00 - 1.00 - - vcvtpd2dq %ymm0, %xmm2
+# CHECK-NEXT: - - - 1.00 - 1.00 0.50 0.50 vcvtpd2dqy (%rax), %xmm2
+# CHECK-NEXT: - - - 1.00 - 1.00 - - vcvtpd2ps %xmm0, %xmm2
+# CHECK-NEXT: - - - 1.00 - 1.00 0.50 0.50 vcvtpd2psx (%rax), %xmm2
+# CHECK-NEXT: - - - 1.00 - 1.00 - - vcvtpd2ps %ymm0, %xmm2
+# CHECK-NEXT: - - - 1.00 - 1.00 0.50 0.50 vcvtpd2psy (%rax), %xmm2
+# CHECK-NEXT: - - - 1.00 - - - - vcvtps2dq %xmm0, %xmm2
+# CHECK-NEXT: - - - 1.00 - - 0.50 0.50 vcvtps2dq (%rax), %xmm2
+# CHECK-NEXT: - - - 1.00 - - - - vcvtps2dq %ymm0, %ymm2
+# CHECK-NEXT: - - - 1.00 - - 0.50 0.50 vcvtps2dq (%rax), %ymm2
+# CHECK-NEXT: - - 1.00 - - 1.00 - - vcvtps2pd %xmm0, %xmm2
+# CHECK-NEXT: - - 1.00 - - - 0.50 0.50 vcvtps2pd (%rax), %xmm2
+# CHECK-NEXT: - - 1.00 - - 1.00 - - vcvtps2pd %xmm0, %ymm2
+# CHECK-NEXT: - - 1.00 - - - 0.50 0.50 vcvtps2pd (%rax), %ymm2
+# CHECK-NEXT: - - 1.00 1.00 - - - - vcvtsd2si %xmm0, %ecx
+# CHECK-NEXT: - - 1.00 1.00 - - - - vcvtsd2si %xmm0, %rcx
+# CHECK-NEXT: - - 1.00 1.00 - - 0.50 0.50 vcvtsd2si (%rax), %ecx
+# CHECK-NEXT: - - 1.00 1.00 - - 0.50 0.50 vcvtsd2si (%rax), %rcx
+# CHECK-NEXT: - - - 1.00 - 1.00 - - vcvtsd2ss %xmm0, %xmm1, %xmm2
+# CHECK-NEXT: - - - 1.00 - 1.00 0.50 0.50 vcvtsd2ss (%rax), %xmm1, %xmm2
+# CHECK-NEXT: - - - 1.00 - 1.00 - - vcvtsi2sdl %ecx, %xmm0, %xmm2
+# CHECK-NEXT: - - - 1.00 - 1.00 - - vcvtsi2sdq %rcx, %xmm0, %xmm2
+# CHECK-NEXT: - - - 1.00 - - 0.50 0.50 vcvtsi2sdl (%rax), %xmm0, %xmm2
+# CHECK-NEXT: - - - 1.00 - - 0.50 0.50 vcvtsi2sdq (%rax), %xmm0, %xmm2
+# CHECK-NEXT: - - - 1.00 - 2.00 - - vcvtsi2ssl %ecx, %xmm0, %xmm2
+# CHECK-NEXT: - - - 1.00 - 2.00 - - vcvtsi2ssq %rcx, %xmm0, %xmm2
+# CHECK-NEXT: - - - 1.00 - 1.00 0.50 0.50 vcvtsi2ssl (%rax), %xmm0, %xmm2
+# CHECK-NEXT: - - - 1.00 - 1.00 0.50 0.50 vcvtsi2ssq (%rax), %xmm0, %xmm2
+# CHECK-NEXT: - - 1.00 - - - - - vcvtss2sd %xmm0, %xmm1, %xmm2
+# CHECK-NEXT: - - 1.00 - - - 0.50 0.50 vcvtss2sd (%rax), %xmm1, %xmm2
+# CHECK-NEXT: - - 1.00 1.00 - - - - vcvtss2si %xmm0, %ecx
+# CHECK-NEXT: - - 1.00 1.00 - - - - vcvtss2si %xmm0, %rcx
+# CHECK-NEXT: - - 1.00 1.00 - - 0.50 0.50 vcvtss2si (%rax), %ecx
+# CHECK-NEXT: - - 1.00 1.00 - - 0.50 0.50 vcvtss2si (%rax), %rcx
+# CHECK-NEXT: - - - 1.00 - 1.00 - - vcvttpd2dq %xmm0, %xmm2
+# CHECK-NEXT: - - - 1.00 - 1.00 0.50 0.50 vcvttpd2dqx (%rax), %xmm2
+# CHECK-NEXT: - - - 1.00 - 1.00 - - vcvttpd2dq %ymm0, %xmm2
+# CHECK-NEXT: - - - 1.00 - 1.00 0.50 0.50 vcvttpd2dqy (%rax), %xmm2
+# CHECK-NEXT: - - - 1.00 - - - - vcvttps2dq %xmm0, %xmm2
+# CHECK-NEXT: - - - 1.00 - - 0.50 0.50 vcvttps2dq (%rax), %xmm2
+# CHECK-NEXT: - - - 1.00 - - - - vcvttps2dq %ymm0, %ymm2
+# CHECK-NEXT: - - - 1.00 - - 0.50 0.50 vcvttps2dq (%rax), %ymm2
+# CHECK-NEXT: - - 1.00 1.00 - - - - vcvttsd2si %xmm0, %ecx
+# CHECK-NEXT: - - 1.00 1.00 - - - - vcvttsd2si %xmm0, %rcx
+# CHECK-NEXT: - - 1.00 1.00 - - 0.50 0.50 vcvttsd2si (%rax), %ecx
+# CHECK-NEXT: - - 1.00 1.00 - - 0.50 0.50 vcvttsd2si (%rax), %rcx
+# CHECK-NEXT: - - 1.00 1.00 - - - - vcvttss2si %xmm0, %ecx
+# CHECK-NEXT: - - 1.00 1.00 - - - - vcvttss2si %xmm0, %rcx
+# CHECK-NEXT: - - 1.00 1.00 - - 0.50 0.50 vcvttss2si (%rax), %ecx
+# CHECK-NEXT: - - 1.00 1.00 - - 0.50 0.50 vcvttss2si (%rax), %rcx
+# CHECK-NEXT: - 22.00 1.00 - - - - - vdivpd %xmm0, %xmm1, %xmm2
+# CHECK-NEXT: - 22.00 1.00 - - - 0.50 0.50 vdivpd (%rax), %xmm1, %xmm2
+# CHECK-NEXT: - 44.00 2.50 - - 0.50 - - vdivpd %ymm0, %ymm1, %ymm2
+# CHECK-NEXT: - 44.00 2.50 - - 0.50 0.50 0.50 vdivpd (%rax), %ymm1, %ymm2
+# CHECK-NEXT: - 14.00 1.00 - - - - - vdivps %xmm0, %xmm1, %xmm2
+# CHECK-NEXT: - 14.00 1.00 - - - 0.50 0.50 vdivps (%rax), %xmm1, %xmm2
+# CHECK-NEXT: - 28.00 2.50 - - 0.50 - - vdivps %ymm0, %ymm1, %ymm2
+# CHECK-NEXT: - 28.00 2.50 - - 0.50 0.50 0.50 vdivps (%rax), %ymm1, %ymm2
+# CHECK-NEXT: - 22.00 1.00 - - - - - vdivsd %xmm0, %xmm1, %xmm2
+# CHECK-NEXT: - 22.00 1.00 - - - 0.50 0.50 vdivsd (%rax), %xmm1, %xmm2
+# CHECK-NEXT: - 14.00 1.00 - - - - - vdivss %xmm0, %xmm1, %xmm2
+# CHECK-NEXT: - 14.00 1.00 - - - 0.50 0.50 vdivss (%rax), %xmm1, %xmm2
+# CHECK-NEXT: - - 1.00 1.00 - 1.00 - - vdppd $22, %xmm0, %xmm1, %xmm2
+# CHECK-NEXT: - - 1.00 1.00 - 1.00 0.50 0.50 vdppd $22, (%rax), %xmm1, %xmm2
+# CHECK-NEXT: - - 1.00 2.00 - 1.00 - - vdpps $22, %xmm0, %xmm1, %xmm2
+# CHECK-NEXT: - - 1.00 2.00 - 1.00 0.50 0.50 vdpps $22, (%rax), %xmm1, %xmm2
+# CHECK-NEXT: - - 1.00 2.00 - 1.00 - - vdpps $22, %ymm0, %ymm1, %ymm2
+# CHECK-NEXT: - - 1.00 2.00 - 1.00 0.50 0.50 vdpps $22, (%rax), %ymm1, %ymm2
+# CHECK-NEXT: - - - - - 1.00 - - vextractf128 $1, %ymm0, %xmm2
+# CHECK-NEXT: - - - - 1.00 - 0.50 0.50 vextractf128 $1, %ymm0, (%rax)
+# CHECK-NEXT: - - 1.00 - - 1.00 - - vextractps $1, %xmm0, %ecx
+# CHECK-NEXT: - - - - 1.00 1.00 0.50 0.50 vextractps $1, %xmm0, (%rax)
+# CHECK-NEXT: - - - 1.00 - 2.00 - - vhaddpd %xmm0, %xmm1, %xmm2
+# CHECK-NEXT: - - - 1.00 - 2.00 0.50 0.50 vhaddpd (%rax), %xmm1, %xmm2
+# CHECK-NEXT: - - - 1.00 - 2.00 - - vhaddpd %ymm0, %ymm1, %ymm2
+# CHECK-NEXT: - - - 1.00 - 2.00 0.50 0.50 vhaddpd (%rax), %ymm1, %ymm2
+# CHECK-NEXT: - - - 1.00 - 2.00 - - vhaddps %xmm0, %xmm1, %xmm2
+# CHECK-NEXT: - - - 1.00 - 2.00 0.50 0.50 vhaddps (%rax), %xmm1, %xmm2
+# CHECK-NEXT: - - - 1.00 - 2.00 - - vhaddps %ymm0, %ymm1, %ymm2
+# CHECK-NEXT: - - - 1.00 - 2.00 0.50 0.50 vhaddps (%rax), %ymm1, %ymm2
+# CHECK-NEXT: - - - 1.00 - 2.00 - - vhsubpd %xmm0, %xmm1, %xmm2
+# CHECK-NEXT: - - - 1.00 - 2.00 0.50 0.50 vhsubpd (%rax), %xmm1, %xmm2
+# CHECK-NEXT: - - - 1.00 - 2.00 - - vhsubpd %ymm0, %ymm1, %ymm2
+# CHECK-NEXT: - - - 1.00 - 2.00 0.50 0.50 vhsubpd (%rax), %ymm1, %ymm2
+# CHECK-NEXT: - - - 1.00 - 2.00 - - vhsubps %xmm0, %xmm1, %xmm2
+# CHECK-NEXT: - - - 1.00 - 2.00 0.50 0.50 vhsubps (%rax), %xmm1, %xmm2
+# CHECK-NEXT: - - - 1.00 - 2.00 - - vhsubps %ymm0, %ymm1, %ymm2
+# CHECK-NEXT: - - - 1.00 - 2.00 0.50 0.50 vhsubps (%rax), %ymm1, %ymm2
+# CHECK-NEXT: - - - - - 1.00 - - vinsertf128 $1, %xmm0, %ymm1, %ymm2
+# CHECK-NEXT: - - 0.50 - - 0.50 0.50 0.50 vinsertf128 $1, (%rax), %ymm1, %ymm2
+# CHECK-NEXT: - - - - - 1.00 - - vinsertps $1, %xmm0, %xmm1, %xmm2
+# CHECK-NEXT: - - - - - 1.00 0.50 0.50 vinsertps $1, (%rax), %xmm1, %xmm2
+# CHECK-NEXT: - - - - - - 0.50 0.50 vlddqu (%rax), %xmm2
+# CHECK-NEXT: - - - - - - 0.50 0.50 vlddqu (%rax), %ymm2
+# CHECK-NEXT: - - 1.00 - 1.00 1.00 0.50 0.50 vldmxcsr (%rax)
+# CHECK-NEXT: - - - - 1.00 - 0.50 0.50 vmaskmovdqu %xmm0, %xmm1
+# CHECK-NEXT: - - 1.00 - - 1.00 0.50 0.50 vmaskmovpd (%rax), %xmm0, %xmm2
+# CHECK-NEXT: - - 1.00 - - 1.00 0.50 0.50 vmaskmovpd (%rax), %ymm0, %ymm2
+# CHECK-NEXT: - - 0.50 0.50 1.00 - 0.50 0.50 vmaskmovpd %xmm0, %xmm1, (%rax)
+# CHECK-NEXT: - - 0.50 0.50 1.00 - 0.50 0.50 vmaskmovpd %ymm0, %ymm1, (%rax)
+# CHECK-NEXT: - - 1.00 - - 1.00 0.50 0.50 vmaskmovps (%rax), %xmm0, %xmm2
+# CHECK-NEXT: - - 1.00 - - 1.00 0.50 0.50 vmaskmovps (%rax), %ymm0, %ymm2
+# CHECK-NEXT: - - 0.50 0.50 1.00 - 0.50 0.50 vmaskmovps %xmm0, %xmm1, (%rax)
+# CHECK-NEXT: - - 0.50 0.50 1.00 - 0.50 0.50 vmaskmovps %ymm0, %ymm1, (%rax)
+# CHECK-NEXT: - - - 1.00 - - - - vmaxpd %xmm0, %xmm1, %xmm2
+# CHECK-NEXT: - - - 1.00 - - 0.50 0.50 vmaxpd (%rax), %xmm1, %xmm2
+# CHECK-NEXT: - - - 1.00 - - - - vmaxpd %ymm0, %ymm1, %ymm2
+# CHECK-NEXT: - - - 1.00 - - 0.50 0.50 vmaxpd (%rax), %ymm1, %ymm2
+# CHECK-NEXT: - - - 1.00 - - - - vmaxps %xmm0, %xmm1, %xmm2
+# CHECK-NEXT: - - - 1.00 - - 0.50 0.50 vmaxps (%rax), %xmm1, %xmm2
+# CHECK-NEXT: - - - 1.00 - - - - vmaxps %ymm0, %ymm1, %ymm2
+# CHECK-NEXT: - - - 1.00 - - 0.50 0.50 vmaxps (%rax), %ymm1, %ymm2
+# CHECK-NEXT: - - - 1.00 - - - - vmaxsd %xmm0, %xmm1, %xmm2
+# CHECK-NEXT: - - - 1.00 - - 0.50 0.50 vmaxsd (%rax), %xmm1, %xmm2
+# CHECK-NEXT: - - - 1.00 - - - - vmaxss %xmm0, %xmm1, %xmm2
+# CHECK-NEXT: - - - 1.00 - - 0.50 0.50 vmaxss (%rax), %xmm1, %xmm2
+# CHECK-NEXT: - - - 1.00 - - - - vminpd %xmm0, %xmm1, %xmm2
+# CHECK-NEXT: - - - 1.00 - - 0.50 0.50 vminpd (%rax), %xmm1, %xmm2
+# CHECK-NEXT: - - - 1.00 - - - - vminpd %ymm0, %ymm1, %ymm2
+# CHECK-NEXT: - - - 1.00 - - 0.50 0.50 vminpd (%rax), %ymm1, %ymm2
+# CHECK-NEXT: - - - 1.00 - - - - vminps %xmm0, %xmm1, %xmm2
+# CHECK-NEXT: - - - 1.00 - - 0.50 0.50 vminps (%rax), %xmm1, %xmm2
+# CHECK-NEXT: - - - 1.00 - - - - vminps %ymm0, %ymm1, %ymm2
+# CHECK-NEXT: - - - 1.00 - - 0.50 0.50 vminps (%rax), %ymm1, %ymm2
+# CHECK-NEXT: - - - 1.00 - - - - vminsd %xmm0, %xmm1, %xmm2
+# CHECK-NEXT: - - - 1.00 - - 0.50 0.50 vminsd (%rax), %xmm1, %xmm2
+# CHECK-NEXT: - - - 1.00 - - - - vminss %xmm0, %xmm1, %xmm2
+# CHECK-NEXT: - - - 1.00 - - 0.50 0.50 vminss (%rax), %xmm1, %xmm2
+# CHECK-NEXT: - - - - - 1.00 - - vmovapd %xmm0, %xmm2
+# CHECK-NEXT: - - - - 1.00 - 0.50 0.50 vmovapd %xmm0, (%rax)
+# CHECK-NEXT: - - - - - - 0.50 0.50 vmovapd (%rax), %xmm2
+# CHECK-NEXT: - - - - - 1.00 - - vmovapd %ymm0, %ymm2
+# CHECK-NEXT: - - - - 1.00 - 0.50 0.50 vmovapd %ymm0, (%rax)
+# CHECK-NEXT: - - - - - - 0.50 0.50 vmovapd (%rax), %ymm2
+# CHECK-NEXT: - - - - - 1.00 - - vmovaps %xmm0, %xmm2
+# CHECK-NEXT: - - - - 1.00 - 0.50 0.50 vmovaps %xmm0, (%rax)
+# CHECK-NEXT: - - - - - - 0.50 0.50 vmovaps (%rax), %xmm2
+# CHECK-NEXT: - - - - - 1.00 - - vmovaps %ymm0, %ymm2
+# CHECK-NEXT: - - - - 1.00 - 0.50 0.50 vmovaps %ymm0, (%rax)
+# CHECK-NEXT: - - - - - - 0.50 0.50 vmovaps (%rax), %ymm2
+# CHECK-NEXT: - - - - - 1.00 - - vmovd %eax, %xmm2
+# CHECK-NEXT: - - - - - - 0.50 0.50 vmovd (%rax), %xmm2
+# CHECK-NEXT: - - 1.00 - - - - - vmovd %xmm0, %ecx
+# CHECK-NEXT: - - - - 1.00 - 0.50 0.50 vmovd %xmm0, (%rax)
+# CHECK-NEXT: - - - - - 1.00 - - vmovddup %xmm0, %xmm2
+# CHECK-NEXT: - - - - - - 0.50 0.50 vmovddup (%rax), %xmm2
+# CHECK-NEXT: - - - - - 1.00 - - vmovddup %ymm0, %ymm2
+# CHECK-NEXT: - - - - - - 0.50 0.50 vmovddup (%rax), %ymm2
+# CHECK-NEXT: - - 0.33 0.33 - 0.33 - - vmovdqa %xmm0, %xmm2
+# CHECK-NEXT: - - - - 1.00 - 0.50 0.50 vmovdqa %xmm0, (%rax)
+# CHECK-NEXT: - - - - - - 0.50 0.50 vmovdqa (%rax), %xmm2
+# CHECK-NEXT: - - 0.50 - - 0.50 - - vmovdqa %ymm0, %ymm2
+# CHECK-NEXT: - - - - 1.00 - 0.50 0.50 vmovdqa %ymm0, (%rax)
+# CHECK-NEXT: - - - - - - 0.50 0.50 vmovdqa (%rax), %ymm2
+# CHECK-NEXT: - - 0.33 0.33 - 0.33 - - vmovdqu %xmm0, %xmm2
+# CHECK-NEXT: - - - - 1.00 - 0.50 0.50 vmovdqu %xmm0, (%rax)
+# CHECK-NEXT: - - - - - - 0.50 0.50 vmovdqu (%rax), %xmm2
+# CHECK-NEXT: - - 0.50 - - 0.50 - - vmovdqu %ymm0, %ymm2
+# CHECK-NEXT: - - - - 1.00 - 0.50 0.50 vmovdqu %ymm0, (%rax)
+# CHECK-NEXT: - - - - - - 0.50 0.50 vmovdqu (%rax), %ymm2
+# CHECK-NEXT: - - - - - 1.00 - - vmovhlps %xmm0, %xmm1, %xmm2
+# CHECK-NEXT: - - - - - 1.00 - - vmovlhps %xmm0, %xmm1, %xmm2
+# CHECK-NEXT: - - - - 1.00 - 0.50 0.50 vmovhpd %xmm0, (%rax)
+# CHECK-NEXT: - - - - - 1.00 0.50 0.50 vmovhpd (%rax), %xmm1, %xmm2
+# CHECK-NEXT: - - - - 1.00 - 0.50 0.50 vmovhps %xmm0, (%rax)
+# CHECK-NEXT: - - - - - 1.00 0.50 0.50 vmovhps (%rax), %xmm1, %xmm2
+# CHECK-NEXT: - - - - 1.00 - 0.50 0.50 vmovlpd %xmm0, (%rax)
+# CHECK-NEXT: - - - - - 1.00 0.50 0.50 vmovlpd (%rax), %xmm1, %xmm2
+# CHECK-NEXT: - - - - 1.00 - 0.50 0.50 vmovlps %xmm0, (%rax)
+# CHECK-NEXT: - - - - - 1.00 0.50 0.50 vmovlps (%rax), %xmm1, %xmm2
+# CHECK-NEXT: - - 1.00 - - - - - vmovmskpd %xmm0, %ecx
+# CHECK-NEXT: - - 1.00 - - - - - vmovmskpd %ymm0, %ecx
+# CHECK-NEXT: - - 1.00 - - - - - vmovmskps %xmm0, %ecx
+# CHECK-NEXT: - - 1.00 - - - - - vmovmskps %ymm0, %ecx
+# CHECK-NEXT: - - - - 1.00 - 0.50 0.50 vmovntdq %xmm0, (%rax)
+# CHECK-NEXT: - - - - 1.00 - 0.50 0.50 vmovntdq %ymm0, (%rax)
+# CHECK-NEXT: - - - - - - 0.50 0.50 vmovntdqa (%rax), %xmm2
+# CHECK-NEXT: - - - - - - 0.50 0.50 vmovntdqa (%rax), %ymm2
+# CHECK-NEXT: - - - - 1.00 - 0.50 0.50 vmovntpd %xmm0, (%rax)
+# CHECK-NEXT: - - - - 1.00 - 0.50 0.50 vmovntpd %ymm0, (%rax)
+# CHECK-NEXT: - - - - 1.00 - 0.50 0.50 vmovntps %xmm0, (%rax)
+# CHECK-NEXT: - - - - 1.00 - 0.50 0.50 vmovntps %ymm0, (%rax)
+# CHECK-NEXT: - - 0.33 0.33 - 0.33 - - vmovq %xmm0, %xmm2
+# CHECK-NEXT: - - - - - 1.00 - - vmovq %rax, %xmm2
+# CHECK-NEXT: - - - - - - 0.50 0.50 vmovq (%rax), %xmm2
+# CHECK-NEXT: - - 1.00 - - - - - vmovq %xmm0, %rcx
+# CHECK-NEXT: - - - - 1.00 - 0.50 0.50 vmovq %xmm0, (%rax)
+# CHECK-NEXT: - - - - - 1.00 - - vmovsd %xmm0, %xmm1, %xmm2
+# CHECK-NEXT: - - - - 1.00 - 0.50 0.50 vmovsd %xmm0, (%rax)
+# CHECK-NEXT: - - - - - - 0.50 0.50 vmovsd (%rax), %xmm2
+# CHECK-NEXT: - - - - - 1.00 - - vmovshdup %xmm0, %xmm2
+# CHECK-NEXT: - - - - - - 0.50 0.50 vmovshdup (%rax), %xmm2
+# CHECK-NEXT: - - - - - 1.00 - - vmovshdup %ymm0, %ymm2
+# CHECK-NEXT: - - - - - - 0.50 0.50 vmovshdup (%rax), %ymm2
+# CHECK-NEXT: - - - - - 1.00 - - vmovsldup %xmm0, %xmm2
+# CHECK-NEXT: - - - - - - 0.50 0.50 vmovsldup (%rax), %xmm2
+# CHECK-NEXT: - - - - - 1.00 - - vmovsldup %ymm0, %ymm2
+# CHECK-NEXT: - - - - - - 0.50 0.50 vmovsldup (%rax), %ymm2
+# CHECK-NEXT: - - - - - 1.00 - - vmovss %xmm0, %xmm1, %xmm2
+# CHECK-NEXT: - - - - 1.00 - 0.50 0.50 vmovss %xmm0, (%rax)
+# CHECK-NEXT: - - - - - - 0.50 0.50 vmovss (%rax), %xmm2
+# CHECK-NEXT: - - - - - 1.00 - - vmovupd %xmm0, %xmm2
+# CHECK-NEXT: - - - - 1.00 - 0.50 0.50 vmovupd %xmm0, (%rax)
+# CHECK-NEXT: - - - - - - 0.50 0.50 vmovupd (%rax), %xmm2
+# CHECK-NEXT: - - - - - 1.00 - - vmovupd %ymm0, %ymm2
+# CHECK-NEXT: - - - - 1.00 - 0.50 0.50 vmovupd %ymm0, (%rax)
+# CHECK-NEXT: - - - - - - 0.50 0.50 vmovupd (%rax), %ymm2
+# CHECK-NEXT: - - - - - 1.00 - - vmovups %xmm0, %xmm2
+# CHECK-NEXT: - - - - 1.00 - 0.50 0.50 vmovups %xmm0, (%rax)
+# CHECK-NEXT: - - - - - - 0.50 0.50 vmovups (%rax), %xmm2
+# CHECK-NEXT: - - - - - 1.00 - - vmovups %ymm0, %ymm2
+# CHECK-NEXT: - - - - 1.00 - 0.50 0.50 vmovups %ymm0, (%rax)
+# CHECK-NEXT: - - - - - - 0.50 0.50 vmovups (%rax), %ymm2
+# CHECK-NEXT: - - 1.00 1.00 - 1.00 - - vmpsadbw $1, %xmm0, %xmm1, %xmm2
+# CHECK-NEXT: - - 1.00 1.00 - 1.00 0.50 0.50 vmpsadbw $1, (%rax), %xmm1, %xmm2
+# CHECK-NEXT: - - 1.00 - - - - - vmulpd %xmm0, %xmm1, %xmm2
+# CHECK-NEXT: - - 1.00 - - - 0.50 0.50 vmulpd (%rax), %xmm1, %xmm2
+# CHECK-NEXT: - - 1.00 - - - - - vmulpd %ymm0, %ymm1, %ymm2
+# CHECK-NEXT: - - 1.00 - - - 0.50 0.50 vmulpd (%rax), %ymm1, %ymm2
+# CHECK-NEXT: - - 1.00 - - - - - vmulps %xmm0, %xmm1, %xmm2
+# CHECK-NEXT: - - 1.00 - - - 0.50 0.50 vmulps (%rax), %xmm1, %xmm2
+# CHECK-NEXT: - - 1.00 - - - - - vmulps %ymm0, %ymm1, %ymm2
+# CHECK-NEXT: - - 1.00 - - - 0.50 0.50 vmulps (%rax), %ymm1, %ymm2
+# CHECK-NEXT: - - 1.00 - - - - - vmulsd %xmm0, %xmm1, %xmm2
+# CHECK-NEXT: - - 1.00 - - - 0.50 0.50 vmulsd (%rax), %xmm1, %xmm2
+# CHECK-NEXT: - - 1.00 - - - - - vmulss %xmm0, %xmm1, %xmm2
+# CHECK-NEXT: - - 1.00 - - - 0.50 0.50 vmulss (%rax), %xmm1, %xmm2
+# CHECK-NEXT: - - - - - 1.00 - - vorpd %xmm0, %xmm1, %xmm2
+# CHECK-NEXT: - - - - - 1.00 0.50 0.50 vorpd (%rax), %xmm1, %xmm2
+# CHECK-NEXT: - - - - - 1.00 - - vorpd %ymm0, %ymm1, %ymm2
+# CHECK-NEXT: - - - - - 1.00 0.50 0.50 vorpd (%rax), %ymm1, %ymm2
+# CHECK-NEXT: - - - - - 1.00 - - vorps %xmm0, %xmm1, %xmm2
+# CHECK-NEXT: - - - - - 1.00 0.50 0.50 vorps (%rax), %xmm1, %xmm2
+# CHECK-NEXT: - - - - - 1.00 - - vorps %ymm0, %ymm1, %ymm2
+# CHECK-NEXT: - - - - - 1.00 0.50 0.50 vorps (%rax), %ymm1, %ymm2
+# CHECK-NEXT: - - - 0.50 - 0.50 - - vpabsb %xmm0, %xmm2
+# CHECK-NEXT: - - - 0.50 - 0.50 0.50 0.50 vpabsb (%rax), %xmm2
+# CHECK-NEXT: - - - 0.50 - 0.50 - - vpabsd %xmm0, %xmm2
+# CHECK-NEXT: - - - 0.50 - 0.50 0.50 0.50 vpabsd (%rax), %xmm2
+# CHECK-NEXT: - - - 0.50 - 0.50 - - vpabsw %xmm0, %xmm2
+# CHECK-NEXT: - - - 0.50 - 0.50 0.50 0.50 vpabsw (%rax), %xmm2
+# CHECK-NEXT: - - - 0.50 - 0.50 - - vpackssdw %xmm0, %xmm1, %xmm2
+# CHECK-NEXT: - - - 0.50 - 0.50 0.50 0.50 vpackssdw (%rax), %xmm1, %xmm2
+# CHECK-NEXT: - - - 0.50 - 0.50 - - vpacksswb %xmm0, %xmm1, %xmm2
+# CHECK-NEXT: - - - 0.50 - 0.50 0.50 0.50 vpacksswb (%rax), %xmm1, %xmm2
+# CHECK-NEXT: - - - 0.50 - 0.50 - - vpackusdw %xmm0, %xmm1, %xmm2
+# CHECK-NEXT: - - - 0.50 - 0.50 0.50 0.50 vpackusdw (%rax), %xmm1, %xmm2
+# CHECK-NEXT: - - - 0.50 - 0.50 - - vpackuswb %xmm0, %xmm1, %xmm2
+# CHECK-NEXT: - - - 0.50 - 0.50 0.50 0.50 vpackuswb (%rax), %xmm1, %xmm2
+# CHECK-NEXT: - - - 0.50 - 0.50 - - vpaddb %xmm0, %xmm1, %xmm2
+# CHECK-NEXT: - - - 0.50 - 0.50 0.50 0.50 vpaddb (%rax), %xmm1, %xmm2
+# CHECK-NEXT: - - - 0.50 - 0.50 - - vpaddd %xmm0, %xmm1, %xmm2
+# CHECK-NEXT: - - - 0.50 - 0.50 0.50 0.50 vpaddd (%rax), %xmm1, %xmm2
+# CHECK-NEXT: - - - 0.50 - 0.50 - - vpaddq %xmm0, %xmm1, %xmm2
+# CHECK-NEXT: - - - 0.50 - 0.50 0.50 0.50 vpaddq (%rax), %xmm1, %xmm2
+# CHECK-NEXT: - - - 0.50 - 0.50 - - vpaddsb %xmm0, %xmm1, %xmm2
+# CHECK-NEXT: - - - 0.50 - 0.50 0.50 0.50 vpaddsb (%rax), %xmm1, %xmm2
+# CHECK-NEXT: - - - 0.50 - 0.50 - - vpaddsw %xmm0, %xmm1, %xmm2
+# CHECK-NEXT: - - - 0.50 - 0.50 0.50 0.50 vpaddsw (%rax), %xmm1, %xmm2
+# CHECK-NEXT: - - - 0.50 - 0.50 - - vpaddusb %xmm0, %xmm1, %xmm2
+# CHECK-NEXT: - - - 0.50 - 0.50 0.50 0.50 vpaddusb (%rax), %xmm1, %xmm2
+# CHECK-NEXT: - - - 0.50 - 0.50 - - vpaddusw %xmm0, %xmm1, %xmm2
+# CHECK-NEXT: - - - 0.50 - 0.50 0.50 0.50 vpaddusw (%rax), %xmm1, %xmm2
+# CHECK-NEXT: - - - 0.50 - 0.50 - - vpaddw %xmm0, %xmm1, %xmm2
+# CHECK-NEXT: - - - 0.50 - 0.50 0.50 0.50 vpaddw (%rax), %xmm1, %xmm2
+# CHECK-NEXT: - - - 0.50 - 0.50 - - vpalignr $1, %xmm0, %xmm1, %xmm2
+# CHECK-NEXT: - - - 0.50 - 0.50 0.50 0.50 vpalignr $1, (%rax), %xmm1, %xmm2
+# CHECK-NEXT: - - 0.33 0.33 - 0.33 - - vpand %xmm0, %xmm1, %xmm2
+# CHECK-NEXT: - - 0.33 0.33 - 0.33 0.50 0.50 vpand (%rax), %xmm1, %xmm2
+# CHECK-NEXT: - - 0.33 0.33 - 0.33 - - vpandn %xmm0, %xmm1, %xmm2
+# CHECK-NEXT: - - 0.33 0.33 - 0.33 0.50 0.50 vpandn (%rax), %xmm1, %xmm2
+# CHECK-NEXT: - - - 0.50 - 0.50 - - vpavgb %xmm0, %xmm1, %xmm2
+# CHECK-NEXT: - - - 0.50 - 0.50 0.50 0.50 vpavgb (%rax), %xmm1, %xmm2
+# CHECK-NEXT: - - - 0.50 - 0.50 - - vpavgw %xmm0, %xmm1, %xmm2
+# CHECK-NEXT: - - - 0.50 - 0.50 0.50 0.50 vpavgw (%rax), %xmm1, %xmm2
+# CHECK-NEXT: - - - 1.00 - 1.00 - - vpblendvb %xmm3, %xmm0, %xmm1, %xmm2
+# CHECK-NEXT: - - - 1.00 - 1.00 0.50 0.50 vpblendvb %xmm3, (%rax), %xmm1, %xmm2
+# CHECK-NEXT: - - - 0.50 - 0.50 - - vpblendw $11, %xmm0, %xmm1, %xmm2
+# CHECK-NEXT: - - - 0.50 - 0.50 0.50 0.50 vpblendw $11, (%rax), %xmm1, %xmm2
+# CHECK-NEXT: - - 6.00 6.00 - 6.00 - - vpclmulqdq $11, %xmm0, %xmm1, %xmm2
+# CHECK-NEXT: - - 5.67 5.67 - 5.67 0.50 0.50 vpclmulqdq $11, (%rax), %xmm1, %xmm2
+# CHECK-NEXT: - - - 0.50 - 0.50 - - vpcmpeqb %xmm0, %xmm1, %xmm2
+# CHECK-NEXT: - - - 0.50 - 0.50 0.50 0.50 vpcmpeqb (%rax), %xmm1, %xmm2
+# CHECK-NEXT: - - - 0.50 - 0.50 - - vpcmpeqd %xmm0, %xmm1, %xmm2
+# CHECK-NEXT: - - - 0.50 - 0.50 0.50 0.50 vpcmpeqd (%rax), %xmm1, %xmm2
+# CHECK-NEXT: - - - 0.50 - 0.50 - - vpcmpeqq %xmm0, %xmm1, %xmm2
+# CHECK-NEXT: - - - 0.50 - 0.50 0.50 0.50 vpcmpeqq (%rax), %xmm1, %xmm2
+# CHECK-NEXT: - - - 0.50 - 0.50 - - vpcmpeqw %xmm0, %xmm1, %xmm2
+# CHECK-NEXT: - - - 0.50 - 0.50 0.50 0.50 vpcmpeqw (%rax), %xmm1, %xmm2
+# CHECK-NEXT: - - 2.67 2.67 - 2.67 - - vpcmpestri $1, %xmm0, %xmm2
+# CHECK-NEXT: - - 2.33 2.33 - 2.33 0.50 0.50 vpcmpestri $1, (%rax), %xmm2
+# CHECK-NEXT: - - 2.67 2.67 - 2.67 - - vpcmpestrm $1, %xmm0, %xmm2
+# CHECK-NEXT: - - 2.33 2.33 - 2.33 0.50 0.50 vpcmpestrm $1, (%rax), %xmm2
+# CHECK-NEXT: - - - 0.50 - 0.50 - - vpcmpgtb %xmm0, %xmm1, %xmm2
+# CHECK-NEXT: - - - 0.50 - 0.50 0.50 0.50 vpcmpgtb (%rax), %xmm1, %xmm2
+# CHECK-NEXT: - - - 0.50 - 0.50 - - vpcmpgtd %xmm0, %xmm1, %xmm2
+# CHECK-NEXT: - - - 0.50 - 0.50 0.50 0.50 vpcmpgtd (%rax), %xmm1, %xmm2
+# CHECK-NEXT: - - 1.00 - - - - - vpcmpgtq %xmm0, %xmm1, %xmm2
+# CHECK-NEXT: - - 1.00 - - - 0.50 0.50 vpcmpgtq (%rax), %xmm1, %xmm2
+# CHECK-NEXT: - - - 0.50 - 0.50 - - vpcmpgtw %xmm0, %xmm1, %xmm2
+# CHECK-NEXT: - - - 0.50 - 0.50 0.50 0.50 vpcmpgtw (%rax), %xmm1, %xmm2
+# CHECK-NEXT: - - 3.00 - - - - - vpcmpistri $1, %xmm0, %xmm2
+# CHECK-NEXT: - - 3.00 - - - 0.50 0.50 vpcmpistri $1, (%rax), %xmm2
+# CHECK-NEXT: - - 3.00 - - - - - vpcmpistrm $1, %xmm0, %xmm2
+# CHECK-NEXT: - - 3.00 - - - 0.50 0.50 vpcmpistrm $1, (%rax), %xmm2
+# CHECK-NEXT: - - - - - 1.00 - - vperm2f128 $1, %ymm0, %ymm1, %ymm2
+# CHECK-NEXT: - - - - - 1.00 0.50 0.50 vperm2f128 $1, (%rax), %ymm1, %ymm2
+# CHECK-NEXT: - - - - - 1.00 - - vpermilpd $1, %xmm0, %xmm2
+# CHECK-NEXT: - - - - - 1.00 0.50 0.50 vpermilpd $1, (%rax), %xmm2
+# CHECK-NEXT: - - - - - 1.00 - - vpermilpd %xmm0, %xmm1, %xmm2
+# CHECK-NEXT: - - - - - 1.00 0.50 0.50 vpermilpd (%rax), %xmm1, %xmm2
+# CHECK-NEXT: - - - - - 1.00 - - vpermilpd $1, %ymm0, %ymm2
+# CHECK-NEXT: - - - - - 1.00 0.50 0.50 vpermilpd $1, (%rax), %ymm2
+# CHECK-NEXT: - - - - - 1.00 - - vpermilpd %ymm0, %ymm1, %ymm2
+# CHECK-NEXT: - - - - - 1.00 0.50 0.50 vpermilpd (%rax), %ymm1, %ymm2
+# CHECK-NEXT: - - - - - 1.00 - - vpermilps $1, %xmm0, %xmm2
+# CHECK-NEXT: - - - - - 1.00 0.50 0.50 vpermilps $1, (%rax), %xmm2
+# CHECK-NEXT: - - - - - 1.00 - - vpermilps %xmm0, %xmm1, %xmm2
+# CHECK-NEXT: - - - - - 1.00 0.50 0.50 vpermilps (%rax), %xmm1, %xmm2
+# CHECK-NEXT: - - - - - 1.00 - - vpermilps $1, %ymm0, %ymm2
+# CHECK-NEXT: - - - - - 1.00 0.50 0.50 vpermilps $1, (%rax), %ymm2
+# CHECK-NEXT: - - - - - 1.00 - - vpermilps %ymm0, %ymm1, %ymm2
+# CHECK-NEXT: - - - - - 1.00 0.50 0.50 vpermilps (%rax), %ymm1, %ymm2
+# CHECK-NEXT: - - 1.00 0.50 - 0.50 - - vpextrb $1, %xmm0, %ecx
+# CHECK-NEXT: - - - 0.50 1.00 0.50 0.50 0.50 vpextrb $1, %xmm0, (%rax)
+# CHECK-NEXT: - - 1.00 0.50 - 0.50 - - vpextrd $1, %xmm0, %ecx
+# CHECK-NEXT: - - 1.00 0.50 1.00 0.50 0.50 0.50 vpextrd $1, %xmm0, (%rax)
+# CHECK-NEXT: - - 1.00 0.50 - 0.50 - - vpextrq $1, %xmm0, %rcx
+# CHECK-NEXT: - - 1.00 0.50 1.00 0.50 0.50 0.50 vpextrq $1, %xmm0, (%rax)
+# CHECK-NEXT: - - 1.00 0.50 - 0.50 - - vpextrw $1, %xmm0, %ecx
+# CHECK-NEXT: - - - 0.50 1.00 0.50 0.50 0.50 vpextrw $1, %xmm0, (%rax)
+# CHECK-NEXT: - - - 1.50 - 1.50 - - vphaddd %xmm0, %xmm1, %xmm2
+# CHECK-NEXT: - - - 1.50 - 1.50 0.50 0.50 vphaddd (%rax), %xmm1, %xmm2
+# CHECK-NEXT: - - - 1.50 - 1.50 - - vphaddsw %xmm0, %xmm1, %xmm2
+# CHECK-NEXT: - - - 1.50 - 1.50 0.50 0.50 vphaddsw (%rax), %xmm1, %xmm2
+# CHECK-NEXT: - - - 1.50 - 1.50 - - vphaddw %xmm0, %xmm1, %xmm2
+# CHECK-NEXT: - - - 1.50 - 1.50 0.50 0.50 vphaddw (%rax), %xmm1, %xmm2
+# CHECK-NEXT: - - 1.00 - - - - - vphminposuw %xmm0, %xmm2
+# CHECK-NEXT: - - 1.00 - - - 0.50 0.50 vphminposuw (%rax), %xmm2
+# CHECK-NEXT: - - - 1.50 - 1.50 - - vphsubd %xmm0, %xmm1, %xmm2
+# CHECK-NEXT: - - - 1.50 - 1.50 0.50 0.50 vphsubd (%rax), %xmm1, %xmm2
+# CHECK-NEXT: - - - 1.50 - 1.50 - - vphsubsw %xmm0, %xmm1, %xmm2
+# CHECK-NEXT: - - - 1.50 - 1.50 0.50 0.50 vphsubsw (%rax), %xmm1, %xmm2
+# CHECK-NEXT: - - - 1.50 - 1.50 - - vphsubw %xmm0, %xmm1, %xmm2
+# CHECK-NEXT: - - - 1.50 - 1.50 0.50 0.50 vphsubw (%rax), %xmm1, %xmm2
+# CHECK-NEXT: - - - 0.50 - 1.50 - - vpinsrb $1, %eax, %xmm1, %xmm2
+# CHECK-NEXT: - - - 0.50 - 0.50 0.50 0.50 vpinsrb $1, (%rax), %xmm1, %xmm2
+# CHECK-NEXT: - - - 0.50 - 1.50 - - vpinsrd $1, %eax, %xmm1, %xmm2
+# CHECK-NEXT: - - - 0.50 - 0.50 0.50 0.50 vpinsrd $1, (%rax), %xmm1, %xmm2
+# CHECK-NEXT: - - - 0.50 - 1.50 - - vpinsrq $1, %rax, %xmm1, %xmm2
+# CHECK-NEXT: - - - 0.50 - 0.50 0.50 0.50 vpinsrq $1, (%rax), %xmm1, %xmm2
+# CHECK-NEXT: - - - 0.50 - 1.50 - - vpinsrw $1, %eax, %xmm1, %xmm2
+# CHECK-NEXT: - - - 0.50 - 0.50 0.50 0.50 vpinsrw $1, (%rax), %xmm1, %xmm2
+# CHECK-NEXT: - - 1.00 - - - - - vpmaddubsw %xmm0, %xmm1, %xmm2
+# CHECK-NEXT: - - 1.00 - - - 0.50 0.50 vpmaddubsw (%rax), %xmm1, %xmm2
+# CHECK-NEXT: - - 1.00 - - - - - vpmaddwd %xmm0, %xmm1, %xmm2
+# CHECK-NEXT: - - 1.00 - - - 0.50 0.50 vpmaddwd (%rax), %xmm1, %xmm2
+# CHECK-NEXT: - - - 0.50 - 0.50 - - vpmaxsb %xmm0, %xmm1, %xmm2
+# CHECK-NEXT: - - - 0.50 - 0.50 0.50 0.50 vpmaxsb (%rax), %xmm1, %xmm2
+# CHECK-NEXT: - - - 0.50 - 0.50 - - vpmaxsd %xmm0, %xmm1, %xmm2
+# CHECK-NEXT: - - - 0.50 - 0.50 0.50 0.50 vpmaxsd (%rax), %xmm1, %xmm2
+# CHECK-NEXT: - - - 0.50 - 0.50 - - vpmaxsw %xmm0, %xmm1, %xmm2
+# CHECK-NEXT: - - - 0.50 - 0.50 0.50 0.50 vpmaxsw (%rax), %xmm1, %xmm2
+# CHECK-NEXT: - - - 0.50 - 0.50 - - vpmaxub %xmm0, %xmm1, %xmm2
+# CHECK-NEXT: - - - 0.50 - 0.50 0.50 0.50 vpmaxub (%rax), %xmm1, %xmm2
+# CHECK-NEXT: - - - 0.50 - 0.50 - - vpmaxud %xmm0, %xmm1, %xmm2
+# CHECK-NEXT: - - - 0.50 - 0.50 0.50 0.50 vpmaxud (%rax), %xmm1, %xmm2
+# CHECK-NEXT: - - - 0.50 - 0.50 - - vpmaxuw %xmm0, %xmm1, %xmm2
+# CHECK-NEXT: - - - 0.50 - 0.50 0.50 0.50 vpmaxuw (%rax), %xmm1, %xmm2
+# CHECK-NEXT: - - - 0.50 - 0.50 - - vpminsb %xmm0, %xmm1, %xmm2
+# CHECK-NEXT: - - - 0.50 - 0.50 0.50 0.50 vpminsb (%rax), %xmm1, %xmm2
+# CHECK-NEXT: - - - 0.50 - 0.50 - - vpminsd %xmm0, %xmm1, %xmm2
+# CHECK-NEXT: - - - 0.50 - 0.50 0.50 0.50 vpminsd (%rax), %xmm1, %xmm2
+# CHECK-NEXT: - - - 0.50 - 0.50 - - vpminsw %xmm0, %xmm1, %xmm2
+# CHECK-NEXT: - - - 0.50 - 0.50 0.50 0.50 vpminsw (%rax), %xmm1, %xmm2
+# CHECK-NEXT: - - - 0.50 - 0.50 - - vpminub %xmm0, %xmm1, %xmm2
+# CHECK-NEXT: - - - 0.50 - 0.50 0.50 0.50 vpminub (%rax), %xmm1, %xmm2
+# CHECK-NEXT: - - - 0.50 - 0.50 - - vpminud %xmm0, %xmm1, %xmm2
+# CHECK-NEXT: - - - 0.50 - 0.50 0.50 0.50 vpminud (%rax), %xmm1, %xmm2
+# CHECK-NEXT: - - - 0.50 - 0.50 - - vpminuw %xmm0, %xmm1, %xmm2
+# CHECK-NEXT: - - - 0.50 - 0.50 0.50 0.50 vpminuw (%rax), %xmm1, %xmm2
+# CHECK-NEXT: - - 1.00 - - - - - vpmovmskb %xmm0, %ecx
+# CHECK-NEXT: - - - 0.50 - 0.50 - - vpmovsxbd %xmm0, %xmm2
+# CHECK-NEXT: - - - 0.50 - 0.50 0.50 0.50 vpmovsxbd (%rax), %xmm2
+# CHECK-NEXT: - - - 0.50 - 0.50 - - vpmovsxbq %xmm0, %xmm2
+# CHECK-NEXT: - - - 0.50 - 0.50 0.50 0.50 vpmovsxbq (%rax), %xmm2
+# CHECK-NEXT: - - - 0.50 - 0.50 - - vpmovsxbw %xmm0, %xmm2
+# CHECK-NEXT: - - - 0.50 - 0.50 0.50 0.50 vpmovsxbw (%rax), %xmm2
+# CHECK-NEXT: - - - 0.50 - 0.50 - - vpmovsxdq %xmm0, %xmm2
+# CHECK-NEXT: - - - 0.50 - 0.50 0.50 0.50 vpmovsxdq (%rax), %xmm2
+# CHECK-NEXT: - - - 0.50 - 0.50 - - vpmovsxwd %xmm0, %xmm2
+# CHECK-NEXT: - - - 0.50 - 0.50 0.50 0.50 vpmovsxwd (%rax), %xmm2
+# CHECK-NEXT: - - - 0.50 - 0.50 - - vpmovsxwq %xmm0, %xmm2
+# CHECK-NEXT: - - - 0.50 - 0.50 0.50 0.50 vpmovsxwq (%rax), %xmm2
+# CHECK-NEXT: - - - 0.50 - 0.50 - - vpmovzxbd %xmm0, %xmm2
+# CHECK-NEXT: - - - 0.50 - 0.50 0.50 0.50 vpmovzxbd (%rax), %xmm2
+# CHECK-NEXT: - - - 0.50 - 0.50 - - vpmovzxbq %xmm0, %xmm2
+# CHECK-NEXT: - - - 0.50 - 0.50 0.50 0.50 vpmovzxbq (%rax), %xmm2
+# CHECK-NEXT: - - - 0.50 - 0.50 - - vpmovzxbw %xmm0, %xmm2
+# CHECK-NEXT: - - - 0.50 - 0.50 0.50 0.50 vpmovzxbw (%rax), %xmm2
+# CHECK-NEXT: - - - 0.50 - 0.50 - - vpmovzxdq %xmm0, %xmm2
+# CHECK-NEXT: - - - 0.50 - 0.50 0.50 0.50 vpmovzxdq (%rax), %xmm2
+# CHECK-NEXT: - - - 0.50 - 0.50 - - vpmovzxwd %xmm0, %xmm2
+# CHECK-NEXT: - - - 0.50 - 0.50 0.50 0.50 vpmovzxwd (%rax), %xmm2
+# CHECK-NEXT: - - - 0.50 - 0.50 - - vpmovzxwq %xmm0, %xmm2
+# CHECK-NEXT: - - - 0.50 - 0.50 0.50 0.50 vpmovzxwq (%rax), %xmm2
+# CHECK-NEXT: - - 1.00 - - - - - vpmuldq %xmm0, %xmm1, %xmm2
+# CHECK-NEXT: - - 1.00 - - - 0.50 0.50 vpmuldq (%rax), %xmm1, %xmm2
+# CHECK-NEXT: - - 1.00 - - - - - vpmulhrsw %xmm0, %xmm1, %xmm2
+# CHECK-NEXT: - - 1.00 - - - 0.50 0.50 vpmulhrsw (%rax), %xmm1, %xmm2
+# CHECK-NEXT: - - 1.00 - - - - - vpmulhuw %xmm0, %xmm1, %xmm2
+# CHECK-NEXT: - - 1.00 - - - 0.50 0.50 vpmulhuw (%rax), %xmm1, %xmm2
+# CHECK-NEXT: - - 1.00 - - - - - vpmulhw %xmm0, %xmm1, %xmm2
+# CHECK-NEXT: - - 1.00 - - - 0.50 0.50 vpmulhw (%rax), %xmm1, %xmm2
+# CHECK-NEXT: - - 1.00 - - - - - vpmulld %xmm0, %xmm1, %xmm2
+# CHECK-NEXT: - - 1.00 - - - 0.50 0.50 vpmulld (%rax), %xmm1, %xmm2
+# CHECK-NEXT: - - 1.00 - - - - - vpmullw %xmm0, %xmm1, %xmm2
+# CHECK-NEXT: - - 1.00 - - - 0.50 0.50 vpmullw (%rax), %xmm1, %xmm2
+# CHECK-NEXT: - - 1.00 - - - - - vpmuludq %xmm0, %xmm1, %xmm2
+# CHECK-NEXT: - - 1.00 - - - 0.50 0.50 vpmuludq (%rax), %xmm1, %xmm2
+# CHECK-NEXT: - - 0.33 0.33 - 0.33 - - vpor %xmm0, %xmm1, %xmm2
+# CHECK-NEXT: - - 0.33 0.33 - 0.33 0.50 0.50 vpor (%rax), %xmm1, %xmm2
+# CHECK-NEXT: - - 1.00 - - - - - vpsadbw %xmm0, %xmm1, %xmm2
+# CHECK-NEXT: - - 1.00 - - - 0.50 0.50 vpsadbw (%rax), %xmm1, %xmm2
+# CHECK-NEXT: - - - 0.50 - 0.50 - - vpshufb %xmm0, %xmm1, %xmm2
+# CHECK-NEXT: - - - 0.50 - 0.50 0.50 0.50 vpshufb (%rax), %xmm1, %xmm2
+# CHECK-NEXT: - - - 0.50 - 0.50 - - vpshufd $1, %xmm0, %xmm2
+# CHECK-NEXT: - - - 0.50 - 0.50 0.50 0.50 vpshufd $1, (%rax), %xmm2
+# CHECK-NEXT: - - - 0.50 - 0.50 - - vpshufhw $1, %xmm0, %xmm2
+# CHECK-NEXT: - - - 0.50 - 0.50 0.50 0.50 vpshufhw $1, (%rax), %xmm2
+# CHECK-NEXT: - - - 0.50 - 0.50 - - vpshuflw $1, %xmm0, %xmm2
+# CHECK-NEXT: - - - 0.50 - 0.50 0.50 0.50 vpshuflw $1, (%rax), %xmm2
+# CHECK-NEXT: - - - 0.50 - 0.50 - - vpsignb %xmm0, %xmm1, %xmm2
+# CHECK-NEXT: - - - 0.50 - 0.50 0.50 0.50 vpsignb (%rax), %xmm1, %xmm2
+# CHECK-NEXT: - - - 0.50 - 0.50 - - vpsignd %xmm0, %xmm1, %xmm2
+# CHECK-NEXT: - - - 0.50 - 0.50 0.50 0.50 vpsignd (%rax), %xmm1, %xmm2
+# CHECK-NEXT: - - - 0.50 - 0.50 - - vpsignw %xmm0, %xmm1, %xmm2
+# CHECK-NEXT: - - - 0.50 - 0.50 0.50 0.50 vpsignw (%rax), %xmm1, %xmm2
+# CHECK-NEXT: - - 1.00 - - - - - vpslld $1, %xmm0, %xmm2
+# CHECK-NEXT: - - 1.00 0.50 - 0.50 - - vpslld %xmm0, %xmm1, %xmm2
+# CHECK-NEXT: - - 1.00 0.50 - 0.50 0.50 0.50 vpslld (%rax), %xmm1, %xmm2
+# CHECK-NEXT: - - - 0.50 - 0.50 - - vpslldq $1, %xmm1, %xmm2
+# CHECK-NEXT: - - 1.00 - - - - - vpsllq $1, %xmm0, %xmm2
+# CHECK-NEXT: - - 1.00 0.50 - 0.50 - - vpsllq %xmm0, %xmm1, %xmm2
+# CHECK-NEXT: - - 1.00 0.50 - 0.50 0.50 0.50 vpsllq (%rax), %xmm1, %xmm2
+# CHECK-NEXT: - - 1.00 - - - - - vpsllw $1, %xmm0, %xmm2
+# CHECK-NEXT: - - 1.00 0.50 - 0.50 - - vpsllw %xmm0, %xmm1, %xmm2
+# CHECK-NEXT: - - 1.00 0.50 - 0.50 0.50 0.50 vpsllw (%rax), %xmm1, %xmm2
+# CHECK-NEXT: - - 1.00 - - - - - vpsrad $1, %xmm0, %xmm2
+# CHECK-NEXT: - - 1.00 0.50 - 0.50 - - vpsrad %xmm0, %xmm1, %xmm2
+# CHECK-NEXT: - - 1.00 0.50 - 0.50 0.50 0.50 vpsrad (%rax), %xmm1, %xmm2
+# CHECK-NEXT: - - 1.00 - - - - - vpsraw $1, %xmm0, %xmm2
+# CHECK-NEXT: - - 1.00 0.50 - 0.50 - - vpsraw %xmm0, %xmm1, %xmm2
+# CHECK-NEXT: - - 1.00 0.50 - 0.50 0.50 0.50 vpsraw (%rax), %xmm1, %xmm2
+# CHECK-NEXT: - - 1.00 - - - - - vpsrld $1, %xmm0, %xmm2
+# CHECK-NEXT: - - 1.00 0.50 - 0.50 - - vpsrld %xmm0, %xmm1, %xmm2
+# CHECK-NEXT: - - 1.00 0.50 - 0.50 0.50 0.50 vpsrld (%rax), %xmm1, %xmm2
+# CHECK-NEXT: - - - 0.50 - 0.50 - - vpsrldq $1, %xmm1, %xmm2
+# CHECK-NEXT: - - 1.00 - - - - - vpsrlq $1, %xmm0, %xmm2
+# CHECK-NEXT: - - 1.00 0.50 - 0.50 - - vpsrlq %xmm0, %xmm1, %xmm2
+# CHECK-NEXT: - - 1.00 0.50 - 0.50 0.50 0.50 vpsrlq (%rax), %xmm1, %xmm2
+# CHECK-NEXT: - - 1.00 - - - - - vpsrlw $1, %xmm0, %xmm2
+# CHECK-NEXT: - - 1.00 0.50 - 0.50 - - vpsrlw %xmm0, %xmm1, %xmm2
+# CHECK-NEXT: - - 1.00 0.50 - 0.50 0.50 0.50 vpsrlw (%rax), %xmm1, %xmm2
+# CHECK-NEXT: - - - 0.50 - 0.50 - - vpsubb %xmm0, %xmm1, %xmm2
+# CHECK-NEXT: - - - 0.50 - 0.50 0.50 0.50 vpsubb (%rax), %xmm1, %xmm2
+# CHECK-NEXT: - - - 0.50 - 0.50 - - vpsubd %xmm0, %xmm1, %xmm2
+# CHECK-NEXT: - - - 0.50 - 0.50 0.50 0.50 vpsubd (%rax), %xmm1, %xmm2
+# CHECK-NEXT: - - - 0.50 - 0.50 - - vpsubq %xmm0, %xmm1, %xmm2
+# CHECK-NEXT: - - - 0.50 - 0.50 0.50 0.50 vpsubq (%rax), %xmm1, %xmm2
+# CHECK-NEXT: - - - 0.50 - 0.50 - - vpsubsb %xmm0, %xmm1, %xmm2
+# CHECK-NEXT: - - - 0.50 - 0.50 0.50 0.50 vpsubsb (%rax), %xmm1, %xmm2
+# CHECK-NEXT: - - - 0.50 - 0.50 - - vpsubsw %xmm0, %xmm1, %xmm2
+# CHECK-NEXT: - - - 0.50 - 0.50 0.50 0.50 vpsubsw (%rax), %xmm1, %xmm2
+# CHECK-NEXT: - - - 0.50 - 0.50 - - vpsubusb %xmm0, %xmm1, %xmm2
+# CHECK-NEXT: - - - 0.50 - 0.50 0.50 0.50 vpsubusb (%rax), %xmm1, %xmm2
+# CHECK-NEXT: - - - 0.50 - 0.50 - - vpsubusw %xmm0, %xmm1, %xmm2
+# CHECK-NEXT: - - - 0.50 - 0.50 0.50 0.50 vpsubusw (%rax), %xmm1, %xmm2
+# CHECK-NEXT: - - - 0.50 - 0.50 - - vpsubw %xmm0, %xmm1, %xmm2
+# CHECK-NEXT: - - - 0.50 - 0.50 0.50 0.50 vpsubw (%rax), %xmm1, %xmm2
+# CHECK-NEXT: - - 1.00 - - 1.00 - - vptest %xmm0, %xmm1
+# CHECK-NEXT: - - 1.00 - - 1.00 0.50 0.50 vptest (%rax), %xmm1
+# CHECK-NEXT: - - 1.00 - - 1.00 - - vptest %ymm0, %ymm1
+# CHECK-NEXT: - - 1.00 - - 1.00 0.50 0.50 vptest (%rax), %ymm1
+# CHECK-NEXT: - - - 0.50 - 0.50 - - vpunpckhbw %xmm0, %xmm1, %xmm2
+# CHECK-NEXT: - - - 0.50 - 0.50 0.50 0.50 vpunpckhbw (%rax), %xmm1, %xmm2
+# CHECK-NEXT: - - - 0.50 - 0.50 - - vpunpckhdq %xmm0, %xmm1, %xmm2
+# CHECK-NEXT: - - - 0.50 - 0.50 0.50 0.50 vpunpckhdq (%rax), %xmm1, %xmm2
+# CHECK-NEXT: - - - 0.50 - 0.50 - - vpunpckhqdq %xmm0, %xmm1, %xmm2
+# CHECK-NEXT: - - - 0.50 - 0.50 0.50 0.50 vpunpckhqdq (%rax), %xmm1, %xmm2
+# CHECK-NEXT: - - - 0.50 - 0.50 - - vpunpckhwd %xmm0, %xmm1, %xmm2
+# CHECK-NEXT: - - - 0.50 - 0.50 0.50 0.50 vpunpckhwd (%rax), %xmm1, %xmm2
+# CHECK-NEXT: - - - 0.50 - 0.50 - - vpunpcklbw %xmm0, %xmm1, %xmm2
+# CHECK-NEXT: - - - 0.50 - 0.50 0.50 0.50 vpunpcklbw (%rax), %xmm1, %xmm2
+# CHECK-NEXT: - - - 0.50 - 0.50 - - vpunpckldq %xmm0, %xmm1, %xmm2
+# CHECK-NEXT: - - - 0.50 - 0.50 0.50 0.50 vpunpckldq (%rax), %xmm1, %xmm2
+# CHECK-NEXT: - - - 0.50 - 0.50 - - vpunpcklqdq %xmm0, %xmm1, %xmm2
+# CHECK-NEXT: - - - 0.50 - 0.50 0.50 0.50 vpunpcklqdq (%rax), %xmm1, %xmm2
+# CHECK-NEXT: - - - 0.50 - 0.50 - - vpunpcklwd %xmm0, %xmm1, %xmm2
+# CHECK-NEXT: - - - 0.50 - 0.50 0.50 0.50 vpunpcklwd (%rax), %xmm1, %xmm2
+# CHECK-NEXT: - - 0.33 0.33 - 0.33 - - vpxor %xmm0, %xmm1, %xmm2
+# CHECK-NEXT: - - 0.33 0.33 - 0.33 0.50 0.50 vpxor (%rax), %xmm1, %xmm2
+# CHECK-NEXT: - - 1.00 - - - - - vrcpps %xmm0, %xmm2
+# CHECK-NEXT: - - 1.00 - - - 0.50 0.50 vrcpps (%rax), %xmm2
+# CHECK-NEXT: - - 2.50 - - 0.50 - - vrcpps %ymm0, %ymm2
+# CHECK-NEXT: - - 2.50 - - 0.50 0.50 0.50 vrcpps (%rax), %ymm2
+# CHECK-NEXT: - - 1.00 - - - - - vrcpss %xmm0, %xmm1, %xmm2
+# CHECK-NEXT: - - 1.00 - - - 0.50 0.50 vrcpss (%rax), %xmm1, %xmm2
+# CHECK-NEXT: - - - 1.00 - - - - vroundpd $1, %xmm0, %xmm2
+# CHECK-NEXT: - - - 1.00 - - 0.50 0.50 vroundpd $1, (%rax), %xmm2
+# CHECK-NEXT: - - - 1.00 - - - - vroundpd $1, %ymm0, %ymm2
+# CHECK-NEXT: - - - 1.00 - - 0.50 0.50 vroundpd $1, (%rax), %ymm2
+# CHECK-NEXT: - - - 1.00 - - - - vroundps $1, %xmm0, %xmm2
+# CHECK-NEXT: - - - 1.00 - - 0.50 0.50 vroundps $1, (%rax), %xmm2
+# CHECK-NEXT: - - - 1.00 - - - - vroundps $1, %ymm0, %ymm2
+# CHECK-NEXT: - - - 1.00 - - 0.50 0.50 vroundps $1, (%rax), %ymm2
+# CHECK-NEXT: - - - 1.00 - - - - vroundsd $1, %xmm0, %xmm1, %xmm2
+# CHECK-NEXT: - - - 1.00 - - 0.50 0.50 vroundsd $1, (%rax), %xmm1, %xmm2
+# CHECK-NEXT: - - - 1.00 - - - - vroundss $1, %xmm0, %xmm1, %xmm2
+# CHECK-NEXT: - - - 1.00 - - 0.50 0.50 vroundss $1, (%rax), %xmm1, %xmm2
+# CHECK-NEXT: - - 1.00 - - - - - vrsqrtps %xmm0, %xmm2
+# CHECK-NEXT: - - 1.00 - - - 0.50 0.50 vrsqrtps (%rax), %xmm2
+# CHECK-NEXT: - - 2.50 - - 0.50 - - vrsqrtps %ymm0, %ymm2
+# CHECK-NEXT: - - 2.50 - - 0.50 0.50 0.50 vrsqrtps (%rax), %ymm2
+# CHECK-NEXT: - - 1.00 - - - - - vrsqrtss %xmm0, %xmm1, %xmm2
+# CHECK-NEXT: - - 1.00 - - - 0.50 0.50 vrsqrtss (%rax), %xmm1, %xmm2
+# CHECK-NEXT: - - - - - 1.00 - - vshufpd $1, %xmm0, %xmm1, %xmm2
+# CHECK-NEXT: - - - - - 1.00 0.50 0.50 vshufpd $1, (%rax), %xmm1, %xmm2
+# CHECK-NEXT: - - - - - 1.00 - - vshufpd $1, %ymm0, %ymm1, %ymm2
+# CHECK-NEXT: - - - - - 1.00 0.50 0.50 vshufpd $1, (%rax), %ymm1, %ymm2
+# CHECK-NEXT: - - - - - 1.00 - - vshufps $1, %xmm0, %xmm1, %xmm2
+# CHECK-NEXT: - - - - - 1.00 0.50 0.50 vshufps $1, (%rax), %xmm1, %xmm2
+# CHECK-NEXT: - - - - - 1.00 - - vshufps $1, %ymm0, %ymm1, %ymm2
+# CHECK-NEXT: - - - - - 1.00 0.50 0.50 vshufps $1, (%rax), %ymm1, %ymm2
+# CHECK-NEXT: - 21.00 1.00 - - - - - vsqrtpd %xmm0, %xmm2
+# CHECK-NEXT: - 21.00 1.00 - - - 0.50 0.50 vsqrtpd (%rax), %xmm2
+# CHECK-NEXT: - 44.00 2.50 - - 0.50 - - vsqrtpd %ymm0, %ymm2
+# CHECK-NEXT: - 44.00 2.50 - - 0.50 0.50 0.50 vsqrtpd (%rax), %ymm2
+# CHECK-NEXT: - 14.00 1.00 - - - - - vsqrtps %xmm0, %xmm2
+# CHECK-NEXT: - 14.00 1.00 - - - 0.50 0.50 vsqrtps (%rax), %xmm2
+# CHECK-NEXT: - 28.00 2.50 - - 0.50 - - vsqrtps %ymm0, %ymm2
+# CHECK-NEXT: - 28.00 2.50 - - 0.50 0.50 0.50 vsqrtps (%rax), %ymm2
+# CHECK-NEXT: - 21.00 1.00 - - - - - vsqrtsd %xmm0, %xmm1, %xmm2
+# CHECK-NEXT: - 21.00 1.00 - - - 0.50 0.50 vsqrtsd (%rax), %xmm1, %xmm2
+# CHECK-NEXT: - 14.00 1.00 - - - - - vsqrtss %xmm0, %xmm1, %xmm2
+# CHECK-NEXT: - 14.00 1.00 - - - 0.50 0.50 vsqrtss (%rax), %xmm1, %xmm2
+# CHECK-NEXT: - - 1.00 - 1.00 1.00 0.50 0.50 vstmxcsr (%rax)
+# CHECK-NEXT: - - - 1.00 - - - - vsubpd %xmm0, %xmm1, %xmm2
+# CHECK-NEXT: - - - 1.00 - - 0.50 0.50 vsubpd (%rax), %xmm1, %xmm2
+# CHECK-NEXT: - - - 1.00 - - - - vsubpd %ymm0, %ymm1, %ymm2
+# CHECK-NEXT: - - - 1.00 - - 0.50 0.50 vsubpd (%rax), %ymm1, %ymm2
+# CHECK-NEXT: - - - 1.00 - - - - vsubps %xmm0, %xmm1, %xmm2
+# CHECK-NEXT: - - - 1.00 - - 0.50 0.50 vsubps (%rax), %xmm1, %xmm2
+# CHECK-NEXT: - - - 1.00 - - - - vsubps %ymm0, %ymm1, %ymm2
+# CHECK-NEXT: - - - 1.00 - - 0.50 0.50 vsubps (%rax), %ymm1, %ymm2
+# CHECK-NEXT: - - - 1.00 - - - - vsubsd %xmm0, %xmm1, %xmm2
+# CHECK-NEXT: - - - 1.00 - - 0.50 0.50 vsubsd (%rax), %xmm1, %xmm2
+# CHECK-NEXT: - - - 1.00 - - - - vsubss %xmm0, %xmm1, %xmm2
+# CHECK-NEXT: - - - 1.00 - - 0.50 0.50 vsubss (%rax), %xmm1, %xmm2
+# CHECK-NEXT: - - 1.00 - - - - - vtestpd %xmm0, %xmm1
+# CHECK-NEXT: - - 1.00 - - - 0.50 0.50 vtestpd (%rax), %xmm1
+# CHECK-NEXT: - - 1.00 - - - - - vtestpd %ymm0, %ymm1
+# CHECK-NEXT: - - 1.00 - - - 0.50 0.50 vtestpd (%rax), %ymm1
+# CHECK-NEXT: - - 1.00 - - - - - vtestps %xmm0, %xmm1
+# CHECK-NEXT: - - 1.00 - - - 0.50 0.50 vtestps (%rax), %xmm1
+# CHECK-NEXT: - - 1.00 - - - - - vtestps %ymm0, %ymm1
+# CHECK-NEXT: - - 1.00 - - - 0.50 0.50 vtestps (%rax), %ymm1
+# CHECK-NEXT: - - 1.00 1.00 - - - - vucomisd %xmm0, %xmm1
+# CHECK-NEXT: - - 1.00 1.00 - - 0.50 0.50 vucomisd (%rax), %xmm1
+# CHECK-NEXT: - - 1.00 1.00 - - - - vucomiss %xmm0, %xmm1
+# CHECK-NEXT: - - 1.00 1.00 - - 0.50 0.50 vucomiss (%rax), %xmm1
+# CHECK-NEXT: - - - - - 1.00 - - vunpckhpd %xmm0, %xmm1, %xmm2
+# CHECK-NEXT: - - - - - 1.00 0.50 0.50 vunpckhpd (%rax), %xmm1, %xmm2
+# CHECK-NEXT: - - - - - 1.00 - - vunpckhpd %ymm0, %ymm1, %ymm2
+# CHECK-NEXT: - - - - - 1.00 0.50 0.50 vunpckhpd (%rax), %ymm1, %ymm2
+# CHECK-NEXT: - - - - - 1.00 - - vunpckhps %xmm0, %xmm1, %xmm2
+# CHECK-NEXT: - - - - - 1.00 0.50 0.50 vunpckhps (%rax), %xmm1, %xmm2
+# CHECK-NEXT: - - - - - 1.00 - - vunpckhps %ymm0, %ymm1, %ymm2
+# CHECK-NEXT: - - - - - 1.00 0.50 0.50 vunpckhps (%rax), %ymm1, %ymm2
+# CHECK-NEXT: - - - - - 1.00 - - vunpcklpd %xmm0, %xmm1, %xmm2
+# CHECK-NEXT: - - - - - 1.00 0.50 0.50 vunpcklpd (%rax), %xmm1, %xmm2
+# CHECK-NEXT: - - - - - 1.00 - - vunpcklpd %ymm0, %ymm1, %ymm2
+# CHECK-NEXT: - - - - - 1.00 0.50 0.50 vunpcklpd (%rax), %ymm1, %ymm2
+# CHECK-NEXT: - - - - - 1.00 - - vunpcklps %xmm0, %xmm1, %xmm2
+# CHECK-NEXT: - - - - - 1.00 0.50 0.50 vunpcklps (%rax), %xmm1, %xmm2
+# CHECK-NEXT: - - - - - 1.00 - - vunpcklps %ymm0, %ymm1, %ymm2
+# CHECK-NEXT: - - - - - 1.00 0.50 0.50 vunpcklps (%rax), %ymm1, %ymm2
+# CHECK-NEXT: - - - - - 1.00 - - vxorpd %xmm0, %xmm1, %xmm2
+# CHECK-NEXT: - - - - - 1.00 0.50 0.50 vxorpd (%rax), %xmm1, %xmm2
+# CHECK-NEXT: - - - - - 1.00 - - vxorpd %ymm0, %ymm1, %ymm2
+# CHECK-NEXT: - - - - - 1.00 0.50 0.50 vxorpd (%rax), %ymm1, %ymm2
+# CHECK-NEXT: - - - - - 1.00 - - vxorps %xmm0, %xmm1, %xmm2
+# CHECK-NEXT: - - - - - 1.00 0.50 0.50 vxorps (%rax), %xmm1, %xmm2
+# CHECK-NEXT: - - - - - 1.00 - - vxorps %ymm0, %ymm1, %ymm2
+# CHECK-NEXT: - - - - - 1.00 0.50 0.50 vxorps (%rax), %ymm1, %ymm2
+# CHECK-NEXT: - - - - - 2.00 - - vzeroall
+# CHECK-NEXT: - - 0.33 0.33 - 0.33 - - vzeroupper
--- /dev/null
+# NOTE: Assertions have been autogenerated by utils/update_mca_test_checks.py
+# RUN: llvm-mca -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -instruction-tables < %s | FileCheck %s
+
+andn %eax, %ebx, %ecx
+andn (%rax), %ebx, %ecx
+
+andn %rax, %rbx, %rcx
+andn (%rax), %rbx, %rcx
+
+bextr %eax, %ebx, %ecx
+bextr %eax, (%rbx), %ecx
+
+bextr %rax, %rbx, %rcx
+bextr %rax, (%rbx), %rcx
+
+blsi %eax, %ecx
+blsi (%rax), %ecx
+
+blsi %rax, %rcx
+blsi (%rax), %rcx
+
+blsmsk %eax, %ecx
+blsmsk (%rax), %ecx
+
+blsmsk %rax, %rcx
+blsmsk (%rax), %rcx
+
+blsr %eax, %ecx
+blsr (%rax), %ecx
+
+blsr %rax, %rcx
+blsr (%rax), %rcx
+
+tzcnt %eax, %ecx
+tzcnt (%rax), %ecx
+
+tzcnt %rax, %rcx
+tzcnt (%rax), %rcx
+
+# CHECK: Instruction Info:
+# CHECK-NEXT: [1]: #uOps
+# CHECK-NEXT: [2]: Latency
+# CHECK-NEXT: [3]: RThroughput
+# CHECK-NEXT: [4]: MayLoad
+# CHECK-NEXT: [5]: MayStore
+# CHECK-NEXT: [6]: HasSideEffects (U)
+
+# CHECK: [1] [2] [3] [4] [5] [6] Instructions:
+# CHECK-NEXT: 1 1 0.33 andnl %eax, %ebx, %ecx
+# CHECK-NEXT: 2 6 0.50 * andnl (%rax), %ebx, %ecx
+# CHECK-NEXT: 1 1 0.33 andnq %rax, %rbx, %rcx
+# CHECK-NEXT: 2 6 0.50 * andnq (%rax), %rbx, %rcx
+# CHECK-NEXT: 2 2 1.00 bextrl %eax, %ebx, %ecx
+# CHECK-NEXT: 3 7 1.00 * bextrl %eax, (%rbx), %ecx
+# CHECK-NEXT: 2 2 1.00 bextrq %rax, %rbx, %rcx
+# CHECK-NEXT: 3 7 1.00 * bextrq %rax, (%rbx), %rcx
+# CHECK-NEXT: 1 1 0.33 blsil %eax, %ecx
+# CHECK-NEXT: 2 6 0.50 * blsil (%rax), %ecx
+# CHECK-NEXT: 1 1 0.33 blsiq %rax, %rcx
+# CHECK-NEXT: 2 6 0.50 * blsiq (%rax), %rcx
+# CHECK-NEXT: 1 1 0.33 blsmskl %eax, %ecx
+# CHECK-NEXT: 2 6 0.50 * blsmskl (%rax), %ecx
+# CHECK-NEXT: 1 1 0.33 blsmskq %rax, %rcx
+# CHECK-NEXT: 2 6 0.50 * blsmskq (%rax), %rcx
+# CHECK-NEXT: 1 1 0.33 blsrl %eax, %ecx
+# CHECK-NEXT: 2 6 0.50 * blsrl (%rax), %ecx
+# CHECK-NEXT: 1 1 0.33 blsrq %rax, %rcx
+# CHECK-NEXT: 2 6 0.50 * blsrq (%rax), %rcx
+# CHECK-NEXT: 1 3 1.00 tzcntl %eax, %ecx
+# CHECK-NEXT: 2 8 1.00 * tzcntl (%rax), %ecx
+# CHECK-NEXT: 1 3 1.00 tzcntq %rax, %rcx
+# CHECK-NEXT: 2 8 1.00 * tzcntq (%rax), %rcx
+
+# CHECK: Resources:
+# CHECK-NEXT: [0] - SBDivider
+# CHECK-NEXT: [1] - SBFPDivider
+# CHECK-NEXT: [2] - SBPort0
+# CHECK-NEXT: [3] - SBPort1
+# CHECK-NEXT: [4] - SBPort4
+# CHECK-NEXT: [5] - SBPort5
+# CHECK-NEXT: [6.0] - SBPort23
+# CHECK-NEXT: [6.1] - SBPort23
+
+# CHECK: Resource pressure per iteration:
+# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6.0] [6.1]
+# CHECK-NEXT: - - 7.33 13.33 - 7.33 6.00 6.00
+
+# CHECK: Resource pressure by instruction:
+# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6.0] [6.1] Instructions:
+# CHECK-NEXT: - - 0.33 0.33 - 0.33 - - andnl %eax, %ebx, %ecx
+# CHECK-NEXT: - - 0.33 0.33 - 0.33 0.50 0.50 andnl (%rax), %ebx, %ecx
+# CHECK-NEXT: - - 0.33 0.33 - 0.33 - - andnq %rax, %rbx, %rcx
+# CHECK-NEXT: - - 0.33 0.33 - 0.33 0.50 0.50 andnq (%rax), %rbx, %rcx
+# CHECK-NEXT: - - 0.50 1.00 - 0.50 - - bextrl %eax, %ebx, %ecx
+# CHECK-NEXT: - - 0.50 1.00 - 0.50 0.50 0.50 bextrl %eax, (%rbx), %ecx
+# CHECK-NEXT: - - 0.50 1.00 - 0.50 - - bextrq %rax, %rbx, %rcx
+# CHECK-NEXT: - - 0.50 1.00 - 0.50 0.50 0.50 bextrq %rax, (%rbx), %rcx
+# CHECK-NEXT: - - 0.33 0.33 - 0.33 - - blsil %eax, %ecx
+# CHECK-NEXT: - - 0.33 0.33 - 0.33 0.50 0.50 blsil (%rax), %ecx
+# CHECK-NEXT: - - 0.33 0.33 - 0.33 - - blsiq %rax, %rcx
+# CHECK-NEXT: - - 0.33 0.33 - 0.33 0.50 0.50 blsiq (%rax), %rcx
+# CHECK-NEXT: - - 0.33 0.33 - 0.33 - - blsmskl %eax, %ecx
+# CHECK-NEXT: - - 0.33 0.33 - 0.33 0.50 0.50 blsmskl (%rax), %ecx
+# CHECK-NEXT: - - 0.33 0.33 - 0.33 - - blsmskq %rax, %rcx
+# CHECK-NEXT: - - 0.33 0.33 - 0.33 0.50 0.50 blsmskq (%rax), %rcx
+# CHECK-NEXT: - - 0.33 0.33 - 0.33 - - blsrl %eax, %ecx
+# CHECK-NEXT: - - 0.33 0.33 - 0.33 0.50 0.50 blsrl (%rax), %ecx
+# CHECK-NEXT: - - 0.33 0.33 - 0.33 - - blsrq %rax, %rcx
+# CHECK-NEXT: - - 0.33 0.33 - 0.33 0.50 0.50 blsrq (%rax), %rcx
+# CHECK-NEXT: - - - 1.00 - - - - tzcntl %eax, %ecx
+# CHECK-NEXT: - - - 1.00 - - 0.50 0.50 tzcntl (%rax), %ecx
+# CHECK-NEXT: - - - 1.00 - - - - tzcntq %rax, %rcx
+# CHECK-NEXT: - - - 1.00 - - 0.50 0.50 tzcntq (%rax), %rcx
--- /dev/null
+# NOTE: Assertions have been autogenerated by utils/update_mca_test_checks.py
+# RUN: llvm-mca -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -instruction-tables < %s | FileCheck %s
+
+clflushopt (%rax)
+
+# CHECK: Instruction Info:
+# CHECK-NEXT: [1]: #uOps
+# CHECK-NEXT: [2]: Latency
+# CHECK-NEXT: [3]: RThroughput
+# CHECK-NEXT: [4]: MayLoad
+# CHECK-NEXT: [5]: MayStore
+# CHECK-NEXT: [6]: HasSideEffects (U)
+
+# CHECK: [1] [2] [3] [4] [5] [6] Instructions:
+# CHECK-NEXT: 4 5 1.00 * * U clflushopt (%rax)
+
+# CHECK: Resources:
+# CHECK-NEXT: [0] - SBDivider
+# CHECK-NEXT: [1] - SBFPDivider
+# CHECK-NEXT: [2] - SBPort0
+# CHECK-NEXT: [3] - SBPort1
+# CHECK-NEXT: [4] - SBPort4
+# CHECK-NEXT: [5] - SBPort5
+# CHECK-NEXT: [6.0] - SBPort23
+# CHECK-NEXT: [6.1] - SBPort23
+
+# CHECK: Resource pressure per iteration:
+# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6.0] [6.1]
+# CHECK-NEXT: - - 0.50 0.50 1.00 1.00 0.50 0.50
+
+# CHECK: Resource pressure by instruction:
+# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6.0] [6.1] Instructions:
+# CHECK-NEXT: - - 0.50 0.50 1.00 1.00 0.50 0.50 clflushopt (%rax)
--- /dev/null
+# NOTE: Assertions have been autogenerated by utils/update_mca_test_checks.py
+# RUN: llvm-mca -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -instruction-tables < %s | FileCheck %s
+
+cmovow %si, %di
+cmovnow %si, %di
+cmovbw %si, %di
+cmovaew %si, %di
+cmovew %si, %di
+cmovnew %si, %di
+cmovbew %si, %di
+cmovaw %si, %di
+cmovsw %si, %di
+cmovnsw %si, %di
+cmovpw %si, %di
+cmovnpw %si, %di
+cmovlw %si, %di
+cmovgew %si, %di
+cmovlew %si, %di
+cmovgw %si, %di
+
+cmovow (%rax), %di
+cmovnow (%rax), %di
+cmovbw (%rax), %di
+cmovaew (%rax), %di
+cmovew (%rax), %di
+cmovnew (%rax), %di
+cmovbew (%rax), %di
+cmovaw (%rax), %di
+cmovsw (%rax), %di
+cmovnsw (%rax), %di
+cmovpw (%rax), %di
+cmovnpw (%rax), %di
+cmovlw (%rax), %di
+cmovgew (%rax), %di
+cmovlew (%rax), %di
+cmovgw (%rax), %di
+
+cmovol %esi, %edi
+cmovnol %esi, %edi
+cmovbl %esi, %edi
+cmovael %esi, %edi
+cmovel %esi, %edi
+cmovnel %esi, %edi
+cmovbel %esi, %edi
+cmoval %esi, %edi
+cmovsl %esi, %edi
+cmovnsl %esi, %edi
+cmovpl %esi, %edi
+cmovnpl %esi, %edi
+cmovll %esi, %edi
+cmovgel %esi, %edi
+cmovlel %esi, %edi
+cmovgl %esi, %edi
+
+cmovol (%rax), %edi
+cmovnol (%rax), %edi
+cmovbl (%rax), %edi
+cmovael (%rax), %edi
+cmovel (%rax), %edi
+cmovnel (%rax), %edi
+cmovbel (%rax), %edi
+cmoval (%rax), %edi
+cmovsl (%rax), %edi
+cmovnsl (%rax), %edi
+cmovpl (%rax), %edi
+cmovnpl (%rax), %edi
+cmovll (%rax), %edi
+cmovgel (%rax), %edi
+cmovlel (%rax), %edi
+cmovgl (%rax), %edi
+
+cmovoq %rsi, %rdi
+cmovnoq %rsi, %rdi
+cmovbq %rsi, %rdi
+cmovaeq %rsi, %rdi
+cmoveq %rsi, %rdi
+cmovneq %rsi, %rdi
+cmovbeq %rsi, %rdi
+cmovaq %rsi, %rdi
+cmovsq %rsi, %rdi
+cmovnsq %rsi, %rdi
+cmovpq %rsi, %rdi
+cmovnpq %rsi, %rdi
+cmovlq %rsi, %rdi
+cmovgeq %rsi, %rdi
+cmovleq %rsi, %rdi
+cmovgq %rsi, %rdi
+
+cmovoq (%rax), %rdi
+cmovnoq (%rax), %rdi
+cmovbq (%rax), %rdi
+cmovaeq (%rax), %rdi
+cmoveq (%rax), %rdi
+cmovneq (%rax), %rdi
+cmovbeq (%rax), %rdi
+cmovaq (%rax), %rdi
+cmovsq (%rax), %rdi
+cmovnsq (%rax), %rdi
+cmovpq (%rax), %rdi
+cmovnpq (%rax), %rdi
+cmovlq (%rax), %rdi
+cmovgeq (%rax), %rdi
+cmovleq (%rax), %rdi
+cmovgq (%rax), %rdi
+
+# CHECK: Instruction Info:
+# CHECK-NEXT: [1]: #uOps
+# CHECK-NEXT: [2]: Latency
+# CHECK-NEXT: [3]: RThroughput
+# CHECK-NEXT: [4]: MayLoad
+# CHECK-NEXT: [5]: MayStore
+# CHECK-NEXT: [6]: HasSideEffects (U)
+
+# CHECK: [1] [2] [3] [4] [5] [6] Instructions:
+# CHECK-NEXT: 2 2 0.67 cmovow %si, %di
+# CHECK-NEXT: 2 2 0.67 cmovnow %si, %di
+# CHECK-NEXT: 2 2 0.67 cmovbw %si, %di
+# CHECK-NEXT: 2 2 0.67 cmovaew %si, %di
+# CHECK-NEXT: 2 2 0.67 cmovew %si, %di
+# CHECK-NEXT: 2 2 0.67 cmovnew %si, %di
+# CHECK-NEXT: 3 3 1.00 cmovbew %si, %di
+# CHECK-NEXT: 3 3 1.00 cmovaw %si, %di
+# CHECK-NEXT: 2 2 0.67 cmovsw %si, %di
+# CHECK-NEXT: 2 2 0.67 cmovnsw %si, %di
+# CHECK-NEXT: 2 2 0.67 cmovpw %si, %di
+# CHECK-NEXT: 2 2 0.67 cmovnpw %si, %di
+# CHECK-NEXT: 2 2 0.67 cmovlw %si, %di
+# CHECK-NEXT: 2 2 0.67 cmovgew %si, %di
+# CHECK-NEXT: 2 2 0.67 cmovlew %si, %di
+# CHECK-NEXT: 2 2 0.67 cmovgw %si, %di
+# CHECK-NEXT: 3 7 0.67 * cmovow (%rax), %di
+# CHECK-NEXT: 3 7 0.67 * cmovnow (%rax), %di
+# CHECK-NEXT: 3 7 0.67 * cmovbw (%rax), %di
+# CHECK-NEXT: 3 7 0.67 * cmovaew (%rax), %di
+# CHECK-NEXT: 3 7 0.67 * cmovew (%rax), %di
+# CHECK-NEXT: 3 7 0.67 * cmovnew (%rax), %di
+# CHECK-NEXT: 4 8 1.00 * cmovbew (%rax), %di
+# CHECK-NEXT: 4 8 1.00 * cmovaw (%rax), %di
+# CHECK-NEXT: 3 7 0.67 * cmovsw (%rax), %di
+# CHECK-NEXT: 3 7 0.67 * cmovnsw (%rax), %di
+# CHECK-NEXT: 3 7 0.67 * cmovpw (%rax), %di
+# CHECK-NEXT: 3 7 0.67 * cmovnpw (%rax), %di
+# CHECK-NEXT: 3 7 0.67 * cmovlw (%rax), %di
+# CHECK-NEXT: 3 7 0.67 * cmovgew (%rax), %di
+# CHECK-NEXT: 3 7 0.67 * cmovlew (%rax), %di
+# CHECK-NEXT: 3 7 0.67 * cmovgw (%rax), %di
+# CHECK-NEXT: 2 2 0.67 cmovol %esi, %edi
+# CHECK-NEXT: 2 2 0.67 cmovnol %esi, %edi
+# CHECK-NEXT: 2 2 0.67 cmovbl %esi, %edi
+# CHECK-NEXT: 2 2 0.67 cmovael %esi, %edi
+# CHECK-NEXT: 2 2 0.67 cmovel %esi, %edi
+# CHECK-NEXT: 2 2 0.67 cmovnel %esi, %edi
+# CHECK-NEXT: 3 3 1.00 cmovbel %esi, %edi
+# CHECK-NEXT: 3 3 1.00 cmoval %esi, %edi
+# CHECK-NEXT: 2 2 0.67 cmovsl %esi, %edi
+# CHECK-NEXT: 2 2 0.67 cmovnsl %esi, %edi
+# CHECK-NEXT: 2 2 0.67 cmovpl %esi, %edi
+# CHECK-NEXT: 2 2 0.67 cmovnpl %esi, %edi
+# CHECK-NEXT: 2 2 0.67 cmovll %esi, %edi
+# CHECK-NEXT: 2 2 0.67 cmovgel %esi, %edi
+# CHECK-NEXT: 2 2 0.67 cmovlel %esi, %edi
+# CHECK-NEXT: 2 2 0.67 cmovgl %esi, %edi
+# CHECK-NEXT: 3 7 0.67 * cmovol (%rax), %edi
+# CHECK-NEXT: 3 7 0.67 * cmovnol (%rax), %edi
+# CHECK-NEXT: 3 7 0.67 * cmovbl (%rax), %edi
+# CHECK-NEXT: 3 7 0.67 * cmovael (%rax), %edi
+# CHECK-NEXT: 3 7 0.67 * cmovel (%rax), %edi
+# CHECK-NEXT: 3 7 0.67 * cmovnel (%rax), %edi
+# CHECK-NEXT: 4 8 1.00 * cmovbel (%rax), %edi
+# CHECK-NEXT: 4 8 1.00 * cmoval (%rax), %edi
+# CHECK-NEXT: 3 7 0.67 * cmovsl (%rax), %edi
+# CHECK-NEXT: 3 7 0.67 * cmovnsl (%rax), %edi
+# CHECK-NEXT: 3 7 0.67 * cmovpl (%rax), %edi
+# CHECK-NEXT: 3 7 0.67 * cmovnpl (%rax), %edi
+# CHECK-NEXT: 3 7 0.67 * cmovll (%rax), %edi
+# CHECK-NEXT: 3 7 0.67 * cmovgel (%rax), %edi
+# CHECK-NEXT: 3 7 0.67 * cmovlel (%rax), %edi
+# CHECK-NEXT: 3 7 0.67 * cmovgl (%rax), %edi
+# CHECK-NEXT: 2 2 0.67 cmovoq %rsi, %rdi
+# CHECK-NEXT: 2 2 0.67 cmovnoq %rsi, %rdi
+# CHECK-NEXT: 2 2 0.67 cmovbq %rsi, %rdi
+# CHECK-NEXT: 2 2 0.67 cmovaeq %rsi, %rdi
+# CHECK-NEXT: 2 2 0.67 cmoveq %rsi, %rdi
+# CHECK-NEXT: 2 2 0.67 cmovneq %rsi, %rdi
+# CHECK-NEXT: 3 3 1.00 cmovbeq %rsi, %rdi
+# CHECK-NEXT: 3 3 1.00 cmovaq %rsi, %rdi
+# CHECK-NEXT: 2 2 0.67 cmovsq %rsi, %rdi
+# CHECK-NEXT: 2 2 0.67 cmovnsq %rsi, %rdi
+# CHECK-NEXT: 2 2 0.67 cmovpq %rsi, %rdi
+# CHECK-NEXT: 2 2 0.67 cmovnpq %rsi, %rdi
+# CHECK-NEXT: 2 2 0.67 cmovlq %rsi, %rdi
+# CHECK-NEXT: 2 2 0.67 cmovgeq %rsi, %rdi
+# CHECK-NEXT: 2 2 0.67 cmovleq %rsi, %rdi
+# CHECK-NEXT: 2 2 0.67 cmovgq %rsi, %rdi
+# CHECK-NEXT: 3 7 0.67 * cmovoq (%rax), %rdi
+# CHECK-NEXT: 3 7 0.67 * cmovnoq (%rax), %rdi
+# CHECK-NEXT: 3 7 0.67 * cmovbq (%rax), %rdi
+# CHECK-NEXT: 3 7 0.67 * cmovaeq (%rax), %rdi
+# CHECK-NEXT: 3 7 0.67 * cmoveq (%rax), %rdi
+# CHECK-NEXT: 3 7 0.67 * cmovneq (%rax), %rdi
+# CHECK-NEXT: 4 8 1.00 * cmovbeq (%rax), %rdi
+# CHECK-NEXT: 4 8 1.00 * cmovaq (%rax), %rdi
+# CHECK-NEXT: 3 7 0.67 * cmovsq (%rax), %rdi
+# CHECK-NEXT: 3 7 0.67 * cmovnsq (%rax), %rdi
+# CHECK-NEXT: 3 7 0.67 * cmovpq (%rax), %rdi
+# CHECK-NEXT: 3 7 0.67 * cmovnpq (%rax), %rdi
+# CHECK-NEXT: 3 7 0.67 * cmovlq (%rax), %rdi
+# CHECK-NEXT: 3 7 0.67 * cmovgeq (%rax), %rdi
+# CHECK-NEXT: 3 7 0.67 * cmovleq (%rax), %rdi
+# CHECK-NEXT: 3 7 0.67 * cmovgq (%rax), %rdi
+
+# CHECK: Resources:
+# CHECK-NEXT: [0] - SBDivider
+# CHECK-NEXT: [1] - SBFPDivider
+# CHECK-NEXT: [2] - SBPort0
+# CHECK-NEXT: [3] - SBPort1
+# CHECK-NEXT: [4] - SBPort4
+# CHECK-NEXT: [5] - SBPort5
+# CHECK-NEXT: [6.0] - SBPort23
+# CHECK-NEXT: [6.1] - SBPort23
+
+# CHECK: Resource pressure per iteration:
+# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6.0] [6.1]
+# CHECK-NEXT: - - 86.00 32.00 - 86.00 24.00 24.00
+
+# CHECK: Resource pressure by instruction:
+# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6.0] [6.1] Instructions:
+# CHECK-NEXT: - - 0.83 0.33 - 0.83 - - cmovow %si, %di
+# CHECK-NEXT: - - 0.83 0.33 - 0.83 - - cmovnow %si, %di
+# CHECK-NEXT: - - 0.83 0.33 - 0.83 - - cmovbw %si, %di
+# CHECK-NEXT: - - 0.83 0.33 - 0.83 - - cmovaew %si, %di
+# CHECK-NEXT: - - 0.83 0.33 - 0.83 - - cmovew %si, %di
+# CHECK-NEXT: - - 0.83 0.33 - 0.83 - - cmovnew %si, %di
+# CHECK-NEXT: - - 1.33 0.33 - 1.33 - - cmovbew %si, %di
+# CHECK-NEXT: - - 1.33 0.33 - 1.33 - - cmovaw %si, %di
+# CHECK-NEXT: - - 0.83 0.33 - 0.83 - - cmovsw %si, %di
+# CHECK-NEXT: - - 0.83 0.33 - 0.83 - - cmovnsw %si, %di
+# CHECK-NEXT: - - 0.83 0.33 - 0.83 - - cmovpw %si, %di
+# CHECK-NEXT: - - 0.83 0.33 - 0.83 - - cmovnpw %si, %di
+# CHECK-NEXT: - - 0.83 0.33 - 0.83 - - cmovlw %si, %di
+# CHECK-NEXT: - - 0.83 0.33 - 0.83 - - cmovgew %si, %di
+# CHECK-NEXT: - - 0.83 0.33 - 0.83 - - cmovlew %si, %di
+# CHECK-NEXT: - - 0.83 0.33 - 0.83 - - cmovgw %si, %di
+# CHECK-NEXT: - - 0.83 0.33 - 0.83 0.50 0.50 cmovow (%rax), %di
+# CHECK-NEXT: - - 0.83 0.33 - 0.83 0.50 0.50 cmovnow (%rax), %di
+# CHECK-NEXT: - - 0.83 0.33 - 0.83 0.50 0.50 cmovbw (%rax), %di
+# CHECK-NEXT: - - 0.83 0.33 - 0.83 0.50 0.50 cmovaew (%rax), %di
+# CHECK-NEXT: - - 0.83 0.33 - 0.83 0.50 0.50 cmovew (%rax), %di
+# CHECK-NEXT: - - 0.83 0.33 - 0.83 0.50 0.50 cmovnew (%rax), %di
+# CHECK-NEXT: - - 1.33 0.33 - 1.33 0.50 0.50 cmovbew (%rax), %di
+# CHECK-NEXT: - - 1.33 0.33 - 1.33 0.50 0.50 cmovaw (%rax), %di
+# CHECK-NEXT: - - 0.83 0.33 - 0.83 0.50 0.50 cmovsw (%rax), %di
+# CHECK-NEXT: - - 0.83 0.33 - 0.83 0.50 0.50 cmovnsw (%rax), %di
+# CHECK-NEXT: - - 0.83 0.33 - 0.83 0.50 0.50 cmovpw (%rax), %di
+# CHECK-NEXT: - - 0.83 0.33 - 0.83 0.50 0.50 cmovnpw (%rax), %di
+# CHECK-NEXT: - - 0.83 0.33 - 0.83 0.50 0.50 cmovlw (%rax), %di
+# CHECK-NEXT: - - 0.83 0.33 - 0.83 0.50 0.50 cmovgew (%rax), %di
+# CHECK-NEXT: - - 0.83 0.33 - 0.83 0.50 0.50 cmovlew (%rax), %di
+# CHECK-NEXT: - - 0.83 0.33 - 0.83 0.50 0.50 cmovgw (%rax), %di
+# CHECK-NEXT: - - 0.83 0.33 - 0.83 - - cmovol %esi, %edi
+# CHECK-NEXT: - - 0.83 0.33 - 0.83 - - cmovnol %esi, %edi
+# CHECK-NEXT: - - 0.83 0.33 - 0.83 - - cmovbl %esi, %edi
+# CHECK-NEXT: - - 0.83 0.33 - 0.83 - - cmovael %esi, %edi
+# CHECK-NEXT: - - 0.83 0.33 - 0.83 - - cmovel %esi, %edi
+# CHECK-NEXT: - - 0.83 0.33 - 0.83 - - cmovnel %esi, %edi
+# CHECK-NEXT: - - 1.33 0.33 - 1.33 - - cmovbel %esi, %edi
+# CHECK-NEXT: - - 1.33 0.33 - 1.33 - - cmoval %esi, %edi
+# CHECK-NEXT: - - 0.83 0.33 - 0.83 - - cmovsl %esi, %edi
+# CHECK-NEXT: - - 0.83 0.33 - 0.83 - - cmovnsl %esi, %edi
+# CHECK-NEXT: - - 0.83 0.33 - 0.83 - - cmovpl %esi, %edi
+# CHECK-NEXT: - - 0.83 0.33 - 0.83 - - cmovnpl %esi, %edi
+# CHECK-NEXT: - - 0.83 0.33 - 0.83 - - cmovll %esi, %edi
+# CHECK-NEXT: - - 0.83 0.33 - 0.83 - - cmovgel %esi, %edi
+# CHECK-NEXT: - - 0.83 0.33 - 0.83 - - cmovlel %esi, %edi
+# CHECK-NEXT: - - 0.83 0.33 - 0.83 - - cmovgl %esi, %edi
+# CHECK-NEXT: - - 0.83 0.33 - 0.83 0.50 0.50 cmovol (%rax), %edi
+# CHECK-NEXT: - - 0.83 0.33 - 0.83 0.50 0.50 cmovnol (%rax), %edi
+# CHECK-NEXT: - - 0.83 0.33 - 0.83 0.50 0.50 cmovbl (%rax), %edi
+# CHECK-NEXT: - - 0.83 0.33 - 0.83 0.50 0.50 cmovael (%rax), %edi
+# CHECK-NEXT: - - 0.83 0.33 - 0.83 0.50 0.50 cmovel (%rax), %edi
+# CHECK-NEXT: - - 0.83 0.33 - 0.83 0.50 0.50 cmovnel (%rax), %edi
+# CHECK-NEXT: - - 1.33 0.33 - 1.33 0.50 0.50 cmovbel (%rax), %edi
+# CHECK-NEXT: - - 1.33 0.33 - 1.33 0.50 0.50 cmoval (%rax), %edi
+# CHECK-NEXT: - - 0.83 0.33 - 0.83 0.50 0.50 cmovsl (%rax), %edi
+# CHECK-NEXT: - - 0.83 0.33 - 0.83 0.50 0.50 cmovnsl (%rax), %edi
+# CHECK-NEXT: - - 0.83 0.33 - 0.83 0.50 0.50 cmovpl (%rax), %edi
+# CHECK-NEXT: - - 0.83 0.33 - 0.83 0.50 0.50 cmovnpl (%rax), %edi
+# CHECK-NEXT: - - 0.83 0.33 - 0.83 0.50 0.50 cmovll (%rax), %edi
+# CHECK-NEXT: - - 0.83 0.33 - 0.83 0.50 0.50 cmovgel (%rax), %edi
+# CHECK-NEXT: - - 0.83 0.33 - 0.83 0.50 0.50 cmovlel (%rax), %edi
+# CHECK-NEXT: - - 0.83 0.33 - 0.83 0.50 0.50 cmovgl (%rax), %edi
+# CHECK-NEXT: - - 0.83 0.33 - 0.83 - - cmovoq %rsi, %rdi
+# CHECK-NEXT: - - 0.83 0.33 - 0.83 - - cmovnoq %rsi, %rdi
+# CHECK-NEXT: - - 0.83 0.33 - 0.83 - - cmovbq %rsi, %rdi
+# CHECK-NEXT: - - 0.83 0.33 - 0.83 - - cmovaeq %rsi, %rdi
+# CHECK-NEXT: - - 0.83 0.33 - 0.83 - - cmoveq %rsi, %rdi
+# CHECK-NEXT: - - 0.83 0.33 - 0.83 - - cmovneq %rsi, %rdi
+# CHECK-NEXT: - - 1.33 0.33 - 1.33 - - cmovbeq %rsi, %rdi
+# CHECK-NEXT: - - 1.33 0.33 - 1.33 - - cmovaq %rsi, %rdi
+# CHECK-NEXT: - - 0.83 0.33 - 0.83 - - cmovsq %rsi, %rdi
+# CHECK-NEXT: - - 0.83 0.33 - 0.83 - - cmovnsq %rsi, %rdi
+# CHECK-NEXT: - - 0.83 0.33 - 0.83 - - cmovpq %rsi, %rdi
+# CHECK-NEXT: - - 0.83 0.33 - 0.83 - - cmovnpq %rsi, %rdi
+# CHECK-NEXT: - - 0.83 0.33 - 0.83 - - cmovlq %rsi, %rdi
+# CHECK-NEXT: - - 0.83 0.33 - 0.83 - - cmovgeq %rsi, %rdi
+# CHECK-NEXT: - - 0.83 0.33 - 0.83 - - cmovleq %rsi, %rdi
+# CHECK-NEXT: - - 0.83 0.33 - 0.83 - - cmovgq %rsi, %rdi
+# CHECK-NEXT: - - 0.83 0.33 - 0.83 0.50 0.50 cmovoq (%rax), %rdi
+# CHECK-NEXT: - - 0.83 0.33 - 0.83 0.50 0.50 cmovnoq (%rax), %rdi
+# CHECK-NEXT: - - 0.83 0.33 - 0.83 0.50 0.50 cmovbq (%rax), %rdi
+# CHECK-NEXT: - - 0.83 0.33 - 0.83 0.50 0.50 cmovaeq (%rax), %rdi
+# CHECK-NEXT: - - 0.83 0.33 - 0.83 0.50 0.50 cmoveq (%rax), %rdi
+# CHECK-NEXT: - - 0.83 0.33 - 0.83 0.50 0.50 cmovneq (%rax), %rdi
+# CHECK-NEXT: - - 1.33 0.33 - 1.33 0.50 0.50 cmovbeq (%rax), %rdi
+# CHECK-NEXT: - - 1.33 0.33 - 1.33 0.50 0.50 cmovaq (%rax), %rdi
+# CHECK-NEXT: - - 0.83 0.33 - 0.83 0.50 0.50 cmovsq (%rax), %rdi
+# CHECK-NEXT: - - 0.83 0.33 - 0.83 0.50 0.50 cmovnsq (%rax), %rdi
+# CHECK-NEXT: - - 0.83 0.33 - 0.83 0.50 0.50 cmovpq (%rax), %rdi
+# CHECK-NEXT: - - 0.83 0.33 - 0.83 0.50 0.50 cmovnpq (%rax), %rdi
+# CHECK-NEXT: - - 0.83 0.33 - 0.83 0.50 0.50 cmovlq (%rax), %rdi
+# CHECK-NEXT: - - 0.83 0.33 - 0.83 0.50 0.50 cmovgeq (%rax), %rdi
+# CHECK-NEXT: - - 0.83 0.33 - 0.83 0.50 0.50 cmovleq (%rax), %rdi
+# CHECK-NEXT: - - 0.83 0.33 - 0.83 0.50 0.50 cmovgq (%rax), %rdi
--- /dev/null
+# NOTE: Assertions have been autogenerated by utils/update_mca_test_checks.py
+# RUN: llvm-mca -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -instruction-tables < %s | FileCheck %s
+
+cmpxchg8b (%rax)
+cmpxchg16b (%rax)
+
+# CHECK: Instruction Info:
+# CHECK-NEXT: [1]: #uOps
+# CHECK-NEXT: [2]: Latency
+# CHECK-NEXT: [3]: RThroughput
+# CHECK-NEXT: [4]: MayLoad
+# CHECK-NEXT: [5]: MayStore
+# CHECK-NEXT: [6]: HasSideEffects (U)
+
+# CHECK: [1] [2] [3] [4] [5] [6] Instructions:
+# CHECK-NEXT: 3 6 1.00 * * cmpxchg8b (%rax)
+# CHECK-NEXT: 3 6 1.00 * * cmpxchg16b (%rax)
+
+# CHECK: Resources:
+# CHECK-NEXT: [0] - SBDivider
+# CHECK-NEXT: [1] - SBFPDivider
+# CHECK-NEXT: [2] - SBPort0
+# CHECK-NEXT: [3] - SBPort1
+# CHECK-NEXT: [4] - SBPort4
+# CHECK-NEXT: [5] - SBPort5
+# CHECK-NEXT: [6.0] - SBPort23
+# CHECK-NEXT: [6.1] - SBPort23
+
+# CHECK: Resource pressure per iteration:
+# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6.0] [6.1]
+# CHECK-NEXT: - - 0.67 0.67 2.00 0.67 2.00 2.00
+
+# CHECK: Resource pressure by instruction:
+# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6.0] [6.1] Instructions:
+# CHECK-NEXT: - - 0.33 0.33 1.00 0.33 1.00 1.00 cmpxchg8b (%rax)
+# CHECK-NEXT: - - 0.33 0.33 1.00 0.33 1.00 1.00 cmpxchg16b (%rax)
--- /dev/null
+# NOTE: Assertions have been autogenerated by utils/update_mca_test_checks.py
+# RUN: llvm-mca -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -instruction-tables < %s | FileCheck %s
+
+vcvtph2ps %xmm0, %xmm2
+vcvtph2ps (%rax), %xmm2
+
+vcvtph2ps %xmm0, %ymm2
+vcvtph2ps (%rax), %ymm2
+
+vcvtps2ph $0, %xmm0, %xmm2
+vcvtps2ph $0, %xmm0, (%rax)
+
+vcvtps2ph $0, %ymm0, %xmm2
+vcvtps2ph $0, %ymm0, (%rax)
+
+# CHECK: Instruction Info:
+# CHECK-NEXT: [1]: #uOps
+# CHECK-NEXT: [2]: Latency
+# CHECK-NEXT: [3]: RThroughput
+# CHECK-NEXT: [4]: MayLoad
+# CHECK-NEXT: [5]: MayStore
+# CHECK-NEXT: [6]: HasSideEffects (U)
+
+# CHECK: [1] [2] [3] [4] [5] [6] Instructions:
+# CHECK-NEXT: 1 3 1.00 vcvtph2ps %xmm0, %xmm2
+# CHECK-NEXT: 2 8 1.00 * vcvtph2ps (%rax), %xmm2
+# CHECK-NEXT: 1 3 1.00 vcvtph2ps %xmm0, %ymm2
+# CHECK-NEXT: 2 8 1.00 * vcvtph2ps (%rax), %ymm2
+# CHECK-NEXT: 1 3 1.00 vcvtps2ph $0, %xmm0, %xmm2
+# CHECK-NEXT: 1 4 1.00 * vcvtps2ph $0, %xmm0, (%rax)
+# CHECK-NEXT: 1 3 1.00 vcvtps2ph $0, %ymm0, %xmm2
+# CHECK-NEXT: 1 4 1.00 * vcvtps2ph $0, %ymm0, (%rax)
+
+# CHECK: Resources:
+# CHECK-NEXT: [0] - SBDivider
+# CHECK-NEXT: [1] - SBFPDivider
+# CHECK-NEXT: [2] - SBPort0
+# CHECK-NEXT: [3] - SBPort1
+# CHECK-NEXT: [4] - SBPort4
+# CHECK-NEXT: [5] - SBPort5
+# CHECK-NEXT: [6.0] - SBPort23
+# CHECK-NEXT: [6.1] - SBPort23
+
+# CHECK: Resource pressure per iteration:
+# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6.0] [6.1]
+# CHECK-NEXT: - - - 8.00 2.00 - 2.00 2.00
+
+# CHECK: Resource pressure by instruction:
+# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6.0] [6.1] Instructions:
+# CHECK-NEXT: - - - 1.00 - - - - vcvtph2ps %xmm0, %xmm2
+# CHECK-NEXT: - - - 1.00 - - 0.50 0.50 vcvtph2ps (%rax), %xmm2
+# CHECK-NEXT: - - - 1.00 - - - - vcvtph2ps %xmm0, %ymm2
+# CHECK-NEXT: - - - 1.00 - - 0.50 0.50 vcvtph2ps (%rax), %ymm2
+# CHECK-NEXT: - - - 1.00 - - - - vcvtps2ph $0, %xmm0, %xmm2
+# CHECK-NEXT: - - - 1.00 1.00 - 0.50 0.50 vcvtps2ph $0, %xmm0, (%rax)
+# CHECK-NEXT: - - - 1.00 - - - - vcvtps2ph $0, %ymm0, %xmm2
+# CHECK-NEXT: - - - 1.00 1.00 - 0.50 0.50 vcvtps2ph $0, %ymm0, (%rax)
--- /dev/null
+# NOTE: Assertions have been autogenerated by utils/update_mca_test_checks.py
+# RUN: llvm-mca -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -instruction-tables < %s | FileCheck %s
+
+vfmadd132pd %xmm0, %xmm1, %xmm2
+vfmadd132pd (%rax), %xmm1, %xmm2
+
+vfmadd132pd %ymm0, %ymm1, %ymm2
+vfmadd132pd (%rax), %ymm1, %ymm2
+
+vfmadd213pd %xmm0, %xmm1, %xmm2
+vfmadd213pd (%rax), %xmm1, %xmm2
+
+vfmadd213pd %ymm0, %ymm1, %ymm2
+vfmadd213pd (%rax), %ymm1, %ymm2
+
+vfmadd231pd %xmm0, %xmm1, %xmm2
+vfmadd231pd (%rax), %xmm1, %xmm2
+
+vfmadd231pd %ymm0, %ymm1, %ymm2
+vfmadd231pd (%rax), %ymm1, %ymm2
+
+vfmadd132ps %xmm0, %xmm1, %xmm2
+vfmadd132ps (%rax), %xmm1, %xmm2
+
+vfmadd132ps %ymm0, %ymm1, %ymm2
+vfmadd132ps (%rax), %ymm1, %ymm2
+
+vfmadd213ps %xmm0, %xmm1, %xmm2
+vfmadd213ps (%rax), %xmm1, %xmm2
+
+vfmadd213ps %ymm0, %ymm1, %ymm2
+vfmadd213ps (%rax), %ymm1, %ymm2
+
+vfmadd231ps %xmm0, %xmm1, %xmm2
+vfmadd231ps (%rax), %xmm1, %xmm2
+
+vfmadd231ps %ymm0, %ymm1, %ymm2
+vfmadd231ps (%rax), %ymm1, %ymm2
+
+vfmadd132sd %xmm0, %xmm1, %xmm2
+vfmadd132sd (%rax), %xmm1, %xmm2
+
+vfmadd213sd %xmm0, %xmm1, %xmm2
+vfmadd213sd (%rax), %xmm1, %xmm2
+
+vfmadd231sd %xmm0, %xmm1, %xmm2
+vfmadd231sd (%rax), %xmm1, %xmm2
+
+vfmadd132ss %xmm0, %xmm1, %xmm2
+vfmadd132ss (%rax), %xmm1, %xmm2
+
+vfmadd213ss %xmm0, %xmm1, %xmm2
+vfmadd213ss (%rax), %xmm1, %xmm2
+
+vfmadd231ss %xmm0, %xmm1, %xmm2
+vfmadd231ss (%rax), %xmm1, %xmm2
+
+vfmaddsub132pd %xmm0, %xmm1, %xmm2
+vfmaddsub132pd (%rax), %xmm1, %xmm2
+
+vfmaddsub132pd %ymm0, %ymm1, %ymm2
+vfmaddsub132pd (%rax), %ymm1, %ymm2
+
+vfmaddsub213pd %xmm0, %xmm1, %xmm2
+vfmaddsub213pd (%rax), %xmm1, %xmm2
+
+vfmaddsub213pd %ymm0, %ymm1, %ymm2
+vfmaddsub213pd (%rax), %ymm1, %ymm2
+
+vfmaddsub231pd %xmm0, %xmm1, %xmm2
+vfmaddsub231pd (%rax), %xmm1, %xmm2
+
+vfmaddsub231pd %ymm0, %ymm1, %ymm2
+vfmaddsub231pd (%rax), %ymm1, %ymm2
+
+vfmaddsub132ps %xmm0, %xmm1, %xmm2
+vfmaddsub132ps (%rax), %xmm1, %xmm2
+
+vfmaddsub132ps %ymm0, %ymm1, %ymm2
+vfmaddsub132ps (%rax), %ymm1, %ymm2
+
+vfmaddsub213ps %xmm0, %xmm1, %xmm2
+vfmaddsub213ps (%rax), %xmm1, %xmm2
+
+vfmaddsub213ps %ymm0, %ymm1, %ymm2
+vfmaddsub213ps (%rax), %ymm1, %ymm2
+
+vfmaddsub231ps %xmm0, %xmm1, %xmm2
+vfmaddsub231ps (%rax), %xmm1, %xmm2
+
+vfmaddsub231ps %ymm0, %ymm1, %ymm2
+vfmaddsub231ps (%rax), %ymm1, %ymm2
+
+vfmsub132pd %xmm0, %xmm1, %xmm2
+vfmsub132pd (%rax), %xmm1, %xmm2
+
+vfmsub132pd %ymm0, %ymm1, %ymm2
+vfmsub132pd (%rax), %ymm1, %ymm2
+
+vfmsub213pd %xmm0, %xmm1, %xmm2
+vfmsub213pd (%rax), %xmm1, %xmm2
+
+vfmsub213pd %ymm0, %ymm1, %ymm2
+vfmsub213pd (%rax), %ymm1, %ymm2
+
+vfmsub231pd %xmm0, %xmm1, %xmm2
+vfmsub231pd (%rax), %xmm1, %xmm2
+
+vfmsub231pd %ymm0, %ymm1, %ymm2
+vfmsub231pd (%rax), %ymm1, %ymm2
+
+vfmsub132ps %xmm0, %xmm1, %xmm2
+vfmsub132ps (%rax), %xmm1, %xmm2
+
+vfmsub132ps %ymm0, %ymm1, %ymm2
+vfmsub132ps (%rax), %ymm1, %ymm2
+
+vfmsub213ps %xmm0, %xmm1, %xmm2
+vfmsub213ps (%rax), %xmm1, %xmm2
+
+vfmsub213ps %ymm0, %ymm1, %ymm2
+vfmsub213ps (%rax), %ymm1, %ymm2
+
+vfmsub231ps %xmm0, %xmm1, %xmm2
+vfmsub231ps (%rax), %xmm1, %xmm2
+
+vfmsub231ps %ymm0, %ymm1, %ymm2
+vfmsub231ps (%rax), %ymm1, %ymm2
+
+vfmsub132sd %xmm0, %xmm1, %xmm2
+vfmsub132sd (%rax), %xmm1, %xmm2
+
+vfmsub213sd %xmm0, %xmm1, %xmm2
+vfmsub213sd (%rax), %xmm1, %xmm2
+
+vfmsub231sd %xmm0, %xmm1, %xmm2
+vfmsub231sd (%rax), %xmm1, %xmm2
+
+vfmsub132ss %xmm0, %xmm1, %xmm2
+vfmsub132ss (%rax), %xmm1, %xmm2
+
+vfmsub213ss %xmm0, %xmm1, %xmm2
+vfmsub213ss (%rax), %xmm1, %xmm2
+
+vfmsub231ss %xmm0, %xmm1, %xmm2
+vfmsub231ss (%rax), %xmm1, %xmm2
+
+vfmsubadd132pd %xmm0, %xmm1, %xmm2
+vfmsubadd132pd (%rax), %xmm1, %xmm2
+
+vfmsubadd132pd %ymm0, %ymm1, %ymm2
+vfmsubadd132pd (%rax), %ymm1, %ymm2
+
+vfmsubadd213pd %xmm0, %xmm1, %xmm2
+vfmsubadd213pd (%rax), %xmm1, %xmm2
+
+vfmsubadd213pd %ymm0, %ymm1, %ymm2
+vfmsubadd213pd (%rax), %ymm1, %ymm2
+
+vfmsubadd231pd %xmm0, %xmm1, %xmm2
+vfmsubadd231pd (%rax), %xmm1, %xmm2
+
+vfmsubadd231pd %ymm0, %ymm1, %ymm2
+vfmsubadd231pd (%rax), %ymm1, %ymm2
+
+vfmsubadd132ps %xmm0, %xmm1, %xmm2
+vfmsubadd132ps (%rax), %xmm1, %xmm2
+
+vfmsubadd132ps %ymm0, %ymm1, %ymm2
+vfmsubadd132ps (%rax), %ymm1, %ymm2
+
+vfmsubadd213ps %xmm0, %xmm1, %xmm2
+vfmsubadd213ps (%rax), %xmm1, %xmm2
+
+vfmsubadd213ps %ymm0, %ymm1, %ymm2
+vfmsubadd213ps (%rax), %ymm1, %ymm2
+
+vfmsubadd231ps %xmm0, %xmm1, %xmm2
+vfmsubadd231ps (%rax), %xmm1, %xmm2
+
+vfmsubadd231ps %ymm0, %ymm1, %ymm2
+vfmsubadd231ps (%rax), %ymm1, %ymm2
+
+vfnmadd132pd %xmm0, %xmm1, %xmm2
+vfnmadd132pd (%rax), %xmm1, %xmm2
+
+vfnmadd132pd %ymm0, %ymm1, %ymm2
+vfnmadd132pd (%rax), %ymm1, %ymm2
+
+vfnmadd213pd %xmm0, %xmm1, %xmm2
+vfnmadd213pd (%rax), %xmm1, %xmm2
+
+vfnmadd213pd %ymm0, %ymm1, %ymm2
+vfnmadd213pd (%rax), %ymm1, %ymm2
+
+vfnmadd231pd %xmm0, %xmm1, %xmm2
+vfnmadd231pd (%rax), %xmm1, %xmm2
+
+vfnmadd231pd %ymm0, %ymm1, %ymm2
+vfnmadd231pd (%rax), %ymm1, %ymm2
+
+vfnmadd132ps %xmm0, %xmm1, %xmm2
+vfnmadd132ps (%rax), %xmm1, %xmm2
+
+vfnmadd132ps %ymm0, %ymm1, %ymm2
+vfnmadd132ps (%rax), %ymm1, %ymm2
+
+vfnmadd213ps %xmm0, %xmm1, %xmm2
+vfnmadd213ps (%rax), %xmm1, %xmm2
+
+vfnmadd213ps %ymm0, %ymm1, %ymm2
+vfnmadd213ps (%rax), %ymm1, %ymm2
+
+vfnmadd231ps %xmm0, %xmm1, %xmm2
+vfnmadd231ps (%rax), %xmm1, %xmm2
+
+vfnmadd231ps %ymm0, %ymm1, %ymm2
+vfnmadd231ps (%rax), %ymm1, %ymm2
+
+vfnmadd132sd %xmm0, %xmm1, %xmm2
+vfnmadd132sd (%rax), %xmm1, %xmm2
+
+vfnmadd213sd %xmm0, %xmm1, %xmm2
+vfnmadd213sd (%rax), %xmm1, %xmm2
+
+vfnmadd231sd %xmm0, %xmm1, %xmm2
+vfnmadd231sd (%rax), %xmm1, %xmm2
+
+vfnmadd132ss %xmm0, %xmm1, %xmm2
+vfnmadd132ss (%rax), %xmm1, %xmm2
+
+vfnmadd213ss %xmm0, %xmm1, %xmm2
+vfnmadd213ss (%rax), %xmm1, %xmm2
+
+vfnmadd231ss %xmm0, %xmm1, %xmm2
+vfnmadd231ss (%rax), %xmm1, %xmm2
+
+vfnmsub132pd %xmm0, %xmm1, %xmm2
+vfnmsub132pd (%rax), %xmm1, %xmm2
+
+vfnmsub132pd %ymm0, %ymm1, %ymm2
+vfnmsub132pd (%rax), %ymm1, %ymm2
+
+vfnmsub213pd %xmm0, %xmm1, %xmm2
+vfnmsub213pd (%rax), %xmm1, %xmm2
+
+vfnmsub213pd %ymm0, %ymm1, %ymm2
+vfnmsub213pd (%rax), %ymm1, %ymm2
+
+vfnmsub231pd %xmm0, %xmm1, %xmm2
+vfnmsub231pd (%rax), %xmm1, %xmm2
+
+vfnmsub231pd %ymm0, %ymm1, %ymm2
+vfnmsub231pd (%rax), %ymm1, %ymm2
+
+vfnmsub132ps %xmm0, %xmm1, %xmm2
+vfnmsub132ps (%rax), %xmm1, %xmm2
+
+vfnmsub132ps %ymm0, %ymm1, %ymm2
+vfnmsub132ps (%rax), %ymm1, %ymm2
+
+vfnmsub213ps %xmm0, %xmm1, %xmm2
+vfnmsub213ps (%rax), %xmm1, %xmm2
+
+vfnmsub213ps %ymm0, %ymm1, %ymm2
+vfnmsub213ps (%rax), %ymm1, %ymm2
+
+vfnmsub231ps %xmm0, %xmm1, %xmm2
+vfnmsub231ps (%rax), %xmm1, %xmm2
+
+vfnmsub231ps %ymm0, %ymm1, %ymm2
+vfnmsub231ps (%rax), %ymm1, %ymm2
+
+vfnmsub132sd %xmm0, %xmm1, %xmm2
+vfnmsub132sd (%rax), %xmm1, %xmm2
+
+vfnmsub213sd %xmm0, %xmm1, %xmm2
+vfnmsub213sd (%rax), %xmm1, %xmm2
+
+vfnmsub231sd %xmm0, %xmm1, %xmm2
+vfnmsub231sd (%rax), %xmm1, %xmm2
+
+vfnmsub132ss %xmm0, %xmm1, %xmm2
+vfnmsub132ss (%rax), %xmm1, %xmm2
+
+vfnmsub213ss %xmm0, %xmm1, %xmm2
+vfnmsub213ss (%rax), %xmm1, %xmm2
+
+vfnmsub231ss %xmm0, %xmm1, %xmm2
+vfnmsub231ss (%rax), %xmm1, %xmm2
+
+# CHECK: Instruction Info:
+# CHECK-NEXT: [1]: #uOps
+# CHECK-NEXT: [2]: Latency
+# CHECK-NEXT: [3]: RThroughput
+# CHECK-NEXT: [4]: MayLoad
+# CHECK-NEXT: [5]: MayStore
+# CHECK-NEXT: [6]: HasSideEffects (U)
+
+# CHECK: [1] [2] [3] [4] [5] [6] Instructions:
+# CHECK-NEXT: 1 5 0.50 vfmadd132pd %xmm0, %xmm1, %xmm2
+# CHECK-NEXT: 2 10 0.50 * vfmadd132pd (%rax), %xmm1, %xmm2
+# CHECK-NEXT: 1 5 0.50 vfmadd132pd %ymm0, %ymm1, %ymm2
+# CHECK-NEXT: 2 10 0.50 * vfmadd132pd (%rax), %ymm1, %ymm2
+# CHECK-NEXT: 1 5 0.50 vfmadd213pd %xmm0, %xmm1, %xmm2
+# CHECK-NEXT: 2 10 0.50 * vfmadd213pd (%rax), %xmm1, %xmm2
+# CHECK-NEXT: 1 5 0.50 vfmadd213pd %ymm0, %ymm1, %ymm2
+# CHECK-NEXT: 2 10 0.50 * vfmadd213pd (%rax), %ymm1, %ymm2
+# CHECK-NEXT: 1 5 0.50 vfmadd231pd %xmm0, %xmm1, %xmm2
+# CHECK-NEXT: 2 10 0.50 * vfmadd231pd (%rax), %xmm1, %xmm2
+# CHECK-NEXT: 1 5 0.50 vfmadd231pd %ymm0, %ymm1, %ymm2
+# CHECK-NEXT: 2 10 0.50 * vfmadd231pd (%rax), %ymm1, %ymm2
+# CHECK-NEXT: 1 5 0.50 vfmadd132ps %xmm0, %xmm1, %xmm2
+# CHECK-NEXT: 2 10 0.50 * vfmadd132ps (%rax), %xmm1, %xmm2
+# CHECK-NEXT: 1 5 0.50 vfmadd132ps %ymm0, %ymm1, %ymm2
+# CHECK-NEXT: 2 10 0.50 * vfmadd132ps (%rax), %ymm1, %ymm2
+# CHECK-NEXT: 1 5 0.50 vfmadd213ps %xmm0, %xmm1, %xmm2
+# CHECK-NEXT: 2 10 0.50 * vfmadd213ps (%rax), %xmm1, %xmm2
+# CHECK-NEXT: 1 5 0.50 vfmadd213ps %ymm0, %ymm1, %ymm2
+# CHECK-NEXT: 2 10 0.50 * vfmadd213ps (%rax), %ymm1, %ymm2
+# CHECK-NEXT: 1 5 0.50 vfmadd231ps %xmm0, %xmm1, %xmm2
+# CHECK-NEXT: 2 10 0.50 * vfmadd231ps (%rax), %xmm1, %xmm2
+# CHECK-NEXT: 1 5 0.50 vfmadd231ps %ymm0, %ymm1, %ymm2
+# CHECK-NEXT: 2 10 0.50 * vfmadd231ps (%rax), %ymm1, %ymm2
+# CHECK-NEXT: 1 5 0.50 vfmadd132sd %xmm0, %xmm1, %xmm2
+# CHECK-NEXT: 2 10 0.50 * vfmadd132sd (%rax), %xmm1, %xmm2
+# CHECK-NEXT: 1 5 0.50 vfmadd213sd %xmm0, %xmm1, %xmm2
+# CHECK-NEXT: 2 10 0.50 * vfmadd213sd (%rax), %xmm1, %xmm2
+# CHECK-NEXT: 1 5 0.50 vfmadd231sd %xmm0, %xmm1, %xmm2
+# CHECK-NEXT: 2 10 0.50 * vfmadd231sd (%rax), %xmm1, %xmm2
+# CHECK-NEXT: 1 5 0.50 vfmadd132ss %xmm0, %xmm1, %xmm2
+# CHECK-NEXT: 2 10 0.50 * vfmadd132ss (%rax), %xmm1, %xmm2
+# CHECK-NEXT: 1 5 0.50 vfmadd213ss %xmm0, %xmm1, %xmm2
+# CHECK-NEXT: 2 10 0.50 * vfmadd213ss (%rax), %xmm1, %xmm2
+# CHECK-NEXT: 1 5 0.50 vfmadd231ss %xmm0, %xmm1, %xmm2
+# CHECK-NEXT: 2 10 0.50 * vfmadd231ss (%rax), %xmm1, %xmm2
+# CHECK-NEXT: 1 5 0.50 vfmaddsub132pd %xmm0, %xmm1, %xmm2
+# CHECK-NEXT: 2 10 0.50 * vfmaddsub132pd (%rax), %xmm1, %xmm2
+# CHECK-NEXT: 1 5 0.50 vfmaddsub132pd %ymm0, %ymm1, %ymm2
+# CHECK-NEXT: 2 10 0.50 * vfmaddsub132pd (%rax), %ymm1, %ymm2
+# CHECK-NEXT: 1 5 0.50 vfmaddsub213pd %xmm0, %xmm1, %xmm2
+# CHECK-NEXT: 2 10 0.50 * vfmaddsub213pd (%rax), %xmm1, %xmm2
+# CHECK-NEXT: 1 5 0.50 vfmaddsub213pd %ymm0, %ymm1, %ymm2
+# CHECK-NEXT: 2 10 0.50 * vfmaddsub213pd (%rax), %ymm1, %ymm2
+# CHECK-NEXT: 1 5 0.50 vfmaddsub231pd %xmm0, %xmm1, %xmm2
+# CHECK-NEXT: 2 10 0.50 * vfmaddsub231pd (%rax), %xmm1, %xmm2
+# CHECK-NEXT: 1 5 0.50 vfmaddsub231pd %ymm0, %ymm1, %ymm2
+# CHECK-NEXT: 2 10 0.50 * vfmaddsub231pd (%rax), %ymm1, %ymm2
+# CHECK-NEXT: 1 5 0.50 vfmaddsub132ps %xmm0, %xmm1, %xmm2
+# CHECK-NEXT: 2 10 0.50 * vfmaddsub132ps (%rax), %xmm1, %xmm2
+# CHECK-NEXT: 1 5 0.50 vfmaddsub132ps %ymm0, %ymm1, %ymm2
+# CHECK-NEXT: 2 10 0.50 * vfmaddsub132ps (%rax), %ymm1, %ymm2
+# CHECK-NEXT: 1 5 0.50 vfmaddsub213ps %xmm0, %xmm1, %xmm2
+# CHECK-NEXT: 2 10 0.50 * vfmaddsub213ps (%rax), %xmm1, %xmm2
+# CHECK-NEXT: 1 5 0.50 vfmaddsub213ps %ymm0, %ymm1, %ymm2
+# CHECK-NEXT: 2 10 0.50 * vfmaddsub213ps (%rax), %ymm1, %ymm2
+# CHECK-NEXT: 1 5 0.50 vfmaddsub231ps %xmm0, %xmm1, %xmm2
+# CHECK-NEXT: 2 10 0.50 * vfmaddsub231ps (%rax), %xmm1, %xmm2
+# CHECK-NEXT: 1 5 0.50 vfmaddsub231ps %ymm0, %ymm1, %ymm2
+# CHECK-NEXT: 2 10 0.50 * vfmaddsub231ps (%rax), %ymm1, %ymm2
+# CHECK-NEXT: 1 5 0.50 vfmsub132pd %xmm0, %xmm1, %xmm2
+# CHECK-NEXT: 2 10 0.50 * vfmsub132pd (%rax), %xmm1, %xmm2
+# CHECK-NEXT: 1 5 0.50 vfmsub132pd %ymm0, %ymm1, %ymm2
+# CHECK-NEXT: 2 10 0.50 * vfmsub132pd (%rax), %ymm1, %ymm2
+# CHECK-NEXT: 1 5 0.50 vfmsub213pd %xmm0, %xmm1, %xmm2
+# CHECK-NEXT: 2 10 0.50 * vfmsub213pd (%rax), %xmm1, %xmm2
+# CHECK-NEXT: 1 5 0.50 vfmsub213pd %ymm0, %ymm1, %ymm2
+# CHECK-NEXT: 2 10 0.50 * vfmsub213pd (%rax), %ymm1, %ymm2
+# CHECK-NEXT: 1 5 0.50 vfmsub231pd %xmm0, %xmm1, %xmm2
+# CHECK-NEXT: 2 10 0.50 * vfmsub231pd (%rax), %xmm1, %xmm2
+# CHECK-NEXT: 1 5 0.50 vfmsub231pd %ymm0, %ymm1, %ymm2
+# CHECK-NEXT: 2 10 0.50 * vfmsub231pd (%rax), %ymm1, %ymm2
+# CHECK-NEXT: 1 5 0.50 vfmsub132ps %xmm0, %xmm1, %xmm2
+# CHECK-NEXT: 2 10 0.50 * vfmsub132ps (%rax), %xmm1, %xmm2
+# CHECK-NEXT: 1 5 0.50 vfmsub132ps %ymm0, %ymm1, %ymm2
+# CHECK-NEXT: 2 10 0.50 * vfmsub132ps (%rax), %ymm1, %ymm2
+# CHECK-NEXT: 1 5 0.50 vfmsub213ps %xmm0, %xmm1, %xmm2
+# CHECK-NEXT: 2 10 0.50 * vfmsub213ps (%rax), %xmm1, %xmm2
+# CHECK-NEXT: 1 5 0.50 vfmsub213ps %ymm0, %ymm1, %ymm2
+# CHECK-NEXT: 2 10 0.50 * vfmsub213ps (%rax), %ymm1, %ymm2
+# CHECK-NEXT: 1 5 0.50 vfmsub231ps %xmm0, %xmm1, %xmm2
+# CHECK-NEXT: 2 10 0.50 * vfmsub231ps (%rax), %xmm1, %xmm2
+# CHECK-NEXT: 1 5 0.50 vfmsub231ps %ymm0, %ymm1, %ymm2
+# CHECK-NEXT: 2 10 0.50 * vfmsub231ps (%rax), %ymm1, %ymm2
+# CHECK-NEXT: 1 5 0.50 vfmsub132sd %xmm0, %xmm1, %xmm2
+# CHECK-NEXT: 2 10 0.50 * vfmsub132sd (%rax), %xmm1, %xmm2
+# CHECK-NEXT: 1 5 0.50 vfmsub213sd %xmm0, %xmm1, %xmm2
+# CHECK-NEXT: 2 10 0.50 * vfmsub213sd (%rax), %xmm1, %xmm2
+# CHECK-NEXT: 1 5 0.50 vfmsub231sd %xmm0, %xmm1, %xmm2
+# CHECK-NEXT: 2 10 0.50 * vfmsub231sd (%rax), %xmm1, %xmm2
+# CHECK-NEXT: 1 5 0.50 vfmsub132ss %xmm0, %xmm1, %xmm2
+# CHECK-NEXT: 2 10 0.50 * vfmsub132ss (%rax), %xmm1, %xmm2
+# CHECK-NEXT: 1 5 0.50 vfmsub213ss %xmm0, %xmm1, %xmm2
+# CHECK-NEXT: 2 10 0.50 * vfmsub213ss (%rax), %xmm1, %xmm2
+# CHECK-NEXT: 1 5 0.50 vfmsub231ss %xmm0, %xmm1, %xmm2
+# CHECK-NEXT: 2 10 0.50 * vfmsub231ss (%rax), %xmm1, %xmm2
+# CHECK-NEXT: 1 5 0.50 vfmsubadd132pd %xmm0, %xmm1, %xmm2
+# CHECK-NEXT: 2 10 0.50 * vfmsubadd132pd (%rax), %xmm1, %xmm2
+# CHECK-NEXT: 1 5 0.50 vfmsubadd132pd %ymm0, %ymm1, %ymm2
+# CHECK-NEXT: 2 10 0.50 * vfmsubadd132pd (%rax), %ymm1, %ymm2
+# CHECK-NEXT: 1 5 0.50 vfmsubadd213pd %xmm0, %xmm1, %xmm2
+# CHECK-NEXT: 2 10 0.50 * vfmsubadd213pd (%rax), %xmm1, %xmm2
+# CHECK-NEXT: 1 5 0.50 vfmsubadd213pd %ymm0, %ymm1, %ymm2
+# CHECK-NEXT: 2 10 0.50 * vfmsubadd213pd (%rax), %ymm1, %ymm2
+# CHECK-NEXT: 1 5 0.50 vfmsubadd231pd %xmm0, %xmm1, %xmm2
+# CHECK-NEXT: 2 10 0.50 * vfmsubadd231pd (%rax), %xmm1, %xmm2
+# CHECK-NEXT: 1 5 0.50 vfmsubadd231pd %ymm0, %ymm1, %ymm2
+# CHECK-NEXT: 2 10 0.50 * vfmsubadd231pd (%rax), %ymm1, %ymm2
+# CHECK-NEXT: 1 5 0.50 vfmsubadd132ps %xmm0, %xmm1, %xmm2
+# CHECK-NEXT: 2 10 0.50 * vfmsubadd132ps (%rax), %xmm1, %xmm2
+# CHECK-NEXT: 1 5 0.50 vfmsubadd132ps %ymm0, %ymm1, %ymm2
+# CHECK-NEXT: 2 10 0.50 * vfmsubadd132ps (%rax), %ymm1, %ymm2
+# CHECK-NEXT: 1 5 0.50 vfmsubadd213ps %xmm0, %xmm1, %xmm2
+# CHECK-NEXT: 2 10 0.50 * vfmsubadd213ps (%rax), %xmm1, %xmm2
+# CHECK-NEXT: 1 5 0.50 vfmsubadd213ps %ymm0, %ymm1, %ymm2
+# CHECK-NEXT: 2 10 0.50 * vfmsubadd213ps (%rax), %ymm1, %ymm2
+# CHECK-NEXT: 1 5 0.50 vfmsubadd231ps %xmm0, %xmm1, %xmm2
+# CHECK-NEXT: 2 10 0.50 * vfmsubadd231ps (%rax), %xmm1, %xmm2
+# CHECK-NEXT: 1 5 0.50 vfmsubadd231ps %ymm0, %ymm1, %ymm2
+# CHECK-NEXT: 2 10 0.50 * vfmsubadd231ps (%rax), %ymm1, %ymm2
+# CHECK-NEXT: 1 5 0.50 vfnmadd132pd %xmm0, %xmm1, %xmm2
+# CHECK-NEXT: 2 10 0.50 * vfnmadd132pd (%rax), %xmm1, %xmm2
+# CHECK-NEXT: 1 5 0.50 vfnmadd132pd %ymm0, %ymm1, %ymm2
+# CHECK-NEXT: 2 10 0.50 * vfnmadd132pd (%rax), %ymm1, %ymm2
+# CHECK-NEXT: 1 5 0.50 vfnmadd213pd %xmm0, %xmm1, %xmm2
+# CHECK-NEXT: 2 10 0.50 * vfnmadd213pd (%rax), %xmm1, %xmm2
+# CHECK-NEXT: 1 5 0.50 vfnmadd213pd %ymm0, %ymm1, %ymm2
+# CHECK-NEXT: 2 10 0.50 * vfnmadd213pd (%rax), %ymm1, %ymm2
+# CHECK-NEXT: 1 5 0.50 vfnmadd231pd %xmm0, %xmm1, %xmm2
+# CHECK-NEXT: 2 10 0.50 * vfnmadd231pd (%rax), %xmm1, %xmm2
+# CHECK-NEXT: 1 5 0.50 vfnmadd231pd %ymm0, %ymm1, %ymm2
+# CHECK-NEXT: 2 10 0.50 * vfnmadd231pd (%rax), %ymm1, %ymm2
+# CHECK-NEXT: 1 5 0.50 vfnmadd132ps %xmm0, %xmm1, %xmm2
+# CHECK-NEXT: 2 10 0.50 * vfnmadd132ps (%rax), %xmm1, %xmm2
+# CHECK-NEXT: 1 5 0.50 vfnmadd132ps %ymm0, %ymm1, %ymm2
+# CHECK-NEXT: 2 10 0.50 * vfnmadd132ps (%rax), %ymm1, %ymm2
+# CHECK-NEXT: 1 5 0.50 vfnmadd213ps %xmm0, %xmm1, %xmm2
+# CHECK-NEXT: 2 10 0.50 * vfnmadd213ps (%rax), %xmm1, %xmm2
+# CHECK-NEXT: 1 5 0.50 vfnmadd213ps %ymm0, %ymm1, %ymm2
+# CHECK-NEXT: 2 10 0.50 * vfnmadd213ps (%rax), %ymm1, %ymm2
+# CHECK-NEXT: 1 5 0.50 vfnmadd231ps %xmm0, %xmm1, %xmm2
+# CHECK-NEXT: 2 10 0.50 * vfnmadd231ps (%rax), %xmm1, %xmm2
+# CHECK-NEXT: 1 5 0.50 vfnmadd231ps %ymm0, %ymm1, %ymm2
+# CHECK-NEXT: 2 10 0.50 * vfnmadd231ps (%rax), %ymm1, %ymm2
+# CHECK-NEXT: 1 5 0.50 vfnmadd132sd %xmm0, %xmm1, %xmm2
+# CHECK-NEXT: 2 10 0.50 * vfnmadd132sd (%rax), %xmm1, %xmm2
+# CHECK-NEXT: 1 5 0.50 vfnmadd213sd %xmm0, %xmm1, %xmm2
+# CHECK-NEXT: 2 10 0.50 * vfnmadd213sd (%rax), %xmm1, %xmm2
+# CHECK-NEXT: 1 5 0.50 vfnmadd231sd %xmm0, %xmm1, %xmm2
+# CHECK-NEXT: 2 10 0.50 * vfnmadd231sd (%rax), %xmm1, %xmm2
+# CHECK-NEXT: 1 5 0.50 vfnmadd132ss %xmm0, %xmm1, %xmm2
+# CHECK-NEXT: 2 10 0.50 * vfnmadd132ss (%rax), %xmm1, %xmm2
+# CHECK-NEXT: 1 5 0.50 vfnmadd213ss %xmm0, %xmm1, %xmm2
+# CHECK-NEXT: 2 10 0.50 * vfnmadd213ss (%rax), %xmm1, %xmm2
+# CHECK-NEXT: 1 5 0.50 vfnmadd231ss %xmm0, %xmm1, %xmm2
+# CHECK-NEXT: 2 10 0.50 * vfnmadd231ss (%rax), %xmm1, %xmm2
+# CHECK-NEXT: 1 5 0.50 vfnmsub132pd %xmm0, %xmm1, %xmm2
+# CHECK-NEXT: 2 10 0.50 * vfnmsub132pd (%rax), %xmm1, %xmm2
+# CHECK-NEXT: 1 5 0.50 vfnmsub132pd %ymm0, %ymm1, %ymm2
+# CHECK-NEXT: 2 10 0.50 * vfnmsub132pd (%rax), %ymm1, %ymm2
+# CHECK-NEXT: 1 5 0.50 vfnmsub213pd %xmm0, %xmm1, %xmm2
+# CHECK-NEXT: 2 10 0.50 * vfnmsub213pd (%rax), %xmm1, %xmm2
+# CHECK-NEXT: 1 5 0.50 vfnmsub213pd %ymm0, %ymm1, %ymm2
+# CHECK-NEXT: 2 10 0.50 * vfnmsub213pd (%rax), %ymm1, %ymm2
+# CHECK-NEXT: 1 5 0.50 vfnmsub231pd %xmm0, %xmm1, %xmm2
+# CHECK-NEXT: 2 10 0.50 * vfnmsub231pd (%rax), %xmm1, %xmm2
+# CHECK-NEXT: 1 5 0.50 vfnmsub231pd %ymm0, %ymm1, %ymm2
+# CHECK-NEXT: 2 10 0.50 * vfnmsub231pd (%rax), %ymm1, %ymm2
+# CHECK-NEXT: 1 5 0.50 vfnmsub132ps %xmm0, %xmm1, %xmm2
+# CHECK-NEXT: 2 10 0.50 * vfnmsub132ps (%rax), %xmm1, %xmm2
+# CHECK-NEXT: 1 5 0.50 vfnmsub132ps %ymm0, %ymm1, %ymm2
+# CHECK-NEXT: 2 10 0.50 * vfnmsub132ps (%rax), %ymm1, %ymm2
+# CHECK-NEXT: 1 5 0.50 vfnmsub213ps %xmm0, %xmm1, %xmm2
+# CHECK-NEXT: 2 10 0.50 * vfnmsub213ps (%rax), %xmm1, %xmm2
+# CHECK-NEXT: 1 5 0.50 vfnmsub213ps %ymm0, %ymm1, %ymm2
+# CHECK-NEXT: 2 10 0.50 * vfnmsub213ps (%rax), %ymm1, %ymm2
+# CHECK-NEXT: 1 5 0.50 vfnmsub231ps %xmm0, %xmm1, %xmm2
+# CHECK-NEXT: 2 10 0.50 * vfnmsub231ps (%rax), %xmm1, %xmm2
+# CHECK-NEXT: 1 5 0.50 vfnmsub231ps %ymm0, %ymm1, %ymm2
+# CHECK-NEXT: 2 10 0.50 * vfnmsub231ps (%rax), %ymm1, %ymm2
+# CHECK-NEXT: 1 5 0.50 vfnmsub132sd %xmm0, %xmm1, %xmm2
+# CHECK-NEXT: 2 10 0.50 * vfnmsub132sd (%rax), %xmm1, %xmm2
+# CHECK-NEXT: 1 5 0.50 vfnmsub213sd %xmm0, %xmm1, %xmm2
+# CHECK-NEXT: 2 10 0.50 * vfnmsub213sd (%rax), %xmm1, %xmm2
+# CHECK-NEXT: 1 5 0.50 vfnmsub231sd %xmm0, %xmm1, %xmm2
+# CHECK-NEXT: 2 10 0.50 * vfnmsub231sd (%rax), %xmm1, %xmm2
+# CHECK-NEXT: 1 5 0.50 vfnmsub132ss %xmm0, %xmm1, %xmm2
+# CHECK-NEXT: 2 10 0.50 * vfnmsub132ss (%rax), %xmm1, %xmm2
+# CHECK-NEXT: 1 5 0.50 vfnmsub213ss %xmm0, %xmm1, %xmm2
+# CHECK-NEXT: 2 10 0.50 * vfnmsub213ss (%rax), %xmm1, %xmm2
+# CHECK-NEXT: 1 5 0.50 vfnmsub231ss %xmm0, %xmm1, %xmm2
+# CHECK-NEXT: 2 10 0.50 * vfnmsub231ss (%rax), %xmm1, %xmm2
+
+# CHECK: Resources:
+# CHECK-NEXT: [0] - SBDivider
+# CHECK-NEXT: [1] - SBFPDivider
+# CHECK-NEXT: [2] - SBPort0
+# CHECK-NEXT: [3] - SBPort1
+# CHECK-NEXT: [4] - SBPort4
+# CHECK-NEXT: [5] - SBPort5
+# CHECK-NEXT: [6.0] - SBPort23
+# CHECK-NEXT: [6.1] - SBPort23
+
+# CHECK: Resource pressure per iteration:
+# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6.0] [6.1]
+# CHECK-NEXT: - - 96.00 96.00 - - 48.00 48.00
+
+# CHECK: Resource pressure by instruction:
+# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6.0] [6.1] Instructions:
+# CHECK-NEXT: - - 0.50 0.50 - - - - vfmadd132pd %xmm0, %xmm1, %xmm2
+# CHECK-NEXT: - - 0.50 0.50 - - 0.50 0.50 vfmadd132pd (%rax), %xmm1, %xmm2
+# CHECK-NEXT: - - 0.50 0.50 - - - - vfmadd132pd %ymm0, %ymm1, %ymm2
+# CHECK-NEXT: - - 0.50 0.50 - - 0.50 0.50 vfmadd132pd (%rax), %ymm1, %ymm2
+# CHECK-NEXT: - - 0.50 0.50 - - - - vfmadd213pd %xmm0, %xmm1, %xmm2
+# CHECK-NEXT: - - 0.50 0.50 - - 0.50 0.50 vfmadd213pd (%rax), %xmm1, %xmm2
+# CHECK-NEXT: - - 0.50 0.50 - - - - vfmadd213pd %ymm0, %ymm1, %ymm2
+# CHECK-NEXT: - - 0.50 0.50 - - 0.50 0.50 vfmadd213pd (%rax), %ymm1, %ymm2
+# CHECK-NEXT: - - 0.50 0.50 - - - - vfmadd231pd %xmm0, %xmm1, %xmm2
+# CHECK-NEXT: - - 0.50 0.50 - - 0.50 0.50 vfmadd231pd (%rax), %xmm1, %xmm2
+# CHECK-NEXT: - - 0.50 0.50 - - - - vfmadd231pd %ymm0, %ymm1, %ymm2
+# CHECK-NEXT: - - 0.50 0.50 - - 0.50 0.50 vfmadd231pd (%rax), %ymm1, %ymm2
+# CHECK-NEXT: - - 0.50 0.50 - - - - vfmadd132ps %xmm0, %xmm1, %xmm2
+# CHECK-NEXT: - - 0.50 0.50 - - 0.50 0.50 vfmadd132ps (%rax), %xmm1, %xmm2
+# CHECK-NEXT: - - 0.50 0.50 - - - - vfmadd132ps %ymm0, %ymm1, %ymm2
+# CHECK-NEXT: - - 0.50 0.50 - - 0.50 0.50 vfmadd132ps (%rax), %ymm1, %ymm2
+# CHECK-NEXT: - - 0.50 0.50 - - - - vfmadd213ps %xmm0, %xmm1, %xmm2
+# CHECK-NEXT: - - 0.50 0.50 - - 0.50 0.50 vfmadd213ps (%rax), %xmm1, %xmm2
+# CHECK-NEXT: - - 0.50 0.50 - - - - vfmadd213ps %ymm0, %ymm1, %ymm2
+# CHECK-NEXT: - - 0.50 0.50 - - 0.50 0.50 vfmadd213ps (%rax), %ymm1, %ymm2
+# CHECK-NEXT: - - 0.50 0.50 - - - - vfmadd231ps %xmm0, %xmm1, %xmm2
+# CHECK-NEXT: - - 0.50 0.50 - - 0.50 0.50 vfmadd231ps (%rax), %xmm1, %xmm2
+# CHECK-NEXT: - - 0.50 0.50 - - - - vfmadd231ps %ymm0, %ymm1, %ymm2
+# CHECK-NEXT: - - 0.50 0.50 - - 0.50 0.50 vfmadd231ps (%rax), %ymm1, %ymm2
+# CHECK-NEXT: - - 0.50 0.50 - - - - vfmadd132sd %xmm0, %xmm1, %xmm2
+# CHECK-NEXT: - - 0.50 0.50 - - 0.50 0.50 vfmadd132sd (%rax), %xmm1, %xmm2
+# CHECK-NEXT: - - 0.50 0.50 - - - - vfmadd213sd %xmm0, %xmm1, %xmm2
+# CHECK-NEXT: - - 0.50 0.50 - - 0.50 0.50 vfmadd213sd (%rax), %xmm1, %xmm2
+# CHECK-NEXT: - - 0.50 0.50 - - - - vfmadd231sd %xmm0, %xmm1, %xmm2
+# CHECK-NEXT: - - 0.50 0.50 - - 0.50 0.50 vfmadd231sd (%rax), %xmm1, %xmm2
+# CHECK-NEXT: - - 0.50 0.50 - - - - vfmadd132ss %xmm0, %xmm1, %xmm2
+# CHECK-NEXT: - - 0.50 0.50 - - 0.50 0.50 vfmadd132ss (%rax), %xmm1, %xmm2
+# CHECK-NEXT: - - 0.50 0.50 - - - - vfmadd213ss %xmm0, %xmm1, %xmm2
+# CHECK-NEXT: - - 0.50 0.50 - - 0.50 0.50 vfmadd213ss (%rax), %xmm1, %xmm2
+# CHECK-NEXT: - - 0.50 0.50 - - - - vfmadd231ss %xmm0, %xmm1, %xmm2
+# CHECK-NEXT: - - 0.50 0.50 - - 0.50 0.50 vfmadd231ss (%rax), %xmm1, %xmm2
+# CHECK-NEXT: - - 0.50 0.50 - - - - vfmaddsub132pd %xmm0, %xmm1, %xmm2
+# CHECK-NEXT: - - 0.50 0.50 - - 0.50 0.50 vfmaddsub132pd (%rax), %xmm1, %xmm2
+# CHECK-NEXT: - - 0.50 0.50 - - - - vfmaddsub132pd %ymm0, %ymm1, %ymm2
+# CHECK-NEXT: - - 0.50 0.50 - - 0.50 0.50 vfmaddsub132pd (%rax), %ymm1, %ymm2
+# CHECK-NEXT: - - 0.50 0.50 - - - - vfmaddsub213pd %xmm0, %xmm1, %xmm2
+# CHECK-NEXT: - - 0.50 0.50 - - 0.50 0.50 vfmaddsub213pd (%rax), %xmm1, %xmm2
+# CHECK-NEXT: - - 0.50 0.50 - - - - vfmaddsub213pd %ymm0, %ymm1, %ymm2
+# CHECK-NEXT: - - 0.50 0.50 - - 0.50 0.50 vfmaddsub213pd (%rax), %ymm1, %ymm2
+# CHECK-NEXT: - - 0.50 0.50 - - - - vfmaddsub231pd %xmm0, %xmm1, %xmm2
+# CHECK-NEXT: - - 0.50 0.50 - - 0.50 0.50 vfmaddsub231pd (%rax), %xmm1, %xmm2
+# CHECK-NEXT: - - 0.50 0.50 - - - - vfmaddsub231pd %ymm0, %ymm1, %ymm2
+# CHECK-NEXT: - - 0.50 0.50 - - 0.50 0.50 vfmaddsub231pd (%rax), %ymm1, %ymm2
+# CHECK-NEXT: - - 0.50 0.50 - - - - vfmaddsub132ps %xmm0, %xmm1, %xmm2
+# CHECK-NEXT: - - 0.50 0.50 - - 0.50 0.50 vfmaddsub132ps (%rax), %xmm1, %xmm2
+# CHECK-NEXT: - - 0.50 0.50 - - - - vfmaddsub132ps %ymm0, %ymm1, %ymm2
+# CHECK-NEXT: - - 0.50 0.50 - - 0.50 0.50 vfmaddsub132ps (%rax), %ymm1, %ymm2
+# CHECK-NEXT: - - 0.50 0.50 - - - - vfmaddsub213ps %xmm0, %xmm1, %xmm2
+# CHECK-NEXT: - - 0.50 0.50 - - 0.50 0.50 vfmaddsub213ps (%rax), %xmm1, %xmm2
+# CHECK-NEXT: - - 0.50 0.50 - - - - vfmaddsub213ps %ymm0, %ymm1, %ymm2
+# CHECK-NEXT: - - 0.50 0.50 - - 0.50 0.50 vfmaddsub213ps (%rax), %ymm1, %ymm2
+# CHECK-NEXT: - - 0.50 0.50 - - - - vfmaddsub231ps %xmm0, %xmm1, %xmm2
+# CHECK-NEXT: - - 0.50 0.50 - - 0.50 0.50 vfmaddsub231ps (%rax), %xmm1, %xmm2
+# CHECK-NEXT: - - 0.50 0.50 - - - - vfmaddsub231ps %ymm0, %ymm1, %ymm2
+# CHECK-NEXT: - - 0.50 0.50 - - 0.50 0.50 vfmaddsub231ps (%rax), %ymm1, %ymm2
+# CHECK-NEXT: - - 0.50 0.50 - - - - vfmsub132pd %xmm0, %xmm1, %xmm2
+# CHECK-NEXT: - - 0.50 0.50 - - 0.50 0.50 vfmsub132pd (%rax), %xmm1, %xmm2
+# CHECK-NEXT: - - 0.50 0.50 - - - - vfmsub132pd %ymm0, %ymm1, %ymm2
+# CHECK-NEXT: - - 0.50 0.50 - - 0.50 0.50 vfmsub132pd (%rax), %ymm1, %ymm2
+# CHECK-NEXT: - - 0.50 0.50 - - - - vfmsub213pd %xmm0, %xmm1, %xmm2
+# CHECK-NEXT: - - 0.50 0.50 - - 0.50 0.50 vfmsub213pd (%rax), %xmm1, %xmm2
+# CHECK-NEXT: - - 0.50 0.50 - - - - vfmsub213pd %ymm0, %ymm1, %ymm2
+# CHECK-NEXT: - - 0.50 0.50 - - 0.50 0.50 vfmsub213pd (%rax), %ymm1, %ymm2
+# CHECK-NEXT: - - 0.50 0.50 - - - - vfmsub231pd %xmm0, %xmm1, %xmm2
+# CHECK-NEXT: - - 0.50 0.50 - - 0.50 0.50 vfmsub231pd (%rax), %xmm1, %xmm2
+# CHECK-NEXT: - - 0.50 0.50 - - - - vfmsub231pd %ymm0, %ymm1, %ymm2
+# CHECK-NEXT: - - 0.50 0.50 - - 0.50 0.50 vfmsub231pd (%rax), %ymm1, %ymm2
+# CHECK-NEXT: - - 0.50 0.50 - - - - vfmsub132ps %xmm0, %xmm1, %xmm2
+# CHECK-NEXT: - - 0.50 0.50 - - 0.50 0.50 vfmsub132ps (%rax), %xmm1, %xmm2
+# CHECK-NEXT: - - 0.50 0.50 - - - - vfmsub132ps %ymm0, %ymm1, %ymm2
+# CHECK-NEXT: - - 0.50 0.50 - - 0.50 0.50 vfmsub132ps (%rax), %ymm1, %ymm2
+# CHECK-NEXT: - - 0.50 0.50 - - - - vfmsub213ps %xmm0, %xmm1, %xmm2
+# CHECK-NEXT: - - 0.50 0.50 - - 0.50 0.50 vfmsub213ps (%rax), %xmm1, %xmm2
+# CHECK-NEXT: - - 0.50 0.50 - - - - vfmsub213ps %ymm0, %ymm1, %ymm2
+# CHECK-NEXT: - - 0.50 0.50 - - 0.50 0.50 vfmsub213ps (%rax), %ymm1, %ymm2
+# CHECK-NEXT: - - 0.50 0.50 - - - - vfmsub231ps %xmm0, %xmm1, %xmm2
+# CHECK-NEXT: - - 0.50 0.50 - - 0.50 0.50 vfmsub231ps (%rax), %xmm1, %xmm2
+# CHECK-NEXT: - - 0.50 0.50 - - - - vfmsub231ps %ymm0, %ymm1, %ymm2
+# CHECK-NEXT: - - 0.50 0.50 - - 0.50 0.50 vfmsub231ps (%rax), %ymm1, %ymm2
+# CHECK-NEXT: - - 0.50 0.50 - - - - vfmsub132sd %xmm0, %xmm1, %xmm2
+# CHECK-NEXT: - - 0.50 0.50 - - 0.50 0.50 vfmsub132sd (%rax), %xmm1, %xmm2
+# CHECK-NEXT: - - 0.50 0.50 - - - - vfmsub213sd %xmm0, %xmm1, %xmm2
+# CHECK-NEXT: - - 0.50 0.50 - - 0.50 0.50 vfmsub213sd (%rax), %xmm1, %xmm2
+# CHECK-NEXT: - - 0.50 0.50 - - - - vfmsub231sd %xmm0, %xmm1, %xmm2
+# CHECK-NEXT: - - 0.50 0.50 - - 0.50 0.50 vfmsub231sd (%rax), %xmm1, %xmm2
+# CHECK-NEXT: - - 0.50 0.50 - - - - vfmsub132ss %xmm0, %xmm1, %xmm2
+# CHECK-NEXT: - - 0.50 0.50 - - 0.50 0.50 vfmsub132ss (%rax), %xmm1, %xmm2
+# CHECK-NEXT: - - 0.50 0.50 - - - - vfmsub213ss %xmm0, %xmm1, %xmm2
+# CHECK-NEXT: - - 0.50 0.50 - - 0.50 0.50 vfmsub213ss (%rax), %xmm1, %xmm2
+# CHECK-NEXT: - - 0.50 0.50 - - - - vfmsub231ss %xmm0, %xmm1, %xmm2
+# CHECK-NEXT: - - 0.50 0.50 - - 0.50 0.50 vfmsub231ss (%rax), %xmm1, %xmm2
+# CHECK-NEXT: - - 0.50 0.50 - - - - vfmsubadd132pd %xmm0, %xmm1, %xmm2
+# CHECK-NEXT: - - 0.50 0.50 - - 0.50 0.50 vfmsubadd132pd (%rax), %xmm1, %xmm2
+# CHECK-NEXT: - - 0.50 0.50 - - - - vfmsubadd132pd %ymm0, %ymm1, %ymm2
+# CHECK-NEXT: - - 0.50 0.50 - - 0.50 0.50 vfmsubadd132pd (%rax), %ymm1, %ymm2
+# CHECK-NEXT: - - 0.50 0.50 - - - - vfmsubadd213pd %xmm0, %xmm1, %xmm2
+# CHECK-NEXT: - - 0.50 0.50 - - 0.50 0.50 vfmsubadd213pd (%rax), %xmm1, %xmm2
+# CHECK-NEXT: - - 0.50 0.50 - - - - vfmsubadd213pd %ymm0, %ymm1, %ymm2
+# CHECK-NEXT: - - 0.50 0.50 - - 0.50 0.50 vfmsubadd213pd (%rax), %ymm1, %ymm2
+# CHECK-NEXT: - - 0.50 0.50 - - - - vfmsubadd231pd %xmm0, %xmm1, %xmm2
+# CHECK-NEXT: - - 0.50 0.50 - - 0.50 0.50 vfmsubadd231pd (%rax), %xmm1, %xmm2
+# CHECK-NEXT: - - 0.50 0.50 - - - - vfmsubadd231pd %ymm0, %ymm1, %ymm2
+# CHECK-NEXT: - - 0.50 0.50 - - 0.50 0.50 vfmsubadd231pd (%rax), %ymm1, %ymm2
+# CHECK-NEXT: - - 0.50 0.50 - - - - vfmsubadd132ps %xmm0, %xmm1, %xmm2
+# CHECK-NEXT: - - 0.50 0.50 - - 0.50 0.50 vfmsubadd132ps (%rax), %xmm1, %xmm2
+# CHECK-NEXT: - - 0.50 0.50 - - - - vfmsubadd132ps %ymm0, %ymm1, %ymm2
+# CHECK-NEXT: - - 0.50 0.50 - - 0.50 0.50 vfmsubadd132ps (%rax), %ymm1, %ymm2
+# CHECK-NEXT: - - 0.50 0.50 - - - - vfmsubadd213ps %xmm0, %xmm1, %xmm2
+# CHECK-NEXT: - - 0.50 0.50 - - 0.50 0.50 vfmsubadd213ps (%rax), %xmm1, %xmm2
+# CHECK-NEXT: - - 0.50 0.50 - - - - vfmsubadd213ps %ymm0, %ymm1, %ymm2
+# CHECK-NEXT: - - 0.50 0.50 - - 0.50 0.50 vfmsubadd213ps (%rax), %ymm1, %ymm2
+# CHECK-NEXT: - - 0.50 0.50 - - - - vfmsubadd231ps %xmm0, %xmm1, %xmm2
+# CHECK-NEXT: - - 0.50 0.50 - - 0.50 0.50 vfmsubadd231ps (%rax), %xmm1, %xmm2
+# CHECK-NEXT: - - 0.50 0.50 - - - - vfmsubadd231ps %ymm0, %ymm1, %ymm2
+# CHECK-NEXT: - - 0.50 0.50 - - 0.50 0.50 vfmsubadd231ps (%rax), %ymm1, %ymm2
+# CHECK-NEXT: - - 0.50 0.50 - - - - vfnmadd132pd %xmm0, %xmm1, %xmm2
+# CHECK-NEXT: - - 0.50 0.50 - - 0.50 0.50 vfnmadd132pd (%rax), %xmm1, %xmm2
+# CHECK-NEXT: - - 0.50 0.50 - - - - vfnmadd132pd %ymm0, %ymm1, %ymm2
+# CHECK-NEXT: - - 0.50 0.50 - - 0.50 0.50 vfnmadd132pd (%rax), %ymm1, %ymm2
+# CHECK-NEXT: - - 0.50 0.50 - - - - vfnmadd213pd %xmm0, %xmm1, %xmm2
+# CHECK-NEXT: - - 0.50 0.50 - - 0.50 0.50 vfnmadd213pd (%rax), %xmm1, %xmm2
+# CHECK-NEXT: - - 0.50 0.50 - - - - vfnmadd213pd %ymm0, %ymm1, %ymm2
+# CHECK-NEXT: - - 0.50 0.50 - - 0.50 0.50 vfnmadd213pd (%rax), %ymm1, %ymm2
+# CHECK-NEXT: - - 0.50 0.50 - - - - vfnmadd231pd %xmm0, %xmm1, %xmm2
+# CHECK-NEXT: - - 0.50 0.50 - - 0.50 0.50 vfnmadd231pd (%rax), %xmm1, %xmm2
+# CHECK-NEXT: - - 0.50 0.50 - - - - vfnmadd231pd %ymm0, %ymm1, %ymm2
+# CHECK-NEXT: - - 0.50 0.50 - - 0.50 0.50 vfnmadd231pd (%rax), %ymm1, %ymm2
+# CHECK-NEXT: - - 0.50 0.50 - - - - vfnmadd132ps %xmm0, %xmm1, %xmm2
+# CHECK-NEXT: - - 0.50 0.50 - - 0.50 0.50 vfnmadd132ps (%rax), %xmm1, %xmm2
+# CHECK-NEXT: - - 0.50 0.50 - - - - vfnmadd132ps %ymm0, %ymm1, %ymm2
+# CHECK-NEXT: - - 0.50 0.50 - - 0.50 0.50 vfnmadd132ps (%rax), %ymm1, %ymm2
+# CHECK-NEXT: - - 0.50 0.50 - - - - vfnmadd213ps %xmm0, %xmm1, %xmm2
+# CHECK-NEXT: - - 0.50 0.50 - - 0.50 0.50 vfnmadd213ps (%rax), %xmm1, %xmm2
+# CHECK-NEXT: - - 0.50 0.50 - - - - vfnmadd213ps %ymm0, %ymm1, %ymm2
+# CHECK-NEXT: - - 0.50 0.50 - - 0.50 0.50 vfnmadd213ps (%rax), %ymm1, %ymm2
+# CHECK-NEXT: - - 0.50 0.50 - - - - vfnmadd231ps %xmm0, %xmm1, %xmm2
+# CHECK-NEXT: - - 0.50 0.50 - - 0.50 0.50 vfnmadd231ps (%rax), %xmm1, %xmm2
+# CHECK-NEXT: - - 0.50 0.50 - - - - vfnmadd231ps %ymm0, %ymm1, %ymm2
+# CHECK-NEXT: - - 0.50 0.50 - - 0.50 0.50 vfnmadd231ps (%rax), %ymm1, %ymm2
+# CHECK-NEXT: - - 0.50 0.50 - - - - vfnmadd132sd %xmm0, %xmm1, %xmm2
+# CHECK-NEXT: - - 0.50 0.50 - - 0.50 0.50 vfnmadd132sd (%rax), %xmm1, %xmm2
+# CHECK-NEXT: - - 0.50 0.50 - - - - vfnmadd213sd %xmm0, %xmm1, %xmm2
+# CHECK-NEXT: - - 0.50 0.50 - - 0.50 0.50 vfnmadd213sd (%rax), %xmm1, %xmm2
+# CHECK-NEXT: - - 0.50 0.50 - - - - vfnmadd231sd %xmm0, %xmm1, %xmm2
+# CHECK-NEXT: - - 0.50 0.50 - - 0.50 0.50 vfnmadd231sd (%rax), %xmm1, %xmm2
+# CHECK-NEXT: - - 0.50 0.50 - - - - vfnmadd132ss %xmm0, %xmm1, %xmm2
+# CHECK-NEXT: - - 0.50 0.50 - - 0.50 0.50 vfnmadd132ss (%rax), %xmm1, %xmm2
+# CHECK-NEXT: - - 0.50 0.50 - - - - vfnmadd213ss %xmm0, %xmm1, %xmm2
+# CHECK-NEXT: - - 0.50 0.50 - - 0.50 0.50 vfnmadd213ss (%rax), %xmm1, %xmm2
+# CHECK-NEXT: - - 0.50 0.50 - - - - vfnmadd231ss %xmm0, %xmm1, %xmm2
+# CHECK-NEXT: - - 0.50 0.50 - - 0.50 0.50 vfnmadd231ss (%rax), %xmm1, %xmm2
+# CHECK-NEXT: - - 0.50 0.50 - - - - vfnmsub132pd %xmm0, %xmm1, %xmm2
+# CHECK-NEXT: - - 0.50 0.50 - - 0.50 0.50 vfnmsub132pd (%rax), %xmm1, %xmm2
+# CHECK-NEXT: - - 0.50 0.50 - - - - vfnmsub132pd %ymm0, %ymm1, %ymm2
+# CHECK-NEXT: - - 0.50 0.50 - - 0.50 0.50 vfnmsub132pd (%rax), %ymm1, %ymm2
+# CHECK-NEXT: - - 0.50 0.50 - - - - vfnmsub213pd %xmm0, %xmm1, %xmm2
+# CHECK-NEXT: - - 0.50 0.50 - - 0.50 0.50 vfnmsub213pd (%rax), %xmm1, %xmm2
+# CHECK-NEXT: - - 0.50 0.50 - - - - vfnmsub213pd %ymm0, %ymm1, %ymm2
+# CHECK-NEXT: - - 0.50 0.50 - - 0.50 0.50 vfnmsub213pd (%rax), %ymm1, %ymm2
+# CHECK-NEXT: - - 0.50 0.50 - - - - vfnmsub231pd %xmm0, %xmm1, %xmm2
+# CHECK-NEXT: - - 0.50 0.50 - - 0.50 0.50 vfnmsub231pd (%rax), %xmm1, %xmm2
+# CHECK-NEXT: - - 0.50 0.50 - - - - vfnmsub231pd %ymm0, %ymm1, %ymm2
+# CHECK-NEXT: - - 0.50 0.50 - - 0.50 0.50 vfnmsub231pd (%rax), %ymm1, %ymm2
+# CHECK-NEXT: - - 0.50 0.50 - - - - vfnmsub132ps %xmm0, %xmm1, %xmm2
+# CHECK-NEXT: - - 0.50 0.50 - - 0.50 0.50 vfnmsub132ps (%rax), %xmm1, %xmm2
+# CHECK-NEXT: - - 0.50 0.50 - - - - vfnmsub132ps %ymm0, %ymm1, %ymm2
+# CHECK-NEXT: - - 0.50 0.50 - - 0.50 0.50 vfnmsub132ps (%rax), %ymm1, %ymm2
+# CHECK-NEXT: - - 0.50 0.50 - - - - vfnmsub213ps %xmm0, %xmm1, %xmm2
+# CHECK-NEXT: - - 0.50 0.50 - - 0.50 0.50 vfnmsub213ps (%rax), %xmm1, %xmm2
+# CHECK-NEXT: - - 0.50 0.50 - - - - vfnmsub213ps %ymm0, %ymm1, %ymm2
+# CHECK-NEXT: - - 0.50 0.50 - - 0.50 0.50 vfnmsub213ps (%rax), %ymm1, %ymm2
+# CHECK-NEXT: - - 0.50 0.50 - - - - vfnmsub231ps %xmm0, %xmm1, %xmm2
+# CHECK-NEXT: - - 0.50 0.50 - - 0.50 0.50 vfnmsub231ps (%rax), %xmm1, %xmm2
+# CHECK-NEXT: - - 0.50 0.50 - - - - vfnmsub231ps %ymm0, %ymm1, %ymm2
+# CHECK-NEXT: - - 0.50 0.50 - - 0.50 0.50 vfnmsub231ps (%rax), %ymm1, %ymm2
+# CHECK-NEXT: - - 0.50 0.50 - - - - vfnmsub132sd %xmm0, %xmm1, %xmm2
+# CHECK-NEXT: - - 0.50 0.50 - - 0.50 0.50 vfnmsub132sd (%rax), %xmm1, %xmm2
+# CHECK-NEXT: - - 0.50 0.50 - - - - vfnmsub213sd %xmm0, %xmm1, %xmm2
+# CHECK-NEXT: - - 0.50 0.50 - - 0.50 0.50 vfnmsub213sd (%rax), %xmm1, %xmm2
+# CHECK-NEXT: - - 0.50 0.50 - - - - vfnmsub231sd %xmm0, %xmm1, %xmm2
+# CHECK-NEXT: - - 0.50 0.50 - - 0.50 0.50 vfnmsub231sd (%rax), %xmm1, %xmm2
+# CHECK-NEXT: - - 0.50 0.50 - - - - vfnmsub132ss %xmm0, %xmm1, %xmm2
+# CHECK-NEXT: - - 0.50 0.50 - - 0.50 0.50 vfnmsub132ss (%rax), %xmm1, %xmm2
+# CHECK-NEXT: - - 0.50 0.50 - - - - vfnmsub213ss %xmm0, %xmm1, %xmm2
+# CHECK-NEXT: - - 0.50 0.50 - - 0.50 0.50 vfnmsub213ss (%rax), %xmm1, %xmm2
+# CHECK-NEXT: - - 0.50 0.50 - - - - vfnmsub231ss %xmm0, %xmm1, %xmm2
+# CHECK-NEXT: - - 0.50 0.50 - - 0.50 0.50 vfnmsub231ss (%rax), %xmm1, %xmm2
--- /dev/null
+# NOTE: Assertions have been autogenerated by utils/update_mca_test_checks.py
+# RUN: llvm-mca -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -instruction-tables < %s | FileCheck %s
+
+vfmaddpd %xmm0, %xmm1, %xmm2, %xmm3
+vfmaddpd (%rax), %xmm1, %xmm2, %xmm3
+vfmaddpd %xmm0, (%rax), %xmm2, %xmm3
+
+vfmaddpd %ymm0, %ymm1, %ymm2, %ymm3
+vfmaddpd (%rax), %ymm1, %ymm2, %ymm3
+vfmaddpd %ymm0, (%rax), %ymm2, %ymm3
+
+vfmaddps %xmm0, %xmm1, %xmm2, %xmm3
+vfmaddps (%rax), %xmm1, %xmm2, %xmm3
+vfmaddps %xmm0, (%rax), %xmm2, %xmm3
+
+vfmaddps %ymm0, %ymm1, %ymm2, %ymm3
+vfmaddps (%rax), %ymm1, %ymm2, %ymm3
+vfmaddps %ymm0, (%rax), %ymm2, %ymm3
+
+vfmaddsd %xmm0, %xmm1, %xmm2, %xmm3
+vfmaddsd (%rax), %xmm1, %xmm2, %xmm3
+vfmaddsd %xmm0, (%rax), %xmm2, %xmm3
+
+vfmaddss %xmm0, %xmm1, %xmm2, %xmm3
+vfmaddss (%rax), %xmm1, %xmm2, %xmm3
+vfmaddss %xmm0, (%rax), %xmm2, %xmm3
+
+vfmaddsubpd %xmm0, %xmm1, %xmm2, %xmm3
+vfmaddsubpd (%rax), %xmm1, %xmm2, %xmm3
+vfmaddsubpd %xmm0, (%rax), %xmm2, %xmm3
+
+vfmaddsubpd %ymm0, %ymm1, %ymm2, %ymm3
+vfmaddsubpd (%rax), %ymm1, %ymm2, %ymm3
+vfmaddsubpd %ymm0, (%rax), %ymm2, %ymm3
+
+vfmaddsubps %xmm0, %xmm1, %xmm2, %xmm3
+vfmaddsubps (%rax), %xmm1, %xmm2, %xmm3
+vfmaddsubps %xmm0, (%rax), %xmm2, %xmm3
+
+vfmaddsubps %ymm0, %ymm1, %ymm2, %ymm3
+vfmaddsubps (%rax), %ymm1, %ymm2, %ymm3
+vfmaddsubps %ymm0, (%rax), %ymm2, %ymm3
+
+vfmsubaddpd %xmm0, %xmm1, %xmm2, %xmm3
+vfmsubaddpd (%rax), %xmm1, %xmm2, %xmm3
+vfmsubaddpd %xmm0, (%rax), %xmm2, %xmm3
+
+vfmsubaddpd %ymm0, %ymm1, %ymm2, %ymm3
+vfmsubaddpd (%rax), %ymm1, %ymm2, %ymm3
+vfmsubaddpd %ymm0, (%rax), %ymm2, %ymm3
+
+vfmsubaddps %xmm0, %xmm1, %xmm2, %xmm3
+vfmsubaddps (%rax), %xmm1, %xmm2, %xmm3
+vfmsubaddps %xmm0, (%rax), %xmm2, %xmm3
+
+vfmsubaddps %ymm0, %ymm1, %ymm2, %ymm3
+vfmsubaddps (%rax), %ymm1, %ymm2, %ymm3
+vfmsubaddps %ymm0, (%rax), %ymm2, %ymm3
+
+vfmsubpd %xmm0, %xmm1, %xmm2, %xmm3
+vfmsubpd (%rax), %xmm1, %xmm2, %xmm3
+vfmsubpd %xmm0, (%rax), %xmm2, %xmm3
+
+vfmsubpd %ymm0, %ymm1, %ymm2, %ymm3
+vfmsubpd (%rax), %ymm1, %ymm2, %ymm3
+vfmsubpd %ymm0, (%rax), %ymm2, %ymm3
+
+vfmsubps %xmm0, %xmm1, %xmm2, %xmm3
+vfmsubps (%rax), %xmm1, %xmm2, %xmm3
+vfmsubps %xmm0, (%rax), %xmm2, %xmm3
+
+vfmsubps %ymm0, %ymm1, %ymm2, %ymm3
+vfmsubps (%rax), %ymm1, %ymm2, %ymm3
+vfmsubps %ymm0, (%rax), %ymm2, %ymm3
+
+vfmsubsd %xmm0, %xmm1, %xmm2, %xmm3
+vfmsubsd (%rax), %xmm1, %xmm2, %xmm3
+vfmsubsd %xmm0, (%rax), %xmm2, %xmm3
+
+vfmsubss %xmm0, %xmm1, %xmm2, %xmm3
+vfmsubss (%rax), %xmm1, %xmm2, %xmm3
+vfmsubss %xmm0, (%rax), %xmm2, %xmm3
+
+vfnmaddpd %xmm0, %xmm1, %xmm2, %xmm3
+vfnmaddpd (%rax), %xmm1, %xmm2, %xmm3
+vfnmaddpd %xmm0, (%rax), %xmm2, %xmm3
+
+vfnmaddpd %ymm0, %ymm1, %ymm2, %ymm3
+vfnmaddpd (%rax), %ymm1, %ymm2, %ymm3
+vfnmaddpd %ymm0, (%rax), %ymm2, %ymm3
+
+vfnmaddps %xmm0, %xmm1, %xmm2, %xmm3
+vfnmaddps (%rax), %xmm1, %xmm2, %xmm3
+vfnmaddps %xmm0, (%rax), %xmm2, %xmm3
+
+vfnmaddps %ymm0, %ymm1, %ymm2, %ymm3
+vfnmaddps (%rax), %ymm1, %ymm2, %ymm3
+vfnmaddps %ymm0, (%rax), %ymm2, %ymm3
+
+vfnmaddsd %xmm0, %xmm1, %xmm2, %xmm3
+vfnmaddsd (%rax), %xmm1, %xmm2, %xmm3
+vfnmaddsd %xmm0, (%rax), %xmm2, %xmm3
+
+vfnmaddss %xmm0, %xmm1, %xmm2, %xmm3
+vfnmaddss (%rax), %xmm1, %xmm2, %xmm3
+vfnmaddss %xmm0, (%rax), %xmm2, %xmm3
+
+vfnmsubpd %xmm0, %xmm1, %xmm2, %xmm3
+vfnmsubpd (%rax), %xmm1, %xmm2, %xmm3
+vfnmsubpd %xmm0, (%rax), %xmm2, %xmm3
+
+vfnmsubpd %ymm0, %ymm1, %ymm2, %ymm3
+vfnmsubpd (%rax), %ymm1, %ymm2, %ymm3
+vfnmsubpd %ymm0, (%rax), %ymm2, %ymm3
+
+vfnmsubps %xmm0, %xmm1, %xmm2, %xmm3
+vfnmsubps (%rax), %xmm1, %xmm2, %xmm3
+vfnmsubps %xmm0, (%rax), %xmm2, %xmm3
+
+vfnmsubps %ymm0, %ymm1, %ymm2, %ymm3
+vfnmsubps (%rax), %ymm1, %ymm2, %ymm3
+vfnmsubps %ymm0, (%rax), %ymm2, %ymm3
+
+vfnmsubsd %xmm0, %xmm1, %xmm2, %xmm3
+vfnmsubsd (%rax), %xmm1, %xmm2, %xmm3
+vfnmsubsd %xmm0, (%rax), %xmm2, %xmm3
+
+vfnmsubss %xmm0, %xmm1, %xmm2, %xmm3
+vfnmsubss (%rax), %xmm1, %xmm2, %xmm3
+vfnmsubss %xmm0, (%rax), %xmm2, %xmm3
+
+# CHECK: Instruction Info:
+# CHECK-NEXT: [1]: #uOps
+# CHECK-NEXT: [2]: Latency
+# CHECK-NEXT: [3]: RThroughput
+# CHECK-NEXT: [4]: MayLoad
+# CHECK-NEXT: [5]: MayStore
+# CHECK-NEXT: [6]: HasSideEffects (U)
+
+# CHECK: [1] [2] [3] [4] [5] [6] Instructions:
+# CHECK-NEXT: 1 5 0.50 vfmaddpd %xmm0, %xmm1, %xmm2, %xmm3
+# CHECK-NEXT: 2 10 0.50 * vfmaddpd (%rax), %xmm1, %xmm2, %xmm3
+# CHECK-NEXT: 2 10 0.50 * vfmaddpd %xmm0, (%rax), %xmm2, %xmm3
+# CHECK-NEXT: 1 5 0.50 vfmaddpd %ymm0, %ymm1, %ymm2, %ymm3
+# CHECK-NEXT: 2 10 0.50 * vfmaddpd (%rax), %ymm1, %ymm2, %ymm3
+# CHECK-NEXT: 2 10 0.50 * vfmaddpd %ymm0, (%rax), %ymm2, %ymm3
+# CHECK-NEXT: 1 5 0.50 vfmaddps %xmm0, %xmm1, %xmm2, %xmm3
+# CHECK-NEXT: 2 10 0.50 * vfmaddps (%rax), %xmm1, %xmm2, %xmm3
+# CHECK-NEXT: 2 10 0.50 * vfmaddps %xmm0, (%rax), %xmm2, %xmm3
+# CHECK-NEXT: 1 5 0.50 vfmaddps %ymm0, %ymm1, %ymm2, %ymm3
+# CHECK-NEXT: 2 10 0.50 * vfmaddps (%rax), %ymm1, %ymm2, %ymm3
+# CHECK-NEXT: 2 10 0.50 * vfmaddps %ymm0, (%rax), %ymm2, %ymm3
+# CHECK-NEXT: 1 5 0.50 vfmaddsd %xmm0, %xmm1, %xmm2, %xmm3
+# CHECK-NEXT: 2 10 0.50 * vfmaddsd (%rax), %xmm1, %xmm2, %xmm3
+# CHECK-NEXT: 2 10 0.50 * vfmaddsd %xmm0, (%rax), %xmm2, %xmm3
+# CHECK-NEXT: 1 5 0.50 vfmaddss %xmm0, %xmm1, %xmm2, %xmm3
+# CHECK-NEXT: 2 10 0.50 * vfmaddss (%rax), %xmm1, %xmm2, %xmm3
+# CHECK-NEXT: 2 10 0.50 * vfmaddss %xmm0, (%rax), %xmm2, %xmm3
+# CHECK-NEXT: 1 5 0.50 vfmaddsubpd %xmm0, %xmm1, %xmm2, %xmm3
+# CHECK-NEXT: 2 10 0.50 * vfmaddsubpd (%rax), %xmm1, %xmm2, %xmm3
+# CHECK-NEXT: 2 10 0.50 * vfmaddsubpd %xmm0, (%rax), %xmm2, %xmm3
+# CHECK-NEXT: 1 5 0.50 vfmaddsubpd %ymm0, %ymm1, %ymm2, %ymm3
+# CHECK-NEXT: 2 10 0.50 * vfmaddsubpd (%rax), %ymm1, %ymm2, %ymm3
+# CHECK-NEXT: 2 10 0.50 * vfmaddsubpd %ymm0, (%rax), %ymm2, %ymm3
+# CHECK-NEXT: 1 5 0.50 vfmaddsubps %xmm0, %xmm1, %xmm2, %xmm3
+# CHECK-NEXT: 2 10 0.50 * vfmaddsubps (%rax), %xmm1, %xmm2, %xmm3
+# CHECK-NEXT: 2 10 0.50 * vfmaddsubps %xmm0, (%rax), %xmm2, %xmm3
+# CHECK-NEXT: 1 5 0.50 vfmaddsubps %ymm0, %ymm1, %ymm2, %ymm3
+# CHECK-NEXT: 2 10 0.50 * vfmaddsubps (%rax), %ymm1, %ymm2, %ymm3
+# CHECK-NEXT: 2 10 0.50 * vfmaddsubps %ymm0, (%rax), %ymm2, %ymm3
+# CHECK-NEXT: 1 5 0.50 vfmsubaddpd %xmm0, %xmm1, %xmm2, %xmm3
+# CHECK-NEXT: 2 10 0.50 * vfmsubaddpd (%rax), %xmm1, %xmm2, %xmm3
+# CHECK-NEXT: 2 10 0.50 * vfmsubaddpd %xmm0, (%rax), %xmm2, %xmm3
+# CHECK-NEXT: 1 5 0.50 vfmsubaddpd %ymm0, %ymm1, %ymm2, %ymm3
+# CHECK-NEXT: 2 10 0.50 * vfmsubaddpd (%rax), %ymm1, %ymm2, %ymm3
+# CHECK-NEXT: 2 10 0.50 * vfmsubaddpd %ymm0, (%rax), %ymm2, %ymm3
+# CHECK-NEXT: 1 5 0.50 vfmsubaddps %xmm0, %xmm1, %xmm2, %xmm3
+# CHECK-NEXT: 2 10 0.50 * vfmsubaddps (%rax), %xmm1, %xmm2, %xmm3
+# CHECK-NEXT: 2 10 0.50 * vfmsubaddps %xmm0, (%rax), %xmm2, %xmm3
+# CHECK-NEXT: 1 5 0.50 vfmsubaddps %ymm0, %ymm1, %ymm2, %ymm3
+# CHECK-NEXT: 2 10 0.50 * vfmsubaddps (%rax), %ymm1, %ymm2, %ymm3
+# CHECK-NEXT: 2 10 0.50 * vfmsubaddps %ymm0, (%rax), %ymm2, %ymm3
+# CHECK-NEXT: 1 5 0.50 vfmsubpd %xmm0, %xmm1, %xmm2, %xmm3
+# CHECK-NEXT: 2 10 0.50 * vfmsubpd (%rax), %xmm1, %xmm2, %xmm3
+# CHECK-NEXT: 2 10 0.50 * vfmsubpd %xmm0, (%rax), %xmm2, %xmm3
+# CHECK-NEXT: 1 5 0.50 vfmsubpd %ymm0, %ymm1, %ymm2, %ymm3
+# CHECK-NEXT: 2 10 0.50 * vfmsubpd (%rax), %ymm1, %ymm2, %ymm3
+# CHECK-NEXT: 2 10 0.50 * vfmsubpd %ymm0, (%rax), %ymm2, %ymm3
+# CHECK-NEXT: 1 5 0.50 vfmsubps %xmm0, %xmm1, %xmm2, %xmm3
+# CHECK-NEXT: 2 10 0.50 * vfmsubps (%rax), %xmm1, %xmm2, %xmm3
+# CHECK-NEXT: 2 10 0.50 * vfmsubps %xmm0, (%rax), %xmm2, %xmm3
+# CHECK-NEXT: 1 5 0.50 vfmsubps %ymm0, %ymm1, %ymm2, %ymm3
+# CHECK-NEXT: 2 10 0.50 * vfmsubps (%rax), %ymm1, %ymm2, %ymm3
+# CHECK-NEXT: 2 10 0.50 * vfmsubps %ymm0, (%rax), %ymm2, %ymm3
+# CHECK-NEXT: 1 5 0.50 vfmsubsd %xmm0, %xmm1, %xmm2, %xmm3
+# CHECK-NEXT: 2 10 0.50 * vfmsubsd (%rax), %xmm1, %xmm2, %xmm3
+# CHECK-NEXT: 2 10 0.50 * vfmsubsd %xmm0, (%rax), %xmm2, %xmm3
+# CHECK-NEXT: 1 5 0.50 vfmsubss %xmm0, %xmm1, %xmm2, %xmm3
+# CHECK-NEXT: 2 10 0.50 * vfmsubss (%rax), %xmm1, %xmm2, %xmm3
+# CHECK-NEXT: 2 10 0.50 * vfmsubss %xmm0, (%rax), %xmm2, %xmm3
+# CHECK-NEXT: 1 5 0.50 vfnmaddpd %xmm0, %xmm1, %xmm2, %xmm3
+# CHECK-NEXT: 2 10 0.50 * vfnmaddpd (%rax), %xmm1, %xmm2, %xmm3
+# CHECK-NEXT: 2 10 0.50 * vfnmaddpd %xmm0, (%rax), %xmm2, %xmm3
+# CHECK-NEXT: 1 5 0.50 vfnmaddpd %ymm0, %ymm1, %ymm2, %ymm3
+# CHECK-NEXT: 2 10 0.50 * vfnmaddpd (%rax), %ymm1, %ymm2, %ymm3
+# CHECK-NEXT: 2 10 0.50 * vfnmaddpd %ymm0, (%rax), %ymm2, %ymm3
+# CHECK-NEXT: 1 5 0.50 vfnmaddps %xmm0, %xmm1, %xmm2, %xmm3
+# CHECK-NEXT: 2 10 0.50 * vfnmaddps (%rax), %xmm1, %xmm2, %xmm3
+# CHECK-NEXT: 2 10 0.50 * vfnmaddps %xmm0, (%rax), %xmm2, %xmm3
+# CHECK-NEXT: 1 5 0.50 vfnmaddps %ymm0, %ymm1, %ymm2, %ymm3
+# CHECK-NEXT: 2 10 0.50 * vfnmaddps (%rax), %ymm1, %ymm2, %ymm3
+# CHECK-NEXT: 2 10 0.50 * vfnmaddps %ymm0, (%rax), %ymm2, %ymm3
+# CHECK-NEXT: 1 5 0.50 vfnmaddsd %xmm0, %xmm1, %xmm2, %xmm3
+# CHECK-NEXT: 2 10 0.50 * vfnmaddsd (%rax), %xmm1, %xmm2, %xmm3
+# CHECK-NEXT: 2 10 0.50 * vfnmaddsd %xmm0, (%rax), %xmm2, %xmm3
+# CHECK-NEXT: 1 5 0.50 vfnmaddss %xmm0, %xmm1, %xmm2, %xmm3
+# CHECK-NEXT: 2 10 0.50 * vfnmaddss (%rax), %xmm1, %xmm2, %xmm3
+# CHECK-NEXT: 2 10 0.50 * vfnmaddss %xmm0, (%rax), %xmm2, %xmm3
+# CHECK-NEXT: 1 5 0.50 vfnmsubpd %xmm0, %xmm1, %xmm2, %xmm3
+# CHECK-NEXT: 2 10 0.50 * vfnmsubpd (%rax), %xmm1, %xmm2, %xmm3
+# CHECK-NEXT: 2 10 0.50 * vfnmsubpd %xmm0, (%rax), %xmm2, %xmm3
+# CHECK-NEXT: 1 5 0.50 vfnmsubpd %ymm0, %ymm1, %ymm2, %ymm3
+# CHECK-NEXT: 2 10 0.50 * vfnmsubpd (%rax), %ymm1, %ymm2, %ymm3
+# CHECK-NEXT: 2 10 0.50 * vfnmsubpd %ymm0, (%rax), %ymm2, %ymm3
+# CHECK-NEXT: 1 5 0.50 vfnmsubps %xmm0, %xmm1, %xmm2, %xmm3
+# CHECK-NEXT: 2 10 0.50 * vfnmsubps (%rax), %xmm1, %xmm2, %xmm3
+# CHECK-NEXT: 2 10 0.50 * vfnmsubps %xmm0, (%rax), %xmm2, %xmm3
+# CHECK-NEXT: 1 5 0.50 vfnmsubps %ymm0, %ymm1, %ymm2, %ymm3
+# CHECK-NEXT: 2 10 0.50 * vfnmsubps (%rax), %ymm1, %ymm2, %ymm3
+# CHECK-NEXT: 2 10 0.50 * vfnmsubps %ymm0, (%rax), %ymm2, %ymm3
+# CHECK-NEXT: 1 5 0.50 vfnmsubsd %xmm0, %xmm1, %xmm2, %xmm3
+# CHECK-NEXT: 2 10 0.50 * vfnmsubsd (%rax), %xmm1, %xmm2, %xmm3
+# CHECK-NEXT: 2 10 0.50 * vfnmsubsd %xmm0, (%rax), %xmm2, %xmm3
+# CHECK-NEXT: 1 5 0.50 vfnmsubss %xmm0, %xmm1, %xmm2, %xmm3
+# CHECK-NEXT: 2 10 0.50 * vfnmsubss (%rax), %xmm1, %xmm2, %xmm3
+# CHECK-NEXT: 2 10 0.50 * vfnmsubss %xmm0, (%rax), %xmm2, %xmm3
+
+# CHECK: Resources:
+# CHECK-NEXT: [0] - SBDivider
+# CHECK-NEXT: [1] - SBFPDivider
+# CHECK-NEXT: [2] - SBPort0
+# CHECK-NEXT: [3] - SBPort1
+# CHECK-NEXT: [4] - SBPort4
+# CHECK-NEXT: [5] - SBPort5
+# CHECK-NEXT: [6.0] - SBPort23
+# CHECK-NEXT: [6.1] - SBPort23
+
+# CHECK: Resource pressure per iteration:
+# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6.0] [6.1]
+# CHECK-NEXT: - - 48.00 48.00 - - 32.00 32.00
+
+# CHECK: Resource pressure by instruction:
+# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6.0] [6.1] Instructions:
+# CHECK-NEXT: - - 0.50 0.50 - - - - vfmaddpd %xmm0, %xmm1, %xmm2, %xmm3
+# CHECK-NEXT: - - 0.50 0.50 - - 0.50 0.50 vfmaddpd (%rax), %xmm1, %xmm2, %xmm3
+# CHECK-NEXT: - - 0.50 0.50 - - 0.50 0.50 vfmaddpd %xmm0, (%rax), %xmm2, %xmm3
+# CHECK-NEXT: - - 0.50 0.50 - - - - vfmaddpd %ymm0, %ymm1, %ymm2, %ymm3
+# CHECK-NEXT: - - 0.50 0.50 - - 0.50 0.50 vfmaddpd (%rax), %ymm1, %ymm2, %ymm3
+# CHECK-NEXT: - - 0.50 0.50 - - 0.50 0.50 vfmaddpd %ymm0, (%rax), %ymm2, %ymm3
+# CHECK-NEXT: - - 0.50 0.50 - - - - vfmaddps %xmm0, %xmm1, %xmm2, %xmm3
+# CHECK-NEXT: - - 0.50 0.50 - - 0.50 0.50 vfmaddps (%rax), %xmm1, %xmm2, %xmm3
+# CHECK-NEXT: - - 0.50 0.50 - - 0.50 0.50 vfmaddps %xmm0, (%rax), %xmm2, %xmm3
+# CHECK-NEXT: - - 0.50 0.50 - - - - vfmaddps %ymm0, %ymm1, %ymm2, %ymm3
+# CHECK-NEXT: - - 0.50 0.50 - - 0.50 0.50 vfmaddps (%rax), %ymm1, %ymm2, %ymm3
+# CHECK-NEXT: - - 0.50 0.50 - - 0.50 0.50 vfmaddps %ymm0, (%rax), %ymm2, %ymm3
+# CHECK-NEXT: - - 0.50 0.50 - - - - vfmaddsd %xmm0, %xmm1, %xmm2, %xmm3
+# CHECK-NEXT: - - 0.50 0.50 - - 0.50 0.50 vfmaddsd (%rax), %xmm1, %xmm2, %xmm3
+# CHECK-NEXT: - - 0.50 0.50 - - 0.50 0.50 vfmaddsd %xmm0, (%rax), %xmm2, %xmm3
+# CHECK-NEXT: - - 0.50 0.50 - - - - vfmaddss %xmm0, %xmm1, %xmm2, %xmm3
+# CHECK-NEXT: - - 0.50 0.50 - - 0.50 0.50 vfmaddss (%rax), %xmm1, %xmm2, %xmm3
+# CHECK-NEXT: - - 0.50 0.50 - - 0.50 0.50 vfmaddss %xmm0, (%rax), %xmm2, %xmm3
+# CHECK-NEXT: - - 0.50 0.50 - - - - vfmaddsubpd %xmm0, %xmm1, %xmm2, %xmm3
+# CHECK-NEXT: - - 0.50 0.50 - - 0.50 0.50 vfmaddsubpd (%rax), %xmm1, %xmm2, %xmm3
+# CHECK-NEXT: - - 0.50 0.50 - - 0.50 0.50 vfmaddsubpd %xmm0, (%rax), %xmm2, %xmm3
+# CHECK-NEXT: - - 0.50 0.50 - - - - vfmaddsubpd %ymm0, %ymm1, %ymm2, %ymm3
+# CHECK-NEXT: - - 0.50 0.50 - - 0.50 0.50 vfmaddsubpd (%rax), %ymm1, %ymm2, %ymm3
+# CHECK-NEXT: - - 0.50 0.50 - - 0.50 0.50 vfmaddsubpd %ymm0, (%rax), %ymm2, %ymm3
+# CHECK-NEXT: - - 0.50 0.50 - - - - vfmaddsubps %xmm0, %xmm1, %xmm2, %xmm3
+# CHECK-NEXT: - - 0.50 0.50 - - 0.50 0.50 vfmaddsubps (%rax), %xmm1, %xmm2, %xmm3
+# CHECK-NEXT: - - 0.50 0.50 - - 0.50 0.50 vfmaddsubps %xmm0, (%rax), %xmm2, %xmm3
+# CHECK-NEXT: - - 0.50 0.50 - - - - vfmaddsubps %ymm0, %ymm1, %ymm2, %ymm3
+# CHECK-NEXT: - - 0.50 0.50 - - 0.50 0.50 vfmaddsubps (%rax), %ymm1, %ymm2, %ymm3
+# CHECK-NEXT: - - 0.50 0.50 - - 0.50 0.50 vfmaddsubps %ymm0, (%rax), %ymm2, %ymm3
+# CHECK-NEXT: - - 0.50 0.50 - - - - vfmsubaddpd %xmm0, %xmm1, %xmm2, %xmm3
+# CHECK-NEXT: - - 0.50 0.50 - - 0.50 0.50 vfmsubaddpd (%rax), %xmm1, %xmm2, %xmm3
+# CHECK-NEXT: - - 0.50 0.50 - - 0.50 0.50 vfmsubaddpd %xmm0, (%rax), %xmm2, %xmm3
+# CHECK-NEXT: - - 0.50 0.50 - - - - vfmsubaddpd %ymm0, %ymm1, %ymm2, %ymm3
+# CHECK-NEXT: - - 0.50 0.50 - - 0.50 0.50 vfmsubaddpd (%rax), %ymm1, %ymm2, %ymm3
+# CHECK-NEXT: - - 0.50 0.50 - - 0.50 0.50 vfmsubaddpd %ymm0, (%rax), %ymm2, %ymm3
+# CHECK-NEXT: - - 0.50 0.50 - - - - vfmsubaddps %xmm0, %xmm1, %xmm2, %xmm3
+# CHECK-NEXT: - - 0.50 0.50 - - 0.50 0.50 vfmsubaddps (%rax), %xmm1, %xmm2, %xmm3
+# CHECK-NEXT: - - 0.50 0.50 - - 0.50 0.50 vfmsubaddps %xmm0, (%rax), %xmm2, %xmm3
+# CHECK-NEXT: - - 0.50 0.50 - - - - vfmsubaddps %ymm0, %ymm1, %ymm2, %ymm3
+# CHECK-NEXT: - - 0.50 0.50 - - 0.50 0.50 vfmsubaddps (%rax), %ymm1, %ymm2, %ymm3
+# CHECK-NEXT: - - 0.50 0.50 - - 0.50 0.50 vfmsubaddps %ymm0, (%rax), %ymm2, %ymm3
+# CHECK-NEXT: - - 0.50 0.50 - - - - vfmsubpd %xmm0, %xmm1, %xmm2, %xmm3
+# CHECK-NEXT: - - 0.50 0.50 - - 0.50 0.50 vfmsubpd (%rax), %xmm1, %xmm2, %xmm3
+# CHECK-NEXT: - - 0.50 0.50 - - 0.50 0.50 vfmsubpd %xmm0, (%rax), %xmm2, %xmm3
+# CHECK-NEXT: - - 0.50 0.50 - - - - vfmsubpd %ymm0, %ymm1, %ymm2, %ymm3
+# CHECK-NEXT: - - 0.50 0.50 - - 0.50 0.50 vfmsubpd (%rax), %ymm1, %ymm2, %ymm3
+# CHECK-NEXT: - - 0.50 0.50 - - 0.50 0.50 vfmsubpd %ymm0, (%rax), %ymm2, %ymm3
+# CHECK-NEXT: - - 0.50 0.50 - - - - vfmsubps %xmm0, %xmm1, %xmm2, %xmm3
+# CHECK-NEXT: - - 0.50 0.50 - - 0.50 0.50 vfmsubps (%rax), %xmm1, %xmm2, %xmm3
+# CHECK-NEXT: - - 0.50 0.50 - - 0.50 0.50 vfmsubps %xmm0, (%rax), %xmm2, %xmm3
+# CHECK-NEXT: - - 0.50 0.50 - - - - vfmsubps %ymm0, %ymm1, %ymm2, %ymm3
+# CHECK-NEXT: - - 0.50 0.50 - - 0.50 0.50 vfmsubps (%rax), %ymm1, %ymm2, %ymm3
+# CHECK-NEXT: - - 0.50 0.50 - - 0.50 0.50 vfmsubps %ymm0, (%rax), %ymm2, %ymm3
+# CHECK-NEXT: - - 0.50 0.50 - - - - vfmsubsd %xmm0, %xmm1, %xmm2, %xmm3
+# CHECK-NEXT: - - 0.50 0.50 - - 0.50 0.50 vfmsubsd (%rax), %xmm1, %xmm2, %xmm3
+# CHECK-NEXT: - - 0.50 0.50 - - 0.50 0.50 vfmsubsd %xmm0, (%rax), %xmm2, %xmm3
+# CHECK-NEXT: - - 0.50 0.50 - - - - vfmsubss %xmm0, %xmm1, %xmm2, %xmm3
+# CHECK-NEXT: - - 0.50 0.50 - - 0.50 0.50 vfmsubss (%rax), %xmm1, %xmm2, %xmm3
+# CHECK-NEXT: - - 0.50 0.50 - - 0.50 0.50 vfmsubss %xmm0, (%rax), %xmm2, %xmm3
+# CHECK-NEXT: - - 0.50 0.50 - - - - vfnmaddpd %xmm0, %xmm1, %xmm2, %xmm3
+# CHECK-NEXT: - - 0.50 0.50 - - 0.50 0.50 vfnmaddpd (%rax), %xmm1, %xmm2, %xmm3
+# CHECK-NEXT: - - 0.50 0.50 - - 0.50 0.50 vfnmaddpd %xmm0, (%rax), %xmm2, %xmm3
+# CHECK-NEXT: - - 0.50 0.50 - - - - vfnmaddpd %ymm0, %ymm1, %ymm2, %ymm3
+# CHECK-NEXT: - - 0.50 0.50 - - 0.50 0.50 vfnmaddpd (%rax), %ymm1, %ymm2, %ymm3
+# CHECK-NEXT: - - 0.50 0.50 - - 0.50 0.50 vfnmaddpd %ymm0, (%rax), %ymm2, %ymm3
+# CHECK-NEXT: - - 0.50 0.50 - - - - vfnmaddps %xmm0, %xmm1, %xmm2, %xmm3
+# CHECK-NEXT: - - 0.50 0.50 - - 0.50 0.50 vfnmaddps (%rax), %xmm1, %xmm2, %xmm3
+# CHECK-NEXT: - - 0.50 0.50 - - 0.50 0.50 vfnmaddps %xmm0, (%rax), %xmm2, %xmm3
+# CHECK-NEXT: - - 0.50 0.50 - - - - vfnmaddps %ymm0, %ymm1, %ymm2, %ymm3
+# CHECK-NEXT: - - 0.50 0.50 - - 0.50 0.50 vfnmaddps (%rax), %ymm1, %ymm2, %ymm3
+# CHECK-NEXT: - - 0.50 0.50 - - 0.50 0.50 vfnmaddps %ymm0, (%rax), %ymm2, %ymm3
+# CHECK-NEXT: - - 0.50 0.50 - - - - vfnmaddsd %xmm0, %xmm1, %xmm2, %xmm3
+# CHECK-NEXT: - - 0.50 0.50 - - 0.50 0.50 vfnmaddsd (%rax), %xmm1, %xmm2, %xmm3
+# CHECK-NEXT: - - 0.50 0.50 - - 0.50 0.50 vfnmaddsd %xmm0, (%rax), %xmm2, %xmm3
+# CHECK-NEXT: - - 0.50 0.50 - - - - vfnmaddss %xmm0, %xmm1, %xmm2, %xmm3
+# CHECK-NEXT: - - 0.50 0.50 - - 0.50 0.50 vfnmaddss (%rax), %xmm1, %xmm2, %xmm3
+# CHECK-NEXT: - - 0.50 0.50 - - 0.50 0.50 vfnmaddss %xmm0, (%rax), %xmm2, %xmm3
+# CHECK-NEXT: - - 0.50 0.50 - - - - vfnmsubpd %xmm0, %xmm1, %xmm2, %xmm3
+# CHECK-NEXT: - - 0.50 0.50 - - 0.50 0.50 vfnmsubpd (%rax), %xmm1, %xmm2, %xmm3
+# CHECK-NEXT: - - 0.50 0.50 - - 0.50 0.50 vfnmsubpd %xmm0, (%rax), %xmm2, %xmm3
+# CHECK-NEXT: - - 0.50 0.50 - - - - vfnmsubpd %ymm0, %ymm1, %ymm2, %ymm3
+# CHECK-NEXT: - - 0.50 0.50 - - 0.50 0.50 vfnmsubpd (%rax), %ymm1, %ymm2, %ymm3
+# CHECK-NEXT: - - 0.50 0.50 - - 0.50 0.50 vfnmsubpd %ymm0, (%rax), %ymm2, %ymm3
+# CHECK-NEXT: - - 0.50 0.50 - - - - vfnmsubps %xmm0, %xmm1, %xmm2, %xmm3
+# CHECK-NEXT: - - 0.50 0.50 - - 0.50 0.50 vfnmsubps (%rax), %xmm1, %xmm2, %xmm3
+# CHECK-NEXT: - - 0.50 0.50 - - 0.50 0.50 vfnmsubps %xmm0, (%rax), %xmm2, %xmm3
+# CHECK-NEXT: - - 0.50 0.50 - - - - vfnmsubps %ymm0, %ymm1, %ymm2, %ymm3
+# CHECK-NEXT: - - 0.50 0.50 - - 0.50 0.50 vfnmsubps (%rax), %ymm1, %ymm2, %ymm3
+# CHECK-NEXT: - - 0.50 0.50 - - 0.50 0.50 vfnmsubps %ymm0, (%rax), %ymm2, %ymm3
+# CHECK-NEXT: - - 0.50 0.50 - - - - vfnmsubsd %xmm0, %xmm1, %xmm2, %xmm3
+# CHECK-NEXT: - - 0.50 0.50 - - 0.50 0.50 vfnmsubsd (%rax), %xmm1, %xmm2, %xmm3
+# CHECK-NEXT: - - 0.50 0.50 - - 0.50 0.50 vfnmsubsd %xmm0, (%rax), %xmm2, %xmm3
+# CHECK-NEXT: - - 0.50 0.50 - - - - vfnmsubss %xmm0, %xmm1, %xmm2, %xmm3
+# CHECK-NEXT: - - 0.50 0.50 - - 0.50 0.50 vfnmsubss (%rax), %xmm1, %xmm2, %xmm3
+# CHECK-NEXT: - - 0.50 0.50 - - 0.50 0.50 vfnmsubss %xmm0, (%rax), %xmm2, %xmm3
--- /dev/null
+# NOTE: Assertions have been autogenerated by utils/update_mca_test_checks.py
+# RUN: llvm-mca -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -instruction-tables < %s | FileCheck %s
+
+lea 0(), %cx
+lea 0(), %ecx
+lea 0(), %rcx
+lea (%eax), %cx
+lea (%eax), %ecx
+lea (%eax), %rcx
+lea (%rax), %cx
+lea (%rax), %ecx
+lea (%rax), %rcx
+lea (, %ebx), %cx
+lea (, %ebx), %ecx
+lea (, %ebx), %rcx
+lea (, %rbx), %cx
+lea (, %rbx), %ecx
+lea (, %rbx), %rcx
+lea (, %ebx, 1), %cx
+lea (, %ebx, 1), %ecx
+lea (, %ebx, 1), %rcx
+lea (, %rbx, 1), %cx
+lea (, %rbx, 1), %ecx
+lea (, %rbx, 1), %rcx
+lea (, %ebx, 2), %cx
+lea (, %ebx, 2), %ecx
+lea (, %ebx, 2), %rcx
+lea (, %rbx, 2), %cx
+lea (, %rbx, 2), %ecx
+lea (, %rbx, 2), %rcx
+lea (%eax, %ebx), %cx
+lea (%eax, %ebx), %ecx
+lea (%eax, %ebx), %rcx
+lea (%rax, %rbx), %cx
+lea (%rax, %rbx), %ecx
+lea (%rax, %rbx), %rcx
+lea (%eax, %ebx, 1), %cx
+lea (%eax, %ebx, 1), %ecx
+lea (%eax, %ebx, 1), %rcx
+lea (%rax, %rbx, 1), %cx
+lea (%rax, %rbx, 1), %ecx
+lea (%rax, %rbx, 1), %rcx
+lea (%eax, %ebx, 2), %cx
+lea (%eax, %ebx, 2), %ecx
+lea (%eax, %ebx, 2), %rcx
+lea (%rax, %rbx, 2), %cx
+lea (%rax, %rbx, 2), %ecx
+lea (%rax, %rbx, 2), %rcx
+
+lea -16(), %cx
+lea -16(), %ecx
+lea -16(), %rcx
+lea -16(%eax), %cx
+lea -16(%eax), %ecx
+lea -16(%eax), %rcx
+lea -16(%rax), %cx
+lea -16(%rax), %ecx
+lea -16(%rax), %rcx
+lea -16(, %ebx), %cx
+lea -16(, %ebx), %ecx
+lea -16(, %ebx), %rcx
+lea -16(, %rbx), %cx
+lea -16(, %rbx), %ecx
+lea -16(, %rbx), %rcx
+lea -16(, %ebx, 1), %cx
+lea -16(, %ebx, 1), %ecx
+lea -16(, %ebx, 1), %rcx
+lea -16(, %rbx, 1), %cx
+lea -16(, %rbx, 1), %ecx
+lea -16(, %rbx, 1), %rcx
+lea -16(, %ebx, 2), %cx
+lea -16(, %ebx, 2), %ecx
+lea -16(, %ebx, 2), %rcx
+lea -16(, %rbx, 2), %cx
+lea -16(, %rbx, 2), %ecx
+lea -16(, %rbx, 2), %rcx
+lea -16(%eax, %ebx), %cx
+lea -16(%eax, %ebx), %ecx
+lea -16(%eax, %ebx), %rcx
+lea -16(%rax, %rbx), %cx
+lea -16(%rax, %rbx), %ecx
+lea -16(%rax, %rbx), %rcx
+lea -16(%eax, %ebx, 1), %cx
+lea -16(%eax, %ebx, 1), %ecx
+lea -16(%eax, %ebx, 1), %rcx
+lea -16(%rax, %rbx, 1), %cx
+lea -16(%rax, %rbx, 1), %ecx
+lea -16(%rax, %rbx, 1), %rcx
+lea -16(%eax, %ebx, 2), %cx
+lea -16(%eax, %ebx, 2), %ecx
+lea -16(%eax, %ebx, 2), %rcx
+lea -16(%rax, %rbx, 2), %cx
+lea -16(%rax, %rbx, 2), %ecx
+lea -16(%rax, %rbx, 2), %rcx
+
+lea 1024(), %cx
+lea 1024(), %ecx
+lea 1024(), %rcx
+lea 1024(%eax), %cx
+lea 1024(%eax), %ecx
+lea 1024(%eax), %rcx
+lea 1024(%rax), %cx
+lea 1024(%rax), %ecx
+lea 1024(%rax), %rcx
+lea 1024(, %ebx), %cx
+lea 1024(, %ebx), %ecx
+lea 1024(, %ebx), %rcx
+lea 1024(, %rbx), %cx
+lea 1024(, %rbx), %ecx
+lea 1024(, %rbx), %rcx
+lea 1024(, %ebx, 1), %cx
+lea 1024(, %ebx, 1), %ecx
+lea 1024(, %ebx, 1), %rcx
+lea 1024(, %rbx, 1), %cx
+lea 1024(, %rbx, 1), %ecx
+lea 1024(, %rbx, 1), %rcx
+lea 1024(, %ebx, 2), %cx
+lea 1024(, %ebx, 2), %ecx
+lea 1024(, %ebx, 2), %rcx
+lea 1024(, %rbx, 2), %cx
+lea 1024(, %rbx, 2), %ecx
+lea 1024(, %rbx, 2), %rcx
+lea 1024(%eax, %ebx), %cx
+lea 1024(%eax, %ebx), %ecx
+lea 1024(%eax, %ebx), %rcx
+lea 1024(%rax, %rbx), %cx
+lea 1024(%rax, %rbx), %ecx
+lea 1024(%rax, %rbx), %rcx
+lea 1024(%eax, %ebx, 1), %cx
+lea 1024(%eax, %ebx, 1), %ecx
+lea 1024(%eax, %ebx, 1), %rcx
+lea 1024(%rax, %rbx, 1), %cx
+lea 1024(%rax, %rbx, 1), %ecx
+lea 1024(%rax, %rbx, 1), %rcx
+lea 1024(%eax, %ebx, 2), %cx
+lea 1024(%eax, %ebx, 2), %ecx
+lea 1024(%eax, %ebx, 2), %rcx
+lea 1024(%rax, %rbx, 2), %cx
+lea 1024(%rax, %rbx, 2), %ecx
+lea 1024(%rax, %rbx, 2), %rcx
+
+# CHECK: Instruction Info:
+# CHECK-NEXT: [1]: #uOps
+# CHECK-NEXT: [2]: Latency
+# CHECK-NEXT: [3]: RThroughput
+# CHECK-NEXT: [4]: MayLoad
+# CHECK-NEXT: [5]: MayStore
+# CHECK-NEXT: [6]: HasSideEffects (U)
+
+# CHECK: [1] [2] [3] [4] [5] [6] Instructions:
+# CHECK-NEXT: 1 1 0.50 leaw 0, %cx
+# CHECK-NEXT: 1 1 0.50 leal 0, %ecx
+# CHECK-NEXT: 1 1 0.50 leaq 0, %rcx
+# CHECK-NEXT: 1 1 0.50 leaw (%eax), %cx
+# CHECK-NEXT: 1 1 0.50 leal (%eax), %ecx
+# CHECK-NEXT: 1 1 0.50 leaq (%eax), %rcx
+# CHECK-NEXT: 1 1 0.50 leaw (%rax), %cx
+# CHECK-NEXT: 1 1 0.50 leal (%rax), %ecx
+# CHECK-NEXT: 1 1 0.50 leaq (%rax), %rcx
+# CHECK-NEXT: 1 1 0.50 leaw (,%ebx), %cx
+# CHECK-NEXT: 1 1 0.50 leal (,%ebx), %ecx
+# CHECK-NEXT: 1 1 0.50 leaq (,%ebx), %rcx
+# CHECK-NEXT: 1 1 0.50 leaw (,%rbx), %cx
+# CHECK-NEXT: 1 1 0.50 leal (,%rbx), %ecx
+# CHECK-NEXT: 1 1 0.50 leaq (,%rbx), %rcx
+# CHECK-NEXT: 1 1 0.50 leaw (,%ebx), %cx
+# CHECK-NEXT: 1 1 0.50 leal (,%ebx), %ecx
+# CHECK-NEXT: 1 1 0.50 leaq (,%ebx), %rcx
+# CHECK-NEXT: 1 1 0.50 leaw (,%rbx), %cx
+# CHECK-NEXT: 1 1 0.50 leal (,%rbx), %ecx
+# CHECK-NEXT: 1 1 0.50 leaq (,%rbx), %rcx
+# CHECK-NEXT: 1 1 0.50 leaw (,%ebx,2), %cx
+# CHECK-NEXT: 1 1 0.50 leal (,%ebx,2), %ecx
+# CHECK-NEXT: 1 1 0.50 leaq (,%ebx,2), %rcx
+# CHECK-NEXT: 1 1 0.50 leaw (,%rbx,2), %cx
+# CHECK-NEXT: 1 1 0.50 leal (,%rbx,2), %ecx
+# CHECK-NEXT: 1 1 0.50 leaq (,%rbx,2), %rcx
+# CHECK-NEXT: 1 1 0.50 leaw (%eax,%ebx), %cx
+# CHECK-NEXT: 1 1 0.50 leal (%eax,%ebx), %ecx
+# CHECK-NEXT: 1 1 0.50 leaq (%eax,%ebx), %rcx
+# CHECK-NEXT: 1 1 0.50 leaw (%rax,%rbx), %cx
+# CHECK-NEXT: 1 1 0.50 leal (%rax,%rbx), %ecx
+# CHECK-NEXT: 1 1 0.50 leaq (%rax,%rbx), %rcx
+# CHECK-NEXT: 1 1 0.50 leaw (%eax,%ebx), %cx
+# CHECK-NEXT: 1 1 0.50 leal (%eax,%ebx), %ecx
+# CHECK-NEXT: 1 1 0.50 leaq (%eax,%ebx), %rcx
+# CHECK-NEXT: 1 1 0.50 leaw (%rax,%rbx), %cx
+# CHECK-NEXT: 1 1 0.50 leal (%rax,%rbx), %ecx
+# CHECK-NEXT: 1 1 0.50 leaq (%rax,%rbx), %rcx
+# CHECK-NEXT: 1 1 0.50 leaw (%eax,%ebx,2), %cx
+# CHECK-NEXT: 1 1 0.50 leal (%eax,%ebx,2), %ecx
+# CHECK-NEXT: 1 1 0.50 leaq (%eax,%ebx,2), %rcx
+# CHECK-NEXT: 1 1 0.50 leaw (%rax,%rbx,2), %cx
+# CHECK-NEXT: 1 1 0.50 leal (%rax,%rbx,2), %ecx
+# CHECK-NEXT: 1 1 0.50 leaq (%rax,%rbx,2), %rcx
+# CHECK-NEXT: 1 1 0.50 leaw -16, %cx
+# CHECK-NEXT: 1 1 0.50 leal -16, %ecx
+# CHECK-NEXT: 1 1 0.50 leaq -16, %rcx
+# CHECK-NEXT: 1 1 0.50 leaw -16(%eax), %cx
+# CHECK-NEXT: 1 1 0.50 leal -16(%eax), %ecx
+# CHECK-NEXT: 1 1 0.50 leaq -16(%eax), %rcx
+# CHECK-NEXT: 1 1 0.50 leaw -16(%rax), %cx
+# CHECK-NEXT: 1 1 0.50 leal -16(%rax), %ecx
+# CHECK-NEXT: 1 1 0.50 leaq -16(%rax), %rcx
+# CHECK-NEXT: 1 1 0.50 leaw -16(,%ebx), %cx
+# CHECK-NEXT: 1 1 0.50 leal -16(,%ebx), %ecx
+# CHECK-NEXT: 1 1 0.50 leaq -16(,%ebx), %rcx
+# CHECK-NEXT: 1 1 0.50 leaw -16(,%rbx), %cx
+# CHECK-NEXT: 1 1 0.50 leal -16(,%rbx), %ecx
+# CHECK-NEXT: 1 1 0.50 leaq -16(,%rbx), %rcx
+# CHECK-NEXT: 1 1 0.50 leaw -16(,%ebx), %cx
+# CHECK-NEXT: 1 1 0.50 leal -16(,%ebx), %ecx
+# CHECK-NEXT: 1 1 0.50 leaq -16(,%ebx), %rcx
+# CHECK-NEXT: 1 1 0.50 leaw -16(,%rbx), %cx
+# CHECK-NEXT: 1 1 0.50 leal -16(,%rbx), %ecx
+# CHECK-NEXT: 1 1 0.50 leaq -16(,%rbx), %rcx
+# CHECK-NEXT: 1 1 0.50 leaw -16(,%ebx,2), %cx
+# CHECK-NEXT: 1 1 0.50 leal -16(,%ebx,2), %ecx
+# CHECK-NEXT: 1 1 0.50 leaq -16(,%ebx,2), %rcx
+# CHECK-NEXT: 1 1 0.50 leaw -16(,%rbx,2), %cx
+# CHECK-NEXT: 1 1 0.50 leal -16(,%rbx,2), %ecx
+# CHECK-NEXT: 1 1 0.50 leaq -16(,%rbx,2), %rcx
+# CHECK-NEXT: 1 1 0.50 leaw -16(%eax,%ebx), %cx
+# CHECK-NEXT: 1 1 0.50 leal -16(%eax,%ebx), %ecx
+# CHECK-NEXT: 1 1 0.50 leaq -16(%eax,%ebx), %rcx
+# CHECK-NEXT: 1 1 0.50 leaw -16(%rax,%rbx), %cx
+# CHECK-NEXT: 1 1 0.50 leal -16(%rax,%rbx), %ecx
+# CHECK-NEXT: 1 1 0.50 leaq -16(%rax,%rbx), %rcx
+# CHECK-NEXT: 1 1 0.50 leaw -16(%eax,%ebx), %cx
+# CHECK-NEXT: 1 1 0.50 leal -16(%eax,%ebx), %ecx
+# CHECK-NEXT: 1 1 0.50 leaq -16(%eax,%ebx), %rcx
+# CHECK-NEXT: 1 1 0.50 leaw -16(%rax,%rbx), %cx
+# CHECK-NEXT: 1 1 0.50 leal -16(%rax,%rbx), %ecx
+# CHECK-NEXT: 1 1 0.50 leaq -16(%rax,%rbx), %rcx
+# CHECK-NEXT: 1 1 0.50 leaw -16(%eax,%ebx,2), %cx
+# CHECK-NEXT: 1 1 0.50 leal -16(%eax,%ebx,2), %ecx
+# CHECK-NEXT: 1 1 0.50 leaq -16(%eax,%ebx,2), %rcx
+# CHECK-NEXT: 1 1 0.50 leaw -16(%rax,%rbx,2), %cx
+# CHECK-NEXT: 1 1 0.50 leal -16(%rax,%rbx,2), %ecx
+# CHECK-NEXT: 1 1 0.50 leaq -16(%rax,%rbx,2), %rcx
+# CHECK-NEXT: 1 1 0.50 leaw 1024, %cx
+# CHECK-NEXT: 1 1 0.50 leal 1024, %ecx
+# CHECK-NEXT: 1 1 0.50 leaq 1024, %rcx
+# CHECK-NEXT: 1 1 0.50 leaw 1024(%eax), %cx
+# CHECK-NEXT: 1 1 0.50 leal 1024(%eax), %ecx
+# CHECK-NEXT: 1 1 0.50 leaq 1024(%eax), %rcx
+# CHECK-NEXT: 1 1 0.50 leaw 1024(%rax), %cx
+# CHECK-NEXT: 1 1 0.50 leal 1024(%rax), %ecx
+# CHECK-NEXT: 1 1 0.50 leaq 1024(%rax), %rcx
+# CHECK-NEXT: 1 1 0.50 leaw 1024(,%ebx), %cx
+# CHECK-NEXT: 1 1 0.50 leal 1024(,%ebx), %ecx
+# CHECK-NEXT: 1 1 0.50 leaq 1024(,%ebx), %rcx
+# CHECK-NEXT: 1 1 0.50 leaw 1024(,%rbx), %cx
+# CHECK-NEXT: 1 1 0.50 leal 1024(,%rbx), %ecx
+# CHECK-NEXT: 1 1 0.50 leaq 1024(,%rbx), %rcx
+# CHECK-NEXT: 1 1 0.50 leaw 1024(,%ebx), %cx
+# CHECK-NEXT: 1 1 0.50 leal 1024(,%ebx), %ecx
+# CHECK-NEXT: 1 1 0.50 leaq 1024(,%ebx), %rcx
+# CHECK-NEXT: 1 1 0.50 leaw 1024(,%rbx), %cx
+# CHECK-NEXT: 1 1 0.50 leal 1024(,%rbx), %ecx
+# CHECK-NEXT: 1 1 0.50 leaq 1024(,%rbx), %rcx
+# CHECK-NEXT: 1 1 0.50 leaw 1024(,%ebx,2), %cx
+# CHECK-NEXT: 1 1 0.50 leal 1024(,%ebx,2), %ecx
+# CHECK-NEXT: 1 1 0.50 leaq 1024(,%ebx,2), %rcx
+# CHECK-NEXT: 1 1 0.50 leaw 1024(,%rbx,2), %cx
+# CHECK-NEXT: 1 1 0.50 leal 1024(,%rbx,2), %ecx
+# CHECK-NEXT: 1 1 0.50 leaq 1024(,%rbx,2), %rcx
+# CHECK-NEXT: 1 1 0.50 leaw 1024(%eax,%ebx), %cx
+# CHECK-NEXT: 1 1 0.50 leal 1024(%eax,%ebx), %ecx
+# CHECK-NEXT: 1 1 0.50 leaq 1024(%eax,%ebx), %rcx
+# CHECK-NEXT: 1 1 0.50 leaw 1024(%rax,%rbx), %cx
+# CHECK-NEXT: 1 1 0.50 leal 1024(%rax,%rbx), %ecx
+# CHECK-NEXT: 1 1 0.50 leaq 1024(%rax,%rbx), %rcx
+# CHECK-NEXT: 1 1 0.50 leaw 1024(%eax,%ebx), %cx
+# CHECK-NEXT: 1 1 0.50 leal 1024(%eax,%ebx), %ecx
+# CHECK-NEXT: 1 1 0.50 leaq 1024(%eax,%ebx), %rcx
+# CHECK-NEXT: 1 1 0.50 leaw 1024(%rax,%rbx), %cx
+# CHECK-NEXT: 1 1 0.50 leal 1024(%rax,%rbx), %ecx
+# CHECK-NEXT: 1 1 0.50 leaq 1024(%rax,%rbx), %rcx
+# CHECK-NEXT: 1 1 0.50 leaw 1024(%eax,%ebx,2), %cx
+# CHECK-NEXT: 1 1 0.50 leal 1024(%eax,%ebx,2), %ecx
+# CHECK-NEXT: 1 1 0.50 leaq 1024(%eax,%ebx,2), %rcx
+# CHECK-NEXT: 1 1 0.50 leaw 1024(%rax,%rbx,2), %cx
+# CHECK-NEXT: 1 1 0.50 leal 1024(%rax,%rbx,2), %ecx
+# CHECK-NEXT: 1 1 0.50 leaq 1024(%rax,%rbx,2), %rcx
+
+# CHECK: Resources:
+# CHECK-NEXT: [0] - SBDivider
+# CHECK-NEXT: [1] - SBFPDivider
+# CHECK-NEXT: [2] - SBPort0
+# CHECK-NEXT: [3] - SBPort1
+# CHECK-NEXT: [4] - SBPort4
+# CHECK-NEXT: [5] - SBPort5
+# CHECK-NEXT: [6.0] - SBPort23
+# CHECK-NEXT: [6.1] - SBPort23
+
+# CHECK: Resource pressure per iteration:
+# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6.0] [6.1]
+# CHECK-NEXT: - - 67.50 67.50 - - - -
+
+# CHECK: Resource pressure by instruction:
+# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6.0] [6.1] Instructions:
+# CHECK-NEXT: - - 0.50 0.50 - - - - leaw 0, %cx
+# CHECK-NEXT: - - 0.50 0.50 - - - - leal 0, %ecx
+# CHECK-NEXT: - - 0.50 0.50 - - - - leaq 0, %rcx
+# CHECK-NEXT: - - 0.50 0.50 - - - - leaw (%eax), %cx
+# CHECK-NEXT: - - 0.50 0.50 - - - - leal (%eax), %ecx
+# CHECK-NEXT: - - 0.50 0.50 - - - - leaq (%eax), %rcx
+# CHECK-NEXT: - - 0.50 0.50 - - - - leaw (%rax), %cx
+# CHECK-NEXT: - - 0.50 0.50 - - - - leal (%rax), %ecx
+# CHECK-NEXT: - - 0.50 0.50 - - - - leaq (%rax), %rcx
+# CHECK-NEXT: - - 0.50 0.50 - - - - leaw (,%ebx), %cx
+# CHECK-NEXT: - - 0.50 0.50 - - - - leal (,%ebx), %ecx
+# CHECK-NEXT: - - 0.50 0.50 - - - - leaq (,%ebx), %rcx
+# CHECK-NEXT: - - 0.50 0.50 - - - - leaw (,%rbx), %cx
+# CHECK-NEXT: - - 0.50 0.50 - - - - leal (,%rbx), %ecx
+# CHECK-NEXT: - - 0.50 0.50 - - - - leaq (,%rbx), %rcx
+# CHECK-NEXT: - - 0.50 0.50 - - - - leaw (,%ebx), %cx
+# CHECK-NEXT: - - 0.50 0.50 - - - - leal (,%ebx), %ecx
+# CHECK-NEXT: - - 0.50 0.50 - - - - leaq (,%ebx), %rcx
+# CHECK-NEXT: - - 0.50 0.50 - - - - leaw (,%rbx), %cx
+# CHECK-NEXT: - - 0.50 0.50 - - - - leal (,%rbx), %ecx
+# CHECK-NEXT: - - 0.50 0.50 - - - - leaq (,%rbx), %rcx
+# CHECK-NEXT: - - 0.50 0.50 - - - - leaw (,%ebx,2), %cx
+# CHECK-NEXT: - - 0.50 0.50 - - - - leal (,%ebx,2), %ecx
+# CHECK-NEXT: - - 0.50 0.50 - - - - leaq (,%ebx,2), %rcx
+# CHECK-NEXT: - - 0.50 0.50 - - - - leaw (,%rbx,2), %cx
+# CHECK-NEXT: - - 0.50 0.50 - - - - leal (,%rbx,2), %ecx
+# CHECK-NEXT: - - 0.50 0.50 - - - - leaq (,%rbx,2), %rcx
+# CHECK-NEXT: - - 0.50 0.50 - - - - leaw (%eax,%ebx), %cx
+# CHECK-NEXT: - - 0.50 0.50 - - - - leal (%eax,%ebx), %ecx
+# CHECK-NEXT: - - 0.50 0.50 - - - - leaq (%eax,%ebx), %rcx
+# CHECK-NEXT: - - 0.50 0.50 - - - - leaw (%rax,%rbx), %cx
+# CHECK-NEXT: - - 0.50 0.50 - - - - leal (%rax,%rbx), %ecx
+# CHECK-NEXT: - - 0.50 0.50 - - - - leaq (%rax,%rbx), %rcx
+# CHECK-NEXT: - - 0.50 0.50 - - - - leaw (%eax,%ebx), %cx
+# CHECK-NEXT: - - 0.50 0.50 - - - - leal (%eax,%ebx), %ecx
+# CHECK-NEXT: - - 0.50 0.50 - - - - leaq (%eax,%ebx), %rcx
+# CHECK-NEXT: - - 0.50 0.50 - - - - leaw (%rax,%rbx), %cx
+# CHECK-NEXT: - - 0.50 0.50 - - - - leal (%rax,%rbx), %ecx
+# CHECK-NEXT: - - 0.50 0.50 - - - - leaq (%rax,%rbx), %rcx
+# CHECK-NEXT: - - 0.50 0.50 - - - - leaw (%eax,%ebx,2), %cx
+# CHECK-NEXT: - - 0.50 0.50 - - - - leal (%eax,%ebx,2), %ecx
+# CHECK-NEXT: - - 0.50 0.50 - - - - leaq (%eax,%ebx,2), %rcx
+# CHECK-NEXT: - - 0.50 0.50 - - - - leaw (%rax,%rbx,2), %cx
+# CHECK-NEXT: - - 0.50 0.50 - - - - leal (%rax,%rbx,2), %ecx
+# CHECK-NEXT: - - 0.50 0.50 - - - - leaq (%rax,%rbx,2), %rcx
+# CHECK-NEXT: - - 0.50 0.50 - - - - leaw -16, %cx
+# CHECK-NEXT: - - 0.50 0.50 - - - - leal -16, %ecx
+# CHECK-NEXT: - - 0.50 0.50 - - - - leaq -16, %rcx
+# CHECK-NEXT: - - 0.50 0.50 - - - - leaw -16(%eax), %cx
+# CHECK-NEXT: - - 0.50 0.50 - - - - leal -16(%eax), %ecx
+# CHECK-NEXT: - - 0.50 0.50 - - - - leaq -16(%eax), %rcx
+# CHECK-NEXT: - - 0.50 0.50 - - - - leaw -16(%rax), %cx
+# CHECK-NEXT: - - 0.50 0.50 - - - - leal -16(%rax), %ecx
+# CHECK-NEXT: - - 0.50 0.50 - - - - leaq -16(%rax), %rcx
+# CHECK-NEXT: - - 0.50 0.50 - - - - leaw -16(,%ebx), %cx
+# CHECK-NEXT: - - 0.50 0.50 - - - - leal -16(,%ebx), %ecx
+# CHECK-NEXT: - - 0.50 0.50 - - - - leaq -16(,%ebx), %rcx
+# CHECK-NEXT: - - 0.50 0.50 - - - - leaw -16(,%rbx), %cx
+# CHECK-NEXT: - - 0.50 0.50 - - - - leal -16(,%rbx), %ecx
+# CHECK-NEXT: - - 0.50 0.50 - - - - leaq -16(,%rbx), %rcx
+# CHECK-NEXT: - - 0.50 0.50 - - - - leaw -16(,%ebx), %cx
+# CHECK-NEXT: - - 0.50 0.50 - - - - leal -16(,%ebx), %ecx
+# CHECK-NEXT: - - 0.50 0.50 - - - - leaq -16(,%ebx), %rcx
+# CHECK-NEXT: - - 0.50 0.50 - - - - leaw -16(,%rbx), %cx
+# CHECK-NEXT: - - 0.50 0.50 - - - - leal -16(,%rbx), %ecx
+# CHECK-NEXT: - - 0.50 0.50 - - - - leaq -16(,%rbx), %rcx
+# CHECK-NEXT: - - 0.50 0.50 - - - - leaw -16(,%ebx,2), %cx
+# CHECK-NEXT: - - 0.50 0.50 - - - - leal -16(,%ebx,2), %ecx
+# CHECK-NEXT: - - 0.50 0.50 - - - - leaq -16(,%ebx,2), %rcx
+# CHECK-NEXT: - - 0.50 0.50 - - - - leaw -16(,%rbx,2), %cx
+# CHECK-NEXT: - - 0.50 0.50 - - - - leal -16(,%rbx,2), %ecx
+# CHECK-NEXT: - - 0.50 0.50 - - - - leaq -16(,%rbx,2), %rcx
+# CHECK-NEXT: - - 0.50 0.50 - - - - leaw -16(%eax,%ebx), %cx
+# CHECK-NEXT: - - 0.50 0.50 - - - - leal -16(%eax,%ebx), %ecx
+# CHECK-NEXT: - - 0.50 0.50 - - - - leaq -16(%eax,%ebx), %rcx
+# CHECK-NEXT: - - 0.50 0.50 - - - - leaw -16(%rax,%rbx), %cx
+# CHECK-NEXT: - - 0.50 0.50 - - - - leal -16(%rax,%rbx), %ecx
+# CHECK-NEXT: - - 0.50 0.50 - - - - leaq -16(%rax,%rbx), %rcx
+# CHECK-NEXT: - - 0.50 0.50 - - - - leaw -16(%eax,%ebx), %cx
+# CHECK-NEXT: - - 0.50 0.50 - - - - leal -16(%eax,%ebx), %ecx
+# CHECK-NEXT: - - 0.50 0.50 - - - - leaq -16(%eax,%ebx), %rcx
+# CHECK-NEXT: - - 0.50 0.50 - - - - leaw -16(%rax,%rbx), %cx
+# CHECK-NEXT: - - 0.50 0.50 - - - - leal -16(%rax,%rbx), %ecx
+# CHECK-NEXT: - - 0.50 0.50 - - - - leaq -16(%rax,%rbx), %rcx
+# CHECK-NEXT: - - 0.50 0.50 - - - - leaw -16(%eax,%ebx,2), %cx
+# CHECK-NEXT: - - 0.50 0.50 - - - - leal -16(%eax,%ebx,2), %ecx
+# CHECK-NEXT: - - 0.50 0.50 - - - - leaq -16(%eax,%ebx,2), %rcx
+# CHECK-NEXT: - - 0.50 0.50 - - - - leaw -16(%rax,%rbx,2), %cx
+# CHECK-NEXT: - - 0.50 0.50 - - - - leal -16(%rax,%rbx,2), %ecx
+# CHECK-NEXT: - - 0.50 0.50 - - - - leaq -16(%rax,%rbx,2), %rcx
+# CHECK-NEXT: - - 0.50 0.50 - - - - leaw 1024, %cx
+# CHECK-NEXT: - - 0.50 0.50 - - - - leal 1024, %ecx
+# CHECK-NEXT: - - 0.50 0.50 - - - - leaq 1024, %rcx
+# CHECK-NEXT: - - 0.50 0.50 - - - - leaw 1024(%eax), %cx
+# CHECK-NEXT: - - 0.50 0.50 - - - - leal 1024(%eax), %ecx
+# CHECK-NEXT: - - 0.50 0.50 - - - - leaq 1024(%eax), %rcx
+# CHECK-NEXT: - - 0.50 0.50 - - - - leaw 1024(%rax), %cx
+# CHECK-NEXT: - - 0.50 0.50 - - - - leal 1024(%rax), %ecx
+# CHECK-NEXT: - - 0.50 0.50 - - - - leaq 1024(%rax), %rcx
+# CHECK-NEXT: - - 0.50 0.50 - - - - leaw 1024(,%ebx), %cx
+# CHECK-NEXT: - - 0.50 0.50 - - - - leal 1024(,%ebx), %ecx
+# CHECK-NEXT: - - 0.50 0.50 - - - - leaq 1024(,%ebx), %rcx
+# CHECK-NEXT: - - 0.50 0.50 - - - - leaw 1024(,%rbx), %cx
+# CHECK-NEXT: - - 0.50 0.50 - - - - leal 1024(,%rbx), %ecx
+# CHECK-NEXT: - - 0.50 0.50 - - - - leaq 1024(,%rbx), %rcx
+# CHECK-NEXT: - - 0.50 0.50 - - - - leaw 1024(,%ebx), %cx
+# CHECK-NEXT: - - 0.50 0.50 - - - - leal 1024(,%ebx), %ecx
+# CHECK-NEXT: - - 0.50 0.50 - - - - leaq 1024(,%ebx), %rcx
+# CHECK-NEXT: - - 0.50 0.50 - - - - leaw 1024(,%rbx), %cx
+# CHECK-NEXT: - - 0.50 0.50 - - - - leal 1024(,%rbx), %ecx
+# CHECK-NEXT: - - 0.50 0.50 - - - - leaq 1024(,%rbx), %rcx
+# CHECK-NEXT: - - 0.50 0.50 - - - - leaw 1024(,%ebx,2), %cx
+# CHECK-NEXT: - - 0.50 0.50 - - - - leal 1024(,%ebx,2), %ecx
+# CHECK-NEXT: - - 0.50 0.50 - - - - leaq 1024(,%ebx,2), %rcx
+# CHECK-NEXT: - - 0.50 0.50 - - - - leaw 1024(,%rbx,2), %cx
+# CHECK-NEXT: - - 0.50 0.50 - - - - leal 1024(,%rbx,2), %ecx
+# CHECK-NEXT: - - 0.50 0.50 - - - - leaq 1024(,%rbx,2), %rcx
+# CHECK-NEXT: - - 0.50 0.50 - - - - leaw 1024(%eax,%ebx), %cx
+# CHECK-NEXT: - - 0.50 0.50 - - - - leal 1024(%eax,%ebx), %ecx
+# CHECK-NEXT: - - 0.50 0.50 - - - - leaq 1024(%eax,%ebx), %rcx
+# CHECK-NEXT: - - 0.50 0.50 - - - - leaw 1024(%rax,%rbx), %cx
+# CHECK-NEXT: - - 0.50 0.50 - - - - leal 1024(%rax,%rbx), %ecx
+# CHECK-NEXT: - - 0.50 0.50 - - - - leaq 1024(%rax,%rbx), %rcx
+# CHECK-NEXT: - - 0.50 0.50 - - - - leaw 1024(%eax,%ebx), %cx
+# CHECK-NEXT: - - 0.50 0.50 - - - - leal 1024(%eax,%ebx), %ecx
+# CHECK-NEXT: - - 0.50 0.50 - - - - leaq 1024(%eax,%ebx), %rcx
+# CHECK-NEXT: - - 0.50 0.50 - - - - leaw 1024(%rax,%rbx), %cx
+# CHECK-NEXT: - - 0.50 0.50 - - - - leal 1024(%rax,%rbx), %ecx
+# CHECK-NEXT: - - 0.50 0.50 - - - - leaq 1024(%rax,%rbx), %rcx
+# CHECK-NEXT: - - 0.50 0.50 - - - - leaw 1024(%eax,%ebx,2), %cx
+# CHECK-NEXT: - - 0.50 0.50 - - - - leal 1024(%eax,%ebx,2), %ecx
+# CHECK-NEXT: - - 0.50 0.50 - - - - leaq 1024(%eax,%ebx,2), %rcx
+# CHECK-NEXT: - - 0.50 0.50 - - - - leaw 1024(%rax,%rbx,2), %cx
+# CHECK-NEXT: - - 0.50 0.50 - - - - leal 1024(%rax,%rbx,2), %ecx
+# CHECK-NEXT: - - 0.50 0.50 - - - - leaq 1024(%rax,%rbx,2), %rcx
--- /dev/null
+# NOTE: Assertions have been autogenerated by utils/update_mca_test_checks.py
+# RUN: llvm-mca -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -instruction-tables < %s | FileCheck %s
+
+lzcntw %cx, %cx
+lzcntw (%rax), %cx
+
+lzcntl %eax, %ecx
+lzcntl (%rax), %ecx
+
+lzcntq %rax, %rcx
+lzcntq (%rax), %rcx
+
+# CHECK: Instruction Info:
+# CHECK-NEXT: [1]: #uOps
+# CHECK-NEXT: [2]: Latency
+# CHECK-NEXT: [3]: RThroughput
+# CHECK-NEXT: [4]: MayLoad
+# CHECK-NEXT: [5]: MayStore
+# CHECK-NEXT: [6]: HasSideEffects (U)
+
+# CHECK: [1] [2] [3] [4] [5] [6] Instructions:
+# CHECK-NEXT: 1 3 1.00 lzcntw %cx, %cx
+# CHECK-NEXT: 2 8 1.00 * lzcntw (%rax), %cx
+# CHECK-NEXT: 1 3 1.00 lzcntl %eax, %ecx
+# CHECK-NEXT: 2 8 1.00 * lzcntl (%rax), %ecx
+# CHECK-NEXT: 1 3 1.00 lzcntq %rax, %rcx
+# CHECK-NEXT: 2 8 1.00 * lzcntq (%rax), %rcx
+
+# CHECK: Resources:
+# CHECK-NEXT: [0] - SBDivider
+# CHECK-NEXT: [1] - SBFPDivider
+# CHECK-NEXT: [2] - SBPort0
+# CHECK-NEXT: [3] - SBPort1
+# CHECK-NEXT: [4] - SBPort4
+# CHECK-NEXT: [5] - SBPort5
+# CHECK-NEXT: [6.0] - SBPort23
+# CHECK-NEXT: [6.1] - SBPort23
+
+# CHECK: Resource pressure per iteration:
+# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6.0] [6.1]
+# CHECK-NEXT: - - - 6.00 - - 1.50 1.50
+
+# CHECK: Resource pressure by instruction:
+# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6.0] [6.1] Instructions:
+# CHECK-NEXT: - - - 1.00 - - - - lzcntw %cx, %cx
+# CHECK-NEXT: - - - 1.00 - - 0.50 0.50 lzcntw (%rax), %cx
+# CHECK-NEXT: - - - 1.00 - - - - lzcntl %eax, %ecx
+# CHECK-NEXT: - - - 1.00 - - 0.50 0.50 lzcntl (%rax), %ecx
+# CHECK-NEXT: - - - 1.00 - - - - lzcntq %rax, %rcx
+# CHECK-NEXT: - - - 1.00 - - 0.50 0.50 lzcntq (%rax), %rcx
--- /dev/null
+# NOTE: Assertions have been autogenerated by utils/update_mca_test_checks.py
+# RUN: llvm-mca -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -instruction-tables < %s | FileCheck %s
+
+emms
+
+movd %eax, %mm2
+movd (%rax), %mm2
+
+movd %mm0, %ecx
+movd %mm0, (%rax)
+
+movq %rax, %mm2
+movq (%rax), %mm2
+
+movq %mm0, %rcx
+movq %mm0, (%rax)
+
+packsswb %mm0, %mm2
+packsswb (%rax), %mm2
+
+packssdw %mm0, %mm2
+packssdw (%rax), %mm2
+
+packuswb %mm0, %mm2
+packuswb (%rax), %mm2
+
+paddb %mm0, %mm2
+paddb (%rax), %mm2
+
+paddd %mm0, %mm2
+paddd (%rax), %mm2
+
+paddsb %mm0, %mm2
+paddsb (%rax), %mm2
+
+paddsw %mm0, %mm2
+paddsw (%rax), %mm2
+
+paddusb %mm0, %mm2
+paddusb (%rax), %mm2
+
+paddusw %mm0, %mm2
+paddusw (%rax), %mm2
+
+paddw %mm0, %mm2
+paddw (%rax), %mm2
+
+pand %mm0, %mm2
+pand (%rax), %mm2
+
+pandn %mm0, %mm2
+pandn (%rax), %mm2
+
+pcmpeqb %mm0, %mm2
+pcmpeqb (%rax), %mm2
+
+pcmpeqd %mm0, %mm2
+pcmpeqd (%rax), %mm2
+
+pcmpeqw %mm0, %mm2
+pcmpeqw (%rax), %mm2
+
+pcmpgtb %mm0, %mm2
+pcmpgtb (%rax), %mm2
+
+pcmpgtd %mm0, %mm2
+pcmpgtd (%rax), %mm2
+
+pcmpgtw %mm0, %mm2
+pcmpgtw (%rax), %mm2
+
+pmaddwd %mm0, %mm2
+pmaddwd (%rax), %mm2
+
+pmulhw %mm0, %mm2
+pmulhw (%rax), %mm2
+
+pmullw %mm0, %mm2
+pmullw (%rax), %mm2
+
+por %mm0, %mm2
+por (%rax), %mm2
+
+pslld $1, %mm2
+pslld %mm0, %mm2
+pslld (%rax), %mm2
+
+psllq $1, %mm2
+psllq %mm0, %mm2
+psllq (%rax), %mm2
+
+psllw $1, %mm2
+psllw %mm0, %mm2
+psllw (%rax), %mm2
+
+psrad $1, %mm2
+psrad %mm0, %mm2
+psrad (%rax), %mm2
+
+psraw $1, %mm2
+psraw %mm0, %mm2
+psraw (%rax), %mm2
+
+psrld $1, %mm2
+psrld %mm0, %mm2
+psrld (%rax), %mm2
+
+psrlq $1, %mm2
+psrlq %mm0, %mm2
+psrlq (%rax), %mm2
+
+psrlw $1, %mm2
+psrlw %mm0, %mm2
+psrlw (%rax), %mm2
+
+psubb %mm0, %mm2
+psubb (%rax), %mm2
+
+psubd %mm0, %mm2
+psubd (%rax), %mm2
+
+psubsb %mm0, %mm2
+psubsb (%rax), %mm2
+
+psubsw %mm0, %mm2
+psubsw (%rax), %mm2
+
+psubusb %mm0, %mm2
+psubusb (%rax), %mm2
+
+psubusw %mm0, %mm2
+psubusw (%rax), %mm2
+
+psubw %mm0, %mm2
+psubw (%rax), %mm2
+
+punpckhbw %mm0, %mm2
+punpckhbw (%rax), %mm2
+
+punpckhdq %mm0, %mm2
+punpckhdq (%rax), %mm2
+
+punpckhwd %mm0, %mm2
+punpckhwd (%rax), %mm2
+
+punpcklbw %mm0, %mm2
+punpcklbw (%rax), %mm2
+
+punpckldq %mm0, %mm2
+punpckldq (%rax), %mm2
+
+punpcklwd %mm0, %mm2
+punpcklwd (%rax), %mm2
+
+pxor %mm0, %mm2
+pxor (%rax), %mm2
+
+# CHECK: Instruction Info:
+# CHECK-NEXT: [1]: #uOps
+# CHECK-NEXT: [2]: Latency
+# CHECK-NEXT: [3]: RThroughput
+# CHECK-NEXT: [4]: MayLoad
+# CHECK-NEXT: [5]: MayStore
+# CHECK-NEXT: [6]: HasSideEffects (U)
+
+# CHECK: [1] [2] [3] [4] [5] [6] Instructions:
+# CHECK-NEXT: 31 31 10.33 * * U emms
+# CHECK-NEXT: 1 1 1.00 movd %eax, %mm2
+# CHECK-NEXT: 1 5 0.50 * movd (%rax), %mm2
+# CHECK-NEXT: 1 2 1.00 movd %mm0, %ecx
+# CHECK-NEXT: 1 1 1.00 * U movd %mm0, (%rax)
+# CHECK-NEXT: 1 1 1.00 movq %rax, %mm2
+# CHECK-NEXT: 1 5 0.50 * movq (%rax), %mm2
+# CHECK-NEXT: 1 2 1.00 movq %mm0, %rcx
+# CHECK-NEXT: 1 1 1.00 * movq %mm0, (%rax)
+# CHECK-NEXT: 1 1 1.00 packsswb %mm0, %mm2
+# CHECK-NEXT: 2 6 1.00 * packsswb (%rax), %mm2
+# CHECK-NEXT: 1 1 1.00 packssdw %mm0, %mm2
+# CHECK-NEXT: 2 6 1.00 * packssdw (%rax), %mm2
+# CHECK-NEXT: 1 1 1.00 packuswb %mm0, %mm2
+# CHECK-NEXT: 2 6 1.00 * packuswb (%rax), %mm2
+# CHECK-NEXT: 1 3 1.00 paddb %mm0, %mm2
+# CHECK-NEXT: 2 8 1.00 * paddb (%rax), %mm2
+# CHECK-NEXT: 1 3 1.00 paddd %mm0, %mm2
+# CHECK-NEXT: 2 8 1.00 * paddd (%rax), %mm2
+# CHECK-NEXT: 1 3 1.00 paddsb %mm0, %mm2
+# CHECK-NEXT: 2 8 1.00 * paddsb (%rax), %mm2
+# CHECK-NEXT: 1 3 1.00 paddsw %mm0, %mm2
+# CHECK-NEXT: 2 8 1.00 * paddsw (%rax), %mm2
+# CHECK-NEXT: 1 3 1.00 paddusb %mm0, %mm2
+# CHECK-NEXT: 2 8 1.00 * paddusb (%rax), %mm2
+# CHECK-NEXT: 1 3 1.00 paddusw %mm0, %mm2
+# CHECK-NEXT: 2 8 1.00 * paddusw (%rax), %mm2
+# CHECK-NEXT: 1 3 1.00 paddw %mm0, %mm2
+# CHECK-NEXT: 2 8 1.00 * paddw (%rax), %mm2
+# CHECK-NEXT: 1 1 0.33 pand %mm0, %mm2
+# CHECK-NEXT: 2 6 0.50 * pand (%rax), %mm2
+# CHECK-NEXT: 1 1 0.33 pandn %mm0, %mm2
+# CHECK-NEXT: 2 6 0.50 * pandn (%rax), %mm2
+# CHECK-NEXT: 1 3 1.00 pcmpeqb %mm0, %mm2
+# CHECK-NEXT: 2 8 1.00 * pcmpeqb (%rax), %mm2
+# CHECK-NEXT: 1 3 1.00 pcmpeqd %mm0, %mm2
+# CHECK-NEXT: 2 8 1.00 * pcmpeqd (%rax), %mm2
+# CHECK-NEXT: 1 3 1.00 pcmpeqw %mm0, %mm2
+# CHECK-NEXT: 2 8 1.00 * pcmpeqw (%rax), %mm2
+# CHECK-NEXT: 1 3 1.00 pcmpgtb %mm0, %mm2
+# CHECK-NEXT: 2 8 1.00 * pcmpgtb (%rax), %mm2
+# CHECK-NEXT: 1 3 1.00 pcmpgtd %mm0, %mm2
+# CHECK-NEXT: 2 8 1.00 * pcmpgtd (%rax), %mm2
+# CHECK-NEXT: 1 3 1.00 pcmpgtw %mm0, %mm2
+# CHECK-NEXT: 2 8 1.00 * pcmpgtw (%rax), %mm2
+# CHECK-NEXT: 1 5 1.00 pmaddwd %mm0, %mm2
+# CHECK-NEXT: 2 10 1.00 * pmaddwd (%rax), %mm2
+# CHECK-NEXT: 1 5 1.00 pmulhw %mm0, %mm2
+# CHECK-NEXT: 2 10 1.00 * pmulhw (%rax), %mm2
+# CHECK-NEXT: 1 5 1.00 pmullw %mm0, %mm2
+# CHECK-NEXT: 2 10 1.00 * pmullw (%rax), %mm2
+# CHECK-NEXT: 1 1 0.33 por %mm0, %mm2
+# CHECK-NEXT: 2 6 0.50 * por (%rax), %mm2
+# CHECK-NEXT: 1 1 1.00 pslld $1, %mm2
+# CHECK-NEXT: 1 1 1.00 pslld %mm0, %mm2
+# CHECK-NEXT: 2 6 1.00 * pslld (%rax), %mm2
+# CHECK-NEXT: 1 1 1.00 psllq $1, %mm2
+# CHECK-NEXT: 1 1 1.00 psllq %mm0, %mm2
+# CHECK-NEXT: 2 6 1.00 * psllq (%rax), %mm2
+# CHECK-NEXT: 1 1 1.00 psllw $1, %mm2
+# CHECK-NEXT: 1 1 1.00 psllw %mm0, %mm2
+# CHECK-NEXT: 2 6 1.00 * psllw (%rax), %mm2
+# CHECK-NEXT: 1 1 1.00 psrad $1, %mm2
+# CHECK-NEXT: 1 1 1.00 psrad %mm0, %mm2
+# CHECK-NEXT: 2 6 1.00 * psrad (%rax), %mm2
+# CHECK-NEXT: 1 1 1.00 psraw $1, %mm2
+# CHECK-NEXT: 1 1 1.00 psraw %mm0, %mm2
+# CHECK-NEXT: 2 6 1.00 * psraw (%rax), %mm2
+# CHECK-NEXT: 1 1 1.00 psrld $1, %mm2
+# CHECK-NEXT: 1 1 1.00 psrld %mm0, %mm2
+# CHECK-NEXT: 2 6 1.00 * psrld (%rax), %mm2
+# CHECK-NEXT: 1 1 1.00 psrlq $1, %mm2
+# CHECK-NEXT: 1 1 1.00 psrlq %mm0, %mm2
+# CHECK-NEXT: 2 6 1.00 * psrlq (%rax), %mm2
+# CHECK-NEXT: 1 1 1.00 psrlw $1, %mm2
+# CHECK-NEXT: 1 1 1.00 psrlw %mm0, %mm2
+# CHECK-NEXT: 2 6 1.00 * psrlw (%rax), %mm2
+# CHECK-NEXT: 1 3 1.00 psubb %mm0, %mm2
+# CHECK-NEXT: 2 8 1.00 * psubb (%rax), %mm2
+# CHECK-NEXT: 1 3 1.00 psubd %mm0, %mm2
+# CHECK-NEXT: 2 8 1.00 * psubd (%rax), %mm2
+# CHECK-NEXT: 1 3 1.00 psubsb %mm0, %mm2
+# CHECK-NEXT: 2 8 1.00 * psubsb (%rax), %mm2
+# CHECK-NEXT: 1 3 1.00 psubsw %mm0, %mm2
+# CHECK-NEXT: 2 8 1.00 * psubsw (%rax), %mm2
+# CHECK-NEXT: 1 3 1.00 psubusb %mm0, %mm2
+# CHECK-NEXT: 2 8 1.00 * psubusb (%rax), %mm2
+# CHECK-NEXT: 1 3 1.00 psubusw %mm0, %mm2
+# CHECK-NEXT: 2 8 1.00 * psubusw (%rax), %mm2
+# CHECK-NEXT: 1 3 1.00 psubw %mm0, %mm2
+# CHECK-NEXT: 2 8 1.00 * psubw (%rax), %mm2
+# CHECK-NEXT: 1 1 1.00 punpckhbw %mm0, %mm2
+# CHECK-NEXT: 2 6 1.00 * punpckhbw (%rax), %mm2
+# CHECK-NEXT: 1 1 1.00 punpckhdq %mm0, %mm2
+# CHECK-NEXT: 2 6 1.00 * punpckhdq (%rax), %mm2
+# CHECK-NEXT: 1 1 1.00 punpckhwd %mm0, %mm2
+# CHECK-NEXT: 2 6 1.00 * punpckhwd (%rax), %mm2
+# CHECK-NEXT: 1 1 1.00 punpcklbw %mm0, %mm2
+# CHECK-NEXT: 2 6 1.00 * punpcklbw (%rax), %mm2
+# CHECK-NEXT: 1 1 1.00 punpckldq %mm0, %mm2
+# CHECK-NEXT: 2 6 1.00 * punpckldq (%rax), %mm2
+# CHECK-NEXT: 1 1 1.00 punpcklwd %mm0, %mm2
+# CHECK-NEXT: 2 6 1.00 * punpcklwd (%rax), %mm2
+# CHECK-NEXT: 1 1 0.33 pxor %mm0, %mm2
+# CHECK-NEXT: 2 6 0.50 * pxor (%rax), %mm2
+
+# CHECK: Resources:
+# CHECK-NEXT: [0] - SBDivider
+# CHECK-NEXT: [1] - SBFPDivider
+# CHECK-NEXT: [2] - SBPort0
+# CHECK-NEXT: [3] - SBPort1
+# CHECK-NEXT: [4] - SBPort4
+# CHECK-NEXT: [5] - SBPort5
+# CHECK-NEXT: [6.0] - SBPort23
+# CHECK-NEXT: [6.1] - SBPort23
+
+# CHECK: Resource pressure per iteration:
+# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6.0] [6.1]
+# CHECK-NEXT: - - 21.00 53.00 2.00 57.00 24.00 24.00
+
+# CHECK: Resource pressure by instruction:
+# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6.0] [6.1] Instructions:
+# CHECK-NEXT: - - 10.33 10.33 - 10.33 - - emms
+# CHECK-NEXT: - - - - - 1.00 - - movd %eax, %mm2
+# CHECK-NEXT: - - - - - - 0.50 0.50 movd (%rax), %mm2
+# CHECK-NEXT: - - 1.00 - - - - - movd %mm0, %ecx
+# CHECK-NEXT: - - - - 1.00 - 0.50 0.50 movd %mm0, (%rax)
+# CHECK-NEXT: - - - - - 1.00 - - movq %rax, %mm2
+# CHECK-NEXT: - - - - - - 0.50 0.50 movq (%rax), %mm2
+# CHECK-NEXT: - - 1.00 - - - - - movq %mm0, %rcx
+# CHECK-NEXT: - - - - 1.00 - 0.50 0.50 movq %mm0, (%rax)
+# CHECK-NEXT: - - - - - 1.00 - - packsswb %mm0, %mm2
+# CHECK-NEXT: - - - - - 1.00 0.50 0.50 packsswb (%rax), %mm2
+# CHECK-NEXT: - - - - - 1.00 - - packssdw %mm0, %mm2
+# CHECK-NEXT: - - - - - 1.00 0.50 0.50 packssdw (%rax), %mm2
+# CHECK-NEXT: - - - - - 1.00 - - packuswb %mm0, %mm2
+# CHECK-NEXT: - - - - - 1.00 0.50 0.50 packuswb (%rax), %mm2
+# CHECK-NEXT: - - - 1.00 - - - - paddb %mm0, %mm2
+# CHECK-NEXT: - - - 1.00 - - 0.50 0.50 paddb (%rax), %mm2
+# CHECK-NEXT: - - - 1.00 - - - - paddd %mm0, %mm2
+# CHECK-NEXT: - - - 1.00 - - 0.50 0.50 paddd (%rax), %mm2
+# CHECK-NEXT: - - - 1.00 - - - - paddsb %mm0, %mm2
+# CHECK-NEXT: - - - 1.00 - - 0.50 0.50 paddsb (%rax), %mm2
+# CHECK-NEXT: - - - 1.00 - - - - paddsw %mm0, %mm2
+# CHECK-NEXT: - - - 1.00 - - 0.50 0.50 paddsw (%rax), %mm2
+# CHECK-NEXT: - - - 1.00 - - - - paddusb %mm0, %mm2
+# CHECK-NEXT: - - - 1.00 - - 0.50 0.50 paddusb (%rax), %mm2
+# CHECK-NEXT: - - - 1.00 - - - - paddusw %mm0, %mm2
+# CHECK-NEXT: - - - 1.00 - - 0.50 0.50 paddusw (%rax), %mm2
+# CHECK-NEXT: - - - 1.00 - - - - paddw %mm0, %mm2
+# CHECK-NEXT: - - - 1.00 - - 0.50 0.50 paddw (%rax), %mm2
+# CHECK-NEXT: - - 0.33 0.33 - 0.33 - - pand %mm0, %mm2
+# CHECK-NEXT: - - 0.33 0.33 - 0.33 0.50 0.50 pand (%rax), %mm2
+# CHECK-NEXT: - - 0.33 0.33 - 0.33 - - pandn %mm0, %mm2
+# CHECK-NEXT: - - 0.33 0.33 - 0.33 0.50 0.50 pandn (%rax), %mm2
+# CHECK-NEXT: - - - 1.00 - - - - pcmpeqb %mm0, %mm2
+# CHECK-NEXT: - - - 1.00 - - 0.50 0.50 pcmpeqb (%rax), %mm2
+# CHECK-NEXT: - - - 1.00 - - - - pcmpeqd %mm0, %mm2
+# CHECK-NEXT: - - - 1.00 - - 0.50 0.50 pcmpeqd (%rax), %mm2
+# CHECK-NEXT: - - - 1.00 - - - - pcmpeqw %mm0, %mm2
+# CHECK-NEXT: - - - 1.00 - - 0.50 0.50 pcmpeqw (%rax), %mm2
+# CHECK-NEXT: - - - 1.00 - - - - pcmpgtb %mm0, %mm2
+# CHECK-NEXT: - - - 1.00 - - 0.50 0.50 pcmpgtb (%rax), %mm2
+# CHECK-NEXT: - - - 1.00 - - - - pcmpgtd %mm0, %mm2
+# CHECK-NEXT: - - - 1.00 - - 0.50 0.50 pcmpgtd (%rax), %mm2
+# CHECK-NEXT: - - - 1.00 - - - - pcmpgtw %mm0, %mm2
+# CHECK-NEXT: - - - 1.00 - - 0.50 0.50 pcmpgtw (%rax), %mm2
+# CHECK-NEXT: - - 1.00 - - - - - pmaddwd %mm0, %mm2
+# CHECK-NEXT: - - 1.00 - - - 0.50 0.50 pmaddwd (%rax), %mm2
+# CHECK-NEXT: - - 1.00 - - - - - pmulhw %mm0, %mm2
+# CHECK-NEXT: - - 1.00 - - - 0.50 0.50 pmulhw (%rax), %mm2
+# CHECK-NEXT: - - 1.00 - - - - - pmullw %mm0, %mm2
+# CHECK-NEXT: - - 1.00 - - - 0.50 0.50 pmullw (%rax), %mm2
+# CHECK-NEXT: - - 0.33 0.33 - 0.33 - - por %mm0, %mm2
+# CHECK-NEXT: - - 0.33 0.33 - 0.33 0.50 0.50 por (%rax), %mm2
+# CHECK-NEXT: - - - - - 1.00 - - pslld $1, %mm2
+# CHECK-NEXT: - - - - - 1.00 - - pslld %mm0, %mm2
+# CHECK-NEXT: - - - - - 1.00 0.50 0.50 pslld (%rax), %mm2
+# CHECK-NEXT: - - - - - 1.00 - - psllq $1, %mm2
+# CHECK-NEXT: - - - - - 1.00 - - psllq %mm0, %mm2
+# CHECK-NEXT: - - - - - 1.00 0.50 0.50 psllq (%rax), %mm2
+# CHECK-NEXT: - - - - - 1.00 - - psllw $1, %mm2
+# CHECK-NEXT: - - - - - 1.00 - - psllw %mm0, %mm2
+# CHECK-NEXT: - - - - - 1.00 0.50 0.50 psllw (%rax), %mm2
+# CHECK-NEXT: - - - - - 1.00 - - psrad $1, %mm2
+# CHECK-NEXT: - - - - - 1.00 - - psrad %mm0, %mm2
+# CHECK-NEXT: - - - - - 1.00 0.50 0.50 psrad (%rax), %mm2
+# CHECK-NEXT: - - - - - 1.00 - - psraw $1, %mm2
+# CHECK-NEXT: - - - - - 1.00 - - psraw %mm0, %mm2
+# CHECK-NEXT: - - - - - 1.00 0.50 0.50 psraw (%rax), %mm2
+# CHECK-NEXT: - - - - - 1.00 - - psrld $1, %mm2
+# CHECK-NEXT: - - - - - 1.00 - - psrld %mm0, %mm2
+# CHECK-NEXT: - - - - - 1.00 0.50 0.50 psrld (%rax), %mm2
+# CHECK-NEXT: - - - - - 1.00 - - psrlq $1, %mm2
+# CHECK-NEXT: - - - - - 1.00 - - psrlq %mm0, %mm2
+# CHECK-NEXT: - - - - - 1.00 0.50 0.50 psrlq (%rax), %mm2
+# CHECK-NEXT: - - - - - 1.00 - - psrlw $1, %mm2
+# CHECK-NEXT: - - - - - 1.00 - - psrlw %mm0, %mm2
+# CHECK-NEXT: - - - - - 1.00 0.50 0.50 psrlw (%rax), %mm2
+# CHECK-NEXT: - - - 1.00 - - - - psubb %mm0, %mm2
+# CHECK-NEXT: - - - 1.00 - - 0.50 0.50 psubb (%rax), %mm2
+# CHECK-NEXT: - - - 1.00 - - - - psubd %mm0, %mm2
+# CHECK-NEXT: - - - 1.00 - - 0.50 0.50 psubd (%rax), %mm2
+# CHECK-NEXT: - - - 1.00 - - - - psubsb %mm0, %mm2
+# CHECK-NEXT: - - - 1.00 - - 0.50 0.50 psubsb (%rax), %mm2
+# CHECK-NEXT: - - - 1.00 - - - - psubsw %mm0, %mm2
+# CHECK-NEXT: - - - 1.00 - - 0.50 0.50 psubsw (%rax), %mm2
+# CHECK-NEXT: - - - 1.00 - - - - psubusb %mm0, %mm2
+# CHECK-NEXT: - - - 1.00 - - 0.50 0.50 psubusb (%rax), %mm2
+# CHECK-NEXT: - - - 1.00 - - - - psubusw %mm0, %mm2
+# CHECK-NEXT: - - - 1.00 - - 0.50 0.50 psubusw (%rax), %mm2
+# CHECK-NEXT: - - - 1.00 - - - - psubw %mm0, %mm2
+# CHECK-NEXT: - - - 1.00 - - 0.50 0.50 psubw (%rax), %mm2
+# CHECK-NEXT: - - - - - 1.00 - - punpckhbw %mm0, %mm2
+# CHECK-NEXT: - - - - - 1.00 0.50 0.50 punpckhbw (%rax), %mm2
+# CHECK-NEXT: - - - - - 1.00 - - punpckhdq %mm0, %mm2
+# CHECK-NEXT: - - - - - 1.00 0.50 0.50 punpckhdq (%rax), %mm2
+# CHECK-NEXT: - - - - - 1.00 - - punpckhwd %mm0, %mm2
+# CHECK-NEXT: - - - - - 1.00 0.50 0.50 punpckhwd (%rax), %mm2
+# CHECK-NEXT: - - - - - 1.00 - - punpcklbw %mm0, %mm2
+# CHECK-NEXT: - - - - - 1.00 0.50 0.50 punpcklbw (%rax), %mm2
+# CHECK-NEXT: - - - - - 1.00 - - punpckldq %mm0, %mm2
+# CHECK-NEXT: - - - - - 1.00 0.50 0.50 punpckldq (%rax), %mm2
+# CHECK-NEXT: - - - - - 1.00 - - punpcklwd %mm0, %mm2
+# CHECK-NEXT: - - - - - 1.00 0.50 0.50 punpcklwd (%rax), %mm2
+# CHECK-NEXT: - - 0.33 0.33 - 0.33 - - pxor %mm0, %mm2
+# CHECK-NEXT: - - 0.33 0.33 - 0.33 0.50 0.50 pxor (%rax), %mm2
--- /dev/null
+# NOTE: Assertions have been autogenerated by utils/update_mca_test_checks.py
+# RUN: llvm-mca -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -instruction-tables < %s | FileCheck %s
+
+movbe %cx, (%rax)
+movbe (%rax), %cx
+
+movbe %ecx, (%rax)
+movbe (%rax), %ecx
+
+movbe %rcx, (%rax)
+movbe (%rax), %rcx
+
+# CHECK: Instruction Info:
+# CHECK-NEXT: [1]: #uOps
+# CHECK-NEXT: [2]: Latency
+# CHECK-NEXT: [3]: RThroughput
+# CHECK-NEXT: [4]: MayLoad
+# CHECK-NEXT: [5]: MayStore
+# CHECK-NEXT: [6]: HasSideEffects (U)
+
+# CHECK: [1] [2] [3] [4] [5] [6] Instructions:
+# CHECK-NEXT: 1 1 1.00 * movbew %cx, (%rax)
+# CHECK-NEXT: 2 6 0.50 * movbew (%rax), %cx
+# CHECK-NEXT: 1 1 1.00 * movbel %ecx, (%rax)
+# CHECK-NEXT: 2 6 0.50 * movbel (%rax), %ecx
+# CHECK-NEXT: 1 1 1.00 * movbeq %rcx, (%rax)
+# CHECK-NEXT: 2 6 0.50 * movbeq (%rax), %rcx
+
+# CHECK: Resources:
+# CHECK-NEXT: [0] - SBDivider
+# CHECK-NEXT: [1] - SBFPDivider
+# CHECK-NEXT: [2] - SBPort0
+# CHECK-NEXT: [3] - SBPort1
+# CHECK-NEXT: [4] - SBPort4
+# CHECK-NEXT: [5] - SBPort5
+# CHECK-NEXT: [6.0] - SBPort23
+# CHECK-NEXT: [6.1] - SBPort23
+
+# CHECK: Resource pressure per iteration:
+# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6.0] [6.1]
+# CHECK-NEXT: - - 1.00 1.00 3.00 1.00 3.00 3.00
+
+# CHECK: Resource pressure by instruction:
+# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6.0] [6.1] Instructions:
+# CHECK-NEXT: - - - - 1.00 - 0.50 0.50 movbew %cx, (%rax)
+# CHECK-NEXT: - - 0.33 0.33 - 0.33 0.50 0.50 movbew (%rax), %cx
+# CHECK-NEXT: - - - - 1.00 - 0.50 0.50 movbel %ecx, (%rax)
+# CHECK-NEXT: - - 0.33 0.33 - 0.33 0.50 0.50 movbel (%rax), %ecx
+# CHECK-NEXT: - - - - 1.00 - 0.50 0.50 movbeq %rcx, (%rax)
+# CHECK-NEXT: - - 0.33 0.33 - 0.33 0.50 0.50 movbeq (%rax), %rcx
--- /dev/null
+# NOTE: Assertions have been autogenerated by utils/update_mca_test_checks.py
+# RUN: llvm-mca -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -instruction-tables < %s | FileCheck %s
+
+pclmulqdq $11, %xmm0, %xmm2
+pclmulqdq $11, (%rax), %xmm2
+
+# CHECK: Instruction Info:
+# CHECK-NEXT: [1]: #uOps
+# CHECK-NEXT: [2]: Latency
+# CHECK-NEXT: [3]: RThroughput
+# CHECK-NEXT: [4]: MayLoad
+# CHECK-NEXT: [5]: MayStore
+# CHECK-NEXT: [6]: HasSideEffects (U)
+
+# CHECK: [1] [2] [3] [4] [5] [6] Instructions:
+# CHECK-NEXT: 1 14 6.00 pclmulqdq $11, %xmm0, %xmm2
+# CHECK-NEXT: 1 14 5.67 * pclmulqdq $11, (%rax), %xmm2
+
+# CHECK: Resources:
+# CHECK-NEXT: [0] - SBDivider
+# CHECK-NEXT: [1] - SBFPDivider
+# CHECK-NEXT: [2] - SBPort0
+# CHECK-NEXT: [3] - SBPort1
+# CHECK-NEXT: [4] - SBPort4
+# CHECK-NEXT: [5] - SBPort5
+# CHECK-NEXT: [6.0] - SBPort23
+# CHECK-NEXT: [6.1] - SBPort23
+
+# CHECK: Resource pressure per iteration:
+# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6.0] [6.1]
+# CHECK-NEXT: - - 11.67 11.67 - 11.67 0.50 0.50
+
+# CHECK: Resource pressure by instruction:
+# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6.0] [6.1] Instructions:
+# CHECK-NEXT: - - 6.00 6.00 - 6.00 - - pclmulqdq $11, %xmm0, %xmm2
+# CHECK-NEXT: - - 5.67 5.67 - 5.67 0.50 0.50 pclmulqdq $11, (%rax), %xmm2
--- /dev/null
+# NOTE: Assertions have been autogenerated by utils/update_mca_test_checks.py
+# RUN: llvm-mca -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -instruction-tables < %s | FileCheck %s
+
+popcntw %cx, %cx
+popcntw (%rax), %cx
+
+popcntl %eax, %ecx
+popcntl (%rax), %ecx
+
+popcntq %rax, %rcx
+popcntq (%rax), %rcx
+
+# CHECK: Instruction Info:
+# CHECK-NEXT: [1]: #uOps
+# CHECK-NEXT: [2]: Latency
+# CHECK-NEXT: [3]: RThroughput
+# CHECK-NEXT: [4]: MayLoad
+# CHECK-NEXT: [5]: MayStore
+# CHECK-NEXT: [6]: HasSideEffects (U)
+
+# CHECK: [1] [2] [3] [4] [5] [6] Instructions:
+# CHECK-NEXT: 1 3 1.00 popcntw %cx, %cx
+# CHECK-NEXT: 2 9 1.00 * popcntw (%rax), %cx
+# CHECK-NEXT: 1 3 1.00 popcntl %eax, %ecx
+# CHECK-NEXT: 2 9 1.00 * popcntl (%rax), %ecx
+# CHECK-NEXT: 1 3 1.00 popcntq %rax, %rcx
+# CHECK-NEXT: 2 9 1.00 * popcntq (%rax), %rcx
+
+# CHECK: Resources:
+# CHECK-NEXT: [0] - SBDivider
+# CHECK-NEXT: [1] - SBFPDivider
+# CHECK-NEXT: [2] - SBPort0
+# CHECK-NEXT: [3] - SBPort1
+# CHECK-NEXT: [4] - SBPort4
+# CHECK-NEXT: [5] - SBPort5
+# CHECK-NEXT: [6.0] - SBPort23
+# CHECK-NEXT: [6.1] - SBPort23
+
+# CHECK: Resource pressure per iteration:
+# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6.0] [6.1]
+# CHECK-NEXT: - - - 6.00 - - 1.50 1.50
+
+# CHECK: Resource pressure by instruction:
+# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6.0] [6.1] Instructions:
+# CHECK-NEXT: - - - 1.00 - - - - popcntw %cx, %cx
+# CHECK-NEXT: - - - 1.00 - - 0.50 0.50 popcntw (%rax), %cx
+# CHECK-NEXT: - - - 1.00 - - - - popcntl %eax, %ecx
+# CHECK-NEXT: - - - 1.00 - - 0.50 0.50 popcntl (%rax), %ecx
+# CHECK-NEXT: - - - 1.00 - - - - popcntq %rax, %rcx
+# CHECK-NEXT: - - - 1.00 - - 0.50 0.50 popcntq (%rax), %rcx
--- /dev/null
+# NOTE: Assertions have been autogenerated by utils/update_mca_test_checks.py
+# RUN: llvm-mca -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -instruction-tables < %s | FileCheck %s
+
+prefetch (%rax)
+prefetchw (%rax)
+
+# CHECK: Instruction Info:
+# CHECK-NEXT: [1]: #uOps
+# CHECK-NEXT: [2]: Latency
+# CHECK-NEXT: [3]: RThroughput
+# CHECK-NEXT: [4]: MayLoad
+# CHECK-NEXT: [5]: MayStore
+# CHECK-NEXT: [6]: HasSideEffects (U)
+
+# CHECK: [1] [2] [3] [4] [5] [6] Instructions:
+# CHECK-NEXT: 1 5 0.50 * * prefetch (%rax)
+# CHECK-NEXT: 1 5 0.50 * * prefetchw (%rax)
+
+# CHECK: Resources:
+# CHECK-NEXT: [0] - SBDivider
+# CHECK-NEXT: [1] - SBFPDivider
+# CHECK-NEXT: [2] - SBPort0
+# CHECK-NEXT: [3] - SBPort1
+# CHECK-NEXT: [4] - SBPort4
+# CHECK-NEXT: [5] - SBPort5
+# CHECK-NEXT: [6.0] - SBPort23
+# CHECK-NEXT: [6.1] - SBPort23
+
+# CHECK: Resource pressure per iteration:
+# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6.0] [6.1]
+# CHECK-NEXT: - - - - - - 1.00 1.00
+
+# CHECK: Resource pressure by instruction:
+# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6.0] [6.1] Instructions:
+# CHECK-NEXT: - - - - - - 0.50 0.50 prefetch (%rax)
+# CHECK-NEXT: - - - - - - 0.50 0.50 prefetchw (%rax)
--- /dev/null
+# NOTE: Assertions have been autogenerated by utils/update_mca_test_checks.py
+# RUN: llvm-mca -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -instruction-tables < %s | FileCheck %s
+
+addps %xmm0, %xmm2
+addps (%rax), %xmm2
+
+addss %xmm0, %xmm2
+addss (%rax), %xmm2
+
+andnps %xmm0, %xmm2
+andnps (%rax), %xmm2
+
+andps %xmm0, %xmm2
+andps (%rax), %xmm2
+
+cmpps $0, %xmm0, %xmm2
+cmpps $0, (%rax), %xmm2
+
+cmpss $0, %xmm0, %xmm2
+cmpss $0, (%rax), %xmm2
+
+comiss %xmm0, %xmm1
+comiss (%rax), %xmm1
+
+cvtpi2ps %mm0, %xmm2
+cvtpi2ps (%rax), %xmm2
+
+cvtps2pi %xmm0, %mm2
+cvtps2pi (%rax), %mm2
+
+cvtsi2ss %ecx, %xmm2
+cvtsi2ss %rcx, %xmm2
+cvtsi2ss (%rax), %xmm2
+cvtsi2ss (%rax), %xmm2
+
+cvtss2si %xmm0, %ecx
+cvtss2si %xmm0, %rcx
+cvtss2si (%rax), %ecx
+cvtss2si (%rax), %rcx
+
+cvttps2pi %xmm0, %mm2
+cvttps2pi (%rax), %mm2
+
+cvttss2si %xmm0, %ecx
+cvttss2si %xmm0, %rcx
+cvttss2si (%rax), %ecx
+cvttss2si (%rax), %rcx
+
+divps %xmm0, %xmm2
+divps (%rax), %xmm2
+
+divss %xmm0, %xmm2
+divss (%rax), %xmm2
+
+ldmxcsr (%rax)
+
+maskmovq %mm0, %mm1
+
+maxps %xmm0, %xmm2
+maxps (%rax), %xmm2
+
+maxss %xmm0, %xmm2
+maxss (%rax), %xmm2
+
+minps %xmm0, %xmm2
+minps (%rax), %xmm2
+
+minss %xmm0, %xmm2
+minss (%rax), %xmm2
+
+movaps %xmm0, %xmm2
+movaps %xmm0, (%rax)
+movaps (%rax), %xmm2
+
+movhlps %xmm0, %xmm2
+movlhps %xmm0, %xmm2
+
+movhps %xmm0, (%rax)
+movhps (%rax), %xmm2
+
+movlps %xmm0, (%rax)
+movlps (%rax), %xmm2
+
+movmskps %xmm0, %rcx
+
+movntps %xmm0, (%rax)
+movntq %mm0, (%rax)
+
+movss %xmm0, %xmm2
+movss %xmm0, (%rax)
+movss (%rax), %xmm2
+
+movups %xmm0, %xmm2
+movups %xmm0, (%rax)
+movups (%rax), %xmm2
+
+mulps %xmm0, %xmm2
+mulps (%rax), %xmm2
+
+mulss %xmm0, %xmm2
+mulss (%rax), %xmm2
+
+orps %xmm0, %xmm2
+orps (%rax), %xmm2
+
+pavgb %mm0, %mm2
+pavgb (%rax), %mm2
+
+pavgw %mm0, %mm2
+pavgw (%rax), %mm2
+
+pextrw $1, %mm0, %rcx
+
+pinsrw $1, %rax, %mm2
+pinsrw $1, (%rax), %mm2
+
+pmaxsw %mm0, %mm2
+pmaxsw (%rax), %mm2
+
+pmaxub %mm0, %mm2
+pmaxub (%rax), %mm2
+
+pminsw %mm0, %mm2
+pminsw (%rax), %mm2
+
+pminub %mm0, %mm2
+pminub (%rax), %mm2
+
+pmovmskb %xmm0, %rcx
+
+pmulhuw %mm0, %mm2
+pmulhuw (%rax), %mm2
+
+prefetcht0 (%rax)
+prefetcht1 (%rax)
+prefetcht2 (%rax)
+prefetchnta (%rax)
+
+psadbw %mm0, %mm2
+psadbw (%rax), %mm2
+
+pshufw $1, %mm0, %mm2
+pshufw $1, (%rax), %mm2
+
+rcpps %xmm0, %xmm2
+rcpps (%rax), %xmm2
+
+rcpss %xmm0, %xmm2
+rcpss (%rax), %xmm2
+
+rsqrtps %xmm0, %xmm2
+rsqrtps (%rax), %xmm2
+
+rsqrtss %xmm0, %xmm2
+rsqrtss (%rax), %xmm2
+
+sfence
+
+shufps $1, %xmm0, %xmm2
+shufps $1, (%rax), %xmm2
+
+sqrtps %xmm0, %xmm2
+sqrtps (%rax), %xmm2
+
+sqrtss %xmm0, %xmm2
+sqrtss (%rax), %xmm2
+
+stmxcsr (%rax)
+
+subps %xmm0, %xmm2
+subps (%rax), %xmm2
+
+subss %xmm0, %xmm2
+subss (%rax), %xmm2
+
+ucomiss %xmm0, %xmm1
+ucomiss (%rax), %xmm1
+
+unpckhps %xmm0, %xmm2
+unpckhps (%rax), %xmm2
+
+unpcklps %xmm0, %xmm2
+unpcklps (%rax), %xmm2
+
+xorps %xmm0, %xmm2
+xorps (%rax), %xmm2
+
+# CHECK: Instruction Info:
+# CHECK-NEXT: [1]: #uOps
+# CHECK-NEXT: [2]: Latency
+# CHECK-NEXT: [3]: RThroughput
+# CHECK-NEXT: [4]: MayLoad
+# CHECK-NEXT: [5]: MayStore
+# CHECK-NEXT: [6]: HasSideEffects (U)
+
+# CHECK: [1] [2] [3] [4] [5] [6] Instructions:
+# CHECK-NEXT: 1 3 1.00 addps %xmm0, %xmm2
+# CHECK-NEXT: 2 9 1.00 * addps (%rax), %xmm2
+# CHECK-NEXT: 1 3 1.00 addss %xmm0, %xmm2
+# CHECK-NEXT: 2 9 1.00 * addss (%rax), %xmm2
+# CHECK-NEXT: 1 1 1.00 andnps %xmm0, %xmm2
+# CHECK-NEXT: 2 7 1.00 * andnps (%rax), %xmm2
+# CHECK-NEXT: 1 1 1.00 andps %xmm0, %xmm2
+# CHECK-NEXT: 2 7 1.00 * andps (%rax), %xmm2
+# CHECK-NEXT: 1 3 1.00 cmpps $0, %xmm0, %xmm2
+# CHECK-NEXT: 2 9 1.00 * cmpps $0, (%rax), %xmm2
+# CHECK-NEXT: 1 3 1.00 cmpss $0, %xmm0, %xmm2
+# CHECK-NEXT: 2 9 1.00 * cmpss $0, (%rax), %xmm2
+# CHECK-NEXT: 2 2 1.00 comiss %xmm0, %xmm1
+# CHECK-NEXT: 3 8 1.00 * comiss (%rax), %xmm1
+# CHECK-NEXT: 1 3 1.00 cvtpi2ps %mm0, %xmm2
+# CHECK-NEXT: 2 9 1.00 * cvtpi2ps (%rax), %xmm2
+# CHECK-NEXT: 1 3 1.00 cvtps2pi %xmm0, %mm2
+# CHECK-NEXT: 2 9 1.00 * cvtps2pi (%rax), %mm2
+# CHECK-NEXT: 3 5 2.00 cvtsi2ssl %ecx, %xmm2
+# CHECK-NEXT: 3 5 2.00 cvtsi2ssq %rcx, %xmm2
+# CHECK-NEXT: 3 10 1.00 * cvtsi2ssl (%rax), %xmm2
+# CHECK-NEXT: 3 10 1.00 * cvtsi2ssl (%rax), %xmm2
+# CHECK-NEXT: 2 5 1.00 cvtss2si %xmm0, %ecx
+# CHECK-NEXT: 2 5 1.00 cvtss2si %xmm0, %rcx
+# CHECK-NEXT: 3 9 1.00 * cvtss2si (%rax), %ecx
+# CHECK-NEXT: 3 9 1.00 * cvtss2si (%rax), %rcx
+# CHECK-NEXT: 1 3 1.00 cvttps2pi %xmm0, %mm2
+# CHECK-NEXT: 2 9 1.00 * cvttps2pi (%rax), %mm2
+# CHECK-NEXT: 2 5 1.00 cvttss2si %xmm0, %ecx
+# CHECK-NEXT: 2 5 1.00 cvttss2si %xmm0, %rcx
+# CHECK-NEXT: 3 9 1.00 * cvttss2si (%rax), %ecx
+# CHECK-NEXT: 3 9 1.00 * cvttss2si (%rax), %rcx
+# CHECK-NEXT: 1 14 14.00 divps %xmm0, %xmm2
+# CHECK-NEXT: 2 20 14.00 * divps (%rax), %xmm2
+# CHECK-NEXT: 1 14 14.00 divss %xmm0, %xmm2
+# CHECK-NEXT: 2 20 14.00 * divss (%rax), %xmm2
+# CHECK-NEXT: 4 5 1.00 * * U ldmxcsr (%rax)
+# CHECK-NEXT: 1 1 1.00 * * U maskmovq %mm0, %mm1
+# CHECK-NEXT: 1 3 1.00 maxps %xmm0, %xmm2
+# CHECK-NEXT: 2 9 1.00 * maxps (%rax), %xmm2
+# CHECK-NEXT: 1 3 1.00 maxss %xmm0, %xmm2
+# CHECK-NEXT: 2 9 1.00 * maxss (%rax), %xmm2
+# CHECK-NEXT: 1 3 1.00 minps %xmm0, %xmm2
+# CHECK-NEXT: 2 9 1.00 * minps (%rax), %xmm2
+# CHECK-NEXT: 1 3 1.00 minss %xmm0, %xmm2
+# CHECK-NEXT: 2 9 1.00 * minss (%rax), %xmm2
+# CHECK-NEXT: 1 1 1.00 movaps %xmm0, %xmm2
+# CHECK-NEXT: 1 1 1.00 * movaps %xmm0, (%rax)
+# CHECK-NEXT: 1 6 0.50 * movaps (%rax), %xmm2
+# CHECK-NEXT: 1 1 1.00 movhlps %xmm0, %xmm2
+# CHECK-NEXT: 1 1 1.00 movlhps %xmm0, %xmm2
+# CHECK-NEXT: 1 1 1.00 * movhps %xmm0, (%rax)
+# CHECK-NEXT: 2 7 1.00 * movhps (%rax), %xmm2
+# CHECK-NEXT: 1 1 1.00 * movlps %xmm0, (%rax)
+# CHECK-NEXT: 2 7 1.00 * movlps (%rax), %xmm2
+# CHECK-NEXT: 1 2 1.00 movmskps %xmm0, %ecx
+# CHECK-NEXT: 1 1 1.00 * movntps %xmm0, (%rax)
+# CHECK-NEXT: 1 1 1.00 * * U movntq %mm0, (%rax)
+# CHECK-NEXT: 1 1 1.00 movss %xmm0, %xmm2
+# CHECK-NEXT: 1 1 1.00 * movss %xmm0, (%rax)
+# CHECK-NEXT: 1 6 0.50 * movss (%rax), %xmm2
+# CHECK-NEXT: 1 1 1.00 movups %xmm0, %xmm2
+# CHECK-NEXT: 1 1 1.00 * movups %xmm0, (%rax)
+# CHECK-NEXT: 1 6 0.50 * movups (%rax), %xmm2
+# CHECK-NEXT: 1 5 1.00 mulps %xmm0, %xmm2
+# CHECK-NEXT: 2 11 1.00 * mulps (%rax), %xmm2
+# CHECK-NEXT: 1 5 1.00 mulss %xmm0, %xmm2
+# CHECK-NEXT: 2 11 1.00 * mulss (%rax), %xmm2
+# CHECK-NEXT: 1 1 1.00 orps %xmm0, %xmm2
+# CHECK-NEXT: 2 7 1.00 * orps (%rax), %xmm2
+# CHECK-NEXT: 1 3 1.00 pavgb %mm0, %mm2
+# CHECK-NEXT: 2 8 1.00 * pavgb (%rax), %mm2
+# CHECK-NEXT: 1 3 1.00 pavgw %mm0, %mm2
+# CHECK-NEXT: 2 8 1.00 * pavgw (%rax), %mm2
+# CHECK-NEXT: 2 3 1.00 pextrw $1, %mm0, %ecx
+# CHECK-NEXT: 2 2 1.00 pinsrw $1, %eax, %mm2
+# CHECK-NEXT: 2 7 0.50 * pinsrw $1, (%rax), %mm2
+# CHECK-NEXT: 1 3 1.00 pmaxsw %mm0, %mm2
+# CHECK-NEXT: 2 8 1.00 * pmaxsw (%rax), %mm2
+# CHECK-NEXT: 1 3 1.00 pmaxub %mm0, %mm2
+# CHECK-NEXT: 2 8 1.00 * pmaxub (%rax), %mm2
+# CHECK-NEXT: 1 3 1.00 pminsw %mm0, %mm2
+# CHECK-NEXT: 2 8 1.00 * pminsw (%rax), %mm2
+# CHECK-NEXT: 1 3 1.00 pminub %mm0, %mm2
+# CHECK-NEXT: 2 8 1.00 * pminub (%rax), %mm2
+# CHECK-NEXT: 1 2 1.00 pmovmskb %xmm0, %ecx
+# CHECK-NEXT: 1 5 1.00 pmulhuw %mm0, %mm2
+# CHECK-NEXT: 2 10 1.00 * pmulhuw (%rax), %mm2
+# CHECK-NEXT: 1 5 0.50 * * prefetcht0 (%rax)
+# CHECK-NEXT: 1 5 0.50 * * prefetcht1 (%rax)
+# CHECK-NEXT: 1 5 0.50 * * prefetcht2 (%rax)
+# CHECK-NEXT: 1 5 0.50 * * prefetchnta (%rax)
+# CHECK-NEXT: 1 5 1.00 psadbw %mm0, %mm2
+# CHECK-NEXT: 2 10 1.00 * psadbw (%rax), %mm2
+# CHECK-NEXT: 1 1 1.00 pshufw $1, %mm0, %mm2
+# CHECK-NEXT: 2 6 1.00 * pshufw $1, (%rax), %mm2
+# CHECK-NEXT: 1 5 1.00 rcpps %xmm0, %xmm2
+# CHECK-NEXT: 2 11 1.00 * rcpps (%rax), %xmm2
+# CHECK-NEXT: 1 5 1.00 rcpss %xmm0, %xmm2
+# CHECK-NEXT: 2 11 1.00 * rcpss (%rax), %xmm2
+# CHECK-NEXT: 1 5 1.00 rsqrtps %xmm0, %xmm2
+# CHECK-NEXT: 2 11 1.00 * rsqrtps (%rax), %xmm2
+# CHECK-NEXT: 1 5 1.00 rsqrtss %xmm0, %xmm2
+# CHECK-NEXT: 2 11 1.00 * rsqrtss (%rax), %xmm2
+# CHECK-NEXT: 1 1 1.00 * * U sfence
+# CHECK-NEXT: 1 1 1.00 shufps $1, %xmm0, %xmm2
+# CHECK-NEXT: 2 7 1.00 * shufps $1, (%rax), %xmm2
+# CHECK-NEXT: 1 14 14.00 sqrtps %xmm0, %xmm2
+# CHECK-NEXT: 2 20 14.00 * sqrtps (%rax), %xmm2
+# CHECK-NEXT: 1 14 14.00 sqrtss %xmm0, %xmm2
+# CHECK-NEXT: 2 20 14.00 * sqrtss (%rax), %xmm2
+# CHECK-NEXT: 4 5 1.00 * * U stmxcsr (%rax)
+# CHECK-NEXT: 1 3 1.00 subps %xmm0, %xmm2
+# CHECK-NEXT: 2 9 1.00 * subps (%rax), %xmm2
+# CHECK-NEXT: 1 3 1.00 subss %xmm0, %xmm2
+# CHECK-NEXT: 2 9 1.00 * subss (%rax), %xmm2
+# CHECK-NEXT: 2 2 1.00 ucomiss %xmm0, %xmm1
+# CHECK-NEXT: 3 8 1.00 * ucomiss (%rax), %xmm1
+# CHECK-NEXT: 1 1 1.00 unpckhps %xmm0, %xmm2
+# CHECK-NEXT: 2 7 1.00 * unpckhps (%rax), %xmm2
+# CHECK-NEXT: 1 1 1.00 unpcklps %xmm0, %xmm2
+# CHECK-NEXT: 2 7 1.00 * unpcklps (%rax), %xmm2
+# CHECK-NEXT: 1 1 1.00 xorps %xmm0, %xmm2
+# CHECK-NEXT: 2 7 1.00 * xorps (%rax), %xmm2
+
+# CHECK: Resources:
+# CHECK-NEXT: [0] - SBDivider
+# CHECK-NEXT: [1] - SBFPDivider
+# CHECK-NEXT: [2] - SBPort0
+# CHECK-NEXT: [3] - SBPort1
+# CHECK-NEXT: [4] - SBPort4
+# CHECK-NEXT: [5] - SBPort5
+# CHECK-NEXT: [6.0] - SBPort23
+# CHECK-NEXT: [6.1] - SBPort23
+
+# CHECK: Resource pressure per iteration:
+# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6.0] [6.1]
+# CHECK-NEXT: - 112.00 41.00 55.50 10.00 34.50 33.50 33.50
+
+# CHECK: Resource pressure by instruction:
+# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6.0] [6.1] Instructions:
+# CHECK-NEXT: - - - 1.00 - - - - addps %xmm0, %xmm2
+# CHECK-NEXT: - - - 1.00 - - 0.50 0.50 addps (%rax), %xmm2
+# CHECK-NEXT: - - - 1.00 - - - - addss %xmm0, %xmm2
+# CHECK-NEXT: - - - 1.00 - - 0.50 0.50 addss (%rax), %xmm2
+# CHECK-NEXT: - - - - - 1.00 - - andnps %xmm0, %xmm2
+# CHECK-NEXT: - - - - - 1.00 0.50 0.50 andnps (%rax), %xmm2
+# CHECK-NEXT: - - - - - 1.00 - - andps %xmm0, %xmm2
+# CHECK-NEXT: - - - - - 1.00 0.50 0.50 andps (%rax), %xmm2
+# CHECK-NEXT: - - - 1.00 - - - - cmpps $0, %xmm0, %xmm2
+# CHECK-NEXT: - - - 1.00 - - 0.50 0.50 cmpps $0, (%rax), %xmm2
+# CHECK-NEXT: - - - 1.00 - - - - cmpss $0, %xmm0, %xmm2
+# CHECK-NEXT: - - - 1.00 - - 0.50 0.50 cmpss $0, (%rax), %xmm2
+# CHECK-NEXT: - - 1.00 1.00 - - - - comiss %xmm0, %xmm1
+# CHECK-NEXT: - - 1.00 1.00 - - 0.50 0.50 comiss (%rax), %xmm1
+# CHECK-NEXT: - - - 1.00 - - - - cvtpi2ps %mm0, %xmm2
+# CHECK-NEXT: - - - 1.00 - - 0.50 0.50 cvtpi2ps (%rax), %xmm2
+# CHECK-NEXT: - - - 1.00 - - - - cvtps2pi %xmm0, %mm2
+# CHECK-NEXT: - - - 1.00 - - 0.50 0.50 cvtps2pi (%rax), %mm2
+# CHECK-NEXT: - - - 1.00 - 2.00 - - cvtsi2ssl %ecx, %xmm2
+# CHECK-NEXT: - - - 1.00 - 2.00 - - cvtsi2ssq %rcx, %xmm2
+# CHECK-NEXT: - - - 1.00 - 1.00 0.50 0.50 cvtsi2ssl (%rax), %xmm2
+# CHECK-NEXT: - - - 1.00 - 1.00 0.50 0.50 cvtsi2ssl (%rax), %xmm2
+# CHECK-NEXT: - - 1.00 1.00 - - - - cvtss2si %xmm0, %ecx
+# CHECK-NEXT: - - 1.00 1.00 - - - - cvtss2si %xmm0, %rcx
+# CHECK-NEXT: - - 1.00 1.00 - - 0.50 0.50 cvtss2si (%rax), %ecx
+# CHECK-NEXT: - - 1.00 1.00 - - 0.50 0.50 cvtss2si (%rax), %rcx
+# CHECK-NEXT: - - - 1.00 - - - - cvttps2pi %xmm0, %mm2
+# CHECK-NEXT: - - - 1.00 - - 0.50 0.50 cvttps2pi (%rax), %mm2
+# CHECK-NEXT: - - 1.00 1.00 - - - - cvttss2si %xmm0, %ecx
+# CHECK-NEXT: - - 1.00 1.00 - - - - cvttss2si %xmm0, %rcx
+# CHECK-NEXT: - - 1.00 1.00 - - 0.50 0.50 cvttss2si (%rax), %ecx
+# CHECK-NEXT: - - 1.00 1.00 - - 0.50 0.50 cvttss2si (%rax), %rcx
+# CHECK-NEXT: - 14.00 1.00 - - - - - divps %xmm0, %xmm2
+# CHECK-NEXT: - 14.00 1.00 - - - 0.50 0.50 divps (%rax), %xmm2
+# CHECK-NEXT: - 14.00 1.00 - - - - - divss %xmm0, %xmm2
+# CHECK-NEXT: - 14.00 1.00 - - - 0.50 0.50 divss (%rax), %xmm2
+# CHECK-NEXT: - - 1.00 - 1.00 1.00 0.50 0.50 ldmxcsr (%rax)
+# CHECK-NEXT: - - - - - 1.00 - - maskmovq %mm0, %mm1
+# CHECK-NEXT: - - - 1.00 - - - - maxps %xmm0, %xmm2
+# CHECK-NEXT: - - - 1.00 - - 0.50 0.50 maxps (%rax), %xmm2
+# CHECK-NEXT: - - - 1.00 - - - - maxss %xmm0, %xmm2
+# CHECK-NEXT: - - - 1.00 - - 0.50 0.50 maxss (%rax), %xmm2
+# CHECK-NEXT: - - - 1.00 - - - - minps %xmm0, %xmm2
+# CHECK-NEXT: - - - 1.00 - - 0.50 0.50 minps (%rax), %xmm2
+# CHECK-NEXT: - - - 1.00 - - - - minss %xmm0, %xmm2
+# CHECK-NEXT: - - - 1.00 - - 0.50 0.50 minss (%rax), %xmm2
+# CHECK-NEXT: - - - - - 1.00 - - movaps %xmm0, %xmm2
+# CHECK-NEXT: - - - - 1.00 - 0.50 0.50 movaps %xmm0, (%rax)
+# CHECK-NEXT: - - - - - - 0.50 0.50 movaps (%rax), %xmm2
+# CHECK-NEXT: - - - - - 1.00 - - movhlps %xmm0, %xmm2
+# CHECK-NEXT: - - - - - 1.00 - - movlhps %xmm0, %xmm2
+# CHECK-NEXT: - - - - 1.00 - 0.50 0.50 movhps %xmm0, (%rax)
+# CHECK-NEXT: - - - - - 1.00 0.50 0.50 movhps (%rax), %xmm2
+# CHECK-NEXT: - - - - 1.00 - 0.50 0.50 movlps %xmm0, (%rax)
+# CHECK-NEXT: - - - - - 1.00 0.50 0.50 movlps (%rax), %xmm2
+# CHECK-NEXT: - - 1.00 - - - - - movmskps %xmm0, %ecx
+# CHECK-NEXT: - - - - 1.00 - 0.50 0.50 movntps %xmm0, (%rax)
+# CHECK-NEXT: - - - - 1.00 - 0.50 0.50 movntq %mm0, (%rax)
+# CHECK-NEXT: - - - - - 1.00 - - movss %xmm0, %xmm2
+# CHECK-NEXT: - - - - 1.00 - 0.50 0.50 movss %xmm0, (%rax)
+# CHECK-NEXT: - - - - - - 0.50 0.50 movss (%rax), %xmm2
+# CHECK-NEXT: - - - - - 1.00 - - movups %xmm0, %xmm2
+# CHECK-NEXT: - - - - 1.00 - 0.50 0.50 movups %xmm0, (%rax)
+# CHECK-NEXT: - - - - - - 0.50 0.50 movups (%rax), %xmm2
+# CHECK-NEXT: - - 1.00 - - - - - mulps %xmm0, %xmm2
+# CHECK-NEXT: - - 1.00 - - - 0.50 0.50 mulps (%rax), %xmm2
+# CHECK-NEXT: - - 1.00 - - - - - mulss %xmm0, %xmm2
+# CHECK-NEXT: - - 1.00 - - - 0.50 0.50 mulss (%rax), %xmm2
+# CHECK-NEXT: - - - - - 1.00 - - orps %xmm0, %xmm2
+# CHECK-NEXT: - - - - - 1.00 0.50 0.50 orps (%rax), %xmm2
+# CHECK-NEXT: - - - 1.00 - - - - pavgb %mm0, %mm2
+# CHECK-NEXT: - - - 1.00 - - 0.50 0.50 pavgb (%rax), %mm2
+# CHECK-NEXT: - - - 1.00 - - - - pavgw %mm0, %mm2
+# CHECK-NEXT: - - - 1.00 - - 0.50 0.50 pavgw (%rax), %mm2
+# CHECK-NEXT: - - 1.00 0.50 - 0.50 - - pextrw $1, %mm0, %ecx
+# CHECK-NEXT: - - - 0.50 - 1.50 - - pinsrw $1, %eax, %mm2
+# CHECK-NEXT: - - - 0.50 - 0.50 0.50 0.50 pinsrw $1, (%rax), %mm2
+# CHECK-NEXT: - - - 1.00 - - - - pmaxsw %mm0, %mm2
+# CHECK-NEXT: - - - 1.00 - - 0.50 0.50 pmaxsw (%rax), %mm2
+# CHECK-NEXT: - - - 1.00 - - - - pmaxub %mm0, %mm2
+# CHECK-NEXT: - - - 1.00 - - 0.50 0.50 pmaxub (%rax), %mm2
+# CHECK-NEXT: - - - 1.00 - - - - pminsw %mm0, %mm2
+# CHECK-NEXT: - - - 1.00 - - 0.50 0.50 pminsw (%rax), %mm2
+# CHECK-NEXT: - - - 1.00 - - - - pminub %mm0, %mm2
+# CHECK-NEXT: - - - 1.00 - - 0.50 0.50 pminub (%rax), %mm2
+# CHECK-NEXT: - - 1.00 - - - - - pmovmskb %xmm0, %ecx
+# CHECK-NEXT: - - 1.00 - - - - - pmulhuw %mm0, %mm2
+# CHECK-NEXT: - - 1.00 - - - 0.50 0.50 pmulhuw (%rax), %mm2
+# CHECK-NEXT: - - - - - - 0.50 0.50 prefetcht0 (%rax)
+# CHECK-NEXT: - - - - - - 0.50 0.50 prefetcht1 (%rax)
+# CHECK-NEXT: - - - - - - 0.50 0.50 prefetcht2 (%rax)
+# CHECK-NEXT: - - - - - - 0.50 0.50 prefetchnta (%rax)
+# CHECK-NEXT: - - 1.00 - - - - - psadbw %mm0, %mm2
+# CHECK-NEXT: - - 1.00 - - - 0.50 0.50 psadbw (%rax), %mm2
+# CHECK-NEXT: - - - - - 1.00 - - pshufw $1, %mm0, %mm2
+# CHECK-NEXT: - - - - - 1.00 0.50 0.50 pshufw $1, (%rax), %mm2
+# CHECK-NEXT: - - 1.00 - - - - - rcpps %xmm0, %xmm2
+# CHECK-NEXT: - - 1.00 - - - 0.50 0.50 rcpps (%rax), %xmm2
+# CHECK-NEXT: - - 1.00 - - - - - rcpss %xmm0, %xmm2
+# CHECK-NEXT: - - 1.00 - - - 0.50 0.50 rcpss (%rax), %xmm2
+# CHECK-NEXT: - - 1.00 - - - - - rsqrtps %xmm0, %xmm2
+# CHECK-NEXT: - - 1.00 - - - 0.50 0.50 rsqrtps (%rax), %xmm2
+# CHECK-NEXT: - - 1.00 - - - - - rsqrtss %xmm0, %xmm2
+# CHECK-NEXT: - - 1.00 - - - 0.50 0.50 rsqrtss (%rax), %xmm2
+# CHECK-NEXT: - - - - 1.00 - 0.50 0.50 sfence
+# CHECK-NEXT: - - - - - 1.00 - - shufps $1, %xmm0, %xmm2
+# CHECK-NEXT: - - - - - 1.00 0.50 0.50 shufps $1, (%rax), %xmm2
+# CHECK-NEXT: - 14.00 1.00 - - - - - sqrtps %xmm0, %xmm2
+# CHECK-NEXT: - 14.00 1.00 - - - 0.50 0.50 sqrtps (%rax), %xmm2
+# CHECK-NEXT: - 14.00 1.00 - - - - - sqrtss %xmm0, %xmm2
+# CHECK-NEXT: - 14.00 1.00 - - - 0.50 0.50 sqrtss (%rax), %xmm2
+# CHECK-NEXT: - - 1.00 - 1.00 1.00 0.50 0.50 stmxcsr (%rax)
+# CHECK-NEXT: - - - 1.00 - - - - subps %xmm0, %xmm2
+# CHECK-NEXT: - - - 1.00 - - 0.50 0.50 subps (%rax), %xmm2
+# CHECK-NEXT: - - - 1.00 - - - - subss %xmm0, %xmm2
+# CHECK-NEXT: - - - 1.00 - - 0.50 0.50 subss (%rax), %xmm2
+# CHECK-NEXT: - - 1.00 1.00 - - - - ucomiss %xmm0, %xmm1
+# CHECK-NEXT: - - 1.00 1.00 - - 0.50 0.50 ucomiss (%rax), %xmm1
+# CHECK-NEXT: - - - - - 1.00 - - unpckhps %xmm0, %xmm2
+# CHECK-NEXT: - - - - - 1.00 0.50 0.50 unpckhps (%rax), %xmm2
+# CHECK-NEXT: - - - - - 1.00 - - unpcklps %xmm0, %xmm2
+# CHECK-NEXT: - - - - - 1.00 0.50 0.50 unpcklps (%rax), %xmm2
+# CHECK-NEXT: - - - - - 1.00 - - xorps %xmm0, %xmm2
+# CHECK-NEXT: - - - - - 1.00 0.50 0.50 xorps (%rax), %xmm2
--- /dev/null
+# NOTE: Assertions have been autogenerated by utils/update_mca_test_checks.py
+# RUN: llvm-mca -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -instruction-tables < %s | FileCheck %s
+
+addpd %xmm0, %xmm2
+addpd (%rax), %xmm2
+
+addsd %xmm0, %xmm2
+addsd (%rax), %xmm2
+
+andnpd %xmm0, %xmm2
+andnpd (%rax), %xmm2
+
+andpd %xmm0, %xmm2
+andpd (%rax), %xmm2
+
+clflush (%rax)
+
+cmppd $0, %xmm0, %xmm2
+cmppd $0, (%rax), %xmm2
+
+cmpsd $0, %xmm0, %xmm2
+cmpsd $0, (%rax), %xmm2
+
+comisd %xmm0, %xmm1
+comisd (%rax), %xmm1
+
+cvtdq2pd %xmm0, %xmm2
+cvtdq2pd (%rax), %xmm2
+
+cvtdq2ps %xmm0, %xmm2
+cvtdq2ps (%rax), %xmm2
+
+cvtpd2dq %xmm0, %xmm2
+cvtpd2dq (%rax), %xmm2
+
+cvtpd2pi %xmm0, %mm2
+cvtpd2pi (%rax), %mm2
+
+cvtpd2ps %xmm0, %xmm2
+cvtpd2ps (%rax), %xmm2
+
+cvtpi2pd %mm0, %xmm2
+cvtpi2pd (%rax), %xmm2
+
+cvtps2dq %xmm0, %xmm2
+cvtps2dq (%rax), %xmm2
+
+cvtps2pd %xmm0, %xmm2
+cvtps2pd (%rax), %xmm2
+
+cvtsd2si %xmm0, %ecx
+cvtsd2si %xmm0, %rcx
+cvtsd2si (%rax), %ecx
+cvtsd2si (%rax), %rcx
+
+cvtsd2ss %xmm0, %xmm2
+cvtsd2ss (%rax), %xmm2
+
+cvtsi2sd %ecx, %xmm2
+cvtsi2sd %rcx, %xmm2
+cvtsi2sd (%rax), %xmm2
+cvtsi2sd (%rax), %xmm2
+
+cvtss2sd %xmm0, %xmm2
+cvtss2sd (%rax), %xmm2
+
+cvttpd2dq %xmm0, %xmm2
+cvttpd2dq (%rax), %xmm2
+
+cvttpd2pi %xmm0, %mm2
+cvttpd2pi (%rax), %mm2
+
+cvttps2dq %xmm0, %xmm2
+cvttps2dq (%rax), %xmm2
+
+cvttsd2si %xmm0, %ecx
+cvttsd2si %xmm0, %rcx
+cvttsd2si (%rax), %ecx
+cvttsd2si (%rax), %rcx
+
+divpd %xmm0, %xmm2
+divpd (%rax), %xmm2
+
+divsd %xmm0, %xmm2
+divsd (%rax), %xmm2
+
+lfence
+
+maskmovdqu %xmm0, %xmm1
+
+maxpd %xmm0, %xmm2
+maxpd (%rax), %xmm2
+
+maxsd %xmm0, %xmm2
+maxsd (%rax), %xmm2
+
+minpd %xmm0, %xmm2
+minpd (%rax), %xmm2
+
+minsd %xmm0, %xmm2
+minsd (%rax), %xmm2
+
+movapd %xmm0, %xmm2
+movapd %xmm0, (%rax)
+movapd (%rax), %xmm2
+
+movd %eax, %xmm2
+movd (%rax), %xmm2
+
+movd %xmm0, %ecx
+movd %xmm0, (%rax)
+
+movdqa %xmm0, %xmm2
+movdqa %xmm0, (%rax)
+movdqa (%rax), %xmm2
+
+movdqu %xmm0, %xmm2
+movdqu %xmm0, (%rax)
+movdqu (%rax), %xmm2
+
+movdq2q %xmm0, %mm2
+
+movhpd %xmm0, (%rax)
+movhpd (%rax), %xmm2
+
+movlpd %xmm0, (%rax)
+movlpd (%rax), %xmm2
+
+movmskpd %xmm0, %rcx
+
+movntil %eax, (%rax)
+movntiq %rax, (%rax)
+
+movntdq %xmm0, (%rax)
+movntpd %xmm0, (%rax)
+
+movq %xmm0, %xmm2
+
+movq %rax, %xmm2
+movq (%rax), %xmm2
+
+movq %xmm0, %rcx
+movq %xmm0, (%rax)
+
+movq2dq %mm0, %xmm2
+
+movsd %xmm0, %xmm2
+movsd %xmm0, (%rax)
+movsd (%rax), %xmm2
+
+movupd %xmm0, %xmm2
+movupd %xmm0, (%rax)
+movupd (%rax), %xmm2
+
+mulpd %xmm0, %xmm2
+mulpd (%rax), %xmm2
+
+mulsd %xmm0, %xmm2
+mulsd (%rax), %xmm2
+
+orpd %xmm0, %xmm2
+orpd (%rax), %xmm2
+
+packssdw %xmm0, %xmm2
+packssdw (%rax), %xmm2
+
+packsswb %xmm0, %xmm2
+packsswb (%rax), %xmm2
+
+packuswb %xmm0, %xmm2
+packuswb (%rax), %xmm2
+
+paddb %xmm0, %xmm2
+paddb (%rax), %xmm2
+
+paddd %xmm0, %xmm2
+paddd (%rax), %xmm2
+
+paddq %mm0, %mm2
+paddq (%rax), %mm2
+
+paddq %xmm0, %xmm2
+paddq (%rax), %xmm2
+
+paddsb %xmm0, %xmm2
+paddsb (%rax), %xmm2
+
+paddsw %xmm0, %xmm2
+paddsw (%rax), %xmm2
+
+paddusb %xmm0, %xmm2
+paddusb (%rax), %xmm2
+
+paddusw %xmm0, %xmm2
+paddusw (%rax), %xmm2
+
+paddw %xmm0, %xmm2
+paddw (%rax), %xmm2
+
+pand %xmm0, %xmm2
+pand (%rax), %xmm2
+
+pandn %xmm0, %xmm2
+pandn (%rax), %xmm2
+
+pavgb %xmm0, %xmm2
+pavgb (%rax), %xmm2
+
+pavgw %xmm0, %xmm2
+pavgw (%rax), %xmm2
+
+pcmpeqb %xmm0, %xmm2
+pcmpeqb (%rax), %xmm2
+
+pcmpeqd %xmm0, %xmm2
+pcmpeqd (%rax), %xmm2
+
+pcmpeqw %xmm0, %xmm2
+pcmpeqw (%rax), %xmm2
+
+pcmpgtb %xmm0, %xmm2
+pcmpgtb (%rax), %xmm2
+
+pcmpgtd %xmm0, %xmm2
+pcmpgtd (%rax), %xmm2
+
+pcmpgtw %xmm0, %xmm2
+pcmpgtw (%rax), %xmm2
+
+pextrw $1, %xmm0, %rcx
+
+pmaddwd %xmm0, %xmm2
+pmaddwd (%rax), %xmm2
+
+pmaxsw %xmm0, %xmm2
+pmaxsw (%rax), %xmm2
+
+pmaxub %xmm0, %xmm2
+pmaxub (%rax), %xmm2
+
+pminsw %xmm0, %xmm2
+pminsw (%rax), %xmm2
+
+pminub %xmm0, %xmm2
+pminub (%rax), %xmm2
+
+pmovmskb %xmm0, %rcx
+
+pmulhuw %xmm0, %xmm2
+pmulhuw (%rax), %xmm2
+
+pmulhw %xmm0, %xmm2
+pmulhw (%rax), %xmm2
+
+pmullw %xmm0, %xmm2
+pmullw (%rax), %xmm2
+
+pmuludq %mm0, %mm2
+pmuludq (%rax), %mm2
+
+pmuludq %xmm0, %xmm2
+pmuludq (%rax), %xmm2
+
+por %xmm0, %xmm2
+por (%rax), %xmm2
+
+psadbw %xmm0, %xmm2
+psadbw (%rax), %xmm2
+
+pshufd $1, %xmm0, %xmm2
+pshufd $1, (%rax), %xmm2
+
+pshufhw $1, %xmm0, %xmm2
+pshufhw $1, (%rax), %xmm2
+
+pshuflw $1, %xmm0, %xmm2
+pshuflw $1, (%rax), %xmm2
+
+pslld $1, %xmm2
+pslld %xmm0, %xmm2
+pslld (%rax), %xmm2
+
+pslldq $1, %xmm2
+
+psllq $1, %xmm2
+psllq %xmm0, %xmm2
+psllq (%rax), %xmm2
+
+psllw $1, %xmm2
+psllw %xmm0, %xmm2
+psllw (%rax), %xmm2
+
+psrad $1, %xmm2
+psrad %xmm0, %xmm2
+psrad (%rax), %xmm2
+
+psraw $1, %xmm2
+psraw %xmm0, %xmm2
+psraw (%rax), %xmm2
+
+psrld $1, %xmm2
+psrld %xmm0, %xmm2
+psrld (%rax), %xmm2
+
+psrldq $1, %xmm2
+
+psrlq $1, %xmm2
+psrlq %xmm0, %xmm2
+psrlq (%rax), %xmm2
+
+psrlw $1, %xmm2
+psrlw %xmm0, %xmm2
+psrlw (%rax), %xmm2
+
+psubb %xmm0, %xmm2
+psubb (%rax), %xmm2
+
+psubd %xmm0, %xmm2
+psubd (%rax), %xmm2
+
+psubq %mm0, %mm2
+psubq (%rax), %mm2
+
+psubq %xmm0, %xmm2
+psubq (%rax), %xmm2
+
+psubsb %xmm0, %xmm2
+psubsb (%rax), %xmm2
+
+psubsw %xmm0, %xmm2
+psubsw (%rax), %xmm2
+
+psubusb %xmm0, %xmm2
+psubusb (%rax), %xmm2
+
+psubusw %xmm0, %xmm2
+psubusw (%rax), %xmm2
+
+psubw %xmm0, %xmm2
+psubw (%rax), %xmm2
+
+punpckhbw %xmm0, %xmm2
+punpckhbw (%rax), %xmm2
+
+punpckhdq %xmm0, %xmm2
+punpckhdq (%rax), %xmm2
+
+punpckhqdq %xmm0, %xmm2
+punpckhqdq (%rax), %xmm2
+
+punpckhwd %xmm0, %xmm2
+punpckhwd (%rax), %xmm2
+
+punpcklbw %xmm0, %xmm2
+punpcklbw (%rax), %xmm2
+
+punpckldq %xmm0, %xmm2
+punpckldq (%rax), %xmm2
+
+punpcklqdq %xmm0, %xmm2
+punpcklqdq (%rax), %xmm2
+
+punpcklwd %xmm0, %xmm2
+punpcklwd (%rax), %xmm2
+
+pxor %xmm0, %xmm2
+pxor (%rax), %xmm2
+
+shufpd $1, %xmm0, %xmm2
+shufpd $1, (%rax), %xmm2
+
+sqrtpd %xmm0, %xmm2
+sqrtpd (%rax), %xmm2
+
+sqrtsd %xmm0, %xmm2
+sqrtsd (%rax), %xmm2
+
+subpd %xmm0, %xmm2
+subpd (%rax), %xmm2
+
+subsd %xmm0, %xmm2
+subsd (%rax), %xmm2
+
+ucomisd %xmm0, %xmm1
+ucomisd (%rax), %xmm1
+
+unpckhpd %xmm0, %xmm2
+unpckhpd (%rax), %xmm2
+
+unpcklpd %xmm0, %xmm2
+unpcklpd (%rax), %xmm2
+
+xorpd %xmm0, %xmm2
+xorpd (%rax), %xmm2
+
+# CHECK: Instruction Info:
+# CHECK-NEXT: [1]: #uOps
+# CHECK-NEXT: [2]: Latency
+# CHECK-NEXT: [3]: RThroughput
+# CHECK-NEXT: [4]: MayLoad
+# CHECK-NEXT: [5]: MayStore
+# CHECK-NEXT: [6]: HasSideEffects (U)
+
+# CHECK: [1] [2] [3] [4] [5] [6] Instructions:
+# CHECK-NEXT: 1 3 1.00 addpd %xmm0, %xmm2
+# CHECK-NEXT: 2 9 1.00 * addpd (%rax), %xmm2
+# CHECK-NEXT: 1 3 1.00 addsd %xmm0, %xmm2
+# CHECK-NEXT: 2 9 1.00 * addsd (%rax), %xmm2
+# CHECK-NEXT: 1 1 1.00 andnpd %xmm0, %xmm2
+# CHECK-NEXT: 2 7 1.00 * andnpd (%rax), %xmm2
+# CHECK-NEXT: 1 1 1.00 andpd %xmm0, %xmm2
+# CHECK-NEXT: 2 7 1.00 * andpd (%rax), %xmm2
+# CHECK-NEXT: 4 5 1.00 * * U clflush (%rax)
+# CHECK-NEXT: 1 3 1.00 cmppd $0, %xmm0, %xmm2
+# CHECK-NEXT: 2 9 1.00 * cmppd $0, (%rax), %xmm2
+# CHECK-NEXT: 1 3 1.00 cmpsd $0, %xmm0, %xmm2
+# CHECK-NEXT: 2 9 1.00 * cmpsd $0, (%rax), %xmm2
+# CHECK-NEXT: 2 2 1.00 comisd %xmm0, %xmm1
+# CHECK-NEXT: 3 8 1.00 * comisd (%rax), %xmm1
+# CHECK-NEXT: 2 4 1.00 cvtdq2pd %xmm0, %xmm2
+# CHECK-NEXT: 3 10 1.00 * cvtdq2pd (%rax), %xmm2
+# CHECK-NEXT: 1 3 1.00 cvtdq2ps %xmm0, %xmm2
+# CHECK-NEXT: 2 9 1.00 * cvtdq2ps (%rax), %xmm2
+# CHECK-NEXT: 2 4 1.00 cvtpd2dq %xmm0, %xmm2
+# CHECK-NEXT: 3 10 1.00 * cvtpd2dq (%rax), %xmm2
+# CHECK-NEXT: 2 4 1.00 cvtpd2pi %xmm0, %mm2
+# CHECK-NEXT: 3 10 1.00 * cvtpd2pi (%rax), %mm2
+# CHECK-NEXT: 2 4 1.00 cvtpd2ps %xmm0, %xmm2
+# CHECK-NEXT: 3 10 1.00 * cvtpd2ps (%rax), %xmm2
+# CHECK-NEXT: 2 4 1.00 cvtpi2pd %mm0, %xmm2
+# CHECK-NEXT: 3 10 1.00 * cvtpi2pd (%rax), %xmm2
+# CHECK-NEXT: 1 3 1.00 cvtps2dq %xmm0, %xmm2
+# CHECK-NEXT: 2 9 1.00 * cvtps2dq (%rax), %xmm2
+# CHECK-NEXT: 2 2 1.00 cvtps2pd %xmm0, %xmm2
+# CHECK-NEXT: 2 7 1.00 * cvtps2pd (%rax), %xmm2
+# CHECK-NEXT: 2 5 1.00 cvtsd2si %xmm0, %ecx
+# CHECK-NEXT: 2 5 1.00 cvtsd2si %xmm0, %rcx
+# CHECK-NEXT: 3 9 1.00 * cvtsd2si (%rax), %ecx
+# CHECK-NEXT: 3 9 1.00 * cvtsd2si (%rax), %rcx
+# CHECK-NEXT: 2 4 1.00 cvtsd2ss %xmm0, %xmm2
+# CHECK-NEXT: 3 10 1.00 * cvtsd2ss (%rax), %xmm2
+# CHECK-NEXT: 2 4 1.00 cvtsi2sdl %ecx, %xmm2
+# CHECK-NEXT: 2 4 1.00 cvtsi2sdq %rcx, %xmm2
+# CHECK-NEXT: 2 9 1.00 * cvtsi2sdl (%rax), %xmm2
+# CHECK-NEXT: 2 9 1.00 * cvtsi2sdl (%rax), %xmm2
+# CHECK-NEXT: 1 1 1.00 cvtss2sd %xmm0, %xmm2
+# CHECK-NEXT: 2 7 1.00 * cvtss2sd (%rax), %xmm2
+# CHECK-NEXT: 2 4 1.00 cvttpd2dq %xmm0, %xmm2
+# CHECK-NEXT: 3 10 1.00 * cvttpd2dq (%rax), %xmm2
+# CHECK-NEXT: 2 4 1.00 cvttpd2pi %xmm0, %mm2
+# CHECK-NEXT: 3 10 1.00 * cvttpd2pi (%rax), %mm2
+# CHECK-NEXT: 1 3 1.00 cvttps2dq %xmm0, %xmm2
+# CHECK-NEXT: 2 9 1.00 * cvttps2dq (%rax), %xmm2
+# CHECK-NEXT: 2 5 1.00 cvttsd2si %xmm0, %ecx
+# CHECK-NEXT: 2 5 1.00 cvttsd2si %xmm0, %rcx
+# CHECK-NEXT: 3 9 1.00 * cvttsd2si (%rax), %ecx
+# CHECK-NEXT: 3 9 1.00 * cvttsd2si (%rax), %rcx
+# CHECK-NEXT: 1 22 22.00 divpd %xmm0, %xmm2
+# CHECK-NEXT: 2 28 22.00 * divpd (%rax), %xmm2
+# CHECK-NEXT: 1 22 22.00 divsd %xmm0, %xmm2
+# CHECK-NEXT: 2 28 22.00 * divsd (%rax), %xmm2
+# CHECK-NEXT: 1 1 1.00 * * U lfence
+# CHECK-NEXT: 1 1 1.00 * * U maskmovdqu %xmm0, %xmm1
+# CHECK-NEXT: 1 3 1.00 maxpd %xmm0, %xmm2
+# CHECK-NEXT: 2 9 1.00 * maxpd (%rax), %xmm2
+# CHECK-NEXT: 1 3 1.00 maxsd %xmm0, %xmm2
+# CHECK-NEXT: 2 9 1.00 * maxsd (%rax), %xmm2
+# CHECK-NEXT: 1 3 1.00 minpd %xmm0, %xmm2
+# CHECK-NEXT: 2 9 1.00 * minpd (%rax), %xmm2
+# CHECK-NEXT: 1 3 1.00 minsd %xmm0, %xmm2
+# CHECK-NEXT: 2 9 1.00 * minsd (%rax), %xmm2
+# CHECK-NEXT: 1 1 1.00 movapd %xmm0, %xmm2
+# CHECK-NEXT: 1 1 1.00 * movapd %xmm0, (%rax)
+# CHECK-NEXT: 1 6 0.50 * movapd (%rax), %xmm2
+# CHECK-NEXT: 1 1 1.00 movd %eax, %xmm2
+# CHECK-NEXT: 1 6 0.50 * movd (%rax), %xmm2
+# CHECK-NEXT: 1 2 1.00 movd %xmm0, %ecx
+# CHECK-NEXT: 1 1 1.00 * movd %xmm0, (%rax)
+# CHECK-NEXT: 1 1 0.33 movdqa %xmm0, %xmm2
+# CHECK-NEXT: 1 1 1.00 * movdqa %xmm0, (%rax)
+# CHECK-NEXT: 1 6 0.50 * movdqa (%rax), %xmm2
+# CHECK-NEXT: 1 1 0.33 movdqu %xmm0, %xmm2
+# CHECK-NEXT: 1 1 1.00 * movdqu %xmm0, (%rax)
+# CHECK-NEXT: 1 6 0.50 * movdqu (%rax), %xmm2
+# CHECK-NEXT: 2 2 1.00 movdq2q %xmm0, %mm2
+# CHECK-NEXT: 1 1 1.00 * movhpd %xmm0, (%rax)
+# CHECK-NEXT: 2 7 1.00 * movhpd (%rax), %xmm2
+# CHECK-NEXT: 1 1 1.00 * movlpd %xmm0, (%rax)
+# CHECK-NEXT: 2 7 1.00 * movlpd (%rax), %xmm2
+# CHECK-NEXT: 1 2 1.00 movmskpd %xmm0, %ecx
+# CHECK-NEXT: 1 1 1.00 * movntil %eax, (%rax)
+# CHECK-NEXT: 1 1 1.00 * movntiq %rax, (%rax)
+# CHECK-NEXT: 1 1 1.00 * movntdq %xmm0, (%rax)
+# CHECK-NEXT: 1 1 1.00 * movntpd %xmm0, (%rax)
+# CHECK-NEXT: 1 1 0.33 movq %xmm0, %xmm2
+# CHECK-NEXT: 1 1 1.00 movq %rax, %xmm2
+# CHECK-NEXT: 1 6 0.50 * movq (%rax), %xmm2
+# CHECK-NEXT: 1 2 1.00 movq %xmm0, %rcx
+# CHECK-NEXT: 1 1 1.00 * movq %xmm0, (%rax)
+# CHECK-NEXT: 1 1 0.33 movq2dq %mm0, %xmm2
+# CHECK-NEXT: 1 1 1.00 movsd %xmm0, %xmm2
+# CHECK-NEXT: 1 1 1.00 * movsd %xmm0, (%rax)
+# CHECK-NEXT: 1 6 0.50 * movsd (%rax), %xmm2
+# CHECK-NEXT: 1 1 1.00 movupd %xmm0, %xmm2
+# CHECK-NEXT: 1 1 1.00 * movupd %xmm0, (%rax)
+# CHECK-NEXT: 1 6 0.50 * movupd (%rax), %xmm2
+# CHECK-NEXT: 1 5 1.00 mulpd %xmm0, %xmm2
+# CHECK-NEXT: 2 11 1.00 * mulpd (%rax), %xmm2
+# CHECK-NEXT: 1 5 1.00 mulsd %xmm0, %xmm2
+# CHECK-NEXT: 2 11 1.00 * mulsd (%rax), %xmm2
+# CHECK-NEXT: 1 1 1.00 orpd %xmm0, %xmm2
+# CHECK-NEXT: 2 7 1.00 * orpd (%rax), %xmm2
+# CHECK-NEXT: 1 1 0.50 packssdw %xmm0, %xmm2
+# CHECK-NEXT: 2 7 0.50 * packssdw (%rax), %xmm2
+# CHECK-NEXT: 1 1 0.50 packsswb %xmm0, %xmm2
+# CHECK-NEXT: 2 7 0.50 * packsswb (%rax), %xmm2
+# CHECK-NEXT: 1 1 0.50 packuswb %xmm0, %xmm2
+# CHECK-NEXT: 2 7 0.50 * packuswb (%rax), %xmm2
+# CHECK-NEXT: 1 1 0.50 paddb %xmm0, %xmm2
+# CHECK-NEXT: 2 7 0.50 * paddb (%rax), %xmm2
+# CHECK-NEXT: 1 1 0.50 paddd %xmm0, %xmm2
+# CHECK-NEXT: 2 7 0.50 * paddd (%rax), %xmm2
+# CHECK-NEXT: 1 1 0.50 paddq %mm0, %mm2
+# CHECK-NEXT: 2 7 0.50 * paddq (%rax), %mm2
+# CHECK-NEXT: 1 1 0.50 paddq %xmm0, %xmm2
+# CHECK-NEXT: 2 7 0.50 * paddq (%rax), %xmm2
+# CHECK-NEXT: 1 1 0.50 paddsb %xmm0, %xmm2
+# CHECK-NEXT: 2 7 0.50 * paddsb (%rax), %xmm2
+# CHECK-NEXT: 1 1 0.50 paddsw %xmm0, %xmm2
+# CHECK-NEXT: 2 7 0.50 * paddsw (%rax), %xmm2
+# CHECK-NEXT: 1 1 0.50 paddusb %xmm0, %xmm2
+# CHECK-NEXT: 2 7 0.50 * paddusb (%rax), %xmm2
+# CHECK-NEXT: 1 1 0.50 paddusw %xmm0, %xmm2
+# CHECK-NEXT: 2 7 0.50 * paddusw (%rax), %xmm2
+# CHECK-NEXT: 1 1 0.50 paddw %xmm0, %xmm2
+# CHECK-NEXT: 2 7 0.50 * paddw (%rax), %xmm2
+# CHECK-NEXT: 1 1 0.33 pand %xmm0, %xmm2
+# CHECK-NEXT: 2 7 0.50 * pand (%rax), %xmm2
+# CHECK-NEXT: 1 1 0.33 pandn %xmm0, %xmm2
+# CHECK-NEXT: 2 7 0.50 * pandn (%rax), %xmm2
+# CHECK-NEXT: 1 1 0.50 pavgb %xmm0, %xmm2
+# CHECK-NEXT: 2 7 0.50 * pavgb (%rax), %xmm2
+# CHECK-NEXT: 1 1 0.50 pavgw %xmm0, %xmm2
+# CHECK-NEXT: 2 7 0.50 * pavgw (%rax), %xmm2
+# CHECK-NEXT: 1 1 0.50 pcmpeqb %xmm0, %xmm2
+# CHECK-NEXT: 2 7 0.50 * pcmpeqb (%rax), %xmm2
+# CHECK-NEXT: 1 1 0.50 pcmpeqd %xmm0, %xmm2
+# CHECK-NEXT: 2 7 0.50 * pcmpeqd (%rax), %xmm2
+# CHECK-NEXT: 1 1 0.50 pcmpeqw %xmm0, %xmm2
+# CHECK-NEXT: 2 7 0.50 * pcmpeqw (%rax), %xmm2
+# CHECK-NEXT: 1 1 0.50 pcmpgtb %xmm0, %xmm2
+# CHECK-NEXT: 2 7 0.50 * pcmpgtb (%rax), %xmm2
+# CHECK-NEXT: 1 1 0.50 pcmpgtd %xmm0, %xmm2
+# CHECK-NEXT: 2 7 0.50 * pcmpgtd (%rax), %xmm2
+# CHECK-NEXT: 1 1 0.50 pcmpgtw %xmm0, %xmm2
+# CHECK-NEXT: 2 7 0.50 * pcmpgtw (%rax), %xmm2
+# CHECK-NEXT: 2 3 1.00 pextrw $1, %xmm0, %ecx
+# CHECK-NEXT: 1 5 1.00 pmaddwd %xmm0, %xmm2
+# CHECK-NEXT: 2 11 1.00 * pmaddwd (%rax), %xmm2
+# CHECK-NEXT: 1 1 0.50 pmaxsw %xmm0, %xmm2
+# CHECK-NEXT: 2 7 0.50 * pmaxsw (%rax), %xmm2
+# CHECK-NEXT: 1 1 0.50 pmaxub %xmm0, %xmm2
+# CHECK-NEXT: 2 7 0.50 * pmaxub (%rax), %xmm2
+# CHECK-NEXT: 1 1 0.50 pminsw %xmm0, %xmm2
+# CHECK-NEXT: 2 7 0.50 * pminsw (%rax), %xmm2
+# CHECK-NEXT: 1 1 0.50 pminub %xmm0, %xmm2
+# CHECK-NEXT: 2 7 0.50 * pminub (%rax), %xmm2
+# CHECK-NEXT: 1 2 1.00 pmovmskb %xmm0, %ecx
+# CHECK-NEXT: 1 5 1.00 pmulhuw %xmm0, %xmm2
+# CHECK-NEXT: 2 11 1.00 * pmulhuw (%rax), %xmm2
+# CHECK-NEXT: 1 5 1.00 pmulhw %xmm0, %xmm2
+# CHECK-NEXT: 2 11 1.00 * pmulhw (%rax), %xmm2
+# CHECK-NEXT: 1 5 1.00 pmullw %xmm0, %xmm2
+# CHECK-NEXT: 2 11 1.00 * pmullw (%rax), %xmm2
+# CHECK-NEXT: 1 5 1.00 pmuludq %mm0, %mm2
+# CHECK-NEXT: 2 10 1.00 * pmuludq (%rax), %mm2
+# CHECK-NEXT: 1 5 1.00 pmuludq %xmm0, %xmm2
+# CHECK-NEXT: 2 11 1.00 * pmuludq (%rax), %xmm2
+# CHECK-NEXT: 1 1 0.33 por %xmm0, %xmm2
+# CHECK-NEXT: 2 7 0.50 * por (%rax), %xmm2
+# CHECK-NEXT: 1 5 1.00 psadbw %xmm0, %xmm2
+# CHECK-NEXT: 2 11 1.00 * psadbw (%rax), %xmm2
+# CHECK-NEXT: 1 1 0.50 pshufd $1, %xmm0, %xmm2
+# CHECK-NEXT: 2 7 0.50 * pshufd $1, (%rax), %xmm2
+# CHECK-NEXT: 1 1 0.50 pshufhw $1, %xmm0, %xmm2
+# CHECK-NEXT: 2 7 0.50 * pshufhw $1, (%rax), %xmm2
+# CHECK-NEXT: 1 1 0.50 pshuflw $1, %xmm0, %xmm2
+# CHECK-NEXT: 2 7 0.50 * pshuflw $1, (%rax), %xmm2
+# CHECK-NEXT: 1 1 1.00 pslld $1, %xmm2
+# CHECK-NEXT: 2 2 1.00 pslld %xmm0, %xmm2
+# CHECK-NEXT: 3 8 1.00 * pslld (%rax), %xmm2
+# CHECK-NEXT: 1 1 0.50 pslldq $1, %xmm2
+# CHECK-NEXT: 1 1 1.00 psllq $1, %xmm2
+# CHECK-NEXT: 2 2 1.00 psllq %xmm0, %xmm2
+# CHECK-NEXT: 3 8 1.00 * psllq (%rax), %xmm2
+# CHECK-NEXT: 1 1 1.00 psllw $1, %xmm2
+# CHECK-NEXT: 2 2 1.00 psllw %xmm0, %xmm2
+# CHECK-NEXT: 3 8 1.00 * psllw (%rax), %xmm2
+# CHECK-NEXT: 1 1 1.00 psrad $1, %xmm2
+# CHECK-NEXT: 2 2 1.00 psrad %xmm0, %xmm2
+# CHECK-NEXT: 3 8 1.00 * psrad (%rax), %xmm2
+# CHECK-NEXT: 1 1 1.00 psraw $1, %xmm2
+# CHECK-NEXT: 2 2 1.00 psraw %xmm0, %xmm2
+# CHECK-NEXT: 3 8 1.00 * psraw (%rax), %xmm2
+# CHECK-NEXT: 1 1 1.00 psrld $1, %xmm2
+# CHECK-NEXT: 2 2 1.00 psrld %xmm0, %xmm2
+# CHECK-NEXT: 3 8 1.00 * psrld (%rax), %xmm2
+# CHECK-NEXT: 1 1 0.50 psrldq $1, %xmm2
+# CHECK-NEXT: 1 1 1.00 psrlq $1, %xmm2
+# CHECK-NEXT: 2 2 1.00 psrlq %xmm0, %xmm2
+# CHECK-NEXT: 3 8 1.00 * psrlq (%rax), %xmm2
+# CHECK-NEXT: 1 1 1.00 psrlw $1, %xmm2
+# CHECK-NEXT: 2 2 1.00 psrlw %xmm0, %xmm2
+# CHECK-NEXT: 3 8 1.00 * psrlw (%rax), %xmm2
+# CHECK-NEXT: 1 1 0.50 psubb %xmm0, %xmm2
+# CHECK-NEXT: 2 7 0.50 * psubb (%rax), %xmm2
+# CHECK-NEXT: 1 1 0.50 psubd %xmm0, %xmm2
+# CHECK-NEXT: 2 7 0.50 * psubd (%rax), %xmm2
+# CHECK-NEXT: 1 3 1.00 psubq %mm0, %mm2
+# CHECK-NEXT: 2 8 1.00 * psubq (%rax), %mm2
+# CHECK-NEXT: 1 1 0.50 psubq %xmm0, %xmm2
+# CHECK-NEXT: 2 7 0.50 * psubq (%rax), %xmm2
+# CHECK-NEXT: 1 1 0.50 psubsb %xmm0, %xmm2
+# CHECK-NEXT: 2 7 0.50 * psubsb (%rax), %xmm2
+# CHECK-NEXT: 1 1 0.50 psubsw %xmm0, %xmm2
+# CHECK-NEXT: 2 7 0.50 * psubsw (%rax), %xmm2
+# CHECK-NEXT: 1 1 0.50 psubusb %xmm0, %xmm2
+# CHECK-NEXT: 2 7 0.50 * psubusb (%rax), %xmm2
+# CHECK-NEXT: 1 1 0.50 psubusw %xmm0, %xmm2
+# CHECK-NEXT: 2 7 0.50 * psubusw (%rax), %xmm2
+# CHECK-NEXT: 1 1 0.50 psubw %xmm0, %xmm2
+# CHECK-NEXT: 2 7 0.50 * psubw (%rax), %xmm2
+# CHECK-NEXT: 1 1 0.50 punpckhbw %xmm0, %xmm2
+# CHECK-NEXT: 2 7 0.50 * punpckhbw (%rax), %xmm2
+# CHECK-NEXT: 1 1 0.50 punpckhdq %xmm0, %xmm2
+# CHECK-NEXT: 2 7 0.50 * punpckhdq (%rax), %xmm2
+# CHECK-NEXT: 1 1 0.50 punpckhqdq %xmm0, %xmm2
+# CHECK-NEXT: 2 7 0.50 * punpckhqdq (%rax), %xmm2
+# CHECK-NEXT: 1 1 0.50 punpckhwd %xmm0, %xmm2
+# CHECK-NEXT: 2 7 0.50 * punpckhwd (%rax), %xmm2
+# CHECK-NEXT: 1 1 0.50 punpcklbw %xmm0, %xmm2
+# CHECK-NEXT: 2 7 0.50 * punpcklbw (%rax), %xmm2
+# CHECK-NEXT: 1 1 0.50 punpckldq %xmm0, %xmm2
+# CHECK-NEXT: 2 7 0.50 * punpckldq (%rax), %xmm2
+# CHECK-NEXT: 1 1 0.50 punpcklqdq %xmm0, %xmm2
+# CHECK-NEXT: 2 7 0.50 * punpcklqdq (%rax), %xmm2
+# CHECK-NEXT: 1 1 0.50 punpcklwd %xmm0, %xmm2
+# CHECK-NEXT: 2 7 0.50 * punpcklwd (%rax), %xmm2
+# CHECK-NEXT: 1 1 0.33 pxor %xmm0, %xmm2
+# CHECK-NEXT: 2 7 0.50 * pxor (%rax), %xmm2
+# CHECK-NEXT: 1 1 1.00 shufpd $1, %xmm0, %xmm2
+# CHECK-NEXT: 2 7 1.00 * shufpd $1, (%rax), %xmm2
+# CHECK-NEXT: 1 21 21.00 sqrtpd %xmm0, %xmm2
+# CHECK-NEXT: 2 27 21.00 * sqrtpd (%rax), %xmm2
+# CHECK-NEXT: 1 21 21.00 sqrtsd %xmm0, %xmm2
+# CHECK-NEXT: 2 27 21.00 * sqrtsd (%rax), %xmm2
+# CHECK-NEXT: 1 3 1.00 subpd %xmm0, %xmm2
+# CHECK-NEXT: 2 9 1.00 * subpd (%rax), %xmm2
+# CHECK-NEXT: 1 3 1.00 subsd %xmm0, %xmm2
+# CHECK-NEXT: 2 9 1.00 * subsd (%rax), %xmm2
+# CHECK-NEXT: 2 2 1.00 ucomisd %xmm0, %xmm1
+# CHECK-NEXT: 3 8 1.00 * ucomisd (%rax), %xmm1
+# CHECK-NEXT: 1 1 1.00 unpckhpd %xmm0, %xmm2
+# CHECK-NEXT: 2 7 1.00 * unpckhpd (%rax), %xmm2
+# CHECK-NEXT: 1 1 1.00 unpcklpd %xmm0, %xmm2
+# CHECK-NEXT: 2 7 1.00 * unpcklpd (%rax), %xmm2
+# CHECK-NEXT: 1 1 1.00 xorpd %xmm0, %xmm2
+# CHECK-NEXT: 2 7 1.00 * xorpd (%rax), %xmm2
+
+# CHECK: Resources:
+# CHECK-NEXT: [0] - SBDivider
+# CHECK-NEXT: [1] - SBFPDivider
+# CHECK-NEXT: [2] - SBPort0
+# CHECK-NEXT: [3] - SBPort1
+# CHECK-NEXT: [4] - SBPort4
+# CHECK-NEXT: [5] - SBPort5
+# CHECK-NEXT: [6.0] - SBPort23
+# CHECK-NEXT: [6.1] - SBPort23
+
+# CHECK: Resource pressure per iteration:
+# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6.0] [6.1]
+# CHECK-NEXT: - 172.00 75.83 117.33 16.00 98.83 66.00 66.00
+
+# CHECK: Resource pressure by instruction:
+# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6.0] [6.1] Instructions:
+# CHECK-NEXT: - - - 1.00 - - - - addpd %xmm0, %xmm2
+# CHECK-NEXT: - - - 1.00 - - 0.50 0.50 addpd (%rax), %xmm2
+# CHECK-NEXT: - - - 1.00 - - - - addsd %xmm0, %xmm2
+# CHECK-NEXT: - - - 1.00 - - 0.50 0.50 addsd (%rax), %xmm2
+# CHECK-NEXT: - - - - - 1.00 - - andnpd %xmm0, %xmm2
+# CHECK-NEXT: - - - - - 1.00 0.50 0.50 andnpd (%rax), %xmm2
+# CHECK-NEXT: - - - - - 1.00 - - andpd %xmm0, %xmm2
+# CHECK-NEXT: - - - - - 1.00 0.50 0.50 andpd (%rax), %xmm2
+# CHECK-NEXT: - - 0.50 0.50 1.00 1.00 0.50 0.50 clflush (%rax)
+# CHECK-NEXT: - - - 1.00 - - - - cmppd $0, %xmm0, %xmm2
+# CHECK-NEXT: - - - 1.00 - - 0.50 0.50 cmppd $0, (%rax), %xmm2
+# CHECK-NEXT: - - - 1.00 - - - - cmpsd $0, %xmm0, %xmm2
+# CHECK-NEXT: - - - 1.00 - - 0.50 0.50 cmpsd $0, (%rax), %xmm2
+# CHECK-NEXT: - - 1.00 1.00 - - - - comisd %xmm0, %xmm1
+# CHECK-NEXT: - - 1.00 1.00 - - 0.50 0.50 comisd (%rax), %xmm1
+# CHECK-NEXT: - - - 1.00 - 1.00 - - cvtdq2pd %xmm0, %xmm2
+# CHECK-NEXT: - - - 1.00 - 1.00 0.50 0.50 cvtdq2pd (%rax), %xmm2
+# CHECK-NEXT: - - - 1.00 - - - - cvtdq2ps %xmm0, %xmm2
+# CHECK-NEXT: - - - 1.00 - - 0.50 0.50 cvtdq2ps (%rax), %xmm2
+# CHECK-NEXT: - - - 1.00 - 1.00 - - cvtpd2dq %xmm0, %xmm2
+# CHECK-NEXT: - - - 1.00 - 1.00 0.50 0.50 cvtpd2dq (%rax), %xmm2
+# CHECK-NEXT: - - - 1.00 - 1.00 - - cvtpd2pi %xmm0, %mm2
+# CHECK-NEXT: - - - 1.00 - 1.00 0.50 0.50 cvtpd2pi (%rax), %mm2
+# CHECK-NEXT: - - - 1.00 - 1.00 - - cvtpd2ps %xmm0, %xmm2
+# CHECK-NEXT: - - - 1.00 - 1.00 0.50 0.50 cvtpd2ps (%rax), %xmm2
+# CHECK-NEXT: - - - 1.00 - 1.00 - - cvtpi2pd %mm0, %xmm2
+# CHECK-NEXT: - - - 1.00 - 1.00 0.50 0.50 cvtpi2pd (%rax), %xmm2
+# CHECK-NEXT: - - - 1.00 - - - - cvtps2dq %xmm0, %xmm2
+# CHECK-NEXT: - - - 1.00 - - 0.50 0.50 cvtps2dq (%rax), %xmm2
+# CHECK-NEXT: - - 1.00 - - 1.00 - - cvtps2pd %xmm0, %xmm2
+# CHECK-NEXT: - - 1.00 - - - 0.50 0.50 cvtps2pd (%rax), %xmm2
+# CHECK-NEXT: - - 1.00 1.00 - - - - cvtsd2si %xmm0, %ecx
+# CHECK-NEXT: - - 1.00 1.00 - - - - cvtsd2si %xmm0, %rcx
+# CHECK-NEXT: - - 1.00 1.00 - - 0.50 0.50 cvtsd2si (%rax), %ecx
+# CHECK-NEXT: - - 1.00 1.00 - - 0.50 0.50 cvtsd2si (%rax), %rcx
+# CHECK-NEXT: - - - 1.00 - 1.00 - - cvtsd2ss %xmm0, %xmm2
+# CHECK-NEXT: - - - 1.00 - 1.00 0.50 0.50 cvtsd2ss (%rax), %xmm2
+# CHECK-NEXT: - - - 1.00 - 1.00 - - cvtsi2sdl %ecx, %xmm2
+# CHECK-NEXT: - - - 1.00 - 1.00 - - cvtsi2sdq %rcx, %xmm2
+# CHECK-NEXT: - - - 1.00 - - 0.50 0.50 cvtsi2sdl (%rax), %xmm2
+# CHECK-NEXT: - - - 1.00 - - 0.50 0.50 cvtsi2sdl (%rax), %xmm2
+# CHECK-NEXT: - - 1.00 - - - - - cvtss2sd %xmm0, %xmm2
+# CHECK-NEXT: - - 1.00 - - - 0.50 0.50 cvtss2sd (%rax), %xmm2
+# CHECK-NEXT: - - - 1.00 - 1.00 - - cvttpd2dq %xmm0, %xmm2
+# CHECK-NEXT: - - - 1.00 - 1.00 0.50 0.50 cvttpd2dq (%rax), %xmm2
+# CHECK-NEXT: - - - 1.00 - 1.00 - - cvttpd2pi %xmm0, %mm2
+# CHECK-NEXT: - - - 1.00 - 1.00 0.50 0.50 cvttpd2pi (%rax), %mm2
+# CHECK-NEXT: - - - 1.00 - - - - cvttps2dq %xmm0, %xmm2
+# CHECK-NEXT: - - - 1.00 - - 0.50 0.50 cvttps2dq (%rax), %xmm2
+# CHECK-NEXT: - - 1.00 1.00 - - - - cvttsd2si %xmm0, %ecx
+# CHECK-NEXT: - - 1.00 1.00 - - - - cvttsd2si %xmm0, %rcx
+# CHECK-NEXT: - - 1.00 1.00 - - 0.50 0.50 cvttsd2si (%rax), %ecx
+# CHECK-NEXT: - - 1.00 1.00 - - 0.50 0.50 cvttsd2si (%rax), %rcx
+# CHECK-NEXT: - 22.00 1.00 - - - - - divpd %xmm0, %xmm2
+# CHECK-NEXT: - 22.00 1.00 - - - 0.50 0.50 divpd (%rax), %xmm2
+# CHECK-NEXT: - 22.00 1.00 - - - - - divsd %xmm0, %xmm2
+# CHECK-NEXT: - 22.00 1.00 - - - 0.50 0.50 divsd (%rax), %xmm2
+# CHECK-NEXT: - - - - 1.00 - 0.50 0.50 lfence
+# CHECK-NEXT: - - - - 1.00 - 0.50 0.50 maskmovdqu %xmm0, %xmm1
+# CHECK-NEXT: - - - 1.00 - - - - maxpd %xmm0, %xmm2
+# CHECK-NEXT: - - - 1.00 - - 0.50 0.50 maxpd (%rax), %xmm2
+# CHECK-NEXT: - - - 1.00 - - - - maxsd %xmm0, %xmm2
+# CHECK-NEXT: - - - 1.00 - - 0.50 0.50 maxsd (%rax), %xmm2
+# CHECK-NEXT: - - - 1.00 - - - - minpd %xmm0, %xmm2
+# CHECK-NEXT: - - - 1.00 - - 0.50 0.50 minpd (%rax), %xmm2
+# CHECK-NEXT: - - - 1.00 - - - - minsd %xmm0, %xmm2
+# CHECK-NEXT: - - - 1.00 - - 0.50 0.50 minsd (%rax), %xmm2
+# CHECK-NEXT: - - - - - 1.00 - - movapd %xmm0, %xmm2
+# CHECK-NEXT: - - - - 1.00 - 0.50 0.50 movapd %xmm0, (%rax)
+# CHECK-NEXT: - - - - - - 0.50 0.50 movapd (%rax), %xmm2
+# CHECK-NEXT: - - - - - 1.00 - - movd %eax, %xmm2
+# CHECK-NEXT: - - - - - - 0.50 0.50 movd (%rax), %xmm2
+# CHECK-NEXT: - - 1.00 - - - - - movd %xmm0, %ecx
+# CHECK-NEXT: - - - - 1.00 - 0.50 0.50 movd %xmm0, (%rax)
+# CHECK-NEXT: - - 0.33 0.33 - 0.33 - - movdqa %xmm0, %xmm2
+# CHECK-NEXT: - - - - 1.00 - 0.50 0.50 movdqa %xmm0, (%rax)
+# CHECK-NEXT: - - - - - - 0.50 0.50 movdqa (%rax), %xmm2
+# CHECK-NEXT: - - 0.33 0.33 - 0.33 - - movdqu %xmm0, %xmm2
+# CHECK-NEXT: - - - - 1.00 - 0.50 0.50 movdqu %xmm0, (%rax)
+# CHECK-NEXT: - - - - - - 0.50 0.50 movdqu (%rax), %xmm2
+# CHECK-NEXT: - - 0.33 0.33 - 1.33 - - movdq2q %xmm0, %mm2
+# CHECK-NEXT: - - - - 1.00 - 0.50 0.50 movhpd %xmm0, (%rax)
+# CHECK-NEXT: - - - - - 1.00 0.50 0.50 movhpd (%rax), %xmm2
+# CHECK-NEXT: - - - - 1.00 - 0.50 0.50 movlpd %xmm0, (%rax)
+# CHECK-NEXT: - - - - - 1.00 0.50 0.50 movlpd (%rax), %xmm2
+# CHECK-NEXT: - - 1.00 - - - - - movmskpd %xmm0, %ecx
+# CHECK-NEXT: - - - - 1.00 - 0.50 0.50 movntil %eax, (%rax)
+# CHECK-NEXT: - - - - 1.00 - 0.50 0.50 movntiq %rax, (%rax)
+# CHECK-NEXT: - - - - 1.00 - 0.50 0.50 movntdq %xmm0, (%rax)
+# CHECK-NEXT: - - - - 1.00 - 0.50 0.50 movntpd %xmm0, (%rax)
+# CHECK-NEXT: - - 0.33 0.33 - 0.33 - - movq %xmm0, %xmm2
+# CHECK-NEXT: - - - - - 1.00 - - movq %rax, %xmm2
+# CHECK-NEXT: - - - - - - 0.50 0.50 movq (%rax), %xmm2
+# CHECK-NEXT: - - 1.00 - - - - - movq %xmm0, %rcx
+# CHECK-NEXT: - - - - 1.00 - 0.50 0.50 movq %xmm0, (%rax)
+# CHECK-NEXT: - - 0.33 0.33 - 0.33 - - movq2dq %mm0, %xmm2
+# CHECK-NEXT: - - - - - 1.00 - - movsd %xmm0, %xmm2
+# CHECK-NEXT: - - - - 1.00 - 0.50 0.50 movsd %xmm0, (%rax)
+# CHECK-NEXT: - - - - - - 0.50 0.50 movsd (%rax), %xmm2
+# CHECK-NEXT: - - - - - 1.00 - - movupd %xmm0, %xmm2
+# CHECK-NEXT: - - - - 1.00 - 0.50 0.50 movupd %xmm0, (%rax)
+# CHECK-NEXT: - - - - - - 0.50 0.50 movupd (%rax), %xmm2
+# CHECK-NEXT: - - 1.00 - - - - - mulpd %xmm0, %xmm2
+# CHECK-NEXT: - - 1.00 - - - 0.50 0.50 mulpd (%rax), %xmm2
+# CHECK-NEXT: - - 1.00 - - - - - mulsd %xmm0, %xmm2
+# CHECK-NEXT: - - 1.00 - - - 0.50 0.50 mulsd (%rax), %xmm2
+# CHECK-NEXT: - - - - - 1.00 - - orpd %xmm0, %xmm2
+# CHECK-NEXT: - - - - - 1.00 0.50 0.50 orpd (%rax), %xmm2
+# CHECK-NEXT: - - - 0.50 - 0.50 - - packssdw %xmm0, %xmm2
+# CHECK-NEXT: - - - 0.50 - 0.50 0.50 0.50 packssdw (%rax), %xmm2
+# CHECK-NEXT: - - - 0.50 - 0.50 - - packsswb %xmm0, %xmm2
+# CHECK-NEXT: - - - 0.50 - 0.50 0.50 0.50 packsswb (%rax), %xmm2
+# CHECK-NEXT: - - - 0.50 - 0.50 - - packuswb %xmm0, %xmm2
+# CHECK-NEXT: - - - 0.50 - 0.50 0.50 0.50 packuswb (%rax), %xmm2
+# CHECK-NEXT: - - - 0.50 - 0.50 - - paddb %xmm0, %xmm2
+# CHECK-NEXT: - - - 0.50 - 0.50 0.50 0.50 paddb (%rax), %xmm2
+# CHECK-NEXT: - - - 0.50 - 0.50 - - paddd %xmm0, %xmm2
+# CHECK-NEXT: - - - 0.50 - 0.50 0.50 0.50 paddd (%rax), %xmm2
+# CHECK-NEXT: - - - 0.50 - 0.50 - - paddq %mm0, %mm2
+# CHECK-NEXT: - - - 0.50 - 0.50 0.50 0.50 paddq (%rax), %mm2
+# CHECK-NEXT: - - - 0.50 - 0.50 - - paddq %xmm0, %xmm2
+# CHECK-NEXT: - - - 0.50 - 0.50 0.50 0.50 paddq (%rax), %xmm2
+# CHECK-NEXT: - - - 0.50 - 0.50 - - paddsb %xmm0, %xmm2
+# CHECK-NEXT: - - - 0.50 - 0.50 0.50 0.50 paddsb (%rax), %xmm2
+# CHECK-NEXT: - - - 0.50 - 0.50 - - paddsw %xmm0, %xmm2
+# CHECK-NEXT: - - - 0.50 - 0.50 0.50 0.50 paddsw (%rax), %xmm2
+# CHECK-NEXT: - - - 0.50 - 0.50 - - paddusb %xmm0, %xmm2
+# CHECK-NEXT: - - - 0.50 - 0.50 0.50 0.50 paddusb (%rax), %xmm2
+# CHECK-NEXT: - - - 0.50 - 0.50 - - paddusw %xmm0, %xmm2
+# CHECK-NEXT: - - - 0.50 - 0.50 0.50 0.50 paddusw (%rax), %xmm2
+# CHECK-NEXT: - - - 0.50 - 0.50 - - paddw %xmm0, %xmm2
+# CHECK-NEXT: - - - 0.50 - 0.50 0.50 0.50 paddw (%rax), %xmm2
+# CHECK-NEXT: - - 0.33 0.33 - 0.33 - - pand %xmm0, %xmm2
+# CHECK-NEXT: - - 0.33 0.33 - 0.33 0.50 0.50 pand (%rax), %xmm2
+# CHECK-NEXT: - - 0.33 0.33 - 0.33 - - pandn %xmm0, %xmm2
+# CHECK-NEXT: - - 0.33 0.33 - 0.33 0.50 0.50 pandn (%rax), %xmm2
+# CHECK-NEXT: - - - 0.50 - 0.50 - - pavgb %xmm0, %xmm2
+# CHECK-NEXT: - - - 0.50 - 0.50 0.50 0.50 pavgb (%rax), %xmm2
+# CHECK-NEXT: - - - 0.50 - 0.50 - - pavgw %xmm0, %xmm2
+# CHECK-NEXT: - - - 0.50 - 0.50 0.50 0.50 pavgw (%rax), %xmm2
+# CHECK-NEXT: - - - 0.50 - 0.50 - - pcmpeqb %xmm0, %xmm2
+# CHECK-NEXT: - - - 0.50 - 0.50 0.50 0.50 pcmpeqb (%rax), %xmm2
+# CHECK-NEXT: - - - 0.50 - 0.50 - - pcmpeqd %xmm0, %xmm2
+# CHECK-NEXT: - - - 0.50 - 0.50 0.50 0.50 pcmpeqd (%rax), %xmm2
+# CHECK-NEXT: - - - 0.50 - 0.50 - - pcmpeqw %xmm0, %xmm2
+# CHECK-NEXT: - - - 0.50 - 0.50 0.50 0.50 pcmpeqw (%rax), %xmm2
+# CHECK-NEXT: - - - 0.50 - 0.50 - - pcmpgtb %xmm0, %xmm2
+# CHECK-NEXT: - - - 0.50 - 0.50 0.50 0.50 pcmpgtb (%rax), %xmm2
+# CHECK-NEXT: - - - 0.50 - 0.50 - - pcmpgtd %xmm0, %xmm2
+# CHECK-NEXT: - - - 0.50 - 0.50 0.50 0.50 pcmpgtd (%rax), %xmm2
+# CHECK-NEXT: - - - 0.50 - 0.50 - - pcmpgtw %xmm0, %xmm2
+# CHECK-NEXT: - - - 0.50 - 0.50 0.50 0.50 pcmpgtw (%rax), %xmm2
+# CHECK-NEXT: - - 1.00 0.50 - 0.50 - - pextrw $1, %xmm0, %ecx
+# CHECK-NEXT: - - 1.00 - - - - - pmaddwd %xmm0, %xmm2
+# CHECK-NEXT: - - 1.00 - - - 0.50 0.50 pmaddwd (%rax), %xmm2
+# CHECK-NEXT: - - - 0.50 - 0.50 - - pmaxsw %xmm0, %xmm2
+# CHECK-NEXT: - - - 0.50 - 0.50 0.50 0.50 pmaxsw (%rax), %xmm2
+# CHECK-NEXT: - - - 0.50 - 0.50 - - pmaxub %xmm0, %xmm2
+# CHECK-NEXT: - - - 0.50 - 0.50 0.50 0.50 pmaxub (%rax), %xmm2
+# CHECK-NEXT: - - - 0.50 - 0.50 - - pminsw %xmm0, %xmm2
+# CHECK-NEXT: - - - 0.50 - 0.50 0.50 0.50 pminsw (%rax), %xmm2
+# CHECK-NEXT: - - - 0.50 - 0.50 - - pminub %xmm0, %xmm2
+# CHECK-NEXT: - - - 0.50 - 0.50 0.50 0.50 pminub (%rax), %xmm2
+# CHECK-NEXT: - - 1.00 - - - - - pmovmskb %xmm0, %ecx
+# CHECK-NEXT: - - 1.00 - - - - - pmulhuw %xmm0, %xmm2
+# CHECK-NEXT: - - 1.00 - - - 0.50 0.50 pmulhuw (%rax), %xmm2
+# CHECK-NEXT: - - 1.00 - - - - - pmulhw %xmm0, %xmm2
+# CHECK-NEXT: - - 1.00 - - - 0.50 0.50 pmulhw (%rax), %xmm2
+# CHECK-NEXT: - - 1.00 - - - - - pmullw %xmm0, %xmm2
+# CHECK-NEXT: - - 1.00 - - - 0.50 0.50 pmullw (%rax), %xmm2
+# CHECK-NEXT: - - 1.00 - - - - - pmuludq %mm0, %mm2
+# CHECK-NEXT: - - 1.00 - - - 0.50 0.50 pmuludq (%rax), %mm2
+# CHECK-NEXT: - - 1.00 - - - - - pmuludq %xmm0, %xmm2
+# CHECK-NEXT: - - 1.00 - - - 0.50 0.50 pmuludq (%rax), %xmm2
+# CHECK-NEXT: - - 0.33 0.33 - 0.33 - - por %xmm0, %xmm2
+# CHECK-NEXT: - - 0.33 0.33 - 0.33 0.50 0.50 por (%rax), %xmm2
+# CHECK-NEXT: - - 1.00 - - - - - psadbw %xmm0, %xmm2
+# CHECK-NEXT: - - 1.00 - - - 0.50 0.50 psadbw (%rax), %xmm2
+# CHECK-NEXT: - - - 0.50 - 0.50 - - pshufd $1, %xmm0, %xmm2
+# CHECK-NEXT: - - - 0.50 - 0.50 0.50 0.50 pshufd $1, (%rax), %xmm2
+# CHECK-NEXT: - - - 0.50 - 0.50 - - pshufhw $1, %xmm0, %xmm2
+# CHECK-NEXT: - - - 0.50 - 0.50 0.50 0.50 pshufhw $1, (%rax), %xmm2
+# CHECK-NEXT: - - - 0.50 - 0.50 - - pshuflw $1, %xmm0, %xmm2
+# CHECK-NEXT: - - - 0.50 - 0.50 0.50 0.50 pshuflw $1, (%rax), %xmm2
+# CHECK-NEXT: - - 1.00 - - - - - pslld $1, %xmm2
+# CHECK-NEXT: - - 1.00 0.50 - 0.50 - - pslld %xmm0, %xmm2
+# CHECK-NEXT: - - 1.00 0.50 - 0.50 0.50 0.50 pslld (%rax), %xmm2
+# CHECK-NEXT: - - - 0.50 - 0.50 - - pslldq $1, %xmm2
+# CHECK-NEXT: - - 1.00 - - - - - psllq $1, %xmm2
+# CHECK-NEXT: - - 1.00 0.50 - 0.50 - - psllq %xmm0, %xmm2
+# CHECK-NEXT: - - 1.00 0.50 - 0.50 0.50 0.50 psllq (%rax), %xmm2
+# CHECK-NEXT: - - 1.00 - - - - - psllw $1, %xmm2
+# CHECK-NEXT: - - 1.00 0.50 - 0.50 - - psllw %xmm0, %xmm2
+# CHECK-NEXT: - - 1.00 0.50 - 0.50 0.50 0.50 psllw (%rax), %xmm2
+# CHECK-NEXT: - - 1.00 - - - - - psrad $1, %xmm2
+# CHECK-NEXT: - - 1.00 0.50 - 0.50 - - psrad %xmm0, %xmm2
+# CHECK-NEXT: - - 1.00 0.50 - 0.50 0.50 0.50 psrad (%rax), %xmm2
+# CHECK-NEXT: - - 1.00 - - - - - psraw $1, %xmm2
+# CHECK-NEXT: - - 1.00 0.50 - 0.50 - - psraw %xmm0, %xmm2
+# CHECK-NEXT: - - 1.00 0.50 - 0.50 0.50 0.50 psraw (%rax), %xmm2
+# CHECK-NEXT: - - 1.00 - - - - - psrld $1, %xmm2
+# CHECK-NEXT: - - 1.00 0.50 - 0.50 - - psrld %xmm0, %xmm2
+# CHECK-NEXT: - - 1.00 0.50 - 0.50 0.50 0.50 psrld (%rax), %xmm2
+# CHECK-NEXT: - - - 0.50 - 0.50 - - psrldq $1, %xmm2
+# CHECK-NEXT: - - 1.00 - - - - - psrlq $1, %xmm2
+# CHECK-NEXT: - - 1.00 0.50 - 0.50 - - psrlq %xmm0, %xmm2
+# CHECK-NEXT: - - 1.00 0.50 - 0.50 0.50 0.50 psrlq (%rax), %xmm2
+# CHECK-NEXT: - - 1.00 - - - - - psrlw $1, %xmm2
+# CHECK-NEXT: - - 1.00 0.50 - 0.50 - - psrlw %xmm0, %xmm2
+# CHECK-NEXT: - - 1.00 0.50 - 0.50 0.50 0.50 psrlw (%rax), %xmm2
+# CHECK-NEXT: - - - 0.50 - 0.50 - - psubb %xmm0, %xmm2
+# CHECK-NEXT: - - - 0.50 - 0.50 0.50 0.50 psubb (%rax), %xmm2
+# CHECK-NEXT: - - - 0.50 - 0.50 - - psubd %xmm0, %xmm2
+# CHECK-NEXT: - - - 0.50 - 0.50 0.50 0.50 psubd (%rax), %xmm2
+# CHECK-NEXT: - - - 1.00 - - - - psubq %mm0, %mm2
+# CHECK-NEXT: - - - 1.00 - - 0.50 0.50 psubq (%rax), %mm2
+# CHECK-NEXT: - - - 0.50 - 0.50 - - psubq %xmm0, %xmm2
+# CHECK-NEXT: - - - 0.50 - 0.50 0.50 0.50 psubq (%rax), %xmm2
+# CHECK-NEXT: - - - 0.50 - 0.50 - - psubsb %xmm0, %xmm2
+# CHECK-NEXT: - - - 0.50 - 0.50 0.50 0.50 psubsb (%rax), %xmm2
+# CHECK-NEXT: - - - 0.50 - 0.50 - - psubsw %xmm0, %xmm2
+# CHECK-NEXT: - - - 0.50 - 0.50 0.50 0.50 psubsw (%rax), %xmm2
+# CHECK-NEXT: - - - 0.50 - 0.50 - - psubusb %xmm0, %xmm2
+# CHECK-NEXT: - - - 0.50 - 0.50 0.50 0.50 psubusb (%rax), %xmm2
+# CHECK-NEXT: - - - 0.50 - 0.50 - - psubusw %xmm0, %xmm2
+# CHECK-NEXT: - - - 0.50 - 0.50 0.50 0.50 psubusw (%rax), %xmm2
+# CHECK-NEXT: - - - 0.50 - 0.50 - - psubw %xmm0, %xmm2
+# CHECK-NEXT: - - - 0.50 - 0.50 0.50 0.50 psubw (%rax), %xmm2
+# CHECK-NEXT: - - - 0.50 - 0.50 - - punpckhbw %xmm0, %xmm2
+# CHECK-NEXT: - - - 0.50 - 0.50 0.50 0.50 punpckhbw (%rax), %xmm2
+# CHECK-NEXT: - - - 0.50 - 0.50 - - punpckhdq %xmm0, %xmm2
+# CHECK-NEXT: - - - 0.50 - 0.50 0.50 0.50 punpckhdq (%rax), %xmm2
+# CHECK-NEXT: - - - 0.50 - 0.50 - - punpckhqdq %xmm0, %xmm2
+# CHECK-NEXT: - - - 0.50 - 0.50 0.50 0.50 punpckhqdq (%rax), %xmm2
+# CHECK-NEXT: - - - 0.50 - 0.50 - - punpckhwd %xmm0, %xmm2
+# CHECK-NEXT: - - - 0.50 - 0.50 0.50 0.50 punpckhwd (%rax), %xmm2
+# CHECK-NEXT: - - - 0.50 - 0.50 - - punpcklbw %xmm0, %xmm2
+# CHECK-NEXT: - - - 0.50 - 0.50 0.50 0.50 punpcklbw (%rax), %xmm2
+# CHECK-NEXT: - - - 0.50 - 0.50 - - punpckldq %xmm0, %xmm2
+# CHECK-NEXT: - - - 0.50 - 0.50 0.50 0.50 punpckldq (%rax), %xmm2
+# CHECK-NEXT: - - - 0.50 - 0.50 - - punpcklqdq %xmm0, %xmm2
+# CHECK-NEXT: - - - 0.50 - 0.50 0.50 0.50 punpcklqdq (%rax), %xmm2
+# CHECK-NEXT: - - - 0.50 - 0.50 - - punpcklwd %xmm0, %xmm2
+# CHECK-NEXT: - - - 0.50 - 0.50 0.50 0.50 punpcklwd (%rax), %xmm2
+# CHECK-NEXT: - - 0.33 0.33 - 0.33 - - pxor %xmm0, %xmm2
+# CHECK-NEXT: - - 0.33 0.33 - 0.33 0.50 0.50 pxor (%rax), %xmm2
+# CHECK-NEXT: - - - - - 1.00 - - shufpd $1, %xmm0, %xmm2
+# CHECK-NEXT: - - - - - 1.00 0.50 0.50 shufpd $1, (%rax), %xmm2
+# CHECK-NEXT: - 21.00 1.00 - - - - - sqrtpd %xmm0, %xmm2
+# CHECK-NEXT: - 21.00 1.00 - - - 0.50 0.50 sqrtpd (%rax), %xmm2
+# CHECK-NEXT: - 21.00 1.00 - - - - - sqrtsd %xmm0, %xmm2
+# CHECK-NEXT: - 21.00 1.00 - - - 0.50 0.50 sqrtsd (%rax), %xmm2
+# CHECK-NEXT: - - - 1.00 - - - - subpd %xmm0, %xmm2
+# CHECK-NEXT: - - - 1.00 - - 0.50 0.50 subpd (%rax), %xmm2
+# CHECK-NEXT: - - - 1.00 - - - - subsd %xmm0, %xmm2
+# CHECK-NEXT: - - - 1.00 - - 0.50 0.50 subsd (%rax), %xmm2
+# CHECK-NEXT: - - 1.00 1.00 - - - - ucomisd %xmm0, %xmm1
+# CHECK-NEXT: - - 1.00 1.00 - - 0.50 0.50 ucomisd (%rax), %xmm1
+# CHECK-NEXT: - - - - - 1.00 - - unpckhpd %xmm0, %xmm2
+# CHECK-NEXT: - - - - - 1.00 0.50 0.50 unpckhpd (%rax), %xmm2
+# CHECK-NEXT: - - - - - 1.00 - - unpcklpd %xmm0, %xmm2
+# CHECK-NEXT: - - - - - 1.00 0.50 0.50 unpcklpd (%rax), %xmm2
+# CHECK-NEXT: - - - - - 1.00 - - xorpd %xmm0, %xmm2
+# CHECK-NEXT: - - - - - 1.00 0.50 0.50 xorpd (%rax), %xmm2
--- /dev/null
+# NOTE: Assertions have been autogenerated by utils/update_mca_test_checks.py
+# RUN: llvm-mca -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -instruction-tables < %s | FileCheck %s
+
+addsubpd %xmm0, %xmm2
+addsubpd (%rax), %xmm2
+
+addsubps %xmm0, %xmm2
+addsubps (%rax), %xmm2
+
+haddpd %xmm0, %xmm2
+haddpd (%rax), %xmm2
+
+haddps %xmm0, %xmm2
+haddps (%rax), %xmm2
+
+hsubpd %xmm0, %xmm2
+hsubpd (%rax), %xmm2
+
+hsubps %xmm0, %xmm2
+hsubps (%rax), %xmm2
+
+lddqu (%rax), %xmm2
+
+movddup %xmm0, %xmm2
+movddup (%rax), %xmm2
+
+movshdup %xmm0, %xmm2
+movshdup (%rax), %xmm2
+
+movsldup %xmm0, %xmm2
+movsldup (%rax), %xmm2
+
+# CHECK: Instruction Info:
+# CHECK-NEXT: [1]: #uOps
+# CHECK-NEXT: [2]: Latency
+# CHECK-NEXT: [3]: RThroughput
+# CHECK-NEXT: [4]: MayLoad
+# CHECK-NEXT: [5]: MayStore
+# CHECK-NEXT: [6]: HasSideEffects (U)
+
+# CHECK: [1] [2] [3] [4] [5] [6] Instructions:
+# CHECK-NEXT: 1 3 1.00 addsubpd %xmm0, %xmm2
+# CHECK-NEXT: 2 9 1.00 * addsubpd (%rax), %xmm2
+# CHECK-NEXT: 1 3 1.00 addsubps %xmm0, %xmm2
+# CHECK-NEXT: 2 9 1.00 * addsubps (%rax), %xmm2
+# CHECK-NEXT: 3 5 2.00 haddpd %xmm0, %xmm2
+# CHECK-NEXT: 4 11 2.00 * haddpd (%rax), %xmm2
+# CHECK-NEXT: 3 5 2.00 haddps %xmm0, %xmm2
+# CHECK-NEXT: 4 11 2.00 * haddps (%rax), %xmm2
+# CHECK-NEXT: 3 5 2.00 hsubpd %xmm0, %xmm2
+# CHECK-NEXT: 4 11 2.00 * hsubpd (%rax), %xmm2
+# CHECK-NEXT: 3 5 2.00 hsubps %xmm0, %xmm2
+# CHECK-NEXT: 4 11 2.00 * hsubps (%rax), %xmm2
+# CHECK-NEXT: 1 6 0.50 * lddqu (%rax), %xmm2
+# CHECK-NEXT: 1 1 1.00 movddup %xmm0, %xmm2
+# CHECK-NEXT: 1 6 0.50 * movddup (%rax), %xmm2
+# CHECK-NEXT: 1 1 1.00 movshdup %xmm0, %xmm2
+# CHECK-NEXT: 1 6 0.50 * movshdup (%rax), %xmm2
+# CHECK-NEXT: 1 1 1.00 movsldup %xmm0, %xmm2
+# CHECK-NEXT: 1 6 0.50 * movsldup (%rax), %xmm2
+
+# CHECK: Resources:
+# CHECK-NEXT: [0] - SBDivider
+# CHECK-NEXT: [1] - SBFPDivider
+# CHECK-NEXT: [2] - SBPort0
+# CHECK-NEXT: [3] - SBPort1
+# CHECK-NEXT: [4] - SBPort4
+# CHECK-NEXT: [5] - SBPort5
+# CHECK-NEXT: [6.0] - SBPort23
+# CHECK-NEXT: [6.1] - SBPort23
+
+# CHECK: Resource pressure per iteration:
+# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6.0] [6.1]
+# CHECK-NEXT: - - - 12.00 - 19.00 5.00 5.00
+
+# CHECK: Resource pressure by instruction:
+# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6.0] [6.1] Instructions:
+# CHECK-NEXT: - - - 1.00 - - - - addsubpd %xmm0, %xmm2
+# CHECK-NEXT: - - - 1.00 - - 0.50 0.50 addsubpd (%rax), %xmm2
+# CHECK-NEXT: - - - 1.00 - - - - addsubps %xmm0, %xmm2
+# CHECK-NEXT: - - - 1.00 - - 0.50 0.50 addsubps (%rax), %xmm2
+# CHECK-NEXT: - - - 1.00 - 2.00 - - haddpd %xmm0, %xmm2
+# CHECK-NEXT: - - - 1.00 - 2.00 0.50 0.50 haddpd (%rax), %xmm2
+# CHECK-NEXT: - - - 1.00 - 2.00 - - haddps %xmm0, %xmm2
+# CHECK-NEXT: - - - 1.00 - 2.00 0.50 0.50 haddps (%rax), %xmm2
+# CHECK-NEXT: - - - 1.00 - 2.00 - - hsubpd %xmm0, %xmm2
+# CHECK-NEXT: - - - 1.00 - 2.00 0.50 0.50 hsubpd (%rax), %xmm2
+# CHECK-NEXT: - - - 1.00 - 2.00 - - hsubps %xmm0, %xmm2
+# CHECK-NEXT: - - - 1.00 - 2.00 0.50 0.50 hsubps (%rax), %xmm2
+# CHECK-NEXT: - - - - - - 0.50 0.50 lddqu (%rax), %xmm2
+# CHECK-NEXT: - - - - - 1.00 - - movddup %xmm0, %xmm2
+# CHECK-NEXT: - - - - - - 0.50 0.50 movddup (%rax), %xmm2
+# CHECK-NEXT: - - - - - 1.00 - - movshdup %xmm0, %xmm2
+# CHECK-NEXT: - - - - - - 0.50 0.50 movshdup (%rax), %xmm2
+# CHECK-NEXT: - - - - - 1.00 - - movsldup %xmm0, %xmm2
+# CHECK-NEXT: - - - - - - 0.50 0.50 movsldup (%rax), %xmm2
--- /dev/null
+# NOTE: Assertions have been autogenerated by utils/update_mca_test_checks.py
+# RUN: llvm-mca -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -instruction-tables < %s | FileCheck %s
+
+blendpd $11, %xmm0, %xmm2
+blendpd $11, (%rax), %xmm2
+
+blendps $11, %xmm0, %xmm2
+blendps $11, (%rax), %xmm2
+
+blendvpd %xmm0, %xmm2
+blendvpd (%rax), %xmm2
+
+blendvps %xmm0, %xmm2
+blendvps (%rax), %xmm2
+
+dppd $22, %xmm0, %xmm2
+dppd $22, (%rax), %xmm2
+
+dpps $22, %xmm0, %xmm2
+dpps $22, (%rax), %xmm2
+
+extractps $1, %xmm0, %rcx
+extractps $1, %xmm0, (%rax)
+
+insertps $1, %xmm0, %xmm2
+insertps $1, (%rax), %xmm2
+
+movntdqa (%rax), %xmm2
+
+mpsadbw $1, %xmm0, %xmm2
+mpsadbw $1, (%rax), %xmm2
+
+packusdw %xmm0, %xmm2
+packusdw (%rax), %xmm2
+
+pblendvb %xmm0, %xmm2
+pblendvb (%rax), %xmm2
+
+pblendw $11, %xmm0, %xmm2
+pblendw $11, (%rax), %xmm2
+
+pcmpeqq %xmm0, %xmm2
+pcmpeqq (%rax), %xmm2
+
+pextrb $1, %xmm0, %ecx
+pextrb $1, %xmm0, (%rax)
+
+pextrd $1, %xmm0, %ecx
+pextrd $1, %xmm0, (%rax)
+
+pextrq $1, %xmm0, %rcx
+pextrq $1, %xmm0, (%rax)
+
+pextrw $1, %xmm0, (%rax)
+
+phminposuw %xmm0, %xmm2
+phminposuw (%rax), %xmm2
+
+pinsrb $1, %eax, %xmm1
+pinsrb $1, (%rax), %xmm1
+
+pinsrd $1, %eax, %xmm1
+pinsrd $1, (%rax), %xmm1
+
+pinsrq $1, %rax, %xmm1
+pinsrq $1, (%rax), %xmm1
+
+pmaxsb %xmm0, %xmm2
+pmaxsb (%rax), %xmm2
+
+pmaxsd %xmm0, %xmm2
+pmaxsd (%rax), %xmm2
+
+pmaxud %xmm0, %xmm2
+pmaxud (%rax), %xmm2
+
+pmaxuw %xmm0, %xmm2
+pmaxuw (%rax), %xmm2
+
+pminsb %xmm0, %xmm2
+pminsb (%rax), %xmm2
+
+pminsd %xmm0, %xmm2
+pminsd (%rax), %xmm2
+
+pminud %xmm0, %xmm2
+pminud (%rax), %xmm2
+
+pminuw %xmm0, %xmm2
+pminuw (%rax), %xmm2
+
+pmovsxbd %xmm0, %xmm2
+pmovsxbd (%rax), %xmm2
+
+pmovsxbq %xmm0, %xmm2
+pmovsxbq (%rax), %xmm2
+
+pmovsxbw %xmm0, %xmm2
+pmovsxbw (%rax), %xmm2
+
+pmovsxdq %xmm0, %xmm2
+pmovsxdq (%rax), %xmm2
+
+pmovsxwd %xmm0, %xmm2
+pmovsxwd (%rax), %xmm2
+
+pmovsxwq %xmm0, %xmm2
+pmovsxwq (%rax), %xmm2
+
+pmovzxbd %xmm0, %xmm2
+pmovzxbd (%rax), %xmm2
+
+pmovzxbq %xmm0, %xmm2
+pmovzxbq (%rax), %xmm2
+
+pmovzxbw %xmm0, %xmm2
+pmovzxbw (%rax), %xmm2
+
+pmovzxdq %xmm0, %xmm2
+pmovzxdq (%rax), %xmm2
+
+pmovzxwd %xmm0, %xmm2
+pmovzxwd (%rax), %xmm2
+
+pmovzxwq %xmm0, %xmm2
+pmovzxwq (%rax), %xmm2
+
+pmuldq %xmm0, %xmm2
+pmuldq (%rax), %xmm2
+
+pmulld %xmm0, %xmm2
+pmulld (%rax), %xmm2
+
+ptest %xmm0, %xmm1
+ptest (%rax), %xmm1
+
+roundpd $1, %xmm0, %xmm2
+roundpd $1, (%rax), %xmm2
+
+roundps $1, %xmm0, %xmm2
+roundps $1, (%rax), %xmm2
+
+roundsd $1, %xmm0, %xmm2
+roundsd $1, (%rax), %xmm2
+
+roundss $1, %xmm0, %xmm2
+roundss $1, (%rax), %xmm2
+
+# CHECK: Instruction Info:
+# CHECK-NEXT: [1]: #uOps
+# CHECK-NEXT: [2]: Latency
+# CHECK-NEXT: [3]: RThroughput
+# CHECK-NEXT: [4]: MayLoad
+# CHECK-NEXT: [5]: MayStore
+# CHECK-NEXT: [6]: HasSideEffects (U)
+
+# CHECK: [1] [2] [3] [4] [5] [6] Instructions:
+# CHECK-NEXT: 1 1 0.50 blendpd $11, %xmm0, %xmm2
+# CHECK-NEXT: 2 7 0.50 * blendpd $11, (%rax), %xmm2
+# CHECK-NEXT: 1 1 0.50 blendps $11, %xmm0, %xmm2
+# CHECK-NEXT: 2 7 0.50 * blendps $11, (%rax), %xmm2
+# CHECK-NEXT: 2 2 1.00 blendvpd %xmm0, %xmm0, %xmm2
+# CHECK-NEXT: 3 8 1.00 * blendvpd %xmm0, (%rax), %xmm2
+# CHECK-NEXT: 2 2 1.00 blendvps %xmm0, %xmm0, %xmm2
+# CHECK-NEXT: 3 8 1.00 * blendvps %xmm0, (%rax), %xmm2
+# CHECK-NEXT: 3 9 1.00 dppd $22, %xmm0, %xmm2
+# CHECK-NEXT: 4 15 1.00 * dppd $22, (%rax), %xmm2
+# CHECK-NEXT: 4 12 2.00 dpps $22, %xmm0, %xmm2
+# CHECK-NEXT: 5 18 2.00 * dpps $22, (%rax), %xmm2
+# CHECK-NEXT: 2 3 1.00 extractps $1, %xmm0, %ecx
+# CHECK-NEXT: 3 5 1.00 * extractps $1, %xmm0, (%rax)
+# CHECK-NEXT: 1 1 1.00 insertps $1, %xmm0, %xmm2
+# CHECK-NEXT: 2 7 1.00 * insertps $1, (%rax), %xmm2
+# CHECK-NEXT: 1 6 0.50 * movntdqa (%rax), %xmm2
+# CHECK-NEXT: 3 7 1.00 mpsadbw $1, %xmm0, %xmm2
+# CHECK-NEXT: 4 13 1.00 * mpsadbw $1, (%rax), %xmm2
+# CHECK-NEXT: 1 1 0.50 packusdw %xmm0, %xmm2
+# CHECK-NEXT: 2 7 0.50 * packusdw (%rax), %xmm2
+# CHECK-NEXT: 2 2 1.00 pblendvb %xmm0, %xmm0, %xmm2
+# CHECK-NEXT: 3 8 1.00 * pblendvb %xmm0, (%rax), %xmm2
+# CHECK-NEXT: 1 1 0.50 pblendw $11, %xmm0, %xmm2
+# CHECK-NEXT: 2 7 0.50 * pblendw $11, (%rax), %xmm2
+# CHECK-NEXT: 1 1 0.50 pcmpeqq %xmm0, %xmm2
+# CHECK-NEXT: 2 7 0.50 * pcmpeqq (%rax), %xmm2
+# CHECK-NEXT: 2 3 1.00 pextrb $1, %xmm0, %ecx
+# CHECK-NEXT: 3 5 1.00 * pextrb $1, %xmm0, (%rax)
+# CHECK-NEXT: 2 3 1.00 pextrd $1, %xmm0, %ecx
+# CHECK-NEXT: 4 5 1.00 * pextrd $1, %xmm0, (%rax)
+# CHECK-NEXT: 2 3 1.00 pextrq $1, %xmm0, %rcx
+# CHECK-NEXT: 4 5 1.00 * pextrq $1, %xmm0, (%rax)
+# CHECK-NEXT: 3 5 1.00 * pextrw $1, %xmm0, (%rax)
+# CHECK-NEXT: 1 5 1.00 phminposuw %xmm0, %xmm2
+# CHECK-NEXT: 2 11 1.00 * phminposuw (%rax), %xmm2
+# CHECK-NEXT: 2 2 1.00 pinsrb $1, %eax, %xmm1
+# CHECK-NEXT: 2 7 0.50 * pinsrb $1, (%rax), %xmm1
+# CHECK-NEXT: 2 2 1.00 pinsrd $1, %eax, %xmm1
+# CHECK-NEXT: 2 7 0.50 * pinsrd $1, (%rax), %xmm1
+# CHECK-NEXT: 2 2 1.00 pinsrq $1, %rax, %xmm1
+# CHECK-NEXT: 2 7 0.50 * pinsrq $1, (%rax), %xmm1
+# CHECK-NEXT: 1 1 0.50 pmaxsb %xmm0, %xmm2
+# CHECK-NEXT: 2 7 0.50 * pmaxsb (%rax), %xmm2
+# CHECK-NEXT: 1 1 0.50 pmaxsd %xmm0, %xmm2
+# CHECK-NEXT: 2 7 0.50 * pmaxsd (%rax), %xmm2
+# CHECK-NEXT: 1 1 0.50 pmaxud %xmm0, %xmm2
+# CHECK-NEXT: 2 7 0.50 * pmaxud (%rax), %xmm2
+# CHECK-NEXT: 1 1 0.50 pmaxuw %xmm0, %xmm2
+# CHECK-NEXT: 2 7 0.50 * pmaxuw (%rax), %xmm2
+# CHECK-NEXT: 1 1 0.50 pminsb %xmm0, %xmm2
+# CHECK-NEXT: 2 7 0.50 * pminsb (%rax), %xmm2
+# CHECK-NEXT: 1 1 0.50 pminsd %xmm0, %xmm2
+# CHECK-NEXT: 2 7 0.50 * pminsd (%rax), %xmm2
+# CHECK-NEXT: 1 1 0.50 pminud %xmm0, %xmm2
+# CHECK-NEXT: 2 7 0.50 * pminud (%rax), %xmm2
+# CHECK-NEXT: 1 1 0.50 pminuw %xmm0, %xmm2
+# CHECK-NEXT: 2 7 0.50 * pminuw (%rax), %xmm2
+# CHECK-NEXT: 1 1 0.50 pmovsxbd %xmm0, %xmm2
+# CHECK-NEXT: 2 7 0.50 * pmovsxbd (%rax), %xmm2
+# CHECK-NEXT: 1 1 0.50 pmovsxbq %xmm0, %xmm2
+# CHECK-NEXT: 2 7 0.50 * pmovsxbq (%rax), %xmm2
+# CHECK-NEXT: 1 1 0.50 pmovsxbw %xmm0, %xmm2
+# CHECK-NEXT: 2 7 0.50 * pmovsxbw (%rax), %xmm2
+# CHECK-NEXT: 1 1 0.50 pmovsxdq %xmm0, %xmm2
+# CHECK-NEXT: 2 7 0.50 * pmovsxdq (%rax), %xmm2
+# CHECK-NEXT: 1 1 0.50 pmovsxwd %xmm0, %xmm2
+# CHECK-NEXT: 2 7 0.50 * pmovsxwd (%rax), %xmm2
+# CHECK-NEXT: 1 1 0.50 pmovsxwq %xmm0, %xmm2
+# CHECK-NEXT: 2 7 0.50 * pmovsxwq (%rax), %xmm2
+# CHECK-NEXT: 1 1 0.50 pmovzxbd %xmm0, %xmm2
+# CHECK-NEXT: 2 7 0.50 * pmovzxbd (%rax), %xmm2
+# CHECK-NEXT: 1 1 0.50 pmovzxbq %xmm0, %xmm2
+# CHECK-NEXT: 2 7 0.50 * pmovzxbq (%rax), %xmm2
+# CHECK-NEXT: 1 1 0.50 pmovzxbw %xmm0, %xmm2
+# CHECK-NEXT: 2 7 0.50 * pmovzxbw (%rax), %xmm2
+# CHECK-NEXT: 1 1 0.50 pmovzxdq %xmm0, %xmm2
+# CHECK-NEXT: 2 7 0.50 * pmovzxdq (%rax), %xmm2
+# CHECK-NEXT: 1 1 0.50 pmovzxwd %xmm0, %xmm2
+# CHECK-NEXT: 2 7 0.50 * pmovzxwd (%rax), %xmm2
+# CHECK-NEXT: 1 1 0.50 pmovzxwq %xmm0, %xmm2
+# CHECK-NEXT: 2 7 0.50 * pmovzxwq (%rax), %xmm2
+# CHECK-NEXT: 1 5 1.00 pmuldq %xmm0, %xmm2
+# CHECK-NEXT: 2 11 1.00 * pmuldq (%rax), %xmm2
+# CHECK-NEXT: 1 5 1.00 pmulld %xmm0, %xmm2
+# CHECK-NEXT: 2 11 1.00 * pmulld (%rax), %xmm2
+# CHECK-NEXT: 2 2 1.00 ptest %xmm0, %xmm1
+# CHECK-NEXT: 3 8 1.00 * ptest (%rax), %xmm1
+# CHECK-NEXT: 1 3 1.00 roundpd $1, %xmm0, %xmm2
+# CHECK-NEXT: 2 9 1.00 * roundpd $1, (%rax), %xmm2
+# CHECK-NEXT: 1 3 1.00 roundps $1, %xmm0, %xmm2
+# CHECK-NEXT: 2 9 1.00 * roundps $1, (%rax), %xmm2
+# CHECK-NEXT: 1 3 1.00 roundsd $1, %xmm0, %xmm2
+# CHECK-NEXT: 2 9 1.00 * roundsd $1, (%rax), %xmm2
+# CHECK-NEXT: 1 3 1.00 roundss $1, %xmm0, %xmm2
+# CHECK-NEXT: 2 9 1.00 * roundss $1, (%rax), %xmm2
+
+# CHECK: Resources:
+# CHECK-NEXT: [0] - SBDivider
+# CHECK-NEXT: [1] - SBFPDivider
+# CHECK-NEXT: [2] - SBPort0
+# CHECK-NEXT: [3] - SBPort1
+# CHECK-NEXT: [4] - SBPort4
+# CHECK-NEXT: [5] - SBPort5
+# CHECK-NEXT: [6.0] - SBPort23
+# CHECK-NEXT: [6.1] - SBPort23
+
+# CHECK: Resource pressure per iteration:
+# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6.0] [6.1]
+# CHECK-NEXT: - - 26.00 47.50 5.00 52.50 24.50 24.50
+
+# CHECK: Resource pressure by instruction:
+# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6.0] [6.1] Instructions:
+# CHECK-NEXT: - - 0.50 - - 0.50 - - blendpd $11, %xmm0, %xmm2
+# CHECK-NEXT: - - 0.50 - - 0.50 0.50 0.50 blendpd $11, (%rax), %xmm2
+# CHECK-NEXT: - - 0.50 - - 0.50 - - blendps $11, %xmm0, %xmm2
+# CHECK-NEXT: - - 0.50 - - 0.50 0.50 0.50 blendps $11, (%rax), %xmm2
+# CHECK-NEXT: - - 1.00 - - 1.00 - - blendvpd %xmm0, %xmm0, %xmm2
+# CHECK-NEXT: - - 1.00 - - 1.00 0.50 0.50 blendvpd %xmm0, (%rax), %xmm2
+# CHECK-NEXT: - - 1.00 - - 1.00 - - blendvps %xmm0, %xmm0, %xmm2
+# CHECK-NEXT: - - 1.00 - - 1.00 0.50 0.50 blendvps %xmm0, (%rax), %xmm2
+# CHECK-NEXT: - - 1.00 1.00 - 1.00 - - dppd $22, %xmm0, %xmm2
+# CHECK-NEXT: - - 1.00 1.00 - 1.00 0.50 0.50 dppd $22, (%rax), %xmm2
+# CHECK-NEXT: - - 1.00 2.00 - 1.00 - - dpps $22, %xmm0, %xmm2
+# CHECK-NEXT: - - 1.00 2.00 - 1.00 0.50 0.50 dpps $22, (%rax), %xmm2
+# CHECK-NEXT: - - 1.00 - - 1.00 - - extractps $1, %xmm0, %ecx
+# CHECK-NEXT: - - - - 1.00 1.00 0.50 0.50 extractps $1, %xmm0, (%rax)
+# CHECK-NEXT: - - - - - 1.00 - - insertps $1, %xmm0, %xmm2
+# CHECK-NEXT: - - - - - 1.00 0.50 0.50 insertps $1, (%rax), %xmm2
+# CHECK-NEXT: - - - - - - 0.50 0.50 movntdqa (%rax), %xmm2
+# CHECK-NEXT: - - 1.00 1.00 - 1.00 - - mpsadbw $1, %xmm0, %xmm2
+# CHECK-NEXT: - - 1.00 1.00 - 1.00 0.50 0.50 mpsadbw $1, (%rax), %xmm2
+# CHECK-NEXT: - - - 0.50 - 0.50 - - packusdw %xmm0, %xmm2
+# CHECK-NEXT: - - - 0.50 - 0.50 0.50 0.50 packusdw (%rax), %xmm2
+# CHECK-NEXT: - - - 1.00 - 1.00 - - pblendvb %xmm0, %xmm0, %xmm2
+# CHECK-NEXT: - - - 1.00 - 1.00 0.50 0.50 pblendvb %xmm0, (%rax), %xmm2
+# CHECK-NEXT: - - - 0.50 - 0.50 - - pblendw $11, %xmm0, %xmm2
+# CHECK-NEXT: - - - 0.50 - 0.50 0.50 0.50 pblendw $11, (%rax), %xmm2
+# CHECK-NEXT: - - - 0.50 - 0.50 - - pcmpeqq %xmm0, %xmm2
+# CHECK-NEXT: - - - 0.50 - 0.50 0.50 0.50 pcmpeqq (%rax), %xmm2
+# CHECK-NEXT: - - 1.00 0.50 - 0.50 - - pextrb $1, %xmm0, %ecx
+# CHECK-NEXT: - - - 0.50 1.00 0.50 0.50 0.50 pextrb $1, %xmm0, (%rax)
+# CHECK-NEXT: - - 1.00 0.50 - 0.50 - - pextrd $1, %xmm0, %ecx
+# CHECK-NEXT: - - 1.00 0.50 1.00 0.50 0.50 0.50 pextrd $1, %xmm0, (%rax)
+# CHECK-NEXT: - - 1.00 0.50 - 0.50 - - pextrq $1, %xmm0, %rcx
+# CHECK-NEXT: - - 1.00 0.50 1.00 0.50 0.50 0.50 pextrq $1, %xmm0, (%rax)
+# CHECK-NEXT: - - - 0.50 1.00 0.50 0.50 0.50 pextrw $1, %xmm0, (%rax)
+# CHECK-NEXT: - - 1.00 - - - - - phminposuw %xmm0, %xmm2
+# CHECK-NEXT: - - 1.00 - - - 0.50 0.50 phminposuw (%rax), %xmm2
+# CHECK-NEXT: - - - 0.50 - 1.50 - - pinsrb $1, %eax, %xmm1
+# CHECK-NEXT: - - - 0.50 - 0.50 0.50 0.50 pinsrb $1, (%rax), %xmm1
+# CHECK-NEXT: - - - 0.50 - 1.50 - - pinsrd $1, %eax, %xmm1
+# CHECK-NEXT: - - - 0.50 - 0.50 0.50 0.50 pinsrd $1, (%rax), %xmm1
+# CHECK-NEXT: - - - 0.50 - 1.50 - - pinsrq $1, %rax, %xmm1
+# CHECK-NEXT: - - - 0.50 - 0.50 0.50 0.50 pinsrq $1, (%rax), %xmm1
+# CHECK-NEXT: - - - 0.50 - 0.50 - - pmaxsb %xmm0, %xmm2
+# CHECK-NEXT: - - - 0.50 - 0.50 0.50 0.50 pmaxsb (%rax), %xmm2
+# CHECK-NEXT: - - - 0.50 - 0.50 - - pmaxsd %xmm0, %xmm2
+# CHECK-NEXT: - - - 0.50 - 0.50 0.50 0.50 pmaxsd (%rax), %xmm2
+# CHECK-NEXT: - - - 0.50 - 0.50 - - pmaxud %xmm0, %xmm2
+# CHECK-NEXT: - - - 0.50 - 0.50 0.50 0.50 pmaxud (%rax), %xmm2
+# CHECK-NEXT: - - - 0.50 - 0.50 - - pmaxuw %xmm0, %xmm2
+# CHECK-NEXT: - - - 0.50 - 0.50 0.50 0.50 pmaxuw (%rax), %xmm2
+# CHECK-NEXT: - - - 0.50 - 0.50 - - pminsb %xmm0, %xmm2
+# CHECK-NEXT: - - - 0.50 - 0.50 0.50 0.50 pminsb (%rax), %xmm2
+# CHECK-NEXT: - - - 0.50 - 0.50 - - pminsd %xmm0, %xmm2
+# CHECK-NEXT: - - - 0.50 - 0.50 0.50 0.50 pminsd (%rax), %xmm2
+# CHECK-NEXT: - - - 0.50 - 0.50 - - pminud %xmm0, %xmm2
+# CHECK-NEXT: - - - 0.50 - 0.50 0.50 0.50 pminud (%rax), %xmm2
+# CHECK-NEXT: - - - 0.50 - 0.50 - - pminuw %xmm0, %xmm2
+# CHECK-NEXT: - - - 0.50 - 0.50 0.50 0.50 pminuw (%rax), %xmm2
+# CHECK-NEXT: - - - 0.50 - 0.50 - - pmovsxbd %xmm0, %xmm2
+# CHECK-NEXT: - - - 0.50 - 0.50 0.50 0.50 pmovsxbd (%rax), %xmm2
+# CHECK-NEXT: - - - 0.50 - 0.50 - - pmovsxbq %xmm0, %xmm2
+# CHECK-NEXT: - - - 0.50 - 0.50 0.50 0.50 pmovsxbq (%rax), %xmm2
+# CHECK-NEXT: - - - 0.50 - 0.50 - - pmovsxbw %xmm0, %xmm2
+# CHECK-NEXT: - - - 0.50 - 0.50 0.50 0.50 pmovsxbw (%rax), %xmm2
+# CHECK-NEXT: - - - 0.50 - 0.50 - - pmovsxdq %xmm0, %xmm2
+# CHECK-NEXT: - - - 0.50 - 0.50 0.50 0.50 pmovsxdq (%rax), %xmm2
+# CHECK-NEXT: - - - 0.50 - 0.50 - - pmovsxwd %xmm0, %xmm2
+# CHECK-NEXT: - - - 0.50 - 0.50 0.50 0.50 pmovsxwd (%rax), %xmm2
+# CHECK-NEXT: - - - 0.50 - 0.50 - - pmovsxwq %xmm0, %xmm2
+# CHECK-NEXT: - - - 0.50 - 0.50 0.50 0.50 pmovsxwq (%rax), %xmm2
+# CHECK-NEXT: - - - 0.50 - 0.50 - - pmovzxbd %xmm0, %xmm2
+# CHECK-NEXT: - - - 0.50 - 0.50 0.50 0.50 pmovzxbd (%rax), %xmm2
+# CHECK-NEXT: - - - 0.50 - 0.50 - - pmovzxbq %xmm0, %xmm2
+# CHECK-NEXT: - - - 0.50 - 0.50 0.50 0.50 pmovzxbq (%rax), %xmm2
+# CHECK-NEXT: - - - 0.50 - 0.50 - - pmovzxbw %xmm0, %xmm2
+# CHECK-NEXT: - - - 0.50 - 0.50 0.50 0.50 pmovzxbw (%rax), %xmm2
+# CHECK-NEXT: - - - 0.50 - 0.50 - - pmovzxdq %xmm0, %xmm2
+# CHECK-NEXT: - - - 0.50 - 0.50 0.50 0.50 pmovzxdq (%rax), %xmm2
+# CHECK-NEXT: - - - 0.50 - 0.50 - - pmovzxwd %xmm0, %xmm2
+# CHECK-NEXT: - - - 0.50 - 0.50 0.50 0.50 pmovzxwd (%rax), %xmm2
+# CHECK-NEXT: - - - 0.50 - 0.50 - - pmovzxwq %xmm0, %xmm2
+# CHECK-NEXT: - - - 0.50 - 0.50 0.50 0.50 pmovzxwq (%rax), %xmm2
+# CHECK-NEXT: - - 1.00 - - - - - pmuldq %xmm0, %xmm2
+# CHECK-NEXT: - - 1.00 - - - 0.50 0.50 pmuldq (%rax), %xmm2
+# CHECK-NEXT: - - 1.00 - - - - - pmulld %xmm0, %xmm2
+# CHECK-NEXT: - - 1.00 - - - 0.50 0.50 pmulld (%rax), %xmm2
+# CHECK-NEXT: - - 1.00 - - 1.00 - - ptest %xmm0, %xmm1
+# CHECK-NEXT: - - 1.00 - - 1.00 0.50 0.50 ptest (%rax), %xmm1
+# CHECK-NEXT: - - - 1.00 - - - - roundpd $1, %xmm0, %xmm2
+# CHECK-NEXT: - - - 1.00 - - 0.50 0.50 roundpd $1, (%rax), %xmm2
+# CHECK-NEXT: - - - 1.00 - - - - roundps $1, %xmm0, %xmm2
+# CHECK-NEXT: - - - 1.00 - - 0.50 0.50 roundps $1, (%rax), %xmm2
+# CHECK-NEXT: - - - 1.00 - - - - roundsd $1, %xmm0, %xmm2
+# CHECK-NEXT: - - - 1.00 - - 0.50 0.50 roundsd $1, (%rax), %xmm2
+# CHECK-NEXT: - - - 1.00 - - - - roundss $1, %xmm0, %xmm2
+# CHECK-NEXT: - - - 1.00 - - 0.50 0.50 roundss $1, (%rax), %xmm2
--- /dev/null
+# NOTE: Assertions have been autogenerated by utils/update_mca_test_checks.py
+# RUN: llvm-mca -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -instruction-tables < %s | FileCheck %s
+
+crc32b %al, %ecx
+crc32b (%rax), %ecx
+
+crc32l %eax, %ecx
+crc32l (%rax), %ecx
+
+crc32w %ax, %ecx
+crc32w (%rax), %ecx
+
+crc32b %al, %rcx
+crc32b (%rax), %rcx
+
+crc32q %rax, %rcx
+crc32q (%rax), %rcx
+
+pcmpestri $1, %xmm0, %xmm2
+pcmpestri $1, (%rax), %xmm2
+
+pcmpestrm $1, %xmm0, %xmm2
+pcmpestrm $1, (%rax), %xmm2
+
+pcmpistri $1, %xmm0, %xmm2
+pcmpistri $1, (%rax), %xmm2
+
+pcmpistrm $1, %xmm0, %xmm2
+pcmpistrm $1, (%rax), %xmm2
+
+pcmpgtq %xmm0, %xmm2
+pcmpgtq (%rax), %xmm2
+
+# CHECK: Instruction Info:
+# CHECK-NEXT: [1]: #uOps
+# CHECK-NEXT: [2]: Latency
+# CHECK-NEXT: [3]: RThroughput
+# CHECK-NEXT: [4]: MayLoad
+# CHECK-NEXT: [5]: MayStore
+# CHECK-NEXT: [6]: HasSideEffects (U)
+
+# CHECK: [1] [2] [3] [4] [5] [6] Instructions:
+# CHECK-NEXT: 1 3 1.00 crc32b %al, %ecx
+# CHECK-NEXT: 2 8 1.00 * crc32b (%rax), %ecx
+# CHECK-NEXT: 1 3 1.00 crc32l %eax, %ecx
+# CHECK-NEXT: 2 8 1.00 * crc32l (%rax), %ecx
+# CHECK-NEXT: 1 3 1.00 crc32w %ax, %ecx
+# CHECK-NEXT: 2 8 1.00 * crc32w (%rax), %ecx
+# CHECK-NEXT: 1 3 1.00 crc32b %al, %rcx
+# CHECK-NEXT: 2 8 1.00 * crc32b (%rax), %rcx
+# CHECK-NEXT: 1 3 1.00 crc32q %rax, %rcx
+# CHECK-NEXT: 2 8 1.00 * crc32q (%rax), %rcx
+# CHECK-NEXT: 1 4 2.67 pcmpestri $1, %xmm0, %xmm2
+# CHECK-NEXT: 1 4 2.33 * pcmpestri $1, (%rax), %xmm2
+# CHECK-NEXT: 1 11 2.67 pcmpestrm $1, %xmm0, %xmm2
+# CHECK-NEXT: 1 11 2.33 * pcmpestrm $1, (%rax), %xmm2
+# CHECK-NEXT: 3 11 3.00 pcmpistri $1, %xmm0, %xmm2
+# CHECK-NEXT: 4 17 3.00 * pcmpistri $1, (%rax), %xmm2
+# CHECK-NEXT: 3 11 3.00 pcmpistrm $1, %xmm0, %xmm2
+# CHECK-NEXT: 4 17 3.00 * pcmpistrm $1, (%rax), %xmm2
+# CHECK-NEXT: 1 5 1.00 pcmpgtq %xmm0, %xmm2
+# CHECK-NEXT: 2 11 1.00 * pcmpgtq (%rax), %xmm2
+
+# CHECK: Resources:
+# CHECK-NEXT: [0] - SBDivider
+# CHECK-NEXT: [1] - SBFPDivider
+# CHECK-NEXT: [2] - SBPort0
+# CHECK-NEXT: [3] - SBPort1
+# CHECK-NEXT: [4] - SBPort4
+# CHECK-NEXT: [5] - SBPort5
+# CHECK-NEXT: [6.0] - SBPort23
+# CHECK-NEXT: [6.1] - SBPort23
+
+# CHECK: Resource pressure per iteration:
+# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6.0] [6.1]
+# CHECK-NEXT: - - 24.00 20.00 - 10.00 5.00 5.00
+
+# CHECK: Resource pressure by instruction:
+# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6.0] [6.1] Instructions:
+# CHECK-NEXT: - - - 1.00 - - - - crc32b %al, %ecx
+# CHECK-NEXT: - - - 1.00 - - 0.50 0.50 crc32b (%rax), %ecx
+# CHECK-NEXT: - - - 1.00 - - - - crc32l %eax, %ecx
+# CHECK-NEXT: - - - 1.00 - - 0.50 0.50 crc32l (%rax), %ecx
+# CHECK-NEXT: - - - 1.00 - - - - crc32w %ax, %ecx
+# CHECK-NEXT: - - - 1.00 - - 0.50 0.50 crc32w (%rax), %ecx
+# CHECK-NEXT: - - - 1.00 - - - - crc32b %al, %rcx
+# CHECK-NEXT: - - - 1.00 - - 0.50 0.50 crc32b (%rax), %rcx
+# CHECK-NEXT: - - - 1.00 - - - - crc32q %rax, %rcx
+# CHECK-NEXT: - - - 1.00 - - 0.50 0.50 crc32q (%rax), %rcx
+# CHECK-NEXT: - - 2.67 2.67 - 2.67 - - pcmpestri $1, %xmm0, %xmm2
+# CHECK-NEXT: - - 2.33 2.33 - 2.33 0.50 0.50 pcmpestri $1, (%rax), %xmm2
+# CHECK-NEXT: - - 2.67 2.67 - 2.67 - - pcmpestrm $1, %xmm0, %xmm2
+# CHECK-NEXT: - - 2.33 2.33 - 2.33 0.50 0.50 pcmpestrm $1, (%rax), %xmm2
+# CHECK-NEXT: - - 3.00 - - - - - pcmpistri $1, %xmm0, %xmm2
+# CHECK-NEXT: - - 3.00 - - - 0.50 0.50 pcmpistri $1, (%rax), %xmm2
+# CHECK-NEXT: - - 3.00 - - - - - pcmpistrm $1, %xmm0, %xmm2
+# CHECK-NEXT: - - 3.00 - - - 0.50 0.50 pcmpistrm $1, (%rax), %xmm2
+# CHECK-NEXT: - - 1.00 - - - - - pcmpgtq %xmm0, %xmm2
+# CHECK-NEXT: - - 1.00 - - - 0.50 0.50 pcmpgtq (%rax), %xmm2
--- /dev/null
+# NOTE: Assertions have been autogenerated by utils/update_mca_test_checks.py
+# RUN: llvm-mca -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -instruction-tables < %s | FileCheck %s
+
+extrq %xmm0, %xmm2
+extrq $22, $2, %xmm2
+
+insertq %xmm0, %xmm2
+insertq $22, $22, %xmm0, %xmm2
+
+movntsd %xmm0, (%rax)
+movntss %xmm0, (%rax)
+
+# CHECK: Instruction Info:
+# CHECK-NEXT: [1]: #uOps
+# CHECK-NEXT: [2]: Latency
+# CHECK-NEXT: [3]: RThroughput
+# CHECK-NEXT: [4]: MayLoad
+# CHECK-NEXT: [5]: MayStore
+# CHECK-NEXT: [6]: HasSideEffects (U)
+
+# CHECK: [1] [2] [3] [4] [5] [6] Instructions:
+# CHECK-NEXT: 1 1 0.50 extrq %xmm0, %xmm2
+# CHECK-NEXT: 1 1 0.50 extrq $22, $2, %xmm2
+# CHECK-NEXT: 1 1 0.50 insertq %xmm0, %xmm2
+# CHECK-NEXT: 1 1 0.50 insertq $22, $22, %xmm0, %xmm2
+# CHECK-NEXT: 1 1 1.00 * movntsd %xmm0, (%rax)
+# CHECK-NEXT: 1 1 1.00 * movntss %xmm0, (%rax)
+
+# CHECK: Resources:
+# CHECK-NEXT: [0] - SBDivider
+# CHECK-NEXT: [1] - SBFPDivider
+# CHECK-NEXT: [2] - SBPort0
+# CHECK-NEXT: [3] - SBPort1
+# CHECK-NEXT: [4] - SBPort4
+# CHECK-NEXT: [5] - SBPort5
+# CHECK-NEXT: [6.0] - SBPort23
+# CHECK-NEXT: [6.1] - SBPort23
+
+# CHECK: Resource pressure per iteration:
+# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6.0] [6.1]
+# CHECK-NEXT: - - - 2.00 2.00 2.00 1.00 1.00
+
+# CHECK: Resource pressure by instruction:
+# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6.0] [6.1] Instructions:
+# CHECK-NEXT: - - - 0.50 - 0.50 - - extrq %xmm0, %xmm2
+# CHECK-NEXT: - - - 0.50 - 0.50 - - extrq $22, $2, %xmm2
+# CHECK-NEXT: - - - 0.50 - 0.50 - - insertq %xmm0, %xmm2
+# CHECK-NEXT: - - - 0.50 - 0.50 - - insertq $22, $22, %xmm0, %xmm2
+# CHECK-NEXT: - - - - 1.00 - 0.50 0.50 movntsd %xmm0, (%rax)
+# CHECK-NEXT: - - - - 1.00 - 0.50 0.50 movntss %xmm0, (%rax)
--- /dev/null
+# NOTE: Assertions have been autogenerated by utils/update_mca_test_checks.py
+# RUN: llvm-mca -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -instruction-tables < %s | FileCheck %s
+
+pabsb %mm0, %mm2
+pabsb (%rax), %mm2
+
+pabsb %xmm0, %xmm2
+pabsb (%rax), %xmm2
+
+pabsd %mm0, %mm2
+pabsd (%rax), %mm2
+
+pabsd %xmm0, %xmm2
+pabsd (%rax), %xmm2
+
+pabsw %mm0, %mm2
+pabsw (%rax), %mm2
+
+pabsw %xmm0, %xmm2
+pabsw (%rax), %xmm2
+
+palignr $1, %mm0, %mm2
+palignr $1, (%rax), %mm2
+
+palignr $1, %xmm0, %xmm2
+palignr $1, (%rax), %xmm2
+
+phaddd %mm0, %mm2
+phaddd (%rax), %mm2
+
+phaddd %xmm0, %xmm2
+phaddd (%rax), %xmm2
+
+phaddsw %mm0, %mm2
+phaddsw (%rax), %mm2
+
+phaddsw %xmm0, %xmm2
+phaddsw (%rax), %xmm2
+
+phaddw %mm0, %mm2
+phaddw (%rax), %mm2
+
+phaddw %xmm0, %xmm2
+phaddw (%rax), %xmm2
+
+phsubd %mm0, %mm2
+phsubd (%rax), %mm2
+
+phsubd %xmm0, %xmm2
+phsubd (%rax), %xmm2
+
+phsubsw %mm0, %mm2
+phsubsw (%rax), %mm2
+
+phsubsw %xmm0, %xmm2
+phsubsw (%rax), %xmm2
+
+phsubw %mm0, %mm2
+phsubw (%rax), %mm2
+
+phsubw %xmm0, %xmm2
+phsubw (%rax), %xmm2
+
+pmaddubsw %mm0, %mm2
+pmaddubsw (%rax), %mm2
+
+pmaddubsw %xmm0, %xmm2
+pmaddubsw (%rax), %xmm2
+
+pmulhrsw %mm0, %mm2
+pmulhrsw (%rax), %mm2
+
+pmulhrsw %xmm0, %xmm2
+pmulhrsw (%rax), %xmm2
+
+pshufb %mm0, %mm2
+pshufb (%rax), %mm2
+
+pshufb %xmm0, %xmm2
+pshufb (%rax), %xmm2
+
+psignb %mm0, %mm2
+psignb (%rax), %mm2
+
+psignb %xmm0, %xmm2
+psignb (%rax), %xmm2
+
+psignd %mm0, %mm2
+psignd (%rax), %mm2
+
+psignd %xmm0, %xmm2
+psignd (%rax), %xmm2
+
+psignw %mm0, %mm2
+psignw (%rax), %mm2
+
+psignw %xmm0, %xmm2
+psignw (%rax), %xmm2
+
+# CHECK: Instruction Info:
+# CHECK-NEXT: [1]: #uOps
+# CHECK-NEXT: [2]: Latency
+# CHECK-NEXT: [3]: RThroughput
+# CHECK-NEXT: [4]: MayLoad
+# CHECK-NEXT: [5]: MayStore
+# CHECK-NEXT: [6]: HasSideEffects (U)
+
+# CHECK: [1] [2] [3] [4] [5] [6] Instructions:
+# CHECK-NEXT: 1 1 0.50 pabsb %mm0, %mm2
+# CHECK-NEXT: 2 6 0.50 * pabsb (%rax), %mm2
+# CHECK-NEXT: 1 1 0.50 pabsb %xmm0, %xmm2
+# CHECK-NEXT: 2 7 0.50 * pabsb (%rax), %xmm2
+# CHECK-NEXT: 1 1 0.50 pabsd %mm0, %mm2
+# CHECK-NEXT: 2 6 0.50 * pabsd (%rax), %mm2
+# CHECK-NEXT: 1 1 0.50 pabsd %xmm0, %xmm2
+# CHECK-NEXT: 2 7 0.50 * pabsd (%rax), %xmm2
+# CHECK-NEXT: 1 1 0.50 pabsw %mm0, %mm2
+# CHECK-NEXT: 2 6 0.50 * pabsw (%rax), %mm2
+# CHECK-NEXT: 1 1 0.50 pabsw %xmm0, %xmm2
+# CHECK-NEXT: 2 7 0.50 * pabsw (%rax), %xmm2
+# CHECK-NEXT: 1 1 0.50 palignr $1, %mm0, %mm2
+# CHECK-NEXT: 2 6 0.50 * palignr $1, (%rax), %mm2
+# CHECK-NEXT: 1 1 0.50 palignr $1, %xmm0, %xmm2
+# CHECK-NEXT: 2 7 0.50 * palignr $1, (%rax), %xmm2
+# CHECK-NEXT: 3 3 1.50 phaddd %mm0, %mm2
+# CHECK-NEXT: 4 8 1.50 * phaddd (%rax), %mm2
+# CHECK-NEXT: 3 3 1.50 phaddd %xmm0, %xmm2
+# CHECK-NEXT: 4 9 1.50 * phaddd (%rax), %xmm2
+# CHECK-NEXT: 3 3 1.50 phaddsw %mm0, %mm2
+# CHECK-NEXT: 4 8 1.50 * phaddsw (%rax), %mm2
+# CHECK-NEXT: 3 3 1.50 phaddsw %xmm0, %xmm2
+# CHECK-NEXT: 4 9 1.50 * phaddsw (%rax), %xmm2
+# CHECK-NEXT: 3 3 1.50 phaddw %mm0, %mm2
+# CHECK-NEXT: 4 8 1.50 * phaddw (%rax), %mm2
+# CHECK-NEXT: 3 3 1.50 phaddw %xmm0, %xmm2
+# CHECK-NEXT: 4 9 1.50 * phaddw (%rax), %xmm2
+# CHECK-NEXT: 3 3 1.50 phsubd %mm0, %mm2
+# CHECK-NEXT: 4 8 1.50 * phsubd (%rax), %mm2
+# CHECK-NEXT: 3 3 1.50 phsubd %xmm0, %xmm2
+# CHECK-NEXT: 4 9 1.50 * phsubd (%rax), %xmm2
+# CHECK-NEXT: 3 3 1.50 phsubsw %mm0, %mm2
+# CHECK-NEXT: 4 8 1.50 * phsubsw (%rax), %mm2
+# CHECK-NEXT: 3 3 1.50 phsubsw %xmm0, %xmm2
+# CHECK-NEXT: 4 9 1.50 * phsubsw (%rax), %xmm2
+# CHECK-NEXT: 3 3 1.50 phsubw %mm0, %mm2
+# CHECK-NEXT: 4 8 1.50 * phsubw (%rax), %mm2
+# CHECK-NEXT: 3 3 1.50 phsubw %xmm0, %xmm2
+# CHECK-NEXT: 4 9 1.50 * phsubw (%rax), %xmm2
+# CHECK-NEXT: 1 5 1.00 pmaddubsw %mm0, %mm2
+# CHECK-NEXT: 2 10 1.00 * pmaddubsw (%rax), %mm2
+# CHECK-NEXT: 1 5 1.00 pmaddubsw %xmm0, %xmm2
+# CHECK-NEXT: 2 11 1.00 * pmaddubsw (%rax), %xmm2
+# CHECK-NEXT: 1 5 1.00 pmulhrsw %mm0, %mm2
+# CHECK-NEXT: 2 10 1.00 * pmulhrsw (%rax), %mm2
+# CHECK-NEXT: 1 5 1.00 pmulhrsw %xmm0, %xmm2
+# CHECK-NEXT: 2 11 1.00 * pmulhrsw (%rax), %xmm2
+# CHECK-NEXT: 1 1 0.50 pshufb %mm0, %mm2
+# CHECK-NEXT: 2 6 0.50 * pshufb (%rax), %mm2
+# CHECK-NEXT: 1 1 0.50 pshufb %xmm0, %xmm2
+# CHECK-NEXT: 2 7 0.50 * pshufb (%rax), %xmm2
+# CHECK-NEXT: 1 1 0.50 psignb %mm0, %mm2
+# CHECK-NEXT: 2 6 0.50 * psignb (%rax), %mm2
+# CHECK-NEXT: 1 1 0.50 psignb %xmm0, %xmm2
+# CHECK-NEXT: 2 7 0.50 * psignb (%rax), %xmm2
+# CHECK-NEXT: 1 1 0.50 psignd %mm0, %mm2
+# CHECK-NEXT: 2 6 0.50 * psignd (%rax), %mm2
+# CHECK-NEXT: 1 1 0.50 psignd %xmm0, %xmm2
+# CHECK-NEXT: 2 7 0.50 * psignd (%rax), %xmm2
+# CHECK-NEXT: 1 1 0.50 psignw %mm0, %mm2
+# CHECK-NEXT: 2 6 0.50 * psignw (%rax), %mm2
+# CHECK-NEXT: 1 1 0.50 psignw %xmm0, %xmm2
+# CHECK-NEXT: 2 7 0.50 * psignw (%rax), %xmm2
+
+# CHECK: Resources:
+# CHECK-NEXT: [0] - SBDivider
+# CHECK-NEXT: [1] - SBFPDivider
+# CHECK-NEXT: [2] - SBPort0
+# CHECK-NEXT: [3] - SBPort1
+# CHECK-NEXT: [4] - SBPort4
+# CHECK-NEXT: [5] - SBPort5
+# CHECK-NEXT: [6.0] - SBPort23
+# CHECK-NEXT: [6.1] - SBPort23
+
+# CHECK: Resource pressure per iteration:
+# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6.0] [6.1]
+# CHECK-NEXT: - - 8.00 52.00 - 52.00 16.00 16.00
+
+# CHECK: Resource pressure by instruction:
+# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6.0] [6.1] Instructions:
+# CHECK-NEXT: - - - 0.50 - 0.50 - - pabsb %mm0, %mm2
+# CHECK-NEXT: - - - 0.50 - 0.50 0.50 0.50 pabsb (%rax), %mm2
+# CHECK-NEXT: - - - 0.50 - 0.50 - - pabsb %xmm0, %xmm2
+# CHECK-NEXT: - - - 0.50 - 0.50 0.50 0.50 pabsb (%rax), %xmm2
+# CHECK-NEXT: - - - 0.50 - 0.50 - - pabsd %mm0, %mm2
+# CHECK-NEXT: - - - 0.50 - 0.50 0.50 0.50 pabsd (%rax), %mm2
+# CHECK-NEXT: - - - 0.50 - 0.50 - - pabsd %xmm0, %xmm2
+# CHECK-NEXT: - - - 0.50 - 0.50 0.50 0.50 pabsd (%rax), %xmm2
+# CHECK-NEXT: - - - 0.50 - 0.50 - - pabsw %mm0, %mm2
+# CHECK-NEXT: - - - 0.50 - 0.50 0.50 0.50 pabsw (%rax), %mm2
+# CHECK-NEXT: - - - 0.50 - 0.50 - - pabsw %xmm0, %xmm2
+# CHECK-NEXT: - - - 0.50 - 0.50 0.50 0.50 pabsw (%rax), %xmm2
+# CHECK-NEXT: - - - 0.50 - 0.50 - - palignr $1, %mm0, %mm2
+# CHECK-NEXT: - - - 0.50 - 0.50 0.50 0.50 palignr $1, (%rax), %mm2
+# CHECK-NEXT: - - - 0.50 - 0.50 - - palignr $1, %xmm0, %xmm2
+# CHECK-NEXT: - - - 0.50 - 0.50 0.50 0.50 palignr $1, (%rax), %xmm2
+# CHECK-NEXT: - - - 1.50 - 1.50 - - phaddd %mm0, %mm2
+# CHECK-NEXT: - - - 1.50 - 1.50 0.50 0.50 phaddd (%rax), %mm2
+# CHECK-NEXT: - - - 1.50 - 1.50 - - phaddd %xmm0, %xmm2
+# CHECK-NEXT: - - - 1.50 - 1.50 0.50 0.50 phaddd (%rax), %xmm2
+# CHECK-NEXT: - - - 1.50 - 1.50 - - phaddsw %mm0, %mm2
+# CHECK-NEXT: - - - 1.50 - 1.50 0.50 0.50 phaddsw (%rax), %mm2
+# CHECK-NEXT: - - - 1.50 - 1.50 - - phaddsw %xmm0, %xmm2
+# CHECK-NEXT: - - - 1.50 - 1.50 0.50 0.50 phaddsw (%rax), %xmm2
+# CHECK-NEXT: - - - 1.50 - 1.50 - - phaddw %mm0, %mm2
+# CHECK-NEXT: - - - 1.50 - 1.50 0.50 0.50 phaddw (%rax), %mm2
+# CHECK-NEXT: - - - 1.50 - 1.50 - - phaddw %xmm0, %xmm2
+# CHECK-NEXT: - - - 1.50 - 1.50 0.50 0.50 phaddw (%rax), %xmm2
+# CHECK-NEXT: - - - 1.50 - 1.50 - - phsubd %mm0, %mm2
+# CHECK-NEXT: - - - 1.50 - 1.50 0.50 0.50 phsubd (%rax), %mm2
+# CHECK-NEXT: - - - 1.50 - 1.50 - - phsubd %xmm0, %xmm2
+# CHECK-NEXT: - - - 1.50 - 1.50 0.50 0.50 phsubd (%rax), %xmm2
+# CHECK-NEXT: - - - 1.50 - 1.50 - - phsubsw %mm0, %mm2
+# CHECK-NEXT: - - - 1.50 - 1.50 0.50 0.50 phsubsw (%rax), %mm2
+# CHECK-NEXT: - - - 1.50 - 1.50 - - phsubsw %xmm0, %xmm2
+# CHECK-NEXT: - - - 1.50 - 1.50 0.50 0.50 phsubsw (%rax), %xmm2
+# CHECK-NEXT: - - - 1.50 - 1.50 - - phsubw %mm0, %mm2
+# CHECK-NEXT: - - - 1.50 - 1.50 0.50 0.50 phsubw (%rax), %mm2
+# CHECK-NEXT: - - - 1.50 - 1.50 - - phsubw %xmm0, %xmm2
+# CHECK-NEXT: - - - 1.50 - 1.50 0.50 0.50 phsubw (%rax), %xmm2
+# CHECK-NEXT: - - 1.00 - - - - - pmaddubsw %mm0, %mm2
+# CHECK-NEXT: - - 1.00 - - - 0.50 0.50 pmaddubsw (%rax), %mm2
+# CHECK-NEXT: - - 1.00 - - - - - pmaddubsw %xmm0, %xmm2
+# CHECK-NEXT: - - 1.00 - - - 0.50 0.50 pmaddubsw (%rax), %xmm2
+# CHECK-NEXT: - - 1.00 - - - - - pmulhrsw %mm0, %mm2
+# CHECK-NEXT: - - 1.00 - - - 0.50 0.50 pmulhrsw (%rax), %mm2
+# CHECK-NEXT: - - 1.00 - - - - - pmulhrsw %xmm0, %xmm2
+# CHECK-NEXT: - - 1.00 - - - 0.50 0.50 pmulhrsw (%rax), %xmm2
+# CHECK-NEXT: - - - 0.50 - 0.50 - - pshufb %mm0, %mm2
+# CHECK-NEXT: - - - 0.50 - 0.50 0.50 0.50 pshufb (%rax), %mm2
+# CHECK-NEXT: - - - 0.50 - 0.50 - - pshufb %xmm0, %xmm2
+# CHECK-NEXT: - - - 0.50 - 0.50 0.50 0.50 pshufb (%rax), %xmm2
+# CHECK-NEXT: - - - 0.50 - 0.50 - - psignb %mm0, %mm2
+# CHECK-NEXT: - - - 0.50 - 0.50 0.50 0.50 psignb (%rax), %mm2
+# CHECK-NEXT: - - - 0.50 - 0.50 - - psignb %xmm0, %xmm2
+# CHECK-NEXT: - - - 0.50 - 0.50 0.50 0.50 psignb (%rax), %xmm2
+# CHECK-NEXT: - - - 0.50 - 0.50 - - psignd %mm0, %mm2
+# CHECK-NEXT: - - - 0.50 - 0.50 0.50 0.50 psignd (%rax), %mm2
+# CHECK-NEXT: - - - 0.50 - 0.50 - - psignd %xmm0, %xmm2
+# CHECK-NEXT: - - - 0.50 - 0.50 0.50 0.50 psignd (%rax), %xmm2
+# CHECK-NEXT: - - - 0.50 - 0.50 - - psignw %mm0, %mm2
+# CHECK-NEXT: - - - 0.50 - 0.50 0.50 0.50 psignw (%rax), %mm2
+# CHECK-NEXT: - - - 0.50 - 0.50 - - psignw %xmm0, %xmm2
+# CHECK-NEXT: - - - 0.50 - 0.50 0.50 0.50 psignw (%rax), %xmm2
--- /dev/null
+# NOTE: Assertions have been autogenerated by utils/update_mca_test_checks.py
+# RUN: llvm-mca -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -instruction-tables < %s | FileCheck %s
+
+bextr $8192, %ebx, %ecx
+bextr $8192, (%rbx), %ecx
+
+bextr $16384, %rbx, %rcx
+bextr $16384, (%rbx), %rcx
+
+blcfill %eax, %ecx
+blcfill (%rax), %ecx
+
+blcfill %rax, %rcx
+blcfill (%rax), %rcx
+
+blci %eax, %ecx
+blci (%rax), %ecx
+
+blci %rax, %rcx
+blci (%rax), %rcx
+
+blcic %eax, %ecx
+blcic (%rax), %ecx
+
+blcic %rax, %rcx
+blcic (%rax), %rcx
+
+blcmsk %eax, %ecx
+blcmsk (%rax), %ecx
+
+blcmsk %rax, %rcx
+blcmsk (%rax), %rcx
+
+blcs %eax, %ecx
+blcs (%rax), %ecx
+
+blcs %rax, %rcx
+blcs (%rax), %rcx
+
+blsfill %eax, %ecx
+blsfill (%rax), %ecx
+
+blsfill %rax, %rcx
+blsfill (%rax), %rcx
+
+blsic %eax, %ecx
+blsic (%rax), %ecx
+
+blsic %rax, %rcx
+blsic (%rax), %rcx
+
+t1mskc %eax, %ecx
+t1mskc (%rax), %ecx
+
+t1mskc %rax, %rcx
+t1mskc (%rax), %rcx
+
+tzmsk %eax, %ecx
+tzmsk (%rax), %ecx
+
+tzmsk %rax, %rcx
+tzmsk (%rax), %rcx
+
+# CHECK: Instruction Info:
+# CHECK-NEXT: [1]: #uOps
+# CHECK-NEXT: [2]: Latency
+# CHECK-NEXT: [3]: RThroughput
+# CHECK-NEXT: [4]: MayLoad
+# CHECK-NEXT: [5]: MayStore
+# CHECK-NEXT: [6]: HasSideEffects (U)
+
+# CHECK: [1] [2] [3] [4] [5] [6] Instructions:
+# CHECK-NEXT: 2 2 1.00 bextrl $8192, %ebx, %ecx
+# CHECK-NEXT: 3 7 1.00 * bextrl $8192, (%rbx), %ecx
+# CHECK-NEXT: 2 2 1.00 bextrq $16384, %rbx, %rcx
+# CHECK-NEXT: 3 7 1.00 * bextrq $16384, (%rbx), %rcx
+# CHECK-NEXT: 1 1 0.33 blcfilll %eax, %ecx
+# CHECK-NEXT: 2 6 0.50 * blcfilll (%rax), %ecx
+# CHECK-NEXT: 1 1 0.33 blcfillq %rax, %rcx
+# CHECK-NEXT: 2 6 0.50 * blcfillq (%rax), %rcx
+# CHECK-NEXT: 1 1 0.33 blcil %eax, %ecx
+# CHECK-NEXT: 2 6 0.50 * blcil (%rax), %ecx
+# CHECK-NEXT: 1 1 0.33 blciq %rax, %rcx
+# CHECK-NEXT: 2 6 0.50 * blciq (%rax), %rcx
+# CHECK-NEXT: 1 1 0.33 blcicl %eax, %ecx
+# CHECK-NEXT: 2 6 0.50 * blcicl (%rax), %ecx
+# CHECK-NEXT: 1 1 0.33 blcicq %rax, %rcx
+# CHECK-NEXT: 2 6 0.50 * blcicq (%rax), %rcx
+# CHECK-NEXT: 1 1 0.33 blcmskl %eax, %ecx
+# CHECK-NEXT: 2 6 0.50 * blcmskl (%rax), %ecx
+# CHECK-NEXT: 1 1 0.33 blcmskq %rax, %rcx
+# CHECK-NEXT: 2 6 0.50 * blcmskq (%rax), %rcx
+# CHECK-NEXT: 1 1 0.33 blcsl %eax, %ecx
+# CHECK-NEXT: 2 6 0.50 * blcsl (%rax), %ecx
+# CHECK-NEXT: 1 1 0.33 blcsq %rax, %rcx
+# CHECK-NEXT: 2 6 0.50 * blcsq (%rax), %rcx
+# CHECK-NEXT: 1 1 0.33 blsfilll %eax, %ecx
+# CHECK-NEXT: 2 6 0.50 * blsfilll (%rax), %ecx
+# CHECK-NEXT: 1 1 0.33 blsfillq %rax, %rcx
+# CHECK-NEXT: 2 6 0.50 * blsfillq (%rax), %rcx
+# CHECK-NEXT: 1 1 0.33 blsicl %eax, %ecx
+# CHECK-NEXT: 2 6 0.50 * blsicl (%rax), %ecx
+# CHECK-NEXT: 1 1 0.33 blsicq %rax, %rcx
+# CHECK-NEXT: 2 6 0.50 * blsicq (%rax), %rcx
+# CHECK-NEXT: 1 1 0.33 t1mskcl %eax, %ecx
+# CHECK-NEXT: 2 6 0.50 * t1mskcl (%rax), %ecx
+# CHECK-NEXT: 1 1 0.33 t1mskcq %rax, %rcx
+# CHECK-NEXT: 2 6 0.50 * t1mskcq (%rax), %rcx
+# CHECK-NEXT: 1 1 0.33 tzmskl %eax, %ecx
+# CHECK-NEXT: 2 6 0.50 * tzmskl (%rax), %ecx
+# CHECK-NEXT: 1 1 0.33 tzmskq %rax, %rcx
+# CHECK-NEXT: 2 6 0.50 * tzmskq (%rax), %rcx
+
+# CHECK: Resources:
+# CHECK-NEXT: [0] - SBDivider
+# CHECK-NEXT: [1] - SBFPDivider
+# CHECK-NEXT: [2] - SBPort0
+# CHECK-NEXT: [3] - SBPort1
+# CHECK-NEXT: [4] - SBPort4
+# CHECK-NEXT: [5] - SBPort5
+# CHECK-NEXT: [6.0] - SBPort23
+# CHECK-NEXT: [6.1] - SBPort23
+
+# CHECK: Resource pressure per iteration:
+# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6.0] [6.1]
+# CHECK-NEXT: - - 14.00 16.00 - 14.00 10.00 10.00
+
+# CHECK: Resource pressure by instruction:
+# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6.0] [6.1] Instructions:
+# CHECK-NEXT: - - 0.50 1.00 - 0.50 - - bextrl $8192, %ebx, %ecx
+# CHECK-NEXT: - - 0.50 1.00 - 0.50 0.50 0.50 bextrl $8192, (%rbx), %ecx
+# CHECK-NEXT: - - 0.50 1.00 - 0.50 - - bextrq $16384, %rbx, %rcx
+# CHECK-NEXT: - - 0.50 1.00 - 0.50 0.50 0.50 bextrq $16384, (%rbx), %rcx
+# CHECK-NEXT: - - 0.33 0.33 - 0.33 - - blcfilll %eax, %ecx
+# CHECK-NEXT: - - 0.33 0.33 - 0.33 0.50 0.50 blcfilll (%rax), %ecx
+# CHECK-NEXT: - - 0.33 0.33 - 0.33 - - blcfillq %rax, %rcx
+# CHECK-NEXT: - - 0.33 0.33 - 0.33 0.50 0.50 blcfillq (%rax), %rcx
+# CHECK-NEXT: - - 0.33 0.33 - 0.33 - - blcil %eax, %ecx
+# CHECK-NEXT: - - 0.33 0.33 - 0.33 0.50 0.50 blcil (%rax), %ecx
+# CHECK-NEXT: - - 0.33 0.33 - 0.33 - - blciq %rax, %rcx
+# CHECK-NEXT: - - 0.33 0.33 - 0.33 0.50 0.50 blciq (%rax), %rcx
+# CHECK-NEXT: - - 0.33 0.33 - 0.33 - - blcicl %eax, %ecx
+# CHECK-NEXT: - - 0.33 0.33 - 0.33 0.50 0.50 blcicl (%rax), %ecx
+# CHECK-NEXT: - - 0.33 0.33 - 0.33 - - blcicq %rax, %rcx
+# CHECK-NEXT: - - 0.33 0.33 - 0.33 0.50 0.50 blcicq (%rax), %rcx
+# CHECK-NEXT: - - 0.33 0.33 - 0.33 - - blcmskl %eax, %ecx
+# CHECK-NEXT: - - 0.33 0.33 - 0.33 0.50 0.50 blcmskl (%rax), %ecx
+# CHECK-NEXT: - - 0.33 0.33 - 0.33 - - blcmskq %rax, %rcx
+# CHECK-NEXT: - - 0.33 0.33 - 0.33 0.50 0.50 blcmskq (%rax), %rcx
+# CHECK-NEXT: - - 0.33 0.33 - 0.33 - - blcsl %eax, %ecx
+# CHECK-NEXT: - - 0.33 0.33 - 0.33 0.50 0.50 blcsl (%rax), %ecx
+# CHECK-NEXT: - - 0.33 0.33 - 0.33 - - blcsq %rax, %rcx
+# CHECK-NEXT: - - 0.33 0.33 - 0.33 0.50 0.50 blcsq (%rax), %rcx
+# CHECK-NEXT: - - 0.33 0.33 - 0.33 - - blsfilll %eax, %ecx
+# CHECK-NEXT: - - 0.33 0.33 - 0.33 0.50 0.50 blsfilll (%rax), %ecx
+# CHECK-NEXT: - - 0.33 0.33 - 0.33 - - blsfillq %rax, %rcx
+# CHECK-NEXT: - - 0.33 0.33 - 0.33 0.50 0.50 blsfillq (%rax), %rcx
+# CHECK-NEXT: - - 0.33 0.33 - 0.33 - - blsicl %eax, %ecx
+# CHECK-NEXT: - - 0.33 0.33 - 0.33 0.50 0.50 blsicl (%rax), %ecx
+# CHECK-NEXT: - - 0.33 0.33 - 0.33 - - blsicq %rax, %rcx
+# CHECK-NEXT: - - 0.33 0.33 - 0.33 0.50 0.50 blsicq (%rax), %rcx
+# CHECK-NEXT: - - 0.33 0.33 - 0.33 - - t1mskcl %eax, %ecx
+# CHECK-NEXT: - - 0.33 0.33 - 0.33 0.50 0.50 t1mskcl (%rax), %ecx
+# CHECK-NEXT: - - 0.33 0.33 - 0.33 - - t1mskcq %rax, %rcx
+# CHECK-NEXT: - - 0.33 0.33 - 0.33 0.50 0.50 t1mskcq (%rax), %rcx
+# CHECK-NEXT: - - 0.33 0.33 - 0.33 - - tzmskl %eax, %ecx
+# CHECK-NEXT: - - 0.33 0.33 - 0.33 0.50 0.50 tzmskl (%rax), %ecx
+# CHECK-NEXT: - - 0.33 0.33 - 0.33 - - tzmskq %rax, %rcx
+# CHECK-NEXT: - - 0.33 0.33 - 0.33 0.50 0.50 tzmskq (%rax), %rcx
--- /dev/null
+# NOTE: Assertions have been autogenerated by utils/update_mca_test_checks.py
+# RUN: llvm-mca -mtriple=i686-unknown-unknown -mcpu=x86-64 -instruction-tables < %s | FileCheck %s
+
+aaa
+
+aad
+aad $7
+
+aam
+aam $7
+
+aas
+
+bound %bx, (%eax)
+bound %ebx, (%eax)
+
+daa
+
+das
+
+into
+
+leave
+
+salc
+
+# CHECK: Instruction Info:
+# CHECK-NEXT: [1]: #uOps
+# CHECK-NEXT: [2]: Latency
+# CHECK-NEXT: [3]: RThroughput
+# CHECK-NEXT: [4]: MayLoad
+# CHECK-NEXT: [5]: MayStore
+# CHECK-NEXT: [6]: HasSideEffects (U)
+
+# CHECK: [1] [2] [3] [4] [5] [6] Instructions:
+# CHECK-NEXT: 1 100 0.33 aaa
+# CHECK-NEXT: 1 100 0.33 aad
+# CHECK-NEXT: 1 100 0.33 aad $7
+# CHECK-NEXT: 1 100 0.33 aam
+# CHECK-NEXT: 1 100 0.33 aam $7
+# CHECK-NEXT: 1 100 0.33 aas
+# CHECK-NEXT: 1 100 0.33 U bound %bx, (%eax)
+# CHECK-NEXT: 1 100 0.33 U bound %ebx, (%eax)
+# CHECK-NEXT: 1 100 0.33 daa
+# CHECK-NEXT: 1 100 0.33 das
+# CHECK-NEXT: 1 100 0.33 U into
+# CHECK-NEXT: 3 7 0.67 * leave
+# CHECK-NEXT: 1 1 0.33 U salc
+
+# CHECK: Resources:
+# CHECK-NEXT: [0] - SBDivider
+# CHECK-NEXT: [1] - SBFPDivider
+# CHECK-NEXT: [2] - SBPort0
+# CHECK-NEXT: [3] - SBPort1
+# CHECK-NEXT: [4] - SBPort4
+# CHECK-NEXT: [5] - SBPort5
+# CHECK-NEXT: [6.0] - SBPort23
+# CHECK-NEXT: [6.1] - SBPort23
+
+# CHECK: Resource pressure per iteration:
+# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6.0] [6.1]
+# CHECK-NEXT: - - 4.67 4.67 - 4.67 0.50 0.50
+
+# CHECK: Resource pressure by instruction:
+# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6.0] [6.1] Instructions:
+# CHECK-NEXT: - - 0.33 0.33 - 0.33 - - aaa
+# CHECK-NEXT: - - 0.33 0.33 - 0.33 - - aad
+# CHECK-NEXT: - - 0.33 0.33 - 0.33 - - aad $7
+# CHECK-NEXT: - - 0.33 0.33 - 0.33 - - aam
+# CHECK-NEXT: - - 0.33 0.33 - 0.33 - - aam $7
+# CHECK-NEXT: - - 0.33 0.33 - 0.33 - - aas
+# CHECK-NEXT: - - 0.33 0.33 - 0.33 - - bound %bx, (%eax)
+# CHECK-NEXT: - - 0.33 0.33 - 0.33 - - bound %ebx, (%eax)
+# CHECK-NEXT: - - 0.33 0.33 - 0.33 - - daa
+# CHECK-NEXT: - - 0.33 0.33 - 0.33 - - das
+# CHECK-NEXT: - - 0.33 0.33 - 0.33 - - into
+# CHECK-NEXT: - - 0.67 0.67 - 0.67 0.50 0.50 leave
+# CHECK-NEXT: - - 0.33 0.33 - 0.33 - - salc
--- /dev/null
+# NOTE: Assertions have been autogenerated by utils/update_mca_test_checks.py
+# RUN: llvm-mca -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -instruction-tables < %s | FileCheck %s
+
+adcb $7, %al
+adcb $7, %dil
+adcb $7, (%rax)
+adcb %sil, %dil
+adcb %sil, (%rax)
+adcb (%rax), %dil
+
+adcw $511, %ax
+adcw $511, %di
+adcw $511, (%rax)
+adcw $7, %di
+adcw $7, (%rax)
+adcw %si, %di
+adcw %si, (%rax)
+adcw (%rax), %di
+
+adcl $665536, %eax
+adcl $665536, %edi
+adcl $665536, (%rax)
+adcl $7, %edi
+adcl $7, (%rax)
+adcl %esi, %edi
+adcl %esi, (%rax)
+adcl (%rax), %edi
+
+adcq $665536, %rax
+adcq $665536, %rdi
+adcq $665536, (%rax)
+adcq $7, %rdi
+adcq $7, (%rax)
+adcq %rsi, %rdi
+adcq %rsi, (%rax)
+adcq (%rax), %rdi
+
+addb $7, %al
+addb $7, %dil
+addb $7, (%rax)
+addb %sil, %dil
+addb %sil, (%rax)
+addb (%rax), %dil
+
+addw $511, %ax
+addw $511, %di
+addw $511, (%rax)
+addw $7, %di
+addw $7, (%rax)
+addw %si, %di
+addw %si, (%rax)
+addw (%rax), %di
+
+addl $665536, %eax
+addl $665536, %edi
+addl $665536, (%rax)
+addl $7, %edi
+addl $7, (%rax)
+addl %esi, %edi
+addl %esi, (%rax)
+addl (%rax), %edi
+
+addq $665536, %rax
+addq $665536, %rdi
+addq $665536, (%rax)
+addq $7, %rdi
+addq $7, (%rax)
+addq %rsi, %rdi
+addq %rsi, (%rax)
+addq (%rax), %rdi
+
+andb $7, %al
+andb $7, %dil
+andb $7, (%rax)
+andb %sil, %dil
+andb %sil, (%rax)
+andb (%rax), %dil
+
+andw $511, %ax
+andw $511, %di
+andw $511, (%rax)
+andw $7, %di
+andw $7, (%rax)
+andw %si, %di
+andw %si, (%rax)
+andw (%rax), %di
+
+andl $665536, %eax
+andl $665536, %edi
+andl $665536, (%rax)
+andl $7, %edi
+andl $7, (%rax)
+andl %esi, %edi
+andl %esi, (%rax)
+andl (%rax), %edi
+
+andq $665536, %rax
+andq $665536, %rdi
+andq $665536, (%rax)
+andq $7, %rdi
+andq $7, (%rax)
+andq %rsi, %rdi
+andq %rsi, (%rax)
+andq (%rax), %rdi
+
+bsfw %si, %di
+bsrw %si, %di
+bsfw (%rax), %di
+bsrw (%rax), %di
+
+bsfl %esi, %edi
+bsrl %esi, %edi
+bsfl (%rax), %edi
+bsrl (%rax), %edi
+
+bsfq %rsi, %rdi
+bsrq %rsi, %rdi
+bsfq (%rax), %rdi
+bsrq (%rax), %rdi
+
+bswap %eax
+bswap %rax
+
+btw %si, %di
+btcw %si, %di
+btrw %si, %di
+btsw %si, %di
+btw %si, (%rax)
+btcw %si, (%rax)
+btrw %si, (%rax)
+btsw %si, (%rax)
+btw $7, %di
+btcw $7, %di
+btrw $7, %di
+btsw $7, %di
+btw $7, (%rax)
+btcw $7, (%rax)
+btrw $7, (%rax)
+btsw $7, (%rax)
+
+btl %esi, %edi
+btcl %esi, %edi
+btrl %esi, %edi
+btsl %esi, %edi
+btl %esi, (%rax)
+btcl %esi, (%rax)
+btrl %esi, (%rax)
+btsl %esi, (%rax)
+btl $7, %edi
+btcl $7, %edi
+btrl $7, %edi
+btsl $7, %edi
+btl $7, (%rax)
+btcl $7, (%rax)
+btrl $7, (%rax)
+btsl $7, (%rax)
+
+btq %rsi, %rdi
+btcq %rsi, %rdi
+btrq %rsi, %rdi
+btsq %rsi, %rdi
+btq %rsi, (%rax)
+btcq %rsi, (%rax)
+btrq %rsi, (%rax)
+btsq %rsi, (%rax)
+btq $7, %rdi
+btcq $7, %rdi
+btrq $7, %rdi
+btsq $7, %rdi
+btq $7, (%rax)
+btcq $7, (%rax)
+btrq $7, (%rax)
+btsq $7, (%rax)
+
+cbw
+cwde
+cdqe
+cwd
+cdq
+cqo
+
+clc
+cld
+cmc
+
+cmpb $7, %al
+cmpb $7, %dil
+cmpb $7, (%rax)
+cmpb %sil, %dil
+cmpb %sil, (%rax)
+cmpb (%rax), %dil
+
+cmpw $511, %ax
+cmpw $511, %di
+cmpw $511, (%rax)
+cmpw $7, %di
+cmpw $7, (%rax)
+cmpw %si, %di
+cmpw %si, (%rax)
+cmpw (%rax), %di
+
+cmpl $665536, %eax
+cmpl $665536, %edi
+cmpl $665536, (%rax)
+cmpl $7, %edi
+cmpl $7, (%rax)
+cmpl %esi, %edi
+cmpl %esi, (%rax)
+cmpl (%rax), %edi
+
+cmpq $665536, %rax
+cmpq $665536, %rdi
+cmpq $665536, (%rax)
+cmpq $7, %rdi
+cmpq $7, (%rax)
+cmpq %rsi, %rdi
+cmpq %rsi, (%rax)
+cmpq (%rax), %rdi
+
+cmpsb
+cmpsw
+cmpsl
+cmpsq
+
+cmpxchgb %cl, %bl
+cmpxchgb %cl, (%rbx)
+
+cmpxchgw %cx, %bx
+cmpxchgw %cx, (%rbx)
+
+cmpxchgl %ecx, %ebx
+cmpxchgl %ecx, (%rbx)
+
+cmpxchgq %rcx, %rbx
+cmpxchgq %rcx, (%rbx)
+
+cpuid
+
+decb %dil
+decb (%rax)
+decw %di
+decw (%rax)
+decl %edi
+decl (%rax)
+decq %rdi
+decq (%rax)
+
+divb %dil
+divb (%rax)
+divw %si
+divw (%rax)
+divl %edx
+divl (%rax)
+divq %rcx
+divq (%rax)
+
+idivb %dil
+idivb (%rax)
+idivw %si
+idivw (%rax)
+idivl %edx
+idivl (%rax)
+idivq %rcx
+idivq (%rax)
+
+imulb %dil
+imulb (%rax)
+
+imulw %di
+imulw (%rax)
+imulw %si, %di
+imulw (%rax), %di
+imulw $511, %si, %di
+imulw $511, (%rax), %di
+imulw $7, %si, %di
+imulw $7, (%rax), %di
+
+imull %edi
+imull (%rax)
+imull %esi, %edi
+imull (%rax), %edi
+imull $665536, %esi, %edi
+imull $665536, (%rax), %edi
+imull $7, %esi, %edi
+imull $7, (%rax), %edi
+
+imulq %rdi
+imulq (%rax)
+imulq %rsi, %rdi
+imulq (%rax), %rdi
+imulq $665536, %rsi, %rdi
+imulq $665536, (%rax), %rdi
+imulq $7, %rsi, %rdi
+imulq $7, (%rax), %rdi
+
+inb $7, %al
+inb %dx, %al
+inw $7, %ax
+inw %dx, %ax
+inl $7, %eax
+inl %dx, %eax
+
+incb %dil
+incb (%rax)
+incw %di
+incw (%rax)
+incl %edi
+incl (%rax)
+incq %rdi
+incq (%rax)
+
+insb
+insw
+insl
+
+int $7
+
+lahf
+
+lodsb
+lodsw
+lodsl
+lodsq
+
+movsb
+movsw
+movsl
+movsq
+
+movsbw %al, %di
+movzbw %al, %di
+movsbw (%rax), %di
+movzbw (%rax), %di
+movsbl %al, %edi
+movzbl %al, %edi
+movsbl (%rax), %edi
+movzbl (%rax), %edi
+movsbq %al, %rdi
+movzbq %al, %rdi
+movsbq (%rax), %rdi
+movzbq (%rax), %rdi
+
+movswl %ax, %edi
+movzwl %ax, %edi
+movswl (%rax), %edi
+movzwl (%rax), %edi
+movswq %ax, %rdi
+movzwq %ax, %rdi
+movswq (%rax), %rdi
+movzwq (%rax), %rdi
+
+movslq %eax, %rdi
+movslq (%rax), %rdi
+
+mulb %dil
+mulb (%rax)
+mulw %si
+mulw (%rax)
+mull %edx
+mull (%rax)
+mulq %rcx
+mulq (%rax)
+
+negb %dil
+negb (%r8)
+negw %si
+negw (%r9)
+negl %edx
+negl (%rax)
+negq %rcx
+negq (%r10)
+
+nop
+nopw %di
+nopw (%rcx)
+nopl %esi
+nopl (%r8)
+nopq %rdx
+nopq (%r9)
+
+notb %dil
+notb (%r8)
+notw %si
+notw (%r9)
+notl %edx
+notl (%rax)
+notq %rcx
+notq (%r10)
+
+orb $7, %al
+orb $7, %dil
+orb $7, (%rax)
+orb %sil, %dil
+orb %sil, (%rax)
+orb (%rax), %dil
+
+orw $511, %ax
+orw $511, %di
+orw $511, (%rax)
+orw $7, %di
+orw $7, (%rax)
+orw %si, %di
+orw %si, (%rax)
+orw (%rax), %di
+
+orl $665536, %eax
+orl $665536, %edi
+orl $665536, (%rax)
+orl $7, %edi
+orl $7, (%rax)
+orl %esi, %edi
+orl %esi, (%rax)
+orl (%rax), %edi
+
+orq $665536, %rax
+orq $665536, %rdi
+orq $665536, (%rax)
+orq $7, %rdi
+orq $7, (%rax)
+orq %rsi, %rdi
+orq %rsi, (%rax)
+orq (%rax), %rdi
+
+outb %al, $7
+outb %al, %dx
+outw %ax, $7
+outw %ax, %dx
+outl %eax, $7
+outl %eax, %dx
+
+outsb
+outsw
+outsl
+
+pause
+
+rclb %dil
+rcrb %dil
+rclb (%rax)
+rcrb (%rax)
+rclb $7, %dil
+rcrb $7, %dil
+rclb $7, (%rax)
+rcrb $7, (%rax)
+rclb %cl, %dil
+rcrb %cl, %dil
+rclb %cl, (%rax)
+rcrb %cl, (%rax)
+
+rclw %di
+rcrw %di
+rclw (%rax)
+rcrw (%rax)
+rclw $7, %di
+rcrw $7, %di
+rclw $7, (%rax)
+rcrw $7, (%rax)
+rclw %cl, %di
+rcrw %cl, %di
+rclw %cl, (%rax)
+rcrw %cl, (%rax)
+
+rcll %edi
+rcrl %edi
+rcll (%rax)
+rcrl (%rax)
+rcll $7, %edi
+rcrl $7, %edi
+rcll $7, (%rax)
+rcrl $7, (%rax)
+rcll %cl, %edi
+rcrl %cl, %edi
+rcll %cl, (%rax)
+rcrl %cl, (%rax)
+
+rclq %rdi
+rcrq %rdi
+rclq (%rax)
+rcrq (%rax)
+rclq $7, %rdi
+rcrq $7, %rdi
+rclq $7, (%rax)
+rcrq $7, (%rax)
+rclq %cl, %rdi
+rcrq %cl, %rdi
+rclq %cl, (%rax)
+rcrq %cl, (%rax)
+
+rolb %dil
+rorb %dil
+rolb (%rax)
+rorb (%rax)
+rolb $7, %dil
+rorb $7, %dil
+rolb $7, (%rax)
+rorb $7, (%rax)
+rolb %cl, %dil
+rorb %cl, %dil
+rolb %cl, (%rax)
+rorb %cl, (%rax)
+
+rolw %di
+rorw %di
+rolw (%rax)
+rorw (%rax)
+rolw $7, %di
+rorw $7, %di
+rolw $7, (%rax)
+rorw $7, (%rax)
+rolw %cl, %di
+rorw %cl, %di
+rolw %cl, (%rax)
+rorw %cl, (%rax)
+
+roll %edi
+rorl %edi
+roll (%rax)
+rorl (%rax)
+roll $7, %edi
+rorl $7, %edi
+roll $7, (%rax)
+rorl $7, (%rax)
+roll %cl, %edi
+rorl %cl, %edi
+roll %cl, (%rax)
+rorl %cl, (%rax)
+
+rolq %rdi
+rorq %rdi
+rolq (%rax)
+rorq (%rax)
+rolq $7, %rdi
+rorq $7, %rdi
+rolq $7, (%rax)
+rorq $7, (%rax)
+rolq %cl, %rdi
+rorq %cl, %rdi
+rolq %cl, (%rax)
+rorq %cl, (%rax)
+
+sahf
+
+sarb %dil
+shlb %dil
+shrb %dil
+sarb (%rax)
+shlb (%rax)
+shrb (%rax)
+sarb $7, %dil
+shlb $7, %dil
+shrb $7, %dil
+sarb $7, (%rax)
+shlb $7, (%rax)
+shrb $7, (%rax)
+sarb %cl, %dil
+shlb %cl, %dil
+shrb %cl, %dil
+sarb %cl, (%rax)
+shlb %cl, (%rax)
+shrb %cl, (%rax)
+
+sarw %di
+shlw %di
+shrw %di
+sarw (%rax)
+shlw (%rax)
+shrw (%rax)
+sarw $7, %di
+shlw $7, %di
+shrw $7, %di
+sarw $7, (%rax)
+shlw $7, (%rax)
+shrw $7, (%rax)
+sarw %cl, %di
+shlw %cl, %di
+shrw %cl, %di
+sarw %cl, (%rax)
+shlw %cl, (%rax)
+shrw %cl, (%rax)
+
+sarl %edi
+shll %edi
+shrl %edi
+sarl (%rax)
+shll (%rax)
+shrl (%rax)
+sarl $7, %edi
+shll $7, %edi
+shrl $7, %edi
+sarl $7, (%rax)
+shll $7, (%rax)
+shrl $7, (%rax)
+sarl %cl, %edi
+shll %cl, %edi
+shrl %cl, %edi
+sarl %cl, (%rax)
+shll %cl, (%rax)
+shrl %cl, (%rax)
+
+sarq %rdi
+shlq %rdi
+shrq %rdi
+sarq (%rax)
+shlq (%rax)
+shrq (%rax)
+sarq $7, %rdi
+shlq $7, %rdi
+shrq $7, %rdi
+sarq $7, (%rax)
+shlq $7, (%rax)
+shrq $7, (%rax)
+sarq %cl, %rdi
+shlq %cl, %rdi
+shrq %cl, %rdi
+sarq %cl, (%rax)
+shlq %cl, (%rax)
+shrq %cl, (%rax)
+
+sbbb $7, %al
+sbbb $7, %dil
+sbbb $7, (%rax)
+sbbb %sil, %dil
+sbbb %sil, (%rax)
+sbbb (%rax), %dil
+
+sbbw $511, %ax
+sbbw $511, %di
+sbbw $511, (%rax)
+sbbw $7, %di
+sbbw $7, (%rax)
+sbbw %si, %di
+sbbw %si, (%rax)
+sbbw (%rax), %di
+
+sbbl $665536, %eax
+sbbl $665536, %edi
+sbbl $665536, (%rax)
+sbbl $7, %edi
+sbbl $7, (%rax)
+sbbl %esi, %edi
+sbbl %esi, (%rax)
+sbbl (%rax), %edi
+
+sbbq $665536, %rax
+sbbq $665536, %rdi
+sbbq $665536, (%rax)
+sbbq $7, %rdi
+sbbq $7, (%rax)
+sbbq %rsi, %rdi
+sbbq %rsi, (%rax)
+sbbq (%rax), %rdi
+
+scasb
+scasw
+scasl
+scasq
+
+seto %al
+seto (%rax)
+setno %al
+setno (%rax)
+setb %al
+setb (%rax)
+setnb %al
+setnb (%rax)
+setz %al
+setz (%rax)
+setnz %al
+setnz (%rax)
+seta %al
+seta (%rax)
+setna %al
+setna (%rax)
+sets %al
+sets (%rax)
+setns %al
+setns (%rax)
+setp %al
+setp (%rax)
+setnp %al
+setnp (%rax)
+setl %al
+setl (%rax)
+setnl %al
+setnl (%rax)
+setg %al
+setg (%rax)
+setng %al
+setng (%rax)
+
+shldw %cl, %si, %di
+shrdw %cl, %si, %di
+shldw %cl, %si, (%rax)
+shrdw %cl, %si, (%rax)
+shldw $7, %si, %di
+shrdw $7, %si, %di
+shldw $7, %si, (%rax)
+shrdw $7, %si, (%rax)
+
+shldl %cl, %esi, %edi
+shrdl %cl, %esi, %edi
+shldl %cl, %esi, (%rax)
+shrdl %cl, %esi, (%rax)
+shldl $7, %esi, %edi
+shrdl $7, %esi, %edi
+shldl $7, %esi, (%rax)
+shrdl $7, %esi, (%rax)
+
+shldq %cl, %rsi, %rdi
+shrdq %cl, %rsi, %rdi
+shldq %cl, %rsi, (%rax)
+shrdq %cl, %rsi, (%rax)
+shldq $7, %rsi, %rdi
+shrdq $7, %rsi, %rdi
+shldq $7, %rsi, (%rax)
+shrdq $7, %rsi, (%rax)
+
+stc
+std
+
+stosb
+stosw
+stosl
+stosq
+
+subb $7, %al
+subb $7, %dil
+subb $7, (%rax)
+subb %sil, %dil
+subb %sil, (%rax)
+subb (%rax), %dil
+
+subw $511, %ax
+subw $511, %di
+subw $511, (%rax)
+subw $7, %di
+subw $7, (%rax)
+subw %si, %di
+subw %si, (%rax)
+subw (%rax), %di
+
+subl $665536, %eax
+subl $665536, %edi
+subl $665536, (%rax)
+subl $7, %edi
+subl $7, (%rax)
+subl %esi, %edi
+subl %esi, (%rax)
+subl (%rax), %edi
+
+subq $665536, %rax
+subq $665536, %rdi
+subq $665536, (%rax)
+subq $7, %rdi
+subq $7, (%rax)
+subq %rsi, %rdi
+subq %rsi, (%rax)
+subq (%rax), %rdi
+
+testb $7, %al
+testb $7, %dil
+testb $7, (%rax)
+testb %sil, %dil
+testb %sil, (%rax)
+
+testw $511, %ax
+testw $511, %di
+testw $511, (%rax)
+testw $7, %di
+testw $7, (%rax)
+testw %si, %di
+testw %si, (%rax)
+
+testl $665536, %eax
+testl $665536, %edi
+testl $665536, (%rax)
+testl $7, %edi
+testl $7, (%rax)
+testl %esi, %edi
+testl %esi, (%rax)
+
+testq $665536, %rax
+testq $665536, %rdi
+testq $665536, (%rax)
+testq $7, %rdi
+testq $7, (%rax)
+testq %rsi, %rdi
+testq %rsi, (%rax)
+
+ud2
+
+xaddb %bl, %cl
+xaddb %bl, (%rcx)
+
+xaddw %bx, %cx
+xaddw %ax, (%rbx)
+
+xaddl %ebx, %ecx
+xaddl %eax, (%rbx)
+
+xaddq %rbx, %rcx
+xaddq %rax, (%rbx)
+
+xchgb %bl, %cl
+xchgb %bl, (%rbx)
+
+xchgw %ax, %bx
+xchgw %bx, %cx
+xchgw %ax, (%rbx)
+
+xchgl %eax, %ebx
+xchgl %ebx, %ecx
+xchgl %eax, (%rbx)
+
+xchgq %rax, %rbx
+xchgq %rbx, %rcx
+xchgq %rax, (%rbx)
+
+xlatb
+
+xorb $7, %al
+xorb $7, %dil
+xorb $7, (%rax)
+xorb %sil, %dil
+xorb %sil, (%rax)
+xorb (%rax), %dil
+
+xorw $511, %ax
+xorw $511, %di
+xorw $511, (%rax)
+xorw $7, %di
+xorw $7, (%rax)
+xorw %si, %di
+xorw %si, (%rax)
+xorw (%rax), %di
+
+xorl $665536, %eax
+xorl $665536, %edi
+xorl $665536, (%rax)
+xorl $7, %edi
+xorl $7, (%rax)
+xorl %esi, %edi
+xorl %esi, (%rax)
+xorl (%rax), %edi
+
+xorq $665536, %rax
+xorq $665536, %rdi
+xorq $665536, (%rax)
+xorq $7, %rdi
+xorq $7, (%rax)
+xorq %rsi, %rdi
+xorq %rsi, (%rax)
+xorq (%rax), %rdi
+
+# CHECK: Instruction Info:
+# CHECK-NEXT: [1]: #uOps
+# CHECK-NEXT: [2]: Latency
+# CHECK-NEXT: [3]: RThroughput
+# CHECK-NEXT: [4]: MayLoad
+# CHECK-NEXT: [5]: MayStore
+# CHECK-NEXT: [6]: HasSideEffects (U)
+
+# CHECK: [1] [2] [3] [4] [5] [6] Instructions:
+# CHECK-NEXT: 2 2 0.67 adcb $7, %al
+# CHECK-NEXT: 2 2 0.67 adcb $7, %dil
+# CHECK-NEXT: 6 9 1.00 * * adcb $7, (%rax)
+# CHECK-NEXT: 2 2 0.67 adcb %sil, %dil
+# CHECK-NEXT: 6 9 1.00 * * adcb %sil, (%rax)
+# CHECK-NEXT: 3 7 0.67 * adcb (%rax), %dil
+# CHECK-NEXT: 2 2 0.67 adcw $511, %ax
+# CHECK-NEXT: 2 2 0.67 adcw $511, %di
+# CHECK-NEXT: 6 9 1.00 * * adcw $511, (%rax)
+# CHECK-NEXT: 2 2 0.67 adcw $7, %di
+# CHECK-NEXT: 6 9 1.00 * * adcw $7, (%rax)
+# CHECK-NEXT: 2 2 0.67 adcw %si, %di
+# CHECK-NEXT: 6 9 1.00 * * adcw %si, (%rax)
+# CHECK-NEXT: 3 7 0.67 * adcw (%rax), %di
+# CHECK-NEXT: 2 2 0.67 adcl $665536, %eax
+# CHECK-NEXT: 2 2 0.67 adcl $665536, %edi
+# CHECK-NEXT: 6 9 1.00 * * adcl $665536, (%rax)
+# CHECK-NEXT: 2 2 0.67 adcl $7, %edi
+# CHECK-NEXT: 6 9 1.00 * * adcl $7, (%rax)
+# CHECK-NEXT: 2 2 0.67 adcl %esi, %edi
+# CHECK-NEXT: 6 9 1.00 * * adcl %esi, (%rax)
+# CHECK-NEXT: 3 7 0.67 * adcl (%rax), %edi
+# CHECK-NEXT: 2 2 0.67 adcq $665536, %rax
+# CHECK-NEXT: 2 2 0.67 adcq $665536, %rdi
+# CHECK-NEXT: 6 9 1.00 * * adcq $665536, (%rax)
+# CHECK-NEXT: 2 2 0.67 adcq $7, %rdi
+# CHECK-NEXT: 6 9 1.00 * * adcq $7, (%rax)
+# CHECK-NEXT: 2 2 0.67 adcq %rsi, %rdi
+# CHECK-NEXT: 6 9 1.00 * * adcq %rsi, (%rax)
+# CHECK-NEXT: 3 7 0.67 * adcq (%rax), %rdi
+# CHECK-NEXT: 1 1 0.33 addb $7, %al
+# CHECK-NEXT: 1 1 0.33 addb $7, %dil
+# CHECK-NEXT: 3 7 1.00 * * addb $7, (%rax)
+# CHECK-NEXT: 1 1 0.33 addb %sil, %dil
+# CHECK-NEXT: 3 7 1.00 * * addb %sil, (%rax)
+# CHECK-NEXT: 2 6 0.50 * addb (%rax), %dil
+# CHECK-NEXT: 1 1 0.33 addw $511, %ax
+# CHECK-NEXT: 1 1 0.33 addw $511, %di
+# CHECK-NEXT: 3 7 1.00 * * addw $511, (%rax)
+# CHECK-NEXT: 1 1 0.33 addw $7, %di
+# CHECK-NEXT: 3 7 1.00 * * addw $7, (%rax)
+# CHECK-NEXT: 1 1 0.33 addw %si, %di
+# CHECK-NEXT: 3 7 1.00 * * addw %si, (%rax)
+# CHECK-NEXT: 2 6 0.50 * addw (%rax), %di
+# CHECK-NEXT: 1 1 0.33 addl $665536, %eax
+# CHECK-NEXT: 1 1 0.33 addl $665536, %edi
+# CHECK-NEXT: 3 7 1.00 * * addl $665536, (%rax)
+# CHECK-NEXT: 1 1 0.33 addl $7, %edi
+# CHECK-NEXT: 3 7 1.00 * * addl $7, (%rax)
+# CHECK-NEXT: 1 1 0.33 addl %esi, %edi
+# CHECK-NEXT: 3 7 1.00 * * addl %esi, (%rax)
+# CHECK-NEXT: 2 6 0.50 * addl (%rax), %edi
+# CHECK-NEXT: 1 1 0.33 addq $665536, %rax
+# CHECK-NEXT: 1 1 0.33 addq $665536, %rdi
+# CHECK-NEXT: 3 7 1.00 * * addq $665536, (%rax)
+# CHECK-NEXT: 1 1 0.33 addq $7, %rdi
+# CHECK-NEXT: 3 7 1.00 * * addq $7, (%rax)
+# CHECK-NEXT: 1 1 0.33 addq %rsi, %rdi
+# CHECK-NEXT: 3 7 1.00 * * addq %rsi, (%rax)
+# CHECK-NEXT: 2 6 0.50 * addq (%rax), %rdi
+# CHECK-NEXT: 1 1 0.33 andb $7, %al
+# CHECK-NEXT: 1 1 0.33 andb $7, %dil
+# CHECK-NEXT: 3 7 1.00 * * andb $7, (%rax)
+# CHECK-NEXT: 1 1 0.33 andb %sil, %dil
+# CHECK-NEXT: 3 7 1.00 * * andb %sil, (%rax)
+# CHECK-NEXT: 2 6 0.50 * andb (%rax), %dil
+# CHECK-NEXT: 1 1 0.33 andw $511, %ax
+# CHECK-NEXT: 1 1 0.33 andw $511, %di
+# CHECK-NEXT: 3 7 1.00 * * andw $511, (%rax)
+# CHECK-NEXT: 1 1 0.33 andw $7, %di
+# CHECK-NEXT: 3 7 1.00 * * andw $7, (%rax)
+# CHECK-NEXT: 1 1 0.33 andw %si, %di
+# CHECK-NEXT: 3 7 1.00 * * andw %si, (%rax)
+# CHECK-NEXT: 2 6 0.50 * andw (%rax), %di
+# CHECK-NEXT: 1 1 0.33 andl $665536, %eax
+# CHECK-NEXT: 1 1 0.33 andl $665536, %edi
+# CHECK-NEXT: 3 7 1.00 * * andl $665536, (%rax)
+# CHECK-NEXT: 1 1 0.33 andl $7, %edi
+# CHECK-NEXT: 3 7 1.00 * * andl $7, (%rax)
+# CHECK-NEXT: 1 1 0.33 andl %esi, %edi
+# CHECK-NEXT: 3 7 1.00 * * andl %esi, (%rax)
+# CHECK-NEXT: 2 6 0.50 * andl (%rax), %edi
+# CHECK-NEXT: 1 1 0.33 andq $665536, %rax
+# CHECK-NEXT: 1 1 0.33 andq $665536, %rdi
+# CHECK-NEXT: 3 7 1.00 * * andq $665536, (%rax)
+# CHECK-NEXT: 1 1 0.33 andq $7, %rdi
+# CHECK-NEXT: 3 7 1.00 * * andq $7, (%rax)
+# CHECK-NEXT: 1 1 0.33 andq %rsi, %rdi
+# CHECK-NEXT: 3 7 1.00 * * andq %rsi, (%rax)
+# CHECK-NEXT: 2 6 0.50 * andq (%rax), %rdi
+# CHECK-NEXT: 1 3 1.00 bsfw %si, %di
+# CHECK-NEXT: 1 3 1.00 bsrw %si, %di
+# CHECK-NEXT: 2 8 1.00 * bsfw (%rax), %di
+# CHECK-NEXT: 2 8 1.00 * bsrw (%rax), %di
+# CHECK-NEXT: 1 3 1.00 bsfl %esi, %edi
+# CHECK-NEXT: 1 3 1.00 bsrl %esi, %edi
+# CHECK-NEXT: 2 8 1.00 * bsfl (%rax), %edi
+# CHECK-NEXT: 2 8 1.00 * bsrl (%rax), %edi
+# CHECK-NEXT: 1 3 1.00 bsfq %rsi, %rdi
+# CHECK-NEXT: 1 3 1.00 bsrq %rsi, %rdi
+# CHECK-NEXT: 2 8 1.00 * bsfq (%rax), %rdi
+# CHECK-NEXT: 2 8 1.00 * bsrq (%rax), %rdi
+# CHECK-NEXT: 1 1 1.00 bswapl %eax
+# CHECK-NEXT: 2 2 1.00 bswapq %rax
+# CHECK-NEXT: 1 1 0.50 btw %si, %di
+# CHECK-NEXT: 1 1 0.50 btcw %si, %di
+# CHECK-NEXT: 1 1 0.50 btrw %si, %di
+# CHECK-NEXT: 1 1 0.50 btsw %si, %di
+# CHECK-NEXT: 6 9 1.00 * btw %si, (%rax)
+# CHECK-NEXT: 6 9 1.00 * * btcw %si, (%rax)
+# CHECK-NEXT: 6 9 1.00 * * btrw %si, (%rax)
+# CHECK-NEXT: 6 9 1.00 * * btsw %si, (%rax)
+# CHECK-NEXT: 1 1 0.50 btw $7, %di
+# CHECK-NEXT: 1 1 0.50 btcw $7, %di
+# CHECK-NEXT: 1 1 0.50 btrw $7, %di
+# CHECK-NEXT: 1 1 0.50 btsw $7, %di
+# CHECK-NEXT: 2 6 0.50 * btw $7, (%rax)
+# CHECK-NEXT: 4 7 1.00 * * btcw $7, (%rax)
+# CHECK-NEXT: 4 7 1.00 * * btrw $7, (%rax)
+# CHECK-NEXT: 4 7 1.00 * * btsw $7, (%rax)
+# CHECK-NEXT: 1 1 0.50 btl %esi, %edi
+# CHECK-NEXT: 1 1 0.50 btcl %esi, %edi
+# CHECK-NEXT: 1 1 0.50 btrl %esi, %edi
+# CHECK-NEXT: 1 1 0.50 btsl %esi, %edi
+# CHECK-NEXT: 6 9 1.00 * btl %esi, (%rax)
+# CHECK-NEXT: 6 9 1.00 * * btcl %esi, (%rax)
+# CHECK-NEXT: 6 9 1.00 * * btrl %esi, (%rax)
+# CHECK-NEXT: 6 9 1.00 * * btsl %esi, (%rax)
+# CHECK-NEXT: 1 1 0.50 btl $7, %edi
+# CHECK-NEXT: 1 1 0.50 btcl $7, %edi
+# CHECK-NEXT: 1 1 0.50 btrl $7, %edi
+# CHECK-NEXT: 1 1 0.50 btsl $7, %edi
+# CHECK-NEXT: 2 6 0.50 * btl $7, (%rax)
+# CHECK-NEXT: 4 7 1.00 * * btcl $7, (%rax)
+# CHECK-NEXT: 4 7 1.00 * * btrl $7, (%rax)
+# CHECK-NEXT: 4 7 1.00 * * btsl $7, (%rax)
+# CHECK-NEXT: 1 1 0.50 btq %rsi, %rdi
+# CHECK-NEXT: 1 1 0.50 btcq %rsi, %rdi
+# CHECK-NEXT: 1 1 0.50 btrq %rsi, %rdi
+# CHECK-NEXT: 1 1 0.50 btsq %rsi, %rdi
+# CHECK-NEXT: 6 9 1.00 * btq %rsi, (%rax)
+# CHECK-NEXT: 6 9 1.00 * * btcq %rsi, (%rax)
+# CHECK-NEXT: 6 9 1.00 * * btrq %rsi, (%rax)
+# CHECK-NEXT: 6 9 1.00 * * btsq %rsi, (%rax)
+# CHECK-NEXT: 1 1 0.50 btq $7, %rdi
+# CHECK-NEXT: 1 1 0.50 btcq $7, %rdi
+# CHECK-NEXT: 1 1 0.50 btrq $7, %rdi
+# CHECK-NEXT: 1 1 0.50 btsq $7, %rdi
+# CHECK-NEXT: 2 6 0.50 * btq $7, (%rax)
+# CHECK-NEXT: 4 7 1.00 * * btcq $7, (%rax)
+# CHECK-NEXT: 4 7 1.00 * * btrq $7, (%rax)
+# CHECK-NEXT: 4 7 1.00 * * btsq $7, (%rax)
+# CHECK-NEXT: 1 1 0.33 cbtw
+# CHECK-NEXT: 1 1 0.33 cwtl
+# CHECK-NEXT: 1 1 0.33 cltq
+# CHECK-NEXT: 2 2 1.00 cwtd
+# CHECK-NEXT: 1 1 0.50 cltd
+# CHECK-NEXT: 1 1 0.50 cqto
+# CHECK-NEXT: 1 1 0.25 U clc
+# CHECK-NEXT: 1 1 0.33 U cld
+# CHECK-NEXT: 1 1 0.33 U cmc
+# CHECK-NEXT: 1 1 0.33 cmpb $7, %al
+# CHECK-NEXT: 1 1 0.33 cmpb $7, %dil
+# CHECK-NEXT: 2 6 0.50 * cmpb $7, (%rax)
+# CHECK-NEXT: 1 1 0.33 cmpb %sil, %dil
+# CHECK-NEXT: 2 6 0.50 * cmpb %sil, (%rax)
+# CHECK-NEXT: 2 6 0.50 * cmpb (%rax), %dil
+# CHECK-NEXT: 1 1 0.33 cmpw $511, %ax
+# CHECK-NEXT: 1 1 0.33 cmpw $511, %di
+# CHECK-NEXT: 2 6 0.50 * cmpw $511, (%rax)
+# CHECK-NEXT: 1 1 0.33 cmpw $7, %di
+# CHECK-NEXT: 2 6 0.50 * cmpw $7, (%rax)
+# CHECK-NEXT: 1 1 0.33 cmpw %si, %di
+# CHECK-NEXT: 2 6 0.50 * cmpw %si, (%rax)
+# CHECK-NEXT: 2 6 0.50 * cmpw (%rax), %di
+# CHECK-NEXT: 1 1 0.33 cmpl $665536, %eax
+# CHECK-NEXT: 1 1 0.33 cmpl $665536, %edi
+# CHECK-NEXT: 2 6 0.50 * cmpl $665536, (%rax)
+# CHECK-NEXT: 1 1 0.33 cmpl $7, %edi
+# CHECK-NEXT: 2 6 0.50 * cmpl $7, (%rax)
+# CHECK-NEXT: 1 1 0.33 cmpl %esi, %edi
+# CHECK-NEXT: 2 6 0.50 * cmpl %esi, (%rax)
+# CHECK-NEXT: 2 6 0.50 * cmpl (%rax), %edi
+# CHECK-NEXT: 1 1 0.33 cmpq $665536, %rax
+# CHECK-NEXT: 1 1 0.33 cmpq $665536, %rdi
+# CHECK-NEXT: 2 6 0.50 * cmpq $665536, (%rax)
+# CHECK-NEXT: 1 1 0.33 cmpq $7, %rdi
+# CHECK-NEXT: 2 6 0.50 * cmpq $7, (%rax)
+# CHECK-NEXT: 1 1 0.33 cmpq %rsi, %rdi
+# CHECK-NEXT: 2 6 0.50 * cmpq %rsi, (%rax)
+# CHECK-NEXT: 2 6 0.50 * cmpq (%rax), %rdi
+# CHECK-NEXT: 5 8 1.00 U cmpsb %es:(%rdi), (%rsi)
+# CHECK-NEXT: 5 8 1.00 U cmpsw %es:(%rdi), (%rsi)
+# CHECK-NEXT: 5 8 1.00 U cmpsl %es:(%rdi), (%rsi)
+# CHECK-NEXT: 5 8 1.00 U cmpsq %es:(%rdi), (%rsi)
+# CHECK-NEXT: 4 5 1.33 cmpxchgb %cl, %bl
+# CHECK-NEXT: 6 8 2.00 * * cmpxchgb %cl, (%rbx)
+# CHECK-NEXT: 4 5 1.33 cmpxchgw %cx, %bx
+# CHECK-NEXT: 6 8 2.00 * * cmpxchgw %cx, (%rbx)
+# CHECK-NEXT: 4 5 1.33 cmpxchgl %ecx, %ebx
+# CHECK-NEXT: 6 8 2.00 * * cmpxchgl %ecx, (%rbx)
+# CHECK-NEXT: 4 5 1.33 cmpxchgq %rcx, %rbx
+# CHECK-NEXT: 6 8 2.00 * * cmpxchgq %rcx, (%rbx)
+# CHECK-NEXT: 1 100 0.33 U cpuid
+# CHECK-NEXT: 1 1 0.33 decb %dil
+# CHECK-NEXT: 3 7 1.00 * * decb (%rax)
+# CHECK-NEXT: 1 1 0.33 decw %di
+# CHECK-NEXT: 3 7 1.00 * * decw (%rax)
+# CHECK-NEXT: 1 1 0.33 decl %edi
+# CHECK-NEXT: 3 7 1.00 * * decl (%rax)
+# CHECK-NEXT: 1 1 0.33 decq %rdi
+# CHECK-NEXT: 3 7 1.00 * * decq (%rax)
+# CHECK-NEXT: 1 25 10.00 U divb %dil
+# CHECK-NEXT: 2 30 10.00 * U divb (%rax)
+# CHECK-NEXT: 1 25 10.00 U divw %si
+# CHECK-NEXT: 2 30 10.00 * U divw (%rax)
+# CHECK-NEXT: 1 25 10.00 U divl %edx
+# CHECK-NEXT: 2 30 10.00 * U divl (%rax)
+# CHECK-NEXT: 1 25 10.00 U divq %rcx
+# CHECK-NEXT: 2 30 10.00 * U divq (%rax)
+# CHECK-NEXT: 1 25 10.00 U idivb %dil
+# CHECK-NEXT: 2 30 10.00 * U idivb (%rax)
+# CHECK-NEXT: 1 25 10.00 U idivw %si
+# CHECK-NEXT: 2 30 10.00 * U idivw (%rax)
+# CHECK-NEXT: 1 25 10.00 U idivl %edx
+# CHECK-NEXT: 2 30 10.00 * U idivl (%rax)
+# CHECK-NEXT: 1 25 10.00 U idivq %rcx
+# CHECK-NEXT: 2 30 10.00 * U idivq (%rax)
+# CHECK-NEXT: 1 3 1.00 imulb %dil
+# CHECK-NEXT: 2 8 1.00 * imulb (%rax)
+# CHECK-NEXT: 4 4 1.33 imulw %di
+# CHECK-NEXT: 5 9 1.33 * imulw (%rax)
+# CHECK-NEXT: 1 3 1.00 imulw %si, %di
+# CHECK-NEXT: 2 8 1.00 * imulw (%rax), %di
+# CHECK-NEXT: 2 4 1.00 imulw $511, %si, %di
+# CHECK-NEXT: 3 8 1.00 * imulw $511, (%rax), %di
+# CHECK-NEXT: 2 4 1.00 imulw $7, %si, %di
+# CHECK-NEXT: 3 8 1.00 * imulw $7, (%rax), %di
+# CHECK-NEXT: 3 4 1.00 imull %edi
+# CHECK-NEXT: 4 9 1.00 * imull (%rax)
+# CHECK-NEXT: 1 3 1.00 imull %esi, %edi
+# CHECK-NEXT: 2 8 1.00 * imull (%rax), %edi
+# CHECK-NEXT: 1 3 1.00 imull $665536, %esi, %edi
+# CHECK-NEXT: 2 8 1.00 * imull $665536, (%rax), %edi
+# CHECK-NEXT: 1 3 1.00 imull $7, %esi, %edi
+# CHECK-NEXT: 2 8 1.00 * imull $7, (%rax), %edi
+# CHECK-NEXT: 2 4 1.00 imulq %rdi
+# CHECK-NEXT: 3 9 1.00 * imulq (%rax)
+# CHECK-NEXT: 1 3 1.00 imulq %rsi, %rdi
+# CHECK-NEXT: 2 8 1.00 * imulq (%rax), %rdi
+# CHECK-NEXT: 1 3 1.00 imulq $665536, %rsi, %rdi
+# CHECK-NEXT: 2 8 1.00 * imulq $665536, (%rax), %rdi
+# CHECK-NEXT: 1 3 1.00 imulq $7, %rsi, %rdi
+# CHECK-NEXT: 2 8 1.00 * imulq $7, (%rax), %rdi
+# CHECK-NEXT: 1 100 0.33 U inb $7, %al
+# CHECK-NEXT: 1 100 0.33 U inb %dx, %al
+# CHECK-NEXT: 1 100 0.33 U inw $7, %ax
+# CHECK-NEXT: 1 100 0.33 U inw %dx, %ax
+# CHECK-NEXT: 1 100 0.33 U inl $7, %eax
+# CHECK-NEXT: 1 100 0.33 U inl %dx, %eax
+# CHECK-NEXT: 1 1 0.33 incb %dil
+# CHECK-NEXT: 3 7 1.00 * * incb (%rax)
+# CHECK-NEXT: 1 1 0.33 incw %di
+# CHECK-NEXT: 3 7 1.00 * * incw (%rax)
+# CHECK-NEXT: 1 1 0.33 incl %edi
+# CHECK-NEXT: 3 7 1.00 * * incl (%rax)
+# CHECK-NEXT: 1 1 0.33 incq %rdi
+# CHECK-NEXT: 3 7 1.00 * * incq (%rax)
+# CHECK-NEXT: 1 100 0.33 U insb %dx, %es:(%rdi)
+# CHECK-NEXT: 1 100 0.33 U insw %dx, %es:(%rdi)
+# CHECK-NEXT: 1 100 0.33 U insl %dx, %es:(%rdi)
+# CHECK-NEXT: 1 100 0.33 * * U int $7
+# CHECK-NEXT: 1 1 0.50 lahf
+# CHECK-NEXT: 3 7 0.67 U lodsb (%rsi), %al
+# CHECK-NEXT: 3 7 0.67 U lodsw (%rsi), %ax
+# CHECK-NEXT: 2 6 0.50 U lodsl (%rsi), %eax
+# CHECK-NEXT: 2 6 0.50 U lodsq (%rsi), %rax
+# CHECK-NEXT: 5 8 1.00 U movsb (%rsi), %es:(%rdi)
+# CHECK-NEXT: 5 8 1.00 U movsw (%rsi), %es:(%rdi)
+# CHECK-NEXT: 5 8 1.00 U movsl (%rsi), %es:(%rdi)
+# CHECK-NEXT: 5 8 1.00 U movsq (%rsi), %es:(%rdi)
+# CHECK-NEXT: 1 1 0.33 movsbw %al, %di
+# CHECK-NEXT: 1 1 0.33 movzbw %al, %di
+# CHECK-NEXT: 1 5 0.50 * movsbw (%rax), %di
+# CHECK-NEXT: 1 5 0.50 * movzbw (%rax), %di
+# CHECK-NEXT: 1 1 0.33 movsbl %al, %edi
+# CHECK-NEXT: 1 1 0.33 movzbl %al, %edi
+# CHECK-NEXT: 1 5 0.50 * movsbl (%rax), %edi
+# CHECK-NEXT: 1 5 0.50 * movzbl (%rax), %edi
+# CHECK-NEXT: 1 1 0.33 movsbq %al, %rdi
+# CHECK-NEXT: 1 1 0.33 movzbq %al, %rdi
+# CHECK-NEXT: 1 5 0.50 * movsbq (%rax), %rdi
+# CHECK-NEXT: 1 5 0.50 * movzbq (%rax), %rdi
+# CHECK-NEXT: 1 1 0.33 movswl %ax, %edi
+# CHECK-NEXT: 1 1 0.33 movzwl %ax, %edi
+# CHECK-NEXT: 1 5 0.50 * movswl (%rax), %edi
+# CHECK-NEXT: 1 5 0.50 * movzwl (%rax), %edi
+# CHECK-NEXT: 1 1 0.33 movswq %ax, %rdi
+# CHECK-NEXT: 1 1 0.33 movzwq %ax, %rdi
+# CHECK-NEXT: 1 5 0.50 * movswq (%rax), %rdi
+# CHECK-NEXT: 1 5 0.50 * movzwq (%rax), %rdi
+# CHECK-NEXT: 1 1 0.33 movslq %eax, %rdi
+# CHECK-NEXT: 1 5 0.50 * movslq (%rax), %rdi
+# CHECK-NEXT: 1 3 1.00 mulb %dil
+# CHECK-NEXT: 2 8 1.00 * mulb (%rax)
+# CHECK-NEXT: 4 4 1.33 mulw %si
+# CHECK-NEXT: 5 9 1.33 * mulw (%rax)
+# CHECK-NEXT: 3 4 1.00 mull %edx
+# CHECK-NEXT: 4 9 1.00 * mull (%rax)
+# CHECK-NEXT: 2 4 1.00 mulq %rcx
+# CHECK-NEXT: 3 9 1.00 * mulq (%rax)
+# CHECK-NEXT: 1 1 0.33 negb %dil
+# CHECK-NEXT: 3 7 1.00 * * negb (%r8)
+# CHECK-NEXT: 1 1 0.33 negw %si
+# CHECK-NEXT: 3 7 1.00 * * negw (%r9)
+# CHECK-NEXT: 1 1 0.33 negl %edx
+# CHECK-NEXT: 3 7 1.00 * * negl (%rax)
+# CHECK-NEXT: 1 1 0.33 negq %rcx
+# CHECK-NEXT: 3 7 1.00 * * negq (%r10)
+# CHECK-NEXT: 1 1 0.25 nop
+# CHECK-NEXT: 1 1 0.25 nopw %di
+# CHECK-NEXT: 1 1 0.25 nopw (%rcx)
+# CHECK-NEXT: 1 1 0.25 nopl %esi
+# CHECK-NEXT: 1 1 0.25 nopl (%r8)
+# CHECK-NEXT: 1 1 0.25 nopq %rdx
+# CHECK-NEXT: 1 1 0.25 nopq (%r9)
+# CHECK-NEXT: 1 1 0.33 notb %dil
+# CHECK-NEXT: 3 7 1.00 * * notb (%r8)
+# CHECK-NEXT: 1 1 0.33 notw %si
+# CHECK-NEXT: 3 7 1.00 * * notw (%r9)
+# CHECK-NEXT: 1 1 0.33 notl %edx
+# CHECK-NEXT: 3 7 1.00 * * notl (%rax)
+# CHECK-NEXT: 1 1 0.33 notq %rcx
+# CHECK-NEXT: 3 7 1.00 * * notq (%r10)
+# CHECK-NEXT: 1 1 0.33 orb $7, %al
+# CHECK-NEXT: 1 1 0.33 orb $7, %dil
+# CHECK-NEXT: 3 7 1.00 * * orb $7, (%rax)
+# CHECK-NEXT: 1 1 0.33 orb %sil, %dil
+# CHECK-NEXT: 3 7 1.00 * * orb %sil, (%rax)
+# CHECK-NEXT: 2 6 0.50 * orb (%rax), %dil
+# CHECK-NEXT: 1 1 0.33 orw $511, %ax
+# CHECK-NEXT: 1 1 0.33 orw $511, %di
+# CHECK-NEXT: 3 7 1.00 * * orw $511, (%rax)
+# CHECK-NEXT: 1 1 0.33 orw $7, %di
+# CHECK-NEXT: 3 7 1.00 * * orw $7, (%rax)
+# CHECK-NEXT: 1 1 0.33 orw %si, %di
+# CHECK-NEXT: 3 7 1.00 * * orw %si, (%rax)
+# CHECK-NEXT: 2 6 0.50 * orw (%rax), %di
+# CHECK-NEXT: 1 1 0.33 orl $665536, %eax
+# CHECK-NEXT: 1 1 0.33 orl $665536, %edi
+# CHECK-NEXT: 3 7 1.00 * * orl $665536, (%rax)
+# CHECK-NEXT: 1 1 0.33 orl $7, %edi
+# CHECK-NEXT: 3 7 1.00 * * orl $7, (%rax)
+# CHECK-NEXT: 1 1 0.33 orl %esi, %edi
+# CHECK-NEXT: 3 7 1.00 * * orl %esi, (%rax)
+# CHECK-NEXT: 2 6 0.50 * orl (%rax), %edi
+# CHECK-NEXT: 1 1 0.33 orq $665536, %rax
+# CHECK-NEXT: 1 1 0.33 orq $665536, %rdi
+# CHECK-NEXT: 3 7 1.00 * * orq $665536, (%rax)
+# CHECK-NEXT: 1 1 0.33 orq $7, %rdi
+# CHECK-NEXT: 3 7 1.00 * * orq $7, (%rax)
+# CHECK-NEXT: 1 1 0.33 orq %rsi, %rdi
+# CHECK-NEXT: 3 7 1.00 * * orq %rsi, (%rax)
+# CHECK-NEXT: 2 6 0.50 * orq (%rax), %rdi
+# CHECK-NEXT: 1 100 0.33 U outb %al, $7
+# CHECK-NEXT: 1 100 0.33 U outb %al, %dx
+# CHECK-NEXT: 1 100 0.33 U outw %ax, $7
+# CHECK-NEXT: 1 100 0.33 U outw %ax, %dx
+# CHECK-NEXT: 1 100 0.33 U outl %eax, $7
+# CHECK-NEXT: 1 100 0.33 U outl %eax, %dx
+# CHECK-NEXT: 1 100 0.33 U outsb (%rsi), %dx
+# CHECK-NEXT: 1 100 0.33 U outsw (%rsi), %dx
+# CHECK-NEXT: 1 100 0.33 U outsl (%rsi), %dx
+# CHECK-NEXT: 4 4 1.33 * * U pause
+# CHECK-NEXT: 3 2 1.50 rclb %dil
+# CHECK-NEXT: 3 2 1.50 rcrb %dil
+# CHECK-NEXT: 11 11 3.50 * rclb (%rax)
+# CHECK-NEXT: 11 11 3.50 * rcrb (%rax)
+# CHECK-NEXT: 8 5 4.00 rclb $7, %dil
+# CHECK-NEXT: 8 5 4.00 rcrb $7, %dil
+# CHECK-NEXT: 11 11 3.50 * rclb $7, (%rax)
+# CHECK-NEXT: 11 11 3.50 * rcrb $7, (%rax)
+# CHECK-NEXT: 8 5 4.00 rclb %cl, %dil
+# CHECK-NEXT: 8 5 4.00 rcrb %cl, %dil
+# CHECK-NEXT: 11 11 3.50 * rclb %cl, (%rax)
+# CHECK-NEXT: 11 11 3.50 * rcrb %cl, (%rax)
+# CHECK-NEXT: 3 2 1.50 rclw %di
+# CHECK-NEXT: 3 2 1.50 rcrw %di
+# CHECK-NEXT: 11 11 3.50 * rclw (%rax)
+# CHECK-NEXT: 11 11 3.50 * rcrw (%rax)
+# CHECK-NEXT: 8 5 4.00 rclw $7, %di
+# CHECK-NEXT: 8 5 4.00 rcrw $7, %di
+# CHECK-NEXT: 11 11 3.50 * rclw $7, (%rax)
+# CHECK-NEXT: 11 11 3.50 * rcrw $7, (%rax)
+# CHECK-NEXT: 8 5 4.00 rclw %cl, %di
+# CHECK-NEXT: 8 5 4.00 rcrw %cl, %di
+# CHECK-NEXT: 11 11 3.50 * rclw %cl, (%rax)
+# CHECK-NEXT: 11 11 3.50 * rcrw %cl, (%rax)
+# CHECK-NEXT: 3 2 1.50 rcll %edi
+# CHECK-NEXT: 3 2 1.50 rcrl %edi
+# CHECK-NEXT: 11 11 3.50 * rcll (%rax)
+# CHECK-NEXT: 11 11 3.50 * rcrl (%rax)
+# CHECK-NEXT: 8 5 4.00 rcll $7, %edi
+# CHECK-NEXT: 8 5 4.00 rcrl $7, %edi
+# CHECK-NEXT: 11 11 3.50 * rcll $7, (%rax)
+# CHECK-NEXT: 11 11 3.50 * rcrl $7, (%rax)
+# CHECK-NEXT: 8 5 4.00 rcll %cl, %edi
+# CHECK-NEXT: 8 5 4.00 rcrl %cl, %edi
+# CHECK-NEXT: 11 11 3.50 * rcll %cl, (%rax)
+# CHECK-NEXT: 11 11 3.50 * rcrl %cl, (%rax)
+# CHECK-NEXT: 3 2 1.50 rclq %rdi
+# CHECK-NEXT: 3 2 1.50 rcrq %rdi
+# CHECK-NEXT: 11 11 3.50 * rclq (%rax)
+# CHECK-NEXT: 11 11 3.50 * rcrq (%rax)
+# CHECK-NEXT: 8 5 4.00 rclq $7, %rdi
+# CHECK-NEXT: 8 5 4.00 rcrq $7, %rdi
+# CHECK-NEXT: 11 11 3.50 * rclq $7, (%rax)
+# CHECK-NEXT: 11 11 3.50 * rcrq $7, (%rax)
+# CHECK-NEXT: 8 5 4.00 rclq %cl, %rdi
+# CHECK-NEXT: 8 5 4.00 rcrq %cl, %rdi
+# CHECK-NEXT: 11 11 3.50 * rclq %cl, (%rax)
+# CHECK-NEXT: 11 11 3.50 * rcrq %cl, (%rax)
+# CHECK-NEXT: 2 2 1.00 rolb %dil
+# CHECK-NEXT: 2 2 1.00 rorb %dil
+# CHECK-NEXT: 5 8 1.00 * * rolb (%rax)
+# CHECK-NEXT: 5 8 1.00 * * rorb (%rax)
+# CHECK-NEXT: 2 2 1.00 rolb $7, %dil
+# CHECK-NEXT: 2 2 1.00 rorb $7, %dil
+# CHECK-NEXT: 5 8 1.00 * * rolb $7, (%rax)
+# CHECK-NEXT: 5 8 1.00 * * rorb $7, (%rax)
+# CHECK-NEXT: 3 3 1.50 rolb %cl, %dil
+# CHECK-NEXT: 3 3 1.50 rorb %cl, %dil
+# CHECK-NEXT: 6 9 1.50 * * rolb %cl, (%rax)
+# CHECK-NEXT: 6 9 1.50 * * rorb %cl, (%rax)
+# CHECK-NEXT: 2 2 1.00 rolw %di
+# CHECK-NEXT: 2 2 1.00 rorw %di
+# CHECK-NEXT: 5 8 1.00 * * rolw (%rax)
+# CHECK-NEXT: 5 8 1.00 * * rorw (%rax)
+# CHECK-NEXT: 2 2 1.00 rolw $7, %di
+# CHECK-NEXT: 2 2 1.00 rorw $7, %di
+# CHECK-NEXT: 5 8 1.00 * * rolw $7, (%rax)
+# CHECK-NEXT: 5 8 1.00 * * rorw $7, (%rax)
+# CHECK-NEXT: 3 3 1.50 rolw %cl, %di
+# CHECK-NEXT: 3 3 1.50 rorw %cl, %di
+# CHECK-NEXT: 6 9 1.50 * * rolw %cl, (%rax)
+# CHECK-NEXT: 6 9 1.50 * * rorw %cl, (%rax)
+# CHECK-NEXT: 2 2 1.00 roll %edi
+# CHECK-NEXT: 2 2 1.00 rorl %edi
+# CHECK-NEXT: 5 8 1.00 * * roll (%rax)
+# CHECK-NEXT: 5 8 1.00 * * rorl (%rax)
+# CHECK-NEXT: 2 2 1.00 roll $7, %edi
+# CHECK-NEXT: 2 2 1.00 rorl $7, %edi
+# CHECK-NEXT: 5 8 1.00 * * roll $7, (%rax)
+# CHECK-NEXT: 5 8 1.00 * * rorl $7, (%rax)
+# CHECK-NEXT: 3 3 1.50 roll %cl, %edi
+# CHECK-NEXT: 3 3 1.50 rorl %cl, %edi
+# CHECK-NEXT: 6 9 1.50 * * roll %cl, (%rax)
+# CHECK-NEXT: 6 9 1.50 * * rorl %cl, (%rax)
+# CHECK-NEXT: 2 2 1.00 rolq %rdi
+# CHECK-NEXT: 2 2 1.00 rorq %rdi
+# CHECK-NEXT: 5 8 1.00 * * rolq (%rax)
+# CHECK-NEXT: 5 8 1.00 * * rorq (%rax)
+# CHECK-NEXT: 2 2 1.00 rolq $7, %rdi
+# CHECK-NEXT: 2 2 1.00 rorq $7, %rdi
+# CHECK-NEXT: 5 8 1.00 * * rolq $7, (%rax)
+# CHECK-NEXT: 5 8 1.00 * * rorq $7, (%rax)
+# CHECK-NEXT: 3 3 1.50 rolq %cl, %rdi
+# CHECK-NEXT: 3 3 1.50 rorq %cl, %rdi
+# CHECK-NEXT: 6 9 1.50 * * rolq %cl, (%rax)
+# CHECK-NEXT: 6 9 1.50 * * rorq %cl, (%rax)
+# CHECK-NEXT: 1 1 0.50 sahf
+# CHECK-NEXT: 1 1 0.50 sarb %dil
+# CHECK-NEXT: 1 1 0.50 shlb %dil
+# CHECK-NEXT: 1 1 0.50 shrb %dil
+# CHECK-NEXT: 4 7 1.00 * * sarb (%rax)
+# CHECK-NEXT: 4 7 1.00 * * shlb (%rax)
+# CHECK-NEXT: 4 7 1.00 * * shrb (%rax)
+# CHECK-NEXT: 1 1 0.50 sarb $7, %dil
+# CHECK-NEXT: 1 1 0.50 shlb $7, %dil
+# CHECK-NEXT: 1 1 0.50 shrb $7, %dil
+# CHECK-NEXT: 4 7 1.00 * * sarb $7, (%rax)
+# CHECK-NEXT: 4 7 1.00 * * shlb $7, (%rax)
+# CHECK-NEXT: 4 7 1.00 * * shrb $7, (%rax)
+# CHECK-NEXT: 3 3 1.50 sarb %cl, %dil
+# CHECK-NEXT: 3 3 1.50 shlb %cl, %dil
+# CHECK-NEXT: 3 3 1.50 shrb %cl, %dil
+# CHECK-NEXT: 6 9 1.50 * * sarb %cl, (%rax)
+# CHECK-NEXT: 6 9 1.50 * * shlb %cl, (%rax)
+# CHECK-NEXT: 6 9 1.50 * * shrb %cl, (%rax)
+# CHECK-NEXT: 1 1 0.50 sarw %di
+# CHECK-NEXT: 1 1 0.50 shlw %di
+# CHECK-NEXT: 1 1 0.50 shrw %di
+# CHECK-NEXT: 4 7 1.00 * * sarw (%rax)
+# CHECK-NEXT: 4 7 1.00 * * shlw (%rax)
+# CHECK-NEXT: 4 7 1.00 * * shrw (%rax)
+# CHECK-NEXT: 1 1 0.50 sarw $7, %di
+# CHECK-NEXT: 1 1 0.50 shlw $7, %di
+# CHECK-NEXT: 1 1 0.50 shrw $7, %di
+# CHECK-NEXT: 4 7 1.00 * * sarw $7, (%rax)
+# CHECK-NEXT: 4 7 1.00 * * shlw $7, (%rax)
+# CHECK-NEXT: 4 7 1.00 * * shrw $7, (%rax)
+# CHECK-NEXT: 3 3 1.50 sarw %cl, %di
+# CHECK-NEXT: 3 3 1.50 shlw %cl, %di
+# CHECK-NEXT: 3 3 1.50 shrw %cl, %di
+# CHECK-NEXT: 6 9 1.50 * * sarw %cl, (%rax)
+# CHECK-NEXT: 6 9 1.50 * * shlw %cl, (%rax)
+# CHECK-NEXT: 6 9 1.50 * * shrw %cl, (%rax)
+# CHECK-NEXT: 1 1 0.50 sarl %edi
+# CHECK-NEXT: 1 1 0.50 shll %edi
+# CHECK-NEXT: 1 1 0.50 shrl %edi
+# CHECK-NEXT: 4 7 1.00 * * sarl (%rax)
+# CHECK-NEXT: 4 7 1.00 * * shll (%rax)
+# CHECK-NEXT: 4 7 1.00 * * shrl (%rax)
+# CHECK-NEXT: 1 1 0.50 sarl $7, %edi
+# CHECK-NEXT: 1 1 0.50 shll $7, %edi
+# CHECK-NEXT: 1 1 0.50 shrl $7, %edi
+# CHECK-NEXT: 4 7 1.00 * * sarl $7, (%rax)
+# CHECK-NEXT: 4 7 1.00 * * shll $7, (%rax)
+# CHECK-NEXT: 4 7 1.00 * * shrl $7, (%rax)
+# CHECK-NEXT: 3 3 1.50 sarl %cl, %edi
+# CHECK-NEXT: 3 3 1.50 shll %cl, %edi
+# CHECK-NEXT: 3 3 1.50 shrl %cl, %edi
+# CHECK-NEXT: 6 9 1.50 * * sarl %cl, (%rax)
+# CHECK-NEXT: 6 9 1.50 * * shll %cl, (%rax)
+# CHECK-NEXT: 6 9 1.50 * * shrl %cl, (%rax)
+# CHECK-NEXT: 1 1 0.50 sarq %rdi
+# CHECK-NEXT: 1 1 0.50 shlq %rdi
+# CHECK-NEXT: 1 1 0.50 shrq %rdi
+# CHECK-NEXT: 4 7 1.00 * * sarq (%rax)
+# CHECK-NEXT: 4 7 1.00 * * shlq (%rax)
+# CHECK-NEXT: 4 7 1.00 * * shrq (%rax)
+# CHECK-NEXT: 1 1 0.50 sarq $7, %rdi
+# CHECK-NEXT: 1 1 0.50 shlq $7, %rdi
+# CHECK-NEXT: 1 1 0.50 shrq $7, %rdi
+# CHECK-NEXT: 4 7 1.00 * * sarq $7, (%rax)
+# CHECK-NEXT: 4 7 1.00 * * shlq $7, (%rax)
+# CHECK-NEXT: 4 7 1.00 * * shrq $7, (%rax)
+# CHECK-NEXT: 3 3 1.50 sarq %cl, %rdi
+# CHECK-NEXT: 3 3 1.50 shlq %cl, %rdi
+# CHECK-NEXT: 3 3 1.50 shrq %cl, %rdi
+# CHECK-NEXT: 6 9 1.50 * * sarq %cl, (%rax)
+# CHECK-NEXT: 6 9 1.50 * * shlq %cl, (%rax)
+# CHECK-NEXT: 6 9 1.50 * * shrq %cl, (%rax)
+# CHECK-NEXT: 2 2 0.67 sbbb $7, %al
+# CHECK-NEXT: 2 2 0.67 sbbb $7, %dil
+# CHECK-NEXT: 6 9 1.00 * * sbbb $7, (%rax)
+# CHECK-NEXT: 2 2 0.67 sbbb %sil, %dil
+# CHECK-NEXT: 6 9 1.00 * * sbbb %sil, (%rax)
+# CHECK-NEXT: 3 7 0.67 * sbbb (%rax), %dil
+# CHECK-NEXT: 2 2 0.67 sbbw $511, %ax
+# CHECK-NEXT: 2 2 0.67 sbbw $511, %di
+# CHECK-NEXT: 6 9 1.00 * * sbbw $511, (%rax)
+# CHECK-NEXT: 2 2 0.67 sbbw $7, %di
+# CHECK-NEXT: 6 9 1.00 * * sbbw $7, (%rax)
+# CHECK-NEXT: 2 2 0.67 sbbw %si, %di
+# CHECK-NEXT: 6 9 1.00 * * sbbw %si, (%rax)
+# CHECK-NEXT: 3 7 0.67 * sbbw (%rax), %di
+# CHECK-NEXT: 2 2 0.67 sbbl $665536, %eax
+# CHECK-NEXT: 2 2 0.67 sbbl $665536, %edi
+# CHECK-NEXT: 6 9 1.00 * * sbbl $665536, (%rax)
+# CHECK-NEXT: 2 2 0.67 sbbl $7, %edi
+# CHECK-NEXT: 6 9 1.00 * * sbbl $7, (%rax)
+# CHECK-NEXT: 2 2 0.67 sbbl %esi, %edi
+# CHECK-NEXT: 6 9 1.00 * * sbbl %esi, (%rax)
+# CHECK-NEXT: 3 7 0.67 * sbbl (%rax), %edi
+# CHECK-NEXT: 2 2 0.67 sbbq $665536, %rax
+# CHECK-NEXT: 2 2 0.67 sbbq $665536, %rdi
+# CHECK-NEXT: 6 9 1.00 * * sbbq $665536, (%rax)
+# CHECK-NEXT: 2 2 0.67 sbbq $7, %rdi
+# CHECK-NEXT: 6 9 1.00 * * sbbq $7, (%rax)
+# CHECK-NEXT: 2 2 0.67 sbbq %rsi, %rdi
+# CHECK-NEXT: 6 9 1.00 * * sbbq %rsi, (%rax)
+# CHECK-NEXT: 3 7 0.67 * sbbq (%rax), %rdi
+# CHECK-NEXT: 2 2 0.67 U scasb %es:(%rdi), %al
+# CHECK-NEXT: 2 2 0.67 U scasw %es:(%rdi), %ax
+# CHECK-NEXT: 2 2 0.67 U scasl %es:(%rdi), %eax
+# CHECK-NEXT: 2 2 0.67 U scasq %es:(%rdi), %rax
+# CHECK-NEXT: 1 1 0.50 seto %al
+# CHECK-NEXT: 3 2 1.00 * seto (%rax)
+# CHECK-NEXT: 1 1 0.50 setno %al
+# CHECK-NEXT: 3 2 1.00 * setno (%rax)
+# CHECK-NEXT: 1 1 0.50 setb %al
+# CHECK-NEXT: 3 2 1.00 * setb (%rax)
+# CHECK-NEXT: 1 1 0.50 setae %al
+# CHECK-NEXT: 3 2 1.00 * setae (%rax)
+# CHECK-NEXT: 1 1 0.50 sete %al
+# CHECK-NEXT: 3 2 1.00 * sete (%rax)
+# CHECK-NEXT: 1 1 0.50 setne %al
+# CHECK-NEXT: 3 2 1.00 * setne (%rax)
+# CHECK-NEXT: 2 2 1.00 seta %al
+# CHECK-NEXT: 4 3 1.00 * seta (%rax)
+# CHECK-NEXT: 2 2 1.00 setbe %al
+# CHECK-NEXT: 4 3 1.00 * setbe (%rax)
+# CHECK-NEXT: 1 1 0.50 sets %al
+# CHECK-NEXT: 3 2 1.00 * sets (%rax)
+# CHECK-NEXT: 1 1 0.50 setns %al
+# CHECK-NEXT: 3 2 1.00 * setns (%rax)
+# CHECK-NEXT: 1 1 0.50 setp %al
+# CHECK-NEXT: 3 2 1.00 * setp (%rax)
+# CHECK-NEXT: 1 1 0.50 setnp %al
+# CHECK-NEXT: 3 2 1.00 * setnp (%rax)
+# CHECK-NEXT: 1 1 0.50 setl %al
+# CHECK-NEXT: 3 2 1.00 * setl (%rax)
+# CHECK-NEXT: 1 1 0.50 setge %al
+# CHECK-NEXT: 3 2 1.00 * setge (%rax)
+# CHECK-NEXT: 1 1 0.50 setg %al
+# CHECK-NEXT: 3 2 1.00 * setg (%rax)
+# CHECK-NEXT: 1 1 0.50 setle %al
+# CHECK-NEXT: 3 2 1.00 * setle (%rax)
+# CHECK-NEXT: 4 4 1.50 shldw %cl, %si, %di
+# CHECK-NEXT: 4 4 1.50 shrdw %cl, %si, %di
+# CHECK-NEXT: 7 10 1.50 * * shldw %cl, %si, (%rax)
+# CHECK-NEXT: 7 10 1.50 * * shrdw %cl, %si, (%rax)
+# CHECK-NEXT: 2 2 0.67 shldw $7, %si, %di
+# CHECK-NEXT: 2 2 0.67 shrdw $7, %si, %di
+# CHECK-NEXT: 5 8 1.00 * * shldw $7, %si, (%rax)
+# CHECK-NEXT: 5 8 1.00 * * shrdw $7, %si, (%rax)
+# CHECK-NEXT: 4 4 1.50 shldl %cl, %esi, %edi
+# CHECK-NEXT: 4 4 1.50 shrdl %cl, %esi, %edi
+# CHECK-NEXT: 7 10 1.50 * * shldl %cl, %esi, (%rax)
+# CHECK-NEXT: 7 10 1.50 * * shrdl %cl, %esi, (%rax)
+# CHECK-NEXT: 2 2 0.67 shldl $7, %esi, %edi
+# CHECK-NEXT: 2 2 0.67 shrdl $7, %esi, %edi
+# CHECK-NEXT: 5 8 1.00 * * shldl $7, %esi, (%rax)
+# CHECK-NEXT: 5 8 1.00 * * shrdl $7, %esi, (%rax)
+# CHECK-NEXT: 4 4 1.50 shldq %cl, %rsi, %rdi
+# CHECK-NEXT: 4 4 1.50 shrdq %cl, %rsi, %rdi
+# CHECK-NEXT: 7 10 1.50 * * shldq %cl, %rsi, (%rax)
+# CHECK-NEXT: 7 10 1.50 * * shrdq %cl, %rsi, (%rax)
+# CHECK-NEXT: 2 2 0.67 shldq $7, %rsi, %rdi
+# CHECK-NEXT: 2 2 0.67 shrdq $7, %rsi, %rdi
+# CHECK-NEXT: 5 8 1.00 * * shldq $7, %rsi, (%rax)
+# CHECK-NEXT: 5 8 1.00 * * shrdq $7, %rsi, (%rax)
+# CHECK-NEXT: 1 1 0.33 U stc
+# CHECK-NEXT: 1 1 0.33 U std
+# CHECK-NEXT: 3 5 1.00 U stosb %al, %es:(%rdi)
+# CHECK-NEXT: 3 5 1.00 U stosw %ax, %es:(%rdi)
+# CHECK-NEXT: 3 5 1.00 U stosl %eax, %es:(%rdi)
+# CHECK-NEXT: 3 5 1.00 U stosq %rax, %es:(%rdi)
+# CHECK-NEXT: 1 1 0.33 subb $7, %al
+# CHECK-NEXT: 1 1 0.33 subb $7, %dil
+# CHECK-NEXT: 3 7 1.00 * * subb $7, (%rax)
+# CHECK-NEXT: 1 1 0.33 subb %sil, %dil
+# CHECK-NEXT: 3 7 1.00 * * subb %sil, (%rax)
+# CHECK-NEXT: 2 6 0.50 * subb (%rax), %dil
+# CHECK-NEXT: 1 1 0.33 subw $511, %ax
+# CHECK-NEXT: 1 1 0.33 subw $511, %di
+# CHECK-NEXT: 3 7 1.00 * * subw $511, (%rax)
+# CHECK-NEXT: 1 1 0.33 subw $7, %di
+# CHECK-NEXT: 3 7 1.00 * * subw $7, (%rax)
+# CHECK-NEXT: 1 1 0.33 subw %si, %di
+# CHECK-NEXT: 3 7 1.00 * * subw %si, (%rax)
+# CHECK-NEXT: 2 6 0.50 * subw (%rax), %di
+# CHECK-NEXT: 1 1 0.33 subl $665536, %eax
+# CHECK-NEXT: 1 1 0.33 subl $665536, %edi
+# CHECK-NEXT: 3 7 1.00 * * subl $665536, (%rax)
+# CHECK-NEXT: 1 1 0.33 subl $7, %edi
+# CHECK-NEXT: 3 7 1.00 * * subl $7, (%rax)
+# CHECK-NEXT: 1 1 0.33 subl %esi, %edi
+# CHECK-NEXT: 3 7 1.00 * * subl %esi, (%rax)
+# CHECK-NEXT: 2 6 0.50 * subl (%rax), %edi
+# CHECK-NEXT: 1 1 0.33 subq $665536, %rax
+# CHECK-NEXT: 1 1 0.33 subq $665536, %rdi
+# CHECK-NEXT: 3 7 1.00 * * subq $665536, (%rax)
+# CHECK-NEXT: 1 1 0.33 subq $7, %rdi
+# CHECK-NEXT: 3 7 1.00 * * subq $7, (%rax)
+# CHECK-NEXT: 1 1 0.33 subq %rsi, %rdi
+# CHECK-NEXT: 3 7 1.00 * * subq %rsi, (%rax)
+# CHECK-NEXT: 2 6 0.50 * subq (%rax), %rdi
+# CHECK-NEXT: 1 1 0.33 testb $7, %al
+# CHECK-NEXT: 1 1 0.33 testb $7, %dil
+# CHECK-NEXT: 2 6 0.50 * testb $7, (%rax)
+# CHECK-NEXT: 1 1 0.33 testb %sil, %dil
+# CHECK-NEXT: 2 6 0.50 * testb %sil, (%rax)
+# CHECK-NEXT: 1 1 0.33 testw $511, %ax
+# CHECK-NEXT: 1 1 0.33 testw $511, %di
+# CHECK-NEXT: 2 6 0.50 * testw $511, (%rax)
+# CHECK-NEXT: 1 1 0.33 testw $7, %di
+# CHECK-NEXT: 2 6 0.50 * testw $7, (%rax)
+# CHECK-NEXT: 1 1 0.33 testw %si, %di
+# CHECK-NEXT: 2 6 0.50 * testw %si, (%rax)
+# CHECK-NEXT: 1 1 0.33 testl $665536, %eax
+# CHECK-NEXT: 1 1 0.33 testl $665536, %edi
+# CHECK-NEXT: 2 6 0.50 * testl $665536, (%rax)
+# CHECK-NEXT: 1 1 0.33 testl $7, %edi
+# CHECK-NEXT: 2 6 0.50 * testl $7, (%rax)
+# CHECK-NEXT: 1 1 0.33 testl %esi, %edi
+# CHECK-NEXT: 2 6 0.50 * testl %esi, (%rax)
+# CHECK-NEXT: 1 1 0.33 testq $665536, %rax
+# CHECK-NEXT: 1 1 0.33 testq $665536, %rdi
+# CHECK-NEXT: 2 6 0.50 * testq $665536, (%rax)
+# CHECK-NEXT: 1 1 0.33 testq $7, %rdi
+# CHECK-NEXT: 2 6 0.50 * testq $7, (%rax)
+# CHECK-NEXT: 1 1 0.33 testq %rsi, %rdi
+# CHECK-NEXT: 2 6 0.50 * testq %rsi, (%rax)
+# CHECK-NEXT: 1 100 0.33 * U ud2
+# CHECK-NEXT: 3 2 1.00 xaddb %bl, %cl
+# CHECK-NEXT: 5 8 1.00 * * xaddb %bl, (%rcx)
+# CHECK-NEXT: 3 2 1.00 xaddw %bx, %cx
+# CHECK-NEXT: 5 8 1.00 * * xaddw %ax, (%rbx)
+# CHECK-NEXT: 3 2 1.00 xaddl %ebx, %ecx
+# CHECK-NEXT: 5 8 1.00 * * xaddl %eax, (%rbx)
+# CHECK-NEXT: 3 2 1.00 xaddq %rbx, %rcx
+# CHECK-NEXT: 5 8 1.00 * * xaddq %rax, (%rbx)
+# CHECK-NEXT: 3 2 1.00 xchgb %bl, %cl
+# CHECK-NEXT: 3 6 1.00 * * xchgb %bl, (%rbx)
+# CHECK-NEXT: 3 2 1.00 xchgw %bx, %ax
+# CHECK-NEXT: 3 2 1.00 xchgw %bx, %cx
+# CHECK-NEXT: 3 6 1.00 * * xchgw %ax, (%rbx)
+# CHECK-NEXT: 3 2 1.00 xchgl %ebx, %eax
+# CHECK-NEXT: 3 2 1.00 xchgl %ebx, %ecx
+# CHECK-NEXT: 3 6 1.00 * * xchgl %eax, (%rbx)
+# CHECK-NEXT: 3 2 1.00 xchgq %rbx, %rax
+# CHECK-NEXT: 3 2 1.00 xchgq %rbx, %rcx
+# CHECK-NEXT: 3 6 1.00 * * xchgq %rax, (%rbx)
+# CHECK-NEXT: 1 5 0.50 * xlatb
+# CHECK-NEXT: 1 1 0.33 xorb $7, %al
+# CHECK-NEXT: 1 1 0.33 xorb $7, %dil
+# CHECK-NEXT: 3 7 1.00 * * xorb $7, (%rax)
+# CHECK-NEXT: 1 1 0.33 xorb %sil, %dil
+# CHECK-NEXT: 3 7 1.00 * * xorb %sil, (%rax)
+# CHECK-NEXT: 2 6 0.50 * xorb (%rax), %dil
+# CHECK-NEXT: 1 1 0.33 xorw $511, %ax
+# CHECK-NEXT: 1 1 0.33 xorw $511, %di
+# CHECK-NEXT: 3 7 1.00 * * xorw $511, (%rax)
+# CHECK-NEXT: 1 1 0.33 xorw $7, %di
+# CHECK-NEXT: 3 7 1.00 * * xorw $7, (%rax)
+# CHECK-NEXT: 1 1 0.33 xorw %si, %di
+# CHECK-NEXT: 3 7 1.00 * * xorw %si, (%rax)
+# CHECK-NEXT: 2 6 0.50 * xorw (%rax), %di
+# CHECK-NEXT: 1 1 0.33 xorl $665536, %eax
+# CHECK-NEXT: 1 1 0.33 xorl $665536, %edi
+# CHECK-NEXT: 3 7 1.00 * * xorl $665536, (%rax)
+# CHECK-NEXT: 1 1 0.33 xorl $7, %edi
+# CHECK-NEXT: 3 7 1.00 * * xorl $7, (%rax)
+# CHECK-NEXT: 1 1 0.33 xorl %esi, %edi
+# CHECK-NEXT: 3 7 1.00 * * xorl %esi, (%rax)
+# CHECK-NEXT: 2 6 0.50 * xorl (%rax), %edi
+# CHECK-NEXT: 1 1 0.33 xorq $665536, %rax
+# CHECK-NEXT: 1 1 0.33 xorq $665536, %rdi
+# CHECK-NEXT: 3 7 1.00 * * xorq $665536, (%rax)
+# CHECK-NEXT: 1 1 0.33 xorq $7, %rdi
+# CHECK-NEXT: 3 7 1.00 * * xorq $7, (%rax)
+# CHECK-NEXT: 1 1 0.33 xorq %rsi, %rdi
+# CHECK-NEXT: 3 7 1.00 * * xorq %rsi, (%rax)
+# CHECK-NEXT: 2 6 0.50 * xorq (%rax), %rdi
+
+# CHECK: Resources:
+# CHECK-NEXT: [0] - SBDivider
+# CHECK-NEXT: [1] - SBFPDivider
+# CHECK-NEXT: [2] - SBPort0
+# CHECK-NEXT: [3] - SBPort1
+# CHECK-NEXT: [4] - SBPort4
+# CHECK-NEXT: [5] - SBPort5
+# CHECK-NEXT: [6.0] - SBPort23
+# CHECK-NEXT: [6.1] - SBPort23
+
+# CHECK: Resource pressure per iteration:
+# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6.0] [6.1]
+# CHECK-NEXT: 160.00 - 571.83 221.33 222.00 571.83 316.00 316.00
+
+# CHECK: Resource pressure by instruction:
+# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6.0] [6.1] Instructions:
+# CHECK-NEXT: - - 0.83 0.33 - 0.83 - - adcb $7, %al
+# CHECK-NEXT: - - 0.83 0.33 - 0.83 - - adcb $7, %dil
+# CHECK-NEXT: - - 1.00 1.00 1.00 1.00 1.00 1.00 adcb $7, (%rax)
+# CHECK-NEXT: - - 0.83 0.33 - 0.83 - - adcb %sil, %dil
+# CHECK-NEXT: - - 1.33 0.33 1.00 1.33 1.00 1.00 adcb %sil, (%rax)
+# CHECK-NEXT: - - 0.83 0.33 - 0.83 0.50 0.50 adcb (%rax), %dil
+# CHECK-NEXT: - - 0.83 0.33 - 0.83 - - adcw $511, %ax
+# CHECK-NEXT: - - 0.83 0.33 - 0.83 - - adcw $511, %di
+# CHECK-NEXT: - - 1.00 1.00 1.00 1.00 1.00 1.00 adcw $511, (%rax)
+# CHECK-NEXT: - - 0.83 0.33 - 0.83 - - adcw $7, %di
+# CHECK-NEXT: - - 1.00 1.00 1.00 1.00 1.00 1.00 adcw $7, (%rax)
+# CHECK-NEXT: - - 0.83 0.33 - 0.83 - - adcw %si, %di
+# CHECK-NEXT: - - 1.33 0.33 1.00 1.33 1.00 1.00 adcw %si, (%rax)
+# CHECK-NEXT: - - 0.83 0.33 - 0.83 0.50 0.50 adcw (%rax), %di
+# CHECK-NEXT: - - 0.83 0.33 - 0.83 - - adcl $665536, %eax
+# CHECK-NEXT: - - 0.83 0.33 - 0.83 - - adcl $665536, %edi
+# CHECK-NEXT: - - 1.00 1.00 1.00 1.00 1.00 1.00 adcl $665536, (%rax)
+# CHECK-NEXT: - - 0.83 0.33 - 0.83 - - adcl $7, %edi
+# CHECK-NEXT: - - 1.00 1.00 1.00 1.00 1.00 1.00 adcl $7, (%rax)
+# CHECK-NEXT: - - 0.83 0.33 - 0.83 - - adcl %esi, %edi
+# CHECK-NEXT: - - 1.33 0.33 1.00 1.33 1.00 1.00 adcl %esi, (%rax)
+# CHECK-NEXT: - - 0.83 0.33 - 0.83 0.50 0.50 adcl (%rax), %edi
+# CHECK-NEXT: - - 0.83 0.33 - 0.83 - - adcq $665536, %rax
+# CHECK-NEXT: - - 0.83 0.33 - 0.83 - - adcq $665536, %rdi
+# CHECK-NEXT: - - 1.00 1.00 1.00 1.00 1.00 1.00 adcq $665536, (%rax)
+# CHECK-NEXT: - - 0.83 0.33 - 0.83 - - adcq $7, %rdi
+# CHECK-NEXT: - - 1.00 1.00 1.00 1.00 1.00 1.00 adcq $7, (%rax)
+# CHECK-NEXT: - - 0.83 0.33 - 0.83 - - adcq %rsi, %rdi
+# CHECK-NEXT: - - 1.33 0.33 1.00 1.33 1.00 1.00 adcq %rsi, (%rax)
+# CHECK-NEXT: - - 0.83 0.33 - 0.83 0.50 0.50 adcq (%rax), %rdi
+# CHECK-NEXT: - - 0.33 0.33 - 0.33 - - addb $7, %al
+# CHECK-NEXT: - - 0.33 0.33 - 0.33 - - addb $7, %dil
+# CHECK-NEXT: - - 0.33 0.33 1.00 0.33 1.00 1.00 addb $7, (%rax)
+# CHECK-NEXT: - - 0.33 0.33 - 0.33 - - addb %sil, %dil
+# CHECK-NEXT: - - 0.33 0.33 1.00 0.33 1.00 1.00 addb %sil, (%rax)
+# CHECK-NEXT: - - 0.33 0.33 - 0.33 0.50 0.50 addb (%rax), %dil
+# CHECK-NEXT: - - 0.33 0.33 - 0.33 - - addw $511, %ax
+# CHECK-NEXT: - - 0.33 0.33 - 0.33 - - addw $511, %di
+# CHECK-NEXT: - - 0.33 0.33 1.00 0.33 1.00 1.00 addw $511, (%rax)
+# CHECK-NEXT: - - 0.33 0.33 - 0.33 - - addw $7, %di
+# CHECK-NEXT: - - 0.33 0.33 1.00 0.33 1.00 1.00 addw $7, (%rax)
+# CHECK-NEXT: - - 0.33 0.33 - 0.33 - - addw %si, %di
+# CHECK-NEXT: - - 0.33 0.33 1.00 0.33 1.00 1.00 addw %si, (%rax)
+# CHECK-NEXT: - - 0.33 0.33 - 0.33 0.50 0.50 addw (%rax), %di
+# CHECK-NEXT: - - 0.33 0.33 - 0.33 - - addl $665536, %eax
+# CHECK-NEXT: - - 0.33 0.33 - 0.33 - - addl $665536, %edi
+# CHECK-NEXT: - - 0.33 0.33 1.00 0.33 1.00 1.00 addl $665536, (%rax)
+# CHECK-NEXT: - - 0.33 0.33 - 0.33 - - addl $7, %edi
+# CHECK-NEXT: - - 0.33 0.33 1.00 0.33 1.00 1.00 addl $7, (%rax)
+# CHECK-NEXT: - - 0.33 0.33 - 0.33 - - addl %esi, %edi
+# CHECK-NEXT: - - 0.33 0.33 1.00 0.33 1.00 1.00 addl %esi, (%rax)
+# CHECK-NEXT: - - 0.33 0.33 - 0.33 0.50 0.50 addl (%rax), %edi
+# CHECK-NEXT: - - 0.33 0.33 - 0.33 - - addq $665536, %rax
+# CHECK-NEXT: - - 0.33 0.33 - 0.33 - - addq $665536, %rdi
+# CHECK-NEXT: - - 0.33 0.33 1.00 0.33 1.00 1.00 addq $665536, (%rax)
+# CHECK-NEXT: - - 0.33 0.33 - 0.33 - - addq $7, %rdi
+# CHECK-NEXT: - - 0.33 0.33 1.00 0.33 1.00 1.00 addq $7, (%rax)
+# CHECK-NEXT: - - 0.33 0.33 - 0.33 - - addq %rsi, %rdi
+# CHECK-NEXT: - - 0.33 0.33 1.00 0.33 1.00 1.00 addq %rsi, (%rax)
+# CHECK-NEXT: - - 0.33 0.33 - 0.33 0.50 0.50 addq (%rax), %rdi
+# CHECK-NEXT: - - 0.33 0.33 - 0.33 - - andb $7, %al
+# CHECK-NEXT: - - 0.33 0.33 - 0.33 - - andb $7, %dil
+# CHECK-NEXT: - - 0.33 0.33 1.00 0.33 1.00 1.00 andb $7, (%rax)
+# CHECK-NEXT: - - 0.33 0.33 - 0.33 - - andb %sil, %dil
+# CHECK-NEXT: - - 0.33 0.33 1.00 0.33 1.00 1.00 andb %sil, (%rax)
+# CHECK-NEXT: - - 0.33 0.33 - 0.33 0.50 0.50 andb (%rax), %dil
+# CHECK-NEXT: - - 0.33 0.33 - 0.33 - - andw $511, %ax
+# CHECK-NEXT: - - 0.33 0.33 - 0.33 - - andw $511, %di
+# CHECK-NEXT: - - 0.33 0.33 1.00 0.33 1.00 1.00 andw $511, (%rax)
+# CHECK-NEXT: - - 0.33 0.33 - 0.33 - - andw $7, %di
+# CHECK-NEXT: - - 0.33 0.33 1.00 0.33 1.00 1.00 andw $7, (%rax)
+# CHECK-NEXT: - - 0.33 0.33 - 0.33 - - andw %si, %di
+# CHECK-NEXT: - - 0.33 0.33 1.00 0.33 1.00 1.00 andw %si, (%rax)
+# CHECK-NEXT: - - 0.33 0.33 - 0.33 0.50 0.50 andw (%rax), %di
+# CHECK-NEXT: - - 0.33 0.33 - 0.33 - - andl $665536, %eax
+# CHECK-NEXT: - - 0.33 0.33 - 0.33 - - andl $665536, %edi
+# CHECK-NEXT: - - 0.33 0.33 1.00 0.33 1.00 1.00 andl $665536, (%rax)
+# CHECK-NEXT: - - 0.33 0.33 - 0.33 - - andl $7, %edi
+# CHECK-NEXT: - - 0.33 0.33 1.00 0.33 1.00 1.00 andl $7, (%rax)
+# CHECK-NEXT: - - 0.33 0.33 - 0.33 - - andl %esi, %edi
+# CHECK-NEXT: - - 0.33 0.33 1.00 0.33 1.00 1.00 andl %esi, (%rax)
+# CHECK-NEXT: - - 0.33 0.33 - 0.33 0.50 0.50 andl (%rax), %edi
+# CHECK-NEXT: - - 0.33 0.33 - 0.33 - - andq $665536, %rax
+# CHECK-NEXT: - - 0.33 0.33 - 0.33 - - andq $665536, %rdi
+# CHECK-NEXT: - - 0.33 0.33 1.00 0.33 1.00 1.00 andq $665536, (%rax)
+# CHECK-NEXT: - - 0.33 0.33 - 0.33 - - andq $7, %rdi
+# CHECK-NEXT: - - 0.33 0.33 1.00 0.33 1.00 1.00 andq $7, (%rax)
+# CHECK-NEXT: - - 0.33 0.33 - 0.33 - - andq %rsi, %rdi
+# CHECK-NEXT: - - 0.33 0.33 1.00 0.33 1.00 1.00 andq %rsi, (%rax)
+# CHECK-NEXT: - - 0.33 0.33 - 0.33 0.50 0.50 andq (%rax), %rdi
+# CHECK-NEXT: - - - 1.00 - - - - bsfw %si, %di
+# CHECK-NEXT: - - - 1.00 - - - - bsrw %si, %di
+# CHECK-NEXT: - - - 1.00 - - 0.50 0.50 bsfw (%rax), %di
+# CHECK-NEXT: - - - 1.00 - - 0.50 0.50 bsrw (%rax), %di
+# CHECK-NEXT: - - - 1.00 - - - - bsfl %esi, %edi
+# CHECK-NEXT: - - - 1.00 - - - - bsrl %esi, %edi
+# CHECK-NEXT: - - - 1.00 - - 0.50 0.50 bsfl (%rax), %edi
+# CHECK-NEXT: - - - 1.00 - - 0.50 0.50 bsrl (%rax), %edi
+# CHECK-NEXT: - - - 1.00 - - - - bsfq %rsi, %rdi
+# CHECK-NEXT: - - - 1.00 - - - - bsrq %rsi, %rdi
+# CHECK-NEXT: - - - 1.00 - - 0.50 0.50 bsfq (%rax), %rdi
+# CHECK-NEXT: - - - 1.00 - - 0.50 0.50 bsrq (%rax), %rdi
+# CHECK-NEXT: - - - 1.00 - - - - bswapl %eax
+# CHECK-NEXT: - - 0.50 1.00 - 0.50 - - bswapq %rax
+# CHECK-NEXT: - - 0.50 - - 0.50 - - btw %si, %di
+# CHECK-NEXT: - - 0.50 - - 0.50 - - btcw %si, %di
+# CHECK-NEXT: - - 0.50 - - 0.50 - - btrw %si, %di
+# CHECK-NEXT: - - 0.50 - - 0.50 - - btsw %si, %di
+# CHECK-NEXT: - - 0.83 0.33 1.00 1.83 1.00 1.00 btw %si, (%rax)
+# CHECK-NEXT: - - 0.83 0.33 1.00 1.83 1.00 1.00 btcw %si, (%rax)
+# CHECK-NEXT: - - 0.83 0.33 1.00 1.83 1.00 1.00 btrw %si, (%rax)
+# CHECK-NEXT: - - 0.83 0.33 1.00 1.83 1.00 1.00 btsw %si, (%rax)
+# CHECK-NEXT: - - 0.50 - - 0.50 - - btw $7, %di
+# CHECK-NEXT: - - 0.50 - - 0.50 - - btcw $7, %di
+# CHECK-NEXT: - - 0.50 - - 0.50 - - btrw $7, %di
+# CHECK-NEXT: - - 0.50 - - 0.50 - - btsw $7, %di
+# CHECK-NEXT: - - 0.50 - - 0.50 0.50 0.50 btw $7, (%rax)
+# CHECK-NEXT: - - 0.50 - 1.00 0.50 1.00 1.00 btcw $7, (%rax)
+# CHECK-NEXT: - - 0.50 - 1.00 0.50 1.00 1.00 btrw $7, (%rax)
+# CHECK-NEXT: - - 0.50 - 1.00 0.50 1.00 1.00 btsw $7, (%rax)
+# CHECK-NEXT: - - 0.50 - - 0.50 - - btl %esi, %edi
+# CHECK-NEXT: - - 0.50 - - 0.50 - - btcl %esi, %edi
+# CHECK-NEXT: - - 0.50 - - 0.50 - - btrl %esi, %edi
+# CHECK-NEXT: - - 0.50 - - 0.50 - - btsl %esi, %edi
+# CHECK-NEXT: - - 0.83 0.33 1.00 1.83 1.00 1.00 btl %esi, (%rax)
+# CHECK-NEXT: - - 0.83 0.33 1.00 1.83 1.00 1.00 btcl %esi, (%rax)
+# CHECK-NEXT: - - 0.83 0.33 1.00 1.83 1.00 1.00 btrl %esi, (%rax)
+# CHECK-NEXT: - - 0.83 0.33 1.00 1.83 1.00 1.00 btsl %esi, (%rax)
+# CHECK-NEXT: - - 0.50 - - 0.50 - - btl $7, %edi
+# CHECK-NEXT: - - 0.50 - - 0.50 - - btcl $7, %edi
+# CHECK-NEXT: - - 0.50 - - 0.50 - - btrl $7, %edi
+# CHECK-NEXT: - - 0.50 - - 0.50 - - btsl $7, %edi
+# CHECK-NEXT: - - 0.50 - - 0.50 0.50 0.50 btl $7, (%rax)
+# CHECK-NEXT: - - 0.50 - 1.00 0.50 1.00 1.00 btcl $7, (%rax)
+# CHECK-NEXT: - - 0.50 - 1.00 0.50 1.00 1.00 btrl $7, (%rax)
+# CHECK-NEXT: - - 0.50 - 1.00 0.50 1.00 1.00 btsl $7, (%rax)
+# CHECK-NEXT: - - 0.50 - - 0.50 - - btq %rsi, %rdi
+# CHECK-NEXT: - - 0.50 - - 0.50 - - btcq %rsi, %rdi
+# CHECK-NEXT: - - 0.50 - - 0.50 - - btrq %rsi, %rdi
+# CHECK-NEXT: - - 0.50 - - 0.50 - - btsq %rsi, %rdi
+# CHECK-NEXT: - - 0.83 0.33 1.00 1.83 1.00 1.00 btq %rsi, (%rax)
+# CHECK-NEXT: - - 0.83 0.33 1.00 1.83 1.00 1.00 btcq %rsi, (%rax)
+# CHECK-NEXT: - - 0.83 0.33 1.00 1.83 1.00 1.00 btrq %rsi, (%rax)
+# CHECK-NEXT: - - 0.83 0.33 1.00 1.83 1.00 1.00 btsq %rsi, (%rax)
+# CHECK-NEXT: - - 0.50 - - 0.50 - - btq $7, %rdi
+# CHECK-NEXT: - - 0.50 - - 0.50 - - btcq $7, %rdi
+# CHECK-NEXT: - - 0.50 - - 0.50 - - btrq $7, %rdi
+# CHECK-NEXT: - - 0.50 - - 0.50 - - btsq $7, %rdi
+# CHECK-NEXT: - - 0.50 - - 0.50 0.50 0.50 btq $7, (%rax)
+# CHECK-NEXT: - - 0.50 - 1.00 0.50 1.00 1.00 btcq $7, (%rax)
+# CHECK-NEXT: - - 0.50 - 1.00 0.50 1.00 1.00 btrq $7, (%rax)
+# CHECK-NEXT: - - 0.50 - 1.00 0.50 1.00 1.00 btsq $7, (%rax)
+# CHECK-NEXT: - - 0.33 0.33 - 0.33 - - cbtw
+# CHECK-NEXT: - - 0.33 0.33 - 0.33 - - cwtl
+# CHECK-NEXT: - - 0.33 0.33 - 0.33 - - cltq
+# CHECK-NEXT: - - 1.33 0.33 - 0.33 - - cwtd
+# CHECK-NEXT: - - 0.50 - - 0.50 - - cltd
+# CHECK-NEXT: - - 0.50 - - 0.50 - - cqto
+# CHECK-NEXT: - - - - - - - - clc
+# CHECK-NEXT: - - 0.33 0.33 - 0.33 - - cld
+# CHECK-NEXT: - - 0.33 0.33 - 0.33 - - cmc
+# CHECK-NEXT: - - 0.33 0.33 - 0.33 - - cmpb $7, %al
+# CHECK-NEXT: - - 0.33 0.33 - 0.33 - - cmpb $7, %dil
+# CHECK-NEXT: - - 0.33 0.33 - 0.33 0.50 0.50 cmpb $7, (%rax)
+# CHECK-NEXT: - - 0.33 0.33 - 0.33 - - cmpb %sil, %dil
+# CHECK-NEXT: - - 0.33 0.33 - 0.33 0.50 0.50 cmpb %sil, (%rax)
+# CHECK-NEXT: - - 0.33 0.33 - 0.33 0.50 0.50 cmpb (%rax), %dil
+# CHECK-NEXT: - - 0.33 0.33 - 0.33 - - cmpw $511, %ax
+# CHECK-NEXT: - - 0.33 0.33 - 0.33 - - cmpw $511, %di
+# CHECK-NEXT: - - 0.33 0.33 - 0.33 0.50 0.50 cmpw $511, (%rax)
+# CHECK-NEXT: - - 0.33 0.33 - 0.33 - - cmpw $7, %di
+# CHECK-NEXT: - - 0.33 0.33 - 0.33 0.50 0.50 cmpw $7, (%rax)
+# CHECK-NEXT: - - 0.33 0.33 - 0.33 - - cmpw %si, %di
+# CHECK-NEXT: - - 0.33 0.33 - 0.33 0.50 0.50 cmpw %si, (%rax)
+# CHECK-NEXT: - - 0.33 0.33 - 0.33 0.50 0.50 cmpw (%rax), %di
+# CHECK-NEXT: - - 0.33 0.33 - 0.33 - - cmpl $665536, %eax
+# CHECK-NEXT: - - 0.33 0.33 - 0.33 - - cmpl $665536, %edi
+# CHECK-NEXT: - - 0.33 0.33 - 0.33 0.50 0.50 cmpl $665536, (%rax)
+# CHECK-NEXT: - - 0.33 0.33 - 0.33 - - cmpl $7, %edi
+# CHECK-NEXT: - - 0.33 0.33 - 0.33 0.50 0.50 cmpl $7, (%rax)
+# CHECK-NEXT: - - 0.33 0.33 - 0.33 - - cmpl %esi, %edi
+# CHECK-NEXT: - - 0.33 0.33 - 0.33 0.50 0.50 cmpl %esi, (%rax)
+# CHECK-NEXT: - - 0.33 0.33 - 0.33 0.50 0.50 cmpl (%rax), %edi
+# CHECK-NEXT: - - 0.33 0.33 - 0.33 - - cmpq $665536, %rax
+# CHECK-NEXT: - - 0.33 0.33 - 0.33 - - cmpq $665536, %rdi
+# CHECK-NEXT: - - 0.33 0.33 - 0.33 0.50 0.50 cmpq $665536, (%rax)
+# CHECK-NEXT: - - 0.33 0.33 - 0.33 - - cmpq $7, %rdi
+# CHECK-NEXT: - - 0.33 0.33 - 0.33 0.50 0.50 cmpq $7, (%rax)
+# CHECK-NEXT: - - 0.33 0.33 - 0.33 - - cmpq %rsi, %rdi
+# CHECK-NEXT: - - 0.33 0.33 - 0.33 0.50 0.50 cmpq %rsi, (%rax)
+# CHECK-NEXT: - - 0.33 0.33 - 0.33 0.50 0.50 cmpq (%rax), %rdi
+# CHECK-NEXT: - - 1.00 1.00 - 1.00 1.00 1.00 cmpsb %es:(%rdi), (%rsi)
+# CHECK-NEXT: - - 1.00 1.00 - 1.00 1.00 1.00 cmpsw %es:(%rdi), (%rsi)
+# CHECK-NEXT: - - 1.00 1.00 - 1.00 1.00 1.00 cmpsl %es:(%rdi), (%rsi)
+# CHECK-NEXT: - - 1.00 1.00 - 1.00 1.00 1.00 cmpsq %es:(%rdi), (%rsi)
+# CHECK-NEXT: - - 1.50 1.00 - 1.50 - - cmpxchgb %cl, %bl
+# CHECK-NEXT: - - 0.33 0.33 1.00 2.33 1.00 1.00 cmpxchgb %cl, (%rbx)
+# CHECK-NEXT: - - 1.50 1.00 - 1.50 - - cmpxchgw %cx, %bx
+# CHECK-NEXT: - - 0.33 0.33 1.00 2.33 1.00 1.00 cmpxchgw %cx, (%rbx)
+# CHECK-NEXT: - - 1.50 1.00 - 1.50 - - cmpxchgl %ecx, %ebx
+# CHECK-NEXT: - - 0.33 0.33 1.00 2.33 1.00 1.00 cmpxchgl %ecx, (%rbx)
+# CHECK-NEXT: - - 1.50 1.00 - 1.50 - - cmpxchgq %rcx, %rbx
+# CHECK-NEXT: - - 0.33 0.33 1.00 2.33 1.00 1.00 cmpxchgq %rcx, (%rbx)
+# CHECK-NEXT: - - 0.33 0.33 - 0.33 - - cpuid
+# CHECK-NEXT: - - 0.33 0.33 - 0.33 - - decb %dil
+# CHECK-NEXT: - - 0.33 0.33 1.00 0.33 1.00 1.00 decb (%rax)
+# CHECK-NEXT: - - 0.33 0.33 - 0.33 - - decw %di
+# CHECK-NEXT: - - 0.33 0.33 1.00 0.33 1.00 1.00 decw (%rax)
+# CHECK-NEXT: - - 0.33 0.33 - 0.33 - - decl %edi
+# CHECK-NEXT: - - 0.33 0.33 1.00 0.33 1.00 1.00 decl (%rax)
+# CHECK-NEXT: - - 0.33 0.33 - 0.33 - - decq %rdi
+# CHECK-NEXT: - - 0.33 0.33 1.00 0.33 1.00 1.00 decq (%rax)
+# CHECK-NEXT: 10.00 - 1.00 - - - - - divb %dil
+# CHECK-NEXT: 10.00 - 1.00 - - - 0.50 0.50 divb (%rax)
+# CHECK-NEXT: 10.00 - 1.00 - - - - - divw %si
+# CHECK-NEXT: 10.00 - 1.00 - - - 0.50 0.50 divw (%rax)
+# CHECK-NEXT: 10.00 - 1.00 - - - - - divl %edx
+# CHECK-NEXT: 10.00 - 1.00 - - - 0.50 0.50 divl (%rax)
+# CHECK-NEXT: 10.00 - 1.00 - - - - - divq %rcx
+# CHECK-NEXT: 10.00 - 1.00 - - - 0.50 0.50 divq (%rax)
+# CHECK-NEXT: 10.00 - 1.00 - - - - - idivb %dil
+# CHECK-NEXT: 10.00 - 1.00 - - - 0.50 0.50 idivb (%rax)
+# CHECK-NEXT: 10.00 - 1.00 - - - - - idivw %si
+# CHECK-NEXT: 10.00 - 1.00 - - - 0.50 0.50 idivw (%rax)
+# CHECK-NEXT: 10.00 - 1.00 - - - - - idivl %edx
+# CHECK-NEXT: 10.00 - 1.00 - - - 0.50 0.50 idivl (%rax)
+# CHECK-NEXT: 10.00 - 1.00 - - - - - idivq %rcx
+# CHECK-NEXT: 10.00 - 1.00 - - - 0.50 0.50 idivq (%rax)
+# CHECK-NEXT: - - - 1.00 - - - - imulb %dil
+# CHECK-NEXT: - - - 1.00 - - 0.50 0.50 imulb (%rax)
+# CHECK-NEXT: - - 1.17 1.67 - 1.17 - - imulw %di
+# CHECK-NEXT: - - 1.17 1.67 - 1.17 0.50 0.50 imulw (%rax)
+# CHECK-NEXT: - - - 1.00 - - - - imulw %si, %di
+# CHECK-NEXT: - - - 1.00 - - 0.50 0.50 imulw (%rax), %di
+# CHECK-NEXT: - - 0.33 1.33 - 0.33 - - imulw $511, %si, %di
+# CHECK-NEXT: - - 0.33 1.33 - 0.33 0.50 0.50 imulw $511, (%rax), %di
+# CHECK-NEXT: - - 0.33 1.33 - 0.33 - - imulw $7, %si, %di
+# CHECK-NEXT: - - 0.33 1.33 - 0.33 0.50 0.50 imulw $7, (%rax), %di
+# CHECK-NEXT: - - 0.83 1.33 - 0.83 - - imull %edi
+# CHECK-NEXT: - - 0.83 1.33 - 0.83 0.50 0.50 imull (%rax)
+# CHECK-NEXT: - - - 1.00 - - - - imull %esi, %edi
+# CHECK-NEXT: - - - 1.00 - - 0.50 0.50 imull (%rax), %edi
+# CHECK-NEXT: - - - 1.00 - - - - imull $665536, %esi, %edi
+# CHECK-NEXT: - - - 1.00 - - 0.50 0.50 imull $665536, (%rax), %edi
+# CHECK-NEXT: - - - 1.00 - - - - imull $7, %esi, %edi
+# CHECK-NEXT: - - - 1.00 - - 0.50 0.50 imull $7, (%rax), %edi
+# CHECK-NEXT: - - 1.00 1.00 - - - - imulq %rdi
+# CHECK-NEXT: - - 1.00 1.00 - - 0.50 0.50 imulq (%rax)
+# CHECK-NEXT: - - - 1.00 - - - - imulq %rsi, %rdi
+# CHECK-NEXT: - - - 1.00 - - 0.50 0.50 imulq (%rax), %rdi
+# CHECK-NEXT: - - - 1.00 - - - - imulq $665536, %rsi, %rdi
+# CHECK-NEXT: - - - 1.00 - - 0.50 0.50 imulq $665536, (%rax), %rdi
+# CHECK-NEXT: - - - 1.00 - - - - imulq $7, %rsi, %rdi
+# CHECK-NEXT: - - - 1.00 - - 0.50 0.50 imulq $7, (%rax), %rdi
+# CHECK-NEXT: - - 0.33 0.33 - 0.33 - - inb $7, %al
+# CHECK-NEXT: - - 0.33 0.33 - 0.33 - - inb %dx, %al
+# CHECK-NEXT: - - 0.33 0.33 - 0.33 - - inw $7, %ax
+# CHECK-NEXT: - - 0.33 0.33 - 0.33 - - inw %dx, %ax
+# CHECK-NEXT: - - 0.33 0.33 - 0.33 - - inl $7, %eax
+# CHECK-NEXT: - - 0.33 0.33 - 0.33 - - inl %dx, %eax
+# CHECK-NEXT: - - 0.33 0.33 - 0.33 - - incb %dil
+# CHECK-NEXT: - - 0.33 0.33 1.00 0.33 1.00 1.00 incb (%rax)
+# CHECK-NEXT: - - 0.33 0.33 - 0.33 - - incw %di
+# CHECK-NEXT: - - 0.33 0.33 1.00 0.33 1.00 1.00 incw (%rax)
+# CHECK-NEXT: - - 0.33 0.33 - 0.33 - - incl %edi
+# CHECK-NEXT: - - 0.33 0.33 1.00 0.33 1.00 1.00 incl (%rax)
+# CHECK-NEXT: - - 0.33 0.33 - 0.33 - - incq %rdi
+# CHECK-NEXT: - - 0.33 0.33 1.00 0.33 1.00 1.00 incq (%rax)
+# CHECK-NEXT: - - 0.33 0.33 - 0.33 - - insb %dx, %es:(%rdi)
+# CHECK-NEXT: - - 0.33 0.33 - 0.33 - - insw %dx, %es:(%rdi)
+# CHECK-NEXT: - - 0.33 0.33 - 0.33 - - insl %dx, %es:(%rdi)
+# CHECK-NEXT: - - 0.33 0.33 - 0.33 - - int $7
+# CHECK-NEXT: - - 0.50 - - 0.50 - - lahf
+# CHECK-NEXT: - - 0.67 0.67 - 0.67 0.50 0.50 lodsb (%rsi), %al
+# CHECK-NEXT: - - 0.67 0.67 - 0.67 0.50 0.50 lodsw (%rsi), %ax
+# CHECK-NEXT: - - 0.33 0.33 - 0.33 0.50 0.50 lodsl (%rsi), %eax
+# CHECK-NEXT: - - 0.33 0.33 - 0.33 0.50 0.50 lodsq (%rsi), %rax
+# CHECK-NEXT: - - 0.67 0.67 1.00 0.67 1.00 1.00 movsb (%rsi), %es:(%rdi)
+# CHECK-NEXT: - - 0.67 0.67 1.00 0.67 1.00 1.00 movsw (%rsi), %es:(%rdi)
+# CHECK-NEXT: - - 0.67 0.67 1.00 0.67 1.00 1.00 movsl (%rsi), %es:(%rdi)
+# CHECK-NEXT: - - 0.67 0.67 1.00 0.67 1.00 1.00 movsq (%rsi), %es:(%rdi)
+# CHECK-NEXT: - - 0.33 0.33 - 0.33 - - movsbw %al, %di
+# CHECK-NEXT: - - 0.33 0.33 - 0.33 - - movzbw %al, %di
+# CHECK-NEXT: - - - - - - 0.50 0.50 movsbw (%rax), %di
+# CHECK-NEXT: - - - - - - 0.50 0.50 movzbw (%rax), %di
+# CHECK-NEXT: - - 0.33 0.33 - 0.33 - - movsbl %al, %edi
+# CHECK-NEXT: - - 0.33 0.33 - 0.33 - - movzbl %al, %edi
+# CHECK-NEXT: - - - - - - 0.50 0.50 movsbl (%rax), %edi
+# CHECK-NEXT: - - - - - - 0.50 0.50 movzbl (%rax), %edi
+# CHECK-NEXT: - - 0.33 0.33 - 0.33 - - movsbq %al, %rdi
+# CHECK-NEXT: - - 0.33 0.33 - 0.33 - - movzbq %al, %rdi
+# CHECK-NEXT: - - - - - - 0.50 0.50 movsbq (%rax), %rdi
+# CHECK-NEXT: - - - - - - 0.50 0.50 movzbq (%rax), %rdi
+# CHECK-NEXT: - - 0.33 0.33 - 0.33 - - movswl %ax, %edi
+# CHECK-NEXT: - - 0.33 0.33 - 0.33 - - movzwl %ax, %edi
+# CHECK-NEXT: - - - - - - 0.50 0.50 movswl (%rax), %edi
+# CHECK-NEXT: - - - - - - 0.50 0.50 movzwl (%rax), %edi
+# CHECK-NEXT: - - 0.33 0.33 - 0.33 - - movswq %ax, %rdi
+# CHECK-NEXT: - - 0.33 0.33 - 0.33 - - movzwq %ax, %rdi
+# CHECK-NEXT: - - - - - - 0.50 0.50 movswq (%rax), %rdi
+# CHECK-NEXT: - - - - - - 0.50 0.50 movzwq (%rax), %rdi
+# CHECK-NEXT: - - 0.33 0.33 - 0.33 - - movslq %eax, %rdi
+# CHECK-NEXT: - - - - - - 0.50 0.50 movslq (%rax), %rdi
+# CHECK-NEXT: - - - 1.00 - - - - mulb %dil
+# CHECK-NEXT: - - - 1.00 - - 0.50 0.50 mulb (%rax)
+# CHECK-NEXT: - - 1.17 1.67 - 1.17 - - mulw %si
+# CHECK-NEXT: - - 1.17 1.67 - 1.17 0.50 0.50 mulw (%rax)
+# CHECK-NEXT: - - 0.83 1.33 - 0.83 - - mull %edx
+# CHECK-NEXT: - - 0.83 1.33 - 0.83 0.50 0.50 mull (%rax)
+# CHECK-NEXT: - - 1.00 1.00 - - - - mulq %rcx
+# CHECK-NEXT: - - 1.00 1.00 - - 0.50 0.50 mulq (%rax)
+# CHECK-NEXT: - - 0.33 0.33 - 0.33 - - negb %dil
+# CHECK-NEXT: - - 0.33 0.33 1.00 0.33 1.00 1.00 negb (%r8)
+# CHECK-NEXT: - - 0.33 0.33 - 0.33 - - negw %si
+# CHECK-NEXT: - - 0.33 0.33 1.00 0.33 1.00 1.00 negw (%r9)
+# CHECK-NEXT: - - 0.33 0.33 - 0.33 - - negl %edx
+# CHECK-NEXT: - - 0.33 0.33 1.00 0.33 1.00 1.00 negl (%rax)
+# CHECK-NEXT: - - 0.33 0.33 - 0.33 - - negq %rcx
+# CHECK-NEXT: - - 0.33 0.33 1.00 0.33 1.00 1.00 negq (%r10)
+# CHECK-NEXT: - - - - - - - - nop
+# CHECK-NEXT: - - - - - - - - nopw %di
+# CHECK-NEXT: - - - - - - - - nopw (%rcx)
+# CHECK-NEXT: - - - - - - - - nopl %esi
+# CHECK-NEXT: - - - - - - - - nopl (%r8)
+# CHECK-NEXT: - - - - - - - - nopq %rdx
+# CHECK-NEXT: - - - - - - - - nopq (%r9)
+# CHECK-NEXT: - - 0.33 0.33 - 0.33 - - notb %dil
+# CHECK-NEXT: - - 0.33 0.33 1.00 0.33 1.00 1.00 notb (%r8)
+# CHECK-NEXT: - - 0.33 0.33 - 0.33 - - notw %si
+# CHECK-NEXT: - - 0.33 0.33 1.00 0.33 1.00 1.00 notw (%r9)
+# CHECK-NEXT: - - 0.33 0.33 - 0.33 - - notl %edx
+# CHECK-NEXT: - - 0.33 0.33 1.00 0.33 1.00 1.00 notl (%rax)
+# CHECK-NEXT: - - 0.33 0.33 - 0.33 - - notq %rcx
+# CHECK-NEXT: - - 0.33 0.33 1.00 0.33 1.00 1.00 notq (%r10)
+# CHECK-NEXT: - - 0.33 0.33 - 0.33 - - orb $7, %al
+# CHECK-NEXT: - - 0.33 0.33 - 0.33 - - orb $7, %dil
+# CHECK-NEXT: - - 0.33 0.33 1.00 0.33 1.00 1.00 orb $7, (%rax)
+# CHECK-NEXT: - - 0.33 0.33 - 0.33 - - orb %sil, %dil
+# CHECK-NEXT: - - 0.33 0.33 1.00 0.33 1.00 1.00 orb %sil, (%rax)
+# CHECK-NEXT: - - 0.33 0.33 - 0.33 0.50 0.50 orb (%rax), %dil
+# CHECK-NEXT: - - 0.33 0.33 - 0.33 - - orw $511, %ax
+# CHECK-NEXT: - - 0.33 0.33 - 0.33 - - orw $511, %di
+# CHECK-NEXT: - - 0.33 0.33 1.00 0.33 1.00 1.00 orw $511, (%rax)
+# CHECK-NEXT: - - 0.33 0.33 - 0.33 - - orw $7, %di
+# CHECK-NEXT: - - 0.33 0.33 1.00 0.33 1.00 1.00 orw $7, (%rax)
+# CHECK-NEXT: - - 0.33 0.33 - 0.33 - - orw %si, %di
+# CHECK-NEXT: - - 0.33 0.33 1.00 0.33 1.00 1.00 orw %si, (%rax)
+# CHECK-NEXT: - - 0.33 0.33 - 0.33 0.50 0.50 orw (%rax), %di
+# CHECK-NEXT: - - 0.33 0.33 - 0.33 - - orl $665536, %eax
+# CHECK-NEXT: - - 0.33 0.33 - 0.33 - - orl $665536, %edi
+# CHECK-NEXT: - - 0.33 0.33 1.00 0.33 1.00 1.00 orl $665536, (%rax)
+# CHECK-NEXT: - - 0.33 0.33 - 0.33 - - orl $7, %edi
+# CHECK-NEXT: - - 0.33 0.33 1.00 0.33 1.00 1.00 orl $7, (%rax)
+# CHECK-NEXT: - - 0.33 0.33 - 0.33 - - orl %esi, %edi
+# CHECK-NEXT: - - 0.33 0.33 1.00 0.33 1.00 1.00 orl %esi, (%rax)
+# CHECK-NEXT: - - 0.33 0.33 - 0.33 0.50 0.50 orl (%rax), %edi
+# CHECK-NEXT: - - 0.33 0.33 - 0.33 - - orq $665536, %rax
+# CHECK-NEXT: - - 0.33 0.33 - 0.33 - - orq $665536, %rdi
+# CHECK-NEXT: - - 0.33 0.33 1.00 0.33 1.00 1.00 orq $665536, (%rax)
+# CHECK-NEXT: - - 0.33 0.33 - 0.33 - - orq $7, %rdi
+# CHECK-NEXT: - - 0.33 0.33 1.00 0.33 1.00 1.00 orq $7, (%rax)
+# CHECK-NEXT: - - 0.33 0.33 - 0.33 - - orq %rsi, %rdi
+# CHECK-NEXT: - - 0.33 0.33 1.00 0.33 1.00 1.00 orq %rsi, (%rax)
+# CHECK-NEXT: - - 0.33 0.33 - 0.33 0.50 0.50 orq (%rax), %rdi
+# CHECK-NEXT: - - 0.33 0.33 - 0.33 - - outb %al, $7
+# CHECK-NEXT: - - 0.33 0.33 - 0.33 - - outb %al, %dx
+# CHECK-NEXT: - - 0.33 0.33 - 0.33 - - outw %ax, $7
+# CHECK-NEXT: - - 0.33 0.33 - 0.33 - - outw %ax, %dx
+# CHECK-NEXT: - - 0.33 0.33 - 0.33 - - outl %eax, $7
+# CHECK-NEXT: - - 0.33 0.33 - 0.33 - - outl %eax, %dx
+# CHECK-NEXT: - - 0.33 0.33 - 0.33 - - outsb (%rsi), %dx
+# CHECK-NEXT: - - 0.33 0.33 - 0.33 - - outsw (%rsi), %dx
+# CHECK-NEXT: - - 0.33 0.33 - 0.33 - - outsl (%rsi), %dx
+# CHECK-NEXT: - - 1.00 1.00 - 2.00 - - pause
+# CHECK-NEXT: - - 1.50 - - 1.50 - - rclb %dil
+# CHECK-NEXT: - - 1.50 - - 1.50 - - rcrb %dil
+# CHECK-NEXT: - - 3.50 - - 3.50 2.00 2.00 rclb (%rax)
+# CHECK-NEXT: - - 3.50 - - 3.50 2.00 2.00 rcrb (%rax)
+# CHECK-NEXT: - - 4.00 - - 4.00 - - rclb $7, %dil
+# CHECK-NEXT: - - 4.00 - - 4.00 - - rcrb $7, %dil
+# CHECK-NEXT: - - 3.50 - - 3.50 2.00 2.00 rclb $7, (%rax)
+# CHECK-NEXT: - - 3.50 - - 3.50 2.00 2.00 rcrb $7, (%rax)
+# CHECK-NEXT: - - 4.00 - - 4.00 - - rclb %cl, %dil
+# CHECK-NEXT: - - 4.00 - - 4.00 - - rcrb %cl, %dil
+# CHECK-NEXT: - - 3.50 - - 3.50 2.00 2.00 rclb %cl, (%rax)
+# CHECK-NEXT: - - 3.50 - - 3.50 2.00 2.00 rcrb %cl, (%rax)
+# CHECK-NEXT: - - 1.50 - - 1.50 - - rclw %di
+# CHECK-NEXT: - - 1.50 - - 1.50 - - rcrw %di
+# CHECK-NEXT: - - 3.50 - - 3.50 2.00 2.00 rclw (%rax)
+# CHECK-NEXT: - - 3.50 - - 3.50 2.00 2.00 rcrw (%rax)
+# CHECK-NEXT: - - 4.00 - - 4.00 - - rclw $7, %di
+# CHECK-NEXT: - - 4.00 - - 4.00 - - rcrw $7, %di
+# CHECK-NEXT: - - 3.50 - - 3.50 2.00 2.00 rclw $7, (%rax)
+# CHECK-NEXT: - - 3.50 - - 3.50 2.00 2.00 rcrw $7, (%rax)
+# CHECK-NEXT: - - 4.00 - - 4.00 - - rclw %cl, %di
+# CHECK-NEXT: - - 4.00 - - 4.00 - - rcrw %cl, %di
+# CHECK-NEXT: - - 3.50 - - 3.50 2.00 2.00 rclw %cl, (%rax)
+# CHECK-NEXT: - - 3.50 - - 3.50 2.00 2.00 rcrw %cl, (%rax)
+# CHECK-NEXT: - - 1.50 - - 1.50 - - rcll %edi
+# CHECK-NEXT: - - 1.50 - - 1.50 - - rcrl %edi
+# CHECK-NEXT: - - 3.50 - - 3.50 2.00 2.00 rcll (%rax)
+# CHECK-NEXT: - - 3.50 - - 3.50 2.00 2.00 rcrl (%rax)
+# CHECK-NEXT: - - 4.00 - - 4.00 - - rcll $7, %edi
+# CHECK-NEXT: - - 4.00 - - 4.00 - - rcrl $7, %edi
+# CHECK-NEXT: - - 3.50 - - 3.50 2.00 2.00 rcll $7, (%rax)
+# CHECK-NEXT: - - 3.50 - - 3.50 2.00 2.00 rcrl $7, (%rax)
+# CHECK-NEXT: - - 4.00 - - 4.00 - - rcll %cl, %edi
+# CHECK-NEXT: - - 4.00 - - 4.00 - - rcrl %cl, %edi
+# CHECK-NEXT: - - 3.50 - - 3.50 2.00 2.00 rcll %cl, (%rax)
+# CHECK-NEXT: - - 3.50 - - 3.50 2.00 2.00 rcrl %cl, (%rax)
+# CHECK-NEXT: - - 1.50 - - 1.50 - - rclq %rdi
+# CHECK-NEXT: - - 1.50 - - 1.50 - - rcrq %rdi
+# CHECK-NEXT: - - 3.50 - - 3.50 2.00 2.00 rclq (%rax)
+# CHECK-NEXT: - - 3.50 - - 3.50 2.00 2.00 rcrq (%rax)
+# CHECK-NEXT: - - 4.00 - - 4.00 - - rclq $7, %rdi
+# CHECK-NEXT: - - 4.00 - - 4.00 - - rcrq $7, %rdi
+# CHECK-NEXT: - - 3.50 - - 3.50 2.00 2.00 rclq $7, (%rax)
+# CHECK-NEXT: - - 3.50 - - 3.50 2.00 2.00 rcrq $7, (%rax)
+# CHECK-NEXT: - - 4.00 - - 4.00 - - rclq %cl, %rdi
+# CHECK-NEXT: - - 4.00 - - 4.00 - - rcrq %cl, %rdi
+# CHECK-NEXT: - - 3.50 - - 3.50 2.00 2.00 rclq %cl, (%rax)
+# CHECK-NEXT: - - 3.50 - - 3.50 2.00 2.00 rcrq %cl, (%rax)
+# CHECK-NEXT: - - 1.00 - - 1.00 - - rolb %dil
+# CHECK-NEXT: - - 1.00 - - 1.00 - - rorb %dil
+# CHECK-NEXT: - - 1.00 - 1.00 1.00 1.00 1.00 rolb (%rax)
+# CHECK-NEXT: - - 1.00 - 1.00 1.00 1.00 1.00 rorb (%rax)
+# CHECK-NEXT: - - 1.00 - - 1.00 - - rolb $7, %dil
+# CHECK-NEXT: - - 1.00 - - 1.00 - - rorb $7, %dil
+# CHECK-NEXT: - - 1.00 - 1.00 1.00 1.00 1.00 rolb $7, (%rax)
+# CHECK-NEXT: - - 1.00 - 1.00 1.00 1.00 1.00 rorb $7, (%rax)
+# CHECK-NEXT: - - 1.50 - - 1.50 - - rolb %cl, %dil
+# CHECK-NEXT: - - 1.50 - - 1.50 - - rorb %cl, %dil
+# CHECK-NEXT: - - 1.50 - 1.00 1.50 1.00 1.00 rolb %cl, (%rax)
+# CHECK-NEXT: - - 1.50 - 1.00 1.50 1.00 1.00 rorb %cl, (%rax)
+# CHECK-NEXT: - - 1.00 - - 1.00 - - rolw %di
+# CHECK-NEXT: - - 1.00 - - 1.00 - - rorw %di
+# CHECK-NEXT: - - 1.00 - 1.00 1.00 1.00 1.00 rolw (%rax)
+# CHECK-NEXT: - - 1.00 - 1.00 1.00 1.00 1.00 rorw (%rax)
+# CHECK-NEXT: - - 1.00 - - 1.00 - - rolw $7, %di
+# CHECK-NEXT: - - 1.00 - - 1.00 - - rorw $7, %di
+# CHECK-NEXT: - - 1.00 - 1.00 1.00 1.00 1.00 rolw $7, (%rax)
+# CHECK-NEXT: - - 1.00 - 1.00 1.00 1.00 1.00 rorw $7, (%rax)
+# CHECK-NEXT: - - 1.50 - - 1.50 - - rolw %cl, %di
+# CHECK-NEXT: - - 1.50 - - 1.50 - - rorw %cl, %di
+# CHECK-NEXT: - - 1.50 - 1.00 1.50 1.00 1.00 rolw %cl, (%rax)
+# CHECK-NEXT: - - 1.50 - 1.00 1.50 1.00 1.00 rorw %cl, (%rax)
+# CHECK-NEXT: - - 1.00 - - 1.00 - - roll %edi
+# CHECK-NEXT: - - 1.00 - - 1.00 - - rorl %edi
+# CHECK-NEXT: - - 1.00 - 1.00 1.00 1.00 1.00 roll (%rax)
+# CHECK-NEXT: - - 1.00 - 1.00 1.00 1.00 1.00 rorl (%rax)
+# CHECK-NEXT: - - 1.00 - - 1.00 - - roll $7, %edi
+# CHECK-NEXT: - - 1.00 - - 1.00 - - rorl $7, %edi
+# CHECK-NEXT: - - 1.00 - 1.00 1.00 1.00 1.00 roll $7, (%rax)
+# CHECK-NEXT: - - 1.00 - 1.00 1.00 1.00 1.00 rorl $7, (%rax)
+# CHECK-NEXT: - - 1.50 - - 1.50 - - roll %cl, %edi
+# CHECK-NEXT: - - 1.50 - - 1.50 - - rorl %cl, %edi
+# CHECK-NEXT: - - 1.50 - 1.00 1.50 1.00 1.00 roll %cl, (%rax)
+# CHECK-NEXT: - - 1.50 - 1.00 1.50 1.00 1.00 rorl %cl, (%rax)
+# CHECK-NEXT: - - 1.00 - - 1.00 - - rolq %rdi
+# CHECK-NEXT: - - 1.00 - - 1.00 - - rorq %rdi
+# CHECK-NEXT: - - 1.00 - 1.00 1.00 1.00 1.00 rolq (%rax)
+# CHECK-NEXT: - - 1.00 - 1.00 1.00 1.00 1.00 rorq (%rax)
+# CHECK-NEXT: - - 1.00 - - 1.00 - - rolq $7, %rdi
+# CHECK-NEXT: - - 1.00 - - 1.00 - - rorq $7, %rdi
+# CHECK-NEXT: - - 1.00 - 1.00 1.00 1.00 1.00 rolq $7, (%rax)
+# CHECK-NEXT: - - 1.00 - 1.00 1.00 1.00 1.00 rorq $7, (%rax)
+# CHECK-NEXT: - - 1.50 - - 1.50 - - rolq %cl, %rdi
+# CHECK-NEXT: - - 1.50 - - 1.50 - - rorq %cl, %rdi
+# CHECK-NEXT: - - 1.50 - 1.00 1.50 1.00 1.00 rolq %cl, (%rax)
+# CHECK-NEXT: - - 1.50 - 1.00 1.50 1.00 1.00 rorq %cl, (%rax)
+# CHECK-NEXT: - - 0.50 - - 0.50 - - sahf
+# CHECK-NEXT: - - 0.50 - - 0.50 - - sarb %dil
+# CHECK-NEXT: - - 0.50 - - 0.50 - - shlb %dil
+# CHECK-NEXT: - - 0.50 - - 0.50 - - shrb %dil
+# CHECK-NEXT: - - 0.50 - 1.00 0.50 1.00 1.00 sarb (%rax)
+# CHECK-NEXT: - - 0.50 - 1.00 0.50 1.00 1.00 shlb (%rax)
+# CHECK-NEXT: - - 0.50 - 1.00 0.50 1.00 1.00 shrb (%rax)
+# CHECK-NEXT: - - 0.50 - - 0.50 - - sarb $7, %dil
+# CHECK-NEXT: - - 0.50 - - 0.50 - - shlb $7, %dil
+# CHECK-NEXT: - - 0.50 - - 0.50 - - shrb $7, %dil
+# CHECK-NEXT: - - 0.50 - 1.00 0.50 1.00 1.00 sarb $7, (%rax)
+# CHECK-NEXT: - - 0.50 - 1.00 0.50 1.00 1.00 shlb $7, (%rax)
+# CHECK-NEXT: - - 0.50 - 1.00 0.50 1.00 1.00 shrb $7, (%rax)
+# CHECK-NEXT: - - 1.50 - - 1.50 - - sarb %cl, %dil
+# CHECK-NEXT: - - 1.50 - - 1.50 - - shlb %cl, %dil
+# CHECK-NEXT: - - 1.50 - - 1.50 - - shrb %cl, %dil
+# CHECK-NEXT: - - 1.50 - 1.00 1.50 1.00 1.00 sarb %cl, (%rax)
+# CHECK-NEXT: - - 1.50 - 1.00 1.50 1.00 1.00 shlb %cl, (%rax)
+# CHECK-NEXT: - - 1.50 - 1.00 1.50 1.00 1.00 shrb %cl, (%rax)
+# CHECK-NEXT: - - 0.50 - - 0.50 - - sarw %di
+# CHECK-NEXT: - - 0.50 - - 0.50 - - shlw %di
+# CHECK-NEXT: - - 0.50 - - 0.50 - - shrw %di
+# CHECK-NEXT: - - 0.50 - 1.00 0.50 1.00 1.00 sarw (%rax)
+# CHECK-NEXT: - - 0.50 - 1.00 0.50 1.00 1.00 shlw (%rax)
+# CHECK-NEXT: - - 0.50 - 1.00 0.50 1.00 1.00 shrw (%rax)
+# CHECK-NEXT: - - 0.50 - - 0.50 - - sarw $7, %di
+# CHECK-NEXT: - - 0.50 - - 0.50 - - shlw $7, %di
+# CHECK-NEXT: - - 0.50 - - 0.50 - - shrw $7, %di
+# CHECK-NEXT: - - 0.50 - 1.00 0.50 1.00 1.00 sarw $7, (%rax)
+# CHECK-NEXT: - - 0.50 - 1.00 0.50 1.00 1.00 shlw $7, (%rax)
+# CHECK-NEXT: - - 0.50 - 1.00 0.50 1.00 1.00 shrw $7, (%rax)
+# CHECK-NEXT: - - 1.50 - - 1.50 - - sarw %cl, %di
+# CHECK-NEXT: - - 1.50 - - 1.50 - - shlw %cl, %di
+# CHECK-NEXT: - - 1.50 - - 1.50 - - shrw %cl, %di
+# CHECK-NEXT: - - 1.50 - 1.00 1.50 1.00 1.00 sarw %cl, (%rax)
+# CHECK-NEXT: - - 1.50 - 1.00 1.50 1.00 1.00 shlw %cl, (%rax)
+# CHECK-NEXT: - - 1.50 - 1.00 1.50 1.00 1.00 shrw %cl, (%rax)
+# CHECK-NEXT: - - 0.50 - - 0.50 - - sarl %edi
+# CHECK-NEXT: - - 0.50 - - 0.50 - - shll %edi
+# CHECK-NEXT: - - 0.50 - - 0.50 - - shrl %edi
+# CHECK-NEXT: - - 0.50 - 1.00 0.50 1.00 1.00 sarl (%rax)
+# CHECK-NEXT: - - 0.50 - 1.00 0.50 1.00 1.00 shll (%rax)
+# CHECK-NEXT: - - 0.50 - 1.00 0.50 1.00 1.00 shrl (%rax)
+# CHECK-NEXT: - - 0.50 - - 0.50 - - sarl $7, %edi
+# CHECK-NEXT: - - 0.50 - - 0.50 - - shll $7, %edi
+# CHECK-NEXT: - - 0.50 - - 0.50 - - shrl $7, %edi
+# CHECK-NEXT: - - 0.50 - 1.00 0.50 1.00 1.00 sarl $7, (%rax)
+# CHECK-NEXT: - - 0.50 - 1.00 0.50 1.00 1.00 shll $7, (%rax)
+# CHECK-NEXT: - - 0.50 - 1.00 0.50 1.00 1.00 shrl $7, (%rax)
+# CHECK-NEXT: - - 1.50 - - 1.50 - - sarl %cl, %edi
+# CHECK-NEXT: - - 1.50 - - 1.50 - - shll %cl, %edi
+# CHECK-NEXT: - - 1.50 - - 1.50 - - shrl %cl, %edi
+# CHECK-NEXT: - - 1.50 - 1.00 1.50 1.00 1.00 sarl %cl, (%rax)
+# CHECK-NEXT: - - 1.50 - 1.00 1.50 1.00 1.00 shll %cl, (%rax)
+# CHECK-NEXT: - - 1.50 - 1.00 1.50 1.00 1.00 shrl %cl, (%rax)
+# CHECK-NEXT: - - 0.50 - - 0.50 - - sarq %rdi
+# CHECK-NEXT: - - 0.50 - - 0.50 - - shlq %rdi
+# CHECK-NEXT: - - 0.50 - - 0.50 - - shrq %rdi
+# CHECK-NEXT: - - 0.50 - 1.00 0.50 1.00 1.00 sarq (%rax)
+# CHECK-NEXT: - - 0.50 - 1.00 0.50 1.00 1.00 shlq (%rax)
+# CHECK-NEXT: - - 0.50 - 1.00 0.50 1.00 1.00 shrq (%rax)
+# CHECK-NEXT: - - 0.50 - - 0.50 - - sarq $7, %rdi
+# CHECK-NEXT: - - 0.50 - - 0.50 - - shlq $7, %rdi
+# CHECK-NEXT: - - 0.50 - - 0.50 - - shrq $7, %rdi
+# CHECK-NEXT: - - 0.50 - 1.00 0.50 1.00 1.00 sarq $7, (%rax)
+# CHECK-NEXT: - - 0.50 - 1.00 0.50 1.00 1.00 shlq $7, (%rax)
+# CHECK-NEXT: - - 0.50 - 1.00 0.50 1.00 1.00 shrq $7, (%rax)
+# CHECK-NEXT: - - 1.50 - - 1.50 - - sarq %cl, %rdi
+# CHECK-NEXT: - - 1.50 - - 1.50 - - shlq %cl, %rdi
+# CHECK-NEXT: - - 1.50 - - 1.50 - - shrq %cl, %rdi
+# CHECK-NEXT: - - 1.50 - 1.00 1.50 1.00 1.00 sarq %cl, (%rax)
+# CHECK-NEXT: - - 1.50 - 1.00 1.50 1.00 1.00 shlq %cl, (%rax)
+# CHECK-NEXT: - - 1.50 - 1.00 1.50 1.00 1.00 shrq %cl, (%rax)
+# CHECK-NEXT: - - 0.83 0.33 - 0.83 - - sbbb $7, %al
+# CHECK-NEXT: - - 0.83 0.33 - 0.83 - - sbbb $7, %dil
+# CHECK-NEXT: - - 1.00 1.00 1.00 1.00 1.00 1.00 sbbb $7, (%rax)
+# CHECK-NEXT: - - 0.83 0.33 - 0.83 - - sbbb %sil, %dil
+# CHECK-NEXT: - - 1.33 0.33 1.00 1.33 1.00 1.00 sbbb %sil, (%rax)
+# CHECK-NEXT: - - 0.83 0.33 - 0.83 0.50 0.50 sbbb (%rax), %dil
+# CHECK-NEXT: - - 0.83 0.33 - 0.83 - - sbbw $511, %ax
+# CHECK-NEXT: - - 0.83 0.33 - 0.83 - - sbbw $511, %di
+# CHECK-NEXT: - - 1.00 1.00 1.00 1.00 1.00 1.00 sbbw $511, (%rax)
+# CHECK-NEXT: - - 0.83 0.33 - 0.83 - - sbbw $7, %di
+# CHECK-NEXT: - - 1.00 1.00 1.00 1.00 1.00 1.00 sbbw $7, (%rax)
+# CHECK-NEXT: - - 0.83 0.33 - 0.83 - - sbbw %si, %di
+# CHECK-NEXT: - - 1.33 0.33 1.00 1.33 1.00 1.00 sbbw %si, (%rax)
+# CHECK-NEXT: - - 0.83 0.33 - 0.83 0.50 0.50 sbbw (%rax), %di
+# CHECK-NEXT: - - 0.83 0.33 - 0.83 - - sbbl $665536, %eax
+# CHECK-NEXT: - - 0.83 0.33 - 0.83 - - sbbl $665536, %edi
+# CHECK-NEXT: - - 1.00 1.00 1.00 1.00 1.00 1.00 sbbl $665536, (%rax)
+# CHECK-NEXT: - - 0.83 0.33 - 0.83 - - sbbl $7, %edi
+# CHECK-NEXT: - - 1.00 1.00 1.00 1.00 1.00 1.00 sbbl $7, (%rax)
+# CHECK-NEXT: - - 0.83 0.33 - 0.83 - - sbbl %esi, %edi
+# CHECK-NEXT: - - 1.33 0.33 1.00 1.33 1.00 1.00 sbbl %esi, (%rax)
+# CHECK-NEXT: - - 0.83 0.33 - 0.83 0.50 0.50 sbbl (%rax), %edi
+# CHECK-NEXT: - - 0.83 0.33 - 0.83 - - sbbq $665536, %rax
+# CHECK-NEXT: - - 0.83 0.33 - 0.83 - - sbbq $665536, %rdi
+# CHECK-NEXT: - - 1.00 1.00 1.00 1.00 1.00 1.00 sbbq $665536, (%rax)
+# CHECK-NEXT: - - 0.83 0.33 - 0.83 - - sbbq $7, %rdi
+# CHECK-NEXT: - - 1.00 1.00 1.00 1.00 1.00 1.00 sbbq $7, (%rax)
+# CHECK-NEXT: - - 0.83 0.33 - 0.83 - - sbbq %rsi, %rdi
+# CHECK-NEXT: - - 1.33 0.33 1.00 1.33 1.00 1.00 sbbq %rsi, (%rax)
+# CHECK-NEXT: - - 0.83 0.33 - 0.83 0.50 0.50 sbbq (%rax), %rdi
+# CHECK-NEXT: - - 0.67 0.67 - 0.67 - - scasb %es:(%rdi), %al
+# CHECK-NEXT: - - 0.67 0.67 - 0.67 - - scasw %es:(%rdi), %ax
+# CHECK-NEXT: - - 0.67 0.67 - 0.67 - - scasl %es:(%rdi), %eax
+# CHECK-NEXT: - - 0.67 0.67 - 0.67 - - scasq %es:(%rdi), %rax
+# CHECK-NEXT: - - 0.50 - - 0.50 - - seto %al
+# CHECK-NEXT: - - 0.50 - 1.00 0.50 0.50 0.50 seto (%rax)
+# CHECK-NEXT: - - 0.50 - - 0.50 - - setno %al
+# CHECK-NEXT: - - 0.50 - 1.00 0.50 0.50 0.50 setno (%rax)
+# CHECK-NEXT: - - 0.50 - - 0.50 - - setb %al
+# CHECK-NEXT: - - 0.50 - 1.00 0.50 0.50 0.50 setb (%rax)
+# CHECK-NEXT: - - 0.50 - - 0.50 - - setae %al
+# CHECK-NEXT: - - 0.50 - 1.00 0.50 0.50 0.50 setae (%rax)
+# CHECK-NEXT: - - 0.50 - - 0.50 - - sete %al
+# CHECK-NEXT: - - 0.50 - 1.00 0.50 0.50 0.50 sete (%rax)
+# CHECK-NEXT: - - 0.50 - - 0.50 - - setne %al
+# CHECK-NEXT: - - 0.50 - 1.00 0.50 0.50 0.50 setne (%rax)
+# CHECK-NEXT: - - 1.00 - - 1.00 - - seta %al
+# CHECK-NEXT: - - 1.00 - 1.00 1.00 0.50 0.50 seta (%rax)
+# CHECK-NEXT: - - 1.00 - - 1.00 - - setbe %al
+# CHECK-NEXT: - - 1.00 - 1.00 1.00 0.50 0.50 setbe (%rax)
+# CHECK-NEXT: - - 0.50 - - 0.50 - - sets %al
+# CHECK-NEXT: - - 0.50 - 1.00 0.50 0.50 0.50 sets (%rax)
+# CHECK-NEXT: - - 0.50 - - 0.50 - - setns %al
+# CHECK-NEXT: - - 0.50 - 1.00 0.50 0.50 0.50 setns (%rax)
+# CHECK-NEXT: - - 0.50 - - 0.50 - - setp %al
+# CHECK-NEXT: - - 0.50 - 1.00 0.50 0.50 0.50 setp (%rax)
+# CHECK-NEXT: - - 0.50 - - 0.50 - - setnp %al
+# CHECK-NEXT: - - 0.50 - 1.00 0.50 0.50 0.50 setnp (%rax)
+# CHECK-NEXT: - - 0.50 - - 0.50 - - setl %al
+# CHECK-NEXT: - - 0.50 - 1.00 0.50 0.50 0.50 setl (%rax)
+# CHECK-NEXT: - - 0.50 - - 0.50 - - setge %al
+# CHECK-NEXT: - - 0.50 - 1.00 0.50 0.50 0.50 setge (%rax)
+# CHECK-NEXT: - - 0.50 - - 0.50 - - setg %al
+# CHECK-NEXT: - - 0.50 - 1.00 0.50 0.50 0.50 setg (%rax)
+# CHECK-NEXT: - - 0.50 - - 0.50 - - setle %al
+# CHECK-NEXT: - - 0.50 - 1.00 0.50 0.50 0.50 setle (%rax)
+# CHECK-NEXT: - - 1.83 0.33 - 1.83 - - shldw %cl, %si, %di
+# CHECK-NEXT: - - 1.83 0.33 - 1.83 - - shrdw %cl, %si, %di
+# CHECK-NEXT: - - 1.83 0.33 1.00 1.83 1.00 1.00 shldw %cl, %si, (%rax)
+# CHECK-NEXT: - - 1.83 0.33 1.00 1.83 1.00 1.00 shrdw %cl, %si, (%rax)
+# CHECK-NEXT: - - 0.83 0.33 - 0.83 - - shldw $7, %si, %di
+# CHECK-NEXT: - - 0.83 0.33 - 0.83 - - shrdw $7, %si, %di
+# CHECK-NEXT: - - 0.83 0.33 1.00 0.83 1.00 1.00 shldw $7, %si, (%rax)
+# CHECK-NEXT: - - 0.83 0.33 1.00 0.83 1.00 1.00 shrdw $7, %si, (%rax)
+# CHECK-NEXT: - - 1.83 0.33 - 1.83 - - shldl %cl, %esi, %edi
+# CHECK-NEXT: - - 1.83 0.33 - 1.83 - - shrdl %cl, %esi, %edi
+# CHECK-NEXT: - - 1.83 0.33 1.00 1.83 1.00 1.00 shldl %cl, %esi, (%rax)
+# CHECK-NEXT: - - 1.83 0.33 1.00 1.83 1.00 1.00 shrdl %cl, %esi, (%rax)
+# CHECK-NEXT: - - 0.83 0.33 - 0.83 - - shldl $7, %esi, %edi
+# CHECK-NEXT: - - 0.83 0.33 - 0.83 - - shrdl $7, %esi, %edi
+# CHECK-NEXT: - - 0.83 0.33 1.00 0.83 1.00 1.00 shldl $7, %esi, (%rax)
+# CHECK-NEXT: - - 0.83 0.33 1.00 0.83 1.00 1.00 shrdl $7, %esi, (%rax)
+# CHECK-NEXT: - - 1.83 0.33 - 1.83 - - shldq %cl, %rsi, %rdi
+# CHECK-NEXT: - - 1.83 0.33 - 1.83 - - shrdq %cl, %rsi, %rdi
+# CHECK-NEXT: - - 1.83 0.33 1.00 1.83 1.00 1.00 shldq %cl, %rsi, (%rax)
+# CHECK-NEXT: - - 1.83 0.33 1.00 1.83 1.00 1.00 shrdq %cl, %rsi, (%rax)
+# CHECK-NEXT: - - 0.83 0.33 - 0.83 - - shldq $7, %rsi, %rdi
+# CHECK-NEXT: - - 0.83 0.33 - 0.83 - - shrdq $7, %rsi, %rdi
+# CHECK-NEXT: - - 0.83 0.33 1.00 0.83 1.00 1.00 shldq $7, %rsi, (%rax)
+# CHECK-NEXT: - - 0.83 0.33 1.00 0.83 1.00 1.00 shrdq $7, %rsi, (%rax)
+# CHECK-NEXT: - - 0.33 0.33 - 0.33 - - stc
+# CHECK-NEXT: - - 0.33 0.33 - 0.33 - - std
+# CHECK-NEXT: - - 0.33 0.33 1.00 0.33 0.50 0.50 stosb %al, %es:(%rdi)
+# CHECK-NEXT: - - 0.33 0.33 1.00 0.33 0.50 0.50 stosw %ax, %es:(%rdi)
+# CHECK-NEXT: - - 0.33 0.33 1.00 0.33 0.50 0.50 stosl %eax, %es:(%rdi)
+# CHECK-NEXT: - - 0.33 0.33 1.00 0.33 0.50 0.50 stosq %rax, %es:(%rdi)
+# CHECK-NEXT: - - 0.33 0.33 - 0.33 - - subb $7, %al
+# CHECK-NEXT: - - 0.33 0.33 - 0.33 - - subb $7, %dil
+# CHECK-NEXT: - - 0.33 0.33 1.00 0.33 1.00 1.00 subb $7, (%rax)
+# CHECK-NEXT: - - 0.33 0.33 - 0.33 - - subb %sil, %dil
+# CHECK-NEXT: - - 0.33 0.33 1.00 0.33 1.00 1.00 subb %sil, (%rax)
+# CHECK-NEXT: - - 0.33 0.33 - 0.33 0.50 0.50 subb (%rax), %dil
+# CHECK-NEXT: - - 0.33 0.33 - 0.33 - - subw $511, %ax
+# CHECK-NEXT: - - 0.33 0.33 - 0.33 - - subw $511, %di
+# CHECK-NEXT: - - 0.33 0.33 1.00 0.33 1.00 1.00 subw $511, (%rax)
+# CHECK-NEXT: - - 0.33 0.33 - 0.33 - - subw $7, %di
+# CHECK-NEXT: - - 0.33 0.33 1.00 0.33 1.00 1.00 subw $7, (%rax)
+# CHECK-NEXT: - - 0.33 0.33 - 0.33 - - subw %si, %di
+# CHECK-NEXT: - - 0.33 0.33 1.00 0.33 1.00 1.00 subw %si, (%rax)
+# CHECK-NEXT: - - 0.33 0.33 - 0.33 0.50 0.50 subw (%rax), %di
+# CHECK-NEXT: - - 0.33 0.33 - 0.33 - - subl $665536, %eax
+# CHECK-NEXT: - - 0.33 0.33 - 0.33 - - subl $665536, %edi
+# CHECK-NEXT: - - 0.33 0.33 1.00 0.33 1.00 1.00 subl $665536, (%rax)
+# CHECK-NEXT: - - 0.33 0.33 - 0.33 - - subl $7, %edi
+# CHECK-NEXT: - - 0.33 0.33 1.00 0.33 1.00 1.00 subl $7, (%rax)
+# CHECK-NEXT: - - 0.33 0.33 - 0.33 - - subl %esi, %edi
+# CHECK-NEXT: - - 0.33 0.33 1.00 0.33 1.00 1.00 subl %esi, (%rax)
+# CHECK-NEXT: - - 0.33 0.33 - 0.33 0.50 0.50 subl (%rax), %edi
+# CHECK-NEXT: - - 0.33 0.33 - 0.33 - - subq $665536, %rax
+# CHECK-NEXT: - - 0.33 0.33 - 0.33 - - subq $665536, %rdi
+# CHECK-NEXT: - - 0.33 0.33 1.00 0.33 1.00 1.00 subq $665536, (%rax)
+# CHECK-NEXT: - - 0.33 0.33 - 0.33 - - subq $7, %rdi
+# CHECK-NEXT: - - 0.33 0.33 1.00 0.33 1.00 1.00 subq $7, (%rax)
+# CHECK-NEXT: - - 0.33 0.33 - 0.33 - - subq %rsi, %rdi
+# CHECK-NEXT: - - 0.33 0.33 1.00 0.33 1.00 1.00 subq %rsi, (%rax)
+# CHECK-NEXT: - - 0.33 0.33 - 0.33 0.50 0.50 subq (%rax), %rdi
+# CHECK-NEXT: - - 0.33 0.33 - 0.33 - - testb $7, %al
+# CHECK-NEXT: - - 0.33 0.33 - 0.33 - - testb $7, %dil
+# CHECK-NEXT: - - 0.33 0.33 - 0.33 0.50 0.50 testb $7, (%rax)
+# CHECK-NEXT: - - 0.33 0.33 - 0.33 - - testb %sil, %dil
+# CHECK-NEXT: - - 0.33 0.33 - 0.33 0.50 0.50 testb %sil, (%rax)
+# CHECK-NEXT: - - 0.33 0.33 - 0.33 - - testw $511, %ax
+# CHECK-NEXT: - - 0.33 0.33 - 0.33 - - testw $511, %di
+# CHECK-NEXT: - - 0.33 0.33 - 0.33 0.50 0.50 testw $511, (%rax)
+# CHECK-NEXT: - - 0.33 0.33 - 0.33 - - testw $7, %di
+# CHECK-NEXT: - - 0.33 0.33 - 0.33 0.50 0.50 testw $7, (%rax)
+# CHECK-NEXT: - - 0.33 0.33 - 0.33 - - testw %si, %di
+# CHECK-NEXT: - - 0.33 0.33 - 0.33 0.50 0.50 testw %si, (%rax)
+# CHECK-NEXT: - - 0.33 0.33 - 0.33 - - testl $665536, %eax
+# CHECK-NEXT: - - 0.33 0.33 - 0.33 - - testl $665536, %edi
+# CHECK-NEXT: - - 0.33 0.33 - 0.33 0.50 0.50 testl $665536, (%rax)
+# CHECK-NEXT: - - 0.33 0.33 - 0.33 - - testl $7, %edi
+# CHECK-NEXT: - - 0.33 0.33 - 0.33 0.50 0.50 testl $7, (%rax)
+# CHECK-NEXT: - - 0.33 0.33 - 0.33 - - testl %esi, %edi
+# CHECK-NEXT: - - 0.33 0.33 - 0.33 0.50 0.50 testl %esi, (%rax)
+# CHECK-NEXT: - - 0.33 0.33 - 0.33 - - testq $665536, %rax
+# CHECK-NEXT: - - 0.33 0.33 - 0.33 - - testq $665536, %rdi
+# CHECK-NEXT: - - 0.33 0.33 - 0.33 0.50 0.50 testq $665536, (%rax)
+# CHECK-NEXT: - - 0.33 0.33 - 0.33 - - testq $7, %rdi
+# CHECK-NEXT: - - 0.33 0.33 - 0.33 0.50 0.50 testq $7, (%rax)
+# CHECK-NEXT: - - 0.33 0.33 - 0.33 - - testq %rsi, %rdi
+# CHECK-NEXT: - - 0.33 0.33 - 0.33 0.50 0.50 testq %rsi, (%rax)
+# CHECK-NEXT: - - 0.33 0.33 - 0.33 - - ud2
+# CHECK-NEXT: - - 1.00 1.00 - 1.00 - - xaddb %bl, %cl
+# CHECK-NEXT: - - 0.67 0.67 1.00 0.67 1.00 1.00 xaddb %bl, (%rcx)
+# CHECK-NEXT: - - 1.00 1.00 - 1.00 - - xaddw %bx, %cx
+# CHECK-NEXT: - - 0.67 0.67 1.00 0.67 1.00 1.00 xaddw %ax, (%rbx)
+# CHECK-NEXT: - - 1.00 1.00 - 1.00 - - xaddl %ebx, %ecx
+# CHECK-NEXT: - - 0.67 0.67 1.00 0.67 1.00 1.00 xaddl %eax, (%rbx)
+# CHECK-NEXT: - - 1.00 1.00 - 1.00 - - xaddq %rbx, %rcx
+# CHECK-NEXT: - - 0.67 0.67 1.00 0.67 1.00 1.00 xaddq %rax, (%rbx)
+# CHECK-NEXT: - - 1.00 1.00 - 1.00 - - xchgb %bl, %cl
+# CHECK-NEXT: - - 0.33 0.33 1.00 0.33 1.00 1.00 xchgb %bl, (%rbx)
+# CHECK-NEXT: - - 1.00 1.00 - 1.00 - - xchgw %bx, %ax
+# CHECK-NEXT: - - 1.00 1.00 - 1.00 - - xchgw %bx, %cx
+# CHECK-NEXT: - - 0.33 0.33 1.00 0.33 1.00 1.00 xchgw %ax, (%rbx)
+# CHECK-NEXT: - - 1.00 1.00 - 1.00 - - xchgl %ebx, %eax
+# CHECK-NEXT: - - 1.00 1.00 - 1.00 - - xchgl %ebx, %ecx
+# CHECK-NEXT: - - 0.33 0.33 1.00 0.33 1.00 1.00 xchgl %eax, (%rbx)
+# CHECK-NEXT: - - 1.00 1.00 - 1.00 - - xchgq %rbx, %rax
+# CHECK-NEXT: - - 1.00 1.00 - 1.00 - - xchgq %rbx, %rcx
+# CHECK-NEXT: - - 0.33 0.33 1.00 0.33 1.00 1.00 xchgq %rax, (%rbx)
+# CHECK-NEXT: - - - - - - 0.50 0.50 xlatb
+# CHECK-NEXT: - - 0.33 0.33 - 0.33 - - xorb $7, %al
+# CHECK-NEXT: - - 0.33 0.33 - 0.33 - - xorb $7, %dil
+# CHECK-NEXT: - - 0.33 0.33 1.00 0.33 1.00 1.00 xorb $7, (%rax)
+# CHECK-NEXT: - - 0.33 0.33 - 0.33 - - xorb %sil, %dil
+# CHECK-NEXT: - - 0.33 0.33 1.00 0.33 1.00 1.00 xorb %sil, (%rax)
+# CHECK-NEXT: - - 0.33 0.33 - 0.33 0.50 0.50 xorb (%rax), %dil
+# CHECK-NEXT: - - 0.33 0.33 - 0.33 - - xorw $511, %ax
+# CHECK-NEXT: - - 0.33 0.33 - 0.33 - - xorw $511, %di
+# CHECK-NEXT: - - 0.33 0.33 1.00 0.33 1.00 1.00 xorw $511, (%rax)
+# CHECK-NEXT: - - 0.33 0.33 - 0.33 - - xorw $7, %di
+# CHECK-NEXT: - - 0.33 0.33 1.00 0.33 1.00 1.00 xorw $7, (%rax)
+# CHECK-NEXT: - - 0.33 0.33 - 0.33 - - xorw %si, %di
+# CHECK-NEXT: - - 0.33 0.33 1.00 0.33 1.00 1.00 xorw %si, (%rax)
+# CHECK-NEXT: - - 0.33 0.33 - 0.33 0.50 0.50 xorw (%rax), %di
+# CHECK-NEXT: - - 0.33 0.33 - 0.33 - - xorl $665536, %eax
+# CHECK-NEXT: - - 0.33 0.33 - 0.33 - - xorl $665536, %edi
+# CHECK-NEXT: - - 0.33 0.33 1.00 0.33 1.00 1.00 xorl $665536, (%rax)
+# CHECK-NEXT: - - 0.33 0.33 - 0.33 - - xorl $7, %edi
+# CHECK-NEXT: - - 0.33 0.33 1.00 0.33 1.00 1.00 xorl $7, (%rax)
+# CHECK-NEXT: - - 0.33 0.33 - 0.33 - - xorl %esi, %edi
+# CHECK-NEXT: - - 0.33 0.33 1.00 0.33 1.00 1.00 xorl %esi, (%rax)
+# CHECK-NEXT: - - 0.33 0.33 - 0.33 0.50 0.50 xorl (%rax), %edi
+# CHECK-NEXT: - - 0.33 0.33 - 0.33 - - xorq $665536, %rax
+# CHECK-NEXT: - - 0.33 0.33 - 0.33 - - xorq $665536, %rdi
+# CHECK-NEXT: - - 0.33 0.33 1.00 0.33 1.00 1.00 xorq $665536, (%rax)
+# CHECK-NEXT: - - 0.33 0.33 - 0.33 - - xorq $7, %rdi
+# CHECK-NEXT: - - 0.33 0.33 1.00 0.33 1.00 1.00 xorq $7, (%rax)
+# CHECK-NEXT: - - 0.33 0.33 - 0.33 - - xorq %rsi, %rdi
+# CHECK-NEXT: - - 0.33 0.33 1.00 0.33 1.00 1.00 xorq %rsi, (%rax)
+# CHECK-NEXT: - - 0.33 0.33 - 0.33 0.50 0.50 xorq (%rax), %rdi
--- /dev/null
+# NOTE: Assertions have been autogenerated by utils/update_mca_test_checks.py
+# RUN: llvm-mca -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -instruction-tables < %s | FileCheck %s
+
+f2xm1
+
+fabs
+
+fadd %st(0), %st(1)
+fadd %st(2)
+fadds (%ecx)
+faddl (%ecx)
+faddp %st(1)
+faddp %st(2)
+fiadds (%ecx)
+fiaddl (%ecx)
+
+fbld (%ecx)
+fbstp (%eax)
+
+fchs
+
+fnclex
+
+fcmovb %st(1), %st(0)
+fcmovbe %st(1), %st(0)
+fcmove %st(1), %st(0)
+fcmovnb %st(1), %st(0)
+fcmovnbe %st(1), %st(0)
+fcmovne %st(1), %st(0)
+fcmovnu %st(1), %st(0)
+fcmovu %st(1), %st(0)
+
+fcom %st(1)
+fcom %st(3)
+fcoms (%ecx)
+fcoml (%eax)
+fcomp %st(1)
+fcomp %st(3)
+fcomps (%ecx)
+fcompl (%eax)
+fcompp
+
+fcomi %st(3)
+fcompi %st(3)
+
+fcos
+
+fdecstp
+
+fdiv %st(0), %st(1)
+fdiv %st(2)
+fdivs (%ecx)
+fdivl (%eax)
+fdivp %st(1)
+fdivp %st(2)
+fidivs (%ecx)
+fidivl (%eax)
+
+fdivr %st(0), %st(1)
+fdivr %st(2)
+fdivrs (%ecx)
+fdivrl (%eax)
+fdivrp %st(1)
+fdivrp %st(2)
+fidivrs (%ecx)
+fidivrl (%eax)
+
+ffree %st(0)
+
+ficoms (%ecx)
+ficoml (%eax)
+ficomps (%ecx)
+ficompl (%eax)
+
+filds (%edx)
+fildl (%ecx)
+fildll (%eax)
+
+fincstp
+
+fninit
+
+fists (%edx)
+fistl (%ecx)
+fistps (%edx)
+fistpl (%ecx)
+fistpll (%eax)
+
+fisttps (%edx)
+fisttpl (%ecx)
+fisttpll (%eax)
+
+fld %st(0)
+flds (%edx)
+fldl (%ecx)
+fldt (%eax)
+
+fldcw (%eax)
+fldenv (%eax)
+
+fld1
+fldl2e
+fldl2t
+fldlg2
+fldln2
+fldpi
+fldz
+
+fmul %st(0), %st(1)
+fmul %st(2)
+fmuls (%ecx)
+fmull (%eax)
+fmulp %st(1)
+fmulp %st(2)
+fimuls (%ecx)
+fimull (%eax)
+
+fnop
+
+fpatan
+
+fprem
+fprem1
+
+fptan
+
+frndint
+
+frstor (%eax)
+
+fnsave (%eax)
+
+fscale
+
+fsin
+
+fsincos
+
+fsqrt
+
+fst %st(0)
+fsts (%edx)
+fstl (%ecx)
+fstp %st(0)
+fstpl (%edx)
+fstpl (%ecx)
+fstpt (%eax)
+
+fnstcw (%eax)
+fnstenv (%eax)
+fnstsw (%eax)
+
+frstor (%eax)
+fsave (%eax)
+
+fsub %st(0), %st(1)
+fsub %st(2)
+fsubs (%ecx)
+fsubl (%eax)
+fsubp %st(1)
+fsubp %st(2)
+fisubs (%ecx)
+fisubl (%eax)
+
+fsubr %st(0), %st(1)
+fsubr %st(2)
+fsubrs (%ecx)
+fsubrl (%eax)
+fsubrp %st(1)
+fsubrp %st(2)
+fisubrs (%ecx)
+fisubrl (%eax)
+
+ftst
+
+fucom %st(1)
+fucom %st(3)
+fucomp %st(1)
+fucomp %st(3)
+fucompp
+
+fucomi %st(3)
+fucompi %st(3)
+
+fwait
+
+fxam
+
+fxch %st(1)
+fxch %st(3)
+
+fxrstor (%eax)
+fxsave (%eax)
+
+fxtract
+
+fyl2x
+fyl2xp1
+
+# CHECK: Instruction Info:
+# CHECK-NEXT: [1]: #uOps
+# CHECK-NEXT: [2]: Latency
+# CHECK-NEXT: [3]: RThroughput
+# CHECK-NEXT: [4]: MayLoad
+# CHECK-NEXT: [5]: MayStore
+# CHECK-NEXT: [6]: HasSideEffects (U)
+
+# CHECK: [1] [2] [3] [4] [5] [6] Instructions:
+# CHECK-NEXT: 1 100 0.33 U f2xm1
+# CHECK-NEXT: 1 1 1.00 U fabs
+# CHECK-NEXT: 1 3 1.00 U fadd %st(0), %st(1)
+# CHECK-NEXT: 1 3 1.00 U fadd %st(2)
+# CHECK-NEXT: 2 10 1.00 * U fadds (%ecx)
+# CHECK-NEXT: 2 10 1.00 * U faddl (%ecx)
+# CHECK-NEXT: 1 3 1.00 U faddp %st(1)
+# CHECK-NEXT: 1 3 1.00 U faddp %st(2)
+# CHECK-NEXT: 3 13 2.00 * U fiadds (%ecx)
+# CHECK-NEXT: 3 13 2.00 * U fiaddl (%ecx)
+# CHECK-NEXT: 1 100 0.33 U fbld (%ecx)
+# CHECK-NEXT: 1 100 0.33 U fbstp (%eax)
+# CHECK-NEXT: 1 1 1.00 U fchs
+# CHECK-NEXT: 1 100 0.33 U fnclex
+# CHECK-NEXT: 3 3 2.00 U fcmovb %st(1), %st(0)
+# CHECK-NEXT: 3 3 2.00 U fcmovbe %st(1), %st(0)
+# CHECK-NEXT: 3 3 2.00 U fcmove %st(1), %st(0)
+# CHECK-NEXT: 3 3 2.00 U fcmovnb %st(1), %st(0)
+# CHECK-NEXT: 3 3 2.00 U fcmovnbe %st(1), %st(0)
+# CHECK-NEXT: 3 3 2.00 U fcmovne %st(1), %st(0)
+# CHECK-NEXT: 3 3 2.00 U fcmovnu %st(1), %st(0)
+# CHECK-NEXT: 3 3 2.00 U fcmovu %st(1), %st(0)
+# CHECK-NEXT: 1 1 1.00 U fcom %st(1)
+# CHECK-NEXT: 1 1 1.00 U fcom %st(3)
+# CHECK-NEXT: 2 8 1.00 U fcoms (%ecx)
+# CHECK-NEXT: 2 8 1.00 U fcoml (%eax)
+# CHECK-NEXT: 1 1 1.00 U fcomp %st(1)
+# CHECK-NEXT: 1 1 1.00 U fcomp %st(3)
+# CHECK-NEXT: 2 8 1.00 U fcomps (%ecx)
+# CHECK-NEXT: 2 8 1.00 U fcompl (%eax)
+# CHECK-NEXT: 1 100 0.33 U fcompp
+# CHECK-NEXT: 3 3 1.00 U fcomi %st(3)
+# CHECK-NEXT: 3 3 1.00 U fcompi %st(3)
+# CHECK-NEXT: 1 100 0.33 U fcos
+# CHECK-NEXT: 1 1 1.00 U fdecstp
+# CHECK-NEXT: 1 14 14.00 U fdiv %st(0), %st(1)
+# CHECK-NEXT: 1 14 14.00 U fdiv %st(2)
+# CHECK-NEXT: 2 31 1.00 * U fdivs (%ecx)
+# CHECK-NEXT: 2 31 1.00 * U fdivl (%eax)
+# CHECK-NEXT: 1 14 14.00 U fdivp %st(1)
+# CHECK-NEXT: 1 14 14.00 U fdivp %st(2)
+# CHECK-NEXT: 3 34 1.00 * U fidivs (%ecx)
+# CHECK-NEXT: 3 34 1.00 * U fidivl (%eax)
+# CHECK-NEXT: 1 14 14.00 U fdivr %st(0), %st(1)
+# CHECK-NEXT: 1 14 14.00 U fdivr %st(2)
+# CHECK-NEXT: 2 31 1.00 * U fdivrs (%ecx)
+# CHECK-NEXT: 2 31 1.00 * U fdivrl (%eax)
+# CHECK-NEXT: 1 14 14.00 U fdivrp %st(1)
+# CHECK-NEXT: 1 14 14.00 U fdivrp %st(2)
+# CHECK-NEXT: 3 34 1.00 * U fidivrs (%ecx)
+# CHECK-NEXT: 3 34 1.00 * U fidivrl (%eax)
+# CHECK-NEXT: 1 1 1.00 U ffree %st(0)
+# CHECK-NEXT: 3 11 2.00 U ficoms (%ecx)
+# CHECK-NEXT: 3 11 2.00 U ficoml (%eax)
+# CHECK-NEXT: 3 11 2.00 U ficomps (%ecx)
+# CHECK-NEXT: 3 11 2.00 U ficompl (%eax)
+# CHECK-NEXT: 2 10 1.00 * U filds (%edx)
+# CHECK-NEXT: 2 10 1.00 * U fildl (%ecx)
+# CHECK-NEXT: 2 10 1.00 * U fildll (%eax)
+# CHECK-NEXT: 1 1 1.00 U fincstp
+# CHECK-NEXT: 4 5 1.33 U fninit
+# CHECK-NEXT: 4 9 1.00 * U fists (%edx)
+# CHECK-NEXT: 4 9 1.00 * U fistl (%ecx)
+# CHECK-NEXT: 4 9 1.00 * U fistps (%edx)
+# CHECK-NEXT: 4 9 1.00 * U fistpl (%ecx)
+# CHECK-NEXT: 4 9 1.00 * U fistpll (%eax)
+# CHECK-NEXT: 3 5 1.00 * U fisttps (%edx)
+# CHECK-NEXT: 3 5 1.00 * U fisttpl (%ecx)
+# CHECK-NEXT: 3 5 1.00 * U fisttpll (%eax)
+# CHECK-NEXT: 1 1 1.00 U fld %st(0)
+# CHECK-NEXT: 3 9 1.00 * U flds (%edx)
+# CHECK-NEXT: 3 9 1.00 * U fldl (%ecx)
+# CHECK-NEXT: 3 9 1.00 * U fldt (%eax)
+# CHECK-NEXT: 5 8 2.00 * U fldcw (%eax)
+# CHECK-NEXT: 1 100 0.33 U fldenv (%eax)
+# CHECK-NEXT: 2 1 1.00 U fld1
+# CHECK-NEXT: 2 1 1.00 U fldl2e
+# CHECK-NEXT: 2 1 1.00 U fldl2t
+# CHECK-NEXT: 2 1 1.00 U fldlg2
+# CHECK-NEXT: 2 1 1.00 U fldln2
+# CHECK-NEXT: 2 1 1.00 U fldpi
+# CHECK-NEXT: 1 1 1.00 U fldz
+# CHECK-NEXT: 1 5 1.00 U fmul %st(0), %st(1)
+# CHECK-NEXT: 1 5 1.00 U fmul %st(2)
+# CHECK-NEXT: 2 12 1.00 * U fmuls (%ecx)
+# CHECK-NEXT: 2 12 1.00 * U fmull (%eax)
+# CHECK-NEXT: 1 5 1.00 U fmulp %st(1)
+# CHECK-NEXT: 1 5 1.00 U fmulp %st(2)
+# CHECK-NEXT: 3 15 1.00 * U fimuls (%ecx)
+# CHECK-NEXT: 3 15 1.00 * U fimull (%eax)
+# CHECK-NEXT: 1 1 1.00 U fnop
+# CHECK-NEXT: 1 100 0.33 U fpatan
+# CHECK-NEXT: 1 100 0.33 U fprem
+# CHECK-NEXT: 1 100 0.33 U fprem1
+# CHECK-NEXT: 1 100 0.33 U fptan
+# CHECK-NEXT: 1 100 0.33 U frndint
+# CHECK-NEXT: 1 100 0.33 U frstor (%eax)
+# CHECK-NEXT: 1 100 0.33 U fnsave (%eax)
+# CHECK-NEXT: 1 100 0.33 U fscale
+# CHECK-NEXT: 1 100 0.33 U fsin
+# CHECK-NEXT: 1 100 0.33 U fsincos
+# CHECK-NEXT: 1 24 24.00 U fsqrt
+# CHECK-NEXT: 1 1 1.00 U fst %st(0)
+# CHECK-NEXT: 3 6 1.00 * U fsts (%edx)
+# CHECK-NEXT: 3 6 1.00 * U fstl (%ecx)
+# CHECK-NEXT: 1 1 1.00 U fstp %st(0)
+# CHECK-NEXT: 3 6 1.00 * U fstpl (%edx)
+# CHECK-NEXT: 3 6 1.00 * U fstpl (%ecx)
+# CHECK-NEXT: 3 6 1.00 * U fstpt (%eax)
+# CHECK-NEXT: 4 7 1.00 * U fnstcw (%eax)
+# CHECK-NEXT: 1 100 0.33 U fnstenv (%eax)
+# CHECK-NEXT: 4 7 1.00 U fnstsw (%eax)
+# CHECK-NEXT: 1 100 0.33 U frstor (%eax)
+# CHECK-NEXT: 1 100 0.33 U wait
+# CHECK-NEXT: 1 100 0.33 U fnsave (%eax)
+# CHECK-NEXT: 1 3 1.00 U fsub %st(0), %st(1)
+# CHECK-NEXT: 1 3 1.00 U fsub %st(2)
+# CHECK-NEXT: 2 10 1.00 * U fsubs (%ecx)
+# CHECK-NEXT: 2 10 1.00 * U fsubl (%eax)
+# CHECK-NEXT: 1 3 1.00 U fsubp %st(1)
+# CHECK-NEXT: 1 3 1.00 U fsubp %st(2)
+# CHECK-NEXT: 3 13 2.00 * U fisubs (%ecx)
+# CHECK-NEXT: 3 13 2.00 * U fisubl (%eax)
+# CHECK-NEXT: 1 3 1.00 U fsubr %st(0), %st(1)
+# CHECK-NEXT: 1 3 1.00 U fsubr %st(2)
+# CHECK-NEXT: 2 10 1.00 * U fsubrs (%ecx)
+# CHECK-NEXT: 2 10 1.00 * U fsubrl (%eax)
+# CHECK-NEXT: 1 3 1.00 U fsubrp %st(1)
+# CHECK-NEXT: 1 3 1.00 U fsubrp %st(2)
+# CHECK-NEXT: 3 13 2.00 * U fisubrs (%ecx)
+# CHECK-NEXT: 3 13 2.00 * U fisubrl (%eax)
+# CHECK-NEXT: 1 3 1.00 U ftst
+# CHECK-NEXT: 1 1 1.00 U fucom %st(1)
+# CHECK-NEXT: 1 1 1.00 U fucom %st(3)
+# CHECK-NEXT: 1 1 1.00 U fucomp %st(1)
+# CHECK-NEXT: 1 1 1.00 U fucomp %st(3)
+# CHECK-NEXT: 1 3 1.00 U fucompp
+# CHECK-NEXT: 3 3 1.00 U fucomi %st(3)
+# CHECK-NEXT: 3 3 1.00 U fucompi %st(3)
+# CHECK-NEXT: 1 100 0.33 U wait
+# CHECK-NEXT: 1 100 0.33 U fxam
+# CHECK-NEXT: 1 1 0.33 U fxch %st(1)
+# CHECK-NEXT: 1 1 0.33 U fxch %st(3)
+# CHECK-NEXT: 5 5 2.00 * * U fxrstor (%eax)
+# CHECK-NEXT: 1 100 0.33 * * U fxsave (%eax)
+# CHECK-NEXT: 1 100 0.33 U fxtract
+# CHECK-NEXT: 1 100 0.33 U fyl2x
+# CHECK-NEXT: 1 100 0.33 U fyl2xp1
+
+# CHECK: Resources:
+# CHECK-NEXT: [0] - SBDivider
+# CHECK-NEXT: [1] - SBFPDivider
+# CHECK-NEXT: [2] - SBPort0
+# CHECK-NEXT: [3] - SBPort1
+# CHECK-NEXT: [4] - SBPort4
+# CHECK-NEXT: [5] - SBPort5
+# CHECK-NEXT: [6.0] - SBPort23
+# CHECK-NEXT: [6.1] - SBPort23
+
+# CHECK: Resource pressure per iteration:
+# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6.0] [6.1]
+# CHECK-NEXT: - 136.00 52.67 90.67 17.00 54.67 34.00 34.00
+
+# CHECK: Resource pressure by instruction:
+# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6.0] [6.1] Instructions:
+# CHECK-NEXT: - - 0.33 0.33 - 0.33 - - f2xm1
+# CHECK-NEXT: - - - - - 1.00 - - fabs
+# CHECK-NEXT: - - - 1.00 - - - - fadd %st(0), %st(1)
+# CHECK-NEXT: - - - 1.00 - - - - fadd %st(2)
+# CHECK-NEXT: - - - 1.00 - - 0.50 0.50 fadds (%ecx)
+# CHECK-NEXT: - - - 1.00 - - 0.50 0.50 faddl (%ecx)
+# CHECK-NEXT: - - - 1.00 - - - - faddp %st(1)
+# CHECK-NEXT: - - - 1.00 - - - - faddp %st(2)
+# CHECK-NEXT: - - - 2.00 - - 0.50 0.50 fiadds (%ecx)
+# CHECK-NEXT: - - - 2.00 - - 0.50 0.50 fiaddl (%ecx)
+# CHECK-NEXT: - - 0.33 0.33 - 0.33 - - fbld (%ecx)
+# CHECK-NEXT: - - 0.33 0.33 - 0.33 - - fbstp (%eax)
+# CHECK-NEXT: - - - - - 1.00 - - fchs
+# CHECK-NEXT: - - 0.33 0.33 - 0.33 - - fnclex
+# CHECK-NEXT: - - 0.50 - - 2.50 - - fcmovb %st(1), %st(0)
+# CHECK-NEXT: - - 0.50 - - 2.50 - - fcmovbe %st(1), %st(0)
+# CHECK-NEXT: - - 0.50 - - 2.50 - - fcmove %st(1), %st(0)
+# CHECK-NEXT: - - 0.50 - - 2.50 - - fcmovnb %st(1), %st(0)
+# CHECK-NEXT: - - 0.50 - - 2.50 - - fcmovnbe %st(1), %st(0)
+# CHECK-NEXT: - - 0.50 - - 2.50 - - fcmovne %st(1), %st(0)
+# CHECK-NEXT: - - 0.50 - - 2.50 - - fcmovnu %st(1), %st(0)
+# CHECK-NEXT: - - 0.50 - - 2.50 - - fcmovu %st(1), %st(0)
+# CHECK-NEXT: - - - 1.00 - - - - fcom %st(1)
+# CHECK-NEXT: - - - 1.00 - - - - fcom %st(3)
+# CHECK-NEXT: - - - 1.00 - - 0.50 0.50 fcoms (%ecx)
+# CHECK-NEXT: - - - 1.00 - - 0.50 0.50 fcoml (%eax)
+# CHECK-NEXT: - - - 1.00 - - - - fcomp %st(1)
+# CHECK-NEXT: - - - 1.00 - - - - fcomp %st(3)
+# CHECK-NEXT: - - - 1.00 - - 0.50 0.50 fcomps (%ecx)
+# CHECK-NEXT: - - - 1.00 - - 0.50 0.50 fcompl (%eax)
+# CHECK-NEXT: - - 0.33 0.33 - 0.33 - - fcompp
+# CHECK-NEXT: - - 1.00 1.00 - 1.00 - - fcomi %st(3)
+# CHECK-NEXT: - - 1.00 1.00 - 1.00 - - fcompi %st(3)
+# CHECK-NEXT: - - 0.33 0.33 - 0.33 - - fcos
+# CHECK-NEXT: - - - - - 1.00 - - fdecstp
+# CHECK-NEXT: - 14.00 1.00 - - - - - fdiv %st(0), %st(1)
+# CHECK-NEXT: - 14.00 1.00 - - - - - fdiv %st(2)
+# CHECK-NEXT: - - 1.00 - - - 0.50 0.50 fdivs (%ecx)
+# CHECK-NEXT: - - 1.00 - - - 0.50 0.50 fdivl (%eax)
+# CHECK-NEXT: - 14.00 1.00 - - - - - fdivp %st(1)
+# CHECK-NEXT: - 14.00 1.00 - - - - - fdivp %st(2)
+# CHECK-NEXT: - - 1.00 1.00 - - 0.50 0.50 fidivs (%ecx)
+# CHECK-NEXT: - - 1.00 1.00 - - 0.50 0.50 fidivl (%eax)
+# CHECK-NEXT: - 14.00 1.00 - - - - - fdivr %st(0), %st(1)
+# CHECK-NEXT: - 14.00 1.00 - - - - - fdivr %st(2)
+# CHECK-NEXT: - - 1.00 - - - 0.50 0.50 fdivrs (%ecx)
+# CHECK-NEXT: - - 1.00 - - - 0.50 0.50 fdivrl (%eax)
+# CHECK-NEXT: - 14.00 1.00 - - - - - fdivrp %st(1)
+# CHECK-NEXT: - 14.00 1.00 - - - - - fdivrp %st(2)
+# CHECK-NEXT: - - 1.00 1.00 - - 0.50 0.50 fidivrs (%ecx)
+# CHECK-NEXT: - - 1.00 1.00 - - 0.50 0.50 fidivrl (%eax)
+# CHECK-NEXT: - - - - - 1.00 - - ffree %st(0)
+# CHECK-NEXT: - - - 2.00 - - 0.50 0.50 ficoms (%ecx)
+# CHECK-NEXT: - - - 2.00 - - 0.50 0.50 ficoml (%eax)
+# CHECK-NEXT: - - - 2.00 - - 0.50 0.50 ficomps (%ecx)
+# CHECK-NEXT: - - - 2.00 - - 0.50 0.50 ficompl (%eax)
+# CHECK-NEXT: - - - 1.00 - - 0.50 0.50 filds (%edx)
+# CHECK-NEXT: - - - 1.00 - - 0.50 0.50 fildl (%ecx)
+# CHECK-NEXT: - - - 1.00 - - 0.50 0.50 fildll (%eax)
+# CHECK-NEXT: - - - - - 1.00 - - fincstp
+# CHECK-NEXT: - - 1.00 1.00 - 2.00 - - fninit
+# CHECK-NEXT: - - - 1.00 1.00 - 1.00 1.00 fists (%edx)
+# CHECK-NEXT: - - - 1.00 1.00 - 1.00 1.00 fistl (%ecx)
+# CHECK-NEXT: - - - 1.00 1.00 - 1.00 1.00 fistps (%edx)
+# CHECK-NEXT: - - - 1.00 1.00 - 1.00 1.00 fistpl (%ecx)
+# CHECK-NEXT: - - - 1.00 1.00 - 1.00 1.00 fistpll (%eax)
+# CHECK-NEXT: - - - 1.00 1.00 - 0.50 0.50 fisttps (%edx)
+# CHECK-NEXT: - - - 1.00 1.00 - 0.50 0.50 fisttpl (%ecx)
+# CHECK-NEXT: - - - 1.00 1.00 - 0.50 0.50 fisttpll (%eax)
+# CHECK-NEXT: - - - - - 1.00 - - fld %st(0)
+# CHECK-NEXT: - - 0.50 0.50 - 1.00 0.50 0.50 flds (%edx)
+# CHECK-NEXT: - - 0.50 0.50 - 1.00 0.50 0.50 fldl (%ecx)
+# CHECK-NEXT: - - 0.50 0.50 - 1.00 0.50 0.50 fldt (%eax)
+# CHECK-NEXT: - - - - 1.00 2.00 1.00 1.00 fldcw (%eax)
+# CHECK-NEXT: - - 0.33 0.33 - 0.33 - - fldenv (%eax)
+# CHECK-NEXT: - - 1.00 - - 1.00 - - fld1
+# CHECK-NEXT: - - 1.00 1.00 - - - - fldl2e
+# CHECK-NEXT: - - 1.00 1.00 - - - - fldl2t
+# CHECK-NEXT: - - 1.00 1.00 - - - - fldlg2
+# CHECK-NEXT: - - 1.00 1.00 - - - - fldln2
+# CHECK-NEXT: - - 1.00 1.00 - - - - fldpi
+# CHECK-NEXT: - - - - - 1.00 - - fldz
+# CHECK-NEXT: - - 1.00 - - - - - fmul %st(0), %st(1)
+# CHECK-NEXT: - - 1.00 - - - - - fmul %st(2)
+# CHECK-NEXT: - - 1.00 - - - 0.50 0.50 fmuls (%ecx)
+# CHECK-NEXT: - - 1.00 - - - 0.50 0.50 fmull (%eax)
+# CHECK-NEXT: - - 1.00 - - - - - fmulp %st(1)
+# CHECK-NEXT: - - 1.00 - - - - - fmulp %st(2)
+# CHECK-NEXT: - - 1.00 1.00 - - 0.50 0.50 fimuls (%ecx)
+# CHECK-NEXT: - - 1.00 1.00 - - 0.50 0.50 fimull (%eax)
+# CHECK-NEXT: - - - - - 1.00 - - fnop
+# CHECK-NEXT: - - 0.33 0.33 - 0.33 - - fpatan
+# CHECK-NEXT: - - 0.33 0.33 - 0.33 - - fprem
+# CHECK-NEXT: - - 0.33 0.33 - 0.33 - - fprem1
+# CHECK-NEXT: - - 0.33 0.33 - 0.33 - - fptan
+# CHECK-NEXT: - - 0.33 0.33 - 0.33 - - frndint
+# CHECK-NEXT: - - 0.33 0.33 - 0.33 - - frstor (%eax)
+# CHECK-NEXT: - - 0.33 0.33 - 0.33 - - fnsave (%eax)
+# CHECK-NEXT: - - 0.33 0.33 - 0.33 - - fscale
+# CHECK-NEXT: - - 0.33 0.33 - 0.33 - - fsin
+# CHECK-NEXT: - - 0.33 0.33 - 0.33 - - fsincos
+# CHECK-NEXT: - 24.00 1.00 - - - - - fsqrt
+# CHECK-NEXT: - - - - - 1.00 - - fst %st(0)
+# CHECK-NEXT: - - - - 1.00 - 1.00 1.00 fsts (%edx)
+# CHECK-NEXT: - - - - 1.00 - 1.00 1.00 fstl (%ecx)
+# CHECK-NEXT: - - - - - 1.00 - - fstp %st(0)
+# CHECK-NEXT: - - - - 1.00 - 1.00 1.00 fstpl (%edx)
+# CHECK-NEXT: - - - - 1.00 - 1.00 1.00 fstpl (%ecx)
+# CHECK-NEXT: - - - - 1.00 - 1.00 1.00 fstpt (%eax)
+# CHECK-NEXT: - - - - 1.00 1.00 1.00 1.00 fnstcw (%eax)
+# CHECK-NEXT: - - 0.33 0.33 - 0.33 - - fnstenv (%eax)
+# CHECK-NEXT: - - 1.00 - 1.00 - 1.00 1.00 fnstsw (%eax)
+# CHECK-NEXT: - - 0.33 0.33 - 0.33 - - frstor (%eax)
+# CHECK-NEXT: - - 0.33 0.33 - 0.33 - - wait
+# CHECK-NEXT: - - 0.33 0.33 - 0.33 - - fnsave (%eax)
+# CHECK-NEXT: - - - 1.00 - - - - fsub %st(0), %st(1)
+# CHECK-NEXT: - - - 1.00 - - - - fsub %st(2)
+# CHECK-NEXT: - - - 1.00 - - 0.50 0.50 fsubs (%ecx)
+# CHECK-NEXT: - - - 1.00 - - 0.50 0.50 fsubl (%eax)
+# CHECK-NEXT: - - - 1.00 - - - - fsubp %st(1)
+# CHECK-NEXT: - - - 1.00 - - - - fsubp %st(2)
+# CHECK-NEXT: - - - 2.00 - - 0.50 0.50 fisubs (%ecx)
+# CHECK-NEXT: - - - 2.00 - - 0.50 0.50 fisubl (%eax)
+# CHECK-NEXT: - - - 1.00 - - - - fsubr %st(0), %st(1)
+# CHECK-NEXT: - - - 1.00 - - - - fsubr %st(2)
+# CHECK-NEXT: - - - 1.00 - - 0.50 0.50 fsubrs (%ecx)
+# CHECK-NEXT: - - - 1.00 - - 0.50 0.50 fsubrl (%eax)
+# CHECK-NEXT: - - - 1.00 - - - - fsubrp %st(1)
+# CHECK-NEXT: - - - 1.00 - - - - fsubrp %st(2)
+# CHECK-NEXT: - - - 2.00 - - 0.50 0.50 fisubrs (%ecx)
+# CHECK-NEXT: - - - 2.00 - - 0.50 0.50 fisubrl (%eax)
+# CHECK-NEXT: - - - 1.00 - - - - ftst
+# CHECK-NEXT: - - - 1.00 - - - - fucom %st(1)
+# CHECK-NEXT: - - - 1.00 - - - - fucom %st(3)
+# CHECK-NEXT: - - - 1.00 - - - - fucomp %st(1)
+# CHECK-NEXT: - - - 1.00 - - - - fucomp %st(3)
+# CHECK-NEXT: - - - 1.00 - - - - fucompp
+# CHECK-NEXT: - - 1.00 1.00 - 1.00 - - fucomi %st(3)
+# CHECK-NEXT: - - 1.00 1.00 - 1.00 - - fucompi %st(3)
+# CHECK-NEXT: - - 0.33 0.33 - 0.33 - - wait
+# CHECK-NEXT: - - 0.33 0.33 - 0.33 - - fxam
+# CHECK-NEXT: - - 0.33 0.33 - 0.33 - - fxch %st(1)
+# CHECK-NEXT: - - 0.33 0.33 - 0.33 - - fxch %st(3)
+# CHECK-NEXT: - - 0.50 0.50 1.00 2.00 0.50 0.50 fxrstor (%eax)
+# CHECK-NEXT: - - 0.33 0.33 - 0.33 - - fxsave (%eax)
+# CHECK-NEXT: - - 0.33 0.33 - 0.33 - - fxtract
+# CHECK-NEXT: - - 0.33 0.33 - 0.33 - - fyl2x
+# CHECK-NEXT: - - 0.33 0.33 - 0.33 - - fyl2xp1
--- /dev/null
+# NOTE: Assertions have been autogenerated by utils/update_mca_test_checks.py
+# RUN: llvm-mca -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -instruction-tables < %s | FileCheck %s
+
+vfrczpd %xmm0, %xmm3
+vfrczpd (%rax), %xmm3
+
+vfrczpd %ymm0, %ymm3
+vfrczpd (%rax), %ymm3
+
+vfrczps %xmm0, %xmm3
+vfrczps (%rax), %xmm3
+
+vfrczps %ymm0, %ymm3
+vfrczps (%rax), %ymm3
+
+vfrczsd %xmm0, %xmm3
+vfrczsd (%rax), %xmm3
+
+vfrczss %xmm0, %xmm3
+vfrczss (%rax), %xmm3
+
+vpcmov %xmm0, %xmm1, %xmm2, %xmm3
+vpcmov (%rax), %xmm0, %xmm1, %xmm3
+vpcmov %xmm0, (%rax), %xmm1, %xmm3
+
+vpcmov %ymm0, %ymm1, %ymm2, %ymm3
+vpcmov (%rax), %ymm0, %ymm1, %ymm3
+vpcmov %ymm0, (%rax), %ymm1, %ymm3
+
+vpcomb $0, %xmm0, %xmm1, %xmm3
+vpcomb $0, (%rax), %xmm0, %xmm3
+
+vpcomd $0, %xmm0, %xmm1, %xmm3
+vpcomd $0, (%rax), %xmm0, %xmm3
+
+vpcomq $0, %xmm0, %xmm1, %xmm3
+vpcomq $0, (%rax), %xmm0, %xmm3
+
+vpcomub $0, %xmm0, %xmm1, %xmm3
+vpcomub $0, (%rax), %xmm0, %xmm3
+
+vpcomud $0, %xmm0, %xmm1, %xmm3
+vpcomud $0, (%rax), %xmm0, %xmm3
+
+vpcomuq $0, %xmm0, %xmm1, %xmm3
+vpcomuq $0, (%rax), %xmm0, %xmm3
+
+vpcomuw $0, %xmm0, %xmm1, %xmm3
+vpcomuw $0, (%rax), %xmm0, %xmm3
+
+vpcomw $0, %xmm0, %xmm1, %xmm3
+vpcomw $0, (%rax), %xmm0, %xmm3
+
+vpermil2pd $0, %xmm0, %xmm1, %xmm2, %xmm3
+vpermil2pd $0, (%rax), %xmm0, %xmm1, %xmm3
+vpermil2pd $0, %xmm0, (%rax), %xmm1, %xmm3
+
+vpermil2pd $0, %ymm0, %ymm1, %ymm2, %ymm3
+vpermil2pd $0, (%rax), %ymm0, %ymm1, %ymm3
+vpermil2pd $0, %ymm0, (%rax), %ymm1, %ymm3
+
+vpermil2ps $0, %xmm0, %xmm1, %xmm2, %xmm3
+vpermil2ps $0, (%rax), %xmm0, %xmm1, %xmm3
+vpermil2ps $0, %xmm0, (%rax), %xmm1, %xmm3
+
+vpermil2ps $0, %ymm0, %ymm1, %ymm2, %ymm3
+vpermil2ps $0, (%rax), %ymm0, %ymm1, %ymm3
+vpermil2ps $0, %ymm0, (%rax), %ymm1, %ymm3
+
+vphaddbd %xmm0, %xmm3
+vphaddbd (%rax), %xmm3
+
+vphaddbq %xmm0, %xmm3
+vphaddbq (%rax), %xmm3
+
+vphaddbw %xmm0, %xmm3
+vphaddbw (%rax), %xmm3
+
+vphadddq %xmm0, %xmm3
+vphadddq (%rax), %xmm3
+
+vphaddubd %xmm0, %xmm3
+vphaddubd (%rax), %xmm3
+
+vphaddubq %xmm0, %xmm3
+vphaddubq (%rax), %xmm3
+
+vphaddubw %xmm0, %xmm3
+vphaddubw (%rax), %xmm3
+
+vphaddudq %xmm0, %xmm3
+vphaddudq (%rax), %xmm3
+
+vphadduwd %xmm0, %xmm3
+vphadduwd (%rax), %xmm3
+
+vphadduwq %xmm0, %xmm3
+vphadduwq (%rax), %xmm3
+
+vphaddwd %xmm0, %xmm3
+vphaddwd (%rax), %xmm3
+
+vphaddwq %xmm0, %xmm3
+vphaddwq (%rax), %xmm3
+
+vphsubbw %xmm0, %xmm3
+vphsubbw (%rax), %xmm3
+
+vphsubdq %xmm0, %xmm3
+vphsubdq (%rax), %xmm3
+
+vphsubwd %xmm0, %xmm3
+vphsubwd (%rax), %xmm3
+
+vpmacsdd %xmm0, %xmm1, %xmm2, %xmm3
+vpmacsdd %xmm0, (%rax), %xmm1, %xmm3
+
+vpmacsdqh %xmm0, %xmm1, %xmm2, %xmm3
+vpmacsdqh %xmm0, (%rax), %xmm1, %xmm3
+
+vpmacsdql %xmm0, %xmm1, %xmm2, %xmm3
+vpmacsdql %xmm0, (%rax), %xmm1, %xmm3
+
+vpmacssdd %xmm0, %xmm1, %xmm2, %xmm3
+vpmacssdd %xmm0, (%rax), %xmm1, %xmm3
+
+vpmacssdqh %xmm0, %xmm1, %xmm2, %xmm3
+vpmacssdqh %xmm0, (%rax), %xmm1, %xmm3
+
+vpmacssdql %xmm0, %xmm1, %xmm2, %xmm3
+vpmacssdql %xmm0, (%rax), %xmm1, %xmm3
+
+vpmacsswd %xmm0, %xmm1, %xmm2, %xmm3
+vpmacsswd %xmm0, (%rax), %xmm1, %xmm3
+
+vpmacssww %xmm0, %xmm1, %xmm2, %xmm3
+vpmacssww %xmm0, (%rax), %xmm1, %xmm3
+
+vpmacswd %xmm0, %xmm1, %xmm2, %xmm3
+vpmacswd %xmm0, (%rax), %xmm1, %xmm3
+
+vpmacsww %xmm0, %xmm1, %xmm2, %xmm3
+vpmacsww %xmm0, (%rax), %xmm1, %xmm3
+
+vpmadcsswd %xmm0, %xmm1, %xmm2, %xmm3
+vpmadcsswd %xmm0, (%rax), %xmm1, %xmm3
+
+vpmadcswd %xmm0, %xmm1, %xmm2, %xmm3
+vpmadcswd %xmm0, (%rax), %xmm1, %xmm3
+
+vpperm %xmm0, %xmm1, %xmm2, %xmm3
+vpperm (%rax), %xmm0, %xmm1, %xmm3
+vpperm %xmm0, (%rax), %xmm1, %xmm3
+
+vprotb %xmm0, %xmm1, %xmm3
+vprotb (%rax), %xmm0, %xmm3
+vprotb %xmm0, (%rax), %xmm3
+
+vprotb $0, %xmm0, %xmm3
+vprotb $0, (%rax), %xmm3
+
+vprotd %xmm0, %xmm1, %xmm3
+vprotd (%rax), %xmm0, %xmm3
+vprotd %xmm0, (%rax), %xmm3
+
+vprotd $0, %xmm0, %xmm3
+vprotd $0, (%rax), %xmm3
+
+vprotq %xmm0, %xmm1, %xmm3
+vprotq (%rax), %xmm0, %xmm3
+vprotq %xmm0, (%rax), %xmm3
+
+vprotq $0, %xmm0, %xmm3
+vprotq $0, (%rax), %xmm3
+
+vprotw %xmm0, %xmm1, %xmm3
+vprotw (%rax), %xmm0, %xmm3
+vprotw %xmm0, (%rax), %xmm3
+
+vprotw $0, %xmm0, %xmm3
+vprotw $0, (%rax), %xmm3
+
+vpshab %xmm0, %xmm1, %xmm3
+vpshab (%rax), %xmm0, %xmm3
+vpshab %xmm0, (%rax), %xmm3
+
+vpshad %xmm0, %xmm1, %xmm3
+vpshad (%rax), %xmm0, %xmm3
+vpshad %xmm0, (%rax), %xmm3
+
+vpshaq %xmm0, %xmm1, %xmm3
+vpshaq (%rax), %xmm0, %xmm3
+vpshaq %xmm0, (%rax), %xmm3
+
+vpshaw %xmm0, %xmm1, %xmm3
+vpshaw (%rax), %xmm0, %xmm3
+vpshaw %xmm0, (%rax), %xmm3
+
+vpshlb %xmm0, %xmm1, %xmm3
+vpshlb (%rax), %xmm0, %xmm3
+vpshlb %xmm0, (%rax), %xmm3
+
+vpshld %xmm0, %xmm1, %xmm3
+vpshld (%rax), %xmm0, %xmm3
+vpshld %xmm0, (%rax), %xmm3
+
+vpshlq %xmm0, %xmm1, %xmm3
+vpshlq (%rax), %xmm0, %xmm3
+vpshlq %xmm0, (%rax), %xmm3
+
+vpshlw %xmm0, %xmm1, %xmm3
+vpshlw (%rax), %xmm0, %xmm3
+vpshlw %xmm0, (%rax), %xmm3
+
+# CHECK: Instruction Info:
+# CHECK-NEXT: [1]: #uOps
+# CHECK-NEXT: [2]: Latency
+# CHECK-NEXT: [3]: RThroughput
+# CHECK-NEXT: [4]: MayLoad
+# CHECK-NEXT: [5]: MayStore
+# CHECK-NEXT: [6]: HasSideEffects (U)
+
+# CHECK: [1] [2] [3] [4] [5] [6] Instructions:
+# CHECK-NEXT: 1 3 1.00 vfrczpd %xmm0, %xmm3
+# CHECK-NEXT: 2 9 1.00 * vfrczpd (%rax), %xmm3
+# CHECK-NEXT: 1 3 1.00 vfrczpd %ymm0, %ymm3
+# CHECK-NEXT: 2 10 1.00 * vfrczpd (%rax), %ymm3
+# CHECK-NEXT: 1 3 1.00 vfrczps %xmm0, %xmm3
+# CHECK-NEXT: 2 9 1.00 * vfrczps (%rax), %xmm3
+# CHECK-NEXT: 1 3 1.00 vfrczps %ymm0, %ymm3
+# CHECK-NEXT: 2 10 1.00 * vfrczps (%rax), %ymm3
+# CHECK-NEXT: 1 3 1.00 vfrczsd %xmm0, %xmm3
+# CHECK-NEXT: 2 9 1.00 * vfrczsd (%rax), %xmm3
+# CHECK-NEXT: 1 3 1.00 vfrczss %xmm0, %xmm3
+# CHECK-NEXT: 2 9 1.00 * vfrczss (%rax), %xmm3
+# CHECK-NEXT: 1 1 0.50 vpcmov %xmm0, %xmm1, %xmm2, %xmm3
+# CHECK-NEXT: 2 7 0.50 * vpcmov (%rax), %xmm0, %xmm1, %xmm3
+# CHECK-NEXT: 2 7 0.50 * vpcmov %xmm0, (%rax), %xmm1, %xmm3
+# CHECK-NEXT: 1 1 1.00 vpcmov %ymm0, %ymm1, %ymm2, %ymm3
+# CHECK-NEXT: 2 8 1.00 * vpcmov (%rax), %ymm0, %ymm1, %ymm3
+# CHECK-NEXT: 2 8 1.00 * vpcmov %ymm0, (%rax), %ymm1, %ymm3
+# CHECK-NEXT: 1 1 0.50 vpcomb $0, %xmm0, %xmm1, %xmm3
+# CHECK-NEXT: 2 7 0.50 * vpcomb $0, (%rax), %xmm0, %xmm3
+# CHECK-NEXT: 1 1 0.50 vpcomd $0, %xmm0, %xmm1, %xmm3
+# CHECK-NEXT: 2 7 0.50 * vpcomd $0, (%rax), %xmm0, %xmm3
+# CHECK-NEXT: 1 1 0.50 vpcomq $0, %xmm0, %xmm1, %xmm3
+# CHECK-NEXT: 2 7 0.50 * vpcomq $0, (%rax), %xmm0, %xmm3
+# CHECK-NEXT: 1 1 0.50 vpcomub $0, %xmm0, %xmm1, %xmm3
+# CHECK-NEXT: 2 7 0.50 * vpcomub $0, (%rax), %xmm0, %xmm3
+# CHECK-NEXT: 1 1 0.50 vpcomud $0, %xmm0, %xmm1, %xmm3
+# CHECK-NEXT: 2 7 0.50 * vpcomud $0, (%rax), %xmm0, %xmm3
+# CHECK-NEXT: 1 1 0.50 vpcomuq $0, %xmm0, %xmm1, %xmm3
+# CHECK-NEXT: 2 7 0.50 * vpcomuq $0, (%rax), %xmm0, %xmm3
+# CHECK-NEXT: 1 1 0.50 vpcomuw $0, %xmm0, %xmm1, %xmm3
+# CHECK-NEXT: 2 7 0.50 * vpcomuw $0, (%rax), %xmm0, %xmm3
+# CHECK-NEXT: 1 1 0.50 vpcomw $0, %xmm0, %xmm1, %xmm3
+# CHECK-NEXT: 2 7 0.50 * vpcomw $0, (%rax), %xmm0, %xmm3
+# CHECK-NEXT: 1 1 1.00 vpermil2pd $0, %xmm0, %xmm1, %xmm2, %xmm3
+# CHECK-NEXT: 2 7 1.00 * vpermil2pd $0, (%rax), %xmm0, %xmm1, %xmm3
+# CHECK-NEXT: 2 7 1.00 * vpermil2pd $0, %xmm0, (%rax), %xmm1, %xmm3
+# CHECK-NEXT: 1 1 1.00 vpermil2pd $0, %ymm0, %ymm1, %ymm2, %ymm3
+# CHECK-NEXT: 2 8 1.00 * vpermil2pd $0, (%rax), %ymm0, %ymm1, %ymm3
+# CHECK-NEXT: 2 8 1.00 * vpermil2pd $0, %ymm0, (%rax), %ymm1, %ymm3
+# CHECK-NEXT: 1 1 1.00 vpermil2ps $0, %xmm0, %xmm1, %xmm2, %xmm3
+# CHECK-NEXT: 2 7 1.00 * vpermil2ps $0, (%rax), %xmm0, %xmm1, %xmm3
+# CHECK-NEXT: 2 7 1.00 * vpermil2ps $0, %xmm0, (%rax), %xmm1, %xmm3
+# CHECK-NEXT: 1 1 1.00 vpermil2ps $0, %ymm0, %ymm1, %ymm2, %ymm3
+# CHECK-NEXT: 2 8 1.00 * vpermil2ps $0, (%rax), %ymm0, %ymm1, %ymm3
+# CHECK-NEXT: 2 8 1.00 * vpermil2ps $0, %ymm0, (%rax), %ymm1, %ymm3
+# CHECK-NEXT: 3 3 1.50 vphaddbd %xmm0, %xmm3
+# CHECK-NEXT: 4 9 1.50 * vphaddbd (%rax), %xmm3
+# CHECK-NEXT: 3 3 1.50 vphaddbq %xmm0, %xmm3
+# CHECK-NEXT: 4 9 1.50 * vphaddbq (%rax), %xmm3
+# CHECK-NEXT: 3 3 1.50 vphaddbw %xmm0, %xmm3
+# CHECK-NEXT: 4 9 1.50 * vphaddbw (%rax), %xmm3
+# CHECK-NEXT: 3 3 1.50 vphadddq %xmm0, %xmm3
+# CHECK-NEXT: 4 9 1.50 * vphadddq (%rax), %xmm3
+# CHECK-NEXT: 3 3 1.50 vphaddubd %xmm0, %xmm3
+# CHECK-NEXT: 4 9 1.50 * vphaddubd (%rax), %xmm3
+# CHECK-NEXT: 3 3 1.50 vphaddubq %xmm0, %xmm3
+# CHECK-NEXT: 4 9 1.50 * vphaddubq (%rax), %xmm3
+# CHECK-NEXT: 3 3 1.50 vphaddubw %xmm0, %xmm3
+# CHECK-NEXT: 4 9 1.50 * vphaddubw (%rax), %xmm3
+# CHECK-NEXT: 3 3 1.50 vphaddudq %xmm0, %xmm3
+# CHECK-NEXT: 4 9 1.50 * vphaddudq (%rax), %xmm3
+# CHECK-NEXT: 3 3 1.50 vphadduwd %xmm0, %xmm3
+# CHECK-NEXT: 4 9 1.50 * vphadduwd (%rax), %xmm3
+# CHECK-NEXT: 3 3 1.50 vphadduwq %xmm0, %xmm3
+# CHECK-NEXT: 4 9 1.50 * vphadduwq (%rax), %xmm3
+# CHECK-NEXT: 3 3 1.50 vphaddwd %xmm0, %xmm3
+# CHECK-NEXT: 4 9 1.50 * vphaddwd (%rax), %xmm3
+# CHECK-NEXT: 3 3 1.50 vphaddwq %xmm0, %xmm3
+# CHECK-NEXT: 4 9 1.50 * vphaddwq (%rax), %xmm3
+# CHECK-NEXT: 3 3 1.50 vphsubbw %xmm0, %xmm3
+# CHECK-NEXT: 4 9 1.50 * vphsubbw (%rax), %xmm3
+# CHECK-NEXT: 3 3 1.50 vphsubdq %xmm0, %xmm3
+# CHECK-NEXT: 4 9 1.50 * vphsubdq (%rax), %xmm3
+# CHECK-NEXT: 3 3 1.50 vphsubwd %xmm0, %xmm3
+# CHECK-NEXT: 4 9 1.50 * vphsubwd (%rax), %xmm3
+# CHECK-NEXT: 1 5 1.00 vpmacsdd %xmm0, %xmm1, %xmm2, %xmm3
+# CHECK-NEXT: 2 11 1.00 * vpmacsdd %xmm0, (%rax), %xmm1, %xmm3
+# CHECK-NEXT: 1 5 1.00 vpmacsdqh %xmm0, %xmm1, %xmm2, %xmm3
+# CHECK-NEXT: 2 11 1.00 * vpmacsdqh %xmm0, (%rax), %xmm1, %xmm3
+# CHECK-NEXT: 1 5 1.00 vpmacsdql %xmm0, %xmm1, %xmm2, %xmm3
+# CHECK-NEXT: 2 11 1.00 * vpmacsdql %xmm0, (%rax), %xmm1, %xmm3
+# CHECK-NEXT: 1 5 1.00 vpmacssdd %xmm0, %xmm1, %xmm2, %xmm3
+# CHECK-NEXT: 2 11 1.00 * vpmacssdd %xmm0, (%rax), %xmm1, %xmm3
+# CHECK-NEXT: 1 5 1.00 vpmacssdqh %xmm0, %xmm1, %xmm2, %xmm3
+# CHECK-NEXT: 2 11 1.00 * vpmacssdqh %xmm0, (%rax), %xmm1, %xmm3
+# CHECK-NEXT: 1 5 1.00 vpmacssdql %xmm0, %xmm1, %xmm2, %xmm3
+# CHECK-NEXT: 2 11 1.00 * vpmacssdql %xmm0, (%rax), %xmm1, %xmm3
+# CHECK-NEXT: 1 5 1.00 vpmacsswd %xmm0, %xmm1, %xmm2, %xmm3
+# CHECK-NEXT: 2 11 1.00 * vpmacsswd %xmm0, (%rax), %xmm1, %xmm3
+# CHECK-NEXT: 1 5 1.00 vpmacssww %xmm0, %xmm1, %xmm2, %xmm3
+# CHECK-NEXT: 2 11 1.00 * vpmacssww %xmm0, (%rax), %xmm1, %xmm3
+# CHECK-NEXT: 1 5 1.00 vpmacswd %xmm0, %xmm1, %xmm2, %xmm3
+# CHECK-NEXT: 2 11 1.00 * vpmacswd %xmm0, (%rax), %xmm1, %xmm3
+# CHECK-NEXT: 1 5 1.00 vpmacsww %xmm0, %xmm1, %xmm2, %xmm3
+# CHECK-NEXT: 2 11 1.00 * vpmacsww %xmm0, (%rax), %xmm1, %xmm3
+# CHECK-NEXT: 1 5 1.00 vpmadcsswd %xmm0, %xmm1, %xmm2, %xmm3
+# CHECK-NEXT: 2 11 1.00 * vpmadcsswd %xmm0, (%rax), %xmm1, %xmm3
+# CHECK-NEXT: 1 5 1.00 vpmadcswd %xmm0, %xmm1, %xmm2, %xmm3
+# CHECK-NEXT: 2 11 1.00 * vpmadcswd %xmm0, (%rax), %xmm1, %xmm3
+# CHECK-NEXT: 1 1 0.50 vpperm %xmm0, %xmm1, %xmm2, %xmm3
+# CHECK-NEXT: 2 7 0.50 * vpperm (%rax), %xmm0, %xmm1, %xmm3
+# CHECK-NEXT: 2 7 0.50 * vpperm %xmm0, (%rax), %xmm1, %xmm3
+# CHECK-NEXT: 1 1 1.00 vprotb %xmm0, %xmm1, %xmm3
+# CHECK-NEXT: 2 7 1.00 * vprotb (%rax), %xmm0, %xmm3
+# CHECK-NEXT: 2 7 1.00 * vprotb %xmm0, (%rax), %xmm3
+# CHECK-NEXT: 1 1 1.00 vprotb $0, %xmm0, %xmm3
+# CHECK-NEXT: 2 7 1.00 * vprotb $0, (%rax), %xmm3
+# CHECK-NEXT: 1 1 1.00 vprotd %xmm0, %xmm1, %xmm3
+# CHECK-NEXT: 2 7 1.00 * vprotd (%rax), %xmm0, %xmm3
+# CHECK-NEXT: 2 7 1.00 * vprotd %xmm0, (%rax), %xmm3
+# CHECK-NEXT: 1 1 1.00 vprotd $0, %xmm0, %xmm3
+# CHECK-NEXT: 2 7 1.00 * vprotd $0, (%rax), %xmm3
+# CHECK-NEXT: 1 1 1.00 vprotq %xmm0, %xmm1, %xmm3
+# CHECK-NEXT: 2 7 1.00 * vprotq (%rax), %xmm0, %xmm3
+# CHECK-NEXT: 2 7 1.00 * vprotq %xmm0, (%rax), %xmm3
+# CHECK-NEXT: 1 1 1.00 vprotq $0, %xmm0, %xmm3
+# CHECK-NEXT: 2 7 1.00 * vprotq $0, (%rax), %xmm3
+# CHECK-NEXT: 1 1 1.00 vprotw %xmm0, %xmm1, %xmm3
+# CHECK-NEXT: 2 7 1.00 * vprotw (%rax), %xmm0, %xmm3
+# CHECK-NEXT: 2 7 1.00 * vprotw %xmm0, (%rax), %xmm3
+# CHECK-NEXT: 1 1 1.00 vprotw $0, %xmm0, %xmm3
+# CHECK-NEXT: 2 7 1.00 * vprotw $0, (%rax), %xmm3
+# CHECK-NEXT: 1 1 1.00 vpshab %xmm0, %xmm1, %xmm3
+# CHECK-NEXT: 2 7 1.00 * vpshab (%rax), %xmm0, %xmm3
+# CHECK-NEXT: 2 7 1.00 * vpshab %xmm0, (%rax), %xmm3
+# CHECK-NEXT: 1 1 1.00 vpshad %xmm0, %xmm1, %xmm3
+# CHECK-NEXT: 2 7 1.00 * vpshad (%rax), %xmm0, %xmm3
+# CHECK-NEXT: 2 7 1.00 * vpshad %xmm0, (%rax), %xmm3
+# CHECK-NEXT: 1 1 1.00 vpshaq %xmm0, %xmm1, %xmm3
+# CHECK-NEXT: 2 7 1.00 * vpshaq (%rax), %xmm0, %xmm3
+# CHECK-NEXT: 2 7 1.00 * vpshaq %xmm0, (%rax), %xmm3
+# CHECK-NEXT: 1 1 1.00 vpshaw %xmm0, %xmm1, %xmm3
+# CHECK-NEXT: 2 7 1.00 * vpshaw (%rax), %xmm0, %xmm3
+# CHECK-NEXT: 2 7 1.00 * vpshaw %xmm0, (%rax), %xmm3
+# CHECK-NEXT: 1 1 1.00 vpshlb %xmm0, %xmm1, %xmm3
+# CHECK-NEXT: 2 7 1.00 * vpshlb (%rax), %xmm0, %xmm3
+# CHECK-NEXT: 2 7 1.00 * vpshlb %xmm0, (%rax), %xmm3
+# CHECK-NEXT: 1 1 1.00 vpshld %xmm0, %xmm1, %xmm3
+# CHECK-NEXT: 2 7 1.00 * vpshld (%rax), %xmm0, %xmm3
+# CHECK-NEXT: 2 7 1.00 * vpshld %xmm0, (%rax), %xmm3
+# CHECK-NEXT: 1 1 1.00 vpshlq %xmm0, %xmm1, %xmm3
+# CHECK-NEXT: 2 7 1.00 * vpshlq (%rax), %xmm0, %xmm3
+# CHECK-NEXT: 2 7 1.00 * vpshlq %xmm0, (%rax), %xmm3
+# CHECK-NEXT: 1 1 1.00 vpshlw %xmm0, %xmm1, %xmm3
+# CHECK-NEXT: 2 7 1.00 * vpshlw (%rax), %xmm0, %xmm3
+# CHECK-NEXT: 2 7 1.00 * vpshlw %xmm0, (%rax), %xmm3
+
+# CHECK: Resources:
+# CHECK-NEXT: [0] - SBDivider
+# CHECK-NEXT: [1] - SBFPDivider
+# CHECK-NEXT: [2] - SBPort0
+# CHECK-NEXT: [3] - SBPort1
+# CHECK-NEXT: [4] - SBPort4
+# CHECK-NEXT: [5] - SBPort5
+# CHECK-NEXT: [6.0] - SBPort23
+# CHECK-NEXT: [6.1] - SBPort23
+
+# CHECK: Resource pressure per iteration:
+# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6.0] [6.1]
+# CHECK-NEXT: - - 68.00 68.00 - 71.00 41.50 41.50
+
+# CHECK: Resource pressure by instruction:
+# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6.0] [6.1] Instructions:
+# CHECK-NEXT: - - - 1.00 - - - - vfrczpd %xmm0, %xmm3
+# CHECK-NEXT: - - - 1.00 - - 0.50 0.50 vfrczpd (%rax), %xmm3
+# CHECK-NEXT: - - - 1.00 - - - - vfrczpd %ymm0, %ymm3
+# CHECK-NEXT: - - - 1.00 - - 0.50 0.50 vfrczpd (%rax), %ymm3
+# CHECK-NEXT: - - - 1.00 - - - - vfrczps %xmm0, %xmm3
+# CHECK-NEXT: - - - 1.00 - - 0.50 0.50 vfrczps (%rax), %xmm3
+# CHECK-NEXT: - - - 1.00 - - - - vfrczps %ymm0, %ymm3
+# CHECK-NEXT: - - - 1.00 - - 0.50 0.50 vfrczps (%rax), %ymm3
+# CHECK-NEXT: - - - 1.00 - - - - vfrczsd %xmm0, %xmm3
+# CHECK-NEXT: - - - 1.00 - - 0.50 0.50 vfrczsd (%rax), %xmm3
+# CHECK-NEXT: - - - 1.00 - - - - vfrczss %xmm0, %xmm3
+# CHECK-NEXT: - - - 1.00 - - 0.50 0.50 vfrczss (%rax), %xmm3
+# CHECK-NEXT: - - - 0.50 - 0.50 - - vpcmov %xmm0, %xmm1, %xmm2, %xmm3
+# CHECK-NEXT: - - - 0.50 - 0.50 0.50 0.50 vpcmov (%rax), %xmm0, %xmm1, %xmm3
+# CHECK-NEXT: - - - 0.50 - 0.50 0.50 0.50 vpcmov %xmm0, (%rax), %xmm1, %xmm3
+# CHECK-NEXT: - - - - - 1.00 - - vpcmov %ymm0, %ymm1, %ymm2, %ymm3
+# CHECK-NEXT: - - - - - 1.00 0.50 0.50 vpcmov (%rax), %ymm0, %ymm1, %ymm3
+# CHECK-NEXT: - - - - - 1.00 0.50 0.50 vpcmov %ymm0, (%rax), %ymm1, %ymm3
+# CHECK-NEXT: - - - 0.50 - 0.50 - - vpcomb $0, %xmm0, %xmm1, %xmm3
+# CHECK-NEXT: - - - 0.50 - 0.50 0.50 0.50 vpcomb $0, (%rax), %xmm0, %xmm3
+# CHECK-NEXT: - - - 0.50 - 0.50 - - vpcomd $0, %xmm0, %xmm1, %xmm3
+# CHECK-NEXT: - - - 0.50 - 0.50 0.50 0.50 vpcomd $0, (%rax), %xmm0, %xmm3
+# CHECK-NEXT: - - - 0.50 - 0.50 - - vpcomq $0, %xmm0, %xmm1, %xmm3
+# CHECK-NEXT: - - - 0.50 - 0.50 0.50 0.50 vpcomq $0, (%rax), %xmm0, %xmm3
+# CHECK-NEXT: - - - 0.50 - 0.50 - - vpcomub $0, %xmm0, %xmm1, %xmm3
+# CHECK-NEXT: - - - 0.50 - 0.50 0.50 0.50 vpcomub $0, (%rax), %xmm0, %xmm3
+# CHECK-NEXT: - - - 0.50 - 0.50 - - vpcomud $0, %xmm0, %xmm1, %xmm3
+# CHECK-NEXT: - - - 0.50 - 0.50 0.50 0.50 vpcomud $0, (%rax), %xmm0, %xmm3
+# CHECK-NEXT: - - - 0.50 - 0.50 - - vpcomuq $0, %xmm0, %xmm1, %xmm3
+# CHECK-NEXT: - - - 0.50 - 0.50 0.50 0.50 vpcomuq $0, (%rax), %xmm0, %xmm3
+# CHECK-NEXT: - - - 0.50 - 0.50 - - vpcomuw $0, %xmm0, %xmm1, %xmm3
+# CHECK-NEXT: - - - 0.50 - 0.50 0.50 0.50 vpcomuw $0, (%rax), %xmm0, %xmm3
+# CHECK-NEXT: - - - 0.50 - 0.50 - - vpcomw $0, %xmm0, %xmm1, %xmm3
+# CHECK-NEXT: - - - 0.50 - 0.50 0.50 0.50 vpcomw $0, (%rax), %xmm0, %xmm3
+# CHECK-NEXT: - - - - - 1.00 - - vpermil2pd $0, %xmm0, %xmm1, %xmm2, %xmm3
+# CHECK-NEXT: - - - - - 1.00 0.50 0.50 vpermil2pd $0, (%rax), %xmm0, %xmm1, %xmm3
+# CHECK-NEXT: - - - - - 1.00 0.50 0.50 vpermil2pd $0, %xmm0, (%rax), %xmm1, %xmm3
+# CHECK-NEXT: - - - - - 1.00 - - vpermil2pd $0, %ymm0, %ymm1, %ymm2, %ymm3
+# CHECK-NEXT: - - - - - 1.00 0.50 0.50 vpermil2pd $0, (%rax), %ymm0, %ymm1, %ymm3
+# CHECK-NEXT: - - - - - 1.00 0.50 0.50 vpermil2pd $0, %ymm0, (%rax), %ymm1, %ymm3
+# CHECK-NEXT: - - - - - 1.00 - - vpermil2ps $0, %xmm0, %xmm1, %xmm2, %xmm3
+# CHECK-NEXT: - - - - - 1.00 0.50 0.50 vpermil2ps $0, (%rax), %xmm0, %xmm1, %xmm3
+# CHECK-NEXT: - - - - - 1.00 0.50 0.50 vpermil2ps $0, %xmm0, (%rax), %xmm1, %xmm3
+# CHECK-NEXT: - - - - - 1.00 - - vpermil2ps $0, %ymm0, %ymm1, %ymm2, %ymm3
+# CHECK-NEXT: - - - - - 1.00 0.50 0.50 vpermil2ps $0, (%rax), %ymm0, %ymm1, %ymm3
+# CHECK-NEXT: - - - - - 1.00 0.50 0.50 vpermil2ps $0, %ymm0, (%rax), %ymm1, %ymm3
+# CHECK-NEXT: - - - 1.50 - 1.50 - - vphaddbd %xmm0, %xmm3
+# CHECK-NEXT: - - - 1.50 - 1.50 0.50 0.50 vphaddbd (%rax), %xmm3
+# CHECK-NEXT: - - - 1.50 - 1.50 - - vphaddbq %xmm0, %xmm3
+# CHECK-NEXT: - - - 1.50 - 1.50 0.50 0.50 vphaddbq (%rax), %xmm3
+# CHECK-NEXT: - - - 1.50 - 1.50 - - vphaddbw %xmm0, %xmm3
+# CHECK-NEXT: - - - 1.50 - 1.50 0.50 0.50 vphaddbw (%rax), %xmm3
+# CHECK-NEXT: - - - 1.50 - 1.50 - - vphadddq %xmm0, %xmm3
+# CHECK-NEXT: - - - 1.50 - 1.50 0.50 0.50 vphadddq (%rax), %xmm3
+# CHECK-NEXT: - - - 1.50 - 1.50 - - vphaddubd %xmm0, %xmm3
+# CHECK-NEXT: - - - 1.50 - 1.50 0.50 0.50 vphaddubd (%rax), %xmm3
+# CHECK-NEXT: - - - 1.50 - 1.50 - - vphaddubq %xmm0, %xmm3
+# CHECK-NEXT: - - - 1.50 - 1.50 0.50 0.50 vphaddubq (%rax), %xmm3
+# CHECK-NEXT: - - - 1.50 - 1.50 - - vphaddubw %xmm0, %xmm3
+# CHECK-NEXT: - - - 1.50 - 1.50 0.50 0.50 vphaddubw (%rax), %xmm3
+# CHECK-NEXT: - - - 1.50 - 1.50 - - vphaddudq %xmm0, %xmm3
+# CHECK-NEXT: - - - 1.50 - 1.50 0.50 0.50 vphaddudq (%rax), %xmm3
+# CHECK-NEXT: - - - 1.50 - 1.50 - - vphadduwd %xmm0, %xmm3
+# CHECK-NEXT: - - - 1.50 - 1.50 0.50 0.50 vphadduwd (%rax), %xmm3
+# CHECK-NEXT: - - - 1.50 - 1.50 - - vphadduwq %xmm0, %xmm3
+# CHECK-NEXT: - - - 1.50 - 1.50 0.50 0.50 vphadduwq (%rax), %xmm3
+# CHECK-NEXT: - - - 1.50 - 1.50 - - vphaddwd %xmm0, %xmm3
+# CHECK-NEXT: - - - 1.50 - 1.50 0.50 0.50 vphaddwd (%rax), %xmm3
+# CHECK-NEXT: - - - 1.50 - 1.50 - - vphaddwq %xmm0, %xmm3
+# CHECK-NEXT: - - - 1.50 - 1.50 0.50 0.50 vphaddwq (%rax), %xmm3
+# CHECK-NEXT: - - - 1.50 - 1.50 - - vphsubbw %xmm0, %xmm3
+# CHECK-NEXT: - - - 1.50 - 1.50 0.50 0.50 vphsubbw (%rax), %xmm3
+# CHECK-NEXT: - - - 1.50 - 1.50 - - vphsubdq %xmm0, %xmm3
+# CHECK-NEXT: - - - 1.50 - 1.50 0.50 0.50 vphsubdq (%rax), %xmm3
+# CHECK-NEXT: - - - 1.50 - 1.50 - - vphsubwd %xmm0, %xmm3
+# CHECK-NEXT: - - - 1.50 - 1.50 0.50 0.50 vphsubwd (%rax), %xmm3
+# CHECK-NEXT: - - 1.00 - - - - - vpmacsdd %xmm0, %xmm1, %xmm2, %xmm3
+# CHECK-NEXT: - - 1.00 - - - 0.50 0.50 vpmacsdd %xmm0, (%rax), %xmm1, %xmm3
+# CHECK-NEXT: - - 1.00 - - - - - vpmacsdqh %xmm0, %xmm1, %xmm2, %xmm3
+# CHECK-NEXT: - - 1.00 - - - 0.50 0.50 vpmacsdqh %xmm0, (%rax), %xmm1, %xmm3
+# CHECK-NEXT: - - 1.00 - - - - - vpmacsdql %xmm0, %xmm1, %xmm2, %xmm3
+# CHECK-NEXT: - - 1.00 - - - 0.50 0.50 vpmacsdql %xmm0, (%rax), %xmm1, %xmm3
+# CHECK-NEXT: - - 1.00 - - - - - vpmacssdd %xmm0, %xmm1, %xmm2, %xmm3
+# CHECK-NEXT: - - 1.00 - - - 0.50 0.50 vpmacssdd %xmm0, (%rax), %xmm1, %xmm3
+# CHECK-NEXT: - - 1.00 - - - - - vpmacssdqh %xmm0, %xmm1, %xmm2, %xmm3
+# CHECK-NEXT: - - 1.00 - - - 0.50 0.50 vpmacssdqh %xmm0, (%rax), %xmm1, %xmm3
+# CHECK-NEXT: - - 1.00 - - - - - vpmacssdql %xmm0, %xmm1, %xmm2, %xmm3
+# CHECK-NEXT: - - 1.00 - - - 0.50 0.50 vpmacssdql %xmm0, (%rax), %xmm1, %xmm3
+# CHECK-NEXT: - - 1.00 - - - - - vpmacsswd %xmm0, %xmm1, %xmm2, %xmm3
+# CHECK-NEXT: - - 1.00 - - - 0.50 0.50 vpmacsswd %xmm0, (%rax), %xmm1, %xmm3
+# CHECK-NEXT: - - 1.00 - - - - - vpmacssww %xmm0, %xmm1, %xmm2, %xmm3
+# CHECK-NEXT: - - 1.00 - - - 0.50 0.50 vpmacssww %xmm0, (%rax), %xmm1, %xmm3
+# CHECK-NEXT: - - 1.00 - - - - - vpmacswd %xmm0, %xmm1, %xmm2, %xmm3
+# CHECK-NEXT: - - 1.00 - - - 0.50 0.50 vpmacswd %xmm0, (%rax), %xmm1, %xmm3
+# CHECK-NEXT: - - 1.00 - - - - - vpmacsww %xmm0, %xmm1, %xmm2, %xmm3
+# CHECK-NEXT: - - 1.00 - - - 0.50 0.50 vpmacsww %xmm0, (%rax), %xmm1, %xmm3
+# CHECK-NEXT: - - 1.00 - - - - - vpmadcsswd %xmm0, %xmm1, %xmm2, %xmm3
+# CHECK-NEXT: - - 1.00 - - - 0.50 0.50 vpmadcsswd %xmm0, (%rax), %xmm1, %xmm3
+# CHECK-NEXT: - - 1.00 - - - - - vpmadcswd %xmm0, %xmm1, %xmm2, %xmm3
+# CHECK-NEXT: - - 1.00 - - - 0.50 0.50 vpmadcswd %xmm0, (%rax), %xmm1, %xmm3
+# CHECK-NEXT: - - - 0.50 - 0.50 - - vpperm %xmm0, %xmm1, %xmm2, %xmm3
+# CHECK-NEXT: - - - 0.50 - 0.50 0.50 0.50 vpperm (%rax), %xmm0, %xmm1, %xmm3
+# CHECK-NEXT: - - - 0.50 - 0.50 0.50 0.50 vpperm %xmm0, (%rax), %xmm1, %xmm3
+# CHECK-NEXT: - - 1.00 - - - - - vprotb %xmm0, %xmm1, %xmm3
+# CHECK-NEXT: - - 1.00 - - - 0.50 0.50 vprotb (%rax), %xmm0, %xmm3
+# CHECK-NEXT: - - 1.00 - - - 0.50 0.50 vprotb %xmm0, (%rax), %xmm3
+# CHECK-NEXT: - - 1.00 - - - - - vprotb $0, %xmm0, %xmm3
+# CHECK-NEXT: - - 1.00 - - - 0.50 0.50 vprotb $0, (%rax), %xmm3
+# CHECK-NEXT: - - 1.00 - - - - - vprotd %xmm0, %xmm1, %xmm3
+# CHECK-NEXT: - - 1.00 - - - 0.50 0.50 vprotd (%rax), %xmm0, %xmm3
+# CHECK-NEXT: - - 1.00 - - - 0.50 0.50 vprotd %xmm0, (%rax), %xmm3
+# CHECK-NEXT: - - 1.00 - - - - - vprotd $0, %xmm0, %xmm3
+# CHECK-NEXT: - - 1.00 - - - 0.50 0.50 vprotd $0, (%rax), %xmm3
+# CHECK-NEXT: - - 1.00 - - - - - vprotq %xmm0, %xmm1, %xmm3
+# CHECK-NEXT: - - 1.00 - - - 0.50 0.50 vprotq (%rax), %xmm0, %xmm3
+# CHECK-NEXT: - - 1.00 - - - 0.50 0.50 vprotq %xmm0, (%rax), %xmm3
+# CHECK-NEXT: - - 1.00 - - - - - vprotq $0, %xmm0, %xmm3
+# CHECK-NEXT: - - 1.00 - - - 0.50 0.50 vprotq $0, (%rax), %xmm3
+# CHECK-NEXT: - - 1.00 - - - - - vprotw %xmm0, %xmm1, %xmm3
+# CHECK-NEXT: - - 1.00 - - - 0.50 0.50 vprotw (%rax), %xmm0, %xmm3
+# CHECK-NEXT: - - 1.00 - - - 0.50 0.50 vprotw %xmm0, (%rax), %xmm3
+# CHECK-NEXT: - - 1.00 - - - - - vprotw $0, %xmm0, %xmm3
+# CHECK-NEXT: - - 1.00 - - - 0.50 0.50 vprotw $0, (%rax), %xmm3
+# CHECK-NEXT: - - 1.00 - - - - - vpshab %xmm0, %xmm1, %xmm3
+# CHECK-NEXT: - - 1.00 - - - 0.50 0.50 vpshab (%rax), %xmm0, %xmm3
+# CHECK-NEXT: - - 1.00 - - - 0.50 0.50 vpshab %xmm0, (%rax), %xmm3
+# CHECK-NEXT: - - 1.00 - - - - - vpshad %xmm0, %xmm1, %xmm3
+# CHECK-NEXT: - - 1.00 - - - 0.50 0.50 vpshad (%rax), %xmm0, %xmm3
+# CHECK-NEXT: - - 1.00 - - - 0.50 0.50 vpshad %xmm0, (%rax), %xmm3
+# CHECK-NEXT: - - 1.00 - - - - - vpshaq %xmm0, %xmm1, %xmm3
+# CHECK-NEXT: - - 1.00 - - - 0.50 0.50 vpshaq (%rax), %xmm0, %xmm3
+# CHECK-NEXT: - - 1.00 - - - 0.50 0.50 vpshaq %xmm0, (%rax), %xmm3
+# CHECK-NEXT: - - 1.00 - - - - - vpshaw %xmm0, %xmm1, %xmm3
+# CHECK-NEXT: - - 1.00 - - - 0.50 0.50 vpshaw (%rax), %xmm0, %xmm3
+# CHECK-NEXT: - - 1.00 - - - 0.50 0.50 vpshaw %xmm0, (%rax), %xmm3
+# CHECK-NEXT: - - 1.00 - - - - - vpshlb %xmm0, %xmm1, %xmm3
+# CHECK-NEXT: - - 1.00 - - - 0.50 0.50 vpshlb (%rax), %xmm0, %xmm3
+# CHECK-NEXT: - - 1.00 - - - 0.50 0.50 vpshlb %xmm0, (%rax), %xmm3
+# CHECK-NEXT: - - 1.00 - - - - - vpshld %xmm0, %xmm1, %xmm3
+# CHECK-NEXT: - - 1.00 - - - 0.50 0.50 vpshld (%rax), %xmm0, %xmm3
+# CHECK-NEXT: - - 1.00 - - - 0.50 0.50 vpshld %xmm0, (%rax), %xmm3
+# CHECK-NEXT: - - 1.00 - - - - - vpshlq %xmm0, %xmm1, %xmm3
+# CHECK-NEXT: - - 1.00 - - - 0.50 0.50 vpshlq (%rax), %xmm0, %xmm3
+# CHECK-NEXT: - - 1.00 - - - 0.50 0.50 vpshlq %xmm0, (%rax), %xmm3
+# CHECK-NEXT: - - 1.00 - - - - - vpshlw %xmm0, %xmm1, %xmm3
+# CHECK-NEXT: - - 1.00 - - - 0.50 0.50 vpshlw (%rax), %xmm0, %xmm3
+# CHECK-NEXT: - - 1.00 - - - 0.50 0.50 vpshlw %xmm0, (%rax), %xmm3
--- /dev/null
+# NOTE: Assertions have been autogenerated by utils/update_mca_test_checks.py
+# RUN: llvm-mca -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -iterations=1 -scheduler-stats < %s | FileCheck %s
+
+vmulps (%rsi), %xmm0, %xmm0
+add %rsi, %rsi
+
+# CHECK: Iterations: 1
+# CHECK-NEXT: Instructions: 2
+# CHECK-NEXT: Total Cycles: 14
+# CHECK-NEXT: Total uOps: 3
+
+# CHECK: Dispatch Width: 4
+# CHECK-NEXT: uOps Per Cycle: 0.21
+# CHECK-NEXT: IPC: 0.14
+# CHECK-NEXT: Block RThroughput: 1.0
+
+# CHECK: Instruction Info:
+# CHECK-NEXT: [1]: #uOps
+# CHECK-NEXT: [2]: Latency
+# CHECK-NEXT: [3]: RThroughput
+# CHECK-NEXT: [4]: MayLoad
+# CHECK-NEXT: [5]: MayStore
+# CHECK-NEXT: [6]: HasSideEffects (U)
+
+# CHECK: [1] [2] [3] [4] [5] [6] Instructions:
+# CHECK-NEXT: 2 11 1.00 * vmulps (%rsi), %xmm0, %xmm0
+# CHECK-NEXT: 1 1 0.33 addq %rsi, %rsi
+
+# CHECK: Schedulers - number of cycles where we saw N instructions issued:
+# CHECK-NEXT: [# issued], [# cycles]
+# CHECK-NEXT: 0, 13 (92.9%)
+# CHECK-NEXT: 2, 1 (7.1%)
+
+# CHECK: Scheduler's queue usage:
+# CHECK-NEXT: [1] Resource name.
+# CHECK-NEXT: [2] Average number of used buffer entries.
+# CHECK-NEXT: [3] Maximum number of used buffer entries.
+# CHECK-NEXT: [4] Total number of buffer entries.
+
+# CHECK: [1] [2] [3] [4]
+# CHECK-NEXT: SBPortAny 0 2 54
+
+# CHECK: Resources:
+# CHECK-NEXT: [0] - SBDivider
+# CHECK-NEXT: [1] - SBFPDivider
+# CHECK-NEXT: [2] - SBPort0
+# CHECK-NEXT: [3] - SBPort1
+# CHECK-NEXT: [4] - SBPort4
+# CHECK-NEXT: [5] - SBPort5
+# CHECK-NEXT: [6.0] - SBPort23
+# CHECK-NEXT: [6.1] - SBPort23
+
+# CHECK: Resource pressure per iteration:
+# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6.0] [6.1]
+# CHECK-NEXT: - - 1.00 - - 1.00 - 1.00
+
+# CHECK: Resource pressure by instruction:
+# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6.0] [6.1] Instructions:
+# CHECK-NEXT: - - 1.00 - - - - 1.00 vmulps (%rsi), %xmm0, %xmm0
+# CHECK-NEXT: - - - - - 1.00 - - addq %rsi, %rsi
--- /dev/null
+# NOTE: Assertions have been autogenerated by utils/update_mca_test_checks.py
+# RUN: llvm-mca -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -iterations=100 < %s | FileCheck %s
+
+add %edi, %eax
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 100
+# CHECK-NEXT: Total Cycles: 103
+# CHECK-NEXT: Total uOps: 100
+
+# CHECK: Dispatch Width: 4
+# CHECK-NEXT: uOps Per Cycle: 0.97
+# CHECK-NEXT: IPC: 0.97
+# CHECK-NEXT: Block RThroughput: 0.3
+
+# CHECK: Instruction Info:
+# CHECK-NEXT: [1]: #uOps
+# CHECK-NEXT: [2]: Latency
+# CHECK-NEXT: [3]: RThroughput
+# CHECK-NEXT: [4]: MayLoad
+# CHECK-NEXT: [5]: MayStore
+# CHECK-NEXT: [6]: HasSideEffects (U)
+
+# CHECK: [1] [2] [3] [4] [5] [6] Instructions:
+# CHECK-NEXT: 1 1 0.33 addl %edi, %eax
+
+# CHECK: Resources:
+# CHECK-NEXT: [0] - SBDivider
+# CHECK-NEXT: [1] - SBFPDivider
+# CHECK-NEXT: [2] - SBPort0
+# CHECK-NEXT: [3] - SBPort1
+# CHECK-NEXT: [4] - SBPort4
+# CHECK-NEXT: [5] - SBPort5
+# CHECK-NEXT: [6.0] - SBPort23
+# CHECK-NEXT: [6.1] - SBPort23
+
+# CHECK: Resource pressure per iteration:
+# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6.0] [6.1]
+# CHECK-NEXT: - - 0.33 0.33 - 0.34 - -
+
+# CHECK: Resource pressure by instruction:
+# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6.0] [6.1] Instructions:
+# CHECK-NEXT: - - 0.33 0.33 - 0.34 - - addl %edi, %eax
--- /dev/null
+# NOTE: Assertions have been autogenerated by utils/update_mca_test_checks.py
+# RUN: llvm-mca -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -timeline -timeline-max-iterations=3 < %s | FileCheck %s
+
+leaq 8(%rsp, %rdi, 2), %rax
+vbroadcastss (%rax), %ymm0
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 200
+# CHECK-NEXT: Total Cycles: 60
+# CHECK-NEXT: Total uOps: 200
+
+# CHECK: Dispatch Width: 4
+# CHECK-NEXT: uOps Per Cycle: 3.33
+# CHECK-NEXT: IPC: 3.33
+# CHECK-NEXT: Block RThroughput: 0.5
+
+# CHECK: Instruction Info:
+# CHECK-NEXT: [1]: #uOps
+# CHECK-NEXT: [2]: Latency
+# CHECK-NEXT: [3]: RThroughput
+# CHECK-NEXT: [4]: MayLoad
+# CHECK-NEXT: [5]: MayStore
+# CHECK-NEXT: [6]: HasSideEffects (U)
+
+# CHECK: [1] [2] [3] [4] [5] [6] Instructions:
+# CHECK-NEXT: 1 1 0.50 leaq 8(%rsp,%rdi,2), %rax
+# CHECK-NEXT: 1 7 0.50 * vbroadcastss (%rax), %ymm0
+
+# CHECK: Resources:
+# CHECK-NEXT: [0] - SBDivider
+# CHECK-NEXT: [1] - SBFPDivider
+# CHECK-NEXT: [2] - SBPort0
+# CHECK-NEXT: [3] - SBPort1
+# CHECK-NEXT: [4] - SBPort4
+# CHECK-NEXT: [5] - SBPort5
+# CHECK-NEXT: [6.0] - SBPort23
+# CHECK-NEXT: [6.1] - SBPort23
+
+# CHECK: Resource pressure per iteration:
+# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6.0] [6.1]
+# CHECK-NEXT: - - 0.50 0.50 - - 0.50 0.50
+
+# CHECK: Resource pressure by instruction:
+# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6.0] [6.1] Instructions:
+# CHECK-NEXT: - - 0.50 0.50 - - - - leaq 8(%rsp,%rdi,2), %rax
+# CHECK-NEXT: - - - - - - 0.50 0.50 vbroadcastss (%rax), %ymm0
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 01
+# CHECK-NEXT: Index 0123456789
+
+# CHECK: [0,0] DeER . .. leaq 8(%rsp,%rdi,2), %rax
+# CHECK-NEXT: [0,1] D=eeeeeeeER. vbroadcastss (%rax), %ymm0
+# CHECK-NEXT: [1,0] DeE-------R. leaq 8(%rsp,%rdi,2), %rax
+# CHECK-NEXT: [1,1] D=eeeeeeeER. vbroadcastss (%rax), %ymm0
+# CHECK-NEXT: [2,0] .DeE------R. leaq 8(%rsp,%rdi,2), %rax
+# CHECK-NEXT: [2,1] .D=eeeeeeeER vbroadcastss (%rax), %ymm0
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 3 1.0 1.0 4.3 leaq 8(%rsp,%rdi,2), %rax
+# CHECK-NEXT: 1. 3 2.0 0.0 0.0 vbroadcastss (%rax), %ymm0
--- /dev/null
+# NOTE: Assertions have been autogenerated by utils/update_mca_test_checks.py
+# RUN: llvm-mca -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -iterations=1 -timeline -resource-pressure=false < %s | FileCheck %s
+
+vaddps %xmm0, %xmm0, %xmm1
+vandps (%rdi), %xmm1, %xmm2
+
+# CHECK: Iterations: 1
+# CHECK-NEXT: Instructions: 2
+# CHECK-NEXT: Total Cycles: 10
+# CHECK-NEXT: Total uOps: 3
+
+# CHECK: Dispatch Width: 4
+# CHECK-NEXT: uOps Per Cycle: 0.30
+# CHECK-NEXT: IPC: 0.20
+# CHECK-NEXT: Block RThroughput: 1.0
+
+# CHECK: Instruction Info:
+# CHECK-NEXT: [1]: #uOps
+# CHECK-NEXT: [2]: Latency
+# CHECK-NEXT: [3]: RThroughput
+# CHECK-NEXT: [4]: MayLoad
+# CHECK-NEXT: [5]: MayStore
+# CHECK-NEXT: [6]: HasSideEffects (U)
+
+# CHECK: [1] [2] [3] [4] [5] [6] Instructions:
+# CHECK-NEXT: 1 3 1.00 vaddps %xmm0, %xmm0, %xmm1
+# CHECK-NEXT: 2 7 1.00 * vandps (%rdi), %xmm1, %xmm2
+
+# CHECK: Timeline view:
+# CHECK-NEXT: Index 0123456789
+
+# CHECK: [0,0] DeeeER . vaddps %xmm0, %xmm0, %xmm1
+# CHECK-NEXT: [0,1] DeeeeeeeER vandps (%rdi), %xmm1, %xmm2
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 1.0 1.0 0.0 vaddps %xmm0, %xmm0, %xmm1
+# CHECK-NEXT: 1. 1 1.0 0.0 0.0 vandps (%rdi), %xmm1, %xmm2
--- /dev/null
+# NOTE: Assertions have been autogenerated by utils/update_mca_test_checks.py
+# RUN: llvm-mca -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -iterations=1 -timeline -resource-pressure=false < %s | FileCheck %s
+
+vaddps %ymm0, %ymm0, %ymm1
+vandps (%rdi), %ymm1, %ymm2
+
+# CHECK: Iterations: 1
+# CHECK-NEXT: Instructions: 2
+# CHECK-NEXT: Total Cycles: 11
+# CHECK-NEXT: Total uOps: 3
+
+# CHECK: Dispatch Width: 4
+# CHECK-NEXT: uOps Per Cycle: 0.27
+# CHECK-NEXT: IPC: 0.18
+# CHECK-NEXT: Block RThroughput: 1.0
+
+# CHECK: Instruction Info:
+# CHECK-NEXT: [1]: #uOps
+# CHECK-NEXT: [2]: Latency
+# CHECK-NEXT: [3]: RThroughput
+# CHECK-NEXT: [4]: MayLoad
+# CHECK-NEXT: [5]: MayStore
+# CHECK-NEXT: [6]: HasSideEffects (U)
+
+# CHECK: [1] [2] [3] [4] [5] [6] Instructions:
+# CHECK-NEXT: 1 3 1.00 vaddps %ymm0, %ymm0, %ymm1
+# CHECK-NEXT: 2 8 1.00 * vandps (%rdi), %ymm1, %ymm2
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 0
+# CHECK-NEXT: Index 0123456789
+
+# CHECK: [0,0] DeeeER . vaddps %ymm0, %ymm0, %ymm1
+# CHECK-NEXT: [0,1] DeeeeeeeeER vandps (%rdi), %ymm1, %ymm2
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 1.0 1.0 0.0 vaddps %ymm0, %ymm0, %ymm1
+# CHECK-NEXT: 1. 1 1.0 0.0 0.0 vandps (%rdi), %ymm1, %ymm2
--- /dev/null
+# NOTE: Assertions have been autogenerated by utils/update_mca_test_checks.py
+# RUN: llvm-mca -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -timeline -timeline-max-iterations=2 < %s | FileCheck %s
+
+ vmulps %ymm0, %ymm1, %ymm2
+ vfrczpd %xmm1, %xmm2
+ vmulps %ymm2, %ymm3, %ymm4
+ vaddps %ymm4, %ymm5, %ymm6
+ vmulps %ymm6, %ymm3, %ymm4
+ vaddps %ymm4, %ymm5, %ymm0
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 600
+# CHECK-NEXT: Total Cycles: 318
+# CHECK-NEXT: Total uOps: 600
+
+# CHECK: Dispatch Width: 4
+# CHECK-NEXT: uOps Per Cycle: 1.89
+# CHECK-NEXT: IPC: 1.89
+# CHECK-NEXT: Block RThroughput: 3.0
+
+# CHECK: Instruction Info:
+# CHECK-NEXT: [1]: #uOps
+# CHECK-NEXT: [2]: Latency
+# CHECK-NEXT: [3]: RThroughput
+# CHECK-NEXT: [4]: MayLoad
+# CHECK-NEXT: [5]: MayStore
+# CHECK-NEXT: [6]: HasSideEffects (U)
+
+# CHECK: [1] [2] [3] [4] [5] [6] Instructions:
+# CHECK-NEXT: 1 5 1.00 vmulps %ymm0, %ymm1, %ymm2
+# CHECK-NEXT: 1 3 1.00 vfrczpd %xmm1, %xmm2
+# CHECK-NEXT: 1 5 1.00 vmulps %ymm2, %ymm3, %ymm4
+# CHECK-NEXT: 1 3 1.00 vaddps %ymm4, %ymm5, %ymm6
+# CHECK-NEXT: 1 5 1.00 vmulps %ymm6, %ymm3, %ymm4
+# CHECK-NEXT: 1 3 1.00 vaddps %ymm4, %ymm5, %ymm0
+
+# CHECK: Resources:
+# CHECK-NEXT: [0] - SBDivider
+# CHECK-NEXT: [1] - SBFPDivider
+# CHECK-NEXT: [2] - SBPort0
+# CHECK-NEXT: [3] - SBPort1
+# CHECK-NEXT: [4] - SBPort4
+# CHECK-NEXT: [5] - SBPort5
+# CHECK-NEXT: [6.0] - SBPort23
+# CHECK-NEXT: [6.1] - SBPort23
+
+# CHECK: Resource pressure per iteration:
+# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6.0] [6.1]
+# CHECK-NEXT: - - 3.00 3.00 - - - -
+
+# CHECK: Resource pressure by instruction:
+# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6.0] [6.1] Instructions:
+# CHECK-NEXT: - - 1.00 - - - - - vmulps %ymm0, %ymm1, %ymm2
+# CHECK-NEXT: - - - 1.00 - - - - vfrczpd %xmm1, %xmm2
+# CHECK-NEXT: - - 1.00 - - - - - vmulps %ymm2, %ymm3, %ymm4
+# CHECK-NEXT: - - - 1.00 - - - - vaddps %ymm4, %ymm5, %ymm6
+# CHECK-NEXT: - - 1.00 - - - - - vmulps %ymm6, %ymm3, %ymm4
+# CHECK-NEXT: - - - 1.00 - - - - vaddps %ymm4, %ymm5, %ymm0
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 0123456789
+# CHECK-NEXT: Index 0123456789 0123456
+
+# CHECK: [0,0] DeeeeeER . . . .. vmulps %ymm0, %ymm1, %ymm2
+# CHECK-NEXT: [0,1] DeeeE--R . . . .. vfrczpd %xmm1, %xmm2
+# CHECK-NEXT: [0,2] D===eeeeeER . . .. vmulps %ymm2, %ymm3, %ymm4
+# CHECK-NEXT: [0,3] D========eeeER . . .. vaddps %ymm4, %ymm5, %ymm6
+# CHECK-NEXT: [0,4] .D==========eeeeeER . .. vmulps %ymm6, %ymm3, %ymm4
+# CHECK-NEXT: [0,5] .D===============eeeER .. vaddps %ymm4, %ymm5, %ymm0
+# CHECK-NEXT: [1,0] .D==================eeeeeER vmulps %ymm0, %ymm1, %ymm2
+# CHECK-NEXT: [1,1] .DeeeE--------------------R vfrczpd %xmm1, %xmm2
+# CHECK-NEXT: [1,2] . D==eeeeeE---------------R vmulps %ymm2, %ymm3, %ymm4
+# CHECK-NEXT: [1,3] . D=======eeeE------------R vaddps %ymm4, %ymm5, %ymm6
+# CHECK-NEXT: [1,4] . D==========eeeeeE-------R vmulps %ymm6, %ymm3, %ymm4
+# CHECK-NEXT: [1,5] . D===============eeeE----R vaddps %ymm4, %ymm5, %ymm0
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 2 10.0 0.5 0.0 vmulps %ymm0, %ymm1, %ymm2
+# CHECK-NEXT: 1. 2 1.0 1.0 11.0 vfrczpd %xmm1, %xmm2
+# CHECK-NEXT: 2. 2 3.5 0.0 7.5 vmulps %ymm2, %ymm3, %ymm4
+# CHECK-NEXT: 3. 2 8.5 0.0 6.0 vaddps %ymm4, %ymm5, %ymm6
+# CHECK-NEXT: 4. 2 11.0 0.0 3.5 vmulps %ymm6, %ymm3, %ymm4
+# CHECK-NEXT: 5. 2 16.0 0.0 2.0 vaddps %ymm4, %ymm5, %ymm0
--- /dev/null
+# NOTE: Assertions have been autogenerated by utils/update_mca_test_checks.py
+# RUN: llvm-mca -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -timeline -timeline-max-iterations=2 < %s | FileCheck %s
+
+ vmulps %ymm0, %ymm1, %ymm2
+ vpermil2pd $16, %xmm3, %xmm5, %xmm1, %xmm2
+ vmulps %ymm2, %ymm3, %ymm4
+ vaddps %ymm4, %ymm5, %ymm6
+ vmulps %ymm6, %ymm3, %ymm4
+ vaddps %ymm4, %ymm5, %ymm0
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 600
+# CHECK-NEXT: Total Cycles: 316
+# CHECK-NEXT: Total uOps: 600
+
+# CHECK: Dispatch Width: 4
+# CHECK-NEXT: uOps Per Cycle: 1.90
+# CHECK-NEXT: IPC: 1.90
+# CHECK-NEXT: Block RThroughput: 3.0
+
+# CHECK: Instruction Info:
+# CHECK-NEXT: [1]: #uOps
+# CHECK-NEXT: [2]: Latency
+# CHECK-NEXT: [3]: RThroughput
+# CHECK-NEXT: [4]: MayLoad
+# CHECK-NEXT: [5]: MayStore
+# CHECK-NEXT: [6]: HasSideEffects (U)
+
+# CHECK: [1] [2] [3] [4] [5] [6] Instructions:
+# CHECK-NEXT: 1 5 1.00 vmulps %ymm0, %ymm1, %ymm2
+# CHECK-NEXT: 1 1 1.00 vpermil2pd $16, %xmm3, %xmm5, %xmm1, %xmm2
+# CHECK-NEXT: 1 5 1.00 vmulps %ymm2, %ymm3, %ymm4
+# CHECK-NEXT: 1 3 1.00 vaddps %ymm4, %ymm5, %ymm6
+# CHECK-NEXT: 1 5 1.00 vmulps %ymm6, %ymm3, %ymm4
+# CHECK-NEXT: 1 3 1.00 vaddps %ymm4, %ymm5, %ymm0
+
+# CHECK: Resources:
+# CHECK-NEXT: [0] - SBDivider
+# CHECK-NEXT: [1] - SBFPDivider
+# CHECK-NEXT: [2] - SBPort0
+# CHECK-NEXT: [3] - SBPort1
+# CHECK-NEXT: [4] - SBPort4
+# CHECK-NEXT: [5] - SBPort5
+# CHECK-NEXT: [6.0] - SBPort23
+# CHECK-NEXT: [6.1] - SBPort23
+
+# CHECK: Resource pressure per iteration:
+# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6.0] [6.1]
+# CHECK-NEXT: - - 3.00 2.00 - 1.00 - -
+
+# CHECK: Resource pressure by instruction:
+# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6.0] [6.1] Instructions:
+# CHECK-NEXT: - - 1.00 - - - - - vmulps %ymm0, %ymm1, %ymm2
+# CHECK-NEXT: - - - - - 1.00 - - vpermil2pd $16, %xmm3, %xmm5, %xmm1, %xmm2
+# CHECK-NEXT: - - 1.00 - - - - - vmulps %ymm2, %ymm3, %ymm4
+# CHECK-NEXT: - - - 1.00 - - - - vaddps %ymm4, %ymm5, %ymm6
+# CHECK-NEXT: - - 1.00 - - - - - vmulps %ymm6, %ymm3, %ymm4
+# CHECK-NEXT: - - - 1.00 - - - - vaddps %ymm4, %ymm5, %ymm0
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 0123456789
+# CHECK-NEXT: Index 0123456789 01234
+
+# CHECK: [0,0] DeeeeeER . . . . vmulps %ymm0, %ymm1, %ymm2
+# CHECK-NEXT: [0,1] DeE----R . . . . vpermil2pd $16, %xmm3, %xmm5, %xmm1, %xmm2
+# CHECK-NEXT: [0,2] D=eeeeeER . . . . vmulps %ymm2, %ymm3, %ymm4
+# CHECK-NEXT: [0,3] D======eeeER . . . vaddps %ymm4, %ymm5, %ymm6
+# CHECK-NEXT: [0,4] .D========eeeeeER . . vmulps %ymm6, %ymm3, %ymm4
+# CHECK-NEXT: [0,5] .D=============eeeER. . vaddps %ymm4, %ymm5, %ymm0
+# CHECK-NEXT: [1,0] .D================eeeeeER vmulps %ymm0, %ymm1, %ymm2
+# CHECK-NEXT: [1,1] .DeE--------------------R vpermil2pd $16, %xmm3, %xmm5, %xmm1, %xmm2
+# CHECK-NEXT: [1,2] . DeeeeeE---------------R vmulps %ymm2, %ymm3, %ymm4
+# CHECK-NEXT: [1,3] . D=====eeeE------------R vaddps %ymm4, %ymm5, %ymm6
+# CHECK-NEXT: [1,4] . D========eeeeeE-------R vmulps %ymm6, %ymm3, %ymm4
+# CHECK-NEXT: [1,5] . D=============eeeE----R vaddps %ymm4, %ymm5, %ymm0
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 2 9.0 0.5 0.0 vmulps %ymm0, %ymm1, %ymm2
+# CHECK-NEXT: 1. 2 1.0 1.0 12.0 vpermil2pd $16, %xmm3, %xmm5, %xmm1, %xmm2
+# CHECK-NEXT: 2. 2 1.5 0.0 7.5 vmulps %ymm2, %ymm3, %ymm4
+# CHECK-NEXT: 3. 2 6.5 0.0 6.0 vaddps %ymm4, %ymm5, %ymm6
+# CHECK-NEXT: 4. 2 9.0 0.0 3.5 vmulps %ymm6, %ymm3, %ymm4
+# CHECK-NEXT: 5. 2 14.0 0.0 2.0 vaddps %ymm4, %ymm5, %ymm0
--- /dev/null
+# NOTE: Assertions have been autogenerated by utils/update_mca_test_checks.py
+# RUN: llvm-mca -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -timeline -timeline-max-iterations=3 < %s | FileCheck %s
+
+# TODO: Fix the processor resource usage for zero-idiom YMM XOR instructions.
+# Those vector XOR instructions should only consume 1cy of JFPU1 (instead
+# of 2cy).
+
+# LLVM-MCA-BEGIN ZERO-IDIOM-1
+
+vaddps %ymm0, %ymm0, %ymm1
+vxorps %ymm1, %ymm1, %ymm1
+vblendps $2, %ymm1, %ymm2, %ymm3
+
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN ZERO-IDIOM-2
+
+vaddpd %ymm0, %ymm0, %ymm1
+vxorpd %ymm1, %ymm1, %ymm1
+vblendpd $2, %ymm1, %ymm2, %ymm3
+
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN ZERO-IDIOM-3
+vaddps %ymm0, %ymm1, %ymm2
+vandnps %ymm2, %ymm2, %ymm3
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN ZERO-IDIOM-4
+vaddps %ymm0, %ymm1, %ymm2
+vandnps %ymm2, %ymm2, %ymm3
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN ZERO-IDIOM-5
+vperm2f128 $136, %ymm0, %ymm0, %ymm1
+vaddps %ymm1, %ymm1, %ymm0
+# LLVM-MCA-END
+
+# CHECK: [0] Code Region - ZERO-IDIOM-1
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 300
+# CHECK-NEXT: Total Cycles: 107
+# CHECK-NEXT: Total uOps: 300
+
+# CHECK: Dispatch Width: 4
+# CHECK-NEXT: uOps Per Cycle: 2.80
+# CHECK-NEXT: IPC: 2.80
+# CHECK-NEXT: Block RThroughput: 1.0
+
+# CHECK: Instruction Info:
+# CHECK-NEXT: [1]: #uOps
+# CHECK-NEXT: [2]: Latency
+# CHECK-NEXT: [3]: RThroughput
+# CHECK-NEXT: [4]: MayLoad
+# CHECK-NEXT: [5]: MayStore
+# CHECK-NEXT: [6]: HasSideEffects (U)
+
+# CHECK: [1] [2] [3] [4] [5] [6] Instructions:
+# CHECK-NEXT: 1 3 1.00 vaddps %ymm0, %ymm0, %ymm1
+# CHECK-NEXT: 1 1 1.00 vxorps %ymm1, %ymm1, %ymm1
+# CHECK-NEXT: 1 1 0.50 vblendps $2, %ymm1, %ymm2, %ymm3
+
+# CHECK: Resources:
+# CHECK-NEXT: [0] - SBDivider
+# CHECK-NEXT: [1] - SBFPDivider
+# CHECK-NEXT: [2] - SBPort0
+# CHECK-NEXT: [3] - SBPort1
+# CHECK-NEXT: [4] - SBPort4
+# CHECK-NEXT: [5] - SBPort5
+# CHECK-NEXT: [6.0] - SBPort23
+# CHECK-NEXT: [6.1] - SBPort23
+
+# CHECK: Resource pressure per iteration:
+# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6.0] [6.1]
+# CHECK-NEXT: - - 1.00 1.00 - 1.00 - -
+
+# CHECK: Resource pressure by instruction:
+# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6.0] [6.1] Instructions:
+# CHECK-NEXT: - - - 1.00 - - - - vaddps %ymm0, %ymm0, %ymm1
+# CHECK-NEXT: - - - - - 1.00 - - vxorps %ymm1, %ymm1, %ymm1
+# CHECK-NEXT: - - 1.00 - - - - - vblendps $2, %ymm1, %ymm2, %ymm3
+
+# CHECK: Timeline view:
+# CHECK-NEXT: Index 0123456789
+
+# CHECK: [0,0] DeeeER . vaddps %ymm0, %ymm0, %ymm1
+# CHECK-NEXT: [0,1] D===eER . vxorps %ymm1, %ymm1, %ymm1
+# CHECK-NEXT: [0,2] D====eER . vblendps $2, %ymm1, %ymm2, %ymm3
+# CHECK-NEXT: [1,0] D=eeeE-R . vaddps %ymm0, %ymm0, %ymm1
+# CHECK-NEXT: [1,1] .D===eER . vxorps %ymm1, %ymm1, %ymm1
+# CHECK-NEXT: [1,2] .D====eER. vblendps $2, %ymm1, %ymm2, %ymm3
+# CHECK-NEXT: [2,0] .D=eeeE-R. vaddps %ymm0, %ymm0, %ymm1
+# CHECK-NEXT: [2,1] .D====eER. vxorps %ymm1, %ymm1, %ymm1
+# CHECK-NEXT: [2,2] . D====eER vblendps $2, %ymm1, %ymm2, %ymm3
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 3 1.7 1.7 0.7 vaddps %ymm0, %ymm0, %ymm1
+# CHECK-NEXT: 1. 3 4.3 0.0 0.0 vxorps %ymm1, %ymm1, %ymm1
+# CHECK-NEXT: 2. 3 5.0 0.0 0.0 vblendps $2, %ymm1, %ymm2, %ymm3
+
+# CHECK: [1] Code Region - ZERO-IDIOM-2
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 300
+# CHECK-NEXT: Total Cycles: 107
+# CHECK-NEXT: Total uOps: 300
+
+# CHECK: Dispatch Width: 4
+# CHECK-NEXT: uOps Per Cycle: 2.80
+# CHECK-NEXT: IPC: 2.80
+# CHECK-NEXT: Block RThroughput: 1.0
+
+# CHECK: Instruction Info:
+# CHECK-NEXT: [1]: #uOps
+# CHECK-NEXT: [2]: Latency
+# CHECK-NEXT: [3]: RThroughput
+# CHECK-NEXT: [4]: MayLoad
+# CHECK-NEXT: [5]: MayStore
+# CHECK-NEXT: [6]: HasSideEffects (U)
+
+# CHECK: [1] [2] [3] [4] [5] [6] Instructions:
+# CHECK-NEXT: 1 3 1.00 vaddpd %ymm0, %ymm0, %ymm1
+# CHECK-NEXT: 1 1 1.00 vxorpd %ymm1, %ymm1, %ymm1
+# CHECK-NEXT: 1 1 0.50 vblendpd $2, %ymm1, %ymm2, %ymm3
+
+# CHECK: Resources:
+# CHECK-NEXT: [0] - SBDivider
+# CHECK-NEXT: [1] - SBFPDivider
+# CHECK-NEXT: [2] - SBPort0
+# CHECK-NEXT: [3] - SBPort1
+# CHECK-NEXT: [4] - SBPort4
+# CHECK-NEXT: [5] - SBPort5
+# CHECK-NEXT: [6.0] - SBPort23
+# CHECK-NEXT: [6.1] - SBPort23
+
+# CHECK: Resource pressure per iteration:
+# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6.0] [6.1]
+# CHECK-NEXT: - - 1.00 1.00 - 1.00 - -
+
+# CHECK: Resource pressure by instruction:
+# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6.0] [6.1] Instructions:
+# CHECK-NEXT: - - - 1.00 - - - - vaddpd %ymm0, %ymm0, %ymm1
+# CHECK-NEXT: - - - - - 1.00 - - vxorpd %ymm1, %ymm1, %ymm1
+# CHECK-NEXT: - - 1.00 - - - - - vblendpd $2, %ymm1, %ymm2, %ymm3
+
+# CHECK: Timeline view:
+# CHECK-NEXT: Index 0123456789
+
+# CHECK: [0,0] DeeeER . vaddpd %ymm0, %ymm0, %ymm1
+# CHECK-NEXT: [0,1] D===eER . vxorpd %ymm1, %ymm1, %ymm1
+# CHECK-NEXT: [0,2] D====eER . vblendpd $2, %ymm1, %ymm2, %ymm3
+# CHECK-NEXT: [1,0] D=eeeE-R . vaddpd %ymm0, %ymm0, %ymm1
+# CHECK-NEXT: [1,1] .D===eER . vxorpd %ymm1, %ymm1, %ymm1
+# CHECK-NEXT: [1,2] .D====eER. vblendpd $2, %ymm1, %ymm2, %ymm3
+# CHECK-NEXT: [2,0] .D=eeeE-R. vaddpd %ymm0, %ymm0, %ymm1
+# CHECK-NEXT: [2,1] .D====eER. vxorpd %ymm1, %ymm1, %ymm1
+# CHECK-NEXT: [2,2] . D====eER vblendpd $2, %ymm1, %ymm2, %ymm3
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 3 1.7 1.7 0.7 vaddpd %ymm0, %ymm0, %ymm1
+# CHECK-NEXT: 1. 3 4.3 0.0 0.0 vxorpd %ymm1, %ymm1, %ymm1
+# CHECK-NEXT: 2. 3 5.0 0.0 0.0 vblendpd $2, %ymm1, %ymm2, %ymm3
+
+# CHECK: [2] Code Region - ZERO-IDIOM-3
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 200
+# CHECK-NEXT: Total Cycles: 106
+# CHECK-NEXT: Total uOps: 200
+
+# CHECK: Dispatch Width: 4
+# CHECK-NEXT: uOps Per Cycle: 1.89
+# CHECK-NEXT: IPC: 1.89
+# CHECK-NEXT: Block RThroughput: 1.0
+
+# CHECK: Instruction Info:
+# CHECK-NEXT: [1]: #uOps
+# CHECK-NEXT: [2]: Latency
+# CHECK-NEXT: [3]: RThroughput
+# CHECK-NEXT: [4]: MayLoad
+# CHECK-NEXT: [5]: MayStore
+# CHECK-NEXT: [6]: HasSideEffects (U)
+
+# CHECK: [1] [2] [3] [4] [5] [6] Instructions:
+# CHECK-NEXT: 1 3 1.00 vaddps %ymm0, %ymm1, %ymm2
+# CHECK-NEXT: 1 1 1.00 vandnps %ymm2, %ymm2, %ymm3
+
+# CHECK: Resources:
+# CHECK-NEXT: [0] - SBDivider
+# CHECK-NEXT: [1] - SBFPDivider
+# CHECK-NEXT: [2] - SBPort0
+# CHECK-NEXT: [3] - SBPort1
+# CHECK-NEXT: [4] - SBPort4
+# CHECK-NEXT: [5] - SBPort5
+# CHECK-NEXT: [6.0] - SBPort23
+# CHECK-NEXT: [6.1] - SBPort23
+
+# CHECK: Resource pressure per iteration:
+# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6.0] [6.1]
+# CHECK-NEXT: - - - 1.00 - 1.00 - -
+
+# CHECK: Resource pressure by instruction:
+# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6.0] [6.1] Instructions:
+# CHECK-NEXT: - - - 1.00 - - - - vaddps %ymm0, %ymm1, %ymm2
+# CHECK-NEXT: - - - - - 1.00 - - vandnps %ymm2, %ymm2, %ymm3
+
+# CHECK: Timeline view:
+# CHECK-NEXT: Index 012345678
+
+# CHECK: [0,0] DeeeER . vaddps %ymm0, %ymm1, %ymm2
+# CHECK-NEXT: [0,1] D===eER . vandnps %ymm2, %ymm2, %ymm3
+# CHECK-NEXT: [1,0] D=eeeER . vaddps %ymm0, %ymm1, %ymm2
+# CHECK-NEXT: [1,1] D====eER. vandnps %ymm2, %ymm2, %ymm3
+# CHECK-NEXT: [2,0] .D=eeeER. vaddps %ymm0, %ymm1, %ymm2
+# CHECK-NEXT: [2,1] .D====eER vandnps %ymm2, %ymm2, %ymm3
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 3 1.7 1.7 0.0 vaddps %ymm0, %ymm1, %ymm2
+# CHECK-NEXT: 1. 3 4.7 0.0 0.0 vandnps %ymm2, %ymm2, %ymm3
+
+# CHECK: [3] Code Region - ZERO-IDIOM-4
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 200
+# CHECK-NEXT: Total Cycles: 106
+# CHECK-NEXT: Total uOps: 200
+
+# CHECK: Dispatch Width: 4
+# CHECK-NEXT: uOps Per Cycle: 1.89
+# CHECK-NEXT: IPC: 1.89
+# CHECK-NEXT: Block RThroughput: 1.0
+
+# CHECK: Instruction Info:
+# CHECK-NEXT: [1]: #uOps
+# CHECK-NEXT: [2]: Latency
+# CHECK-NEXT: [3]: RThroughput
+# CHECK-NEXT: [4]: MayLoad
+# CHECK-NEXT: [5]: MayStore
+# CHECK-NEXT: [6]: HasSideEffects (U)
+
+# CHECK: [1] [2] [3] [4] [5] [6] Instructions:
+# CHECK-NEXT: 1 3 1.00 vaddps %ymm0, %ymm1, %ymm2
+# CHECK-NEXT: 1 1 1.00 vandnps %ymm2, %ymm2, %ymm3
+
+# CHECK: Resources:
+# CHECK-NEXT: [0] - SBDivider
+# CHECK-NEXT: [1] - SBFPDivider
+# CHECK-NEXT: [2] - SBPort0
+# CHECK-NEXT: [3] - SBPort1
+# CHECK-NEXT: [4] - SBPort4
+# CHECK-NEXT: [5] - SBPort5
+# CHECK-NEXT: [6.0] - SBPort23
+# CHECK-NEXT: [6.1] - SBPort23
+
+# CHECK: Resource pressure per iteration:
+# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6.0] [6.1]
+# CHECK-NEXT: - - - 1.00 - 1.00 - -
+
+# CHECK: Resource pressure by instruction:
+# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6.0] [6.1] Instructions:
+# CHECK-NEXT: - - - 1.00 - - - - vaddps %ymm0, %ymm1, %ymm2
+# CHECK-NEXT: - - - - - 1.00 - - vandnps %ymm2, %ymm2, %ymm3
+
+# CHECK: Timeline view:
+# CHECK-NEXT: Index 012345678
+
+# CHECK: [0,0] DeeeER . vaddps %ymm0, %ymm1, %ymm2
+# CHECK-NEXT: [0,1] D===eER . vandnps %ymm2, %ymm2, %ymm3
+# CHECK-NEXT: [1,0] D=eeeER . vaddps %ymm0, %ymm1, %ymm2
+# CHECK-NEXT: [1,1] D====eER. vandnps %ymm2, %ymm2, %ymm3
+# CHECK-NEXT: [2,0] .D=eeeER. vaddps %ymm0, %ymm1, %ymm2
+# CHECK-NEXT: [2,1] .D====eER vandnps %ymm2, %ymm2, %ymm3
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 3 1.7 1.7 0.0 vaddps %ymm0, %ymm1, %ymm2
+# CHECK-NEXT: 1. 3 4.7 0.0 0.0 vandnps %ymm2, %ymm2, %ymm3
+
+# CHECK: [4] Code Region - ZERO-IDIOM-5
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 200
+# CHECK-NEXT: Total Cycles: 403
+# CHECK-NEXT: Total uOps: 200
+
+# CHECK: Dispatch Width: 4
+# CHECK-NEXT: uOps Per Cycle: 0.50
+# CHECK-NEXT: IPC: 0.50
+# CHECK-NEXT: Block RThroughput: 1.0
+
+# CHECK: Instruction Info:
+# CHECK-NEXT: [1]: #uOps
+# CHECK-NEXT: [2]: Latency
+# CHECK-NEXT: [3]: RThroughput
+# CHECK-NEXT: [4]: MayLoad
+# CHECK-NEXT: [5]: MayStore
+# CHECK-NEXT: [6]: HasSideEffects (U)
+
+# CHECK: [1] [2] [3] [4] [5] [6] Instructions:
+# CHECK-NEXT: 1 1 1.00 vperm2f128 $136, %ymm0, %ymm0, %ymm1
+# CHECK-NEXT: 1 3 1.00 vaddps %ymm1, %ymm1, %ymm0
+
+# CHECK: Resources:
+# CHECK-NEXT: [0] - SBDivider
+# CHECK-NEXT: [1] - SBFPDivider
+# CHECK-NEXT: [2] - SBPort0
+# CHECK-NEXT: [3] - SBPort1
+# CHECK-NEXT: [4] - SBPort4
+# CHECK-NEXT: [5] - SBPort5
+# CHECK-NEXT: [6.0] - SBPort23
+# CHECK-NEXT: [6.1] - SBPort23
+
+# CHECK: Resource pressure per iteration:
+# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6.0] [6.1]
+# CHECK-NEXT: - - - 1.00 - 1.00 - -
+
+# CHECK: Resource pressure by instruction:
+# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6.0] [6.1] Instructions:
+# CHECK-NEXT: - - - - - 1.00 - - vperm2f128 $136, %ymm0, %ymm0, %ymm1
+# CHECK-NEXT: - - - 1.00 - - - - vaddps %ymm1, %ymm1, %ymm0
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 01234
+# CHECK-NEXT: Index 0123456789
+
+# CHECK: [0,0] DeER . . . vperm2f128 $136, %ymm0, %ymm0, %ymm1
+# CHECK-NEXT: [0,1] D=eeeER . . vaddps %ymm1, %ymm1, %ymm0
+# CHECK-NEXT: [1,0] D====eER . . vperm2f128 $136, %ymm0, %ymm0, %ymm1
+# CHECK-NEXT: [1,1] D=====eeeER . vaddps %ymm1, %ymm1, %ymm0
+# CHECK-NEXT: [2,0] .D=======eER . vperm2f128 $136, %ymm0, %ymm0, %ymm1
+# CHECK-NEXT: [2,1] .D========eeeER vaddps %ymm1, %ymm1, %ymm0
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 3 4.7 0.3 0.0 vperm2f128 $136, %ymm0, %ymm0, %ymm1
+# CHECK-NEXT: 1. 3 5.7 0.0 0.0 vaddps %ymm1, %ymm1, %ymm0
--- /dev/null
+# NOTE: Assertions have been autogenerated by utils/update_mca_test_checks.py
+# RUN: llvm-mca -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -timeline -register-file-stats -iterations=1 < %s | FileCheck %s
+
+subl %eax, %eax
+subq %rax, %rax
+xorl %eax, %eax
+xorq %rax, %rax
+
+pcmpgtb %mm2, %mm2
+pcmpgtd %mm2, %mm2
+# pcmpgtq %mm2, %mm2 # invalid operand for instruction
+pcmpgtw %mm2, %mm2
+
+pcmpgtb %xmm2, %xmm2
+pcmpgtd %xmm2, %xmm2
+pcmpgtq %xmm2, %xmm2
+pcmpgtw %xmm2, %xmm2
+
+vpcmpgtb %xmm3, %xmm3, %xmm3
+vpcmpgtd %xmm3, %xmm3, %xmm3
+vpcmpgtq %xmm3, %xmm3, %xmm3
+vpcmpgtw %xmm3, %xmm3, %xmm3
+
+vpcmpgtb %xmm3, %xmm3, %xmm5
+vpcmpgtd %xmm3, %xmm3, %xmm5
+vpcmpgtq %xmm3, %xmm3, %xmm5
+vpcmpgtw %xmm3, %xmm3, %xmm5
+
+psubb %mm2, %mm2
+psubd %mm2, %mm2
+psubq %mm2, %mm2
+psubw %mm2, %mm2
+psubb %xmm2, %xmm2
+psubd %xmm2, %xmm2
+psubq %xmm2, %xmm2
+psubw %xmm2, %xmm2
+vpsubb %xmm3, %xmm3, %xmm3
+vpsubd %xmm3, %xmm3, %xmm3
+vpsubq %xmm3, %xmm3, %xmm3
+vpsubw %xmm3, %xmm3, %xmm3
+
+vpsubb %xmm3, %xmm3, %xmm5
+vpsubd %xmm3, %xmm3, %xmm5
+vpsubq %xmm3, %xmm3, %xmm5
+vpsubw %xmm3, %xmm3, %xmm5
+
+psubsb %mm2, %mm2
+psubsw %mm2, %mm2
+psubsb %xmm2, %xmm2
+psubsw %xmm2, %xmm2
+vpsubsb %xmm3, %xmm3, %xmm3
+vpsubsw %xmm3, %xmm3, %xmm3
+
+vpsubsb %xmm3, %xmm3, %xmm5
+vpsubsw %xmm3, %xmm3, %xmm5
+
+psubusb %mm2, %mm2
+psubusw %mm2, %mm2
+psubusb %xmm2, %xmm2
+psubusw %xmm2, %xmm2
+vpsubusb %xmm3, %xmm3, %xmm3
+vpsubusw %xmm3, %xmm3, %xmm3
+
+vpsubsb %xmm3, %xmm3, %xmm5
+vpsubsw %xmm3, %xmm3, %xmm5
+
+andnps %xmm0, %xmm0
+andnpd %xmm1, %xmm1
+vandnps %xmm2, %xmm2, %xmm2
+vandnpd %xmm1, %xmm1, %xmm1
+pandn %mm2, %mm2
+pandn %xmm2, %xmm2
+vpandn %xmm3, %xmm3, %xmm3
+
+vandnps %xmm2, %xmm2, %xmm5
+vandnpd %xmm1, %xmm1, %xmm5
+vpandn %xmm3, %xmm3, %xmm5
+
+xorps %xmm0, %xmm0
+xorpd %xmm1, %xmm1
+vxorps %xmm2, %xmm2, %xmm2
+vxorpd %xmm1, %xmm1, %xmm1
+pxor %mm2, %mm2
+pxor %xmm2, %xmm2
+vpxor %xmm3, %xmm3, %xmm3
+
+vxorps %xmm4, %xmm4, %xmm5
+vxorpd %xmm1, %xmm1, %xmm3
+vpxor %xmm3, %xmm3, %xmm5
+
+# CHECK: Iterations: 1
+# CHECK-NEXT: Instructions: 71
+# CHECK-NEXT: Total Cycles: 39
+# CHECK-NEXT: Total uOps: 71
+
+# CHECK: Dispatch Width: 4
+# CHECK-NEXT: uOps Per Cycle: 1.82
+# CHECK-NEXT: IPC: 1.82
+# CHECK-NEXT: Block RThroughput: 17.8
+
+# CHECK: Instruction Info:
+# CHECK-NEXT: [1]: #uOps
+# CHECK-NEXT: [2]: Latency
+# CHECK-NEXT: [3]: RThroughput
+# CHECK-NEXT: [4]: MayLoad
+# CHECK-NEXT: [5]: MayStore
+# CHECK-NEXT: [6]: HasSideEffects (U)
+
+# CHECK: [1] [2] [3] [4] [5] [6] Instructions:
+# CHECK-NEXT: 1 0 0.25 subl %eax, %eax
+# CHECK-NEXT: 1 0 0.25 subq %rax, %rax
+# CHECK-NEXT: 1 0 0.25 xorl %eax, %eax
+# CHECK-NEXT: 1 0 0.25 xorq %rax, %rax
+# CHECK-NEXT: 1 3 1.00 pcmpgtb %mm2, %mm2
+# CHECK-NEXT: 1 3 1.00 pcmpgtd %mm2, %mm2
+# CHECK-NEXT: 1 3 1.00 pcmpgtw %mm2, %mm2
+# CHECK-NEXT: 1 0 0.25 pcmpgtb %xmm2, %xmm2
+# CHECK-NEXT: 1 0 0.25 pcmpgtd %xmm2, %xmm2
+# CHECK-NEXT: 1 0 0.25 pcmpgtq %xmm2, %xmm2
+# CHECK-NEXT: 1 0 0.25 pcmpgtw %xmm2, %xmm2
+# CHECK-NEXT: 1 0 0.25 vpcmpgtb %xmm3, %xmm3, %xmm3
+# CHECK-NEXT: 1 0 0.25 vpcmpgtd %xmm3, %xmm3, %xmm3
+# CHECK-NEXT: 1 0 0.25 vpcmpgtq %xmm3, %xmm3, %xmm3
+# CHECK-NEXT: 1 0 0.25 vpcmpgtw %xmm3, %xmm3, %xmm3
+# CHECK-NEXT: 1 0 0.25 vpcmpgtb %xmm3, %xmm3, %xmm5
+# CHECK-NEXT: 1 0 0.25 vpcmpgtd %xmm3, %xmm3, %xmm5
+# CHECK-NEXT: 1 0 0.25 vpcmpgtq %xmm3, %xmm3, %xmm5
+# CHECK-NEXT: 1 0 0.25 vpcmpgtw %xmm3, %xmm3, %xmm5
+# CHECK-NEXT: 1 3 1.00 psubb %mm2, %mm2
+# CHECK-NEXT: 1 3 1.00 psubd %mm2, %mm2
+# CHECK-NEXT: 1 3 1.00 psubq %mm2, %mm2
+# CHECK-NEXT: 1 3 1.00 psubw %mm2, %mm2
+# CHECK-NEXT: 1 0 0.25 psubb %xmm2, %xmm2
+# CHECK-NEXT: 1 0 0.25 psubd %xmm2, %xmm2
+# CHECK-NEXT: 1 0 0.25 psubq %xmm2, %xmm2
+# CHECK-NEXT: 1 0 0.25 psubw %xmm2, %xmm2
+# CHECK-NEXT: 1 0 0.25 vpsubb %xmm3, %xmm3, %xmm3
+# CHECK-NEXT: 1 0 0.25 vpsubd %xmm3, %xmm3, %xmm3
+# CHECK-NEXT: 1 0 0.25 vpsubq %xmm3, %xmm3, %xmm3
+# CHECK-NEXT: 1 0 0.25 vpsubw %xmm3, %xmm3, %xmm3
+# CHECK-NEXT: 1 0 0.25 vpsubb %xmm3, %xmm3, %xmm5
+# CHECK-NEXT: 1 0 0.25 vpsubd %xmm3, %xmm3, %xmm5
+# CHECK-NEXT: 1 0 0.25 vpsubq %xmm3, %xmm3, %xmm5
+# CHECK-NEXT: 1 0 0.25 vpsubw %xmm3, %xmm3, %xmm5
+# CHECK-NEXT: 1 3 1.00 psubsb %mm2, %mm2
+# CHECK-NEXT: 1 3 1.00 psubsw %mm2, %mm2
+# CHECK-NEXT: 1 1 0.50 psubsb %xmm2, %xmm2
+# CHECK-NEXT: 1 1 0.50 psubsw %xmm2, %xmm2
+# CHECK-NEXT: 1 1 0.50 vpsubsb %xmm3, %xmm3, %xmm3
+# CHECK-NEXT: 1 1 0.50 vpsubsw %xmm3, %xmm3, %xmm3
+# CHECK-NEXT: 1 1 0.50 vpsubsb %xmm3, %xmm3, %xmm5
+# CHECK-NEXT: 1 1 0.50 vpsubsw %xmm3, %xmm3, %xmm5
+# CHECK-NEXT: 1 3 1.00 psubusb %mm2, %mm2
+# CHECK-NEXT: 1 3 1.00 psubusw %mm2, %mm2
+# CHECK-NEXT: 1 1 0.50 psubusb %xmm2, %xmm2
+# CHECK-NEXT: 1 1 0.50 psubusw %xmm2, %xmm2
+# CHECK-NEXT: 1 1 0.50 vpsubusb %xmm3, %xmm3, %xmm3
+# CHECK-NEXT: 1 1 0.50 vpsubusw %xmm3, %xmm3, %xmm3
+# CHECK-NEXT: 1 1 0.50 vpsubsb %xmm3, %xmm3, %xmm5
+# CHECK-NEXT: 1 1 0.50 vpsubsw %xmm3, %xmm3, %xmm5
+# CHECK-NEXT: 1 1 1.00 andnps %xmm0, %xmm0
+# CHECK-NEXT: 1 1 1.00 andnpd %xmm1, %xmm1
+# CHECK-NEXT: 1 1 1.00 vandnps %xmm2, %xmm2, %xmm2
+# CHECK-NEXT: 1 1 1.00 vandnpd %xmm1, %xmm1, %xmm1
+# CHECK-NEXT: 1 1 0.33 pandn %mm2, %mm2
+# CHECK-NEXT: 1 1 0.33 pandn %xmm2, %xmm2
+# CHECK-NEXT: 1 1 0.33 vpandn %xmm3, %xmm3, %xmm3
+# CHECK-NEXT: 1 1 1.00 vandnps %xmm2, %xmm2, %xmm5
+# CHECK-NEXT: 1 1 1.00 vandnpd %xmm1, %xmm1, %xmm5
+# CHECK-NEXT: 1 1 0.33 vpandn %xmm3, %xmm3, %xmm5
+# CHECK-NEXT: 1 0 0.25 xorps %xmm0, %xmm0
+# CHECK-NEXT: 1 0 0.25 xorpd %xmm1, %xmm1
+# CHECK-NEXT: 1 0 0.25 vxorps %xmm2, %xmm2, %xmm2
+# CHECK-NEXT: 1 0 0.25 vxorpd %xmm1, %xmm1, %xmm1
+# CHECK-NEXT: 1 1 0.33 pxor %mm2, %mm2
+# CHECK-NEXT: 1 0 0.25 pxor %xmm2, %xmm2
+# CHECK-NEXT: 1 0 0.25 vpxor %xmm3, %xmm3, %xmm3
+# CHECK-NEXT: 1 0 0.25 vxorps %xmm4, %xmm4, %xmm5
+# CHECK-NEXT: 1 0 0.25 vxorpd %xmm1, %xmm1, %xmm3
+# CHECK-NEXT: 1 0 0.25 vpxor %xmm3, %xmm3, %xmm5
+
+# CHECK: Register File statistics:
+# CHECK-NEXT: Total number of mappings created: 75
+# CHECK-NEXT: Max number of mappings used: 51
+
+# CHECK: Resources:
+# CHECK-NEXT: [0] - SBDivider
+# CHECK-NEXT: [1] - SBFPDivider
+# CHECK-NEXT: [2] - SBPort0
+# CHECK-NEXT: [3] - SBPort1
+# CHECK-NEXT: [4] - SBPort4
+# CHECK-NEXT: [5] - SBPort5
+# CHECK-NEXT: [6.0] - SBPort23
+# CHECK-NEXT: [6.1] - SBPort23
+
+# CHECK: Resource pressure per iteration:
+# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6.0] [6.1]
+# CHECK-NEXT: - - 5.00 16.00 - 13.00 - -
+
+# CHECK: Resource pressure by instruction:
+# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6.0] [6.1] Instructions:
+# CHECK-NEXT: - - - - - - - - subl %eax, %eax
+# CHECK-NEXT: - - - - - - - - subq %rax, %rax
+# CHECK-NEXT: - - - - - - - - xorl %eax, %eax
+# CHECK-NEXT: - - - - - - - - xorq %rax, %rax
+# CHECK-NEXT: - - - 1.00 - - - - pcmpgtb %mm2, %mm2
+# CHECK-NEXT: - - - 1.00 - - - - pcmpgtd %mm2, %mm2
+# CHECK-NEXT: - - - 1.00 - - - - pcmpgtw %mm2, %mm2
+# CHECK-NEXT: - - - - - - - - pcmpgtb %xmm2, %xmm2
+# CHECK-NEXT: - - - - - - - - pcmpgtd %xmm2, %xmm2
+# CHECK-NEXT: - - - - - - - - pcmpgtq %xmm2, %xmm2
+# CHECK-NEXT: - - - - - - - - pcmpgtw %xmm2, %xmm2
+# CHECK-NEXT: - - - - - - - - vpcmpgtb %xmm3, %xmm3, %xmm3
+# CHECK-NEXT: - - - - - - - - vpcmpgtd %xmm3, %xmm3, %xmm3
+# CHECK-NEXT: - - - - - - - - vpcmpgtq %xmm3, %xmm3, %xmm3
+# CHECK-NEXT: - - - - - - - - vpcmpgtw %xmm3, %xmm3, %xmm3
+# CHECK-NEXT: - - - - - - - - vpcmpgtb %xmm3, %xmm3, %xmm5
+# CHECK-NEXT: - - - - - - - - vpcmpgtd %xmm3, %xmm3, %xmm5
+# CHECK-NEXT: - - - - - - - - vpcmpgtq %xmm3, %xmm3, %xmm5
+# CHECK-NEXT: - - - - - - - - vpcmpgtw %xmm3, %xmm3, %xmm5
+# CHECK-NEXT: - - - 1.00 - - - - psubb %mm2, %mm2
+# CHECK-NEXT: - - - 1.00 - - - - psubd %mm2, %mm2
+# CHECK-NEXT: - - - 1.00 - - - - psubq %mm2, %mm2
+# CHECK-NEXT: - - - 1.00 - - - - psubw %mm2, %mm2
+# CHECK-NEXT: - - - - - - - - psubb %xmm2, %xmm2
+# CHECK-NEXT: - - - - - - - - psubd %xmm2, %xmm2
+# CHECK-NEXT: - - - - - - - - psubq %xmm2, %xmm2
+# CHECK-NEXT: - - - - - - - - psubw %xmm2, %xmm2
+# CHECK-NEXT: - - - - - - - - vpsubb %xmm3, %xmm3, %xmm3
+# CHECK-NEXT: - - - - - - - - vpsubd %xmm3, %xmm3, %xmm3
+# CHECK-NEXT: - - - - - - - - vpsubq %xmm3, %xmm3, %xmm3
+# CHECK-NEXT: - - - - - - - - vpsubw %xmm3, %xmm3, %xmm3
+# CHECK-NEXT: - - - - - - - - vpsubb %xmm3, %xmm3, %xmm5
+# CHECK-NEXT: - - - - - - - - vpsubd %xmm3, %xmm3, %xmm5
+# CHECK-NEXT: - - - - - - - - vpsubq %xmm3, %xmm3, %xmm5
+# CHECK-NEXT: - - - - - - - - vpsubw %xmm3, %xmm3, %xmm5
+# CHECK-NEXT: - - - 1.00 - - - - psubsb %mm2, %mm2
+# CHECK-NEXT: - - - 1.00 - - - - psubsw %mm2, %mm2
+# CHECK-NEXT: - - - - - 1.00 - - psubsb %xmm2, %xmm2
+# CHECK-NEXT: - - - - - 1.00 - - psubsw %xmm2, %xmm2
+# CHECK-NEXT: - - - 1.00 - - - - vpsubsb %xmm3, %xmm3, %xmm3
+# CHECK-NEXT: - - - - - 1.00 - - vpsubsw %xmm3, %xmm3, %xmm3
+# CHECK-NEXT: - - - 1.00 - - - - vpsubsb %xmm3, %xmm3, %xmm5
+# CHECK-NEXT: - - - - - 1.00 - - vpsubsw %xmm3, %xmm3, %xmm5
+# CHECK-NEXT: - - - 1.00 - - - - psubusb %mm2, %mm2
+# CHECK-NEXT: - - - 1.00 - - - - psubusw %mm2, %mm2
+# CHECK-NEXT: - - - 1.00 - - - - psubusb %xmm2, %xmm2
+# CHECK-NEXT: - - - - - 1.00 - - psubusw %xmm2, %xmm2
+# CHECK-NEXT: - - - - - 1.00 - - vpsubusb %xmm3, %xmm3, %xmm3
+# CHECK-NEXT: - - - 1.00 - - - - vpsubusw %xmm3, %xmm3, %xmm3
+# CHECK-NEXT: - - - - - 1.00 - - vpsubsb %xmm3, %xmm3, %xmm5
+# CHECK-NEXT: - - - 1.00 - - - - vpsubsw %xmm3, %xmm3, %xmm5
+# CHECK-NEXT: - - - - - 1.00 - - andnps %xmm0, %xmm0
+# CHECK-NEXT: - - - - - 1.00 - - andnpd %xmm1, %xmm1
+# CHECK-NEXT: - - - - - 1.00 - - vandnps %xmm2, %xmm2, %xmm2
+# CHECK-NEXT: - - - - - 1.00 - - vandnpd %xmm1, %xmm1, %xmm1
+# CHECK-NEXT: - - 1.00 - - - - - pandn %mm2, %mm2
+# CHECK-NEXT: - - 1.00 - - - - - pandn %xmm2, %xmm2
+# CHECK-NEXT: - - 1.00 - - - - - vpandn %xmm3, %xmm3, %xmm3
+# CHECK-NEXT: - - - - - 1.00 - - vandnps %xmm2, %xmm2, %xmm5
+# CHECK-NEXT: - - - - - 1.00 - - vandnpd %xmm1, %xmm1, %xmm5
+# CHECK-NEXT: - - 1.00 - - - - - vpandn %xmm3, %xmm3, %xmm5
+# CHECK-NEXT: - - - - - - - - xorps %xmm0, %xmm0
+# CHECK-NEXT: - - - - - - - - xorpd %xmm1, %xmm1
+# CHECK-NEXT: - - - - - - - - vxorps %xmm2, %xmm2, %xmm2
+# CHECK-NEXT: - - - - - - - - vxorpd %xmm1, %xmm1, %xmm1
+# CHECK-NEXT: - - 1.00 - - - - - pxor %mm2, %mm2
+# CHECK-NEXT: - - - - - - - - pxor %xmm2, %xmm2
+# CHECK-NEXT: - - - - - - - - vpxor %xmm3, %xmm3, %xmm3
+# CHECK-NEXT: - - - - - - - - vxorps %xmm4, %xmm4, %xmm5
+# CHECK-NEXT: - - - - - - - - vxorpd %xmm1, %xmm1, %xmm3
+# CHECK-NEXT: - - - - - - - - vpxor %xmm3, %xmm3, %xmm5
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 0123456789 012345678
+# CHECK-NEXT: Index 0123456789 0123456789
+
+# CHECK: [0,0] DR . . . . . . . . subl %eax, %eax
+# CHECK-NEXT: [0,1] DR . . . . . . . . subq %rax, %rax
+# CHECK-NEXT: [0,2] DR . . . . . . . . xorl %eax, %eax
+# CHECK-NEXT: [0,3] DR . . . . . . . . xorq %rax, %rax
+# CHECK-NEXT: [0,4] .DeeeER . . . . . . . pcmpgtb %mm2, %mm2
+# CHECK-NEXT: [0,5] .D===eeeER. . . . . . . pcmpgtd %mm2, %mm2
+# CHECK-NEXT: [0,6] .D======eeeER . . . . . . pcmpgtw %mm2, %mm2
+# CHECK-NEXT: [0,7] .D----------R . . . . . . pcmpgtb %xmm2, %xmm2
+# CHECK-NEXT: [0,8] . D---------R . . . . . . pcmpgtd %xmm2, %xmm2
+# CHECK-NEXT: [0,9] . D---------R . . . . . . pcmpgtq %xmm2, %xmm2
+# CHECK-NEXT: [0,10] . D---------R . . . . . . pcmpgtw %xmm2, %xmm2
+# CHECK-NEXT: [0,11] . D---------R . . . . . . vpcmpgtb %xmm3, %xmm3, %xmm3
+# CHECK-NEXT: [0,12] . D--------R . . . . . . vpcmpgtd %xmm3, %xmm3, %xmm3
+# CHECK-NEXT: [0,13] . D--------R . . . . . . vpcmpgtq %xmm3, %xmm3, %xmm3
+# CHECK-NEXT: [0,14] . D--------R . . . . . . vpcmpgtw %xmm3, %xmm3, %xmm3
+# CHECK-NEXT: [0,15] . D--------R . . . . . . vpcmpgtb %xmm3, %xmm3, %xmm5
+# CHECK-NEXT: [0,16] . D-------R . . . . . . vpcmpgtd %xmm3, %xmm3, %xmm5
+# CHECK-NEXT: [0,17] . D-------R . . . . . . vpcmpgtq %xmm3, %xmm3, %xmm5
+# CHECK-NEXT: [0,18] . D-------R . . . . . . vpcmpgtw %xmm3, %xmm3, %xmm5
+# CHECK-NEXT: [0,19] . D======eeeER . . . . . psubb %mm2, %mm2
+# CHECK-NEXT: [0,20] . D========eeeER . . . . . psubd %mm2, %mm2
+# CHECK-NEXT: [0,21] . D===========eeeER . . . . psubq %mm2, %mm2
+# CHECK-NEXT: [0,22] . D==============eeeER. . . . psubw %mm2, %mm2
+# CHECK-NEXT: [0,23] . D------------------R. . . . psubb %xmm2, %xmm2
+# CHECK-NEXT: [0,24] . .D-----------------R. . . . psubd %xmm2, %xmm2
+# CHECK-NEXT: [0,25] . .D-----------------R. . . . psubq %xmm2, %xmm2
+# CHECK-NEXT: [0,26] . .D-----------------R. . . . psubw %xmm2, %xmm2
+# CHECK-NEXT: [0,27] . .D-----------------R. . . . vpsubb %xmm3, %xmm3, %xmm3
+# CHECK-NEXT: [0,28] . . D----------------R. . . . vpsubd %xmm3, %xmm3, %xmm3
+# CHECK-NEXT: [0,29] . . D----------------R. . . . vpsubq %xmm3, %xmm3, %xmm3
+# CHECK-NEXT: [0,30] . . D----------------R. . . . vpsubw %xmm3, %xmm3, %xmm3
+# CHECK-NEXT: [0,31] . . D----------------R. . . . vpsubb %xmm3, %xmm3, %xmm5
+# CHECK-NEXT: [0,32] . . D---------------R. . . . vpsubd %xmm3, %xmm3, %xmm5
+# CHECK-NEXT: [0,33] . . D---------------R. . . . vpsubq %xmm3, %xmm3, %xmm5
+# CHECK-NEXT: [0,34] . . D---------------R. . . . vpsubw %xmm3, %xmm3, %xmm5
+# CHECK-NEXT: [0,35] . . D==============eeeER . . . psubsb %mm2, %mm2
+# CHECK-NEXT: [0,36] . . D================eeeER . . psubsw %mm2, %mm2
+# CHECK-NEXT: [0,37] . . DeE------------------R . . psubsb %xmm2, %xmm2
+# CHECK-NEXT: [0,38] . . D==eE----------------R . . psubsw %xmm2, %xmm2
+# CHECK-NEXT: [0,39] . . DeE------------------R . . vpsubsb %xmm3, %xmm3, %xmm3
+# CHECK-NEXT: [0,40] . . DeE-----------------R . . vpsubsw %xmm3, %xmm3, %xmm3
+# CHECK-NEXT: [0,41] . . D=eE----------------R . . vpsubsb %xmm3, %xmm3, %xmm5
+# CHECK-NEXT: [0,42] . . D==eE---------------R . . vpsubsw %xmm3, %xmm3, %xmm5
+# CHECK-NEXT: [0,43] . . D==================eeeER . . psubusb %mm2, %mm2
+# CHECK-NEXT: [0,44] . . .D====================eeeER . psubusw %mm2, %mm2
+# CHECK-NEXT: [0,45] . . .D=eE---------------------R . psubusb %xmm2, %xmm2
+# CHECK-NEXT: [0,46] . . .D==eE--------------------R . psubusw %xmm2, %xmm2
+# CHECK-NEXT: [0,47] . . .D===eE-------------------R . vpsubusb %xmm3, %xmm3, %xmm3
+# CHECK-NEXT: [0,48] . . . D===eE------------------R . vpsubusw %xmm3, %xmm3, %xmm3
+# CHECK-NEXT: [0,49] . . . D====eE-----------------R . vpsubsb %xmm3, %xmm3, %xmm5
+# CHECK-NEXT: [0,50] . . . D=====eE----------------R . vpsubsw %xmm3, %xmm3, %xmm5
+# CHECK-NEXT: [0,51] . . . D===eE------------------R . andnps %xmm0, %xmm0
+# CHECK-NEXT: [0,52] . . . D====eE----------------R . andnpd %xmm1, %xmm1
+# CHECK-NEXT: [0,53] . . . D======eE--------------R . vandnps %xmm2, %xmm2, %xmm2
+# CHECK-NEXT: [0,54] . . . D=====eE---------------R . vandnpd %xmm1, %xmm1, %xmm1
+# CHECK-NEXT: [0,55] . . . D=====================eER. pandn %mm2, %mm2
+# CHECK-NEXT: [0,56] . . . D======eE--------------R. pandn %xmm2, %xmm2
+# CHECK-NEXT: [0,57] . . . D==eE------------------R. vpandn %xmm3, %xmm3, %xmm3
+# CHECK-NEXT: [0,58] . . . D=======eE-------------R. vandnps %xmm2, %xmm2, %xmm5
+# CHECK-NEXT: [0,59] . . . D======eE--------------R. vandnpd %xmm1, %xmm1, %xmm5
+# CHECK-NEXT: [0,60] . . . D==eE-----------------R. vpandn %xmm3, %xmm3, %xmm5
+# CHECK-NEXT: [0,61] . . . D=E-------------------R. xorps %xmm0, %xmm0
+# CHECK-NEXT: [0,62] . . . D====E----------------R. xorpd %xmm1, %xmm1
+# CHECK-NEXT: [0,63] . . . D======E--------------R. vxorps %xmm2, %xmm2, %xmm2
+# CHECK-NEXT: [0,64] . . . .D===E----------------R. vxorpd %xmm1, %xmm1, %xmm1
+# CHECK-NEXT: [0,65] . . . .D===================eER pxor %mm2, %mm2
+# CHECK-NEXT: [0,66] . . . .D=====E---------------R pxor %xmm2, %xmm2
+# CHECK-NEXT: [0,67] . . . .D=E-------------------R vpxor %xmm3, %xmm3, %xmm3
+# CHECK-NEXT: [0,68] . . . . D--------------------R vxorps %xmm4, %xmm4, %xmm5
+# CHECK-NEXT: [0,69] . . . . D==E-----------------R vxorpd %xmm1, %xmm1, %xmm3
+# CHECK-NEXT: [0,70] . . . . D==E-----------------R vpxor %xmm3, %xmm3, %xmm5
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 0.0 0.0 0.0 subl %eax, %eax
+# CHECK-NEXT: 1. 1 0.0 0.0 0.0 subq %rax, %rax
+# CHECK-NEXT: 2. 1 0.0 0.0 0.0 xorl %eax, %eax
+# CHECK-NEXT: 3. 1 0.0 0.0 0.0 xorq %rax, %rax
+# CHECK-NEXT: 4. 1 1.0 1.0 0.0 pcmpgtb %mm2, %mm2
+# CHECK-NEXT: 5. 1 4.0 0.0 0.0 pcmpgtd %mm2, %mm2
+# CHECK-NEXT: 6. 1 7.0 0.0 0.0 pcmpgtw %mm2, %mm2
+# CHECK-NEXT: 7. 1 0.0 0.0 10.0 pcmpgtb %xmm2, %xmm2
+# CHECK-NEXT: 8. 1 0.0 0.0 9.0 pcmpgtd %xmm2, %xmm2
+# CHECK-NEXT: 9. 1 0.0 0.0 9.0 pcmpgtq %xmm2, %xmm2
+# CHECK-NEXT: 10. 1 0.0 0.0 9.0 pcmpgtw %xmm2, %xmm2
+# CHECK-NEXT: 11. 1 0.0 0.0 9.0 vpcmpgtb %xmm3, %xmm3, %xmm3
+# CHECK-NEXT: 12. 1 0.0 0.0 8.0 vpcmpgtd %xmm3, %xmm3, %xmm3
+# CHECK-NEXT: 13. 1 0.0 0.0 8.0 vpcmpgtq %xmm3, %xmm3, %xmm3
+# CHECK-NEXT: 14. 1 0.0 0.0 8.0 vpcmpgtw %xmm3, %xmm3, %xmm3
+# CHECK-NEXT: 15. 1 0.0 0.0 8.0 vpcmpgtb %xmm3, %xmm3, %xmm5
+# CHECK-NEXT: 16. 1 0.0 0.0 7.0 vpcmpgtd %xmm3, %xmm3, %xmm5
+# CHECK-NEXT: 17. 1 0.0 0.0 7.0 vpcmpgtq %xmm3, %xmm3, %xmm5
+# CHECK-NEXT: 18. 1 0.0 0.0 7.0 vpcmpgtw %xmm3, %xmm3, %xmm5
+# CHECK-NEXT: 19. 1 7.0 0.0 0.0 psubb %mm2, %mm2
+# CHECK-NEXT: 20. 1 9.0 0.0 0.0 psubd %mm2, %mm2
+# CHECK-NEXT: 21. 1 12.0 0.0 0.0 psubq %mm2, %mm2
+# CHECK-NEXT: 22. 1 15.0 0.0 0.0 psubw %mm2, %mm2
+# CHECK-NEXT: 23. 1 0.0 0.0 18.0 psubb %xmm2, %xmm2
+# CHECK-NEXT: 24. 1 0.0 0.0 17.0 psubd %xmm2, %xmm2
+# CHECK-NEXT: 25. 1 0.0 0.0 17.0 psubq %xmm2, %xmm2
+# CHECK-NEXT: 26. 1 0.0 0.0 17.0 psubw %xmm2, %xmm2
+# CHECK-NEXT: 27. 1 0.0 0.0 17.0 vpsubb %xmm3, %xmm3, %xmm3
+# CHECK-NEXT: 28. 1 0.0 0.0 16.0 vpsubd %xmm3, %xmm3, %xmm3
+# CHECK-NEXT: 29. 1 0.0 0.0 16.0 vpsubq %xmm3, %xmm3, %xmm3
+# CHECK-NEXT: 30. 1 0.0 0.0 16.0 vpsubw %xmm3, %xmm3, %xmm3
+# CHECK-NEXT: 31. 1 0.0 0.0 16.0 vpsubb %xmm3, %xmm3, %xmm5
+# CHECK-NEXT: 32. 1 0.0 0.0 15.0 vpsubd %xmm3, %xmm3, %xmm5
+# CHECK-NEXT: 33. 1 0.0 0.0 15.0 vpsubq %xmm3, %xmm3, %xmm5
+# CHECK-NEXT: 34. 1 0.0 0.0 15.0 vpsubw %xmm3, %xmm3, %xmm5
+# CHECK-NEXT: 35. 1 15.0 0.0 0.0 psubsb %mm2, %mm2
+# CHECK-NEXT: 36. 1 17.0 0.0 0.0 psubsw %mm2, %mm2
+# CHECK-NEXT: 37. 1 1.0 1.0 18.0 psubsb %xmm2, %xmm2
+# CHECK-NEXT: 38. 1 3.0 1.0 16.0 psubsw %xmm2, %xmm2
+# CHECK-NEXT: 39. 1 1.0 1.0 18.0 vpsubsb %xmm3, %xmm3, %xmm3
+# CHECK-NEXT: 40. 1 1.0 0.0 17.0 vpsubsw %xmm3, %xmm3, %xmm3
+# CHECK-NEXT: 41. 1 2.0 0.0 16.0 vpsubsb %xmm3, %xmm3, %xmm5
+# CHECK-NEXT: 42. 1 3.0 1.0 15.0 vpsubsw %xmm3, %xmm3, %xmm5
+# CHECK-NEXT: 43. 1 19.0 0.0 0.0 psubusb %mm2, %mm2
+# CHECK-NEXT: 44. 1 21.0 0.0 0.0 psubusw %mm2, %mm2
+# CHECK-NEXT: 45. 1 2.0 0.0 21.0 psubusb %xmm2, %xmm2
+# CHECK-NEXT: 46. 1 3.0 0.0 20.0 psubusw %xmm2, %xmm2
+# CHECK-NEXT: 47. 1 4.0 3.0 19.0 vpsubusb %xmm3, %xmm3, %xmm3
+# CHECK-NEXT: 48. 1 4.0 0.0 18.0 vpsubusw %xmm3, %xmm3, %xmm3
+# CHECK-NEXT: 49. 1 5.0 0.0 17.0 vpsubsb %xmm3, %xmm3, %xmm5
+# CHECK-NEXT: 50. 1 6.0 1.0 16.0 vpsubsw %xmm3, %xmm3, %xmm5
+# CHECK-NEXT: 51. 1 4.0 4.0 18.0 andnps %xmm0, %xmm0
+# CHECK-NEXT: 52. 1 5.0 5.0 16.0 andnpd %xmm1, %xmm1
+# CHECK-NEXT: 53. 1 7.0 5.0 14.0 vandnps %xmm2, %xmm2, %xmm2
+# CHECK-NEXT: 54. 1 6.0 0.0 15.0 vandnpd %xmm1, %xmm1, %xmm1
+# CHECK-NEXT: 55. 1 22.0 0.0 0.0 pandn %mm2, %mm2
+# CHECK-NEXT: 56. 1 7.0 0.0 14.0 pandn %xmm2, %xmm2
+# CHECK-NEXT: 57. 1 3.0 0.0 18.0 vpandn %xmm3, %xmm3, %xmm3
+# CHECK-NEXT: 58. 1 8.0 0.0 13.0 vandnps %xmm2, %xmm2, %xmm5
+# CHECK-NEXT: 59. 1 7.0 1.0 14.0 vandnpd %xmm1, %xmm1, %xmm5
+# CHECK-NEXT: 60. 1 3.0 0.0 17.0 vpandn %xmm3, %xmm3, %xmm5
+# CHECK-NEXT: 61. 1 2.0 0.0 19.0 xorps %xmm0, %xmm0
+# CHECK-NEXT: 62. 1 5.0 0.0 16.0 xorpd %xmm1, %xmm1
+# CHECK-NEXT: 63. 1 7.0 0.0 14.0 vxorps %xmm2, %xmm2, %xmm2
+# CHECK-NEXT: 64. 1 4.0 0.0 16.0 vxorpd %xmm1, %xmm1, %xmm1
+# CHECK-NEXT: 65. 1 20.0 0.0 0.0 pxor %mm2, %mm2
+# CHECK-NEXT: 66. 1 6.0 0.0 15.0 pxor %xmm2, %xmm2
+# CHECK-NEXT: 67. 1 2.0 0.0 19.0 vpxor %xmm3, %xmm3, %xmm3
+# CHECK-NEXT: 68. 1 0.0 0.0 20.0 vxorps %xmm4, %xmm4, %xmm5
+# CHECK-NEXT: 69. 1 3.0 0.0 17.0 vxorpd %xmm1, %xmm1, %xmm3
+# CHECK-NEXT: 70. 1 3.0 0.0 17.0 vpxor %xmm3, %xmm3, %xmm5
# RUN: llvm-mca -mtriple=x86_64-unknown-unknown -mcpu=haswell -iterations=1 -timeline -resource-pressure=false < %s | FileCheck %s -check-prefix=ALL -check-prefix=HASWELL
# RUN: llvm-mca -mtriple=x86_64-unknown-unknown -mcpu=broadwell -iterations=1 -timeline -resource-pressure=false < %s | FileCheck %s -check-prefix=ALL -check-prefix=BDWELL
# RUN: llvm-mca -mtriple=x86_64-unknown-unknown -mcpu=skylake -iterations=1 -timeline -resource-pressure=false < %s | FileCheck %s -check-prefix=ALL -check-prefix=SKYLAKE
+# RUN: llvm-mca -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -iterations=1 -timeline -resource-pressure=false < %s | FileCheck %s -check-prefix=ALL -check-prefix=BDVER2
# RUN: llvm-mca -mtriple=x86_64-unknown-unknown -mcpu=btver2 -iterations=1 -timeline -resource-pressure=false < %s | FileCheck %s -check-prefix=ALL -check-prefix=BTVER2
# RUN: llvm-mca -mtriple=x86_64-unknown-unknown -mcpu=znver1 -iterations=1 -timeline -resource-pressure=false < %s | FileCheck %s -check-prefix=ALL -check-prefix=ZNVER1
# ALL: Iterations: 1
# ALL-NEXT: Instructions: 2
+# BDVER2-NEXT: Total Cycles: 10
+# BDVER2-NEXT: Total uOps: 4
+
# BDWELL-NEXT: Total Cycles: 10
# BDWELL-NEXT: Total uOps: 4
# ZNVER1-NEXT: Total Cycles: 8
# ZNVER1-NEXT: Total uOps: 3
+# BDVER2: Dispatch Width: 4
+# BDVER2-NEXT: uOps Per Cycle: 0.40
+# BDVER2-NEXT: IPC: 0.20
+# BDVER2-NEXT: Block RThroughput: 1.0
+
# BDWELL: Dispatch Width: 4
# BDWELL-NEXT: uOps Per Cycle: 0.40
# BDWELL-NEXT: IPC: 0.20
# ALL: [1] [2] [3] [4] [5] [6] Instructions:
+# BDVER2-NEXT: 1 1 0.33 addl %edi, %esi
+# BDVER2-NEXT: 3 7 1.00 * bextrl %esi, (%rdi), %eax
+
# BDWELL-NEXT: 1 1 0.25 addl %edi, %esi
# BDWELL-NEXT: 3 7 0.50 * bextrl %esi, (%rdi), %eax
# ALL: Timeline view:
+# BDVER2-NEXT: Index 0123456789
# BDWELL-NEXT: Index 0123456789
# BTVER2-NEXT: Index 0123456
# HASWELL-NEXT: Index 0123456789
# SKYLAKE-NEXT: Index 0123456789
# ZNVER1-NEXT: Index 01234567
+# BDVER2: [0,0] DeER . . addl %edi, %esi
+# BDVER2-NEXT: [0,1] DeeeeeeeER bextrl %esi, (%rdi), %eax
+
# BDWELL: [0,0] DeER . . addl %edi, %esi
# BDWELL-NEXT: [0,1] DeeeeeeeER bextrl %esi, (%rdi), %eax
# NOTE: Assertions have been autogenerated by utils/update_mca_test_checks.py
+# RUN: llvm-mca %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -resource-pressure=false -instruction-info=false < %s | FileCheck --check-prefix=ALL --check-prefix=BDVER2 %s
# RUN: llvm-mca %s -mtriple=x86_64-unknown-unknown -mcpu=btver2 -resource-pressure=false -instruction-info=false < %s | FileCheck --check-prefix=ALL --check-prefix=BTVER2 %s
# RUN: llvm-mca %s -mtriple=x86_64-unknown-unknown -mcpu=znver1 -resource-pressure=false -instruction-info=false < %s | FileCheck --check-prefix=ALL --check-prefix=ZNVER1 %s
# RUN: llvm-mca %s -mtriple=x86_64-unknown-unknown -mcpu=sandybridge -resource-pressure=false -instruction-info=false < %s | FileCheck --check-prefix=ALL --check-prefix=SANDYBRIDGE %s
# ALL-NEXT: Total Cycles: 103
# ALL-NEXT: Total uOps: 100
+# BDVER2: Dispatch Width: 4
+# BDVER2-NEXT: uOps Per Cycle: 0.97
+# BDVER2-NEXT: IPC: 0.97
+# BDVER2-NEXT: Block RThroughput: 0.3
+
# BROADWELL: Dispatch Width: 4
# BROADWELL-NEXT: uOps Per Cycle: 0.97
# BROADWELL-NEXT: IPC: 0.97
# RUN: llvm-mca -mtriple=x86_64-unknown-unknown -mcpu=haswell -iterations=1 -resource-pressure=false -instruction-info=false -timeline < %s | FileCheck %s -check-prefix=ALL -check-prefix=HASWELL
# RUN: llvm-mca -mtriple=x86_64-unknown-unknown -mcpu=broadwell -iterations=1 -resource-pressure=false -instruction-info=false -timeline < %s | FileCheck %s -check-prefix=ALL -check-prefix=BDWELL
# RUN: llvm-mca -mtriple=x86_64-unknown-unknown -mcpu=skylake -iterations=1 -resource-pressure=false -instruction-info=false -timeline < %s | FileCheck %s -check-prefix=ALL -check-prefix=SKYLAKE
+# RUN: llvm-mca -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -iterations=1 -resource-pressure=false -instruction-info=false -timeline < %s | FileCheck %s -check-prefix=ALL -check-prefix=BDVER2
# RUN: llvm-mca -mtriple=x86_64-unknown-unknown -mcpu=btver2 -iterations=1 -resource-pressure=false -instruction-info=false -timeline < %s | FileCheck %s -check-prefix=ALL -check-prefix=BTVER2
# RUN: llvm-mca -mtriple=x86_64-unknown-unknown -mcpu=znver1 -iterations=1 -resource-pressure=false -instruction-info=false -timeline < %s | FileCheck %s -check-prefix=ALL -check-prefix=ZNVER1
# ALL: Iterations: 1
# ALL-NEXT: Instructions: 2
+# BDVER2-NEXT: Total Cycles: 20
+# BDVER2-NEXT: Total uOps: 3
+
# BDWELL-NEXT: Total Cycles: 17
# BDWELL-NEXT: Total uOps: 3
# ZNVER1-NEXT: Total Cycles: 20
# ZNVER1-NEXT: Total uOps: 2
+# BDVER2: Dispatch Width: 4
+# BDVER2-NEXT: uOps Per Cycle: 0.15
+# BDVER2-NEXT: IPC: 0.10
+# BDVER2-NEXT: Block RThroughput: 14.0
+
# BDWELL: Dispatch Width: 4
# BDWELL-NEXT: uOps Per Cycle: 0.18
# BDWELL-NEXT: IPC: 0.12
# ALL: Timeline view:
+# BDVER2-NEXT: 0123456789
+# BDVER2-NEXT: Index 0123456789
+
# BDWELL-NEXT: 0123456
# BDWELL-NEXT: Index 0123456789
# ZNVER1-NEXT: 0123456789
# ZNVER1-NEXT: Index 0123456789
+# BDVER2: [0,0] DeeeeeeeeeeeeeeER . vdivps %xmm0, %xmm1, %xmm1
+# BDVER2-NEXT: [0,1] D========eeeeeeeeeER vaddps (%rax), %xmm1, %xmm1
+
# BDWELL: [0,0] DeeeeeeeeeeeER .. vdivps %xmm0, %xmm1, %xmm1
# BDWELL-NEXT: [0,1] D======eeeeeeeeER vaddps (%rax), %xmm1, %xmm1
# ALL: [0] [1] [2] [3]
# ALL-NEXT: 0. 1 1.0 1.0 0.0 vdivps %xmm0, %xmm1, %xmm1
+# BDVER2-NEXT: 1. 1 9.0 0.0 0.0 vaddps (%rax), %xmm1, %xmm1
# BDWELL-NEXT: 1. 1 7.0 0.0 0.0 vaddps (%rax), %xmm1, %xmm1
# BTVER2-NEXT: 1. 1 15.0 0.0 0.0 vaddps (%rax), %xmm1, %xmm1
# HASWELL-NEXT: 1. 1 8.0 0.0 0.0 vaddps (%rax), %xmm1, %xmm1
# NOTE: Assertions have been autogenerated by utils/update_mca_test_checks.py
+# RUN: llvm-mca %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -iterations=1 -all-stats=false -all-views=false -register-file-stats < %s | FileCheck --check-prefixes=ALL %s
# RUN: llvm-mca %s -mtriple=x86_64-unknown-unknown -mcpu=btver2 -iterations=1 -all-stats=false -all-views=false -register-file-stats < %s | FileCheck --check-prefixes=ALL,BTVER2 %s
# RUN: llvm-mca %s -mtriple=x86_64-unknown-unknown -mcpu=znver1 -iterations=1 -all-stats=false -all-views=false -register-file-stats < %s | FileCheck --check-prefixes=ALL,ZNVER1 %s
# RUN: llvm-mca %s -mtriple=x86_64-unknown-unknown -mcpu=sandybridge -iterations=1 -all-stats=false -all-views=false -register-file-stats < %s | FileCheck --check-prefixes=ALL %s
# NOTE: Assertions have been autogenerated by utils/update_mca_test_checks.py
+# RUN: llvm-mca %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -iterations=1 -all-stats=false -all-views=false -scheduler-stats < %s | FileCheck --check-prefixes=ALL,BDVER2 %s
# RUN: llvm-mca %s -mtriple=x86_64-unknown-unknown -mcpu=btver2 -iterations=1 -all-stats=false -all-views=false -scheduler-stats < %s | FileCheck --check-prefixes=ALL,BTVER2 %s
# RUN: llvm-mca %s -mtriple=x86_64-unknown-unknown -mcpu=znver1 -iterations=1 -all-stats=false -all-views=false -scheduler-stats < %s | FileCheck --check-prefixes=ALL,ZNVER1 %s
# RUN: llvm-mca %s -mtriple=x86_64-unknown-unknown -mcpu=sandybridge -iterations=1 -all-stats=false -all-views=false -scheduler-stats < %s | FileCheck --check-prefixes=ALL,SNB %s
# ALL-NEXT: 0, 3 (75.0%)
# ALL-NEXT: 1, 1 (25.0%)
+# BDVER2: Scheduler's queue usage:
+# BDVER2-NEXT: [1] Resource name.
+# BDVER2-NEXT: [2] Average number of used buffer entries.
+# BDVER2-NEXT: [3] Maximum number of used buffer entries.
+# BDVER2-NEXT: [4] Total number of buffer entries.
+
# BDW: Scheduler's queue usage:
# BDW-NEXT: [1] Resource name.
# BDW-NEXT: [2] Average number of used buffer entries.
# ZNVER1-NEXT: [3] Maximum number of used buffer entries.
# ZNVER1-NEXT: [4] Total number of buffer entries.
+# BDVER2: [1] [2] [3] [4]
+# BDVER2-NEXT: SBPortAny 0 1 54
+
# BDW: [1] [2] [3] [4]
# BDW-NEXT: BWPortAny 0 1 60
# NOTE: Assertions have been autogenerated by utils/update_mca_test_checks.py
+# ZZZ: llvm-mca -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -iterations=1 -all-views=false -timeline < %s | FileCheck %s -check-prefix=ALL -check-prefix=BDVER2
# RUN: llvm-mca -mtriple=x86_64-unknown-unknown -mcpu=btver2 -iterations=1 -all-views=false -timeline < %s | FileCheck %s -check-prefix=ALL -check-prefix=BTVER2
# RUN: llvm-mca -mtriple=x86_64-unknown-unknown -mcpu=znver1 -iterations=1 -all-views=false -timeline < %s | FileCheck %s -check-prefix=ALL -check-prefix=ZNVER1
# RUN: llvm-mca -mtriple=x86_64-unknown-unknown -mcpu=haswell -iterations=1 -all-views=false -timeline < %s | FileCheck %s -check-prefix=ALL -check-prefix=HASWELL
# RUN: llvm-mca -mtriple=x86_64-unknown-unknown -mcpu=skylake -iterations=1 -timeline -instruction-info=false -resource-pressure=false < %s | FileCheck %s -check-prefix=ALL -check-prefix=SKYLAKE
+# RUN: llvm-mca -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -iterations=1 -timeline -instruction-info=false -resource-pressure=false < %s | FileCheck %s -check-prefix=ALL -check-prefix=BDVER2
+
# RUN: llvm-mca -mtriple=x86_64-unknown-unknown -mcpu=btver2 -iterations=1 -timeline -instruction-info=false -resource-pressure=false < %s | FileCheck %s -check-prefix=ALL -check-prefix=BTVER2
# RUN: llvm-mca -mtriple=x86_64-unknown-unknown -mcpu=znver1 -iterations=1 -timeline -instruction-info=false -resource-pressure=false < %s | FileCheck %s -check-prefix=ALL -check-prefix=ZNVER1
# ALL: Iterations: 1
# ALL-NEXT: Instructions: 2
+# BDVER2-NEXT: Total Cycles: 11
+# BDVER2-NEXT: Total uOps: 4
+
# BDWELL-NEXT: Total Cycles: 10
# BDWELL-NEXT: Total uOps: 4
# ZNVER1-NEXT: Total Cycles: 11
# ZNVER1-NEXT: Total uOps: 2
+# BDVER2: Dispatch Width: 4
+# BDVER2-NEXT: uOps Per Cycle: 0.36
+# BDVER2-NEXT: IPC: 0.18
+# BDVER2-NEXT: Block RThroughput: 1.0
+
# BDWELL: Dispatch Width: 4
# BDWELL-NEXT: uOps Per Cycle: 0.40
# BDWELL-NEXT: IPC: 0.20
# ZNVER1-NEXT: IPC: 0.18
# ZNVER1-NEXT: Block RThroughput: 1.0
+# BDVER2: Timeline view:
+# BDVER2-NEXT: 0
+# BDVER2-NEXT: Index 0123456789
+
# BDWELL: Timeline view:
# BDWELL-NEXT: Index 0123456789
# ZNVER1-NEXT: 0
# ZNVER1-NEXT: Index 0123456789
+# BDVER2: [0,0] DeeeER . vaddps %xmm0, %xmm0, %xmm1
+# BDVER2-NEXT: [0,1] DeeeeeeeeER vblendvps %xmm1, (%rdi), %xmm2, %xmm3
+
# BDWELL: [0,0] DeeeER . vaddps %xmm0, %xmm0, %xmm1
# BDWELL-NEXT: [0,1] DeeeeeeeER vblendvps %xmm1, (%rdi), %xmm2, %xmm3
# ALL: [0] [1] [2] [3]
# ALL-NEXT: 0. 1 1.0 1.0 0.0 vaddps %xmm0, %xmm0, %xmm1
+# BDVER2-NEXT: 1. 1 1.0 0.0 0.0 vblendvps %xmm1, (%rdi), %xmm2, %xmm3
# BDWELL-NEXT: 1. 1 1.0 0.0 0.0 vblendvps %xmm1, (%rdi), %xmm2, %xmm3
# BTVER2-NEXT: 1. 1 1.0 1.0 0.0 vblendvps %xmm1, (%rdi), %xmm2, %xmm3
# HASWELL-NEXT: 1. 1 1.0 0.0 0.0 vblendvps %xmm1, (%rdi), %xmm2, %xmm3
# RUN: llvm-mca -mtriple=x86_64-unknown-unknown -mcpu=skylake -iterations=1 -timeline -instruction-info=false -resource-pressure=false < %s | FileCheck %s -check-prefix=ALL -check-prefix=SKYLAKE
+# RUN: llvm-mca -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -iterations=1 -timeline -instruction-info=false -resource-pressure=false < %s | FileCheck %s -check-prefix=ALL -check-prefix=BDVER2
+
# RUN: llvm-mca -mtriple=x86_64-unknown-unknown -mcpu=btver2 -iterations=1 -timeline -instruction-info=false -resource-pressure=false < %s | FileCheck %s -check-prefix=ALL -check-prefix=BTVER2
# RUN: llvm-mca -mtriple=x86_64-unknown-unknown -mcpu=znver1 -iterations=1 -timeline -instruction-info=false -resource-pressure=false < %s | FileCheck %s -check-prefix=ALL -check-prefix=ZNVER1
# ALL: Iterations: 1
# ALL-NEXT: Instructions: 2
+# BDVER2-NEXT: Total Cycles: 11
+# BDVER2-NEXT: Total uOps: 4
+
# BDWELL-NEXT: Total Cycles: 10
# BDWELL-NEXT: Total uOps: 4
# ZNVER1-NEXT: Total Cycles: 11
# ZNVER1-NEXT: Total uOps: 2
+# BDVER2: Dispatch Width: 4
+# BDVER2-NEXT: uOps Per Cycle: 0.36
+# BDVER2-NEXT: IPC: 0.18
+# BDVER2-NEXT: Block RThroughput: 1.0
+
# BDWELL: Dispatch Width: 4
# BDWELL-NEXT: uOps Per Cycle: 0.40
# BDWELL-NEXT: IPC: 0.20
# ZNVER1-NEXT: IPC: 0.18
# ZNVER1-NEXT: Block RThroughput: 1.0
+# BDVER2: Timeline view:
+# BDVER2-NEXT: 0
+# BDVER2-NEXT: Index 0123456789
+
# BDWELL: Timeline view:
# BDWELL-NEXT: Index 0123456789
# ZNVER1-NEXT: 0
# ZNVER1-NEXT: Index 0123456789
+# BDVER2: [0,0] DeeeER . vaddps %xmm0, %xmm0, %xmm2
+# BDVER2-NEXT: [0,1] DeeeeeeeeER vblendvps %xmm1, (%rdi), %xmm2, %xmm3
+
# BDWELL: [0,0] DeeeER . vaddps %xmm0, %xmm0, %xmm2
# BDWELL-NEXT: [0,1] DeeeeeeeER vblendvps %xmm1, (%rdi), %xmm2, %xmm3
# ALL: [0] [1] [2] [3]
# ALL-NEXT: 0. 1 1.0 1.0 0.0 vaddps %xmm0, %xmm0, %xmm2
+# BDVER2-NEXT: 1. 1 1.0 0.0 0.0 vblendvps %xmm1, (%rdi), %xmm2, %xmm3
# BDWELL-NEXT: 1. 1 1.0 0.0 0.0 vblendvps %xmm1, (%rdi), %xmm2, %xmm3
# BTVER2-NEXT: 1. 1 1.0 1.0 0.0 vblendvps %xmm1, (%rdi), %xmm2, %xmm3
# HASWELL-NEXT: 1. 1 1.0 0.0 0.0 vblendvps %xmm1, (%rdi), %xmm2, %xmm3