These should match the YMM MOVDUP/ PERMILPD/PERMILPS + SHUFPD/SHUFPS shuffles instead of using the WriteFShuffle defaults.
llvm-svn: 328501
let ResourceCycles = [2, 2];
let NumMicroOps = 2;
}
-def : InstRW<[JWriteShuffleY], (instrs VMOVDDUPYrr, VMOVSHDUPYrr, VMOVSLDUPYrr,
- VPERMILPDYri, VPERMILPSYri, VSHUFPDYrri, VSHUFPSYrri)>;
+def : InstRW<[JWriteShuffleY], (instrs VBLENDPDYrri, VBLENDPSYrri,
+ VMOVDDUPYrr, VMOVSHDUPYrr, VMOVSLDUPYrr,
+ VPERMILPDYri, VPERMILPSYri, VSHUFPDYrri, VSHUFPSYrri,
+ VUNPCKHPDYrr, VUNPCKHPSYrr, VUNPCKLPDYrr, VUNPCKLPSYrr)>;
def JWriteShuffleYLd: SchedWriteRes<[JLAGU, JFPU01, JFPX]> {
let Latency = 6;
let ResourceCycles = [2, 2, 2];
let NumMicroOps = 2;
}
-def : InstRW<[JWriteShuffleYLd, ReadAfterLd], (instrs VMOVDDUPYrm, VMOVSHDUPYrm, VMOVSLDUPYrm,
- VPERMILPDYmi, VPERMILPSYmi, VSHUFPDYrmi, VSHUFPSYrmi)>;
+def : InstRW<[JWriteShuffleYLd, ReadAfterLd], (instrs VBLENDPDYrmi, VBLENDPSYrmi,
+ VMOVDDUPYrm, VMOVSHDUPYrm, VMOVSLDUPYrm,
+ VPERMILPDYmi, VPERMILPSYmi, VSHUFPDYrmi, VSHUFPSYrmi,
+ VUNPCKHPDYrm, VUNPCKHPSYrm, VUNPCKLPDYrm, VUNPCKLPSYrm)>;
def JWriteVBROADCASTYLd: SchedWriteRes<[JLAGU, JFPU01, JFPX]> {
let Latency = 6;
;
; BTVER2-LABEL: test_blendpd:
; BTVER2: # %bb.0:
-; BTVER2-NEXT: vblendpd {{.*#+}} ymm0 = ymm0[0],ymm1[1,2],ymm0[3] sched: [1:0.50]
+; BTVER2-NEXT: vblendpd {{.*#+}} ymm0 = ymm0[0],ymm1[1,2],ymm0[3] sched: [1:1.00]
; BTVER2-NEXT: vaddpd %ymm0, %ymm1, %ymm0 # sched: [3:2.00]
-; BTVER2-NEXT: vblendpd {{.*#+}} ymm0 = ymm0[0],mem[1,2],ymm0[3] sched: [6:1.00]
+; BTVER2-NEXT: vblendpd {{.*#+}} ymm0 = ymm0[0],mem[1,2],ymm0[3] sched: [6:2.00]
; BTVER2-NEXT: retq # sched: [4:1.00]
;
; ZNVER1-LABEL: test_blendpd:
;
; BTVER2-LABEL: test_blendps:
; BTVER2: # %bb.0:
-; BTVER2-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0],ymm1[1,2],ymm0[3,4,5,6,7] sched: [1:0.50]
-; BTVER2-NEXT: vblendps {{.*#+}} ymm1 = ymm1[0,1],mem[2],ymm1[3],mem[4,5,6],ymm1[7] sched: [6:1.00]
+; BTVER2-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0],ymm1[1,2],ymm0[3,4,5,6,7] sched: [1:1.00]
+; BTVER2-NEXT: vblendps {{.*#+}} ymm1 = ymm1[0,1],mem[2],ymm1[3],mem[4,5,6],ymm1[7] sched: [6:2.00]
; BTVER2-NEXT: vaddps %ymm1, %ymm0, %ymm0 # sched: [3:2.00]
; BTVER2-NEXT: retq # sched: [4:1.00]
;
;
; BTVER2-LABEL: test_unpckhpd:
; BTVER2: # %bb.0:
-; BTVER2-NEXT: vunpckhpd {{.*#+}} ymm0 = ymm0[1],ymm1[1],ymm0[3],ymm1[3] sched: [1:0.50]
-; BTVER2-NEXT: vunpckhpd {{.*#+}} ymm1 = ymm1[1],mem[1],ymm1[3],mem[3] sched: [6:1.00]
+; BTVER2-NEXT: vunpckhpd {{.*#+}} ymm0 = ymm0[1],ymm1[1],ymm0[3],ymm1[3] sched: [1:1.00]
+; BTVER2-NEXT: vunpckhpd {{.*#+}} ymm1 = ymm1[1],mem[1],ymm1[3],mem[3] sched: [6:2.00]
; BTVER2-NEXT: vaddpd %ymm1, %ymm0, %ymm0 # sched: [3:2.00]
; BTVER2-NEXT: retq # sched: [4:1.00]
;
;
; BTVER2-LABEL: test_unpckhps:
; BTVER2: # %bb.0:
-; BTVER2-NEXT: vunpckhps {{.*#+}} ymm0 = ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[6],ymm1[6],ymm0[7],ymm1[7] sched: [1:0.50]
-; BTVER2-NEXT: vunpckhps {{.*#+}} ymm0 = ymm0[2],mem[2],ymm0[3],mem[3],ymm0[6],mem[6],ymm0[7],mem[7] sched: [6:1.00]
+; BTVER2-NEXT: vunpckhps {{.*#+}} ymm0 = ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[6],ymm1[6],ymm0[7],ymm1[7] sched: [1:1.00]
+; BTVER2-NEXT: vunpckhps {{.*#+}} ymm0 = ymm0[2],mem[2],ymm0[3],mem[3],ymm0[6],mem[6],ymm0[7],mem[7] sched: [6:2.00]
; BTVER2-NEXT: retq # sched: [4:1.00]
;
; ZNVER1-LABEL: test_unpckhps:
;
; BTVER2-LABEL: test_unpcklpd:
; BTVER2: # %bb.0:
-; BTVER2-NEXT: vunpcklpd {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[2],ymm1[2] sched: [1:0.50]
-; BTVER2-NEXT: vunpcklpd {{.*#+}} ymm1 = ymm1[0],mem[0],ymm1[2],mem[2] sched: [6:1.00]
+; BTVER2-NEXT: vunpcklpd {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[2],ymm1[2] sched: [1:1.00]
+; BTVER2-NEXT: vunpcklpd {{.*#+}} ymm1 = ymm1[0],mem[0],ymm1[2],mem[2] sched: [6:2.00]
; BTVER2-NEXT: vaddpd %ymm1, %ymm0, %ymm0 # sched: [3:2.00]
; BTVER2-NEXT: retq # sched: [4:1.00]
;
;
; BTVER2-LABEL: test_unpcklps:
; BTVER2: # %bb.0:
-; BTVER2-NEXT: vunpcklps {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[4],ymm1[4],ymm0[5],ymm1[5] sched: [1:0.50]
-; BTVER2-NEXT: vunpcklps {{.*#+}} ymm0 = ymm0[0],mem[0],ymm0[1],mem[1],ymm0[4],mem[4],ymm0[5],mem[5] sched: [6:1.00]
+; BTVER2-NEXT: vunpcklps {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[4],ymm1[4],ymm0[5],ymm1[5] sched: [1:1.00]
+; BTVER2-NEXT: vunpcklps {{.*#+}} ymm0 = ymm0[0],mem[0],ymm0[1],mem[1],ymm0[4],mem[4],ymm0[5],mem[5] sched: [6:2.00]
; BTVER2-NEXT: retq # sched: [4:1.00]
;
; ZNVER1-LABEL: test_unpcklps:
# CHECK-NEXT: - - - 1.00 1.00 1.00 1.00 2.00 - - - - - - vandps (%rax), %ymm1, %ymm2
# CHECK-NEXT: - - - 0.50 0.50 0.50 0.50 - - - - - - - vblendpd $11, %xmm0, %xmm1, %xmm2
# CHECK-NEXT: - - - 0.50 0.50 0.50 0.50 1.00 - - - - - - vblendpd $11, (%rax), %xmm1, %xmm2
-# CHECK-NEXT: - - - 0.50 0.50 0.50 0.50 - - - - - - - vblendpd $11, %ymm0, %ymm1, %ymm2
-# CHECK-NEXT: - - - 0.50 0.50 0.50 0.50 1.00 - - - - - - vblendpd $11, (%rax), %ymm1, %ymm2
+# CHECK-NEXT: - - - 1.00 1.00 1.00 1.00 - - - - - - - vblendpd $11, %ymm0, %ymm1, %ymm2
+# CHECK-NEXT: - - - 1.00 1.00 1.00 1.00 2.00 - - - - - - vblendpd $11, (%rax), %ymm1, %ymm2
# CHECK-NEXT: - - - 0.50 0.50 0.50 0.50 - - - - - - - vblendps $11, %xmm0, %xmm1, %xmm2
# CHECK-NEXT: - - - 0.50 0.50 0.50 0.50 1.00 - - - - - - vblendps $11, (%rax), %xmm1, %xmm2
-# CHECK-NEXT: - - - 0.50 0.50 0.50 0.50 - - - - - - - vblendps $11, %ymm0, %ymm1, %ymm2
-# CHECK-NEXT: - - - 0.50 0.50 0.50 0.50 1.00 - - - - - - vblendps $11, (%rax), %ymm1, %ymm2
+# CHECK-NEXT: - - - 1.00 1.00 1.00 1.00 - - - - - - - vblendps $11, %ymm0, %ymm1, %ymm2
+# CHECK-NEXT: - - - 1.00 1.00 1.00 1.00 2.00 - - - - - - vblendps $11, (%rax), %ymm1, %ymm2
# CHECK-NEXT: - - - 2.00 2.00 0.50 0.50 - - - - - - - vblendvpd %xmm3, %xmm0, %xmm1, %xmm2
# CHECK-NEXT: - - - 2.00 2.00 0.50 0.50 1.00 - - - - - - vblendvpd %xmm3, (%rax), %xmm1, %xmm2
# CHECK-NEXT: - - - 3.00 3.00 1.00 1.00 - - - - - - - vblendvpd %ymm3, %ymm0, %ymm1, %ymm2
# CHECK-NEXT: - - - 1.00 - 1.00 - 1.00 - - - - - - vucomiss (%rax), %xmm1
# CHECK-NEXT: - - - 0.50 0.50 0.50 0.50 - - - - - - - vunpckhpd %xmm0, %xmm1, %xmm2
# CHECK-NEXT: - - - 0.50 0.50 0.50 0.50 1.00 - - - - - - vunpckhpd (%rax), %xmm1, %xmm2
-# CHECK-NEXT: - - - 0.50 0.50 0.50 0.50 - - - - - - - vunpckhpd %ymm0, %ymm1, %ymm2
-# CHECK-NEXT: - - - 0.50 0.50 0.50 0.50 1.00 - - - - - - vunpckhpd (%rax), %ymm1, %ymm2
+# CHECK-NEXT: - - - 1.00 1.00 1.00 1.00 - - - - - - - vunpckhpd %ymm0, %ymm1, %ymm2
+# CHECK-NEXT: - - - 1.00 1.00 1.00 1.00 2.00 - - - - - - vunpckhpd (%rax), %ymm1, %ymm2
# CHECK-NEXT: - - - 0.50 0.50 0.50 0.50 - - - - - - - vunpckhps %xmm0, %xmm1, %xmm2
# CHECK-NEXT: - - - 0.50 0.50 0.50 0.50 1.00 - - - - - - vunpckhps (%rax), %xmm1, %xmm2
-# CHECK-NEXT: - - - 0.50 0.50 0.50 0.50 - - - - - - - vunpckhps %ymm0, %ymm1, %ymm2
-# CHECK-NEXT: - - - 0.50 0.50 0.50 0.50 1.00 - - - - - - vunpckhps (%rax), %ymm1, %ymm2
+# CHECK-NEXT: - - - 1.00 1.00 1.00 1.00 - - - - - - - vunpckhps %ymm0, %ymm1, %ymm2
+# CHECK-NEXT: - - - 1.00 1.00 1.00 1.00 2.00 - - - - - - vunpckhps (%rax), %ymm1, %ymm2
# CHECK-NEXT: - - - 0.50 0.50 0.50 0.50 - - - - - - - vunpcklpd %xmm0, %xmm1, %xmm2
# CHECK-NEXT: - - - 0.50 0.50 0.50 0.50 1.00 - - - - - - vunpcklpd (%rax), %xmm1, %xmm2
-# CHECK-NEXT: - - - 0.50 0.50 0.50 0.50 - - - - - - - vunpcklpd %ymm0, %ymm1, %ymm2
-# CHECK-NEXT: - - - 0.50 0.50 0.50 0.50 1.00 - - - - - - vunpcklpd (%rax), %ymm1, %ymm2
+# CHECK-NEXT: - - - 1.00 1.00 1.00 1.00 - - - - - - - vunpcklpd %ymm0, %ymm1, %ymm2
+# CHECK-NEXT: - - - 1.00 1.00 1.00 1.00 2.00 - - - - - - vunpcklpd (%rax), %ymm1, %ymm2
# CHECK-NEXT: - - - 0.50 0.50 0.50 0.50 - - - - - - - vunpcklps %xmm0, %xmm1, %xmm2
# CHECK-NEXT: - - - 0.50 0.50 0.50 0.50 1.00 - - - - - - vunpcklps (%rax), %xmm1, %xmm2
-# CHECK-NEXT: - - - 0.50 0.50 0.50 0.50 - - - - - - - vunpcklps %ymm0, %ymm1, %ymm2
-# CHECK-NEXT: - - - 0.50 0.50 0.50 0.50 1.00 - - - - - - vunpcklps (%rax), %ymm1, %ymm2
+# CHECK-NEXT: - - - 1.00 1.00 1.00 1.00 - - - - - - - vunpcklps %ymm0, %ymm1, %ymm2
+# CHECK-NEXT: - - - 1.00 1.00 1.00 1.00 2.00 - - - - - - vunpcklps (%rax), %ymm1, %ymm2
# CHECK-NEXT: - - - 0.50 0.50 0.50 0.50 - - - - - - - vxorpd %xmm0, %xmm1, %xmm2
# CHECK-NEXT: - - - 0.50 0.50 0.50 0.50 1.00 - - - - - - vxorpd (%rax), %xmm1, %xmm2
# CHECK-NEXT: - - - 1.00 1.00 1.00 1.00 - - - - - - - vxorpd %ymm0, %ymm1, %ymm2