def ZnWriteROUNDr : SchedWriteRes<[ZnFPU3]> {
let Latency = 4;
}
-def : InstRW<[ZnWriteROUNDr], (instregex "(V?)ROUND(Y?)(S|P)(S|D)r")>;
+def : InstRW<[ZnWriteROUNDr], (instregex "(V?)ROUND(S|P)(S|D)(Y?)r")>;
// v,m,i.
def ZnWriteROUNDm : SchedWriteRes<[ZnAGU, ZnFPU3]> {
let Latency = 11;
let NumMicroOps = 2;
}
-def : InstRW<[ZnWriteROUNDm], (instregex "(V?)ROUND(Y?)(S|P)(S|D)m")>;
+def : InstRW<[ZnWriteROUNDm], (instregex "(V?)ROUND(S|P)(S|D)(Y?)m")>;
// DPPS.
// x,x,i / v,v,v,i.
;
; ZNVER1-LABEL: test_roundpd:
; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: vroundpd $7, (%rdi), %ymm1 # sched: [10:1.00]
-; ZNVER1-NEXT: vroundpd $7, %ymm0, %ymm0 # sched: [3:1.00]
+; ZNVER1-NEXT: vroundpd $7, (%rdi), %ymm1 # sched: [11:1.00]
+; ZNVER1-NEXT: vroundpd $7, %ymm0, %ymm0 # sched: [4:1.00]
; ZNVER1-NEXT: vaddpd %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
; ZNVER1-NEXT: retq # sched: [1:0.50]
%1 = call <4 x double> @llvm.x86.avx.round.pd.256(<4 x double> %a0, i32 7)
;
; ZNVER1-LABEL: test_roundps:
; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: vroundps $7, (%rdi), %ymm1 # sched: [10:1.00]
-; ZNVER1-NEXT: vroundps $7, %ymm0, %ymm0 # sched: [3:1.00]
+; ZNVER1-NEXT: vroundps $7, (%rdi), %ymm1 # sched: [11:1.00]
+; ZNVER1-NEXT: vroundps $7, %ymm0, %ymm0 # sched: [4:1.00]
; ZNVER1-NEXT: vaddps %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
; ZNVER1-NEXT: retq # sched: [1:0.50]
%1 = call <8 x float> @llvm.x86.avx.round.ps.256(<8 x float> %a0, i32 7)