(ins _.RC:$src1, _.ScalarMemOp:$src2, i32u8imm:$src3),
OpcodeStr,
"$src3, $src2, $src1", "$src1, $src2, $src3",
- (_.VT (X86RndScales (_.VT _.RC:$src1),
- (_.VT (scalar_to_vector (_.ScalarLdFrag addr:$src2))),
- (i32 imm:$src3)))>;
+ (_.VT (X86RndScales _.RC:$src1,
+ _.ScalarIntMemCPat:$src2, (i32 imm:$src3)))>;
let isCodeGenOnly = 1, hasSideEffects = 0 in {
def r : I<opc, MRMSrcReg, (outs _.FRC:$dst),
; SKX-LABEL: test_x86_sse41_round_sd_load:
; SKX: ## BB#0:
; SKX-NEXT: movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04]
-; SKX-NEXT: vmovapd (%eax), %xmm1 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x28,0x08]
-; SKX-NEXT: vrndscalesd $7, %xmm1, %xmm0, %xmm0 ## encoding: [0x62,0xf3,0xfd,0x08,0x0b,0xc1,0x07]
+; SKX-NEXT: vrndscalesd $7, (%eax), %xmm0, %xmm0 ## encoding: [0x62,0xf3,0xfd,0x08,0x0b,0x00,0x07]
; SKX-NEXT: retl ## encoding: [0xc3]
%a1b = load <2 x double>, <2 x double>* %a1
%res = call <2 x double> @llvm.x86.sse41.round.sd(<2 x double> %a0, <2 x double> %a1b, i32 7) ; <<2 x double>> [#uses=1]
; SKX-LABEL: test_roundsd:
; SKX: # BB#0:
; SKX-NEXT: vrndscalesd $7, %xmm1, %xmm0, %xmm1 # sched: [8:0.67]
-; SKX-NEXT: vmovapd (%rdi), %xmm2 # sched: [6:0.50]
-; SKX-NEXT: vrndscalesd $7, %xmm2, %xmm0, %xmm0 # sched: [8:0.67]
+; SKX-NEXT: vrndscalesd $7, (%rdi), %xmm0, %xmm0 # sched: [14:0.67]
; SKX-NEXT: vaddpd %xmm0, %xmm1, %xmm0 # sched: [4:0.33]
; SKX-NEXT: retq # sched: [7:1.00]
;
; SKX-LABEL: test_roundss:
; SKX: # BB#0:
; SKX-NEXT: vrndscaless $7, %xmm1, %xmm0, %xmm1 # sched: [8:0.67]
-; SKX-NEXT: vmovaps (%rdi), %xmm2 # sched: [6:0.50]
-; SKX-NEXT: vrndscaless $7, %xmm2, %xmm0, %xmm0 # sched: [8:0.67]
+; SKX-NEXT: vrndscaless $7, (%rdi), %xmm0, %xmm0 # sched: [14:0.67]
; SKX-NEXT: vaddps %xmm0, %xmm1, %xmm0 # sched: [4:0.33]
; SKX-NEXT: retq # sched: [7:1.00]
;