// Return true for any instruction that copies the high bits of the first source
// operand into the unused high bits of the destination operand.
-static bool hasUndefRegUpdate(unsigned Opcode, bool ForLoadFold = false) {
+static bool hasUndefRegUpdate(unsigned Opcode, unsigned &OpNum,
+ bool ForLoadFold = false) {
+ // Set the OpNum parameter to the first source operand.
+ OpNum = 1;
switch (Opcode) {
case X86::VCVTSI2SSrr:
case X86::VCVTSI2SSrm:
case X86::VSQRTSDZm:
case X86::VSQRTSDZm_Int:
return true;
+ case X86::VMOVSSZrrk:
+ case X86::VMOVSDZrrk:
+ OpNum = 3;
+ return true;
+ case X86::VMOVSSZrrkz:
+ case X86::VMOVSDZrrkz:
+ OpNum = 2;
+ return true;
}
return false;
unsigned
X86InstrInfo::getUndefRegClearance(const MachineInstr &MI, unsigned &OpNum,
const TargetRegisterInfo *TRI) const {
- if (!hasUndefRegUpdate(MI.getOpcode()))
+ if (!hasUndefRegUpdate(MI.getOpcode(), OpNum))
return 0;
- // Set the OpNum parameter to the first source operand.
- OpNum = 1;
-
const MachineOperand &MO = MI.getOperand(OpNum);
if (MO.isUndef() && Register::isPhysicalRegister(MO.getReg())) {
return UndefRegClearance;
static bool shouldPreventUndefRegUpdateMemFold(MachineFunction &MF,
MachineInstr &MI) {
- if (!hasUndefRegUpdate(MI.getOpcode(), /*ForLoadFold*/true) ||
+ unsigned Ignored;
+ if (!hasUndefRegUpdate(MI.getOpcode(), Ignored, /*ForLoadFold*/true) ||
!MI.getOperand(1).isReg())
return false;
; KNL-NEXT: movswl 2(%rsi), %eax
; KNL-NEXT: vmovd %eax, %xmm1
; KNL-NEXT: vcvtph2ps %xmm1, %xmm1
-; KNL-NEXT: vmovss %xmm1, %xmm0, %xmm1 {%k2} {z}
+; KNL-NEXT: vmovss %xmm1, %xmm1, %xmm1 {%k2} {z}
; KNL-NEXT: vmovss %xmm0, %xmm0, %xmm0 {%k1} {z}
; KNL-NEXT: vcvtps2ph $4, %xmm0, %xmm0
; KNL-NEXT: vmovd %xmm0, %eax
; SKX-NEXT: movswl 2(%rsi), %eax
; SKX-NEXT: vmovd %eax, %xmm1
; SKX-NEXT: vcvtph2ps %xmm1, %xmm1
-; SKX-NEXT: vmovss %xmm1, %xmm0, %xmm1 {%k2} {z}
+; SKX-NEXT: vmovss %xmm1, %xmm1, %xmm1 {%k2} {z}
; SKX-NEXT: vmovss %xmm0, %xmm0, %xmm0 {%k1} {z}
; SKX-NEXT: vcvtps2ph $4, %xmm0, %xmm0
; SKX-NEXT: vmovd %xmm0, %eax
; X64-LABEL: select02:
; X64: # %bb.0:
; X64-NEXT: vcmpless %xmm0, %xmm3, %k1
-; X64-NEXT: vmovss %xmm2, %xmm0, %xmm1 {%k1}
+; X64-NEXT: vmovss %xmm2, %xmm1, %xmm1 {%k1}
; X64-NEXT: vmovaps %xmm1, %xmm0
; X64-NEXT: retq
%cmp = fcmp oge float %a, %eps
; X64-LABEL: select03:
; X64: # %bb.0:
; X64-NEXT: vcmplesd %xmm0, %xmm3, %k1
-; X64-NEXT: vmovsd %xmm2, %xmm0, %xmm1 {%k1}
+; X64-NEXT: vmovsd %xmm2, %xmm1, %xmm1 {%k1}
; X64-NEXT: vmovapd %xmm1, %xmm0
; X64-NEXT: retq
%cmp = fcmp oge double %a, %eps
; AVX512-LABEL: select_fcmp_one_f32:
; AVX512: ## %bb.0:
; AVX512-NEXT: vcmpneq_oqss %xmm1, %xmm0, %k1
-; AVX512-NEXT: vmovss %xmm2, %xmm0, %xmm3 {%k1}
+; AVX512-NEXT: vmovss %xmm2, %xmm3, %xmm3 {%k1}
; AVX512-NEXT: vmovaps %xmm3, %xmm0
; AVX512-NEXT: retq
%1 = fcmp one float %a, %b
; AVX512-LABEL: select_fcmp_one_f64:
; AVX512: ## %bb.0:
; AVX512-NEXT: vcmpneq_oqsd %xmm1, %xmm0, %k1
-; AVX512-NEXT: vmovsd %xmm2, %xmm0, %xmm3 {%k1}
+; AVX512-NEXT: vmovsd %xmm2, %xmm3, %xmm3 {%k1}
; AVX512-NEXT: vmovapd %xmm3, %xmm0
; AVX512-NEXT: retq
%1 = fcmp one double %a, %b
; AVX512-ISEL-NEXT: cmpq %rsi, %rdi
; AVX512-ISEL-NEXT: sete %al
; AVX512-ISEL-NEXT: kmovd %eax, %k1
-; AVX512-ISEL-NEXT: vmovss %xmm0, %xmm0, %xmm1 {%k1}
+; AVX512-ISEL-NEXT: vmovss %xmm0, %xmm1, %xmm1 {%k1}
; AVX512-ISEL-NEXT: vmovaps %xmm1, %xmm0
; AVX512-ISEL-NEXT: retq
;
; AVX512-ISEL-NEXT: cmpq %rsi, %rdi
; AVX512-ISEL-NEXT: setne %al
; AVX512-ISEL-NEXT: kmovd %eax, %k1
-; AVX512-ISEL-NEXT: vmovss %xmm0, %xmm0, %xmm1 {%k1}
+; AVX512-ISEL-NEXT: vmovss %xmm0, %xmm1, %xmm1 {%k1}
; AVX512-ISEL-NEXT: vmovaps %xmm1, %xmm0
; AVX512-ISEL-NEXT: retq
;
; AVX512-ISEL-NEXT: cmpq %rsi, %rdi
; AVX512-ISEL-NEXT: seta %al
; AVX512-ISEL-NEXT: kmovd %eax, %k1
-; AVX512-ISEL-NEXT: vmovss %xmm0, %xmm0, %xmm1 {%k1}
+; AVX512-ISEL-NEXT: vmovss %xmm0, %xmm1, %xmm1 {%k1}
; AVX512-ISEL-NEXT: vmovaps %xmm1, %xmm0
; AVX512-ISEL-NEXT: retq
;
; AVX512-ISEL-NEXT: cmpq %rsi, %rdi
; AVX512-ISEL-NEXT: setae %al
; AVX512-ISEL-NEXT: kmovd %eax, %k1
-; AVX512-ISEL-NEXT: vmovss %xmm0, %xmm0, %xmm1 {%k1}
+; AVX512-ISEL-NEXT: vmovss %xmm0, %xmm1, %xmm1 {%k1}
; AVX512-ISEL-NEXT: vmovaps %xmm1, %xmm0
; AVX512-ISEL-NEXT: retq
;
; AVX512-ISEL-NEXT: cmpq %rsi, %rdi
; AVX512-ISEL-NEXT: setb %al
; AVX512-ISEL-NEXT: kmovd %eax, %k1
-; AVX512-ISEL-NEXT: vmovss %xmm0, %xmm0, %xmm1 {%k1}
+; AVX512-ISEL-NEXT: vmovss %xmm0, %xmm1, %xmm1 {%k1}
; AVX512-ISEL-NEXT: vmovaps %xmm1, %xmm0
; AVX512-ISEL-NEXT: retq
;
; AVX512-ISEL-NEXT: cmpq %rsi, %rdi
; AVX512-ISEL-NEXT: setbe %al
; AVX512-ISEL-NEXT: kmovd %eax, %k1
-; AVX512-ISEL-NEXT: vmovss %xmm0, %xmm0, %xmm1 {%k1}
+; AVX512-ISEL-NEXT: vmovss %xmm0, %xmm1, %xmm1 {%k1}
; AVX512-ISEL-NEXT: vmovaps %xmm1, %xmm0
; AVX512-ISEL-NEXT: retq
;
; AVX512-ISEL-NEXT: cmpq %rsi, %rdi
; AVX512-ISEL-NEXT: setg %al
; AVX512-ISEL-NEXT: kmovd %eax, %k1
-; AVX512-ISEL-NEXT: vmovss %xmm0, %xmm0, %xmm1 {%k1}
+; AVX512-ISEL-NEXT: vmovss %xmm0, %xmm1, %xmm1 {%k1}
; AVX512-ISEL-NEXT: vmovaps %xmm1, %xmm0
; AVX512-ISEL-NEXT: retq
;
; AVX512-ISEL-NEXT: cmpq %rsi, %rdi
; AVX512-ISEL-NEXT: setge %al
; AVX512-ISEL-NEXT: kmovd %eax, %k1
-; AVX512-ISEL-NEXT: vmovss %xmm0, %xmm0, %xmm1 {%k1}
+; AVX512-ISEL-NEXT: vmovss %xmm0, %xmm1, %xmm1 {%k1}
; AVX512-ISEL-NEXT: vmovaps %xmm1, %xmm0
; AVX512-ISEL-NEXT: retq
;
; AVX512-ISEL-NEXT: cmpq %rsi, %rdi
; AVX512-ISEL-NEXT: setl %al
; AVX512-ISEL-NEXT: kmovd %eax, %k1
-; AVX512-ISEL-NEXT: vmovss %xmm0, %xmm0, %xmm1 {%k1}
+; AVX512-ISEL-NEXT: vmovss %xmm0, %xmm1, %xmm1 {%k1}
; AVX512-ISEL-NEXT: vmovaps %xmm1, %xmm0
; AVX512-ISEL-NEXT: retq
;
; AVX512-ISEL-NEXT: cmpq %rsi, %rdi
; AVX512-ISEL-NEXT: setle %al
; AVX512-ISEL-NEXT: kmovd %eax, %k1
-; AVX512-ISEL-NEXT: vmovss %xmm0, %xmm0, %xmm1 {%k1}
+; AVX512-ISEL-NEXT: vmovss %xmm0, %xmm1, %xmm1 {%k1}
; AVX512-ISEL-NEXT: vmovaps %xmm1, %xmm0
; AVX512-ISEL-NEXT: retq
;
; AVX512-LABEL: select_fcmp_oeq_f32:
; AVX512: # %bb.0:
; AVX512-NEXT: vcmpeqss %xmm1, %xmm0, %k1
-; AVX512-NEXT: vmovss %xmm2, %xmm0, %xmm3 {%k1}
+; AVX512-NEXT: vmovss %xmm2, %xmm3, %xmm3 {%k1}
; AVX512-NEXT: vmovaps %xmm3, %xmm0
; AVX512-NEXT: retq
%1 = fcmp oeq float %a, %b
; AVX512-LABEL: select_fcmp_oeq_f64:
; AVX512: # %bb.0:
; AVX512-NEXT: vcmpeqsd %xmm1, %xmm0, %k1
-; AVX512-NEXT: vmovsd %xmm2, %xmm0, %xmm3 {%k1}
+; AVX512-NEXT: vmovsd %xmm2, %xmm3, %xmm3 {%k1}
; AVX512-NEXT: vmovapd %xmm3, %xmm0
; AVX512-NEXT: retq
%1 = fcmp oeq double %a, %b
; AVX512-LABEL: select_fcmp_ogt_f32:
; AVX512: # %bb.0:
; AVX512-NEXT: vcmpltss %xmm0, %xmm1, %k1
-; AVX512-NEXT: vmovss %xmm2, %xmm0, %xmm3 {%k1}
+; AVX512-NEXT: vmovss %xmm2, %xmm3, %xmm3 {%k1}
; AVX512-NEXT: vmovaps %xmm3, %xmm0
; AVX512-NEXT: retq
%1 = fcmp ogt float %a, %b
; AVX512-LABEL: select_fcmp_ogt_f64:
; AVX512: # %bb.0:
; AVX512-NEXT: vcmpltsd %xmm0, %xmm1, %k1
-; AVX512-NEXT: vmovsd %xmm2, %xmm0, %xmm3 {%k1}
+; AVX512-NEXT: vmovsd %xmm2, %xmm3, %xmm3 {%k1}
; AVX512-NEXT: vmovapd %xmm3, %xmm0
; AVX512-NEXT: retq
%1 = fcmp ogt double %a, %b
; AVX512-LABEL: select_fcmp_oge_f32:
; AVX512: # %bb.0:
; AVX512-NEXT: vcmpless %xmm0, %xmm1, %k1
-; AVX512-NEXT: vmovss %xmm2, %xmm0, %xmm3 {%k1}
+; AVX512-NEXT: vmovss %xmm2, %xmm3, %xmm3 {%k1}
; AVX512-NEXT: vmovaps %xmm3, %xmm0
; AVX512-NEXT: retq
%1 = fcmp oge float %a, %b
; AVX512-LABEL: select_fcmp_oge_f64:
; AVX512: # %bb.0:
; AVX512-NEXT: vcmplesd %xmm0, %xmm1, %k1
-; AVX512-NEXT: vmovsd %xmm2, %xmm0, %xmm3 {%k1}
+; AVX512-NEXT: vmovsd %xmm2, %xmm3, %xmm3 {%k1}
; AVX512-NEXT: vmovapd %xmm3, %xmm0
; AVX512-NEXT: retq
%1 = fcmp oge double %a, %b
; AVX512-LABEL: select_fcmp_olt_f32:
; AVX512: # %bb.0:
; AVX512-NEXT: vcmpltss %xmm1, %xmm0, %k1
-; AVX512-NEXT: vmovss %xmm2, %xmm0, %xmm3 {%k1}
+; AVX512-NEXT: vmovss %xmm2, %xmm3, %xmm3 {%k1}
; AVX512-NEXT: vmovaps %xmm3, %xmm0
; AVX512-NEXT: retq
%1 = fcmp olt float %a, %b
; AVX512-LABEL: select_fcmp_olt_f64:
; AVX512: # %bb.0:
; AVX512-NEXT: vcmpltsd %xmm1, %xmm0, %k1
-; AVX512-NEXT: vmovsd %xmm2, %xmm0, %xmm3 {%k1}
+; AVX512-NEXT: vmovsd %xmm2, %xmm3, %xmm3 {%k1}
; AVX512-NEXT: vmovapd %xmm3, %xmm0
; AVX512-NEXT: retq
%1 = fcmp olt double %a, %b
; AVX512-LABEL: select_fcmp_ole_f32:
; AVX512: # %bb.0:
; AVX512-NEXT: vcmpless %xmm1, %xmm0, %k1
-; AVX512-NEXT: vmovss %xmm2, %xmm0, %xmm3 {%k1}
+; AVX512-NEXT: vmovss %xmm2, %xmm3, %xmm3 {%k1}
; AVX512-NEXT: vmovaps %xmm3, %xmm0
; AVX512-NEXT: retq
%1 = fcmp ole float %a, %b
; AVX512-LABEL: select_fcmp_ole_f64:
; AVX512: # %bb.0:
; AVX512-NEXT: vcmplesd %xmm1, %xmm0, %k1
-; AVX512-NEXT: vmovsd %xmm2, %xmm0, %xmm3 {%k1}
+; AVX512-NEXT: vmovsd %xmm2, %xmm3, %xmm3 {%k1}
; AVX512-NEXT: vmovapd %xmm3, %xmm0
; AVX512-NEXT: retq
%1 = fcmp ole double %a, %b
; AVX512-LABEL: select_fcmp_ord_f32:
; AVX512: # %bb.0:
; AVX512-NEXT: vcmpordss %xmm1, %xmm0, %k1
-; AVX512-NEXT: vmovss %xmm2, %xmm0, %xmm3 {%k1}
+; AVX512-NEXT: vmovss %xmm2, %xmm3, %xmm3 {%k1}
; AVX512-NEXT: vmovaps %xmm3, %xmm0
; AVX512-NEXT: retq
%1 = fcmp ord float %a, %b
; AVX512-LABEL: select_fcmp_ord_f64:
; AVX512: # %bb.0:
; AVX512-NEXT: vcmpordsd %xmm1, %xmm0, %k1
-; AVX512-NEXT: vmovsd %xmm2, %xmm0, %xmm3 {%k1}
+; AVX512-NEXT: vmovsd %xmm2, %xmm3, %xmm3 {%k1}
; AVX512-NEXT: vmovapd %xmm3, %xmm0
; AVX512-NEXT: retq
%1 = fcmp ord double %a, %b
; AVX512-LABEL: select_fcmp_uno_f32:
; AVX512: # %bb.0:
; AVX512-NEXT: vcmpunordss %xmm1, %xmm0, %k1
-; AVX512-NEXT: vmovss %xmm2, %xmm0, %xmm3 {%k1}
+; AVX512-NEXT: vmovss %xmm2, %xmm3, %xmm3 {%k1}
; AVX512-NEXT: vmovaps %xmm3, %xmm0
; AVX512-NEXT: retq
%1 = fcmp uno float %a, %b
; AVX512-LABEL: select_fcmp_uno_f64:
; AVX512: # %bb.0:
; AVX512-NEXT: vcmpunordsd %xmm1, %xmm0, %k1
-; AVX512-NEXT: vmovsd %xmm2, %xmm0, %xmm3 {%k1}
+; AVX512-NEXT: vmovsd %xmm2, %xmm3, %xmm3 {%k1}
; AVX512-NEXT: vmovapd %xmm3, %xmm0
; AVX512-NEXT: retq
%1 = fcmp uno double %a, %b
; AVX512-LABEL: select_fcmp_ugt_f32:
; AVX512: # %bb.0:
; AVX512-NEXT: vcmpnless %xmm1, %xmm0, %k1
-; AVX512-NEXT: vmovss %xmm2, %xmm0, %xmm3 {%k1}
+; AVX512-NEXT: vmovss %xmm2, %xmm3, %xmm3 {%k1}
; AVX512-NEXT: vmovaps %xmm3, %xmm0
; AVX512-NEXT: retq
%1 = fcmp ugt float %a, %b
; AVX512-LABEL: select_fcmp_ugt_f64:
; AVX512: # %bb.0:
; AVX512-NEXT: vcmpnlesd %xmm1, %xmm0, %k1
-; AVX512-NEXT: vmovsd %xmm2, %xmm0, %xmm3 {%k1}
+; AVX512-NEXT: vmovsd %xmm2, %xmm3, %xmm3 {%k1}
; AVX512-NEXT: vmovapd %xmm3, %xmm0
; AVX512-NEXT: retq
%1 = fcmp ugt double %a, %b
; AVX512-LABEL: select_fcmp_uge_f32:
; AVX512: # %bb.0:
; AVX512-NEXT: vcmpnltss %xmm1, %xmm0, %k1
-; AVX512-NEXT: vmovss %xmm2, %xmm0, %xmm3 {%k1}
+; AVX512-NEXT: vmovss %xmm2, %xmm3, %xmm3 {%k1}
; AVX512-NEXT: vmovaps %xmm3, %xmm0
; AVX512-NEXT: retq
%1 = fcmp uge float %a, %b
; AVX512-LABEL: select_fcmp_uge_f64:
; AVX512: # %bb.0:
; AVX512-NEXT: vcmpnltsd %xmm1, %xmm0, %k1
-; AVX512-NEXT: vmovsd %xmm2, %xmm0, %xmm3 {%k1}
+; AVX512-NEXT: vmovsd %xmm2, %xmm3, %xmm3 {%k1}
; AVX512-NEXT: vmovapd %xmm3, %xmm0
; AVX512-NEXT: retq
%1 = fcmp uge double %a, %b
; AVX512-LABEL: select_fcmp_ult_f32:
; AVX512: # %bb.0:
; AVX512-NEXT: vcmpnless %xmm0, %xmm1, %k1
-; AVX512-NEXT: vmovss %xmm2, %xmm0, %xmm3 {%k1}
+; AVX512-NEXT: vmovss %xmm2, %xmm3, %xmm3 {%k1}
; AVX512-NEXT: vmovaps %xmm3, %xmm0
; AVX512-NEXT: retq
%1 = fcmp ult float %a, %b
; AVX512-LABEL: select_fcmp_ult_f64:
; AVX512: # %bb.0:
; AVX512-NEXT: vcmpnlesd %xmm0, %xmm1, %k1
-; AVX512-NEXT: vmovsd %xmm2, %xmm0, %xmm3 {%k1}
+; AVX512-NEXT: vmovsd %xmm2, %xmm3, %xmm3 {%k1}
; AVX512-NEXT: vmovapd %xmm3, %xmm0
; AVX512-NEXT: retq
%1 = fcmp ult double %a, %b
; AVX512-LABEL: select_fcmp_ule_f32:
; AVX512: # %bb.0:
; AVX512-NEXT: vcmpnltss %xmm0, %xmm1, %k1
-; AVX512-NEXT: vmovss %xmm2, %xmm0, %xmm3 {%k1}
+; AVX512-NEXT: vmovss %xmm2, %xmm3, %xmm3 {%k1}
; AVX512-NEXT: vmovaps %xmm3, %xmm0
; AVX512-NEXT: retq
%1 = fcmp ule float %a, %b
; AVX512-LABEL: select_fcmp_ule_f64:
; AVX512: # %bb.0:
; AVX512-NEXT: vcmpnltsd %xmm0, %xmm1, %k1
-; AVX512-NEXT: vmovsd %xmm2, %xmm0, %xmm3 {%k1}
+; AVX512-NEXT: vmovsd %xmm2, %xmm3, %xmm3 {%k1}
; AVX512-NEXT: vmovapd %xmm3, %xmm0
; AVX512-NEXT: retq
%1 = fcmp ule double %a, %b
; AVX512-LABEL: select_fcmp_une_f32:
; AVX512: # %bb.0:
; AVX512-NEXT: vcmpneqss %xmm1, %xmm0, %k1
-; AVX512-NEXT: vmovss %xmm2, %xmm0, %xmm3 {%k1}
+; AVX512-NEXT: vmovss %xmm2, %xmm3, %xmm3 {%k1}
; AVX512-NEXT: vmovaps %xmm3, %xmm0
; AVX512-NEXT: retq
%1 = fcmp une float %a, %b
; AVX512-LABEL: select_fcmp_une_f64:
; AVX512: # %bb.0:
; AVX512-NEXT: vcmpneqsd %xmm1, %xmm0, %k1
-; AVX512-NEXT: vmovsd %xmm2, %xmm0, %xmm3 {%k1}
+; AVX512-NEXT: vmovsd %xmm2, %xmm3, %xmm3 {%k1}
; AVX512-NEXT: vmovapd %xmm3, %xmm0
; AVX512-NEXT: retq
%1 = fcmp une double %a, %b
; AVX512: # %bb.0:
; AVX512-NEXT: vmaxss %xmm0, %xmm1, %xmm2
; AVX512-NEXT: vcmpunordss %xmm0, %xmm0, %k1
-; AVX512-NEXT: vmovss %xmm1, %xmm0, %xmm2 {%k1}
+; AVX512-NEXT: vmovss %xmm1, %xmm2, %xmm2 {%k1}
; AVX512-NEXT: vmovaps %xmm2, %xmm0
; AVX512-NEXT: retq
%z = call float @fmaxf(float %x, float %y) readnone
; AVX512: # %bb.0:
; AVX512-NEXT: vmaxsd %xmm0, %xmm1, %xmm2
; AVX512-NEXT: vcmpunordsd %xmm0, %xmm0, %k1
-; AVX512-NEXT: vmovsd %xmm1, %xmm0, %xmm2 {%k1}
+; AVX512-NEXT: vmovsd %xmm1, %xmm2, %xmm2 {%k1}
; AVX512-NEXT: vmovapd %xmm2, %xmm0
; AVX512-NEXT: retq
%z = call double @fmax(double %x, double %y) readnone
; AVX512: # %bb.0:
; AVX512-NEXT: vmaxss %xmm0, %xmm1, %xmm2
; AVX512-NEXT: vcmpunordss %xmm0, %xmm0, %k1
-; AVX512-NEXT: vmovss %xmm1, %xmm0, %xmm2 {%k1}
+; AVX512-NEXT: vmovss %xmm1, %xmm2, %xmm2 {%k1}
; AVX512-NEXT: vmovaps %xmm2, %xmm0
; AVX512-NEXT: retq
%z = call float @llvm.maxnum.f32(float %x, float %y) readnone
; AVX512: # %bb.0:
; AVX512-NEXT: vmaxsd %xmm0, %xmm1, %xmm2
; AVX512-NEXT: vcmpunordsd %xmm0, %xmm0, %k1
-; AVX512-NEXT: vmovsd %xmm1, %xmm0, %xmm2 {%k1}
+; AVX512-NEXT: vmovsd %xmm1, %xmm2, %xmm2 {%k1}
; AVX512-NEXT: vmovapd %xmm2, %xmm0
; AVX512-NEXT: retq
%z = call double @llvm.maxnum.f64(double %x, double %y) readnone
; AVX512: # %bb.0:
; AVX512-NEXT: vminss %xmm0, %xmm1, %xmm2
; AVX512-NEXT: vcmpunordss %xmm0, %xmm0, %k1
-; AVX512-NEXT: vmovss %xmm1, %xmm0, %xmm2 {%k1}
+; AVX512-NEXT: vmovss %xmm1, %xmm2, %xmm2 {%k1}
; AVX512-NEXT: vmovaps %xmm2, %xmm0
; AVX512-NEXT: retq
%z = call float @fminf(float %x, float %y) readnone
; AVX512: # %bb.0:
; AVX512-NEXT: vminsd %xmm0, %xmm1, %xmm2
; AVX512-NEXT: vcmpunordsd %xmm0, %xmm0, %k1
-; AVX512-NEXT: vmovsd %xmm1, %xmm0, %xmm2 {%k1}
+; AVX512-NEXT: vmovsd %xmm1, %xmm2, %xmm2 {%k1}
; AVX512-NEXT: vmovapd %xmm2, %xmm0
; AVX512-NEXT: retq
%z = call double @fmin(double %x, double %y) readnone
; AVX512: # %bb.0:
; AVX512-NEXT: vminss %xmm0, %xmm1, %xmm2
; AVX512-NEXT: vcmpunordss %xmm0, %xmm0, %k1
-; AVX512-NEXT: vmovss %xmm1, %xmm0, %xmm2 {%k1}
+; AVX512-NEXT: vmovss %xmm1, %xmm2, %xmm2 {%k1}
; AVX512-NEXT: vmovaps %xmm2, %xmm0
; AVX512-NEXT: retq
%z = call float @llvm.minnum.f32(float %x, float %y) readnone
; AVX512: # %bb.0:
; AVX512-NEXT: vminsd %xmm0, %xmm1, %xmm2
; AVX512-NEXT: vcmpunordsd %xmm0, %xmm0, %k1
-; AVX512-NEXT: vmovsd %xmm1, %xmm0, %xmm2 {%k1}
+; AVX512-NEXT: vmovsd %xmm1, %xmm2, %xmm2 {%k1}
; AVX512-NEXT: vmovapd %xmm2, %xmm0
; AVX512-NEXT: retq
%z = call double @llvm.minnum.f64(double %x, double %y) readnone
; X86-64-NEXT: .LBB0_2: # %else
; X86-64-NEXT: vcmpeqss %xmm5, %xmm4, %k1
; X86-64-NEXT: .LBB0_3: # %exit
-; X86-64-NEXT: vmovss %xmm0, %xmm0, %xmm1 {%k1}
+; X86-64-NEXT: vmovss %xmm0, %xmm1, %xmm1 {%k1}
; X86-64-NEXT: vmovss %xmm1, (%rsi)
; X86-64-NEXT: retq
;
; X86-64-NEXT: movb (%rcx), %al
; X86-64-NEXT: .LBB2_3: # %exit
; X86-64-NEXT: kmovd %eax, %k1
-; X86-64-NEXT: vmovss %xmm0, %xmm0, %xmm1 {%k1}
+; X86-64-NEXT: vmovss %xmm0, %xmm1, %xmm1 {%k1}
; X86-64-NEXT: vmovss %xmm1, (%rsi)
; X86-64-NEXT: retq
;
; X86-64-NEXT: .LBB3_2: # %else
; X86-64-NEXT: kmovb (%rcx), %k1
; X86-64-NEXT: .LBB3_3: # %exit
-; X86-64-NEXT: vmovss %xmm0, %xmm0, %xmm1 {%k1}
+; X86-64-NEXT: vmovss %xmm0, %xmm1, %xmm1 {%k1}
; X86-64-NEXT: vmovss %xmm1, (%rsi)
; X86-64-NEXT: retq
;
; CHECK-NEXT: vcvtsi2ssl {{.*}}(%rip), %xmm1, %xmm1
; CHECK-NEXT: kmovd %eax, %k1
; CHECK-NEXT: vxorps %xmm2, %xmm2, %xmm2
-; CHECK-NEXT: vmovss %xmm2, %xmm0, %xmm1 {%k1}
+; CHECK-NEXT: vmovss %xmm2, %xmm1, %xmm1 {%k1}
; CHECK-NEXT: vmovss %xmm1, {{.*}}(%rip)
; CHECK-NEXT: .LBB0_2: # %if.end
; CHECK-NEXT: popq %rax
; AVX512F_32_WIN-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero
; AVX512F_32_WIN-NEXT: vcmpltss %xmm1, %xmm0, %k1
; AVX512F_32_WIN-NEXT: vsubss %xmm1, %xmm0, %xmm2
-; AVX512F_32_WIN-NEXT: vmovss %xmm0, %xmm0, %xmm2 {%k1}
+; AVX512F_32_WIN-NEXT: vmovss %xmm0, %xmm2, %xmm2 {%k1}
; AVX512F_32_WIN-NEXT: vmovss %xmm2, (%esp)
; AVX512F_32_WIN-NEXT: flds (%esp)
; AVX512F_32_WIN-NEXT: fisttpll (%esp)
; AVX512F_32_LIN-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero
; AVX512F_32_LIN-NEXT: vcmpltss %xmm1, %xmm0, %k1
; AVX512F_32_LIN-NEXT: vsubss %xmm1, %xmm0, %xmm2
-; AVX512F_32_LIN-NEXT: vmovss %xmm0, %xmm0, %xmm2 {%k1}
+; AVX512F_32_LIN-NEXT: vmovss %xmm0, %xmm2, %xmm2 {%k1}
; AVX512F_32_LIN-NEXT: vmovss %xmm2, (%esp)
; AVX512F_32_LIN-NEXT: flds (%esp)
; AVX512F_32_LIN-NEXT: fisttpll (%esp)
; AVX512F_32_WIN-NEXT: vmovsd {{.*#+}} xmm1 = mem[0],zero
; AVX512F_32_WIN-NEXT: vcmpltsd %xmm1, %xmm0, %k1
; AVX512F_32_WIN-NEXT: vsubsd %xmm1, %xmm0, %xmm2
-; AVX512F_32_WIN-NEXT: vmovsd %xmm0, %xmm0, %xmm2 {%k1}
+; AVX512F_32_WIN-NEXT: vmovsd %xmm0, %xmm2, %xmm2 {%k1}
; AVX512F_32_WIN-NEXT: vmovsd %xmm2, (%esp)
; AVX512F_32_WIN-NEXT: fldl (%esp)
; AVX512F_32_WIN-NEXT: fisttpll (%esp)
; AVX512F_32_LIN-NEXT: vmovsd {{.*#+}} xmm1 = mem[0],zero
; AVX512F_32_LIN-NEXT: vcmpltsd %xmm1, %xmm0, %k1
; AVX512F_32_LIN-NEXT: vsubsd %xmm1, %xmm0, %xmm2
-; AVX512F_32_LIN-NEXT: vmovsd %xmm0, %xmm0, %xmm2 {%k1}
+; AVX512F_32_LIN-NEXT: vmovsd %xmm0, %xmm2, %xmm2 {%k1}
; AVX512F_32_LIN-NEXT: vmovsd %xmm2, (%esp)
; AVX512F_32_LIN-NEXT: fldl (%esp)
; AVX512F_32_LIN-NEXT: fisttpll (%esp)
; AVX512-NEXT: vmulss %xmm1, %xmm2, %xmm1
; AVX512-NEXT: vxorps %xmm2, %xmm2, %xmm2
; AVX512-NEXT: vcmpeqss %xmm2, %xmm0, %k1
-; AVX512-NEXT: vmovss %xmm2, %xmm0, %xmm1 {%k1}
+; AVX512-NEXT: vmovss %xmm2, %xmm1, %xmm1 {%k1}
; AVX512-NEXT: vmovaps %xmm1, %xmm0
; AVX512-NEXT: retq
%call = tail call float @__sqrtf_finite(float %f) #2
; AVX512-NEXT: vandps %xmm2, %xmm0, %xmm0
; AVX512-NEXT: vcmpltss {{.*}}(%rip), %xmm0, %k1
; AVX512-NEXT: vxorps %xmm0, %xmm0, %xmm0
-; AVX512-NEXT: vmovss %xmm0, %xmm0, %xmm1 {%k1}
+; AVX512-NEXT: vmovss %xmm0, %xmm1, %xmm1 {%k1}
; AVX512-NEXT: vmovaps %xmm1, %xmm0
; AVX512-NEXT: retq
%call = tail call float @__sqrtf_finite(float %x) #2