These generally match the FMA3 equivalents, although the multiply operands are sometimes printed in flipped order because the FMA3 forms (132/213/231) permute their source operands.
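For example, for the same computation the FMA4 printer keeps the multiply operands in asm source order while the FMA3 213 form shows them swapped (operands here are illustrative; the FMA4 line matches the updated test expectations below):

  vfmaddps %xmm2, %xmm1, %xmm0, %xmm0   # xmm0 = (xmm0 * xmm1) + xmm2
  vfmadd213ps %xmm2, %xmm1, %xmm0       # xmm0 = (xmm1 * xmm0) + xmm2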
CASE_AVX512_INS_COMMON(Inst##SD, Z, m_Int) \
CASE_AVX512_INS_COMMON(Inst##SS, Z, m_Int)
+#define CASE_FMA4(Inst, suf) \
+ CASE_AVX_INS_COMMON(Inst, 4, suf) \
+ CASE_AVX_INS_COMMON(Inst, 4Y, suf)
+
+#define CASE_FMA4_PACKED_RR(Inst) \
+ CASE_FMA4(Inst##PD, rr) \
+ CASE_FMA4(Inst##PS, rr)
+
+#define CASE_FMA4_PACKED_RM(Inst) \
+ CASE_FMA4(Inst##PD, rm) \
+ CASE_FMA4(Inst##PS, rm)
+
+#define CASE_FMA4_PACKED_MR(Inst) \
+ CASE_FMA4(Inst##PD, mr) \
+ CASE_FMA4(Inst##PS, mr)
+
+#define CASE_FMA4_SCALAR_RR(Inst) \
+ CASE_AVX_INS_COMMON(Inst##SD4, , rr) \
+ CASE_AVX_INS_COMMON(Inst##SS4, , rr) \
+ CASE_AVX_INS_COMMON(Inst##SD4, , rr_Int) \
+ CASE_AVX_INS_COMMON(Inst##SS4, , rr_Int)
+
+#define CASE_FMA4_SCALAR_RM(Inst) \
+ CASE_AVX_INS_COMMON(Inst##SD4, , rm) \
+ CASE_AVX_INS_COMMON(Inst##SS4, , rm) \
+ CASE_AVX_INS_COMMON(Inst##SD4, , rm_Int) \
+ CASE_AVX_INS_COMMON(Inst##SS4, , rm_Int)
+
+#define CASE_FMA4_SCALAR_MR(Inst) \
+ CASE_AVX_INS_COMMON(Inst##SD4, , mr) \
+ CASE_AVX_INS_COMMON(Inst##SS4, , mr) \
+ CASE_AVX_INS_COMMON(Inst##SD4, , mr_Int) \
+ CASE_AVX_INS_COMMON(Inst##SS4, , mr_Int)
+
static unsigned getVectorRegSize(unsigned RegNo) {
if (X86::ZMM0 <= RegNo && RegNo <= X86::ZMM31)
return 512;
OS << " {z}";
}
-static bool printFMA3Comments(const MCInst *MI, raw_ostream &OS) {
+static bool printFMAComments(const MCInst *MI, raw_ostream &OS) {
const char *Mul1Name = nullptr, *Mul2Name = nullptr, *AccName = nullptr;
unsigned NumOperands = MI->getNumOperands();
bool RegForm = false;
bool Negate = false;
StringRef AccStr = "+";
- // The operands for FMA instructions without rounding fall into two forms.
+ // The operands for FMA3 instructions without rounding fall into two forms:
// dest, src1, src2, src3
// dest, src1, mask, src2, src3
 // Where src3 is either a register or 5 memory address operands. So to find
 // dest and src1 we can index from the front. To find src2 and src3 we can
 // index from the end by taking into account memory vs register form when
 // finding src2.
+ // The operands for FMA4 instructions take one form:
+ //   dest, src1, src2, src3
+ // Where src2 OR src3 is either a register or 5 memory address operands. So
+ // to find dest and src1 we can index from the front, src2 (reg/mem) follows
+ // and then src3 (reg) will be at the end.
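+ // For example (taken from the test updates below):
+ //   vfmaddps (%rdi), %xmm1, %xmm0, %xmm0  # xmm0 = (xmm0 * xmm1) + mem
+ //     operands: dest=xmm0, src1=xmm0, src2=xmm1 (reg), src3 = 5 mem ops (rm)
+ //   vfmaddps %xmm1, (%rdi), %xmm0, %xmm0  # xmm0 = (xmm0 * mem) + xmm1
+ //     operands: dest=xmm0, src1=xmm0, src2 = 5 mem ops, src3=xmm1 (reg) (mr)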
+
switch (MI->getOpcode()) {
default:
return false;
+
+ CASE_FMA4_PACKED_RR(FMADD)
+ CASE_FMA4_SCALAR_RR(FMADD)
+ RegForm = true;
+ AccName = getRegName(MI->getOperand(NumOperands - 1).getReg());
+ LLVM_FALLTHROUGH;
+ CASE_FMA4_PACKED_RM(FMADD)
+ CASE_FMA4_SCALAR_RM(FMADD)
+ Mul2Name = getRegName(MI->getOperand(2).getReg());
+ Mul1Name = getRegName(MI->getOperand(1).getReg());
+ break;
+ CASE_FMA4_PACKED_MR(FMADD)
+ CASE_FMA4_SCALAR_MR(FMADD)
+ AccName = getRegName(MI->getOperand(NumOperands - 1).getReg());
+ Mul1Name = getRegName(MI->getOperand(1).getReg());
+ break;
+
+ CASE_FMA4_PACKED_RR(FMSUB)
+ CASE_FMA4_SCALAR_RR(FMSUB)
+ RegForm = true;
+ AccName = getRegName(MI->getOperand(NumOperands - 1).getReg());
+ LLVM_FALLTHROUGH;
+ CASE_FMA4_PACKED_RM(FMSUB)
+ CASE_FMA4_SCALAR_RM(FMSUB)
+ Mul2Name = getRegName(MI->getOperand(2).getReg());
+ Mul1Name = getRegName(MI->getOperand(1).getReg());
+ AccStr = "-";
+ break;
+ CASE_FMA4_PACKED_MR(FMSUB)
+ CASE_FMA4_SCALAR_MR(FMSUB)
+ AccName = getRegName(MI->getOperand(NumOperands - 1).getReg());
+ Mul1Name = getRegName(MI->getOperand(1).getReg());
+ AccStr = "-";
+ break;
+
+ CASE_FMA4_PACKED_RR(FNMADD)
+ CASE_FMA4_SCALAR_RR(FNMADD)
+ RegForm = true;
+ AccName = getRegName(MI->getOperand(NumOperands - 1).getReg());
+ LLVM_FALLTHROUGH;
+ CASE_FMA4_PACKED_RM(FNMADD)
+ CASE_FMA4_SCALAR_RM(FNMADD)
+ Mul2Name = getRegName(MI->getOperand(2).getReg());
+ Mul1Name = getRegName(MI->getOperand(1).getReg());
+ Negate = true;
+ break;
+ CASE_FMA4_PACKED_MR(FNMADD)
+ CASE_FMA4_SCALAR_MR(FNMADD)
+ AccName = getRegName(MI->getOperand(NumOperands - 1).getReg());
+ Mul1Name = getRegName(MI->getOperand(1).getReg());
+ Negate = true;
+ break;
+
+ CASE_FMA4_PACKED_RR(FNMSUB)
+ CASE_FMA4_SCALAR_RR(FNMSUB)
+ RegForm = true;
+ AccName = getRegName(MI->getOperand(NumOperands - 1).getReg());
+ LLVM_FALLTHROUGH;
+ CASE_FMA4_PACKED_RM(FNMSUB)
+ CASE_FMA4_SCALAR_RM(FNMSUB)
+ Mul2Name = getRegName(MI->getOperand(2).getReg());
+ Mul1Name = getRegName(MI->getOperand(1).getReg());
+ AccStr = "-";
+ Negate = true;
+ break;
+ CASE_FMA4_PACKED_MR(FNMSUB)
+ CASE_FMA4_SCALAR_MR(FNMSUB)
+ AccName = getRegName(MI->getOperand(NumOperands - 1).getReg());
+ Mul1Name = getRegName(MI->getOperand(1).getReg());
+ AccStr = "-";
+ Negate = true;
+ break;
+
+ CASE_FMA4_PACKED_RR(FMADDSUB)
+ RegForm = true;
+ AccName = getRegName(MI->getOperand(NumOperands - 1).getReg());
+ LLVM_FALLTHROUGH;
+ CASE_FMA4_PACKED_RM(FMADDSUB)
+ Mul2Name = getRegName(MI->getOperand(2).getReg());
+ Mul1Name = getRegName(MI->getOperand(1).getReg());
+ AccStr = "+/-";
+ break;
+ CASE_FMA4_PACKED_MR(FMADDSUB)
+ AccName = getRegName(MI->getOperand(NumOperands - 1).getReg());
+ Mul1Name = getRegName(MI->getOperand(1).getReg());
+ AccStr = "+/-";
+ break;
+
+ CASE_FMA4_PACKED_RR(FMSUBADD)
+ RegForm = true;
+ AccName = getRegName(MI->getOperand(NumOperands - 1).getReg());
+ LLVM_FALLTHROUGH;
+ CASE_FMA4_PACKED_RM(FMSUBADD)
+ Mul2Name = getRegName(MI->getOperand(2).getReg());
+ Mul1Name = getRegName(MI->getOperand(1).getReg());
+ AccStr = "-/+";
+ break;
+ CASE_FMA4_PACKED_MR(FMSUBADD)
+ AccName = getRegName(MI->getOperand(NumOperands - 1).getReg());
+ Mul1Name = getRegName(MI->getOperand(1).getReg());
+ AccStr = "-/+";
+ break;
+
CASE_FMA_PACKED_REG(FMADD132)
CASE_FMA_SCALAR_REG(FMADD132)
Mul2Name = getRegName(MI->getOperand(NumOperands - 1).getReg());
unsigned NumOperands = MI->getNumOperands();
bool RegForm = false;
- if (printFMA3Comments(MI, OS))
+ if (printFMAComments(MI, OS))
return true;
switch (MI->getOpcode()) {
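Once a case has filled in the names, the shared tail of printFMAComments emits the comment. Roughly (a condensed sketch of that tail; any name left null stands for the memory operand):

  const char *DestName = getRegName(MI->getOperand(0).getReg());

  if (!Mul1Name) Mul1Name = "mem";
  if (!Mul2Name) Mul2Name = "mem";
  if (!AccName)  AccName = "mem";

  OS << DestName << " = ";
  if (Negate)
    OS << '-';
  OS << '(' << Mul1Name << " * " << Mul2Name << ") " << AccStr << ' '
     << AccName << '\n';

  return true;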
define <3 x float> @fmafunc(<3 x float> %a, <3 x float> %b, <3 x float> %c) {
; CHECK-LABEL: fmafunc:
; CHECK: ## %bb.0:
-; CHECK-NEXT: vfmaddps %xmm2, %xmm1, %xmm0, %xmm0
+; CHECK-NEXT: vfmaddps {{.*#+}} xmm0 = (xmm0 * xmm1) + xmm2
; CHECK-NEXT: retl
;
; CHECK-NOFMA-LABEL: fmafunc:
; FMA4-LABEL: test_fneg_fma_subx_y_negz_f32:
; FMA4: # %bb.0: # %entry
; FMA4-NEXT: vsubss %xmm1, %xmm0, %xmm0
-; FMA4-NEXT: vfnmaddss %xmm3, %xmm2, %xmm0, %xmm0
+; FMA4-NEXT: vfnmaddss {{.*#+}} xmm0 = -(xmm0 * xmm2) + xmm3
; FMA4-NEXT: retq
entry:
%subx = fsub nsz float %w, %x
; FMA4-LABEL: test_fneg_fma_x_suby_negz_f32:
; FMA4: # %bb.0: # %entry
; FMA4-NEXT: vsubss %xmm2, %xmm0, %xmm0
-; FMA4-NEXT: vfnmaddss %xmm3, %xmm0, %xmm1, %xmm0
+; FMA4-NEXT: vfnmaddss {{.*#+}} xmm0 = -(xmm1 * xmm0) + xmm3
; FMA4-NEXT: retq
entry:
%suby = fsub nsz float %w, %y
; FMA4: # %bb.0: # %entry
; FMA4-NEXT: vsubss %xmm1, %xmm0, %xmm1
; FMA4-NEXT: vsubss %xmm2, %xmm0, %xmm0
-; FMA4-NEXT: vfnmaddss %xmm3, %xmm0, %xmm1, %xmm0
+; FMA4-NEXT: vfnmaddss {{.*#+}} xmm0 = -(xmm1 * xmm0) + xmm3
; FMA4-NEXT: retq
entry:
%subx = fsub nsz float %w, %x
; FMA4-LABEL: test_fneg_fma_subx_negy_negz_f32:
; FMA4: # %bb.0: # %entry
; FMA4-NEXT: vsubss %xmm1, %xmm0, %xmm0
-; FMA4-NEXT: vfmaddss %xmm3, %xmm2, %xmm0, %xmm0
+; FMA4-NEXT: vfmaddss {{.*#+}} xmm0 = (xmm0 * xmm2) + xmm3
; FMA4-NEXT: retq
entry:
%subx = fsub nsz float %w, %x
; FMA4-LABEL: test_fma_rcp_fneg_v4f32:
; FMA4: # %bb.0: # %entry
; FMA4-NEXT: vrcpps %xmm2, %xmm2
-; FMA4-NEXT: vfmsubps %xmm2, %xmm1, %xmm0, %xmm0
+; FMA4-NEXT: vfmsubps {{.*#+}} xmm0 = (xmm0 * xmm1) - xmm2
; FMA4-NEXT: retq
entry:
%0 = fneg <4 x float> %z
; FMA4-LABEL: negated_constant:
; FMA4: # %bb.0:
; FMA4-NEXT: vmulss {{.*}}(%rip), %xmm0, %xmm1
-; FMA4-NEXT: vfnmsubss %xmm1, {{.*}}(%rip), %xmm0, %xmm0
+; FMA4-NEXT: vfnmsubss {{.*#+}} xmm0 = -(xmm0 * mem) - xmm1
; FMA4-NEXT: retq
%m = fmul float %x, 42.0
%fma = call nsz float @llvm.fma.f32(float %x, float -42.0, float %m)
; FMA4-LABEL: test_x86_fmadd_baa_ss:
; FMA4: # %bb.0:
; FMA4-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
-; FMA4-NEXT: vfmaddss %xmm0, (%rdx), %xmm0, %xmm0
+; FMA4-NEXT: vfmaddss {{.*#+}} xmm0 = (xmm0 * mem) + xmm0
; FMA4-NEXT: retq
%res = call <4 x float> @llvm.x86.fma4.vfmadd.ss(<4 x float> %b, <4 x float> %a, <4 x float> %a) nounwind
ret <4 x float> %res
; FMA4-LABEL: test_x86_fmadd_aba_ss:
; FMA4: # %bb.0:
; FMA4-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
-; FMA4-NEXT: vfmaddss %xmm0, (%rdx), %xmm0, %xmm0
+; FMA4-NEXT: vfmaddss {{.*#+}} xmm0 = (xmm0 * mem) + xmm0
; FMA4-NEXT: retq
%res = call <4 x float> @llvm.x86.fma4.vfmadd.ss(<4 x float> %a, <4 x float> %b, <4 x float> %a) nounwind
ret <4 x float> %res
; FMA4-LABEL: test_x86_fmadd_bba_ss:
; FMA4: # %bb.0:
; FMA4-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
-; FMA4-NEXT: vfmaddss (%rcx), %xmm0, %xmm0, %xmm0
+; FMA4-NEXT: vfmaddss {{.*#+}} xmm0 = (xmm0 * xmm0) + mem
; FMA4-NEXT: retq
%res = call <4 x float> @llvm.x86.fma4.vfmadd.ss(<4 x float> %b, <4 x float> %b, <4 x float> %a) nounwind
ret <4 x float> %res
; FMA4-LABEL: test_x86_fmadd_baa_ps:
; FMA4: # %bb.0:
; FMA4-NEXT: vmovaps (%rcx), %xmm0
-; FMA4-NEXT: vfmaddps %xmm0, (%rdx), %xmm0, %xmm0
+; FMA4-NEXT: vfmaddps {{.*#+}} xmm0 = (xmm0 * mem) + xmm0
; FMA4-NEXT: retq
%res = call <4 x float> @llvm.x86.fma.vfmadd.ps(<4 x float> %b, <4 x float> %a, <4 x float> %a) nounwind
ret <4 x float> %res
; FMA4-LABEL: test_x86_fmadd_aba_ps:
; FMA4: # %bb.0:
; FMA4-NEXT: vmovaps (%rcx), %xmm0
-; FMA4-NEXT: vfmaddps %xmm0, (%rdx), %xmm0, %xmm0
+; FMA4-NEXT: vfmaddps {{.*#+}} xmm0 = (xmm0 * mem) + xmm0
; FMA4-NEXT: retq
%res = call <4 x float> @llvm.x86.fma.vfmadd.ps(<4 x float> %a, <4 x float> %b, <4 x float> %a) nounwind
ret <4 x float> %res
; FMA4-LABEL: test_x86_fmadd_bba_ps:
; FMA4: # %bb.0:
; FMA4-NEXT: vmovaps (%rdx), %xmm0
-; FMA4-NEXT: vfmaddps (%rcx), %xmm0, %xmm0, %xmm0
+; FMA4-NEXT: vfmaddps {{.*#+}} xmm0 = (xmm0 * xmm0) + mem
; FMA4-NEXT: retq
%res = call <4 x float> @llvm.x86.fma.vfmadd.ps(<4 x float> %b, <4 x float> %b, <4 x float> %a) nounwind
ret <4 x float> %res
; FMA4-LABEL: test_x86_fmadd_baa_ps_y:
; FMA4: # %bb.0:
; FMA4-NEXT: vmovaps (%rcx), %ymm0
-; FMA4-NEXT: vfmaddps %ymm0, (%rdx), %ymm0, %ymm0
+; FMA4-NEXT: vfmaddps {{.*#+}} ymm0 = (ymm0 * mem) + ymm0
; FMA4-NEXT: retq
%res = call <8 x float> @llvm.x86.fma.vfmadd.ps.256(<8 x float> %b, <8 x float> %a, <8 x float> %a) nounwind
ret <8 x float> %res
; FMA4-LABEL: test_x86_fmadd_aba_ps_y:
; FMA4: # %bb.0:
; FMA4-NEXT: vmovaps (%rcx), %ymm0
-; FMA4-NEXT: vfmaddps %ymm0, (%rdx), %ymm0, %ymm0
+; FMA4-NEXT: vfmaddps {{.*#+}} ymm0 = (ymm0 * mem) + ymm0
; FMA4-NEXT: retq
%res = call <8 x float> @llvm.x86.fma.vfmadd.ps.256(<8 x float> %a, <8 x float> %b, <8 x float> %a) nounwind
ret <8 x float> %res
; FMA4-LABEL: test_x86_fmadd_bba_ps_y:
; FMA4: # %bb.0:
; FMA4-NEXT: vmovaps (%rdx), %ymm0
-; FMA4-NEXT: vfmaddps (%rcx), %ymm0, %ymm0, %ymm0
+; FMA4-NEXT: vfmaddps {{.*#+}} ymm0 = (ymm0 * ymm0) + mem
; FMA4-NEXT: retq
%res = call <8 x float> @llvm.x86.fma.vfmadd.ps.256(<8 x float> %b, <8 x float> %b, <8 x float> %a) nounwind
ret <8 x float> %res
; FMA4-LABEL: test_x86_fmadd_baa_sd:
; FMA4: # %bb.0:
; FMA4-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
-; FMA4-NEXT: vfmaddsd %xmm0, (%rdx), %xmm0, %xmm0
+; FMA4-NEXT: vfmaddsd {{.*#+}} xmm0 = (xmm0 * mem) + xmm0
; FMA4-NEXT: retq
%res = call <2 x double> @llvm.x86.fma4.vfmadd.sd(<2 x double> %b, <2 x double> %a, <2 x double> %a) nounwind
ret <2 x double> %res
; FMA4-LABEL: test_x86_fmadd_aba_sd:
; FMA4: # %bb.0:
; FMA4-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
-; FMA4-NEXT: vfmaddsd %xmm0, (%rdx), %xmm0, %xmm0
+; FMA4-NEXT: vfmaddsd {{.*#+}} xmm0 = (xmm0 * mem) + xmm0
; FMA4-NEXT: retq
%res = call <2 x double> @llvm.x86.fma4.vfmadd.sd(<2 x double> %a, <2 x double> %b, <2 x double> %a) nounwind
ret <2 x double> %res
; FMA4-LABEL: test_x86_fmadd_bba_sd:
; FMA4: # %bb.0:
; FMA4-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
-; FMA4-NEXT: vfmaddsd (%rcx), %xmm0, %xmm0, %xmm0
+; FMA4-NEXT: vfmaddsd {{.*#+}} xmm0 = (xmm0 * xmm0) + mem
; FMA4-NEXT: retq
%res = call <2 x double> @llvm.x86.fma4.vfmadd.sd(<2 x double> %b, <2 x double> %b, <2 x double> %a) nounwind
ret <2 x double> %res
; FMA4-LABEL: test_x86_fmadd_baa_pd:
; FMA4: # %bb.0:
; FMA4-NEXT: vmovapd (%rcx), %xmm0
-; FMA4-NEXT: vfmaddpd %xmm0, (%rdx), %xmm0, %xmm0
+; FMA4-NEXT: vfmaddpd {{.*#+}} xmm0 = (xmm0 * mem) + xmm0
; FMA4-NEXT: retq
%res = call <2 x double> @llvm.x86.fma.vfmadd.pd(<2 x double> %b, <2 x double> %a, <2 x double> %a) nounwind
ret <2 x double> %res
; FMA4-LABEL: test_x86_fmadd_aba_pd:
; FMA4: # %bb.0:
; FMA4-NEXT: vmovapd (%rcx), %xmm0
-; FMA4-NEXT: vfmaddpd %xmm0, (%rdx), %xmm0, %xmm0
+; FMA4-NEXT: vfmaddpd {{.*#+}} xmm0 = (xmm0 * mem) + xmm0
; FMA4-NEXT: retq
%res = call <2 x double> @llvm.x86.fma.vfmadd.pd(<2 x double> %a, <2 x double> %b, <2 x double> %a) nounwind
ret <2 x double> %res
; FMA4-LABEL: test_x86_fmadd_bba_pd:
; FMA4: # %bb.0:
; FMA4-NEXT: vmovapd (%rdx), %xmm0
-; FMA4-NEXT: vfmaddpd (%rcx), %xmm0, %xmm0, %xmm0
+; FMA4-NEXT: vfmaddpd {{.*#+}} xmm0 = (xmm0 * xmm0) + mem
; FMA4-NEXT: retq
%res = call <2 x double> @llvm.x86.fma.vfmadd.pd(<2 x double> %b, <2 x double> %b, <2 x double> %a) nounwind
ret <2 x double> %res
; FMA4-LABEL: test_x86_fmadd_baa_pd_y:
; FMA4: # %bb.0:
; FMA4-NEXT: vmovapd (%rcx), %ymm0
-; FMA4-NEXT: vfmaddpd %ymm0, (%rdx), %ymm0, %ymm0
+; FMA4-NEXT: vfmaddpd {{.*#+}} ymm0 = (ymm0 * mem) + ymm0
; FMA4-NEXT: retq
%res = call <4 x double> @llvm.x86.fma.vfmadd.pd.256(<4 x double> %b, <4 x double> %a, <4 x double> %a) nounwind
ret <4 x double> %res
; FMA4-LABEL: test_x86_fmadd_aba_pd_y:
; FMA4: # %bb.0:
; FMA4-NEXT: vmovapd (%rcx), %ymm0
-; FMA4-NEXT: vfmaddpd %ymm0, (%rdx), %ymm0, %ymm0
+; FMA4-NEXT: vfmaddpd {{.*#+}} ymm0 = (ymm0 * mem) + ymm0
; FMA4-NEXT: retq
%res = call <4 x double> @llvm.x86.fma.vfmadd.pd.256(<4 x double> %a, <4 x double> %b, <4 x double> %a) nounwind
ret <4 x double> %res
; FMA4-LABEL: test_x86_fmadd_bba_pd_y:
; FMA4: # %bb.0:
; FMA4-NEXT: vmovapd (%rdx), %ymm0
-; FMA4-NEXT: vfmaddpd (%rcx), %ymm0, %ymm0, %ymm0
+; FMA4-NEXT: vfmaddpd {{.*#+}} ymm0 = (ymm0 * ymm0) + mem
; FMA4-NEXT: retq
%res = call <4 x double> @llvm.x86.fma.vfmadd.pd.256(<4 x double> %b, <4 x double> %b, <4 x double> %a) nounwind
ret <4 x double> %res
; FMA4-LABEL: test_x86_fnmadd_baa_ps:
; FMA4: # %bb.0:
; FMA4-NEXT: vmovaps (%rcx), %xmm0
-; FMA4-NEXT: vfnmaddps %xmm0, (%rdx), %xmm0, %xmm0
+; FMA4-NEXT: vfnmaddps {{.*#+}} xmm0 = -(xmm0 * mem) + xmm0
; FMA4-NEXT: retq
%res = call <4 x float> @llvm.x86.fma.vfnmadd.ps(<4 x float> %b, <4 x float> %a, <4 x float> %a) nounwind
ret <4 x float> %res
; FMA4-LABEL: test_x86_fnmadd_aba_ps:
; FMA4: # %bb.0:
; FMA4-NEXT: vmovaps (%rcx), %xmm0
-; FMA4-NEXT: vfnmaddps %xmm0, (%rdx), %xmm0, %xmm0
+; FMA4-NEXT: vfnmaddps {{.*#+}} xmm0 = -(xmm0 * mem) + xmm0
; FMA4-NEXT: retq
%res = call <4 x float> @llvm.x86.fma.vfnmadd.ps(<4 x float> %a, <4 x float> %b, <4 x float> %a) nounwind
ret <4 x float> %res
; FMA4-LABEL: test_x86_fnmadd_bba_ps:
; FMA4: # %bb.0:
; FMA4-NEXT: vmovaps (%rdx), %xmm0
-; FMA4-NEXT: vfnmaddps (%rcx), %xmm0, %xmm0, %xmm0
+; FMA4-NEXT: vfnmaddps {{.*#+}} xmm0 = -(xmm0 * xmm0) + mem
; FMA4-NEXT: retq
%res = call <4 x float> @llvm.x86.fma.vfnmadd.ps(<4 x float> %b, <4 x float> %b, <4 x float> %a) nounwind
ret <4 x float> %res
; FMA4-LABEL: test_x86_fnmadd_baa_ps_y:
; FMA4: # %bb.0:
; FMA4-NEXT: vmovaps (%rcx), %ymm0
-; FMA4-NEXT: vfnmaddps %ymm0, (%rdx), %ymm0, %ymm0
+; FMA4-NEXT: vfnmaddps {{.*#+}} ymm0 = -(ymm0 * mem) + ymm0
; FMA4-NEXT: retq
%res = call <8 x float> @llvm.x86.fma.vfnmadd.ps.256(<8 x float> %b, <8 x float> %a, <8 x float> %a) nounwind
ret <8 x float> %res
; FMA4-LABEL: test_x86_fnmadd_aba_ps_y:
; FMA4: # %bb.0:
; FMA4-NEXT: vmovaps (%rcx), %ymm0
-; FMA4-NEXT: vfnmaddps %ymm0, (%rdx), %ymm0, %ymm0
+; FMA4-NEXT: vfnmaddps {{.*#+}} ymm0 = -(ymm0 * mem) + ymm0
; FMA4-NEXT: retq
%res = call <8 x float> @llvm.x86.fma.vfnmadd.ps.256(<8 x float> %a, <8 x float> %b, <8 x float> %a) nounwind
ret <8 x float> %res
; FMA4-LABEL: test_x86_fnmadd_bba_ps_y:
; FMA4: # %bb.0:
; FMA4-NEXT: vmovaps (%rdx), %ymm0
-; FMA4-NEXT: vfnmaddps (%rcx), %ymm0, %ymm0, %ymm0
+; FMA4-NEXT: vfnmaddps {{.*#+}} ymm0 = -(ymm0 * ymm0) + mem
; FMA4-NEXT: retq
%res = call <8 x float> @llvm.x86.fma.vfnmadd.ps.256(<8 x float> %b, <8 x float> %b, <8 x float> %a) nounwind
ret <8 x float> %res
; FMA4-LABEL: test_x86_fnmadd_baa_pd:
; FMA4: # %bb.0:
; FMA4-NEXT: vmovapd (%rcx), %xmm0
-; FMA4-NEXT: vfnmaddpd %xmm0, (%rdx), %xmm0, %xmm0
+; FMA4-NEXT: vfnmaddpd {{.*#+}} xmm0 = -(xmm0 * mem) + xmm0
; FMA4-NEXT: retq
%res = call <2 x double> @llvm.x86.fma.vfnmadd.pd(<2 x double> %b, <2 x double> %a, <2 x double> %a) nounwind
ret <2 x double> %res
; FMA4-LABEL: test_x86_fnmadd_aba_pd:
; FMA4: # %bb.0:
; FMA4-NEXT: vmovapd (%rcx), %xmm0
-; FMA4-NEXT: vfnmaddpd %xmm0, (%rdx), %xmm0, %xmm0
+; FMA4-NEXT: vfnmaddpd {{.*#+}} xmm0 = -(xmm0 * mem) + xmm0
; FMA4-NEXT: retq
%res = call <2 x double> @llvm.x86.fma.vfnmadd.pd(<2 x double> %a, <2 x double> %b, <2 x double> %a) nounwind
ret <2 x double> %res
; FMA4-LABEL: test_x86_fnmadd_bba_pd:
; FMA4: # %bb.0:
; FMA4-NEXT: vmovapd (%rdx), %xmm0
-; FMA4-NEXT: vfnmaddpd (%rcx), %xmm0, %xmm0, %xmm0
+; FMA4-NEXT: vfnmaddpd {{.*#+}} xmm0 = -(xmm0 * xmm0) + mem
; FMA4-NEXT: retq
%res = call <2 x double> @llvm.x86.fma.vfnmadd.pd(<2 x double> %b, <2 x double> %b, <2 x double> %a) nounwind
ret <2 x double> %res
; FMA4-LABEL: test_x86_fnmadd_baa_pd_y:
; FMA4: # %bb.0:
; FMA4-NEXT: vmovapd (%rcx), %ymm0
-; FMA4-NEXT: vfnmaddpd %ymm0, (%rdx), %ymm0, %ymm0
+; FMA4-NEXT: vfnmaddpd {{.*#+}} ymm0 = -(ymm0 * mem) + ymm0
; FMA4-NEXT: retq
%res = call <4 x double> @llvm.x86.fma.vfnmadd.pd.256(<4 x double> %b, <4 x double> %a, <4 x double> %a) nounwind
ret <4 x double> %res
; FMA4-LABEL: test_x86_fnmadd_aba_pd_y:
; FMA4: # %bb.0:
; FMA4-NEXT: vmovapd (%rcx), %ymm0
-; FMA4-NEXT: vfnmaddpd %ymm0, (%rdx), %ymm0, %ymm0
+; FMA4-NEXT: vfnmaddpd {{.*#+}} ymm0 = -(ymm0 * mem) + ymm0
; FMA4-NEXT: retq
%res = call <4 x double> @llvm.x86.fma.vfnmadd.pd.256(<4 x double> %a, <4 x double> %b, <4 x double> %a) nounwind
ret <4 x double> %res
; FMA4-LABEL: test_x86_fnmadd_bba_pd_y:
; FMA4: # %bb.0:
; FMA4-NEXT: vmovapd (%rdx), %ymm0
-; FMA4-NEXT: vfnmaddpd (%rcx), %ymm0, %ymm0, %ymm0
+; FMA4-NEXT: vfnmaddpd {{.*#+}} ymm0 = -(ymm0 * ymm0) + mem
; FMA4-NEXT: retq
%res = call <4 x double> @llvm.x86.fma.vfnmadd.pd.256(<4 x double> %b, <4 x double> %b, <4 x double> %a) nounwind
ret <4 x double> %res
; FMA4-LABEL: test_x86_fmsub_baa_ps:
; FMA4: # %bb.0:
; FMA4-NEXT: vmovaps (%rcx), %xmm0
-; FMA4-NEXT: vfmsubps %xmm0, (%rdx), %xmm0, %xmm0
+; FMA4-NEXT: vfmsubps {{.*#+}} xmm0 = (xmm0 * mem) - xmm0
; FMA4-NEXT: retq
%res = call <4 x float> @llvm.x86.fma.vfmsub.ps(<4 x float> %b, <4 x float> %a, <4 x float> %a) nounwind
ret <4 x float> %res
; FMA4-LABEL: test_x86_fmsub_aba_ps:
; FMA4: # %bb.0:
; FMA4-NEXT: vmovaps (%rcx), %xmm0
-; FMA4-NEXT: vfmsubps %xmm0, (%rdx), %xmm0, %xmm0
+; FMA4-NEXT: vfmsubps {{.*#+}} xmm0 = (xmm0 * mem) - xmm0
; FMA4-NEXT: retq
%res = call <4 x float> @llvm.x86.fma.vfmsub.ps(<4 x float> %a, <4 x float> %b, <4 x float> %a) nounwind
ret <4 x float> %res
; FMA4-LABEL: test_x86_fmsub_bba_ps:
; FMA4: # %bb.0:
; FMA4-NEXT: vmovaps (%rdx), %xmm0
-; FMA4-NEXT: vfmsubps (%rcx), %xmm0, %xmm0, %xmm0
+; FMA4-NEXT: vfmsubps {{.*#+}} xmm0 = (xmm0 * xmm0) - mem
; FMA4-NEXT: retq
%res = call <4 x float> @llvm.x86.fma.vfmsub.ps(<4 x float> %b, <4 x float> %b, <4 x float> %a) nounwind
ret <4 x float> %res
; FMA4-LABEL: test_x86_fmsub_baa_ps_y:
; FMA4: # %bb.0:
; FMA4-NEXT: vmovaps (%rcx), %ymm0
-; FMA4-NEXT: vfmsubps %ymm0, (%rdx), %ymm0, %ymm0
+; FMA4-NEXT: vfmsubps {{.*#+}} ymm0 = (ymm0 * mem) - ymm0
; FMA4-NEXT: retq
%res = call <8 x float> @llvm.x86.fma.vfmsub.ps.256(<8 x float> %b, <8 x float> %a, <8 x float> %a) nounwind
ret <8 x float> %res
; FMA4-LABEL: test_x86_fmsub_aba_ps_y:
; FMA4: # %bb.0:
; FMA4-NEXT: vmovaps (%rcx), %ymm0
-; FMA4-NEXT: vfmsubps %ymm0, (%rdx), %ymm0, %ymm0
+; FMA4-NEXT: vfmsubps {{.*#+}} ymm0 = (ymm0 * mem) - ymm0
; FMA4-NEXT: retq
%res = call <8 x float> @llvm.x86.fma.vfmsub.ps.256(<8 x float> %a, <8 x float> %b, <8 x float> %a) nounwind
ret <8 x float> %res
; FMA4-LABEL: test_x86_fmsub_bba_ps_y:
; FMA4: # %bb.0:
; FMA4-NEXT: vmovaps (%rdx), %ymm0
-; FMA4-NEXT: vfmsubps (%rcx), %ymm0, %ymm0, %ymm0
+; FMA4-NEXT: vfmsubps {{.*#+}} ymm0 = (ymm0 * ymm0) - mem
; FMA4-NEXT: retq
%res = call <8 x float> @llvm.x86.fma.vfmsub.ps.256(<8 x float> %b, <8 x float> %b, <8 x float> %a) nounwind
ret <8 x float> %res
; FMA4-LABEL: test_x86_fmsub_baa_pd:
; FMA4: # %bb.0:
; FMA4-NEXT: vmovapd (%rcx), %xmm0
-; FMA4-NEXT: vfmsubpd %xmm0, (%rdx), %xmm0, %xmm0
+; FMA4-NEXT: vfmsubpd {{.*#+}} xmm0 = (xmm0 * mem) - xmm0
; FMA4-NEXT: retq
%res = call <2 x double> @llvm.x86.fma.vfmsub.pd(<2 x double> %b, <2 x double> %a, <2 x double> %a) nounwind
ret <2 x double> %res
; FMA4-LABEL: test_x86_fmsub_aba_pd:
; FMA4: # %bb.0:
; FMA4-NEXT: vmovapd (%rcx), %xmm0
-; FMA4-NEXT: vfmsubpd %xmm0, (%rdx), %xmm0, %xmm0
+; FMA4-NEXT: vfmsubpd {{.*#+}} xmm0 = (xmm0 * mem) - xmm0
; FMA4-NEXT: retq
%res = call <2 x double> @llvm.x86.fma.vfmsub.pd(<2 x double> %a, <2 x double> %b, <2 x double> %a) nounwind
ret <2 x double> %res
; FMA4-LABEL: test_x86_fmsub_bba_pd:
; FMA4: # %bb.0:
; FMA4-NEXT: vmovapd (%rdx), %xmm0
-; FMA4-NEXT: vfmsubpd (%rcx), %xmm0, %xmm0, %xmm0
+; FMA4-NEXT: vfmsubpd {{.*#+}} xmm0 = (xmm0 * xmm0) - mem
; FMA4-NEXT: retq
%res = call <2 x double> @llvm.x86.fma.vfmsub.pd(<2 x double> %b, <2 x double> %b, <2 x double> %a) nounwind
ret <2 x double> %res
; FMA4-LABEL: test_x86_fmsub_baa_pd_y:
; FMA4: # %bb.0:
; FMA4-NEXT: vmovapd (%rcx), %ymm0
-; FMA4-NEXT: vfmsubpd %ymm0, (%rdx), %ymm0, %ymm0
+; FMA4-NEXT: vfmsubpd {{.*#+}} ymm0 = (ymm0 * mem) - ymm0
; FMA4-NEXT: retq
%res = call <4 x double> @llvm.x86.fma.vfmsub.pd.256(<4 x double> %b, <4 x double> %a, <4 x double> %a) nounwind
ret <4 x double> %res
; FMA4-LABEL: test_x86_fmsub_aba_pd_y:
; FMA4: # %bb.0:
; FMA4-NEXT: vmovapd (%rcx), %ymm0
-; FMA4-NEXT: vfmsubpd %ymm0, (%rdx), %ymm0, %ymm0
+; FMA4-NEXT: vfmsubpd {{.*#+}} ymm0 = (ymm0 * mem) - ymm0
; FMA4-NEXT: retq
%res = call <4 x double> @llvm.x86.fma.vfmsub.pd.256(<4 x double> %a, <4 x double> %b, <4 x double> %a) nounwind
ret <4 x double> %res
; FMA4-LABEL: test_x86_fmsub_bba_pd_y:
; FMA4: # %bb.0:
; FMA4-NEXT: vmovapd (%rdx), %ymm0
-; FMA4-NEXT: vfmsubpd (%rcx), %ymm0, %ymm0, %ymm0
+; FMA4-NEXT: vfmsubpd {{.*#+}} ymm0 = (ymm0 * ymm0) - mem
; FMA4-NEXT: retq
%res = call <4 x double> @llvm.x86.fma.vfmsub.pd.256(<4 x double> %b, <4 x double> %b, <4 x double> %a) nounwind
ret <4 x double> %res
; FMA4-LABEL: test_x86_fnmsub_baa_ps:
; FMA4: # %bb.0:
; FMA4-NEXT: vmovaps (%rcx), %xmm0
-; FMA4-NEXT: vfnmsubps %xmm0, (%rdx), %xmm0, %xmm0
+; FMA4-NEXT: vfnmsubps {{.*#+}} xmm0 = -(xmm0 * mem) - xmm0
; FMA4-NEXT: retq
%res = call <4 x float> @llvm.x86.fma.vfnmsub.ps(<4 x float> %b, <4 x float> %a, <4 x float> %a) nounwind
ret <4 x float> %res
; FMA4-LABEL: test_x86_fnmsub_aba_ps:
; FMA4: # %bb.0:
; FMA4-NEXT: vmovaps (%rcx), %xmm0
-; FMA4-NEXT: vfnmsubps %xmm0, (%rdx), %xmm0, %xmm0
+; FMA4-NEXT: vfnmsubps {{.*#+}} xmm0 = -(xmm0 * mem) - xmm0
; FMA4-NEXT: retq
%res = call <4 x float> @llvm.x86.fma.vfnmsub.ps(<4 x float> %a, <4 x float> %b, <4 x float> %a) nounwind
ret <4 x float> %res
; FMA4-LABEL: test_x86_fnmsub_bba_ps:
; FMA4: # %bb.0:
; FMA4-NEXT: vmovaps (%rdx), %xmm0
-; FMA4-NEXT: vfnmsubps (%rcx), %xmm0, %xmm0, %xmm0
+; FMA4-NEXT: vfnmsubps {{.*#+}} xmm0 = -(xmm0 * xmm0) - mem
; FMA4-NEXT: retq
%res = call <4 x float> @llvm.x86.fma.vfnmsub.ps(<4 x float> %b, <4 x float> %b, <4 x float> %a) nounwind
ret <4 x float> %res
; FMA4-LABEL: test_x86_fnmsub_baa_ps_y:
; FMA4: # %bb.0:
; FMA4-NEXT: vmovaps (%rcx), %ymm0
-; FMA4-NEXT: vfnmsubps %ymm0, (%rdx), %ymm0, %ymm0
+; FMA4-NEXT: vfnmsubps {{.*#+}} ymm0 = -(ymm0 * mem) - ymm0
; FMA4-NEXT: retq
%res = call <8 x float> @llvm.x86.fma.vfnmsub.ps.256(<8 x float> %b, <8 x float> %a, <8 x float> %a) nounwind
ret <8 x float> %res
; FMA4-LABEL: test_x86_fnmsub_aba_ps_y:
; FMA4: # %bb.0:
; FMA4-NEXT: vmovaps (%rcx), %ymm0
-; FMA4-NEXT: vfnmsubps %ymm0, (%rdx), %ymm0, %ymm0
+; FMA4-NEXT: vfnmsubps {{.*#+}} ymm0 = -(ymm0 * mem) - ymm0
; FMA4-NEXT: retq
%res = call <8 x float> @llvm.x86.fma.vfnmsub.ps.256(<8 x float> %a, <8 x float> %b, <8 x float> %a) nounwind
ret <8 x float> %res
; FMA4-LABEL: test_x86_fnmsub_bba_ps_y:
; FMA4: # %bb.0:
; FMA4-NEXT: vmovaps (%rdx), %ymm0
-; FMA4-NEXT: vfnmsubps (%rcx), %ymm0, %ymm0, %ymm0
+; FMA4-NEXT: vfnmsubps {{.*#+}} ymm0 = -(ymm0 * ymm0) - mem
; FMA4-NEXT: retq
%res = call <8 x float> @llvm.x86.fma.vfnmsub.ps.256(<8 x float> %b, <8 x float> %b, <8 x float> %a) nounwind
ret <8 x float> %res
; FMA4-LABEL: test_x86_fnmsub_baa_pd:
; FMA4: # %bb.0:
; FMA4-NEXT: vmovapd (%rcx), %xmm0
-; FMA4-NEXT: vfnmsubpd %xmm0, (%rdx), %xmm0, %xmm0
+; FMA4-NEXT: vfnmsubpd {{.*#+}} xmm0 = -(xmm0 * mem) - xmm0
; FMA4-NEXT: retq
%res = call <2 x double> @llvm.x86.fma.vfnmsub.pd(<2 x double> %b, <2 x double> %a, <2 x double> %a) nounwind
ret <2 x double> %res
; FMA4-LABEL: test_x86_fnmsub_aba_pd:
; FMA4: # %bb.0:
; FMA4-NEXT: vmovapd (%rcx), %xmm0
-; FMA4-NEXT: vfnmsubpd %xmm0, (%rdx), %xmm0, %xmm0
+; FMA4-NEXT: vfnmsubpd {{.*#+}} xmm0 = -(xmm0 * mem) - xmm0
; FMA4-NEXT: retq
%res = call <2 x double> @llvm.x86.fma.vfnmsub.pd(<2 x double> %a, <2 x double> %b, <2 x double> %a) nounwind
ret <2 x double> %res
; FMA4-LABEL: test_x86_fnmsub_bba_pd:
; FMA4: # %bb.0:
; FMA4-NEXT: vmovapd (%rdx), %xmm0
-; FMA4-NEXT: vfnmsubpd (%rcx), %xmm0, %xmm0, %xmm0
+; FMA4-NEXT: vfnmsubpd {{.*#+}} xmm0 = -(xmm0 * xmm0) - mem
; FMA4-NEXT: retq
%res = call <2 x double> @llvm.x86.fma.vfnmsub.pd(<2 x double> %b, <2 x double> %b, <2 x double> %a) nounwind
ret <2 x double> %res
; FMA4-LABEL: test_x86_fnmsub_baa_pd_y:
; FMA4: # %bb.0:
; FMA4-NEXT: vmovapd (%rcx), %ymm0
-; FMA4-NEXT: vfnmsubpd %ymm0, (%rdx), %ymm0, %ymm0
+; FMA4-NEXT: vfnmsubpd {{.*#+}} ymm0 = -(ymm0 * mem) - ymm0
; FMA4-NEXT: retq
%res = call <4 x double> @llvm.x86.fma.vfnmsub.pd.256(<4 x double> %b, <4 x double> %a, <4 x double> %a) nounwind
ret <4 x double> %res
; FMA4-LABEL: test_x86_fnmsub_aba_pd_y:
; FMA4: # %bb.0:
; FMA4-NEXT: vmovapd (%rcx), %ymm0
-; FMA4-NEXT: vfnmsubpd %ymm0, (%rdx), %ymm0, %ymm0
+; FMA4-NEXT: vfnmsubpd {{.*#+}} ymm0 = -(ymm0 * mem) - ymm0
; FMA4-NEXT: retq
%res = call <4 x double> @llvm.x86.fma.vfnmsub.pd.256(<4 x double> %a, <4 x double> %b, <4 x double> %a) nounwind
ret <4 x double> %res
; FMA4-LABEL: test_x86_fnmsub_bba_pd_y:
; FMA4: # %bb.0:
; FMA4-NEXT: vmovapd (%rdx), %ymm0
-; FMA4-NEXT: vfnmsubpd (%rcx), %ymm0, %ymm0, %ymm0
+; FMA4-NEXT: vfnmsubpd {{.*#+}} ymm0 = -(ymm0 * ymm0) - mem
; FMA4-NEXT: retq
%res = call <4 x double> @llvm.x86.fma.vfnmsub.pd.256(<4 x double> %b, <4 x double> %b, <4 x double> %a) nounwind
ret <4 x double> %res
define <4 x float> @test1(<4 x float> %a, <4 x float> %b, <4 x float> %c) {
; CHECK-LABEL: test1:
; CHECK: # %bb.0:
-; CHECK-NEXT: vfmaddss %xmm2, %xmm1, %xmm0, %xmm0
+; CHECK-NEXT: vfmaddss {{.*#+}} xmm0 = (xmm0 * xmm1) + xmm2
; CHECK-NEXT: vxorps {{.*}}(%rip), %xmm0, %xmm0
; CHECK-NEXT: retq
%res = tail call <4 x float> @llvm.x86.fma4.vfmadd.ss(<4 x float> %a, <4 x float> %b, <4 x float> %c)
define <4 x float> @test2(<4 x float> %a, <4 x float> %b, <4 x float> %c) {
; CHECK-LABEL: test2:
; CHECK: # %bb.0:
-; CHECK-NEXT: vfmsubss %xmm2, %xmm1, %xmm0, %xmm0
+; CHECK-NEXT: vfmsubss {{.*#+}} xmm0 = (xmm0 * xmm1) - xmm2
; CHECK-NEXT: retq
%sub.i = fsub <4 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %c
%res = tail call <4 x float> @llvm.x86.fma4.vfmadd.ss(<4 x float> %a, <4 x float> %b, <4 x float> %sub.i)
define <4 x float> @test3(<4 x float> %a, <4 x float> %b, <4 x float> %c) {
; CHECK-LABEL: test3:
; CHECK: # %bb.0:
-; CHECK-NEXT: vfnmaddss %xmm2, %xmm1, %xmm0, %xmm0
+; CHECK-NEXT: vfnmaddss {{.*#+}} xmm0 = -(xmm0 * xmm1) + xmm2
; CHECK-NEXT: retq
%sub.i = fsub <4 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %b
%res = tail call <4 x float> @llvm.x86.fma4.vfmadd.ss(<4 x float> %a, <4 x float> %sub.i, <4 x float> %c)
define <4 x float> @test4(<4 x float> %a, <4 x float> %b, <4 x float> %c) {
; CHECK-LABEL: test4:
; CHECK: # %bb.0:
-; CHECK-NEXT: vfnmaddss %xmm2, %xmm1, %xmm0, %xmm0
+; CHECK-NEXT: vfnmaddss {{.*#+}} xmm0 = -(xmm0 * xmm1) + xmm2
; CHECK-NEXT: retq
%sub.i = fsub <4 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %a
%res = tail call <4 x float> @llvm.x86.fma4.vfmadd.ss(<4 x float> %sub.i, <4 x float> %b, <4 x float> %c)
define <4 x float> @test5(<4 x float> %a, <4 x float> %b, <4 x float> %c) {
; CHECK-LABEL: test5:
; CHECK: # %bb.0:
-; CHECK-NEXT: vfnmsubss %xmm2, %xmm1, %xmm0, %xmm0
+; CHECK-NEXT: vfnmsubss {{.*#+}} xmm0 = -(xmm0 * xmm1) - xmm2
; CHECK-NEXT: retq
%sub.i = fsub <4 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %a
%sub.i.2 = fsub <4 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %c
define <2 x double> @test6(<2 x double> %a, <2 x double> %b, <2 x double> %c) {
; CHECK-LABEL: test6:
; CHECK: # %bb.0:
-; CHECK-NEXT: vfmaddsd %xmm2, %xmm1, %xmm0, %xmm0
+; CHECK-NEXT: vfmaddsd {{.*#+}} xmm0 = (xmm0 * xmm1) + xmm2
; CHECK-NEXT: vxorpd {{.*}}(%rip), %xmm0, %xmm0
; CHECK-NEXT: retq
%res = tail call <2 x double> @llvm.x86.fma4.vfmadd.sd(<2 x double> %a, <2 x double> %b, <2 x double> %c)
define <2 x double> @test7(<2 x double> %a, <2 x double> %b, <2 x double> %c) {
; CHECK-LABEL: test7:
; CHECK: # %bb.0:
-; CHECK-NEXT: vfmsubsd %xmm2, %xmm1, %xmm0, %xmm0
+; CHECK-NEXT: vfmsubsd {{.*#+}} xmm0 = (xmm0 * xmm1) - xmm2
; CHECK-NEXT: retq
%sub.i = fsub <2 x double> <double -0.000000e+00, double -0.000000e+00>, %c
%res = tail call <2 x double> @llvm.x86.fma4.vfmadd.sd(<2 x double> %a, <2 x double> %b, <2 x double> %sub.i)
define <2 x double> @test8(<2 x double> %a, <2 x double> %b, <2 x double> %c) {
; CHECK-LABEL: test8:
; CHECK: # %bb.0:
-; CHECK-NEXT: vfnmaddsd %xmm2, %xmm1, %xmm0, %xmm0
+; CHECK-NEXT: vfnmaddsd {{.*#+}} xmm0 = -(xmm0 * xmm1) + xmm2
; CHECK-NEXT: retq
%sub.i = fsub <2 x double> <double -0.000000e+00, double -0.000000e+00>, %b
%res = tail call <2 x double> @llvm.x86.fma4.vfmadd.sd(<2 x double> %a, <2 x double> %sub.i, <2 x double> %c)
define <2 x double> @test9(<2 x double> %a, <2 x double> %b, <2 x double> %c) {
; CHECK-LABEL: test9:
; CHECK: # %bb.0:
-; CHECK-NEXT: vfnmaddsd %xmm2, %xmm1, %xmm0, %xmm0
+; CHECK-NEXT: vfnmaddsd {{.*#+}} xmm0 = -(xmm0 * xmm1) + xmm2
; CHECK-NEXT: retq
%sub.i = fsub <2 x double> <double -0.000000e+00, double -0.000000e+00>, %a
%res = tail call <2 x double> @llvm.x86.fma4.vfmadd.sd(<2 x double> %sub.i, <2 x double> %b, <2 x double> %c)
define <2 x double> @test10(<2 x double> %a, <2 x double> %b, <2 x double> %c) {
; CHECK-LABEL: test10:
; CHECK: # %bb.0:
-; CHECK-NEXT: vfnmsubsd %xmm2, %xmm1, %xmm0, %xmm0
+; CHECK-NEXT: vfnmsubsd {{.*#+}} xmm0 = -(xmm0 * xmm1) - xmm2
; CHECK-NEXT: retq
%sub.i = fsub <2 x double> <double -0.000000e+00, double -0.000000e+00>, %a
%sub.i.2 = fsub <2 x double> <double -0.000000e+00, double -0.000000e+00>, %c
; CHECK-LABEL: test_x86_fma_vfmadd_ps:
; CHECK: # %bb.0:
; CHECK-NEXT: vfmaddps %xmm2, %xmm1, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0xf9,0x68,0xc2,0x10]
+; CHECK-NEXT: # xmm0 = (xmm0 * xmm1) + xmm2
; CHECK-NEXT: retq # encoding: [0xc3]
%res = call <4 x float> @llvm.x86.fma.vfmadd.ps(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2)
ret <4 x float> %res
; CHECK-LABEL: test_x86_fma_vfmadd_pd:
; CHECK: # %bb.0:
; CHECK-NEXT: vfmaddpd %xmm2, %xmm1, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0xf9,0x69,0xc2,0x10]
+; CHECK-NEXT: # xmm0 = (xmm0 * xmm1) + xmm2
; CHECK-NEXT: retq # encoding: [0xc3]
%res = call <2 x double> @llvm.x86.fma.vfmadd.pd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2)
ret <2 x double> %res
; CHECK-LABEL: test_x86_fma_vfmadd_ps_256:
; CHECK: # %bb.0:
; CHECK-NEXT: vfmaddps %ymm2, %ymm1, %ymm0, %ymm0 # encoding: [0xc4,0xe3,0xfd,0x68,0xc2,0x10]
+; CHECK-NEXT: # ymm0 = (ymm0 * ymm1) + ymm2
; CHECK-NEXT: retq # encoding: [0xc3]
%res = call <8 x float> @llvm.x86.fma.vfmadd.ps.256(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2)
ret <8 x float> %res
; CHECK-LABEL: test_x86_fma_vfmadd_pd_256:
; CHECK: # %bb.0:
; CHECK-NEXT: vfmaddpd %ymm2, %ymm1, %ymm0, %ymm0 # encoding: [0xc4,0xe3,0xfd,0x69,0xc2,0x10]
+; CHECK-NEXT: # ymm0 = (ymm0 * ymm1) + ymm2
; CHECK-NEXT: retq # encoding: [0xc3]
%res = call <4 x double> @llvm.x86.fma.vfmadd.pd.256(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2)
ret <4 x double> %res
; CHECK-LABEL: test_x86_fma_vfmsub_ps:
; CHECK: # %bb.0:
; CHECK-NEXT: vfmsubps %xmm2, %xmm1, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0xf9,0x6c,0xc2,0x10]
+; CHECK-NEXT: # xmm0 = (xmm0 * xmm1) - xmm2
; CHECK-NEXT: retq # encoding: [0xc3]
%res = call <4 x float> @llvm.x86.fma.vfmsub.ps(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2)
ret <4 x float> %res
; CHECK-LABEL: test_x86_fma_vfmsub_pd:
; CHECK: # %bb.0:
; CHECK-NEXT: vfmsubpd %xmm2, %xmm1, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0xf9,0x6d,0xc2,0x10]
+; CHECK-NEXT: # xmm0 = (xmm0 * xmm1) - xmm2
; CHECK-NEXT: retq # encoding: [0xc3]
%res = call <2 x double> @llvm.x86.fma.vfmsub.pd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2)
ret <2 x double> %res
; CHECK-LABEL: test_x86_fma_vfmsub_ps_256:
; CHECK: # %bb.0:
; CHECK-NEXT: vfmsubps %ymm2, %ymm1, %ymm0, %ymm0 # encoding: [0xc4,0xe3,0xfd,0x6c,0xc2,0x10]
+; CHECK-NEXT: # ymm0 = (ymm0 * ymm1) - ymm2
; CHECK-NEXT: retq # encoding: [0xc3]
%res = call <8 x float> @llvm.x86.fma.vfmsub.ps.256(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2)
ret <8 x float> %res
; CHECK-LABEL: test_x86_fma_vfmsub_pd_256:
; CHECK: # %bb.0:
; CHECK-NEXT: vfmsubpd %ymm2, %ymm1, %ymm0, %ymm0 # encoding: [0xc4,0xe3,0xfd,0x6d,0xc2,0x10]
+; CHECK-NEXT: # ymm0 = (ymm0 * ymm1) - ymm2
; CHECK-NEXT: retq # encoding: [0xc3]
%res = call <4 x double> @llvm.x86.fma.vfmsub.pd.256(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2)
ret <4 x double> %res
; CHECK-LABEL: test_x86_fma_vfnmadd_ps:
; CHECK: # %bb.0:
; CHECK-NEXT: vfnmaddps %xmm2, %xmm1, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0xf9,0x78,0xc2,0x10]
+; CHECK-NEXT: # xmm0 = -(xmm0 * xmm1) + xmm2
; CHECK-NEXT: retq # encoding: [0xc3]
%res = call <4 x float> @llvm.x86.fma.vfnmadd.ps(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2)
ret <4 x float> %res
; CHECK-LABEL: test_x86_fma_vfnmadd_pd:
; CHECK: # %bb.0:
; CHECK-NEXT: vfnmaddpd %xmm2, %xmm1, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0xf9,0x79,0xc2,0x10]
+; CHECK-NEXT: # xmm0 = -(xmm0 * xmm1) + xmm2
; CHECK-NEXT: retq # encoding: [0xc3]
%res = call <2 x double> @llvm.x86.fma.vfnmadd.pd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2)
ret <2 x double> %res
; CHECK-LABEL: test_x86_fma_vfnmadd_ps_256:
; CHECK: # %bb.0:
; CHECK-NEXT: vfnmaddps %ymm2, %ymm1, %ymm0, %ymm0 # encoding: [0xc4,0xe3,0xfd,0x78,0xc2,0x10]
+; CHECK-NEXT: # ymm0 = -(ymm0 * ymm1) + ymm2
; CHECK-NEXT: retq # encoding: [0xc3]
%res = call <8 x float> @llvm.x86.fma.vfnmadd.ps.256(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2)
ret <8 x float> %res
; CHECK-LABEL: test_x86_fma_vfnmadd_pd_256:
; CHECK: # %bb.0:
; CHECK-NEXT: vfnmaddpd %ymm2, %ymm1, %ymm0, %ymm0 # encoding: [0xc4,0xe3,0xfd,0x79,0xc2,0x10]
+; CHECK-NEXT: # ymm0 = -(ymm0 * ymm1) + ymm2
; CHECK-NEXT: retq # encoding: [0xc3]
%res = call <4 x double> @llvm.x86.fma.vfnmadd.pd.256(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2)
ret <4 x double> %res
; CHECK-LABEL: test_x86_fma_vfnmsub_ps:
; CHECK: # %bb.0:
; CHECK-NEXT: vfnmsubps %xmm2, %xmm1, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0xf9,0x7c,0xc2,0x10]
+; CHECK-NEXT: # xmm0 = -(xmm0 * xmm1) - xmm2
; CHECK-NEXT: retq # encoding: [0xc3]
%res = call <4 x float> @llvm.x86.fma.vfnmsub.ps(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2)
ret <4 x float> %res
; CHECK-LABEL: test_x86_fma_vfnmsub_pd:
; CHECK: # %bb.0:
; CHECK-NEXT: vfnmsubpd %xmm2, %xmm1, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0xf9,0x7d,0xc2,0x10]
+; CHECK-NEXT: # xmm0 = -(xmm0 * xmm1) - xmm2
; CHECK-NEXT: retq # encoding: [0xc3]
%res = call <2 x double> @llvm.x86.fma.vfnmsub.pd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2)
ret <2 x double> %res
; CHECK-LABEL: test_x86_fma_vfnmsub_ps_256:
; CHECK: # %bb.0:
; CHECK-NEXT: vfnmsubps %ymm2, %ymm1, %ymm0, %ymm0 # encoding: [0xc4,0xe3,0xfd,0x7c,0xc2,0x10]
+; CHECK-NEXT: # ymm0 = -(ymm0 * ymm1) - ymm2
; CHECK-NEXT: retq # encoding: [0xc3]
%res = call <8 x float> @llvm.x86.fma.vfnmsub.ps.256(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2)
ret <8 x float> %res
; CHECK-LABEL: test_x86_fma_vfnmsub_pd_256:
; CHECK: # %bb.0:
; CHECK-NEXT: vfnmsubpd %ymm2, %ymm1, %ymm0, %ymm0 # encoding: [0xc4,0xe3,0xfd,0x7d,0xc2,0x10]
+; CHECK-NEXT: # ymm0 = -(ymm0 * ymm1) - ymm2
; CHECK-NEXT: retq # encoding: [0xc3]
%res = call <4 x double> @llvm.x86.fma.vfnmsub.pd.256(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2)
ret <4 x double> %res
; CHECK-LABEL: test_x86_fma_vfmaddsub_ps:
; CHECK: # %bb.0:
; CHECK-NEXT: vfmaddsubps %xmm2, %xmm1, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0xf9,0x5c,0xc2,0x10]
+; CHECK-NEXT: # xmm0 = (xmm0 * xmm1) +/- xmm2
; CHECK-NEXT: retq # encoding: [0xc3]
%res = call <4 x float> @llvm.x86.fma.vfmaddsub.ps(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2)
ret <4 x float> %res
; CHECK-LABEL: test_x86_fma_vfmaddsub_pd:
; CHECK: # %bb.0:
; CHECK-NEXT: vfmaddsubpd %xmm2, %xmm1, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0xf9,0x5d,0xc2,0x10]
+; CHECK-NEXT: # xmm0 = (xmm0 * xmm1) +/- xmm2
; CHECK-NEXT: retq # encoding: [0xc3]
%res = call <2 x double> @llvm.x86.fma.vfmaddsub.pd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2)
ret <2 x double> %res
; CHECK-LABEL: test_x86_fma_vfmaddsub_ps_256:
; CHECK: # %bb.0:
; CHECK-NEXT: vfmaddsubps %ymm2, %ymm1, %ymm0, %ymm0 # encoding: [0xc4,0xe3,0xfd,0x5c,0xc2,0x10]
+; CHECK-NEXT: # ymm0 = (ymm0 * ymm1) +/- ymm2
; CHECK-NEXT: retq # encoding: [0xc3]
%res = call <8 x float> @llvm.x86.fma.vfmaddsub.ps.256(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2)
ret <8 x float> %res
; CHECK-LABEL: test_x86_fma_vfmaddsub_pd_256:
; CHECK: # %bb.0:
; CHECK-NEXT: vfmaddsubpd %ymm2, %ymm1, %ymm0, %ymm0 # encoding: [0xc4,0xe3,0xfd,0x5d,0xc2,0x10]
+; CHECK-NEXT: # ymm0 = (ymm0 * ymm1) +/- ymm2
; CHECK-NEXT: retq # encoding: [0xc3]
%res = call <4 x double> @llvm.x86.fma.vfmaddsub.pd.256(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2)
ret <4 x double> %res
; CHECK-LABEL: test_x86_fma_vfmsubadd_ps:
; CHECK: # %bb.0:
; CHECK-NEXT: vfmsubaddps %xmm2, %xmm1, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0xf9,0x5e,0xc2,0x10]
+; CHECK-NEXT: # xmm0 = (xmm0 * xmm1) -/+ xmm2
; CHECK-NEXT: retq # encoding: [0xc3]
%res = call <4 x float> @llvm.x86.fma.vfmsubadd.ps(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2)
ret <4 x float> %res
; CHECK-LABEL: test_x86_fma_vfmsubadd_pd:
; CHECK: # %bb.0:
; CHECK-NEXT: vfmsubaddpd %xmm2, %xmm1, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0xf9,0x5f,0xc2,0x10]
+; CHECK-NEXT: # xmm0 = (xmm0 * xmm1) -/+ xmm2
; CHECK-NEXT: retq # encoding: [0xc3]
%res = call <2 x double> @llvm.x86.fma.vfmsubadd.pd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2)
ret <2 x double> %res
; CHECK-LABEL: test_x86_fma_vfmsubadd_ps_256:
; CHECK: # %bb.0:
; CHECK-NEXT: vfmsubaddps %ymm2, %ymm1, %ymm0, %ymm0 # encoding: [0xc4,0xe3,0xfd,0x5e,0xc2,0x10]
+; CHECK-NEXT: # ymm0 = (ymm0 * ymm1) -/+ ymm2
; CHECK-NEXT: retq # encoding: [0xc3]
%res = call <8 x float> @llvm.x86.fma.vfmsubadd.ps.256(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2)
ret <8 x float> %res
; CHECK-LABEL: test_x86_fma_vfmsubadd_pd_256:
; CHECK: # %bb.0:
; CHECK-NEXT: vfmsubaddpd %ymm2, %ymm1, %ymm0, %ymm0 # encoding: [0xc4,0xe3,0xfd,0x5f,0xc2,0x10]
+; CHECK-NEXT: # ymm0 = (ymm0 * ymm1) -/+ ymm2
; CHECK-NEXT: retq # encoding: [0xc3]
%res = call <4 x double> @llvm.x86.fma.vfmsubadd.pd.256(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2)
ret <4 x double> %res
; CHECK-LABEL: test_x86_fma4_vfmadd_ss:
; CHECK: # %bb.0:
; CHECK-NEXT: vfmaddss %xmm2, %xmm1, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0xf9,0x6a,0xc2,0x10]
+; CHECK-NEXT: # xmm0 = (xmm0 * xmm1) + xmm2
; CHECK-NEXT: retq # encoding: [0xc3]
%res = call <4 x float> @llvm.x86.fma4.vfmadd.ss(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2)
ret <4 x float> %res
; CHECK-LABEL: test_x86_fma4_vfmadd_bac_ss:
; CHECK: # %bb.0:
; CHECK-NEXT: vfmaddss %xmm2, %xmm0, %xmm1, %xmm0 # encoding: [0xc4,0xe3,0xf1,0x6a,0xc2,0x00]
+; CHECK-NEXT: # xmm0 = (xmm1 * xmm0) + xmm2
; CHECK-NEXT: retq # encoding: [0xc3]
%res = call <4 x float> @llvm.x86.fma4.vfmadd.ss(<4 x float> %a1, <4 x float> %a0, <4 x float> %a2)
ret <4 x float> %res
; CHECK-LABEL: test_x86_fma4_vfmadd_sd:
; CHECK: # %bb.0:
; CHECK-NEXT: vfmaddsd %xmm2, %xmm1, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0xf9,0x6b,0xc2,0x10]
+; CHECK-NEXT: # xmm0 = (xmm0 * xmm1) + xmm2
; CHECK-NEXT: retq # encoding: [0xc3]
%res = call <2 x double> @llvm.x86.fma4.vfmadd.sd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2)
ret <2 x double> %res
; CHECK-LABEL: test_x86_fma4_vfmadd_bac_sd:
; CHECK: # %bb.0:
; CHECK-NEXT: vfmaddsd %xmm2, %xmm0, %xmm1, %xmm0 # encoding: [0xc4,0xe3,0xf1,0x6b,0xc2,0x00]
+; CHECK-NEXT: # xmm0 = (xmm1 * xmm0) + xmm2
; CHECK-NEXT: retq # encoding: [0xc3]
%res = call <2 x double> @llvm.x86.fma4.vfmadd.sd(<2 x double> %a1, <2 x double> %a0, <2 x double> %a2)
ret <2 x double> %res
; CHECK-LABEL: test_x86_fma_vfmadd_ps:
; CHECK: # %bb.0:
; CHECK-NEXT: vfmaddps %xmm2, %xmm1, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0xf9,0x68,0xc2,0x10]
+; CHECK-NEXT: # xmm0 = (xmm0 * xmm1) + xmm2
; CHECK-NEXT: retq # encoding: [0xc3]
%1 = call <4 x float> @llvm.fma.v4f32(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2)
ret <4 x float> %1
; CHECK-LABEL: test_x86_fma_vfmadd_pd:
; CHECK: # %bb.0:
; CHECK-NEXT: vfmaddpd %xmm2, %xmm1, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0xf9,0x69,0xc2,0x10]
+; CHECK-NEXT: # xmm0 = (xmm0 * xmm1) + xmm2
; CHECK-NEXT: retq # encoding: [0xc3]
%1 = call <2 x double> @llvm.fma.v2f64(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2)
ret <2 x double> %1
; CHECK-LABEL: test_x86_fma_vfmadd_ps_256:
; CHECK: # %bb.0:
; CHECK-NEXT: vfmaddps %ymm2, %ymm1, %ymm0, %ymm0 # encoding: [0xc4,0xe3,0xfd,0x68,0xc2,0x10]
+; CHECK-NEXT: # ymm0 = (ymm0 * ymm1) + ymm2
; CHECK-NEXT: retq # encoding: [0xc3]
%1 = call <8 x float> @llvm.fma.v8f32(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2)
ret <8 x float> %1
; CHECK-LABEL: test_x86_fma_vfmadd_pd_256:
; CHECK: # %bb.0:
; CHECK-NEXT: vfmaddpd %ymm2, %ymm1, %ymm0, %ymm0 # encoding: [0xc4,0xe3,0xfd,0x69,0xc2,0x10]
+; CHECK-NEXT: # ymm0 = (ymm0 * ymm1) + ymm2
; CHECK-NEXT: retq # encoding: [0xc3]
%1 = call <4 x double> @llvm.fma.v4f64(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2)
ret <4 x double> %1
; CHECK-LABEL: test_x86_fma_vfmsub_ps:
; CHECK: # %bb.0:
; CHECK-NEXT: vfmsubps %xmm2, %xmm1, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0xf9,0x6c,0xc2,0x10]
+; CHECK-NEXT: # xmm0 = (xmm0 * xmm1) - xmm2
; CHECK-NEXT: retq # encoding: [0xc3]
%1 = fsub <4 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %a2
%2 = call <4 x float> @llvm.fma.v4f32(<4 x float> %a0, <4 x float> %a1, <4 x float> %1)
; CHECK-LABEL: test_x86_fma_vfmsub_pd:
; CHECK: # %bb.0:
; CHECK-NEXT: vfmsubpd %xmm2, %xmm1, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0xf9,0x6d,0xc2,0x10]
+; CHECK-NEXT: # xmm0 = (xmm0 * xmm1) - xmm2
; CHECK-NEXT: retq # encoding: [0xc3]
%1 = fsub <2 x double> <double -0.000000e+00, double -0.000000e+00>, %a2
%2 = call <2 x double> @llvm.fma.v2f64(<2 x double> %a0, <2 x double> %a1, <2 x double> %1)
; CHECK-LABEL: test_x86_fma_vfmsub_ps_256:
; CHECK: # %bb.0:
; CHECK-NEXT: vfmsubps %ymm2, %ymm1, %ymm0, %ymm0 # encoding: [0xc4,0xe3,0xfd,0x6c,0xc2,0x10]
+; CHECK-NEXT: # ymm0 = (ymm0 * ymm1) - ymm2
; CHECK-NEXT: retq # encoding: [0xc3]
%1 = fsub <8 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %a2
%2 = call <8 x float> @llvm.fma.v8f32(<8 x float> %a0, <8 x float> %a1, <8 x float> %1)
; CHECK-LABEL: test_x86_fma_vfmsub_pd_256:
; CHECK: # %bb.0:
; CHECK-NEXT: vfmsubpd %ymm2, %ymm1, %ymm0, %ymm0 # encoding: [0xc4,0xe3,0xfd,0x6d,0xc2,0x10]
+; CHECK-NEXT: # ymm0 = (ymm0 * ymm1) - ymm2
; CHECK-NEXT: retq # encoding: [0xc3]
%1 = fsub <4 x double> <double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00>, %a2
%2 = call <4 x double> @llvm.fma.v4f64(<4 x double> %a0, <4 x double> %a1, <4 x double> %1)
; CHECK-LABEL: test_x86_fma_vfnmadd_ps:
; CHECK: # %bb.0:
; CHECK-NEXT: vfnmaddps %xmm2, %xmm1, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0xf9,0x78,0xc2,0x10]
+; CHECK-NEXT: # xmm0 = -(xmm0 * xmm1) + xmm2
; CHECK-NEXT: retq # encoding: [0xc3]
%1 = fsub <4 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %a0
%2 = call <4 x float> @llvm.fma.v4f32(<4 x float> %1, <4 x float> %a1, <4 x float> %a2)
; CHECK-LABEL: test_x86_fma_vfnmadd_pd:
; CHECK: # %bb.0:
; CHECK-NEXT: vfnmaddpd %xmm2, %xmm1, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0xf9,0x79,0xc2,0x10]
+; CHECK-NEXT: # xmm0 = -(xmm0 * xmm1) + xmm2
; CHECK-NEXT: retq # encoding: [0xc3]
%1 = fsub <2 x double> <double -0.000000e+00, double -0.000000e+00>, %a0
%2 = call <2 x double> @llvm.fma.v2f64(<2 x double> %1, <2 x double> %a1, <2 x double> %a2)
; CHECK-LABEL: test_x86_fma_vfnmadd_ps_256:
; CHECK: # %bb.0:
; CHECK-NEXT: vfnmaddps %ymm2, %ymm1, %ymm0, %ymm0 # encoding: [0xc4,0xe3,0xfd,0x78,0xc2,0x10]
+; CHECK-NEXT: # ymm0 = -(ymm0 * ymm1) + ymm2
; CHECK-NEXT: retq # encoding: [0xc3]
%1 = fsub <8 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %a0
%2 = call <8 x float> @llvm.fma.v8f32(<8 x float> %1, <8 x float> %a1, <8 x float> %a2)
; CHECK-LABEL: test_x86_fma_vfnmadd_pd_256:
; CHECK: # %bb.0:
; CHECK-NEXT: vfnmaddpd %ymm2, %ymm1, %ymm0, %ymm0 # encoding: [0xc4,0xe3,0xfd,0x79,0xc2,0x10]
+; CHECK-NEXT: # ymm0 = -(ymm0 * ymm1) + ymm2
; CHECK-NEXT: retq # encoding: [0xc3]
%1 = fsub <4 x double> <double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00>, %a0
%2 = call <4 x double> @llvm.fma.v4f64(<4 x double> %1, <4 x double> %a1, <4 x double> %a2)
; CHECK-LABEL: test_x86_fma_vfnmsub_ps:
; CHECK: # %bb.0:
; CHECK-NEXT: vfnmsubps %xmm2, %xmm1, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0xf9,0x7c,0xc2,0x10]
+; CHECK-NEXT: # xmm0 = -(xmm0 * xmm1) - xmm2
; CHECK-NEXT: retq # encoding: [0xc3]
%1 = fsub <4 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %a0
%2 = fsub <4 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %a2
; CHECK-LABEL: test_x86_fma_vfnmsub_pd:
; CHECK: # %bb.0:
; CHECK-NEXT: vfnmsubpd %xmm2, %xmm1, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0xf9,0x7d,0xc2,0x10]
+; CHECK-NEXT: # xmm0 = -(xmm0 * xmm1) - xmm2
; CHECK-NEXT: retq # encoding: [0xc3]
%1 = fsub <2 x double> <double -0.000000e+00, double -0.000000e+00>, %a0
%2 = fsub <2 x double> <double -0.000000e+00, double -0.000000e+00>, %a2
; CHECK-LABEL: test_x86_fma_vfnmsub_ps_256:
; CHECK: # %bb.0:
; CHECK-NEXT: vfnmsubps %ymm2, %ymm1, %ymm0, %ymm0 # encoding: [0xc4,0xe3,0xfd,0x7c,0xc2,0x10]
+; CHECK-NEXT: # ymm0 = -(ymm0 * ymm1) - ymm2
; CHECK-NEXT: retq # encoding: [0xc3]
%1 = fsub <8 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %a0
%2 = fsub <8 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %a2
; CHECK-LABEL: test_x86_fma_vfnmsub_pd_256:
; CHECK: # %bb.0:
; CHECK-NEXT: vfnmsubpd %ymm2, %ymm1, %ymm0, %ymm0 # encoding: [0xc4,0xe3,0xfd,0x7d,0xc2,0x10]
+; CHECK-NEXT: # ymm0 = -(ymm0 * ymm1) - ymm2
; CHECK-NEXT: retq # encoding: [0xc3]
%1 = fsub <4 x double> <double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00>, %a0
%2 = fsub <4 x double> <double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00>, %a2
; CHECK-LABEL: test_x86_fma_vfmaddsub_ps:
; CHECK: # %bb.0:
; CHECK-NEXT: vfmaddsubps %xmm2, %xmm1, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0xf9,0x5c,0xc2,0x10]
+; CHECK-NEXT: # xmm0 = (xmm0 * xmm1) +/- xmm2
; CHECK-NEXT: retq # encoding: [0xc3]
%1 = call <4 x float> @llvm.fma.v4f32(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2)
%2 = fsub <4 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %a2
; CHECK-LABEL: test_x86_fma_vfmaddsub_pd:
; CHECK: # %bb.0:
; CHECK-NEXT: vfmaddsubpd %xmm2, %xmm1, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0xf9,0x5d,0xc2,0x10]
+; CHECK-NEXT: # xmm0 = (xmm0 * xmm1) +/- xmm2
; CHECK-NEXT: retq # encoding: [0xc3]
%1 = call <2 x double> @llvm.fma.v2f64(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2)
%2 = fsub <2 x double> <double -0.000000e+00, double -0.000000e+00>, %a2
; CHECK-LABEL: test_x86_fma_vfmaddsub_ps_256:
; CHECK: # %bb.0:
; CHECK-NEXT: vfmaddsubps %ymm2, %ymm1, %ymm0, %ymm0 # encoding: [0xc4,0xe3,0xfd,0x5c,0xc2,0x10]
+; CHECK-NEXT: # ymm0 = (ymm0 * ymm1) +/- ymm2
; CHECK-NEXT: retq # encoding: [0xc3]
%1 = call <8 x float> @llvm.fma.v8f32(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2)
%2 = fsub <8 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %a2
; CHECK-LABEL: test_x86_fma_vfmaddsub_pd_256:
; CHECK: # %bb.0:
; CHECK-NEXT: vfmaddsubpd %ymm2, %ymm1, %ymm0, %ymm0 # encoding: [0xc4,0xe3,0xfd,0x5d,0xc2,0x10]
+; CHECK-NEXT: # ymm0 = (ymm0 * ymm1) +/- ymm2
; CHECK-NEXT: retq # encoding: [0xc3]
%1 = call <4 x double> @llvm.fma.v4f64(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2)
%2 = fsub <4 x double> <double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00>, %a2
; CHECK-LABEL: test_x86_fma_vfmsubadd_ps:
; CHECK: # %bb.0:
; CHECK-NEXT: vfmsubaddps %xmm2, %xmm1, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0xf9,0x5e,0xc2,0x10]
+; CHECK-NEXT: # xmm0 = (xmm0 * xmm1) -/+ xmm2
; CHECK-NEXT: retq # encoding: [0xc3]
%1 = call <4 x float> @llvm.fma.v4f32(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2)
%2 = fsub <4 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %a2
; CHECK-LABEL: test_x86_fma_vfmsubadd_pd:
; CHECK: # %bb.0:
; CHECK-NEXT: vfmsubaddpd %xmm2, %xmm1, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0xf9,0x5f,0xc2,0x10]
+; CHECK-NEXT: # xmm0 = (xmm0 * xmm1) -/+ xmm2
; CHECK-NEXT: retq # encoding: [0xc3]
%1 = call <2 x double> @llvm.fma.v2f64(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2)
%2 = fsub <2 x double> <double -0.000000e+00, double -0.000000e+00>, %a2
; CHECK-LABEL: test_x86_fma_vfmsubadd_ps_256:
; CHECK: # %bb.0:
; CHECK-NEXT: vfmsubaddps %ymm2, %ymm1, %ymm0, %ymm0 # encoding: [0xc4,0xe3,0xfd,0x5e,0xc2,0x10]
+; CHECK-NEXT: # ymm0 = (ymm0 * ymm1) -/+ ymm2
; CHECK-NEXT: retq # encoding: [0xc3]
%1 = call <8 x float> @llvm.fma.v8f32(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2)
%2 = fsub <8 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %a2
; CHECK-LABEL: test_x86_fma_vfmsubadd_pd_256:
; CHECK: # %bb.0:
; CHECK-NEXT: vfmsubaddpd %ymm2, %ymm1, %ymm0, %ymm0 # encoding: [0xc4,0xe3,0xfd,0x5f,0xc2,0x10]
+; CHECK-NEXT: # ymm0 = (ymm0 * ymm1) -/+ ymm2
; CHECK-NEXT: retq # encoding: [0xc3]
%1 = call <4 x double> @llvm.fma.v4f64(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2)
%2 = fsub <4 x double> <double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00>, %a2
; CHECK-LABEL: test_x86_fma4_vfmadd_ss_load:
; CHECK: # %bb.0:
; CHECK-NEXT: vfmaddss (%rdi), %xmm1, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0xf9,0x6a,0x07,0x10]
+; CHECK-NEXT: # xmm0 = (xmm0 * xmm1) + mem
; CHECK-NEXT: retq # encoding: [0xc3]
%x = load float , float *%a2
%y = insertelement <4 x float> undef, float %x, i32 0
; CHECK-LABEL: test_x86_fma4_vfmadd_ss_load2:
; CHECK: # %bb.0:
; CHECK-NEXT: vfmaddss %xmm1, (%rdi), %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x6a,0x07,0x10]
+; CHECK-NEXT: # xmm0 = (xmm0 * mem) + xmm1
; CHECK-NEXT: retq # encoding: [0xc3]
%x = load float , float *%a1
%y = insertelement <4 x float> undef, float %x, i32 0
; CHECK-LABEL: test_x86_fma4_vfmadd_sd_load:
; CHECK: # %bb.0:
; CHECK-NEXT: vfmaddsd (%rdi), %xmm1, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0xf9,0x6b,0x07,0x10]
+; CHECK-NEXT: # xmm0 = (xmm0 * xmm1) + mem
; CHECK-NEXT: retq # encoding: [0xc3]
%x = load double , double *%a2
%y = insertelement <2 x double> undef, double %x, i32 0
; CHECK-LABEL: test_x86_fma4_vfmadd_sd_load2:
; CHECK: # %bb.0:
; CHECK-NEXT: vfmaddsd %xmm1, (%rdi), %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x6b,0x07,0x10]
+; CHECK-NEXT: # xmm0 = (xmm0 * mem) + xmm1
; CHECK-NEXT: retq # encoding: [0xc3]
%x = load double , double *%a1
%y = insertelement <2 x double> undef, double %x, i32 0
; CHECK-LABEL: test_x86_fma_vfmadd_ps_load:
; CHECK: # %bb.0:
; CHECK-NEXT: vfmaddps (%rdi), %xmm1, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0xf9,0x68,0x07,0x10]
+; CHECK-NEXT: # xmm0 = (xmm0 * xmm1) + mem
; CHECK-NEXT: retq # encoding: [0xc3]
%x = load <4 x float>, <4 x float>* %a2
%res = call < 4 x float > @llvm.x86.fma.vfmadd.ps(< 4 x float > %a0, < 4 x float > %a1, < 4 x float > %x)
; CHECK-LABEL: test_x86_fma_vfmadd_ps_load2:
; CHECK: # %bb.0:
; CHECK-NEXT: vfmaddps %xmm1, (%rdi), %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x68,0x07,0x10]
+; CHECK-NEXT: # xmm0 = (xmm0 * mem) + xmm1
; CHECK-NEXT: retq # encoding: [0xc3]
%x = load <4 x float>, <4 x float>* %a1
%res = call < 4 x float > @llvm.x86.fma.vfmadd.ps(< 4 x float > %a0, < 4 x float > %x, < 4 x float > %a2)
; CHECK: # %bb.0:
; CHECK-NEXT: vmovaps (%rdi), %xmm1 # encoding: [0xc5,0xf8,0x28,0x0f]
; CHECK-NEXT: vfmaddps %xmm0, (%rsi), %xmm1, %xmm0 # encoding: [0xc4,0xe3,0x71,0x68,0x06,0x00]
+; CHECK-NEXT: # xmm0 = (xmm1 * mem) + xmm0
; CHECK-NEXT: retq # encoding: [0xc3]
%x = load <4 x float>, <4 x float>* %a0
%y = load <4 x float>, <4 x float>* %a1
; CHECK-LABEL: test_x86_fma_vfmadd_pd_load:
; CHECK: # %bb.0:
; CHECK-NEXT: vfmaddpd (%rdi), %xmm1, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0xf9,0x69,0x07,0x10]
+; CHECK-NEXT: # xmm0 = (xmm0 * xmm1) + mem
; CHECK-NEXT: retq # encoding: [0xc3]
%x = load <2 x double>, <2 x double>* %a2
%res = call < 2 x double > @llvm.x86.fma.vfmadd.pd(< 2 x double > %a0, < 2 x double > %a1, < 2 x double > %x)
; CHECK-LABEL: test_x86_fma_vfmadd_pd_load2:
; CHECK: # %bb.0:
; CHECK-NEXT: vfmaddpd %xmm1, (%rdi), %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x69,0x07,0x10]
+; CHECK-NEXT: # xmm0 = (xmm0 * mem) + xmm1
; CHECK-NEXT: retq # encoding: [0xc3]
%x = load <2 x double>, <2 x double>* %a1
%res = call < 2 x double > @llvm.x86.fma.vfmadd.pd(< 2 x double > %a0, < 2 x double > %x, < 2 x double > %a2)
; CHECK: # %bb.0:
; CHECK-NEXT: vmovapd (%rdi), %xmm1 # encoding: [0xc5,0xf9,0x28,0x0f]
; CHECK-NEXT: vfmaddpd %xmm0, (%rsi), %xmm1, %xmm0 # encoding: [0xc4,0xe3,0x71,0x69,0x06,0x00]
+; CHECK-NEXT: # xmm0 = (xmm1 * mem) + xmm0
; CHECK-NEXT: retq # encoding: [0xc3]
%x = load <2 x double>, <2 x double>* %a0
%y = load <2 x double>, <2 x double>* %a1
; CHECK-LABEL: fmadd_aab_ss:
; CHECK: # %bb.0:
; CHECK-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
-; CHECK-NEXT: vfmaddss (%rsi), %xmm0, %xmm0, %xmm0
+; CHECK-NEXT: vfmaddss {{.*#+}} xmm0 = (xmm0 * xmm0) + mem
; CHECK-NEXT: vmovss %xmm0, (%rdi)
; CHECK-NEXT: retq
%a.val = load float, float* %a
; CHECK-LABEL: fmadd_aba_ss:
; CHECK: # %bb.0:
; CHECK-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
-; CHECK-NEXT: vfmaddss %xmm0, (%rsi), %xmm0, %xmm0
+; CHECK-NEXT: vfmaddss {{.*#+}} xmm0 = (xmm0 * mem) + xmm0
; CHECK-NEXT: vmovss %xmm0, (%rdi)
; CHECK-NEXT: retq
%a.val = load float, float* %a
; CHECK-LABEL: fmadd_aab_sd:
; CHECK: # %bb.0:
; CHECK-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
-; CHECK-NEXT: vfmaddsd (%rsi), %xmm0, %xmm0, %xmm0
+; CHECK-NEXT: vfmaddsd {{.*#+}} xmm0 = (xmm0 * xmm0) + mem
; CHECK-NEXT: vmovsd %xmm0, (%rdi)
; CHECK-NEXT: retq
%a.val = load double, double* %a
; CHECK-LABEL: fmadd_aba_sd:
; CHECK: # %bb.0:
; CHECK-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
-; CHECK-NEXT: vfmaddsd %xmm0, (%rsi), %xmm0, %xmm0
+; CHECK-NEXT: vfmaddsd {{.*#+}} xmm0 = (xmm0 * mem) + xmm0
; CHECK-NEXT: vmovsd %xmm0, (%rdi)
; CHECK-NEXT: retq
%a.val = load double, double* %a
;
; FMA4-LABEL: test_f32_fmadd:
; FMA4: # %bb.0:
-; FMA4-NEXT: vfmaddss %xmm2, %xmm1, %xmm0, %xmm0
+; FMA4-NEXT: vfmaddss {{.*#+}} xmm0 = (xmm0 * xmm1) + xmm2
; FMA4-NEXT: retq
;
; AVX512-LABEL: test_f32_fmadd:
;
; FMA4-LABEL: test_4f32_fmadd:
; FMA4: # %bb.0:
-; FMA4-NEXT: vfmaddps %xmm2, %xmm1, %xmm0, %xmm0
+; FMA4-NEXT: vfmaddps {{.*#+}} xmm0 = (xmm0 * xmm1) + xmm2
; FMA4-NEXT: retq
;
; AVX512-LABEL: test_4f32_fmadd:
;
; FMA4-LABEL: test_8f32_fmadd:
; FMA4: # %bb.0:
-; FMA4-NEXT: vfmaddps %ymm2, %ymm1, %ymm0, %ymm0
+; FMA4-NEXT: vfmaddps {{.*#+}} ymm0 = (ymm0 * ymm1) + ymm2
; FMA4-NEXT: retq
;
; AVX512-LABEL: test_8f32_fmadd:
;
; FMA4-LABEL: test_f64_fmadd:
; FMA4: # %bb.0:
-; FMA4-NEXT: vfmaddsd %xmm2, %xmm1, %xmm0, %xmm0
+; FMA4-NEXT: vfmaddsd {{.*#+}} xmm0 = (xmm0 * xmm1) + xmm2
; FMA4-NEXT: retq
;
; AVX512-LABEL: test_f64_fmadd:
;
; FMA4-LABEL: test_2f64_fmadd:
; FMA4: # %bb.0:
-; FMA4-NEXT: vfmaddpd %xmm2, %xmm1, %xmm0, %xmm0
+; FMA4-NEXT: vfmaddpd {{.*#+}} xmm0 = (xmm0 * xmm1) + xmm2
; FMA4-NEXT: retq
;
; AVX512-LABEL: test_2f64_fmadd:
;
; FMA4-LABEL: test_4f64_fmadd:
; FMA4: # %bb.0:
-; FMA4-NEXT: vfmaddpd %ymm2, %ymm1, %ymm0, %ymm0
+; FMA4-NEXT: vfmaddpd {{.*#+}} ymm0 = (ymm0 * ymm1) + ymm2
; FMA4-NEXT: retq
;
; AVX512-LABEL: test_4f64_fmadd:
;
; FMA4-LABEL: test_f32_fmsub:
; FMA4: # %bb.0:
-; FMA4-NEXT: vfmsubss %xmm2, %xmm1, %xmm0, %xmm0
+; FMA4-NEXT: vfmsubss {{.*#+}} xmm0 = (xmm0 * xmm1) - xmm2
; FMA4-NEXT: retq
;
; AVX512-LABEL: test_f32_fmsub:
;
; FMA4-LABEL: test_4f32_fmsub:
; FMA4: # %bb.0:
-; FMA4-NEXT: vfmsubps %xmm2, %xmm1, %xmm0, %xmm0
+; FMA4-NEXT: vfmsubps {{.*#+}} xmm0 = (xmm0 * xmm1) - xmm2
; FMA4-NEXT: retq
;
; AVX512-LABEL: test_4f32_fmsub:
;
; FMA4-LABEL: test_8f32_fmsub:
; FMA4: # %bb.0:
-; FMA4-NEXT: vfmsubps %ymm2, %ymm1, %ymm0, %ymm0
+; FMA4-NEXT: vfmsubps {{.*#+}} ymm0 = (ymm0 * ymm1) - ymm2
; FMA4-NEXT: retq
;
; AVX512-LABEL: test_8f32_fmsub:
;
; FMA4-LABEL: test_f64_fmsub:
; FMA4: # %bb.0:
-; FMA4-NEXT: vfmsubsd %xmm2, %xmm1, %xmm0, %xmm0
+; FMA4-NEXT: vfmsubsd {{.*#+}} xmm0 = (xmm0 * xmm1) - xmm2
; FMA4-NEXT: retq
;
; AVX512-LABEL: test_f64_fmsub:
;
; FMA4-LABEL: test_2f64_fmsub:
; FMA4: # %bb.0:
-; FMA4-NEXT: vfmsubpd %xmm2, %xmm1, %xmm0, %xmm0
+; FMA4-NEXT: vfmsubpd {{.*#+}} xmm0 = (xmm0 * xmm1) - xmm2
; FMA4-NEXT: retq
;
; AVX512-LABEL: test_2f64_fmsub:
;
; FMA4-LABEL: test_4f64_fmsub:
; FMA4: # %bb.0:
-; FMA4-NEXT: vfmsubpd %ymm2, %ymm1, %ymm0, %ymm0
+; FMA4-NEXT: vfmsubpd {{.*#+}} ymm0 = (ymm0 * ymm1) - ymm2
; FMA4-NEXT: retq
;
; AVX512-LABEL: test_4f64_fmsub:
;
; FMA4-LABEL: test_f32_fnmadd:
; FMA4: # %bb.0:
-; FMA4-NEXT: vfnmaddss %xmm2, %xmm1, %xmm0, %xmm0
+; FMA4-NEXT: vfnmaddss {{.*#+}} xmm0 = -(xmm0 * xmm1) + xmm2
; FMA4-NEXT: retq
;
; AVX512-LABEL: test_f32_fnmadd:
;
; FMA4-LABEL: test_4f32_fnmadd:
; FMA4: # %bb.0:
-; FMA4-NEXT: vfnmaddps %xmm2, %xmm1, %xmm0, %xmm0
+; FMA4-NEXT: vfnmaddps {{.*#+}} xmm0 = -(xmm0 * xmm1) + xmm2
; FMA4-NEXT: retq
;
; AVX512-LABEL: test_4f32_fnmadd:
;
; FMA4-LABEL: test_8f32_fnmadd:
; FMA4: # %bb.0:
-; FMA4-NEXT: vfnmaddps %ymm2, %ymm1, %ymm0, %ymm0
+; FMA4-NEXT: vfnmaddps {{.*#+}} ymm0 = -(ymm0 * ymm1) + ymm2
; FMA4-NEXT: retq
;
; AVX512-LABEL: test_8f32_fnmadd:
;
; FMA4-LABEL: test_f64_fnmadd:
; FMA4: # %bb.0:
-; FMA4-NEXT: vfnmaddsd %xmm2, %xmm1, %xmm0, %xmm0
+; FMA4-NEXT: vfnmaddsd {{.*#+}} xmm0 = -(xmm0 * xmm1) + xmm2
; FMA4-NEXT: retq
;
; AVX512-LABEL: test_f64_fnmadd:
;
; FMA4-LABEL: test_2f64_fnmadd:
; FMA4: # %bb.0:
-; FMA4-NEXT: vfnmaddpd %xmm2, %xmm1, %xmm0, %xmm0
+; FMA4-NEXT: vfnmaddpd {{.*#+}} xmm0 = -(xmm0 * xmm1) + xmm2
; FMA4-NEXT: retq
;
; AVX512-LABEL: test_2f64_fnmadd:
;
; FMA4-LABEL: test_4f64_fnmadd:
; FMA4: # %bb.0:
-; FMA4-NEXT: vfnmaddpd %ymm2, %ymm1, %ymm0, %ymm0
+; FMA4-NEXT: vfnmaddpd {{.*#+}} ymm0 = -(ymm0 * ymm1) + ymm2
; FMA4-NEXT: retq
;
; AVX512-LABEL: test_4f64_fnmadd:
;
; FMA4-LABEL: test_f32_fnmsub:
; FMA4: # %bb.0:
-; FMA4-NEXT: vfnmsubss %xmm2, %xmm1, %xmm0, %xmm0
+; FMA4-NEXT: vfnmsubss {{.*#+}} xmm0 = -(xmm0 * xmm1) - xmm2
; FMA4-NEXT: retq
;
; AVX512-LABEL: test_f32_fnmsub:
;
; FMA4-LABEL: test_4f32_fnmsub:
; FMA4: # %bb.0:
-; FMA4-NEXT: vfnmsubps %xmm2, %xmm1, %xmm0, %xmm0
+; FMA4-NEXT: vfnmsubps {{.*#+}} xmm0 = -(xmm0 * xmm1) - xmm2
; FMA4-NEXT: retq
;
; AVX512-LABEL: test_4f32_fnmsub:
;
; FMA4-LABEL: test_8f32_fnmsub:
; FMA4: # %bb.0:
-; FMA4-NEXT: vfnmsubps %ymm2, %ymm1, %ymm0, %ymm0
+; FMA4-NEXT: vfnmsubps {{.*#+}} ymm0 = -(ymm0 * ymm1) - ymm2
; FMA4-NEXT: retq
;
; AVX512-LABEL: test_8f32_fnmsub:
;
; FMA4-LABEL: test_f64_fnmsub:
; FMA4: # %bb.0:
-; FMA4-NEXT: vfnmsubsd %xmm2, %xmm1, %xmm0, %xmm0
+; FMA4-NEXT: vfnmsubsd {{.*#+}} xmm0 = -(xmm0 * xmm1) - xmm2
; FMA4-NEXT: retq
;
; AVX512-LABEL: test_f64_fnmsub:
;
; FMA4-LABEL: test_2f64_fnmsub:
; FMA4: # %bb.0:
-; FMA4-NEXT: vfnmsubpd %xmm2, %xmm1, %xmm0, %xmm0
+; FMA4-NEXT: vfnmsubpd {{.*#+}} xmm0 = -(xmm0 * xmm1) - xmm2
; FMA4-NEXT: retq
;
; AVX512-LABEL: test_2f64_fnmsub:
;
; FMA4-LABEL: test_4f64_fnmsub:
; FMA4: # %bb.0:
-; FMA4-NEXT: vfnmsubpd %ymm2, %ymm1, %ymm0, %ymm0
+; FMA4-NEXT: vfnmsubpd {{.*#+}} ymm0 = -(ymm0 * ymm1) - ymm2
; FMA4-NEXT: retq
;
; AVX512-LABEL: test_4f64_fnmsub:
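
The four opcode families above differ only in where the signs land, and the comments now make that explicit. As a scalar reference, each printed form corresponds to a single-rounding fused operation (sketch using libm's fmaf; function names are illustrative):

    #include <math.h>
    float fmadd (float x, float y, float z) { return fmaf( x, y,  z); } //  (x * y) + z
    float fmsub (float x, float y, float z) { return fmaf( x, y, -z); } //  (x * y) - z
    float fnmadd(float x, float y, float z) { return fmaf(-x, y,  z); } // -(x * y) + z
    float fnmsub(float x, float y, float z) { return fmaf(-x, y, -z); } // -(x * y) - z
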
;
; FMA4-LABEL: test_4f32_fmadd_load:
; FMA4: # %bb.0:
-; FMA4-NEXT: vfmaddps %xmm1, (%rdi), %xmm0, %xmm0
+; FMA4-NEXT: vfmaddps {{.*#+}} xmm0 = (xmm0 * mem) + xmm1
; FMA4-NEXT: retq
;
; AVX512-LABEL: test_4f32_fmadd_load:
;
; FMA4-LABEL: test_2f64_fmsub_load:
; FMA4: # %bb.0:
-; FMA4-NEXT: vfmsubpd %xmm1, (%rdi), %xmm0, %xmm0
+; FMA4-NEXT: vfmsubpd {{.*#+}} xmm0 = (xmm0 * mem) - xmm1
; FMA4-NEXT: retq
;
; AVX512-LABEL: test_2f64_fmsub_load:
;
; FMA4-NOINFS-LABEL: test_v4f32_mul_add_x_one_y:
; FMA4-NOINFS: # %bb.0:
-; FMA4-NOINFS-NEXT: vfmaddps %xmm1, %xmm1, %xmm0, %xmm0
+; FMA4-NOINFS-NEXT: vfmaddps {{.*#+}} xmm0 = (xmm0 * xmm1) + xmm1
; FMA4-NOINFS-NEXT: retq
;
; AVX512-NOINFS-LABEL: test_v4f32_mul_add_x_one_y:
;
; FMA4-NOINFS-LABEL: test_v4f32_mul_y_add_x_one:
; FMA4-NOINFS: # %bb.0:
-; FMA4-NOINFS-NEXT: vfmaddps %xmm1, %xmm1, %xmm0, %xmm0
+; FMA4-NOINFS-NEXT: vfmaddps {{.*#+}} xmm0 = (xmm0 * xmm1) + xmm1
; FMA4-NOINFS-NEXT: retq
;
; AVX512-NOINFS-LABEL: test_v4f32_mul_y_add_x_one:
;
; FMA4-NOINFS-LABEL: test_v4f32_mul_y_add_x_one_undefs:
; FMA4-NOINFS: # %bb.0:
-; FMA4-NOINFS-NEXT: vfmaddps %xmm1, %xmm1, %xmm0, %xmm0
+; FMA4-NOINFS-NEXT: vfmaddps {{.*#+}} xmm0 = (xmm0 * xmm1) + xmm1
; FMA4-NOINFS-NEXT: retq
;
; AVX512-NOINFS-LABEL: test_v4f32_mul_y_add_x_one_undefs:
;
; FMA4-NOINFS-LABEL: test_v4f32_mul_add_x_negone_y:
; FMA4-NOINFS: # %bb.0:
-; FMA4-NOINFS-NEXT: vfmsubps %xmm1, %xmm1, %xmm0, %xmm0
+; FMA4-NOINFS-NEXT: vfmsubps {{.*#+}} xmm0 = (xmm0 * xmm1) - xmm1
; FMA4-NOINFS-NEXT: retq
;
; AVX512-NOINFS-LABEL: test_v4f32_mul_add_x_negone_y:
;
; FMA4-NOINFS-LABEL: test_v4f32_mul_y_add_x_negone:
; FMA4-NOINFS: # %bb.0:
-; FMA4-NOINFS-NEXT: vfmsubps %xmm1, %xmm1, %xmm0, %xmm0
+; FMA4-NOINFS-NEXT: vfmsubps {{.*#+}} xmm0 = (xmm0 * xmm1) - xmm1
; FMA4-NOINFS-NEXT: retq
;
; AVX512-NOINFS-LABEL: test_v4f32_mul_y_add_x_negone:
;
; FMA4-NOINFS-LABEL: test_v4f32_mul_y_add_x_negone_undefs:
; FMA4-NOINFS: # %bb.0:
-; FMA4-NOINFS-NEXT: vfmsubps %xmm1, %xmm1, %xmm0, %xmm0
+; FMA4-NOINFS-NEXT: vfmsubps {{.*#+}} xmm0 = (xmm0 * xmm1) - xmm1
; FMA4-NOINFS-NEXT: retq
;
; AVX512-NOINFS-LABEL: test_v4f32_mul_y_add_x_negone_undefs:
;
; FMA4-NOINFS-LABEL: test_v4f32_mul_sub_one_x_y:
; FMA4-NOINFS: # %bb.0:
-; FMA4-NOINFS-NEXT: vfnmaddps %xmm1, %xmm1, %xmm0, %xmm0
+; FMA4-NOINFS-NEXT: vfnmaddps {{.*#+}} xmm0 = -(xmm0 * xmm1) + xmm1
; FMA4-NOINFS-NEXT: retq
;
; AVX512-NOINFS-LABEL: test_v4f32_mul_sub_one_x_y:
;
; FMA4-NOINFS-LABEL: test_v4f32_mul_y_sub_one_x:
; FMA4-NOINFS: # %bb.0:
-; FMA4-NOINFS-NEXT: vfnmaddps %xmm1, %xmm1, %xmm0, %xmm0
+; FMA4-NOINFS-NEXT: vfnmaddps {{.*#+}} xmm0 = -(xmm0 * xmm1) + xmm1
; FMA4-NOINFS-NEXT: retq
;
; AVX512-NOINFS-LABEL: test_v4f32_mul_y_sub_one_x:
;
; FMA4-NOINFS-LABEL: test_v4f32_mul_y_sub_one_x_undefs:
; FMA4-NOINFS: # %bb.0:
-; FMA4-NOINFS-NEXT: vfnmaddps %xmm1, %xmm1, %xmm0, %xmm0
+; FMA4-NOINFS-NEXT: vfnmaddps {{.*#+}} xmm0 = -(xmm0 * xmm1) + xmm1
; FMA4-NOINFS-NEXT: retq
;
; AVX512-NOINFS-LABEL: test_v4f32_mul_y_sub_one_x_undefs:
;
; FMA4-NOINFS-LABEL: test_v4f32_mul_sub_negone_x_y:
; FMA4-NOINFS: # %bb.0:
-; FMA4-NOINFS-NEXT: vfnmsubps %xmm1, %xmm1, %xmm0, %xmm0
+; FMA4-NOINFS-NEXT: vfnmsubps {{.*#+}} xmm0 = -(xmm0 * xmm1) - xmm1
; FMA4-NOINFS-NEXT: retq
;
; AVX512-NOINFS-LABEL: test_v4f32_mul_sub_negone_x_y:
;
; FMA4-NOINFS-LABEL: test_v4f32_mul_y_sub_negone_x:
; FMA4-NOINFS: # %bb.0:
-; FMA4-NOINFS-NEXT: vfnmsubps %xmm1, %xmm1, %xmm0, %xmm0
+; FMA4-NOINFS-NEXT: vfnmsubps {{.*#+}} xmm0 = -(xmm0 * xmm1) - xmm1
; FMA4-NOINFS-NEXT: retq
;
; AVX512-NOINFS-LABEL: test_v4f32_mul_y_sub_negone_x:
;
; FMA4-NOINFS-LABEL: test_v4f32_mul_y_sub_negone_x_undefs:
; FMA4-NOINFS: # %bb.0:
-; FMA4-NOINFS-NEXT: vfnmsubps %xmm1, %xmm1, %xmm0, %xmm0
+; FMA4-NOINFS-NEXT: vfnmsubps {{.*#+}} xmm0 = -(xmm0 * xmm1) - xmm1
; FMA4-NOINFS-NEXT: retq
;
; AVX512-NOINFS-LABEL: test_v4f32_mul_y_sub_negone_x_undefs:
;
; FMA4-NOINFS-LABEL: test_v4f32_mul_sub_x_one_y:
; FMA4-NOINFS: # %bb.0:
-; FMA4-NOINFS-NEXT: vfmsubps %xmm1, %xmm1, %xmm0, %xmm0
+; FMA4-NOINFS-NEXT: vfmsubps {{.*#+}} xmm0 = (xmm0 * xmm1) - xmm1
; FMA4-NOINFS-NEXT: retq
;
; AVX512-NOINFS-LABEL: test_v4f32_mul_sub_x_one_y:
;
; FMA4-NOINFS-LABEL: test_v4f32_mul_y_sub_x_one:
; FMA4-NOINFS: # %bb.0:
-; FMA4-NOINFS-NEXT: vfmsubps %xmm1, %xmm1, %xmm0, %xmm0
+; FMA4-NOINFS-NEXT: vfmsubps {{.*#+}} xmm0 = (xmm0 * xmm1) - xmm1
; FMA4-NOINFS-NEXT: retq
;
; AVX512-NOINFS-LABEL: test_v4f32_mul_y_sub_x_one:
;
; FMA4-NOINFS-LABEL: test_v4f32_mul_y_sub_x_one_undefs:
; FMA4-NOINFS: # %bb.0:
-; FMA4-NOINFS-NEXT: vfmsubps %xmm1, %xmm1, %xmm0, %xmm0
+; FMA4-NOINFS-NEXT: vfmsubps {{.*#+}} xmm0 = (xmm0 * xmm1) - xmm1
; FMA4-NOINFS-NEXT: retq
;
; AVX512-NOINFS-LABEL: test_v4f32_mul_y_sub_x_one_undefs:
;
; FMA4-NOINFS-LABEL: test_v4f32_mul_sub_x_negone_y:
; FMA4-NOINFS: # %bb.0:
-; FMA4-NOINFS-NEXT: vfmaddps %xmm1, %xmm1, %xmm0, %xmm0
+; FMA4-NOINFS-NEXT: vfmaddps {{.*#+}} xmm0 = (xmm0 * xmm1) + xmm1
; FMA4-NOINFS-NEXT: retq
;
; AVX512-NOINFS-LABEL: test_v4f32_mul_sub_x_negone_y:
;
; FMA4-NOINFS-LABEL: test_v4f32_mul_y_sub_x_negone:
; FMA4-NOINFS: # %bb.0:
-; FMA4-NOINFS-NEXT: vfmaddps %xmm1, %xmm1, %xmm0, %xmm0
+; FMA4-NOINFS-NEXT: vfmaddps {{.*#+}} xmm0 = (xmm0 * xmm1) + xmm1
; FMA4-NOINFS-NEXT: retq
;
; AVX512-NOINFS-LABEL: test_v4f32_mul_y_sub_x_negone:
;
; FMA4-NOINFS-LABEL: test_v4f32_mul_y_sub_x_negone_undefs:
; FMA4-NOINFS: # %bb.0:
-; FMA4-NOINFS-NEXT: vfmaddps %xmm1, %xmm1, %xmm0, %xmm0
+; FMA4-NOINFS-NEXT: vfmaddps {{.*#+}} xmm0 = (xmm0 * xmm1) + xmm1
; FMA4-NOINFS-NEXT: retq
;
; AVX512-NOINFS-LABEL: test_v4f32_mul_y_sub_x_negone_undefs:
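
The NOINFS runs above check the (x ± 1) * y folds: with infinities excluded, (x + 1) * y contracts to (x * y) + y and (x - 1) * y to (x * y) - y, with the sub_one/sub_negone variants landing on the fnmadd/fnmsub forms the comments show. A scalar sketch of the rewrite (valid only under the no-infs assumption):

    #include <math.h>
    // (x + 1) * y  ==>  (x * y) + y
    float mul_add_x_one_y(float x, float y)    { return fmaf(x, y,  y); }
    // (x - 1) * y  ==>  (x * y) - y
    float mul_add_x_negone_y(float x, float y) { return fmaf(x, y, -y); }
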
; FMA4-INFS-NEXT: vmovss {{.*#+}} xmm3 = mem[0],zero,zero,zero
; FMA4-INFS-NEXT: vsubss %xmm2, %xmm3, %xmm3
; FMA4-INFS-NEXT: vmulss %xmm3, %xmm1, %xmm1
-; FMA4-INFS-NEXT: vfmaddss %xmm1, %xmm2, %xmm0, %xmm0
+; FMA4-INFS-NEXT: vfmaddss {{.*#+}} xmm0 = (xmm0 * xmm2) + xmm1
; FMA4-INFS-NEXT: retq
;
; AVX512-INFS-LABEL: test_f32_interp:
;
; FMA4-NOINFS-LABEL: test_f32_interp:
; FMA4-NOINFS: # %bb.0:
-; FMA4-NOINFS-NEXT: vfmsubss %xmm1, %xmm1, %xmm2, %xmm1
-; FMA4-NOINFS-NEXT: vfmsubss %xmm1, %xmm2, %xmm0, %xmm0
+; FMA4-NOINFS-NEXT: vfmsubss {{.*#+}} xmm1 = (xmm2 * xmm1) - xmm1
+; FMA4-NOINFS-NEXT: vfmsubss {{.*#+}} xmm0 = (xmm0 * xmm2) - xmm1
; FMA4-NOINFS-NEXT: retq
;
; AVX512-NOINFS-LABEL: test_f32_interp:
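
test_f32_interp exercises linear interpolation, x*t + y*(1 - t). Under no-infs the explicit (1 - t) disappears and the expression contracts into the two chained vfmsubss ops annotated above: first t*y - y, then x*t minus that intermediate. The algebra, as a sketch:

    #include <math.h>
    // x*t + y*(1 - t)  ==>  (x * t) - ((t * y) - y)   (no-infs)
    float interp(float x, float y, float t) {
      float tmp = fmaf(t, y, -y);  // (t * y) - y
      return fmaf(x, t, -tmp);     // (x * t) - tmp
    }
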
; FMA4-INFS-NEXT: vmovaps {{.*#+}} xmm3 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0]
; FMA4-INFS-NEXT: vsubps %xmm2, %xmm3, %xmm3
; FMA4-INFS-NEXT: vmulps %xmm3, %xmm1, %xmm1
-; FMA4-INFS-NEXT: vfmaddps %xmm1, %xmm2, %xmm0, %xmm0
+; FMA4-INFS-NEXT: vfmaddps {{.*#+}} xmm0 = (xmm0 * xmm2) + xmm1
; FMA4-INFS-NEXT: retq
;
; AVX512-INFS-LABEL: test_v4f32_interp:
;
; FMA4-NOINFS-LABEL: test_v4f32_interp:
; FMA4-NOINFS: # %bb.0:
-; FMA4-NOINFS-NEXT: vfmsubps %xmm1, %xmm1, %xmm2, %xmm1
-; FMA4-NOINFS-NEXT: vfmsubps %xmm1, %xmm2, %xmm0, %xmm0
+; FMA4-NOINFS-NEXT: vfmsubps {{.*#+}} xmm1 = (xmm2 * xmm1) - xmm1
+; FMA4-NOINFS-NEXT: vfmsubps {{.*#+}} xmm0 = (xmm0 * xmm2) - xmm1
; FMA4-NOINFS-NEXT: retq
;
; AVX512-NOINFS-LABEL: test_v4f32_interp:
; FMA4-INFS-NEXT: vmovaps {{.*#+}} ymm3 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0]
; FMA4-INFS-NEXT: vsubps %ymm2, %ymm3, %ymm3
; FMA4-INFS-NEXT: vmulps %ymm3, %ymm1, %ymm1
-; FMA4-INFS-NEXT: vfmaddps %ymm1, %ymm2, %ymm0, %ymm0
+; FMA4-INFS-NEXT: vfmaddps {{.*#+}} ymm0 = (ymm0 * ymm2) + ymm1
; FMA4-INFS-NEXT: retq
;
; AVX512-INFS-LABEL: test_v8f32_interp:
;
; FMA4-NOINFS-LABEL: test_v8f32_interp:
; FMA4-NOINFS: # %bb.0:
-; FMA4-NOINFS-NEXT: vfmsubps %ymm1, %ymm1, %ymm2, %ymm1
-; FMA4-NOINFS-NEXT: vfmsubps %ymm1, %ymm2, %ymm0, %ymm0
+; FMA4-NOINFS-NEXT: vfmsubps {{.*#+}} ymm1 = (ymm2 * ymm1) - ymm1
+; FMA4-NOINFS-NEXT: vfmsubps {{.*#+}} ymm0 = (ymm0 * ymm2) - ymm1
; FMA4-NOINFS-NEXT: retq
;
; AVX512-NOINFS-LABEL: test_v8f32_interp:
; FMA4-INFS-NEXT: vmovsd {{.*#+}} xmm3 = mem[0],zero
; FMA4-INFS-NEXT: vsubsd %xmm2, %xmm3, %xmm3
; FMA4-INFS-NEXT: vmulsd %xmm3, %xmm1, %xmm1
-; FMA4-INFS-NEXT: vfmaddsd %xmm1, %xmm2, %xmm0, %xmm0
+; FMA4-INFS-NEXT: vfmaddsd {{.*#+}} xmm0 = (xmm0 * xmm2) + xmm1
; FMA4-INFS-NEXT: retq
;
; AVX512-INFS-LABEL: test_f64_interp:
;
; FMA4-NOINFS-LABEL: test_f64_interp:
; FMA4-NOINFS: # %bb.0:
-; FMA4-NOINFS-NEXT: vfmsubsd %xmm1, %xmm1, %xmm2, %xmm1
-; FMA4-NOINFS-NEXT: vfmsubsd %xmm1, %xmm2, %xmm0, %xmm0
+; FMA4-NOINFS-NEXT: vfmsubsd {{.*#+}} xmm1 = (xmm2 * xmm1) - xmm1
+; FMA4-NOINFS-NEXT: vfmsubsd {{.*#+}} xmm0 = (xmm0 * xmm2) - xmm1
; FMA4-NOINFS-NEXT: retq
;
; AVX512-NOINFS-LABEL: test_f64_interp:
; FMA4-INFS-NEXT: vmovapd {{.*#+}} xmm3 = [1.0E+0,1.0E+0]
; FMA4-INFS-NEXT: vsubpd %xmm2, %xmm3, %xmm3
; FMA4-INFS-NEXT: vmulpd %xmm3, %xmm1, %xmm1
-; FMA4-INFS-NEXT: vfmaddpd %xmm1, %xmm2, %xmm0, %xmm0
+; FMA4-INFS-NEXT: vfmaddpd {{.*#+}} xmm0 = (xmm0 * xmm2) + xmm1
; FMA4-INFS-NEXT: retq
;
; AVX512-INFS-LABEL: test_v2f64_interp:
;
; FMA4-NOINFS-LABEL: test_v2f64_interp:
; FMA4-NOINFS: # %bb.0:
-; FMA4-NOINFS-NEXT: vfmsubpd %xmm1, %xmm1, %xmm2, %xmm1
-; FMA4-NOINFS-NEXT: vfmsubpd %xmm1, %xmm2, %xmm0, %xmm0
+; FMA4-NOINFS-NEXT: vfmsubpd {{.*#+}} xmm1 = (xmm2 * xmm1) - xmm1
+; FMA4-NOINFS-NEXT: vfmsubpd {{.*#+}} xmm0 = (xmm0 * xmm2) - xmm1
; FMA4-NOINFS-NEXT: retq
;
; AVX512-NOINFS-LABEL: test_v2f64_interp:
; FMA4-INFS-NEXT: vmovapd {{.*#+}} ymm3 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0]
; FMA4-INFS-NEXT: vsubpd %ymm2, %ymm3, %ymm3
; FMA4-INFS-NEXT: vmulpd %ymm3, %ymm1, %ymm1
-; FMA4-INFS-NEXT: vfmaddpd %ymm1, %ymm2, %ymm0, %ymm0
+; FMA4-INFS-NEXT: vfmaddpd {{.*#+}} ymm0 = (ymm0 * ymm2) + ymm1
; FMA4-INFS-NEXT: retq
;
; AVX512-INFS-LABEL: test_v4f64_interp:
;
; FMA4-NOINFS-LABEL: test_v4f64_interp:
; FMA4-NOINFS: # %bb.0:
-; FMA4-NOINFS-NEXT: vfmsubpd %ymm1, %ymm1, %ymm2, %ymm1
-; FMA4-NOINFS-NEXT: vfmsubpd %ymm1, %ymm2, %ymm0, %ymm0
+; FMA4-NOINFS-NEXT: vfmsubpd {{.*#+}} ymm1 = (ymm2 * ymm1) - ymm1
+; FMA4-NOINFS-NEXT: vfmsubpd {{.*#+}} ymm0 = (ymm0 * ymm2) - ymm1
; FMA4-NOINFS-NEXT: retq
;
; AVX512-NOINFS-LABEL: test_v4f64_interp:
;
; FMA4-LABEL: test_v4f32_fneg_fmadd:
; FMA4: # %bb.0:
-; FMA4-NEXT: vfnmsubps %xmm2, %xmm1, %xmm0, %xmm0
+; FMA4-NEXT: vfnmsubps {{.*#+}} xmm0 = -(xmm0 * xmm1) - xmm2
; FMA4-NEXT: retq
;
; AVX512-LABEL: test_v4f32_fneg_fmadd:
;
; FMA4-LABEL: test_v4f64_fneg_fmsub:
; FMA4: # %bb.0:
-; FMA4-NEXT: vfnmaddpd %ymm2, %ymm1, %ymm0, %ymm0
+; FMA4-NEXT: vfnmaddpd {{.*#+}} ymm0 = -(ymm0 * ymm1) + ymm2
; FMA4-NEXT: retq
;
; AVX512-LABEL: test_v4f64_fneg_fmsub:
;
; FMA4-LABEL: test_v4f32_fneg_fnmadd:
; FMA4: # %bb.0:
-; FMA4-NEXT: vfmsubps %xmm2, %xmm1, %xmm0, %xmm0
+; FMA4-NEXT: vfmsubps {{.*#+}} xmm0 = (xmm0 * xmm1) - xmm2
; FMA4-NEXT: retq
;
; AVX512-LABEL: test_v4f32_fneg_fnmadd:
;
; FMA4-LABEL: test_v4f64_fneg_fnmsub:
; FMA4: # %bb.0:
-; FMA4-NEXT: vfmaddpd %ymm2, %ymm1, %ymm0, %ymm0
+; FMA4-NEXT: vfmaddpd {{.*#+}} ymm0 = (ymm0 * ymm1) + ymm2
; FMA4-NEXT: retq
;
; AVX512-LABEL: test_v4f64_fneg_fnmsub:
;
; FMA4-LABEL: test_v4f32_fma_fmul_x_c1_c2_y:
; FMA4: # %bb.0:
-; FMA4-NEXT: vfmaddps %xmm1, {{.*}}(%rip), %xmm0, %xmm0
+; FMA4-NEXT: vfmaddps {{.*#+}} xmm0 = (xmm0 * mem) + xmm1
; FMA4-NEXT: retq
;
; AVX512-LABEL: test_v4f32_fma_fmul_x_c1_c2_y:
; FMA4-LABEL: test_f64_fneg_fmul:
; FMA4: # %bb.0:
; FMA4-NEXT: vxorpd %xmm2, %xmm2, %xmm2
-; FMA4-NEXT: vfnmsubsd %xmm2, %xmm1, %xmm0, %xmm0
+; FMA4-NEXT: vfnmsubsd {{.*#+}} xmm0 = -(xmm0 * xmm1) - xmm2
; FMA4-NEXT: retq
;
; AVX512-LABEL: test_f64_fneg_fmul:
; FMA4-LABEL: test_v4f32_fneg_fmul:
; FMA4: # %bb.0:
; FMA4-NEXT: vxorps %xmm2, %xmm2, %xmm2
-; FMA4-NEXT: vfnmsubps %xmm2, %xmm1, %xmm0, %xmm0
+; FMA4-NEXT: vfnmsubps {{.*#+}} xmm0 = -(xmm0 * xmm1) - xmm2
; FMA4-NEXT: retq
;
; AVX512-LABEL: test_v4f32_fneg_fmul:
; FMA4-LABEL: test_v4f64_fneg_fmul:
; FMA4: # %bb.0:
; FMA4-NEXT: vxorpd %xmm2, %xmm2, %xmm2
-; FMA4-NEXT: vfnmsubpd %ymm2, %ymm1, %ymm0, %ymm0
+; FMA4-NEXT: vfnmsubpd {{.*#+}} ymm0 = -(ymm0 * ymm1) - ymm2
; FMA4-NEXT: retq
;
; AVX512-LABEL: test_v4f64_fneg_fmul:
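
The fneg tests confirm that negation is absorbed into the opcode instead of costing a separate xor: negating a multiply-add flips both signs (fmadd <-> fnmsub, fmsub <-> fnmadd), and a negated bare multiply borrows a zeroed register so it can be expressed as -(a * b) - 0. Scalar sketch of both folds (illustrative names):

    #include <math.h>
    // fneg(fmadd): -((a * b) + c) == -(a * b) - c, i.e. fnmsub
    float neg_fmadd(float a, float b, float c) { return fmaf(-a, b, -c); }
    // fneg(fmul): no addend available, so subtract +0.0 instead
    float neg_fmul(float a, float b) { return fmaf(-a, b, -0.0f); }
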
;
; FMA4-LABEL: test_16f32_fmadd:
; FMA4: # %bb.0:
-; FMA4-NEXT: vfmaddps %ymm4, %ymm2, %ymm0, %ymm0
-; FMA4-NEXT: vfmaddps %ymm5, %ymm3, %ymm1, %ymm1
+; FMA4-NEXT: vfmaddps {{.*#+}} ymm0 = (ymm0 * ymm2) + ymm4
+; FMA4-NEXT: vfmaddps {{.*#+}} ymm1 = (ymm1 * ymm3) + ymm5
; FMA4-NEXT: retq
;
; AVX512-LABEL: test_16f32_fmadd:
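
FMA4 predates EVEX and has no 512-bit forms, so the <16 x float> and <8 x double> tests in this group legalize into two independent 256-bit halves, one per ymm pair, as the doubled comments show. Roughly (sketch with the 256-bit FMA4 intrinsic; assumes -mfma4):

    #include <x86intrin.h>
    // One 512-bit fmadd splits into two 256-bit halves.
    void fmadd_16f32(float *d, const float *a, const float *b, const float *c) {
      __m256 lo = _mm256_macc_ps(_mm256_loadu_ps(a),     _mm256_loadu_ps(b),     _mm256_loadu_ps(c));
      __m256 hi = _mm256_macc_ps(_mm256_loadu_ps(a + 8), _mm256_loadu_ps(b + 8), _mm256_loadu_ps(c + 8));
      _mm256_storeu_ps(d,     lo);
      _mm256_storeu_ps(d + 8, hi);
    }
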
;
; FMA4-LABEL: test_8f64_fmadd:
; FMA4: # %bb.0:
-; FMA4-NEXT: vfmaddpd %ymm4, %ymm2, %ymm0, %ymm0
-; FMA4-NEXT: vfmaddpd %ymm5, %ymm3, %ymm1, %ymm1
+; FMA4-NEXT: vfmaddpd {{.*#+}} ymm0 = (ymm0 * ymm2) + ymm4
+; FMA4-NEXT: vfmaddpd {{.*#+}} ymm1 = (ymm1 * ymm3) + ymm5
; FMA4-NEXT: retq
;
; AVX512-LABEL: test_8f64_fmadd:
;
; FMA4-LABEL: test_16f32_fmsub:
; FMA4: # %bb.0:
-; FMA4-NEXT: vfmsubps %ymm4, %ymm2, %ymm0, %ymm0
-; FMA4-NEXT: vfmsubps %ymm5, %ymm3, %ymm1, %ymm1
+; FMA4-NEXT: vfmsubps {{.*#+}} ymm0 = (ymm0 * ymm2) - ymm4
+; FMA4-NEXT: vfmsubps {{.*#+}} ymm1 = (ymm1 * ymm3) - ymm5
; FMA4-NEXT: retq
;
; AVX512-LABEL: test_16f32_fmsub:
;
; FMA4-LABEL: test_8f64_fmsub:
; FMA4: # %bb.0:
-; FMA4-NEXT: vfmsubpd %ymm4, %ymm2, %ymm0, %ymm0
-; FMA4-NEXT: vfmsubpd %ymm5, %ymm3, %ymm1, %ymm1
+; FMA4-NEXT: vfmsubpd {{.*#+}} ymm0 = (ymm0 * ymm2) - ymm4
+; FMA4-NEXT: vfmsubpd {{.*#+}} ymm1 = (ymm1 * ymm3) - ymm5
; FMA4-NEXT: retq
;
; AVX512-LABEL: test_8f64_fmsub:
;
; FMA4-LABEL: test_16f32_fnmadd:
; FMA4: # %bb.0:
-; FMA4-NEXT: vfnmaddps %ymm4, %ymm2, %ymm0, %ymm0
-; FMA4-NEXT: vfnmaddps %ymm5, %ymm3, %ymm1, %ymm1
+; FMA4-NEXT: vfnmaddps {{.*#+}} ymm0 = -(ymm0 * ymm2) + ymm4
+; FMA4-NEXT: vfnmaddps {{.*#+}} ymm1 = -(ymm1 * ymm3) + ymm5
; FMA4-NEXT: retq
;
; AVX512-LABEL: test_16f32_fnmadd:
;
; FMA4-LABEL: test_8f64_fnmadd:
; FMA4: # %bb.0:
-; FMA4-NEXT: vfnmaddpd %ymm4, %ymm2, %ymm0, %ymm0
-; FMA4-NEXT: vfnmaddpd %ymm5, %ymm3, %ymm1, %ymm1
+; FMA4-NEXT: vfnmaddpd {{.*#+}} ymm0 = -(ymm0 * ymm2) + ymm4
+; FMA4-NEXT: vfnmaddpd {{.*#+}} ymm1 = -(ymm1 * ymm3) + ymm5
; FMA4-NEXT: retq
;
; AVX512-LABEL: test_8f64_fnmadd:
;
; FMA4-LABEL: test_16f32_fnmsub:
; FMA4: # %bb.0:
-; FMA4-NEXT: vfnmsubps %ymm4, %ymm2, %ymm0, %ymm0
-; FMA4-NEXT: vfnmsubps %ymm5, %ymm3, %ymm1, %ymm1
+; FMA4-NEXT: vfnmsubps {{.*#+}} ymm0 = -(ymm0 * ymm2) - ymm4
+; FMA4-NEXT: vfnmsubps {{.*#+}} ymm1 = -(ymm1 * ymm3) - ymm5
; FMA4-NEXT: retq
;
; AVX512-LABEL: test_16f32_fnmsub:
;
; FMA4-LABEL: test_8f64_fnmsub:
; FMA4: # %bb.0:
-; FMA4-NEXT: vfnmsubpd %ymm4, %ymm2, %ymm0, %ymm0
-; FMA4-NEXT: vfnmsubpd %ymm5, %ymm3, %ymm1, %ymm1
+; FMA4-NEXT: vfnmsubpd {{.*#+}} ymm0 = -(ymm0 * ymm2) - ymm4
+; FMA4-NEXT: vfnmsubpd {{.*#+}} ymm1 = -(ymm1 * ymm3) - ymm5
; FMA4-NEXT: retq
;
; AVX512-LABEL: test_8f64_fnmsub:
;
; FMA4-LABEL: test_16f32_fmadd_load:
; FMA4: # %bb.0:
-; FMA4-NEXT: vfmaddps %ymm2, (%rdi), %ymm0, %ymm0
-; FMA4-NEXT: vfmaddps %ymm3, 32(%rdi), %ymm1, %ymm1
+; FMA4-NEXT: vfmaddps {{.*#+}} ymm0 = (ymm0 * mem) + ymm2
+; FMA4-NEXT: vfmaddps {{.*#+}} ymm1 = (ymm1 * mem) + ymm3
; FMA4-NEXT: retq
;
; AVX512-LABEL: test_16f32_fmadd_load:
;
; FMA4-LABEL: test_8f64_fmsub_load:
; FMA4: # %bb.0:
-; FMA4-NEXT: vfmsubpd %ymm2, (%rdi), %ymm0, %ymm0
-; FMA4-NEXT: vfmsubpd %ymm3, 32(%rdi), %ymm1, %ymm1
+; FMA4-NEXT: vfmsubpd {{.*#+}} ymm0 = (ymm0 * mem) - ymm2
+; FMA4-NEXT: vfmsubpd {{.*#+}} ymm1 = (ymm1 * mem) - ymm3
; FMA4-NEXT: retq
;
; AVX512-LABEL: test_8f64_fmsub_load:
;
; FMA4-NOINFS-LABEL: test_v16f32_mul_add_x_one_y:
; FMA4-NOINFS: # %bb.0:
-; FMA4-NOINFS-NEXT: vfmaddps %ymm2, %ymm2, %ymm0, %ymm0
-; FMA4-NOINFS-NEXT: vfmaddps %ymm3, %ymm3, %ymm1, %ymm1
+; FMA4-NOINFS-NEXT: vfmaddps {{.*#+}} ymm0 = (ymm0 * ymm2) + ymm2
+; FMA4-NOINFS-NEXT: vfmaddps {{.*#+}} ymm1 = (ymm1 * ymm3) + ymm3
; FMA4-NOINFS-NEXT: retq
;
; AVX512-NOINFS-LABEL: test_v16f32_mul_add_x_one_y:
;
; FMA4-NOINFS-LABEL: test_v8f64_mul_y_add_x_one:
; FMA4-NOINFS: # %bb.0:
-; FMA4-NOINFS-NEXT: vfmaddpd %ymm2, %ymm2, %ymm0, %ymm0
-; FMA4-NOINFS-NEXT: vfmaddpd %ymm3, %ymm3, %ymm1, %ymm1
+; FMA4-NOINFS-NEXT: vfmaddpd {{.*#+}} ymm0 = (ymm0 * ymm2) + ymm2
+; FMA4-NOINFS-NEXT: vfmaddpd {{.*#+}} ymm1 = (ymm1 * ymm3) + ymm3
; FMA4-NOINFS-NEXT: retq
;
; AVX512-NOINFS-LABEL: test_v8f64_mul_y_add_x_one:
;
; FMA4-NOINFS-LABEL: test_v16f32_mul_add_x_negone_y:
; FMA4-NOINFS: # %bb.0:
-; FMA4-NOINFS-NEXT: vfmsubps %ymm2, %ymm2, %ymm0, %ymm0
-; FMA4-NOINFS-NEXT: vfmsubps %ymm3, %ymm3, %ymm1, %ymm1
+; FMA4-NOINFS-NEXT: vfmsubps {{.*#+}} ymm0 = (ymm0 * ymm2) - ymm2
+; FMA4-NOINFS-NEXT: vfmsubps {{.*#+}} ymm1 = (ymm1 * ymm3) - ymm3
; FMA4-NOINFS-NEXT: retq
;
; AVX512-NOINFS-LABEL: test_v16f32_mul_add_x_negone_y:
;
; FMA4-NOINFS-LABEL: test_v8f64_mul_y_add_x_negone:
; FMA4-NOINFS: # %bb.0:
-; FMA4-NOINFS-NEXT: vfmsubpd %ymm2, %ymm2, %ymm0, %ymm0
-; FMA4-NOINFS-NEXT: vfmsubpd %ymm3, %ymm3, %ymm1, %ymm1
+; FMA4-NOINFS-NEXT: vfmsubpd {{.*#+}} ymm0 = (ymm0 * ymm2) - ymm2
+; FMA4-NOINFS-NEXT: vfmsubpd {{.*#+}} ymm1 = (ymm1 * ymm3) - ymm3
; FMA4-NOINFS-NEXT: retq
;
; AVX512-NOINFS-LABEL: test_v8f64_mul_y_add_x_negone:
;
; FMA4-NOINFS-LABEL: test_v16f32_mul_sub_one_x_y:
; FMA4-NOINFS: # %bb.0:
-; FMA4-NOINFS-NEXT: vfnmaddps %ymm2, %ymm2, %ymm0, %ymm0
-; FMA4-NOINFS-NEXT: vfnmaddps %ymm3, %ymm3, %ymm1, %ymm1
+; FMA4-NOINFS-NEXT: vfnmaddps {{.*#+}} ymm0 = -(ymm0 * ymm2) + ymm2
+; FMA4-NOINFS-NEXT: vfnmaddps {{.*#+}} ymm1 = -(ymm1 * ymm3) + ymm3
; FMA4-NOINFS-NEXT: retq
;
; AVX512-NOINFS-LABEL: test_v16f32_mul_sub_one_x_y:
;
; FMA4-NOINFS-LABEL: test_v8f64_mul_y_sub_one_x:
; FMA4-NOINFS: # %bb.0:
-; FMA4-NOINFS-NEXT: vfnmaddpd %ymm2, %ymm2, %ymm0, %ymm0
-; FMA4-NOINFS-NEXT: vfnmaddpd %ymm3, %ymm3, %ymm1, %ymm1
+; FMA4-NOINFS-NEXT: vfnmaddpd {{.*#+}} ymm0 = -(ymm0 * ymm2) + ymm2
+; FMA4-NOINFS-NEXT: vfnmaddpd {{.*#+}} ymm1 = -(ymm1 * ymm3) + ymm3
; FMA4-NOINFS-NEXT: retq
;
; AVX512-NOINFS-LABEL: test_v8f64_mul_y_sub_one_x:
;
; FMA4-NOINFS-LABEL: test_v16f32_mul_sub_negone_x_y:
; FMA4-NOINFS: # %bb.0:
-; FMA4-NOINFS-NEXT: vfnmsubps %ymm2, %ymm2, %ymm0, %ymm0
-; FMA4-NOINFS-NEXT: vfnmsubps %ymm3, %ymm3, %ymm1, %ymm1
+; FMA4-NOINFS-NEXT: vfnmsubps {{.*#+}} ymm0 = -(ymm0 * ymm2) - ymm2
+; FMA4-NOINFS-NEXT: vfnmsubps {{.*#+}} ymm1 = -(ymm1 * ymm3) - ymm3
; FMA4-NOINFS-NEXT: retq
;
; AVX512-NOINFS-LABEL: test_v16f32_mul_sub_negone_x_y:
;
; FMA4-NOINFS-LABEL: test_v8f64_mul_y_sub_negone_x:
; FMA4-NOINFS: # %bb.0:
-; FMA4-NOINFS-NEXT: vfnmsubpd %ymm2, %ymm2, %ymm0, %ymm0
-; FMA4-NOINFS-NEXT: vfnmsubpd %ymm3, %ymm3, %ymm1, %ymm1
+; FMA4-NOINFS-NEXT: vfnmsubpd {{.*#+}} ymm0 = -(ymm0 * ymm2) - ymm2
+; FMA4-NOINFS-NEXT: vfnmsubpd {{.*#+}} ymm1 = -(ymm1 * ymm3) - ymm3
; FMA4-NOINFS-NEXT: retq
;
; AVX512-NOINFS-LABEL: test_v8f64_mul_y_sub_negone_x:
;
; FMA4-NOINFS-LABEL: test_v16f32_mul_sub_x_one_y:
; FMA4-NOINFS: # %bb.0:
-; FMA4-NOINFS-NEXT: vfmsubps %ymm2, %ymm2, %ymm0, %ymm0
-; FMA4-NOINFS-NEXT: vfmsubps %ymm3, %ymm3, %ymm1, %ymm1
+; FMA4-NOINFS-NEXT: vfmsubps {{.*#+}} ymm0 = (ymm0 * ymm2) - ymm2
+; FMA4-NOINFS-NEXT: vfmsubps {{.*#+}} ymm1 = (ymm1 * ymm3) - ymm3
; FMA4-NOINFS-NEXT: retq
;
; AVX512-NOINFS-LABEL: test_v16f32_mul_sub_x_one_y:
;
; FMA4-NOINFS-LABEL: test_v8f64_mul_y_sub_x_one:
; FMA4-NOINFS: # %bb.0:
-; FMA4-NOINFS-NEXT: vfmsubpd %ymm2, %ymm2, %ymm0, %ymm0
-; FMA4-NOINFS-NEXT: vfmsubpd %ymm3, %ymm3, %ymm1, %ymm1
+; FMA4-NOINFS-NEXT: vfmsubpd {{.*#+}} ymm0 = (ymm0 * ymm2) - ymm2
+; FMA4-NOINFS-NEXT: vfmsubpd {{.*#+}} ymm1 = (ymm1 * ymm3) - ymm3
; FMA4-NOINFS-NEXT: retq
;
; AVX512-NOINFS-LABEL: test_v8f64_mul_y_sub_x_one:
;
; FMA4-NOINFS-LABEL: test_v16f32_mul_sub_x_negone_y:
; FMA4-NOINFS: # %bb.0:
-; FMA4-NOINFS-NEXT: vfmaddps %ymm2, %ymm2, %ymm0, %ymm0
-; FMA4-NOINFS-NEXT: vfmaddps %ymm3, %ymm3, %ymm1, %ymm1
+; FMA4-NOINFS-NEXT: vfmaddps {{.*#+}} ymm0 = (ymm0 * ymm2) + ymm2
+; FMA4-NOINFS-NEXT: vfmaddps {{.*#+}} ymm1 = (ymm1 * ymm3) + ymm3
; FMA4-NOINFS-NEXT: retq
;
; AVX512-NOINFS-LABEL: test_v16f32_mul_sub_x_negone_y:
;
; FMA4-NOINFS-LABEL: test_v8f64_mul_y_sub_x_negone:
; FMA4-NOINFS: # %bb.0:
-; FMA4-NOINFS-NEXT: vfmaddpd %ymm2, %ymm2, %ymm0, %ymm0
-; FMA4-NOINFS-NEXT: vfmaddpd %ymm3, %ymm3, %ymm1, %ymm1
+; FMA4-NOINFS-NEXT: vfmaddpd {{.*#+}} ymm0 = (ymm0 * ymm2) + ymm2
+; FMA4-NOINFS-NEXT: vfmaddpd {{.*#+}} ymm1 = (ymm1 * ymm3) + ymm3
; FMA4-NOINFS-NEXT: retq
;
; AVX512-NOINFS-LABEL: test_v8f64_mul_y_sub_x_negone:
; FMA4-INFS-NEXT: vsubps %ymm5, %ymm6, %ymm6
; FMA4-INFS-NEXT: vmulps %ymm6, %ymm3, %ymm3
; FMA4-INFS-NEXT: vmulps %ymm7, %ymm2, %ymm2
-; FMA4-INFS-NEXT: vfmaddps %ymm2, %ymm4, %ymm0, %ymm0
-; FMA4-INFS-NEXT: vfmaddps %ymm3, %ymm5, %ymm1, %ymm1
+; FMA4-INFS-NEXT: vfmaddps {{.*#+}} ymm0 = (ymm0 * ymm4) + ymm2
+; FMA4-INFS-NEXT: vfmaddps {{.*#+}} ymm1 = (ymm1 * ymm5) + ymm3
; FMA4-INFS-NEXT: retq
;
; AVX512-INFS-LABEL: test_v16f32_interp:
;
; FMA4-NOINFS-LABEL: test_v16f32_interp:
; FMA4-NOINFS: # %bb.0:
-; FMA4-NOINFS-NEXT: vfmsubps %ymm3, %ymm3, %ymm5, %ymm3
-; FMA4-NOINFS-NEXT: vfmsubps %ymm2, %ymm2, %ymm4, %ymm2
-; FMA4-NOINFS-NEXT: vfmsubps %ymm2, %ymm4, %ymm0, %ymm0
-; FMA4-NOINFS-NEXT: vfmsubps %ymm3, %ymm5, %ymm1, %ymm1
+; FMA4-NOINFS-NEXT: vfmsubps {{.*#+}} ymm3 = (ymm5 * ymm3) - ymm3
+; FMA4-NOINFS-NEXT: vfmsubps {{.*#+}} ymm2 = (ymm4 * ymm2) - ymm2
+; FMA4-NOINFS-NEXT: vfmsubps {{.*#+}} ymm0 = (ymm0 * ymm4) - ymm2
+; FMA4-NOINFS-NEXT: vfmsubps {{.*#+}} ymm1 = (ymm1 * ymm5) - ymm3
; FMA4-NOINFS-NEXT: retq
;
; AVX512-NOINFS-LABEL: test_v16f32_interp:
; FMA4-INFS-NEXT: vsubpd %ymm5, %ymm6, %ymm6
; FMA4-INFS-NEXT: vmulpd %ymm6, %ymm3, %ymm3
; FMA4-INFS-NEXT: vmulpd %ymm7, %ymm2, %ymm2
-; FMA4-INFS-NEXT: vfmaddpd %ymm2, %ymm4, %ymm0, %ymm0
-; FMA4-INFS-NEXT: vfmaddpd %ymm3, %ymm5, %ymm1, %ymm1
+; FMA4-INFS-NEXT: vfmaddpd {{.*#+}} ymm0 = (ymm0 * ymm4) + ymm2
+; FMA4-INFS-NEXT: vfmaddpd {{.*#+}} ymm1 = (ymm1 * ymm5) + ymm3
; FMA4-INFS-NEXT: retq
;
; AVX512-INFS-LABEL: test_v8f64_interp:
;
; FMA4-NOINFS-LABEL: test_v8f64_interp:
; FMA4-NOINFS: # %bb.0:
-; FMA4-NOINFS-NEXT: vfmsubpd %ymm3, %ymm3, %ymm5, %ymm3
-; FMA4-NOINFS-NEXT: vfmsubpd %ymm2, %ymm2, %ymm4, %ymm2
-; FMA4-NOINFS-NEXT: vfmsubpd %ymm2, %ymm4, %ymm0, %ymm0
-; FMA4-NOINFS-NEXT: vfmsubpd %ymm3, %ymm5, %ymm1, %ymm1
+; FMA4-NOINFS-NEXT: vfmsubpd {{.*#+}} ymm3 = (ymm5 * ymm3) - ymm3
+; FMA4-NOINFS-NEXT: vfmsubpd {{.*#+}} ymm2 = (ymm4 * ymm2) - ymm2
+; FMA4-NOINFS-NEXT: vfmsubpd {{.*#+}} ymm0 = (ymm0 * ymm4) - ymm2
+; FMA4-NOINFS-NEXT: vfmsubpd {{.*#+}} ymm1 = (ymm1 * ymm5) - ymm3
; FMA4-NOINFS-NEXT: retq
;
; AVX512-NOINFS-LABEL: test_v8f64_interp:
;
; FMA4-LABEL: test_v16f32_fneg_fmadd:
; FMA4: # %bb.0:
-; FMA4-NEXT: vfnmsubps %ymm4, %ymm2, %ymm0, %ymm0
-; FMA4-NEXT: vfnmsubps %ymm5, %ymm3, %ymm1, %ymm1
+; FMA4-NEXT: vfnmsubps {{.*#+}} ymm0 = -(ymm0 * ymm2) - ymm4
+; FMA4-NEXT: vfnmsubps {{.*#+}} ymm1 = -(ymm1 * ymm3) - ymm5
; FMA4-NEXT: retq
;
; AVX512-LABEL: test_v16f32_fneg_fmadd:
;
; FMA4-LABEL: test_v8f64_fneg_fmsub:
; FMA4: # %bb.0:
-; FMA4-NEXT: vfnmaddpd %ymm4, %ymm2, %ymm0, %ymm0
-; FMA4-NEXT: vfnmaddpd %ymm5, %ymm3, %ymm1, %ymm1
+; FMA4-NEXT: vfnmaddpd {{.*#+}} ymm0 = -(ymm0 * ymm2) + ymm4
+; FMA4-NEXT: vfnmaddpd {{.*#+}} ymm1 = -(ymm1 * ymm3) + ymm5
; FMA4-NEXT: retq
;
; AVX512-LABEL: test_v8f64_fneg_fmsub:
;
; FMA4-LABEL: test_v16f32_fneg_fnmadd:
; FMA4: # %bb.0:
-; FMA4-NEXT: vfmsubps %ymm4, %ymm2, %ymm0, %ymm0
-; FMA4-NEXT: vfmsubps %ymm5, %ymm3, %ymm1, %ymm1
+; FMA4-NEXT: vfmsubps {{.*#+}} ymm0 = (ymm0 * ymm2) - ymm4
+; FMA4-NEXT: vfmsubps {{.*#+}} ymm1 = (ymm1 * ymm3) - ymm5
; FMA4-NEXT: retq
;
; AVX512-LABEL: test_v16f32_fneg_fnmadd:
;
; FMA4-LABEL: test_v8f64_fneg_fnmsub:
; FMA4: # %bb.0:
-; FMA4-NEXT: vfmaddpd %ymm4, %ymm2, %ymm0, %ymm0
-; FMA4-NEXT: vfmaddpd %ymm5, %ymm3, %ymm1, %ymm1
+; FMA4-NEXT: vfmaddpd {{.*#+}} ymm0 = (ymm0 * ymm2) + ymm4
+; FMA4-NEXT: vfmaddpd {{.*#+}} ymm1 = (ymm1 * ymm3) + ymm5
; FMA4-NEXT: retq
;
; AVX512-LABEL: test_v8f64_fneg_fnmsub:
;
; FMA4-LABEL: test_v16f32_fma_fmul_x_c1_c2_y:
; FMA4: # %bb.0:
-; FMA4-NEXT: vfmaddps %ymm2, {{.*}}(%rip), %ymm0, %ymm0
-; FMA4-NEXT: vfmaddps %ymm3, {{.*}}(%rip), %ymm1, %ymm1
+; FMA4-NEXT: vfmaddps {{.*#+}} ymm0 = (ymm0 * mem) + ymm2
+; FMA4-NEXT: vfmaddps {{.*#+}} ymm1 = (ymm1 * mem) + ymm3
; FMA4-NEXT: retq
;
; AVX512-LABEL: test_v16f32_fma_fmul_x_c1_c2_y:
; FMA4-LABEL: test_v16f32_fneg_fmul:
; FMA4: # %bb.0:
; FMA4-NEXT: vxorps %xmm4, %xmm4, %xmm4
-; FMA4-NEXT: vfnmsubps %ymm4, %ymm2, %ymm0, %ymm0
-; FMA4-NEXT: vfnmsubps %ymm4, %ymm3, %ymm1, %ymm1
+; FMA4-NEXT: vfnmsubps {{.*#+}} ymm0 = -(ymm0 * ymm2) - ymm4
+; FMA4-NEXT: vfnmsubps {{.*#+}} ymm1 = -(ymm1 * ymm3) - ymm4
; FMA4-NEXT: retq
;
; AVX512-LABEL: test_v16f32_fneg_fmul:
; FMA4-LABEL: test_v8f64_fneg_fmul:
; FMA4: # %bb.0:
; FMA4-NEXT: vxorpd %xmm4, %xmm4, %xmm4
-; FMA4-NEXT: vfnmsubpd %ymm4, %ymm2, %ymm0, %ymm0
-; FMA4-NEXT: vfnmsubpd %ymm4, %ymm3, %ymm1, %ymm1
+; FMA4-NEXT: vfnmsubpd {{.*#+}} ymm0 = -(ymm0 * ymm2) - ymm4
+; FMA4-NEXT: vfnmsubpd {{.*#+}} ymm1 = -(ymm1 * ymm3) - ymm4
; FMA4-NEXT: retq
;
; AVX512-LABEL: test_v8f64_fneg_fmul:
;
; FMA4-LABEL: mul_addsub_pd128:
; FMA4: # %bb.0: # %entry
-; FMA4-NEXT: vfmaddsubpd %xmm2, %xmm1, %xmm0, %xmm0
+; FMA4-NEXT: vfmaddsubpd {{.*#+}} xmm0 = (xmm0 * xmm1) +/- xmm2
; FMA4-NEXT: retq
entry:
%AB = fmul <2 x double> %A, %B
;
; FMA4-LABEL: mul_addsub_ps128:
; FMA4: # %bb.0: # %entry
-; FMA4-NEXT: vfmaddsubps %xmm2, %xmm1, %xmm0, %xmm0
+; FMA4-NEXT: vfmaddsubps {{.*#+}} xmm0 = (xmm0 * xmm1) +/- xmm2
; FMA4-NEXT: retq
entry:
%AB = fmul <4 x float> %A, %B
;
; FMA4-LABEL: mul_addsub_pd256:
; FMA4: # %bb.0: # %entry
-; FMA4-NEXT: vfmaddsubpd %ymm2, %ymm1, %ymm0, %ymm0
+; FMA4-NEXT: vfmaddsubpd {{.*#+}} ymm0 = (ymm0 * ymm1) +/- ymm2
; FMA4-NEXT: retq
entry:
%AB = fmul <4 x double> %A, %B
;
; FMA4-LABEL: mul_addsub_ps256:
; FMA4: # %bb.0: # %entry
-; FMA4-NEXT: vfmaddsubps %ymm2, %ymm1, %ymm0, %ymm0
+; FMA4-NEXT: vfmaddsubps {{.*#+}} ymm0 = (ymm0 * ymm1) +/- ymm2
; FMA4-NEXT: retq
entry:
%AB = fmul <8 x float> %A, %B
;
; FMA4-LABEL: mul_addsub_pd512:
; FMA4: # %bb.0: # %entry
-; FMA4-NEXT: vfmaddsubpd %ymm4, %ymm2, %ymm0, %ymm0
-; FMA4-NEXT: vfmaddsubpd %ymm5, %ymm3, %ymm1, %ymm1
+; FMA4-NEXT: vfmaddsubpd {{.*#+}} ymm0 = (ymm0 * ymm2) +/- ymm4
+; FMA4-NEXT: vfmaddsubpd {{.*#+}} ymm1 = (ymm1 * ymm3) +/- ymm5
; FMA4-NEXT: retq
entry:
%AB = fmul <8 x double> %A, %B
;
; FMA4-LABEL: mul_addsub_ps512:
; FMA4: # %bb.0: # %entry
-; FMA4-NEXT: vfmaddsubps %ymm4, %ymm2, %ymm0, %ymm0
-; FMA4-NEXT: vfmaddsubps %ymm5, %ymm3, %ymm1, %ymm1
+; FMA4-NEXT: vfmaddsubps {{.*#+}} ymm0 = (ymm0 * ymm2) +/- ymm4
+; FMA4-NEXT: vfmaddsubps {{.*#+}} ymm1 = (ymm1 * ymm3) +/- ymm5
; FMA4-NEXT: retq
entry:
%AB = fmul <16 x float> %A, %B
;
; FMA4-LABEL: buildvector_mul_addsub_ps128:
; FMA4: # %bb.0: # %bb
-; FMA4-NEXT: vfmaddsubps %xmm2, %xmm1, %xmm0, %xmm0
+; FMA4-NEXT: vfmaddsubps {{.*#+}} xmm0 = (xmm0 * xmm1) +/- xmm2
; FMA4-NEXT: retq
bb:
%A = fmul <4 x float> %C, %D
;
; FMA4-LABEL: buildvector_mul_addsub_pd128:
; FMA4: # %bb.0: # %bb
-; FMA4-NEXT: vfmaddsubpd %xmm2, %xmm1, %xmm0, %xmm0
+; FMA4-NEXT: vfmaddsubpd {{.*#+}} xmm0 = (xmm0 * xmm1) +/- xmm2
; FMA4-NEXT: retq
bb:
%A = fmul <2 x double> %C, %D
;
; FMA4-LABEL: buildvector_mul_addsub_ps256:
; FMA4: # %bb.0: # %bb
-; FMA4-NEXT: vfmaddsubps %ymm2, %ymm1, %ymm0, %ymm0
+; FMA4-NEXT: vfmaddsubps {{.*#+}} ymm0 = (ymm0 * ymm1) +/- ymm2
; FMA4-NEXT: retq
bb:
%A = fmul <8 x float> %C, %D
;
; FMA4-LABEL: buildvector_mul_addsub_pd256:
; FMA4: # %bb.0: # %bb
-; FMA4-NEXT: vfmaddsubpd %ymm2, %ymm1, %ymm0, %ymm0
+; FMA4-NEXT: vfmaddsubpd {{.*#+}} ymm0 = (ymm0 * ymm1) +/- ymm2
; FMA4-NEXT: retq
bb:
%A = fmul <4 x double> %C, %D
;
; FMA4-LABEL: buildvector_mul_addsub_ps512:
; FMA4: # %bb.0: # %bb
-; FMA4-NEXT: vfmaddsubps %ymm4, %ymm2, %ymm0, %ymm0
-; FMA4-NEXT: vfmaddsubps %ymm5, %ymm3, %ymm1, %ymm1
+; FMA4-NEXT: vfmaddsubps {{.*#+}} ymm0 = (ymm0 * ymm2) +/- ymm4
+; FMA4-NEXT: vfmaddsubps {{.*#+}} ymm1 = (ymm1 * ymm3) +/- ymm5
; FMA4-NEXT: retq
bb:
%A = fmul <16 x float> %C, %D
;
; FMA4-LABEL: buildvector_mul_addsub_pd512:
; FMA4: # %bb.0: # %bb
-; FMA4-NEXT: vfmaddsubpd %ymm4, %ymm2, %ymm0, %ymm0
-; FMA4-NEXT: vfmaddsubpd %ymm5, %ymm3, %ymm1, %ymm1
+; FMA4-NEXT: vfmaddsubpd {{.*#+}} ymm0 = (ymm0 * ymm2) +/- ymm4
+; FMA4-NEXT: vfmaddsubpd {{.*#+}} ymm1 = (ymm1 * ymm3) +/- ymm5
; FMA4-NEXT: retq
bb:
%A = fmul <8 x double> %C, %D
;
; FMA4-LABEL: buildvector_mul_subadd_ps128:
; FMA4: # %bb.0: # %bb
-; FMA4-NEXT: vfmsubaddps %xmm2, %xmm1, %xmm0, %xmm0
+; FMA4-NEXT: vfmsubaddps {{.*#+}} xmm0 = (xmm0 * xmm1) -/+ xmm2
; FMA4-NEXT: retq
bb:
%A = fmul <4 x float> %C, %D
;
; FMA4-LABEL: buildvector_mul_subadd_pd128:
; FMA4: # %bb.0: # %bb
-; FMA4-NEXT: vfmsubaddpd %xmm2, %xmm1, %xmm0, %xmm0
+; FMA4-NEXT: vfmsubaddpd {{.*#+}} xmm0 = (xmm0 * xmm1) -/+ xmm2
; FMA4-NEXT: retq
bb:
%A = fmul <2 x double> %C, %D
;
; FMA4-LABEL: buildvector_mul_subadd_ps256:
; FMA4: # %bb.0: # %bb
-; FMA4-NEXT: vfmsubaddps %ymm2, %ymm1, %ymm0, %ymm0
+; FMA4-NEXT: vfmsubaddps {{.*#+}} ymm0 = (ymm0 * ymm1) -/+ ymm2
; FMA4-NEXT: retq
bb:
%A = fmul <8 x float> %C, %D
;
; FMA4-LABEL: buildvector_mul_subadd_pd256:
; FMA4: # %bb.0: # %bb
-; FMA4-NEXT: vfmsubaddpd %ymm2, %ymm1, %ymm0, %ymm0
+; FMA4-NEXT: vfmsubaddpd {{.*#+}} ymm0 = (ymm0 * ymm1) -/+ ymm2
; FMA4-NEXT: retq
bb:
%A = fmul <4 x double> %C, %D
;
; FMA4-LABEL: buildvector_mul_subadd_ps512:
; FMA4: # %bb.0: # %bb
-; FMA4-NEXT: vfmsubaddps %ymm4, %ymm2, %ymm0, %ymm0
-; FMA4-NEXT: vfmsubaddps %ymm5, %ymm3, %ymm1, %ymm1
+; FMA4-NEXT: vfmsubaddps {{.*#+}} ymm0 = (ymm0 * ymm2) -/+ ymm4
+; FMA4-NEXT: vfmsubaddps {{.*#+}} ymm1 = (ymm1 * ymm3) -/+ ymm5
; FMA4-NEXT: retq
bb:
%A = fmul <16 x float> %C, %D
;
; FMA4-LABEL: buildvector_mul_subadd_pd512:
; FMA4: # %bb.0: # %bb
-; FMA4-NEXT: vfmsubaddpd %ymm4, %ymm2, %ymm0, %ymm0
-; FMA4-NEXT: vfmsubaddpd %ymm5, %ymm3, %ymm1, %ymm1
+; FMA4-NEXT: vfmsubaddpd {{.*#+}} ymm0 = (ymm0 * ymm2) -/+ ymm4
+; FMA4-NEXT: vfmsubaddpd {{.*#+}} ymm1 = (ymm1 * ymm3) -/+ ymm5
; FMA4-NEXT: retq
bb:
%A = fmul <8 x double> %C, %D
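
The addsub/subadd groups above use the alternating-sign notation: "+/-" for vfmaddsub and "-/+" for vfmsubadd. Following the vaddsubps convention, vfmaddsub subtracts the addend in even lanes and adds it in odd lanes, and vfmsubadd swaps the pattern (lane ordering stated from the ISA reference; sketch only):

    #include <math.h>
    // "+/-" (vfmaddsubps): even lanes (a*b) - c, odd lanes (a*b) + c.
    void fmaddsub4(float *d, const float *a, const float *b, const float *c) {
      for (int i = 0; i < 4; ++i)
        d[i] = (i & 1) ? fmaf(a[i], b[i],  c[i])   // odd lane:  add
                       : fmaf(a[i], b[i], -c[i]);  // even lane: subtract
    }
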
;
; FMA4-LABEL: f1:
; FMA4: # %bb.0: # %entry
-; FMA4-NEXT: vfnmaddss %xmm2, %xmm1, %xmm0, %xmm0
+; FMA4-NEXT: vfnmaddss {{.*#+}} xmm0 = -(xmm0 * xmm1) + xmm2
; FMA4-NEXT: retq
entry:
%3 = fneg float %0
;
; FMA4-LABEL: f2:
; FMA4: # %bb.0: # %entry
-; FMA4-NEXT: vfnmaddsd %xmm2, %xmm1, %xmm0, %xmm0
+; FMA4-NEXT: vfnmaddsd {{.*#+}} xmm0 = -(xmm0 * xmm1) + xmm2
; FMA4-NEXT: retq
entry:
%3 = fneg double %0
;
; FMA4-LABEL: f3:
; FMA4: # %bb.0: # %entry
-; FMA4-NEXT: vfmsubss %xmm2, %xmm1, %xmm0, %xmm0
+; FMA4-NEXT: vfmsubss {{.*#+}} xmm0 = (xmm0 * xmm1) - xmm2
; FMA4-NEXT: retq
entry:
%3 = fneg float %2
;
; FMA4-LABEL: f4:
; FMA4: # %bb.0: # %entry
-; FMA4-NEXT: vfmsubsd %xmm2, %xmm1, %xmm0, %xmm0
+; FMA4-NEXT: vfmsubsd {{.*#+}} xmm0 = (xmm0 * xmm1) - xmm2
; FMA4-NEXT: retq
entry:
%3 = fneg double %2
;
; FMA4-LABEL: f5:
; FMA4: # %bb.0: # %entry
-; FMA4-NEXT: vfnmsubss %xmm2, %xmm1, %xmm0, %xmm0
+; FMA4-NEXT: vfnmsubss {{.*#+}} xmm0 = -(xmm0 * xmm1) - xmm2
; FMA4-NEXT: retq
entry:
%3 = fneg float %0
;
; FMA4-LABEL: f6:
; FMA4: # %bb.0: # %entry
-; FMA4-NEXT: vfnmsubsd %xmm2, %xmm1, %xmm0, %xmm0
+; FMA4-NEXT: vfnmsubsd {{.*#+}} xmm0 = -(xmm0 * xmm1) - xmm2
; FMA4-NEXT: retq
entry:
%3 = fneg double %0
;
; FMA4-LABEL: f7:
; FMA4: # %bb.0: # %entry
-; FMA4-NEXT: vfmaddss %xmm2, %xmm1, %xmm0, %xmm0
+; FMA4-NEXT: vfmaddss {{.*#+}} xmm0 = (xmm0 * xmm1) + xmm2
; FMA4-NEXT: vxorps {{.*}}(%rip), %xmm0, %xmm0
; FMA4-NEXT: retq
;
;
; FMA4-LABEL: f8:
; FMA4: # %bb.0: # %entry
-; FMA4-NEXT: vfmaddsd %xmm2, %xmm1, %xmm0, %xmm0
+; FMA4-NEXT: vfmaddsd {{.*#+}} xmm0 = (xmm0 * xmm1) + xmm2
; FMA4-NEXT: vxorpd {{.*}}(%rip), %xmm0, %xmm0
; FMA4-NEXT: retq
entry:
;
; FMA4-LABEL: f9:
; FMA4: # %bb.0: # %entry
-; FMA4-NEXT: vfnmsubss %xmm2, %xmm1, %xmm0, %xmm0
+; FMA4-NEXT: vfnmsubss {{.*#+}} xmm0 = -(xmm0 * xmm1) - xmm2
; FMA4-NEXT: vxorps {{.*}}(%rip), %xmm0, %xmm0
; FMA4-NEXT: retq
;
;
; FMA4-LABEL: f10:
; FMA4: # %bb.0: # %entry
-; FMA4-NEXT: vfnmsubsd %xmm2, %xmm1, %xmm0, %xmm0
+; FMA4-NEXT: vfnmsubsd {{.*#+}} xmm0 = -(xmm0 * xmm1) - xmm2
; FMA4-NEXT: vxorpd {{.*}}(%rip), %xmm0, %xmm0
; FMA4-NEXT: retq
entry:
; FMA4-LABEL: f15:
; FMA4: # %bb.0: # %entry
; FMA4-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
-; FMA4-NEXT: vfmaddss %xmm0, %xmm0, %xmm0, %xmm0
+; FMA4-NEXT: vfmaddss {{.*#+}} xmm0 = (xmm0 * xmm0) + xmm0
; FMA4-NEXT: retq
entry:
%result = call float @llvm.experimental.constrained.fmuladd.f32(
; FMA4-LABEL: f16:
; FMA4: # %bb.0: # %entry
; FMA4-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
-; FMA4-NEXT: vfmaddsd %xmm0, %xmm0, %xmm0, %xmm0
+; FMA4-NEXT: vfmaddsd {{.*#+}} xmm0 = (xmm0 * xmm0) + xmm0
; FMA4-NEXT: retq
entry:
%result = call double @llvm.experimental.constrained.fmuladd.f64(
; FMA4-LABEL: f17:
; FMA4: # %bb.0: # %entry
; FMA4-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
-; FMA4-NEXT: vfmaddss %xmm0, %xmm0, %xmm0, %xmm0
+; FMA4-NEXT: vfmaddss {{.*#+}} xmm0 = (xmm0 * xmm0) + xmm0
; FMA4-NEXT: retq
entry:
%result = call float @llvm.experimental.constrained.fma.f32(
; FMA4-LABEL: f18:
; FMA4: # %bb.0: # %entry
; FMA4-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
-; FMA4-NEXT: vfmaddsd %xmm0, %xmm0, %xmm0, %xmm0
+; FMA4-NEXT: vfmaddsd {{.*#+}} xmm0 = (xmm0 * xmm0) + xmm0
; FMA4-NEXT: retq
entry:
%result = call double @llvm.experimental.constrained.fma.f64(
;
; FMA4-LABEL: f19:
; FMA4: # %bb.0: # %entry
-; FMA4-NEXT: vfnmaddps %xmm2, %xmm1, %xmm0, %xmm0
+; FMA4-NEXT: vfnmaddps {{.*#+}} xmm0 = -(xmm0 * xmm1) + xmm2
; FMA4-NEXT: retq
entry:
%3 = fneg <4 x float> %0
;
; FMA4-LABEL: f20:
; FMA4: # %bb.0: # %entry
-; FMA4-NEXT: vfnmaddpd %xmm2, %xmm1, %xmm0, %xmm0
+; FMA4-NEXT: vfnmaddpd {{.*#+}} xmm0 = -(xmm0 * xmm1) + xmm2
; FMA4-NEXT: retq
entry:
%3 = fneg <2 x double> %0
;
; FMA4-LABEL: f21:
; FMA4: # %bb.0: # %entry
-; FMA4-NEXT: vfmsubps %xmm2, %xmm1, %xmm0, %xmm0
+; FMA4-NEXT: vfmsubps {{.*#+}} xmm0 = (xmm0 * xmm1) - xmm2
; FMA4-NEXT: retq
entry:
%3 = fneg <4 x float> %2
;
; FMA4-LABEL: f22:
; FMA4: # %bb.0: # %entry
-; FMA4-NEXT: vfmsubpd %xmm2, %xmm1, %xmm0, %xmm0
+; FMA4-NEXT: vfmsubpd {{.*#+}} xmm0 = (xmm0 * xmm1) - xmm2
; FMA4-NEXT: retq
entry:
%3 = fneg <2 x double> %2
;
; FMA4-LABEL: f23:
; FMA4: # %bb.0: # %entry
-; FMA4-NEXT: vfnmsubps %xmm2, %xmm1, %xmm0, %xmm0
+; FMA4-NEXT: vfnmsubps {{.*#+}} xmm0 = -(xmm0 * xmm1) - xmm2
; FMA4-NEXT: retq
entry:
%3 = fneg <4 x float> %0
;
; FMA4-LABEL: f24:
; FMA4: # %bb.0: # %entry
-; FMA4-NEXT: vfnmsubpd %xmm2, %xmm1, %xmm0, %xmm0
+; FMA4-NEXT: vfnmsubpd {{.*#+}} xmm0 = -(xmm0 * xmm1) - xmm2
; FMA4-NEXT: retq
entry:
%3 = fneg <2 x double> %0
;
; FMA4-LABEL: f25:
; FMA4: # %bb.0: # %entry
-; FMA4-NEXT: vfmaddps %xmm2, %xmm1, %xmm0, %xmm0
+; FMA4-NEXT: vfmaddps {{.*#+}} xmm0 = (xmm0 * xmm1) + xmm2
; FMA4-NEXT: vxorps {{.*}}(%rip), %xmm0, %xmm0
; FMA4-NEXT: retq
;
;
; FMA4-LABEL: f26:
; FMA4: # %bb.0: # %entry
-; FMA4-NEXT: vfmaddpd %xmm2, %xmm1, %xmm0, %xmm0
+; FMA4-NEXT: vfmaddpd {{.*#+}} xmm0 = (xmm0 * xmm1) + xmm2
; FMA4-NEXT: vxorpd {{.*}}(%rip), %xmm0, %xmm0
; FMA4-NEXT: retq
entry:
;
; FMA4-LABEL: f27:
; FMA4: # %bb.0: # %entry
-; FMA4-NEXT: vfnmsubps %xmm2, %xmm1, %xmm0, %xmm0
+; FMA4-NEXT: vfnmsubps {{.*#+}} xmm0 = -(xmm0 * xmm1) - xmm2
; FMA4-NEXT: vxorps {{.*}}(%rip), %xmm0, %xmm0
; FMA4-NEXT: retq
;
;
; FMA4-LABEL: f28:
; FMA4: # %bb.0: # %entry
-; FMA4-NEXT: vfnmsubpd %xmm2, %xmm1, %xmm0, %xmm0
+; FMA4-NEXT: vfnmsubpd {{.*#+}} xmm0 = -(xmm0 * xmm1) - xmm2
; FMA4-NEXT: vxorpd {{.*}}(%rip), %xmm0, %xmm0
; FMA4-NEXT: retq
entry:
; CHECK-NEXT: .cfi_def_cfa_register %ebp
; CHECK-NEXT: andl $-32, %esp
; CHECK-NEXT: subl $32, %esp
-; CHECK-NEXT: vfmaddps 8(%ebp), %ymm2, %ymm0, %ymm0
-; CHECK-NEXT: vfmaddps 40(%ebp), %ymm3, %ymm1, %ymm1
+; CHECK-NEXT: vfmaddps {{.*#+}} ymm0 = (ymm0 * ymm2) + mem
+; CHECK-NEXT: vfmaddps {{.*#+}} ymm1 = (ymm1 * ymm3) + mem
; CHECK-NEXT: movl %ebp, %esp
; CHECK-NEXT: popl %ebp
; CHECK-NEXT: retl