From 4aa7b9cc963ae0df6a1411936b879d3c979d7299 Mon Sep 17 00:00:00 2001
From: Simon Pilgrim
Date: Sat, 8 Feb 2020 17:01:04 +0000
Subject: [PATCH] [X86] X86InstComments - add FMA4 comments

These typically match the FMA3 equivalents, although the multiply
operands sometimes get flipped due to the FMA3 permute variants.
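For example, the FMA4 comments now printed alongside the disassembly
look like this (samples taken verbatim from the test updates below):

  vfmaddps %xmm2, %xmm1, %xmm0, %xmm0 # xmm0 = (xmm0 * xmm1) + xmm2
  vfmaddss %xmm1, (%rdi), %xmm0, %xmm0 # xmm0 = (xmm0 * mem) + xmm1
  vfnmsubpd %ymm2, %ymm1, %ymm0, %ymm0 # ymm0 = -(ymm0 * ymm1) - ymm2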
---
 .../X86/MCTargetDesc/X86InstComments.cpp      | 149 +++++++++++++++++-
 .../CodeGen/X86/extended-fma-contraction.ll   |   2 +-
 llvm/test/CodeGen/X86/fma-fneg-combine-2.ll   |  12 +-
 llvm/test/CodeGen/X86/fma4-commute-x86.ll     | 108 ++++++-------
 llvm/test/CodeGen/X86/fma4-fneg-combine.ll    |  20 +--
 .../X86/fma4-intrinsics-x86-upgrade.ll        |  24 +++
 llvm/test/CodeGen/X86/fma4-intrinsics-x86.ll  |  28 ++++
 .../X86/fma4-intrinsics-x86_64-folded-load.ll |  10 ++
 llvm/test/CodeGen/X86/fma4-scalar-memfold.ll  |   8 +-
 llvm/test/CodeGen/X86/fma_patterns.ll         | 140 ++++++++--------
 llvm/test/CodeGen/X86/fma_patterns_wide.ll    | 140 ++++++++--------
 llvm/test/CodeGen/X86/fmaddsub-combine.ll     |  48 +++---
 llvm/test/CodeGen/X86/fp-intrinsics-fma.ll    |  48 +++---
 llvm/test/CodeGen/X86/wide-fma-contraction.ll |   4 +-
 14 files changed, 473 insertions(+), 268 deletions(-)

diff --git a/llvm/lib/Target/X86/MCTargetDesc/X86InstComments.cpp b/llvm/lib/Target/X86/MCTargetDesc/X86InstComments.cpp
index d88e1029cd55..8999da34d71e 100644
--- a/llvm/lib/Target/X86/MCTargetDesc/X86InstComments.cpp
+++ b/llvm/lib/Target/X86/MCTargetDesc/X86InstComments.cpp
@@ -199,6 +199,40 @@ using namespace llvm;
   CASE_AVX512_INS_COMMON(Inst##SD, Z, m_Int) \
   CASE_AVX512_INS_COMMON(Inst##SS, Z, m_Int)
 
+#define CASE_FMA4(Inst, suf) \
+  CASE_AVX_INS_COMMON(Inst, 4, suf) \
+  CASE_AVX_INS_COMMON(Inst, 4Y, suf)
+
+#define CASE_FMA4_PACKED_RR(Inst) \
+  CASE_FMA4(Inst##PD, rr) \
+  CASE_FMA4(Inst##PS, rr)
+
+#define CASE_FMA4_PACKED_RM(Inst) \
+  CASE_FMA4(Inst##PD, rm) \
+  CASE_FMA4(Inst##PS, rm)
+
+#define CASE_FMA4_PACKED_MR(Inst) \
+  CASE_FMA4(Inst##PD, mr) \
+  CASE_FMA4(Inst##PS, mr)
+
+#define CASE_FMA4_SCALAR_RR(Inst) \
+  CASE_AVX_INS_COMMON(Inst##SD4, , rr) \
+  CASE_AVX_INS_COMMON(Inst##SS4, , rr) \
+  CASE_AVX_INS_COMMON(Inst##SD4, , rr_Int) \
+  CASE_AVX_INS_COMMON(Inst##SS4, , rr_Int)
+
+#define CASE_FMA4_SCALAR_RM(Inst) \
+  CASE_AVX_INS_COMMON(Inst##SD4, , rm) \
+  CASE_AVX_INS_COMMON(Inst##SS4, , rm) \
+  CASE_AVX_INS_COMMON(Inst##SD4, , rm_Int) \
+  CASE_AVX_INS_COMMON(Inst##SS4, , rm_Int)
+
+#define CASE_FMA4_SCALAR_MR(Inst) \
+  CASE_AVX_INS_COMMON(Inst##SD4, , mr) \
+  CASE_AVX_INS_COMMON(Inst##SS4, , mr) \
+  CASE_AVX_INS_COMMON(Inst##SD4, , mr_Int) \
+  CASE_AVX_INS_COMMON(Inst##SS4, , mr_Int)
+
 static unsigned getVectorRegSize(unsigned RegNo) {
   if (X86::ZMM0 <= RegNo && RegNo <= X86::ZMM31)
     return 512;
@@ -247,14 +281,14 @@ static void printMasking(raw_ostream &OS, const MCInst *MI,
     OS << " {z}";
 }
 
-static bool printFMA3Comments(const MCInst *MI, raw_ostream &OS) {
+static bool printFMAComments(const MCInst *MI, raw_ostream &OS) {
   const char *Mul1Name = nullptr, *Mul2Name = nullptr, *AccName = nullptr;
   unsigned NumOperands = MI->getNumOperands();
   bool RegForm = false;
   bool Negate = false;
   StringRef AccStr = "+";
 
-  // The operands for FMA instructions without rounding fall into two forms.
+  // The operands for FMA3 instructions without rounding fall into two forms:
   //   dest, src1, src2, src3
   //   dest, src1, mask, src2, src3
   // Where src3 is either a register or 5 memory address operands. So to find
@@ -262,9 +296,118 @@ static bool printFMA3Comments(const MCInst *MI, raw_ostream &OS) {
   // index from the end by taking into account memory vs register form when
   // finding src2.
 
+  // The operands for FMA4 instructions:
+  //   dest, src1, src2, src3
+  // Where src2 OR src3 are either a register or 5 memory address operands. So
+  // to find dest and src1 we can index from the front, src2 (reg/mem) follows
+  // and then src3 (reg) will be at the end.
+
   switch (MI->getOpcode()) {
   default:
     return false;
+
+  CASE_FMA4_PACKED_RR(FMADD)
+  CASE_FMA4_SCALAR_RR(FMADD)
+    RegForm = true;
+    AccName = getRegName(MI->getOperand(NumOperands - 1).getReg());
+    LLVM_FALLTHROUGH;
+  CASE_FMA4_PACKED_RM(FMADD)
+  CASE_FMA4_SCALAR_RM(FMADD)
+    Mul2Name = getRegName(MI->getOperand(2).getReg());
+    Mul1Name = getRegName(MI->getOperand(1).getReg());
+    break;
+  CASE_FMA4_PACKED_MR(FMADD)
+  CASE_FMA4_SCALAR_MR(FMADD)
+    AccName = getRegName(MI->getOperand(NumOperands - 1).getReg());
+    Mul1Name = getRegName(MI->getOperand(1).getReg());
+    break;
+
+  CASE_FMA4_PACKED_RR(FMSUB)
+  CASE_FMA4_SCALAR_RR(FMSUB)
+    RegForm = true;
+    AccName = getRegName(MI->getOperand(NumOperands - 1).getReg());
+    LLVM_FALLTHROUGH;
+  CASE_FMA4_PACKED_RM(FMSUB)
+  CASE_FMA4_SCALAR_RM(FMSUB)
+    Mul2Name = getRegName(MI->getOperand(2).getReg());
+    Mul1Name = getRegName(MI->getOperand(1).getReg());
+    AccStr = "-";
+    break;
+  CASE_FMA4_PACKED_MR(FMSUB)
+  CASE_FMA4_SCALAR_MR(FMSUB)
+    AccName = getRegName(MI->getOperand(NumOperands - 1).getReg());
+    Mul1Name = getRegName(MI->getOperand(1).getReg());
+    AccStr = "-";
+    break;
+
+  CASE_FMA4_PACKED_RR(FNMADD)
+  CASE_FMA4_SCALAR_RR(FNMADD)
+    RegForm = true;
+    AccName = getRegName(MI->getOperand(NumOperands - 1).getReg());
+    LLVM_FALLTHROUGH;
+  CASE_FMA4_PACKED_RM(FNMADD)
+  CASE_FMA4_SCALAR_RM(FNMADD)
+    Mul2Name = getRegName(MI->getOperand(2).getReg());
+    Mul1Name = getRegName(MI->getOperand(1).getReg());
+    Negate = true;
+    break;
+  CASE_FMA4_PACKED_MR(FNMADD)
+  CASE_FMA4_SCALAR_MR(FNMADD)
+    AccName = getRegName(MI->getOperand(NumOperands - 1).getReg());
+    Mul1Name = getRegName(MI->getOperand(1).getReg());
+    Negate = true;
+    break;
+
+  CASE_FMA4_PACKED_RR(FNMSUB)
+  CASE_FMA4_SCALAR_RR(FNMSUB)
+    RegForm = true;
+    AccName = getRegName(MI->getOperand(NumOperands - 1).getReg());
+    LLVM_FALLTHROUGH;
+  CASE_FMA4_PACKED_RM(FNMSUB)
+  CASE_FMA4_SCALAR_RM(FNMSUB)
+    Mul2Name = getRegName(MI->getOperand(2).getReg());
+    Mul1Name = getRegName(MI->getOperand(1).getReg());
+    AccStr = "-";
+    Negate = true;
+    break;
+  CASE_FMA4_PACKED_MR(FNMSUB)
+  CASE_FMA4_SCALAR_MR(FNMSUB)
+    AccName = getRegName(MI->getOperand(NumOperands - 1).getReg());
+    Mul1Name = getRegName(MI->getOperand(1).getReg());
+    AccStr = "-";
+    Negate = true;
+    break;
+
+  CASE_FMA4_PACKED_RR(FMADDSUB)
+    RegForm = true;
+    AccName = getRegName(MI->getOperand(NumOperands - 1).getReg());
+    LLVM_FALLTHROUGH;
+  CASE_FMA4_PACKED_RM(FMADDSUB)
+    Mul2Name = getRegName(MI->getOperand(2).getReg());
+    Mul1Name = getRegName(MI->getOperand(1).getReg());
+    AccStr = "+/-";
+    break;
+  CASE_FMA4_PACKED_MR(FMADDSUB)
+    AccName = getRegName(MI->getOperand(NumOperands - 1).getReg());
+    Mul1Name = getRegName(MI->getOperand(1).getReg());
+    AccStr = "+/-";
+    break;
+
+  CASE_FMA4_PACKED_RR(FMSUBADD)
+    RegForm = true;
+    AccName = getRegName(MI->getOperand(NumOperands - 1).getReg());
+    LLVM_FALLTHROUGH;
+  CASE_FMA4_PACKED_RM(FMSUBADD)
+    Mul2Name = getRegName(MI->getOperand(2).getReg());
+    Mul1Name = getRegName(MI->getOperand(1).getReg());
+    AccStr = "-/+";
+    break;
+  CASE_FMA4_PACKED_MR(FMSUBADD)
+    AccName = getRegName(MI->getOperand(NumOperands - 1).getReg());
+    Mul1Name = getRegName(MI->getOperand(1).getReg());
+    AccStr = "-/+";
+    break;
+
   CASE_FMA_PACKED_REG(FMADD132)
   CASE_FMA_SCALAR_REG(FMADD132)
     Mul2Name = getRegName(MI->getOperand(NumOperands - 1).getReg());
@@ -504,7 +647,7 @@ bool llvm::EmitAnyX86InstComments(const MCInst *MI, raw_ostream &OS,
   unsigned NumOperands = MI->getNumOperands();
   bool RegForm = false;
 
-  if (printFMA3Comments(MI, OS))
+  if (printFMAComments(MI, OS))
     return true;
 
   switch (MI->getOpcode()) {
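As a rough illustration of the operand indexing described in the comments
above, here is a small standalone model of how the rr/rm/mr FMA4 forms map
operands to the printed FMADD comment. This is an assumption-laden sketch,
not LLVM's MCInst API: operands are modelled as plain strings, and the five
memory address operands are stand-in placeholders that the printer collapses
to "mem".

#include <cstdio>
#include <string>
#include <vector>

enum class Form { RR, RM, MR };

// dest and src1 are indexed from the front; src2 follows (a register, or the
// memory reference in the mr form); src3 sits at the end (a register, or the
// memory reference in the rm form). Memory operands print as "mem".
static std::string fma4AddComment(Form F, const std::vector<std::string> &Ops) {
  std::string Mul1 = Ops[1];
  std::string Mul2 = (F == Form::MR) ? "mem" : Ops[2];
  std::string Acc = (F == Form::RM) ? "mem" : Ops[Ops.size() - 1];
  return Ops[0] + " = (" + Mul1 + " * " + Mul2 + ") + " + Acc;
}

int main() {
  // rr: vfmaddps %xmm2, %xmm1, %xmm0, %xmm0
  puts(fma4AddComment(Form::RR, {"xmm0", "xmm0", "xmm1", "xmm2"}).c_str());
  // rm: vfmaddps (%rdi), %xmm1, %xmm0, %xmm0 (src3 is 5 memory address operands)
  puts(fma4AddComment(Form::RM, {"xmm0", "xmm0", "xmm1",
                                 "base", "scale", "index", "disp", "seg"}).c_str());
  // mr: vfmaddps %xmm1, (%rdi), %xmm0, %xmm0 (src2 is 5 memory address operands)
  puts(fma4AddComment(Form::MR, {"xmm0", "xmm0",
                                 "base", "scale", "index", "disp", "seg", "xmm1"}).c_str());
  return 0;
}

This prints "xmm0 = (xmm0 * xmm1) + xmm2", "xmm0 = (xmm0 * xmm1) + mem" and
"xmm0 = (xmm0 * mem) + xmm1", matching the expectations in the updated tests.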
diff --git a/llvm/test/CodeGen/X86/extended-fma-contraction.ll b/llvm/test/CodeGen/X86/extended-fma-contraction.ll
index c072d210750b..d751c4b8b1b7 100644
--- a/llvm/test/CodeGen/X86/extended-fma-contraction.ll
+++ b/llvm/test/CodeGen/X86/extended-fma-contraction.ll
@@ -5,7 +5,7 @@
 define <3 x float> @fmafunc(<3 x float> %a, <3 x float> %b, <3 x float> %c) {
 ; CHECK-LABEL: fmafunc:
 ; CHECK:       ## %bb.0:
-; CHECK-NEXT:    vfmaddps %xmm2, %xmm1, %xmm0, %xmm0
+; CHECK-NEXT:    vfmaddps {{.*#+}} xmm0 = (xmm0 * xmm1) + xmm2
 ; CHECK-NEXT:    retl
 ;
 ; CHECK-NOFMA-LABEL: fmafunc:
diff --git a/llvm/test/CodeGen/X86/fma-fneg-combine-2.ll b/llvm/test/CodeGen/X86/fma-fneg-combine-2.ll
index 0b66d53f5ac9..86f1b9a678e1 100644
--- a/llvm/test/CodeGen/X86/fma-fneg-combine-2.ll
+++ b/llvm/test/CodeGen/X86/fma-fneg-combine-2.ll
@@ -12,7 +12,7 @@ define float @test_fneg_fma_subx_y_negz_f32(float %w, float %x, float %y, float
 ; FMA4-LABEL: test_fneg_fma_subx_y_negz_f32:
 ; FMA4:       # %bb.0: # %entry
 ; FMA4-NEXT:    vsubss %xmm1, %xmm0, %xmm0
-; FMA4-NEXT:    vfnmaddss %xmm3, %xmm2, %xmm0, %xmm0
+; FMA4-NEXT:    vfnmaddss {{.*#+}} xmm0 = -(xmm0 * xmm2) + xmm3
 ; FMA4-NEXT:    retq
 entry:
   %subx = fsub nsz float %w, %x
@@ -32,7 +32,7 @@ define float @test_fneg_fma_x_suby_negz_f32(float %w, float %x, float %y, float
 ; FMA4-LABEL: test_fneg_fma_x_suby_negz_f32:
 ; FMA4:       # %bb.0: # %entry
 ; FMA4-NEXT:    vsubss %xmm2, %xmm0, %xmm0
-; FMA4-NEXT:    vfnmaddss %xmm3, %xmm0, %xmm1, %xmm0
+; FMA4-NEXT:    vfnmaddss {{.*#+}} xmm0 = -(xmm1 * xmm0) + xmm3
 ; FMA4-NEXT:    retq
 entry:
   %suby = fsub nsz float %w, %y
@@ -54,7 +54,7 @@ define float @test_fneg_fma_subx_suby_negz_f32(float %w, float %x, float %y, flo
 ; FMA4:       # %bb.0: # %entry
 ; FMA4-NEXT:    vsubss %xmm1, %xmm0, %xmm1
 ; FMA4-NEXT:    vsubss %xmm2, %xmm0, %xmm0
-; FMA4-NEXT:    vfnmaddss %xmm3, %xmm0, %xmm1, %xmm0
+; FMA4-NEXT:    vfnmaddss {{.*#+}} xmm0 = -(xmm1 * xmm0) + xmm3
 ; FMA4-NEXT:    retq
 entry:
   %subx = fsub nsz float %w, %x
@@ -75,7 +75,7 @@ define float @test_fneg_fma_subx_negy_negz_f32(float %w, float %x, float %y, flo
 ; FMA4-LABEL: test_fneg_fma_subx_negy_negz_f32:
 ; FMA4:       # %bb.0: # %entry
 ; FMA4-NEXT:    vsubss %xmm1, %xmm0, %xmm0
-; FMA4-NEXT:    vfmaddss %xmm3, %xmm2, %xmm0, %xmm0
+; FMA4-NEXT:    vfmaddss {{.*#+}} xmm0 = (xmm0 * xmm2) + xmm3
 ; FMA4-NEXT:    retq
 entry:
   %subx = fsub nsz float %w, %x
@@ -96,7 +96,7 @@ define <4 x float> @test_fma_rcp_fneg_v4f32(<4 x float> %x, <4 x float> %y, <4 x
 ; FMA4-LABEL: test_fma_rcp_fneg_v4f32:
 ; FMA4:       # %bb.0: # %entry
 ; FMA4-NEXT:    vrcpps %xmm2, %xmm2
-; FMA4-NEXT:    vfmsubps %xmm2, %xmm1, %xmm0, %xmm0
+; FMA4-NEXT:    vfmsubps {{.*#+}} xmm0 = (xmm0 * xmm1) - xmm2
 ; FMA4-NEXT:    retq
 entry:
   %0 = fneg <4 x float> %z
@@ -118,7 +118,7 @@ define float @negated_constant(float %x) {
 ; FMA4-LABEL: negated_constant:
 ; FMA4:       # %bb.0:
 ; FMA4-NEXT:    vmulss {{.*}}(%rip), %xmm0, %xmm1
-; FMA4-NEXT:    vfnmsubss %xmm1, {{.*}}(%rip), %xmm0, %xmm0
+; FMA4-NEXT:    vfnmsubss {{.*#+}} xmm0 = -(xmm0 * mem) - xmm1
 ; FMA4-NEXT:    retq
   %m = fmul float %x, 42.0
   %fma = call nsz float @llvm.fma.f32(float %x, float -42.0, float %m)
diff --git a/llvm/test/CodeGen/X86/fma4-commute-x86.ll b/llvm/test/CodeGen/X86/fma4-commute-x86.ll
index 9a1724f13737..38ae1464f8e7 100644
--- a/llvm/test/CodeGen/X86/fma4-commute-x86.ll
+++ b/llvm/test/CodeGen/X86/fma4-commute-x86.ll
@@ -8,7 +8,7 @@ define <4 x float> @test_x86_fmadd_baa_ss(<4 x float> %a, <4 x float> %b) #0 {
 ; FMA4-LABEL: test_x86_fmadd_baa_ss:
 ; FMA4:       # %bb.0:
 ; FMA4-NEXT:    vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
-; FMA4-NEXT:    vfmaddss %xmm0, (%rdx), %xmm0, %xmm0
+; FMA4-NEXT:    vfmaddss {{.*#+}} xmm0 = (xmm0 * mem) + xmm0
 ; FMA4-NEXT:    retq
   %res = call <4 x float> @llvm.x86.fma4.vfmadd.ss(<4 x float> %b, <4 x float> %a, <4 x float> %a) nounwind
   ret <4 x float> %res
@@ -18,7 +18,7 @@ define <4 x float> @test_x86_fmadd_aba_ss(<4 x float> %a, <4 x float> %b) #0 {
 ; FMA4-LABEL: test_x86_fmadd_aba_ss:
 ; FMA4:       # %bb.0:
 ; FMA4-NEXT:    vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
-; FMA4-NEXT:    vfmaddss %xmm0, (%rdx), %xmm0, %xmm0
+; FMA4-NEXT:    vfmaddss {{.*#+}} xmm0 = (xmm0 * mem) + xmm0
 ; FMA4-NEXT:    retq
   %res = call <4 x float> @llvm.x86.fma4.vfmadd.ss(<4 x float> %a, <4 x float> %b, <4 x float> %a) nounwind
   ret <4 x float> %res
@@ -28,7 +28,7 @@ define <4 x float> @test_x86_fmadd_bba_ss(<4 x float> %a, <4 x float> %b) #0 {
 ; FMA4-LABEL: test_x86_fmadd_bba_ss:
 ; FMA4:       # %bb.0:
 ; FMA4-NEXT:    vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
-; FMA4-NEXT:    vfmaddss (%rcx), %xmm0, %xmm0, %xmm0
+; FMA4-NEXT:    vfmaddss {{.*#+}} xmm0 = (xmm0 * xmm0) + mem
 ; FMA4-NEXT:    retq
   %res = call <4 x float> @llvm.x86.fma4.vfmadd.ss(<4 x float> %b, <4 x float> %b, <4 x float> %a) nounwind
   ret <4 x float> %res
@@ -39,7 +39,7 @@ define <4 x float> @test_x86_fmadd_baa_ps(<4 x float> %a, <4 x float> %b) #0 {
 ; FMA4-LABEL: test_x86_fmadd_baa_ps:
 ; FMA4:       # %bb.0:
 ; FMA4-NEXT:    vmovaps (%rcx), %xmm0
-; FMA4-NEXT:    vfmaddps %xmm0, (%rdx), %xmm0, %xmm0
+; FMA4-NEXT:    vfmaddps {{.*#+}} xmm0 = (xmm0 * mem) + xmm0
 ; FMA4-NEXT:    retq
   %res = call <4 x float> @llvm.x86.fma.vfmadd.ps(<4 x float> %b, <4 x float> %a, <4 x float> %a) nounwind
   ret <4 x float> %res
@@ -49,7 +49,7 @@ define <4 x float> @test_x86_fmadd_aba_ps(<4 x float> %a, <4 x float> %b) #0 {
 ; FMA4-LABEL: test_x86_fmadd_aba_ps:
 ; FMA4:       # %bb.0:
 ; FMA4-NEXT:    vmovaps (%rcx), %xmm0
-; FMA4-NEXT:    vfmaddps %xmm0, (%rdx), %xmm0, %xmm0
+; FMA4-NEXT:    vfmaddps {{.*#+}} xmm0 = (xmm0 * mem) + xmm0
 ; FMA4-NEXT:    retq
   %res = call <4 x float> @llvm.x86.fma.vfmadd.ps(<4 x float> %a, <4 x float> %b, <4 x float> %a) nounwind
   ret <4 x float> %res
@@ -59,7 +59,7 @@ define <4 x float> @test_x86_fmadd_bba_ps(<4 x float> %a, <4 x float> %b) #0 {
 ; FMA4-LABEL: test_x86_fmadd_bba_ps:
 ; FMA4:       # %bb.0:
 ; FMA4-NEXT:    vmovaps (%rdx), %xmm0
-; FMA4-NEXT:    vfmaddps (%rcx), %xmm0, %xmm0, %xmm0
+; FMA4-NEXT:    vfmaddps {{.*#+}} xmm0 = (xmm0 * xmm0) + mem
 ; FMA4-NEXT:    retq
   %res = call <4 x float> @llvm.x86.fma.vfmadd.ps(<4 x float> %b, <4 x float> %b, <4 x float> %a) nounwind
   ret <4 x float> %res
@@ -70,7 +70,7 @@ define <8 x float> @test_x86_fmadd_baa_ps_y(<8 x float> %a, <8 x float> %b) #0 {
 ; FMA4-LABEL: test_x86_fmadd_baa_ps_y:
 ; FMA4:       # %bb.0:
 ; FMA4-NEXT:    vmovaps (%rcx), %ymm0
-; FMA4-NEXT:    vfmaddps %ymm0, (%rdx), %ymm0, %ymm0
+; FMA4-NEXT:    vfmaddps {{.*#+}} ymm0 = (ymm0 * mem) + ymm0
 ; FMA4-NEXT:    retq
   %res = call <8 x float> @llvm.x86.fma.vfmadd.ps.256(<8 x float> %b, <8 x float> %a, <8 x float> %a) nounwind
   ret <8 x float> %res
@@ -80,7 +80,7 @@ define <8 x float> @test_x86_fmadd_aba_ps_y(<8 x float> %a, <8 x float> %b) #0 {
 ; FMA4-LABEL: test_x86_fmadd_aba_ps_y:
 ; FMA4:       # %bb.0:
 ; FMA4-NEXT:    vmovaps (%rcx), %ymm0
-; FMA4-NEXT:    vfmaddps %ymm0, (%rdx), %ymm0, %ymm0
+; FMA4-NEXT:    vfmaddps {{.*#+}} ymm0 = (ymm0 * mem) + ymm0
 ; FMA4-NEXT:    retq
   %res = call <8 x float> @llvm.x86.fma.vfmadd.ps.256(<8 x float> %a, <8 x float> %b, <8 x float> %a) nounwind
   ret <8 x float> %res
@@ -90,7 +90,7 @@ define <8 x float> @test_x86_fmadd_bba_ps_y(<8 x float> %a, <8 x float> %b) #0 {
 ; FMA4-LABEL: test_x86_fmadd_bba_ps_y:
 ; FMA4:       # %bb.0:
 ; FMA4-NEXT:    vmovaps (%rdx), %ymm0
-; FMA4-NEXT:    vfmaddps (%rcx), %ymm0, %ymm0, %ymm0
+; FMA4-NEXT:    vfmaddps {{.*#+}} ymm0 = (ymm0 * ymm0) + mem
 ; FMA4-NEXT:    retq
   %res = call <8 x float> @llvm.x86.fma.vfmadd.ps.256(<8 x float> %b, <8 x float> %b, <8 x float> %a) nounwind
   ret <8 x float> %res
@@ -101,7 +101,7 @@ define <2 x double> @test_x86_fmadd_baa_sd(<2 x double> %a, <2 x double> %b) #0
 ; FMA4-LABEL: test_x86_fmadd_baa_sd:
 ; FMA4:       # %bb.0:
 ; FMA4-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
-; FMA4-NEXT:    vfmaddsd %xmm0, (%rdx), %xmm0, %xmm0
+; FMA4-NEXT:    vfmaddsd {{.*#+}} xmm0 = (xmm0 * mem) + xmm0
 ; FMA4-NEXT:    retq
   %res = call <2 x double> @llvm.x86.fma4.vfmadd.sd(<2 x double> %b, <2 x double> %a, <2 x double> %a) nounwind
   ret <2 x double> %res
@@ -111,7 +111,7 @@ define <2 x double> @test_x86_fmadd_aba_sd(<2 x double> %a, <2 x double> %b) #0
 ; FMA4-LABEL: test_x86_fmadd_aba_sd:
 ; FMA4:       # %bb.0:
 ; FMA4-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
-; FMA4-NEXT:    vfmaddsd %xmm0, (%rdx), %xmm0, %xmm0
+; FMA4-NEXT:    vfmaddsd {{.*#+}} xmm0 = (xmm0 * mem) + xmm0
 ; FMA4-NEXT:    retq
   %res = call <2 x double> @llvm.x86.fma4.vfmadd.sd(<2 x double> %a, <2 x double> %b, <2 x double> %a) nounwind
   ret <2 x double> %res
@@ -121,7 +121,7 @@ define <2 x double> @test_x86_fmadd_bba_sd(<2 x double> %a, <2 x double> %b) #0
 ; FMA4-LABEL: test_x86_fmadd_bba_sd:
 ; FMA4:       # %bb.0:
 ; FMA4-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
-; FMA4-NEXT:    vfmaddsd (%rcx), %xmm0, %xmm0, %xmm0
+; FMA4-NEXT:    vfmaddsd {{.*#+}} xmm0 = (xmm0 * xmm0) + mem
 ; FMA4-NEXT:    retq
   %res = call <2 x double> @llvm.x86.fma4.vfmadd.sd(<2 x double> %b, <2 x double> %b, <2 x double> %a) nounwind
   ret <2 x double> %res
@@ -132,7 +132,7 @@ define <2 x double> @test_x86_fmadd_baa_pd(<2 x double> %a, <2 x double> %b) #0
 ; FMA4-LABEL: test_x86_fmadd_baa_pd:
 ; FMA4:       # %bb.0:
 ; FMA4-NEXT:    vmovapd (%rcx), %xmm0
-; FMA4-NEXT:    vfmaddpd %xmm0, (%rdx), %xmm0, %xmm0
+; FMA4-NEXT:    vfmaddpd {{.*#+}} xmm0 = (xmm0 * mem) + xmm0
 ; FMA4-NEXT:    retq
   %res = call <2 x double> @llvm.x86.fma.vfmadd.pd(<2 x double> %b, <2 x double> %a, <2 x double> %a) nounwind
   ret <2 x double> %res
@@ -142,7 +142,7 @@ define <2 x double> @test_x86_fmadd_aba_pd(<2 x double> %a, <2 x double> %b) #0
 ; FMA4-LABEL: test_x86_fmadd_aba_pd:
 ; FMA4:       # %bb.0:
 ; FMA4-NEXT:    vmovapd (%rcx), %xmm0
-; FMA4-NEXT:    vfmaddpd %xmm0, (%rdx), %xmm0, %xmm0
+; FMA4-NEXT:    vfmaddpd {{.*#+}} xmm0 = (xmm0 * mem) + xmm0
 ; FMA4-NEXT:    retq
   %res = call <2 x double> @llvm.x86.fma.vfmadd.pd(<2 x double> %a, <2 x double> %b, <2 x double> %a) nounwind
   ret <2 x double> %res
@@ -152,7 +152,7 @@ define <2 x double> @test_x86_fmadd_bba_pd(<2 x double> %a, <2 x double> %b) #0
 ; FMA4-LABEL: test_x86_fmadd_bba_pd:
 ; FMA4:       # %bb.0:
 ; FMA4-NEXT:    vmovapd (%rdx), %xmm0
-; FMA4-NEXT:    vfmaddpd (%rcx), %xmm0, %xmm0, %xmm0
+; FMA4-NEXT:    vfmaddpd {{.*#+}} xmm0 = (xmm0 * xmm0) + mem
 ; FMA4-NEXT:    retq
   %res = call <2 x double> @llvm.x86.fma.vfmadd.pd(<2 x double> %b, <2 x double> %b, <2 x double> %a) nounwind
   ret <2 x double> %res
@@ -163,7 +163,7 @@ define <4 x double> @test_x86_fmadd_baa_pd_y(<4 x double> %a, <4 x double> %b) #
 ; FMA4-LABEL: test_x86_fmadd_baa_pd_y:
 ; FMA4:       # %bb.0:
 ; FMA4-NEXT:    vmovapd (%rcx), %ymm0
-; FMA4-NEXT:    vfmaddpd %ymm0, (%rdx), %ymm0, %ymm0
+; FMA4-NEXT:    vfmaddpd {{.*#+}} ymm0 = (ymm0 * mem) + ymm0
 ; FMA4-NEXT:    retq
   %res = call <4 x double> @llvm.x86.fma.vfmadd.pd.256(<4 x double> %b, <4 x double> %a, <4 x double> %a) nounwind
   ret <4 x double> %res
@@ -173,7 +173,7 @@ define <4 x double> @test_x86_fmadd_aba_pd_y(<4 x double> %a, <4 x double> %b) #
 ; FMA4-LABEL: test_x86_fmadd_aba_pd_y:
 ; FMA4:       # %bb.0:
 ; FMA4-NEXT:    vmovapd (%rcx), %ymm0
-; FMA4-NEXT:    vfmaddpd %ymm0, (%rdx), %ymm0, %ymm0
+; FMA4-NEXT:    vfmaddpd {{.*#+}} ymm0 = (ymm0 * mem) + ymm0
 ; FMA4-NEXT:    retq
   %res = call <4 x double> @llvm.x86.fma.vfmadd.pd.256(<4 x double> %a, <4 x double> %b, <4 x double> %a) nounwind
   ret <4 x double> %res
@@ -183,7 +183,7 @@ define <4 x double> @test_x86_fmadd_bba_pd_y(<4 x double> %a, <4 x double> %b) #
 ; FMA4-LABEL: test_x86_fmadd_bba_pd_y:
 ; FMA4:       # %bb.0:
 ; FMA4-NEXT:    vmovapd (%rdx), %ymm0
-; FMA4-NEXT:    vfmaddpd (%rcx), %ymm0, %ymm0, %ymm0
+; FMA4-NEXT:    vfmaddpd {{.*#+}} ymm0 = (ymm0 * ymm0) + mem
 ; FMA4-NEXT:    retq
   %res = call <4 x double> @llvm.x86.fma.vfmadd.pd.256(<4 x double> %b, <4 x double> %b, <4 x double> %a) nounwind
   ret <4 x double> %res
@@ -194,7 +194,7 @@ define <4 x float> @test_x86_fnmadd_baa_ps(<4 x float> %a, <4 x float> %b) #0 {
 ; FMA4-LABEL: test_x86_fnmadd_baa_ps:
 ; FMA4:       # %bb.0:
 ; FMA4-NEXT:    vmovaps (%rcx), %xmm0
-; FMA4-NEXT:    vfnmaddps %xmm0, (%rdx), %xmm0, %xmm0
+; FMA4-NEXT:    vfnmaddps {{.*#+}} xmm0 = -(xmm0 * mem) + xmm0
 ; FMA4-NEXT:    retq
   %res = call <4 x float> @llvm.x86.fma.vfnmadd.ps(<4 x float> %b, <4 x float> %a, <4 x float> %a) nounwind
   ret <4 x float> %res
@@ -204,7 +204,7 @@ define <4 x float> @test_x86_fnmadd_aba_ps(<4 x float> %a, <4 x float> %b) #0 {
 ; FMA4-LABEL: test_x86_fnmadd_aba_ps:
 ; FMA4:       # %bb.0:
 ; FMA4-NEXT:    vmovaps (%rcx), %xmm0
-; FMA4-NEXT:    vfnmaddps %xmm0, (%rdx), %xmm0, %xmm0
+; FMA4-NEXT:    vfnmaddps {{.*#+}} xmm0 = -(xmm0 * mem) + xmm0
 ; FMA4-NEXT:    retq
   %res = call <4 x float> @llvm.x86.fma.vfnmadd.ps(<4 x float> %a, <4 x float> %b, <4 x float> %a) nounwind
   ret <4 x float> %res
@@ -214,7 +214,7 @@ define <4 x float> @test_x86_fnmadd_bba_ps(<4 x float> %a, <4 x float> %b) #0 {
 ; FMA4-LABEL: test_x86_fnmadd_bba_ps:
 ; FMA4:       # %bb.0:
 ; FMA4-NEXT:    vmovaps (%rdx), %xmm0
-; FMA4-NEXT:    vfnmaddps (%rcx), %xmm0, %xmm0, %xmm0
+; FMA4-NEXT:    vfnmaddps {{.*#+}} xmm0 = -(xmm0 * xmm0) + mem
 ; FMA4-NEXT:    retq
   %res = call <4 x float> @llvm.x86.fma.vfnmadd.ps(<4 x float> %b, <4 x float> %b, <4 x float> %a) nounwind
   ret <4 x float> %res
@@ -225,7 +225,7 @@ define <8 x float> @test_x86_fnmadd_baa_ps_y(<8 x float> %a, <8 x float> %b) #0
 ; FMA4-LABEL: test_x86_fnmadd_baa_ps_y:
 ; FMA4:       # %bb.0:
 ; FMA4-NEXT:    vmovaps (%rcx), %ymm0
-; FMA4-NEXT:    vfnmaddps %ymm0, (%rdx), %ymm0, %ymm0
+; FMA4-NEXT:    vfnmaddps {{.*#+}} ymm0 = -(ymm0 * mem) + ymm0
 ; FMA4-NEXT:    retq
   %res = call <8 x float> @llvm.x86.fma.vfnmadd.ps.256(<8 x float> %b, <8 x float> %a, <8 x float> %a) nounwind
   ret <8 x float> %res
@@ -235,7 +235,7 @@ define <8 x float> @test_x86_fnmadd_aba_ps_y(<8 x float> %a, <8 x float> %b) #0
 ; FMA4-LABEL: test_x86_fnmadd_aba_ps_y:
 ; FMA4:       # %bb.0:
 ; FMA4-NEXT:    vmovaps (%rcx), %ymm0
-; FMA4-NEXT:    vfnmaddps %ymm0, (%rdx), %ymm0, %ymm0
+; FMA4-NEXT:    vfnmaddps {{.*#+}} ymm0 = -(ymm0 * mem) + ymm0
 ; FMA4-NEXT:    retq
   %res = call <8 x float> @llvm.x86.fma.vfnmadd.ps.256(<8 x float> %a, <8 x float> %b, <8 x float> %a) nounwind
   ret <8 x float> %res
@@ -245,7 +245,7 @@ define <8 x float> @test_x86_fnmadd_bba_ps_y(<8 x float> %a, <8 x float> %b) #0
 ; FMA4-LABEL: test_x86_fnmadd_bba_ps_y:
 ; FMA4:       # %bb.0:
 ; FMA4-NEXT:    vmovaps (%rdx), %ymm0
-; FMA4-NEXT:    vfnmaddps (%rcx), %ymm0, %ymm0, %ymm0
+; FMA4-NEXT:    vfnmaddps {{.*#+}} ymm0 = -(ymm0 * ymm0) + mem
 ; FMA4-NEXT:    retq
   %res = call <8 x float> @llvm.x86.fma.vfnmadd.ps.256(<8 x float> %b, <8 x float> %b, <8 x float> %a) nounwind
   ret <8 x float> %res
@@ -256,7 +256,7 @@ define <2 x double> @test_x86_fnmadd_baa_pd(<2 x double> %a, <2 x double> %b) #0
 ; FMA4-LABEL: test_x86_fnmadd_baa_pd:
 ; FMA4:       # %bb.0:
 ; FMA4-NEXT:    vmovapd (%rcx), %xmm0
-; FMA4-NEXT:    vfnmaddpd %xmm0, (%rdx), %xmm0, %xmm0
+; FMA4-NEXT:    vfnmaddpd {{.*#+}} xmm0 = -(xmm0 * mem) + xmm0
 ; FMA4-NEXT:    retq
   %res = call <2 x double> @llvm.x86.fma.vfnmadd.pd(<2 x double> %b, <2 x double> %a, <2 x double> %a) nounwind
   ret <2 x double> %res
@@ -266,7 +266,7 @@ define <2 x double> @test_x86_fnmadd_aba_pd(<2 x double> %a, <2 x double> %b) #0
 ; FMA4-LABEL: test_x86_fnmadd_aba_pd:
 ; FMA4:       # %bb.0:
 ; FMA4-NEXT:    vmovapd (%rcx), %xmm0
-; FMA4-NEXT:    vfnmaddpd %xmm0, (%rdx), %xmm0, %xmm0
+; FMA4-NEXT:    vfnmaddpd {{.*#+}} xmm0 = -(xmm0 * mem) + xmm0
 ; FMA4-NEXT:    retq
   %res = call <2 x double> @llvm.x86.fma.vfnmadd.pd(<2 x double> %a, <2 x double> %b, <2 x double> %a) nounwind
   ret <2 x double> %res
@@ -276,7 +276,7 @@ define <2 x double> @test_x86_fnmadd_bba_pd(<2 x double> %a, <2 x double> %b) #0
 ; FMA4-LABEL: test_x86_fnmadd_bba_pd:
 ; FMA4:       # %bb.0:
 ; FMA4-NEXT:    vmovapd (%rdx), %xmm0
-; FMA4-NEXT:    vfnmaddpd (%rcx), %xmm0, %xmm0, %xmm0
+; FMA4-NEXT:    vfnmaddpd {{.*#+}} xmm0 = -(xmm0 * xmm0) + mem
 ; FMA4-NEXT:    retq
   %res = call <2 x double> @llvm.x86.fma.vfnmadd.pd(<2 x double> %b, <2 x double> %b, <2 x double> %a) nounwind
   ret <2 x double> %res
@@ -287,7 +287,7 @@ define <4 x double> @test_x86_fnmadd_baa_pd_y(<4 x double> %a, <4 x double> %b)
 ; FMA4-LABEL: test_x86_fnmadd_baa_pd_y:
 ; FMA4:       # %bb.0:
 ; FMA4-NEXT:    vmovapd (%rcx), %ymm0
-; FMA4-NEXT:    vfnmaddpd %ymm0, (%rdx), %ymm0, %ymm0
+; FMA4-NEXT:    vfnmaddpd {{.*#+}} ymm0 = -(ymm0 * mem) + ymm0
 ; FMA4-NEXT:    retq
   %res = call <4 x double> @llvm.x86.fma.vfnmadd.pd.256(<4 x double> %b, <4 x double> %a, <4 x double> %a) nounwind
   ret <4 x double> %res
@@ -297,7 +297,7 @@ define <4 x double> @test_x86_fnmadd_aba_pd_y(<4 x double> %a, <4 x double> %b)
 ; FMA4-LABEL: test_x86_fnmadd_aba_pd_y:
 ; FMA4:       # %bb.0:
 ; FMA4-NEXT:    vmovapd (%rcx), %ymm0
-; FMA4-NEXT:    vfnmaddpd %ymm0, (%rdx), %ymm0, %ymm0
+; FMA4-NEXT:    vfnmaddpd {{.*#+}} ymm0 = -(ymm0 * mem) + ymm0
 ; FMA4-NEXT:    retq
   %res = call <4 x double> @llvm.x86.fma.vfnmadd.pd.256(<4 x double> %a, <4 x double> %b, <4 x double> %a) nounwind
   ret <4 x double> %res
@@ -307,7 +307,7 @@ define <4 x double> @test_x86_fnmadd_bba_pd_y(<4 x double> %a, <4 x double> %b)
 ; FMA4-LABEL: test_x86_fnmadd_bba_pd_y:
 ; FMA4:       # %bb.0:
 ; FMA4-NEXT:    vmovapd (%rdx), %ymm0
-; FMA4-NEXT:    vfnmaddpd (%rcx), %ymm0, %ymm0, %ymm0
+; FMA4-NEXT:    vfnmaddpd {{.*#+}} ymm0 = -(ymm0 * ymm0) + mem
 ; FMA4-NEXT:    retq
   %res = call <4 x double> @llvm.x86.fma.vfnmadd.pd.256(<4 x double> %b, <4 x double> %b, <4 x double> %a) nounwind
   ret <4 x double> %res
@@ -318,7 +318,7 @@ define <4 x float> @test_x86_fmsub_baa_ps(<4 x float> %a, <4 x float> %b) #0 {
 ; FMA4-LABEL: test_x86_fmsub_baa_ps:
 ; FMA4:       # %bb.0:
 ; FMA4-NEXT:    vmovaps (%rcx), %xmm0
-; FMA4-NEXT:    vfmsubps %xmm0, (%rdx), %xmm0, %xmm0
+; FMA4-NEXT:    vfmsubps {{.*#+}} xmm0 = (xmm0 * mem) - xmm0
 ; FMA4-NEXT:    retq
   %res = call <4 x float> @llvm.x86.fma.vfmsub.ps(<4 x float> %b, <4 x float> %a, <4 x float> %a) nounwind
   ret <4 x float> %res
@@ -328,7 +328,7 @@ define <4 x float> @test_x86_fmsub_aba_ps(<4 x float> %a, <4 x float> %b) #0 {
 ; FMA4-LABEL: test_x86_fmsub_aba_ps:
 ; FMA4:       # %bb.0:
 ; FMA4-NEXT:    vmovaps (%rcx), %xmm0
-; FMA4-NEXT:    vfmsubps %xmm0, (%rdx), %xmm0, %xmm0
+; FMA4-NEXT:    vfmsubps {{.*#+}} xmm0 = (xmm0 * mem) - xmm0
 ; FMA4-NEXT:    retq
   %res = call <4 x float> @llvm.x86.fma.vfmsub.ps(<4 x float> %a, <4 x float> %b, <4 x float> %a) nounwind
   ret <4 x float> %res
@@ -338,7 +338,7 @@ define <4 x float> @test_x86_fmsub_bba_ps(<4 x float> %a, <4 x float> %b) #0 {
 ; FMA4-LABEL: test_x86_fmsub_bba_ps:
 ; FMA4:       # %bb.0:
 ; FMA4-NEXT:    vmovaps (%rdx), %xmm0
-; FMA4-NEXT:    vfmsubps (%rcx), %xmm0, %xmm0, %xmm0
+; FMA4-NEXT:    vfmsubps {{.*#+}} xmm0 = (xmm0 * xmm0) - mem
 ; FMA4-NEXT:    retq
   %res = call <4 x float> @llvm.x86.fma.vfmsub.ps(<4 x float> %b, <4 x float> %b, <4 x float> %a) nounwind
   ret <4 x float> %res
@@ -349,7 +349,7 @@ define <8 x float> @test_x86_fmsub_baa_ps_y(<8 x float> %a, <8 x float> %b) #0 {
 ; FMA4-LABEL: test_x86_fmsub_baa_ps_y:
 ; FMA4:       # %bb.0:
 ; FMA4-NEXT:    vmovaps (%rcx), %ymm0
-; FMA4-NEXT:    vfmsubps %ymm0, (%rdx), %ymm0, %ymm0
+; FMA4-NEXT:    vfmsubps {{.*#+}} ymm0 = (ymm0 * mem) - ymm0
 ; FMA4-NEXT:    retq
   %res = call <8 x float> @llvm.x86.fma.vfmsub.ps.256(<8 x float> %b, <8 x float> %a, <8 x float> %a) nounwind
   ret <8 x float> %res
@@ -359,7 +359,7 @@ define <8 x float> @test_x86_fmsub_aba_ps_y(<8 x float> %a, <8 x float> %b) #0 {
 ; FMA4-LABEL: test_x86_fmsub_aba_ps_y:
 ; FMA4:       # %bb.0:
 ; FMA4-NEXT:    vmovaps (%rcx), %ymm0
-; FMA4-NEXT:    vfmsubps %ymm0, (%rdx), %ymm0, %ymm0
+; FMA4-NEXT:    vfmsubps {{.*#+}} ymm0 = (ymm0 * mem) - ymm0
 ; FMA4-NEXT:    retq
   %res = call <8 x float> @llvm.x86.fma.vfmsub.ps.256(<8 x float> %a, <8 x float> %b, <8 x float> %a) nounwind
   ret <8 x float> %res
@@ -369,7 +369,7 @@ define <8 x float> @test_x86_fmsub_bba_ps_y(<8 x float> %a, <8 x float> %b) #0 {
 ; FMA4-LABEL: test_x86_fmsub_bba_ps_y:
 ; FMA4:       # %bb.0:
 ; FMA4-NEXT:    vmovaps (%rdx), %ymm0
-; FMA4-NEXT:    vfmsubps (%rcx), %ymm0, %ymm0, %ymm0
+; FMA4-NEXT:    vfmsubps {{.*#+}} ymm0 = (ymm0 * ymm0) - mem
 ; FMA4-NEXT:    retq
   %res = call <8 x float> @llvm.x86.fma.vfmsub.ps.256(<8 x float> %b, <8 x float> %b, <8 x float> %a) nounwind
   ret <8 x float> %res
@@ -380,7 +380,7 @@ define <2 x double> @test_x86_fmsub_baa_pd(<2 x double> %a, <2 x double> %b) #0
 ; FMA4-LABEL: test_x86_fmsub_baa_pd:
 ; FMA4:       # %bb.0:
 ; FMA4-NEXT:    vmovapd (%rcx), %xmm0
-; FMA4-NEXT:    vfmsubpd %xmm0, (%rdx), %xmm0, %xmm0
+; FMA4-NEXT:    vfmsubpd {{.*#+}} xmm0 = (xmm0 * mem) - xmm0
 ; FMA4-NEXT:    retq
   %res = call <2 x double> @llvm.x86.fma.vfmsub.pd(<2 x double> %b, <2 x double> %a, <2 x double> %a) nounwind
   ret <2 x double> %res
@@ -390,7 +390,7 @@ define <2 x double> @test_x86_fmsub_aba_pd(<2 x double> %a, <2 x double> %b) #0
 ; FMA4-LABEL: test_x86_fmsub_aba_pd:
 ; FMA4:       # %bb.0:
 ; FMA4-NEXT:    vmovapd (%rcx), %xmm0
-; FMA4-NEXT:    vfmsubpd %xmm0, (%rdx), %xmm0, %xmm0
+; FMA4-NEXT:    vfmsubpd {{.*#+}} xmm0 = (xmm0 * mem) - xmm0
 ; FMA4-NEXT:    retq
   %res = call <2 x double> @llvm.x86.fma.vfmsub.pd(<2 x double> %a, <2 x double> %b, <2 x double> %a) nounwind
   ret <2 x double> %res
@@ -400,7 +400,7 @@ define <2 x double> @test_x86_fmsub_bba_pd(<2 x double> %a, <2 x double> %b) #0
 ; FMA4-LABEL: test_x86_fmsub_bba_pd:
 ; FMA4:       # %bb.0:
 ; FMA4-NEXT:    vmovapd (%rdx), %xmm0
-; FMA4-NEXT:    vfmsubpd (%rcx), %xmm0, %xmm0, %xmm0
+; FMA4-NEXT:    vfmsubpd {{.*#+}} xmm0 = (xmm0 * xmm0) - mem
 ; FMA4-NEXT:    retq
   %res = call <2 x double> @llvm.x86.fma.vfmsub.pd(<2 x double> %b, <2 x double> %b, <2 x double> %a) nounwind
   ret <2 x double> %res
@@ -411,7 +411,7 @@ define <4 x double> @test_x86_fmsub_baa_pd_y(<4 x double> %a, <4 x double> %b) #
 ; FMA4-LABEL: test_x86_fmsub_baa_pd_y:
 ; FMA4:       # %bb.0:
 ; FMA4-NEXT:    vmovapd (%rcx), %ymm0
-; FMA4-NEXT:    vfmsubpd %ymm0, (%rdx), %ymm0, %ymm0
+; FMA4-NEXT:    vfmsubpd {{.*#+}} ymm0 = (ymm0 * mem) - ymm0
 ; FMA4-NEXT:    retq
   %res = call <4 x double> @llvm.x86.fma.vfmsub.pd.256(<4 x double> %b, <4 x double> %a, <4 x double> %a) nounwind
   ret <4 x double> %res
@@ -421,7 +421,7 @@ define <4 x double> @test_x86_fmsub_aba_pd_y(<4 x double> %a, <4 x double> %b) #
 ; FMA4-LABEL: test_x86_fmsub_aba_pd_y:
 ; FMA4:       # %bb.0:
 ; FMA4-NEXT:    vmovapd (%rcx), %ymm0
-; FMA4-NEXT:    vfmsubpd %ymm0, (%rdx), %ymm0, %ymm0
+; FMA4-NEXT:    vfmsubpd {{.*#+}} ymm0 = (ymm0 * mem) - ymm0
 ; FMA4-NEXT:    retq
   %res = call <4 x double> @llvm.x86.fma.vfmsub.pd.256(<4 x double> %a, <4 x double> %b, <4 x double> %a) nounwind
   ret <4 x double> %res
@@ -431,7 +431,7 @@ define <4 x double> @test_x86_fmsub_bba_pd_y(<4 x double> %a, <4 x double> %b) #
 ; FMA4-LABEL: test_x86_fmsub_bba_pd_y:
 ; FMA4:       # %bb.0:
 ; FMA4-NEXT:    vmovapd (%rdx), %ymm0
-; FMA4-NEXT:    vfmsubpd (%rcx), %ymm0, %ymm0, %ymm0
+; FMA4-NEXT:    vfmsubpd {{.*#+}} ymm0 = (ymm0 * ymm0) - mem
 ; FMA4-NEXT:    retq
   %res = call <4 x double> @llvm.x86.fma.vfmsub.pd.256(<4 x double> %b, <4 x double> %b, <4 x double> %a) nounwind
   ret <4 x double> %res
@@ -442,7 +442,7 @@ define <4 x float> @test_x86_fnmsub_baa_ps(<4 x float> %a, <4 x float> %b) #0 {
 ; FMA4-LABEL: test_x86_fnmsub_baa_ps:
 ; FMA4:       # %bb.0:
 ; FMA4-NEXT:    vmovaps (%rcx), %xmm0
-; FMA4-NEXT:    vfnmsubps %xmm0, (%rdx), %xmm0, %xmm0
+; FMA4-NEXT:    vfnmsubps {{.*#+}} xmm0 = -(xmm0 * mem) - xmm0
 ; FMA4-NEXT:    retq
   %res = call <4 x float> @llvm.x86.fma.vfnmsub.ps(<4 x float> %b, <4 x float> %a, <4 x float> %a) nounwind
   ret <4 x float> %res
@@ -452,7 +452,7 @@ define <4 x float> @test_x86_fnmsub_aba_ps(<4 x float> %a, <4 x float> %b) #0 {
 ; FMA4-LABEL: test_x86_fnmsub_aba_ps:
 ; FMA4:       # %bb.0:
 ; FMA4-NEXT:    vmovaps (%rcx), %xmm0
-; FMA4-NEXT:    vfnmsubps %xmm0, (%rdx), %xmm0, %xmm0
+; FMA4-NEXT:    vfnmsubps {{.*#+}} xmm0 = -(xmm0 * mem) - xmm0
 ; FMA4-NEXT:    retq
   %res = call <4 x float> @llvm.x86.fma.vfnmsub.ps(<4 x float> %a, <4 x float> %b, <4 x float> %a) nounwind
   ret <4 x float> %res
@@ -462,7 +462,7 @@ define <4 x float> @test_x86_fnmsub_bba_ps(<4 x float> %a, <4 x float> %b) #0 {
 ; FMA4-LABEL: test_x86_fnmsub_bba_ps:
 ; FMA4:       # %bb.0:
 ; FMA4-NEXT:    vmovaps (%rdx), %xmm0
-; FMA4-NEXT:    vfnmsubps (%rcx), %xmm0, %xmm0, %xmm0
+; FMA4-NEXT:    vfnmsubps {{.*#+}} xmm0 = -(xmm0 * xmm0) - mem
 ; FMA4-NEXT:    retq
   %res = call <4 x float> @llvm.x86.fma.vfnmsub.ps(<4 x float> %b, <4 x float> %b, <4 x float> %a) nounwind
   ret <4 x float> %res
@@ -473,7 +473,7 @@ define <8 x float> @test_x86_fnmsub_baa_ps_y(<8 x float> %a, <8 x float> %b) #0
 ; FMA4-LABEL: test_x86_fnmsub_baa_ps_y:
 ; FMA4:       # %bb.0:
 ; FMA4-NEXT:    vmovaps (%rcx), %ymm0
-; FMA4-NEXT:    vfnmsubps %ymm0, (%rdx), %ymm0, %ymm0
+; FMA4-NEXT:    vfnmsubps {{.*#+}} ymm0 = -(ymm0 * mem) - ymm0
 ; FMA4-NEXT:    retq
   %res = call <8 x float> @llvm.x86.fma.vfnmsub.ps.256(<8 x float> %b, <8 x float> %a, <8 x float> %a) nounwind
   ret <8 x float> %res
@@ -483,7 +483,7 @@ define <8 x float> @test_x86_fnmsub_aba_ps_y(<8 x float> %a, <8 x float> %b) #0
 ; FMA4-LABEL: test_x86_fnmsub_aba_ps_y:
 ; FMA4:       # %bb.0:
 ; FMA4-NEXT:    vmovaps (%rcx), %ymm0
-; FMA4-NEXT:    vfnmsubps %ymm0, (%rdx), %ymm0, %ymm0
+; FMA4-NEXT:    vfnmsubps {{.*#+}} ymm0 = -(ymm0 * mem) - ymm0
 ; FMA4-NEXT:    retq
   %res = call <8 x float> @llvm.x86.fma.vfnmsub.ps.256(<8 x float> %a, <8 x float> %b, <8 x float> %a) nounwind
   ret <8 x float> %res
@@ -493,7 +493,7 @@ define <8 x float> @test_x86_fnmsub_bba_ps_y(<8 x float> %a, <8 x float> %b) #0
 ; FMA4-LABEL: test_x86_fnmsub_bba_ps_y:
 ; FMA4:       # %bb.0:
 ; FMA4-NEXT:    vmovaps (%rdx), %ymm0
-; FMA4-NEXT:    vfnmsubps (%rcx), %ymm0, %ymm0, %ymm0
+; FMA4-NEXT:    vfnmsubps {{.*#+}} ymm0 = -(ymm0 * ymm0) - mem
 ; FMA4-NEXT:    retq
   %res = call <8 x float> @llvm.x86.fma.vfnmsub.ps.256(<8 x float> %b, <8 x float> %b, <8 x float> %a) nounwind
   ret <8 x float> %res
@@ -504,7 +504,7 @@ define <2 x double> @test_x86_fnmsub_baa_pd(<2 x double> %a, <2 x double> %b) #0
 ; FMA4-LABEL: test_x86_fnmsub_baa_pd:
 ; FMA4:       # %bb.0:
 ; FMA4-NEXT:    vmovapd (%rcx), %xmm0
-; FMA4-NEXT:    vfnmsubpd %xmm0, (%rdx), %xmm0, %xmm0
+; FMA4-NEXT:    vfnmsubpd {{.*#+}} xmm0 = -(xmm0 * mem) - xmm0
 ; FMA4-NEXT:    retq
   %res = call <2 x double> @llvm.x86.fma.vfnmsub.pd(<2 x double> %b, <2 x double> %a, <2 x double> %a) nounwind
   ret <2 x double> %res
@@ -514,7 +514,7 @@ define <2 x double> @test_x86_fnmsub_aba_pd(<2 x double> %a, <2 x double> %b) #0
 ; FMA4-LABEL: test_x86_fnmsub_aba_pd:
 ; FMA4:       # %bb.0:
 ; FMA4-NEXT:    vmovapd (%rcx), %xmm0
-; FMA4-NEXT:    vfnmsubpd %xmm0, (%rdx), %xmm0, %xmm0
+; FMA4-NEXT:    vfnmsubpd {{.*#+}} xmm0 = -(xmm0 * mem) - xmm0
 ; FMA4-NEXT:    retq
   %res = call <2 x double> @llvm.x86.fma.vfnmsub.pd(<2 x double> %a, <2 x double> %b, <2 x double> %a) nounwind
   ret <2 x double> %res
@@ -524,7 +524,7 @@ define <2 x double> @test_x86_fnmsub_bba_pd(<2 x double> %a, <2 x double> %b) #0
 ; FMA4-LABEL: test_x86_fnmsub_bba_pd:
 ; FMA4:       # %bb.0:
 ; FMA4-NEXT:    vmovapd (%rdx), %xmm0
-; FMA4-NEXT:    vfnmsubpd (%rcx), %xmm0, %xmm0, %xmm0
+; FMA4-NEXT:    vfnmsubpd {{.*#+}} xmm0 = -(xmm0 * xmm0) - mem
 ; FMA4-NEXT:    retq
   %res = call <2 x double> @llvm.x86.fma.vfnmsub.pd(<2 x double> %b, <2 x double> %b, <2 x double> %a) nounwind
   ret <2 x double> %res
@@ -535,7 +535,7 @@ define <4 x double> @test_x86_fnmsub_baa_pd_y(<4 x double> %a, <4 x double> %b)
 ; FMA4-LABEL: test_x86_fnmsub_baa_pd_y:
 ; FMA4:       # %bb.0:
 ; FMA4-NEXT:    vmovapd (%rcx), %ymm0
-; FMA4-NEXT:    vfnmsubpd %ymm0, (%rdx), %ymm0, %ymm0
+; FMA4-NEXT:    vfnmsubpd {{.*#+}} ymm0 = -(ymm0 * mem) - ymm0
 ; FMA4-NEXT:    retq
   %res = call <4 x double> @llvm.x86.fma.vfnmsub.pd.256(<4 x double> %b, <4 x double> %a, <4 x double> %a) nounwind
   ret <4 x double> %res
@@ -545,7 +545,7 @@ define <4 x double> @test_x86_fnmsub_aba_pd_y(<4 x double> %a, <4 x double> %b)
 ; FMA4-LABEL: test_x86_fnmsub_aba_pd_y:
 ; FMA4:       # %bb.0:
 ; FMA4-NEXT:    vmovapd (%rcx), %ymm0
-; FMA4-NEXT:    vfnmsubpd %ymm0, (%rdx), %ymm0, %ymm0
+; FMA4-NEXT:    vfnmsubpd {{.*#+}} ymm0 = -(ymm0 * mem) - ymm0
 ; FMA4-NEXT:    retq
   %res = call <4 x double> @llvm.x86.fma.vfnmsub.pd.256(<4 x double> %a, <4 x double> %b, <4 x double> %a) nounwind
   ret <4 x double> %res
@@ -555,7 +555,7 @@ define <4 x double> @test_x86_fnmsub_bba_pd_y(<4 x double> %a, <4 x double> %b)
 ; FMA4-LABEL: test_x86_fnmsub_bba_pd_y:
 ; FMA4:       # %bb.0:
 ; FMA4-NEXT:    vmovapd (%rdx), %ymm0
-; FMA4-NEXT:    vfnmsubpd (%rcx), %ymm0, %ymm0, %ymm0
+; FMA4-NEXT:    vfnmsubpd {{.*#+}} ymm0 = -(ymm0 * ymm0) - mem
 ; FMA4-NEXT:    retq
   %res = call <4 x double> @llvm.x86.fma.vfnmsub.pd.256(<4 x double> %b, <4 x double> %b, <4 x double> %a) nounwind
   ret <4 x double> %res
diff --git a/llvm/test/CodeGen/X86/fma4-fneg-combine.ll b/llvm/test/CodeGen/X86/fma4-fneg-combine.ll
index 771162a2c993..af86a2c917a2 100644
--- a/llvm/test/CodeGen/X86/fma4-fneg-combine.ll
+++ b/llvm/test/CodeGen/X86/fma4-fneg-combine.ll
@@ -9,7 +9,7 @@ declare <2 x double> @llvm.x86.fma4.vfmadd.sd(<2 x double> %a, <2 x double> %b,
 define <4 x float> @test1(<4 x float> %a, <4 x float> %b, <4 x float> %c) {
 ; CHECK-LABEL: test1:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vfmaddss %xmm2, %xmm1, %xmm0, %xmm0
+; CHECK-NEXT:    vfmaddss {{.*#+}} xmm0 = (xmm0 * xmm1) + xmm2
 ; CHECK-NEXT:    vxorps {{.*}}(%rip), %xmm0, %xmm0
 ; CHECK-NEXT:    retq
   %res = tail call <4 x float> @llvm.x86.fma4.vfmadd.ss(<4 x float> %a, <4 x float> %b, <4 x float> %c)
@@ -20,7 +20,7 @@ define <4 x float> @test1(<4 x float> %a, <4 x float> %b, <4 x float> %c) {
 define <4 x float> @test2(<4 x float> %a, <4 x float> %b, <4 x float> %c) {
 ; CHECK-LABEL: test2:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vfmsubss %xmm2, %xmm1, %xmm0, %xmm0
+; CHECK-NEXT:    vfmsubss {{.*#+}} xmm0 = (xmm0 * xmm1) - xmm2
 ; CHECK-NEXT:    retq
   %sub.i = fsub <4 x float> , %c
   %res = tail call <4 x float> @llvm.x86.fma4.vfmadd.ss(<4 x float> %a, <4 x float> %b, <4 x float> %sub.i)
@@ -30,7 +30,7 @@ define <4 x float> @test2(<4 x float> %a, <4 x float> %b, <4 x float> %c) {
 define <4 x float> @test3(<4 x float> %a, <4 x float> %b, <4 x float> %c) {
 ; CHECK-LABEL: test3:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vfnmaddss %xmm2, %xmm1, %xmm0, %xmm0
+; CHECK-NEXT:    vfnmaddss {{.*#+}} xmm0 = -(xmm0 * xmm1) + xmm2
 ; CHECK-NEXT:    retq
   %sub.i = fsub <4 x float> , %b
   %res = tail call <4 x float> @llvm.x86.fma4.vfmadd.ss(<4 x float> %a, <4 x float> %sub.i, <4 x float> %c)
@@ -40,7 +40,7 @@ define <4 x float> @test4(<4 x float> %a, <4 x float> %b, <4 x float> %c) {
 ; CHECK-LABEL: test4:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vfnmaddss %xmm2, %xmm1, %xmm0, %xmm0
+; CHECK-NEXT:    vfnmaddss {{.*#+}} xmm0 = -(xmm0 * xmm1) + xmm2
 ; CHECK-NEXT:    retq
   %sub.i = fsub <4 x float> , %a
   %res = tail call <4 x float> @llvm.x86.fma4.vfmadd.ss(<4 x float> %sub.i, <4 x float> %b, <4 x float> %c)
@@ -50,7 +50,7 @@ define <4 x float> @test5(<4 x float> %a, <4 x float> %b, <4 x float> %c) {
 ; CHECK-LABEL: test5:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vfnmsubss %xmm2, %xmm1, %xmm0, %xmm0
+; CHECK-NEXT:    vfnmsubss {{.*#+}} xmm0 = -(xmm0 * xmm1) - xmm2
 ; CHECK-NEXT:    retq
   %sub.i = fsub <4 x float> , %a
   %sub.i.2 = fsub <4 x float> , %c
@@ -61,7 +61,7 @@ define <4 x float> @test5(<4 x float> %a, <4 x float> %b, <4 x float> %c) {
 define <2 x double> @test6(<2 x double> %a, <2 x double> %b, <2 x double> %c) {
 ; CHECK-LABEL: test6:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vfmaddsd %xmm2, %xmm1, %xmm0, %xmm0
+; CHECK-NEXT:    vfmaddsd {{.*#+}} xmm0 = (xmm0 * xmm1) + xmm2
 ; CHECK-NEXT:    vxorpd {{.*}}(%rip), %xmm0, %xmm0
 ; CHECK-NEXT:    retq
   %res = tail call <2 x double> @llvm.x86.fma4.vfmadd.sd(<2 x double> %a, <2 x double> %b, <2 x double> %c)
@@ -72,7 +72,7 @@ define <2 x double> @test7(<2 x double> %a, <2 x double> %b, <2 x double> %c) {
 ; CHECK-LABEL: test7:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vfmsubsd %xmm2, %xmm1, %xmm0, %xmm0
+; CHECK-NEXT:    vfmsubsd {{.*#+}} xmm0 = (xmm0 * xmm1) - xmm2
 ; CHECK-NEXT:    retq
   %sub.i = fsub <2 x double> , %c
   %res = tail call <2 x double> @llvm.x86.fma4.vfmadd.sd(<2 x double> %a, <2 x double> %b, <2 x double> %sub.i)
@@ -82,7 +82,7 @@ define <2 x double> @test8(<2 x double> %a, <2 x double> %b, <2 x double> %c) {
 define <2 x double> @test8(<2 x double> %a, <2 x double> %b, <2 x double> %c) {
 ; CHECK-LABEL: test8:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vfnmaddsd %xmm2, %xmm1, %xmm0, %xmm0
+; CHECK-NEXT:    vfnmaddsd {{.*#+}} xmm0 = -(xmm0 * xmm1) + xmm2
 ; CHECK-NEXT:    retq
   %sub.i = fsub <2 x double> , %b
   %res = tail call <2 x double> @llvm.x86.fma4.vfmadd.sd(<2 x double> %a, <2 x double> %sub.i, <2 x double> %c)
@@ -92,7 +92,7 @@ define <2 x double> @test8(<2 x double> %a, <2 x double> %b, <2 x double> %c) {
 define <2 x double> @test9(<2 x double> %a, <2 x double> %b, <2 x double> %c) {
 ; CHECK-LABEL: test9:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vfnmaddsd %xmm2, %xmm1, %xmm0, %xmm0
+; CHECK-NEXT:    vfnmaddsd {{.*#+}} xmm0 = -(xmm0 * xmm1) + xmm2
 ; CHECK-NEXT:    retq
   %sub.i = fsub <2 x double> , %a
   %res = tail call <2 x double> @llvm.x86.fma4.vfmadd.sd(<2 x double> %sub.i, <2 x double> %b, <2 x double> %c)
@@ -102,7 +102,7 @@ define <2 x double> @test9(<2 x double> %a, <2 x double> %b, <2 x double> %c) {
 define <2 x double> @test10(<2 x double> %a, <2 x double> %b, <2 x double> %c) {
 ; CHECK-LABEL: test10:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vfnmsubsd %xmm2, %xmm1, %xmm0, %xmm0
+; CHECK-NEXT:    vfnmsubsd {{.*#+}} xmm0 = -(xmm0 * xmm1) - xmm2
 ; CHECK-NEXT:    retq
   %sub.i = fsub <2 x double> , %a
   %sub.i.2 = fsub <2 x double> , %c
diff --git a/llvm/test/CodeGen/X86/fma4-intrinsics-x86-upgrade.ll b/llvm/test/CodeGen/X86/fma4-intrinsics-x86-upgrade.ll
index 1c9c5ba749a0..169590233896 100644
--- a/llvm/test/CodeGen/X86/fma4-intrinsics-x86-upgrade.ll
+++ b/llvm/test/CodeGen/X86/fma4-intrinsics-x86-upgrade.ll
@@ -6,6 +6,7 @@ define <4 x float> @test_x86_fma_vfmadd_ps(<4 x float> %a0, <4 x float> %a1, <4
 ; CHECK-LABEL: test_x86_fma_vfmadd_ps:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vfmaddps %xmm2, %xmm1, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0xf9,0x68,0xc2,0x10]
+; CHECK-NEXT:    # xmm0 = (xmm0 * xmm1) + xmm2
 ; CHECK-NEXT:    retq # encoding: [0xc3]
   %res = call <4 x float> @llvm.x86.fma.vfmadd.ps(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2)
   ret <4 x float> %res
@@ -16,6 +17,7 @@ define <2 x double> @test_x86_fma_vfmadd_pd(<2 x double> %a0, <2 x double> %a1,
 ; CHECK-LABEL: test_x86_fma_vfmadd_pd:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vfmaddpd %xmm2, %xmm1, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0xf9,0x69,0xc2,0x10]
+; CHECK-NEXT:    # xmm0 = (xmm0 * xmm1) + xmm2
 ; CHECK-NEXT:    retq # encoding: [0xc3]
   %res = call <2 x double> @llvm.x86.fma.vfmadd.pd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2)
   ret <2 x double> %res
@@ -26,6 +28,7 @@ define <8 x float> @test_x86_fma_vfmadd_ps_256(<8 x float> %a0, <8 x float> %a1,
 ; CHECK-LABEL: test_x86_fma_vfmadd_ps_256:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vfmaddps %ymm2, %ymm1, %ymm0, %ymm0 # encoding: [0xc4,0xe3,0xfd,0x68,0xc2,0x10]
+; CHECK-NEXT:    # ymm0 = (ymm0 * ymm1) + ymm2
 ; CHECK-NEXT:    retq # encoding: [0xc3]
   %res = call <8 x float> @llvm.x86.fma.vfmadd.ps.256(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2)
   ret <8 x float> %res
@@ -36,6 +39,7 @@ define <4 x double> @test_x86_fma_vfmadd_pd_256(<4 x double> %a0, <4 x double> %
 ; CHECK-LABEL: test_x86_fma_vfmadd_pd_256:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vfmaddpd %ymm2, %ymm1, %ymm0, %ymm0 # encoding: [0xc4,0xe3,0xfd,0x69,0xc2,0x10]
+; CHECK-NEXT:    # ymm0 = (ymm0 * ymm1) + ymm2
 ; CHECK-NEXT:    retq # encoding: [0xc3]
   %res = call <4 x double> @llvm.x86.fma.vfmadd.pd.256(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2)
   ret <4 x double> %res
@@ -47,6 +51,7 @@ define <4 x float> @test_x86_fma_vfmsub_ps(<4 x float> %a0, <4 x float> %a1, <4
 ; CHECK-LABEL: test_x86_fma_vfmsub_ps:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vfmsubps %xmm2, %xmm1, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0xf9,0x6c,0xc2,0x10]
+; CHECK-NEXT:    # xmm0 = (xmm0 * xmm1) - xmm2
 ; CHECK-NEXT:    retq # encoding: [0xc3]
   %res = call <4 x float> @llvm.x86.fma.vfmsub.ps(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2)
   ret <4 x float> %res
@@ -57,6 +62,7 @@ define <2 x double> @test_x86_fma_vfmsub_pd(<2 x double> %a0, <2 x double> %a1,
 ; CHECK-LABEL: test_x86_fma_vfmsub_pd:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vfmsubpd %xmm2, %xmm1, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0xf9,0x6d,0xc2,0x10]
+; CHECK-NEXT:    # xmm0 = (xmm0 * xmm1) - xmm2
 ; CHECK-NEXT:    retq # encoding: [0xc3]
   %res = call <2 x double> @llvm.x86.fma.vfmsub.pd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2)
   ret <2 x double> %res
@@ -67,6 +73,7 @@ define <8 x float> @test_x86_fma_vfmsub_ps_256(<8 x float> %a0, <8 x float> %a1,
 ; CHECK-LABEL: test_x86_fma_vfmsub_ps_256:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vfmsubps %ymm2, %ymm1, %ymm0, %ymm0 # encoding: [0xc4,0xe3,0xfd,0x6c,0xc2,0x10]
+; CHECK-NEXT:    # ymm0 = (ymm0 * ymm1) - ymm2
 ; CHECK-NEXT:    retq # encoding: [0xc3]
   %res = call <8 x float> @llvm.x86.fma.vfmsub.ps.256(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2)
   ret <8 x float> %res
@@ -77,6 +84,7 @@ define <4 x double> @test_x86_fma_vfmsub_pd_256(<4 x double> %a0, <4 x double> %
 ; CHECK-LABEL: test_x86_fma_vfmsub_pd_256:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vfmsubpd %ymm2, %ymm1, %ymm0, %ymm0 # encoding: [0xc4,0xe3,0xfd,0x6d,0xc2,0x10]
+; CHECK-NEXT:    # ymm0 = (ymm0 * ymm1) - ymm2
 ; CHECK-NEXT:    retq # encoding: [0xc3]
   %res = call <4 x double> @llvm.x86.fma.vfmsub.pd.256(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2)
   ret <4 x double> %res
@@ -88,6 +96,7 @@ define <4 x float> @test_x86_fma_vfnmadd_ps(<4 x float> %a0, <4 x float> %a1, <4
 ; CHECK-LABEL: test_x86_fma_vfnmadd_ps:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vfnmaddps %xmm2, %xmm1, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0xf9,0x78,0xc2,0x10]
+; CHECK-NEXT:    # xmm0 = -(xmm0 * xmm1) + xmm2
 ; CHECK-NEXT:    retq # encoding: [0xc3]
   %res = call <4 x float> @llvm.x86.fma.vfnmadd.ps(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2)
   ret <4 x float> %res
@@ -98,6 +107,7 @@ define <2 x double> @test_x86_fma_vfnmadd_pd(<2 x double> %a0, <2 x double> %a1,
 ; CHECK-LABEL: test_x86_fma_vfnmadd_pd:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vfnmaddpd %xmm2, %xmm1, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0xf9,0x79,0xc2,0x10]
+; CHECK-NEXT:    # xmm0 = -(xmm0 * xmm1) + xmm2
 ; CHECK-NEXT:    retq # encoding: [0xc3]
   %res = call <2 x double> @llvm.x86.fma.vfnmadd.pd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2)
   ret <2 x double> %res
@@ -108,6 +118,7 @@ define <8 x float> @test_x86_fma_vfnmadd_ps_256(<8 x float> %a0, <8 x float> %a1
 ; CHECK-LABEL: test_x86_fma_vfnmadd_ps_256:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vfnmaddps %ymm2, %ymm1, %ymm0, %ymm0 # encoding: [0xc4,0xe3,0xfd,0x78,0xc2,0x10]
+; CHECK-NEXT:    # ymm0 = -(ymm0 * ymm1) + ymm2
 ; CHECK-NEXT:    retq # encoding: [0xc3]
   %res = call <8 x float> @llvm.x86.fma.vfnmadd.ps.256(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2)
   ret <8 x float> %res
@@ -118,6 +129,7 @@ define <4 x double> @test_x86_fma_vfnmadd_pd_256(<4 x double> %a0, <4 x double>
 ; CHECK-LABEL: test_x86_fma_vfnmadd_pd_256:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vfnmaddpd %ymm2, %ymm1, %ymm0, %ymm0 # encoding: [0xc4,0xe3,0xfd,0x79,0xc2,0x10]
+; CHECK-NEXT:    # ymm0 = -(ymm0 * ymm1) + ymm2
 ; CHECK-NEXT:    retq # encoding: [0xc3]
   %res = call <4 x double> @llvm.x86.fma.vfnmadd.pd.256(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2)
   ret <4 x double> %res
@@ -129,6 +141,7 @@ define <4 x float> @test_x86_fma_vfnmsub_ps(<4 x float> %a0, <4 x float> %a1, <4
 ; CHECK-LABEL: test_x86_fma_vfnmsub_ps:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vfnmsubps %xmm2, %xmm1, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0xf9,0x7c,0xc2,0x10]
+; CHECK-NEXT:    # xmm0 = -(xmm0 * xmm1) - xmm2
 ; CHECK-NEXT:    retq # encoding: [0xc3]
   %res = call <4 x float> @llvm.x86.fma.vfnmsub.ps(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2)
   ret <4 x float> %res
@@ -139,6 +152,7 @@ define <2 x double> @test_x86_fma_vfnmsub_pd(<2 x double> %a0, <2 x double> %a1,
 ; CHECK-LABEL: test_x86_fma_vfnmsub_pd:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vfnmsubpd %xmm2, %xmm1, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0xf9,0x7d,0xc2,0x10]
+; CHECK-NEXT:    # xmm0 = -(xmm0 * xmm1) - xmm2
 ; CHECK-NEXT:    retq # encoding: [0xc3]
   %res = call <2 x double> @llvm.x86.fma.vfnmsub.pd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2)
   ret <2 x double> %res
@@ -149,6 +163,7 @@ define <8 x float> @test_x86_fma_vfnmsub_ps_256(<8 x float> %a0, <8 x float> %a1
 ; CHECK-LABEL: test_x86_fma_vfnmsub_ps_256:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vfnmsubps %ymm2, %ymm1, %ymm0, %ymm0 # encoding: [0xc4,0xe3,0xfd,0x7c,0xc2,0x10]
+; CHECK-NEXT:    # ymm0 = -(ymm0 * ymm1) - ymm2
 ; CHECK-NEXT:    retq # encoding: [0xc3]
   %res = call <8 x float> @llvm.x86.fma.vfnmsub.ps.256(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2)
   ret <8 x float> %res
@@ -159,6 +174,7 @@ define <4 x double> @test_x86_fma_vfnmsub_pd_256(<4 x double> %a0, <4 x double>
 ; CHECK-LABEL: test_x86_fma_vfnmsub_pd_256:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vfnmsubpd %ymm2, %ymm1, %ymm0, %ymm0 # encoding: [0xc4,0xe3,0xfd,0x7d,0xc2,0x10]
+; CHECK-NEXT:    # ymm0 = -(ymm0 * ymm1) - ymm2
 ; CHECK-NEXT:    retq # encoding: [0xc3]
   %res = call <4 x double> @llvm.x86.fma.vfnmsub.pd.256(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2)
   ret <4 x double> %res
@@ -170,6 +186,7 @@ define <4 x float> @test_x86_fma_vfmaddsub_ps(<4 x float> %a0, <4 x float> %a1,
 ; CHECK-LABEL: test_x86_fma_vfmaddsub_ps:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vfmaddsubps %xmm2, %xmm1, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0xf9,0x5c,0xc2,0x10]
+; CHECK-NEXT:    # xmm0 = (xmm0 * xmm1) +/- xmm2
 ; CHECK-NEXT:    retq # encoding: [0xc3]
   %res = call <4 x float> @llvm.x86.fma.vfmaddsub.ps(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2)
   ret <4 x float> %res
@@ -180,6 +197,7 @@ define <2 x double> @test_x86_fma_vfmaddsub_pd(<2 x double> %a0, <2 x double> %a
 ; CHECK-LABEL: test_x86_fma_vfmaddsub_pd:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vfmaddsubpd %xmm2, %xmm1, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0xf9,0x5d,0xc2,0x10]
+; CHECK-NEXT:    # xmm0 = (xmm0 * xmm1) +/- xmm2
 ; CHECK-NEXT:    retq # encoding: [0xc3]
   %res = call <2 x double> @llvm.x86.fma.vfmaddsub.pd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2)
   ret <2 x double> %res
@@ -190,6 +208,7 @@ define <8 x float> @test_x86_fma_vfmaddsub_ps_256(<8 x float> %a0, <8 x float> %
 ; CHECK-LABEL: test_x86_fma_vfmaddsub_ps_256:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vfmaddsubps %ymm2, %ymm1, %ymm0, %ymm0 # encoding: [0xc4,0xe3,0xfd,0x5c,0xc2,0x10]
+; CHECK-NEXT:    # ymm0 = (ymm0 * ymm1) +/- ymm2
 ; CHECK-NEXT:    retq # encoding: [0xc3]
   %res = call <8 x float> @llvm.x86.fma.vfmaddsub.ps.256(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2)
   ret <8 x float> %res
@@ -200,6 +219,7 @@ define <4 x double> @test_x86_fma_vfmaddsub_pd_256(<4 x double> %a0, <4 x double
 ; CHECK-LABEL: test_x86_fma_vfmaddsub_pd_256:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vfmaddsubpd %ymm2, %ymm1, %ymm0, %ymm0 # encoding: [0xc4,0xe3,0xfd,0x5d,0xc2,0x10]
+; CHECK-NEXT:    # ymm0 = (ymm0 * ymm1) +/- ymm2
 ; CHECK-NEXT:    retq # encoding: [0xc3]
   %res = call <4 x double> @llvm.x86.fma.vfmaddsub.pd.256(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2)
   ret <4 x double> %res
@@ -211,6 +231,7 @@ define <4 x float> @test_x86_fma_vfmsubadd_ps(<4 x float> %a0, <4 x float> %a1,
 ; CHECK-LABEL: test_x86_fma_vfmsubadd_ps:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vfmsubaddps %xmm2, %xmm1, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0xf9,0x5e,0xc2,0x10]
+; CHECK-NEXT:    # xmm0 = (xmm0 * xmm1) -/+ xmm2
 ; CHECK-NEXT:    retq # encoding: [0xc3]
   %res = call <4 x float> @llvm.x86.fma.vfmsubadd.ps(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2)
   ret <4 x float> %res
@@ -221,6 +242,7 @@ define <2 x double> @test_x86_fma_vfmsubadd_pd(<2 x double> %a0, <2 x double> %a
 ; CHECK-LABEL: test_x86_fma_vfmsubadd_pd:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vfmsubaddpd %xmm2, %xmm1, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0xf9,0x5f,0xc2,0x10]
+; CHECK-NEXT:    # xmm0 = (xmm0 * xmm1) -/+ xmm2
 ; CHECK-NEXT:    retq # encoding: [0xc3]
   %res = call <2 x double> @llvm.x86.fma.vfmsubadd.pd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2)
   ret <2 x double> %res
@@ -231,6 +253,7 @@ define <8 x float> @test_x86_fma_vfmsubadd_ps_256(<8 x float> %a0, <8 x float> %
 ; CHECK-LABEL: test_x86_fma_vfmsubadd_ps_256:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vfmsubaddps %ymm2, %ymm1, %ymm0, %ymm0 # encoding: [0xc4,0xe3,0xfd,0x5e,0xc2,0x10]
+; CHECK-NEXT:    # ymm0 = (ymm0 * ymm1) -/+ ymm2
 ; CHECK-NEXT:    retq # encoding: [0xc3]
   %res = call <8 x float> @llvm.x86.fma.vfmsubadd.ps.256(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2)
   ret <8 x float> %res
@@ -241,6 +264,7 @@ define <4 x double> @test_x86_fma_vfmsubadd_pd_256(<4 x double> %a0, <4 x double
 ; CHECK-LABEL: test_x86_fma_vfmsubadd_pd_256:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vfmsubaddpd %ymm2, %ymm1, %ymm0, %ymm0 # encoding: [0xc4,0xe3,0xfd,0x5f,0xc2,0x10]
+; CHECK-NEXT:    # ymm0 = (ymm0 * ymm1) -/+ ymm2
 ; CHECK-NEXT:    retq # encoding: [0xc3]
   %res = call <4 x double> @llvm.x86.fma.vfmsubadd.pd.256(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2)
   ret <4 x double> %res
diff --git a/llvm/test/CodeGen/X86/fma4-intrinsics-x86.ll b/llvm/test/CodeGen/X86/fma4-intrinsics-x86.ll
index 96e285bc25f3..39adb0209c2a 100644
--- a/llvm/test/CodeGen/X86/fma4-intrinsics-x86.ll
+++ b/llvm/test/CodeGen/X86/fma4-intrinsics-x86.ll
@@ -7,6 +7,7 @@ define <4 x float> @test_x86_fma4_vfmadd_ss(<4 x float> %a0, <4 x float> %a1, <4
 ; CHECK-LABEL: test_x86_fma4_vfmadd_ss:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vfmaddss %xmm2, %xmm1, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0xf9,0x6a,0xc2,0x10]
+; CHECK-NEXT:    # xmm0 = (xmm0 * xmm1) + xmm2
 ; CHECK-NEXT:    retq # encoding: [0xc3]
   %res = call <4 x float> @llvm.x86.fma4.vfmadd.ss(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2)
   ret <4 x float> %res
@@ -16,6 +17,7 @@ define <4 x float> @test_x86_fma4_vfmadd_bac_ss(<4 x float> %a0, <4 x float> %a1
 ; CHECK-LABEL: test_x86_fma4_vfmadd_bac_ss:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vfmaddss %xmm2, %xmm0, %xmm1, %xmm0 # encoding: [0xc4,0xe3,0xf1,0x6a,0xc2,0x00]
+; CHECK-NEXT:    # xmm0 = (xmm1 * xmm0) + xmm2
 ; CHECK-NEXT:    retq # encoding: [0xc3]
   %res = call <4 x float> @llvm.x86.fma4.vfmadd.ss(<4 x float> %a1, <4 x float> %a0, <4 x float> %a2)
   ret <4 x float> %res
@@ -26,6 +28,7 @@ define <2 x double> @test_x86_fma4_vfmadd_sd(<2 x double> %a0, <2 x double> %a1,
 ; CHECK-LABEL: test_x86_fma4_vfmadd_sd:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vfmaddsd %xmm2, %xmm1, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0xf9,0x6b,0xc2,0x10]
+; CHECK-NEXT:    # xmm0 = (xmm0 * xmm1) + xmm2
 ; CHECK-NEXT:    retq # encoding: [0xc3]
   %res = call <2 x double> @llvm.x86.fma4.vfmadd.sd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2)
   ret <2 x double> %res
@@ -35,6 +38,7 @@ define <2 x double> @test_x86_fma4_vfmadd_bac_sd(<2 x double> %a0, <2 x double>
 ; CHECK-LABEL: test_x86_fma4_vfmadd_bac_sd:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vfmaddsd %xmm2, %xmm0, %xmm1, %xmm0 # encoding: [0xc4,0xe3,0xf1,0x6b,0xc2,0x00]
+; CHECK-NEXT:    # xmm0 = (xmm1 * xmm0) + xmm2
 ; CHECK-NEXT:    retq # encoding: [0xc3]
   %res = call <2 x double> @llvm.x86.fma4.vfmadd.sd(<2 x double> %a1, <2 x double> %a0, <2 x double> %a2)
   ret <2 x double> %res
@@ -45,6 +49,7 @@ define <4 x float> @test_x86_fma_vfmadd_ps(<4 x float> %a0, <4 x float> %a1, <4
 ; CHECK-LABEL: test_x86_fma_vfmadd_ps:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vfmaddps %xmm2, %xmm1, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0xf9,0x68,0xc2,0x10]
+; CHECK-NEXT:    # xmm0 = (xmm0 * xmm1) + xmm2
 ; CHECK-NEXT:    retq # encoding: [0xc3]
   %1 = call <4 x float> @llvm.fma.v4f32(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2)
   ret <4 x float> %1
@@ -54,6 +59,7 @@ define <2 x double> @test_x86_fma_vfmadd_pd(<2 x double> %a0, <2 x double> %a1,
 ; CHECK-LABEL: test_x86_fma_vfmadd_pd:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vfmaddpd %xmm2, %xmm1, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0xf9,0x69,0xc2,0x10]
+; CHECK-NEXT:    # xmm0 = (xmm0 * xmm1) + xmm2
 ; CHECK-NEXT:    retq # encoding: [0xc3]
   %1 = call <2 x double> @llvm.fma.v2f64(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2)
   ret <2 x double> %1
@@ -63,6 +69,7 @@ define <8 x float> @test_x86_fma_vfmadd_ps_256(<8 x float> %a0, <8 x float> %a1,
 ; CHECK-LABEL: test_x86_fma_vfmadd_ps_256:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vfmaddps %ymm2, %ymm1, %ymm0, %ymm0 # encoding: [0xc4,0xe3,0xfd,0x68,0xc2,0x10]
+; CHECK-NEXT:    # ymm0 = (ymm0 * ymm1) + ymm2
 ; CHECK-NEXT:    retq # encoding: [0xc3]
   %1 = call <8 x float> @llvm.fma.v8f32(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2)
   ret <8 x float> %1
@@ -72,6 +79,7 @@ define <4 x double> @test_x86_fma_vfmadd_pd_256(<4 x double> %a0, <4 x double> %
 ; CHECK-LABEL: test_x86_fma_vfmadd_pd_256:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vfmaddpd %ymm2, %ymm1, %ymm0, %ymm0 # encoding: [0xc4,0xe3,0xfd,0x69,0xc2,0x10]
+; CHECK-NEXT:    # ymm0 = (ymm0 * ymm1) + ymm2
 ; CHECK-NEXT:    retq # encoding: [0xc3]
   %1 = call <4 x double> @llvm.fma.v4f64(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2)
   ret <4 x double> %1
@@ -82,6 +90,7 @@ define <4 x float> @test_x86_fma_vfmsub_ps(<4 x float> %a0, <4 x float> %a1, <4
 ; CHECK-LABEL: test_x86_fma_vfmsub_ps:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vfmsubps %xmm2, %xmm1, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0xf9,0x6c,0xc2,0x10]
+; CHECK-NEXT:    # xmm0 = (xmm0 * xmm1) - xmm2
 ; CHECK-NEXT:    retq # encoding: [0xc3]
   %1 = fsub <4 x float> , %a2
   %2 = call <4 x float> @llvm.fma.v4f32(<4 x float> %a0, <4 x float> %a1, <4 x float> %1)
@@ -92,6 +101,7 @@ define <2 x double> @test_x86_fma_vfmsub_pd(<2 x double> %a0, <2 x double> %a1,
 ; CHECK-LABEL: test_x86_fma_vfmsub_pd:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vfmsubpd %xmm2, %xmm1, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0xf9,0x6d,0xc2,0x10]
+; CHECK-NEXT:    # xmm0 = (xmm0 * xmm1) - xmm2
 ; CHECK-NEXT:    retq # encoding: [0xc3]
   %1 = fsub <2 x double> , %a2
   %2 = call <2 x double> @llvm.fma.v2f64(<2 x double> %a0, <2 x double> %a1, <2 x double> %1)
@@ -102,6 +112,7 @@ define <8 x float> @test_x86_fma_vfmsub_ps_256(<8 x float> %a0, <8 x float> %a1,
 ; CHECK-LABEL: test_x86_fma_vfmsub_ps_256:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vfmsubps %ymm2, %ymm1, %ymm0, %ymm0 # encoding: [0xc4,0xe3,0xfd,0x6c,0xc2,0x10]
+; CHECK-NEXT:    # ymm0 = (ymm0 * ymm1) - ymm2
 ; CHECK-NEXT:    retq # encoding: [0xc3]
   %1 = fsub <8 x float> , %a2
   %2 = call <8 x float> @llvm.fma.v8f32(<8 x float> %a0, <8 x float> %a1, <8 x float> %1)
@@ -112,6 +123,7 @@ define <4 x double> @test_x86_fma_vfmsub_pd_256(<4 x double> %a0, <4 x double> %
 ; CHECK-LABEL: test_x86_fma_vfmsub_pd_256:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vfmsubpd %ymm2, %ymm1, %ymm0, %ymm0 # encoding: [0xc4,0xe3,0xfd,0x6d,0xc2,0x10]
+; CHECK-NEXT:    # ymm0 = (ymm0 * ymm1) - ymm2
 ; CHECK-NEXT:    retq # encoding: [0xc3]
   %1 = fsub <4 x double> , %a2
   %2 = call <4 x double> @llvm.fma.v4f64(<4 x double> %a0, <4 x double> %a1, <4 x double> %1)
@@ -123,6 +135,7 @@ define <4 x float> @test_x86_fma_vfnmadd_ps(<4 x float> %a0, <4 x float> %a1, <4
 ; CHECK-LABEL: test_x86_fma_vfnmadd_ps:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vfnmaddps %xmm2, %xmm1, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0xf9,0x78,0xc2,0x10]
+; CHECK-NEXT:    # xmm0 = -(xmm0 * xmm1) + xmm2
 ; CHECK-NEXT:    retq # encoding: [0xc3]
   %1 = fsub <4 x float> , %a0
   %2 = call <4 x float> @llvm.fma.v4f32(<4 x float> %1, <4 x float> %a1, <4 x float> %a2)
@@ -133,6 +146,7 @@ define <2 x double> @test_x86_fma_vfnmadd_pd(<2 x double> %a0, <2 x double> %a1,
 ; CHECK-LABEL: test_x86_fma_vfnmadd_pd:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vfnmaddpd %xmm2, %xmm1, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0xf9,0x79,0xc2,0x10]
+; CHECK-NEXT:    # xmm0 = -(xmm0 * xmm1) + xmm2
 ; CHECK-NEXT:    retq # encoding: [0xc3]
   %1 = fsub <2 x double> , %a0
   %2 = call <2 x double> @llvm.fma.v2f64(<2 x double> %1, <2 x double> %a1, <2 x double> %a2)
@@ -143,6 +157,7 @@ define <8 x float> @test_x86_fma_vfnmadd_ps_256(<8 x float> %a0, <8 x float> %a1
 ; CHECK-LABEL: test_x86_fma_vfnmadd_ps_256:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vfnmaddps %ymm2, %ymm1, %ymm0, %ymm0 # encoding: [0xc4,0xe3,0xfd,0x78,0xc2,0x10]
+; CHECK-NEXT:    # ymm0 = -(ymm0 * ymm1) + ymm2
 ; CHECK-NEXT:    retq # encoding: [0xc3]
   %1 = fsub <8 x float> , %a0
   %2 = call <8 x float> @llvm.fma.v8f32(<8 x float> %1, <8 x float> %a1, <8 x float> %a2)
@@ -153,6 +168,7 @@ define <4 x double> @test_x86_fma_vfnmadd_pd_256(<4 x double> %a0, <4 x double>
 ; CHECK-LABEL: test_x86_fma_vfnmadd_pd_256:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vfnmaddpd %ymm2, %ymm1, %ymm0, %ymm0 # encoding: [0xc4,0xe3,0xfd,0x79,0xc2,0x10]
+; CHECK-NEXT:    # ymm0 = -(ymm0 * ymm1) + ymm2
 ; CHECK-NEXT:    retq # encoding: [0xc3]
   %1 = fsub <4 x double> , %a0
   %2 = call <4 x double> @llvm.fma.v4f64(<4 x double> %1, <4 x double> %a1, <4 x double> %a2)
@@ -164,6 +180,7 @@ define <4 x float> @test_x86_fma_vfnmsub_ps(<4 x float> %a0, <4 x float> %a1, <4
 ; CHECK-LABEL: test_x86_fma_vfnmsub_ps:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vfnmsubps %xmm2, %xmm1, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0xf9,0x7c,0xc2,0x10]
+; CHECK-NEXT:    # xmm0 = -(xmm0 * xmm1) - xmm2
 ; CHECK-NEXT:    retq # encoding: [0xc3]
   %1 = fsub <4 x float> , %a0
   %2 = fsub <4 x float> , %a2
@@ -175,6 +192,7 @@ define <2 x double> @test_x86_fma_vfnmsub_pd(<2 x double> %a0, <2 x double> %a1,
 ; CHECK-LABEL: test_x86_fma_vfnmsub_pd:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vfnmsubpd %xmm2, %xmm1, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0xf9,0x7d,0xc2,0x10]
+; CHECK-NEXT:    # xmm0 = -(xmm0 * xmm1) - xmm2
 ; CHECK-NEXT:    retq # encoding: [0xc3]
   %1 = fsub <2 x double> , %a0
   %2 = fsub <2 x double> , %a2
@@ -186,6 +204,7 @@ define <8 x float> @test_x86_fma_vfnmsub_ps_256(<8 x float> %a0, <8 x float> %a1
 ; CHECK-LABEL: test_x86_fma_vfnmsub_ps_256:
 ; CHECK:       # %bb.0:
encoding: [0xc4,0xe3,0xfd,0x7c,0xc2,0x10] +; CHECK-NEXT: # ymm0 = -(ymm0 * ymm1) - ymm2 ; CHECK-NEXT: retq # encoding: [0xc3] %1 = fsub <8 x float> , %a0 %2 = fsub <8 x float> , %a2 @@ -197,6 +216,7 @@ define <4 x double> @test_x86_fma_vfnmsub_pd_256(<4 x double> %a0, <4 x double> ; CHECK-LABEL: test_x86_fma_vfnmsub_pd_256: ; CHECK: # %bb.0: ; CHECK-NEXT: vfnmsubpd %ymm2, %ymm1, %ymm0, %ymm0 # encoding: [0xc4,0xe3,0xfd,0x7d,0xc2,0x10] +; CHECK-NEXT: # ymm0 = -(ymm0 * ymm1) - ymm2 ; CHECK-NEXT: retq # encoding: [0xc3] %1 = fsub <4 x double> , %a0 %2 = fsub <4 x double> , %a2 @@ -209,6 +229,7 @@ define <4 x float> @test_x86_fma_vfmaddsub_ps(<4 x float> %a0, <4 x float> %a1, ; CHECK-LABEL: test_x86_fma_vfmaddsub_ps: ; CHECK: # %bb.0: ; CHECK-NEXT: vfmaddsubps %xmm2, %xmm1, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0xf9,0x5c,0xc2,0x10] +; CHECK-NEXT: # xmm0 = (xmm0 * xmm1) +/- xmm2 ; CHECK-NEXT: retq # encoding: [0xc3] %1 = call <4 x float> @llvm.fma.v4f32(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2) %2 = fsub <4 x float> , %a2 @@ -221,6 +242,7 @@ define <2 x double> @test_x86_fma_vfmaddsub_pd(<2 x double> %a0, <2 x double> %a ; CHECK-LABEL: test_x86_fma_vfmaddsub_pd: ; CHECK: # %bb.0: ; CHECK-NEXT: vfmaddsubpd %xmm2, %xmm1, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0xf9,0x5d,0xc2,0x10] +; CHECK-NEXT: # xmm0 = (xmm0 * xmm1) +/- xmm2 ; CHECK-NEXT: retq # encoding: [0xc3] %1 = call <2 x double> @llvm.fma.v2f64(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2) %2 = fsub <2 x double> , %a2 @@ -233,6 +255,7 @@ define <8 x float> @test_x86_fma_vfmaddsub_ps_256(<8 x float> %a0, <8 x float> % ; CHECK-LABEL: test_x86_fma_vfmaddsub_ps_256: ; CHECK: # %bb.0: ; CHECK-NEXT: vfmaddsubps %ymm2, %ymm1, %ymm0, %ymm0 # encoding: [0xc4,0xe3,0xfd,0x5c,0xc2,0x10] +; CHECK-NEXT: # ymm0 = (ymm0 * ymm1) +/- ymm2 ; CHECK-NEXT: retq # encoding: [0xc3] %1 = call <8 x float> @llvm.fma.v8f32(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2) %2 = fsub <8 x float> , %a2 @@ -245,6 +268,7 @@ define <4 x double> @test_x86_fma_vfmaddsub_pd_256(<4 x double> %a0, <4 x double ; CHECK-LABEL: test_x86_fma_vfmaddsub_pd_256: ; CHECK: # %bb.0: ; CHECK-NEXT: vfmaddsubpd %ymm2, %ymm1, %ymm0, %ymm0 # encoding: [0xc4,0xe3,0xfd,0x5d,0xc2,0x10] +; CHECK-NEXT: # ymm0 = (ymm0 * ymm1) +/- ymm2 ; CHECK-NEXT: retq # encoding: [0xc3] %1 = call <4 x double> @llvm.fma.v4f64(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2) %2 = fsub <4 x double> , %a2 @@ -258,6 +282,7 @@ define <4 x float> @test_x86_fma_vfmsubadd_ps(<4 x float> %a0, <4 x float> %a1, ; CHECK-LABEL: test_x86_fma_vfmsubadd_ps: ; CHECK: # %bb.0: ; CHECK-NEXT: vfmsubaddps %xmm2, %xmm1, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0xf9,0x5e,0xc2,0x10] +; CHECK-NEXT: # xmm0 = (xmm0 * xmm1) -/+ xmm2 ; CHECK-NEXT: retq # encoding: [0xc3] %1 = call <4 x float> @llvm.fma.v4f32(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2) %2 = fsub <4 x float> , %a2 @@ -270,6 +295,7 @@ define <2 x double> @test_x86_fma_vfmsubadd_pd(<2 x double> %a0, <2 x double> %a ; CHECK-LABEL: test_x86_fma_vfmsubadd_pd: ; CHECK: # %bb.0: ; CHECK-NEXT: vfmsubaddpd %xmm2, %xmm1, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0xf9,0x5f,0xc2,0x10] +; CHECK-NEXT: # xmm0 = (xmm0 * xmm1) -/+ xmm2 ; CHECK-NEXT: retq # encoding: [0xc3] %1 = call <2 x double> @llvm.fma.v2f64(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2) %2 = fsub <2 x double> , %a2 @@ -282,6 +308,7 @@ define <8 x float> @test_x86_fma_vfmsubadd_ps_256(<8 x float> %a0, <8 x float> % ; CHECK-LABEL: test_x86_fma_vfmsubadd_ps_256: ; CHECK: # %bb.0: ; CHECK-NEXT: vfmsubaddps 
%ymm2, %ymm1, %ymm0, %ymm0 # encoding: [0xc4,0xe3,0xfd,0x5e,0xc2,0x10] +; CHECK-NEXT: # ymm0 = (ymm0 * ymm1) -/+ ymm2 ; CHECK-NEXT: retq # encoding: [0xc3] %1 = call <8 x float> @llvm.fma.v8f32(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2) %2 = fsub <8 x float> , %a2 @@ -294,6 +321,7 @@ define <4 x double> @test_x86_fma_vfmsubadd_pd_256(<4 x double> %a0, <4 x double ; CHECK-LABEL: test_x86_fma_vfmsubadd_pd_256: ; CHECK: # %bb.0: ; CHECK-NEXT: vfmsubaddpd %ymm2, %ymm1, %ymm0, %ymm0 # encoding: [0xc4,0xe3,0xfd,0x5f,0xc2,0x10] +; CHECK-NEXT: # ymm0 = (ymm0 * ymm1) -/+ ymm2 ; CHECK-NEXT: retq # encoding: [0xc3] %1 = call <4 x double> @llvm.fma.v4f64(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2) %2 = fsub <4 x double> , %a2 diff --git a/llvm/test/CodeGen/X86/fma4-intrinsics-x86_64-folded-load.ll b/llvm/test/CodeGen/X86/fma4-intrinsics-x86_64-folded-load.ll index 7947421205ac..7353ed4d06d2 100644 --- a/llvm/test/CodeGen/X86/fma4-intrinsics-x86_64-folded-load.ll +++ b/llvm/test/CodeGen/X86/fma4-intrinsics-x86_64-folded-load.ll @@ -7,6 +7,7 @@ define < 4 x float > @test_x86_fma4_vfmadd_ss_load(< 4 x float > %a0, < 4 x floa ; CHECK-LABEL: test_x86_fma4_vfmadd_ss_load: ; CHECK: # %bb.0: ; CHECK-NEXT: vfmaddss (%rdi), %xmm1, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0xf9,0x6a,0x07,0x10] +; CHECK-NEXT: # xmm0 = (xmm0 * xmm1) + mem ; CHECK-NEXT: retq # encoding: [0xc3] %x = load float , float *%a2 %y = insertelement <4 x float> undef, float %x, i32 0 @@ -17,6 +18,7 @@ define < 4 x float > @test_x86_fma4_vfmadd_ss_load2(< 4 x float > %a0, float* %a ; CHECK-LABEL: test_x86_fma4_vfmadd_ss_load2: ; CHECK: # %bb.0: ; CHECK-NEXT: vfmaddss %xmm1, (%rdi), %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x6a,0x07,0x10] +; CHECK-NEXT: # xmm0 = (xmm0 * mem) + xmm1 ; CHECK-NEXT: retq # encoding: [0xc3] %x = load float , float *%a1 %y = insertelement <4 x float> undef, float %x, i32 0 @@ -30,6 +32,7 @@ define < 2 x double > @test_x86_fma4_vfmadd_sd_load(< 2 x double > %a0, < 2 x do ; CHECK-LABEL: test_x86_fma4_vfmadd_sd_load: ; CHECK: # %bb.0: ; CHECK-NEXT: vfmaddsd (%rdi), %xmm1, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0xf9,0x6b,0x07,0x10] +; CHECK-NEXT: # xmm0 = (xmm0 * xmm1) + mem ; CHECK-NEXT: retq # encoding: [0xc3] %x = load double , double *%a2 %y = insertelement <2 x double> undef, double %x, i32 0 @@ -40,6 +43,7 @@ define < 2 x double > @test_x86_fma4_vfmadd_sd_load2(< 2 x double > %a0, double* ; CHECK-LABEL: test_x86_fma4_vfmadd_sd_load2: ; CHECK: # %bb.0: ; CHECK-NEXT: vfmaddsd %xmm1, (%rdi), %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x6b,0x07,0x10] +; CHECK-NEXT: # xmm0 = (xmm0 * mem) + xmm1 ; CHECK-NEXT: retq # encoding: [0xc3] %x = load double , double *%a1 %y = insertelement <2 x double> undef, double %x, i32 0 @@ -51,6 +55,7 @@ define < 4 x float > @test_x86_fma_vfmadd_ps_load(< 4 x float > %a0, < 4 x float ; CHECK-LABEL: test_x86_fma_vfmadd_ps_load: ; CHECK: # %bb.0: ; CHECK-NEXT: vfmaddps (%rdi), %xmm1, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0xf9,0x68,0x07,0x10] +; CHECK-NEXT: # xmm0 = (xmm0 * xmm1) + mem ; CHECK-NEXT: retq # encoding: [0xc3] %x = load <4 x float>, <4 x float>* %a2 %res = call < 4 x float > @llvm.x86.fma.vfmadd.ps(< 4 x float > %a0, < 4 x float > %a1, < 4 x float > %x) @@ -60,6 +65,7 @@ define < 4 x float > @test_x86_fma_vfmadd_ps_load2(< 4 x float > %a0, < 4 x floa ; CHECK-LABEL: test_x86_fma_vfmadd_ps_load2: ; CHECK: # %bb.0: ; CHECK-NEXT: vfmaddps %xmm1, (%rdi), %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x68,0x07,0x10] +; CHECK-NEXT: # xmm0 = (xmm0 * mem) + xmm1 ; CHECK-NEXT: retq # 
encoding: [0xc3] %x = load <4 x float>, <4 x float>* %a1 %res = call < 4 x float > @llvm.x86.fma.vfmadd.ps(< 4 x float > %a0, < 4 x float > %x, < 4 x float > %a2) @@ -73,6 +79,7 @@ define < 4 x float > @test_x86_fma_vfmadd_ps_load3(< 4 x float >* %a0, < 4 x flo ; CHECK: # %bb.0: ; CHECK-NEXT: vmovaps (%rdi), %xmm1 # encoding: [0xc5,0xf8,0x28,0x0f] ; CHECK-NEXT: vfmaddps %xmm0, (%rsi), %xmm1, %xmm0 # encoding: [0xc4,0xe3,0x71,0x68,0x06,0x00] +; CHECK-NEXT: # xmm0 = (xmm1 * mem) + xmm0 ; CHECK-NEXT: retq # encoding: [0xc3] %x = load <4 x float>, <4 x float>* %a0 %y = load <4 x float>, <4 x float>* %a1 @@ -84,6 +91,7 @@ define < 2 x double > @test_x86_fma_vfmadd_pd_load(< 2 x double > %a0, < 2 x dou ; CHECK-LABEL: test_x86_fma_vfmadd_pd_load: ; CHECK: # %bb.0: ; CHECK-NEXT: vfmaddpd (%rdi), %xmm1, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0xf9,0x69,0x07,0x10] +; CHECK-NEXT: # xmm0 = (xmm0 * xmm1) + mem ; CHECK-NEXT: retq # encoding: [0xc3] %x = load <2 x double>, <2 x double>* %a2 %res = call < 2 x double > @llvm.x86.fma.vfmadd.pd(< 2 x double > %a0, < 2 x double > %a1, < 2 x double > %x) @@ -93,6 +101,7 @@ define < 2 x double > @test_x86_fma_vfmadd_pd_load2(< 2 x double > %a0, < 2 x do ; CHECK-LABEL: test_x86_fma_vfmadd_pd_load2: ; CHECK: # %bb.0: ; CHECK-NEXT: vfmaddpd %xmm1, (%rdi), %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x69,0x07,0x10] +; CHECK-NEXT: # xmm0 = (xmm0 * mem) + xmm1 ; CHECK-NEXT: retq # encoding: [0xc3] %x = load <2 x double>, <2 x double>* %a1 %res = call < 2 x double > @llvm.x86.fma.vfmadd.pd(< 2 x double > %a0, < 2 x double > %x, < 2 x double > %a2) @@ -106,6 +115,7 @@ define < 2 x double > @test_x86_fma_vfmadd_pd_load3(< 2 x double >* %a0, < 2 x d ; CHECK: # %bb.0: ; CHECK-NEXT: vmovapd (%rdi), %xmm1 # encoding: [0xc5,0xf9,0x28,0x0f] ; CHECK-NEXT: vfmaddpd %xmm0, (%rsi), %xmm1, %xmm0 # encoding: [0xc4,0xe3,0x71,0x69,0x06,0x00] +; CHECK-NEXT: # xmm0 = (xmm1 * mem) + xmm0 ; CHECK-NEXT: retq # encoding: [0xc3] %x = load <2 x double>, <2 x double>* %a0 %y = load <2 x double>, <2 x double>* %a1 diff --git a/llvm/test/CodeGen/X86/fma4-scalar-memfold.ll b/llvm/test/CodeGen/X86/fma4-scalar-memfold.ll index 5d32278f788f..4fbd5d864ad7 100644 --- a/llvm/test/CodeGen/X86/fma4-scalar-memfold.ll +++ b/llvm/test/CodeGen/X86/fma4-scalar-memfold.ll @@ -10,7 +10,7 @@ define void @fmadd_aab_ss(float* %a, float* %b) { ; CHECK-LABEL: fmadd_aab_ss: ; CHECK: # %bb.0: ; CHECK-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero -; CHECK-NEXT: vfmaddss (%rsi), %xmm0, %xmm0, %xmm0 +; CHECK-NEXT: vfmaddss {{.*#+}} xmm0 = (xmm0 * xmm0) + mem ; CHECK-NEXT: vmovss %xmm0, (%rdi) ; CHECK-NEXT: retq %a.val = load float, float* %a @@ -36,7 +36,7 @@ define void @fmadd_aba_ss(float* %a, float* %b) { ; CHECK-LABEL: fmadd_aba_ss: ; CHECK: # %bb.0: ; CHECK-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero -; CHECK-NEXT: vfmaddss %xmm0, (%rsi), %xmm0, %xmm0 +; CHECK-NEXT: vfmaddss {{.*#+}} xmm0 = (xmm0 * mem) + xmm0 ; CHECK-NEXT: vmovss %xmm0, (%rdi) ; CHECK-NEXT: retq %a.val = load float, float* %a @@ -62,7 +62,7 @@ define void @fmadd_aab_sd(double* %a, double* %b) { ; CHECK-LABEL: fmadd_aab_sd: ; CHECK: # %bb.0: ; CHECK-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero -; CHECK-NEXT: vfmaddsd (%rsi), %xmm0, %xmm0, %xmm0 +; CHECK-NEXT: vfmaddsd {{.*#+}} xmm0 = (xmm0 * xmm0) + mem ; CHECK-NEXT: vmovsd %xmm0, (%rdi) ; CHECK-NEXT: retq %a.val = load double, double* %a @@ -84,7 +84,7 @@ define void @fmadd_aba_sd(double* %a, double* %b) { ; CHECK-LABEL: fmadd_aba_sd: ; CHECK: # %bb.0: ; CHECK-NEXT: vmovsd {{.*#+}} xmm0 = 
mem[0],zero -; CHECK-NEXT: vfmaddsd %xmm0, (%rsi), %xmm0, %xmm0 +; CHECK-NEXT: vfmaddsd {{.*#+}} xmm0 = (xmm0 * mem) + xmm0 ; CHECK-NEXT: vmovsd %xmm0, (%rdi) ; CHECK-NEXT: retq %a.val = load double, double* %a diff --git a/llvm/test/CodeGen/X86/fma_patterns.ll b/llvm/test/CodeGen/X86/fma_patterns.ll index 043a5206f296..32bcd48e1d10 100644 --- a/llvm/test/CodeGen/X86/fma_patterns.ll +++ b/llvm/test/CodeGen/X86/fma_patterns.ll @@ -20,7 +20,7 @@ define float @test_f32_fmadd(float %a0, float %a1, float %a2) { ; ; FMA4-LABEL: test_f32_fmadd: ; FMA4: # %bb.0: -; FMA4-NEXT: vfmaddss %xmm2, %xmm1, %xmm0, %xmm0 +; FMA4-NEXT: vfmaddss {{.*#+}} xmm0 = (xmm0 * xmm1) + xmm2 ; FMA4-NEXT: retq ; ; AVX512-LABEL: test_f32_fmadd: @@ -40,7 +40,7 @@ define <4 x float> @test_4f32_fmadd(<4 x float> %a0, <4 x float> %a1, <4 x float ; ; FMA4-LABEL: test_4f32_fmadd: ; FMA4: # %bb.0: -; FMA4-NEXT: vfmaddps %xmm2, %xmm1, %xmm0, %xmm0 +; FMA4-NEXT: vfmaddps {{.*#+}} xmm0 = (xmm0 * xmm1) + xmm2 ; FMA4-NEXT: retq ; ; AVX512-LABEL: test_4f32_fmadd: @@ -60,7 +60,7 @@ define <8 x float> @test_8f32_fmadd(<8 x float> %a0, <8 x float> %a1, <8 x float ; ; FMA4-LABEL: test_8f32_fmadd: ; FMA4: # %bb.0: -; FMA4-NEXT: vfmaddps %ymm2, %ymm1, %ymm0, %ymm0 +; FMA4-NEXT: vfmaddps {{.*#+}} ymm0 = (ymm0 * ymm1) + ymm2 ; FMA4-NEXT: retq ; ; AVX512-LABEL: test_8f32_fmadd: @@ -80,7 +80,7 @@ define double @test_f64_fmadd(double %a0, double %a1, double %a2) { ; ; FMA4-LABEL: test_f64_fmadd: ; FMA4: # %bb.0: -; FMA4-NEXT: vfmaddsd %xmm2, %xmm1, %xmm0, %xmm0 +; FMA4-NEXT: vfmaddsd {{.*#+}} xmm0 = (xmm0 * xmm1) + xmm2 ; FMA4-NEXT: retq ; ; AVX512-LABEL: test_f64_fmadd: @@ -100,7 +100,7 @@ define <2 x double> @test_2f64_fmadd(<2 x double> %a0, <2 x double> %a1, <2 x do ; ; FMA4-LABEL: test_2f64_fmadd: ; FMA4: # %bb.0: -; FMA4-NEXT: vfmaddpd %xmm2, %xmm1, %xmm0, %xmm0 +; FMA4-NEXT: vfmaddpd {{.*#+}} xmm0 = (xmm0 * xmm1) + xmm2 ; FMA4-NEXT: retq ; ; AVX512-LABEL: test_2f64_fmadd: @@ -120,7 +120,7 @@ define <4 x double> @test_4f64_fmadd(<4 x double> %a0, <4 x double> %a1, <4 x do ; ; FMA4-LABEL: test_4f64_fmadd: ; FMA4: # %bb.0: -; FMA4-NEXT: vfmaddpd %ymm2, %ymm1, %ymm0, %ymm0 +; FMA4-NEXT: vfmaddpd {{.*#+}} ymm0 = (ymm0 * ymm1) + ymm2 ; FMA4-NEXT: retq ; ; AVX512-LABEL: test_4f64_fmadd: @@ -144,7 +144,7 @@ define float @test_f32_fmsub(float %a0, float %a1, float %a2) { ; ; FMA4-LABEL: test_f32_fmsub: ; FMA4: # %bb.0: -; FMA4-NEXT: vfmsubss %xmm2, %xmm1, %xmm0, %xmm0 +; FMA4-NEXT: vfmsubss {{.*#+}} xmm0 = (xmm0 * xmm1) - xmm2 ; FMA4-NEXT: retq ; ; AVX512-LABEL: test_f32_fmsub: @@ -164,7 +164,7 @@ define <4 x float> @test_4f32_fmsub(<4 x float> %a0, <4 x float> %a1, <4 x float ; ; FMA4-LABEL: test_4f32_fmsub: ; FMA4: # %bb.0: -; FMA4-NEXT: vfmsubps %xmm2, %xmm1, %xmm0, %xmm0 +; FMA4-NEXT: vfmsubps {{.*#+}} xmm0 = (xmm0 * xmm1) - xmm2 ; FMA4-NEXT: retq ; ; AVX512-LABEL: test_4f32_fmsub: @@ -184,7 +184,7 @@ define <8 x float> @test_8f32_fmsub(<8 x float> %a0, <8 x float> %a1, <8 x float ; ; FMA4-LABEL: test_8f32_fmsub: ; FMA4: # %bb.0: -; FMA4-NEXT: vfmsubps %ymm2, %ymm1, %ymm0, %ymm0 +; FMA4-NEXT: vfmsubps {{.*#+}} ymm0 = (ymm0 * ymm1) - ymm2 ; FMA4-NEXT: retq ; ; AVX512-LABEL: test_8f32_fmsub: @@ -204,7 +204,7 @@ define double @test_f64_fmsub(double %a0, double %a1, double %a2) { ; ; FMA4-LABEL: test_f64_fmsub: ; FMA4: # %bb.0: -; FMA4-NEXT: vfmsubsd %xmm2, %xmm1, %xmm0, %xmm0 +; FMA4-NEXT: vfmsubsd {{.*#+}} xmm0 = (xmm0 * xmm1) - xmm2 ; FMA4-NEXT: retq ; ; AVX512-LABEL: test_f64_fmsub: @@ -224,7 +224,7 @@ define <2 x double> @test_2f64_fmsub(<2 
x double> %a0, <2 x double> %a1, <2 x do ; ; FMA4-LABEL: test_2f64_fmsub: ; FMA4: # %bb.0: -; FMA4-NEXT: vfmsubpd %xmm2, %xmm1, %xmm0, %xmm0 +; FMA4-NEXT: vfmsubpd {{.*#+}} xmm0 = (xmm0 * xmm1) - xmm2 ; FMA4-NEXT: retq ; ; AVX512-LABEL: test_2f64_fmsub: @@ -244,7 +244,7 @@ define <4 x double> @test_4f64_fmsub(<4 x double> %a0, <4 x double> %a1, <4 x do ; ; FMA4-LABEL: test_4f64_fmsub: ; FMA4: # %bb.0: -; FMA4-NEXT: vfmsubpd %ymm2, %ymm1, %ymm0, %ymm0 +; FMA4-NEXT: vfmsubpd {{.*#+}} ymm0 = (ymm0 * ymm1) - ymm2 ; FMA4-NEXT: retq ; ; AVX512-LABEL: test_4f64_fmsub: @@ -268,7 +268,7 @@ define float @test_f32_fnmadd(float %a0, float %a1, float %a2) { ; ; FMA4-LABEL: test_f32_fnmadd: ; FMA4: # %bb.0: -; FMA4-NEXT: vfnmaddss %xmm2, %xmm1, %xmm0, %xmm0 +; FMA4-NEXT: vfnmaddss {{.*#+}} xmm0 = -(xmm0 * xmm1) + xmm2 ; FMA4-NEXT: retq ; ; AVX512-LABEL: test_f32_fnmadd: @@ -288,7 +288,7 @@ define <4 x float> @test_4f32_fnmadd(<4 x float> %a0, <4 x float> %a1, <4 x floa ; ; FMA4-LABEL: test_4f32_fnmadd: ; FMA4: # %bb.0: -; FMA4-NEXT: vfnmaddps %xmm2, %xmm1, %xmm0, %xmm0 +; FMA4-NEXT: vfnmaddps {{.*#+}} xmm0 = -(xmm0 * xmm1) + xmm2 ; FMA4-NEXT: retq ; ; AVX512-LABEL: test_4f32_fnmadd: @@ -308,7 +308,7 @@ define <8 x float> @test_8f32_fnmadd(<8 x float> %a0, <8 x float> %a1, <8 x floa ; ; FMA4-LABEL: test_8f32_fnmadd: ; FMA4: # %bb.0: -; FMA4-NEXT: vfnmaddps %ymm2, %ymm1, %ymm0, %ymm0 +; FMA4-NEXT: vfnmaddps {{.*#+}} ymm0 = -(ymm0 * ymm1) + ymm2 ; FMA4-NEXT: retq ; ; AVX512-LABEL: test_8f32_fnmadd: @@ -328,7 +328,7 @@ define double @test_f64_fnmadd(double %a0, double %a1, double %a2) { ; ; FMA4-LABEL: test_f64_fnmadd: ; FMA4: # %bb.0: -; FMA4-NEXT: vfnmaddsd %xmm2, %xmm1, %xmm0, %xmm0 +; FMA4-NEXT: vfnmaddsd {{.*#+}} xmm0 = -(xmm0 * xmm1) + xmm2 ; FMA4-NEXT: retq ; ; AVX512-LABEL: test_f64_fnmadd: @@ -348,7 +348,7 @@ define <2 x double> @test_2f64_fnmadd(<2 x double> %a0, <2 x double> %a1, <2 x d ; ; FMA4-LABEL: test_2f64_fnmadd: ; FMA4: # %bb.0: -; FMA4-NEXT: vfnmaddpd %xmm2, %xmm1, %xmm0, %xmm0 +; FMA4-NEXT: vfnmaddpd {{.*#+}} xmm0 = -(xmm0 * xmm1) + xmm2 ; FMA4-NEXT: retq ; ; AVX512-LABEL: test_2f64_fnmadd: @@ -368,7 +368,7 @@ define <4 x double> @test_4f64_fnmadd(<4 x double> %a0, <4 x double> %a1, <4 x d ; ; FMA4-LABEL: test_4f64_fnmadd: ; FMA4: # %bb.0: -; FMA4-NEXT: vfnmaddpd %ymm2, %ymm1, %ymm0, %ymm0 +; FMA4-NEXT: vfnmaddpd {{.*#+}} ymm0 = -(ymm0 * ymm1) + ymm2 ; FMA4-NEXT: retq ; ; AVX512-LABEL: test_4f64_fnmadd: @@ -392,7 +392,7 @@ define float @test_f32_fnmsub(float %a0, float %a1, float %a2) { ; ; FMA4-LABEL: test_f32_fnmsub: ; FMA4: # %bb.0: -; FMA4-NEXT: vfnmsubss %xmm2, %xmm1, %xmm0, %xmm0 +; FMA4-NEXT: vfnmsubss {{.*#+}} xmm0 = -(xmm0 * xmm1) - xmm2 ; FMA4-NEXT: retq ; ; AVX512-LABEL: test_f32_fnmsub: @@ -413,7 +413,7 @@ define <4 x float> @test_4f32_fnmsub(<4 x float> %a0, <4 x float> %a1, <4 x floa ; ; FMA4-LABEL: test_4f32_fnmsub: ; FMA4: # %bb.0: -; FMA4-NEXT: vfnmsubps %xmm2, %xmm1, %xmm0, %xmm0 +; FMA4-NEXT: vfnmsubps {{.*#+}} xmm0 = -(xmm0 * xmm1) - xmm2 ; FMA4-NEXT: retq ; ; AVX512-LABEL: test_4f32_fnmsub: @@ -434,7 +434,7 @@ define <8 x float> @test_8f32_fnmsub(<8 x float> %a0, <8 x float> %a1, <8 x floa ; ; FMA4-LABEL: test_8f32_fnmsub: ; FMA4: # %bb.0: -; FMA4-NEXT: vfnmsubps %ymm2, %ymm1, %ymm0, %ymm0 +; FMA4-NEXT: vfnmsubps {{.*#+}} ymm0 = -(ymm0 * ymm1) - ymm2 ; FMA4-NEXT: retq ; ; AVX512-LABEL: test_8f32_fnmsub: @@ -455,7 +455,7 @@ define double @test_f64_fnmsub(double %a0, double %a1, double %a2) { ; ; FMA4-LABEL: test_f64_fnmsub: ; FMA4: # %bb.0: -; FMA4-NEXT: vfnmsubsd 
%xmm2, %xmm1, %xmm0, %xmm0 +; FMA4-NEXT: vfnmsubsd {{.*#+}} xmm0 = -(xmm0 * xmm1) - xmm2 ; FMA4-NEXT: retq ; ; AVX512-LABEL: test_f64_fnmsub: @@ -476,7 +476,7 @@ define <2 x double> @test_2f64_fnmsub(<2 x double> %a0, <2 x double> %a1, <2 x d ; ; FMA4-LABEL: test_2f64_fnmsub: ; FMA4: # %bb.0: -; FMA4-NEXT: vfnmsubpd %xmm2, %xmm1, %xmm0, %xmm0 +; FMA4-NEXT: vfnmsubpd {{.*#+}} xmm0 = -(xmm0 * xmm1) - xmm2 ; FMA4-NEXT: retq ; ; AVX512-LABEL: test_2f64_fnmsub: @@ -497,7 +497,7 @@ define <4 x double> @test_4f64_fnmsub(<4 x double> %a0, <4 x double> %a1, <4 x d ; ; FMA4-LABEL: test_4f64_fnmsub: ; FMA4: # %bb.0: -; FMA4-NEXT: vfnmsubpd %ymm2, %ymm1, %ymm0, %ymm0 +; FMA4-NEXT: vfnmsubpd {{.*#+}} ymm0 = -(ymm0 * ymm1) - ymm2 ; FMA4-NEXT: retq ; ; AVX512-LABEL: test_4f64_fnmsub: @@ -522,7 +522,7 @@ define <4 x float> @test_4f32_fmadd_load(<4 x float>* %a0, <4 x float> %a1, <4 x ; ; FMA4-LABEL: test_4f32_fmadd_load: ; FMA4: # %bb.0: -; FMA4-NEXT: vfmaddps %xmm1, (%rdi), %xmm0, %xmm0 +; FMA4-NEXT: vfmaddps {{.*#+}} xmm0 = (xmm0 * mem) + xmm1 ; FMA4-NEXT: retq ; ; AVX512-LABEL: test_4f32_fmadd_load: @@ -543,7 +543,7 @@ define <2 x double> @test_2f64_fmsub_load(<2 x double>* %a0, <2 x double> %a1, < ; ; FMA4-LABEL: test_2f64_fmsub_load: ; FMA4: # %bb.0: -; FMA4-NEXT: vfmsubpd %xmm1, (%rdi), %xmm0, %xmm0 +; FMA4-NEXT: vfmsubpd {{.*#+}} xmm0 = (xmm0 * mem) - xmm1 ; FMA4-NEXT: retq ; ; AVX512-LABEL: test_2f64_fmsub_load: @@ -586,7 +586,7 @@ define <4 x float> @test_v4f32_mul_add_x_one_y(<4 x float> %x, <4 x float> %y) { ; ; FMA4-NOINFS-LABEL: test_v4f32_mul_add_x_one_y: ; FMA4-NOINFS: # %bb.0: -; FMA4-NOINFS-NEXT: vfmaddps %xmm1, %xmm1, %xmm0, %xmm0 +; FMA4-NOINFS-NEXT: vfmaddps {{.*#+}} xmm0 = (xmm0 * xmm1) + xmm1 ; FMA4-NOINFS-NEXT: retq ; ; AVX512-NOINFS-LABEL: test_v4f32_mul_add_x_one_y: @@ -624,7 +624,7 @@ define <4 x float> @test_v4f32_mul_y_add_x_one(<4 x float> %x, <4 x float> %y) { ; ; FMA4-NOINFS-LABEL: test_v4f32_mul_y_add_x_one: ; FMA4-NOINFS: # %bb.0: -; FMA4-NOINFS-NEXT: vfmaddps %xmm1, %xmm1, %xmm0, %xmm0 +; FMA4-NOINFS-NEXT: vfmaddps {{.*#+}} xmm0 = (xmm0 * xmm1) + xmm1 ; FMA4-NOINFS-NEXT: retq ; ; AVX512-NOINFS-LABEL: test_v4f32_mul_y_add_x_one: @@ -662,7 +662,7 @@ define <4 x float> @test_v4f32_mul_y_add_x_one_undefs(<4 x float> %x, <4 x float ; ; FMA4-NOINFS-LABEL: test_v4f32_mul_y_add_x_one_undefs: ; FMA4-NOINFS: # %bb.0: -; FMA4-NOINFS-NEXT: vfmaddps %xmm1, %xmm1, %xmm0, %xmm0 +; FMA4-NOINFS-NEXT: vfmaddps {{.*#+}} xmm0 = (xmm0 * xmm1) + xmm1 ; FMA4-NOINFS-NEXT: retq ; ; AVX512-NOINFS-LABEL: test_v4f32_mul_y_add_x_one_undefs: @@ -700,7 +700,7 @@ define <4 x float> @test_v4f32_mul_add_x_negone_y(<4 x float> %x, <4 x float> %y ; ; FMA4-NOINFS-LABEL: test_v4f32_mul_add_x_negone_y: ; FMA4-NOINFS: # %bb.0: -; FMA4-NOINFS-NEXT: vfmsubps %xmm1, %xmm1, %xmm0, %xmm0 +; FMA4-NOINFS-NEXT: vfmsubps {{.*#+}} xmm0 = (xmm0 * xmm1) - xmm1 ; FMA4-NOINFS-NEXT: retq ; ; AVX512-NOINFS-LABEL: test_v4f32_mul_add_x_negone_y: @@ -738,7 +738,7 @@ define <4 x float> @test_v4f32_mul_y_add_x_negone(<4 x float> %x, <4 x float> %y ; ; FMA4-NOINFS-LABEL: test_v4f32_mul_y_add_x_negone: ; FMA4-NOINFS: # %bb.0: -; FMA4-NOINFS-NEXT: vfmsubps %xmm1, %xmm1, %xmm0, %xmm0 +; FMA4-NOINFS-NEXT: vfmsubps {{.*#+}} xmm0 = (xmm0 * xmm1) - xmm1 ; FMA4-NOINFS-NEXT: retq ; ; AVX512-NOINFS-LABEL: test_v4f32_mul_y_add_x_negone: @@ -776,7 +776,7 @@ define <4 x float> @test_v4f32_mul_y_add_x_negone_undefs(<4 x float> %x, <4 x fl ; ; FMA4-NOINFS-LABEL: test_v4f32_mul_y_add_x_negone_undefs: ; FMA4-NOINFS: # %bb.0: -; FMA4-NOINFS-NEXT: 
vfmsubps %xmm1, %xmm1, %xmm0, %xmm0 +; FMA4-NOINFS-NEXT: vfmsubps {{.*#+}} xmm0 = (xmm0 * xmm1) - xmm1 ; FMA4-NOINFS-NEXT: retq ; ; AVX512-NOINFS-LABEL: test_v4f32_mul_y_add_x_negone_undefs: @@ -817,7 +817,7 @@ define <4 x float> @test_v4f32_mul_sub_one_x_y(<4 x float> %x, <4 x float> %y) { ; ; FMA4-NOINFS-LABEL: test_v4f32_mul_sub_one_x_y: ; FMA4-NOINFS: # %bb.0: -; FMA4-NOINFS-NEXT: vfnmaddps %xmm1, %xmm1, %xmm0, %xmm0 +; FMA4-NOINFS-NEXT: vfnmaddps {{.*#+}} xmm0 = -(xmm0 * xmm1) + xmm1 ; FMA4-NOINFS-NEXT: retq ; ; AVX512-NOINFS-LABEL: test_v4f32_mul_sub_one_x_y: @@ -858,7 +858,7 @@ define <4 x float> @test_v4f32_mul_y_sub_one_x(<4 x float> %x, <4 x float> %y) { ; ; FMA4-NOINFS-LABEL: test_v4f32_mul_y_sub_one_x: ; FMA4-NOINFS: # %bb.0: -; FMA4-NOINFS-NEXT: vfnmaddps %xmm1, %xmm1, %xmm0, %xmm0 +; FMA4-NOINFS-NEXT: vfnmaddps {{.*#+}} xmm0 = -(xmm0 * xmm1) + xmm1 ; FMA4-NOINFS-NEXT: retq ; ; AVX512-NOINFS-LABEL: test_v4f32_mul_y_sub_one_x: @@ -899,7 +899,7 @@ define <4 x float> @test_v4f32_mul_y_sub_one_x_undefs(<4 x float> %x, <4 x float ; ; FMA4-NOINFS-LABEL: test_v4f32_mul_y_sub_one_x_undefs: ; FMA4-NOINFS: # %bb.0: -; FMA4-NOINFS-NEXT: vfnmaddps %xmm1, %xmm1, %xmm0, %xmm0 +; FMA4-NOINFS-NEXT: vfnmaddps {{.*#+}} xmm0 = -(xmm0 * xmm1) + xmm1 ; FMA4-NOINFS-NEXT: retq ; ; AVX512-NOINFS-LABEL: test_v4f32_mul_y_sub_one_x_undefs: @@ -940,7 +940,7 @@ define <4 x float> @test_v4f32_mul_sub_negone_x_y(<4 x float> %x, <4 x float> %y ; ; FMA4-NOINFS-LABEL: test_v4f32_mul_sub_negone_x_y: ; FMA4-NOINFS: # %bb.0: -; FMA4-NOINFS-NEXT: vfnmsubps %xmm1, %xmm1, %xmm0, %xmm0 +; FMA4-NOINFS-NEXT: vfnmsubps {{.*#+}} xmm0 = -(xmm0 * xmm1) - xmm1 ; FMA4-NOINFS-NEXT: retq ; ; AVX512-NOINFS-LABEL: test_v4f32_mul_sub_negone_x_y: @@ -981,7 +981,7 @@ define <4 x float> @test_v4f32_mul_y_sub_negone_x(<4 x float> %x, <4 x float> %y ; ; FMA4-NOINFS-LABEL: test_v4f32_mul_y_sub_negone_x: ; FMA4-NOINFS: # %bb.0: -; FMA4-NOINFS-NEXT: vfnmsubps %xmm1, %xmm1, %xmm0, %xmm0 +; FMA4-NOINFS-NEXT: vfnmsubps {{.*#+}} xmm0 = -(xmm0 * xmm1) - xmm1 ; FMA4-NOINFS-NEXT: retq ; ; AVX512-NOINFS-LABEL: test_v4f32_mul_y_sub_negone_x: @@ -1022,7 +1022,7 @@ define <4 x float> @test_v4f32_mul_y_sub_negone_x_undefs(<4 x float> %x, <4 x fl ; ; FMA4-NOINFS-LABEL: test_v4f32_mul_y_sub_negone_x_undefs: ; FMA4-NOINFS: # %bb.0: -; FMA4-NOINFS-NEXT: vfnmsubps %xmm1, %xmm1, %xmm0, %xmm0 +; FMA4-NOINFS-NEXT: vfnmsubps {{.*#+}} xmm0 = -(xmm0 * xmm1) - xmm1 ; FMA4-NOINFS-NEXT: retq ; ; AVX512-NOINFS-LABEL: test_v4f32_mul_y_sub_negone_x_undefs: @@ -1060,7 +1060,7 @@ define <4 x float> @test_v4f32_mul_sub_x_one_y(<4 x float> %x, <4 x float> %y) { ; ; FMA4-NOINFS-LABEL: test_v4f32_mul_sub_x_one_y: ; FMA4-NOINFS: # %bb.0: -; FMA4-NOINFS-NEXT: vfmsubps %xmm1, %xmm1, %xmm0, %xmm0 +; FMA4-NOINFS-NEXT: vfmsubps {{.*#+}} xmm0 = (xmm0 * xmm1) - xmm1 ; FMA4-NOINFS-NEXT: retq ; ; AVX512-NOINFS-LABEL: test_v4f32_mul_sub_x_one_y: @@ -1098,7 +1098,7 @@ define <4 x float> @test_v4f32_mul_y_sub_x_one(<4 x float> %x, <4 x float> %y) { ; ; FMA4-NOINFS-LABEL: test_v4f32_mul_y_sub_x_one: ; FMA4-NOINFS: # %bb.0: -; FMA4-NOINFS-NEXT: vfmsubps %xmm1, %xmm1, %xmm0, %xmm0 +; FMA4-NOINFS-NEXT: vfmsubps {{.*#+}} xmm0 = (xmm0 * xmm1) - xmm1 ; FMA4-NOINFS-NEXT: retq ; ; AVX512-NOINFS-LABEL: test_v4f32_mul_y_sub_x_one: @@ -1136,7 +1136,7 @@ define <4 x float> @test_v4f32_mul_y_sub_x_one_undefs(<4 x float> %x, <4 x float ; ; FMA4-NOINFS-LABEL: test_v4f32_mul_y_sub_x_one_undefs: ; FMA4-NOINFS: # %bb.0: -; FMA4-NOINFS-NEXT: vfmsubps %xmm1, %xmm1, %xmm0, %xmm0 +; FMA4-NOINFS-NEXT: 
vfmsubps {{.*#+}} xmm0 = (xmm0 * xmm1) - xmm1 ; FMA4-NOINFS-NEXT: retq ; ; AVX512-NOINFS-LABEL: test_v4f32_mul_y_sub_x_one_undefs: @@ -1174,7 +1174,7 @@ define <4 x float> @test_v4f32_mul_sub_x_negone_y(<4 x float> %x, <4 x float> %y ; ; FMA4-NOINFS-LABEL: test_v4f32_mul_sub_x_negone_y: ; FMA4-NOINFS: # %bb.0: -; FMA4-NOINFS-NEXT: vfmaddps %xmm1, %xmm1, %xmm0, %xmm0 +; FMA4-NOINFS-NEXT: vfmaddps {{.*#+}} xmm0 = (xmm0 * xmm1) + xmm1 ; FMA4-NOINFS-NEXT: retq ; ; AVX512-NOINFS-LABEL: test_v4f32_mul_sub_x_negone_y: @@ -1212,7 +1212,7 @@ define <4 x float> @test_v4f32_mul_y_sub_x_negone(<4 x float> %x, <4 x float> %y ; ; FMA4-NOINFS-LABEL: test_v4f32_mul_y_sub_x_negone: ; FMA4-NOINFS: # %bb.0: -; FMA4-NOINFS-NEXT: vfmaddps %xmm1, %xmm1, %xmm0, %xmm0 +; FMA4-NOINFS-NEXT: vfmaddps {{.*#+}} xmm0 = (xmm0 * xmm1) + xmm1 ; FMA4-NOINFS-NEXT: retq ; ; AVX512-NOINFS-LABEL: test_v4f32_mul_y_sub_x_negone: @@ -1250,7 +1250,7 @@ define <4 x float> @test_v4f32_mul_y_sub_x_negone_undefs(<4 x float> %x, <4 x fl ; ; FMA4-NOINFS-LABEL: test_v4f32_mul_y_sub_x_negone_undefs: ; FMA4-NOINFS: # %bb.0: -; FMA4-NOINFS-NEXT: vfmaddps %xmm1, %xmm1, %xmm0, %xmm0 +; FMA4-NOINFS-NEXT: vfmaddps {{.*#+}} xmm0 = (xmm0 * xmm1) + xmm1 ; FMA4-NOINFS-NEXT: retq ; ; AVX512-NOINFS-LABEL: test_v4f32_mul_y_sub_x_negone_undefs: @@ -1280,7 +1280,7 @@ define float @test_f32_interp(float %x, float %y, float %t) { ; FMA4-INFS-NEXT: vmovss {{.*#+}} xmm3 = mem[0],zero,zero,zero ; FMA4-INFS-NEXT: vsubss %xmm2, %xmm3, %xmm3 ; FMA4-INFS-NEXT: vmulss %xmm3, %xmm1, %xmm1 -; FMA4-INFS-NEXT: vfmaddss %xmm1, %xmm2, %xmm0, %xmm0 +; FMA4-INFS-NEXT: vfmaddss {{.*#+}} xmm0 = (xmm0 * xmm2) + xmm1 ; FMA4-INFS-NEXT: retq ; ; AVX512-INFS-LABEL: test_f32_interp: @@ -1299,8 +1299,8 @@ define float @test_f32_interp(float %x, float %y, float %t) { ; ; FMA4-NOINFS-LABEL: test_f32_interp: ; FMA4-NOINFS: # %bb.0: -; FMA4-NOINFS-NEXT: vfmsubss %xmm1, %xmm1, %xmm2, %xmm1 -; FMA4-NOINFS-NEXT: vfmsubss %xmm1, %xmm2, %xmm0, %xmm0 +; FMA4-NOINFS-NEXT: vfmsubss {{.*#+}} xmm1 = (xmm2 * xmm1) - xmm1 +; FMA4-NOINFS-NEXT: vfmsubss {{.*#+}} xmm0 = (xmm0 * xmm2) - xmm1 ; FMA4-NOINFS-NEXT: retq ; ; AVX512-NOINFS-LABEL: test_f32_interp: @@ -1329,7 +1329,7 @@ define <4 x float> @test_v4f32_interp(<4 x float> %x, <4 x float> %y, <4 x float ; FMA4-INFS-NEXT: vmovaps {{.*#+}} xmm3 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0] ; FMA4-INFS-NEXT: vsubps %xmm2, %xmm3, %xmm3 ; FMA4-INFS-NEXT: vmulps %xmm3, %xmm1, %xmm1 -; FMA4-INFS-NEXT: vfmaddps %xmm1, %xmm2, %xmm0, %xmm0 +; FMA4-INFS-NEXT: vfmaddps {{.*#+}} xmm0 = (xmm0 * xmm2) + xmm1 ; FMA4-INFS-NEXT: retq ; ; AVX512-INFS-LABEL: test_v4f32_interp: @@ -1348,8 +1348,8 @@ define <4 x float> @test_v4f32_interp(<4 x float> %x, <4 x float> %y, <4 x float ; ; FMA4-NOINFS-LABEL: test_v4f32_interp: ; FMA4-NOINFS: # %bb.0: -; FMA4-NOINFS-NEXT: vfmsubps %xmm1, %xmm1, %xmm2, %xmm1 -; FMA4-NOINFS-NEXT: vfmsubps %xmm1, %xmm2, %xmm0, %xmm0 +; FMA4-NOINFS-NEXT: vfmsubps {{.*#+}} xmm1 = (xmm2 * xmm1) - xmm1 +; FMA4-NOINFS-NEXT: vfmsubps {{.*#+}} xmm0 = (xmm0 * xmm2) - xmm1 ; FMA4-NOINFS-NEXT: retq ; ; AVX512-NOINFS-LABEL: test_v4f32_interp: @@ -1378,7 +1378,7 @@ define <8 x float> @test_v8f32_interp(<8 x float> %x, <8 x float> %y, <8 x float ; FMA4-INFS-NEXT: vmovaps {{.*#+}} ymm3 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0] ; FMA4-INFS-NEXT: vsubps %ymm2, %ymm3, %ymm3 ; FMA4-INFS-NEXT: vmulps %ymm3, %ymm1, %ymm1 -; FMA4-INFS-NEXT: vfmaddps %ymm1, %ymm2, %ymm0, %ymm0 +; FMA4-INFS-NEXT: vfmaddps {{.*#+}} ymm0 = (ymm0 * ymm2) + ymm1 ; FMA4-INFS-NEXT: 
retq ; ; AVX512-INFS-LABEL: test_v8f32_interp: @@ -1397,8 +1397,8 @@ define <8 x float> @test_v8f32_interp(<8 x float> %x, <8 x float> %y, <8 x float ; ; FMA4-NOINFS-LABEL: test_v8f32_interp: ; FMA4-NOINFS: # %bb.0: -; FMA4-NOINFS-NEXT: vfmsubps %ymm1, %ymm1, %ymm2, %ymm1 -; FMA4-NOINFS-NEXT: vfmsubps %ymm1, %ymm2, %ymm0, %ymm0 +; FMA4-NOINFS-NEXT: vfmsubps {{.*#+}} ymm1 = (ymm2 * ymm1) - ymm1 +; FMA4-NOINFS-NEXT: vfmsubps {{.*#+}} ymm0 = (ymm0 * ymm2) - ymm1 ; FMA4-NOINFS-NEXT: retq ; ; AVX512-NOINFS-LABEL: test_v8f32_interp: @@ -1427,7 +1427,7 @@ define double @test_f64_interp(double %x, double %y, double %t) { ; FMA4-INFS-NEXT: vmovsd {{.*#+}} xmm3 = mem[0],zero ; FMA4-INFS-NEXT: vsubsd %xmm2, %xmm3, %xmm3 ; FMA4-INFS-NEXT: vmulsd %xmm3, %xmm1, %xmm1 -; FMA4-INFS-NEXT: vfmaddsd %xmm1, %xmm2, %xmm0, %xmm0 +; FMA4-INFS-NEXT: vfmaddsd {{.*#+}} xmm0 = (xmm0 * xmm2) + xmm1 ; FMA4-INFS-NEXT: retq ; ; AVX512-INFS-LABEL: test_f64_interp: @@ -1446,8 +1446,8 @@ define double @test_f64_interp(double %x, double %y, double %t) { ; ; FMA4-NOINFS-LABEL: test_f64_interp: ; FMA4-NOINFS: # %bb.0: -; FMA4-NOINFS-NEXT: vfmsubsd %xmm1, %xmm1, %xmm2, %xmm1 -; FMA4-NOINFS-NEXT: vfmsubsd %xmm1, %xmm2, %xmm0, %xmm0 +; FMA4-NOINFS-NEXT: vfmsubsd {{.*#+}} xmm1 = (xmm2 * xmm1) - xmm1 +; FMA4-NOINFS-NEXT: vfmsubsd {{.*#+}} xmm0 = (xmm0 * xmm2) - xmm1 ; FMA4-NOINFS-NEXT: retq ; ; AVX512-NOINFS-LABEL: test_f64_interp: @@ -1476,7 +1476,7 @@ define <2 x double> @test_v2f64_interp(<2 x double> %x, <2 x double> %y, <2 x do ; FMA4-INFS-NEXT: vmovapd {{.*#+}} xmm3 = [1.0E+0,1.0E+0] ; FMA4-INFS-NEXT: vsubpd %xmm2, %xmm3, %xmm3 ; FMA4-INFS-NEXT: vmulpd %xmm3, %xmm1, %xmm1 -; FMA4-INFS-NEXT: vfmaddpd %xmm1, %xmm2, %xmm0, %xmm0 +; FMA4-INFS-NEXT: vfmaddpd {{.*#+}} xmm0 = (xmm0 * xmm2) + xmm1 ; FMA4-INFS-NEXT: retq ; ; AVX512-INFS-LABEL: test_v2f64_interp: @@ -1495,8 +1495,8 @@ define <2 x double> @test_v2f64_interp(<2 x double> %x, <2 x double> %y, <2 x do ; ; FMA4-NOINFS-LABEL: test_v2f64_interp: ; FMA4-NOINFS: # %bb.0: -; FMA4-NOINFS-NEXT: vfmsubpd %xmm1, %xmm1, %xmm2, %xmm1 -; FMA4-NOINFS-NEXT: vfmsubpd %xmm1, %xmm2, %xmm0, %xmm0 +; FMA4-NOINFS-NEXT: vfmsubpd {{.*#+}} xmm1 = (xmm2 * xmm1) - xmm1 +; FMA4-NOINFS-NEXT: vfmsubpd {{.*#+}} xmm0 = (xmm0 * xmm2) - xmm1 ; FMA4-NOINFS-NEXT: retq ; ; AVX512-NOINFS-LABEL: test_v2f64_interp: @@ -1525,7 +1525,7 @@ define <4 x double> @test_v4f64_interp(<4 x double> %x, <4 x double> %y, <4 x do ; FMA4-INFS-NEXT: vmovapd {{.*#+}} ymm3 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0] ; FMA4-INFS-NEXT: vsubpd %ymm2, %ymm3, %ymm3 ; FMA4-INFS-NEXT: vmulpd %ymm3, %ymm1, %ymm1 -; FMA4-INFS-NEXT: vfmaddpd %ymm1, %ymm2, %ymm0, %ymm0 +; FMA4-INFS-NEXT: vfmaddpd {{.*#+}} ymm0 = (ymm0 * ymm2) + ymm1 ; FMA4-INFS-NEXT: retq ; ; AVX512-INFS-LABEL: test_v4f64_interp: @@ -1544,8 +1544,8 @@ define <4 x double> @test_v4f64_interp(<4 x double> %x, <4 x double> %y, <4 x do ; ; FMA4-NOINFS-LABEL: test_v4f64_interp: ; FMA4-NOINFS: # %bb.0: -; FMA4-NOINFS-NEXT: vfmsubpd %ymm1, %ymm1, %ymm2, %ymm1 -; FMA4-NOINFS-NEXT: vfmsubpd %ymm1, %ymm2, %ymm0, %ymm0 +; FMA4-NOINFS-NEXT: vfmsubpd {{.*#+}} ymm1 = (ymm2 * ymm1) - ymm1 +; FMA4-NOINFS-NEXT: vfmsubpd {{.*#+}} ymm0 = (ymm0 * ymm2) - ymm1 ; FMA4-NOINFS-NEXT: retq ; ; AVX512-NOINFS-LABEL: test_v4f64_interp: @@ -1572,7 +1572,7 @@ define <4 x float> @test_v4f32_fneg_fmadd(<4 x float> %a0, <4 x float> %a1, <4 x ; ; FMA4-LABEL: test_v4f32_fneg_fmadd: ; FMA4: # %bb.0: -; FMA4-NEXT: vfnmsubps %xmm2, %xmm1, %xmm0, %xmm0 +; FMA4-NEXT: vfnmsubps {{.*#+}} xmm0 = -(xmm0 * xmm1) - xmm2 ; 
FMA4-NEXT: retq ; ; AVX512-LABEL: test_v4f32_fneg_fmadd: @@ -1593,7 +1593,7 @@ define <4 x double> @test_v4f64_fneg_fmsub(<4 x double> %a0, <4 x double> %a1, < ; ; FMA4-LABEL: test_v4f64_fneg_fmsub: ; FMA4: # %bb.0: -; FMA4-NEXT: vfnmaddpd %ymm2, %ymm1, %ymm0, %ymm0 +; FMA4-NEXT: vfnmaddpd {{.*#+}} ymm0 = -(ymm0 * ymm1) + ymm2 ; FMA4-NEXT: retq ; ; AVX512-LABEL: test_v4f64_fneg_fmsub: @@ -1614,7 +1614,7 @@ define <4 x float> @test_v4f32_fneg_fnmadd(<4 x float> %a0, <4 x float> %a1, <4 ; ; FMA4-LABEL: test_v4f32_fneg_fnmadd: ; FMA4: # %bb.0: -; FMA4-NEXT: vfmsubps %xmm2, %xmm1, %xmm0, %xmm0 +; FMA4-NEXT: vfmsubps {{.*#+}} xmm0 = (xmm0 * xmm1) - xmm2 ; FMA4-NEXT: retq ; ; AVX512-LABEL: test_v4f32_fneg_fnmadd: @@ -1636,7 +1636,7 @@ define <4 x double> @test_v4f64_fneg_fnmsub(<4 x double> %a0, <4 x double> %a1, ; ; FMA4-LABEL: test_v4f64_fneg_fnmsub: ; FMA4: # %bb.0: -; FMA4-NEXT: vfmaddpd %ymm2, %ymm1, %ymm0, %ymm0 +; FMA4-NEXT: vfmaddpd {{.*#+}} ymm0 = (ymm0 * ymm1) + ymm2 ; FMA4-NEXT: retq ; ; AVX512-LABEL: test_v4f64_fneg_fnmsub: @@ -1687,7 +1687,7 @@ define <4 x float> @test_v4f32_fma_fmul_x_c1_c2_y(<4 x float> %x, <4 x float> %y ; ; FMA4-LABEL: test_v4f32_fma_fmul_x_c1_c2_y: ; FMA4: # %bb.0: -; FMA4-NEXT: vfmaddps %xmm1, {{.*}}(%rip), %xmm0, %xmm0 +; FMA4-NEXT: vfmaddps {{.*#+}} xmm0 = (xmm0 * mem) + xmm1 ; FMA4-NEXT: retq ; ; AVX512-LABEL: test_v4f32_fma_fmul_x_c1_c2_y: @@ -1712,7 +1712,7 @@ define double @test_f64_fneg_fmul(double %x, double %y) #0 { ; FMA4-LABEL: test_f64_fneg_fmul: ; FMA4: # %bb.0: ; FMA4-NEXT: vxorpd %xmm2, %xmm2, %xmm2 -; FMA4-NEXT: vfnmsubsd %xmm2, %xmm1, %xmm0, %xmm0 +; FMA4-NEXT: vfnmsubsd {{.*#+}} xmm0 = -(xmm0 * xmm1) - xmm2 ; FMA4-NEXT: retq ; ; AVX512-LABEL: test_f64_fneg_fmul: @@ -1735,7 +1735,7 @@ define <4 x float> @test_v4f32_fneg_fmul(<4 x float> %x, <4 x float> %y) #0 { ; FMA4-LABEL: test_v4f32_fneg_fmul: ; FMA4: # %bb.0: ; FMA4-NEXT: vxorps %xmm2, %xmm2, %xmm2 -; FMA4-NEXT: vfnmsubps %xmm2, %xmm1, %xmm0, %xmm0 +; FMA4-NEXT: vfnmsubps {{.*#+}} xmm0 = -(xmm0 * xmm1) - xmm2 ; FMA4-NEXT: retq ; ; AVX512-LABEL: test_v4f32_fneg_fmul: @@ -1758,7 +1758,7 @@ define <4 x double> @test_v4f64_fneg_fmul(<4 x double> %x, <4 x double> %y) #0 { ; FMA4-LABEL: test_v4f64_fneg_fmul: ; FMA4: # %bb.0: ; FMA4-NEXT: vxorpd %xmm2, %xmm2, %xmm2 -; FMA4-NEXT: vfnmsubpd %ymm2, %ymm1, %ymm0, %ymm0 +; FMA4-NEXT: vfnmsubpd {{.*#+}} ymm0 = -(ymm0 * ymm1) - ymm2 ; FMA4-NEXT: retq ; ; AVX512-LABEL: test_v4f64_fneg_fmul: diff --git a/llvm/test/CodeGen/X86/fma_patterns_wide.ll b/llvm/test/CodeGen/X86/fma_patterns_wide.ll index 773352b6609c..d2cb64f87f06 100644 --- a/llvm/test/CodeGen/X86/fma_patterns_wide.ll +++ b/llvm/test/CodeGen/X86/fma_patterns_wide.ll @@ -21,8 +21,8 @@ define <16 x float> @test_16f32_fmadd(<16 x float> %a0, <16 x float> %a1, <16 x ; ; FMA4-LABEL: test_16f32_fmadd: ; FMA4: # %bb.0: -; FMA4-NEXT: vfmaddps %ymm4, %ymm2, %ymm0, %ymm0 -; FMA4-NEXT: vfmaddps %ymm5, %ymm3, %ymm1, %ymm1 +; FMA4-NEXT: vfmaddps {{.*#+}} ymm0 = (ymm0 * ymm2) + ymm4 +; FMA4-NEXT: vfmaddps {{.*#+}} ymm1 = (ymm1 * ymm3) + ymm5 ; FMA4-NEXT: retq ; ; AVX512-LABEL: test_16f32_fmadd: @@ -43,8 +43,8 @@ define <8 x double> @test_8f64_fmadd(<8 x double> %a0, <8 x double> %a1, <8 x do ; ; FMA4-LABEL: test_8f64_fmadd: ; FMA4: # %bb.0: -; FMA4-NEXT: vfmaddpd %ymm4, %ymm2, %ymm0, %ymm0 -; FMA4-NEXT: vfmaddpd %ymm5, %ymm3, %ymm1, %ymm1 +; FMA4-NEXT: vfmaddpd {{.*#+}} ymm0 = (ymm0 * ymm2) + ymm4 +; FMA4-NEXT: vfmaddpd {{.*#+}} ymm1 = (ymm1 * ymm3) + ymm5 ; FMA4-NEXT: retq ; ; AVX512-LABEL: test_8f64_fmadd: 
@@ -69,8 +69,8 @@ define <16 x float> @test_16f32_fmsub(<16 x float> %a0, <16 x float> %a1, <16 x ; ; FMA4-LABEL: test_16f32_fmsub: ; FMA4: # %bb.0: -; FMA4-NEXT: vfmsubps %ymm4, %ymm2, %ymm0, %ymm0 -; FMA4-NEXT: vfmsubps %ymm5, %ymm3, %ymm1, %ymm1 +; FMA4-NEXT: vfmsubps {{.*#+}} ymm0 = (ymm0 * ymm2) - ymm4 +; FMA4-NEXT: vfmsubps {{.*#+}} ymm1 = (ymm1 * ymm3) - ymm5 ; FMA4-NEXT: retq ; ; AVX512-LABEL: test_16f32_fmsub: @@ -91,8 +91,8 @@ define <8 x double> @test_8f64_fmsub(<8 x double> %a0, <8 x double> %a1, <8 x do ; ; FMA4-LABEL: test_8f64_fmsub: ; FMA4: # %bb.0: -; FMA4-NEXT: vfmsubpd %ymm4, %ymm2, %ymm0, %ymm0 -; FMA4-NEXT: vfmsubpd %ymm5, %ymm3, %ymm1, %ymm1 +; FMA4-NEXT: vfmsubpd {{.*#+}} ymm0 = (ymm0 * ymm2) - ymm4 +; FMA4-NEXT: vfmsubpd {{.*#+}} ymm1 = (ymm1 * ymm3) - ymm5 ; FMA4-NEXT: retq ; ; AVX512-LABEL: test_8f64_fmsub: @@ -117,8 +117,8 @@ define <16 x float> @test_16f32_fnmadd(<16 x float> %a0, <16 x float> %a1, <16 x ; ; FMA4-LABEL: test_16f32_fnmadd: ; FMA4: # %bb.0: -; FMA4-NEXT: vfnmaddps %ymm4, %ymm2, %ymm0, %ymm0 -; FMA4-NEXT: vfnmaddps %ymm5, %ymm3, %ymm1, %ymm1 +; FMA4-NEXT: vfnmaddps {{.*#+}} ymm0 = -(ymm0 * ymm2) + ymm4 +; FMA4-NEXT: vfnmaddps {{.*#+}} ymm1 = -(ymm1 * ymm3) + ymm5 ; FMA4-NEXT: retq ; ; AVX512-LABEL: test_16f32_fnmadd: @@ -139,8 +139,8 @@ define <8 x double> @test_8f64_fnmadd(<8 x double> %a0, <8 x double> %a1, <8 x d ; ; FMA4-LABEL: test_8f64_fnmadd: ; FMA4: # %bb.0: -; FMA4-NEXT: vfnmaddpd %ymm4, %ymm2, %ymm0, %ymm0 -; FMA4-NEXT: vfnmaddpd %ymm5, %ymm3, %ymm1, %ymm1 +; FMA4-NEXT: vfnmaddpd {{.*#+}} ymm0 = -(ymm0 * ymm2) + ymm4 +; FMA4-NEXT: vfnmaddpd {{.*#+}} ymm1 = -(ymm1 * ymm3) + ymm5 ; FMA4-NEXT: retq ; ; AVX512-LABEL: test_8f64_fnmadd: @@ -165,8 +165,8 @@ define <16 x float> @test_16f32_fnmsub(<16 x float> %a0, <16 x float> %a1, <16 x ; ; FMA4-LABEL: test_16f32_fnmsub: ; FMA4: # %bb.0: -; FMA4-NEXT: vfnmsubps %ymm4, %ymm2, %ymm0, %ymm0 -; FMA4-NEXT: vfnmsubps %ymm5, %ymm3, %ymm1, %ymm1 +; FMA4-NEXT: vfnmsubps {{.*#+}} ymm0 = -(ymm0 * ymm2) - ymm4 +; FMA4-NEXT: vfnmsubps {{.*#+}} ymm1 = -(ymm1 * ymm3) - ymm5 ; FMA4-NEXT: retq ; ; AVX512-LABEL: test_16f32_fnmsub: @@ -188,8 +188,8 @@ define <8 x double> @test_8f64_fnmsub(<8 x double> %a0, <8 x double> %a1, <8 x d ; ; FMA4-LABEL: test_8f64_fnmsub: ; FMA4: # %bb.0: -; FMA4-NEXT: vfnmsubpd %ymm4, %ymm2, %ymm0, %ymm0 -; FMA4-NEXT: vfnmsubpd %ymm5, %ymm3, %ymm1, %ymm1 +; FMA4-NEXT: vfnmsubpd {{.*#+}} ymm0 = -(ymm0 * ymm2) - ymm4 +; FMA4-NEXT: vfnmsubpd {{.*#+}} ymm1 = -(ymm1 * ymm3) - ymm5 ; FMA4-NEXT: retq ; ; AVX512-LABEL: test_8f64_fnmsub: @@ -215,8 +215,8 @@ define <16 x float> @test_16f32_fmadd_load(<16 x float>* %a0, <16 x float> %a1, ; ; FMA4-LABEL: test_16f32_fmadd_load: ; FMA4: # %bb.0: -; FMA4-NEXT: vfmaddps %ymm2, (%rdi), %ymm0, %ymm0 -; FMA4-NEXT: vfmaddps %ymm3, 32(%rdi), %ymm1, %ymm1 +; FMA4-NEXT: vfmaddps {{.*#+}} ymm0 = (ymm0 * mem) + ymm2 +; FMA4-NEXT: vfmaddps {{.*#+}} ymm1 = (ymm1 * mem) + ymm3 ; FMA4-NEXT: retq ; ; AVX512-LABEL: test_16f32_fmadd_load: @@ -238,8 +238,8 @@ define <8 x double> @test_8f64_fmsub_load(<8 x double>* %a0, <8 x double> %a1, < ; ; FMA4-LABEL: test_8f64_fmsub_load: ; FMA4: # %bb.0: -; FMA4-NEXT: vfmsubpd %ymm2, (%rdi), %ymm0, %ymm0 -; FMA4-NEXT: vfmsubpd %ymm3, 32(%rdi), %ymm1, %ymm1 +; FMA4-NEXT: vfmsubpd {{.*#+}} ymm0 = (ymm0 * mem) - ymm2 +; FMA4-NEXT: vfmsubpd {{.*#+}} ymm1 = (ymm1 * mem) - ymm3 ; FMA4-NEXT: retq ; ; AVX512-LABEL: test_8f64_fmsub_load: @@ -289,8 +289,8 @@ define <16 x float> @test_v16f32_mul_add_x_one_y(<16 x float> %x, <16 x float> % ; ; 
FMA4-NOINFS-LABEL: test_v16f32_mul_add_x_one_y: ; FMA4-NOINFS: # %bb.0: -; FMA4-NOINFS-NEXT: vfmaddps %ymm2, %ymm2, %ymm0, %ymm0 -; FMA4-NOINFS-NEXT: vfmaddps %ymm3, %ymm3, %ymm1, %ymm1 +; FMA4-NOINFS-NEXT: vfmaddps {{.*#+}} ymm0 = (ymm0 * ymm2) + ymm2 +; FMA4-NOINFS-NEXT: vfmaddps {{.*#+}} ymm1 = (ymm1 * ymm3) + ymm3 ; FMA4-NOINFS-NEXT: retq ; ; AVX512-NOINFS-LABEL: test_v16f32_mul_add_x_one_y: @@ -335,8 +335,8 @@ define <8 x double> @test_v8f64_mul_y_add_x_one(<8 x double> %x, <8 x double> %y ; ; FMA4-NOINFS-LABEL: test_v8f64_mul_y_add_x_one: ; FMA4-NOINFS: # %bb.0: -; FMA4-NOINFS-NEXT: vfmaddpd %ymm2, %ymm2, %ymm0, %ymm0 -; FMA4-NOINFS-NEXT: vfmaddpd %ymm3, %ymm3, %ymm1, %ymm1 +; FMA4-NOINFS-NEXT: vfmaddpd {{.*#+}} ymm0 = (ymm0 * ymm2) + ymm2 +; FMA4-NOINFS-NEXT: vfmaddpd {{.*#+}} ymm1 = (ymm1 * ymm3) + ymm3 ; FMA4-NOINFS-NEXT: retq ; ; AVX512-NOINFS-LABEL: test_v8f64_mul_y_add_x_one: @@ -381,8 +381,8 @@ define <16 x float> @test_v16f32_mul_add_x_negone_y(<16 x float> %x, <16 x float ; ; FMA4-NOINFS-LABEL: test_v16f32_mul_add_x_negone_y: ; FMA4-NOINFS: # %bb.0: -; FMA4-NOINFS-NEXT: vfmsubps %ymm2, %ymm2, %ymm0, %ymm0 -; FMA4-NOINFS-NEXT: vfmsubps %ymm3, %ymm3, %ymm1, %ymm1 +; FMA4-NOINFS-NEXT: vfmsubps {{.*#+}} ymm0 = (ymm0 * ymm2) - ymm2 +; FMA4-NOINFS-NEXT: vfmsubps {{.*#+}} ymm1 = (ymm1 * ymm3) - ymm3 ; FMA4-NOINFS-NEXT: retq ; ; AVX512-NOINFS-LABEL: test_v16f32_mul_add_x_negone_y: @@ -427,8 +427,8 @@ define <8 x double> @test_v8f64_mul_y_add_x_negone(<8 x double> %x, <8 x double> ; ; FMA4-NOINFS-LABEL: test_v8f64_mul_y_add_x_negone: ; FMA4-NOINFS: # %bb.0: -; FMA4-NOINFS-NEXT: vfmsubpd %ymm2, %ymm2, %ymm0, %ymm0 -; FMA4-NOINFS-NEXT: vfmsubpd %ymm3, %ymm3, %ymm1, %ymm1 +; FMA4-NOINFS-NEXT: vfmsubpd {{.*#+}} ymm0 = (ymm0 * ymm2) - ymm2 +; FMA4-NOINFS-NEXT: vfmsubpd {{.*#+}} ymm1 = (ymm1 * ymm3) - ymm3 ; FMA4-NOINFS-NEXT: retq ; ; AVX512-NOINFS-LABEL: test_v8f64_mul_y_add_x_negone: @@ -474,8 +474,8 @@ define <16 x float> @test_v16f32_mul_sub_one_x_y(<16 x float> %x, <16 x float> % ; ; FMA4-NOINFS-LABEL: test_v16f32_mul_sub_one_x_y: ; FMA4-NOINFS: # %bb.0: -; FMA4-NOINFS-NEXT: vfnmaddps %ymm2, %ymm2, %ymm0, %ymm0 -; FMA4-NOINFS-NEXT: vfnmaddps %ymm3, %ymm3, %ymm1, %ymm1 +; FMA4-NOINFS-NEXT: vfnmaddps {{.*#+}} ymm0 = -(ymm0 * ymm2) + ymm2 +; FMA4-NOINFS-NEXT: vfnmaddps {{.*#+}} ymm1 = -(ymm1 * ymm3) + ymm3 ; FMA4-NOINFS-NEXT: retq ; ; AVX512-NOINFS-LABEL: test_v16f32_mul_sub_one_x_y: @@ -521,8 +521,8 @@ define <8 x double> @test_v8f64_mul_y_sub_one_x(<8 x double> %x, <8 x double> %y ; ; FMA4-NOINFS-LABEL: test_v8f64_mul_y_sub_one_x: ; FMA4-NOINFS: # %bb.0: -; FMA4-NOINFS-NEXT: vfnmaddpd %ymm2, %ymm2, %ymm0, %ymm0 -; FMA4-NOINFS-NEXT: vfnmaddpd %ymm3, %ymm3, %ymm1, %ymm1 +; FMA4-NOINFS-NEXT: vfnmaddpd {{.*#+}} ymm0 = -(ymm0 * ymm2) + ymm2 +; FMA4-NOINFS-NEXT: vfnmaddpd {{.*#+}} ymm1 = -(ymm1 * ymm3) + ymm3 ; FMA4-NOINFS-NEXT: retq ; ; AVX512-NOINFS-LABEL: test_v8f64_mul_y_sub_one_x: @@ -568,8 +568,8 @@ define <16 x float> @test_v16f32_mul_sub_negone_x_y(<16 x float> %x, <16 x float ; ; FMA4-NOINFS-LABEL: test_v16f32_mul_sub_negone_x_y: ; FMA4-NOINFS: # %bb.0: -; FMA4-NOINFS-NEXT: vfnmsubps %ymm2, %ymm2, %ymm0, %ymm0 -; FMA4-NOINFS-NEXT: vfnmsubps %ymm3, %ymm3, %ymm1, %ymm1 +; FMA4-NOINFS-NEXT: vfnmsubps {{.*#+}} ymm0 = -(ymm0 * ymm2) - ymm2 +; FMA4-NOINFS-NEXT: vfnmsubps {{.*#+}} ymm1 = -(ymm1 * ymm3) - ymm3 ; FMA4-NOINFS-NEXT: retq ; ; AVX512-NOINFS-LABEL: test_v16f32_mul_sub_negone_x_y: @@ -615,8 +615,8 @@ define <8 x double> @test_v8f64_mul_y_sub_negone_x(<8 x double> %x, <8 x double> 
; ; FMA4-NOINFS-LABEL: test_v8f64_mul_y_sub_negone_x: ; FMA4-NOINFS: # %bb.0: -; FMA4-NOINFS-NEXT: vfnmsubpd %ymm2, %ymm2, %ymm0, %ymm0 -; FMA4-NOINFS-NEXT: vfnmsubpd %ymm3, %ymm3, %ymm1, %ymm1 +; FMA4-NOINFS-NEXT: vfnmsubpd {{.*#+}} ymm0 = -(ymm0 * ymm2) - ymm2 +; FMA4-NOINFS-NEXT: vfnmsubpd {{.*#+}} ymm1 = -(ymm1 * ymm3) - ymm3 ; FMA4-NOINFS-NEXT: retq ; ; AVX512-NOINFS-LABEL: test_v8f64_mul_y_sub_negone_x: @@ -661,8 +661,8 @@ define <16 x float> @test_v16f32_mul_sub_x_one_y(<16 x float> %x, <16 x float> % ; ; FMA4-NOINFS-LABEL: test_v16f32_mul_sub_x_one_y: ; FMA4-NOINFS: # %bb.0: -; FMA4-NOINFS-NEXT: vfmsubps %ymm2, %ymm2, %ymm0, %ymm0 -; FMA4-NOINFS-NEXT: vfmsubps %ymm3, %ymm3, %ymm1, %ymm1 +; FMA4-NOINFS-NEXT: vfmsubps {{.*#+}} ymm0 = (ymm0 * ymm2) - ymm2 +; FMA4-NOINFS-NEXT: vfmsubps {{.*#+}} ymm1 = (ymm1 * ymm3) - ymm3 ; FMA4-NOINFS-NEXT: retq ; ; AVX512-NOINFS-LABEL: test_v16f32_mul_sub_x_one_y: @@ -707,8 +707,8 @@ define <8 x double> @test_v8f64_mul_y_sub_x_one(<8 x double> %x, <8 x double> %y ; ; FMA4-NOINFS-LABEL: test_v8f64_mul_y_sub_x_one: ; FMA4-NOINFS: # %bb.0: -; FMA4-NOINFS-NEXT: vfmsubpd %ymm2, %ymm2, %ymm0, %ymm0 -; FMA4-NOINFS-NEXT: vfmsubpd %ymm3, %ymm3, %ymm1, %ymm1 +; FMA4-NOINFS-NEXT: vfmsubpd {{.*#+}} ymm0 = (ymm0 * ymm2) - ymm2 +; FMA4-NOINFS-NEXT: vfmsubpd {{.*#+}} ymm1 = (ymm1 * ymm3) - ymm3 ; FMA4-NOINFS-NEXT: retq ; ; AVX512-NOINFS-LABEL: test_v8f64_mul_y_sub_x_one: @@ -753,8 +753,8 @@ define <16 x float> @test_v16f32_mul_sub_x_negone_y(<16 x float> %x, <16 x float ; ; FMA4-NOINFS-LABEL: test_v16f32_mul_sub_x_negone_y: ; FMA4-NOINFS: # %bb.0: -; FMA4-NOINFS-NEXT: vfmaddps %ymm2, %ymm2, %ymm0, %ymm0 -; FMA4-NOINFS-NEXT: vfmaddps %ymm3, %ymm3, %ymm1, %ymm1 +; FMA4-NOINFS-NEXT: vfmaddps {{.*#+}} ymm0 = (ymm0 * ymm2) + ymm2 +; FMA4-NOINFS-NEXT: vfmaddps {{.*#+}} ymm1 = (ymm1 * ymm3) + ymm3 ; FMA4-NOINFS-NEXT: retq ; ; AVX512-NOINFS-LABEL: test_v16f32_mul_sub_x_negone_y: @@ -799,8 +799,8 @@ define <8 x double> @test_v8f64_mul_y_sub_x_negone(<8 x double> %x, <8 x double> ; ; FMA4-NOINFS-LABEL: test_v8f64_mul_y_sub_x_negone: ; FMA4-NOINFS: # %bb.0: -; FMA4-NOINFS-NEXT: vfmaddpd %ymm2, %ymm2, %ymm0, %ymm0 -; FMA4-NOINFS-NEXT: vfmaddpd %ymm3, %ymm3, %ymm1, %ymm1 +; FMA4-NOINFS-NEXT: vfmaddpd {{.*#+}} ymm0 = (ymm0 * ymm2) + ymm2 +; FMA4-NOINFS-NEXT: vfmaddpd {{.*#+}} ymm1 = (ymm1 * ymm3) + ymm3 ; FMA4-NOINFS-NEXT: retq ; ; AVX512-NOINFS-LABEL: test_v8f64_mul_y_sub_x_negone: @@ -835,8 +835,8 @@ define <16 x float> @test_v16f32_interp(<16 x float> %x, <16 x float> %y, <16 x ; FMA4-INFS-NEXT: vsubps %ymm5, %ymm6, %ymm6 ; FMA4-INFS-NEXT: vmulps %ymm6, %ymm3, %ymm3 ; FMA4-INFS-NEXT: vmulps %ymm7, %ymm2, %ymm2 -; FMA4-INFS-NEXT: vfmaddps %ymm2, %ymm4, %ymm0, %ymm0 -; FMA4-INFS-NEXT: vfmaddps %ymm3, %ymm5, %ymm1, %ymm1 +; FMA4-INFS-NEXT: vfmaddps {{.*#+}} ymm0 = (ymm0 * ymm4) + ymm2 +; FMA4-INFS-NEXT: vfmaddps {{.*#+}} ymm1 = (ymm1 * ymm5) + ymm3 ; FMA4-INFS-NEXT: retq ; ; AVX512-INFS-LABEL: test_v16f32_interp: @@ -857,10 +857,10 @@ define <16 x float> @test_v16f32_interp(<16 x float> %x, <16 x float> %y, <16 x ; ; FMA4-NOINFS-LABEL: test_v16f32_interp: ; FMA4-NOINFS: # %bb.0: -; FMA4-NOINFS-NEXT: vfmsubps %ymm3, %ymm3, %ymm5, %ymm3 -; FMA4-NOINFS-NEXT: vfmsubps %ymm2, %ymm2, %ymm4, %ymm2 -; FMA4-NOINFS-NEXT: vfmsubps %ymm2, %ymm4, %ymm0, %ymm0 -; FMA4-NOINFS-NEXT: vfmsubps %ymm3, %ymm5, %ymm1, %ymm1 +; FMA4-NOINFS-NEXT: vfmsubps {{.*#+}} ymm3 = (ymm5 * ymm3) - ymm3 +; FMA4-NOINFS-NEXT: vfmsubps {{.*#+}} ymm2 = (ymm4 * ymm2) - ymm2 +; FMA4-NOINFS-NEXT: vfmsubps {{.*#+}} ymm0 = 
(ymm0 * ymm4) - ymm2 +; FMA4-NOINFS-NEXT: vfmsubps {{.*#+}} ymm1 = (ymm1 * ymm5) - ymm3 ; FMA4-NOINFS-NEXT: retq ; ; AVX512-NOINFS-LABEL: test_v16f32_interp: @@ -894,8 +894,8 @@ define <8 x double> @test_v8f64_interp(<8 x double> %x, <8 x double> %y, <8 x do ; FMA4-INFS-NEXT: vsubpd %ymm5, %ymm6, %ymm6 ; FMA4-INFS-NEXT: vmulpd %ymm6, %ymm3, %ymm3 ; FMA4-INFS-NEXT: vmulpd %ymm7, %ymm2, %ymm2 -; FMA4-INFS-NEXT: vfmaddpd %ymm2, %ymm4, %ymm0, %ymm0 -; FMA4-INFS-NEXT: vfmaddpd %ymm3, %ymm5, %ymm1, %ymm1 +; FMA4-INFS-NEXT: vfmaddpd {{.*#+}} ymm0 = (ymm0 * ymm4) + ymm2 +; FMA4-INFS-NEXT: vfmaddpd {{.*#+}} ymm1 = (ymm1 * ymm5) + ymm3 ; FMA4-INFS-NEXT: retq ; ; AVX512-INFS-LABEL: test_v8f64_interp: @@ -916,10 +916,10 @@ define <8 x double> @test_v8f64_interp(<8 x double> %x, <8 x double> %y, <8 x do ; ; FMA4-NOINFS-LABEL: test_v8f64_interp: ; FMA4-NOINFS: # %bb.0: -; FMA4-NOINFS-NEXT: vfmsubpd %ymm3, %ymm3, %ymm5, %ymm3 -; FMA4-NOINFS-NEXT: vfmsubpd %ymm2, %ymm2, %ymm4, %ymm2 -; FMA4-NOINFS-NEXT: vfmsubpd %ymm2, %ymm4, %ymm0, %ymm0 -; FMA4-NOINFS-NEXT: vfmsubpd %ymm3, %ymm5, %ymm1, %ymm1 +; FMA4-NOINFS-NEXT: vfmsubpd {{.*#+}} ymm3 = (ymm5 * ymm3) - ymm3 +; FMA4-NOINFS-NEXT: vfmsubpd {{.*#+}} ymm2 = (ymm4 * ymm2) - ymm2 +; FMA4-NOINFS-NEXT: vfmsubpd {{.*#+}} ymm0 = (ymm0 * ymm4) - ymm2 +; FMA4-NOINFS-NEXT: vfmsubpd {{.*#+}} ymm1 = (ymm1 * ymm5) - ymm3 ; FMA4-NOINFS-NEXT: retq ; ; AVX512-NOINFS-LABEL: test_v8f64_interp: @@ -947,8 +947,8 @@ define <16 x float> @test_v16f32_fneg_fmadd(<16 x float> %a0, <16 x float> %a1, ; ; FMA4-LABEL: test_v16f32_fneg_fmadd: ; FMA4: # %bb.0: -; FMA4-NEXT: vfnmsubps %ymm4, %ymm2, %ymm0, %ymm0 -; FMA4-NEXT: vfnmsubps %ymm5, %ymm3, %ymm1, %ymm1 +; FMA4-NEXT: vfnmsubps {{.*#+}} ymm0 = -(ymm0 * ymm2) - ymm4 +; FMA4-NEXT: vfnmsubps {{.*#+}} ymm1 = -(ymm1 * ymm3) - ymm5 ; FMA4-NEXT: retq ; ; AVX512-LABEL: test_v16f32_fneg_fmadd: @@ -970,8 +970,8 @@ define <8 x double> @test_v8f64_fneg_fmsub(<8 x double> %a0, <8 x double> %a1, < ; ; FMA4-LABEL: test_v8f64_fneg_fmsub: ; FMA4: # %bb.0: -; FMA4-NEXT: vfnmaddpd %ymm4, %ymm2, %ymm0, %ymm0 -; FMA4-NEXT: vfnmaddpd %ymm5, %ymm3, %ymm1, %ymm1 +; FMA4-NEXT: vfnmaddpd {{.*#+}} ymm0 = -(ymm0 * ymm2) + ymm4 +; FMA4-NEXT: vfnmaddpd {{.*#+}} ymm1 = -(ymm1 * ymm3) + ymm5 ; FMA4-NEXT: retq ; ; AVX512-LABEL: test_v8f64_fneg_fmsub: @@ -993,8 +993,8 @@ define <16 x float> @test_v16f32_fneg_fnmadd(<16 x float> %a0, <16 x float> %a1, ; ; FMA4-LABEL: test_v16f32_fneg_fnmadd: ; FMA4: # %bb.0: -; FMA4-NEXT: vfmsubps %ymm4, %ymm2, %ymm0, %ymm0 -; FMA4-NEXT: vfmsubps %ymm5, %ymm3, %ymm1, %ymm1 +; FMA4-NEXT: vfmsubps {{.*#+}} ymm0 = (ymm0 * ymm2) - ymm4 +; FMA4-NEXT: vfmsubps {{.*#+}} ymm1 = (ymm1 * ymm3) - ymm5 ; FMA4-NEXT: retq ; ; AVX512-LABEL: test_v16f32_fneg_fnmadd: @@ -1017,8 +1017,8 @@ define <8 x double> @test_v8f64_fneg_fnmsub(<8 x double> %a0, <8 x double> %a1, ; ; FMA4-LABEL: test_v8f64_fneg_fnmsub: ; FMA4: # %bb.0: -; FMA4-NEXT: vfmaddpd %ymm4, %ymm2, %ymm0, %ymm0 -; FMA4-NEXT: vfmaddpd %ymm5, %ymm3, %ymm1, %ymm1 +; FMA4-NEXT: vfmaddpd {{.*#+}} ymm0 = (ymm0 * ymm2) + ymm4 +; FMA4-NEXT: vfmaddpd {{.*#+}} ymm1 = (ymm1 * ymm3) + ymm5 ; FMA4-NEXT: retq ; ; AVX512-LABEL: test_v8f64_fneg_fnmsub: @@ -1072,8 +1072,8 @@ define <16 x float> @test_v16f32_fma_fmul_x_c1_c2_y(<16 x float> %x, <16 x float ; ; FMA4-LABEL: test_v16f32_fma_fmul_x_c1_c2_y: ; FMA4: # %bb.0: -; FMA4-NEXT: vfmaddps %ymm2, {{.*}}(%rip), %ymm0, %ymm0 -; FMA4-NEXT: vfmaddps %ymm3, {{.*}}(%rip), %ymm1, %ymm1 +; FMA4-NEXT: vfmaddps {{.*#+}} ymm0 = (ymm0 * mem) + ymm2 +; FMA4-NEXT: 
vfmaddps {{.*#+}} ymm1 = (ymm1 * mem) + ymm3 ; FMA4-NEXT: retq ; ; AVX512-LABEL: test_v16f32_fma_fmul_x_c1_c2_y: @@ -1099,8 +1099,8 @@ define <16 x float> @test_v16f32_fneg_fmul(<16 x float> %x, <16 x float> %y) #0 ; FMA4-LABEL: test_v16f32_fneg_fmul: ; FMA4: # %bb.0: ; FMA4-NEXT: vxorps %xmm4, %xmm4, %xmm4 -; FMA4-NEXT: vfnmsubps %ymm4, %ymm2, %ymm0, %ymm0 -; FMA4-NEXT: vfnmsubps %ymm4, %ymm3, %ymm1, %ymm1 +; FMA4-NEXT: vfnmsubps {{.*#+}} ymm0 = -(ymm0 * ymm2) - ymm4 +; FMA4-NEXT: vfnmsubps {{.*#+}} ymm1 = -(ymm1 * ymm3) - ymm4 ; FMA4-NEXT: retq ; ; AVX512-LABEL: test_v16f32_fneg_fmul: @@ -1124,8 +1124,8 @@ define <8 x double> @test_v8f64_fneg_fmul(<8 x double> %x, <8 x double> %y) #0 { ; FMA4-LABEL: test_v8f64_fneg_fmul: ; FMA4: # %bb.0: ; FMA4-NEXT: vxorpd %xmm4, %xmm4, %xmm4 -; FMA4-NEXT: vfnmsubpd %ymm4, %ymm2, %ymm0, %ymm0 -; FMA4-NEXT: vfnmsubpd %ymm4, %ymm3, %ymm1, %ymm1 +; FMA4-NEXT: vfnmsubpd {{.*#+}} ymm0 = -(ymm0 * ymm2) - ymm4 +; FMA4-NEXT: vfnmsubpd {{.*#+}} ymm1 = -(ymm1 * ymm3) - ymm4 ; FMA4-NEXT: retq ; ; AVX512-LABEL: test_v8f64_fneg_fmul: diff --git a/llvm/test/CodeGen/X86/fmaddsub-combine.ll b/llvm/test/CodeGen/X86/fmaddsub-combine.ll index 320f2575e97c..739fa8297c44 100644 --- a/llvm/test/CodeGen/X86/fmaddsub-combine.ll +++ b/llvm/test/CodeGen/X86/fmaddsub-combine.ll @@ -13,7 +13,7 @@ define <2 x double> @mul_addsub_pd128(<2 x double> %A, <2 x double> %B, <2 x do ; ; FMA4-LABEL: mul_addsub_pd128: ; FMA4: # %bb.0: # %entry -; FMA4-NEXT: vfmaddsubpd %xmm2, %xmm1, %xmm0, %xmm0 +; FMA4-NEXT: vfmaddsubpd {{.*#+}} xmm0 = (xmm0 * xmm1) +/- xmm2 ; FMA4-NEXT: retq entry: %AB = fmul <2 x double> %A, %B @@ -31,7 +31,7 @@ define <4 x float> @mul_addsub_ps128(<4 x float> %A, <4 x float> %B, <4 x float> ; ; FMA4-LABEL: mul_addsub_ps128: ; FMA4: # %bb.0: # %entry -; FMA4-NEXT: vfmaddsubps %xmm2, %xmm1, %xmm0, %xmm0 +; FMA4-NEXT: vfmaddsubps {{.*#+}} xmm0 = (xmm0 * xmm1) +/- xmm2 ; FMA4-NEXT: retq entry: %AB = fmul <4 x float> %A, %B @@ -49,7 +49,7 @@ define <4 x double> @mul_addsub_pd256(<4 x double> %A, <4 x double> %B, <4 x dou ; ; FMA4-LABEL: mul_addsub_pd256: ; FMA4: # %bb.0: # %entry -; FMA4-NEXT: vfmaddsubpd %ymm2, %ymm1, %ymm0, %ymm0 +; FMA4-NEXT: vfmaddsubpd {{.*#+}} ymm0 = (ymm0 * ymm1) +/- ymm2 ; FMA4-NEXT: retq entry: %AB = fmul <4 x double> %A, %B @@ -67,7 +67,7 @@ define <8 x float> @mul_addsub_ps256(<8 x float> %A, <8 x float> %B, <8 x float> ; ; FMA4-LABEL: mul_addsub_ps256: ; FMA4: # %bb.0: # %entry -; FMA4-NEXT: vfmaddsubps %ymm2, %ymm1, %ymm0, %ymm0 +; FMA4-NEXT: vfmaddsubps {{.*#+}} ymm0 = (ymm0 * ymm1) +/- ymm2 ; FMA4-NEXT: retq entry: %AB = fmul <8 x float> %A, %B @@ -91,8 +91,8 @@ define <8 x double> @mul_addsub_pd512(<8 x double> %A, <8 x double> %B, <8 x dou ; ; FMA4-LABEL: mul_addsub_pd512: ; FMA4: # %bb.0: # %entry -; FMA4-NEXT: vfmaddsubpd %ymm4, %ymm2, %ymm0, %ymm0 -; FMA4-NEXT: vfmaddsubpd %ymm5, %ymm3, %ymm1, %ymm1 +; FMA4-NEXT: vfmaddsubpd {{.*#+}} ymm0 = (ymm0 * ymm2) +/- ymm4 +; FMA4-NEXT: vfmaddsubpd {{.*#+}} ymm1 = (ymm1 * ymm3) +/- ymm5 ; FMA4-NEXT: retq entry: %AB = fmul <8 x double> %A, %B @@ -116,8 +116,8 @@ define <16 x float> @mul_addsub_ps512(<16 x float> %A, <16 x float> %B, <16 x fl ; ; FMA4-LABEL: mul_addsub_ps512: ; FMA4: # %bb.0: # %entry -; FMA4-NEXT: vfmaddsubps %ymm4, %ymm2, %ymm0, %ymm0 -; FMA4-NEXT: vfmaddsubps %ymm5, %ymm3, %ymm1, %ymm1 +; FMA4-NEXT: vfmaddsubps {{.*#+}} ymm0 = (ymm0 * ymm2) +/- ymm4 +; FMA4-NEXT: vfmaddsubps {{.*#+}} ymm1 = (ymm1 * ymm3) +/- ymm5 ; FMA4-NEXT: retq entry: %AB = fmul <16 x float> %A, %B @@ -135,7 
+135,7 @@ define <4 x float> @buildvector_mul_addsub_ps128(<4 x float> %C, <4 x float> %D, ; ; FMA4-LABEL: buildvector_mul_addsub_ps128: ; FMA4: # %bb.0: # %bb -; FMA4-NEXT: vfmaddsubps %xmm2, %xmm1, %xmm0, %xmm0 +; FMA4-NEXT: vfmaddsubps {{.*#+}} xmm0 = (xmm0 * xmm1) +/- xmm2 ; FMA4-NEXT: retq bb: %A = fmul <4 x float> %C, %D @@ -166,7 +166,7 @@ define <2 x double> @buildvector_mul_addsub_pd128(<2 x double> %C, <2 x double> ; ; FMA4-LABEL: buildvector_mul_addsub_pd128: ; FMA4: # %bb.0: # %bb -; FMA4-NEXT: vfmaddsubpd %xmm2, %xmm1, %xmm0, %xmm0 +; FMA4-NEXT: vfmaddsubpd {{.*#+}} xmm0 = (xmm0 * xmm1) +/- xmm2 ; FMA4-NEXT: retq bb: %A = fmul <2 x double> %C, %D @@ -189,7 +189,7 @@ define <8 x float> @buildvector_mul_addsub_ps256(<8 x float> %C, <8 x float> %D, ; ; FMA4-LABEL: buildvector_mul_addsub_ps256: ; FMA4: # %bb.0: # %bb -; FMA4-NEXT: vfmaddsubps %ymm2, %ymm1, %ymm0, %ymm0 +; FMA4-NEXT: vfmaddsubps {{.*#+}} ymm0 = (ymm0 * ymm1) +/- ymm2 ; FMA4-NEXT: retq bb: %A = fmul <8 x float> %C, %D @@ -236,7 +236,7 @@ define <4 x double> @buildvector_mul_addsub_pd256(<4 x double> %C, <4 x double> ; ; FMA4-LABEL: buildvector_mul_addsub_pd256: ; FMA4: # %bb.0: # %bb -; FMA4-NEXT: vfmaddsubpd %ymm2, %ymm1, %ymm0, %ymm0 +; FMA4-NEXT: vfmaddsubpd {{.*#+}} ymm0 = (ymm0 * ymm1) +/- ymm2 ; FMA4-NEXT: retq bb: %A = fmul <4 x double> %C, %D @@ -273,8 +273,8 @@ define <16 x float> @buildvector_mul_addsub_ps512(<16 x float> %C, <16 x float> ; ; FMA4-LABEL: buildvector_mul_addsub_ps512: ; FMA4: # %bb.0: # %bb -; FMA4-NEXT: vfmaddsubps %ymm4, %ymm2, %ymm0, %ymm0 -; FMA4-NEXT: vfmaddsubps %ymm5, %ymm3, %ymm1, %ymm1 +; FMA4-NEXT: vfmaddsubps {{.*#+}} ymm0 = (ymm0 * ymm2) +/- ymm4 +; FMA4-NEXT: vfmaddsubps {{.*#+}} ymm1 = (ymm1 * ymm3) +/- ymm5 ; FMA4-NEXT: retq bb: %A = fmul <16 x float> %C, %D @@ -359,8 +359,8 @@ define <8 x double> @buildvector_mul_addsub_pd512(<8 x double> %C, <8 x double> ; ; FMA4-LABEL: buildvector_mul_addsub_pd512: ; FMA4: # %bb.0: # %bb -; FMA4-NEXT: vfmaddsubpd %ymm4, %ymm2, %ymm0, %ymm0 -; FMA4-NEXT: vfmaddsubpd %ymm5, %ymm3, %ymm1, %ymm1 +; FMA4-NEXT: vfmaddsubpd {{.*#+}} ymm0 = (ymm0 * ymm2) +/- ymm4 +; FMA4-NEXT: vfmaddsubpd {{.*#+}} ymm1 = (ymm1 * ymm3) +/- ymm5 ; FMA4-NEXT: retq bb: %A = fmul <8 x double> %C, %D @@ -404,7 +404,7 @@ define <4 x float> @buildvector_mul_subadd_ps128(<4 x float> %C, <4 x float> %D, ; ; FMA4-LABEL: buildvector_mul_subadd_ps128: ; FMA4: # %bb.0: # %bb -; FMA4-NEXT: vfmsubaddps %xmm2, %xmm1, %xmm0, %xmm0 +; FMA4-NEXT: vfmsubaddps {{.*#+}} xmm0 = (xmm0 * xmm1) -/+ xmm2 ; FMA4-NEXT: retq bb: %A = fmul <4 x float> %C, %D @@ -435,7 +435,7 @@ define <2 x double> @buildvector_mul_subadd_pd128(<2 x double> %C, <2 x double> ; ; FMA4-LABEL: buildvector_mul_subadd_pd128: ; FMA4: # %bb.0: # %bb -; FMA4-NEXT: vfmsubaddpd %xmm2, %xmm1, %xmm0, %xmm0 +; FMA4-NEXT: vfmsubaddpd {{.*#+}} xmm0 = (xmm0 * xmm1) -/+ xmm2 ; FMA4-NEXT: retq bb: %A = fmul <2 x double> %C, %D @@ -458,7 +458,7 @@ define <8 x float> @buildvector_mul_subadd_ps256(<8 x float> %C, <8 x float> %D, ; ; FMA4-LABEL: buildvector_mul_subadd_ps256: ; FMA4: # %bb.0: # %bb -; FMA4-NEXT: vfmsubaddps %ymm2, %ymm1, %ymm0, %ymm0 +; FMA4-NEXT: vfmsubaddps {{.*#+}} ymm0 = (ymm0 * ymm1) -/+ ymm2 ; FMA4-NEXT: retq bb: %A = fmul <8 x float> %C, %D @@ -505,7 +505,7 @@ define <4 x double> @buildvector_mul_subadd_pd256(<4 x double> %C, <4 x double> ; ; FMA4-LABEL: buildvector_mul_subadd_pd256: ; FMA4: # %bb.0: # %bb -; FMA4-NEXT: vfmsubaddpd %ymm2, %ymm1, %ymm0, %ymm0 +; FMA4-NEXT: vfmsubaddpd {{.*#+}} ymm0 = (ymm0 * ymm1) 
-/+ ymm2
 ; FMA4-NEXT:    retq
 bb:
   %A = fmul <4 x double> %C, %D
@@ -542,8 +542,8 @@ define <16 x float> @buildvector_mul_subadd_ps512(<16 x float> %C, <16 x float>
 ;
 ; FMA4-LABEL: buildvector_mul_subadd_ps512:
 ; FMA4:       # %bb.0: # %bb
-; FMA4-NEXT:    vfmsubaddps %ymm4, %ymm2, %ymm0, %ymm0
-; FMA4-NEXT:    vfmsubaddps %ymm5, %ymm3, %ymm1, %ymm1
+; FMA4-NEXT:    vfmsubaddps {{.*#+}} ymm0 = (ymm0 * ymm2) -/+ ymm4
+; FMA4-NEXT:    vfmsubaddps {{.*#+}} ymm1 = (ymm1 * ymm3) -/+ ymm5
 ; FMA4-NEXT:    retq
 bb:
   %A = fmul <16 x float> %C, %D
@@ -628,8 +628,8 @@ define <8 x double> @buildvector_mul_subadd_pd512(<8 x double> %C, <8 x double>
 ;
 ; FMA4-LABEL: buildvector_mul_subadd_pd512:
 ; FMA4:       # %bb.0: # %bb
-; FMA4-NEXT:    vfmsubaddpd %ymm4, %ymm2, %ymm0, %ymm0
-; FMA4-NEXT:    vfmsubaddpd %ymm5, %ymm3, %ymm1, %ymm1
+; FMA4-NEXT:    vfmsubaddpd {{.*#+}} ymm0 = (ymm0 * ymm2) -/+ ymm4
+; FMA4-NEXT:    vfmsubaddpd {{.*#+}} ymm1 = (ymm1 * ymm3) -/+ ymm5
 ; FMA4-NEXT:    retq
 bb:
   %A = fmul <8 x double> %C, %D
diff --git a/llvm/test/CodeGen/X86/fp-intrinsics-fma.ll b/llvm/test/CodeGen/X86/fp-intrinsics-fma.ll
index 93bd857e92b6..c2cbad069425 100644
--- a/llvm/test/CodeGen/X86/fp-intrinsics-fma.ll
+++ b/llvm/test/CodeGen/X86/fp-intrinsics-fma.ll
@@ -22,7 +22,7 @@ define float @f1(float %0, float %1, float %2) #0 {
 ;
 ; FMA4-LABEL: f1:
 ; FMA4:       # %bb.0: # %entry
-; FMA4-NEXT:    vfnmaddss %xmm2, %xmm1, %xmm0, %xmm0
+; FMA4-NEXT:    vfnmaddss {{.*#+}} xmm0 = -(xmm0 * xmm1) + xmm2
 ; FMA4-NEXT:    retq
 entry:
   %3 = fneg float %0
@@ -50,7 +50,7 @@ define double @f2(double %0, double %1, double %2) #0 {
 ;
 ; FMA4-LABEL: f2:
 ; FMA4:       # %bb.0: # %entry
-; FMA4-NEXT:    vfnmaddsd %xmm2, %xmm1, %xmm0, %xmm0
+; FMA4-NEXT:    vfnmaddsd {{.*#+}} xmm0 = -(xmm0 * xmm1) + xmm2
 ; FMA4-NEXT:    retq
 entry:
   %3 = fneg double %0
@@ -78,7 +78,7 @@ define float @f3(float %0, float %1, float %2) #0 {
 ;
 ; FMA4-LABEL: f3:
 ; FMA4:       # %bb.0: # %entry
-; FMA4-NEXT:    vfmsubss %xmm2, %xmm1, %xmm0, %xmm0
+; FMA4-NEXT:    vfmsubss {{.*#+}} xmm0 = (xmm0 * xmm1) - xmm2
 ; FMA4-NEXT:    retq
 entry:
   %3 = fneg float %2
@@ -106,7 +106,7 @@ define double @f4(double %0, double %1, double %2) #0 {
 ;
 ; FMA4-LABEL: f4:
 ; FMA4:       # %bb.0: # %entry
-; FMA4-NEXT:    vfmsubsd %xmm2, %xmm1, %xmm0, %xmm0
+; FMA4-NEXT:    vfmsubsd {{.*#+}} xmm0 = (xmm0 * xmm1) - xmm2
 ; FMA4-NEXT:    retq
 entry:
   %3 = fneg double %2
@@ -136,7 +136,7 @@ define float @f5(float %0, float %1, float %2) #0 {
 ;
 ; FMA4-LABEL: f5:
 ; FMA4:       # %bb.0: # %entry
-; FMA4-NEXT:    vfnmsubss %xmm2, %xmm1, %xmm0, %xmm0
+; FMA4-NEXT:    vfnmsubss {{.*#+}} xmm0 = -(xmm0 * xmm1) - xmm2
 ; FMA4-NEXT:    retq
 entry:
   %3 = fneg float %0
@@ -167,7 +167,7 @@ define double @f6(double %0, double %1, double %2) #0 {
 ;
 ; FMA4-LABEL: f6:
 ; FMA4:       # %bb.0: # %entry
-; FMA4-NEXT:    vfnmsubsd %xmm2, %xmm1, %xmm0, %xmm0
+; FMA4-NEXT:    vfnmsubsd {{.*#+}} xmm0 = -(xmm0 * xmm1) - xmm2
 ; FMA4-NEXT:    retq
 entry:
   %3 = fneg double %0
@@ -197,7 +197,7 @@ define float @f7(float %0, float %1, float %2) #0 {
 ;
 ; FMA4-LABEL: f7:
 ; FMA4:       # %bb.0: # %entry
-; FMA4-NEXT:    vfmaddss %xmm2, %xmm1, %xmm0, %xmm0
+; FMA4-NEXT:    vfmaddss {{.*#+}} xmm0 = (xmm0 * xmm1) + xmm2
 ; FMA4-NEXT:    vxorps {{.*}}(%rip), %xmm0, %xmm0
 ; FMA4-NEXT:    retq
 ;
@@ -234,7 +234,7 @@ define double @f8(double %0, double %1, double %2) #0 {
 ;
 ; FMA4-LABEL: f8:
 ; FMA4:       # %bb.0: # %entry
-; FMA4-NEXT:    vfmaddsd %xmm2, %xmm1, %xmm0, %xmm0
+; FMA4-NEXT:    vfmaddsd {{.*#+}} xmm0 = (xmm0 * xmm1) + xmm2
 ; FMA4-NEXT:    vxorpd {{.*}}(%rip), %xmm0, %xmm0
 ; FMA4-NEXT:    retq
 entry:
@@ -267,7 +267,7 @@ define float @f9(float %0, float %1, float %2) #0 {
 ;
 ; FMA4-LABEL: f9:
 ; FMA4:       # %bb.0: # %entry
-; FMA4-NEXT:    vfnmsubss %xmm2, %xmm1, %xmm0, %xmm0
+; FMA4-NEXT:    vfnmsubss {{.*#+}} xmm0 = -(xmm0 * xmm1) - xmm2
 ; FMA4-NEXT:    vxorps {{.*}}(%rip), %xmm0, %xmm0
 ; FMA4-NEXT:    retq
 ;
@@ -309,7 +309,7 @@ define double @f10(double %0, double %1, double %2) #0 {
 ;
 ; FMA4-LABEL: f10:
 ; FMA4:       # %bb.0: # %entry
-; FMA4-NEXT:    vfnmsubsd %xmm2, %xmm1, %xmm0, %xmm0
+; FMA4-NEXT:    vfnmsubsd {{.*#+}} xmm0 = -(xmm0 * xmm1) - xmm2
 ; FMA4-NEXT:    vxorpd {{.*}}(%rip), %xmm0, %xmm0
 ; FMA4-NEXT:    retq
 entry:
@@ -400,7 +400,7 @@ define float @f15() #0 {
 ; FMA4-LABEL: f15:
 ; FMA4:       # %bb.0: # %entry
 ; FMA4-NEXT:    vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
-; FMA4-NEXT:    vfmaddss %xmm0, %xmm0, %xmm0, %xmm0
+; FMA4-NEXT:    vfmaddss {{.*#+}} xmm0 = (xmm0 * xmm0) + xmm0
 ; FMA4-NEXT:    retq
 entry:
   %result = call float @llvm.experimental.constrained.fmuladd.f32(
@@ -432,7 +432,7 @@ define double @f16() #0 {
 ; FMA4-LABEL: f16:
 ; FMA4:       # %bb.0: # %entry
 ; FMA4-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
-; FMA4-NEXT:    vfmaddsd %xmm0, %xmm0, %xmm0, %xmm0
+; FMA4-NEXT:    vfmaddsd {{.*#+}} xmm0 = (xmm0 * xmm0) + xmm0
 ; FMA4-NEXT:    retq
 entry:
   %result = call double @llvm.experimental.constrained.fmuladd.f64(
@@ -468,7 +468,7 @@ define float @f17() #0 {
 ; FMA4-LABEL: f17:
 ; FMA4:       # %bb.0: # %entry
 ; FMA4-NEXT:    vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
-; FMA4-NEXT:    vfmaddss %xmm0, %xmm0, %xmm0, %xmm0
+; FMA4-NEXT:    vfmaddss {{.*#+}} xmm0 = (xmm0 * xmm0) + xmm0
 ; FMA4-NEXT:    retq
 entry:
   %result = call float @llvm.experimental.constrained.fma.f32(
@@ -504,7 +504,7 @@ define double @f18() #0 {
 ; FMA4-LABEL: f18:
 ; FMA4:       # %bb.0: # %entry
 ; FMA4-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
-; FMA4-NEXT:    vfmaddsd %xmm0, %xmm0, %xmm0, %xmm0
+; FMA4-NEXT:    vfmaddsd {{.*#+}} xmm0 = (xmm0 * xmm0) + xmm0
 ; FMA4-NEXT:    retq
 entry:
   %result = call double @llvm.experimental.constrained.fma.f64(
@@ -568,7 +568,7 @@ define <4 x float> @f19(<4 x float> %0, <4 x float> %1, <4 x float> %2) #0 {
 ;
 ; FMA4-LABEL: f19:
 ; FMA4:       # %bb.0: # %entry
-; FMA4-NEXT:    vfnmaddps %xmm2, %xmm1, %xmm0, %xmm0
+; FMA4-NEXT:    vfnmaddps {{.*#+}} xmm0 = -(xmm0 * xmm1) + xmm2
 ; FMA4-NEXT:    retq
 entry:
   %3 = fneg <4 x float> %0
@@ -610,7 +610,7 @@ define <2 x double> @f20(<2 x double> %0, <2 x double> %1, <2 x double> %2) #0 {
 ;
 ; FMA4-LABEL: f20:
 ; FMA4:       # %bb.0: # %entry
-; FMA4-NEXT:    vfnmaddpd %xmm2, %xmm1, %xmm0, %xmm0
+; FMA4-NEXT:    vfnmaddpd {{.*#+}} xmm0 = -(xmm0 * xmm1) + xmm2
 ; FMA4-NEXT:    retq
 entry:
   %3 = fneg <2 x double> %0
@@ -672,7 +672,7 @@ define <4 x float> @f21(<4 x float> %0, <4 x float> %1, <4 x float> %2) #0 {
 ;
 ; FMA4-LABEL: f21:
 ; FMA4:       # %bb.0: # %entry
-; FMA4-NEXT:    vfmsubps %xmm2, %xmm1, %xmm0, %xmm0
+; FMA4-NEXT:    vfmsubps {{.*#+}} xmm0 = (xmm0 * xmm1) - xmm2
 ; FMA4-NEXT:    retq
 entry:
   %3 = fneg <4 x float> %2
@@ -714,7 +714,7 @@ define <2 x double> @f22(<2 x double> %0, <2 x double> %1, <2 x double> %2) #0 {
 ;
 ; FMA4-LABEL: f22:
 ; FMA4:       # %bb.0: # %entry
-; FMA4-NEXT:    vfmsubpd %xmm2, %xmm1, %xmm0, %xmm0
+; FMA4-NEXT:    vfmsubpd {{.*#+}} xmm0 = (xmm0 * xmm1) - xmm2
 ; FMA4-NEXT:    retq
 entry:
   %3 = fneg <2 x double> %2
@@ -778,7 +778,7 @@ define <4 x float> @f23(<4 x float> %0, <4 x float> %1, <4 x float> %2) #0 {
 ;
 ; FMA4-LABEL: f23:
 ; FMA4:       # %bb.0: # %entry
-; FMA4-NEXT:    vfnmsubps %xmm2, %xmm1, %xmm0, %xmm0
+; FMA4-NEXT:    vfnmsubps {{.*#+}} xmm0 = -(xmm0 * xmm1) - xmm2
 ; FMA4-NEXT:    retq
 entry:
   %3 = fneg <4 x float> %0
@@ -823,7 +823,7 @@ define <2 x double> @f24(<2 x double> %0, <2 x double> %1, <2 x double> %2) #0 {
 ;
 ; FMA4-LABEL: f24:
 ; FMA4:       # %bb.0: # %entry
-; FMA4-NEXT:    vfnmsubpd %xmm2, %xmm1, %xmm0, %xmm0
+; FMA4-NEXT:    vfnmsubpd {{.*#+}} xmm0 = -(xmm0 * xmm1) - xmm2
 ; FMA4-NEXT:    retq
 entry:
   %3 = fneg <2 x double> %0
@@ -887,7 +887,7 @@ define <4 x float> @f25(<4 x float> %0, <4 x float> %1, <4 x float> %2) #0 {
 ;
 ; FMA4-LABEL: f25:
 ; FMA4:       # %bb.0: # %entry
-; FMA4-NEXT:    vfmaddps %xmm2, %xmm1, %xmm0, %xmm0
+; FMA4-NEXT:    vfmaddps {{.*#+}} xmm0 = (xmm0 * xmm1) + xmm2
 ; FMA4-NEXT:    vxorps {{.*}}(%rip), %xmm0, %xmm0
 ; FMA4-NEXT:    retq
 ;
@@ -938,7 +938,7 @@ define <2 x double> @f26(<2 x double> %0, <2 x double> %1, <2 x double> %2) #0 {
 ;
 ; FMA4-LABEL: f26:
 ; FMA4:       # %bb.0: # %entry
-; FMA4-NEXT:    vfmaddpd %xmm2, %xmm1, %xmm0, %xmm0
+; FMA4-NEXT:    vfmaddpd {{.*#+}} xmm0 = (xmm0 * xmm1) + xmm2
 ; FMA4-NEXT:    vxorpd {{.*}}(%rip), %xmm0, %xmm0
 ; FMA4-NEXT:    retq
 entry:
@@ -1005,7 +1005,7 @@ define <4 x float> @f27(<4 x float> %0, <4 x float> %1, <4 x float> %2) #0 {
 ;
 ; FMA4-LABEL: f27:
 ; FMA4:       # %bb.0: # %entry
-; FMA4-NEXT:    vfnmsubps %xmm2, %xmm1, %xmm0, %xmm0
+; FMA4-NEXT:    vfnmsubps {{.*#+}} xmm0 = -(xmm0 * xmm1) - xmm2
 ; FMA4-NEXT:    vxorps {{.*}}(%rip), %xmm0, %xmm0
 ; FMA4-NEXT:    retq
 ;
@@ -1061,7 +1061,7 @@ define <2 x double> @f28(<2 x double> %0, <2 x double> %1, <2 x double> %2) #0 {
 ;
 ; FMA4-LABEL: f28:
 ; FMA4:       # %bb.0: # %entry
-; FMA4-NEXT:    vfnmsubpd %xmm2, %xmm1, %xmm0, %xmm0
+; FMA4-NEXT:    vfnmsubpd {{.*#+}} xmm0 = -(xmm0 * xmm1) - xmm2
 ; FMA4-NEXT:    vxorpd {{.*}}(%rip), %xmm0, %xmm0
 ; FMA4-NEXT:    retq
 entry:
diff --git a/llvm/test/CodeGen/X86/wide-fma-contraction.ll b/llvm/test/CodeGen/X86/wide-fma-contraction.ll
index d15ced21e95e..eba8d2875159 100644
--- a/llvm/test/CodeGen/X86/wide-fma-contraction.ll
+++ b/llvm/test/CodeGen/X86/wide-fma-contraction.ll
@@ -14,8 +14,8 @@ define <16 x float> @fmafunc(<16 x float> %a, <16 x float> %b, <16 x float> %c)
 ; CHECK-NEXT:    .cfi_def_cfa_register %ebp
 ; CHECK-NEXT:    andl $-32, %esp
 ; CHECK-NEXT:    subl $32, %esp
-; CHECK-NEXT:    vfmaddps 8(%ebp), %ymm2, %ymm0, %ymm0
-; CHECK-NEXT:    vfmaddps 40(%ebp), %ymm3, %ymm1, %ymm1
+; CHECK-NEXT:    vfmaddps {{.*#+}} ymm0 = (ymm0 * ymm2) + mem
+; CHECK-NEXT:    vfmaddps {{.*#+}} ymm1 = (ymm1 * ymm3) + mem
 ; CHECK-NEXT:    movl %ebp, %esp
 ; CHECK-NEXT:    popl %ebp
 ; CHECK-NEXT:    retl
-- 
2.34.1