From ce415ff9c5a469aa75f6471ead4318d99f457ac9 Mon Sep 17 00:00:00 2001
From: Craig Topper
Date: Mon, 25 Jul 2016 07:20:35 +0000
Subject: [PATCH] [AVX512] Add load folding support for the unmasked forms of the FMA instructions.

llvm-svn: 276615
---
 llvm/lib/Target/X86/X86InstrInfo.cpp       | 144 +++++++++++++++++++++++++++++
 llvm/test/CodeGen/X86/fma_patterns.ll      |   6 +-
 llvm/test/CodeGen/X86/fma_patterns_wide.ll |   6 +-
 3 files changed, 148 insertions(+), 8 deletions(-)
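Each row added below pairs a register-only opcode with its memory-operand ("m") counterpart plus an alignment constraint, which is what lets the generic folding logic rewrite a separate load feeding an FMA into a single FMA that reads one source straight from memory. A minimal sketch of that pairing, using illustrative names rather than the exact LLVM types:

// Illustrative sketch only; the real entry type and lookup code live in
// llvm/lib/Target/X86/X86InstrInfo.cpp and differ in detail.
#include <cstddef>

struct FoldTableEntry {
  unsigned RegOpcode; // e.g. X86::VFMADD213PSZr (all sources in registers)
  unsigned MemOpcode; // e.g. X86::VFMADD213PSZm (last source read from memory)
  unsigned Flags;     // alignment requirement; TB_ALIGN_NONE means no constraint
};

// Conceptual query: given the register-form opcode chosen for "load x; fma(x, y, z)",
// is a memory form registered so the load can be folded away? Returns 0 if not.
unsigned lookupMemOpcode(const FoldTableEntry *Table, std::size_t Size,
                         unsigned RegOpcode) {
  for (std::size_t I = 0; I != Size; ++I)
    if (Table[I].RegOpcode == RegOpcode)
      return Table[I].MemOpcode;
  return 0; // no fold available
}

The tests below show the intended effect: a vmovaps/vmovapd followed by a 213-form FMA collapses into one 132-form FMA with a (%rdi) memory operand, the 132 form being the one that places the loaded multiplicand in the operand position that may come from memory.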
diff --git a/llvm/lib/Target/X86/X86InstrInfo.cpp b/llvm/lib/Target/X86/X86InstrInfo.cpp
index 9a0d3e9..74b9ae3 100644
--- a/llvm/lib/Target/X86/X86InstrInfo.cpp
+++ b/llvm/lib/Target/X86/X86InstrInfo.cpp
@@ -1820,6 +1820,18 @@ X86InstrInfo::X86InstrInfo(X86Subtarget &STI)
     { X86::VFMADD213SSr_Int, X86::VFMADD213SSm_Int, TB_ALIGN_NONE },
     { X86::VFMADD213SDr, X86::VFMADD213SDm, TB_ALIGN_NONE },
     { X86::VFMADD213SDr_Int, X86::VFMADD213SDm_Int, TB_ALIGN_NONE },
+    { X86::VFMADD231SSZr, X86::VFMADD231SSZm, TB_ALIGN_NONE },
+    { X86::VFMADD231SSZr_Int, X86::VFMADD231SSZm_Int, TB_ALIGN_NONE },
+    { X86::VFMADD231SDZr, X86::VFMADD231SDZm, TB_ALIGN_NONE },
+    { X86::VFMADD231SDZr_Int, X86::VFMADD231SDZm_Int, TB_ALIGN_NONE },
+    { X86::VFMADD132SSZr, X86::VFMADD132SSZm, TB_ALIGN_NONE },
+    { X86::VFMADD132SSZr_Int, X86::VFMADD132SSZm_Int, TB_ALIGN_NONE },
+    { X86::VFMADD132SDZr, X86::VFMADD132SDZm, TB_ALIGN_NONE },
+    { X86::VFMADD132SDZr_Int, X86::VFMADD132SDZm_Int, TB_ALIGN_NONE },
+    { X86::VFMADD213SSZr, X86::VFMADD213SSZm, TB_ALIGN_NONE },
+    { X86::VFMADD213SSZr_Int, X86::VFMADD213SSZm_Int, TB_ALIGN_NONE },
+    { X86::VFMADD213SDZr, X86::VFMADD213SDZm, TB_ALIGN_NONE },
+    { X86::VFMADD213SDZr_Int, X86::VFMADD213SDZm_Int, TB_ALIGN_NONE },

     { X86::VFMADD231PSr, X86::VFMADD231PSm, TB_ALIGN_NONE },
     { X86::VFMADD231PDr, X86::VFMADD231PDm, TB_ALIGN_NONE },
@@ -1833,6 +1845,24 @@ X86InstrInfo::X86InstrInfo(X86Subtarget &STI)
     { X86::VFMADD132PDYr, X86::VFMADD132PDYm, TB_ALIGN_NONE },
     { X86::VFMADD213PSYr, X86::VFMADD213PSYm, TB_ALIGN_NONE },
     { X86::VFMADD213PDYr, X86::VFMADD213PDYm, TB_ALIGN_NONE },
+    { X86::VFMADD231PSZr, X86::VFMADD231PSZm, TB_ALIGN_NONE },
+    { X86::VFMADD231PDZr, X86::VFMADD231PDZm, TB_ALIGN_NONE },
+    { X86::VFMADD132PSZr, X86::VFMADD132PSZm, TB_ALIGN_NONE },
+    { X86::VFMADD132PDZr, X86::VFMADD132PDZm, TB_ALIGN_NONE },
+    { X86::VFMADD213PSZr, X86::VFMADD213PSZm, TB_ALIGN_NONE },
+    { X86::VFMADD213PDZr, X86::VFMADD213PDZm, TB_ALIGN_NONE },
+    { X86::VFMADD231PSZ128r, X86::VFMADD231PSZ128m, TB_ALIGN_NONE },
+    { X86::VFMADD231PDZ128r, X86::VFMADD231PDZ128m, TB_ALIGN_NONE },
+    { X86::VFMADD132PSZ128r, X86::VFMADD132PSZ128m, TB_ALIGN_NONE },
+    { X86::VFMADD132PDZ128r, X86::VFMADD132PDZ128m, TB_ALIGN_NONE },
+    { X86::VFMADD213PSZ128r, X86::VFMADD213PSZ128m, TB_ALIGN_NONE },
+    { X86::VFMADD213PDZ128r, X86::VFMADD213PDZ128m, TB_ALIGN_NONE },
+    { X86::VFMADD231PSZ256r, X86::VFMADD231PSZ256m, TB_ALIGN_NONE },
+    { X86::VFMADD231PDZ256r, X86::VFMADD231PDZ256m, TB_ALIGN_NONE },
+    { X86::VFMADD132PSZ256r, X86::VFMADD132PSZ256m, TB_ALIGN_NONE },
+    { X86::VFMADD132PDZ256r, X86::VFMADD132PDZ256m, TB_ALIGN_NONE },
+    { X86::VFMADD213PSZ256r, X86::VFMADD213PSZ256m, TB_ALIGN_NONE },
+    { X86::VFMADD213PDZ256r, X86::VFMADD213PDZ256m, TB_ALIGN_NONE },

     { X86::VFNMADD231SSr, X86::VFNMADD231SSm, TB_ALIGN_NONE },
     { X86::VFNMADD231SSr_Int, X86::VFNMADD231SSm_Int, TB_ALIGN_NONE },
@@ -1846,6 +1876,18 @@ X86InstrInfo::X86InstrInfo(X86Subtarget &STI)
     { X86::VFNMADD213SSr_Int, X86::VFNMADD213SSm_Int, TB_ALIGN_NONE },
     { X86::VFNMADD213SDr, X86::VFNMADD213SDm, TB_ALIGN_NONE },
     { X86::VFNMADD213SDr_Int, X86::VFNMADD213SDm_Int, TB_ALIGN_NONE },
+    { X86::VFNMADD231SSZr, X86::VFNMADD231SSZm, TB_ALIGN_NONE },
+    { X86::VFNMADD231SSZr_Int, X86::VFNMADD231SSZm_Int, TB_ALIGN_NONE },
+    { X86::VFNMADD231SDZr, X86::VFNMADD231SDZm, TB_ALIGN_NONE },
+    { X86::VFNMADD231SDZr_Int, X86::VFNMADD231SDZm_Int, TB_ALIGN_NONE },
+    { X86::VFNMADD132SSZr, X86::VFNMADD132SSZm, TB_ALIGN_NONE },
+    { X86::VFNMADD132SSZr_Int, X86::VFNMADD132SSZm_Int, TB_ALIGN_NONE },
+    { X86::VFNMADD132SDZr, X86::VFNMADD132SDZm, TB_ALIGN_NONE },
+    { X86::VFNMADD132SDZr_Int, X86::VFNMADD132SDZm_Int, TB_ALIGN_NONE },
+    { X86::VFNMADD213SSZr, X86::VFNMADD213SSZm, TB_ALIGN_NONE },
+    { X86::VFNMADD213SSZr_Int, X86::VFNMADD213SSZm_Int, TB_ALIGN_NONE },
+    { X86::VFNMADD213SDZr, X86::VFNMADD213SDZm, TB_ALIGN_NONE },
+    { X86::VFNMADD213SDZr_Int, X86::VFNMADD213SDZm_Int, TB_ALIGN_NONE },

     { X86::VFNMADD231PSr, X86::VFNMADD231PSm, TB_ALIGN_NONE },
     { X86::VFNMADD231PDr, X86::VFNMADD231PDm, TB_ALIGN_NONE },
@@ -1859,6 +1901,24 @@ X86InstrInfo::X86InstrInfo(X86Subtarget &STI)
     { X86::VFNMADD132PDYr, X86::VFNMADD132PDYm, TB_ALIGN_NONE },
     { X86::VFNMADD213PSYr, X86::VFNMADD213PSYm, TB_ALIGN_NONE },
     { X86::VFNMADD213PDYr, X86::VFNMADD213PDYm, TB_ALIGN_NONE },
+    { X86::VFNMADD231PSZr, X86::VFNMADD231PSZm, TB_ALIGN_NONE },
+    { X86::VFNMADD231PDZr, X86::VFNMADD231PDZm, TB_ALIGN_NONE },
+    { X86::VFNMADD132PSZr, X86::VFNMADD132PSZm, TB_ALIGN_NONE },
+    { X86::VFNMADD132PDZr, X86::VFNMADD132PDZm, TB_ALIGN_NONE },
+    { X86::VFNMADD213PSZr, X86::VFNMADD213PSZm, TB_ALIGN_NONE },
+    { X86::VFNMADD213PDZr, X86::VFNMADD213PDZm, TB_ALIGN_NONE },
+    { X86::VFNMADD231PSZ128r, X86::VFNMADD231PSZ128m, TB_ALIGN_NONE },
+    { X86::VFNMADD231PDZ128r, X86::VFNMADD231PDZ128m, TB_ALIGN_NONE },
+    { X86::VFNMADD132PSZ128r, X86::VFNMADD132PSZ128m, TB_ALIGN_NONE },
+    { X86::VFNMADD132PDZ128r, X86::VFNMADD132PDZ128m, TB_ALIGN_NONE },
+    { X86::VFNMADD213PSZ128r, X86::VFNMADD213PSZ128m, TB_ALIGN_NONE },
+    { X86::VFNMADD213PDZ128r, X86::VFNMADD213PDZ128m, TB_ALIGN_NONE },
+    { X86::VFNMADD231PSZ256r, X86::VFNMADD231PSZ256m, TB_ALIGN_NONE },
+    { X86::VFNMADD231PDZ256r, X86::VFNMADD231PDZ256m, TB_ALIGN_NONE },
+    { X86::VFNMADD132PSZ256r, X86::VFNMADD132PSZ256m, TB_ALIGN_NONE },
+    { X86::VFNMADD132PDZ256r, X86::VFNMADD132PDZ256m, TB_ALIGN_NONE },
+    { X86::VFNMADD213PSZ256r, X86::VFNMADD213PSZ256m, TB_ALIGN_NONE },
+    { X86::VFNMADD213PDZ256r, X86::VFNMADD213PDZ256m, TB_ALIGN_NONE },

     { X86::VFMSUB231SSr, X86::VFMSUB231SSm, TB_ALIGN_NONE },
     { X86::VFMSUB231SSr_Int, X86::VFMSUB231SSm_Int, TB_ALIGN_NONE },
@@ -1872,6 +1932,18 @@ X86InstrInfo::X86InstrInfo(X86Subtarget &STI)
     { X86::VFMSUB213SSr_Int, X86::VFMSUB213SSm_Int, TB_ALIGN_NONE },
     { X86::VFMSUB213SDr, X86::VFMSUB213SDm, TB_ALIGN_NONE },
     { X86::VFMSUB213SDr_Int, X86::VFMSUB213SDm_Int, TB_ALIGN_NONE },
+    { X86::VFMSUB231SSZr, X86::VFMSUB231SSZm, TB_ALIGN_NONE },
+    { X86::VFMSUB231SSZr_Int, X86::VFMSUB231SSZm_Int, TB_ALIGN_NONE },
+    { X86::VFMSUB231SDZr, X86::VFMSUB231SDZm, TB_ALIGN_NONE },
+    { X86::VFMSUB231SDZr_Int, X86::VFMSUB231SDZm_Int, TB_ALIGN_NONE },
+    { X86::VFMSUB132SSZr, X86::VFMSUB132SSZm, TB_ALIGN_NONE },
+    { X86::VFMSUB132SSZr_Int, X86::VFMSUB132SSZm_Int, TB_ALIGN_NONE },
+    { X86::VFMSUB132SDZr, X86::VFMSUB132SDZm, TB_ALIGN_NONE },
+    { X86::VFMSUB132SDZr_Int, X86::VFMSUB132SDZm_Int, TB_ALIGN_NONE },
+    { X86::VFMSUB213SSZr, X86::VFMSUB213SSZm, TB_ALIGN_NONE },
+    { X86::VFMSUB213SSZr_Int, X86::VFMSUB213SSZm_Int, TB_ALIGN_NONE },
+    { X86::VFMSUB213SDZr, X86::VFMSUB213SDZm, TB_ALIGN_NONE },
+    { X86::VFMSUB213SDZr_Int, X86::VFMSUB213SDZm_Int, TB_ALIGN_NONE },

     { X86::VFMSUB231PSr, X86::VFMSUB231PSm, TB_ALIGN_NONE },
     { X86::VFMSUB231PDr, X86::VFMSUB231PDm, TB_ALIGN_NONE },
@@ -1885,6 +1957,24 @@ X86InstrInfo::X86InstrInfo(X86Subtarget &STI)
     { X86::VFMSUB132PDYr, X86::VFMSUB132PDYm, TB_ALIGN_NONE },
     { X86::VFMSUB213PSYr, X86::VFMSUB213PSYm, TB_ALIGN_NONE },
     { X86::VFMSUB213PDYr, X86::VFMSUB213PDYm, TB_ALIGN_NONE },
+    { X86::VFMSUB231PSZr, X86::VFMSUB231PSZm, TB_ALIGN_NONE },
+    { X86::VFMSUB231PDZr, X86::VFMSUB231PDZm, TB_ALIGN_NONE },
+    { X86::VFMSUB132PSZr, X86::VFMSUB132PSZm, TB_ALIGN_NONE },
+    { X86::VFMSUB132PDZr, X86::VFMSUB132PDZm, TB_ALIGN_NONE },
+    { X86::VFMSUB213PSZr, X86::VFMSUB213PSZm, TB_ALIGN_NONE },
+    { X86::VFMSUB213PDZr, X86::VFMSUB213PDZm, TB_ALIGN_NONE },
+    { X86::VFMSUB231PSZ128r, X86::VFMSUB231PSZ128m, TB_ALIGN_NONE },
+    { X86::VFMSUB231PDZ128r, X86::VFMSUB231PDZ128m, TB_ALIGN_NONE },
+    { X86::VFMSUB132PSZ128r, X86::VFMSUB132PSZ128m, TB_ALIGN_NONE },
+    { X86::VFMSUB132PDZ128r, X86::VFMSUB132PDZ128m, TB_ALIGN_NONE },
+    { X86::VFMSUB213PSZ128r, X86::VFMSUB213PSZ128m, TB_ALIGN_NONE },
+    { X86::VFMSUB213PDZ128r, X86::VFMSUB213PDZ128m, TB_ALIGN_NONE },
+    { X86::VFMSUB231PSZ256r, X86::VFMSUB231PSZ256m, TB_ALIGN_NONE },
+    { X86::VFMSUB231PDZ256r, X86::VFMSUB231PDZ256m, TB_ALIGN_NONE },
+    { X86::VFMSUB132PSZ256r, X86::VFMSUB132PSZ256m, TB_ALIGN_NONE },
+    { X86::VFMSUB132PDZ256r, X86::VFMSUB132PDZ256m, TB_ALIGN_NONE },
+    { X86::VFMSUB213PSZ256r, X86::VFMSUB213PSZ256m, TB_ALIGN_NONE },
+    { X86::VFMSUB213PDZ256r, X86::VFMSUB213PDZ256m, TB_ALIGN_NONE },

     { X86::VFNMSUB231SSr, X86::VFNMSUB231SSm, TB_ALIGN_NONE },
     { X86::VFNMSUB231SSr_Int, X86::VFNMSUB231SSm_Int, TB_ALIGN_NONE },
@@ -1911,6 +2001,24 @@ X86InstrInfo::X86InstrInfo(X86Subtarget &STI)
     { X86::VFNMSUB132PDYr, X86::VFNMSUB132PDYm, TB_ALIGN_NONE },
     { X86::VFNMSUB213PSYr, X86::VFNMSUB213PSYm, TB_ALIGN_NONE },
     { X86::VFNMSUB213PDYr, X86::VFNMSUB213PDYm, TB_ALIGN_NONE },
+    { X86::VFNMSUB231PSZr, X86::VFNMSUB231PSZm, TB_ALIGN_NONE },
+    { X86::VFNMSUB231PDZr, X86::VFNMSUB231PDZm, TB_ALIGN_NONE },
+    { X86::VFNMSUB132PSZr, X86::VFNMSUB132PSZm, TB_ALIGN_NONE },
+    { X86::VFNMSUB132PDZr, X86::VFNMSUB132PDZm, TB_ALIGN_NONE },
+    { X86::VFNMSUB213PSZr, X86::VFNMSUB213PSZm, TB_ALIGN_NONE },
+    { X86::VFNMSUB213PDZr, X86::VFNMSUB213PDZm, TB_ALIGN_NONE },
+    { X86::VFNMSUB231PSZ128r, X86::VFNMSUB231PSZ128m, TB_ALIGN_NONE },
+    { X86::VFNMSUB231PDZ128r, X86::VFNMSUB231PDZ128m, TB_ALIGN_NONE },
+    { X86::VFNMSUB132PSZ128r, X86::VFNMSUB132PSZ128m, TB_ALIGN_NONE },
+    { X86::VFNMSUB132PDZ128r, X86::VFNMSUB132PDZ128m, TB_ALIGN_NONE },
+    { X86::VFNMSUB213PSZ128r, X86::VFNMSUB213PSZ128m, TB_ALIGN_NONE },
+    { X86::VFNMSUB213PDZ128r, X86::VFNMSUB213PDZ128m, TB_ALIGN_NONE },
+    { X86::VFNMSUB231PSZ256r, X86::VFNMSUB231PSZ256m, TB_ALIGN_NONE },
+    { X86::VFNMSUB231PDZ256r, X86::VFNMSUB231PDZ256m, TB_ALIGN_NONE },
+    { X86::VFNMSUB132PSZ256r, X86::VFNMSUB132PSZ256m, TB_ALIGN_NONE },
+    { X86::VFNMSUB132PDZ256r, X86::VFNMSUB132PDZ256m, TB_ALIGN_NONE },
+    { X86::VFNMSUB213PSZ256r, X86::VFNMSUB213PSZ256m, TB_ALIGN_NONE },
+    { X86::VFNMSUB213PDZ256r, X86::VFNMSUB213PDZ256m, TB_ALIGN_NONE },

     { X86::VFMADDSUB231PSr, X86::VFMADDSUB231PSm, TB_ALIGN_NONE },
     { X86::VFMADDSUB231PDr, X86::VFMADDSUB231PDm, TB_ALIGN_NONE },
@@ -1924,6 +2032,24 @@ X86InstrInfo::X86InstrInfo(X86Subtarget &STI)
     { X86::VFMADDSUB132PDYr, X86::VFMADDSUB132PDYm, TB_ALIGN_NONE },
     { X86::VFMADDSUB213PSYr, X86::VFMADDSUB213PSYm, TB_ALIGN_NONE },
     { X86::VFMADDSUB213PDYr, X86::VFMADDSUB213PDYm, TB_ALIGN_NONE },
+    { X86::VFMADDSUB231PSZr, X86::VFMADDSUB231PSZm, TB_ALIGN_NONE },
+    { X86::VFMADDSUB231PDZr, X86::VFMADDSUB231PDZm, TB_ALIGN_NONE },
+    { X86::VFMADDSUB132PSZr, X86::VFMADDSUB132PSZm, TB_ALIGN_NONE },
+    { X86::VFMADDSUB132PDZr, X86::VFMADDSUB132PDZm, TB_ALIGN_NONE },
+    { X86::VFMADDSUB213PSZr, X86::VFMADDSUB213PSZm, TB_ALIGN_NONE },
+    { X86::VFMADDSUB213PDZr, X86::VFMADDSUB213PDZm, TB_ALIGN_NONE },
+    { X86::VFMADDSUB231PSZ128r, X86::VFMADDSUB231PSZ128m, TB_ALIGN_NONE },
+    { X86::VFMADDSUB231PDZ128r, X86::VFMADDSUB231PDZ128m, TB_ALIGN_NONE },
+    { X86::VFMADDSUB132PSZ128r, X86::VFMADDSUB132PSZ128m, TB_ALIGN_NONE },
+    { X86::VFMADDSUB132PDZ128r, X86::VFMADDSUB132PDZ128m, TB_ALIGN_NONE },
+    { X86::VFMADDSUB213PSZ128r, X86::VFMADDSUB213PSZ128m, TB_ALIGN_NONE },
+    { X86::VFMADDSUB213PDZ128r, X86::VFMADDSUB213PDZ128m, TB_ALIGN_NONE },
+    { X86::VFMADDSUB231PSZ256r, X86::VFMADDSUB231PSZ256m, TB_ALIGN_NONE },
+    { X86::VFMADDSUB231PDZ256r, X86::VFMADDSUB231PDZ256m, TB_ALIGN_NONE },
+    { X86::VFMADDSUB132PSZ256r, X86::VFMADDSUB132PSZ256m, TB_ALIGN_NONE },
+    { X86::VFMADDSUB132PDZ256r, X86::VFMADDSUB132PDZ256m, TB_ALIGN_NONE },
+    { X86::VFMADDSUB213PSZ256r, X86::VFMADDSUB213PSZ256m, TB_ALIGN_NONE },
+    { X86::VFMADDSUB213PDZ256r, X86::VFMADDSUB213PDZ256m, TB_ALIGN_NONE },

     { X86::VFMSUBADD231PSr, X86::VFMSUBADD231PSm, TB_ALIGN_NONE },
     { X86::VFMSUBADD231PDr, X86::VFMSUBADD231PDm, TB_ALIGN_NONE },
@@ -1937,6 +2063,24 @@ X86InstrInfo::X86InstrInfo(X86Subtarget &STI)
     { X86::VFMSUBADD132PDYr, X86::VFMSUBADD132PDYm, TB_ALIGN_NONE },
     { X86::VFMSUBADD213PSYr, X86::VFMSUBADD213PSYm, TB_ALIGN_NONE },
     { X86::VFMSUBADD213PDYr, X86::VFMSUBADD213PDYm, TB_ALIGN_NONE },
+    { X86::VFMSUBADD231PSZr, X86::VFMSUBADD231PSZm, TB_ALIGN_NONE },
+    { X86::VFMSUBADD231PDZr, X86::VFMSUBADD231PDZm, TB_ALIGN_NONE },
+    { X86::VFMSUBADD132PSZr, X86::VFMSUBADD132PSZm, TB_ALIGN_NONE },
+    { X86::VFMSUBADD132PDZr, X86::VFMSUBADD132PDZm, TB_ALIGN_NONE },
+    { X86::VFMSUBADD213PSZr, X86::VFMSUBADD213PSZm, TB_ALIGN_NONE },
+    { X86::VFMSUBADD213PDZr, X86::VFMSUBADD213PDZm, TB_ALIGN_NONE },
+    { X86::VFMSUBADD231PSZ128r, X86::VFMSUBADD231PSZ128m, TB_ALIGN_NONE },
+    { X86::VFMSUBADD231PDZ128r, X86::VFMSUBADD231PDZ128m, TB_ALIGN_NONE },
+    { X86::VFMSUBADD132PSZ128r, X86::VFMSUBADD132PSZ128m, TB_ALIGN_NONE },
+    { X86::VFMSUBADD132PDZ128r, X86::VFMSUBADD132PDZ128m, TB_ALIGN_NONE },
+    { X86::VFMSUBADD213PSZ128r, X86::VFMSUBADD213PSZ128m, TB_ALIGN_NONE },
+    { X86::VFMSUBADD213PDZ128r, X86::VFMSUBADD213PDZ128m, TB_ALIGN_NONE },
+    { X86::VFMSUBADD231PSZ256r, X86::VFMSUBADD231PSZ256m, TB_ALIGN_NONE },
+    { X86::VFMSUBADD231PDZ256r, X86::VFMSUBADD231PDZ256m, TB_ALIGN_NONE },
+    { X86::VFMSUBADD132PSZ256r, X86::VFMSUBADD132PSZ256m, TB_ALIGN_NONE },
+    { X86::VFMSUBADD132PDZ256r, X86::VFMSUBADD132PDZ256m, TB_ALIGN_NONE },
+    { X86::VFMSUBADD213PSZ256r, X86::VFMSUBADD213PSZ256m, TB_ALIGN_NONE },
+    { X86::VFMSUBADD213PDZ256r, X86::VFMSUBADD213PDZ256m, TB_ALIGN_NONE },

     // FMA4 foldable patterns
     { X86::VFMADDSS4rr, X86::VFMADDSS4rm, TB_ALIGN_NONE },
diff --git a/llvm/test/CodeGen/X86/fma_patterns.ll b/llvm/test/CodeGen/X86/fma_patterns.ll
index b724da2..371361d 100644
--- a/llvm/test/CodeGen/X86/fma_patterns.ll
+++ b/llvm/test/CodeGen/X86/fma_patterns.ll
@@ -524,8 +524,7 @@ define <4 x float> @test_4f32_fmadd_load(<4 x float>* %a0, <4 x float> %a1, <4 x
 ;
 ; AVX512-LABEL: test_4f32_fmadd_load:
 ; AVX512: # BB#0:
-; AVX512-NEXT: vmovaps (%rdi), %xmm2
-; AVX512-NEXT: vfmadd213ps %xmm1, %xmm2, %xmm0
+; AVX512-NEXT: vfmadd132ps (%rdi), %xmm1, %xmm0
 ; AVX512-NEXT: retq
   %x = load <4 x float>, <4 x float>* %a0
   %y = fmul <4 x float> %x, %a1
@@ -546,8 +545,7 @@ define <2 x double> @test_2f64_fmsub_load(<2 x double>* %a0, <2 x double> %a1, <
 ;
 ; AVX512-LABEL: test_2f64_fmsub_load:
 ; AVX512: # BB#0:
-; AVX512-NEXT: vmovapd (%rdi), %xmm2
-; AVX512-NEXT: vfmsub213pd %xmm1, %xmm2, %xmm0
+; AVX512-NEXT: vfmsub132pd (%rdi), %xmm1, %xmm0
 ; AVX512-NEXT: retq
   %x = load <2 x double>, <2 x double>* %a0
   %y = fmul <2 x double> %x, %a1
diff --git a/llvm/test/CodeGen/X86/fma_patterns_wide.ll b/llvm/test/CodeGen/X86/fma_patterns_wide.ll
index 98b6c49..2c49bb8 100644
--- a/llvm/test/CodeGen/X86/fma_patterns_wide.ll
+++ b/llvm/test/CodeGen/X86/fma_patterns_wide.ll
@@ -217,8 +217,7 @@ define <16 x float> @test_16f32_fmadd_load(<16 x float>* %a0, <16 x float> %a1,
 ;
 ; AVX512-LABEL: test_16f32_fmadd_load:
 ; AVX512: # BB#0:
-; AVX512-NEXT: vmovaps (%rdi), %zmm2
-; AVX512-NEXT: vfmadd213ps %zmm1, %zmm2, %zmm0
+; AVX512-NEXT: vfmadd132ps (%rdi), %zmm1, %zmm0
 ; AVX512-NEXT: retq
   %x = load <16 x float>, <16 x float>* %a0
   %y = fmul <16 x float> %x, %a1
@@ -241,8 +240,7 @@ define <8 x double> @test_8f64_fmsub_load(<8 x double>* %a0, <8 x double> %a1, <
 ;
 ; AVX512-LABEL: test_8f64_fmsub_load:
 ; AVX512: # BB#0:
-; AVX512-NEXT: vmovapd (%rdi), %zmm2
-; AVX512-NEXT: vfmsub213pd %zmm1, %zmm2, %zmm0
+; AVX512-NEXT: vfmsub132pd (%rdi), %zmm1, %zmm0
 ; AVX512-NEXT: retq
   %x = load <8 x double>, <8 x double>* %a0
   %y = fmul <8 x double> %x, %a1
--
2.7.4