static bool isFMA3(unsigned Opcode, bool &IsIntrinsic) {
IsIntrinsic = false;
+#define FMA3_CASE(Name, Modifier) \
+case X86::Name##r##Modifier: case X86::Name##m##Modifier:
+
+#define FMA3_SCALAR_PAIR(Name, Form, Modifier) \
+ FMA3_CASE(Name##SD##Form, Modifier) \
+ FMA3_CASE(Name##SS##Form, Modifier)
+
+#define FMA3_PACKED_PAIR(Name, Form, Modifier) \
+ FMA3_CASE(Name##PD##Form, Modifier) \
+ FMA3_CASE(Name##PS##Form, Modifier)
+
+#define FMA3_PACKED_SET(Form, Modifier) \
+ FMA3_PACKED_PAIR(VFMADD, Form, Modifier) \
+ FMA3_PACKED_PAIR(VFMSUB, Form, Modifier) \
+ FMA3_PACKED_PAIR(VFNMADD, Form, Modifier) \
+ FMA3_PACKED_PAIR(VFNMSUB, Form, Modifier) \
+ FMA3_PACKED_PAIR(VFMADDSUB, Form, Modifier) \
+ FMA3_PACKED_PAIR(VFMSUBADD, Form, Modifier)
+
+#define FMA3_CASES(Form) \
+ FMA3_SCALAR_PAIR(VFMADD, Form, ) \
+ FMA3_SCALAR_PAIR(VFMSUB, Form, ) \
+ FMA3_SCALAR_PAIR(VFNMADD, Form, ) \
+ FMA3_SCALAR_PAIR(VFNMSUB, Form, ) \
+ FMA3_PACKED_SET(Form, ) \
+ FMA3_PACKED_SET(Form, Y) \
+
+#define FMA3_SCALAR_PAIR_AVX512(Name, Modifier) \
+ FMA3_CASE(Name##SD, Modifier) \
+ FMA3_CASE(Name##SS, Modifier)
+
+#define FMA3_PACKED_PAIR_AVX512(Name, Size) \
+ FMA3_CASE(Name##PD##Size, ) \
+ FMA3_CASE(Name##PS##Size, )
+
+#define FMA3_PACKED_SET_AVX512(Form, Size) \
+ FMA3_PACKED_PAIR_AVX512(VFMADD##Form, Size) \
+ FMA3_PACKED_PAIR_AVX512(VFMSUB##Form, Size) \
+ FMA3_PACKED_PAIR_AVX512(VFNMADD##Form, Size) \
+ FMA3_PACKED_PAIR_AVX512(VFNMSUB##Form, Size) \
+ FMA3_PACKED_PAIR_AVX512(VFMADDSUB##Form, Size) \
+ FMA3_PACKED_PAIR_AVX512(VFMSUBADD##Form, Size)
+
+#define FMA3_CASES_AVX512(Form) \
+ FMA3_SCALAR_PAIR_AVX512(VFMADD##Form, ) \
+ FMA3_SCALAR_PAIR_AVX512(VFMSUB##Form, ) \
+ FMA3_SCALAR_PAIR_AVX512(VFNMADD##Form, ) \
+ FMA3_SCALAR_PAIR_AVX512(VFNMSUB##Form, ) \
+ FMA3_PACKED_SET_AVX512(Form, Z128) \
+ FMA3_PACKED_SET_AVX512(Form, Z256) \
+ FMA3_PACKED_SET_AVX512(Form, Z)
+
+#define FMA3_CASES_SCALAR_INT(Form) \
+ FMA3_SCALAR_PAIR(VFMADD, Form, _Int) \
+ FMA3_SCALAR_PAIR(VFMSUB, Form, _Int) \
+ FMA3_SCALAR_PAIR(VFNMADD, Form, _Int) \
+ FMA3_SCALAR_PAIR(VFNMSUB, Form, _Int)
+
+#define FMA3_CASES_SCALAR_INT_AVX512(Form) \
+ FMA3_SCALAR_PAIR_AVX512(VFMADD##Form, _Int) \
+ FMA3_SCALAR_PAIR_AVX512(VFMSUB##Form, _Int) \
+ FMA3_SCALAR_PAIR_AVX512(VFNMADD##Form, _Int) \
+ FMA3_SCALAR_PAIR_AVX512(VFNMSUB##Form, _Int)
+
switch (Opcode) {
- case X86::VFMADDSDr132r: case X86::VFMADDSDr132m:
- case X86::VFMADDSSr132r: case X86::VFMADDSSr132m:
- case X86::VFMSUBSDr132r: case X86::VFMSUBSDr132m:
- case X86::VFMSUBSSr132r: case X86::VFMSUBSSr132m:
- case X86::VFNMADDSDr132r: case X86::VFNMADDSDr132m:
- case X86::VFNMADDSSr132r: case X86::VFNMADDSSr132m:
- case X86::VFNMSUBSDr132r: case X86::VFNMSUBSDr132m:
- case X86::VFNMSUBSSr132r: case X86::VFNMSUBSSr132m:
-
- case X86::VFMADDSDr213r: case X86::VFMADDSDr213m:
- case X86::VFMADDSSr213r: case X86::VFMADDSSr213m:
- case X86::VFMSUBSDr213r: case X86::VFMSUBSDr213m:
- case X86::VFMSUBSSr213r: case X86::VFMSUBSSr213m:
- case X86::VFNMADDSDr213r: case X86::VFNMADDSDr213m:
- case X86::VFNMADDSSr213r: case X86::VFNMADDSSr213m:
- case X86::VFNMSUBSDr213r: case X86::VFNMSUBSDr213m:
- case X86::VFNMSUBSSr213r: case X86::VFNMSUBSSr213m:
-
- case X86::VFMADDSDr231r: case X86::VFMADDSDr231m:
- case X86::VFMADDSSr231r: case X86::VFMADDSSr231m:
- case X86::VFMSUBSDr231r: case X86::VFMSUBSDr231m:
- case X86::VFMSUBSSr231r: case X86::VFMSUBSSr231m:
- case X86::VFNMADDSDr231r: case X86::VFNMADDSDr231m:
- case X86::VFNMADDSSr231r: case X86::VFNMADDSSr231m:
- case X86::VFNMSUBSDr231r: case X86::VFNMSUBSDr231m:
- case X86::VFNMSUBSSr231r: case X86::VFNMSUBSSr231m:
-
- case X86::VFMADDSUBPDr132r: case X86::VFMADDSUBPDr132m:
- case X86::VFMADDSUBPSr132r: case X86::VFMADDSUBPSr132m:
- case X86::VFMSUBADDPDr132r: case X86::VFMSUBADDPDr132m:
- case X86::VFMSUBADDPSr132r: case X86::VFMSUBADDPSr132m:
- case X86::VFMADDSUBPDr132rY: case X86::VFMADDSUBPDr132mY:
- case X86::VFMADDSUBPSr132rY: case X86::VFMADDSUBPSr132mY:
- case X86::VFMSUBADDPDr132rY: case X86::VFMSUBADDPDr132mY:
- case X86::VFMSUBADDPSr132rY: case X86::VFMSUBADDPSr132mY:
-
- case X86::VFMADDPDr132r: case X86::VFMADDPDr132m:
- case X86::VFMADDPSr132r: case X86::VFMADDPSr132m:
- case X86::VFMSUBPDr132r: case X86::VFMSUBPDr132m:
- case X86::VFMSUBPSr132r: case X86::VFMSUBPSr132m:
- case X86::VFNMADDPDr132r: case X86::VFNMADDPDr132m:
- case X86::VFNMADDPSr132r: case X86::VFNMADDPSr132m:
- case X86::VFNMSUBPDr132r: case X86::VFNMSUBPDr132m:
- case X86::VFNMSUBPSr132r: case X86::VFNMSUBPSr132m:
- case X86::VFMADDPDr132rY: case X86::VFMADDPDr132mY:
- case X86::VFMADDPSr132rY: case X86::VFMADDPSr132mY:
- case X86::VFMSUBPDr132rY: case X86::VFMSUBPDr132mY:
- case X86::VFMSUBPSr132rY: case X86::VFMSUBPSr132mY:
- case X86::VFNMADDPDr132rY: case X86::VFNMADDPDr132mY:
- case X86::VFNMADDPSr132rY: case X86::VFNMADDPSr132mY:
- case X86::VFNMSUBPDr132rY: case X86::VFNMSUBPDr132mY:
- case X86::VFNMSUBPSr132rY: case X86::VFNMSUBPSr132mY:
-
- case X86::VFMADDSUBPDr213r: case X86::VFMADDSUBPDr213m:
- case X86::VFMADDSUBPSr213r: case X86::VFMADDSUBPSr213m:
- case X86::VFMSUBADDPDr213r: case X86::VFMSUBADDPDr213m:
- case X86::VFMSUBADDPSr213r: case X86::VFMSUBADDPSr213m:
- case X86::VFMADDSUBPDr213rY: case X86::VFMADDSUBPDr213mY:
- case X86::VFMADDSUBPSr213rY: case X86::VFMADDSUBPSr213mY:
- case X86::VFMSUBADDPDr213rY: case X86::VFMSUBADDPDr213mY:
- case X86::VFMSUBADDPSr213rY: case X86::VFMSUBADDPSr213mY:
-
- case X86::VFMADDPDr213r: case X86::VFMADDPDr213m:
- case X86::VFMADDPSr213r: case X86::VFMADDPSr213m:
- case X86::VFMSUBPDr213r: case X86::VFMSUBPDr213m:
- case X86::VFMSUBPSr213r: case X86::VFMSUBPSr213m:
- case X86::VFNMADDPDr213r: case X86::VFNMADDPDr213m:
- case X86::VFNMADDPSr213r: case X86::VFNMADDPSr213m:
- case X86::VFNMSUBPDr213r: case X86::VFNMSUBPDr213m:
- case X86::VFNMSUBPSr213r: case X86::VFNMSUBPSr213m:
- case X86::VFMADDPDr213rY: case X86::VFMADDPDr213mY:
- case X86::VFMADDPSr213rY: case X86::VFMADDPSr213mY:
- case X86::VFMSUBPDr213rY: case X86::VFMSUBPDr213mY:
- case X86::VFMSUBPSr213rY: case X86::VFMSUBPSr213mY:
- case X86::VFNMADDPDr213rY: case X86::VFNMADDPDr213mY:
- case X86::VFNMADDPSr213rY: case X86::VFNMADDPSr213mY:
- case X86::VFNMSUBPDr213rY: case X86::VFNMSUBPDr213mY:
- case X86::VFNMSUBPSr213rY: case X86::VFNMSUBPSr213mY:
-
- case X86::VFMADDSUBPDr231r: case X86::VFMADDSUBPDr231m:
- case X86::VFMADDSUBPSr231r: case X86::VFMADDSUBPSr231m:
- case X86::VFMSUBADDPDr231r: case X86::VFMSUBADDPDr231m:
- case X86::VFMSUBADDPSr231r: case X86::VFMSUBADDPSr231m:
- case X86::VFMADDSUBPDr231rY: case X86::VFMADDSUBPDr231mY:
- case X86::VFMADDSUBPSr231rY: case X86::VFMADDSUBPSr231mY:
- case X86::VFMSUBADDPDr231rY: case X86::VFMSUBADDPDr231mY:
- case X86::VFMSUBADDPSr231rY: case X86::VFMSUBADDPSr231mY:
-
- case X86::VFMADDPDr231r: case X86::VFMADDPDr231m:
- case X86::VFMADDPSr231r: case X86::VFMADDPSr231m:
- case X86::VFMSUBPDr231r: case X86::VFMSUBPDr231m:
- case X86::VFMSUBPSr231r: case X86::VFMSUBPSr231m:
- case X86::VFNMADDPDr231r: case X86::VFNMADDPDr231m:
- case X86::VFNMADDPSr231r: case X86::VFNMADDPSr231m:
- case X86::VFNMSUBPDr231r: case X86::VFNMSUBPDr231m:
- case X86::VFNMSUBPSr231r: case X86::VFNMSUBPSr231m:
- case X86::VFMADDPDr231rY: case X86::VFMADDPDr231mY:
- case X86::VFMADDPSr231rY: case X86::VFMADDPSr231mY:
- case X86::VFMSUBPDr231rY: case X86::VFMSUBPDr231mY:
- case X86::VFMSUBPSr231rY: case X86::VFMSUBPSr231mY:
- case X86::VFNMADDPDr231rY: case X86::VFNMADDPDr231mY:
- case X86::VFNMADDPSr231rY: case X86::VFNMADDPSr231mY:
- case X86::VFNMSUBPDr231rY: case X86::VFNMSUBPDr231mY:
- case X86::VFNMSUBPSr231rY: case X86::VFNMSUBPSr231mY:
+ FMA3_CASES(r132)
+ FMA3_CASES(r213)
+ FMA3_CASES(r231)
+
+ // AVX-512 instructions
+ FMA3_CASES_AVX512(132)
+ FMA3_CASES_AVX512(213)
+ FMA3_CASES_AVX512(231)
return true;
- case X86::VFMADDSDr132r_Int: case X86::VFMADDSDr132m_Int:
- case X86::VFMADDSSr132r_Int: case X86::VFMADDSSr132m_Int:
- case X86::VFMSUBSDr132r_Int: case X86::VFMSUBSDr132m_Int:
- case X86::VFMSUBSSr132r_Int: case X86::VFMSUBSSr132m_Int:
- case X86::VFNMADDSDr132r_Int: case X86::VFNMADDSDr132m_Int:
- case X86::VFNMADDSSr132r_Int: case X86::VFNMADDSSr132m_Int:
- case X86::VFNMSUBSDr132r_Int: case X86::VFNMSUBSDr132m_Int:
- case X86::VFNMSUBSSr132r_Int: case X86::VFNMSUBSSr132m_Int:
-
- case X86::VFMADDSDr213r_Int: case X86::VFMADDSDr213m_Int:
- case X86::VFMADDSSr213r_Int: case X86::VFMADDSSr213m_Int:
- case X86::VFMSUBSDr213r_Int: case X86::VFMSUBSDr213m_Int:
- case X86::VFMSUBSSr213r_Int: case X86::VFMSUBSSr213m_Int:
- case X86::VFNMADDSDr213r_Int: case X86::VFNMADDSDr213m_Int:
- case X86::VFNMADDSSr213r_Int: case X86::VFNMADDSSr213m_Int:
- case X86::VFNMSUBSDr213r_Int: case X86::VFNMSUBSDr213m_Int:
- case X86::VFNMSUBSSr213r_Int: case X86::VFNMSUBSSr213m_Int:
-
- case X86::VFMADDSDr231r_Int: case X86::VFMADDSDr231m_Int:
- case X86::VFMADDSSr231r_Int: case X86::VFMADDSSr231m_Int:
- case X86::VFMSUBSDr231r_Int: case X86::VFMSUBSDr231m_Int:
- case X86::VFMSUBSSr231r_Int: case X86::VFMSUBSSr231m_Int:
- case X86::VFNMADDSDr231r_Int: case X86::VFNMADDSDr231m_Int:
- case X86::VFNMADDSSr231r_Int: case X86::VFNMADDSSr231m_Int:
- case X86::VFNMSUBSDr231r_Int: case X86::VFNMSUBSDr231m_Int:
- case X86::VFNMSUBSSr231r_Int: case X86::VFNMSUBSSr231m_Int:
+ FMA3_CASES_SCALAR_INT(r132)
+ FMA3_CASES_SCALAR_INT(r213)
+ FMA3_CASES_SCALAR_INT(r231)
+
+ // AVX-512 instructions
+ FMA3_CASES_SCALAR_INT_AVX512(132)
+ FMA3_CASES_SCALAR_INT_AVX512(213)
+ FMA3_CASES_SCALAR_INT_AVX512(231)
IsIntrinsic = true;
return true;
default:
return false;
}
llvm_unreachable("Opcode not handled by the switch");
+
+#undef FMA3_CASE
+#undef FMA3_SCALAR_PAIR
+#undef FMA3_PACKED_PAIR
+#undef FMA3_PACKED_SET
+#undef FMA3_CASES
+#undef FMA3_SCALAR_PAIR_AVX512
+#undef FMA3_PACKED_PAIR_AVX512
+#undef FMA3_PACKED_SET_AVX512
+#undef FMA3_CASES_AVX512
+#undef FMA3_CASES_SCALAR_INT
+#undef FMA3_CASES_SCALAR_INT_AVX512
}
/// Returns an adjusted FMA opcode that must be used in FMA instruction that
bool IsIntrinOpcode,
unsigned SrcOpIdx1,
unsigned SrcOpIdx2) {
+#define FMA3_ENTRY(Name, Suffix) \
+ { X86::Name##132##Suffix, X86::Name##213##Suffix, X86::Name##231##Suffix },
+
+#define FMA3_SCALAR_PAIR(Name, Suffix) \
+ FMA3_ENTRY(Name##SSr, Suffix) \
+ FMA3_ENTRY(Name##SDr, Suffix)
+
+#define FMA3_PACKED_PAIR(Name, Suffix) \
+ FMA3_ENTRY(Name##PSr, Suffix) \
+ FMA3_ENTRY(Name##PDr, Suffix)
+
+#define FMA3_PACKED_SIZES(Name, Suffix) \
+ FMA3_PACKED_PAIR(Name, Suffix) \
+ FMA3_PACKED_PAIR(Name, Suffix##Y)
+
+#define FMA3_TABLE_ALL(Name) \
+ FMA3_SCALAR_PAIR(Name, r) \
+ FMA3_PACKED_SIZES(Name, r) \
+ FMA3_SCALAR_PAIR(Name, m) \
+ FMA3_PACKED_SIZES(Name, m)
+
+#define FMA3_TABLE_PACKED(Name) \
+ FMA3_PACKED_SIZES(Name, r) \
+ FMA3_PACKED_SIZES(Name, m)
+
+#define FMA3_TABLE_SCALAR_INT(Name) \
+ FMA3_SCALAR_PAIR(Name, r_Int) \
+ FMA3_SCALAR_PAIR(Name, m_Int)
+
+#define FMA3_SCALAR_PAIR_AVX512(Name, Suffix) \
+ FMA3_ENTRY(Name, SS##Suffix) \
+ FMA3_ENTRY(Name, SD##Suffix)
+
+#define FMA3_PACKED_PAIR_AVX512(Name, Suffix) \
+ FMA3_ENTRY(Name, PS##Suffix) \
+ FMA3_ENTRY(Name, PD##Suffix)
+
+#define FMA3_PACKED_SIZES_AVX512(Name, Suffix) \
+ FMA3_PACKED_PAIR_AVX512(Name, Z128##Suffix) \
+ FMA3_PACKED_PAIR_AVX512(Name, Z256##Suffix) \
+ FMA3_PACKED_PAIR_AVX512(Name, Z##Suffix)
+
+#define FMA3_TABLE_ALL_AVX512(Name) \
+ FMA3_SCALAR_PAIR_AVX512(Name, r) \
+ FMA3_PACKED_SIZES_AVX512(Name, r) \
+ FMA3_SCALAR_PAIR_AVX512(Name, m) \
+ FMA3_PACKED_SIZES_AVX512(Name, m)
+
+#define FMA3_TABLE_PACKED_AVX512(Name) \
+ FMA3_PACKED_SIZES_AVX512(Name, r) \
+ FMA3_PACKED_SIZES_AVX512(Name, m)
+
+#define FMA3_TABLE_SCALAR_INT_AVX512(Name) \
+ FMA3_SCALAR_PAIR_AVX512(Name, r_Int) \
+ FMA3_SCALAR_PAIR_AVX512(Name, m_Int)
+
// Define the array that holds FMA opcodes in groups
// of 3 opcodes(132, 213, 231) in each group.
static const uint16_t RegularOpcodeGroups[][3] = {
- { X86::VFMADDSSr132r, X86::VFMADDSSr213r, X86::VFMADDSSr231r },
- { X86::VFMADDSDr132r, X86::VFMADDSDr213r, X86::VFMADDSDr231r },
- { X86::VFMADDPSr132r, X86::VFMADDPSr213r, X86::VFMADDPSr231r },
- { X86::VFMADDPDr132r, X86::VFMADDPDr213r, X86::VFMADDPDr231r },
- { X86::VFMADDPSr132rY, X86::VFMADDPSr213rY, X86::VFMADDPSr231rY },
- { X86::VFMADDPDr132rY, X86::VFMADDPDr213rY, X86::VFMADDPDr231rY },
- { X86::VFMADDSSr132m, X86::VFMADDSSr213m, X86::VFMADDSSr231m },
- { X86::VFMADDSDr132m, X86::VFMADDSDr213m, X86::VFMADDSDr231m },
- { X86::VFMADDPSr132m, X86::VFMADDPSr213m, X86::VFMADDPSr231m },
- { X86::VFMADDPDr132m, X86::VFMADDPDr213m, X86::VFMADDPDr231m },
- { X86::VFMADDPSr132mY, X86::VFMADDPSr213mY, X86::VFMADDPSr231mY },
- { X86::VFMADDPDr132mY, X86::VFMADDPDr213mY, X86::VFMADDPDr231mY },
-
- { X86::VFMSUBSSr132r, X86::VFMSUBSSr213r, X86::VFMSUBSSr231r },
- { X86::VFMSUBSDr132r, X86::VFMSUBSDr213r, X86::VFMSUBSDr231r },
- { X86::VFMSUBPSr132r, X86::VFMSUBPSr213r, X86::VFMSUBPSr231r },
- { X86::VFMSUBPDr132r, X86::VFMSUBPDr213r, X86::VFMSUBPDr231r },
- { X86::VFMSUBPSr132rY, X86::VFMSUBPSr213rY, X86::VFMSUBPSr231rY },
- { X86::VFMSUBPDr132rY, X86::VFMSUBPDr213rY, X86::VFMSUBPDr231rY },
- { X86::VFMSUBSSr132m, X86::VFMSUBSSr213m, X86::VFMSUBSSr231m },
- { X86::VFMSUBSDr132m, X86::VFMSUBSDr213m, X86::VFMSUBSDr231m },
- { X86::VFMSUBPSr132m, X86::VFMSUBPSr213m, X86::VFMSUBPSr231m },
- { X86::VFMSUBPDr132m, X86::VFMSUBPDr213m, X86::VFMSUBPDr231m },
- { X86::VFMSUBPSr132mY, X86::VFMSUBPSr213mY, X86::VFMSUBPSr231mY },
- { X86::VFMSUBPDr132mY, X86::VFMSUBPDr213mY, X86::VFMSUBPDr231mY },
-
- { X86::VFNMADDSSr132r, X86::VFNMADDSSr213r, X86::VFNMADDSSr231r },
- { X86::VFNMADDSDr132r, X86::VFNMADDSDr213r, X86::VFNMADDSDr231r },
- { X86::VFNMADDPSr132r, X86::VFNMADDPSr213r, X86::VFNMADDPSr231r },
- { X86::VFNMADDPDr132r, X86::VFNMADDPDr213r, X86::VFNMADDPDr231r },
- { X86::VFNMADDPSr132rY, X86::VFNMADDPSr213rY, X86::VFNMADDPSr231rY },
- { X86::VFNMADDPDr132rY, X86::VFNMADDPDr213rY, X86::VFNMADDPDr231rY },
- { X86::VFNMADDSSr132m, X86::VFNMADDSSr213m, X86::VFNMADDSSr231m },
- { X86::VFNMADDSDr132m, X86::VFNMADDSDr213m, X86::VFNMADDSDr231m },
- { X86::VFNMADDPSr132m, X86::VFNMADDPSr213m, X86::VFNMADDPSr231m },
- { X86::VFNMADDPDr132m, X86::VFNMADDPDr213m, X86::VFNMADDPDr231m },
- { X86::VFNMADDPSr132mY, X86::VFNMADDPSr213mY, X86::VFNMADDPSr231mY },
- { X86::VFNMADDPDr132mY, X86::VFNMADDPDr213mY, X86::VFNMADDPDr231mY },
-
- { X86::VFNMSUBSSr132r, X86::VFNMSUBSSr213r, X86::VFNMSUBSSr231r },
- { X86::VFNMSUBSDr132r, X86::VFNMSUBSDr213r, X86::VFNMSUBSDr231r },
- { X86::VFNMSUBPSr132r, X86::VFNMSUBPSr213r, X86::VFNMSUBPSr231r },
- { X86::VFNMSUBPDr132r, X86::VFNMSUBPDr213r, X86::VFNMSUBPDr231r },
- { X86::VFNMSUBPSr132rY, X86::VFNMSUBPSr213rY, X86::VFNMSUBPSr231rY },
- { X86::VFNMSUBPDr132rY, X86::VFNMSUBPDr213rY, X86::VFNMSUBPDr231rY },
- { X86::VFNMSUBSSr132m, X86::VFNMSUBSSr213m, X86::VFNMSUBSSr231m },
- { X86::VFNMSUBSDr132m, X86::VFNMSUBSDr213m, X86::VFNMSUBSDr231m },
- { X86::VFNMSUBPSr132m, X86::VFNMSUBPSr213m, X86::VFNMSUBPSr231m },
- { X86::VFNMSUBPDr132m, X86::VFNMSUBPDr213m, X86::VFNMSUBPDr231m },
- { X86::VFNMSUBPSr132mY, X86::VFNMSUBPSr213mY, X86::VFNMSUBPSr231mY },
- { X86::VFNMSUBPDr132mY, X86::VFNMSUBPDr213mY, X86::VFNMSUBPDr231mY },
-
- { X86::VFMADDSUBPSr132r, X86::VFMADDSUBPSr213r, X86::VFMADDSUBPSr231r },
- { X86::VFMADDSUBPDr132r, X86::VFMADDSUBPDr213r, X86::VFMADDSUBPDr231r },
- { X86::VFMADDSUBPSr132rY, X86::VFMADDSUBPSr213rY, X86::VFMADDSUBPSr231rY },
- { X86::VFMADDSUBPDr132rY, X86::VFMADDSUBPDr213rY, X86::VFMADDSUBPDr231rY },
- { X86::VFMADDSUBPSr132m, X86::VFMADDSUBPSr213m, X86::VFMADDSUBPSr231m },
- { X86::VFMADDSUBPDr132m, X86::VFMADDSUBPDr213m, X86::VFMADDSUBPDr231m },
- { X86::VFMADDSUBPSr132mY, X86::VFMADDSUBPSr213mY, X86::VFMADDSUBPSr231mY },
- { X86::VFMADDSUBPDr132mY, X86::VFMADDSUBPDr213mY, X86::VFMADDSUBPDr231mY },
-
- { X86::VFMSUBADDPSr132r, X86::VFMSUBADDPSr213r, X86::VFMSUBADDPSr231r },
- { X86::VFMSUBADDPDr132r, X86::VFMSUBADDPDr213r, X86::VFMSUBADDPDr231r },
- { X86::VFMSUBADDPSr132rY, X86::VFMSUBADDPSr213rY, X86::VFMSUBADDPSr231rY },
- { X86::VFMSUBADDPDr132rY, X86::VFMSUBADDPDr213rY, X86::VFMSUBADDPDr231rY },
- { X86::VFMSUBADDPSr132m, X86::VFMSUBADDPSr213m, X86::VFMSUBADDPSr231m },
- { X86::VFMSUBADDPDr132m, X86::VFMSUBADDPDr213m, X86::VFMSUBADDPDr231m },
- { X86::VFMSUBADDPSr132mY, X86::VFMSUBADDPSr213mY, X86::VFMSUBADDPSr231mY },
- { X86::VFMSUBADDPDr132mY, X86::VFMSUBADDPDr213mY, X86::VFMSUBADDPDr231mY }
+ FMA3_TABLE_ALL(VFMADD)
+ FMA3_TABLE_ALL(VFMSUB)
+ FMA3_TABLE_ALL(VFNMADD)
+ FMA3_TABLE_ALL(VFNMSUB)
+ FMA3_TABLE_PACKED(VFMADDSUB)
+ FMA3_TABLE_PACKED(VFMSUBADD)
+
+ // AVX-512 instructions
+ FMA3_TABLE_ALL_AVX512(VFMADD)
+ FMA3_TABLE_ALL_AVX512(VFMSUB)
+ FMA3_TABLE_ALL_AVX512(VFNMADD)
+ FMA3_TABLE_ALL_AVX512(VFNMSUB)
+ FMA3_TABLE_PACKED_AVX512(VFMADDSUB)
+ FMA3_TABLE_PACKED_AVX512(VFMSUBADD)
};
// Define the array that holds FMA*_Int opcodes in groups
// of 3 opcodes(132, 213, 231) in each group.
static const uint16_t IntrinOpcodeGroups[][3] = {
- { X86::VFMADDSSr132r_Int, X86::VFMADDSSr213r_Int, X86::VFMADDSSr231r_Int },
- { X86::VFMADDSDr132r_Int, X86::VFMADDSDr213r_Int, X86::VFMADDSDr231r_Int },
- { X86::VFMADDSSr132m_Int, X86::VFMADDSSr213m_Int, X86::VFMADDSSr231m_Int },
- { X86::VFMADDSDr132m_Int, X86::VFMADDSDr213m_Int, X86::VFMADDSDr231m_Int },
-
- { X86::VFMSUBSSr132r_Int, X86::VFMSUBSSr213r_Int, X86::VFMSUBSSr231r_Int },
- { X86::VFMSUBSDr132r_Int, X86::VFMSUBSDr213r_Int, X86::VFMSUBSDr231r_Int },
- { X86::VFMSUBSSr132m_Int, X86::VFMSUBSSr213m_Int, X86::VFMSUBSSr231m_Int },
- { X86::VFMSUBSDr132m_Int, X86::VFMSUBSDr213m_Int, X86::VFMSUBSDr231m_Int },
-
- { X86::VFNMADDSSr132r_Int, X86::VFNMADDSSr213r_Int, X86::VFNMADDSSr231r_Int },
- { X86::VFNMADDSDr132r_Int, X86::VFNMADDSDr213r_Int, X86::VFNMADDSDr231r_Int },
- { X86::VFNMADDSSr132m_Int, X86::VFNMADDSSr213m_Int, X86::VFNMADDSSr231m_Int },
- { X86::VFNMADDSDr132m_Int, X86::VFNMADDSDr213m_Int, X86::VFNMADDSDr231m_Int },
-
- { X86::VFNMSUBSSr132r_Int, X86::VFNMSUBSSr213r_Int, X86::VFNMSUBSSr231r_Int },
- { X86::VFNMSUBSDr132r_Int, X86::VFNMSUBSDr213r_Int, X86::VFNMSUBSDr231r_Int },
- { X86::VFNMSUBSSr132m_Int, X86::VFNMSUBSSr213m_Int, X86::VFNMSUBSSr231m_Int },
- { X86::VFNMSUBSDr132m_Int, X86::VFNMSUBSDr213m_Int, X86::VFNMSUBSDr231m_Int },
+ FMA3_TABLE_SCALAR_INT(VFMADD)
+ FMA3_TABLE_SCALAR_INT(VFMSUB)
+ FMA3_TABLE_SCALAR_INT(VFNMADD)
+ FMA3_TABLE_SCALAR_INT(VFNMSUB)
+
+ // AVX-512 instructions
+ FMA3_TABLE_SCALAR_INT_AVX512(VFMADD)
+ FMA3_TABLE_SCALAR_INT_AVX512(VFMSUB)
+ FMA3_TABLE_SCALAR_INT_AVX512(VFNMADD)
+ FMA3_TABLE_SCALAR_INT_AVX512(VFNMSUB)
};
+#undef FMA3_ENTRY
+#undef FMA3_SCALAR_PAIR
+#undef FMA3_PACKED_PAIR
+#undef FMA3_PACKED_SIZES
+#undef FMA3_TABLE_ALL
+#undef FMA3_TABLE_PACKED
+#undef FMA3_TABLE_SCALAR_INT
+#undef FMA3_SCALAR_PAIR_AVX512
+#undef FMA3_PACKED_PAIR_AVX512
+#undef FMA3_PACKED_SIZES_AVX512
+#undef FMA3_TABLE_ALL_AVX512
+#undef FMA3_TABLE_PACKED_AVX512
+#undef FMA3_TABLE_SCALAR_INT_AVX512
+
const unsigned Form132Index = 0;
const unsigned Form213Index = 1;
const unsigned Form231Index = 2;
-; NOTE: Assertions have been autogenerated by update_llc_test_checks.py
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; NOTE: Assertions have been autogenerated by update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx,+fma -fp-contract=fast | FileCheck %s --check-prefix=ALL --check-prefix=FMA
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx,+fma4,+fma -fp-contract=fast | FileCheck %s --check-prefix=ALL --check-prefix=FMA4
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx,+fma4 -fp-contract=fast | FileCheck %s --check-prefix=ALL --check-prefix=FMA4
;
; AVX512-LABEL: test_f32_fmadd:
; AVX512: # BB#0:
-; AVX512-NEXT: vfmadd213ss %xmm2, %xmm0, %xmm1
-; AVX512-NEXT: vmovaps %xmm1, %xmm0
+; AVX512-NEXT: vfmadd213ss %xmm2, %xmm1, %xmm0
; AVX512-NEXT: retq
%x = fmul float %a0, %a1
%res = fadd float %x, %a2
;
; AVX512-LABEL: test_f64_fmadd:
; AVX512: # BB#0:
-; AVX512-NEXT: vfmadd213sd %xmm2, %xmm0, %xmm1
-; AVX512-NEXT: vmovaps %xmm1, %xmm0
+; AVX512-NEXT: vfmadd213sd %xmm2, %xmm1, %xmm0
; AVX512-NEXT: retq
%x = fmul double %a0, %a1
%res = fadd double %x, %a2
;
; AVX512-LABEL: test_f32_fmsub:
; AVX512: # BB#0:
-; AVX512-NEXT: vfmsub213ss %xmm2, %xmm0, %xmm1
-; AVX512-NEXT: vmovaps %xmm1, %xmm0
+; AVX512-NEXT: vfmsub213ss %xmm2, %xmm1, %xmm0
; AVX512-NEXT: retq
%x = fmul float %a0, %a1
%res = fsub float %x, %a2
;
; AVX512-LABEL: test_f64_fmsub:
; AVX512: # BB#0:
-; AVX512-NEXT: vfmsub213sd %xmm2, %xmm0, %xmm1
-; AVX512-NEXT: vmovaps %xmm1, %xmm0
+; AVX512-NEXT: vfmsub213sd %xmm2, %xmm1, %xmm0
; AVX512-NEXT: retq
%x = fmul double %a0, %a1
%res = fsub double %x, %a2
;
; AVX512-LABEL: test_f32_fnmadd:
; AVX512: # BB#0:
-; AVX512-NEXT: vfnmadd213ss %xmm2, %xmm0, %xmm1
-; AVX512-NEXT: vmovaps %xmm1, %xmm0
+; AVX512-NEXT: vfnmadd213ss %xmm2, %xmm1, %xmm0
; AVX512-NEXT: retq
%x = fmul float %a0, %a1
%res = fsub float %a2, %x
;
; AVX512-LABEL: test_f64_fnmadd:
; AVX512: # BB#0:
-; AVX512-NEXT: vfnmadd213sd %xmm2, %xmm0, %xmm1
-; AVX512-NEXT: vmovaps %xmm1, %xmm0
+; AVX512-NEXT: vfnmadd213sd %xmm2, %xmm1, %xmm0
; AVX512-NEXT: retq
%x = fmul double %a0, %a1
%res = fsub double %a2, %x
;
; AVX512-LABEL: test_f32_fnmsub:
; AVX512: # BB#0:
-; AVX512-NEXT: vfnmsub213ss %xmm2, %xmm0, %xmm1
-; AVX512-NEXT: vmovaps %xmm1, %xmm0
+; AVX512-NEXT: vfnmsub213ss %xmm2, %xmm1, %xmm0
; AVX512-NEXT: retq
%x = fmul float %a0, %a1
%y = fsub float -0.000000e+00, %x
;
; AVX512-LABEL: test_f64_fnmsub:
; AVX512: # BB#0:
-; AVX512-NEXT: vfnmsub213sd %xmm2, %xmm0, %xmm1
-; AVX512-NEXT: vmovaps %xmm1, %xmm0
+; AVX512-NEXT: vfnmsub213sd %xmm2, %xmm1, %xmm0
; AVX512-NEXT: retq
%x = fmul double %a0, %a1
%y = fsub double -0.000000e+00, %x
; AVX512-LABEL: test_4f32_fmadd_load:
; AVX512: # BB#0:
; AVX512-NEXT: vmovaps (%rdi), %xmm2
-; AVX512-NEXT: vfmadd213ps %xmm1, %xmm0, %xmm2
-; AVX512-NEXT: vmovaps %xmm2, %xmm0
+; AVX512-NEXT: vfmadd213ps %xmm1, %xmm2, %xmm0
; AVX512-NEXT: retq
%x = load <4 x float>, <4 x float>* %a0
%y = fmul <4 x float> %x, %a1
; AVX512-LABEL: test_2f64_fmsub_load:
; AVX512: # BB#0:
; AVX512-NEXT: vmovapd (%rdi), %xmm2
-; AVX512-NEXT: vfmsub213pd %xmm1, %xmm0, %xmm2
-; AVX512-NEXT: vmovaps %xmm2, %xmm0
+; AVX512-NEXT: vfmsub213pd %xmm1, %xmm2, %xmm0
; AVX512-NEXT: retq
%x = load <2 x double>, <2 x double>* %a0
%y = fmul <2 x double> %x, %a1
; AVX512-LABEL: test_f32_interp:
; AVX512: # BB#0:
; AVX512-NEXT: vfnmadd213ss %xmm1, %xmm2, %xmm1
-; AVX512-NEXT: vfmadd213ss %xmm1, %xmm0, %xmm2
-; AVX512-NEXT: vmovaps %xmm2, %xmm0
+; AVX512-NEXT: vfmadd213ss %xmm1, %xmm2, %xmm0
; AVX512-NEXT: retq
%t1 = fsub float 1.0, %t
%tx = fmul float %x, %t
;
; AVX512-LABEL: test_v4f32_interp:
; AVX512: # BB#0:
-; AVX512-NEXT: vmovaps %xmm2, %xmm3
-; AVX512-NEXT: vfnmadd213ps %xmm1, %xmm1, %xmm3
-; AVX512-NEXT: vfmadd213ps %xmm3, %xmm2, %xmm0
+; AVX512-NEXT: vfnmadd213ps %xmm1, %xmm2, %xmm1
+; AVX512-NEXT: vfmadd213ps %xmm1, %xmm2, %xmm0
; AVX512-NEXT: retq
%t1 = fsub <4 x float> <float 1.0, float 1.0, float 1.0, float 1.0>, %t
%tx = fmul <4 x float> %x, %t
;
; AVX512-LABEL: test_v8f32_interp:
; AVX512: # BB#0:
-; AVX512-NEXT: vmovaps %ymm2, %ymm3
-; AVX512-NEXT: vfnmadd213ps %ymm1, %ymm1, %ymm3
-; AVX512-NEXT: vfmadd213ps %ymm3, %ymm2, %ymm0
+; AVX512-NEXT: vfnmadd213ps %ymm1, %ymm2, %ymm1
+; AVX512-NEXT: vfmadd213ps %ymm1, %ymm2, %ymm0
; AVX512-NEXT: retq
%t1 = fsub <8 x float> <float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0>, %t
%tx = fmul <8 x float> %x, %t
; AVX512-LABEL: test_f64_interp:
; AVX512: # BB#0:
; AVX512-NEXT: vfnmadd213sd %xmm1, %xmm2, %xmm1
-; AVX512-NEXT: vfmadd213sd %xmm1, %xmm0, %xmm2
-; AVX512-NEXT: vmovaps %xmm2, %xmm0
+; AVX512-NEXT: vfmadd213sd %xmm1, %xmm2, %xmm0
; AVX512-NEXT: retq
%t1 = fsub double 1.0, %t
%tx = fmul double %x, %t
;
; AVX512-LABEL: test_v2f64_interp:
; AVX512: # BB#0:
-; AVX512-NEXT: vmovaps %xmm2, %xmm3
-; AVX512-NEXT: vfnmadd213pd %xmm1, %xmm1, %xmm3
-; AVX512-NEXT: vfmadd213pd %xmm3, %xmm2, %xmm0
+; AVX512-NEXT: vfnmadd213pd %xmm1, %xmm2, %xmm1
+; AVX512-NEXT: vfmadd213pd %xmm1, %xmm2, %xmm0
; AVX512-NEXT: retq
%t1 = fsub <2 x double> <double 1.0, double 1.0>, %t
%tx = fmul <2 x double> %x, %t
;
; AVX512-LABEL: test_v4f64_interp:
; AVX512: # BB#0:
-; AVX512-NEXT: vmovaps %ymm2, %ymm3
-; AVX512-NEXT: vfnmadd213pd %ymm1, %ymm1, %ymm3
-; AVX512-NEXT: vfmadd213pd %ymm3, %ymm2, %ymm0
+; AVX512-NEXT: vfnmadd213pd %ymm1, %ymm2, %ymm1
+; AVX512-NEXT: vfmadd213pd %ymm1, %ymm2, %ymm0
; AVX512-NEXT: retq
%t1 = fsub <4 x double> <double 1.0, double 1.0, double 1.0, double 1.0>, %t
%tx = fmul <4 x double> %x, %t
;
; AVX512-LABEL: test_v4f32_fma_fmul_x_c1_c2_y:
; AVX512: # BB#0:
-; AVX512-NEXT: vfmadd231ps {{.*}}(%rip), %xmm0, %xmm1
-; AVX512-NEXT: vmovaps %xmm1, %xmm0
+; AVX512-NEXT: vfmadd132ps {{.*}}(%rip), %xmm1, %xmm0
; AVX512-NEXT: retq
%m0 = fmul <4 x float> %x, <float 1.0, float 2.0, float 3.0, float 4.0>
%m1 = fmul <4 x float> %m0, <float 4.0, float 3.0, float 2.0, float 1.0>
; AVX512-LABEL: test_f64_fneg_fmul:
; AVX512: # BB#0:
; AVX512-NEXT: vxorps %xmm2, %xmm2, %xmm2
-; AVX512-NEXT: vfnmsub213sd %xmm2, %xmm0, %xmm1
-; AVX512-NEXT: vmovaps %xmm1, %xmm0
+; AVX512-NEXT: vfnmsub213sd %xmm2, %xmm1, %xmm0
; AVX512-NEXT: retq
%m = fmul nsz double %x, %y
%n = fsub double -0.0, %m