BU_MMA_V2 (DISASSEMBLE_PAIR, "disassemble_pair", PAIR, vsx_disassemble_pair)
BU_COMPAT (VSX_BUILTIN_DISASSEMBLE_PAIR, "mma_disassemble_pair")
+BU_MMA_V3 (BUILD_PAIR, "build_pair", MISC, vsx_assemble_pair)
BU_MMA_V3 (ASSEMBLE_PAIR, "assemble_pair", MISC, vsx_assemble_pair)
BU_COMPAT (VSX_BUILTIN_ASSEMBLE_PAIR, "mma_assemble_pair")
BU_MMA_3 (XVBF16GER2, "xvbf16ger2", MISC, mma_xvbf16ger2)
BU_MMA_3 (XVI16GER2PP, "xvi16ger2pp", QUAD, mma_xvi16ger2pp)
BU_MMA_3 (XVI16GER2SPP, "xvi16ger2spp", QUAD, mma_xvi16ger2spp)
+BU_MMA_5 (BUILD_ACC, "build_acc", MISC, mma_assemble_acc)
BU_MMA_5 (ASSEMBLE_ACC, "assemble_acc", MISC, mma_assemble_acc)
BU_MMA_5 (PMXVF32GER, "pmxvf32ger", MISC, mma_pmxvf32ger)
BU_MMA_5 (PMXVF64GER, "pmxvf64ger", PAIR, mma_pmxvf64ger)
pat = GEN_FCN (icode) (op[0], op[1]);
break;
case 3:
+ /* The ASSEMBLE builtin source operands are reversed in little-endian
+ mode, so reorder them. */
+ if (fcode == VSX_BUILTIN_ASSEMBLE_PAIR_INTERNAL && !WORDS_BIG_ENDIAN)
+ std::swap (op[1], op[2]);
pat = GEN_FCN (icode) (op[0], op[1], op[2]);
break;
case 4:
pat = GEN_FCN (icode) (op[0], op[1], op[2], op[3]);
break;
case 5:
+ /* The ASSEMBLE builtin source operands are reversed in little-endian
+ mode, so reorder them. */
+ if (fcode == MMA_BUILTIN_ASSEMBLE_ACC_INTERNAL && !WORDS_BIG_ENDIAN)
+ {
+ std::swap (op[1], op[4]);
+ std::swap (op[2], op[3]);
+ }
pat = GEN_FCN (icode) (op[0], op[1], op[2], op[3], op[4]);
break;
case 6:
gcc_unreachable ();
}
- if (fncode == VSX_BUILTIN_ASSEMBLE_PAIR)
+ if (fncode == VSX_BUILTIN_BUILD_PAIR || fncode == VSX_BUILTIN_ASSEMBLE_PAIR)
lhs = make_ssa_name (vector_pair_type_node);
else
lhs = make_ssa_name (vector_quad_type_node);
machine_mode mode = insn_data[icode].operand[j].mode;
if (gimple_func && mode == XOmode)
op[nopnds++] = build_pointer_type (vector_quad_type_node);
- else if (gimple_func && mode == OOmode
- && d->code == VSX_BUILTIN_ASSEMBLE_PAIR)
+ else if (gimple_func
+ && mode == OOmode
+ && (d->code == VSX_BUILTIN_BUILD_PAIR
+ || d->code == VSX_BUILTIN_ASSEMBLE_PAIR))
op[nopnds++] = build_pointer_type (vector_pair_type_node);
else
/* MMA uses unsigned types. */
gcc_assert (VSX_REGNO_P (REGNO (dst)));
reg_mode = GET_MODE (XVECEXP (src, 0, 0));
- for (int i = 0; i < XVECLEN (src, 0); i++)
+ int nvecs = XVECLEN (src, 0);
+ for (int i = 0; i < nvecs; i++)
{
- rtx dst_i = gen_rtx_REG (reg_mode, reg + i);
+ int index = WORDS_BIG_ENDIAN ? i : nvecs - 1 - i;
+ rtx dst_i = gen_rtx_REG (reg_mode, reg + index);
emit_insn (gen_rtx_SET (dst_i, XVECEXP (src, 0, i)));
}
void __builtin_mma_xxmfacc (__vector_quad *);
void __builtin_mma_xxsetaccz (__vector_quad *);
-void __builtin_mma_assemble_acc (__vector_quad *, vec_t, vec_t, vec_t, vec_t);
+void __builtin_mma_build_acc (__vector_quad *, vec_t, vec_t, vec_t, vec_t);
void __builtin_mma_disassemble_acc (void *, __vector_quad *);
-void __builtin_vsx_assemble_pair (__vector_pair *, vec_t, vec_t);
+void __builtin_vsx_build_pair (__vector_pair *, vec_t, vec_t);
void __builtin_vsx_disassemble_pair (void *, __vector_pair *);
vec_t __builtin_vsx_xvcvspbf16 (vec_t);
}
void
+foo3 (__vector_pair *dst, vec_t *src)
+{
+ __vector_pair pair;
+ __builtin_vsx_build_pair (&pair, src[4], src[0]);
+ *dst = pair;
+}
+
+void
bar (vec_t *dst, __vector_pair *src)
{
vec_t res[2];
# error "__has_builtin (__builtin_mma_disassemble_pair) failed"
#endif
-/* { dg-final { scan-assembler-times {\mlxv\M} 4 } } */
+#if !__has_builtin (__builtin_vsx_build_pair)
+# error "__has_builtin (__builtin_vsx_build_pair) failed"
+#endif
+
+/* { dg-final { scan-assembler-times {\mlxv\M} 6 } } */
/* { dg-final { scan-assembler-times {\mlxvp\M} 2 } } */
/* { dg-final { scan-assembler-times {\mstxv\M} 4 } } */
-/* { dg-final { scan-assembler-times {\mstxvp\M} 2 } } */
+/* { dg-final { scan-assembler-times {\mstxvp\M} 3 } } */
}
void
+foo2 (__vector_quad *dst, vec_t *src)
+{
+ __vector_quad acc;
+ __builtin_mma_build_acc (&acc, src[12], src[8], src[4], src[0]);
+ *dst = acc;
+}
+
+void
bar (vec_t *dst, __vector_quad *src)
{
vec_t res[4];
dst[12] = res[3];
}
-/* { dg-final { scan-assembler-times {\mlxv\M} 4 } } */
+#if !__has_builtin (__builtin_mma_assemble_acc)
+# error "__has_builtin (__builtin_mma_assemble_acc) failed"
+#endif
+
+#if !__has_builtin (__builtin_mma_build_acc)
+# error "__has_builtin (__builtin_mma_build_acc) failed"
+#endif
+
+/* { dg-final { scan-assembler-times {\mlxv\M} 8 } } */
/* { dg-final { scan-assembler-times {\mlxvp\M} 2 } } */
/* { dg-final { scan-assembler-times {\mstxv\M} 4 } } */
-/* { dg-final { scan-assembler-times {\mstxvp\M} 2 } } */
-/* { dg-final { scan-assembler-times {\mxxmfacc\M} 2 } } */
-/* { dg-final { scan-assembler-times {\mxxmtacc\M} 2 } } */
+/* { dg-final { scan-assembler-times {\mstxvp\M} 4 } } */
+/* { dg-final { scan-assembler-times {\mxxmfacc\M} 3 } } */
+/* { dg-final { scan-assembler-times {\mxxmtacc\M} 3 } } */