};
#undef ENTRY
+static machine_mode aarch64_simd_tuple_modes[ARM_NEON_H_TYPES_LAST][3];
static GTY(()) tree aarch64_simd_tuple_types[ARM_NEON_H_TYPES_LAST][3];
static GTY(()) tree aarch64_simd_intOI_type_node = NULL_TREE;
return aarch64_simd_types[i].itype;
if (aarch64_simd_tuple_types[i][0] != NULL_TREE)
for (int j = 0; j < 3; j++)
- if (TYPE_MODE (aarch64_simd_tuple_types[i][j]) == mode
+ if (aarch64_simd_tuple_modes[i][j] == mode
&& aarch64_simd_types[i].q == q)
return aarch64_simd_tuple_types[i][j];
}
}
unsigned int alignment
- = (known_eq (GET_MODE_SIZE (type->mode), 16) ? 128 : 64);
- gcc_assert (TYPE_MODE_RAW (array_type) == TYPE_MODE (array_type)
+ = known_eq (GET_MODE_SIZE (type->mode), 16) ? 128 : 64;
+ machine_mode tuple_mode = TYPE_MODE_RAW (array_type);
+ gcc_assert (VECTOR_MODE_P (tuple_mode)
+ && TYPE_MODE (array_type) == tuple_mode
&& TYPE_ALIGN (array_type) == alignment);
tree field = build_decl (input_location, FIELD_DECL,
make_array_slice (&field,
1));
gcc_assert (TYPE_MODE_RAW (t) == TYPE_MODE (t)
- && TYPE_ALIGN (t) == alignment);
-
- if (num_vectors == 2)
- aarch64_simd_tuple_types[type_index][0] = t;
- else if (num_vectors == 3)
- aarch64_simd_tuple_types[type_index][1] = t;
- else if (num_vectors == 4)
- aarch64_simd_tuple_types[type_index][2] = t;
+ && (flag_pack_struct
+ || maximum_field_alignment
+ || (TYPE_MODE_RAW (t) == tuple_mode
+ && TYPE_ALIGN (t) == alignment)));
+
+ aarch64_simd_tuple_modes[type_index][num_vectors - 2] = tuple_mode;
+ aarch64_simd_tuple_types[type_index][num_vectors - 2] = t;
}
static bool
return (t == Poly8_t || t == Poly16_t || t == Poly64_t || t == Poly128_t);
}
+/* Enable AARCH64_FL_* flags EXTRA_FLAGS on top of the base Advanced SIMD
+ set. */
+aarch64_simd_switcher::aarch64_simd_switcher (unsigned int extra_flags)
+ : m_old_isa_flags (aarch64_isa_flags),
+ m_old_general_regs_only (TARGET_GENERAL_REGS_ONLY)
+{
+ /* Changing the ISA flags should be enough here. We shouldn't need to
+ pay the compile-time cost of a full target switch. */
+ aarch64_isa_flags = AARCH64_FL_FP | AARCH64_FL_SIMD | extra_flags;
+ global_options.x_target_flags &= ~MASK_GENERAL_REGS_ONLY;
+}
+
+aarch64_simd_switcher::~aarch64_simd_switcher ()
+{
+ if (m_old_general_regs_only)
+ global_options.x_target_flags |= MASK_GENERAL_REGS_ONLY;
+ aarch64_isa_flags = m_old_isa_flags;
+}
+
/* Implement #pragma GCC aarch64 "arm_neon.h". */
void
handle_arm_neon_h (void)
{
+ aarch64_simd_switcher simd;
+
/* Register the AdvSIMD vector tuple types. */
for (unsigned int i = 0; i < ARM_NEON_H_TYPES_LAST; i++)
for (unsigned int count = 2; count <= 4; ++count)
aarch64_init_bf16_types ();
- if (TARGET_SIMD)
+ {
+ aarch64_simd_switcher simd;
aarch64_init_simd_builtins ();
+ }
aarch64_init_crc32_builtins ();
aarch64_init_builtin_rsqrt ();
/* Mask that selects the aarch64_builtin_class part of a function code. */
const unsigned int AARCH64_BUILTIN_CLASS = (1 << AARCH64_BUILTIN_SHIFT) - 1;
+/* RAII class for enabling enough features to define built-in types
+ and implement the arm_neon.h pragma. */
+class aarch64_simd_switcher
+{
+public:
+ aarch64_simd_switcher (unsigned int extra_flags = 0);
+ ~aarch64_simd_switcher ();
+
+private:
+ unsigned long m_old_isa_flags;
+ bool m_old_general_regs_only;
+};
+
void aarch64_post_cfi_startproc (void);
poly_int64 aarch64_initial_elimination_offset (unsigned, unsigned);
int aarch64_get_condition_code (rtx);
}
sve_switcher::sve_switcher ()
- : m_old_isa_flags (aarch64_isa_flags)
+ : aarch64_simd_switcher (AARCH64_FL_F16 | AARCH64_FL_SVE)
{
/* Changing the ISA flags and have_regs_of_mode should be enough here.
We shouldn't need to pay the compile-time cost of a full target
switch. */
- aarch64_isa_flags = (AARCH64_FL_FP | AARCH64_FL_SIMD | AARCH64_FL_F16
- | AARCH64_FL_SVE);
-
m_old_maximum_field_alignment = maximum_field_alignment;
maximum_field_alignment = 0;
- m_old_general_regs_only = TARGET_GENERAL_REGS_ONLY;
- global_options.x_target_flags &= ~MASK_GENERAL_REGS_ONLY;
-
memcpy (m_old_have_regs_of_mode, have_regs_of_mode,
sizeof (have_regs_of_mode));
for (int i = 0; i < NUM_MACHINE_MODES; ++i)
{
memcpy (have_regs_of_mode, m_old_have_regs_of_mode,
sizeof (have_regs_of_mode));
- if (m_old_general_regs_only)
- global_options.x_target_flags |= MASK_GENERAL_REGS_ONLY;
- aarch64_isa_flags = m_old_isa_flags;
maximum_field_alignment = m_old_maximum_field_alignment;
}
/* RAII class for enabling enough SVE features to define the built-in
types and implement the arm_sve.h pragma. */
-class sve_switcher
+class sve_switcher : public aarch64_simd_switcher
{
public:
sve_switcher ();
~sve_switcher ();
private:
- unsigned long m_old_isa_flags;
unsigned int m_old_maximum_field_alignment;
- bool m_old_general_regs_only;
bool m_old_have_regs_of_mode[MAX_MACHINE_MODE];
};
--- /dev/null
+/* { dg-options "-fpack-struct" } */
+
+#include <arm_neon.h>
+
+static_assert(alignof(int32x2_t) == 8, "int32x2_t alignment");
+static_assert(alignof(int32x4_t) == 16, "int32x4_t alignment");
+static_assert(alignof(int32x2x2_t) == 1, "int32x2x2_t alignment");
+static_assert(alignof(int32x4x2_t) == 1, "int32x4x2_t alignment");
+static_assert(alignof(int32x2x3_t) == 1, "int32x2x3_t alignment");
+static_assert(alignof(int32x4x3_t) == 1, "int32x4x3_t alignment");
+static_assert(alignof(int32x2x4_t) == 1, "int32x2x4_t alignment");
+static_assert(alignof(int32x4x4_t) == 1, "int32x4x4_t alignment");
--- /dev/null
+/* { dg-options "-O2 -fpack-struct -mstrict-align" } */
+/* { dg-final { check-function-bodies "**" "" "" } } */
+
+#include <arm_neon.h>
+
+extern "C" {
+
+/*
+** ld2:
+** ...
+** ld2 .*
+** ...
+** (
+** strb .*
+** |
+** bl memcpy
+** )
+** ...
+*/
+void
+ld2 (int32x4x2_t *a, int32_t *b)
+{
+ *a = vld2q_s32 (b);
+}
+
+/*
+** ld3:
+** ...
+** ld3 .*
+** ...
+** (
+** strb .*
+** |
+** bl memcpy
+** )
+** ...
+*/
+void
+ld3 (int32x4x3_t *a, int32_t *b)
+{
+ *a = vld3q_s32 (b);
+}
+
+/*
+** ld4:
+** ...
+** ld4 .*
+** ...
+** (
+** strb .*
+** |
+** bl memcpy
+** )
+** ...
+*/
+void
+ld4 (int32x4x4_t *a, int32_t *b)
+{
+ *a = vld4q_s32 (b);
+}
+
+/*
+** ret:
+** ...
+** ldp q0, q1, \[x0\]
+** ldr q2, \[x0, #?32\]
+** ...
+*/
+int32x4x3_t
+ret (int32x4_t *ptr)
+{
+ return (int32x4x3_t) { ptr[0], ptr[1], ptr[2] };
+}
+
+/*
+** arg:
+** ...
+** stp d0, d1, \[x0\]
+** ...
+*/
+void
+arg (int32x2x2_t arg, int32x2_t *ptr)
+{
+ ptr[0] = arg.val[0];
+ ptr[1] = arg.val[1];
+}
+
+}
--- /dev/null
+/* { dg-options "-fpack-struct=1" } */
+
+#include <arm_neon.h>
+
+static_assert(alignof(int32x2_t) == 8, "int32x2_t alignment");
+static_assert(alignof(int32x4_t) == 16, "int32x4_t alignment");
+static_assert(alignof(int32x2x2_t) == 1, "int32x2x2_t alignment");
+static_assert(alignof(int32x4x2_t) == 1, "int32x4x2_t alignment");
+static_assert(alignof(int32x2x3_t) == 1, "int32x2x3_t alignment");
+static_assert(alignof(int32x4x3_t) == 1, "int32x4x3_t alignment");
+static_assert(alignof(int32x2x4_t) == 1, "int32x2x4_t alignment");
+static_assert(alignof(int32x4x4_t) == 1, "int32x4x4_t alignment");
--- /dev/null
+/* { dg-options "-fpack-struct=2" } */
+
+#include <arm_neon.h>
+
+static_assert(alignof(int32x2_t) == 8, "int32x2_t alignment");
+static_assert(alignof(int32x4_t) == 16, "int32x4_t alignment");
+static_assert(alignof(int32x2x2_t) == 2, "int32x2x2_t alignment");
+static_assert(alignof(int32x4x2_t) == 2, "int32x4x2_t alignment");
+static_assert(alignof(int32x2x3_t) == 2, "int32x2x3_t alignment");
+static_assert(alignof(int32x4x3_t) == 2, "int32x4x3_t alignment");
+static_assert(alignof(int32x2x4_t) == 2, "int32x2x4_t alignment");
+static_assert(alignof(int32x4x4_t) == 2, "int32x4x4_t alignment");
--- /dev/null
+/* { dg-options "-fpack-struct=8" } */
+
+#include <arm_neon.h>
+
+static_assert(alignof(int32x2_t) == 8, "int32x2_t alignment");
+static_assert(alignof(int32x4_t) == 16, "int32x4_t alignment");
+static_assert(alignof(int32x2x2_t) == 8, "int32x2x2_t alignment");
+static_assert(alignof(int32x4x2_t) == 8, "int32x4x2_t alignment");
+static_assert(alignof(int32x2x3_t) == 8, "int32x2x3_t alignment");
+static_assert(alignof(int32x4x3_t) == 8, "int32x4x3_t alignment");
+static_assert(alignof(int32x2x4_t) == 8, "int32x2x4_t alignment");
+static_assert(alignof(int32x4x4_t) == 8, "int32x4x4_t alignment");
--- /dev/null
+/* { dg-options "-fpack-struct=16" } */
+
+#include <arm_neon.h>
+
+static_assert(alignof(int32x2_t) == 8, "int32x2_t alignment");
+static_assert(alignof(int32x4_t) == 16, "int32x4_t alignment");
+static_assert(alignof(int32x2x2_t) == 8, "int32x2x2_t alignment");
+static_assert(alignof(int32x4x2_t) == 16, "int32x4x2_t alignment");
+static_assert(alignof(int32x2x3_t) == 8, "int32x2x3_t alignment");
+static_assert(alignof(int32x4x3_t) == 16, "int32x4x3_t alignment");
+static_assert(alignof(int32x2x4_t) == 8, "int32x2x4_t alignment");
+static_assert(alignof(int32x4x4_t) == 16, "int32x4x4_t alignment");
--- /dev/null
+/* { dg-options "-mgeneral-regs-only" } */
+
+#include <arm_neon.h>
--- /dev/null
+/* { dg-options "-fpack-struct" } */
+
+#pragma GCC aarch64 "arm_neon.h"
--- /dev/null
+/* { dg-options "-mgeneral-regs-only" } */
+
+#pragma GCC aarch64 "arm_neon.h"
--- /dev/null
+/* { dg-options "-mgeneral-regs-only" } */
+/* { dg-excess-errors "arm_neon.h" } */
+
+#include <arm_neon.h>
+
+int32x4x4_t
+test (int32_t *ptr) /* { dg-error "-mgeneral-regs-only" } */
+{
+ return vld4q_s32 (ptr);
+}
--- /dev/null
+/* { dg-options "-fpack-struct" } */
+
+#include <arm_neon.h>
+
+int assert1[__alignof__(int32x2_t) == 8 ? 1 : -1];
+int assert2[__alignof__(int32x4_t) == 16 ? 1 : -1];
+int assert3[__alignof__(int32x2x2_t) == 1 ? 1 : -1];
+int assert4[__alignof__(int32x4x2_t) == 1 ? 1 : -1];
+int assert5[__alignof__(int32x2x3_t) == 1 ? 1 : -1];
+int assert6[__alignof__(int32x4x3_t) == 1 ? 1 : -1];
+int assert7[__alignof__(int32x2x4_t) == 1 ? 1 : -1];
+int assert8[__alignof__(int32x4x4_t) == 1 ? 1 : -1];
--- /dev/null
+/* { dg-options "-O2 -fpack-struct -mstrict-align" } */
+/* { dg-final { check-function-bodies "**" "" "" } } */
+
+#include <arm_neon.h>
+
+/*
+** ld2:
+** ...
+** ld2 .*
+** ...
+** (
+** strb .*
+** |
+** bl memcpy
+** )
+** ...
+*/
+void
+ld2 (int32x4x2_t *a, int32_t *b)
+{
+ *a = vld2q_s32 (b);
+}
+
+/*
+** ld3:
+** ...
+** ld3 .*
+** ...
+** (
+** strb .*
+** |
+** bl memcpy
+** )
+** ...
+*/
+void
+ld3 (int32x4x3_t *a, int32_t *b)
+{
+ *a = vld3q_s32 (b);
+}
+
+/*
+** ld4:
+** ...
+** ld4 .*
+** ...
+** (
+** strb .*
+** |
+** bl memcpy
+** )
+** ...
+*/
+void
+ld4 (int32x4x4_t *a, int32_t *b)
+{
+ *a = vld4q_s32 (b);
+}
+
+/*
+** ret:
+** ...
+** ldp q0, q1, \[x0\]
+** ldr q2, \[x0, #?32\]
+** ...
+*/
+int32x4x3_t
+ret (int32x4_t *ptr)
+{
+ return (int32x4x3_t) { ptr[0], ptr[1], ptr[2] };
+}
+
+/*
+** arg:
+** ...
+** stp d0, d1, \[x0\]
+** ...
+*/
+void
+arg (int32x2x2_t arg, int32x2_t *ptr)
+{
+ ptr[0] = arg.val[0];
+ ptr[1] = arg.val[1];
+}
--- /dev/null
+/* { dg-options "-fpack-struct=1" } */
+
+#include <arm_neon.h>
+
+int assert1[__alignof__(int32x2_t) == 8 ? 1 : -1];
+int assert2[__alignof__(int32x4_t) == 16 ? 1 : -1];
+int assert3[__alignof__(int32x2x2_t) == 1 ? 1 : -1];
+int assert4[__alignof__(int32x4x2_t) == 1 ? 1 : -1];
+int assert5[__alignof__(int32x2x3_t) == 1 ? 1 : -1];
+int assert6[__alignof__(int32x4x3_t) == 1 ? 1 : -1];
+int assert7[__alignof__(int32x2x4_t) == 1 ? 1 : -1];
+int assert8[__alignof__(int32x4x4_t) == 1 ? 1 : -1];
--- /dev/null
+/* { dg-options "-fpack-struct=2" } */
+
+#include <arm_neon.h>
+
+int assert1[__alignof__(int32x2_t) == 8 ? 1 : -1];
+int assert2[__alignof__(int32x4_t) == 16 ? 1 : -1];
+int assert3[__alignof__(int32x2x2_t) == 2 ? 1 : -1];
+int assert4[__alignof__(int32x4x2_t) == 2 ? 1 : -1];
+int assert5[__alignof__(int32x2x3_t) == 2 ? 1 : -1];
+int assert6[__alignof__(int32x4x3_t) == 2 ? 1 : -1];
+int assert7[__alignof__(int32x2x4_t) == 2 ? 1 : -1];
+int assert8[__alignof__(int32x4x4_t) == 2 ? 1 : -1];
--- /dev/null
+/* { dg-options "-fpack-struct=8" } */
+
+#include <arm_neon.h>
+
+int assert1[__alignof__(int32x2_t) == 8 ? 1 : -1];
+int assert2[__alignof__(int32x4_t) == 16 ? 1 : -1];
+int assert3[__alignof__(int32x2x2_t) == 8 ? 1 : -1];
+int assert4[__alignof__(int32x4x2_t) == 8 ? 1 : -1];
+int assert5[__alignof__(int32x2x3_t) == 8 ? 1 : -1];
+int assert6[__alignof__(int32x4x3_t) == 8 ? 1 : -1];
+int assert7[__alignof__(int32x2x4_t) == 8 ? 1 : -1];
+int assert8[__alignof__(int32x4x4_t) == 8 ? 1 : -1];
--- /dev/null
+/* { dg-options "-fpack-struct=16" } */
+
+#include <arm_neon.h>
+
+int assert1[__alignof__(int32x2_t) == 8 ? 1 : -1];
+int assert2[__alignof__(int32x4_t) == 16 ? 1 : -1];
+int assert3[__alignof__(int32x2x2_t) == 8 ? 1 : -1];
+int assert4[__alignof__(int32x4x2_t) == 16 ? 1 : -1];
+int assert5[__alignof__(int32x2x3_t) == 8 ? 1 : -1];
+int assert6[__alignof__(int32x4x3_t) == 16 ? 1 : -1];
+int assert7[__alignof__(int32x2x4_t) == 8 ? 1 : -1];
+int assert8[__alignof__(int32x4x4_t) == 16 ? 1 : -1];
--- /dev/null
+/* { dg-options "-mgeneral-regs-only" } */
+
+#include <arm_neon.h>
--- /dev/null
+/* { dg-options "-fpack-struct" } */
+
+#pragma GCC aarch64 "arm_neon.h"
--- /dev/null
+/* { dg-options "-mgeneral-regs-only" } */
+
+#pragma GCC aarch64 "arm_neon.h"
--- /dev/null
+/* { dg-options "-mgeneral-regs-only" } */
+/* { dg-excess-errors "arm_neon.h" } */
+
+#include <arm_neon.h>
+
+int32x4x4_t
+test (int32_t *ptr) /* { dg-error "-mgeneral-regs-only" } */
+{
+ return vld4q_s32 (ptr);
+}