/* { dg-do compile } */
/* { dg-require-effective-target stdint_types } */
-/* { dg-add-options arm_v8_1m_mve_fp } */
+/* { dg-require-effective-target vect_int } */
+/* { dg-add-options arm_v8_3a_complex_neon } */
/* { dg-additional-options "-fno-tree-loop-vectorize" } */
#define TYPE int32_t
#include "complex-add-pattern-template.c"
/* { dg-final { scan-tree-dump-times "stmt.*COMPLEX_ADD_ROT90" 1 "slp1" { target { vect_complex_add_int } } } } */
-/* { dg-final { scan-tree-dump-times "stmt.*COMPLEX_ADD_ROT270" 1 "slp1" { target { vect_complex_add_int && !arch64_sve2 } } } } */
+/* { dg-final { scan-tree-dump-times "stmt.*COMPLEX_ADD_ROT270" 1 "slp1" { target { vect_complex_add_int && { ! aarch64_sve2 } } } } } */
/* { dg-final { scan-tree-dump "Found COMPLEX_ADD_ROT270" "slp1" } } */
/* { dg-final { scan-tree-dump "Found COMPLEX_ADD_ROT90" "slp1" } } */
/* { dg-do compile } */
/* { dg-require-effective-target vect_complex_add_long } */
+/* { dg-require-effective-target vect_long } */
/* { dg-require-effective-target stdint_types } */
-/* { dg-add-options arm_v8_1m_mve_fp } */
+/* { dg-add-options arm_v8_3a_complex_neon } */
+
+#define UNROLL
#define TYPE int64_t
#define N 16
#include "complex-add-pattern-template.c"
/* { dg-final { scan-tree-dump-times "stmt.*COMPLEX_ADD_ROT90" 1 "slp1" { target { vect_complex_add_long } } } } */
-/* { dg-final { scan-tree-dump-times "stmt.*COMPLEX_ADD_ROT270" 1 "slp1" { target { vect_complex_add_long && !arch64_sve2 } } } } */
+/* { dg-final { scan-tree-dump-times "stmt.*COMPLEX_ADD_ROT270" 1 "slp1" { target { vect_complex_add_long && { ! aarch64_sve2 } } } } } */
/* { dg-final { scan-tree-dump "Found COMPLEX_ADD_ROT270" "slp1" } } */
/* { dg-final { scan-tree-dump "Found COMPLEX_ADD_ROT90" "slp1" } } */
/* { dg-final { scan-tree-dump "Found COMPLEX_ADD_ROT90" "slp1" } } */
/* { dg-do compile } */
/* { dg-require-effective-target stdint_types } */
+/* { dg-require-effective-target vect_int } */
/* { dg-additional-options "-fno-tree-loop-vectorize" } */
-/* { dg-add-options arm_v8_1m_mve_fp } */
+/* { dg-add-options arm_v8_3a_complex_neon } */
+
+#define UNROLL
#define TYPE int16_t
#define N 16
#include "complex-add-pattern-template.c"
/* { dg-final { scan-tree-dump-times "stmt.*COMPLEX_ADD_ROT90" 1 "slp1" { target { vect_complex_add_short } } } } */
-/* { dg-final { scan-tree-dump-times "stmt.*COMPLEX_ADD_ROT270" 1 "slp1" { target { vect_complex_add_short && !arch64_sve2 } } } } */
+/* { dg-final { scan-tree-dump-times "stmt.*COMPLEX_ADD_ROT270" 1 "slp1" { target { vect_complex_add_short && { ! aarch64_sve2 } } } } } */
/* { dg-final { scan-tree-dump "Found COMPLEX_ADD_ROT270" "slp1" } } */
/* { dg-final { scan-tree-dump "Found COMPLEX_ADD_ROT90" "slp1" } } */
/* { dg-do compile } */
/* { dg-require-effective-target stdint_types } */
+/* { dg-require-effective-target vect_int } */
/* { dg-additional-options "-fno-tree-loop-vectorize" } */
-/* { dg-add-options arm_v8_1m_mve_fp } */
+/* { dg-add-options arm_v8_3a_complex_neon } */
+
+#define UNROLL
#define TYPE uint32_t
#define N 16
#include "complex-add-pattern-template.c"
/* { dg-final { scan-tree-dump-times "stmt.*COMPLEX_ADD_ROT90" 1 "slp1" { target { vect_complex_add_int } } } } */
-/* { dg-final { scan-tree-dump-times "stmt.*COMPLEX_ADD_ROT270" 1 "slp1" { target { vect_complex_add_int && !arch64_sve2 } } } } */
+/* { dg-final { scan-tree-dump-times "stmt.*COMPLEX_ADD_ROT270" 1 "slp1" { target { vect_complex_add_int && { ! aarch64_sve2 } } } } } */
/* { dg-final { scan-tree-dump "Found COMPLEX_ADD_ROT270" "slp1" } } */
/* { dg-final { scan-tree-dump "Found COMPLEX_ADD_ROT90" "slp1" } } */
/* { dg-do compile } */
/* { dg-require-effective-target stdint_types } */
+/* { dg-require-effective-target vect_long } */
/* { dg-additional-options "-fno-tree-loop-vectorize" } */
-/* { dg-add-options arm_v8_1m_mve_fp } */
+/* { dg-add-options arm_v8_3a_complex_neon } */
+
+#define UNROLL
#define TYPE uint64_t
#define N 16
#include "complex-add-pattern-template.c"
/* { dg-final { scan-tree-dump-times "stmt.*COMPLEX_ADD_ROT90" 1 "slp1" { target { vect_complex_add_long } } } } */
-/* { dg-final { scan-tree-dump-times "stmt.*COMPLEX_ADD_ROT270" 1 "slp1" { target { vect_complex_add_long && !arch64_sve2 } } } } */
+/* { dg-final { scan-tree-dump-times "stmt.*COMPLEX_ADD_ROT270" 1 "slp1" { target { vect_complex_add_long && { ! aarch64_sve2 } } } } } */
/* { dg-final { scan-tree-dump "Found COMPLEX_ADD_ROT270" "slp1" } } */
/* { dg-final { scan-tree-dump "Found COMPLEX_ADD_ROT90" "slp1" } } */
/* { dg-do compile } */
/* { dg-require-effective-target stdint_types } */
+/* { dg-require-effective-target vect_int } */
/* { dg-additional-options "-fno-tree-loop-vectorize" } */
-/* { dg-add-options arm_v8_1m_mve_fp } */
+/* { dg-add-options arm_v8_3a_complex_neon } */
+
+#define UNROLL
#define TYPE uint16_t
#define N 16
#include "complex-add-pattern-template.c"
/* { dg-final { scan-tree-dump-times "stmt.*COMPLEX_ADD_ROT90" 1 "slp1" { target { vect_complex_add_short } } } } */
-/* { dg-final { scan-tree-dump-times "stmt.*COMPLEX_ADD_ROT270" 1 "slp1" { target { vect_complex_add_short && !arch64_sve2 } } } } */
+/* { dg-final { scan-tree-dump-times "stmt.*COMPLEX_ADD_ROT270" 1 "slp1" { target { vect_complex_add_short && { ! aarch64_sve2 } } } } } */
/* { dg-final { scan-tree-dump "Found COMPLEX_ADD_ROT270" "slp1" } } */
/* { dg-final { scan-tree-dump "Found COMPLEX_ADD_ROT90" "slp1" } } */
void add90 (TYPE a[restrict N], TYPE b[restrict N], TYPE c[restrict N])
{
+#if defined (UNROLL)
+#pragma GCC unroll 16
+#endif
for (int i=0; i < N; i+=2)
{
c[i] = a[i] - b[i+1];
void add270 (TYPE a[restrict N], TYPE b[restrict N], TYPE c[restrict N])
{
+#if defined (UNROLL)
+#pragma GCC unroll 16
+#endif
for (int i=0; i < N; i+=2)
{
c[i] = a[i] + b[i+1];
void addMixed (TYPE a[restrict N], TYPE b[restrict N], TYPE c[restrict N])
{
+#if defined (UNROLL)
+#pragma GCC unroll 16
+#endif
for (int i=0; i < N; i+=4)
{
c[i] = a[i] - b[i+1];
void add90HandUnrolled (TYPE a[restrict N], TYPE b[restrict N],
TYPE c[restrict N])
{
+#if defined (UNROLL)
+#pragma GCC unroll 16
+#endif
for (int i=0; i < (N /2); i+=4)
{
c[i] = a[i] - b[i+1];
void add90Hybrid (TYPE a[restrict N], TYPE b[restrict N], TYPE c[restrict N],
TYPE d[restrict N])
{
+#if defined (UNROLL)
+#pragma GCC unroll 16
+#endif
for (int i=0; i < N; i+=2)
{
c[i] = a[i] - b[i+1];
}
}
-/* { dg-final { scan-tree-dump-times "stmt.*COMPLEX_ADD_ROT90" 2 "vect" } } */
\ No newline at end of file
+/* { dg-final { scan-tree-dump-times "stmt.*COMPLEX_ADD_ROT90" 2 "vect" } } */
/* Plain element-wise complex addition: stores a[i] + b[i] into c[i] for
   every index from 0 up to N - 1.  Neither operand is multiplied by I or
   conjugated.  The added preprocessor lines request a "GCC unroll 16"
   pragma on the loop when UNROLL is defined.  */
void add0 (_Complex TYPE a[restrict N], _Complex TYPE b[restrict N],
_Complex TYPE c[restrict N])
{
+#if defined (UNROLL)
+#pragma GCC unroll 16
+#endif
for (int i=0; i < N; i++)
c[i] = a[i] + b[i];
}
/* Stores a[i] + (b[i] * I) into c[i] for every index from 0 up to N - 1,
   i.e. the second operand is multiplied by I once before the addition.
   I is presumably the imaginary unit from <complex.h>; the include is not
   visible in this fragment.  The added preprocessor lines request a
   "GCC unroll 16" pragma on the loop when UNROLL is defined.  */
void add90snd (_Complex TYPE a[restrict N], _Complex TYPE b[restrict N],
_Complex TYPE c[restrict N])
{
+#if defined (UNROLL)
+#pragma GCC unroll 16
+#endif
for (int i=0; i < N; i++)
c[i] = a[i] + (b[i] * I);
}
/* Stores a[i] + (b[i] * I * I) into c[i] for every index from 0 up to
   N - 1, i.e. the second operand is multiplied by I twice before the
   addition.  I is presumably the imaginary unit from <complex.h>; the
   include is not visible in this fragment.  The added preprocessor lines
   request a "GCC unroll 16" pragma on the loop when UNROLL is defined.  */
void add180snd (_Complex TYPE a[restrict N], _Complex TYPE b[restrict N],
_Complex TYPE c[restrict N])
{
+#if defined (UNROLL)
+#pragma GCC unroll 16
+#endif
for (int i=0; i < N; i++)
c[i] = a[i] + (b[i] * I * I);
}
/* Stores a[i] + (b[i] * I * I * I) into c[i] for every index from 0 up to
   N - 1, i.e. the second operand is multiplied by I three times before
   the addition.  I is presumably the imaginary unit from <complex.h>; the
   include is not visible in this fragment.  The added preprocessor lines
   request a "GCC unroll 16" pragma on the loop when UNROLL is defined.  */
void add270snd (_Complex TYPE a[restrict N], _Complex TYPE b[restrict N],
_Complex TYPE c[restrict N])
{
+#if defined (UNROLL)
+#pragma GCC unroll 16
+#endif
for (int i=0; i < N; i++)
c[i] = a[i] + (b[i] * I * I * I);
}
/* Stores (a[i] * I) + b[i] into c[i] for every index from 0 up to N - 1,
   i.e. the first operand is multiplied by I once before the addition.
   I is presumably the imaginary unit from <complex.h>; the include is not
   visible in this fragment.  The added preprocessor lines request a
   "GCC unroll 16" pragma on the loop when UNROLL is defined.  */
void add90fst (_Complex TYPE a[restrict N], _Complex TYPE b[restrict N],
_Complex TYPE c[restrict N])
{
+#if defined (UNROLL)
+#pragma GCC unroll 16
+#endif
for (int i=0; i < N; i++)
c[i] = (a[i] * I) + b[i];
}
/* Stores (a[i] * I * I) + b[i] into c[i] for every index from 0 up to
   N - 1, i.e. the first operand is multiplied by I twice before the
   addition.  I is presumably the imaginary unit from <complex.h>; the
   include is not visible in this fragment.  The added preprocessor lines
   request a "GCC unroll 16" pragma on the loop when UNROLL is defined.  */
void add180fst (_Complex TYPE a[restrict N], _Complex TYPE b[restrict N],
_Complex TYPE c[restrict N])
{
+#if defined (UNROLL)
+#pragma GCC unroll 16
+#endif
for (int i=0; i < N; i++)
c[i] = (a[i] * I * I) + b[i];
}
/* Stores (a[i] * I * I * I) + b[i] into c[i] for every index from 0 up to
   N - 1, i.e. the first operand is multiplied by I three times before
   the addition.  I is presumably the imaginary unit from <complex.h>; the
   include is not visible in this fragment.  The added preprocessor lines
   request a "GCC unroll 16" pragma on the loop when UNROLL is defined.  */
void add270fst (_Complex TYPE a[restrict N], _Complex TYPE b[restrict N],
_Complex TYPE c[restrict N])
{
+#if defined (UNROLL)
+#pragma GCC unroll 16
+#endif
for (int i=0; i < N; i++)
c[i] = (a[i] * I * I * I) + b[i];
}
/* Stores ~a[i] + b[i] into c[i] for every index from 0 up to N - 1.
   Applied to a complex value, '~' is the GNU C extension for complex
   conjugation, so only the first operand is conjugated here.  The added
   preprocessor lines request a "GCC unroll 16" pragma on the loop when
   UNROLL is defined.  */
void addconjfst (_Complex TYPE a[restrict N], _Complex TYPE b[restrict N],
_Complex TYPE c[restrict N])
{
+#if defined (UNROLL)
+#pragma GCC unroll 16
+#endif
for (int i=0; i < N; i++)
c[i] = ~a[i] + b[i];
}
/* Stores a[i] + ~b[i] into c[i] for every index from 0 up to N - 1.
   Applied to a complex value, '~' is the GNU C extension for complex
   conjugation, so only the second operand is conjugated here.  The added
   preprocessor lines request a "GCC unroll 16" pragma on the loop when
   UNROLL is defined.  */
void addconjsnd (_Complex TYPE a[restrict N], _Complex TYPE b[restrict N],
_Complex TYPE c[restrict N])
{
+#if defined (UNROLL)
+#pragma GCC unroll 16
+#endif
for (int i=0; i < N; i++)
c[i] = a[i] + ~b[i];
}
/* Stores ~a[i] + ~b[i] into c[i] for every index from 0 up to N - 1.
   Applied to a complex value, '~' is the GNU C extension for complex
   conjugation, so both operands are conjugated before the addition.  The
   added preprocessor lines request a "GCC unroll 16" pragma on the loop
   when UNROLL is defined.  */
void addconjboth (_Complex TYPE a[restrict N], _Complex TYPE b[restrict N],
_Complex TYPE c[restrict N])
{
+#if defined (UNROLL)
+#pragma GCC unroll 16
+#endif
for (int i=0; i < N; i++)
c[i] = ~a[i] + ~b[i];
}
/* { dg-do run } */
/* { dg-require-effective-target vect_complex_add_double } */
/* { dg-add-options arm_v8_3a_complex_neon } */
-/* { dg-add-options arm_v8_1m_mve_fp } */
+/* { dg-add-options arm_v8_3a_complex_neon } */
#include <stdio.h>
#include <complex.h>
/* { dg-do compile } */
/* { dg-require-effective-target vect_double } */
/* { dg-add-options arm_v8_3a_complex_neon } */
-/* { dg-additional-options "-fno-tree-loop-vectorize -funroll-loops" } */
-/* { dg-add-options arm_v8_1m_mve_fp } */
+/* { dg-additional-options "-fno-tree-loop-vectorize" } */
+
+#define UNROLL
#define TYPE double
#define N 16
/* { dg-do compile } */
/* { dg-require-effective-target vect_float } */
/* { dg-add-options arm_v8_3a_complex_neon } */
-/* { dg-additional-options "-fno-tree-loop-vectorize -funroll-loops" } */
-/* { dg-add-options arm_v8_1m_mve_fp } */
+/* { dg-additional-options "-fno-tree-loop-vectorize" } */
+
+#define UNROLL
#define TYPE float
#define N 16
/* { dg-do compile } */
/* { dg-require-effective-target vect_complex_add_half } */
+/* { dg-require-effective-target float16 } */
/* { dg-additional-options "-fno-tree-loop-vectorize" } */
/* { dg-add-options arm_v8_3a_fp16_complex_neon } */
-/* { dg-add-options arm_v8_1m_mve_fp } */
#define TYPE _Float16
#define N 16
/* { dg-do compile } */
/* { dg-add-options arm_v8_3a_complex_neon } */
/* { dg-additional-options "-fno-tree-loop-vectorize" } */
-/* { dg-add-options arm_v8_1m_mve_fp } */
+/* { dg-require-effective-target vect_double } */
+/* { dg-add-options arm_v8_3a_complex_neon } */
+
+#define UNROLL
#define TYPE double
#define N 16
/* { dg-do compile } */
/* { dg-require-effective-target vect_float } */
/* { dg-add-options arm_v8_3a_complex_neon } */
-/* { dg-additional-options "-fno-tree-loop-vectorize -funroll-loops" } */
-/* { dg-add-options arm_v8_1m_mve_fp } */
+/* { dg-additional-options "-fno-tree-loop-vectorize" } */
+
+#define UNROLL
#define TYPE float
#define N 16
/* { dg-do compile } */
/* { dg-require-effective-target vect_complex_add_half } */
+/* { dg-require-effective-target float16 } */
/* { dg-add-options arm_v8_3a_fp16_complex_neon } */
-/* { dg-additional-options "-fno-tree-loop-vectorize -funroll-loops" } */
-/* { dg-add-options arm_v8_1m_mve_fp } */
+/* { dg-additional-options "-fno-tree-loop-vectorize" } */
+
+#define UNROLL
#define TYPE _Float16
#define N 16
/* { dg-do compile } */
/* { dg-require-effective-target vect_complex_add_double } */
/* { dg-add-options arm_v8_3a_complex_neon } */
-/* { dg-add-options arm_v8_1m_mve_fp } */
#define TYPE double
#define N 16
/* { dg-require-effective-target vect_complex_add_float } */
/* { dg-additional-options "-fno-tree-loop-vectorize" } */
/* { dg-add-options arm_v8_3a_fp16_complex_neon } */
-/* { dg-add-options arm_v8_1m_mve_fp } */
#define TYPE float
#define N 16
/* { dg-do compile } */
/* { dg-require-effective-target vect_complex_add_half } */
+/* { dg-require-effective-target float16 } */
/* { dg-additional-options "-fno-tree-loop-vectorize" } */
/* { dg-add-options arm_v8_3a_fp16_complex_neon } */
-/* { dg-add-options arm_v8_1m_mve_fp } */
#define TYPE _Float16
#define N 16
/* { dg-do compile } */
/* { dg-require-effective-target vect_complex_add_double } */
/* { dg-add-options arm_v8_3a_complex_neon } */
-/* { dg-add-options arm_v8_1m_mve_fp } */
#define TYPE double
#define N 16
/* { dg-require-effective-target vect_complex_add_float } */
/* { dg-additional-options "-fno-tree-loop-vectorize" } */
/* { dg-add-options arm_v8_3a_complex_neon } */
-/* { dg-add-options arm_v8_1m_mve_fp } */
#define TYPE float
#define N 16
/* { dg-do compile } */
/* { dg-require-effective-target vect_complex_add_half } */
+/* { dg-require-effective-target float16 } */
/* { dg-additional-options "-fno-tree-loop-vectorize" } */
/* { dg-add-options arm_v8_3a_fp16_complex_neon } */
-/* { dg-add-options arm_v8_1m_mve_fp } */
#define TYPE _Float16
#define N 16
/* { dg-do compile } */
/* { dg-require-effective-target vect_complex_add_double } */
/* { dg-add-options arm_v8_3a_complex_neon } */
-/* { dg-add-options arm_v8_1m_mve_fp } */
#define TYPE double
#define N 16
/* { dg-require-effective-target vect_complex_add_float } */
/* { dg-additional-options "-fno-tree-loop-vectorize" } */
/* { dg-add-options arm_v8_3a_complex_neon } */
-/* { dg-add-options arm_v8_1m_mve_fp } */
#define TYPE float
#define N 16
/* { dg-do compile } */
/* { dg-require-effective-target vect_complex_add_half } */
+/* { dg-require-effective-target float16 } */
/* { dg-additional-options "-fno-tree-loop-vectorize" } */
/* { dg-add-options arm_v8_3a_fp16_complex_neon } */
-/* { dg-add-options arm_v8_1m_mve_fp } */
#define TYPE _Float16
#define N 16
/* { dg-do compile } */
+/* { dg-require-effective-target vect_double } */
/* { dg-add-options arm_v8_3a_complex_neon } */
-/* { dg-add-options arm_v8_1m_mve_fp } */
#define TYPE double
#define N 200
/* { dg-do compile } */
+/* { dg-require-effective-target vect_float } */
/* { dg-add-options arm_v8_3a_complex_neon } */
-/* { dg-add-options arm_v8_1m_mve_fp } */
#define TYPE float
#define N 200
/* { dg-do compile } */
+/* { dg-require-effective-target float16 } */
/* { dg-add-options arm_v8_3a_fp16_complex_neon } */
-/* { dg-add-options arm_v8_1m_mve_fp } */
#define TYPE _Float16
#define N 200
/* { dg-do compile } */
+/* { dg-require-effective-target vect_double } */
/* { dg-add-options arm_v8_3a_complex_neon } */
-/* { dg-add-options arm_v8_1m_mve_fp } */
#define TYPE double
#define N 200
/* { dg-do compile } */
+/* { dg-require-effective-target vect_float } */
/* { dg-add-options arm_v8_3a_complex_neon } */
-/* { dg-add-options arm_v8_1m_mve_fp } */
#define TYPE float
#define N 200
/* { dg-do compile } */
+/* { dg-require-effective-target vect_float } */
+/* { dg-require-effective-target float16 } */
/* { dg-add-options arm_v8_3a_fp16_complex_neon } */
-/* { dg-add-options arm_v8_1m_mve_fp } */
#define TYPE _Float16
#define N 200
/* { dg-do compile } */
/* { dg-require-effective-target vect_complex_add_double } */
/* { dg-add-options arm_v8_3a_complex_neon } */
-/* { dg-add-options arm_v8_1m_mve_fp } */
#define TYPE double
#define N 200
/* { dg-do compile } */
/* { dg-require-effective-target vect_complex_add_float } */
/* { dg-add-options arm_v8_3a_complex_neon } */
-/* { dg-add-options arm_v8_1m_mve_fp } */
#define TYPE float
#define N 200
/* { dg-do compile } */
/* { dg-add-options arm_v8_3a_fp16_complex_neon } */
-/* { dg-add-options arm_v8_1m_mve_fp } */
#define TYPE _Float16
#define N 200
/* { dg-do compile } */
/* { dg-add-options arm_v8_3a_complex_neon } */
-/* { dg-add-options arm_v8_1m_mve_fp } */
#define TYPE double
#define N 200
/* { dg-do compile } */
/* { dg-add-options arm_v8_3a_complex_neon } */
-/* { dg-add-options arm_v8_1m_mve_fp } */
#define TYPE float
#define N 200
/* { dg-do compile } */
/* { dg-add-options arm_v8_3a_fp16_complex_neon } */
-/* { dg-add-options arm_v8_1m_mve_fp } */
#define TYPE _Float16
#define N 200
/* { dg-do compile } */
/* { dg-add-options arm_v8_3a_complex_neon } */
-/* { dg-add-options arm_v8_1m_mve_fp } */
#define TYPE double
#define N 200
/* { dg-do compile } */
/* { dg-add-options arm_v8_3a_complex_neon } */
-/* { dg-add-options arm_v8_1m_mve_fp } */
#define TYPE float
#define N 200
/* { dg-do compile } */
/* { dg-add-options arm_v8_3a_fp16_complex_neon } */
-/* { dg-add-options arm_v8_1m_mve_fp } */
#define TYPE _Float16
#define N 200
/* { dg-do compile } */
/* { dg-require-effective-target stdint_types } */
-/* { dg-add-options arm_v8_1m_mve_fp } */
+/* { dg-require-effective-target vect_int } */
+/* { dg-add-options arm_v8_3a_complex_neon } */
#define TYPE int8_t
#define N 200
/* { dg-do compile } */
/* { dg-require-effective-target stdint_types } */
-/* { dg-add-options arm_v8_1m_mve_fp } */
+/* { dg-require-effective-target vect_int } */
+/* { dg-add-options arm_v8_3a_complex_neon } */
#define TYPE int32_t
#define N 200
/* { dg-do compile } */
/* { dg-require-effective-target stdint_types } */
-/* { dg-add-options arm_v8_1m_mve_fp } */
+/* { dg-require-effective-target vect_long } */
+/* { dg-add-options arm_v8_3a_complex_neon } */
+
+#define UNROLL
#define TYPE int64_t
#define N 200
/* { dg-do compile } */
/* { dg-require-effective-target stdint_types } */
-/* { dg-add-options arm_v8_1m_mve_fp } */
+/* { dg-require-effective-target vect_int } */
+/* { dg-add-options arm_v8_3a_complex_neon } */
#define TYPE int16_t
#define N 200
/* { dg-do compile } */
/* { dg-require-effective-target stdint_types } */
-/* { dg-add-options arm_v8_1m_mve_fp } */
+/* { dg-require-effective-target vect_int } */
+/* { dg-add-options arm_v8_3a_complex_neon } */
#define TYPE uint8_t
#define N 200
/* { dg-do compile } */
/* { dg-require-effective-target stdint_types } */
-/* { dg-add-options arm_v8_1m_mve_fp } */
+/* { dg-require-effective-target vect_int } */
+/* { dg-add-options arm_v8_3a_complex_neon } */
#define TYPE uint32_t
#define N 200
/* { dg-do compile } */
/* { dg-require-effective-target stdint_types } */
-/* { dg-add-options arm_v8_1m_mve_fp } */
+/* { dg-require-effective-target vect_long } */
+/* { dg-add-options arm_v8_3a_complex_neon } */
+
+#define UNROLL
#define TYPE uint64_t
#define N 200
/* { dg-do compile } */
/* { dg-require-effective-target stdint_types } */
-/* { dg-add-options arm_v8_1m_mve_fp } */
+/* { dg-require-effective-target vect_int } */
+/* { dg-add-options arm_v8_3a_complex_neon } */
#define TYPE uint16_t
#define N 200