[ARM,AArch64][testsuite] New Advanced SIMD intrinsics tests.
authorclyon <clyon@138bc75d-0d04-0410-961f-82ee72b054a4>
Sun, 8 Feb 2015 15:05:43 +0000 (15:05 +0000)
committerclyon <clyon@138bc75d-0d04-0410-961f-82ee72b054a4>
Sun, 8 Feb 2015 15:05:43 +0000 (15:05 +0000)
2015-02-04  Christophe Lyon  <christophe.lyon@linaro.org>

Backport from trunk r216640-r216661.
2014-10-24  Christophe Lyon  <christophe.lyon@linaro.org>

* gcc.target/aarch64/advsimd-intrinsics/vuzp.c: New file.
* gcc.target/aarch64/advsimd-intrinsics/vzip.c: Likewise.

2014-10-24  Christophe Lyon  <christophe.lyon@linaro.org>

* gcc.target/aarch64/advsimd-intrinsics/vmul.c: New file.

2014-10-24  Christophe Lyon  <christophe.lyon@linaro.org>

* gcc.target/aarch64/advsimd-intrinsics/vldX_lane.c: New file.

2014-10-24  Christophe Lyon  <christophe.lyon@linaro.org>

* gcc.target/aarch64/advsimd-intrinsics/vldX.c: New file.

2014-10-24  Christophe Lyon  <christophe.lyon@linaro.org>

* gcc.target/aarch64/advsimd-intrinsics/vld1_dup.c: New file.

2014-10-24  Christophe Lyon  <christophe.lyon@linaro.org>

* gcc.target/aarch64/advsimd-intrinsics/vdup-vmov.c: New file.

2014-10-24  Christophe Lyon  <christophe.lyon@linaro.org>

* gcc.target/aarch64/advsimd-intrinsics/vclz.c: New file.

2014-10-24  Christophe Lyon  <christophe.lyon@linaro.org>

* gcc.target/aarch64/advsimd-intrinsics/vbsl.c: New file.

2014-10-24  Christophe Lyon  <christophe.lyon@linaro.org>

* gcc.target/aarch64/advsimd-intrinsics/vaddw.c: New file.

2014-10-24  Christophe Lyon  <christophe.lyon@linaro.org>

* gcc.target/aarch64/advsimd-intrinsics/vaddl.c: New file.

2014-10-24  Christophe Lyon  <christophe.lyon@linaro.org>

* gcc.target/aarch64/advsimd-intrinsics/vaddhn.c: New file.

2014-10-24  Christophe Lyon  <christophe.lyon@linaro.org>

* gcc.target/aarch64/advsimd-intrinsics/vabdl.c: New file.

2014-10-24  Christophe Lyon  <christophe.lyon@linaro.org>

* gcc.target/aarch64/advsimd-intrinsics/vabd.c: New file.

2014-10-24  Christophe Lyon  <christophe.lyon@linaro.org>

* gcc.target/aarch64/advsimd-intrinsics/vabal.c: New file.

2014-10-24  Christophe Lyon  <christophe.lyon@linaro.org>

* gcc.target/aarch64/advsimd-intrinsics/binary_sat_op.inc: New
file.
* gcc.target/aarch64/advsimd-intrinsics/vqadd.c: Likewise.
* gcc.target/aarch64/advsimd-intrinsics/vqsub.c: Likewise.

2014-10-24  Christophe Lyon  <christophe.lyon@linaro.org>

* gcc.target/aarch64/advsimd-intrinsics/unary_sat_op.inc: New
file.
* gcc.target/aarch64/advsimd-intrinsics/vqabs.c: Likewise.
* gcc.target/aarch64/advsimd-intrinsics/vqneg.c: Likewise.

2014-10-24  Christophe Lyon  <christophe.lyon@linaro.org>

* gcc.target/aarch64/advsimd-intrinsics/cmp_fp_op.inc: New file.
* gcc.target/aarch64/advsimd-intrinsics/vcage.c: Likewise.
* gcc.target/aarch64/advsimd-intrinsics/vcagt.c: Likewise.
* gcc.target/aarch64/advsimd-intrinsics/vcale.c: Likewise.
* gcc.target/aarch64/advsimd-intrinsics/vcalt.c: Likewise.

2014-10-24  Christophe Lyon  <christophe.lyon@linaro.org>

* gcc.target/aarch64/advsimd-intrinsics/cmp_op.inc: New file.
* gcc.target/aarch64/advsimd-intrinsics/vceq.c: Likewise.
* gcc.target/aarch64/advsimd-intrinsics/vcge.c: Likewise.
* gcc.target/aarch64/advsimd-intrinsics/vcgt.c: Likewise.
* gcc.target/aarch64/advsimd-intrinsics/vcle.c: Likewise.
* gcc.target/aarch64/advsimd-intrinsics/vclt.c: Likewise.

2014-10-24  Christophe Lyon  <christophe.lyon@linaro.org>

* gcc.target/aarch64/advsimd-intrinsics/binary_op.inc: New file.
* gcc.target/aarch64/advsimd-intrinsics/vadd.c: Likewise.
* gcc.target/aarch64/advsimd-intrinsics/vand.c: Likewise.
* gcc.target/aarch64/advsimd-intrinsics/vbic.c: Likewise.
* gcc.target/aarch64/advsimd-intrinsics/veor.c: Likewise.
* gcc.target/aarch64/advsimd-intrinsics/vorn.c: Likewise.
* gcc.target/aarch64/advsimd-intrinsics/vorr.c: Likewise.
* gcc.target/aarch64/advsimd-intrinsics/vsub.c: Likewise.

2014-10-24  Christophe Lyon  <christophe.lyon@linaro.org>

* gcc.target/aarch64/advsimd-intrinsics/unary_op.inc: New file.
* gcc.target/aarch64/advsimd-intrinsics/vabs.c: Likewise.
* gcc.target/aarch64/advsimd-intrinsics/vneg.c: Likewise.

2014-10-24  Christophe Lyon  <christophe.lyon@linaro.org>

* gcc.target/arm/README.advsimd-intrinsics: New file.
* gcc.target/aarch64/advsimd-intrinsics/README: Likewise.
* gcc.target/aarch64/advsimd-intrinsics/arm-neon-ref.h: Likewise.
* gcc.target/aarch64/advsimd-intrinsics/compute-ref-data.h:
Likewise.
* gcc.target/aarch64/advsimd-intrinsics/advsimd-intrinsics.exp:
Likewise.
* gcc.target/aarch64/advsimd-intrinsics/vaba.c: Likewise.
* gcc.target/aarch64/advsimd-intrinsics/vld1.c: Likewise.
* gcc.target/aarch64/advsimd-intrinsics/vshl.c: Likewise.

git-svn-id: svn+ssh://gcc.gnu.org/svn/gcc/branches/linaro/gcc-4_9-branch@220516 138bc75d-0d04-0410-961f-82ee72b054a4

52 files changed:
gcc/testsuite/ChangeLog.linaro
gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/README [new file with mode: 0644]
gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/advsimd-intrinsics.exp [new file with mode: 0644]
gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/arm-neon-ref.h [new file with mode: 0644]
gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/binary_op.inc [new file with mode: 0644]
gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/binary_sat_op.inc [new file with mode: 0644]
gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/cmp_fp_op.inc [new file with mode: 0644]
gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/cmp_op.inc [new file with mode: 0644]
gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/compute-ref-data.h [new file with mode: 0644]
gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/unary_op.inc [new file with mode: 0644]
gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/unary_sat_op.inc [new file with mode: 0644]
gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vaba.c [new file with mode: 0644]
gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vabal.c [new file with mode: 0644]
gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vabd.c [new file with mode: 0644]
gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vabdl.c [new file with mode: 0644]
gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vabs.c [new file with mode: 0644]
gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vadd.c [new file with mode: 0644]
gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vaddhn.c [new file with mode: 0644]
gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vaddl.c [new file with mode: 0644]
gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vaddw.c [new file with mode: 0644]
gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vand.c [new file with mode: 0644]
gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vbic.c [new file with mode: 0644]
gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vbsl.c [new file with mode: 0644]
gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vcage.c [new file with mode: 0644]
gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vcagt.c [new file with mode: 0644]
gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vcale.c [new file with mode: 0644]
gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vcalt.c [new file with mode: 0644]
gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vceq.c [new file with mode: 0644]
gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vcge.c [new file with mode: 0644]
gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vcgt.c [new file with mode: 0644]
gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vcle.c [new file with mode: 0644]
gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vclt.c [new file with mode: 0644]
gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vclz.c [new file with mode: 0644]
gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vdup-vmov.c [new file with mode: 0644]
gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/veor.c [new file with mode: 0644]
gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vld1.c [new file with mode: 0644]
gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vld1_dup.c [new file with mode: 0644]
gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vldX.c [new file with mode: 0644]
gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vldX_lane.c [new file with mode: 0644]
gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vmul.c [new file with mode: 0644]
gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vneg.c [new file with mode: 0644]
gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vorn.c [new file with mode: 0644]
gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vorr.c [new file with mode: 0644]
gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vqabs.c [new file with mode: 0644]
gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vqadd.c [new file with mode: 0644]
gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vqneg.c [new file with mode: 0644]
gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vqsub.c [new file with mode: 0644]
gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vshl.c [new file with mode: 0644]
gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vsub.c [new file with mode: 0644]
gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vuzp.c [new file with mode: 0644]
gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vzip.c [new file with mode: 0644]
gcc/testsuite/gcc.target/arm/README.advsimd-intrinsics [new file with mode: 0644]

index 176ab2e..a0e2d9f 100644 (file)
@@ -1,3 +1,124 @@
+2015-02-04  Christophe Lyon  <christophe.lyon@linaro.org>
+
+       Backport from trunk r216640-r216661.
+       2014-10-24  Christophe Lyon  <christophe.lyon@linaro.org>
+
+       * gcc.target/aarch64/advsimd-intrinsics/vuzp.c: New file.
+       * gcc.target/aarch64/advsimd-intrinsics/vzip.c: Likewise.
+
+       2014-10-24  Christophe Lyon  <christophe.lyon@linaro.org>
+
+       * gcc.target/aarch64/advsimd-intrinsics/vmul.c: New file.
+
+       2014-10-24  Christophe Lyon  <christophe.lyon@linaro.org>
+
+       * gcc.target/aarch64/advsimd-intrinsics/vldX_lane.c: New file.
+
+       2014-10-24  Christophe Lyon  <christophe.lyon@linaro.org>
+
+       * gcc.target/aarch64/advsimd-intrinsics/vldX.c: New file.
+
+       2014-10-24  Christophe Lyon  <christophe.lyon@linaro.org>
+
+       * gcc.target/aarch64/advsimd-intrinsics/vld1_dup.c: New file.
+
+       2014-10-24  Christophe Lyon  <christophe.lyon@linaro.org>
+
+       * gcc.target/aarch64/advsimd-intrinsics/vdup-vmov.c: New file.
+
+       2014-10-24  Christophe Lyon  <christophe.lyon@linaro.org>
+
+       * gcc.target/aarch64/advsimd-intrinsics/vclz.c: New file.
+
+       2014-10-24  Christophe Lyon  <christophe.lyon@linaro.org>
+
+       * gcc.target/aarch64/advsimd-intrinsics/vbsl.c: New file.
+
+       2014-10-24  Christophe Lyon  <christophe.lyon@linaro.org>
+
+       * gcc.target/aarch64/advsimd-intrinsics/vaddw.c: New file.
+
+       2014-10-24  Christophe Lyon  <christophe.lyon@linaro.org>
+
+       * gcc.target/aarch64/advsimd-intrinsics/vaddl.c: New file.
+
+       2014-10-24  Christophe Lyon  <christophe.lyon@linaro.org>
+
+       * gcc.target/aarch64/advsimd-intrinsics/vaddhn.c: New file.
+
+       2014-10-24  Christophe Lyon  <christophe.lyon@linaro.org>
+
+       * gcc.target/aarch64/advsimd-intrinsics/vabdl.c: New file.
+
+       2014-10-24  Christophe Lyon  <christophe.lyon@linaro.org>
+
+       * gcc.target/aarch64/advsimd-intrinsics/vabd.c: New file.
+
+       2014-10-24  Christophe Lyon  <christophe.lyon@linaro.org>
+
+       * gcc.target/aarch64/advsimd-intrinsics/vabal.c: New file.
+
+       2014-10-24  Christophe Lyon  <christophe.lyon@linaro.org>
+
+       * gcc.target/aarch64/advsimd-intrinsics/binary_sat_op.inc: New
+       file.
+       * gcc.target/aarch64/advsimd-intrinsics/vqadd.c: Likewise.
+       * gcc.target/aarch64/advsimd-intrinsics/vqsub.c: Likewise.
+
+       2014-10-24  Christophe Lyon  <christophe.lyon@linaro.org>
+
+       * gcc.target/aarch64/advsimd-intrinsics/unary_sat_op.inc: New
+       file.
+       * gcc.target/aarch64/advsimd-intrinsics/vqabs.c: Likewise.
+       * gcc.target/aarch64/advsimd-intrinsics/vqneg.c: Likewise.
+
+       2014-10-24  Christophe Lyon  <christophe.lyon@linaro.org>
+
+       * gcc.target/aarch64/advsimd-intrinsics/cmp_fp_op.inc: New file.
+       * gcc.target/aarch64/advsimd-intrinsics/vcage.c: Likewise.
+       * gcc.target/aarch64/advsimd-intrinsics/vcagt.c: Likewise.
+       * gcc.target/aarch64/advsimd-intrinsics/vcale.c: Likewise.
+       * gcc.target/aarch64/advsimd-intrinsics/vcalt.c: Likewise.
+
+       2014-10-24  Christophe Lyon  <christophe.lyon@linaro.org>
+
+       * gcc.target/aarch64/advsimd-intrinsics/cmp_op.inc: New file.
+       * gcc.target/aarch64/advsimd-intrinsics/vceq.c: Likewise.
+       * gcc.target/aarch64/advsimd-intrinsics/vcge.c: Likewise.
+       * gcc.target/aarch64/advsimd-intrinsics/vcgt.c: Likewise.
+       * gcc.target/aarch64/advsimd-intrinsics/vcle.c: Likewise.
+       * gcc.target/aarch64/advsimd-intrinsics/vclt.c: Likewise.
+
+       2014-10-24  Christophe Lyon  <christophe.lyon@linaro.org>
+
+       * gcc.target/aarch64/advsimd-intrinsics/binary_op.inc: New file.
+       * gcc.target/aarch64/advsimd-intrinsics/vadd.c: Likewise.
+       * gcc.target/aarch64/advsimd-intrinsics/vand.c: Likewise.
+       * gcc.target/aarch64/advsimd-intrinsics/vbic.c: Likewise.
+       * gcc.target/aarch64/advsimd-intrinsics/veor.c: Likewise.
+       * gcc.target/aarch64/advsimd-intrinsics/vorn.c: Likewise.
+       * gcc.target/aarch64/advsimd-intrinsics/vorr.c: Likewise.
+       * gcc.target/aarch64/advsimd-intrinsics/vsub.c: Likewise.
+
+       2014-10-24  Christophe Lyon  <christophe.lyon@linaro.org>
+
+       * gcc.target/aarch64/advsimd-intrinsics/unary_op.inc: New file.
+       * gcc.target/aarch64/advsimd-intrinsics/vabs.c: Likewise.
+       * gcc.target/aarch64/advsimd-intrinsics/vneg.c: Likewise.
+
+       2014-10-24  Christophe Lyon  <christophe.lyon@linaro.org>
+
+       * gcc.target/arm/README.advsimd-intrinsics: New file.
+       * gcc.target/aarch64/advsimd-intrinsics/README: Likewise.
+       * gcc.target/aarch64/advsimd-intrinsics/arm-neon-ref.h: Likewise.
+       * gcc.target/aarch64/advsimd-intrinsics/compute-ref-data.h:
+       Likewise.
+       * gcc.target/aarch64/advsimd-intrinsics/advsimd-intrinsics.exp:
+       Likewise.
+       * gcc.target/aarch64/advsimd-intrinsics/vaba.c: Likewise.
+       * gcc.target/aarch64/advsimd-intrinsics/vld1.c: Likewise.
+       * gcc.target/aarch64/advsimd-intrinsics/vshl.c: Likewise.
+
 2015-02-05  Prathamesh Kulkarni  <prathamesh.kulkarni@linaro.org>
 
        Backport from trunk r217230.
diff --git a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/README b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/README
new file mode 100644 (file)
index 0000000..52c374c
--- /dev/null
@@ -0,0 +1,132 @@
+This directory contains executable tests for ARM/AArch64 Advanced SIMD
+(Neon) intrinsics.
+
+It is meant to cover execution cases of all the Advanced SIMD
+intrinsics, but does not scan the generated assembler code.
+
+The general framework is composed as follows:
+- advsimd-intrinsics.exp: main dejagnu driver
+- *.c: actual tests, generally one per intrinsinc family
+- arm-neon-ref.h: contains macro definitions to save typing in actual
+  test files
+- compute-ref-data.h: contains input vectors definitions
+- *.inc: generic tests, shared by several families of intrinsics. For
+   instance, unary or binary operators
+
+A typical .c test file starts with the following contents (look at
+vld1.c and vaba.c for sample cases):
+#include <arm_neon.h>
+#include "arm-neon-ref.h"
+#include "compute-ref-data.h"
+
+Then, definitions of expected results, based on common input values,
+as defined in compute-ref-data.h.
+For example:
+VECT_VAR_DECL(expected,int,16,4) [] = { 0x16, 0x17, 0x18, 0x19 };
+defines the expected results of an operator generating int16x4 values.
+
+The common input values defined in compute-ref-data.h have been chosen
+to avoid corner-case values for most operators, yet exposing negative
+values for signed operators. For this reason, their range is also
+limited. For instance, the initialization of buffer_int16x4 will be
+{ -16, -15, -14, -13 }.
+
+The initialization of floating-point values is done via hex notation,
+to avoid potential rounding problems.
+
+To test special values and corner cases, specific initialization
+values should be used in dedicated tests, to ensure proper coverage.
+An example of this is vshl.
+
+When a variant of an intrinsic is not available, its expected result
+should be defined to the value of CLEAN_PATTERN_8 as defined in
+arm-neon-ref.h. For example:
+VECT_VAR_DECL(expected,int,64,1) [] = { 0x3333333333333333 };
+if the given intrinsic has no variant producing an int64x1 result,
+like the vcmp family (eg. vclt).
+
+This is because the helper function (check_results(), defined in
+arm-neon-ref.h), iterates over all the possible variants, to save
+typing in each individual test file. Alternatively, one can directly
+call the CHECK/CHECK_FP macros to check only a few expected results
+(see vabs.c for an example).
+
+Then, define the TEST_MSG string, which will be used when reporting errors.
+
+Next, define the function performing the actual tests, in general
+relying on the helpers provided by arm-neon-ref.h, which means:
+
+* declare necessary vectors of suitable types: using
+  DECL_VARIABLE_ALL_VARIANTS when all variants are supported, or the
+  relevant of subset calls to DECL_VARIABLE.
+
+* call clean_results() to initialize the 'results' buffers.
+
+* initialize the input vectors, using VLOAD, VDUP or VSET_LANE (vld*
+  tests do not need this step, since their actual purpose is to
+  initialize vectors).
+
+* execute the intrinsic on relevant variants, for instance using
+  TEST_MACRO_ALL_VARIANTS_2_5.
+
+* call check_results() to check that the results match the expected
+  values.
+
+A template test file could be:
+=================================================================
+#include <arm_neon.h>
+#include "arm-neon-ref.h"
+#include "compute-ref-data.h"
+
+/* Expected results.  */
+VECT_VAR_DECL(expected,int,8,8) [] = { 0xf6, 0xf7, 0xf8, 0xf9,
+                                      0xfa, 0xfb, 0xfc, 0xfd };
+/* and as many others as necessary.  */
+
+#define TEST_MSG "VMYINTRINSIC"
+void exec_myintrinsic (void)
+{
+  /* my test: v4=vmyintrinsic(v1,v2,v3), then store the result.  */
+#define TEST_VMYINTR(Q, T1, T2, W, N)                                  \
+  VECT_VAR(vector_res, T1, W, N) =                                     \
+    vmyintr##Q##_##T2##W(VECT_VAR(vector1, T1, W, N),                  \
+                        VECT_VAR(vector2, T1, W, N),                   \
+                        VECT_VAR(vector3, T1, W, N));                  \
+  vst1##Q##_##T2##W(VECT_VAR(result, T1, W, N), VECT_VAR(vector_res, T1, W, N))
+
+#define DECL_VMYINTR_VAR(VAR)                  \
+  DECL_VARIABLE(VAR, int, 8, 8);
+/* And as many others as necessary.  */
+
+  DECL_VMYINTR_VAR(vector1);
+  DECL_VMYINTR_VAR(vector2);
+  DECL_VMYINTR_VAR(vector3);
+  DECL_VMYINTR_VAR(vector_res);
+
+  clean_results ();
+
+  /* Initialize input "vector1" from "buffer".  */
+  VLOAD(vector1, buffer, , int, s, 8, 8);
+/* And as many others as necessary.  */
+
+  /* Choose init value arbitrarily.  */
+  VDUP(vector2, , int, s, 8, 8, 1);
+/* And as many others as necessary.  */
+
+  /* Choose init value arbitrarily.  */
+  VDUP(vector3, , int, s, 8, 8, -5);
+/* And as many others as necessary.  */
+
+  /* Execute the tests.  */
+  TEST_VMYINTR(, int, s, 8, 8);
+/* And as many others as necessary.  */
+
+  check_results (TEST_MSG, "");
+}
+
+int main (void)
+{
+  exec_vmyintrinsic ();
+  return 0;
+}
+=================================================================
diff --git a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/advsimd-intrinsics.exp b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/advsimd-intrinsics.exp
new file mode 100644 (file)
index 0000000..4250ae9
--- /dev/null
@@ -0,0 +1,54 @@
+# Copyright (C) 2014 Free Software Foundation, Inc.
+
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 3 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with GCC; see the file COPYING3.  If not see
+# <http://www.gnu.org/licenses/>.
+
+# GCC testsuite that uses the `dg.exp' driver.
+
+# Exit immediately if this isn't an ARM or AArch64 target.
+if {![istarget arm*-*-*]
+    && ![istarget aarch64*-*-*]} then {
+  return
+}
+
+# Load support procs.
+load_lib gcc-dg.exp
+
+# Initialize `dg'.
+load_lib c-torture.exp
+load_lib target-supports.exp
+load_lib torture-options.exp
+
+dg-init
+
+torture-init
+set-torture-options $C_TORTURE_OPTIONS {{}} $LTO_TORTURE_OPTIONS
+
+# Make sure Neon flags are provided, if necessary.
+set additional_flags [add_options_for_arm_neon ""]
+
+# Main loop.
+foreach src [lsort [glob -nocomplain $srcdir/$subdir/*.c]] {
+    # If we're only testing specific files and this isn't one of them, skip it.
+    if ![runtest_file_p $runtests $src] then {
+       continue
+    }
+
+    c-torture-execute $src $additional_flags
+    gcc-dg-runtest $src $additional_flags
+}
+
+# All done.
+torture-finish
+dg-finish
diff --git a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/arm-neon-ref.h b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/arm-neon-ref.h
new file mode 100644 (file)
index 0000000..8ea1f26
--- /dev/null
@@ -0,0 +1,547 @@
+/* This file defines helper operations shared by all the tests.  */
+
+#ifndef _ARM_NEON_REF_H_
+#define _ARM_NEON_REF_H_
+
+#include <stdio.h>
+#include <inttypes.h>
+
+/* helper type, to help write floating point results in integer form.  */
+typedef uint32_t hfloat32_t;
+
+extern void abort(void);
+extern void *memset(void *, int, size_t);
+extern void *memcpy(void *, const void *, size_t);
+extern size_t strlen(const char *);
+
+/* Various string construction helpers.  */
+
+/*
+  The most useful at user-level are VECT_VAR and VECT_VAR_DECL, which
+   construct variable names or declarations, such as:
+   VECT_VAR(expected, int, 16, 4) -> expected_int16x4
+   VECT_VAR_DECL(expected, int, 16, 4) -> int16x4_t expected_int16x4
+*/
+
+#define xSTR(X) #X
+#define STR(X) xSTR(X)
+
+#define xNAME1(V,T) V ## _ ##  T
+#define xNAME(V,T) xNAME1(V,T)
+
+/* VAR(foo,int,16) -> foo_int16 */
+#define VAR(V,T,W) xNAME(V,T##W)
+/* VAR_DECL(foo,int,16) -> int16_t foo_int16 */
+#define VAR_DECL(V, T, W) T##W##_t VAR(V,T,W)
+
+/* VECT_NAME(int,16,4) ->  int16x4 */
+#define VECT_NAME(T, W, N) T##W##x##N
+/* VECT_ARRAY_NAME(int,16,4,2) -> int16x4x2 */
+#define VECT_ARRAY_NAME(T, W, N, L) T##W##x##N##x##L
+/* VECT_TYPE(int,16,4) -> int16x4_t */
+#define VECT_TYPE(T, W, N) xNAME(VECT_NAME(T,W,N),t)
+/* VECT_ARRAY_TYPE(int,16,4,2) -> int16x4x2_t */
+#define VECT_ARRAY_TYPE(T, W, N, L) xNAME(VECT_ARRAY_NAME(T,W,N,L),t)
+
+/* VECT_VAR(foo,int,16,4) -> foo_int16x4 */
+#define VECT_VAR(V,T,W,N) xNAME(V,VECT_NAME(T,W,N))
+/* VECT_VAR_DECL(foo,int,16,4) -> int16_t foo_int16x4 */
+#define VECT_VAR_DECL(V, T, W, N) T##W##_t VECT_VAR(V,T,W,N)
+
+/* Array declarations.  */
+/* ARRAY(foo,int,16,4) -> int16_t foo_int16x4[4] */
+#define ARRAY(V, T, W, N) VECT_VAR_DECL(V,T,W,N)[N]
+
+/* Arrays of vectors.  */
+/* VECT_ARRAY_VAR(foo,int,16,4,2) -> foo_int16x4x2 */
+#define VECT_ARRAY_VAR(V,T,W,N,L) xNAME(V,VECT_ARRAY_NAME(T,W,N,L))
+/* VECT_ARRAY(foo,int,16,4,2) -> int16_t foo_int16x4x2[4*2] */
+#define VECT_ARRAY(V, T, W, N, L) T##W##_t VECT_ARRAY_VAR(V,T,W,N,L)[N*L]
+
+/* Check results vs expected values. Operates on one vector.  */
+#define CHECK(MSG,T,W,N,FMT,EXPECTED,COMMENT)                          \
+  {                                                                    \
+    int i;                                                             \
+    for(i=0; i<N ; i++)                                                        \
+      {                                                                        \
+       if (VECT_VAR(result, T, W, N)[i] !=                             \
+           VECT_VAR(EXPECTED, T, W, N)[i]) {                           \
+         fprintf(stderr,                                               \
+                 "ERROR in %s (%s line %d in buffer '%s') at type %s " \
+                 "index %d: got 0x%" FMT " != 0x%" FMT " %s\n",        \
+                 MSG, __FILE__, __LINE__,                              \
+                 STR(EXPECTED),                                        \
+                 STR(VECT_NAME(T, W, N)),                              \
+                 i,                                                    \
+                 VECT_VAR(result, T, W, N)[i],                         \
+                 VECT_VAR(EXPECTED, T, W, N)[i],                       \
+                 strlen(COMMENT) > 0 ? COMMENT : "");                  \
+         abort();                                                      \
+       }                                                               \
+      }                                                                        \
+  }
+
+/* Floating-point variant.  */
+#define CHECK_FP(MSG,T,W,N,FMT,EXPECTED,COMMENT)                       \
+  {                                                                    \
+    int i;                                                             \
+    for(i=0; i<N ; i++)                                                        \
+      {                                                                        \
+       union fp_operand {                                              \
+         uint##W##_t i;                                                \
+         float##W##_t f;                                               \
+       } tmp_res, tmp_exp;                                             \
+       tmp_res.f = VECT_VAR(result, T, W, N)[i];                       \
+       tmp_exp.i = VECT_VAR(EXPECTED, h##T, W, N)[i];                  \
+       if (tmp_res.i != tmp_exp.i) {                                   \
+         fprintf(stderr,                                               \
+                 "ERROR in %s (%s line %d in buffer '%s') at type %s " \
+                 "index %d: got 0x%" FMT " != 0x%" FMT " %s\n",        \
+                 MSG, __FILE__, __LINE__,                              \
+                 STR(EXPECTED),                                        \
+                 STR(VECT_NAME(T, W, N)),                              \
+                 i,                                                    \
+                 tmp_res.i,                                            \
+                 tmp_exp.i,                                            \
+                 strlen(COMMENT) > 0 ? COMMENT : "");                  \
+         abort();                                                      \
+       }                                                               \
+      }                                                                        \
+  }
+
+/* Clean buffer with a non-zero pattern to help diagnose buffer
+   overflows.  */
+#define CLEAN_PATTERN_8  0x33
+
+#define CLEAN(VAR,T,W,N)                                               \
+  memset(VECT_VAR(VAR, T, W, N),                                       \
+        CLEAN_PATTERN_8,                                               \
+        sizeof(VECT_VAR(VAR, T, W, N)));
+
+/* Define output buffers, one of each size.  */
+static ARRAY(result, int, 8, 8);
+static ARRAY(result, int, 16, 4);
+static ARRAY(result, int, 32, 2);
+static ARRAY(result, int, 64, 1);
+static ARRAY(result, uint, 8, 8);
+static ARRAY(result, uint, 16, 4);
+static ARRAY(result, uint, 32, 2);
+static ARRAY(result, uint, 64, 1);
+static ARRAY(result, poly, 8, 8);
+static ARRAY(result, poly, 16, 4);
+static ARRAY(result, float, 32, 2);
+static ARRAY(result, int, 8, 16);
+static ARRAY(result, int, 16, 8);
+static ARRAY(result, int, 32, 4);
+static ARRAY(result, int, 64, 2);
+static ARRAY(result, uint, 8, 16);
+static ARRAY(result, uint, 16, 8);
+static ARRAY(result, uint, 32, 4);
+static ARRAY(result, uint, 64, 2);
+static ARRAY(result, poly, 8, 16);
+static ARRAY(result, poly, 16, 8);
+static ARRAY(result, float, 32, 4);
+
+/* Declare expected results, one of each size. They are defined and
+   initialized in each test file.  */
+extern ARRAY(expected, int, 8, 8);
+extern ARRAY(expected, int, 16, 4);
+extern ARRAY(expected, int, 32, 2);
+extern ARRAY(expected, int, 64, 1);
+extern ARRAY(expected, uint, 8, 8);
+extern ARRAY(expected, uint, 16, 4);
+extern ARRAY(expected, uint, 32, 2);
+extern ARRAY(expected, uint, 64, 1);
+extern ARRAY(expected, poly, 8, 8);
+extern ARRAY(expected, poly, 16, 4);
+extern ARRAY(expected, hfloat, 32, 2);
+extern ARRAY(expected, int, 8, 16);
+extern ARRAY(expected, int, 16, 8);
+extern ARRAY(expected, int, 32, 4);
+extern ARRAY(expected, int, 64, 2);
+extern ARRAY(expected, uint, 8, 16);
+extern ARRAY(expected, uint, 16, 8);
+extern ARRAY(expected, uint, 32, 4);
+extern ARRAY(expected, uint, 64, 2);
+extern ARRAY(expected, poly, 8, 16);
+extern ARRAY(expected, poly, 16, 8);
+extern ARRAY(expected, hfloat, 32, 4);
+
+/* Check results. Operates on all possible vector types.  */
+#define CHECK_RESULTS(test_name,comment)                               \
+  {                                                                    \
+    CHECK(test_name, int, 8, 8, PRIx8, expected, comment);             \
+    CHECK(test_name, int, 16, 4, PRIx16, expected, comment);           \
+    CHECK(test_name, int, 32, 2, PRIx32, expected, comment);           \
+    CHECK(test_name, int, 64, 1, PRIx64, expected, comment);           \
+    CHECK(test_name, uint, 8, 8, PRIx8, expected, comment);            \
+    CHECK(test_name, uint, 16, 4, PRIx16, expected, comment);          \
+    CHECK(test_name, uint, 32, 2, PRIx32, expected, comment);          \
+    CHECK(test_name, uint, 64, 1, PRIx64, expected, comment);          \
+    CHECK(test_name, poly, 8, 8, PRIx8, expected, comment);            \
+    CHECK(test_name, poly, 16, 4, PRIx16, expected, comment);          \
+    CHECK_FP(test_name, float, 32, 2, PRIx32, expected, comment);      \
+                                                                       \
+    CHECK(test_name, int, 8, 16, PRIx8, expected, comment);            \
+    CHECK(test_name, int, 16, 8, PRIx16, expected, comment);           \
+    CHECK(test_name, int, 32, 4, PRIx32, expected, comment);           \
+    CHECK(test_name, int, 64, 2, PRIx64, expected, comment);           \
+    CHECK(test_name, uint, 8, 16, PRIx8, expected, comment);           \
+    CHECK(test_name, uint, 16, 8, PRIx16, expected, comment);          \
+    CHECK(test_name, uint, 32, 4, PRIx32, expected, comment);          \
+    CHECK(test_name, uint, 64, 2, PRIx64, expected, comment);          \
+    CHECK(test_name, poly, 8, 16, PRIx8, expected, comment);           \
+    CHECK(test_name, poly, 16, 8, PRIx16, expected, comment);          \
+    CHECK_FP(test_name, float, 32, 4, PRIx32, expected, comment);      \
+  }                                                                    \
+
+#define CHECK_RESULTS_NAMED(test_name,EXPECTED,comment)                        \
+  {                                                                    \
+    CHECK(test_name, int, 8, 8, PRIx8, EXPECTED, comment);             \
+    CHECK(test_name, int, 16, 4, PRIx16, EXPECTED, comment);           \
+    CHECK(test_name, int, 32, 2, PRIx32, EXPECTED, comment);           \
+    CHECK(test_name, int, 64, 1, PRIx64, EXPECTED, comment);           \
+    CHECK(test_name, uint, 8, 8, PRIx8, EXPECTED, comment);            \
+    CHECK(test_name, uint, 16, 4, PRIx16, EXPECTED, comment);          \
+    CHECK(test_name, uint, 32, 2, PRIx32, EXPECTED, comment);          \
+    CHECK(test_name, uint, 64, 1, PRIx64, EXPECTED, comment);          \
+    CHECK(test_name, poly, 8, 8, PRIx8, EXPECTED, comment);            \
+    CHECK(test_name, poly, 16, 4, PRIx16, EXPECTED, comment);          \
+    CHECK_FP(test_name, float, 32, 2, PRIx32, EXPECTED, comment);      \
+                                                                       \
+    CHECK(test_name, int, 8, 16, PRIx8, EXPECTED, comment);            \
+    CHECK(test_name, int, 16, 8, PRIx16, EXPECTED, comment);           \
+    CHECK(test_name, int, 32, 4, PRIx32, EXPECTED, comment);           \
+    CHECK(test_name, int, 64, 2, PRIx64, EXPECTED, comment);           \
+    CHECK(test_name, uint, 8, 16, PRIx8, EXPECTED, comment);           \
+    CHECK(test_name, uint, 16, 8, PRIx16, EXPECTED, comment);          \
+    CHECK(test_name, uint, 32, 4, PRIx32, EXPECTED, comment);          \
+    CHECK(test_name, uint, 64, 2, PRIx64, EXPECTED, comment);          \
+    CHECK(test_name, poly, 8, 16, PRIx8, EXPECTED, comment);           \
+    CHECK(test_name, poly, 16, 8, PRIx16, EXPECTED, comment);          \
+    CHECK_FP(test_name, float, 32, 4, PRIx32, EXPECTED, comment);      \
+  }                                                                    \
+
+
+
+#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
+
+typedef union {
+  struct {
+    int _xxx:27;
+    unsigned int QC:1;
+    int V:1;
+    int C:1;
+    int Z:1;
+    int N:1;
+  } b;
+  unsigned int word;
+} _ARM_FPSCR;
+
+#else /* __ORDER_BIG_ENDIAN__ */
+
+typedef union {
+  struct {
+    int N:1;
+    int Z:1;
+    int C:1;
+    int V:1;
+    unsigned int QC:1;
+    int _dnm:27;
+  } b;
+  unsigned int word;
+} _ARM_FPSCR;
+
+#endif /* __ORDER_BIG_ENDIAN__ */
+
+#define Neon_Cumulative_Sat  __read_neon_cumulative_sat()
+#define Set_Neon_Cumulative_Sat(x)  __set_neon_cumulative_sat((x))
+
+#if defined(__aarch64__)
+static volatile int __read_neon_cumulative_sat (void) {
+    _ARM_FPSCR _afpscr_for_qc;
+    asm volatile ("mrs %0,fpsr" : "=r" (_afpscr_for_qc));
+    return _afpscr_for_qc.b.QC;
+}
+static void __set_neon_cumulative_sat (int x) {
+    _ARM_FPSCR _afpscr_for_qc;
+    asm volatile ("mrs %0,fpsr" : "=r" (_afpscr_for_qc));
+    _afpscr_for_qc.b.QC = x;
+    asm volatile ("msr fpsr,%0" : : "r" (_afpscr_for_qc));
+    return;
+}
+#else
+static volatile int __read_neon_cumulative_sat (void) {
+    _ARM_FPSCR _afpscr_for_qc;
+    asm volatile ("vmrs %0,fpscr" : "=r" (_afpscr_for_qc));
+    return _afpscr_for_qc.b.QC;
+}
+
+static void __set_neon_cumulative_sat (int x) {
+    _ARM_FPSCR _afpscr_for_qc;
+    asm volatile ("vmrs %0,fpscr" : "=r" (_afpscr_for_qc));
+    _afpscr_for_qc.b.QC = x;
+    asm volatile ("vmsr fpscr,%0" : : "r" (_afpscr_for_qc));
+    return;
+}
+#endif
+
+/* Declare expected cumulative saturation results, one for each
+   size. They are defined and initialized in relevant test files.  */
+extern int VECT_VAR(expected_cumulative_sat, int, 8, 8);
+extern int VECT_VAR(expected_cumulative_sat, int, 16, 4);
+extern int VECT_VAR(expected_cumulative_sat, int, 32, 2);
+extern int VECT_VAR(expected_cumulative_sat, int, 64, 1);
+extern int VECT_VAR(expected_cumulative_sat, uint, 8, 8);
+extern int VECT_VAR(expected_cumulative_sat, uint, 16, 4);
+extern int VECT_VAR(expected_cumulative_sat, uint, 32, 2);
+extern int VECT_VAR(expected_cumulative_sat, uint, 64, 1);
+extern int VECT_VAR(expected_cumulative_sat, int, 8, 16);
+extern int VECT_VAR(expected_cumulative_sat, int, 16, 8);
+extern int VECT_VAR(expected_cumulative_sat, int, 32, 4);
+extern int VECT_VAR(expected_cumulative_sat, int, 64, 2);
+extern int VECT_VAR(expected_cumulative_sat, uint, 8, 16);
+extern int VECT_VAR(expected_cumulative_sat, uint, 16, 8);
+extern int VECT_VAR(expected_cumulative_sat, uint, 32, 4);
+extern int VECT_VAR(expected_cumulative_sat, uint, 64, 2);
+
+/* Check cumulative saturation flag vs expected value.  */
+#define CHECK_CUMULATIVE_SAT(MSG,T,W,N,EXPECTED,COMMENT)               \
+  {                                                                    \
+    if (Neon_Cumulative_Sat !=                                         \
+       VECT_VAR(EXPECTED, T, W, N)) {                                  \
+      fprintf(stderr,                                                  \
+             "ERROR in %s (%s line %d in cumulative_sat '%s') at type %s: " \
+             "got %d expected %d%s\n",                                 \
+             MSG, __FILE__, __LINE__,                                  \
+             STR(EXPECTED),                                            \
+             STR(VECT_NAME(T, W, N)),                                  \
+             Neon_Cumulative_Sat,                                      \
+             VECT_VAR(EXPECTED, T, W, N),                              \
+             strlen(COMMENT) > 0 ? " " COMMENT : "");                  \
+      abort();                                                         \
+    }                                                                  \
+  }
+
+#define CHECK_CUMULATIVE_SAT_NAMED(test_name,EXPECTED,comment)         \
+  {                                                                    \
+    CHECK_CUMULATIVE_SAT(test_name, int, 8, 8, PRIx8, EXPECTED, comment); \
+    CHECK_CUMULATIVE_SAT(test_name, int, 16, 4, PRIx16, EXPECTED, comment); \
+    CHECK_CUMULATIVE_SAT(test_name, int, 32, 2, PRIx32, EXPECTED, comment); \
+    CHECK_CUMULATIVE_SAT(test_name, int, 64, 1, PRIx64, EXPECTED, comment); \
+    CHECK_CUMULATIVE_SAT(test_name, uint, 8, 8, PRIx8, EXPECTED, comment); \
+    CHECK_CUMULATIVE_SAT(test_name, uint, 16, 4, PRIx16, EXPECTED, comment); \
+    CHECK_CUMULATIVE_SAT(test_name, uint, 32, 2, PRIx32, EXPECTED, comment); \
+    CHECK_CUMULATIVE_SAT(test_name, uint, 64, 1, PRIx64, EXPECTED, comment); \
+    CHECK_CUMULATIVE_SAT(test_name, poly, 8, 8, PRIx8, EXPECTED, comment); \
+    CHECK_CUMULATIVE_SAT(test_name, poly, 16, 4, PRIx16, EXPECTED, comment); \
+    CHECK_CUMULATIVE_SAT_FP(test_name, float, 32, 2, PRIx32, EXPECTED, comment); \
+                                                                       \
+    CHECK_CUMULATIVE_SAT(test_name, int, 8, 16, PRIx8, EXPECTED, comment); \
+    CHECK_CUMULATIVE_SAT(test_name, int, 16, 8, PRIx16, EXPECTED, comment); \
+    CHECK_CUMULATIVE_SAT(test_name, int, 32, 4, PRIx32, EXPECTED, comment); \
+    CHECK_CUMULATIVE_SAT(test_name, int, 64, 2, PRIx64, EXPECTED, comment); \
+    CHECK_CUMULATIVE_SAT(test_name, uint, 8, 16, PRIx8, EXPECTED, comment); \
+    CHECK_CUMULATIVE_SAT(test_name, uint, 16, 8, PRIx16, EXPECTED, comment); \
+    CHECK_CUMULATIVE_SAT(test_name, uint, 32, 4, PRIx32, EXPECTED, comment); \
+    CHECK_CUMULATIVE_SAT(test_name, uint, 64, 2, PRIx64, EXPECTED, comment); \
+    CHECK_CUMULATIVE_SAT(test_name, poly, 8, 16, PRIx8, EXPECTED, comment); \
+    CHECK_CUMULATIVE_SAT(test_name, poly, 16, 8, PRIx16, EXPECTED, comment); \
+    CHECK_CUMULATIVE_SAT_FP(test_name, float, 32, 4, PRIx32, EXPECTED, comment); \
+  }                                                                    \
+
+
+/* Clean output buffers before execution.  */
+static void clean_results (void)
+{
+  CLEAN(result, int, 8, 8);
+  CLEAN(result, int, 16, 4);
+  CLEAN(result, int, 32, 2);
+  CLEAN(result, int, 64, 1);
+  CLEAN(result, uint, 8, 8);
+  CLEAN(result, uint, 16, 4);
+  CLEAN(result, uint, 32, 2);
+  CLEAN(result, uint, 64, 1);
+  CLEAN(result, poly, 8, 8);
+  CLEAN(result, poly, 16, 4);
+  CLEAN(result, float, 32, 2);
+
+  CLEAN(result, int, 8, 16);
+  CLEAN(result, int, 16, 8);
+  CLEAN(result, int, 32, 4);
+  CLEAN(result, int, 64, 2);
+  CLEAN(result, uint, 8, 16);
+  CLEAN(result, uint, 16, 8);
+  CLEAN(result, uint, 32, 4);
+  CLEAN(result, uint, 64, 2);
+  CLEAN(result, poly, 8, 16);
+  CLEAN(result, poly, 16, 8);
+  CLEAN(result, float, 32, 4);
+}
+
+
+/* Helpers to declare variables of various types.   */
+#define DECL_VARIABLE(VAR, T1, W, N)           \
+  VECT_TYPE(T1, W, N) VECT_VAR(VAR, T1, W, N)
+
+/* Declare only 64 bits signed variants.  */
+#define DECL_VARIABLE_64BITS_SIGNED_VARIANTS(VAR)      \
+  DECL_VARIABLE(VAR, int, 8, 8);                       \
+  DECL_VARIABLE(VAR, int, 16, 4);                      \
+  DECL_VARIABLE(VAR, int, 32, 2);                      \
+  DECL_VARIABLE(VAR, int, 64, 1)
+
+/* Declare only 64 bits unsigned variants.  */
+#define DECL_VARIABLE_64BITS_UNSIGNED_VARIANTS(VAR)    \
+  DECL_VARIABLE(VAR, uint, 8, 8);                      \
+  DECL_VARIABLE(VAR, uint, 16, 4);                     \
+  DECL_VARIABLE(VAR, uint, 32, 2);                     \
+  DECL_VARIABLE(VAR, uint, 64, 1)
+
+/* Declare only 128 bits signed variants.  */
+#define DECL_VARIABLE_128BITS_SIGNED_VARIANTS(VAR)     \
+  DECL_VARIABLE(VAR, int, 8, 16);                      \
+  DECL_VARIABLE(VAR, int, 16, 8);                      \
+  DECL_VARIABLE(VAR, int, 32, 4);                      \
+  DECL_VARIABLE(VAR, int, 64, 2)
+
+/* Declare only 128 bits unsigned variants.  */
+#define DECL_VARIABLE_128BITS_UNSIGNED_VARIANTS(VAR)   \
+  DECL_VARIABLE(VAR, uint, 8, 16);                     \
+  DECL_VARIABLE(VAR, uint, 16, 8);                     \
+  DECL_VARIABLE(VAR, uint, 32, 4);                     \
+  DECL_VARIABLE(VAR, uint, 64, 2)
+
+/* Declare all 64 bits variants.  */
+#define DECL_VARIABLE_64BITS_VARIANTS(VAR)     \
+  DECL_VARIABLE_64BITS_SIGNED_VARIANTS(VAR);   \
+  DECL_VARIABLE_64BITS_UNSIGNED_VARIANTS(VAR); \
+  DECL_VARIABLE(VAR, poly, 8, 8);              \
+  DECL_VARIABLE(VAR, poly, 16, 4);             \
+  DECL_VARIABLE(VAR, float, 32, 2)
+
+/* Declare all 128 bits variants.  */
+#define DECL_VARIABLE_128BITS_VARIANTS(VAR)    \
+  DECL_VARIABLE_128BITS_SIGNED_VARIANTS(VAR);  \
+  DECL_VARIABLE_128BITS_UNSIGNED_VARIANTS(VAR);        \
+  DECL_VARIABLE(VAR, poly, 8, 16);             \
+  DECL_VARIABLE(VAR, poly, 16, 8);             \
+  DECL_VARIABLE(VAR, float, 32, 4)
+
+/* Declare all variants.  */
+#define DECL_VARIABLE_ALL_VARIANTS(VAR)                \
+  DECL_VARIABLE_64BITS_VARIANTS(VAR);          \
+  DECL_VARIABLE_128BITS_VARIANTS(VAR)
+
+/* Declare all signed variants.  */
+#define DECL_VARIABLE_SIGNED_VARIANTS(VAR)     \
+  DECL_VARIABLE_64BITS_SIGNED_VARIANTS(VAR);   \
+  DECL_VARIABLE_128BITS_SIGNED_VARIANTS(VAR)
+
+/* Declare all unsigned variants.  */
+#define DECL_VARIABLE_UNSIGNED_VARIANTS(VAR)   \
+  DECL_VARIABLE_64BITS_UNSIGNED_VARIANTS(VAR); \
+  DECL_VARIABLE_128BITS_UNSIGNED_VARIANTS(VAR)
+
+/* Helpers to initialize vectors.  */
+#define VDUP(VAR, Q, T1, T2, W, N, V)                  \
+  VECT_VAR(VAR, T1, W, N) = vdup##Q##_n_##T2##W(V)
+
+#define VSET_LANE(VAR, Q, T1, T2, W, N, L, V)                          \
+  VECT_VAR(VAR, T1, W, N) = vset##Q##_lane_##T2##W(V,                  \
+                                                  VECT_VAR(VAR, T1, W, N), \
+                                                  L)
+
+/* We need to load initial values first, so rely on VLD1.  */
+#define VLOAD(VAR, BUF, Q, T1, T2, W, N)                               \
+  VECT_VAR(VAR, T1, W, N) = vld1##Q##_##T2##W(VECT_VAR(BUF, T1, W, N))
+
+/* Helpers to call macros with 1 constant and 5 variable
+   arguments.  */
+#define TEST_MACRO_64BITS_SIGNED_VARIANTS_1_5(MACRO, VAR)      \
+  MACRO(VAR, , int, s, 8, 8);                                  \
+  MACRO(VAR, , int, s, 16, 4);                                 \
+  MACRO(VAR, , int, s, 32, 2);                                 \
+  MACRO(VAR, , int, s, 64, 1)
+
+#define TEST_MACRO_64BITS_UNSIGNED_VARIANTS_1_5(MACRO, VAR)    \
+  MACRO(VAR, , uint, u, 8, 8);                                 \
+  MACRO(VAR, , uint, u, 16, 4);                                        \
+  MACRO(VAR, , uint, u, 32, 2);                                        \
+  MACRO(VAR, , uint, u, 64, 1)
+
+#define TEST_MACRO_128BITS_SIGNED_VARIANTS_1_5(MACRO, VAR)     \
+  MACRO(VAR, q, int, s, 8, 16);                                        \
+  MACRO(VAR, q, int, s, 16, 8);                                        \
+  MACRO(VAR, q, int, s, 32, 4);                                        \
+  MACRO(VAR, q, int, s, 64, 2)
+
+#define TEST_MACRO_128BITS_UNSIGNED_VARIANTS_1_5(MACRO,VAR)    \
+  MACRO(VAR, q, uint, u, 8, 16);                               \
+  MACRO(VAR, q, uint, u, 16, 8);                               \
+  MACRO(VAR, q, uint, u, 32, 4);                               \
+  MACRO(VAR, q, uint, u, 64, 2)
+
+#define TEST_MACRO_64BITS_VARIANTS_1_5(MACRO, VAR)     \
+  TEST_MACRO_64BITS_SIGNED_VARIANTS_1_5(MACRO, VAR);   \
+  TEST_MACRO_64BITS_UNSIGNED_VARIANTS_1_5(MACRO, VAR)
+
+#define TEST_MACRO_128BITS_VARIANTS_1_5(MACRO, VAR)    \
+  TEST_MACRO_128BITS_SIGNED_VARIANTS_1_5(MACRO, VAR);  \
+  TEST_MACRO_128BITS_UNSIGNED_VARIANTS_1_5(MACRO, VAR)
+
+#define TEST_MACRO_ALL_VARIANTS_1_5(MACRO, VAR)        \
+  TEST_MACRO_64BITS_VARIANTS_1_5(MACRO, VAR);  \
+  TEST_MACRO_128BITS_VARIANTS_1_5(MACRO, VAR)
+
+#define TEST_MACRO_SIGNED_VARIANTS_1_5(MACRO, VAR)     \
+  TEST_MACRO_64BITS_SIGNED_VARIANTS_1_5(MACRO, VAR);   \
+  TEST_MACRO_128BITS_SIGNED_VARIANTS_1_5(MACRO, VAR)
+
+/* Helpers to call macros with 2 constant and 5 variable
+   arguments.  */
+#define TEST_MACRO_64BITS_SIGNED_VARIANTS_2_5(MACRO, VAR1, VAR2)       \
+  MACRO(VAR1, VAR2, , int, s, 8, 8);                                   \
+  MACRO(VAR1, VAR2, , int, s, 16, 4);                                  \
+  MACRO(VAR1, VAR2, , int, s, 32, 2);                                  \
+  MACRO(VAR1, VAR2 , , int, s, 64, 1)
+
+#define TEST_MACRO_64BITS_UNSIGNED_VARIANTS_2_5(MACRO, VAR1, VAR2)     \
+  MACRO(VAR1, VAR2, , uint, u, 8, 8);                                  \
+  MACRO(VAR1, VAR2, , uint, u, 16, 4);                                 \
+  MACRO(VAR1, VAR2, , uint, u, 32, 2);                                 \
+  MACRO(VAR1, VAR2, , uint, u, 64, 1)
+
+#define TEST_MACRO_128BITS_SIGNED_VARIANTS_2_5(MACRO, VAR1, VAR2)      \
+  MACRO(VAR1, VAR2, q, int, s, 8, 16);                                 \
+  MACRO(VAR1, VAR2, q, int, s, 16, 8);                                 \
+  MACRO(VAR1, VAR2, q, int, s, 32, 4);                                 \
+  MACRO(VAR1, VAR2, q, int, s, 64, 2)
+
+#define TEST_MACRO_128BITS_UNSIGNED_VARIANTS_2_5(MACRO, VAR1, VAR2)    \
+  MACRO(VAR1, VAR2, q, uint, u, 8, 16);                                        \
+  MACRO(VAR1, VAR2, q, uint, u, 16, 8);                                        \
+  MACRO(VAR1, VAR2, q, uint, u, 32, 4);                                        \
+  MACRO(VAR1, VAR2, q, uint, u, 64, 2)
+
+#define TEST_MACRO_64BITS_VARIANTS_2_5(MACRO, VAR1, VAR2)      \
+  TEST_MACRO_64BITS_SIGNED_VARIANTS_2_5(MACRO, VAR1, VAR2);    \
+  TEST_MACRO_64BITS_UNSIGNED_VARIANTS_2_5(MACRO, VAR1, VAR2);  \
+  MACRO(VAR1, VAR2, , poly, p, 8, 8);                          \
+  MACRO(VAR1, VAR2, , poly, p, 16, 4)
+
+#define TEST_MACRO_128BITS_VARIANTS_2_5(MACRO, VAR1, VAR2)     \
+  TEST_MACRO_128BITS_SIGNED_VARIANTS_2_5(MACRO, VAR1, VAR2);   \
+  TEST_MACRO_128BITS_UNSIGNED_VARIANTS_2_5(MACRO, VAR1, VAR2); \
+  MACRO(VAR1, VAR2, q, poly, p, 8, 16);                                \
+  MACRO(VAR1, VAR2, q, poly, p, 16, 8)
+
+#define TEST_MACRO_ALL_VARIANTS_2_5(MACRO, VAR1, VAR2) \
+  TEST_MACRO_64BITS_VARIANTS_2_5(MACRO, VAR1, VAR2);   \
+  TEST_MACRO_128BITS_VARIANTS_2_5(MACRO, VAR1, VAR2)
+
+#define TEST_MACRO_SIGNED_VARIANTS_2_5(MACRO, VAR1, VAR2)      \
+  TEST_MACRO_64BITS_SIGNED_VARIANTS_2_5(MACRO, VAR1, VAR2);    \
+  TEST_MACRO_128BITS_SIGNED_VARIANTS_2_5(MACRO, VAR1, VAR2)
+
+#endif /* _ARM_NEON_REF_H_ */
diff --git a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/binary_op.inc b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/binary_op.inc
new file mode 100644 (file)
index 0000000..3483e0e
--- /dev/null
@@ -0,0 +1,70 @@
+/* Template file for binary operator validation.
+
+   This file is meant to be included by the relevant test files, which
+   have to define the intrinsic family to test. If a given intrinsic
+   supports variants which are not supported by all the other binary
+   operators, these can be tested by providing a definition for
+   EXTRA_TESTS.  */
+
+#include <arm_neon.h>
+#include "arm-neon-ref.h"
+#include "compute-ref-data.h"
+
+#define FNNAME1(NAME) exec_ ## NAME
+#define FNNAME(NAME) FNNAME1(NAME)
+
+void FNNAME (INSN_NAME) (void)
+{
+  /* Basic test: y=OP(x1,x2), then store the result.  */
+#define TEST_BINARY_OP1(INSN, Q, T1, T2, W, N)                         \
+  VECT_VAR(vector_res, T1, W, N) =                                     \
+    INSN##Q##_##T2##W(VECT_VAR(vector, T1, W, N),                      \
+                     VECT_VAR(vector2, T1, W, N));                     \
+  vst1##Q##_##T2##W(VECT_VAR(result, T1, W, N), VECT_VAR(vector_res, T1, W, N))
+
+#define TEST_BINARY_OP(INSN, Q, T1, T2, W, N)                          \
+  TEST_BINARY_OP1(INSN, Q, T1, T2, W, N)                               \
+
+  DECL_VARIABLE_ALL_VARIANTS(vector);
+  DECL_VARIABLE_ALL_VARIANTS(vector2);
+  DECL_VARIABLE_ALL_VARIANTS(vector_res);
+
+  clean_results ();
+
+  /* Initialize input "vector" from "buffer".  */
+  TEST_MACRO_ALL_VARIANTS_2_5(VLOAD, vector, buffer);
+
+  /* Fill input vector2 with arbitrary values.  */
+  VDUP(vector2, , int, s, 8, 8, 2);
+  VDUP(vector2, , int, s, 16, 4, -4);
+  VDUP(vector2, , int, s, 32, 2, 3);
+  VDUP(vector2, , int, s, 64, 1, 100);
+  VDUP(vector2, , uint, u, 8, 8, 20);
+  VDUP(vector2, , uint, u, 16, 4, 30);
+  VDUP(vector2, , uint, u, 32, 2, 40);
+  VDUP(vector2, , uint, u, 64, 1, 2);
+  VDUP(vector2, q, int, s, 8, 16, -10);
+  VDUP(vector2, q, int, s, 16, 8, -20);
+  VDUP(vector2, q, int, s, 32, 4, -30);
+  VDUP(vector2, q, int, s, 64, 2, 24);
+  VDUP(vector2, q, uint, u, 8, 16, 12);
+  VDUP(vector2, q, uint, u, 16, 8, 3);
+  VDUP(vector2, q, uint, u, 32, 4, 55);
+  VDUP(vector2, q, uint, u, 64, 2, 3);
+
+  /* Apply a binary operator named INSN_NAME.  */
+  TEST_MACRO_ALL_VARIANTS_1_5(TEST_BINARY_OP, INSN_NAME);
+
+  CHECK_RESULTS (TEST_MSG, "");
+
+#ifdef EXTRA_TESTS
+  EXTRA_TESTS();
+#endif
+}
+
+int main (void)
+{
+  FNNAME (INSN_NAME) ();
+
+  return 0;
+}
diff --git a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/binary_sat_op.inc b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/binary_sat_op.inc
new file mode 100644 (file)
index 0000000..35d7701
--- /dev/null
@@ -0,0 +1,91 @@
+/* Template file for saturating binary operator validation.
+
+   This file is meant to be included by the relevant test files, which
+   have to define the intrinsic family to test. If a given intrinsic
+   supports variants which are not supported by all the other
+   saturating binary operators, these can be tested by providing a
+   definition for EXTRA_TESTS.  */
+
+#include <arm_neon.h>
+#include "arm-neon-ref.h"
+#include "compute-ref-data.h"
+
+#define FNNAME1(NAME) exec_ ## NAME
+#define FNNAME(NAME) FNNAME1(NAME)
+
+void FNNAME (INSN_NAME) (void)
+{
+  /* vector_res = OP(vector1,vector2), then store the result.  */
+
+#define TEST_BINARY_SAT_OP1(INSN, Q, T1, T2, W, N, EXPECTED_CUMULATIVE_SAT, CMT) \
+  Set_Neon_Cumulative_Sat(0);                                          \
+  VECT_VAR(vector_res, T1, W, N) =                                     \
+    INSN##Q##_##T2##W(VECT_VAR(vector1, T1, W, N),                     \
+                     VECT_VAR(vector2, T1, W, N));                     \
+    vst1##Q##_##T2##W(VECT_VAR(result, T1, W, N),                      \
+                     VECT_VAR(vector_res, T1, W, N));                  \
+      CHECK_CUMULATIVE_SAT(TEST_MSG, T1, W, N, EXPECTED_CUMULATIVE_SAT, CMT)
+
+#define TEST_BINARY_SAT_OP(INSN, Q, T1, T2, W, N, EXPECTED_CUMULATIVE_SAT, CMT) \
+  TEST_BINARY_SAT_OP1(INSN, Q, T1, T2, W, N, EXPECTED_CUMULATIVE_SAT, CMT)
+
+  DECL_VARIABLE_ALL_VARIANTS(vector1);
+  DECL_VARIABLE_ALL_VARIANTS(vector2);
+  DECL_VARIABLE_ALL_VARIANTS(vector_res);
+
+  clean_results ();
+
+  /* Initialize input "vector1" from "buffer".  */
+  TEST_MACRO_ALL_VARIANTS_2_5(VLOAD, vector1, buffer);
+
+  /* Choose arbitrary initialization values.  */
+  VDUP(vector2, , int, s, 8, 8, 0x11);
+  VDUP(vector2, , int, s, 16, 4, 0x22);
+  VDUP(vector2, , int, s, 32, 2, 0x33);
+  VDUP(vector2, , int, s, 64, 1, 0x44);
+  VDUP(vector2, , uint, u, 8, 8, 0x55);
+  VDUP(vector2, , uint, u, 16, 4, 0x66);
+  VDUP(vector2, , uint, u, 32, 2, 0x77);
+  VDUP(vector2, , uint, u, 64, 1, 0x88);
+
+  VDUP(vector2, q, int, s, 8, 16, 0x11);
+  VDUP(vector2, q, int, s, 16, 8, 0x22);
+  VDUP(vector2, q, int, s, 32, 4, 0x33);
+  VDUP(vector2, q, int, s, 64, 2, 0x44);
+  VDUP(vector2, q, uint, u, 8, 16, 0x55);
+  VDUP(vector2, q, uint, u, 16, 8, 0x66);
+  VDUP(vector2, q, uint, u, 32, 4, 0x77);
+  VDUP(vector2, q, uint, u, 64, 2, 0x88);
+
+  /* Apply a saturating binary operator named INSN_NAME.  */
+  TEST_BINARY_SAT_OP(INSN_NAME, , int, s, 8, 8, expected_cumulative_sat, "");
+  TEST_BINARY_SAT_OP(INSN_NAME, , int, s, 16, 4, expected_cumulative_sat, "");
+  TEST_BINARY_SAT_OP(INSN_NAME, , int, s, 32, 2, expected_cumulative_sat, "");
+  TEST_BINARY_SAT_OP(INSN_NAME, , int, s, 64, 1, expected_cumulative_sat, "");
+  TEST_BINARY_SAT_OP(INSN_NAME, , uint, u, 8, 8, expected_cumulative_sat, "");
+  TEST_BINARY_SAT_OP(INSN_NAME, , uint, u, 16, 4, expected_cumulative_sat, "");
+  TEST_BINARY_SAT_OP(INSN_NAME, , uint, u, 32, 2, expected_cumulative_sat, "");
+  TEST_BINARY_SAT_OP(INSN_NAME, , uint, u, 64, 1, expected_cumulative_sat, "");
+
+  TEST_BINARY_SAT_OP(INSN_NAME, q, int, s, 8, 16, expected_cumulative_sat, "");
+  TEST_BINARY_SAT_OP(INSN_NAME, q, int, s, 16, 8, expected_cumulative_sat, "");
+  TEST_BINARY_SAT_OP(INSN_NAME, q, int, s, 32, 4, expected_cumulative_sat, "");
+  TEST_BINARY_SAT_OP(INSN_NAME, q, int, s, 64, 2, expected_cumulative_sat, "");
+  TEST_BINARY_SAT_OP(INSN_NAME, q, uint, u, 8, 16, expected_cumulative_sat, "");
+  TEST_BINARY_SAT_OP(INSN_NAME, q, uint, u, 16, 8, expected_cumulative_sat, "");
+  TEST_BINARY_SAT_OP(INSN_NAME, q, uint, u, 32, 4, expected_cumulative_sat, "");
+  TEST_BINARY_SAT_OP(INSN_NAME, q, uint, u, 64, 2, expected_cumulative_sat, "");
+
+  CHECK_RESULTS (TEST_MSG, "");
+
+#ifdef EXTRA_TESTS
+  EXTRA_TESTS();
+#endif
+}
+
+int main (void)
+{
+  FNNAME (INSN_NAME) ();
+
+  return 0;
+}
diff --git a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/cmp_fp_op.inc b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/cmp_fp_op.inc
new file mode 100644 (file)
index 0000000..33451d7
--- /dev/null
@@ -0,0 +1,75 @@
+/* Template file for the validation of comparison operator with
+   floating-point support.
+
+   This file is meant to be included by the relevant test files, which
+   have to define the intrinsic family to test. If a given intrinsic
+   supports variants which are not supported by all the other
+   operators, these can be tested by providing a definition for
+   EXTRA_TESTS.  */
+
+#include <arm_neon.h>
+#include "arm-neon-ref.h"
+#include "compute-ref-data.h"
+
+/* Additional expected results declaration, they are initialized in
+   each test file.  */
+extern ARRAY(expected2, uint, 32, 2);
+extern ARRAY(expected2, uint, 32, 4);
+
+#define FNNAME1(NAME) exec_ ## NAME
+#define FNNAME(NAME) FNNAME1(NAME)
+
+void FNNAME (INSN_NAME) (void)
+{
+  /* Basic test: y=vcomp(x1,x2), then store the result.  */
+#define TEST_VCOMP1(INSN, Q, T1, T2, T3, W, N)                         \
+  VECT_VAR(vector_res, T3, W, N) =                                     \
+    INSN##Q##_##T2##W(VECT_VAR(vector, T1, W, N),                      \
+                     VECT_VAR(vector2, T1, W, N));                     \
+  vst1##Q##_u##W(VECT_VAR(result, T3, W, N), VECT_VAR(vector_res, T3, W, N))
+
+#define TEST_VCOMP(INSN, Q, T1, T2, T3, W, N)                          \
+  TEST_VCOMP1(INSN, Q, T1, T2, T3, W, N)
+
+  DECL_VARIABLE(vector, float, 32, 2);
+  DECL_VARIABLE(vector, float, 32, 4);
+  DECL_VARIABLE(vector2, float, 32, 2);
+  DECL_VARIABLE(vector2, float, 32, 4);
+  DECL_VARIABLE(vector_res, uint, 32, 2);
+  DECL_VARIABLE(vector_res, uint, 32, 4);
+
+  clean_results ();
+
+  /* Initialize input "vector" from "buffer".  */
+  VLOAD(vector, buffer, , float, f, 32, 2);
+  VLOAD(vector, buffer, q, float, f, 32, 4);
+
+  /* Choose init value arbitrarily, will be used for vector
+     comparison.  */
+  VDUP(vector2, , float, f, 32, 2, -16.0f);
+  VDUP(vector2, q, float, f, 32, 4, -14.0f);
+
+  /* Apply operator named INSN_NAME.  */
+  TEST_VCOMP(INSN_NAME, , float, f, uint, 32, 2);
+  CHECK(TEST_MSG, uint, 32, 2, PRIx32, expected, "");
+
+  TEST_VCOMP(INSN_NAME, q, float, f, uint, 32, 4);
+  CHECK(TEST_MSG, uint, 32, 4, PRIx32, expected, "");
+
+  /* Test again, with different input values.  */
+  VDUP(vector2, , float, f, 32, 2, -10.0f);
+  VDUP(vector2, q, float, f, 32, 4, 10.0f);
+
+  TEST_VCOMP(INSN_NAME, , float, f, uint, 32, 2);
+  CHECK(TEST_MSG, uint, 32, 2, PRIx32, expected2, "");
+
+  TEST_VCOMP(INSN_NAME, q, float, f, uint, 32, 4);
+  CHECK(TEST_MSG, uint, 32, 4, PRIx32, expected2,"");
+}
+
+int main (void)
+{
+  FNNAME (INSN_NAME) ();
+
+  return 0;
+}
diff --git a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/cmp_op.inc b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/cmp_op.inc
new file mode 100644 (file)
index 0000000..a09c5f5
--- /dev/null
@@ -0,0 +1,224 @@
+#include <arm_neon.h>
+#include "arm-neon-ref.h"
+#include "compute-ref-data.h"
+#include <math.h>
+
+/* Additional expected results declaration, they are initialized in
+   each test file.  */
+extern ARRAY(expected_uint, uint, 8, 8);
+extern ARRAY(expected_uint, uint, 16, 4);
+extern ARRAY(expected_uint, uint, 32, 2);
+extern ARRAY(expected_q_uint, uint, 8, 16);
+extern ARRAY(expected_q_uint, uint, 16, 8);
+extern ARRAY(expected_q_uint, uint, 32, 4);
+extern ARRAY(expected_float, uint, 32, 2);
+extern ARRAY(expected_q_float, uint, 32, 4);
+extern ARRAY(expected_uint2, uint, 32, 2);
+extern ARRAY(expected_uint3, uint, 32, 2);
+extern ARRAY(expected_uint4, uint, 32, 2);
+extern ARRAY(expected_nan, uint, 32, 2);
+extern ARRAY(expected_mnan, uint, 32, 2);
+extern ARRAY(expected_nan2, uint, 32, 2);
+extern ARRAY(expected_inf, uint, 32, 2);
+extern ARRAY(expected_minf, uint, 32, 2);
+extern ARRAY(expected_inf2, uint, 32, 2);
+extern ARRAY(expected_mzero, uint, 32, 2);
+extern ARRAY(expected_p8, uint, 8, 8);
+extern ARRAY(expected_q_p8, uint, 8, 16);
+
+#define FNNAME1(NAME) exec_ ## NAME
+#define FNNAME(NAME) FNNAME1(NAME)
+
+void FNNAME (INSN_NAME) (void)
+{
+  /* Basic test: y=vcomp(x1,x2), then store the result.  */
+#define TEST_VCOMP1(INSN, Q, T1, T2, T3, W, N)                         \
+  VECT_VAR(vector_res, T3, W, N) =                                     \
+    INSN##Q##_##T2##W(VECT_VAR(vector, T1, W, N),                      \
+                     VECT_VAR(vector2, T1, W, N));                     \
+  vst1##Q##_u##W(VECT_VAR(result, T3, W, N), VECT_VAR(vector_res, T3, W, N))
+
+#define TEST_VCOMP(INSN, Q, T1, T2, T3, W, N)                          \
+  TEST_VCOMP1(INSN, Q, T1, T2, T3, W, N)
+
+  /* No need for 64 bits elements.  */
+  DECL_VARIABLE(vector, int, 8, 8);
+  DECL_VARIABLE(vector, int, 16, 4);
+  DECL_VARIABLE(vector, int, 32, 2);
+  DECL_VARIABLE(vector, uint, 8, 8);
+  DECL_VARIABLE(vector, uint, 16, 4);
+  DECL_VARIABLE(vector, uint, 32, 2);
+  DECL_VARIABLE(vector, float, 32, 2);
+  DECL_VARIABLE(vector, int, 8, 16);
+  DECL_VARIABLE(vector, int, 16, 8);
+  DECL_VARIABLE(vector, int, 32, 4);
+  DECL_VARIABLE(vector, uint, 8, 16);
+  DECL_VARIABLE(vector, uint, 16, 8);
+  DECL_VARIABLE(vector, uint, 32, 4);
+  DECL_VARIABLE(vector, float, 32, 4);
+
+  DECL_VARIABLE(vector2, int, 8, 8);
+  DECL_VARIABLE(vector2, int, 16, 4);
+  DECL_VARIABLE(vector2, int, 32, 2);
+  DECL_VARIABLE(vector2, uint, 8, 8);
+  DECL_VARIABLE(vector2, uint, 16, 4);
+  DECL_VARIABLE(vector2, uint, 32, 2);
+  DECL_VARIABLE(vector2, float, 32, 2);
+  DECL_VARIABLE(vector2, int, 8, 16);
+  DECL_VARIABLE(vector2, int, 16, 8);
+  DECL_VARIABLE(vector2, int, 32, 4);
+  DECL_VARIABLE(vector2, uint, 8, 16);
+  DECL_VARIABLE(vector2, uint, 16, 8);
+  DECL_VARIABLE(vector2, uint, 32, 4);
+  DECL_VARIABLE(vector2, float, 32, 4);
+
+  DECL_VARIABLE(vector_res, uint, 8, 8);
+  DECL_VARIABLE(vector_res, uint, 16, 4);
+  DECL_VARIABLE(vector_res, uint, 32, 2);
+  DECL_VARIABLE(vector_res, uint, 8, 16);
+  DECL_VARIABLE(vector_res, uint, 16, 8);
+  DECL_VARIABLE(vector_res, uint, 32, 4);
+
+  clean_results ();
+
+  /* There is no 64 bits variant, don't use the generic initializer.  */
+  VLOAD(vector, buffer, , int, s, 8, 8);
+  VLOAD(vector, buffer, , int, s, 16, 4);
+  VLOAD(vector, buffer, , int, s, 32, 2);
+  VLOAD(vector, buffer, , uint, u, 8, 8);
+  VLOAD(vector, buffer, , uint, u, 16, 4);
+  VLOAD(vector, buffer, , uint, u, 32, 2);
+  VLOAD(vector, buffer, , float, f, 32, 2);
+
+  VLOAD(vector, buffer, q, int, s, 8, 16);
+  VLOAD(vector, buffer, q, int, s, 16, 8);
+  VLOAD(vector, buffer, q, int, s, 32, 4);
+  VLOAD(vector, buffer, q, uint, u, 8, 16);
+  VLOAD(vector, buffer, q, uint, u, 16, 8);
+  VLOAD(vector, buffer, q, uint, u, 32, 4);
+  VLOAD(vector, buffer, q, float, f, 32, 4);
+
+  /* Choose init value arbitrarily, will be used for vector
+     comparison.  */
+  VDUP(vector2, , int, s, 8, 8, -10);
+  VDUP(vector2, , int, s, 16, 4, -14);
+  VDUP(vector2, , int, s, 32, 2, -16);
+  VDUP(vector2, , uint, u, 8, 8, 0xF3);
+  VDUP(vector2, , uint, u, 16, 4, 0xFFF2);
+  VDUP(vector2, , uint, u, 32, 2, 0xFFFFFFF1);
+  VDUP(vector2, , float, f, 32, 2, -15.0f);
+
+  VDUP(vector2, q, int, s, 8, 16, -4);
+  VDUP(vector2, q, int, s, 16, 8, -10);
+  VDUP(vector2, q, int, s, 32, 4, -14);
+  VDUP(vector2, q, uint, u, 8, 16, 0xF4);
+  VDUP(vector2, q, uint, u, 16, 8, 0xFFF6);
+  VDUP(vector2, q, uint, u, 32, 4, 0xFFFFFFF2);
+  VDUP(vector2, q, float, f, 32, 4, -14.0f);
+
+  /* The comparison operators produce only unsigned results, which
+     means that our tests with uint* inputs write their results in the
+     same vectors as the int* variants. As a consequence, we have to
+     execute and test the int* first, then the uint* ones.
+     Same thing for float and poly8.
+  */
+
+  /* Apply operator named INSN_NAME.  */
+  TEST_VCOMP(INSN_NAME, , int, s, uint, 8, 8);
+  TEST_VCOMP(INSN_NAME, , int, s, uint, 16, 4);
+  TEST_VCOMP(INSN_NAME, , int, s, uint, 32, 2);
+  TEST_VCOMP(INSN_NAME, q, int, s, uint, 8, 16);
+  TEST_VCOMP(INSN_NAME, q, int, s, uint, 16, 8);
+  TEST_VCOMP(INSN_NAME, q, int, s, uint, 32, 4);
+
+  CHECK(TEST_MSG, uint, 8, 8, PRIx8, expected, "");
+  CHECK(TEST_MSG, uint, 16, 4, PRIx16, expected, "");
+  CHECK(TEST_MSG, uint, 32, 2, PRIx32, expected, "");
+  CHECK(TEST_MSG, uint, 8, 16, PRIx8, expected, "");
+  CHECK(TEST_MSG, uint, 16, 8, PRIx16, expected, "");
+  CHECK(TEST_MSG, uint, 32, 4, PRIx32, expected, "");
+
+  /* Now the uint* variants.  */
+  TEST_VCOMP(INSN_NAME, , uint, u, uint, 8, 8);
+  TEST_VCOMP(INSN_NAME, , uint, u, uint, 16, 4);
+  TEST_VCOMP(INSN_NAME, , uint, u, uint, 32, 2);
+  TEST_VCOMP(INSN_NAME, q, uint, u, uint, 8, 16);
+  TEST_VCOMP(INSN_NAME, q, uint, u, uint, 16, 8);
+  TEST_VCOMP(INSN_NAME, q, uint, u, uint, 32, 4);
+
+  CHECK(TEST_MSG, uint, 8, 8, PRIx8, expected_uint, "");
+  CHECK(TEST_MSG, uint, 16, 4, PRIx16, expected_uint, "");
+  CHECK(TEST_MSG, uint, 32, 2, PRIx32, expected_uint, "");
+  CHECK(TEST_MSG, uint, 8, 16, PRIx8, expected_q_uint, "");
+  CHECK(TEST_MSG, uint, 16, 8, PRIx16, expected_q_uint, "");
+  CHECK(TEST_MSG, uint, 32, 4, PRIx32, expected_q_uint, "");
+
+  /* The float variants.  */
+  TEST_VCOMP(INSN_NAME, , float, f, uint, 32, 2);
+  CHECK(TEST_MSG, uint, 32, 2, PRIx32, expected_float, "");
+
+  TEST_VCOMP(INSN_NAME, q, float, f, uint, 32, 4);
+  CHECK(TEST_MSG, uint, 32, 4, PRIx32, expected_q_float, "");
+
+  /* Some "special" input values to test some corner cases.  */
+  /* Extra tests to have 100% coverage on all the variants.  */
+  VDUP(vector2, , uint, u, 32, 2, 0xFFFFFFF0);
+  TEST_VCOMP(INSN_NAME, , uint, u, uint, 32, 2);
+  CHECK(TEST_MSG, uint, 32, 2, PRIx32, expected_uint2, "uint 0xfffffff0");
+
+  VDUP(vector2, , int, s, 32, 2, -15);
+  TEST_VCOMP(INSN_NAME, , int, s, uint, 32, 2);
+  CHECK(TEST_MSG, uint, 32, 2, PRIx32, expected_uint3, "int -15");
+
+  VDUP(vector2, , float, f, 32, 2, -16.0f);
+  TEST_VCOMP(INSN_NAME, , float, f, uint, 32, 2);
+  CHECK(TEST_MSG, uint, 32, 2, PRIx32, expected_uint4, "float -16.0f");
+
+
+  /* Extra FP tests with special values (NaN, ....).  */
+  VDUP(vector, , float, f, 32, 2, 1.0);
+  VDUP(vector2, , float, f, 32, 2, NAN);
+  TEST_VCOMP(INSN_NAME, , float, f, uint, 32, 2);
+  CHECK(TEST_MSG, uint, 32, 2, PRIx32, expected_nan, "FP special (NaN)");
+
+  VDUP(vector, , float, f, 32, 2, 1.0);
+  VDUP(vector2, , float, f, 32, 2, -NAN);
+  TEST_VCOMP(INSN_NAME, , float, f, uint, 32, 2);
+  CHECK(TEST_MSG, uint, 32, 2, PRIx32, expected_mnan, " FP special (-NaN)");
+
+  VDUP(vector, , float, f, 32, 2, NAN);
+  VDUP(vector2, , float, f, 32, 2, 1.0);
+  TEST_VCOMP(INSN_NAME, , float, f, uint, 32, 2);
+  CHECK(TEST_MSG, uint, 32, 2, PRIx32, expected_nan2, " FP special (NaN)");
+
+  VDUP(vector, , float, f, 32, 2, 1.0);
+  VDUP(vector2, , float, f, 32, 2, HUGE_VALF);
+  TEST_VCOMP(INSN_NAME, , float, f, uint, 32, 2);
+  CHECK(TEST_MSG, uint, 32, 2, PRIx32, expected_inf, " FP special (inf)");
+
+  VDUP(vector, , float, f, 32, 2, 1.0);
+  VDUP(vector2, , float, f, 32, 2, -HUGE_VALF);
+  TEST_VCOMP(INSN_NAME, , float, f, uint, 32, 2);
+  CHECK(TEST_MSG, uint, 32, 2, PRIx32, expected_minf, " FP special (-inf)");
+
+  VDUP(vector, , float, f, 32, 2, HUGE_VALF);
+  VDUP(vector2, , float, f, 32, 2, 1.0);
+  TEST_VCOMP(INSN_NAME, , float, f, uint, 32, 2);
+  CHECK(TEST_MSG, uint, 32, 2, PRIx32, expected_inf2, " FP special (inf)");
+
+  VDUP(vector, , float, f, 32, 2, -0.0);
+  VDUP(vector2, , float, f, 32, 2, 0.0);
+  TEST_VCOMP(INSN_NAME, , float, f, uint, 32, 2);
+  CHECK(TEST_MSG, uint, 32, 2, PRIx32, expected_mzero, " FP special (-0.0)");
+
+#ifdef EXTRA_TESTS
+  EXTRA_TESTS();
+#endif
+}
+
+int main (void)
+{
+  FNNAME (INSN_NAME) ();
+
+  return 0;
+}
diff --git a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/compute-ref-data.h b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/compute-ref-data.h
new file mode 100644 (file)
index 0000000..13824d4
--- /dev/null
@@ -0,0 +1,193 @@
+/* This file contains input data static definitions, shared by most of
+   the tests.  */
+
+#include <arm_neon.h>
+#include "arm-neon-ref.h"
+
+/* Initialization helpers; 4 slices are needed for vld2, vld3 and
+   vld4.  */
+#define MY_INIT_TABLE(T,W,N) xNAME(INIT_TABLE,N)(T##W##_t)
+#define MY_INIT_TABLE2(T,W,N) xNAME(INIT_TABLE2,N)(T##W##_t)
+#define MY_INIT_TABLE3(T,W,N) xNAME(INIT_TABLE3,N)(T##W##_t)
+#define MY_INIT_TABLE4(T,W,N) xNAME(INIT_TABLE4,N)(T##W##_t)
+
+/* Initialized input buffers.  */
+#define VECT_VAR_DECL_INIT(V, T, W, N)                 \
+  VECT_VAR_DECL(V,T,W,N) [] = { MY_INIT_TABLE(T,W,N) }
+
+/* Specialized initializer with 4 entries, as used by vldX_dup and
+   vdup tests, which iterate 4 times on input buffers.  */
+#define VECT_VAR_DECL_INIT4(V, T, W, N)                        \
+  VECT_VAR_DECL(V,T,W,N) [] = { MY_INIT_TABLE(T,W,4) };
+
+/* Initializers for arrays of vectors.  */
+#define VECT_ARRAY_INIT2(V, T, W, N)           \
+  T##W##_t VECT_ARRAY_VAR(V,T,W,N,2)[] =       \
+  { MY_INIT_TABLE(T,W,N)                       \
+    MY_INIT_TABLE2(T,W,N) }
+
+#define VECT_ARRAY_INIT3(V, T, W, N)                   \
+  T##W##_t VECT_ARRAY_VAR(V,T,W,N,3)[] =               \
+  { MY_INIT_TABLE(T,W,N)                               \
+    MY_INIT_TABLE2(T,W,N)                              \
+    MY_INIT_TABLE3(T,W,N) }
+
+#define VECT_ARRAY_INIT4(V, T, W, N)                   \
+  T##W##_t VECT_ARRAY_VAR(V,T,W,N,4)[] =               \
+  { MY_INIT_TABLE(T,W,N)                               \
+    MY_INIT_TABLE2(T,W,N)                              \
+    MY_INIT_TABLE3(T,W,N)                              \
+    MY_INIT_TABLE4(T,W,N) }
+
+/* Sample initialization vectors.  */
+#define INIT_TABLE_1(T)                                \
+  (T)-16,
+#define INIT_TABLE2_1(T)                       \
+  (T)-15,
+#define INIT_TABLE3_1(T)                       \
+  (T)-14,
+#define INIT_TABLE4_1(T)                       \
+  (T)-13,
+
+#define INIT_TABLE_2(T)                                \
+  (T)-16, (T)-15,
+#define INIT_TABLE2_2(T)                       \
+  (T)-14, (T)-13,
+#define INIT_TABLE3_2(T)                       \
+  (T)-12, (T)-11,
+#define INIT_TABLE4_2(T)                       \
+  (T)-10, (T)-9,
+
+/* Initializer for vld3_lane tests.  */
+#define INIT_TABLE_3(T)                                \
+  (T)-16, (T)-15, (T)-14,
+
+#define INIT_TABLE_4(T)                                \
+  (T)-16, (T)-15, (T)-14, (T)-13,
+#define INIT_TABLE2_4(T)                       \
+  (T)-12, (T)-11, (T)-10, (T)-9,
+#define INIT_TABLE3_4(T)                       \
+  (T)-8, (T)-7, (T)-6, (T)-5,
+#define INIT_TABLE4_4(T)                       \
+  (T)-4, (T)-3, (T)-2, (T)-1,
+
+#define INIT_TABLE_8(T)                                                        \
+  (T)-16, (T)-15, (T)-14, (T)-13, (T)-12, (T)-11, (T)-10, (T)-9,
+#define INIT_TABLE2_8(T)                                       \
+  (T)-8, (T)-7, (T)-6, (T)-5, (T)-4, (T)-3, (T)-2, (T)-1,
+#define INIT_TABLE3_8(T)                               \
+  (T)0, (T)1, (T)2, (T)3, (T)4, (T)5, (T)6, (T)7,
+#define INIT_TABLE4_8(T)                               \
+  (T)8, (T)9, (T)10, (T)11, (T)12, (T)13, (T)14, (T)15,
+
+#define INIT_TABLE_16(T)                                               \
+  (T)-16, (T)-15, (T)-14, (T)-13, (T)-12, (T)-11, (T)-10, (T)-9,       \
+  (T)-8, (T)-7, (T)-6, (T)-5, (T)-4, (T)-3, (T)-2, (T)-1,
+#define INIT_TABLE2_16(T)                                              \
+  (T)0, (T)1, (T)2, (T)3, (T)4, (T)5, (T)6, (T)7,                      \
+  (T)8, (T)9, (T)10, (T)11, (T)12, (T)13, (T)14, (T)15,
+#define INIT_TABLE3_16(T)                                              \
+  (T)16, (T)17, (T)18, (T)19, (T)20, (T)21, (T)22, (T)23,              \
+   (T)24, (T)25, (T)26, (T)27, (T)28, (T)29, (T)30, (T)31,
+#define INIT_TABLE4_16(T)                                              \
+  (T)32, (T)33, (T)34, (T)35, (T)36, (T)37, (T)38, (T)39,              \
+  (T)40, (T)41, (T)42, (T)43, (T)44, (T)45, (T)46, (T)47,
+
+/* This one is used for padding between input buffers.  */
+#define PAD(V, T, W, N) char VECT_VAR(V,T,W,N)=42
+
+/* Input buffers, one of each size.  */
+/* Insert some padding to try to exhibit out of bounds accesses.  */
+VECT_VAR_DECL_INIT(buffer, int, 8, 8);
+PAD(buffer_pad, int, 8, 8);
+VECT_VAR_DECL_INIT(buffer, int, 16, 4);
+PAD(buffer_pad, int, 16, 4);
+VECT_VAR_DECL_INIT(buffer, int, 32, 2);
+PAD(buffer_pad, int, 32, 2);
+VECT_VAR_DECL_INIT(buffer, int, 64, 1);
+PAD(buffer_pad, int, 64, 1);
+VECT_VAR_DECL_INIT(buffer, uint, 8, 8);
+PAD(buffer_pad, uint, 8, 8);
+VECT_VAR_DECL_INIT(buffer, poly, 8, 8);
+PAD(buffer_pad, poly, 8, 8);
+VECT_VAR_DECL_INIT(buffer, poly, 16, 4);
+PAD(buffer_pad, poly, 16, 4);
+VECT_VAR_DECL_INIT(buffer, uint, 16, 4);
+PAD(buffer_pad, uint, 16, 4);
+VECT_VAR_DECL_INIT(buffer, uint, 32, 2);
+PAD(buffer_pad, uint, 32, 2);
+VECT_VAR_DECL_INIT(buffer, uint, 64, 1);
+PAD(buffer_pad, uint, 64, 1);
+VECT_VAR_DECL_INIT(buffer, float, 32, 2);
+PAD(buffer_pad, float, 32, 2);
+VECT_VAR_DECL_INIT(buffer, int, 8, 16);
+PAD(buffer_pad, int, 8, 16);
+VECT_VAR_DECL_INIT(buffer, int, 16, 8);
+PAD(buffer_pad, int, 16, 8);
+VECT_VAR_DECL_INIT(buffer, int, 32, 4);
+PAD(buffer_pad, int, 32, 4);
+VECT_VAR_DECL_INIT(buffer, int, 64, 2);
+PAD(buffer_pad, int, 64, 2);
+VECT_VAR_DECL_INIT(buffer, uint, 8, 16);
+PAD(buffer_pad, uint, 8, 16);
+VECT_VAR_DECL_INIT(buffer, uint, 16, 8);
+PAD(buffer_pad, uint, 16, 8);
+VECT_VAR_DECL_INIT(buffer, uint, 32, 4);
+PAD(buffer_pad, uint, 32, 4);
+VECT_VAR_DECL_INIT(buffer, uint, 64, 2);
+PAD(buffer_pad, uint, 64, 2);
+VECT_VAR_DECL_INIT(buffer, poly, 8, 16);
+PAD(buffer_pad, poly, 8, 16);
+VECT_VAR_DECL_INIT(buffer, poly, 16, 8);
+PAD(buffer_pad, poly, 16, 8);
+VECT_VAR_DECL_INIT(buffer, float, 32, 4);
+PAD(buffer_pad, float, 32, 4);
+
+/* The tests for vld1_dup and vdup expect at least 4 entries in the
+   input buffer, so force 1- and 2-elements initializers to have 4
+   entries (using VECT_VAR_DECL_INIT4).  */
+VECT_VAR_DECL_INIT(buffer_dup, int, 8, 8);
+VECT_VAR_DECL(buffer_dup_pad, int, 8, 8);
+VECT_VAR_DECL_INIT(buffer_dup, int, 16, 4);
+VECT_VAR_DECL(buffer_dup_pad, int, 16, 4);
+VECT_VAR_DECL_INIT4(buffer_dup, int, 32, 2);
+VECT_VAR_DECL(buffer_dup_pad, int, 32, 2);
+VECT_VAR_DECL_INIT4(buffer_dup, int, 64, 1);
+VECT_VAR_DECL(buffer_dup_pad, int, 64, 1);
+VECT_VAR_DECL_INIT(buffer_dup, uint, 8, 8);
+VECT_VAR_DECL(buffer_dup_pad, uint, 8, 8);
+VECT_VAR_DECL_INIT(buffer_dup, uint, 16, 4);
+VECT_VAR_DECL(buffer_dup_pad, uint, 16, 4);
+VECT_VAR_DECL_INIT4(buffer_dup, uint, 32, 2);
+VECT_VAR_DECL(buffer_dup_pad, uint, 32, 2);
+VECT_VAR_DECL_INIT4(buffer_dup, uint, 64, 1);
+VECT_VAR_DECL(buffer_dup_pad, uint, 64, 1);
+VECT_VAR_DECL_INIT(buffer_dup, poly, 8, 8);
+VECT_VAR_DECL(buffer_dup_pad, poly, 8, 8);
+VECT_VAR_DECL_INIT(buffer_dup, poly, 16, 4);
+VECT_VAR_DECL(buffer_dup_pad, poly, 16, 4);
+VECT_VAR_DECL_INIT4(buffer_dup, float, 32, 2);
+VECT_VAR_DECL(buffer_dup_pad, float, 32, 2);
+
+VECT_VAR_DECL_INIT(buffer_dup, int, 8, 16);
+VECT_VAR_DECL(buffer_dup_pad, int, 8, 16);
+VECT_VAR_DECL_INIT(buffer_dup, int, 16, 8);
+VECT_VAR_DECL(buffer_dup_pad, int, 16, 8);
+VECT_VAR_DECL_INIT(buffer_dup, int, 32, 4);
+VECT_VAR_DECL(buffer_dup_pad, int, 32, 4);
+VECT_VAR_DECL_INIT4(buffer_dup, int, 64, 2);
+VECT_VAR_DECL(buffer_dup_pad, int, 64, 2);
+VECT_VAR_DECL_INIT(buffer_dup, uint, 8, 16);
+VECT_VAR_DECL(buffer_dup_pad, uint, 8, 16);
+VECT_VAR_DECL_INIT(buffer_dup, uint, 16, 8);
+VECT_VAR_DECL(buffer_dup_pad, uint, 16, 8);
+VECT_VAR_DECL_INIT(buffer_dup, uint, 32, 4);
+VECT_VAR_DECL(buffer_dup_pad, uint, 32, 4);
+VECT_VAR_DECL_INIT4(buffer_dup, uint, 64, 2);
+VECT_VAR_DECL(buffer_dup_pad, uint, 64, 2);
+VECT_VAR_DECL_INIT(buffer_dup, poly, 8, 16);
+VECT_VAR_DECL(buffer_dup_pad, poly, 8, 16);
+VECT_VAR_DECL_INIT(buffer_dup, poly, 16, 8);
+VECT_VAR_DECL(buffer_dup_pad, poly, 16, 8);
+VECT_VAR_DECL_INIT(buffer_dup, float, 32, 4);
+VECT_VAR_DECL(buffer_dup_pad, float, 32, 4);
diff --git a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/unary_op.inc b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/unary_op.inc
new file mode 100644 (file)
index 0000000..33f9b5f
--- /dev/null
@@ -0,0 +1,72 @@
+/* Template file for unary operator validation.
+
+   This file is meant to be included by the relevant test files, which
+   have to define the intrinsic family to test. If a given intrinsic
+   supports variants which are not supported by all the other unary
+   operators, these can be tested by providing a definition for
+   EXTRA_TESTS.  */
+
+#include <arm_neon.h>
+#include "arm-neon-ref.h"
+#include "compute-ref-data.h"
+
+#define FNNAME1(NAME) exec_ ## NAME
+#define FNNAME(NAME) FNNAME1(NAME)
+
+void FNNAME (INSN_NAME) (void)
+{
+  /* Basic test: y=OP(x), then store the result.  */
+#define TEST_UNARY_OP1(INSN, Q, T1, T2, W, N)                          \
+  VECT_VAR(vector_res, T1, W, N) =                                     \
+    INSN##Q##_##T2##W(VECT_VAR(vector, T1, W, N));                     \
+  vst1##Q##_##T2##W(VECT_VAR(result, T1, W, N), VECT_VAR(vector_res, T1, W, N))
+
+#define TEST_UNARY_OP(INSN, Q, T1, T2, W, N)                           \
+  TEST_UNARY_OP1(INSN, Q, T1, T2, W, N)                                        \
+
+  /* No need for 64 bits variants in the general case.  */
+  DECL_VARIABLE(vector, int, 8, 8);
+  DECL_VARIABLE(vector, int, 16, 4);
+  DECL_VARIABLE(vector, int, 32, 2);
+  DECL_VARIABLE(vector, int, 8, 16);
+  DECL_VARIABLE(vector, int, 16, 8);
+  DECL_VARIABLE(vector, int, 32, 4);
+
+  DECL_VARIABLE(vector_res, int, 8, 8);
+  DECL_VARIABLE(vector_res, int, 16, 4);
+  DECL_VARIABLE(vector_res, int, 32, 2);
+  DECL_VARIABLE(vector_res, int, 8, 16);
+  DECL_VARIABLE(vector_res, int, 16, 8);
+  DECL_VARIABLE(vector_res, int, 32, 4);
+
+  clean_results ();
+
+  /* Initialize input "vector" from "buffer".  */
+  VLOAD(vector, buffer, , int, s, 8, 8);
+  VLOAD(vector, buffer, , int, s, 16, 4);
+  VLOAD(vector, buffer, , int, s, 32, 2);
+  VLOAD(vector, buffer, q, int, s, 8, 16);
+  VLOAD(vector, buffer, q, int, s, 16, 8);
+  VLOAD(vector, buffer, q, int, s, 32, 4);
+
+  /* Apply a unary operator named INSN_NAME.  */
+  TEST_UNARY_OP(INSN_NAME, , int, s, 8, 8);
+  TEST_UNARY_OP(INSN_NAME, , int, s, 16, 4);
+  TEST_UNARY_OP(INSN_NAME, , int, s, 32, 2);
+  TEST_UNARY_OP(INSN_NAME, q, int, s, 8, 16);
+  TEST_UNARY_OP(INSN_NAME, q, int, s, 16, 8);
+  TEST_UNARY_OP(INSN_NAME, q, int, s, 32, 4);
+
+  CHECK_RESULTS (TEST_MSG, "");
+
+#ifdef EXTRA_TESTS
+  EXTRA_TESTS();
+#endif
+}
+
+int main (void)
+{
+  FNNAME (INSN_NAME)();
+
+  return 0;
+}
diff --git a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/unary_sat_op.inc b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/unary_sat_op.inc
new file mode 100644 (file)
index 0000000..3f6d984
--- /dev/null
@@ -0,0 +1,80 @@
+/* Template file for saturating unary operator validation.
+
+   This file is meant to be included by the relevant test files, which
+   have to define the intrinsic family to test. If a given intrinsic
+   supports variants which are not supported by all the other
+   saturating unary operators, these can be tested by providing a
+   definition for EXTRA_TESTS.  */
+
+#include <arm_neon.h>
+#include "arm-neon-ref.h"
+#include "compute-ref-data.h"
+
+#define FNNAME1(NAME) exec_ ## NAME
+#define FNNAME(NAME) FNNAME1(NAME)
+
+void FNNAME (INSN_NAME) (void)
+{
+  /* y=OP(x), then store the result.  */
+#define TEST_UNARY_SAT_OP1(INSN, Q, T1, T2, W, N, EXPECTED_CUMULATIVE_SAT, CMT) \
+  Set_Neon_Cumulative_Sat(0);                                          \
+  VECT_VAR(vector_res, T1, W, N) =                                     \
+    INSN##Q##_##T2##W(VECT_VAR(vector, T1, W, N));                     \
+    vst1##Q##_##T2##W(VECT_VAR(result, T1, W, N),                      \
+                     VECT_VAR(vector_res, T1, W, N));                  \
+      CHECK_CUMULATIVE_SAT(TEST_MSG, T1, W, N, EXPECTED_CUMULATIVE_SAT, CMT)
+
+#define TEST_UNARY_SAT_OP(INSN, Q, T1, T2, W, N, EXPECTED_CUMULATIVE_SAT, CMT) \
+  TEST_UNARY_SAT_OP1(INSN, Q, T1, T2, W, N, EXPECTED_CUMULATIVE_SAT, CMT)
+
+  /* No need for 64 bits variants.  */
+  DECL_VARIABLE(vector, int, 8, 8);
+  DECL_VARIABLE(vector, int, 16, 4);
+  DECL_VARIABLE(vector, int, 32, 2);
+  DECL_VARIABLE(vector, int, 8, 16);
+  DECL_VARIABLE(vector, int, 16, 8);
+  DECL_VARIABLE(vector, int, 32, 4);
+
+  DECL_VARIABLE(vector_res, int, 8, 8);
+  DECL_VARIABLE(vector_res, int, 16, 4);
+  DECL_VARIABLE(vector_res, int, 32, 2);
+  DECL_VARIABLE(vector_res, int, 8, 16);
+  DECL_VARIABLE(vector_res, int, 16, 8);
+  DECL_VARIABLE(vector_res, int, 32, 4);
+
+  clean_results ();
+
+  /* Initialize input "vector" from "buffer".  */
+  VLOAD(vector, buffer, , int, s, 8, 8);
+  VLOAD(vector, buffer, , int, s, 16, 4);
+  VLOAD(vector, buffer, , int, s, 32, 2);
+  VLOAD(vector, buffer, q, int, s, 8, 16);
+  VLOAD(vector, buffer, q, int, s, 16, 8);
+  VLOAD(vector, buffer, q, int, s, 32, 4);
+
+  /* Apply a saturating unary operator named INSN_NAME.  */
+  TEST_UNARY_SAT_OP(INSN_NAME, , int, s, 8, 8, expected_cumulative_sat, "");
+  TEST_UNARY_SAT_OP(INSN_NAME, , int, s, 16, 4, expected_cumulative_sat, "");
+  TEST_UNARY_SAT_OP(INSN_NAME, , int, s, 32, 2, expected_cumulative_sat, "");
+  TEST_UNARY_SAT_OP(INSN_NAME, q, int, s, 8, 16, expected_cumulative_sat, "");
+  TEST_UNARY_SAT_OP(INSN_NAME, q, int, s, 16, 8, expected_cumulative_sat, "");
+  TEST_UNARY_SAT_OP(INSN_NAME, q, int, s, 32, 4, expected_cumulative_sat, "");
+
+  CHECK(TEST_MSG, int, 8, 8, PRIx8, expected, "");
+  CHECK(TEST_MSG, int, 16, 4, PRIx8, expected, "");
+  CHECK(TEST_MSG, int, 32, 2, PRIx8, expected, "");
+  CHECK(TEST_MSG, int, 8, 16, PRIx8, expected, "");
+  CHECK(TEST_MSG, int, 16, 8, PRIx8, expected, "");
+  CHECK(TEST_MSG, int, 32, 4, PRIx8, expected, "");
+
+#ifdef EXTRA_TESTS
+  EXTRA_TESTS();
+#endif
+}
+
+int main (void)
+{
+  FNNAME (INSN_NAME) ();
+
+  return 0;
+}
diff --git a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vaba.c b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vaba.c
new file mode 100644 (file)
index 0000000..2465cd2
--- /dev/null
@@ -0,0 +1,142 @@
+#include <arm_neon.h>
+#include "arm-neon-ref.h"
+#include "compute-ref-data.h"
+
+/* Expected results.  */
+VECT_VAR_DECL(expected,int,8,8) [] = { 0xf6, 0xf7, 0xf8, 0xf9,
+                                      0xfa, 0xfb, 0xfc, 0xfd };
+VECT_VAR_DECL(expected,int,16,4) [] = { 0x16, 0x17, 0x18, 0x19 };
+VECT_VAR_DECL(expected,int,32,2) [] = { 0x20, 0x21 };
+VECT_VAR_DECL(expected,int,64,1) [] = { 0x3333333333333333 };
+VECT_VAR_DECL(expected,uint,8,8) [] = { 0x53, 0x54, 0x55, 0x56,
+                                       0x57, 0x58, 0x59, 0x5a };
+VECT_VAR_DECL(expected,uint,16,4) [] = { 0x907, 0x908, 0x909, 0x90a };
+VECT_VAR_DECL(expected,uint,32,2) [] = { 0xffffffe7, 0xffffffe8 };
+VECT_VAR_DECL(expected,uint,64,1) [] = { 0x3333333333333333 };
+VECT_VAR_DECL(expected,poly,8,8) [] = { 0x33, 0x33, 0x33, 0x33,
+                                       0x33, 0x33, 0x33, 0x33 };
+VECT_VAR_DECL(expected,poly,16,4) [] = { 0x3333, 0x3333, 0x3333, 0x3333 };
+VECT_VAR_DECL(expected,hfloat,32,2) [] = { 0x33333333, 0x33333333 };
+VECT_VAR_DECL(expected,int,8,16) [] = { 0x5e, 0x5f, 0x60, 0x61,
+                                       0x62, 0x63, 0x64, 0x65,
+                                       0x66, 0x67, 0x68, 0x69,
+                                       0x6a, 0x6b, 0x6c, 0x6d };
+VECT_VAR_DECL(expected,int,16,8) [] = { 0xb9c, 0xb9d, 0xb9e, 0xb9f,
+                                       0xba0, 0xba1, 0xba2, 0xba3 };
+VECT_VAR_DECL(expected,int,32,4) [] = { 0x26e0, 0x26e1, 0x26e2, 0x26e3 };
+VECT_VAR_DECL(expected,int,64,2) [] = { 0x3333333333333333,
+                                       0x3333333333333333 };
+VECT_VAR_DECL(expected,uint,8,16) [] = { 0xf8, 0xf9, 0xfa, 0xfb,
+                                        0xfc, 0xfd, 0xfe, 0xff,
+                                        0x0, 0x1, 0x2, 0x3,
+                                        0x4, 0x5, 0x6, 0x7 };
+VECT_VAR_DECL(expected,uint,16,8) [] = { 0xfff9, 0xfffa, 0xfffb, 0xfffc,
+                                        0xfffd, 0xfffe, 0xffff, 0x0 };
+VECT_VAR_DECL(expected,uint,32,4) [] = { 0xc, 0xd, 0xe, 0xf };
+VECT_VAR_DECL(expected,uint,64,2) [] = { 0x3333333333333333,
+                                        0x3333333333333333 };
+VECT_VAR_DECL(expected,poly,8,16) [] = { 0x33, 0x33, 0x33, 0x33,
+                                        0x33, 0x33, 0x33, 0x33,
+                                        0x33, 0x33, 0x33, 0x33,
+                                        0x33, 0x33, 0x33, 0x33 };
+VECT_VAR_DECL(expected,poly,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333,
+                                        0x3333, 0x3333, 0x3333, 0x3333 };
+VECT_VAR_DECL(expected,hfloat,32,4) [] = { 0x33333333, 0x33333333,
+                                          0x33333333, 0x33333333 };
+
+#define TEST_MSG "VABA/VABAQ"
+void exec_vaba (void)
+{
+  /* Basic test: v4=vaba(v1,v2,v3), then store the result.  */
+#define TEST_VABA(Q, T1, T2, W, N)                                     \
+  VECT_VAR(vector_res, T1, W, N) =                                     \
+    vaba##Q##_##T2##W(VECT_VAR(vector1, T1, W, N),                     \
+                     VECT_VAR(vector2, T1, W, N),                      \
+                     VECT_VAR(vector3, T1, W, N));                     \
+  vst1##Q##_##T2##W(VECT_VAR(result, T1, W, N), VECT_VAR(vector_res, T1, W, N))
+
+#define DECL_VABA_VAR(VAR)                     \
+  DECL_VARIABLE(VAR, int, 8, 8);               \
+  DECL_VARIABLE(VAR, int, 16, 4);              \
+  DECL_VARIABLE(VAR, int, 32, 2);              \
+  DECL_VARIABLE(VAR, uint, 8, 8);              \
+  DECL_VARIABLE(VAR, uint, 16, 4);             \
+  DECL_VARIABLE(VAR, uint, 32, 2);             \
+  DECL_VARIABLE(VAR, int, 8, 16);              \
+  DECL_VARIABLE(VAR, int, 16, 8);              \
+  DECL_VARIABLE(VAR, int, 32, 4);              \
+  DECL_VARIABLE(VAR, uint, 8, 16);             \
+  DECL_VARIABLE(VAR, uint, 16, 8);             \
+  DECL_VARIABLE(VAR, uint, 32, 4)
+
+  DECL_VABA_VAR(vector1);
+  DECL_VABA_VAR(vector2);
+  DECL_VABA_VAR(vector3);
+  DECL_VABA_VAR(vector_res);
+
+  clean_results ();
+
+  /* Initialize input "vector1" from "buffer".  */
+  VLOAD(vector1, buffer, , int, s, 8, 8);
+  VLOAD(vector1, buffer, , int, s, 16, 4);
+  VLOAD(vector1, buffer, , int, s, 32, 2);
+  VLOAD(vector1, buffer, , uint, u, 8, 8);
+  VLOAD(vector1, buffer, , uint, u, 16, 4);
+  VLOAD(vector1, buffer, , uint, u, 32, 2);
+  VLOAD(vector1, buffer, q, int, s, 8, 16);
+  VLOAD(vector1, buffer, q, int, s, 16, 8);
+  VLOAD(vector1, buffer, q, int, s, 32, 4);
+  VLOAD(vector1, buffer, q, uint, u, 8, 16);
+  VLOAD(vector1, buffer, q, uint, u, 16, 8);
+  VLOAD(vector1, buffer, q, uint, u, 32, 4);
+
+  /* Choose init value arbitrarily.  */
+  VDUP(vector2, , int, s, 8, 8, 1);
+  VDUP(vector2, , int, s, 16, 4, -13);
+  VDUP(vector2, , int, s, 32, 2, 8);
+  VDUP(vector2, , uint, u, 8, 8, 1);
+  VDUP(vector2, , uint, u, 16, 4, 13);
+  VDUP(vector2, , uint, u, 32, 2, 8);
+  VDUP(vector2, q, int, s, 8, 16, 10);
+  VDUP(vector2, q, int, s, 16, 8, -12);
+  VDUP(vector2, q, int, s, 32, 4, 32);
+  VDUP(vector2, q, uint, u, 8, 16, 10);
+  VDUP(vector2, q, uint, u, 16, 8, 12);
+  VDUP(vector2, q, uint, u, 32, 4, 32);
+
+  /* Choose init value arbitrarily.  */
+  VDUP(vector3, , int, s, 8, 8, -5);
+  VDUP(vector3, , int, s, 16, 4, 25);
+  VDUP(vector3, , int, s, 32, 2, -40);
+  VDUP(vector3, , uint, u, 8, 8, 100);
+  VDUP(vector3, , uint, u, 16, 4, 2340);
+  VDUP(vector3, , uint, u, 32, 2, 0xffffffff);
+  VDUP(vector3, q, int, s, 8, 16, -100);
+  VDUP(vector3, q, int, s, 16, 8, -3000);
+  VDUP(vector3, q, int, s, 32, 4, 10000);
+  VDUP(vector3, q, uint, u, 8, 16, 2);
+  VDUP(vector3, q, uint, u, 16, 8, 3);
+  VDUP(vector3, q, uint, u, 32, 4, 4);
+
+  /* Execute the tests.  */
+  TEST_VABA(, int, s, 8, 8);
+  TEST_VABA(, int, s, 16, 4);
+  TEST_VABA(, int, s, 32, 2);
+  TEST_VABA(, uint, u, 8, 8);
+  TEST_VABA(, uint, u, 16, 4);
+  TEST_VABA(, uint, u, 32, 2);
+  TEST_VABA(q, int, s, 8, 16);
+  TEST_VABA(q, int, s, 16, 8);
+  TEST_VABA(q, int, s, 32, 4);
+  TEST_VABA(q, uint, u, 8, 16);
+  TEST_VABA(q, uint, u, 16, 8);
+  TEST_VABA(q, uint, u, 32, 4);
+
+  CHECK_RESULTS (TEST_MSG, "");
+}
+
+int main (void)
+{
+  exec_vaba ();
+  return 0;
+}
diff --git a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vabal.c b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vabal.c
new file mode 100644 (file)
index 0000000..cd31062
--- /dev/null
@@ -0,0 +1,161 @@
+#include <arm_neon.h>
+#include "arm-neon-ref.h"
+#include "compute-ref-data.h"
+
+/* Expected results.  */
+VECT_VAR_DECL(expected,int,8,8) [] = { 0x33, 0x33, 0x33, 0x33,
+                                      0x33, 0x33, 0x33, 0x33 };
+VECT_VAR_DECL(expected,int,16,4) [] = { 0x3333, 0x3333, 0x3333, 0x3333 };
+VECT_VAR_DECL(expected,int,32,2) [] = { 0x33333333, 0x33333333 };
+VECT_VAR_DECL(expected,int,64,1) [] = { 0x3333333333333333 };
+VECT_VAR_DECL(expected,uint,8,8) [] = { 0x33, 0x33, 0x33, 0x33,
+                                       0x33, 0x33, 0x33, 0x33 };
+VECT_VAR_DECL(expected,uint,16,4) [] = { 0x3333, 0x3333, 0x3333, 0x3333 };
+VECT_VAR_DECL(expected,uint,32,2) [] = { 0x33333333, 0x33333333 };
+VECT_VAR_DECL(expected,uint,64,1) [] = { 0x3333333333333333 };
+VECT_VAR_DECL(expected,poly,8,8) [] = { 0x33, 0x33, 0x33, 0x33,
+                                       0x33, 0x33, 0x33, 0x33 };
+VECT_VAR_DECL(expected,poly,16,4) [] = { 0x3333, 0x3333, 0x3333, 0x3333 };
+VECT_VAR_DECL(expected,hfloat,32,2) [] = { 0x33333333, 0x33333333 };
+VECT_VAR_DECL(expected,int,8,16) [] = { 0x33, 0x33, 0x33, 0x33,
+                                       0x33, 0x33, 0x33, 0x33,
+                                       0x33, 0x33, 0x33, 0x33,
+                                       0x33, 0x33, 0x33, 0x33 };
+VECT_VAR_DECL(expected,int,16,8) [] = { 0xfff6, 0xfff7, 0xfff8, 0xfff9,
+                                       0xfffa, 0xfffb, 0xfffc, 0xfffd };
+VECT_VAR_DECL(expected,int,32,4) [] = { 0x16, 0x17, 0x18, 0x19 };
+VECT_VAR_DECL(expected,int,64,2) [] = { 0x20, 0x21 };
+VECT_VAR_DECL(expected,uint,8,16) [] = { 0x33, 0x33, 0x33, 0x33,
+                                        0x33, 0x33, 0x33, 0x33,
+                                        0x33, 0x33, 0x33, 0x33,
+                                        0x33, 0x33, 0x33, 0x33 };
+VECT_VAR_DECL(expected,uint,16,8) [] = { 0x53, 0x54, 0x55, 0x56,
+                                        0x57, 0x58, 0x59, 0x5a };
+VECT_VAR_DECL(expected,uint,32,4) [] = { 0x907, 0x908, 0x909, 0x90a };
+VECT_VAR_DECL(expected,uint,64,2) [] = { 0xffffffe7,
+                                        0xffffffe8 };
+VECT_VAR_DECL(expected,poly,8,16) [] = { 0x33, 0x33, 0x33, 0x33,
+                                        0x33, 0x33, 0x33, 0x33,
+                                        0x33, 0x33, 0x33, 0x33,
+                                        0x33, 0x33, 0x33, 0x33 };
+VECT_VAR_DECL(expected,poly,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333,
+                                        0x3333, 0x3333, 0x3333, 0x3333 };
+VECT_VAR_DECL(expected,hfloat,32,4) [] = { 0x33333333, 0x33333333,
+                                          0x33333333, 0x33333333 };
+
+/* Expected results for cases with input values chosen to test
+   possible intermediate overflow.  */
+VECT_VAR_DECL(expected2,int,16,8) [] = { 0xef, 0xf0, 0xf1, 0xf2,
+                                        0xf3, 0xf4, 0xf5, 0xf6 };
+VECT_VAR_DECL(expected2,int,32,4) [] = { 0xffef, 0xfff0, 0xfff1, 0xfff2 };
+VECT_VAR_DECL(expected2,int,64,2) [] = { 0xffffffef, 0xfffffff0 };
+VECT_VAR_DECL(expected2,uint,16,8) [] = { 0xee, 0xef, 0xf0, 0xf1,
+                                         0xf2, 0xf3, 0xf4, 0xf5 };
+VECT_VAR_DECL(expected2,uint,32,4) [] = { 0xffe2, 0xffe3, 0xffe4, 0xffe5 };
+VECT_VAR_DECL(expected2,uint,64,2) [] = { 0xffffffe7, 0xffffffe8 };
+
+#define TEST_MSG "VABAL"
+void exec_vabal (void)
+{
+  /* Basic test: v4=vabal(v1,v2,v3), then store the result.  */
+#define TEST_VABAL(T1, T2, W, W2, N)                                   \
+  VECT_VAR(vector_res, T1, W2, N) =                                    \
+    vabal_##T2##W(VECT_VAR(vector1, T1, W2, N),                                \
+                 VECT_VAR(vector2, T1, W, N),                          \
+                 VECT_VAR(vector3, T1, W, N));                         \
+  vst1q_##T2##W2(VECT_VAR(result, T1, W2, N), VECT_VAR(vector_res, T1, W2, N))
+
+#define DECL_VABAL_VAR_LONG(VAR)               \
+  DECL_VARIABLE(VAR, int, 16, 8);              \
+  DECL_VARIABLE(VAR, int, 32, 4);              \
+  DECL_VARIABLE(VAR, int, 64, 2);              \
+  DECL_VARIABLE(VAR, uint, 16, 8);             \
+  DECL_VARIABLE(VAR, uint, 32, 4);             \
+  DECL_VARIABLE(VAR, uint, 64, 2)
+
+#define DECL_VABAL_VAR_SHORT(VAR)              \
+  DECL_VARIABLE(VAR, int, 8, 8);               \
+  DECL_VARIABLE(VAR, int, 16, 4);              \
+  DECL_VARIABLE(VAR, int, 32, 2);              \
+  DECL_VARIABLE(VAR, uint, 8, 8);              \
+  DECL_VARIABLE(VAR, uint, 16, 4);             \
+  DECL_VARIABLE(VAR, uint, 32, 2)
+
+  DECL_VABAL_VAR_LONG(vector1);
+  DECL_VABAL_VAR_SHORT(vector2);
+  DECL_VABAL_VAR_SHORT(vector3);
+  DECL_VABAL_VAR_LONG(vector_res);
+
+  clean_results ();
+
+  /* Initialize input "vector1" from "buffer".  */
+  VLOAD(vector1, buffer, q, int, s, 16, 8);
+  VLOAD(vector1, buffer, q, int, s, 32, 4);
+  VLOAD(vector1, buffer, q, int, s, 64, 2);
+  VLOAD(vector1, buffer, q, uint, u, 16, 8);
+  VLOAD(vector1, buffer, q, uint, u, 32, 4);
+  VLOAD(vector1, buffer, q, uint, u, 64, 2);
+
+
+  /* Choose init value arbitrarily.  */
+  VDUP(vector2, , int, s, 8, 8, 1);
+  VDUP(vector2, , int, s, 16, 4, -13);
+  VDUP(vector2, , int, s, 32, 2, 8);
+  VDUP(vector2, , uint, u, 8, 8, 1);
+  VDUP(vector2, , uint, u, 16, 4, 13);
+  VDUP(vector2, , uint, u, 32, 2, 8);
+
+  /* Choose init value arbitrarily.  */
+  VDUP(vector3, , int, s, 8, 8, -5);
+  VDUP(vector3, , int, s, 16, 4, 25);
+  VDUP(vector3, , int, s, 32, 2, -40);
+  VDUP(vector3, , uint, u, 8, 8, 100);
+  VDUP(vector3, , uint, u, 16, 4, 2340);
+  VDUP(vector3, , uint, u, 32, 2, 0xffffffff);
+
+  /* Execute the tests.  */
+  TEST_VABAL(int, s, 8, 16, 8);
+  TEST_VABAL(int, s, 16, 32, 4);
+  TEST_VABAL(int, s, 32, 64, 2);
+  TEST_VABAL(uint, u, 8, 16, 8);
+  TEST_VABAL(uint, u, 16, 32, 4);
+  TEST_VABAL(uint, u, 32, 64, 2);
+
+  CHECK_RESULTS (TEST_MSG, "");
+
+  /* Use values that could lead to overflow intermediate
+   * calculations.  */
+  VDUP(vector2, , int, s, 8, 8, 0x80);
+  VDUP(vector2, , int, s, 16, 4, 0x8000);
+  VDUP(vector2, , int, s, 32, 2, 0x80000000);
+  VDUP(vector2, , uint, u, 8, 8, 1);
+  VDUP(vector2, , uint, u, 16, 4, 13);
+  VDUP(vector2, , uint, u, 32, 2, 8);
+
+  VDUP(vector3, , int, s, 8, 8, 0x7f);
+  VDUP(vector3, , int, s, 16, 4, 0x7fff);
+  VDUP(vector3, , int, s, 32, 2, 0x7fffffff);
+  VDUP(vector3, , uint, u, 8, 8, 0xff);
+  VDUP(vector3, , uint, u, 16, 4, 0xffff);
+  VDUP(vector3, , uint, u, 32, 2, 0xffffffff);
+
+  TEST_VABAL(int, s, 8, 16, 8);
+  TEST_VABAL(int, s, 16, 32, 4);
+  TEST_VABAL(int, s, 32, 64, 2);
+  TEST_VABAL(uint, u, 8, 16, 8);
+  TEST_VABAL(uint, u, 16, 32, 4);
+  TEST_VABAL(uint, u, 32, 64, 2);
+
+  CHECK(TEST_MSG, int, 16, 8, PRIx16, expected2, " test intermediate overflow");
+  CHECK(TEST_MSG, int, 32, 4, PRIx32, expected2, " test intermediate overflow");
+  CHECK(TEST_MSG, int, 64, 2, PRIx64, expected2, " test intermediate overflow");
+  CHECK(TEST_MSG, uint, 16, 8, PRIx16, expected2, " test intermediate overflow");
+  CHECK(TEST_MSG, uint, 32, 4, PRIx32, expected2, " test intermediate overflow");
+  CHECK(TEST_MSG, uint, 64, 2, PRIx64, expected2, " test intermediate overflow");
+}
+
+int main (void)
+{
+  exec_vabal ();
+  return 0;
+}
diff --git a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vabd.c b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vabd.c
new file mode 100644 (file)
index 0000000..e95404f
--- /dev/null
@@ -0,0 +1,153 @@
+#include <arm_neon.h>
+#include "arm-neon-ref.h"
+#include "compute-ref-data.h"
+#include <math.h>
+
+/* Expected results.  */
+VECT_VAR_DECL(expected,int,8,8) [] = { 0x11, 0x10, 0xf, 0xe,
+                                      0xd, 0xc, 0xb, 0xa };
+VECT_VAR_DECL(expected,int,16,4) [] = { 0x3, 0x2, 0x1, 0x0 };
+VECT_VAR_DECL(expected,int,32,2) [] = { 0x18, 0x17 };
+VECT_VAR_DECL(expected,int,64,1) [] = { 0x3333333333333333 };
+VECT_VAR_DECL(expected,uint,8,8) [] = { 0xef, 0xf0, 0xf1, 0xf2,
+                                       0xf3, 0xf4, 0xf5, 0xf6 };
+VECT_VAR_DECL(expected,uint,16,4) [] = { 0xffe3, 0xffe4, 0xffe5, 0xffe6 };
+VECT_VAR_DECL(expected,uint,32,2) [] = { 0xffffffe8, 0xffffffe9 };
+VECT_VAR_DECL(expected,uint,64,1) [] = { 0x3333333333333333 };
+VECT_VAR_DECL(expected,poly,8,8) [] = { 0x33, 0x33, 0x33, 0x33,
+                                       0x33, 0x33, 0x33, 0x33 };
+VECT_VAR_DECL(expected,poly,16,4) [] = { 0x3333, 0x3333, 0x3333, 0x3333 };
+VECT_VAR_DECL(expected,hfloat,32,2) [] = { 0x41c26666, 0x41ba6666 };
+VECT_VAR_DECL(expected,int,8,16) [] = { 0x1a, 0x19, 0x18, 0x17,
+                                       0x16, 0x15, 0x14, 0x13,
+                                       0x12, 0x11, 0x10, 0xf,
+                                       0xe, 0xd, 0xc, 0xb };
+VECT_VAR_DECL(expected,int,16,8) [] = { 0x4, 0x3, 0x2, 0x1,
+                                       0x0, 0x1, 0x2, 0x3 };
+VECT_VAR_DECL(expected,int,32,4) [] = { 0x30, 0x2f, 0x2e, 0x2d };
+VECT_VAR_DECL(expected,int,64,2) [] = { 0x3333333333333333,
+                                       0x3333333333333333 };
+VECT_VAR_DECL(expected,uint,8,16) [] = { 0xe6, 0xe7, 0xe8, 0xe9,
+                                        0xea, 0xeb, 0xec, 0xed,
+                                        0xee, 0xef, 0xf0, 0xf1,
+                                        0xf2, 0xf3, 0xf4, 0xf5 };
+VECT_VAR_DECL(expected,uint,16,8) [] = { 0xffe4, 0xffe5, 0xffe6, 0xffe7,
+                                        0xffe8, 0xffe9, 0xffea, 0xffeb };
+VECT_VAR_DECL(expected,uint,32,4) [] = { 0xffffffd0, 0xffffffd1,
+                                        0xffffffd2, 0xffffffd3 };
+VECT_VAR_DECL(expected,uint,64,2) [] = { 0x3333333333333333,
+                                        0x3333333333333333 };
+VECT_VAR_DECL(expected,poly,8,16) [] = { 0x33, 0x33, 0x33, 0x33,
+                                        0x33, 0x33, 0x33, 0x33,
+                                        0x33, 0x33, 0x33, 0x33,
+                                        0x33, 0x33, 0x33, 0x33 };
+VECT_VAR_DECL(expected,poly,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333,
+                                        0x3333, 0x3333, 0x3333, 0x3333 };
+VECT_VAR_DECL(expected,hfloat,32,4) [] = { 0x42407ae1, 0x423c7ae1,
+                                          0x42387ae1, 0x42347ae1 };
+
+/* Additional expected results for float32 variants with specially
+   chosen input values.  */
+VECT_VAR_DECL(expected_float32,hfloat,32,4) [] = { 0x0, 0x0, 0x0, 0x0 };
+
+#define TEST_MSG "VABD/VABDQ"
+void exec_vabd (void)
+{
+  /* Basic test: v4=vabd(v1,v2), then store the result.  */
+#define TEST_VABD(Q, T1, T2, W, N)                                     \
+  VECT_VAR(vector_res, T1, W, N) =                                     \
+    vabd##Q##_##T2##W(VECT_VAR(vector1, T1, W, N),                     \
+                     VECT_VAR(vector2, T1, W, N));                     \
+  vst1##Q##_##T2##W(VECT_VAR(result, T1, W, N), VECT_VAR(vector_res, T1, W, N))
+
+#define DECL_VABD_VAR(VAR)                     \
+  DECL_VARIABLE(VAR, int, 8, 8);               \
+  DECL_VARIABLE(VAR, int, 16, 4);              \
+  DECL_VARIABLE(VAR, int, 32, 2);              \
+  DECL_VARIABLE(VAR, uint, 8, 8);              \
+  DECL_VARIABLE(VAR, uint, 16, 4);             \
+  DECL_VARIABLE(VAR, uint, 32, 2);             \
+  DECL_VARIABLE(VAR, float, 32, 2);            \
+  DECL_VARIABLE(VAR, int, 8, 16);              \
+  DECL_VARIABLE(VAR, int, 16, 8);              \
+  DECL_VARIABLE(VAR, int, 32, 4);              \
+  DECL_VARIABLE(VAR, uint, 8, 16);             \
+  DECL_VARIABLE(VAR, uint, 16, 8);             \
+  DECL_VARIABLE(VAR, uint, 32, 4);             \
+  DECL_VARIABLE(VAR, float, 32, 4)
+
+  DECL_VABD_VAR(vector1);
+  DECL_VABD_VAR(vector2);
+  DECL_VABD_VAR(vector_res);
+
+  clean_results ();
+
+  /* Initialize input "vector1" from "buffer".  */
+  VLOAD(vector1, buffer, , int, s, 8, 8);
+  VLOAD(vector1, buffer, , int, s, 16, 4);
+  VLOAD(vector1, buffer, , int, s, 32, 2);
+  VLOAD(vector1, buffer, , uint, u, 8, 8);
+  VLOAD(vector1, buffer, , uint, u, 16, 4);
+  VLOAD(vector1, buffer, , uint, u, 32, 2);
+  VLOAD(vector1, buffer, , float, f, 32, 2);
+  VLOAD(vector1, buffer, q, int, s, 8, 16);
+  VLOAD(vector1, buffer, q, int, s, 16, 8);
+  VLOAD(vector1, buffer, q, int, s, 32, 4);
+  VLOAD(vector1, buffer, q, uint, u, 8, 16);
+  VLOAD(vector1, buffer, q, uint, u, 16, 8);
+  VLOAD(vector1, buffer, q, uint, u, 32, 4);
+  VLOAD(vector1, buffer, q, float, f, 32, 4);
+
+  /* Choose init value arbitrarily.  */
+  VDUP(vector2, , int, s, 8, 8, 1);
+  VDUP(vector2, , int, s, 16, 4, -13);
+  VDUP(vector2, , int, s, 32, 2, 8);
+  VDUP(vector2, , uint, u, 8, 8, 1);
+  VDUP(vector2, , uint, u, 16, 4, 13);
+  VDUP(vector2, , uint, u, 32, 2, 8);
+  VDUP(vector2, , float, f, 32, 2, 8.3f);
+  VDUP(vector2, q, int, s, 8, 16, 10);
+  VDUP(vector2, q, int, s, 16, 8, -12);
+  VDUP(vector2, q, int, s, 32, 4, 32);
+  VDUP(vector2, q, uint, u, 8, 16, 10);
+  VDUP(vector2, q, uint, u, 16, 8, 12);
+  VDUP(vector2, q, uint, u, 32, 4, 32);
+  VDUP(vector2, q, float, f, 32, 4, 32.12f);
+
+  /* Execute the tests.  */
+  TEST_VABD(, int, s, 8, 8);
+  TEST_VABD(, int, s, 16, 4);
+  TEST_VABD(, int, s, 32, 2);
+  TEST_VABD(, uint, u, 8, 8);
+  TEST_VABD(, uint, u, 16, 4);
+  TEST_VABD(, uint, u, 32, 2);
+  TEST_VABD(, float, f, 32, 2);
+  TEST_VABD(q, int, s, 8, 16);
+  TEST_VABD(q, int, s, 16, 8);
+  TEST_VABD(q, int, s, 32, 4);
+  TEST_VABD(q, uint, u, 8, 16);
+  TEST_VABD(q, uint, u, 16, 8);
+  TEST_VABD(q, uint, u, 32, 4);
+  TEST_VABD(q, float, f, 32, 4);
+
+  CHECK_RESULTS (TEST_MSG, "");
+
+
+  /* Extra FP tests with special values (-0.0, ....) */
+  VDUP(vector1, q, float, f, 32, 4, -0.0f);
+  VDUP(vector2, q, float, f, 32, 4, 0.0);
+  TEST_VABD(q, float, f, 32, 4);
+  CHECK_FP(TEST_MSG, float, 32, 4, PRIx32, expected_float32, " FP special (-0.0)");
+
+  /* Extra FP tests with special values (-0.0, ....) */
+  VDUP(vector1, q, float, f, 32, 4, 0.0f);
+  VDUP(vector2, q, float, f, 32, 4, -0.0);
+  TEST_VABD(q, float, f, 32, 4);
+  CHECK_FP(TEST_MSG, float, 32, 4, PRIx32, expected_float32, " FP special (-0.0)");
+}
+
+int main (void)
+{
+  exec_vabd ();
+  return 0;
+}
diff --git a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vabdl.c b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vabdl.c
new file mode 100644 (file)
index 0000000..28018ab
--- /dev/null
@@ -0,0 +1,109 @@
+#include <arm_neon.h>
+#include "arm-neon-ref.h"
+#include "compute-ref-data.h"
+
+/* Expected results.  */
+VECT_VAR_DECL(expected,int,8,8) [] = { 0x33, 0x33, 0x33, 0x33,
+                                      0x33, 0x33, 0x33, 0x33 };
+VECT_VAR_DECL(expected,int,16,4) [] = { 0x3333, 0x3333, 0x3333, 0x3333 };
+VECT_VAR_DECL(expected,int,32,2) [] = { 0x33333333, 0x33333333 };
+VECT_VAR_DECL(expected,int,64,1) [] = { 0x3333333333333333 };
+VECT_VAR_DECL(expected,uint,8,8) [] = { 0x33, 0x33, 0x33, 0x33,
+                                       0x33, 0x33, 0x33, 0x33 };
+VECT_VAR_DECL(expected,uint,16,4) [] = { 0x3333, 0x3333, 0x3333, 0x3333 };
+VECT_VAR_DECL(expected,uint,32,2) [] = { 0x33333333, 0x33333333 };
+VECT_VAR_DECL(expected,uint,64,1) [] = { 0x3333333333333333 };
+VECT_VAR_DECL(expected,poly,8,8) [] = { 0x33, 0x33, 0x33, 0x33,
+                                       0x33, 0x33, 0x33, 0x33 };
+VECT_VAR_DECL(expected,poly,16,4) [] = { 0x3333, 0x3333, 0x3333, 0x3333 };
+VECT_VAR_DECL(expected,hfloat,32,2) [] = { 0x33333333, 0x33333333 };
+VECT_VAR_DECL(expected,int,8,16) [] = { 0x33, 0x33, 0x33, 0x33,
+                                       0x33, 0x33, 0x33, 0x33,
+                                       0x33, 0x33, 0x33, 0x33,
+                                       0x33, 0x33, 0x33, 0x33 };
+VECT_VAR_DECL(expected,int,16,8) [] = { 0x11, 0x10, 0xf, 0xe,
+                                       0xd, 0xc, 0xb, 0xa };
+VECT_VAR_DECL(expected,int,32,4) [] = { 0x3, 0x2, 0x1, 0x0 };
+VECT_VAR_DECL(expected,int,64,2) [] = { 0x18, 0x17 };
+VECT_VAR_DECL(expected,uint,8,16) [] = { 0x33, 0x33, 0x33, 0x33,
+                                        0x33, 0x33, 0x33, 0x33,
+                                        0x33, 0x33, 0x33, 0x33,
+                                        0x33, 0x33, 0x33, 0x33 };
+VECT_VAR_DECL(expected,uint,16,8) [] = { 0xef, 0xf0, 0xf1, 0xf2,
+                                        0xf3, 0xf4, 0xf5, 0xf6 };
+VECT_VAR_DECL(expected,uint,32,4) [] = { 0xffe3, 0xffe4, 0xffe5, 0xffe6 };
+VECT_VAR_DECL(expected,uint,64,2) [] = { 0xffffffe8,
+                                        0xffffffe9 };
+VECT_VAR_DECL(expected,poly,8,16) [] = { 0x33, 0x33, 0x33, 0x33,
+                                        0x33, 0x33, 0x33, 0x33,
+                                        0x33, 0x33, 0x33, 0x33,
+                                        0x33, 0x33, 0x33, 0x33 };
+VECT_VAR_DECL(expected,poly,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333,
+                                        0x3333, 0x3333, 0x3333, 0x3333 };
+VECT_VAR_DECL(expected,hfloat,32,4) [] = { 0x33333333, 0x33333333,
+                                          0x33333333, 0x33333333 };
+
+#define TEST_MSG "VABDL"
+void exec_vabdl (void)
+{
+  /* Basic test: v4=vabdl(v1,v2), then store the result.  */
+#define TEST_VABDL(T1, T2, W, W2, N)                                   \
+  VECT_VAR(vector_res, T1, W2, N) =                                    \
+    vabdl_##T2##W(VECT_VAR(vector1, T1, W, N),                         \
+                 VECT_VAR(vector2, T1, W, N));                         \
+  vst1q_##T2##W2(VECT_VAR(result, T1, W2, N), VECT_VAR(vector_res, T1, W2, N))
+
+#define DECL_VABDL_VAR_LONG(VAR)               \
+  DECL_VARIABLE(VAR, int, 16, 8);              \
+  DECL_VARIABLE(VAR, int, 32, 4);              \
+  DECL_VARIABLE(VAR, int, 64, 2);              \
+  DECL_VARIABLE(VAR, uint, 16, 8);             \
+  DECL_VARIABLE(VAR, uint, 32, 4);             \
+  DECL_VARIABLE(VAR, uint, 64, 2)
+
+#define DECL_VABDL_VAR_SHORT(VAR)              \
+  DECL_VARIABLE(VAR, int, 8, 8);               \
+  DECL_VARIABLE(VAR, int, 16, 4);              \
+  DECL_VARIABLE(VAR, int, 32, 2);              \
+  DECL_VARIABLE(VAR, uint, 8, 8);              \
+  DECL_VARIABLE(VAR, uint, 16, 4);             \
+  DECL_VARIABLE(VAR, uint, 32, 2)
+
+  DECL_VABDL_VAR_SHORT(vector1);
+  DECL_VABDL_VAR_SHORT(vector2);
+  DECL_VABDL_VAR_LONG(vector_res);
+
+  clean_results ();
+
+  /* Initialize input "vector1" from "buffer".  */
+  VLOAD(vector1, buffer, , int, s, 8, 8);
+  VLOAD(vector1, buffer, , int, s, 16, 4);
+  VLOAD(vector1, buffer, , int, s, 32, 2);
+  VLOAD(vector1, buffer, , uint, u, 8, 8);
+  VLOAD(vector1, buffer, , uint, u, 16, 4);
+  VLOAD(vector1, buffer, , uint, u, 32, 2);
+
+  /* Choose init value arbitrarily.  */
+  VDUP(vector2, , int, s, 8, 8, 1);
+  VDUP(vector2, , int, s, 16, 4, -13);
+  VDUP(vector2, , int, s, 32, 2, 8);
+  VDUP(vector2, , uint, u, 8, 8, 1);
+  VDUP(vector2, , uint, u, 16, 4, 13);
+  VDUP(vector2, , uint, u, 32, 2, 8);
+
+  /* Execute the tests.  */
+  TEST_VABDL(int, s, 8, 16, 8);
+  TEST_VABDL(int, s, 16, 32, 4);
+  TEST_VABDL(int, s, 32, 64, 2);
+  TEST_VABDL(uint, u, 8, 16, 8);
+  TEST_VABDL(uint, u, 16, 32, 4);
+  TEST_VABDL(uint, u, 32, 64, 2);
+
+  CHECK_RESULTS (TEST_MSG, "");
+}
+
+int main (void)
+{
+  exec_vabdl ();
+  return 0;
+}
diff --git a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vabs.c b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vabs.c
new file mode 100644 (file)
index 0000000..ca3901a
--- /dev/null
@@ -0,0 +1,74 @@
+#define INSN_NAME vabs
+#define TEST_MSG "VABS/VABSQ"
+
+/* Extra tests for functions requiring floating-point types.  */
+void exec_vabs_f32(void);
+#define EXTRA_TESTS exec_vabs_f32
+
+#include "unary_op.inc"
+
+/* Expected results.  */
+VECT_VAR_DECL(expected,int,8,8) [] = { 0x10, 0xf, 0xe, 0xd,
+                                      0xc, 0xb, 0xa, 0x9 };
+VECT_VAR_DECL(expected,int,16,4) [] = { 0x10, 0xf, 0xe, 0xd };
+VECT_VAR_DECL(expected,int,32,2) [] = { 0x10, 0xf };
+VECT_VAR_DECL(expected,int,64,1) [] = { 0x3333333333333333 };
+VECT_VAR_DECL(expected,uint,8,8) [] = { 0x33, 0x33, 0x33, 0x33,
+                                       0x33, 0x33, 0x33, 0x33 };
+VECT_VAR_DECL(expected,uint,16,4) [] = { 0x3333, 0x3333, 0x3333, 0x3333 };
+VECT_VAR_DECL(expected,uint,32,2) [] = { 0x33333333, 0x33333333 };
+VECT_VAR_DECL(expected,uint,64,1) [] = { 0x3333333333333333 };
+VECT_VAR_DECL(expected,poly,8,8) [] = { 0x33, 0x33, 0x33, 0x33,
+                                       0x33, 0x33, 0x33, 0x33 };
+VECT_VAR_DECL(expected,poly,16,4) [] = { 0x3333, 0x3333, 0x3333, 0x3333 };
+VECT_VAR_DECL(expected,hfloat,32,2) [] = { 0x33333333, 0x33333333 };
+VECT_VAR_DECL(expected,int,8,16) [] = { 0x10, 0xf, 0xe, 0xd, 0xc, 0xb, 0xa, 0x9,
+                                       0x8, 0x7, 0x6, 0x5, 0x4, 0x3, 0x2, 0x1 };
+VECT_VAR_DECL(expected,int,16,8) [] = { 0x10, 0xf, 0xe, 0xd,
+                                       0xc, 0xb, 0xa, 0x9 };
+VECT_VAR_DECL(expected,int,32,4) [] = { 0x10, 0xf, 0xe, 0xd };
+VECT_VAR_DECL(expected,int,64,2) [] = { 0x3333333333333333,
+                                       0x3333333333333333 };
+VECT_VAR_DECL(expected,uint,8,16) [] = { 0x33, 0x33, 0x33, 0x33,
+                                        0x33, 0x33, 0x33, 0x33,
+                                        0x33, 0x33, 0x33, 0x33,
+                                        0x33, 0x33, 0x33, 0x33 };
+VECT_VAR_DECL(expected,uint,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333,
+                                        0x3333, 0x3333, 0x3333, 0x3333 };
+VECT_VAR_DECL(expected,uint,32,4) [] = { 0x33333333, 0x33333333,
+                                        0x33333333, 0x33333333 };
+VECT_VAR_DECL(expected,uint,64,2) [] = { 0x3333333333333333,
+                                        0x3333333333333333 };
+VECT_VAR_DECL(expected,poly,8,16) [] = { 0x33, 0x33, 0x33, 0x33,
+                                        0x33, 0x33, 0x33, 0x33,
+                                        0x33, 0x33, 0x33, 0x33,
+                                        0x33, 0x33, 0x33, 0x33 };
+VECT_VAR_DECL(expected,poly,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333,
+                                        0x3333, 0x3333, 0x3333, 0x3333 };
+VECT_VAR_DECL(expected,hfloat,32,4) [] = { 0x33333333, 0x33333333,
+                                         0x33333333, 0x33333333 };
+
+/* Expected results for float32 variants. Needs to be separated since
+   the generic test function does not test floating-point
+   versions.  */
+VECT_VAR_DECL(expected_float32,hfloat,32,2) [] = { 0x40133333, 0x40133333 };
+VECT_VAR_DECL(expected_float32,hfloat,32,4) [] = { 0x4059999a, 0x4059999a,
+                                                  0x4059999a, 0x4059999a };
+
+void exec_vabs_f32(void)
+{
+  DECL_VARIABLE(vector, float, 32, 2);
+  DECL_VARIABLE(vector, float, 32, 4);
+
+  DECL_VARIABLE(vector_res, float, 32, 2);
+  DECL_VARIABLE(vector_res, float, 32, 4);
+
+  VDUP(vector, , float, f, 32, 2, -2.3f);
+  VDUP(vector, q, float, f, 32, 4, 3.4f);
+
+  TEST_UNARY_OP(INSN_NAME, , float, f, 32, 2);
+  TEST_UNARY_OP(INSN_NAME, q, float, f, 32, 4);
+
+  CHECK_FP(TEST_MSG, float, 32, 2, PRIx32, expected_float32, "");
+  CHECK_FP(TEST_MSG, float, 32, 4, PRIx32, expected_float32, "");
+}
diff --git a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vadd.c b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vadd.c
new file mode 100644 (file)
index 0000000..f08c620
--- /dev/null
@@ -0,0 +1,81 @@
+#define INSN_NAME vadd
+#define TEST_MSG "VADD/VADDQ"
+
+/* Extra tests for functions requiring floating-point types.  */
+void exec_vadd_f32(void);
+#define EXTRA_TESTS exec_vadd_f32
+
+#include "binary_op.inc"
+
+/* Expected results.  */
+VECT_VAR_DECL(expected,int,8,8) [] = { 0xf2, 0xf3, 0xf4, 0xf5,
+                                      0xf6, 0xf7, 0xf8, 0xf9 };
+VECT_VAR_DECL(expected,int,16,4) [] = { 0xffec, 0xffed, 0xffee, 0xffef };
+VECT_VAR_DECL(expected,int,32,2) [] = { 0xfffffff3, 0xfffffff4 };
+VECT_VAR_DECL(expected,int,64,1) [] = { 0x54 };
+VECT_VAR_DECL(expected,uint,8,8) [] = { 0x4, 0x5, 0x6, 0x7,
+                                       0x8, 0x9, 0xa, 0xb };
+VECT_VAR_DECL(expected,uint,16,4) [] = { 0xe, 0xf, 0x10, 0x11 };
+VECT_VAR_DECL(expected,uint,32,2) [] = { 0x18, 0x19 };
+VECT_VAR_DECL(expected,uint,64,1) [] = { 0xfffffffffffffff2 };
+VECT_VAR_DECL(expected,poly,8,8) [] = { 0x33, 0x33, 0x33, 0x33,
+                                       0x33, 0x33, 0x33, 0x33 };
+VECT_VAR_DECL(expected,poly,16,4) [] = { 0x3333, 0x3333, 0x3333, 0x3333 };
+VECT_VAR_DECL(expected,hfloat,32,2) [] = { 0x33333333, 0x33333333 };
+VECT_VAR_DECL(expected,int,8,16) [] = { 0xe6, 0xe7, 0xe8, 0xe9,
+                                       0xea, 0xeb, 0xec, 0xed,
+                                       0xee, 0xef, 0xf0, 0xf1,
+                                       0xf2, 0xf3, 0xf4, 0xf5 };
+VECT_VAR_DECL(expected,int,16,8) [] = { 0xffdc, 0xffdd, 0xffde, 0xffdf,
+                                       0xffe0, 0xffe1, 0xffe2, 0xffe3 };
+VECT_VAR_DECL(expected,int,32,4) [] = { 0xffffffd2, 0xffffffd3,
+                                       0xffffffd4, 0xffffffd5 };
+VECT_VAR_DECL(expected,int,64,2) [] = { 0x8, 0x9 };
+VECT_VAR_DECL(expected,uint,8,16) [] = { 0xfc, 0xfd, 0xfe, 0xff,
+                                        0x0, 0x1, 0x2, 0x3,
+                                        0x4, 0x5, 0x6, 0x7,
+                                        0x8, 0x9, 0xa, 0xb };
+VECT_VAR_DECL(expected,uint,16,8) [] = { 0xfff3, 0xfff4, 0xfff5, 0xfff6,
+                                        0xfff7, 0xfff8, 0xfff9, 0xfffa };
+VECT_VAR_DECL(expected,uint,32,4) [] = { 0x27, 0x28, 0x29, 0x2a };
+VECT_VAR_DECL(expected,uint,64,2) [] = { 0xfffffffffffffff3,
+                                        0xfffffffffffffff4 };
+VECT_VAR_DECL(expected,poly,8,16) [] = { 0x33, 0x33, 0x33, 0x33,
+                                        0x33, 0x33, 0x33, 0x33,
+                                        0x33, 0x33, 0x33, 0x33,
+                                        0x33, 0x33, 0x33, 0x33 };
+VECT_VAR_DECL(expected,poly,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333,
+                                        0x3333, 0x3333, 0x3333, 0x3333 };
+VECT_VAR_DECL(expected,hfloat,32,4) [] = { 0x33333333, 0x33333333,
+                                          0x33333333, 0x33333333 };
+
+/* Expected results for float32 variants. Needs to be separated since
+   the generic test function does not test floating-point
+   versions.  */
+VECT_VAR_DECL(expected_float32,hfloat,32,2) [] = { 0x40d9999a, 0x40d9999a };
+VECT_VAR_DECL(expected_float32,hfloat,32,4) [] = { 0x41100000, 0x41100000,
+                                                  0x41100000, 0x41100000 };
+
+void exec_vadd_f32(void)
+{
+  DECL_VARIABLE(vector, float, 32, 2);
+  DECL_VARIABLE(vector, float, 32, 4);
+
+  DECL_VARIABLE(vector2, float, 32, 2);
+  DECL_VARIABLE(vector2, float, 32, 4);
+
+  DECL_VARIABLE(vector_res, float, 32, 2);
+  DECL_VARIABLE(vector_res, float, 32, 4);
+
+  VDUP(vector, , float, f, 32, 2, 2.3f);
+  VDUP(vector, q, float, f, 32, 4, 3.4f);
+
+  VDUP(vector2, , float, f, 32, 2, 4.5f);
+  VDUP(vector2, q, float, f, 32, 4, 5.6f);
+
+  TEST_BINARY_OP(INSN_NAME, , float, f, 32, 2);
+  TEST_BINARY_OP(INSN_NAME, q, float, f, 32, 4);
+
+  CHECK_FP(TEST_MSG, float, 32, 2, PRIx32, expected_float32, "");
+  CHECK_FP(TEST_MSG, float, 32, 4, PRIx32, expected_float32, "");
+}
diff --git a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vaddhn.c b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vaddhn.c
new file mode 100644 (file)
index 0000000..74b4b4d
--- /dev/null
@@ -0,0 +1,109 @@
+#include <arm_neon.h>
+#include "arm-neon-ref.h"
+#include "compute-ref-data.h"
+
+#if defined(__cplusplus)
+#include <cstdint>
+#else
+#include <stdint.h>
+#endif
+
+/* Expected results.  */
+VECT_VAR_DECL(expected,int,8,8) [] = { 0x32, 0x32, 0x32, 0x32,
+                                      0x32, 0x32, 0x32, 0x32 };
+VECT_VAR_DECL(expected,int,16,4) [] = { 0x32, 0x32, 0x32, 0x32 };
+VECT_VAR_DECL(expected,int,32,2) [] = { 0x18, 0x18 };
+VECT_VAR_DECL(expected,int,64,1) [] = { 0x3333333333333333 };
+VECT_VAR_DECL(expected,uint,8,8) [] = { 0x3, 0x3, 0x3, 0x3,
+                                       0x3, 0x3, 0x3, 0x3 };
+VECT_VAR_DECL(expected,uint,16,4) [] = { 0x37, 0x37, 0x37, 0x37 };
+VECT_VAR_DECL(expected,uint,32,2) [] = { 0x3, 0x3 };
+VECT_VAR_DECL(expected,uint,64,1) [] = { 0x3333333333333333 };
+VECT_VAR_DECL(expected,poly,8,8) [] = { 0x33, 0x33, 0x33, 0x33,
+                                       0x33, 0x33, 0x33, 0x33 };
+VECT_VAR_DECL(expected,poly,16,4) [] = { 0x3333, 0x3333, 0x3333, 0x3333 };
+VECT_VAR_DECL(expected,hfloat,32,2) [] = { 0x33333333, 0x33333333 };
+VECT_VAR_DECL(expected,int,8,16) [] = { 0x33, 0x33, 0x33, 0x33,
+                                       0x33, 0x33, 0x33, 0x33,
+                                       0x33, 0x33, 0x33, 0x33,
+                                       0x33, 0x33, 0x33, 0x33 };
+VECT_VAR_DECL(expected,int,16,8) [] = {  0x3333, 0x3333, 0x3333, 0x3333,
+                                        0x3333, 0x3333, 0x3333, 0x3333 };
+VECT_VAR_DECL(expected,int,32,4) [] = { 0x33333333, 0x33333333,
+                                       0x33333333, 0x33333333 };
+VECT_VAR_DECL(expected,int,64,2) [] = { 0x3333333333333333,
+                                       0x3333333333333333 };
+VECT_VAR_DECL(expected,uint,8,16) [] = { 0x33, 0x33, 0x33, 0x33,
+                                        0x33, 0x33, 0x33, 0x33,
+                                        0x33, 0x33, 0x33, 0x33,
+                                        0x33, 0x33, 0x33, 0x33 };
+VECT_VAR_DECL(expected,uint,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333,
+                                        0x3333, 0x3333, 0x3333, 0x3333 };
+VECT_VAR_DECL(expected,uint,32,4) [] = { 0x33333333, 0x33333333,
+                                        0x33333333, 0x33333333 };
+VECT_VAR_DECL(expected,uint,64,2) [] = { 0x3333333333333333,
+                                        0x3333333333333333 };
+VECT_VAR_DECL(expected,poly,8,16) [] = { 0x33, 0x33, 0x33, 0x33,
+                                        0x33, 0x33, 0x33, 0x33,
+                                        0x33, 0x33, 0x33, 0x33,
+                                        0x33, 0x33, 0x33, 0x33 };
+VECT_VAR_DECL(expected,poly,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333,
+                                        0x3333, 0x3333, 0x3333, 0x3333 };
+VECT_VAR_DECL(expected,hfloat,32,4) [] = { 0x33333333, 0x33333333,
+                                          0x33333333, 0x33333333 };
+
+#ifndef INSN_NAME
+#define INSN_NAME vaddhn
+#define TEST_MSG "VADDHN"
+#endif
+
+#define FNNAME1(NAME) void exec_ ## NAME (void)
+#define FNNAME(NAME) FNNAME1(NAME)
+
+FNNAME (INSN_NAME)
+{
+  /* Basic test: vec64=vaddhn(vec128_a, vec128_b), then store the result.  */
+#define TEST_VADDHN1(INSN, T1, T2, W, W2, N)                           \
+  VECT_VAR(vector64, T1, W2, N) = INSN##_##T2##W(VECT_VAR(vector1, T1, W, N), \
+                                                VECT_VAR(vector2, T1, W, N)); \
+  vst1_##T2##W2(VECT_VAR(result, T1, W2, N), VECT_VAR(vector64, T1, W2, N))
+
+#define TEST_VADDHN(INSN, T1, T2, W, W2, N)    \
+  TEST_VADDHN1(INSN, T1, T2, W, W2, N)
+
+  DECL_VARIABLE_64BITS_VARIANTS(vector64);
+  DECL_VARIABLE_128BITS_VARIANTS(vector1);
+  DECL_VARIABLE_128BITS_VARIANTS(vector2);
+
+  clean_results ();
+
+  /* Fill input vector1 and vector2 with arbitrary values */
+  VDUP(vector1, q, int, s, 16, 8, 50*(UINT8_MAX+1));
+  VDUP(vector1, q, int, s, 32, 4, 50*(UINT16_MAX+1));
+  VDUP(vector1, q, int, s, 64, 2, 24*((uint64_t)UINT32_MAX+1));
+  VDUP(vector1, q, uint, u, 16, 8, 3*(UINT8_MAX+1));
+  VDUP(vector1, q, uint, u, 32, 4, 55*(UINT16_MAX+1));
+  VDUP(vector1, q, uint, u, 64, 2, 3*((uint64_t)UINT32_MAX+1));
+
+  VDUP(vector2, q, int, s, 16, 8, (uint16_t)UINT8_MAX);
+  VDUP(vector2, q, int, s, 32, 4, (uint32_t)UINT16_MAX);
+  VDUP(vector2, q, int, s, 64, 2, (uint64_t)UINT32_MAX);
+  VDUP(vector2, q, uint, u, 16, 8, (uint16_t)UINT8_MAX);
+  VDUP(vector2, q, uint, u, 32, 4, (uint32_t)UINT16_MAX);
+  VDUP(vector2, q, uint, u, 64, 2, (uint64_t)UINT32_MAX);
+
+  TEST_VADDHN(INSN_NAME, int, s, 16, 8, 8);
+  TEST_VADDHN(INSN_NAME, int, s, 32, 16, 4);
+  TEST_VADDHN(INSN_NAME, int, s, 64, 32, 2);
+  TEST_VADDHN(INSN_NAME, uint, u, 16, 8, 8);
+  TEST_VADDHN(INSN_NAME, uint, u, 32, 16, 4);
+  TEST_VADDHN(INSN_NAME, uint, u, 64, 32, 2);
+
+  CHECK_RESULTS (TEST_MSG, "");
+}
+
+int main (void)
+{
+  FNNAME (INSN_NAME);
+  return 0;
+}
diff --git a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vaddl.c b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vaddl.c
new file mode 100644 (file)
index 0000000..861abec
--- /dev/null
@@ -0,0 +1,122 @@
+#include <arm_neon.h>
+#include "arm-neon-ref.h"
+#include "compute-ref-data.h"
+
+/* Expected results.  */
+VECT_VAR_DECL(expected,int,8,8) [] = { 0x33, 0x33, 0x33, 0x33,
+                                      0x33, 0x33, 0x33, 0x33 };
+VECT_VAR_DECL(expected,int,16,4) [] = { 0x33, 0x33, 0x33, 0x33 };
+VECT_VAR_DECL(expected,int,32,2) [] = { 0x33, 0x33 };
+VECT_VAR_DECL(expected,int,64,1) [] = { 0x3333333333333333 };
+VECT_VAR_DECL(expected,uint,8,8) [] = { 0x3, 0x3, 0x3, 0x3,
+                                       0x3, 0x3, 0x3, 0x3 };
+VECT_VAR_DECL(expected,uint,16,4) [] = { 0x37, 0x37, 0x37, 0x37 };
+VECT_VAR_DECL(expected,uint,32,2) [] = { 0x3, 0x3 };
+VECT_VAR_DECL(expected,uint,64,1) [] = { 0x3333333333333333 };
+VECT_VAR_DECL(expected,poly,8,8) [] = { 0x33, 0x33, 0x33, 0x33,
+                                       0x33, 0x33, 0x33, 0x33 };
+VECT_VAR_DECL(expected,poly,16,4) [] = { 0x3333, 0x3333, 0x3333, 0x3333 };
+VECT_VAR_DECL(expected,hfloat,32,2) [] = { 0x33333333, 0x33333333 };
+VECT_VAR_DECL(expected,int,8,16) [] = { 0x33, 0x33, 0x33, 0x33,
+                                       0x33, 0x33, 0x33, 0x33,
+                                       0x33, 0x33, 0x33, 0x33,
+                                       0x33, 0x33, 0x33, 0x33 };
+VECT_VAR_DECL(expected,int,16,8) [] = {  0xffe3, 0xffe4, 0xffe5, 0xffe6,
+                                        0xffe7, 0xffe8, 0xffe9, 0xffea };
+VECT_VAR_DECL(expected,int,32,4) [] = { 0xffffffe2, 0xffffffe3,
+                                       0xffffffe4, 0xffffffe5 };
+VECT_VAR_DECL(expected,int,64,2) [] = { 0xffffffffffffffe0,
+                                       0xffffffffffffffe1 };
+VECT_VAR_DECL(expected,uint,8,16) [] = { 0x33, 0x33, 0x33, 0x33,
+                                        0x33, 0x33, 0x33, 0x33,
+                                        0x33, 0x33, 0x33, 0x33,
+                                        0x33, 0x33, 0x33, 0x33 };
+VECT_VAR_DECL(expected,uint,16,8) [] = { 0x1e3, 0x1e4, 0x1e5, 0x1e6,
+                                        0x1e7, 0x1e8, 0x1e9, 0x1ea };
+VECT_VAR_DECL(expected,uint,32,4) [] = { 0x1ffe1, 0x1ffe2,
+                                        0x1ffe3, 0x1ffe4 };
+VECT_VAR_DECL(expected,uint,64,2) [] = { 0x1ffffffe0, 0x1ffffffe1 };
+VECT_VAR_DECL(expected,poly,8,16) [] = { 0x33, 0x33, 0x33, 0x33,
+                                        0x33, 0x33, 0x33, 0x33,
+                                        0x33, 0x33, 0x33, 0x33,
+                                        0x33, 0x33, 0x33, 0x33 };
+VECT_VAR_DECL(expected,poly,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333,
+                                        0x3333, 0x3333, 0x3333, 0x3333 };
+VECT_VAR_DECL(expected,hfloat,32,4) [] = { 0x33333333, 0x33333333,
+                                          0x33333333, 0x33333333 };
+
+#ifndef INSN_NAME
+#define INSN_NAME vaddl
+#define TEST_MSG "VADDL"
+#endif
+
+#define FNNAME1(NAME) void exec_ ## NAME (void)
+#define FNNAME(NAME) FNNAME1(NAME)
+
+FNNAME (INSN_NAME)
+{
+  /* Basic test: y=vaddl(x1,x2), then store the result.  */
+#define TEST_VADDL1(INSN, T1, T2, W, W2, N)                            \
+  VECT_VAR(vector_res, T1, W2, N) =                                    \
+    INSN##_##T2##W(VECT_VAR(vector, T1, W, N),                         \
+                  VECT_VAR(vector2, T1, W, N));                        \
+  vst1q_##T2##W2(VECT_VAR(result, T1, W2, N), VECT_VAR(vector_res, T1, W2, N))
+
+#define TEST_VADDL(INSN, T1, T2, W, W2, N)     \
+  TEST_VADDL1(INSN, T1, T2, W, W2, N)
+
+  DECL_VARIABLE(vector, int, 8, 8);
+  DECL_VARIABLE(vector, int, 16, 4);
+  DECL_VARIABLE(vector, int, 32, 2);
+  DECL_VARIABLE(vector, uint, 8, 8);
+  DECL_VARIABLE(vector, uint, 16, 4);
+  DECL_VARIABLE(vector, uint, 32, 2);
+
+  DECL_VARIABLE(vector2, int, 8, 8);
+  DECL_VARIABLE(vector2, int, 16, 4);
+  DECL_VARIABLE(vector2, int, 32, 2);
+  DECL_VARIABLE(vector2, uint, 8, 8);
+  DECL_VARIABLE(vector2, uint, 16, 4);
+  DECL_VARIABLE(vector2, uint, 32, 2);
+
+  DECL_VARIABLE(vector_res, int, 16, 8);
+  DECL_VARIABLE(vector_res, int, 32, 4);
+  DECL_VARIABLE(vector_res, int, 64, 2);
+  DECL_VARIABLE(vector_res, uint, 16, 8);
+  DECL_VARIABLE(vector_res, uint, 32, 4);
+  DECL_VARIABLE(vector_res, uint, 64, 2);
+
+  clean_results ();
+
+  /* Initialize input "vector" from "buffer".  */
+  VLOAD(vector, buffer, , int, s, 8, 8);
+  VLOAD(vector, buffer, , int, s, 16, 4);
+  VLOAD(vector, buffer, , int, s, 32, 2);
+  VLOAD(vector, buffer, , uint, u, 8, 8);
+  VLOAD(vector, buffer, , uint, u, 16, 4);
+  VLOAD(vector, buffer, , uint, u, 32, 2);
+
+  /* Choose init value arbitrarily.  */
+  VDUP(vector2, , int, s, 8, 8, -13);
+  VDUP(vector2, , int, s, 16, 4, -14);
+  VDUP(vector2, , int, s, 32, 2, -16);
+  VDUP(vector2, , uint, u, 8, 8, 0xf3);
+  VDUP(vector2, , uint, u, 16, 4, 0xfff1);
+  VDUP(vector2, , uint, u, 32, 2, 0xfffffff0);
+
+  /* Execute the tests.  */
+  TEST_VADDL(INSN_NAME, int, s, 8, 16, 8);
+  TEST_VADDL(INSN_NAME, int, s, 16, 32, 4);
+  TEST_VADDL(INSN_NAME, int, s, 32, 64, 2);
+  TEST_VADDL(INSN_NAME, uint, u, 8, 16, 8);
+  TEST_VADDL(INSN_NAME, uint, u, 16, 32, 4);
+  TEST_VADDL(INSN_NAME, uint, u, 32, 64, 2);
+
+  CHECK_RESULTS (TEST_MSG, "");
+}
+
+int main (void)
+{
+  FNNAME (INSN_NAME);
+  return 0;
+}
diff --git a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vaddw.c b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vaddw.c
new file mode 100644 (file)
index 0000000..5804cd7
--- /dev/null
@@ -0,0 +1,122 @@
+#include <arm_neon.h>
+#include "arm-neon-ref.h"
+#include "compute-ref-data.h"
+
+/* Expected results.  */
+VECT_VAR_DECL(expected,int,8,8) [] = { 0x33, 0x33, 0x33, 0x33,
+                                      0x33, 0x33, 0x33, 0x33 };
+VECT_VAR_DECL(expected,int,16,4) [] = { 0x33, 0x33, 0x33, 0x33 };
+VECT_VAR_DECL(expected,int,32,2) [] = { 0x33, 0x33 };
+VECT_VAR_DECL(expected,int,64,1) [] = { 0x3333333333333333 };
+VECT_VAR_DECL(expected,uint,8,8) [] = { 0x3, 0x3, 0x3, 0x3,
+                                       0x3, 0x3, 0x3, 0x3 };
+VECT_VAR_DECL(expected,uint,16,4) [] = { 0x37, 0x37, 0x37, 0x37 };
+VECT_VAR_DECL(expected,uint,32,2) [] = { 0x3, 0x3 };
+VECT_VAR_DECL(expected,uint,64,1) [] = { 0x3333333333333333 };
+VECT_VAR_DECL(expected,poly,8,8) [] = { 0x33, 0x33, 0x33, 0x33,
+                                       0x33, 0x33, 0x33, 0x33 };
+VECT_VAR_DECL(expected,poly,16,4) [] = { 0x3333, 0x3333, 0x3333, 0x3333 };
+VECT_VAR_DECL(expected,hfloat,32,2) [] = { 0x33333333, 0x33333333 };
+VECT_VAR_DECL(expected,int,8,16) [] = { 0x33, 0x33, 0x33, 0x33,
+                                       0x33, 0x33, 0x33, 0x33,
+                                       0x33, 0x33, 0x33, 0x33,
+                                       0x33, 0x33, 0x33, 0x33 };
+VECT_VAR_DECL(expected,int,16,8) [] = {  0xffe3, 0xffe4, 0xffe5, 0xffe6,
+                                        0xffe7, 0xffe8, 0xffe9, 0xffea };
+VECT_VAR_DECL(expected,int,32,4) [] = { 0xffffffe2, 0xffffffe3,
+                                       0xffffffe4, 0xffffffe5 };
+VECT_VAR_DECL(expected,int,64,2) [] = { 0xffffffffffffffe0,
+                                       0xffffffffffffffe1 };
+VECT_VAR_DECL(expected,uint,8,16) [] = { 0x33, 0x33, 0x33, 0x33,
+                                        0x33, 0x33, 0x33, 0x33,
+                                        0x33, 0x33, 0x33, 0x33,
+                                        0x33, 0x33, 0x33, 0x33 };
+VECT_VAR_DECL(expected,uint,16,8) [] = { 0xe3, 0xe4, 0xe5, 0xe6,
+                                        0xe7, 0xe8, 0xe9, 0xea };
+VECT_VAR_DECL(expected,uint,32,4) [] = { 0xffe1, 0xffe2,
+                                        0xffe3, 0xffe4 };
+VECT_VAR_DECL(expected,uint,64,2) [] = { 0xffffffe0, 0xffffffe1 };
+VECT_VAR_DECL(expected,poly,8,16) [] = { 0x33, 0x33, 0x33, 0x33,
+                                        0x33, 0x33, 0x33, 0x33,
+                                        0x33, 0x33, 0x33, 0x33,
+                                        0x33, 0x33, 0x33, 0x33 };
+VECT_VAR_DECL(expected,poly,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333,
+                                        0x3333, 0x3333, 0x3333, 0x3333 };
+VECT_VAR_DECL(expected,hfloat,32,4) [] = { 0x33333333, 0x33333333,
+                                          0x33333333, 0x33333333 };
+
+#ifndef INSN_NAME
+#define INSN_NAME vaddw
+#define TEST_MSG "VADDW"
+#endif
+
+#define FNNAME1(NAME) void exec_ ## NAME (void)
+#define FNNAME(NAME) FNNAME1(NAME)
+
+FNNAME (INSN_NAME)
+{
+  /* Basic test: y=vaddw(x1,x2), then store the result.  */
+#define TEST_VADDW1(INSN, T1, T2, W, W2, N)                            \
+  VECT_VAR(vector_res, T1, W2, N) =                                    \
+    INSN##_##T2##W(VECT_VAR(vector, T1, W2, N),                                \
+                  VECT_VAR(vector2, T1, W, N));                        \
+  vst1q_##T2##W2(VECT_VAR(result, T1, W2, N), VECT_VAR(vector_res, T1, W2, N))
+
+#define TEST_VADDW(INSN, T1, T2, W, W2, N)     \
+  TEST_VADDW1(INSN, T1, T2, W, W2, N)
+
+  DECL_VARIABLE(vector, int, 16, 8);
+  DECL_VARIABLE(vector, int, 32, 4);
+  DECL_VARIABLE(vector, int, 64, 2);
+  DECL_VARIABLE(vector, uint, 16, 8);
+  DECL_VARIABLE(vector, uint, 32, 4);
+  DECL_VARIABLE(vector, uint, 64, 2);
+
+  DECL_VARIABLE(vector2, int, 8, 8);
+  DECL_VARIABLE(vector2, int, 16, 4);
+  DECL_VARIABLE(vector2, int, 32, 2);
+  DECL_VARIABLE(vector2, uint, 8, 8);
+  DECL_VARIABLE(vector2, uint, 16, 4);
+  DECL_VARIABLE(vector2, uint, 32, 2);
+
+  DECL_VARIABLE(vector_res, int, 16, 8);
+  DECL_VARIABLE(vector_res, int, 32, 4);
+  DECL_VARIABLE(vector_res, int, 64, 2);
+  DECL_VARIABLE(vector_res, uint, 16, 8);
+  DECL_VARIABLE(vector_res, uint, 32, 4);
+  DECL_VARIABLE(vector_res, uint, 64, 2);
+
+  clean_results ();
+
+  /* Initialize input "vector" from "buffer".  */
+  VLOAD(vector, buffer, q, int, s, 16, 8);
+  VLOAD(vector, buffer, q, int, s, 32, 4);
+  VLOAD(vector, buffer, q, int, s, 64, 2);
+  VLOAD(vector, buffer, q, uint, u, 16, 8);
+  VLOAD(vector, buffer, q, uint, u, 32, 4);
+  VLOAD(vector, buffer, q, uint, u, 64, 2);
+
+  /* Choose init value arbitrarily.  */
+  VDUP(vector2, , int, s, 8, 8, -13);
+  VDUP(vector2, , int, s, 16, 4, -14);
+  VDUP(vector2, , int, s, 32, 2, -16);
+  VDUP(vector2, , uint, u, 8, 8, 0xf3);
+  VDUP(vector2, , uint, u, 16, 4, 0xfff1);
+  VDUP(vector2, , uint, u, 32, 2, 0xfffffff0);
+
+  /* Execute the tests.  */
+  TEST_VADDW(INSN_NAME, int, s, 8, 16, 8);
+  TEST_VADDW(INSN_NAME, int, s, 16, 32, 4);
+  TEST_VADDW(INSN_NAME, int, s, 32, 64, 2);
+  TEST_VADDW(INSN_NAME, uint, u, 8, 16, 8);
+  TEST_VADDW(INSN_NAME, uint, u, 16, 32, 4);
+  TEST_VADDW(INSN_NAME, uint, u, 32, 64, 2);
+
+  CHECK_RESULTS (TEST_MSG, "");
+}
+
+int main (void)
+{
+  FNNAME (INSN_NAME);
+  return 0;
+}
diff --git a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vand.c b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vand.c
new file mode 100644 (file)
index 0000000..e7e65dd
--- /dev/null
@@ -0,0 +1,45 @@
+#define INSN_NAME vand
+#define TEST_MSG "VAND/VANDQ"
+
+#include "binary_op.inc"
+
+/* Expected results.  */
+VECT_VAR_DECL(expected,int,8,8) [] = { 0x0, 0x0, 0x2, 0x2,
+                                      0x0, 0x0, 0x2, 0x2 };
+VECT_VAR_DECL(expected,int,16,4) [] = { 0xfff0, 0xfff0, 0xfff0, 0xfff0 };
+VECT_VAR_DECL(expected,int,32,2) [] = { 0x0, 0x1 };
+VECT_VAR_DECL(expected,int,64,1) [] = { 0x60 };
+VECT_VAR_DECL(expected,uint,8,8) [] = { 0x10, 0x10, 0x10, 0x10,
+                                       0x14, 0x14, 0x14, 0x14 };
+VECT_VAR_DECL(expected,uint,16,4) [] = { 0x10, 0x10, 0x12, 0x12 };
+VECT_VAR_DECL(expected,uint,32,2) [] = { 0x20, 0x20 };
+VECT_VAR_DECL(expected,uint,64,1) [] = { 0x0 };
+VECT_VAR_DECL(expected,poly,8,8) [] = { 0x33, 0x33, 0x33, 0x33,
+                                       0x33, 0x33, 0x33, 0x33 };
+VECT_VAR_DECL(expected,poly,16,4) [] = { 0x3333, 0x3333, 0x3333, 0x3333 };
+VECT_VAR_DECL(expected,hfloat,32,2) [] = { 0x33333333, 0x33333333 };
+VECT_VAR_DECL(expected,int,8,16) [] = { 0xf0, 0xf0, 0xf2, 0xf2,
+                                       0xf4, 0xf4, 0xf6, 0xf6,
+                                       0xf0, 0xf0, 0xf2, 0xf2,
+                                       0xf4, 0xf4, 0xf6, 0xf6 };
+VECT_VAR_DECL(expected,int,16,8) [] = { 0xffe0, 0xffe0, 0xffe0, 0xffe0,
+                                       0xffe4, 0xffe4, 0xffe4, 0xffe4 };
+VECT_VAR_DECL(expected,int,32,4) [] = { 0xffffffe0, 0xffffffe0,
+                                       0xffffffe2, 0xffffffe2 };
+VECT_VAR_DECL(expected,int,64,2) [] = { 0x10, 0x10 };
+VECT_VAR_DECL(expected,uint,8,16) [] = { 0x0, 0x0, 0x0, 0x0,
+                                        0x4, 0x4, 0x4, 0x4,
+                                        0x8, 0x8, 0x8, 0x8,
+                                        0xc, 0xc, 0xc, 0xc };
+VECT_VAR_DECL(expected,uint,16,8) [] = { 0x0, 0x1, 0x2, 0x3,
+                                        0x0, 0x1, 0x2, 0x3 };
+VECT_VAR_DECL(expected,uint,32,4) [] = { 0x30, 0x31, 0x32, 0x33 };
+VECT_VAR_DECL(expected,uint,64,2) [] = { 0x0, 0x1 };
+VECT_VAR_DECL(expected,poly,8,16) [] = { 0x33, 0x33, 0x33, 0x33,
+                                        0x33, 0x33, 0x33, 0x33,
+                                        0x33, 0x33, 0x33, 0x33,
+                                        0x33, 0x33, 0x33, 0x33 };
+VECT_VAR_DECL(expected,poly,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333,
+                                        0x3333, 0x3333, 0x3333, 0x3333 };
+VECT_VAR_DECL(expected,hfloat,32,4) [] = { 0x33333333, 0x33333333,
+                                          0x33333333, 0x33333333 };
diff --git a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vbic.c b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vbic.c
new file mode 100644 (file)
index 0000000..83e57ff
--- /dev/null
@@ -0,0 +1,46 @@
+#define INSN_NAME vbic
+#define TEST_MSG "VBIC/VBICQ"
+
+#include "binary_op.inc"
+
+/* Expected results.  */
+VECT_VAR_DECL(expected,int,8,8) [] = { 0xf0, 0xf1, 0xf0, 0xf1,
+                                      0xf4, 0xf5, 0xf4, 0xf5 };
+VECT_VAR_DECL(expected,int,16,4) [] = { 0x0, 0x1, 0x2, 0x3 };
+VECT_VAR_DECL(expected,int,32,2) [] = { 0xfffffff0, 0xfffffff0 };
+VECT_VAR_DECL(expected,int,64,1) [] = { 0xffffffffffffff90 };
+VECT_VAR_DECL(expected,uint,8,8) [] = { 0xe0, 0xe1, 0xe2, 0xe3,
+                                       0xe0, 0xe1, 0xe2, 0xe3 };
+VECT_VAR_DECL(expected,uint,16,4) [] = { 0xffe0, 0xffe1, 0xffe0, 0xffe1 };
+VECT_VAR_DECL(expected,uint,32,2) [] = { 0xffffffd0, 0xffffffd1 };
+VECT_VAR_DECL(expected,uint,64,1) [] = { 0xfffffffffffffff0 };
+VECT_VAR_DECL(expected,poly,8,8) [] = { 0x33, 0x33, 0x33, 0x33,
+                                       0x33, 0x33, 0x33, 0x33 };
+VECT_VAR_DECL(expected,poly,16,4) [] = { 0x3333, 0x3333, 0x3333, 0x3333 };
+VECT_VAR_DECL(expected,hfloat,32,2) [] = { 0x33333333, 0x33333333 };
+VECT_VAR_DECL(expected,int,8,16) [] = { 0x0, 0x1, 0x0, 0x1,
+                                       0x0, 0x1, 0x0, 0x1,
+                                       0x8, 0x9, 0x8, 0x9,
+                                       0x8, 0x9, 0x8, 0x9 };
+VECT_VAR_DECL(expected,int,16,8) [] = { 0x10, 0x11, 0x12, 0x13,
+                                       0x10, 0x11, 0x12, 0x13 };
+VECT_VAR_DECL(expected,int,32,4) [] = { 0x10, 0x11, 0x10, 0x11 };
+VECT_VAR_DECL(expected,int,64,2) [] = { 0xffffffffffffffe0, 0xffffffffffffffe1 };
+VECT_VAR_DECL(expected,uint,8,16) [] = { 0xf0, 0xf1, 0xf2, 0xf3,
+                                        0xf0, 0xf1, 0xf2, 0xf3,
+                                        0xf0, 0xf1, 0xf2, 0xf3,
+                                        0xf0, 0xf1, 0xf2, 0xf3 };
+VECT_VAR_DECL(expected,uint,16,8) [] = { 0xfff0, 0xfff0, 0xfff0, 0xfff0,
+                                        0xfff4, 0xfff4, 0xfff4, 0xfff4 };
+VECT_VAR_DECL(expected,uint,32,4) [] = { 0xffffffc0, 0xffffffc0,
+                                        0xffffffc0, 0xffffffc0 };
+VECT_VAR_DECL(expected,uint,64,2) [] = { 0xfffffffffffffff0,
+                                        0xfffffffffffffff0 };
+VECT_VAR_DECL(expected,poly,8,16) [] = { 0x33, 0x33, 0x33, 0x33,
+                                        0x33, 0x33, 0x33, 0x33,
+                                        0x33, 0x33, 0x33, 0x33,
+                                        0x33, 0x33, 0x33, 0x33 };
+VECT_VAR_DECL(expected,poly,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333,
+                                        0x3333, 0x3333, 0x3333, 0x3333 };
+VECT_VAR_DECL(expected,hfloat,32,4) [] = { 0x33333333, 0x33333333,
+                                          0x33333333, 0x33333333 };
diff --git a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vbsl.c b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vbsl.c
new file mode 100644 (file)
index 0000000..bb17f0a
--- /dev/null
@@ -0,0 +1,124 @@
+#include <arm_neon.h>
+#include "arm-neon-ref.h"
+#include "compute-ref-data.h"
+
+/* Expected results.  */
+VECT_VAR_DECL(expected,int,8,8) [] = { 0xf2, 0xf2, 0xf2, 0xf2,
+                                      0xf6, 0xf6, 0xf6, 0xf6 };
+VECT_VAR_DECL(expected,int,16,4) [] = { 0xfff0, 0xfff0, 0xfff2, 0xfff2 };
+VECT_VAR_DECL(expected,int,32,2) [] = { 0xfffffff0, 0xfffffff0 };
+VECT_VAR_DECL(expected,int,64,1) [] = { 0xfffffffffffffffd };
+VECT_VAR_DECL(expected,uint,8,8) [] = { 0xf3, 0xf3, 0xf3, 0xf3,
+                                       0xf7, 0xf7, 0xf7, 0xf7 };
+VECT_VAR_DECL(expected,uint,16,4) [] = { 0xfff0, 0xfff0, 0xfff2, 0xfff2 };
+VECT_VAR_DECL(expected,uint,32,2) [] = { 0xfffffff0, 0xfffffff0 };
+VECT_VAR_DECL(expected,uint,64,1) [] = { 0xfffffff1 };
+VECT_VAR_DECL(expected,poly,8,8) [] = { 0xf3, 0xf3, 0xf3, 0xf3,
+                                       0xf7, 0xf7, 0xf7, 0xf7 };
+VECT_VAR_DECL(expected,poly,16,4) [] = { 0xfff0, 0xfff0, 0xfff2, 0xfff2 };
+VECT_VAR_DECL(expected,hfloat,32,2) [] = { 0xc1800004, 0xc1700004 };
+VECT_VAR_DECL(expected,int,8,16) [] = { 0xf2, 0xf2, 0xf2, 0xf2,
+                                       0xf6, 0xf6, 0xf6, 0xf6,
+                                       0xf2, 0xf2, 0xf2, 0xf2,
+                                       0xf6, 0xf6, 0xf6, 0xf6 };
+VECT_VAR_DECL(expected,int,16,8) [] = { 0xfff0, 0xfff0, 0xfff2, 0xfff2,
+                                       0xfff4, 0xfff4, 0xfff6, 0xfff6 };
+VECT_VAR_DECL(expected,int,32,4) [] = { 0xfffffff0, 0xfffffff0,
+                                       0xfffffff2, 0xfffffff2 };
+VECT_VAR_DECL(expected,int,64,2) [] = { 0xfffffffffffffffd,
+                                       0xfffffffffffffffd };
+VECT_VAR_DECL(expected,uint,8,16) [] = { 0xf3, 0xf3, 0xf3, 0xf3,
+                                        0xf7, 0xf7, 0xf7, 0xf7,
+                                        0xf3, 0xf3, 0xf3, 0xf3,
+                                        0xf7, 0xf7, 0xf7, 0xf7 };
+VECT_VAR_DECL(expected,uint,16,8) [] = { 0xfff0, 0xfff0, 0xfff2, 0xfff2,
+                                        0xfff4, 0xfff4, 0xfff6, 0xfff6 };
+VECT_VAR_DECL(expected,uint,32,4) [] = { 0xfffffff0, 0xfffffff0,
+                                        0xfffffff2, 0xfffffff2 };
+VECT_VAR_DECL(expected,uint,64,2) [] = { 0xfffffff1,
+                                        0xfffffff1 };
+VECT_VAR_DECL(expected,poly,8,16) [] = { 0xf3, 0xf3, 0xf3, 0xf3,
+                                        0xf7, 0xf7, 0xf7, 0xf7,
+                                        0xf3, 0xf3, 0xf3, 0xf3,
+                                        0xf7, 0xf7, 0xf7, 0xf7 };
+VECT_VAR_DECL(expected,poly,16,8) [] = { 0xfff0, 0xfff0, 0xfff2, 0xfff2,
+                                        0xfff4, 0xfff4, 0xfff6, 0xfff6 };
+VECT_VAR_DECL(expected,hfloat,32,4) [] = { 0xc1800001, 0xc1700001,
+                                          0xc1600001, 0xc1500001 };
+
+#define TEST_MSG "VBSL/VBSLQ"
+void exec_vbsl (void)
+{
+  /* Basic test: y=vbsl(unsigned_vec,x1,x2), then store the result.  */
+#define TEST_VBSL(T3, Q, T1, T2, W, N)                                 \
+  VECT_VAR(vector_res, T1, W, N) =                                     \
+    vbsl##Q##_##T2##W(VECT_VAR(vector_first, T3, W, N),                        \
+                     VECT_VAR(vector, T1, W, N),                       \
+                     VECT_VAR(vector2, T1, W, N));                     \
+  vst1##Q##_##T2##W(VECT_VAR(result, T1, W, N), VECT_VAR(vector_res, T1, W, N))
+
+  DECL_VARIABLE_ALL_VARIANTS(vector);
+  DECL_VARIABLE_ALL_VARIANTS(vector2);
+  DECL_VARIABLE_ALL_VARIANTS(vector_res);
+
+  DECL_VARIABLE_UNSIGNED_VARIANTS(vector_first);
+
+  clean_results ();
+
+  TEST_MACRO_ALL_VARIANTS_2_5(VLOAD, vector, buffer);
+  VLOAD(vector, buffer, , float, f, 32, 2);
+  VLOAD(vector, buffer, q, float, f, 32, 4);
+
+  /* Choose init value arbitrarily, will be used for vector
+     comparison. As we want different values for each type variant, we
+     can't use generic initialization macros.  */
+  VDUP(vector2, , int, s, 8, 8, -10);
+  VDUP(vector2, , int, s, 16, 4, -14);
+  VDUP(vector2, , int, s, 32, 2, -30);
+  VDUP(vector2, , int, s, 64, 1, -33);
+  VDUP(vector2, , uint, u, 8, 8, 0xF3);
+  VDUP(vector2, , uint, u, 16, 4, 0xFFF2);
+  VDUP(vector2, , uint, u, 32, 2, 0xFFFFFFF0);
+  VDUP(vector2, , uint, u, 64, 1, 0xFFFFFFF3);
+  VDUP(vector2, , float, f, 32, 2, -30.3f);
+  VDUP(vector2, , poly, p, 8, 8, 0xF3);
+  VDUP(vector2, , poly, p, 16, 4, 0xFFF2);
+
+  VDUP(vector2, q, int, s, 8, 16, -10);
+  VDUP(vector2, q, int, s, 16, 8, -14);
+  VDUP(vector2, q, int, s, 32, 4, -30);
+  VDUP(vector2, q, int, s, 64, 2, -33);
+  VDUP(vector2, q, uint, u, 8, 16, 0xF3);
+  VDUP(vector2, q, uint, u, 16, 8, 0xFFF2);
+  VDUP(vector2, q, uint, u, 32, 4, 0xFFFFFFF0);
+  VDUP(vector2, q, uint, u, 64, 2, 0xFFFFFFF3);
+  VDUP(vector2, q, poly, p, 8, 16, 0xF3);
+  VDUP(vector2, q, poly, p, 16, 8, 0xFFF2);
+  VDUP(vector2, q, float, f, 32, 4, -30.4f);
+
+  VDUP(vector_first, , uint, u, 8, 8, 0xF4);
+  VDUP(vector_first, , uint, u, 16, 4, 0xFFF6);
+  VDUP(vector_first, , uint, u, 32, 2, 0xFFFFFFF2);
+  VDUP(vector_first, , uint, u, 64, 1, 0xFFFFFFF2);
+  VDUP(vector_first, q, uint, u, 8, 16, 0xF4);
+  VDUP(vector_first, q, uint, u, 16, 8, 0xFFF6);
+  VDUP(vector_first, q, uint, u, 32, 4, 0xFFFFFFF2);
+  VDUP(vector_first, q, uint, u, 64, 2, 0xFFFFFFF2);
+
+  /* Execute the tests.  */
+  TEST_MACRO_ALL_VARIANTS_1_5(TEST_VBSL, uint);
+  TEST_VBSL(uint, , poly, p, 8, 8);
+  TEST_VBSL(uint, , poly, p, 16, 4);
+  TEST_VBSL(uint, q, poly, p, 8, 16);
+  TEST_VBSL(uint, q, poly, p, 16, 8);
+  TEST_VBSL(uint, , float, f, 32, 2);
+  TEST_VBSL(uint, q, float, f, 32, 4);
+
+  CHECK_RESULTS (TEST_MSG, "");
+}
+
+int main (void)
+{
+  exec_vbsl ();
+  return 0;
+}
diff --git a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vcage.c b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vcage.c
new file mode 100644 (file)
index 0000000..219d03f
--- /dev/null
@@ -0,0 +1,52 @@
+#define INSN_NAME vcage
+#define TEST_MSG "VCAGE/VCAGEQ"
+
+#include "cmp_fp_op.inc"
+
+/* Expected results.  */
+VECT_VAR_DECL(expected,int,8,8) [] = { 0x33, 0x33, 0x33, 0x33,
+                                      0x33, 0x33, 0x33, 0x33 };
+VECT_VAR_DECL(expected,int,16,4) [] = { 0x333, 0x3333, 0x3333, 0x3333 };
+VECT_VAR_DECL(expected,int,32,2) [] = { 0x33333333, 0x33333333 };
+VECT_VAR_DECL(expected,int,64,1) [] = { 0x3333333333333333 };
+VECT_VAR_DECL(expected,uint,8,8) [] = { 0x33, 0x33, 0x33, 0x33,
+                                       0x33, 0x33, 0x33, 0x33 };
+VECT_VAR_DECL(expected,uint,16,4) [] = { 0x333, 0x3333, 0x3333, 0x3333 };
+VECT_VAR_DECL(expected,uint,32,2) [] = { 0xffffffff, 0x0 };
+VECT_VAR_DECL(expected,uint,64,1) [] = { 0x3333333333333333 };
+VECT_VAR_DECL(expected,poly,8,8) [] = { 0x33, 0x33, 0x33, 0x33,
+                                       0x33, 0x33, 0x33, 0x33 };
+VECT_VAR_DECL(expected,poly,16,4) [] = { 0x3333, 0x3333, 0x3333, 0x3333 };
+VECT_VAR_DECL(expected,hfloat,32,2) [] = { 0x33333333, 0x33333333 };
+VECT_VAR_DECL(expected,int,8,16) [] = { 0x33, 0x33, 0x33, 0x33,
+                                       0x33, 0x33, 0x33, 0x33,
+                                       0x33, 0x33, 0x33, 0x33,
+                                       0x33, 0x33, 0x33, 0x33 };
+VECT_VAR_DECL(expected,int,16,8) [] = { 0x333, 0x3333, 0x3333, 0x3333,
+                                       0x333, 0x3333, 0x3333, 0x3333 };
+VECT_VAR_DECL(expected,int,32,4) [] = { 0x33333333, 0x33333333,
+                                       0x33333333, 0x33333333 };
+VECT_VAR_DECL(expected,int,64,2) [] = { 0x3333333333333333,
+                                       0x3333333333333333 };
+VECT_VAR_DECL(expected,uint,8,16) [] = { 0x33, 0x33, 0x33, 0x33,
+                                        0x33, 0x33, 0x33, 0x33,
+                                        0x33, 0x33, 0x33, 0x33,
+                                        0x33, 0x33, 0x33, 0x33 };
+VECT_VAR_DECL(expected,uint,16,8) [] = { 0x333, 0x3333, 0x3333, 0x3333,
+                                        0x333, 0x3333, 0x3333, 0x3333 };
+VECT_VAR_DECL(expected,uint,32,4) [] = { 0xffffffff, 0xffffffff,
+                                        0xffffffff, 0x0 };
+VECT_VAR_DECL(expected,uint,64,2) [] = { 0x3333333333333333,
+                                        0x3333333333333333 };
+VECT_VAR_DECL(expected,poly,8,16) [] = { 0x33, 0x33, 0x33, 0x33,
+                                        0x33, 0x33, 0x33, 0x33,
+                                        0x33, 0x33, 0x33, 0x33,
+                                        0x33, 0x33, 0x33, 0x33 };
+VECT_VAR_DECL(expected,poly,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333,
+                                        0x3333, 0x3333, 0x3333, 0x3333 };
+VECT_VAR_DECL(expected,hfloat,32,4) [] = { 0x33333333, 0x33333333,
+                                          0x33333333, 0x33333333 };
+
+VECT_VAR_DECL(expected2,uint,32,2) [] = { 0xffffffff, 0xffffffff };
+VECT_VAR_DECL(expected2,uint,32,4) [] = { 0xffffffff, 0xffffffff,
+                                         0xffffffff, 0xffffffff };
diff --git a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vcagt.c b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vcagt.c
new file mode 100644 (file)
index 0000000..ed62c85
--- /dev/null
@@ -0,0 +1,51 @@
+#define INSN_NAME vcagt
+#define TEST_MSG "VCAGT/VCAGTQ"
+
+#include "cmp_fp_op.inc"
+
+/* Expected results.  */
+VECT_VAR_DECL(expected,int,8,8) [] = { 0x33, 0x33, 0x33, 0x33,
+                                      0x33, 0x33, 0x33, 0x33 };
+VECT_VAR_DECL(expected,int,16,4) [] = { 0x333, 0x3333, 0x3333, 0x3333 };
+VECT_VAR_DECL(expected,int,32,2) [] = { 0x33333333, 0x33333333 };
+VECT_VAR_DECL(expected,int,64,1) [] = { 0x3333333333333333 };
+VECT_VAR_DECL(expected,uint,8,8) [] = { 0x33, 0x33, 0x33, 0x33,
+                                       0x33, 0x33, 0x33, 0x33 };
+VECT_VAR_DECL(expected,uint,16,4) [] = { 0x333, 0x3333, 0x3333, 0x3333 };
+VECT_VAR_DECL(expected,uint,32,2) [] = { 0x0, 0x0 };
+VECT_VAR_DECL(expected,uint,64,1) [] = { 0x3333333333333333 };
+VECT_VAR_DECL(expected,poly,8,8) [] = { 0x33, 0x33, 0x33, 0x33,
+                                       0x33, 0x33, 0x33, 0x33 };
+VECT_VAR_DECL(expected,poly,16,4) [] = { 0x3333, 0x3333, 0x3333, 0x3333 };
+VECT_VAR_DECL(expected,hfloat,32,2) [] = { 0x33333333, 0x33333333 };
+VECT_VAR_DECL(expected,int,8,16) [] = { 0x33, 0x33, 0x33, 0x33,
+                                       0x33, 0x33, 0x33, 0x33,
+                                       0x33, 0x33, 0x33, 0x33,
+                                       0x33, 0x33, 0x33, 0x33 };
+VECT_VAR_DECL(expected,int,16,8) [] = { 0x333, 0x3333, 0x3333, 0x3333,
+                                       0x333, 0x3333, 0x3333, 0x3333 };
+VECT_VAR_DECL(expected,int,32,4) [] = { 0xffffffff, 0xffffffff, 0x0, 0x0 };
+VECT_VAR_DECL(expected,int,64,2) [] = { 0x3333333333333333,
+                                       0x3333333333333333 };
+VECT_VAR_DECL(expected,uint,8,16) [] = { 0x33, 0x33, 0x33, 0x33,
+                                        0x33, 0x33, 0x33, 0x33,
+                                        0x33, 0x33, 0x33, 0x33,
+                                        0x33, 0x33, 0x33, 0x33 };
+VECT_VAR_DECL(expected,uint,16,8) [] = { 0x333, 0x3333, 0x3333, 0x3333,
+                                        0x333, 0x3333, 0x3333, 0x3333 };
+VECT_VAR_DECL(expected,uint,32,4) [] = { 0xffffffff, 0xffffffff,
+                                        0x0, 0x0 };
+VECT_VAR_DECL(expected,uint,64,2) [] = { 0x3333333333333333,
+                                        0x3333333333333333 };
+VECT_VAR_DECL(expected,poly,8,16) [] = { 0x33, 0x33, 0x33, 0x33,
+                                        0x33, 0x33, 0x33, 0x33,
+                                        0x33, 0x33, 0x33, 0x33,
+                                        0x33, 0x33, 0x33, 0x33 };
+VECT_VAR_DECL(expected,poly,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333,
+                                        0x3333, 0x3333, 0x3333, 0x3333 };
+VECT_VAR_DECL(expected,hfloat,32,4) [] = { 0x33333333, 0x33333333,
+                                          0x33333333, 0x33333333 };
+
+VECT_VAR_DECL(expected2,uint,32,2) [] = { 0xffffffff, 0xffffffff };
+VECT_VAR_DECL(expected2,uint,32,4) [] = { 0xffffffff, 0xffffffff,
+                                         0xffffffff, 0xffffffff };
diff --git a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vcale.c b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vcale.c
new file mode 100644 (file)
index 0000000..cc83a39
--- /dev/null
@@ -0,0 +1,49 @@
+#define INSN_NAME vcale
+#define TEST_MSG "VCALE/VCALEQ"
+
+#include "cmp_fp_op.inc"
+
+/* Expected results.  */
+VECT_VAR_DECL(expected,int,8,8) [] = { 0x33, 0x33, 0x33, 0x33,
+                                      0x33, 0x33, 0x33, 0x33 };
+VECT_VAR_DECL(expected,int,16,4) [] = { 0x333, 0x3333, 0x3333, 0x3333 };
+VECT_VAR_DECL(expected,int,32,2) [] = { 0x33333333, 0x33333333 };
+VECT_VAR_DECL(expected,int,64,1) [] = { 0x3333333333333333 };
+VECT_VAR_DECL(expected,uint,8,8) [] = { 0x33, 0x33, 0x33, 0x33,
+                                       0x33, 0x33, 0x33, 0x33 };
+VECT_VAR_DECL(expected,uint,16,4) [] = { 0x333, 0x3333, 0x3333, 0x3333 };
+VECT_VAR_DECL(expected,uint,32,2) [] = { 0xffffffff, 0xffffffff };
+VECT_VAR_DECL(expected,uint,64,1) [] = { 0x3333333333333333 };
+VECT_VAR_DECL(expected,poly,8,8) [] = { 0x33, 0x33, 0x33, 0x33,
+                                       0x33, 0x33, 0x33, 0x33 };
+VECT_VAR_DECL(expected,poly,16,4) [] = { 0x3333, 0x3333, 0x3333, 0x3333 };
+VECT_VAR_DECL(expected,hfloat,32,2) [] = { 0x33333333, 0x33333333 };
+VECT_VAR_DECL(expected,int,8,16) [] = { 0x33, 0x33, 0x33, 0x33,
+                                       0x33, 0x33, 0x33, 0x33,
+                                       0x33, 0x33, 0x33, 0x33,
+                                       0x33, 0x33, 0x33, 0x33 };
+VECT_VAR_DECL(expected,int,16,8) [] = { 0x333, 0x3333, 0x3333, 0x3333,
+                                       0x333, 0x3333, 0x3333, 0x3333 };
+VECT_VAR_DECL(expected,int,32,4) [] = { 0xffffffff, 0xffffffff, 0x0, 0x0 };
+VECT_VAR_DECL(expected,int,64,2) [] = { 0x3333333333333333,
+                                       0x3333333333333333 };
+VECT_VAR_DECL(expected,uint,8,16) [] = { 0x33, 0x33, 0x33, 0x33,
+                                        0x33, 0x33, 0x33, 0x33,
+                                        0x33, 0x33, 0x33, 0x33,
+                                        0x33, 0x33, 0x33, 0x33 };
+VECT_VAR_DECL(expected,uint,16,8) [] = { 0x333, 0x3333, 0x3333, 0x3333,
+                                        0x333, 0x3333, 0x3333, 0x3333 };
+VECT_VAR_DECL(expected,uint,32,4) [] = { 0x0, 0x0, 0xffffffff, 0xffffffff };
+VECT_VAR_DECL(expected,uint,64,2) [] = { 0x3333333333333333,
+                                        0x3333333333333333 };
+VECT_VAR_DECL(expected,poly,8,16) [] = { 0x33, 0x33, 0x33, 0x33,
+                                        0x33, 0x33, 0x33, 0x33,
+                                        0x33, 0x33, 0x33, 0x33,
+                                        0x33, 0x33, 0x33, 0x33 };
+VECT_VAR_DECL(expected,poly,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333,
+                                        0x3333, 0x3333, 0x3333, 0x3333 };
+VECT_VAR_DECL(expected,hfloat,32,4) [] = { 0x33333333, 0x33333333,
+                                          0x33333333, 0x33333333 };
+
+VECT_VAR_DECL(expected2,uint,32,2) [] = { 0x0, 0x0 };
+VECT_VAR_DECL(expected2,uint,32,4) [] = { 0x0, 0x0, 0x0, 0x0 };
diff --git a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vcalt.c b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vcalt.c
new file mode 100644 (file)
index 0000000..a60536e
--- /dev/null
@@ -0,0 +1,49 @@
+#define INSN_NAME vcalt
+#define TEST_MSG "VCALT/VCALTQ"
+
+#include "cmp_fp_op.inc"
+
+/* Expected results.  */
+VECT_VAR_DECL(expected,int,8,8) [] = { 0x33, 0x33, 0x33, 0x33,
+                                      0x33, 0x33, 0x33, 0x33 };
+VECT_VAR_DECL(expected,int,16,4) [] = { 0x333, 0x3333, 0x3333, 0x3333 };
+VECT_VAR_DECL(expected,int,32,2) [] = { 0x33333333, 0x33333333 };
+VECT_VAR_DECL(expected,int,64,1) [] = { 0x3333333333333333 };
+VECT_VAR_DECL(expected,uint,8,8) [] = { 0x33, 0x33, 0x33, 0x33,
+                                       0x33, 0x33, 0x33, 0x33 };
+VECT_VAR_DECL(expected,uint,16,4) [] = { 0x333, 0x3333, 0x3333, 0x3333 };
+VECT_VAR_DECL(expected,uint,32,2) [] = { 0x0, 0xffffffff };
+VECT_VAR_DECL(expected,uint,64,1) [] = { 0x3333333333333333 };
+VECT_VAR_DECL(expected,poly,8,8) [] = { 0x33, 0x33, 0x33, 0x33,
+                                       0x33, 0x33, 0x33, 0x33 };
+VECT_VAR_DECL(expected,poly,16,4) [] = { 0x3333, 0x3333, 0x3333, 0x3333 };
+VECT_VAR_DECL(expected,hfloat,32,2) [] = { 0x33333333, 0x33333333 };
+VECT_VAR_DECL(expected,int,8,16) [] = { 0x33, 0x33, 0x33, 0x33,
+                                       0x33, 0x33, 0x33, 0x33,
+                                       0x33, 0x33, 0x33, 0x33,
+                                       0x33, 0x33, 0x33, 0x33 };
+VECT_VAR_DECL(expected,int,16,8) [] = { 0x333, 0x3333, 0x3333, 0x3333,
+                                       0x333, 0x3333, 0x3333, 0x3333 };
+VECT_VAR_DECL(expected,int,32,4) [] = { 0xffffffff, 0xffffffff, 0x0, 0x0 };
+VECT_VAR_DECL(expected,int,64,2) [] = { 0x3333333333333333,
+                                       0x3333333333333333 };
+VECT_VAR_DECL(expected,uint,8,16) [] = { 0x33, 0x33, 0x33, 0x33,
+                                        0x33, 0x33, 0x33, 0x33,
+                                        0x33, 0x33, 0x33, 0x33,
+                                        0x33, 0x33, 0x33, 0x33 };
+VECT_VAR_DECL(expected,uint,16,8) [] = { 0x333, 0x3333, 0x3333, 0x3333,
+                                        0x333, 0x3333, 0x3333, 0x3333 };
+VECT_VAR_DECL(expected,uint,32,4) [] = { 0x0, 0x0, 0x0, 0xffffffff };
+VECT_VAR_DECL(expected,uint,64,2) [] = { 0x3333333333333333,
+                                        0x3333333333333333 };
+VECT_VAR_DECL(expected,poly,8,16) [] = { 0x33, 0x33, 0x33, 0x33,
+                                        0x33, 0x33, 0x33, 0x33,
+                                        0x33, 0x33, 0x33, 0x33,
+                                        0x33, 0x33, 0x33, 0x33 };
+VECT_VAR_DECL(expected,poly,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333,
+                                        0x3333, 0x3333, 0x3333, 0x3333 };
+VECT_VAR_DECL(expected,hfloat,32,4) [] = { 0x33333333, 0x33333333,
+                                          0x33333333, 0x33333333 };
+
+VECT_VAR_DECL(expected2,uint,32,2) [] = { 0x0, 0x0 };
+VECT_VAR_DECL(expected2,uint,32,4) [] = { 0x0, 0x0, 0x0, 0x0 };
diff --git a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vceq.c b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vceq.c
new file mode 100644 (file)
index 0000000..aa095df
--- /dev/null
@@ -0,0 +1,113 @@
+#define INSN_NAME vceq
+#define TEST_MSG "VCEQ/VCEQQ"
+
+/* Extra tests for _p8 variants, which exist only for vceq.  */
+void exec_vceq_p8(void);
+#define EXTRA_TESTS exec_vceq_p8
+
+#include "cmp_op.inc"
+
+/* Expected results.  */
+VECT_VAR_DECL(expected,int,8,8) [] = { 0x33, 0x33, 0x33, 0x33,
+                                      0x33, 0x33, 0x33, 0x33 };
+VECT_VAR_DECL(expected,int,16,4) [] = { 0x333, 0x3333, 0x3333, 0x3333 };
+VECT_VAR_DECL(expected,int,32,2) [] = { 0x33333333, 0x33333333 };
+VECT_VAR_DECL(expected,int,64,1) [] = { 0x3333333333333333 };
+VECT_VAR_DECL(expected,uint,8,8) [] = { 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0xff, 0x0 };
+VECT_VAR_DECL(expected,uint,16,4) [] = { 0x0, 0x0, 0xffff, 0x0 };
+VECT_VAR_DECL(expected,uint,32,2) [] = { 0xffffffff, 0x0 };
+VECT_VAR_DECL(expected,uint,64,1) [] = { 0x3333333333333333 };
+VECT_VAR_DECL(expected,poly,8,8) [] = { 0x33, 0x33, 0x33, 0x33,
+                                       0x33, 0x33, 0x33, 0x33 };
+VECT_VAR_DECL(expected,poly,16,4) [] = { 0x3333, 0x3333, 0x3333, 0x3333 };
+VECT_VAR_DECL(expected,hfloat,32,2) [] = { 0x33333333, 0x33333333 };
+VECT_VAR_DECL(expected,int,8,16) [] = { 0x33, 0x33, 0x33, 0x33,
+                                       0x33, 0x33, 0x33, 0x33,
+                                       0x33, 0x33, 0x33, 0x33,
+                                       0x33, 0x33, 0x33, 0x33 };
+VECT_VAR_DECL(expected,int,16,8) [] = { 0x333, 0x3333, 0x3333, 0x3333,
+                                       0x333, 0x3333, 0x3333, 0x3333 };
+VECT_VAR_DECL(expected,int,32,4) [] = { 0x33333333, 0x33333333,
+                                       0x33333333, 0x33333333 };
+VECT_VAR_DECL(expected,int,64,2) [] = { 0x3333333333333333,
+                                       0x3333333333333333 };
+VECT_VAR_DECL(expected,uint,8,16) [] = { 0x0, 0x0, 0x0, 0x0,
+                                        0x0, 0x0, 0x0, 0x0,
+                                        0x0, 0x0, 0x0, 0x0,
+                                        0xff, 0x0, 0x0, 0x0 };
+VECT_VAR_DECL(expected,uint,16,8) [] = { 0x0, 0x0, 0x0, 0x0,
+                                        0x0, 0x0, 0xffff, 0x0 };
+VECT_VAR_DECL(expected,uint,32,4) [] = { 0x0, 0x0, 0xffffffff, 0x0 };
+VECT_VAR_DECL(expected,uint,64,2) [] = { 0x3333333333333333,
+                                        0x3333333333333333 };
+VECT_VAR_DECL(expected,poly,8,16) [] = { 0x33, 0x33, 0x33, 0x33,
+                                        0x33, 0x33, 0x33, 0x33,
+                                        0x33, 0x33, 0x33, 0x33,
+                                        0x33, 0x33, 0x33, 0x33 };
+VECT_VAR_DECL(expected,poly,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333,
+                                        0x3333, 0x3333, 0x3333, 0x3333 };
+VECT_VAR_DECL(expected,hfloat,32,4) [] = { 0x33333333, 0x33333333,
+                                          0x33333333, 0x33333333 };
+
+VECT_VAR_DECL(expected_uint,uint,8,8) [] = { 0x0, 0x0, 0x0, 0xff,
+                                            0x0, 0x0, 0x0, 0x0 };
+VECT_VAR_DECL(expected_uint,uint,16,4) [] = { 0x0, 0x0, 0xffff, 0x0 };
+VECT_VAR_DECL(expected_uint,uint,32,2) [] = { 0x0, 0xffffffff };
+
+VECT_VAR_DECL(expected_q_uint,uint,8,16) [] = { 0x0, 0x0, 0x0, 0x0,
+                                               0xff, 0x0, 0x0, 0x0,
+                                               0, 0x0, 0x0, 0x0,
+                                               0x0, 0x0, 0x0, 0x0 };
+VECT_VAR_DECL(expected_q_uint,uint,16,8) [] = { 0x0, 0x0, 0x0, 0x0,
+                                               0x0, 0x0, 0xffff, 0x0 };
+VECT_VAR_DECL(expected_q_uint,uint,32,4) [] = { 0x0, 0x0, 0xffffffff, 0x0 };
+
+VECT_VAR_DECL(expected_float,uint,32,2) [] = { 0x0, 0xffffffff };
+VECT_VAR_DECL(expected_q_float,uint,32,4) [] = { 0x0, 0x0, 0xffffffff, 0x0 };
+
+VECT_VAR_DECL(expected_uint2,uint,32,2) [] = { 0xffffffff, 0x0 };
+VECT_VAR_DECL(expected_uint3,uint,32,2) [] = { 0x0, 0xffffffff };
+VECT_VAR_DECL(expected_uint4,uint,32,2) [] = { 0xffffffff, 0x0 };
+
+VECT_VAR_DECL(expected_nan,uint,32,2) [] = { 0x0, 0x0 };
+VECT_VAR_DECL(expected_mnan,uint,32,2) [] = { 0x0, 0x0 };
+VECT_VAR_DECL(expected_nan2,uint,32,2) [] = { 0x0, 0x0 };
+
+VECT_VAR_DECL(expected_inf,uint,32,2) [] = { 0x0, 0x0 };
+VECT_VAR_DECL(expected_minf,uint,32,2) [] = { 0x0, 0x0 };
+VECT_VAR_DECL(expected_inf2,uint,32,2) [] = { 0x0, 0x0 };
+
+VECT_VAR_DECL(expected_mzero,uint,32,2) [] = { 0xffffffff, 0xffffffff };
+
+VECT_VAR_DECL(expected_p8,uint,8,8) [] = { 0x0, 0x0, 0x0, 0xff,
+                                          0x0, 0x0, 0x0, 0x0 };
+VECT_VAR_DECL(expected_q_p8,uint,8,16) [] = { 0x0, 0x0, 0x0, 0x0,
+                                             0xff, 0x0, 0x0, 0x0,
+                                             0x0, 0x0, 0x0, 0x0,
+                                             0x0, 0x0, 0x0, 0x0 };
+
+void exec_vceq_p8(void)
+{
+  DECL_VARIABLE(vector, poly, 8, 8);
+  DECL_VARIABLE(vector, poly, 8, 16);
+
+  DECL_VARIABLE(vector2, poly, 8, 8);
+  DECL_VARIABLE(vector2, poly, 8, 16);
+
+  DECL_VARIABLE(vector_res, uint, 8, 8);
+  DECL_VARIABLE(vector_res, uint, 8, 16);
+
+  clean_results ();
+
+  VLOAD(vector, buffer, , poly, p, 8, 8);
+  VLOAD(vector, buffer, q, poly, p, 8, 16);
+
+  VDUP(vector2, , poly, p, 8, 8, 0xF3);
+  VDUP(vector2, q, poly, p, 8, 16, 0xF4);
+
+  TEST_VCOMP(INSN_NAME, , poly, p, uint, 8, 8);
+  TEST_VCOMP(INSN_NAME, q, poly, p, uint, 8, 16);
+
+  CHECK(TEST_MSG, uint, 8, 8, PRIx8, expected_p8, "p8");
+  CHECK(TEST_MSG, uint, 8, 16, PRIx8, expected_q_p8, "p8");
+}
diff --git a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vcge.c b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vcge.c
new file mode 100644 (file)
index 0000000..236fd82
--- /dev/null
@@ -0,0 +1,76 @@
+#define INSN_NAME vcge
+#define TEST_MSG "VCGE/VCGEQ"
+
+#include "cmp_op.inc"
+
+/* Expected results.  */
+VECT_VAR_DECL(expected,int,8,8) [] = { 0x33, 0x33, 0x33, 0x33,
+                                      0x33, 0x33, 0x33, 0x33 };
+VECT_VAR_DECL(expected,int,16,4) [] = { 0x333, 0x3333, 0x3333, 0x3333 };
+VECT_VAR_DECL(expected,int,32,2) [] = { 0x33333333, 0x33333333 };
+VECT_VAR_DECL(expected,int,64,1) [] = { 0x3333333333333333 };
+VECT_VAR_DECL(expected,uint,8,8) [] = { 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0xff, 0xff };
+VECT_VAR_DECL(expected,uint,16,4) [] = { 0x0, 0x0, 0xffff, 0xffff };
+VECT_VAR_DECL(expected,uint,32,2) [] = { 0xffffffff, 0xffffffff };
+VECT_VAR_DECL(expected,uint,64,1) [] = { 0x3333333333333333 };
+VECT_VAR_DECL(expected,poly,8,8) [] = { 0x33, 0x33, 0x33, 0x33,
+                                       0x33, 0x33, 0x33, 0x33 };
+VECT_VAR_DECL(expected,poly,16,4) [] = { 0x3333, 0x3333, 0x3333, 0x3333 };
+VECT_VAR_DECL(expected,hfloat,32,2) [] = { 0x33333333, 0x33333333 };
+VECT_VAR_DECL(expected,int,8,16) [] = { 0x33, 0x33, 0x33, 0x33,
+                                       0x33, 0x33, 0x33, 0x33,
+                                       0x33, 0x33, 0x33, 0x33,
+                                       0x33, 0x33, 0x33, 0x33 };
+VECT_VAR_DECL(expected,int,16,8) [] = { 0x333, 0x3333, 0x3333, 0x3333,
+                                       0x333, 0x3333, 0x3333, 0x3333 };
+VECT_VAR_DECL(expected,int,32,4) [] = { 0x33333333, 0x33333333,
+                                       0x33333333, 0x33333333 };
+VECT_VAR_DECL(expected,int,64,2) [] = { 0x3333333333333333,
+                                       0x3333333333333333 };
+VECT_VAR_DECL(expected,uint,8,16) [] = { 0x0, 0x0, 0x0, 0x0,
+                                        0x0, 0x0, 0x0, 0x0,
+                                        0x0, 0x0, 0x0, 0x0,
+                                        0xff, 0xff, 0xff, 0xff };
+VECT_VAR_DECL(expected,uint,16,8) [] = { 0x0, 0x0, 0x0, 0x0,
+                                        0x0, 0x0, 0xffff, 0xffff };
+VECT_VAR_DECL(expected,uint,32,4) [] = { 0x0, 0x0, 0xffffffff, 0xffffffff };
+VECT_VAR_DECL(expected,uint,64,2) [] = { 0x3333333333333333,
+                                        0x3333333333333333 };
+VECT_VAR_DECL(expected,poly,8,16) [] = { 0x33, 0x33, 0x33, 0x33,
+                                        0x33, 0x33, 0x33, 0x33,
+                                        0x33, 0x33, 0x33, 0x33,
+                                        0x33, 0x33, 0x33, 0x33 };
+VECT_VAR_DECL(expected,poly,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333,
+                                        0x3333, 0x3333, 0x3333, 0x3333 };
+VECT_VAR_DECL(expected,hfloat,32,4) [] = { 0x33333333, 0x33333333,
+                                          0x33333333, 0x33333333 };
+
+VECT_VAR_DECL(expected_uint,uint,8,8) [] = { 0x0, 0x0, 0x0, 0xff,
+                                            0xff, 0xff, 0xff, 0xff };
+VECT_VAR_DECL(expected_uint,uint,16,4) [] = { 0x0, 0x0, 0xffff, 0xffff };
+VECT_VAR_DECL(expected_uint,uint,32,2) [] = { 0x0, 0xffffffff };
+
+VECT_VAR_DECL(expected_q_uint,uint,8,16) [] = { 0x0, 0x0, 0x0, 0x0,
+                                               0xff, 0xff, 0xff, 0xff,
+                                               0xff, 0xff, 0xff, 0xff,
+                                               0xff, 0xff, 0xff, 0xff };
+VECT_VAR_DECL(expected_q_uint,uint,16,8) [] = { 0x0, 0x0, 0x0, 0x0,
+                                               0, 0x0, 0xffff, 0xffff };
+VECT_VAR_DECL(expected_q_uint,uint,32,4) [] = { 0x0, 0x0, 0xffffffff, 0xffffffff };
+
+VECT_VAR_DECL(expected_float,uint,32,2) [] = { 0x0, 0xffffffff };
+VECT_VAR_DECL(expected_q_float,uint,32,4) [] = { 0x0, 0x0, 0xffffffff, 0xffffffff };
+
+VECT_VAR_DECL(expected_uint2,uint,32,2) [] = { 0xffffffff, 0xffffffff };
+VECT_VAR_DECL(expected_uint3,uint,32,2) [] = { 0x0, 0xffffffff };
+VECT_VAR_DECL(expected_uint4,uint,32,2) [] = { 0xffffffff, 0xffffffff };
+
+VECT_VAR_DECL(expected_nan,uint,32,2) [] = { 0x0, 0x0 };
+VECT_VAR_DECL(expected_mnan,uint,32,2) [] = { 0x0, 0x0 };
+VECT_VAR_DECL(expected_nan2,uint,32,2) [] = { 0x0, 0x0 };
+
+VECT_VAR_DECL(expected_inf,uint,32,2) [] = { 0x0, 0x0 };
+VECT_VAR_DECL(expected_minf,uint,32,2) [] = { 0xffffffff, 0xffffffff };
+VECT_VAR_DECL(expected_inf2,uint,32,2) [] = { 0xffffffff, 0xffffffff };
+
+VECT_VAR_DECL(expected_mzero,uint,32,2) [] = { 0xffffffff, 0xffffffff };
diff --git a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vcgt.c b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vcgt.c
new file mode 100644 (file)
index 0000000..23aaa01
--- /dev/null
@@ -0,0 +1,76 @@
+#define INSN_NAME vcgt
+#define TEST_MSG "VCGT/VCGTQ"
+
+#include "cmp_op.inc"
+
+/* Expected results.  */
+VECT_VAR_DECL(expected,int,8,8) [] = { 0x33, 0x33, 0x33, 0x33,
+                                      0x33, 0x33, 0x33, 0x33 };
+VECT_VAR_DECL(expected,int,16,4) [] = { 0x333, 0x3333, 0x3333, 0x3333 };
+VECT_VAR_DECL(expected,int,32,2) [] = { 0x33333333, 0x33333333 };
+VECT_VAR_DECL(expected,int,64,1) [] = { 0x3333333333333333 };
+VECT_VAR_DECL(expected,uint,8,8) [] = { 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0xff };
+VECT_VAR_DECL(expected,uint,16,4) [] = { 0x0, 0x0, 0x0, 0xffff };
+VECT_VAR_DECL(expected,uint,32,2) [] = { 0x0, 0xffffffff };
+VECT_VAR_DECL(expected,uint,64,1) [] = { 0x3333333333333333 };
+VECT_VAR_DECL(expected,poly,8,8) [] = { 0x33, 0x33, 0x33, 0x33,
+                                       0x33, 0x33, 0x33, 0x33 };
+VECT_VAR_DECL(expected,poly,16,4) [] = { 0x3333, 0x3333, 0x3333, 0x3333 };
+VECT_VAR_DECL(expected,hfloat,32,2) [] = { 0x33333333, 0x33333333 };
+VECT_VAR_DECL(expected,int,8,16) [] = { 0x33, 0x33, 0x33, 0x33,
+                                       0x33, 0x33, 0x33, 0x33,
+                                       0x33, 0x33, 0x33, 0x33,
+                                       0x33, 0x33, 0x33, 0x33 };
+VECT_VAR_DECL(expected,int,16,8) [] = { 0x333, 0x3333, 0x3333, 0x3333,
+                                       0x333, 0x3333, 0x3333, 0x3333 };
+VECT_VAR_DECL(expected,int,32,4) [] = { 0x33333333, 0x33333333,
+                                       0x33333333, 0x33333333 };
+VECT_VAR_DECL(expected,int,64,2) [] = { 0x3333333333333333,
+                                       0x3333333333333333 };
+VECT_VAR_DECL(expected,uint,8,16) [] = { 0x0, 0x0, 0x0, 0x0,
+                                        0x0, 0x0, 0x0, 0x0,
+                                        0x0, 0x0, 0x0, 0x0,
+                                        0x0, 0xff, 0xff, 0xff };
+VECT_VAR_DECL(expected,uint,16,8) [] = { 0x0, 0x0, 0x0, 0x0,
+                                        0x0, 0x0, 0x0, 0xffff };
+VECT_VAR_DECL(expected,uint,32,4) [] = { 0x0, 0x0, 0x0, 0xffffffff };
+VECT_VAR_DECL(expected,uint,64,2) [] = { 0x3333333333333333,
+                                        0x3333333333333333 };
+VECT_VAR_DECL(expected,poly,8,16) [] = { 0x33, 0x33, 0x33, 0x33,
+                                        0x33, 0x33, 0x33, 0x33,
+                                        0x33, 0x33, 0x33, 0x33,
+                                        0x33, 0x33, 0x33, 0x33 };
+VECT_VAR_DECL(expected,poly,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333,
+                                        0x3333, 0x3333, 0x3333, 0x3333 };
+VECT_VAR_DECL(expected,hfloat,32,4) [] = { 0x33333333, 0x33333333,
+                                          0x33333333, 0x33333333 };
+
+VECT_VAR_DECL(expected_uint,uint,8,8) [] = { 0x0, 0x0, 0x0, 0x0,
+                                            0xff, 0xff, 0xff, 0xff };
+VECT_VAR_DECL(expected_uint,uint,16,4) [] = { 0x0, 0x0, 0x0, 0xffff };
+VECT_VAR_DECL(expected_uint,uint,32,2) [] = { 0x0, 0x0 };
+
+VECT_VAR_DECL(expected_q_uint,uint,8,16) [] = { 0x0, 0x0, 0x0, 0x0,
+                                               0x0, 0xff, 0xff, 0xff,
+                                               0xff, 0xff, 0xff, 0xff,
+                                               0xff, 0xff, 0xff, 0xff };
+VECT_VAR_DECL(expected_q_uint,uint,16,8) [] = { 0x0, 0x0, 0x0, 0x0,
+                                               0x0, 0x0, 0x0, 0xffff };
+VECT_VAR_DECL(expected_q_uint,uint,32,4) [] = { 0x0, 0x0, 0x0, 0xffffffff };
+
+VECT_VAR_DECL(expected_float,uint,32,2) [] = { 0x0, 0x0 };
+VECT_VAR_DECL(expected_q_float,uint,32,4) [] = { 0x0, 0x0, 0x0, 0xffffffff };
+
+VECT_VAR_DECL(expected_uint2,uint,32,2) [] = { 0x0, 0xffffffff };
+VECT_VAR_DECL(expected_uint3,uint,32,2) [] = { 0x0, 0x0 };
+VECT_VAR_DECL(expected_uint4,uint,32,2) [] = { 0x0, 0xffffffff };
+
+VECT_VAR_DECL(expected_nan,uint,32,2) [] = { 0x0, 0x0 };
+VECT_VAR_DECL(expected_mnan,uint,32,2) [] = { 0x0, 0x0 };
+VECT_VAR_DECL(expected_nan2,uint,32,2) [] = { 0x0, 0x0 };
+
+VECT_VAR_DECL(expected_inf,uint,32,2) [] = { 0x0, 0x0 };
+VECT_VAR_DECL(expected_minf,uint,32,2) [] = { 0xffffffff, 0xffffffff };
+VECT_VAR_DECL(expected_inf2,uint,32,2) [] = { 0xffffffff, 0xffffffff };
+
+VECT_VAR_DECL(expected_mzero,uint,32,2) [] = { 0x0, 0x0 };
diff --git a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vcle.c b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vcle.c
new file mode 100644 (file)
index 0000000..e4cad0c
--- /dev/null
@@ -0,0 +1,80 @@
+#define INSN_NAME vcle
+#define TEST_MSG "VCLE/VCLEQ"
+
+#include "cmp_op.inc"
+
+/* Expected results.  */
+VECT_VAR_DECL(expected,int,8,8) [] = { 0x33, 0x33, 0x33, 0x33,
+                                      0x33, 0x33, 0x33, 0x33 };
+VECT_VAR_DECL(expected,int,16,4) [] = { 0x333, 0x3333, 0x3333, 0x3333 };
+VECT_VAR_DECL(expected,int,32,2) [] = { 0x33333333, 0x33333333 };
+VECT_VAR_DECL(expected,int,64,1) [] = { 0x3333333333333333 };
+VECT_VAR_DECL(expected,uint,8,8) [] = { 0xff, 0xff, 0xff, 0xff,
+                                       0xff, 0xff, 0xff, 0x0 };
+VECT_VAR_DECL(expected,uint,16,4) [] = { 0xffff, 0xffff, 0xffff, 0x0 };
+VECT_VAR_DECL(expected,uint,32,2) [] = { 0xffffffff, 0x0 };
+VECT_VAR_DECL(expected,uint,64,1) [] = { 0x3333333333333333 };
+VECT_VAR_DECL(expected,poly,8,8) [] = { 0x33, 0x33, 0x33, 0x33,
+                                       0x33, 0x33, 0x33, 0x33 };
+VECT_VAR_DECL(expected,poly,16,4) [] = { 0x3333, 0x3333, 0x3333, 0x3333 };
+VECT_VAR_DECL(expected,hfloat,32,2) [] = { 0x33333333, 0x33333333 };
+VECT_VAR_DECL(expected,int,8,16) [] = { 0x33, 0x33, 0x33, 0x33,
+                                       0x33, 0x33, 0x33, 0x33,
+                                       0x33, 0x33, 0x33, 0x33,
+                                       0x33, 0x33, 0x33, 0x33 };
+VECT_VAR_DECL(expected,int,16,8) [] = { 0x333, 0x3333, 0x3333, 0x3333,
+                                       0x333, 0x3333, 0x3333, 0x3333 };
+VECT_VAR_DECL(expected,int,32,4) [] = { 0x33333333, 0x33333333,
+                                       0x33333333, 0x33333333 };
+VECT_VAR_DECL(expected,int,64,2) [] = { 0x3333333333333333,
+                                       0x3333333333333333 };
+VECT_VAR_DECL(expected,uint,8,16) [] = { 0xff, 0xff, 0xff, 0xff,
+                                        0xff, 0xff, 0xff, 0xff,
+                                        0xff, 0xff, 0xff, 0xff,
+                                        0xff, 0x0, 0x0, 0x0 };
+VECT_VAR_DECL(expected,uint,16,8) [] = { 0xffff, 0xffff, 0xffff, 0xffff,
+                                        0xffff, 0xffff, 0xffff, 0x0 };
+VECT_VAR_DECL(expected,uint,32,4) [] = { 0xffffffff, 0xffffffff,
+                                        0xffffffff, 0x0 };
+VECT_VAR_DECL(expected,uint,64,2) [] = { 0x3333333333333333,
+                                        0x3333333333333333 };
+VECT_VAR_DECL(expected,poly,8,16) [] = { 0x33, 0x33, 0x33, 0x33,
+                                        0x33, 0x33, 0x33, 0x33,
+                                        0x33, 0x33, 0x33, 0x33,
+                                        0x33, 0x33, 0x33, 0x33 };
+VECT_VAR_DECL(expected,poly,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333,
+                                        0x3333, 0x3333, 0x3333, 0x3333 };
+VECT_VAR_DECL(expected,hfloat,32,4) [] = { 0x33333333, 0x33333333,
+                                          0x33333333, 0x33333333 };
+
+VECT_VAR_DECL(expected_uint,uint,8,8) [] = { 0xff, 0xff, 0xff, 0xff,
+                                            0x0, 0x0, 0x0, 0x0 };
+VECT_VAR_DECL(expected_uint,uint,16,4) [] = { 0xffff, 0xffff, 0xffff, 0x0 };
+VECT_VAR_DECL(expected_uint,uint,32,2) [] = { 0xffffffff, 0xffffffff };
+
+VECT_VAR_DECL(expected_q_uint,uint,8,16) [] = { 0xff, 0xff, 0xff, 0xff,
+                                               0xff, 0x0, 0x0, 0x0,
+                                               0x0, 0x0, 0x0, 0x0,
+                                               0x0, 0x0, 0x0, 0x0 };
+VECT_VAR_DECL(expected_q_uint,uint,16,8) [] = { 0xffff, 0xffff, 0xffff, 0xffff,
+                                               0xffff, 0xffff, 0xffff, 0x0 };
+VECT_VAR_DECL(expected_q_uint,uint,32,4) [] = { 0xffffffff, 0xffffffff,
+                                               0xffffffff, 0x0 };
+
+VECT_VAR_DECL(expected_float,uint,32,2) [] = { 0xffffffff, 0xffffffff };
+VECT_VAR_DECL(expected_q_float,uint,32,4) [] = { 0xffffffff, 0xffffffff,
+                                                0xffffffff, 0x0 };
+
+VECT_VAR_DECL(expected_uint2,uint,32,2) [] = { 0xffffffff, 0x0 };
+VECT_VAR_DECL(expected_uint3,uint,32,2) [] = { 0xffffffff, 0xffffffff };
+VECT_VAR_DECL(expected_uint4,uint,32,2) [] = { 0xffffffff, 0x0 };
+
+VECT_VAR_DECL(expected_nan,uint,32,2) [] = { 0x0, 0x0 };
+VECT_VAR_DECL(expected_mnan,uint,32,2) [] = { 0x0, 0x0 };
+VECT_VAR_DECL(expected_nan2,uint,32,2) [] = { 0x0, 0x0 };
+
+VECT_VAR_DECL(expected_inf,uint,32,2) [] = { 0xffffffff, 0xffffffff };
+VECT_VAR_DECL(expected_minf,uint,32,2) [] = { 0x0, 0x0 };
+VECT_VAR_DECL(expected_inf2,uint,32,2) [] = { 0x0, 0x0 };
+
+VECT_VAR_DECL(expected_mzero,uint,32,2) [] = { 0xffffffff, 0xffffffff };
diff --git a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vclt.c b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vclt.c
new file mode 100644 (file)
index 0000000..d437eae
--- /dev/null
@@ -0,0 +1,79 @@
+#define INSN_NAME vclt
+#define TEST_MSG "VCLT/VCLTQ"
+
+#include "cmp_op.inc"
+
+/* Expected results.  */
+VECT_VAR_DECL(expected,int,8,8) [] = { 0x33, 0x33, 0x33, 0x33,
+                                      0x33, 0x33, 0x33, 0x33 };
+VECT_VAR_DECL(expected,int,16,4) [] = { 0x333, 0x3333, 0x3333, 0x3333 };
+VECT_VAR_DECL(expected,int,32,2) [] = { 0x33333333, 0x33333333 };
+VECT_VAR_DECL(expected,int,64,1) [] = { 0x3333333333333333 };
+VECT_VAR_DECL(expected,uint,8,8) [] = { 0xff, 0xff, 0xff, 0xff,
+                                       0xff, 0xff, 0x0, 0x0 };
+VECT_VAR_DECL(expected,uint,16,4) [] = { 0xffff, 0xffff, 0x0, 0x0 };
+VECT_VAR_DECL(expected,uint,32,2) [] = { 0x0, 0x0 };
+VECT_VAR_DECL(expected,uint,64,1) [] = { 0x3333333333333333 };
+VECT_VAR_DECL(expected,poly,8,8) [] = { 0x33, 0x33, 0x33, 0x33,
+                                       0x33, 0x33, 0x33, 0x33 };
+VECT_VAR_DECL(expected,poly,16,4) [] = { 0x3333, 0x3333, 0x3333, 0x3333 };
+VECT_VAR_DECL(expected,hfloat,32,2) [] = { 0x33333333, 0x33333333 };
+VECT_VAR_DECL(expected,int,8,16) [] = { 0x33, 0x33, 0x33, 0x33,
+                                       0x33, 0x33, 0x33, 0x33,
+                                       0x33, 0x33, 0x33, 0x33,
+                                       0x33, 0x33, 0x33, 0x33 };
+VECT_VAR_DECL(expected,int,16,8) [] = { 0x333, 0x3333, 0x3333, 0x3333,
+                                       0x333, 0x3333, 0x3333, 0x3333 };
+VECT_VAR_DECL(expected,int,32,4) [] = { 0x33333333, 0x33333333,
+                                       0x33333333, 0x33333333 };
+VECT_VAR_DECL(expected,int,64,2) [] = { 0x3333333333333333,
+                                       0x3333333333333333 };
+VECT_VAR_DECL(expected,uint,8,16) [] = { 0xff, 0xff, 0xff, 0xff,
+                                        0xff, 0xff, 0xff, 0xff,
+                                        0xff, 0xff, 0xff, 0xff,
+                                        0x0, 0x0, 0x0, 0x0 };
+VECT_VAR_DECL(expected,uint,16,8) [] = { 0xffff, 0xffff, 0xffff, 0xffff,
+                                        0xffff, 0xffff, 0x0, 0x0 };
+VECT_VAR_DECL(expected,uint,32,4) [] = { 0xffffffff, 0xffffffff, 0x0, 0x0 };
+VECT_VAR_DECL(expected,uint,64,2) [] = { 0x3333333333333333,
+                                        0x3333333333333333 };
+VECT_VAR_DECL(expected,poly,8,16) [] = { 0x33, 0x33, 0x33, 0x33,
+                                        0x33, 0x33, 0x33, 0x33,
+                                        0x33, 0x33, 0x33, 0x33,
+                                        0x33, 0x33, 0x33, 0x33 };
+VECT_VAR_DECL(expected,poly,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333,
+                                        0x3333, 0x3333, 0x3333, 0x3333 };
+VECT_VAR_DECL(expected,hfloat,32,4) [] = { 0x33333333, 0x33333333,
+                                          0x33333333, 0x33333333 };
+
+VECT_VAR_DECL(expected_uint,uint,8,8) [] = { 0xff, 0xff, 0xff, 0x0,
+                                            0x0, 0x0, 0x0, 0x0 };
+VECT_VAR_DECL(expected_uint,uint,16,4) [] = { 0xffff, 0xffff, 0x0, 0x0 };
+VECT_VAR_DECL(expected_uint,uint,32,2) [] = { 0xffffffff, 0x0 };
+
+VECT_VAR_DECL(expected_q_uint,uint,8,16) [] = { 0xff, 0xff, 0xff, 0xff,
+                                               0x0, 0x0, 0x0, 0x0,
+                                               0x0, 0x0, 0x0, 0x0,
+                                               0x0, 0x0, 0x0, 0x0 };
+VECT_VAR_DECL(expected_q_uint,uint,16,8) [] = { 0xffff, 0xffff, 0xffff, 0xffff,
+                                               0xffff, 0xffff, 0x0, 0x0 };
+VECT_VAR_DECL(expected_q_uint,uint,32,4) [] = { 0xffffffff, 0xffffffff,
+                                               0x0, 0x0 };
+
+VECT_VAR_DECL(expected_float,uint,32,2) [] = { 0xffffffff, 0x0 };
+VECT_VAR_DECL(expected_q_float,uint,32,4) [] = { 0xffffffff, 0xffffffff,
+                                                0x0, 0x0 };
+
+VECT_VAR_DECL(expected_uint2,uint,32,2) [] = { 0x0, 0x0 };
+VECT_VAR_DECL(expected_uint3,uint,32,2) [] = { 0xffffffff, 0x0 };
+VECT_VAR_DECL(expected_uint4,uint,32,2) [] = { 0x0, 0x0 };
+
+VECT_VAR_DECL(expected_nan,uint,32,2) [] = { 0x0, 0x0 };
+VECT_VAR_DECL(expected_mnan,uint,32,2) [] = { 0x0, 0x0 };
+VECT_VAR_DECL(expected_nan2,uint,32,2) [] = { 0x0, 0x0 };
+
+VECT_VAR_DECL(expected_inf,uint,32,2) [] = { 0xffffffff, 0xffffffff };
+VECT_VAR_DECL(expected_minf,uint,32,2) [] = { 0x0, 0x0 };
+VECT_VAR_DECL(expected_inf2,uint,32,2) [] = { 0x0, 0x0 };
+
+VECT_VAR_DECL(expected_mzero,uint,32,2) [] = { 0x0, 0x0 };
diff --git a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vclz.c b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vclz.c
new file mode 100644 (file)
index 0000000..ad28d2d
--- /dev/null
@@ -0,0 +1,194 @@
+#include <arm_neon.h>
+#include "arm-neon-ref.h"
+#include "compute-ref-data.h"
+
+/* Expected results.  */
+VECT_VAR_DECL(expected,int,8,8) [] = { 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0 };
+VECT_VAR_DECL(expected,int,16,4) [] = { 0x3, 0x3, 0x3, 0x3 };
+VECT_VAR_DECL(expected,int,32,2) [] = { 0x11, 0x11 };
+VECT_VAR_DECL(expected,int,64,1) [] = { 0x3333333333333333 };
+VECT_VAR_DECL(expected,uint,8,8) [] = { 0x2, 0x2, 0x2, 0x2, 0x2, 0x2, 0x2, 0x2 };
+VECT_VAR_DECL(expected,uint,16,4) [] = { 0x0, 0x0, 0x0, 0x0 };
+VECT_VAR_DECL(expected,uint,32,2) [] = { 0x5, 0x5 };
+VECT_VAR_DECL(expected,uint,64,1) [] = { 0x3333333333333333 };
+VECT_VAR_DECL(expected,poly,8,8) [] = { 0x33, 0x33, 0x33, 0x33,
+                                       0x33, 0x33, 0x33, 0x33 };
+VECT_VAR_DECL(expected,poly,16,4) [] = { 0x3333, 0x3333, 0x3333, 0x3333 };
+VECT_VAR_DECL(expected,hfloat,32,2) [] = { 0x33333333, 0x33333333 };
+VECT_VAR_DECL(expected,int,8,16) [] = { 0x2, 0x2, 0x2, 0x2, 0x2, 0x2, 0x2, 0x2,
+                                       0x2, 0x2, 0x2, 0x2, 0x2, 0x2, 0x2, 0x2 };
+VECT_VAR_DECL(expected,int,16,8) [] = { 0x3, 0x3, 0x3, 0x3, 0x3, 0x3, 0x3, 0x3 };
+VECT_VAR_DECL(expected,int,32,4) [] = { 0x3, 0x3, 0x3, 0x3 };
+VECT_VAR_DECL(expected,int,64,2) [] = { 0x3333333333333333,
+                                       0x3333333333333333 };
+VECT_VAR_DECL(expected,uint,8,16) [] = { 0x3, 0x3, 0x3, 0x3, 0x3, 0x3, 0x3, 0x3,
+                                        0x3, 0x3, 0x3, 0x3, 0x3, 0x3, 0x3, 0x3 };
+VECT_VAR_DECL(expected,uint,16,8) [] = { 0xd, 0xd, 0xd, 0xd,
+                                        0xd, 0xd, 0xd, 0xd };
+VECT_VAR_DECL(expected,uint,32,4) [] = { 0x1f, 0x1f, 0x1f, 0x1f };
+VECT_VAR_DECL(expected,uint,64,2) [] = { 0x3333333333333333,
+                                        0x3333333333333333 };
+VECT_VAR_DECL(expected,poly,8,16) [] = { 0x33, 0x33, 0x33, 0x33,
+                                        0x33, 0x33, 0x33, 0x33,
+                                        0x33, 0x33, 0x33, 0x33,
+                                        0x33, 0x33, 0x33, 0x33 };
+VECT_VAR_DECL(expected,poly,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333,
+                                        0x3333, 0x3333, 0x3333, 0x3333 };
+VECT_VAR_DECL(expected,hfloat,32,4) [] = { 0x33333333, 0x33333333,
+                                          0x33333333, 0x33333333 };
+
+
+/* Expected results with input=0.  */
+VECT_VAR_DECL(expected_with_0,int,8,8) [] = { 0x8, 0x8, 0x8, 0x8,
+                                             0x8, 0x8, 0x8, 0x8 };
+VECT_VAR_DECL(expected_with_0,int,16,4) [] = { 0x10, 0x10, 0x10, 0x10 };
+VECT_VAR_DECL(expected_with_0,int,32,2) [] = { 0x20, 0x20 };
+VECT_VAR_DECL(expected_with_0,int,64,1) [] = { 0x3333333333333333 };
+VECT_VAR_DECL(expected_with_0,uint,8,8) [] = { 0x8, 0x8, 0x8, 0x8,
+                                              0x8, 0x8, 0x8, 0x8 };
+VECT_VAR_DECL(expected_with_0,uint,16,4) [] = { 0x10, 0x10, 0x10, 0x10 };
+VECT_VAR_DECL(expected_with_0,uint,32,2) [] = { 0x20, 0x20 };
+VECT_VAR_DECL(expected_with_0,uint,64,1) [] = { 0x3333333333333333 };
+VECT_VAR_DECL(expected_with_0,poly,8,8) [] = { 0x33, 0x33, 0x33, 0x33,
+                                              0x33, 0x33, 0x33, 0x33 };
+VECT_VAR_DECL(expected_with_0,poly,16,4) [] = { 0x3333, 0x3333, 0x3333, 0x3333 };
+VECT_VAR_DECL(expected_with_0,hfloat,32,2) [] = { 0x33333333, 0x33333333 };
+VECT_VAR_DECL(expected_with_0,int,8,16) [] = { 0x8, 0x8, 0x8, 0x8,
+                                              0x8, 0x8, 0x8, 0x8,
+                                              0x8, 0x8, 0x8, 0x8,
+                                              0x8, 0x8, 0x8, 0x8 };
+VECT_VAR_DECL(expected_with_0,int,16,8) [] = { 0x10, 0x10, 0x10, 0x10,
+                                              0x10, 0x10, 0x10, 0x10 };
+VECT_VAR_DECL(expected_with_0,int,32,4) [] = { 0x20, 0x20, 0x20, 0x20 };
+VECT_VAR_DECL(expected_with_0,int,64,2) [] = { 0x3333333333333333,
+                                              0x3333333333333333 };
+VECT_VAR_DECL(expected_with_0,uint,8,16) [] = { 0x8, 0x8, 0x8, 0x8,
+                                               0x8, 0x8, 0x8, 0x8,
+                                               0x8, 0x8, 0x8, 0x8,
+                                               0x8, 0x8, 0x8, 0x8 };
+VECT_VAR_DECL(expected_with_0,uint,16,8) [] = { 0x10, 0x10, 0x10, 0x10,
+                                               0x10, 0x10, 0x10, 0x10 };
+VECT_VAR_DECL(expected_with_0,uint,32,4) [] = { 0x20, 0x20, 0x20, 0x20 };
+VECT_VAR_DECL(expected_with_0,uint,64,2) [] = { 0x3333333333333333,
+                                               0x3333333333333333 };
+VECT_VAR_DECL(expected_with_0,poly,8,16) [] = { 0x33, 0x33, 0x33, 0x33,
+                                               0x33, 0x33, 0x33, 0x33,
+                                               0x33, 0x33, 0x33, 0x33,
+                                               0x33, 0x33, 0x33, 0x33 };
+VECT_VAR_DECL(expected_with_0,poly,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333,
+                                               0x3333, 0x3333, 0x3333, 0x3333 };
+VECT_VAR_DECL(expected_with_0,hfloat,32,4) [] = { 0x33333333, 0x33333333,
+                                                 0x33333333, 0x33333333 };
+
+#define INSN_NAME vclz
+#define TEST_MSG "VCLZ/VCLZQ"
+
+#define FNNAME1(NAME) void exec_ ## NAME (void)
+#define FNNAME(NAME) FNNAME1(NAME)
+
+FNNAME (INSN_NAME)
+{
+  /* Basic test: y=vclz(x), then store the result.  */
+#define TEST_UNARY_OP1(INSN, Q, T1, T2, W, N)                          \
+  VECT_VAR(vector_res, T1, W, N) =                                     \
+    INSN##Q##_##T2##W(VECT_VAR(vector, T1, W, N));                     \
+  vst1##Q##_##T2##W(VECT_VAR(result, T1, W, N), VECT_VAR(vector_res, T1, W, N))
+
+#define TEST_UNARY_OP(INSN, Q, T1, T2, W, N)   \
+  TEST_UNARY_OP1(INSN, Q, T1, T2, W, N)                \
+
+  /* No need for 64 bits variants */
+  DECL_VARIABLE(vector, int, 8, 8);
+  DECL_VARIABLE(vector, int, 16, 4);
+  DECL_VARIABLE(vector, int, 32, 2);
+  DECL_VARIABLE(vector, uint, 8, 8);
+  DECL_VARIABLE(vector, uint, 16, 4);
+  DECL_VARIABLE(vector, uint, 32, 2);
+  DECL_VARIABLE(vector, int, 8, 16);
+  DECL_VARIABLE(vector, int, 16, 8);
+  DECL_VARIABLE(vector, int, 32, 4);
+  DECL_VARIABLE(vector, uint, 8, 16);
+  DECL_VARIABLE(vector, uint, 16, 8);
+  DECL_VARIABLE(vector, uint, 32, 4);
+
+  DECL_VARIABLE(vector_res, int, 8, 8);
+  DECL_VARIABLE(vector_res, int, 16, 4);
+  DECL_VARIABLE(vector_res, int, 32, 2);
+  DECL_VARIABLE(vector_res, uint, 8, 8);
+  DECL_VARIABLE(vector_res, uint, 16, 4);
+  DECL_VARIABLE(vector_res, uint, 32, 2);
+  DECL_VARIABLE(vector_res, int, 8, 16);
+  DECL_VARIABLE(vector_res, int, 16, 8);
+  DECL_VARIABLE(vector_res, int, 32, 4);
+  DECL_VARIABLE(vector_res, uint, 8, 16);
+  DECL_VARIABLE(vector_res, uint, 16, 8);
+  DECL_VARIABLE(vector_res, uint, 32, 4);
+
+  clean_results ();
+
+  /* Fill input vector with arbitrary values.  */
+  VDUP(vector, , int, s, 8, 8, 0x84);
+  VDUP(vector, , int, s, 16, 4, 0x1234);
+  VDUP(vector, , int, s, 32, 2, 0x5678);
+  VDUP(vector, , uint, u, 8, 8, 0x34);
+  VDUP(vector, , uint, u, 16, 4, 0x8234);
+  VDUP(vector, , uint, u, 32, 2, 0x7654321);
+  VDUP(vector, q, int, s, 8, 16, 0x34);
+  VDUP(vector, q, int, s, 16, 8, 0x1234);
+  VDUP(vector, q, int, s, 32, 4, 0x12345678);
+  VDUP(vector, q, uint, u, 8, 16, 0x13);
+  VDUP(vector, q, uint, u, 16, 8, 0x4);
+  VDUP(vector, q, uint, u, 32, 4, 0x1);
+
+  /* Apply a unary operator named INSN_NAME.  */
+  TEST_UNARY_OP(INSN_NAME, , int, s, 8, 8);
+  TEST_UNARY_OP(INSN_NAME, , int, s, 16, 4);
+  TEST_UNARY_OP(INSN_NAME, , int, s, 32, 2);
+  TEST_UNARY_OP(INSN_NAME, , uint, u, 8, 8);
+  TEST_UNARY_OP(INSN_NAME, , uint, u, 16, 4);
+  TEST_UNARY_OP(INSN_NAME, , uint, u, 32, 2);
+  TEST_UNARY_OP(INSN_NAME, q, int, s, 8, 16);
+  TEST_UNARY_OP(INSN_NAME, q, int, s, 16, 8);
+  TEST_UNARY_OP(INSN_NAME, q, int, s, 32, 4);
+  TEST_UNARY_OP(INSN_NAME, q, uint, u, 8, 16);
+  TEST_UNARY_OP(INSN_NAME, q, uint, u, 16, 8);
+  TEST_UNARY_OP(INSN_NAME, q, uint, u, 32, 4);
+
+  CHECK_RESULTS (TEST_MSG, "");
+
+  /* Test with zero as input.  */
+  VDUP(vector, , int, s, 8, 8, 0);
+  VDUP(vector, , int, s, 16, 4, 0);
+  VDUP(vector, , int, s, 32, 2, 0);
+  VDUP(vector, , uint, u, 8, 8, 0);
+  VDUP(vector, , uint, u, 16, 4, 0);
+  VDUP(vector, , uint, u, 32, 2, 0);
+  VDUP(vector, q, int, s, 8, 16, 0);
+  VDUP(vector, q, int, s, 16, 8, 0);
+  VDUP(vector, q, int, s, 32, 4, 0);
+  VDUP(vector, q, uint, u, 8, 16, 0);
+  VDUP(vector, q, uint, u, 16, 8, 0);
+  VDUP(vector, q, uint, u, 32, 4, 0);
+
+  /* Apply a unary operator named INSN_NAME.  */
+  TEST_UNARY_OP(INSN_NAME, , int, s, 8, 8);
+  TEST_UNARY_OP(INSN_NAME, , int, s, 16, 4);
+  TEST_UNARY_OP(INSN_NAME, , int, s, 32, 2);
+  TEST_UNARY_OP(INSN_NAME, , uint, u, 8, 8);
+  TEST_UNARY_OP(INSN_NAME, , uint, u, 16, 4);
+  TEST_UNARY_OP(INSN_NAME, , uint, u, 32, 2);
+  TEST_UNARY_OP(INSN_NAME, q, int, s, 8, 16);
+  TEST_UNARY_OP(INSN_NAME, q, int, s, 16, 8);
+  TEST_UNARY_OP(INSN_NAME, q, int, s, 32, 4);
+  TEST_UNARY_OP(INSN_NAME, q, uint, u, 8, 16);
+  TEST_UNARY_OP(INSN_NAME, q, uint, u, 16, 8);
+  TEST_UNARY_OP(INSN_NAME, q, uint, u, 32, 4);
+
+  CHECK_RESULTS_NAMED (TEST_MSG, expected_with_0, " (input=0)");
+}
+
+int main (void)
+{
+  exec_vclz ();
+  return 0;
+}
diff --git a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vdup-vmov.c b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vdup-vmov.c
new file mode 100644 (file)
index 0000000..b5132f4
--- /dev/null
@@ -0,0 +1,253 @@
+#include <arm_neon.h>
+#include "arm-neon-ref.h"
+#include "compute-ref-data.h"
+
+/* We test vdup and vmov in the same place since they are aliases.  */
+
+/* Expected results.  */
+/* Chunk 0.  */
+VECT_VAR_DECL(expected0,int,8,8) [] = { 0xf0, 0xf0, 0xf0, 0xf0,
+                                       0xf0, 0xf0, 0xf0, 0xf0 };
+VECT_VAR_DECL(expected0,int,16,4) [] = { 0xfff0, 0xfff0, 0xfff0, 0xfff0 };
+VECT_VAR_DECL(expected0,int,32,2) [] = { 0xfffffff0, 0xfffffff0 };
+VECT_VAR_DECL(expected0,int,64,1) [] = { 0xfffffffffffffff0 };
+VECT_VAR_DECL(expected0,uint,8,8) [] = { 0xf0, 0xf0, 0xf0, 0xf0,
+                                        0xf0, 0xf0, 0xf0, 0xf0 };
+VECT_VAR_DECL(expected0,uint,16,4) [] = { 0xfff0, 0xfff0, 0xfff0, 0xfff0 };
+VECT_VAR_DECL(expected0,uint,32,2) [] = { 0xfffffff0, 0xfffffff0 };
+VECT_VAR_DECL(expected0,uint,64,1) [] = { 0xfffffffffffffff0 };
+VECT_VAR_DECL(expected0,poly,8,8) [] = { 0xf0, 0xf0, 0xf0, 0xf0,
+                                        0xf0, 0xf0, 0xf0, 0xf0 };
+VECT_VAR_DECL(expected0,poly,16,4) [] = { 0xfff0, 0xfff0, 0xfff0, 0xfff0 };
+VECT_VAR_DECL(expected0,hfloat,32,2) [] = { 0xc1800000, 0xc1800000 };
+VECT_VAR_DECL(expected0,int,8,16) [] = { 0xf0, 0xf0, 0xf0, 0xf0,
+                                        0xf0, 0xf0, 0xf0, 0xf0,
+                                        0xf0, 0xf0, 0xf0, 0xf0,
+                                        0xf0, 0xf0, 0xf0, 0xf0 };
+VECT_VAR_DECL(expected0,int,16,8) [] = { 0xfff0, 0xfff0, 0xfff0, 0xfff0,
+                                        0xfff0, 0xfff0, 0xfff0, 0xfff0 };
+VECT_VAR_DECL(expected0,int,32,4) [] = { 0xfffffff0, 0xfffffff0,
+                                        0xfffffff0, 0xfffffff0 };
+VECT_VAR_DECL(expected0,int,64,2) [] = { 0xfffffffffffffff0,
+                                        0xfffffffffffffff0 };
+VECT_VAR_DECL(expected0,uint,8,16) [] = { 0xf0, 0xf0, 0xf0, 0xf0,
+                                         0xf0, 0xf0, 0xf0, 0xf0,
+                                         0xf0, 0xf0, 0xf0, 0xf0,
+                                         0xf0, 0xf0, 0xf0, 0xf0 };
+VECT_VAR_DECL(expected0,uint,16,8) [] = { 0xfff0, 0xfff0, 0xfff0, 0xfff0,
+                                         0xfff0, 0xfff0, 0xfff0, 0xfff0 };
+VECT_VAR_DECL(expected0,uint,32,4) [] = { 0xfffffff0, 0xfffffff0,
+                                         0xfffffff0, 0xfffffff0 };
+VECT_VAR_DECL(expected0,uint,64,2) [] = { 0xfffffffffffffff0,
+                                         0xfffffffffffffff0 };
+VECT_VAR_DECL(expected0,poly,8,16) [] = { 0xf0, 0xf0, 0xf0, 0xf0,
+                                         0xf0, 0xf0, 0xf0, 0xf0,
+                                         0xf0, 0xf0, 0xf0, 0xf0,
+                                         0xf0, 0xf0, 0xf0, 0xf0 };
+VECT_VAR_DECL(expected0,poly,16,8) [] = { 0xfff0, 0xfff0, 0xfff0, 0xfff0,
+                                         0xfff0, 0xfff0, 0xfff0, 0xfff0 };
+VECT_VAR_DECL(expected0,hfloat,32,4) [] = { 0xc1800000, 0xc1800000,
+                                           0xc1800000, 0xc1800000 };
+
+/* Chunk 1.  */
+VECT_VAR_DECL(expected1,int,8,8) [] = { 0xf1, 0xf1, 0xf1, 0xf1,
+                                       0xf1, 0xf1, 0xf1, 0xf1 };
+VECT_VAR_DECL(expected1,int,16,4) [] = { 0xfff1, 0xfff1, 0xfff1, 0xfff1 };
+VECT_VAR_DECL(expected1,int,32,2) [] = { 0xfffffff1, 0xfffffff1 };
+VECT_VAR_DECL(expected1,int,64,1) [] = { 0xfffffffffffffff1 };
+VECT_VAR_DECL(expected1,uint,8,8) [] = { 0xf1, 0xf1, 0xf1, 0xf1,
+                                        0xf1, 0xf1, 0xf1, 0xf1 };
+VECT_VAR_DECL(expected1,uint,16,4) [] = { 0xfff1, 0xfff1, 0xfff1, 0xfff1 };
+VECT_VAR_DECL(expected1,uint,32,2) [] = { 0xfffffff1, 0xfffffff1 };
+VECT_VAR_DECL(expected1,uint,64,1) [] = { 0xfffffffffffffff1 };
+VECT_VAR_DECL(expected1,poly,8,8) [] = { 0xf1, 0xf1, 0xf1, 0xf1,
+                                        0xf1, 0xf1, 0xf1, 0xf1 };
+VECT_VAR_DECL(expected1,poly,16,4) [] = { 0xfff1, 0xfff1, 0xfff1, 0xfff1 };
+VECT_VAR_DECL(expected1,hfloat,32,2) [] = { 0xc1700000, 0xc1700000 };
+VECT_VAR_DECL(expected1,int,8,16) [] = { 0xf1, 0xf1, 0xf1, 0xf1,
+                                        0xf1, 0xf1, 0xf1, 0xf1,
+                                        0xf1, 0xf1, 0xf1, 0xf1,
+                                        0xf1, 0xf1, 0xf1, 0xf1 };
+VECT_VAR_DECL(expected1,int,16,8) [] = { 0xfff1, 0xfff1, 0xfff1, 0xfff1,
+                                        0xfff1, 0xfff1, 0xfff1, 0xfff1 };
+VECT_VAR_DECL(expected1,int,32,4) [] = { 0xfffffff1, 0xfffffff1,
+                                        0xfffffff1, 0xfffffff1 };
+VECT_VAR_DECL(expected1,int,64,2) [] = { 0xfffffffffffffff1,
+                                        0xfffffffffffffff1 };
+VECT_VAR_DECL(expected1,uint,8,16) [] = { 0xf1, 0xf1, 0xf1, 0xf1,
+                                         0xf1, 0xf1, 0xf1, 0xf1,
+                                         0xf1, 0xf1, 0xf1, 0xf1,
+                                         0xf1, 0xf1, 0xf1, 0xf1 };
+VECT_VAR_DECL(expected1,uint,16,8) [] = { 0xfff1, 0xfff1, 0xfff1, 0xfff1,
+                                         0xfff1, 0xfff1, 0xfff1, 0xfff1 };
+VECT_VAR_DECL(expected1,uint,32,4) [] = { 0xfffffff1, 0xfffffff1,
+                                         0xfffffff1, 0xfffffff1 };
+VECT_VAR_DECL(expected1,uint,64,2) [] = { 0xfffffffffffffff1,
+                                         0xfffffffffffffff1 };
+VECT_VAR_DECL(expected1,poly,8,16) [] = { 0xf1, 0xf1, 0xf1, 0xf1,
+                                         0xf1, 0xf1, 0xf1, 0xf1,
+                                         0xf1, 0xf1, 0xf1, 0xf1,
+                                         0xf1, 0xf1, 0xf1, 0xf1 };
+VECT_VAR_DECL(expected1,poly,16,8) [] = { 0xfff1, 0xfff1, 0xfff1, 0xfff1,
+                                         0xfff1, 0xfff1, 0xfff1, 0xfff1 };
+VECT_VAR_DECL(expected1,hfloat,32,4) [] = { 0xc1700000, 0xc1700000,
+                                           0xc1700000, 0xc1700000 };
+
+/* Chunk 2.  */
+VECT_VAR_DECL(expected2,int,8,8) [] = { 0xf2, 0xf2, 0xf2, 0xf2,
+                                       0xf2, 0xf2, 0xf2, 0xf2 };
+VECT_VAR_DECL(expected2,int,16,4) [] = { 0xfff2, 0xfff2, 0xfff2, 0xfff2 };
+VECT_VAR_DECL(expected2,int,32,2) [] = { 0xfffffff2, 0xfffffff2 };
+VECT_VAR_DECL(expected2,int,64,1) [] = { 0xfffffffffffffff2 };
+VECT_VAR_DECL(expected2,uint,8,8) [] = { 0xf2, 0xf2, 0xf2, 0xf2,
+                                        0xf2, 0xf2, 0xf2, 0xf2 };
+VECT_VAR_DECL(expected2,uint,16,4) [] = { 0xfff2, 0xfff2, 0xfff2, 0xfff2 };
+VECT_VAR_DECL(expected2,uint,32,2) [] = { 0xfffffff2, 0xfffffff2 };
+VECT_VAR_DECL(expected2,uint,64,1) [] = { 0xfffffffffffffff2 };
+VECT_VAR_DECL(expected2,poly,8,8) [] = { 0xf2, 0xf2, 0xf2, 0xf2,
+                                        0xf2, 0xf2, 0xf2, 0xf2 };
+VECT_VAR_DECL(expected2,poly,16,4) [] = { 0xfff2, 0xfff2, 0xfff2, 0xfff2 };
+VECT_VAR_DECL(expected2,hfloat,32,2) [] = { 0xc1600000, 0xc1600000 };
+VECT_VAR_DECL(expected2,int,8,16) [] = { 0xf2, 0xf2, 0xf2, 0xf2,
+                                        0xf2, 0xf2, 0xf2, 0xf2,
+                                        0xf2, 0xf2, 0xf2, 0xf2,
+                                        0xf2, 0xf2, 0xf2, 0xf2 };
+VECT_VAR_DECL(expected2,int,16,8) [] = { 0xfff2, 0xfff2, 0xfff2, 0xfff2,
+                                        0xfff2, 0xfff2, 0xfff2, 0xfff2 };
+VECT_VAR_DECL(expected2,int,32,4) [] = { 0xfffffff2, 0xfffffff2,
+                                        0xfffffff2, 0xfffffff2 };
+VECT_VAR_DECL(expected2,int,64,2) [] = { 0xfffffffffffffff2,
+                                        0xfffffffffffffff2 };
+VECT_VAR_DECL(expected2,uint,8,16) [] = { 0xf2, 0xf2, 0xf2, 0xf2,
+                                         0xf2, 0xf2, 0xf2, 0xf2,
+                                         0xf2, 0xf2, 0xf2, 0xf2,
+                                         0xf2, 0xf2, 0xf2, 0xf2 };
+VECT_VAR_DECL(expected2,uint,16,8) [] = { 0xfff2, 0xfff2, 0xfff2, 0xfff2,
+                                         0xfff2, 0xfff2, 0xfff2, 0xfff2 };
+VECT_VAR_DECL(expected2,uint,32,4) [] = { 0xfffffff2, 0xfffffff2,
+                                         0xfffffff2, 0xfffffff2 };
+VECT_VAR_DECL(expected2,uint,64,2) [] = { 0xfffffffffffffff2,
+                                         0xfffffffffffffff2 };
+VECT_VAR_DECL(expected2,poly,8,16) [] = { 0xf2, 0xf2, 0xf2, 0xf2,
+                                         0xf2, 0xf2, 0xf2, 0xf2,
+                                         0xf2, 0xf2, 0xf2, 0xf2,
+                                         0xf2, 0xf2, 0xf2, 0xf2 };
+VECT_VAR_DECL(expected2,poly,16,8) [] = { 0xfff2, 0xfff2, 0xfff2, 0xfff2,
+                                         0xfff2, 0xfff2, 0xfff2, 0xfff2 };
+VECT_VAR_DECL(expected2,hfloat,32,4) [] = { 0xc1600000, 0xc1600000,
+                                           0xc1600000, 0xc1600000 };
+
+#define TEST_MSG "VDUP/VDUPQ"
+void exec_vdup_vmov (void)
+{
+  int i;
+
+  /* Basic test: vec=vdup(x), then store the result.  */
+#undef TEST_VDUP
+#define TEST_VDUP(Q, T1, T2, W, N)                                     \
+  VECT_VAR(vector, T1, W, N) =                                         \
+    vdup##Q##_n_##T2##W(VECT_VAR(buffer_dup, T1, W, N)[i]);            \
+  vst1##Q##_##T2##W(VECT_VAR(result, T1, W, N), VECT_VAR(vector, T1, W, N))
+
+  /* Basic test: vec=vmov(x), then store the result.  */
+#define TEST_VMOV(Q, T1, T2, W, N)                                     \
+  VECT_VAR(vector, T1, W, N) =                                         \
+    vmov##Q##_n_##T2##W(VECT_VAR(buffer_dup, T1, W, N)[i]);            \
+  vst1##Q##_##T2##W(VECT_VAR(result, T1, W, N), VECT_VAR(vector, T1, W, N))
+
+  DECL_VARIABLE_ALL_VARIANTS(vector);
+
+  /* Try to read different places from the input buffer.  */
+  for (i=0; i< 3; i++) {
+    clean_results ();
+
+    TEST_VDUP(, int, s, 8, 8);
+    TEST_VDUP(, int, s, 16, 4);
+    TEST_VDUP(, int, s, 32, 2);
+    TEST_VDUP(, int, s, 64, 1);
+    TEST_VDUP(, uint, u, 8, 8);
+    TEST_VDUP(, uint, u, 16, 4);
+    TEST_VDUP(, uint, u, 32, 2);
+    TEST_VDUP(, uint, u, 64, 1);
+    TEST_VDUP(, poly, p, 8, 8);
+    TEST_VDUP(, poly, p, 16, 4);
+    TEST_VDUP(, float, f, 32, 2);
+
+    TEST_VDUP(q, int, s, 8, 16);
+    TEST_VDUP(q, int, s, 16, 8);
+    TEST_VDUP(q, int, s, 32, 4);
+    TEST_VDUP(q, int, s, 64, 2);
+    TEST_VDUP(q, uint, u, 8, 16);
+    TEST_VDUP(q, uint, u, 16, 8);
+    TEST_VDUP(q, uint, u, 32, 4);
+    TEST_VDUP(q, uint, u, 64, 2);
+    TEST_VDUP(q, poly, p, 8, 16);
+    TEST_VDUP(q, poly, p, 16, 8);
+    TEST_VDUP(q, float, f, 32, 4);
+
+    switch (i) {
+    case 0:
+      CHECK_RESULTS_NAMED (TEST_MSG, expected0, "");
+      break;
+    case 1:
+      CHECK_RESULTS_NAMED (TEST_MSG, expected1, "");
+      break;
+    case 2:
+      CHECK_RESULTS_NAMED (TEST_MSG, expected2, "");
+      break;
+    default:
+      abort();
+    }
+  }
+
+  /* Do the same tests with vmov. Use the same expected results.  */
+#undef TEST_MSG
+#define TEST_MSG "VMOV/VMOVQ"
+  for (i=0; i< 3; i++) {
+    clean_results ();
+
+    TEST_VMOV(, int, s, 8, 8);
+    TEST_VMOV(, int, s, 16, 4);
+    TEST_VMOV(, int, s, 32, 2);
+    TEST_VMOV(, int, s, 64, 1);
+    TEST_VMOV(, uint, u, 8, 8);
+    TEST_VMOV(, uint, u, 16, 4);
+    TEST_VMOV(, uint, u, 32, 2);
+    TEST_VMOV(, uint, u, 64, 1);
+    TEST_VMOV(, poly, p, 8, 8);
+    TEST_VMOV(, poly, p, 16, 4);
+    TEST_VMOV(, float, f, 32, 2);
+
+    TEST_VMOV(q, int, s, 8, 16);
+    TEST_VMOV(q, int, s, 16, 8);
+    TEST_VMOV(q, int, s, 32, 4);
+    TEST_VMOV(q, int, s, 64, 2);
+    TEST_VMOV(q, uint, u, 8, 16);
+    TEST_VMOV(q, uint, u, 16, 8);
+    TEST_VMOV(q, uint, u, 32, 4);
+    TEST_VMOV(q, uint, u, 64, 2);
+    TEST_VMOV(q, poly, p, 8, 16);
+    TEST_VMOV(q, poly, p, 16, 8);
+    TEST_VMOV(q, float, f, 32, 4);
+
+    switch (i) {
+    case 0:
+      CHECK_RESULTS_NAMED (TEST_MSG, expected0, "");
+      break;
+    case 1:
+      CHECK_RESULTS_NAMED (TEST_MSG, expected1, "");
+      break;
+    case 2:
+      CHECK_RESULTS_NAMED (TEST_MSG, expected2, "");
+      break;
+    default:
+      abort();
+    }
+  }
+}
+
+int main (void)
+{
+  exec_vdup_vmov ();
+  return 0;
+}
diff --git a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/veor.c b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/veor.c
new file mode 100644 (file)
index 0000000..474b225
--- /dev/null
@@ -0,0 +1,47 @@
+#define INSN_NAME veor
+#define TEST_MSG "VEOR/VEORQ"
+
+#include "binary_op.inc"
+
+/* Expected results.  */
+VECT_VAR_DECL(expected,int,8,8) [] = { 0xf2, 0xf3, 0xf0, 0xf1,
+                                      0xf6, 0xf7, 0xf4, 0xf5 };
+VECT_VAR_DECL(expected,int,16,4) [] = { 0xc, 0xd, 0xe, 0xf };
+VECT_VAR_DECL(expected,int,32,2) [] = { 0xfffffff3, 0xfffffff2 };
+VECT_VAR_DECL(expected,int,64,1) [] = { 0xffffffffffffff94 };
+VECT_VAR_DECL(expected,uint,8,8) [] = { 0xe4, 0xe5, 0xe6, 0xe7,
+                                       0xe0, 0xe1, 0xe2, 0xe3 };
+VECT_VAR_DECL(expected,uint,16,4) [] = { 0xffee, 0xffef, 0xffec, 0xffed };
+VECT_VAR_DECL(expected,uint,32,2) [] = { 0xffffffd8, 0xffffffd9 };
+VECT_VAR_DECL(expected,uint,64,1) [] = { 0xfffffffffffffff2 };
+VECT_VAR_DECL(expected,poly,8,8) [] = { 0x33, 0x33, 0x33, 0x33,
+                                       0x33, 0x33, 0x33, 0x33 };
+VECT_VAR_DECL(expected,poly,16,4) [] = { 0x3333, 0x3333, 0x3333, 0x3333 };
+VECT_VAR_DECL(expected,hfloat,32,2) [] = { 0x33333333, 0x33333333 };
+VECT_VAR_DECL(expected,int,8,16) [] = { 0x6, 0x7, 0x4, 0x5,
+                                       0x2, 0x3, 0x0, 0x1,
+                                       0xe, 0xf, 0xc, 0xd,
+                                       0xa, 0xb, 0x8, 0x9 };
+VECT_VAR_DECL(expected,int,16,8) [] = { 0x1c, 0x1d, 0x1e, 0x1f,
+                                       0x18, 0x19, 0x1a, 0x1b };
+VECT_VAR_DECL(expected,int,32,4) [] = { 0x12, 0x13, 0x10, 0x11 };
+VECT_VAR_DECL(expected,int,64,2) [] = { 0xffffffffffffffe8,
+                                       0xffffffffffffffe9 };
+VECT_VAR_DECL(expected,uint,8,16) [] = { 0xfc, 0xfd, 0xfe, 0xff,
+                                        0xf8, 0xf9, 0xfa, 0xfb,
+                                        0xf4, 0xf5, 0xf6, 0xf7,
+                                        0xf0, 0xf1, 0xf2, 0xf3 };
+VECT_VAR_DECL(expected,uint,16,8) [] = { 0xfff3, 0xfff2, 0xfff1, 0xfff0,
+                                        0xfff7, 0xfff6, 0xfff5, 0xfff4 };
+VECT_VAR_DECL(expected,uint,32,4) [] = { 0xffffffc7, 0xffffffc6,
+                                        0xffffffc5, 0xffffffc4 };
+VECT_VAR_DECL(expected,uint,64,2) [] = { 0xfffffffffffffff3,
+                                        0xfffffffffffffff2 };
+VECT_VAR_DECL(expected,poly,8,16) [] = { 0x33, 0x33, 0x33, 0x33,
+                                        0x33, 0x33, 0x33, 0x33,
+                                        0x33, 0x33, 0x33, 0x33,
+                                        0x33, 0x33, 0x33, 0x33 };
+VECT_VAR_DECL(expected,poly,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333,
+                                        0x3333, 0x3333, 0x3333, 0x3333 };
+VECT_VAR_DECL(expected,hfloat,32,4) [] = { 0x33333333, 0x33333333,
+                                          0x33333333, 0x33333333 };
diff --git a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vld1.c b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vld1.c
new file mode 100644 (file)
index 0000000..ced9d73
--- /dev/null
@@ -0,0 +1,75 @@
+#include <arm_neon.h>
+#include "arm-neon-ref.h"
+#include "compute-ref-data.h"
+
+/* Expected results.  */
+VECT_VAR_DECL(expected,int,8,8) [] = { 0xf0, 0xf1, 0xf2, 0xf3,
+                                      0xf4, 0xf5, 0xf6, 0xf7 };
+VECT_VAR_DECL(expected,int,16,4) [] = { 0xfff0, 0xfff1, 0xfff2, 0xfff3 };
+VECT_VAR_DECL(expected,int,32,2) [] = { 0xfffffff0, 0xfffffff1 };
+VECT_VAR_DECL(expected,int,64,1) [] = { 0xfffffffffffffff0 };
+VECT_VAR_DECL(expected,uint,8,8) [] = { 0xf0, 0xf1, 0xf2, 0xf3,
+                                       0xf4, 0xf5, 0xf6, 0xf7 };
+VECT_VAR_DECL(expected,uint,16,4) [] = { 0xfff0, 0xfff1, 0xfff2, 0xfff3 };
+VECT_VAR_DECL(expected,uint,32,2) [] = { 0xfffffff0, 0xfffffff1 };
+VECT_VAR_DECL(expected,uint,64,1) [] = { 0xfffffffffffffff0 };
+VECT_VAR_DECL(expected,poly,8,8) [] = { 0xf0, 0xf1, 0xf2, 0xf3,
+                                       0xf4, 0xf5, 0xf6, 0xf7 };
+VECT_VAR_DECL(expected,poly,16,4) [] = { 0xfff0, 0xfff1, 0xfff2, 0xfff3 };
+VECT_VAR_DECL(expected,hfloat,32,2) [] = { 0xc1800000, 0xc1700000 };
+VECT_VAR_DECL(expected,int,8,16) [] = { 0xf0, 0xf1, 0xf2, 0xf3,
+                                       0xf4, 0xf5, 0xf6, 0xf7,
+                                       0xf8, 0xf9, 0xfa, 0xfb,
+                                       0xfc, 0xfd, 0xfe, 0xff };
+VECT_VAR_DECL(expected,int,16,8) [] = { 0xfff0, 0xfff1, 0xfff2, 0xfff3,
+                                       0xfff4, 0xfff5, 0xfff6, 0xfff7 };
+VECT_VAR_DECL(expected,int,32,4) [] = { 0xfffffff0, 0xfffffff1,
+                                       0xfffffff2, 0xfffffff3 };
+VECT_VAR_DECL(expected,int,64,2) [] = { 0xfffffffffffffff0,
+                                       0xfffffffffffffff1 };
+VECT_VAR_DECL(expected,uint,8,16) [] = { 0xf0, 0xf1, 0xf2, 0xf3,
+                                        0xf4, 0xf5, 0xf6, 0xf7,
+                                        0xf8, 0xf9, 0xfa, 0xfb,
+                                        0xfc, 0xfd, 0xfe, 0xff };
+VECT_VAR_DECL(expected,uint,16,8) [] = { 0xfff0, 0xfff1, 0xfff2,
+                                        0xfff3, 0xfff4, 0xfff5,
+                                        0xfff6, 0xfff7 };
+VECT_VAR_DECL(expected,uint,32,4) [] = { 0xfffffff0, 0xfffffff1,
+                                        0xfffffff2, 0xfffffff3 };
+VECT_VAR_DECL(expected,uint,64,2) [] = { 0xfffffffffffffff0,
+                                        0xfffffffffffffff1 };
+VECT_VAR_DECL(expected,poly,8,16) [] = { 0xf0, 0xf1, 0xf2, 0xf3,
+                                        0xf4, 0xf5, 0xf6, 0xf7,
+                                        0xf8, 0xf9, 0xfa, 0xfb,
+                                        0xfc, 0xfd, 0xfe, 0xff };
+VECT_VAR_DECL(expected,poly,16,8) [] = { 0xfff0, 0xfff1, 0xfff2, 0xfff3,
+                                        0xfff4, 0xfff5, 0xfff6, 0xfff7 };
+VECT_VAR_DECL(expected,hfloat,32,4) [] = { 0xc1800000, 0xc1700000,
+                                          0xc1600000, 0xc1500000 };
+
+#define TEST_MSG "VLD1/VLD1Q"
+void exec_vld1 (void)
+{
+  /* Basic test vec=vld1(buffer); then store vec: vst1(result, vector).  */
+  /* This test actually tests vdl1 and vst1 at the same time.  */
+#define TEST_VLD1(VAR, BUF, Q, T1, T2, W, N)                           \
+  VECT_VAR(VAR, T1, W, N) = vld1##Q##_##T2##W(VECT_VAR(BUF, T1, W, N)); \
+  vst1##Q##_##T2##W(VECT_VAR(result, T1, W, N), VECT_VAR(VAR, T1, W, N))
+
+  DECL_VARIABLE_ALL_VARIANTS(vector);
+
+  clean_results ();
+
+  TEST_MACRO_ALL_VARIANTS_2_5(TEST_VLD1, vector, buffer);
+
+  TEST_VLD1(vector, buffer, , float, f, 32, 2);
+  TEST_VLD1(vector, buffer, q, float, f, 32, 4);
+
+  CHECK_RESULTS (TEST_MSG, "");
+}
+
+int main (void)
+{
+  exec_vld1 ();
+  return 0;
+}
diff --git a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vld1_dup.c b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vld1_dup.c
new file mode 100644 (file)
index 0000000..0e05274
--- /dev/null
@@ -0,0 +1,180 @@
+#include <arm_neon.h>
+#include "arm-neon-ref.h"
+#include "compute-ref-data.h"
+
+/* Expected results.  */
+/* Chunk 0.  */
+VECT_VAR_DECL(expected0,int,8,8) [] = { 0xf0, 0xf0, 0xf0, 0xf0,
+                                       0xf0, 0xf0, 0xf0, 0xf0 };
+VECT_VAR_DECL(expected0,int,16,4) [] = { 0xfff0, 0xfff0, 0xfff0, 0xfff0 };
+VECT_VAR_DECL(expected0,int,32,2) [] = { 0xfffffff0, 0xfffffff0 };
+VECT_VAR_DECL(expected0,int,64,1) [] = { 0xfffffffffffffff0 };
+VECT_VAR_DECL(expected0,uint,8,8) [] = { 0xf0, 0xf0, 0xf0, 0xf0,
+                                        0xf0, 0xf0, 0xf0, 0xf0 };
+VECT_VAR_DECL(expected0,uint,16,4) [] = { 0xfff0, 0xfff0, 0xfff0, 0xfff0 };
+VECT_VAR_DECL(expected0,uint,32,2) [] = { 0xfffffff0, 0xfffffff0 };
+VECT_VAR_DECL(expected0,uint,64,1) [] = { 0xfffffffffffffff0 };
+VECT_VAR_DECL(expected0,poly,8,8) [] = { 0xf0, 0xf0, 0xf0, 0xf0,
+                                        0xf0, 0xf0, 0xf0, 0xf0 };
+VECT_VAR_DECL(expected0,poly,16,4) [] = { 0xfff0, 0xfff0, 0xfff0, 0xfff0 };
+VECT_VAR_DECL(expected0,hfloat,32,2) [] = { 0xc1800000, 0xc1800000 };
+VECT_VAR_DECL(expected0,int,8,16) [] = { 0xf0, 0xf0, 0xf0, 0xf0,
+                                        0xf0, 0xf0, 0xf0, 0xf0,
+                                        0xf0, 0xf0, 0xf0, 0xf0,
+                                        0xf0, 0xf0, 0xf0, 0xf0 };
+VECT_VAR_DECL(expected0,int,16,8) [] = { 0xfff0, 0xfff0, 0xfff0, 0xfff0,
+                                        0xfff0, 0xfff0, 0xfff0, 0xfff0 };
+VECT_VAR_DECL(expected0,int,32,4) [] = { 0xfffffff0, 0xfffffff0,
+                                        0xfffffff0, 0xfffffff0 };
+VECT_VAR_DECL(expected0,int,64,2) [] = { 0xfffffffffffffff0,
+                                        0xfffffffffffffff0 };
+VECT_VAR_DECL(expected0,uint,8,16) [] = { 0xf0, 0xf0, 0xf0, 0xf0,
+                                         0xf0, 0xf0, 0xf0, 0xf0,
+                                         0xf0, 0xf0, 0xf0, 0xf0,
+                                         0xf0, 0xf0, 0xf0, 0xf0 };
+VECT_VAR_DECL(expected0,uint,16,8) [] = { 0xfff0, 0xfff0, 0xfff0, 0xfff0,
+                                         0xfff0, 0xfff0, 0xfff0, 0xfff0 };
+VECT_VAR_DECL(expected0,uint,32,4) [] = { 0xfffffff0, 0xfffffff0,
+                                         0xfffffff0, 0xfffffff0 };
+VECT_VAR_DECL(expected0,uint,64,2) [] = { 0xfffffffffffffff0,
+                                         0xfffffffffffffff0 };
+VECT_VAR_DECL(expected0,poly,8,16) [] = { 0xf0, 0xf0, 0xf0, 0xf0,
+                                         0xf0, 0xf0, 0xf0, 0xf0,
+                                         0xf0, 0xf0, 0xf0, 0xf0,
+                                         0xf0, 0xf0, 0xf0, 0xf0 };
+VECT_VAR_DECL(expected0,poly,16,8) [] = { 0xfff0, 0xfff0, 0xfff0, 0xfff0,
+                                         0xfff0, 0xfff0, 0xfff0, 0xfff0 };
+VECT_VAR_DECL(expected0,hfloat,32,4) [] = { 0xc1800000, 0xc1800000,
+                                           0xc1800000, 0xc1800000 };
+
+/* Chunk 1.  */
+VECT_VAR_DECL(expected1,int,8,8) [] = { 0xf1, 0xf1, 0xf1, 0xf1,
+                                       0xf1, 0xf1, 0xf1, 0xf1 };
+VECT_VAR_DECL(expected1,int,16,4) [] = { 0xfff1, 0xfff1, 0xfff1, 0xfff1 };
+VECT_VAR_DECL(expected1,int,32,2) [] = { 0xfffffff1, 0xfffffff1 };
+VECT_VAR_DECL(expected1,int,64,1) [] = { 0xfffffffffffffff1 };
+VECT_VAR_DECL(expected1,uint,8,8) [] = { 0xf1, 0xf1, 0xf1, 0xf1,
+                                        0xf1, 0xf1, 0xf1, 0xf1 };
+VECT_VAR_DECL(expected1,uint,16,4) [] = { 0xfff1, 0xfff1, 0xfff1, 0xfff1 };
+VECT_VAR_DECL(expected1,uint,32,2) [] = { 0xfffffff1, 0xfffffff1 };
+VECT_VAR_DECL(expected1,uint,64,1) [] = { 0xfffffffffffffff1 };
+VECT_VAR_DECL(expected1,poly,8,8) [] = { 0xf1, 0xf1, 0xf1, 0xf1,
+                                        0xf1, 0xf1, 0xf1, 0xf1 };
+VECT_VAR_DECL(expected1,poly,16,4) [] = { 0xfff1, 0xfff1, 0xfff1, 0xfff1 };
+VECT_VAR_DECL(expected1,hfloat,32,2) [] = { 0xc1700000, 0xc1700000 };
+VECT_VAR_DECL(expected1,int,8,16) [] = { 0xf1, 0xf1, 0xf1, 0xf1,
+                                        0xf1, 0xf1, 0xf1, 0xf1,
+                                        0xf1, 0xf1, 0xf1, 0xf1,
+                                        0xf1, 0xf1, 0xf1, 0xf1 };
+VECT_VAR_DECL(expected1,int,16,8) [] = { 0xfff1, 0xfff1, 0xfff1, 0xfff1,
+                                        0xfff1, 0xfff1, 0xfff1, 0xfff1 };
+VECT_VAR_DECL(expected1,int,32,4) [] = { 0xfffffff1, 0xfffffff1,
+                                        0xfffffff1, 0xfffffff1 };
+VECT_VAR_DECL(expected1,int,64,2) [] = { 0xfffffffffffffff1,
+                                        0xfffffffffffffff1 };
+VECT_VAR_DECL(expected1,uint,8,16) [] = { 0xf1, 0xf1, 0xf1, 0xf1,
+                                         0xf1, 0xf1, 0xf1, 0xf1,
+                                         0xf1, 0xf1, 0xf1, 0xf1,
+                                         0xf1, 0xf1, 0xf1, 0xf1 };
+VECT_VAR_DECL(expected1,uint,16,8) [] = { 0xfff1, 0xfff1, 0xfff1, 0xfff1,
+                                         0xfff1, 0xfff1, 0xfff1, 0xfff1 };
+VECT_VAR_DECL(expected1,uint,32,4) [] = { 0xfffffff1, 0xfffffff1,
+                                         0xfffffff1, 0xfffffff1 };
+VECT_VAR_DECL(expected1,uint,64,2) [] = { 0xfffffffffffffff1,
+                                         0xfffffffffffffff1 };
+VECT_VAR_DECL(expected1,poly,8,16) [] = { 0xf1, 0xf1, 0xf1, 0xf1,
+                                         0xf1, 0xf1, 0xf1, 0xf1,
+                                         0xf1, 0xf1, 0xf1, 0xf1,
+                                         0xf1, 0xf1, 0xf1, 0xf1 };
+VECT_VAR_DECL(expected1,poly,16,8) [] = { 0xfff1, 0xfff1, 0xfff1, 0xfff1,
+                                         0xfff1, 0xfff1, 0xfff1, 0xfff1 };
+VECT_VAR_DECL(expected1,hfloat,32,4) [] = { 0xc1700000, 0xc1700000,
+                                           0xc1700000, 0xc1700000 };
+
+/* Chunk 2.  */
+VECT_VAR_DECL(expected2,int,8,8) [] = { 0xf2, 0xf2, 0xf2, 0xf2,
+                                       0xf2, 0xf2, 0xf2, 0xf2 };
+VECT_VAR_DECL(expected2,int,16,4) [] = { 0xfff2, 0xfff2, 0xfff2, 0xfff2 };
+VECT_VAR_DECL(expected2,int,32,2) [] = { 0xfffffff2, 0xfffffff2 };
+VECT_VAR_DECL(expected2,int,64,1) [] = { 0xfffffffffffffff2 };
+VECT_VAR_DECL(expected2,uint,8,8) [] = { 0xf2, 0xf2, 0xf2, 0xf2,
+                                        0xf2, 0xf2, 0xf2, 0xf2 };
+VECT_VAR_DECL(expected2,uint,16,4) [] = { 0xfff2, 0xfff2, 0xfff2, 0xfff2 };
+VECT_VAR_DECL(expected2,uint,32,2) [] = { 0xfffffff2, 0xfffffff2 };
+VECT_VAR_DECL(expected2,uint,64,1) [] = { 0xfffffffffffffff2 };
+VECT_VAR_DECL(expected2,poly,8,8) [] = { 0xf2, 0xf2, 0xf2, 0xf2,
+                                        0xf2, 0xf2, 0xf2, 0xf2 };
+VECT_VAR_DECL(expected2,poly,16,4) [] = { 0xfff2, 0xfff2, 0xfff2, 0xfff2 };
+VECT_VAR_DECL(expected2,hfloat,32,2) [] = { 0xc1600000, 0xc1600000 };
+VECT_VAR_DECL(expected2,int,8,16) [] = { 0xf2, 0xf2, 0xf2, 0xf2,
+                                        0xf2, 0xf2, 0xf2, 0xf2,
+                                        0xf2, 0xf2, 0xf2, 0xf2,
+                                        0xf2, 0xf2, 0xf2, 0xf2 };
+VECT_VAR_DECL(expected2,int,16,8) [] = { 0xfff2, 0xfff2, 0xfff2, 0xfff2,
+                                        0xfff2, 0xfff2, 0xfff2, 0xfff2 };
+VECT_VAR_DECL(expected2,int,32,4) [] = { 0xfffffff2, 0xfffffff2,
+                                        0xfffffff2, 0xfffffff2 };
+VECT_VAR_DECL(expected2,int,64,2) [] = { 0xfffffffffffffff2,
+                                        0xfffffffffffffff2 };
+VECT_VAR_DECL(expected2,uint,8,16) [] = { 0xf2, 0xf2, 0xf2, 0xf2,
+                                         0xf2, 0xf2, 0xf2, 0xf2,
+                                         0xf2, 0xf2, 0xf2, 0xf2,
+                                         0xf2, 0xf2, 0xf2, 0xf2 };
+VECT_VAR_DECL(expected2,uint,16,8) [] = { 0xfff2, 0xfff2, 0xfff2, 0xfff2,
+                                         0xfff2, 0xfff2, 0xfff2, 0xfff2 };
+VECT_VAR_DECL(expected2,uint,32,4) [] = { 0xfffffff2, 0xfffffff2,
+                                         0xfffffff2, 0xfffffff2 };
+VECT_VAR_DECL(expected2,uint,64,2) [] = { 0xfffffffffffffff2,
+                                         0xfffffffffffffff2 };
+VECT_VAR_DECL(expected2,poly,8,16) [] = { 0xf2, 0xf2, 0xf2, 0xf2,
+                                         0xf2, 0xf2, 0xf2, 0xf2,
+                                         0xf2, 0xf2, 0xf2, 0xf2,
+                                         0xf2, 0xf2, 0xf2, 0xf2 };
+VECT_VAR_DECL(expected2,poly,16,8) [] = { 0xfff2, 0xfff2, 0xfff2, 0xfff2,
+                                         0xfff2, 0xfff2, 0xfff2, 0xfff2 };
+VECT_VAR_DECL(expected2,hfloat,32,4) [] = { 0xc1600000, 0xc1600000,
+                                           0xc1600000, 0xc1600000 };
+
+#define TEST_MSG "VLD1_DUP/VLD1_DUPQ"
+void exec_vld1_dup (void)
+{
+  int i;
+
+  /* Fill vector with buffer item #i.  */
+#define TEST_VLD1_DUP(VAR, BUF, Q, T1, T2, W, N)                       \
+  VECT_VAR(VAR, T1, W, N) =                                            \
+    vld1##Q##_dup_##T2##W(&VECT_VAR(BUF, T1, W, N)[i]);                        \
+  vst1##Q##_##T2##W(VECT_VAR(result, T1, W, N), VECT_VAR(VAR, T1, W, N))
+
+  DECL_VARIABLE_ALL_VARIANTS(vector);
+
+  /* Try to read different places from the input buffer.  */
+  for (i=0; i<3; i++) {
+    clean_results ();
+
+    TEST_MACRO_ALL_VARIANTS_2_5(TEST_VLD1_DUP, vector, buffer_dup);
+
+    TEST_VLD1_DUP(vector, buffer_dup, , float, f, 32, 2);
+    TEST_VLD1_DUP(vector, buffer_dup, q, float, f, 32, 4);
+
+    switch (i) {
+    case 0:
+      CHECK_RESULTS_NAMED (TEST_MSG, expected0, "");
+      break;
+    case 1:
+      CHECK_RESULTS_NAMED (TEST_MSG, expected1, "");
+      break;
+    case 2:
+      CHECK_RESULTS_NAMED (TEST_MSG, expected2, "");
+      break;
+    default:
+      abort();
+    }
+  }
+}
+
+int main (void)
+{
+  exec_vld1_dup ();
+  return 0;
+}
diff --git a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vldX.c b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vldX.c
new file mode 100644 (file)
index 0000000..fe00640
--- /dev/null
@@ -0,0 +1,692 @@
+#include <arm_neon.h>
+#include "arm-neon-ref.h"
+#include "compute-ref-data.h"
+
+/* Expected results.  */
+
+/* vld2/chunk 0.  */
+VECT_VAR_DECL(expected_vld2_0,int,8,8) [] = { 0xf0, 0xf1, 0xf2, 0xf3,
+                                             0xf4, 0xf5, 0xf6, 0xf7 };
+VECT_VAR_DECL(expected_vld2_0,int,16,4) [] = { 0xfff0, 0xfff1, 0xfff2, 0xfff3 };
+VECT_VAR_DECL(expected_vld2_0,int,32,2) [] = { 0xfffffff0, 0xfffffff1 };
+VECT_VAR_DECL(expected_vld2_0,int,64,1) [] = { 0xfffffffffffffff0 };
+VECT_VAR_DECL(expected_vld2_0,uint,8,8) [] = { 0xf0, 0xf1, 0xf2, 0xf3,
+                                              0xf4, 0xf5, 0xf6, 0xf7 };
+VECT_VAR_DECL(expected_vld2_0,uint,16,4) [] = { 0xfff0, 0xfff1, 0xfff2, 0xfff3 };
+VECT_VAR_DECL(expected_vld2_0,uint,32,2) [] = { 0xfffffff0, 0xfffffff1 };
+VECT_VAR_DECL(expected_vld2_0,uint,64,1) [] = { 0xfffffffffffffff0 };
+VECT_VAR_DECL(expected_vld2_0,poly,8,8) [] = { 0xf0, 0xf1, 0xf2, 0xf3,
+                                              0xf4, 0xf5, 0xf6, 0xf7 };
+VECT_VAR_DECL(expected_vld2_0,poly,16,4) [] = { 0xfff0, 0xfff1, 0xfff2, 0xfff3 };
+VECT_VAR_DECL(expected_vld2_0,hfloat,32,2) [] = { 0xc1800000, 0xc1700000 };
+VECT_VAR_DECL(expected_vld2_0,int,8,16) [] = { 0xf0, 0xf1, 0xf2, 0xf3,
+                                              0xf4, 0xf5, 0xf6, 0xf7,
+                                              0xf8, 0xf9, 0xfa, 0xfb,
+                                              0xfc, 0xfd, 0xfe, 0xff };
+VECT_VAR_DECL(expected_vld2_0,int,16,8) [] = { 0xfff0, 0xfff1, 0xfff2, 0xfff3,
+                                              0xfff4, 0xfff5, 0xfff6, 0xfff7 };
+VECT_VAR_DECL(expected_vld2_0,int,32,4) [] = { 0xfffffff0, 0xfffffff1,
+                                              0xfffffff2, 0xfffffff3 };
+VECT_VAR_DECL(expected_vld2_0,int,64,2) [] = { 0x3333333333333333,
+                                              0x3333333333333333 };
+VECT_VAR_DECL(expected_vld2_0,uint,8,16) [] = { 0xf0, 0xf1, 0xf2, 0xf3,
+                                               0xf4, 0xf5, 0xf6, 0xf7,
+                                               0xf8, 0xf9, 0xfa, 0xfb,
+                                               0xfc, 0xfd, 0xfe, 0xff };
+VECT_VAR_DECL(expected_vld2_0,uint,16,8) [] = { 0xfff0, 0xfff1, 0xfff2, 0xfff3,
+                                               0xfff4, 0xfff5, 0xfff6, 0xfff7 };
+VECT_VAR_DECL(expected_vld2_0,uint,32,4) [] = { 0xfffffff0, 0xfffffff1,
+                                               0xfffffff2, 0xfffffff3 };
+VECT_VAR_DECL(expected_vld2_0,uint,64,2) [] = { 0x3333333333333333,
+                                               0x3333333333333333 };
+VECT_VAR_DECL(expected_vld2_0,poly,8,16) [] = { 0xf0, 0xf1, 0xf2, 0xf3,
+                                               0xf4, 0xf5, 0xf6, 0xf7,
+                                               0xf8, 0xf9, 0xfa, 0xfb,
+                                               0xfc, 0xfd, 0xfe, 0xff };
+VECT_VAR_DECL(expected_vld2_0,poly,16,8) [] = { 0xfff0, 0xfff1, 0xfff2, 0xfff3,
+                                               0xfff4, 0xfff5, 0xfff6, 0xfff7 };
+VECT_VAR_DECL(expected_vld2_0,hfloat,32,4) [] = { 0xc1800000, 0xc1700000,
+                                                 0xc1600000, 0xc1500000 };
+
+/* vld2/chunk 1.  */
+VECT_VAR_DECL(expected_vld2_1,int,8,8) [] = { 0xf8, 0xf9, 0xfa, 0xfb,
+                                             0xfc, 0xfd, 0xfe, 0xff };
+VECT_VAR_DECL(expected_vld2_1,int,16,4) [] = { 0xfff4, 0xfff5, 0xfff6, 0xfff7 };
+VECT_VAR_DECL(expected_vld2_1,int,32,2) [] = { 0xfffffff2, 0xfffffff3 };
+VECT_VAR_DECL(expected_vld2_1,int,64,1) [] = { 0xfffffffffffffff1 };
+VECT_VAR_DECL(expected_vld2_1,uint,8,8) [] = { 0xf8, 0xf9, 0xfa, 0xfb,
+                                              0xfc, 0xfd, 0xfe, 0xff };
+VECT_VAR_DECL(expected_vld2_1,uint,16,4) [] = { 0xfff4, 0xfff5, 0xfff6, 0xfff7 };
+VECT_VAR_DECL(expected_vld2_1,uint,32,2) [] = { 0xfffffff2, 0xfffffff3 };
+VECT_VAR_DECL(expected_vld2_1,uint,64,1) [] = { 0xfffffffffffffff1 };
+VECT_VAR_DECL(expected_vld2_1,poly,8,8) [] = { 0xf8, 0xf9, 0xfa, 0xfb,
+                                              0xfc, 0xfd, 0xfe, 0xff };
+VECT_VAR_DECL(expected_vld2_1,poly,16,4) [] = { 0xfff4, 0xfff5, 0xfff6, 0xfff7 };
+VECT_VAR_DECL(expected_vld2_1,hfloat,32,2) [] = { 0xc1600000, 0xc1500000 };
+VECT_VAR_DECL(expected_vld2_1,int,8,16) [] = { 0x0, 0x1, 0x2, 0x3,
+                                              0x4, 0x5, 0x6, 0x7,
+                                              0x8, 0x9, 0xa, 0xb,
+                                              0xc, 0xd, 0xe, 0xf };
+VECT_VAR_DECL(expected_vld2_1,int,16,8) [] = { 0xfff8, 0xfff9, 0xfffa, 0xfffb,
+                                              0xfffc, 0xfffd, 0xfffe, 0xffff };
+VECT_VAR_DECL(expected_vld2_1,int,32,4) [] = { 0xfffffff4, 0xfffffff5,
+                                              0xfffffff6, 0xfffffff7 };
+VECT_VAR_DECL(expected_vld2_1,int,64,2) [] = { 0x3333333333333333,
+                                              0x3333333333333333 };
+VECT_VAR_DECL(expected_vld2_1,uint,8,16) [] = { 0x0, 0x1, 0x2, 0x3,
+                                               0x4, 0x5, 0x6, 0x7,
+                                               0x8, 0x9, 0xa, 0xb,
+                                               0xc, 0xd, 0xe, 0xf };
+VECT_VAR_DECL(expected_vld2_1,uint,16,8) [] = { 0xfff8, 0xfff9, 0xfffa, 0xfffb,
+                                               0xfffc, 0xfffd, 0xfffe, 0xffff };
+VECT_VAR_DECL(expected_vld2_1,uint,32,4) [] = { 0xfffffff4, 0xfffffff5,
+                                               0xfffffff6, 0xfffffff7 };
+VECT_VAR_DECL(expected_vld2_1,uint,64,2) [] = { 0x3333333333333333,
+                                               0x3333333333333333 };
+VECT_VAR_DECL(expected_vld2_1,poly,8,16) [] = { 0x0, 0x1, 0x2, 0x3,
+                                               0x4, 0x5, 0x6, 0x7,
+                                               0x8, 0x9, 0xa, 0xb,
+                                               0xc, 0xd, 0xe, 0xf };
+VECT_VAR_DECL(expected_vld2_1,poly,16,8) [] = { 0xfff8, 0xfff9, 0xfffa, 0xfffb,
+                                               0xfffc, 0xfffd, 0xfffe, 0xffff };
+VECT_VAR_DECL(expected_vld2_1,hfloat,32,4) [] = { 0xc1400000, 0xc1300000,
+                                                 0xc1200000, 0xc1100000 };
+
+/* vld3/chunk 0.  */
+VECT_VAR_DECL(expected_vld3_0,int,8,8) [] = { 0xf0, 0xf1, 0xf2, 0xf3,
+                                             0xf4, 0xf5, 0xf6, 0xf7 };
+VECT_VAR_DECL(expected_vld3_0,int,16,4) [] = { 0xfff0, 0xfff1, 0xfff2, 0xfff3 };
+VECT_VAR_DECL(expected_vld3_0,int,32,2) [] = { 0xfffffff0, 0xfffffff1 };
+VECT_VAR_DECL(expected_vld3_0,int,64,1) [] = { 0xfffffffffffffff0 };
+VECT_VAR_DECL(expected_vld3_0,uint,8,8) [] = { 0xf0, 0xf1, 0xf2, 0xf3,
+                                              0xf4, 0xf5, 0xf6, 0xf7 };
+VECT_VAR_DECL(expected_vld3_0,uint,16,4) [] = { 0xfff0, 0xfff1, 0xfff2, 0xfff3 };
+VECT_VAR_DECL(expected_vld3_0,uint,32,2) [] = { 0xfffffff0, 0xfffffff1 };
+VECT_VAR_DECL(expected_vld3_0,uint,64,1) [] = { 0xfffffffffffffff0 };
+VECT_VAR_DECL(expected_vld3_0,poly,8,8) [] = { 0xf0, 0xf1, 0xf2, 0xf3,
+                                              0xf4, 0xf5, 0xf6, 0xf7 };
+VECT_VAR_DECL(expected_vld3_0,poly,16,4) [] = { 0xfff0, 0xfff1, 0xfff2, 0xfff3 };
+VECT_VAR_DECL(expected_vld3_0,hfloat,32,2) [] = { 0xc1800000, 0xc1700000 };
+VECT_VAR_DECL(expected_vld3_0,int,8,16) [] = { 0xf0, 0xf1, 0xf2, 0xf3,
+                                              0xf4, 0xf5, 0xf6, 0xf7,
+                                              0xf8, 0xf9, 0xfa, 0xfb,
+                                              0xfc, 0xfd, 0xfe, 0xff };
+VECT_VAR_DECL(expected_vld3_0,int,16,8) [] = { 0xfff0, 0xfff1, 0xfff2, 0xfff3,
+                                              0xfff4, 0xfff5, 0xfff6, 0xfff7 };
+VECT_VAR_DECL(expected_vld3_0,int,32,4) [] = { 0xfffffff0, 0xfffffff1,
+                                              0xfffffff2, 0xfffffff3 };
+VECT_VAR_DECL(expected_vld3_0,int,64,2) [] = { 0x3333333333333333,
+                                              0x3333333333333333 };
+VECT_VAR_DECL(expected_vld3_0,uint,8,16) [] = { 0xf0, 0xf1, 0xf2, 0xf3,
+                                               0xf4, 0xf5, 0xf6, 0xf7,
+                                               0xf8, 0xf9, 0xfa, 0xfb,
+                                               0xfc, 0xfd, 0xfe, 0xff };
+VECT_VAR_DECL(expected_vld3_0,uint,16,8) [] = { 0xfff0, 0xfff1, 0xfff2, 0xfff3,
+                                               0xfff4, 0xfff5, 0xfff6, 0xfff7 };
+VECT_VAR_DECL(expected_vld3_0,uint,32,4) [] = { 0xfffffff0, 0xfffffff1,
+                                               0xfffffff2, 0xfffffff3 };
+VECT_VAR_DECL(expected_vld3_0,uint,64,2) [] = { 0x3333333333333333,
+                                               0x3333333333333333 };
+VECT_VAR_DECL(expected_vld3_0,poly,8,16) [] = { 0xf0, 0xf1, 0xf2, 0xf3,
+                                               0xf4, 0xf5, 0xf6, 0xf7,
+                                               0xf8, 0xf9, 0xfa, 0xfb,
+                                               0xfc, 0xfd, 0xfe, 0xff };
+VECT_VAR_DECL(expected_vld3_0,poly,16,8) [] = { 0xfff0, 0xfff1, 0xfff2, 0xfff3,
+                                               0xfff4, 0xfff5, 0xfff6, 0xfff7 };
+VECT_VAR_DECL(expected_vld3_0,hfloat,32,4) [] = { 0xc1800000, 0xc1700000,
+                                                 0xc1600000, 0xc1500000 };
+
+/* vld3/chunk 1.  */
+VECT_VAR_DECL(expected_vld3_1,int,8,8) [] = { 0xf8, 0xf9, 0xfa, 0xfb,
+                                             0xfc, 0xfd, 0xfe, 0xff };
+VECT_VAR_DECL(expected_vld3_1,int,16,4) [] = { 0xfff4, 0xfff5, 0xfff6, 0xfff7 };
+VECT_VAR_DECL(expected_vld3_1,int,32,2) [] = { 0xfffffff2, 0xfffffff3 };
+VECT_VAR_DECL(expected_vld3_1,int,64,1) [] = { 0xfffffffffffffff1 };
+VECT_VAR_DECL(expected_vld3_1,uint,8,8) [] = { 0xf8, 0xf9, 0xfa, 0xfb,
+                                              0xfc, 0xfd, 0xfe, 0xff };
+VECT_VAR_DECL(expected_vld3_1,uint,16,4) [] = { 0xfff4, 0xfff5, 0xfff6, 0xfff7 };
+VECT_VAR_DECL(expected_vld3_1,uint,32,2) [] = { 0xfffffff2, 0xfffffff3 };
+VECT_VAR_DECL(expected_vld3_1,uint,64,1) [] = { 0xfffffffffffffff1 };
+VECT_VAR_DECL(expected_vld3_1,poly,8,8) [] = { 0xf8, 0xf9, 0xfa, 0xfb,
+                                              0xfc, 0xfd, 0xfe, 0xff };
+VECT_VAR_DECL(expected_vld3_1,poly,16,4) [] = { 0xfff4, 0xfff5, 0xfff6, 0xfff7 };
+VECT_VAR_DECL(expected_vld3_1,hfloat,32,2) [] = { 0xc1600000, 0xc1500000 };
+VECT_VAR_DECL(expected_vld3_1,int,8,16) [] = { 0x0, 0x1, 0x2, 0x3,
+                                              0x4, 0x5, 0x6, 0x7,
+                                              0x8, 0x9, 0xa, 0xb,
+                                              0xc, 0xd, 0xe, 0xf };
+VECT_VAR_DECL(expected_vld3_1,int,16,8) [] = { 0xfff8, 0xfff9, 0xfffa, 0xfffb,
+                                              0xfffc, 0xfffd, 0xfffe, 0xffff };
+VECT_VAR_DECL(expected_vld3_1,int,32,4) [] = { 0xfffffff4, 0xfffffff5,
+                                              0xfffffff6, 0xfffffff7 };
+VECT_VAR_DECL(expected_vld3_1,int,64,2) [] = { 0x3333333333333333,
+                                              0x3333333333333333 };
+VECT_VAR_DECL(expected_vld3_1,uint,8,16) [] = { 0x0, 0x1, 0x2, 0x3,
+                                               0x4, 0x5, 0x6, 0x7,
+                                               0x8, 0x9, 0xa, 0xb,
+                                               0xc, 0xd, 0xe, 0xf };
+VECT_VAR_DECL(expected_vld3_1,uint,16,8) [] = { 0xfff8, 0xfff9, 0xfffa, 0xfffb,
+                                               0xfffc, 0xfffd, 0xfffe, 0xffff };
+VECT_VAR_DECL(expected_vld3_1,uint,32,4) [] = { 0xfffffff4, 0xfffffff5,
+                                               0xfffffff6, 0xfffffff7 };
+VECT_VAR_DECL(expected_vld3_1,uint,64,2) [] = { 0x3333333333333333,
+                                               0x3333333333333333 };
+VECT_VAR_DECL(expected_vld3_1,poly,8,16) [] = { 0x0, 0x1, 0x2, 0x3,
+                                               0x4, 0x5, 0x6, 0x7,
+                                               0x8, 0x9, 0xa, 0xb,
+                                               0xc, 0xd, 0xe, 0xf };
+VECT_VAR_DECL(expected_vld3_1,poly,16,8) [] = { 0xfff8, 0xfff9, 0xfffa, 0xfffb,
+                                               0xfffc, 0xfffd, 0xfffe, 0xffff };
+VECT_VAR_DECL(expected_vld3_1,hfloat,32,4) [] = { 0xc1400000, 0xc1300000,
+                                                 0xc1200000, 0xc1100000 };
+
+/* vld3/chunk 2.  */
+VECT_VAR_DECL(expected_vld3_2,int,8,8) [] = { 0x0, 0x1, 0x2, 0x3,
+                                             0x4, 0x5, 0x6, 0x7 };
+VECT_VAR_DECL(expected_vld3_2,int,16,4) [] = { 0xfff8, 0xfff9,
+                                              0xfffa, 0xfffb };
+VECT_VAR_DECL(expected_vld3_2,int,32,2) [] = { 0xfffffff4, 0xfffffff5 };
+VECT_VAR_DECL(expected_vld3_2,int,64,1) [] = { 0xfffffffffffffff2 };
+VECT_VAR_DECL(expected_vld3_2,uint,8,8) [] = { 0x0, 0x1, 0x2, 0x3,
+                                              0x4, 0x5, 0x6, 0x7 };
+VECT_VAR_DECL(expected_vld3_2,uint,16,4) [] = { 0xfff8, 0xfff9,
+                                               0xfffa, 0xfffb };
+VECT_VAR_DECL(expected_vld3_2,uint,32,2) [] = { 0xfffffff4, 0xfffffff5 };
+VECT_VAR_DECL(expected_vld3_2,uint,64,1) [] = { 0xfffffffffffffff2 };
+VECT_VAR_DECL(expected_vld3_2,poly,8,8) [] = { 0x0, 0x1, 0x2, 0x3,
+                                              0x4, 0x5, 0x6, 0x7 };
+VECT_VAR_DECL(expected_vld3_2,poly,16,4) [] = { 0xfff8, 0xfff9,
+                                               0xfffa, 0xfffb };
+VECT_VAR_DECL(expected_vld3_2,hfloat,32,2) [] = { 0xc1400000, 0xc1300000 };
+VECT_VAR_DECL(expected_vld3_2,int,8,16) [] = { 0x10, 0x11, 0x12, 0x13,
+                                              0x14, 0x15, 0x16, 0x17,
+                                              0x18, 0x19, 0x1a, 0x1b,
+                                              0x1c, 0x1d, 0x1e, 0x1f };
+VECT_VAR_DECL(expected_vld3_2,int,16,8) [] = { 0x0, 0x1, 0x2, 0x3,
+                                              0x4, 0x5, 0x6, 0x7 };
+VECT_VAR_DECL(expected_vld3_2,int,32,4) [] = { 0xfffffff8, 0xfffffff9,
+                                              0xfffffffa, 0xfffffffb };
+VECT_VAR_DECL(expected_vld3_2,int,64,2) [] = { 0x3333333333333333,
+                                              0x3333333333333333 };
+VECT_VAR_DECL(expected_vld3_2,uint,8,16) [] = { 0x10, 0x11, 0x12, 0x13,
+                                               0x14, 0x15, 0x16, 0x17,
+                                               0x18, 0x19, 0x1a, 0x1b,
+                                               0x1c, 0x1d, 0x1e, 0x1f };
+VECT_VAR_DECL(expected_vld3_2,uint,16,8) [] = { 0x0, 0x1, 0x2, 0x3,
+                                               0x4, 0x5, 0x6, 0x7 };
+VECT_VAR_DECL(expected_vld3_2,uint,32,4) [] = { 0xfffffff8, 0xfffffff9,
+                                               0xfffffffa, 0xfffffffb };
+VECT_VAR_DECL(expected_vld3_2,uint,64,2) [] = { 0x3333333333333333,
+                                               0x3333333333333333 };
+VECT_VAR_DECL(expected_vld3_2,poly,8,16) [] = { 0x10, 0x11, 0x12, 0x13,
+                                               0x14, 0x15, 0x16, 0x17,
+                                               0x18, 0x19, 0x1a, 0x1b,
+                                               0x1c, 0x1d, 0x1e, 0x1f };
+VECT_VAR_DECL(expected_vld3_2,poly,16,8) [] = { 0x0, 0x1, 0x2, 0x3,
+                                               0x4, 0x5, 0x6, 0x7 };
+VECT_VAR_DECL(expected_vld3_2,hfloat,32,4) [] = { 0xc1000000, 0xc0e00000,
+                                                 0xc0c00000, 0xc0a00000 };
+
+/* vld4/chunk 0.  */
+VECT_VAR_DECL(expected_vld4_0,int,8,8) [] = { 0xf0, 0xf1, 0xf2, 0xf3,
+                                             0xf4, 0xf5, 0xf6, 0xf7 };
+VECT_VAR_DECL(expected_vld4_0,int,16,4) [] = { 0xfff0, 0xfff1,
+                                              0xfff2, 0xfff3 };
+VECT_VAR_DECL(expected_vld4_0,int,32,2) [] = { 0xfffffff0, 0xfffffff1 };
+VECT_VAR_DECL(expected_vld4_0,int,64,1) [] = { 0xfffffffffffffff0 };
+VECT_VAR_DECL(expected_vld4_0,uint,8,8) [] = { 0xf0, 0xf1, 0xf2, 0xf3,
+                                              0xf4, 0xf5, 0xf6, 0xf7 };
+VECT_VAR_DECL(expected_vld4_0,uint,16,4) [] = { 0xfff0, 0xfff1,
+                                               0xfff2, 0xfff3 };
+VECT_VAR_DECL(expected_vld4_0,uint,32,2) [] = { 0xfffffff0, 0xfffffff1 };
+VECT_VAR_DECL(expected_vld4_0,uint,64,1) [] = { 0xfffffffffffffff0 };
+VECT_VAR_DECL(expected_vld4_0,poly,8,8) [] = { 0xf0, 0xf1, 0xf2, 0xf3,
+                                              0xf4, 0xf5, 0xf6, 0xf7 };
+VECT_VAR_DECL(expected_vld4_0,poly,16,4) [] = { 0xfff0, 0xfff1, 0xfff2, 0xfff3 };
+VECT_VAR_DECL(expected_vld4_0,hfloat,32,2) [] = { 0xc1800000, 0xc1700000 };
+VECT_VAR_DECL(expected_vld4_0,int,8,16) [] = { 0xf0, 0xf1, 0xf2, 0xf3,
+                                              0xf4, 0xf5, 0xf6, 0xf7,
+                                              0xf8, 0xf9, 0xfa, 0xfb,
+                                              0xfc, 0xfd, 0xfe, 0xff };
+VECT_VAR_DECL(expected_vld4_0,int,16,8) [] = { 0xfff0, 0xfff1, 0xfff2, 0xfff3,
+                                              0xfff4, 0xfff5, 0xfff6, 0xfff7 };
+VECT_VAR_DECL(expected_vld4_0,int,32,4) [] = { 0xfffffff0, 0xfffffff1,
+                                              0xfffffff2, 0xfffffff3 };
+VECT_VAR_DECL(expected_vld4_0,int,64,2) [] = { 0x3333333333333333,
+                                              0x3333333333333333 };
+VECT_VAR_DECL(expected_vld4_0,uint,8,16) [] = { 0xf0, 0xf1, 0xf2, 0xf3,
+                                               0xf4, 0xf5, 0xf6, 0xf7,
+                                               0xf8, 0xf9, 0xfa, 0xfb,
+                                               0xfc, 0xfd, 0xfe, 0xff };
+VECT_VAR_DECL(expected_vld4_0,uint,16,8) [] = { 0xfff0, 0xfff1, 0xfff2, 0xfff3,
+                                               0xfff4, 0xfff5, 0xfff6, 0xfff7 };
+VECT_VAR_DECL(expected_vld4_0,uint,32,4) [] = { 0xfffffff0, 0xfffffff1,
+                                               0xfffffff2, 0xfffffff3 };
+VECT_VAR_DECL(expected_vld4_0,uint,64,2) [] = { 0x3333333333333333,
+                                               0x3333333333333333 };
+VECT_VAR_DECL(expected_vld4_0,poly,8,16) [] = { 0xf0, 0xf1, 0xf2, 0xf3,
+                                               0xf4, 0xf5, 0xf6, 0xf7,
+                                               0xf8, 0xf9, 0xfa, 0xfb,
+                                               0xfc, 0xfd, 0xfe, 0xff };
+VECT_VAR_DECL(expected_vld4_0,poly,16,8) [] = { 0xfff0, 0xfff1, 0xfff2, 0xfff3,
+                                               0xfff4, 0xfff5, 0xfff6, 0xfff7 };
+VECT_VAR_DECL(expected_vld4_0,hfloat,32,4) [] = { 0xc1800000, 0xc1700000,
+                                                 0xc1600000, 0xc1500000 };
+
+/* vld4/chunk 1.  */
+VECT_VAR_DECL(expected_vld4_1,int,8,8) [] = { 0xf8, 0xf9, 0xfa, 0xfb,
+                                             0xfc, 0xfd, 0xfe, 0xff };
+VECT_VAR_DECL(expected_vld4_1,int,16,4) [] = { 0xfff4, 0xfff5, 0xfff6, 0xfff7 };
+VECT_VAR_DECL(expected_vld4_1,int,32,2) [] = { 0xfffffff2, 0xfffffff3 };
+VECT_VAR_DECL(expected_vld4_1,int,64,1) [] = { 0xfffffffffffffff1 };
+VECT_VAR_DECL(expected_vld4_1,uint,8,8) [] = { 0xf8, 0xf9, 0xfa, 0xfb,
+                                              0xfc, 0xfd, 0xfe, 0xff };
+VECT_VAR_DECL(expected_vld4_1,uint,16,4) [] = { 0xfff4, 0xfff5, 0xfff6, 0xfff7 };
+VECT_VAR_DECL(expected_vld4_1,uint,32,2) [] = { 0xfffffff2, 0xfffffff3 };
+VECT_VAR_DECL(expected_vld4_1,uint,64,1) [] = { 0xfffffffffffffff1 };
+VECT_VAR_DECL(expected_vld4_1,poly,8,8) [] = { 0xf8, 0xf9, 0xfa, 0xfb,
+                                              0xfc, 0xfd, 0xfe, 0xff };
+VECT_VAR_DECL(expected_vld4_1,poly,16,4) [] = { 0xfff4, 0xfff5, 0xfff6, 0xfff7 };
+VECT_VAR_DECL(expected_vld4_1,hfloat,32,2) [] = { 0xc1600000, 0xc1500000 };
+VECT_VAR_DECL(expected_vld4_1,int,8,16) [] = { 0x0, 0x1, 0x2, 0x3,
+                                              0x4, 0x5, 0x6, 0x7,
+                                              0x8, 0x9, 0xa, 0xb,
+                                              0xc, 0xd, 0xe, 0xf };
+VECT_VAR_DECL(expected_vld4_1,int,16,8) [] = { 0xfff8, 0xfff9, 0xfffa, 0xfffb,
+                                              0xfffc, 0xfffd, 0xfffe, 0xffff };
+VECT_VAR_DECL(expected_vld4_1,int,32,4) [] = { 0xfffffff4, 0xfffffff5,
+                                              0xfffffff6, 0xfffffff7 };
+VECT_VAR_DECL(expected_vld4_1,int,64,2) [] = { 0x3333333333333333,
+                                              0x3333333333333333 };
+VECT_VAR_DECL(expected_vld4_1,uint,8,16) [] = { 0x0, 0x1, 0x2, 0x3,
+                                               0x4, 0x5, 0x6, 0x7,
+                                               0x8, 0x9, 0xa, 0xb,
+                                               0xc, 0xd, 0xe, 0xf };
+VECT_VAR_DECL(expected_vld4_1,uint,16,8) [] = { 0xfff8, 0xfff9, 0xfffa, 0xfffb,
+                                               0xfffc, 0xfffd, 0xfffe, 0xffff };
+VECT_VAR_DECL(expected_vld4_1,uint,32,4) [] = { 0xfffffff4, 0xfffffff5,
+                                               0xfffffff6, 0xfffffff7 };
+VECT_VAR_DECL(expected_vld4_1,uint,64,2) [] = { 0x3333333333333333,
+                                               0x3333333333333333 };
+VECT_VAR_DECL(expected_vld4_1,poly,8,16) [] = { 0x0, 0x1, 0x2, 0x3,
+                                               0x4, 0x5, 0x6, 0x7,
+                                               0x8, 0x9, 0xa, 0xb,
+                                               0xc, 0xd, 0xe, 0xf };
+VECT_VAR_DECL(expected_vld4_1,poly,16,8) [] = { 0xfff8, 0xfff9, 0xfffa, 0xfffb,
+                                               0xfffc, 0xfffd, 0xfffe, 0xffff };
+VECT_VAR_DECL(expected_vld4_1,hfloat,32,4) [] = { 0xc1400000, 0xc1300000,
+                                                 0xc1200000, 0xc1100000 };
+
+/* vld4/chunk 2.  */
+VECT_VAR_DECL(expected_vld4_2,int,8,8) [] = { 0x0, 0x1, 0x2, 0x3,
+                                             0x4, 0x5, 0x6, 0x7 };
+VECT_VAR_DECL(expected_vld4_2,int,16,4) [] = { 0xfff8, 0xfff9, 0xfffa, 0xfffb };
+VECT_VAR_DECL(expected_vld4_2,int,32,2) [] = { 0xfffffff4, 0xfffffff5 };
+VECT_VAR_DECL(expected_vld4_2,int,64,1) [] = { 0xfffffffffffffff2 };
+VECT_VAR_DECL(expected_vld4_2,uint,8,8) [] = { 0x0, 0x1, 0x2, 0x3,
+                                              0x4, 0x5, 0x6, 0x7 };
+VECT_VAR_DECL(expected_vld4_2,uint,16,4) [] = { 0xfff8, 0xfff9, 0xfffa, 0xfffb };
+VECT_VAR_DECL(expected_vld4_2,uint,32,2) [] = { 0xfffffff4, 0xfffffff5 };
+VECT_VAR_DECL(expected_vld4_2,uint,64,1) [] = { 0xfffffffffffffff2 };
+VECT_VAR_DECL(expected_vld4_2,poly,8,8) [] = { 0x0, 0x1, 0x2, 0x3,
+                                              0x4, 0x5, 0x6, 0x7 };
+VECT_VAR_DECL(expected_vld4_2,poly,16,4) [] = { 0xfff8, 0xfff9, 0xfffa, 0xfffb };
+VECT_VAR_DECL(expected_vld4_2,hfloat,32,2) [] = { 0xc1400000, 0xc1300000 };
+VECT_VAR_DECL(expected_vld4_2,int,8,16) [] = { 0x10, 0x11, 0x12, 0x13,
+                                              0x14, 0x15, 0x16, 0x17,
+                                              0x18, 0x19, 0x1a, 0x1b,
+                                              0x1c, 0x1d, 0x1e, 0x1f };
+VECT_VAR_DECL(expected_vld4_2,int,16,8) [] = { 0x0, 0x1, 0x2, 0x3,
+                                              0x4, 0x5, 0x6, 0x7 };
+VECT_VAR_DECL(expected_vld4_2,int,32,4) [] = { 0xfffffff8, 0xfffffff9,
+                                              0xfffffffa, 0xfffffffb };
+VECT_VAR_DECL(expected_vld4_2,int,64,2) [] = { 0x3333333333333333,
+                                              0x3333333333333333 };
+VECT_VAR_DECL(expected_vld4_2,uint,8,16) [] = { 0x10, 0x11, 0x12, 0x13,
+                                               0x14, 0x15, 0x16, 0x17,
+                                               0x18, 0x19, 0x1a, 0x1b,
+                                               0x1c, 0x1d, 0x1e, 0x1f };
+VECT_VAR_DECL(expected_vld4_2,uint,16,8) [] = { 0x0, 0x1, 0x2, 0x3,
+                                               0x4, 0x5, 0x6, 0x7 };
+VECT_VAR_DECL(expected_vld4_2,uint,32,4) [] = { 0xfffffff8, 0xfffffff9,
+                                               0xfffffffa, 0xfffffffb };
+VECT_VAR_DECL(expected_vld4_2,uint,64,2) [] = { 0x3333333333333333,
+                                               0x3333333333333333 };
+VECT_VAR_DECL(expected_vld4_2,poly,8,16) [] = { 0x10, 0x11, 0x12, 0x13,
+                                               0x14, 0x15, 0x16, 0x17,
+                                               0x18, 0x19, 0x1a, 0x1b,
+                                               0x1c, 0x1d, 0x1e, 0x1f };
+VECT_VAR_DECL(expected_vld4_2,poly,16,8) [] = { 0x0, 0x1, 0x2, 0x3,
+                                               0x4, 0x5, 0x6, 0x7 };
+VECT_VAR_DECL(expected_vld4_2,hfloat,32,4) [] = { 0xc1000000, 0xc0e00000,
+                                                 0xc0c00000, 0xc0a00000 };
+
+/* vld4/chunk 3.  */
+VECT_VAR_DECL(expected_vld4_3,int,8,8) [] = { 0x8, 0x9, 0xa, 0xb,
+                                             0xc, 0xd, 0xe, 0xf };
+VECT_VAR_DECL(expected_vld4_3,int,16,4) [] = { 0xfffc, 0xfffd, 0xfffe, 0xffff };
+VECT_VAR_DECL(expected_vld4_3,int,32,2) [] = { 0xfffffff6, 0xfffffff7 };
+VECT_VAR_DECL(expected_vld4_3,int,64,1) [] = { 0xfffffffffffffff3 };
+VECT_VAR_DECL(expected_vld4_3,uint,8,8) [] = { 0x8, 0x9, 0xa, 0xb,
+                                              0xc, 0xd, 0xe, 0xf };
+VECT_VAR_DECL(expected_vld4_3,uint,16,4) [] = { 0xfffc, 0xfffd, 0xfffe, 0xffff };
+VECT_VAR_DECL(expected_vld4_3,uint,32,2) [] = { 0xfffffff6, 0xfffffff7 };
+VECT_VAR_DECL(expected_vld4_3,uint,64,1) [] = { 0xfffffffffffffff3 };
+VECT_VAR_DECL(expected_vld4_3,poly,8,8) [] = { 0x8, 0x9, 0xa, 0xb,
+                                              0xc, 0xd, 0xe, 0xf };
+VECT_VAR_DECL(expected_vld4_3,poly,16,4) [] = { 0xfffc, 0xfffd, 0xfffe, 0xffff };
+VECT_VAR_DECL(expected_vld4_3,hfloat,32,2) [] = { 0xc1200000, 0xc1100000 };
+VECT_VAR_DECL(expected_vld4_3,int,8,16) [] = { 0x20, 0x21, 0x22, 0x23,
+                                              0x24, 0x25, 0x26, 0x27,
+                                              0x28, 0x29, 0x2a, 0x2b,
+                                              0x2c, 0x2d, 0x2e, 0x2f };
+VECT_VAR_DECL(expected_vld4_3,int,16,8) [] = { 0x8, 0x9, 0xa, 0xb,
+                                              0xc, 0xd, 0xe, 0xf };
+VECT_VAR_DECL(expected_vld4_3,int,32,4) [] = { 0xfffffffc, 0xfffffffd,
+                                              0xfffffffe, 0xffffffff };
+VECT_VAR_DECL(expected_vld4_3,int,64,2) [] = { 0x3333333333333333,
+                                              0x3333333333333333 };
+VECT_VAR_DECL(expected_vld4_3,uint,8,16) [] = { 0x20, 0x21, 0x22, 0x23,
+                                               0x24, 0x25, 0x26, 0x27,
+                                               0x28, 0x29, 0x2a, 0x2b,
+                                               0x2c, 0x2d, 0x2e, 0x2f };
+VECT_VAR_DECL(expected_vld4_3,uint,16,8) [] = { 0x8, 0x9, 0xa, 0xb,
+                                               0xc, 0xd, 0xe, 0xf };
+VECT_VAR_DECL(expected_vld4_3,uint,32,4) [] = { 0xfffffffc, 0xfffffffd,
+                                               0xfffffffe, 0xffffffff };
+VECT_VAR_DECL(expected_vld4_3,uint,64,2) [] = { 0x3333333333333333,
+                                               0x3333333333333333 };
+VECT_VAR_DECL(expected_vld4_3,poly,8,16) [] = { 0x20, 0x21, 0x22, 0x23,
+                                               0x24, 0x25, 0x26, 0x27,
+                                               0x28, 0x29, 0x2a, 0x2b,
+                                               0x2c, 0x2d, 0x2e, 0x2f };
+VECT_VAR_DECL(expected_vld4_3,poly,16,8) [] = { 0x8, 0x9, 0xa, 0xb,
+                                               0xc, 0xd, 0xe, 0xf };
+VECT_VAR_DECL(expected_vld4_3,hfloat,32,4) [] = { 0xc0800000, 0xc0400000,
+                                                 0xc0000000, 0xbf800000 };
+
+void exec_vldX (void)
+{
+  /* In this case, input variables are arrays of vectors.  */
+#define DECL_VLDX(T1, W, N, X)                                         \
+  VECT_ARRAY_TYPE(T1, W, N, X) VECT_ARRAY_VAR(vector, T1, W, N, X);    \
+  VECT_VAR_DECL(result_bis_##X, T1, W, N)[X * N]
+
+  /* We need to use a temporary result buffer (result_bis), because
+     the one used for other tests is not large enough. A subset of the
+     result data is moved from result_bis to result, and it is this
+     subset which is used to check the actual behaviour. The next
+     macro enables to move another chunk of data from result_bis to
+     result.  */
+#define TEST_VLDX(Q, T1, T2, W, N, X)                                  \
+  VECT_ARRAY_VAR(vector, T1, W, N, X) =                                        \
+    /* Use dedicated init buffer, of size X */                         \
+    vld##X##Q##_##T2##W(VECT_ARRAY_VAR(buffer_vld##X, T1, W, N, X));   \
+  vst##X##Q##_##T2##W(VECT_VAR(result_bis_##X, T1, W, N),              \
+                     VECT_ARRAY_VAR(vector, T1, W, N, X));             \
+  memcpy(VECT_VAR(result, T1, W, N), VECT_VAR(result_bis_##X, T1, W, N), \
+        sizeof(VECT_VAR(result, T1, W, N)));
+
+  /* Overwrite "result" with the contents of "result_bis"[Y].  */
+#define TEST_EXTRA_CHUNK(T1, W, N, X,Y)                        \
+  memcpy(VECT_VAR(result, T1, W, N),                   \
+        &(VECT_VAR(result_bis_##X, T1, W, N)[Y*N]),    \
+        sizeof(VECT_VAR(result, T1, W, N)));
+
+  /* We need all variants in 64 bits, but there is no 64x2 variant.  */
+#define DECL_ALL_VLDX(X)                       \
+  DECL_VLDX(int, 8, 8, X);                     \
+  DECL_VLDX(int, 16, 4, X);                    \
+  DECL_VLDX(int, 32, 2, X);                    \
+  DECL_VLDX(int, 64, 1, X);                    \
+  DECL_VLDX(uint, 8, 8, X);                    \
+  DECL_VLDX(uint, 16, 4, X);                   \
+  DECL_VLDX(uint, 32, 2, X);                   \
+  DECL_VLDX(uint, 64, 1, X);                   \
+  DECL_VLDX(poly, 8, 8, X);                    \
+  DECL_VLDX(poly, 16, 4, X);                   \
+  DECL_VLDX(float, 32, 2, X);                  \
+  DECL_VLDX(int, 8, 16, X);                    \
+  DECL_VLDX(int, 16, 8, X);                    \
+  DECL_VLDX(int, 32, 4, X);                    \
+  DECL_VLDX(uint, 8, 16, X);                   \
+  DECL_VLDX(uint, 16, 8, X);                   \
+  DECL_VLDX(uint, 32, 4, X);                   \
+  DECL_VLDX(poly, 8, 16, X);                   \
+  DECL_VLDX(poly, 16, 8, X);                   \
+  DECL_VLDX(float, 32, 4, X)
+
+#define TEST_ALL_VLDX(X)                       \
+  TEST_VLDX(, int, s, 8, 8, X);                        \
+  TEST_VLDX(, int, s, 16, 4, X);               \
+  TEST_VLDX(, int, s, 32, 2, X);               \
+  TEST_VLDX(, int, s, 64, 1, X);               \
+  TEST_VLDX(, uint, u, 8, 8, X);               \
+  TEST_VLDX(, uint, u, 16, 4, X);              \
+  TEST_VLDX(, uint, u, 32, 2, X);              \
+  TEST_VLDX(, uint, u, 64, 1, X);              \
+  TEST_VLDX(, poly, p, 8, 8, X);               \
+  TEST_VLDX(, poly, p, 16, 4, X);              \
+  TEST_VLDX(, float, f, 32, 2, X);             \
+  TEST_VLDX(q, int, s, 8, 16, X);              \
+  TEST_VLDX(q, int, s, 16, 8, X);              \
+  TEST_VLDX(q, int, s, 32, 4, X);              \
+  TEST_VLDX(q, uint, u, 8, 16, X);             \
+  TEST_VLDX(q, uint, u, 16, 8, X);             \
+  TEST_VLDX(q, uint, u, 32, 4, X);             \
+  TEST_VLDX(q, poly, p, 8, 16, X);             \
+  TEST_VLDX(q, poly, p, 16, 8, X);             \
+  TEST_VLDX(q, float, f, 32, 4, X)
+
+#define TEST_ALL_EXTRA_CHUNKS(X, Y)            \
+  TEST_EXTRA_CHUNK(int, 8, 8, X, Y);           \
+  TEST_EXTRA_CHUNK(int, 16, 4, X, Y);          \
+  TEST_EXTRA_CHUNK(int, 32, 2, X, Y);          \
+  TEST_EXTRA_CHUNK(int, 64, 1, X, Y);          \
+  TEST_EXTRA_CHUNK(uint, 8, 8, X, Y);          \
+  TEST_EXTRA_CHUNK(uint, 16, 4, X, Y);         \
+  TEST_EXTRA_CHUNK(uint, 32, 2, X, Y);         \
+  TEST_EXTRA_CHUNK(uint, 64, 1, X, Y);         \
+  TEST_EXTRA_CHUNK(poly, 8, 8, X, Y);          \
+  TEST_EXTRA_CHUNK(poly, 16, 4, X, Y);         \
+  TEST_EXTRA_CHUNK(float, 32, 2, X, Y);                \
+  TEST_EXTRA_CHUNK(int, 8, 16, X, Y);          \
+  TEST_EXTRA_CHUNK(int, 16, 8, X, Y);          \
+  TEST_EXTRA_CHUNK(int, 32, 4, X, Y);          \
+  TEST_EXTRA_CHUNK(uint, 8, 16, X, Y);         \
+  TEST_EXTRA_CHUNK(uint, 16, 8, X, Y);         \
+  TEST_EXTRA_CHUNK(uint, 32, 4, X, Y);         \
+  TEST_EXTRA_CHUNK(poly, 8, 16, X, Y);         \
+  TEST_EXTRA_CHUNK(poly, 16, 8, X, Y);         \
+  TEST_EXTRA_CHUNK(float, 32, 4, X, Y)
+
+  DECL_ALL_VLDX(2);
+  DECL_ALL_VLDX(3);
+  DECL_ALL_VLDX(4);
+
+  /* Special input buffers of suitable size are needed for vld2/vld3/vld4.  */
+  /* Input buffers for vld2, 1 of each size */
+  VECT_ARRAY_INIT2(buffer_vld2, int, 8, 8);
+  PAD(buffer_vld2_pad, int, 8, 8);
+  VECT_ARRAY_INIT2(buffer_vld2, int, 16, 4);
+  PAD(buffer_vld2_pad, int, 16, 4);
+  VECT_ARRAY_INIT2(buffer_vld2, int, 32, 2);
+  PAD(buffer_vld2_pad, int, 32, 2);
+  VECT_ARRAY_INIT2(buffer_vld2, int, 64, 1);
+  PAD(buffer_vld2_pad, int, 64, 1);
+  VECT_ARRAY_INIT2(buffer_vld2, uint, 8, 8);
+  PAD(buffer_vld2_pad, uint, 8, 8);
+  VECT_ARRAY_INIT2(buffer_vld2, uint, 16, 4);
+  PAD(buffer_vld2_pad, uint, 16, 4);
+  VECT_ARRAY_INIT2(buffer_vld2, uint, 32, 2);
+  PAD(buffer_vld2_pad, uint, 32, 2);
+  VECT_ARRAY_INIT2(buffer_vld2, uint, 64, 1);
+  PAD(buffer_vld2_pad, uint, 64, 1);
+  VECT_ARRAY_INIT2(buffer_vld2, poly, 8, 8);
+  PAD(buffer_vld2_pad, poly, 8, 8);
+  VECT_ARRAY_INIT2(buffer_vld2, poly, 16, 4);
+  PAD(buffer_vld2_pad, poly, 16, 4);
+  VECT_ARRAY_INIT2(buffer_vld2, float, 32, 2);
+  PAD(buffer_vld2_pad, float, 32, 2);
+
+  VECT_ARRAY_INIT2(buffer_vld2, int, 8, 16);
+  PAD(buffer_vld2_pad, int, 8, 16);
+  VECT_ARRAY_INIT2(buffer_vld2, int, 16, 8);
+  PAD(buffer_vld2_pad, int, 16, 8);
+  VECT_ARRAY_INIT2(buffer_vld2, int, 32, 4);
+  PAD(buffer_vld2_pad, int, 32, 4);
+  VECT_ARRAY_INIT2(buffer_vld2, int, 64, 2);
+  PAD(buffer_vld2_pad, int, 64, 2);
+  VECT_ARRAY_INIT2(buffer_vld2, uint, 8, 16);
+  PAD(buffer_vld2_pad, uint, 8, 16);
+  VECT_ARRAY_INIT2(buffer_vld2, uint, 16, 8);
+  PAD(buffer_vld2_pad, uint, 16, 8);
+  VECT_ARRAY_INIT2(buffer_vld2, uint, 32, 4);
+  PAD(buffer_vld2_pad, uint, 32, 4);
+  VECT_ARRAY_INIT2(buffer_vld2, uint, 64, 2);
+  PAD(buffer_vld2_pad, uint, 64, 2);
+  VECT_ARRAY_INIT2(buffer_vld2, poly, 8, 16);
+  PAD(buffer_vld2_pad, poly, 8, 16);
+  VECT_ARRAY_INIT2(buffer_vld2, poly, 16, 8);
+  PAD(buffer_vld2_pad, poly, 16, 8);
+  VECT_ARRAY_INIT2(buffer_vld2, float, 32, 4);
+  PAD(buffer_vld2_pad, float, 32, 4);
+
+  /* Input buffers for vld3, 1 of each size */
+  VECT_ARRAY_INIT3(buffer_vld3, int, 8, 8);
+  PAD(buffer_vld3_pad, int, 8, 8);
+  VECT_ARRAY_INIT3(buffer_vld3, int, 16, 4);
+  PAD(buffer_vld3_pad, int, 16, 4);
+  VECT_ARRAY_INIT3(buffer_vld3, int, 32, 2);
+  PAD(buffer_vld3_pad, int, 32, 2);
+  VECT_ARRAY_INIT3(buffer_vld3, int, 64, 1);
+  PAD(buffer_vld3_pad, int, 64, 1);
+  VECT_ARRAY_INIT3(buffer_vld3, uint, 8, 8);
+  PAD(buffer_vld3_pad, uint, 8, 8);
+  VECT_ARRAY_INIT3(buffer_vld3, uint, 16, 4);
+  PAD(buffer_vld3_pad, uint, 16, 4);
+  VECT_ARRAY_INIT3(buffer_vld3, uint, 32, 2);
+  PAD(buffer_vld3_pad, uint, 32, 2);
+  VECT_ARRAY_INIT3(buffer_vld3, uint, 64, 1);
+  PAD(buffer_vld3_pad, uint, 64, 1);
+  VECT_ARRAY_INIT3(buffer_vld3, poly, 8, 8);
+  PAD(buffer_vld3_pad, poly, 8, 8);
+  VECT_ARRAY_INIT3(buffer_vld3, poly, 16, 4);
+  PAD(buffer_vld3_pad, poly, 16, 4);
+  VECT_ARRAY_INIT3(buffer_vld3, float, 32, 2);
+  PAD(buffer_vld3_pad, float, 32, 2);
+
+  VECT_ARRAY_INIT3(buffer_vld3, int, 8, 16);
+  PAD(buffer_vld3_pad, int, 8, 16);
+  VECT_ARRAY_INIT3(buffer_vld3, int, 16, 8);
+  PAD(buffer_vld3_pad, int, 16, 8);
+  VECT_ARRAY_INIT3(buffer_vld3, int, 32, 4);
+  PAD(buffer_vld3_pad, int, 32, 4);
+  VECT_ARRAY_INIT3(buffer_vld3, int, 64, 2);
+  PAD(buffer_vld3_pad, int, 64, 2);
+  VECT_ARRAY_INIT3(buffer_vld3, uint, 8, 16);
+  PAD(buffer_vld3_pad, uint, 8, 16);
+  VECT_ARRAY_INIT3(buffer_vld3, uint, 16, 8);
+  PAD(buffer_vld3_pad, uint, 16, 8);
+  VECT_ARRAY_INIT3(buffer_vld3, uint, 32, 4);
+  PAD(buffer_vld3_pad, uint, 32, 4);
+  VECT_ARRAY_INIT3(buffer_vld3, uint, 64, 2);
+  PAD(buffer_vld3_pad, uint, 64, 2);
+  VECT_ARRAY_INIT3(buffer_vld3, poly, 8, 16);
+  PAD(buffer_vld3_pad, poly, 8, 16);
+  VECT_ARRAY_INIT3(buffer_vld3, poly, 16, 8);
+  PAD(buffer_vld3_pad, poly, 16, 8);
+  VECT_ARRAY_INIT3(buffer_vld3, float, 32, 4);
+  PAD(buffer_vld3_pad, float, 32, 4);
+
+  /* Input buffers for vld4, 1 of each size */
+  VECT_ARRAY_INIT4(buffer_vld4, int, 8, 8);
+  PAD(buffer_vld4_pad, int, 8, 8);
+  VECT_ARRAY_INIT4(buffer_vld4, int, 16, 4);
+  PAD(buffer_vld4_pad, int, 16, 4);
+  VECT_ARRAY_INIT4(buffer_vld4, int, 32, 2);
+  PAD(buffer_vld4_pad, int, 32, 2);
+  VECT_ARRAY_INIT4(buffer_vld4, int, 64, 1);
+  PAD(buffer_vld4_pad, int, 64, 1);
+  VECT_ARRAY_INIT4(buffer_vld4, uint, 8, 8);
+  PAD(buffer_vld4_pad, uint, 8, 8);
+  VECT_ARRAY_INIT4(buffer_vld4, uint, 16, 4);
+  PAD(buffer_vld4_pad, uint, 16, 4);
+  VECT_ARRAY_INIT4(buffer_vld4, uint, 32, 2);
+  PAD(buffer_vld4_pad, uint, 32, 2);
+  VECT_ARRAY_INIT4(buffer_vld4, uint, 64, 1);
+  PAD(buffer_vld4_pad, uint, 64, 1);
+  VECT_ARRAY_INIT4(buffer_vld4, poly, 8, 8);
+  PAD(buffer_vld4_pad, poly, 8, 8);
+  VECT_ARRAY_INIT4(buffer_vld4, poly, 16, 4);
+  PAD(buffer_vld4_pad, poly, 16, 4);
+  VECT_ARRAY_INIT4(buffer_vld4, float, 32, 2);
+  PAD(buffer_vld4_pad, float, 32, 2);
+
+  VECT_ARRAY_INIT4(buffer_vld4, int, 8, 16);
+  PAD(buffer_vld4_pad, int, 8, 16);
+  VECT_ARRAY_INIT4(buffer_vld4, int, 16, 8);
+  PAD(buffer_vld4_pad, int, 16, 8);
+  VECT_ARRAY_INIT4(buffer_vld4, int, 32, 4);
+  PAD(buffer_vld4_pad, int, 32, 4);
+  VECT_ARRAY_INIT4(buffer_vld4, int, 64, 2);
+  PAD(buffer_vld4_pad, int, 64, 2);
+  VECT_ARRAY_INIT4(buffer_vld4, uint, 8, 16);
+  PAD(buffer_vld4_pad, uint, 8, 16);
+  VECT_ARRAY_INIT4(buffer_vld4, uint, 16, 8);
+  PAD(buffer_vld4_pad, uint, 16, 8);
+  VECT_ARRAY_INIT4(buffer_vld4, uint, 32, 4);
+  PAD(buffer_vld4_pad, uint, 32, 4);
+  VECT_ARRAY_INIT4(buffer_vld4, uint, 64, 2);
+  PAD(buffer_vld4_pad, uint, 64, 2);
+  VECT_ARRAY_INIT4(buffer_vld4, poly, 8, 16);
+  PAD(buffer_vld4_pad, poly, 8, 16);
+  VECT_ARRAY_INIT4(buffer_vld4, poly, 16, 8);
+  PAD(buffer_vld4_pad, poly, 16, 8);
+  VECT_ARRAY_INIT4(buffer_vld4, float, 32, 4);
+  PAD(buffer_vld4_pad, float, 32, 4);
+
+  /* Check vld2/vld2q.  */
+  clean_results ();
+#define TEST_MSG "VLD2/VLD2Q"
+  TEST_ALL_VLDX(2);
+  CHECK_RESULTS_NAMED (TEST_MSG, expected_vld2_0, "chunk 0");
+
+  TEST_ALL_EXTRA_CHUNKS(2, 1);
+  CHECK_RESULTS_NAMED (TEST_MSG, expected_vld2_1, "chunk 1");
+
+  /* Check vld3/vld3q.  */
+  clean_results ();
+#undef TEST_MSG
+#define TEST_MSG "VLD3/VLD3Q"
+  TEST_ALL_VLDX(3);
+  CHECK_RESULTS_NAMED (TEST_MSG, expected_vld3_0, "chunk 0");
+
+  TEST_ALL_EXTRA_CHUNKS(3, 1);
+  CHECK_RESULTS_NAMED (TEST_MSG, expected_vld3_1, "chunk 1");
+
+  TEST_ALL_EXTRA_CHUNKS(3, 2);
+  CHECK_RESULTS_NAMED (TEST_MSG, expected_vld3_2, "chunk 2");
+
+  /* Check vld4/vld4q.  */
+  clean_results ();
+#undef TEST_MSG
+#define TEST_MSG "VLD4/VLD4Q"
+  TEST_ALL_VLDX(4);
+  CHECK_RESULTS_NAMED (TEST_MSG, expected_vld4_0, "chunk 0");
+
+  TEST_ALL_EXTRA_CHUNKS(4, 1);
+  CHECK_RESULTS_NAMED (TEST_MSG, expected_vld4_1, "chunk 1");
+
+  TEST_ALL_EXTRA_CHUNKS(4, 2);
+  CHECK_RESULTS_NAMED (TEST_MSG, expected_vld4_2, "chunk 2");
+
+  TEST_ALL_EXTRA_CHUNKS(4, 3);
+  CHECK_RESULTS_NAMED (TEST_MSG, expected_vld4_3, "chunk 3");
+}
+
+int main (void)
+{
+  exec_vldX ();
+  return 0;
+}
diff --git a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vldX_lane.c b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vldX_lane.c
new file mode 100644 (file)
index 0000000..1991033
--- /dev/null
@@ -0,0 +1,610 @@
+#include <arm_neon.h>
+#include "arm-neon-ref.h"
+#include "compute-ref-data.h"
+
+/* Expected results.  */
+
+/* vld2/chunk 0.  */
+VECT_VAR_DECL(expected_vld2_0,int,8,8) [] = { 0xaa, 0xaa, 0xaa, 0xaa,
+                                             0xaa, 0xaa, 0xaa, 0xaa };
+VECT_VAR_DECL(expected_vld2_0,int,16,4) [] = { 0xaaaa, 0xaaaa, 0xaaaa, 0xaaaa };
+VECT_VAR_DECL(expected_vld2_0,int,32,2) [] = { 0xfffffff0, 0xfffffff1 };
+VECT_VAR_DECL(expected_vld2_0,int,64,1) [] = { 0x3333333333333333 };
+VECT_VAR_DECL(expected_vld2_0,uint,8,8) [] = { 0xaa, 0xaa, 0xaa, 0xaa,
+                                              0xaa, 0xaa, 0xaa, 0xaa };
+VECT_VAR_DECL(expected_vld2_0,uint,16,4) [] = { 0xaaaa, 0xaaaa,
+                                               0xaaaa, 0xaaaa };
+VECT_VAR_DECL(expected_vld2_0,uint,32,2) [] = { 0xaaaaaaaa, 0xaaaaaaaa };
+VECT_VAR_DECL(expected_vld2_0,uint,64,1) [] = { 0x3333333333333333 };
+VECT_VAR_DECL(expected_vld2_0,poly,8,8) [] = { 0xaa, 0xaa, 0xaa, 0xaa,
+                                              0xaa, 0xaa, 0xaa, 0xaa };
+VECT_VAR_DECL(expected_vld2_0,poly,16,4) [] = { 0xaaaa, 0xaaaa,
+                                               0xaaaa, 0xaaaa };
+VECT_VAR_DECL(expected_vld2_0,hfloat,32,2) [] = { 0xc1800000, 0xc1700000 };
+VECT_VAR_DECL(expected_vld2_0,int,8,16) [] = { 0x33, 0x33, 0x33, 0x33,
+                                              0x33, 0x33, 0x33, 0x33,
+                                              0x33, 0x33, 0x33, 0x33,
+                                              0x33, 0x33, 0x33, 0x33 };
+VECT_VAR_DECL(expected_vld2_0,int,16,8) [] = { 0xaaaa, 0xaaaa, 0xaaaa, 0xaaaa,
+                                              0xaaaa, 0xaaaa, 0xaaaa, 0xaaaa };
+VECT_VAR_DECL(expected_vld2_0,int,32,4) [] = { 0xaaaaaaaa, 0xaaaaaaaa,
+                                              0xaaaaaaaa, 0xaaaaaaaa };
+VECT_VAR_DECL(expected_vld2_0,int,64,2) [] = { 0x3333333333333333,
+                                              0x3333333333333333 };
+VECT_VAR_DECL(expected_vld2_0,uint,8,16) [] = { 0x33, 0x33, 0x33, 0x33,
+                                               0x33, 0x33, 0x33, 0x33,
+                                               0x33, 0x33, 0x33, 0x33,
+                                               0x33, 0x33, 0x33, 0x33 };
+VECT_VAR_DECL(expected_vld2_0,uint,16,8) [] = { 0xaaaa, 0xaaaa, 0xaaaa, 0xaaaa,
+                                               0xaaaa, 0xaaaa, 0xaaaa, 0xaaaa };
+VECT_VAR_DECL(expected_vld2_0,uint,32,4) [] = { 0xfffffff0, 0xfffffff1,
+                                               0xaaaaaaaa, 0xaaaaaaaa };
+VECT_VAR_DECL(expected_vld2_0,uint,64,2) [] = { 0x3333333333333333,
+                                               0x3333333333333333 };
+VECT_VAR_DECL(expected_vld2_0,poly,8,16) [] = { 0x33, 0x33, 0x33, 0x33,
+                                               0x33, 0x33, 0x33, 0x33,
+                                               0x33, 0x33, 0x33, 0x33,
+                                               0x33, 0x33, 0x33, 0x33 };
+VECT_VAR_DECL(expected_vld2_0,poly,16,8) [] = { 0xaaaa, 0xaaaa, 0xaaaa, 0xaaaa,
+                                               0xaaaa, 0xaaaa, 0xaaaa, 0xaaaa };
+VECT_VAR_DECL(expected_vld2_0,hfloat,32,4) [] = { 0xaaaaaaaa, 0xaaaaaaaa,
+                                                 0xaaaaaaaa, 0xaaaaaaaa };
+
+/* vld2/chunk 1.  */
+VECT_VAR_DECL(expected_vld2_1,int,8,8) [] = { 0xaa, 0xaa, 0xaa, 0xaa,
+                                             0xaa, 0xaa, 0xf0, 0xf1 };
+VECT_VAR_DECL(expected_vld2_1,int,16,4) [] = { 0xfff0, 0xfff1, 0xaaaa, 0xaaaa };
+VECT_VAR_DECL(expected_vld2_1,int,32,2) [] = { 0xaaaaaaaa, 0xaaaaaaaa };
+VECT_VAR_DECL(expected_vld2_1,int,64,1) [] = { 0x3333333333333333 };
+VECT_VAR_DECL(expected_vld2_1,uint,8,8) [] = { 0xf0, 0xf1, 0xaa, 0xaa,
+                                              0xaa, 0xaa, 0xaa, 0xaa };
+VECT_VAR_DECL(expected_vld2_1,uint,16,4) [] = { 0xaaaa, 0xaaaa, 0xfff0, 0xfff1 };
+VECT_VAR_DECL(expected_vld2_1,uint,32,2) [] = { 0xfffffff0, 0xfffffff1 };
+VECT_VAR_DECL(expected_vld2_1,uint,64,1) [] = { 0x3333333333333333 };
+VECT_VAR_DECL(expected_vld2_1,poly,8,8) [] = { 0xf0, 0xf1, 0xaa, 0xaa,
+                                              0xaa, 0xaa, 0xaa, 0xaa };
+VECT_VAR_DECL(expected_vld2_1,poly,16,4) [] = { 0xaaaa, 0xaaaa, 0xfff0, 0xfff1 };
+VECT_VAR_DECL(expected_vld2_1,hfloat,32,2) [] = { 0xaaaaaaaa, 0xaaaaaaaa };
+VECT_VAR_DECL(expected_vld2_1,int,8,16) [] = { 0x33, 0x33, 0x33, 0x33,
+                                              0x33, 0x33, 0x33, 0x33,
+                                              0x33, 0x33, 0x33, 0x33,
+                                              0x33, 0x33, 0x33, 0x33 };
+VECT_VAR_DECL(expected_vld2_1,int,16,8) [] = { 0xaaaa, 0xaaaa, 0xaaaa, 0xaaaa,
+                                              0xfff0, 0xfff1, 0xaaaa, 0xaaaa };
+VECT_VAR_DECL(expected_vld2_1,int,32,4) [] = { 0xfffffff0, 0xfffffff1,
+                                              0xaaaaaaaa, 0xaaaaaaaa };
+VECT_VAR_DECL(expected_vld2_1,int,64,2) [] = { 0x3333333333333333,
+                                              0x3333333333333333 };
+VECT_VAR_DECL(expected_vld2_1,uint,8,16) [] = { 0x33, 0x33, 0x33, 0x33,
+                                               0x33, 0x33, 0x33, 0x33,
+                                               0x33, 0x33, 0x33, 0x33,
+                                               0x33, 0x33, 0x33, 0x33 };
+VECT_VAR_DECL(expected_vld2_1,uint,16,8) [] = { 0xaaaa, 0xaaaa, 0xfff0, 0xfff1,
+                                               0xaaaa, 0xaaaa, 0xaaaa, 0xaaaa };
+VECT_VAR_DECL(expected_vld2_1,uint,32,4) [] = { 0xaaaaaaaa, 0xaaaaaaaa,
+                                               0xaaaaaaaa, 0xaaaaaaaa };
+VECT_VAR_DECL(expected_vld2_1,uint,64,2) [] = { 0x3333333333333333,
+                                               0x3333333333333333 };
+VECT_VAR_DECL(expected_vld2_1,poly,8,16) [] = { 0x33, 0x33, 0x33, 0x33,
+                                               0x33, 0x33, 0x33, 0x33,
+                                               0x33, 0x33, 0x33, 0x33,
+                                               0x33, 0x33, 0x33, 0x33 };
+VECT_VAR_DECL(expected_vld2_1,poly,16,8) [] = { 0xaaaa, 0xaaaa, 0xfff0, 0xfff1,
+                                               0xaaaa, 0xaaaa, 0xaaaa, 0xaaaa };
+VECT_VAR_DECL(expected_vld2_1,hfloat,32,4) [] = { 0xc1800000, 0xc1700000,
+                                                 0xaaaaaaaa, 0xaaaaaaaa };
+
+/* vld3/chunk 0.  */
+VECT_VAR_DECL(expected_vld3_0,int,8,8) [] = { 0xaa, 0xaa, 0xaa, 0xaa,
+                                             0xaa, 0xaa, 0xaa, 0xaa };
+VECT_VAR_DECL(expected_vld3_0,int,16,4) [] = { 0xaaaa, 0xaaaa, 0xaaaa, 0xaaaa };
+VECT_VAR_DECL(expected_vld3_0,int,32,2) [] = { 0xfffffff0, 0xfffffff1 };
+VECT_VAR_DECL(expected_vld3_0,int,64,1) [] = { 0x3333333333333333 };
+VECT_VAR_DECL(expected_vld3_0,uint,8,8) [] = { 0xaa, 0xaa, 0xaa, 0xaa,
+                                              0xaa, 0xaa, 0xaa, 0xaa };
+VECT_VAR_DECL(expected_vld3_0,uint,16,4) [] = { 0xaaaa, 0xaaaa, 0xaaaa, 0xaaaa };
+VECT_VAR_DECL(expected_vld3_0,uint,32,2) [] = { 0xaaaaaaaa, 0xaaaaaaaa };
+VECT_VAR_DECL(expected_vld3_0,uint,64,1) [] = { 0x3333333333333333 };
+VECT_VAR_DECL(expected_vld3_0,poly,8,8) [] = { 0xaa, 0xaa, 0xaa, 0xaa,
+                                              0xaa, 0xaa, 0xaa, 0xaa };
+VECT_VAR_DECL(expected_vld3_0,poly,16,4) [] = { 0xaaaa, 0xaaaa, 0xaaaa, 0xaaaa };
+VECT_VAR_DECL(expected_vld3_0,hfloat,32,2) [] = { 0xc1800000, 0xc1700000 };
+VECT_VAR_DECL(expected_vld3_0,int,8,16) [] = { 0x33, 0x33, 0x33, 0x33,
+                                              0x33, 0x33, 0x33, 0x33,
+                                              0x33, 0x33, 0x33, 0x33,
+                                              0x33, 0x33, 0x33, 0x33 };
+VECT_VAR_DECL(expected_vld3_0,int,16,8) [] = { 0xaaaa, 0xaaaa, 0xaaaa, 0xaaaa,
+                                              0xaaaa, 0xaaaa, 0xaaaa, 0xaaaa };
+VECT_VAR_DECL(expected_vld3_0,int,32,4) [] = { 0xaaaaaaaa, 0xaaaaaaaa,
+                                              0xaaaaaaaa, 0xaaaaaaaa };
+VECT_VAR_DECL(expected_vld3_0,int,64,2) [] = { 0x3333333333333333,
+                                              0x3333333333333333 };
+VECT_VAR_DECL(expected_vld3_0,uint,8,16) [] = { 0x33, 0x33, 0x33, 0x33,
+                                               0x33, 0x33, 0x33, 0x33,
+                                               0x33, 0x33, 0x33, 0x33,
+                                               0x33, 0x33, 0x33, 0x33 };
+VECT_VAR_DECL(expected_vld3_0,uint,16,8) [] = { 0xaaaa, 0xaaaa, 0xaaaa, 0xaaaa,
+                                               0xaaaa, 0xaaaa, 0xaaaa, 0xaaaa };
+VECT_VAR_DECL(expected_vld3_0,uint,32,4) [] = { 0xfffffff0, 0xfffffff1,
+                                               0xfffffff2, 0xaaaaaaaa };
+VECT_VAR_DECL(expected_vld3_0,uint,64,2) [] = { 0x3333333333333333,
+                                               0x3333333333333333 };
+VECT_VAR_DECL(expected_vld3_0,poly,8,16) [] = { 0x33, 0x33, 0x33, 0x33,
+                                               0x33, 0x33, 0x33, 0x33,
+                                               0x33, 0x33, 0x33, 0x33,
+                                               0x33, 0x33, 0x33, 0x33 };
+VECT_VAR_DECL(expected_vld3_0,poly,16,8) [] = { 0xaaaa, 0xaaaa, 0xaaaa, 0xaaaa,
+                                               0xaaaa, 0xaaaa, 0xaaaa, 0xaaaa };
+VECT_VAR_DECL(expected_vld3_0,hfloat,32,4) [] = { 0xaaaaaaaa, 0xaaaaaaaa,
+                                                 0xaaaaaaaa, 0xaaaaaaaa };
+
+/* vld3/chunk 1.  */
+VECT_VAR_DECL(expected_vld3_1,int,8,8) [] = { 0xaa, 0xaa, 0xaa, 0xaa,
+                                             0xaa, 0xaa, 0xaa, 0xaa };
+VECT_VAR_DECL(expected_vld3_1,int,16,4) [] = { 0xaaaa, 0xaaaa, 0xfff0, 0xfff1 };
+VECT_VAR_DECL(expected_vld3_1,int,32,2) [] = { 0xfffffff2, 0xaaaaaaaa };
+VECT_VAR_DECL(expected_vld3_1,int,64,1) [] = { 0x3333333333333333 };
+VECT_VAR_DECL(expected_vld3_1,uint,8,8) [] = { 0xaa, 0xaa, 0xaa, 0xaa,
+                                              0xf0, 0xf1, 0xf2, 0xaa };
+VECT_VAR_DECL(expected_vld3_1,uint,16,4) [] = { 0xaaaa, 0xaaaa, 0xaaaa, 0xaaaa };
+VECT_VAR_DECL(expected_vld3_1,uint,32,2) [] = { 0xaaaaaaaa, 0xfffffff0 };
+VECT_VAR_DECL(expected_vld3_1,uint,64,1) [] = { 0x3333333333333333 };
+VECT_VAR_DECL(expected_vld3_1,poly,8,8) [] = { 0xaa, 0xaa, 0xaa, 0xaa,
+                                              0xf0, 0xf1, 0xf2, 0xaa };
+VECT_VAR_DECL(expected_vld3_1,poly,16,4) [] = { 0xaaaa, 0xaaaa, 0xaaaa, 0xaaaa };
+VECT_VAR_DECL(expected_vld3_1,hfloat,32,2) [] = { 0xc1600000, 0xaaaaaaaa };
+VECT_VAR_DECL(expected_vld3_1,int,8,16) [] = { 0x33, 0x33, 0x33, 0x33,
+                                              0x33, 0x33, 0x33, 0x33,
+                                              0x33, 0x33, 0x33, 0x33,
+                                              0x33, 0x33, 0x33, 0x33 };
+VECT_VAR_DECL(expected_vld3_1,int,16,8) [] = { 0xaaaa, 0xaaaa, 0xaaaa, 0xaaaa,
+                                              0xaaaa, 0xaaaa, 0xaaaa, 0xaaaa };
+VECT_VAR_DECL(expected_vld3_1,int,32,4) [] = { 0xaaaaaaaa, 0xaaaaaaaa,
+                                              0xfffffff0, 0xfffffff1 };
+VECT_VAR_DECL(expected_vld3_1,int,64,2) [] = { 0x3333333333333333,
+                                              0x3333333333333333 };
+VECT_VAR_DECL(expected_vld3_1,uint,8,16) [] = { 0x33, 0x33, 0x33, 0x33,
+                                               0x33, 0x33, 0x33, 0x33,
+                                               0x33, 0x33, 0x33, 0x33,
+                                               0x33, 0x33, 0x33, 0x33 };
+VECT_VAR_DECL(expected_vld3_1,uint,16,8) [] = { 0xaaaa, 0xaaaa, 0xaaaa, 0xaaaa,
+                                               0xaaaa, 0xaaaa, 0xaaaa, 0xfff0 };
+VECT_VAR_DECL(expected_vld3_1,uint,32,4) [] = { 0xaaaaaaaa, 0xaaaaaaaa,
+                                               0xaaaaaaaa, 0xaaaaaaaa };
+VECT_VAR_DECL(expected_vld3_1,uint,64,2) [] = { 0x3333333333333333,
+                                               0x3333333333333333 };
+VECT_VAR_DECL(expected_vld3_1,poly,8,16) [] = { 0x33, 0x33, 0x33, 0x33,
+                                               0x33, 0x33, 0x33, 0x33,
+                                               0x33, 0x33, 0x33, 0x33,
+                                               0x33, 0x33, 0x33, 0x33 };
+VECT_VAR_DECL(expected_vld3_1,poly,16,8) [] = { 0xaaaa, 0xaaaa, 0xaaaa, 0xaaaa,
+                                               0xaaaa, 0xaaaa, 0xaaaa, 0xfff0 };
+VECT_VAR_DECL(expected_vld3_1,hfloat,32,4) [] = { 0xaaaaaaaa, 0xaaaaaaaa,
+                                                 0xc1800000, 0xc1700000 };
+
+/* vld3/chunk 2.  */
+VECT_VAR_DECL(expected_vld3_2,int,8,8) [] = { 0xaa, 0xaa, 0xaa, 0xaa,
+                                             0xaa, 0xf0, 0xf1, 0xf2 };
+VECT_VAR_DECL(expected_vld3_2,int,16,4) [] = { 0xfff2, 0xaaaa, 0xaaaa, 0xaaaa };
+VECT_VAR_DECL(expected_vld3_2,int,32,2) [] = { 0xaaaaaaaa, 0xaaaaaaaa };
+VECT_VAR_DECL(expected_vld3_2,int,64,1) [] = { 0x3333333333333333 };
+VECT_VAR_DECL(expected_vld3_2,uint,8,8) [] = { 0xaa, 0xaa, 0xaa, 0xaa,
+                                              0xaa, 0xaa, 0xaa, 0xaa };
+VECT_VAR_DECL(expected_vld3_2,uint,16,4) [] = { 0xaaaa, 0xfff0, 0xfff1, 0xfff2 };
+VECT_VAR_DECL(expected_vld3_2,uint,32,2) [] = { 0xfffffff1, 0xfffffff2 };
+VECT_VAR_DECL(expected_vld3_2,uint,64,1) [] = { 0x3333333333333333 };
+VECT_VAR_DECL(expected_vld3_2,poly,8,8) [] = { 0xaa, 0xaa, 0xaa, 0xaa,
+                                              0xaa, 0xaa, 0xaa, 0xaa };
+VECT_VAR_DECL(expected_vld3_2,poly,16,4) [] = { 0xaaaa, 0xfff0, 0xfff1, 0xfff2 };
+VECT_VAR_DECL(expected_vld3_2,hfloat,32,2) [] = { 0xaaaaaaaa, 0xaaaaaaaa };
+VECT_VAR_DECL(expected_vld3_2,int,8,16) [] = { 0x33, 0x33, 0x33, 0x33,
+                                              0x33, 0x33, 0x33, 0x33,
+                                              0x33, 0x33, 0x33, 0x33,
+                                              0x33, 0x33, 0x33, 0x33 };
+VECT_VAR_DECL(expected_vld3_2,int,16,8) [] = { 0xaaaa, 0xaaaa, 0xfff0, 0xfff1,
+                                              0xfff2, 0xaaaa, 0xaaaa, 0xaaaa };
+VECT_VAR_DECL(expected_vld3_2,int,32,4) [] = { 0xfffffff2, 0xaaaaaaaa,
+                                              0xaaaaaaaa, 0xaaaaaaaa };
+VECT_VAR_DECL(expected_vld3_2,int,64,2) [] = { 0x3333333333333333,
+                                              0x3333333333333333 };
+VECT_VAR_DECL(expected_vld3_2,uint,8,16) [] = { 0x33, 0x33, 0x33, 0x33,
+                                               0x33, 0x33, 0x33, 0x33,
+                                               0x33, 0x33, 0x33, 0x33,
+                                               0x33, 0x33, 0x33, 0x33 };
+VECT_VAR_DECL(expected_vld3_2,uint,16,8) [] = { 0xfff1, 0xfff2, 0xaaaa, 0xaaaa,
+                                               0xaaaa, 0xaaaa, 0xaaaa, 0xaaaa };
+VECT_VAR_DECL(expected_vld3_2,uint,32,4) [] = { 0xaaaaaaaa, 0xaaaaaaaa,
+                                               0xaaaaaaaa, 0xaaaaaaaa };
+VECT_VAR_DECL(expected_vld3_2,uint,64,2) [] = { 0x3333333333333333,
+                                               0x3333333333333333 };
+VECT_VAR_DECL(expected_vld3_2,poly,8,16) [] = { 0x33, 0x33, 0x33, 0x33,
+                                               0x33, 0x33, 0x33, 0x33,
+                                               0x33, 0x33, 0x33, 0x33,
+                                               0x33, 0x33, 0x33, 0x33 };
+VECT_VAR_DECL(expected_vld3_2,poly,16,8) [] = { 0xfff1, 0xfff2, 0xaaaa, 0xaaaa,
+                                               0xaaaa, 0xaaaa, 0xaaaa, 0xaaaa };
+VECT_VAR_DECL(expected_vld3_2,hfloat,32,4) [] = { 0xc1600000, 0xaaaaaaaa,
+                                                 0xaaaaaaaa, 0xaaaaaaaa };
+
+/* vld4/chunk 0.  */
+VECT_VAR_DECL(expected_vld4_0,int,8,8) [] = { 0xaa, 0xaa, 0xaa, 0xaa,
+                                             0xaa, 0xaa, 0xaa, 0xaa };
+VECT_VAR_DECL(expected_vld4_0,int,16,4) [] = { 0xaaaa, 0xaaaa, 0xaaaa, 0xaaaa };
+VECT_VAR_DECL(expected_vld4_0,int,32,2) [] = { 0xfffffff0, 0xfffffff1 };
+VECT_VAR_DECL(expected_vld4_0,int,64,1) [] = { 0x3333333333333333 };
+VECT_VAR_DECL(expected_vld4_0,uint,8,8) [] = { 0xaa, 0xaa, 0xaa, 0xaa,
+                                              0xaa, 0xaa, 0xaa, 0xaa };
+VECT_VAR_DECL(expected_vld4_0,uint,16,4) [] = { 0xaaaa, 0xaaaa, 0xaaaa, 0xaaaa };
+VECT_VAR_DECL(expected_vld4_0,uint,32,2) [] = { 0xaaaaaaaa, 0xaaaaaaaa };
+VECT_VAR_DECL(expected_vld4_0,uint,64,1) [] = { 0x3333333333333333 };
+VECT_VAR_DECL(expected_vld4_0,poly,8,8) [] = { 0xaa, 0xaa, 0xaa, 0xaa,
+                                              0xaa, 0xaa, 0xaa, 0xaa };
+VECT_VAR_DECL(expected_vld4_0,poly,16,4) [] = { 0xaaaa, 0xaaaa, 0xaaaa, 0xaaaa };
+VECT_VAR_DECL(expected_vld4_0,hfloat,32,2) [] = { 0xc1800000, 0xc1700000 };
+VECT_VAR_DECL(expected_vld4_0,int,8,16) [] = { 0x33, 0x33, 0x33, 0x33,
+                                              0x33, 0x33, 0x33, 0x33,
+                                              0x33, 0x33, 0x33, 0x33,
+                                              0x33, 0x33, 0x33, 0x33 };
+VECT_VAR_DECL(expected_vld4_0,int,16,8) [] = { 0xaaaa, 0xaaaa, 0xaaaa, 0xaaaa,
+                                              0xaaaa, 0xaaaa, 0xaaaa, 0xaaaa };
+VECT_VAR_DECL(expected_vld4_0,int,32,4) [] = { 0xaaaaaaaa, 0xaaaaaaaa,
+                                              0xaaaaaaaa, 0xaaaaaaaa };
+VECT_VAR_DECL(expected_vld4_0,int,64,2) [] = { 0x3333333333333333,
+                                              0x3333333333333333 };
+VECT_VAR_DECL(expected_vld4_0,uint,8,16) [] = { 0x33, 0x33, 0x33, 0x33,
+                                               0x33, 0x33, 0x33, 0x33,
+                                               0x33, 0x33, 0x33, 0x33,
+                                               0x33, 0x33, 0x33, 0x33 };
+VECT_VAR_DECL(expected_vld4_0,uint,16,8) [] = { 0xaaaa, 0xaaaa, 0xaaaa, 0xaaaa,
+                                               0xaaaa, 0xaaaa, 0xaaaa, 0xaaaa };
+VECT_VAR_DECL(expected_vld4_0,uint,32,4) [] = { 0xfffffff0, 0xfffffff1,
+                                               0xfffffff2, 0xfffffff3 };
+VECT_VAR_DECL(expected_vld4_0,uint,64,2) [] = { 0x3333333333333333,
+                                               0x3333333333333333 };
+VECT_VAR_DECL(expected_vld4_0,poly,8,16) [] = { 0x33, 0x33, 0x33, 0x33,
+                                               0x33, 0x33, 0x33, 0x33,
+                                               0x33, 0x33, 0x33, 0x33,
+                                               0x33, 0x33, 0x33, 0x33 };
+VECT_VAR_DECL(expected_vld4_0,poly,16,8) [] = { 0xaaaa, 0xaaaa, 0xaaaa, 0xaaaa,
+                                               0xaaaa, 0xaaaa, 0xaaaa, 0xaaaa };
+VECT_VAR_DECL(expected_vld4_0,hfloat,32,4) [] = { 0xaaaaaaaa, 0xaaaaaaaa,
+                                                 0xaaaaaaaa, 0xaaaaaaaa };
+
+/* vld4/chunk 1.  */
+VECT_VAR_DECL(expected_vld4_1,int,8,8) [] = { 0xaa, 0xaa, 0xaa, 0xaa,
+                                             0xaa, 0xaa, 0xaa, 0xaa };
+VECT_VAR_DECL(expected_vld4_1,int,16,4) [] = { 0xaaaa, 0xaaaa, 0xaaaa, 0xaaaa };
+VECT_VAR_DECL(expected_vld4_1,int,32,2) [] = { 0xfffffff2, 0xfffffff3 };
+VECT_VAR_DECL(expected_vld4_1,int,64,1) [] = { 0x3333333333333333 };
+VECT_VAR_DECL(expected_vld4_1,uint,8,8) [] = { 0xaa, 0xaa, 0xaa, 0xaa,
+                                              0xaa, 0xaa, 0xaa, 0xaa };
+VECT_VAR_DECL(expected_vld4_1,uint,16,4) [] = { 0xaaaa, 0xaaaa, 0xaaaa, 0xaaaa };
+VECT_VAR_DECL(expected_vld4_1,uint,32,2) [] = { 0xaaaaaaaa, 0xaaaaaaaa };
+VECT_VAR_DECL(expected_vld4_1,uint,64,1) [] = { 0x3333333333333333 };
+VECT_VAR_DECL(expected_vld4_1,poly,8,8) [] = { 0xaa, 0xaa, 0xaa, 0xaa,
+                                              0xaa, 0xaa, 0xaa, 0xaa };
+VECT_VAR_DECL(expected_vld4_1,poly,16,4) [] = { 0xaaaa, 0xaaaa, 0xaaaa, 0xaaaa };
+VECT_VAR_DECL(expected_vld4_1,hfloat,32,2) [] = { 0xc1600000, 0xc1500000 };
+VECT_VAR_DECL(expected_vld4_1,int,8,16) [] = { 0x33, 0x33, 0x33, 0x33,
+                                              0x33, 0x33, 0x33, 0x33,
+                                              0x33, 0x33, 0x33, 0x33,
+                                              0x33, 0x33, 0x33, 0x33 };
+VECT_VAR_DECL(expected_vld4_1,int,16,8) [] = { 0xaaaa, 0xaaaa, 0xaaaa, 0xaaaa,
+                                              0xaaaa, 0xaaaa, 0xaaaa, 0xaaaa };
+VECT_VAR_DECL(expected_vld4_1,int,32,4) [] = { 0xaaaaaaaa, 0xaaaaaaaa,
+                                              0xaaaaaaaa, 0xaaaaaaaa };
+VECT_VAR_DECL(expected_vld4_1,int,64,2) [] = { 0x3333333333333333,
+                                              0x3333333333333333 };
+VECT_VAR_DECL(expected_vld4_1,uint,8,16) [] = { 0x33, 0x33, 0x33, 0x33,
+                                               0x33, 0x33, 0x33, 0x33,
+                                               0x33, 0x33, 0x33, 0x33,
+                                               0x33, 0x33, 0x33, 0x33 };
+VECT_VAR_DECL(expected_vld4_1,uint,16,8) [] = { 0xaaaa, 0xaaaa, 0xaaaa, 0xaaaa,
+                                               0xaaaa, 0xaaaa, 0xaaaa, 0xaaaa };
+VECT_VAR_DECL(expected_vld4_1,uint,32,4) [] = { 0xaaaaaaaa, 0xaaaaaaaa,
+                                               0xaaaaaaaa, 0xaaaaaaaa };
+VECT_VAR_DECL(expected_vld4_1,uint,64,2) [] = { 0x3333333333333333,
+                                               0x3333333333333333 };
+VECT_VAR_DECL(expected_vld4_1,poly,8,16) [] = { 0x33, 0x33, 0x33, 0x33,
+                                               0x33, 0x33, 0x33, 0x33,
+                                               0x33, 0x33, 0x33, 0x33,
+                                               0x33, 0x33, 0x33, 0x33 };
+VECT_VAR_DECL(expected_vld4_1,poly,16,8) [] = { 0xaaaa, 0xaaaa, 0xaaaa, 0xaaaa,
+                                               0xaaaa, 0xaaaa, 0xaaaa, 0xaaaa };
+VECT_VAR_DECL(expected_vld4_1,hfloat,32,4) [] = { 0xaaaaaaaa, 0xaaaaaaaa,
+                                                 0xaaaaaaaa, 0xaaaaaaaa };
+
+/* vld4/chunk 2.  */
+VECT_VAR_DECL(expected_vld4_2,int,8,8) [] = { 0xaa, 0xaa, 0xaa, 0xaa,
+                                             0xaa, 0xaa, 0xaa, 0xaa };
+VECT_VAR_DECL(expected_vld4_2,int,16,4) [] = { 0xfff0, 0xfff1, 0xfff2, 0xfff3 };
+VECT_VAR_DECL(expected_vld4_2,int,32,2) [] = { 0xaaaaaaaa, 0xaaaaaaaa };
+VECT_VAR_DECL(expected_vld4_2,int,64,1) [] = { 0x3333333333333333 };
+VECT_VAR_DECL(expected_vld4_2,uint,8,8) [] = { 0xf0, 0xf1, 0xf2, 0xf3,
+                                              0xaa, 0xaa, 0xaa, 0xaa };
+VECT_VAR_DECL(expected_vld4_2,uint,16,4) [] = { 0xaaaa, 0xaaaa, 0xaaaa, 0xaaaa };
+VECT_VAR_DECL(expected_vld4_2,uint,32,2) [] = { 0xfffffff0, 0xfffffff1 };
+VECT_VAR_DECL(expected_vld4_2,uint,64,1) [] = { 0x3333333333333333 };
+VECT_VAR_DECL(expected_vld4_2,poly,8,8) [] = { 0xf0, 0xf1, 0xf2, 0xf3,
+                                              0xaa, 0xaa, 0xaa, 0xaa };
+VECT_VAR_DECL(expected_vld4_2,poly,16,4) [] = { 0xaaaa, 0xaaaa, 0xaaaa, 0xaaaa };
+VECT_VAR_DECL(expected_vld4_2,hfloat,32,2) [] = { 0xaaaaaaaa, 0xaaaaaaaa };
+VECT_VAR_DECL(expected_vld4_2,int,8,16) [] = { 0x33, 0x33, 0x33, 0x33,
+                                              0x33, 0x33, 0x33, 0x33,
+                                              0x33, 0x33, 0x33, 0x33,
+                                              0x33, 0x33, 0x33, 0x33 };
+VECT_VAR_DECL(expected_vld4_2,int,16,8) [] = { 0xaaaa, 0xaaaa, 0xaaaa, 0xaaaa,
+                                              0xaaaa, 0xaaaa, 0xaaaa, 0xaaaa };
+VECT_VAR_DECL(expected_vld4_2,int,32,4) [] = { 0xfffffff0, 0xfffffff1,
+                                              0xfffffff2, 0xfffffff3 };
+VECT_VAR_DECL(expected_vld4_2,int,64,2) [] = { 0x3333333333333333,
+                                              0x3333333333333333 };
+VECT_VAR_DECL(expected_vld4_2,uint,8,16) [] = { 0x33, 0x33, 0x33, 0x33,
+                                               0x33, 0x33, 0x33, 0x33,
+                                               0x33, 0x33, 0x33, 0x33,
+                                               0x33, 0x33, 0x33, 0x33 };
+VECT_VAR_DECL(expected_vld4_2,uint,16,8) [] = { 0xaaaa, 0xaaaa, 0xaaaa, 0xaaaa,
+                                               0xfff0, 0xfff1, 0xfff2, 0xfff3 };
+VECT_VAR_DECL(expected_vld4_2,uint,32,4) [] = { 0xaaaaaaaa, 0xaaaaaaaa,
+                                               0xaaaaaaaa, 0xaaaaaaaa };
+VECT_VAR_DECL(expected_vld4_2,uint,64,2) [] = { 0x3333333333333333,
+                                               0x3333333333333333 };
+VECT_VAR_DECL(expected_vld4_2,poly,8,16) [] = { 0x33, 0x33, 0x33, 0x33,
+                                               0x33, 0x33, 0x33, 0x33,
+                                               0x33, 0x33, 0x33, 0x33,
+                                               0x33, 0x33, 0x33, 0x33 };
+VECT_VAR_DECL(expected_vld4_2,poly,16,8) [] = { 0xaaaa, 0xaaaa, 0xaaaa, 0xaaaa,
+                                               0xfff0, 0xfff1, 0xfff2, 0xfff3 };
+VECT_VAR_DECL(expected_vld4_2,hfloat,32,4) [] = { 0xc1800000, 0xc1700000,
+                                                 0xc1600000, 0xc1500000 };
+
+/* vld4/chunk 3.  */
+VECT_VAR_DECL(expected_vld4_3,int,8,8) [] = { 0xaa, 0xaa, 0xaa, 0xaa,
+                                             0xf0, 0xf1, 0xf2, 0xf3 };
+VECT_VAR_DECL(expected_vld4_3,int,16,4) [] = { 0xaaaa, 0xaaaa, 0xaaaa, 0xaaaa };
+VECT_VAR_DECL(expected_vld4_3,int,32,2) [] = { 0xaaaaaaaa, 0xaaaaaaaa };
+VECT_VAR_DECL(expected_vld4_3,int,64,1) [] = { 0x3333333333333333 };
+VECT_VAR_DECL(expected_vld4_3,uint,8,8) [] = { 0xaa, 0xaa, 0xaa, 0xaa,
+                                              0xaa, 0xaa, 0xaa, 0xaa };
+VECT_VAR_DECL(expected_vld4_3,uint,16,4) [] = { 0xfff0, 0xfff1, 0xfff2, 0xfff3 };
+VECT_VAR_DECL(expected_vld4_3,uint,32,2) [] = { 0xfffffff2, 0xfffffff3 };
+VECT_VAR_DECL(expected_vld4_3,uint,64,1) [] = { 0x3333333333333333 };
+VECT_VAR_DECL(expected_vld4_3,poly,8,8) [] = { 0xaa, 0xaa, 0xaa, 0xaa,
+                                              0xaa, 0xaa, 0xaa, 0xaa };
+VECT_VAR_DECL(expected_vld4_3,poly,16,4) [] = { 0xfff0, 0xfff1, 0xfff2, 0xfff3 };
+VECT_VAR_DECL(expected_vld4_3,hfloat,32,2) [] = { 0xaaaaaaaa, 0xaaaaaaaa };
+VECT_VAR_DECL(expected_vld4_3,int,8,16) [] = { 0x33, 0x33, 0x33, 0x33,
+                                              0x33, 0x33, 0x33, 0x33,
+                                              0x33, 0x33, 0x33, 0x33,
+                                              0x33, 0x33, 0x33, 0x33 };
+VECT_VAR_DECL(expected_vld4_3,int,16,8) [] = { 0xfff0, 0xfff1, 0xfff2, 0xfff3,
+                                              0xaaaa, 0xaaaa, 0xaaaa, 0xaaaa };
+VECT_VAR_DECL(expected_vld4_3,int,32,4) [] = { 0xaaaaaaaa, 0xaaaaaaaa,
+                                              0xaaaaaaaa, 0xaaaaaaaa };
+VECT_VAR_DECL(expected_vld4_3,int,64,2) [] = { 0x3333333333333333,
+                                              0x3333333333333333 };
+VECT_VAR_DECL(expected_vld4_3,uint,8,16) [] = { 0x33, 0x33, 0x33, 0x33,
+                                               0x33, 0x33, 0x33, 0x33,
+                                               0x33, 0x33, 0x33, 0x33,
+                                               0x33, 0x33, 0x33, 0x33 };
+VECT_VAR_DECL(expected_vld4_3,uint,16,8) [] = { 0xaaaa, 0xaaaa, 0xaaaa, 0xaaaa,
+                                               0xaaaa, 0xaaaa, 0xaaaa, 0xaaaa };
+VECT_VAR_DECL(expected_vld4_3,uint,32,4) [] = { 0xaaaaaaaa, 0xaaaaaaaa,
+                                               0xaaaaaaaa, 0xaaaaaaaa };
+VECT_VAR_DECL(expected_vld4_3,uint,64,2) [] = { 0x3333333333333333,
+                                               0x3333333333333333 };
+VECT_VAR_DECL(expected_vld4_3,poly,8,16) [] = { 0x33, 0x33, 0x33, 0x33,
+                                               0x33, 0x33, 0x33, 0x33,
+                                               0x33, 0x33, 0x33, 0x33,
+                                               0x33, 0x33, 0x33, 0x33 };
+VECT_VAR_DECL(expected_vld4_3,poly,16,8) [] = { 0xaaaa, 0xaaaa, 0xaaaa, 0xaaaa,
+                                               0xaaaa, 0xaaaa, 0xaaaa, 0xaaaa };
+VECT_VAR_DECL(expected_vld4_3,hfloat,32,4) [] = { 0xaaaaaaaa, 0xaaaaaaaa,
+                                                 0xaaaaaaaa, 0xaaaaaaaa };
+
+/* Declare additional input buffers as needed.  */
+/* Input buffers for vld2_lane */
+VECT_VAR_DECL_INIT(buffer_vld2_lane, int, 8, 2);
+VECT_VAR_DECL_INIT(buffer_vld2_lane, int, 16, 2);
+VECT_VAR_DECL_INIT(buffer_vld2_lane, int, 32, 2);
+VECT_VAR_DECL_INIT(buffer_vld2_lane, int, 64, 2);
+VECT_VAR_DECL_INIT(buffer_vld2_lane, uint, 8, 2);
+VECT_VAR_DECL_INIT(buffer_vld2_lane, uint, 16, 2);
+VECT_VAR_DECL_INIT(buffer_vld2_lane, uint, 32, 2);
+VECT_VAR_DECL_INIT(buffer_vld2_lane, uint, 64, 2);
+VECT_VAR_DECL_INIT(buffer_vld2_lane, poly, 8, 2);
+VECT_VAR_DECL_INIT(buffer_vld2_lane, poly, 16, 2);
+VECT_VAR_DECL_INIT(buffer_vld2_lane, float, 32, 2);
+
+/* Input buffers for vld3_lane */
+VECT_VAR_DECL_INIT(buffer_vld3_lane, int, 8, 3);
+VECT_VAR_DECL_INIT(buffer_vld3_lane, int, 16, 3);
+VECT_VAR_DECL_INIT(buffer_vld3_lane, int, 32, 3);
+VECT_VAR_DECL_INIT(buffer_vld3_lane, int, 64, 3);
+VECT_VAR_DECL_INIT(buffer_vld3_lane, uint, 8, 3);
+VECT_VAR_DECL_INIT(buffer_vld3_lane, uint, 16, 3);
+VECT_VAR_DECL_INIT(buffer_vld3_lane, uint, 32, 3);
+VECT_VAR_DECL_INIT(buffer_vld3_lane, uint, 64, 3);
+VECT_VAR_DECL_INIT(buffer_vld3_lane, poly, 8, 3);
+VECT_VAR_DECL_INIT(buffer_vld3_lane, poly, 16, 3);
+VECT_VAR_DECL_INIT(buffer_vld3_lane, float, 32, 3);
+
+/* Input buffers for vld4_lane */
+VECT_VAR_DECL_INIT(buffer_vld4_lane, int, 8, 4);
+VECT_VAR_DECL_INIT(buffer_vld4_lane, int, 16, 4);
+VECT_VAR_DECL_INIT(buffer_vld4_lane, int, 32, 4);
+VECT_VAR_DECL_INIT(buffer_vld4_lane, int, 64, 4);
+VECT_VAR_DECL_INIT(buffer_vld4_lane, uint, 8, 4);
+VECT_VAR_DECL_INIT(buffer_vld4_lane, uint, 16, 4);
+VECT_VAR_DECL_INIT(buffer_vld4_lane, uint, 32, 4);
+VECT_VAR_DECL_INIT(buffer_vld4_lane, uint, 64, 4);
+VECT_VAR_DECL_INIT(buffer_vld4_lane, poly, 8, 4);
+VECT_VAR_DECL_INIT(buffer_vld4_lane, poly, 16, 4);
+VECT_VAR_DECL_INIT(buffer_vld4_lane, float, 32, 4);
+
+void exec_vldX_lane (void)
+{
+  /* In this case, input variables are arrays of vectors.  */
+#define DECL_VLDX_LANE(T1, W, N, X)                                    \
+  VECT_ARRAY_TYPE(T1, W, N, X) VECT_ARRAY_VAR(vector, T1, W, N, X);    \
+  VECT_ARRAY_TYPE(T1, W, N, X) VECT_ARRAY_VAR(vector_src, T1, W, N, X);        \
+  VECT_VAR_DECL(result_bis_##X, T1, W, N)[X * N]
+
+  /* We need to use a temporary result buffer (result_bis), because
+     the one used for other tests is not large enough. A subset of the
+     result data is moved from result_bis to result, and it is this
+     subset which is used to check the actual behaviour. The next
+     macro enables to move another chunk of data from result_bis to
+     result.  */
+  /* We also use another extra input buffer (buffer_src), which we
+     fill with 0xAA, and which it used to load a vector from which we
+     read a given lane.  */
+#define TEST_VLDX_LANE(Q, T1, T2, W, N, X, L)                          \
+  memset (VECT_VAR(buffer_src, T1, W, N), 0xAA,                                \
+         sizeof(VECT_VAR(buffer_src, T1, W, N)));                      \
+                                                                       \
+  VECT_ARRAY_VAR(vector_src, T1, W, N, X) =                            \
+    vld##X##Q##_##T2##W(VECT_VAR(buffer_src, T1, W, N));               \
+                                                                       \
+  VECT_ARRAY_VAR(vector, T1, W, N, X) =                                        \
+    /* Use dedicated init buffer, of size.  X */                       \
+    vld##X##Q##_lane_##T2##W(VECT_VAR(buffer_vld##X##_lane, T1, W, X), \
+                            VECT_ARRAY_VAR(vector_src, T1, W, N, X),   \
+                            L);                                        \
+  vst##X##Q##_##T2##W(VECT_VAR(result_bis_##X, T1, W, N),              \
+                     VECT_ARRAY_VAR(vector, T1, W, N, X));             \
+  memcpy(VECT_VAR(result, T1, W, N), VECT_VAR(result_bis_##X, T1, W, N), \
+        sizeof(VECT_VAR(result, T1, W, N)))
+
+  /* Overwrite "result" with the contents of "result_bis"[Y].  */
+#define TEST_EXTRA_CHUNK(T1, W, N, X, Y)               \
+  memcpy(VECT_VAR(result, T1, W, N),                   \
+        &(VECT_VAR(result_bis_##X, T1, W, N)[Y*N]),    \
+        sizeof(VECT_VAR(result, T1, W, N)));
+
+  /* We need all variants in 64 bits, but there is no 64x2 variant.  */
+#define DECL_ALL_VLDX_LANE(X)                  \
+  DECL_VLDX_LANE(int, 8, 8, X);                        \
+  DECL_VLDX_LANE(int, 16, 4, X);               \
+  DECL_VLDX_LANE(int, 32, 2, X);               \
+  DECL_VLDX_LANE(uint, 8, 8, X);               \
+  DECL_VLDX_LANE(uint, 16, 4, X);              \
+  DECL_VLDX_LANE(uint, 32, 2, X);              \
+  DECL_VLDX_LANE(poly, 8, 8, X);               \
+  DECL_VLDX_LANE(poly, 16, 4, X);              \
+  DECL_VLDX_LANE(int, 16, 8, X);               \
+  DECL_VLDX_LANE(int, 32, 4, X);               \
+  DECL_VLDX_LANE(uint, 16, 8, X);              \
+  DECL_VLDX_LANE(uint, 32, 4, X);              \
+  DECL_VLDX_LANE(poly, 16, 8, X);              \
+  DECL_VLDX_LANE(float, 32, 2, X);             \
+  DECL_VLDX_LANE(float, 32, 4, X)
+
+  /* Add some padding to try to catch out of bound accesses.  */
+#define ARRAY1(V, T, W, N) VECT_VAR_DECL(V,T,W,N)[1]={42}
+#define DUMMY_ARRAY(V, T, W, N, L) \
+  VECT_VAR_DECL(V,T,W,N)[N*L]={0}; \
+  ARRAY1(V##_pad,T,W,N)
+
+  /* Use the same lanes regardless of the size of the array (X), for
+     simplicity.  */
+#define TEST_ALL_VLDX_LANE(X)                  \
+  TEST_VLDX_LANE(, int, s, 8, 8, X, 7);                \
+  TEST_VLDX_LANE(, int, s, 16, 4, X, 2);       \
+  TEST_VLDX_LANE(, int, s, 32, 2, X, 0);       \
+  TEST_VLDX_LANE(, uint, u, 8, 8, X, 4);       \
+  TEST_VLDX_LANE(, uint, u, 16, 4, X, 3);      \
+  TEST_VLDX_LANE(, uint, u, 32, 2, X, 1);      \
+  TEST_VLDX_LANE(, poly, p, 8, 8, X, 4);       \
+  TEST_VLDX_LANE(, poly, p, 16, 4, X, 3);      \
+  TEST_VLDX_LANE(q, int, s, 16, 8, X, 6);      \
+  TEST_VLDX_LANE(q, int, s, 32, 4, X, 2);      \
+  TEST_VLDX_LANE(q, uint, u, 16, 8, X, 5);     \
+  TEST_VLDX_LANE(q, uint, u, 32, 4, X, 0);     \
+  TEST_VLDX_LANE(q, poly, p, 16, 8, X, 5);     \
+  TEST_VLDX_LANE(, float, f, 32, 2, X, 0);     \
+  TEST_VLDX_LANE(q, float, f, 32, 4, X, 2)
+
+#define TEST_ALL_EXTRA_CHUNKS(X, Y)            \
+  TEST_EXTRA_CHUNK(int, 8, 8, X, Y);           \
+  TEST_EXTRA_CHUNK(int, 16, 4, X, Y);          \
+  TEST_EXTRA_CHUNK(int, 32, 2, X, Y);          \
+  TEST_EXTRA_CHUNK(uint, 8, 8, X, Y);          \
+  TEST_EXTRA_CHUNK(uint, 16, 4, X, Y);         \
+  TEST_EXTRA_CHUNK(uint, 32, 2, X, Y);         \
+  TEST_EXTRA_CHUNK(poly, 8, 8, X, Y);          \
+  TEST_EXTRA_CHUNK(poly, 16, 4, X, Y);         \
+  TEST_EXTRA_CHUNK(int, 16, 8, X, Y);          \
+  TEST_EXTRA_CHUNK(int, 32, 4, X, Y);          \
+  TEST_EXTRA_CHUNK(uint, 16, 8, X, Y);         \
+  TEST_EXTRA_CHUNK(uint, 32, 4, X, Y);         \
+  TEST_EXTRA_CHUNK(poly, 16, 8, X, Y);         \
+  TEST_EXTRA_CHUNK(float, 32, 2, X, Y);                \
+  TEST_EXTRA_CHUNK(float, 32, 4, X, Y)
+
+  /* Declare the temporary buffers / variables.  */
+  DECL_ALL_VLDX_LANE(2);
+  DECL_ALL_VLDX_LANE(3);
+  DECL_ALL_VLDX_LANE(4);
+
+  /* Define dummy input arrays, large enough for x4 vectors.  */
+  DUMMY_ARRAY(buffer_src, int, 8, 8, 4);
+  DUMMY_ARRAY(buffer_src, int, 16, 4, 4);
+  DUMMY_ARRAY(buffer_src, int, 32, 2, 4);
+  DUMMY_ARRAY(buffer_src, uint, 8, 8, 4);
+  DUMMY_ARRAY(buffer_src, uint, 16, 4, 4);
+  DUMMY_ARRAY(buffer_src, uint, 32, 2, 4);
+  DUMMY_ARRAY(buffer_src, poly, 8, 8, 4);
+  DUMMY_ARRAY(buffer_src, poly, 16, 4, 4);
+  DUMMY_ARRAY(buffer_src, int, 16, 8, 4);
+  DUMMY_ARRAY(buffer_src, int, 32, 4, 4);
+  DUMMY_ARRAY(buffer_src, uint, 16, 8, 4);
+  DUMMY_ARRAY(buffer_src, uint, 32, 4, 4);
+  DUMMY_ARRAY(buffer_src, poly, 16, 8, 4);
+  DUMMY_ARRAY(buffer_src, float, 32, 2, 4);
+  DUMMY_ARRAY(buffer_src, float, 32, 4, 4);
+
+  /* Check vld2_lane/vld2q_lane.  */
+  clean_results ();
+#define TEST_MSG "VLD2_LANE/VLD2Q_LANE"
+  TEST_ALL_VLDX_LANE(2);
+  CHECK_RESULTS_NAMED (TEST_MSG, expected_vld2_0, " chunk 0");
+
+  TEST_ALL_EXTRA_CHUNKS(2, 1);
+  CHECK_RESULTS_NAMED (TEST_MSG, expected_vld2_1, " chunk 1");
+
+  /* Check vld3_lane/vld3q_lane.  */
+  clean_results ();
+#undef TEST_MSG
+#define TEST_MSG "VLD3_LANE/VLD3Q_LANE"
+  TEST_ALL_VLDX_LANE(3);
+  CHECK_RESULTS_NAMED (TEST_MSG, expected_vld3_0, " chunk 0");
+
+  TEST_ALL_EXTRA_CHUNKS(3, 1);
+  CHECK_RESULTS_NAMED (TEST_MSG, expected_vld3_1, " chunk 1");
+
+  TEST_ALL_EXTRA_CHUNKS(3, 2);
+  CHECK_RESULTS_NAMED (TEST_MSG, expected_vld3_2, " chunk 2");
+
+  /* Check vld4_lane/vld4q_lane.  */
+  clean_results ();
+#undef TEST_MSG
+#define TEST_MSG "VLD4_LANE/VLD4Q_LANE"
+  TEST_ALL_VLDX_LANE(4);
+  CHECK_RESULTS_NAMED (TEST_MSG, expected_vld4_0, " chunk 0");
+
+  TEST_ALL_EXTRA_CHUNKS(4, 1);
+  CHECK_RESULTS_NAMED (TEST_MSG, expected_vld4_1, " chunk 1");
+  TEST_ALL_EXTRA_CHUNKS(4, 2);
+
+  CHECK_RESULTS_NAMED (TEST_MSG, expected_vld4_2, " chunk 2");
+
+  TEST_ALL_EXTRA_CHUNKS(4, 3);
+  CHECK_RESULTS_NAMED (TEST_MSG, expected_vld4_3, " chunk 3");
+}
+
+int main (void)
+{
+  exec_vldX_lane ();
+  return 0;
+}
diff --git a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vmul.c b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vmul.c
new file mode 100644 (file)
index 0000000..7527861
--- /dev/null
@@ -0,0 +1,156 @@
+#include <arm_neon.h>
+#include "arm-neon-ref.h"
+#include "compute-ref-data.h"
+
+/* Expected results.  */
+VECT_VAR_DECL(expected,int,8,8) [] = { 0xf0, 0x1, 0x12, 0x23,
+                                      0x34, 0x45, 0x56, 0x67 };
+VECT_VAR_DECL(expected,int,16,4) [] = { 0xfde0, 0xfe02, 0xfe24, 0xfe46 };
+VECT_VAR_DECL(expected,int,32,2) [] = { 0xfffffcd0, 0xfffffd03 };
+VECT_VAR_DECL(expected,int,64,1) [] = { 0x3333333333333333 };
+VECT_VAR_DECL(expected,uint,8,8) [] = { 0xc0, 0x4, 0x48, 0x8c,
+                                       0xd0, 0x14, 0x58, 0x9c };
+VECT_VAR_DECL(expected,uint,16,4) [] = { 0xfab0, 0xfb05, 0xfb5a, 0xfbaf };
+VECT_VAR_DECL(expected,uint,32,2) [] = { 0xfffff9a0, 0xfffffa06 };
+VECT_VAR_DECL(expected,uint,64,1) [] = { 0x3333333333333333 };
+VECT_VAR_DECL(expected,poly,8,8) [] = { 0xc0, 0x84, 0x48, 0xc,
+                                       0xd0, 0x94, 0x58, 0x1c };
+VECT_VAR_DECL(expected,poly,16,4) [] = { 0x3333, 0x3333, 0x3333, 0x3333 };
+VECT_VAR_DECL(expected,hfloat,32,2) [] = { 0xc4053333, 0xc3f9c000 };
+VECT_VAR_DECL(expected,int,8,16) [] = { 0x90, 0x7, 0x7e, 0xf5,
+                                       0x6c, 0xe3, 0x5a, 0xd1,
+                                       0x48, 0xbf, 0x36, 0xad,
+                                       0x24, 0x9b, 0x12, 0x89 };
+VECT_VAR_DECL(expected,int,16,8) [] = { 0xf780, 0xf808, 0xf890, 0xf918,
+                                       0xf9a0, 0xfa28, 0xfab0, 0xfb38 };
+VECT_VAR_DECL(expected,int,32,4) [] = { 0xfffff670, 0xfffff709,
+                                       0xfffff7a2, 0xfffff83b };
+VECT_VAR_DECL(expected,int,64,2) [] = { 0x3333333333333333,
+                                       0x3333333333333333 };
+VECT_VAR_DECL(expected,uint,8,16) [] = { 0x60, 0xa, 0xb4, 0x5e,
+                                        0x8, 0xb2, 0x5c, 0x6,
+                                        0xb0, 0x5a, 0x4, 0xae,
+                                        0x58, 0x2, 0xac, 0x56 };
+VECT_VAR_DECL(expected,uint,16,8) [] = { 0xf450, 0xf50b, 0xf5c6, 0xf681,
+                                        0xf73c, 0xf7f7, 0xf8b2, 0xf96d };
+VECT_VAR_DECL(expected,uint,32,4) [] = { 0xfffff340, 0xfffff40c,
+                                        0xfffff4d8, 0xfffff5a4 };
+VECT_VAR_DECL(expected,uint,64,2) [] = { 0x3333333333333333,
+                                        0x3333333333333333 };
+VECT_VAR_DECL(expected,poly,8,16) [] = { 0x60, 0xca, 0x34, 0x9e,
+                                        0xc8, 0x62, 0x9c, 0x36,
+                                        0x30, 0x9a, 0x64, 0xce,
+                                        0x98, 0x32, 0xcc, 0x66 };
+VECT_VAR_DECL(expected,poly,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333,
+                                        0x3333, 0x3333, 0x3333, 0x3333 };
+VECT_VAR_DECL(expected,hfloat,32,4) [] = { 0xc4c73333, 0xc4bac000,
+                                          0xc4ae4ccd, 0xc4a1d999 };
+
+#ifndef INSN_NAME
+#define INSN_NAME vmul
+#define TEST_MSG "VMUL"
+#endif
+
+#define FNNAME1(NAME) exec_ ## NAME
+#define FNNAME(NAME) FNNAME1(NAME)
+
+void FNNAME (INSN_NAME) (void)
+{
+#define DECL_VMUL(T, W, N)                     \
+  DECL_VARIABLE(vector1, T, W, N);             \
+  DECL_VARIABLE(vector2, T, W, N);             \
+  DECL_VARIABLE(vector_res, T, W, N)
+
+  /* vector_res = OP(vector1, vector2), then store the result.  */
+#define TEST_VMUL1(INSN, Q, T1, T2, W, N)                              \
+  VECT_VAR(vector_res, T1, W, N) =                                     \
+    INSN##Q##_##T2##W(VECT_VAR(vector1, T1, W, N),                     \
+                     VECT_VAR(vector2, T1, W, N));                     \
+  vst1##Q##_##T2##W(VECT_VAR(result, T1, W, N),                                \
+                   VECT_VAR(vector_res, T1, W, N))
+
+#define TEST_VMUL(INSN, Q, T1, T2, W, N)       \
+  TEST_VMUL1(INSN, Q, T1, T2, W, N)
+
+  DECL_VMUL(int, 8, 8);
+  DECL_VMUL(int, 16, 4);
+  DECL_VMUL(int, 32, 2);
+  DECL_VMUL(uint, 8, 8);
+  DECL_VMUL(uint, 16, 4);
+  DECL_VMUL(uint, 32, 2);
+  DECL_VMUL(poly, 8, 8);
+  DECL_VMUL(float, 32, 2);
+  DECL_VMUL(int, 8, 16);
+  DECL_VMUL(int, 16, 8);
+  DECL_VMUL(int, 32, 4);
+  DECL_VMUL(uint, 8, 16);
+  DECL_VMUL(uint, 16, 8);
+  DECL_VMUL(uint, 32, 4);
+  DECL_VMUL(poly, 8, 16);
+  DECL_VMUL(float, 32, 4);
+
+  clean_results ();
+
+  /* Initialize input "vector1" from "buffer".  */
+  VLOAD(vector1, buffer, , int, s, 8, 8);
+  VLOAD(vector1, buffer, , int, s, 16, 4);
+  VLOAD(vector1, buffer, , int, s, 32, 2);
+  VLOAD(vector1, buffer, , uint, u, 8, 8);
+  VLOAD(vector1, buffer, , uint, u, 16, 4);
+  VLOAD(vector1, buffer, , uint, u, 32, 2);
+  VLOAD(vector1, buffer, , poly, p, 8, 8);
+  VLOAD(vector1, buffer, , float, f, 32, 2);
+  VLOAD(vector1, buffer, q, int, s, 8, 16);
+  VLOAD(vector1, buffer, q, int, s, 16, 8);
+  VLOAD(vector1, buffer, q, int, s, 32, 4);
+  VLOAD(vector1, buffer, q, uint, u, 8, 16);
+  VLOAD(vector1, buffer, q, uint, u, 16, 8);
+  VLOAD(vector1, buffer, q, uint, u, 32, 4);
+  VLOAD(vector1, buffer, q, poly, p, 8, 16);
+  VLOAD(vector1, buffer, q, float, f, 32, 4);
+
+  /* Choose init value arbitrarily.  */
+  VDUP(vector2, , int, s, 8, 8, 0x11);
+  VDUP(vector2, , int, s, 16, 4, 0x22);
+  VDUP(vector2, , int, s, 32, 2, 0x33);
+  VDUP(vector2, , uint, u, 8, 8, 0x44);
+  VDUP(vector2, , uint, u, 16, 4, 0x55);
+  VDUP(vector2, , uint, u, 32, 2, 0x66);
+  VDUP(vector2, , poly, p, 8, 8, 0x44);
+  VDUP(vector2, , float, f, 32, 2, 33.3f);
+  VDUP(vector2, q, int, s, 8, 16, 0x77);
+  VDUP(vector2, q, int, s, 16, 8, 0x88);
+  VDUP(vector2, q, int, s, 32, 4, 0x99);
+  VDUP(vector2, q, uint, u, 8, 16, 0xAA);
+  VDUP(vector2, q, uint, u, 16, 8, 0xBB);
+  VDUP(vector2, q, uint, u, 32, 4, 0xCC);
+  VDUP(vector2, q, poly, p, 8, 16, 0xAA);
+  VDUP(vector2, q, float, f, 32, 4, 99.6f);
+
+  /* Execute the tests.  */
+  TEST_VMUL(INSN_NAME, , int, s, 8, 8);
+  TEST_VMUL(INSN_NAME, , int, s, 16, 4);
+  TEST_VMUL(INSN_NAME, , int, s, 32, 2);
+  TEST_VMUL(INSN_NAME, , uint, u, 8, 8);
+  TEST_VMUL(INSN_NAME, , uint, u, 16, 4);
+  TEST_VMUL(INSN_NAME, , uint, u, 32, 2);
+  TEST_VMUL(INSN_NAME, , poly, p, 8, 8);
+  TEST_VMUL(INSN_NAME, , float, f, 32, 2);
+  TEST_VMUL(INSN_NAME, q, int, s, 8, 16);
+  TEST_VMUL(INSN_NAME, q, int, s, 16, 8);
+  TEST_VMUL(INSN_NAME, q, int, s, 32, 4);
+  TEST_VMUL(INSN_NAME, q, uint, u, 8, 16);
+  TEST_VMUL(INSN_NAME, q, uint, u, 16, 8);
+  TEST_VMUL(INSN_NAME, q, uint, u, 32, 4);
+  TEST_VMUL(INSN_NAME, q, poly, p, 8, 16);
+  TEST_VMUL(INSN_NAME, q, float, f, 32, 4);
+
+  CHECK_RESULTS (TEST_MSG, "");
+}
+
+int main (void)
+{
+  FNNAME (INSN_NAME) ();
+
+  return 0;
+}
diff --git a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vneg.c b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vneg.c
new file mode 100644 (file)
index 0000000..c45492d
--- /dev/null
@@ -0,0 +1,74 @@
+#define INSN_NAME vneg
+#define TEST_MSG "VNEG/VNEGQ"
+
+/* Extra tests for functions requiring floating-point types.  */
+void exec_vneg_f32(void);
+#define EXTRA_TESTS exec_vneg_f32
+
+#include "unary_op.inc"
+
+/* Expected results.  */
+VECT_VAR_DECL(expected,int,8,8) [] = { 0x10, 0xf, 0xe, 0xd,
+                                      0xc, 0xb, 0xa, 0x9 };
+VECT_VAR_DECL(expected,int,16,4) [] = { 0x10, 0xf, 0xe, 0xd };
+VECT_VAR_DECL(expected,int,32,2) [] = { 0x10, 0xf };
+VECT_VAR_DECL(expected,int,64,1) [] = { 0x3333333333333333 };
+VECT_VAR_DECL(expected,uint,8,8) [] = { 0x33, 0x33, 0x33, 0x33,
+                                       0x33, 0x33, 0x33, 0x33 };
+VECT_VAR_DECL(expected,uint,16,4) [] = { 0x3333, 0x3333, 0x3333, 0x3333 };
+VECT_VAR_DECL(expected,uint,32,2) [] = { 0x33333333, 0x33333333 };
+VECT_VAR_DECL(expected,uint,64,1) [] = { 0x3333333333333333 };
+VECT_VAR_DECL(expected,poly,8,8) [] = { 0x33, 0x33, 0x33, 0x33,
+                                       0x33, 0x33, 0x33, 0x33 };
+VECT_VAR_DECL(expected,poly,16,4) [] = { 0x3333, 0x3333, 0x3333, 0x3333 };
+VECT_VAR_DECL(expected,hfloat,32,2) [] = { 0x33333333, 0x33333333 };
+VECT_VAR_DECL(expected,int,8,16) [] = { 0x10, 0xf, 0xe, 0xd, 0xc, 0xb, 0xa, 0x9,
+                                       0x8, 0x7, 0x6, 0x5, 0x4, 0x3, 0x2, 0x1 };
+VECT_VAR_DECL(expected,int,16,8) [] = { 0x10, 0xf, 0xe, 0xd,
+                                       0xc, 0xb, 0xa, 0x9 };
+VECT_VAR_DECL(expected,int,32,4) [] = { 0x10, 0xf, 0xe, 0xd };
+VECT_VAR_DECL(expected,int,64,2) [] = { 0x3333333333333333,
+                                       0x3333333333333333 };
+VECT_VAR_DECL(expected,uint,8,16) [] = { 0x33, 0x33, 0x33, 0x33,
+                                        0x33, 0x33, 0x33, 0x33,
+                                        0x33, 0x33, 0x33, 0x33,
+                                        0x33, 0x33, 0x33, 0x33 };
+VECT_VAR_DECL(expected,uint,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333,
+                                        0x3333, 0x3333, 0x3333, 0x3333 };
+VECT_VAR_DECL(expected,uint,32,4) [] = { 0x33333333, 0x33333333,
+                                        0x33333333, 0x33333333 };
+VECT_VAR_DECL(expected,uint,64,2) [] = { 0x3333333333333333,
+                                        0x3333333333333333 };
+VECT_VAR_DECL(expected,poly,8,16) [] = { 0x33, 0x33, 0x33, 0x33,
+                                        0x33, 0x33, 0x33, 0x33,
+                                        0x33, 0x33, 0x33, 0x33,
+                                        0x33, 0x33, 0x33, 0x33 };
+VECT_VAR_DECL(expected,poly,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333,
+                                        0x3333, 0x3333, 0x3333, 0x3333 };
+VECT_VAR_DECL(expected,hfloat,32,4) [] = { 0x33333333, 0x33333333,
+                                          0x33333333, 0x33333333 };
+
+/* Expected results for float32 variants. Needs to be separated since
+   the generic test function does not test floating-point
+   versions.  */
+VECT_VAR_DECL(expected_float32,hfloat,32,2) [] = { 0xc0133333, 0xc0133333 };
+VECT_VAR_DECL(expected_float32,hfloat,32,4) [] = { 0xc059999a, 0xc059999a,
+                                                  0xc059999a, 0xc059999a };
+
+void exec_vneg_f32(void)
+{
+  DECL_VARIABLE(vector, float, 32, 2);
+  DECL_VARIABLE(vector, float, 32, 4);
+
+  DECL_VARIABLE(vector_res, float, 32, 2);
+  DECL_VARIABLE(vector_res, float, 32, 4);
+
+  VDUP(vector, , float, f, 32, 2, 2.3f);
+  VDUP(vector, q, float, f, 32, 4, 3.4f);
+
+  TEST_UNARY_OP(INSN_NAME, , float, f, 32, 2);
+  TEST_UNARY_OP(INSN_NAME, q, float, f, 32, 4);
+
+  CHECK_FP(TEST_MSG, float, 32, 2, PRIx32, expected_float32, "");
+  CHECK_FP(TEST_MSG, float, 32, 4, PRIx32, expected_float32, "");
+}
diff --git a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vorn.c b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vorn.c
new file mode 100644 (file)
index 0000000..6905cb6
--- /dev/null
@@ -0,0 +1,48 @@
+#define INSN_NAME vorn
+#define TEST_MSG "VORN/VORNQ"
+
+#include "binary_op.inc"
+
+/* Expected results.  */
+VECT_VAR_DECL(expected,int,8,8) [] = { 0xfd, 0xfd, 0xff, 0xff,
+                                      0xfd, 0xfd, 0xff, 0xff };
+VECT_VAR_DECL(expected,int,16,4) [] = { 0xfff3, 0xfff3, 0xfff3, 0xfff3 };
+VECT_VAR_DECL(expected,int,32,2) [] = { 0xfffffffc, 0xfffffffd };
+VECT_VAR_DECL(expected,int,64,1) [] = { 0xfffffffffffffffb };
+VECT_VAR_DECL(expected,uint,8,8) [] = { 0xfb, 0xfb, 0xfb, 0xfb,
+                                       0xff, 0xff, 0xff, 0xff };
+VECT_VAR_DECL(expected,uint,16,4) [] = { 0xfff1, 0xfff1, 0xfff3, 0xfff3 };
+VECT_VAR_DECL(expected,uint,32,2) [] = { 0xfffffff7, 0xfffffff7 };
+VECT_VAR_DECL(expected,uint,64,1) [] = { 0xfffffffffffffffd };
+VECT_VAR_DECL(expected,poly,8,8) [] = { 0x33, 0x33, 0x33, 0x33,
+                                       0x33, 0x33, 0x33, 0x33 };
+VECT_VAR_DECL(expected,poly,16,4) [] = { 0x3333, 0x3333, 0x3333, 0x3333 };
+VECT_VAR_DECL(expected,hfloat,32,2) [] = { 0x33333333, 0x33333333 };
+VECT_VAR_DECL(expected,int,8,16) [] = { 0xf9, 0xf9, 0xfb, 0xfb,
+                                       0xfd, 0xfd, 0xff, 0xff,
+                                       0xf9, 0xf9, 0xfb, 0xfb,
+                                       0xfd, 0xfd, 0xff, 0xff };
+VECT_VAR_DECL(expected,int,16,8) [] = { 0xfff3, 0xfff3, 0xfff3, 0xfff3,
+                                       0xfff7, 0xfff7, 0xfff7, 0xfff7 };
+VECT_VAR_DECL(expected,int,32,4) [] = { 0xfffffffd, 0xfffffffd,
+                                       0xffffffff, 0xffffffff };
+VECT_VAR_DECL(expected,int,64,2) [] = { 0xfffffffffffffff7,
+                                       0xfffffffffffffff7 };
+VECT_VAR_DECL(expected,uint,8,16) [] = { 0xf3, 0xf3, 0xf3, 0xf3,
+                                        0xf7, 0xf7, 0xf7, 0xf7,
+                                        0xfb, 0xfb, 0xfb, 0xfb,
+                                        0xff, 0xff, 0xff, 0xff };
+VECT_VAR_DECL(expected,uint,16,8) [] = { 0xfffc, 0xfffd, 0xfffe, 0xffff,
+                                        0xfffc, 0xfffd, 0xfffe, 0xffff };
+VECT_VAR_DECL(expected,uint,32,4) [] = { 0xfffffff8, 0xfffffff9,
+                                        0xfffffffa, 0xfffffffb };
+VECT_VAR_DECL(expected,uint,64,2) [] = { 0xfffffffffffffffc,
+                                        0xfffffffffffffffd };
+VECT_VAR_DECL(expected,poly,8,16) [] = { 0x33, 0x33, 0x33, 0x33,
+                                        0x33, 0x33, 0x33, 0x33,
+                                        0x33, 0x33, 0x33, 0x33,
+                                        0x33, 0x33, 0x33, 0x33 };
+VECT_VAR_DECL(expected,poly,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333,
+                                        0x3333, 0x3333, 0x3333, 0x3333 };
+VECT_VAR_DECL(expected,hfloat,32,4) [] = { 0x33333333, 0x33333333,
+                                          0x33333333, 0x33333333 };
diff --git a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vorr.c b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vorr.c
new file mode 100644 (file)
index 0000000..b2a7dff
--- /dev/null
@@ -0,0 +1,48 @@
+#define INSN_NAME vorr
+#define TEST_MSG "VORR/VORRQ"
+
+#include "binary_op.inc"
+
+/* Expected results.  */
+VECT_VAR_DECL(expected,int,8,8) [] = { 0xf2, 0xf3, 0xf2, 0xf3,
+                                      0xf6, 0xf7, 0xf6, 0xf7 };
+VECT_VAR_DECL(expected,int,16,4) [] = { 0xfffc, 0xfffd, 0xfffe, 0xffff };
+VECT_VAR_DECL(expected,int,32,2) [] = { 0xfffffff3, 0xfffffff3 };
+VECT_VAR_DECL(expected,int,64,1) [] = { 0xfffffffffffffff4 };
+VECT_VAR_DECL(expected,uint,8,8) [] = { 0xf4, 0xf5, 0xf6, 0xf7,
+                                       0xf4, 0xf5, 0xf6, 0xf7 };
+VECT_VAR_DECL(expected,uint,16,4) [] = { 0xfffe, 0xffff, 0xfffe, 0xffff };
+VECT_VAR_DECL(expected,uint,32,2) [] = { 0xfffffff8, 0xfffffff9 };
+VECT_VAR_DECL(expected,uint,64,1) [] = { 0xfffffffffffffff2 };
+VECT_VAR_DECL(expected,poly,8,8) [] = { 0x33, 0x33, 0x33, 0x33,
+                                       0x33, 0x33, 0x33, 0x33 };
+VECT_VAR_DECL(expected,poly,16,4) [] = { 0x3333, 0x3333, 0x3333, 0x3333 };
+VECT_VAR_DECL(expected,hfloat,32,2) [] = { 0x33333333, 0x33333333 };
+VECT_VAR_DECL(expected,int,8,16) [] = { 0xf6, 0xf7, 0xf6, 0xf7,
+                                       0xf6, 0xf7, 0xf6, 0xf7,
+                                       0xfe, 0xff, 0xfe, 0xff,
+                                       0xfe, 0xff, 0xfe, 0xff };
+VECT_VAR_DECL(expected,int,16,8) [] = { 0xfffc, 0xfffd, 0xfffe, 0xffff,
+                                       0xfffc, 0xfffd, 0xfffe, 0xffff };
+VECT_VAR_DECL(expected,int,32,4) [] = { 0xfffffff2, 0xfffffff3,
+                                       0xfffffff2, 0xfffffff3 };
+VECT_VAR_DECL(expected,int,64,2) [] = { 0xfffffffffffffff8,
+                                       0xfffffffffffffff9 };
+VECT_VAR_DECL(expected,uint,8,16) [] = { 0xfc, 0xfd, 0xfe, 0xff,
+                                        0xfc, 0xfd, 0xfe, 0xff,
+                                        0xfc, 0xfd, 0xfe, 0xff,
+                                        0xfc, 0xfd, 0xfe, 0xff };
+VECT_VAR_DECL(expected,uint,16,8) [] = { 0xfff3, 0xfff3, 0xfff3, 0xfff3,
+                                        0xfff7, 0xfff7, 0xfff7, 0xfff7 };
+VECT_VAR_DECL(expected,uint,32,4) [] = { 0xfffffff7, 0xfffffff7,
+                                        0xfffffff7, 0xfffffff7 };
+VECT_VAR_DECL(expected,uint,64,2) [] = { 0xfffffffffffffff3,
+                                        0xfffffffffffffff3 };
+VECT_VAR_DECL(expected,poly,8,16) [] = { 0x33, 0x33, 0x33, 0x33,
+                                        0x33, 0x33, 0x33, 0x33,
+                                        0x33, 0x33, 0x33, 0x33,
+                                        0x33, 0x33, 0x33, 0x33 };
+VECT_VAR_DECL(expected,poly,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333,
+                                        0x3333, 0x3333, 0x3333, 0x3333 };
+VECT_VAR_DECL(expected,hfloat,32,4) [] = { 0x33333333, 0x33333333,
+                                          0x33333333, 0x33333333 };
diff --git a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vqabs.c b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vqabs.c
new file mode 100644 (file)
index 0000000..f2be790
--- /dev/null
@@ -0,0 +1,127 @@
+#define INSN_NAME vqabs
+#define TEST_MSG "VQABS/VQABSQ"
+
+/* Extra tests for functions requiring corner cases tests.  */
+void vqabs_extra(void);
+#define EXTRA_TESTS vqabs_extra
+
+#include "unary_sat_op.inc"
+
+/* Expected results.  */
+VECT_VAR_DECL(expected,int,8,8) [] = { 0x10, 0xf, 0xe, 0xd, 0xc, 0xb, 0xa, 0x9 };
+VECT_VAR_DECL(expected,int,16,4) [] = { 0x10, 0xf, 0xe, 0xd };
+VECT_VAR_DECL(expected,int,32,2) [] = { 0x10, 0xf };
+VECT_VAR_DECL(expected,int,64,1) [] = { 0x3333333333333333 };
+VECT_VAR_DECL(expected,uint,8,8) [] = { 0x33, 0x33, 0x33, 0x33,
+                                       0x33, 0x33, 0x33, 0x33 };
+VECT_VAR_DECL(expected,uint,16,4) [] = { 0x3333, 0x3333, 0x3333, 0x3333 };
+VECT_VAR_DECL(expected,uint,32,2) [] = { 0x33333333, 0x33333333 };
+VECT_VAR_DECL(expected,uint,64,1) [] = { 0x3333333333333333 };
+VECT_VAR_DECL(expected,poly,8,8) [] = { 0x33, 0x33, 0x33, 0x33,
+                                       0x33, 0x33, 0x33, 0x33 };
+VECT_VAR_DECL(expected,poly,16,4) [] = { 0x3333, 0x3333, 0x3333, 0x3333 };
+VECT_VAR_DECL(expected,hfloat,32,2) [] = { 0x33333333, 0x33333333 };
+VECT_VAR_DECL(expected,int,8,16) [] = { 0x10, 0xf, 0xe, 0xd,
+                                       0xc, 0xb, 0xa, 0x9,
+                                       0x8, 0x7, 0x6, 0x5,
+                                       0x4, 0x3, 0x2, 0x1 };
+VECT_VAR_DECL(expected,int,16,8) [] = { 0x10, 0xf, 0xe, 0xd,
+                                       0xc, 0xb, 0xa, 0x9 };
+VECT_VAR_DECL(expected,int,32,4) [] = { 0x10, 0xf, 0xe, 0xd };
+VECT_VAR_DECL(expected,int,64,2) [] = { 0x3333333333333333, 0x3333333333333333 };
+VECT_VAR_DECL(expected,uint,8,16) [] = { 0x33, 0x33, 0x33, 0x33,
+                                        0x33, 0x33, 0x33, 0x33,
+                                        0x33, 0x33, 0x33, 0x33,
+                                        0x33, 0x33, 0x33, 0x33 };
+VECT_VAR_DECL(expected,uint,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333,
+                                        0x3333, 0x3333, 0x3333, 0x3333 };
+VECT_VAR_DECL(expected,uint,32,4) [] = { 0x33333333, 0x33333333,
+                                        0x33333333, 0x33333333 };
+VECT_VAR_DECL(expected,uint,64,2) [] = { 0x3333333333333333,
+                                        0x3333333333333333 };
+VECT_VAR_DECL(expected,poly,8,16) [] = { 0x33, 0x33, 0x33, 0x33,
+                                        0x33, 0x33, 0x33, 0x33,
+                                        0x33, 0x33, 0x33, 0x33,
+                                        0x33, 0x33, 0x33, 0x33 };
+VECT_VAR_DECL(expected,poly,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333,
+                                        0x3333, 0x3333, 0x3333, 0x3333 };
+VECT_VAR_DECL(expected,hfloat,32,4) [] = { 0x33333333, 0x33333333,
+                                          0x33333333, 0x33333333 };
+
+/* Expected values of cumulative_saturation flag.  */
+int VECT_VAR(expected_cumulative_sat,int,8,8) = 0;
+int VECT_VAR(expected_cumulative_sat,int,16,4) = 0;
+int VECT_VAR(expected_cumulative_sat,int,32,2) = 0;
+int VECT_VAR(expected_cumulative_sat,int,8,16) = 0;
+int VECT_VAR(expected_cumulative_sat,int,16,8) = 0;
+int VECT_VAR(expected_cumulative_sat,int,32,4) = 0;
+
+/* Expected results when input is the min negative value of the type.  */
+VECT_VAR_DECL(expected_min_neg,int,8,8) [] = { 0x7f, 0x7f, 0x7f, 0x7f,
+                                              0x7f, 0x7f, 0x7f, 0x7f };
+VECT_VAR_DECL(expected_min_neg,int,16,4) [] = { 0x7fff, 0x7fff,
+                                               0x7fff, 0x7fff };
+VECT_VAR_DECL(expected_min_neg,int,32,2) [] = { 0x7fffffff, 0x7fffffff };
+VECT_VAR_DECL(expected_min_neg,int,8,16) [] = { 0x7f, 0x7f, 0x7f, 0x7f,
+                                               0x7f, 0x7f, 0x7f, 0x7f,
+                                               0x7f, 0x7f, 0x7f, 0x7f,
+                                               0x7f, 0x7f, 0x7f, 0x7f };
+VECT_VAR_DECL(expected_min_neg,int,16,8) [] = { 0x7fff, 0x7fff,
+                                               0x7fff, 0x7fff,
+                                               0x7fff, 0x7fff,
+                                               0x7fff, 0x7fff };
+VECT_VAR_DECL(expected_min_neg,int,32,4) [] = { 0x7fffffff, 0x7fffffff,
+                                               0x7fffffff, 0x7fffffff };
+
+/* Expected values of cumulative_saturation flag when input is the min
+   negative value of the type.  */
+int VECT_VAR(expected_cumulative_sat_min_neg,int,8,8) = 1;
+int VECT_VAR(expected_cumulative_sat_min_neg,int,16,4) = 1;
+int VECT_VAR(expected_cumulative_sat_min_neg,int,32,2) = 1;
+int VECT_VAR(expected_cumulative_sat_min_neg,int,8,16) = 1;
+int VECT_VAR(expected_cumulative_sat_min_neg,int,16,8) = 1;
+int VECT_VAR(expected_cumulative_sat_min_neg,int,32,4) = 1;
+
+void vqabs_extra()
+{
+  /* No need for 64 bits variants.  */
+  DECL_VARIABLE(vector, int, 8, 8);
+  DECL_VARIABLE(vector, int, 16, 4);
+  DECL_VARIABLE(vector, int, 32, 2);
+  DECL_VARIABLE(vector, int, 8, 16);
+  DECL_VARIABLE(vector, int, 16, 8);
+  DECL_VARIABLE(vector, int, 32, 4);
+
+  DECL_VARIABLE(vector_res, int, 8, 8);
+  DECL_VARIABLE(vector_res, int, 16, 4);
+  DECL_VARIABLE(vector_res, int, 32, 2);
+  DECL_VARIABLE(vector_res, int, 8, 16);
+  DECL_VARIABLE(vector_res, int, 16, 8);
+  DECL_VARIABLE(vector_res, int, 32, 4);
+
+  clean_results ();
+
+  /* Initialize input "vector" with min negative values to check
+     saturation.  */
+  VDUP(vector, , int, s, 8, 8, 0x80);
+  VDUP(vector, , int, s, 16, 4, 0x8000);
+  VDUP(vector, , int, s, 32, 2, 0x80000000);
+  VDUP(vector, q, int, s, 8, 16, 0x80);
+  VDUP(vector, q, int, s, 16, 8, 0x8000);
+  VDUP(vector, q, int, s, 32, 4, 0x80000000);
+
+#define MSG "min negative input"
+  TEST_UNARY_SAT_OP(INSN_NAME, , int, s, 8, 8, expected_cumulative_sat_min_neg, MSG);
+  TEST_UNARY_SAT_OP(INSN_NAME, , int, s, 16, 4, expected_cumulative_sat_min_neg, MSG);
+  TEST_UNARY_SAT_OP(INSN_NAME, , int, s, 32, 2, expected_cumulative_sat_min_neg, MSG);
+  TEST_UNARY_SAT_OP(INSN_NAME, q, int, s, 8, 16, expected_cumulative_sat_min_neg, MSG);
+  TEST_UNARY_SAT_OP(INSN_NAME, q, int, s, 16, 8, expected_cumulative_sat_min_neg, MSG);
+  TEST_UNARY_SAT_OP(INSN_NAME, q, int, s, 32, 4, expected_cumulative_sat_min_neg, MSG);
+
+  CHECK(TEST_MSG, int, 8, 8, PRIx8, expected_min_neg, MSG);
+  CHECK(TEST_MSG, int, 16, 4, PRIx8, expected_min_neg, MSG);
+  CHECK(TEST_MSG, int, 32, 2, PRIx8, expected_min_neg, MSG);
+  CHECK(TEST_MSG, int, 8, 16, PRIx8, expected_min_neg, MSG);
+  CHECK(TEST_MSG, int, 16, 8, PRIx8, expected_min_neg, MSG);
+  CHECK(TEST_MSG, int, 32, 4, PRIx8, expected_min_neg, MSG);
+}
diff --git a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vqadd.c b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vqadd.c
new file mode 100644 (file)
index 0000000..c07f5ff
--- /dev/null
@@ -0,0 +1,278 @@
+#define INSN_NAME vqadd
+#define TEST_MSG "VQADD/VQADDQ"
+
+/* Extra tests for special cases:
+   - some requiring intermediate types larger than 64 bits to
+   compute saturation flag.
+   - corner case saturations with types smaller than 64 bits.
+*/
+void vqadd_extras(void);
+#define EXTRA_TESTS vqadd_extras
+
+#include "binary_sat_op.inc"
+
+/* Expected values of cumulative_saturation flag.  */
+int VECT_VAR(expected_cumulative_sat,int,8,8) = 0;
+int VECT_VAR(expected_cumulative_sat,int,16,4) = 0;
+int VECT_VAR(expected_cumulative_sat,int,32,2) = 0;
+int VECT_VAR(expected_cumulative_sat,int,64,1) = 0;
+int VECT_VAR(expected_cumulative_sat,uint,8,8) = 1;
+int VECT_VAR(expected_cumulative_sat,uint,16,4) = 1;
+int VECT_VAR(expected_cumulative_sat,uint,32,2) = 1;
+int VECT_VAR(expected_cumulative_sat,uint,64,1) = 1;
+int VECT_VAR(expected_cumulative_sat,int,8,16) = 0;
+int VECT_VAR(expected_cumulative_sat,int,16,8) = 0;
+int VECT_VAR(expected_cumulative_sat,int,32,4) = 0;
+int VECT_VAR(expected_cumulative_sat,int,64,2) = 0;
+int VECT_VAR(expected_cumulative_sat,uint,8,16) = 1;
+int VECT_VAR(expected_cumulative_sat,uint,16,8) = 1;
+int VECT_VAR(expected_cumulative_sat,uint,32,4) = 1;
+int VECT_VAR(expected_cumulative_sat,uint,64,2) = 1;
+/* Expected results.  */
+VECT_VAR_DECL(expected,int,8,8) [] = { 0x1, 0x2, 0x3, 0x4,
+                                      0x5, 0x6, 0x7, 0x8 };
+VECT_VAR_DECL(expected,int,16,4) [] = { 0x12, 0x13, 0x14, 0x15 };
+VECT_VAR_DECL(expected,int,32,2) [] = { 0x23, 0x24 };
+VECT_VAR_DECL(expected,int,64,1) [] = { 0x34 };
+VECT_VAR_DECL(expected,uint,8,8) [] = { 0xff, 0xff, 0xff, 0xff,
+                                       0xff, 0xff, 0xff, 0xff };
+VECT_VAR_DECL(expected,uint,16,4) [] = { 0xffff, 0xffff, 0xffff, 0xffff };
+VECT_VAR_DECL(expected,uint,32,2) [] = { 0xffffffff, 0xffffffff };
+VECT_VAR_DECL(expected,uint,64,1) [] = { 0xffffffffffffffff };
+VECT_VAR_DECL(expected,poly,8,8) [] = { 0x33, 0x33, 0x33, 0x33,
+                                       0x33, 0x33, 0x33, 0x33 };
+VECT_VAR_DECL(expected,poly,16,4) [] = { 0x3333, 0x3333, 0x3333, 0x3333 };
+VECT_VAR_DECL(expected,hfloat,32,2) [] = { 0x33333333, 0x33333333 };
+VECT_VAR_DECL(expected,int,8,16) [] = { 0x1, 0x2, 0x3, 0x4,
+                                       0x5, 0x6, 0x7, 0x8,
+                                       0x9, 0xa, 0xb, 0xc,
+                                       0xd, 0xe, 0xf, 0x10 };
+VECT_VAR_DECL(expected,int,16,8) [] = { 0x12, 0x13, 0x14, 0x15,
+                                       0x16, 0x17, 0x18, 0x19 };
+VECT_VAR_DECL(expected,int,32,4) [] = { 0x23, 0x24, 0x25, 0x26 };
+VECT_VAR_DECL(expected,int,64,2) [] = { 0x34, 0x35 };
+VECT_VAR_DECL(expected,uint,8,16) [] = { 0xff, 0xff, 0xff, 0xff,
+                                        0xff, 0xff, 0xff, 0xff,
+                                        0xff, 0xff, 0xff, 0xff,
+                                        0xff, 0xff, 0xff, 0xff };
+VECT_VAR_DECL(expected,uint,16,8) [] = { 0xffff, 0xffff, 0xffff, 0xffff,
+                                        0xffff, 0xffff, 0xffff, 0xffff };
+VECT_VAR_DECL(expected,uint,32,4) [] = { 0xffffffff, 0xffffffff,
+                                        0xffffffff, 0xffffffff };
+VECT_VAR_DECL(expected,uint,64,2) [] = { 0xffffffffffffffff,
+                                        0xffffffffffffffff };
+VECT_VAR_DECL(expected,poly,8,16) [] = { 0x33, 0x33, 0x33, 0x33,
+                                        0x33, 0x33, 0x33, 0x33,
+                                        0x33, 0x33, 0x33, 0x33,
+                                        0x33, 0x33, 0x33, 0x33 };
+VECT_VAR_DECL(expected,poly,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333,
+                                        0x3333, 0x3333, 0x3333, 0x3333 };
+VECT_VAR_DECL(expected,hfloat,32,4) [] = { 0x33333333, 0x33333333,
+                                          0x33333333, 0x33333333 };
+
+
+/* 64-bits types, with 0 as second input.  */
+int VECT_VAR(expected_cumulative_sat_64,int,64,1) = 0;
+int VECT_VAR(expected_cumulative_sat_64,uint,64,1) = 0;
+int VECT_VAR(expected_cumulative_sat_64,int,64,2) = 0;
+int VECT_VAR(expected_cumulative_sat_64,uint,64,2) = 0;
+VECT_VAR_DECL(expected_64,int,64,1) [] = { 0xfffffffffffffff0 };
+VECT_VAR_DECL(expected_64,uint,64,1) [] = { 0xfffffffffffffff0 };
+VECT_VAR_DECL(expected_64,int,64,2) [] = { 0xfffffffffffffff0,
+                                          0xfffffffffffffff1 };
+VECT_VAR_DECL(expected_64,uint,64,2) [] = { 0xfffffffffffffff0,
+                                           0xfffffffffffffff1 };
+
+/* 64-bits types, some cases causing cumulative saturation.  */
+int VECT_VAR(expected_cumulative_sat_64_2,int,64,1) = 0;
+int VECT_VAR(expected_cumulative_sat_64_2,uint,64,1) = 1;
+int VECT_VAR(expected_cumulative_sat_64_2,int,64,2) = 0;
+int VECT_VAR(expected_cumulative_sat_64_2,uint,64,2) = 1;
+VECT_VAR_DECL(expected_64_2,int,64,1) [] = { 0x34 };
+VECT_VAR_DECL(expected_64_2,uint,64,1) [] = { 0xffffffffffffffff };
+VECT_VAR_DECL(expected_64_2,int,64,2) [] = { 0x34, 0x35 };
+VECT_VAR_DECL(expected_64_2,uint,64,2) [] = { 0xffffffffffffffff,
+                                             0xffffffffffffffff };
+
+/* 64-bits types, all causing cumulative saturation.  */
+int VECT_VAR(expected_cumulative_sat_64_3,int,64,1) = 1;
+int VECT_VAR(expected_cumulative_sat_64_3,uint,64,1) = 1;
+int VECT_VAR(expected_cumulative_sat_64_3,int,64,2) = 1;
+int VECT_VAR(expected_cumulative_sat_64_3,uint,64,2) = 1;
+VECT_VAR_DECL(expected_64_3,int,64,1) [] = { 0x8000000000000000 };
+VECT_VAR_DECL(expected_64_3,uint,64,1) [] = { 0xffffffffffffffff };
+VECT_VAR_DECL(expected_64_3,int,64,2) [] = { 0x7fffffffffffffff,
+                                            0x7fffffffffffffff };
+VECT_VAR_DECL(expected_64_3,uint,64,2) [] = { 0xffffffffffffffff,
+                                             0xffffffffffffffff };
+
+/* smaller types, corner cases causing cumulative saturation. (1)  */
+int VECT_VAR(expected_csat_lt_64_1,int,8,8) = 1;
+int VECT_VAR(expected_csat_lt_64_1,int,16,4) = 1;
+int VECT_VAR(expected_csat_lt_64_1,int,32,2) = 1;
+int VECT_VAR(expected_csat_lt_64_1,int,8,16) = 1;
+int VECT_VAR(expected_csat_lt_64_1,int,16,8) = 1;
+int VECT_VAR(expected_csat_lt_64_1,int,32,4) = 1;
+VECT_VAR_DECL(expected_lt_64_1,int,8,8) [] = { 0x80, 0x80, 0x80, 0x80,
+                                              0x80, 0x80, 0x80, 0x80 };
+VECT_VAR_DECL(expected_lt_64_1,int,16,4) [] = { 0x8000, 0x8000,
+                                               0x8000, 0x8000 };
+VECT_VAR_DECL(expected_lt_64_1,int,32,2) [] = { 0x80000000, 0x80000000 };
+VECT_VAR_DECL(expected_lt_64_1,int,8,16) [] = { 0x80, 0x80, 0x80, 0x80,
+                                               0x80, 0x80, 0x80, 0x80,
+                                               0x80, 0x80, 0x80, 0x80,
+                                               0x80, 0x80, 0x80, 0x80 };
+VECT_VAR_DECL(expected_lt_64_1,int,16,8) [] = { 0x8000, 0x8000,
+                                               0x8000, 0x8000,
+                                               0x8000, 0x8000,
+                                               0x8000, 0x8000 };
+VECT_VAR_DECL(expected_lt_64_1,int,32,4) [] = { 0x80000000, 0x80000000,
+                                               0x80000000, 0x80000000 };
+
+/* smaller types, corner cases causing cumulative saturation. (2)  */
+int VECT_VAR(expected_csat_lt_64_2,uint,8,8) = 1;
+int VECT_VAR(expected_csat_lt_64_2,uint,16,4) = 1;
+int VECT_VAR(expected_csat_lt_64_2,uint,32,2) = 1;
+int VECT_VAR(expected_csat_lt_64_2,uint,8,16) = 1;
+int VECT_VAR(expected_csat_lt_64_2,uint,16,8) = 1;
+int VECT_VAR(expected_csat_lt_64_2,uint,32,4) = 1;
+VECT_VAR_DECL(expected_lt_64_2,uint,8,8) [] = { 0xff, 0xff, 0xff, 0xff,
+                                               0xff, 0xff, 0xff, 0xff };
+VECT_VAR_DECL(expected_lt_64_2,uint,16,4) [] = { 0xffff, 0xffff,
+                                                0xffff, 0xffff };
+VECT_VAR_DECL(expected_lt_64_2,uint,32,2) [] = { 0xffffffff,
+                                                0xffffffff };
+VECT_VAR_DECL(expected_lt_64_2,uint,8,16) [] = { 0xff, 0xff, 0xff, 0xff,
+                                                0xff, 0xff, 0xff, 0xff,
+                                                0xff, 0xff, 0xff, 0xff,
+                                                0xff, 0xff, 0xff, 0xff };
+VECT_VAR_DECL(expected_lt_64_2,uint,16,8) [] = { 0xffff, 0xffff,
+                                                0xffff, 0xffff,
+                                                0xffff, 0xffff,
+                                                0xffff, 0xffff };
+VECT_VAR_DECL(expected_lt_64_2,uint,32,4) [] = { 0xffffffff, 0xffffffff,
+                                                0xffffffff, 0xffffffff };
+
+void vqadd_extras(void)
+{
+  DECL_VARIABLE_ALL_VARIANTS(vector1);
+  DECL_VARIABLE_ALL_VARIANTS(vector2);
+  DECL_VARIABLE_ALL_VARIANTS(vector_res);
+
+  /* Initialize input "vector1" from "buffer".  */
+  TEST_MACRO_ALL_VARIANTS_2_5(VLOAD, vector1, buffer);
+
+  /* Use a second vector full of 0.  */
+  VDUP(vector2, , int, s, 64, 1, 0);
+  VDUP(vector2, , uint, u, 64, 1, 0);
+  VDUP(vector2, q, int, s, 64, 2, 0);
+  VDUP(vector2, q, uint, u, 64, 2, 0);
+
+#define MSG "64 bits saturation adding zero"
+  TEST_BINARY_SAT_OP(INSN_NAME, , int, s, 64, 1, expected_cumulative_sat_64, MSG);
+  TEST_BINARY_SAT_OP(INSN_NAME, , uint, u, 64, 1, expected_cumulative_sat_64, MSG);
+  TEST_BINARY_SAT_OP(INSN_NAME, q, int, s, 64, 2, expected_cumulative_sat_64, MSG);
+  TEST_BINARY_SAT_OP(INSN_NAME, q, uint, u, 64, 2, expected_cumulative_sat_64, MSG);
+
+  CHECK(TEST_MSG, int, 64, 1, PRIx64, expected_64, MSG);
+  CHECK(TEST_MSG, uint, 64, 1, PRIx64, expected_64, MSG);
+  CHECK(TEST_MSG, int, 64, 2, PRIx64, expected_64, MSG);
+  CHECK(TEST_MSG, uint, 64, 2, PRIx64, expected_64, MSG);
+
+  /* Another set of tests with non-zero values, some chosen to create
+     overflow.  */
+  VDUP(vector2, , int, s, 64, 1, 0x44);
+  VDUP(vector2, , uint, u, 64, 1, 0x88);
+  VDUP(vector2, q, int, s, 64, 2, 0x44);
+  VDUP(vector2, q, uint, u, 64, 2, 0x88);
+
+#undef MSG
+#define MSG "64 bits saturation cumulative_sat (2)"
+  TEST_BINARY_SAT_OP(INSN_NAME, , int, s, 64, 1, expected_cumulative_sat_64_2, MSG);
+  TEST_BINARY_SAT_OP(INSN_NAME, , uint, u, 64, 1, expected_cumulative_sat_64_2, MSG);
+  TEST_BINARY_SAT_OP(INSN_NAME, q, int, s, 64, 2, expected_cumulative_sat_64_2, MSG);
+  TEST_BINARY_SAT_OP(INSN_NAME, q, uint, u, 64, 2, expected_cumulative_sat_64_2, MSG);
+
+  CHECK(TEST_MSG, int, 64, 1, PRIx64, expected_64_2, MSG);
+  CHECK(TEST_MSG, uint, 64, 1, PRIx64, expected_64_2, MSG);
+  CHECK(TEST_MSG, int, 64, 2, PRIx64, expected_64_2, MSG);
+  CHECK(TEST_MSG, uint, 64, 2, PRIx64, expected_64_2, MSG);
+
+  /* Another set of tests, with input values chosen to set
+     cumulative_sat in all cases.  */
+  VDUP(vector2, , int, s, 64, 1, 0x8000000000000003LL);
+  VDUP(vector2, , uint, u, 64, 1, 0x88);
+  /* To check positive saturation, we need to write a positive value
+     in vector1.  */
+  VDUP(vector1, q, int, s, 64, 2, 0x4000000000000000LL);
+  VDUP(vector2, q, int, s, 64, 2, 0x4000000000000000LL);
+  VDUP(vector2, q, uint, u, 64, 2, 0x22);
+
+#undef MSG
+#define MSG "64 bits saturation cumulative_sat (3)"
+  TEST_BINARY_SAT_OP(INSN_NAME, , int, s, 64, 1, expected_cumulative_sat_64_3, MSG);
+  TEST_BINARY_SAT_OP(INSN_NAME, , uint, u, 64, 1, expected_cumulative_sat_64_3, MSG);
+  TEST_BINARY_SAT_OP(INSN_NAME, q, int, s, 64, 2, expected_cumulative_sat_64_3, MSG);
+  TEST_BINARY_SAT_OP(INSN_NAME, q, uint, u, 64, 2, expected_cumulative_sat_64_3, MSG);
+
+  CHECK(TEST_MSG, int, 64, 1, PRIx64, expected_64_3, MSG);
+  CHECK(TEST_MSG, uint, 64, 1, PRIx64, expected_64_3, MSG);
+  CHECK(TEST_MSG, int, 64, 2, PRIx64, expected_64_3, MSG);
+  CHECK(TEST_MSG, uint, 64, 2, PRIx64, expected_64_3, MSG);
+
+  /* To improve coverage, check saturation with less than 64 bits
+     too.  */
+  VDUP(vector2, , int, s, 8, 8, 0x81);
+  VDUP(vector2, , int, s, 16, 4, 0x8001);
+  VDUP(vector2, , int, s, 32, 2, 0x80000001);
+  VDUP(vector2, q, int, s, 8, 16, 0x81);
+  VDUP(vector2, q, int, s, 16, 8, 0x8001);
+  VDUP(vector2, q, int, s, 32, 4, 0x80000001);
+
+#undef MSG
+#define MSG "less than 64 bits saturation cumulative_sat (1)"
+  TEST_BINARY_SAT_OP(INSN_NAME, , int, s, 8, 8, expected_csat_lt_64_1, MSG);
+  TEST_BINARY_SAT_OP(INSN_NAME, , int, s, 16, 4, expected_csat_lt_64_1, MSG);
+  TEST_BINARY_SAT_OP(INSN_NAME, , int, s, 32, 2, expected_csat_lt_64_1, MSG);
+  TEST_BINARY_SAT_OP(INSN_NAME, q, int, s, 8, 16, expected_csat_lt_64_1, MSG);
+  TEST_BINARY_SAT_OP(INSN_NAME, q, int, s, 16, 8, expected_csat_lt_64_1, MSG);
+  TEST_BINARY_SAT_OP(INSN_NAME, q, int, s, 32, 4, expected_csat_lt_64_1, MSG);
+
+  CHECK(TEST_MSG, int, 8, 8, PRIx8, expected_lt_64_1, MSG);
+  CHECK(TEST_MSG, int, 16, 4, PRIx16, expected_lt_64_1, MSG);
+  CHECK(TEST_MSG, int, 32, 2, PRIx32, expected_lt_64_1, MSG);
+  CHECK(TEST_MSG, int, 8, 16, PRIx8, expected_lt_64_1, MSG);
+  CHECK(TEST_MSG, int, 16, 8, PRIx16, expected_lt_64_1, MSG);
+  CHECK(TEST_MSG, int, 32, 4, PRIx32, expected_lt_64_1, MSG);
+
+  /* Another set of tests with large vector1 values.  */
+  VDUP(vector1, , uint, u, 8, 8, 0xF0);
+  VDUP(vector1, , uint, u, 16, 4, 0xFFF0);
+  VDUP(vector1, , uint, u, 32, 2, 0xFFFFFFF0);
+  VDUP(vector1, q, uint, u, 8, 16, 0xF0);
+  VDUP(vector1, q, uint, u, 16, 8, 0xFFF0);
+  VDUP(vector1, q, uint, u, 32, 4, 0xFFFFFFF0);
+
+  VDUP(vector2, , uint, u, 8, 8, 0x20);
+  VDUP(vector2, , uint, u, 16, 4, 0x20);
+  VDUP(vector2, , uint, u, 32, 2, 0x20);
+  VDUP(vector2, q, uint, u, 8, 16, 0x20);
+  VDUP(vector2, q, uint, u, 16, 8, 0x20);
+  VDUP(vector2, q, uint, u, 32, 4, 0x20);
+
+#undef MSG
+#define MSG "less than 64 bits saturation cumulative_sat (2)"
+  TEST_BINARY_SAT_OP(INSN_NAME, , uint, u, 8, 8, expected_csat_lt_64_2, MSG);
+  TEST_BINARY_SAT_OP(INSN_NAME, , uint, u, 16, 4, expected_csat_lt_64_2, MSG);
+  TEST_BINARY_SAT_OP(INSN_NAME, , uint, u, 32, 2, expected_csat_lt_64_2, MSG);
+  TEST_BINARY_SAT_OP(INSN_NAME, q, uint, u, 8, 16, expected_csat_lt_64_2, MSG);
+  TEST_BINARY_SAT_OP(INSN_NAME, q, uint, u, 16, 8, expected_csat_lt_64_2, MSG);
+  TEST_BINARY_SAT_OP(INSN_NAME, q, uint, u, 32, 4, expected_csat_lt_64_2, MSG);
+
+  CHECK(TEST_MSG, uint, 8, 8, PRIx8, expected_lt_64_2, MSG);
+  CHECK(TEST_MSG, uint, 16, 4, PRIx16, expected_lt_64_2, MSG);
+  CHECK(TEST_MSG, uint, 32, 2, PRIx32, expected_lt_64_2, MSG);
+  CHECK(TEST_MSG, uint, 8, 16, PRIx8, expected_lt_64_2, MSG);
+  CHECK(TEST_MSG, uint, 16, 8, PRIx16, expected_lt_64_2, MSG);
+  CHECK(TEST_MSG, uint, 32, 4, PRIx32, expected_lt_64_2, MSG);
+}
diff --git a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vqneg.c b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vqneg.c
new file mode 100644 (file)
index 0000000..942eba8
--- /dev/null
@@ -0,0 +1,127 @@
+#define INSN_NAME vqneg
+#define TEST_MSG "VQNEG/VQNEGQ"
+
+/* Extra tests for functions requiring corner cases tests */
+void vqneg_extra(void);
+#define EXTRA_TESTS vqneg_extra
+
+#include "unary_sat_op.inc"
+
+/* Expected results.  */
+VECT_VAR_DECL(expected,int,8,8) [] = { 0x10, 0xf, 0xe, 0xd, 0xc, 0xb, 0xa, 0x9 };
+VECT_VAR_DECL(expected,int,16,4) [] = { 0x10, 0xf, 0xe, 0xd };
+VECT_VAR_DECL(expected,int,32,2) [] = { 0x10, 0xf };
+VECT_VAR_DECL(expected,int,64,1) [] = { 0x3333333333333333 };
+VECT_VAR_DECL(expected,uint,8,8) [] = { 0x33, 0x33, 0x33, 0x33,
+                                       0x33, 0x33, 0x33, 0x33 };
+VECT_VAR_DECL(expected,uint,16,4) [] = { 0x3333, 0x3333, 0x3333, 0x3333 };
+VECT_VAR_DECL(expected,uint,32,2) [] = { 0x33333333, 0x33333333 };
+VECT_VAR_DECL(expected,uint,64,1) [] = { 0x3333333333333333 };
+VECT_VAR_DECL(expected,poly,8,8) [] = { 0x33, 0x33, 0x33, 0x33,
+                                       0x33, 0x33, 0x33, 0x33 };
+VECT_VAR_DECL(expected,poly,16,4) [] = { 0x3333, 0x3333, 0x3333, 0x3333 };
+VECT_VAR_DECL(expected,hfloat,32,2) [] = { 0x33333333, 0x33333333 };
+VECT_VAR_DECL(expected,int,8,16) [] = { 0x10, 0xf, 0xe, 0xd,
+                                       0xc, 0xb, 0xa, 0x9,
+                                       0x8, 0x7, 0x6, 0x5,
+                                       0x4, 0x3, 0x2, 0x1 };
+VECT_VAR_DECL(expected,int,16,8) [] = { 0x10, 0xf, 0xe, 0xd,
+                                       0xc, 0xb, 0xa, 0x9 };
+VECT_VAR_DECL(expected,int,32,4) [] = { 0x10, 0xf, 0xe, 0xd };
+VECT_VAR_DECL(expected,int,64,2) [] = { 0x3333333333333333, 0x3333333333333333 };
+VECT_VAR_DECL(expected,uint,8,16) [] = { 0x33, 0x33, 0x33, 0x33,
+                                        0x33, 0x33, 0x33, 0x33,
+                                        0x33, 0x33, 0x33, 0x33,
+                                        0x33, 0x33, 0x33, 0x33 };
+VECT_VAR_DECL(expected,uint,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333,
+                                        0x3333, 0x3333, 0x3333, 0x3333 };
+VECT_VAR_DECL(expected,uint,32,4) [] = { 0x33333333, 0x33333333,
+                                        0x33333333, 0x33333333 };
+VECT_VAR_DECL(expected,uint,64,2) [] = { 0x3333333333333333,
+                                        0x3333333333333333 };
+VECT_VAR_DECL(expected,poly,8,16) [] = { 0x33, 0x33, 0x33, 0x33,
+                                        0x33, 0x33, 0x33, 0x33,
+                                        0x33, 0x33, 0x33, 0x33,
+                                        0x33, 0x33, 0x33, 0x33 };
+VECT_VAR_DECL(expected,poly,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333,
+                                        0x3333, 0x3333, 0x3333, 0x3333 };
+VECT_VAR_DECL(expected,hfloat,32,4) [] = { 0x33333333, 0x33333333,
+                                          0x33333333, 0x33333333 };
+
+/* Expected values of cumulative_saturation flag.  */
+int VECT_VAR(expected_cumulative_sat,int,8,8) = 0;
+int VECT_VAR(expected_cumulative_sat,int,16,4) = 0;
+int VECT_VAR(expected_cumulative_sat,int,32,2) = 0;
+int VECT_VAR(expected_cumulative_sat,int,8,16) = 0;
+int VECT_VAR(expected_cumulative_sat,int,16,8) = 0;
+int VECT_VAR(expected_cumulative_sat,int,32,4) = 0;
+
+/* Expected results when input is the min negative value of the type.  */
+VECT_VAR_DECL(expected_min_neg,int,8,8) [] = { 0x7f, 0x7f, 0x7f, 0x7f,
+                                              0x7f, 0x7f, 0x7f, 0x7f };
+VECT_VAR_DECL(expected_min_neg,int,16,4) [] = { 0x7fff, 0x7fff,
+                                               0x7fff, 0x7fff };
+VECT_VAR_DECL(expected_min_neg,int,32,2) [] = { 0x7fffffff, 0x7fffffff };
+VECT_VAR_DECL(expected_min_neg,int,8,16) [] = { 0x7f, 0x7f, 0x7f, 0x7f,
+                                               0x7f, 0x7f, 0x7f, 0x7f,
+                                               0x7f, 0x7f, 0x7f, 0x7f,
+                                               0x7f, 0x7f, 0x7f, 0x7f };
+VECT_VAR_DECL(expected_min_neg,int,16,8) [] = { 0x7fff, 0x7fff,
+                                               0x7fff, 0x7fff,
+                                               0x7fff, 0x7fff,
+                                               0x7fff, 0x7fff };
+VECT_VAR_DECL(expected_min_neg,int,32,4) [] = { 0x7fffffff, 0x7fffffff,
+                                               0x7fffffff, 0x7fffffff };
+
+/* Expected values of cumulative_saturation flag when input is the min
+   negative value of the type.  */
+int VECT_VAR(expected_cumulative_sat_min_neg,int,8,8) = 1;
+int VECT_VAR(expected_cumulative_sat_min_neg,int,16,4) = 1;
+int VECT_VAR(expected_cumulative_sat_min_neg,int,32,2) = 1;
+int VECT_VAR(expected_cumulative_sat_min_neg,int,8,16) = 1;
+int VECT_VAR(expected_cumulative_sat_min_neg,int,16,8) = 1;
+int VECT_VAR(expected_cumulative_sat_min_neg,int,32,4) = 1;
+
+void vqneg_extra()
+{
+  /* No need for 64 bits variants.  */
+  DECL_VARIABLE(vector, int, 8, 8);
+  DECL_VARIABLE(vector, int, 16, 4);
+  DECL_VARIABLE(vector, int, 32, 2);
+  DECL_VARIABLE(vector, int, 8, 16);
+  DECL_VARIABLE(vector, int, 16, 8);
+  DECL_VARIABLE(vector, int, 32, 4);
+
+  DECL_VARIABLE(vector_res, int, 8, 8);
+  DECL_VARIABLE(vector_res, int, 16, 4);
+  DECL_VARIABLE(vector_res, int, 32, 2);
+  DECL_VARIABLE(vector_res, int, 8, 16);
+  DECL_VARIABLE(vector_res, int, 16, 8);
+  DECL_VARIABLE(vector_res, int, 32, 4);
+
+  clean_results ();
+
+  /* Initialize input "vector" with min negative values to check
+     saturation.  */
+  VDUP(vector, , int, s, 8, 8, 0x80);
+  VDUP(vector, , int, s, 16, 4, 0x8000);
+  VDUP(vector, , int, s, 32, 2, 0x80000000);
+  VDUP(vector, q, int, s, 8, 16, 0x80);
+  VDUP(vector, q, int, s, 16, 8, 0x8000);
+  VDUP(vector, q, int, s, 32, 4, 0x80000000);
+
+#define MSG "min negative input"
+  TEST_UNARY_SAT_OP(INSN_NAME, , int, s, 8, 8, expected_cumulative_sat_min_neg, MSG);
+  TEST_UNARY_SAT_OP(INSN_NAME, , int, s, 16, 4, expected_cumulative_sat_min_neg, MSG);
+  TEST_UNARY_SAT_OP(INSN_NAME, , int, s, 32, 2, expected_cumulative_sat_min_neg, MSG);
+  TEST_UNARY_SAT_OP(INSN_NAME, q, int, s, 8, 16, expected_cumulative_sat_min_neg, MSG);
+  TEST_UNARY_SAT_OP(INSN_NAME, q, int, s, 16, 8, expected_cumulative_sat_min_neg, MSG);
+  TEST_UNARY_SAT_OP(INSN_NAME, q, int, s, 32, 4, expected_cumulative_sat_min_neg, MSG);
+
+  CHECK(TEST_MSG, int, 8, 8, PRIx8, expected_min_neg, MSG);
+  CHECK(TEST_MSG, int, 16, 4, PRIx8, expected_min_neg, MSG);
+  CHECK(TEST_MSG, int, 32, 2, PRIx8, expected_min_neg, MSG);
+  CHECK(TEST_MSG, int, 8, 16, PRIx8, expected_min_neg, MSG);
+  CHECK(TEST_MSG, int, 16, 8, PRIx8, expected_min_neg, MSG);
+  CHECK(TEST_MSG, int, 32, 4, PRIx8, expected_min_neg, MSG);
+}
diff --git a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vqsub.c b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vqsub.c
new file mode 100644 (file)
index 0000000..04df5fe
--- /dev/null
@@ -0,0 +1,278 @@
+#define INSN_NAME vqsub
+#define TEST_MSG "VQSUB/VQSUBQ"
+
+/* Extra tests for special cases:
+   - some requiring intermediate types larger than 64 bits to
+   compute saturation flag.
+   - corner case saturations with types smaller than 64 bits.
+*/
+void vqsub_extras(void);
+#define EXTRA_TESTS vqsub_extras
+
+#include "binary_sat_op.inc"
+
+
+/* Expected results.  */
+VECT_VAR_DECL(expected,int,8,8) [] = { 0xdf, 0xe0, 0xe1, 0xe2,
+                                      0xe3, 0xe4, 0xe5, 0xe6 };
+VECT_VAR_DECL(expected,int,16,4) [] = { 0xffce, 0xffcf,
+                                       0xffd0, 0xffd1 };
+VECT_VAR_DECL(expected,int,32,2) [] = { 0xffffffbd, 0xffffffbe };
+VECT_VAR_DECL(expected,int,64,1) [] = { 0xffffffffffffffac };
+VECT_VAR_DECL(expected,uint,8,8) [] = { 0x9b, 0x9c, 0x9d, 0x9e,
+                                       0x9f, 0xa0, 0xa1, 0xa2 };
+VECT_VAR_DECL(expected,uint,16,4) [] = { 0xff8a, 0xff8b,
+                                        0xff8c, 0xff8d };
+VECT_VAR_DECL(expected,uint,32,2) [] = { 0xffffff79, 0xffffff7a };
+VECT_VAR_DECL(expected,uint,64,1) [] = { 0xffffffffffffff68 };
+VECT_VAR_DECL(expected,poly,8,8) [] = { 0x33, 0x33, 0x33, 0x33,
+                                       0x33, 0x33, 0x33, 0x33 };
+VECT_VAR_DECL(expected,poly,16,4) [] = { 0x3333, 0x3333, 0x3333, 0x3333 };
+VECT_VAR_DECL(expected,hfloat,32,2) [] = { 0x33333333, 0x33333333 };
+VECT_VAR_DECL(expected,int,8,16) [] = { 0xdf, 0xe0, 0xe1, 0xe2,
+                                       0xe3, 0xe4, 0xe5, 0xe6,
+                                       0xe7, 0xe8, 0xe9, 0xea,
+                                       0xeb, 0xec, 0xed, 0xee };
+VECT_VAR_DECL(expected,int,16,8) [] = { 0xffce, 0xffcf, 0xffd0, 0xffd1,
+                                       0xffd2, 0xffd3, 0xffd4, 0xffd5 };
+VECT_VAR_DECL(expected,int,32,4) [] = { 0xffffffbd, 0xffffffbe,
+                                       0xffffffbf, 0xffffffc0 };
+VECT_VAR_DECL(expected,int,64,2) [] = { 0xffffffffffffffac,
+                                       0xffffffffffffffad };
+VECT_VAR_DECL(expected,uint,8,16) [] = { 0x9b, 0x9c, 0x9d, 0x9e,
+                                        0x9f, 0xa0, 0xa1, 0xa2,
+                                        0xa3, 0xa4, 0xa5, 0xa6,
+                                        0xa7, 0xa8, 0xa9, 0xaa };
+VECT_VAR_DECL(expected,uint,16,8) [] = { 0xff8a, 0xff8b, 0xff8c, 0xff8d,
+                                        0xff8e, 0xff8f, 0xff90, 0xff91 };
+VECT_VAR_DECL(expected,uint,32,4) [] = { 0xffffff79, 0xffffff7a,
+                                        0xffffff7b, 0xffffff7c };
+VECT_VAR_DECL(expected,uint,64,2) [] = { 0xffffffffffffff68,
+                                        0xffffffffffffff69 };
+VECT_VAR_DECL(expected,poly,8,16) [] = { 0x33, 0x33, 0x33, 0x33,
+                                        0x33, 0x33, 0x33, 0x33,
+                                        0x33, 0x33, 0x33, 0x33,
+                                        0x33, 0x33, 0x33, 0x33 };
+VECT_VAR_DECL(expected,poly,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333,
+                                        0x3333, 0x3333, 0x3333, 0x3333 };
+VECT_VAR_DECL(expected,hfloat,32,4) [] = { 0x33333333, 0x33333333,
+                                          0x33333333, 0x33333333 };
+
+/* Expected values of cumulative saturation flag.  */
+int VECT_VAR(expected_cumulative_sat,int,8,8) = 0;
+int VECT_VAR(expected_cumulative_sat,int,16,4) = 0;
+int VECT_VAR(expected_cumulative_sat,int,32,2) = 0;
+int VECT_VAR(expected_cumulative_sat,int,64,1) = 0;
+int VECT_VAR(expected_cumulative_sat,uint,8,8) = 0;
+int VECT_VAR(expected_cumulative_sat,uint,16,4) = 0;
+int VECT_VAR(expected_cumulative_sat,uint,32,2) = 0;
+int VECT_VAR(expected_cumulative_sat,uint,64,1) = 0;
+int VECT_VAR(expected_cumulative_sat,int,8,16) = 0;
+int VECT_VAR(expected_cumulative_sat,int,16,8) = 0;
+int VECT_VAR(expected_cumulative_sat,int,32,4) = 0;
+int VECT_VAR(expected_cumulative_sat,int,64,2) = 0;
+int VECT_VAR(expected_cumulative_sat,uint,8,16) = 0;
+int VECT_VAR(expected_cumulative_sat,uint,16,8) = 0;
+int VECT_VAR(expected_cumulative_sat,uint,32,4) = 0;
+int VECT_VAR(expected_cumulative_sat,uint,64,2) = 0;
+
+/* 64-bits types, with 0 as second input.  */
+VECT_VAR_DECL(expected_64,int,64,1) [] = { 0xfffffffffffffff0 };
+VECT_VAR_DECL(expected_64,uint,64,1) [] = { 0xfffffffffffffff0 };
+VECT_VAR_DECL(expected_64,int,64,2) [] = { 0xfffffffffffffff0,
+                                          0xfffffffffffffff1 };
+VECT_VAR_DECL(expected_64,uint,64,2) [] = { 0xfffffffffffffff0,
+                                           0xfffffffffffffff1 };
+int VECT_VAR(expected_cumulative_sat_64,int,64,1) = 0;
+int VECT_VAR(expected_cumulative_sat_64,uint,64,1) = 0;
+int VECT_VAR(expected_cumulative_sat_64,int,64,2) = 0;
+int VECT_VAR(expected_cumulative_sat_64,uint,64,2) = 0;
+
+/* 64-bits types, other cases.  */
+VECT_VAR_DECL(expected_64_2,int,64,1) [] = { 0xffffffffffffffac };
+VECT_VAR_DECL(expected_64_2,uint,64,1) [] = { 0xffffffffffffff68 };
+VECT_VAR_DECL(expected_64_2,int,64,2) [] = { 0xffffffffffffffac,
+                                            0xffffffffffffffad };
+VECT_VAR_DECL(expected_64_2,uint,64,2) [] = { 0xffffffffffffff68,
+                                             0xffffffffffffff69 };
+int VECT_VAR(expected_cumulative_sat_64_2,int,64,1) = 0;
+int VECT_VAR(expected_cumulative_sat_64_2,uint,64,1) = 0;
+int VECT_VAR(expected_cumulative_sat_64_2,int,64,2) = 0;
+int VECT_VAR(expected_cumulative_sat_64_2,uint,64,2) = 0;
+
+/* 64-bits types, all causing cumulative saturation.  */
+VECT_VAR_DECL(expected_64_3,int,64,1) [] = { 0x8000000000000000 };
+VECT_VAR_DECL(expected_64_3,uint,64,1) [] = { 0x0 };
+VECT_VAR_DECL(expected_64_3,int,64,2) [] = { 0x7fffffffffffffff,
+                                            0x7fffffffffffffff };
+VECT_VAR_DECL(expected_64_3,uint,64,2) [] = { 0x0, 0x0 };
+int VECT_VAR(expected_cumulative_sat_64_3,int,64,1) = 1;
+int VECT_VAR(expected_cumulative_sat_64_3,uint,64,1) = 1;
+int VECT_VAR(expected_cumulative_sat_64_3,int,64,2) = 1;
+int VECT_VAR(expected_cumulative_sat_64_3,uint,64,2) = 1;
+
+/* smaller types, corner cases causing cumulative saturation. (1)  */
+VECT_VAR_DECL(expected_lt_64_1,int,8,8) [] = { 0x80, 0x80, 0x80, 0x80,
+                                              0x80, 0x80, 0x80, 0x80 };
+VECT_VAR_DECL(expected_lt_64_1,int,16,4) [] = { 0x8000, 0x8000,
+                                               0x8000, 0x8000 };
+VECT_VAR_DECL(expected_lt_64_1,int,32,2) [] = { 0x80000000, 0x80000000 };
+VECT_VAR_DECL(expected_lt_64_1,int,8,16) [] = { 0x80, 0x80, 0x80, 0x80,
+                                               0x80, 0x80, 0x80, 0x80,
+                                               0x80, 0x80, 0x80, 0x80,
+                                               0x80, 0x80, 0x80, 0x80 };
+VECT_VAR_DECL(expected_lt_64_1,int,16,8) [] = { 0x8000, 0x8000,
+                                               0x8000, 0x8000,
+                                               0x8000, 0x8000,
+                                               0x8000, 0x8000 };
+VECT_VAR_DECL(expected_lt_64_1,int,32,4) [] = { 0x80000000, 0x80000000,
+                                               0x80000000, 0x80000000 };
+int VECT_VAR(expected_csat_lt_64_1,int,8,8) = 1;
+int VECT_VAR(expected_csat_lt_64_1,int,16,4) = 1;
+int VECT_VAR(expected_csat_lt_64_1,int,32,2) = 1;
+int VECT_VAR(expected_csat_lt_64_1,int,8,16) = 1;
+int VECT_VAR(expected_csat_lt_64_1,int,16,8) = 1;
+int VECT_VAR(expected_csat_lt_64_1,int,32,4) = 1;
+
+/* smaller types, corner cases causing cumulative saturation. (2)  */
+VECT_VAR_DECL(expected_lt_64_2,uint,8,8) [] = { 0x0, 0x0, 0x0, 0x0,
+                                               0x0, 0x0, 0x0, 0x0 };
+VECT_VAR_DECL(expected_lt_64_2,uint,16,4) [] = { 0x0, 0x0, 0x0, 0x0 };
+VECT_VAR_DECL(expected_lt_64_2,uint,32,2) [] = { 0x0, 0x0 };
+VECT_VAR_DECL(expected_lt_64_2,uint,8,16) [] = { 0x0, 0x0, 0x0, 0x0,
+                                                0x0, 0x0, 0x0, 0x0,
+                                                0x0, 0x0, 0x0, 0x0,
+                                                0x0, 0x0, 0x0, 0x0 };
+VECT_VAR_DECL(expected_lt_64_2,uint,16,8) [] = { 0x0, 0x0, 0x0, 0x0,
+                                                0x0, 0x0, 0x0, 0x0 };
+VECT_VAR_DECL(expected_lt_64_2,uint,32,4) [] = { 0x0, 0x0, 0x0, 0x0 };
+int VECT_VAR(expected_csat_lt_64_2,uint,8,8) = 1;
+int VECT_VAR(expected_csat_lt_64_2,uint,16,4) = 1;
+int VECT_VAR(expected_csat_lt_64_2,uint,32,2) = 1;
+int VECT_VAR(expected_csat_lt_64_2,uint,8,16) = 1;
+int VECT_VAR(expected_csat_lt_64_2,uint,16,8) = 1;
+int VECT_VAR(expected_csat_lt_64_2,uint,32,4) = 1;
+
+void vqsub_extras(void)
+{
+  DECL_VARIABLE_ALL_VARIANTS(vector1);
+  DECL_VARIABLE_ALL_VARIANTS(vector2);
+  DECL_VARIABLE_ALL_VARIANTS(vector_res);
+
+  /* Initialize input "vector1" from "buffer".  */
+  TEST_MACRO_ALL_VARIANTS_2_5(VLOAD, vector1, buffer);
+
+  /* Use a second vector full of 0.  */
+  VDUP(vector2, , int, s, 64, 1, 0x0);
+  VDUP(vector2, , uint, u, 64, 1, 0x0);
+  VDUP(vector2, q, int, s, 64, 2, 0x0);
+  VDUP(vector2, q, uint, u, 64, 2, 0x0);
+
+#define MSG "64 bits saturation when adding zero"
+  TEST_BINARY_SAT_OP(INSN_NAME, , int, s, 64, 1, expected_cumulative_sat_64, MSG);
+  TEST_BINARY_SAT_OP(INSN_NAME, , uint, u, 64, 1, expected_cumulative_sat_64, MSG);
+  TEST_BINARY_SAT_OP(INSN_NAME, q, int, s, 64, 2, expected_cumulative_sat_64, MSG);
+  TEST_BINARY_SAT_OP(INSN_NAME, q, uint, u, 64, 2, expected_cumulative_sat_64, MSG);
+
+  CHECK(TEST_MSG, int, 64, 1, PRIx64, expected_64, MSG);
+  CHECK(TEST_MSG, uint, 64, 1, PRIx64, expected_64, MSG);
+  CHECK(TEST_MSG, int, 64, 2, PRIx64, expected_64, MSG);
+  CHECK(TEST_MSG, uint, 64, 2, PRIx64, expected_64, MSG);
+
+  /* Another set of tests with non-zero values.  */
+  VDUP(vector2, , int, s, 64, 1, 0x44);
+  VDUP(vector2, , uint, u, 64, 1, 0x88);
+  VDUP(vector2, q, int, s, 64, 2, 0x44);
+  VDUP(vector2, q, uint, u, 64, 2, 0x88);
+
+#undef MSG
+#define MSG "64 bits saturation cumulative_sat (2)"
+  TEST_BINARY_SAT_OP(INSN_NAME, , int, s, 64, 1, expected_cumulative_sat_64_2, MSG);
+  TEST_BINARY_SAT_OP(INSN_NAME, , uint, u, 64, 1, expected_cumulative_sat_64_2, MSG);
+  TEST_BINARY_SAT_OP(INSN_NAME, q, int, s, 64, 2, expected_cumulative_sat_64_2, MSG);
+  TEST_BINARY_SAT_OP(INSN_NAME, q, uint, u, 64, 2, expected_cumulative_sat_64_2, MSG);
+
+  CHECK(TEST_MSG, int, 64, 1, PRIx64, expected_64_2, MSG);
+  CHECK(TEST_MSG, uint, 64, 1, PRIx64, expected_64_2, MSG);
+  CHECK(TEST_MSG, int, 64, 2, PRIx64, expected_64_2, MSG);
+  CHECK(TEST_MSG, uint, 64, 2, PRIx64, expected_64_2, MSG);
+
+  /* Another set of tests, with input values chosen to set
+     cumulative_sat in all cases.  */
+  VDUP(vector2, , int, s, 64, 1, 0x7fffffffffffffffLL);
+  VDUP(vector2, , uint, u, 64, 1, 0xffffffffffffffffULL);
+  /* To check positive saturation, we need to write a positive value
+     in vector1.  */
+  VDUP(vector1, q, int, s, 64, 2, 0x3fffffffffffffffLL);
+  VDUP(vector2, q, int, s, 64, 2, 0x8000000000000000LL);
+  VDUP(vector2, q, uint, u, 64, 2, 0xffffffffffffffffULL);
+
+#undef MSG
+#define MSG "64 bits saturation cumulative_sat (3)"
+  TEST_BINARY_SAT_OP(INSN_NAME, , int, s, 64, 1, expected_cumulative_sat_64_3, MSG);
+  TEST_BINARY_SAT_OP(INSN_NAME, , uint, u, 64, 1, expected_cumulative_sat_64_3, MSG);
+  TEST_BINARY_SAT_OP(INSN_NAME, q, int, s, 64, 2, expected_cumulative_sat_64_3, MSG);
+  TEST_BINARY_SAT_OP(INSN_NAME, q, uint, u, 64, 2, expected_cumulative_sat_64_3, MSG);
+
+  CHECK(TEST_MSG, int, 64, 1, PRIx64, expected_64_3, MSG);
+  CHECK(TEST_MSG, uint, 64, 1, PRIx64, expected_64_3, MSG);
+  CHECK(TEST_MSG, int, 64, 2, PRIx64, expected_64_3, MSG);
+  CHECK(TEST_MSG, uint, 64, 2, PRIx64, expected_64_3, MSG);
+
+  /* To improve coverage, check saturation with less than 64 bits
+     too.  */
+  VDUP(vector2, , int, s, 8, 8, 0x7F);
+  VDUP(vector2, , int, s, 16, 4, 0x7FFF);
+  VDUP(vector2, , int, s, 32, 2, 0x7FFFFFFF);
+  VDUP(vector2, q, int, s, 8, 16, 0x7F);
+  VDUP(vector2, q, int, s, 16, 8, 0x7FFF);
+  VDUP(vector2, q, int, s, 32, 4, 0x7FFFFFFF);
+
+#undef MSG
+#define MSG "less than 64 bits saturation cumulative_sat (1)"
+  TEST_BINARY_SAT_OP(INSN_NAME, , int, s, 8, 8, expected_csat_lt_64_1, MSG);
+  TEST_BINARY_SAT_OP(INSN_NAME, , int, s, 16, 4, expected_csat_lt_64_1, MSG);
+  TEST_BINARY_SAT_OP(INSN_NAME, , int, s, 32, 2, expected_csat_lt_64_1, MSG);
+  TEST_BINARY_SAT_OP(INSN_NAME, q, int, s, 8, 16, expected_csat_lt_64_1, MSG);
+  TEST_BINARY_SAT_OP(INSN_NAME, q, int, s, 16, 8, expected_csat_lt_64_1, MSG);
+  TEST_BINARY_SAT_OP(INSN_NAME, q, int, s, 32, 4, expected_csat_lt_64_1, MSG);
+
+  CHECK(TEST_MSG, int, 8, 8, PRIx8, expected_lt_64_1, MSG);
+  CHECK(TEST_MSG, int, 16, 4, PRIx16, expected_lt_64_1, MSG);
+  CHECK(TEST_MSG, int, 32, 2, PRIx32, expected_lt_64_1, MSG);
+  CHECK(TEST_MSG, int, 8, 16, PRIx8, expected_lt_64_1, MSG);
+  CHECK(TEST_MSG, int, 16, 8, PRIx16, expected_lt_64_1, MSG);
+  CHECK(TEST_MSG, int, 32, 4, PRIx32, expected_lt_64_1, MSG);
+
+  /* Another set of tests with vector1 values smaller than
+     vector2.  */
+  VDUP(vector1, , uint, u, 8, 8, 0x10);
+  VDUP(vector1, , uint, u, 16, 4, 0x10);
+  VDUP(vector1, , uint, u, 32, 2, 0x10);
+  VDUP(vector1, q, uint, u, 8, 16, 0x10);
+  VDUP(vector1, q, uint, u, 16, 8, 0x10);
+  VDUP(vector1, q, uint, u, 32, 4, 0x10);
+
+  VDUP(vector2, , uint, u, 8, 8, 0x20);
+  VDUP(vector2, , uint, u, 16, 4, 0x20);
+  VDUP(vector2, , uint, u, 32, 2, 0x20);
+  VDUP(vector2, q, uint, u, 8, 16, 0x20);
+  VDUP(vector2, q, uint, u, 16, 8, 0x20);
+  VDUP(vector2, q, uint, u, 32, 4, 0x20);
+
+#undef MSG
+#define MSG "less than 64 bits saturation cumulative_sat (2)"
+  TEST_BINARY_SAT_OP(INSN_NAME, , uint, u, 8, 8, expected_csat_lt_64_2, MSG);
+  TEST_BINARY_SAT_OP(INSN_NAME, , uint, u, 16, 4, expected_csat_lt_64_2, MSG);
+  TEST_BINARY_SAT_OP(INSN_NAME, , uint, u, 32, 2, expected_csat_lt_64_2, MSG);
+  TEST_BINARY_SAT_OP(INSN_NAME, q, uint, u, 8, 16, expected_csat_lt_64_2, MSG);
+  TEST_BINARY_SAT_OP(INSN_NAME, q, uint, u, 16, 8, expected_csat_lt_64_2, MSG);
+  TEST_BINARY_SAT_OP(INSN_NAME, q, uint, u, 32, 4, expected_csat_lt_64_2, MSG);
+
+  CHECK(TEST_MSG, uint, 8, 8, PRIx8, expected_lt_64_2, MSG);
+  CHECK(TEST_MSG, uint, 16, 4, PRIx16, expected_lt_64_2, MSG);
+  CHECK(TEST_MSG, uint, 32, 2, PRIx32, expected_lt_64_2, MSG);
+  CHECK(TEST_MSG, uint, 8, 16, PRIx8, expected_lt_64_2, MSG);
+  CHECK(TEST_MSG, uint, 16, 8, PRIx16, expected_lt_64_2, MSG);
+  CHECK(TEST_MSG, uint, 32, 4, PRIx32, expected_lt_64_2, MSG);
+}
diff --git a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vshl.c b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vshl.c
new file mode 100644 (file)
index 0000000..e64d6e3
--- /dev/null
@@ -0,0 +1,230 @@
+#include <arm_neon.h>
+#include "arm-neon-ref.h"
+#include "compute-ref-data.h"
+
+/* Expected results.  */
+VECT_VAR_DECL(expected,int,8,8) [] = { 0xe0, 0xe2, 0xe4, 0xe6,
+                                      0xe8, 0xea, 0xec, 0xee };
+VECT_VAR_DECL(expected,int,16,4) [] = { 0xff80, 0xff88, 0xff90, 0xff98 };
+VECT_VAR_DECL(expected,int,32,2) [] = { 0xfffff000, 0xfffff100 };
+VECT_VAR_DECL(expected,int,64,1) [] = { 0xffffffffffffff80 };
+VECT_VAR_DECL(expected,uint,8,8) [] = { 0xe0, 0xe2, 0xe4, 0xe6,
+                                       0xe8, 0xea, 0xec, 0xee };
+VECT_VAR_DECL(expected,uint,16,4) [] = { 0xff80, 0xff88, 0xff90, 0xff98 };
+VECT_VAR_DECL(expected,uint,32,2) [] = { 0xfffff000, 0xfffff100 };
+VECT_VAR_DECL(expected,uint,64,1) [] = { 0xffffffffffffff80 };
+VECT_VAR_DECL(expected,poly,8,8) [] = { 0x33, 0x33, 0x33, 0x33,
+                                       0x33, 0x33, 0x33, 0x33 };
+VECT_VAR_DECL(expected,poly,16,4) [] = { 0x3333, 0x3333,
+                                        0x3333, 0x3333 };
+VECT_VAR_DECL(expected,hfloat,32,2) [] = { 0x33333333, 0x33333333 };
+VECT_VAR_DECL(expected,int,8,16) [] = { 0x0, 0x20, 0x40, 0x60,
+                                       0x80, 0xa0, 0xc0, 0xe0,
+                                       0x0, 0x20, 0x40, 0x60,
+                                       0x80, 0xa0, 0xc0, 0xe0 };
+VECT_VAR_DECL(expected,int,16,8) [] = { 0x0, 0x1000, 0x2000, 0x3000,
+                                       0x4000, 0x5000, 0x6000, 0x7000 };
+VECT_VAR_DECL(expected,int,32,4) [] = { 0x0, 0x40000000,
+                                       0x80000000, 0xc0000000 };
+VECT_VAR_DECL(expected,int,64,2) [] = { 0x0, 0x8000000000000000 };
+VECT_VAR_DECL(expected,uint,8,16) [] = { 0x0, 0x20, 0x40, 0x60,
+                                        0x80, 0xa0, 0xc0, 0xe0,
+                                        0x0, 0x20, 0x40, 0x60,
+                                        0x80, 0xa0, 0xc0, 0xe0 };
+VECT_VAR_DECL(expected,uint,16,8) [] = { 0x0, 0x1000, 0x2000, 0x3000,
+                                        0x4000, 0x5000, 0x6000, 0x7000 };
+VECT_VAR_DECL(expected,uint,32,4) [] = { 0x0, 0x40000000,
+                                        0x80000000, 0xc0000000 };
+VECT_VAR_DECL(expected,uint,64,2) [] = { 0x0, 0x8000000000000000 };
+VECT_VAR_DECL(expected,poly,8,16) [] = { 0x33, 0x33, 0x33, 0x33,
+                                        0x33, 0x33, 0x33, 0x33,
+                                        0x33, 0x33, 0x33, 0x33,
+                                        0x33, 0x33, 0x33, 0x33 };
+VECT_VAR_DECL(expected,poly,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333,
+                                        0x3333, 0x3333, 0x3333, 0x3333 };
+VECT_VAR_DECL(expected,hfloat,32,4) [] = { 0x33333333, 0x33333333,
+                                          0x33333333, 0x33333333 };
+
+/* Expected results with large shift amount.  */
+VECT_VAR_DECL(expected_large_shift,int,8,8) [] = { 0x0, 0x0, 0x0, 0x0,
+                                                  0x0, 0x0, 0x0, 0x0 };
+VECT_VAR_DECL(expected_large_shift,int,16,4) [] = { 0x0, 0x0, 0x0, 0x0 };
+VECT_VAR_DECL(expected_large_shift,int,32,2) [] = { 0x0, 0x0 };
+VECT_VAR_DECL(expected_large_shift,int,64,1) [] = { 0x0 };
+VECT_VAR_DECL(expected_large_shift,uint,8,8) [] = { 0x0, 0x0, 0x0, 0x0,
+                                                   0x0, 0x0, 0x0, 0x0 };
+VECT_VAR_DECL(expected_large_shift,uint,16,4) [] = { 0x0, 0x0, 0x0, 0x0 };
+VECT_VAR_DECL(expected_large_shift,uint,32,2) [] = { 0x0, 0x0 };
+VECT_VAR_DECL(expected_large_shift,uint,64,1) [] = { 0x0 };
+VECT_VAR_DECL(expected_large_shift,poly,8,8) [] = { 0x33, 0x33, 0x33, 0x33,
+                                                   0x33, 0x33, 0x33, 0x33 };
+VECT_VAR_DECL(expected_large_shift,poly,16,4) [] = { 0x3333, 0x3333,
+                                                    0x3333, 0x3333 };
+VECT_VAR_DECL(expected_large_shift,hfloat,32,2) [] = { 0x33333333, 0x33333333 };
+VECT_VAR_DECL(expected_large_shift,int,8,16) [] = { 0x0, 0x0, 0x0, 0x0,
+                                                   0x0, 0x0, 0x0, 0x0,
+                                                   0x0, 0x0, 0x0, 0x0,
+                                                   0x0, 0x0, 0x0, 0x0 };
+VECT_VAR_DECL(expected_large_shift,int,16,8) [] = { 0x0, 0x0, 0x0, 0x0,
+                                                   0x0, 0x0, 0x0, 0x0 };
+VECT_VAR_DECL(expected_large_shift,int,32,4) [] = { 0x0, 0x0, 0x0, 0x0 };
+VECT_VAR_DECL(expected_large_shift,int,64,2) [] = { 0x0, 0x0 };
+VECT_VAR_DECL(expected_large_shift,uint,8,16) [] = { 0x0, 0x0, 0x0, 0x0,
+                                                    0x0, 0x0, 0x0, 0x0,
+                                                    0x0, 0x0, 0x0, 0x0,
+                                                    0x0, 0x0, 0x0, 0x0 };
+VECT_VAR_DECL(expected_large_shift,uint,16,8) [] = { 0x0, 0x0, 0x0, 0x0,
+                                                    0x0, 0x0, 0x0, 0x0 };
+VECT_VAR_DECL(expected_large_shift,uint,32,4) [] = { 0x0, 0x0, 0x0, 0x0 };
+VECT_VAR_DECL(expected_large_shift,uint,64,2) [] = { 0x0, 0x0 };
+VECT_VAR_DECL(expected_large_shift,poly,8,16) [] = { 0x33, 0x33, 0x33, 0x33,
+                                                    0x33, 0x33, 0x33, 0x33,
+                                                    0x33, 0x33, 0x33, 0x33,
+                                                    0x33, 0x33, 0x33, 0x33 };
+VECT_VAR_DECL(expected_large_shift,poly,16,8) [] = { 0x3333, 0x3333,
+                                                    0x3333, 0x3333,
+                                                    0x3333, 0x3333,
+                                                    0x3333, 0x3333 };
+VECT_VAR_DECL(expected_large_shift,hfloat,32,4) [] = { 0x33333333, 0x33333333,
+                                                      0x33333333, 0x33333333 };
+
+
+/* Expected results with negative shift amount.  */
+VECT_VAR_DECL(expected_negative_shift,int,8,8) [] = { 0xf8, 0xf8, 0xf9, 0xf9,
+                                                     0xfa, 0xfa, 0xfb, 0xfb };
+VECT_VAR_DECL(expected_negative_shift,int,16,4) [] = { 0xfff8, 0xfff8,
+                                                      0xfff9, 0xfff9  };
+VECT_VAR_DECL(expected_negative_shift,int,32,2) [] = { 0xfffffffc, 0xfffffffc };
+VECT_VAR_DECL(expected_negative_shift,int,64,1) [] = { 0xffffffffffffffff };
+VECT_VAR_DECL(expected_negative_shift,uint,8,8) [] = { 0x78, 0x78, 0x79, 0x79,
+                                                      0x7a, 0x7a, 0x7b, 0x7b };
+VECT_VAR_DECL(expected_negative_shift,uint,16,4) [] = { 0x7ff8, 0x7ff8,
+                                                       0x7ff9, 0x7ff9 };
+VECT_VAR_DECL(expected_negative_shift,uint,32,2) [] = { 0x3ffffffc,
+                                                       0x3ffffffc };
+VECT_VAR_DECL(expected_negative_shift,uint,64,1) [] = { 0xfffffffffffffff };
+VECT_VAR_DECL(expected_negative_shift,poly,8,8) [] = { 0x33, 0x33, 0x33, 0x33,
+                                                      0x33, 0x33, 0x33, 0x33 };
+VECT_VAR_DECL(expected_negative_shift,poly,16,4) [] = { 0x3333, 0x3333,
+                                                       0x3333, 0x3333 };
+VECT_VAR_DECL(expected_negative_shift,hfloat,32,2) [] = { 0x33333333,
+                                                         0x33333333 };
+VECT_VAR_DECL(expected_negative_shift,int,8,16) [] = { 0xfc, 0xfc, 0xfc, 0xfc,
+                                                      0xfd, 0xfd, 0xfd, 0xfd,
+                                                      0xfe, 0xfe, 0xfe, 0xfe,
+                                                      0xff, 0xff, 0xff, 0xff };
+VECT_VAR_DECL(expected_negative_shift,int,16,8) [] = { 0xffff, 0xffff,
+                                                      0xffff, 0xffff,
+                                                      0xffff, 0xffff,
+                                                      0xffff, 0xffff };
+VECT_VAR_DECL(expected_negative_shift,int,32,4) [] = {  0xfffffffe, 0xfffffffe,
+                                                       0xfffffffe, 0xfffffffe };
+VECT_VAR_DECL(expected_negative_shift,int,64,2) [] = { 0xffffffffffffffff,
+                                                      0xffffffffffffffff };
+VECT_VAR_DECL(expected_negative_shift,uint,8,16) [] = { 0x3c, 0x3c, 0x3c, 0x3c,
+                                                       0x3d, 0x3d, 0x3d, 0x3d,
+                                                       0x3e, 0x3e, 0x3e, 0x3e,
+                                                       0x3f, 0x3f, 0x3f, 0x3f };
+VECT_VAR_DECL(expected_negative_shift,uint,16,8) [] = { 0x7ff, 0x7ff,
+                                                       0x7ff, 0x7ff,
+                                                       0x7ff, 0x7ff,
+                                                       0x7ff, 0x7ff };
+VECT_VAR_DECL(expected_negative_shift,uint,32,4) [] = { 0x1ffffffe, 0x1ffffffe,
+                                                       0x1ffffffe, 0x1ffffffe };
+VECT_VAR_DECL(expected_negative_shift,uint,64,2) [] = { 0x7ffffffffffffff,
+                                                       0x7ffffffffffffff };
+VECT_VAR_DECL(expected_negative_shift,poly,8,16) [] = { 0x33, 0x33, 0x33, 0x33,
+                                                       0x33, 0x33, 0x33, 0x33,
+                                                       0x33, 0x33, 0x33, 0x33,
+                                                       0x33, 0x33, 0x33, 0x33 };
+VECT_VAR_DECL(expected_negative_shift,poly,16,8) [] = { 0x3333, 0x3333,
+                                                       0x3333, 0x3333,
+                                                       0x3333, 0x3333,
+                                                       0x3333, 0x3333 };
+VECT_VAR_DECL(expected_negative_shift,hfloat,32,4) [] = { 0x33333333,
+                                                         0x33333333,
+                                                         0x33333333,
+                                                         0x33333333 };
+
+
+#ifndef INSN_NAME
+#define INSN_NAME vshl
+#define TEST_MSG "VSHL/VSHLQ"
+#endif
+
+#define FNNAME1(NAME) exec_ ## NAME
+#define FNNAME(NAME) FNNAME1(NAME)
+
+void FNNAME (INSN_NAME) (void)
+{
+  /* Basic test: v3=vshl(v1,v2), then store the result.  */
+#define TEST_VSHL(T3, Q, T1, T2, W, N)                                 \
+  VECT_VAR(vector_res, T1, W, N) =                                     \
+    vshl##Q##_##T2##W(VECT_VAR(vector, T1, W, N),                      \
+                     VECT_VAR(vector_shift, T3, W, N));                \
+  vst1##Q##_##T2##W(VECT_VAR(result, T1, W, N), VECT_VAR(vector_res, T1, W, N))
+
+  DECL_VARIABLE_ALL_VARIANTS(vector);
+  DECL_VARIABLE_ALL_VARIANTS(vector_res);
+
+  DECL_VARIABLE_SIGNED_VARIANTS(vector_shift);
+
+  clean_results ();
+
+  /* Initialize input "vector" from "buffer".  */
+  TEST_MACRO_ALL_VARIANTS_2_5(VLOAD, vector, buffer);
+
+  /* Choose init value arbitrarily, will be used as shift amount.  */
+  VDUP(vector_shift, , int, s, 8, 8, 1);
+  VDUP(vector_shift, , int, s, 16, 4, 3);
+  VDUP(vector_shift, , int, s, 32, 2, 8);
+  VDUP(vector_shift, , int, s, 64, 1, 3);
+  VDUP(vector_shift, q, int, s, 8, 16, 5);
+  VDUP(vector_shift, q, int, s, 16, 8, 12);
+  VDUP(vector_shift, q, int, s, 32, 4, 30);
+  VDUP(vector_shift, q, int, s, 64, 2, 63);
+
+  /* Execute the tests.  */
+  TEST_MACRO_ALL_VARIANTS_1_5(TEST_VSHL, int);
+
+  CHECK_RESULTS (TEST_MSG, "");
+
+
+  /* Test large shift amount (larger or equal to the type width.  */
+  VDUP(vector_shift, , int, s, 8, 8, 8);
+  VDUP(vector_shift, , int, s, 16, 4, 16);
+  VDUP(vector_shift, , int, s, 32, 2, 32);
+  VDUP(vector_shift, , int, s, 64, 1, 64);
+  VDUP(vector_shift, q, int, s, 8, 16, 8);
+  VDUP(vector_shift, q, int, s, 16, 8, 17);
+  VDUP(vector_shift, q, int, s, 32, 4, 33);
+  VDUP(vector_shift, q, int, s, 64, 2, 65);
+
+  /* Execute the tests.  */
+  TEST_MACRO_ALL_VARIANTS_1_5(TEST_VSHL, int);
+
+  CHECK_RESULTS_NAMED (TEST_MSG, expected_large_shift, "(large shift amount)");
+
+
+  /* Test negative shift amount. */
+  VDUP(vector_shift, , int, s, 8, 8, -1);
+  VDUP(vector_shift, , int, s, 16, 4, -1);
+  VDUP(vector_shift, , int, s, 32, 2, -2);
+  VDUP(vector_shift, , int, s, 64, 1, -4);
+  VDUP(vector_shift, q, int, s, 8, 16, -2);
+  VDUP(vector_shift, q, int, s, 16, 8, -5);
+  VDUP(vector_shift, q, int, s, 32, 4, -3);
+  VDUP(vector_shift, q, int, s, 64, 2, -5);
+
+  /* Execute the tests.  */
+  TEST_MACRO_ALL_VARIANTS_1_5(TEST_VSHL, int);
+
+  CHECK_RESULTS_NAMED (TEST_MSG, expected_negative_shift, "(negative shift amount)");
+}
+
+int main (void)
+{
+  FNNAME (INSN_NAME) ();
+
+  return 0;
+}
diff --git a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vsub.c b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vsub.c
new file mode 100644 (file)
index 0000000..7620479
--- /dev/null
@@ -0,0 +1,82 @@
+#define INSN_NAME vsub
+#define TEST_MSG "VSUB/VSUBQ"
+
+/* Extra tests for functions requiring floating-point types */
+void exec_vsub_f32(void);
+#define EXTRA_TESTS exec_vsub_f32
+
+#include "binary_op.inc"
+
+/* Expected results.  */
+VECT_VAR_DECL(expected,int,8,8) [] = { 0xee, 0xef, 0xf0, 0xf1,
+                                      0xf2, 0xf3, 0xf4, 0xf5 };
+VECT_VAR_DECL(expected,int,16,4) [] = { 0xfff4, 0xfff5, 0xfff6, 0xfff7 };
+VECT_VAR_DECL(expected,int,32,2) [] = { 0xffffffed, 0xffffffee };
+VECT_VAR_DECL(expected,int,64,1) [] = { 0xffffffffffffff8c };
+VECT_VAR_DECL(expected,uint,8,8) [] = { 0xdc, 0xdd, 0xde, 0xdf,
+                                       0xe0, 0xe1, 0xe2, 0xe3 };
+VECT_VAR_DECL(expected,uint,16,4) [] = { 0xffd2, 0xffd3, 0xffd4, 0xffd5 };
+VECT_VAR_DECL(expected,uint,32,2) [] = { 0xffffffc8, 0xffffffc9 };
+VECT_VAR_DECL(expected,uint,64,1) [] = { 0xffffffffffffffee };
+VECT_VAR_DECL(expected,poly,8,8) [] = { 0x33, 0x33, 0x33, 0x33,
+                                       0x33, 0x33, 0x33, 0x33 };
+VECT_VAR_DECL(expected,poly,16,4) [] = { 0x3333, 0x3333, 0x3333, 0x3333 };
+VECT_VAR_DECL(expected,hfloat,32,2) [] = { 0x33333333, 0x33333333 };
+VECT_VAR_DECL(expected,int,8,16) [] = { 0xfa, 0xfb, 0xfc, 0xfd,
+                                       0xfe, 0xff, 0x0, 0x1,
+                                       0x2, 0x3, 0x4, 0x5,
+                                       0x6, 0x7, 0x8, 0x9 };
+VECT_VAR_DECL(expected,int,16,8) [] = { 0x4, 0x5, 0x6, 0x7,
+                                       0x8, 0x9, 0xa, 0xb };
+VECT_VAR_DECL(expected,int,32,4) [] = { 0xe, 0xf, 0x10, 0x11 };
+VECT_VAR_DECL(expected,int,64,2) [] = { 0xffffffffffffffd8,
+                                       0xffffffffffffffd9 };
+VECT_VAR_DECL(expected,uint,8,16) [] = { 0xe4, 0xe5, 0xe6, 0xe7,
+                                        0xe8, 0xe9, 0xea, 0xeb,
+                                        0xec, 0xed, 0xee, 0xef,
+                                        0xf0, 0xf1, 0xf2, 0xf3};
+VECT_VAR_DECL(expected,uint,16,8) [] = { 0xffed, 0xffee, 0xffef, 0xfff0,
+                                        0xfff1, 0xfff2, 0xfff3, 0xfff4 };
+VECT_VAR_DECL(expected,uint,32,4) [] = { 0xffffffb9, 0xffffffba,
+                                        0xffffffbb, 0xffffffbc };
+VECT_VAR_DECL(expected,uint,64,2) [] = { 0xffffffffffffffed,
+                                        0xffffffffffffffee };
+VECT_VAR_DECL(expected,poly,8,16) [] = { 0x33, 0x33, 0x33, 0x33,
+                                        0x33, 0x33, 0x33, 0x33,
+                                        0x33, 0x33, 0x33, 0x33,
+                                        0x33, 0x33, 0x33, 0x33 };
+VECT_VAR_DECL(expected,poly,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333,
+                                        0x3333, 0x3333, 0x3333, 0x3333 };
+VECT_VAR_DECL(expected,hfloat,32,4) [] = { 0x33333333, 0x33333333,
+                                         0x33333333, 0x33333333 };
+
+/* Expected results for float32 variants. Needs to be separated since
+   the generic test function does not test floating-point
+   versions.  */
+VECT_VAR_DECL(expected_float32,hfloat,32,2) [] = { 0xc00ccccd, 0xc00ccccd };
+VECT_VAR_DECL(expected_float32,hfloat,32,4) [] = { 0xc00ccccc, 0xc00ccccc,
+                                                  0xc00ccccc, 0xc00ccccc };
+
+void exec_vsub_f32(void)
+{
+  DECL_VARIABLE(vector, float, 32, 2);
+  DECL_VARIABLE(vector, float, 32, 4);
+
+  DECL_VARIABLE(vector2, float, 32, 2);
+  DECL_VARIABLE(vector2, float, 32, 4);
+
+  DECL_VARIABLE(vector_res, float, 32, 2);
+  DECL_VARIABLE(vector_res, float, 32, 4);
+
+  VDUP(vector, , float, f, 32, 2, 2.3f);
+  VDUP(vector, q, float, f, 32, 4, 3.4f);
+
+  VDUP(vector2, , float, f, 32, 2, 4.5f);
+  VDUP(vector2, q, float, f, 32, 4, 5.6f);
+
+  TEST_BINARY_OP(INSN_NAME, , float, f, 32, 2);
+  TEST_BINARY_OP(INSN_NAME, q, float, f, 32, 4);
+
+  CHECK_FP(TEST_MSG, float, 32, 2, PRIx32, expected_float32, "");
+  CHECK_FP(TEST_MSG, float, 32, 4, PRIx32, expected_float32, "");
+}
diff --git a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vuzp.c b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vuzp.c
new file mode 100644 (file)
index 0000000..53f875e
--- /dev/null
@@ -0,0 +1,245 @@
+#include <arm_neon.h>
+#include "arm-neon-ref.h"
+#include "compute-ref-data.h"
+
+/* Expected results splitted in several chunks.  */
+/* Chunk 0.  */
+VECT_VAR_DECL(expected0,int,8,8) [] = { 0xf0, 0xf1, 0xf2, 0xf3,
+                                       0xf4, 0xf5, 0xf6, 0xf7 };
+VECT_VAR_DECL(expected0,int,16,4) [] = { 0xfff0, 0xfff1,
+                                        0xfff2, 0xfff3 };
+VECT_VAR_DECL(expected0,int,32,2) [] = { 0xfffffff0, 0xfffffff1 };
+VECT_VAR_DECL(expected0,int,64,1) [] = { 0x3333333333333333 };
+VECT_VAR_DECL(expected0,uint,8,8) [] = { 0xf0, 0xf1, 0xf2, 0xf3,
+                                        0xf4, 0xf5, 0xf6, 0xf7 };
+VECT_VAR_DECL(expected0,uint,16,4) [] = { 0xfff0, 0xfff1,
+                                         0xfff2, 0xfff3 };
+VECT_VAR_DECL(expected0,uint,32,2) [] = { 0xfffffff0,
+                                         0xfffffff1 };
+VECT_VAR_DECL(expected0,uint,64,1) [] = { 0x3333333333333333 };
+VECT_VAR_DECL(expected0,poly,8,8) [] = { 0xf0, 0xf1, 0xf2, 0xf3,
+                                        0xf4, 0xf5, 0xf6, 0xf7 };
+VECT_VAR_DECL(expected0,poly,16,4) [] = { 0xfff0, 0xfff1,
+                                         0xfff2, 0xfff3 };
+VECT_VAR_DECL(expected0,hfloat,32,2) [] = { 0xc1800000, 0xc1700000 };
+VECT_VAR_DECL(expected0,int,8,16) [] = { 0xf0, 0xf1, 0xf2, 0xf3,
+                                        0xf4, 0xf5, 0xf6, 0xf7,
+                                        0xf8, 0xf9, 0xfa, 0xfb,
+                                        0xfc, 0xfd, 0xfe, 0xff };
+VECT_VAR_DECL(expected0,int,16,8) [] = { 0xfff0, 0xfff1,
+                                        0xfff2, 0xfff3,
+                                        0xfff4, 0xfff5,
+                                        0xfff6, 0xfff7 };
+VECT_VAR_DECL(expected0,int,32,4) [] = { 0xfffffff0, 0xfffffff1,
+                                        0xfffffff2, 0xfffffff3 };
+VECT_VAR_DECL(expected0,int,64,2) [] = { 0x3333333333333333,
+                                        0x3333333333333333 };
+VECT_VAR_DECL(expected0,uint,8,16) [] = { 0xf0, 0xf1, 0xf2, 0xf3,
+                                         0xf4, 0xf5, 0xf6, 0xf7,
+                                         0xf8, 0xf9, 0xfa, 0xfb,
+                                         0xfc, 0xfd, 0xfe, 0xff };
+VECT_VAR_DECL(expected0,uint,16,8) [] = { 0xfff0, 0xfff1,
+                                         0xfff2, 0xfff3,
+                                         0xfff4, 0xfff5,
+                                         0xfff6, 0xfff7 };
+VECT_VAR_DECL(expected0,uint,32,4) [] = { 0xfffffff0, 0xfffffff1,
+                                         0xfffffff2, 0xfffffff3 };
+VECT_VAR_DECL(expected0,uint,64,2) [] = { 0x3333333333333333,
+                                         0x3333333333333333 };
+VECT_VAR_DECL(expected0,poly,8,16) [] = { 0xf0, 0xf1, 0xf2, 0xf3,
+                                         0xf4, 0xf5, 0xf6, 0xf7,
+                                         0xf8, 0xf9, 0xfa, 0xfb,
+                                         0xfc, 0xfd, 0xfe, 0xff };
+VECT_VAR_DECL(expected0,poly,16,8) [] = { 0xfff0, 0xfff1,
+                                         0xfff2, 0xfff3,
+                                         0xfff4, 0xfff5,
+                                         0xfff6, 0xfff7 };
+VECT_VAR_DECL(expected0,hfloat,32,4) [] = { 0xc1800000, 0xc1700000,
+                                           0xc1600000, 0xc1500000 };
+
+/* Chunk 1.  */
+VECT_VAR_DECL(expected1,int,8,8) [] = { 0x11, 0x11, 0x11, 0x11,
+                                       0x11, 0x11, 0x11, 0x11 };
+VECT_VAR_DECL(expected1,int,16,4) [] = { 0x22, 0x22, 0x22, 0x22 };
+VECT_VAR_DECL(expected1,int,32,2) [] = { 0x33, 0x33 };
+VECT_VAR_DECL(expected1,int,64,1) [] = { 0x3333333333333333 };
+VECT_VAR_DECL(expected1,uint,8,8) [] = { 0x55, 0x55, 0x55, 0x55,
+                                        0x55, 0x55, 0x55, 0x55 };
+VECT_VAR_DECL(expected1,uint,16,4) [] = { 0x66, 0x66, 0x66, 0x66 };
+VECT_VAR_DECL(expected1,uint,32,2) [] = { 0x77, 0x77 };
+VECT_VAR_DECL(expected1,uint,64,1) [] = { 0x3333333333333333 };
+VECT_VAR_DECL(expected1,poly,8,8) [] = { 0x55, 0x55, 0x55, 0x55,
+                                        0x55, 0x55, 0x55, 0x55 };
+VECT_VAR_DECL(expected1,poly,16,4) [] = { 0x66, 0x66, 0x66, 0x66 };
+VECT_VAR_DECL(expected1,hfloat,32,2) [] = { 0x42066666, 0x42066666 };
+VECT_VAR_DECL(expected1,int,8,16) [] = { 0x11, 0x11, 0x11, 0x11,
+                                        0x11, 0x11, 0x11, 0x11,
+                                        0x11, 0x11, 0x11, 0x11,
+                                        0x11, 0x11, 0x11, 0x11 };
+VECT_VAR_DECL(expected1,int,16,8) [] = { 0x22, 0x22, 0x22, 0x22,
+                                        0x22, 0x22, 0x22, 0x22 };
+VECT_VAR_DECL(expected1,int,32,4) [] = { 0x33, 0x33, 0x33, 0x33 };
+VECT_VAR_DECL(expected1,int,64,2) [] = { 0x3333333333333333,
+                                        0x3333333333333333 };
+VECT_VAR_DECL(expected1,uint,8,16) [] = { 0x55, 0x55, 0x55, 0x55,
+                                         0x55, 0x55, 0x55, 0x55,
+                                         0x55, 0x55, 0x55, 0x55,
+                                         0x55, 0x55, 0x55, 0x55 };
+VECT_VAR_DECL(expected1,uint,16,8) [] = { 0x66, 0x66, 0x66, 0x66,
+                                         0x66, 0x66, 0x66, 0x66 };
+VECT_VAR_DECL(expected1,uint,32,4) [] = { 0x77, 0x77, 0x77, 0x77 };
+VECT_VAR_DECL(expected1,uint,64,2) [] = { 0x3333333333333333,
+                                         0x3333333333333333 };
+VECT_VAR_DECL(expected1,poly,8,16) [] = { 0x55, 0x55, 0x55, 0x55,
+                                         0x55, 0x55, 0x55, 0x55,
+                                         0x55, 0x55, 0x55, 0x55,
+                                         0x55, 0x55, 0x55, 0x55 };
+VECT_VAR_DECL(expected1,poly,16,8) [] = { 0x66, 0x66, 0x66, 0x66,
+                                         0x66, 0x66, 0x66, 0x66 };
+VECT_VAR_DECL(expected1,hfloat,32,4) [] = { 0x42073333, 0x42073333,
+                                           0x42073333, 0x42073333 };
+
+#ifndef INSN_NAME
+#define INSN_NAME vuzp
+#define TEST_MSG "VUZP/VUZPQ"
+#endif
+
+#define FNNAME1(NAME) exec_ ## NAME
+#define FNNAME(NAME) FNNAME1(NAME)
+
+void FNNAME (INSN_NAME) (void)
+{
+  /* In this case, output variables are arrays of vectors.  */
+#define DECL_VUZP(T1, W, N)                                            \
+  VECT_ARRAY_TYPE(T1, W, N, 2) VECT_ARRAY_VAR(result_vec, T1, W, N, 2);        \
+  VECT_VAR_DECL(result_bis, T1, W, N)[2 * N]
+
+  /* We need to use a temporary result buffer (result_bis), because
+     the one used for other tests is not large enough. A subset of the
+     result data is moved from result_bis to result, and it is this
+     subset which is used to check the actual behaviour. The next
+     macro enables to move another chunk of data from result_bis to
+     result.  */
+#define TEST_VUZP(INSN, Q, T1, T2, W, N)                               \
+  VECT_ARRAY_VAR(result_vec, T1, W, N, 2) =                            \
+    INSN##Q##_##T2##W(VECT_VAR(vector1, T1, W, N),                     \
+                     VECT_VAR(vector2, T1, W, N));                     \
+  vst2##Q##_##T2##W(VECT_VAR(result_bis, T1, W, N),                    \
+                   VECT_ARRAY_VAR(result_vec, T1, W, N, 2));           \
+  memcpy(VECT_VAR(result, T1, W, N), VECT_VAR(result_bis, T1, W, N),   \
+        sizeof(VECT_VAR(result, T1, W, N)));
+
+  /* Overwrite "result" with the contents of "result_bis"[X].  */
+#define TEST_EXTRA_CHUNK(T1, W, N, X)                                  \
+  memcpy(VECT_VAR(result, T1, W, N), &(VECT_VAR(result_bis, T1, W, N)[X*N]), \
+        sizeof(VECT_VAR(result, T1, W, N)));
+
+  DECL_VARIABLE_ALL_VARIANTS(vector1);
+  DECL_VARIABLE_ALL_VARIANTS(vector2);
+
+  /* We don't need 64 bits variants.  */
+#define DECL_ALL_VUZP()                                \
+  DECL_VUZP(int, 8, 8);                                \
+  DECL_VUZP(int, 16, 4);                       \
+  DECL_VUZP(int, 32, 2);                       \
+  DECL_VUZP(uint, 8, 8);                       \
+  DECL_VUZP(uint, 16, 4);                      \
+  DECL_VUZP(uint, 32, 2);                      \
+  DECL_VUZP(poly, 8, 8);                       \
+  DECL_VUZP(poly, 16, 4);                      \
+  DECL_VUZP(float, 32, 2);                     \
+  DECL_VUZP(int, 8, 16);                       \
+  DECL_VUZP(int, 16, 8);                       \
+  DECL_VUZP(int, 32, 4);                       \
+  DECL_VUZP(uint, 8, 16);                      \
+  DECL_VUZP(uint, 16, 8);                      \
+  DECL_VUZP(uint, 32, 4);                      \
+  DECL_VUZP(poly, 8, 16);                      \
+  DECL_VUZP(poly, 16, 8);                      \
+  DECL_VUZP(float, 32, 4)
+
+  DECL_ALL_VUZP();
+
+  /* Initialize input "vector" from "buffer".  */
+  TEST_MACRO_ALL_VARIANTS_2_5(VLOAD, vector1, buffer);
+  VLOAD(vector1, buffer, , float, f, 32, 2);
+  VLOAD(vector1, buffer, q, float, f, 32, 4);
+
+  /* Choose arbitrary initialization values.  */
+  VDUP(vector2, , int, s, 8, 8, 0x11);
+  VDUP(vector2, , int, s, 16, 4, 0x22);
+  VDUP(vector2, , int, s, 32, 2, 0x33);
+  VDUP(vector2, , uint, u, 8, 8, 0x55);
+  VDUP(vector2, , uint, u, 16, 4, 0x66);
+  VDUP(vector2, , uint, u, 32, 2, 0x77);
+  VDUP(vector2, , poly, p, 8, 8, 0x55);
+  VDUP(vector2, , poly, p, 16, 4, 0x66);
+  VDUP(vector2, , float, f, 32, 2, 33.6f);
+
+  VDUP(vector2, q, int, s, 8, 16, 0x11);
+  VDUP(vector2, q, int, s, 16, 8, 0x22);
+  VDUP(vector2, q, int, s, 32, 4, 0x33);
+  VDUP(vector2, q, uint, u, 8, 16, 0x55);
+  VDUP(vector2, q, uint, u, 16, 8, 0x66);
+  VDUP(vector2, q, uint, u, 32, 4, 0x77);
+  VDUP(vector2, q, poly, p, 8, 16, 0x55);
+  VDUP(vector2, q, poly, p, 16, 8, 0x66);
+  VDUP(vector2, q, float, f, 32, 4, 33.8f);
+
+#define TEST_ALL_VUZP(INSN)                    \
+  TEST_VUZP(INSN, , int, s, 8, 8);             \
+  TEST_VUZP(INSN, , int, s, 16, 4);            \
+  TEST_VUZP(INSN, , int, s, 32, 2);            \
+  TEST_VUZP(INSN, , uint, u, 8, 8);            \
+  TEST_VUZP(INSN, , uint, u, 16, 4);           \
+  TEST_VUZP(INSN, , uint, u, 32, 2);           \
+  TEST_VUZP(INSN, , poly, p, 8, 8);            \
+  TEST_VUZP(INSN, , poly, p, 16, 4);           \
+  TEST_VUZP(INSN, , float, f, 32, 2);          \
+  TEST_VUZP(INSN, q, int, s, 8, 16);           \
+  TEST_VUZP(INSN, q, int, s, 16, 8);           \
+  TEST_VUZP(INSN, q, int, s, 32, 4);           \
+  TEST_VUZP(INSN, q, uint, u, 8, 16);          \
+  TEST_VUZP(INSN, q, uint, u, 16, 8);          \
+  TEST_VUZP(INSN, q, uint, u, 32, 4);          \
+  TEST_VUZP(INSN, q, poly, p, 8, 16);          \
+  TEST_VUZP(INSN, q, poly, p, 16, 8);          \
+  TEST_VUZP(INSN, q, float, f, 32, 4)
+
+#define TEST_ALL_EXTRA_CHUNKS()                        \
+  TEST_EXTRA_CHUNK(int, 8, 8, 1);              \
+  TEST_EXTRA_CHUNK(int, 16, 4, 1);             \
+  TEST_EXTRA_CHUNK(int, 32, 2, 1);             \
+  TEST_EXTRA_CHUNK(uint, 8, 8, 1);             \
+  TEST_EXTRA_CHUNK(uint, 16, 4, 1);            \
+  TEST_EXTRA_CHUNK(uint, 32, 2, 1);            \
+  TEST_EXTRA_CHUNK(poly, 8, 8, 1);             \
+  TEST_EXTRA_CHUNK(poly, 16, 4, 1);            \
+  TEST_EXTRA_CHUNK(float, 32, 2, 1);           \
+  TEST_EXTRA_CHUNK(int, 8, 16, 1);             \
+  TEST_EXTRA_CHUNK(int, 16, 8, 1);             \
+  TEST_EXTRA_CHUNK(int, 32, 4, 1);             \
+  TEST_EXTRA_CHUNK(uint, 8, 16, 1);            \
+  TEST_EXTRA_CHUNK(uint, 16, 8, 1);            \
+  TEST_EXTRA_CHUNK(uint, 32, 4, 1);            \
+  TEST_EXTRA_CHUNK(poly, 8, 16, 1);            \
+  TEST_EXTRA_CHUNK(poly, 16, 8, 1);            \
+  TEST_EXTRA_CHUNK(float, 32, 4, 1)
+
+  clean_results ();
+
+  /* Execute the tests.  */
+  TEST_ALL_VUZP(INSN_NAME);
+
+  CHECK_RESULTS_NAMED (TEST_MSG, expected0, "(chunk 0)");
+
+  TEST_ALL_EXTRA_CHUNKS();
+  CHECK_RESULTS_NAMED (TEST_MSG, expected1, "(chunk 1)");
+}
+
+int main (void)
+{
+  FNNAME (INSN_NAME) ();
+
+  return 0;
+}
diff --git a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vzip.c b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vzip.c
new file mode 100644 (file)
index 0000000..a1f1eee
--- /dev/null
@@ -0,0 +1,243 @@
+#include <arm_neon.h>
+#include "arm-neon-ref.h"
+#include "compute-ref-data.h"
+
+/* Expected results splitted in several chunks.  */
+/* Chunk 0.  */
+VECT_VAR_DECL(expected0,int,8,8) [] = { 0xf0, 0xf4, 0x11, 0x11,
+                                       0xf1, 0xf5, 0x11, 0x11 };
+VECT_VAR_DECL(expected0,int,16,4) [] = { 0xfff0, 0xfff2,
+                                        0x22, 0x22 };
+VECT_VAR_DECL(expected0,int,32,2) [] = { 0xfffffff0, 0xfffffff1 };
+VECT_VAR_DECL(expected0,int,64,1) [] = { 0x3333333333333333 };
+VECT_VAR_DECL(expected0,uint,8,8) [] = { 0xf0, 0xf4, 0x55, 0x55,
+                                        0xf1, 0xf5, 0x55, 0x55 };
+VECT_VAR_DECL(expected0,uint,16,4) [] = { 0xfff0, 0xfff2,
+                                         0x66, 0x66 };
+VECT_VAR_DECL(expected0,uint,32,2) [] = { 0xfffffff0, 0xfffffff1 };
+VECT_VAR_DECL(expected0,uint,64,1) [] = { 0x3333333333333333 };
+VECT_VAR_DECL(expected0,poly,8,8) [] = { 0xf0, 0xf4, 0x55, 0x55,
+                                        0xf1, 0xf5, 0x55, 0x55 };
+VECT_VAR_DECL(expected0,poly,16,4) [] = { 0xfff0, 0xfff2,
+                                         0x66, 0x66 };
+VECT_VAR_DECL(expected0,hfloat,32,2) [] = { 0xc1800000, 0xc1700000 };
+VECT_VAR_DECL(expected0,int,8,16) [] = { 0xf0, 0xf8, 0x11, 0x11,
+                                        0xf1, 0xf9, 0x11, 0x11,
+                                        0xf2, 0xfa, 0x11, 0x11,
+                                        0xf3, 0xfb, 0x11, 0x11 };
+VECT_VAR_DECL(expected0,int,16,8) [] = { 0xfff0, 0xfff4, 0x22, 0x22,
+                                        0xfff1, 0xfff5, 0x22, 0x22 };
+VECT_VAR_DECL(expected0,int,32,4) [] = { 0xfffffff0, 0xfffffff2,
+                                        0x33, 0x33 };
+VECT_VAR_DECL(expected0,int,64,2) [] = { 0x3333333333333333,
+                                        0x3333333333333333 };
+VECT_VAR_DECL(expected0,uint,8,16) [] = { 0xf0, 0xf8, 0x55, 0x55,
+                                         0xf1, 0xf9, 0x55, 0x55,
+                                         0xf2, 0xfa, 0x55, 0x55,
+                                         0xf3, 0xfb, 0x55, 0x55 };
+VECT_VAR_DECL(expected0,uint,16,8) [] = { 0xfff0, 0xfff4, 0x66, 0x66,
+                                         0xfff1, 0xfff5, 0x66, 0x66 };
+VECT_VAR_DECL(expected0,uint,32,4) [] = { 0xfffffff0, 0xfffffff2,
+                                         0x77, 0x77 };
+VECT_VAR_DECL(expected0,uint,64,2) [] = { 0x3333333333333333,
+                                         0x3333333333333333 };
+VECT_VAR_DECL(expected0,poly,8,16) [] = { 0xf0, 0xf8, 0x55, 0x55,
+                                         0xf1, 0xf9, 0x55, 0x55,
+                                         0xf2, 0xfa, 0x55, 0x55,
+                                         0xf3, 0xfb, 0x55, 0x55 };
+VECT_VAR_DECL(expected0,poly,16,8) [] = { 0xfff0, 0xfff4, 0x66, 0x66,
+                                         0xfff1, 0xfff5, 0x66, 0x66 };
+VECT_VAR_DECL(expected0,hfloat,32,4) [] = { 0xc1800000, 0xc1600000,
+                                           0x42073333, 0x42073333 };
+
+/* Chunk 1.  */
+VECT_VAR_DECL(expected1,int,8,8) [] = { 0xf2, 0xf6, 0x11, 0x11,
+                                       0xf3, 0xf7, 0x11, 0x11 };
+VECT_VAR_DECL(expected1,int,16,4) [] = { 0xfff1, 0xfff3,
+                                        0x22, 0x22 };
+VECT_VAR_DECL(expected1,int,32,2) [] = { 0x33, 0x33 };
+VECT_VAR_DECL(expected1,int,64,1) [] = { 0x3333333333333333 };
+VECT_VAR_DECL(expected1,uint,8,8) [] = { 0xf2, 0xf6, 0x55, 0x55,
+                                        0xf3, 0xf7, 0x55, 0x55 };
+VECT_VAR_DECL(expected1,uint,16,4) [] = { 0xfff1, 0xfff3,
+                                         0x66, 0x66 };
+VECT_VAR_DECL(expected1,uint,32,2) [] = { 0x77, 0x77 };
+VECT_VAR_DECL(expected1,uint,64,1) [] = { 0x3333333333333333 };
+VECT_VAR_DECL(expected1,poly,8,8) [] = { 0xf2, 0xf6, 0x55, 0x55,
+                                        0xf3, 0xf7, 0x55, 0x55 };
+VECT_VAR_DECL(expected1,poly,16,4) [] = { 0xfff1, 0xfff3,
+                                         0x66, 0x66 };
+VECT_VAR_DECL(expected1,hfloat,32,2) [] = { 0x42066666, 0x42066666 };
+VECT_VAR_DECL(expected1,int,8,16) [] = { 0xf4, 0xfc, 0x11, 0x11,
+                                        0xf5, 0xfd, 0x11, 0x11,
+                                        0xf6, 0xfe, 0x11, 0x11,
+                                        0xf7, 0xff, 0x11, 0x11 };
+VECT_VAR_DECL(expected1,int,16,8) [] = { 0xfff2, 0xfff6, 0x22, 0x22,
+                                        0xfff3, 0xfff7, 0x22, 0x22 };
+VECT_VAR_DECL(expected1,int,32,4) [] = { 0xfffffff1, 0xfffffff3,
+                                        0x33, 0x33 };
+VECT_VAR_DECL(expected1,int,64,2) [] = { 0x3333333333333333,
+                                        0x3333333333333333 };
+VECT_VAR_DECL(expected1,uint,8,16) [] = { 0xf4, 0xfc, 0x55, 0x55,
+                                         0xf5, 0xfd, 0x55, 0x55,
+                                         0xf6, 0xfe, 0x55, 0x55,
+                                         0xf7, 0xff, 0x55, 0x55 };
+VECT_VAR_DECL(expected1,uint,16,8) [] = { 0xfff2, 0xfff6, 0x66, 0x66,
+                                         0xfff3, 0xfff7, 0x66, 0x66 };
+VECT_VAR_DECL(expected1,uint,32,4) [] = { 0xfffffff1, 0xfffffff3,
+                                         0x77, 0x77 };
+VECT_VAR_DECL(expected1,uint,64,2) [] = { 0x3333333333333333,
+                                         0x3333333333333333 };
+VECT_VAR_DECL(expected1,poly,8,16) [] = { 0xf4, 0xfc, 0x55, 0x55,
+                                         0xf5, 0xfd, 0x55, 0x55,
+                                         0xf6, 0xfe, 0x55, 0x55,
+                                         0xf7, 0xff, 0x55, 0x55 };
+VECT_VAR_DECL(expected1,poly,16,8) [] = { 0xfff2, 0xfff6, 0x66, 0x66,
+                                         0xfff3, 0xfff7, 0x66, 0x66 };
+VECT_VAR_DECL(expected1,hfloat,32,4) [] = { 0xc1700000, 0xc1500000,
+                                           0x42073333, 0x42073333 };
+
+#ifndef INSN_NAME
+#define INSN_NAME vzip
+#define TEST_MSG "VZIP/VZIPQ"
+#endif
+
+#define FNNAME1(NAME) exec_ ## NAME
+#define FNNAME(NAME) FNNAME1(NAME)
+
+void FNNAME (INSN_NAME) (void)
+{
+  /* In this case, output variables are arrays of vectors.  */
+#define DECL_VZIP(T1, W, N)                                            \
+  VECT_ARRAY_TYPE(T1, W, N, 2) VECT_ARRAY_VAR(result_vec, T1, W, N, 2);        \
+  VECT_VAR_DECL(result_bis, T1, W, N)[2 * N]
+
+  /* We need to use a temporary result buffer (result_bis), because
+     the one used for other tests is not large enough. A subset of the
+     result data is moved from result_bis to result, and it is this
+     subset which is used to check the actual behaviour. The next
+     macro enables to move another chunk of data from result_bis to
+     result.  */
+#define TEST_VZIP(INSN, Q, T1, T2, W, N)                               \
+  VECT_ARRAY_VAR(result_vec, T1, W, N, 2) =                            \
+    INSN##Q##_##T2##W(VECT_VAR(vector1, T1, W, N),                     \
+                     VECT_VAR(vector2, T1, W, N));                     \
+  vst2##Q##_##T2##W(VECT_VAR(result_bis, T1, W, N),                    \
+                   VECT_ARRAY_VAR(result_vec, T1, W, N, 2));           \
+  memcpy(VECT_VAR(result, T1, W, N), VECT_VAR(result_bis, T1, W, N),   \
+        sizeof(VECT_VAR(result, T1, W, N)));
+
+  /* Overwrite "result" with the contents of "result_bis"[X].  */
+#define TEST_EXTRA_CHUNK(T1, W, N, X)                                  \
+  memcpy(VECT_VAR(result, T1, W, N), &(VECT_VAR(result_bis, T1, W, N)[X*N]), \
+        sizeof(VECT_VAR(result, T1, W, N)));
+
+  DECL_VARIABLE_ALL_VARIANTS(vector1);
+  DECL_VARIABLE_ALL_VARIANTS(vector2);
+
+  /* We don't need 64 bits variants.  */
+#define DECL_ALL_VZIP()                                \
+  DECL_VZIP(int, 8, 8);                                \
+  DECL_VZIP(int, 16, 4);                       \
+  DECL_VZIP(int, 32, 2);                       \
+  DECL_VZIP(uint, 8, 8);                       \
+  DECL_VZIP(uint, 16, 4);                      \
+  DECL_VZIP(uint, 32, 2);                      \
+  DECL_VZIP(poly, 8, 8);                       \
+  DECL_VZIP(poly, 16, 4);                      \
+  DECL_VZIP(float, 32, 2);                     \
+  DECL_VZIP(int, 8, 16);                       \
+  DECL_VZIP(int, 16, 8);                       \
+  DECL_VZIP(int, 32, 4);                       \
+  DECL_VZIP(uint, 8, 16);                      \
+  DECL_VZIP(uint, 16, 8);                      \
+  DECL_VZIP(uint, 32, 4);                      \
+  DECL_VZIP(poly, 8, 16);                      \
+  DECL_VZIP(poly, 16, 8);                      \
+  DECL_VZIP(float, 32, 4)
+
+  DECL_ALL_VZIP();
+
+  /* Initialize input "vector" from "buffer".  */
+  TEST_MACRO_ALL_VARIANTS_2_5(VLOAD, vector1, buffer);
+  VLOAD(vector1, buffer, , float, f, 32, 2);
+  VLOAD(vector1, buffer, q, float, f, 32, 4);
+
+  /* Choose arbitrary initialization values.  */
+  VDUP(vector2, , int, s, 8, 8, 0x11);
+  VDUP(vector2, , int, s, 16, 4, 0x22);
+  VDUP(vector2, , int, s, 32, 2, 0x33);
+  VDUP(vector2, , uint, u, 8, 8, 0x55);
+  VDUP(vector2, , uint, u, 16, 4, 0x66);
+  VDUP(vector2, , uint, u, 32, 2, 0x77);
+  VDUP(vector2, , poly, p, 8, 8, 0x55);
+  VDUP(vector2, , poly, p, 16, 4, 0x66);
+  VDUP(vector2, , float, f, 32, 2, 33.6f);
+
+  VDUP(vector2, q, int, s, 8, 16, 0x11);
+  VDUP(vector2, q, int, s, 16, 8, 0x22);
+  VDUP(vector2, q, int, s, 32, 4, 0x33);
+  VDUP(vector2, q, uint, u, 8, 16, 0x55);
+  VDUP(vector2, q, uint, u, 16, 8, 0x66);
+  VDUP(vector2, q, uint, u, 32, 4, 0x77);
+  VDUP(vector2, q, poly, p, 8, 16, 0x55);
+  VDUP(vector2, q, poly, p, 16, 8, 0x66);
+  VDUP(vector2, q, float, f, 32, 4, 33.8f);
+
+#define TEST_ALL_VZIP(INSN)                    \
+  TEST_VZIP(INSN, , int, s, 8, 8);             \
+  TEST_VZIP(INSN, , int, s, 16, 4);            \
+  TEST_VZIP(INSN, , int, s, 32, 2);            \
+  TEST_VZIP(INSN, , uint, u, 8, 8);            \
+  TEST_VZIP(INSN, , uint, u, 16, 4);           \
+  TEST_VZIP(INSN, , uint, u, 32, 2);           \
+  TEST_VZIP(INSN, , poly, p, 8, 8);            \
+  TEST_VZIP(INSN, , poly, p, 16, 4);           \
+  TEST_VZIP(INSN, , float, f, 32, 2);          \
+  TEST_VZIP(INSN, q, int, s, 8, 16);           \
+  TEST_VZIP(INSN, q, int, s, 16, 8);           \
+  TEST_VZIP(INSN, q, int, s, 32, 4);           \
+  TEST_VZIP(INSN, q, uint, u, 8, 16);          \
+  TEST_VZIP(INSN, q, uint, u, 16, 8);          \
+  TEST_VZIP(INSN, q, uint, u, 32, 4);          \
+  TEST_VZIP(INSN, q, poly, p, 8, 16);          \
+  TEST_VZIP(INSN, q, poly, p, 16, 8);          \
+  TEST_VZIP(INSN, q, float, f, 32, 4)
+
+#define TEST_ALL_EXTRA_CHUNKS()                        \
+  TEST_EXTRA_CHUNK(int, 8, 8, 1);              \
+  TEST_EXTRA_CHUNK(int, 16, 4, 1);             \
+  TEST_EXTRA_CHUNK(int, 32, 2, 1);             \
+  TEST_EXTRA_CHUNK(uint, 8, 8, 1);             \
+  TEST_EXTRA_CHUNK(uint, 16, 4, 1);            \
+  TEST_EXTRA_CHUNK(uint, 32, 2, 1);            \
+  TEST_EXTRA_CHUNK(poly, 8, 8, 1);             \
+  TEST_EXTRA_CHUNK(poly, 16, 4, 1);            \
+  TEST_EXTRA_CHUNK(float, 32, 2, 1);           \
+  TEST_EXTRA_CHUNK(int, 8, 16, 1);             \
+  TEST_EXTRA_CHUNK(int, 16, 8, 1);             \
+  TEST_EXTRA_CHUNK(int, 32, 4, 1);             \
+  TEST_EXTRA_CHUNK(uint, 8, 16, 1);            \
+  TEST_EXTRA_CHUNK(uint, 16, 8, 1);            \
+  TEST_EXTRA_CHUNK(uint, 32, 4, 1);            \
+  TEST_EXTRA_CHUNK(poly, 8, 16, 1);            \
+  TEST_EXTRA_CHUNK(poly, 16, 8, 1);            \
+  TEST_EXTRA_CHUNK(float, 32, 4, 1)
+
+  clean_results ();
+
+  /* Execute the tests.  */
+  TEST_ALL_VZIP(INSN_NAME);
+
+  CHECK_RESULTS_NAMED (TEST_MSG, expected0, "(chunk 0)");
+
+  TEST_ALL_EXTRA_CHUNKS();
+  CHECK_RESULTS_NAMED (TEST_MSG, expected1, "(chunk 1)");
+}
+
+int main (void)
+{
+  FNNAME (INSN_NAME) ();
+
+  return 0;
+}
diff --git a/gcc/testsuite/gcc.target/arm/README.advsimd-intrinsics b/gcc/testsuite/gcc.target/arm/README.advsimd-intrinsics
new file mode 100644 (file)
index 0000000..f246349
--- /dev/null
@@ -0,0 +1 @@
+Advanced SIMD intrinsics tests are located in gcc.target/aarch64.