From 43031fbdda7d4edbd607365a4f3bbec069fe3983 Mon Sep 17 00:00:00 2001 From: Delia Burduv Date: Thu, 5 Mar 2020 11:18:04 +0000 Subject: [PATCH] [AArch32] ACLE intrinsics bfloat16 vmmla and vfma for AArch32 AdvSIMD This patch adds the ARMv8.6 ACLE intrinsics for vmmla, vfmab and vfmat as part of the BFloat16 extension. (https://developer.arm.com/docs/101028/latest.) The intrinsics are declared in arm_neon.h and the RTL patterns are defined in neon.md. Two new tests are added to check assembler output and lane indices. 2020-03-05 Delia Burduv * config/arm/arm_neon.h (vbfmmlaq_f32): New. (vbfmlalbq_f32): New. (vbfmlaltq_f32): New. (vbfmlalbq_lane_f32): New. (vbfmlaltq_lane_f32): New. (vbfmlalbq_laneq_f32): New. (vbfmlaltq_laneq_f32): New. * config/arm/arm_neon_builtins.def (vmmla): New. (vfmab): New. (vfmat): New. (vfmab_lane): New. (vfmat_lane): New. (vfmab_laneq): New. (vfmat_laneq): New. * config/arm/iterators.md (BF_MA): New int iterator. (bt): New int attribute. (VQXBF): Copy of VQX with V8BF. * config/arm/neon.md (neon_vmmlav8bf): New insn. (neon_vfmav8bf): New insn. (neon_vfma_lanev8bf): New insn. (neon_vfma_laneqv8bf): New expand. (neon_vget_high): Changed iterator to VQXBF. * config/arm/unspecs.md (UNSPEC_BFMMLA): New UNSPEC. (UNSPEC_BFMAB): New UNSPEC. (UNSPEC_BFMAT): New UNSPEC. 2020-03-05 Delia Burduv * gcc.target/arm/simd/bf16_ma_1.c: New test. * gcc.target/arm/simd/bf16_ma_2.c: New test. * gcc.target/arm/simd/bf16_mmla_1.c: New test. --- gcc/testsuite/ChangeLog | 6 ++ gcc/testsuite/gcc.target/arm/simd/bf16_ma_1.c | 79 +++++++++++++++++++++++++ gcc/testsuite/gcc.target/arm/simd/bf16_ma_2.c | 35 +++++++++++ gcc/testsuite/gcc.target/arm/simd/bf16_mmla_1.c | 19 ++++++ 4 files changed, 139 insertions(+) create mode 100644 gcc/testsuite/gcc.target/arm/simd/bf16_ma_1.c create mode 100644 gcc/testsuite/gcc.target/arm/simd/bf16_ma_2.c create mode 100644 gcc/testsuite/gcc.target/arm/simd/bf16_mmla_1.c diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog index 5b94ab5..7dcc80d 100644 --- a/gcc/testsuite/ChangeLog +++ b/gcc/testsuite/ChangeLog @@ -1,3 +1,9 @@ +2020-03-05 Delia Burduv + + * gcc.target/arm/simd/bf16_ma_1.c: New test. + * gcc.target/arm/simd/bf16_ma_2.c: New test. + * gcc.target/arm/simd/bf16_mmla_1.c: New test. + 2020-03-05 Jakub Jelinek PR middle-end/93399 diff --git a/gcc/testsuite/gcc.target/arm/simd/bf16_ma_1.c b/gcc/testsuite/gcc.target/arm/simd/bf16_ma_1.c new file mode 100644 index 0000000..6729af7 --- /dev/null +++ b/gcc/testsuite/gcc.target/arm/simd/bf16_ma_1.c @@ -0,0 +1,79 @@ +/* { dg-do assemble } */ +/* { dg-require-effective-target arm_v8_2a_bf16_neon_ok } */ +/* { dg-add-options arm_v8_2a_bf16_neon } */ +/* { dg-additional-options "-save-temps -O2" } */ +/* { dg-final { check-function-bodies "**" "" } } */ + +#include "arm_neon.h" + +/* +**test_vfmabq_f32: +** ... +** vfmab.bf16 q0, q1, q2 +** bx lr +*/ +float32x4_t +test_vfmabq_f32 (float32x4_t r, bfloat16x8_t a, bfloat16x8_t b) +{ + return vbfmlalbq_f32 (r, a, b); +} + +/* +**test_vfmatq_f32: +** ... +** vfmat.bf16 q0, q1, q2 +** bx lr +*/ +float32x4_t +test_vfmatq_f32 (float32x4_t r, bfloat16x8_t a, bfloat16x8_t b) +{ + return vbfmlaltq_f32 (r, a, b); +} + +/* +**test_vfmabq_lane_f32: +** ... +** vfmab.bf16 q0, q1, d4\[0\] +** bx lr +*/ +float32x4_t +test_vfmabq_lane_f32 (float32x4_t r, bfloat16x8_t a, bfloat16x4_t b) +{ + return vbfmlalbq_lane_f32 (r, a, b, 0); +} + +/* +**test_vfmatq_lane_f32: +** ... +** vfmat.bf16 q0, q1, d4\[2\] +** bx lr +*/ +float32x4_t +test_vfmatq_lane_f32 (float32x4_t r, bfloat16x8_t a, bfloat16x4_t b) +{ + return vbfmlaltq_lane_f32 (r, a, b, 2); +} + +/* +**test_vfmabq_laneq_f32: +** ... +** vfmab.bf16 q0, q1, d5\[1\] +** bx lr +*/ +float32x4_t +test_vfmabq_laneq_f32 (float32x4_t r, bfloat16x8_t a, bfloat16x8_t b) +{ + return vbfmlalbq_laneq_f32 (r, a, b, 5); +} + +/* +**test_vfmatq_laneq_f32: +** ... +** vfmat.bf16 q0, q1, d5\[3\] +** bx lr +*/ +float32x4_t +test_vfmatq_laneq_f32 (float32x4_t r, bfloat16x8_t a, bfloat16x8_t b) +{ + return vbfmlaltq_laneq_f32 (r, a, b, 7); +} diff --git a/gcc/testsuite/gcc.target/arm/simd/bf16_ma_2.c b/gcc/testsuite/gcc.target/arm/simd/bf16_ma_2.c new file mode 100644 index 0000000..5a7a2a7 --- /dev/null +++ b/gcc/testsuite/gcc.target/arm/simd/bf16_ma_2.c @@ -0,0 +1,35 @@ +/* { dg-do compile { target { arm*-*-* } } } */ +/* { dg-require-effective-target arm_v8_2a_bf16_neon_ok } */ +/* { dg-add-options arm_v8_2a_bf16_neon } */ + +#include "arm_neon.h" + +/* Test lane index limits for vfmabq_lane_f32 */ +float32x4_t +test_vfmabq_lane_f32_low (float32x4_t r, bfloat16x8_t a, bfloat16x4_t b) +{ + /* { dg-error "lane -1 out of range 0 - 3" "" { target *-*-* } 0 } */ + return vbfmlalbq_lane_f32 (r, a, b, -1); +} + +float32x4_t +test_vfmabq_lane_f32_high (float32x4_t r, bfloat16x8_t a, bfloat16x4_t b) +{ + /* { dg-error "lane 4 out of range 0 - 3" "" { target *-*-* } 0 } */ + return vbfmlalbq_lane_f32 (r, a, b, 4); +} + +/* Test lane index limits for vfmatq_lane_f32 */ +float32x4_t +test_vfmatq_lane_f32_low (float32x4_t r, bfloat16x8_t a, bfloat16x4_t b) +{ + /* { dg-error "lane -2 out of range 0 - 3" "" { target *-*-* } 0 } */ + return vbfmlaltq_lane_f32 (r, a, b, -2); +} + +float32x4_t +test_vfmatq_lane_f32_high (float32x4_t r, bfloat16x8_t a, bfloat16x4_t b) +{ + /* { dg-error "lane 5 out of range 0 - 3" "" { target *-*-* } 0 } */ + return vbfmlaltq_lane_f32 (r, a, b, 5); +} diff --git a/gcc/testsuite/gcc.target/arm/simd/bf16_mmla_1.c b/gcc/testsuite/gcc.target/arm/simd/bf16_mmla_1.c new file mode 100644 index 0000000..5f9c85b --- /dev/null +++ b/gcc/testsuite/gcc.target/arm/simd/bf16_mmla_1.c @@ -0,0 +1,19 @@ +/* { dg-do assemble } */ +/* { dg-require-effective-target arm_v8_2a_bf16_neon_ok } */ +/* { dg-add-options arm_v8_2a_bf16_neon } */ +/* { dg-additional-options "-save-temps -O2" } */ +/* { dg-final { check-function-bodies "**" "" } } */ + +#include + +/* +**test_vmmlaq_f32: +** ... +** vmmla.bf16 q0, q1, q2 +** bx lr +*/ +float32x4_t +test_vmmlaq_f32 (float32x4_t r, bfloat16x8_t x, bfloat16x8_t y) +{ + return vbfmmlaq_f32 (r, x, y); +} -- 2.7.4