From 361da782a25031c6ae3967bf8c10a8119845255c Mon Sep 17 00:00:00 2001 From: Andreas Krebbel Date: Wed, 4 Aug 2021 18:40:11 +0200 Subject: [PATCH] IBM Z: Implement TARGET_VECTORIZE_VEC_PERM_CONST for vpdi This patch makes use of the vector permute double immediate instruction for constant permute vectors. gcc/ChangeLog: * config/s390/s390.c (expand_perm_with_vpdi): New function. (vectorize_vec_perm_const_1): Call expand_perm_with_vpdi. * config/s390/vector.md (*vpdi1, @vpdi1): Enable a parameterized expander. (*vpdi4, @vpdi4): Likewise. gcc/testsuite/ChangeLog: * gcc.target/s390/vector/perm-vpdi.c: New test. --- gcc/config/s390/s390.c | 47 +++++++++++++++++++++++ gcc/config/s390/vector.md | 5 +-- gcc/testsuite/gcc.target/s390/vector/perm-vpdi.c | 49 ++++++++++++++++++++++++ 3 files changed, 98 insertions(+), 3 deletions(-) create mode 100644 gcc/testsuite/gcc.target/s390/vector/perm-vpdi.c diff --git a/gcc/config/s390/s390.c b/gcc/config/s390/s390.c index 8dc805f..673a134 100644 --- a/gcc/config/s390/s390.c +++ b/gcc/config/s390/s390.c @@ -16979,6 +16979,50 @@ expand_perm_with_merge (const struct expand_vec_perm_d &d) return merge_lo_p || merge_hi_p; } +/* Try to expand the vector permute operation described by D using the + vector permute doubleword immediate instruction vpdi. Return true + if vpdi could be used. + + VPDI allows 4 different immediate values (0, 1, 4, 5). The 0 and 5 + cases are covered by vmrhg and vmrlg already. So we only care + about the 1, 4 cases here. + 1 - First element of src1 and second of src2 + 4 - Second element of src1 and first of src2 */ +static bool +expand_perm_with_vpdi (const struct expand_vec_perm_d &d) +{ + bool vpdi1_p = false; + bool vpdi4_p = false; + rtx op0_reg, op1_reg; + + // Only V2DI and V2DF are supported here. + if (d.nelt != 2) + return false; + + if (d.perm[0] == 0 && d.perm[1] == 3) + vpdi1_p = true; + + if (d.perm[0] == 1 && d.perm[1] == 2) + vpdi4_p = true; + + if (!vpdi1_p && !vpdi4_p) + return false; + + if (d.testing_p) + return true; + + op0_reg = force_reg (GET_MODE (d.op0), d.op0); + op1_reg = force_reg (GET_MODE (d.op1), d.op1); + + if (vpdi1_p) + emit_insn (gen_vpdi1 (d.vmode, d.target, op0_reg, op1_reg)); + + if (vpdi4_p) + emit_insn (gen_vpdi4 (d.vmode, d.target, op0_reg, op1_reg)); + + return true; +} + /* Try to find the best sequence for the vector permute operation described by D. Return true if the operation could be expanded. */ @@ -16988,6 +17032,9 @@ vectorize_vec_perm_const_1 (const struct expand_vec_perm_d &d) if (expand_perm_with_merge (d)) return true; + if (expand_perm_with_vpdi (d)) + return true; + return false; } diff --git a/gcc/config/s390/vector.md b/gcc/config/s390/vector.md index d224165..70274a6 100644 --- a/gcc/config/s390/vector.md +++ b/gcc/config/s390/vector.md @@ -768,7 +768,7 @@ ; First DW of op1 and second DW of op2 -(define_insn "*vpdi1" +(define_insn "@vpdi1" [(set (match_operand:V_HW_2 0 "register_operand" "=v") (vec_select:V_HW_2 (vec_concat: @@ -780,7 +780,7 @@ [(set_attr "op_type" "VRR")]) ; Second DW of op1 and first of op2 -(define_insn "*vpdi4" +(define_insn "@vpdi4" [(set (match_operand:V_HW_2 0 "register_operand" "=v") (vec_select:V_HW_2 (vec_concat: @@ -926,7 +926,6 @@ operands[5] = simplify_gen_subreg (DFmode, operands[1], TFmode, 8); }) -; vec_perm_const for V2DI using vpdi? ;; ;; Vector integer arithmetic instructions diff --git a/gcc/testsuite/gcc.target/s390/vector/perm-vpdi.c b/gcc/testsuite/gcc.target/s390/vector/perm-vpdi.c new file mode 100644 index 0000000..cc92531 --- /dev/null +++ b/gcc/testsuite/gcc.target/s390/vector/perm-vpdi.c @@ -0,0 +1,49 @@ +/* { dg-do run { target { s390*-*-* } } } */ +/* { dg-options "-O3 -mzarch -march=z14 -mzvector --save-temps" } */ + +/* { dg-final { scan-assembler-times "\tvmrhg\t" 3 } } */ +/* { dg-final { scan-assembler-times "\tvmrlg\t" 3 } } */ +/* { dg-final { scan-assembler-times "\tvpdi\t" 6 } } */ + +#include "vec-types.h" +#include + +#define GEN_PERMI_BITS(VEC_TYPE, BITS) \ + VEC_TYPE __attribute__((noinline)) \ + permi_##BITS##_##VEC_TYPE(VEC_TYPE a, VEC_TYPE b) { \ + return (VEC_TYPE){a[((BITS) & 2) >> 1], b[(BITS) & 1] }; } + +#define GEN_PERMI(VEC_TYPE) \ + GEN_PERMI_BITS(VEC_TYPE, 0); \ + GEN_PERMI_BITS(VEC_TYPE, 1); \ + GEN_PERMI_BITS(VEC_TYPE, 2); \ + GEN_PERMI_BITS(VEC_TYPE, 3); \ + +GEN_PERMI(v2di) +GEN_PERMI(uv2di) +GEN_PERMI(v2df) + + +#define CHECK_PERMI_BITS(VEC_TYPE, BITS) \ + VEC_TYPE r##BITS = permi_##BITS##_##VEC_TYPE (a, b); \ + if (r##BITS[0] != ((BITS) & 2) >> 1 \ + || r##BITS[1] != ((BITS) & 1) + 2) \ + __builtin_abort(); + +#define CHECK_PERMI(VEC_TYPE) \ + { \ + VEC_TYPE a = GEN_SEQ_VEC (VEC_TYPE, 0); \ + VEC_TYPE b = GEN_SEQ_VEC (VEC_TYPE, 2); \ + CHECK_PERMI_BITS (VEC_TYPE, 0); \ + CHECK_PERMI_BITS (VEC_TYPE, 1); \ + CHECK_PERMI_BITS (VEC_TYPE, 2); \ + CHECK_PERMI_BITS (VEC_TYPE, 3); \ + } + +int +main () +{ + CHECK_PERMI (v2di); + CHECK_PERMI (uv2di); + CHECK_PERMI (v2df); +} -- 2.7.4