From 3c33c00f43bfe585d9414dfb620f0f518e55a457 Mon Sep 17 00:00:00 2001 From: Marius Hillenbrand Date: Tue, 4 May 2021 13:30:05 +0200 Subject: [PATCH] IBM Z: Fix error checking for builtin vec_permi The builtin vec_permi is peculiar in that its immediate operand is encoded differently than the immediate operand that is backing the builtin. This fixes the check for the immediate operand, adding a regression test in the process. This partially reverts commit 3191c1f4488d1f7563b563d7ae2a102a26f16d82 2021-05-06 Marius Hillenbrand gcc/ChangeLog: * config/s390/s390-builtins.def (O_M5, O1_M5, ...): Remove unused macros. (s390_vec_permi_s64, s390_vec_permi_b64, s390_vec_permi_u64) (s390_vec_permi_dbl, s390_vpdi): Use the O3_U2 type for the immediate operand. * config/s390/s390.c (s390_const_operand_ok): Remove unused values. gcc/testsuite/ChangeLog: * gcc.target/s390/zvector/imm-range-error-1.c: Fix test for __builtin_s390_vpdi. * gcc.target/s390/zvector/vec-permi.c: New test for builtin vec_permi. --- gcc/config/s390/s390-builtins.def | 44 +++++++----------- gcc/config/s390/s390.c | 7 ++- .../gcc.target/s390/zvector/imm-range-error-1.c | 2 +- gcc/testsuite/gcc.target/s390/zvector/vec-permi.c | 54 ++++++++++++++++++++++ 4 files changed, 76 insertions(+), 31 deletions(-) create mode 100644 gcc/testsuite/gcc.target/s390/zvector/vec-permi.c diff --git a/gcc/config/s390/s390-builtins.def b/gcc/config/s390/s390-builtins.def index f77ab75..8ca002d 100644 --- a/gcc/config/s390/s390-builtins.def +++ b/gcc/config/s390/s390-builtins.def @@ -29,7 +29,6 @@ #undef O_U16 #undef O_U32 -#undef O_M5 #undef O_M12 #undef O_S2 @@ -89,11 +88,6 @@ #undef O3_U32 #undef O4_U32 -#undef O1_M5 -#undef O2_M5 -#undef O3_M5 -#undef O4_M5 - #undef O1_M12 #undef O2_M12 #undef O3_M12 @@ -164,20 +158,19 @@ #define O_U16 8 /* unsigned 16 bit literal */ #define O_U32 9 /* unsigned 32 bit literal */ -#define O_M5 10 /* matches bitmask of 5 */ -#define O_M12 11 /* matches bitmask of 12 */ +#define O_M12 10 /* matches bitmask of 12 */ -#define O_S2 12 /* signed 2 bit literal */ -#define O_S3 13 /* signed 3 bit literal */ -#define O_S4 14 /* signed 4 bit literal */ -#define O_S5 15 /* signed 5 bit literal */ -#define O_S8 16 /* signed 8 bit literal */ -#define O_S12 17 /* signed 12 bit literal */ -#define O_S16 18 /* signed 16 bit literal */ -#define O_S32 19 /* signed 32 bit literal */ +#define O_S2 11 /* signed 2 bit literal */ +#define O_S3 12 /* signed 3 bit literal */ +#define O_S4 13 /* signed 4 bit literal */ +#define O_S5 14 /* signed 5 bit literal */ +#define O_S8 15 /* signed 8 bit literal */ +#define O_S12 16 /* signed 12 bit literal */ +#define O_S16 17 /* signed 16 bit literal */ +#define O_S32 18 /* signed 32 bit literal */ -#define O_ELEM 20 /* Element selector requiring modulo arithmetic. */ -#define O_LIT 21 /* Operand must be a literal fitting the target type. */ +#define O_ELEM 19 /* Element selector requiring modulo arithmetic. */ +#define O_LIT 20 /* Operand must be a literal fitting the target type. */ #define O_SHIFT 5 @@ -230,11 +223,6 @@ #define O3_U32 (O_U32 << (2 * O_SHIFT)) #define O4_U32 (O_U32 << (3 * O_SHIFT)) -#define O1_M5 O_M5 -#define O2_M5 (O_M5 << O_SHIFT) -#define O3_M5 (O_M5 << (2 * O_SHIFT)) -#define O4_M5 (O_M5 << (3 * O_SHIFT)) - #define O1_M12 O_M12 #define O2_M12 (O_M12 << O_SHIFT) #define O3_M12 (O_M12 << (2 * O_SHIFT)) @@ -671,12 +659,12 @@ OB_DEF_VAR (s390_vec_perm_dbl, s390_vperm, 0, B_DEF (s390_vperm, vec_permv16qi, 0, B_VX, 0, BT_FN_UV16QI_UV16QI_UV16QI_UV16QI) OB_DEF (s390_vec_permi, s390_vec_permi_s64, s390_vec_permi_dbl, B_VX, BT_FN_OV4SI_OV4SI_OV4SI_INT) -OB_DEF_VAR (s390_vec_permi_s64, s390_vpdi, 0, O3_M5, BT_OV_V2DI_V2DI_V2DI_INT) -OB_DEF_VAR (s390_vec_permi_b64, s390_vpdi, 0, O3_M5, BT_OV_BV2DI_BV2DI_BV2DI_INT) -OB_DEF_VAR (s390_vec_permi_u64, s390_vpdi, 0, O3_M5, BT_OV_UV2DI_UV2DI_UV2DI_INT) -OB_DEF_VAR (s390_vec_permi_dbl, s390_vpdi, 0, O3_M5, BT_OV_V2DF_V2DF_V2DF_INT) +OB_DEF_VAR (s390_vec_permi_s64, s390_vpdi, 0, O3_U2, BT_OV_V2DI_V2DI_V2DI_INT) +OB_DEF_VAR (s390_vec_permi_b64, s390_vpdi, 0, O3_U2, BT_OV_BV2DI_BV2DI_BV2DI_INT) +OB_DEF_VAR (s390_vec_permi_u64, s390_vpdi, 0, O3_U2, BT_OV_UV2DI_UV2DI_UV2DI_INT) +OB_DEF_VAR (s390_vec_permi_dbl, s390_vpdi, 0, O3_U2, BT_OV_V2DF_V2DF_V2DF_INT) -B_DEF (s390_vpdi, vec_permiv2di, 0, B_VX, O3_M5, BT_FN_UV2DI_UV2DI_UV2DI_INT) +B_DEF (s390_vpdi, vec_permiv2di, 0, B_VX, O3_U2, BT_FN_UV2DI_UV2DI_UV2DI_INT) OB_DEF (s390_vec_splat, s390_vec_splat2_s8, s390_vec_splat2_dbl,B_VX, BT_FN_OV4SI_OV4SI_UCHAR) OB_DEF_VAR (s390_vec_splat2_s8, s390_vrepb, 0, O2_U4, BT_OV_V16QI_V16QI_UCHAR) diff --git a/gcc/config/s390/s390.c b/gcc/config/s390/s390.c index 88361f9..6bbeb64 100644 --- a/gcc/config/s390/s390.c +++ b/gcc/config/s390/s390.c @@ -734,11 +734,14 @@ s390_const_operand_ok (tree arg, int argnum, int op_flags, tree decl) { if (O_UIMM_P (op_flags)) { - unsigned HOST_WIDE_INT bitwidths[] = { 1, 2, 3, 4, 5, 8, 12, 16, 32, 4, 4 }; - unsigned HOST_WIDE_INT bitmasks[] = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 5, 12 }; + unsigned HOST_WIDE_INT bitwidths[] = { 1, 2, 3, 4, 5, 8, 12, 16, 32, 4 }; + unsigned HOST_WIDE_INT bitmasks[] = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 12 }; unsigned HOST_WIDE_INT bitwidth = bitwidths[op_flags - O_U1]; unsigned HOST_WIDE_INT bitmask = bitmasks[op_flags - O_U1]; + gcc_assert(ARRAY_SIZE(bitwidths) == (O_M12 - O_U1 + 1)); + gcc_assert(ARRAY_SIZE(bitmasks) == (O_M12 - O_U1 + 1)); + if (!tree_fits_uhwi_p (arg) || tree_to_uhwi (arg) > (HOST_WIDE_INT_1U << bitwidth) - 1 || (bitmask && tree_to_uhwi (arg) & ~bitmask)) diff --git a/gcc/testsuite/gcc.target/s390/zvector/imm-range-error-1.c b/gcc/testsuite/gcc.target/s390/zvector/imm-range-error-1.c index 1fe68f5..021c8d2 100644 --- a/gcc/testsuite/gcc.target/s390/zvector/imm-range-error-1.c +++ b/gcc/testsuite/gcc.target/s390/zvector/imm-range-error-1.c @@ -22,5 +22,5 @@ main () __builtin_s390_vrepf (s, 4); /* { dg-error "constant argument 2 for builtin '__builtin_s390_vrepf' is out of range \\(0..3\\)" } */ __builtin_s390_vrepg (d, 2); /* { dg-error "constant argument 2 for builtin '__builtin_s390_vrepg' is out of range \\(0..1\\)" } */ - __builtin_s390_vpdi (d, d, 2); /* { dg-error "constant argument 3 for builtin '__builtin_s390_vpdi' is invalid \\(0, 1, 4, 5\\)" } */ + __builtin_s390_vpdi (d, d, 4); /* { dg-error "constant argument 3 for builtin '__builtin_s390_vpdi' is out of range \\(0..3\\)" } */ } diff --git a/gcc/testsuite/gcc.target/s390/zvector/vec-permi.c b/gcc/testsuite/gcc.target/s390/zvector/vec-permi.c new file mode 100644 index 0000000..c0a852b --- /dev/null +++ b/gcc/testsuite/gcc.target/s390/zvector/vec-permi.c @@ -0,0 +1,54 @@ +/* { dg-do compile } */ +/* { dg-options "-O3 -march=z13 -mzarch --save-temps" } */ +/* { dg-do run { target { s390_z13_hw } } } */ + +/* + * The vector intrinsic vec_permi(a, b, c) chooses one of the two eight-byte + * vector elements in each of a and b, depending on the value of c. The valid + * values for c differ from the encoding for the M4 field in assembly and in the + * binary instruction. + * + * selection | c | encoding in assembly + * a[0] b[0] | 0 | 0 + * a[0] b[1] | 1 | 1 + * a[1] b[0] | 2 | 4 + * a[1] b[1] | 3 | 5 + * + * (i.e., indices a[i] b[j] are encoded for c as (i<<1) | j, yet for the + * M4 field as (i<<2) | j. + */ +#include +#include + +typedef unsigned long long uv2di __attribute__((vector_size(16))); + +__attribute__ ((noipa)) static uv2di +do_vec_permi(uv2di a, uv2di b, int c) +{ + switch(c) { + case 0: return vec_permi(a, b, 0); + case 1: return vec_permi(a, b, 1); + case 2: return vec_permi(a, b, 2); + case 3: return vec_permi(a, b, 3); + default: assert(0); + } +} + +/* { dg-final { scan-assembler-times {\n\tvpdi\t%v\d+,%v\d+,%v\d+,0\n} 1 } } */ +/* { dg-final { scan-assembler-times {\n\tvpdi\t%v\d+,%v\d+,%v\d+,1\n} 1 } } */ +/* { dg-final { scan-assembler-times {\n\tvpdi\t%v\d+,%v\d+,%v\d+,4\n} 1 } } */ +/* { dg-final { scan-assembler-times {\n\tvpdi\t%v\d+,%v\d+,%v\d+,5\n} 1 } } */ + +int +main (void) +{ + uv2di a = { 0xa0, 0xa1 }; + uv2di b = { 0xb0, 0xb1 }; + + for (int i = 0; i < 2; i++) + for (int j = 0; j < 2; j++) { + uv2di res = do_vec_permi(a, b, (i<<1)|j); + assert(res[0] == a[i]); + assert(res[1] == b[j]); + } +} -- 2.7.4