From b8eaa7545b643a418aa44054b8f2d79a2b3c6ef2 Mon Sep 17 00:00:00 2001 From: Carl Love Date: Mon, 11 May 2020 21:22:07 -0500 Subject: [PATCH] rs6000: Add xxgenpcvwm and xxgenpcvdm Add support for xxgenpcv[dw]m, along with individual and overloaded built-in functions for access. [gcc] 2020-05-11 Carl Love * config/rs6000/altivec.h (vec_genpcvm): New #define. * config/rs6000/rs6000-builtin.def (XXGENPCVM_V16QI): New built-in instantiation. (XXGENPCVM_V8HI): Likewise. (XXGENPCVM_V4SI): Likewise. (XXGENPCVM_V2DI): Likewise. (XXGENPCVM): New overloaded built-in instantiation. * config/rs6000/rs6000-call.c (altivec_overloaded_builtins): Add entries for FUTURE_BUILTIN_VEC_XXGENPCVM. (altivec_expand_builtin): Add special handling for FUTURE_BUILTIN_VEC_XXGENPCVM. (builtin_function_type): Add handling for FUTURE_BUILTIN_XXGENPCVM_{V16QI,V8HI,V4SI,V2DI}. * config/rs6000/vsx.md (VSX_EXTRACT_I4): New mode iterator. (UNSPEC_XXGENPCV): New constant. (xxgenpcvm_<mode>_internal): New insn. (xxgenpcvm_<mode>): New expansion. * doc/extend.texi: Add documentation for vec_genpcvm built-ins. [gcc/testsuite] 2020-05-11 Carl Love * gcc.target/powerpc/xxgenpc-runnable.c: New. --- gcc/ChangeLog | 21 ++ gcc/config/rs6000/altivec.h | 1 + gcc/config/rs6000/rs6000-builtin.def | 5 + gcc/config/rs6000/rs6000-call.c | 31 +++ gcc/config/rs6000/vsx.md | 32 +++ gcc/doc/extend.texi | 12 ++ gcc/testsuite/ChangeLog | 4 + .../gcc.target/powerpc/xxgenpc-runnable.c | 231 +++++++++++++++++++++ 8 files changed, 337 insertions(+) create mode 100644 gcc/testsuite/gcc.target/powerpc/xxgenpc-runnable.c diff --git a/gcc/ChangeLog b/gcc/ChangeLog index 28a2c8f..0edf580 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,3 +1,24 @@ +2020-05-11 Carl Love + + * config/rs6000/altivec.h (vec_genpcvm): New #define. + * config/rs6000/rs6000-builtin.def (XXGENPCVM_V16QI): New built-in + instantiation. + (XXGENPCVM_V8HI): Likewise. + (XXGENPCVM_V4SI): Likewise. + (XXGENPCVM_V2DI): Likewise. 
+ (XXGENPCVM): New overloaded built-in instantiation. + * config/rs6000/rs6000-call.c (altivec_overloaded_builtins): Add + entries for FUTURE_BUILTIN_VEC_XXGENPCVM. + (altivec_expand_builtin): Add special handling for + FUTURE_BUILTIN_VEC_XXGENPCVM. + (builtin_function_type): Add handling for + FUTURE_BUILTIN_XXGENPCVM_{V16QI,V8HI,V4SI,V2DI}. + * config/rs6000/vsx.md (VSX_EXTRACT_I4): New mode iterator. + (UNSPEC_XXGENPCV): New constant. + (xxgenpcvm_<mode>_internal): New insn. + (xxgenpcvm_<mode>): New expansion. + * doc/extend.texi: Add documentation for vec_genpcvm built-ins. + 2020-05-11 Kelvin Nilsen * config/rs6000/altivec.h (vec_strir): New #define. diff --git a/gcc/config/rs6000/altivec.h b/gcc/config/rs6000/altivec.h index b29413d..3729cea 100644 --- a/gcc/config/rs6000/altivec.h +++ b/gcc/config/rs6000/altivec.h @@ -694,6 +694,7 @@ __altivec_scalar_pred(vec_any_nle, #define vec_pdep(a, b) __builtin_altivec_vpdepd (a, b) #define vec_pext(a, b) __builtin_altivec_vpextd (a, b) #define vec_cfuge(a, b) __builtin_altivec_vcfuged (a, b) +#define vec_genpcvm(a, b) __builtin_vec_xxgenpcvm (a, b) /* Overloaded built-in functions for future architecture. 
*/ #define vec_gnb(a, b) __builtin_vec_gnb (a, b) diff --git a/gcc/config/rs6000/rs6000-builtin.def b/gcc/config/rs6000/rs6000-builtin.def index 1f86293..9acb448 100644 --- a/gcc/config/rs6000/rs6000-builtin.def +++ b/gcc/config/rs6000/rs6000-builtin.def @@ -2611,6 +2611,10 @@ BU_FUTURE_V_2 (VPDEPD, "vpdepd", CONST, vpdepd) BU_FUTURE_V_2 (VPEXTD, "vpextd", CONST, vpextd) BU_FUTURE_V_2 (VGNB, "vgnb", CONST, vgnb) BU_FUTURE_V_4 (XXEVAL, "xxeval", CONST, xxeval) +BU_FUTURE_V_2 (XXGENPCVM_V16QI, "xxgenpcvm_v16qi", CONST, xxgenpcvm_v16qi) +BU_FUTURE_V_2 (XXGENPCVM_V8HI, "xxgenpcvm_v8hi", CONST, xxgenpcvm_v8hi) +BU_FUTURE_V_2 (XXGENPCVM_V4SI, "xxgenpcvm_v4si", CONST, xxgenpcvm_v4si) +BU_FUTURE_V_2 (XXGENPCVM_V2DI, "xxgenpcvm_v2di", CONST, xxgenpcvm_v2di) BU_FUTURE_V_1 (VSTRIBR, "vstribr", CONST, vstrir_v16qi) BU_FUTURE_V_1 (VSTRIHR, "vstrihr", CONST, vstrir_v8hi) @@ -2627,6 +2631,7 @@ BU_FUTURE_OVERLOAD_2 (CLRL, "clrl") BU_FUTURE_OVERLOAD_2 (CLRR, "clrr") BU_FUTURE_OVERLOAD_2 (GNB, "gnb") BU_FUTURE_OVERLOAD_4 (XXEVAL, "xxeval") +BU_FUTURE_OVERLOAD_2 (XXGENPCVM, "xxgenpcvm") BU_FUTURE_OVERLOAD_1 (VSTRIR, "strir") BU_FUTURE_OVERLOAD_1 (VSTRIL, "stril") diff --git a/gcc/config/rs6000/rs6000-call.c b/gcc/config/rs6000/rs6000-call.c index 93f8748..d4dffc5 100644 --- a/gcc/config/rs6000/rs6000-call.c +++ b/gcc/config/rs6000/rs6000-call.c @@ -5532,6 +5532,15 @@ const struct altivec_builtin_types altivec_overloaded_builtins[] = { RS6000_BTI_unsigned_V2DI, RS6000_BTI_UINTQI, 0 }, { FUTURE_BUILTIN_VEC_GNB, FUTURE_BUILTIN_VGNB, RS6000_BTI_unsigned_long_long, RS6000_BTI_unsigned_V1TI, RS6000_BTI_UINTQI, 0 }, + { FUTURE_BUILTIN_VEC_XXGENPCVM, FUTURE_BUILTIN_XXGENPCVM_V2DI, + RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI, RS6000_BTI_INTSI, 0 }, + { FUTURE_BUILTIN_VEC_XXGENPCVM, FUTURE_BUILTIN_XXGENPCVM_V4SI, + RS6000_BTI_unsigned_V4SI, RS6000_BTI_unsigned_V4SI, RS6000_BTI_INTSI, 0 }, + { FUTURE_BUILTIN_VEC_XXGENPCVM, FUTURE_BUILTIN_XXGENPCVM_V8HI, + RS6000_BTI_unsigned_V8HI, 
RS6000_BTI_unsigned_V8HI, RS6000_BTI_INTSI, 0 }, + { FUTURE_BUILTIN_VEC_XXGENPCVM, FUTURE_BUILTIN_XXGENPCVM_V16QI, + RS6000_BTI_unsigned_V16QI, RS6000_BTI_unsigned_V16QI, + RS6000_BTI_INTSI, 0 }, /* The overloaded XXEVAL definitions are handled specially because the fourth unsigned char operand is not encoded in this table. */ @@ -10384,6 +10393,24 @@ altivec_expand_builtin (tree exp, rtx target, bool *expandedp) } break; + case FUTURE_BUILTIN_VEC_XXGENPCVM: + arg1 = CALL_EXPR_ARG (exp, 1); + STRIP_NOPS (arg1); + + /* Generate a normal call if it is invalid. */ + if (arg1 == error_mark_node) + return expand_call (exp, target, false); + + if (TREE_CODE (arg1) != INTEGER_CST + || !IN_RANGE (TREE_INT_CST_LOW (arg1), 0, 3)) + { + size_t uns_fcode = (size_t) fcode; + const char *name = rs6000_builtin_info[uns_fcode].name; + error ("Second argument of %qs must be in the range [0, 3].", name); + return expand_call (exp, target, false); + } + break; + default: break; /* Fall through. */ @@ -13202,6 +13229,10 @@ builtin_function_type (machine_mode mode_ret, machine_mode mode_arg0, case FUTURE_BUILTIN_VGNB: case FUTURE_BUILTIN_VPDEPD: case FUTURE_BUILTIN_VPEXTD: + case FUTURE_BUILTIN_XXGENPCVM_V16QI: + case FUTURE_BUILTIN_XXGENPCVM_V8HI: + case FUTURE_BUILTIN_XXGENPCVM_V4SI: + case FUTURE_BUILTIN_XXGENPCVM_V2DI: h.uns_p[0] = 1; h.uns_p[1] = 1; h.uns_p[2] = 1; diff --git a/gcc/config/rs6000/vsx.md b/gcc/config/rs6000/vsx.md index 1fcc1b0..62b4f61 100644 --- a/gcc/config/rs6000/vsx.md +++ b/gcc/config/rs6000/vsx.md @@ -217,6 +217,7 @@ ;; done on ISA 2.07 and not just ISA 3.0. 
(define_mode_iterator VSX_EXTRACT_I [V16QI V8HI V4SI]) (define_mode_iterator VSX_EXTRACT_I2 [V16QI V8HI]) +(define_mode_iterator VSX_EXTRACT_I4 [V16QI V8HI V4SI V2DI]) (define_mode_attr VSX_EXTRACT_WIDTH [(V16QI "b") (V8HI "h") @@ -342,6 +343,7 @@ UNSPEC_VSX_FIRST_MATCH_EOS_INDEX UNSPEC_VSX_FIRST_MISMATCH_INDEX UNSPEC_VSX_FIRST_MISMATCH_EOS_INDEX + UNSPEC_XXGENPCV ]) ;; VSX moves @@ -2998,6 +3000,36 @@ "xxpermdi %x0,%x1,%x1,2" [(set_attr "type" "vecperm")]) +(define_insn "xxgenpcvm__internal" + [(set (match_operand:VSX_EXTRACT_I4 0 "altivec_register_operand" "=wa") + (unspec:VSX_EXTRACT_I4 + [(match_operand:VSX_EXTRACT_I4 1 "altivec_register_operand" "v") + (match_operand:QI 2 "const_0_to_3_operand" "n")] + UNSPEC_XXGENPCV))] + "TARGET_FUTURE && TARGET_64BIT" + "xxgenpcvm %x0,%1,%2" + [(set_attr "type" "vecsimple")]) + +(define_expand "xxgenpcvm_" + [(use (match_operand:VSX_EXTRACT_I4 0 "register_operand")) + (use (match_operand:VSX_EXTRACT_I4 1 "register_operand")) + (use (match_operand:QI 2 "immediate_operand"))] + "TARGET_FUTURE" +{ + if (!BYTES_BIG_ENDIAN) + { + /* gen_xxgenpcvm assumes Big Endian order. If LE, + change swap upper and lower double words. */ + rtx tmp = gen_reg_rtx (mode); + + emit_insn (gen_xxswapd_ (tmp, operands[1])); + operands[1] = tmp; + } + emit_insn (gen_xxgenpcvm__internal (operands[0], operands[1], + operands[2])); + DONE; +}) + ;; lxvd2x for little endian loads. We need several of ;; these since the form of the PARALLEL differs by mode. (define_insn "*vsx_lxvd2x2_le_" diff --git a/gcc/doc/extend.texi b/gcc/doc/extend.texi index e35db43..c352c51 100644 --- a/gcc/doc/extend.texi +++ b/gcc/doc/extend.texi @@ -20889,6 +20889,18 @@ little-endian targets. Choose this built-in to check for presence of zero element if the same argument is also passed to @code{vec_strir}. 
@findex vec_strir_p +@smallexample +@exdent vector unsigned char vec_genpcvm (vector unsigned char, const int) +@exdent vector unsigned short vec_genpcvm (vector unsigned short, const int) +@exdent vector unsigned int vec_genpcvm (vector unsigned int, const int) +@exdent vector unsigned long long int vec_genpcvm (vector unsigned long long int, + const int) +@end smallexample +Generate PCV from specified Mask size, as if implemented by the Future +@code{xxgenpcvbm}, @code{xxgenpcvhm}, @code{xxgenpcvwm}, and @code{xxgenpcvdm} +instructions, where the immediate value is either 0, 1, 2 or 3. +@findex vec_genpcvm + @node PowerPC Hardware Transactional Memory Built-in Functions @subsection PowerPC Hardware Transactional Memory Built-in Functions GCC provides two interfaces for accessing the Hardware Transactional diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog index f35b022..960dff0 100644 --- a/gcc/testsuite/ChangeLog +++ b/gcc/testsuite/ChangeLog @@ -1,3 +1,7 @@ +2020-05-11 Carl Love + + * gcc.target/powerpc/xxgenpc-runnable.c: New. + 2020-05-11 Kelvin Nilsen * gcc.target/powerpc/vec-stril-0.c: New. 
diff --git a/gcc/testsuite/gcc.target/powerpc/xxgenpc-runnable.c b/gcc/testsuite/gcc.target/powerpc/xxgenpc-runnable.c new file mode 100644 index 0000000..de309ef --- /dev/null +++ b/gcc/testsuite/gcc.target/powerpc/xxgenpc-runnable.c @@ -0,0 +1,231 @@ +/* { dg-do run } */ +/* { dg-options "-mcpu=future -O2" } */ +/* { dg-require-effective-target powerpc_future_hw } */ + +#include + +#ifdef DEBUG +#include +#endif + +#define IMM0 0 +#define IMM1 1 +#define IMM2 2 +#define IMM3 3 + +void abort (void); + +int main () +{ + int i; + vector unsigned char vec_byte_arg; + vector unsigned char vec_byte_result, vec_byte_expected0, vec_byte_expected1; + vector unsigned char vec_byte_expected2, vec_byte_expected3; + + vector unsigned short vec_hword_arg; + vector unsigned short vec_hword_result, vec_hword_expected0; + vector unsigned short vec_hword_expected1, vec_hword_expected2; + vector unsigned short vec_hword_expected3; + + vector unsigned int vec_word_arg; + vector unsigned int vec_word_result, vec_word_expected0, vec_word_expected1; + vector unsigned int vec_word_expected2, vec_word_expected3; + + vec_byte_arg = (vector unsigned char ){ 0xFF, 0xF0, 0x7F, 0xFF, + 0xFF, 0xFF, 0xFF, 0xFF, + 0x00, 0x00, 0x01, 0x23, + 0x45, 0x67, 0x00, 0x00 }; + + vec_byte_result = (vector unsigned char ){ 0xFF, 0xFF, 0xFF, 0xFF, + 0xFF, 0xFF, 0xFF, 0xFF, + 0xFF, 0xFF, 0xFF, 0xFF, + 0xFF, 0xFF, 0xFF, 0xFF }; + + vec_byte_expected0 = (vector unsigned char){ 0x1F, 0x1E, 0x1D, 0x1C, + 0x1B, 0x1A, 0x19, 0x18, + 0x06, 0x05, 0x15, 0x04, + 0x03, 0x02, 0x01, 0x00 }; + + vec_byte_expected1 = (vector unsigned char){ 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, + 0x00, 0x07, 0x06, 0x04, + 0x03, 0x02, 0x01, 0x00 }; + + vec_byte_expected2 = (vector unsigned char){ 0x10, 0x11, 0x12, 0x13, + 0x14, 0x15, 0x16, 0x17, + 0x00, 0x01, 0x1a, 0x02, + 0x03, 0x04, 0x05, 0x06 }; + + vec_byte_expected3 = (vector unsigned char){ 0x08, 0x09, 0x0B, 0x0C, + 0x0D, 0x0E, 0x0F, 0x00, + 0x00, 0x00, 0x00, 0x00, + 0x00, 
0x00, 0x00, 0x00 }; + + vec_hword_arg = (vector unsigned short) { 0x0004, 0xF003, 0x0002, 0x0001, + 0xF004, 0x1003, 0xF002, 0x0001 }; + vec_hword_expected0 = (vector unsigned short int){ 0x405, 0x1c1d, 0x203, + 0x1819, 0x1617, 0x1, + 0x1213, 0x1011 }; + vec_hword_expected1 = (vector unsigned short int){ 0x0, 0x0, 0x0, 0x0, + 0x0, 0xe0f, 0xa0b, 0x405 }; + vec_hword_expected2 = (vector unsigned short int){ 0x100, 0x1312, 0x302, + 0x1716, 0x1918, 0x504, + 0x1d1c, 0x1f1e }; + vec_hword_expected3 = (vector unsigned short int){ 0x100, 0x504, 0xb0a, 0x0, + 0x0, 0x0, 0x0, 0x0 }; + + vec_word_arg = (vector unsigned int){ 0xFEDCBA90, 0xF101, 0xF0000202, 0xF303 }; + vec_word_expected0 = (vector unsigned int){ 0x4050607, 0x18191a1b, + 0x10203, 0x10111213 }; + vec_word_expected1 = (vector unsigned int){ 0x0, 0x0, 0xc0d0e0f, 0x4050607 }; + vec_word_expected2 = (vector unsigned int){ 0x3020100, 0x17161514, + 0x7060504, 0x1f1e1d1c }; + vec_word_expected3 = (vector unsigned int){ 0x3020100, 0xb0a0908, 0x0, 0x0 }; + + vec_byte_result = vec_genpcvm (vec_byte_arg, IMM0); + + for (i = 0; i < 16; i++) { + if (vec_byte_expected0[i] != vec_byte_result[i]) +#if DEBUG + printf("ERROR: vec_genpcvm(IMM = 0), vec_byte_expected[%d] = 0x%x does not match vec_byte_result[%d] = 0x%x\n", + i, vec_byte_expected0[i], i, vec_byte_result[i]); +#else + abort(); +#endif + } + + vec_byte_result = vec_genpcvm (vec_byte_arg, IMM1); + + for (i = 0; i < 16; i++) { + if (vec_byte_expected1[i] != vec_byte_result[i]) +#if DEBUG + printf("ERROR: vec_genpcvm(IMM = 1), vec_byte_expected[%d] = 0x%x does not match vec_byte_result[%d] = 0x%x\n", + i, vec_byte_expected1[i], i, vec_byte_result[i]); +#else + abort(); +#endif + } + + vec_byte_result = vec_genpcvm (vec_byte_arg, IMM2); + + for (i = 0; i < 16; i++) { + if (vec_byte_expected2[i] != vec_byte_result[i]) +#if DEBUG + printf("ERROR: vec_genpcvmbm(IMM = 2), vec_byte_expected[%d] = 0x%x does not match vec_byte_result[%d] = 0x%x\n", + i, vec_byte_expected2[i], i, 
vec_byte_result[i]); +#else + abort(); +#endif + } + + vec_byte_result = vec_genpcvm (vec_byte_arg, IMM3); + + for (i = 0; i < 16; i++) { + if (vec_byte_expected3[i] != vec_byte_result[i]) +#if DEBUG + printf("ERROR: vec_genpcvm(IMM = 3), vec_byte_expected[%d] = 0x%x does not match vec_byte_result[%d] = 0x%x\n", + i, vec_byte_expected3[i], i, vec_byte_result[i]); +#else + abort(); +#endif + } + + vec_hword_result = vec_genpcvm (vec_hword_arg, IMM0); + + for (i = 0; i < 8; i++) { + if (vec_hword_expected0[i] != vec_hword_result[i]) +#if DEBUG + printf("ERROR: vec_genpcvmhm(IMM = 0), vec_hword_expected[%d] = 0x%x does not match vec_hword_result[%d] = 0x%x\n", + i, vec_hword_expected0[i], i, vec_hword_result[i]); +#else + abort(); +#endif + } + + vec_hword_result = vec_genpcvm (vec_hword_arg, IMM1); + + for (i = 0; i < 8; i++) { + if (vec_hword_expected1[i] != vec_hword_result[i]) +#if DEBUG + printf("ERROR: vec_genpcvm(IMM = 1), vec_hword_expected[%d] = 0x%x does not match vec_hword_result[%d] = 0x%x\n", + i, vec_hword_expected1[i], i, vec_hword_result[i]); +#else + abort(); +#endif + } + + vec_hword_result = vec_genpcvm (vec_hword_arg, IMM2); + + for (i = 0; i < 8; i++) { + if (vec_hword_expected2[i] != vec_hword_result[i]) +#if DEBUG + printf("ERROR: vec_genpcvm(IMM = 2), vec_hword_expected[%d] = 0x%x does not match vec_hword_result[%d] = 0x%x\n", + i, vec_hword_expected2[i], i, vec_hword_result[i]); +#else + abort(); +#endif + } + + vec_hword_result = vec_genpcvm (vec_hword_arg, IMM3); + + for (i = 0; i < 8; i++) { + if (vec_hword_expected3[i] != vec_hword_result[i]) +#if DEBUG + printf("ERROR: vec_genpcvm(IMM = 3), vec_hword_expected[%d] = 0x%x does not match vec_hword_result[%d] = 0x%x\n", + i, vec_hword_expected3[i], i, vec_hword_result[i]); +#else + abort(); +#endif + } + + + vec_word_result = vec_genpcvm (vec_word_arg, IMM0); + + for (i = 0; i < 4; i++) { + if (vec_word_expected0[i] != vec_word_result[i]) +#if DEBUG + printf("ERROR: vec_genpcvm(IMM = 0), 
vec_word_expected[%d] = 0x%x does not match vec_word_result[%d] = 0x%x\n", + i, vec_word_expected0[i], i, vec_word_result[i]); +#else + abort(); +#endif + } + + vec_word_result = vec_genpcvm (vec_word_arg, IMM1); + + for (i = 0; i < 4; i++) { + if (vec_word_expected1[i] != vec_word_result[i]) +#if DEBUG + printf("ERROR: vec_genpcvm(IMM = 1), vec_word_expected[%d] = 0%x does not match vec_word_result[%d] = 0x%x\n", + i, vec_word_expected1[i], i, vec_word_result[i]); +#else + abort(); +#endif + } + + vec_word_result = vec_genpcvm (vec_word_arg, IMM2); + + for (i = 0; i < 4; i++) { + if (vec_word_expected2[i] != vec_word_result[i]) +#if DEBUG + printf("ERROR: vec_genpcvm(IMM = 2), vec_word_expected[%d] = 0x%x does not match vec_word_result[%d] = 0x%x\n", + i, vec_word_expected2[i], i, vec_word_result[i]); +#else + abort(); +#endif + } + + vec_word_result = vec_genpcvm (vec_word_arg, IMM3); + + for (i = 0; i < 4; i++) { + if (vec_word_expected3[i] != vec_word_result[i]) +#if DEBUG + printf("ERROR: vec_genpcvm(IMM = 3), vec_word_expected[%d] = 0x%x does not match vec_word_result[%d] = 0x%x\n", + i, vec_word_expected3[i], i, vec_word_result[i]); +#else + abort(); +#endif + } + + return 0; +} -- 2.7.4