From 880ab4be197623b3f39eefde091611ef4ad77549 Mon Sep 17 00:00:00 2001 From: Andrey Turetskiy Date: Tue, 6 Nov 2012 07:43:37 +0000 Subject: [PATCH] i386.c (bdesc_args): Rename CODE_FOR_avx2_umulhrswv16hi3 to CODE_FOR_avx2_pmulhrswv16hi3. * config/i386/i386.c (bdesc_args): Rename CODE_FOR_avx2_umulhrswv16hi3 to CODE_FOR_avx2_pmulhrswv16hi3. * config/i386/predicates.md (const1_operand): Extend for vectors. * config/i386/sse.md (ssse3_avx2): Extend. (ssedoublemode): Ditto. (<sse2_avx2>_uavg<mode>3): Merge avx2_uavgv32qi3, sse2_uavgv16qi3, avx2_uavgv16hi3 and sse2_uavgv8hi3 into one. (*<sse2_avx2>_uavg<mode>3): Merge *avx2_uavgv32qi3, *sse2_uavgv16qi3, *avx2_uavgv16hi3 and *sse2_uavgv8hi3 into one. (PMULHRSW): New. (<ssse3_avx2>_pmulhrsw<mode>3): Merge avx2_umulhrswv16hi3, ssse3_pmulhrswv8hi3 and ssse3_pmulhrswv4hi3 into one. (*avx2_pmulhrswv16hi3): Replace const_vector with const1_operand predicate. (*ssse3_pmulhrswv8hi3): Ditto. (*ssse3_pmulhrswv4hi3): Ditto. From-SVN: r193208 --- gcc/ChangeLog | 19 +++ gcc/config/i386/i386.c | 2 +- gcc/config/i386/predicates.md | 10 +- gcc/config/i386/sse.md | 325 ++++++++---------------------------------- 4 files changed, 83 insertions(+), 273 deletions(-) diff --git a/gcc/ChangeLog b/gcc/ChangeLog index 9b170a0..fff511f 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,3 +1,22 @@ +2012-11-06 Andrey Turetskiy + + * config/i386/i386.c (bdesc_args): Rename CODE_FOR_avx2_umulhrswv16hi3 to + CODE_FOR_avx2_pmulhrswv16hi3. + * config/i386/predicates.md (const1_operand): Extend for vectors. + * config/i386/sse.md (ssse3_avx2): Extend. + (ssedoublemode): Ditto. + (<sse2_avx2>_uavg<mode>3): Merge avx2_uavgv32qi3, sse2_uavgv16qi3, + avx2_uavgv16hi3 and sse2_uavgv8hi3 into one. + (*<sse2_avx2>_uavg<mode>3): Merge *avx2_uavgv32qi3, *sse2_uavgv16qi3, + *avx2_uavgv16hi3 and *sse2_uavgv8hi3 into one. + (PMULHRSW): New. + (<ssse3_avx2>_pmulhrsw<mode>3): Merge avx2_umulhrswv16hi3, + ssse3_pmulhrswv8hi3 and ssse3_pmulhrswv4hi3 into one. + (*avx2_pmulhrswv16hi3): Replace const_vector with const1_operand + predicate. + (*ssse3_pmulhrswv8hi3): Ditto. 
+ (*ssse3_pmulhrswv4hi3): Ditto. + 2012-11-06 Joern Rennecke * config/epiphany/epiphany.c (epiphany_address_cost): diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c index d92fb16..a8b0962 100644 --- a/gcc/config/i386/i386.c +++ b/gcc/config/i386/i386.c @@ -27658,7 +27658,7 @@ static const struct builtin_description bdesc_args[] = { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_zero_extendv4hiv4di2 , "__builtin_ia32_pmovzxwq256", IX86_BUILTIN_PMOVZXWQ256, UNKNOWN, (int) V4DI_FTYPE_V8HI }, { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_zero_extendv4siv4di2 , "__builtin_ia32_pmovzxdq256", IX86_BUILTIN_PMOVZXDQ256, UNKNOWN, (int) V4DI_FTYPE_V4SI }, { OPTION_MASK_ISA_AVX2, CODE_FOR_vec_widen_smult_even_v8si, "__builtin_ia32_pmuldq256", IX86_BUILTIN_PMULDQ256, UNKNOWN, (int) V4DI_FTYPE_V8SI_V8SI }, - { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_umulhrswv16hi3 , "__builtin_ia32_pmulhrsw256", IX86_BUILTIN_PMULHRSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI }, + { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pmulhrswv16hi3 , "__builtin_ia32_pmulhrsw256", IX86_BUILTIN_PMULHRSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI }, { OPTION_MASK_ISA_AVX2, CODE_FOR_umulv16hi3_highpart, "__builtin_ia32_pmulhuw256" , IX86_BUILTIN_PMULHUW256 , UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI }, { OPTION_MASK_ISA_AVX2, CODE_FOR_smulv16hi3_highpart, "__builtin_ia32_pmulhw256" , IX86_BUILTIN_PMULHW256 , UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI }, { OPTION_MASK_ISA_AVX2, CODE_FOR_mulv16hi3, "__builtin_ia32_pmullw256" , IX86_BUILTIN_PMULLW256 , UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI }, diff --git a/gcc/config/i386/predicates.md b/gcc/config/i386/predicates.md index 4e5c17d..830c740f2 100644 --- a/gcc/config/i386/predicates.md +++ b/gcc/config/i386/predicates.md @@ -568,10 +568,14 @@ return op == CONST0_RTX (mode); }) -;; Match exactly one. +;; Match one or vector filled with ones. 
(define_predicate "const1_operand" - (and (match_code "const_int") - (match_test "op == const1_rtx"))) + (match_code "const_int,const_double,const_vector") +{ + if (mode == VOIDmode) + mode = GET_MODE (op); + return op == CONST1_RTX (mode); +}) ;; Match exactly eight. (define_predicate "const8_operand" diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md index 299b0d9..d6cf7f3 100644 --- a/gcc/config/i386/sse.md +++ b/gcc/config/i386/sse.md @@ -222,7 +222,7 @@ (define_mode_attr ssse3_avx2 [(V16QI "ssse3") (V32QI "avx2") - (V8HI "ssse3") (V16HI "avx2") + (V4HI "ssse3") (V8HI "ssse3") (V16HI "avx2") (V4SI "ssse3") (V8SI "avx2") (V2DI "ssse3") (V4DI "avx2") (TI "ssse3") (V2TI "avx2")]) @@ -246,7 +246,8 @@ (V2DI "vec") (V4DI "avx2")]) (define_mode_attr ssedoublemode - [(V16HI "V16SI") (V8HI "V8SI")]) + [(V16HI "V16SI") (V8HI "V8SI") (V4HI "V4SI") + (V32QI "V32HI") (V16QI "V16HI")]) (define_mode_attr ssebytemode [(V4DI "V32QI") (V2DI "V16QI")]) @@ -7637,209 +7638,45 @@ ;; ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; -(define_expand "avx2_uavgv32qi3" - [(set (match_operand:V32QI 0 "register_operand") - (truncate:V32QI - (lshiftrt:V32HI - (plus:V32HI - (plus:V32HI - (zero_extend:V32HI - (match_operand:V32QI 1 "nonimmediate_operand")) - (zero_extend:V32HI - (match_operand:V32QI 2 "nonimmediate_operand"))) - (const_vector:V32QI [(const_int 1) (const_int 1) - (const_int 1) (const_int 1) - (const_int 1) (const_int 1) - (const_int 1) (const_int 1) - (const_int 1) (const_int 1) - (const_int 1) (const_int 1) - (const_int 1) (const_int 1) - (const_int 1) (const_int 1) - (const_int 1) (const_int 1) - (const_int 1) (const_int 1) - (const_int 1) (const_int 1) - (const_int 1) (const_int 1) - (const_int 1) (const_int 1) - (const_int 1) (const_int 1) - (const_int 1) (const_int 1) - (const_int 1) (const_int 1)])) - (const_int 1))))] - "TARGET_AVX2" - "ix86_fixup_binary_operands_no_copy (PLUS, V32QImode, operands);") - -(define_expand "sse2_uavgv16qi3" - 
[(set (match_operand:V16QI 0 "register_operand") - (truncate:V16QI - (lshiftrt:V16HI - (plus:V16HI - (plus:V16HI - (zero_extend:V16HI - (match_operand:V16QI 1 "nonimmediate_operand")) - (zero_extend:V16HI - (match_operand:V16QI 2 "nonimmediate_operand"))) - (const_vector:V16QI [(const_int 1) (const_int 1) - (const_int 1) (const_int 1) - (const_int 1) (const_int 1) - (const_int 1) (const_int 1) - (const_int 1) (const_int 1) - (const_int 1) (const_int 1) - (const_int 1) (const_int 1) - (const_int 1) (const_int 1)])) - (const_int 1))))] - "TARGET_SSE2" - "ix86_fixup_binary_operands_no_copy (PLUS, V16QImode, operands);") - -(define_insn "*avx2_uavgv32qi3" - [(set (match_operand:V32QI 0 "register_operand" "=x") - (truncate:V32QI - (lshiftrt:V32HI - (plus:V32HI - (plus:V32HI - (zero_extend:V32HI - (match_operand:V32QI 1 "nonimmediate_operand" "%x")) - (zero_extend:V32HI - (match_operand:V32QI 2 "nonimmediate_operand" "xm"))) - (const_vector:V32QI [(const_int 1) (const_int 1) - (const_int 1) (const_int 1) - (const_int 1) (const_int 1) - (const_int 1) (const_int 1) - (const_int 1) (const_int 1) - (const_int 1) (const_int 1) - (const_int 1) (const_int 1) - (const_int 1) (const_int 1) - (const_int 1) (const_int 1) - (const_int 1) (const_int 1) - (const_int 1) (const_int 1) - (const_int 1) (const_int 1) - (const_int 1) (const_int 1) - (const_int 1) (const_int 1) - (const_int 1) (const_int 1) - (const_int 1) (const_int 1)])) - (const_int 1))))] - "TARGET_AVX2 && ix86_binary_operator_ok (PLUS, V32QImode, operands)" - "vpavgb\t{%2, %1, %0|%0, %1, %2}" - [(set_attr "type" "sseiadd") - (set_attr "prefix" "vex") - (set_attr "mode" "OI")]) - -(define_insn "*sse2_uavgv16qi3" - [(set (match_operand:V16QI 0 "register_operand" "=x,x") - (truncate:V16QI - (lshiftrt:V16HI - (plus:V16HI - (plus:V16HI - (zero_extend:V16HI - (match_operand:V16QI 1 "nonimmediate_operand" "%0,x")) - (zero_extend:V16HI - (match_operand:V16QI 2 "nonimmediate_operand" "xm,xm"))) - (const_vector:V16QI [(const_int 
1) (const_int 1) - (const_int 1) (const_int 1) - (const_int 1) (const_int 1) - (const_int 1) (const_int 1) - (const_int 1) (const_int 1) - (const_int 1) (const_int 1) - (const_int 1) (const_int 1) - (const_int 1) (const_int 1)])) - (const_int 1))))] - "TARGET_SSE2 && ix86_binary_operator_ok (PLUS, V16QImode, operands)" - "@ - pavgb\t{%2, %0|%0, %2} - vpavgb\t{%2, %1, %0|%0, %1, %2}" - [(set_attr "isa" "noavx,avx") - (set_attr "type" "sseiadd") - (set_attr "prefix_data16" "1,*") - (set_attr "prefix" "orig,vex") - (set_attr "mode" "TI")]) - -(define_expand "avx2_uavgv16hi3" - [(set (match_operand:V16HI 0 "register_operand") - (truncate:V16HI - (lshiftrt:V16SI - (plus:V16SI - (plus:V16SI - (zero_extend:V16SI - (match_operand:V16HI 1 "nonimmediate_operand")) - (zero_extend:V16SI - (match_operand:V16HI 2 "nonimmediate_operand"))) - (const_vector:V16HI [(const_int 1) (const_int 1) - (const_int 1) (const_int 1) - (const_int 1) (const_int 1) - (const_int 1) (const_int 1) - (const_int 1) (const_int 1) - (const_int 1) (const_int 1) - (const_int 1) (const_int 1) - (const_int 1) (const_int 1)])) - (const_int 1))))] - "TARGET_AVX2" - "ix86_fixup_binary_operands_no_copy (PLUS, V16HImode, operands);") - -(define_expand "sse2_uavgv8hi3" - [(set (match_operand:V8HI 0 "register_operand") - (truncate:V8HI - (lshiftrt:V8SI - (plus:V8SI - (plus:V8SI - (zero_extend:V8SI - (match_operand:V8HI 1 "nonimmediate_operand")) - (zero_extend:V8SI - (match_operand:V8HI 2 "nonimmediate_operand"))) - (const_vector:V8HI [(const_int 1) (const_int 1) - (const_int 1) (const_int 1) - (const_int 1) (const_int 1) - (const_int 1) (const_int 1)])) +(define_expand "_uavg3" + [(set (match_operand:VI12_AVX2 0 "register_operand") + (truncate:VI12_AVX2 + (lshiftrt: + (plus: + (plus: + (zero_extend: + (match_operand:VI12_AVX2 1 "nonimmediate_operand")) + (zero_extend: + (match_operand:VI12_AVX2 2 "nonimmediate_operand"))) + (match_dup 3)) (const_int 1))))] "TARGET_SSE2" - "ix86_fixup_binary_operands_no_copy 
(PLUS, V8HImode, operands);") - -(define_insn "*avx2_uavgv16hi3" - [(set (match_operand:V16HI 0 "register_operand" "=x") - (truncate:V16HI - (lshiftrt:V16SI - (plus:V16SI - (plus:V16SI - (zero_extend:V16SI - (match_operand:V16HI 1 "nonimmediate_operand" "%x")) - (zero_extend:V16SI - (match_operand:V16HI 2 "nonimmediate_operand" "xm"))) - (const_vector:V16HI [(const_int 1) (const_int 1) - (const_int 1) (const_int 1) - (const_int 1) (const_int 1) - (const_int 1) (const_int 1) - (const_int 1) (const_int 1) - (const_int 1) (const_int 1) - (const_int 1) (const_int 1) - (const_int 1) (const_int 1)])) - (const_int 1))))] - "TARGET_AVX2 && ix86_binary_operator_ok (PLUS, V16HImode, operands)" - "vpavgw\t{%2, %1, %0|%0, %1, %2}" - [(set_attr "type" "sseiadd") - (set_attr "prefix" "vex") - (set_attr "mode" "OI")]) +{ + operands[3] = CONST1_RTX(mode); + ix86_fixup_binary_operands_no_copy (PLUS, mode, operands); +}) -(define_insn "*sse2_uavgv8hi3" - [(set (match_operand:V8HI 0 "register_operand" "=x,x") - (truncate:V8HI - (lshiftrt:V8SI - (plus:V8SI - (plus:V8SI - (zero_extend:V8SI - (match_operand:V8HI 1 "nonimmediate_operand" "%0,x")) - (zero_extend:V8SI - (match_operand:V8HI 2 "nonimmediate_operand" "xm,xm"))) - (const_vector:V8HI [(const_int 1) (const_int 1) - (const_int 1) (const_int 1) - (const_int 1) (const_int 1) - (const_int 1) (const_int 1)])) +(define_insn "*_uavg3" + [(set (match_operand:VI12_AVX2 0 "register_operand" "=x,x") + (truncate:VI12_AVX2 + (lshiftrt: + (plus: + (plus: + (zero_extend: + (match_operand:VI12_AVX2 1 "nonimmediate_operand" "%0,x")) + (zero_extend: + (match_operand:VI12_AVX2 2 "nonimmediate_operand" "xm,xm"))) + (match_operand:VI12_AVX2 3 "const1_operand")) (const_int 1))))] - "TARGET_SSE2 && ix86_binary_operator_ok (PLUS, V8HImode, operands)" + "TARGET_SSE2 && ix86_binary_operator_ok (PLUS, mode, operands)" "@ - pavgw\t{%2, %0|%0, %2} - vpavgw\t{%2, %1, %0|%0, %1, %2}" + pavg\t{%2, %0|%0, %2} + vpavg\t{%2, %1, %0|%0, %1, %2}" [(set_attr "isa" 
"noavx,avx") (set_attr "type" "sseiadd") (set_attr "prefix_data16" "1,*") (set_attr "prefix" "orig,vex") - (set_attr "mode" "TI")]) + (set_attr "mode" "")]) ;; The correct representation for this is absolutely enormous, and ;; surely not generally useful. @@ -8366,31 +8203,30 @@ (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)")) (set_attr "mode" "DI")]) -(define_expand "avx2_umulhrswv16hi3" - [(set (match_operand:V16HI 0 "register_operand") - (truncate:V16HI - (lshiftrt:V16SI - (plus:V16SI - (lshiftrt:V16SI - (mult:V16SI - (sign_extend:V16SI - (match_operand:V16HI 1 "nonimmediate_operand")) - (sign_extend:V16SI - (match_operand:V16HI 2 "nonimmediate_operand"))) +(define_mode_iterator PMULHRSW + [V4HI V8HI (V16HI "TARGET_AVX2")]) + +(define_expand "_pmulhrsw3" + [(set (match_operand:PMULHRSW 0 "register_operand") + (truncate:PMULHRSW + (lshiftrt: + (plus: + (lshiftrt: + (mult: + (sign_extend: + (match_operand:PMULHRSW 1 "nonimmediate_operand")) + (sign_extend: + (match_operand:PMULHRSW 2 "nonimmediate_operand"))) (const_int 14)) - (const_vector:V16HI [(const_int 1) (const_int 1) - (const_int 1) (const_int 1) - (const_int 1) (const_int 1) - (const_int 1) (const_int 1) - (const_int 1) (const_int 1) - (const_int 1) (const_int 1) - (const_int 1) (const_int 1) - (const_int 1) (const_int 1)])) + (match_dup 3)) (const_int 1))))] "TARGET_AVX2" - "ix86_fixup_binary_operands_no_copy (MULT, V16HImode, operands);") +{ + operands[3] = CONST1_RTX(mode); + ix86_fixup_binary_operands_no_copy (MULT, mode, operands); +}) -(define_insn "*avx2_umulhrswv16hi3" +(define_insn "*avx2_pmulhrswv16hi3" [(set (match_operand:V16HI 0 "register_operand" "=x") (truncate:V16HI (lshiftrt:V16SI @@ -8402,14 +8238,7 @@ (sign_extend:V16SI (match_operand:V16HI 2 "nonimmediate_operand" "xm"))) (const_int 14)) - (const_vector:V16HI [(const_int 1) (const_int 1) - (const_int 1) (const_int 1) - (const_int 1) (const_int 1) - (const_int 1) (const_int 1) - (const_int 1) (const_int 1) - 
(const_int 1) (const_int 1) - (const_int 1) (const_int 1) - (const_int 1) (const_int 1)])) + (match_operand:V16HI 3 "const1_operand")) (const_int 1))))] "TARGET_AVX2 && ix86_binary_operator_ok (MULT, V16HImode, operands)" "vpmulhrsw\t{%2, %1, %0|%0, %1, %2}" @@ -8418,26 +8247,6 @@ (set_attr "prefix" "vex") (set_attr "mode" "OI")]) -(define_expand "ssse3_pmulhrswv8hi3" - [(set (match_operand:V8HI 0 "register_operand") - (truncate:V8HI - (lshiftrt:V8SI - (plus:V8SI - (lshiftrt:V8SI - (mult:V8SI - (sign_extend:V8SI - (match_operand:V8HI 1 "nonimmediate_operand")) - (sign_extend:V8SI - (match_operand:V8HI 2 "nonimmediate_operand"))) - (const_int 14)) - (const_vector:V8HI [(const_int 1) (const_int 1) - (const_int 1) (const_int 1) - (const_int 1) (const_int 1) - (const_int 1) (const_int 1)])) - (const_int 1))))] - "TARGET_SSSE3" - "ix86_fixup_binary_operands_no_copy (MULT, V8HImode, operands);") - (define_insn "*ssse3_pmulhrswv8hi3" [(set (match_operand:V8HI 0 "register_operand" "=x,x") (truncate:V8HI @@ -8450,10 +8259,7 @@ (sign_extend:V8SI (match_operand:V8HI 2 "nonimmediate_operand" "xm,xm"))) (const_int 14)) - (const_vector:V8HI [(const_int 1) (const_int 1) - (const_int 1) (const_int 1) - (const_int 1) (const_int 1) - (const_int 1) (const_int 1)])) + (match_operand:V8HI 3 "const1_operand")) (const_int 1))))] "TARGET_SSSE3 && ix86_binary_operator_ok (MULT, V8HImode, operands)" "@ @@ -8466,24 +8272,6 @@ (set_attr "prefix" "orig,vex") (set_attr "mode" "TI")]) -(define_expand "ssse3_pmulhrswv4hi3" - [(set (match_operand:V4HI 0 "register_operand") - (truncate:V4HI - (lshiftrt:V4SI - (plus:V4SI - (lshiftrt:V4SI - (mult:V4SI - (sign_extend:V4SI - (match_operand:V4HI 1 "nonimmediate_operand")) - (sign_extend:V4SI - (match_operand:V4HI 2 "nonimmediate_operand"))) - (const_int 14)) - (const_vector:V4HI [(const_int 1) (const_int 1) - (const_int 1) (const_int 1)])) - (const_int 1))))] - "TARGET_SSSE3" - "ix86_fixup_binary_operands_no_copy (MULT, V4HImode, operands);") - 
(define_insn "*ssse3_pmulhrswv4hi3" [(set (match_operand:V4HI 0 "register_operand" "=y") (truncate:V4HI @@ -8496,8 +8284,7 @@ (sign_extend:V4SI (match_operand:V4HI 2 "nonimmediate_operand" "ym"))) (const_int 14)) - (const_vector:V4HI [(const_int 1) (const_int 1) - (const_int 1) (const_int 1)])) + (match_operand:V4HI 3 "const1_operand")) (const_int 1))))] "TARGET_SSSE3 && ix86_binary_operator_ok (MULT, V4HImode, operands)" "pmulhrsw\t{%2, %0|%0, %2}" -- 2.7.4