From 7b988cc3b438a17fda01da75d12d72d9ce6f7c97 Mon Sep 17 00:00:00 2001 From: kyukhin Date: Wed, 24 Sep 2014 08:05:17 +0000 Subject: [PATCH] AVX-512. Add vpshuf[lh]w insn patterns. gcc/ * config/i386/sse.md (define_c_enum "unspec"): Add UNSPEC_PSHUFHW, UNSPEC_PSHUFLW. (define_insn "avx512bw_pshuflwv32hi"): New. (define_expand "avx512vl_pshuflwv3_mask"): Ditto. (define_insn "avx2_pshuflw_1"): Add masking. (define_expand "avx512vl_pshuflw_mask"): New. (define_insn "sse2_pshuflw_1"): Add masking. (define_insn "avx512bw_pshufhwv32hi"): New. (define_expand "avx512vl_pshufhwv3_mask"): Ditto. (define_insn "avx2_pshufhw_1"): Add masking. (define_expand "avx512vl_pshufhw_mask"): New. (define_insn "sse2_pshufhw_1"): Add masking. git-svn-id: svn+ssh://gcc.gnu.org/svn/gcc/trunk@215544 138bc75d-0d04-0410-961f-82ee72b054a4 --- gcc/ChangeLog | 22 ++++++++ gcc/config/i386/sse.md | 150 ++++++++++++++++++++++++++++++++++++++++++------- 2 files changed, 152 insertions(+), 20 deletions(-) diff --git a/gcc/ChangeLog b/gcc/ChangeLog index 6fd0bf9..a9707a3 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -7,6 +7,28 @@ Kirill Yukhin Michael Zolotukhin + * config/i386/sse.md + (define_c_enum "unspec"): Add UNSPEC_PSHUFHW, UNSPEC_PSHUFLW. + (define_insn "avx512bw_pshuflwv32hi"): New. + (define_expand "avx512vl_pshuflwv3_mask"): Ditto. + (define_insn "avx2_pshuflw_1"): Add masking. + (define_expand "avx512vl_pshuflw_mask"): New. + (define_insn "sse2_pshuflw_1"): Add masking. + (define_insn "avx512bw_pshufhwv32hi"): New. + (define_expand "avx512vl_pshufhwv3_mask"): Ditto. + (define_insn "avx2_pshufhw_1"): Add masking. + (define_expand "avx512vl_pshufhw_mask"): New. + (define_insn "sse2_pshufhw_1"): Add masking. 
+ +2014-09-24 Alexander Ivchenko + Maxim Kuznetsov + Anna Tikhonova + Ilya Tocar + Andrey Turetskiy + Ilya Verbin + Kirill Yukhin + Michael Zolotukhin + * config/i386/i386.c (ix86_expand_args_builtin): Handle CODE_FOR_sse2_shufpd, CODE_FOR_sse2_sse2_shufpd_mask, CODE_FOR_sse2_avx512dq_shuf_f64x2_mask, diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md index dddf16d..d1c399c 100644 --- a/gcc/config/i386/sse.md +++ b/gcc/config/i386/sse.md @@ -129,6 +129,10 @@ UNSPEC_SHA256MSG2 UNSPEC_SHA256RNDS2 + ;; For AVX512BW support + UNSPEC_PSHUFHW + UNSPEC_PSHUFLW + ;; For AVX512DQ support UNSPEC_REDUCE UNSPEC_FPCLASS @@ -11789,6 +11793,40 @@ (set_attr "length_immediate" "1") (set_attr "mode" "TI")]) +(define_insn "avx512bw_pshuflwv32hi" + [(set (match_operand:V32HI 0 "register_operand" "=v") + (unspec:V32HI + [(match_operand:V32HI 1 "nonimmediate_operand" "vm") + (match_operand:SI 2 "const_0_to_255_operand" "n")] + UNSPEC_PSHUFLW))] + "TARGET_AVX512BW" + "vpshuflw\t{%2, %1, %0|%0, %1, %2}" + [(set_attr "type" "sselog") + (set_attr "prefix" "evex") + (set_attr "mode" "XI")]) + +(define_expand "avx512vl_pshuflwv3_mask" + [(match_operand:V16HI 0 "register_operand") + (match_operand:V16HI 1 "nonimmediate_operand") + (match_operand:SI 2 "const_0_to_255_operand") + (match_operand:V16HI 3 "register_operand") + (match_operand:HI 4 "register_operand")] + "TARGET_AVX512VL && TARGET_AVX512BW" +{ + int mask = INTVAL (operands[2]); + emit_insn (gen_avx2_pshuflw_1_mask (operands[0], operands[1], + GEN_INT ((mask >> 0) & 3), + GEN_INT ((mask >> 2) & 3), + GEN_INT ((mask >> 4) & 3), + GEN_INT ((mask >> 6) & 3), + GEN_INT (((mask >> 0) & 3) + 8), + GEN_INT (((mask >> 2) & 3) + 8), + GEN_INT (((mask >> 4) & 3) + 8), + GEN_INT (((mask >> 6) & 3) + 8), + operands[3], operands[4])); + DONE; +}) + (define_expand "avx2_pshuflwv3" [(match_operand:V16HI 0 "register_operand") (match_operand:V16HI 1 "nonimmediate_operand") @@ -11808,10 +11846,10 @@ DONE; }) -(define_insn "avx2_pshuflw_1" - 
[(set (match_operand:V16HI 0 "register_operand" "=x") +(define_insn "avx2_pshuflw_1<mask_name>" + [(set (match_operand:V16HI 0 "register_operand" "=v") (vec_select:V16HI - (match_operand:V16HI 1 "nonimmediate_operand" "xm") + (match_operand:V16HI 1 "nonimmediate_operand" "vm") (parallel [(match_operand 2 "const_0_to_3_operand") (match_operand 3 "const_0_to_3_operand") (match_operand 4 "const_0_to_3_operand") @@ -11829,6 +11867,7 @@ (const_int 14) (const_int 15)])))] "TARGET_AVX2 + && <mask_avx512bw_condition> && <mask_avx512vl_condition> && INTVAL (operands[2]) + 8 == INTVAL (operands[6]) && INTVAL (operands[3]) + 8 == INTVAL (operands[7]) && INTVAL (operands[4]) + 8 == INTVAL (operands[8]) @@ -11841,13 +11880,31 @@ mask |= INTVAL (operands[5]) << 6; operands[2] = GEN_INT (mask); - return "vpshuflw\t{%2, %1, %0|%0, %1, %2}"; + return "vpshuflw\t{%2, %1, %0<mask_operand10>|%0<mask_operand10>, %1, %2}"; } [(set_attr "type" "sselog") - (set_attr "prefix" "vex") + (set_attr "prefix" "maybe_evex") (set_attr "length_immediate" "1") (set_attr "mode" "OI")]) +(define_expand "avx512vl_pshuflw_mask" + [(match_operand:V8HI 0 "register_operand") + (match_operand:V8HI 1 "nonimmediate_operand") + (match_operand:SI 2 "const_0_to_255_operand") + (match_operand:V8HI 3 "register_operand") + (match_operand:QI 4 "register_operand")] + "TARGET_AVX512VL && TARGET_AVX512BW" +{ + int mask = INTVAL (operands[2]); + emit_insn (gen_sse2_pshuflw_1_mask (operands[0], operands[1], + GEN_INT ((mask >> 0) & 3), + GEN_INT ((mask >> 2) & 3), + GEN_INT ((mask >> 4) & 3), + GEN_INT ((mask >> 6) & 3), + operands[3], operands[4])); + DONE; +}) + (define_expand "sse2_pshuflw" [(match_operand:V8HI 0 "register_operand") (match_operand:V8HI 1 "nonimmediate_operand") @@ -11863,10 +11920,10 @@ DONE; }) -(define_insn "sse2_pshuflw_1" - [(set (match_operand:V8HI 0 "register_operand" "=x") +(define_insn "sse2_pshuflw_1<mask_name>" + [(set (match_operand:V8HI 0 "register_operand" "=v") (vec_select:V8HI - (match_operand:V8HI 1 "nonimmediate_operand" "xm") + (match_operand:V8HI 1 "nonimmediate_operand" "vm") (parallel
[(match_operand 2 "const_0_to_3_operand") (match_operand 3 "const_0_to_3_operand") (match_operand 4 "const_0_to_3_operand") @@ -11875,7 +11932,7 @@ (const_int 5) (const_int 6) (const_int 7)])))] - "TARGET_SSE2" + "TARGET_SSE2 && <mask_avx512bw_condition> && <mask_avx512vl_condition>" { int mask = 0; mask |= INTVAL (operands[2]) << 0; @@ -11884,7 +11941,7 @@ mask |= INTVAL (operands[5]) << 6; operands[2] = GEN_INT (mask); - return "%vpshuflw\t{%2, %1, %0|%0, %1, %2}"; + return "%vpshuflw\t{%2, %1, %0<mask_operand6>|%0<mask_operand6>, %1, %2}"; } [(set_attr "type" "sselog") (set_attr "prefix_data16" "0") @@ -11912,10 +11969,44 @@ DONE; }) -(define_insn "avx2_pshufhw_1" - [(set (match_operand:V16HI 0 "register_operand" "=x") +(define_insn "avx512bw_pshufhwv32hi" + [(set (match_operand:V32HI 0 "register_operand" "=v") + (unspec:V32HI + [(match_operand:V32HI 1 "nonimmediate_operand" "vm") + (match_operand:SI 2 "const_0_to_255_operand" "n")] + UNSPEC_PSHUFHW))] + "TARGET_AVX512BW" + "vpshufhw\t{%2, %1, %0|%0, %1, %2}" + [(set_attr "type" "sselog") + (set_attr "prefix" "evex") + (set_attr "mode" "XI")]) + +(define_expand "avx512vl_pshufhwv3_mask" + [(match_operand:V16HI 0 "register_operand") + (match_operand:V16HI 1 "nonimmediate_operand") + (match_operand:SI 2 "const_0_to_255_operand") + (match_operand:V16HI 3 "register_operand") + (match_operand:HI 4 "register_operand")] + "TARGET_AVX512VL && TARGET_AVX512BW" +{ + int mask = INTVAL (operands[2]); + emit_insn (gen_avx2_pshufhw_1_mask (operands[0], operands[1], + GEN_INT (((mask >> 0) & 3) + 4), + GEN_INT (((mask >> 2) & 3) + 4), + GEN_INT (((mask >> 4) & 3) + 4), + GEN_INT (((mask >> 6) & 3) + 4), + GEN_INT (((mask >> 0) & 3) + 12), + GEN_INT (((mask >> 2) & 3) + 12), + GEN_INT (((mask >> 4) & 3) + 12), + GEN_INT (((mask >> 6) & 3) + 12), + operands[3], operands[4])); + DONE; +}) + +(define_insn "avx2_pshufhw_1<mask_name>" + [(set (match_operand:V16HI 0 "register_operand" "=v") (vec_select:V16HI - (match_operand:V16HI 1 "nonimmediate_operand" "xm") + (match_operand:V16HI 1 "nonimmediate_operand" "vm") (parallel
[(const_int 0) (const_int 1) (const_int 2) @@ -11933,6 +12024,7 @@ (match_operand 8 "const_12_to_15_operand") (match_operand 9 "const_12_to_15_operand")])))] "TARGET_AVX2 + && <mask_avx512bw_condition> && <mask_avx512vl_condition> && INTVAL (operands[2]) + 8 == INTVAL (operands[6]) && INTVAL (operands[3]) + 8 == INTVAL (operands[7]) && INTVAL (operands[4]) + 8 == INTVAL (operands[8]) @@ -11945,13 +12037,31 @@ mask |= (INTVAL (operands[5]) - 4) << 6; operands[2] = GEN_INT (mask); - return "vpshufhw\t{%2, %1, %0|%0, %1, %2}"; + return "vpshufhw\t{%2, %1, %0<mask_operand10>|%0<mask_operand10>, %1, %2}"; } [(set_attr "type" "sselog") - (set_attr "prefix" "vex") + (set_attr "prefix" "maybe_evex") (set_attr "length_immediate" "1") (set_attr "mode" "OI")]) +(define_expand "avx512vl_pshufhw_mask" + [(match_operand:V8HI 0 "register_operand") + (match_operand:V8HI 1 "nonimmediate_operand") + (match_operand:SI 2 "const_0_to_255_operand") + (match_operand:V8HI 3 "register_operand") + (match_operand:QI 4 "register_operand")] + "TARGET_AVX512VL && TARGET_AVX512BW" +{ + int mask = INTVAL (operands[2]); + emit_insn (gen_sse2_pshufhw_1_mask (operands[0], operands[1], + GEN_INT (((mask >> 0) & 3) + 4), + GEN_INT (((mask >> 2) & 3) + 4), + GEN_INT (((mask >> 4) & 3) + 4), + GEN_INT (((mask >> 6) & 3) + 4), + operands[3], operands[4])); + DONE; +}) + (define_expand "sse2_pshufhw" [(match_operand:V8HI 0 "register_operand") (match_operand:V8HI 1 "nonimmediate_operand") @@ -11967,10 +12077,10 @@ DONE; }) -(define_insn "sse2_pshufhw_1" - [(set (match_operand:V8HI 0 "register_operand" "=x") +(define_insn "sse2_pshufhw_1<mask_name>" + [(set (match_operand:V8HI 0 "register_operand" "=v") (vec_select:V8HI - (match_operand:V8HI 1 "nonimmediate_operand" "xm") + (match_operand:V8HI 1 "nonimmediate_operand" "vm") (parallel [(const_int 0) (const_int 1) (const_int 2) @@ -11979,7 +12089,7 @@ (match_operand 3 "const_4_to_7_operand") (match_operand 4 "const_4_to_7_operand") (match_operand 5 "const_4_to_7_operand")])))] - "TARGET_SSE2" + "TARGET_SSE2 && <mask_avx512bw_condition> && <mask_avx512vl_condition>" { int mask = 0; mask |= (INTVAL
(operands[2]) - 4) << 0; @@ -11988,7 +12098,7 @@ mask |= (INTVAL (operands[5]) - 4) << 6; operands[2] = GEN_INT (mask); - return "%vpshufhw\t{%2, %1, %0|%0, %1, %2}"; + return "%vpshufhw\t{%2, %1, %0<mask_operand6>|%0<mask_operand6>, %1, %2}"; } [(set_attr "type" "sselog") (set_attr "prefix_rep" "1") -- 2.7.4