From 7e204bd2f189850cb940677c99d8d93eb7dd40cd Mon Sep 17 00:00:00 2001 From: liuhongt Date: Mon, 24 Jan 2022 11:05:47 +0800 Subject: [PATCH] Add vect_recog_cond_expr_convert_pattern. The pattern converts (cond (cmp a b) (convert c) (convert d)) to (convert (cond (cmp a b) c d)) when 1) types_match (c, d) 2) single_use for (convert c) and (convert d) 3) TYPE_PRECISION (TREE_TYPE (c)) == TYPE_PRECISION (TREE_TYPE (a)) 4) INTEGRAL_TYPE_P (TREE_TYPE (c)) The pattern can save packing of mask and data (partial for data, 2 vs 1). gcc/ChangeLog: PR target/103771 * match.pd (cond_expr_convert_p): New match. * tree-vect-patterns.cc (gimple_cond_expr_convert_p): Declare. (vect_recog_cond_expr_convert_pattern): New. gcc/testsuite/ChangeLog: * gcc.target/i386/pr103771-2.c: New test. * gcc.target/i386/pr103771-3.c: New test. --- gcc/match.pd | 14 +++++ gcc/testsuite/gcc.target/i386/pr103771-2.c | 8 +++ gcc/testsuite/gcc.target/i386/pr103771-3.c | 21 +++++++ gcc/tree-vect-patterns.cc | 96 ++++++++++++++++++++++++++++++ 4 files changed, 139 insertions(+) create mode 100644 gcc/testsuite/gcc.target/i386/pr103771-2.c create mode 100644 gcc/testsuite/gcc.target/i386/pr103771-3.c diff --git a/gcc/match.pd b/gcc/match.pd index 10f6228..05a10ab 100644 --- a/gcc/match.pd +++ b/gcc/match.pd @@ -7686,3 +7686,17 @@ and, to the number of trailing zeroes. 
*/ (match (ctz_table_index @1 @2 @3) (rshift (mult (bit_and:c (negate @1) @1) INTEGER_CST@2) INTEGER_CST@3)) + +(match (cond_expr_convert_p @0 @2 @3 @6) + (cond (simple_comparison@6 @0 @1) (convert@4 @2) (convert@5 @3)) + (if (INTEGRAL_TYPE_P (type) + && INTEGRAL_TYPE_P (TREE_TYPE (@2)) + && INTEGRAL_TYPE_P (TREE_TYPE (@0)) + && INTEGRAL_TYPE_P (TREE_TYPE (@3)) + && TYPE_PRECISION (type) != TYPE_PRECISION (TREE_TYPE (@0)) + && TYPE_PRECISION (TREE_TYPE (@0)) + == TYPE_PRECISION (TREE_TYPE (@2)) + && TYPE_PRECISION (TREE_TYPE (@0)) + == TYPE_PRECISION (TREE_TYPE (@3)) + && single_use (@4) + && single_use (@5)))) diff --git a/gcc/testsuite/gcc.target/i386/pr103771-2.c b/gcc/testsuite/gcc.target/i386/pr103771-2.c new file mode 100644 index 0000000..962a3a7 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/pr103771-2.c @@ -0,0 +1,8 @@ +/* { dg-do compile } */ +/* { dg-options "-march=cascadelake -O3" } */ +/* { dg-final { scan-assembler-not "kunpck" } } */ +/* { dg-final { scan-assembler-not "kand" } } */ +/* { dg-final { scan-assembler-not "kor" } } */ +/* { dg-final { scan-assembler-not "kshift" } } */ + +#include "pr103771.c" diff --git a/gcc/testsuite/gcc.target/i386/pr103771-3.c b/gcc/testsuite/gcc.target/i386/pr103771-3.c new file mode 100644 index 0000000..ef379b2 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/pr103771-3.c @@ -0,0 +1,21 @@ +/* { dg-do compile } */ +/* { dg-options "-march=cascadelake -O3" } */ +/* { dg-final { scan-assembler-not "kunpck" } } */ +/* { dg-final { scan-assembler-not "kand" } } */ +/* { dg-final { scan-assembler-not "kor" } } */ +/* { dg-final { scan-assembler-not "kshift" } } */ + +typedef unsigned char uint8_t; + +static uint8_t x264_clip_uint8 (int x, unsigned int y) +{ + return x & (~255) ? 
(-x) >> 31 : y; +} + +void +mc_weight (uint8_t* __restrict dst, uint8_t* __restrict src, + int i_width,int i_scale, unsigned int* __restrict y) +{ + for(int x = 0; x < i_width; x++) + dst[x] = x264_clip_uint8 (src[x] * i_scale, y[x]); +} diff --git a/gcc/tree-vect-patterns.cc b/gcc/tree-vect-patterns.cc index 2baf974..a8f96d5 100644 --- a/gcc/tree-vect-patterns.cc +++ b/gcc/tree-vect-patterns.cc @@ -924,6 +924,101 @@ vect_reassociating_reduction_p (vec_info *vinfo, return true; } +/* match.pd function to match + (cond (cmp@3 a b) (convert@1 c) (convert@2 d)) + with conditions: + 1) @1, @2, c, d, a, b are all of integral type. + 2) There's single_use for both @1 and @2. + 3) a, c and d have same precision. + 4) c and @1 have different precision. + + record a and c and d and @3. */ + +extern bool gimple_cond_expr_convert_p (tree, tree*, tree (*)(tree)); + +/* Function vect_recog_cond_expr_convert_pattern + + Try to find the following pattern: + + TYPE_AB A,B; + TYPE_CD C,D; + TYPE_E E; + TYPE_E op_true = (TYPE_E) A; + TYPE_E op_false = (TYPE_E) B; + + E = C cmp D ? op_true : op_false; + + where + TYPE_PRECISION (TYPE_E) != TYPE_PRECISION (TYPE_CD); + TYPE_PRECISION (TYPE_AB) == TYPE_PRECISION (TYPE_CD); + single_use of op_true and op_false. + TYPE_AB could differ in sign. + + Input: + + * STMT_VINFO: The stmt from which the pattern search begins. + here it starts with E = C cmp D ? op_true : op_false; + + Output: + + TYPE1 E' = C cmp D ? A : B; + TYPE3 E = (TYPE3) E'; + + There may be an extra nop_convert for A or B to handle different signedness. + + * TYPE_OUT: The vector type of the output of this pattern. + + * Return value: A new stmt that will be used to replace the sequence of + stmts that constitute the pattern. In this case it will be: + E = (TYPE3)E'; + E' = C cmp D ? 
A : B; is recorded in pattern definition statements; */ + +static gimple * +vect_recog_cond_expr_convert_pattern (vec_info *vinfo, + stmt_vec_info stmt_vinfo, tree *type_out) +{ + gassign *last_stmt = dyn_cast <gassign *> (stmt_vinfo->stmt); + tree lhs, match[4], temp, type, new_lhs, op2; + gimple *cond_stmt; + gimple *pattern_stmt; + + if (!last_stmt) + return NULL; + + lhs = gimple_assign_lhs (last_stmt); + + /* Find E = C cmp D ? (TYPE3) A : (TYPE3) B; + TYPE_PRECISION (A) == TYPE_PRECISION (C). */ + if (!gimple_cond_expr_convert_p (lhs, &match[0], NULL)) + return NULL; + + vect_pattern_detected ("vect_recog_cond_expr_convert_pattern", last_stmt); + + op2 = match[2]; + type = TREE_TYPE (match[1]); + if (TYPE_SIGN (type) != TYPE_SIGN (TREE_TYPE (match[2]))) + { + op2 = vect_recog_temp_ssa_var (type, NULL); + gimple* nop_stmt = gimple_build_assign (op2, NOP_EXPR, match[2]); + append_pattern_def_seq (vinfo, stmt_vinfo, nop_stmt, + get_vectype_for_scalar_type (vinfo, type)); + } + + temp = vect_recog_temp_ssa_var (type, NULL); + cond_stmt = gimple_build_assign (temp, build3 (COND_EXPR, type, match[3], + match[1], op2)); + append_pattern_def_seq (vinfo, stmt_vinfo, cond_stmt, + get_vectype_for_scalar_type (vinfo, type)); + new_lhs = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL); + pattern_stmt = gimple_build_assign (new_lhs, NOP_EXPR, temp); + *type_out = STMT_VINFO_VECTYPE (stmt_vinfo); + + if (dump_enabled_p ()) + dump_printf_loc (MSG_NOTE, vect_location, + "created pattern stmt: %G", pattern_stmt); + return pattern_stmt; +} + /* Function vect_recog_dot_prod_pattern Try to find the following pattern: @@ -5492,6 +5587,7 @@ static vect_recog_func vect_vect_recog_func_ptrs[] = { /* Must come after over_widening, which narrows the shift as much as possible beforehand. 
*/ { vect_recog_average_pattern, "average" }, + { vect_recog_cond_expr_convert_pattern, "cond_expr_convert" }, { vect_recog_mulhs_pattern, "mult_high" }, { vect_recog_cast_forwprop_pattern, "cast_forwprop" }, { vect_recog_widen_mult_pattern, "widen_mult" }, -- 2.7.4