From 959c4b00628524099c9f5e689b5cace9fe66d146 Mon Sep 17 00:00:00 2001 From: ienkovich Date: Tue, 10 Nov 2015 12:17:30 +0000 Subject: [PATCH] gcc/ * optabs.c (expand_binop_directly): Allow scalar mode for vec_pack_trunc_optab. * tree-vect-loop.c (vect_determine_vectorization_factor): Skip boolean vector producers from pattern sequence when computing VF. * tree-vect-patterns.c (vect_vect_recog_func_ptrs) Add vect_recog_mask_conversion_pattern. (search_type_for_mask): Choose the smallest type if different size types are mixed. (build_mask_conversion): New. (vect_recog_mask_conversion_pattern): New. (vect_pattern_recog_1): Allow scalar mode for boolean vectype. * tree-vect-stmts.c (vectorizable_mask_load_store): Support masked load with pattern. (vectorizable_conversion): Support boolean vectors. (free_stmt_vec_info): Allow patterns for statements with no lhs. * tree-vectorizer.h (NUM_PATTERNS): Increase to 14. git-svn-id: svn+ssh://gcc.gnu.org/svn/gcc/trunk@230103 138bc75d-0d04-0410-961f-82ee72b054a4 --- gcc/ChangeLog | 19 ++++ gcc/optabs.c | 3 +- gcc/tree-vect-loop.c | 31 +++--- gcc/tree-vect-patterns.c | 275 ++++++++++++++++++++++++++++++++++++++++++++++- gcc/tree-vect-stmts.c | 42 ++++++-- gcc/tree-vectorizer.h | 2 +- 6 files changed, 345 insertions(+), 27 deletions(-) diff --git a/gcc/ChangeLog b/gcc/ChangeLog index ade9a3b..e868b5d 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,5 +1,24 @@ 2015-11-10 Ilya Enkovich + * optabs.c (expand_binop_directly): Allow scalar mode for + vec_pack_trunc_optab. + * tree-vect-loop.c (vect_determine_vectorization_factor): Skip + boolean vector producers from pattern sequence when computing VF. + * tree-vect-patterns.c (vect_vect_recog_func_ptrs) Add + vect_recog_mask_conversion_pattern. + (search_type_for_mask): Choose the smallest + type if different size types are mixed. + (build_mask_conversion): New. + (vect_recog_mask_conversion_pattern): New. + (vect_pattern_recog_1): Allow scalar mode for boolean vectype. + * tree-vect-stmts.c (vectorizable_mask_load_store): Support masked + load with pattern. + (vectorizable_conversion): Support boolean vectors. + (free_stmt_vec_info): Allow patterns for statements with no lhs. + * tree-vectorizer.h (NUM_PATTERNS): Increase to 14. + +2015-11-10 Ilya Enkovich + * config/i386/i386-protos.h (ix86_expand_sse_movcc): New. * config/i386/i386.c (ix86_expand_sse_movcc): Make public. Cast mask to FP mode if required. diff --git a/gcc/optabs.c b/gcc/optabs.c index 9b8e958..5545302 100644 --- a/gcc/optabs.c +++ b/gcc/optabs.c @@ -1047,7 +1047,8 @@ expand_binop_directly (machine_mode mode, optab binoptab, /* The mode of the result is different then the mode of the arguments. */ tmp_mode = insn_data[(int) icode].operand[0].mode; - if (GET_MODE_NUNITS (tmp_mode) != 2 * GET_MODE_NUNITS (mode)) + if (VECTOR_MODE_P (mode) + && GET_MODE_NUNITS (tmp_mode) != 2 * GET_MODE_NUNITS (mode)) { delete_insns_since (last); return NULL_RTX; diff --git a/gcc/tree-vect-loop.c b/gcc/tree-vect-loop.c index cbf0073..55e5309 100644 --- a/gcc/tree-vect-loop.c +++ b/gcc/tree-vect-loop.c @@ -492,20 +492,27 @@ vect_determine_vectorization_factor (loop_vec_info loop_vinfo) } } - /* The vectorization factor is according to the smallest - scalar type (or the largest vector size, but we only - support one vector size per loop). */ - if (!bool_result) - scalar_type = vect_get_smallest_scalar_type (stmt, &dummy, - &dummy); - if (dump_enabled_p ()) + /* Don't try to compute VF out scalar types if we stmt + produces boolean vector. 
Use result vectype instead. */ + if (VECTOR_BOOLEAN_TYPE_P (vectype)) + vf_vectype = vectype; + else { - dump_printf_loc (MSG_NOTE, vect_location, - "get vectype for scalar type: "); - dump_generic_expr (MSG_NOTE, TDF_SLIM, scalar_type); - dump_printf (MSG_NOTE, "\n"); + /* The vectorization factor is according to the smallest + scalar type (or the largest vector size, but we only + support one vector size per loop). */ + if (!bool_result) + scalar_type = vect_get_smallest_scalar_type (stmt, &dummy, + &dummy); + if (dump_enabled_p ()) + { + dump_printf_loc (MSG_NOTE, vect_location, + "get vectype for scalar type: "); + dump_generic_expr (MSG_NOTE, TDF_SLIM, scalar_type); + dump_printf (MSG_NOTE, "\n"); + } + vf_vectype = get_vectype_for_scalar_type (scalar_type); } - vf_vectype = get_vectype_for_scalar_type (scalar_type); if (!vf_vectype) { if (dump_enabled_p ()) diff --git a/gcc/tree-vect-patterns.c b/gcc/tree-vect-patterns.c index 917eeb7..b9d900c 100644 --- a/gcc/tree-vect-patterns.c +++ b/gcc/tree-vect-patterns.c @@ -66,6 +66,7 @@ static gimple *vect_recog_mult_pattern (vec *, static gimple *vect_recog_mixed_size_cond_pattern (vec *, tree *, tree *); static gimple *vect_recog_bool_pattern (vec *, tree *, tree *); +static gimple *vect_recog_mask_conversion_pattern (vec *, tree *, tree *); static vect_recog_func_ptr vect_vect_recog_func_ptrs[NUM_PATTERNS] = { vect_recog_widen_mult_pattern, vect_recog_widen_sum_pattern, @@ -79,7 +80,8 @@ static vect_recog_func_ptr vect_vect_recog_func_ptrs[NUM_PATTERNS] = { vect_recog_divmod_pattern, vect_recog_mult_pattern, vect_recog_mixed_size_cond_pattern, - vect_recog_bool_pattern}; + vect_recog_bool_pattern, + vect_recog_mask_conversion_pattern}; static inline void append_pattern_def_seq (stmt_vec_info stmt_info, gimple *stmt) @@ -3152,7 +3154,7 @@ search_type_for_mask (tree var, vec_info *vinfo) enum vect_def_type dt; tree rhs1; enum tree_code rhs_code; - tree res = NULL_TREE; + tree res = NULL_TREE, res2; if (TREE_CODE (var) != SSA_NAME) return NULL_TREE; @@ -3185,13 +3187,26 @@ search_type_for_mask (tree var, vec_info *vinfo) case BIT_AND_EXPR: case BIT_IOR_EXPR: case BIT_XOR_EXPR: - if (!(res = search_type_for_mask (rhs1, vinfo))) - res = search_type_for_mask (gimple_assign_rhs2 (def_stmt), vinfo); + res = search_type_for_mask (rhs1, vinfo); + res2 = search_type_for_mask (gimple_assign_rhs2 (def_stmt), vinfo); + if (!res || (res2 && TYPE_PRECISION (res) > TYPE_PRECISION (res2))) + res = res2; break; default: if (TREE_CODE_CLASS (rhs_code) == tcc_comparison) { + tree comp_vectype, mask_type; + + comp_vectype = get_vectype_for_scalar_type (TREE_TYPE (rhs1)); + if (comp_vectype == NULL_TREE) + return NULL_TREE; + + mask_type = get_mask_type_for_scalar_type (TREE_TYPE (rhs1)); + if (!mask_type + || !expand_vec_cmp_expr_p (comp_vectype, mask_type)) + return NULL_TREE; + if (TREE_CODE (TREE_TYPE (rhs1)) != INTEGER_TYPE || !TYPE_UNSIGNED (TREE_TYPE (rhs1))) { @@ -3453,6 +3468,255 @@ vect_recog_bool_pattern (vec *stmts, tree *type_in, } +/* A helper for vect_recog_mask_conversion_pattern. Build + conversion of MASK to a type suitable for masking VECTYPE. + Built statement gets required vectype and is appended to + a pattern sequence of STMT_VINFO. + + Return converted mask. 
*/ + +static tree +build_mask_conversion (tree mask, tree vectype, stmt_vec_info stmt_vinfo, + vec_info *vinfo) +{ + gimple *stmt; + tree masktype, tmp; + stmt_vec_info new_stmt_info; + + masktype = build_same_sized_truth_vector_type (vectype); + tmp = vect_recog_temp_ssa_var (TREE_TYPE (masktype), NULL); + stmt = gimple_build_assign (tmp, CONVERT_EXPR, mask); + new_stmt_info = new_stmt_vec_info (stmt, vinfo); + set_vinfo_for_stmt (stmt, new_stmt_info); + STMT_VINFO_VECTYPE (new_stmt_info) = masktype; + append_pattern_def_seq (stmt_vinfo, stmt); + + return tmp; +} + + +/* Function vect_recog_mask_conversion_pattern + + Try to find statements which require boolean type + converison. Additional conversion statements are + added to handle such cases. For example: + + bool m_1, m_2, m_3; + int i_4, i_5; + double d_6, d_7; + char c_1, c_2, c_3; + + S1 m_1 = i_4 > i_5; + S2 m_2 = d_6 < d_7; + S3 m_3 = m_1 & m_2; + S4 c_1 = m_3 ? c_2 : c_3; + + Will be transformed into: + + S1 m_1 = i_4 > i_5; + S2 m_2 = d_6 < d_7; + S3'' m_2' = (_Bool[bitsize=32])m_2 + S3' m_3' = m_1 & m_2'; + S4'' m_3'' = (_Bool[bitsize=8])m_3' + S4' c_1' = m_3'' ? c_2 : c_3; */ + +static gimple * +vect_recog_mask_conversion_pattern (vec *stmts, tree *type_in, + tree *type_out) +{ + gimple *last_stmt = stmts->pop (); + enum tree_code rhs_code; + tree lhs, rhs1, rhs2, tmp, rhs1_type, rhs2_type, vectype1, vectype2; + stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt); + stmt_vec_info pattern_stmt_info; + vec_info *vinfo = stmt_vinfo->vinfo; + gimple *pattern_stmt; + + /* Check for MASK_LOAD ans MASK_STORE calls requiring mask conversion. */ + if (is_gimple_call (last_stmt) + && gimple_call_internal_p (last_stmt) + && (gimple_call_internal_fn (last_stmt) == IFN_MASK_STORE + || gimple_call_internal_fn (last_stmt) == IFN_MASK_LOAD)) + { + bool load = (gimple_call_internal_fn (last_stmt) == IFN_MASK_LOAD); + + if (load) + { + lhs = gimple_call_lhs (last_stmt); + vectype1 = get_vectype_for_scalar_type (TREE_TYPE (lhs)); + } + else + { + rhs2 = gimple_call_arg (last_stmt, 3); + vectype1 = get_vectype_for_scalar_type (TREE_TYPE (rhs2)); + } + + rhs1 = gimple_call_arg (last_stmt, 2); + rhs1_type = search_type_for_mask (rhs1, vinfo); + if (!rhs1_type) + return NULL; + vectype2 = get_mask_type_for_scalar_type (rhs1_type); + + if (!vectype1 || !vectype2 + || TYPE_VECTOR_SUBPARTS (vectype1) == TYPE_VECTOR_SUBPARTS (vectype2)) + return NULL; + + tmp = build_mask_conversion (rhs1, vectype1, stmt_vinfo, vinfo); + + if (load) + { + lhs = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL); + pattern_stmt + = gimple_build_call_internal (IFN_MASK_LOAD, 3, + gimple_call_arg (last_stmt, 0), + gimple_call_arg (last_stmt, 1), + tmp); + gimple_call_set_lhs (pattern_stmt, lhs); + } + else + pattern_stmt + = gimple_build_call_internal (IFN_MASK_STORE, 4, + gimple_call_arg (last_stmt, 0), + gimple_call_arg (last_stmt, 1), + tmp, + gimple_call_arg (last_stmt, 3)); + + + pattern_stmt_info = new_stmt_vec_info (pattern_stmt, vinfo); + set_vinfo_for_stmt (pattern_stmt, pattern_stmt_info); + STMT_VINFO_DATA_REF (pattern_stmt_info) + = STMT_VINFO_DATA_REF (stmt_vinfo); + STMT_VINFO_DR_BASE_ADDRESS (pattern_stmt_info) + = STMT_VINFO_DR_BASE_ADDRESS (stmt_vinfo); + STMT_VINFO_DR_INIT (pattern_stmt_info) = STMT_VINFO_DR_INIT (stmt_vinfo); + STMT_VINFO_DR_OFFSET (pattern_stmt_info) + = STMT_VINFO_DR_OFFSET (stmt_vinfo); + STMT_VINFO_DR_STEP (pattern_stmt_info) = STMT_VINFO_DR_STEP (stmt_vinfo); + STMT_VINFO_DR_ALIGNED_TO (pattern_stmt_info) + = 
STMT_VINFO_DR_ALIGNED_TO (stmt_vinfo); + DR_STMT (STMT_VINFO_DATA_REF (stmt_vinfo)) = pattern_stmt; + + *type_out = vectype1; + *type_in = vectype1; + stmts->safe_push (last_stmt); + if (dump_enabled_p ()) + dump_printf_loc (MSG_NOTE, vect_location, + "vect_recog_mask_conversion_pattern: detected:\n"); + + return pattern_stmt; + } + + if (!is_gimple_assign (last_stmt)) + return NULL; + + lhs = gimple_assign_lhs (last_stmt); + rhs1 = gimple_assign_rhs1 (last_stmt); + rhs_code = gimple_assign_rhs_code (last_stmt); + + /* Check for cond expression requiring mask conversion. */ + if (rhs_code == COND_EXPR) + { + /* vect_recog_mixed_size_cond_pattern could apply. + Do nothing then. */ + if (STMT_VINFO_IN_PATTERN_P (stmt_vinfo)) + return NULL; + + vectype1 = get_vectype_for_scalar_type (TREE_TYPE (lhs)); + + if (TREE_CODE (rhs1) == SSA_NAME) + { + rhs1_type = search_type_for_mask (rhs1, vinfo); + if (!rhs1_type) + return NULL; + } + else + rhs1_type = TREE_TYPE (TREE_OPERAND (rhs1, 0)); + + vectype2 = get_mask_type_for_scalar_type (rhs1_type); + + if (!vectype1 || !vectype2 + || TYPE_VECTOR_SUBPARTS (vectype1) == TYPE_VECTOR_SUBPARTS (vectype2)) + return NULL; + + /* If rhs1 is a comparison we need to move it into a + separate statement. */ + if (TREE_CODE (rhs1) != SSA_NAME) + { + tmp = vect_recog_temp_ssa_var (TREE_TYPE (rhs1), NULL); + pattern_stmt = gimple_build_assign (tmp, rhs1); + rhs1 = tmp; + + pattern_stmt_info = new_stmt_vec_info (pattern_stmt, vinfo); + set_vinfo_for_stmt (pattern_stmt, pattern_stmt_info); + STMT_VINFO_VECTYPE (pattern_stmt_info) = vectype2; + append_pattern_def_seq (stmt_vinfo, pattern_stmt); + } + + tmp = build_mask_conversion (rhs1, vectype1, stmt_vinfo, vinfo); + + lhs = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL); + pattern_stmt = gimple_build_assign (lhs, COND_EXPR, tmp, + gimple_assign_rhs2 (last_stmt), + gimple_assign_rhs3 (last_stmt)); + + *type_out = vectype1; + *type_in = vectype1; + stmts->safe_push (last_stmt); + if (dump_enabled_p ()) + dump_printf_loc (MSG_NOTE, vect_location, + "vect_recog_mask_conversion_pattern: detected:\n"); + + return pattern_stmt; + } + + /* Now check for binary boolean operations requiring conversion for + one of operands. */ + if (TREE_CODE (TREE_TYPE (lhs)) != BOOLEAN_TYPE) + return NULL; + + if (rhs_code != BIT_IOR_EXPR + && rhs_code != BIT_XOR_EXPR + && rhs_code != BIT_AND_EXPR) + return NULL; + + rhs2 = gimple_assign_rhs2 (last_stmt); + + rhs1_type = search_type_for_mask (rhs1, vinfo); + rhs2_type = search_type_for_mask (rhs2, vinfo); + + if (!rhs1_type || !rhs2_type + || TYPE_PRECISION (rhs1_type) == TYPE_PRECISION (rhs2_type)) + return NULL; + + if (TYPE_PRECISION (rhs1_type) < TYPE_PRECISION (rhs2_type)) + { + vectype1 = get_mask_type_for_scalar_type (rhs1_type); + if (!vectype1) + return NULL; + rhs2 = build_mask_conversion (rhs2, vectype1, stmt_vinfo, vinfo); + } + else + { + vectype1 = get_mask_type_for_scalar_type (rhs2_type); + if (!vectype1) + return NULL; + rhs1 = build_mask_conversion (rhs1, vectype1, stmt_vinfo, vinfo); + } + + lhs = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL); + pattern_stmt = gimple_build_assign (lhs, rhs_code, rhs1, rhs2); + + *type_out = vectype1; + *type_in = vectype1; + stmts->safe_push (last_stmt); + if (dump_enabled_p ()) + dump_printf_loc (MSG_NOTE, vect_location, + "vect_recog_mask_conversion_pattern: detected:\n"); + + return pattern_stmt; +} + + /* Mark statements that are involved in a pattern. 
*/ static inline void @@ -3548,7 +3812,8 @@ vect_pattern_recog_1 (vect_recog_func_ptr vect_recog_func, stmt_info = vinfo_for_stmt (stmt); loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info); - if (VECTOR_MODE_P (TYPE_MODE (type_in))) + if (VECTOR_BOOLEAN_TYPE_P (type_in) + || VECTOR_MODE_P (TYPE_MODE (type_in))) { /* No need to check target support (already checked by the pattern recognition function). */ diff --git a/gcc/tree-vect-stmts.c b/gcc/tree-vect-stmts.c index c024348..cfe30e0 100644 --- a/gcc/tree-vect-stmts.c +++ b/gcc/tree-vect-stmts.c @@ -1974,6 +1974,11 @@ vectorizable_mask_load_store (gimple *stmt, gimple_stmt_iterator *gsi, /* Ensure that even with -fno-tree-dce the scalar MASK_LOAD is removed from the IL. */ + if (STMT_VINFO_RELATED_STMT (stmt_info)) + { + stmt = STMT_VINFO_RELATED_STMT (stmt_info); + stmt_info = vinfo_for_stmt (stmt); + } tree lhs = gimple_call_lhs (stmt); new_stmt = gimple_build_assign (lhs, build_zero_cst (TREE_TYPE (lhs))); set_vinfo_for_stmt (new_stmt, stmt_info); @@ -2092,6 +2097,11 @@ vectorizable_mask_load_store (gimple *stmt, gimple_stmt_iterator *gsi, { /* Ensure that even with -fno-tree-dce the scalar MASK_LOAD is removed from the IL. */ + if (STMT_VINFO_RELATED_STMT (stmt_info)) + { + stmt = STMT_VINFO_RELATED_STMT (stmt_info); + stmt_info = vinfo_for_stmt (stmt); + } tree lhs = gimple_call_lhs (stmt); new_stmt = gimple_build_assign (lhs, build_zero_cst (TREE_TYPE (lhs))); set_vinfo_for_stmt (new_stmt, stmt_info); @@ -3565,12 +3575,13 @@ vectorizable_conversion (gimple *stmt, gimple_stmt_iterator *gsi, && SCALAR_FLOAT_TYPE_P (rhs_type)))) return false; - if ((INTEGRAL_TYPE_P (lhs_type) - && (TYPE_PRECISION (lhs_type) - != GET_MODE_PRECISION (TYPE_MODE (lhs_type)))) - || (INTEGRAL_TYPE_P (rhs_type) - && (TYPE_PRECISION (rhs_type) - != GET_MODE_PRECISION (TYPE_MODE (rhs_type))))) + if (!VECTOR_BOOLEAN_TYPE_P (vectype_out) + && ((INTEGRAL_TYPE_P (lhs_type) + && (TYPE_PRECISION (lhs_type) + != GET_MODE_PRECISION (TYPE_MODE (lhs_type)))) + || (INTEGRAL_TYPE_P (rhs_type) + && (TYPE_PRECISION (rhs_type) + != GET_MODE_PRECISION (TYPE_MODE (rhs_type)))))) { if (dump_enabled_p ()) dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, @@ -3628,6 +3639,21 @@ vectorizable_conversion (gimple *stmt, gimple_stmt_iterator *gsi, return false; } + if (VECTOR_BOOLEAN_TYPE_P (vectype_out) + && !VECTOR_BOOLEAN_TYPE_P (vectype_in)) + { + if (dump_enabled_p ()) + { + dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, + "can't convert between boolean and non " + "boolean vectors"); + dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, rhs_type); + dump_printf (MSG_MISSED_OPTIMIZATION, "\n"); + } + + return false; + } + nunits_in = TYPE_VECTOR_SUBPARTS (vectype_in); nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out); if (nunits_in < nunits_out) @@ -8217,7 +8243,7 @@ free_stmt_vec_info (gimple *stmt) gimple *patt_stmt = STMT_VINFO_STMT (patt_info); gimple_set_bb (patt_stmt, NULL); tree lhs = gimple_get_lhs (patt_stmt); - if (TREE_CODE (lhs) == SSA_NAME) + if (lhs && TREE_CODE (lhs) == SSA_NAME) release_ssa_name (lhs); if (seq) { @@ -8227,7 +8253,7 @@ free_stmt_vec_info (gimple *stmt) gimple *seq_stmt = gsi_stmt (si); gimple_set_bb (seq_stmt, NULL); lhs = gimple_get_lhs (seq_stmt); - if (TREE_CODE (lhs) == SSA_NAME) + if (lhs && TREE_CODE (lhs) == SSA_NAME) release_ssa_name (lhs); free_stmt_vec_info (seq_stmt); } diff --git a/gcc/tree-vectorizer.h b/gcc/tree-vectorizer.h index d890eb1..6ad0cc4 100644 --- a/gcc/tree-vectorizer.h +++ b/gcc/tree-vectorizer.h @@ -1087,7 
+1087,7 @@ extern gimple *vect_find_last_scalar_stmt_in_slp (slp_tree); Additional pattern recognition functions can (and will) be added in the future. */ typedef gimple *(* vect_recog_func_ptr) (vec<gimple *> *, tree *, tree *); -#define NUM_PATTERNS 13 +#define NUM_PATTERNS 14 void vect_pattern_recog (vec_info *); /* In tree-vectorizer.c. */ -- 2.7.4
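
For readers following the new vect_recog_mask_conversion_pattern, here is a minimal source-level sketch of the situation it targets. The loop and all names below are hypothetical illustrations (they are not part of this patch or of the GCC testsuite): on targets where the boolean vector's element width follows the width of the compared data, the int comparison yields a mask with 32-bit elements, the double comparison a mask with 64-bit elements, and the char select wants an 8-bit element mask, so the pattern inserts the conversions shown as S3'' and S4'' in the comment above.

/* Hypothetical example loop, assuming a target whose boolean vector
   element width follows the compared data width.  */
void
foo (int *a, int *b, double *c, double *d, char *p, char *q, char *r, int n)
{
  for (int i = 0; i < n; i++)
    {
      _Bool m1 = a[i] > b[i];	/* Mask with 32-bit elements (int compare).  */
      _Bool m2 = c[i] < d[i];	/* Mask with 64-bit elements (double compare).  */
      /* The & mixes two mask widths and the char select needs an 8-bit
	 element mask, so vect_recog_mask_conversion_pattern adds the
	 required mask conversions before the & and the ?:.  */
      r[i] = (m1 & m2) ? p[i] : q[i];
    }
}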