From 78595e918ee168f595d16268073a3754c64d67fe Mon Sep 17 00:00:00 2001 From: Richard Sandiford Date: Thu, 7 Jan 2021 15:00:38 +0000 Subject: [PATCH] gimple-isel: Fall back to using vcond_mask [PR98560] MIME-Version: 1.0 Content-Type: text/plain; charset=utf8 Content-Transfer-Encoding: 8bit PR98560 is about a case in which the vectoriser initially generates: mask_1 = a < 0; mask_2 = mask_1 & ...; res = VEC_COND_EXPR <mask_2, b, c>; The vectoriser thus expects res to be calculated using vcond_mask. However, we later manage to fold mask_2 to mask_1, leaving: mask_1 = a < 0; res = VEC_COND_EXPR <mask_1, b, c>; gimple-isel then required a combined vcond to exist. On most targets, it's not too onerous to provide all possible (compare x select) combinations. For each data mode, you just need to provide unsigned comparisons, signed comparisons, and floating-point comparisons, with the data mode and type of comparison uniquely determining the mode of the compared values. But for targets like SVE that support “unpacked” vectors, it's not that simple: the level of unpacking adds another degree of freedom. Rather than insist that the combined versions exist, I think we should be prepared to fall back to using separate comparisons and vcond_masks. I think that makes more sense on targets like AArch64 and AArch32 in which compares and selects are fundamentally separate operations anyway. gcc/ PR tree-optimization/98560 * gimple-isel.cc (gimple_expand_vec_cond_expr): If we fail to use IFN_VCOND{,U,EQ}, fall back on IFN_VCOND_MASK. gcc/testsuite/ PR tree-optimization/98560 * gcc.dg/vect/pr98560-1.c: New test. 
--- gcc/gimple-isel.cc | 26 +++++++++++++++++++------- gcc/testsuite/gcc.dg/vect/pr98560-1.c | 17 +++++++++++++++++ 2 files changed, 36 insertions(+), 7 deletions(-) create mode 100644 gcc/testsuite/gcc.dg/vect/pr98560-1.c diff --git a/gcc/gimple-isel.cc b/gcc/gimple-isel.cc index d40338c..0f3d6bb 100644 --- a/gcc/gimple-isel.cc +++ b/gcc/gimple-isel.cc @@ -154,6 +154,7 @@ gimple_expand_vec_cond_expr (gimple_stmt_iterator *gsi, return gimple_build_assign (lhs, tem3); } + bool can_compute_op0 = true; gcc_assert (!COMPARISON_CLASS_P (op0)); if (TREE_CODE (op0) == SSA_NAME) { @@ -184,13 +185,16 @@ gimple_expand_vec_cond_expr (gimple_stmt_iterator *gsi, tree op0_type = TREE_TYPE (op0); tree op0a_type = TREE_TYPE (op0a); + if (TREE_CODE_CLASS (tcode) == tcc_comparison) + can_compute_op0 = expand_vec_cmp_expr_p (op0a_type, op0_type, + tcode); /* Try to fold x CMP y ? -1 : 0 to x CMP y. */ - if (integer_minus_onep (op1) + if (can_compute_op0 + && integer_minus_onep (op1) && integer_zerop (op2) - && TYPE_MODE (TREE_TYPE (lhs)) == TYPE_MODE (TREE_TYPE (op0)) - && expand_vec_cmp_expr_p (op0a_type, op0_type, tcode)) + && TYPE_MODE (TREE_TYPE (lhs)) == TYPE_MODE (TREE_TYPE (op0))) { tree conv_op = build1 (VIEW_CONVERT_EXPR, TREE_TYPE (lhs), op0); gassign *new_stmt = gimple_build_assign (lhs, conv_op); @@ -198,10 +202,10 @@ gimple_expand_vec_cond_expr (gimple_stmt_iterator *gsi, return new_stmt; } - if (used_vec_cond_exprs >= 2 + if (can_compute_op0 + && used_vec_cond_exprs >= 2 && (get_vcond_mask_icode (mode, TYPE_MODE (op0_type)) - != CODE_FOR_nothing) - && expand_vec_cmp_expr_p (op0a_type, op0_type, tcode)) + != CODE_FOR_nothing)) { /* Keep the SSA name and use vcond_mask. 
*/ tcode = TREE_CODE (op0); @@ -254,7 +258,15 @@ gimple_expand_vec_cond_expr (gimple_stmt_iterator *gsi, } } - gcc_assert (icode != CODE_FOR_nothing); + if (icode == CODE_FOR_nothing) + { + gcc_assert (VECTOR_BOOLEAN_TYPE_P (TREE_TYPE (op0)) + && can_compute_op0 + && (get_vcond_mask_icode (mode, TYPE_MODE (TREE_TYPE (op0))) + != CODE_FOR_nothing)); + return gimple_build_call_internal (IFN_VCOND_MASK, 3, op0, op1, op2); + } + tree tcode_tree = build_int_cst (integer_type_node, tcode); return gimple_build_call_internal (unsignedp ? IFN_VCONDU : IFN_VCOND, 5, op0a, op0b, op1, op2, tcode_tree); diff --git a/gcc/testsuite/gcc.dg/vect/pr98560-1.c b/gcc/testsuite/gcc.dg/vect/pr98560-1.c new file mode 100644 index 0000000..2583fc4 --- /dev/null +++ b/gcc/testsuite/gcc.dg/vect/pr98560-1.c @@ -0,0 +1,17 @@ +/* { dg-do compile } */ +/* { dg-additional-options "-O3 -fno-tree-vrp -fno-tree-fre -fno-tree-pre -fno-code-hoisting -fvect-cost-model=dynamic" } */ +/* { dg-additional-options "-msve-vector-bits=128" { target aarch64_sve } } */ + +#include <stdint.h> + +void +f (uint16_t *restrict dst, uint32_t *restrict src1, float *restrict src2) +{ + int i = 0; + for (int j = 0; j < 4; ++j) + { + uint16_t tmp = src1[i] >> 1; + dst[i] = (uint16_t) (src2[i] < 0 && i < 4 ? tmp : 1); + i += 1; + } +} -- 2.7.4