From 78595e918ee168f595d16268073a3754c64d67fe Mon Sep 17 00:00:00 2001
From: Richard Sandiford <richard.sandiford@arm.com>
Date: Thu, 7 Jan 2021 15:00:38 +0000
Subject: [PATCH] gimple-isel: Fall back to using vcond_mask [PR98560]
MIME-Version: 1.0
Content-Type: text/plain; charset=utf8
Content-Transfer-Encoding: 8bit

PR98560 is about a case in which the vectoriser initially generates:

  mask_1 = a < 0;
  mask_2 = mask_1 & ...;
  res = VEC_COND_EXPR <mask_2, b, c>;

The vectoriser thus expects res to be calculated using vcond_mask.
However, we later manage to fold mask_2 to mask_1, leaving:

  mask_1 = a < 0;
  res = VEC_COND_EXPR <mask_1, b, c>;

gimple-isel then required a combined vcond to exist.

On most targets, it's not too onerous to provide all possible
(compare x select) combinations.  For each data mode, you just
need to provide unsigned comparisons, signed comparisons, and
floating-point comparisons, with the data mode and type of
comparison uniquely determining the mode of the compared values.
But for targets like SVE that support “unpacked” vectors,
it's not that simple: the level of unpacking adds another
degree of freedom.

Rather than insist that the combined versions exist, I think
we should be prepared to fall back to using separate comparisons
and vcond_masks.  I think that makes more sense on targets like
AArch64 and AArch32 in which compares and selects are fundamentally
separate operations anyway.

gcc/
	PR tree-optimization/98560
	* gimple-isel.cc (gimple_expand_vec_cond_expr): If we fail to use
	IFN_VCOND{,U,EQ}, fall back on IFN_VCOND_MASK.

gcc/testsuite/
	PR tree-optimization/98560
	* gcc.dg/vect/pr98560-1.c: New test.
---
 gcc/gimple-isel.cc                    | 26 +++++++++++++++++++-------
 gcc/testsuite/gcc.dg/vect/pr98560-1.c | 17 +++++++++++++++++
 2 files changed, 36 insertions(+), 7 deletions(-)
 create mode 100644 gcc/testsuite/gcc.dg/vect/pr98560-1.c

diff --git a/gcc/gimple-isel.cc b/gcc/gimple-isel.cc
index d40338c..0f3d6bb 100644
--- a/gcc/gimple-isel.cc
+++ b/gcc/gimple-isel.cc
@@ -154,6 +154,7 @@ gimple_expand_vec_cond_expr (gimple_stmt_iterator *gsi,
       return gimple_build_assign (lhs, tem3);
     }
 
+  bool can_compute_op0 = true;
   gcc_assert (!COMPARISON_CLASS_P (op0));
   if (TREE_CODE (op0) == SSA_NAME)
     {
@@ -184,13 +185,16 @@ gimple_expand_vec_cond_expr (gimple_stmt_iterator *gsi,
 
 	  tree op0_type = TREE_TYPE (op0);
 	  tree op0a_type = TREE_TYPE (op0a);
+	  if (TREE_CODE_CLASS (tcode) == tcc_comparison)
+	    can_compute_op0 = expand_vec_cmp_expr_p (op0a_type, op0_type,
+						     tcode);
 
 	  /* Try to fold x CMP y ? -1 : 0 to x CMP y.  */
 
-	  if (integer_minus_onep (op1)
+	  if (can_compute_op0
+	      && integer_minus_onep (op1)
 	      && integer_zerop (op2)
-	      && TYPE_MODE (TREE_TYPE (lhs)) == TYPE_MODE (TREE_TYPE (op0))
-	      && expand_vec_cmp_expr_p (op0a_type, op0_type, tcode))
+	      && TYPE_MODE (TREE_TYPE (lhs)) == TYPE_MODE (TREE_TYPE (op0)))
 	    {
 	      tree conv_op = build1 (VIEW_CONVERT_EXPR, TREE_TYPE (lhs), op0);
 	      gassign *new_stmt = gimple_build_assign (lhs, conv_op);
@@ -198,10 +202,10 @@ gimple_expand_vec_cond_expr (gimple_stmt_iterator *gsi,
 	      return new_stmt;
 	    }
 
-	  if (used_vec_cond_exprs >= 2
+	  if (can_compute_op0
+	      && used_vec_cond_exprs >= 2
 	      && (get_vcond_mask_icode (mode, TYPE_MODE (op0_type))
-		  != CODE_FOR_nothing)
-	      && expand_vec_cmp_expr_p (op0a_type, op0_type, tcode))
+		  != CODE_FOR_nothing))
 	    {
 	      /* Keep the SSA name and use vcond_mask.  */
 	      tcode = TREE_CODE (op0);
@@ -254,7 +258,15 @@ gimple_expand_vec_cond_expr (gimple_stmt_iterator *gsi,
 	}
     }
 
-  gcc_assert (icode != CODE_FOR_nothing);
+  if (icode == CODE_FOR_nothing)
+    {
+      gcc_assert (VECTOR_BOOLEAN_TYPE_P (TREE_TYPE (op0))
+		  && can_compute_op0
+		  && (get_vcond_mask_icode (mode, TYPE_MODE (TREE_TYPE (op0)))
+		      != CODE_FOR_nothing));
+      return gimple_build_call_internal (IFN_VCOND_MASK, 3, op0, op1, op2);
+    }
+
   tree tcode_tree = build_int_cst (integer_type_node, tcode);
   return gimple_build_call_internal (unsignedp ? IFN_VCONDU : IFN_VCOND,
 				     5, op0a, op0b, op1, op2, tcode_tree);
diff --git a/gcc/testsuite/gcc.dg/vect/pr98560-1.c b/gcc/testsuite/gcc.dg/vect/pr98560-1.c
new file mode 100644
index 0000000..2583fc4
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/vect/pr98560-1.c
@@ -0,0 +1,17 @@
+/* { dg-do compile } */
+/* { dg-additional-options "-O3 -fno-tree-vrp -fno-tree-fre -fno-tree-pre -fno-code-hoisting -fvect-cost-model=dynamic" } */
+/* { dg-additional-options "-msve-vector-bits=128" { target aarch64_sve } } */
+
+#include <stdint.h>
+
+void
+f (uint16_t *restrict dst, uint32_t *restrict src1, float *restrict src2)
+{ /* Per element: dst = src2 < 0 ? (uint16_t) (src1 >> 1) : 1.  */
+  int i = 0; /* Array index, stepped by hand separately from j.  */
+  for (int j = 0; j < 4; ++j)
+    {
+      uint16_t tmp = src1[i] >> 1; /* 32-bit value narrowed to 16 bits.  */
+      dst[i] = (uint16_t) (src2[i] < 0 && i < 4 ? tmp : 1);
+      i += 1; /* i stays in 0..3, so `i < 4' above is always true.  */
+    }
+}
-- 
2.7.4