[2/3] [vect] Add widening add, subtract patterns

author Joel Hutton <joel.hutton@arm.com>

Thu, 19 Nov 2020 10:39:38 +0000 (10:39 +0000)

committer Joel Hutton <joel.hutton@arm.com>

Thu, 19 Nov 2020 11:49:59 +0000 (11:49 +0000)
author Joel Hutton <joel.hutton@arm.com>
Thu, 19 Nov 2020 10:39:38 +0000 (10:39 +0000)
committer Joel Hutton <joel.hutton@arm.com>
Thu, 19 Nov 2020 11:49:59 +0000 (11:49 +0000)
diff --git a/gcc/doc/generic.texi b/gcc/doc/generic.texi

index 5c1d3cda48c0376ab851a560c5bb419d73603ff2..9791f8b675a84be2c37b6f18b5c1313302ea55e3 100644 (file)
--- a/gcc/doc/generic.texi
+++ b/gcc/doc/generic.texi
@@ -1798,6 +1798,10 @@ a value from @code{enum annot_expr_kind}, the third is an @code{INTEGER_CST}.
  @tindex VEC_RSHIFT_EXPR
  @tindex VEC_WIDEN_MULT_HI_EXPR
  @tindex VEC_WIDEN_MULT_LO_EXPR
+@tindex VEC_WIDEN_PLUS_HI_EXPR
+@tindex VEC_WIDEN_PLUS_LO_EXPR
+@tindex VEC_WIDEN_MINUS_HI_EXPR
+@tindex VEC_WIDEN_MINUS_LO_EXPR
  @tindex VEC_UNPACK_HI_EXPR
  @tindex VEC_UNPACK_LO_EXPR
  @tindex VEC_UNPACK_FLOAT_HI_EXPR
@@ -1844,6 +1848,33 @@ vector of @code{N/2} products. In the case of @code{VEC_WIDEN_MULT_LO_EXPR} the
  low @code{N/2} elements of the two vector are multiplied to produce the
  vector of @code{N/2} products.
  
+@item VEC_WIDEN_PLUS_HI_EXPR
+@itemx VEC_WIDEN_PLUS_LO_EXPR
+These nodes represent widening vector addition of the high and low parts of
+the two input vectors, respectively.  Their operands are vectors that contain
+the same number of elements (@code{N}) of the same integral type. The result
+is a vector that contains half as many elements, of an integral type whose size
+is twice as wide.  In the case of @code{VEC_WIDEN_PLUS_HI_EXPR} the high
+@code{N/2} elements of the two vectors are added to produce the vector of
+@code{N/2} products.  In the case of @code{VEC_WIDEN_PLUS_LO_EXPR} the low
+@code{N/2} elements of the two vectors are added to produce the vector of
+@code{N/2} products.
+
+@item VEC_WIDEN_MINUS_HI_EXPR
+@itemx VEC_WIDEN_MINUS_LO_EXPR
+These nodes represent widening vector subtraction of the high and low parts of
+the two input vectors, respectively.  Their operands are vectors that contain
+the same number of elements (@code{N}) of the same integral type. The high/low
+elements of the second vector are subtracted from the high/low elements of the
+first. The result is a vector that contains half as many elements, of an
+integral type whose size is twice as wide.  In the case of
+@code{VEC_WIDEN_MINUS_HI_EXPR} the high @code{N/2} elements of the second
+vector are subtracted from the high @code{N/2} of the first to produce the
+vector of @code{N/2} products.  In the case of
+@code{VEC_WIDEN_MINUS_LO_EXPR} the low @code{N/2} elements of the second
+vector are subtracted from the low @code{N/2} of the first to produce the
+vector of @code{N/2} products.
+
  @item VEC_UNPACK_HI_EXPR
  @itemx VEC_UNPACK_LO_EXPR
  These nodes represent unpacking of the high and low parts of the input vector,
diff --git a/gcc/doc/md.texi b/gcc/doc/md.texi

index 813875b973bc73920f1195c4897ce3491ca713c2..da8c9a283dd42e2b3078ed5f370a37180ee0b538 100644 (file)
--- a/gcc/doc/md.texi
+++ b/gcc/doc/md.texi
@@ -5626,6 +5626,28 @@ with N signed/unsigned elements of size S@.  Operand 2 is a constant.  Shift
  the high/low elements of operand 1, and put the N/2 results of size 2*S in the
  output vector (operand 0).
  
+@cindex @code{vec_widen_saddl_hi_@var{m}} instruction pattern
+@cindex @code{vec_widen_saddl_lo_@var{m}} instruction pattern
+@cindex @code{vec_widen_uaddl_hi_@var{m}} instruction pattern
+@cindex @code{vec_widen_uaddl_lo_@var{m}} instruction pattern
+@item @samp{vec_widen_uaddl_hi_@var{m}}, @samp{vec_widen_uaddl_lo_@var{m}}
+@itemx @samp{vec_widen_saddl_hi_@var{m}}, @samp{vec_widen_saddl_lo_@var{m}}
+Signed/Unsigned widening add long.  Operands 1 and 2 are vectors with N
+signed/unsigned elements of size S@.  Add the high/low elements of 1 and 2
+together, widen the resulting elements and put the N/2 results of size 2*S in
+the output vector (operand 0).
+
+@cindex @code{vec_widen_ssubl_hi_@var{m}} instruction pattern
+@cindex @code{vec_widen_ssubl_lo_@var{m}} instruction pattern
+@cindex @code{vec_widen_usubl_hi_@var{m}} instruction pattern
+@cindex @code{vec_widen_usubl_lo_@var{m}} instruction pattern
+@item @samp{vec_widen_usubl_hi_@var{m}}, @samp{vec_widen_usubl_lo_@var{m}}
+@itemx @samp{vec_widen_ssubl_hi_@var{m}}, @samp{vec_widen_ssubl_lo_@var{m}}
+Signed/Unsigned widening subtract long.  Operands 1 and 2 are vectors with N
+signed/unsigned elements of size S@.  Subtract the high/low elements of 2 from
+1 and widen the resulting elements. Put the N/2 results of size 2*S in the
+output vector (operand 0).
+
  @cindex @code{mulhisi3} instruction pattern
  @item @samp{mulhisi3}
  Multiply operands 1 and 2, which have mode @code{HImode}, and store
diff --git a/gcc/expr.c b/gcc/expr.c

index ae16f07775870792729e3805436d7f2debafb6ca..83aa63c41b57c676ea5c3449152bed91d10fae64 100644 (file)
--- a/gcc/expr.c
+++ b/gcc/expr.c
@@ -9034,6 +9034,8 @@ expand_expr_real_2 (sepops ops, rtx target, machine_mode tmode,
                                           target, unsignedp);
        return target;
  
+    case WIDEN_PLUS_EXPR:
+    case WIDEN_MINUS_EXPR:
      case WIDEN_MULT_EXPR:
        /* If first operand is constant, swap them.
          Thus the following special case checks need only
@@ -9754,6 +9756,10 @@ expand_expr_real_2 (sepops ops, rtx target, machine_mode tmode,
         return temp;
        }
  
+    case VEC_WIDEN_PLUS_HI_EXPR:
+    case VEC_WIDEN_PLUS_LO_EXPR:
+    case VEC_WIDEN_MINUS_HI_EXPR:
+    case VEC_WIDEN_MINUS_LO_EXPR:
      case VEC_WIDEN_MULT_HI_EXPR:
      case VEC_WIDEN_MULT_LO_EXPR:
      case VEC_WIDEN_MULT_EVEN_EXPR:
diff --git a/gcc/optabs-tree.c b/gcc/optabs-tree.c

index 4dfda756932de1693667c39c6fabed043b20b63b..b797d018c84e2f55a683b786f855b018c2549d24 100644 (file)
--- a/gcc/optabs-tree.c
+++ b/gcc/optabs-tree.c
@@ -170,6 +170,22 @@ optab_for_tree_code (enum tree_code code, const_tree type,
        return (TYPE_UNSIGNED (type)
               ? vec_widen_ushiftl_lo_optab : vec_widen_sshiftl_lo_optab);
  
+    case VEC_WIDEN_PLUS_LO_EXPR:
+      return (TYPE_UNSIGNED (type)
+             ? vec_widen_uaddl_lo_optab : vec_widen_saddl_lo_optab);
+
+    case VEC_WIDEN_PLUS_HI_EXPR:
+      return (TYPE_UNSIGNED (type)
+             ? vec_widen_uaddl_hi_optab : vec_widen_saddl_hi_optab);
+
+    case VEC_WIDEN_MINUS_LO_EXPR:
+      return (TYPE_UNSIGNED (type)
+             ? vec_widen_usubl_lo_optab : vec_widen_ssubl_lo_optab);
+
+    case VEC_WIDEN_MINUS_HI_EXPR:
+      return (TYPE_UNSIGNED (type)
+             ? vec_widen_usubl_hi_optab : vec_widen_ssubl_hi_optab);
+
      case VEC_UNPACK_HI_EXPR:
        return (TYPE_UNSIGNED (type)
               ? vec_unpacku_hi_optab : vec_unpacks_hi_optab);
diff --git a/gcc/optabs.def b/gcc/optabs.def

index 78409aa14537d259bf90277751aac00d452a0d3f..5607f51e6b4b775a92d1d8ffcd3e9b53e9270d6c 100644 (file)
--- a/gcc/optabs.def
+++ b/gcc/optabs.def
@@ -383,6 +383,10 @@ OPTAB_D (vec_widen_smult_even_optab, "vec_widen_smult_even_$a")
  OPTAB_D (vec_widen_smult_hi_optab, "vec_widen_smult_hi_$a")
  OPTAB_D (vec_widen_smult_lo_optab, "vec_widen_smult_lo_$a")
  OPTAB_D (vec_widen_smult_odd_optab, "vec_widen_smult_odd_$a")
+OPTAB_D (vec_widen_ssubl_hi_optab, "vec_widen_ssubl_hi_$a")
+OPTAB_D (vec_widen_ssubl_lo_optab, "vec_widen_ssubl_lo_$a")
+OPTAB_D (vec_widen_saddl_hi_optab, "vec_widen_saddl_hi_$a")
+OPTAB_D (vec_widen_saddl_lo_optab, "vec_widen_saddl_lo_$a")
  OPTAB_D (vec_widen_sshiftl_hi_optab, "vec_widen_sshiftl_hi_$a")
  OPTAB_D (vec_widen_sshiftl_lo_optab, "vec_widen_sshiftl_lo_$a")
  OPTAB_D (vec_widen_umult_even_optab, "vec_widen_umult_even_$a")
@@ -391,6 +395,10 @@ OPTAB_D (vec_widen_umult_lo_optab, "vec_widen_umult_lo_$a")
  OPTAB_D (vec_widen_umult_odd_optab, "vec_widen_umult_odd_$a")
  OPTAB_D (vec_widen_ushiftl_hi_optab, "vec_widen_ushiftl_hi_$a")
  OPTAB_D (vec_widen_ushiftl_lo_optab, "vec_widen_ushiftl_lo_$a")
+OPTAB_D (vec_widen_usubl_hi_optab, "vec_widen_usubl_hi_$a")
+OPTAB_D (vec_widen_usubl_lo_optab, "vec_widen_usubl_lo_$a")
+OPTAB_D (vec_widen_uaddl_hi_optab, "vec_widen_uaddl_hi_$a")
+OPTAB_D (vec_widen_uaddl_lo_optab, "vec_widen_uaddl_lo_$a")
  
  OPTAB_D (sync_add_optab, "sync_add$I$a")
  OPTAB_D (sync_and_optab, "sync_and$I$a")
diff --git a/gcc/testsuite/gcc.target/aarch64/vect-widen-add.c b/gcc/testsuite/gcc.target/aarch64/vect-widen-add.c

new file mode 100644 (file)

index 0000000..220bd93
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/vect-widen-add.c
@@ -0,0 +1,92 @@
+/* { dg-do run } */
+/* { dg-options "-O3 -save-temps" } */
+#include <stdint.h>
+#include <string.h>
+
+#pragma GCC target "+nosve"
+
+#define ARR_SIZE 1024
+
+/* Should produce an uaddl */
+void uadd_opt (uint32_t *foo, uint16_t *a, uint16_t *b)
+{
+    for( int i = 0; i < ARR_SIZE - 3;i=i+4)
+    {
+        foo[i]   = a[i]   + b[i];
+        foo[i+1] = a[i+1] + b[i+1];
+        foo[i+2] = a[i+2] + b[i+2];
+        foo[i+3] = a[i+3] + b[i+3];
+    }
+}
+
+__attribute__((optimize (0)))
+void uadd_nonopt (uint32_t *foo, uint16_t *a, uint16_t *b)
+{
+    for( int i = 0; i < ARR_SIZE - 3;i=i+4)
+    {
+        foo[i]   = a[i]   + b[i];
+        foo[i+1] = a[i+1] + b[i+1];
+        foo[i+2] = a[i+2] + b[i+2];
+        foo[i+3] = a[i+3] + b[i+3];
+    }
+}
+
+/* Should produce an saddl */
+void sadd_opt (int32_t *foo, int16_t *a, int16_t *b)
+{
+    for( int i = 0; i < ARR_SIZE - 3;i=i+4)
+    {
+        foo[i]   = a[i]   + b[i];
+        foo[i+1] = a[i+1] + b[i+1];
+        foo[i+2] = a[i+2] + b[i+2];
+        foo[i+3] = a[i+3] + b[i+3];
+    }
+}
+
+__attribute__((optimize (0)))
+void sadd_nonopt (int32_t *foo, int16_t *a, int16_t *b)
+{
+    for( int i = 0; i < ARR_SIZE - 3;i=i+4)
+    {
+        foo[i]   = a[i]   + b[i];
+        foo[i+1] = a[i+1] + b[i+1];
+        foo[i+2] = a[i+2] + b[i+2];
+        foo[i+3] = a[i+3] + b[i+3];
+    }
+}
+
+
+void __attribute__((optimize (0)))
+init(uint16_t *a, uint16_t *b)
+{
+    for( int i = 0; i < ARR_SIZE;i++)
+    {
+      a[i] = i;
+      b[i] = 2*i;
+    }
+}
+
+int __attribute__((optimize (0)))
+main()
+{
+    uint32_t foo_arr[ARR_SIZE];
+    uint32_t bar_arr[ARR_SIZE];
+    uint16_t a[ARR_SIZE];
+    uint16_t b[ARR_SIZE];
+
+    init(a, b);
+    uadd_opt(foo_arr, a, b);
+    uadd_nonopt(bar_arr, a, b);
+    if (memcmp(foo_arr, bar_arr, ARR_SIZE) != 0)
+      return 1;
+    sadd_opt((int32_t*) foo_arr, (int16_t*) a, (int16_t*) b);
+    sadd_nonopt((int32_t*) bar_arr, (int16_t*) a, (int16_t*) b);
+    if (memcmp(foo_arr, bar_arr, ARR_SIZE) != 0)
+      return 1;
+    return 0;
+}
+
+/* { dg-final { scan-assembler-times {\tuaddl\t} 1} } */
+/* { dg-final { scan-assembler-times {\tuaddl2\t} 1} } */
+/* { dg-final { scan-assembler-times {\tsaddl\t} 1} } */
+/* { dg-final { scan-assembler-times {\tsaddl2\t} 1} } */
diff --git a/gcc/testsuite/gcc.target/aarch64/vect-widen-sub.c b/gcc/testsuite/gcc.target/aarch64/vect-widen-sub.c

new file mode 100644 (file)

index 0000000..a2bed63
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/vect-widen-sub.c
@@ -0,0 +1,92 @@
+/* { dg-do run } */
+/* { dg-options "-O3 -save-temps" } */
+#include <stdint.h>
+#include <string.h>
+
+#pragma GCC target "+nosve"
+
+#define ARR_SIZE 1024
+
+/* Should produce an usubl */
+void usub_opt (uint32_t *foo, uint16_t *a, uint16_t *b)
+{
+    for( int i = 0; i < ARR_SIZE - 3;i=i+4)
+    {
+        foo[i]   = a[i]   - b[i];
+        foo[i+1] = a[i+1] - b[i+1];
+        foo[i+2] = a[i+2] - b[i+2];
+        foo[i+3] = a[i+3] - b[i+3];
+    }
+}
+
+__attribute__((optimize (0)))
+void usub_nonopt (uint32_t *foo, uint16_t *a, uint16_t *b)
+{
+    for( int i = 0; i < ARR_SIZE - 3;i=i+4)
+    {
+        foo[i]   = a[i]   - b[i];
+        foo[i+1] = a[i+1] - b[i+1];
+        foo[i+2] = a[i+2] - b[i+2];
+        foo[i+3] = a[i+3] - b[i+3];
+    }
+}
+
+/* Should produce an ssubl */
+void ssub_opt (int32_t *foo, int16_t *a, int16_t *b)
+{
+    for( int i = 0; i < ARR_SIZE - 3;i=i+4)
+    {
+        foo[i]   = a[i]   - b[i];
+        foo[i+1] = a[i+1] - b[i+1];
+        foo[i+2] = a[i+2] - b[i+2];
+        foo[i+3] = a[i+3] - b[i+3];
+    }
+}
+
+__attribute__((optimize (0)))
+void ssub_nonopt (int32_t *foo, int16_t *a, int16_t *b)
+{
+    for( int i = 0; i < ARR_SIZE - 3;i=i+4)
+    {
+        foo[i]   = a[i]   - b[i];
+        foo[i+1] = a[i+1] - b[i+1];
+        foo[i+2] = a[i+2] - b[i+2];
+        foo[i+3] = a[i+3] - b[i+3];
+    }
+}
+
+
+void __attribute__((optimize (0)))
+init(uint16_t *a, uint16_t *b)
+{
+    for( int i = 0; i < ARR_SIZE;i++)
+    {
+      a[i] = i;
+      b[i] = 2*i;
+    }
+}
+
+int __attribute__((optimize (0)))
+main()
+{
+    uint32_t foo_arr[ARR_SIZE];
+    uint32_t bar_arr[ARR_SIZE];
+    uint16_t a[ARR_SIZE];
+    uint16_t b[ARR_SIZE];
+
+    init(a, b);
+    usub_opt(foo_arr, a, b);
+    usub_nonopt(bar_arr, a, b);
+    if (memcmp(foo_arr, bar_arr, ARR_SIZE) != 0)
+      return 1;
+    ssub_opt((int32_t*) foo_arr, (int16_t*) a, (int16_t*) b);
+    ssub_nonopt((int32_t*) bar_arr, (int16_t*) a, (int16_t*) b);
+    if (memcmp(foo_arr, bar_arr, ARR_SIZE) != 0)
+      return 1;
+    return 0;
+}
+
+/* { dg-final { scan-assembler-times {\tusubl\t} 1} } */
+/* { dg-final { scan-assembler-times {\tusubl2\t} 1} } */
+/* { dg-final { scan-assembler-times {\tssubl\t} 1} } */
+/* { dg-final { scan-assembler-times {\tssubl2\t} 1} } */
diff --git a/gcc/tree-cfg.c b/gcc/tree-cfg.c

index 5139f111fecc7ec6e0902145b808308a5e47450b..aaf390bda42648f226f3c3d592b61d736851c0ea 100644 (file)
--- a/gcc/tree-cfg.c
+++ b/gcc/tree-cfg.c
@@ -3885,6 +3885,8 @@ verify_gimple_assign_binary (gassign *stmt)
          return false;
        }
  
+    case WIDEN_PLUS_EXPR:
+    case WIDEN_MINUS_EXPR:
      case PLUS_EXPR:
      case MINUS_EXPR:
        {
@@ -4005,6 +4007,10 @@ verify_gimple_assign_binary (gassign *stmt)
          return false;
        }
  
+    case VEC_WIDEN_MINUS_HI_EXPR:
+    case VEC_WIDEN_MINUS_LO_EXPR:
+    case VEC_WIDEN_PLUS_HI_EXPR:
+    case VEC_WIDEN_PLUS_LO_EXPR:
      case VEC_WIDEN_MULT_HI_EXPR:
      case VEC_WIDEN_MULT_LO_EXPR:
      case VEC_WIDEN_MULT_EVEN_EXPR:
diff --git a/gcc/tree-inline.c b/gcc/tree-inline.c

index 32424b169c7310c03168baf43cc56a8b6ef2f15b..d9814bd10d39f006894a115568acc742820fed38 100644 (file)
--- a/gcc/tree-inline.c
+++ b/gcc/tree-inline.c
@@ -4224,6 +4224,8 @@ estimate_operator_cost (enum tree_code code, eni_weights *weights,
  
      case REALIGN_LOAD_EXPR:
  
+    case WIDEN_PLUS_EXPR:
+    case WIDEN_MINUS_EXPR:
      case WIDEN_SUM_EXPR:
      case WIDEN_MULT_EXPR:
      case DOT_PROD_EXPR:
@@ -4232,6 +4234,10 @@ estimate_operator_cost (enum tree_code code, eni_weights *weights,
      case WIDEN_MULT_MINUS_EXPR:
      case WIDEN_LSHIFT_EXPR:
  
+    case VEC_WIDEN_PLUS_HI_EXPR:
+    case VEC_WIDEN_PLUS_LO_EXPR:
+    case VEC_WIDEN_MINUS_HI_EXPR:
+    case VEC_WIDEN_MINUS_LO_EXPR:
      case VEC_WIDEN_MULT_HI_EXPR:
      case VEC_WIDEN_MULT_LO_EXPR:
      case VEC_WIDEN_MULT_EVEN_EXPR:
diff --git a/gcc/tree-vect-generic.c b/gcc/tree-vect-generic.c

index d7bafa77134079faf743c1b482251311abb681c5..23bc1cb04b795769d69cb7f58e4aed27e577f44a 100644 (file)
--- a/gcc/tree-vect-generic.c
+++ b/gcc/tree-vect-generic.c
@@ -2118,6 +2118,10 @@ expand_vector_operations_1 (gimple_stmt_iterator *gsi,
       arguments, not the widened result.  VEC_UNPACK_FLOAT_*_EXPR is
       calculated in the same way above.  */
    if (code == WIDEN_SUM_EXPR
+      || code == VEC_WIDEN_PLUS_HI_EXPR
+      || code == VEC_WIDEN_PLUS_LO_EXPR
+      || code == VEC_WIDEN_MINUS_HI_EXPR
+      || code == VEC_WIDEN_MINUS_LO_EXPR
        || code == VEC_WIDEN_MULT_HI_EXPR
        || code == VEC_WIDEN_MULT_LO_EXPR
        || code == VEC_WIDEN_MULT_EVEN_EXPR
diff --git a/gcc/tree-vect-patterns.c b/gcc/tree-vect-patterns.c

index f68a87e05ed54145a25ccff598eeef9e57f9a759..f2ce75aac3ebf39a5a6001c5cc33cf94a2942486 100644 (file)
--- a/gcc/tree-vect-patterns.c
+++ b/gcc/tree-vect-patterns.c
@@ -1148,7 +1148,7 @@ vect_recog_sad_pattern (vec_info *vinfo,
    /* FORNOW.  Can continue analyzing the def-use chain when this stmt in a phi
       inside the loop (in case we are analyzing an outer-loop).  */
    vect_unpromoted_value unprom[2];
-  if (!vect_widened_op_tree (vinfo, diff_stmt_vinfo, MINUS_EXPR, MINUS_EXPR,
+  if (!vect_widened_op_tree (vinfo, diff_stmt_vinfo, MINUS_EXPR, WIDEN_MINUS_EXPR,
                              false, 2, unprom, &half_type))
      return NULL;
  
@@ -1262,6 +1262,29 @@ vect_recog_widen_mult_pattern (vec_info *vinfo, stmt_vec_info last_stmt_info,
                                       "vect_recog_widen_mult_pattern");
  }
  
+/* Try to detect addition on widened inputs, converting PLUS_EXPR
+   to WIDEN_PLUS_EXPR.  See vect_recog_widen_op_pattern for details.  */
+
+static gimple *
+vect_recog_widen_plus_pattern (vec_info *vinfo, stmt_vec_info last_stmt_info,
+                              tree *type_out)
+{
+  return vect_recog_widen_op_pattern (vinfo, last_stmt_info, type_out,
+                                     PLUS_EXPR, WIDEN_PLUS_EXPR, false,
+                                     "vect_recog_widen_plus_pattern");
+}
+
+/* Try to detect subtraction on widened inputs, converting MINUS_EXPR
+   to WIDEN_MINUS_EXPR.  See vect_recog_widen_op_pattern for details.  */
+static gimple *
+vect_recog_widen_minus_pattern (vec_info *vinfo, stmt_vec_info last_stmt_info,
+                              tree *type_out)
+{
+  return vect_recog_widen_op_pattern (vinfo, last_stmt_info, type_out,
+                                     MINUS_EXPR, WIDEN_MINUS_EXPR, false,
+                                     "vect_recog_widen_minus_pattern");
+}
+
  /* Function vect_recog_pow_pattern
  
     Try to find the following pattern:
@@ -1978,7 +2001,7 @@ vect_recog_average_pattern (vec_info *vinfo,
    vect_unpromoted_value unprom[3];
    tree new_type;
    unsigned int nops = vect_widened_op_tree (vinfo, plus_stmt_info, PLUS_EXPR,
-                                           PLUS_EXPR, false, 3,
+                                           WIDEN_PLUS_EXPR, false, 3,
                                             unprom, &new_type);
    if (nops == 0)
      return NULL;
@@ -5249,7 +5272,9 @@ static vect_recog_func vect_vect_recog_func_ptrs[] = {
       of mask conversion that are needed for gather and scatter
       internal functions.  */
    { vect_recog_gather_scatter_pattern, "gather_scatter" },
-  { vect_recog_mask_conversion_pattern, "mask_conversion" }
+  { vect_recog_mask_conversion_pattern, "mask_conversion" },
+  { vect_recog_widen_plus_pattern, "widen_plus" },
+  { vect_recog_widen_minus_pattern, "widen_minus" },
  };
  
  const unsigned int NUM_PATTERNS = ARRAY_SIZE (vect_vect_recog_func_ptrs);
diff --git a/gcc/tree-vect-stmts.c b/gcc/tree-vect-stmts.c

index 4e535fec9ca523e717076fabb3c9be6961af2c26..f88f07a62df9f4906ac8b74b69b5f1955c69656b 100644 (file)
--- a/gcc/tree-vect-stmts.c
+++ b/gcc/tree-vect-stmts.c
@@ -4571,6 +4571,8 @@ vectorizable_conversion (vec_info *vinfo,
    if (!CONVERT_EXPR_CODE_P (code)
        && code != FIX_TRUNC_EXPR
        && code != FLOAT_EXPR
+      && code != WIDEN_PLUS_EXPR
+      && code != WIDEN_MINUS_EXPR
        && code != WIDEN_MULT_EXPR
        && code != WIDEN_LSHIFT_EXPR)
      return false;
@@ -4616,7 +4618,8 @@ vectorizable_conversion (vec_info *vinfo,
  
    if (op_type == binary_op)
      {
-      gcc_assert (code == WIDEN_MULT_EXPR || code == WIDEN_LSHIFT_EXPR);
+      gcc_assert (code == WIDEN_MULT_EXPR || code == WIDEN_LSHIFT_EXPR
+                 || code == WIDEN_PLUS_EXPR || code == WIDEN_MINUS_EXPR);
  
        op1 = gimple_assign_rhs2 (stmt);
        tree vectype1_in;
@@ -11535,6 +11538,16 @@ supportable_widening_operation (vec_info *vinfo,
        c2 = VEC_WIDEN_LSHIFT_HI_EXPR;
        break;
  
+    case WIDEN_PLUS_EXPR:
+      c1 = VEC_WIDEN_PLUS_LO_EXPR;
+      c2 = VEC_WIDEN_PLUS_HI_EXPR;
+      break;
+
+    case WIDEN_MINUS_EXPR:
+      c1 = VEC_WIDEN_MINUS_LO_EXPR;
+      c2 = VEC_WIDEN_MINUS_HI_EXPR;
+      break;
+
      CASE_CONVERT:
        c1 = VEC_UNPACK_LO_EXPR;
        c2 = VEC_UNPACK_HI_EXPR;
diff --git a/gcc/tree.def b/gcc/tree.def

index 462672f2c69fc988a314799e7342a29298674e8f..11f5a984bbd60d121cf000dbe506e16b08edcc42 100644 (file)
--- a/gcc/tree.def
+++ b/gcc/tree.def
@@ -1365,6 +1365,8 @@ DEFTREECODE (WIDEN_MULT_MINUS_EXPR, "widen_mult_minus_expr", tcc_expression, 3)
     the first argument from type t1 to type t2, and then shifting it
     by the second argument.  */
  DEFTREECODE (WIDEN_LSHIFT_EXPR, "widen_lshift_expr", tcc_binary, 2)
+DEFTREECODE (WIDEN_PLUS_EXPR, "widen_plus_expr", tcc_binary, 2)
+DEFTREECODE (WIDEN_MINUS_EXPR, "widen_minus_expr", tcc_binary, 2)
  
  /* Widening vector multiplication.
     The two operands are vectors with N elements of size S. Multiplying the
@@ -1429,6 +1431,10 @@ DEFTREECODE (VEC_PACK_FLOAT_EXPR, "vec_pack_float_expr", tcc_binary, 2)
   */
  DEFTREECODE (VEC_WIDEN_LSHIFT_HI_EXPR, "widen_lshift_hi_expr", tcc_binary, 2)
  DEFTREECODE (VEC_WIDEN_LSHIFT_LO_EXPR, "widen_lshift_lo_expr", tcc_binary, 2)
+DEFTREECODE (VEC_WIDEN_PLUS_HI_EXPR, "widen_plus_hi_expr", tcc_binary, 2)
+DEFTREECODE (VEC_WIDEN_PLUS_LO_EXPR, "widen_plus_lo_expr", tcc_binary, 2)
+DEFTREECODE (VEC_WIDEN_MINUS_HI_EXPR, "widen_minus_hi_expr", tcc_binary, 2)
+DEFTREECODE (VEC_WIDEN_MINUS_LO_EXPR, "widen_minus_lo_expr", tcc_binary, 2)
  
  /* PREDICT_EXPR.  Specify hint for branch prediction.  The
     PREDICT_EXPR_PREDICTOR specify predictor and PREDICT_EXPR_OUTCOME the
author	Joel Hutton <joel.hutton@arm.com>
	Thu, 19 Nov 2020 10:39:38 +0000 (10:39 +0000)
committer	Joel Hutton <joel.hutton@arm.com>
	Thu, 19 Nov 2020 11:49:59 +0000 (11:49 +0000)
gcc/doc/generic.texi		patch \| blob \| history
gcc/doc/md.texi		patch \| blob \| history
gcc/expr.c		patch \| blob \| history
gcc/optabs-tree.c		patch \| blob \| history
gcc/optabs.def		patch \| blob \| history
gcc/testsuite/gcc.target/aarch64/vect-widen-add.c	[new file with mode: 0644]	patch \| blob
gcc/testsuite/gcc.target/aarch64/vect-widen-sub.c	[new file with mode: 0644]	patch \| blob
gcc/tree-cfg.c		patch \| blob \| history
gcc/tree-inline.c		patch \| blob \| history
gcc/tree-vect-generic.c		patch \| blob \| history
gcc/tree-vect-patterns.c		patch \| blob \| history
gcc/tree-vect-stmts.c		patch \| blob \| history
gcc/tree.def		patch \| blob \| history