Add x86 addsub SLP pattern

author Richard Biener <rguenther@suse.de>

Mon, 31 May 2021 11:19:01 +0000 (13:19 +0200)

committer Richard Biener <rguenther@suse.de>

Thu, 24 Jun 2021 11:08:25 +0000 (13:08 +0200)
author Richard Biener <rguenther@suse.de>
Mon, 31 May 2021 11:19:01 +0000 (13:19 +0200)
committer Richard Biener <rguenther@suse.de>
Thu, 24 Jun 2021 11:08:25 +0000 (13:08 +0200)
diff --git a/gcc/config/i386/i386-builtin.def b/gcc/config/i386/i386-builtin.def

index 31df3a6..ea79e0b 100644 (file)
--- a/gcc/config/i386/i386-builtin.def
+++ b/gcc/config/i386/i386-builtin.def
@@ -855,8 +855,8 @@ BDESC (OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_MMX, 0, CODE_FOR_mmx_subv1di3, "__
  BDESC (OPTION_MASK_ISA_SSE3, 0, CODE_FOR_sse3_movshdup, "__builtin_ia32_movshdup", IX86_BUILTIN_MOVSHDUP, UNKNOWN, (int) V4SF_FTYPE_V4SF)
  BDESC (OPTION_MASK_ISA_SSE3, 0, CODE_FOR_sse3_movsldup, "__builtin_ia32_movsldup", IX86_BUILTIN_MOVSLDUP, UNKNOWN, (int) V4SF_FTYPE_V4SF)
  
-BDESC (OPTION_MASK_ISA_SSE3, 0, CODE_FOR_sse3_addsubv4sf3, "__builtin_ia32_addsubps", IX86_BUILTIN_ADDSUBPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF)
-BDESC (OPTION_MASK_ISA_SSE3, 0, CODE_FOR_sse3_addsubv2df3, "__builtin_ia32_addsubpd", IX86_BUILTIN_ADDSUBPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF)
+BDESC (OPTION_MASK_ISA_SSE3, 0, CODE_FOR_vec_addsubv4sf3, "__builtin_ia32_addsubps", IX86_BUILTIN_ADDSUBPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF)
+BDESC (OPTION_MASK_ISA_SSE3, 0, CODE_FOR_vec_addsubv2df3, "__builtin_ia32_addsubpd", IX86_BUILTIN_ADDSUBPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF)
  BDESC (OPTION_MASK_ISA_SSE3, 0, CODE_FOR_sse3_haddv4sf3, "__builtin_ia32_haddps", IX86_BUILTIN_HADDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF)
  BDESC (OPTION_MASK_ISA_SSE3, 0, CODE_FOR_sse3_haddv2df3, "__builtin_ia32_haddpd", IX86_BUILTIN_HADDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF)
  BDESC (OPTION_MASK_ISA_SSE3, 0, CODE_FOR_sse3_hsubv4sf3, "__builtin_ia32_hsubps", IX86_BUILTIN_HSUBPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF)
@@ -996,8 +996,8 @@ BDESC (OPTION_MASK_ISA_SSE2, 0, CODE_FOR_pclmulqdq, 0, IX86_BUILTIN_PCLMULQDQ128
  /* AVX */
  BDESC (OPTION_MASK_ISA_AVX, 0, CODE_FOR_addv4df3, "__builtin_ia32_addpd256", IX86_BUILTIN_ADDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF)
  BDESC (OPTION_MASK_ISA_AVX, 0, CODE_FOR_addv8sf3, "__builtin_ia32_addps256", IX86_BUILTIN_ADDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF)
-BDESC (OPTION_MASK_ISA_AVX, 0, CODE_FOR_avx_addsubv4df3, "__builtin_ia32_addsubpd256", IX86_BUILTIN_ADDSUBPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF)
-BDESC (OPTION_MASK_ISA_AVX, 0, CODE_FOR_avx_addsubv8sf3, "__builtin_ia32_addsubps256", IX86_BUILTIN_ADDSUBPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF)
+BDESC (OPTION_MASK_ISA_AVX, 0, CODE_FOR_vec_addsubv4df3, "__builtin_ia32_addsubpd256", IX86_BUILTIN_ADDSUBPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF)
+BDESC (OPTION_MASK_ISA_AVX, 0, CODE_FOR_vec_addsubv8sf3, "__builtin_ia32_addsubps256", IX86_BUILTIN_ADDSUBPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF)
  BDESC (OPTION_MASK_ISA_AVX, 0, CODE_FOR_andv4df3, "__builtin_ia32_andpd256", IX86_BUILTIN_ANDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF)
  BDESC (OPTION_MASK_ISA_AVX, 0, CODE_FOR_andv8sf3, "__builtin_ia32_andps256", IX86_BUILTIN_ANDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF)
  BDESC (OPTION_MASK_ISA_AVX, 0, CODE_FOR_avx_andnotv4df3, "__builtin_ia32_andnpd256", IX86_BUILTIN_ANDNPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF)
diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md

index 5bd65dd..1f1db82 100644 (file)
--- a/gcc/config/i386/sse.md
+++ b/gcc/config/i386/sse.md
@@ -2410,7 +2410,7 @@
     (set_attr "prefix" "<round_saeonly_scalar_prefix>")
     (set_attr "mode" "<ssescalarmode>")])
  
-(define_insn "avx_addsubv4df3"
+(define_insn "vec_addsubv4df3"
    [(set (match_operand:V4DF 0 "register_operand" "=x")
         (vec_merge:V4DF
           (minus:V4DF
@@ -2424,7 +2424,7 @@
     (set_attr "prefix" "vex")
     (set_attr "mode" "V4DF")])
  
-(define_insn "sse3_addsubv2df3"
+(define_insn "vec_addsubv2df3"
    [(set (match_operand:V2DF 0 "register_operand" "=x,x")
         (vec_merge:V2DF
           (minus:V2DF
@@ -2442,7 +2442,7 @@
     (set_attr "prefix" "orig,vex")
     (set_attr "mode" "V2DF")])
  
-(define_insn "avx_addsubv8sf3"
+(define_insn "vec_addsubv8sf3"
    [(set (match_operand:V8SF 0 "register_operand" "=x")
         (vec_merge:V8SF
           (minus:V8SF
@@ -2456,7 +2456,7 @@
     (set_attr "prefix" "vex")
     (set_attr "mode" "V8SF")])
  
-(define_insn "sse3_addsubv4sf3"
+(define_insn "vec_addsubv4sf3"
    [(set (match_operand:V4SF 0 "register_operand" "=x,x")
         (vec_merge:V4SF
           (minus:V4SF
diff --git a/gcc/doc/md.texi b/gcc/doc/md.texi

index 00caf38..1b91814 100644 (file)
--- a/gcc/doc/md.texi
+++ b/gcc/doc/md.texi
@@ -5682,6 +5682,14 @@ signed/unsigned elements of size S@.  Subtract the high/low elements of 2 from
  1 and widen the resulting elements. Put the N/2 results of size 2*S in the
  output vector (operand 0).
  
+@cindex @code{vec_addsub@var{m}3} instruction pattern
+@item @samp{vec_addsub@var{m}3}
+Alternating subtract and add, with even lanes doing subtraction and odd
+lanes doing addition.  Operands 1 and 2 and the output operand are vectors
+with mode @var{m}.
+
+These instructions are not allowed to @code{FAIL}.
+
  @cindex @code{mulhisi3} instruction pattern
  @item @samp{mulhisi3}
  Multiply operands 1 and 2, which have mode @code{HImode}, and store
diff --git a/gcc/internal-fn.def b/gcc/internal-fn.def

index b2f414d..c3b8e73 100644 (file)
--- a/gcc/internal-fn.def
+++ b/gcc/internal-fn.def
@@ -281,6 +281,7 @@ DEF_INTERNAL_OPTAB_FN (COMPLEX_ADD_ROT90, ECF_CONST, cadd90, binary)
  DEF_INTERNAL_OPTAB_FN (COMPLEX_ADD_ROT270, ECF_CONST, cadd270, binary)
  DEF_INTERNAL_OPTAB_FN (COMPLEX_MUL, ECF_CONST, cmul, binary)
  DEF_INTERNAL_OPTAB_FN (COMPLEX_MUL_CONJ, ECF_CONST, cmul_conj, binary)
+DEF_INTERNAL_OPTAB_FN (VEC_ADDSUB, ECF_CONST, vec_addsub, binary)
  
  
  /* FP scales.  */
diff --git a/gcc/optabs.def b/gcc/optabs.def

index b192a9d..41ab259 100644 (file)
--- a/gcc/optabs.def
+++ b/gcc/optabs.def
@@ -407,6 +407,7 @@ OPTAB_D (vec_widen_usubl_hi_optab, "vec_widen_usubl_hi_$a")
  OPTAB_D (vec_widen_usubl_lo_optab, "vec_widen_usubl_lo_$a")
  OPTAB_D (vec_widen_uaddl_hi_optab, "vec_widen_uaddl_hi_$a")
  OPTAB_D (vec_widen_uaddl_lo_optab, "vec_widen_uaddl_lo_$a")
+OPTAB_D (vec_addsub_optab, "vec_addsub$a3")
  
  OPTAB_D (sync_add_optab, "sync_add$I$a")
  OPTAB_D (sync_and_optab, "sync_and$I$a")
diff --git a/gcc/testsuite/gcc.target/i386/vect-addsub-2.c b/gcc/testsuite/gcc.target/i386/vect-addsub-2.c

new file mode 100644 (file)

index 0000000..a6b9414
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/vect-addsub-2.c
@@ -0,0 +1,21 @@
+/* { dg-do compile } */
+/* { dg-require-effective-target sse3 } */
+/* { dg-options "-O3 -msse3" } */
+
+float a[1024], b[1024];
+
+void foo()
+{
+  for (int i = 0; i < 256; i++)
+    {
+      a[4*i+0] = a[4*i+0] - b[4*i+0];
+      a[4*i+1] = a[4*i+1] + b[4*i+1];
+      a[4*i+2] = a[4*i+2] - b[4*i+2];
+      a[4*i+3] = a[4*i+3] + b[4*i+3];
+    }
+}
+
+/* We should be able to vectorize this with SLP using the addsub
+   SLP pattern.  */
+/* { dg-final { scan-assembler "addsubps" } } */
+/* { dg-final { scan-assembler-not "shuf" } } */
diff --git a/gcc/testsuite/gcc.target/i386/vect-addsub-3.c b/gcc/testsuite/gcc.target/i386/vect-addsub-3.c

new file mode 100644 (file)

index 0000000..b27ee56
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/vect-addsub-3.c
@@ -0,0 +1,38 @@
+/* { dg-do run } */
+/* { dg-require-effective-target sse3 } */
+/* { dg-options "-O3 -msse3" } */
+
+#ifndef CHECK_H
+#define CHECK_H "sse3-check.h"
+#endif
+
+#ifndef TEST
+#define TEST sse3_test
+#endif
+
+#include CHECK_H
+
+double a[2], b[2], c[2];
+
+void __attribute__((noipa))
+foo ()
+{
+  /* When we want to use addsubpd we have to keep permuting both
+     loads; if instead we blend the result of an add and a sub we
+     can combine the blend with the permute.  Both are similar in cost,
+     verify we did not wrongly apply both.  */
+  double tem0 = a[1] - b[1];
+  double tem1 = a[0] + b[0];
+  c[0] = tem0;
+  c[1] = tem1;
+}
+
+static void
+TEST (void)
+{
+  a[0] = 1.; a[1] = 2.;
+  b[0] = 2.; b[1] = 4.;
+  foo ();
+  if (c[0] != -2. || c[1] != 3.)
+    __builtin_abort ();
+}
diff --git a/gcc/testsuite/gcc.target/i386/vect-addsubv2df.c b/gcc/testsuite/gcc.target/i386/vect-addsubv2df.c

new file mode 100644 (file)

index 0000000..547485d
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/vect-addsubv2df.c
@@ -0,0 +1,42 @@
+/* { dg-do run } */
+/* { dg-require-effective-target sse3 } */
+/* { dg-options "-O3 -msse3 -fdump-tree-slp2" } */
+
+#ifndef CHECK_H
+#define CHECK_H "sse3-check.h"
+#endif
+
+#ifndef TEST
+#define TEST sse3_test
+#endif
+
+#include CHECK_H
+
+double x[2], y[2], z[2];
+void __attribute__((noipa)) foo ()
+{
+  x[0] = y[0] - z[0];
+  x[1] = y[1] + z[1];
+}
+void __attribute__((noipa)) bar ()
+{
+  x[0] = y[0] + z[0];
+  x[1] = y[1] - z[1];
+}
+static void
+TEST (void)
+{
+  for (int i = 0; i < 2; ++i)
+    {
+      y[i] = i + 1;
+      z[i] = 2 * i + 1;
+    }
+  foo ();
+  if (x[0] != 0 || x[1] != 5)
+    __builtin_abort ();
+  bar ();
+  if (x[0] != 2 || x[1] != -1)
+    __builtin_abort ();
+}
+
+/* { dg-final { scan-tree-dump-times "ADDSUB" 1 "slp2" } } */
diff --git a/gcc/testsuite/gcc.target/i386/vect-addsubv4df.c b/gcc/testsuite/gcc.target/i386/vect-addsubv4df.c

new file mode 100644 (file)

index 0000000..e0a1b3d
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/vect-addsubv4df.c
@@ -0,0 +1,36 @@
+/* { dg-do run { target avx_runtime } } */
+/* { dg-require-effective-target avx } */
+/* { dg-options "-O3 -mavx -fdump-tree-slp2" } */
+
+double x[4], y[4], z[4];
+void __attribute__((noipa)) foo ()
+{
+  x[0] = y[0] - z[0];
+  x[1] = y[1] + z[1];
+  x[2] = y[2] - z[2];
+  x[3] = y[3] + z[3];
+}
+void __attribute__((noipa)) bar ()
+{
+  x[0] = y[0] + z[0];
+  x[1] = y[1] - z[1];
+  x[2] = y[2] + z[2];
+  x[3] = y[3] - z[3];
+}
+int main()
+{
+  for (int i = 0; i < 4; ++i)
+    {
+      y[i] = i + 1;
+      z[i] = 2 * i + 1;
+    }
+  foo ();
+  if (x[0] != 0 || x[1] != 5 || x[2] != -2 || x[3] != 11)
+    __builtin_abort ();
+  bar ();
+  if (x[0] != 2 || x[1] != -1 || x[2] != 8 || x[3] != -3)
+    __builtin_abort ();
+  return 0;
+}
+
+/* { dg-final { scan-tree-dump-times "ADDSUB" 1 "slp2" } } */
diff --git a/gcc/testsuite/gcc.target/i386/vect-addsubv4sf.c b/gcc/testsuite/gcc.target/i386/vect-addsubv4sf.c

new file mode 100644 (file)

index 0000000..b524f0c
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/vect-addsubv4sf.c
@@ -0,0 +1,46 @@
+/* { dg-do run } */
+/* { dg-require-effective-target sse3 } */
+/* { dg-options "-O3 -msse3 -fdump-tree-slp2" } */
+
+#ifndef CHECK_H
+#define CHECK_H "sse3-check.h"
+#endif
+
+#ifndef TEST
+#define TEST sse3_test
+#endif
+
+#include CHECK_H
+
+float x[4], y[4], z[4];
+void __attribute__((noipa)) foo ()
+{
+  x[0] = y[0] - z[0];
+  x[1] = y[1] + z[1];
+  x[2] = y[2] - z[2];
+  x[3] = y[3] + z[3];
+}
+void __attribute__((noipa)) bar ()
+{
+  x[0] = y[0] + z[0];
+  x[1] = y[1] - z[1];
+  x[2] = y[2] + z[2];
+  x[3] = y[3] - z[3];
+}
+static void
+TEST (void)
+{
+  for (int i = 0; i < 4; ++i)
+    {
+      y[i] = i + 1;
+      z[i] = 2 * i + 1;
+    }
+  foo ();
+  if (x[0] != 0 || x[1] != 5 || x[2] != -2 || x[3] != 11)
+    __builtin_abort ();
+  bar ();
+  if (x[0] != 2 || x[1] != -1 || x[2] != 8 || x[3] != -3)
+    __builtin_abort ();
+}
+
+/* { dg-final { scan-tree-dump-times "ADDSUB" 1 "slp2" } } */
diff --git a/gcc/testsuite/gcc.target/i386/vect-addsubv8sf.c b/gcc/testsuite/gcc.target/i386/vect-addsubv8sf.c

new file mode 100644 (file)

index 0000000..0eed33b
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/vect-addsubv8sf.c
@@ -0,0 +1,46 @@
+/* { dg-do run { target avx_runtime } } */
+/* { dg-require-effective-target avx } */
+/* { dg-options "-O3 -mavx -fdump-tree-slp2" } */
+
+float x[8], y[8], z[8];
+void __attribute__((noipa)) foo ()
+{
+  x[0] = y[0] - z[0];
+  x[1] = y[1] + z[1];
+  x[2] = y[2] - z[2];
+  x[3] = y[3] + z[3];
+  x[4] = y[4] - z[4];
+  x[5] = y[5] + z[5];
+  x[6] = y[6] - z[6];
+  x[7] = y[7] + z[7];
+}
+void __attribute__((noipa)) bar ()
+{
+  x[0] = y[0] + z[0];
+  x[1] = y[1] - z[1];
+  x[2] = y[2] + z[2];
+  x[3] = y[3] - z[3];
+  x[4] = y[4] + z[4];
+  x[5] = y[5] - z[5];
+  x[6] = y[6] + z[6];
+  x[7] = y[7] - z[7];
+}
+int main()
+{
+  for (int i = 0; i < 8; ++i)
+    {
+      y[i] = i + 1;
+      z[i] = 2 * i + 1;
+    }
+  foo ();
+  if (x[0] != 0 || x[1] != 5 || x[2] != -2 || x[3] != 11
+      || x[4] != -4 || x[5] != 17 || x[6] != -6 || x[7] != 23)
+    __builtin_abort ();
+  bar ();
+  if (x[0] != 2 || x[1] != -1 || x[2] != 8 || x[3] != -3
+      || x[4] != 14 || x[5] != -5 || x[6] != 20 || x[7] != -7)
+    __builtin_abort ();
+  return 0;
+}
+
+/* { dg-final { scan-tree-dump-times "ADDSUB" 1 "slp2" } } */
diff --git a/gcc/tree-vect-slp-patterns.c b/gcc/tree-vect-slp-patterns.c

index 2ed49cd..d536494 100644 (file)
--- a/gcc/tree-vect-slp-patterns.c
+++ b/gcc/tree-vect-slp-patterns.c
@@ -1490,6 +1490,105 @@ complex_operations_pattern::build (vec_info * /* vinfo */)
    gcc_unreachable ();
  }
  
+
+/* The addsub_pattern.  */
+
+class addsub_pattern : public vect_pattern
+{
+  public:
+    addsub_pattern (slp_tree *node)
+       : vect_pattern (node, NULL, IFN_VEC_ADDSUB) {};
+
+    void build (vec_info *);
+
+    static vect_pattern*
+    recognize (slp_tree_to_load_perm_map_t *, slp_tree *);
+};
+
+vect_pattern *
+addsub_pattern::recognize (slp_tree_to_load_perm_map_t *, slp_tree *node_)
+{
+  slp_tree node = *node_;
+  if (SLP_TREE_CODE (node) != VEC_PERM_EXPR
+      || SLP_TREE_CHILDREN (node).length () != 2)
+    return NULL;
+
+  /* Match a blend of a plus and a minus op with the same number of plus and
+     minus lanes on the same operands.  */
+  slp_tree sub = SLP_TREE_CHILDREN (node)[0];
+  slp_tree add = SLP_TREE_CHILDREN (node)[1];
+  bool swapped_p = false;
+  if (vect_match_expression_p (sub, PLUS_EXPR))
+    {
+      std::swap (add, sub);
+      swapped_p = true;
+    }
+  if (!(vect_match_expression_p (add, PLUS_EXPR)
+       && vect_match_expression_p (sub, MINUS_EXPR)))
+    return NULL;
+  if (!((SLP_TREE_CHILDREN (sub)[0] == SLP_TREE_CHILDREN (add)[0]
+        && SLP_TREE_CHILDREN (sub)[1] == SLP_TREE_CHILDREN (add)[1])
+       || (SLP_TREE_CHILDREN (sub)[0] == SLP_TREE_CHILDREN (add)[1]
+           && SLP_TREE_CHILDREN (sub)[1] == SLP_TREE_CHILDREN (add)[0])))
+    return NULL;
+
+  for (unsigned i = 0; i < SLP_TREE_LANE_PERMUTATION (node).length (); ++i)
+    {
+      std::pair<unsigned, unsigned> perm = SLP_TREE_LANE_PERMUTATION (node)[i];
+      if (swapped_p)
+       perm.first = perm.first == 0 ? 1 : 0;
+      /* It has to be alternating -, +, -, ...
+        While we could permute the .ADDSUB inputs and the .ADDSUB output
+        that's only profitable over the add + sub + blend if at least
+        one of the permute is optimized which we can't determine here.  */
+      if (perm.first != (i & 1)
+         || perm.second != i)
+       return NULL;
+    }
+
+  if (!vect_pattern_validate_optab (IFN_VEC_ADDSUB, node))
+    return NULL;
+
+  return new addsub_pattern (node_);
+}
+
+void
+addsub_pattern::build (vec_info *vinfo)
+{
+  slp_tree node = *m_node;
+
+  slp_tree sub = SLP_TREE_CHILDREN (node)[0];
+  slp_tree add = SLP_TREE_CHILDREN (node)[1];
+  if (vect_match_expression_p (sub, PLUS_EXPR))
+    std::swap (add, sub);
+
+  /* Modify the blend node in-place.  */
+  SLP_TREE_CHILDREN (node)[0] = SLP_TREE_CHILDREN (sub)[0];
+  SLP_TREE_CHILDREN (node)[1] = SLP_TREE_CHILDREN (sub)[1];
+  SLP_TREE_REF_COUNT (SLP_TREE_CHILDREN (node)[0])++;
+  SLP_TREE_REF_COUNT (SLP_TREE_CHILDREN (node)[1])++;
+
+  /* Build IFN_VEC_ADDSUB from the sub representative operands.  */
+  stmt_vec_info rep = SLP_TREE_REPRESENTATIVE (sub);
+  gcall *call = gimple_build_call_internal (IFN_VEC_ADDSUB, 2,
+                                           gimple_assign_rhs1 (rep->stmt),
+                                           gimple_assign_rhs2 (rep->stmt));
+  gimple_call_set_lhs (call, make_ssa_name
+                              (TREE_TYPE (gimple_assign_lhs (rep->stmt))));
+  gimple_call_set_nothrow (call, true);
+  gimple_set_bb (call, gimple_bb (rep->stmt));
+  SLP_TREE_REPRESENTATIVE (node) = vinfo->add_pattern_stmt (call, rep);
+  STMT_VINFO_RELEVANT (SLP_TREE_REPRESENTATIVE (node)) = vect_used_in_scope;
+  STMT_SLP_TYPE (SLP_TREE_REPRESENTATIVE (node)) = pure_slp;
+  STMT_VINFO_VECTYPE (SLP_TREE_REPRESENTATIVE (node)) = SLP_TREE_VECTYPE (node);
+  STMT_VINFO_SLP_VECT_ONLY_PATTERN (SLP_TREE_REPRESENTATIVE (node)) = true;
+  SLP_TREE_CODE (node) = ERROR_MARK;
+  SLP_TREE_LANE_PERMUTATION (node).release ();
+
+  vect_free_slp_tree (sub);
+  vect_free_slp_tree (add);
+}
+
  /*******************************************************************************
   * Pattern matching definitions
   ******************************************************************************/
@@ -1502,6 +1601,7 @@ vect_pattern_decl_t slp_patterns[]
       overlap in what they can detect.  */
  
    SLP_PATTERN (complex_operations_pattern),
+  SLP_PATTERN (addsub_pattern)
  };
  #undef SLP_PATTERN
  
diff --git a/gcc/tree-vect-slp.c b/gcc/tree-vect-slp.c

index 69ee8fa..227d6aa 100644 (file)
--- a/gcc/tree-vect-slp.c
+++ b/gcc/tree-vect-slp.c
@@ -3705,6 +3705,7 @@ vect_optimize_slp (vec_info *vinfo)
               case CFN_COMPLEX_ADD_ROT270:
               case CFN_COMPLEX_MUL:
               case CFN_COMPLEX_MUL_CONJ:
+             case CFN_VEC_ADDSUB:
                 continue;
               default:;
               }
diff --git a/gcc/tree-vectorizer.h b/gcc/tree-vectorizer.h

index 5c71fbc..fa28336 100644 (file)
--- a/gcc/tree-vectorizer.h
+++ b/gcc/tree-vectorizer.h
@@ -2100,7 +2100,8 @@ class vect_pattern
        this->m_ifn = ifn;
        this->m_node = node;
        this->m_ops.create (0);
-      this->m_ops.safe_splice (*m_ops);
+      if (m_ops)
+       this->m_ops.safe_splice (*m_ops);
      }
  
    public:
author	Richard Biener <rguenther@suse.de>
	Mon, 31 May 2021 11:19:01 +0000 (13:19 +0200)
committer	Richard Biener <rguenther@suse.de>
	Thu, 24 Jun 2021 11:08:25 +0000 (13:08 +0200)
gcc/config/i386/i386-builtin.def		patch \| blob \| history
gcc/config/i386/sse.md		patch \| blob \| history
gcc/doc/md.texi		patch \| blob \| history
gcc/internal-fn.def		patch \| blob \| history
gcc/optabs.def		patch \| blob \| history
gcc/testsuite/gcc.target/i386/vect-addsub-2.c	[new file with mode: 0644]	patch \| blob
gcc/testsuite/gcc.target/i386/vect-addsub-3.c	[new file with mode: 0644]	patch \| blob
gcc/testsuite/gcc.target/i386/vect-addsubv2df.c	[new file with mode: 0644]	patch \| blob
gcc/testsuite/gcc.target/i386/vect-addsubv4df.c	[new file with mode: 0644]	patch \| blob
gcc/testsuite/gcc.target/i386/vect-addsubv4sf.c	[new file with mode: 0644]	patch \| blob
gcc/testsuite/gcc.target/i386/vect-addsubv8sf.c	[new file with mode: 0644]	patch \| blob
gcc/tree-vect-slp-patterns.c		patch \| blob \| history
gcc/tree-vect-slp.c		patch \| blob \| history
gcc/tree-vectorizer.h		patch \| blob \| history