re PR tree-optimization/44423 (Massive performance regression in SSE code due to...
author: Martin Jambor <mjambor@suse.cz>
Wed, 9 Jun 2010 11:20:03 +0000 (13:20 +0200)
committer: Martin Jambor <jamborm@gcc.gnu.org>
Wed, 9 Jun 2010 11:20:03 +0000 (13:20 +0200)
2010-06-09  Martin Jambor  <mjambor@suse.cz>

PR tree-optimization/44423
* tree-sra.c (dump_access): Dump also grp_assignment_read.
(analyze_access_subtree): Pass negative allow_replacements to children
if the current type is scalar.

* testsuite/gcc.dg/tree-ssa/pr44423.c: New test.

From-SVN: r160462

gcc/ChangeLog
gcc/testsuite/ChangeLog
gcc/testsuite/gcc.dg/tree-ssa/pr44423.c [new file with mode: 0644]
gcc/tree-sra.c

index a1d7c0d..a66edd0 100644 (file)
@@ -1,3 +1,10 @@
+2010-06-09  Martin Jambor  <mjambor@suse.cz>
+
+       PR tree-optimization/44423
+       * tree-sra.c (dump_access): Dump also grp_assignment_read.
+       (analyze_access_subtree): Pass negative allow_replacements to children
+       if the current type is scalar.
+
 2010-06-09  Joern Rennecke  <amylaar@spamcop.net>
 
        PR testsuite/42843
index a17f5ae..6d4b6f1 100644 (file)
@@ -1,3 +1,8 @@
+2010-06-09  Martin Jambor  <mjambor@suse.cz>
+
+       PR tree-optimization/44423
+       * gcc.dg/tree-ssa/pr44423.c: New test.
+
 2010-06-09  Joern Rennecke  <amylaar@spamcop.net>
 
        PR testsuite/42843
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/pr44423.c b/gcc/testsuite/gcc.dg/tree-ssa/pr44423.c
new file mode 100644 (file)
index 0000000..6232d64
--- /dev/null
@@ -0,0 +1,47 @@
+/* { dg-do compile { target x86_64-*-* } } */
+/* { dg-options "-O2 -fdump-tree-esra-details" } */
+
+#include "xmmintrin.h"
+
+typedef __m128 v4sf; // vector of 4 floats (SSE1)
+
+#define ARRSZ 1024
+
+typedef union { /* Same storage viewed as four floats or as one v4sf vector.  */
+  float f[4]; /* Element-wise view; func() stores into va through this member.  */
+  v4sf  v; /* Whole-vector view; func() reads va through this member.  */
+} V4SF;
+
+struct COLOUR { /* Three float fields; func() copies them into va.f[0..2].  */
+  float r,g,b;
+};
+
+void func (float *pre1, float pre2, struct COLOUR *a, V4SF *lpic)
+  { /* For y in 0..19, adds (pre1[y]*pre2) * va.v into lpic[y].v.  */
+  V4SF va;
+  int y;
+  va.f[0]=a->r;va.f[1]=a->g;va.f[2]=a->b;va.f[3]=0.f; /* va is filled one float at a time...  */
+  for (y=0; y<20; ++y)
+    {
+    float att = pre1[y]*pre2;
+    v4sf tmpatt=_mm_load1_ps(&att); /* Broadcast att into all four lanes.  */
+    tmpatt=_mm_mul_ps(tmpatt,va.v); /* ...but read here as one vector; the dg-final scan at the end of this file requires esra to create no replacements.  */
+    lpic[y].v=_mm_add_ps(tmpatt,lpic[y].v);
+    }
+  }
+
+int main()
+  { /* Driver for func(); the dg-do directive at the top of this file is compile-only.  */
+  V4SF lpic[ARRSZ]; /* Not initialised here before func() accumulates into it.  */
+  float pre1[ARRSZ];
+  int i;
+  struct COLOUR col={0.,2.,4.};
+  for (i=0; i<20; ++i) /* Sets exactly the 20 entries that func() reads.  */
+    pre1[i]=0.4;
+  for (i=0;i<10000000;++i)
+    func(&pre1[0],0.3,&col,&lpic[0]);
+  return 0;
+  }
+
+/* { dg-final { scan-tree-dump-times "Created a replacement" 0 "esra"} } */
+/* { dg-final { cleanup-tree-dump "esra" } } */
index 702187c..5387a19 100644 (file)
@@ -356,13 +356,13 @@ dump_access (FILE *f, struct access *access, bool grp)
   print_generic_expr (f, access->type, 0);
   if (grp)
     fprintf (f, ", grp_write = %d, total_scalarization = %d, "
-            "grp_read = %d, grp_hint = %d, "
+            "grp_read = %d, grp_hint = %d, grp_assignment_read = %d,"
             "grp_covered = %d, grp_unscalarizable_region = %d, "
             "grp_unscalarized_data = %d, grp_partial_lhs = %d, "
             "grp_to_be_replaced = %d, grp_maybe_modified = %d, "
             "grp_not_necessarilly_dereferenced = %d\n",
             access->grp_write, access->total_scalarization,
-            access->grp_read, access->grp_hint,
+            access->grp_read, access->grp_hint, access->grp_assignment_read,
             access->grp_covered, access->grp_unscalarizable_region,
             access->grp_unscalarized_data, access->grp_partial_lhs,
             access->grp_to_be_replaced, access->grp_maybe_modified,
@@ -1791,7 +1791,8 @@ analyze_access_subtree (struct access *root, bool allow_replacements,
       else
        covered_to += child->size;
 
-      sth_created |= analyze_access_subtree (child, allow_replacements,
+      sth_created |= analyze_access_subtree (child,
+                                            allow_replacements && !scalar,
                                             mark_read, mark_write);
 
       root->grp_unscalarized_data |= child->grp_unscalarized_data;