FUNCTION_BOUNDARY.
2007-07-12 Dorit Nuzman <dorit@il.ibm.com>
+ Devang Patel <dpatel@apple.com>
+
+ PR tree-optimization/25413
+ * targhooks.c (default_builtin_vector_alignment_reachable): New.
+ * targhooks.h (default_builtin_vector_alignment_reachable): New.
+ * tree.h (contains_packed_reference): New.
+ * expr.c (contains_packed_reference): New.
+ * tree-vect-analyze.c (vector_alignment_reachable_p): New.
+ (vect_enhance_data_refs_alignment): Call
+ vector_alignment_reachable_p.
+ * target.h (vector_alignment_reachable): New builtin.
+ * target-def.h (TARGET_VECTOR_ALIGNMENT_REACHABLE): New.
+ * config/rs6000/rs6000.c (rs6000_vector_alignment_reachable): New.
+ (TARGET_VECTOR_ALIGNMENT_REACHABLE): Define.
+
+2007-07-12 Dorit Nuzman <dorit@il.ibm.com>
* target.h (builtin_vectorization_cost): Add new target builtin.
* target-def.h (TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST): New.
static tree rs6000_builtin_conversion (enum tree_code, tree);
static void def_builtin (int, const char *, tree, int);
+static bool rs6000_vector_alignment_reachable (tree, bool);
static void rs6000_init_builtins (void);
static rtx rs6000_expand_unop_builtin (enum insn_code, tree, rtx);
static rtx rs6000_expand_binop_builtin (enum insn_code, tree, rtx);
#undef TARGET_VECTORIZE_BUILTIN_CONVERSION
#define TARGET_VECTORIZE_BUILTIN_CONVERSION rs6000_builtin_conversion
+#undef TARGET_VECTOR_ALIGNMENT_REACHABLE
+#define TARGET_VECTOR_ALIGNMENT_REACHABLE rs6000_vector_alignment_reachable
+
#undef TARGET_INIT_BUILTINS
#define TARGET_INIT_BUILTINS rs6000_init_builtins
}
}
+
+/* Return true iff a data reference of TYPE can reach vector alignment (16)
+ after applying N number of iterations. This routine does not determine
+ how many iterations are required to reach desired alignment. */
+
+static bool
+rs6000_vector_alignment_reachable (tree type ATTRIBUTE_UNUSED, bool is_packed)
+{
+ /* A packed reference is never reported as alignable; TYPE is not
+ consulted by this target implementation. */
+ if (is_packed)
+ return false;
+
+ if (TARGET_32BIT)
+ {
+ /* 32-bit: reachable only when rs6000_alignment_flags is exactly
+ MASK_ALIGN_NATURAL or exactly MASK_ALIGN_POWER; any other value
+ yields false. */
+ if (rs6000_alignment_flags == MASK_ALIGN_NATURAL)
+ return true;
+
+ if (rs6000_alignment_flags == MASK_ALIGN_POWER)
+ return true;
+
+ return false;
+ }
+ else
+ {
+ /* Not 32-bit: TARGET_MACHO is the only case rejected here. */
+ if (TARGET_MACHO)
+ return false;
+
+ /* Assuming that all other types are naturally aligned. CHECKME! */
+ return true;
+ }
+}
+
/* Handle generic options of the form -mfoo=yes/no.
NAME is the option name.
VALUE is the option value.
return exp;
}
+/* Given an expression EXP that may be a COMPONENT_REF or an ARRAY_REF,
+ look for whether EXP or any nested component-refs within EXP is marked
+ as PACKED. */
+
+bool
+contains_packed_reference (tree exp)
+{
+ /* Only ever becomes true immediately before jumping to DONE, so every
+ other way out of the loop returns false. */
+ bool packed_p = false;
+
+ /* Walk down the reference chain through operand 0 until a packed
+ component is found or a tree code not listed below is reached. */
+ while (1)
+ {
+ switch (TREE_CODE (exp))
+ {
+ case COMPONENT_REF:
+ {
+ /* Operand 1 of a COMPONENT_REF is the field being accessed. The
+ reference counts as packed if the field itself, the field's
+ type, or the type of the whole reference is marked packed. */
+ tree field = TREE_OPERAND (exp, 1);
+ packed_p = DECL_PACKED (field)
+ || TYPE_PACKED (TREE_TYPE (field))
+ || TYPE_PACKED (TREE_TYPE (exp));
+ if (packed_p)
+ goto done;
+ }
+ break;
+
+ /* These wrappers carry no packed information of their own here;
+ just step through to the object they are applied to. */
+ case BIT_FIELD_REF:
+ case ARRAY_REF:
+ case ARRAY_RANGE_REF:
+ case REALPART_EXPR:
+ case IMAGPART_EXPR:
+ case VIEW_CONVERT_EXPR:
+ break;
+
+ /* Any other tree code ends the walk. */
+ default:
+ goto done;
+ }
+ exp = TREE_OPERAND (exp, 0);
+ }
+ done:
+ return packed_p;
+}
+
/* Return a tree of sizetype representing the size, in bytes, of the element
of EXP, an ARRAY_REF. */
#define TARGET_VECTORIZE_BUILTIN_MUL_WIDEN_EVEN 0
#define TARGET_VECTORIZE_BUILTIN_MUL_WIDEN_ODD 0
#define TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST 0
+#define TARGET_VECTOR_ALIGNMENT_REACHABLE \
+ default_builtin_vector_alignment_reachable
#define TARGET_VECTORIZE \
{ \
TARGET_VECTORIZE_BUILTIN_CONVERSION, \
TARGET_VECTORIZE_BUILTIN_MUL_WIDEN_EVEN, \
TARGET_VECTORIZE_BUILTIN_MUL_WIDEN_ODD, \
- TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST \
+ TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST, \
+ TARGET_VECTOR_ALIGNMENT_REACHABLE \
}
#define TARGET_DEFAULT_TARGET_FLAGS 0
/* Returns the cost to be added to the overheads involved with
executing the vectorized version of a loop. */
int (*builtin_vectorization_cost) (bool);
+
+ /* Return true if vector alignment is reachable (by peeling N
+ iterations) for the given type. */
+ bool (* vector_alignment_reachable) (tree, bool);
} vectorize;
/* The initial value of target_flags. */
return id;
}
+/* Default for the vector_alignment_reachable target hook (the value
+ target-def.h gives TARGET_VECTOR_ALIGNMENT_REACHABLE unless a target
+ overrides it). Return false if IS_PACKED is set or if TYPE_SIZE of
+ TYPE compares greater than bitsize_int (POINTER_SIZE); return true
+ otherwise. */
+bool
+default_builtin_vector_alignment_reachable (tree type, bool is_packed)
+{
+ if (is_packed)
+ return false;
+
+ /* Assuming that types whose size is > pointer-size are not guaranteed to be
+ naturally aligned. */
+ if (tree_int_cst_compare (TYPE_SIZE (type), bitsize_int (POINTER_SIZE)) > 0)
+ return false;
+
+ /* Assuming that types whose size is <= pointer-size
+ are naturally aligned. */
+ return true;
+}
+
#include "gt-targhooks.h"
extern tree default_builtin_reciprocal (enum built_in_function, bool, bool);
+extern bool default_builtin_vector_alignment_reachable (tree, bool);
+
/* These are here, and not in hooks.[ch], because not all users of
hooks.h include tm.h, and thus we don't have CUMULATIVE_ARGS. */
+2007-07-12 Dorit Nuzman <dorit@il.ibm.com>
+ Devang Patel <dpatel@apple.com>
+
+ PR tree-optimization/25413
+ * gcc.dg/vect/vect-align-1.c: New.
+ * gcc.dg/vect/vect-align-2.c: New.
+ * gcc.dg/vect/pr25413.c: New.
+ * gcc.dg/vect/pr25413a.c: New.
+ * gcc.dg/vect/pr31699.c: Fix dg-final check.
+
2007-07-12 Nathan Froyd <froydnj@codesourcery.com>
* lib/target-support.exp (check_ultrasparc_hw_available):
--- /dev/null
+/* { dg-require-effective-target vect_double } */
+
+#include <stdarg.h>
+#include "tree-vect.h"
+
+#define N 8
+
+struct
+{
+ char c;
+ double d[N];
+} a;
+
+int main1()
+{
+ int i;
+ for ( i=0; i<N; ++i )
+ a.d[i]=1;
+ return 0;
+}
+
+int main (void)
+{
+ int i;
+ check_vect ();
+
+ main1 ();
+ for (i=0; i<N; i++)
+ if (a.d[i] != 1)
+ abort ();
+ return 0;
+}
+
+/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 0 "vect" } } */
+/* { dg-final { scan-tree-dump-times "vector alignment may not be reachable" 1 "vect" } } */
+/* { dg-final { scan-tree-dump-times "not vectorized: unsupported unaligned store" 1 "vect" } } */
+/* { dg-final { cleanup-tree-dump "vect" } } */
--- /dev/null
+/* { dg-require-effective-target vect_double } */
+
+#include <stdarg.h>
+#include "tree-vect.h"
+
+#define N 8
+
+/* Use the compiler's own size type: a hard-coded `unsigned int' is wrong
+ on LP64 targets and conflicts with the built-in malloc prototype. */
+typedef __SIZE_TYPE__ size_t;
+
+extern void *malloc (size_t __size) __attribute__ ((__nothrow__)) __attribute__ ((__malloc__));
+
+typedef double num_t;
+static const num_t num__infty = ((num_t)1.0)/((num_t)0.0);
+
+struct oct_tt;
+typedef struct oct_tt oct_t;
+
+typedef unsigned int var_t;
+typedef enum {
+ OCT_EMPTY = 0,
+ OCT_NORMAL = 1,
+ OCT_CLOSED = 2
+} oct_state;
+
+struct oct_tt {
+ var_t n;
+
+ int ref;
+
+ oct_state state;
+ struct oct_tt* closed;
+
+ num_t* c;
+};
+
+void* octfapg_mm_malloc (size_t t);
+oct_t* octfapg_alloc (var_t n);
+oct_t* octfapg_full_copy (oct_t* m);
+
+struct mmalloc_tt;
+typedef struct mmalloc_tt mmalloc_t;
+
+struct mmalloc_tt
+{
+ int id;
+
+ int nb_alloc;
+ int nb_realloc;
+ int nb_free;
+
+ size_t rem;
+ size_t max;
+ size_t tot;
+
+};
+
+typedef struct
+{
+ size_t size;
+
+ mmalloc_t* mm;
+ int id;
+
+ double dummy;
+
+} mmheader_t;
+
+void*
+octfapg_mm_malloc (size_t t)
+{
+ char* m = (char*)malloc(t+sizeof(mmheader_t));
+ return m+sizeof(mmheader_t);
+}
+
+oct_t* octfapg_empty (var_t n);
+
+oct_t*
+octfapg_empty (const var_t n)
+{
+ oct_t* m;
+ /*octfapg_timing_enter("oct_empty",3);*/
+ m = ((oct_t*) octfapg_mm_malloc (sizeof(oct_t)));
+ m->n = n;
+ m->ref = 1;
+ m->state = OCT_EMPTY;
+ m->closed = (oct_t*)((void *)0);
+ m->c = (num_t*)((void *)0);
+ /*octfapg_timing_exit("oct_empty",3);*/
+ return m;
+}
+
+oct_t*
+octfapg_alloc (const var_t n)
+{
+ size_t nn = (2*(size_t)(n)*((size_t)(n)+1));
+ oct_t* m;
+ m = octfapg_empty(n);
+ m->c = ((num_t*) octfapg_mm_malloc (sizeof(num_t)*(nn)));
+ ;
+ m->state = OCT_NORMAL;
+ m->closed = (oct_t*)((void *)0);
+ return m;
+}
+
+oct_t*
+octfapg_universe (const var_t n)
+{
+ oct_t* m;
+ size_t i, nn = (2*(size_t)(n)*((size_t)(n)+1));
+ m = octfapg_alloc(n);
+ for (i=0;i<nn;i++) *(m->c+i) = num__infty;
+ for (i=0;i<2*n;i++) *(m->c+((size_t)(i)+(((size_t)(i)+1)*((size_t)(i)+1))/2)) = (num_t)(0);
+ m->state = OCT_CLOSED;
+ return m;
+}
+
+int main (void)
+{
+ int i;
+ check_vect ();
+
+ oct_t *p = octfapg_universe(10);
+ return 0;
+}
+
+/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" } } */
+/* { dg-final { scan-tree-dump-times "vector alignment may not be reachable" 1 "vect" } } */
+/* { dg-final { scan-tree-dump-times "Alignment of access forced using versioning" 1 "vect" } } */
+/* { dg-final { cleanup-tree-dump "vect" } } */
}
/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 2 "vect" { target vect_intfloat_cvt } } } */
-/* { dg-final { scan-tree-dump-times "Alignment of access forced using peeling" 1 "vect" { xfail vect_no_align } } } */
+/* { dg-final { scan-tree-dump-times "vector alignment may not be reachable" 1 "vect" } } */
+/* { dg-final { scan-tree-dump-times "Alignment of access forced using versioning" 1 "vect" } } */
/* { dg-final { cleanup-tree-dump "vect" } } */
--- /dev/null
+/* { dg-require-effective-target vect_int } */
+
+#include <stdlib.h>
+#include <stdarg.h>
+#include "tree-vect.h"
+
+/* Compile time known misalignment. Cannot use loop peeling to align
+ the store. */
+
+#define N 16
+
+struct foo {
+ char x;
+ int y[N];
+} __attribute__((packed));
+
+int
+main1 (struct foo * __restrict__ p)
+{
+ int i;
+ int x[N] = {0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15};
+
+ for (i = 0; i < N; i++)
+ {
+ p->y[i] = x[i];
+ }
+
+ /* check results: */
+ for (i = 0; i < N; i++)
+ {
+ if (p->y[i] != x[i])
+ abort ();
+ }
+ return 0;
+}
+
+
+int main (void)
+{
+ int i;
+ struct foo *p = malloc (2*sizeof (struct foo));
+ check_vect ();
+
+ main1 (p);
+ return 0;
+}
+
+/* { dg-final { scan-tree-dump-times "Alignment of access forced using versioning" 1 "vect" } } */
+/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" } } */
+/* { dg-final { cleanup-tree-dump "vect" } } */
--- /dev/null
+/* { dg-require-effective-target vect_int } */
+/* { dg-do run } */
+
+#include <stdlib.h>
+#include <stdarg.h>
+#include "tree-vect.h"
+
+/* Compile time unknown misalignment. Cannot use loop peeling to align
+ the store. */
+
+#define N 17
+
+struct foo {
+ char x0;
+ int y[N][N];
+} __attribute__ ((packed));
+
+struct foo f2;
+/* One source value per row of f2.y. It must have N entries because
+ fbar reads z[i] for every i in [0, N). */
+int z[N] = {0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16};
+
+/* Fill every row i of the packed global f2.y with z[i], then verify the
+ stores and abort on any mismatch. FP is not used; all accesses go to
+ the global f2. */
+void fbar(struct foo *fp)
+{
+ int i,j;
+ for (i=0; i<N; i++)
+ for (j=0; j<N; j++)
+ f2.y[i][j] = z[i];
+
+ /* check results: */
+ for (i=0; i<N; i++)
+ for (j=0; j<N; j++)
+ if (f2.y[i][j] != z[i])
+ abort ();
+}
+
+/* Test driver: this is a run test, so call check_vect () (from
+ tree-vect.h) before executing the vectorized code, as the other vect
+ run tests do. */
+int main (void)
+{
+ struct foo *fp = (struct foo *) malloc (2*sizeof (struct foo));
+ check_vect ();
+
+ fbar(fp);
+ return 0;
+}
+
+
+/* { dg-final { scan-tree-dump-times "Alignment of access forced using peeling" 0 "vect" } } */
+/* { dg-final { scan-tree-dump-times "Alignment of access forced using versioning" 1 "vect" } } */
+/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" } } */
+/* { dg-final { cleanup-tree-dump "vect" } } */
#include "tm.h"
#include "ggc.h"
#include "tree.h"
+#include "target.h"
#include "basic-block.h"
#include "diagnostic.h"
#include "tree-flow.h"
}
+/* Function vector_alignment_reachable_p
+
+ Return true if vector alignment for DR is reachable by peeling
+ a few loop iterations. Return false otherwise. */
+
+static bool
+vector_alignment_reachable_p (struct data_reference *dr)
+{
+ tree stmt = DR_STMT (dr);
+ stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
+ tree vectype = STMT_VINFO_VECTYPE (stmt_info);
+
+ if (DR_GROUP_FIRST_DR (stmt_info))
+ {
+ /* For interleaved access we peel only if number of iterations in
+ the prolog loop ({VF - misalignment}), is a multiple of the
+ number of the interleaved accesses. */
+ int elem_size, mis_in_elements;
+ int nelements = TYPE_VECTOR_SUBPARTS (vectype);
+
+ /* FORNOW: handle only known alignment. */
+ if (!known_alignment_for_access_p (dr))
+ return false;
+
+ /* Express the misalignment as a count of vector elements. */
+ elem_size = UNITS_PER_SIMD_WORD / nelements;
+ mis_in_elements = DR_MISALIGNMENT (dr) / elem_size;
+
+ if ((nelements - mis_in_elements) % DR_GROUP_SIZE (stmt_info))
+ return false;
+ }
+
+ /* If misalignment is known at the compile time then allow peeling
+ only if natural alignment is reachable through peeling. */
+ if (known_alignment_for_access_p (dr) && !aligned_access_p (dr))
+ {
+ HOST_WIDE_INT elmsize =
+ int_cst_value (TYPE_SIZE_UNIT (TREE_TYPE (vectype)));
+ if (vect_print_dump_info (REPORT_DETAILS))
+ {
+ fprintf (vect_dump, "data size =" HOST_WIDE_INT_PRINT_DEC, elmsize);
+ fprintf (vect_dump, ". misalignment = %d. ", DR_MISALIGNMENT (dr));
+ }
+ /* A misalignment that is not a whole number of elements is
+ rejected. */
+ if (DR_MISALIGNMENT (dr) % elmsize)
+ {
+ if (vect_print_dump_info (REPORT_DETAILS))
+ fprintf (vect_dump, "data size does not divide the misalignment.\n");
+ return false;
+ }
+ }
+
+ /* Misalignment unknown at compile time: defer to the target hook,
+ passing the type of the reference and whether its base object (if
+ any) contains a packed reference. */
+ if (!known_alignment_for_access_p (dr))
+ {
+ tree type = (TREE_TYPE (DR_REF (dr)));
+ tree ba = DR_BASE_OBJECT (dr);
+ bool is_packed = false;
+
+ if (ba)
+ is_packed = contains_packed_reference (ba);
+
+ if (vect_print_dump_info (REPORT_DETAILS))
+ fprintf (vect_dump, "Unknown misalignment, is_packed = %d",is_packed);
+ if (targetm.vectorize.vector_alignment_reachable (type, is_packed))
+ return true;
+ else
+ return false;
+ }
+
+ /* Alignment is known here: the access is either already aligned or
+ misaligned by a multiple of the element size. */
+ return true;
+}
+
/* Function vect_enhance_data_refs_alignment
This pass will use loop versioning and loop peeling in order to enhance
if (!DR_IS_READ (dr) && !aligned_access_p (dr))
{
- if (DR_GROUP_FIRST_DR (stmt_info))
- {
- /* For interleaved access we peel only if number of iterations in
- the prolog loop ({VF - misalignment}), is a multiple of the
- number of the interleaved accesses. */
- int elem_size, mis_in_elements;
- tree vectype = STMT_VINFO_VECTYPE (stmt_info);
- int nelements = TYPE_VECTOR_SUBPARTS (vectype);
-
- /* FORNOW: handle only known alignment. */
- if (!known_alignment_for_access_p (dr))
- {
- do_peeling = false;
- break;
- }
-
- elem_size = UNITS_PER_SIMD_WORD / nelements;
- mis_in_elements = DR_MISALIGNMENT (dr) / elem_size;
-
- if ((nelements - mis_in_elements) % DR_GROUP_SIZE (stmt_info))
- {
- do_peeling = false;
- break;
- }
- }
- dr0 = dr;
- do_peeling = true;
+ do_peeling = vector_alignment_reachable_p (dr);
+ if (do_peeling)
+ dr0 = dr;
+ if (!do_peeling && vect_print_dump_info (REPORT_DETAILS))
+ fprintf (vect_dump, "vector alignment may not be reachable");
break;
}
}
tree *, enum machine_mode *, int *, int *,
bool);
+/* Given an expression EXP that may be a COMPONENT_REF or an ARRAY_REF,
+ look for whether EXP or any nested component-refs within EXP is marked
+ as PACKED. */
+
+extern bool contains_packed_reference (tree exp);
+
/* Return 1 if T is an expression that get_inner_reference handles. */
extern int handled_component_p (tree);