--- /dev/null
+/* { dg-do run { target lp64 } } */
+/* This is a test exercising peeling for alignment for a negative step
+ vector loop. We're forcing atom tuning here because that has a higher
+ unaligned vs aligned cost unlike most other archs. */
+/* { dg-options "-O3 -march=x86-64 -mtune=atom -fdump-tree-vect-details -save-temps" } */
+
+float a[1024], b[1024];
+
+/* Negative-step loop with compile-time start 507.  noipa keeps the
+   function from being inlined/IPA-optimized so its loop is analyzed
+   and vectorized on its own.  */
+void __attribute__((noipa)) foo1 ()
+{
+ for (int i = 507; i > 1; --i)
+ a[i] = b[i] * 2.;
+}
+/* Same negative-step loop as foo1 but starting at 506 — foo1..foo4
+   start at 507..504 so together they cover different element offsets
+   of the first (highest-address) vector access.  */
+void __attribute__((noipa)) foo2 ()
+{
+ for (int i = 506; i > 1; --i)
+ a[i] = b[i] * 2.;
+}
+/* Negative-step loop, compile-time start 505.  */
+void __attribute__((noipa)) foo3 ()
+{
+ for (int i = 505; i > 1; --i)
+ a[i] = b[i] * 2.;
+}
+/* Negative-step loop, compile-time start 504.  */
+void __attribute__((noipa)) foo4 ()
+{
+ for (int i = 504; i > 1; --i)
+ a[i] = b[i] * 2.;
+}
+/* Negative-step loop whose start is a runtime value, so the
+   misalignment of the accesses is not a compile-time constant.  */
+void __attribute__((noipa)) foo5 (int start)
+{
+ for (int i = start; i > 1; --i)
+ a[i] = b[i] * 2.;
+}
+
+int main()
+{
+ /* (Re)initialize b before each foo* call.  The volatile asm with a
+    "memory" clobber is an optimization barrier so these init loops are
+    not themselves vectorized or folded away — keeping the dump and
+    assembler scans about the foo* loops only (testsuite idiom).  */
+ for (int i = 2; i < 508; ++i)
+ {
+ __asm__ volatile ("" : : : "memory");
+ b[i] = i;
+ }
+ foo1 ();
+ /* foo1 writes a[507..2]; verify every element it covers.  */
+ for (int i = 2; i < 508; ++i)
+ if (a[i] != 2*i)
+ __builtin_abort ();
+
+ for (int i = 2; i < 507; ++i)
+ {
+ __asm__ volatile ("" : : : "memory");
+ b[i] = i;
+ }
+ foo2 ();
+ /* foo2 writes a[506..2].  */
+ for (int i = 2; i < 507; ++i)
+ if (a[i] != 2*i)
+ __builtin_abort ();
+
+ for (int i = 2; i < 506; ++i)
+ {
+ __asm__ volatile ("" : : : "memory");
+ b[i] = i;
+ }
+ foo3 ();
+ /* foo3 writes a[505..2].  */
+ for (int i = 2; i < 506; ++i)
+ if (a[i] != 2*i)
+ __builtin_abort ();
+
+ for (int i = 2; i < 505; ++i)
+ {
+ __asm__ volatile ("" : : : "memory");
+ b[i] = i;
+ }
+ foo4 ();
+ /* foo4 writes a[504..2].  */
+ for (int i = 2; i < 505; ++i)
+ if (a[i] != 2*i)
+ __builtin_abort ();
+
+ for (int i = 2; i < 506; ++i)
+ {
+ __asm__ volatile ("" : : : "memory");
+ b[i] = i;
+ }
+ foo5 (505);
+ /* foo5 (505) writes a[505..2], same range as foo3 but with a
+    runtime start.  */
+ for (int i = 2; i < 506; ++i)
+ if (a[i] != 2*i)
+ __builtin_abort ();
+}
+
+/* { dg-final { scan-tree-dump-times "Alignment of access forced using peeling" 4 "vect" } } */
+/* Verify all vector accesses are emitted as aligned. */
+/* { dg-final { scan-assembler-not "movup" } } */
--- /dev/null
+/* { dg-do run { target lp64 } } */
+/* This is a test exercising peeling for alignment for a positive step
+ vector loop. We're forcing atom tuning here because that has a higher
+ unaligned vs aligned cost unlike most other archs. */
+/* { dg-options "-O3 -march=x86-64 -mtune=atom -fdump-tree-vect-details -save-temps" } */
+
+float a[1024], b[1024];
+
+/* Positive-step loop with compile-time start 2.  noipa keeps the
+   function from being inlined/IPA-optimized so its loop is analyzed
+   and vectorized on its own.  */
+void __attribute__((noipa)) foo1 ()
+{
+ for (int i = 2; i < 508; ++i)
+ a[i] = b[i] * 2.;
+}
+/* Same positive-step loop as foo1 but starting at 3 — foo1..foo4
+   start at 2..5 so together they cover different element offsets of
+   the first vector access.  */
+void __attribute__((noipa)) foo2 ()
+{
+ for (int i = 3; i < 508; ++i)
+ a[i] = b[i] * 2.;
+}
+/* Positive-step loop, compile-time start 4.  */
+void __attribute__((noipa)) foo3 ()
+{
+ for (int i = 4; i < 508; ++i)
+ a[i] = b[i] * 2.;
+}
+/* Positive-step loop, compile-time start 5.  */
+void __attribute__((noipa)) foo4 ()
+{
+ for (int i = 5; i < 508; ++i)
+ a[i] = b[i] * 2.;
+}
+/* Positive-step loop whose start is a runtime value, so the
+   misalignment of the accesses is not a compile-time constant.  */
+void __attribute__((noipa)) foo5 (int start)
+{
+ for (int i = start; i < 508; ++i)
+ a[i] = b[i] * 2.;
+}
+
+int main()
+{
+ /* (Re)initialize b before each foo* call.  The volatile asm with a
+    "memory" clobber is an optimization barrier so these init loops are
+    not themselves vectorized or folded away — keeping the dump and
+    assembler scans about the foo* loops only (testsuite idiom).  */
+ for (int i = 2; i < 508; ++i)
+ {
+ __asm__ volatile ("" : : : "memory");
+ b[i] = i;
+ }
+ foo1 ();
+ /* foo1 writes a[2..507]; verify every element it covers.  */
+ for (int i = 2; i < 508; ++i)
+ if (a[i] != 2*i)
+ __builtin_abort ();
+
+ for (int i = 3; i < 508; ++i)
+ {
+ __asm__ volatile ("" : : : "memory");
+ b[i] = i;
+ }
+ foo2 ();
+ /* foo2 writes a[3..507].  */
+ for (int i = 3; i < 508; ++i)
+ if (a[i] != 2*i)
+ __builtin_abort ();
+
+ for (int i = 4; i < 508; ++i)
+ {
+ __asm__ volatile ("" : : : "memory");
+ b[i] = i;
+ }
+ foo3 ();
+ /* foo3 writes a[4..507].  */
+ for (int i = 4; i < 508; ++i)
+ if (a[i] != 2*i)
+ __builtin_abort ();
+
+ for (int i = 5; i < 508; ++i)
+ {
+ __asm__ volatile ("" : : : "memory");
+ b[i] = i;
+ }
+ foo4 ();
+ /* foo4 writes a[5..507].  */
+ for (int i = 5; i < 508; ++i)
+ if (a[i] != 2*i)
+ __builtin_abort ();
+
+ for (int i = 3; i < 508; ++i)
+ {
+ __asm__ volatile ("" : : : "memory");
+ b[i] = i;
+ }
+ foo5 (3);
+ /* foo5 (3) writes a[3..507], same range as foo2 but with a runtime
+    start.  */
+ for (int i = 3; i < 508; ++i)
+ if (a[i] != 2*i)
+ __builtin_abort ();
+}
+
+/* { dg-final { scan-tree-dump-times "Alignment of access forced using peeling" 4 "vect" } } */
+/* Verify all vector accesses are emitted as aligned. */
+/* { dg-final { scan-assembler-not "movup" } } */
return vect_dr_aligned_if_related_peeled_dr_is (dr_info, dr_peel_info);
}
+/* Compute the value for DR_INFO->misalign at which the access is
+   considered aligned.  For a non-negative DR_STEP an aligned access
+   simply has misalignment zero.  For a negative step dr_misalignment
+   applies an offset of (nunits - 1) * element-size to the recorded
+   misalignment, so the value that makes the access appear aligned is
+   that offset reduced modulo the target alignment.  Returns
+   DR_MISALIGNMENT_UNKNOWN when the offset cannot be expressed as a
+   constant residue (non-constant target alignment or vector size,
+   e.g. with variable-length vectors).  */
+
+int
+vect_dr_misalign_for_aligned_access (dr_vec_info *dr_info)
+{
+ /* Forward (or zero) step: aligned means misalignment 0.  */
+ if (tree_int_cst_sgn (DR_STEP (dr_info->dr)) >= 0)
+ return 0;
+
+ /* Negative step: compute the (nunits - 1) * element-size offset that
+    dr_misalignment adds for such accesses.  */
+ tree vectype = STMT_VINFO_VECTYPE (dr_info->stmt);
+ poly_int64 misalignment
+ = ((TYPE_VECTOR_SUBPARTS (vectype) - 1)
+ * TREE_INT_CST_LOW (TYPE_SIZE_UNIT (TREE_TYPE (vectype))));
+
+ /* Reduce the offset modulo the target alignment; bail out to
+    DR_MISALIGNMENT_UNKNOWN if either quantity is not a compile-time
+    constant.  */
+ unsigned HOST_WIDE_INT target_alignment_c;
+ int misalign;
+ if (!dr_info->target_alignment.is_constant (&target_alignment_c)
+ || !known_misalignment (misalignment, target_alignment_c, &misalign))
+ return DR_MISALIGNMENT_UNKNOWN;
+ return misalign;
+}
+
/* Function vect_update_misalignment_for_peel.
Sets DR_INFO's misalignment
- to 0 if it has the same alignment as DR_PEEL_INFO,
/* If dr_info is aligned of dr_peel_info is, then mark it so. */
if (vect_dr_aligned_if_peeled_dr_is (dr_info, dr_peel_info))
{
- SET_DR_MISALIGNMENT (dr_info, 0);
+ SET_DR_MISALIGNMENT (dr_info,
+ vect_dr_misalign_for_aligned_access (dr_peel_info));
return;
}
tree vectype = STMT_VINFO_VECTYPE (dr_info->stmt);
if (DR_TARGET_ALIGNMENT (dr_info).is_constant (&alignment)
&& known_alignment_for_access_p (dr_info, vectype)
- && known_alignment_for_access_p (dr_peel_info, vectype))
+ && npeel != -1)
{
- int misal = dr_misalignment (dr_info, vectype);
+ int misal = dr_info->misalignment;
misal += npeel * TREE_INT_CST_LOW (DR_STEP (dr_info->dr));
misal &= alignment - 1;
set_dr_misalignment (dr_info, misal);
if (npeel == 0)
;
else if (unknown_misalignment && dr_info == dr0_info)
- SET_DR_MISALIGNMENT (dr_info, 0);
+ SET_DR_MISALIGNMENT (dr_info,
+ vect_dr_misalign_for_aligned_access (dr0_info));
else
vect_update_misalignment_for_peel (dr_info, dr0_info, npeel);
vect_get_data_access_cost (loop_vinfo, dr_info, inside_cost, outside_cost,
LOOP_VINFO_PEELING_FOR_ALIGNMENT (loop_vinfo) = npeel;
else
LOOP_VINFO_PEELING_FOR_ALIGNMENT (loop_vinfo) = -1;
- SET_DR_MISALIGNMENT (dr0_info, 0);
+ SET_DR_MISALIGNMENT (dr0_info,
+ vect_dr_misalign_for_aligned_access (dr0_info));
if (dump_enabled_p ())
{
dump_printf_loc (MSG_NOTE, vect_location,
FOR_EACH_VEC_ELT (may_misalign_stmts, i, stmt_info)
{
dr_vec_info *dr_info = STMT_VINFO_DR_INFO (stmt_info);
- SET_DR_MISALIGNMENT (dr_info, 0);
+ SET_DR_MISALIGNMENT (dr_info,
+ vect_dr_misalign_for_aligned_access (dr_info));
if (dump_enabled_p ())
dump_printf_loc (MSG_NOTE, vect_location,
"Alignment of access forced using versioning.\n");