i386.c (ix86_data_alignment): Calculate max_align from prefetch_block tune setting.
author: Uros Bizjak <uros@gcc.gnu.org>
Sun, 5 Jan 2014 15:42:07 +0000 (16:42 +0100)
committer: Uros Bizjak <uros@gcc.gnu.org>
Sun, 5 Jan 2014 15:42:07 +0000 (16:42 +0100)
* config/i386/i386.c (ix86_data_alignment): Calculate max_align
from prefetch_block tune setting.
(nocona_cost): Correct size of prefetch block to 64.

From-SVN: r206345

gcc/ChangeLog
gcc/ChangeLog-2013
gcc/config/i386/i386.c

index 83b39e2..600c24b 100644 (file)
@@ -1,3 +1,9 @@
+2014-01-05  Uros Bizjak  <ubizjak@gmail.com>
+
+       * config/i386/i386.c (ix86_data_alignment): Calculate max_align
+       from prefetch_block tune setting.
+       (nocona_cost): Correct size of prefetch block to 64.
+
 2014-01-04  Eric Botcazou  <ebotcazou@adacore.com>
 
        * config/arm/arm.c (arm_get_frame_offsets): Revamp long lines.
@@ -24,7 +30,7 @@
 2014-01-03  Bingfeng Mei  <bmei@broadcom.com>
 
        PR tree-optimization/59651
-       * tree-vect-loop-manip.c (vect_create_cond_for_alias_checks): 
+       * tree-vect-loop-manip.c (vect_create_cond_for_alias_checks):
        Address range for negative step should be added by TYPE_SIZE_UNIT.
 
 2014-01-03  Andreas Schwab  <schwab@linux-m68k.org>
 
 2014-01-02  Richard Sandiford  <rdsandiford@googlemail.com>
 
-       Update copyright years
+       Update copyright years.
 
 2014-01-02  Richard Sandiford  <rdsandiford@googlemail.com>
 
        * common/config/arc/arc-common.c, config/arc/arc-modes.def,
        config/arc/arc-protos.h, config/arc/arc.c, config/arc/arc.h,
-       config/arc/arc.md, config/arc/arc.opt, config/arm/arm_neon_builtins.def,
-       config/arm/crypto.def, config/i386/avx512cdintrin.h,
-       config/i386/avx512erintrin.h, config/i386/avx512fintrin.h,
-       config/i386/avx512pfintrin.h, config/i386/btver2.md,
-       config/i386/shaintrin.h, config/i386/slm.md, config/linux-protos.h,
-       config/linux.c, config/winnt-c.c, diagnostic-color.c,
-       diagnostic-color.h, gimple-ssa-isolate-paths.c, vtable-verify.c,
-       vtable-verify.h: Use the standard form for the copyright notice.
+       config/arc/arc.md, config/arc/arc.opt,
+       config/arm/arm_neon_builtins.def, config/arm/crypto.def,
+       config/i386/avx512cdintrin.h, config/i386/avx512erintrin.h,
+       config/i386/avx512fintrin.h, config/i386/avx512pfintrin.h,
+       config/i386/btver2.md, config/i386/shaintrin.h, config/i386/slm.md,
+       config/linux-protos.h, config/linux.c, config/winnt-c.c,
+       diagnostic-color.c, diagnostic-color.h, gimple-ssa-isolate-paths.c,
+       vtable-verify.c, vtable-verify.h: Use the standard form for the
+       copyright notice.
 
 2014-01-02  Tobias Burnus  <burnus@net-b.de>
 
index 2ffd959..2c11878 100644 (file)
        (_mm_roundscale_ss): Ditto.
        (_mm_roundscale_sd): Ditto.
        * config/i386/i386-builtin-types.def: New types to support
-       new built-ins: <V2DF, V2DF, V2DF, INT, INT>, <V4SF, V4SF, V4SF, INT, INT>,
-       <(V4SF, V4SF, V2DF, INT>, <V2DF, V2DF, V4SF, INT>,
-       <V4SF, V4SF, V4SF, V4SF, IN>.
-       * config/i386/i386.c (enum ix86_builtins): Add IX86_BUILTIN_ADDSD_ROUND,
-       IX86_BUILTIN_ADDSS_ROUND, IX86_BUILTIN_CVTSD2SS_ROUND,
-       IX86_BUILTIN_CVTSS2SD_ROUND, IX86_BUILTIN_DIVSD_ROUND,
-       IX86_BUILTIN_GETEXPSD128, IX86_BUILTIN_DIVSS_ROUND,
-       IX86_BUILTIN_GETEXPSS128, IX86_BUILTIN_GETMANTSD128,
-       IX86_BUILTIN_GETMANTSS128, IX86_BUILTIN_MAXSD_ROUND,
-       IX86_BUILTIN_MAXSS_ROUND, IX86_BUILTIN_MINSD_ROUND,
-       IX86_BUILTIN_MINSS_ROUND, IX86_BUILTIN_MULSD_ROUND,
-       IX86_BUILTIN_MULSS_ROUND, IX86_BUILTIN_RCP14SD,
-       IX86_BUILTIN_RCP14SS, IX86_BUILTIN_RNDSCALESD,
+       new built-ins: <V2DF, V2DF, V2DF, INT, INT>,
+       <V4SF, V4SF, V4SF, INT, INT>, <(V4SF, V4SF, V2DF, INT>,
+       <V2DF, V2DF, V4SF, INT>, <V4SF, V4SF, V4SF, V4SF, IN>.
+       * config/i386/i386.c (enum ix86_builtins): Add
+       IX86_BUILTIN_ADDSD_ROUND, IX86_BUILTIN_ADDSS_ROUND,
+       IX86_BUILTIN_CVTSD2SS_ROUND, IX86_BUILTIN_CVTSS2SD_ROUND,
+       IX86_BUILTIN_DIVSD_ROUND, IX86_BUILTIN_GETEXPSD128,
+       IX86_BUILTIN_DIVSS_ROUND, IX86_BUILTIN_GETEXPSS128,
+       IX86_BUILTIN_GETMANTSD128, IX86_BUILTIN_GETMANTSS128,
+       IX86_BUILTIN_MAXSD_ROUND, IX86_BUILTIN_MAXSS_ROUND,
+       IX86_BUILTIN_MINSD_ROUND, IX86_BUILTIN_MINSS_ROUND,
+       IX86_BUILTIN_MULSD_ROUND, IX86_BUILTIN_MULSS_ROUND,
+       IX86_BUILTIN_RCP14SD, IX86_BUILTIN_RCP14SS, IX86_BUILTIN_RNDSCALESD,
        IX86_BUILTIN_RNDSCALESS, IX86_BUILTIN_RSQRT14SD,
        IX86_BUILTIN_RSQRT14SS, IX86_BUILTIN_SCALEFSD,
        IX86_BUILTIN_SCALEFSS, IX86_BUILTIN_SQRTSD_ROUND,
        (ix86_expand_int_vcond): Ditto.
        (ix86_expand_vec_perm): Ditto.
        (ix86_expand_sse_unpack): Ditto.
-       (ix86_constant_alignment): Ditto.
        (ix86_builtin_vectorized_function): Ditto.
        (ix86_vectorize_builtin_gather): Ditto.
        (avx_vpermilp_parallel): Ditto.
        (ix86_autovectorize_vector_sizes): Ditto.
        (ix86_expand_vec_perm_vpermi2): New.
        (ix86_vector_duplicate_value): Ditto.
-       (IX86_BUILTIN_SQRTPD512, IX86_BUILTIN_EXP2PS, IX86_BUILTIN_SQRTPS_NR512,
-       IX86_BUILTIN_GATHER3ALTDIV16SF, IX86_BUILTIN_GATHER3ALTDIV16SI,
-       IX86_BUILTIN_GATHER3ALTSIV8DF, IX86_BUILTIN_GATHER3ALTSIV8DI,
-       IX86_BUILTIN_GATHER3DIV16SF, IX86_BUILTIN_GATHER3DIV16SI,
-       IX86_BUILTIN_GATHER3DIV8DF, IX86_BUILTIN_GATHER3DIV8DI,
-       IX86_BUILTIN_GATHER3SIV16SF, IX86_BUILTIN_GATHER3SIV16SI,
-       IX86_BUILTIN_GATHER3SIV8DF, IX86_BUILTIN_CEILPD_VEC_PACK_SFIX512,
-       IX86_BUILTIN_CPYSGNPS512, IX86_BUILTIN_CPYSGNPD512,
-       IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX512,
+       (IX86_BUILTIN_SQRTPD512, IX86_BUILTIN_EXP2PS,
+       IX86_BUILTIN_SQRTPS_NR512, IX86_BUILTIN_GATHER3ALTDIV16SF,
+       IX86_BUILTIN_GATHER3ALTDIV16SI, IX86_BUILTIN_GATHER3ALTSIV8DF,
+       IX86_BUILTIN_GATHER3ALTSIV8DI, IX86_BUILTIN_GATHER3DIV16SF,
+       IX86_BUILTIN_GATHER3DIV16SI, IX86_BUILTIN_GATHER3DIV8DF,
+       IX86_BUILTIN_GATHER3DIV8DI, IX86_BUILTIN_GATHER3SIV16SF,
+       IX86_BUILTIN_GATHER3SIV16SI, IX86_BUILTIN_GATHER3SIV8DF,
+       IX86_BUILTIN_CEILPD_VEC_PACK_SFIX512, IX86_BUILTIN_CPYSGNPS512,
+       IX86_BUILTIN_CPYSGNPD512, IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX512,
        IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX512): Ditto.
        * config/i386/sse.md (*mov<mode>_internal): Disable SSE typeless
        stores vectors > 128bit (AVX*).
        * config/nios2/constraints.md: New file.
        * config/nios2/t-nios2: New file.
        * common/config/nios2/nios2-common.c: New file.
-       * doc/invoke.texi (Nios II options): Document Nios II specific
-       options.
-       * doc/md.texi (Nios II family): Document Nios II specific
-       constraints.
+       * doc/invoke.texi (Nios II options): Document Nios II specific options.
+       * doc/md.texi (Nios II family): Document Nios II specific constraints.
        * doc/extend.texi (Function Specific Option Pragmas): Document
        Nios II supported target pragma functionality.
 
 
 2013-12-26  Ganesh Gopalasubramanian  <Ganesh.Gopalasubramanian@amd.com>
 
-       * config/i386/i386.c (get_builtin_code_for_version): Rename AMD 
-       CPU names M_AMD_BOBCAT to M_AMD_BTVER1 and M_AMD_JAGUAR 
+       * config/i386/i386.c (get_builtin_code_for_version): Rename AMD
+       CPU names M_AMD_BOBCAT to M_AMD_BTVER1 and M_AMD_JAGUAR
        to M_AMD_BTVER2.
        (processor_model): Likewise.
        (arch_names_table): Likewise.
index 1fc68e1..39891c9 100644 (file)
@@ -1568,7 +1568,7 @@ struct processor_costs nocona_cost = {
   8,                                   /* MMX or SSE register to integer */
   8,                                   /* size of l1 cache.  */
   1024,                                        /* size of l2 cache.  */
-  128,                                 /* size of prefetch block */
+  64,                                  /* size of prefetch block */
   8,                                   /* number of parallel prefetches */
   1,                                   /* Branch cost */
   COSTS_N_INSNS (6),                   /* cost of FADD and FSUB insns.  */
@@ -26465,8 +26465,16 @@ ix86_constant_alignment (tree exp, int align)
 int
 ix86_data_alignment (tree type, int align, bool opt)
 {
-  int max_align = optimize_size ? BITS_PER_WORD
-                               : MIN (512, MAX_OFILE_ALIGNMENT);
+  /* A data structure equal to or greater than the size of a cache line
+     (64 bytes in the Pentium 4 and other recent Intel processors, including
+     processors based on Intel Core microarchitecture) should be aligned
+     so that its base address is a multiple of the cache line size.  */
+
+  int max_align
+    = MIN ((unsigned) ix86_tune_cost->prefetch_block * 8, MAX_OFILE_ALIGNMENT);
+
+  if (max_align < BITS_PER_WORD)
+    max_align = BITS_PER_WORD;
 
   if (opt
       && AGGREGATE_TYPE_P (type)