config.gcc: Revamp target_cpu_default2 to strings; support new x86 variants.
author: Jan Hubicka <hubicka@gcc.gnu.org>
Fri, 14 Dec 2001 20:27:05 +0000 (20:27 +0000)
committer: Jan Hubicka <hubicka@gcc.gnu.org>
Fri, 14 Dec 2001 20:27:05 +0000 (20:27 +0000)
* config.gcc: Revamp target_cpu_default2 to strings;
support new x86 variants.
* i386.c (override_options): Default x86_cpu_string and x86_arch_string
properly; set prefetch_sse.
* i386.h (x86_prefetch_sse): Declare.
(TARGET_PREFETCH_SSE): New.
(CPP_CPU_DEFAULT_SPEC): Define according to the new macros.
(TARGET_CPU_DEFAULT_*): New.
* config/i386/i386.h (struct processor_costs): Add new members
  prefetch_block and simultaneous_prefetches.
  (PREFETCH_BLOCK, SIMULTANEOUS_PREFETCHES): New.
* config/i386/i386.c (processor_costs structs): Add values for
  prefetch_block and simultaneous_prefetches.
* config/i386/i386.md (unspec values): Remove values for prefetch
  operations, which now use the PREFETCH rtx code.
  (prefetch_sse, prefetch_3dnow, prefetchw): Combine to use new
  unified prefetch support.

From-SVN: r48006

gcc/ChangeLog
gcc/config/i386/i386.c
gcc/config/i386/i386.h
gcc/config/i386/i386.md

index 7eb9f77..63d2845 100644 (file)
@@ -1,3 +1,27 @@
+Fri Dec 14 21:23:54 CET 2001  Jan Hubicka  <jh@suse.cz>
+
+       * config.gcc: Revamp target_cpu_default2 to strings;
+       support new x86 variants.
+       * i386.c (override_options): Default x86_cpu_string and x86_arch_string
+       properly; set prefetch_sse.
+       * i386.h (x86_prefetch_sse): Declare.
+       (TARGET_PREFETCH_SSE): New.
+       (CPP_CPU_DEFAULT_SPEC): Define according to the new macros.
+       (TARGET_CPU_DEFAULT_*): New.
+
+Thu Dec 13 21:57:13 CET 2001  Janis Johnson <janis187@us.ibm.com>
+                             Jan Hubicka  <jh@suse.cz>
+
+       * config/i386/i386.h (struct processor_costs): Add new members
+         prefetch_block and simultaneous_prefetches.
+         (PREFETCH_BLOCK, SIMULTANEOUS_PREFETCHES): New.
+       * config/i386/i386.c (processor_costs structs): Add values for
+         prefetch_block and simultaneous_prefetches.
+       * config/i386/i386.md (unspec values): Remove values for prefetch
+         operations, which now use the PREFETCH rtx code.
+         (prefetch_sse, prefetch_3dnow, prefetchw): Combine to use new
+         unified prefetch support.
+
 2001-12-14  Jason Merrill  <jason@redhat.com>
 
        * diagnostic.c (sorry): Increment sorrycount before saving the
index 55113a5..aa65281 100644 (file)
@@ -81,6 +81,8 @@ struct processor_costs size_cost = {  /* costs for tunning for size */
   {3, 3, 3},                           /* cost of storing SSE registers
                                           in SImode, DImode and TImode */
   3,                                   /* MMX or SSE register to integer */
+  0,                                   /* size of prefetch block */
+  0,                                   /* number of parallel prefetches */
 };
 /* Processor costs (relative to an add) */
 static const 
@@ -116,6 +118,8 @@ struct processor_costs i386_cost = {        /* 386 specific costs */
   {4, 8, 16},                          /* cost of storing SSE registers
                                           in SImode, DImode and TImode */
   3,                                   /* MMX or SSE register to integer */
+  0,                                   /* size of prefetch block */
+  0,                                   /* number of parallel prefetches */
 };
 
 static const 
@@ -150,7 +154,9 @@ struct processor_costs i486_cost = {        /* 486 specific costs */
                                           in SImode, DImode and TImode */
   {4, 8, 16},                          /* cost of storing SSE registers
                                           in SImode, DImode and TImode */
-  3                                    /* MMX or SSE register to integer */
+  3,                                   /* MMX or SSE register to integer */
+  0,                                   /* size of prefetch block */
+  0,                                   /* number of parallel prefetches */
 };
 
 static const 
@@ -185,7 +191,9 @@ struct processor_costs pentium_cost = {
                                           in SImode, DImode and TImode */
   {4, 8, 16},                          /* cost of storing SSE registers
                                           in SImode, DImode and TImode */
-  3                                    /* MMX or SSE register to integer */
+  3,                                   /* MMX or SSE register to integer */
+  0,                                   /* size of prefetch block */
+  0,                                   /* number of parallel prefetches */
 };
 
 static const 
@@ -220,7 +228,9 @@ struct processor_costs pentiumpro_cost = {
                                           in SImode, DImode and TImode */
   {2, 2, 8},                           /* cost of storing SSE registers
                                           in SImode, DImode and TImode */
-  3                                    /* MMX or SSE register to integer */
+  3,                                   /* MMX or SSE register to integer */
+  32,                                  /* size of prefetch block */
+  6,                                   /* number of parallel prefetches */
 };
 
 static const 
@@ -255,7 +265,9 @@ struct processor_costs k6_cost = {
                                           in SImode, DImode and TImode */
   {2, 2, 8},                           /* cost of storing SSE registers
                                           in SImode, DImode and TImode */
-  6                                    /* MMX or SSE register to integer */
+  6,                                   /* MMX or SSE register to integer */
+  32,                                  /* size of prefetch block */
+  1,                                   /* number of parallel prefetches */
 };
 
 static const 
@@ -290,7 +302,9 @@ struct processor_costs athlon_cost = {
                                           in SImode, DImode and TImode */
   {2, 2, 8},                           /* cost of storing SSE registers
                                           in SImode, DImode and TImode */
-  6                                    /* MMX or SSE register to integer */
+  6,                                   /* MMX or SSE register to integer */
+  64,                                  /* size of prefetch block */
+  6,                                   /* number of parallel prefetches */
 };
 
 static const 
@@ -326,6 +340,8 @@ struct processor_costs pentium4_cost = {
   {2, 2, 8},                           /* cost of storing SSE registers
                                           in SImode, DImode and TImode */
   10,                                  /* MMX or SSE register to integer */
+  64,                                  /* size of prefetch block */
+  6,                                   /* number of parallel prefetches */
 };
 
 const struct processor_costs *ix86_cost = &pentium_cost;
@@ -592,6 +608,9 @@ const char *ix86_fpmath_string;             /* for -mfpmath=<xxx> */
 /* # of registers to use to pass arguments.  */
 const char *ix86_regparm_string;
 
+/* true if sse prefetch instruction is not NOOP.  */
+int x86_prefetch_sse;
+
 /* ix86_regparm_string as a number */
 int ix86_regparm;
 
@@ -817,6 +836,7 @@ override_options ()
       {&pentium4_cost, 0, 0, 0, 0, 0, 0, 0, 1}
     };
 
+  static const char * const cpu_names[] = TARGET_CPU_DEFAULT_NAMES;
   static struct pta
     {
       const char *const name;          /* processor name or nickname.  */
@@ -826,7 +846,7 @@ override_options ()
          PTA_SSE = 1,
          PTA_SSE2 = 2,
          PTA_MMX = 4,
-         PTA_SSEPREFETCH = 8,
+         PTA_PREFETCH_SSE = 8,
          PTA_3DNOW = 16,
          PTA_3DNOW_A = 64
        } flags;
@@ -841,21 +861,21 @@ override_options ()
       {"i686", PROCESSOR_PENTIUMPRO, 0},
       {"pentiumpro", PROCESSOR_PENTIUMPRO, 0},
       {"pentium2", PROCESSOR_PENTIUMPRO, PTA_MMX},
-      {"pentium3", PROCESSOR_PENTIUMPRO, PTA_MMX | PTA_SSE | PTA_SSEPREFETCH},
+      {"pentium3", PROCESSOR_PENTIUMPRO, PTA_MMX | PTA_SSE | PTA_PREFETCH_SSE},
       {"pentium4", PROCESSOR_PENTIUM4, PTA_SSE | PTA_SSE2 |
-                                      PTA_MMX | PTA_SSEPREFETCH},
+                                      PTA_MMX | PTA_PREFETCH_SSE},
       {"k6", PROCESSOR_K6, PTA_MMX},
       {"k6-2", PROCESSOR_K6, PTA_MMX | PTA_3DNOW},
       {"k6-3", PROCESSOR_K6, PTA_MMX | PTA_3DNOW},
-      {"athlon", PROCESSOR_ATHLON, PTA_MMX | PTA_SSEPREFETCH | PTA_3DNOW
+      {"athlon", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
                                   | PTA_3DNOW_A},
-      {"athlon-tbird", PROCESSOR_ATHLON, PTA_MMX | PTA_SSEPREFETCH
+      {"athlon-tbird", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE
                                         | PTA_3DNOW | PTA_3DNOW_A},
-      {"athlon-4", PROCESSOR_ATHLON, PTA_MMX | PTA_SSEPREFETCH | PTA_3DNOW
+      {"athlon-4", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
                                    | PTA_3DNOW_A | PTA_SSE},
-      {"athlon-xp", PROCESSOR_ATHLON, PTA_MMX | PTA_SSEPREFETCH | PTA_3DNOW
+      {"athlon-xp", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
                                      | PTA_3DNOW_A | PTA_SSE},
-      {"athlon-mp", PROCESSOR_ATHLON, PTA_MMX | PTA_SSEPREFETCH | PTA_3DNOW
+      {"athlon-mp", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
                                      | PTA_3DNOW_A | PTA_SSE},
     };
 
@@ -865,8 +885,12 @@ override_options ()
   SUBTARGET_OVERRIDE_OPTIONS;
 #endif
 
-  ix86_arch = PROCESSOR_I386;
-  ix86_cpu = (enum processor_type) TARGET_CPU_DEFAULT;
+  if (!ix86_cpu_string && ix86_arch_string)
+    ix86_cpu_string = ix86_arch_string;
+  if (!ix86_cpu_string)
+    ix86_cpu_string = cpu_names [TARGET_CPU_DEFAULT];
+  if (!ix86_arch_string)
+    ix86_arch_string = TARGET_64BIT ? "athlon-4" : "i386";
 
   if (ix86_cmodel_string != 0)
     {
@@ -900,47 +924,45 @@ override_options ()
     sorry ("%i-bit mode not compiled in",
           (target_flags & MASK_64BIT) ? 64 : 32);
 
-  if (ix86_arch_string != 0)
-    {
-      for (i = 0; i < pta_size; i++)
-       if (! strcmp (ix86_arch_string, processor_alias_table[i].name))
-         {
-           ix86_arch = processor_alias_table[i].processor;
-           /* Default cpu tuning to the architecture.  */
-           ix86_cpu = ix86_arch;
-           if (processor_alias_table[i].flags & PTA_MMX
-               && !(target_flags & MASK_MMX_SET))
-             target_flags |= MASK_MMX;
-           if (processor_alias_table[i].flags & PTA_3DNOW
-               && !(target_flags & MASK_3DNOW_SET))
-             target_flags |= MASK_3DNOW;
-           if (processor_alias_table[i].flags & PTA_3DNOW_A
-               && !(target_flags & MASK_3DNOW_A_SET))
-             target_flags |= MASK_3DNOW_A;
-           if (processor_alias_table[i].flags & PTA_SSE
-               && !(target_flags & MASK_SSE_SET))
-             target_flags |= MASK_SSE;
-           if (processor_alias_table[i].flags & PTA_SSE2
-               && !(target_flags & MASK_SSE2_SET))
-             target_flags |= MASK_SSE2;
-           break;
-         }
+  for (i = 0; i < pta_size; i++)
+    if (! strcmp (ix86_arch_string, processor_alias_table[i].name))
+      {
+       ix86_arch = processor_alias_table[i].processor;
+       /* Default cpu tuning to the architecture.  */
+       ix86_cpu = ix86_arch;
+       if (processor_alias_table[i].flags & PTA_MMX
+           && !(target_flags & MASK_MMX_SET))
+         target_flags |= MASK_MMX;
+       if (processor_alias_table[i].flags & PTA_3DNOW
+           && !(target_flags & MASK_3DNOW_SET))
+         target_flags |= MASK_3DNOW;
+       if (processor_alias_table[i].flags & PTA_3DNOW_A
+           && !(target_flags & MASK_3DNOW_A_SET))
+         target_flags |= MASK_3DNOW_A;
+       if (processor_alias_table[i].flags & PTA_SSE
+           && !(target_flags & MASK_SSE_SET))
+         target_flags |= MASK_SSE;
+       if (processor_alias_table[i].flags & PTA_SSE2
+           && !(target_flags & MASK_SSE2_SET))
+         target_flags |= MASK_SSE2;
+       if (processor_alias_table[i].flags & PTA_PREFETCH_SSE)
+         x86_prefetch_sse = true;
+       break;
+      }
 
-      if (i == pta_size)
-       error ("bad value (%s) for -march= switch", ix86_arch_string);
-    }
+  if (i == pta_size)
+    error ("bad value (%s) for -march= switch", ix86_arch_string);
 
-  if (ix86_cpu_string != 0)
-    {
-      for (i = 0; i < pta_size; i++)
-       if (! strcmp (ix86_cpu_string, processor_alias_table[i].name))
-         {
-           ix86_cpu = processor_alias_table[i].processor;
-           break;
-         }
-      if (i == pta_size)
-       error ("bad value (%s) for -mcpu= switch", ix86_cpu_string);
-    }
+  /* Look up the CPU to tune for.  The PTA_PREFETCH_SSE test has to live
+     inside the match: when no name matches, I ends up equal to pta_size
+     and processor_alias_table[i] would be read past the entries the loop
+     searched, before the error below is even reported.  */
+  for (i = 0; i < pta_size; i++)
+    if (! strcmp (ix86_cpu_string, processor_alias_table[i].name))
+      {
+       ix86_cpu = processor_alias_table[i].processor;
+       if (processor_alias_table[i].flags & PTA_PREFETCH_SSE)
+         x86_prefetch_sse = true;
+       break;
+      }
+  if (i == pta_size)
+    error ("bad value (%s) for -mcpu= switch", ix86_cpu_string);
 
   if (optimize_size)
     ix86_cost = &size_cost;
@@ -11857,22 +11879,13 @@ ix86_expand_builtin (exp, target, subtarget, mode, ignore)
       return ix86_expand_binop_builtin (CODE_FOR_pmulhrwv4hi3, arglist, target);
 
     case IX86_BUILTIN_PREFETCH_3DNOW:
-      icode = CODE_FOR_prefetch_3dnow;
-      arg0 = TREE_VALUE (arglist);
-      op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
-      mode0 = insn_data[icode].operand[0].mode;
-      pat = GEN_FCN (icode) (copy_to_mode_reg (Pmode, op0));
-      if (! pat)
-        return NULL_RTX;
-      emit_insn (pat);
-      return NULL_RTX;
-
     case IX86_BUILTIN_PREFETCHW:
-      icode = CODE_FOR_prefetchw;
+      icode = CODE_FOR_prefetch_3dnow;
       arg0 = TREE_VALUE (arglist);
       op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
+      op1 = (fcode == IX86_BUILTIN_PREFETCH_3DNOW ? const0_rtx : const1_rtx);
       mode0 = insn_data[icode].operand[0].mode;
-      pat = GEN_FCN (icode) (copy_to_mode_reg (Pmode, op0));
+      pat = GEN_FCN (icode) (copy_to_mode_reg (Pmode, op0), op1);
       if (! pat)
         return NULL_RTX;
       emit_insn (pat);
index b6e567e..cfbb10b 100644 (file)
@@ -86,6 +86,9 @@ struct processor_costs {
                                   in SImode, DImode and TImode*/
   const int mmxsse_to_integer; /* cost of moving mmxsse register to
                                   integer and vice versa.  */
+  const int prefetch_block;    /* bytes moved to cache for prefetch.  */
+  const int simultaneous_prefetches; /* number of parallel prefetch
+                                  operations.  */
 };
 
 extern const struct processor_costs *ix86_cost;
@@ -224,6 +227,7 @@ extern const int x86_add_esp_4, x86_add_esp_8, x86_sub_esp_4, x86_sub_esp_8;
 extern const int x86_partial_reg_dependency, x86_memory_mismatch_stall;
 extern const int x86_accumulate_outgoing_args, x86_prologue_using_move;
 extern const int x86_epilogue_using_move, x86_decompose_lea;
+extern int x86_prefetch_sse;
 
 #define TARGET_USE_LEAVE (x86_use_leave & CPUMASK)
 #define TARGET_PUSH_MEMORY (x86_push_memory & CPUMASK)
@@ -262,6 +266,7 @@ extern const int x86_epilogue_using_move, x86_decompose_lea;
 #define TARGET_PROLOGUE_USING_MOVE (x86_prologue_using_move & CPUMASK)
 #define TARGET_EPILOGUE_USING_MOVE (x86_epilogue_using_move & CPUMASK)
 #define TARGET_DECOMPOSE_LEA (x86_decompose_lea & CPUMASK)
+#define TARGET_PREFETCH_SSE (x86_prefetch_sse)
 
 #define TARGET_STACK_PROBE (target_flags & MASK_STACK_PROBE)
 
@@ -480,24 +485,61 @@ extern int ix86_arch;
 %n`-mpentiumpro' is deprecated. Use `-march=pentiumpro' or `-mcpu=pentiumpro' instead.\n}}"
 #endif
 \f
+/* Symbolic values for TARGET_CPU_DEFAULT.  override_options uses
+   TARGET_CPU_DEFAULT as an index into TARGET_CPU_DEFAULT_NAMES, so each
+   value below must equal the position of the corresponding name in that
+   initializer.  Keep the two lists in the same order when adding a CPU.  */
+#define TARGET_CPU_DEFAULT_i386 0
+#define TARGET_CPU_DEFAULT_i486 1
+#define TARGET_CPU_DEFAULT_pentium 2
+#define TARGET_CPU_DEFAULT_pentium_mmx 3
+#define TARGET_CPU_DEFAULT_pentiumpro 4
+#define TARGET_CPU_DEFAULT_pentium2 5
+#define TARGET_CPU_DEFAULT_pentium3 6
+#define TARGET_CPU_DEFAULT_pentium4 7
+#define TARGET_CPU_DEFAULT_k6 8
+#define TARGET_CPU_DEFAULT_k6_2 9
+#define TARGET_CPU_DEFAULT_k6_3 10
+#define TARGET_CPU_DEFAULT_athlon 11
+#define TARGET_CPU_DEFAULT_athlon_sse 12
+
+#define TARGET_CPU_DEFAULT_NAMES {"i386", "i486", "pentium", "pentium-mmx",\
+                                 "pentiumpro", "pentium2", "pentium3", \
+                                 "pentium4", "k6", "k6-2", "k6-3",\
+                                 "athlon", "athlon-4"}
 #ifndef CPP_CPU_DEFAULT_SPEC
-#if TARGET_CPU_DEFAULT == 1
+#if TARGET_CPU_DEFAULT == TARGET_CPU_DEFAULT_i486
 #define CPP_CPU_DEFAULT_SPEC "-D__tune_i486__"
 #endif
-#if TARGET_CPU_DEFAULT == 2
+#if TARGET_CPU_DEFAULT == TARGET_CPU_DEFAULT_pentium
 #define CPP_CPU_DEFAULT_SPEC "-D__tune_i586__ -D__tune_pentium__"
 #endif
-#if TARGET_CPU_DEFAULT == 3
+#if TARGET_CPU_DEFAULT == TARGET_CPU_DEFAULT_pentium_mmx
+#define CPP_CPU_DEFAULT_SPEC "-D__tune_i586__ -D__tune_pentium__ -D__tune_pentium_mmx__"
+#endif
+#if TARGET_CPU_DEFAULT == TARGET_CPU_DEFAULT_pentiumpro
 #define CPP_CPU_DEFAULT_SPEC "-D__tune_i686__ -D__tune_pentiumpro__"
 #endif
-#if TARGET_CPU_DEFAULT == 4
+#if TARGET_CPU_DEFAULT == TARGET_CPU_DEFAULT_pentium2
+#define CPP_CPU_DEFAULT_SPEC "-D__tune_i686__ -D__tune_pentiumpro__\
+-D__tune_pentium2__"
+#endif
+#if TARGET_CPU_DEFAULT == TARGET_CPU_DEFAULT_pentium3
+#define CPP_CPU_DEFAULT_SPEC "-D__tune_i686__ -D__tune_pentiumpro__\
+-D__tune_pentium2__ -D__tune_pentium3__"
+#endif
+#if TARGET_CPU_DEFAULT == TARGET_CPU_DEFAULT_pentium4
+#define CPP_CPU_DEFAULT_SPEC "-D__tune_pentium4__"
+#endif
+#if TARGET_CPU_DEFAULT == TARGET_CPU_DEFAULT_k6
 #define CPP_CPU_DEFAULT_SPEC "-D__tune_k6__"
 #endif
-#if TARGET_CPU_DEFAULT == 5
+#if TARGET_CPU_DEFAULT == TARGET_CPU_DEFAULT_k6_2
+#define CPP_CPU_DEFAULT_SPEC "-D__tune_k6__ -D__tune_k6_2__"
+#endif
+#if TARGET_CPU_DEFAULT == TARGET_CPU_DEFAULT_k6_3
+#define CPP_CPU_DEFAULT_SPEC "-D__tune_k6__ -D__tune_k6_3__"
+#endif
+#if TARGET_CPU_DEFAULT == TARGET_CPU_DEFAULT_athlon
 #define CPP_CPU_DEFAULT_SPEC "-D__tune_athlon__"
 #endif
-#if TARGET_CPU_DEFAULT == 6
-#define CPP_CPU_DEFAULT_SPEC "-D__tune_pentium4__"
+#if TARGET_CPU_DEFAULT == TARGET_CPU_DEFAULT_athlon_sse
+#define CPP_CPU_DEFAULT_SPEC "-D__tune_athlon__ -D__tune_athlon_sse__"
 #endif
 #ifndef CPP_CPU_DEFAULT_SPEC
 #define CPP_CPU_DEFAULT_SPEC "-D__tune_i386__"
@@ -531,30 +573,45 @@ extern int ix86_arch;
 %{march=i486:-D__i486 -D__i486__ %{!mcpu*:-D__tune_i486__ }}\
 %{march=pentium|march=i586:-D__i586 -D__i586__ -D__pentium -D__pentium__ \
   %{!mcpu*:-D__tune_i586__ -D__tune_pentium__ }}\
+%{march=pentium-mmx:-D__i586 -D__i586__ -D__pentium -D__pentium__ \
+  -D__pentium__mmx__ \
+  %{!mcpu*:-D__tune_i586__ -D__tune_pentium__ -D__tune_pentium_mmx__}}\
 %{march=pentiumpro|march=i686:-D__i686 -D__i686__ \
   -D__pentiumpro -D__pentiumpro__ \
   %{!mcpu*:-D__tune_i686__ -D__tune_pentiumpro__ }}\
 %{march=k6:-D__k6 -D__k6__ %{!mcpu*:-D__tune_k6__ }}\
-%{march=athlon:-D__athlon -D__athlon__ %{!mcpu*:-D__tune_athlon__ }}\
+%{march=k6-2:-D__k6 -D__k6__ -D__k6_2__ \
+  %{!mcpu*:-D__tune_k6__ -D__tune_k6_2__ }}\
+%{march=k6-3:-D__k6 -D__k6__ -D__k6_3__ \
+  %{!mcpu*:-D__tune_k6__ -D__tune_k6_3__ }}\
+%{march=athlon|march=athlon-tbird:-D__athlon -D__athlon__ \
+  %{!mcpu*:-D__tune_athlon__ }}\
+%{march=athlon-4|march=athlon-xp|march=athlon-mp:-D__athlon -D__athlon__ \
+  -D__athlon_sse__ \
+  %{!mcpu*:-D__tune_athlon__ -D__tune_athlon_sse__ }}\
 %{march=pentium4:-D__pentium4 -D__pentium4__ %{!mcpu*:-D__tune_pentium4__ }}\
 %{m386|mcpu=i386:-D__tune_i386__ }\
 %{m486|mcpu=i486:-D__tune_i486__ }\
 %{mpentium|mcpu=pentium|mcpu=i586|mcpu=pentium-mmx:-D__tune_i586__ -D__tune_pentium__ }\
-%{mpentiumpro|mcpu=pentiumpro|mcpu=i686|cpu=pentium2|cpu=pentium3:-D__tune_i686__\
+%{mpentiumpro|mcpu=pentiumpro|mcpu=i686|cpu=pentium2|cpu=pentium3:-D__tune_i686__ \
 -D__tune_pentiumpro__ }\
 %{mcpu=k6|mcpu=k6-2|mcpu=k6-3:-D__tune_k6__ }\
 %{mcpu=athlon|mcpu=athlon-tbird|mcpu=athlon-4|mcpu=athlon-xp|mcpu=athlon-mp:\
 -D__tune_athlon__ }\
+%{mcpu=athlon-4|mcpu=athlon-xp|mcpu=athlon-mp:\
+-D__tune_athlon_sse__ }\
 %{mcpu=pentium4:-D__tune_pentium4__ }\
 %{march=march=athlon-tbird|march=athlon-xp|march=athlon-mp|march=pentium3|march=pentium4:\
 -D__SSE__ }\
 %{march=pentium-mmx|march=k6|march=k6-2|march=k6-3\
 march=athlon|march=athlon-tbird|march=athlon-4|march=athlon-xp\
 |march=athlon-mp|march=pentium2|march=pentium3|march=pentium4: -D__MMX__ }\
-%{march=k6|march=k6-2|march=k6-3\
+%{march=k6-2|march=k6-3|\
 march=athlon|march=athlon-tbird|march=athlon-4|march=athlon-xp\
 |march=athlon-mp: -D__3dNOW__ }\
-%{mcpu=mcpu=pentium4: -D__SSE2__ }\
+%{march=athlon|march=athlon-tbird|march=athlon-4|march=athlon-xp\
+|march=athlon-mp: -D__3dNOW_A__ }\
+%{march=pentium4: -D__SSE2__ }\
 %{!march*:%{!mcpu*:%{!m386:%{!m486:%{!mpentium*:%(cpp_cpu_default)}}}}}"
 
 #ifndef CPP_CPU_SPEC
@@ -2261,6 +2318,12 @@ while (0)
 /* Define this as 1 if `char' should by default be signed; else as 0.  */
 #define DEFAULT_SIGNED_CHAR 1
 
+/* Number of bytes moved into a data cache for a single prefetch operation.  */
+#define PREFETCH_BLOCK ix86_cost->prefetch_block
+
+/* Number of prefetch operations that can be done in parallel.  */
+#define SIMULTANEOUS_PREFETCHES ix86_cost->simultaneous_prefetches
+
 /* Max number of bytes we can move from memory to memory
    in one reasonably fast instruction.  */
 #define MOVE_MAX 16
index eee94a7..3da4cab 100644 (file)
@@ -93,8 +93,6 @@
 ;; 44 This is a `sfence' operation.
 ;; 45 This is a noop to prevent excessive combiner cleverness.
 ;; 46 This is a `femms' operation.
-;; 47 This is a `prefetch' (3DNow) operation.
-;; 48 This is a `prefetchw' operation.
 ;; 49 This is a 'pavgusb' operation.
 ;; 50 This is a `pfrcp' operation.
 ;; 51 This is a `pfrcpit1' operation.
   [(set_attr "type" "sse")
    (set_attr "memory" "unknown")])
 
+;; Expander for the prefetch pattern.  Operand 0 is the address, operand 1
+;; the read/write flag (must be 0 or 1) and operand 2 the locality level
+;; (must be 0..3); any other constant aborts.  Emits either prefetch_3dnow
+;; or prefetch_sse depending on the target and on the read/write flag.
+(define_expand "prefetch"
+  [(prefetch (match_operand:SI 0 "address_operand" "p")
+            (match_operand:SI 1 "const_int_operand" "n")
+            (match_operand:SI 2 "const_int_operand" "n"))]
+  "TARGET_PREFETCH_SSE || TARGET_3DNOW"
+  "
+{
+  int rw = INTVAL (operands[1]);
+  int locality = INTVAL (operands[2]);
+  if (rw != 0 && rw != 1)
+    abort ();
+  if (locality < 0 || locality > 3)
+    abort ();
+  /* Use the 3dNOW prefetch when a write prefetch is requested, which the
+     SSE counterpart does not support, or when the SSE prefetch is not
+     available (K6 machines).  Otherwise use the SSE prefetch, as it
+     allows the locality to be specified.  */
+  if (TARGET_3DNOW
+       && (!TARGET_PREFETCH_SSE || rw))
+    {
+      emit_insn (gen_prefetch_3dnow (operands[0], operands[1]));
+    }
+  else
+    {
+      /* Translate the locality level into the constant passed as
+         operand 1 of prefetch_sse.  */
+      int i;
+      switch (locality)
+       {
+         case 0:       /* No temporal locality.  */
+           i = 0;
+           break;
+         case 1:       /* Lowest level of temporal locality.  */
+           i = 3;
+           break;
+         case 2:       /* Moderate level of temporal locality.  */
+           i = 2;
+           break;
+         case 3:       /* Highest level of temporal locality.  */
+           i = 1;
+           break;
+         default:
+           abort ();   /* We already checked for valid values above.  */
+           break;
+       }
+      emit_insn (gen_prefetch_sse (operands[0], GEN_INT (i)));
+    }
+  DONE;
+}")
+
 (define_insn "prefetch_sse"
   [(unspec [(match_operand:SI 0 "address_operand" "p")
            (match_operand:SI 1 "immediate_operand" "n")] 35)]
-  "TARGET_SSE || TARGET_3DNOW_A"
+  "TARGET_PREFETCH_SSE"
 {
   switch (INTVAL (operands[1]))
     {
   [(set_attr "type" "mmx")])
 
 (define_insn "prefetch_3dnow"
-  [(unspec [(match_operand:SI 0 "address_operand" "p")] 47)]
-  "TARGET_3DNOW"
-  "prefetch\\t%a0"
-  [(set_attr "type" "mmx")])
-
-(define_insn "prefetchw"
-  [(unspec [(match_operand:SI 0 "address_operand" "p")] 48)]
+  [(prefetch (match_operand:SI 0 "address_operand" "p")
+            (match_operand:SI 1 "const_int_operand" "n")
+            (const_int 0))]
   "TARGET_3DNOW"
-  "prefetchw\\t%a0"
+{
+  /* Operand 1 is the read/write flag: 0 selects the plain prefetch
+     mnemonic, any other value selects prefetchw.  This one insn replaces
+     the separate prefetch_3dnow and prefetchw unspec patterns.  */
+  if (INTVAL (operands[1]) == 0)
+    return "prefetch\t%a0";
+  else
+    return "prefetchw\t%a0";
+}
   [(set_attr "type" "mmx")])
 
 (define_insn "pf2id"