From bcbbac264c104c997e3361588fefb5480933d31e Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Mon, 15 Dec 1997 10:33:21 -0800 Subject: [PATCH] alpha.c (alpha_cpu_name): New variable. * alpha.c (alpha_cpu_name): New variable. (alpha_mlat_string): Likewise. (alpha_memory_latency): Likewise. (override_options): Handle -mmemory-latency. (alpha_adjust_cost): Adjust load cost for latency. * alpha.h (TARGET_OPTIONS): Add memory-latency. (REGISTER_MOVE_COST): Define in terms of memory_latency. Take TARGET_CIX into account. (MEMORY_MOVE_COST): Define in terms of memory_latency. * invoke.texi (DEC Alpha Options): Document -mmemory-latency. * alpha.h (ASM_COMMENT_START): New macro. From-SVN: r17106 --- gcc/ChangeLog | 17 ++++++++++++- gcc/config/alpha/alpha.c | 62 ++++++++++++++++++++++++++++++++++++++++++++++- gcc/config/alpha/alpha.h | 16 +++++++++--- gcc/config/alpha/alpha.md | 9 ++++--- 4 files changed, 95 insertions(+), 9 deletions(-) diff --git a/gcc/ChangeLog b/gcc/ChangeLog index d73a175..275d6c7 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,4 +1,19 @@ -Mon Dec 15 17:48:05 1997 Ricahrd Henderson +Mon Dec 15 18:31:43 1997 Richard Henderson + + * alpha.c (alpha_cpu_name): New variable. + (alpha_mlat_string): Likewise. + (alpha_memory_latency): Likewise. + (override_options): Handle -mmemory-latency. + (alpha_adjust_cost): Adjust load cost for latency. + * alpha.h (TARGET_OPTIONS): Add memory-latency. + (REGISTER_MOVE_COST): Define in terms of memory_latency. Take + TARGET_CIX into account. + (MEMORY_MOVE_COST): Define in terms of memory_latency. + * invoke.texi (DEC Alpha Options): Document -mmemory-latency. + + * alpha.h (ASM_COMMENT_START): New macro. + +Mon Dec 15 17:48:05 1997 Richard Henderson * reload.h, reload1.c (eliminate_regs), caller-save.c, dbxout.c, dwarfout.c, dwarf2out.c, reload.c, sdbout.c: Revert March 15 change. 
diff --git a/gcc/config/alpha/alpha.c b/gcc/config/alpha/alpha.c index 0bf28c4..6b28bda 100644 --- a/gcc/config/alpha/alpha.c +++ b/gcc/config/alpha/alpha.c @@ -22,6 +22,7 @@ Boston, MA 02111-1307, USA. */ #include "config.h" #include +#include #include "rtl.h" #include "regs.h" #include "hard-reg-set.h" @@ -47,6 +48,10 @@ extern int rtx_equal_function_value_matters; /* Specify which cpu to schedule for. */ enum processor_type alpha_cpu; +static char* const alpha_cpu_name[] = +{ + "ev4", "ev5", "ev6" +}; /* Specify how accurate floating-point traps need to be. */ @@ -62,10 +67,11 @@ enum alpha_fp_trap_mode alpha_fptm; /* Strings decoded into the above options. */ -char *alpha_cpu_string; /* -mcpu=ev[4|5] */ +char *alpha_cpu_string; /* -mcpu= */ char *alpha_tp_string; /* -mtrap-precision=[p|s|i] */ char *alpha_fprm_string; /* -mfp-rounding-mode=[n|m|c|d] */ char *alpha_fptm_string; /* -mfp-trap-mode=[n|u|su|sui] */ +char *alpha_mlat_string; /* -mmemory-latency= */ /* Save information from a "cmpxx" operation until the branch or scc is emitted. */ @@ -91,6 +97,10 @@ int alpha_function_needs_gp; static rtx alpha_return_addr_rtx; +/* The number of cycles of latency we should assume on memory reads. */ + +int alpha_memory_latency = 3; + /* Declarations of static functions. 
*/ static void alpha_set_memflags_1 PROTO((rtx, int, int, int)); static rtx alpha_emit_set_const_1 PROTO((rtx, enum machine_mode, @@ -243,6 +253,52 @@ override_options () alpha_fptm = ALPHA_FPTM_SU; } } + + { + char *end; + int lat; + + if (!alpha_mlat_string) + alpha_mlat_string = "L1"; + + if (isdigit (alpha_mlat_string[0]) + && (lat = strtol (alpha_mlat_string, &end, 10), *end == '\0')) + ; + else if ((alpha_mlat_string[0] == 'L' || alpha_mlat_string[0] == 'l') + && isdigit (alpha_mlat_string[1]) + && alpha_mlat_string[2] == '\0') + { + static int const cache_latency[][4] = + { + { 3, 30, -1 }, /* ev4 -- Bcache is a guess */ + { 2, 12, 38 }, /* ev5 -- Bcache from PC164 LMbench numbers */ + { 3, 12, -1 }, /* ev6 -- Ho hum, doesn't exist yet */ + }; + + lat = alpha_mlat_string[1] - '0'; + if (lat < 0 || lat > 3 || cache_latency[alpha_cpu][lat-1] == -1) + { + warning ("L%d cache latency unknown for %s", + lat, alpha_cpu_name[alpha_cpu]); + lat = 3; + } + else + lat = cache_latency[alpha_cpu][lat-1]; + } + else if (! strcmp (alpha_mlat_string, "main")) + { + /* Most current memories have about 370ns latency. This is + a reasonable guess for a fast cpu. */ + lat = 150; + } + else + { + warning ("bad value `%s' for -mmemory-latency", alpha_mlat_string); + lat = 3; + } + + alpha_memory_latency = lat; + } } /* Returns 1 if VALUE is a mask that contains full bytes of zero or ones. */ @@ -1217,6 +1273,10 @@ alpha_adjust_cost (insn, link, dep_insn, cost) insn_type = get_attr_type (insn); dep_insn_type = get_attr_type (dep_insn); + /* Bring in the user-defined memory latency. 
*/ + if (dep_insn_type == TYPE_LD || dep_insn_type == TYPE_LDSYM) + cost += alpha_memory_latency-1; + if (alpha_cpu == PROCESSOR_EV5) { /* And the lord DEC saith: "A special bypass provides an effective diff --git a/gcc/config/alpha/alpha.h b/gcc/config/alpha/alpha.h index 339c4a6..2c45a73 100644 --- a/gcc/config/alpha/alpha.h +++ b/gcc/config/alpha/alpha.h @@ -245,10 +245,11 @@ extern enum alpha_fp_trap_mode alpha_fptm; extern char *m88k_short_data; #define TARGET_OPTIONS { { "short-data-", &m88k_short_data } } */ -extern char *alpha_cpu_string; /* For -mcpu=ev[4|5] */ +extern char *alpha_cpu_string; /* For -mcpu= */ extern char *alpha_fprm_string; /* For -mfp-rounding-mode=[n|m|c|d] */ extern char *alpha_fptm_string; /* For -mfp-trap-mode=[n|u|su|sui] */ extern char *alpha_tp_string; /* For -mtrap-precision=[p|f|i] */ +extern char *alpha_mlat_string; /* For -mmemory-latency= */ #define TARGET_OPTIONS \ { \ @@ -256,6 +257,7 @@ extern char *alpha_tp_string; /* For -mtrap-precision=[p|f|i] */ {"fp-rounding-mode=", &alpha_fprm_string}, \ {"fp-trap-mode=", &alpha_fptm_string}, \ {"trap-precision=", &alpha_tp_string}, \ + {"memory-latency=", &alpha_mlat_string}, \ } /* Sometimes certain combinations of command options do not make sense @@ -792,15 +794,17 @@ enum reg_class { NO_REGS, GENERAL_REGS, FLOAT_REGS, ALL_REGS, reduce the impact of not being able to allocate a pseudo to a hard register. */ -#define REGISTER_MOVE_COST(CLASS1, CLASS2) \ - (((CLASS1) == FLOAT_REGS) == ((CLASS2) == FLOAT_REGS) ? 2 : 20) +#define REGISTER_MOVE_COST(CLASS1, CLASS2) \ + (TARGET_CIX || ((CLASS1) == FLOAT_REGS) == ((CLASS2) == FLOAT_REGS) \ + ? 2 : 4+2*alpha_memory_latency) /* A C expressions returning the cost of moving data of MODE from a register to or from memory. On the Alpha, bump this up a bit. */ -#define MEMORY_MOVE_COST(MODE) 6 +extern int alpha_memory_latency; +#define MEMORY_MOVE_COST(MODE) (2*alpha_memory_latency) /* Provide the cost of a branch. 
Exact meaning under development. */ #define BRANCH_COST 5 @@ -1107,6 +1111,10 @@ extern int alpha_compare_fp_p; IS_LOCAL is 0 if name is used in call, 1 if name is used in definition. */ extern void alpha_need_linkage (); +/* This macro defines the start of an assembly comment. */ + +#define ASM_COMMENT_START " #" + /* This macro produces the initial definition of a function name. On the Alpha, we need to save the function name for the prologue and epilogue. */ diff --git a/gcc/config/alpha/alpha.md b/gcc/config/alpha/alpha.md index cfc98cd..c6e234e 100644 --- a/gcc/config/alpha/alpha.md +++ b/gcc/config/alpha/alpha.md @@ -45,11 +45,13 @@ ;; the address, BBOX, used for branches, EBOX, used for integer ;; operations, and FBOX, used for FP operations. -;; Memory delivers its result in three cycles. +;; Memory delivers its result in three cycles. Actually return one and +;; take care of this in adjust_cost, since we want to handle user-defined +;; memory latencies. (define_function_unit "ev4_abox" 1 0 (and (eq_attr "cpu" "ev4") (eq_attr "type" "ld,ldsym,st")) - 3 1) + 1 1) ;; Branches have no delay cost, but do tie up the unit for two cycles. (define_function_unit "ev4_bbox" 1 1 @@ -127,10 +129,11 @@ 1 1) ;; Memory takes at least 2 clocks, and load cannot dual issue with stores. +;; Return one from here and fix up with user-defined latencies in adjust_cost. (define_function_unit "ev5_ebox" 2 0 (and (eq_attr "cpu" "ev5") (eq_attr "type" "ld,ldsym")) - 2 1) + 1 1) (define_function_unit "ev5_e0" 1 0 (and (eq_attr "cpu" "ev5") -- 2.7.4