From 367242d3da71d3774f5cf58898e561dcf7bdc2d8 Mon Sep 17 00:00:00 2001 From: rth Date: Sat, 18 Apr 1998 01:24:59 +0000 Subject: [PATCH] Sat Apr 18 01:23:11 1998 John Carr * sparc.c, sparc.h, sparc.md, sol2.h: Many changes related to V9 code generation. Use 64 bit instructions in 32 bit mode when possible. Use V9 return instruction. UltraSPARC optimizations. * sparc.h: Change gen_rtx (CODE to gen_rtx_CODE (. git-svn-id: svn+ssh://gcc.gnu.org/svn/gcc/trunk@19278 138bc75d-0d04-0410-961f-82ee72b054a4 --- gcc/ChangeLog | 8 + gcc/config/sparc/sol2.h | 6 + gcc/config/sparc/sparc.c | 546 +++++++++++++++++++-------- gcc/config/sparc/sparc.h | 188 +++++----- gcc/config/sparc/sparc.md | 926 ++++++++++++++++++++++++++++------------------ 5 files changed, 1088 insertions(+), 586 deletions(-) diff --git a/gcc/ChangeLog b/gcc/ChangeLog index 9b52ad1..3100c60 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,3 +1,11 @@ +Sat Apr 18 01:23:11 1998 John Carr + + * sparc.c, sparc.h, sparc.md, sol2.h: Many changes related to V9 + code generation. Use 64 bit instructions in 32 bit mode when + possible. Use V9 return instruction. UltraSPARC optimizations. + + * sparc.h: Change gen_rtx (CODE to gen_rtx_CODE (. + Fri Apr 17 22:38:17 1998 Jeffrey A Law (law@cygnus.com) * global.c (global_alloc): Don't pass HARD_CONST (0) to find_reg, diff --git a/gcc/config/sparc/sol2.h b/gcc/config/sparc/sol2.h index f0c3b13..2c8c5f3 100644 --- a/gcc/config/sparc/sol2.h +++ b/gcc/config/sparc/sol2.h @@ -198,3 +198,9 @@ Boston, MA 02111-1307, USA. */ #define TARGET_LIVE_G0 0 #undef TARGET_BROKEN_SAVERESTORE #define TARGET_BROKEN_SAVERESTORE 0 + +/* Solaris allows 64 bit out and global registers in 32 bit mode. + sparc_override_options will disable V8+ if not generating V9 code. 
*/ +#undef TARGET_DEFAULT +#define TARGET_DEFAULT (MASK_APP_REGS + MASK_EPILOGUE + MASK_FPU + MASK_V8PLUS) + diff --git a/gcc/config/sparc/sparc.c b/gcc/config/sparc/sparc.c index 36ccb15..caebb08 100644 --- a/gcc/config/sparc/sparc.c +++ b/gcc/config/sparc/sparc.c @@ -22,17 +22,7 @@ the Free Software Foundation, 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */ #include "config.h" -#include -#ifdef HAVE_STDLIB_H -#include -#endif -#ifdef HAVE_STRING_H -#include -#else -#ifdef HAVE_STRINGS_H -#include -#endif -#endif +#include "system.h" #include "tree.h" #include "rtl.h" #include "regs.h" @@ -208,11 +198,9 @@ sparc_override_options () { "sparclet", PROCESSOR_SPARCLET, MASK_ISA, MASK_SPARCLET }, /* TEMIC sparclet */ { "tsc701", PROCESSOR_TSC701, MASK_ISA, MASK_SPARCLET }, - /* "v8plus" is what Sun calls Solaris2.5 running on UltraSPARC's. */ - { "v8plus", PROCESSOR_V8PLUS, MASK_ISA, MASK_V8PLUS }, { "v9", PROCESSOR_V9, MASK_ISA, MASK_V9 }, /* TI ultrasparc */ - { "ultrasparc", PROCESSOR_ULTRASPARC, MASK_ISA, MASK_V8PLUS }, + { "ultrasparc", PROCESSOR_ULTRASPARC, MASK_ISA, MASK_V9 }, { 0 } }; struct cpu_table *cpu; @@ -288,6 +276,10 @@ sparc_override_options () if (TARGET_V9 && TARGET_ARCH32) target_flags |= MASK_DEPRECATED_V8_INSNS; + /* V8PLUS requires V9 */ + if (! TARGET_V9) + target_flags &= ~MASK_V8PLUS; + /* Validate -malign-loops= value, or provide default. */ if (sparc_align_loops_string) { @@ -333,40 +325,6 @@ sparc_override_options () sparc_init_modes (); } -/* Float conversions (v9 only). - - The floating point registers cannot hold DImode values because SUBREG's - on them get the wrong register. "(subreg:SI (reg:DI M int-reg) 0)" is the - same as "(subreg:SI (reg:DI N float-reg) 1)", but gcc doesn't know how to - turn the "0" to a "1". Therefore, we must explicitly do the conversions - to/from int/fp regs. `sparc64_fpconv_stack_slot' is the address of an - 8 byte stack slot used during the transfer. - ??? 
I could have used [%fp-16] but I didn't want to add yet another - dependence on this. */ -/* ??? Can we use assign_stack_temp here? */ - -static rtx fpconv_stack_temp; - -/* Called once for each function. */ - -void -sparc_init_expanders () -{ - fpconv_stack_temp = NULL_RTX; -} - -/* Assign a stack temp for fp/int DImode conversions. */ - -rtx -sparc64_fpconv_stack_temp () -{ - if (fpconv_stack_temp == NULL_RTX) - fpconv_stack_temp = - assign_stack_local (DImode, GET_MODE_SIZE (DImode), 0); - - return fpconv_stack_temp; -} - /* Miscellaneous utilities. */ /* Nonzero if CODE, a comparison, is suitable for use in v9 conditional move @@ -380,6 +338,14 @@ v9_regcmp_p (code) || code == LE || code == GT); } +/* 32 bit registers are zero extended so only zero/non-zero comparisons + work. */ +int +v8plus_regcmp_p (code) + enum rtx_code code; +{ + return (code == EQ || code == NE); +} /* Operand constraints. */ @@ -798,6 +764,16 @@ v9_regcmp_op (op, mode) return v9_regcmp_p (code); } +int +v8plus_regcmp_op (op, mode) + register rtx op; + enum machine_mode mode; +{ + enum rtx_code code = GET_CODE (op); + + return (code == EQ || code == NE); +} + /* Return 1 if this is a SIGN_EXTEND or ZERO_EXTEND operation. 
*/ int @@ -848,8 +824,13 @@ arith_operand (op, mode) rtx op; enum machine_mode mode; { - return (register_operand (op, mode) - || (GET_CODE (op) == CONST_INT && SMALL_INT (op))); + int val; + if (register_operand (op, mode)) + return 1; + if (GET_CODE (op) != CONST_INT) + return 0; + val = INTVAL (op) & 0xffffffff; + return SPARC_SIMM13_P (val); } /* Return true if OP is a register, or is a CONST_INT that can fit in a @@ -1059,8 +1040,15 @@ gen_compare_reg (code, x, y) else cc_reg = gen_rtx (REG, mode, SPARC_ICC_REG); - emit_insn (gen_rtx (SET, VOIDmode, cc_reg, - gen_rtx (COMPARE, mode, x, y))); + if (TARGET_V8PLUS && mode == CCXmode) + { + emit_insn (gen_cmpdi_v8plus (x, y)); + } + else + { + emit_insn (gen_rtx (SET, VOIDmode, cc_reg, + gen_rtx (COMPARE, mode, x, y))); + } return cc_reg; } @@ -1287,14 +1275,53 @@ eligible_for_epilogue_delay (trial, slot) || register_operand (XEXP (src, 1), DImode))) return 1; - /* This matches "*return_subsi". */ - else if (GET_CODE (src) == MINUS - && register_operand (XEXP (src, 0), SImode) - && small_int (XEXP (src, 1), VOIDmode) - && INTVAL (XEXP (src, 1)) != -4096) + return 0; +} + +static int +check_return_regs (x) + rtx x; +{ + switch (GET_CODE (x)) + { + case REG: + return IN_OR_GLOBAL_P (x); + + case CONST_INT: + case CONST_DOUBLE: + case CONST: + case SYMBOL_REF: + case LABEL_REF: return 1; + case SET: + case IOR: + case AND: + case XOR: + case PLUS: + case MINUS: + if (check_return_regs (XEXP (x, 1)) == 0) return 0; + case NOT: + case NEG: + case MEM: + return check_return_regs (XEXP (x, 0)); + + default: + return 0; + } + +} + +/* Return 1 if TRIAL references only in and global registers. */ +int +eligible_for_return_delay (trial) + rtx trial; +{ + if (GET_CODE (PATTERN (trial)) != SET) + return 0; + + return check_return_regs (PATTERN (trial)); } int @@ -1346,6 +1373,10 @@ reg_unused_after (reg, insn) /* The table we use to reference PIC data. */ static rtx global_offset_table; +/* The function we use to get at it. 
*/ +static rtx get_pc_symbol; +static char get_pc_symbol_name[256]; + /* Ensure that we are not using patterns that are not OK with PIC. */ int @@ -1499,61 +1530,11 @@ initialize_pic () static rtx pic_setup_code () { - rtx pic_pc_rtx; - rtx l1, l2; rtx seq; start_sequence (); - - /* If -O0, show the PIC register remains live before this. */ - if (obey_regdecls) - emit_insn (gen_rtx (USE, VOIDmode, pic_offset_table_rtx)); - - l1 = gen_label_rtx (); - - pic_pc_rtx = gen_rtx (CONST, Pmode, - gen_rtx (MINUS, Pmode, - global_offset_table, - gen_rtx (CONST, Pmode, - gen_rtx (MINUS, Pmode, - gen_rtx (LABEL_REF, - VOIDmode, l1), - pc_rtx)))); - - /* sparc64: the RDPC instruction doesn't pair, and puts 4 bubbles in the - pipe to boot. So don't use it here, especially when we're - doing a save anyway because of %l7. */ - - l2 = gen_label_rtx (); - emit_label (l1); - - /* Iff we are doing delay branch optimization, slot the sethi up - here so that it will fill the delay slot of the call. */ - if (flag_delayed_branch) - emit_insn (gen_rtx (SET, VOIDmode, pic_offset_table_rtx, - gen_rtx (HIGH, Pmode, pic_pc_rtx))); - - /* Note that we pun calls and jumps here! */ - emit_jump_insn (gen_get_pc_via_call (l2, l1)); - - emit_label (l2); - - if (!flag_delayed_branch) - emit_insn (gen_rtx (SET, VOIDmode, pic_offset_table_rtx, - gen_rtx (HIGH, Pmode, pic_pc_rtx))); - - emit_insn (gen_rtx (SET, VOIDmode, - pic_offset_table_rtx, - gen_rtx (LO_SUM, Pmode, - pic_offset_table_rtx, pic_pc_rtx))); - emit_insn (gen_rtx (SET, VOIDmode, - pic_offset_table_rtx, - gen_rtx (PLUS, Pmode, - pic_offset_table_rtx, - gen_rtx (REG, Pmode, 15)))); - - /* emit_insn (gen_rtx (ASM_INPUT, VOIDmode, "!#PROLOGUE# 1")); */ - + emit_insn (gen_get_pc (pic_offset_table_rtx, global_offset_table, + get_pc_symbol)); seq = gen_sequence (); end_sequence (); @@ -1575,9 +1556,21 @@ finalize_pic () if (! flag_pic) abort (); + /* If we haven't emitted the special get_pc helper function, do so now. 
*/ + if (get_pc_symbol_name[0] == 0) + { + ASM_GENERATE_INTERNAL_LABEL (get_pc_symbol_name, "LGETPC", 0); + + text_section (); + ASM_OUTPUT_ALIGN (asm_out_file, 3); + ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, "LGETPC", 0); + fputs ("\tretl\n\tadd %o7,%l7,%l7\n", asm_out_file); + } + /* Initialize every time through, since we can't easily know this to be permanent. */ global_offset_table = gen_rtx (SYMBOL_REF, Pmode, "_GLOBAL_OFFSET_TABLE_"); + get_pc_symbol = gen_rtx (SYMBOL_REF, Pmode, get_pc_symbol_name); flag_pic = 0; emit_insn_after (pic_setup_code (), get_insns ()); @@ -1618,6 +1611,15 @@ emit_move_sequence (operands, mode) /* Handle most common case first: storing into a register. */ if (register_operand (operand0, mode)) { + /* Integer constant to FP register. */ + if (GET_CODE (operand0) == REG + && REGNO (operand0) >= 32 + && REGNO (operand0) < FIRST_PSEUDO_REGISTER + && CONSTANT_P (operand1)) + { + operand1 = validize_mem (force_const_mem (GET_MODE (operand0), operand1)); + } + if (register_operand (operand1, mode) || (GET_CODE (operand1) == CONST_INT && SMALL_INT (operand1)) || (GET_CODE (operand1) == CONST_DOUBLE @@ -1683,6 +1685,7 @@ emit_move_sequence (operands, mode) } else if (GET_CODE (operand1) == CONST_INT ? (! SMALL_INT (operand1) + && INTVAL (operand1) != -4096 && ! SPARC_SETHI_P (INTVAL (operand1))) : GET_CODE (operand1) == CONST_DOUBLE ? ! arith_double_operand (operand1, DImode) @@ -1704,16 +1707,20 @@ emit_move_sequence (operands, mode) rtx temp = ((reload_in_progress || mode == DImode) ? 
operand0 : gen_reg_rtx (mode)); + if (mode == SImode) + { + if (GET_CODE (operand1) == CONST_INT) + operand1 = GEN_INT (INTVAL (operand1) & 0xffffffff); + else if (GET_CODE (operand1) == CONST_DOUBLE) + operand1 = GEN_INT (CONST_DOUBLE_LOW (operand1) & 0xffffffff); + } + if (TARGET_ARCH64 && mode == DImode) emit_insn (gen_sethi_di_sp64 (temp, operand1)); else emit_insn (gen_rtx (SET, VOIDmode, temp, gen_rtx (HIGH, mode, operand1))); - if (GET_CODE (operand1) == CONST_INT) - operand1 = GEN_INT (INTVAL (operand1) & 0xffffffff); - else if (GET_CODE (operand1) == CONST_DOUBLE) - operand1 = GEN_INT (CONST_DOUBLE_LOW (operand1) & 0xffffffff); operands[1] = gen_rtx (LO_SUM, mode, temp, operand1); } } @@ -1763,10 +1770,16 @@ singlemove_string (operands) else return "sethi %%hi(%a1),%0"; } - else if (GET_CODE (operands[1]) == CONST_INT - && ! CONST_OK_FOR_LETTER_P (INTVAL (operands[1]), 'I')) + else if (GET_CODE (operands[1]) == CONST_INT) { - HOST_WIDE_INT i = INTVAL (operands[1]); + /* Only consider the low 32 bits of the constant. */ + int i = INTVAL (operands[1]) & 0xffffffff; + + if (SPARC_SIMM13_P (i)) + return "mov %1,%0"; + + if (i == 4096) + return "sub %%g0,-4096,%0"; /* If all low order 10 bits are clear, then we only need a single sethi insn to load the constant. */ @@ -2291,9 +2304,9 @@ output_move_quad (operands) operands[2] = adj_offsettable_operand (mem, 8); /* ??? In arch64 case, shouldn't we use ldd/std for fp regs. */ if (mem == op1) - return TARGET_ARCH64 ? "ldx %1,%0;ldx %2,%R0" : "ldd %1,%0;ldd %2,%S0"; + return TARGET_ARCH64 ? "ldx %1,%0\n\tldx %2,%R0" : "ldd %1,%0\n\tldd %2,%S0"; else - return TARGET_ARCH64 ? "stx %1,%0;stx %R1,%2" : "std %1,%0;std %S1,%2"; + return TARGET_ARCH64 ? "stx %1,%0\n\tstx %R1,%2" : "std %1,%0\n\tstd %S1,%2"; } } @@ -2968,13 +2981,10 @@ enum sparc_mode_class { /* Modes for double-float and smaller quantities. */ #define DF_MODES (S_MODES | D_MODES) -/* ??? Sparc64 fp regs cannot hold DImode values. 
*/ -#define DF_MODES64 (SF_MODES | (1 << (int) DF_MODE) /* | (1 << (int) D_MODE)*/) +#define DF_MODES64 DF_MODES /* Modes for double-float only quantities. */ -/* ??? Sparc64 fp regs cannot hold DImode values. - See fix_truncsfdi2. */ -#define DF_ONLY_MODES ((1 << (int) DF_MODE) /*| (1 << (int) D_MODE)*/) +#define DF_ONLY_MODES ((1 << (int) DF_MODE) | (1 << (int) D_MODE)) /* Modes for double-float and larger quantities. */ #define DF_UP_MODES (DF_ONLY_MODES | TF_ONLY_MODES) @@ -2985,8 +2995,6 @@ enum sparc_mode_class { /* Modes for quad-float and smaller quantities. */ #define TF_MODES (DF_MODES | TF_ONLY_MODES) -/* ??? Sparc64 fp regs cannot hold DImode values. - See fix_truncsfdi2. */ #define TF_MODES64 (DF_MODES64 | TF_ONLY_MODES) /* Modes for condition codes. */ @@ -3115,7 +3123,9 @@ sparc_init_modes () /* Initialize the array used by REGNO_REG_CLASS. */ for (i = 0; i < FIRST_PSEUDO_REGISTER; i++) { - if (i < 32) + if (i < 16 && TARGET_V8PLUS) + sparc_regno_reg_class[i] = I64_REGS; + else if (i < 32) sparc_regno_reg_class[i] = GENERAL_REGS; else if (i < 64) sparc_regno_reg_class[i] = FP_REGS; @@ -3584,6 +3594,8 @@ output_function_epilogue (file, size, leaf_function) PATTERN (insn))); final_scan_insn (insn, file, 1, 0, 1); } + else if (TARGET_V9 && ! SKIP_CALLERS_UNIMP_P) + fputs ("\treturn %i7+8\n\tnop\n", file); else fprintf (file, "\t%s\n\trestore\n", ret); } @@ -4566,22 +4578,77 @@ output_v9branch (op, reg, label, reversed, annul, noop) return string; } -/* Output assembler code to return from a function. */ +/* Renumber registers in delay slot. Replace registers instead of + renumbering because they may be shared. -/* ??? v9: Update to use the new `return' instruction. Also, add patterns to - md file for the `return' instruction. */ + This does not handle instructions other than move. 
*/ + +static void +epilogue_renumber (where) + rtx *where; +{ + rtx x = *where; + enum rtx_code code = GET_CODE (x); + + switch (code) + { + case MEM: + *where = x = copy_rtx (x); + epilogue_renumber (&XEXP (x, 0)); + return; + + case REG: + { + int regno = REGNO (x); + if (regno > 8 && regno < 24) + abort (); + if (regno >= 24 && regno < 32) + *where = gen_rtx_REG (GET_MODE (x), regno - 16); + return; + } + case CONST_INT: + case CONST_DOUBLE: + case CONST: + case SYMBOL_REF: + case LABEL_REF: + return; + + case IOR: + case AND: + case XOR: + case PLUS: + case MINUS: + epilogue_renumber (&XEXP (x, 1)); + case NEG: + case NOT: + epilogue_renumber (&XEXP (x, 0)); + return; + + default: + debug_rtx (*where); + abort(); + } +} + +/* Output assembler code to return from a function. */ char * output_return (operands) rtx *operands; { + rtx delay = final_sequence ? XVECEXP (final_sequence, 0, 1) : 0; + if (leaf_label) { operands[0] = leaf_label; - return "b,a %l0"; + return "b%* %l0%("; } else if (leaf_function) { + /* No delay slot in a leaf function. */ + if (delay) + abort (); + /* If we didn't allocate a frame pointer for the current function, the stack pointer might have been adjusted. Output code to restore it now. */ @@ -4621,8 +4688,22 @@ output_return (operands) return "sethi %%hi(%a0),%%g1\n\tretl\n\tadd %%sp,%%g1,%%sp"; } } + else if (TARGET_V9) + { + if (delay) + { + epilogue_renumber (&SET_DEST (PATTERN (delay))); + epilogue_renumber (&SET_SRC (PATTERN (delay))); + } + if (SKIP_CALLERS_UNIMP_P) + return "return %%i7+12%#"; + else + return "return %%i7+8%#"; + } else { + if (delay) + abort (); if (SKIP_CALLERS_UNIMP_P) return "jmp %%i7+12\n\trestore"; else @@ -4795,14 +4876,14 @@ print_operand (file, x, code) /* On UltraSPARC, a branch in a delay slot causes a pipeline flush. Always emit a nop in case the next instruction is a branch. 
*/ if (dbr_sequence_length () == 0 - && (optimize && (int)sparc_cpu < PROCESSOR_V8PLUS)) + && (optimize && (int)sparc_cpu < PROCESSOR_V9)) fputs (",a", file); return; case '(': /* Output a 'nop' if there's nothing for the delay slot and we are not optimizing. This is always used with '*' above. */ if (dbr_sequence_length () == 0 - && ! (optimize && (int)sparc_cpu < PROCESSOR_V8PLUS)) + && ! (optimize && (int)sparc_cpu < PROCESSOR_V9)) fputs ("\n\tnop", file); return; case '_': @@ -6066,7 +6147,8 @@ ultrasparc_adjust_cost (insn, link, dep_insn, cost) dep_type = get_attr_type (dep_insn); #define SLOW_FP(dep_type) \ -(dep_type == TYPE_FPSQRT || dep_type == TYPE_FPDIVS || dep_type == TYPE_FPDIVD) +(dep_type == TYPE_FPSQRT || dep_type == TYPE_FPDIVS || dep_type == TYPE_FPDIVD) + switch (REG_NOTE_KIND (link)) { case 0: @@ -6080,16 +6162,16 @@ ultrasparc_adjust_cost (insn, link, dep_insn, cost) case TYPE_FPSTORE: if (! SLOW_FP (dep_type)) return 0; - break; + return cost; case TYPE_STORE: if (GET_CODE (pat) != SET || GET_CODE (dep_pat) != SET) return cost; + if (rtx_equal_p (SET_DEST (dep_pat), SET_SRC (pat))) /* The dependency between the two instructions is on the data that is being stored. Assume that the address of the store is not also dependent. */ - if (rtx_equal_p (SET_DEST (dep_pat), SET_SRC (pat))) return 0; return cost; @@ -6109,15 +6191,15 @@ ultrasparc_adjust_cost (insn, link, dep_insn, cost) compensate for a dependency which might not really exist, and 0. */ if (GET_CODE (pat) != SET || GET_CODE (dep_pat) != SET - || GET_CODE (SET_DEST (pat)) != MEM - || GET_CODE (SET_SRC (dep_pat)) != MEM - || ! rtx_equal_p (XEXP (SET_DEST (pat), 0), - XEXP (SET_SRC (dep_pat), 0))) + || GET_CODE (SET_SRC (pat)) != MEM + || GET_CODE (SET_DEST (dep_pat)) != MEM + || ! rtx_equal_p (XEXP (SET_SRC (pat), 0), + XEXP (SET_DEST (dep_pat), 0))) return cost + 2; return cost + 8; } - break; + return cost; case TYPE_BRANCH: /* Compare to branch latency is 0. 
There is no benefit from @@ -6128,16 +6210,15 @@ ultrasparc_adjust_cost (insn, link, dep_insn, cost) compare to conditional move. */ if (dep_type == TYPE_FPCMP) return cost - 1; - break; + return cost; case TYPE_FPCMOVE: /* FMOVR class instructions can not issue in the same cycle or the cycle after an instruction which writes any integer register. Model this as cost 2 for dependent instructions. */ - if (GET_CODE (PATTERN (insn)) == SET - && (GET_MODE (SET_DEST (PATTERN (insn))) == SFmode - || GET_MODE (SET_DEST (PATTERN (insn))) == DFmode) + if ((dep_type == TYPE_IALU || dep_type == TYPE_UNARY + || dep_type == TYPE_BINARY) && cost < 2) return 2; /* Otherwise check as for integer conditional moves. */ @@ -6149,7 +6230,7 @@ ultrasparc_adjust_cost (insn, link, dep_insn, cost) to model. */ if (dep_type == TYPE_LOAD || dep_type == TYPE_SLOAD) return cost + 3; - break; + return cost; default: break; @@ -6190,9 +6271,8 @@ sparc_issue_rate () { default: return 1; - case PROCESSOR_V8PLUS: case PROCESSOR_V9: - /* Assume these generic V9 types are capable of at least dual-issue. */ + /* Assume V9 processors are capable of at least dual-issue. */ return 2; case PROCESSOR_SUPERSPARC: return 3; @@ -6200,3 +6280,175 @@ sparc_issue_rate () return 4; } } + +static int +set_extends(x, insn) + rtx x, insn; +{ + register rtx pat = PATTERN (insn); + + switch (GET_CODE (SET_SRC (pat))) + { + /* Load and some shift instructions zero extend. */ + case MEM: + case ZERO_EXTEND: + /* sethi clears the high bits */ + case HIGH: + /* LO_SUM is used with sethi. 
sethi cleared the high + bits and the values used with lo_sum are positive */ + case LO_SUM: + /* UNSPEC is v8plus_clear_high */ + case UNSPEC: + /* Store flag stores 0 or 1 */ + case LT: case LTU: + case GT: case GTU: + case LE: case LEU: + case GE: case GEU: + case EQ: + case NE: + return 1; + case AND: + { + rtx op1 = XEXP (SET_SRC (pat), 1); + if (GET_CODE (op1) == CONST_INT) + return INTVAL (op1) >= 0; + if (GET_CODE (XEXP (SET_SRC (pat), 0)) == REG + && sparc_check_64 (XEXP (SET_SRC (pat), 0), insn) == 1) + return 1; + if (GET_CODE (op1) == REG + && sparc_check_64 ((op1), insn) == 1) + return 1; + } + case ASHIFT: + case LSHIFTRT: + return GET_MODE (SET_SRC (pat)) == SImode; + /* Positive integers leave the high bits zero. */ + case CONST_DOUBLE: + return ! (CONST_DOUBLE_LOW (x) & 0x80000000); + case CONST_INT: + return ! (INTVAL (x) & 0x80000000); + case ASHIFTRT: + case SIGN_EXTEND: + return - (GET_MODE (SET_SRC (pat)) == SImode); + default: + return 0; + } +} + +/* Return 0 if the high 32 bits of X (the low word of X, if DImode) are + unknown. Return 1 if the high bits are zero, -1 if the register is + sign extended. */ +int +sparc_check_64 (x, insn) + rtx x, insn; +{ + /* If a register is set only once it is safe to ignore insns this + code does not know how to handle. The loop will either recognize + the single set and return the correct value or fail to recognize + it and return 0. */ + int set_once = 0; + + if (GET_CODE (x) == REG + && flag_expensive_optimizations + && REG_N_SETS (REGNO (x)) == 1) + set_once = 1; + + if (insn == 0) + if (set_once) + insn = get_last_insn_anywhere (); + else + return 0; + + while (insn = PREV_INSN (insn)) + { + switch (GET_CODE (insn)) + { + case JUMP_INSN: + case NOTE: + break; + case CODE_LABEL: + case CALL_INSN: + default: + if (! 
set_once) + return 0; + break; + case INSN: + { + rtx pat = PATTERN (insn); + if (GET_CODE (pat) != SET) + return 0; + if (rtx_equal_p (x, SET_DEST (pat))) + return set_extends (x, insn); + if (reg_overlap_mentioned_p (SET_DEST (pat), x)) + return 0; + } + } + } + return 0; +} + +char * +sparc_v8plus_shift (operands, insn, opcode) + rtx *operands; + rtx insn; + char *opcode; +{ + static char asm_code[60]; + + if (GET_CODE (operands[3]) == SCRATCH) + operands[3] = operands[0]; + output_asm_insn ("sllx %H1,32,%3", operands); + if (sparc_check_64 (operands[1], insn) <= 0) + output_asm_insn ("srl %L1,0,%L1", operands); + output_asm_insn ("or %L1,%3,%3", operands); + + strcpy(asm_code, opcode); + if (which_alternative != 2) + return strcat (asm_code, " %0,%2,%L0\n\tsrlx %L0,32,%H0"); + else + return strcat (asm_code, " %3,%2,%3\n\tsrlx %3,32,%H0\n\tmov %3,%L0"); +} + + +/* Return 1 if DEST and SRC reference only global and in registers. */ + +int +sparc_return_peephole_ok (dest, src) + rtx dest, src; +{ + if (! TARGET_V9) + return 0; + if (leaf_function) + return 0; + if (GET_CODE (src) != CONST_INT + && (GET_CODE (src) != REG || ! IN_OR_GLOBAL_P (src))) + return 0; + return IN_OR_GLOBAL_P (dest); +} + +int +delay_operand (op, mode) + rtx op; + enum machine_mode mode; +{ + switch (GET_CODE (op)) + { + case CONST: + case CONST_INT: + case SYMBOL_REF: + case LABEL_REF: + return 1; + + case MEM: + return delay_operand (XEXP (op, 0), Pmode); + + case REG: + return IN_OR_GLOBAL_P (op); + + case PLUS: + return delay_operand (XEXP (op, 0), Pmode) && delay_operand (XEXP (op, 1), Pmode); + + default: + return 0; + } +} diff --git a/gcc/config/sparc/sparc.h b/gcc/config/sparc/sparc.h index 1659e68..c573f40 100644 --- a/gcc/config/sparc/sparc.h +++ b/gcc/config/sparc/sparc.h @@ -158,7 +158,6 @@ Unrecognized value in TARGET_CPU_DEFAULT. 
%{mcpu=f930:-D__sparclite__} %{mcpu=f934:-D__sparclite__} \ %{mcpu=v8:-D__sparc_v8__} \ %{mcpu=supersparc:-D__supersparc__ -D__sparc_v8__} \ -%{mcpu=v8plus:-D__sparc_v9__} \ %{mcpu=v9:-D__sparc_v9__} \ %{mcpu=ultrasparc:-D__sparc_v9__} \ %{!mcpu*:%{!mcypress:%{!msparclite:%{!mf930:%{!mf934:%{!mv8:%{!msupersparc:%(cpp_cpu_default)}}}}}}} \ @@ -209,9 +208,9 @@ Unrecognized value in TARGET_CPU_DEFAULT. %{mf930:-Asparclite} %{mf934:-Asparclite} \ %{mcpu=sparclite:-Asparclite} \ %{mcpu=f930:-Asparclite} %{mcpu=f934:-Asparclite} \ -%{mcpu=v8plus:-Av8plus} \ +%{mv8plus:-Av8plus} \ %{mcpu=v9:-Av9} \ -%{mcpu=ultrasparc:-Av9a} \ +%{mcpu=ultrasparc:%{!mv8plus:-Av9a}} \ %{!mcpu*:%{!mcypress:%{!msparclite:%{!mf930:%{!mf934:%{!mv8:%{!msupersparc:%(asm_cpu_default)}}}}}}} \ " @@ -453,13 +452,17 @@ extern int target_flags; #define MASK_VIS 0x1000000 #define TARGET_VIS (target_flags & MASK_VIS) -/* Compile for Solaris V8+. 64 bit instructions are available but the - high 32 bits of all registers except the globals and current outs may - be cleared at any time. */ +/* Compile for Solaris V8+. 32 bit Solaris preserves the high bits of + the current out and global registers. Linux saves the high bits on + context switches but not signals. */ #define MASK_V8PLUS 0x2000000 #define TARGET_V8PLUS (target_flags & MASK_V8PLUS) -/* See sparc.md */ +/* TARGET_HARD_MUL: Use hardware multiply instructions but not %y. + TARGET_HARD_MUL32: Use hardware multiply instructions with rd %y + to get high 32 bits. False in V8+ or V9 because multiply stores + a 64 bit result in a register. */ + #define TARGET_HARD_MUL32 \ ((TARGET_V8 || TARGET_SPARCLITE \ || TARGET_SPARCLET || TARGET_DEPRECATED_V8_INSNS) \ @@ -495,6 +498,8 @@ extern int target_flags; {"no-app-regs", -MASK_APP_REGS}, \ {"hard-quad-float", MASK_HARD_QUAD}, \ {"soft-quad-float", -MASK_HARD_QUAD}, \ + {"v8plus", MASK_V8PLUS}, \ + {"no-v8plus", -MASK_V8PLUS}, \ {"vis", MASK_VIS}, \ /* ??? These are deprecated, coerced to -mcpu=. 
Delete in 2.9. */ \ {"cypress", 0}, \ @@ -502,7 +507,6 @@ extern int target_flags; {"f930", 0}, \ {"f934", 0}, \ {"v8", 0}, \ - {"v8plus", 0}, \ {"supersparc", 0}, \ /* End of deprecated options. */ \ /* -mptrNN exists for *experimental* purposes. */ \ @@ -535,7 +539,6 @@ enum processor_type { PROCESSOR_F934, PROCESSOR_SPARCLET, PROCESSOR_TSC701, - PROCESSOR_V8PLUS, PROCESSOR_V9, PROCESSOR_ULTRASPARC }; @@ -977,6 +980,12 @@ while (0) : (GET_MODE_SIZE (MODE) + 3) / 4) \ : ((GET_MODE_SIZE (MODE) + UNITS_PER_WORD - 1) / UNITS_PER_WORD)) +/* A subreg in 64 bit mode will have the wrong offset for a floating point + register. The least significant part is at offset 1, compared to 0 for + integer registers. */ +#define ALTER_HARD_SUBREG(TMODE, WORD, FMODE, REGNO) \ + (TARGET_ARCH64 && (REGNO) >= 32 && (REGNO) < 96 && (TMODE) == SImode ? 1 : ((REGNO) + (WORD))) + /* Value is 1 if hard register REGNO can hold a value of machine-mode MODE. See sparc.c for how we initialize this. */ extern int *hard_regno_mode_classes; @@ -1093,14 +1102,14 @@ extern int sparc_mode_class[]; #define STRUCT_VALUE \ (TARGET_ARCH64 \ ? 0 \ - : gen_rtx (MEM, Pmode, \ - gen_rtx (PLUS, Pmode, stack_pointer_rtx, \ + : gen_rtx_MEM (Pmode, \ + gen_rtx_PLUS (Pmode, stack_pointer_rtx, \ GEN_INT (STRUCT_VALUE_OFFSET)))) #define STRUCT_VALUE_INCOMING \ (TARGET_ARCH64 \ ? 0 \ - : gen_rtx (MEM, Pmode, \ - gen_rtx (PLUS, Pmode, frame_pointer_rtx, \ + : gen_rtx_MEM (Pmode, \ + gen_rtx_PLUS (Pmode, frame_pointer_rtx, \ GEN_INT (STRUCT_VALUE_OFFSET)))) /* Define the classes of registers for register constraints in the @@ -1157,8 +1166,8 @@ extern int sparc_mode_class[]; ??? Should %fcc[0123] be handled similarly? 
*/ -enum reg_class { NO_REGS, FPCC_REGS, GENERAL_REGS, FP_REGS, EXTRA_FP_REGS, - GENERAL_OR_FP_REGS, GENERAL_OR_EXTRA_FP_REGS, +enum reg_class { NO_REGS, FPCC_REGS, I64_REGS, GENERAL_REGS, FP_REGS, + EXTRA_FP_REGS, GENERAL_OR_FP_REGS, GENERAL_OR_EXTRA_FP_REGS, ALL_REGS, LIM_REG_CLASSES }; #define N_REG_CLASSES (int) LIM_REG_CLASSES @@ -1166,15 +1175,16 @@ enum reg_class { NO_REGS, FPCC_REGS, GENERAL_REGS, FP_REGS, EXTRA_FP_REGS, /* Give names of register classes as strings for dump file. */ #define REG_CLASS_NAMES \ - { "NO_REGS", "FPCC_REGS", "GENERAL_REGS", "FP_REGS", "EXTRA_FP_REGS", \ - "GENERAL_OR_FP_REGS", "GENERAL_OR_EXTRA_FP_REGS", "ALL_REGS" } + { "NO_REGS", "FPCC_REGS", "I64_REGS", "GENERAL_REGS", "FP_REGS", \ + "EXTRA_FP_REGS", "GENERAL_OR_FP_REGS", "GENERAL_OR_EXTRA_FP_REGS", \ + "ALL_REGS" } /* Define which registers fit in which classes. This is an initializer for a vector of HARD_REG_SET of length N_REG_CLASSES. */ #define REG_CLASS_CONTENTS \ - {{0, 0, 0, 0}, {0, 0, 0, 0xf}, \ + {{0, 0, 0, 0}, {0, 0, 0, 0xf}, {0xffff, 0, 0, 0}, \ {-1, 0, 0, 0}, {0, -1, 0, 0}, {0, -1, -1, 0}, \ {-1, -1, 0, 0}, {-1, -1, -1, 0}, {-1, -1, -1, 0x1f}} @@ -1266,15 +1276,18 @@ extern char leaf_reg_remap[]; /* Get reg_class from a letter such as appears in the machine description. In the not-v9 case, coerce v9's 'e' class to 'f', so we can use 'e' in the .md file for v8 and v9. - Use 'd' and 'b' for single precision VIS operations if TARGET_VIS. */ + 'd' and 'b' are used for single and double precision VIS operations, + if TARGET_VIS. + 'h' is used for V8+ 64 bit global and out registers. */ #define REG_CLASS_FROM_LETTER(C) \ (TARGET_V9 \ ? ((C) == 'f' ? FP_REGS \ : (C) == 'e' ? EXTRA_FP_REGS \ : (C) == 'c' ? FPCC_REGS \ - : ((C) == 'd' && TARGET_VIS) ? FP_REGS \ - : ((C) == 'b' && TARGET_VIS) ? FP_REGS \ + : ((C) == 'd' && TARGET_VIS) ? FP_REGS\ + : ((C) == 'b' && TARGET_VIS) ? EXTRA_FP_REGS\ + : ((C) == 'h' && TARGET_V8PLUS) ? I64_REGS\ : NO_REGS) \ : ((C) == 'f' ? 
FP_REGS \ : (C) == 'e' ? FP_REGS \ @@ -1299,6 +1312,8 @@ extern char leaf_reg_remap[]; /* 10 and 11 bit immediates are only used for a few specific insns. SMALL_INT is used throughout the port so we continue to use it. */ #define SMALL_INT(X) (SPARC_SIMM13_P (INTVAL (X))) +/* 13 bit immediate, considering only the low 32 bits */ +#define SMALL_INT32(X) (SPARC_SIMM13_P ((int)INTVAL (X) & 0xffffffff)) #define SPARC_SETHI_P(X) \ (((unsigned HOST_WIDE_INT) (X) & ~(unsigned HOST_WIDE_INT) 0xfffffc00) == 0) @@ -1366,7 +1381,7 @@ extern char leaf_reg_remap[]; #define SECONDARY_MEMORY_NEEDED_RTX(MODE) \ (get_frame_size () == 0 \ ? assign_stack_local (MODE, GET_MODE_SIZE (MODE), 0) \ - : gen_rtx (MEM, MODE, gen_rtx (PLUS, Pmode, frame_pointer_rtx, \ + : gen_rtx_MEM (MODE, gen_rtx_PLUS (Pmode, frame_pointer_rtx, \ GEN_INT (STARTING_FRAME_OFFSET)))) /* Get_secondary_mem widens it's argument to BITS_PER_WORD which loses on v9 @@ -1501,18 +1516,18 @@ extern char leaf_reg_remap[]; /* On SPARC the value is found in the first "output" register. */ #define FUNCTION_VALUE(VALTYPE, FUNC) \ - gen_rtx (REG, TYPE_MODE (VALTYPE), BASE_RETURN_VALUE_REG (TYPE_MODE (VALTYPE))) + gen_rtx_REG (TYPE_MODE (VALTYPE), BASE_RETURN_VALUE_REG (TYPE_MODE (VALTYPE))) /* But the called function leaves it in the first "input" register. */ #define FUNCTION_OUTGOING_VALUE(VALTYPE, FUNC) \ - gen_rtx (REG, TYPE_MODE (VALTYPE), BASE_OUTGOING_VALUE_REG (TYPE_MODE (VALTYPE))) + gen_rtx_REG (TYPE_MODE (VALTYPE), BASE_OUTGOING_VALUE_REG (TYPE_MODE (VALTYPE))) /* Define how to find the value returned by a library function assuming the value has mode MODE. */ #define LIBCALL_VALUE(MODE) \ - gen_rtx (REG, MODE, BASE_RETURN_VALUE_REG (MODE)) + gen_rtx_REG (MODE, BASE_RETURN_VALUE_REG (MODE)) /* 1 if N is a possible register number for a function value as seen by the caller. @@ -1615,7 +1630,7 @@ function_arg_pass_by_reference (& (CUM), (MODE), (TYPE), (NAMED)) to pad out an argument with extra space. 
The value should be of type `enum direction': either `upward' to pad above the argument, `downward' to pad below, or `none' to inhibit padding. */ -extern enum direction function_arg_padding (); + #define FUNCTION_ARG_PADDING(MODE, TYPE) \ function_arg_padding ((MODE), (TYPE)) @@ -1630,17 +1645,6 @@ function_arg_padding ((MODE), (TYPE)) || ((TYPE) && TYPE_ALIGN (TYPE) == 128))) \ ? 128 : PARM_BOUNDARY) -/* Initialize data used by insn expanders. This is called from - init_emit, once for each function, before code is generated. - For v9, clear the temp slot used by float/int DImode conversions. - ??? There is the 16 bytes at [%fp-16], however we'd like to delete this - space at some point. - ??? Use assign_stack_temp? */ - -extern void sparc_init_expanders (); -extern struct rtx_def *sparc64_fpconv_stack_temp (); -#define INIT_EXPANDERS sparc_init_expanders () - /* Define the information needed to generate branch and scc insns. This is stored from the compare operation. Note that we can't use "rtx" here since it hasn't been defined! */ @@ -1691,8 +1695,8 @@ do { \ extern int leaf_function; #define FUNCTION_PROLOGUE(FILE, SIZE) \ - (TARGET_FLAT ? sparc_flat_output_function_prologue (FILE, SIZE) \ - : output_function_prologue (FILE, SIZE, leaf_function)) + (TARGET_FLAT ? sparc_flat_output_function_prologue (FILE, (int)SIZE) \ + : output_function_prologue (FILE, (int)SIZE, leaf_function)) /* Output assembler code to FILE to increment profiler label # LABELNO for profiling a function entry. @@ -2070,8 +2074,8 @@ extern int current_function_outgoing_args_size; extern union tree_node *current_function_decl; #define FUNCTION_EPILOGUE(FILE, SIZE) \ - (TARGET_FLAT ? sparc_flat_output_function_epilogue (FILE, SIZE) \ - : output_function_epilogue (FILE, SIZE, leaf_function)) + (TARGET_FLAT ? sparc_flat_output_function_epilogue (FILE, (int)SIZE) \ + : output_function_epilogue (FILE, (int)SIZE, leaf_function)) #define DELAY_SLOTS_FOR_EPILOGUE \ (TARGET_FLAT ? 
sparc_flat_epilogue_delay_slots () : 1) @@ -2120,11 +2124,11 @@ do { \ } \ else \ { \ - ASM_OUTPUT_INT (FILE, GEN_INT (0x00000000)); \ - ASM_OUTPUT_INT (FILE, GEN_INT (0x00000000)); \ - ASM_OUTPUT_INT (FILE, GEN_INT (0x00000000)); \ + ASM_OUTPUT_INT (FILE, const0_rtx); \ + ASM_OUTPUT_INT (FILE, const0_rtx); \ + ASM_OUTPUT_INT (FILE, const0_rtx); \ ASM_OUTPUT_INT (FILE, GEN_INT (0x81C04000)); \ - ASM_OUTPUT_INT (FILE, GEN_INT (0x00000000)); \ + ASM_OUTPUT_INT (FILE, const0_rtx); \ } \ } while (0) @@ -2175,7 +2179,7 @@ extern struct rtx_def *sparc_builtin_saveregs (); that holds the dynamic chain--the previous frame's address. ??? -mflat support? */ #define DYNAMIC_CHAIN_ADDRESS(frame) \ - gen_rtx (PLUS, Pmode, frame, GEN_INT (14 * UNITS_PER_WORD)) + gen_rtx_PLUS (Pmode, frame, GEN_INT (14 * UNITS_PER_WORD)) /* The return address isn't on the stack, it is in a register, so we can't access it from the current frame pointer. We can access it from the @@ -2194,8 +2198,8 @@ extern struct rtx_def *sparc_builtin_saveregs (); returns, and +12 for structure returns. */ #define RETURN_ADDR_RTX(count, frame) \ ((count == -1) \ - ? gen_rtx (REG, Pmode, 31) \ - : gen_rtx (MEM, Pmode, \ + ? gen_rtx_REG (Pmode, 31) \ + : gen_rtx_MEM (Pmode, \ memory_address (Pmode, plus_constant (frame, 15 * UNITS_PER_WORD)))) /* Before the prologue, the return address is %o7 + 8. OK, sometimes it's @@ -2203,7 +2207,7 @@ extern struct rtx_def *sparc_builtin_saveregs (); Actually, just using %o7 is close enough for unwinding, but %o7+8 is something you can return to. */ #define INCOMING_RETURN_ADDR_RTX \ - gen_rtx (PLUS, word_mode, gen_rtx (REG, word_mode, 15), GEN_INT (8)) + gen_rtx_PLUS (word_mode, gen_rtx_REG (word_mode, 15), GEN_INT (8)) /* The offset from the incoming value of %sp to the top of the stack frame for the current function. On sparc64, we have to account for the stack @@ -2250,6 +2254,9 @@ extern struct rtx_def *sparc_builtin_saveregs (); /* 1 if X is an fp register. 
*/ #define FP_REG_P(X) (REG_P (X) && REGNO_OK_FOR_FP_P (REGNO (X))) + +/* Is X, a REG, an in or global register? i.e. is regno 0..7 or 24..31 */ +#define IN_OR_GLOBAL_P(X) (REGNO (X) < 8 || (REGNO (X) >= 24 && REGNO (X) <= 31)) /* Maximum number of registers that can appear in a valid memory address. */ @@ -2439,30 +2446,30 @@ extern struct rtx_def *legitimize_pic_address (); #define LEGITIMIZE_ADDRESS(X,OLDX,MODE,WIN) \ { rtx sparc_x = (X); \ if (GET_CODE (X) == PLUS && GET_CODE (XEXP (X, 0)) == MULT) \ - (X) = gen_rtx (PLUS, Pmode, XEXP (X, 1), \ + (X) = gen_rtx_PLUS (Pmode, XEXP (X, 1), \ force_operand (XEXP (X, 0), NULL_RTX)); \ if (GET_CODE (X) == PLUS && GET_CODE (XEXP (X, 1)) == MULT) \ - (X) = gen_rtx (PLUS, Pmode, XEXP (X, 0), \ + (X) = gen_rtx_PLUS (Pmode, XEXP (X, 0), \ force_operand (XEXP (X, 1), NULL_RTX)); \ if (GET_CODE (X) == PLUS && GET_CODE (XEXP (X, 0)) == PLUS) \ - (X) = gen_rtx (PLUS, Pmode, force_operand (XEXP (X, 0), NULL_RTX),\ + (X) = gen_rtx_PLUS (Pmode, force_operand (XEXP (X, 0), NULL_RTX),\ XEXP (X, 1)); \ if (GET_CODE (X) == PLUS && GET_CODE (XEXP (X, 1)) == PLUS) \ - (X) = gen_rtx (PLUS, Pmode, XEXP (X, 0), \ + (X) = gen_rtx_PLUS (Pmode, XEXP (X, 0), \ force_operand (XEXP (X, 1), NULL_RTX)); \ if (sparc_x != (X) && memory_address_p (MODE, X)) \ goto WIN; \ if (flag_pic) (X) = legitimize_pic_address (X, MODE, 0); \ else if (GET_CODE (X) == PLUS && CONSTANT_ADDRESS_P (XEXP (X, 1))) \ - (X) = gen_rtx (PLUS, Pmode, XEXP (X, 0), \ + (X) = gen_rtx_PLUS (Pmode, XEXP (X, 0), \ copy_to_mode_reg (Pmode, XEXP (X, 1))); \ else if (GET_CODE (X) == PLUS && CONSTANT_ADDRESS_P (XEXP (X, 0))) \ - (X) = gen_rtx (PLUS, Pmode, XEXP (X, 1), \ + (X) = gen_rtx_PLUS (Pmode, XEXP (X, 1), \ copy_to_mode_reg (Pmode, XEXP (X, 0))); \ else if (GET_CODE (X) == SYMBOL_REF || GET_CODE (X) == CONST \ || GET_CODE (X) == LABEL_REF) \ - (X) = gen_rtx (LO_SUM, Pmode, \ - copy_to_mode_reg (Pmode, gen_rtx (HIGH, Pmode, X)), X); \ + (X) = gen_rtx_LO_SUM (Pmode, \ + 
copy_to_mode_reg (Pmode, gen_rtx_HIGH (Pmode, X)), X); \ if (memory_address_p (MODE, X)) \ goto WIN; } @@ -2512,7 +2519,7 @@ extern struct rtx_def *legitimize_pic_address (); /* This is how to refer to the variable errno. */ #define GEN_ERRNO_RTX \ - gen_rtx (MEM, SImode, gen_rtx (SYMBOL_REF, Pmode, "errno")) + gen_rtx_MEM (SImode, gen_rtx_SYMBOL_REF (Pmode, "errno")) #endif /* 0 */ /* Define if operations between registers always perform the operation @@ -2585,7 +2592,7 @@ extern struct rtx_def *legitimize_pic_address (); : ((GET_CODE (X) == PLUS || GET_CODE (X) == MINUS \ || GET_CODE (X) == NEG || GET_CODE (X) == ASHIFT) \ ? (TARGET_ARCH64 && GET_MODE (X) == DImode ? CCX_NOOVmode : CC_NOOVmode) \ - : (TARGET_ARCH64 && GET_MODE (X) == DImode ? CCXmode : CCmode))) + : ((TARGET_ARCH64 || TARGET_V8PLUS) && GET_MODE (X) == DImode ? CCXmode : CCmode))) /* Return non-zero if SELECT_CC_MODE will never return MODE for a floating point inequality comparison. */ @@ -2645,32 +2652,32 @@ extern struct rtx_def *legitimize_pic_address (); #define INIT_TARGET_OPTABS \ do { \ add_optab->handlers[(int) TFmode].libfunc \ - = gen_rtx (SYMBOL_REF, Pmode, ADDTF3_LIBCALL); \ + = gen_rtx_SYMBOL_REF (Pmode, ADDTF3_LIBCALL); \ sub_optab->handlers[(int) TFmode].libfunc \ - = gen_rtx (SYMBOL_REF, Pmode, SUBTF3_LIBCALL); \ + = gen_rtx_SYMBOL_REF (Pmode, SUBTF3_LIBCALL); \ neg_optab->handlers[(int) TFmode].libfunc \ - = gen_rtx (SYMBOL_REF, Pmode, NEGTF2_LIBCALL); \ + = gen_rtx_SYMBOL_REF (Pmode, NEGTF2_LIBCALL); \ smul_optab->handlers[(int) TFmode].libfunc \ - = gen_rtx (SYMBOL_REF, Pmode, MULTF3_LIBCALL); \ + = gen_rtx_SYMBOL_REF (Pmode, MULTF3_LIBCALL); \ flodiv_optab->handlers[(int) TFmode].libfunc \ - = gen_rtx (SYMBOL_REF, Pmode, DIVTF3_LIBCALL); \ - eqtf2_libfunc = gen_rtx (SYMBOL_REF, Pmode, EQTF2_LIBCALL); \ - netf2_libfunc = gen_rtx (SYMBOL_REF, Pmode, NETF2_LIBCALL); \ - gttf2_libfunc = gen_rtx (SYMBOL_REF, Pmode, GTTF2_LIBCALL); \ - getf2_libfunc = gen_rtx (SYMBOL_REF, Pmode, 
GETF2_LIBCALL); \ - lttf2_libfunc = gen_rtx (SYMBOL_REF, Pmode, LTTF2_LIBCALL); \ - letf2_libfunc = gen_rtx (SYMBOL_REF, Pmode, LETF2_LIBCALL); \ - trunctfsf2_libfunc = gen_rtx (SYMBOL_REF, Pmode, TRUNCTFSF2_LIBCALL); \ - trunctfdf2_libfunc = gen_rtx (SYMBOL_REF, Pmode, TRUNCTFDF2_LIBCALL); \ - extendsftf2_libfunc = gen_rtx (SYMBOL_REF, Pmode, EXTENDSFTF2_LIBCALL); \ - extenddftf2_libfunc = gen_rtx (SYMBOL_REF, Pmode, EXTENDDFTF2_LIBCALL); \ - floatsitf_libfunc = gen_rtx (SYMBOL_REF, Pmode, FLOATSITF2_LIBCALL); \ - fixtfsi_libfunc = gen_rtx (SYMBOL_REF, Pmode, FIX_TRUNCTFSI2_LIBCALL); \ + = gen_rtx_SYMBOL_REF (Pmode, DIVTF3_LIBCALL); \ + eqtf2_libfunc = gen_rtx_SYMBOL_REF (Pmode, EQTF2_LIBCALL); \ + netf2_libfunc = gen_rtx_SYMBOL_REF (Pmode, NETF2_LIBCALL); \ + gttf2_libfunc = gen_rtx_SYMBOL_REF (Pmode, GTTF2_LIBCALL); \ + getf2_libfunc = gen_rtx_SYMBOL_REF (Pmode, GETF2_LIBCALL); \ + lttf2_libfunc = gen_rtx_SYMBOL_REF (Pmode, LTTF2_LIBCALL); \ + letf2_libfunc = gen_rtx_SYMBOL_REF (Pmode, LETF2_LIBCALL); \ + trunctfsf2_libfunc = gen_rtx_SYMBOL_REF (Pmode, TRUNCTFSF2_LIBCALL); \ + trunctfdf2_libfunc = gen_rtx_SYMBOL_REF (Pmode, TRUNCTFDF2_LIBCALL); \ + extendsftf2_libfunc = gen_rtx_SYMBOL_REF (Pmode, EXTENDSFTF2_LIBCALL); \ + extenddftf2_libfunc = gen_rtx_SYMBOL_REF (Pmode, EXTENDDFTF2_LIBCALL); \ + floatsitf_libfunc = gen_rtx_SYMBOL_REF (Pmode, FLOATSITF2_LIBCALL); \ + fixtfsi_libfunc = gen_rtx_SYMBOL_REF (Pmode, FIX_TRUNCTFSI2_LIBCALL); \ fixunstfsi_libfunc \ - = gen_rtx (SYMBOL_REF, Pmode, FIXUNS_TRUNCTFSI2_LIBCALL); \ + = gen_rtx_SYMBOL_REF (Pmode, FIXUNS_TRUNCTFSI2_LIBCALL); \ if (TARGET_FPU) \ sqrt_optab->handlers[(int) TFmode].libfunc \ - = gen_rtx (SYMBOL_REF, Pmode, "_Q_sqrt"); \ + = gen_rtx_SYMBOL_REF (Pmode, "_Q_sqrt"); \ INIT_SUBTARGET_OPTABS; \ } while (0) @@ -2709,12 +2716,12 @@ extern struct rtx_def *legitimize_pic_address (); /* Compute extra cost of moving data between one register class and another. 
*/ +#define GENERAL_OR_I64(C) ((C) == GENERAL_REGS || (C) == I64_REGS) #define REGISTER_MOVE_COST(CLASS1, CLASS2) \ - (((FP_REG_CLASS_P (CLASS1) && (CLASS2) == GENERAL_REGS) \ - || ((CLASS1) == GENERAL_REGS && FP_REG_CLASS_P (CLASS2)) \ + (((FP_REG_CLASS_P (CLASS1) && GENERAL_OR_I64 (CLASS2)) \ + || (GENERAL_OR_I64 (CLASS1) && FP_REG_CLASS_P (CLASS2)) \ || (CLASS1) == FPCC_REGS || (CLASS2) == FPCC_REGS) \ - ? (sparc_cpu == PROCESSOR_ULTRASPARC ? 12 : 6) \ - : 2) + ? (sparc_cpu == PROCESSOR_ULTRASPARC ? 12 : 6) : 2) /* Provide the costs of a rtl expression. This is in the body of a switch on CODE. The purpose for the cost of MULT is to encourage @@ -2741,20 +2748,17 @@ extern struct rtx_def *legitimize_pic_address (); /* Adjust the cost of dependencies. */ #define ADJUST_COST(INSN,LINK,DEP,COST) \ -do { \ if (sparc_cpu == PROCESSOR_SUPERSPARC) \ (COST) = supersparc_adjust_cost (INSN, LINK, DEP, COST); \ else if (sparc_cpu == PROCESSOR_ULTRASPARC) \ (COST) = ultrasparc_adjust_cost (INSN, LINK, DEP, COST); \ -} while (0) + else /* Conditional branches with empty delay slots have a length of two. */ #define ADJUST_INSN_LENGTH(INSN, LENGTH) \ -do { \ if (GET_CODE (INSN) == CALL_INSN \ || (GET_CODE (INSN) == JUMP_INSN && ! simplejump_p (insn))) \ - LENGTH += 1; \ -} while (0) + LENGTH += 1; else /* Control the assembler format that we output. */ @@ -3252,6 +3256,16 @@ extern int v9_regcmp_p (); extern unsigned long sparc_flat_compute_frame_size (); extern unsigned long sparc_type_code (); +extern char *sparc_v8plus_shift (); + +#ifdef __STDC__ +/* Function used for V8+ code generation. Returns 1 if the high + 32 bits of REG are 0 before INSN. */ +extern int sparc_check_64 (struct rtx_def *, struct rtx_def *); +extern int sparc_return_peephole_ok (struct rtx_def *, struct rtx_def *); +extern int compute_frame_size (int, int); +#endif + /* Defined in flags.h, but insn-emit.c does not include flags.h. 
*/ extern int flag_pic; diff --git a/gcc/config/sparc/sparc.md b/gcc/config/sparc/sparc.md index ac79f68..8ef692d 100644 --- a/gcc/config/sparc/sparc.md +++ b/gcc/config/sparc/sparc.md @@ -34,7 +34,7 @@ ;; Attribute for cpu type. ;; These must match the values for enum processor_type in sparc.h. -(define_attr "cpu" "v7,cypress,v8,supersparc,sparclite,f930,f934,sparclet,tsc701,v8plus,v9,ultrasparc" +(define_attr "cpu" "v7,cypress,v8,supersparc,sparclite,f930,f934,sparclet,tsc701,v9,ultrasparc" (const (symbol_ref "sparc_cpu_attr"))) ;; Attribute for the instruction set. @@ -67,7 +67,7 @@ ;; type "call_no_delay_slot" is a call followed by an unimp instruction. (define_attr "type" - "move,unary,binary,compare,load,sload,store,ialu,shift,uncond_branch,branch,call,call_no_delay_slot,address,imul,fpload,fpstore,fp,fpmove,fpcmove,fpcmp,fpmul,fpdivs,fpdivd,fpsqrt,cmove,multi,misc" + "move,unary,binary,compare,load,sload,store,ialu,shift,uncond_branch,branch,call,call_no_delay_slot,return,address,imul,fpload,fpstore,fp,fpmove,fpcmove,fpcmp,fpmul,fpdivs,fpdivd,fpsqrt,cmove,multi,misc" (const_string "binary")) ;; Set true if insn uses call-clobbered intermediate register. 
@@ -110,7 +110,7 @@ ;; Attributes for instruction and branch scheduling (define_attr "in_call_delay" "false,true" - (cond [(eq_attr "type" "uncond_branch,branch,call,call_no_delay_slot,multi") + (cond [(eq_attr "type" "uncond_branch,branch,call,call_no_delay_slot,return,multi") (const_string "false") (eq_attr "type" "load,fpload,store,fpstore") (if_then_else (eq_attr "length" "1") @@ -127,6 +127,22 @@ (define_delay (eq_attr "type" "call") [(eq_attr "in_call_delay" "true") (nil) (nil)]) +(define_attr "leaf_function" "false,true" + (const (symbol_ref "leaf_function"))) + + +(define_attr "in_return_delay" "false,true" + (if_then_else (and (and (and (eq_attr "type" "move,load,sload,store,binary,ialu") + (eq_attr "length" "1")) + (eq_attr "leaf_function" "false")) + (match_insn "eligible_for_return_delay")) + (const_string "true") + (const_string "false"))) + +(define_delay (and (eq_attr "type" "return") + (eq_attr "isa" "v9")) + [(eq_attr "in_return_delay" "true") (nil) (nil)]) + ;; ??? Should implement the notion of predelay slots for floating point ;; branches. This would allow us to remove the nop always inserted before ;; a floating point branch. @@ -356,7 +372,7 @@ (define_function_unit "ieu" 1 0 (and (eq_attr "cpu" "ultrasparc") - (eq_attr "type" "ialu,shift,compare,cmove,call")) + (eq_attr "type" "ialu,binary,shift,compare,cmove,call")) 1 1) (define_function_unit "ieu_shift" 1 0 @@ -370,12 +386,15 @@ 2 1) ;; Timings; throughput/latency -;; ?? FADD 1/3 add/sub, format conv, compar, abs, neg -;; ?? FMUL 1/3 -;; ?? FDIVs 1/12 -;; ?? FDIVd 1/22 -;; ?? FSQRTs 1/12 -;; ?? FSQRTd 1/22 +;; FMOV 1/1 fmov, fabs, fneg +;; FMOVcc 1/2 +;; FADD 1/4 add/sub, format conv, compar +;; FMUL 1/4 +;; FDIVs 12/12 +;; FDIVd 22/22 +;; FSQRTs 12/12 +;; FSQRTd 22/22 +;; FCMP takes 1 cycle to branch, 2 cycles to conditional move. 
(define_function_unit "fadd" 1 0 (and (eq_attr "cpu" "ultrasparc") @@ -456,7 +475,7 @@ [(set (reg:CCX 100) (compare:CCX (match_operand:DI 0 "register_operand" "") (match_operand:DI 1 "arith_double_operand" "")))] - "TARGET_ARCH64" + "TARGET_ARCH64 || TARGET_V8PLUS" " { sparc_compare_op0 = operands[0]; @@ -521,6 +540,37 @@ "cmp %0,%1" [(set_attr "type" "compare")]) +(define_insn "cmpdi_v8plus" + [(set (reg:CCX 100) + (compare:CCX (match_operand:DI 0 "register_operand" "r,r,r") + (match_operand:DI 1 "arith_double_operand" "J,I,r"))) + (clobber (match_scratch:SI 2 "=&h,&h,&h")) + (clobber (match_scratch:SI 3 "=X,X,&h"))] + "TARGET_V8PLUS" + "* +{ + /* The srl can be omitted if the value in the %L0 or %L1 is already + zero extended. */ + + output_asm_insn (\"sllx %H0,32,%2\", operands); + + if (sparc_check_64 (operands[0], insn) <= 0) + output_asm_insn (\"srl %L0,0,%L0\", operands); + + switch (which_alternative) + { + case 0: + return \"orcc %L0,%2,%%g0\"; + case 1: + return \"or %L0,%2,%2\;cmp %2,%1\"; + case 2: + if (sparc_check_64 (operands[1], insn) <= 0) + output_asm_insn (\"srl %L1,0,%L1\", operands); + return \"sllx %H1,32,%3\;or %L0,%2,%2\;or %L1,%3,%3\;cmp %2,%3\"; + } +}" + [(set_attr "length" "3,4,7")]) + (define_insn "*cmpsf_fpe" [(set (match_operand:CCFPE 0 "fcc_reg_operand" "=c") (compare:CCFPE (match_operand:SF 1 "register_operand" "f") @@ -1008,7 +1058,7 @@ (const_int 0)))] "TARGET_ARCH64" "mov 0,%0\;movrnz %1,1,%0" - [(set_attr "type" "unary") + [(set_attr "type" "cmove") (set_attr "length" "2")]) (define_insn "*neg_snedi_zero" @@ -1017,7 +1067,7 @@ (const_int 0))))] "TARGET_ARCH64" "mov 0,%0\;movrnz %1,-1,%0" - [(set_attr "type" "unary") + [(set_attr "type" "cmove") (set_attr "length" "2")]) (define_insn "*snedi_zero_trunc" @@ -1026,7 +1076,7 @@ (const_int 0)))] "TARGET_ARCH64" "mov 0,%0\;movrnz %1,1,%0" - [(set_attr "type" "unary") + [(set_attr "type" "cmove") (set_attr "length" "2")]) (define_insn "*seqsi_zero" @@ -1065,7 +1115,7 @@ (const_int 
0)))] "TARGET_ARCH64" "mov 0,%0\;movrz %1,1,%0" - [(set_attr "type" "unary") + [(set_attr "type" "cmove") (set_attr "length" "2")]) (define_insn "*neg_seqdi_zero" @@ -1074,7 +1124,7 @@ (const_int 0))))] "TARGET_ARCH64" "mov 0,%0\;movrz %1,-1,%0" - [(set_attr "type" "unary") + [(set_attr "type" "cmove") (set_attr "length" "2")]) (define_insn "*seqdi_zero_trunc" @@ -1083,7 +1133,7 @@ (const_int 0)))] "TARGET_ARCH64" "mov 0,%0\;movrz %1,1,%0" - [(set_attr "type" "unary") + [(set_attr "type" "cmove") (set_attr "length" "2")]) ;; We can also do (x + (i == 0)) and related, so put them in. @@ -1645,15 +1695,16 @@ [(set_attr "type" "move") (set_attr "length" "1")]) -(define_insn "get_pc_via_call" - [(set (pc) (label_ref (match_operand 0 "" ""))) - (set (reg:SI 15) (label_ref (match_operand 1 "" "")))] - "" - "call %l0%#" - [(set_attr "type" "uncond_branch")]) +(define_insn "get_pc" + [(clobber (reg:SI 15)) + (set (match_operand 0 "register_operand" "=r") + (unspec [(match_operand 1 "" "") (match_operand 2 "" "")] 2))] + "flag_pic && REGNO (operands[0]) == 23" + "sethi %%hi(%a1-4),%0\;call %a2\;add %0,%%lo(%a1+4),%0" + [(set_attr "length" "3")]) (define_insn "get_pc_via_rdpc" - [(set (match_operand:DI 0 "register_operand" "=r") (pc))] + [(set (match_operand 0 "register_operand" "=r") (pc))] "TARGET_V9" "rd %%pc,%0" [(set_attr "type" "move")]) @@ -2089,7 +2140,10 @@ "! TARGET_LIVE_G0 && (register_operand (operands[0], SImode) || register_operand (operands[1], SImode) - || operands[1] == const0_rtx)" + || operands[1] == const0_rtx) + && (GET_CODE (operands[0]) != REG || ! 
CONSTANT_P (operands[1]) + || REGNO (operands[0]) < 32 + || REGNO (operands[0]) >= FIRST_PSEUDO_REGISTER)" "@ mov %1,%0 fmovs %1,%0 @@ -2099,7 +2153,7 @@ st %r1,%0 st %1,%0 fzeros %0" - [(set_attr "type" "move,fp,move,load,fpload,store,fpstore,fpmove") + [(set_attr "type" "move,fpmove,move,load,fpload,store,fpstore,fpmove") (set_attr "length" "1")]) (define_insn "*movsi_insn_liveg0" @@ -2141,16 +2195,20 @@ DONE; }") -;; V8+ movdi is like regular 32 bit except that a 64 bit zero can be stored -;; to aligned memory with a single instruction and the ldd/std instructions -;; are not used. -(define_insn "*movdi_v8plus" - [(set (match_operand:DI 0 "reg_or_nonsymb_mem_operand" "=r,T,Q,r,r,f,f,Q,b") - (match_operand:DI 1 "general_operand" "r,J,r,Q,i,?f,?Q,?f,?J"))] - "TARGET_V8PLUS +;; 32 bit V9 movdi is like regular 32 bit except: a 64 bit zero can be stored +;; to aligned memory with a single instruction, the ldd/std instructions +;; are not used, and constants can not be moved to floating point registers. + +(define_insn "*movdi_sp32_v9" + [(set (match_operand:DI 0 "reg_or_nonsymb_mem_operand" "=r,T,Q,r,r,?e,?e,?Q,?b") + (match_operand:DI 1 "general_operand" "r,J,r,Q,i,e,Q,e,J"))] + "TARGET_V9 && (register_operand (operands[0], DImode) || register_operand (operands[1], DImode) - || operands[1] == const0_rtx)" + || operands[1] == const0_rtx) + && (GET_CODE (operands[0]) != REG || ! CONSTANT_P (operands[1]) + || REGNO (operands[0]) < 32 + || REGNO (operands[0]) >= FIRST_PSEUDO_REGISTER)" "* { if (which_alternative == 1) @@ -2164,13 +2222,11 @@ [(set_attr "type" "move,store,store,load,multi,fp,fpload,fpstore,fpmove") (set_attr "length" "2,1,3,3,3,2,3,3,1")]) -;; ??? The Haifa scheduler does not split instructions after reload if -;; it also ran before reload. - +;; SPARC V9 deprecates std. Split it here. 
(define_split [(set (match_operand:DI 0 "memory_operand" "=m") (match_operand:DI 1 "register_operand" "r"))] - "TARGET_V8PLUS && !TARGET_ARCH64 && reload_completed + "TARGET_V9 && ! TARGET_ARCH64 && reload_completed && REGNO (operands[1]) < 32 && ! MEM_VOLATILE_P (operands[0]) && offsettable_memref_p (operands[0])" [(set (match_dup 2) (match_dup 3)) @@ -2182,10 +2238,10 @@ operands[2] = copy_rtx (operands[0]); PUT_MODE (operands[2], SImode);") -(define_insn "*movdi_sp32_insn" +(define_insn "*movdi_sp32" [(set (match_operand:DI 0 "reg_or_nonsymb_mem_operand" "=r,T,U,Q,r,r,?f,?f,?Q") (match_operand:DI 1 "general_operand" "r,U,T,r,Q,i,f,Q,f"))] - "! TARGET_ARCH64 + "! TARGET_V9 && (register_operand (operands[0], DImode) || register_operand (operands[1], DImode) || operands[1] == const0_rtx)" @@ -2207,8 +2263,8 @@ ;;; This needs the original value of operands[1], not the inverted value. (define_insn "*movdi_sp64_insn" - [(set (match_operand:DI 0 "reg_or_nonsymb_mem_operand" "=r,r,r,Q,?f,?f,?Q") - (match_operand:DI 1 "move_operand" "rI,K,Q,rJ,f,Q,f"))] + [(set (match_operand:DI 0 "reg_or_nonsymb_mem_operand" "=r,r,r,Q,?e,?e,?Q") + (match_operand:DI 1 "move_operand" "rI,K,Q,rJ,e,Q,e"))] "TARGET_ARCH64 && (register_operand (operands[0], DImode) || register_operand (operands[1], DImode) @@ -2693,24 +2749,22 @@ " { enum rtx_code code = GET_CODE (operands[1]); - - if (GET_MODE (sparc_compare_op0) == DImode - && ! 
TARGET_ARCH64) - FAIL; + enum machine_mode op0_mode = GET_MODE (sparc_compare_op0); if (sparc_compare_op1 == const0_rtx && GET_CODE (sparc_compare_op0) == REG - && GET_MODE (sparc_compare_op0) == DImode - && v9_regcmp_p (code)) + && ((TARGET_ARCH64 && op0_mode == DImode && v9_regcmp_p (code)) + || (op0_mode == SImode && v8plus_regcmp_p (code)))) { - operands[1] = gen_rtx (code, DImode, + operands[1] = gen_rtx_fmt_ee (code, op0_mode, sparc_compare_op0, sparc_compare_op1); } else { rtx cc_reg = gen_compare_reg (code, sparc_compare_op0, sparc_compare_op1); - operands[1] = gen_rtx (code, GET_MODE (cc_reg), cc_reg, const0_rtx); + operands[1] = gen_rtx_fmt_ee (code, GET_MODE (cc_reg), + cc_reg, const0_rtx); } }") @@ -2729,14 +2783,15 @@ && GET_MODE (sparc_compare_op0) == DImode && v9_regcmp_p (code)) { - operands[1] = gen_rtx (code, DImode, + operands[1] = gen_rtx_fmt_ee (code, DImode, sparc_compare_op0, sparc_compare_op1); } else { rtx cc_reg = gen_compare_reg (code, sparc_compare_op0, sparc_compare_op1); - operands[1] = gen_rtx (code, GET_MODE (cc_reg), cc_reg, const0_rtx); + operands[1] = gen_rtx_fmt_ee (code, GET_MODE (cc_reg), + cc_reg, const0_rtx); } }") @@ -2963,6 +3018,57 @@ movr%d1 %2,%r4,%0" [(set_attr "type" "cmove")]) +;; On UltraSPARC this is slightly worse than cmp/mov %icc if the register +;; needs to be zero extended but better on average. +(define_insn "*movsi_cc_reg_v8plus" + [(set (match_operand:SI 0 "register_operand" "=r,r") + (if_then_else:SI (match_operator 1 "v8plus_regcmp_op" + [(match_operand:SI 2 "register_operand" "r,r") + (const_int 0)]) + (match_operand:SI 3 "arith10_operand" "rM,0") + (match_operand:SI 4 "arith10_operand" "0,rM")))] + "TARGET_V9" + "* +{ + if (! 
sparc_check_64 (operands[2], insn)) + output_asm_insn (\"srl %2,0,%2\", operands); + if (which_alternative == 0) + return \"movr%D1 %2,%r3,%0\"; + return \"movr%d1 %2,%r4,%0\"; +}" + [(set_attr "type" "cmove") + (set_attr "length" "2")]) + +;; To work well this needs to know the current insn, but that is not an +;; argument to gen_split_*. + +(define_split + [(set (match_operand:SI 0 "register_operand" "=r,r") + (if_then_else:SI (match_operator 1 "v8plus_regcmp_op" + [(match_operand:SI 2 "register_operand" "r,r") + (const_int 0)]) + (match_operand:SI 3 "arith10_operand" "rM,0") + (match_operand:SI 4 "arith10_operand" "0,rM")))] + "reload_completed" + [(set (match_dup 0) + (unspec:SI [(match_dup 1) (match_dup 3) (match_dup 4)] 9))] + "if (! sparc_check_64 (operands[2], NULL_RTX)) + emit_insn (gen_v8plus_clear_high (operands[2], operands[2]));") + +;; A conditional move with the condition argument known to be zero extended +(define_insn "" + [(set (match_operand:SI 0 "register_operand" "=r,r") + (unspec:SI [(match_operator 1 "v8plus_regcmp_op" + [(match_operand:SI 2 "register_operand" "r,r") + (const_int 0)]) + (match_operand:SI 3 "arith10_operand" "rM,0") + (match_operand:SI 4 "arith10_operand" "0,rM")] 9))] + "TARGET_V9" + "@ + movr%D1 %2,%r3,%0 + movr%d1 %2,%r4,%0" + [(set_attr "type" "cmove")]) + ;; ??? The constraints of operands 3,4 need work. (define_insn "*movdi_cc_reg_sp64" [(set (match_operand:DI 0 "register_operand" "=r,r") @@ -3130,6 +3236,7 @@ "lduh %1,%0" [(set_attr "type" "load")]) + ;; ??? Write truncdisi pattern using sra? (define_expand "zero_extendsidi2" @@ -3148,6 +3255,20 @@ [(set_attr "type" "unary,load") (set_attr "length" "1")]) +;; Zero extend a 32 bit value in a 64 bit register. 
+(define_insn "v8plus_clear_high" + [(set (match_operand:SI 0 "reg_or_nonsymb_mem_operand" "=r,Q") + (unspec:SI [(match_operand:SI 1 "register_operand" "r,r")] 10))] + "TARGET_V9" + "* +if (which_alternative == 1) + return \"st %1,%0\"; +if (sparc_check_64 (operands[1], insn) > 0) + return \"nop\"; +return \"srl %1,0,%0\"; +" + [(set_attr "type" "shift,store")]) + ;; Simplify comparisons of extended values. (define_insn "*cmp_zero_extendqisi2" @@ -3480,115 +3601,25 @@ [(set_attr "type" "fp")]) ;; Now the same for 64 bit sources. -;; ??? We cannot put DImode values in fp regs (see below near fix_truncdfsi2). - -(define_expand "floatdisf2" - [(parallel [(set (match_operand:SF 0 "register_operand" "") - (float:SF (match_operand:DI 1 "general_operand" ""))) - (clobber (match_dup 2)) - (clobber (match_dup 3))])] - "TARGET_ARCH64 && TARGET_FPU" - " -{ - operands[2] = gen_reg_rtx (DFmode); - operands[3] = sparc64_fpconv_stack_temp (); -}") - -(define_expand "floatdidf2" - [(parallel [(set (match_operand:DF 0 "register_operand" "") - (float:DF (match_operand:DI 1 "general_operand" ""))) - (clobber (match_dup 2)) - (clobber (match_dup 3))])] - "TARGET_ARCH64 && TARGET_FPU" - " -{ - operands[2] = gen_reg_rtx (DFmode); - operands[3] = sparc64_fpconv_stack_temp (); -}") - -(define_expand "floatditf2" - [(parallel [(set (match_operand:TF 0 "register_operand" "") - (float:TF (match_operand:DI 1 "general_operand" ""))) - (clobber (match_dup 2)) - (clobber (match_dup 3))])] - "TARGET_ARCH64 && TARGET_FPU && TARGET_HARD_QUAD" - " -{ - operands[2] = gen_reg_rtx (DFmode); - operands[3] = sparc64_fpconv_stack_temp (); -}") - -(define_insn "*floatdisf2_insn" - [(parallel [(set (match_operand:SF 0 "register_operand" "=f") - (float:SF (match_operand:DI 1 "general_operand" "rm"))) - (clobber (match_operand:DF 2 "register_operand" "=&e")) - (clobber (match_operand:DI 3 "memory_operand" "m"))])] - "TARGET_ARCH64 && TARGET_FPU" - "* -{ - if (GET_CODE (operands[1]) == MEM) - output_asm_insn 
(\"ldd %1,%2\", operands); - else - output_asm_insn (\"stx %1,%3\;ldd %3,%2\", operands); - return \"fxtos %2,%0\"; -}" - [(set_attr "type" "fp") - (set_attr "length" "3")]) - -(define_insn "*floatdidf2_insn" - [(parallel [(set (match_operand:DF 0 "register_operand" "=e") - (float:DF (match_operand:DI 1 "general_operand" "rm"))) - (clobber (match_operand:DF 2 "register_operand" "=&e")) - (clobber (match_operand:DI 3 "memory_operand" "m"))])] - "TARGET_ARCH64 && TARGET_FPU" - "* -{ - if (GET_CODE (operands[1]) == MEM) - output_asm_insn (\"ldd %1,%2\", operands); - else - output_asm_insn (\"stx %1,%3\;ldd %3,%2\", operands); - return \"fxtod %2,%0\"; -}" - [(set_attr "type" "fp") - (set_attr "length" "3")]) -(define_insn "*floatditf2_insn" - [(parallel [(set (match_operand:TF 0 "register_operand" "=e") - (float:TF (match_operand:DI 1 "general_operand" "rm"))) - (clobber (match_operand:DF 2 "register_operand" "=&e")) - (clobber (match_operand:DI 3 "memory_operand" "m"))])] - "TARGET_ARCH64 && TARGET_FPU && TARGET_HARD_QUAD" - "* -{ - if (GET_CODE (operands[1]) == MEM) - output_asm_insn (\"ldd %1,%2\", operands); - else - output_asm_insn (\"stx %1,%3\;ldd %3,%2\", operands); - return \"fxtoq %2,%0\"; -}" - [(set_attr "type" "fp") - (set_attr "length" "3")]) - -;; ??? Ideally, these are what we would like to use. 
- -(define_insn "floatdisf2_sp64" +(define_insn "floatdisf2" [(set (match_operand:SF 0 "register_operand" "=f") (float:SF (match_operand:DI 1 "register_operand" "e")))] - "0 && TARGET_ARCH64 && TARGET_FPU" + "TARGET_V9 && TARGET_FPU" "fxtos %1,%0" [(set_attr "type" "fp")]) -(define_insn "floatdidf2_sp64" +(define_insn "floatdidf2" [(set (match_operand:DF 0 "register_operand" "=e") (float:DF (match_operand:DI 1 "register_operand" "e")))] - "0 && TARGET_ARCH64 && TARGET_FPU" + "TARGET_V9 && TARGET_FPU" "fxtod %1,%0" [(set_attr "type" "fp")]) -(define_insn "floatditf2_sp64" +(define_insn "floatditf2" [(set (match_operand:TF 0 "register_operand" "=e") (float:TF (match_operand:DI 1 "register_operand" "e")))] - "0 && TARGET_ARCH64 && TARGET_FPU && TARGET_HARD_QUAD" + "TARGET_V9 && TARGET_FPU && TARGET_HARD_QUAD" "fxtoq %1,%0" [(set_attr "type" "fp")]) @@ -3616,121 +3647,26 @@ "fqtoi %1,%0" [(set_attr "type" "fp")]) -;; Now the same, for 64-bit targets -;; ??? We try to work around an interesting problem. -;; If gcc tries to do a subreg on the result it will get the wrong answer: -;; "(subreg:SI (reg:DI M int-reg) 0)" is the same as -;; "(subreg:SI (reg:DI N float-reg) 1)", but gcc does not know how to change -;; the "0" to a "1". One could enhance alter_subreg but it is not clear how to -;; do this cleanly. 
- -(define_expand "fix_truncsfdi2" - [(parallel [(set (match_operand:DI 0 "general_operand" "") - (fix:DI (fix:SF (match_operand:SF 1 "register_operand" "")))) - (clobber (match_dup 2)) - (clobber (match_dup 3))])] - "TARGET_ARCH64 && TARGET_FPU" - " -{ - operands[2] = gen_reg_rtx (DFmode); - operands[3] = sparc64_fpconv_stack_temp (); -}") - -(define_expand "fix_truncdfdi2" - [(parallel [(set (match_operand:DI 0 "general_operand" "") - (fix:DI (fix:DF (match_operand:DF 1 "register_operand" "")))) - (clobber (match_dup 2)) - (clobber (match_dup 3))])] - "TARGET_ARCH64 && TARGET_FPU" - " -{ - operands[2] = gen_reg_rtx (DFmode); - operands[3] = sparc64_fpconv_stack_temp (); -}") +;; Now the same, for V9 targets -(define_expand "fix_trunctfdi2" - [(parallel [(set (match_operand:DI 0 "general_operand" "") - (fix:DI (fix:TF (match_operand:TF 1 "register_operand" "")))) - (clobber (match_dup 2)) - (clobber (match_dup 3))])] - "TARGET_ARCH64 && TARGET_FPU && TARGET_HARD_QUAD" - " -{ - operands[2] = gen_reg_rtx (DFmode); - operands[3] = sparc64_fpconv_stack_temp (); -}") - -(define_insn "*fix_truncsfdi2_insn" - [(parallel [(set (match_operand:DI 0 "general_operand" "=rm") - (fix:DI (fix:SF (match_operand:SF 1 "register_operand" "f")))) - (clobber (match_operand:DF 2 "register_operand" "=&e")) - (clobber (match_operand:DI 3 "memory_operand" "m"))])] - "TARGET_ARCH64 && TARGET_FPU" - "* -{ - output_asm_insn (\"fstox %1,%2\", operands); - if (GET_CODE (operands[0]) == MEM) - return \"std %2,%0\"; - else - return \"std %2,%3\;ldx %3,%0\"; -}" - [(set_attr "type" "fp") - (set_attr "length" "3")]) - -(define_insn "*fix_truncdfdi2_insn" - [(parallel [(set (match_operand:DI 0 "general_operand" "=rm") - (fix:DI (fix:DF (match_operand:DF 1 "register_operand" "e")))) - (clobber (match_operand:DF 2 "register_operand" "=&e")) - (clobber (match_operand:DI 3 "memory_operand" "m"))])] - "TARGET_ARCH64 && TARGET_FPU" - "* -{ - output_asm_insn (\"fdtox %1,%2\", operands); - if (GET_CODE 
(operands[0]) == MEM) - return \"std %2,%0\"; - else - return \"std %2,%3\;ldx %3,%0\"; -}" - [(set_attr "type" "fp") - (set_attr "length" "3")]) - -(define_insn "*fix_trunctfdi2_insn" - [(parallel [(set (match_operand:DI 0 "general_operand" "=rm") - (fix:DI (fix:TF (match_operand:TF 1 "register_operand" "e")))) - (clobber (match_operand:DF 2 "register_operand" "=&e")) - (clobber (match_operand:DI 3 "memory_operand" "m"))])] - "TARGET_ARCH64 && TARGET_FPU && TARGET_HARD_QUAD" - "* -{ - output_asm_insn (\"fqtox %1,%2\", operands); - if (GET_CODE (operands[0]) == MEM) - return \"std %2,%0\"; - else - return \"std %2,%3\;ldx %3,%0\"; -}" - [(set_attr "type" "fp") - (set_attr "length" "3")]) - -;; ??? Ideally, these are what we would like to use. - -(define_insn "fix_truncsfdi2_sp64" +(define_insn "fix_truncsfdi2" [(set (match_operand:DI 0 "register_operand" "=e") (fix:DI (fix:SF (match_operand:SF 1 "register_operand" "f"))))] - "0 && TARGET_ARCH64 && TARGET_FPU" + "TARGET_V9 && TARGET_FPU" "fstox %1,%0" [(set_attr "type" "fp")]) -(define_insn "fix_truncdfdi2_sp64" +(define_insn "fix_truncdfdi2" [(set (match_operand:DI 0 "register_operand" "=e") (fix:DI (fix:DF (match_operand:DF 1 "register_operand" "e"))))] - "0 && TARGET_ARCH64 && TARGET_FPU" + "TARGET_V9 && TARGET_FPU" "fdtox %1,%0" [(set_attr "type" "fp")]) -(define_insn "fix_trunctfdi2_sp64" +(define_insn "fix_trunctfdi2" [(set (match_operand:DI 0 "register_operand" "=e") (fix:DI (fix:TF (match_operand:TF 1 "register_operand" "e"))))] - "0 && TARGET_ARCH64 && TARGET_FPU && TARGET_HARD_QUAD" + "TARGET_V9 && TARGET_FPU && TARGET_HARD_QUAD" "fqtox %1,%0" [(set_attr "type" "fp")]) @@ -3785,6 +3721,77 @@ }" [(set_attr "length" "2")]) + +;; Split DImode arithmetic + +(define_split + [(set (match_operand:DI 0 "register_operand" "=r") + (plus:DI (match_operand:DI 1 "arith_double_operand" "%r") + (match_operand:DI 2 "arith_double_operand" "rHI"))) + (clobber (reg:SI 100))] + "! 
TARGET_ARCH64 && reload_completed" + [(parallel [(set (reg:CC_NOOV 100) + (compare:CC_NOOV (plus:SI (match_dup 4) + (match_dup 5)) + (const_int 0))) + (set (match_dup 3) + (plus:SI (match_dup 4) (match_dup 5)))]) + (set (match_dup 6) + (plus:SI (plus:SI (match_dup 7) + (match_dup 8)) + (ltu:SI (reg:CC_NOOV 100) (const_int 0))))] + "operands[3] = gen_lowpart (SImode, operands[0]); + operands[4] = gen_lowpart (SImode, operands[1]); + operands[5] = gen_lowpart (SImode, operands[2]); + operands[6] = gen_highpart (SImode, operands[0]); + operands[7] = gen_highpart (SImode, operands[1]); + if (GET_CODE (operands[2]) == CONST_INT) + operands[8] = INTVAL (operands[2]) < 0 ? constm1_rtx : const0_rtx; + else + operands[8] = gen_highpart (SImode, operands[2]);") + +(define_split + [(set (match_operand:DI 0 "register_operand" "=r") + (minus:DI (match_operand:DI 1 "arith_double_operand" "r") + (match_operand:DI 2 "arith_double_operand" "rHI"))) + (clobber (reg:SI 100))] + "! TARGET_ARCH64 && reload_completed" + [(parallel [(set (reg:CC_NOOV 100) + (compare:CC_NOOV (minus:SI (match_dup 4) + (match_dup 5)) + (const_int 0))) + (set (match_dup 3) + (minus:SI (match_dup 4) (match_dup 5)))]) + (set (match_dup 6) + (minus:SI (minus:SI (match_dup 7) + (match_dup 8)) + (ltu:SI (reg:CC_NOOV 100) (const_int 0))))] + "operands[3] = gen_lowpart (SImode, operands[0]); + operands[4] = gen_lowpart (SImode, operands[1]); + operands[5] = gen_lowpart (SImode, operands[2]); + operands[6] = gen_highpart (SImode, operands[0]); + operands[7] = gen_highpart (SImode, operands[1]); + operands[8] = gen_highpart (SImode, operands[2]);") + +;; LTU here means "carry set" +(define_insn "*addx" + [(set (match_operand:SI 0 "register_operand" "=r") + (plus:SI (plus:SI (match_operand:SI 1 "arith_operand" "%r") + (match_operand:SI 2 "arith_operand" "rI")) + (ltu:SI (reg:CC_NOOV 100) (const_int 0))))] + "" + "addx %1,%2,%0" + [(set_attr "type" "unary")]) + +(define_insn "*subx" + [(set (match_operand:SI 0 
"register_operand" "=r") + (minus:SI (minus:SI (match_operand:SI 1 "register_operand" "r") + (match_operand:SI 2 "arith_operand" "rI")) + (ltu:SI (reg:CC_NOOV 100) (const_int 0))))] + "" + "subx %1,%2,%0" + [(set_attr "type" "unary")]) + (define_insn "" [(set (match_operand:DI 0 "register_operand" "=r") (plus:DI (zero_extend:DI (match_operand:SI 1 "register_operand" "r")) @@ -3976,13 +3983,50 @@ "smul %1,%2,%0" [(set_attr "type" "imul")]) -(define_insn "muldi3" +(define_expand "muldi3" + [(set (match_operand:DI 0 "register_operand" "=r") + (mult:DI (match_operand:DI 1 "arith_double_operand" "%r") + (match_operand:DI 2 "arith_double_operand" "rHI")))] + "TARGET_ARCH64 || TARGET_V8PLUS" + " +{ + if (TARGET_V8PLUS) + { + emit_insn (gen_muldi3_v8plus (operands[0], operands[1], operands[2])); + DONE; + } +}") + +(define_insn "*muldi3_sp64" [(set (match_operand:DI 0 "register_operand" "=r") (mult:DI (match_operand:DI 1 "arith_double_operand" "%r") (match_operand:DI 2 "arith_double_operand" "rHI")))] "TARGET_ARCH64" "mulx %1,%2,%0") +;; V8plus wide multiply. +(define_insn "muldi3_v8plus" + [(set (match_operand:DI 0 "register_operand" "=r,h") + (mult:DI (match_operand:DI 1 "arith_double_operand" "%r,0") + (match_operand:DI 2 "arith_double_operand" "rHI,rHI"))) + (clobber (match_scratch:SI 3 "=&h,X")) + (clobber (match_scratch:SI 4 "=&h,X"))] + "TARGET_V8PLUS" + "* +{ + if (sparc_check_64 (operands[1], insn) <= 0) + output_asm_insn (\"srl %L1,0,%L1\", operands); + if (which_alternative == 1) + output_asm_insn (\"sllx %H1,32,%H1\", operands); + if (sparc_check_64 (operands[2], insn) <= 0) + output_asm_insn (\"srl %L2,0,%L2\", operands); + if (which_alternative == 1) + return \"or %L1,%H1,%H1\;sllx %H2,32,%L1\;or %L2,%L1,%L1\;mulx %H1,%L1,%L0\;srlx %L0,32,%H0\"; + else + return \"sllx %H1,32,%3\;sllx %H2,32,%4\;or %L1,%3,%3\;or %L2,%4,%4\;mulx %3,%4,%3\;srlx %3,32,%H0\;mov %3,%L0\"; +}" + [(set_attr "length" "9,8")]) + ;; It is not known whether this will match. 
(define_insn "*cmp_mul_set" @@ -4010,11 +4054,35 @@ } }") +;; V9 puts the 64 bit product in a 64 bit register. Only out or global +;; registers can hold 64 bit values in the V8plus environment. +(define_insn "*mulsidi3_v8plus" + [(set (match_operand:DI 0 "register_operand" "=h,r") + (mult:DI (sign_extend:DI (match_operand:SI 1 "register_operand" "r,r")) + (sign_extend:DI (match_operand:SI 2 "register_operand" "r,r")))) + (clobber (match_scratch:SI 3 "=X,&h"))] + "TARGET_V8PLUS" + "@ + smul %1,%2,%L0\;srlx %L0,32,%H0 + smul %1,%2,%3\;srlx %3,32,%H0\;mov %3,%L0" + [(set_attr "length" "2,3")]) + +(define_insn "*const_mulsidi3_v8plus" + [(set (match_operand:DI 0 "register_operand" "=h,r") + (mult:DI (sign_extend:DI (match_operand:SI 1 "register_operand" "r,r")) + (match_operand:SI 2 "small_int" "I,I"))) + (clobber (match_scratch:SI 3 "=X,&h"))] + "TARGET_V8PLUS" + "@ + smul %1,%2,%L0\;srlx %L0,32,%H0 + smul %1,%2,%3\;srlx %3,32,%H0\;mov %3,%L0" + [(set_attr "length" "2,3")]) + (define_insn "*mulsidi3_sp32" [(set (match_operand:DI 0 "register_operand" "=r") (mult:DI (sign_extend:DI (match_operand:SI 1 "register_operand" "r")) (sign_extend:DI (match_operand:SI 2 "register_operand" "r"))))] - "TARGET_HARD_MUL" + "TARGET_HARD_MUL32" "* { return TARGET_SPARCLET ? 
\"smuld %1,%2,%L0\" : \"smul %1,%2,%L0\;rd %%y,%H0\"; @@ -4052,15 +4120,34 @@ emit_insn (gen_const_smulsi3_highpart (operands[0], operands[1], operands[2])); DONE; } + if (TARGET_V8PLUS) + { + emit_insn (gen_smulsidi3_highpart_v8plus (operands[0], operands[1], + operands[2], GEN_INT (32))); + DONE; + } }") +(define_insn "smulsidi3_highpart_v8plus" + [(set (match_operand:SI 0 "register_operand" "=h,r") + (truncate:SI + (lshiftrt:DI (mult:DI (sign_extend:DI (match_operand:SI 1 "register_operand" "r,r")) + (sign_extend:DI (match_operand:SI 2 "register_operand" "r,r"))) + (match_operand:SI 3 "const_int_operand" "i,i")))) + (clobber (match_scratch:SI 4 "=X,&h"))] + "TARGET_V8PLUS" + "@ + smul %1,%2,%0\;srlx %0,%3,%0 + smul %1,%2,%4\;srlx %4,%3,%0" + [(set_attr "length" "2")]) + (define_insn "*smulsidi3_highpart_sp32" [(set (match_operand:SI 0 "register_operand" "=r") (truncate:SI (lshiftrt:DI (mult:DI (sign_extend:DI (match_operand:SI 1 "register_operand" "r")) (sign_extend:DI (match_operand:SI 2 "register_operand" "r"))) (const_int 32))))] - "TARGET_HARD_MUL" + "TARGET_HARD_MUL32" "smul %1,%2,%%g0\;rd %%y,%0" [(set_attr "length" "2")]) @@ -4070,7 +4157,7 @@ (lshiftrt:DI (mult:DI (sign_extend:DI (match_operand:SI 1 "register_operand" "r")) (match_operand:SI 2 "register_operand" "r")) (const_int 32))))] - "TARGET_V8 || TARGET_SPARCLITE || TARGET_SPARCLET || TARGET_DEPRECATED_V8_INSNS" + "TARGET_HARD_MUL32" "smul %1,%2,%%g0\;rd %%y,%0" [(set_attr "length" "2")]) @@ -4086,13 +4173,29 @@ emit_insn (gen_const_umulsidi3 (operands[0], operands[1], operands[2])); DONE; } + if (TARGET_V8PLUS) + { + emit_insn (gen_umulsidi3_v8plus (operands[0], operands[1], operands[2])); + DONE; + } }") +(define_insn "umulsidi3_v8plus" + [(set (match_operand:DI 0 "register_operand" "=h,r") + (mult:DI (zero_extend:DI (match_operand:SI 1 "register_operand" "r,r")) + (zero_extend:DI (match_operand:SI 2 "register_operand" "r,r")))) + (clobber (match_scratch:SI 3 "=X,&h"))] + "TARGET_V8PLUS" + "@ + 
umul %1,%2,%L0\;srlx %L0,32,%H0 + umul %1,%2,%3\;srlx %3,32,%H0\;mov %3,%L0" + [(set_attr "length" "2,3")]) + (define_insn "*umulsidi3_sp32" [(set (match_operand:DI 0 "register_operand" "=r") (mult:DI (zero_extend:DI (match_operand:SI 1 "register_operand" "r")) (zero_extend:DI (match_operand:SI 2 "register_operand" "r"))))] - "TARGET_HARD_MUL" + "TARGET_HARD_MUL32" "* { return TARGET_SPARCLET ? \"umuld %1,%2,%L0\" : \"umul %1,%2,%L0\;rd %%y,%H0\"; @@ -4107,7 +4210,7 @@ [(set (match_operand:DI 0 "register_operand" "=r") (mult:DI (zero_extend:DI (match_operand:SI 1 "register_operand" "r")) (match_operand:SI 2 "uns_small_int" "")))] - "TARGET_HARD_MUL" + "TARGET_HARD_MUL32" "* { return TARGET_SPARCLET ? \"umuld %1,%2,%L0\" : \"umul %1,%2,%L0\;rd %%y,%H0\"; @@ -4116,6 +4219,17 @@ (if_then_else (eq_attr "isa" "sparclet") (const_int 1) (const_int 2)))]) +(define_insn "const_umulsidi3_v8plus" + [(set (match_operand:DI 0 "register_operand" "=h,r") + (mult:DI (zero_extend:DI (match_operand:SI 1 "register_operand" "r,r")) + (match_operand:SI 2 "uns_small_int" ""))) + (clobber (match_scratch:SI 3 "=X,h"))] + "TARGET_V8PLUS" + "@ + umul %1,%2,%L0\;srlx %L0,32,%H0 + umul %1,%2,%3\;srlx %3,32,%H0\;mov %3,%L0" + [(set_attr "length" "2,3")]) + (define_expand "umulsi3_highpart" [(set (match_operand:SI 0 "register_operand" "") (truncate:SI @@ -4125,6 +4239,12 @@ "TARGET_HARD_MUL" " { + if (TARGET_V8PLUS) + { + emit_insn (gen_umulsidi3_highpart_v8plus (operands[0], operands[1], + operands[2], GEN_INT (32))); + DONE; + } if (CONSTANT_P (operands[2])) { emit_insn (gen_const_umulsi3_highpart (operands[0], operands[1], operands[2])); @@ -4132,13 +4252,39 @@ } }") +(define_insn "umulsidi3_highpart_v8plus" + [(set (match_operand:SI 0 "register_operand" "=h,r") + (truncate:SI + (lshiftrt:DI (mult:DI (zero_extend:DI (match_operand:SI 1 "register_operand" "r,r")) + (zero_extend:DI (match_operand:SI 2 "register_operand" "r,r"))) + (match_operand:SI 3 "const_int_operand" "i,i")))) + (clobber 
(match_scratch:SI 4 "=X,h"))] + "TARGET_V8PLUS" + "@ + umul %1,%2,%0\;srlx %0,%3,%0 + umul %1,%2,%4\;srlx %4,%3,%0" + [(set_attr "length" "2")]) + +(define_insn "const_umulsi3_highpart_v8plus" + [(set (match_operand:SI 0 "register_operand" "=h,r") + (truncate:SI + (lshiftrt:DI (mult:DI (zero_extend:DI (match_operand:SI 1 "register_operand" "r,r")) + (match_operand:SI 2 "uns_small_int" "")) + (match_operand:SI 3 "const_int_operand" "i,i")))) + (clobber (match_scratch:SI 4 "=X,h"))] + "TARGET_V8PLUS" + "@ + umul %1,%2,%0\;srlx %0,%3,%0 + umul %1,%2,%4\;srlx %4,%3,%0" + [(set_attr "length" "2")]) + (define_insn "*umulsidi3_highpart_sp32" [(set (match_operand:SI 0 "register_operand" "=r") (truncate:SI (lshiftrt:DI (mult:DI (zero_extend:DI (match_operand:SI 1 "register_operand" "r")) (zero_extend:DI (match_operand:SI 2 "register_operand" "r"))) (const_int 32))))] - "TARGET_HARD_MUL" + "TARGET_HARD_MUL32" "umul %1,%2,%%g0\;rd %%y,%0" [(set_attr "length" "2")]) @@ -4148,7 +4294,7 @@ (lshiftrt:DI (mult:DI (zero_extend:DI (match_operand:SI 1 "register_operand" "r")) (match_operand:SI 2 "uns_small_int" "")) (const_int 32))))] - "TARGET_HARD_MUL" + "TARGET_HARD_MUL32" "umul %1,%2,%%g0\;rd %%y,%0" [(set_attr "length" "2")]) @@ -4156,21 +4302,27 @@ ;; a y register write and a use of it for correct results. 
(define_insn "divsi3" - [(set (match_operand:SI 0 "register_operand" "=r") - (div:SI (match_operand:SI 1 "register_operand" "r") - (match_operand:SI 2 "arith_operand" "rI"))) - (clobber (match_scratch:SI 3 "=&r"))] + [(set (match_operand:SI 0 "register_operand" "=r,r") + (div:SI (match_operand:SI 1 "register_operand" "r,r") + (match_operand:SI 2 "move_operand" "rI,m"))) + (clobber (match_scratch:SI 3 "=&r,&r"))] "TARGET_V8 || TARGET_DEPRECATED_V8_INSNS" "* { + if (which_alternative == 0) if (TARGET_V9) return \"sra %1,31,%3\;wr %%g0,%3,%%y\;sdiv %1,%2,%0\"; else return \"sra %1,31,%3\;wr %%g0,%3,%%y\;nop\;nop\;nop\;sdiv %1,%2,%0\"; + else + if (TARGET_V9) + return \"sra %1,31,%3\;wr %%g0,%3,%%y\;ld %2,%3\;sdiv %1,%3,%0\"; + else + return \"sra %1,31,%3\;wr %%g0,%3,%%y\;ld %2,%3\;nop\;nop\;sdiv %1,%3,%0\"; }" [(set (attr "length") (if_then_else (eq_attr "isa" "v9") - (const_int 3) (const_int 6)))]) + (const_int 4) (const_int 7)))]) (define_insn "divdi3" [(set (match_operand:DI 0 "register_operand" "=r") @@ -4202,19 +4354,28 @@ (const_int 3) (const_int 6)))]) (define_insn "udivsi3" - [(set (match_operand:SI 0 "register_operand" "=r") - (udiv:SI (match_operand:SI 1 "register_operand" "r") - (match_operand:SI 2 "arith_operand" "rI")))] + [(set (match_operand:SI 0 "register_operand" "=r,&r,&r") + (udiv:SI (match_operand:SI 1 "reg_or_nonsymb_mem_operand" "r,r,m") + (match_operand:SI 2 "move_operand" "rI,m,r")))] "TARGET_V8 || TARGET_DEPRECATED_V8_INSNS" "* { + output_asm_insn (\"wr %%g0,%%g0,%%y\", operands); + switch (which_alternative) + { + default: if (TARGET_V9) - return \"wr %%g0,%%g0,%%y\;udiv %1,%2,%0\"; - else - return \"wr %%g0,%%g0,%%y\;nop\;nop\;nop\;udiv %1,%2,%0\"; + return \"udiv %1,%2,%0\"; + return \"nop\;nop\;nop\;udiv %1,%2,%0\"; + case 1: + return \"ld %2,%0\;nop\;nop\;udiv %1,%0,%0\"; + case 2: + return \"ld %1,%0\;nop\;nop\;udiv %0,%2,%0\"; + } }" [(set (attr "length") - (if_then_else (eq_attr "isa" "v9") + (if_then_else (and (eq_attr "isa" "v9") + 
(eq_attr "alternative" "0")) (const_int 2) (const_int 5)))]) (define_insn "udivdi3" @@ -4341,13 +4502,13 @@ (match_operand:SI 2 "" ""))) (clobber (match_operand:SI 3 "register_operand" ""))] "GET_CODE (operands[2]) == CONST_INT - && !SMALL_INT (operands[2]) + && !SMALL_INT32 (operands[2]) && (INTVAL (operands[2]) & 0x3ff) == 0x3ff" [(set (match_dup 3) (match_dup 4)) (set (match_dup 0) (and:SI (not:SI (match_dup 3)) (match_dup 1)))] " { - operands[4] = GEN_INT (~INTVAL (operands[2])); + operands[4] = GEN_INT (~INTVAL (operands[2]) & 0xffffffff); }") (define_insn "*and_not_di_sp32" @@ -4436,13 +4597,13 @@ (match_operand:SI 2 "" ""))) (clobber (match_operand:SI 3 "register_operand" ""))] "GET_CODE (operands[2]) == CONST_INT - && !SMALL_INT (operands[2]) + && !SMALL_INT32 (operands[2]) && (INTVAL (operands[2]) & 0x3ff) == 0x3ff" [(set (match_dup 3) (match_dup 4)) (set (match_dup 0) (ior:SI (not:SI (match_dup 3)) (match_dup 1)))] " { - operands[4] = GEN_INT (~INTVAL (operands[2])); + operands[4] = GEN_INT (~INTVAL (operands[2]) & 0xffffffff); }") (define_insn "*or_not_di_sp32" @@ -4479,7 +4640,7 @@ "" "") -(define_insn "*xorsi3_sp32" +(define_insn "*xordi3_sp32" [(set (match_operand:DI 0 "register_operand" "=r,b") (xor:DI (match_operand:DI 1 "arith_double_operand" "%r,b") (match_operand:DI 2 "arith_double_operand" "rHI,b")))] @@ -4506,7 +4667,8 @@ } return \"xor %1,%2,%0\;xor %R1,%R2,%R0\"; }" - [(set_attr "length" "2,1")]) + [(set_attr "length" "2,1") + (set_attr "type" "ialu,fp")]) (define_insn "*xordi3_sp64" [(set (match_operand:DI 0 "register_operand" "=r") @@ -4531,13 +4693,13 @@ (match_operand:SI 2 "" ""))) (clobber (match_operand:SI 3 "register_operand" ""))] "GET_CODE (operands[2]) == CONST_INT - && !SMALL_INT (operands[2]) + && !SMALL_INT32 (operands[2]) && (INTVAL (operands[2]) & 0x3ff) == 0x3ff" [(set (match_dup 3) (match_dup 4)) (set (match_dup 0) (not:SI (xor:SI (match_dup 3) (match_dup 1))))] " { - operands[4] = GEN_INT (~INTVAL (operands[2])); + 
operands[4] = GEN_INT (~INTVAL (operands[2]) & 0xffffffff); }") (define_split @@ -4546,13 +4708,13 @@ (match_operand:SI 2 "" "")))) (clobber (match_operand:SI 3 "register_operand" ""))] "GET_CODE (operands[2]) == CONST_INT - && !SMALL_INT (operands[2]) + && !SMALL_INT32 (operands[2]) && (INTVAL (operands[2]) & 0x3ff) == 0x3ff" [(set (match_dup 3) (match_dup 4)) (set (match_dup 0) (xor:SI (match_dup 3) (match_dup 1)))] " { - operands[4] = GEN_INT (~INTVAL (operands[2])); + operands[4] = GEN_INT (~INTVAL (operands[2]) & 0xffffffff); }") ;; xnor patterns. Note that (a ^ ~b) == (~a ^ b) == ~(a ^ b). @@ -4849,7 +5011,7 @@ { if (which_alternative == 0) return \"xnor %1,0,%0\"; - if (which_alternative == 1) + if (which_alternative == 2) return \"fnot1s %1,%0\"; if (TARGET_LIVE_G0) output_asm_insn (\"and %%g0,0,%%g0\", operands); @@ -5138,7 +5300,23 @@ }" [(set_attr "type" "shift")]) -(define_insn "ashldi3" +(define_expand "ashldi3" + [(set (match_operand:DI 0 "register_operand" "=r") + (ashift:DI (match_operand:DI 1 "register_operand" "r") + (match_operand:SI 2 "arith_operand" "rI")))] + "TARGET_ARCH64 || TARGET_V8PLUS" + " +{ + if (! 
TARGET_ARCH64) + { + if (GET_CODE (operands[2]) == CONST_INT) + FAIL; + emit_insn (gen_ashldi3_v8plus (operands[0], operands[1], operands[2])); + DONE; + } +}") + +(define_insn "" [(set (match_operand:DI 0 "register_operand" "=r") (ashift:DI (match_operand:DI 1 "register_operand" "r") (match_operand:SI 2 "arith_operand" "rI")))] @@ -5152,6 +5330,15 @@ return \"sllx %1,%2,%0\"; }") +(define_insn "ashldi3_v8plus" + [(set (match_operand:DI 0 "register_operand" "=&h,&h,r") + (ashift:DI (match_operand:DI 1 "register_operand" "r,0,r") + (match_operand:SI 2 "arith_operand" "rI,rI,rI"))) + (clobber (match_scratch:SI 3 "=X,X,&h"))] + "TARGET_V8PLUS" + "*return sparc_v8plus_shift (operands, insn, \"sllx\");" + [(set_attr "length" "5,5,6")]) + (define_insn "*cmp_cc_ashift_1" [(set (reg:CC_NOOV 100) (compare:CC_NOOV (ashift:SI (match_operand:SI 0 "register_operand" "r") @@ -5186,7 +5373,21 @@ }" [(set_attr "type" "shift")]) -(define_insn "ashrdi3" +(define_expand "ashrdi3" + [(set (match_operand:DI 0 "register_operand" "=r") + (ashiftrt:DI (match_operand:DI 1 "register_operand" "r") + (match_operand:SI 2 "arith_operand" "rI")))] + "TARGET_ARCH64 || TARGET_V8PLUS" + " +if (! 
TARGET_ARCH64) + { + if (GET_CODE (operands[2]) == CONST_INT) + FAIL; /* prefer generic code in this case */ + emit_insn (gen_ashrdi3_v8plus (operands[0], operands[1], operands[2])); + DONE; + }") + +(define_insn "" [(set (match_operand:DI 0 "register_operand" "=r") (ashiftrt:DI (match_operand:DI 1 "register_operand" "r") (match_operand:SI 2 "arith_operand" "rI")))] @@ -5200,6 +5401,15 @@ return \"srax %1,%2,%0\"; }") +(define_insn "ashrdi3_v8plus" + [(set (match_operand:DI 0 "register_operand" "=&h,&h,r") + (ashiftrt:DI (match_operand:DI 1 "register_operand" "r,0,r") + (match_operand:SI 2 "arith_operand" "rI,rI,rI"))) + (clobber (match_scratch:SI 3 "=X,X,&h"))] + "TARGET_V8PLUS" + "*return sparc_v8plus_shift (operands, insn, \"srax\");" + [(set_attr "length" "5,5,6")]) + (define_insn "lshrsi3" [(set (match_operand:SI 0 "register_operand" "=r") (lshiftrt:SI (match_operand:SI 1 "register_operand" "r") @@ -5215,7 +5425,21 @@ }" [(set_attr "type" "shift")]) -(define_insn "lshrdi3" +(define_expand "lshrdi3" + [(set (match_operand:DI 0 "register_operand" "=r") + (lshiftrt:DI (match_operand:DI 1 "register_operand" "r") + (match_operand:SI 2 "arith_operand" "rI")))] + "TARGET_ARCH64 || TARGET_V8PLUS" + " +if (! 
TARGET_ARCH64) + { + if (GET_CODE (operands[2]) == CONST_INT) + FAIL; + emit_insn (gen_lshrdi3_v8plus (operands[0], operands[1], operands[2])); + DONE; + }") + +(define_insn "" [(set (match_operand:DI 0 "register_operand" "=r") (lshiftrt:DI (match_operand:DI 1 "register_operand" "r") (match_operand:SI 2 "arith_operand" "rI")))] @@ -5228,6 +5452,15 @@ return \"srlx %1,%2,%0\"; }") + +(define_insn "lshrdi3_v8plus" + [(set (match_operand:DI 0 "register_operand" "=&h,&h,r") + (lshiftrt:DI (match_operand:DI 1 "register_operand" "r,0,r") + (match_operand:SI 2 "arith_operand" "rI,rI,rI"))) + (clobber (match_scratch:SI 3 "=X,X,&h"))] + "TARGET_V8PLUS" + "*return sparc_v8plus_shift (operands, insn, \"srlx\");" + [(set_attr "length" "5,5,6")]) ;; Unconditional and other jump instructions ;; On the Sparc, by setting the annul bit on an unconditional branch, the @@ -5658,7 +5891,15 @@ (use (reg:SI 31))] "! TARGET_EPILOGUE" "* return output_return (operands);" - [(set_attr "type" "multi")]) + [(set_attr "type" "return")]) + +(define_peephole + [(set (match_operand:SI 0 "register_operand" "=r") + (match_operand:SI 1 "arith_operand" "rI")) + (parallel [(return) + (use (reg:SI 31))])] + "sparc_return_peephole_ok (operands[0], operands[1])" + "return %%i7+8\;mov %Y1,%Y0") (define_insn "nop" [(const_int 0)] @@ -5684,10 +5925,10 @@ ;; ??? Doesn't work with -mflat. (define_expand "nonlocal_goto" - [(match_operand:SI 0 "" "") + [(match_operand:SI 0 "general_operand" "") (match_operand:SI 1 "general_operand" "") (match_operand:SI 2 "general_operand" "") - (match_operand:SI 3 "general_operand" "")] + (match_operand:SI 3 "" "")] "" " { @@ -5715,15 +5956,20 @@ and reload the appropriate value into %fp. */ emit_move_insn (frame_pointer_rtx, stack); - /* Put in the static chain register the nonlocal label address. */ - emit_move_insn (static_chain_rtx, chain); - /* USE of frame_pointer_rtx added for consistency; not clear if really needed. 
*/ - emit_insn (gen_rtx (USE, VOIDmode, frame_pointer_rtx)); + /*emit_insn (gen_rtx (USE, VOIDmode, frame_pointer_rtx));*/ emit_insn (gen_rtx (USE, VOIDmode, stack_pointer_rtx)); - emit_insn (gen_rtx (USE, VOIDmode, static_chain_rtx)); /* Return, restoring reg window and jumping to goto handler. */ + if (TARGET_V9 && GET_CODE (chain) == CONST_INT) + { + emit_insn (gen_goto_handler_and_restore_v9 (static_chain_rtx, chain)); + emit_barrier (); + DONE; + } + /* Put in the static chain register the nonlocal label address. */ + emit_move_insn (static_chain_rtx, chain); + emit_insn (gen_rtx (USE, VOIDmode, static_chain_rtx)); emit_insn (gen_goto_handler_and_restore ()); emit_barrier (); DONE; @@ -5733,22 +5979,32 @@ (define_insn "flush_register_windows" [(unspec_volatile [(const_int 0)] 1)] "" - ;; ??? Use TARGET_V9 instead? - "* return TARGET_ARCH64 ? \"flushw\" : \"ta 3\";" + "* return TARGET_V9 ? \"flushw\" : \"ta 3\";" [(set_attr "type" "misc")]) (define_insn "goto_handler_and_restore" - [(unspec_volatile [(const_int 0)] 2) - (use (reg:SI 8))] - "" + [(unspec_volatile [(reg:SI 8)] 2)] + "! TARGET_V9" "jmp %%o0+0\;restore" [(set_attr "type" "misc") (set_attr "length" "2")]) -;; Implement setjmp. Step one, set up the buffer. +(define_insn "goto_handler_and_restore_v9" + [(unspec_volatile [(reg:SI 8) + (match_operand:SI 0 "register_operand" "=r,r") + (match_operand:SI 1 "const_int_operand" "I,n")] 3)] + "TARGET_V9" + "@ + return %%o0+0\;mov %1,%Y0 + sethi %%hi(%1),%0\;return %%o0+0\;or %Y0,%%lo(%1),%Y0" + [(set_attr "type" "misc") + (set_attr "length" "2,3")]) + +;; Pattern for use after a setjmp to store FP and the return register +;; into the stack area. -(define_expand "builtin_setjmp_setup" - [(unspec [(match_operand 0 "" "")] 3)] +(define_expand "setjmp" + [(const_int 0)] "" " { @@ -6146,54 +6402,6 @@ && ! FP_REG_P (operands[0]) && ! FP_REG_P (operands[1])" "orcc %1,0,%0") -;; Do {sign,zero}-extended compares somewhat more efficiently. -;; ??? 
Is this now the Right Way to do this? Or will SCRATCH -;; eventually have some impact here? - -(define_peephole - [(set (match_operand:HI 0 "register_operand" "") - (match_operand:HI 1 "memory_operand" "")) - (set (match_operand:SI 2 "register_operand" "") - (sign_extend:SI (match_dup 0))) - (set (reg:CC 100) - (compare:CC (match_dup 2) - (const_int 0)))] - "" - "ldsh %1,%0\;orcc %0,0,%2") - -(define_peephole - [(set (match_operand:HI 0 "register_operand" "") - (match_operand:HI 1 "memory_operand" "")) - (set (match_operand:DI 2 "register_operand" "") - (sign_extend:DI (match_dup 0))) - (set (reg:CCX 100) - (compare:CCX (match_dup 2) - (const_int 0)))] - "TARGET_ARCH64" - "ldsh %1,%0\;orcc %0,0,%2") - -(define_peephole - [(set (match_operand:QI 0 "register_operand" "") - (match_operand:QI 1 "memory_operand" "")) - (set (match_operand:SI 2 "register_operand" "") - (sign_extend:SI (match_dup 0))) - (set (reg:CC 100) - (compare:CC (match_dup 2) - (const_int 0)))] - "" - "ldsb %1,%0\;orcc %0,0,%2") - -(define_peephole - [(set (match_operand:QI 0 "register_operand" "") - (match_operand:QI 1 "memory_operand" "")) - (set (match_operand:DI 2 "register_operand" "") - (sign_extend:DI (match_dup 0))) - (set (reg:CCX 100) - (compare:CCX (match_dup 2) - (const_int 0)))] - "TARGET_ARCH64" - "ldsb %1,%0\;orcc %0,0,%2") - ;; Floating-point move peepholes ;; ??? v9: Do we want similar ones? @@ -6235,6 +6443,9 @@ { if (! TARGET_ARCH64 && current_function_returns_struct) return \"jmp %%i7+12\;restore %%g0,%1,%Y0\"; + else if (TARGET_V9 && (GET_CODE (operands[1]) == CONST_INT + || IN_OR_GLOBAL_P (operands[1]))) + return \"return %%i7+8\;mov %Y1,%Y0\"; else return \"ret\;restore %%g0,%1,%Y0\"; }" @@ -6249,6 +6460,9 @@ { if (! 
TARGET_ARCH64 && current_function_returns_struct) return \"jmp %%i7+12\;restore %%g0,%1,%Y0\"; + else if (TARGET_V9 && (GET_CODE (operands[1]) == CONST_INT + || IN_OR_GLOBAL_P (operands[1]))) + return \"return %%i7+8\;mov %Y1,%Y0\"; else return \"ret\;restore %%g0,%1,%Y0\"; }" @@ -6263,6 +6477,9 @@ { if (! TARGET_ARCH64 && current_function_returns_struct) return \"jmp %%i7+12\;restore %%g0,%1,%Y0\"; + else if (TARGET_V9 && (GET_CODE (operands[1]) == CONST_INT + || IN_OR_GLOBAL_P (operands[1]))) + return \"return %%i7+8\;mov %Y1,%Y0\"; else return \"ret\;restore %%g0,%1,%Y0\"; }" @@ -6280,6 +6497,8 @@ { if (! TARGET_ARCH64 && current_function_returns_struct) return \"jmp %%i7+12\;restore %%g0,%1,%Y0\"; + else if (TARGET_V9 && IN_OR_GLOBAL_P (operands[1])) + return \"return %%i7+8\;mov %Y1,%Y0\"; else return \"ret\;restore %%g0,%1,%Y0\"; }" @@ -6287,16 +6506,19 @@ (define_insn "*return_addsi" [(set (match_operand:SI 0 "restore_operand" "") - (plus:SI (match_operand:SI 1 "arith_operand" "%r") + (plus:SI (match_operand:SI 1 "register_operand" "r") (match_operand:SI 2 "arith_operand" "rI"))) (return)] - "! TARGET_EPILOGUE && ! TARGET_LIVE_G0 - && (register_operand (operands[1], SImode) - || register_operand (operands[2], SImode))" + "! TARGET_EPILOGUE && ! TARGET_LIVE_G0" "* { if (! TARGET_ARCH64 && current_function_returns_struct) return \"jmp %%i7+12\;restore %r1,%2,%Y0\"; + /* If operands are global or in registers, can use return */ + else if (TARGET_V9 && IN_OR_GLOBAL_P (operands[1]) + && (GET_CODE (operands[2]) == CONST_INT + || IN_OR_GLOBAL_P (operands[2]))) + return \"return %%i7+8\;add %Y1,%Y2,%Y0\"; else return \"ret\;restore %r1,%2,%Y0\"; }" -- 2.7.4