From c65ebc55cab8f22f366038c7536b30c37d71837f Mon Sep 17 00:00:00 2001 From: Jim Wilson Date: Thu, 9 Mar 2000 00:26:04 +0000 Subject: [PATCH] Add ia64 port. From-SVN: r32438 --- gcc/ChangeLog | 6 + gcc/config/ia64/build.hacks | 97 ++ gcc/config/ia64/crtbegin.asm | 154 ++ gcc/config/ia64/crtend.asm | 91 ++ gcc/config/ia64/elf.h | 18 + gcc/config/ia64/ia64-protos.h | 101 ++ gcc/config/ia64/ia64.c | 3279 +++++++++++++++++++++++++++++++++++++ gcc/config/ia64/ia64.h | 2795 ++++++++++++++++++++++++++++++++ gcc/config/ia64/ia64.md | 3591 +++++++++++++++++++++++++++++++++++++++++ gcc/config/ia64/ia64intrin.h | 60 + gcc/config/ia64/lib1funcs.asm | 635 ++++++++ gcc/config/ia64/linux.h | 29 + gcc/config/ia64/sysv4.h | 248 +++ gcc/config/ia64/t-ia64 | 41 + gcc/config/ia64/xm-ia64.h | 61 + gcc/configure | 21 + gcc/configure.in | 21 + 17 files changed, 11248 insertions(+) create mode 100644 gcc/config/ia64/build.hacks create mode 100644 gcc/config/ia64/crtbegin.asm create mode 100644 gcc/config/ia64/crtend.asm create mode 100644 gcc/config/ia64/elf.h create mode 100644 gcc/config/ia64/ia64-protos.h create mode 100644 gcc/config/ia64/ia64.c create mode 100644 gcc/config/ia64/ia64.h create mode 100644 gcc/config/ia64/ia64.md create mode 100644 gcc/config/ia64/ia64intrin.h create mode 100644 gcc/config/ia64/lib1funcs.asm create mode 100644 gcc/config/ia64/linux.h create mode 100644 gcc/config/ia64/sysv4.h create mode 100644 gcc/config/ia64/t-ia64 create mode 100644 gcc/config/ia64/xm-ia64.h diff --git a/gcc/ChangeLog b/gcc/ChangeLog index 89c56a5..5e2f966 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,3 +1,9 @@ +Wed Mar 8 16:19:42 2000 Jim Wilson + + * configure.in (ia64*-*-elf*, ia64*-*-linux*): New. + * configure: Regenerate. + * config/ia64: New. + 2000-03-08 Zack Weinberg * Makefile.in (LIBCPP_DEPS): New macro. 
diff --git a/gcc/config/ia64/build.hacks b/gcc/config/ia64/build.hacks new file mode 100644 index 0000000..5da0d83 --- /dev/null +++ b/gcc/config/ia64/build.hacks @@ -0,0 +1,97 @@ +The gcse.c patch fixes an optimization problem. This is probably not the right +solution, but it was quick. I will replace with a better solution later. + +The libio/libstdc++ patches are useful if you have a version of glibc without +thread support. There is no official ia64 glibc version yet, and some of the +unofficial ones in common use are missing thread support. libio/libstdc++ +assume that glibc always has thread support, so we need to patch them until +the official ia64 glibc is available. + +Index: gcc/gcse.c +=================================================================== +RCS file: /cvs/cvsfiles/devo/gcc/gcse.c,v +retrieving revision 1.87 +diff -p -r1.87 gcse.c +*** gcse.c 2000/01/11 14:59:28 1.87 +--- gcse.c 2000/02/16 04:17:06 +*************** try_replace_reg (from, to, insn) +*** 4039,4045 **** + information. */ + if (!success && !note) + { +! if (!set) + return 0; + note = REG_NOTES (insn) = gen_rtx_EXPR_LIST (REG_EQUAL, + copy_rtx (SET_SRC (set)), +--- 4039,4048 ---- + information. */ + if (!success && !note) + { +! /* Don't add a REG_EQUAL note for a CCmode destination, because this +! confuses the code in cse.c that simplifies compare and branch +! instructions. */ +! 
if (!set || GET_MODE_CLASS (GET_MODE (SET_DEST (set))) == MODE_CC) + return 0; + note = REG_NOTES (insn) = gen_rtx_EXPR_LIST (REG_EQUAL, + copy_rtx (SET_SRC (set)), +Index: libio/configure.in +=================================================================== +RCS file: /cvs/cvsfiles/devo/libio/configure.in,v +retrieving revision 1.57 +diff -p -r1.57 configure.in +*** configure.in 1999/10/26 03:42:26 1.57 +--- configure.in 2000/02/16 04:17:56 +*************** case "${target}" in +*** 57,62 **** +--- 57,64 ---- + frags="linux.mt linuxaxp1.mt mtsafe.mt" ;; + *-linux-gnulibc1) + frags=linuxlibc1.mt ;; ++ # ??? glibc does not have thread support yet, so we can't use mtsafe.mt. ++ ia64*-linux-gnu) frags="linux.mt" ;; + *-linux-gnu) frags="linux.mt mtsafe.mt" ;; + *-sco3.2v[45]*) frags=sco4.mt ;; + *-isc*) frags=isc.mt ;; +Index: libstdc++/configure.in +=================================================================== +RCS file: /cvs/cvsfiles/devo/libstdc++/configure.in,v +retrieving revision 1.46 +diff -p -r1.46 configure.in +*** configure.in 1999/09/21 19:26:16 1.46 +--- configure.in 2000/02/16 04:17:57 +*************** fi +*** 89,94 **** +--- 89,96 ---- + case "${target}" in + alpha*-*-linux-gnulibc1) frags="${frags} linux.mt" ;; + powerpc*-*-linux-gnulibc1) frags="${frags} linux.mt" ;; ++ # ??? ia64 glibc port does not have thread support yet. ++ ia64*-*-linux-gnu) ;; + *-*-linux-gnu) frags="${frags} linux.mt" ;; + *-*-openbsd*) + case "x${enable_threads}" in +Index: libstdc++/stl/stl_config.h +=================================================================== +RCS file: /cvs/cvsfiles/devo/libstdc++/stl/stl_config.h,v +retrieving revision 1.17 +diff -p -r1.17 stl_config.h +*** stl_config.h 1999/12/24 16:21:31 1.17 +--- stl_config.h 2000/02/16 04:17:58 +*************** +*** 168,174 **** + # if defined(__linux__) + /* glibc pre 2.0 is very buggy. We have to disable thread for it. + It should be upgraded to glibc 2.0 or later. */ +! 
# if !defined(_NOTHREADS) && __GLIBC__ >= 2 && defined(_G_USING_THUNKS) + # define __STL_PTHREADS + # ifdef __STRICT_ANSI__ + /* Work around a bug in the glibc 2.0.x pthread.h. */ +--- 168,175 ---- + # if defined(__linux__) + /* glibc pre 2.0 is very buggy. We have to disable thread for it. + It should be upgraded to glibc 2.0 or later. */ +! /* ??? ia64 glibc port does not have thread support yet. */ +! # if !defined(_NOTHREADS) && __GLIBC__ >= 2 && defined(_G_USING_THUNKS) && !defined(__ia64__) + # define __STL_PTHREADS + # ifdef __STRICT_ANSI__ + /* Work around a bug in the glibc 2.0.x pthread.h. */ diff --git a/gcc/config/ia64/crtbegin.asm b/gcc/config/ia64/crtbegin.asm new file mode 100644 index 0000000..b77ad98 --- /dev/null +++ b/gcc/config/ia64/crtbegin.asm @@ -0,0 +1,154 @@ +/* Copyright (C) 2000 Free Software Foundation, Inc. + Contributed by Jes Sorensen, + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Library General Public License as + published by the Free Software Foundation; either version 2 of the + License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Library General Public License for more details. + + You should have received a copy of the GNU Library General Public + License along with the GNU C Library; see the file COPYING.LIB. If not, + write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330, + Boston, MA 02111-1307, USA. 
*/ + +.section .ctors,"aw","progbits" + .align 8 +__CTOR_LIST__: + data8 -1 + +.section .dtors,"aw","progbits" + .align 8 +__DTOR_LIST__: + data8 -1 + +.section .sdata + .type dtor_ptr#,@object + .size dtor_ptr#,8 +dtor_ptr: + data8 __DTOR_LIST__# + 8 + + .global __dso_handle# + .type __dso_handle#,@object + .size __dso_handle#,8 +#ifdef SHARED +.section .data +__dso_handle: + data8 __dso_handle# +#else +.section .bss +__dso_handle: + data8 0 +#endif + +/* + * Fragment of the ELF _fini routine that invokes our dtor cleanup. + * + * The code going into .fini is spread all over the place, thus we need + * to save gp in order to make sure that other bits don't get into any + * nasty surprises by expecting a gp that has suddenly changed. + */ +.section .fini,"ax","progbits" + { .mfb + st8 [r12] = gp, -16 + br.call.sptk.many b0 = __do_global_dtors_aux# + ;; + } + { .mmi + adds r12 = 16, r12 + ;; + ld8 gp = [r12] + ;; + } + +.text + + .align 16 + .proc __do_global_dtors_aux# + +__do_global_dtors_aux: +#ifndef SHARED + { .mii + alloc loc2 = ar.pfs, 0, 3, 0, 0 + addl loc0 = @gprel(dtor_ptr#), gp + mov loc1 = b0 + } +#else + /* + if (__cxa_finalize) + __cxa_finalize(__dso_handle) + */ + { .mii + alloc loc2 = ar.pfs, 1, 3, 0, 0 + addl loc0 = @gprel(dtor_ptr#), gp + addl r16 = @ltoff(@fptr(__cxa_finalize#)), gp + ;; + } + { .mmi + ld8 r16 = [r16] + ;; + addl r32 = @ltoff(__dso_handle#), gp + cmp.ne p7, p0 = r0, r16 + ;; + } + { .mmi + ld8 r32 = [r32] +(p7) ld8 r18 = [r16], 8 + mov loc1 = b0 + ;; + } + { .mib +(p7) ld8 gp = [r16] +(p7) mov b6 = r18 +(p7) br.call.sptk.many b0 = b6 + } +#endif + /* + do { + dtor_ptr++; + (*(dtor_ptr-1)) (); + } while (dtor_ptr); + */ + { .bbb + br.sptk.few 1f + ;; + } +0: + { .mmi + st8 [loc0] = r15 + ld8 r17 = [r16], 8 + ;; + } + { .mib + ld8 gp = [r16] + mov b6 = r17 + br.call.sptk.many b0 = b6 + } +1: + { .mmi + ld8 r15 = [loc0] + ;; + ld8 r16 = [r15], 8 + ;; + } + { .mfb + cmp.ne p6, p0 = r0, r16 +(p6) br.cond.sptk.few 0b + } + { .mii + mov 
b0 = loc1 + mov ar.pfs = loc2 + } + { .bbb + br.ret.sptk.many b0 + ;; + } + .endp __do_global_dtors_aux# + +#ifdef SHARED +.weak __cxa_finalize# +#endif diff --git a/gcc/config/ia64/crtend.asm b/gcc/config/ia64/crtend.asm new file mode 100644 index 0000000..3da0f96 --- /dev/null +++ b/gcc/config/ia64/crtend.asm @@ -0,0 +1,91 @@ +/* Copyright (C) 2000 Free Software Foundation, Inc. + Contributed by Jes Sorensen, + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Library General Public License as + published by the Free Software Foundation; either version 2 of the + License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Library General Public License for more details. + + You should have received a copy of the GNU Library General Public + License along with the GNU C Library; see the file COPYING.LIB. If not, + write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330, + Boston, MA 02111-1307, USA. */ + +.section .ctors,"aw","progbits" + .align 8 +__CTOR_END__: + data8 0 + +.section .dtors,"aw","progbits" + .align 8 +__DTOR_END__: + data8 0 + +/* + * Fragment of the ELF _init routine that invokes our dtor cleanup. + * + * The code going into .init is spread all over the place, thus we need + * to save gp in order to make sure that other bits don't get into any + * nasty surprises by expecting a gp that has suddenly changed. 
+ */ +.section .init,"ax","progbits" + { .mfb + st8 [r12] = gp, -16 + br.call.sptk.many b0 = __do_global_ctors_aux + ;; + } + { .mmi + adds r12 = 16, r12 + ;; + ld8 gp = [r12] + ;; + } + +.text + .align 16 + .proc __do_global_ctors_aux# +__do_global_ctors_aux: + /* + for (loc0 = __CTOR_END__-1; *p != -1; --p) + (*p) (); + */ + { .mii + alloc loc2 = ar.pfs, 0, 4, 0, 0 + addl loc0 = @ltoff(__CTOR_END__# - 8), gp + cmp.ne p6, p0 = r0, r0 + ;; + } + { .mfi + ld8 loc0 = [loc0] + mov loc1 = b0 + } +0: + { .mmi +(p6) ld8 r15 = [loc3], 8 + ;; +(p6) ld8 gp = [loc3] +(p6) mov b6 = r15 + } + { .mfb + ld8 loc3 = [loc0], -8 +(p6) br.call.sptk.many b0 = b6 + ;; + } + { .mfb + cmp.ne p6, p0 = -1, loc3 +(p6) br.cond.sptk.few 0b + } + { .mii + mov ar.pfs = loc2 + mov b0 = loc1 + } + { .bbb + br.ret.sptk.many b0 + ;; + } + .endp __do_global_ctors_aux# diff --git a/gcc/config/ia64/elf.h b/gcc/config/ia64/elf.h new file mode 100644 index 0000000..b6c4198 --- /dev/null +++ b/gcc/config/ia64/elf.h @@ -0,0 +1,18 @@ +/* Definitions for embedded ia64-elf target. */ + +#include "ia64/ia64.h" +#include "elfos.h" +#include "sysv4.h" + +/* svr4.h links with crti.o/crtn.o, but elfos.h does not. We override elfos.h + so that we can use the standard ELF Unix method. */ +#undef ENDFILE_SPEC +#define ENDFILE_SPEC "crtend.o%s crtn.o%s" + +#undef STARTFILE_SPEC +#define STARTFILE_SPEC "%{!shared: \ + %{!symbolic: \ + %{pg:gcrt0.o%s}%{!pg:%{p:mcrt0.o%s}%{!p:crt0.o%s}}}}\ + crti.o%s crtbegin.o%s" + +/* End of elf.h */ diff --git a/gcc/config/ia64/ia64-protos.h b/gcc/config/ia64/ia64-protos.h new file mode 100644 index 0000000..c2d06a5 --- /dev/null +++ b/gcc/config/ia64/ia64-protos.h @@ -0,0 +1,101 @@ +/* Definitions of target machine for GNU compiler for IA-64. + Copyright (C) 1999 Cygnus Solutions. + +This file is part of GNU CC. 
+ +GNU CC is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 2, or (at your option) +any later version. + +GNU CC is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with GNU CC; see the file COPYING. If not, write to +the Free Software Foundation, 59 Temple Place - Suite 330, +Boston, MA 02111-1307, USA. */ + +/* Variables defined in ia64.c. */ + +#ifdef RTX_CODE +extern rtx ia64_compare_op0, ia64_compare_op1; +#endif + +/* Functions defined in ia64.c */ + +#ifdef RTX_CODE +extern int call_operand PARAMS((rtx, enum machine_mode)); +extern int sdata_symbolic_operand PARAMS((rtx, enum machine_mode)); +extern int symbolic_operand PARAMS((rtx, enum machine_mode)); +extern int function_operand PARAMS((rtx, enum machine_mode)); +extern int setjmp_operand PARAMS((rtx, enum machine_mode)); +extern int move_operand PARAMS((rtx, enum machine_mode)); +extern int reg_or_0_operand PARAMS((rtx, enum machine_mode)); +extern int reg_or_6bit_operand PARAMS((rtx, enum machine_mode)); +extern int reg_or_8bit_operand PARAMS((rtx, enum machine_mode)); +extern int reg_or_8bit_adjusted_operand PARAMS((rtx, enum machine_mode)); +extern int reg_or_8bit_and_adjusted_operand PARAMS((rtx, enum machine_mode)); +extern int reg_or_14bit_operand PARAMS((rtx, enum machine_mode)); +extern int reg_or_22bit_operand PARAMS((rtx, enum machine_mode)); +extern int shift_count_operand PARAMS((rtx, enum machine_mode)); +extern int shift_32bit_count_operand PARAMS((rtx, enum machine_mode)); +extern int shladd_operand PARAMS((rtx, enum machine_mode)); +extern int reg_or_fp01_operand PARAMS((rtx, enum machine_mode)); +extern int 
normal_comparison_operator PARAMS((rtx, enum machine_mode)); +extern int adjusted_comparison_operator PARAMS((rtx, enum machine_mode)); +extern int call_multiple_values_operation PARAMS((rtx, enum machine_mode)); +#endif +extern int ia64_rap_fp_offset PARAMS((void)); +extern unsigned int ia64_compute_frame_size PARAMS((int)); +extern void save_restore_insns PARAMS((int)); +extern void ia64_expand_prologue PARAMS((void)); +extern void ia64_expand_epilogue PARAMS((void)); +extern void ia64_function_prologue PARAMS((FILE *, int)); +extern void ia64_funtion_epilogue PARAMS((FILE *, int)); +extern int ia64_direct_return PARAMS((void)); +#ifdef TREE_CODE +extern void ia64_setup_incoming_varargs PARAMS((CUMULATIVE_ARGS, int, tree, + int *, int)); +#ifdef RTX_CODE +extern rtx ia64_function_arg PARAMS((CUMULATIVE_ARGS *, enum machine_mode, + tree, int, int)); +extern void ia64_init_builtins PARAMS((void)); +extern rtx ia64_expand_builtin PARAMS((tree, rtx, rtx, enum machine_mode, int)); +#endif +extern int ia64_function_arg_partial_nregs PARAMS((CUMULATIVE_ARGS *, + enum machine_mode, + tree, int)); +extern void ia64_function_arg_advance PARAMS((CUMULATIVE_ARGS *, + enum machine_mode, + tree, int)); +#ifdef RTX_CODE +extern void ia64_va_start PARAMS((int, tree, rtx)); +extern rtx ia64_va_arg PARAMS((tree, tree)); +#endif +extern int ia64_return_in_memory PARAMS((tree)); +#ifdef RTX_CODE +extern rtx ia64_function_value PARAMS((tree, tree)); +#endif +#endif +#ifdef RTX_CODE +extern void ia64_print_operand_address PARAMS((FILE *, rtx)); +extern void ia64_print_operand PARAMS((FILE *, rtx, int)); +extern enum reg_class ia64_secondary_reload_class PARAMS((enum reg_class, + enum machine_mode, + rtx)); +#endif +#ifdef TREE_CODE +extern void ia64_asm_output_external PARAMS((FILE *, tree, char *)); +#endif +extern void ia64_override_options PARAMS((void)); +#ifdef RTX_CODE +extern void ia64_reorg PARAMS((rtx)); +#endif +extern int ia64_epilogue_uses PARAMS((int)); +#ifdef TREE_CODE 
+extern int ia64_valid_type_attribute PARAMS((tree, tree, tree, tree)); +extern void ia64_encode_section_info PARAMS((tree)); +#endif diff --git a/gcc/config/ia64/ia64.c b/gcc/config/ia64/ia64.c new file mode 100644 index 0000000..f7e9ebd --- /dev/null +++ b/gcc/config/ia64/ia64.c @@ -0,0 +1,3279 @@ +/* Definitions of target machine for GNU compiler. + Copyright (C) 1999 Cygnus Solutions. + Contributed by James E. Wilson and + David Mosberger . + +This file is part of GNU CC. + +GNU CC is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 2, or (at your option) +any later version. + +GNU CC is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with GNU CC; see the file COPYING. If not, write to +the Free Software Foundation, 59 Temple Place - Suite 330, +Boston, MA 02111-1307, USA. */ + +#include +#include +#include "config.h" +#include "rtl.h" +#include "tree.h" +#include "tm_p.h" +#include "regs.h" +#include "hard-reg-set.h" +#include "real.h" +#include "insn-config.h" +#include "conditions.h" +#include "insn-flags.h" +#include "output.h" +#include "insn-attr.h" +#include "flags.h" +#include "recog.h" +#include "expr.h" +#include "obstack.h" +#include "except.h" +#include "function.h" +#include "ggc.h" +#include "basic-block.h" + +/* This is used for communication between ASM_OUTPUT_LABEL and + ASM_OUTPUT_LABELREF. */ +int ia64_asm_output_label = 0; + +/* Define the information needed to generate branch and scc insns. This is + stored from the compare operation. 
*/ +struct rtx_def * ia64_compare_op0; +struct rtx_def * ia64_compare_op1; + +/* Register number where ar.pfs was saved in the prologue, or zero + if it was not saved. */ + +int ia64_arpfs_regno; + +/* Register number where rp was saved in the prologue, or zero if it was + not saved. */ + +int ia64_rp_regno; + +/* Register number where frame pointer was saved in the prologue, or zero + if it was not saved. */ + +int ia64_fp_regno; + +/* Number of input and local registers used. This is needed for the .regstk + directive, and also for debugging info. */ + +int ia64_input_regs; +int ia64_local_regs; + +/* If true, then we must emit a .regstk directive. */ + +int ia64_need_regstk; + +/* Register names for ia64_expand_prologue. */ +char *ia64_reg_numbers[96] = +{ "r32", "r33", "r34", "r35", "r36", "r37", "r38", "r39", + "r40", "r41", "r42", "r43", "r44", "r45", "r46", "r47", + "r48", "r49", "r50", "r51", "r52", "r53", "r54", "r55", + "r56", "r57", "r58", "r59", "r60", "r61", "r62", "r63", + "r64", "r65", "r66", "r67", "r68", "r69", "r70", "r71", + "r72", "r73", "r74", "r75", "r76", "r77", "r78", "r79", + "r80", "r81", "r82", "r83", "r84", "r85", "r86", "r87", + "r88", "r89", "r90", "r91", "r92", "r93", "r94", "r95", + "r96", "r97", "r98", "r99", "r100","r101","r102","r103", + "r104","r105","r106","r107","r108","r109","r110","r111", + "r112","r113","r114","r115","r116","r117","r118","r119", + "r120","r121","r122","r123","r124","r125","r126","r127"}; + +/* ??? These strings could be shared with REGISTER_NAMES. */ +char *ia64_input_reg_names[8] = +{ "in0", "in1", "in2", "in3", "in4", "in5", "in6", "in7" }; + +/* ??? These strings could be shared with REGISTER_NAMES. 
*/ +char *ia64_local_reg_names[80] = +{ "loc0", "loc1", "loc2", "loc3", "loc4", "loc5", "loc6", "loc7", + "loc8", "loc9", "loc10","loc11","loc12","loc13","loc14","loc15", + "loc16","loc17","loc18","loc19","loc20","loc21","loc22","loc23", + "loc24","loc25","loc26","loc27","loc28","loc29","loc30","loc31", + "loc32","loc33","loc34","loc35","loc36","loc37","loc38","loc39", + "loc40","loc41","loc42","loc43","loc44","loc45","loc46","loc47", + "loc48","loc49","loc50","loc51","loc52","loc53","loc54","loc55", + "loc56","loc57","loc58","loc59","loc60","loc61","loc62","loc63", + "loc64","loc65","loc66","loc67","loc68","loc69","loc70","loc71", + "loc72","loc73","loc74","loc75","loc76","loc77","loc78","loc79" }; + +/* ??? These strings could be shared with REGISTER_NAMES. */ +char *ia64_output_reg_names[8] = +{ "out0", "out1", "out2", "out3", "out4", "out5", "out6", "out7" }; + +/* String used with the -mfixed-range= option. */ +const char *ia64_fixed_range_string; + +/* Variables which are this size or smaller are put in the sdata/sbss + sections. */ + +int ia64_section_threshold; + +/* Return 1 if OP is a valid operand for the MEM of a CALL insn. */ + +int +call_operand (op, mode) + rtx op; + enum machine_mode mode; +{ + if (mode != GET_MODE (op)) + return 0; + + return (GET_CODE (op) == SYMBOL_REF || GET_CODE (op) == REG + || (GET_CODE (op) == SUBREG && GET_CODE (XEXP (op, 0)) == REG)); +} + +/* Return 1 if OP refers to a symbol in the sdata section. */ + +int +sdata_symbolic_operand (op, mode) + rtx op; + enum machine_mode mode; +{ + switch (GET_CODE (op)) + { + case SYMBOL_REF: + return XSTR (op, 0)[0] == SDATA_NAME_FLAG_CHAR; + + case CONST: + return (GET_CODE (XEXP (op, 0)) == PLUS + && GET_CODE (XEXP (XEXP (op, 0), 0)) == SYMBOL_REF + && XSTR (XEXP (XEXP (op, 0), 0), 0)[0] == SDATA_NAME_FLAG_CHAR); + default: + break; + } + + return 0; +} + +/* Return 1 if OP refers to a symbol. 
*/ + +int +symbolic_operand (op, mode) + rtx op; + enum machine_mode mode; +{ + switch (GET_CODE (op)) + { + case CONST: + case SYMBOL_REF: + case LABEL_REF: + return 1; + + default: + break; + } + return 0; +} + +/* Return 1 if OP refers to a function. */ + +int +function_operand (op, mode) + rtx op; + enum machine_mode mode; +{ + if (GET_CODE (op) == SYMBOL_REF && SYMBOL_REF_FLAG (op)) + return 1; + else + return 0; +} + +/* Return 1 if OP is setjmp or a similar function. */ + +/* ??? This is an unsatisfying solution. Should rethink. */ + +int +setjmp_operand (op, mode) + rtx op; + enum machine_mode mode; +{ + char *name; + int retval = 0; + + if (GET_CODE (op) != SYMBOL_REF) + return 0; + + name = XSTR (op, 0); + + /* The following code is borrowed from special_function_p in calls.c. */ + + /* Disregard prefix _, __ or __x. */ + if (name[0] == '_') + { + if (name[1] == '_' && name[2] == 'x') + name += 3; + else if (name[1] == '_') + name += 2; + else + name += 1; + } + + if (name[0] == 's') + { + retval + = ((name[1] == 'e' + && (! strcmp (name, "setjmp") + || ! strcmp (name, "setjmp_syscall"))) + || (name[1] == 'i' + && ! strcmp (name, "sigsetjmp")) + || (name[1] == 'a' + && ! strcmp (name, "savectx"))); + } + else if ((name[0] == 'q' && name[1] == 's' + && ! strcmp (name, "qsetjmp")) + || (name[0] == 'v' && name[1] == 'f' + && ! strcmp (name, "vfork"))) + retval = 1; + + return retval; +} + +/* Return 1 if OP is a general operand, but when pic exclude symbolic + operands. */ + +/* ??? If we drop no-pic support, can delete SYMBOL_REF, CONST, and LABEL_REF + from PREDICATE_CODES. */ + +int +move_operand (op, mode) + rtx op; + enum machine_mode mode; +{ + if (! TARGET_NO_PIC && symbolic_operand (op, mode)) + return 0; + + return general_operand (op, mode); +} + +/* Return 1 if OP is a register operand, or zero. 
*/ + +int +reg_or_0_operand (op, mode) + rtx op; + enum machine_mode mode; +{ + return (op == const0_rtx || register_operand (op, mode)); +} + +/* Return 1 if OP is a register operand, or a 6 bit immediate operand. */ + +int +reg_or_6bit_operand (op, mode) + rtx op; + enum machine_mode mode; +{ + return ((GET_CODE (op) == CONST_INT && CONST_OK_FOR_M (INTVAL (op))) + || GET_CODE (op) == CONSTANT_P_RTX + || register_operand (op, mode)); +} + +/* Return 1 if OP is a register operand, or an 8 bit immediate operand. */ + +int +reg_or_8bit_operand (op, mode) + rtx op; + enum machine_mode mode; +{ + return ((GET_CODE (op) == CONST_INT && CONST_OK_FOR_K (INTVAL (op))) + || GET_CODE (op) == CONSTANT_P_RTX + || register_operand (op, mode)); +} + +/* Return 1 if OP is a register operand, or an 8 bit adjusted immediate + operand. */ + +int +reg_or_8bit_adjusted_operand (op, mode) + rtx op; + enum machine_mode mode; +{ + return ((GET_CODE (op) == CONST_INT && CONST_OK_FOR_L (INTVAL (op))) + || GET_CODE (op) == CONSTANT_P_RTX + || register_operand (op, mode)); +} + +/* Return 1 if OP is a register operand, or is valid for both an 8 bit + immediate and an 8 bit adjusted immediate operand. This is necessary + because when we emit a compare, we don't know what the condition will be, + so we need the union of the immediates accepted by GT and LT. */ + +int +reg_or_8bit_and_adjusted_operand (op, mode) + rtx op; + enum machine_mode mode; +{ + return ((GET_CODE (op) == CONST_INT && CONST_OK_FOR_K (INTVAL (op)) + && CONST_OK_FOR_L (INTVAL (op))) + || GET_CODE (op) == CONSTANT_P_RTX + || register_operand (op, mode)); +} + +/* Return 1 if OP is a register operand, or a 14 bit immediate operand. 
*/ + +int +reg_or_14bit_operand (op, mode) + rtx op; + enum machine_mode mode; +{ + return ((GET_CODE (op) == CONST_INT && CONST_OK_FOR_I (INTVAL (op))) + || GET_CODE (op) == CONSTANT_P_RTX + || register_operand (op, mode)); +} + +/* Return 1 if OP is a register operand, or a 22 bit immediate operand. */ + +int +reg_or_22bit_operand (op, mode) + rtx op; + enum machine_mode mode; +{ + return ((GET_CODE (op) == CONST_INT && CONST_OK_FOR_J (INTVAL (op))) + || GET_CODE (op) == CONSTANT_P_RTX + || register_operand (op, mode)); +} + +/* Return 1 if OP is a 6 bit immediate operand. */ + +int +shift_count_operand (op, mode) + rtx op; + enum machine_mode mode; +{ + return ((GET_CODE (op) == CONST_INT && CONST_OK_FOR_M (INTVAL (op))) + || GET_CODE (op) == CONSTANT_P_RTX); +} + +/* Return 1 if OP is a 5 bit immediate operand. */ + +int +shift_32bit_count_operand (op, mode) + rtx op; + enum machine_mode mode; +{ + return ((GET_CODE (op) == CONST_INT + && (INTVAL (op) >= 0 && INTVAL (op) < 32)) + || GET_CODE (op) == CONSTANT_P_RTX); +} + +/* Return 1 if OP is a 2, 4, 8, or 16 immediate operand. */ + +int +shladd_operand (op, mode) + rtx op; + enum machine_mode mode; +{ + return (GET_CODE (op) == CONST_INT + && (INTVAL (op) == 2 || INTVAL (op) == 4 + || INTVAL (op) == 8 || INTVAL (op) == 16)); +} + +/* Return 1 if OP is a -16, -8, -4, -1, 1, 4, 8, or 16 immediate operand. */ + +int +fetchadd_operand (op, mode) + rtx op; + enum machine_mode mode; +{ + return (GET_CODE (op) == CONST_INT + && (INTVAL (op) == -16 || INTVAL (op) == -8 || + INTVAL (op) == -4 || INTVAL (op) == -1 || + INTVAL (op) == 1 || INTVAL (op) == 4 || + INTVAL (op) == 8 || INTVAL (op) == 16)); +} + +/* Return 1 if OP is a floating-point constant zero, one, or a register. 
*/ + +int +reg_or_fp01_operand (op, mode) + rtx op; + enum machine_mode mode; +{ + return ((GET_CODE (op) == CONST_DOUBLE && CONST_DOUBLE_OK_FOR_G (op)) + || GET_CODE (op) == CONSTANT_P_RTX + || register_operand (op, mode)); +} + +/* Return 1 if this is a comparison operator, which accepts an normal 8-bit + signed immediate operand. */ + +int +normal_comparison_operator (op, mode) + register rtx op; + enum machine_mode mode; +{ + enum rtx_code code = GET_CODE (op); + return ((mode == VOIDmode || GET_MODE (op) == mode) + && (code == EQ || code == NE + || code == GT || code == LE || code == GTU || code == LEU)); +} + +/* Return 1 if this is a comparison operator, which accepts an adjusted 8-bit + signed immediate operand. */ + +int +adjusted_comparison_operator (op, mode) + register rtx op; + enum machine_mode mode; +{ + enum rtx_code code = GET_CODE (op); + return ((mode == VOIDmode || GET_MODE (op) == mode) + && (code == LT || code == GE || code == LTU || code == GEU)); +} + +/* Return 1 if OP is a call returning an HFA. It is known to be a PARALLEL + and the first section has already been tested. */ + +int +call_multiple_values_operation (op, mode) + rtx op; + enum machine_mode mode ATTRIBUTE_UNUSED; +{ + int count = XVECLEN (op, 0) - 2; + int i; + int dest_regno; + + /* Perform a quick check so we don't block up below. */ + if (count <= 1 + || GET_CODE (XVECEXP (op, 0, 0)) != SET + || GET_CODE (SET_DEST (XVECEXP (op, 0, 0))) != REG + || GET_CODE (SET_SRC (XVECEXP (op, 0, 0))) != CALL) + return 0; + + dest_regno = REGNO (SET_DEST (XVECEXP (op, 0, 0))); + + for (i = 1; i < count; i++) + { + rtx elt = XVECEXP (op, 0, i + 2); + + if (GET_CODE (elt) != SET + || GET_CODE (SET_SRC (elt)) != CALL + || GET_CODE (SET_DEST (elt)) != REG + || REGNO (SET_DEST (elt)) != dest_regno + i) + return 0; + } + + return 1; +} + + +/* Structure to be filled in by ia64_compute_frame_size with register + save masks and offsets for the current function. 
*/ + +struct ia64_frame_info +{ + long total_size; /* # bytes that the entire frame takes up. */ + long var_size; /* # bytes that variables take up. */ + long args_size; /* # bytes that outgoing arguments take up. */ + long pretend_size; /* # bytes that stdarg arguments take up. */ + long pretend_pad_size; /* # bytes padding to align stdarg args. */ + long extra_size; /* # bytes of extra gunk. */ + long gr_size; /* # bytes needed to store general regs. */ + long fr_size; /* # bytes needed to store FP regs. */ + long fr_pad_size; /* # bytes needed to align FP save area. */ + long pr_size; /* # bytes needed to store predicate regs. */ + long br_size; /* # bytes needed to store branch regs. */ + HARD_REG_SET mask; /* mask of saved registers. */ + int initialized; /* != 0 is frame size already calculated. */ +}; + +/* Current frame information calculated by compute_frame_size. */ +struct ia64_frame_info current_frame_info; + +/* Helper function for INITIAL_ELIMINATION_OFFSET. Return the offset from the + frame pointer where b0 is saved. */ + +int +ia64_rap_fp_offset () +{ + return - current_frame_info.br_size; +} + +/* Returns the number of bytes offset between the frame pointer and the stack + pointer for the current function. SIZE is the number of bytes of space + needed for local variables. */ +unsigned int +ia64_compute_frame_size (size) + int size; +{ + int total_size; + int extra_size; + int gr_size = 0; + int fr_size = 0; + int fr_pad_size = 0; + int pr_size = 0; + int br_size = 0; + int pretend_pad_size = 0; + int tmp; + int regno; + HARD_REG_SET mask; + + CLEAR_HARD_REG_SET (mask); + + /* Calculate space needed for general registers. */ + /* We never need to save any of the stacked registers, which are regs + 32 to 127. */ + for (regno = GR_REG (0); regno <= GR_REG (31); regno++) + if (regs_ever_live[regno] && ! call_used_regs[regno]) + { + SET_HARD_REG_BIT (mask, regno); + gr_size += 8; + } + + /* Allocate space to save/restore the unat from. 
*/ + if (gr_size != 0 + || current_function_varargs || current_function_stdarg) + gr_size += 8; + + /* Calculate space needed for FP registers. */ + for (regno = FR_REG (0); regno <= FR_REG (127); regno++) + if (regs_ever_live[regno] && ! call_used_regs[regno]) + { + SET_HARD_REG_BIT (mask, regno); + fr_size += 16; + } + + /* Calculate space needed for predicate registers. */ + for (regno = PR_REG (0); regno <= PR_REG (63); regno++) + if (regs_ever_live[regno] && ! call_used_regs[regno]) + { + SET_HARD_REG_BIT (mask, regno); + pr_size = 8; + } + + /* Calculate space needed for branch registers. */ + for (regno = BR_REG (0); regno <= BR_REG (7); regno++) + if (regs_ever_live[regno] && ! call_used_regs[regno]) + { + SET_HARD_REG_BIT (mask, regno); + br_size += 8; + } + + /* The FR save area needs to be 16-byte aligned. */ + if (fr_size) + { + tmp = (size + fr_size + pr_size + br_size); + fr_pad_size = IA64_STACK_ALIGN (tmp) - tmp; + } + else + fr_pad_size = 0; + + /* If we have an odd number of words of pretend arguments written to the + stack, then the FR save area will be unaligned. We pad below this area + to keep things 16 byte aligned. This needs to be kept distinct, to + avoid confusing it with padding added below the GR save area, which does + not affect the FR area alignment. */ + pretend_pad_size = current_function_pretend_args_size % 16; + + /* The 16 bytes is for the scratch area. */ + tmp = (size + gr_size + fr_pad_size + fr_size + pr_size + br_size + + current_function_outgoing_args_size + 16); + tmp += (current_function_pretend_args_size + ? current_function_pretend_args_size - 16 + : 0) + pretend_pad_size; + total_size = IA64_STACK_ALIGN (tmp); + extra_size = total_size - tmp + 16; + + /* If this is a leaf routine (BR_REG (0) is not live), and if there is no + stack space needed for register saves, then don't allocate the 16 byte + scratch area. */ + if (total_size == 16 && ! 
regs_ever_live[BR_REG (0)]) + { + total_size = 0; + extra_size = 0; + } + + current_frame_info.total_size = total_size; + current_frame_info.var_size = size; + current_frame_info.args_size = current_function_outgoing_args_size; + current_frame_info.pretend_size + = (current_function_pretend_args_size + ? current_function_pretend_args_size - 16 + : 0); + current_frame_info.pretend_pad_size = pretend_pad_size; + current_frame_info.extra_size = extra_size; + current_frame_info.gr_size = gr_size; + current_frame_info.fr_size = fr_size; + current_frame_info.fr_pad_size = fr_pad_size; + current_frame_info.pr_size = pr_size; + current_frame_info.br_size = br_size; + COPY_HARD_REG_SET (current_frame_info.mask, mask); + current_frame_info.initialized = reload_completed; + + return total_size; +} + +void +save_restore_insns (save_p) + int save_p; +{ + rtx insn; + + if (current_frame_info.gr_size + current_frame_info.fr_size + + current_frame_info.br_size + current_frame_info.pr_size) + { + rtx tmp_reg = gen_rtx_REG (DImode, GR_REG (2)); + rtx tmp_post_inc = gen_rtx_POST_INC (DImode, tmp_reg); + rtx tmp2_reg = gen_rtx_REG (DImode, GR_REG (3)); + int offset = (current_frame_info.total_size + - (current_frame_info.gr_size + current_frame_info.fr_size + + current_frame_info.fr_pad_size + + current_frame_info.br_size + + current_frame_info.pr_size + + current_frame_info.var_size + + current_frame_info.pretend_size + + current_frame_info.pretend_pad_size)); + rtx offset_rtx; + int regno; + + /* If there is a frame pointer, then we use it instead of the stack + pointer, so that the stack pointer does not need to be valid when + the epilogue starts. See EXIT_IGNORE_STACK. 
*/ + if (frame_pointer_needed) + offset = offset - current_frame_info.total_size; + + if (CONST_OK_FOR_I (offset)) + offset_rtx = GEN_INT (offset); + else + { + offset_rtx = tmp_reg; + insn = emit_insn (gen_movdi (tmp_reg, GEN_INT (offset))); + RTX_FRAME_RELATED_P (insn) = 1; + } + insn = emit_insn (gen_adddi3 (tmp_reg, + (frame_pointer_needed ? frame_pointer_rtx + : stack_pointer_rtx), + offset_rtx)); + RTX_FRAME_RELATED_P (insn) = 1; + + /* Must save/restore ar.unat if any GR is spilled/restored. */ + if (current_frame_info.gr_size != 0 + || current_function_varargs || current_function_stdarg) + { + rtx mem = gen_rtx_MEM (DImode, tmp_post_inc); + if (save_p) + { + insn = emit_insn (gen_unat_spill (tmp2_reg)); + RTX_FRAME_RELATED_P (insn) = 1; + insn = emit_insn (gen_movdi (mem, tmp2_reg)); + RTX_FRAME_RELATED_P (insn) = 1; + } + else + { + insn = emit_insn (gen_movdi (tmp2_reg, mem)); + RTX_FRAME_RELATED_P (insn) = 1; + /* The restore happens after the last ld8.fill instruction. */ + } + } + + for (regno = GR_REG (0); regno <= GR_REG (127); regno++) + if (TEST_HARD_REG_BIT (current_frame_info.mask, regno)) + { + rtx mem = gen_rtx_MEM (DImode, tmp_post_inc); + if (save_p) + insn = emit_insn (gen_gr_spill (mem, + gen_rtx_REG (DImode, regno))); + else + insn = emit_insn (gen_gr_restore (gen_rtx_REG (DImode, regno), + mem)); + RTX_FRAME_RELATED_P (insn) = 1; + } + + /* Now restore the unat register if necessary. */ + if ((current_frame_info.gr_size != 0 + || current_function_varargs || current_function_stdarg) + && ! 
save_p) + emit_insn (gen_unat_restore (tmp2_reg)); + + for (regno = FR_REG (0); regno <= FR_REG (127); regno++) + if (TEST_HARD_REG_BIT (current_frame_info.mask, regno)) + { + rtx mem = gen_rtx_MEM (XFmode, tmp_post_inc); + if (save_p) + insn = emit_insn (gen_fr_spill (mem, + gen_rtx_REG (XFmode, regno))); + else + insn = emit_insn (gen_fr_restore (gen_rtx_REG (XFmode, regno), + mem)); + RTX_FRAME_RELATED_P (insn) = 1; + } + + /* If one is used, we save/restore all of them. */ + for (regno = PR_REG (0); regno <= PR_REG (63); regno++) + if (TEST_HARD_REG_BIT (current_frame_info.mask, regno)) + { + rtx mem = gen_rtx_MEM (DImode, tmp_post_inc); + if (save_p) + { + insn = emit_insn (gen_pr_spill (tmp2_reg)); + RTX_FRAME_RELATED_P (insn) = 1; + insn = emit_insn (gen_movdi (mem, tmp2_reg)); + RTX_FRAME_RELATED_P (insn) = 1; + } + else + { + insn = emit_insn (gen_movdi (tmp2_reg, mem)); + RTX_FRAME_RELATED_P (insn) = 1; + insn = emit_insn (gen_pr_restore (tmp2_reg)); + RTX_FRAME_RELATED_P (insn) = 1; + } + break; + } + + for (regno = BR_REG (0); regno <= BR_REG (7); regno++) + if (TEST_HARD_REG_BIT (current_frame_info.mask, regno)) + { + rtx src, dest; + + if (save_p) + { + src = gen_rtx_REG (DImode, regno); + dest = gen_rtx_MEM (DImode, tmp_post_inc); + } + else + { + src = gen_rtx_MEM (DImode, tmp_post_inc); + dest = gen_rtx_REG (DImode, regno); + } + + insn = emit_insn (gen_movdi (tmp2_reg, src)); + RTX_FRAME_RELATED_P (insn) = 1; + insn = emit_insn (gen_movdi (dest, tmp2_reg)); + RTX_FRAME_RELATED_P (insn) = 1; + } + } +} + + +/* Called after register allocation to add any instructions needed for the + prologue. Using a prologue insn is favored compared to putting all of the + instructions in the FUNCTION_PROLOGUE macro, since it allows the scheduler + to intermix instructions with the saves of the caller saved registers. In + some cases, it might be necessary to emit a barrier instruction as the last + insn to prevent such scheduling. 
+ + Also any insns generated here should have RTX_FRAME_RELATED_P(insn) = 1 + so that the debug info generation code can handle them properly. */ + +/* ??? Get inefficient code when the frame size is larger than can fit in an + adds instruction. */ + +/* ??? Add support for allocating temporaries from the output registers if + they do not need to live past call instructions. */ + +/* ??? If the function does not return, then we don't need to save the rp + and ar.pfs registers. */ + +/* ??? If this is a leaf function, then fp/rp/ar.pfs should be put in the + low 32 regs. */ + +/* ??? Should not reserve a local register for rp/ar.pfs. Should + instead check to see if any local registers are unused, and if so, + allocate them to rp/ar.pfs in that order. Not sure what to do about + fp, we may still need to reserve a local register for it. */ + +void +ia64_expand_prologue () +{ + rtx insn, offset; + int i, locals, inputs, outputs, rotates; + int frame_size = ia64_compute_frame_size (get_frame_size ()); + int leaf_function; + int epilogue_p; + edge e; + + /* ??? This seems like a leaf_function_p bug. It calls get_insns which + returns the first insn of the current sequence, not the first insn + of the function. We work around this by pushing to the topmost + sequence first. */ + push_topmost_sequence (); + leaf_function = leaf_function_p (); + pop_topmost_sequence (); + + /* ??? If there is no epilogue, then we don't need some prologue insns. We + need to avoid emitting the dead prologue insns, because flow will complain + about them. */ + if (optimize) + { + for (e = EXIT_BLOCK_PTR->pred; e ; e = e->pred_next) + if ((e->flags & EDGE_FAKE) == 0 + && (e->flags & EDGE_FALLTHRU) != 0) + break; + epilogue_p = (e != NULL); + } + else + epilogue_p = 1; + + /* Find the highest local register used. */ + /* We have only 80 local registers, because we reserve 8 for the inputs + and 8 for the outputs. 
*/ + + for (i = LOC_REG (79); i >= LOC_REG (0); i--) + if (regs_ever_live[i]) + break; + locals = i - LOC_REG (0) + 1; + + /* Likewise for inputs. */ + + for (i = IN_REG (7); i >= IN_REG (0); i--) + if (regs_ever_live[i]) + break; + inputs = i - IN_REG (0) + 1; + +#if 0 + /* If the function was declared with syscall_linkage, then we may need to + preserve all declared input registers, even if they weren't used. + Currently, syscall_linkage does not have this effect. */ + + if (lookup_attribute ("syscall_linkage", + TYPE_ATTRIBUTES (TREE_TYPE (current_function_decl)))) + inputs = MAX (inputs, current_function_args_info.words); +#endif + + /* Likewise for outputs. */ + + for (i = OUT_REG (7); i >= OUT_REG (0); i--) + if (regs_ever_live[i]) + break; + outputs = i - OUT_REG (0) + 1; + + /* When -p profiling, we need one output register for the mcount argument. + Likwise for -a profiling for the bb_init_func argument. For -ax + profiling, we need two output registers for the two bb_init_trace_func + arguments. */ + if (profile_flag || profile_block_flag == 1) + outputs = MAX (outputs, 1); + else if (profile_block_flag == 2) + outputs = MAX (outputs, 2); + + /* Leaf functions should not use any output registers. */ + if (leaf_function && outputs != 0) + abort (); + + /* No rotating register support as yet. */ + + rotates = 0; + + /* Allocate two extra locals for saving/restoring rp and ar.pfs. Also + allocate one local for use as the frame pointer if frame_pointer_needed + is true. */ + locals += 2 + frame_pointer_needed; + + /* Save these values in global registers for debugging info. */ + ia64_input_regs = inputs; + ia64_local_regs = locals; + + /* Set the local, input, and output register names. We need to do this + for GNU libc, which creates crti.S/crtn.S by splitting initfini.c in + half. If we use in/loc/out register names, then we get assembler errors + in crtn.S because there is no alloc insn or regstk directive in there. 
+ We give in/loc/out names to unused registers, to make invalid uses of + them easy to spot. */ + if (! TARGET_REG_NAMES) + { + for (i = 0; i < 8; i++) + { + if (i < inputs) + reg_names[IN_REG (i)] = ia64_reg_numbers[i]; + else + reg_names[IN_REG (i)] = ia64_input_reg_names[i]; + } + for (i = 0; i < 80; i++) + { + if (i < locals) + reg_names[LOC_REG (i)] = ia64_reg_numbers[inputs + i]; + else + reg_names[LOC_REG (i)] = ia64_local_reg_names[i]; + } + for (i = 0; i < 8; i++) + { + if (i < outputs) + reg_names[OUT_REG (i)] = ia64_reg_numbers[inputs + locals + i]; + else + reg_names[OUT_REG (i)] = ia64_output_reg_names[i]; + } + } + + /* Set the frame pointer register name now that it is known, and the + local register names are known. */ + if (frame_pointer_needed) + { + reg_names[FRAME_POINTER_REGNUM] + = reg_names[LOC_REG (locals - 3)]; + ia64_fp_regno = LOC_REG (inputs + locals - 3); + } + else + ia64_fp_regno = 0; + + /* We don't need an alloc instruction if this is a leaf function, and the + locals and outputs are both zero sized. Since we have already allocated + two locals for rp and ar.pfs, we check for two locals. */ + if (locals == 2 && outputs == 0 && leaf_function) + { + /* If there is no alloc, but there are input registers used, then we + need a .regstk directive. */ + if (TARGET_REG_NAMES) + ia64_need_regstk = 1; + else + ia64_need_regstk = 0; + + ia64_arpfs_regno = 0; + ia64_rp_regno = 0; + } + else + { + ia64_need_regstk = 0; + + ia64_arpfs_regno = LOC_REG (locals - 1); + ia64_rp_regno = LOC_REG (locals - 2); + reg_names[RETURN_ADDRESS_REGNUM] = reg_names[ia64_rp_regno]; + + emit_insn (gen_alloc (gen_rtx_REG (DImode, ia64_arpfs_regno), + GEN_INT (inputs), GEN_INT (locals), + GEN_INT (outputs), GEN_INT (rotates))); + + /* ??? FIXME ??? We don't need to save BR_REG (0) if this is a leaf + function. We also don't need to allocate a local reg for it then. */ + /* ??? Likewise if there is no epilogue. 
*/ + if (epilogue_p) + emit_move_insn (gen_rtx_REG (DImode, ia64_rp_regno), + gen_rtx_REG (DImode, BR_REG (0))); + } + + /* Set up frame pointer and stack pointer. */ + if (frame_pointer_needed) + { + insn = emit_insn (gen_movdi (hard_frame_pointer_rtx, stack_pointer_rtx)); + RTX_FRAME_RELATED_P (insn) = 1; + } + if (frame_size != 0) + { + if (CONST_OK_FOR_I (-frame_size)) + offset = GEN_INT (-frame_size); + else + { + offset = gen_rtx_REG (DImode, GR_REG (2)); + insn = emit_insn (gen_movdi (offset, GEN_INT (-frame_size))); + RTX_FRAME_RELATED_P (insn) = 1; + } + /* If there is a frame pointer, then we need to make the stack pointer + decrement depend on the frame pointer, so that the stack pointer + update won't be moved past fp-relative stores to the frame. */ + if (frame_pointer_needed) + insn = emit_insn (gen_prologue_allocate_stack (stack_pointer_rtx, + stack_pointer_rtx, + offset, + hard_frame_pointer_rtx)); + else + insn = emit_insn (gen_adddi3 (stack_pointer_rtx, stack_pointer_rtx, + offset)); + RTX_FRAME_RELATED_P (insn) = 1; + } + + /* Save registers to frame. */ + save_restore_insns (1); +} + +/* Called after register allocation to add any instructions needed for the + epilogue. Using a epilogue insn is favored compared to putting all of the + instructions in the FUNCTION_PROLOGUE macro, since it allows the scheduler + to intermix instructions with the saves of the caller saved registers. In + some cases, it might be necessary to emit a barrier instruction as the last + insn to prevent such scheduling. */ + +void +ia64_expand_epilogue () +{ + /* Restore registers from frame. */ + save_restore_insns (0); + + /* ??? The gen_epilogue_deallocate_stack call below does not work. This + is mainly because there is no fp+offset addressing mode, so most loads + from the frame do not actually use the frame pointer; they use a pseudo + computed from the frame pointer. The same problem exists with the + stack pointer when there is no frame pointer. 
I think this can be + fixed only by making the dependency analysis code in sched smarter, so + that it recognizes references to the frame, and makes succeeding stack + pointer updates anti-dependent on them. */ + emit_insn (gen_blockage ()); + + if (frame_pointer_needed) + { + /* If there is a frame pointer, then we need to make the stack pointer + restore depend on the frame pointer, so that the stack pointer + restore won't be moved up past fp-relative loads from the frame. */ + emit_insn (gen_epilogue_deallocate_stack (stack_pointer_rtx, + hard_frame_pointer_rtx)); + } + else + { + int frame_size = current_frame_info.total_size; + rtx offset; + + if (frame_size != 0) + { + if (CONST_OK_FOR_I (frame_size)) + offset = GEN_INT (frame_size); + else + { + offset = gen_rtx_REG (DImode, GR_REG (2)); + emit_insn (gen_movdi (offset, GEN_INT (frame_size))); + } + emit_insn (gen_adddi3 (stack_pointer_rtx, stack_pointer_rtx, + offset)); + } + } + + if (ia64_arpfs_regno) + emit_insn (gen_pfs_restore (gen_rtx_REG (DImode, ia64_arpfs_regno))); + + if (ia64_rp_regno) + emit_move_insn (gen_rtx_REG (DImode, BR_REG (0)), + gen_rtx_REG (DImode, ia64_rp_regno)); + + emit_jump_insn (gen_return_internal (gen_rtx_REG (DImode, BR_REG (0)))); +} + +/* Emit the function prologue. */ + +void +ia64_function_prologue (file, size) + FILE *file; + int size; +{ + if (ia64_need_regstk) + fprintf (file, "\t.regstk %d, 0, 0, 0\n", ia64_input_regs); + + /* ??? Emit .body directive. GNU as ignores .body currently. */ +} + +/* Emit the function epilogue. */ + +void +ia64_function_epilogue (file, size) + FILE *file; + int size; +{ +} + +/* Return 1 if br.ret can do all the work required to return from a + function. */ + +int +ia64_direct_return () +{ + return (reload_completed && ! frame_pointer_needed + && ia64_compute_frame_size (get_frame_size ())); +} + + +/* Do any needed setup for a variadic function. CUM has not been updated + for the last named argument which has type TYPE and mode MODE. 
*/ +void +ia64_setup_incoming_varargs (cum, int_mode, type, pretend_size, second_time) + CUMULATIVE_ARGS cum; + int int_mode; + tree type; + int * pretend_size; + int second_time; +{ + /* If this is a stdarg function, then don't save the current argument. */ + int offset = ! current_function_varargs; + + if (cum.words < MAX_ARGUMENT_SLOTS) + { + if (! second_time) + { + int i; + int first_reg = GR_ARG_FIRST + cum.words + offset; + rtx tmp_reg = gen_rtx_REG (DImode, GR_REG (16)); + rtx tmp_post_inc = gen_rtx_POST_INC (DImode, tmp_reg); + rtx mem = gen_rtx_MEM (DImode, tmp_post_inc); + rtx insn; + + /* We must emit st8.spill insns instead of st8 because we might + be saving non-argument registers, and non-argument registers might + not contain valid values. */ + emit_move_insn (tmp_reg, virtual_incoming_args_rtx); + for (i = first_reg; i < GR_ARG_FIRST + 8; i++) + { + insn = emit_insn (gen_gr_spill (mem, gen_rtx_REG (DImode, i))); + REG_NOTES (insn) = gen_rtx_EXPR_LIST (REG_INC, tmp_reg, 0); + } + } + *pretend_size = ((MAX_ARGUMENT_SLOTS - cum.words - offset) + * UNITS_PER_WORD); + } +} + +/* Check whether TYPE is a homogeneous floating point aggregate. If + it is, return the mode of the floating point type that appears + in all leafs. If it is not, return VOIDmode. + + An aggregate is a homogeneous floating point aggregate is if all + fields/elements in it have the same floating point type (e.g, + SFmode). 128-bit quad-precision floats are excluded. 
*/ + +static enum machine_mode +hfa_element_mode (type, nested) + tree type; + int nested; +{ + enum machine_mode element_mode = VOIDmode; + enum machine_mode mode; + enum tree_code code = TREE_CODE (type); + int know_element_mode = 0; + tree t; + + switch (code) + { + case VOID_TYPE: case INTEGER_TYPE: case ENUMERAL_TYPE: + case BOOLEAN_TYPE: case CHAR_TYPE: case POINTER_TYPE: + case OFFSET_TYPE: case REFERENCE_TYPE: case METHOD_TYPE: + case FILE_TYPE: case SET_TYPE: case LANG_TYPE: + case FUNCTION_TYPE: + return VOIDmode; + + /* Fortran complex types are supposed to be HFAs, so we need to handle + gcc's COMPLEX_TYPEs as HFAs. We need to exclude the integral complex + types though. */ + case COMPLEX_TYPE: + if (GET_MODE_CLASS (TYPE_MODE (type)) == MODE_COMPLEX_FLOAT) + return mode_for_size (GET_MODE_UNIT_SIZE (TYPE_MODE (type)) + * BITS_PER_UNIT, MODE_FLOAT, 0); + else + return VOIDmode; + + case REAL_TYPE: + /* We want to return VOIDmode for raw REAL_TYPEs, but the actual + mode if this is contained within an aggregate. */ + if (nested) + return TYPE_MODE (type); + else + return VOIDmode; + + case ARRAY_TYPE: + return TYPE_MODE (TREE_TYPE (type)); + + case RECORD_TYPE: + case UNION_TYPE: + case QUAL_UNION_TYPE: + for (t = TYPE_FIELDS (type); t; t = TREE_CHAIN (t)) + { + if (TREE_CODE (t) != FIELD_DECL) + continue; + + mode = hfa_element_mode (TREE_TYPE (t), 1); + if (know_element_mode) + { + if (mode != element_mode) + return VOIDmode; + } + else if (GET_MODE_CLASS (mode) != MODE_FLOAT) + return VOIDmode; + else + { + know_element_mode = 1; + element_mode = mode; + } + } + return element_mode; + + default: + /* If we reach here, we probably have some front-end specific type + that the backend doesn't know about. This can happen via the + aggregate_value_p call in init_function_start. All we can do is + ignore unknown tree types. 
*/ + return VOIDmode; + } + + return VOIDmode; +} + +/* Return rtx for register where argument is passed, or zero if it is passed + on the stack. */ + +/* ??? 128-bit quad-precision floats are always passed in general + registers. */ + +rtx +ia64_function_arg (cum, mode, type, named, incoming) + CUMULATIVE_ARGS *cum; + enum machine_mode mode; + tree type; + int named; + int incoming; +{ + int basereg = (incoming ? GR_ARG_FIRST : AR_ARG_FIRST); + int words = (((mode == BLKmode ? int_size_in_bytes (type) + : GET_MODE_SIZE (mode)) + UNITS_PER_WORD - 1) + / UNITS_PER_WORD); + int offset = 0; + enum machine_mode hfa_mode = VOIDmode; + + /* Arguments larger than 8 bytes start at the next even boundary. */ + if (words > 1 && (cum->words & 1)) + offset = 1; + + /* If all argument slots are used, then it must go on the stack. */ + if (cum->words + offset >= MAX_ARGUMENT_SLOTS) + return 0; + + /* Check for and handle homogeneous FP aggregates. */ + if (type) + hfa_mode = hfa_element_mode (type, 0); + + /* Unnamed prototyped hfas are passed as usual. Named prototyped hfas + and unprototyped hfas are passed specially. */ + if (hfa_mode != VOIDmode && (! cum->prototype || named)) + { + rtx loc[16]; + int i = 0; + int fp_regs = cum->fp_regs; + int int_regs = cum->words + offset; + int hfa_size = GET_MODE_SIZE (hfa_mode); + int byte_size; + int args_byte_size; + + /* If prototyped, pass it in FR regs then GR regs. + If not prototyped, pass it in both FR and GR regs. + + If this is an SFmode aggregate, then it is possible to run out of + FR regs while GR regs are still left. In that case, we pass the + remaining part in the GR regs. */ + + /* Fill the FP regs. We do this always. We stop if we reach the end + of the argument, the last FP register, or the last argument slot. */ + + byte_size = ((mode == BLKmode) + ? 
int_size_in_bytes (type) : GET_MODE_SIZE (mode)); + args_byte_size = int_regs * UNITS_PER_WORD; + offset = 0; + for (; (offset < byte_size && fp_regs < MAX_ARGUMENT_SLOTS + && args_byte_size < (MAX_ARGUMENT_SLOTS * UNITS_PER_WORD)); i++) + { + loc[i] = gen_rtx_EXPR_LIST (VOIDmode, + gen_rtx_REG (hfa_mode, (FR_ARG_FIRST + + fp_regs)), + GEN_INT (offset)); + /* ??? Padding for XFmode type? */ + offset += hfa_size; + args_byte_size += hfa_size; + fp_regs++; + } + + /* If no prototype, then the whole thing must go in GR regs. */ + if (! cum->prototype) + offset = 0; + /* If this is an SFmode aggregate, then we might have some left over + that needs to go in GR regs. */ + else if (byte_size != offset) + int_regs += offset / UNITS_PER_WORD; + + /* Fill in the GR regs. We must use DImode here, not the hfa mode. */ + + for (; offset < byte_size && int_regs < MAX_ARGUMENT_SLOTS; i++) + { + enum machine_mode gr_mode = DImode; + + /* If we have an odd 4 byte hunk because we ran out of FR regs, + then this goes in a GR reg left adjusted/little endian, right + adjusted/big endian. */ + /* ??? Currently this is handled wrong, because 4-byte hunks are + always right adjusted/little endian. */ + if (offset & 0x4) + gr_mode = SImode; + /* If we have an even 4 byte hunk because the aggregate is a + multiple of 4 bytes in size, then this goes in a GR reg right + adjusted/little endian. */ + else if (byte_size - offset == 4) + gr_mode = SImode; + + loc[i] = gen_rtx_EXPR_LIST (VOIDmode, + gen_rtx_REG (gr_mode, (basereg + + int_regs)), + GEN_INT (offset)); + offset += GET_MODE_SIZE (gr_mode); + int_regs++; + } + + /* If we ended up using just one location, just return that one loc. */ + if (i == 1) + return XEXP (loc[0], 0); + else + return gen_rtx_PARALLEL (mode, gen_rtvec_v (i, loc)); + } + + /* Integral and aggregates go in general registers. If we have run out of + FR registers, then FP values must also go in general registers. This can + happen when we have a SFmode HFA. 
*/ + else if (! FLOAT_MODE_P (mode) || cum->fp_regs == MAX_ARGUMENT_SLOTS) + return gen_rtx_REG (mode, basereg + cum->words + offset); + + /* If there is a prototype, then FP values go in a FR register when + named, and in a GR registeer when unnamed. */ + else if (cum->prototype) + { + if (! named) + return gen_rtx_REG (mode, basereg + cum->words + offset); + else + return gen_rtx_REG (mode, FR_ARG_FIRST + cum->fp_regs); + } + /* If there is no prototype, then FP values go in both FR and GR + registers. */ + else + { + rtx fp_reg = gen_rtx_EXPR_LIST (VOIDmode, + gen_rtx_REG (mode, (FR_ARG_FIRST + + cum->fp_regs)), + const0_rtx); + rtx gr_reg = gen_rtx_EXPR_LIST (VOIDmode, + gen_rtx_REG (mode, + (basereg + cum->words + + offset)), + const0_rtx); + + return gen_rtx_PARALLEL (mode, gen_rtvec (2, fp_reg, gr_reg)); + } +} + +/* Return number of words, at the beginning of the argument, that must be + put in registers. 0 is the argument is entirely in registers or entirely + in memory. */ + +int +ia64_function_arg_partial_nregs (cum, mode, type, named) + CUMULATIVE_ARGS *cum; + enum machine_mode mode; + tree type; + int named; +{ + int words = (((mode == BLKmode ? int_size_in_bytes (type) + : GET_MODE_SIZE (mode)) + UNITS_PER_WORD - 1) + / UNITS_PER_WORD); + int offset = 0; + + /* Arguments larger than 8 bytes start at the next even boundary. */ + if (words > 1 && (cum->words & 1)) + offset = 1; + + /* If all argument slots are used, then it must go on the stack. */ + if (cum->words + offset >= MAX_ARGUMENT_SLOTS) + return 0; + + /* It doesn't matter whether the argument goes in FR or GR regs. If + it fits within the 8 argument slots, then it goes entirely in + registers. If it extends past the last argument slot, then the rest + goes on the stack. */ + + if (words + cum->words + offset <= MAX_ARGUMENT_SLOTS) + return 0; + + return MAX_ARGUMENT_SLOTS - cum->words - offset; +} + +/* Update CUM to point after this argument. This is patterned after + ia64_function_arg. 
*/ + +void +ia64_function_arg_advance (cum, mode, type, named) + CUMULATIVE_ARGS *cum; + enum machine_mode mode; + tree type; + int named; +{ + int words = (((mode == BLKmode ? int_size_in_bytes (type) + : GET_MODE_SIZE (mode)) + UNITS_PER_WORD - 1) + / UNITS_PER_WORD); + int offset = 0; + enum machine_mode hfa_mode = VOIDmode; + + /* If all arg slots are already full, then there is nothing to do. */ + if (cum->words >= MAX_ARGUMENT_SLOTS) + return; + + /* Arguments larger than 8 bytes start at the next even boundary. */ + if (words > 1 && (cum->words & 1)) + offset = 1; + + cum->words += words + offset; + + /* Check for and handle homogeneous FP aggregates. */ + if (type) + hfa_mode = hfa_element_mode (type, 0); + + /* Unnamed prototyped hfas are passed as usual. Named prototyped hfas + and unprototyped hfas are passed specially. */ + if (hfa_mode != VOIDmode && (! cum->prototype || named)) + { + int fp_regs = cum->fp_regs; + /* This is the original value of cum->words + offset. */ + int int_regs = cum->words - words; + int hfa_size = GET_MODE_SIZE (hfa_mode); + int byte_size; + int args_byte_size; + + /* If prototyped, pass it in FR regs then GR regs. + If not prototyped, pass it in both FR and GR regs. + + If this is an SFmode aggregate, then it is possible to run out of + FR regs while GR regs are still left. In that case, we pass the + remaining part in the GR regs. */ + + /* Fill the FP regs. We do this always. We stop if we reach the end + of the argument, the last FP register, or the last argument slot. */ + + byte_size = ((mode == BLKmode) + ? int_size_in_bytes (type) : GET_MODE_SIZE (mode)); + args_byte_size = int_regs * UNITS_PER_WORD; + offset = 0; + for (; (offset < byte_size && fp_regs < MAX_ARGUMENT_SLOTS + && args_byte_size < (MAX_ARGUMENT_SLOTS * UNITS_PER_WORD));) + { + /* ??? Padding for XFmode type? 
*/ + offset += hfa_size; + args_byte_size += hfa_size; + fp_regs++; + } + + cum->fp_regs = fp_regs; + } + + /* Integral and aggregates go in general registers. If we have run out of + FR registers, then FP values must also go in general registers. This can + happen when we have a SFmode HFA. */ + else if (! FLOAT_MODE_P (mode) || cum->fp_regs == MAX_ARGUMENT_SLOTS) + return; + + /* If there is a prototype, then FP values go in a FR register when + named, and in a GR registeer when unnamed. */ + else if (cum->prototype) + { + if (! named) + return; + else + /* ??? Complex types should not reach here. */ + cum->fp_regs += (GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT ? 2 : 1); + } + /* If there is no prototype, then FP values go in both FR and GR + registers. */ + else + /* ??? Complex types should not reach here. */ + cum->fp_regs += (GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT ? 2 : 1); + + return; +} + +/* Implement va_start. */ + +void +ia64_va_start (stdarg_p, valist, nextarg) + int stdarg_p; + tree valist; + rtx nextarg; +{ + int arg_words; + int ofs; + + arg_words = current_function_args_info.words; + + if (stdarg_p) + ofs = 0; + else + ofs = (arg_words >= MAX_ARGUMENT_SLOTS ? -UNITS_PER_WORD : 0); + + nextarg = plus_constant (nextarg, ofs); + std_expand_builtin_va_start (1, valist, nextarg); +} + +/* Implement va_arg. */ + +rtx +ia64_va_arg (valist, type) + tree valist, type; +{ + HOST_WIDE_INT size; + tree t; + + /* Arguments larger than 8 bytes are 16 byte aligned. 
*/ + size = int_size_in_bytes (type); + if (size > UNITS_PER_WORD) + { + t = build (PLUS_EXPR, TREE_TYPE (valist), valist, + build_int_2 (2 * UNITS_PER_WORD - 1, 0)); + t = build (BIT_AND_EXPR, TREE_TYPE (t), t, + build_int_2 (-2 * UNITS_PER_WORD, -1)); + t = build (MODIFY_EXPR, TREE_TYPE (valist), valist, t); + TREE_SIDE_EFFECTS (t) = 1; + expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL); + } + + return std_expand_builtin_va_arg (valist, type); +} + +/* Return 1 if function return value returned in memory. Return 0 if it is + in a register. */ + +int +ia64_return_in_memory (valtype) + tree valtype; +{ + enum machine_mode mode; + enum machine_mode hfa_mode; + int byte_size; + + mode = TYPE_MODE (valtype); + byte_size = ((mode == BLKmode) + ? int_size_in_bytes (valtype) : GET_MODE_SIZE (mode)); + + /* Hfa's with up to 8 elements are returned in the FP argument registers. */ + + hfa_mode = hfa_element_mode (valtype, 0); + if (hfa_mode != VOIDmode) + { + int hfa_size = GET_MODE_SIZE (hfa_mode); + + /* ??? Padding for XFmode type? */ + if (byte_size / hfa_size > MAX_ARGUMENT_SLOTS) + return 1; + else + return 0; + } + + else if (byte_size > UNITS_PER_WORD * MAX_INT_RETURN_SLOTS) + return 1; + else + return 0; +} + +/* Return rtx for register that holds the function return value. */ + +rtx +ia64_function_value (valtype, func) + tree valtype; + tree func; +{ + enum machine_mode mode; + enum machine_mode hfa_mode; + + mode = TYPE_MODE (valtype); + hfa_mode = hfa_element_mode (valtype, 0); + + if (hfa_mode != VOIDmode) + { + rtx loc[8]; + int i; + int hfa_size; + int byte_size; + int offset; + + hfa_size = GET_MODE_SIZE (hfa_mode); + byte_size = ((mode == BLKmode) + ? int_size_in_bytes (valtype) : GET_MODE_SIZE (mode)); + offset = 0; + for (i = 0; offset < byte_size; i++) + { + loc[i] = gen_rtx_EXPR_LIST (VOIDmode, + gen_rtx_REG (hfa_mode, FR_ARG_FIRST + i), + GEN_INT (offset)); + /* ??? Padding for XFmode type? 
*/ + offset += hfa_size; + } + + if (i == 1) + return XEXP (loc[0], 0); + else + return gen_rtx_PARALLEL (mode, gen_rtvec_v (i, loc)); + } + else if (FLOAT_TYPE_P (valtype)) + return gen_rtx_REG (mode, FR_ARG_FIRST); + else + return gen_rtx_REG (mode, GR_RET_FIRST); +} + +/* Print a memory address as an operand to reference that memory location. */ + +/* ??? Do we need this? It gets used only for 'a' operands. We could perhaps + also call this from ia64_print_operand for memory addresses. */ + +void +ia64_print_operand_address (stream, address) + FILE * stream; + rtx address; +{ +} + +/* Print an operand to a assembler instruction. + B Work arounds for hardware bugs. + C Swap and print a comparison operator. + D Print an FP comparison operator. + E Print 32 - constant, for SImode shifts as extract. + F A floating point constant 0.0 emitted as f0, or 1.0 emitted as f1, or + a floating point register emitted normally. + I Invert a predicate register by adding 1. + O Append .acq for volatile load. + P Postincrement of a MEM. + Q Append .rel for volatile store. + S Shift amount for shladd instruction. + T Print an 8-bit sign extended number (K) as a 32-bit unsigned number + for Intel assembler. + U Print an 8-bit sign extended number (K) as a 64-bit unsigned number + for Intel assembler. + r Print register name, or constant 0 as r0. HP compatibility for + Linux kernel. */ +void +ia64_print_operand (file, x, code) + FILE * file; + rtx x; + int code; +{ + switch (code) + { + /* XXX Add other codes here. */ + + case 0: + /* Handled below. */ + break; + + case 'B': + if (TARGET_A_STEP) + fputs (" ;; nop 0 ;; nop 0 ;;", file); + return; + + case 'C': + { + enum rtx_code c = swap_condition (GET_CODE (x)); + fputs (GET_RTX_NAME (c), file); + return; + } + + case 'D': + fputs (GET_CODE (x) == NE ? 
"neq" : GET_RTX_NAME (GET_CODE (x)), file); + return; + + case 'E': + fprintf (file, HOST_WIDE_INT_PRINT_DEC, 32 - INTVAL (x)); + return; + + case 'F': + if (x == CONST0_RTX (GET_MODE (x))) + fputs (reg_names [FR_REG (0)], file); + else if (x == CONST1_RTX (GET_MODE (x))) + fputs (reg_names [FR_REG (1)], file); + else if (GET_CODE (x) == REG) + fputs (reg_names [REGNO (x)], file); + else + abort (); + return; + + case 'I': + fputs (reg_names [REGNO (x) + 1], file); + return; + + case 'O': + if (MEM_VOLATILE_P (x)) + fputs(".acq", file); + return; + + case 'P': + { + int value; + + if (GET_CODE (XEXP (x, 0)) != POST_INC + && GET_CODE (XEXP (x, 0)) != POST_DEC) + return; + + fputs (", ", file); + + value = GET_MODE_SIZE (GET_MODE (x)); + + /* ??? This is for ldf.fill and stf.spill which use XFmode, but which + actually need 16 bytes increments. Perhaps we can change them + to use TFmode instead. Or don't use POST_DEC/POST_INC for them. + Currently, there are no other uses of XFmode, so hacking it here + is no problem. */ + if (value == 12) + value = 16; + + if (GET_CODE (XEXP (x, 0)) == POST_DEC) + value = -value; + + fprintf (file, "%d", value); + return; + } + + case 'Q': + if (MEM_VOLATILE_P (x)) + fputs(".rel", file); + return; + + case 'S': + fprintf (file, HOST_WIDE_INT_PRINT_DEC, exact_log2 (INTVAL (x))); + return; + + case 'T': + if (! TARGET_GNU_AS && GET_CODE (x) == CONST_INT) + { + fprintf (file, "0x%x", INTVAL (x) & 0xffffffff); + return; + } + break; + + case 'U': + if (! TARGET_GNU_AS && GET_CODE (x) == CONST_INT) + { + char *prefix = "0x"; + if (INTVAL (x) & 0x80000000) + { + fprintf (file, "0xffffffff"); + prefix = ""; + } + fprintf (file, "%s%x", prefix, INTVAL (x) & 0xffffffff); + return; + } + break; + + case 'r': + /* If this operand is the constant zero, write it as zero. 
*/ + if (GET_CODE (x) == REG) + fputs (reg_names[REGNO (x)], file); + else if (x == CONST0_RTX (GET_MODE (x))) + fputs ("r0", file); + else + output_operand_lossage ("invalid %%r value"); + return; + + default: + output_operand_lossage ("ia64_print_operand: unknown code"); + return; + } + + switch (GET_CODE (x)) + { + /* This happens for the spill/restore instructions. */ + case POST_INC: + x = XEXP (x, 0); + /* ... fall through ... */ + + case REG: + fputs (reg_names [REGNO (x)], file); + break; + + case MEM: + { + rtx addr = XEXP (x, 0); + if (GET_CODE (addr) == POST_INC || GET_CODE (addr) == POST_DEC) + addr = XEXP (addr, 0); + fprintf (file, "[%s]", reg_names [REGNO (addr)]); + break; + } + + default: + output_addr_const (file, x); + break; + } + + return; +} + + + +/* This function returns the register class required for a secondary + register when copying between one of the registers in CLASS, and X, + using MODE. A return value of NO_REGS means that no secondary register + is required. */ + +enum reg_class +ia64_secondary_reload_class (class, mode, x) + enum reg_class class; + enum machine_mode mode; + rtx x; +{ + int regno = -1; + + if (GET_CODE (x) == REG || GET_CODE (x) == SUBREG) + regno = true_regnum (x); + + /* ??? This is required because of a bad gcse/cse/global interaction. + We end up with two pseudos with overlapping lifetimes both of which are + equiv to the same constant, and both which need to be in BR_REGS. This + results in a BR_REGS to BR_REGS copy which doesn't exist. To reproduce, + return NO_REGS here, and compile divdi3 in libgcc2.c. This seems to be + a cse bug. cse_basic_block_end changes depending on the path length, + which means the qty_first_reg check in make_regs_eqv can give different + answers at different times. */ + /* ??? At some point I'll probably need a reload_indi pattern to handle + this. 
*/ + if (class == BR_REGS && BR_REGNO_P (regno)) + return GR_REGS; + + /* This is needed if a pseudo used as a call_operand gets spilled to a + stack slot. */ + if (class == BR_REGS && GET_CODE (x) == MEM) + return GR_REGS; + + /* This can happen when a paradoxical subreg is an operand to the muldi3 + pattern. */ + /* ??? This shouldn't be necessary after instruction scheduling is enabled, + because paradoxical subregs are not accepted by register_operand when + INSN_SCHEDULING is defined. Or alternatively, stop the paradoxical subreg + stupidity in the *_operand functions in recog.c. */ + if ((class == FR_REGS || class == FR_INT_REGS || class == FR_FP_REGS) + && GET_CODE (x) == MEM + && (GET_MODE (x) == SImode || GET_MODE (x) == HImode + || GET_MODE (x) == QImode)) + return GR_REGS; + + /* This can happen because of the ior/and/etc patterns that accept FP + registers as operands. If the third operand is a constant, then it + needs to be reloaded into a FP register. */ + if ((class == FR_REGS || class == FR_INT_REGS || class == FR_FP_REGS) + && GET_CODE (x) == CONST_INT) + return GR_REGS; + + /* Moving a integer from an FP register to memory requires a general register + as an intermediary. This is not necessary if we are moving a DImode + subreg of a DFmode value from an FP register to memory, since stfd will + do the right thing in this case. */ + if (class == FR_INT_REGS && GET_CODE (x) == MEM && GET_MODE (x) == DImode) + return GR_REGS; + + /* ??? This happens if we cse/gcse a CCmode value across a call, and the + function has a nonlocal goto. This is because global does not allocate + call crossing pseudos to hard registers when current_function_has_ + nonlocal_goto is true. This is relatively common for C++ programs that + use exceptions. To reproduce, return NO_REGS and compile libstdc++. 
*/ + if (class == PR_REGS && GET_CODE (x) == MEM) + return GR_REGS; + + return NO_REGS; +} + + +/* Emit text to declare externally defined variables and functions, because + the Intel assembler does not support undefined externals. */ + +void +ia64_asm_output_external (file, decl, name) + FILE *file; + tree decl; + char *name; +{ + int save_referenced; + + /* GNU as does not need anything here. */ + if (TARGET_GNU_AS) + return; + + /* ??? The Intel assembler creates a reference that needs to be satisfied by + the linker when we do this, so we need to be careful not to do this for + builtin functions which have no library equivalent. Unfortunately, we + can't tell here whether or not a function will actually be called by + expand_expr, so we pull in library functions even if we may not need + them later. */ + if (! strcmp (name, "__builtin_next_arg") + || ! strcmp (name, "alloca") + || ! strcmp (name, "__builtin_constant_p") + || ! strcmp (name, "__builtin_args_info")) + return; + + /* assemble_name will set TREE_SYMBOL_REFERENCED, so we must save and + restore it. */ + save_referenced = TREE_SYMBOL_REFERENCED (DECL_ASSEMBLER_NAME (decl)); + if (TREE_CODE (decl) == FUNCTION_DECL) + { + fprintf (file, "\t%s\t ", TYPE_ASM_OP); + assemble_name (file, name); + putc (',', file); + fprintf (file, TYPE_OPERAND_FMT, "function"); + putc ('\n', file); + } + ASM_GLOBALIZE_LABEL (file, name); + TREE_SYMBOL_REFERENCED (DECL_ASSEMBLER_NAME (decl)) = save_referenced; +} + +/* Parse the -mfixed-range= option string. */ + +static void +fix_range (str) + char *str; +{ + int i, first, last; + char *dash, *comma; + + /* str must be of the form REG1'-'REG2{,REG1'-'REG} where REG1 and + REG2 are either register names or register numbers. The effect + of this option is to mark the registers in the range from REG1 to + REG2 as ``fixed'' so they won't be used by the compiler. This is + used, e.g., to ensure that kernel mode code doesn't use f32-f127. 
*/ + + while (1) + { + dash = strchr (str, '-'); + if (!dash) + { + warning ("value of -mfixed-range must have form REG1-REG2"); + return; + } + *dash = '\0'; + + comma = strchr (dash + 1, ','); + if (comma) + *comma = '\0'; + + first = decode_reg_name (str); + if (first < 0) + { + warning ("unknown register name: %s", str); + return; + } + + last = decode_reg_name (dash + 1); + if (last < 0) + { + warning ("unknown register name: %s", dash + 1); + return; + } + + *dash = '-'; + + if (first > last) + { + warning ("%s-%s is an empty range", str, dash + 1); + return; + } + + for (i = first; i <= last; ++i) + fixed_regs[i] = call_used_regs[i] = 1; + + if (!comma) + break; + + *comma = ','; + str = comma + 1; + } +} + +/* Called to register all of our global variables with the garbage + collector. */ + +static void +ia64_add_gc_roots () +{ + ggc_add_rtx_root (&ia64_compare_op0, 1); + ggc_add_rtx_root (&ia64_compare_op1, 1); +} + +/* Handle TARGET_OPTIONS switches. */ + +void +ia64_override_options () +{ + if (ia64_fixed_range_string) + fix_range (ia64_fixed_range_string); + + ia64_section_threshold = g_switch_set ? g_switch_value : IA64_DEFAULT_GVALUE; + + ia64_add_gc_roots (); +} + +/* The following collection of routines emit instruction group stop bits as + necessary to avoid dependencies. */ + +/* Need to track some additional registers as far as serialization is + concerned so we can properly handle br.call and br.ret. We could + make these registers visible to gcc, but since these registers are + never explicitly used in gcc generated code, it seems wasteful to + do so (plus it would make the call and return patterns needlessly + complex). */ +#define REG_GP (GR_REG (1)) +#define REG_RP (BR_REG (0)) +#define REG_AR_PFS (FIRST_PSEUDO_REGISTER) +#define REG_AR_CFM (FIRST_PSEUDO_REGISTER + 1) +/* ??? This will eventually need to be a hard register. 
*/ +#define REG_AR_EC (FIRST_PSEUDO_REGISTER + 2) +/* This is used for volatile asms which may require a stop bit immediately + before and after them. */ +#define REG_VOLATILE (FIRST_PSEUDO_REGISTER + 3) +#define NUM_REGS (FIRST_PSEUDO_REGISTER + 4) + +/* For each register, we keep track of how many times it has been + written in the current instruction group. If a register is written + unconditionally (no qualifying predicate), WRITE_COUNT is set to 2 + and FIRST_PRED is ignored. If a register is written if its + qualifying predicate P is true, we set WRITE_COUNT to 1 and + FIRST_PRED to P. Later on, the same register may be written again + by the complement of P (P+1 if P is even, P-1, otherwise) and when + this happens, WRITE_COUNT gets set to 2. The result of this is + that whenever an insn attempts to write a register whose + WRITE_COUNT is two, we need to issue a insn group barrier first. */ +struct reg_write_state +{ + char write_count; + char written_by_fp; /* Was register written by a floating-point insn? */ + short first_pred; /* 0 means ``no predicate'' */ +}; + +/* Cumulative info for the current instruction group. */ +struct reg_write_state rws_sum[NUM_REGS]; +/* Info for the current instruction. This gets copied to rws_sum after a + stop bit is emitted. */ +struct reg_write_state rws_insn[NUM_REGS]; + +/* Misc flags needed to compute RAW/WAW dependencies while we are traversing + RTL for one instruction. */ +struct reg_flags +{ + unsigned int is_write : 1; /* Is register being written? */ + unsigned int is_fp : 1; /* Is register used as part of an fp op? */ + unsigned int is_branch : 1; /* Is register used as part of a branch? */ +}; + +/* Update *RWS for REGNO, which is being written by the current instruction, + with predicate PRED, and associated register flags in FLAGS. */ + +static void +rws_update (rws, regno, flags, pred) + struct reg_write_state *rws; + int regno; + struct reg_flags flags; + int pred; +{ + rws[regno].write_count += pred ? 
1 : 2; + rws[regno].written_by_fp |= flags.is_fp; + rws[regno].first_pred = pred; +} + +/* Handle an access to register REGNO of type FLAGS using predicate register + PRED. Update rws_insn and rws_sum arrays. Return 1 if this access creates + a dependency with an earlier instruction in the same group. */ + +static int +rws_access_reg (regno, flags, pred) + int regno; + struct reg_flags flags; + int pred; +{ + int need_barrier = 0; + int is_predicate_reg; + + if (regno >= NUM_REGS) + abort (); + + if (flags.is_write) + { + /* One insn writes same reg multiple times? */ + if (rws_insn[regno].write_count > 0) + abort (); + + /* Update info for current instruction. */ + rws_update (rws_insn, regno, flags, pred); + + /* ??? This is necessary because predicate regs require two hard + registers. However, this should be using HARD_REGNO_NREGS so that + it works for all multi-reg hard registers, instead of only for + predicate registers. */ + is_predicate_reg = REGNO_REG_CLASS (regno) == PR_REGS; + if (is_predicate_reg) + rws_update (rws_insn, regno + 1, flags, pred); + + switch (rws_sum[regno].write_count) + { + case 0: + /* The register has not been written yet. */ + rws_update (rws_sum, regno, flags, pred); + if (is_predicate_reg) + rws_update (rws_sum, regno + 1, flags, pred); + break; + + case 1: + /* The register has been written via a predicate. If this is + not a complementary predicate, then we need a barrier. */ + /* ??? This assumes that P and P+1 are always complementary + predicates for P even. */ + if ((rws_sum[regno].first_pred ^ 1) != pred) + need_barrier = 1; + rws_update (rws_sum, regno, flags, pred); + if (is_predicate_reg) + rws_update (rws_sum, regno + 1, flags, pred); + break; + + case 2: + /* The register has been unconditionally written already. We + need a barrier. */ + need_barrier = 1; + break; + + default: + abort (); + } + } + else + { + if (flags.is_branch) + { + /* Branches have several RAW exceptions that allow to avoid + barriers. 
*/ + + if (REGNO_REG_CLASS (regno) == BR_REGS || regno == REG_AR_PFS) + /* RAW dependencies on branch regs are permissible as long + as the writer is a non-branch instruction. Since we + never generate code that uses a branch register written + by a branch instruction, handling this case is + easy. */ + /* ??? This assumes that we don't emit br.cloop, br.cexit, br.ctop, + br.wexit, br.wtop. This is true currently. */ + return 0; + + if (REGNO_REG_CLASS (regno) == PR_REGS + && ! rws_sum[regno].written_by_fp) + /* The predicates of a branch are available within the + same insn group as long as the predicate was written by + something other than a floating-point instruction. */ + return 0; + } + + switch (rws_sum[regno].write_count) + { + case 0: + /* The register has not been written yet. */ + break; + + case 1: + /* The register has been written via a predicate. If this is + not a complementary predicate, then we need a barrier. */ + /* ??? This assumes that P and P+1 are always complementary + predicates for P even. */ + if ((rws_sum[regno].first_pred ^ 1) != pred) + need_barrier = 1; + break; + + case 2: + /* The register has been unconditionally written already. We + need a barrier. */ + need_barrier = 1; + break; + + default: + abort (); + } + } + + return need_barrier; +} + +/* Handle an access to rtx X of type FLAGS using predicate register PRED. + Return 1 is this access creates a dependency with an earlier instruction + in the same group. */ + +static int +rtx_needs_barrier (x, flags, pred) + rtx x; + struct reg_flags flags; + int pred; +{ + int i, j; + int is_complemented = 0; + int need_barrier = 0; + const char *format_ptr; + struct reg_flags new_flags; + rtx src, dst; + rtx cond = 0; + + if (! x) + return 0; + + new_flags = flags; + + switch (GET_CODE (x)) + { + case SET: + src = SET_SRC (x); + switch (GET_CODE (src)) + { + case CALL: + /* We don't need to worry about the result registers that + get written by subroutine call. 
*/ + need_barrier = rtx_needs_barrier (src, flags, pred); + return need_barrier; + + case IF_THEN_ELSE: + if (SET_DEST (x) == pc_rtx) + { + /* X is a conditional branch. */ + /* ??? This seems redundant, as the caller sets this bit for + all JUMP_INSNs. */ + new_flags.is_branch = 1; + need_barrier = rtx_needs_barrier (src, new_flags, pred); + return need_barrier; + } + else + { + /* X is a conditional move. */ + cond = XEXP (src, 0); + if (GET_CODE (cond) == EQ) + is_complemented = 1; + cond = XEXP (cond, 0); + if (GET_CODE (cond) != REG + && REGNO_REG_CLASS (REGNO (cond)) != PR_REGS) + abort (); + + if (XEXP (src, 1) == SET_DEST (x) + || XEXP (src, 2) == SET_DEST (x)) + { + /* X is a conditional move that conditionally writes the + destination. */ + + /* We need another complement in this case. */ + if (XEXP (src, 1) == SET_DEST (x)) + is_complemented = ! is_complemented; + + pred = REGNO (cond); + if (is_complemented) + ++pred; + } + + /* ??? If this is a conditional write to the dest, then this + instruction does not actually read one source. This probably + doesn't matter, because that source is also the dest. */ + /* ??? Multiple writes to predicate registers are allowed + if they are all AND type compares, or if they are all OR + type compares. We do not generate such instructions + currently. */ + } + /* ... fall through ... */ + + default: + if (GET_RTX_CLASS (GET_CODE (src)) == '<' + && GET_MODE_CLASS (GET_MODE (XEXP (src, 0))) == MODE_FLOAT) + /* Set new_flags.is_fp to 1 so that we know we're dealing + with a floating point comparison when processing the + destination of the SET. */ + new_flags.is_fp = 1; + break; + } + need_barrier = rtx_needs_barrier (src, flags, pred); + /* This instruction unconditionally uses a predicate register. 
*/ + if (cond) + need_barrier |= rws_access_reg (REGNO (cond), flags, 0); + + dst = SET_DEST (x); + if (GET_CODE (dst) == ZERO_EXTRACT) + { + need_barrier |= rtx_needs_barrier (XEXP (dst, 1), flags, pred); + need_barrier |= rtx_needs_barrier (XEXP (dst, 2), flags, pred); + dst = XEXP (dst, 0); + } + new_flags.is_write = 1; + need_barrier |= rtx_needs_barrier (dst, new_flags, pred); + break; + + case CALL: + new_flags.is_write = 0; + /* ??? Why is this here? It seems unnecessary. */ + need_barrier |= rws_access_reg (REG_GP, new_flags, pred); + need_barrier |= rws_access_reg (REG_AR_EC, new_flags, pred); + + /* Avoid multiple register writes, in case this is a pattern with + multiple CALL rtx. This avoids an abort in rws_access_reg. */ + /* ??? This assumes that no rtx other than CALL/RETURN sets REG_AR_CFM, + and that we don't have predicated calls/returns. */ + if (! rws_insn[REG_AR_CFM].write_count) + { + new_flags.is_write = 1; + need_barrier |= rws_access_reg (REG_RP, new_flags, pred); + need_barrier |= rws_access_reg (REG_AR_PFS, new_flags, pred); + need_barrier |= rws_access_reg (REG_AR_CFM, new_flags, pred); + } + break; + + case CLOBBER: +#if 0 + case USE: + /* We must handle USE here in case it occurs within a PARALLEL. + For instance, the mov ar.pfs= instruction has a USE which requires + a barrier between it and an immediately preceeding alloc. */ +#endif + /* Clobber & use are for earlier compiler-phases only. */ + break; + + case ASM_OPERANDS: + case ASM_INPUT: + /* We always emit stop bits for traditional asms. We emit stop bits + for volatile extended asms if TARGET_VOL_ASM_STOP is true. */ + if (GET_CODE (x) != ASM_OPERANDS + || (MEM_VOLATILE_P (x) && TARGET_VOL_ASM_STOP)) + { + /* Avoid writing the register multiple times if we have multiple + asm outputs. This avoids an abort in rws_access_reg. */ + if (! 
rws_insn[REG_VOLATILE].write_count) + { + new_flags.is_write = 1; + rws_access_reg (REG_VOLATILE, new_flags, pred); + } + return 1; + } + + /* For all ASM_OPERANDS, we must traverse the vector of input operands. + We can not just fall through here since then we would be confused + by the ASM_INPUT rtx inside ASM_OPERANDS, which do not indicate + traditional asms unlike their normal usage. */ + + for (i = ASM_OPERANDS_INPUT_LENGTH (x) - 1; i >= 0; --i) + if (rtx_needs_barrier (ASM_OPERANDS_INPUT (x, i), flags, pred)) + need_barrier = 1; + break; + + case PARALLEL: + for (i = XVECLEN (x, 0) - 1; i >= 0; --i) + if (rtx_needs_barrier (XVECEXP (x, 0, i), flags, pred)) + need_barrier = 1; + break; + + case SUBREG: + x = SUBREG_REG (x); + /* FALLTHRU */ + case REG: + need_barrier = rws_access_reg (REGNO (x), flags, pred); + break; + + case MEM: + /* Find the regs used in memory address computation. */ + new_flags.is_write = 0; + need_barrier = rtx_needs_barrier (XEXP (x, 0), new_flags, pred); + break; + + case CONST_INT: case CONST_DOUBLE: + case SYMBOL_REF: case LABEL_REF: case CONST: + break; + + /* Operators with side-effects. */ + case POST_INC: case POST_DEC: + if (GET_CODE (XEXP (x, 0)) != REG) + abort (); + + new_flags.is_write = 0; + need_barrier = rws_access_reg (REGNO (XEXP (x, 0)), new_flags, pred); + new_flags.is_write = 1; + need_barrier |= rws_access_reg (REGNO (XEXP (x, 0)), new_flags, pred); + break; + + /* Handle common unary and binary ops for efficiency. 
*/ + case COMPARE: case PLUS: case MINUS: case MULT: case DIV: + case MOD: case UDIV: case UMOD: case AND: case IOR: + case XOR: case ASHIFT: case ROTATE: case ASHIFTRT: case LSHIFTRT: + case ROTATERT: case SMIN: case SMAX: case UMIN: case UMAX: + case NE: case EQ: case GE: case GT: case LE: + case LT: case GEU: case GTU: case LEU: case LTU: + need_barrier = rtx_needs_barrier (XEXP (x, 0), new_flags, pred); + need_barrier |= rtx_needs_barrier (XEXP (x, 1), new_flags, pred); + break; + + case NEG: case NOT: case SIGN_EXTEND: case ZERO_EXTEND: + case TRUNCATE: case FLOAT_EXTEND: case FLOAT_TRUNCATE: case FLOAT: + case FIX: case UNSIGNED_FLOAT: case UNSIGNED_FIX: case ABS: + case SQRT: case FFS: + need_barrier = rtx_needs_barrier (XEXP (x, 0), flags, pred); + break; + + case UNSPEC: + switch (XINT (x, 1)) + { + /* ??? For the st8.spill/ld8.fill instructions, we can ignore unat + dependencies as long as we don't have both a spill and fill in + the same instruction group. We need to check for that. */ + case 1: /* st8.spill */ + case 2: /* ld8.fill */ + case 3: /* stf.spill */ + case 4: /* ldf.spill */ + case 8: /* popcnt */ + need_barrier = rtx_needs_barrier (XVECEXP (x, 0, 0), flags, pred); + break; + + case 5: /* mov =pr */ + /* This reads all predicate registers. */ + for (i = PR_REG (1); i < PR_REG (64); i++) + need_barrier |= rws_access_reg (i, flags, pred); + break; + + case 6: /* mov pr= */ + /* This writes all predicate registers. */ + new_flags.is_write = 1; + /* We need to skip by two, because rws_access_reg always writes + to two predicate registers at a time. */ + /* ??? Strictly speaking, we shouldn't be counting writes to pr0. */ + for (i = PR_REG (0); i < PR_REG (64); i += 2) + need_barrier |= rws_access_reg (i, new_flags, pred); + break; + + case 7: + abort (); + + /* ??? Should track unat reads and writes. 
*/ + case 9: /* mov =ar.unat */ + case 10: /* mov ar.unat= */ + break; + case 11: /* mov ar.ccv= */ + break; + case 12: /* mf */ + break; + case 13: /* cmpxchg_acq */ + break; + case 14: /* val_compare_and_swap */ + break; + case 15: /* lock_release */ + break; + case 16: /* lock_test_and_set */ + break; + case 17: /* _and_fetch */ + break; + case 18: /* fetch_and_ */ + break; + case 19: /* fetchadd_acq */ + break; + default: + abort (); + } + break; + + case UNSPEC_VOLATILE: + switch (XINT (x, 1)) + { + case 0: /* alloc */ + /* Alloc must always be the first instruction. Currently, we + only emit it at the function start, so we don't need to worry + about emitting a stop bit before it. */ + need_barrier = rws_access_reg (REG_AR_PFS, flags, pred); + + new_flags.is_write = 1; + need_barrier |= rws_access_reg (REG_AR_CFM, new_flags, pred); + return need_barrier; + + case 1: /* blockage */ + case 2: /* insn group barrier */ + return 0; + + case 3: /* flush_cache */ + return rtx_needs_barrier (XVECEXP (x, 0, 0), flags, pred); + + case 4: /* mov ar.pfs= */ + new_flags.is_write = 1; + need_barrier = rws_access_reg (REG_AR_PFS, new_flags, pred); + break; + + default: + abort (); + } + break; + + case RETURN: + new_flags.is_write = 0; + need_barrier = rws_access_reg (REG_RP, flags, pred); + need_barrier |= rws_access_reg (REG_AR_PFS, flags, pred); + + new_flags.is_write = 1; + need_barrier |= rws_access_reg (REG_AR_EC, new_flags, pred); + need_barrier |= rws_access_reg (REG_AR_CFM, new_flags, pred); + break; + + default: + format_ptr = GET_RTX_FORMAT (GET_CODE (x)); + for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--) + switch (format_ptr[i]) + { + case '0': /* unused field */ + case 'i': /* integer */ + case 'n': /* note */ + case 'w': /* wide integer */ + case 's': /* pointer to string */ + case 'S': /* optional pointer to string */ + break; + + case 'e': + if (rtx_needs_barrier (XEXP (x, i), flags, pred)) + need_barrier = 1; + break; + + case 'E': + for (j = 
XVECLEN (x, i) - 1; j >= 0; --j) + if (rtx_needs_barrier (XVECEXP (x, i, j), flags, pred)) + need_barrier = 1; + break; + + default: + abort (); + } + } + return need_barrier; +} + +/* INSNS is an chain of instructions. Scan the chain, and insert stop bits + as necessary to eliminate dependendencies. */ + +static void +emit_insn_group_barriers (insns) + rtx insns; +{ + int need_barrier = 0; + int exception_nesting; + struct reg_flags flags; + rtx insn, prev_insn; + + memset (rws_sum, 0, sizeof (rws_sum)); + + prev_insn = 0; + for (insn = insns; insn; insn = NEXT_INSN (insn)) + { + memset (&flags, 0, sizeof (flags)); + switch (GET_CODE (insn)) + { + case NOTE: + switch (NOTE_LINE_NUMBER (insn)) + { + case NOTE_INSN_EH_REGION_BEG: + exception_nesting++; + break; + + case NOTE_INSN_EH_REGION_END: + exception_nesting--; + break; + + case NOTE_INSN_EPILOGUE_BEG: + break; + + default: + break; + } + break; + + case JUMP_INSN: + case CALL_INSN: + flags.is_branch = 1; + case INSN: + if (GET_CODE (PATTERN (insn)) == USE) + /* Don't care about USE "insns"---those are used to + indicate to the optimizer that it shouldn't get rid of + certain operations. */ + break; + else + { + memset (rws_insn, 0, sizeof (rws_insn)); + need_barrier = rtx_needs_barrier (PATTERN (insn), flags, 0); + + /* Check to see if the previous instruction was a volatile + asm. */ + if (! need_barrier) + need_barrier = rws_access_reg (REG_VOLATILE, flags, 0); + + if (need_barrier) + { + /* PREV_INSN null can happen if the very first insn is a + volatile asm. */ + if (prev_insn) + emit_insn_after (gen_insn_group_barrier (), prev_insn); + memcpy (rws_sum, rws_insn, sizeof (rws_sum)); + } + need_barrier = 0; + prev_insn = insn; + } + break; + + case BARRIER: + /* A barrier doesn't imply an instruction group boundary. */ + break; + + case CODE_LABEL: + /* Leave prev_insn alone so the barrier gets generated in front + of the label, if one is needed. 
*/ + break; + + default: + abort (); + } + } +} + +/* Perform machine dependent operations on the rtl chain INSNS. */ + +void +ia64_reorg (insns) + rtx insns; +{ + emit_insn_group_barriers (insns); +} + +/* Return true if REGNO is used by the epilogue. */ + +int +ia64_epilogue_uses (regno) + int regno; +{ + /* For functions defined with the syscall_linkage attribute, all input + registers are marked as live at all function exits. This prevents the + register allocator from using the input registers, which in turn makes it + possible to restart a system call after an interrupt without having to + save/restore the input registers. */ + + if (IN_REGNO_P (regno) + && (regno < IN_REG (current_function_args_info.words)) + && lookup_attribute ("syscall_linkage", + TYPE_ATTRIBUTES (TREE_TYPE (current_function_decl)))) + return 1; + + return 0; +} + +/* Return true if IDENTIFIER is a valid attribute for TYPE. */ + +int +ia64_valid_type_attribute (type, attributes, identifier, args) + tree type; + tree attributes ATTRIBUTE_UNUSED; + tree identifier; + tree args; +{ + /* We only support an attribute for function calls. */ + + if (TREE_CODE (type) != FUNCTION_TYPE + && TREE_CODE (type) != METHOD_TYPE) + return 0; + + /* The "syscall_linkage" attribute says the callee is a system call entry + point. This affects ia64_epilogue_uses. */ + + if (is_attribute_p ("syscall_linkage", identifier)) + return args == NULL_TREE; + + return 0; +} + +/* For ia64, SYMBOL_REF_FLAG set means that it is a function. + + We add @ to the name if this goes in small data/bss. We can only put + a variable in small data/bss if it is defined in this module or a module + that we are statically linked with. We can't check the second condition, + but TREE_STATIC gives us the first one. */ + +/* ??? If we had IPA, we could check the second condition. We could support + programmer added section attributes if the variable is not defined in this + module. */ + +/* ??? 
See the v850 port for a cleaner way to do this. */ + +/* ??? We could also support own long data here. Generating movl/add/ld8 + instead of addl,ld8/ld8. This makes the code bigger, but should make the + code faster because there is one less load. This also includes incomplete + types which can't go in sdata/sbss. */ + +/* ??? See select_section. We must put short own readonly variables in + sdata/sbss instead of the more natural rodata, because we can't perform + the DECL_READONLY_SECTION test here. */ + +extern struct obstack * saveable_obstack; + +void +ia64_encode_section_info (decl) + tree decl; +{ + if (TREE_CODE (decl) == FUNCTION_DECL) + SYMBOL_REF_FLAG (XEXP (DECL_RTL (decl), 0)) = 1; + /* We assume that -fpic is used only to create a shared library (dso). + With -fpic, no global data can ever be sdata. + Without -fpic, global common uninitialized data can never be sdata, since + it can unify with a real definition in a dso. */ + /* ??? Actually, we can put globals in sdata, as long as we don't use gprel + to access them. The linker may then be able to do linker relaxation to + optimize references to them. Currently sdata implies use of gprel. */ + else if (! TARGET_NO_SDATA + && TREE_CODE (decl) == VAR_DECL + && TREE_STATIC (decl) + && ! (TREE_PUBLIC (decl) + && (flag_pic + || (DECL_COMMON (decl) + && (DECL_INITIAL (decl) == 0 + || DECL_INITIAL (decl) == error_mark_node)))) + /* Either the variable must be declared without a section attribute, + or the section must be sdata or sbss. */ + && (DECL_SECTION_NAME (decl) == 0 + || ! strcmp (TREE_STRING_POINTER (DECL_SECTION_NAME (decl)), + ".sdata") + || ! strcmp (TREE_STRING_POINTER (DECL_SECTION_NAME (decl)), + ".sbss"))) + { + int size = int_size_in_bytes (TREE_TYPE (decl)); + char *str = XSTR (XEXP (DECL_RTL (decl), 0), 0); + int reloc; + + /* ??? We should redeclare CTOR_LIST, DTOR_END so that we don't have to + special case them here. 
Currently we put them in ctor/dtors sections + behind the compiler's back. We should use section attributes + instead. */ + if (! strcmp (str, "__CTOR_LIST__") + || ! strcmp (str, "__DTOR_END__")) + ; + + /* If this is an incomplete type with size 0, then we can't put it in + sdata because it might be too big when completed. */ + else if (size > 0 && size <= ia64_section_threshold) + { + int len = strlen (str); + char *newstr = obstack_alloc (saveable_obstack, len + 2); + + strcpy (newstr + 1, str); + *newstr = SDATA_NAME_FLAG_CHAR; + XSTR (XEXP (DECL_RTL (decl), 0), 0) = newstr; + } + } +} + +#define def_builtin(name, type, code) \ + builtin_function ((name), (type), (code), BUILT_IN_MD, NULL_PTR) + +struct builtin_description +{ + enum insn_code icode; + const char *name; + enum ia64_builtins code; + enum rtx_code comparison; + unsigned int flag; +}; + +/* All 32 bit intrinsics that take 2 arguments. */ +static struct builtin_description bdesc_2argsi[] = +{ + { CODE_FOR_fetch_and_add_si, "__sync_fetch_and_add_si", IA64_BUILTIN_FETCH_AND_ADD_SI, 0, 0 }, + { CODE_FOR_fetch_and_sub_si, "__sync_fetch_and_sub_si", IA64_BUILTIN_FETCH_AND_SUB_SI, 0, 0 }, + { CODE_FOR_fetch_and_or_si, "__sync_fetch_and_or_si", IA64_BUILTIN_FETCH_AND_OR_SI, 0, 0 }, + { CODE_FOR_fetch_and_and_si, "__sync_fetch_and_and_si", IA64_BUILTIN_FETCH_AND_AND_SI, 0, 0 }, + { CODE_FOR_fetch_and_xor_si, "__sync_fetch_and_xor_si", IA64_BUILTIN_FETCH_AND_XOR_SI, 0, 0 }, + { CODE_FOR_fetch_and_nand_si, "__sync_fetch_and_nand_si", IA64_BUILTIN_FETCH_AND_NAND_SI, 0, 0 }, + { CODE_FOR_add_and_fetch_si, "__sync_add_and_fetch_si", IA64_BUILTIN_ADD_AND_FETCH_SI, 0, 0 }, + { CODE_FOR_sub_and_fetch_si, "__sync_sub_and_fetch_si", IA64_BUILTIN_SUB_AND_FETCH_SI, 0, 0 }, + { CODE_FOR_or_and_fetch_si, "__sync_or_and_fetch_si", IA64_BUILTIN_OR_AND_FETCH_SI, 0, 0 }, + { CODE_FOR_and_and_fetch_si, "__sync_and_and_fetch_si", IA64_BUILTIN_AND_AND_FETCH_SI, 0, 0 }, + { CODE_FOR_xor_and_fetch_si, "__sync_xor_and_fetch_si", 
IA64_BUILTIN_XOR_AND_FETCH_SI, 0, 0 }, + { CODE_FOR_nand_and_fetch_si, "__sync_nand_and_fetch_si", IA64_BUILTIN_NAND_AND_FETCH_SI, 0, 0 } +}; + +/* All 64 bit intrinsics that take 2 arguments. */ +static struct builtin_description bdesc_2argdi[] = +{ + { CODE_FOR_fetch_and_add_di, "__sync_fetch_and_add_di", IA64_BUILTIN_FETCH_AND_ADD_DI, 0, 0 }, + { CODE_FOR_fetch_and_sub_di, "__sync_fetch_and_sub_di", IA64_BUILTIN_FETCH_AND_SUB_DI, 0, 0 }, + { CODE_FOR_fetch_and_or_di, "__sync_fetch_and_or_di", IA64_BUILTIN_FETCH_AND_OR_DI, 0, 0 }, + { CODE_FOR_fetch_and_and_di, "__sync_fetch_and_and_di", IA64_BUILTIN_FETCH_AND_AND_DI, 0, 0 }, + { CODE_FOR_fetch_and_xor_di, "__sync_fetch_and_xor_di", IA64_BUILTIN_FETCH_AND_XOR_DI, 0, 0 }, + { CODE_FOR_fetch_and_nand_di, "__sync_fetch_and_nand_di", IA64_BUILTIN_FETCH_AND_NAND_DI, 0, 0 }, + { CODE_FOR_add_and_fetch_di, "__sync_add_and_fetch_di", IA64_BUILTIN_ADD_AND_FETCH_DI, 0, 0 }, + { CODE_FOR_sub_and_fetch_di, "__sync_sub_and_fetch_di", IA64_BUILTIN_SUB_AND_FETCH_DI, 0, 0 }, + { CODE_FOR_or_and_fetch_di, "__sync_or_and_fetch_di", IA64_BUILTIN_OR_AND_FETCH_DI, 0, 0 }, + { CODE_FOR_and_and_fetch_di, "__sync_and_and_fetch_di", IA64_BUILTIN_AND_AND_FETCH_DI, 0, 0 }, + { CODE_FOR_xor_and_fetch_di, "__sync_xor_and_fetch_di", IA64_BUILTIN_XOR_AND_FETCH_DI, 0, 0 }, + { CODE_FOR_nand_and_fetch_di, "__sync_nand_and_fetch_di", IA64_BUILTIN_NAND_AND_FETCH_DI, 0, 0 } +}; + +void +ia64_init_builtins () +{ + int i; + struct builtin_description *d; + + tree psi_type_node = build_pointer_type (integer_type_node); + tree pdi_type_node = build_pointer_type (long_integer_type_node); + tree endlink = tree_cons (NULL_TREE, void_type_node, NULL_TREE); + + + /* __sync_val_compare_and_swap_si, __sync_bool_compare_and_swap_si */ + tree si_ftype_psi_si_si + = build_function_type (integer_type_node, + tree_cons (NULL_TREE, psi_type_node, + tree_cons (NULL_TREE, integer_type_node, + tree_cons (NULL_TREE, integer_type_node, + endlink)))); + + /* 
__sync_val_compare_and_swap_di, __sync_bool_compare_and_swap_di */ + tree di_ftype_pdi_di_di + = build_function_type (long_integer_type_node, + tree_cons (NULL_TREE, pdi_type_node, + tree_cons (NULL_TREE, long_integer_type_node, + tree_cons (NULL_TREE, long_integer_type_node, + endlink)))); + /* __sync_synchronize */ + tree void_ftype_void + = build_function_type (void_type_node, endlink); + + /* __sync_lock_test_and_set_si */ + tree si_ftype_psi_si + = build_function_type (integer_type_node, + tree_cons (NULL_TREE, psi_type_node, + tree_cons (NULL_TREE, integer_type_node, endlink))); + + /* __sync_lock_test_and_set_di */ + tree di_ftype_pdi_di + = build_function_type (long_integer_type_node, + tree_cons (NULL_TREE, pdi_type_node, + tree_cons (NULL_TREE, long_integer_type_node, endlink))); + + /* __sync_lock_release_si */ + tree void_ftype_psi + = build_function_type (void_type_node, tree_cons (NULL_TREE, psi_type_node, endlink)); + + /* __sync_lock_release_di */ + tree void_ftype_pdi + = build_function_type (void_type_node, tree_cons (NULL_TREE, pdi_type_node, endlink)); + + def_builtin ("__sync_val_compare_and_swap_si", si_ftype_psi_si_si, IA64_BUILTIN_VAL_COMPARE_AND_SWAP_SI); + + def_builtin ("__sync_val_compare_and_swap_di", di_ftype_pdi_di_di, IA64_BUILTIN_VAL_COMPARE_AND_SWAP_DI); + + def_builtin ("__sync_bool_compare_and_swap_si", si_ftype_psi_si_si, IA64_BUILTIN_BOOL_COMPARE_AND_SWAP_SI); + + def_builtin ("__sync_bool_compare_and_swap_di", di_ftype_pdi_di_di, IA64_BUILTIN_BOOL_COMPARE_AND_SWAP_DI); + + def_builtin ("__sync_synchronize", void_ftype_void, IA64_BUILTIN_SYNCHRONIZE); + + def_builtin ("__sync_lock_test_and_set_si", si_ftype_psi_si, IA64_BUILTIN_LOCK_TEST_AND_SET_SI); + + def_builtin ("__sync_lock_test_and_set_di", di_ftype_pdi_di, IA64_BUILTIN_LOCK_TEST_AND_SET_DI); + + def_builtin ("__sync_lock_release_si", void_ftype_psi, IA64_BUILTIN_LOCK_RELEASE_SI); + + def_builtin ("__sync_lock_release_di", void_ftype_pdi, IA64_BUILTIN_LOCK_RELEASE_DI); + 
+ /* Add all builtins that are operations on two args. */ + for (i=0, d = bdesc_2argsi; i < sizeof(bdesc_2argsi) / sizeof *d; i++, d++) + def_builtin (d->name, si_ftype_psi_si, d->code); + for (i=0, d = bdesc_2argdi; i < sizeof(bdesc_2argdi) / sizeof *d; i++, d++) + def_builtin (d->name, di_ftype_pdi_di, d->code); +} + +/* Expand fetch_and_op intrinsics. The basic code sequence is: + + mf + ldsz return = [ptr]; + tmp = return; + do { + oldval = tmp; + ar.ccv = tmp; + tmp = value; + cmpxchgsz.acq tmp = [ptr], tmp + cmpxchgsz.acq tmp = [ptr], tmp + } while (tmp != oldval) +*/ +void +ia64_expand_fetch_and_op (code, mode, operands) + enum fetchop_code code; + enum machine_mode mode; + rtx operands[]; +{ + rtx oldval, newlabel; + rtx tmp_reg = gen_rtx_REG (mode, GR_REG(0)); + rtx mfreg = gen_rtx_MEM (BLKmode, tmp_reg); + RTX_UNCHANGING_P (mfreg) = 1; + emit_insn (gen_mf (mfreg)); + tmp_reg = gen_reg_rtx (mode); + oldval = gen_reg_rtx (mode); + + if (mode == SImode) + { + emit_insn (gen_movsi (operands[0], operands[1])); + emit_insn (gen_movsi (tmp_reg, operands[0])); + } + else + { + emit_insn (gen_movdi (operands[0], operands[1])); + emit_insn (gen_movdi (tmp_reg, operands[0])); + } + + newlabel = gen_label_rtx (); + emit_label (newlabel); + if (mode == SImode) + { + emit_insn (gen_movsi (oldval, tmp_reg)); + emit_insn (gen_ccv_restore_si (tmp_reg)); + } + else + { + emit_insn (gen_movdi (oldval, tmp_reg)); + emit_insn (gen_ccv_restore_di (tmp_reg)); + } + + /* Perform the specific operation. 
*/ + switch (code) + { + case IA64_ADD_OP: + { + rtx reg; + if (GET_CODE (operands[2]) == CONST_INT) + reg = gen_reg_rtx (mode); + else + reg = operands[2]; + if (mode == SImode) + { + if (reg != operands[2]) + emit_insn (gen_movsi (reg, operands[2])); + emit_insn (gen_addsi3 (tmp_reg, tmp_reg, reg)); + } + else + { + if (reg != operands[2]) + emit_insn (gen_movdi (reg, operands[2])); + emit_insn (gen_adddi3 (tmp_reg, tmp_reg, reg)); + } + break; + } + + case IA64_SUB_OP: + if (mode == SImode) + emit_insn (gen_subsi3 (tmp_reg, tmp_reg, operands[2])); + else + emit_insn (gen_subdi3 (tmp_reg, tmp_reg, operands[2])); + break; + + case IA64_OR_OP: + emit_insn (gen_iordi3 (tmp_reg, tmp_reg, operands[2])); + break; + + case IA64_AND_OP: + emit_insn (gen_anddi3 (tmp_reg, tmp_reg, operands[2])); + break; + + case IA64_XOR_OP: + emit_insn (gen_xordi3 (tmp_reg, tmp_reg, operands[2])); + break; + + case IA64_NAND_OP: + emit_insn (gen_anddi3 (tmp_reg, tmp_reg, operands[2])); + if (mode == SImode) + emit_insn (gen_one_cmplsi2 (tmp_reg, operands[0])); + else + emit_insn (gen_one_cmpldi2 (tmp_reg, operands[0])); + break; + + default: + break; + } + + if (mode == SImode) + emit_insn (gen_cmpxchg_acq_si (tmp_reg, operands[1], tmp_reg)); + else + emit_insn (gen_cmpxchg_acq_di (tmp_reg, operands[1], tmp_reg)); + + emit_cmp_and_jump_insns (tmp_reg, oldval, NE, 0, mode, 1, 0, newlabel); +} + +/* Expand op_and_fetch intrinsics. 
The basic code sequence is: + + mf + ldsz return = [ptr]; + do { + oldval = tmp; + ar.ccv = tmp; + return = tmp + value; + cmpxchgsz.acq tmp = [ptr], return + } while (tmp != oldval) +*/ +void +ia64_expand_op_and_fetch (code, mode, operands) + enum fetchop_code code; + enum machine_mode mode; + rtx operands[]; +{ + rtx oldval, newlabel; + rtx tmp_reg, tmp2_reg = gen_rtx_REG (mode, GR_REG(0)); + rtx mfreg = gen_rtx_MEM (BLKmode, tmp2_reg); + RTX_UNCHANGING_P (mfreg) = 1; + + emit_insn (gen_mf (mfreg)); + tmp_reg = gen_reg_rtx (mode); + if (mode == SImode) + emit_insn (gen_movsi (tmp_reg, operands[1])); + else + emit_insn (gen_movdi (tmp_reg, operands[1])); + + newlabel = gen_label_rtx (); + emit_label (newlabel); + oldval = gen_reg_rtx (mode); + if (mode == SImode) + { + emit_insn (gen_movsi (oldval, tmp_reg)); + emit_insn (gen_ccv_restore_si (tmp_reg)); + } + else + { + emit_insn (gen_movdi (oldval, tmp_reg)); + emit_insn (gen_ccv_restore_di (tmp_reg)); + } + + /* Perform the specific operation. 
*/ + switch (code) + { + case IA64_ADD_OP: + if (mode == SImode) + emit_insn (gen_addsi3 (operands[0], tmp_reg, operands[2])); + else + emit_insn (gen_adddi3 (operands[0], tmp_reg, operands[2])); + break; + + case IA64_SUB_OP: + if (mode == SImode) + emit_insn (gen_subsi3 (operands[0], tmp_reg, operands[2])); + else + emit_insn (gen_subdi3 (operands[0], tmp_reg, operands[2])); + break; + + case IA64_OR_OP: + emit_insn (gen_iordi3 (operands[0], tmp_reg, operands[2])); + break; + + case IA64_AND_OP: + emit_insn (gen_anddi3 (operands[0], tmp_reg, operands[2])); + break; + + case IA64_XOR_OP: + emit_insn (gen_xordi3 (operands[0], tmp_reg, operands[2])); + break; + + case IA64_NAND_OP: + emit_insn (gen_anddi3 (operands[0], tmp_reg, operands[2])); + if (mode == SImode) + emit_insn (gen_one_cmplsi2 (operands[0], operands[0])); + else + emit_insn (gen_one_cmpldi2 (operands[0], operands[0])); + break; + + default: + break; + } + + if (mode == SImode) + emit_insn (gen_cmpxchg_acq_si (tmp_reg, operands[1], operands[0])); + else + emit_insn (gen_cmpxchg_acq_di (tmp_reg, operands[1], operands[0])); + + emit_cmp_and_jump_insns (tmp_reg, oldval, NE, 0, mode, 1, 0, newlabel); +} + +/* Expand val_ and bool_compare_and_swap. For val_ we want: + + ar.ccv = oldval + mf + cmpxchgsz.acq ret = [ptr], newval, ar.ccv + return ret + + For bool_ it's the same except return ret == oldval. 
+*/ +static rtx +ia64_expand_compare_and_swap (icode, arglist, target, boolcode) + enum insn_code icode; + tree arglist; + rtx target; + int boolcode; +{ + tree arg0, arg1, arg2; + rtx newlabel, newlabel2, op0, op1, op2, pat; + enum machine_mode tmode, mode0, mode1, mode2; + + arg0 = TREE_VALUE (arglist); + arg1 = TREE_VALUE (TREE_CHAIN (arglist)); + arg2 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist))); + op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0); + op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0); + op2 = expand_expr (arg2, NULL_RTX, VOIDmode, 0); + tmode = insn_data[icode].operand[0].mode; + mode0 = insn_data[icode].operand[1].mode; + mode1 = insn_data[icode].operand[2].mode; + mode2 = insn_data[icode].operand[3].mode; + + op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0)); + RTX_UNCHANGING_P (op0) = 1; + if (! (*insn_data[icode].operand[2].predicate) (op1, mode1)) + op1 = copy_to_mode_reg (mode1, op1); + if (! (*insn_data[icode].operand[3].predicate) (op2, mode2)) + op2 = copy_to_mode_reg (mode2, op2); + if (target == 0 + || GET_MODE (target) != tmode + || ! (*insn_data[icode].operand[0].predicate) (target, tmode)) + target = gen_reg_rtx (tmode); + + pat = GEN_FCN (icode) (target, op0, op1, op2); + if (! pat) + return 0; + emit_insn (pat); + if (boolcode) + { + if (tmode == SImode) + { + emit_insn (gen_cmpsi (target, op1)); + emit_insn (gen_seq (gen_lowpart (DImode, target))); + } + else + { + emit_insn (gen_cmpdi (target, op1)); + emit_insn (gen_seq (target)); + } + } + return target; +} + +/* Expand all intrinsics that take 2 arguments. 
*/ +static rtx +ia64_expand_binop_builtin (icode, arglist, target) + enum insn_code icode; + tree arglist; + rtx target; +{ + rtx pat; + tree arg0 = TREE_VALUE (arglist); + tree arg1 = TREE_VALUE (TREE_CHAIN (arglist)); + rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0); + rtx op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0); + enum machine_mode tmode = insn_data[icode].operand[0].mode; + enum machine_mode mode0 = insn_data[icode].operand[1].mode; + enum machine_mode mode1 = insn_data[icode].operand[2].mode; + + if (! target + || GET_MODE (target) != tmode + || ! (*insn_data[icode].operand[0].predicate) (target, tmode)) + target = gen_reg_rtx (tmode); + + op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0)); + if (! (*insn_data[icode].operand[2].predicate) (op1, mode1)) + op1 = copy_to_mode_reg (mode1, op1); + + pat = GEN_FCN (icode) (target, op0, op1); + if (! pat) + return 0; + emit_insn (pat); + return target; +} + +rtx +ia64_expand_builtin (exp, target, subtarget, mode, ignore) + tree exp; + rtx target; + rtx subtarget; + enum machine_mode mode; + int ignore; +{ + rtx op0, op1, op2, op3, pat; + rtx tmp_reg; + rtx newlabel, newlabel2; + tree arg0, arg1, arg2, arg3; + tree arglist = TREE_OPERAND (exp, 1); + tree fndecl = TREE_OPERAND (TREE_OPERAND (exp, 0), 0); + int fcode = DECL_FUNCTION_CODE (fndecl); + enum machine_mode tmode, mode0, mode1, mode2, mode3; + enum insn_code icode; + int boolcode = 0; + int i; + struct builtin_description *d; + + switch (fcode) + { + case IA64_BUILTIN_BOOL_COMPARE_AND_SWAP_SI: + return ia64_expand_compare_and_swap (CODE_FOR_val_compare_and_swap_si, arglist, target, 1); + case IA64_BUILTIN_VAL_COMPARE_AND_SWAP_SI: + return ia64_expand_compare_and_swap (CODE_FOR_val_compare_and_swap_si, arglist, target, 0); + case IA64_BUILTIN_BOOL_COMPARE_AND_SWAP_DI: + return ia64_expand_compare_and_swap (CODE_FOR_val_compare_and_swap_di, arglist, target, 1); + case IA64_BUILTIN_VAL_COMPARE_AND_SWAP_DI: + return ia64_expand_compare_and_swap 
(CODE_FOR_val_compare_and_swap_di, arglist, target, 0); + case IA64_BUILTIN_SYNCHRONIZE: + /* Pass a volatile memory operand. */ + tmp_reg = gen_rtx_REG (DImode, GR_REG(0)); + target = gen_rtx_MEM (BLKmode, tmp_reg); + emit_insn (gen_mf (target)); + return 0; + + case IA64_BUILTIN_LOCK_TEST_AND_SET_SI: + icode = CODE_FOR_lock_test_and_set_si; + arg0 = TREE_VALUE (arglist); + arg1 = TREE_VALUE (TREE_CHAIN (arglist)); + op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0); + op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0); + tmode = insn_data[icode].operand[0].mode; + mode0 = insn_data[icode].operand[1].mode; + mode1 = insn_data[icode].operand[2].mode; + op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0)); + RTX_UNCHANGING_P (op0) = 1; + if (! (*insn_data[icode].operand[2].predicate) (op1, mode1)) + op1 = copy_to_mode_reg (mode1, op1); + if (target == 0 + || GET_MODE (target) != tmode + || ! (*insn_data[icode].operand[0].predicate) (target, tmode)) + target = gen_reg_rtx (tmode); + pat = GEN_FCN (icode) (target, op0, op1); + if (! pat) + return 0; + emit_insn (pat); + return target; + + case IA64_BUILTIN_LOCK_TEST_AND_SET_DI: + icode = CODE_FOR_lock_test_and_set_di; + arg0 = TREE_VALUE (arglist); + arg1 = TREE_VALUE (TREE_CHAIN (arglist)); + op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0); + op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0); + tmode = insn_data[icode].operand[0].mode; + mode0 = insn_data[icode].operand[1].mode; + mode1 = insn_data[icode].operand[2].mode; + op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0)); + RTX_UNCHANGING_P (op0) = 1; + if (! (*insn_data[icode].operand[2].predicate) (op1, mode1)) + op1 = copy_to_mode_reg (mode1, op1); + if (target == 0 + || GET_MODE (target) != tmode + || ! (*insn_data[icode].operand[0].predicate) (target, tmode)) + target = gen_reg_rtx (tmode); + pat = GEN_FCN (icode) (target, op0, op1); + if (! 
pat) + return 0; + emit_insn (pat); + return target; + + case IA64_BUILTIN_LOCK_RELEASE_SI: + arg0 = TREE_VALUE (arglist); + op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0); + op0 = gen_rtx_MEM (SImode, copy_to_mode_reg (Pmode, op0)); + MEM_VOLATILE_P (op0) = 1; + emit_insn (gen_movsi (op0, GEN_INT(0))); + return 0; + + case IA64_BUILTIN_LOCK_RELEASE_DI: + arg0 = TREE_VALUE (arglist); + op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0); + op0 = gen_rtx_MEM (DImode, copy_to_mode_reg (Pmode, op0)); + MEM_VOLATILE_P (op0) = 1; + emit_insn (gen_movdi (op0, GEN_INT(0))); + return 0; + + default: + break; + } + + /* Expand all 32 bit intrinsics that take 2 arguments. */ + for (i=0, d = bdesc_2argsi; i < sizeof (bdesc_2argsi) / sizeof *d; i++, d++) + if (d->code == fcode) + return ia64_expand_binop_builtin (d->icode, arglist, target); + + /* Expand all 64 bit intrinsics that take 2 arguments. */ + for (i=0, d = bdesc_2argdi; i < sizeof (bdesc_2argdi) / sizeof *d; i++, d++) + if (d->code == fcode) + return ia64_expand_binop_builtin (d->icode, arglist, target); + + fail: + return 0; +} diff --git a/gcc/config/ia64/ia64.h b/gcc/config/ia64/ia64.h new file mode 100644 index 0000000..a245ebd --- /dev/null +++ b/gcc/config/ia64/ia64.h @@ -0,0 +1,2795 @@ +/* Definitions of target machine GNU compiler. IA64 version. + Copyright (C) 1999 Cygnus Solutions. + Contributed by James E. Wilson and + David Mosberger . + +This file is part of GNU CC. + +GNU CC is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 2, or (at your option) +any later version. + +GNU CC is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. 
+ +You should have received a copy of the GNU General Public License +along with GNU CC; see the file COPYING. If not, write to +the Free Software Foundation, 59 Temple Place - Suite 330, +Boston, MA 02111-1307, USA. */ + +/* ??? Use of the upper 32 FP registers for integer values will make context + switching slower, because the kernel only saves any registers past f32 if + it has to. */ + +/* ??? Look at ABI group documents for list of preprocessor macros and + other features required for ABI compliance. */ + +/* ??? Functions containing a non-local goto target save many registers. Why? + See for instance execute/920428-2.c. */ + +/* ??? Get CAN_DEBUG_WITHOUT_FP working so that -fomit-frame-pointer is not + needed. */ + +/* ??? Add support for short data/bss sections. */ + + +/* Run-time target specifications */ + +/* Define this to be a string constant containing `-D' options to define the + predefined macros that identify this machine and system. These macros will + be predefined unless the `-ansi' option is specified. */ +/* ??? This is undefed in svr4.h. */ +#define CPP_PREDEFINES "-Dia64 -Amachine(ia64)" + +/* This declaration should be present. */ +extern int target_flags; + +/* This series of macros is to allow compiler command arguments to enable or + disable the use of optional features of the target machine. */ + +#define MASK_BIG_ENDIAN 0x00000001 /* Generate big endian code. */ + +#define MASK_GNU_AS 0x00000002 /* Generate code for GNU as. */ + +#define MASK_GNU_LD 0x00000004 /* Generate code for GNU ld. */ + +#define MASK_NO_PIC 0x00000008 /* Generate code without GP reg. */ + +#define MASK_VOL_ASM_STOP 0x00000010 /* Emit stop bits for vol ext asm. */ + +#define MASK_A_STEP 0x00000020 /* Emit code for Itanium A step. */ + +#define MASK_REG_NAMES 0x00000040 /* Use in/loc/out register names. */ + +#define MASK_NO_SDATA 0x00000080 /* Disable sdata/scommon/sbss. */ + +#define MASK_DWARF2_ASM 0x40000000 /* test dwarf2 line info via gas. 
*/ + +#define TARGET_BIG_ENDIAN (target_flags & MASK_BIG_ENDIAN) + +#define TARGET_GNU_AS (target_flags & MASK_GNU_AS) + +#define TARGET_GNU_LD (target_flags & MASK_GNU_LD) + +#define TARGET_NO_PIC (target_flags & MASK_NO_PIC) + +#define TARGET_VOL_ASM_STOP (target_flags & MASK_VOL_ASM_STOP) + +#define TARGET_A_STEP (target_flags & MASK_A_STEP) + +#define TARGET_REG_NAMES (target_flags & MASK_REG_NAMES) + +#define TARGET_NO_SDATA (target_flags & MASK_NO_SDATA) + +#define TARGET_DWARF2_ASM (target_flags & MASK_DWARF2_ASM) + +/* This macro defines names of command options to set and clear bits in + `target_flags'. Its definition is an initializer with a subgrouping for + each command option. */ + +#define TARGET_SWITCHES \ +{ \ + { "big-endian", MASK_BIG_ENDIAN, \ + "Generate big endian code" }, \ + { "little-endian", -MASK_BIG_ENDIAN, \ + "Generate little endian code" }, \ + { "gnu-as", MASK_GNU_AS, \ + "Generate code for GNU as" }, \ + { "no-gnu-as", -MASK_GNU_AS, \ + "Generate code for Intel as" }, \ + { "gnu-ld", MASK_GNU_LD, \ + "Generate code for GNU ld" }, \ + { "no-gnu-ld", -MASK_GNU_LD, \ + "Generate code for Intel ld" }, \ + { "no-pic", MASK_NO_PIC, \ + "Generate code without GP reg" }, \ + { "volatile-asm-stop", MASK_VOL_ASM_STOP, \ + "Emit stop bits before and after volatile extended asms" }, \ + { "no-volatile-asm-stop", -MASK_VOL_ASM_STOP, \ + "Don't emit stop bits before and after volatile extended asms" }, \ + { "a-step", MASK_A_STEP, \ + "Emit code for Itanium (TM) processor A step"}, \ + { "register-names", MASK_REG_NAMES, \ + "Use in/loc/out register names"}, \ + { "no-sdata", MASK_NO_SDATA, \ + "Disable use of sdata/scommon/sbss"}, \ + { "sdata", -MASK_NO_SDATA, \ + "Enable use of sdata/scommon/sbss"}, \ + { "dwarf2-asm", MASK_DWARF2_ASM, \ + "Enable Dwarf 2 line debug info via GNU as"}, \ + { "no-dwarf2-asm", -MASK_DWARF2_ASM, \ + "Disable Dwarf 2 line debug info via GNU as"}, \ + { "", TARGET_DEFAULT | TARGET_CPU_DEFAULT, \ + NULL } \ +} + +/* 
Default target_flags if no switches are specified */ + +#ifndef TARGET_DEFAULT +#define TARGET_DEFAULT MASK_DWARF2_ASM +#endif + +#ifndef TARGET_CPU_DEFAULT +#define TARGET_CPU_DEFAULT 0 +#endif + +/* This macro is similar to `TARGET_SWITCHES' but defines names of command + options that have values. Its definition is an initializer with a + subgrouping for each command option. */ + +extern const char *ia64_fixed_range_string; +#define TARGET_OPTIONS \ +{ \ + { "fixed-range=", &ia64_fixed_range_string, \ + "Specify range of registers to make fixed."}, \ +} + +/* This macro is a C statement to print on `stderr' a string describing the + particular machine description choice. */ + +#define TARGET_VERSION fprintf (stderr, " (IA-64)"); + +/* Sometimes certain combinations of command options do not make sense on a + particular target machine. You can define a macro `OVERRIDE_OPTIONS' to + take account of this. This macro, if defined, is executed once just after + all the command options have been parsed. */ + +#define OVERRIDE_OPTIONS ia64_override_options () + +/* Some machines may desire to change what optimizations are performed for + various optimization levels. This macro, if defined, is executed once just + after the optimization level is determined and before the remainder of the + command options have been parsed. Values set in this macro are used as the + default values for the other command line options. */ + +/* #define OPTIMIZATION_OPTIONS(LEVEL,SIZE) */ + +/* Define this macro if debugging can be performed even without a frame + pointer. If this macro is defined, GNU CC will turn on the + `-fomit-frame-pointer' option whenever `-O' is specified. */ +/* ??? Need to define this. */ +/* #define CAN_DEBUG_WITHOUT_FP */ + + +/* Driver configuration */ + +/* A C string constant that tells the GNU CC driver program options to pass to + CPP. It can also specify how to translate options you give to GNU CC into + options for GNU CC to pass to the CPP. */ + +/* ??? 
__LONG_MAX__ depends on LP64/ILP32 switch. */ +/* ??? An alternative is to modify glimits.h to check for __LP64__ instead + of checked for CPU specific defines. We could also get rid of all LONG_MAX + defines in other tm.h files. */ +#define CPP_SPEC \ + "%{mcpu=itanium:-D__itanium__} %{mbig-endian:-D__BIG_ENDIAN__} \ + -D__LONG_MAX__=9223372036854775807L" + +/* If this macro is defined, the preprocessor will not define the builtin macro + `__SIZE_TYPE__'. The macro `__SIZE_TYPE__' must then be defined by + `CPP_SPEC' instead. + + This should be defined if `SIZE_TYPE' depends on target dependent flags + which are not accessible to the preprocessor. Otherwise, it should not be + defined. */ +/* ??? Needs to be defined for P64 code. */ +/* #define NO_BUILTIN_SIZE_TYPE */ + +/* If this macro is defined, the preprocessor will not define the builtin macro + `__PTRDIFF_TYPE__'. The macro `__PTRDIFF_TYPE__' must then be defined by + `CPP_SPEC' instead. + + This should be defined if `PTRDIFF_TYPE' depends on target dependent flags + which are not accessible to the preprocessor. Otherwise, it should not be + defined. */ +/* ??? Needs to be defined for P64 code. */ +/* #define NO_BUILTIN_PTRDIFF_TYPE */ + +/* A C string constant that tells the GNU CC driver program options to pass to + `cc1'. It can also specify how to translate options you give to GNU CC into + options for GNU CC to pass to the `cc1'. */ + +/* #define CC1_SPEC "" */ + +/* A C string constant that tells the GNU CC driver program options to pass to + `cc1plus'. It can also specify how to translate options you give to GNU CC + into options for GNU CC to pass to the `cc1plus'. */ + +/* #define CC1PLUS_SPEC "" */ + +/* A C string constant that tells the GNU CC driver program options to pass to + the assembler. It can also specify how to translate options you give to GNU + CC into options for GNU CC to pass to the assembler. */ + +#if ((TARGET_CPU_DEFAULT | TARGET_DEFAULT) & MASK_GNU_AS) != 0 +/* GNU AS. 
*/ +#define ASM_SPEC "%{mno-gnu-as:-N so}" +#else +/* Intel ias. */ +#define ASM_SPEC "%{!mgnu-as:-N so}" +#endif + +/* A C string constant that tells the GNU CC driver program options to pass to + the linker. It can also specify how to translate options you give to GNU CC + into options for GNU CC to pass to the linker. */ + +/* The Intel linker does not support dynamic linking, so we need -dn. + The Intel linker gives annoying messages unless -N so is used. */ +#if ((TARGET_CPU_DEFAULT | TARGET_DEFAULT) & MASK_GNU_LD) != 0 +/* GNU LD. */ +#define LINK_SPEC "%{mno-gnu-ld:-dn -N so}" +#else +/* Intel ild. */ +#define LINK_SPEC "%{!mgnu-ld:-dn -N so}" +#endif + + +/* Storage Layout */ + +/* Define this macro to have the value 1 if the most significant bit in a byte + has the lowest number; otherwise define it to have the value zero. */ + +#define BITS_BIG_ENDIAN 0 + +/* Define this macro to have the value 1 if the most significant byte in a word + has the lowest number. This macro need not be a constant. */ + +#define BYTES_BIG_ENDIAN (TARGET_BIG_ENDIAN != 0) + +/* Define this macro to have the value 1 if, in a multiword object, the most + significant word has the lowest number. */ + +#define WORDS_BIG_ENDIAN (TARGET_BIG_ENDIAN != 0) + +/* Define this macro if WORDS_BIG_ENDIAN is not constant. This must be a + constant value with the same meaning as WORDS_BIG_ENDIAN, which will be used + only when compiling libgcc2.c. Typically the value will be set based on + preprocessor defines. */ +#if defined(__BIG_ENDIAN__) +#define LIBGCC2_WORDS_BIG_ENDIAN 1 +#else +#define LIBGCC2_WORDS_BIG_ENDIAN 0 +#endif + +/* Define this macro to be the number of bits in an addressable storage unit + (byte); normally 8. */ +#define BITS_PER_UNIT 8 + +/* Number of bits in a word; normally 32. */ +#define BITS_PER_WORD 64 + +/* Number of storage units in a word; normally 4. */ +#define UNITS_PER_WORD 8 + +/* Width of a pointer, in bits. 
You must specify a value no wider than the + width of `Pmode'. If it is not equal to the width of `Pmode', you must + define `POINTERS_EXTEND_UNSIGNED'. */ +/* ??? Implement optional 32 bit pointer size later? */ +#define POINTER_SIZE 64 + +/* A C expression whose value is nonzero if pointers that need to be extended + from being `POINTER_SIZE' bits wide to `Pmode' are sign-extended and zero if + they are zero-extended. + + You need not define this macro if the `POINTER_SIZE' is equal to the width + of `Pmode'. */ +/* ??? May need this for 32 bit pointers. */ +/* #define POINTERS_EXTEND_UNSIGNED */ + +/* A macro to update MODE and UNSIGNEDP when an object whose type is TYPE and + which has the specified mode and signedness is to be stored in a register. + This macro is only called when TYPE is a scalar type. */ + +/* ??? Maybe sign-extend 32 bit values like the alpha? Or maybe zero-extend + because we only have zero-extending loads? */ +#define PROMOTE_MODE(MODE,UNSIGNEDP,TYPE) \ +do \ + { \ + if (GET_MODE_CLASS (MODE) == MODE_INT \ + && GET_MODE_SIZE (MODE) < UNITS_PER_WORD) \ + (MODE) = DImode; \ + } \ +while (0) + +/* Define this macro if the promotion described by `PROMOTE_MODE' should also + be done for outgoing function arguments. */ +/* ??? ABI doesn't allow us to define this. */ +/* #define PROMOTE_FUNCTION_ARGS */ + +/* Define this macro if the promotion described by `PROMOTE_MODE' should also + be done for the return value of functions. + + If this macro is defined, `FUNCTION_VALUE' must perform the same promotions + done by `PROMOTE_MODE'. */ +/* ??? ABI doesn't allow us to define this. */ +/* #define PROMOTE_FUNCTION_RETURN */ + +/* Normal alignment required for function parameters on the stack, in bits. + All stack parameters receive at least this much alignment regardless of data + type. On most machines, this is the same as the size of an integer. 
*/ +#define PARM_BOUNDARY 64 + +/* Define this macro if you wish to preserve a certain alignment for the stack + pointer. The definition is a C expression for the desired alignment + (measured in bits). */ + +#define STACK_BOUNDARY 128 + +/* Align frames on double word boundaries */ +#ifndef IA64_STACK_ALIGN +#define IA64_STACK_ALIGN(LOC) (((LOC) + 15) & ~15) +#endif + +/* Alignment required for a function entry point, in bits. */ +#define FUNCTION_BOUNDARY 128 + +/* Biggest alignment that any data type can require on this machine, + in bits. */ +/* Optional x86 80-bit float, quad-precision 128-bit float, and quad-word + 128 bit integers all require 128 bit alignment. */ +#define BIGGEST_ALIGNMENT 128 + +/* If defined, a C expression to compute the alignment for a static variable. + TYPE is the data type, and ALIGN is the alignment that the object + would ordinarily have. The value of this macro is used instead of that + alignment to align the object. */ + +#define DATA_ALIGNMENT(TYPE, ALIGN) \ + (TREE_CODE (TYPE) == ARRAY_TYPE \ + && TYPE_MODE (TREE_TYPE (TYPE)) == QImode \ + && (ALIGN) < BITS_PER_WORD ? BITS_PER_WORD : (ALIGN)) + +/* If defined, a C expression to compute the alignment given to a constant that + is being placed in memory. CONSTANT is the constant and ALIGN is the + alignment that the object would ordinarily have. The value of this macro is + used instead of that alignment to align the object. */ + +#define CONSTANT_ALIGNMENT(EXP, ALIGN) \ + (TREE_CODE (EXP) == STRING_CST \ + && (ALIGN) < BITS_PER_WORD ? BITS_PER_WORD : (ALIGN)) + +/* Define this macro to be the value 1 if instructions will fail to work if + given data not on the nominal alignment. If instructions will merely go + slower in that case, define this macro as 0. */ +#define STRICT_ALIGNMENT 1 + +/* Define this if you wish to imitate the way many other C compilers handle + alignment of bitfields and the structures that contain them. 
+ The behavior is that the type written for a bitfield (`int', `short', or + other integer type) imposes an alignment for the entire structure, as if the + structure really did contain an ordinary field of that type. In addition, + the bitfield is placed within the structure so that it would fit within such + a field, not crossing a boundary for it. */ +#define PCC_BITFIELD_TYPE_MATTERS 1 + +/* Define this macro as an expression for the overall size of a structure + (given by STRUCT as a tree node) when the size computed from the fields is + SIZE and the alignment is ALIGN. + + The default is to round SIZE up to a multiple of ALIGN. */ +/* ??? Might need this for 80-bit double-extended floats. */ +/* #define ROUND_TYPE_SIZE(STRUCT, SIZE, ALIGN) */ + +/* Define this macro as an expression for the alignment of a structure (given + by STRUCT as a tree node) if the alignment computed in the usual way is + COMPUTED and the alignment explicitly specified was SPECIFIED. + + The default is to use SPECIFIED if it is larger; otherwise, use the smaller + of COMPUTED and `BIGGEST_ALIGNMENT' */ +/* ??? Might need this for 80-bit double-extended floats. */ +/* #define ROUND_TYPE_ALIGN(STRUCT, COMPUTED, SPECIFIED) */ + +/* An integer expression for the size in bits of the largest integer machine + mode that should actually be used. */ + +/* Allow pairs of registers to be used, which is the intent of the default. */ +#define MAX_FIXED_MODE_SIZE GET_MODE_BITSIZE (TImode) + +/* A code distinguishing the floating point format of the target machine. */ +#define TARGET_FLOAT_FORMAT IEEE_FLOAT_FORMAT + +/* GNU CC supports two ways of implementing C++ vtables: traditional or with + so-called "thunks". The flag `-fvtable-thunk' chooses between them. Define + this macro to be a C expression for the default value of that flag. If + `DEFAULT_VTABLE_THUNKS' is 0, GNU CC uses the traditional implementation by + default. 
The "thunk" implementation is more efficient (especially if you + have provided an implementation of `ASM_OUTPUT_MI_THUNK', but is not binary + compatible with code compiled using the traditional implementation. If you + are writing a new ports, define `DEFAULT_VTABLE_THUNKS' to 1. + + If you do not define this macro, the default for `-fvtable-thunk' is 0. */ +#define DEFAULT_VTABLE_THUNKS 1 + + +/* Layout of Source Language Data Types */ + +/* A C expression for the size in bits of the type `int' on the target machine. + If you don't define this, the default is one word. */ +#define INT_TYPE_SIZE 32 + +/* A C expression for the size in bits of the type `short' on the target + machine. If you don't define this, the default is half a word. (If this + would be less than one storage unit, it is rounded up to one unit.) */ +#define SHORT_TYPE_SIZE 16 + +/* A C expression for the size in bits of the type `long' on the target + machine. If you don't define this, the default is one word. */ +/* ??? Should be 32 for ILP32 code. */ +#define LONG_TYPE_SIZE 64 + +/* Maximum number for the size in bits of the type `long' on the target + machine. If this is undefined, the default is `LONG_TYPE_SIZE'. Otherwise, + it is the constant value that is the largest value that `LONG_TYPE_SIZE' can + have at run-time. This is used in `cpp'. */ +/* ??? Should be 64 for ILP32 code. */ +/* #define MAX_LONG_TYPE_SIZE */ + +/* A C expression for the size in bits of the type `long long' on the target + machine. If you don't define this, the default is two words. If you want + to support GNU Ada on your machine, the value of macro must be at least 64. */ +#define LONG_LONG_TYPE_SIZE 64 + +/* A C expression for the size in bits of the type `char' on the target + machine. If you don't define this, the default is one quarter of a word. + (If this would be less than one storage unit, it is rounded up to one unit.) 
*/ +#define CHAR_TYPE_SIZE 8 + +/* A C expression for the size in bits of the type `float' on the target + machine. If you don't define this, the default is one word. */ +#define FLOAT_TYPE_SIZE 32 + +/* A C expression for the size in bits of the type `double' on the target + machine. If you don't define this, the default is two words. */ +#define DOUBLE_TYPE_SIZE 64 + +/* A C expression for the size in bits of the type `long double' on the target + machine. If you don't define this, the default is two words. */ +/* ??? We have an 80 bit extended double format. */ +#define LONG_DOUBLE_TYPE_SIZE 64 + +/* An expression whose value is 1 or 0, according to whether the type `char' + should be signed or unsigned by default. The user can always override this + default with the options `-fsigned-char' and `-funsigned-char'. */ +#define DEFAULT_SIGNED_CHAR 1 + +/* A C expression for a string describing the name of the data type to use for + size values. The typedef name `size_t' is defined using the contents of the + string. */ +/* ??? Needs to be defined for P64 code. */ +/* #define SIZE_TYPE */ + +/* A C expression for a string describing the name of the data type to use for + the result of subtracting two pointers. The typedef name `ptrdiff_t' is + defined using the contents of the string. See `SIZE_TYPE' above for more + information. */ +/* ??? Needs to be defined for P64 code. */ +/* #define PTRDIFF_TYPE */ + +/* A C expression for a string describing the name of the data type to use for + wide characters. The typedef name `wchar_t' is defined using the contents + of the string. See `SIZE_TYPE' above for more information. */ +/* #define WCHAR_TYPE */ + +/* A C expression for the size in bits of the data type for wide characters. + This is used in `cpp', which cannot make use of `WCHAR_TYPE'. */ +/* #define WCHAR_TYPE_SIZE */ + +/* Maximum number for the size in bits of the data type for wide characters. + If this is undefined, the default is `WCHAR_TYPE_SIZE'. 
Otherwise, it is + the constant value that is the largest value that `WCHAR_TYPE_SIZE' can have + at run-time. This is used in `cpp'. */ +/* #define MAX_WCHAR_TYPE_SIZE */ + +/* A C constant expression for the integer value for escape sequence + `\a'. */ +#define TARGET_BELL 0x7 + +/* C constant expressions for the integer values for escape sequences + `\b', `\t' and `\n'. */ +#define TARGET_BS 0x8 +#define TARGET_TAB 0x9 +#define TARGET_NEWLINE 0xa + +/* C constant expressions for the integer values for escape sequences + `\v', `\f' and `\r'. */ +#define TARGET_VT 0xb +#define TARGET_FF 0xc +#define TARGET_CR 0xd + + +/* Register Basics */ + +/* Number of hardware registers known to the compiler. + We have 128 general registers, 128 floating point registers, 64 predicate + registers, 8 branch registers, and one frame pointer register. */ + +/* ??? Should add ar.lc, ar.ec and probably also ar.pfs. */ + +#define FIRST_PSEUDO_REGISTER 330 + +/* Ranges for the various kinds of registers. */ +#define ADDL_REGNO_P(REGNO) ((REGNO) >= 0 && (REGNO) <= 3) +#define GR_REGNO_P(REGNO) ((REGNO) >= 0 && (REGNO) <= 127) +#define FR_FP_REGNO_P(REGNO) \ + (((REGNO) >= 128 && (REGNO) <= 143) || ((REGNO) >= 152 && (REGNO) <= 223)) +#define FR_INT_REGNO_P(REGNO) \ + (((REGNO) >= 144 && (REGNO) <= 151) || ((REGNO) >= 224 && (REGNO) <= 255)) +#define FR_REGNO_P(REGNO) ((REGNO) >= 128 && (REGNO) <= 255) +#define PR_REGNO_P(REGNO) ((REGNO) >= 256 && (REGNO) <= 319) +#define BR_REGNO_P(REGNO) ((REGNO) >= 320 && (REGNO) <= 327) +#define GENERAL_REGNO_P(REGNO) \ + (GR_REGNO_P (REGNO) \ + || (REGNO) == FRAME_POINTER_REGNUM \ + || (REGNO) == RETURN_ADDRESS_REGNUM) + +#define GR_REG(REGNO) ((REGNO) + 0) +#define FR_REG(REGNO) ((REGNO) + 128) +#define PR_REG(REGNO) ((REGNO) + 256) +#define BR_REG(REGNO) ((REGNO) + 320) +#define OUT_REG(REGNO) ((REGNO) + 120) +#define IN_REG(REGNO) ((REGNO) + 112) +#define LOC_REG(REGNO) ((REGNO) + 32) + +#define IN_REGNO_P(REGNO) ((REGNO) >= IN_REG (0) && 
(REGNO) <= IN_REG (7)) +#define LOC_REGNO_P(REGNO) ((REGNO) >= LOC_REG (0) && (REGNO) <= LOC_REG (79)) +#define OUT_REGNO_P(REGNO) ((REGNO) >= OUT_REG (0) && (REGNO) <= OUT_REG (7)) + +/* ??? Don't really need two sets of macros. I like this one better because + it is less typing. */ +#define R_GR(REGNO) GR_REG (REGNO) +#define R_FR(REGNO) FR_REG (REGNO) +#define R_PR(REGNO) PR_REG (REGNO) +#define R_BR(REGNO) BR_REG (REGNO) + +/* An initializer that says which registers are used for fixed purposes all + throughout the compiled code and are therefore not available for general + allocation. + + r0: constant 0 + r1: global pointer (gp) + r12: stack pointer (sp) + r13: thread pointer (tp) + f0: constant 0.0 + f1: constant 1.0 + p0: constant true + fp: eliminable frame pointer */ + +/* The last 16 stacked regs are fixed, because they are reserved for the 8 + input and 8 output registers. */ + +/* ??? Must mark the next 3 stacked regs as fixed, because ia64_expand_prologue + assumes that three locals are available for fp, b0, and ar.pfs. */ + +/* ??? Should mark b0 as fixed? */ + +/* ??? input and output registers do not have to be marked as fixed. */ + +#define FIXED_REGISTERS \ +{ /* General registers. */ \ + 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, \ + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, \ + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, \ + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, \ + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, \ + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, \ + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, \ + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, \ + /* Floating-point registers. 
*/ \ + 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, \ + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, \ + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, \ + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, \ + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, \ + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, \ + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, \ + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, \ + /* Predicate registers. */ \ + 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, \ + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, \ + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, \ + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, \ + /* Branch registers. */ \ + 0, 0, 0, 0, 0, 0, 0, 0, \ + /*FP RA*/ \ + 1, 1, \ + } + +/* Like `FIXED_REGISTERS' but has 1 for each register that is clobbered (in + general) by function calls as well as for fixed registers. This macro + therefore identifies the registers that are not available for general + allocation of values that must live across function calls. */ + +/* ??? If inputs are not marked as fixed, then they are not call clobbered. */ + +#define CALL_USED_REGISTERS \ +{ /* General registers. */ \ + 1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, \ + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, \ + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, \ + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, \ + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, \ + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, \ + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, \ + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, \ + /* Floating-point registers. 
*/ \ + 1, 1, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, \ + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, \ + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, \ + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, \ + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, \ + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, \ + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, \ + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, \ + /* Predicate registers. */ \ + 1, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, \ + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, \ + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, \ + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, \ + /* Branch registers. */ \ + 1, 0, 0, 0, 0, 0, 1, 1, \ + /*FP RA*/ \ + 1, 1, \ +} + +/* Define this macro if the target machine has register windows. This C + expression returns the register number as seen by the called function + corresponding to the register number OUT as seen by the calling function. + Return OUT if register number OUT is not an outbound register. */ + +#define INCOMING_REGNO(OUT) \ + ((unsigned) ((OUT) - OUT_REG (0)) < 8 ? IN_REG ((OUT) - OUT_REG (0)) : (OUT)) + +/* Define this macro if the target machine has register windows. This C + expression returns the register number as seen by the calling function + corresponding to the register number IN as seen by the called function. + Return IN if register number IN is not an inbound register. */ + +#define OUTGOING_REGNO(IN) \ + ((unsigned) ((IN) - IN_REG (0)) < 8 ? OUT_REG ((IN) - IN_REG (0)) : (IN)) + + +/* Order of allocation of registers */ + +/* If defined, an initializer for a vector of integers, containing the numbers + of hard registers in the order in which GNU CC should prefer to use them + (from most preferred to least). + + If this macro is not defined, registers are used lowest numbered first (all + else being equal). 
+ + One use of this macro is on machines where the highest numbered registers + must always be saved and the save-multiple-registers instruction supports + only sequences of consecutive registers. On such machines, define + `REG_ALLOC_ORDER' to be an initializer that lists the highest numbered + allocatable register first. */ + +/* ??? Should the GR return value registers come before or after the rest + of the caller-save GRs? */ + +/* ??? Output registers are cheap, because they will be not be saved + by the register engine. They probably should be early in the list. + We need to make them not fixed first though. Similarly, input registers + are callee-saved (RSE) like the stacked locals. */ + +#define REG_ALLOC_ORDER \ +{ \ + /* Caller-saved general registers. */ \ + R_GR (14), R_GR (15), R_GR (16), R_GR (17), \ + R_GR (18), R_GR (19), R_GR (20), R_GR (21), R_GR (22), R_GR (23), \ + R_GR (24), R_GR (25), R_GR (26), R_GR (27), R_GR (28), R_GR (29), \ + R_GR (30), R_GR (31), \ + /* Caller-saved general registers, also used for return values. */ \ + R_GR (8), R_GR (9), R_GR (10), R_GR (11), \ + /* addl caller-saved general registers. */ \ + R_GR (2), R_GR (3), \ + /* Caller-saved FP registers. */ \ + R_FR (6), R_FR (7), \ + /* Caller-saved FP registers, used for parameters and return values. */ \ + R_FR (8), R_FR (9), R_FR (10), R_FR (11), \ + R_FR (12), R_FR (13), R_FR (14), R_FR (15), \ + /* Rotating caller-saved FP registers. 
*/ \ + R_FR (32), R_FR (33), R_FR (34), R_FR (35), \ + R_FR (36), R_FR (37), R_FR (38), R_FR (39), R_FR (40), R_FR (41), \ + R_FR (42), R_FR (43), R_FR (44), R_FR (45), R_FR (46), R_FR (47), \ + R_FR (48), R_FR (49), R_FR (50), R_FR (51), R_FR (52), R_FR (53), \ + R_FR (54), R_FR (55), R_FR (56), R_FR (57), R_FR (58), R_FR (59), \ + R_FR (60), R_FR (61), R_FR (62), R_FR (63), R_FR (64), R_FR (65), \ + R_FR (66), R_FR (67), R_FR (68), R_FR (69), R_FR (70), R_FR (71), \ + R_FR (72), R_FR (73), R_FR (74), R_FR (75), R_FR (76), R_FR (77), \ + R_FR (78), R_FR (79), R_FR (80), R_FR (81), R_FR (82), R_FR (83), \ + R_FR (84), R_FR (85), R_FR (86), R_FR (87), R_FR (88), R_FR (89), \ + R_FR (90), R_FR (91), R_FR (92), R_FR (93), R_FR (94), R_FR (95), \ + R_FR (96), R_FR (97), R_FR (98), R_FR (99), R_FR (100), R_FR (101), \ + R_FR (102), R_FR (103), R_FR (104), R_FR (105), R_FR (106), R_FR (107), \ + R_FR (108), R_FR (109), R_FR (110), R_FR (111), R_FR (112), R_FR (113), \ + R_FR (114), R_FR (115), R_FR (116), R_FR (117), R_FR (118), R_FR (119), \ + R_FR (120), R_FR (121), R_FR (122), R_FR (123), R_FR (124), R_FR (125), \ + R_FR (126), R_FR (127), \ + /* Caller-saved predicate registers. */ \ + R_PR (6), R_PR (7), R_PR (8), R_PR (9), R_PR (10), R_PR (11), \ + R_PR (12), R_PR (13), R_PR (14), R_PR (15), \ + /* Rotating caller-saved predicate registers. */ \ + R_PR (16), R_PR (17), \ + R_PR (18), R_PR (19), R_PR (20), R_PR (21), R_PR (22), R_PR (23), \ + R_PR (24), R_PR (25), R_PR (26), R_PR (27), R_PR (28), R_PR (29), \ + R_PR (30), R_PR (31), R_PR (32), R_PR (33), R_PR (34), R_PR (35), \ + R_PR (36), R_PR (37), R_PR (38), R_PR (39), R_PR (40), R_PR (41), \ + R_PR (42), R_PR (43), R_PR (44), R_PR (45), R_PR (46), R_PR (47), \ + R_PR (48), R_PR (49), R_PR (50), R_PR (51), R_PR (52), R_PR (53), \ + R_PR (54), R_PR (55), R_PR (56), R_PR (57), R_PR (58), R_PR (59), \ + R_PR (60), R_PR (61), R_PR (62), R_PR (63), \ + /* Caller-saved branch registers. 
*/ \ + R_BR (6), R_BR (7), \ + \ + /* Stacked callee-saved general registers. */ \ + R_GR (32), R_GR (33), R_GR (34), R_GR (35), \ + R_GR (36), R_GR (37), R_GR (38), R_GR (39), R_GR (40), R_GR (41), \ + R_GR (42), R_GR (43), R_GR (44), R_GR (45), R_GR (46), R_GR (47), \ + R_GR (48), R_GR (49), R_GR (50), R_GR (51), R_GR (52), R_GR (53), \ + R_GR (54), R_GR (55), R_GR (56), R_GR (57), R_GR (58), R_GR (59), \ + R_GR (60), R_GR (61), R_GR (62), R_GR (63), R_GR (64), R_GR (65), \ + R_GR (66), R_GR (67), R_GR (68), R_GR (69), R_GR (70), R_GR (71), \ + R_GR (72), R_GR (73), R_GR (74), R_GR (75), R_GR (76), R_GR (77), \ + R_GR (78), R_GR (79), R_GR (80), R_GR (81), R_GR (82), R_GR (83), \ + R_GR (84), R_GR (85), R_GR (86), R_GR (87), R_GR (88), R_GR (89), \ + R_GR (90), R_GR (91), R_GR (92), R_GR (93), R_GR (94), R_GR (95), \ + R_GR (96), R_GR (97), R_GR (98), R_GR (99), R_GR (100), R_GR (101), \ + R_GR (102), R_GR (103), R_GR (104), R_GR (105), R_GR (106), R_GR (107), \ + R_GR (108), \ + /* Callee-saved general registers. */ \ + R_GR (4), R_GR (5), R_GR (6), R_GR (7), \ + /* Callee-saved FP registers. */ \ + R_FR (2), R_FR (3), R_FR (4), R_FR (5), R_FR (16), R_FR (17), \ + R_FR (18), R_FR (19), R_FR (20), R_FR (21), R_FR (22), R_FR (23), \ + R_FR (24), R_FR (25), R_FR (26), R_FR (27), R_FR (28), R_FR (29), \ + R_FR (30), R_FR (31), \ + /* Callee-saved predicate registers. */ \ + R_PR (1), R_PR (2), R_PR (3), R_PR (4), R_PR (5), \ + /* Callee-saved branch registers. */ \ + R_BR (1), R_BR (2), R_BR (3), R_BR (4), R_BR (5), \ + \ + /* ??? Stacked registers reserved for fp, rp, and ar.pfs. */ \ + R_GR (109), R_GR (110), R_GR (111), \ + /* Input registers. */ \ + R_GR (112), R_GR (113), R_GR (114), R_GR (115), R_GR (116), R_GR (117), \ + R_GR (118), R_GR (119), \ + /* Output registers. */ \ + R_GR (120), R_GR (121), R_GR (122), R_GR (123), R_GR (124), R_GR (125), \ + R_GR (126), R_GR (127), \ + \ + /* Special general registers. 
*/ \ + R_GR (0), R_GR (1), R_GR (12), R_GR (13), \ + /* Special FP registers. */ \ + R_FR (0), R_FR (1), \ + /* Special predicate registers. */ \ + R_PR (0), \ + /* Special branch registers. */ \ + R_BR (0), \ + /* Frame pointer. Return address. */ \ + FRAME_POINTER_REGNUM, RETURN_ADDRESS_REGNUM, \ +} + + +/* How Values Fit in Registers */ + +/* A C expression for the number of consecutive hard registers, starting at + register number REGNO, required to hold a value of mode MODE. */ + +/* ??? x86 80-bit FP values only require 1 register. */ +/* ??? We say that CCmode values require two registers. This allows us to + easily store the normal and inverted values. If we want single register + predicates, we can use EXTRA_CC_MODES to give them a different mode. */ + +#define HARD_REGNO_NREGS(REGNO, MODE) \ + ((MODE) == CCmode && PR_REGNO_P (REGNO) ? 2 \ + : FR_REGNO_P (REGNO) && (MODE) == XFmode ? 1 \ + : (GET_MODE_SIZE (MODE) + UNITS_PER_WORD - 1) / UNITS_PER_WORD) + +/* A C expression that is nonzero if it is permissible to store a value of mode + MODE in hard register number REGNO (or in several registers starting with + that one). */ + +#define HARD_REGNO_MODE_OK(REGNO, MODE) \ + (FR_FP_REGNO_P (REGNO) ? ! INTEGRAL_MODE_P (MODE) \ + : FR_INT_REGNO_P (REGNO) ? ! FLOAT_MODE_P (MODE) \ + : PR_REGNO_P (REGNO) ? (MODE) == CCmode \ + : 1) + +/* A C expression that is nonzero if it is desirable to choose register + allocation so as to avoid move instructions between a value of mode MODE1 + and a value of mode MODE2. + + If `HARD_REGNO_MODE_OK (R, MODE1)' and `HARD_REGNO_MODE_OK (R, MODE2)' are + ever different for any R, then `MODES_TIEABLE_P (MODE1, MODE2)' must be + zero. */ +/* ??? If the comments are true, then this must be zero if one mode is CCmode, + INTEGRAL_MODE_P or FLOAT_MODE_P and the other is not. Otherwise, it is + true. */ +#define MODES_TIEABLE_P(MODE1, MODE2) 1 + +/* Define this macro if the compiler should avoid copies to/from CCmode + registers. 
You should only define this macro if support for copying to/from
+   CCmode is incomplete.  */
+/* ??? CCmode copies are very expensive, so we might want this defined.  */
+/* #define AVOID_CCMODE_COPIES */
+
+
+/* Handling Leaf Functions */
+
+/* A C initializer for a vector, indexed by hard register number, which
+   contains 1 for a register that is allowable in a candidate for leaf function
+   treatment.  */
+/* ??? This might be useful.  */
+/* #define LEAF_REGISTERS */
+
+/* A C expression whose value is the register number to which REGNO should be
+   renumbered, when a function is treated as a leaf function.  */
+/* ??? This might be useful.  */
+/* #define LEAF_REG_REMAP(REGNO) */
+
+
+/* Register Classes */
+
+/* An enumeral type that must be defined with all the register class names as
+   enumeral values.  `NO_REGS' must be first.  `ALL_REGS' must be the last
+   register class, followed by one more enumeral value, `LIM_REG_CLASSES',
+   which is not a register class but rather tells how many classes there
+   are.  */
+/* ??? FP registers hold INT and FP values in different representations, so
+   we can't just use a subreg to convert between the two.  We get around this
+   problem by segmenting the FP register set into two parts.  One part (FR_INT)
+   only holds integer values, and one part (FR_FP) only holds FP values.  Thus
+   we always know which representation is being used.  */
+/* ??? When compiling without optimization, it is possible for the only use of
+   a pseudo to be a parameter load from the stack with a REG_EQUIV note.
+   Regclass handles this case specially and does not assign any costs to the
+   pseudo.  The pseudo then ends up using the last class before ALL_REGS.
+   Thus we must not let either PR_REGS or BR_REGS be the last class.  The
+   testcase for this is gcc.c-torture/execute/va-arg-7.c.
*/ +enum reg_class +{ + NO_REGS, + PR_REGS, + BR_REGS, + ADDL_REGS, + GR_REGS, + FR_INT_REGS, + FR_FP_REGS, + FR_REGS, + GR_AND_FR_INT_REGS, + GR_AND_FR_FP_REGS, + GR_AND_FR_REGS, + ALL_REGS, + LIM_REG_CLASSES +}; + +#define GENERAL_REGS GR_REGS + +/* The number of distinct register classes. */ +#define N_REG_CLASSES ((int) LIM_REG_CLASSES) + +/* An initializer containing the names of the register classes as C string + constants. These names are used in writing some of the debugging dumps. */ +#define REG_CLASS_NAMES \ +{ "NO_REGS", "PR_REGS", "BR_REGS", "ADDL_REGS", "GR_REGS", "FR_INT_REGS", \ + "FR_FP_REGS", "FR_REGS", "GR_AND_FR_INT_REGS", "GR_AND_FR_FP_REGS", \ + "GR_AND_FR_REGS", "ALL_REGS" } + +/* An initializer containing the contents of the register classes, as integers + which are bit masks. The Nth integer specifies the contents of class N. + The way the integer MASK is interpreted is that register R is in the class + if `MASK & (1 << R)' is 1. */ +#define REG_CLASS_CONTENTS \ +{ \ + /* NO_REGS. */ \ + { 0x00000000, 0x00000000, 0x00000000, 0x00000000, \ + 0x00000000, 0x00000000, 0x00000000, 0x00000000, \ + 0x00000000, 0x00000000, 0x000 }, \ + /* PR_REGS. */ \ + { 0x00000000, 0x00000000, 0x00000000, 0x00000000, \ + 0x00000000, 0x00000000, 0x00000000, 0x00000000, \ + 0xFFFFFFFF, 0xFFFFFFFF, 0x000 }, \ + /* BR_REGS. */ \ + { 0x00000000, 0x00000000, 0x00000000, 0x00000000, \ + 0x00000000, 0x00000000, 0x00000000, 0x00000000, \ + 0x00000000, 0x00000000, 0x0FF }, \ + /* ADDL_REGS. */ \ + { 0x0000000F, 0x00000000, 0x00000000, 0x00000000, \ + 0x00000000, 0x00000000, 0x00000000, 0x00000000, \ + 0x00000000, 0x00000000, 0x000 }, \ + /* GR_REGS. */ \ + { 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, \ + 0x00000000, 0x00000000, 0x00000000, 0x00000000, \ + 0x00000000, 0x00000000, 0x300 }, \ + /* FR_INT_REGS. 
*/ \ + { 0x00000000, 0x00000000, 0x00000000, 0x00000000, \ + 0x00FF0000, 0x00000000, 0x00000000, 0xFFFFFFFF, \ + 0x00000000, 0x00000000, 0x000 }, \ + /* FR_FP_REGS. */ \ + { 0x00000000, 0x00000000, 0x00000000, 0x00000000, \ + 0xFF00FFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0x00000000, \ + 0x00000000, 0x00000000, 0x000 }, \ + /* FR_REGS. */ \ + { 0x00000000, 0x00000000, 0x00000000, 0x00000000, \ + 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, \ + 0x00000000, 0x00000000, 0x000 }, \ + /* GR_AND_FR_INT_REGS. */ \ + { 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, \ + 0x00FF0000, 0x00000000, 0x00000000, 0xFFFFFFFF, \ + 0x00000000, 0x00000000, 0x300 }, \ + /* GR_AND_FR_FP_REGS. */ \ + { 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, \ + 0xFF00FFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0x00000000, \ + 0x00000000, 0x00000000, 0x300 }, \ + /* GR_AND_FR_REGS. */ \ + { 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, \ + 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, \ + 0x00000000, 0x00000000, 0x300 }, \ + /* ALL_REGS. */ \ + { 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, \ + 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, \ + 0xFFFFFFFF, 0xFFFFFFFF, 0x3FF }, \ +} + +/* A C expression whose value is a register class containing hard register + REGNO. In general there is more than one such class; choose a class which + is "minimal", meaning that no smaller class also contains the register. */ +/* The NO_REGS case is primarily for the benefit of rws_access_reg, which + may call here with private (invalid) register numbers, such as + REG_VOLATILE. */ +#define REGNO_REG_CLASS(REGNO) \ +(ADDL_REGNO_P (REGNO) ? ADDL_REGS \ + : GENERAL_REGNO_P (REGNO) ? GR_REGS \ + : FR_FP_REGNO_P (REGNO) ? FR_FP_REGS \ + : FR_INT_REGNO_P (REGNO) ? FR_INT_REGS \ + : PR_REGNO_P (REGNO) ? PR_REGS \ + : BR_REGNO_P (REGNO) ? BR_REGS \ + : NO_REGS) + +/* A macro whose definition is the name of the class to which a valid base + register must belong. 
A base register is one used in an address which is + the register value plus a displacement. */ +#define BASE_REG_CLASS GENERAL_REGS + +/* A macro whose definition is the name of the class to which a valid index + register must belong. An index register is one used in an address where its + value is either multiplied by a scale factor or added to another register + (as well as added to a displacement). */ +#define INDEX_REG_CLASS NO_REGS + +/* A C expression which defines the machine-dependent operand constraint + letters for register classes. If CHAR is such a letter, the value should be + the register class corresponding to it. Otherwise, the value should be + `NO_REGS'. The register letter `r', corresponding to class `GENERAL_REGS', + will not be passed to this macro; you do not need to handle it. */ + +#define REG_CLASS_FROM_LETTER(CHAR) \ +((CHAR) == 'f' ? FR_FP_REGS \ + : (CHAR) == 'e' ? FR_INT_REGS \ + : (CHAR) == 'a' ? ADDL_REGS \ + : (CHAR) == 'b' ? BR_REGS \ + : (CHAR) == 'c' ? PR_REGS \ + : NO_REGS) + +/* A C expression which is nonzero if register number NUM is suitable for use + as a base register in operand addresses. It may be either a suitable hard + register or a pseudo register that has been allocated such a hard reg. */ +#define REGNO_OK_FOR_BASE_P(REGNO) \ + (GENERAL_REGNO_P (REGNO) || GENERAL_REGNO_P (reg_renumber[REGNO])) + +/* A C expression which is nonzero if register number NUM is suitable for use + as an index register in operand addresses. It may be either a suitable hard + register or a pseudo register that has been allocated such a hard reg. */ +#define REGNO_OK_FOR_INDEX_P(NUM) 0 + +/* A C expression that places additional restrictions on the register class to + use when it is necessary to copy value X into a register in class CLASS. + The value is a register class; perhaps CLASS, or perhaps another, smaller + class. 
*/ + +#define PREFERRED_RELOAD_CLASS(X, CLASS) CLASS + +/* You should define this macro to indicate to the reload phase that it may + need to allocate at least one register for a reload in addition to the + register to contain the data. Specifically, if copying X to a register + CLASS in MODE requires an intermediate register, you should define this + to return the largest register class all of whose registers can be used + as intermediate registers or scratch registers. */ + +#define SECONDARY_RELOAD_CLASS(CLASS, MODE, X) \ + ia64_secondary_reload_class (CLASS, MODE, X) + +/* Certain machines have the property that some registers cannot be copied to + some other registers without using memory. Define this macro on those + machines to be a C expression that is non-zero if objects of mode M in + registers of CLASS1 can only be copied to registers of class CLASS2 by + storing a register of CLASS1 into memory and loading that memory location + into a register of CLASS2. */ +/* ??? We may need this for XFmode moves between FR and GR regs. Using + getf.sig/getf.exp almost works, but the result in the GR regs is not + properly formatted and has two extra bits. */ +/* #define SECONDARY_MEMORY_NEEDED(CLASS1, CLASS2, M) */ + +/* A C expression for the maximum number of consecutive registers of + class CLASS needed to hold a value of mode MODE. + This is closely related to the macro `HARD_REGNO_NREGS'. */ + +#define CLASS_MAX_NREGS(CLASS, MODE) \ + ((MODE) == CCmode && (CLASS) == PR_REGS ? 2 \ + : (((CLASS) == FR_REGS || (CLASS) == FR_FP_REGS \ + || (CLASS) == FR_INT_REGS) && (MODE) == XFmode) ? 1 \ + : (GET_MODE_SIZE (MODE) + UNITS_PER_WORD - 1) / UNITS_PER_WORD) + +/* A C expression that defines the machine-dependent operand constraint letters + (`I', `J', `K', .. 'P') that specify particular ranges of integer values. */ + +/* 14 bit signed immediate for arithmetic instructions. 
*/ +#define CONST_OK_FOR_I(VALUE) \ + ((unsigned HOST_WIDE_INT)(VALUE) + 0x2000 < 0x4000) +/* 22 bit signed immediate for arith instructions with r0/r1/r2/r3 source. */ +#define CONST_OK_FOR_J(VALUE) \ + ((unsigned HOST_WIDE_INT)(VALUE) + 0x200000 < 0x400000) +/* 8 bit signed immediate for logical instructions. */ +#define CONST_OK_FOR_K(VALUE) ((unsigned HOST_WIDE_INT)(VALUE) + 0x80 < 0x100) +/* 8 bit adjusted signed immediate for compare pseudo-ops. */ +#define CONST_OK_FOR_L(VALUE) ((unsigned HOST_WIDE_INT)(VALUE) + 0x7F < 0x100) +/* 6 bit unsigned immediate for shift counts. */ +#define CONST_OK_FOR_M(VALUE) ((unsigned HOST_WIDE_INT)(VALUE) < 0x40) +/* 9 bit signed immediate for load/store post-increments. */ +/* ??? N is currently not used. */ +#define CONST_OK_FOR_N(VALUE) ((unsigned HOST_WIDE_INT)(VALUE) + 0x100 < 0x200) +/* 0 for r0. Used by Linux kernel, do not change. */ +#define CONST_OK_FOR_O(VALUE) ((VALUE) == 0) +/* 0 or -1 for dep instruction. */ +#define CONST_OK_FOR_P(VALUE) ((VALUE) == 0 || (VALUE) == -1) + +#define CONST_OK_FOR_LETTER_P(VALUE, C) \ +((C) == 'I' ? CONST_OK_FOR_I (VALUE) \ + : (C) == 'J' ? CONST_OK_FOR_J (VALUE) \ + : (C) == 'K' ? CONST_OK_FOR_K (VALUE) \ + : (C) == 'L' ? CONST_OK_FOR_L (VALUE) \ + : (C) == 'M' ? CONST_OK_FOR_M (VALUE) \ + : (C) == 'N' ? CONST_OK_FOR_N (VALUE) \ + : (C) == 'O' ? CONST_OK_FOR_O (VALUE) \ + : (C) == 'P' ? CONST_OK_FOR_P (VALUE) \ + : 0) + +/* A C expression that defines the machine-dependent operand constraint letters + (`G', `H') that specify particular ranges of `const_double' values. */ + +/* 0.0 and 1.0 for fr0 and fr1. */ +#define CONST_DOUBLE_OK_FOR_G(VALUE) \ + ((VALUE) == CONST0_RTX (GET_MODE (VALUE)) \ + || (VALUE) == CONST1_RTX (GET_MODE (VALUE))) + +#define CONST_DOUBLE_OK_FOR_LETTER_P(VALUE, C) \ + ((C) == 'G' ? 
CONST_DOUBLE_OK_FOR_G (VALUE) : 0)
+
+/* A C expression that defines the optional machine-dependent constraint
+   letters (`Q', `R', `S', `T', `U') that can be used to segregate specific
+   types of operands, usually memory references, for the target machine.  */
+/* ??? This might be useful considering that we have already used all of the
+   integer constant constraint letters.  */
+/* #define EXTRA_CONSTRAINT(VALUE, C) */
+
+/* Basic Stack Layout */
+
+/* Define this macro if pushing a word onto the stack moves the stack pointer
+   to a smaller address.  */
+#define STACK_GROWS_DOWNWARD 1
+
+/* Define this macro if the addresses of local variable slots are at negative
+   offsets from the frame pointer.  */
+#define FRAME_GROWS_DOWNWARD
+
+/* Offset from the frame pointer to the first local variable slot to be
+   allocated.  */
+/* ??? This leaves 16 bytes unused normally, but it looks funny to store locals
+   into the 16-byte reserved area.  */
+/* ??? This isn't very efficient use of the frame pointer.  Better would be
+   to move it down a ways, so that we have positive and negative offsets.  */
+#define STARTING_FRAME_OFFSET \
+  (current_function_pretend_args_size \
+   ? 16 - current_function_pretend_args_size \
+   : 0)
+
+/* Offset from the stack pointer register to the first location at which
+   outgoing arguments are placed.  If not specified, the default value of zero
+   is used.  This is the proper value for most machines.  */
+/* IA64 has a 16 byte scratch area that is at the bottom of the stack.  */
+#define STACK_POINTER_OFFSET 16
+
+/* Offset from the argument pointer register to the first argument's address.
+   On some machines it may depend on the data type of the function.  */
+#define FIRST_PARM_OFFSET(FUNDECL) 0
+
+/* A C expression whose value is RTL representing the value of the return
+   address for the frame COUNT steps up from the current frame, after the
+   prologue.  */
+
+/* ???
Frames other than zero would likely require interpreting the frame + unwind info, so we don't try to support them. We would also need to define + DYNAMIC_CHAIN_ADDRESS and SETUP_FRAME_ADDRESS (for the reg stack flush). */ + +/* ??? This only works for non-leaf functions. In a leaf function, the return + address would be in b0 (rp). */ + +#define RETURN_ADDR_RTX(COUNT, FRAMEADDR) \ + ((count == 0) \ + ? gen_rtx_REG (Pmode, RETURN_ADDRESS_REGNUM) \ + : (rtx) 0) + +/* A C expression whose value is RTL representing the location of the incoming + return address at the beginning of any function, before the prologue. This + RTL is either a `REG', indicating that the return value is saved in `REG', + or a `MEM' representing a location in the stack. This enables DWARF2 + unwind info for C++ EH. */ +#define INCOMING_RETURN_ADDR_RTX gen_rtx_REG (VOIDmode, BR_REG (0)) +/* ??? This is not defined because of three problems. + 1) dwarf2out.c assumes that DWARF_FRAME_RETURN_COLUMN fits in one byte. + The default value is FIRST_PSEUDO_REGISTER which doesn't. This can be + worked around by setting PC_REGNUM to FR_REG (0) which is an otherwise + unused register number. + 2) dwarf2out_frame_debug core dumps while processing prologue insns. We + need to refine which insns have RTX_FRAME_RELATED_P set and which don't. + 3) It isn't possible to turn off EH frame info by defining DWARF2_UNIND_INFO + to zero, despite what the documentation implies, because it is tested in + a few places with #ifdef instead of #if. */ +#undef INCOMING_RETURN_ADDR_RTX + +/* A C expression whose value is an integer giving the offset, in bytes, from + the value of the stack pointer register to the top of the stack frame at the + beginning of any function, before the prologue. The top of the frame is + defined to be the value of the stack pointer in the previous frame, just + before the call instruction. */ +#define INCOMING_FRAME_SP_OFFSET 0 + + +/* Register That Address the Stack Frame. 
*/ + +/* The register number of the stack pointer register, which must also be a + fixed register according to `FIXED_REGISTERS'. On most machines, the + hardware determines which register this is. */ + +#define STACK_POINTER_REGNUM 12 + +/* The register number of the frame pointer register, which is used to access + automatic variables in the stack frame. On some machines, the hardware + determines which register this is. On other machines, you can choose any + register you wish for this purpose. */ + +#define FRAME_POINTER_REGNUM 328 + +/* Register number where frame pointer was saved in the prologue, or zero + if it was not saved. */ + +extern int ia64_fp_regno; + +/* Number of input and local registers used. This is needed for the .regstk + directive, and also for debugging info. */ + +extern int ia64_input_regs; +extern int ia64_local_regs; + +/* The register number of the arg pointer register, which is used to access the + function's argument list. */ +/* r0 won't otherwise be used, so put the always eliminated argument pointer + in it. */ +#define ARG_POINTER_REGNUM R_GR(0) + +/* The register number for the return address register. This is modified by + ia64_expand_prologue to point to the real return address save register. */ + +#define RETURN_ADDRESS_REGNUM 329 + +/* Register numbers used for passing a function's static chain pointer. */ + +#define STATIC_CHAIN_REGNUM 15 + + +/* Eliminating the Frame Pointer and the Arg Pointer */ + +/* A C expression which is nonzero if a function must have and use a frame + pointer. This expression is evaluated in the reload pass. If its value is + nonzero the function will have a frame pointer. */ + +#define FRAME_POINTER_REQUIRED 0 + +/* If defined, this macro specifies a table of register pairs used to eliminate + unneeded registers that point into the stack frame. 
*/ + +#define ELIMINABLE_REGS \ +{ \ + {ARG_POINTER_REGNUM, STACK_POINTER_REGNUM}, \ + {ARG_POINTER_REGNUM, FRAME_POINTER_REGNUM}, \ + {FRAME_POINTER_REGNUM, STACK_POINTER_REGNUM} \ +} + +/* A C expression that returns non-zero if the compiler is allowed to try to + replace register number FROM with register number TO. There are no ia64 + specific restrictions. */ + +#define CAN_ELIMINATE(FROM, TO) 1 + +/* This macro is similar to `INITIAL_FRAME_POINTER_OFFSET'. It specifies the + initial difference between the specified pair of registers. This macro must + be defined if `ELIMINABLE_REGS' is defined. */ +/* ??? I need to decide whether the frame pointer is the old frame SP + or the new frame SP before dynamic allocs. */ +#define INITIAL_ELIMINATION_OFFSET(FROM, TO, OFFSET) \ +{ \ + unsigned int size = ia64_compute_frame_size (get_frame_size ()); \ + \ + if ((FROM) == FRAME_POINTER_REGNUM && (TO) == STACK_POINTER_REGNUM) \ + (OFFSET) = size; \ + else if ((FROM) == ARG_POINTER_REGNUM) \ + { \ + switch (TO) \ + { \ + case FRAME_POINTER_REGNUM: \ + /* Arguments start above the 16 byte save area, unless stdarg \ + in which case we store through the 16 byte save area. */ \ + (OFFSET) = 16 - current_function_pretend_args_size; \ + break; \ + case STACK_POINTER_REGNUM: \ + (OFFSET) = size + 16 - current_function_pretend_args_size; \ + break; \ + default: \ + abort (); \ + } \ + } \ + else \ + abort (); \ +} + + +/* Passing Function Arguments on the Stack */ + +/* Define this macro if an argument declared in a prototype as an integral type + smaller than `int' should actually be passed as an `int'. In addition to + avoiding errors in certain cases of mismatch, it also makes for better code + on certain machines. */ +/* ??? Investigate. */ +/* #define PROMOTE_PROTOTYPES */ + +/* If defined, the maximum amount of space required for outgoing arguments will + be computed and placed into the variable + `current_function_outgoing_args_size'. 
*/ + +#define ACCUMULATE_OUTGOING_ARGS + +/* A C expression that should indicate the number of bytes of its own arguments + that a function pops on returning, or 0 if the function pops no arguments + and the caller must therefore pop them all after the function returns. */ + +#define RETURN_POPS_ARGS(FUNDECL, FUNTYPE, STACK_SIZE) 0 + + +/* Function Arguments in Registers */ + +#define MAX_ARGUMENT_SLOTS 8 +#define MAX_INT_RETURN_SLOTS 4 +#define GR_ARG_FIRST IN_REG (0) +#define GR_RET_FIRST GR_REG (8) +#define GR_RET_LAST GR_REG (11) +#define FR_ARG_FIRST FR_REG (8) +#define FR_RET_FIRST FR_REG (8) +#define FR_RET_LAST FR_REG (15) +#define AR_ARG_FIRST OUT_REG (0) + +/* A C expression that controls whether a function argument is passed in a + register, and which register. */ + +#define FUNCTION_ARG(CUM, MODE, TYPE, NAMED) \ + ia64_function_arg (&CUM, MODE, TYPE, NAMED, 0) + +/* Define this macro if the target machine has "register windows", so that the + register in which a function sees an argument is not necessarily the same + as the one in which the caller passed the argument. */ + +#define FUNCTION_INCOMING_ARG(CUM, MODE, TYPE, NAMED) \ + ia64_function_arg (&CUM, MODE, TYPE, NAMED, 1) + +/* A C expression for the number of words, at the beginning of an argument, + that must be put in registers. The value must be zero for arguments that are + passed entirely in registers or that are entirely pushed on the stack. */ + +#define FUNCTION_ARG_PARTIAL_NREGS(CUM, MODE, TYPE, NAMED) \ + ia64_function_arg_partial_nregs (&CUM, MODE, TYPE, NAMED) + +/* A C expression that indicates when an argument must be passed by reference. + If nonzero for an argument, a copy of that argument is made in memory and a + pointer to the argument is passed instead of the argument itself. The + pointer is passed in whatever way is appropriate for passing a pointer to + that type. 
*/ + +#define FUNCTION_ARG_PASS_BY_REFERENCE(CUM, MODE, TYPE, NAMED) 0 + +/* A C type for declaring a variable that is used as the first argument of + `FUNCTION_ARG' and other related values. For some target machines, the type + `int' suffices and can hold the number of bytes of argument so far. */ + +typedef struct ia64_args +{ + int words; /* # words of arguments so far */ + int fp_regs; /* # FR registers used so far */ + int prototype; /* whether function prototyped */ +} CUMULATIVE_ARGS; + +/* A C statement (sans semicolon) for initializing the variable CUM for the + state at the beginning of the argument list. */ + +#define INIT_CUMULATIVE_ARGS(CUM, FNTYPE, LIBNAME, INDIRECT) \ +do { \ + (CUM).words = 0; \ + (CUM).fp_regs = 0; \ + (CUM).prototype = ((FNTYPE) && TYPE_ARG_TYPES (FNTYPE)) || (LIBNAME); \ +} while (0) + +/* Like `INIT_CUMULATIVE_ARGS' but overrides it for the purposes of finding the + arguments for the function being compiled. If this macro is undefined, + `INIT_CUMULATIVE_ARGS' is used instead. */ + +/* We set prototype to true so that we never try to return a PARALLEL from + function_arg. */ +#define INIT_CUMULATIVE_INCOMING_ARGS(CUM, FNTYPE, LIBNAME) \ +do { \ + (CUM).words = 0; \ + (CUM).fp_regs = 0; \ + (CUM).prototype = 1; \ +} while (0) + +/* A C statement (sans semicolon) to update the summarizer variable CUM to + advance past an argument in the argument list. The values MODE, TYPE and + NAMED describe that argument. Once this is done, the variable CUM is + suitable for analyzing the *following* argument with `FUNCTION_ARG'. */ + +#define FUNCTION_ARG_ADVANCE(CUM, MODE, TYPE, NAMED) \ + ia64_function_arg_advance (&CUM, MODE, TYPE, NAMED) + +/* If defined, a C expression that gives the alignment boundary, in bits, of an + argument with the specified mode and type. */ + +/* Arguments larger than 64 bits require 128 bit alignment. */ + +#define FUNCTION_ARG_BOUNDARY(MODE, TYPE) \ + (((((MODE) == BLKmode ? 
int_size_in_bytes (TYPE) : GET_MODE_SIZE (MODE)) \ + + UNITS_PER_WORD - 1) / UNITS_PER_WORD) > 1 ? 128 : PARM_BOUNDARY) + +/* A C expression that is nonzero if REGNO is the number of a hard register in + which function arguments are sometimes passed. This does *not* include + implicit arguments such as the static chain and the structure-value address. + On many machines, no registers can be used for this purpose since all + function arguments are pushed on the stack. */ +#define FUNCTION_ARG_REGNO_P(REGNO) \ +(((REGNO) >= GR_ARG_FIRST && (REGNO) < (GR_ARG_FIRST + MAX_ARGUMENT_SLOTS)) \ + || ((REGNO) >= FR_ARG_FIRST && (REGNO) < (FR_ARG_FIRST + MAX_ARGUMENT_SLOTS))) + +/* Implement `va_start' for varargs and stdarg. */ +#define EXPAND_BUILTIN_VA_START(stdarg, valist, nextarg) \ + ia64_va_start (stdarg, valist, nextarg) + +/* Implement `va_arg'. */ +#define EXPAND_BUILTIN_VA_ARG(valist, type) \ + ia64_va_arg (valist, type) + +/* How Scalar Function Values are Returned */ + +/* A C expression to create an RTX representing the place where a function + returns a value of data type VALTYPE. */ + +#define FUNCTION_VALUE(VALTYPE, FUNC) \ + ia64_function_value (VALTYPE, FUNC) + +/* A C expression to create an RTX representing the place where a library + function returns a value of mode MODE. */ + +#define LIBCALL_VALUE(MODE) \ + gen_rtx_REG (MODE, \ + ((GET_MODE_CLASS (MODE) == MODE_FLOAT \ + || GET_MODE_CLASS (MODE) == MODE_COMPLEX_FLOAT) \ + ? FR_RET_FIRST : GR_RET_FIRST)) + +/* A C expression that is nonzero if REGNO is the number of a hard register in + which the values of called function may come back. */ + +#define FUNCTION_VALUE_REGNO_P(REGNO) \ + (((REGNO) >= GR_RET_FIRST && (REGNO) <= GR_RET_LAST) \ + || ((REGNO) >= FR_RET_FIRST && (REGNO) <= FR_RET_LAST)) + + +/* How Large Values are Returned */ + +/* A nonzero value says to return the function value in memory, just as large + structures are always returned. 
*/ + +#define RETURN_IN_MEMORY(TYPE) \ + ia64_return_in_memory (TYPE) + +/* If you define this macro to be 0, then the conventions used for structure + and union return values are decided by the `RETURN_IN_MEMORY' macro. */ + +#define DEFAULT_PCC_STRUCT_RETURN 0 + +/* If the structure value address is passed in a register, then + `STRUCT_VALUE_REGNUM' should be the number of that register. */ + +#define STRUCT_VALUE_REGNUM GR_REG (8) + + +/* Caller-Saves Register Allocation */ + +/* A C expression to determine whether it is worthwhile to consider placing a + pseudo-register in a call-clobbered hard register and saving and restoring + it around each function call. The expression should be 1 when this is worth + doing, and 0 otherwise. + + If you don't define this macro, a default is used which is good on most + machines: `4 * CALLS < REFS'. */ +/* ??? Investigate. */ +/* #define CALLER_SAVE_PROFITABLE(REFS, CALLS) */ + + +/* Function Entry and Exit */ + +/* A C compound statement that outputs the assembler code for entry to a + function. */ + +#define FUNCTION_PROLOGUE(FILE, SIZE) \ + ia64_function_prologue (FILE, SIZE) + +/* Define this macro as a C expression that is nonzero if the return + instruction or the function epilogue ignores the value of the stack pointer; + in other words, if it is safe to delete an instruction to adjust the stack + pointer before a return from the function. */ + +#define EXIT_IGNORE_STACK 1 + +/* Define this macro as a C expression that is nonzero for registers + used by the epilogue or the `return' pattern. */ + +#define EPILOGUE_USES(REGNO) ia64_epilogue_uses (REGNO) + +/* A C compound statement that outputs the assembler code for exit from a + function. */ + +#define FUNCTION_EPILOGUE(FILE, SIZE) \ + ia64_function_epilogue (FILE, SIZE) + +/* A C compound statement that outputs the assembler code for a thunk function, + used to implement C++ virtual function calls with multiple inheritance. */ + +/* ??? 
This only supports deltas up to 14 bits. If we need more, then we + must load the delta into a register first. */ + +#define ASM_OUTPUT_MI_THUNK(FILE, THUNK_FNDECL, DELTA, FUNCTION) \ +do { \ + fprintf (FILE, "\tadd r32 = %d, r32\n", (DELTA)); \ + fprintf (FILE, "\tbr "); \ + assemble_name (FILE, XSTR (XEXP (DECL_RTL (FUNCTION), 0), 0)); \ + fprintf (FILE, "\n"); \ +} while (0) + + +/* Generating Code for Profiling. */ + +/* A C statement or compound statement to output to FILE some assembler code to + call the profiling subroutine `mcount'. */ + +/* ??? Unclear if this will actually work. No way to test this currently. */ + +#define FUNCTION_PROFILER(FILE, LABELNO) \ +do { \ + char buf[20]; \ + ASM_GENERATE_INTERNAL_LABEL (buf, "LP", LABELNO); \ + fputs ("\taddl r16 = @ltoff(", FILE); \ + assemble_name (FILE, buf); \ + fputs ("), gp\n", FILE); \ + fputs ("\tmov r17 = r1;;\n", FILE); \ + fputs ("\tld8 out0 = [r16]\n", FILE); \ + fputs ("\tmov r18 = b0\n", FILE); \ + fputs ("\tbr.call.sptk.many rp = mcount;;\n", FILE); \ + fputs ("\tmov b0 = r18\n", FILE); \ + fputs ("\tmov r1 = r17;;\n", FILE); \ +} while (0) + +/* A C statement or compound statement to output to FILE some assembler code to + initialize basic-block profiling for the current object module. */ + +/* ??? Unclear if this will actually work. No way to test this currently. 
*/ + +#define FUNCTION_BLOCK_PROFILER(FILE, LABELNO) \ +do { \ + int labelno = LABELNO; \ + switch (profile_block_flag) \ + { \ + case 2: \ + fputs ("\taddl r16 = @ltoff(LPBX0), gp\n", FILE); \ + fprintf (FILE, "\tmov out1 = %d;;\n", labelno); \ + fputs ("\tld8 out0 = [r16]\n", FILE); \ + fputs ("\tmov r17 = r1\n", FILE); \ + fputs ("\tmov r18 = b0\n", FILE); \ + fputs ("\tbr.call.sptk.many rp = __bb_init_trace_func;;\n", FILE);\ + fputs ("\tmov r1 = r17\n", FILE); \ + fputs ("\tmov b0 = r18;;\n", FILE); \ + break; \ + default: \ + fputs ("\taddl r16 = @ltoff(LPBX0), gp;;\n", FILE); \ + fputs ("\tld8 out0 = [r16];;\n", FILE); \ + fputs ("\tld8 r17 = [out0];;\n", FILE); \ + fputs ("\tcmp.eq p6, p0 = r0, r17;;\n", FILE); \ + fputs ("(p6)\tmov r16 = r1\n", FILE); \ + fputs ("(p6)\tmov r17 = b0\n", FILE); \ + fputs ("(p6)\tbr.call.sptk.many rp = __bb_init_func;;\n", FILE); \ + fputs ("(p6)\tmov r1 = r16\n", FILE); \ + fputs ("(p6)\tmov b0 = r17;;\n", FILE); \ + break; \ + } \ +} while (0) + +/* A C statement or compound statement to output to FILE some assembler code to + increment the count associated with the basic block number BLOCKNO. */ + +/* ??? This can't work unless we mark some registers as fixed, so that we + can use them as temporaries in this macro. We need two registers for -a + profiling and 4 registers for -ax profiling. 
*/ + +#define BLOCK_PROFILER(FILE, BLOCKNO) \ +do { \ + int blockn = BLOCKNO; \ + switch (profile_block_flag) \ + { \ + case 2: \ + fputs ("\taddl r2 = @ltoff(__bb), gp\n", FILE); \ + fputs ("\taddl r3 = @ltoff(LPBX0), gp;;\n", FILE); \ + fprintf (FILE, "\tmov r9 = %d\n", blockn); \ + fputs ("\tld8 r2 = [r2]\n", FILE); \ + fputs ("\tld8 r3 = [r3];;\n", FILE); \ + fputs ("\tadd r8 = 8, r2\n", FILE); \ + fputs ("\tst8 [r2] = r9;;\n", FILE); \ + fputs ("\tst8 [r8] = r3\n", FILE); \ + fputs ("\tbr.call.sptk.many rp = __bb_trace_func\n", FILE); \ + break; \ + \ + default: \ + fputs ("\taddl r2 = @ltoff(LPBX2), gp;;\n", FILE); \ + fputs ("\tld8 r2 = [r2];;\n", FILE); \ + fprintf (FILE, "\taddl r2 = %d, r2;;\n", 8 * blockn); \ + fputs ("\tld8 r3 = [r2];;\n", FILE); \ + fputs ("\tadd r3 = 1, r3;;\n", FILE); \ + fputs ("\tst8 [r2] = r3;;\n", FILE); \ + break; \ + } \ +} while(0) + +/* A C statement or compound statement to output to FILE assembler + code to call function `__bb_trace_ret'. */ + +/* ??? Unclear if this will actually work. No way to test this currently. */ + +/* ??? This needs to be emitted into the epilogue. Perhaps rewrite to emit + rtl and call from ia64_expand_epilogue? */ + +#define FUNCTION_BLOCK_PROFILER_EXIT(FILE) \ + fputs ("\tbr.call.sptk.many rp = __bb_trace_ret\n", FILE); +#undef FUNCTION_BLOCK_PROFILER_EXIT + +/* A C statement or compound statement to save all registers, which may be + clobbered by a function call, including condition codes. */ + +/* ??? We would have to save 20 GRs, 106 FRs, 10 PRs, 2 BRs, and possibly + other things. This is not practical. Perhaps leave this feature (-ax) + unsupported by undefining above macros? */ + +/* #define MACHINE_STATE_SAVE(ID) */ + +/* A C statement or compound statement to restore all registers, including + condition codes, saved by `MACHINE_STATE_SAVE'. */ + +/* ??? We would have to restore 20 GRs, 106 FRs, 10 PRs, 2 BRs, and possibly + other things. This is not practical. 
Perhaps leave this feature (-ax) + unsupported by undefining above macros? */ + +/* #define MACHINE_STATE_RESTORE(ID) */ + + +/* Implementing the Varargs Macros. */ + +/* Define this macro to store the anonymous register arguments into the stack + so that all the arguments appear to have been passed consecutively on the + stack. */ + +#define SETUP_INCOMING_VARARGS(ARGS_SO_FAR, MODE, TYPE, PRETEND_ARGS_SIZE, SECOND_TIME) \ + ia64_setup_incoming_varargs (ARGS_SO_FAR, MODE, TYPE, & PRETEND_ARGS_SIZE, SECOND_TIME) + +/* Define this macro if the location where a function argument is passed + depends on whether or not it is a named argument. */ + +#define STRICT_ARGUMENT_NAMING 1 + + +/* Trampolines for Nested Functions. */ + +/* We need 32 bytes, so we can save the sp, ar.rnat, ar.bsp, and ar.pfs of + the function containing a non-local goto target. */ + +#define STACK_SAVEAREA_MODE(LEVEL) \ + ((LEVEL) == SAVE_NONLOCAL ? OImode : Pmode) + +/* Output assembler code for a block containing the constant parts of + a trampoline, leaving space for the variable parts. + + The trampoline should set the static chain pointer to value placed + into the trampoline and should branch to the specified routine. The + gp doesn't have to be set since that is already done by the caller + of the trampoline. To make the normal indirect-subroutine calling + convention work, the trampoline must look like a function descriptor. + That is, the first word must be the target address, the second + word must be the target's global pointer. 
The complete trampoline + has the following form: + + +----------------+ \ + TRAMP: | TRAMP+32 | | + +----------------+ > fake function descriptor + | gp | | + +----------------+ / + | target addr | + +----------------+ + | static link | + +----------------+ + | mov r2=ip | + + + + | ;; | + +----------------+ + | adds r4=-16,r2 | + + adds r15=-8,r2 + + | ;; | + +----------------+ + | ld8 r4=[r4];; | + + ld8 r15=[r15] + + | mov b6=r4;; | + +----------------+ + | br b6 | + +----------------+ +*/ + +/* ??? Need a version of this and INITIALIZE_TRAMPOLINE for -mno-pic. */ + +#define TRAMPOLINE_TEMPLATE(FILE) \ +{ \ + fprintf (FILE, \ + "\tdata8 0,0,0,0\n" \ + "\t{ mov r2=ip }\n" \ + "\t;;\n" \ + "\t{ adds r4=-16,r2; adds r%d=-8,r2 }\n" \ + "\t;;\n" \ + "\t{ ld8 r4=[r4];; ld8 r%d=[r%d]; mov b6=r4 }\n" \ + "\t;;\n" \ + "\t{ br b6 }\n" \ + "\t;;\n", \ + STATIC_CHAIN_REGNUM, STATIC_CHAIN_REGNUM, \ + STATIC_CHAIN_REGNUM); \ +} + +/* The name of a subroutine to switch to the section in which the trampoline + template is to be placed. + + On ia64, instructions may only be placed in a text segment. */ + +#define TRAMPOLINE_SECTION text_section + +/* A C expression for the size in bytes of the trampoline, as an integer. */ + +#define TRAMPOLINE_SIZE 96 + +/* Alignment required for trampolines, in bits. */ + +#define TRAMPOLINE_ALIGNMENT 256 + +/* A C statement to initialize the variable parts of a trampoline. */ + +#define INITIALIZE_TRAMPOLINE(ADDR, FNADDR, STATIC_CHAIN) \ +{ \ + rtx addr, addr2, addr_reg, fdesc_addr; \ + \ + /* Load function descriptor address into a pseudo. */ \ + fdesc_addr = gen_reg_rtx (DImode); \ + emit_move_insn (fdesc_addr, FNADDR); \ + \ + /* Read target address from function descriptor and store in \ + trampoline. */ \ + addr = memory_address (Pmode, plus_constant (ADDR, 16)); \ + emit_move_insn (gen_rtx_MEM (Pmode, addr), \ + gen_rtx_MEM (Pmode, fdesc_addr)); \ + /* Store static chain in trampoline. 
*/ \ + addr = memory_address (Pmode, plus_constant (ADDR, 24)); \ + emit_move_insn (gen_rtx_MEM (Pmode, addr), STATIC_CHAIN); \ + \ + /* Load GP value from function descriptor and store in trampoline. */\ + addr = memory_address (Pmode, plus_constant (ADDR, 8)); \ + addr2 = memory_address (Pmode, plus_constant (fdesc_addr, 8)); \ + emit_move_insn (gen_rtx_MEM (Pmode, addr), \ + gen_rtx_MEM (Pmode, addr2)); \ + \ + /* Store trampoline entry address in trampoline. */ \ + addr = memory_address (Pmode, ADDR); \ + addr2 = memory_address (Pmode, plus_constant (ADDR, 32)); \ + emit_move_insn (gen_rtx_MEM (Pmode, addr), addr2); \ + \ + /* Flush the relevant 64 bytes from the i-cache. */ \ + addr_reg = force_reg (DImode, plus_constant (ADDR, 0)); \ + emit_insn (gen_rtx_UNSPEC_VOLATILE (VOIDmode, \ + gen_rtvec (1, addr_reg), 3)); \ +} + + +/* Implicit Calls to Library Routines */ + +/* ??? The ia64 linux kernel requires that we use the standard names for + divide and modulo routines. However, if we aren't careful, lib1funcs.asm + will be overridden by libgcc2.c. We avoid this by using different names + for lib1funcs.asm modules, e.g. __divdi3 vs _divdi3. Since lib1funcs.asm + goes into libgcc.a first, the linker will find it first. */ + +/* Define this macro as a C statement that declares additional library routines + or renames existing ones. */ + +/* ??? Disable the SImode divide routines for now. */ +#define INIT_TARGET_OPTABS \ +do { \ + sdiv_optab->handlers[(int) SImode].libfunc = 0; \ + udiv_optab->handlers[(int) SImode].libfunc = 0; \ + smod_optab->handlers[(int) SImode].libfunc = 0; \ + umod_optab->handlers[(int) SImode].libfunc = 0; \ +} while (0) + +/* Define this macro if GNU CC should generate calls to the System V (and ANSI + C) library functions `memcpy' and `memset' rather than the BSD functions + `bcopy' and `bzero'. */ + +#define TARGET_MEM_FUNCTIONS + + +/* Addressing Modes */ + +/* Define this macro if the machine supports post-increment addressing. 
*/ + +#define HAVE_POST_INCREMENT 1 +#define HAVE_POST_DECREMENT 1 + +/* A C expression that is 1 if the RTX X is a constant which is a valid + address. */ + +#define CONSTANT_ADDRESS_P(X) 0 + +/* The max number of registers that can appear in a valid memory address. */ + +#define MAX_REGS_PER_ADDRESS 1 + +/* A C compound statement with a conditional `goto LABEL;' executed if X (an + RTX) is a legitimate memory address on the target machine for a memory + operand of mode MODE. */ + +/* ??? IA64 post increment addressing mode is much more powerful than this. */ + +#define GO_IF_LEGITIMATE_ADDRESS(MODE, X, LABEL) \ +do { \ + if (GET_CODE (X) == REG && REG_OK_FOR_BASE_P (X)) \ + goto LABEL; \ + else if (GET_CODE (X) == SUBREG && GET_CODE (XEXP (X, 0)) == REG \ + && REG_OK_FOR_BASE_P (XEXP (X, 0))) \ + goto LABEL; \ + else if (GET_CODE (X) == POST_INC || GET_CODE (X) == POST_DEC) \ + { \ + if (GET_CODE (XEXP (X, 0)) == REG \ + && REG_OK_FOR_BASE_P (XEXP (X, 0))) \ + goto LABEL; \ + else if (GET_CODE (XEXP (X, 0)) == SUBREG \ + && GET_CODE (XEXP (XEXP (X, 0), 0)) == REG \ + && REG_OK_FOR_BASE_P (XEXP (XEXP (X, 0), 0))) \ + goto LABEL; \ + } \ +} while (0) + +/* A C expression that is nonzero if X (assumed to be a `reg' RTX) is valid for + use as a base register. */ + +#ifdef REG_OK_STRICT +#define REG_OK_FOR_BASE_P(X) REGNO_OK_FOR_BASE_P (REGNO (X)) +#else +#define REG_OK_FOR_BASE_P(X) \ + (GENERAL_REGNO_P (REGNO (X)) || (REGNO (X) >= FIRST_PSEUDO_REGISTER)) +#endif + +/* A C expression that is nonzero if X (assumed to be a `reg' RTX) is valid for + use as an index register. */ + +#define REG_OK_FOR_INDEX_P(X) 0 + +/* A C compound statement that attempts to replace X with a valid memory + address for an operand of mode MODE. + + This must be present, but there is nothing useful to be done here. 
*/ + +#define LEGITIMIZE_ADDRESS(X, OLDX, MODE, WIN) + +/* A C statement or compound statement with a conditional `goto LABEL;' + executed if memory address X (an RTX) can have different meanings depending + on the machine mode of the memory reference it is used for or if the address + is valid for some modes but not others. */ + +/* ??? Strictly speaking this isn't true, because we can use any increment with + any mode. Unfortunately, the RTL implies that the increment depends on the + mode, so we need this for now. */ + +#define GO_IF_MODE_DEPENDENT_ADDRESS(ADDR, LABEL) \ + if (GET_CODE (ADDR) == POST_DEC || GET_CODE (ADDR) == POST_INC) \ + goto LABEL; + +/* A C expression that is nonzero if X is a legitimate constant for an + immediate operand on the target machine. */ + +#define LEGITIMATE_CONSTANT_P(X) \ + (GET_CODE (X) != CONST_DOUBLE || GET_MODE (X) == VOIDmode \ + || GET_MODE (X) == DImode || CONST_DOUBLE_OK_FOR_G (X)) \ + + +/* Condition Code Status */ + +/* On some machines not all possible comparisons are defined, but you can + convert an invalid comparison into a valid one. */ +/* ??? Investigate. See the alpha definition. */ +/* #define CANONICALIZE_COMPARISON(CODE, OP0, OP1) */ + + +/* Describing Relative Costs of Operations */ + +/* A part of a C `switch' statement that describes the relative costs of + constant RTL expressions. */ + +/* ??? This is incomplete. */ + +#define CONST_COSTS(X, CODE, OUTER_CODE) \ + case CONST_INT: \ + if ((X) == const0_rtx) \ + return 0; \ + case CONST_DOUBLE: \ + case CONST: \ + case SYMBOL_REF: \ + case LABEL_REF: \ + return COSTS_N_INSNS (1); + +/* Like `CONST_COSTS' but applies to nonconstant RTL expressions. */ + +/* ??? Should define this to get better optimized code. */ + +/* We make divide expensive, so that divide-by-constant will be optimized to + a multiply. 
*/ + +#define RTX_COSTS(X, CODE, OUTER_CODE) \ + case DIV: \ + case UDIV: \ + case MOD: \ + case UMOD: \ + return COSTS_N_INSNS (20); + +/* An expression giving the cost of an addressing mode that contains ADDRESS. + If not defined, the cost is computed from the ADDRESS expression and the + `CONST_COSTS' values. */ + +#define ADDRESS_COST(ADDRESS) 0 + +/* A C expression for the cost of moving data from a register in class FROM to + one in class TO. */ + +#define REGISTER_MOVE_COST(FROM, TO) \ +((FROM) == BR_REGS && (TO) == BR_REGS ? 8 \ + : ((FROM) == BR_REGS && (TO) != GENERAL_REGS \ + || (TO) == BR_REGS && (FROM) != GENERAL_REGS) ? 6 \ + : ((FROM) == FR_FP_REGS && (TO) == FR_INT_REGS \ + || (FROM) == FR_INT_REGS && (TO) == FR_FP_REGS) ? 4 \ + : 2) + +/* A C expression for the cost of moving data of mode M between a register and + memory. */ +/* ??? Investigate. Might get better code by defining this. */ +/* #define MEMORY_MOVE_COST(M,C,I) */ + +/* A C expression for the cost of a branch instruction. A value of 1 is the + default; other values are interpreted relative to that. */ +/* ??? Investigate. Might get better code by defining this. */ +/* #define BRANCH_COST */ + +/* Define this macro as a C expression which is nonzero if accessing less than + a word of memory (i.e. a `char' or a `short') is no faster than accessing a + word of memory. */ + +#define SLOW_BYTE_ACCESS 1 + +/* Define this macro if it is as good or better to call a constant function + address than to call an address kept in a register. + + Indirect function calls are more expensive that direct function calls, so + don't cse function addresses. */ + +#define NO_FUNCTION_CSE + +/* A C statement (sans semicolon) to update the integer variable COST based on + the relationship between INSN that is dependent on DEP_INSN through the + dependence LINK. */ + +/* ??? Investigate. 
*/ +/* #define ADJUST_COST(INSN, LINK, DEP_INSN, COST) */ + +/* A C statement (sans semicolon) to update the integer scheduling + priority `INSN_PRIORITY(INSN)'. */ + +/* ??? Investigate. */ +/* #define ADJUST_PRIORITY (INSN) */ + + +/* Dividing the output into sections. */ + +/* A C expression whose value is a string containing the assembler operation + that should precede instructions and read-only data. */ + +#define TEXT_SECTION_ASM_OP ".text" + +/* A C expression whose value is a string containing the assembler operation to + identify the following data as writable initialized data. */ + +#define DATA_SECTION_ASM_OP ".data" + +/* If defined, a C expression whose value is a string containing the assembler + operation to identify the following data as uninitialized global data. */ + +#define BSS_SECTION_ASM_OP ".bss" + +/* Define this macro if jump tables (for `tablejump' insns) should be output in + the text section, along with the assembler instructions. */ + +/* ??? It is probably better for the jump tables to be in the rodata section, + which is where they go by default. Unfortunately, that currently does not + work, because of some problem with pcrelative relocations not getting + resolved correctly. */ +/* ??? FIXME ??? rth says that we should use @gprel to solve this problem. */ +/* ??? If jump tables are in the text section, then we can use 4 byte + entries instead of 8 byte entries. */ + +#define JUMP_TABLES_IN_TEXT_SECTION 1 + +/* Define this macro if references to a symbol must be treated differently + depending on something about the variable or function named by the symbol + (such as what section it is in). */ + +#define ENCODE_SECTION_INFO(DECL) ia64_encode_section_info (DECL) + +#define SDATA_NAME_FLAG_CHAR '@' + +#define IA64_DEFAULT_GVALUE 8 + +/* Decode SYM_NAME and store the real name part in VAR, sans the characters + that encode section info. 
*/ + +#define STRIP_NAME_ENCODING(VAR, SYMBOL_NAME) \ + (VAR) = (SYMBOL_NAME) + ((SYMBOL_NAME)[0] == SDATA_NAME_FLAG_CHAR) + + +/* Position Independent Code. */ + +/* The register number of the register used to address a table of static data + addresses in memory. */ + +/* ??? Should modify ia64.md to use pic_offset_table_rtx instead of + gen_rtx_REG (DImode, 1). */ + +/* ??? Should we set flag_pic? Probably need to define + LEGITIMIZE_PIC_OPERAND_P to make that work. */ + +#define PIC_OFFSET_TABLE_REGNUM GR_REG (1) + +/* Define this macro if the register defined by `PIC_OFFSET_TABLE_REGNUM' is + clobbered by calls. */ + +#define PIC_OFFSET_TABLE_REG_CALL_CLOBBERED + + +/* The Overall Framework of an Assembler File. */ + +/* A C string constant describing how to begin a comment in the target + assembler language. The compiler assumes that the comment will end at the + end of the line. */ + +#define ASM_COMMENT_START "//" + +/* A C string constant for text to be output before each `asm' statement or + group of consecutive ones. */ + +/* ??? This won't work with the Intel assembler, because it does not accept + # as a comment start character. However, //APP does not work in gas, so we + can't use that either. Same problem for ASM_APP_OFF below. */ + +#define ASM_APP_ON "#APP\n" + +/* A C string constant for text to be output after each `asm' statement or + group of consecutive ones. */ + +#define ASM_APP_OFF "#NO_APP\n" + + +/* Output of Data. */ + +/* A C statement to output to the stdio stream STREAM an assembler instruction + to assemble a floating-point constant of `XFmode', `DFmode', `SFmode', + respectively, whose value is VALUE. */ + +/* ??? This has not been tested. Long doubles are really 10 bytes not 12 + bytes on ia64. */ + +/* ??? Must reverse the word order for big-endian code? 
*/ + +#define ASM_OUTPUT_LONG_DOUBLE(FILE, VALUE) \ +do { \ + long t[3]; \ + REAL_VALUE_TO_TARGET_LONG_DOUBLE (VALUE, t); \ + fprintf (FILE, "\tdata8 0x%08lx, 0x%08lx, 0x%08lx\n", \ + t[0] & 0xffffffff, t[1] & 0xffffffff, t[2] & 0xffffffff); \ +} while (0) + +/* ??? Must reverse the word order for big-endian code? */ + +#define ASM_OUTPUT_DOUBLE(FILE,VALUE) \ +do { \ + long t[2]; \ + REAL_VALUE_TO_TARGET_DOUBLE (VALUE, t); \ + fprintf (FILE, "\tdata8 0x%08lx%08lx\n", \ + t[1] & 0xffffffff, t[0] & 0xffffffff); \ +} while (0) + +#define ASM_OUTPUT_FLOAT(FILE,VALUE) \ + do { \ + long t; \ + REAL_VALUE_TO_TARGET_SINGLE (VALUE, t); \ + fprintf (FILE, "\tdata4 0x%lx\n", t & 0xffffffff); \ +} while (0) + +/* A C statement to output to the stdio stream STREAM an assembler instruction + to assemble an integer of 1, 2, 4, or 8 bytes, respectively, whose value + is VALUE. */ + +/* This is how to output an assembler line defining a `char' constant. */ + +#define ASM_OUTPUT_CHAR(FILE, VALUE) \ +do { \ + fprintf (FILE, "\t%s\t", ASM_BYTE_OP); \ + output_addr_const (FILE, (VALUE)); \ + fprintf (FILE, "\n"); \ +} while (0) + +/* This is how to output an assembler line defining a `short' constant. */ + +#define ASM_OUTPUT_SHORT(FILE, VALUE) \ +do { \ + fprintf (FILE, "\tdata2\t"); \ + output_addr_const (FILE, (VALUE)); \ + fprintf (FILE, "\n"); \ +} while (0) + +/* This is how to output an assembler line defining an `int' constant. + We also handle symbol output here. */ + +/* ??? For ILP32, also need to handle function addresses here. */ + +#define ASM_OUTPUT_INT(FILE, VALUE) \ +do { \ + fprintf (FILE, "\tdata4\t"); \ + output_addr_const (FILE, (VALUE)); \ + fprintf (FILE, "\n"); \ +} while (0) + +/* This is how to output an assembler line defining a `long' constant. + We also handle symbol output here. 
*/ + +#define ASM_OUTPUT_DOUBLE_INT(FILE, VALUE) \ +do { \ + fprintf (FILE, "\tdata8\t"); \ + if (SYMBOL_REF_FLAG (VALUE)) \ + fprintf (FILE, "@fptr("); \ + output_addr_const (FILE, (VALUE)); \ + if (SYMBOL_REF_FLAG (VALUE)) \ + fprintf (FILE, ")"); \ + fprintf (FILE, "\n"); \ +} while (0) + +/* A C statement to output to the stdio stream STREAM an assembler instruction + to assemble a single byte containing the number VALUE. */ + +#define ASM_OUTPUT_BYTE(STREAM, VALUE) \ + fprintf (STREAM, "\t%s\t0x%x\n", ASM_BYTE_OP, (VALUE)) + +/* These macros are defined as C string constant, describing the syntax in the + assembler for grouping arithmetic expressions. */ + +#define ASM_OPEN_PAREN "(" +#define ASM_CLOSE_PAREN ")" + + +/* Output of Uninitialized Variables. */ + +/* This is all handled by svr4.h. */ + + +/* Output and Generation of Labels. */ + +/* A C statement (sans semicolon) to output to the stdio stream STREAM the + assembler definition of a label named NAME. */ + +/* See the ASM_OUTPUT_LABELREF definition in sysv4.h for an explanation of + why ia64_asm_output_label exists. */ + +extern int ia64_asm_output_label; +#define ASM_OUTPUT_LABEL(STREAM, NAME) \ +do { \ + ia64_asm_output_label = 1; \ + assemble_name (STREAM, NAME); \ + fputs (":\n", STREAM); \ + ia64_asm_output_label = 0; \ +} while (0) + +/* A C statement (sans semicolon) to output to the stdio stream STREAM some + commands that will make the label NAME global; that is, available for + reference from other files. */ + +#define ASM_GLOBALIZE_LABEL(STREAM,NAME) \ +do { \ + fputs ("\t.global ", STREAM); \ + assemble_name (STREAM, NAME); \ + fputs ("\n", STREAM); \ +} while (0) + +/* A C statement (sans semicolon) to output to the stdio stream STREAM any text + necessary for declaring the name of an external symbol named NAME which is + referenced in this compilation but not defined. 
*/ + +#define ASM_OUTPUT_EXTERNAL(FILE, DECL, NAME) \ + ia64_asm_output_external (FILE, DECL, NAME) + +/* A C statement to store into the string STRING a label whose name is made + from the string PREFIX and the number NUM. */ + +#define ASM_GENERATE_INTERNAL_LABEL(LABEL, PREFIX, NUM) \ +do { \ + sprintf (LABEL, "*.%s%d", PREFIX, NUM); \ +} while (0) + +/* A C expression to assign to OUTVAR (which is a variable of type `char *') a + newly allocated string made from the string NAME and the number NUMBER, with + some suitable punctuation added. */ + +/* ??? Not sure if using a ? in the name for Intel as is safe. */ + +#define ASM_FORMAT_PRIVATE_NAME(OUTVAR, NAME, NUMBER) \ +do { \ + (OUTVAR) = (char *) alloca (strlen (NAME) + 12); \ + sprintf (OUTVAR, "%s%c%ld", (NAME), (TARGET_GNU_AS ? '.' : '?'), \ + (long)(NUMBER)); \ +} while (0) + +/* A C statement to output to the stdio stream STREAM assembler code which + defines (equates) the symbol NAME to have the value VALUE. */ + +#define ASM_OUTPUT_DEF(STREAM, NAME, VALUE) \ +do { \ + assemble_name (STREAM, NAME); \ + fputs (" = ", STREAM); \ + assemble_name (STREAM, VALUE); \ + fputc ('\n', STREAM); \ +} while (0) + + +/* Macros Controlling Initialization Routines. */ + +/* This is handled by svr4.h and sysv4.h. */ + + +/* Output of Assembler Instructions. */ + +/* A C initializer containing the assembler's names for the machine registers, + each one as a C string constant. */ + +#define REGISTER_NAMES \ +{ \ + /* General registers. */ \ + "r0", "r1", "r2", "r3", "r4", "r5", "r6", "r7", "r8", "r9", \ + "r10", "r11", "r12", "r13", "r14", "r15", "r16", "r17", "r18", "r19", \ + "r20", "r21", "r22", "r23", "r24", "r25", "r26", "r27", "r28", "r29", \ + "r30", "r31", \ + /* Local registers. 
*/ \ + "loc0", "loc1", "loc2", "loc3", "loc4", "loc5", "loc6", "loc7", \ + "loc8", "loc9", "loc10","loc11","loc12","loc13","loc14","loc15", \ + "loc16","loc17","loc18","loc19","loc20","loc21","loc22","loc23", \ + "loc24","loc25","loc26","loc27","loc28","loc29","loc30","loc31", \ + "loc32","loc33","loc34","loc35","loc36","loc37","loc38","loc39", \ + "loc40","loc41","loc42","loc43","loc44","loc45","loc46","loc47", \ + "loc48","loc49","loc50","loc51","loc52","loc53","loc54","loc55", \ + "loc56","loc57","loc58","loc59","loc60","loc61","loc62","loc63", \ + "loc64","loc65","loc66","loc67","loc68","loc69","loc70","loc71", \ + "loc72","loc73","loc74","loc75","loc76","loc77","loc78","loc79", \ + /* Input registers. */ \ + "in0", "in1", "in2", "in3", "in4", "in5", "in6", "in7", \ + /* Output registers. */ \ + "out0", "out1", "out2", "out3", "out4", "out5", "out6", "out7", \ + /* Floating-point registers. */ \ + "f0", "f1", "f2", "f3", "f4", "f5", "f6", "f7", "f8", "f9", \ + "f10", "f11", "f12", "f13", "f14", "f15", "f16", "f17", "f18", "f19", \ + "f20", "f21", "f22", "f23", "f24", "f25", "f26", "f27", "f28", "f29", \ + "f30", "f31", "f32", "f33", "f34", "f35", "f36", "f37", "f38", "f39", \ + "f40", "f41", "f42", "f43", "f44", "f45", "f46", "f47", "f48", "f49", \ + "f50", "f51", "f52", "f53", "f54", "f55", "f56", "f57", "f58", "f59", \ + "f60", "f61", "f62", "f63", "f64", "f65", "f66", "f67", "f68", "f69", \ + "f70", "f71", "f72", "f73", "f74", "f75", "f76", "f77", "f78", "f79", \ + "f80", "f81", "f82", "f83", "f84", "f85", "f86", "f87", "f88", "f89", \ + "f90", "f91", "f92", "f93", "f94", "f95", "f96", "f97", "f98", "f99", \ + "f100","f101","f102","f103","f104","f105","f106","f107","f108","f109",\ + "f110","f111","f112","f113","f114","f115","f116","f117","f118","f119",\ + "f120","f121","f122","f123","f124","f125","f126","f127", \ + /* Predicate registers. 
*/ \ + "p0", "p1", "p2", "p3", "p4", "p5", "p6", "p7", "p8", "p9", \ + "p10", "p11", "p12", "p13", "p14", "p15", "p16", "p17", "p18", "p19", \ + "p20", "p21", "p22", "p23", "p24", "p25", "p26", "p27", "p28", "p29", \ + "p30", "p31", "p32", "p33", "p34", "p35", "p36", "p37", "p38", "p39", \ + "p40", "p41", "p42", "p43", "p44", "p45", "p46", "p47", "p48", "p49", \ + "p50", "p51", "p52", "p53", "p54", "p55", "p56", "p57", "p58", "p59", \ + "p60", "p61", "p62", "p63", \ + /* Branch registers. */ \ + "b0", "b1", "b2", "b3", "b4", "b5", "b6", "b7", \ + /* Frame pointer. Return address. */ \ + "fp", "ra" \ +} + +/* If defined, a C initializer for an array of structures containing a name and + a register number. This macro defines additional names for hard registers, + thus allowing the `asm' option in declarations to refer to registers using + alternate names. */ + +#define ADDITIONAL_REGISTER_NAMES \ +{ \ + { "gp", R_GR (1) }, \ + { "sp", R_GR (12) }, \ + { "in0", IN_REG (0) }, \ + { "in1", IN_REG (1) }, \ + { "in2", IN_REG (2) }, \ + { "in3", IN_REG (3) }, \ + { "in4", IN_REG (4) }, \ + { "in5", IN_REG (5) }, \ + { "in6", IN_REG (6) }, \ + { "in7", IN_REG (7) }, \ + { "out0", OUT_REG (0) }, \ + { "out1", OUT_REG (1) }, \ + { "out2", OUT_REG (2) }, \ + { "out3", OUT_REG (3) }, \ + { "out4", OUT_REG (4) }, \ + { "out5", OUT_REG (5) }, \ + { "out6", OUT_REG (6) }, \ + { "out7", OUT_REG (7) }, \ + { "loc0", LOC_REG (0) }, \ + { "loc1", LOC_REG (1) }, \ + { "loc2", LOC_REG (2) }, \ + { "loc3", LOC_REG (3) }, \ + { "loc4", LOC_REG (4) }, \ + { "loc5", LOC_REG (5) }, \ + { "loc6", LOC_REG (6) }, \ + { "loc7", LOC_REG (7) }, \ + { "loc8", LOC_REG (8) }, \ + { "loc9", LOC_REG (9) }, \ + { "loc10", LOC_REG (10) }, \ + { "loc11", LOC_REG (11) }, \ + { "loc12", LOC_REG (12) }, \ + { "loc13", LOC_REG (13) }, \ + { "loc14", LOC_REG (14) }, \ + { "loc15", LOC_REG (15) }, \ + { "loc16", LOC_REG (16) }, \ + { "loc17", LOC_REG (17) }, \ + { "loc18", LOC_REG (18) }, \ + { "loc19", LOC_REG 
(19) }, \ + { "loc20", LOC_REG (20) }, \ + { "loc21", LOC_REG (21) }, \ + { "loc22", LOC_REG (22) }, \ + { "loc23", LOC_REG (23) }, \ + { "loc24", LOC_REG (24) }, \ + { "loc25", LOC_REG (25) }, \ + { "loc26", LOC_REG (26) }, \ + { "loc27", LOC_REG (27) }, \ + { "loc28", LOC_REG (28) }, \ + { "loc29", LOC_REG (29) }, \ + { "loc30", LOC_REG (30) }, \ + { "loc31", LOC_REG (31) }, \ + { "loc32", LOC_REG (32) }, \ + { "loc33", LOC_REG (33) }, \ + { "loc34", LOC_REG (34) }, \ + { "loc35", LOC_REG (35) }, \ + { "loc36", LOC_REG (36) }, \ + { "loc37", LOC_REG (37) }, \ + { "loc38", LOC_REG (38) }, \ + { "loc39", LOC_REG (39) }, \ + { "loc40", LOC_REG (40) }, \ + { "loc41", LOC_REG (41) }, \ + { "loc42", LOC_REG (42) }, \ + { "loc43", LOC_REG (43) }, \ + { "loc44", LOC_REG (44) }, \ + { "loc45", LOC_REG (45) }, \ + { "loc46", LOC_REG (46) }, \ + { "loc47", LOC_REG (47) }, \ + { "loc48", LOC_REG (48) }, \ + { "loc49", LOC_REG (49) }, \ + { "loc50", LOC_REG (50) }, \ + { "loc51", LOC_REG (51) }, \ + { "loc52", LOC_REG (52) }, \ + { "loc53", LOC_REG (53) }, \ + { "loc54", LOC_REG (54) }, \ + { "loc55", LOC_REG (55) }, \ + { "loc56", LOC_REG (56) }, \ + { "loc57", LOC_REG (57) }, \ + { "loc58", LOC_REG (58) }, \ + { "loc59", LOC_REG (59) }, \ + { "loc60", LOC_REG (60) }, \ + { "loc61", LOC_REG (61) }, \ + { "loc62", LOC_REG (62) }, \ + { "loc63", LOC_REG (63) }, \ + { "loc64", LOC_REG (64) }, \ + { "loc65", LOC_REG (65) }, \ + { "loc66", LOC_REG (66) }, \ + { "loc67", LOC_REG (67) }, \ + { "loc68", LOC_REG (68) }, \ + { "loc69", LOC_REG (69) }, \ + { "loc70", LOC_REG (70) }, \ + { "loc71", LOC_REG (71) }, \ + { "loc72", LOC_REG (72) }, \ + { "loc73", LOC_REG (73) }, \ + { "loc74", LOC_REG (74) }, \ + { "loc75", LOC_REG (75) }, \ + { "loc76", LOC_REG (76) }, \ + { "loc77", LOC_REG (77) }, \ + { "loc78", LOC_REG (78) }, \ + { "loc79", LOC_REG (79) }, \ +} + +/* A C compound statement to output to stdio stream STREAM the assembler syntax + for an instruction operand X.
X is an RTL expression. */ + +#define PRINT_OPERAND(STREAM, X, CODE) \ + ia64_print_operand (STREAM, X, CODE) + +/* A C expression which evaluates to true if CODE is a valid punctuation + character for use in the `PRINT_OPERAND' macro. */ + +/* ??? Keep this around for now, as we might need it later. */ + +/* #define PRINT_OPERAND_PUNCT_VALID_P(CODE) */ + +/* A C compound statement to output to stdio stream STREAM the assembler syntax + for an instruction operand that is a memory reference whose address is X. X + is an RTL expression. */ + +#define PRINT_OPERAND_ADDRESS(STREAM, X) \ + ia64_print_operand_address (STREAM, X) + +/* If defined, C string expressions to be used for the `%R', `%L', `%U', and + `%I' options of `asm_fprintf' (see `final.c'). */ + +#define REGISTER_PREFIX "" +#define LOCAL_LABEL_PREFIX "." +#define USER_LABEL_PREFIX "" +#define IMMEDIATE_PREFIX "" + + +/* Output of dispatch tables. */ + +/* This macro should be provided on machines where the addresses in a dispatch + table are relative to the table's own address. */ + +/* ??? Depends on the pointer size. */ + +#define ASM_OUTPUT_ADDR_DIFF_ELT(STREAM, BODY, VALUE, REL) \ + fprintf (STREAM, "\tdata8 .L%d-.L%d\n", VALUE, REL) + +/* This is how to output an element of a case-vector that is absolute. + (Ia64 does not use such vectors, but we must define this macro anyway.) */ + +#define ASM_OUTPUT_ADDR_VEC_ELT(STREAM, VALUE) abort () + +/* Define this if something special must be output at the end of a jump-table. + We need to align back to a 16 byte boundary because offsets are smaller than + instructions. */ + +#define ASM_OUTPUT_CASE_END(STREAM, NUM, TABLE) ASM_OUTPUT_ALIGN (STREAM, 4) + +/* Jump tables only need 8 byte alignment. */ + +#define ADDR_VEC_ALIGN(ADDR_VEC) 3 + + +/* Assembler Commands for Exception Regions. */ + +/* ??? This entire section of ia64.h needs to be implemented and then cleaned + up. */ + +/* A C expression to output text to mark the start of an exception region. 
+ + This macro need not be defined on most platforms. */ +/* #define ASM_OUTPUT_EH_REGION_BEG() */ + +/* A C expression to output text to mark the end of an exception region. + + This macro need not be defined on most platforms. */ +/* #define ASM_OUTPUT_EH_REGION_END() */ + +/* A C expression to switch to the section in which the main exception table is + to be placed. The default is a section named `.gcc_except_table' on machines + that support named sections via `ASM_OUTPUT_SECTION_NAME', otherwise if `-fpic' + or `-fPIC' is in effect, the `data_section', otherwise the + `readonly_data_section'. */ +/* #define EXCEPTION_SECTION() */ + +/* If defined, a C string constant for the assembler operation to switch to the + section for exception handling frame unwind information. If not defined, + GNU CC will provide a default definition if the target supports named + sections. `crtstuff.c' uses this macro to switch to the appropriate + section. + + You should define this symbol if your target supports DWARF 2 frame unwind + information and the default definition does not work. */ +/* #define EH_FRAME_SECTION_ASM_OP */ + +/* A C expression that is nonzero if the normal exception table output should + be omitted. + + This macro need not be defined on most platforms. */ +/* #define OMIT_EH_TABLE() */ + +/* Alternate runtime support for looking up an exception at runtime and finding + the associated handler, if the default method won't work. + + This macro need not be defined on most platforms. */ +/* #define EH_TABLE_LOOKUP() */ + +/* A C expression that decides whether or not the current function needs to + have a function unwinder generated for it. See the file `except.c' for + details on when to define this, and how. */ +/* #define DOESNT_NEED_UNWINDER */ + +/* An rtx used to mask the return address found via RETURN_ADDR_RTX, so that it + does not contain any extraneous set bits in it. 
*/ +/* #define MASK_RETURN_ADDR */ + +/* Define this macro to 0 if your target supports DWARF 2 frame unwind + information, but it does not yet work with exception handling. Otherwise, + if your target supports this information (if it defines + `INCOMING_RETURN_ADDR_RTX' and either `UNALIGNED_INT_ASM_OP' or + `OBJECT_FORMAT_ELF'), GCC will provide a default definition of 1. + + If this macro is defined to 1, the DWARF 2 unwinder will be the default + exception handling mechanism; otherwise, setjmp/longjmp will be used by + default. + + If this macro is defined to anything, the DWARF 2 unwinder will be used + instead of inline unwinders and __unwind_function in the non-setjmp case. */ +/* #define DWARF2_UNWIND_INFO */ + + +/* Assembler Commands for Alignment. */ + +/* The alignment (log base 2) to put in front of LABEL, which follows + a BARRIER. */ + +/* ??? Investigate. */ + +/* ??? Emitting align directives increases the size of the line number debug + info, because each .align forces use of an extended opcode. Perhaps try + to fix this in the assembler? */ + +/* #define LABEL_ALIGN_AFTER_BARRIER(LABEL) */ + +/* The desired alignment for the location counter at the beginning + of a loop. */ + +/* ??? Investigate. */ +/* #define LOOP_ALIGN(LABEL) */ + +/* Define this macro if `ASM_OUTPUT_SKIP' should not be used in the text + section because it fails put zeros in the bytes that are skipped. */ + +#define ASM_NO_SKIP_IN_TEXT 1 + +/* A C statement to output to the stdio stream STREAM an assembler command to + advance the location counter to a multiple of 2 to the POWER bytes. */ + +#define ASM_OUTPUT_ALIGN(STREAM, POWER) \ + fprintf (STREAM, "\t.align %d\n", 1<<(POWER)) + + +/* Macros Affecting all Debug Formats. */ + +/* This is handled in svr4.h and sysv4.h. */ + + +/* Specific Options for DBX Output. */ + +/* This is handled by dbxelf.h which is included by svr4.h. */ + + +/* Open ended Hooks for DBX Output. */ + +/* Likewise. */ + + +/* File names in DBX format. 
*/ + +/* Likewise. */ + + +/* Macros for SDB and Dwarf Output. */ + +/* Define this macro if GNU CC should produce dwarf version 2 format debugging + output in response to the `-g' option. */ + +#define DWARF2_DEBUGGING_INFO + +/* Section names for DWARF2 debug info. */ + +#define DEBUG_INFO_SECTION ".debug_info, \"\", \"progbits\"" +#define ABBREV_SECTION ".debug_abbrev, \"\", \"progbits\"" +#define ARANGES_SECTION ".debug_aranges, \"\", \"progbits\"" +#define DEBUG_LINE_SECTION ".debug_line, \"\", \"progbits\"" +#define PUBNAMES_SECTION ".debug_pubnames, \"\", \"progbits\"" + +/* C string constants giving the pseudo-op to use for a sequence of + 2, 4, and 8 byte unaligned constants. dwarf2out.c needs these. */ + +#define UNALIGNED_SHORT_ASM_OP "data2.ua" +#define UNALIGNED_INT_ASM_OP "data4.ua" +#define UNALIGNED_DOUBLE_INT_ASM_OP "data8.ua" + +/* We need to override the default definition for this in dwarf2out.c so that + we can emit the necessary # postfix. */ +#define ASM_NAME_TO_STRING(STR, NAME) \ + do { \ + if ((NAME)[0] == '*') \ + dyn_string_append (STR, NAME + 1); \ + else \ + { \ + char *newstr; \ + STRIP_NAME_ENCODING (newstr, NAME); \ + dyn_string_append (STR, user_label_prefix); \ + dyn_string_append (STR, newstr); \ + dyn_string_append (STR, "#"); \ + } \ + } \ + while (0) + +#define DWARF2_ASM_LINE_DEBUG_INFO (TARGET_DWARF2_ASM) + + +/* Cross Compilation and Floating Point. */ + +/* Define to enable software floating point emulation. */ +#define REAL_ARITHMETIC + + +/* Miscellaneous Parameters. */ + +/* Define this if you have defined special-purpose predicates in the file + `MACHINE.c'. For each predicate, list all rtl codes that can be in + expressions matched by the predicate. 
*/ + +#define PREDICATE_CODES \ +{ "call_operand", {SUBREG, REG, SYMBOL_REF}}, \ +{ "sdata_symbolic_operand", {SYMBOL_REF, CONST}}, \ +{ "symbolic_operand", {SYMBOL_REF, CONST, LABEL_REF}}, \ +{ "function_operand", {SYMBOL_REF}}, \ +{ "setjmp_operand", {SYMBOL_REF}}, \ +{ "move_operand", {SUBREG, REG, MEM, CONST_INT, CONST_DOUBLE, \ + CONSTANT_P_RTX, SYMBOL_REF, CONST, LABEL_REF}}, \ +{ "reg_or_0_operand", {SUBREG, REG, CONST_INT}}, \ +{ "reg_or_6bit_operand", {SUBREG, REG, CONST_INT, CONSTANT_P_RTX}}, \ +{ "reg_or_8bit_operand", {SUBREG, REG, CONST_INT, CONSTANT_P_RTX}}, \ +{ "reg_or_8bit_adjusted_operand", {SUBREG, REG, CONST_INT, \ + CONSTANT_P_RTX}}, \ +{ "reg_or_8bit_and_adjusted_operand", {SUBREG, REG, CONST_INT, \ + CONSTANT_P_RTX}}, \ +{ "reg_or_14bit_operand", {SUBREG, REG, CONST_INT, CONSTANT_P_RTX}}, \ +{ "reg_or_22bit_operand", {SUBREG, REG, CONST_INT, CONSTANT_P_RTX}}, \ +{ "shift_count_operand", {SUBREG, REG, CONST_INT, CONSTANT_P_RTX}}, \ +{ "shift_32bit_count_operand", {SUBREG, REG, CONST_INT, \ + CONSTANT_P_RTX}}, \ +{ "shladd_operand", {CONST_INT}}, \ +{ "fetchadd_operand", {CONST_INT}}, \ +{ "reg_or_fp01_operand", {SUBREG, REG, CONST_DOUBLE, CONSTANT_P_RTX}}, \ +{ "normal_comparison_operator", {EQ, NE, GT, LE, GTU, LEU}}, \ +{ "adjusted_comparison_operator", {LT, GE, LTU, GEU}}, \ +{ "call_multiple_values_operation", {PARALLEL}}, + +/* An alias for a machine mode name. This is the machine mode that elements of + a jump-table should have. */ + +#define CASE_VECTOR_MODE Pmode + +/* Define as C expression which evaluates to nonzero if the tablejump + instruction expects the table to contain offsets from the address of the + table. */ + +#define CASE_VECTOR_PC_RELATIVE 1 + +/* Define this macro if operations between registers with integral mode smaller + than a word are always performed on the entire register. 
*/ + +#define WORD_REGISTER_OPERATIONS + +/* Define this macro to be a C expression indicating when insns that read + memory in MODE, an integral mode narrower than a word, set the bits outside + of MODE to be either the sign-extension or the zero-extension of the data + read. */ + +#define LOAD_EXTEND_OP(MODE) ZERO_EXTEND + +/* An alias for a tree code that should be used by default for conversion of + floating point values to fixed point. */ + +/* ??? Looks like this macro is obsolete and should be deleted everywhere. */ + +#define IMPLICIT_FIX_EXPR FIX_ROUND_EXPR + +/* An alias for a tree code that is the easiest kind of division to compile + code for in the general case. */ + +#define EASY_DIV_EXPR TRUNC_DIV_EXPR + +/* The maximum number of bytes that a single instruction can move quickly from + memory to memory. */ +#define MOVE_MAX 8 + +/* A C expression which is nonzero if on this machine it is safe to "convert" + an integer of INPREC bits to one of OUTPREC bits (where OUTPREC is smaller + than INPREC) by merely operating on it as if it had only OUTPREC bits. */ + +#define TRULY_NOOP_TRUNCATION(OUTPREC, INPREC) 1 + +/* A C expression describing the value returned by a comparison operator with + an integral mode and stored by a store-flag instruction (`sCOND') when the + condition is true. */ + +/* ??? Investigate using -1 instead of 1. */ + +#define STORE_FLAG_VALUE 1 + +/* An alias for the machine mode for pointers. */ + +/* ??? This would change if we had ILP32 support. */ + +#define Pmode DImode + +/* An alias for the machine mode used for memory references to functions being + called, in `call' RTL expressions. */ + +#define FUNCTION_MODE Pmode + +/* Define this macro to handle System V style pragmas: #pragma pack and + #pragma weak. Note, #pragma weak will only be supported if SUPPORT_WEAK is + defined. 
*/ + +#define HANDLE_SYSV_PRAGMA + +/* If defined, a C expression whose value is nonzero if IDENTIFIER with + arguments ARGS is a valid machine specific attribute for TYPE. The + attributes in ATTRIBUTES have previously been assigned to TYPE. */ + +#define VALID_MACHINE_TYPE_ATTRIBUTE(TYPE, ATTRIBUTES, IDENTIFIER, ARGS) \ + ia64_valid_type_attribute (TYPE, ATTRIBUTES, IDENTIFIER, ARGS) + +/* In rare cases, correct code generation requires extra machine dependent + processing between the second jump optimization pass and delayed branch + scheduling. On those machines, define this macro as a C statement to act on + the code starting at INSN. */ + +#define MACHINE_DEPENDENT_REORG(INSN) ia64_reorg (INSN) + +/* A C expression for the maximum number of instructions to execute via + conditional execution instructions instead of a branch. A value of + BRANCH_COST+1 is the default if the machine does not use + cc0, and 1 if it does use cc0. */ +/* ??? Investigate. */ +/* #define MAX_CONDITIONAL_EXECUTE */ + +/* Indicate how many instructions can be issued at the same time. */ + +/* ??? For now, we just schedule to fill bundles. 
*/ + +#define ISSUE_RATE 3 + +enum ia64_builtins +{ + IA64_BUILTIN_SYNCHRONIZE, + + IA64_BUILTIN_FETCH_AND_ADD_SI, + IA64_BUILTIN_FETCH_AND_SUB_SI, + IA64_BUILTIN_FETCH_AND_OR_SI, + IA64_BUILTIN_FETCH_AND_AND_SI, + IA64_BUILTIN_FETCH_AND_XOR_SI, + IA64_BUILTIN_FETCH_AND_NAND_SI, + + IA64_BUILTIN_ADD_AND_FETCH_SI, + IA64_BUILTIN_SUB_AND_FETCH_SI, + IA64_BUILTIN_OR_AND_FETCH_SI, + IA64_BUILTIN_AND_AND_FETCH_SI, + IA64_BUILTIN_XOR_AND_FETCH_SI, + IA64_BUILTIN_NAND_AND_FETCH_SI, + + IA64_BUILTIN_BOOL_COMPARE_AND_SWAP_SI, + IA64_BUILTIN_VAL_COMPARE_AND_SWAP_SI, + + IA64_BUILTIN_SYNCHRONIZE_SI, + + IA64_BUILTIN_LOCK_TEST_AND_SET_SI, + + IA64_BUILTIN_LOCK_RELEASE_SI, + + IA64_BUILTIN_FETCH_AND_ADD_DI, + IA64_BUILTIN_FETCH_AND_SUB_DI, + IA64_BUILTIN_FETCH_AND_OR_DI, + IA64_BUILTIN_FETCH_AND_AND_DI, + IA64_BUILTIN_FETCH_AND_XOR_DI, + IA64_BUILTIN_FETCH_AND_NAND_DI, + + IA64_BUILTIN_ADD_AND_FETCH_DI, + IA64_BUILTIN_SUB_AND_FETCH_DI, + IA64_BUILTIN_OR_AND_FETCH_DI, + IA64_BUILTIN_AND_AND_FETCH_DI, + IA64_BUILTIN_XOR_AND_FETCH_DI, + IA64_BUILTIN_NAND_AND_FETCH_DI, + + IA64_BUILTIN_BOOL_COMPARE_AND_SWAP_DI, + IA64_BUILTIN_VAL_COMPARE_AND_SWAP_DI, + + IA64_BUILTIN_SYNCHRONIZE_DI, + + IA64_BUILTIN_LOCK_TEST_AND_SET_DI, + + IA64_BUILTIN_LOCK_RELEASE_DI +}; + +/* Codes for expand_compare_and_swap and expand_swap_and_compare. */ +enum fetchop_code { + IA64_ADD_OP, IA64_SUB_OP, IA64_OR_OP, IA64_AND_OP, IA64_XOR_OP, IA64_NAND_OP +}; + +#define MD_INIT_BUILTINS do { \ + ia64_init_builtins (); \ + } while (0) + +#define MD_EXPAND_BUILTIN(EXP, TARGET, SUBTARGET, MODE, IGNORE) \ + ia64_expand_builtin ((EXP), (TARGET), (SUBTARGET), (MODE), (IGNORE)) + +/* End of ia64.h */ diff --git a/gcc/config/ia64/ia64.md b/gcc/config/ia64/ia64.md new file mode 100644 index 0000000..405d02a --- /dev/null +++ b/gcc/config/ia64/ia64.md @@ -0,0 +1,3591 @@ +;; IA-64 Machine description template +;; Copyright (C) 1999 Cygnus Solutions. +;; Contributed by James E. Wilson and +;; David Mosberger . 
+ +;; This file is part of GNU CC. + +;; GNU CC is free software; you can redistribute it and/or modify +;; it under the terms of the GNU General Public License as published by +;; the Free Software Foundation; either version 2, or (at your option) +;; any later version. + +;; GNU CC is distributed in the hope that it will be useful, +;; but WITHOUT ANY WARRANTY; without even the implied warranty of +;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +;; GNU General Public License for more details. + +;; You should have received a copy of the GNU General Public License +;; along with GNU CC; see the file COPYING. If not, write to +;; the Free Software Foundation, 59 Temple Place - Suite 330, +;; Boston, MA 02111-1307, USA. + +;;- See file "rtl.def" for documentation on define_insn, match_*, et. al. + +;; ??? Add support for long double XFmode patterns. + +;; ??? register_operand accepts (subreg:DI (mem:SI X)) which forces later +;; reload. This will be fixed once scheduling support is turned on. + +;; ??? Optimize for post-increment addressing modes. + +;; ??? fselect is not supported, because there is no integer register +;; equivalent. + +;; ??? fp abs/min/max instructions may also work for integer values. + +;; ??? Would a predicate_reg_operand predicate be useful? The HP one is buggy, +;; it assumes the operand is a register and takes REGNO of it without checking. + +;; ??? Would a branch_reg_operand predicate be useful? The HP one is buggy, +;; it assumes the operand is a register and takes REGNO of it without checking. + +;; ??? Go through list of documented named patterns and look for more to +;; implement. + +;; ??? Go through instruction manual and look for more instructions that +;; can be emitted. + +;; ??? Add function unit scheduling info for Itanium (TM) processor. + + +;; :::::::::::::::::::: +;; :: +;; :: Attributes +;; :: +;; :::::::::::::::::::: + +;; Instruction type. 
This primarily determines how instructions can be +;; packed in bundles, and secondarily affects scheduling to function units. + +;; A alu, can go in I or M syllable of a bundle +;; I integer +;; M memory +;; F floating-point +;; B branch +;; L long immediate, takes two syllables +;; S stop bit + +;; ??? Should not have any pattern with type unknown. Perhaps add code to +;; check this in md_reorg? Currently use unknown for patterns which emit +;; multiple instructions, patterns which emit 0 instructions, and patterns +;; which emit instruction that can go in any slot (e.g. nop). + +(define_attr "type" "unknown,A,I,M,F,B,L,S" (const_string "unknown")) + + +;; :::::::::::::::::::: +;; :: +;; :: Function Units +;; :: +;; :::::::::::::::::::: + +;; Each usage of a function units by a class of insns is specified with a +;; `define_function_unit' expression, which looks like this: +;; (define_function_unit NAME MULTIPLICITY SIMULTANEITY TEST READY-DELAY +;; ISSUE-DELAY [CONFLICT-LIST]) + +;; This default scheduling info seeks to pack instructions into bundles +;; efficiently to reduce code size, so we just list how many of each +;; instruction type can go in a bundle. ISSUE_RATE is set to 3. + +;; ??? Add scheduler ready-list hook (MD_SCHED_REORDER) that orders +;; instructions, so that the next instruction can fill the next bundle slot. +;; This really needs to know where the stop bits are though. + +;; ??? Use MD_SCHED_REORDER to put alloc first instead of using an unspec +;; volatile. Use ADJUST_PRIORITY to set the priority of alloc very high to +;; make it schedule first. + +;; ??? Modify the md_reorg code that emits stop bits so that instead of putting +;; them in the last possible place, we put them in places where bundles allow +;; them. This should reduce code size, but may decrease performance if we end +;; up with more stop bits than the minimum we need. + +;; Alu instructions can execute on either the integer or memory function +;; unit. 
We indicate this by defining an alu function unit, and then marking +;; it as busy everytime we issue a integer or memory type instruction. + +(define_function_unit "alu" 3 1 (eq_attr "type" "A,I,M") 1 0) + +(define_function_unit "integer" 2 1 (eq_attr "type" "I") 1 0) + +(define_function_unit "memory" 3 1 (eq_attr "type" "M") 1 0) + +(define_function_unit "floating_point" 1 1 (eq_attr "type" "F") 1 0) + +(define_function_unit "branch" 3 1 (eq_attr "type" "B") 1 0) + +;; ??? This isn't quite right, because we can only fit two insns in a bundle +;; when using an L type instruction. That isn't modeled currently. + +(define_function_unit "long_immediate" 1 1 (eq_attr "type" "L") 1 0) + + +;; :::::::::::::::::::: +;; :: +;; :: Moves +;; :: +;; :::::::::::::::::::: + +(define_expand "movqi" + [(set (match_operand:QI 0 "general_operand" "") + (match_operand:QI 1 "general_operand" ""))] + "" + " +{ + if (! reload_in_progress && ! reload_completed + && GET_CODE (operands[0]) == MEM + && GET_CODE (operands[1]) == MEM) + operands[1] = copy_to_mode_reg (QImode, operands[1]); +}") + +(define_insn "*movqi_internal" + [(set (match_operand:QI 0 "nonimmediate_operand" "=r,r,r,m,r,*e") + (match_operand:QI 1 "move_operand" "r,J,m,r,*e,r"))] + "! memory_operand (operands[0], QImode) + || ! memory_operand (operands[1], QImode)" + "@ + mov %0 = %1 + addl %0 = %1, r0 + ld1%O1 %0 = %1%P1 + st1%Q0 %0 = %1%P0 + getf.sig %0 = %1 + setf.sig %0 = %1" + [(set_attr "type" "A,A,M,M,M,M")]) + +(define_expand "movhi" + [(set (match_operand:HI 0 "general_operand" "") + (match_operand:HI 1 "general_operand" ""))] + "" + " +{ + if (! reload_in_progress && ! reload_completed + && GET_CODE (operands[0]) == MEM + && GET_CODE (operands[1]) == MEM) + operands[1] = copy_to_mode_reg (HImode, operands[1]); +}") + +(define_insn "*movhi_internal" + [(set (match_operand:HI 0 "nonimmediate_operand" "=r,r,r,m,r,*e") + (match_operand:HI 1 "move_operand" "r,J,m,r,*e,r"))] + "! 
memory_operand (operands[0], HImode) + || !memory_operand (operands[1], HImode)" + "@ + mov %0 = %1 + addl %0 = %1, r0 + ld2%O1 %0 = %1%P1 + st2%Q0 %0 = %1%P0 + getf.sig %0 = %1 + setf.sig %0 = %1" + [(set_attr "type" "A,A,M,M,M,M")]) + +(define_expand "movsi" + [(set (match_operand:SI 0 "general_operand" "") + (match_operand:SI 1 "general_operand" ""))] + "" + " +{ + if (! reload_in_progress && ! reload_completed + && GET_CODE (operands[0]) == MEM + && GET_CODE (operands[1]) == MEM) + operands[1] = copy_to_mode_reg (SImode, operands[1]); +}") + +(define_insn "*movsi_internal" + [(set (match_operand:SI 0 "nonimmediate_operand" "=r,r,r,r,m,r,*e,*e,r,*f") + (match_operand:SI 1 "move_operand" "r,J,i,m,r,*e,r,*e,*f,r"))] + "! memory_operand (operands[0], SImode) + || ! memory_operand (operands[1], SImode)" + "@ + mov %0 = %1 + addl %0 = %1, r0 + movl %0 = %1 + ld4%O1 %0 = %1%P1 + st4%Q0 %0 = %1%P0 + getf.sig %0 = %1 + setf.sig %0 = %1 + mov %0 = %1 + getf.s %0 = %1 + setf.s %0 = %1" + [(set_attr "type" "A,A,L,M,M,M,M,F,M,M")]) + +(define_expand "movdi" + [(set (match_operand:DI 0 "general_operand" "") + (match_operand:DI 1 "general_operand" ""))] + "" + " +{ + /* ??? Should generalize this, so that we can also support 32 bit + pointers. */ + if (! TARGET_NO_PIC && symbolic_operand (operands[1], DImode)) + { + rtx temp; + + /* Operand[0] could be a MEM, which isn't a valid destination for the + PIC load address patterns. */ + if (! register_operand (operands[0], DImode)) + temp = gen_reg_rtx (DImode); + else + temp = operands[0]; + + if (GET_CODE (operands[1]) == SYMBOL_REF && SYMBOL_REF_FLAG (operands[1])) + emit_insn (gen_load_fptr (temp, operands[1])); + else if (sdata_symbolic_operand (operands[1], DImode)) + emit_insn (gen_load_gprel (temp, operands[1])); + else + emit_insn (gen_load_symptr (temp, operands[1])); + + if (temp == operands[0]) + DONE; + + operands[1] = temp; + } + + if (! reload_in_progress && ! 
reload_completed + && GET_CODE (operands[0]) == MEM + && GET_CODE (operands[1]) == MEM) + operands[1] = copy_to_mode_reg (DImode, operands[1]); +}") + +;; ??? Emit stf8 for m/*e constraint. +(define_insn "*movdi_internal" + [(set (match_operand:DI 0 "nonimmediate_operand" "=r,r,r,r,m,r,*e,*e,r,*f,r,*b") + (match_operand:DI 1 "move_operand" "r,J,i,m,r,*e,r,*e,*f,r,*b,r"))] + "! memory_operand (operands[0], DImode) + || ! memory_operand (operands[1], DImode)" + "@ + mov %0 = %1 + addl %0 = %1, r0 + movl %0 = %1 + ld8%O1 %0 = %1%P1 + st8%Q0 %0 = %1%P0 + getf.sig %0 = %1 + setf.sig %0 = %1 + mov %0 = %1 + getf.d %0 = %1 + setf.d %0 = %1 + mov %0 = %1 + mov %0 = %1" + [(set_attr "type" "A,A,L,M,M,M,M,F,M,M,I,I")]) + +(define_expand "load_fptr" + [(set (match_dup 2) + (plus:DI (reg:DI 1) (match_operand:DI 1 "function_operand" ""))) + (set (match_operand:DI 0 "register_operand" "") (mem:DI (match_dup 2)))] + "" + " +{ + if (reload_in_progress) + operands[2] = operands[0]; + else + operands[2] = gen_reg_rtx (DImode); +}") + +(define_insn "*load_fptr_internal1" + [(set (match_operand:DI 0 "register_operand" "=r") + (plus:DI (reg:DI 1) (match_operand:DI 1 "function_operand" "s")))] + "" + "addl %0 = @ltoff(@fptr(%1)), gp" + [(set_attr "type" "A")]) + +(define_insn "load_gprel" + [(set (match_operand:DI 0 "register_operand" "=r") + (plus:DI (reg:DI 1) (match_operand:DI 1 "sdata_symbolic_operand" "s")))] + "" + "addl %0 = @gprel(%1), gp" + [(set_attr "type" "A")]) + +(define_expand "load_symptr" + [(set (match_dup 2) + (plus:DI (reg:DI 1) (match_operand:DI 1 "symbolic_operand" ""))) + (set (match_operand:DI 0 "register_operand" "") (mem:DI (match_dup 2)))] + "" + " +{ + if (reload_in_progress) + operands[2] = operands[0]; + else + operands[2] = gen_reg_rtx (DImode); +}") + +(define_insn "*load_symptr_internal1" + [(set (match_operand:DI 0 "register_operand" "=r") + (plus:DI (reg:DI 1) (match_operand:DI 1 "symbolic_operand" "s")))] + "" + "addl %0 = @ltoff(%1), gp" + [(set_attr 
"type" "A")]) + +;; Floating Point Moves +;; +;; Note - Patterns for SF mode moves are compulsory, but +;; patterns for DF are optional, as GCC can synthesise them. + +(define_expand "movsf" + [(set (match_operand:SF 0 "general_operand" "") + (match_operand:SF 1 "general_operand" ""))] + "" + " +{ + if (! reload_in_progress && ! reload_completed + && GET_CODE (operands[0]) == MEM + && GET_CODE (operands[1]) == MEM) + operands[1] = copy_to_mode_reg (SFmode, operands[1]); +}") + +;; ??? The r/m alternative is apparently needed because of paradoxical subregs, +;; so it may no longer be necessary after scheduling is enabled. + +(define_insn "*movsf_internal" + [(set (match_operand:SF 0 "nonimmediate_operand" "=f,f,m,*r,f,*r,*r") + (match_operand:SF 1 "general_operand" "fG,m,fG,fG,*r,*r,m"))] + "! memory_operand (operands[0], SFmode) + || ! memory_operand (operands[1], SFmode)" + "@ + mov %0 = %F1 + ldfs %0 = %1%P1 + stfs %0 = %F1%P0 + getf.s %0 = %F1 + setf.s %0 = %1 + mov %0 = %1 + ld4%O1 %0 = %1" + [(set_attr "type" "F,M,M,M,M,A,M")]) + +(define_expand "movdf" + [(set (match_operand:DF 0 "general_operand" "") + (match_operand:DF 1 "general_operand" ""))] + "" + " +{ + if (! reload_in_progress && ! reload_completed + && GET_CODE (operands[0]) == MEM + && GET_CODE (operands[1]) == MEM) + operands[1] = copy_to_mode_reg (DFmode, operands[1]); +}") + +;; ??? movsf has a r/m alternative, movdf doesn't. + +(define_insn "*movdf_internal" + [(set (match_operand:DF 0 "nonimmediate_operand" "=f,f,m,*r,f,*r") + (match_operand:DF 1 "general_operand" "fG,m,fG,fG,*r,*r"))] + "! memory_operand (operands[0], DFmode) + || ! 
memory_operand (operands[1], DFmode)" + "@ + mov %0 = %F1 + ldfd %0 = %1%P1 + stfd %0 = %F1%P0 + getf.d %0 = %F1 + setf.d %0 = %1 + mov %0 = %1" + [(set_attr "type" "F,M,M,M,M,A")]) + + +;; :::::::::::::::::::: +;; :: +;; :: Conversions +;; :: +;; :::::::::::::::::::: + +;; Signed conversions from a smaller integer to a larger integer + +(define_insn "extendqidi2" + [(set (match_operand:DI 0 "register_operand" "=r") + (sign_extend:DI (match_operand:QI 1 "register_operand" "r")))] + "" + "sxt1 %0 = %1" + [(set_attr "type" "I")]) + +(define_insn "extendhidi2" + [(set (match_operand:DI 0 "register_operand" "=r") + (sign_extend:DI (match_operand:HI 1 "register_operand" "r")))] + "" + "sxt2 %0 = %1" + [(set_attr "type" "I")]) + +(define_insn "extendsidi2" + [(set (match_operand:DI 0 "register_operand" "=r,*e") + (sign_extend:DI (match_operand:SI 1 "register_operand" "r,*e")))] + "" + "@ + sxt4 %0 = %1 + fsxt.r %0 = %1, %1%B0" + [(set_attr "type" "I,F")]) + +;; Unsigned conversions from a smaller integer to a larger integer + +(define_insn "zero_extendqidi2" + [(set (match_operand:DI 0 "register_operand" "=r,r") + (zero_extend:DI (match_operand:QI 1 "nonimmediate_operand" "r,m")))] + "" + "@ + zxt1 %0 = %1 + ld1%O1 %0 = %1%P1" + [(set_attr "type" "I,M")]) + +(define_insn "zero_extendhidi2" + [(set (match_operand:DI 0 "register_operand" "=r,r") + (zero_extend:DI (match_operand:HI 1 "nonimmediate_operand" "r,m")))] + "" + "@ + zxt2 %0 = %1 + ld2%O1 %0 = %1%P1" + [(set_attr "type" "I,M")]) + +(define_insn "zero_extendsidi2" + [(set (match_operand:DI 0 "register_operand" "=r,r,*e") + (zero_extend:DI (match_operand:SI 1 "nonimmediate_operand" "r,m,*e")))] + "" + "@ + zxt4 %0 = %1 + ld4%O1 %0 = %1%P1 + fsxt.r %0 = f1, %1%B0" + [(set_attr "type" "I,M,F")]) + +;; Convert between floating point types of different sizes. + +;; ??? Optimization opportunity here. 
+ +(define_insn "extendsfdf2" + [(set (match_operand:DF 0 "register_operand" "=f,f") + (float_extend:DF (match_operand:SF 1 "register_operand" "0,f")))] + "" + "@ + //nop + mov %0 = %1" + [(set_attr "type" "unknown,F")]) + +(define_insn "truncdfsf2" + [(set (match_operand:SF 0 "register_operand" "=f") + (float_truncate:SF (match_operand:DF 1 "register_operand" "f")))] + "" + "fnorm.s %0 = %1%B0" + [(set_attr "type" "F")]) + +;; Convert between signed integer types and floating point. + +;; ??? Instead of having floatdidf2, we should have a floatditf2 pattern, +;; and then add conversions from tf to df and sf. + +(define_insn "floatdidf2" + [(set (match_operand:DF 0 "register_operand" "=f") + (float:DF (match_operand:DI 1 "register_operand" "e")))] + "" + "fcvt.xf %0 = %1\;;;\;fnorm.d %0 = %0%B0" + [(set_attr "type" "unknown")]) + +(define_insn "fix_truncsfdi2" + [(set (match_operand:DI 0 "register_operand" "=e") + (fix:DI (match_operand:SF 1 "register_operand" "f")))] + "" + "fcvt.fx.trunc %0 = %1%B0" + [(set_attr "type" "F")]) + +(define_insn "fix_truncdfdi2" + [(set (match_operand:DI 0 "register_operand" "=e") + (fix:DI (match_operand:DF 1 "register_operand" "f")))] + "" + "fcvt.fx.trunc %0 = %1%B0" + [(set_attr "type" "F")]) + +;; Convert between unsigned integer types and floating point. 
+ +(define_insn "floatunsdisf2" + [(set (match_operand:SF 0 "register_operand" "=f") + (unsigned_float:SF (match_operand:DI 1 "register_operand" "e")))] + "" + "fcvt.xuf.s %0 = %1%B0" + [(set_attr "type" "F")]) + +(define_insn "floatunsdidf2" + [(set (match_operand:DF 0 "register_operand" "=f") + (unsigned_float:DF (match_operand:DI 1 "register_operand" "e")))] + "" + "fcvt.xuf.d %0 = %1%B0" + [(set_attr "type" "F")]) + +(define_insn "fixuns_truncsfdi2" + [(set (match_operand:DI 0 "register_operand" "=e") + (unsigned_fix:DI (match_operand:SF 1 "register_operand" "f")))] + "" + "fcvt.fxu.trunc %0 = %1%B0" + [(set_attr "type" "F")]) + +(define_insn "fixuns_truncdfdi2" + [(set (match_operand:DI 0 "register_operand" "=e") + (unsigned_fix:DI (match_operand:DF 1 "register_operand" "f")))] + "" + "fcvt.fxu.trunc %0 = %1%B0" + [(set_attr "type" "F")]) + + +;; :::::::::::::::::::: +;; :: +;; :: Bit field extraction +;; :: +;; :::::::::::::::::::: + +;; ??? It would be useful to have SImode versions of the extract and insert +;; patterns. + +(define_insn "extv" + [(set (match_operand:DI 0 "register_operand" "=r") + (sign_extract:DI (match_operand:DI 1 "register_operand" "r") + (match_operand:DI 2 "const_int_operand" "n") + (match_operand:DI 3 "const_int_operand" "n")))] + "" + "extr %0 = %1, %3, %2" + [(set_attr "type" "I")]) + +(define_insn "extzv" + [(set (match_operand:DI 0 "register_operand" "=r") + (zero_extract:DI (match_operand:DI 1 "register_operand" "r") + (match_operand:DI 2 "const_int_operand" "n") + (match_operand:DI 3 "const_int_operand" "n")))] + "" + "extr.u %0 = %1, %3, %2" + [(set_attr "type" "I")]) + +;; Insert a bit field. +;; Can have 3 operands, source1 (inserter), source2 (insertee), dest. +;; Source1 can be 0 or -1. +;; Source2 can be 0. + +;; ??? Actual dep instruction is more powerful than what these insv +;; patterns support. Unfortunately, combine is unable to create patterns +;; where source2 != dest. 
+ +(define_expand "insv" + [(set (zero_extract:DI (match_operand:DI 0 "register_operand" "") + (match_operand:DI 1 "const_int_operand" "") + (match_operand:DI 2 "const_int_operand" "")) + (match_operand:DI 3 "nonmemory_operand" ""))] + "" + " +{ + int width = INTVAL (operands[1]); + int shift = INTVAL (operands[2]); + + /* If operand[3] is a constant, and isn't 0 or -1, then load it into a + pseudo. */ + if (! register_operand (operands[3], DImode) + && operands[3] != const0_rtx && operands[3] != constm1_rtx) + operands[3] = force_reg (DImode, operands[3]); + + /* If this is a single dep instruction, we have nothing to do. */ + if (! ((register_operand (operands[3], DImode) && width <= 16) + || operands[3] == const0_rtx || operands[3] == constm1_rtx)) + { + /* Check for cases that can be implemented with a mix instruction. */ + if (width == 32 && shift == 0) + { + /* Directly generating the mix4left instruction confuses + optimize_bit_field in function.c. Since this is performing + a useful optimization, we defer generation of the complicated + mix4left RTL to the first splitting phase. */ + rtx tmp = gen_reg_rtx (DImode); + emit_insn (gen_shift_mix4left (operands[0], operands[3], tmp)); + DONE; + } + else if (width == 32 && shift == 32) + { + emit_insn (gen_mix4right (operands[0], operands[3])); + DONE; + } + + FAIL; + +#if 0 + /* This code may be useful for other IA-64 processors, so we leave it in + for now. 
*/ + while (width > 16) + { + rtx tmp; + + emit_insn (gen_insv (operands[0], GEN_INT (16), GEN_INT (shift), + operands[3])); + shift += 16; + width -= 16; + tmp = gen_reg_rtx (DImode); + emit_insn (gen_lshrdi3 (tmp, operands[3], GEN_INT (16))); + operands[3] = tmp; + } + operands[1] = GEN_INT (width); + operands[2] = GEN_INT (shift); +#endif + } +}") + +(define_insn "*insv_internal" + [(set (zero_extract:DI (match_operand:DI 0 "register_operand" "+r") + (match_operand:DI 1 "const_int_operand" "n") + (match_operand:DI 2 "const_int_operand" "n")) + (match_operand:DI 3 "nonmemory_operand" "rP"))] + "(register_operand (operands[3], DImode) && INTVAL (operands[1]) <= 16) + || operands[3] == const0_rtx || operands[3] == constm1_rtx" + "dep %0 = %3, %0, %2, %1" + [(set_attr "type" "I")]) + +(define_insn "shift_mix4left" + [(set (zero_extract:DI (match_operand:DI 0 "register_operand" "+r") + (const_int 32) (const_int 0)) + (match_operand:DI 1 "register_operand" "r")) + (clobber (match_operand:DI 2 "register_operand" "=r"))] + "" + "#" + [(set_attr "type" "unknown")]) + +;; ??? Need to emit an instruction group barrier here because this gets split +;; after md_reorg. + +(define_split + [(set (zero_extract:DI (match_operand:DI 0 "register_operand" "") + (const_int 32) (const_int 0)) + (match_operand:DI 1 "register_operand" "")) + (clobber (match_operand:DI 2 "register_operand" ""))] + "reload_completed" + [(set (match_dup 3) (ashift:DI (match_dup 1) (const_int 32))) + (unspec_volatile [(const_int 0)] 2) + (set (zero_extract:DI (match_dup 0) (const_int 32) (const_int 0)) + (lshiftrt:DI (match_dup 3) (const_int 32)))] + "operands[3] = operands[2];") + +(define_split + [(set (zero_extract:DI (match_operand:DI 0 "register_operand" "") + (const_int 32) (const_int 0)) + (match_operand:DI 1 "register_operand" "")) + (clobber (match_operand:DI 2 "register_operand" ""))] + "! 
reload_completed" + [(set (match_dup 3) (ashift:DI (match_dup 1) (const_int 32))) + (set (zero_extract:DI (match_dup 0) (const_int 32) (const_int 0)) + (lshiftrt:DI (match_dup 3) (const_int 32)))] + "operands[3] = operands[2];") + +(define_insn "*mix4left" + [(set (zero_extract:DI (match_operand:DI 0 "register_operand" "+r") + (const_int 32) (const_int 0)) + (lshiftrt:DI (match_operand:DI 1 "register_operand" "r") + (const_int 32)))] + "" + "mix4.l %0 = %0, %r1" + [(set_attr "type" "I")]) + +(define_insn "mix4right" + [(set (zero_extract:DI (match_operand:DI 0 "register_operand" "+r") + (const_int 32) (const_int 32)) + (match_operand:DI 1 "reg_or_0_operand" "rO"))] + "" + "mix4.r %0 = %r1, %0" + [(set_attr "type" "I")]) + +;; This is used by the rotrsi3 pattern. + +(define_insn "*mix4right_3op" + [(set (match_operand:DI 0 "register_operand" "=r") + (ior:DI (zero_extend:DI (match_operand:SI 1 "register_operand" "r")) + (ashift:DI (zero_extend:DI (match_operand:SI 2 "register_operand" "r")) + (const_int 32))))] + "" + "mix4.r %0 = %1, %2" + [(set_attr "type" "I")]) + + +;; :::::::::::::::::::: +;; :: +;; :: 32 bit Integer arithmetic +;; :: +;; :::::::::::::::::::: + +;; We handle 32-bit arithmetic just like the alpha port does. + +(define_expand "addsi3" + [(set (match_operand:SI 0 "register_operand" "") + (plus:SI (match_operand:SI 1 "register_operand" "") + (match_operand:SI 2 "reg_or_22bit_operand" "")))] + "" + " +{ + if (optimize) + { + rtx op1 = gen_lowpart (DImode, operands[1]); + rtx op2 = gen_lowpart (DImode, operands[2]); + + if (! 
cse_not_expected) + { + rtx tmp = gen_reg_rtx (DImode); + emit_insn (gen_adddi3 (tmp, op1, op2)); + emit_move_insn (operands[0], gen_lowpart (SImode, tmp)); + } + else + emit_insn (gen_adddi3 (gen_lowpart (DImode, operands[0]), op1, op2)); + DONE; + } +}") + +(define_insn "*addsi3_internal" + [(set (match_operand:SI 0 "register_operand" "=r,r,r") + (plus:SI (match_operand:SI 1 "register_operand" "%r,r,a") + (match_operand:SI 2 "reg_or_22bit_operand" "r,I,J")))] + "" + "@ + add %0 = %1, %2 + adds %0 = %2, %1 + addl %0 = %2, %1" + [(set_attr "type" "A")]) + +(define_insn "*addsi3_plus1" + [(set (match_operand:SI 0 "register_operand" "=r") + (plus:SI (plus:SI (match_operand:SI 1 "register_operand" "r") + (match_operand:SI 2 "register_operand" "r")) + (const_int 1)))] + "" + "add %0 = %1, %2, 1" + [(set_attr "type" "A")]) + +(define_expand "subsi3" + [(set (match_operand:SI 0 "register_operand" "") + (minus:SI (match_operand:SI 1 "reg_or_8bit_operand" "") + (match_operand:SI 2 "register_operand" "")))] + "" + " +{ + if (optimize) + { + rtx op1 = gen_lowpart (DImode, operands[1]); + rtx op2 = gen_lowpart (DImode, operands[2]); + + if (! 
cse_not_expected) + { + rtx tmp = gen_reg_rtx (DImode); + emit_insn (gen_subdi3 (tmp, op1, op2)); + emit_move_insn (operands[0], gen_lowpart (SImode, tmp)); + } + else + emit_insn (gen_subdi3 (gen_lowpart (DImode, operands[0]), op1, op2)); + DONE; + } +}") + +(define_insn "*subsi3_internal" + [(set (match_operand:SI 0 "register_operand" "=r") + (minus:SI (match_operand:SI 1 "reg_or_8bit_operand" "rK") + (match_operand:SI 2 "register_operand" "r")))] + "" + "sub %0 = %1, %2" + [(set_attr "type" "A")]) + +(define_insn "*subsi3_minus1" + [(set (match_operand:SI 0 "register_operand" "=r") + (plus:SI (not:SI (match_operand:SI 1 "register_operand" "r")) + (match_operand:SI 2 "register_operand" "r")))] + "" + "sub %0 = %2, %1, 1" + [(set_attr "type" "A")]) + +(define_expand "mulsi3" + [(set (match_operand:SI 0 "register_operand" "") + (mult:SI (match_operand:SI 1 "register_operand" "") + (match_operand:SI 2 "register_operand" "")))] + "" + " +{ + if (optimize) + { + rtx op1 = gen_lowpart (DImode, operands[1]); + rtx op2 = gen_lowpart (DImode, operands[2]); + + if (! cse_not_expected) + { + rtx tmp = gen_reg_rtx (DImode); + emit_insn (gen_muldi3 (tmp, op1, op2)); + emit_move_insn (operands[0], gen_lowpart (SImode, tmp)); + } + else + emit_insn (gen_muldi3 (gen_lowpart (DImode, operands[0]), op1, op2)); + DONE; + } +}") + +;; ??? Could add maddsi3 patterns patterned after the madddi3 patterns. + +(define_insn "*mulsi3_internal" + [(set (match_operand:SI 0 "register_operand" "=e") + (mult:SI (match_operand:SI 1 "register_operand" "e") + (match_operand:SI 2 "nonmemory_operand" "e")))] + "" + "xma.l %0 = %1, %2, f0%B0" + [(set_attr "type" "F")]) + +(define_expand "negsi2" + [(set (match_operand:SI 0 "register_operand" "") + (neg:SI (match_operand:SI 1 "register_operand" "")))] + "" + " +{ + if (optimize) + { + rtx op1 = gen_lowpart (DImode, operands[1]); + + if (! 
cse_not_expected) + { + rtx tmp = gen_reg_rtx (DImode); + emit_insn (gen_negdi2 (tmp, op1)); + emit_move_insn (operands[0], gen_lowpart (SImode, tmp)); + } + else + emit_insn (gen_negdi2 (gen_lowpart (DImode, operands[0]), op1)); + DONE; + } +}") + +(define_insn "*negsi2_internal" + [(set (match_operand:SI 0 "register_operand" "=r") + (neg:SI (match_operand:SI 1 "register_operand" "r")))] + "" + "sub %0 = r0, %1" + [(set_attr "type" "A")]) + +(define_expand "abssi2" + [(set (match_dup 2) + (ge:CC (match_operand:SI 1 "register_operand" "") (const_int 0))) + (set (match_operand:SI 0 "register_operand" "") + (if_then_else:SI (ne:CC (match_dup 2) (const_int 0)) + (match_dup 1) + (neg:SI (match_dup 1))))] + "" + " +{ + operands[2] = gen_reg_rtx (CCmode); +}") + +(define_expand "sminsi3" + [(set (match_dup 3) + (ge:CC (match_operand:SI 1 "register_operand" "") + (match_operand:SI 2 "register_operand" ""))) + (set (match_operand:SI 0 "register_operand" "") + (if_then_else:SI (ne:CC (match_dup 3) (const_int 0)) + (match_dup 2) (match_dup 1)))] + "" + " +{ + operands[3] = gen_reg_rtx (CCmode); +}") + +(define_expand "smaxsi3" + [(set (match_dup 3) + (ge:CC (match_operand:SI 1 "register_operand" "") + (match_operand:SI 2 "register_operand" ""))) + (set (match_operand:SI 0 "register_operand" "") + (if_then_else:SI (ne:CC (match_dup 3) (const_int 0)) + (match_dup 1) (match_dup 2)))] + "" + " +{ + operands[3] = gen_reg_rtx (CCmode); +}") + +(define_expand "uminsi3" + [(set (match_dup 3) + (geu:CC (match_operand:SI 1 "register_operand" "") + (match_operand:SI 2 "register_operand" ""))) + (set (match_operand:SI 0 "register_operand" "") + (if_then_else:SI (ne:CC (match_dup 3) (const_int 0)) + (match_dup 2) (match_dup 1)))] + "" + " +{ + operands[3] = gen_reg_rtx (CCmode); +}") + +(define_expand "umaxsi3" + [(set (match_dup 3) + (geu:CC (match_operand:SI 1 "register_operand" "") + (match_operand:SI 2 "register_operand" ""))) + (set (match_operand:SI 0 "register_operand" "") + 
(if_then_else:SI (ne:CC (match_dup 3) (const_int 0)) + (match_dup 1) (match_dup 2)))] + "" + " +{ + operands[3] = gen_reg_rtx (CCmode); +}") + + +;; :::::::::::::::::::: +;; :: +;; :: 64 bit Integer arithmetic +;; :: +;; :::::::::::::::::::: + +(define_insn "adddi3" + [(set (match_operand:DI 0 "register_operand" "=r,r,r") + (plus:DI (match_operand:DI 1 "register_operand" "%r,r,a") + (match_operand:DI 2 "reg_or_22bit_operand" "r,I,J")))] + "" + "@ + add %0 = %1, %2 + adds %0 = %2, %1 + addl %0 = %2, %1" + [(set_attr "type" "A")]) + +(define_insn "*adddi3_plus1" + [(set (match_operand:DI 0 "register_operand" "=r") + (plus:DI (plus:DI (match_operand:DI 1 "register_operand" "r") + (match_operand:DI 2 "register_operand" "r")) + (const_int 1)))] + "" + "add %0 = %1, %2, 1" + [(set_attr "type" "A")]) + +(define_insn "subdi3" + [(set (match_operand:DI 0 "register_operand" "=r") + (minus:DI (match_operand:DI 1 "reg_or_8bit_operand" "rK") + (match_operand:DI 2 "register_operand" "r")))] + "" + "sub %0 = %1, %2" + [(set_attr "type" "A")]) + +(define_insn "*subdi3_minus1" + [(set (match_operand:DI 0 "register_operand" "=r") + (plus:DI (not:DI (match_operand:DI 1 "register_operand" "r")) + (match_operand:DI 2 "register_operand" "r")))] + "" + "sub %0 = %2, %1, 1" + [(set_attr "type" "A")]) + +(define_insn "muldi3" + [(set (match_operand:DI 0 "register_operand" "=e") + (mult:DI (match_operand:DI 1 "register_operand" "e") + (match_operand:DI 2 "register_operand" "e")))] + "" + "xma.l %0 = %1, %2, f0%B0" + [(set_attr "type" "F")]) + +;; ??? If operand 3 is an eliminable reg, then register elimination causes the +;; same problem that we have with shladd below. Unfortunately, this case is +;; much harder to fix because the multiply puts the result in an FP register, +;; but the add needs inputs from a general register. We add a spurious clobber +;; here so that it will be present just in case register elimination gives us +;; the funny result. + +;; ??? 
Maybe validate_changes should try adding match_scratch clobbers? + +;; ??? Maybe we should change how adds are canonicalized. + +(define_insn "*madddi3" + [(set (match_operand:DI 0 "register_operand" "=e") + (plus:DI (mult:DI (match_operand:DI 1 "register_operand" "e") + (match_operand:DI 2 "register_operand" "e")) + (match_operand:DI 3 "register_operand" "e"))) + (clobber (match_scratch:DI 4 "=X"))] + "" + "xma.l %0 = %1, %2, %3%B0" + [(set_attr "type" "F")]) + +;; This can be created by register elimination if operand3 of shladd is an +;; eliminable register or has reg_equiv_constant set. + +;; We have to use nonmemory_operand for operand 4, to ensure that the +;; validate_changes call inside eliminate_regs will always succeed. If it +;; doesn't succeed, then this remain a madddi3 pattern, and will be reloaded +;; incorrectly. + +(define_insn "*madddi3_elim" + [(set (match_operand:DI 0 "register_operand" "=&r") + (plus:DI (plus:DI (mult:DI (match_operand:DI 1 "register_operand" "e") + (match_operand:DI 2 "register_operand" "e")) + (match_operand:DI 3 "register_operand" "e")) + (match_operand:DI 4 "nonmemory_operand" "rI"))) + (clobber (match_scratch:DI 5 "=e"))] + "reload_in_progress" + "#" + [(set_attr "type" "unknown")]) + +;; ??? Need to emit an instruction group barrier here because this gets split +;; after md_reorg. 
+ +(define_split + [(set (match_operand:DI 0 "register_operand" "") + (plus:DI (plus:DI (mult:DI (match_operand:DI 1 "register_operand" "") + (match_operand:DI 2 "register_operand" "")) + (match_operand:DI 3 "register_operand" "")) + (match_operand:DI 4 "reg_or_14bit_operand" ""))) + (clobber (match_scratch:DI 5 ""))] + "reload_completed" + [(parallel [(set (match_dup 5) (plus:DI (mult:DI (match_dup 1) (match_dup 2)) + (match_dup 3))) + (clobber (match_dup 0))]) + (unspec_volatile [(const_int 0)] 2) + (set (match_dup 0) (match_dup 5)) + (unspec_volatile [(const_int 0)] 2) + (set (match_dup 0) (plus:DI (match_dup 0) (match_dup 4)))] + "") + +;; ??? There are highpart multiply and add instructions, but we have no way +;; to generate them. + +(define_insn "smuldi3_highpart" + [(set (match_operand:DI 0 "register_operand" "=e") + (truncate:DI + (lshiftrt:TI + (mult:TI (sign_extend:TI (match_operand:DI 1 "register_operand" "e")) + (sign_extend:TI (match_operand:DI 2 "register_operand" "e"))) + (const_int 64))))] + "" + "xma.h %0 = %1, %2, f0%B0" + [(set_attr "type" "F")]) + +(define_insn "umuldi3_highpart" + [(set (match_operand:DI 0 "register_operand" "=e") + (truncate:DI + (lshiftrt:TI + (mult:TI (zero_extend:TI (match_operand:DI 1 "register_operand" "e")) + (zero_extend:TI (match_operand:DI 2 "register_operand" "e"))) + (const_int 64))))] + "" + "xma.hu %0 = %1, %2, f0%B0" + [(set_attr "type" "F")]) + +(define_insn "negdi2" + [(set (match_operand:DI 0 "register_operand" "=r") + (neg:DI (match_operand:DI 1 "register_operand" "r")))] + "" + "sub %0 = r0, %1" + [(set_attr "type" "A")]) + +(define_expand "absdi2" + [(set (match_dup 2) + (ge:CC (match_operand:DI 1 "register_operand" "") (const_int 0))) + (set (match_operand:DI 0 "register_operand" "") + (if_then_else:DI (ne:CC (match_dup 2) (const_int 0)) + (match_dup 1) + (neg:DI (match_dup 1))))] + "" + " +{ + operands[2] = gen_reg_rtx (CCmode); +}") + +(define_expand "smindi3" + [(set (match_dup 3) + (ge:CC 
(match_operand:DI 1 "register_operand" "") + (match_operand:DI 2 "register_operand" ""))) + (set (match_operand:DI 0 "register_operand" "") + (if_then_else:DI (ne:CC (match_dup 3) (const_int 0)) + (match_dup 2) (match_dup 1)))] + "" + " +{ + operands[3] = gen_reg_rtx (CCmode); +}") + +(define_expand "smaxdi3" + [(set (match_dup 3) + (ge:CC (match_operand:DI 1 "register_operand" "") + (match_operand:DI 2 "register_operand" ""))) + (set (match_operand:DI 0 "register_operand" "") + (if_then_else:DI (ne:CC (match_dup 3) (const_int 0)) + (match_dup 1) (match_dup 2)))] + "" + " +{ + operands[3] = gen_reg_rtx (CCmode); +}") + +(define_expand "umindi3" + [(set (match_dup 3) + (geu:CC (match_operand:DI 1 "register_operand" "") + (match_operand:DI 2 "register_operand" ""))) + (set (match_operand:DI 0 "register_operand" "") + (if_then_else:DI (ne:CC (match_dup 3) (const_int 0)) + (match_dup 2) (match_dup 1)))] + "" + " +{ + operands[3] = gen_reg_rtx (CCmode); +}") + +(define_expand "umaxdi3" + [(set (match_dup 3) + (geu:CC (match_operand:DI 1 "register_operand" "") + (match_operand:DI 2 "register_operand" ""))) + (set (match_operand:DI 0 "register_operand" "") + (if_then_else:DI (ne:CC (match_dup 3) (const_int 0)) + (match_dup 1) (match_dup 2)))] + "" + " +{ + operands[3] = gen_reg_rtx (CCmode); +}") + +(define_expand "ffsdi2" + [(set (match_dup 6) + (eq:CC (match_operand:DI 1 "register_operand" "") (const_int 0))) + (set (match_dup 2) (plus:DI (match_dup 1) (const_int -1))) + (set (match_dup 5) (const_int 0)) + (set (match_dup 3) (xor:DI (match_dup 1) (match_dup 2))) + (set (match_dup 4) (unspec:DI [(match_dup 3)] 8)) + (set (match_operand:DI 0 "register_operand" "") + (if_then_else:DI (ne:CC (match_dup 6) (const_int 0)) + (match_dup 5) (match_dup 4)))] + "" + " +{ + operands[2] = gen_reg_rtx (DImode); + operands[3] = gen_reg_rtx (DImode); + operands[4] = gen_reg_rtx (DImode); + operands[5] = gen_reg_rtx (DImode); + operands[6] = gen_reg_rtx (CCmode); +}") + +(define_insn 
"*popcnt" + [(set (match_operand:DI 0 "register_operand" "=r") + (unspec:DI [(match_operand:DI 1 "register_operand" "r")] 8))] + "" + "popcnt %0 = %1" + [(set_attr "type" "I")]) + + +;; :::::::::::::::::::: +;; :: +;; :: 32 bit floating point arithmetic +;; :: +;; :::::::::::::::::::: + +(define_insn "addsf3" + [(set (match_operand:SF 0 "register_operand" "=f") + (plus:SF (match_operand:SF 1 "register_operand" "%f") + (match_operand:SF 2 "reg_or_fp01_operand" "fG")))] + "" + "fadd.s %0 = %1, %F2%B0" + [(set_attr "type" "F")]) + +(define_insn "subsf3" + [(set (match_operand:SF 0 "register_operand" "=f") + (minus:SF (match_operand:SF 1 "reg_or_fp01_operand" "fG") + (match_operand:SF 2 "reg_or_fp01_operand" "fG")))] + "" + "fsub.s %0 = %F1, %F2%B0" + [(set_attr "type" "F")]) + +(define_insn "mulsf3" + [(set (match_operand:SF 0 "register_operand" "=f") + (mult:SF (match_operand:SF 1 "register_operand" "%f") + (match_operand:SF 2 "register_operand" "f")))] + "" + "fmpy.s %0 = %1, %2%B0" + [(set_attr "type" "F")]) + +(define_insn "abssf2" + [(set (match_operand:SF 0 "register_operand" "=f") + (abs:SF (match_operand:SF 1 "register_operand" "f")))] + "" + "fabs %0 = %1%B0" + [(set_attr "type" "F")]) + +(define_insn "negsf2" + [(set (match_operand:SF 0 "register_operand" "=f") + (neg:SF (match_operand:SF 1 "register_operand" "f")))] + "" + "fneg %0 = %1%B0" + [(set_attr "type" "F")]) + +(define_insn "*nabssf2" + [(set (match_operand:SF 0 "register_operand" "=f") + (neg:SF (abs:SF (match_operand:SF 1 "register_operand" "f"))))] + "" + "fnegabs %0 = %1%B0" + [(set_attr "type" "F")]) + +(define_insn "minsf3" + [(set (match_operand:SF 0 "register_operand" "=f") + (smin:SF (match_operand:SF 1 "register_operand" "f") + (match_operand:SF 2 "reg_or_fp01_operand" "fG")))] + "" + "fmin %0 = %1, %F2%B0" + [(set_attr "type" "F")]) + +(define_insn "maxsf3" + [(set (match_operand:SF 0 "register_operand" "=f") + (smax:SF (match_operand:SF 1 "register_operand" "f") + (match_operand:SF 2 
"reg_or_fp01_operand" "fG")))] + "" + "fmax %0 = %1, %F2%B0" + [(set_attr "type" "F")]) + +(define_insn "*maddsf3" + [(set (match_operand:SF 0 "register_operand" "=f") + (plus:SF (mult:SF (match_operand:SF 1 "register_operand" "f") + (match_operand:SF 2 "register_operand" "f")) + (match_operand:SF 3 "reg_or_fp01_operand" "fG")))] + "" + "fma.s %0 = %1, %2, %F3%B0" + [(set_attr "type" "F")]) + +(define_insn "*msubsf3" + [(set (match_operand:SF 0 "register_operand" "=f") + (minus:SF (mult:SF (match_operand:SF 1 "register_operand" "f") + (match_operand:SF 2 "register_operand" "f")) + (match_operand:SF 3 "reg_or_fp01_operand" "fG")))] + "" + "fms.s %0 = %1, %2, %F3%B0" + [(set_attr "type" "F")]) + +(define_insn "*nmulsf3" + [(set (match_operand:SF 0 "register_operand" "=f") + (neg:SF (mult:SF (match_operand:SF 1 "register_operand" "f") + (match_operand:SF 2 "register_operand" "f"))))] + "" + "fnmpy.s %0 = %1, %2%B0" + [(set_attr "type" "F")]) + +;; ??? Is it possible to canonicalize this as (minus (reg) (mult))? 
+ +(define_insn "*nmaddsf3" + [(set (match_operand:SF 0 "register_operand" "=f") + (plus:SF (neg:SF (mult:SF (match_operand:SF 1 "register_operand" "f") + (match_operand:SF 2 "register_operand" "f"))) + (match_operand:SF 3 "reg_or_fp01_operand" "fG")))] + "" + "fnma.s %0 = %1, %2, %F3%B0" + [(set_attr "type" "F")]) + + +;; :::::::::::::::::::: +;; :: +;; :: 64 bit floating point arithmetic +;; :: +;; :::::::::::::::::::: + +(define_insn "adddf3" + [(set (match_operand:DF 0 "register_operand" "=f") + (plus:DF (match_operand:DF 1 "register_operand" "%f") + (match_operand:DF 2 "reg_or_fp01_operand" "fG")))] + "" + "fadd.d %0 = %1, %F2%B0" + [(set_attr "type" "F")]) + +(define_insn "subdf3" + [(set (match_operand:DF 0 "register_operand" "=f") + (minus:DF (match_operand:DF 1 "reg_or_fp01_operand" "fG") + (match_operand:DF 2 "reg_or_fp01_operand" "fG")))] + "" + "fsub.d %0 = %F1, %F2%B0" + [(set_attr "type" "F")]) + +(define_insn "muldf3" + [(set (match_operand:DF 0 "register_operand" "=f") + (mult:DF (match_operand:DF 1 "register_operand" "f") + (match_operand:DF 2 "register_operand" "f")))] + "" + "fmpy.d %0 = %1, %2%B0" + [(set_attr "type" "F")]) + +(define_insn "absdf2" + [(set (match_operand:DF 0 "register_operand" "=f") + (abs:DF (match_operand:DF 1 "register_operand" "f")))] + "" + "fabs %0 = %1%B0" + [(set_attr "type" "F")]) + +(define_insn "negdf2" + [(set (match_operand:DF 0 "register_operand" "=f") + (neg:DF (match_operand:DF 1 "register_operand" "f")))] + "" + "fneg %0 = %1%B0" + [(set_attr "type" "F")]) + +(define_insn "*nabsdf2" + [(set (match_operand:DF 0 "register_operand" "=f") + (neg:DF (abs:DF (match_operand:DF 1 "register_operand" "f"))))] + "" + "fnegabs %0 = %1%B0" + [(set_attr "type" "F")]) + +(define_insn "mindf3" + [(set (match_operand:DF 0 "register_operand" "=f") + (smin:DF (match_operand:DF 1 "register_operand" "f") + (match_operand:DF 2 "reg_or_fp01_operand" "fG")))] + "" + "fmin %0 = %1, %F2%B0" + [(set_attr "type" "F")]) + +(define_insn 
"maxdf3" + [(set (match_operand:DF 0 "register_operand" "=f") + (smax:DF (match_operand:DF 1 "register_operand" "f") + (match_operand:DF 2 "reg_or_fp01_operand" "fG")))] + "" + "fmax %0 = %1, %F2%B0" + [(set_attr "type" "F")]) + +(define_insn "*madddf3" + [(set (match_operand:DF 0 "register_operand" "=f") + (plus:DF (mult:DF (match_operand:DF 1 "register_operand" "f") + (match_operand:DF 2 "register_operand" "f")) + (match_operand:DF 3 "reg_or_fp01_operand" "fG")))] + "" + "fma.d %0 = %1, %2, %F3%B0" + [(set_attr "type" "F")]) + +(define_insn "*msubdf3" + [(set (match_operand:DF 0 "register_operand" "=f") + (minus:DF (mult:DF (match_operand:DF 1 "register_operand" "f") + (match_operand:DF 2 "register_operand" "f")) + (match_operand:DF 3 "reg_or_fp01_operand" "fG")))] + "" + "fms.d %0 = %1, %2, %F3%B0" + [(set_attr "type" "F")]) + +(define_insn "*nmuldf3" + [(set (match_operand:DF 0 "register_operand" "=f") + (neg:DF (mult:DF (match_operand:DF 1 "register_operand" "f") + (match_operand:DF 2 "register_operand" "f"))))] + "" + "fnmpy.d %0 = %1, %2%B0" + [(set_attr "type" "F")]) + +;; ??? Is it possible to canonicalize this as (minus (reg) (mult))? + +(define_insn "*nmadddf3" + [(set (match_operand:DF 0 "register_operand" "=f") + (plus:DF (neg:DF (mult:DF (match_operand:DF 1 "register_operand" "f") + (match_operand:DF 2 "register_operand" "f"))) + (match_operand:DF 3 "reg_or_fp01_operand" "fG")))] + "" + "fnma.d %0 = %1, %2, %F3%B0" + [(set_attr "type" "F")]) + + +;; :::::::::::::::::::: +;; :: +;; :: 32 bit Integer Shifts and Rotates +;; :: +;; :::::::::::::::::::: + +;; There is no sign-extend form of dep, so we only get 32 bits of valid result +;; instead of 64 like the patterns below. + +;; Using a predicate that accepts only constants doesn't work, because optabs +;; will load the operand into a register and call the pattern if the predicate +;; did not accept it on the first try. 
So we use nonmemory_operand and then +;; verify that we have an appropriate constant in the expander. + +(define_expand "ashlsi3" + [(set (match_operand:SI 0 "register_operand" "") + (ashift:SI (match_operand:SI 1 "register_operand" "") + (match_operand:SI 2 "nonmemory_operand" "")))] + "" + " +{ + if (! shift_32bit_count_operand (operands[2], SImode)) + FAIL; +}") + +(define_insn "*ashlsi3_internal" + [(set (match_operand:SI 0 "register_operand" "=r") + (ashift:SI (match_operand:SI 1 "register_operand" "r") + (match_operand:SI 2 "shift_32bit_count_operand" "n")))] + "" + "dep.z %0 = %1, %2, %E2" + [(set_attr "type" "I")]) + +;; This is really an extract, but this is how combine canonicalizes the +;; operation. + +(define_expand "ashrsi3" + [(set (match_dup 3) + (ashiftrt:DI (sign_extend:DI + (match_operand:SI 1 "register_operand" "")) + (match_operand:DI 2 "nonmemory_operand" ""))) + (set (match_operand:SI 0 "register_operand" "") (match_dup 4))] + "" + " +{ + if (! shift_32bit_count_operand (operands[2], SImode)) + FAIL; + + operands[3] = gen_reg_rtx (DImode); + operands[4] = gen_lowpart (SImode, operands[3]); +}") + +(define_insn "*ashrsi3_internal" + [(set (match_operand:DI 0 "register_operand" "=r") + (ashiftrt:DI (sign_extend:DI + (match_operand:SI 1 "register_operand" "r")) + (match_operand:DI 2 "shift_32bit_count_operand" "n")))] + "" + "extr %0 = %1, %2, %E2" + [(set_attr "type" "I")]) + +;; This is really an extract, but this is how combine canonicalizes the +;; operation. + +(define_expand "lshrsi3" + [(set (match_dup 3) + (lshiftrt:DI (zero_extend:DI + (match_operand:SI 1 "register_operand" "")) + (match_operand:DI 2 "nonmemory_operand" ""))) + (set (match_operand:SI 0 "register_operand" "") (match_dup 4))] + "" + " +{ + if (! 
shift_32bit_count_operand (operands[2], SImode)) + FAIL; + + operands[3] = gen_reg_rtx (DImode); + operands[4] = gen_lowpart (SImode, operands[3]); +}") + +(define_insn "*lshrsi3_internal" + [(set (match_operand:DI 0 "register_operand" "=r") + (lshiftrt:DI (zero_extend:DI + (match_operand:SI 1 "register_operand" "r")) + (match_operand:DI 2 "shift_32bit_count_operand" "n")))] + "" + "extr.u %0 = %1, %2, %E2" + [(set_attr "type" "I")]) + +;; Use mix4.r/shr to implement rotrsi3. We only get 32 bits of valid result +;; here, instead of 64 like the patterns above. + +(define_expand "rotrsi3" + [(set (match_dup 3) + (ior:DI (zero_extend:DI (match_operand:SI 1 "register_operand" "")) + (ashift:DI (zero_extend:DI (match_dup 1)) (const_int 32)))) + (set (match_dup 3) + (lshiftrt:DI (match_dup 3) + (match_operand:DI 2 "nonmemory_operand" ""))) + (set (match_operand:SI 0 "register_operand" "") (match_dup 4))] + "" + " +{ + if (! shift_32bit_count_operand (operands[2], SImode)) + FAIL; + + operands[3] = gen_reg_rtx (DImode); + operands[4] = gen_lowpart (SImode, operands[3]); +}") + + +;; :::::::::::::::::::: +;; :: +;; :: 64 bit Integer Shifts and Rotates +;; :: +;; :::::::::::::::::::: + +(define_insn "ashldi3" + [(set (match_operand:DI 0 "register_operand" "=r") + (ashift:DI (match_operand:DI 1 "register_operand" "r") + (match_operand:DI 2 "reg_or_6bit_operand" "rM")))] + "" + "shl %0 = %1, %2" + [(set_attr "type" "I")]) + +;; ??? Maybe combine this with the multiply and add instruction? + +(define_insn "*shladd" + [(set (match_operand:DI 0 "register_operand" "=r") + (plus:DI (mult:DI (match_operand:DI 1 "register_operand" "r") + (match_operand:DI 2 "shladd_operand" "n")) + (match_operand:DI 3 "register_operand" "r")))] + "" + "shladd %0 = %1, %S2, %3" + [(set_attr "type" "A")]) + +;; This can be created by register elimination if operand3 of shladd is an +;; eliminable register or has reg_equiv_constant set. 
+ +;; We have to use nonmemory_operand for operand 4, to ensure that the +;; validate_changes call inside eliminate_regs will always succeed. If it +;; doesn't succeed, then this remain a shladd pattern, and will be reloaded +;; incorrectly. + +(define_insn "*shladd_elim" + [(set (match_operand:DI 0 "register_operand" "=&r") + (plus:DI (plus:DI (mult:DI (match_operand:DI 1 "register_operand" "r") + (match_operand:DI 2 "shladd_operand" "n")) + (match_operand:DI 3 "register_operand" "r")) + (match_operand:DI 4 "nonmemory_operand" "rI")))] + "reload_in_progress" + "#" + [(set_attr "type" "unknown")]) + +;; ??? Need to emit an instruction group barrier here because this gets split +;; after md_reorg. + +(define_split + [(set (match_operand:DI 0 "register_operand" "") + (plus:DI (plus:DI (mult:DI (match_operand:DI 1 "register_operand" "") + (match_operand:DI 2 "shladd_operand" "")) + (match_operand:DI 3 "register_operand" "")) + (match_operand:DI 4 "reg_or_14bit_operand" "")))] + "reload_completed" + [(set (match_dup 0) (plus:DI (mult:DI (match_dup 1) (match_dup 2)) + (match_dup 3))) + (unspec_volatile [(const_int 0)] 2) + (set (match_dup 0) (plus:DI (match_dup 0) (match_dup 4)))] + "") + +(define_insn "ashrdi3" + [(set (match_operand:DI 0 "register_operand" "=r") + (ashiftrt:DI (match_operand:DI 1 "register_operand" "r") + (match_operand:DI 2 "reg_or_6bit_operand" "rM")))] + "" + "shr %0 = %1, %2" + [(set_attr "type" "I")]) + +(define_insn "lshrdi3" + [(set (match_operand:DI 0 "register_operand" "=r") + (lshiftrt:DI (match_operand:DI 1 "register_operand" "r") + (match_operand:DI 2 "reg_or_6bit_operand" "rM")))] + "" + "shr.u %0 = %1, %2" + [(set_attr "type" "I")]) + +;; Using a predicate that accepts only constants doesn't work, because optabs +;; will load the operand into a register and call the pattern if the predicate +;; did not accept it on the first try. So we use nonmemory_operand and then +;; verify that we have an appropriate constant in the expander. 
+ +(define_expand "rotrdi3" + [(set (match_operand:DI 0 "register_operand" "") + (rotatert:DI (match_operand:DI 1 "register_operand" "") + (match_operand:DI 2 "nonmemory_operand" "")))] + "" + " +{ + if (! shift_count_operand (operands[2], DImode)) + FAIL; +}") + +(define_insn "*rotrdi3_internal" + [(set (match_operand:DI 0 "register_operand" "=r") + (rotatert:DI (match_operand:DI 1 "register_operand" "r") + (match_operand:DI 2 "shift_count_operand" "M")))] + "" + "shrp %0 = %1, %1, %2" + [(set_attr "type" "I")]) + + +;; :::::::::::::::::::: +;; :: +;; :: 32 Bit Integer Logical operations +;; :: +;; :::::::::::::::::::: + +;; We don't seem to need any other 32-bit logical operations, because gcc +;; generates zero-extend;zero-extend;DImode-op, which combine optimizes to +;; DImode-op;zero-extend, and then we can optimize away the zero-extend. +;; This doesn't work for unary logical operations, because we don't call +;; apply_distributive_law for them. + +;; ??? Likewise, this doesn't work for andnot, which isn't handled by +;; apply_distributive_law. We get inefficient code for +;; int sub4 (int i, int j) { return i & ~j; } +;; We could convert (and (not (sign_extend A)) (sign_extend B)) to +;; (zero_extend (and (not A) B)) in combine. +;; Or maybe fix this by adding andsi3/iorsi3/xorsi3 patterns like the +;; one_cmplsi2 pattern. + +(define_expand "one_cmplsi2" + [(set (match_operand:SI 0 "register_operand" "") + (not:SI (match_operand:SI 1 "register_operand" "")))] + "" + " +{ + if (optimize) + { + rtx op1 = gen_lowpart (DImode, operands[1]); + + if (! 
cse_not_expected) + { + rtx tmp = gen_reg_rtx (DImode); + emit_insn (gen_one_cmpldi2 (tmp, op1)); + emit_move_insn (operands[0], gen_lowpart (SImode, tmp)); + } + else + emit_insn (gen_one_cmpldi2 (gen_lowpart (DImode, operands[0]), op1)); + DONE; + } +}") + +(define_insn "*one_cmplsi2_internal" + [(set (match_operand:SI 0 "register_operand" "=r") + (not:SI (match_operand:SI 1 "register_operand" "r")))] + "" + "andcm %0 = -1, %1" + [(set_attr "type" "A")]) + + +;; :::::::::::::::::::: +;; :: +;; :: 64 Bit Integer Logical operations +;; :: +;; :::::::::::::::::::: + +(define_insn "anddi3" + [(set (match_operand:DI 0 "register_operand" "=r,*e") + (and:DI (match_operand:DI 1 "register_operand" "%r,*e") + (match_operand:DI 2 "reg_or_8bit_operand" "rK,*e")))] + "" + "@ + and %0 = %2, %1 + fand %0 = %2, %1%B0" + [(set_attr "type" "A,F")]) + +(define_insn "*andnot" + [(set (match_operand:DI 0 "register_operand" "=r,*e") + (and:DI (not:DI (match_operand:DI 1 "register_operand" "r,*e")) + (match_operand:DI 2 "reg_or_8bit_operand" "rK,*e")))] + "" + "@ + andcm %0 = %2, %1 + fandcm %0 = %2, %1%B0" + [(set_attr "type" "A,F")]) + +(define_insn "iordi3" + [(set (match_operand:DI 0 "register_operand" "=r,*e") + (ior:DI (match_operand:DI 1 "register_operand" "%r,*e") + (match_operand:DI 2 "reg_or_8bit_operand" "rK,*e")))] + "" + "@ + or %0 = %2, %1 + for %0 = %2, %1%B0" + [(set_attr "type" "A,F")]) + +(define_insn "xordi3" + [(set (match_operand:DI 0 "register_operand" "=r,*e") + (xor:DI (match_operand:DI 1 "register_operand" "%r,*e") + (match_operand:DI 2 "reg_or_8bit_operand" "rK,*e")))] + "" + "@ + xor %0 = %2, %1 + fxor %0 = %2, %1%B0" + [(set_attr "type" "A,F")]) + +(define_insn "one_cmpldi2" + [(set (match_operand:DI 0 "register_operand" "=r") + (not:DI (match_operand:DI 1 "register_operand" "r")))] + "" + "andcm %0 = -1, %1" + [(set_attr "type" "A")]) + +;; :::::::::::::::::::: +;; :: +;; :: Comparisons +;; :: +;; :::::::::::::::::::: + +(define_expand "cmpsi" + [(set (cc0) 
+ (compare (match_operand:SI 0 "register_operand" "") + (match_operand:SI 1 "reg_or_8bit_and_adjusted_operand" "")))] + "" + " +{ + ia64_compare_op0 = operands[0]; + ia64_compare_op1 = operands[1]; + DONE; +}") + +(define_expand "cmpdi" + [(set (cc0) + (compare (match_operand:DI 0 "register_operand" "") + (match_operand:DI 1 "reg_or_8bit_and_adjusted_operand" "")))] + "" + " +{ + ia64_compare_op0 = operands[0]; + ia64_compare_op1 = operands[1]; + DONE; +}") + +(define_expand "cmpsf" + [(set (cc0) + (compare (match_operand:SF 0 "reg_or_fp01_operand" "") + (match_operand:SF 1 "reg_or_fp01_operand" "")))] + "" + " +{ + ia64_compare_op0 = operands[0]; + ia64_compare_op1 = operands[1]; + DONE; +}") + +(define_expand "cmpdf" + [(set (cc0) + (compare (match_operand:DF 0 "reg_or_fp01_operand" "") + (match_operand:DF 1 "reg_or_fp01_operand" "")))] + "" + " +{ + ia64_compare_op0 = operands[0]; + ia64_compare_op1 = operands[1]; + DONE; +}") + +;; ??? Enable this for XFmode support. + +(define_expand "cmpxf" + [(set (cc0) + (compare (match_operand:XF 0 "reg_or_fp01_operand" "") + (match_operand:XF 1 "reg_or_fp01_operand" "")))] + "0" + " +{ + ia64_compare_op0 = operands[0]; + ia64_compare_op1 = operands[1]; + DONE; +}") + +(define_insn "*cmpsi_normal" + [(set (match_operand:CC 0 "register_operand" "=c") + (match_operator:CC 1 "normal_comparison_operator" + [(match_operand:SI 2 "register_operand" "r") + (match_operand:SI 3 "reg_or_8bit_operand" "rK")]))] + "" + "cmp4.%C1 %0, %I0 = %3, %2" + [(set_attr "type" "A")]) + +(define_insn "*cmpsi_adjusted" + [(set (match_operand:CC 0 "register_operand" "=c") + (match_operator:CC 1 "adjusted_comparison_operator" + [(match_operand:SI 2 "register_operand" "r") + (match_operand:SI 3 "reg_or_8bit_adjusted_operand" + "rL")]))] + "" + "cmp4.%C1 %0, %I0 = %3, %2" + [(set_attr "type" "A")]) + +(define_insn "*cmpdi_normal" + [(set (match_operand:CC 0 "register_operand" "=c") + (match_operator:CC 1 "normal_comparison_operator" + 
[(match_operand:DI 2 "register_operand" "r") + (match_operand:DI 3 "reg_or_8bit_operand" "rK")]))] + "" + "cmp.%C1 %0, %I0 = %3, %2" + [(set_attr "type" "A")]) + +(define_insn "*cmpdi_adjusted" + [(set (match_operand:CC 0 "register_operand" "=c") + (match_operator:CC 1 "adjusted_comparison_operator" + [(match_operand:DI 2 "register_operand" "r") + (match_operand:DI 3 "reg_or_8bit_adjusted_operand" + "rL")]))] + "" + "cmp.%C1 %0, %I0 = %3, %2" + [(set_attr "type" "A")]) + +(define_insn "*cmpsf_internal" + [(set (match_operand:CC 0 "register_operand" "=c") + (match_operator:CC 1 "comparison_operator" + [(match_operand:SF 2 "reg_or_fp01_operand" "fG") + (match_operand:SF 3 "reg_or_fp01_operand" "fG")]))] + "" + "fcmp.%D1 %0, %I0 = %F2, %F3" + [(set_attr "type" "F")]) + +(define_insn "*cmpdf_internal" + [(set (match_operand:CC 0 "register_operand" "=c") + (match_operator:CC 1 "comparison_operator" + [(match_operand:DF 2 "reg_or_fp01_operand" "fG") + (match_operand:DF 3 "reg_or_fp01_operand" "fG")]))] + "" + "fcmp.%D1 %0, %I0 = %F2, %F3" + [(set_attr "type" "F")]) + +;; ??? Can this pattern be generated? + +(define_insn "*bit_zero" + [(set (match_operand:CC 0 "register_operand" "=c") + (eq:CC (zero_extract:DI (match_operand:DI 1 "register_operand" "r") + (const_int 1) + (match_operand:DI 2 "immediate_operand" "n")) + (const_int 0)))] + "" + "tbit.z %0, %I0 = %1, %2" + [(set_attr "type" "I")]) + +(define_insn "*bit_one" + [(set (match_operand:CC 0 "register_operand" "=c") + (ne:CC (zero_extract:DI (match_operand:DI 1 "register_operand" "r") + (const_int 1) + (match_operand:DI 2 "immediate_operand" "n")) + (const_int 0)))] + "" + "tbit.nz %0, %I0 = %1, %2" + [(set_attr "type" "I")]) + +;; ??? We also need this if we run out of PR regs and need to spill some. + +;; ??? We need this if a CCmode value does not get allocated to a hard +;; register. This happens if we cse/gcse a CCmode value across a call, and the +;; function has a nonlocal goto. 
This is because global does not allocate +;; call crossing pseudos to hard registers when current_function_has_ +;; nonlocal_goto is true. This is relatively common for C++ programs that +;; use exceptions. See ia64_secondary_reload_class. + +;; We use a define_expand here so that cse/gcse/combine can't accidentally +;; create movcc insns. If this was a named define_insn, we would not be able +;; to make it conditional on reload. + +(define_expand "movcc" + [(set (match_operand:CC 0 "nonimmediate_operand" "") + (match_operand:CC 1 "move_operand" ""))] + "" + " +{ + if (! reload_in_progress && ! reload_completed) + FAIL; +}") + +(define_insn "*movcc_internal" + [(set (match_operand:CC 0 "nonimmediate_operand" "=r,c,r,m") + (match_operand:CC 1 "move_operand" "c,r,m,r"))] + "reload_in_progress || reload_completed" + "@ + # + cmp4.ne %0, %I0 = %1, r0 + ld4%O1 %0 = %1%P1 + st4%Q0 %0 = %1%P0" + [(set_attr "type" "unknown,A,M,M")]) + +(define_split + [(set (match_operand:CC 0 "register_operand" "") + (match_operand:CC 1 "register_operand" ""))] + "reload_completed + && GET_CODE (operands[0]) == REG && GR_REGNO_P (REGNO (operands[0])) + && GET_CODE (operands[1]) == REG && PR_REGNO_P (REGNO (operands[1]))" + [(set (match_dup 2) + (if_then_else:DI (ne:CC (match_dup 1) (const_int 0)) + (const_int 1) + (match_dup 2))) + (set (match_dup 2) + (if_then_else:DI (ne:CC (match_dup 1) (const_int 0)) + (match_dup 2) + (const_int 0)))] + "operands[2] = gen_rtx_SUBREG (DImode, operands[0], 0);") + + +;; :::::::::::::::::::: +;; :: +;; :: Branches +;; :: +;; :::::::::::::::::::: + +(define_expand "beq" + [(set (match_dup 1) + (eq:CC (match_dup 2) + (match_dup 3))) + (set (pc) + (if_then_else (ne:CC (match_dup 1) + (const_int 0)) + (label_ref (match_operand 0 "" "")) + (pc)))] + "" + " +{ + operands[1] = gen_reg_rtx (CCmode); + operands[2] = ia64_compare_op0; + operands[3] = ia64_compare_op1; +}") + +(define_expand "bne" + [(set (match_dup 1) + (ne:CC (match_dup 2) + (match_dup 3))) + 
(set (pc) + (if_then_else (ne:CC (match_dup 1) + (const_int 0)) + (label_ref (match_operand 0 "" "")) + (pc)))] + "" + " +{ + operands[1] = gen_reg_rtx (CCmode); + operands[2] = ia64_compare_op0; + operands[3] = ia64_compare_op1; +}") + +(define_expand "blt" + [(set (match_dup 1) + (lt:CC (match_dup 2) + (match_dup 3))) + (set (pc) + (if_then_else (ne:CC (match_dup 1) + (const_int 0)) + (label_ref (match_operand 0 "" "")) + (pc)))] + "" + " +{ + operands[1] = gen_reg_rtx (CCmode); + operands[2] = ia64_compare_op0; + operands[3] = ia64_compare_op1; +}") + +(define_expand "ble" + [(set (match_dup 1) + (le:CC (match_dup 2) + (match_dup 3))) + (set (pc) + (if_then_else (ne:CC (match_dup 1) + (const_int 0)) + (label_ref (match_operand 0 "" "")) + (pc)))] + "" + " +{ + operands[1] = gen_reg_rtx (CCmode); + operands[2] = ia64_compare_op0; + operands[3] = ia64_compare_op1; +}") + +(define_expand "bgt" + [(set (match_dup 1) + (gt:CC (match_dup 2) + (match_dup 3))) + (set (pc) + (if_then_else (ne:CC (match_dup 1) + (const_int 0)) + (label_ref (match_operand 0 "" "")) + (pc)))] + "" + " +{ + operands[1] = gen_reg_rtx (CCmode); + operands[2] = ia64_compare_op0; + operands[3] = ia64_compare_op1; +}") + +(define_expand "bge" + [(set (match_dup 1) + (ge:CC (match_dup 2) + (match_dup 3))) + (set (pc) + (if_then_else (ne:CC (match_dup 1) + (const_int 0)) + (label_ref (match_operand 0 "" "")) + (pc)))] + "" + " +{ + operands[1] = gen_reg_rtx (CCmode); + operands[2] = ia64_compare_op0; + operands[3] = ia64_compare_op1; +}") + +(define_expand "bltu" + [(set (match_dup 1) + (ltu:CC (match_dup 2) + (match_dup 3))) + (set (pc) + (if_then_else (ne:CC (match_dup 1) + (const_int 0)) + (label_ref (match_operand 0 "" "")) + (pc)))] + "" + " +{ + operands[1] = gen_reg_rtx (CCmode); + operands[2] = ia64_compare_op0; + operands[3] = ia64_compare_op1; +}") + +(define_expand "bleu" + [(set (match_dup 1) + (leu:CC (match_dup 2) + (match_dup 3))) + (set (pc) + (if_then_else (ne:CC (match_dup 1) + 
(const_int 0)) + (label_ref (match_operand 0 "" "")) + (pc)))] + "" + " +{ + operands[1] = gen_reg_rtx (CCmode); + operands[2] = ia64_compare_op0; + operands[3] = ia64_compare_op1; +}") + +(define_expand "bgtu" + [(set (match_dup 1) + (gtu:CC (match_dup 2) + (match_dup 3))) + (set (pc) + (if_then_else (ne:CC (match_dup 1) + (const_int 0)) + (label_ref (match_operand 0 "" "")) + (pc)))] + "" + " +{ + operands[1] = gen_reg_rtx (CCmode); + operands[2] = ia64_compare_op0; + operands[3] = ia64_compare_op1; +}") + +(define_expand "bgeu" + [(set (match_dup 1) + (geu:CC (match_dup 2) + (match_dup 3))) + (set (pc) + (if_then_else (ne:CC (match_dup 1) + (const_int 0)) + (label_ref (match_operand 0 "" "")) + (pc)))] + "" + " +{ + operands[1] = gen_reg_rtx (CCmode); + operands[2] = ia64_compare_op0; + operands[3] = ia64_compare_op1; +}") + +;; ??? Need a way to choose between dpnt and dptk. Currently, I assume that +;; equality tests will likely fail, and inequality tests will likely succeed. + +(define_insn "*beq_true" + [(set (pc) + (if_then_else (eq:CC (match_operand:CC 0 "register_operand" "c") + (const_int 0)) + (label_ref (match_operand 1 "" "")) + (pc)))] + "" + "(%I0) br.cond.dpnt %l1" + [(set_attr "type" "B")]) + +(define_insn "*beq_false" + [(set (pc) + (if_then_else (eq:CC (match_operand:CC 0 "register_operand" "c") + (const_int 0)) + (pc) + (label_ref (match_operand 1 "" ""))))] + "" + "(%0) br.cond.dptk %l1" + [(set_attr "type" "B")]) + +(define_insn "*bne_true" + [(set (pc) + (if_then_else (ne:CC (match_operand:CC 0 "register_operand" "c") + (const_int 0)) + (label_ref (match_operand 1 "" "")) + (pc)))] + "" + "(%0) br.cond.dptk %l1" + [(set_attr "type" "B")]) + +(define_insn "*bne_false" + [(set (pc) + (if_then_else (ne:CC (match_operand:CC 0 "register_operand" "c") + (const_int 0)) + (pc) + (label_ref (match_operand 1 "" ""))))] + "" + "(%I0) br.cond.dpnt %l1" + [(set_attr "type" "B")]) + + +;; :::::::::::::::::::: +;; :: +;; :: Set flag operations +;; :: +;; 
:::::::::::::::::::: + +(define_expand "seq" + [(set (match_dup 1) + (eq:CC (match_dup 2) + (match_dup 3))) + (set (match_operand:DI 0 "register_operand" "") + (ne:DI (match_dup 1) (const_int 0)))] + "" + " +{ + operands[1] = gen_reg_rtx (CCmode); + operands[2] = ia64_compare_op0; + operands[3] = ia64_compare_op1; +}") + +(define_expand "sne" + [(set (match_dup 1) + (ne:CC (match_dup 2) + (match_dup 3))) + (set (match_operand:DI 0 "register_operand" "") + (ne:DI (match_dup 1) (const_int 0)))] + "" + " +{ + operands[1] = gen_reg_rtx (CCmode); + operands[2] = ia64_compare_op0; + operands[3] = ia64_compare_op1; +}") + +(define_expand "slt" + [(set (match_dup 1) + (lt:CC (match_dup 2) + (match_dup 3))) + (set (match_operand:DI 0 "register_operand" "") + (ne:DI (match_dup 1) (const_int 0)))] + "" + " +{ + operands[1] = gen_reg_rtx (CCmode); + operands[2] = ia64_compare_op0; + operands[3] = ia64_compare_op1; +}") + +(define_expand "sle" + [(set (match_dup 1) + (le:CC (match_dup 2) + (match_dup 3))) + (set (match_operand:DI 0 "register_operand" "") + (ne:DI (match_dup 1) (const_int 0)))] + "" + " +{ + operands[1] = gen_reg_rtx (CCmode); + operands[2] = ia64_compare_op0; + operands[3] = ia64_compare_op1; +}") + +(define_expand "sgt" + [(set (match_dup 1) + (gt:CC (match_dup 2) + (match_dup 3))) + (set (match_operand:DI 0 "register_operand" "") + (ne:DI (match_dup 1) (const_int 0)))] + "" + " +{ + operands[1] = gen_reg_rtx (CCmode); + operands[2] = ia64_compare_op0; + operands[3] = ia64_compare_op1; +}") + +(define_expand "sge" + [(set (match_dup 1) + (ge:CC (match_dup 2) + (match_dup 3))) + (set (match_operand:DI 0 "register_operand" "") + (ne:DI (match_dup 1) (const_int 0)))] + "" + " +{ + operands[1] = gen_reg_rtx (CCmode); + operands[2] = ia64_compare_op0; + operands[3] = ia64_compare_op1; +}") + +(define_expand "sltu" + [(set (match_dup 1) + (ltu:CC (match_dup 2) + (match_dup 3))) + (set (match_operand:DI 0 "register_operand" "") + (ne:DI (match_dup 1) (const_int 0)))] 
+ "" + " +{ + operands[1] = gen_reg_rtx (CCmode); + operands[2] = ia64_compare_op0; + operands[3] = ia64_compare_op1; +}") + +(define_expand "sleu" + [(set (match_dup 1) + (leu:CC (match_dup 2) + (match_dup 3))) + (set (match_operand:DI 0 "register_operand" "") + (ne:DI (match_dup 1) (const_int 0)))] + "" + " +{ + operands[1] = gen_reg_rtx (CCmode); + operands[2] = ia64_compare_op0; + operands[3] = ia64_compare_op1; +}") + +(define_expand "sgtu" + [(set (match_dup 1) + (gtu:CC (match_dup 2) + (match_dup 3))) + (set (match_operand:DI 0 "register_operand" "") + (ne:DI (match_dup 1) (const_int 0)))] + "" + " +{ + operands[1] = gen_reg_rtx (CCmode); + operands[2] = ia64_compare_op0; + operands[3] = ia64_compare_op1; +}") + +(define_expand "sgeu" + [(set (match_dup 1) + (geu:CC (match_dup 2) + (match_dup 3))) + (set (match_operand:DI 0 "register_operand" "") + (ne:DI (match_dup 1) (const_int 0)))] + "" + " +{ + operands[1] = gen_reg_rtx (CCmode); + operands[2] = ia64_compare_op0; + operands[3] = ia64_compare_op1; +}") + +;; Don't allow memory as destination here, because cmov/cmov/st is more +;; efficient than mov/mov/cst/cst. + +(define_insn "*sne_internal" + [(set (match_operand:DI 0 "register_operand" "=r") + (ne:DI (match_operand:CC 1 "register_operand" "c") + (const_int 0)))] + "" + "#" + [(set_attr "type" "unknown")]) + +(define_split + [(set (match_operand:DI 0 "register_operand" "") + (ne:DI (match_operand:CC 1 "register_operand" "") + (const_int 0)))] + "reload_completed" + [(set (match_dup 0) + (if_then_else:DI (ne:CC (match_dup 1) (const_int 0)) + (const_int 1) + (match_dup 0))) + (set (match_dup 0) + (if_then_else:DI (ne:CC (match_dup 1) (const_int 0)) + (match_dup 0) + (const_int 0)))] + "") + +;; ??? Unknown if this can be matched. + +(define_insn "*seq_internal" + [(set (match_operand:DI 0 "register_operand" "=r") + (eq:DI (match_operand:CC 1 "register_operand" "c") + (const_int 0)))] + "" + "#" + [(set_attr "type" "unknown")]) + +;; ??? 
Unknown if this can be matched. + +(define_split + [(set (match_operand:DI 0 "register_operand" "") + (eq:DI (match_operand:CC 1 "register_operand" "") + (const_int 0)))] + "reload_completed" + [(set (match_dup 0) + (if_then_else:DI (eq:CC (match_dup 1) (const_int 0)) + (const_int 1) + (match_dup 0))) + (set (match_dup 0) + (if_then_else:DI (eq:CC (match_dup 1) (const_int 0)) + (match_dup 0) + (const_int 0)))] + "") + + +;; :::::::::::::::::::: +;; :: +;; :: Conditional move instructions. +;; :: +;; :::::::::::::::::::: + +;; ??? Add movXXcc patterns? + +;; ??? The predicates don't match the constraints. + +;; ??? r/c/m/m and m/c/r/r alternatives make sense, but won't work until the +;; predicates are fixed, because the define_splits won't recognize them. + +;; +;; DImode if_then_else patterns. +;; + +(define_insn "*cmovne_internal" + [(set (match_operand:DI 0 "register_operand" "=r,r,m,r,r,m,r") + (if_then_else:DI (ne:CC (match_operand:CC 1 "register_operand" "c,c,c,c,c,c,c") + (const_int 0)) + (match_operand:DI 2 "reg_or_22bit_operand" "0,0,0,rI,m,r,rI") + (match_operand:DI 3 "reg_or_22bit_operand" "rI,m,r,0,0,0,rI")))] + "" + "@ + (%I1) mov %0 = %3 + (%I1) ld8%O3 %0 = %3 + (%I1) st8%Q0 %0 = %3 + (%1) mov %0 = %2 + (%1) ld8%O2 %0 = %2 + (%1) st8%Q0 %0 = %2 + #" + [(set_attr "type" "A,M,M,A,M,M,unknown")]) + +(define_split + [(set (match_operand:DI 0 "register_operand" "") + (if_then_else:DI (ne:CC (match_operand:CC 1 "register_operand" "") + (const_int 0)) + (match_operand:DI 2 "reg_or_22bit_operand" "") + (match_operand:DI 3 "reg_or_22bit_operand" "")))] + "(reload_completed + && ! rtx_equal_p (operands[0], operands[2]) + && ! rtx_equal_p (operands[0], operands[3]))" + [(set (match_dup 0) + (if_then_else:DI (ne:CC (match_dup 1) (const_int 0)) + (match_dup 2) + (match_dup 0))) + (set (match_dup 0) + (if_then_else:DI (ne:CC (match_dup 1) (const_int 0)) + (match_dup 0) + (match_dup 3)))] + "") + +;; ??? Unknown if this can be matched. 
+ +(define_insn "*cmoveq_internal" + [(set (match_operand:DI 0 "register_operand" "=r,r,m,r,r,m,r") + (if_then_else:DI (eq:CC (match_operand:CC 1 "register_operand" "c,c,c,c,c,c,c") + (const_int 0)) + (match_operand:DI 2 "reg_or_22bit_operand" "0,0,0,rI,m,r,rI") + (match_operand:DI 3 "reg_or_22bit_operand" "rI,m,r,0,0,0,rI")))] + "" + "@ + (%1) mov %0 = %3 + (%1) ld8%O3 %0 = %3 + (%1) st8%Q0 %0 = %3 + (%I1) mov %0 = %2 + (%I1) ld8%O2 %0 = %2 + (%I1) st8%Q0 %0 = %2 + #" + [(set_attr "type" "A,M,M,A,M,M,unknown")]) + +;; ??? Unknown if this can be matched. + +(define_split + [(set (match_operand:DI 0 "register_operand" "") + (if_then_else:DI (eq:CC (match_operand:CC 1 "register_operand" "") + (const_int 0)) + (match_operand:DI 2 "reg_or_22bit_operand" "") + (match_operand:DI 3 "reg_or_22bit_operand" "")))] + "(reload_completed + && ! rtx_equal_p (operands[0], operands[2]) + && ! rtx_equal_p (operands[0], operands[3]))" + [(set (match_dup 0) + (if_then_else:DI (eq:CC (match_dup 1) (const_int 0)) + (match_dup 2) + (match_dup 0))) + (set (match_dup 0) + (if_then_else:DI (eq:CC (match_dup 1) (const_int 0)) + (match_dup 0) + (match_dup 3)))] + "") + +;; Absolute value pattern. + +(define_insn "*absdi2_internal" + [(set (match_operand:DI 0 "register_operand" "=r,r") + (if_then_else:DI (ne:CC (match_operand:CC 1 "register_operand" "c,c") + (const_int 0)) + (match_operand:DI 2 "reg_or_22bit_operand" "0,rI") + (neg:DI (match_operand:DI 3 "reg_or_22bit_operand" "rI,rI"))))] + "" + "@ + (%I1) sub %0 = r0, %3 + #" + [(set_attr "type" "A,unknown")]) + +(define_split + [(set (match_operand:DI 0 "register_operand" "") + (if_then_else:DI (ne:CC (match_operand:CC 1 "register_operand" "") + (const_int 0)) + (match_operand:DI 2 "reg_or_22bit_operand" "") + (neg:DI (match_operand:DI 3 "reg_or_22bit_operand" ""))))] + "reload_completed && ! 
rtx_equal_p (operands[0], operands[2])" + [(set (match_dup 0) + (if_then_else:DI (ne:CC (match_dup 1) (const_int 0)) + (match_dup 2) + (match_dup 0))) + (set (match_dup 0) + (if_then_else:DI (ne:CC (match_dup 1) (const_int 0)) + (match_dup 0) + (neg:DI (match_dup 3))))] + "") + +;; ??? Unknown if this can be generated. If so, then add a define_split as +;; above. + +(define_insn "*absdi2_not_internal" + [(set (match_operand:DI 0 "register_operand" "=r,r") + (if_then_else:DI (ne:CC (match_operand:CC 1 "register_operand" "c,c") + (const_int 0)) + (neg:DI (match_operand:DI 2 "reg_or_22bit_operand" "rI,rI")) + (match_operand:DI 3 "reg_or_22bit_operand" "0,rI")))] + "" + "*abort ();" + [(set_attr "type" "unknown")]) + +;; +;; SImode if_then_else patterns. +;; + +(define_insn "*cmovnesi_internal" + [(set (match_operand:SI 0 "register_operand" "=r,r,m,r,r,m,r") + (if_then_else:SI (ne:CC (match_operand:CC 1 "register_operand" "c,c,c,c,c,c,c") + (const_int 0)) + (match_operand:SI 2 "reg_or_22bit_operand" "0,0,0,rI,m,r,rI") + (match_operand:SI 3 "reg_or_22bit_operand" "rI,m,r,0,0,0,rI")))] + "" + "@ + (%I1) mov %0 = %3 + (%I1) ld4%O3 %0 = %3 + (%I1) st4%Q0 %0 = %3 + (%1) mov %0 = %2 + (%1) ld4%O2 %0 = %2 + (%1) st4%Q0 %0 = %2 + #" + [(set_attr "type" "A,M,M,A,M,M,unknown")]) + +(define_split + [(set (match_operand:SI 0 "register_operand" "") + (if_then_else:SI (ne:CC (match_operand:CC 1 "register_operand" "") + (const_int 0)) + (match_operand:SI 2 "reg_or_22bit_operand" "") + (match_operand:SI 3 "reg_or_22bit_operand" "")))] + "(reload_completed + && ! rtx_equal_p (operands[0], operands[2]) + && ! 
rtx_equal_p (operands[0], operands[3]))" + [(set (match_dup 0) + (if_then_else:SI (ne:CC (match_dup 1) (const_int 0)) + (match_dup 2) + (match_dup 0))) + (set (match_dup 0) + (if_then_else:SI (ne:CC (match_dup 1) (const_int 0)) + (match_dup 0) + (match_dup 3)))] + "") + +(define_insn "*abssi2_internal" + [(set (match_operand:SI 0 "register_operand" "=r,r") + (if_then_else:SI (ne:CC (match_operand:CC 1 "register_operand" "c,c") + (const_int 0)) + (match_operand:SI 2 "reg_or_22bit_operand" "0,rI") + (neg:SI (match_operand:SI 3 "reg_or_22bit_operand" "rI,rI"))))] + "" + "@ + (%I1) sub %0 = r0, %3 + #" + [(set_attr "type" "A,unknown")]) + +(define_split + [(set (match_operand:SI 0 "register_operand" "") + (if_then_else:SI (ne:CC (match_operand:CC 1 "register_operand" "") + (const_int 0)) + (match_operand:SI 2 "reg_or_22bit_operand" "") + (neg:SI (match_operand:SI 3 "reg_or_22bit_operand" ""))))] + "reload_completed && ! rtx_equal_p (operands[0], operands[2])" + [(set (match_dup 0) + (if_then_else:SI (ne:CC (match_dup 1) (const_int 0)) + (match_dup 2) + (match_dup 0))) + (set (match_dup 0) + (if_then_else:SI (ne:CC (match_dup 1) (const_int 0)) + (match_dup 0) + (neg:SI (match_dup 3))))] + "") + + +;; :::::::::::::::::::: +;; :: +;; :: Call and branch instructions +;; :: +;; :::::::::::::::::::: + +;; Subroutine call instruction returning no value. Operand 0 is the function +;; to call; operand 1 is the number of bytes of arguments pushed (in mode +;; `SImode', except it is normally a `const_int'); operand 2 is the number of +;; registers used as operands. + +;; On most machines, operand 2 is not actually stored into the RTL pattern. It +;; is supplied for the sake of some RISC machines which need to put this +;; information into the assembler code; they can put it in the RTL instead of +;; operand 1. 
+ +(define_expand "call" + [(use (match_operand:DI 0 "" "")) + (use (match_operand 1 "" "")) + (use (match_operand 2 "" "")) + (use (match_operand 3 "" ""))] + "" + " +{ + /* ??? Stripping off the MEM isn't correct. Will lose alias info. */ + rtx addr = XEXP (operands[0], 0); + enum machine_mode mode = GET_MODE (addr); + + if (TARGET_NO_PIC) + emit_call_insn (gen_call_internal (addr, operands[1], + gen_rtx_REG (DImode, R_BR (0)))); + + /* If this is an indirect call, then we have the address of a descriptor. */ + else if (! symbolic_operand (addr, mode)) + emit_insn (gen_indirect_call_pic (addr, operands[1])); + /* ??? This is an unsatisfying solution. Should rethink. */ + else if (setjmp_operand (addr, mode)) + emit_insn (gen_setjmp_call_pic (addr, operands[1])); + else + emit_insn (gen_call_pic (addr, operands[1])); + + DONE; +}") + +(define_expand "indirect_call_pic" + [(set (match_dup 2) (reg:DI 1)) + (set (match_dup 3) (mem:DI (match_operand 0 "" ""))) + (set (match_dup 4) (plus:DI (match_dup 0) (const_int 8))) + (set (reg:DI 1) (mem:DI (match_dup 4))) + (parallel [(call (mem:DI (match_dup 3)) (match_operand 1 "" "")) + (use (reg:DI 1)) + (clobber (reg:DI 320))]) + (set (reg:DI 1) (match_dup 2))] + "" + " +{ + operands[2] = gen_reg_rtx (DImode); + operands[3] = gen_reg_rtx (DImode); + operands[4] = gen_reg_rtx (DImode); +}") + +;; We can't save GP in a pseudo if we are calling setjmp, because pseudos +;; won't be restored by longjmp. For now, we save it in r4. + +;; ??? It would be more efficient to save this directly into a stack slot. +;; Unfortunately, the stack slot address gets cse'd across the setjmp call +;; because the NOTE_INSN_SETJMP note is in the wrong place. + +;; ??? This is an unsatisfying solution. Should rethink. 
+ +(define_expand "setjmp_call_pic" + [(set (match_dup 2) (reg:DI 1)) + (parallel [(call (mem:DI (match_operand 0 "" "")) (match_operand 1 "" "")) + (use (reg:DI 1)) + (clobber (reg:DI 320))]) + (set (reg:DI 1) (match_dup 2))] + "" + " +{ + operands[2] = gen_rtx_REG (DImode, GR_REG (4)); +}") + +;; ??? Saving/restoring the GP register is not needed if we are calling +;; a function in the same module. + +(define_expand "call_pic" + [(set (match_dup 2) (reg:DI 1)) + (parallel [(call (mem:DI (match_operand 0 "" "")) (match_operand 1 "" "")) + (use (reg:DI 1)) + (clobber (reg:DI 320))]) + (set (reg:DI 1) (match_dup 2))] + "" + " +{ + operands[2] = gen_reg_rtx (DImode); +}") + +;; ??? A call must end a group, otherwise, the assembler might pack it in +;; a group with a following branch, and then the function return goes to the +;; wrong place. We could perhaps handle this in emit_insn_group_barriers. + +(define_insn "call_internal" + [(call (mem:DI (match_operand:DI 0 "call_operand" "bi")) + (match_operand 1 "" "")) + (clobber (match_operand:DI 2 "register_operand" "=b"))] + "" + "br.call.sptk.many %2 = %0 ;;" + [(set_attr "type" "B")]) + +(define_insn "*call_internal1" + [(call (mem:DI (match_operand:DI 0 "call_operand" "bi")) + (match_operand 1 "" "")) + (use (reg:DI 1)) + (clobber (match_operand:DI 2 "register_operand" "=b"))] + "" + "br.call.sptk.many %2 = %0 ;;" + [(set_attr "type" "B")]) + +;; Subroutine call instruction returning a value. Operand 0 is the hard +;; register in which the value is returned. There are three more operands, the +;; same as the three operands of the `call' instruction (but with numbers +;; increased by one). + +;; Subroutines that return `BLKmode' objects use the `call' insn. + +(define_expand "call_value" + [(use (match_operand 0 "" "")) + (use (match_operand:DI 1 "" "")) + (use (match_operand 2 "" "")) + (use (match_operand 3 "" "")) + (use (match_operand 4 "" ""))] + "" + " +{ + /* ??? Stripping off the MEM isn't correct. 
Will lose alias info. */ + rtx addr = XEXP (operands[1], 0); + enum machine_mode mode = GET_MODE (addr); + + if (TARGET_NO_PIC) + emit_call_insn (gen_call_value_internal (operands[0], addr, operands[2], + gen_rtx_REG (DImode, R_BR (0)))); + + /* If this is an indirect call, then we have the address of a descriptor. */ + else if (! symbolic_operand (addr, mode)) + { + /* This is for HFA returns. */ + if (GET_CODE (operands[0]) == PARALLEL) + emit_insn (gen_indirect_call_multiple_values_pic (operands[0], addr, + operands[2])); + else + emit_insn (gen_indirect_call_value_pic (operands[0], addr, + operands[2])); + } + /* ??? This is an unsatisfying solution. Should rethink. */ + else if (setjmp_operand (addr, mode)) + emit_insn (gen_setjmp_call_value_pic (operands[0], addr, operands[2])); + /* This is for HFA returns. */ + else if (GET_CODE (operands[0]) == PARALLEL) + emit_insn (gen_call_multiple_values_pic (operands[0], addr, operands[2])); + else + emit_insn (gen_call_value_pic (operands[0], addr, operands[2])); + + DONE; +}") + +(define_expand "indirect_call_value_pic" + [(set (match_dup 3) (reg:DI 1)) + (set (match_dup 4) (mem:DI (match_operand 1 "" ""))) + (set (match_dup 5) (plus:DI (match_dup 1) (const_int 8))) + (set (reg:DI 1) (mem:DI (match_dup 5))) + (parallel [(set (match_operand 0 "" "") + (call (mem:DI (match_dup 4)) (match_operand 2 "" ""))) + (use (reg:DI 1)) + (clobber (reg:DI 320))]) + (set (reg:DI 1) (match_dup 3))] + "" + " +{ + operands[3] = gen_reg_rtx (DImode); + operands[4] = gen_reg_rtx (DImode); + operands[5] = gen_reg_rtx (DImode); +}") + +(define_expand "indirect_call_multiple_values_pic" + [(set (match_dup 3) (reg:DI 1)) + (set (match_dup 4) (mem:DI (match_operand 1 "" ""))) + (set (match_dup 5) (plus:DI (match_dup 1) (const_int 8))) + (set (reg:DI 1) (mem:DI (match_dup 5))) + (match_par_dup 6 [(set (match_operand 0 "" "") + (call (mem:DI (match_dup 4)) + (match_operand 2 "" ""))) + (use (reg:DI 1)) + (clobber (reg:DI 320))]) + (set 
(reg:DI 1) (match_dup 3))] + "" + " +{ + int count; + int i; + rtx call; + + operands[3] = gen_reg_rtx (DImode); + operands[4] = gen_reg_rtx (DImode); + operands[5] = gen_reg_rtx (DImode); + + /* This code is the same as the code in call_multiple_values_pic, except + that op3 was replaced with op6 and op1 was replaced with op4. */ + call = gen_rtx_CALL (VOIDmode, gen_rtx_MEM (DImode, operands[4]), + operands[2]); + + count = XVECLEN (operands[0], 0); + operands[6] = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (count + 2)); + + XVECEXP (operands[6], 0, 0) + = gen_rtx_SET (VOIDmode, XEXP (XVECEXP (operands[0], 0, 0), 0), call); + + XVECEXP (operands[6], 0, 1) + = gen_rtx_USE (DImode, gen_rtx_REG (DImode, GR_REG (1))); + XVECEXP (operands[6], 0, 2) + = gen_rtx_CLOBBER (DImode, gen_rtx_REG (DImode, BR_REG (0))); + + for (i = 1; i < count; i++) + XVECEXP (operands[6], 0, i + 2) + = gen_rtx_SET (VOIDmode, XEXP (XVECEXP (operands[0], 0, i), 0), call); + +}") + +;; We can't save GP in a pseudo if we are calling setjmp, because pseudos +;; won't be restored by longjmp. For now, we save it in r4. + +;; ??? It would be more efficient to save this directly into a stack slot. +;; Unfortunately, the stack slot address gets cse'd across the setjmp call +;; because the NOTE_INSN_SETJMP note is in the wrong place. + +;; ??? This is an unsatisfying solution. Should rethink. + +(define_expand "setjmp_call_value_pic" + [(set (match_dup 3) (reg:DI 1)) + (parallel [(set (match_operand 0 "" "") + (call (mem:DI (match_operand 1 "" "")) + (match_operand 2 "" ""))) + (use (reg:DI 1)) + (clobber (reg:DI 320))]) + (set (reg:DI 1) (match_dup 3))] + "" + " +{ + operands[3] = gen_rtx_REG (DImode, GR_REG (4)); +}") + +;; ??? Saving/restoring the GP register is not needed if we are calling +;; a function in the same module. 
+ +(define_expand "call_value_pic" + [(set (match_dup 3) (reg:DI 1)) + (parallel [(set (match_operand 0 "" "") + (call (mem:DI (match_operand 1 "" "")) + (match_operand 2 "" ""))) + (use (reg:DI 1)) + (clobber (reg:DI 320))]) + (set (reg:DI 1) (match_dup 3))] + "" + " +{ + operands[3] = gen_reg_rtx (DImode); +}") + +;; ??? Saving/restoring the GP register is not needed if we are calling +;; a function in the same module. + +(define_expand "call_multiple_values_pic" + [(set (match_dup 4) (reg:DI 1)) + (match_par_dup 3 [(set (match_operand 0 "" "") + (call (mem:DI (match_operand 1 "" "")) + (match_operand 2 "" ""))) + (use (reg:DI 1)) + (clobber (reg:DI 320))]) + (set (reg:DI 1) (match_dup 4))] + "" + " +{ + int count; + int i; + rtx call; + + operands[4] = gen_reg_rtx (DImode); + + call = gen_rtx_CALL (VOIDmode, gen_rtx_MEM (DImode, operands[1]), + operands[2]); + + count = XVECLEN (operands[0], 0); + operands[3] = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (count + 2)); + + XVECEXP (operands[3], 0, 0) + = gen_rtx_SET (VOIDmode, XEXP (XVECEXP (operands[0], 0, 0), 0), call); + + XVECEXP (operands[3], 0, 1) + = gen_rtx_USE (DImode, gen_rtx_REG (DImode, GR_REG (1))); + XVECEXP (operands[3], 0, 2) + = gen_rtx_CLOBBER (DImode, gen_rtx_REG (DImode, BR_REG (0))); + + for (i = 1; i < count; i++) + XVECEXP (operands[3], 0, i + 2) + = gen_rtx_SET (VOIDmode, XEXP (XVECEXP (operands[0], 0, i), 0), call); +}") + +;; ??? A call must end a group, otherwise, the assembler might pack it in +;; a group with a following branch, and then the function return goes to the +;; wrong place. We could perhaps handle this in emit_insn_group_barriers. 
+ +(define_insn "call_value_internal" + [(set (match_operand 0 "register_operand" "=rf") + (call (mem:DI (match_operand:DI 1 "call_operand" "bi")) + (match_operand 2 "" ""))) + (clobber (match_operand:DI 3 "register_operand" "=b"))] + "" + "br.call.sptk.many %3 = %1 ;;" + [(set_attr "type" "B")]) + +(define_insn "*call_value_internal1" + [(set (match_operand 0 "register_operand" "=rf") + (call (mem:DI (match_operand:DI 1 "call_operand" "bi")) + (match_operand 2 "" ""))) + (use (reg:DI 1)) + (clobber (match_operand:DI 3 "register_operand" "=b"))] + "" + "br.call.sptk.many %3 = %1 ;;" + [(set_attr "type" "B")]) + +(define_insn "*call_multiple_values_internal1" + [(match_parallel 0 "call_multiple_values_operation" + [(set (match_operand 1 "register_operand" "=rf") + (call (mem:DI (match_operand:DI 2 "call_operand" "bi")) + (match_operand 3 "" ""))) + (use (reg:DI 1)) + (clobber (match_operand:DI 4 "register_operand" "=b"))])] + "" + "br.call.sptk.many %4 = %2 ;;" + [(set_attr "type" "B")]) + +;; Call subroutine returning any type. + +(define_expand "untyped_call" + [(parallel [(call (match_operand 0 "" "") + (const_int 0)) + (match_operand 1 "" "") + (match_operand 2 "" "")])] + "" + " +{ + int i; + + emit_call_insn (gen_call (operands[0], const0_rtx, NULL, const0_rtx)); + + for (i = 0; i < XVECLEN (operands[2], 0); i++) + { + rtx set = XVECEXP (operands[2], 0, i); + emit_move_insn (SET_DEST (set), SET_SRC (set)); + } + + /* The optimizer does not know that the call sets the function value + registers we stored in the result block. We avoid problems by + claiming that all hard registers are used and clobbered at this + point. 
*/ + emit_insn (gen_blockage ()); + + DONE; +}") + +(define_insn "return_internal" + [(return) + (use (match_operand:DI 0 "register_operand" "b"))] + "" + "br.ret.sptk.many %0" + [(set_attr "type" "B")]) + +(define_insn "return" + [(return)] + "ia64_direct_return ()" + "br.ret.sptk.many rp" + [(set_attr "type" "B")]) + +(define_insn "*eq_return" + [(set (pc) + (if_then_else (eq:CC (match_operand:CC 0 "register_operand" "c") + (const_int 0)) + (return) + (pc)))] + "ia64_direct_return ()" + "(%I0) br.ret.sptk.many rp" + [(set_attr "type" "B")]) + +(define_insn "*eq_not_return" + [(set (pc) + (if_then_else (eq:CC (match_operand:CC 0 "register_operand" "c") + (const_int 0)) + (pc) + (return)))] + "ia64_direct_return ()" + "(%0) br.ret.sptk.many rp" + [(set_attr "type" "B")]) + +(define_insn "*ne_return" + [(set (pc) + (if_then_else (ne (match_operand:CC 0 "register_operand" "c") + (const_int 0)) + (return) + (pc)))] + "ia64_direct_return ()" + "(%0) br.ret.sptk.many rp" + [(set_attr "type" "B")]) + +(define_insn "*ne_not_return" + [(set (pc) + (if_then_else (ne (match_operand:CC 0 "register_operand" "c") + (const_int 0)) + (pc) + (return)))] + "ia64_direct_return ()" + "(%I0) br.ret.sptk.many rp" + [(set_attr "type" "B")]) + +(define_insn "jump" + [(set (pc) (label_ref (match_operand 0 "" "")))] + "" + "br %l0" + [(set_attr "type" "B")]) + +(define_insn "indirect_jump" + [(set (pc) (match_operand:DI 0 "register_operand" "b"))] + "" + "br %0" + [(set_attr "type" "B")]) + +(define_expand "tablejump" + [(match_operand:DI 0 "register_operand" "") + (match_operand 1 "" "")] + "" + " +{ + rtx tmp1 = gen_reg_rtx (DImode); + rtx tmp2 = gen_reg_rtx (DImode); + + emit_move_insn (tmp1, gen_rtx_LABEL_REF (Pmode, operands[1])); + emit_insn (gen_adddi3 (tmp2, operands[0], tmp1)); + emit_jump_insn (gen_tablejump_internal (tmp2, operands[1])); + DONE; +}") + +(define_insn "tablejump_internal" + [(set (pc) (match_operand:DI 0 "register_operand" "b")) + (use (label_ref (match_operand 1 
"" "")))] + "" + "br %0" + [(set_attr "type" "B")]) + + +;; :::::::::::::::::::: +;; :: +;; :: Prologue and Epilogue instructions +;; :: +;; :::::::::::::::::::: + +(define_expand "prologue" + [(const_int 1)] + "" + " +{ + ia64_expand_prologue (); + DONE; +}") + +(define_expand "epilogue" + [(const_int 2)] + "" + " +{ + ia64_expand_epilogue (); + DONE; +}") + +;; This prevents the scheduler from moving the SP decrement past FP-relative +;; stack accesses. This is the same as adddi3 plus the extra set. + +(define_insn "prologue_allocate_stack" + [(set (match_operand:DI 0 "register_operand" "=r,r,r") + (plus:DI (match_operand:DI 1 "register_operand" "%r,r,a") + (match_operand:DI 2 "reg_or_22bit_operand" "r,I,J"))) + (set (match_operand:DI 3 "register_operand" "=r,r,r") + (match_dup 3))] + "" + "@ + add %0 = %1, %2 + adds %0 = %2, %1 + addl %0 = %2, %1" + [(set_attr "type" "A")]) + +;; This prevents the scheduler from moving the SP restore past FP-relative +;; stack accesses. This is similar to movdi plus the extra set. + +(define_insn "epilogue_deallocate_stack" + [(set (match_operand:DI 0 "register_operand" "=r") + (match_operand:DI 1 "register_operand" "+r")) + (set (match_dup 1) (match_dup 1))] + "" + "mov %0 = %1" + [(set_attr "type" "A")]) + +;; Allocate a new register frame. 
+ +(define_insn "alloc" + [(set (match_operand:DI 0 "register_operand" "=r") + (unspec_volatile:DI [(const_int 0)] 0)) + (use (match_operand:DI 1 "const_int_operand" "i")) + (use (match_operand:DI 2 "const_int_operand" "i")) + (use (match_operand:DI 3 "const_int_operand" "i")) + (use (match_operand:DI 4 "const_int_operand" "i"))] + "" + "alloc %0 = ar.pfs, %1, %2, %3, %4" + [(set_attr "type" "M")]) + +(define_insn "gr_spill" + [(set (match_operand:DI 0 "memory_operand" "=m") + (unspec:DI [(match_operand:DI 1 "register_operand" "r")] 1))] + "" + "st8.spill %0 = %1%P0" + [(set_attr "type" "M")]) + +(define_insn "gr_restore" + [(set (match_operand:DI 0 "register_operand" "=r") + (unspec:DI [(match_operand:DI 1 "memory_operand" "m")] 2))] + "" + "ld8.fill %0 = %1%P1" + [(set_attr "type" "M")]) + +(define_insn "fr_spill" + [(set (match_operand:XF 0 "memory_operand" "=m") + (unspec:XF [(match_operand:XF 1 "register_operand" "f*e")] 3))] + "" + "stf.spill %0 = %1%P0" + [(set_attr "type" "M")]) + +(define_insn "fr_restore" + [(set (match_operand:XF 0 "register_operand" "=f*e") + (unspec:XF [(match_operand:XF 1 "memory_operand" "m")] 4))] + "" + "ldf.fill %0 = %1%P1" + [(set_attr "type" "M")]) + +(define_insn "pr_spill" + [(set (match_operand:DI 0 "register_operand" "=r") + (unspec:DI [(const_int 0)] 5))] + "" + "mov %0 = pr" + [(set_attr "type" "I")]) + +(define_insn "pr_restore" + [(unspec [(const_int 0)] 6) + (use (match_operand:DI 0 "register_operand" "r"))] + "" + "mov pr = %0, -1" + [(set_attr "type" "I")]) + +;; ??? This is volatile to prevent it from being moved before a call. +;; Should instead add a ar.pfs hard register which is call clobbered. 
+ +(define_insn "pfs_restore" + [(unspec_volatile [(const_int 0)] 4) + (use (match_operand:DI 0 "register_operand" "r"))] + "" + "mov ar.pfs = %0" + [(set_attr "type" "I")]) + +(define_insn "unat_spill" + [(set (match_operand:DI 0 "register_operand" "=r") + (unspec:DI [(const_int 0)] 9))] + "" + "mov %0 = ar.unat" + [(set_attr "type" "M")]) + +(define_insn "unat_restore" + [(unspec [(const_int 0)] 10) + (use (match_operand:DI 0 "register_operand" "r"))] + "" + "mov ar.unat = %0" + [(set_attr "type" "M")]) + + +;; :::::::::::::::::::: +;; :: +;; :: Miscellaneous instructions +;; :: +;; :::::::::::::::::::: + +;; ??? Emiting a NOP instruction isn't very useful. This should probably +;; be emitting ";;" to force a break in the instruction packing. + +;; No operation, needed in case the user uses -g but not -O. +(define_insn "nop" + [(const_int 0)] + "" + "nop 0" + [(set_attr "type" "unknown")]) + +;; Pseudo instruction that prevents the scheduler from moving code above this +;; point. +(define_insn "blockage" + [(unspec_volatile [(const_int 0)] 1)] + "" + "" + [(set_attr "type" "unknown")]) + +(define_insn "insn_group_barrier" + [(unspec_volatile [(const_int 0)] 2)] + "" + ";;" + [(set_attr "type" "S")]) + + +;; Non-local goto support. 
+ +(define_expand "save_stack_nonlocal" + [(use (match_operand:OI 0 "memory_operand" "")) + (use (match_operand:DI 1 "register_operand" ""))] + "" + " +{ + emit_library_call (gen_rtx_SYMBOL_REF (Pmode, + \"__ia64_save_stack_nonlocal\"), + 0, VOIDmode, 2, XEXP (operands[0], 0), Pmode, + operands[1], Pmode); + DONE; +}") + +(define_expand "nonlocal_goto" + [(use (match_operand 0 "general_operand" "")) + (use (match_operand 1 "general_operand" "")) + (use (match_operand 2 "general_operand" "")) + (use (match_operand 3 "general_operand" ""))] + "" + " +{ + if (GET_CODE (operands[0]) != REG) + operands[0] = force_reg (Pmode, operands[0]); + emit_move_insn (virtual_stack_vars_rtx, operands[0]); + emit_insn (gen_rtx_USE (VOIDmode, frame_pointer_rtx)); + emit_insn (gen_rtx_USE (VOIDmode, stack_pointer_rtx)); + emit_insn (gen_rtx_USE (VOIDmode, static_chain_rtx)); + emit_library_call (gen_rtx_SYMBOL_REF (Pmode, \"__ia64_nonlocal_goto\"), + 0, VOIDmode, 4, + operands[0], Pmode, operands[1], Pmode, + copy_to_reg (XEXP (operands[2], 0)), Pmode, + operands[3], Pmode); + emit_barrier (); + DONE; +}") + +;; ??? We need this because the function __ia64_nonlocal_goto can't easily +;; access the FP which is currently stored in a local register. Maybe move +;; the FP to a global register to avoid this problem? + +(define_expand "nonlocal_goto_receiver" + [(use (const_int 0))] + "" + " +{ + emit_move_insn (frame_pointer_rtx, gen_rtx_REG (DImode, GR_REG (7))); + DONE; +}") + +;; This flushes at least 64 bytes starting from the address pointed +;; to by operand[0]. + +;; ??? This should be a define expand. 
+ +(define_insn "flush_cache" + [(unspec_volatile [(match_operand:DI 0 "register_operand" "=&r")] 3)] + "" + "fc %0\;;;\;adds %0=31,%0\;;;\;fc %0\;;;\;sync.i\;srlz.i" + [(set_attr "type" "unknown")]) + +(define_insn "ccv_restore_si" + [(unspec [(const_int 0)] 11) + (use (match_operand:SI 0 "register_operand" "r"))] + "" + "mov ar.ccv = %0" + [(set_attr "type" "M")]) + +(define_insn "ccv_restore_di" + [(unspec [(const_int 0)] 11) + (use (match_operand:DI 0 "register_operand" "r"))] + "" + "mov ar.ccv = %0" + [(set_attr "type" "M")]) + +(define_insn "mf" + [(unspec [(match_operand:BLK 0 "memory_operand" "m")] 12)] + "" + "mf" + [(set_attr "type" "M")]) + +(define_insn "fetchadd_acq_si" + [(set (match_operand:SI 0 "register_operand" "=r") + (unspec:SI [(match_operand:SI 1 "memory_operand" "m") + (match_operand:SI 2 "fetchadd_operand" "n")] 19))] + "" + "fetchadd4.acq %0 = %1, %2" + [(set_attr "type" "M")]) + +(define_insn "fetchadd_acq_di" + [(set (match_operand:DI 0 "register_operand" "=r") + (unspec:DI [(match_operand:DI 1 "memory_operand" "m") + (match_operand:DI 2 "fetchadd_operand" "n")] 19))] + "" + "fetchadd8.acq %0 = %1, %2" + [(set_attr "type" "M")]) + +(define_insn "cmpxchg_acq_si" + [(set (match_operand:SI 0 "register_operand" "=r") + (unspec:SI [(match_operand:SI 1 "memory_operand" "m") + (match_operand:SI 2 "register_operand" "r")] 13))] + "" + "cmpxchg4.acq %0 = %1, %2, ar.ccv" + [(set_attr "type" "M")]) + +(define_insn "cmpxchg_acq_di" + [(set (match_operand:DI 0 "register_operand" "=r") + (unspec:DI [(match_operand:DI 1 "memory_operand" "m") + (match_operand:DI 2 "register_operand" "r")] 13))] + "" + "cmpxchg8.acq %0 = %1, %2, ar.ccv" + [(set_attr "type" "M")]) + +(define_expand "val_compare_and_swap_si" + [(set (match_operand:SI 0 "register_operand" "=r") + (unspec:SI [(match_operand:SI 1 "memory_operand" "m") + (match_operand:SI 2 "register_operand" "r") + (match_operand:SI 3 "register_operand" "r")] 14))] + "" + " +{ + rtx tmp_reg = gen_rtx_REG 
(DImode, GR_REG(0)); + rtx target = gen_rtx_MEM (BLKmode, tmp_reg); + RTX_UNCHANGING_P (target) = 1; + emit_insn (gen_ccv_restore_si (operands[2])); + emit_insn (gen_mf (target)); + emit_insn (gen_cmpxchg_acq_si (operands[0], operands[1], operands[3])); + DONE; +}") + +(define_expand "val_compare_and_swap_di" + [(set (match_operand:DI 0 "register_operand" "=r") + (unspec:DI [(match_operand:DI 1 "memory_operand" "m") + (match_operand:DI 2 "register_operand" "r") + (match_operand:DI 3 "register_operand" "r")] 14))] + "" + " +{ + rtx tmp_reg = gen_rtx_REG (DImode, GR_REG(0)); + rtx target = gen_rtx_MEM (BLKmode, tmp_reg); + RTX_UNCHANGING_P (target) = 1; + emit_insn (gen_ccv_restore_di (operands[2])); + emit_insn (gen_mf (target)); + emit_insn (gen_cmpxchg_acq_di (operands[0], operands[1], operands[3])); + DONE; +}") + +(define_insn "xchgsi" + [(set (match_operand:SI 0 "register_operand" "=r") + (match_operand:SI 1 "memory_operand" "+m")) + (set (match_dup 1) + (match_operand:SI 2 "register_operand" "r"))] + "" + "xchg4 %0 = %1, %2" + [(set_attr "type" "M")]) + +(define_insn "xchgdi" + [(set (match_operand:DI 0 "register_operand" "=r") + (match_operand:DI 1 "memory_operand" "+m")) + (set (match_dup 1) + (match_operand:DI 2 "register_operand" "r"))] + "" + "xchg8 %0 = %1, %2" + [(set_attr "type" "M")]) + +(define_expand "lock_test_and_set_si" + [(set (match_operand:SI 0 "register_operand" "r") + (unspec:SI [(match_operand:SI 1 "memory_operand" "m") + (match_operand:SI 2 "register_operand" "r")] 16))] + "" + " +{ + emit_insn (gen_xchgsi (operands[0], operands[1], operands[2])); + DONE; +}") + +(define_expand "lock_test_and_set_di" + [(set (match_operand:DI 0 "register_operand" "r") + (unspec:DI [(match_operand:DI 1 "memory_operand" "m") + (match_operand:DI 2 "register_operand" "r")] 16))] + "" + " +{ + emit_insn (gen_xchgdi (operands[0], operands[1], operands[2])); + DONE; +}") + +(define_expand "fetch_and_add_si" + [(set (match_operand:SI 0 "register_operand" "r") + 
(unspec:SI [(match_operand:SI 1 "memory_operand" "m") + (match_operand:SI 2 "nonmemory_operand" "")] 18))] + "" + " +{ + int x; + + if (GET_CODE (operands[2]) == CONST_INT) + { + x = INTVAL(operands[2]); + if (x == -16 || x == -8 || x == -4 || x == -1 || + x == 16 || x == 8 || x == 4 || x == 1) + { + emit_insn (gen_fetchadd_acq_si (operands[0], operands[1], operands[2])); + DONE; + } + } + + ia64_expand_fetch_and_op (IA64_ADD_OP, SImode, operands); + DONE; +}") + +(define_expand "fetch_and_sub_si" + [(set (match_operand:SI 0 "register_operand" "r") + (unspec:SI [(match_operand:SI 1 "memory_operand" "m") + (match_operand:SI 2 "register_operand" "r")] 18))] + "" + " +{ + ia64_expand_fetch_and_op (IA64_SUB_OP, SImode, operands); + DONE; +}") + +(define_expand "fetch_and_or_si" + [(set (match_operand:SI 0 "register_operand" "r") + (unspec:SI [(match_operand:SI 1 "memory_operand" "m") + (match_operand:SI 2 "register_operand" "r")] 18))] + "" + " +{ + ia64_expand_fetch_and_op (IA64_OR_OP, SImode, operands); + DONE; +}") + +(define_expand "fetch_and_and_si" + [(set (match_operand:SI 0 "register_operand" "r") + (unspec:SI [(match_operand:SI 1 "memory_operand" "m") + (match_operand:SI 2 "register_operand" "r")] 18))] + "" + " +{ + ia64_expand_fetch_and_op (IA64_AND_OP, SImode, operands); + DONE; +}") + +(define_expand "fetch_and_xor_si" + [(set (match_operand:SI 0 "register_operand" "r") + (unspec:SI [(match_operand:SI 1 "memory_operand" "m") + (match_operand:SI 2 "register_operand" "r")] 18))] + "" + " +{ + ia64_expand_fetch_and_op (IA64_XOR_OP, SImode, operands); + DONE; +}") + +(define_expand "fetch_and_nand_si" + [(set (match_operand:SI 0 "register_operand" "r") + (unspec:SI [(match_operand:SI 1 "memory_operand" "m") + (match_operand:SI 2 "register_operand" "r")] 18))] + "" + " +{ + ia64_expand_fetch_and_op (IA64_NAND_OP, SImode, operands); + DONE; +}") + +(define_expand "fetch_and_add_di" + [(set (match_operand:DI 0 "register_operand" "r") + (unspec:DI 
[(match_operand:DI 1 "memory_operand" "m") + (match_operand:DI 2 "nonmemory_operand" "")] 18))] + "" + " +{ + int x; + + if (GET_CODE (operands[2]) == CONST_INT) + { + x = INTVAL(operands[2]); + if (x == -16 || x == -8 || x == -4 || x == -1 || + x == 16 || x == 8 || x == 4 || x == 1) + { + emit_insn (gen_fetchadd_acq_di (operands[0], operands[1], operands[2])); + DONE; + } + } + + ia64_expand_fetch_and_op (IA64_ADD_OP, DImode, operands); + DONE; +}") + +(define_expand "fetch_and_sub_di" + [(set (match_operand:DI 0 "register_operand" "r") + (unspec:DI [(match_operand:DI 1 "memory_operand" "m") + (match_operand:DI 2 "register_operand" "r")] 18))] + "" + " +{ + ia64_expand_fetch_and_op (IA64_SUB_OP, DImode, operands); + DONE; +}") + +(define_expand "fetch_and_or_di" + [(set (match_operand:DI 0 "register_operand" "r") + (unspec:DI [(match_operand:DI 1 "memory_operand" "m") + (match_operand:DI 2 "register_operand" "r")] 18))] + "" + " +{ + ia64_expand_fetch_and_op (IA64_OR_OP, DImode, operands); + DONE; +}") + +(define_expand "fetch_and_and_di" + [(set (match_operand:DI 0 "register_operand" "r") + (unspec:DI [(match_operand:DI 1 "memory_operand" "m") + (match_operand:DI 2 "register_operand" "r")] 18))] + "" + " +{ + ia64_expand_fetch_and_op (IA64_AND_OP, DImode, operands); + DONE; +}") + +(define_expand "fetch_and_xor_di" + [(set (match_operand:DI 0 "register_operand" "r") + (unspec:DI [(match_operand:DI 1 "memory_operand" "m") + (match_operand:DI 2 "register_operand" "r")] 18))] + "" + " +{ + ia64_expand_fetch_and_op (IA64_XOR_OP, DImode, operands); + DONE; +}") + +(define_expand "fetch_and_nand_di" + [(set (match_operand:DI 0 "register_operand" "r") + (unspec:DI [(match_operand:DI 1 "memory_operand" "m") + (match_operand:DI 2 "register_operand" "r")] 18))] + "" + " +{ + ia64_expand_fetch_and_op (IA64_NAND_OP, DImode, operands); + DONE; +}") + +(define_expand "add_and_fetch_di" + [(set (match_operand:DI 0 "register_operand" "r") + (unspec:DI [(match_operand:DI 1 
"memory_operand" "m") + (match_operand:DI 2 "register_operand" "r")] 17))] + "" + " +{ + ia64_expand_op_and_fetch (IA64_ADD_OP, DImode, operands); + DONE; +}") + +(define_expand "sub_and_fetch_di" + [(set (match_operand:DI 0 "register_operand" "r") + (unspec:DI [(match_operand:DI 1 "memory_operand" "m") + (match_operand:DI 2 "register_operand" "r")] 17))] + "" + " +{ + ia64_expand_op_and_fetch (IA64_SUB_OP, DImode, operands); + DONE; +}") + +(define_expand "or_and_fetch_di" + [(set (match_operand:DI 0 "register_operand" "r") + (unspec:DI [(match_operand:DI 1 "memory_operand" "m") + (match_operand:DI 2 "register_operand" "r")] 17))] + "" + " +{ + ia64_expand_op_and_fetch (IA64_OR_OP, DImode, operands); + DONE; +}") + +(define_expand "and_and_fetch_di" + [(set (match_operand:DI 0 "register_operand" "r") + (unspec:DI [(match_operand:DI 1 "memory_operand" "m") + (match_operand:DI 2 "register_operand" "r")] 17))] + "" + " +{ + ia64_expand_op_and_fetch (IA64_AND_OP, DImode, operands); + DONE; +}") + +(define_expand "xor_and_fetch_di" + [(set (match_operand:DI 0 "register_operand" "r") + (unspec:DI [(match_operand:DI 1 "memory_operand" "m") + (match_operand:DI 2 "register_operand" "r")] 17))] + "" + " +{ + ia64_expand_op_and_fetch (IA64_XOR_OP, DImode, operands); + DONE; +}") + +(define_expand "nand_and_fetch_di" + [(set (match_operand:DI 0 "register_operand" "r") + (unspec:DI [(match_operand:DI 1 "memory_operand" "m") + (match_operand:DI 2 "register_operand" "r")] 17))] + "" + " +{ + ia64_expand_op_and_fetch (IA64_NAND_OP, DImode, operands); + DONE; +}") + +(define_expand "add_and_fetch_si" + [(set (match_operand:SI 0 "register_operand" "r") + (unspec:SI [(match_operand:SI 1 "memory_operand" "m") + (match_operand:SI 2 "register_operand" "r")] 17))] + "" + " +{ + ia64_expand_op_and_fetch (IA64_ADD_OP, SImode, operands); + DONE; +}") + +(define_expand "sub_and_fetch_si" + [(set (match_operand:SI 0 "register_operand" "r") + (unspec:SI [(match_operand:SI 1 "memory_operand" 
"m") + (match_operand:SI 2 "register_operand" "r")] 17))] + "" + " +{ + ia64_expand_op_and_fetch (IA64_SUB_OP, SImode, operands); + DONE; +}") + +(define_expand "or_and_fetch_si" + [(set (match_operand:SI 0 "register_operand" "r") + (unspec:SI [(match_operand:SI 1 "memory_operand" "m") + (match_operand:SI 2 "register_operand" "r")] 17))] + "" + " +{ + ia64_expand_op_and_fetch (IA64_OR_OP, SImode, operands); + DONE; +}") + +(define_expand "and_and_fetch_si" + [(set (match_operand:SI 0 "register_operand" "r") + (unspec:SI [(match_operand:SI 1 "memory_operand" "m") + (match_operand:SI 2 "register_operand" "r")] 17))] + "" + " +{ + ia64_expand_op_and_fetch (IA64_AND_OP, SImode, operands); + DONE; +}") + +(define_expand "xor_and_fetch_si" + [(set (match_operand:SI 0 "register_operand" "r") + (unspec:SI [(match_operand:SI 1 "memory_operand" "m") + (match_operand:SI 2 "register_operand" "r")] 17))] + "" + " +{ + ia64_expand_op_and_fetch (IA64_XOR_OP, SImode, operands); + DONE; +}") + +(define_expand "nand_and_fetch_si" + [(set (match_operand:SI 0 "register_operand" "r") + (unspec:SI [(match_operand:SI 1 "memory_operand" "m") + (match_operand:SI 2 "register_operand" "r")] 17))] + "" + " +{ + ia64_expand_op_and_fetch (IA64_NAND_OP, SImode, operands); + DONE; +}") diff --git a/gcc/config/ia64/ia64intrin.h b/gcc/config/ia64/ia64intrin.h new file mode 100644 index 0000000..c75f185 --- /dev/null +++ b/gcc/config/ia64/ia64intrin.h @@ -0,0 +1,60 @@ +#ifndef _IA64INTRIN_H_INCLUDED +#define _IA64INTRIN_H_INCLUDED + +void __sync_synchronize (void); + +int __sync_val_compare_and_swap_si (int *, int, int); +long __sync_val_compare_and_swap_di (long *, long, long); +#define __sync_val_compare_and_swap(A,B,C) ((sizeof (*(A)) == sizeof(int)) ? 
__sync_val_compare_and_swap_si((int *)(A),(int)(B),(int)(C)) : __sync_val_compare_and_swap_di((long *)(A),(long)(B),(long)(C))) + +int __sync_bool_compare_and_swap_si (int *, int, int); +long __sync_bool_compare_and_swap_di (long *, long, long); +#define __sync_bool_compare_and_swap(A,B,C) ((sizeof (*(A)) == sizeof(int)) ? __sync_bool_compare_and_swap_si((int *)(A),(int)(B),(int)(C)) : __sync_bool_compare_and_swap_di((long *)(A),(long)(B),(long)(C))) + +void __sync_lock_release_si (int *); +void __sync_lock_release_di (long *); +#define __sync_lock_release(A) ((sizeof (*(A)) == sizeof(int)) ? __sync_lock_release_si((int *)(A)) : __sync_lock_release_di((long *)(A))) + +int __sync_lock_test_and_set_si (int *, int); +long __sync_lock_test_and_set_di (long *, long); +#define __sync_lock_test_and_set(A,B) ((sizeof (*(A)) == sizeof(int)) ? __sync_lock_test_and_set_si((int *)(A),(int)(B)) : __sync_lock_test_and_set_di((long *)(A),(long)(B))) + +int __sync_fetch_and_add_si (int *, int); +int __sync_fetch_and_sub_si (int *, int); +int __sync_fetch_and_and_si (int *, int); +int __sync_fetch_and_or_si (int *, int); +int __sync_fetch_and_xor_si (int *, int); +int __sync_fetch_and_nand_si (int *, int); +long __sync_fetch_and_add_di (long *, long); +long __sync_fetch_and_sub_di (long *, long); +long __sync_fetch_and_and_di (long *, long); +long __sync_fetch_and_or_di (long *, long); +long __sync_fetch_and_xor_di (long *, long); +long __sync_fetch_and_nand_di (long *, long); +#define __sync_fetch_and_add(A,B) ((sizeof (*(A)) == sizeof(int)) ? __sync_fetch_and_add_si((int *)(A),(int)(B)) : __sync_fetch_and_add_di((long *)(A),(long)(B))) +#define __sync_fetch_and_sub(A,B) ((sizeof (*(A)) == sizeof(int)) ? __sync_fetch_and_sub_si((int *)(A),(int)(B)) : __sync_fetch_and_sub_di((long *)(A),(long)(B))) +#define __sync_fetch_and_and(A,B) ((sizeof (*(A)) == sizeof(int)) ? 
__sync_fetch_and_and_si((int *)(A),(int)(B)) : __sync_fetch_and_and_di((long *)(A),(long)(B))) +#define __sync_fetch_and_or(A,B) ((sizeof (*(A)) == sizeof(int)) ? __sync_fetch_and_or_si((int *)(A),(int)(B)) : __sync_fetch_and_or_di((long *)(A),(long)(B))) +#define __sync_fetch_and_xor(A,B) ((sizeof (*(A)) == sizeof(int)) ? __sync_fetch_and_xor_si((int *)(A),(int)(B)) : __sync_fetch_and_xor_di((long *)(A),(long)(B))) +#define __sync_fetch_and_nand(A,B) ((sizeof (*(A)) == sizeof(int)) ? __sync_fetch_and_nand_si((int *)(A),(int)(B)) : __sync_fetch_and_nand_di((long *)(A),(long)(B))) + +int __sync_add_and_fetch_si (int *, int); +int __sync_sub_and_fetch_si (int *, int); +int __sync_and_and_fetch_si (int *, int); +int __sync_or_and_fetch_si (int *, int); +int __sync_xor_and_fetch_si (int *, int); +int __sync_nand_and_fetch_si (int *, int); +long __sync_add_and_fetch_di (long *, long); +long __sync_sub_and_fetch_di (long *, long); +long __sync_and_and_fetch_di (long *, long); +long __sync_or_and_fetch_di (long *, long); +long __sync_xor_and_fetch_di (long *, long); +long __sync_nand_and_fetch_di (long *, long); +#define __sync_add_and_fetch(A,B) ((sizeof (*(A)) == sizeof(int)) ? __sync_add_and_fetch_si((int *)(A),(int)(B)) : __sync_add_and_fetch_di((long *)(A),(long)(B))) +#define __sync_sub_and_fetch(A,B) ((sizeof (*(A)) == sizeof(int)) ? __sync_sub_and_fetch_si((int *)(A),(int)(B)) : __sync_sub_and_fetch_di((long *)(A),(long)(B))) +#define __sync_and_and_fetch(A,B) ((sizeof (*(A)) == sizeof(int)) ? __sync_and_and_fetch_si((int *)(A),(int)(B)) : __sync_and_and_fetch_di((long *)(A),(long)(B))) +#define __sync_or_and_fetch(A,B) ((sizeof (*(A)) == sizeof(int)) ? __sync_or_and_fetch_si((int *)(A),(int)(B)) : __sync_or_and_fetch_di((long *)(A),(long)(B))) +#define __sync_xor_and_fetch(A,B) ((sizeof (*(A)) == sizeof(int)) ? 
__sync_xor_and_fetch_si((int *)(A),(int)(B)) : __sync_xor_and_fetch_di((long *)(A),(long)(B))) +#define __sync_nand_and_fetch(A,B) ((sizeof (*(A)) == sizeof(int)) ? __sync_nand_and_fetch_si((int *)(A),(int)(B)) : __sync_nand_and_fetch_di((long *)(A),(long)(B))) + +#endif diff --git a/gcc/config/ia64/lib1funcs.asm b/gcc/config/ia64/lib1funcs.asm new file mode 100644 index 0000000..d8af8db --- /dev/null +++ b/gcc/config/ia64/lib1funcs.asm @@ -0,0 +1,635 @@ +#ifdef L__divdf3 +// Compute a 64-bit IEEE double quotient. +// +// From the Intel IA-64 Optimization Guide, choose the minimum latency +// alternative. +// +// farg0 holds the dividend. farg1 holds the divisor. + + .text + .align 16 + .global __divdf3 + .proc __divdf3 +__divdf3: + frcpa f10, p6 = farg0, farg1 + ;; +(p6) fma.s1 f11 = farg0, f10, f0 +(p6) fnma.s1 f12 = farg1, f10, f1 + ;; +(p6) fma.s1 f11 = f12, f11, f11 +(p6) fma.s1 f13 = f12, f12, f0 +(p6) fma.s1 f10 = f12, f10, f10 + ;; +(p6) fma.s1 f11 = f13, f11, f11 +(p6) fma.s1 f12 = f13, f13, f0 +(p6) fma.s1 f10 = f13, f10, f10 + ;; +(p6) fma.d.s1 f11 = f12, f11, f11 +(p6) fma.s1 f10 = f12, f10, f10 + ;; +(p6) fnma.d.s1 f8 = farg1, f11, farg0 + ;; +(p6) fma.d f10 = f8, f10, f11 + ;; + mov fret0 = f10 + br.ret.sptk rp + ;; + .endp __divdf3 +#endif + +#ifdef L__divsf3 +// Compute a 32-bit IEEE float quotient. +// +// From the Intel IA-64 Optimization Guide, choose the minimum latency +// alternative. +// +// farg0 holds the dividend. farg1 holds the divisor. + + .text + .align 16 + .global __divsf3 + .proc __divsf3 +__divsf3: + frcpa f10, p6 = farg0, farg1 + ;; +(p6) fma.s1 f8 = farg0, f10, f0 +(p6) fnma.s1 f9 = farg1, f10, f1 + ;; +(p6) fma.s1 f8 = f9, f8, f8 +(p6) fma.s1 f9 = f9, f9, f0 + ;; +(p6) fma.s1 f8 = f9, f8, f8 +(p6) fma.s1 f9 = f9, f9, f0 + ;; +(p6) fma.d.s1 f8 = f9, f8, f8 + ;; +(p6) fma.s f10 = f8, f1, f0 + ;; + mov fret0 = f10 + br.ret.sptk rp + ;; + .endp __divsf3 +#endif + +#ifdef L__divdi3 +// Compute a 64-bit integer quotient. 
+// +// Use reciprocal approximation and Newton-Raphson iteration to compute the +// quotient. frcpa gives 8.6 significant bits, so we need 3 iterations +// to get more than the 64 bits of precision that we need for DImode. +// +// Must use max precision for the reciprocal computations to get 64 bits of +// precision. +// +// r32/f8 holds the dividend. r33/f9 holds the divisor. +// f10 holds the value 2.0. f11 holds the reciprocal approximation. +// f12 is a temporary. + + .text + .align 16 + .global __divdi3 + .proc __divdi3 +__divdi3: + .regstk 2,0,0,0 + // Transfer inputs to FP registers. + setf.sig f8 = in0 + setf.sig f9 = in1 + ;; + // Convert the inputs to FP, so that they won't be treated as unsigned. + fcvt.xf f8 = f8 + fcvt.xf f9 = f9 + ;; + // Compute the reciprocal approximation. + frcpa f10, p6 = f8, f9 + ;; + // 3 Newton-Raphson iterations. +(p6) fma.s1 f11 = farg0, f10, f0 +(p6) fnma.s1 f12 = farg1, f10, f1 + ;; +(p6) fma.s1 f11 = f12, f11, f11 +(p6) fma.s1 f13 = f12, f12, f0 +(p6) fma.s1 f10 = f12, f10, f10 + ;; +(p6) fma.s1 f11 = f13, f11, f11 +(p6) fma.s1 f12 = f13, f13, f0 +(p6) fma.s1 f10 = f13, f10, f10 + ;; +(p6) fma.s1 f11 = f12, f11, f11 +(p6) fma.s1 f10 = f12, f10, f10 + ;; +(p6) fnma.s1 f8 = f9, f11, f8 + ;; +(p6) fma f10 = f8, f10, f11 + ;; + // Round quotient to an integer. + fcvt.fx.trunc f8 = f10 + ;; + // Transfer result to GP registers. + getf.sig ret0 = f8 + br.ret.sptk rp + ;; + .endp __divdi3 +#endif + +#ifdef L__moddi3 +// Compute a 64-bit integer modulus. +// +// Use reciprocal approximation and Newton-Raphson iteration to compute the +// quotient. frcpa gives 8.6 significant bits, so we need 3 iterations +// to get more than the 64 bits of precision that we need for DImode. +// +// Must use max precision for the reciprocal computations to get 64 bits of +// precision. +// +// r32/f8 holds the dividend. r33/f9 holds the divisor. +// f10 holds the value 2.0. f11 holds the reciprocal approximation. +// f12 is a temporary. 
+ + .text + .align 16 + .global __moddi3 + .proc __moddi3 +__moddi3: + .regstk 2,0,0,0 + // Transfer inputs to FP registers. + setf.sig f8 = in0 + setf.sig f9 = in1 + ;; + // Convert the inputs to FP, so that they won't be treated as unsigned. + fcvt.xf f8 = f8 + fcvt.xf f9 = f9 + ;; + // Compute the reciprocal approximation. + frcpa f10, p6 = f8, f9 + ;; + // 3 Newton-Raphson iterations. +(p6) fma.s1 f11 = farg0, f10, f0 +(p6) fnma.s1 f12 = farg1, f10, f1 + ;; +(p6) fma.s1 f11 = f12, f11, f11 +(p6) fma.s1 f13 = f12, f12, f0 +(p6) fma.s1 f10 = f12, f10, f10 + ;; +(p6) fma.s1 f11 = f13, f11, f11 +(p6) fma.s1 f12 = f13, f13, f0 +(p6) fma.s1 f10 = f13, f10, f10 + ;; +(p6) fma.s1 f11 = f12, f11, f11 +(p6) fma.s1 f10 = f12, f10, f10 + ;; +(p6) fnma.s1 f12 = f9, f11, f8 + ;; +(p6) fma f10 = f12, f10, f11 + ;; + // Round quotient to an integer. + fcvt.fx.trunc f10 = f10 + ;; + // Renormalize. + fcvt.xf f10 = f10 + ;; + // Compute remainder. + fnma f8 = f10, f9, f8 + ;; + // Round remainder to an integer. + fcvt.fx.trunc f8 = f8 + ;; + // Transfer result to GP registers. + getf.sig ret0 = f8 + br.ret.sptk rp + ;; + .endp __moddi3 +#endif + +#ifdef L__udivdi3 +// Compute a 64-bit unsigned integer quotient. +// +// Use reciprocal approximation and Newton-Raphson iteration to compute the +// quotient. frcpa gives 8.6 significant bits, so we need 3 iterations +// to get more than the 64 bits of precision that we need for DImode. +// +// Must use max precision for the reciprocal computations to get 64 bits of +// precision. +// +// r32/f8 holds the dividend. r33/f9 holds the divisor. +// f10 holds the value 2.0. f11 holds the reciprocal approximation. +// f12 is a temporary. + + .text + .align 16 + .global __udivdi3 + .proc __udivdi3 +__udivdi3: + .regstk 2,0,0,0 + // Transfer inputs to FP registers. + setf.sig f8 = in0 + setf.sig f9 = in1 + ;; + // Convert the inputs to FP, to avoid FP software-assist faults. 
+ fcvt.xuf f8 = f8 + fcvt.xuf f9 = f9 + ;; + // Compute the reciprocal approximation. + frcpa f10, p6 = f8, f9 + ;; + // 3 Newton-Raphson iterations. +(p6) fma.s1 f11 = farg0, f10, f0 +(p6) fnma.s1 f12 = farg1, f10, f1 + ;; +(p6) fma.s1 f11 = f12, f11, f11 +(p6) fma.s1 f13 = f12, f12, f0 +(p6) fma.s1 f10 = f12, f10, f10 + ;; +(p6) fma.s1 f11 = f13, f11, f11 +(p6) fma.s1 f12 = f13, f13, f0 +(p6) fma.s1 f10 = f13, f10, f10 + ;; +(p6) fma.s1 f11 = f12, f11, f11 +(p6) fma.s1 f10 = f12, f10, f10 + ;; +(p6) fnma.s1 f8 = f9, f11, f8 + ;; +(p6) fma f10 = f8, f10, f11 + ;; + // Round quotient to an unsigned integer. + fcvt.fxu.trunc f8 = f10 + ;; + // Transfer result to GP registers. + getf.sig ret0 = f8 + br.ret.sptk rp + ;; + .endp __udivdi3 +#endif + +#ifdef L__umoddi3 +// Compute a 64-bit unsigned integer modulus. +// +// Use reciprocal approximation and Newton-Raphson iteration to compute the +// quotient. frcpa gives 8.6 significant bits, so we need 3 iterations +// to get more than the 64 bits of precision that we need for DImode. +// +// Must use max precision for the reciprocal computations to get 64 bits of +// precision. +// +// r32/f8 holds the dividend. r33/f9 holds the divisor. +// f10 holds the value 2.0. f11 holds the reciprocal approximation. +// f12 is a temporary. + + .text + .align 16 + .global __umoddi3 + .proc __umoddi3 +__umoddi3: + .regstk 2,0,0,0 + // Transfer inputs to FP registers. + setf.sig f8 = in0 + setf.sig f9 = in1 + ;; + // Convert the inputs to FP, to avoid FP software assist faults. + fcvt.xuf f8 = f8 + fcvt.xuf f9 = f9 + ;; + // Compute the reciprocal approximation. + frcpa f10, p6 = f8, f9 + ;; + // 3 Newton-Raphson iterations. 
+(p6) fma.s1 f11 = farg0, f10, f0 +(p6) fnma.s1 f12 = farg1, f10, f1 + ;; +(p6) fma.s1 f11 = f12, f11, f11 +(p6) fma.s1 f13 = f12, f12, f0 +(p6) fma.s1 f10 = f12, f10, f10 + ;; +(p6) fma.s1 f11 = f13, f11, f11 +(p6) fma.s1 f12 = f13, f13, f0 +(p6) fma.s1 f10 = f13, f10, f10 + ;; +(p6) fma.s1 f11 = f12, f11, f11 +(p6) fma.s1 f10 = f12, f10, f10 + ;; +(p6) fnma.s1 f12 = f9, f11, f8 + ;; +(p6) fma f10 = f12, f10, f11 + ;; + // Round quotient to an unsigned integer. + fcvt.fxu.trunc f10 = f10 + ;; + // Renormalize. + fcvt.xuf f10 = f10 + ;; + // Compute remainder. + fnma f8 = f10, f9, f8 + ;; + // Round remainder to an integer. + fcvt.fxu.trunc f8 = f8 + ;; + // Transfer result to GP registers. + getf.sig ret0 = f8 + br.ret.sptk rp + ;; + .endp __umoddi3 +#endif + +#ifdef L__divsi3 +// Compute a 32-bit integer quotient. +// +// Use reciprocal approximation and Newton-Raphson iteration to compute the +// quotient. frcpa gives 8.6 significant bits, so we need 2 iterations +// to get more than the 32 bits of precision that we need for SImode. +// +// ??? This is currently not used. It needs to be fixed to be more like the +// above DImode routines. +// +// ??? Check to see if the error is less than >.5ulp error. We may need +// some adjustment code to get precise enough results. +// +// ??? Should probably use max precision for the reciprocal computations. +// +// r32/f8 holds the dividend. r33/f9 holds the divisor. +// f10 holds the value 2.0. f11 holds the reciprocal approximation. +// f12 is a temporary. 
+ + .text + .align 16 + .global __divsi3 + .proc __divsi3 +__divsi3: + .regstk 2,0,0,0 + setf.sig f8 = in0 + setf.sig f9 = in1 + ;; + fcvt.xf f8 = f8 + fcvt.xf f9 = f9 + ;; + frcpa f11, p6 = f8, f9 + fadd f10 = f1, f1 + ;; + fnma f12 = f9, f11, f10 + ;; + fmpy f11 = f11, f12 + ;; + fnma f12 = f9, f11, f10 + ;; + fmpy f11 = f11, f12 + ;; + fmpy f8 = f8, f11 + ;; + fcvt.fx.trunc f8 = f8 + ;; + getf.sig ret0 = f8 + br.ret.sptk rp + ;; + .endp __divsi3 +#endif + +#ifdef L__modsi3 +// Compute a 32-bit integer modulus. +// +// Use reciprocal approximation and Newton-Raphson iteration to compute the +// quotient. frcpa gives 8.6 significant bits, so we need 2 iterations +// to get more than the 32 bits of precision that we need for SImode. +// +// ??? This is currently not used. It needs to be fixed to be more like the +// above DImode routines. +// +// ??? Check to see if the error is less than >.5ulp error. We may need +// some adjustment code to get precise enough results. +// +// ??? Should probably use max precision for the reciprocal computations. +// +// r32/f8 holds the dividend. r33/f9 holds the divisor. +// f10 holds the value 2.0. f11 holds the reciprocal approximation. +// f12 is a temporary. + + .text + .align 16 + .global __modsi3 + .proc __modsi3 +__modsi3: + .regstk 2,0,0,0 + setf.sig f8 = r32 + setf.sig f9 = r33 + ;; + fcvt.xf f8 = f8 + fcvt.xf f9 = f9 + ;; + frcpa f11, p6 = f8, f9 + fadd f10 = f1, f1 + ;; + fnma f12 = f9, f11, f10 + ;; + fmpy f11 = f11, f12 + ;; + fnma f12 = f9, f11, f10 + ;; + fmpy f11 = f11, f12 + ;; + fmpy f10 = f8, f11 + ;; + fcvt.fx.trunc f10 = f10 + ;; + fcvt.xf f10 = f10 + ;; + fnma f8 = f10, f9, f8 + ;; + fcvt.fx f8 = f8 + ;; + getf.sig r32 = f8 + br.ret.sptk rp + ;; + .endp __modsi3 +#endif + +#ifdef L__udivsi3 +// Compute a 32-bit unsigned integer quotient. +// +// Use reciprocal approximation and Newton-Raphson iteration to compute the +// quotient. 
frcpa gives 8.6 significant bits, so we need 2 iterations +// to get more than the 32 bits of precision that we need for SImode. +// +// ??? This is currently not used. It needs to be fixed to be more like the +// above DImode routines. +// +// ??? Check to see if the error is less than >.5ulp error. We may need +// some adjustment code to get precise enough results. +// +// ??? Should probably use max precision for the reciprocal computations. +// +// r32/f8 holds the dividend. r33/f9 holds the divisor. +// f10 holds the value 2.0. f11 holds the reciprocal approximation. +// f12 is a temporary. +// +// This is the same as divsi3, except that we don't need fcvt instructions +// before the frcpa. + + .text + .align 16 + .global __udivsi3 + .proc __udivsi3 +__udivsi3: + .regstk 2,0,0,0 + setf.sig f8 = r32 + setf.sig f9 = r33 + ;; + frcpa f11, p6 = f8, f9 + fadd f10 = f1, f1 + ;; + fnma f12 = f9, f11, f10 + ;; + fmpy f11 = f11, f12 + ;; + fnma f12 = f9, f11, f10 + ;; + fmpy f11 = f11, f12 + ;; + fmpy f8 = f8, f11 + ;; + fcvt.fxu.trunc f8 = f8 + ;; + getf.sig ret0 = f8 + br.ret.sptk rp + ;; + .endp __udivsi3 +#endif + +#ifdef L__umodsi3 +// Compute a 32-bit unsigned integer modulus. +// +// Use reciprocal approximation and Newton-Raphson iteration to compute the +// quotient. frcpa gives 8.6 significant bits, so we need 2 iterations +// to get more than the 32 bits of precision that we need for SImode. +// +// ??? This is currently not used. It needs to be fixed to be more like the +// above DImode routines. +// +// ??? Check to see if the error is less than >.5ulp error. We may need +// some adjustment code to get precise enough results. +// +// ??? Should probably use max precision for the reciprocal computations. +// +// r32/f8 holds the dividend. r33/f9 holds the divisor. +// f10 holds the value 2.0. f11 holds the reciprocal approximation. +// f12 is a temporary. +// +// This is the same as modsi3, except that we don't need fcvt instructions +// before the frcpa. 
+ + .text + .align 16 + .global __umodsi3 + .proc __umodsi3 +__umodsi3: + .regstk 2,0,0,0 + setf.sig f8 = r32 + setf.sig f9 = r33 + ;; + frcpa f11, p6 = f8, f9 + fadd f10 = f1, f1 + ;; + fnma f12 = f9, f11, f10 + ;; + fmpy f11 = f11, f12 + ;; + fnma f12 = f9, f11, f10 + ;; + fmpy f11 = f11, f12 + ;; + fmpy f10 = f8, f11 + ;; + fcvt.fxu.trunc f10 = f10 + ;; + fcvt.xuf f10 = f10 + ;; + fnma f8 = f10, f9, f8 + ;; + fcvt.fxu f8 = f8 + ;; + getf.sig r32 = f8 + br.ret.sptk rp + ;; + .endp __umodsi3 +#endif + +#ifdef L__save_stack_nonlocal +// Notes on save/restore stack nonlocal: We read ar.bsp but write +// ar.bspstore. This is because ar.bsp can be read at all times +// (independent of the RSE mode) but since it's read-only we need to +// restore the value via ar.bspstore. This is OK because +// ar.bsp==ar.bspstore after executing "flushrs". + +// void __ia64_save_stack_nonlocal(void *save_area, void *stack_pointer) + + .text + .align 16 + .global __ia64_save_stack_nonlocal + .proc __ia64_save_stack_nonlocal +__ia64_save_stack_nonlocal: + alloc r18=ar.pfs,2,0,0,0 + st8 [in0]=in1,8 + mov r19=ar.rsc + ;; + flushrs + and r19=0x1c,r19 + mov ar.pfs=r18 + ;; + mov ar.rsc=r19 + mov r16=ar.bsp + adds r2=16,in0 + ;; + mov r17=ar.rnat + st8 [in0]=r16,8 + or r19=0x3,r19 + ;; + st8 [in0]=r17 + mov ar.rsc=r19 + st8 [r2]=r18 + mov ar.pfs=r18 + br.ret.sptk.few rp + ;; + .endp __ia64_save_stack_nonlocal +#endif + +#ifdef L__nonlocal_goto +// void __ia64_nonlocal_goto(void *fp, void *target_label, void *save_area, +// void *static_chain); + + .text + .align 16 + .global __ia64_nonlocal_goto + .proc __ia64_nonlocal_goto +__ia64_nonlocal_goto: + alloc r20=ar.pfs,4,0,0,0 + mov r19=ar.rsc + adds r2=8,in2 + ld8 r12=[in2],16 + mov.ret.sptk.few.dc.dc rp = r33, .L0 +// ??? flushrs must be first instruction of a group. 
Gas is unfortunately +// putting the stop bit before the padding nop instead of after it, making +// flushrs the first instruction of its bundle, but the second instruction +// of its group. We explicitly add the nop to avoid this problem. + nop.i 0 + ;; + flushrs + ld8 r16=[r2],16 + and r19=0x1c,r19 + ld8 r17=[in2] + ;; + ld8 r18=[r2] + mov ar.rsc=r19 + ;; + mov ar.bspstore=r16 + ;; + mov ar.rnat=r17 + mov ar.pfs=r18 + or r19=0x3,r19 + ;; + loadrs + invala + mov r7=r32 +.L0: { + mov ar.rsc=r19 + mov r15=r35 + br.ret.sptk.few rp + } + ;; + .endp __ia64_nonlocal_goto +#endif diff --git a/gcc/config/ia64/linux.h b/gcc/config/ia64/linux.h new file mode 100644 index 0000000..08b002b --- /dev/null +++ b/gcc/config/ia64/linux.h @@ -0,0 +1,29 @@ +/* Definitions for ia64-linux target. */ +#include "ia64/ia64.h" +#include +#include "sysv4.h" + +/* ??? Maybe this should be in sysv4.h? */ +#define CPP_PREDEFINES "\ +-D__ia64 -D__ia64__ -D__linux -D__linux__ -D_LONGLONG -Dlinux -Dunix \ +-D__LP64__ -D__ELF__ -Asystem(linux) -Acpu(ia64) -Amachine(ia64)" + +/* ??? ia64 gas doesn't accept standard svr4 assembler options? */ +#undef ASM_SPEC + +/* Define this for shared library support because it isn't in the main + linux.h file. */ + +#undef LINK_SPEC +#define LINK_SPEC "\ + %{shared:-shared} \ + %{!shared: \ + %{!static: \ + %{rdynamic:-export-dynamic} \ + %{!dynamic-linker:-dynamic-linker /lib/ld-linux.so.2}} \ + %{static:-static}}" + + +#define DONT_USE_BUILTIN_SETJMP +#define JMP_BUF_SIZE (8 * 76) +/* End of linux.h */ diff --git a/gcc/config/ia64/sysv4.h b/gcc/config/ia64/sysv4.h new file mode 100644 index 0000000..6e5efdb --- /dev/null +++ b/gcc/config/ia64/sysv4.h @@ -0,0 +1,248 @@ +/* Override definitions in elfos.h/svr4.h to be correct for IA64. */ + +/* We want DWARF2 as specified by the IA64 ABI. */ +#undef PREFERRED_DEBUGGING_TYPE +#define PREFERRED_DEBUGGING_TYPE DWARF2_DEBUG + +/* Various pseudo-ops for which the Intel assembler uses non-standard + definitions. 
*/ + +#undef ASM_BYTE_OP +#define ASM_BYTE_OP "data1" + +#undef STRING_ASM_OP +#define STRING_ASM_OP "stringz" + +#undef SKIP_ASM_OP +#define SKIP_ASM_OP ".skip" + +#undef COMMON_ASM_OP +#define COMMON_ASM_OP ".common" + +#undef ASCII_DATA_ASM_OP +#define ASCII_DATA_ASM_OP "string" + +/* ??? Unfortunately, .lcomm doesn't work, because it puts things in either + .bss or .sbss, and we can't control the decision of which is used. When + I use .lcomm, I get a cryptic "Section group has no member" error from + the Intel simulator. So we must explicitly put variables in .bss + instead. This matters only if we care about the Intel assembler. */ + +/* This is asm_output_aligned_bss from varasm.c without the ASM_GLOBALIZE_LABEL + call at the beginning. */ + +/* This is for final.c, because it is used by ASM_DECLARE_OBJECT_NAME. */ +extern int size_directive_output; + +#undef ASM_OUTPUT_ALIGNED_LOCAL +#define ASM_OUTPUT_ALIGNED_DECL_LOCAL(FILE, DECL, NAME, SIZE, ALIGN) \ +do { \ + if (XSTR (XEXP (DECL_RTL (DECL), 0), 0)[0] == SDATA_NAME_FLAG_CHAR) \ + sbss_section (); \ + else \ + bss_section (); \ + ASM_OUTPUT_ALIGN (FILE, floor_log2 ((ALIGN) / BITS_PER_UNIT)); \ + ASM_DECLARE_OBJECT_NAME (FILE, NAME, DECL); \ + ASM_OUTPUT_SKIP (FILE, SIZE ? SIZE : 1); \ +} while (0) + +/* ??? Intel assembler does not allow "." in section names, so turn off + gnu.linkonce section support, but only when using the Intel assembler. */ +#undef UNIQUE_SECTION_P +#define UNIQUE_SECTION_P(DECL) (TARGET_GNU_AS ? DECL_ONE_ONLY (DECL) : 0) + +/* The # tells the Intel assembler that this is not a register name. + However, we can't emit the # in a label definition, so we set a variable + in ASM_OUTPUT_LABEL to control whether we want the postfix here or not. */ + +#undef ASM_OUTPUT_LABELREF +#define ASM_OUTPUT_LABELREF(STREAM, NAME) \ +do \ + { \ + const char *real_name; \ + STRIP_NAME_ENCODING (real_name, NAME); \ + asm_fprintf (STREAM, "%U%s%s", real_name, \ + (ia64_asm_output_label ? 
"" : "#")); \ + } \ +while (0) + +/* Intel assembler requires both flags and type if declaring a non-predefined + section. */ +#undef INIT_SECTION_ASM_OP +#define INIT_SECTION_ASM_OP ".section\t.init,\"ax\",\"progbits\"" +#undef FINI_SECTION_ASM_OP +#define FINI_SECTION_ASM_OP ".section\t.fini,\"ax\",\"progbits\"" +#undef CTORS_SECTION_ASM_OP +#define CTORS_SECTION_ASM_OP ".section\t.ctors,\"aw\",\"progbits\"" +#undef DTORS_SECTION_ASM_OP +#define DTORS_SECTION_ASM_OP ".section\t.dtors,\"aw\",\"progbits\"" + +/* A C statement (sans semicolon) to output an element in the table of + global constructors. */ +/* Must override this to get @fptr relocation. */ +#undef ASM_OUTPUT_CONSTRUCTOR +#define ASM_OUTPUT_CONSTRUCTOR(FILE,NAME) \ + do { \ + ctors_section (); \ + fputs ("\tdata8\t @fptr(", FILE); \ + assemble_name (FILE, NAME); \ + fputs (")\n", FILE); \ + } while (0) + +/* A C statement (sans semicolon) to output an element in the table of + global destructors. */ +/* Must override this to get @fptr relocation. */ +#undef ASM_OUTPUT_DESTRUCTOR +#define ASM_OUTPUT_DESTRUCTOR(FILE,NAME) \ + do { \ + dtors_section (); \ + fputs ("\tdata8\t @fptr(", FILE); \ + assemble_name (FILE, NAME); \ + fputs (")\n", FILE); \ + } while (0) + +/* svr4.h undefines this, so we need to define it here. */ +#define DBX_REGISTER_NUMBER(REGNO) \ + (IN_REGNO_P (REGNO) ? (32 + (REGNO) - IN_REG (0)) \ + : LOC_REGNO_P (REGNO) ? (32 + ia64_input_regs + \ + (REGNO) - LOC_REG (0)) \ + : OUT_REGNO_P (REGNO) ? (32 + ia64_input_regs + ia64_local_regs \ + + (REGNO) - OUT_REG (0)) \ + : (REGNO) == FRAME_POINTER_REGNUM ? ia64_fp_regno \ + : (REGNO)) + +/* Things that svr4.h defines to the wrong type, because it assumes 32 bit + ints and 32 bit longs. 
*/ + +#undef SIZE_TYPE +#define SIZE_TYPE "long unsigned int" + +#undef PTRDIFF_TYPE +#define PTRDIFF_TYPE "long int" + +#undef WCHAR_TYPE +#define WCHAR_TYPE "int" + +#undef WCHAR_TYPE_SIZE +#define WCHAR_TYPE_SIZE 32 + +/* We don't want any symbol at the beginning of the file. This is defined in + dbxelf.h which is included from elfos.h, so we need to undef/define it + here. */ + +#undef ASM_IDENTIFY_GCC +#define ASM_IDENTIFY_GCC(FILE) + +/* We redefine this to use the ia64 .proc pseudo-op. */ + +#undef ASM_DECLARE_FUNCTION_NAME +#define ASM_DECLARE_FUNCTION_NAME(FILE, NAME, DECL) \ +do { \ + fputs ("\t.proc ", FILE); \ + assemble_name (FILE, NAME); \ + fputc ('\n', FILE); \ + ASM_OUTPUT_LABEL (FILE, NAME); \ +} while (0) + +/* We redefine this to use the ia64 .endp pseudo-op. */ + +#undef ASM_DECLARE_FUNCTION_SIZE +#define ASM_DECLARE_FUNCTION_SIZE(FILE, NAME, DECL) \ +do { \ + fputs ("\t.endp ", FILE); \ + assemble_name (FILE, NAME); \ + fputc ('\n', FILE); \ +} while (0) + +/* A C expression which outputs to the stdio stream STREAM some appropriate + text to go at the start of an assembler file. */ + +/* ??? Looks like almost every port, except for a few original ones, get this + wrong. Must emit #NO_APP as first line of file to turn of special assembler + preprocessing of files. */ + +/* ??? Even worse, it doesn't work, because gas does not accept the tab chars + that dwarf2out.c emits when #NO_APP. */ + +/* ??? Unrelated, but dwarf2out.c emits unnecessary newlines after strings, + may as well fix at the same time. */ + +#if 0 +#undef ASM_FILE_START +#define ASM_FILE_START(STREAM) \ +do { \ + fputs (ASM_APP_OFF, STREAM); \ + output_file_directive (STREAM, main_input_filename); \ +} while (0) +#endif + +/* Case label alignment is handled by ADDR_VEC_ALIGN now. */ + +#undef ASM_OUTPUT_BEFORE_CASE_LABEL +#define ASM_OUTPUT_BEFORE_CASE_LABEL(FILE,PREFIX,NUM,TABLE) + +/* We override svr4.h so that we can support the sdata section. 
*/ + +#undef SELECT_SECTION +#define SELECT_SECTION(DECL,RELOC) \ +{ \ + if (TREE_CODE (DECL) == STRING_CST) \ + { \ + if (! flag_writable_strings) \ + const_section (); \ + else \ + data_section (); \ + } \ + else if (TREE_CODE (DECL) == VAR_DECL) \ + { \ + if (XSTR (XEXP (DECL_RTL (DECL), 0), 0)[0] \ + == SDATA_NAME_FLAG_CHAR) \ + sdata_section (); \ + /* ??? We need the extra ! RELOC check, because the default is to \ + only check RELOC if flag_pic is set, and we don't set flag_pic \ + (yet?). */ \ + else if (DECL_READONLY_SECTION (DECL, RELOC) && ! (RELOC)) \ + const_section (); \ + else \ + data_section (); \ + } \ + else \ + const_section (); \ +} + +#undef EXTRA_SECTIONS +#define EXTRA_SECTIONS in_const, in_ctors, in_dtors, in_sdata, in_sbss + +#undef EXTRA_SECTION_FUNCTIONS +#define EXTRA_SECTION_FUNCTIONS \ + CONST_SECTION_FUNCTION \ + CTORS_SECTION_FUNCTION \ + DTORS_SECTION_FUNCTION \ + SDATA_SECTION_FUNCTION \ + SBSS_SECTION_FUNCTION + +#define SDATA_SECTION_ASM_OP ".sdata" + +#define SDATA_SECTION_FUNCTION \ +void \ +sdata_section () \ +{ \ + if (in_section != in_sdata) \ + { \ + fprintf (asm_out_file, "%s\n", SDATA_SECTION_ASM_OP); \ + in_section = in_sdata; \ + } \ +} + +#define SBSS_SECTION_ASM_OP ".sbss" + +#define SBSS_SECTION_FUNCTION \ +void \ +sbss_section () \ +{ \ + if (in_section != in_sbss) \ + { \ + fprintf (asm_out_file, "%s\n", SBSS_SECTION_ASM_OP); \ + in_section = in_sbss; \ + } \ +} diff --git a/gcc/config/ia64/t-ia64 b/gcc/config/ia64/t-ia64 new file mode 100644 index 0000000..bf14c0a --- /dev/null +++ b/gcc/config/ia64/t-ia64 @@ -0,0 +1,41 @@ +# Name of assembly file containing libgcc1 functions. +# This entry must be present, but it can be empty if the target does +# not need any assembler functions to support its code generation. +CROSS_LIBGCC1 = libgcc1-asm.a +LIBGCC1 = libgcc1-asm.a +LIB1ASMSRC = ia64/lib1funcs.asm + +# ??? 
We change the names of the DImode div/mod files so that they won't +# accidentally be overridden by libgcc2.c files. We used to use __ia64 as +# a prefix, now we use __ as the prefix. +LIB1ASMFUNCS = __divdf3 __divsf3 \ + __divdi3 __moddi3 __udivdi3 __umoddi3 \ + __divsi3 __modsi3 __udivsi3 __umodsi3 __save_stack_nonlocal \ + __nonlocal_goto + +# ??? Hack to get -P option used when compiling lib1funcs.asm, because Intel +# assembler does not accept # line number as a comment. +# ??? This breaks C++ pragma interface/implementation, which is used in the +# C++ part of libgcc2, hence it had to be disabled. Must find some other way +# to support the Intel assembler. +#LIBGCC2_DEBUG_CFLAGS = -g1 -P + +# For svr4 we build crtbegin.o and crtend.o which serve to add begin and +# end labels to the .ctors and .dtors section when we link using gcc. + +EXTRA_PARTS=crtbegin.o crtend.o crtbeginS.o crtendS.o + +# Effectively disable the crtbegin/end rules using crtstuff.c +T = disable + +# Assemble startup files. +crtbegin.o: $(srcdir)/config/ia64/crtbegin.asm $(GCC_PASSES) + $(GCC_FOR_TARGET) -c -o crtbegin.o -x assembler-with-cpp $(srcdir)/config/ia64/crtbegin.asm +crtend.o: $(srcdir)/config/ia64/crtend.asm $(GCC_PASSES) + $(GCC_FOR_TARGET) -c -o crtend.o -x assembler-with-cpp $(srcdir)/config/ia64/crtend.asm +crtbeginS.o: $(srcdir)/config/ia64/crtbegin.asm $(GCC_PASSES) + $(GCC_FOR_TARGET) -DSHARED -c -o crtbeginS.o -x assembler-with-cpp $(srcdir)/config/ia64/crtbegin.asm +crtendS.o: $(srcdir)/config/ia64/crtend.asm $(GCC_PASSES) + $(GCC_FOR_TARGET) -DSHARED -c -o crtendS.o -x assembler-with-cpp $(srcdir)/config/ia64/crtend.asm + +EXTRA_HEADERS = $(srcdir)/config/ia64/ia64intrin.h diff --git a/gcc/config/ia64/xm-ia64.h b/gcc/config/ia64/xm-ia64.h new file mode 100644 index 0000000..541d39a --- /dev/null +++ b/gcc/config/ia64/xm-ia64.h @@ -0,0 +1,61 @@ +/* Definitions of target machine for IA64. + Copyright (C) 1999 Cygnus Solutions. + +This file is part of GNU CC. 
+ +GNU CC is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 2, or (at your option) +any later version. + +GNU CC is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with GNU CC; see the file COPYING. If not, write to +the Free Software Foundation, 59 Temple Place - Suite 330, +Boston, MA 02111-1307, USA. */ + +/* #defines that need visibility everywhere. */ +#define FALSE 0 +#define TRUE 1 + +/* A C expression for the status code to be returned when the compiler exits + after serious errors. */ +#define FATAL_EXIT_CODE 33 + +/* A C expression for the status code to be returned when the compiler exits + without serious errors. */ +#define SUCCESS_EXIT_CODE 0 + +/* Defined if the host machine stores words of multi-word values in big-endian + order. (GNU CC does not depend on the host byte ordering within a word.) */ +#ifdef __BIG_ENDIAN__ +#define HOST_WORDS_BIG_ENDIAN +#endif + +/* A C expression for the number of bits in `char' on the host machine. */ +#define HOST_BITS_PER_CHAR 8 + +/* A C expression for the number of bits in `short' on the host machine. */ +#define HOST_BITS_PER_SHORT 16 + +/* A C expression for the number of bits in `int' on the host machine. */ +#define HOST_BITS_PER_INT 32 + +/* ??? This depends on the as yet unimplemented ILP32 option. */ + +/* A C expression for the number of bits in `long' on the host machine. */ +#define HOST_BITS_PER_LONG 64 + +/* A C expression for the number of bits in `long long' on the host + machine. */ +#define HOST_BITS_PER_LONGLONG 64 + +/* target machine dependencies. + tm.h is a symbolic link to the actual target specific file. 
*/ +#include "tm.h" + +/* end of xm-ia64.h */ diff --git a/gcc/configure b/gcc/configure index 3b0b235..cdc381e 100755 --- a/gcc/configure +++ b/gcc/configure @@ -4489,6 +4489,27 @@ for machine in $build $host $target; do i960-*-*) # Default i960 environment. use_collect2=yes ;; + ia64*-*-elf*) + tm_file=ia64/elf.h + tmake_file="ia64/t-ia64" + target_cpu_default="0" + if test x$gas = xyes + then + target_cpu_default="${target_cpu_default}|MASK_GNU_AS" + fi + if test x$gnu_ld = xyes + then + target_cpu_default="${target_cpu_default}|MASK_GNU_LD" + fi + ;; + ia64*-*-linux*) + tm_file=ia64/linux.h + tmake_file="t-linux ia64/t-ia64" + target_cpu_default="MASK_GNU_AS|MASK_GNU_LD" + if test x$enable_threads = xyes; then + thread_file='posix' + fi + ;; m32r-*-elf*) extra_parts="crtinit.o crtfini.o" ;; diff --git a/gcc/configure.in b/gcc/configure.in index f34cf6e..ae5132b 100644 --- a/gcc/configure.in +++ b/gcc/configure.in @@ -1859,6 +1859,27 @@ changequote([,])dnl i960-*-*) # Default i960 environment. use_collect2=yes ;; + ia64*-*-elf*) + tm_file=ia64/elf.h + tmake_file="ia64/t-ia64" + target_cpu_default="0" + if test x$gas = xyes + then + target_cpu_default="${target_cpu_default}|MASK_GNU_AS" + fi + if test x$gnu_ld = xyes + then + target_cpu_default="${target_cpu_default}|MASK_GNU_LD" + fi + ;; + ia64*-*-linux*) + tm_file=ia64/linux.h + tmake_file="t-linux ia64/t-ia64" + target_cpu_default="MASK_GNU_AS|MASK_GNU_LD" + if test x$enable_threads = xyes; then + thread_file='posix' + fi + ;; m32r-*-elf*) extra_parts="crtinit.o crtfini.o" ;; -- 2.7.4