From c65ebc55cab8f22f366038c7536b30c37d71837f Mon Sep 17 00:00:00 2001 From: Jim Wilson Date: Thu, 9 Mar 2000 00:26:04 +0000 Subject: [PATCH] Add ia64 port. From-SVN: r32438 --- gcc/ChangeLog | 6 + gcc/config/ia64/build.hacks | 97 ++ gcc/config/ia64/crtbegin.asm | 154 ++ gcc/config/ia64/crtend.asm | 91 ++ gcc/config/ia64/elf.h | 18 + gcc/config/ia64/ia64-protos.h | 101 ++ gcc/config/ia64/ia64.c | 3279 +++++++++++++++++++++++++++++++++++++ gcc/config/ia64/ia64.h | 2795 ++++++++++++++++++++++++++++++++ gcc/config/ia64/ia64.md | 3591 +++++++++++++++++++++++++++++++++++++++++ gcc/config/ia64/ia64intrin.h | 60 + gcc/config/ia64/lib1funcs.asm | 635 ++++++++ gcc/config/ia64/linux.h | 29 + gcc/config/ia64/sysv4.h | 248 +++ gcc/config/ia64/t-ia64 | 41 + gcc/config/ia64/xm-ia64.h | 61 + gcc/configure | 21 + gcc/configure.in | 21 + 17 files changed, 11248 insertions(+) create mode 100644 gcc/config/ia64/build.hacks create mode 100644 gcc/config/ia64/crtbegin.asm create mode 100644 gcc/config/ia64/crtend.asm create mode 100644 gcc/config/ia64/elf.h create mode 100644 gcc/config/ia64/ia64-protos.h create mode 100644 gcc/config/ia64/ia64.c create mode 100644 gcc/config/ia64/ia64.h create mode 100644 gcc/config/ia64/ia64.md create mode 100644 gcc/config/ia64/ia64intrin.h create mode 100644 gcc/config/ia64/lib1funcs.asm create mode 100644 gcc/config/ia64/linux.h create mode 100644 gcc/config/ia64/sysv4.h create mode 100644 gcc/config/ia64/t-ia64 create mode 100644 gcc/config/ia64/xm-ia64.h diff --git a/gcc/ChangeLog b/gcc/ChangeLog index 89c56a5..5e2f966 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,3 +1,9 @@ +Wed Mar 8 16:19:42 2000 Jim Wilson + + * configure.in (ia64*-*-elf*, ia64*-*-linux*): New. + * configure: Regenerate. + * config/ia64: New. + 2000-03-08 Zack Weinberg * Makefile.in (LIBCPP_DEPS): New macro. 
diff --git a/gcc/config/ia64/build.hacks b/gcc/config/ia64/build.hacks new file mode 100644 index 0000000..5da0d83 --- /dev/null +++ b/gcc/config/ia64/build.hacks @@ -0,0 +1,97 @@ +The gcse.c patch fixes an optimization problem. This is probably not the right +solution, but it was quick. I will replace with a better solution later. + +The libio/libstdc++ patches are useful if you have a version of glibc without +thread support. There is no official ia64 glibc version yet, and some of the +unofficial ones in common use are missing thread support. libio/libstdc++ +assume that glibc always has thread support, so we need to patch them until +the official ia64 glibc is available. + +Index: gcc/gcse.c +=================================================================== +RCS file: /cvs/cvsfiles/devo/gcc/gcse.c,v +retrieving revision 1.87 +diff -p -r1.87 gcse.c +*** gcse.c 2000/01/11 14:59:28 1.87 +--- gcse.c 2000/02/16 04:17:06 +*************** try_replace_reg (from, to, insn) +*** 4039,4045 **** + information. */ + if (!success && !note) + { +! if (!set) + return 0; + note = REG_NOTES (insn) = gen_rtx_EXPR_LIST (REG_EQUAL, + copy_rtx (SET_SRC (set)), +--- 4039,4048 ---- + information. */ + if (!success && !note) + { +! /* Don't add a REG_EQUAL note for a CCmode destination, because this +! confuses the code in cse.c that simplifies compare and branch +! instructions. */ +! 
if (!set || GET_MODE_CLASS (GET_MODE (SET_DEST (set))) == MODE_CC) + return 0; + note = REG_NOTES (insn) = gen_rtx_EXPR_LIST (REG_EQUAL, + copy_rtx (SET_SRC (set)), +Index: libio/configure.in +=================================================================== +RCS file: /cvs/cvsfiles/devo/libio/configure.in,v +retrieving revision 1.57 +diff -p -r1.57 configure.in +*** configure.in 1999/10/26 03:42:26 1.57 +--- configure.in 2000/02/16 04:17:56 +*************** case "${target}" in +*** 57,62 **** +--- 57,64 ---- + frags="linux.mt linuxaxp1.mt mtsafe.mt" ;; + *-linux-gnulibc1) + frags=linuxlibc1.mt ;; ++ # ??? glibc does not have thread support yet, so we can't use mtsafe.mt. ++ ia64*-linux-gnu) frags="linux.mt" ;; + *-linux-gnu) frags="linux.mt mtsafe.mt" ;; + *-sco3.2v[45]*) frags=sco4.mt ;; + *-isc*) frags=isc.mt ;; +Index: libstdc++/configure.in +=================================================================== +RCS file: /cvs/cvsfiles/devo/libstdc++/configure.in,v +retrieving revision 1.46 +diff -p -r1.46 configure.in +*** configure.in 1999/09/21 19:26:16 1.46 +--- configure.in 2000/02/16 04:17:57 +*************** fi +*** 89,94 **** +--- 89,96 ---- + case "${target}" in + alpha*-*-linux-gnulibc1) frags="${frags} linux.mt" ;; + powerpc*-*-linux-gnulibc1) frags="${frags} linux.mt" ;; ++ # ??? ia64 glibc port does not have thread support yet. ++ ia64*-*-linux-gnu) ;; + *-*-linux-gnu) frags="${frags} linux.mt" ;; + *-*-openbsd*) + case "x${enable_threads}" in +Index: libstdc++/stl/stl_config.h +=================================================================== +RCS file: /cvs/cvsfiles/devo/libstdc++/stl/stl_config.h,v +retrieving revision 1.17 +diff -p -r1.17 stl_config.h +*** stl_config.h 1999/12/24 16:21:31 1.17 +--- stl_config.h 2000/02/16 04:17:58 +*************** +*** 168,174 **** + # if defined(__linux__) + /* glibc pre 2.0 is very buggy. We have to disable thread for it. + It should be upgraded to glibc 2.0 or later. */ +! 
# if !defined(_NOTHREADS) && __GLIBC__ >= 2 && defined(_G_USING_THUNKS) + # define __STL_PTHREADS + # ifdef __STRICT_ANSI__ + /* Work around a bug in the glibc 2.0.x pthread.h. */ +--- 168,175 ---- + # if defined(__linux__) + /* glibc pre 2.0 is very buggy. We have to disable thread for it. + It should be upgraded to glibc 2.0 or later. */ +! /* ??? ia64 glibc port does not have thread support yet. */ +! # if !defined(_NOTHREADS) && __GLIBC__ >= 2 && defined(_G_USING_THUNKS) && !defined(__ia64__) + # define __STL_PTHREADS + # ifdef __STRICT_ANSI__ + /* Work around a bug in the glibc 2.0.x pthread.h. */ diff --git a/gcc/config/ia64/crtbegin.asm b/gcc/config/ia64/crtbegin.asm new file mode 100644 index 0000000..b77ad98 --- /dev/null +++ b/gcc/config/ia64/crtbegin.asm @@ -0,0 +1,154 @@ +/* Copyright (C) 2000 Free Software Foundation, Inc. + Contributed by Jes Sorensen, + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Library General Public License as + published by the Free Software Foundation; either version 2 of the + License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Library General Public License for more details. + + You should have received a copy of the GNU Library General Public + License along with the GNU C Library; see the file COPYING.LIB. If not, + write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330, + Boston, MA 02111-1307, USA. 
*/ + +.section .ctors,"aw","progbits" + .align 8 +__CTOR_LIST__: + data8 -1 + +.section .dtors,"aw","progbits" + .align 8 +__DTOR_LIST__: + data8 -1 + +.section .sdata + .type dtor_ptr#,@object + .size dtor_ptr#,8 +dtor_ptr: + data8 __DTOR_LIST__# + 8 + + .global __dso_handle# + .type __dso_handle#,@object + .size __dso_handle#,8 +#ifdef SHARED +.section .data +__dso_handle: + data8 __dso_handle# +#else +.section .bss +__dso_handle: + data8 0 +#endif + +/* + * Fragment of the ELF _fini routine that invokes our dtor cleanup. + * + * The code going into .fini is spread all over the place, thus we need + * to save gp in order to make sure that other bits don't get into any + * nasty surprises by expecting a gp that has suddenly changed. + */ +.section .fini,"ax","progbits" + { .mfb + st8 [r12] = gp, -16 + br.call.sptk.many b0 = __do_global_dtors_aux# + ;; + } + { .mmi + adds r12 = 16, r12 + ;; + ld8 gp = [r12] + ;; + } + +.text + + .align 16 + .proc __do_global_dtors_aux# + +__do_global_dtors_aux: +#ifndef SHARED + { .mii + alloc loc2 = ar.pfs, 0, 3, 0, 0 + addl loc0 = @gprel(dtor_ptr#), gp + mov loc1 = b0 + } +#else + /* + if (__cxa_finalize) + __cxa_finalize(__dso_handle) + */ + { .mii + alloc loc2 = ar.pfs, 1, 3, 0, 0 + addl loc0 = @gprel(dtor_ptr#), gp + addl r16 = @ltoff(@fptr(__cxa_finalize#)), gp + ;; + } + { .mmi + ld8 r16 = [r16] + ;; + addl r32 = @ltoff(__dso_handle#), gp + cmp.ne p7, p0 = r0, r16 + ;; + } + { .mmi + ld8 r32 = [r32] +(p7) ld8 r18 = [r16], 8 + mov loc1 = b0 + ;; + } + { .mib +(p7) ld8 gp = [r16] +(p7) mov b6 = r18 +(p7) br.call.sptk.many b0 = b6 + } +#endif + /* + do { + dtor_ptr++; + (*(dtor_ptr-1)) (); + } while (dtor_ptr); + */ + { .bbb + br.sptk.few 1f + ;; + } +0: + { .mmi + st8 [loc0] = r15 + ld8 r17 = [r16], 8 + ;; + } + { .mib + ld8 gp = [r16] + mov b6 = r17 + br.call.sptk.many b0 = b6 + } +1: + { .mmi + ld8 r15 = [loc0] + ;; + ld8 r16 = [r15], 8 + ;; + } + { .mfb + cmp.ne p6, p0 = r0, r16 +(p6) br.cond.sptk.few 0b + } + { .mii + mov 
b0 = loc1 + mov ar.pfs = loc2 + } + { .bbb + br.ret.sptk.many b0 + ;; + } + .endp __do_global_dtors_aux# + +#ifdef SHARED +.weak __cxa_finalize# +#endif diff --git a/gcc/config/ia64/crtend.asm b/gcc/config/ia64/crtend.asm new file mode 100644 index 0000000..3da0f96 --- /dev/null +++ b/gcc/config/ia64/crtend.asm @@ -0,0 +1,91 @@ +/* Copyright (C) 2000 Free Software Foundation, Inc. + Contributed by Jes Sorensen, + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Library General Public License as + published by the Free Software Foundation; either version 2 of the + License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Library General Public License for more details. + + You should have received a copy of the GNU Library General Public + License along with the GNU C Library; see the file COPYING.LIB. If not, + write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330, + Boston, MA 02111-1307, USA. */ + +.section .ctors,"aw","progbits" + .align 8 +__CTOR_END__: + data8 0 + +.section .dtors,"aw","progbits" + .align 8 +__DTOR_END__: + data8 0 + +/* + * Fragment of the ELF _init routine that invokes our dtor cleanup. + * + * The code going into .init is spread all over the place, thus we need + * to save gp in order to make sure that other bits don't get into any + * nasty surprises by expecting a gp that has suddenly changed. 
+ */ +.section .init,"ax","progbits" + { .mfb + st8 [r12] = gp, -16 + br.call.sptk.many b0 = __do_global_ctors_aux + ;; + } + { .mmi + adds r12 = 16, r12 + ;; + ld8 gp = [r12] + ;; + } + +.text + .align 16 + .proc __do_global_ctors_aux# +__do_global_ctors_aux: + /* + for (loc0 = __CTOR_END__-1; *p != -1; --p) + (*p) (); + */ + { .mii + alloc loc2 = ar.pfs, 0, 4, 0, 0 + addl loc0 = @ltoff(__CTOR_END__# - 8), gp + cmp.ne p6, p0 = r0, r0 + ;; + } + { .mfi + ld8 loc0 = [loc0] + mov loc1 = b0 + } +0: + { .mmi +(p6) ld8 r15 = [loc3], 8 + ;; +(p6) ld8 gp = [loc3] +(p6) mov b6 = r15 + } + { .mfb + ld8 loc3 = [loc0], -8 +(p6) br.call.sptk.many b0 = b6 + ;; + } + { .mfb + cmp.ne p6, p0 = -1, loc3 +(p6) br.cond.sptk.few 0b + } + { .mii + mov ar.pfs = loc2 + mov b0 = loc1 + } + { .bbb + br.ret.sptk.many b0 + ;; + } + .endp __do_global_ctors_aux# diff --git a/gcc/config/ia64/elf.h b/gcc/config/ia64/elf.h new file mode 100644 index 0000000..b6c4198 --- /dev/null +++ b/gcc/config/ia64/elf.h @@ -0,0 +1,18 @@ +/* Definitions for embedded ia64-elf target. */ + +#include "ia64/ia64.h" +#include "elfos.h" +#include "sysv4.h" + +/* svr4.h links with crti.o/crtn.o, but elfos.h does not. We override elfos.h + so that we can use the standard ELF Unix method. */ +#undef ENDFILE_SPEC +#define ENDFILE_SPEC "crtend.o%s crtn.o%s" + +#undef STARTFILE_SPEC +#define STARTFILE_SPEC "%{!shared: \ + %{!symbolic: \ + %{pg:gcrt0.o%s}%{!pg:%{p:mcrt0.o%s}%{!p:crt0.o%s}}}}\ + crti.o%s crtbegin.o%s" + +/* End of elf.h */ diff --git a/gcc/config/ia64/ia64-protos.h b/gcc/config/ia64/ia64-protos.h new file mode 100644 index 0000000..c2d06a5 --- /dev/null +++ b/gcc/config/ia64/ia64-protos.h @@ -0,0 +1,101 @@ +/* Definitions of target machine for GNU compiler for IA-64. + Copyright (C) 1999 Cygnus Solutions. + +This file is part of GNU CC. 
+ +GNU CC is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 2, or (at your option) +any later version. + +GNU CC is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with GNU CC; see the file COPYING. If not, write to +the Free Software Foundation, 59 Temple Place - Suite 330, +Boston, MA 02111-1307, USA. */ + +/* Variables defined in ia64.c. */ + +#ifdef RTX_CODE +extern rtx ia64_compare_op0, ia64_compare_op1; +#endif + +/* Functions defined in ia64.c */ + +#ifdef RTX_CODE +extern int call_operand PARAMS((rtx, enum machine_mode)); +extern int sdata_symbolic_operand PARAMS((rtx, enum machine_mode)); +extern int symbolic_operand PARAMS((rtx, enum machine_mode)); +extern int function_operand PARAMS((rtx, enum machine_mode)); +extern int setjmp_operand PARAMS((rtx, enum machine_mode)); +extern int move_operand PARAMS((rtx, enum machine_mode)); +extern int reg_or_0_operand PARAMS((rtx, enum machine_mode)); +extern int reg_or_6bit_operand PARAMS((rtx, enum machine_mode)); +extern int reg_or_8bit_operand PARAMS((rtx, enum machine_mode)); +extern int reg_or_8bit_adjusted_operand PARAMS((rtx, enum machine_mode)); +extern int reg_or_8bit_and_adjusted_operand PARAMS((rtx, enum machine_mode)); +extern int reg_or_14bit_operand PARAMS((rtx, enum machine_mode)); +extern int reg_or_22bit_operand PARAMS((rtx, enum machine_mode)); +extern int shift_count_operand PARAMS((rtx, enum machine_mode)); +extern int shift_32bit_count_operand PARAMS((rtx, enum machine_mode)); +extern int shladd_operand PARAMS((rtx, enum machine_mode)); +extern int reg_or_fp01_operand PARAMS((rtx, enum machine_mode)); +extern int 
normal_comparison_operator PARAMS((rtx, enum machine_mode)); +extern int adjusted_comparison_operator PARAMS((rtx, enum machine_mode)); +extern int call_multiple_values_operation PARAMS((rtx, enum machine_mode)); +#endif +extern int ia64_rap_fp_offset PARAMS((void)); +extern unsigned int ia64_compute_frame_size PARAMS((int)); +extern void save_restore_insns PARAMS((int)); +extern void ia64_expand_prologue PARAMS((void)); +extern void ia64_expand_epilogue PARAMS((void)); +extern void ia64_function_prologue PARAMS((FILE *, int)); +extern void ia64_funtion_epilogue PARAMS((FILE *, int)); +extern int ia64_direct_return PARAMS((void)); +#ifdef TREE_CODE +extern void ia64_setup_incoming_varargs PARAMS((CUMULATIVE_ARGS, int, tree, + int *, int)); +#ifdef RTX_CODE +extern rtx ia64_function_arg PARAMS((CUMULATIVE_ARGS *, enum machine_mode, + tree, int, int)); +extern void ia64_init_builtins PARAMS((void)); +extern rtx ia64_expand_builtin PARAMS((tree, rtx, rtx, enum machine_mode, int)); +#endif +extern int ia64_function_arg_partial_nregs PARAMS((CUMULATIVE_ARGS *, + enum machine_mode, + tree, int)); +extern void ia64_function_arg_advance PARAMS((CUMULATIVE_ARGS *, + enum machine_mode, + tree, int)); +#ifdef RTX_CODE +extern void ia64_va_start PARAMS((int, tree, rtx)); +extern rtx ia64_va_arg PARAMS((tree, tree)); +#endif +extern int ia64_return_in_memory PARAMS((tree)); +#ifdef RTX_CODE +extern rtx ia64_function_value PARAMS((tree, tree)); +#endif +#endif +#ifdef RTX_CODE +extern void ia64_print_operand_address PARAMS((FILE *, rtx)); +extern void ia64_print_operand PARAMS((FILE *, rtx, int)); +extern enum reg_class ia64_secondary_reload_class PARAMS((enum reg_class, + enum machine_mode, + rtx)); +#endif +#ifdef TREE_CODE +extern void ia64_asm_output_external PARAMS((FILE *, tree, char *)); +#endif +extern void ia64_override_options PARAMS((void)); +#ifdef RTX_CODE +extern void ia64_reorg PARAMS((rtx)); +#endif +extern int ia64_epilogue_uses PARAMS((int)); +#ifdef TREE_CODE 
+extern int ia64_valid_type_attribute PARAMS((tree, tree, tree, tree)); +extern void ia64_encode_section_info PARAMS((tree)); +#endif diff --git a/gcc/config/ia64/ia64.c b/gcc/config/ia64/ia64.c new file mode 100644 index 0000000..f7e9ebd --- /dev/null +++ b/gcc/config/ia64/ia64.c @@ -0,0 +1,3279 @@ +/* Definitions of target machine for GNU compiler. + Copyright (C) 1999 Cygnus Solutions. + Contributed by James E. Wilson and + David Mosberger . + +This file is part of GNU CC. + +GNU CC is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 2, or (at your option) +any later version. + +GNU CC is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with GNU CC; see the file COPYING. If not, write to +the Free Software Foundation, 59 Temple Place - Suite 330, +Boston, MA 02111-1307, USA. */ + +#include +#include +#include "config.h" +#include "rtl.h" +#include "tree.h" +#include "tm_p.h" +#include "regs.h" +#include "hard-reg-set.h" +#include "real.h" +#include "insn-config.h" +#include "conditions.h" +#include "insn-flags.h" +#include "output.h" +#include "insn-attr.h" +#include "flags.h" +#include "recog.h" +#include "expr.h" +#include "obstack.h" +#include "except.h" +#include "function.h" +#include "ggc.h" +#include "basic-block.h" + +/* This is used for communication between ASM_OUTPUT_LABEL and + ASM_OUTPUT_LABELREF. */ +int ia64_asm_output_label = 0; + +/* Define the information needed to generate branch and scc insns. This is + stored from the compare operation. 
*/ +struct rtx_def * ia64_compare_op0; +struct rtx_def * ia64_compare_op1; + +/* Register number where ar.pfs was saved in the prologue, or zero + if it was not saved. */ + +int ia64_arpfs_regno; + +/* Register number where rp was saved in the prologue, or zero if it was + not saved. */ + +int ia64_rp_regno; + +/* Register number where frame pointer was saved in the prologue, or zero + if it was not saved. */ + +int ia64_fp_regno; + +/* Number of input and local registers used. This is needed for the .regstk + directive, and also for debugging info. */ + +int ia64_input_regs; +int ia64_local_regs; + +/* If true, then we must emit a .regstk directive. */ + +int ia64_need_regstk; + +/* Register names for ia64_expand_prologue. */ +char *ia64_reg_numbers[96] = +{ "r32", "r33", "r34", "r35", "r36", "r37", "r38", "r39", + "r40", "r41", "r42", "r43", "r44", "r45", "r46", "r47", + "r48", "r49", "r50", "r51", "r52", "r53", "r54", "r55", + "r56", "r57", "r58", "r59", "r60", "r61", "r62", "r63", + "r64", "r65", "r66", "r67", "r68", "r69", "r70", "r71", + "r72", "r73", "r74", "r75", "r76", "r77", "r78", "r79", + "r80", "r81", "r82", "r83", "r84", "r85", "r86", "r87", + "r88", "r89", "r90", "r91", "r92", "r93", "r94", "r95", + "r96", "r97", "r98", "r99", "r100","r101","r102","r103", + "r104","r105","r106","r107","r108","r109","r110","r111", + "r112","r113","r114","r115","r116","r117","r118","r119", + "r120","r121","r122","r123","r124","r125","r126","r127"}; + +/* ??? These strings could be shared with REGISTER_NAMES. */ +char *ia64_input_reg_names[8] = +{ "in0", "in1", "in2", "in3", "in4", "in5", "in6", "in7" }; + +/* ??? These strings could be shared with REGISTER_NAMES. 
*/ +char *ia64_local_reg_names[80] = +{ "loc0", "loc1", "loc2", "loc3", "loc4", "loc5", "loc6", "loc7", + "loc8", "loc9", "loc10","loc11","loc12","loc13","loc14","loc15", + "loc16","loc17","loc18","loc19","loc20","loc21","loc22","loc23", + "loc24","loc25","loc26","loc27","loc28","loc29","loc30","loc31", + "loc32","loc33","loc34","loc35","loc36","loc37","loc38","loc39", + "loc40","loc41","loc42","loc43","loc44","loc45","loc46","loc47", + "loc48","loc49","loc50","loc51","loc52","loc53","loc54","loc55", + "loc56","loc57","loc58","loc59","loc60","loc61","loc62","loc63", + "loc64","loc65","loc66","loc67","loc68","loc69","loc70","loc71", + "loc72","loc73","loc74","loc75","loc76","loc77","loc78","loc79" }; + +/* ??? These strings could be shared with REGISTER_NAMES. */ +char *ia64_output_reg_names[8] = +{ "out0", "out1", "out2", "out3", "out4", "out5", "out6", "out7" }; + +/* String used with the -mfixed-range= option. */ +const char *ia64_fixed_range_string; + +/* Variables which are this size or smaller are put in the sdata/sbss + sections. */ + +int ia64_section_threshold; + +/* Return 1 if OP is a valid operand for the MEM of a CALL insn. */ + +int +call_operand (op, mode) + rtx op; + enum machine_mode mode; +{ + if (mode != GET_MODE (op)) + return 0; + + return (GET_CODE (op) == SYMBOL_REF || GET_CODE (op) == REG + || (GET_CODE (op) == SUBREG && GET_CODE (XEXP (op, 0)) == REG)); +} + +/* Return 1 if OP refers to a symbol in the sdata section. */ + +int +sdata_symbolic_operand (op, mode) + rtx op; + enum machine_mode mode; +{ + switch (GET_CODE (op)) + { + case SYMBOL_REF: + return XSTR (op, 0)[0] == SDATA_NAME_FLAG_CHAR; + + case CONST: + return (GET_CODE (XEXP (op, 0)) == PLUS + && GET_CODE (XEXP (XEXP (op, 0), 0)) == SYMBOL_REF + && XSTR (XEXP (XEXP (op, 0), 0), 0)[0] == SDATA_NAME_FLAG_CHAR); + default: + break; + } + + return 0; +} + +/* Return 1 if OP refers to a symbol. 
*/ + +int +symbolic_operand (op, mode) + rtx op; + enum machine_mode mode; +{ + switch (GET_CODE (op)) + { + case CONST: + case SYMBOL_REF: + case LABEL_REF: + return 1; + + default: + break; + } + return 0; +} + +/* Return 1 if OP refers to a function. */ + +int +function_operand (op, mode) + rtx op; + enum machine_mode mode; +{ + if (GET_CODE (op) == SYMBOL_REF && SYMBOL_REF_FLAG (op)) + return 1; + else + return 0; +} + +/* Return 1 if OP is setjmp or a similar function. */ + +/* ??? This is an unsatisfying solution. Should rethink. */ + +int +setjmp_operand (op, mode) + rtx op; + enum machine_mode mode; +{ + char *name; + int retval = 0; + + if (GET_CODE (op) != SYMBOL_REF) + return 0; + + name = XSTR (op, 0); + + /* The following code is borrowed from special_function_p in calls.c. */ + + /* Disregard prefix _, __ or __x. */ + if (name[0] == '_') + { + if (name[1] == '_' && name[2] == 'x') + name += 3; + else if (name[1] == '_') + name += 2; + else + name += 1; + } + + if (name[0] == 's') + { + retval + = ((name[1] == 'e' + && (! strcmp (name, "setjmp") + || ! strcmp (name, "setjmp_syscall"))) + || (name[1] == 'i' + && ! strcmp (name, "sigsetjmp")) + || (name[1] == 'a' + && ! strcmp (name, "savectx"))); + } + else if ((name[0] == 'q' && name[1] == 's' + && ! strcmp (name, "qsetjmp")) + || (name[0] == 'v' && name[1] == 'f' + && ! strcmp (name, "vfork"))) + retval = 1; + + return retval; +} + +/* Return 1 if OP is a general operand, but when pic exclude symbolic + operands. */ + +/* ??? If we drop no-pic support, can delete SYMBOL_REF, CONST, and LABEL_REF + from PREDICATE_CODES. */ + +int +move_operand (op, mode) + rtx op; + enum machine_mode mode; +{ + if (! TARGET_NO_PIC && symbolic_operand (op, mode)) + return 0; + + return general_operand (op, mode); +} + +/* Return 1 if OP is a register operand, or zero. 
*/ + +int +reg_or_0_operand (op, mode) + rtx op; + enum machine_mode mode; +{ + return (op == const0_rtx || register_operand (op, mode)); +} + +/* Return 1 if OP is a register operand, or a 6 bit immediate operand. */ + +int +reg_or_6bit_operand (op, mode) + rtx op; + enum machine_mode mode; +{ + return ((GET_CODE (op) == CONST_INT && CONST_OK_FOR_M (INTVAL (op))) + || GET_CODE (op) == CONSTANT_P_RTX + || register_operand (op, mode)); +} + +/* Return 1 if OP is a register operand, or an 8 bit immediate operand. */ + +int +reg_or_8bit_operand (op, mode) + rtx op; + enum machine_mode mode; +{ + return ((GET_CODE (op) == CONST_INT && CONST_OK_FOR_K (INTVAL (op))) + || GET_CODE (op) == CONSTANT_P_RTX + || register_operand (op, mode)); +} + +/* Return 1 if OP is a register operand, or an 8 bit adjusted immediate + operand. */ + +int +reg_or_8bit_adjusted_operand (op, mode) + rtx op; + enum machine_mode mode; +{ + return ((GET_CODE (op) == CONST_INT && CONST_OK_FOR_L (INTVAL (op))) + || GET_CODE (op) == CONSTANT_P_RTX + || register_operand (op, mode)); +} + +/* Return 1 if OP is a register operand, or is valid for both an 8 bit + immediate and an 8 bit adjusted immediate operand. This is necessary + because when we emit a compare, we don't know what the condition will be, + so we need the union of the immediates accepted by GT and LT. */ + +int +reg_or_8bit_and_adjusted_operand (op, mode) + rtx op; + enum machine_mode mode; +{ + return ((GET_CODE (op) == CONST_INT && CONST_OK_FOR_K (INTVAL (op)) + && CONST_OK_FOR_L (INTVAL (op))) + || GET_CODE (op) == CONSTANT_P_RTX + || register_operand (op, mode)); +} + +/* Return 1 if OP is a register operand, or a 14 bit immediate operand. 
*/ + +int +reg_or_14bit_operand (op, mode) + rtx op; + enum machine_mode mode; +{ + return ((GET_CODE (op) == CONST_INT && CONST_OK_FOR_I (INTVAL (op))) + || GET_CODE (op) == CONSTANT_P_RTX + || register_operand (op, mode)); +} + +/* Return 1 if OP is a register operand, or a 22 bit immediate operand. */ + +int +reg_or_22bit_operand (op, mode) + rtx op; + enum machine_mode mode; +{ + return ((GET_CODE (op) == CONST_INT && CONST_OK_FOR_J (INTVAL (op))) + || GET_CODE (op) == CONSTANT_P_RTX + || register_operand (op, mode)); +} + +/* Return 1 if OP is a 6 bit immediate operand. */ + +int +shift_count_operand (op, mode) + rtx op; + enum machine_mode mode; +{ + return ((GET_CODE (op) == CONST_INT && CONST_OK_FOR_M (INTVAL (op))) + || GET_CODE (op) == CONSTANT_P_RTX); +} + +/* Return 1 if OP is a 5 bit immediate operand. */ + +int +shift_32bit_count_operand (op, mode) + rtx op; + enum machine_mode mode; +{ + return ((GET_CODE (op) == CONST_INT + && (INTVAL (op) >= 0 && INTVAL (op) < 32)) + || GET_CODE (op) == CONSTANT_P_RTX); +} + +/* Return 1 if OP is a 2, 4, 8, or 16 immediate operand. */ + +int +shladd_operand (op, mode) + rtx op; + enum machine_mode mode; +{ + return (GET_CODE (op) == CONST_INT + && (INTVAL (op) == 2 || INTVAL (op) == 4 + || INTVAL (op) == 8 || INTVAL (op) == 16)); +} + +/* Return 1 if OP is a -16, -8, -4, -1, 1, 4, 8, or 16 immediate operand. */ + +int +fetchadd_operand (op, mode) + rtx op; + enum machine_mode mode; +{ + return (GET_CODE (op) == CONST_INT + && (INTVAL (op) == -16 || INTVAL (op) == -8 || + INTVAL (op) == -4 || INTVAL (op) == -1 || + INTVAL (op) == 1 || INTVAL (op) == 4 || + INTVAL (op) == 8 || INTVAL (op) == 16)); +} + +/* Return 1 if OP is a floating-point constant zero, one, or a register. 
*/ + +int +reg_or_fp01_operand (op, mode) + rtx op; + enum machine_mode mode; +{ + return ((GET_CODE (op) == CONST_DOUBLE && CONST_DOUBLE_OK_FOR_G (op)) + || GET_CODE (op) == CONSTANT_P_RTX + || register_operand (op, mode)); +} + +/* Return 1 if this is a comparison operator, which accepts an normal 8-bit + signed immediate operand. */ + +int +normal_comparison_operator (op, mode) + register rtx op; + enum machine_mode mode; +{ + enum rtx_code code = GET_CODE (op); + return ((mode == VOIDmode || GET_MODE (op) == mode) + && (code == EQ || code == NE + || code == GT || code == LE || code == GTU || code == LEU)); +} + +/* Return 1 if this is a comparison operator, which accepts an adjusted 8-bit + signed immediate operand. */ + +int +adjusted_comparison_operator (op, mode) + register rtx op; + enum machine_mode mode; +{ + enum rtx_code code = GET_CODE (op); + return ((mode == VOIDmode || GET_MODE (op) == mode) + && (code == LT || code == GE || code == LTU || code == GEU)); +} + +/* Return 1 if OP is a call returning an HFA. It is known to be a PARALLEL + and the first section has already been tested. */ + +int +call_multiple_values_operation (op, mode) + rtx op; + enum machine_mode mode ATTRIBUTE_UNUSED; +{ + int count = XVECLEN (op, 0) - 2; + int i; + int dest_regno; + + /* Perform a quick check so we don't block up below. */ + if (count <= 1 + || GET_CODE (XVECEXP (op, 0, 0)) != SET + || GET_CODE (SET_DEST (XVECEXP (op, 0, 0))) != REG + || GET_CODE (SET_SRC (XVECEXP (op, 0, 0))) != CALL) + return 0; + + dest_regno = REGNO (SET_DEST (XVECEXP (op, 0, 0))); + + for (i = 1; i < count; i++) + { + rtx elt = XVECEXP (op, 0, i + 2); + + if (GET_CODE (elt) != SET + || GET_CODE (SET_SRC (elt)) != CALL + || GET_CODE (SET_DEST (elt)) != REG + || REGNO (SET_DEST (elt)) != dest_regno + i) + return 0; + } + + return 1; +} + + +/* Structure to be filled in by ia64_compute_frame_size with register + save masks and offsets for the current function. 
*/ + +struct ia64_frame_info +{ + long total_size; /* # bytes that the entire frame takes up. */ + long var_size; /* # bytes that variables take up. */ + long args_size; /* # bytes that outgoing arguments take up. */ + long pretend_size; /* # bytes that stdarg arguments take up. */ + long pretend_pad_size; /* # bytes padding to align stdarg args. */ + long extra_size; /* # bytes of extra gunk. */ + long gr_size; /* # bytes needed to store general regs. */ + long fr_size; /* # bytes needed to store FP regs. */ + long fr_pad_size; /* # bytes needed to align FP save area. */ + long pr_size; /* # bytes needed to store predicate regs. */ + long br_size; /* # bytes needed to store branch regs. */ + HARD_REG_SET mask; /* mask of saved registers. */ + int initialized; /* != 0 is frame size already calculated. */ +}; + +/* Current frame information calculated by compute_frame_size. */ +struct ia64_frame_info current_frame_info; + +/* Helper function for INITIAL_ELIMINATION_OFFSET. Return the offset from the + frame pointer where b0 is saved. */ + +int +ia64_rap_fp_offset () +{ + return - current_frame_info.br_size; +} + +/* Returns the number of bytes offset between the frame pointer and the stack + pointer for the current function. SIZE is the number of bytes of space + needed for local variables. */ +unsigned int +ia64_compute_frame_size (size) + int size; +{ + int total_size; + int extra_size; + int gr_size = 0; + int fr_size = 0; + int fr_pad_size = 0; + int pr_size = 0; + int br_size = 0; + int pretend_pad_size = 0; + int tmp; + int regno; + HARD_REG_SET mask; + + CLEAR_HARD_REG_SET (mask); + + /* Calculate space needed for general registers. */ + /* We never need to save any of the stacked registers, which are regs + 32 to 127. */ + for (regno = GR_REG (0); regno <= GR_REG (31); regno++) + if (regs_ever_live[regno] && ! call_used_regs[regno]) + { + SET_HARD_REG_BIT (mask, regno); + gr_size += 8; + } + + /* Allocate space to save/restore the unat from. 
*/ + if (gr_size != 0 + || current_function_varargs || current_function_stdarg) + gr_size += 8; + + /* Calculate space needed for FP registers. */ + for (regno = FR_REG (0); regno <= FR_REG (127); regno++) + if (regs_ever_live[regno] && ! call_used_regs[regno]) + { + SET_HARD_REG_BIT (mask, regno); + fr_size += 16; + } + + /* Calculate space needed for predicate registers. */ + for (regno = PR_REG (0); regno <= PR_REG (63); regno++) + if (regs_ever_live[regno] && ! call_used_regs[regno]) + { + SET_HARD_REG_BIT (mask, regno); + pr_size = 8; + } + + /* Calculate space needed for branch registers. */ + for (regno = BR_REG (0); regno <= BR_REG (7); regno++) + if (regs_ever_live[regno] && ! call_used_regs[regno]) + { + SET_HARD_REG_BIT (mask, regno); + br_size += 8; + } + + /* The FR save area needs to be 16-byte aligned. */ + if (fr_size) + { + tmp = (size + fr_size + pr_size + br_size); + fr_pad_size = IA64_STACK_ALIGN (tmp) - tmp; + } + else + fr_pad_size = 0; + + /* If we have an odd number of words of pretend arguments written to the + stack, then the FR save area will be unaligned. We pad below this area + to keep things 16 byte aligned. This needs to be kept distinct, to + avoid confusing it with padding added below the GR save area, which does + not affect the FR area alignment. */ + pretend_pad_size = current_function_pretend_args_size % 16; + + /* The 16 bytes is for the scratch area. */ + tmp = (size + gr_size + fr_pad_size + fr_size + pr_size + br_size + + current_function_outgoing_args_size + 16); + tmp += (current_function_pretend_args_size + ? current_function_pretend_args_size - 16 + : 0) + pretend_pad_size; + total_size = IA64_STACK_ALIGN (tmp); + extra_size = total_size - tmp + 16; + + /* If this is a leaf routine (BR_REG (0) is not live), and if there is no + stack space needed for register saves, then don't allocate the 16 byte + scratch area. */ + if (total_size == 16 && ! 
regs_ever_live[BR_REG (0)]) + { + total_size = 0; + extra_size = 0; + } + + current_frame_info.total_size = total_size; + current_frame_info.var_size = size; + current_frame_info.args_size = current_function_outgoing_args_size; + current_frame_info.pretend_size + = (current_function_pretend_args_size + ? current_function_pretend_args_size - 16 + : 0); + current_frame_info.pretend_pad_size = pretend_pad_size; + current_frame_info.extra_size = extra_size; + current_frame_info.gr_size = gr_size; + current_frame_info.fr_size = fr_size; + current_frame_info.fr_pad_size = fr_pad_size; + current_frame_info.pr_size = pr_size; + current_frame_info.br_size = br_size; + COPY_HARD_REG_SET (current_frame_info.mask, mask); + current_frame_info.initialized = reload_completed; + + return total_size; +} + +void +save_restore_insns (save_p) + int save_p; +{ + rtx insn; + + if (current_frame_info.gr_size + current_frame_info.fr_size + + current_frame_info.br_size + current_frame_info.pr_size) + { + rtx tmp_reg = gen_rtx_REG (DImode, GR_REG (2)); + rtx tmp_post_inc = gen_rtx_POST_INC (DImode, tmp_reg); + rtx tmp2_reg = gen_rtx_REG (DImode, GR_REG (3)); + int offset = (current_frame_info.total_size + - (current_frame_info.gr_size + current_frame_info.fr_size + + current_frame_info.fr_pad_size + + current_frame_info.br_size + + current_frame_info.pr_size + + current_frame_info.var_size + + current_frame_info.pretend_size + + current_frame_info.pretend_pad_size)); + rtx offset_rtx; + int regno; + + /* If there is a frame pointer, then we use it instead of the stack + pointer, so that the stack pointer does not need to be valid when + the epilogue starts. See EXIT_IGNORE_STACK. 
*/ + if (frame_pointer_needed) + offset = offset - current_frame_info.total_size; + + if (CONST_OK_FOR_I (offset)) + offset_rtx = GEN_INT (offset); + else + { + offset_rtx = tmp_reg; + insn = emit_insn (gen_movdi (tmp_reg, GEN_INT (offset))); + RTX_FRAME_RELATED_P (insn) = 1; + } + insn = emit_insn (gen_adddi3 (tmp_reg, + (frame_pointer_needed ? frame_pointer_rtx + : stack_pointer_rtx), + offset_rtx)); + RTX_FRAME_RELATED_P (insn) = 1; + + /* Must save/restore ar.unat if any GR is spilled/restored. */ + if (current_frame_info.gr_size != 0 + || current_function_varargs || current_function_stdarg) + { + rtx mem = gen_rtx_MEM (DImode, tmp_post_inc); + if (save_p) + { + insn = emit_insn (gen_unat_spill (tmp2_reg)); + RTX_FRAME_RELATED_P (insn) = 1; + insn = emit_insn (gen_movdi (mem, tmp2_reg)); + RTX_FRAME_RELATED_P (insn) = 1; + } + else + { + insn = emit_insn (gen_movdi (tmp2_reg, mem)); + RTX_FRAME_RELATED_P (insn) = 1; + /* The restore happens after the last ld8.fill instruction. */ + } + } + + for (regno = GR_REG (0); regno <= GR_REG (127); regno++) + if (TEST_HARD_REG_BIT (current_frame_info.mask, regno)) + { + rtx mem = gen_rtx_MEM (DImode, tmp_post_inc); + if (save_p) + insn = emit_insn (gen_gr_spill (mem, + gen_rtx_REG (DImode, regno))); + else + insn = emit_insn (gen_gr_restore (gen_rtx_REG (DImode, regno), + mem)); + RTX_FRAME_RELATED_P (insn) = 1; + } + + /* Now restore the unat register if necessary. */ + if ((current_frame_info.gr_size != 0 + || current_function_varargs || current_function_stdarg) + && ! 
save_p) + emit_insn (gen_unat_restore (tmp2_reg)); + + for (regno = FR_REG (0); regno <= FR_REG (127); regno++) + if (TEST_HARD_REG_BIT (current_frame_info.mask, regno)) + { + rtx mem = gen_rtx_MEM (XFmode, tmp_post_inc); + if (save_p) + insn = emit_insn (gen_fr_spill (mem, + gen_rtx_REG (XFmode, regno))); + else + insn = emit_insn (gen_fr_restore (gen_rtx_REG (XFmode, regno), + mem)); + RTX_FRAME_RELATED_P (insn) = 1; + } + + /* If one is used, we save/restore all of them. */ + for (regno = PR_REG (0); regno <= PR_REG (63); regno++) + if (TEST_HARD_REG_BIT (current_frame_info.mask, regno)) + { + rtx mem = gen_rtx_MEM (DImode, tmp_post_inc); + if (save_p) + { + insn = emit_insn (gen_pr_spill (tmp2_reg)); + RTX_FRAME_RELATED_P (insn) = 1; + insn = emit_insn (gen_movdi (mem, tmp2_reg)); + RTX_FRAME_RELATED_P (insn) = 1; + } + else + { + insn = emit_insn (gen_movdi (tmp2_reg, mem)); + RTX_FRAME_RELATED_P (insn) = 1; + insn = emit_insn (gen_pr_restore (tmp2_reg)); + RTX_FRAME_RELATED_P (insn) = 1; + } + break; + } + + for (regno = BR_REG (0); regno <= BR_REG (7); regno++) + if (TEST_HARD_REG_BIT (current_frame_info.mask, regno)) + { + rtx src, dest; + + if (save_p) + { + src = gen_rtx_REG (DImode, regno); + dest = gen_rtx_MEM (DImode, tmp_post_inc); + } + else + { + src = gen_rtx_MEM (DImode, tmp_post_inc); + dest = gen_rtx_REG (DImode, regno); + } + + insn = emit_insn (gen_movdi (tmp2_reg, src)); + RTX_FRAME_RELATED_P (insn) = 1; + insn = emit_insn (gen_movdi (dest, tmp2_reg)); + RTX_FRAME_RELATED_P (insn) = 1; + } + } +} + + +/* Called after register allocation to add any instructions needed for the + prologue. Using a prologue insn is favored compared to putting all of the + instructions in the FUNCTION_PROLOGUE macro, since it allows the scheduler + to intermix instructions with the saves of the caller saved registers. In + some cases, it might be necessary to emit a barrier instruction as the last + insn to prevent such scheduling. 
+ + Also any insns generated here should have RTX_FRAME_RELATED_P(insn) = 1 + so that the debug info generation code can handle them properly. */ + +/* ??? Get inefficient code when the frame size is larger than can fit in an + adds instruction. */ + +/* ??? Add support for allocating temporaries from the output registers if + they do not need to live past call instructions. */ + +/* ??? If the function does not return, then we don't need to save the rp + and ar.pfs registers. */ + +/* ??? If this is a leaf function, then fp/rp/ar.pfs should be put in the + low 32 regs. */ + +/* ??? Should not reserve a local register for rp/ar.pfs. Should + instead check to see if any local registers are unused, and if so, + allocate them to rp/ar.pfs in that order. Not sure what to do about + fp, we may still need to reserve a local register for it. */ + +void +ia64_expand_prologue () +{ + rtx insn, offset; + int i, locals, inputs, outputs, rotates; + int frame_size = ia64_compute_frame_size (get_frame_size ()); + int leaf_function; + int epilogue_p; + edge e; + + /* ??? This seems like a leaf_function_p bug. It calls get_insns which + returns the first insn of the current sequence, not the first insn + of the function. We work around this by pushing to the topmost + sequence first. */ + push_topmost_sequence (); + leaf_function = leaf_function_p (); + pop_topmost_sequence (); + + /* ??? If there is no epilogue, then we don't need some prologue insns. We + need to avoid emitting the dead prologue insns, because flow will complain + about them. */ + if (optimize) + { + for (e = EXIT_BLOCK_PTR->pred; e ; e = e->pred_next) + if ((e->flags & EDGE_FAKE) == 0 + && (e->flags & EDGE_FALLTHRU) != 0) + break; + epilogue_p = (e != NULL); + } + else + epilogue_p = 1; + + /* Find the highest local register used. */ + /* We have only 80 local registers, because we reserve 8 for the inputs + and 8 for the outputs. 
*/ + + for (i = LOC_REG (79); i >= LOC_REG (0); i--) + if (regs_ever_live[i]) + break; + locals = i - LOC_REG (0) + 1; + + /* Likewise for inputs. */ + + for (i = IN_REG (7); i >= IN_REG (0); i--) + if (regs_ever_live[i]) + break; + inputs = i - IN_REG (0) + 1; + +#if 0 + /* If the function was declared with syscall_linkage, then we may need to + preserve all declared input registers, even if they weren't used. + Currently, syscall_linkage does not have this effect. */ + + if (lookup_attribute ("syscall_linkage", + TYPE_ATTRIBUTES (TREE_TYPE (current_function_decl)))) + inputs = MAX (inputs, current_function_args_info.words); +#endif + + /* Likewise for outputs. */ + + for (i = OUT_REG (7); i >= OUT_REG (0); i--) + if (regs_ever_live[i]) + break; + outputs = i - OUT_REG (0) + 1; + + /* When -p profiling, we need one output register for the mcount argument. + Likwise for -a profiling for the bb_init_func argument. For -ax + profiling, we need two output registers for the two bb_init_trace_func + arguments. */ + if (profile_flag || profile_block_flag == 1) + outputs = MAX (outputs, 1); + else if (profile_block_flag == 2) + outputs = MAX (outputs, 2); + + /* Leaf functions should not use any output registers. */ + if (leaf_function && outputs != 0) + abort (); + + /* No rotating register support as yet. */ + + rotates = 0; + + /* Allocate two extra locals for saving/restoring rp and ar.pfs. Also + allocate one local for use as the frame pointer if frame_pointer_needed + is true. */ + locals += 2 + frame_pointer_needed; + + /* Save these values in global registers for debugging info. */ + ia64_input_regs = inputs; + ia64_local_regs = locals; + + /* Set the local, input, and output register names. We need to do this + for GNU libc, which creates crti.S/crtn.S by splitting initfini.c in + half. If we use in/loc/out register names, then we get assembler errors + in crtn.S because there is no alloc insn or regstk directive in there. 
+ We give in/loc/out names to unused registers, to make invalid uses of + them easy to spot. */ + if (! TARGET_REG_NAMES) + { + for (i = 0; i < 8; i++) + { + if (i < inputs) + reg_names[IN_REG (i)] = ia64_reg_numbers[i]; + else + reg_names[IN_REG (i)] = ia64_input_reg_names[i]; + } + for (i = 0; i < 80; i++) + { + if (i < locals) + reg_names[LOC_REG (i)] = ia64_reg_numbers[inputs + i]; + else + reg_names[LOC_REG (i)] = ia64_local_reg_names[i]; + } + for (i = 0; i < 8; i++) + { + if (i < outputs) + reg_names[OUT_REG (i)] = ia64_reg_numbers[inputs + locals + i]; + else + reg_names[OUT_REG (i)] = ia64_output_reg_names[i]; + } + } + + /* Set the frame pointer register name now that it is known, and the + local register names are known. */ + if (frame_pointer_needed) + { + reg_names[FRAME_POINTER_REGNUM] + = reg_names[LOC_REG (locals - 3)]; + ia64_fp_regno = LOC_REG (inputs + locals - 3); + } + else + ia64_fp_regno = 0; + + /* We don't need an alloc instruction if this is a leaf function, and the + locals and outputs are both zero sized. Since we have already allocated + two locals for rp and ar.pfs, we check for two locals. */ + if (locals == 2 && outputs == 0 && leaf_function) + { + /* If there is no alloc, but there are input registers used, then we + need a .regstk directive. */ + if (TARGET_REG_NAMES) + ia64_need_regstk = 1; + else + ia64_need_regstk = 0; + + ia64_arpfs_regno = 0; + ia64_rp_regno = 0; + } + else + { + ia64_need_regstk = 0; + + ia64_arpfs_regno = LOC_REG (locals - 1); + ia64_rp_regno = LOC_REG (locals - 2); + reg_names[RETURN_ADDRESS_REGNUM] = reg_names[ia64_rp_regno]; + + emit_insn (gen_alloc (gen_rtx_REG (DImode, ia64_arpfs_regno), + GEN_INT (inputs), GEN_INT (locals), + GEN_INT (outputs), GEN_INT (rotates))); + + /* ??? FIXME ??? We don't need to save BR_REG (0) if this is a leaf + function. We also don't need to allocate a local reg for it then. */ + /* ??? Likewise if there is no epilogue. 
*/ + if (epilogue_p) + emit_move_insn (gen_rtx_REG (DImode, ia64_rp_regno), + gen_rtx_REG (DImode, BR_REG (0))); + } + + /* Set up frame pointer and stack pointer. */ + if (frame_pointer_needed) + { + insn = emit_insn (gen_movdi (hard_frame_pointer_rtx, stack_pointer_rtx)); + RTX_FRAME_RELATED_P (insn) = 1; + } + if (frame_size != 0) + { + if (CONST_OK_FOR_I (-frame_size)) + offset = GEN_INT (-frame_size); + else + { + offset = gen_rtx_REG (DImode, GR_REG (2)); + insn = emit_insn (gen_movdi (offset, GEN_INT (-frame_size))); + RTX_FRAME_RELATED_P (insn) = 1; + } + /* If there is a frame pointer, then we need to make the stack pointer + decrement depend on the frame pointer, so that the stack pointer + update won't be moved past fp-relative stores to the frame. */ + if (frame_pointer_needed) + insn = emit_insn (gen_prologue_allocate_stack (stack_pointer_rtx, + stack_pointer_rtx, + offset, + hard_frame_pointer_rtx)); + else + insn = emit_insn (gen_adddi3 (stack_pointer_rtx, stack_pointer_rtx, + offset)); + RTX_FRAME_RELATED_P (insn) = 1; + } + + /* Save registers to frame. */ + save_restore_insns (1); +} + +/* Called after register allocation to add any instructions needed for the + epilogue. Using a epilogue insn is favored compared to putting all of the + instructions in the FUNCTION_PROLOGUE macro, since it allows the scheduler + to intermix instructions with the saves of the caller saved registers. In + some cases, it might be necessary to emit a barrier instruction as the last + insn to prevent such scheduling. */ + +void +ia64_expand_epilogue () +{ + /* Restore registers from frame. */ + save_restore_insns (0); + + /* ??? The gen_epilogue_deallocate_stack call below does not work. This + is mainly because there is no fp+offset addressing mode, so most loads + from the frame do not actually use the frame pointer; they use a pseudo + computed from the frame pointer. The same problem exists with the + stack pointer when there is no frame pointer. 
I think this can be + fixed only by making the dependency analysis code in sched smarter, so + that it recognizes references to the frame, and makes succeeding stack + pointer updates anti-dependent on them. */ + emit_insn (gen_blockage ()); + + if (frame_pointer_needed) + { + /* If there is a frame pointer, then we need to make the stack pointer + restore depend on the frame pointer, so that the stack pointer + restore won't be moved up past fp-relative loads from the frame. */ + emit_insn (gen_epilogue_deallocate_stack (stack_pointer_rtx, + hard_frame_pointer_rtx)); + } + else + { + int frame_size = current_frame_info.total_size; + rtx offset; + + if (frame_size != 0) + { + if (CONST_OK_FOR_I (frame_size)) + offset = GEN_INT (frame_size); + else + { + offset = gen_rtx_REG (DImode, GR_REG (2)); + emit_insn (gen_movdi (offset, GEN_INT (frame_size))); + } + emit_insn (gen_adddi3 (stack_pointer_rtx, stack_pointer_rtx, + offset)); + } + } + + if (ia64_arpfs_regno) + emit_insn (gen_pfs_restore (gen_rtx_REG (DImode, ia64_arpfs_regno))); + + if (ia64_rp_regno) + emit_move_insn (gen_rtx_REG (DImode, BR_REG (0)), + gen_rtx_REG (DImode, ia64_rp_regno)); + + emit_jump_insn (gen_return_internal (gen_rtx_REG (DImode, BR_REG (0)))); +} + +/* Emit the function prologue. */ + +void +ia64_function_prologue (file, size) + FILE *file; + int size; +{ + if (ia64_need_regstk) + fprintf (file, "\t.regstk %d, 0, 0, 0\n", ia64_input_regs); + + /* ??? Emit .body directive. GNU as ignores .body currently. */ +} + +/* Emit the function epilogue. */ + +void +ia64_function_epilogue (file, size) + FILE *file; + int size; +{ +} + +/* Return 1 if br.ret can do all the work required to return from a + function. */ + +int +ia64_direct_return () +{ + return (reload_completed && ! frame_pointer_needed + && ia64_compute_frame_size (get_frame_size ())); +} + + +/* Do any needed setup for a variadic function. CUM has not been updated + for the last named argument which has type TYPE and mode MODE. 
*/ +void +ia64_setup_incoming_varargs (cum, int_mode, type, pretend_size, second_time) + CUMULATIVE_ARGS cum; + int int_mode; + tree type; + int * pretend_size; + int second_time; +{ + /* If this is a stdarg function, then don't save the current argument. */ + int offset = ! current_function_varargs; + + if (cum.words < MAX_ARGUMENT_SLOTS) + { + if (! second_time) + { + int i; + int first_reg = GR_ARG_FIRST + cum.words + offset; + rtx tmp_reg = gen_rtx_REG (DImode, GR_REG (16)); + rtx tmp_post_inc = gen_rtx_POST_INC (DImode, tmp_reg); + rtx mem = gen_rtx_MEM (DImode, tmp_post_inc); + rtx insn; + + /* We must emit st8.spill insns instead of st8 because we might + be saving non-argument registers, and non-argument registers might + not contain valid values. */ + emit_move_insn (tmp_reg, virtual_incoming_args_rtx); + for (i = first_reg; i < GR_ARG_FIRST + 8; i++) + { + insn = emit_insn (gen_gr_spill (mem, gen_rtx_REG (DImode, i))); + REG_NOTES (insn) = gen_rtx_EXPR_LIST (REG_INC, tmp_reg, 0); + } + } + *pretend_size = ((MAX_ARGUMENT_SLOTS - cum.words - offset) + * UNITS_PER_WORD); + } +} + +/* Check whether TYPE is a homogeneous floating point aggregate. If + it is, return the mode of the floating point type that appears + in all leafs. If it is not, return VOIDmode. + + An aggregate is a homogeneous floating point aggregate is if all + fields/elements in it have the same floating point type (e.g, + SFmode). 128-bit quad-precision floats are excluded. 
*/ + +static enum machine_mode +hfa_element_mode (type, nested) + tree type; + int nested; +{ + enum machine_mode element_mode = VOIDmode; + enum machine_mode mode; + enum tree_code code = TREE_CODE (type); + int know_element_mode = 0; + tree t; + + switch (code) + { + case VOID_TYPE: case INTEGER_TYPE: case ENUMERAL_TYPE: + case BOOLEAN_TYPE: case CHAR_TYPE: case POINTER_TYPE: + case OFFSET_TYPE: case REFERENCE_TYPE: case METHOD_TYPE: + case FILE_TYPE: case SET_TYPE: case LANG_TYPE: + case FUNCTION_TYPE: + return VOIDmode; + + /* Fortran complex types are supposed to be HFAs, so we need to handle + gcc's COMPLEX_TYPEs as HFAs. We need to exclude the integral complex + types though. */ + case COMPLEX_TYPE: + if (GET_MODE_CLASS (TYPE_MODE (type)) == MODE_COMPLEX_FLOAT) + return mode_for_size (GET_MODE_UNIT_SIZE (TYPE_MODE (type)) + * BITS_PER_UNIT, MODE_FLOAT, 0); + else + return VOIDmode; + + case REAL_TYPE: + /* We want to return VOIDmode for raw REAL_TYPEs, but the actual + mode if this is contained within an aggregate. */ + if (nested) + return TYPE_MODE (type); + else + return VOIDmode; + + case ARRAY_TYPE: + return TYPE_MODE (TREE_TYPE (type)); + + case RECORD_TYPE: + case UNION_TYPE: + case QUAL_UNION_TYPE: + for (t = TYPE_FIELDS (type); t; t = TREE_CHAIN (t)) + { + if (TREE_CODE (t) != FIELD_DECL) + continue; + + mode = hfa_element_mode (TREE_TYPE (t), 1); + if (know_element_mode) + { + if (mode != element_mode) + return VOIDmode; + } + else if (GET_MODE_CLASS (mode) != MODE_FLOAT) + return VOIDmode; + else + { + know_element_mode = 1; + element_mode = mode; + } + } + return element_mode; + + default: + /* If we reach here, we probably have some front-end specific type + that the backend doesn't know about. This can happen via the + aggregate_value_p call in init_function_start. All we can do is + ignore unknown tree types. 
*/ + return VOIDmode; + } + + return VOIDmode; +} + +/* Return rtx for register where argument is passed, or zero if it is passed + on the stack. */ + +/* ??? 128-bit quad-precision floats are always passed in general + registers. */ + +rtx +ia64_function_arg (cum, mode, type, named, incoming) + CUMULATIVE_ARGS *cum; + enum machine_mode mode; + tree type; + int named; + int incoming; +{ + int basereg = (incoming ? GR_ARG_FIRST : AR_ARG_FIRST); + int words = (((mode == BLKmode ? int_size_in_bytes (type) + : GET_MODE_SIZE (mode)) + UNITS_PER_WORD - 1) + / UNITS_PER_WORD); + int offset = 0; + enum machine_mode hfa_mode = VOIDmode; + + /* Arguments larger than 8 bytes start at the next even boundary. */ + if (words > 1 && (cum->words & 1)) + offset = 1; + + /* If all argument slots are used, then it must go on the stack. */ + if (cum->words + offset >= MAX_ARGUMENT_SLOTS) + return 0; + + /* Check for and handle homogeneous FP aggregates. */ + if (type) + hfa_mode = hfa_element_mode (type, 0); + + /* Unnamed prototyped hfas are passed as usual. Named prototyped hfas + and unprototyped hfas are passed specially. */ + if (hfa_mode != VOIDmode && (! cum->prototype || named)) + { + rtx loc[16]; + int i = 0; + int fp_regs = cum->fp_regs; + int int_regs = cum->words + offset; + int hfa_size = GET_MODE_SIZE (hfa_mode); + int byte_size; + int args_byte_size; + + /* If prototyped, pass it in FR regs then GR regs. + If not prototyped, pass it in both FR and GR regs. + + If this is an SFmode aggregate, then it is possible to run out of + FR regs while GR regs are still left. In that case, we pass the + remaining part in the GR regs. */ + + /* Fill the FP regs. We do this always. We stop if we reach the end + of the argument, the last FP register, or the last argument slot. */ + + byte_size = ((mode == BLKmode) + ? 
int_size_in_bytes (type) : GET_MODE_SIZE (mode)); + args_byte_size = int_regs * UNITS_PER_WORD; + offset = 0; + for (; (offset < byte_size && fp_regs < MAX_ARGUMENT_SLOTS + && args_byte_size < (MAX_ARGUMENT_SLOTS * UNITS_PER_WORD)); i++) + { + loc[i] = gen_rtx_EXPR_LIST (VOIDmode, + gen_rtx_REG (hfa_mode, (FR_ARG_FIRST + + fp_regs)), + GEN_INT (offset)); + /* ??? Padding for XFmode type? */ + offset += hfa_size; + args_byte_size += hfa_size; + fp_regs++; + } + + /* If no prototype, then the whole thing must go in GR regs. */ + if (! cum->prototype) + offset = 0; + /* If this is an SFmode aggregate, then we might have some left over + that needs to go in GR regs. */ + else if (byte_size != offset) + int_regs += offset / UNITS_PER_WORD; + + /* Fill in the GR regs. We must use DImode here, not the hfa mode. */ + + for (; offset < byte_size && int_regs < MAX_ARGUMENT_SLOTS; i++) + { + enum machine_mode gr_mode = DImode; + + /* If we have an odd 4 byte hunk because we ran out of FR regs, + then this goes in a GR reg left adjusted/little endian, right + adjusted/big endian. */ + /* ??? Currently this is handled wrong, because 4-byte hunks are + always right adjusted/little endian. */ + if (offset & 0x4) + gr_mode = SImode; + /* If we have an even 4 byte hunk because the aggregate is a + multiple of 4 bytes in size, then this goes in a GR reg right + adjusted/little endian. */ + else if (byte_size - offset == 4) + gr_mode = SImode; + + loc[i] = gen_rtx_EXPR_LIST (VOIDmode, + gen_rtx_REG (gr_mode, (basereg + + int_regs)), + GEN_INT (offset)); + offset += GET_MODE_SIZE (gr_mode); + int_regs++; + } + + /* If we ended up using just one location, just return that one loc. */ + if (i == 1) + return XEXP (loc[0], 0); + else + return gen_rtx_PARALLEL (mode, gen_rtvec_v (i, loc)); + } + + /* Integral and aggregates go in general registers. If we have run out of + FR registers, then FP values must also go in general registers. This can + happen when we have a SFmode HFA. 
*/ + else if (! FLOAT_MODE_P (mode) || cum->fp_regs == MAX_ARGUMENT_SLOTS) + return gen_rtx_REG (mode, basereg + cum->words + offset); + + /* If there is a prototype, then FP values go in a FR register when + named, and in a GR registeer when unnamed. */ + else if (cum->prototype) + { + if (! named) + return gen_rtx_REG (mode, basereg + cum->words + offset); + else + return gen_rtx_REG (mode, FR_ARG_FIRST + cum->fp_regs); + } + /* If there is no prototype, then FP values go in both FR and GR + registers. */ + else + { + rtx fp_reg = gen_rtx_EXPR_LIST (VOIDmode, + gen_rtx_REG (mode, (FR_ARG_FIRST + + cum->fp_regs)), + const0_rtx); + rtx gr_reg = gen_rtx_EXPR_LIST (VOIDmode, + gen_rtx_REG (mode, + (basereg + cum->words + + offset)), + const0_rtx); + + return gen_rtx_PARALLEL (mode, gen_rtvec (2, fp_reg, gr_reg)); + } +} + +/* Return number of words, at the beginning of the argument, that must be + put in registers. 0 is the argument is entirely in registers or entirely + in memory. */ + +int +ia64_function_arg_partial_nregs (cum, mode, type, named) + CUMULATIVE_ARGS *cum; + enum machine_mode mode; + tree type; + int named; +{ + int words = (((mode == BLKmode ? int_size_in_bytes (type) + : GET_MODE_SIZE (mode)) + UNITS_PER_WORD - 1) + / UNITS_PER_WORD); + int offset = 0; + + /* Arguments larger than 8 bytes start at the next even boundary. */ + if (words > 1 && (cum->words & 1)) + offset = 1; + + /* If all argument slots are used, then it must go on the stack. */ + if (cum->words + offset >= MAX_ARGUMENT_SLOTS) + return 0; + + /* It doesn't matter whether the argument goes in FR or GR regs. If + it fits within the 8 argument slots, then it goes entirely in + registers. If it extends past the last argument slot, then the rest + goes on the stack. */ + + if (words + cum->words + offset <= MAX_ARGUMENT_SLOTS) + return 0; + + return MAX_ARGUMENT_SLOTS - cum->words - offset; +} + +/* Update CUM to point after this argument. This is patterned after + ia64_function_arg. 
*/ + +void +ia64_function_arg_advance (cum, mode, type, named) + CUMULATIVE_ARGS *cum; + enum machine_mode mode; + tree type; + int named; +{ + int words = (((mode == BLKmode ? int_size_in_bytes (type) + : GET_MODE_SIZE (mode)) + UNITS_PER_WORD - 1) + / UNITS_PER_WORD); + int offset = 0; + enum machine_mode hfa_mode = VOIDmode; + + /* If all arg slots are already full, then there is nothing to do. */ + if (cum->words >= MAX_ARGUMENT_SLOTS) + return; + + /* Arguments larger than 8 bytes start at the next even boundary. */ + if (words > 1 && (cum->words & 1)) + offset = 1; + + cum->words += words + offset; + + /* Check for and handle homogeneous FP aggregates. */ + if (type) + hfa_mode = hfa_element_mode (type, 0); + + /* Unnamed prototyped hfas are passed as usual. Named prototyped hfas + and unprototyped hfas are passed specially. */ + if (hfa_mode != VOIDmode && (! cum->prototype || named)) + { + int fp_regs = cum->fp_regs; + /* This is the original value of cum->words + offset. */ + int int_regs = cum->words - words; + int hfa_size = GET_MODE_SIZE (hfa_mode); + int byte_size; + int args_byte_size; + + /* If prototyped, pass it in FR regs then GR regs. + If not prototyped, pass it in both FR and GR regs. + + If this is an SFmode aggregate, then it is possible to run out of + FR regs while GR regs are still left. In that case, we pass the + remaining part in the GR regs. */ + + /* Fill the FP regs. We do this always. We stop if we reach the end + of the argument, the last FP register, or the last argument slot. */ + + byte_size = ((mode == BLKmode) + ? int_size_in_bytes (type) : GET_MODE_SIZE (mode)); + args_byte_size = int_regs * UNITS_PER_WORD; + offset = 0; + for (; (offset < byte_size && fp_regs < MAX_ARGUMENT_SLOTS + && args_byte_size < (MAX_ARGUMENT_SLOTS * UNITS_PER_WORD));) + { + /* ??? Padding for XFmode type? 
*/ + offset += hfa_size; + args_byte_size += hfa_size; + fp_regs++; + } + + cum->fp_regs = fp_regs; + } + + /* Integral and aggregates go in general registers. If we have run out of + FR registers, then FP values must also go in general registers. This can + happen when we have a SFmode HFA. */ + else if (! FLOAT_MODE_P (mode) || cum->fp_regs == MAX_ARGUMENT_SLOTS) + return; + + /* If there is a prototype, then FP values go in a FR register when + named, and in a GR registeer when unnamed. */ + else if (cum->prototype) + { + if (! named) + return; + else + /* ??? Complex types should not reach here. */ + cum->fp_regs += (GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT ? 2 : 1); + } + /* If there is no prototype, then FP values go in both FR and GR + registers. */ + else + /* ??? Complex types should not reach here. */ + cum->fp_regs += (GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT ? 2 : 1); + + return; +} + +/* Implement va_start. */ + +void +ia64_va_start (stdarg_p, valist, nextarg) + int stdarg_p; + tree valist; + rtx nextarg; +{ + int arg_words; + int ofs; + + arg_words = current_function_args_info.words; + + if (stdarg_p) + ofs = 0; + else + ofs = (arg_words >= MAX_ARGUMENT_SLOTS ? -UNITS_PER_WORD : 0); + + nextarg = plus_constant (nextarg, ofs); + std_expand_builtin_va_start (1, valist, nextarg); +} + +/* Implement va_arg. */ + +rtx +ia64_va_arg (valist, type) + tree valist, type; +{ + HOST_WIDE_INT size; + tree t; + + /* Arguments larger than 8 bytes are 16 byte aligned. 
*/ + size = int_size_in_bytes (type); + if (size > UNITS_PER_WORD) + { + t = build (PLUS_EXPR, TREE_TYPE (valist), valist, + build_int_2 (2 * UNITS_PER_WORD - 1, 0)); + t = build (BIT_AND_EXPR, TREE_TYPE (t), t, + build_int_2 (-2 * UNITS_PER_WORD, -1)); + t = build (MODIFY_EXPR, TREE_TYPE (valist), valist, t); + TREE_SIDE_EFFECTS (t) = 1; + expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL); + } + + return std_expand_builtin_va_arg (valist, type); +} + +/* Return 1 if function return value returned in memory. Return 0 if it is + in a register. */ + +int +ia64_return_in_memory (valtype) + tree valtype; +{ + enum machine_mode mode; + enum machine_mode hfa_mode; + int byte_size; + + mode = TYPE_MODE (valtype); + byte_size = ((mode == BLKmode) + ? int_size_in_bytes (valtype) : GET_MODE_SIZE (mode)); + + /* Hfa's with up to 8 elements are returned in the FP argument registers. */ + + hfa_mode = hfa_element_mode (valtype, 0); + if (hfa_mode != VOIDmode) + { + int hfa_size = GET_MODE_SIZE (hfa_mode); + + /* ??? Padding for XFmode type? */ + if (byte_size / hfa_size > MAX_ARGUMENT_SLOTS) + return 1; + else + return 0; + } + + else if (byte_size > UNITS_PER_WORD * MAX_INT_RETURN_SLOTS) + return 1; + else + return 0; +} + +/* Return rtx for register that holds the function return value. */ + +rtx +ia64_function_value (valtype, func) + tree valtype; + tree func; +{ + enum machine_mode mode; + enum machine_mode hfa_mode; + + mode = TYPE_MODE (valtype); + hfa_mode = hfa_element_mode (valtype, 0); + + if (hfa_mode != VOIDmode) + { + rtx loc[8]; + int i; + int hfa_size; + int byte_size; + int offset; + + hfa_size = GET_MODE_SIZE (hfa_mode); + byte_size = ((mode == BLKmode) + ? int_size_in_bytes (valtype) : GET_MODE_SIZE (mode)); + offset = 0; + for (i = 0; offset < byte_size; i++) + { + loc[i] = gen_rtx_EXPR_LIST (VOIDmode, + gen_rtx_REG (hfa_mode, FR_ARG_FIRST + i), + GEN_INT (offset)); + /* ??? Padding for XFmode type? 
*/ + offset += hfa_size; + } + + if (i == 1) + return XEXP (loc[0], 0); + else + return gen_rtx_PARALLEL (mode, gen_rtvec_v (i, loc)); + } + else if (FLOAT_TYPE_P (valtype)) + return gen_rtx_REG (mode, FR_ARG_FIRST); + else + return gen_rtx_REG (mode, GR_RET_FIRST); +} + +/* Print a memory address as an operand to reference that memory location. */ + +/* ??? Do we need this? It gets used only for 'a' operands. We could perhaps + also call this from ia64_print_operand for memory addresses. */ + +void +ia64_print_operand_address (stream, address) + FILE * stream; + rtx address; +{ +} + +/* Print an operand to a assembler instruction. + B Work arounds for hardware bugs. + C Swap and print a comparison operator. + D Print an FP comparison operator. + E Print 32 - constant, for SImode shifts as extract. + F A floating point constant 0.0 emitted as f0, or 1.0 emitted as f1, or + a floating point register emitted normally. + I Invert a predicate register by adding 1. + O Append .acq for volatile load. + P Postincrement of a MEM. + Q Append .rel for volatile store. + S Shift amount for shladd instruction. + T Print an 8-bit sign extended number (K) as a 32-bit unsigned number + for Intel assembler. + U Print an 8-bit sign extended number (K) as a 64-bit unsigned number + for Intel assembler. + r Print register name, or constant 0 as r0. HP compatibility for + Linux kernel. */ +void +ia64_print_operand (file, x, code) + FILE * file; + rtx x; + int code; +{ + switch (code) + { + /* XXX Add other codes here. */ + + case 0: + /* Handled below. */ + break; + + case 'B': + if (TARGET_A_STEP) + fputs (" ;; nop 0 ;; nop 0 ;;", file); + return; + + case 'C': + { + enum rtx_code c = swap_condition (GET_CODE (x)); + fputs (GET_RTX_NAME (c), file); + return; + } + + case 'D': + fputs (GET_CODE (x) == NE ? 
"neq" : GET_RTX_NAME (GET_CODE (x)), file); + return; + + case 'E': + fprintf (file, HOST_WIDE_INT_PRINT_DEC, 32 - INTVAL (x)); + return; + + case 'F': + if (x == CONST0_RTX (GET_MODE (x))) + fputs (reg_names [FR_REG (0)], file); + else if (x == CONST1_RTX (GET_MODE (x))) + fputs (reg_names [FR_REG (1)], file); + else if (GET_CODE (x) == REG) + fputs (reg_names [REGNO (x)], file); + else + abort (); + return; + + case 'I': + fputs (reg_names [REGNO (x) + 1], file); + return; + + case 'O': + if (MEM_VOLATILE_P (x)) + fputs(".acq", file); + return; + + case 'P': + { + int value; + + if (GET_CODE (XEXP (x, 0)) != POST_INC + && GET_CODE (XEXP (x, 0)) != POST_DEC) + return; + + fputs (", ", file); + + value = GET_MODE_SIZE (GET_MODE (x)); + + /* ??? This is for ldf.fill and stf.spill which use XFmode, but which + actually need 16 bytes increments. Perhaps we can change them + to use TFmode instead. Or don't use POST_DEC/POST_INC for them. + Currently, there are no other uses of XFmode, so hacking it here + is no problem. */ + if (value == 12) + value = 16; + + if (GET_CODE (XEXP (x, 0)) == POST_DEC) + value = -value; + + fprintf (file, "%d", value); + return; + } + + case 'Q': + if (MEM_VOLATILE_P (x)) + fputs(".rel", file); + return; + + case 'S': + fprintf (file, HOST_WIDE_INT_PRINT_DEC, exact_log2 (INTVAL (x))); + return; + + case 'T': + if (! TARGET_GNU_AS && GET_CODE (x) == CONST_INT) + { + fprintf (file, "0x%x", INTVAL (x) & 0xffffffff); + return; + } + break; + + case 'U': + if (! TARGET_GNU_AS && GET_CODE (x) == CONST_INT) + { + char *prefix = "0x"; + if (INTVAL (x) & 0x80000000) + { + fprintf (file, "0xffffffff"); + prefix = ""; + } + fprintf (file, "%s%x", prefix, INTVAL (x) & 0xffffffff); + return; + } + break; + + case 'r': + /* If this operand is the constant zero, write it as zero. 
*/ + if (GET_CODE (x) == REG) + fputs (reg_names[REGNO (x)], file); + else if (x == CONST0_RTX (GET_MODE (x))) + fputs ("r0", file); + else + output_operand_lossage ("invalid %%r value"); + return; + + default: + output_operand_lossage ("ia64_print_operand: unknown code"); + return; + } + + switch (GET_CODE (x)) + { + /* This happens for the spill/restore instructions. */ + case POST_INC: + x = XEXP (x, 0); + /* ... fall through ... */ + + case REG: + fputs (reg_names [REGNO (x)], file); + break; + + case MEM: + { + rtx addr = XEXP (x, 0); + if (GET_CODE (addr) == POST_INC || GET_CODE (addr) == POST_DEC) + addr = XEXP (addr, 0); + fprintf (file, "[%s]", reg_names [REGNO (addr)]); + break; + } + + default: + output_addr_const (file, x); + break; + } + + return; +} + + + +/* This function returns the register class required for a secondary + register when copying between one of the registers in CLASS, and X, + using MODE. A return value of NO_REGS means that no secondary register + is required. */ + +enum reg_class +ia64_secondary_reload_class (class, mode, x) + enum reg_class class; + enum machine_mode mode; + rtx x; +{ + int regno = -1; + + if (GET_CODE (x) == REG || GET_CODE (x) == SUBREG) + regno = true_regnum (x); + + /* ??? This is required because of a bad gcse/cse/global interaction. + We end up with two pseudos with overlapping lifetimes both of which are + equiv to the same constant, and both which need to be in BR_REGS. This + results in a BR_REGS to BR_REGS copy which doesn't exist. To reproduce, + return NO_REGS here, and compile divdi3 in libgcc2.c. This seems to be + a cse bug. cse_basic_block_end changes depending on the path length, + which means the qty_first_reg check in make_regs_eqv can give different + answers at different times. */ + /* ??? At some point I'll probably need a reload_indi pattern to handle + this. 
*/ + if (class == BR_REGS && BR_REGNO_P (regno)) + return GR_REGS; + + /* This is needed if a pseudo used as a call_operand gets spilled to a + stack slot. */ + if (class == BR_REGS && GET_CODE (x) == MEM) + return GR_REGS; + + /* This can happen when a paradoxical subreg is an operand to the muldi3 + pattern. */ + /* ??? This shouldn't be necessary after instruction scheduling is enabled, + because paradoxical subregs are not accepted by register_operand when + INSN_SCHEDULING is defined. Or alternatively, stop the paradoxical subreg + stupidity in the *_operand functions in recog.c. */ + if ((class == FR_REGS || class == FR_INT_REGS || class == FR_FP_REGS) + && GET_CODE (x) == MEM + && (GET_MODE (x) == SImode || GET_MODE (x) == HImode + || GET_MODE (x) == QImode)) + return GR_REGS; + + /* This can happen because of the ior/and/etc patterns that accept FP + registers as operands. If the third operand is a constant, then it + needs to be reloaded into a FP register. */ + if ((class == FR_REGS || class == FR_INT_REGS || class == FR_FP_REGS) + && GET_CODE (x) == CONST_INT) + return GR_REGS; + + /* Moving a integer from an FP register to memory requires a general register + as an intermediary. This is not necessary if we are moving a DImode + subreg of a DFmode value from an FP register to memory, since stfd will + do the right thing in this case. */ + if (class == FR_INT_REGS && GET_CODE (x) == MEM && GET_MODE (x) == DImode) + return GR_REGS; + + /* ??? This happens if we cse/gcse a CCmode value across a call, and the + function has a nonlocal goto. This is because global does not allocate + call crossing pseudos to hard registers when current_function_has_ + nonlocal_goto is true. This is relatively common for C++ programs that + use exceptions. To reproduce, return NO_REGS and compile libstdc++. 
*/ + if (class == PR_REGS && GET_CODE (x) == MEM) + return GR_REGS; + + return NO_REGS; +} + + +/* Emit text to declare externally defined variables and functions, because + the Intel assembler does not support undefined externals. */ + +void +ia64_asm_output_external (file, decl, name) + FILE *file; + tree decl; + char *name; +{ + int save_referenced; + + /* GNU as does not need anything here. */ + if (TARGET_GNU_AS) + return; + + /* ??? The Intel assembler creates a reference that needs to be satisfied by + the linker when we do this, so we need to be careful not to do this for + builtin functions which have no library equivalent. Unfortunately, we + can't tell here whether or not a function will actually be called by + expand_expr, so we pull in library functions even if we may not need + them later. */ + if (! strcmp (name, "__builtin_next_arg") + || ! strcmp (name, "alloca") + || ! strcmp (name, "__builtin_constant_p") + || ! strcmp (name, "__builtin_args_info")) + return; + + /* assemble_name will set TREE_SYMBOL_REFERENCED, so we must save and + restore it. */ + save_referenced = TREE_SYMBOL_REFERENCED (DECL_ASSEMBLER_NAME (decl)); + if (TREE_CODE (decl) == FUNCTION_DECL) + { + fprintf (file, "\t%s\t ", TYPE_ASM_OP); + assemble_name (file, name); + putc (',', file); + fprintf (file, TYPE_OPERAND_FMT, "function"); + putc ('\n', file); + } + ASM_GLOBALIZE_LABEL (file, name); + TREE_SYMBOL_REFERENCED (DECL_ASSEMBLER_NAME (decl)) = save_referenced; +} + +/* Parse the -mfixed-range= option string. */ + +static void +fix_range (str) + char *str; +{ + int i, first, last; + char *dash, *comma; + + /* str must be of the form REG1'-'REG2{,REG1'-'REG} where REG1 and + REG2 are either register names or register numbers. The effect + of this option is to mark the registers in the range from REG1 to + REG2 as ``fixed'' so they won't be used by the compiler. This is + used, e.g., to ensure that kernel mode code doesn't use f32-f127. 
*/ + + while (1) + { + dash = strchr (str, '-'); + if (!dash) + { + warning ("value of -mfixed-range must have form REG1-REG2"); + return; + } + *dash = '\0'; + + comma = strchr (dash + 1, ','); + if (comma) + *comma = '\0'; + + first = decode_reg_name (str); + if (first < 0) + { + warning ("unknown register name: %s", str); + return; + } + + last = decode_reg_name (dash + 1); + if (last < 0) + { + warning ("unknown register name: %s", dash + 1); + return; + } + + *dash = '-'; + + if (first > last) + { + warning ("%s-%s is an empty range", str, dash + 1); + return; + } + + for (i = first; i <= last; ++i) + fixed_regs[i] = call_used_regs[i] = 1; + + if (!comma) + break; + + *comma = ','; + str = comma + 1; + } +} + +/* Called to register all of our global variables with the garbage + collector. */ + +static void +ia64_add_gc_roots () +{ + ggc_add_rtx_root (&ia64_compare_op0, 1); + ggc_add_rtx_root (&ia64_compare_op1, 1); +} + +/* Handle TARGET_OPTIONS switches. */ + +void +ia64_override_options () +{ + if (ia64_fixed_range_string) + fix_range (ia64_fixed_range_string); + + ia64_section_threshold = g_switch_set ? g_switch_value : IA64_DEFAULT_GVALUE; + + ia64_add_gc_roots (); +} + +/* The following collection of routines emit instruction group stop bits as + necessary to avoid dependencies. */ + +/* Need to track some additional registers as far as serialization is + concerned so we can properly handle br.call and br.ret. We could + make these registers visible to gcc, but since these registers are + never explicitly used in gcc generated code, it seems wasteful to + do so (plus it would make the call and return patterns needlessly + complex). */ +#define REG_GP (GR_REG (1)) +#define REG_RP (BR_REG (0)) +#define REG_AR_PFS (FIRST_PSEUDO_REGISTER) +#define REG_AR_CFM (FIRST_PSEUDO_REGISTER + 1) +/* ??? This will eventually need to be a hard register. 
*/ +#define REG_AR_EC (FIRST_PSEUDO_REGISTER + 2) +/* This is used for volatile asms which may require a stop bit immediately + before and after them. */ +#define REG_VOLATILE (FIRST_PSEUDO_REGISTER + 3) +#define NUM_REGS (FIRST_PSEUDO_REGISTER + 4) + +/* For each register, we keep track of how many times it has been + written in the current instruction group. If a register is written + unconditionally (no qualifying predicate), WRITE_COUNT is set to 2 + and FIRST_PRED is ignored. If a register is written if its + qualifying predicate P is true, we set WRITE_COUNT to 1 and + FIRST_PRED to P. Later on, the same register may be written again + by the complement of P (P+1 if P is even, P-1, otherwise) and when + this happens, WRITE_COUNT gets set to 2. The result of this is + that whenever an insn attempts to write a register whose + WRITE_COUNT is two, we need to issue a insn group barrier first. */ +struct reg_write_state +{ + char write_count; + char written_by_fp; /* Was register written by a floating-point insn? */ + short first_pred; /* 0 means ``no predicate'' */ +}; + +/* Cumulative info for the current instruction group. */ +struct reg_write_state rws_sum[NUM_REGS]; +/* Info for the current instruction. This gets copied to rws_sum after a + stop bit is emitted. */ +struct reg_write_state rws_insn[NUM_REGS]; + +/* Misc flags needed to compute RAW/WAW dependencies while we are traversing + RTL for one instruction. */ +struct reg_flags +{ + unsigned int is_write : 1; /* Is register being written? */ + unsigned int is_fp : 1; /* Is register used as part of an fp op? */ + unsigned int is_branch : 1; /* Is register used as part of a branch? */ +}; + +/* Update *RWS for REGNO, which is being written by the current instruction, + with predicate PRED, and associated register flags in FLAGS. */ + +static void +rws_update (rws, regno, flags, pred) + struct reg_write_state *rws; + int regno; + struct reg_flags flags; + int pred; +{ + rws[regno].write_count += pred ? 
1 : 2; + rws[regno].written_by_fp |= flags.is_fp; + rws[regno].first_pred = pred; +} + +/* Handle an access to register REGNO of type FLAGS using predicate register + PRED. Update rws_insn and rws_sum arrays. Return 1 if this access creates + a dependency with an earlier instruction in the same group. */ + +static int +rws_access_reg (regno, flags, pred) + int regno; + struct reg_flags flags; + int pred; +{ + int need_barrier = 0; + int is_predicate_reg; + + if (regno >= NUM_REGS) + abort (); + + if (flags.is_write) + { + /* One insn writes same reg multiple times? */ + if (rws_insn[regno].write_count > 0) + abort (); + + /* Update info for current instruction. */ + rws_update (rws_insn, regno, flags, pred); + + /* ??? This is necessary because predicate regs require two hard + registers. However, this should be using HARD_REGNO_NREGS so that + it works for all multi-reg hard registers, instead of only for + predicate registers. */ + is_predicate_reg = REGNO_REG_CLASS (regno) == PR_REGS; + if (is_predicate_reg) + rws_update (rws_insn, regno + 1, flags, pred); + + switch (rws_sum[regno].write_count) + { + case 0: + /* The register has not been written yet. */ + rws_update (rws_sum, regno, flags, pred); + if (is_predicate_reg) + rws_update (rws_sum, regno + 1, flags, pred); + break; + + case 1: + /* The register has been written via a predicate. If this is + not a complementary predicate, then we need a barrier. */ + /* ??? This assumes that P and P+1 are always complementary + predicates for P even. */ + if ((rws_sum[regno].first_pred ^ 1) != pred) + need_barrier = 1; + rws_update (rws_sum, regno, flags, pred); + if (is_predicate_reg) + rws_update (rws_sum, regno + 1, flags, pred); + break; + + case 2: + /* The register has been unconditionally written already. We + need a barrier. */ + need_barrier = 1; + break; + + default: + abort (); + } + } + else + { + if (flags.is_branch) + { + /* Branches have several RAW exceptions that allow to avoid + barriers. 
*/ + + if (REGNO_REG_CLASS (regno) == BR_REGS || regno == REG_AR_PFS) + /* RAW dependencies on branch regs are permissible as long + as the writer is a non-branch instruction. Since we + never generate code that uses a branch register written + by a branch instruction, handling this case is + easy. */ + /* ??? This assumes that we don't emit br.cloop, br.cexit, br.ctop, + br.wexit, br.wtop. This is true currently. */ + return 0; + + if (REGNO_REG_CLASS (regno) == PR_REGS + && ! rws_sum[regno].written_by_fp) + /* The predicates of a branch are available within the + same insn group as long as the predicate was written by + something other than a floating-point instruction. */ + return 0; + } + + switch (rws_sum[regno].write_count) + { + case 0: + /* The register has not been written yet. */ + break; + + case 1: + /* The register has been written via a predicate. If this is + not a complementary predicate, then we need a barrier. */ + /* ??? This assumes that P and P+1 are always complementary + predicates for P even. */ + if ((rws_sum[regno].first_pred ^ 1) != pred) + need_barrier = 1; + break; + + case 2: + /* The register has been unconditionally written already. We + need a barrier. */ + need_barrier = 1; + break; + + default: + abort (); + } + } + + return need_barrier; +} + +/* Handle an access to rtx X of type FLAGS using predicate register PRED. + Return 1 is this access creates a dependency with an earlier instruction + in the same group. */ + +static int +rtx_needs_barrier (x, flags, pred) + rtx x; + struct reg_flags flags; + int pred; +{ + int i, j; + int is_complemented = 0; + int need_barrier = 0; + const char *format_ptr; + struct reg_flags new_flags; + rtx src, dst; + rtx cond = 0; + + if (! x) + return 0; + + new_flags = flags; + + switch (GET_CODE (x)) + { + case SET: + src = SET_SRC (x); + switch (GET_CODE (src)) + { + case CALL: + /* We don't need to worry about the result registers that + get written by subroutine call. 
*/ + need_barrier = rtx_needs_barrier (src, flags, pred); + return need_barrier; + + case IF_THEN_ELSE: + if (SET_DEST (x) == pc_rtx) + { + /* X is a conditional branch. */ + /* ??? This seems redundant, as the caller sets this bit for + all JUMP_INSNs. */ + new_flags.is_branch = 1; + need_barrier = rtx_needs_barrier (src, new_flags, pred); + return need_barrier; + } + else + { + /* X is a conditional move. */ + cond = XEXP (src, 0); + if (GET_CODE (cond) == EQ) + is_complemented = 1; + cond = XEXP (cond, 0); + if (GET_CODE (cond) != REG + && REGNO_REG_CLASS (REGNO (cond)) != PR_REGS) + abort (); + + if (XEXP (src, 1) == SET_DEST (x) + || XEXP (src, 2) == SET_DEST (x)) + { + /* X is a conditional move that conditionally writes the + destination. */ + + /* We need another complement in this case. */ + if (XEXP (src, 1) == SET_DEST (x)) + is_complemented = ! is_complemented; + + pred = REGNO (cond); + if (is_complemented) + ++pred; + } + + /* ??? If this is a conditional write to the dest, then this + instruction does not actually read one source. This probably + doesn't matter, because that source is also the dest. */ + /* ??? Multiple writes to predicate registers are allowed + if they are all AND type compares, or if they are all OR + type compares. We do not generate such instructions + currently. */ + } + /* ... fall through ... */ + + default: + if (GET_RTX_CLASS (GET_CODE (src)) == '<' + && GET_MODE_CLASS (GET_MODE (XEXP (src, 0))) == MODE_FLOAT) + /* Set new_flags.is_fp to 1 so that we know we're dealing + with a floating point comparison when processing the + destination of the SET. */ + new_flags.is_fp = 1; + break; + } + need_barrier = rtx_needs_barrier (src, flags, pred); + /* This instruction unconditionally uses a predicate register. 
*/ + if (cond) + need_barrier |= rws_access_reg (REGNO (cond), flags, 0); + + dst = SET_DEST (x); + if (GET_CODE (dst) == ZERO_EXTRACT) + { + need_barrier |= rtx_needs_barrier (XEXP (dst, 1), flags, pred); + need_barrier |= rtx_needs_barrier (XEXP (dst, 2), flags, pred); + dst = XEXP (dst, 0); + } + new_flags.is_write = 1; + need_barrier |= rtx_needs_barrier (dst, new_flags, pred); + break; + + case CALL: + new_flags.is_write = 0; + /* ??? Why is this here? It seems unnecessary. */ + need_barrier |= rws_access_reg (REG_GP, new_flags, pred); + need_barrier |= rws_access_reg (REG_AR_EC, new_flags, pred); + + /* Avoid multiple register writes, in case this is a pattern with + multiple CALL rtx. This avoids an abort in rws_access_reg. */ + /* ??? This assumes that no rtx other than CALL/RETURN sets REG_AR_CFM, + and that we don't have predicated calls/returns. */ + if (! rws_insn[REG_AR_CFM].write_count) + { + new_flags.is_write = 1; + need_barrier |= rws_access_reg (REG_RP, new_flags, pred); + need_barrier |= rws_access_reg (REG_AR_PFS, new_flags, pred); + need_barrier |= rws_access_reg (REG_AR_CFM, new_flags, pred); + } + break; + + case CLOBBER: +#if 0 + case USE: + /* We must handle USE here in case it occurs within a PARALLEL. + For instance, the mov ar.pfs= instruction has a USE which requires + a barrier between it and an immediately preceeding alloc. */ +#endif + /* Clobber & use are for earlier compiler-phases only. */ + break; + + case ASM_OPERANDS: + case ASM_INPUT: + /* We always emit stop bits for traditional asms. We emit stop bits + for volatile extended asms if TARGET_VOL_ASM_STOP is true. */ + if (GET_CODE (x) != ASM_OPERANDS + || (MEM_VOLATILE_P (x) && TARGET_VOL_ASM_STOP)) + { + /* Avoid writing the register multiple times if we have multiple + asm outputs. This avoids an abort in rws_access_reg. */ + if (! 
rws_insn[REG_VOLATILE].write_count) + { + new_flags.is_write = 1; + rws_access_reg (REG_VOLATILE, new_flags, pred); + } + return 1; + } + + /* For all ASM_OPERANDS, we must traverse the vector of input operands. + We can not just fall through here since then we would be confused + by the ASM_INPUT rtx inside ASM_OPERANDS, which do not indicate + traditional asms unlike their normal usage. */ + + for (i = ASM_OPERANDS_INPUT_LENGTH (x) - 1; i >= 0; --i) + if (rtx_needs_barrier (ASM_OPERANDS_INPUT (x, i), flags, pred)) + need_barrier = 1; + break; + + case PARALLEL: + for (i = XVECLEN (x, 0) - 1; i >= 0; --i) + if (rtx_needs_barrier (XVECEXP (x, 0, i), flags, pred)) + need_barrier = 1; + break; + + case SUBREG: + x = SUBREG_REG (x); + /* FALLTHRU */ + case REG: + need_barrier = rws_access_reg (REGNO (x), flags, pred); + break; + + case MEM: + /* Find the regs used in memory address computation. */ + new_flags.is_write = 0; + need_barrier = rtx_needs_barrier (XEXP (x, 0), new_flags, pred); + break; + + case CONST_INT: case CONST_DOUBLE: + case SYMBOL_REF: case LABEL_REF: case CONST: + break; + + /* Operators with side-effects. */ + case POST_INC: case POST_DEC: + if (GET_CODE (XEXP (x, 0)) != REG) + abort (); + + new_flags.is_write = 0; + need_barrier = rws_access_reg (REGNO (XEXP (x, 0)), new_flags, pred); + new_flags.is_write = 1; + need_barrier |= rws_access_reg (REGNO (XEXP (x, 0)), new_flags, pred); + break; + + /* Handle common unary and binary ops for efficiency. 
*/ + case COMPARE: case PLUS: case MINUS: case MULT: case DIV: + case MOD: case UDIV: case UMOD: case AND: case IOR: + case XOR: case ASHIFT: case ROTATE: case ASHIFTRT: case LSHIFTRT: + case ROTATERT: case SMIN: case SMAX: case UMIN: case UMAX: + case NE: case EQ: case GE: case GT: case LE: + case LT: case GEU: case GTU: case LEU: case LTU: + need_barrier = rtx_needs_barrier (XEXP (x, 0), new_flags, pred); + need_barrier |= rtx_needs_barrier (XEXP (x, 1), new_flags, pred); + break; + + case NEG: case NOT: case SIGN_EXTEND: case ZERO_EXTEND: + case TRUNCATE: case FLOAT_EXTEND: case FLOAT_TRUNCATE: case FLOAT: + case FIX: case UNSIGNED_FLOAT: case UNSIGNED_FIX: case ABS: + case SQRT: case FFS: + need_barrier = rtx_needs_barrier (XEXP (x, 0), flags, pred); + break; + + case UNSPEC: + switch (XINT (x, 1)) + { + /* ??? For the st8.spill/ld8.fill instructions, we can ignore unat + dependencies as long as we don't have both a spill and fill in + the same instruction group. We need to check for that. */ + case 1: /* st8.spill */ + case 2: /* ld8.fill */ + case 3: /* stf.spill */ + case 4: /* ldf.spill */ + case 8: /* popcnt */ + need_barrier = rtx_needs_barrier (XVECEXP (x, 0, 0), flags, pred); + break; + + case 5: /* mov =pr */ + /* This reads all predicate registers. */ + for (i = PR_REG (1); i < PR_REG (64); i++) + need_barrier |= rws_access_reg (i, flags, pred); + break; + + case 6: /* mov pr= */ + /* This writes all predicate registers. */ + new_flags.is_write = 1; + /* We need to skip by two, because rws_access_reg always writes + to two predicate registers at a time. */ + /* ??? Strictly speaking, we shouldn't be counting writes to pr0. */ + for (i = PR_REG (0); i < PR_REG (64); i += 2) + need_barrier |= rws_access_reg (i, new_flags, pred); + break; + + case 7: + abort (); + + /* ??? Should track unat reads and writes. 
*/ + case 9: /* mov =ar.unat */ + case 10: /* mov ar.unat= */ + break; + case 11: /* mov ar.ccv= */ + break; + case 12: /* mf */ + break; + case 13: /* cmpxchg_acq */ + break; + case 14: /* val_compare_and_swap */ + break; + case 15: /* lock_release */ + break; + case 16: /* lock_test_and_set */ + break; + case 17: /* _and_fetch */ + break; + case 18: /* fetch_and_ */ + break; + case 19: /* fetchadd_acq */ + break; + default: + abort (); + } + break; + + case UNSPEC_VOLATILE: + switch (XINT (x, 1)) + { + case 0: /* alloc */ + /* Alloc must always be the first instruction. Currently, we + only emit it at the function start, so we don't need to worry + about emitting a stop bit before it. */ + need_barrier = rws_access_reg (REG_AR_PFS, flags, pred); + + new_flags.is_write = 1; + need_barrier |= rws_access_reg (REG_AR_CFM, new_flags, pred); + return need_barrier; + + case 1: /* blockage */ + case 2: /* insn group barrier */ + return 0; + + case 3: /* flush_cache */ + return rtx_needs_barrier (XVECEXP (x, 0, 0), flags, pred); + + case 4: /* mov ar.pfs= */ + new_flags.is_write = 1; + need_barrier = rws_access_reg (REG_AR_PFS, new_flags, pred); + break; + + default: + abort (); + } + break; + + case RETURN: + new_flags.is_write = 0; + need_barrier = rws_access_reg (REG_RP, flags, pred); + need_barrier |= rws_access_reg (REG_AR_PFS, flags, pred); + + new_flags.is_write = 1; + need_barrier |= rws_access_reg (REG_AR_EC, new_flags, pred); + need_barrier |= rws_access_reg (REG_AR_CFM, new_flags, pred); + break; + + default: + format_ptr = GET_RTX_FORMAT (GET_CODE (x)); + for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--) + switch (format_ptr[i]) + { + case '0': /* unused field */ + case 'i': /* integer */ + case 'n': /* note */ + case 'w': /* wide integer */ + case 's': /* pointer to string */ + case 'S': /* optional pointer to string */ + break; + + case 'e': + if (rtx_needs_barrier (XEXP (x, i), flags, pred)) + need_barrier = 1; + break; + + case 'E': + for (j = 
XVECLEN (x, i) - 1; j >= 0; --j) + if (rtx_needs_barrier (XVECEXP (x, i, j), flags, pred)) + need_barrier = 1; + break; + + default: + abort (); + } + } + return need_barrier; +} + +/* INSNS is an chain of instructions. Scan the chain, and insert stop bits + as necessary to eliminate dependendencies. */ + +static void +emit_insn_group_barriers (insns) + rtx insns; +{ + int need_barrier = 0; + int exception_nesting; + struct reg_flags flags; + rtx insn, prev_insn; + + memset (rws_sum, 0, sizeof (rws_sum)); + + prev_insn = 0; + for (insn = insns; insn; insn = NEXT_INSN (insn)) + { + memset (&flags, 0, sizeof (flags)); + switch (GET_CODE (insn)) + { + case NOTE: + switch (NOTE_LINE_NUMBER (insn)) + { + case NOTE_INSN_EH_REGION_BEG: + exception_nesting++; + break; + + case NOTE_INSN_EH_REGION_END: + exception_nesting--; + break; + + case NOTE_INSN_EPILOGUE_BEG: + break; + + default: + break; + } + break; + + case JUMP_INSN: + case CALL_INSN: + flags.is_branch = 1; + case INSN: + if (GET_CODE (PATTERN (insn)) == USE) + /* Don't care about USE "insns"---those are used to + indicate to the optimizer that it shouldn't get rid of + certain operations. */ + break; + else + { + memset (rws_insn, 0, sizeof (rws_insn)); + need_barrier = rtx_needs_barrier (PATTERN (insn), flags, 0); + + /* Check to see if the previous instruction was a volatile + asm. */ + if (! need_barrier) + need_barrier = rws_access_reg (REG_VOLATILE, flags, 0); + + if (need_barrier) + { + /* PREV_INSN null can happen if the very first insn is a + volatile asm. */ + if (prev_insn) + emit_insn_after (gen_insn_group_barrier (), prev_insn); + memcpy (rws_sum, rws_insn, sizeof (rws_sum)); + } + need_barrier = 0; + prev_insn = insn; + } + break; + + case BARRIER: + /* A barrier doesn't imply an instruction group boundary. */ + break; + + case CODE_LABEL: + /* Leave prev_insn alone so the barrier gets generated in front + of the label, if one is needed. 
*/ + break; + + default: + abort (); + } + } +} + +/* Perform machine dependent operations on the rtl chain INSNS. */ + +void +ia64_reorg (insns) + rtx insns; +{ + emit_insn_group_barriers (insns); +} + +/* Return true if REGNO is used by the epilogue. */ + +int +ia64_epilogue_uses (regno) + int regno; +{ + /* For functions defined with the syscall_linkage attribute, all input + registers are marked as live at all function exits. This prevents the + register allocator from using the input registers, which in turn makes it + possible to restart a system call after an interrupt without having to + save/restore the input registers. */ + + if (IN_REGNO_P (regno) + && (regno < IN_REG (current_function_args_info.words)) + && lookup_attribute ("syscall_linkage", + TYPE_ATTRIBUTES (TREE_TYPE (current_function_decl)))) + return 1; + + return 0; +} + +/* Return true if IDENTIFIER is a valid attribute for TYPE. */ + +int +ia64_valid_type_attribute (type, attributes, identifier, args) + tree type; + tree attributes ATTRIBUTE_UNUSED; + tree identifier; + tree args; +{ + /* We only support an attribute for function calls. */ + + if (TREE_CODE (type) != FUNCTION_TYPE + && TREE_CODE (type) != METHOD_TYPE) + return 0; + + /* The "syscall_linkage" attribute says the callee is a system call entry + point. This affects ia64_epilogue_uses. */ + + if (is_attribute_p ("syscall_linkage", identifier)) + return args == NULL_TREE; + + return 0; +} + +/* For ia64, SYMBOL_REF_FLAG set means that it is a function. + + We add @ to the name if this goes in small data/bss. We can only put + a variable in small data/bss if it is defined in this module or a module + that we are statically linked with. We can't check the second condition, + but TREE_STATIC gives us the first one. */ + +/* ??? If we had IPA, we could check the second condition. We could support + programmer added section attributes if the variable is not defined in this + module. */ + +/* ??? 
See the v850 port for a cleaner way to do this. */ + +/* ??? We could also support own long data here. Generating movl/add/ld8 + instead of addl,ld8/ld8. This makes the code bigger, but should make the + code faster because there is one less load. This also includes incomplete + types which can't go in sdata/sbss. */ + +/* ??? See select_section. We must put short own readonly variables in + sdata/sbss instead of the more natural rodata, because we can't perform + the DECL_READONLY_SECTION test here. */ + +extern struct obstack * saveable_obstack; + +void +ia64_encode_section_info (decl) + tree decl; +{ + if (TREE_CODE (decl) == FUNCTION_DECL) + SYMBOL_REF_FLAG (XEXP (DECL_RTL (decl), 0)) = 1; + /* We assume that -fpic is used only to create a shared library (dso). + With -fpic, no global data can ever be sdata. + Without -fpic, global common uninitialized data can never be sdata, since + it can unify with a real definition in a dso. */ + /* ??? Actually, we can put globals in sdata, as long as we don't use gprel + to access them. The linker may then be able to do linker relaxation to + optimize references to them. Currently sdata implies use of gprel. */ + else if (! TARGET_NO_SDATA + && TREE_CODE (decl) == VAR_DECL + && TREE_STATIC (decl) + && ! (TREE_PUBLIC (decl) + && (flag_pic + || (DECL_COMMON (decl) + && (DECL_INITIAL (decl) == 0 + || DECL_INITIAL (decl) == error_mark_node)))) + /* Either the variable must be declared without a section attribute, + or the section must be sdata or sbss. */ + && (DECL_SECTION_NAME (decl) == 0 + || ! strcmp (TREE_STRING_POINTER (DECL_SECTION_NAME (decl)), + ".sdata") + || ! strcmp (TREE_STRING_POINTER (DECL_SECTION_NAME (decl)), + ".sbss"))) + { + int size = int_size_in_bytes (TREE_TYPE (decl)); + char *str = XSTR (XEXP (DECL_RTL (decl), 0), 0); + int reloc; + + /* ??? We should redeclare CTOR_LIST, DTOR_END so that we don't have to + special case them here. 
Currently we put them in ctor/dtors sections + behind the compiler's back. We should use section attributes + instead. */ + if (! strcmp (str, "__CTOR_LIST__") + || ! strcmp (str, "__DTOR_END__")) + ; + + /* If this is an incomplete type with size 0, then we can't put it in + sdata because it might be too big when completed. */ + else if (size > 0 && size <= ia64_section_threshold) + { + int len = strlen (str); + char *newstr = obstack_alloc (saveable_obstack, len + 2); + + strcpy (newstr + 1, str); + *newstr = SDATA_NAME_FLAG_CHAR; + XSTR (XEXP (DECL_RTL (decl), 0), 0) = newstr; + } + } +} + +#define def_builtin(name, type, code) \ + builtin_function ((name), (type), (code), BUILT_IN_MD, NULL_PTR) + +struct builtin_description +{ + enum insn_code icode; + const char *name; + enum ia64_builtins code; + enum rtx_code comparison; + unsigned int flag; +}; + +/* All 32 bit intrinsics that take 2 arguments. */ +static struct builtin_description bdesc_2argsi[] = +{ + { CODE_FOR_fetch_and_add_si, "__sync_fetch_and_add_si", IA64_BUILTIN_FETCH_AND_ADD_SI, 0, 0 }, + { CODE_FOR_fetch_and_sub_si, "__sync_fetch_and_sub_si", IA64_BUILTIN_FETCH_AND_SUB_SI, 0, 0 }, + { CODE_FOR_fetch_and_or_si, "__sync_fetch_and_or_si", IA64_BUILTIN_FETCH_AND_OR_SI, 0, 0 }, + { CODE_FOR_fetch_and_and_si, "__sync_fetch_and_and_si", IA64_BUILTIN_FETCH_AND_AND_SI, 0, 0 }, + { CODE_FOR_fetch_and_xor_si, "__sync_fetch_and_xor_si", IA64_BUILTIN_FETCH_AND_XOR_SI, 0, 0 }, + { CODE_FOR_fetch_and_nand_si, "__sync_fetch_and_nand_si", IA64_BUILTIN_FETCH_AND_NAND_SI, 0, 0 }, + { CODE_FOR_add_and_fetch_si, "__sync_add_and_fetch_si", IA64_BUILTIN_ADD_AND_FETCH_SI, 0, 0 }, + { CODE_FOR_sub_and_fetch_si, "__sync_sub_and_fetch_si", IA64_BUILTIN_SUB_AND_FETCH_SI, 0, 0 }, + { CODE_FOR_or_and_fetch_si, "__sync_or_and_fetch_si", IA64_BUILTIN_OR_AND_FETCH_SI, 0, 0 }, + { CODE_FOR_and_and_fetch_si, "__sync_and_and_fetch_si", IA64_BUILTIN_AND_AND_FETCH_SI, 0, 0 }, + { CODE_FOR_xor_and_fetch_si, "__sync_xor_and_fetch_si", 
IA64_BUILTIN_XOR_AND_FETCH_SI, 0, 0 }, + { CODE_FOR_nand_and_fetch_si, "__sync_nand_and_fetch_si", IA64_BUILTIN_NAND_AND_FETCH_SI, 0, 0 } +}; + +/* All 64 bit intrinsics that take 2 arguments. */ +static struct builtin_description bdesc_2argdi[] = +{ + { CODE_FOR_fetch_and_add_di, "__sync_fetch_and_add_di", IA64_BUILTIN_FETCH_AND_ADD_DI, 0, 0 }, + { CODE_FOR_fetch_and_sub_di, "__sync_fetch_and_sub_di", IA64_BUILTIN_FETCH_AND_SUB_DI, 0, 0 }, + { CODE_FOR_fetch_and_or_di, "__sync_fetch_and_or_di", IA64_BUILTIN_FETCH_AND_OR_DI, 0, 0 }, + { CODE_FOR_fetch_and_and_di, "__sync_fetch_and_and_di", IA64_BUILTIN_FETCH_AND_AND_DI, 0, 0 }, + { CODE_FOR_fetch_and_xor_di, "__sync_fetch_and_xor_di", IA64_BUILTIN_FETCH_AND_XOR_DI, 0, 0 }, + { CODE_FOR_fetch_and_nand_di, "__sync_fetch_and_nand_di", IA64_BUILTIN_FETCH_AND_NAND_DI, 0, 0 }, + { CODE_FOR_add_and_fetch_di, "__sync_add_and_fetch_di", IA64_BUILTIN_ADD_AND_FETCH_DI, 0, 0 }, + { CODE_FOR_sub_and_fetch_di, "__sync_sub_and_fetch_di", IA64_BUILTIN_SUB_AND_FETCH_DI, 0, 0 }, + { CODE_FOR_or_and_fetch_di, "__sync_or_and_fetch_di", IA64_BUILTIN_OR_AND_FETCH_DI, 0, 0 }, + { CODE_FOR_and_and_fetch_di, "__sync_and_and_fetch_di", IA64_BUILTIN_AND_AND_FETCH_DI, 0, 0 }, + { CODE_FOR_xor_and_fetch_di, "__sync_xor_and_fetch_di", IA64_BUILTIN_XOR_AND_FETCH_DI, 0, 0 }, + { CODE_FOR_nand_and_fetch_di, "__sync_nand_and_fetch_di", IA64_BUILTIN_NAND_AND_FETCH_DI, 0, 0 } +}; + +void +ia64_init_builtins () +{ + int i; + struct builtin_description *d; + + tree psi_type_node = build_pointer_type (integer_type_node); + tree pdi_type_node = build_pointer_type (long_integer_type_node); + tree endlink = tree_cons (NULL_TREE, void_type_node, NULL_TREE); + + + /* __sync_val_compare_and_swap_si, __sync_bool_compare_and_swap_si */ + tree si_ftype_psi_si_si + = build_function_type (integer_type_node, + tree_cons (NULL_TREE, psi_type_node, + tree_cons (NULL_TREE, integer_type_node, + tree_cons (NULL_TREE, integer_type_node, + endlink)))); + + /* 
__sync_val_compare_and_swap_di, __sync_bool_compare_and_swap_di */ + tree di_ftype_pdi_di_di + = build_function_type (long_integer_type_node, + tree_cons (NULL_TREE, pdi_type_node, + tree_cons (NULL_TREE, long_integer_type_node, + tree_cons (NULL_TREE, long_integer_type_node, + endlink)))); + /* __sync_synchronize */ + tree void_ftype_void + = build_function_type (void_type_node, endlink); + + /* __sync_lock_test_and_set_si */ + tree si_ftype_psi_si + = build_function_type (integer_type_node, + tree_cons (NULL_TREE, psi_type_node, + tree_cons (NULL_TREE, integer_type_node, endlink))); + + /* __sync_lock_test_and_set_di */ + tree di_ftype_pdi_di + = build_function_type (long_integer_type_node, + tree_cons (NULL_TREE, pdi_type_node, + tree_cons (NULL_TREE, long_integer_type_node, endlink))); + + /* __sync_lock_release_si */ + tree void_ftype_psi + = build_function_type (void_type_node, tree_cons (NULL_TREE, psi_type_node, endlink)); + + /* __sync_lock_release_di */ + tree void_ftype_pdi + = build_function_type (void_type_node, tree_cons (NULL_TREE, pdi_type_node, endlink)); + + def_builtin ("__sync_val_compare_and_swap_si", si_ftype_psi_si_si, IA64_BUILTIN_VAL_COMPARE_AND_SWAP_SI); + + def_builtin ("__sync_val_compare_and_swap_di", di_ftype_pdi_di_di, IA64_BUILTIN_VAL_COMPARE_AND_SWAP_DI); + + def_builtin ("__sync_bool_compare_and_swap_si", si_ftype_psi_si_si, IA64_BUILTIN_BOOL_COMPARE_AND_SWAP_SI); + + def_builtin ("__sync_bool_compare_and_swap_di", di_ftype_pdi_di_di, IA64_BUILTIN_BOOL_COMPARE_AND_SWAP_DI); + + def_builtin ("__sync_synchronize", void_ftype_void, IA64_BUILTIN_SYNCHRONIZE); + + def_builtin ("__sync_lock_test_and_set_si", si_ftype_psi_si, IA64_BUILTIN_LOCK_TEST_AND_SET_SI); + + def_builtin ("__sync_lock_test_and_set_di", di_ftype_pdi_di, IA64_BUILTIN_LOCK_TEST_AND_SET_DI); + + def_builtin ("__sync_lock_release_si", void_ftype_psi, IA64_BUILTIN_LOCK_RELEASE_SI); + + def_builtin ("__sync_lock_release_di", void_ftype_pdi, IA64_BUILTIN_LOCK_RELEASE_DI); + 
+ /* Add all builtins that are operations on two args. */ + for (i=0, d = bdesc_2argsi; i < sizeof(bdesc_2argsi) / sizeof *d; i++, d++) + def_builtin (d->name, si_ftype_psi_si, d->code); + for (i=0, d = bdesc_2argdi; i < sizeof(bdesc_2argdi) / sizeof *d; i++, d++) + def_builtin (d->name, di_ftype_pdi_di, d->code); +} + +/* Expand fetch_and_op intrinsics. The basic code sequence is: + + mf + ldsz return = [ptr]; + tmp = return; + do { + oldval = tmp; + ar.ccv = tmp; + tmp = value; + cmpxchgsz.acq tmp = [ptr], tmp + cmpxchgsz.acq tmp = [ptr], tmp + } while (tmp != oldval) +*/ +void +ia64_expand_fetch_and_op (code, mode, operands) + enum fetchop_code code; + enum machine_mode mode; + rtx operands[]; +{ + rtx oldval, newlabel; + rtx tmp_reg = gen_rtx_REG (mode, GR_REG(0)); + rtx mfreg = gen_rtx_MEM (BLKmode, tmp_reg); + RTX_UNCHANGING_P (mfreg) = 1; + emit_insn (gen_mf (mfreg)); + tmp_reg = gen_reg_rtx (mode); + oldval = gen_reg_rtx (mode); + + if (mode == SImode) + { + emit_insn (gen_movsi (operands[0], operands[1])); + emit_insn (gen_movsi (tmp_reg, operands[0])); + } + else + { + emit_insn (gen_movdi (operands[0], operands[1])); + emit_insn (gen_movdi (tmp_reg, operands[0])); + } + + newlabel = gen_label_rtx (); + emit_label (newlabel); + if (mode == SImode) + { + emit_insn (gen_movsi (oldval, tmp_reg)); + emit_insn (gen_ccv_restore_si (tmp_reg)); + } + else + { + emit_insn (gen_movdi (oldval, tmp_reg)); + emit_insn (gen_ccv_restore_di (tmp_reg)); + } + + /* Perform the specific operation. 
*/ + switch (code) + { + case IA64_ADD_OP: + { + rtx reg; + if (GET_CODE (operands[2]) == CONST_INT) + reg = gen_reg_rtx (mode); + else + reg = operands[2]; + if (mode == SImode) + { + if (reg != operands[2]) + emit_insn (gen_movsi (reg, operands[2])); + emit_insn (gen_addsi3 (tmp_reg, tmp_reg, reg)); + } + else + { + if (reg != operands[2]) + emit_insn (gen_movdi (reg, operands[2])); + emit_insn (gen_adddi3 (tmp_reg, tmp_reg, reg)); + } + break; + } + + case IA64_SUB_OP: + if (mode == SImode) + emit_insn (gen_subsi3 (tmp_reg, tmp_reg, operands[2])); + else + emit_insn (gen_subdi3 (tmp_reg, tmp_reg, operands[2])); + break; + + case IA64_OR_OP: + emit_insn (gen_iordi3 (tmp_reg, tmp_reg, operands[2])); + break; + + case IA64_AND_OP: + emit_insn (gen_anddi3 (tmp_reg, tmp_reg, operands[2])); + break; + + case IA64_XOR_OP: + emit_insn (gen_xordi3 (tmp_reg, tmp_reg, operands[2])); + break; + + case IA64_NAND_OP: + emit_insn (gen_anddi3 (tmp_reg, tmp_reg, operands[2])); + if (mode == SImode) + emit_insn (gen_one_cmplsi2 (tmp_reg, operands[0])); + else + emit_insn (gen_one_cmpldi2 (tmp_reg, operands[0])); + break; + + default: + break; + } + + if (mode == SImode) + emit_insn (gen_cmpxchg_acq_si (tmp_reg, operands[1], tmp_reg)); + else + emit_insn (gen_cmpxchg_acq_di (tmp_reg, operands[1], tmp_reg)); + + emit_cmp_and_jump_insns (tmp_reg, oldval, NE, 0, mode, 1, 0, newlabel); +} + +/* Expand op_and_fetch intrinsics. 
The basic code sequence is: + + mf + ldsz return = [ptr]; + do { + oldval = tmp; + ar.ccv = tmp; + return = tmp + value; + cmpxchgsz.acq tmp = [ptr], return + } while (tmp != oldval) +*/ +void +ia64_expand_op_and_fetch (code, mode, operands) + enum fetchop_code code; + enum machine_mode mode; + rtx operands[]; +{ + rtx oldval, newlabel; + rtx tmp_reg, tmp2_reg = gen_rtx_REG (mode, GR_REG(0)); + rtx mfreg = gen_rtx_MEM (BLKmode, tmp2_reg); + RTX_UNCHANGING_P (mfreg) = 1; + + emit_insn (gen_mf (mfreg)); + tmp_reg = gen_reg_rtx (mode); + if (mode == SImode) + emit_insn (gen_movsi (tmp_reg, operands[1])); + else + emit_insn (gen_movdi (tmp_reg, operands[1])); + + newlabel = gen_label_rtx (); + emit_label (newlabel); + oldval = gen_reg_rtx (mode); + if (mode == SImode) + { + emit_insn (gen_movsi (oldval, tmp_reg)); + emit_insn (gen_ccv_restore_si (tmp_reg)); + } + else + { + emit_insn (gen_movdi (oldval, tmp_reg)); + emit_insn (gen_ccv_restore_di (tmp_reg)); + } + + /* Perform the specific operation. 
*/ + switch (code) + { + case IA64_ADD_OP: + if (mode == SImode) + emit_insn (gen_addsi3 (operands[0], tmp_reg, operands[2])); + else + emit_insn (gen_adddi3 (operands[0], tmp_reg, operands[2])); + break; + + case IA64_SUB_OP: + if (mode == SImode) + emit_insn (gen_subsi3 (operands[0], tmp_reg, operands[2])); + else + emit_insn (gen_subdi3 (operands[0], tmp_reg, operands[2])); + break; + + case IA64_OR_OP: + emit_insn (gen_iordi3 (operands[0], tmp_reg, operands[2])); + break; + + case IA64_AND_OP: + emit_insn (gen_anddi3 (operands[0], tmp_reg, operands[2])); + break; + + case IA64_XOR_OP: + emit_insn (gen_xordi3 (operands[0], tmp_reg, operands[2])); + break; + + case IA64_NAND_OP: + emit_insn (gen_anddi3 (operands[0], tmp_reg, operands[2])); + if (mode == SImode) + emit_insn (gen_one_cmplsi2 (operands[0], operands[0])); + else + emit_insn (gen_one_cmpldi2 (operands[0], operands[0])); + break; + + default: + break; + } + + if (mode == SImode) + emit_insn (gen_cmpxchg_acq_si (tmp_reg, operands[1], operands[0])); + else + emit_insn (gen_cmpxchg_acq_di (tmp_reg, operands[1], operands[0])); + + emit_cmp_and_jump_insns (tmp_reg, oldval, NE, 0, mode, 1, 0, newlabel); +} + +/* Expand val_ and bool_compare_and_swap. For val_ we want: + + ar.ccv = oldval + mf + cmpxchgsz.acq ret = [ptr], newval, ar.ccv + return ret + + For bool_ it's the same except return ret == oldval. 
+*/ +static rtx +ia64_expand_compare_and_swap (icode, arglist, target, boolcode) + enum insn_code icode; + tree arglist; + rtx target; + int boolcode; +{ + tree arg0, arg1, arg2; + rtx newlabel, newlabel2, op0, op1, op2, pat; + enum machine_mode tmode, mode0, mode1, mode2; + + arg0 = TREE_VALUE (arglist); + arg1 = TREE_VALUE (TREE_CHAIN (arglist)); + arg2 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist))); + op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0); + op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0); + op2 = expand_expr (arg2, NULL_RTX, VOIDmode, 0); + tmode = insn_data[icode].operand[0].mode; + mode0 = insn_data[icode].operand[1].mode; + mode1 = insn_data[icode].operand[2].mode; + mode2 = insn_data[icode].operand[3].mode; + + op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0)); + RTX_UNCHANGING_P (op0) = 1; + if (! (*insn_data[icode].operand[2].predicate) (op1, mode1)) + op1 = copy_to_mode_reg (mode1, op1); + if (! (*insn_data[icode].operand[3].predicate) (op2, mode2)) + op2 = copy_to_mode_reg (mode2, op2); + if (target == 0 + || GET_MODE (target) != tmode + || ! (*insn_data[icode].operand[0].predicate) (target, tmode)) + target = gen_reg_rtx (tmode); + + pat = GEN_FCN (icode) (target, op0, op1, op2); + if (! pat) + return 0; + emit_insn (pat); + if (boolcode) + { + if (tmode == SImode) + { + emit_insn (gen_cmpsi (target, op1)); + emit_insn (gen_seq (gen_lowpart (DImode, target))); + } + else + { + emit_insn (gen_cmpdi (target, op1)); + emit_insn (gen_seq (target)); + } + } + return target; +} + +/* Expand all intrinsics that take 2 arguments. 
*/ +static rtx +ia64_expand_binop_builtin (icode, arglist, target) + enum insn_code icode; + tree arglist; + rtx target; +{ + rtx pat; + tree arg0 = TREE_VALUE (arglist); + tree arg1 = TREE_VALUE (TREE_CHAIN (arglist)); + rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0); + rtx op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0); + enum machine_mode tmode = insn_data[icode].operand[0].mode; + enum machine_mode mode0 = insn_data[icode].operand[1].mode; + enum machine_mode mode1 = insn_data[icode].operand[2].mode; + + if (! target + || GET_MODE (target) != tmode + || ! (*insn_data[icode].operand[0].predicate) (target, tmode)) + target = gen_reg_rtx (tmode); + + op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0)); + if (! (*insn_data[icode].operand[2].predicate) (op1, mode1)) + op1 = copy_to_mode_reg (mode1, op1); + + pat = GEN_FCN (icode) (target, op0, op1); + if (! pat) + return 0; + emit_insn (pat); + return target; +} + +rtx +ia64_expand_builtin (exp, target, subtarget, mode, ignore) + tree exp; + rtx target; + rtx subtarget; + enum machine_mode mode; + int ignore; +{ + rtx op0, op1, op2, op3, pat; + rtx tmp_reg; + rtx newlabel, newlabel2; + tree arg0, arg1, arg2, arg3; + tree arglist = TREE_OPERAND (exp, 1); + tree fndecl = TREE_OPERAND (TREE_OPERAND (exp, 0), 0); + int fcode = DECL_FUNCTION_CODE (fndecl); + enum machine_mode tmode, mode0, mode1, mode2, mode3; + enum insn_code icode; + int boolcode = 0; + int i; + struct builtin_description *d; + + switch (fcode) + { + case IA64_BUILTIN_BOOL_COMPARE_AND_SWAP_SI: + return ia64_expand_compare_and_swap (CODE_FOR_val_compare_and_swap_si, arglist, target, 1); + case IA64_BUILTIN_VAL_COMPARE_AND_SWAP_SI: + return ia64_expand_compare_and_swap (CODE_FOR_val_compare_and_swap_si, arglist, target, 0); + case IA64_BUILTIN_BOOL_COMPARE_AND_SWAP_DI: + return ia64_expand_compare_and_swap (CODE_FOR_val_compare_and_swap_di, arglist, target, 1); + case IA64_BUILTIN_VAL_COMPARE_AND_SWAP_DI: + return ia64_expand_compare_and_swap 
(CODE_FOR_val_compare_and_swap_di, arglist, target, 0); + case IA64_BUILTIN_SYNCHRONIZE: + /* Pass a volatile memory operand. */ + tmp_reg = gen_rtx_REG (DImode, GR_REG(0)); + target = gen_rtx_MEM (BLKmode, tmp_reg); + emit_insn (gen_mf (target)); + return 0; + + case IA64_BUILTIN_LOCK_TEST_AND_SET_SI: + icode = CODE_FOR_lock_test_and_set_si; + arg0 = TREE_VALUE (arglist); + arg1 = TREE_VALUE (TREE_CHAIN (arglist)); + op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0); + op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0); + tmode = insn_data[icode].operand[0].mode; + mode0 = insn_data[icode].operand[1].mode; + mode1 = insn_data[icode].operand[2].mode; + op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0)); + RTX_UNCHANGING_P (op0) = 1; + if (! (*insn_data[icode].operand[2].predicate) (op1, mode1)) + op1 = copy_to_mode_reg (mode1, op1); + if (target == 0 + || GET_MODE (target) != tmode + || ! (*insn_data[icode].operand[0].predicate) (target, tmode)) + target = gen_reg_rtx (tmode); + pat = GEN_FCN (icode) (target, op0, op1); + if (! pat) + return 0; + emit_insn (pat); + return target; + + case IA64_BUILTIN_LOCK_TEST_AND_SET_DI: + icode = CODE_FOR_lock_test_and_set_di; + arg0 = TREE_VALUE (arglist); + arg1 = TREE_VALUE (TREE_CHAIN (arglist)); + op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0); + op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0); + tmode = insn_data[icode].operand[0].mode; + mode0 = insn_data[icode].operand[1].mode; + mode1 = insn_data[icode].operand[2].mode; + op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0)); + RTX_UNCHANGING_P (op0) = 1; + if (! (*insn_data[icode].operand[2].predicate) (op1, mode1)) + op1 = copy_to_mode_reg (mode1, op1); + if (target == 0 + || GET_MODE (target) != tmode + || ! (*insn_data[icode].operand[0].predicate) (target, tmode)) + target = gen_reg_rtx (tmode); + pat = GEN_FCN (icode) (target, op0, op1); + if (! 
pat) + return 0; + emit_insn (pat); + return target; + + case IA64_BUILTIN_LOCK_RELEASE_SI: + arg0 = TREE_VALUE (arglist); + op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0); + op0 = gen_rtx_MEM (SImode, copy_to_mode_reg (Pmode, op0)); + MEM_VOLATILE_P (op0) = 1; + emit_insn (gen_movsi (op0, GEN_INT(0))); + return 0; + + case IA64_BUILTIN_LOCK_RELEASE_DI: + arg0 = TREE_VALUE (arglist); + op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0); + op0 = gen_rtx_MEM (DImode, copy_to_mode_reg (Pmode, op0)); + MEM_VOLATILE_P (op0) = 1; + emit_insn (gen_movdi (op0, GEN_INT(0))); + return 0; + + default: + break; + } + + /* Expand all 32 bit intrinsics that take 2 arguments. */ + for (i=0, d = bdesc_2argsi; i < sizeof (bdesc_2argsi) / sizeof *d; i++, d++) + if (d->code == fcode) + return ia64_expand_binop_builtin (d->icode, arglist, target); + + /* Expand all 64 bit intrinsics that take 2 arguments. */ + for (i=0, d = bdesc_2argdi; i < sizeof (bdesc_2argdi) / sizeof *d; i++, d++) + if (d->code == fcode) + return ia64_expand_binop_builtin (d->icode, arglist, target); + + fail: + return 0; +} diff --git a/gcc/config/ia64/ia64.h b/gcc/config/ia64/ia64.h new file mode 100644 index 0000000..a245ebd --- /dev/null +++ b/gcc/config/ia64/ia64.h @@ -0,0 +1,2795 @@ +/* Definitions of target machine GNU compiler. IA64 version. + Copyright (C) 1999 Cygnus Solutions. + Contributed by James E. Wilson and + David Mosberger . + +This file is part of GNU CC. + +GNU CC is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 2, or (at your option) +any later version. + +GNU CC is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. 
+ +You should have received a copy of the GNU General Public License +along with GNU CC; see the file COPYING. If not, write to +the Free Software Foundation, 59 Temple Place - Suite 330, +Boston, MA 02111-1307, USA. */ + +/* ??? Use of the upper 32 FP registers for integer values will make context + switching slower, because the kernel only saves any registers past f32 if + it has to. */ + +/* ??? Look at ABI group documents for list of preprocessor macros and + other features required for ABI compliance. */ + +/* ??? Functions containing a non-local goto target save many registers. Why? + See for instance execute/920428-2.c. */ + +/* ??? Get CAN_DEBUG_WITHOUT_FP working so that -fomit-frame-pointer is not + needed. */ + +/* ??? Add support for short data/bss sections. */ + + +/* Run-time target specifications */ + +/* Define this to be a string constant containing `-D' options to define the + predefined macros that identify this machine and system. These macros will + be predefined unless the `-ansi' option is specified. */ +/* ??? This is undefed in svr4.h. */ +#define CPP_PREDEFINES "-Dia64 -Amachine(ia64)" + +/* This declaration should be present. */ +extern int target_flags; + +/* This series of macros is to allow compiler command arguments to enable or + disable the use of optional features of the target machine. */ + +#define MASK_BIG_ENDIAN 0x00000001 /* Generate big endian code. */ + +#define MASK_GNU_AS 0x00000002 /* Generate code for GNU as. */ + +#define MASK_GNU_LD 0x00000004 /* Generate code for GNU ld. */ + +#define MASK_NO_PIC 0x00000008 /* Generate code without GP reg. */ + +#define MASK_VOL_ASM_STOP 0x00000010 /* Emit stop bits for vol ext asm. */ + +#define MASK_A_STEP 0x00000020 /* Emit code for Itanium A step. */ + +#define MASK_REG_NAMES 0x00000040 /* Use in/loc/out register names. */ + +#define MASK_NO_SDATA 0x00000080 /* Disable sdata/scommon/sbss. */ + +#define MASK_DWARF2_ASM 0x40000000 /* test dwarf2 line info via gas. 
*/ + +#define TARGET_BIG_ENDIAN (target_flags & MASK_BIG_ENDIAN) + +#define TARGET_GNU_AS (target_flags & MASK_GNU_AS) + +#define TARGET_GNU_LD (target_flags & MASK_GNU_LD) + +#define TARGET_NO_PIC (target_flags & MASK_NO_PIC) + +#define TARGET_VOL_ASM_STOP (target_flags & MASK_VOL_ASM_STOP) + +#define TARGET_A_STEP (target_flags & MASK_A_STEP) + +#define TARGET_REG_NAMES (target_flags & MASK_REG_NAMES) + +#define TARGET_NO_SDATA (target_flags & MASK_NO_SDATA) + +#define TARGET_DWARF2_ASM (target_flags & MASK_DWARF2_ASM) + +/* This macro defines names of command options to set and clear bits in + `target_flags'. Its definition is an initializer with a subgrouping for + each command option. */ + +#define TARGET_SWITCHES \ +{ \ + { "big-endian", MASK_BIG_ENDIAN, \ + "Generate big endian code" }, \ + { "little-endian", -MASK_BIG_ENDIAN, \ + "Generate little endian code" }, \ + { "gnu-as", MASK_GNU_AS, \ + "Generate code for GNU as" }, \ + { "no-gnu-as", -MASK_GNU_AS, \ + "Generate code for Intel as" }, \ + { "gnu-ld", MASK_GNU_LD, \ + "Generate code for GNU ld" }, \ + { "no-gnu-ld", -MASK_GNU_LD, \ + "Generate code for Intel ld" }, \ + { "no-pic", MASK_NO_PIC, \ + "Generate code without GP reg" }, \ + { "volatile-asm-stop", MASK_VOL_ASM_STOP, \ + "Emit stop bits before and after volatile extended asms" }, \ + { "no-volatile-asm-stop", -MASK_VOL_ASM_STOP, \ + "Don't emit stop bits before and after volatile extended asms" }, \ + { "a-step", MASK_A_STEP, \ + "Emit code for Itanium (TM) processor A step"}, \ + { "register-names", MASK_REG_NAMES, \ + "Use in/loc/out register names"}, \ + { "no-sdata", MASK_NO_SDATA, \ + "Disable use of sdata/scommon/sbss"}, \ + { "sdata", -MASK_NO_SDATA, \ + "Enable use of sdata/scommon/sbss"}, \ + { "dwarf2-asm", MASK_DWARF2_ASM, \ + "Enable Dwarf 2 line debug info via GNU as"}, \ + { "no-dwarf2-asm", -MASK_DWARF2_ASM, \ + "Disable Dwarf 2 line debug info via GNU as"}, \ + { "", TARGET_DEFAULT | TARGET_CPU_DEFAULT, \ + NULL } \ +} + +/* 
Default target_flags if no switches are specified */ + +#ifndef TARGET_DEFAULT +#define TARGET_DEFAULT MASK_DWARF2_ASM +#endif + +#ifndef TARGET_CPU_DEFAULT +#define TARGET_CPU_DEFAULT 0 +#endif + +/* This macro is similar to `TARGET_SWITCHES' but defines names of command + options that have values. Its definition is an initializer with a + subgrouping for each command option. */ + +extern const char *ia64_fixed_range_string; +#define TARGET_OPTIONS \ +{ \ + { "fixed-range=", &ia64_fixed_range_string, \ + "Specify range of registers to make fixed."}, \ +} + +/* This macro is a C statement to print on `stderr' a string describing the + particular machine description choice. */ + +#define TARGET_VERSION fprintf (stderr, " (IA-64)"); + +/* Sometimes certain combinations of command options do not make sense on a + particular target machine. You can define a macro `OVERRIDE_OPTIONS' to + take account of this. This macro, if defined, is executed once just after + all the command options have been parsed. */ + +#define OVERRIDE_OPTIONS ia64_override_options () + +/* Some machines may desire to change what optimizations are performed for + various optimization levels. This macro, if defined, is executed once just + after the optimization level is determined and before the remainder of the + command options have been parsed. Values set in this macro are used as the + default values for the other command line options. */ + +/* #define OPTIMIZATION_OPTIONS(LEVEL,SIZE) */ + +/* Define this macro if debugging can be performed even without a frame + pointer. If this macro is defined, GNU CC will turn on the + `-fomit-frame-pointer' option whenever `-O' is specified. */ +/* ??? Need to define this. */ +/* #define CAN_DEBUG_WITHOUT_FP */ + + +/* Driver configuration */ + +/* A C string constant that tells the GNU CC driver program options to pass to + CPP. It can also specify how to translate options you give to GNU CC into + options for GNU CC to pass to the CPP. */ + +/* ??? 
__LONG_MAX__ depends on LP64/ILP32 switch. */ +/* ??? An alternative is to modify glimits.h to check for __LP64__ instead + of checked for CPU specific defines. We could also get rid of all LONG_MAX + defines in other tm.h files. */ +#define CPP_SPEC \ + "%{mcpu=itanium:-D__itanium__} %{mbig-endian:-D__BIG_ENDIAN__} \ + -D__LONG_MAX__=9223372036854775807L" + +/* If this macro is defined, the preprocessor will not define the builtin macro + `__SIZE_TYPE__'. The macro `__SIZE_TYPE__' must then be defined by + `CPP_SPEC' instead. + + This should be defined if `SIZE_TYPE' depends on target dependent flags + which are not accessible to the preprocessor. Otherwise, it should not be + defined. */ +/* ??? Needs to be defined for P64 code. */ +/* #define NO_BUILTIN_SIZE_TYPE */ + +/* If this macro is defined, the preprocessor will not define the builtin macro + `__PTRDIFF_TYPE__'. The macro `__PTRDIFF_TYPE__' must then be defined by + `CPP_SPEC' instead. + + This should be defined if `PTRDIFF_TYPE' depends on target dependent flags + which are not accessible to the preprocessor. Otherwise, it should not be + defined. */ +/* ??? Needs to be defined for P64 code. */ +/* #define NO_BUILTIN_PTRDIFF_TYPE */ + +/* A C string constant that tells the GNU CC driver program options to pass to + `cc1'. It can also specify how to translate options you give to GNU CC into + options for GNU CC to pass to the `cc1'. */ + +/* #define CC1_SPEC "" */ + +/* A C string constant that tells the GNU CC driver program options to pass to + `cc1plus'. It can also specify how to translate options you give to GNU CC + into options for GNU CC to pass to the `cc1plus'. */ + +/* #define CC1PLUS_SPEC "" */ + +/* A C string constant that tells the GNU CC driver program options to pass to + the assembler. It can also specify how to translate options you give to GNU + CC into options for GNU CC to pass to the assembler. */ + +#if ((TARGET_CPU_DEFAULT | TARGET_DEFAULT) & MASK_GNU_AS) != 0 +/* GNU AS. 
*/ +#define ASM_SPEC "%{mno-gnu-as:-N so}" +#else +/* Intel ias. */ +#define ASM_SPEC "%{!mgnu-as:-N so}" +#endif + +/* A C string constant that tells the GNU CC driver program options to pass to + the linker. It can also specify how to translate options you give to GNU CC + into options for GNU CC to pass to the linker. */ + +/* The Intel linker does not support dynamic linking, so we need -dn. + The Intel linker gives annoying messages unless -N so is used. */ +#if ((TARGET_CPU_DEFAULT | TARGET_DEFAULT) & MASK_GNU_LD) != 0 +/* GNU LD. */ +#define LINK_SPEC "%{mno-gnu-ld:-dn -N so}" +#else +/* Intel ild. */ +#define LINK_SPEC "%{!mgnu-ld:-dn -N so}" +#endif + + +/* Storage Layout */ + +/* Define this macro to have the value 1 if the most significant bit in a byte + has the lowest number; otherwise define it to have the value zero. */ + +#define BITS_BIG_ENDIAN 0 + +/* Define this macro to have the value 1 if the most significant byte in a word + has the lowest number. This macro need not be a constant. */ + +#define BYTES_BIG_ENDIAN (TARGET_BIG_ENDIAN != 0) + +/* Define this macro to have the value 1 if, in a multiword object, the most + significant word has the lowest number. */ + +#define WORDS_BIG_ENDIAN (TARGET_BIG_ENDIAN != 0) + +/* Define this macro if WORDS_BIG_ENDIAN is not constant. This must be a + constant value with the same meaning as WORDS_BIG_ENDIAN, which will be used + only when compiling libgcc2.c. Typically the value will be set based on + preprocessor defines. */ +#if defined(__BIG_ENDIAN__) +#define LIBGCC2_WORDS_BIG_ENDIAN 1 +#else +#define LIBGCC2_WORDS_BIG_ENDIAN 0 +#endif + +/* Define this macro to be the number of bits in an addressable storage unit + (byte); normally 8. */ +#define BITS_PER_UNIT 8 + +/* Number of bits in a word; normally 32. */ +#define BITS_PER_WORD 64 + +/* Number of storage units in a word; normally 4. */ +#define UNITS_PER_WORD 8 + +/* Width of a pointer, in bits. 
You must specify a value no wider than the + width of `Pmode'. If it is not equal to the width of `Pmode', you must + define `POINTERS_EXTEND_UNSIGNED'. */ +/* ??? Implement optional 32 bit pointer size later? */ +#define POINTER_SIZE 64 + +/* A C expression whose value is nonzero if pointers that need to be extended + from being `POINTER_SIZE' bits wide to `Pmode' are sign-extended and zero if + they are zero-extended. + + You need not define this macro if the `POINTER_SIZE' is equal to the width + of `Pmode'. */ +/* ??? May need this for 32 bit pointers. */ +/* #define POINTERS_EXTEND_UNSIGNED */ + +/* A macro to update MODE and UNSIGNEDP when an object whose type is TYPE and + which has the specified mode and signedness is to be stored in a register. + This macro is only called when TYPE is a scalar type. */ + +/* ??? Maybe sign-extend 32 bit values like the alpha? Or maybe zero-extend + because we only have zero-extending loads? */ +#define PROMOTE_MODE(MODE,UNSIGNEDP,TYPE) \ +do \ + { \ + if (GET_MODE_CLASS (MODE) == MODE_INT \ + && GET_MODE_SIZE (MODE) < UNITS_PER_WORD) \ + (MODE) = DImode; \ + } \ +while (0) + +/* Define this macro if the promotion described by `PROMOTE_MODE' should also + be done for outgoing function arguments. */ +/* ??? ABI doesn't allow us to define this. */ +/* #define PROMOTE_FUNCTION_ARGS */ + +/* Define this macro if the promotion described by `PROMOTE_MODE' should also + be done for the return value of functions. + + If this macro is defined, `FUNCTION_VALUE' must perform the same promotions + done by `PROMOTE_MODE'. */ +/* ??? ABI doesn't allow us to define this. */ +/* #define PROMOTE_FUNCTION_RETURN */ + +/* Normal alignment required for function parameters on the stack, in bits. + All stack parameters receive at least this much alignment regardless of data + type. On most machines, this is the same as the size of an integer. 
*/ +#define PARM_BOUNDARY 64 + +/* Define this macro if you wish to preserve a certain alignment for the stack + pointer. The definition is a C expression for the desired alignment + (measured in bits). */ + +#define STACK_BOUNDARY 128 + +/* Align frames on double word boundaries */ +#ifndef IA64_STACK_ALIGN +#define IA64_STACK_ALIGN(LOC) (((LOC) + 15) & ~15) +#endif + +/* Alignment required for a function entry point, in bits. */ +#define FUNCTION_BOUNDARY 128 + +/* Biggest alignment that any data type can require on this machine, + in bits. */ +/* Optional x86 80-bit float, quad-precision 128-bit float, and quad-word + 128 bit integers all require 128 bit alignment. */ +#define BIGGEST_ALIGNMENT 128 + +/* If defined, a C expression to compute the alignment for a static variable. + TYPE is the data type, and ALIGN is the alignment that the object + would ordinarily have. The value of this macro is used instead of that + alignment to align the object. */ + +#define DATA_ALIGNMENT(TYPE, ALIGN) \ + (TREE_CODE (TYPE) == ARRAY_TYPE \ + && TYPE_MODE (TREE_TYPE (TYPE)) == QImode \ + && (ALIGN) < BITS_PER_WORD ? BITS_PER_WORD : (ALIGN)) + +/* If defined, a C expression to compute the alignment given to a constant that + is being placed in memory. CONSTANT is the constant and ALIGN is the + alignment that the object would ordinarily have. The value of this macro is + used instead of that alignment to align the object. */ + +#define CONSTANT_ALIGNMENT(EXP, ALIGN) \ + (TREE_CODE (EXP) == STRING_CST \ + && (ALIGN) < BITS_PER_WORD ? BITS_PER_WORD : (ALIGN)) + +/* Define this macro to be the value 1 if instructions will fail to work if + given data not on the nominal alignment. If instructions will merely go + slower in that case, define this macro as 0. */ +#define STRICT_ALIGNMENT 1 + +/* Define this if you wish to imitate the way many other C compilers handle + alignment of bitfields and the structures that contain them. 
+ The behavior is that the type written for a bitfield (`int', `short', or + other integer type) imposes an alignment for the entire structure, as if the + structure really did contain an ordinary field of that type. In addition, + the bitfield is placed within the structure so that it would fit within such + a field, not crossing a boundary for it. */ +#define PCC_BITFIELD_TYPE_MATTERS 1 + +/* Define this macro as an expression for the overall size of a structure + (given by STRUCT as a tree node) when the size computed from the fields is + SIZE and the alignment is ALIGN. + + The default is to round SIZE up to a multiple of ALIGN. */ +/* ??? Might need this for 80-bit double-extended floats. */ +/* #define ROUND_TYPE_SIZE(STRUCT, SIZE, ALIGN) */ + +/* Define this macro as an expression for the alignment of a structure (given + by STRUCT as a tree node) if the alignment computed in the usual way is + COMPUTED and the alignment explicitly specified was SPECIFIED. + + The default is to use SPECIFIED if it is larger; otherwise, use the smaller + of COMPUTED and `BIGGEST_ALIGNMENT' */ +/* ??? Might need this for 80-bit double-extended floats. */ +/* #define ROUND_TYPE_ALIGN(STRUCT, COMPUTED, SPECIFIED) */ + +/* An integer expression for the size in bits of the largest integer machine + mode that should actually be used. */ + +/* Allow pairs of registers to be used, which is the intent of the default. */ +#define MAX_FIXED_MODE_SIZE GET_MODE_BITSIZE (TImode) + +/* A code distinguishing the floating point format of the target machine. */ +#define TARGET_FLOAT_FORMAT IEEE_FLOAT_FORMAT + +/* GNU CC supports two ways of implementing C++ vtables: traditional or with + so-called "thunks". The flag `-fvtable-thunk' chooses between them. Define + this macro to be a C expression for the default value of that flag. If + `DEFAULT_VTABLE_THUNKS' is 0, GNU CC uses the traditional implementation by + default. 
The "thunk" implementation is more efficient (especially if you + have provided an implementation of `ASM_OUTPUT_MI_THUNK', but is not binary + compatible with code compiled using the traditional implementation. If you + are writing a new ports, define `DEFAULT_VTABLE_THUNKS' to 1. + + If you do not define this macro, the default for `-fvtable-thunk' is 0. */ +#define DEFAULT_VTABLE_THUNKS 1 + + +/* Layout of Source Language Data Types */ + +/* A C expression for the size in bits of the type `int' on the target machine. + If you don't define this, the default is one word. */ +#define INT_TYPE_SIZE 32 + +/* A C expression for the size in bits of the type `short' on the target + machine. If you don't define this, the default is half a word. (If this + would be less than one storage unit, it is rounded up to one unit.) */ +#define SHORT_TYPE_SIZE 16 + +/* A C expression for the size in bits of the type `long' on the target + machine. If you don't define this, the default is one word. */ +/* ??? Should be 32 for ILP32 code. */ +#define LONG_TYPE_SIZE 64 + +/* Maximum number for the size in bits of the type `long' on the target + machine. If this is undefined, the default is `LONG_TYPE_SIZE'. Otherwise, + it is the constant value that is the largest value that `LONG_TYPE_SIZE' can + have at run-time. This is used in `cpp'. */ +/* ??? Should be 64 for ILP32 code. */ +/* #define MAX_LONG_TYPE_SIZE */ + +/* A C expression for the size in bits of the type `long long' on the target + machine. If you don't define this, the default is two words. If you want + to support GNU Ada on your machine, the value of macro must be at least 64. */ +#define LONG_LONG_TYPE_SIZE 64 + +/* A C expression for the size in bits of the type `char' on the target + machine. If you don't define this, the default is one quarter of a word. + (If this would be less than one storage unit, it is rounded up to one unit.) 
*/ +#define CHAR_TYPE_SIZE 8 + +/* A C expression for the size in bits of the type `float' on the target + machine. If you don't define this, the default is one word. */ +#define FLOAT_TYPE_SIZE 32 + +/* A C expression for the size in bits of the type `double' on the target + machine. If you don't define this, the default is two words. */ +#define DOUBLE_TYPE_SIZE 64 + +/* A C expression for the size in bits of the type `long double' on the target + machine. If you don't define this, the default is two words. */ +/* ??? We have an 80 bit extended double format. */ +#define LONG_DOUBLE_TYPE_SIZE 64 + +/* An expression whose value is 1 or 0, according to whether the type `char' + should be signed or unsigned by default. The user can always override this + default with the options `-fsigned-char' and `-funsigned-char'. */ +#define DEFAULT_SIGNED_CHAR 1 + +/* A C expression for a string describing the name of the data type to use for + size values. The typedef name `size_t' is defined using the contents of the + string. */ +/* ??? Needs to be defined for P64 code. */ +/* #define SIZE_TYPE */ + +/* A C expression for a string describing the name of the data type to use for + the result of subtracting two pointers. The typedef name `ptrdiff_t' is + defined using the contents of the string. See `SIZE_TYPE' above for more + information. */ +/* ??? Needs to be defined for P64 code. */ +/* #define PTRDIFF_TYPE */ + +/* A C expression for a string describing the name of the data type to use for + wide characters. The typedef name `wchar_t' is defined using the contents + of the string. See `SIZE_TYPE' above for more information. */ +/* #define WCHAR_TYPE */ + +/* A C expression for the size in bits of the data type for wide characters. + This is used in `cpp', which cannot make use of `WCHAR_TYPE'. */ +/* #define WCHAR_TYPE_SIZE */ + +/* Maximum number for the size in bits of the data type for wide characters. + If this is undefined, the default is `WCHAR_TYPE_SIZE'. 
Otherwise, it is + the constant value that is the largest value that `WCHAR_TYPE_SIZE' can have + at run-time. This is used in `cpp'. */ +/* #define MAX_WCHAR_TYPE_SIZE */ + +/* A C constant expression for the integer value for escape sequence + `\a'. */ +#define TARGET_BELL 0x7 + +/* C constant expressions for the integer values for escape sequences + `\b', `\t' and `\n'. */ +#define TARGET_BS 0x8 +#define TARGET_TAB 0x9 +#define TARGET_NEWLINE 0xa + +/* C constant expressions for the integer values for escape sequences + `\v', `\f' and `\r'. */ +#define TARGET_VT 0xb +#define TARGET_FF 0xc +#define TARGET_CR 0xd + + +/* Register Basics */ + +/* Number of hardware registers known to the compiler. + We have 128 general registers, 128 floating point registers, 64 predicate + registers, 8 branch registers, and one frame pointer register. */ + +/* ??? Should add ar.lc, ar.ec and probably also ar.pfs. */ + +#define FIRST_PSEUDO_REGISTER 330 + +/* Ranges for the various kinds of registers. */ +#define ADDL_REGNO_P(REGNO) ((REGNO) >= 0 && (REGNO) <= 3) +#define GR_REGNO_P(REGNO) ((REGNO) >= 0 && (REGNO) <= 127) +#define FR_FP_REGNO_P(REGNO) \ + (((REGNO) >= 128 && (REGNO) <= 143) || ((REGNO) >= 152 && (REGNO) <= 223)) +#define FR_INT_REGNO_P(REGNO) \ + (((REGNO) >= 144 && (REGNO) <= 151) || ((REGNO) >= 224 && (REGNO) <= 255)) +#define FR_REGNO_P(REGNO) ((REGNO) >= 128 && (REGNO) <= 255) +#define PR_REGNO_P(REGNO) ((REGNO) >= 256 && (REGNO) <= 319) +#define BR_REGNO_P(REGNO) ((REGNO) >= 320 && (REGNO) <= 327) +#define GENERAL_REGNO_P(REGNO) \ + (GR_REGNO_P (REGNO) \ + || (REGNO) == FRAME_POINTER_REGNUM \ + || (REGNO) == RETURN_ADDRESS_REGNUM) + +#define GR_REG(REGNO) ((REGNO) + 0) +#define FR_REG(REGNO) ((REGNO) + 128) +#define PR_REG(REGNO) ((REGNO) + 256) +#define BR_REG(REGNO) ((REGNO) + 320) +#define OUT_REG(REGNO) ((REGNO) + 120) +#define IN_REG(REGNO) ((REGNO) + 112) +#define LOC_REG(REGNO) ((REGNO) + 32) + +#define IN_REGNO_P(REGNO) ((REGNO) >= IN_REG (0) && 
(REGNO) <= IN_REG (7)) +#define LOC_REGNO_P(REGNO) ((REGNO) >= LOC_REG (0) && (REGNO) <= LOC_REG (79)) +#define OUT_REGNO_P(REGNO) ((REGNO) >= OUT_REG (0) && (REGNO) <= OUT_REG (7)) + +/* ??? Don't really need two sets of macros. I like this one better because + it is less typing. */ +#define R_GR(REGNO) GR_REG (REGNO) +#define R_FR(REGNO) FR_REG (REGNO) +#define R_PR(REGNO) PR_REG (REGNO) +#define R_BR(REGNO) BR_REG (REGNO) + +/* An initializer that says which registers are used for fixed purposes all + throughout the compiled code and are therefore not available for general + allocation. + + r0: constant 0 + r1: global pointer (gp) + r12: stack pointer (sp) + r13: thread pointer (tp) + f0: constant 0.0 + f1: constant 1.0 + p0: constant true + fp: eliminable frame pointer */ + +/* The last 16 stacked regs are fixed, because they are reserved for the 8 + input and 8 output registers. */ + +/* ??? Must mark the next 3 stacked regs as fixed, because ia64_expand_prologue + assumes that three locals are available for fp, b0, and ar.pfs. */ + +/* ??? Should mark b0 as fixed? */ + +/* ??? input and output registers do not have to be marked as fixed. */ + +#define FIXED_REGISTERS \ +{ /* General registers. */ \ + 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, \ + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, \ + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, \ + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, \ + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, \ + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, \ + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, \ + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, \ + /* Floating-point registers. 
*/ \ + 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, \ + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, \ + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, \ + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, \ + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, \ + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, \ + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, \ + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, \ + /* Predicate registers. */ \ + 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, \ + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, \ + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, \ + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, \ + /* Branch registers. */ \ + 0, 0, 0, 0, 0, 0, 0, 0, \ + /*FP RA*/ \ + 1, 1, \ + } + +/* Like `FIXED_REGISTERS' but has 1 for each register that is clobbered (in + general) by function calls as well as for fixed registers. This macro + therefore identifies the registers that are not available for general + allocation of values that must live across function calls. */ + +/* ??? If inputs are not marked as fixed, then they are not call clobbered. */ + +#define CALL_USED_REGISTERS \ +{ /* General registers. */ \ + 1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, \ + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, \ + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, \ + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, \ + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, \ + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, \ + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, \ + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, \ + /* Floating-point registers. 
*/ \ + 1, 1, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, \ + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, \ + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, \ + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, \ + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, \ + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, \ + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, \ + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, \ + /* Predicate registers. */ \ + 1, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, \ + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, \ + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, \ + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, \ + /* Branch registers. */ \ + 1, 0, 0, 0, 0, 0, 1, 1, \ + /*FP RA*/ \ + 1, 1, \ +} + +/* Define this macro if the target machine has register windows. This C + expression returns the register number as seen by the called function + corresponding to the register number OUT as seen by the calling function. + Return OUT if register number OUT is not an outbound register. */ + +#define INCOMING_REGNO(OUT) \ + ((unsigned) ((OUT) - OUT_REG (0)) < 8 ? IN_REG ((OUT) - OUT_REG (0)) : (OUT)) + +/* Define this macro if the target machine has register windows. This C + expression returns the register number as seen by the calling function + corresponding to the register number IN as seen by the called function. + Return IN if register number IN is not an inbound register. */ + +#define OUTGOING_REGNO(IN) \ + ((unsigned) ((IN) - IN_REG (0)) < 8 ? OUT_REG ((IN) - IN_REG (0)) : (IN)) + + +/* Order of allocation of registers */ + +/* If defined, an initializer for a vector of integers, containing the numbers + of hard registers in the order in which GNU CC should prefer to use them + (from most preferred to least). + + If this macro is not defined, registers are used lowest numbered first (all + else being equal). 
+ + One use of this macro is on machines where the highest numbered registers + must always be saved and the save-multiple-registers instruction supports + only sequences of consecutive registers. On such machines, define + `REG_ALLOC_ORDER' to be an initializer that lists the highest numbered + allocatable register first. */ + +/* ??? Should the GR return value registers come before or after the rest + of the caller-save GRs? */ + +/* ??? Output registers are cheap, because they will be not be saved + by the register engine. They probably should be early in the list. + We need to make them not fixed first though. Similarly, input registers + are callee-saved (RSE) like the stacked locals. */ + +#define REG_ALLOC_ORDER \ +{ \ + /* Caller-saved general registers. */ \ + R_GR (14), R_GR (15), R_GR (16), R_GR (17), \ + R_GR (18), R_GR (19), R_GR (20), R_GR (21), R_GR (22), R_GR (23), \ + R_GR (24), R_GR (25), R_GR (26), R_GR (27), R_GR (28), R_GR (29), \ + R_GR (30), R_GR (31), \ + /* Caller-saved general registers, also used for return values. */ \ + R_GR (8), R_GR (9), R_GR (10), R_GR (11), \ + /* addl caller-saved general registers. */ \ + R_GR (2), R_GR (3), \ + /* Caller-saved FP registers. */ \ + R_FR (6), R_FR (7), \ + /* Caller-saved FP registers, used for parameters and return values. */ \ + R_FR (8), R_FR (9), R_FR (10), R_FR (11), \ + R_FR (12), R_FR (13), R_FR (14), R_FR (15), \ + /* Rotating caller-saved FP registers. 
*/ \ + R_FR (32), R_FR (33), R_FR (34), R_FR (35), \ + R_FR (36), R_FR (37), R_FR (38), R_FR (39), R_FR (40), R_FR (41), \ + R_FR (42), R_FR (43), R_FR (44), R_FR (45), R_FR (46), R_FR (47), \ + R_FR (48), R_FR (49), R_FR (50), R_FR (51), R_FR (52), R_FR (53), \ + R_FR (54), R_FR (55), R_FR (56), R_FR (57), R_FR (58), R_FR (59), \ + R_FR (60), R_FR (61), R_FR (62), R_FR (63), R_FR (64), R_FR (65), \ + R_FR (66), R_FR (67), R_FR (68), R_FR (69), R_FR (70), R_FR (71), \ + R_FR (72), R_FR (73), R_FR (74), R_FR (75), R_FR (76), R_FR (77), \ + R_FR (78), R_FR (79), R_FR (80), R_FR (81), R_FR (82), R_FR (83), \ + R_FR (84), R_FR (85), R_FR (86), R_FR (87), R_FR (88), R_FR (89), \ + R_FR (90), R_FR (91), R_FR (92), R_FR (93), R_FR (94), R_FR (95), \ + R_FR (96), R_FR (97), R_FR (98), R_FR (99), R_FR (100), R_FR (101), \ + R_FR (102), R_FR (103), R_FR (104), R_FR (105), R_FR (106), R_FR (107), \ + R_FR (108), R_FR (109), R_FR (110), R_FR (111), R_FR (112), R_FR (113), \ + R_FR (114), R_FR (115), R_FR (116), R_FR (117), R_FR (118), R_FR (119), \ + R_FR (120), R_FR (121), R_FR (122), R_FR (123), R_FR (124), R_FR (125), \ + R_FR (126), R_FR (127), \ + /* Caller-saved predicate registers. */ \ + R_PR (6), R_PR (7), R_PR (8), R_PR (9), R_PR (10), R_PR (11), \ + R_PR (12), R_PR (13), R_PR (14), R_PR (15), \ + /* Rotating caller-saved predicate registers. */ \ + R_PR (16), R_PR (17), \ + R_PR (18), R_PR (19), R_PR (20), R_PR (21), R_PR (22), R_PR (23), \ + R_PR (24), R_PR (25), R_PR (26), R_PR (27), R_PR (28), R_PR (29), \ + R_PR (30), R_PR (31), R_PR (32), R_PR (33), R_PR (34), R_PR (35), \ + R_PR (36), R_PR (37), R_PR (38), R_PR (39), R_PR (40), R_PR (41), \ + R_PR (42), R_PR (43), R_PR (44), R_PR (45), R_PR (46), R_PR (47), \ + R_PR (48), R_PR (49), R_PR (50), R_PR (51), R_PR (52), R_PR (53), \ + R_PR (54), R_PR (55), R_PR (56), R_PR (57), R_PR (58), R_PR (59), \ + R_PR (60), R_PR (61), R_PR (62), R_PR (63), \ + /* Caller-saved branch registers. 
*/ \ + R_BR (6), R_BR (7), \ + \ + /* Stacked callee-saved general registers. */ \ + R_GR (32), R_GR (33), R_GR (34), R_GR (35), \ + R_GR (36), R_GR (37), R_GR (38), R_GR (39), R_GR (40), R_GR (41), \ + R_GR (42), R_GR (43), R_GR (44), R_GR (45), R_GR (46), R_GR (47), \ + R_GR (48), R_GR (49), R_GR (50), R_GR (51), R_GR (52), R_GR (53), \ + R_GR (54), R_GR (55), R_GR (56), R_GR (57), R_GR (58), R_GR (59), \ + R_GR (60), R_GR (61), R_GR (62), R_GR (63), R_GR (64), R_GR (65), \ + R_GR (66), R_GR (67), R_GR (68), R_GR (69), R_GR (70), R_GR (71), \ + R_GR (72), R_GR (73), R_GR (74), R_GR (75), R_GR (76), R_GR (77), \ + R_GR (78), R_GR (79), R_GR (80), R_GR (81), R_GR (82), R_GR (83), \ + R_GR (84), R_GR (85), R_GR (86), R_GR (87), R_GR (88), R_GR (89), \ + R_GR (90), R_GR (91), R_GR (92), R_GR (93), R_GR (94), R_GR (95), \ + R_GR (96), R_GR (97), R_GR (98), R_GR (99), R_GR (100), R_GR (101), \ + R_GR (102), R_GR (103), R_GR (104), R_GR (105), R_GR (106), R_GR (107), \ + R_GR (108), \ + /* Callee-saved general registers. */ \ + R_GR (4), R_GR (5), R_GR (6), R_GR (7), \ + /* Callee-saved FP registers. */ \ + R_FR (2), R_FR (3), R_FR (4), R_FR (5), R_FR (16), R_FR (17), \ + R_FR (18), R_FR (19), R_FR (20), R_FR (21), R_FR (22), R_FR (23), \ + R_FR (24), R_FR (25), R_FR (26), R_FR (27), R_FR (28), R_FR (29), \ + R_FR (30), R_FR (31), \ + /* Callee-saved predicate registers. */ \ + R_PR (1), R_PR (2), R_PR (3), R_PR (4), R_PR (5), \ + /* Callee-saved branch registers. */ \ + R_BR (1), R_BR (2), R_BR (3), R_BR (4), R_BR (5), \ + \ + /* ??? Stacked registers reserved for fp, rp, and ar.pfs. */ \ + R_GR (109), R_GR (110), R_GR (111), \ + /* Input registers. */ \ + R_GR (112), R_GR (113), R_GR (114), R_GR (115), R_GR (116), R_GR (117), \ + R_GR (118), R_GR (119), \ + /* Output registers. */ \ + R_GR (120), R_GR (121), R_GR (122), R_GR (123), R_GR (124), R_GR (125), \ + R_GR (126), R_GR (127), \ + \ + /* Special general registers. 
*/ \ + R_GR (0), R_GR (1), R_GR (12), R_GR (13), \ + /* Special FP registers. */ \ + R_FR (0), R_FR (1), \ + /* Special predicate registers. */ \ + R_PR (0), \ + /* Special branch registers. */ \ + R_BR (0), \ + /* Frame pointer. Return address. */ \ + FRAME_POINTER_REGNUM, RETURN_ADDRESS_REGNUM, \ +} + + +/* How Values Fit in Registers */ + +/* A C expression for the number of consecutive hard registers, starting at + register number REGNO, required to hold a value of mode MODE. */ + +/* ??? x86 80-bit FP values only require 1 register. */ +/* ??? We say that CCmode values require two registers. This allows us to + easily store the normal and inverted values. If we want single register + predicates, we can use EXTRA_CC_MODES to give them a different mode. */ + +#define HARD_REGNO_NREGS(REGNO, MODE) \ + ((MODE) == CCmode && PR_REGNO_P (REGNO) ? 2 \ + : FR_REGNO_P (REGNO) && (MODE) == XFmode ? 1 \ + : (GET_MODE_SIZE (MODE) + UNITS_PER_WORD - 1) / UNITS_PER_WORD) + +/* A C expression that is nonzero if it is permissible to store a value of mode + MODE in hard register number REGNO (or in several registers starting with + that one). */ + +#define HARD_REGNO_MODE_OK(REGNO, MODE) \ + (FR_FP_REGNO_P (REGNO) ? ! INTEGRAL_MODE_P (MODE) \ + : FR_INT_REGNO_P (REGNO) ? ! FLOAT_MODE_P (MODE) \ + : PR_REGNO_P (REGNO) ? (MODE) == CCmode \ + : 1) + +/* A C expression that is nonzero if it is desirable to choose register + allocation so as to avoid move instructions between a value of mode MODE1 + and a value of mode MODE2. + + If `HARD_REGNO_MODE_OK (R, MODE1)' and `HARD_REGNO_MODE_OK (R, MODE2)' are + ever different for any R, then `MODES_TIEABLE_P (MODE1, MODE2)' must be + zero. */ +/* ??? If the comments are true, then this must be zero if one mode is CCmode, + INTEGRAL_MODE_P or FLOAT_MODE_P and the other is not. Otherwise, it is + true. */ +#define MODES_TIEABLE_P(MODE1, MODE2) 1 + +/* Define this macro if the compiler should avoid copies to/from CCmode + registers. 
You should only define this macro if support for copying to/from
+   CCmode is incomplete.  */
+/* ??? CCmode copies are very expensive, so we might want this defined.  */
+/* #define AVOID_CCMODE_COPIES */
+
+
+/* Handling Leaf Functions */
+
+/* A C initializer for a vector, indexed by hard register number, which
+   contains 1 for a register that is allowable in a candidate for leaf function
+   treatment.  */
+/* ??? This might be useful.  */
+/* #define LEAF_REGISTERS */
+
+/* A C expression whose value is the register number to which REGNO should be
+   renumbered, when a function is treated as a leaf function.  */
+/* ??? This might be useful.  */
+/* #define LEAF_REG_REMAP(REGNO) */
+
+
+/* Register Classes */
+
+/* An enumeral type that must be defined with all the register class names as
+   enumeral values.  `NO_REGS' must be first.  `ALL_REGS' must be the last
+   register class, followed by one more enumeral value, `LIM_REG_CLASSES',
+   which is not a register class but rather tells how many classes there
+   are.  */
+/* ??? FP registers hold INT and FP values in different representations, so
+   we can't just use a subreg to convert between the two.  We get around this
+   problem by segmenting the FP register set into two parts.  One part (FR_INT)
+   only holds integer values, and one part (FR_FP) only holds FP values.  Thus
+   we always know which representation is being used.  */
+/* ??? When compiling without optimization, it is possible for the only use of
+   a pseudo to be a parameter load from the stack with a REG_EQUIV note.
+   Regclass handles this case specially and does not assign any costs to the
+   pseudo.  The pseudo then ends up using the last class before ALL_REGS.
+   Thus we must not let either PR_REGS or BR_REGS be the last class.  The
+   testcase for this is gcc.c-torture/execute/va-arg-7.c.
*/ +enum reg_class +{ + NO_REGS, + PR_REGS, + BR_REGS, + ADDL_REGS, + GR_REGS, + FR_INT_REGS, + FR_FP_REGS, + FR_REGS, + GR_AND_FR_INT_REGS, + GR_AND_FR_FP_REGS, + GR_AND_FR_REGS, + ALL_REGS, + LIM_REG_CLASSES +}; + +#define GENERAL_REGS GR_REGS + +/* The number of distinct register classes. */ +#define N_REG_CLASSES ((int) LIM_REG_CLASSES) + +/* An initializer containing the names of the register classes as C string + constants. These names are used in writing some of the debugging dumps. */ +#define REG_CLASS_NAMES \ +{ "NO_REGS", "PR_REGS", "BR_REGS", "ADDL_REGS", "GR_REGS", "FR_INT_REGS", \ + "FR_FP_REGS", "FR_REGS", "GR_AND_FR_INT_REGS", "GR_AND_FR_FP_REGS", \ + "GR_AND_FR_REGS", "ALL_REGS" } + +/* An initializer containing the contents of the register classes, as integers + which are bit masks. The Nth integer specifies the contents of class N. + The way the integer MASK is interpreted is that register R is in the class + if `MASK & (1 << R)' is 1. */ +#define REG_CLASS_CONTENTS \ +{ \ + /* NO_REGS. */ \ + { 0x00000000, 0x00000000, 0x00000000, 0x00000000, \ + 0x00000000, 0x00000000, 0x00000000, 0x00000000, \ + 0x00000000, 0x00000000, 0x000 }, \ + /* PR_REGS. */ \ + { 0x00000000, 0x00000000, 0x00000000, 0x00000000, \ + 0x00000000, 0x00000000, 0x00000000, 0x00000000, \ + 0xFFFFFFFF, 0xFFFFFFFF, 0x000 }, \ + /* BR_REGS. */ \ + { 0x00000000, 0x00000000, 0x00000000, 0x00000000, \ + 0x00000000, 0x00000000, 0x00000000, 0x00000000, \ + 0x00000000, 0x00000000, 0x0FF }, \ + /* ADDL_REGS. */ \ + { 0x0000000F, 0x00000000, 0x00000000, 0x00000000, \ + 0x00000000, 0x00000000, 0x00000000, 0x00000000, \ + 0x00000000, 0x00000000, 0x000 }, \ + /* GR_REGS. */ \ + { 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, \ + 0x00000000, 0x00000000, 0x00000000, 0x00000000, \ + 0x00000000, 0x00000000, 0x300 }, \ + /* FR_INT_REGS. 
*/ \ + { 0x00000000, 0x00000000, 0x00000000, 0x00000000, \ + 0x00FF0000, 0x00000000, 0x00000000, 0xFFFFFFFF, \ + 0x00000000, 0x00000000, 0x000 }, \ + /* FR_FP_REGS. */ \ + { 0x00000000, 0x00000000, 0x00000000, 0x00000000, \ + 0xFF00FFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0x00000000, \ + 0x00000000, 0x00000000, 0x000 }, \ + /* FR_REGS. */ \ + { 0x00000000, 0x00000000, 0x00000000, 0x00000000, \ + 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, \ + 0x00000000, 0x00000000, 0x000 }, \ + /* GR_AND_FR_INT_REGS. */ \ + { 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, \ + 0x00FF0000, 0x00000000, 0x00000000, 0xFFFFFFFF, \ + 0x00000000, 0x00000000, 0x300 }, \ + /* GR_AND_FR_FP_REGS. */ \ + { 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, \ + 0xFF00FFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0x00000000, \ + 0x00000000, 0x00000000, 0x300 }, \ + /* GR_AND_FR_REGS. */ \ + { 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, \ + 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, \ + 0x00000000, 0x00000000, 0x300 }, \ + /* ALL_REGS. */ \ + { 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, \ + 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, \ + 0xFFFFFFFF, 0xFFFFFFFF, 0x3FF }, \ +} + +/* A C expression whose value is a register class containing hard register + REGNO. In general there is more than one such class; choose a class which + is "minimal", meaning that no smaller class also contains the register. */ +/* The NO_REGS case is primarily for the benefit of rws_access_reg, which + may call here with private (invalid) register numbers, such as + REG_VOLATILE. */ +#define REGNO_REG_CLASS(REGNO) \ +(ADDL_REGNO_P (REGNO) ? ADDL_REGS \ + : GENERAL_REGNO_P (REGNO) ? GR_REGS \ + : FR_FP_REGNO_P (REGNO) ? FR_FP_REGS \ + : FR_INT_REGNO_P (REGNO) ? FR_INT_REGS \ + : PR_REGNO_P (REGNO) ? PR_REGS \ + : BR_REGNO_P (REGNO) ? BR_REGS \ + : NO_REGS) + +/* A macro whose definition is the name of the class to which a valid base + register must belong. 
A base register is one used in an address which is + the register value plus a displacement. */ +#define BASE_REG_CLASS GENERAL_REGS + +/* A macro whose definition is the name of the class to which a valid index + register must belong. An index register is one used in an address where its + value is either multiplied by a scale factor or added to another register + (as well as added to a displacement). */ +#define INDEX_REG_CLASS NO_REGS + +/* A C expression which defines the machine-dependent operand constraint + letters for register classes. If CHAR is such a letter, the value should be + the register class corresponding to it. Otherwise, the value should be + `NO_REGS'. The register letter `r', corresponding to class `GENERAL_REGS', + will not be passed to this macro; you do not need to handle it. */ + +#define REG_CLASS_FROM_LETTER(CHAR) \ +((CHAR) == 'f' ? FR_FP_REGS \ + : (CHAR) == 'e' ? FR_INT_REGS \ + : (CHAR) == 'a' ? ADDL_REGS \ + : (CHAR) == 'b' ? BR_REGS \ + : (CHAR) == 'c' ? PR_REGS \ + : NO_REGS) + +/* A C expression which is nonzero if register number NUM is suitable for use + as a base register in operand addresses. It may be either a suitable hard + register or a pseudo register that has been allocated such a hard reg. */ +#define REGNO_OK_FOR_BASE_P(REGNO) \ + (GENERAL_REGNO_P (REGNO) || GENERAL_REGNO_P (reg_renumber[REGNO])) + +/* A C expression which is nonzero if register number NUM is suitable for use + as an index register in operand addresses. It may be either a suitable hard + register or a pseudo register that has been allocated such a hard reg. */ +#define REGNO_OK_FOR_INDEX_P(NUM) 0 + +/* A C expression that places additional restrictions on the register class to + use when it is necessary to copy value X into a register in class CLASS. + The value is a register class; perhaps CLASS, or perhaps another, smaller + class. 
*/ + +#define PREFERRED_RELOAD_CLASS(X, CLASS) CLASS + +/* You should define this macro to indicate to the reload phase that it may + need to allocate at least one register for a reload in addition to the + register to contain the data. Specifically, if copying X to a register + CLASS in MODE requires an intermediate register, you should define this + to return the largest register class all of whose registers can be used + as intermediate registers or scratch registers. */ + +#define SECONDARY_RELOAD_CLASS(CLASS, MODE, X) \ + ia64_secondary_reload_class (CLASS, MODE, X) + +/* Certain machines have the property that some registers cannot be copied to + some other registers without using memory. Define this macro on those + machines to be a C expression that is non-zero if objects of mode M in + registers of CLASS1 can only be copied to registers of class CLASS2 by + storing a register of CLASS1 into memory and loading that memory location + into a register of CLASS2. */ +/* ??? We may need this for XFmode moves between FR and GR regs. Using + getf.sig/getf.exp almost works, but the result in the GR regs is not + properly formatted and has two extra bits. */ +/* #define SECONDARY_MEMORY_NEEDED(CLASS1, CLASS2, M) */ + +/* A C expression for the maximum number of consecutive registers of + class CLASS needed to hold a value of mode MODE. + This is closely related to the macro `HARD_REGNO_NREGS'. */ + +#define CLASS_MAX_NREGS(CLASS, MODE) \ + ((MODE) == CCmode && (CLASS) == PR_REGS ? 2 \ + : (((CLASS) == FR_REGS || (CLASS) == FR_FP_REGS \ + || (CLASS) == FR_INT_REGS) && (MODE) == XFmode) ? 1 \ + : (GET_MODE_SIZE (MODE) + UNITS_PER_WORD - 1) / UNITS_PER_WORD) + +/* A C expression that defines the machine-dependent operand constraint letters + (`I', `J', `K', .. 'P') that specify particular ranges of integer values. */ + +/* 14 bit signed immediate for arithmetic instructions. 
*/ +#define CONST_OK_FOR_I(VALUE) \ + ((unsigned HOST_WIDE_INT)(VALUE) + 0x2000 < 0x4000) +/* 22 bit signed immediate for arith instructions with r0/r1/r2/r3 source. */ +#define CONST_OK_FOR_J(VALUE) \ + ((unsigned HOST_WIDE_INT)(VALUE) + 0x200000 < 0x400000) +/* 8 bit signed immediate for logical instructions. */ +#define CONST_OK_FOR_K(VALUE) ((unsigned HOST_WIDE_INT)(VALUE) + 0x80 < 0x100) +/* 8 bit adjusted signed immediate for compare pseudo-ops. */ +#define CONST_OK_FOR_L(VALUE) ((unsigned HOST_WIDE_INT)(VALUE) + 0x7F < 0x100) +/* 6 bit unsigned immediate for shift counts. */ +#define CONST_OK_FOR_M(VALUE) ((unsigned HOST_WIDE_INT)(VALUE) < 0x40) +/* 9 bit signed immediate for load/store post-increments. */ +/* ??? N is currently not used. */ +#define CONST_OK_FOR_N(VALUE) ((unsigned HOST_WIDE_INT)(VALUE) + 0x100 < 0x200) +/* 0 for r0. Used by Linux kernel, do not change. */ +#define CONST_OK_FOR_O(VALUE) ((VALUE) == 0) +/* 0 or -1 for dep instruction. */ +#define CONST_OK_FOR_P(VALUE) ((VALUE) == 0 || (VALUE) == -1) + +#define CONST_OK_FOR_LETTER_P(VALUE, C) \ +((C) == 'I' ? CONST_OK_FOR_I (VALUE) \ + : (C) == 'J' ? CONST_OK_FOR_J (VALUE) \ + : (C) == 'K' ? CONST_OK_FOR_K (VALUE) \ + : (C) == 'L' ? CONST_OK_FOR_L (VALUE) \ + : (C) == 'M' ? CONST_OK_FOR_M (VALUE) \ + : (C) == 'N' ? CONST_OK_FOR_N (VALUE) \ + : (C) == 'O' ? CONST_OK_FOR_O (VALUE) \ + : (C) == 'P' ? CONST_OK_FOR_P (VALUE) \ + : 0) + +/* A C expression that defines the machine-dependent operand constraint letters + (`G', `H') that specify particular ranges of `const_double' values. */ + +/* 0.0 and 1.0 for fr0 and fr1. */ +#define CONST_DOUBLE_OK_FOR_G(VALUE) \ + ((VALUE) == CONST0_RTX (GET_MODE (VALUE)) \ + || (VALUE) == CONST1_RTX (GET_MODE (VALUE))) + +#define CONST_DOUBLE_OK_FOR_LETTER_P(VALUE, C) \ + ((C) == 'G' ? 
CONST_DOUBLE_OK_FOR_G (VALUE) : 0)
+
+/* A C expression that defines the optional machine-dependent constraint
+   letters (`Q', `R', `S', `T', `U') that can be used to segregate specific
+   types of operands, usually memory references, for the target machine.  */
+/* ??? This might be useful considering that we have already used all of the
+   integer constant constraint letters.  */
+/* #define EXTRA_CONSTRAINT(VALUE, C) */
+
+/* Basic Stack Layout */
+
+/* Define this macro if pushing a word onto the stack moves the stack pointer
+   to a smaller address.  */
+#define STACK_GROWS_DOWNWARD 1
+
+/* Define this macro if the addresses of local variable slots are at negative
+   offsets from the frame pointer.  */
+#define FRAME_GROWS_DOWNWARD
+
+/* Offset from the frame pointer to the first local variable slot to be
+   allocated.  */
+/* ??? This leaves 16 bytes unused normally, but it looks funny to store locals
+   into the 16-byte reserved area.  */
+/* ??? This isn't very efficient use of the frame pointer.  Better would be
+   to move it down a ways, so that we have positive and negative offsets.  */
+#define STARTING_FRAME_OFFSET \
+  (current_function_pretend_args_size \
+   ? 16 - current_function_pretend_args_size \
+   : 0)
+
+/* Offset from the stack pointer register to the first location at which
+   outgoing arguments are placed.  If not specified, the default value of zero
+   is used.  This is the proper value for most machines.  */
+/* IA64 has a 16 byte scratch area that is at the bottom of the stack.  */
+#define STACK_POINTER_OFFSET 16
+
+/* Offset from the argument pointer register to the first argument's address.
+   On some machines it may depend on the data type of the function.  */
+#define FIRST_PARM_OFFSET(FUNDECL) 0
+
+/* A C expression whose value is RTL representing the value of the return
+   address for the frame COUNT steps up from the current frame, after the
+   prologue.  */
+
+/* ???
Frames other than zero would likely require interpreting the frame + unwind info, so we don't try to support them. We would also need to define + DYNAMIC_CHAIN_ADDRESS and SETUP_FRAME_ADDRESS (for the reg stack flush). */ + +/* ??? This only works for non-leaf functions. In a leaf function, the return + address would be in b0 (rp). */ + +#define RETURN_ADDR_RTX(COUNT, FRAMEADDR) \ + ((count == 0) \ + ? gen_rtx_REG (Pmode, RETURN_ADDRESS_REGNUM) \ + : (rtx) 0) + +/* A C expression whose value is RTL representing the location of the incoming + return address at the beginning of any function, before the prologue. This + RTL is either a `REG', indicating that the return value is saved in `REG', + or a `MEM' representing a location in the stack. This enables DWARF2 + unwind info for C++ EH. */ +#define INCOMING_RETURN_ADDR_RTX gen_rtx_REG (VOIDmode, BR_REG (0)) +/* ??? This is not defined because of three problems. + 1) dwarf2out.c assumes that DWARF_FRAME_RETURN_COLUMN fits in one byte. + The default value is FIRST_PSEUDO_REGISTER which doesn't. This can be + worked around by setting PC_REGNUM to FR_REG (0) which is an otherwise + unused register number. + 2) dwarf2out_frame_debug core dumps while processing prologue insns. We + need to refine which insns have RTX_FRAME_RELATED_P set and which don't. + 3) It isn't possible to turn off EH frame info by defining DWARF2_UNIND_INFO + to zero, despite what the documentation implies, because it is tested in + a few places with #ifdef instead of #if. */ +#undef INCOMING_RETURN_ADDR_RTX + +/* A C expression whose value is an integer giving the offset, in bytes, from + the value of the stack pointer register to the top of the stack frame at the + beginning of any function, before the prologue. The top of the frame is + defined to be the value of the stack pointer in the previous frame, just + before the call instruction. */ +#define INCOMING_FRAME_SP_OFFSET 0 + + +/* Register That Address the Stack Frame. 
*/ + +/* The register number of the stack pointer register, which must also be a + fixed register according to `FIXED_REGISTERS'. On most machines, the + hardware determines which register this is. */ + +#define STACK_POINTER_REGNUM 12 + +/* The register number of the frame pointer register, which is used to access + automatic variables in the stack frame. On some machines, the hardware + determines which register this is. On other machines, you can choose any + register you wish for this purpose. */ + +#define FRAME_POINTER_REGNUM 328 + +/* Register number where frame pointer was saved in the prologue, or zero + if it was not saved. */ + +extern int ia64_fp_regno; + +/* Number of input and local registers used. This is needed for the .regstk + directive, and also for debugging info. */ + +extern int ia64_input_regs; +extern int ia64_local_regs; + +/* The register number of the arg pointer register, which is used to access the + function's argument list. */ +/* r0 won't otherwise be used, so put the always eliminated argument pointer + in it. */ +#define ARG_POINTER_REGNUM R_GR(0) + +/* The register number for the return address register. This is modified by + ia64_expand_prologue to point to the real return address save register. */ + +#define RETURN_ADDRESS_REGNUM 329 + +/* Register numbers used for passing a function's static chain pointer. */ + +#define STATIC_CHAIN_REGNUM 15 + + +/* Eliminating the Frame Pointer and the Arg Pointer */ + +/* A C expression which is nonzero if a function must have and use a frame + pointer. This expression is evaluated in the reload pass. If its value is + nonzero the function will have a frame pointer. */ + +#define FRAME_POINTER_REQUIRED 0 + +/* If defined, this macro specifies a table of register pairs used to eliminate + unneeded registers that point into the stack frame. 
*/ + +#define ELIMINABLE_REGS \ +{ \ + {ARG_POINTER_REGNUM, STACK_POINTER_REGNUM}, \ + {ARG_POINTER_REGNUM, FRAME_POINTER_REGNUM}, \ + {FRAME_POINTER_REGNUM, STACK_POINTER_REGNUM} \ +} + +/* A C expression that returns non-zero if the compiler is allowed to try to + replace register number FROM with register number TO. There are no ia64 + specific restrictions. */ + +#define CAN_ELIMINATE(FROM, TO) 1 + +/* This macro is similar to `INITIAL_FRAME_POINTER_OFFSET'. It specifies the + initial difference between the specified pair of registers. This macro must + be defined if `ELIMINABLE_REGS' is defined. */ +/* ??? I need to decide whether the frame pointer is the old frame SP + or the new frame SP before dynamic allocs. */ +#define INITIAL_ELIMINATION_OFFSET(FROM, TO, OFFSET) \ +{ \ + unsigned int size = ia64_compute_frame_size (get_frame_size ()); \ + \ + if ((FROM) == FRAME_POINTER_REGNUM && (TO) == STACK_POINTER_REGNUM) \ + (OFFSET) = size; \ + else if ((FROM) == ARG_POINTER_REGNUM) \ + { \ + switch (TO) \ + { \ + case FRAME_POINTER_REGNUM: \ + /* Arguments start above the 16 byte save area, unless stdarg \ + in which case we store through the 16 byte save area. */ \ + (OFFSET) = 16 - current_function_pretend_args_size; \ + break; \ + case STACK_POINTER_REGNUM: \ + (OFFSET) = size + 16 - current_function_pretend_args_size; \ + break; \ + default: \ + abort (); \ + } \ + } \ + else \ + abort (); \ +} + + +/* Passing Function Arguments on the Stack */ + +/* Define this macro if an argument declared in a prototype as an integral type + smaller than `int' should actually be passed as an `int'. In addition to + avoiding errors in certain cases of mismatch, it also makes for better code + on certain machines. */ +/* ??? Investigate. */ +/* #define PROMOTE_PROTOTYPES */ + +/* If defined, the maximum amount of space required for outgoing arguments will + be computed and placed into the variable + `current_function_outgoing_args_size'. 
*/ + +#define ACCUMULATE_OUTGOING_ARGS + +/* A C expression that should indicate the number of bytes of its own arguments + that a function pops on returning, or 0 if the function pops no arguments + and the caller must therefore pop them all after the function returns. */ + +#define RETURN_POPS_ARGS(FUNDECL, FUNTYPE, STACK_SIZE) 0 + + +/* Function Arguments in Registers */ + +#define MAX_ARGUMENT_SLOTS 8 +#define MAX_INT_RETURN_SLOTS 4 +#define GR_ARG_FIRST IN_REG (0) +#define GR_RET_FIRST GR_REG (8) +#define GR_RET_LAST GR_REG (11) +#define FR_ARG_FIRST FR_REG (8) +#define FR_RET_FIRST FR_REG (8) +#define FR_RET_LAST FR_REG (15) +#define AR_ARG_FIRST OUT_REG (0) + +/* A C expression that controls whether a function argument is passed in a + register, and which register. */ + +#define FUNCTION_ARG(CUM, MODE, TYPE, NAMED) \ + ia64_function_arg (&CUM, MODE, TYPE, NAMED, 0) + +/* Define this macro if the target machine has "register windows", so that the + register in which a function sees an argument is not necessarily the same + as the one in which the caller passed the argument. */ + +#define FUNCTION_INCOMING_ARG(CUM, MODE, TYPE, NAMED) \ + ia64_function_arg (&CUM, MODE, TYPE, NAMED, 1) + +/* A C expression for the number of words, at the beginning of an argument, + that must be put in registers. The value must be zero for arguments that are + passed entirely in registers or that are entirely pushed on the stack. */ + +#define FUNCTION_ARG_PARTIAL_NREGS(CUM, MODE, TYPE, NAMED) \ + ia64_function_arg_partial_nregs (&CUM, MODE, TYPE, NAMED) + +/* A C expression that indicates when an argument must be passed by reference. + If nonzero for an argument, a copy of that argument is made in memory and a + pointer to the argument is passed instead of the argument itself. The + pointer is passed in whatever way is appropriate for passing a pointer to + that type. 
*/ + +#define FUNCTION_ARG_PASS_BY_REFERENCE(CUM, MODE, TYPE, NAMED) 0 + +/* A C type for declaring a variable that is used as the first argument of + `FUNCTION_ARG' and other related values. For some target machines, the type + `int' suffices and can hold the number of bytes of argument so far. */ + +typedef struct ia64_args +{ + int words; /* # words of arguments so far */ + int fp_regs; /* # FR registers used so far */ + int prototype; /* whether function prototyped */ +} CUMULATIVE_ARGS; + +/* A C statement (sans semicolon) for initializing the variable CUM for the + state at the beginning of the argument list. */ + +#define INIT_CUMULATIVE_ARGS(CUM, FNTYPE, LIBNAME, INDIRECT) \ +do { \ + (CUM).words = 0; \ + (CUM).fp_regs = 0; \ + (CUM).prototype = ((FNTYPE) && TYPE_ARG_TYPES (FNTYPE)) || (LIBNAME); \ +} while (0) + +/* Like `INIT_CUMULATIVE_ARGS' but overrides it for the purposes of finding the + arguments for the function being compiled. If this macro is undefined, + `INIT_CUMULATIVE_ARGS' is used instead. */ + +/* We set prototype to true so that we never try to return a PARALLEL from + function_arg. */ +#define INIT_CUMULATIVE_INCOMING_ARGS(CUM, FNTYPE, LIBNAME) \ +do { \ + (CUM).words = 0; \ + (CUM).fp_regs = 0; \ + (CUM).prototype = 1; \ +} while (0) + +/* A C statement (sans semicolon) to update the summarizer variable CUM to + advance past an argument in the argument list. The values MODE, TYPE and + NAMED describe that argument. Once this is done, the variable CUM is + suitable for analyzing the *following* argument with `FUNCTION_ARG'. */ + +#define FUNCTION_ARG_ADVANCE(CUM, MODE, TYPE, NAMED) \ + ia64_function_arg_advance (&CUM, MODE, TYPE, NAMED) + +/* If defined, a C expression that gives the alignment boundary, in bits, of an + argument with the specified mode and type. */ + +/* Arguments larger than 64 bits require 128 bit alignment. */ + +#define FUNCTION_ARG_BOUNDARY(MODE, TYPE) \ + (((((MODE) == BLKmode ? 
int_size_in_bytes (TYPE) : GET_MODE_SIZE (MODE)) \ + + UNITS_PER_WORD - 1) / UNITS_PER_WORD) > 1 ? 128 : PARM_BOUNDARY) + +/* A C expression that is nonzero if REGNO is the number of a hard register in + which function arguments are sometimes passed. This does *not* include + implicit arguments such as the static chain and the structure-value address. + On many machines, no registers can be used for this purpose since all + function arguments are pushed on the stack. */ +#define FUNCTION_ARG_REGNO_P(REGNO) \ +(((REGNO) >= GR_ARG_FIRST && (REGNO) < (GR_ARG_FIRST + MAX_ARGUMENT_SLOTS)) \ + || ((REGNO) >= FR_ARG_FIRST && (REGNO) < (FR_ARG_FIRST + MAX_ARGUMENT_SLOTS))) + +/* Implement `va_start' for varargs and stdarg. */ +#define EXPAND_BUILTIN_VA_START(stdarg, valist, nextarg) \ + ia64_va_start (stdarg, valist, nextarg) + +/* Implement `va_arg'. */ +#define EXPAND_BUILTIN_VA_ARG(valist, type) \ + ia64_va_arg (valist, type) + +/* How Scalar Function Values are Returned */ + +/* A C expression to create an RTX representing the place where a function + returns a value of data type VALTYPE. */ + +#define FUNCTION_VALUE(VALTYPE, FUNC) \ + ia64_function_value (VALTYPE, FUNC) + +/* A C expression to create an RTX representing the place where a library + function returns a value of mode MODE. */ + +#define LIBCALL_VALUE(MODE) \ + gen_rtx_REG (MODE, \ + ((GET_MODE_CLASS (MODE) == MODE_FLOAT \ + || GET_MODE_CLASS (MODE) == MODE_COMPLEX_FLOAT) \ + ? FR_RET_FIRST : GR_RET_FIRST)) + +/* A C expression that is nonzero if REGNO is the number of a hard register in + which the values of called function may come back. */ + +#define FUNCTION_VALUE_REGNO_P(REGNO) \ + (((REGNO) >= GR_RET_FIRST && (REGNO) <= GR_RET_LAST) \ + || ((REGNO) >= FR_RET_FIRST && (REGNO) <= FR_RET_LAST)) + + +/* How Large Values are Returned */ + +/* A nonzero value says to return the function value in memory, just as large + structures are always returned. 
*/ + +#define RETURN_IN_MEMORY(TYPE) \ + ia64_return_in_memory (TYPE) + +/* If you define this macro to be 0, then the conventions used for structure + and union return values are decided by the `RETURN_IN_MEMORY' macro. */ + +#define DEFAULT_PCC_STRUCT_RETURN 0 + +/* If the structure value address is passed in a register, then + `STRUCT_VALUE_REGNUM' should be the number of that register. */ + +#define STRUCT_VALUE_REGNUM GR_REG (8) + + +/* Caller-Saves Register Allocation */ + +/* A C expression to determine whether it is worthwhile to consider placing a + pseudo-register in a call-clobbered hard register and saving and restoring + it around each function call. The expression should be 1 when this is worth + doing, and 0 otherwise. + + If you don't define this macro, a default is used which is good on most + machines: `4 * CALLS < REFS'. */ +/* ??? Investigate. */ +/* #define CALLER_SAVE_PROFITABLE(REFS, CALLS) */ + + +/* Function Entry and Exit */ + +/* A C compound statement that outputs the assembler code for entry to a + function. */ + +#define FUNCTION_PROLOGUE(FILE, SIZE) \ + ia64_function_prologue (FILE, SIZE) + +/* Define this macro as a C expression that is nonzero if the return + instruction or the function epilogue ignores the value of the stack pointer; + in other words, if it is safe to delete an instruction to adjust the stack + pointer before a return from the function. */ + +#define EXIT_IGNORE_STACK 1 + +/* Define this macro as a C expression that is nonzero for registers + used by the epilogue or the `return' pattern. */ + +#define EPILOGUE_USES(REGNO) ia64_epilogue_uses (REGNO) + +/* A C compound statement that outputs the assembler code for exit from a + function. */ + +#define FUNCTION_EPILOGUE(FILE, SIZE) \ + ia64_function_epilogue (FILE, SIZE) + +/* A C compound statement that outputs the assembler code for a thunk function, + used to implement C++ virtual function calls with multiple inheritance. */ + +/* ??? 
This only supports deltas up to 14 bits. If we need more, then we + must load the delta into a register first. */ + +#define ASM_OUTPUT_MI_THUNK(FILE, THUNK_FNDECL, DELTA, FUNCTION) \ +do { \ + fprintf (FILE, "\tadd r32 = %d, r32\n", (DELTA)); \ + fprintf (FILE, "\tbr "); \ + assemble_name (FILE, XSTR (XEXP (DECL_RTL (FUNCTION), 0), 0)); \ + fprintf (FILE, "\n"); \ +} while (0) + + +/* Generating Code for Profiling. */ + +/* A C statement or compound statement to output to FILE some assembler code to + call the profiling subroutine `mcount'. */ + +/* ??? Unclear if this will actually work. No way to test this currently. */ + +#define FUNCTION_PROFILER(FILE, LABELNO) \ +do { \ + char buf[20]; \ + ASM_GENERATE_INTERNAL_LABEL (buf, "LP", LABELNO); \ + fputs ("\taddl r16 = @ltoff(", FILE); \ + assemble_name (FILE, buf); \ + fputs ("), gp\n", FILE); \ + fputs ("\tmov r17 = r1;;\n", FILE); \ + fputs ("\tld8 out0 = [r16]\n", FILE); \ + fputs ("\tmov r18 = b0\n", FILE); \ + fputs ("\tbr.call.sptk.many rp = mcount;;\n", FILE); \ + fputs ("\tmov b0 = r18\n", FILE); \ + fputs ("\tmov r1 = r17;;\n", FILE); \ +} while (0) + +/* A C statement or compound statement to output to FILE some assembler code to + initialize basic-block profiling for the current object module. */ + +/* ??? Unclear if this will actually work. No way to test this currently. 
*/ + +#define FUNCTION_BLOCK_PROFILER(FILE, LABELNO) \ +do { \ + int labelno = LABELNO; \ + switch (profile_block_flag) \ + { \ + case 2: \ + fputs ("\taddl r16 = @ltoff(LPBX0), gp\n", FILE); \ + fprintf (FILE, "\tmov out1 = %d;;\n", labelno); \ + fputs ("\tld8 out0 = [r16]\n", FILE); \ + fputs ("\tmov r17 = r1\n", FILE); \ + fputs ("\tmov r18 = b0\n", FILE); \ + fputs ("\tbr.call.sptk.many rp = __bb_init_trace_func;;\n", FILE);\ + fputs ("\tmov r1 = r17\n", FILE); \ + fputs ("\tmov b0 = r18;;\n", FILE); \ + break; \ + default: \ + fputs ("\taddl r16 = @ltoff(LPBX0), gp;;\n", FILE); \ + fputs ("\tld8 out0 = [r16];;\n", FILE); \ + fputs ("\tld8 r17 = [out0];;\n", FILE); \ + fputs ("\tcmp.eq p6, p0 = r0, r17;;\n", FILE); \ + fputs ("(p6)\tmov r16 = r1\n", FILE); \ + fputs ("(p6)\tmov r17 = b0\n", FILE); \ + fputs ("(p6)\tbr.call.sptk.many rp = __bb_init_func;;\n", FILE); \ + fputs ("(p6)\tmov r1 = r16\n", FILE); \ + fputs ("(p6)\tmov b0 = r17;;\n", FILE); \ + break; \ + } \ +} while (0) + +/* A C statement or compound statement to output to FILE some assembler code to + increment the count associated with the basic block number BLOCKNO. */ + +/* ??? This can't work unless we mark some registers as fixed, so that we + can use them as temporaries in this macro. We need two registers for -a + profiling and 4 registers for -ax profiling. 
*/ + +#define BLOCK_PROFILER(FILE, BLOCKNO) \ +do { \ + int blockn = BLOCKNO; \ + switch (profile_block_flag) \ + { \ + case 2: \ + fputs ("\taddl r2 = @ltoff(__bb), gp\n", FILE); \ + fputs ("\taddl r3 = @ltoff(LPBX0), gp;;\n", FILE); \ + fprintf (FILE, "\tmov r9 = %d\n", blockn); \ + fputs ("\tld8 r2 = [r2]\n", FILE); \ + fputs ("\tld8 r3 = [r3];;\n", FILE); \ + fputs ("\tadd r8 = 8, r2\n", FILE); \ + fputs ("\tst8 [r2] = r9;;\n", FILE); \ + fputs ("\tst8 [r8] = r3\n", FILE); \ + fputs ("\tbr.call.sptk.many rp = __bb_trace_func\n", FILE); \ + break; \ + \ + default: \ + fputs ("\taddl r2 = @ltoff(LPBX2), gp;;\n", FILE); \ + fputs ("\tld8 r2 = [r2];;\n", FILE); \ + fprintf (FILE, "\taddl r2 = %d, r2;;\n", 8 * blockn); \ + fputs ("\tld8 r3 = [r2];;\n", FILE); \ + fputs ("\tadd r3 = 1, r3;;\n", FILE); \ + fputs ("\tst8 [r2] = r3;;\n", FILE); \ + break; \ + } \ +} while(0) + +/* A C statement or compound statement to output to FILE assembler + code to call function `__bb_trace_ret'. */ + +/* ??? Unclear if this will actually work. No way to test this currently. */ + +/* ??? This needs to be emitted into the epilogue. Perhaps rewrite to emit + rtl and call from ia64_expand_epilogue? */ + +#define FUNCTION_BLOCK_PROFILER_EXIT(FILE) \ + fputs ("\tbr.call.sptk.many rp = __bb_trace_ret\n", FILE); +#undef FUNCTION_BLOCK_PROFILER_EXIT + +/* A C statement or compound statement to save all registers, which may be + clobbered by a function call, including condition codes. */ + +/* ??? We would have to save 20 GRs, 106 FRs, 10 PRs, 2 BRs, and possibly + other things. This is not practical. Perhaps leave this feature (-ax) + unsupported by undefining above macros? */ + +/* #define MACHINE_STATE_SAVE(ID) */ + +/* A C statement or compound statement to restore all registers, including + condition codes, saved by `MACHINE_STATE_SAVE'. */ + +/* ??? We would have to restore 20 GRs, 106 FRs, 10 PRs, 2 BRs, and possibly + other things. This is not practical. 
Perhaps leave this feature (-ax) + unsupported by undefining above macros? */ + +/* #define MACHINE_STATE_RESTORE(ID) */ + + +/* Implementing the Varargs Macros. */ + +/* Define this macro to store the anonymous register arguments into the stack + so that all the arguments appear to have been passed consecutively on the + stack. */ + +#define SETUP_INCOMING_VARARGS(ARGS_SO_FAR, MODE, TYPE, PRETEND_ARGS_SIZE, SECOND_TIME) \ + ia64_setup_incoming_varargs (ARGS_SO_FAR, MODE, TYPE, & PRETEND_ARGS_SIZE, SECOND_TIME) + +/* Define this macro if the location where a function argument is passed + depends on whether or not it is a named argument. */ + +#define STRICT_ARGUMENT_NAMING 1 + + +/* Trampolines for Nested Functions. */ + +/* We need 32 bytes, so we can save the sp, ar.rnat, ar.bsp, and ar.pfs of + the function containing a non-local goto target. */ + +#define STACK_SAVEAREA_MODE(LEVEL) \ + ((LEVEL) == SAVE_NONLOCAL ? OImode : Pmode) + +/* Output assembler code for a block containing the constant parts of + a trampoline, leaving space for the variable parts. + + The trampoline should set the static chain pointer to value placed + into the trampoline and should branch to the specified routine. The + gp doesn't have to be set since that is already done by the caller + of the trampoline. To make the normal indirect-subroutine calling + convention work, the trampoline must look like a function descriptor. + That is, the first word must be the target address, the second + word must be the target's global pointer. 
The complete trampoline + has the following form: + + +----------------+ \ + TRAMP: | TRAMP+32 | | + +----------------+ > fake function descriptor + | gp | | + +----------------+ / + | target addr | + +----------------+ + | static link | + +----------------+ + | mov r2=ip | + + + + | ;; | + +----------------+ + | adds r4=-16,r2 | + + adds r15=-8,r2 + + | ;; | + +----------------+ + | ld8 r4=[r4];; | + + ld8 r15=[r15] + + | mov b6=r4;; | + +----------------+ + | br b6 | + +----------------+ +*/ + +/* ??? Need a version of this and INITIALIZE_TRAMPOLINE for -mno-pic. */ + +#define TRAMPOLINE_TEMPLATE(FILE) \ +{ \ + fprintf (FILE, \ + "\tdata8 0,0,0,0\n" \ + "\t{ mov r2=ip }\n" \ + "\t;;\n" \ + "\t{ adds r4=-16,r2; adds r%d=-8,r2 }\n" \ + "\t;;\n" \ + "\t{ ld8 r4=[r4];; ld8 r%d=[r%d]; mov b6=r4 }\n" \ + "\t;;\n" \ + "\t{ br b6 }\n" \ + "\t;;\n", \ + STATIC_CHAIN_REGNUM, STATIC_CHAIN_REGNUM, \ + STATIC_CHAIN_REGNUM); \ +} + +/* The name of a subroutine to switch to the section in which the trampoline + template is to be placed. + + On ia64, instructions may only be placed in a text segment. */ + +#define TRAMPOLINE_SECTION text_section + +/* A C expression for the size in bytes of the trampoline, as an integer. */ + +#define TRAMPOLINE_SIZE 96 + +/* Alignment required for trampolines, in bits. */ + +#define TRAMPOLINE_ALIGNMENT 256 + +/* A C statement to initialize the variable parts of a trampoline. */ + +#define INITIALIZE_TRAMPOLINE(ADDR, FNADDR, STATIC_CHAIN) \ +{ \ + rtx addr, addr2, addr_reg, fdesc_addr; \ + \ + /* Load function descriptor address into a pseudo. */ \ + fdesc_addr = gen_reg_rtx (DImode); \ + emit_move_insn (fdesc_addr, FNADDR); \ + \ + /* Read target address from function descriptor and store in \ + trampoline. */ \ + addr = memory_address (Pmode, plus_constant (ADDR, 16)); \ + emit_move_insn (gen_rtx_MEM (Pmode, addr), \ + gen_rtx_MEM (Pmode, fdesc_addr)); \ + /* Store static chain in trampoline. 
*/ \ + addr = memory_address (Pmode, plus_constant (ADDR, 24)); \ + emit_move_insn (gen_rtx_MEM (Pmode, addr), STATIC_CHAIN); \ + \ + /* Load GP value from function descriptor and store in trampoline. */\ + addr = memory_address (Pmode, plus_constant (ADDR, 8)); \ + addr2 = memory_address (Pmode, plus_constant (fdesc_addr, 8)); \ + emit_move_insn (gen_rtx_MEM (Pmode, addr), \ + gen_rtx_MEM (Pmode, addr2)); \ + \ + /* Store trampoline entry address in trampoline. */ \ + addr = memory_address (Pmode, ADDR); \ + addr2 = memory_address (Pmode, plus_constant (ADDR, 32)); \ + emit_move_insn (gen_rtx_MEM (Pmode, addr), addr2); \ + \ + /* Flush the relevant 64 bytes from the i-cache. */ \ + addr_reg = force_reg (DImode, plus_constant (ADDR, 0)); \ + emit_insn (gen_rtx_UNSPEC_VOLATILE (VOIDmode, \ + gen_rtvec (1, addr_reg), 3)); \ +} + + +/* Implicit Calls to Library Routines */ + +/* ??? The ia64 linux kernel requires that we use the standard names for + divide and modulo routines. However, if we aren't careful, lib1funcs.asm + will be overridden by libgcc2.c. We avoid this by using different names + for lib1funcs.asm modules, e.g. __divdi3 vs _divdi3. Since lib1funcs.asm + goes into libgcc.a first, the linker will find it first. */ + +/* Define this macro as a C statement that declares additional library routines + or renames existing ones. */ + +/* ??? Disable the SImode divide routines for now. */ +#define INIT_TARGET_OPTABS \ +do { \ + sdiv_optab->handlers[(int) SImode].libfunc = 0; \ + udiv_optab->handlers[(int) SImode].libfunc = 0; \ + smod_optab->handlers[(int) SImode].libfunc = 0; \ + umod_optab->handlers[(int) SImode].libfunc = 0; \ +} while (0) + +/* Define this macro if GNU CC should generate calls to the System V (and ANSI + C) library functions `memcpy' and `memset' rather than the BSD functions + `bcopy' and `bzero'. */ + +#define TARGET_MEM_FUNCTIONS + + +/* Addressing Modes */ + +/* Define this macro if the machine supports post-increment addressing. 
*/ + +#define HAVE_POST_INCREMENT 1 +#define HAVE_POST_DECREMENT 1 + +/* A C expression that is 1 if the RTX X is a constant which is a valid + address. */ + +#define CONSTANT_ADDRESS_P(X) 0 + +/* The max number of registers that can appear in a valid memory address. */ + +#define MAX_REGS_PER_ADDRESS 1 + +/* A C compound statement with a conditional `goto LABEL;' executed if X (an + RTX) is a legitimate memory address on the target machine for a memory + operand of mode MODE. */ + +/* ??? IA64 post increment addressing mode is much more powerful than this. */ + +#define GO_IF_LEGITIMATE_ADDRESS(MODE, X, LABEL) \ +do { \ + if (GET_CODE (X) == REG && REG_OK_FOR_BASE_P (X)) \ + goto LABEL; \ + else if (GET_CODE (X) == SUBREG && GET_CODE (XEXP (X, 0)) == REG \ + && REG_OK_FOR_BASE_P (XEXP (X, 0))) \ + goto LABEL; \ + else if (GET_CODE (X) == POST_INC || GET_CODE (X) == POST_DEC) \ + { \ + if (GET_CODE (XEXP (X, 0)) == REG \ + && REG_OK_FOR_BASE_P (XEXP (X, 0))) \ + goto LABEL; \ + else if (GET_CODE (XEXP (X, 0)) == SUBREG \ + && GET_CODE (XEXP (XEXP (X, 0), 0)) == REG \ + && REG_OK_FOR_BASE_P (XEXP (XEXP (X, 0), 0))) \ + goto LABEL; \ + } \ +} while (0) + +/* A C expression that is nonzero if X (assumed to be a `reg' RTX) is valid for + use as a base register. */ + +#ifdef REG_OK_STRICT +#define REG_OK_FOR_BASE_P(X) REGNO_OK_FOR_BASE_P (REGNO (X)) +#else +#define REG_OK_FOR_BASE_P(X) \ + (GENERAL_REGNO_P (REGNO (X)) || (REGNO (X) >= FIRST_PSEUDO_REGISTER)) +#endif + +/* A C expression that is nonzero if X (assumed to be a `reg' RTX) is valid for + use as an index register. */ + +#define REG_OK_FOR_INDEX_P(X) 0 + +/* A C compound statement that attempts to replace X with a valid memory + address for an operand of mode MODE. + + This must be present, but there is nothing useful to be done here. 
*/ + +#define LEGITIMIZE_ADDRESS(X, OLDX, MODE, WIN) + +/* A C statement or compound statement with a conditional `goto LABEL;' + executed if memory address X (an RTX) can have different meanings depending + on the machine mode of the memory reference it is used for or if the address + is valid for some modes but not others. */ + +/* ??? Strictly speaking this isn't true, because we can use any increment with + any mode. Unfortunately, the RTL implies that the increment depends on the + mode, so we need this for now. */ + +#define GO_IF_MODE_DEPENDENT_ADDRESS(ADDR, LABEL) \ + if (GET_CODE (ADDR) == POST_DEC || GET_CODE (ADDR) == POST_INC) \ + goto LABEL; + +/* A C expression that is nonzero if X is a legitimate constant for an + immediate operand on the target machine. */ + +#define LEGITIMATE_CONSTANT_P(X) \ + (GET_CODE (X) != CONST_DOUBLE || GET_MODE (X) == VOIDmode \ + || GET_MODE (X) == DImode || CONST_DOUBLE_OK_FOR_G (X)) \ + + +/* Condition Code Status */ + +/* On some machines not all possible comparisons are defined, but you can + convert an invalid comparison into a valid one. */ +/* ??? Investigate. See the alpha definition. */ +/* #define CANONICALIZE_COMPARISON(CODE, OP0, OP1) */ + + +/* Describing Relative Costs of Operations */ + +/* A part of a C `switch' statement that describes the relative costs of + constant RTL expressions. */ + +/* ??? This is incomplete. */ + +#define CONST_COSTS(X, CODE, OUTER_CODE) \ + case CONST_INT: \ + if ((X) == const0_rtx) \ + return 0; \ + case CONST_DOUBLE: \ + case CONST: \ + case SYMBOL_REF: \ + case LABEL_REF: \ + return COSTS_N_INSNS (1); + +/* Like `CONST_COSTS' but applies to nonconstant RTL expressions. */ + +/* ??? Should define this to get better optimized code. */ + +/* We make divide expensive, so that divide-by-constant will be optimized to + a multiply. 
*/ + +#define RTX_COSTS(X, CODE, OUTER_CODE) \ + case DIV: \ + case UDIV: \ + case MOD: \ + case UMOD: \ + return COSTS_N_INSNS (20); + +/* An expression giving the cost of an addressing mode that contains ADDRESS. + If not defined, the cost is computed from the ADDRESS expression and the + `CONST_COSTS' values. */ + +#define ADDRESS_COST(ADDRESS) 0 + +/* A C expression for the cost of moving data from a register in class FROM to + one in class TO. */ + +#define REGISTER_MOVE_COST(FROM, TO) \ +((FROM) == BR_REGS && (TO) == BR_REGS ? 8 \ + : ((FROM) == BR_REGS && (TO) != GENERAL_REGS \ + || (TO) == BR_REGS && (FROM) != GENERAL_REGS) ? 6 \ + : ((FROM) == FR_FP_REGS && (TO) == FR_INT_REGS \ + || (FROM) == FR_INT_REGS && (TO) == FR_FP_REGS) ? 4 \ + : 2) + +/* A C expression for the cost of moving data of mode M between a register and + memory. */ +/* ??? Investigate. Might get better code by defining this. */ +/* #define MEMORY_MOVE_COST(M,C,I) */ + +/* A C expression for the cost of a branch instruction. A value of 1 is the + default; other values are interpreted relative to that. */ +/* ??? Investigate. Might get better code by defining this. */ +/* #define BRANCH_COST */ + +/* Define this macro as a C expression which is nonzero if accessing less than + a word of memory (i.e. a `char' or a `short') is no faster than accessing a + word of memory. */ + +#define SLOW_BYTE_ACCESS 1 + +/* Define this macro if it is as good or better to call a constant function + address than to call an address kept in a register. + + Indirect function calls are more expensive that direct function calls, so + don't cse function addresses. */ + +#define NO_FUNCTION_CSE + +/* A C statement (sans semicolon) to update the integer variable COST based on + the relationship between INSN that is dependent on DEP_INSN through the + dependence LINK. */ + +/* ??? Investigate. 
*/ +/* #define ADJUST_COST(INSN, LINK, DEP_INSN, COST) */ + +/* A C statement (sans semicolon) to update the integer scheduling + priority `INSN_PRIORITY(INSN)'. */ + +/* ??? Investigate. */ +/* #define ADJUST_PRIORITY (INSN) */ + + +/* Dividing the output into sections. */ + +/* A C expression whose value is a string containing the assembler operation + that should precede instructions and read-only data. */ + +#define TEXT_SECTION_ASM_OP ".text" + +/* A C expression whose value is a string containing the assembler operation to + identify the following data as writable initialized data. */ + +#define DATA_SECTION_ASM_OP ".data" + +/* If defined, a C expression whose value is a string containing the assembler + operation to identify the following data as uninitialized global data. */ + +#define BSS_SECTION_ASM_OP ".bss" + +/* Define this macro if jump tables (for `tablejump' insns) should be output in + the text section, along with the assembler instructions. */ + +/* ??? It is probably better for the jump tables to be in the rodata section, + which is where they go by default. Unfortunately, that currently does not + work, because of some problem with pcrelative relocations not getting + resolved correctly. */ +/* ??? FIXME ??? rth says that we should use @gprel to solve this problem. */ +/* ??? If jump tables are in the text section, then we can use 4 byte + entries instead of 8 byte entries. */ + +#define JUMP_TABLES_IN_TEXT_SECTION 1 + +/* Define this macro if references to a symbol must be treated differently + depending on something about the variable or function named by the symbol + (such as what section it is in). */ + +#define ENCODE_SECTION_INFO(DECL) ia64_encode_section_info (DECL) + +#define SDATA_NAME_FLAG_CHAR '@' + +#define IA64_DEFAULT_GVALUE 8 + +/* Decode SYM_NAME and store the real name part in VAR, sans the characters + that encode section info. 
*/ + +#define STRIP_NAME_ENCODING(VAR, SYMBOL_NAME) \ + (VAR) = (SYMBOL_NAME) + ((SYMBOL_NAME)[0] == SDATA_NAME_FLAG_CHAR) + + +/* Position Independent Code. */ + +/* The register number of the register used to address a table of static data + addresses in memory. */ + +/* ??? Should modify ia64.md to use pic_offset_table_rtx instead of + gen_rtx_REG (DImode, 1). */ + +/* ??? Should we set flag_pic? Probably need to define + LEGITIMIZE_PIC_OPERAND_P to make that work. */ + +#define PIC_OFFSET_TABLE_REGNUM GR_REG (1) + +/* Define this macro if the register defined by `PIC_OFFSET_TABLE_REGNUM' is + clobbered by calls. */ + +#define PIC_OFFSET_TABLE_REG_CALL_CLOBBERED + + +/* The Overall Framework of an Assembler File. */ + +/* A C string constant describing how to begin a comment in the target + assembler language. The compiler assumes that the comment will end at the + end of the line. */ + +#define ASM_COMMENT_START "//" + +/* A C string constant for text to be output before each `asm' statement or + group of consecutive ones. */ + +/* ??? This won't work with the Intel assembler, because it does not accept + # as a comment start character. However, //APP does not work in gas, so we + can't use that either. Same problem for ASM_APP_OFF below. */ + +#define ASM_APP_ON "#APP\n" + +/* A C string constant for text to be output after each `asm' statement or + group of consecutive ones. */ + +#define ASM_APP_OFF "#NO_APP\n" + + +/* Output of Data. */ + +/* A C statement to output to the stdio stream STREAM an assembler instruction + to assemble a floating-point constant of `XFmode', `DFmode', `SFmode', + respectively, whose value is VALUE. */ + +/* ??? This has not been tested. Long doubles are really 10 bytes not 12 + bytes on ia64. */ + +/* ??? Must reverse the word order for big-endian code? 
*/ + +#define ASM_OUTPUT_LONG_DOUBLE(FILE, VALUE) \ +do { \ + long t[3]; \ + REAL_VALUE_TO_TARGET_LONG_DOUBLE (VALUE, t); \ + fprintf (FILE, "\tdata8 0x%08lx, 0x%08lx, 0x%08lx\n", \ + t[0] & 0xffffffff, t[1] & 0xffffffff, t[2] & 0xffffffff); \ +} while (0) + +/* ??? Must reverse the word order for big-endian code? */ + +#define ASM_OUTPUT_DOUBLE(FILE,VALUE) \ +do { \ + long t[2]; \ + REAL_VALUE_TO_TARGET_DOUBLE (VALUE, t); \ + fprintf (FILE, "\tdata8 0x%08lx%08lx\n", \ + t[1] & 0xffffffff, t[0] & 0xffffffff); \ +} while (0) + +#define ASM_OUTPUT_FLOAT(FILE,VALUE) \ + do { \ + long t; \ + REAL_VALUE_TO_TARGET_SINGLE (VALUE, t); \ + fprintf (FILE, "\tdata4 0x%lx\n", t & 0xffffffff); \ +} while (0) + +/* A C statement to output to the stdio stream STREAM an assembler instruction + to assemble an integer of 1, 2, 4, or 8 bytes, respectively, whose value + is VALUE. */ + +/* This is how to output an assembler line defining a `char' constant. */ + +#define ASM_OUTPUT_CHAR(FILE, VALUE) \ +do { \ + fprintf (FILE, "\t%s\t", ASM_BYTE_OP); \ + output_addr_const (FILE, (VALUE)); \ + fprintf (FILE, "\n"); \ +} while (0) + +/* This is how to output an assembler line defining a `short' constant. */ + +#define ASM_OUTPUT_SHORT(FILE, VALUE) \ +do { \ + fprintf (FILE, "\tdata2\t"); \ + output_addr_const (FILE, (VALUE)); \ + fprintf (FILE, "\n"); \ +} while (0) + +/* This is how to output an assembler line defining an `int' constant. + We also handle symbol output here. */ + +/* ??? For ILP32, also need to handle function addresses here. */ + +#define ASM_OUTPUT_INT(FILE, VALUE) \ +do { \ + fprintf (FILE, "\tdata4\t"); \ + output_addr_const (FILE, (VALUE)); \ + fprintf (FILE, "\n"); \ +} while (0) + +/* This is how to output an assembler line defining a `long' constant. + We also handle symbol output here. 
*/ + +#define ASM_OUTPUT_DOUBLE_INT(FILE, VALUE) \ +do { \ + fprintf (FILE, "\tdata8\t"); \ + if (SYMBOL_REF_FLAG (VALUE)) \ + fprintf (FILE, "@fptr("); \ + output_addr_const (FILE, (VALUE)); \ + if (SYMBOL_REF_FLAG (VALUE)) \ + fprintf (FILE, ")"); \ + fprintf (FILE, "\n"); \ +} while (0) + +/* A C statement to output to the stdio stream STREAM an assembler instruction + to assemble a single byte containing the number VALUE. */ + +#define ASM_OUTPUT_BYTE(STREAM, VALUE) \ + fprintf (STREAM, "\t%s\t0x%x\n", ASM_BYTE_OP, (VALUE)) + +/* These macros are defined as C string constant, describing the syntax in the + assembler for grouping arithmetic expressions. */ + +#define ASM_OPEN_PAREN "(" +#define ASM_CLOSE_PAREN ")" + + +/* Output of Uninitialized Variables. */ + +/* This is all handled by svr4.h. */ + + +/* Output and Generation of Labels. */ + +/* A C statement (sans semicolon) to output to the stdio stream STREAM the + assembler definition of a label named NAME. */ + +/* See the ASM_OUTPUT_LABELREF definition in sysv4.h for an explanation of + why ia64_asm_output_label exists. */ + +extern int ia64_asm_output_label; +#define ASM_OUTPUT_LABEL(STREAM, NAME) \ +do { \ + ia64_asm_output_label = 1; \ + assemble_name (STREAM, NAME); \ + fputs (":\n", STREAM); \ + ia64_asm_output_label = 0; \ +} while (0) + +/* A C statement (sans semicolon) to output to the stdio stream STREAM some + commands that will make the label NAME global; that is, available for + reference from other files. */ + +#define ASM_GLOBALIZE_LABEL(STREAM,NAME) \ +do { \ + fputs ("\t.global ", STREAM); \ + assemble_name (STREAM, NAME); \ + fputs ("\n", STREAM); \ +} while (0) + +/* A C statement (sans semicolon) to output to the stdio stream STREAM any text + necessary for declaring the name of an external symbol named NAME which is + referenced in this compilation but not defined. 
*/ + +#define ASM_OUTPUT_EXTERNAL(FILE, DECL, NAME) \ + ia64_asm_output_external (FILE, DECL, NAME) + +/* A C statement to store into the string STRING a label whose name is made + from the string PREFIX and the number NUM. */ + +#define ASM_GENERATE_INTERNAL_LABEL(LABEL, PREFIX, NUM) \ +do { \ + sprintf (LABEL, "*.%s%d", PREFIX, NUM); \ +} while (0) + +/* A C expression to assign to OUTVAR (which is a variable of type `char *') a + newly allocated string made from the string NAME and the number NUMBER, with + some suitable punctuation added. */ + +/* ??? Not sure if using a ? in the name for Intel as is safe. */ + +#define ASM_FORMAT_PRIVATE_NAME(OUTVAR, NAME, NUMBER) \ +do { \ + (OUTVAR) = (char *) alloca (strlen (NAME) + 12); \ + sprintf (OUTVAR, "%s%c%ld", (NAME), (TARGET_GNU_AS ? '.' : '?'), \ + (long)(NUMBER)); \ +} while (0) + +/* A C statement to output to the stdio stream STREAM assembler code which + defines (equates) the symbol NAME to have the value VALUE. */ + +#define ASM_OUTPUT_DEF(STREAM, NAME, VALUE) \ +do { \ + assemble_name (STREAM, NAME); \ + fputs (" = ", STREAM); \ + assemble_name (STREAM, VALUE); \ + fputc ('\n', STREAM); \ +} while (0) + + +/* Macros Controlling Initialization Routines. */ + +/* This is handled by svr4.h and sysv4.h. */ + + +/* Output of Assembler Instructions. */ + +/* A C initializer containing the assembler's names for the machine registers, + each one as a C string constant. */ + +#define REGISTER_NAMES \ +{ \ + /* General registers. */ \ + "r0", "r1", "r2", "r3", "r4", "r5", "r6", "r7", "r8", "r9", \ + "r10", "r11", "r12", "r13", "r14", "r15", "r16", "r17", "r18", "r19", \ + "r20", "r21", "r22", "r23", "r24", "r25", "r26", "r27", "r28", "r29", \ + "r30", "r31", \ + /* Local registers. 
*/ \ + "loc0", "loc1", "loc2", "loc3", "loc4", "loc5", "loc6", "loc7", \ + "loc8", "loc9", "loc10","loc11","loc12","loc13","loc14","loc15", \ + "loc16","loc17","loc18","loc19","loc20","loc21","loc22","loc23", \ + "loc24","loc25","loc26","loc27","loc28","loc29","loc30","loc31", \ + "loc32","loc33","loc34","loc35","loc36","loc37","loc38","loc39", \ + "loc40","loc41","loc42","loc43","loc44","loc45","loc46","loc47", \ + "loc48","loc49","loc50","loc51","loc52","loc53","loc54","loc55", \ + "loc56","loc57","loc58","loc59","loc60","loc61","loc62","loc63", \ + "loc64","loc65","loc66","loc67","loc68","loc69","loc70","loc71", \ + "loc72","loc73","loc74","loc75","loc76","loc77","loc78","loc79", \ + /* Input registers. */ \ + "in0", "in1", "in2", "in3", "in4", "in5", "in6", "in7", \ + /* Output registers. */ \ + "out0", "out1", "out2", "out3", "out4", "out5", "out6", "out7", \ + /* Floating-point registers. */ \ + "f0", "f1", "f2", "f3", "f4", "f5", "f6", "f7", "f8", "f9", \ + "f10", "f11", "f12", "f13", "f14", "f15", "f16", "f17", "f18", "f19", \ + "f20", "f21", "f22", "f23", "f24", "f25", "f26", "f27", "f28", "f29", \ + "f30", "f31", "f32", "f33", "f34", "f35", "f36", "f37", "f38", "f39", \ + "f40", "f41", "f42", "f43", "f44", "f45", "f46", "f47", "f48", "f49", \ + "f50", "f51", "f52", "f53", "f54", "f55", "f56", "f57", "f58", "f59", \ + "f60", "f61", "f62", "f63", "f64", "f65", "f66", "f67", "f68", "f69", \ + "f70", "f71", "f72", "f73", "f74", "f75", "f76", "f77", "f78", "f79", \ + "f80", "f81", "f82", "f83", "f84", "f85", "f86", "f87", "f88", "f89", \ + "f90", "f91", "f92", "f93", "f94", "f95", "f96", "f97", "f98", "f99", \ + "f100","f101","f102","f103","f104","f105","f106","f107","f108","f109",\ + "f110","f111","f112","f113","f114","f115","f116","f117","f118","f119",\ + "f120","f121","f122","f123","f124","f125","f126","f127", \ + /* Predicate registers. 
*/ \ + "p0", "p1", "p2", "p3", "p4", "p5", "p6", "p7", "p8", "p9", \ + "p10", "p11", "p12", "p13", "p14", "p15", "p16", "p17", "p18", "p19", \ + "p20", "p21", "p22", "p23", "p24", "p25", "p26", "p27", "p28", "p29", \ + "p30", "p31", "p32", "p33", "p34", "p35", "p36", "p37", "p38", "p39", \ + "p40", "p41", "p42", "p43", "p44", "p45", "p46", "p47", "p48", "p49", \ + "p50", "p51", "p52", "p53", "p54", "p55", "p56", "p57", "p58", "p59", \ + "p60", "p61", "p62", "p63", \ + /* Branch registers. */ \ + "b0", "b1", "b2", "b3", "b4", "b5", "b6", "b7", \ + /* Frame pointer. Return address. */ \ + "fp", "ra" \ +} + +/* If defined, a C initializer for an array of structures containing a name and + a register number. This macro defines additional names for hard registers, + thus allowing the `asm' option in declarations to refer to registers using + alternate names. */ + +#define ADDITIONAL_REGISTER_NAMES \ +{ \ + { "gp", R_GR (1) }, \ + { "sp", R_GR (12) }, \ + { "in0", IN_REG (0) }, \ + { "in1", IN_REG (1) }, \ + { "in2", IN_REG (2) }, \ + { "in3", IN_REG (3) }, \ + { "in4", IN_REG (4) }, \ + { "in5", IN_REG (5) }, \ + { "in6", IN_REG (6) }, \ + { "in7", IN_REG (7) }, \ + { "out0", OUT_REG (0) }, \ + { "out1", OUT_REG (1) }, \ + { "out2", OUT_REG (2) }, \ + { "out3", OUT_REG (3) }, \ + { "out4", OUT_REG (4) }, \ + { "out5", OUT_REG (5) }, \ + { "out6", OUT_REG (6) }, \ + { "out7", OUT_REG (7) }, \ + { "loc0", LOC_REG (0) }, \ + { "loc1", LOC_REG (1) }, \ + { "loc2", LOC_REG (2) }, \ + { "loc3", LOC_REG (3) }, \ + { "loc4", LOC_REG (4) }, \ + { "loc5", LOC_REG (5) }, \ + { "loc6", LOC_REG (6) }, \ + { "loc7", LOC_REG (7) }, \ + { "loc8", LOC_REG (8) }, \ + { "loc9", LOC_REG (9) }, \ + { "loc10", LOC_REG (10) }, \ + { "loc11", LOC_REG (11) }, \ + { "loc12", LOC_REG (12) }, \ + { "loc13", LOC_REG (13) }, \ + { "loc14", LOC_REG (14) }, \ + { "loc15", LOC_REG (15) }, \ + { "loc16", LOC_REG (16) }, \ + { "loc17", LOC_REG (17) }, \ + { "loc18", LOC_REG (18) }, \ + { "loc19", LOC_REG 
(19) }, \ + { "loc20", LOC_REG (20) }, \ + { "loc21", LOC_REG (21) }, \ + { "loc22", LOC_REG (22) }, \ + { "loc23", LOC_REG (23) }, \ + { "loc24", LOC_REG (24) }, \ + { "loc25", LOC_REG (25) }, \ + { "loc26", LOC_REG (26) }, \ + { "loc27", LOC_REG (27) }, \ + { "loc28", LOC_REG (28) }, \ + { "loc29", LOC_REG (29) }, \ + { "loc30", LOC_REG (30) }, \ + { "loc31", LOC_REG (31) }, \ + { "loc32", LOC_REG (32) }, \ + { "loc33", LOC_REG (33) }, \ + { "loc34", LOC_REG (34) }, \ + { "loc35", LOC_REG (35) }, \ + { "loc36", LOC_REG (36) }, \ + { "loc37", LOC_REG (37) }, \ + { "loc38", LOC_REG (38) }, \ + { "loc39", LOC_REG (39) }, \ + { "loc40", LOC_REG (40) }, \ + { "loc41", LOC_REG (41) }, \ + { "loc42", LOC_REG (42) }, \ + { "loc43", LOC_REG (43) }, \ + { "loc44", LOC_REG (44) }, \ + { "loc45", LOC_REG (45) }, \ + { "loc46", LOC_REG (46) }, \ + { "loc47", LOC_REG (47) }, \ + { "loc48", LOC_REG (48) }, \ + { "loc49", LOC_REG (49) }, \ + { "loc50", LOC_REG (50) }, \ + { "loc51", LOC_REG (51) }, \ + { "loc52", LOC_REG (52) }, \ + { "loc53", LOC_REG (53) }, \ + { "loc54", LOC_REG (54) }, \ + { "loc55", LOC_REG (55) }, \ + { "loc56", LOC_REG (56) }, \ + { "loc57", LOC_REG (57) }, \ + { "loc58", LOC_REG (58) }, \ + { "loc59", LOC_REG (59) }, \ + { "loc60", LOC_REG (60) }, \ + { "loc61", LOC_REG (61) }, \ + { "loc62", LOC_REG (62) }, \ + { "loc63", LOC_REG (63) }, \ + { "loc64", LOC_REG (64) }, \ + { "loc65", LOC_REG (65) }, \ + { "loc66", LOC_REG (66) }, \ + { "loc67", LOC_REG (67) }, \ + { "loc68", LOC_REG (68) }, \ + { "loc69", LOC_REG (69) }, \ + { "loc70", LOC_REG (70) }, \ + { "loc71", LOC_REG (71) }, \ + { "loc72", LOC_REG (72) }, \ + { "loc73", LOC_REG (73) }, \ + { "loc74", LOC_REG (74) }, \ + { "loc75", LOC_REG (75) }, \ + { "loc76", LOC_REG (76) }, \ + { "loc77", LOC_REG (77) }, \ + { "loc78", LOC_REG (78) }, \ + { "loc79", LOC_REG (79) }, \ +} + +/* A C compound statement to output to stdio stream STREAM the assembler syntax + for an instruction operand X.
X is an RTL expression. */ + +#define PRINT_OPERAND(STREAM, X, CODE) \ + ia64_print_operand (STREAM, X, CODE) + +/* A C expression which evaluates to true if CODE is a valid punctuation + character for use in the `PRINT_OPERAND' macro. */ + +/* ??? Keep this around for now, as we might need it later. */ + +/* #define PRINT_OPERAND_PUNCT_VALID_P(CODE) */ + +/* A C compound statement to output to stdio stream STREAM the assembler syntax + for an instruction operand that is a memory reference whose address is X. X + is an RTL expression. */ + +#define PRINT_OPERAND_ADDRESS(STREAM, X) \ + ia64_print_operand_address (STREAM, X) + +/* If defined, C string expressions to be used for the `%R', `%L', `%U', and + `%I' options of `asm_fprintf' (see `final.c'). */ + +#define REGISTER_PREFIX "" +#define LOCAL_LABEL_PREFIX "." +#define USER_LABEL_PREFIX "" +#define IMMEDIATE_PREFIX "" + + +/* Output of dispatch tables. */ + +/* This macro should be provided on machines where the addresses in a dispatch + table are relative to the table's own address. */ + +/* ??? Depends on the pointer size. */ + +#define ASM_OUTPUT_ADDR_DIFF_ELT(STREAM, BODY, VALUE, REL) \ + fprintf (STREAM, "\tdata8 .L%d-.L%d\n", VALUE, REL) + +/* This is how to output an element of a case-vector that is absolute. + (Ia64 does not use such vectors, but we must define this macro anyway.) */ + +#define ASM_OUTPUT_ADDR_VEC_ELT(STREAM, VALUE) abort () + +/* Define this if something special must be output at the end of a jump-table. + We need to align back to a 16 byte boundary because offsets are smaller than + instructions. */ + +#define ASM_OUTPUT_CASE_END(STREAM, NUM, TABLE) ASM_OUTPUT_ALIGN (STREAM, 4) + +/* Jump tables only need 8 byte alignment. */ + +#define ADDR_VEC_ALIGN(ADDR_VEC) 3 + + +/* Assembler Commands for Exception Regions. */ + +/* ??? This entire section of ia64.h needs to be implemented and then cleaned + up. */ + +/* A C expression to output text to mark the start of an exception region. 
+ + This macro need not be defined on most platforms. */ +/* #define ASM_OUTPUT_EH_REGION_BEG() */ + +/* A C expression to output text to mark the end of an exception region. + + This macro need not be defined on most platforms. */ +/* #define ASM_OUTPUT_EH_REGION_END() */ + +/* A C expression to switch to the section in which the main exception table is + to be placed. The default is a section named `.gcc_except_table' on machines + that support named sections via `ASM_OUTPUT_SECTION_NAME', otherwise if `-fpic' + or `-fPIC' is in effect, the `data_section', otherwise the + `readonly_data_section'. */ +/* #define EXCEPTION_SECTION() */ + +/* If defined, a C string constant for the assembler operation to switch to the + section for exception handling frame unwind information. If not defined, + GNU CC will provide a default definition if the target supports named + sections. `crtstuff.c' uses this macro to switch to the appropriate + section. + + You should define this symbol if your target supports DWARF 2 frame unwind + information and the default definition does not work. */ +/* #define EH_FRAME_SECTION_ASM_OP */ + +/* A C expression that is nonzero if the normal exception table output should + be omitted. + + This macro need not be defined on most platforms. */ +/* #define OMIT_EH_TABLE() */ + +/* Alternate runtime support for looking up an exception at runtime and finding + the associated handler, if the default method won't work. + + This macro need not be defined on most platforms. */ +/* #define EH_TABLE_LOOKUP() */ + +/* A C expression that decides whether or not the current function needs to + have a function unwinder generated for it. See the file `except.c' for + details on when to define this, and how. */ +/* #define DOESNT_NEED_UNWINDER */ + +/* An rtx used to mask the return address found via RETURN_ADDR_RTX, so that it + does not contain any extraneous set bits in it. 
*/ +/* #define MASK_RETURN_ADDR */ + +/* Define this macro to 0 if your target supports DWARF 2 frame unwind + information, but it does not yet work with exception handling. Otherwise, + if your target supports this information (if it defines + `INCOMING_RETURN_ADDR_RTX' and either `UNALIGNED_INT_ASM_OP' or + `OBJECT_FORMAT_ELF'), GCC will provide a default definition of 1. + + If this macro is defined to 1, the DWARF 2 unwinder will be the default + exception handling mechanism; otherwise, setjmp/longjmp will be used by + default. + + If this macro is defined to anything, the DWARF 2 unwinder will be used + instead of inline unwinders and __unwind_function in the non-setjmp case. */ +/* #define DWARF2_UNWIND_INFO */ + + +/* Assembler Commands for Alignment. */ + +/* The alignment (log base 2) to put in front of LABEL, which follows + a BARRIER. */ + +/* ??? Investigate. */ + +/* ??? Emitting align directives increases the size of the line number debug + info, because each .align forces use of an extended opcode. Perhaps try + to fix this in the assembler? */ + +/* #define LABEL_ALIGN_AFTER_BARRIER(LABEL) */ + +/* The desired alignment for the location counter at the beginning + of a loop. */ + +/* ??? Investigate. */ +/* #define LOOP_ALIGN(LABEL) */ + +/* Define this macro if `ASM_OUTPUT_SKIP' should not be used in the text + section because it fails put zeros in the bytes that are skipped. */ + +#define ASM_NO_SKIP_IN_TEXT 1 + +/* A C statement to output to the stdio stream STREAM an assembler command to + advance the location counter to a multiple of 2 to the POWER bytes. */ + +#define ASM_OUTPUT_ALIGN(STREAM, POWER) \ + fprintf (STREAM, "\t.align %d\n", 1<<(POWER)) + + +/* Macros Affecting all Debug Formats. */ + +/* This is handled in svr4.h and sysv4.h. */ + + +/* Specific Options for DBX Output. */ + +/* This is handled by dbxelf.h which is included by svr4.h. */ + + +/* Open ended Hooks for DBX Output. */ + +/* Likewise. */ + + +/* File names in DBX format. 
*/ + +/* Likewise. */ + + +/* Macros for SDB and Dwarf Output. */ + +/* Define this macro if GNU CC should produce dwarf version 2 format debugging + output in response to the `-g' option. */ + +#define DWARF2_DEBUGGING_INFO + +/* Section names for DWARF2 debug info. */ + +#define DEBUG_INFO_SECTION ".debug_info, \"\", \"progbits\"" +#define ABBREV_SECTION ".debug_abbrev, \"\", \"progbits\"" +#define ARANGES_SECTION ".debug_aranges, \"\", \"progbits\"" +#define DEBUG_LINE_SECTION ".debug_line, \"\", \"progbits\"" +#define PUBNAMES_SECTION ".debug_pubnames, \"\", \"progbits\"" + +/* C string constants giving the pseudo-op to use for a sequence of + 2, 4, and 8 byte unaligned constants. dwarf2out.c needs these. */ + +#define UNALIGNED_SHORT_ASM_OP "data2.ua" +#define UNALIGNED_INT_ASM_OP "data4.ua" +#define UNALIGNED_DOUBLE_INT_ASM_OP "data8.ua" + +/* We need to override the default definition for this in dwarf2out.c so that + we can emit the necessary # postfix. */ +#define ASM_NAME_TO_STRING(STR, NAME) \ + do { \ + if ((NAME)[0] == '*') \ + dyn_string_append (STR, NAME + 1); \ + else \ + { \ + char *newstr; \ + STRIP_NAME_ENCODING (newstr, NAME); \ + dyn_string_append (STR, user_label_prefix); \ + dyn_string_append (STR, newstr); \ + dyn_string_append (STR, "#"); \ + } \ + } \ + while (0) + +#define DWARF2_ASM_LINE_DEBUG_INFO (TARGET_DWARF2_ASM) + + +/* Cross Compilation and Floating Point. */ + +/* Define to enable software floating point emulation. */ +#define REAL_ARITHMETIC + + +/* Miscellaneous Parameters. */ + +/* Define this if you have defined special-purpose predicates in the file + `MACHINE.c'. For each predicate, list all rtl codes that can be in + expressions matched by the predicate. 
*/ + +#define PREDICATE_CODES \ +{ "call_operand", {SUBREG, REG, SYMBOL_REF}}, \ +{ "sdata_symbolic_operand", {SYMBOL_REF, CONST}}, \ +{ "symbolic_operand", {SYMBOL_REF, CONST, LABEL_REF}}, \ +{ "function_operand", {SYMBOL_REF}}, \ +{ "setjmp_operand", {SYMBOL_REF}}, \ +{ "move_operand", {SUBREG, REG, MEM, CONST_INT, CONST_DOUBLE, \ + CONSTANT_P_RTX, SYMBOL_REF, CONST, LABEL_REF}}, \ +{ "reg_or_0_operand", {SUBREG, REG, CONST_INT}}, \ +{ "reg_or_6bit_operand", {SUBREG, REG, CONST_INT, CONSTANT_P_RTX}}, \ +{ "reg_or_8bit_operand", {SUBREG, REG, CONST_INT, CONSTANT_P_RTX}}, \ +{ "reg_or_8bit_adjusted_operand", {SUBREG, REG, CONST_INT, \ + CONSTANT_P_RTX}}, \ +{ "reg_or_8bit_and_adjusted_operand", {SUBREG, REG, CONST_INT, \ + CONSTANT_P_RTX}}, \ +{ "reg_or_14bit_operand", {SUBREG, REG, CONST_INT, CONSTANT_P_RTX}}, \ +{ "reg_or_22bit_operand", {SUBREG, REG, CONST_INT, CONSTANT_P_RTX}}, \ +{ "shift_count_operand", {SUBREG, REG, CONST_INT, CONSTANT_P_RTX}}, \ +{ "shift_32bit_count_operand", {SUBREG, REG, CONST_INT, \ + CONSTANT_P_RTX}}, \ +{ "shladd_operand", {CONST_INT}}, \ +{ "fetchadd_operand", {CONST_INT}}, \ +{ "reg_or_fp01_operand", {SUBREG, REG, CONST_DOUBLE, CONSTANT_P_RTX}}, \ +{ "normal_comparison_operator", {EQ, NE, GT, LE, GTU, LEU}}, \ +{ "adjusted_comparison_operator", {LT, GE, LTU, GEU}}, \ +{ "call_multiple_values_operation", {PARALLEL}}, + +/* An alias for a machine mode name. This is the machine mode that elements of + a jump-table should have. */ + +#define CASE_VECTOR_MODE Pmode + +/* Define as C expression which evaluates to nonzero if the tablejump + instruction expects the table to contain offsets from the address of the + table. */ + +#define CASE_VECTOR_PC_RELATIVE 1 + +/* Define this macro if operations between registers with integral mode smaller + than a word are always performed on the entire register. 
*/ + +#define WORD_REGISTER_OPERATIONS + +/* Define this macro to be a C expression indicating when insns that read + memory in MODE, an integral mode narrower than a word, set the bits outside + of MODE to be either the sign-extension or the zero-extension of the data + read. */ + +#define LOAD_EXTEND_OP(MODE) ZERO_EXTEND + +/* An alias for a tree code that should be used by default for conversion of + floating point values to fixed point. */ + +/* ??? Looks like this macro is obsolete and should be deleted everywhere. */ + +#define IMPLICIT_FIX_EXPR FIX_ROUND_EXPR + +/* An alias for a tree code that is the easiest kind of division to compile + code for in the general case. */ + +#define EASY_DIV_EXPR TRUNC_DIV_EXPR + +/* The maximum number of bytes that a single instruction can move quickly from + memory to memory. */ +#define MOVE_MAX 8 + +/* A C expression which is nonzero if on this machine it is safe to "convert" + an integer of INPREC bits to one of OUTPREC bits (where OUTPREC is smaller + than INPREC) by merely operating on it as if it had only OUTPREC bits. */ + +#define TRULY_NOOP_TRUNCATION(OUTPREC, INPREC) 1 + +/* A C expression describing the value returned by a comparison operator with + an integral mode and stored by a store-flag instruction (`sCOND') when the + condition is true. */ + +/* ??? Investigate using -1 instead of 1. */ + +#define STORE_FLAG_VALUE 1 + +/* An alias for the machine mode for pointers. */ + +/* ??? This would change if we had ILP32 support. */ + +#define Pmode DImode + +/* An alias for the machine mode used for memory references to functions being + called, in `call' RTL expressions. */ + +#define FUNCTION_MODE Pmode + +/* Define this macro to handle System V style pragmas: #pragma pack and + #pragma weak. Note, #pragma weak will only be supported if SUPPORT_WEAK is + defined. 
*/ + +#define HANDLE_SYSV_PRAGMA + +/* If defined, a C expression whose value is nonzero if IDENTIFIER with + arguments ARGS is a valid machine specific attribute for TYPE. The + attributes in ATTRIBUTES have previously been assigned to TYPE. */ + +#define VALID_MACHINE_TYPE_ATTRIBUTE(TYPE, ATTRIBUTES, IDENTIFIER, ARGS) \ + ia64_valid_type_attribute (TYPE, ATTRIBUTES, IDENTIFIER, ARGS) + +/* In rare cases, correct code generation requires extra machine dependent + processing between the second jump optimization pass and delayed branch + scheduling. On those machines, define this macro as a C statement to act on + the code starting at INSN. */ + +#define MACHINE_DEPENDENT_REORG(INSN) ia64_reorg (INSN) + +/* A C expression for the maximum number of instructions to execute via + conditional execution instructions instead of a branch. A value of + BRANCH_COST+1 is the default if the machine does not use + cc0, and 1 if it does use cc0. */ +/* ??? Investigate. */ +/* #define MAX_CONDITIONAL_EXECUTE */ + +/* Indicate how many instructions can be issued at the same time. */ + +/* ??? For now, we just schedule to fill bundles. 
*/ + +#define ISSUE_RATE 3 + +enum ia64_builtins +{ + IA64_BUILTIN_SYNCHRONIZE, + + IA64_BUILTIN_FETCH_AND_ADD_SI, + IA64_BUILTIN_FETCH_AND_SUB_SI, + IA64_BUILTIN_FETCH_AND_OR_SI, + IA64_BUILTIN_FETCH_AND_AND_SI, + IA64_BUILTIN_FETCH_AND_XOR_SI, + IA64_BUILTIN_FETCH_AND_NAND_SI, + + IA64_BUILTIN_ADD_AND_FETCH_SI, + IA64_BUILTIN_SUB_AND_FETCH_SI, + IA64_BUILTIN_OR_AND_FETCH_SI, + IA64_BUILTIN_AND_AND_FETCH_SI, + IA64_BUILTIN_XOR_AND_FETCH_SI, + IA64_BUILTIN_NAND_AND_FETCH_SI, + + IA64_BUILTIN_BOOL_COMPARE_AND_SWAP_SI, + IA64_BUILTIN_VAL_COMPARE_AND_SWAP_SI, + + IA64_BUILTIN_SYNCHRONIZE_SI, + + IA64_BUILTIN_LOCK_TEST_AND_SET_SI, + + IA64_BUILTIN_LOCK_RELEASE_SI, + + IA64_BUILTIN_FETCH_AND_ADD_DI, + IA64_BUILTIN_FETCH_AND_SUB_DI, + IA64_BUILTIN_FETCH_AND_OR_DI, + IA64_BUILTIN_FETCH_AND_AND_DI, + IA64_BUILTIN_FETCH_AND_XOR_DI, + IA64_BUILTIN_FETCH_AND_NAND_DI, + + IA64_BUILTIN_ADD_AND_FETCH_DI, + IA64_BUILTIN_SUB_AND_FETCH_DI, + IA64_BUILTIN_OR_AND_FETCH_DI, + IA64_BUILTIN_AND_AND_FETCH_DI, + IA64_BUILTIN_XOR_AND_FETCH_DI, + IA64_BUILTIN_NAND_AND_FETCH_DI, + + IA64_BUILTIN_BOOL_COMPARE_AND_SWAP_DI, + IA64_BUILTIN_VAL_COMPARE_AND_SWAP_DI, + + IA64_BUILTIN_SYNCHRONIZE_DI, + + IA64_BUILTIN_LOCK_TEST_AND_SET_DI, + + IA64_BUILTIN_LOCK_RELEASE_DI +}; + +/* Codes for expand_compare_and_swap and expand_swap_and_compare. */ +enum fetchop_code { + IA64_ADD_OP, IA64_SUB_OP, IA64_OR_OP, IA64_AND_OP, IA64_XOR_OP, IA64_NAND_OP +}; + +#define MD_INIT_BUILTINS do { \ + ia64_init_builtins (); \ + } while (0) + +#define MD_EXPAND_BUILTIN(EXP, TARGET, SUBTARGET, MODE, IGNORE) \ + ia64_expand_builtin ((EXP), (TARGET), (SUBTARGET), (MODE), (IGNORE)) + +/* End of ia64.h */ diff --git a/gcc/config/ia64/ia64.md b/gcc/config/ia64/ia64.md new file mode 100644 index 0000000..405d02a --- /dev/null +++ b/gcc/config/ia64/ia64.md @@ -0,0 +1,3591 @@ +;; IA-64 Machine description template +;; Copyright (C) 1999 Cygnus Solutions. +;; Contributed by James E. Wilson and +;; David Mosberger . 
+ +;; This file is part of GNU CC. + +;; GNU CC is free software; you can redistribute it and/or modify +;; it under the terms of the GNU General Public License as published by +;; the Free Software Foundation; either version 2, or (at your option) +;; any later version. + +;; GNU CC is distributed in the hope that it will be useful, +;; but WITHOUT ANY WARRANTY; without even the implied warranty of +;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +;; GNU General Public License for more details. + +;; You should have received a copy of the GNU General Public License +;; along with GNU CC; see the file COPYING. If not, write to +;; the Free Software Foundation, 59 Temple Place - Suite 330, +;; Boston, MA 02111-1307, USA. + +;;- See file "rtl.def" for documentation on define_insn, match_*, et. al. + +;; ??? Add support for long double XFmode patterns. + +;; ??? register_operand accepts (subreg:DI (mem:SI X)) which forces later +;; reload. This will be fixed once scheduling support is turned on. + +;; ??? Optimize for post-increment addressing modes. + +;; ??? fselect is not supported, because there is no integer register +;; equivalent. + +;; ??? fp abs/min/max instructions may also work for integer values. + +;; ??? Would a predicate_reg_operand predicate be useful? The HP one is buggy, +;; it assumes the operand is a register and takes REGNO of it without checking. + +;; ??? Would a branch_reg_operand predicate be useful? The HP one is buggy, +;; it assumes the operand is a register and takes REGNO of it without checking. + +;; ??? Go through list of documented named patterns and look for more to +;; implement. + +;; ??? Go through instruction manual and look for more instructions that +;; can be emitted. + +;; ??? Add function unit scheduling info for Itanium (TM) processor. + + +;; :::::::::::::::::::: +;; :: +;; :: Attributes +;; :: +;; :::::::::::::::::::: + +;; Instruction type. 
This primarily determines how instructions can be +;; packed in bundles, and secondarily affects scheduling to function units. + +;; A alu, can go in I or M syllable of a bundle +;; I integer +;; M memory +;; F floating-point +;; B branch +;; L long immediate, takes two syllables +;; S stop bit + +;; ??? Should not have any pattern with type unknown. Perhaps add code to +;; check this in md_reorg? Currently use unknown for patterns which emit +;; multiple instructions, patterns which emit 0 instructions, and patterns +;; which emit instruction that can go in any slot (e.g. nop). + +(define_attr "type" "unknown,A,I,M,F,B,L,S" (const_string "unknown")) + + +;; :::::::::::::::::::: +;; :: +;; :: Function Units +;; :: +;; :::::::::::::::::::: + +;; Each usage of a function units by a class of insns is specified with a +;; `define_function_unit' expression, which looks like this: +;; (define_function_unit NAME MULTIPLICITY SIMULTANEITY TEST READY-DELAY +;; ISSUE-DELAY [CONFLICT-LIST]) + +;; This default scheduling info seeks to pack instructions into bundles +;; efficiently to reduce code size, so we just list how many of each +;; instruction type can go in a bundle. ISSUE_RATE is set to 3. + +;; ??? Add scheduler ready-list hook (MD_SCHED_REORDER) that orders +;; instructions, so that the next instruction can fill the next bundle slot. +;; This really needs to know where the stop bits are though. + +;; ??? Use MD_SCHED_REORDER to put alloc first instead of using an unspec +;; volatile. Use ADJUST_PRIORITY to set the priority of alloc very high to +;; make it schedule first. + +;; ??? Modify the md_reorg code that emits stop bits so that instead of putting +;; them in the last possible place, we put them in places where bundles allow +;; them. This should reduce code size, but may decrease performance if we end +;; up with more stop bits than the minimum we need. + +;; Alu instructions can execute on either the integer or memory function +;; unit. 
We indicate this by defining an alu function unit, and then marking +;; it as busy everytime we issue a integer or memory type instruction. + +(define_function_unit "alu" 3 1 (eq_attr "type" "A,I,M") 1 0) + +(define_function_unit "integer" 2 1 (eq_attr "type" "I") 1 0) + +(define_function_unit "memory" 3 1 (eq_attr "type" "M") 1 0) + +(define_function_unit "floating_point" 1 1 (eq_attr "type" "F") 1 0) + +(define_function_unit "branch" 3 1 (eq_attr "type" "B") 1 0) + +;; ??? This isn't quite right, because we can only fit two insns in a bundle +;; when using an L type instruction. That isn't modeled currently. + +(define_function_unit "long_immediate" 1 1 (eq_attr "type" "L") 1 0) + + +;; :::::::::::::::::::: +;; :: +;; :: Moves +;; :: +;; :::::::::::::::::::: + +(define_expand "movqi" + [(set (match_operand:QI 0 "general_operand" "") + (match_operand:QI 1 "general_operand" ""))] + "" + " +{ + if (! reload_in_progress && ! reload_completed + && GET_CODE (operands[0]) == MEM + && GET_CODE (operands[1]) == MEM) + operands[1] = copy_to_mode_reg (QImode, operands[1]); +}") + +(define_insn "*movqi_internal" + [(set (match_operand:QI 0 "nonimmediate_operand" "=r,r,r,m,r,*e") + (match_operand:QI 1 "move_operand" "r,J,m,r,*e,r"))] + "! memory_operand (operands[0], QImode) + || ! memory_operand (operands[1], QImode)" + "@ + mov %0 = %1 + addl %0 = %1, r0 + ld1%O1 %0 = %1%P1 + st1%Q0 %0 = %1%P0 + getf.sig %0 = %1 + setf.sig %0 = %1" + [(set_attr "type" "A,A,M,M,M,M")]) + +(define_expand "movhi" + [(set (match_operand:HI 0 "general_operand" "") + (match_operand:HI 1 "general_operand" ""))] + "" + " +{ + if (! reload_in_progress && ! reload_completed + && GET_CODE (operands[0]) == MEM + && GET_CODE (operands[1]) == MEM) + operands[1] = copy_to_mode_reg (HImode, operands[1]); +}") + +(define_insn "*movhi_internal" + [(set (match_operand:HI 0 "nonimmediate_operand" "=r,r,r,m,r,*e") + (match_operand:HI 1 "move_operand" "r,J,m,r,*e,r"))] + "! 
memory_operand (operands[0], HImode) + || !memory_operand (operands[1], HImode)" + "@ + mov %0 = %1 + addl %0 = %1, r0 + ld2%O1 %0 = %1%P1 + st2%Q0 %0 = %1%P0 + getf.sig %0 = %1 + setf.sig %0 = %1" + [(set_attr "type" "A,A,M,M,M,M")]) + +(define_expand "movsi" + [(set (match_operand:SI 0 "general_operand" "") + (match_operand:SI 1 "general_operand" ""))] + "" + " +{ + if (! reload_in_progress && ! reload_completed + && GET_CODE (operands[0]) == MEM + && GET_CODE (operands[1]) == MEM) + operands[1] = copy_to_mode_reg (SImode, operands[1]); +}") + +(define_insn "*movsi_internal" + [(set (match_operand:SI 0 "nonimmediate_operand" "=r,r,r,r,m,r,*e,*e,r,*f") + (match_operand:SI 1 "move_operand" "r,J,i,m,r,*e,r,*e,*f,r"))] + "! memory_operand (operands[0], SImode) + || ! memory_operand (operands[1], SImode)" + "@ + mov %0 = %1 + addl %0 = %1, r0 + movl %0 = %1 + ld4%O1 %0 = %1%P1 + st4%Q0 %0 = %1%P0 + getf.sig %0 = %1 + setf.sig %0 = %1 + mov %0 = %1 + getf.s %0 = %1 + setf.s %0 = %1" + [(set_attr "type" "A,A,L,M,M,M,M,F,M,M")]) + +(define_expand "movdi" + [(set (match_operand:DI 0 "general_operand" "") + (match_operand:DI 1 "general_operand" ""))] + "" + " +{ + /* ??? Should generalize this, so that we can also support 32 bit + pointers. */ + if (! TARGET_NO_PIC && symbolic_operand (operands[1], DImode)) + { + rtx temp; + + /* Operand[0] could be a MEM, which isn't a valid destination for the + PIC load address patterns. */ + if (! register_operand (operands[0], DImode)) + temp = gen_reg_rtx (DImode); + else + temp = operands[0]; + + if (GET_CODE (operands[1]) == SYMBOL_REF && SYMBOL_REF_FLAG (operands[1])) + emit_insn (gen_load_fptr (temp, operands[1])); + else if (sdata_symbolic_operand (operands[1], DImode)) + emit_insn (gen_load_gprel (temp, operands[1])); + else + emit_insn (gen_load_symptr (temp, operands[1])); + + if (temp == operands[0]) + DONE; + + operands[1] = temp; + } + + if (! reload_in_progress && ! 
reload_completed + && GET_CODE (operands[0]) == MEM + && GET_CODE (operands[1]) == MEM) + operands[1] = copy_to_mode_reg (DImode, operands[1]); +}") + +;; ??? Emit stf8 for m/*e constraint. +(define_insn "*movdi_internal" + [(set (match_operand:DI 0 "nonimmediate_operand" "=r,r,r,r,m,r,*e,*e,r,*f,r,*b") + (match_operand:DI 1 "move_operand" "r,J,i,m,r,*e,r,*e,*f,r,*b,r"))] + "! memory_operand (operands[0], DImode) + || ! memory_operand (operands[1], DImode)" + "@ + mov %0 = %1 + addl %0 = %1, r0 + movl %0 = %1 + ld8%O1 %0 = %1%P1 + st8%Q0 %0 = %1%P0 + getf.sig %0 = %1 + setf.sig %0 = %1 + mov %0 = %1 + getf.d %0 = %1 + setf.d %0 = %1 + mov %0 = %1 + mov %0 = %1" + [(set_attr "type" "A,A,L,M,M,M,M,F,M,M,I,I")]) + +(define_expand "load_fptr" + [(set (match_dup 2) + (plus:DI (reg:DI 1) (match_operand:DI 1 "function_operand" ""))) + (set (match_operand:DI 0 "register_operand" "") (mem:DI (match_dup 2)))] + "" + " +{ + if (reload_in_progress) + operands[2] = operands[0]; + else + operands[2] = gen_reg_rtx (DImode); +}") + +(define_insn "*load_fptr_internal1" + [(set (match_operand:DI 0 "register_operand" "=r") + (plus:DI (reg:DI 1) (match_operand:DI 1 "function_operand" "s")))] + "" + "addl %0 = @ltoff(@fptr(%1)), gp" + [(set_attr "type" "A")]) + +(define_insn "load_gprel" + [(set (match_operand:DI 0 "register_operand" "=r") + (plus:DI (reg:DI 1) (match_operand:DI 1 "sdata_symbolic_operand" "s")))] + "" + "addl %0 = @gprel(%1), gp" + [(set_attr "type" "A")]) + +(define_expand "load_symptr" + [(set (match_dup 2) + (plus:DI (reg:DI 1) (match_operand:DI 1 "symbolic_operand" ""))) + (set (match_operand:DI 0 "register_operand" "") (mem:DI (match_dup 2)))] + "" + " +{ + if (reload_in_progress) + operands[2] = operands[0]; + else + operands[2] = gen_reg_rtx (DImode); +}") + +(define_insn "*load_symptr_internal1" + [(set (match_operand:DI 0 "register_operand" "=r") + (plus:DI (reg:DI 1) (match_operand:DI 1 "symbolic_operand" "s")))] + "" + "addl %0 = @ltoff(%1), gp" + [(set_attr 
"type" "A")]) + +;; Floating Point Moves +;; +;; Note - Patterns for SF mode moves are compulsory, but +;; patterns for DF are optional, as GCC can synthesise them. + +(define_expand "movsf" + [(set (match_operand:SF 0 "general_operand" "") + (match_operand:SF 1 "general_operand" ""))] + "" + " +{ + if (! reload_in_progress && ! reload_completed + && GET_CODE (operands[0]) == MEM + && GET_CODE (operands[1]) == MEM) + operands[1] = copy_to_mode_reg (SFmode, operands[1]); +}") + +;; ??? The r/m alternative is apparently needed because of paradoxical subregs, +;; so it may no longer be necessary after scheduling is enabled. + +(define_insn "*movsf_internal" + [(set (match_operand:SF 0 "nonimmediate_operand" "=f,f,m,*r,f,*r,*r") + (match_operand:SF 1 "general_operand" "fG,m,fG,fG,*r,*r,m"))] + "! memory_operand (operands[0], SFmode) + || ! memory_operand (operands[1], SFmode)" + "@ + mov %0 = %F1 + ldfs %0 = %1%P1 + stfs %0 = %F1%P0 + getf.s %0 = %F1 + setf.s %0 = %1 + mov %0 = %1 + ld4%O1 %0 = %1" + [(set_attr "type" "F,M,M,M,M,A,M")]) + +(define_expand "movdf" + [(set (match_operand:DF 0 "general_operand" "") + (match_operand:DF 1 "general_operand" ""))] + "" + " +{ + if (! reload_in_progress && ! reload_completed + && GET_CODE (operands[0]) == MEM + && GET_CODE (operands[1]) == MEM) + operands[1] = copy_to_mode_reg (DFmode, operands[1]); +}") + +;; ??? movsf has a r/m alternative, movdf doesn't. + +(define_insn "*movdf_internal" + [(set (match_operand:DF 0 "nonimmediate_operand" "=f,f,m,*r,f,*r") + (match_operand:DF 1 "general_operand" "fG,m,fG,fG,*r,*r"))] + "! memory_operand (operands[0], DFmode) + || ! 
memory_operand (operands[1], DFmode)" + "@ + mov %0 = %F1 + ldfd %0 = %1%P1 + stfd %0 = %F1%P0 + getf.d %0 = %F1 + setf.d %0 = %1 + mov %0 = %1" + [(set_attr "type" "F,M,M,M,M,A")]) + + +;; :::::::::::::::::::: +;; :: +;; :: Conversions +;; :: +;; :::::::::::::::::::: + +;; Signed conversions from a smaller integer to a larger integer + +(define_insn "extendqidi2" + [(set (match_operand:DI 0 "register_operand" "=r") + (sign_extend:DI (match_operand:QI 1 "register_operand" "r")))] + "" + "sxt1 %0 = %1" + [(set_attr "type" "I")]) + +(define_insn "extendhidi2" + [(set (match_operand:DI 0 "register_operand" "=r") + (sign_extend:DI (match_operand:HI 1 "register_operand" "r")))] + "" + "sxt2 %0 = %1" + [(set_attr "type" "I")]) + +(define_insn "extendsidi2" + [(set (match_operand:DI 0 "register_operand" "=r,*e") + (sign_extend:DI (match_operand:SI 1 "register_operand" "r,*e")))] + "" + "@ + sxt4 %0 = %1 + fsxt.r %0 = %1, %1%B0" + [(set_attr "type" "I,F")]) + +;; Unsigned conversions from a smaller integer to a larger integer + +(define_insn "zero_extendqidi2" + [(set (match_operand:DI 0 "register_operand" "=r,r") + (zero_extend:DI (match_operand:QI 1 "nonimmediate_operand" "r,m")))] + "" + "@ + zxt1 %0 = %1 + ld1%O1 %0 = %1%P1" + [(set_attr "type" "I,M")]) + +(define_insn "zero_extendhidi2" + [(set (match_operand:DI 0 "register_operand" "=r,r") + (zero_extend:DI (match_operand:HI 1 "nonimmediate_operand" "r,m")))] + "" + "@ + zxt2 %0 = %1 + ld2%O1 %0 = %1%P1" + [(set_attr "type" "I,M")]) + +(define_insn "zero_extendsidi2" + [(set (match_operand:DI 0 "register_operand" "=r,r,*e") + (zero_extend:DI (match_operand:SI 1 "nonimmediate_operand" "r,m,*e")))] + "" + "@ + zxt4 %0 = %1 + ld4%O1 %0 = %1%P1 + fsxt.r %0 = f1, %1%B0" + [(set_attr "type" "I,M,F")]) + +;; Convert between floating point types of different sizes. + +;; ??? Optimization opportunity here. 
+ +(define_insn "extendsfdf2" + [(set (match_operand:DF 0 "register_operand" "=f,f") + (float_extend:DF (match_operand:SF 1 "register_operand" "0,f")))] + "" + "@ + //nop + mov %0 = %1" + [(set_attr "type" "unknown,F")]) + +(define_insn "truncdfsf2" + [(set (match_operand:SF 0 "register_operand" "=f") + (float_truncate:SF (match_operand:DF 1 "register_operand" "f")))] + "" + "fnorm.s %0 = %1%B0" + [(set_attr "type" "F")]) + +;; Convert between signed integer types and floating point. + +;; ??? Instead of having floatdidf2, we should have a floatditf2 pattern, +;; and then add conversions from tf to df and sf. + +(define_insn "floatdidf2" + [(set (match_operand:DF 0 "register_operand" "=f") + (float:DF (match_operand:DI 1 "register_operand" "e")))] + "" + "fcvt.xf %0 = %1\;;;\;fnorm.d %0 = %0%B0" + [(set_attr "type" "unknown")]) + +(define_insn "fix_truncsfdi2" + [(set (match_operand:DI 0 "register_operand" "=e") + (fix:DI (match_operand:SF 1 "register_operand" "f")))] + "" + "fcvt.fx.trunc %0 = %1%B0" + [(set_attr "type" "F")]) + +(define_insn "fix_truncdfdi2" + [(set (match_operand:DI 0 "register_operand" "=e") + (fix:DI (match_operand:DF 1 "register_operand" "f")))] + "" + "fcvt.fx.trunc %0 = %1%B0" + [(set_attr "type" "F")]) + +;; Convert between unsigned integer types and floating point. 
+ +(define_insn "floatunsdisf2" + [(set (match_operand:SF 0 "register_operand" "=f") + (unsigned_float:SF (match_operand:DI 1 "register_operand" "e")))] + "" + "fcvt.xuf.s %0 = %1%B0" + [(set_attr "type" "F")]) + +(define_insn "floatunsdidf2" + [(set (match_operand:DF 0 "register_operand" "=f") + (unsigned_float:DF (match_operand:DI 1 "register_operand" "e")))] + "" + "fcvt.xuf.d %0 = %1%B0" + [(set_attr "type" "F")]) + +(define_insn "fixuns_truncsfdi2" + [(set (match_operand:DI 0 "register_operand" "=e") + (unsigned_fix:DI (match_operand:SF 1 "register_operand" "f")))] + "" + "fcvt.fxu.trunc %0 = %1%B0" + [(set_attr "type" "F")]) + +(define_insn "fixuns_truncdfdi2" + [(set (match_operand:DI 0 "register_operand" "=e") + (unsigned_fix:DI (match_operand:DF 1 "register_operand" "f")))] + "" + "fcvt.fxu.trunc %0 = %1%B0" + [(set_attr "type" "F")]) + + +;; :::::::::::::::::::: +;; :: +;; :: Bit field extraction +;; :: +;; :::::::::::::::::::: + +;; ??? It would be useful to have SImode versions of the extract and insert +;; patterns. + +(define_insn "extv" + [(set (match_operand:DI 0 "register_operand" "=r") + (sign_extract:DI (match_operand:DI 1 "register_operand" "r") + (match_operand:DI 2 "const_int_operand" "n") + (match_operand:DI 3 "const_int_operand" "n")))] + "" + "extr %0 = %1, %3, %2" + [(set_attr "type" "I")]) + +(define_insn "extzv" + [(set (match_operand:DI 0 "register_operand" "=r") + (zero_extract:DI (match_operand:DI 1 "register_operand" "r") + (match_operand:DI 2 "const_int_operand" "n") + (match_operand:DI 3 "const_int_operand" "n")))] + "" + "extr.u %0 = %1, %3, %2" + [(set_attr "type" "I")]) + +;; Insert a bit field. +;; Can have 3 operands, source1 (inserter), source2 (insertee), dest. +;; Source1 can be 0 or -1. +;; Source2 can be 0. + +;; ??? Actual dep instruction is more powerful than what these insv +;; patterns support. Unfortunately, combine is unable to create patterns +;; where source2 != dest. 
+ +(define_expand "insv" + [(set (zero_extract:DI (match_operand:DI 0 "register_operand" "") + (match_operand:DI 1 "const_int_operand" "") + (match_operand:DI 2 "const_int_operand" "")) + (match_operand:DI 3 "nonmemory_operand" ""))] + "" + " +{ + int width = INTVAL (operands[1]); + int shift = INTVAL (operands[2]); + + /* If operand[3] is a constant, and isn't 0 or -1, then load it into a + pseudo. */ + if (! register_operand (operands[3], DImode) + && operands[3] != const0_rtx && operands[3] != constm1_rtx) + operands[3] = force_reg (DImode, operands[3]); + + /* If this is a single dep instruction, we have nothing to do. */ + if (! ((register_operand (operands[3], DImode) && width <= 16) + || operands[3] == const0_rtx || operands[3] == constm1_rtx)) + { + /* Check for cases that can be implemented with a mix instruction. */ + if (width == 32 && shift == 0) + { + /* Directly generating the mix4left instruction confuses + optimize_bit_field in function.c. Since this is performing + a useful optimization, we defer generation of the complicated + mix4left RTL to the first splitting phase. */ + rtx tmp = gen_reg_rtx (DImode); + emit_insn (gen_shift_mix4left (operands[0], operands[3], tmp)); + DONE; + } + else if (width == 32 && shift == 32) + { + emit_insn (gen_mix4right (operands[0], operands[3])); + DONE; + } + + FAIL; + +#if 0 + /* This code may be useful for other IA-64 processors, so we leave it in + for now. 
*/ + while (width > 16) + { + rtx tmp; + + emit_insn (gen_insv (operands[0], GEN_INT (16), GEN_INT (shift), + operands[3])); + shift += 16; + width -= 16; + tmp = gen_reg_rtx (DImode); + emit_insn (gen_lshrdi3 (tmp, operands[3], GEN_INT (16))); + operands[3] = tmp; + } + operands[1] = GEN_INT (width); + operands[2] = GEN_INT (shift); +#endif + } +}") + +(define_insn "*insv_internal" + [(set (zero_extract:DI (match_operand:DI 0 "register_operand" "+r") + (match_operand:DI 1 "const_int_operand" "n") + (match_operand:DI 2 "const_int_operand" "n")) + (match_operand:DI 3 "nonmemory_operand" "rP"))] + "(register_operand (operands[3], DImode) && INTVAL (operands[1]) <= 16) + || operands[3] == const0_rtx || operands[3] == constm1_rtx" + "dep %0 = %3, %0, %2, %1" + [(set_attr "type" "I")]) + +(define_insn "shift_mix4left" + [(set (zero_extract:DI (match_operand:DI 0 "register_operand" "+r") + (const_int 32) (const_int 0)) + (match_operand:DI 1 "register_operand" "r")) + (clobber (match_operand:DI 2 "register_operand" "=r"))] + "" + "#" + [(set_attr "type" "unknown")]) + +;; ??? Need to emit an instruction group barrier here because this gets split +;; after md_reorg. + +(define_split + [(set (zero_extract:DI (match_operand:DI 0 "register_operand" "") + (const_int 32) (const_int 0)) + (match_operand:DI 1 "register_operand" "")) + (clobber (match_operand:DI 2 "register_operand" ""))] + "reload_completed" + [(set (match_dup 3) (ashift:DI (match_dup 1) (const_int 32))) + (unspec_volatile [(const_int 0)] 2) + (set (zero_extract:DI (match_dup 0) (const_int 32) (const_int 0)) + (lshiftrt:DI (match_dup 3) (const_int 32)))] + "operands[3] = operands[2];") + +(define_split + [(set (zero_extract:DI (match_operand:DI 0 "register_operand" "") + (const_int 32) (const_int 0)) + (match_operand:DI 1 "register_operand" "")) + (clobber (match_operand:DI 2 "register_operand" ""))] + "! 
reload_completed" + [(set (match_dup 3) (ashift:DI (match_dup 1) (const_int 32))) + (set (zero_extract:DI (match_dup 0) (const_int 32) (const_int 0)) + (lshiftrt:DI (match_dup 3) (const_int 32)))] + "operands[3] = operands[2];") + +(define_insn "*mix4left" + [(set (zero_extract:DI (match_operand:DI 0 "register_operand" "+r") + (const_int 32) (const_int 0)) + (lshiftrt:DI (match_operand:DI 1 "register_operand" "r") + (const_int 32)))] + "" + "mix4.l %0 = %0, %r1" + [(set_attr "type" "I")]) + +(define_insn "mix4right" + [(set (zero_extract:DI (match_operand:DI 0 "register_operand" "+r") + (const_int 32) (const_int 32)) + (match_operand:DI 1 "reg_or_0_operand" "rO"))] + "" + "mix4.r %0 = %r1, %0" + [(set_attr "type" "I")]) + +;; This is used by the rotrsi3 pattern. + +(define_insn "*mix4right_3op" + [(set (match_operand:DI 0 "register_operand" "=r") + (ior:DI (zero_extend:DI (match_operand:SI 1 "register_operand" "r")) + (ashift:DI (zero_extend:DI (match_operand:SI 2 "register_operand" "r")) + (const_int 32))))] + "" + "mix4.r %0 = %1, %2" + [(set_attr "type" "I")]) + + +;; :::::::::::::::::::: +;; :: +;; :: 32 bit Integer arithmetic +;; :: +;; :::::::::::::::::::: + +;; We handle 32-bit arithmetic just like the alpha port does. + +(define_expand "addsi3" + [(set (match_operand:SI 0 "register_operand" "") + (plus:SI (match_operand:SI 1 "register_operand" "") + (match_operand:SI 2 "reg_or_22bit_operand" "")))] + "" + " +{ + if (optimize) + { + rtx op1 = gen_lowpart (DImode, operands[1]); + rtx op2 = gen_lowpart (DImode, operands[2]); + + if (! 
cse_not_expected) + { + rtx tmp = gen_reg_rtx (DImode); + emit_insn (gen_adddi3 (tmp, op1, op2)); + emit_move_insn (operands[0], gen_lowpart (SImode, tmp)); + } + else + emit_insn (gen_adddi3 (gen_lowpart (DImode, operands[0]), op1, op2)); + DONE; + } +}") + +(define_insn "*addsi3_internal" + [(set (match_operand:SI 0 "register_operand" "=r,r,r") + (plus:SI (match_operand:SI 1 "register_operand" "%r,r,a") + (match_operand:SI 2 "reg_or_22bit_operand" "r,I,J")))] + "" + "@ + add %0 = %1, %2 + adds %0 = %2, %1 + addl %0 = %2, %1" + [(set_attr "type" "A")]) + +(define_insn "*addsi3_plus1" + [(set (match_operand:SI 0 "register_operand" "=r") + (plus:SI (plus:SI (match_operand:SI 1 "register_operand" "r") + (match_operand:SI 2 "register_operand" "r")) + (const_int 1)))] + "" + "add %0 = %1, %2, 1" + [(set_attr "type" "A")]) + +(define_expand "subsi3" + [(set (match_operand:SI 0 "register_operand" "") + (minus:SI (match_operand:SI 1 "reg_or_8bit_operand" "") + (match_operand:SI 2 "register_operand" "")))] + "" + " +{ + if (optimize) + { + rtx op1 = gen_lowpart (DImode, operands[1]); + rtx op2 = gen_lowpart (DImode, operands[2]); + + if (! 
cse_not_expected) + { + rtx tmp = gen_reg_rtx (DImode); + emit_insn (gen_subdi3 (tmp, op1, op2)); + emit_move_insn (operands[0], gen_lowpart (SImode, tmp)); + } + else + emit_insn (gen_subdi3 (gen_lowpart (DImode, operands[0]), op1, op2)); + DONE; + } +}") + +(define_insn "*subsi3_internal" + [(set (match_operand:SI 0 "register_operand" "=r") + (minus:SI (match_operand:SI 1 "reg_or_8bit_operand" "rK") + (match_operand:SI 2 "register_operand" "r")))] + "" + "sub %0 = %1, %2" + [(set_attr "type" "A")]) + +(define_insn "*subsi3_minus1" + [(set (match_operand:SI 0 "register_operand" "=r") + (plus:SI (not:SI (match_operand:SI 1 "register_operand" "r")) + (match_operand:SI 2 "register_operand" "r")))] + "" + "sub %0 = %2, %1, 1" + [(set_attr "type" "A")]) + +(define_expand "mulsi3" + [(set (match_operand:SI 0 "register_operand" "") + (mult:SI (match_operand:SI 1 "register_operand" "") + (match_operand:SI 2 "register_operand" "")))] + "" + " +{ + if (optimize) + { + rtx op1 = gen_lowpart (DImode, operands[1]); + rtx op2 = gen_lowpart (DImode, operands[2]); + + if (! cse_not_expected) + { + rtx tmp = gen_reg_rtx (DImode); + emit_insn (gen_muldi3 (tmp, op1, op2)); + emit_move_insn (operands[0], gen_lowpart (SImode, tmp)); + } + else + emit_insn (gen_muldi3 (gen_lowpart (DImode, operands[0]), op1, op2)); + DONE; + } +}") + +;; ??? Could add maddsi3 patterns patterned after the madddi3 patterns. + +(define_insn "*mulsi3_internal" + [(set (match_operand:SI 0 "register_operand" "=e") + (mult:SI (match_operand:SI 1 "register_operand" "e") + (match_operand:SI 2 "nonmemory_operand" "e")))] + "" + "xma.l %0 = %1, %2, f0%B0" + [(set_attr "type" "F")]) + +(define_expand "negsi2" + [(set (match_operand:SI 0 "register_operand" "") + (neg:SI (match_operand:SI 1 "register_operand" "")))] + "" + " +{ + if (optimize) + { + rtx op1 = gen_lowpart (DImode, operands[1]); + + if (! 
cse_not_expected) + { + rtx tmp = gen_reg_rtx (DImode); + emit_insn (gen_negdi2 (tmp, op1)); + emit_move_insn (operands[0], gen_lowpart (SImode, tmp)); + } + else + emit_insn (gen_negdi2 (gen_lowpart (DImode, operands[0]), op1)); + DONE; + } +}") + +(define_insn "*negsi2_internal" + [(set (match_operand:SI 0 "register_operand" "=r") + (neg:SI (match_operand:SI 1 "register_operand" "r")))] + "" + "sub %0 = r0, %1" + [(set_attr "type" "A")]) + +(define_expand "abssi2" + [(set (match_dup 2) + (ge:CC (match_operand:SI 1 "register_operand" "") (const_int 0))) + (set (match_operand:SI 0 "register_operand" "") + (if_then_else:SI (ne:CC (match_dup 2) (const_int 0)) + (match_dup 1) + (neg:SI (match_dup 1))))] + "" + " +{ + operands[2] = gen_reg_rtx (CCmode); +}") + +(define_expand "sminsi3" + [(set (match_dup 3) + (ge:CC (match_operand:SI 1 "register_operand" "") + (match_operand:SI 2 "register_operand" ""))) + (set (match_operand:SI 0 "register_operand" "") + (if_then_else:SI (ne:CC (match_dup 3) (const_int 0)) + (match_dup 2) (match_dup 1)))] + "" + " +{ + operands[3] = gen_reg_rtx (CCmode); +}") + +(define_expand "smaxsi3" + [(set (match_dup 3) + (ge:CC (match_operand:SI 1 "register_operand" "") + (match_operand:SI 2 "register_operand" ""))) + (set (match_operand:SI 0 "register_operand" "") + (if_then_else:SI (ne:CC (match_dup 3) (const_int 0)) + (match_dup 1) (match_dup 2)))] + "" + " +{ + operands[3] = gen_reg_rtx (CCmode); +}") + +(define_expand "uminsi3" + [(set (match_dup 3) + (geu:CC (match_operand:SI 1 "register_operand" "") + (match_operand:SI 2 "register_operand" ""))) + (set (match_operand:SI 0 "register_operand" "") + (if_then_else:SI (ne:CC (match_dup 3) (const_int 0)) + (match_dup 2) (match_dup 1)))] + "" + " +{ + operands[3] = gen_reg_rtx (CCmode); +}") + +(define_expand "umaxsi3" + [(set (match_dup 3) + (geu:CC (match_operand:SI 1 "register_operand" "") + (match_operand:SI 2 "register_operand" ""))) + (set (match_operand:SI 0 "register_operand" "") + 
(if_then_else:SI (ne:CC (match_dup 3) (const_int 0)) + (match_dup 1) (match_dup 2)))] + "" + " +{ + operands[3] = gen_reg_rtx (CCmode); +}") + + +;; :::::::::::::::::::: +;; :: +;; :: 64 bit Integer arithmetic +;; :: +;; :::::::::::::::::::: + +(define_insn "adddi3" + [(set (match_operand:DI 0 "register_operand" "=r,r,r") + (plus:DI (match_operand:DI 1 "register_operand" "%r,r,a") + (match_operand:DI 2 "reg_or_22bit_operand" "r,I,J")))] + "" + "@ + add %0 = %1, %2 + adds %0 = %2, %1 + addl %0 = %2, %1" + [(set_attr "type" "A")]) + +(define_insn "*adddi3_plus1" + [(set (match_operand:DI 0 "register_operand" "=r") + (plus:DI (plus:DI (match_operand:DI 1 "register_operand" "r") + (match_operand:DI 2 "register_operand" "r")) + (const_int 1)))] + "" + "add %0 = %1, %2, 1" + [(set_attr "type" "A")]) + +(define_insn "subdi3" + [(set (match_operand:DI 0 "register_operand" "=r") + (minus:DI (match_operand:DI 1 "reg_or_8bit_operand" "rK") + (match_operand:DI 2 "register_operand" "r")))] + "" + "sub %0 = %1, %2" + [(set_attr "type" "A")]) + +(define_insn "*subdi3_minus1" + [(set (match_operand:DI 0 "register_operand" "=r") + (plus:DI (not:DI (match_operand:DI 1 "register_operand" "r")) + (match_operand:DI 2 "register_operand" "r")))] + "" + "sub %0 = %2, %1, 1" + [(set_attr "type" "A")]) + +(define_insn "muldi3" + [(set (match_operand:DI 0 "register_operand" "=e") + (mult:DI (match_operand:DI 1 "register_operand" "e") + (match_operand:DI 2 "register_operand" "e")))] + "" + "xma.l %0 = %1, %2, f0%B0" + [(set_attr "type" "F")]) + +;; ??? If operand 3 is an eliminable reg, then register elimination causes the +;; same problem that we have with shladd below. Unfortunately, this case is +;; much harder to fix because the multiply puts the result in an FP register, +;; but the add needs inputs from a general register. We add a spurious clobber +;; here so that it will be present just in case register elimination gives us +;; the funny result. + +;; ??? 
Maybe validate_changes should try adding match_scratch clobbers? + +;; ??? Maybe we should change how adds are canonicalized. + +(define_insn "*madddi3" + [(set (match_operand:DI 0 "register_operand" "=e") + (plus:DI (mult:DI (match_operand:DI 1 "register_operand" "e") + (match_operand:DI 2 "register_operand" "e")) + (match_operand:DI 3 "register_operand" "e"))) + (clobber (match_scratch:DI 4 "=X"))] + "" + "xma.l %0 = %1, %2, %3%B0" + [(set_attr "type" "F")]) + +;; This can be created by register elimination if operand3 of shladd is an +;; eliminable register or has reg_equiv_constant set. + +;; We have to use nonmemory_operand for operand 4, to ensure that the +;; validate_changes call inside eliminate_regs will always succeed. If it +;; doesn't succeed, then this remain a madddi3 pattern, and will be reloaded +;; incorrectly. + +(define_insn "*madddi3_elim" + [(set (match_operand:DI 0 "register_operand" "=&r") + (plus:DI (plus:DI (mult:DI (match_operand:DI 1 "register_operand" "e") + (match_operand:DI 2 "register_operand" "e")) + (match_operand:DI 3 "register_operand" "e")) + (match_operand:DI 4 "nonmemory_operand" "rI"))) + (clobber (match_scratch:DI 5 "=e"))] + "reload_in_progress" + "#" + [(set_attr "type" "unknown")]) + +;; ??? Need to emit an instruction group barrier here because this gets split +;; after md_reorg. 
+ +(define_split + [(set (match_operand:DI 0 "register_operand" "") + (plus:DI (plus:DI (mult:DI (match_operand:DI 1 "register_operand" "") + (match_operand:DI 2 "register_operand" "")) + (match_operand:DI 3 "register_operand" "")) + (match_operand:DI 4 "reg_or_14bit_operand" ""))) + (clobber (match_scratch:DI 5 ""))] + "reload_completed" + [(parallel [(set (match_dup 5) (plus:DI (mult:DI (match_dup 1) (match_dup 2)) + (match_dup 3))) + (clobber (match_dup 0))]) + (unspec_volatile [(const_int 0)] 2) + (set (match_dup 0) (match_dup 5)) + (unspec_volatile [(const_int 0)] 2) + (set (match_dup 0) (plus:DI (match_dup 0) (match_dup 4)))] + "") + +;; ??? There are highpart multiply and add instructions, but we have no way +;; to generate them. + +(define_insn "smuldi3_highpart" + [(set (match_operand:DI 0 "register_operand" "=e") + (truncate:DI + (lshiftrt:TI + (mult:TI (sign_extend:TI (match_operand:DI 1 "register_operand" "e")) + (sign_extend:TI (match_operand:DI 2 "register_operand" "e"))) + (const_int 64))))] + "" + "xma.h %0 = %1, %2, f0%B0" + [(set_attr "type" "F")]) + +(define_insn "umuldi3_highpart" + [(set (match_operand:DI 0 "register_operand" "=e") + (truncate:DI + (lshiftrt:TI + (mult:TI (zero_extend:TI (match_operand:DI 1 "register_operand" "e")) + (zero_extend:TI (match_operand:DI 2 "register_operand" "e"))) + (const_int 64))))] + "" + "xma.hu %0 = %1, %2, f0%B0" + [(set_attr "type" "F")]) + +(define_insn "negdi2" + [(set (match_operand:DI 0 "register_operand" "=r") + (neg:DI (match_operand:DI 1 "register_operand" "r")))] + "" + "sub %0 = r0, %1" + [(set_attr "type" "A")]) + +(define_expand "absdi2" + [(set (match_dup 2) + (ge:CC (match_operand:DI 1 "register_operand" "") (const_int 0))) + (set (match_operand:DI 0 "register_operand" "") + (if_then_else:DI (ne:CC (match_dup 2) (const_int 0)) + (match_dup 1) + (neg:DI (match_dup 1))))] + "" + " +{ + operands[2] = gen_reg_rtx (CCmode); +}") + +(define_expand "smindi3" + [(set (match_dup 3) + (ge:CC 
(match_operand:DI 1 "register_operand" "") + (match_operand:DI 2 "register_operand" ""))) + (set (match_operand:DI 0 "register_operand" "") + (if_then_else:DI (ne:CC (match_dup 3) (const_int 0)) + (match_dup 2) (match_dup 1)))] + "" + " +{ + operands[3] = gen_reg_rtx (CCmode); +}") + +(define_expand "smaxdi3" + [(set (match_dup 3) + (ge:CC (match_operand:DI 1 "register_operand" "") + (match_operand:DI 2 "register_operand" ""))) + (set (match_operand:DI 0 "register_operand" "") + (if_then_else:DI (ne:CC (match_dup 3) (const_int 0)) + (match_dup 1) (match_dup 2)))] + "" + " +{ + operands[3] = gen_reg_rtx (CCmode); +}") + +(define_expand "umindi3" + [(set (match_dup 3) + (geu:CC (match_operand:DI 1 "register_operand" "") + (match_operand:DI 2 "register_operand" ""))) + (set (match_operand:DI 0 "register_operand" "") + (if_then_else:DI (ne:CC (match_dup 3) (const_int 0)) + (match_dup 2) (match_dup 1)))] + "" + " +{ + operands[3] = gen_reg_rtx (CCmode); +}") + +(define_expand "umaxdi3" + [(set (match_dup 3) + (geu:CC (match_operand:DI 1 "register_operand" "") + (match_operand:DI 2 "register_operand" ""))) + (set (match_operand:DI 0 "register_operand" "") + (if_then_else:DI (ne:CC (match_dup 3) (const_int 0)) + (match_dup 1) (match_dup 2)))] + "" + " +{ + operands[3] = gen_reg_rtx (CCmode); +}") + +(define_expand "ffsdi2" + [(set (match_dup 6) + (eq:CC (match_operand:DI 1 "register_operand" "") (const_int 0))) + (set (match_dup 2) (plus:DI (match_dup 1) (const_int -1))) + (set (match_dup 5) (const_int 0)) + (set (match_dup 3) (xor:DI (match_dup 1) (match_dup 2))) + (set (match_dup 4) (unspec:DI [(match_dup 3)] 8)) + (set (match_operand:DI 0 "register_operand" "") + (if_then_else:DI (ne:CC (match_dup 6) (const_int 0)) + (match_dup 5) (match_dup 4)))] + "" + " +{ + operands[2] = gen_reg_rtx (DImode); + operands[3] = gen_reg_rtx (DImode); + operands[4] = gen_reg_rtx (DImode); + operands[5] = gen_reg_rtx (DImode); + operands[6] = gen_reg_rtx (CCmode); +}") + +(define_insn 
"*popcnt" + [(set (match_operand:DI 0 "register_operand" "=r") + (unspec:DI [(match_operand:DI 1 "register_operand" "r")] 8))] + "" + "popcnt %0 = %1" + [(set_attr "type" "I")]) + + +;; :::::::::::::::::::: +;; :: +;; :: 32 bit floating point arithmetic +;; :: +;; :::::::::::::::::::: + +(define_insn "addsf3" + [(set (match_operand:SF 0 "register_operand" "=f") + (plus:SF (match_operand:SF 1 "register_operand" "%f") + (match_operand:SF 2 "reg_or_fp01_operand" "fG")))] + "" + "fadd.s %0 = %1, %F2%B0" + [(set_attr "type" "F")]) + +(define_insn "subsf3" + [(set (match_operand:SF 0 "register_operand" "=f") + (minus:SF (match_operand:SF 1 "reg_or_fp01_operand" "fG") + (match_operand:SF 2 "reg_or_fp01_operand" "fG")))] + "" + "fsub.s %0 = %F1, %F2%B0" + [(set_attr "type" "F")]) + +(define_insn "mulsf3" + [(set (match_operand:SF 0 "register_operand" "=f") + (mult:SF (match_operand:SF 1 "register_operand" "%f") + (match_operand:SF 2 "register_operand" "f")))] + "" + "fmpy.s %0 = %1, %2%B0" + [(set_attr "type" "F")]) + +(define_insn "abssf2" + [(set (match_operand:SF 0 "register_operand" "=f") + (abs:SF (match_operand:SF 1 "register_operand" "f")))] + "" + "fabs %0 = %1%B0" + [(set_attr "type" "F")]) + +(define_insn "negsf2" + [(set (match_operand:SF 0 "register_operand" "=f") + (neg:SF (match_operand:SF 1 "register_operand" "f")))] + "" + "fneg %0 = %1%B0" + [(set_attr "type" "F")]) + +(define_insn "*nabssf2" + [(set (match_operand:SF 0 "register_operand" "=f") + (neg:SF (abs:SF (match_operand:SF 1 "register_operand" "f"))))] + "" + "fnegabs %0 = %1%B0" + [(set_attr "type" "F")]) + +(define_insn "minsf3" + [(set (match_operand:SF 0 "register_operand" "=f") + (smin:SF (match_operand:SF 1 "register_operand" "f") + (match_operand:SF 2 "reg_or_fp01_operand" "fG")))] + "" + "fmin %0 = %1, %F2%B0" + [(set_attr "type" "F")]) + +(define_insn "maxsf3" + [(set (match_operand:SF 0 "register_operand" "=f") + (smax:SF (match_operand:SF 1 "register_operand" "f") + (match_operand:SF 2 
"reg_or_fp01_operand" "fG")))] + "" + "fmax %0 = %1, %F2%B0" + [(set_attr "type" "F")]) + +(define_insn "*maddsf3" + [(set (match_operand:SF 0 "register_operand" "=f") + (plus:SF (mult:SF (match_operand:SF 1 "register_operand" "f") + (match_operand:SF 2 "register_operand" "f")) + (match_operand:SF 3 "reg_or_fp01_operand" "fG")))] + "" + "fma.s %0 = %1, %2, %F3%B0" + [(set_attr "type" "F")]) + +(define_insn "*msubsf3" + [(set (match_operand:SF 0 "register_operand" "=f") + (minus:SF (mult:SF (match_operand:SF 1 "register_operand" "f") + (match_operand:SF 2 "register_operand" "f")) + (match_operand:SF 3 "reg_or_fp01_operand" "fG")))] + "" + "fms.s %0 = %1, %2, %F3%B0" + [(set_attr "type" "F")]) + +(define_insn "*nmulsf3" + [(set (match_operand:SF 0 "register_operand" "=f") + (neg:SF (mult:SF (match_operand:SF 1 "register_operand" "f") + (match_operand:SF 2 "register_operand" "f"))))] + "" + "fnmpy.s %0 = %1, %2%B0" + [(set_attr "type" "F")]) + +;; ??? Is it possible to canonicalize this as (minus (reg) (mult))? 
+ +(define_insn "*nmaddsf3" + [(set (match_operand:SF 0 "register_operand" "=f") + (plus:SF (neg:SF (mult:SF (match_operand:SF 1 "register_operand" "f") + (match_operand:SF 2 "register_operand" "f"))) + (match_operand:SF 3 "reg_or_fp01_operand" "fG")))] + "" + "fnma.s %0 = %1, %2, %F3%B0" + [(set_attr "type" "F")]) + + +;; :::::::::::::::::::: +;; :: +;; :: 64 bit floating point arithmetic +;; :: +;; :::::::::::::::::::: + +(define_insn "adddf3" + [(set (match_operand:DF 0 "register_operand" "=f") + (plus:DF (match_operand:DF 1 "register_operand" "%f") + (match_operand:DF 2 "reg_or_fp01_operand" "fG")))] + "" + "fadd.d %0 = %1, %F2%B0" + [(set_attr "type" "F")]) + +(define_insn "subdf3" + [(set (match_operand:DF 0 "register_operand" "=f") + (minus:DF (match_operand:DF 1 "reg_or_fp01_operand" "fG") + (match_operand:DF 2 "reg_or_fp01_operand" "fG")))] + "" + "fsub.d %0 = %F1, %F2%B0" + [(set_attr "type" "F")]) + +(define_insn "muldf3" + [(set (match_operand:DF 0 "register_operand" "=f") + (mult:DF (match_operand:DF 1 "register_operand" "f") + (match_operand:DF 2 "register_operand" "f")))] + "" + "fmpy.d %0 = %1, %2%B0" + [(set_attr "type" "F")]) + +(define_insn "absdf2" + [(set (match_operand:DF 0 "register_operand" "=f") + (abs:DF (match_operand:DF 1 "register_operand" "f")))] + "" + "fabs %0 = %1%B0" + [(set_attr "type" "F")]) + +(define_insn "negdf2" + [(set (match_operand:DF 0 "register_operand" "=f") + (neg:DF (match_operand:DF 1 "register_operand" "f")))] + "" + "fneg %0 = %1%B0" + [(set_attr "type" "F")]) + +(define_insn "*nabsdf2" + [(set (match_operand:DF 0 "register_operand" "=f") + (neg:DF (abs:DF (match_operand:DF 1 "register_operand" "f"))))] + "" + "fnegabs %0 = %1%B0" + [(set_attr "type" "F")]) + +(define_insn "mindf3" + [(set (match_operand:DF 0 "register_operand" "=f") + (smin:DF (match_operand:DF 1 "register_operand" "f") + (match_operand:DF 2 "reg_or_fp01_operand" "fG")))] + "" + "fmin %0 = %1, %F2%B0" + [(set_attr "type" "F")]) + +(define_insn 
"maxdf3" + [(set (match_operand:DF 0 "register_operand" "=f") + (smax:DF (match_operand:DF 1 "register_operand" "f") + (match_operand:DF 2 "reg_or_fp01_operand" "fG")))] + "" + "fmax %0 = %1, %F2%B0" + [(set_attr "type" "F")]) + +(define_insn "*madddf3" + [(set (match_operand:DF 0 "register_operand" "=f") + (plus:DF (mult:DF (match_operand:DF 1 "register_operand" "f") + (match_operand:DF 2 "register_operand" "f")) + (match_operand:DF 3 "reg_or_fp01_operand" "fG")))] + "" + "fma.d %0 = %1, %2, %F3%B0" + [(set_attr "type" "F")]) + +(define_insn "*msubdf3" + [(set (match_operand:DF 0 "register_operand" "=f") + (minus:DF (mult:DF (match_operand:DF 1 "register_operand" "f") + (match_operand:DF 2 "register_operand" "f")) + (match_operand:DF 3 "reg_or_fp01_operand" "fG")))] + "" + "fms.d %0 = %1, %2, %F3%B0" + [(set_attr "type" "F")]) + +(define_insn "*nmuldf3" + [(set (match_operand:DF 0 "register_operand" "=f") + (neg:DF (mult:DF (match_operand:DF 1 "register_operand" "f") + (match_operand:DF 2 "register_operand" "f"))))] + "" + "fnmpy.d %0 = %1, %2%B0" + [(set_attr "type" "F")]) + +;; ??? Is it possible to canonicalize this as (minus (reg) (mult))? + +(define_insn "*nmadddf3" + [(set (match_operand:DF 0 "register_operand" "=f") + (plus:DF (neg:DF (mult:DF (match_operand:DF 1 "register_operand" "f") + (match_operand:DF 2 "register_operand" "f"))) + (match_operand:DF 3 "reg_or_fp01_operand" "fG")))] + "" + "fnma.d %0 = %1, %2, %F3%B0" + [(set_attr "type" "F")]) + + +;; :::::::::::::::::::: +;; :: +;; :: 32 bit Integer Shifts and Rotates +;; :: +;; :::::::::::::::::::: + +;; There is no sign-extend form of dep, so we only get 32 bits of valid result +;; instead of 64 like the patterns below. + +;; Using a predicate that accepts only constants doesn't work, because optabs +;; will load the operand into a register and call the pattern if the predicate +;; did not accept it on the first try. 
So we use nonmemory_operand and then +;; verify that we have an appropriate constant in the expander. + +(define_expand "ashlsi3" + [(set (match_operand:SI 0 "register_operand" "") + (ashift:SI (match_operand:SI 1 "register_operand" "") + (match_operand:SI 2 "nonmemory_operand" "")))] + "" + " +{ + if (! shift_32bit_count_operand (operands[2], SImode)) + FAIL; +}") + +(define_insn "*ashlsi3_internal" + [(set (match_operand:SI 0 "register_operand" "=r") + (ashift:SI (match_operand:SI 1 "register_operand" "r") + (match_operand:SI 2 "shift_32bit_count_operand" "n")))] + "" + "dep.z %0 = %1, %2, %E2" + [(set_attr "type" "I")]) + +;; This is really an extract, but this is how combine canonicalizes the +;; operation. + +(define_expand "ashrsi3" + [(set (match_dup 3) + (ashiftrt:DI (sign_extend:DI + (match_operand:SI 1 "register_operand" "")) + (match_operand:DI 2 "nonmemory_operand" ""))) + (set (match_operand:SI 0 "register_operand" "") (match_dup 4))] + "" + " +{ + if (! shift_32bit_count_operand (operands[2], SImode)) + FAIL; + + operands[3] = gen_reg_rtx (DImode); + operands[4] = gen_lowpart (SImode, operands[3]); +}") + +(define_insn "*ashrsi3_internal" + [(set (match_operand:DI 0 "register_operand" "=r") + (ashiftrt:DI (sign_extend:DI + (match_operand:SI 1 "register_operand" "r")) + (match_operand:DI 2 "shift_32bit_count_operand" "n")))] + "" + "extr %0 = %1, %2, %E2" + [(set_attr "type" "I")]) + +;; This is really an extract, but this is how combine canonicalizes the +;; operation. + +(define_expand "lshrsi3" + [(set (match_dup 3) + (lshiftrt:DI (zero_extend:DI + (match_operand:SI 1 "register_operand" "")) + (match_operand:DI 2 "nonmemory_operand" ""))) + (set (match_operand:SI 0 "register_operand" "") (match_dup 4))] + "" + " +{ + if (! 
shift_32bit_count_operand (operands[2], SImode)) + FAIL; + + operands[3] = gen_reg_rtx (DImode); + operands[4] = gen_lowpart (SImode, operands[3]); +}") + +(define_insn "*lshrsi3_internal" + [(set (match_operand:DI 0 "register_operand" "=r") + (lshiftrt:DI (zero_extend:DI + (match_operand:SI 1 "register_operand" "r")) + (match_operand:DI 2 "shift_32bit_count_operand" "n")))] + "" + "extr.u %0 = %1, %2, %E2" + [(set_attr "type" "I")]) + +;; Use mix4.r/shr to implement rotrsi3. We only get 32 bits of valid result +;; here, instead of 64 like the patterns above. + +(define_expand "rotrsi3" + [(set (match_dup 3) + (ior:DI (zero_extend:DI (match_operand:SI 1 "register_operand" "")) + (ashift:DI (zero_extend:DI (match_dup 1)) (const_int 32)))) + (set (match_dup 3) + (lshiftrt:DI (match_dup 3) + (match_operand:DI 2 "nonmemory_operand" ""))) + (set (match_operand:SI 0 "register_operand" "") (match_dup 4))] + "" + " +{ + if (! shift_32bit_count_operand (operands[2], SImode)) + FAIL; + + operands[3] = gen_reg_rtx (DImode); + operands[4] = gen_lowpart (SImode, operands[3]); +}") + + +;; :::::::::::::::::::: +;; :: +;; :: 64 bit Integer Shifts and Rotates +;; :: +;; :::::::::::::::::::: + +(define_insn "ashldi3" + [(set (match_operand:DI 0 "register_operand" "=r") + (ashift:DI (match_operand:DI 1 "register_operand" "r") + (match_operand:DI 2 "reg_or_6bit_operand" "rM")))] + "" + "shl %0 = %1, %2" + [(set_attr "type" "I")]) + +;; ??? Maybe combine this with the multiply and add instruction? + +(define_insn "*shladd" + [(set (match_operand:DI 0 "register_operand" "=r") + (plus:DI (mult:DI (match_operand:DI 1 "register_operand" "r") + (match_operand:DI 2 "shladd_operand" "n")) + (match_operand:DI 3 "register_operand" "r")))] + "" + "shladd %0 = %1, %S2, %3" + [(set_attr "type" "A")]) + +;; This can be created by register elimination if operand3 of shladd is an +;; eliminable register or has reg_equiv_constant set. 
+ +;; We have to use nonmemory_operand for operand 4, to ensure that the +;; validate_changes call inside eliminate_regs will always succeed. If it +;; doesn't succeed, then this remain a shladd pattern, and will be reloaded +;; incorrectly. + +(define_insn "*shladd_elim" + [(set (match_operand:DI 0 "register_operand" "=&r") + (plus:DI (plus:DI (mult:DI (match_operand:DI 1 "register_operand" "r") + (match_operand:DI 2 "shladd_operand" "n")) + (match_operand:DI 3 "register_operand" "r")) + (match_operand:DI 4 "nonmemory_operand" "rI")))] + "reload_in_progress" + "#" + [(set_attr "type" "unknown")]) + +;; ??? Need to emit an instruction group barrier here because this gets split +;; after md_reorg. + +(define_split + [(set (match_operand:DI 0 "register_operand" "") + (plus:DI (plus:DI (mult:DI (match_operand:DI 1 "register_operand" "") + (match_operand:DI 2 "shladd_operand" "")) + (match_operand:DI 3 "register_operand" "")) + (match_operand:DI 4 "reg_or_14bit_operand" "")))] + "reload_completed" + [(set (match_dup 0) (plus:DI (mult:DI (match_dup 1) (match_dup 2)) + (match_dup 3))) + (unspec_volatile [(const_int 0)] 2) + (set (match_dup 0) (plus:DI (match_dup 0) (match_dup 4)))] + "") + +(define_insn "ashrdi3" + [(set (match_operand:DI 0 "register_operand" "=r") + (ashiftrt:DI (match_operand:DI 1 "register_operand" "r") + (match_operand:DI 2 "reg_or_6bit_operand" "rM")))] + "" + "shr %0 = %1, %2" + [(set_attr "type" "I")]) + +(define_insn "lshrdi3" + [(set (match_operand:DI 0 "register_operand" "=r") + (lshiftrt:DI (match_operand:DI 1 "register_operand" "r") + (match_operand:DI 2 "reg_or_6bit_operand" "rM")))] + "" + "shr.u %0 = %1, %2" + [(set_attr "type" "I")]) + +;; Using a predicate that accepts only constants doesn't work, because optabs +;; will load the operand into a register and call the pattern if the predicate +;; did not accept it on the first try. So we use nonmemory_operand and then +;; verify that we have an appropriate constant in the expander. 
+ +(define_expand "rotrdi3" + [(set (match_operand:DI 0 "register_operand" "") + (rotatert:DI (match_operand:DI 1 "register_operand" "") + (match_operand:DI 2 "nonmemory_operand" "")))] + "" + " +{ + if (! shift_count_operand (operands[2], DImode)) + FAIL; +}") + +(define_insn "*rotrdi3_internal" + [(set (match_operand:DI 0 "register_operand" "=r") + (rotatert:DI (match_operand:DI 1 "register_operand" "r") + (match_operand:DI 2 "shift_count_operand" "M")))] + "" + "shrp %0 = %1, %1, %2" + [(set_attr "type" "I")]) + + +;; :::::::::::::::::::: +;; :: +;; :: 32 Bit Integer Logical operations +;; :: +;; :::::::::::::::::::: + +;; We don't seem to need any other 32-bit logical operations, because gcc +;; generates zero-extend;zero-extend;DImode-op, which combine optimizes to +;; DImode-op;zero-extend, and then we can optimize away the zero-extend. +;; This doesn't work for unary logical operations, because we don't call +;; apply_distributive_law for them. + +;; ??? Likewise, this doesn't work for andnot, which isn't handled by +;; apply_distributive_law. We get inefficient code for +;; int sub4 (int i, int j) { return i & ~j; } +;; We could convert (and (not (sign_extend A)) (sign_extend B)) to +;; (zero_extend (and (not A) B)) in combine. +;; Or maybe fix this by adding andsi3/iorsi3/xorsi3 patterns like the +;; one_cmplsi2 pattern. + +(define_expand "one_cmplsi2" + [(set (match_operand:SI 0 "register_operand" "") + (not:SI (match_operand:SI 1 "register_operand" "")))] + "" + " +{ + if (optimize) + { + rtx op1 = gen_lowpart (DImode, operands[1]); + + if (! 
cse_not_expected) + { + rtx tmp = gen_reg_rtx (DImode); + emit_insn (gen_one_cmpldi2 (tmp, op1)); + emit_move_insn (operands[0], gen_lowpart (SImode, tmp)); + } + else + emit_insn (gen_one_cmpldi2 (gen_lowpart (DImode, operands[0]), op1)); + DONE; + } +}") + +(define_insn "*one_cmplsi2_internal" + [(set (match_operand:SI 0 "register_operand" "=r") + (not:SI (match_operand:SI 1 "register_operand" "r")))] + "" + "andcm %0 = -1, %1" + [(set_attr "type" "A")]) + + +;; :::::::::::::::::::: +;; :: +;; :: 64 Bit Integer Logical operations +;; :: +;; :::::::::::::::::::: + +(define_insn "anddi3" + [(set (match_operand:DI 0 "register_operand" "=r,*e") + (and:DI (match_operand:DI 1 "register_operand" "%r,*e") + (match_operand:DI 2 "reg_or_8bit_operand" "rK,*e")))] + "" + "@ + and %0 = %2, %1 + fand %0 = %2, %1%B0" + [(set_attr "type" "A,F")]) + +(define_insn "*andnot" + [(set (match_operand:DI 0 "register_operand" "=r,*e") + (and:DI (not:DI (match_operand:DI 1 "register_operand" "r,*e")) + (match_operand:DI 2 "reg_or_8bit_operand" "rK,*e")))] + "" + "@ + andcm %0 = %2, %1 + fandcm %0 = %2, %1%B0" + [(set_attr "type" "A,F")]) + +(define_insn "iordi3" + [(set (match_operand:DI 0 "register_operand" "=r,*e") + (ior:DI (match_operand:DI 1 "register_operand" "%r,*e") + (match_operand:DI 2 "reg_or_8bit_operand" "rK,*e")))] + "" + "@ + or %0 = %2, %1 + for %0 = %2, %1%B0" + [(set_attr "type" "A,F")]) + +(define_insn "xordi3" + [(set (match_operand:DI 0 "register_operand" "=r,*e") + (xor:DI (match_operand:DI 1 "register_operand" "%r,*e") + (match_operand:DI 2 "reg_or_8bit_operand" "rK,*e")))] + "" + "@ + xor %0 = %2, %1 + fxor %0 = %2, %1%B0" + [(set_attr "type" "A,F")]) + +(define_insn "one_cmpldi2" + [(set (match_operand:DI 0 "register_operand" "=r") + (not:DI (match_operand:DI 1 "register_operand" "r")))] + "" + "andcm %0 = -1, %1" + [(set_attr "type" "A")]) + +;; :::::::::::::::::::: +;; :: +;; :: Comparisons +;; :: +;; :::::::::::::::::::: + +(define_expand "cmpsi" + [(set (cc0) 
+ (compare (match_operand:SI 0 "register_operand" "") + (match_operand:SI 1 "reg_or_8bit_and_adjusted_operand" "")))] + "" + " +{ + ia64_compare_op0 = operands[0]; + ia64_compare_op1 = operands[1]; + DONE; +}") + +(define_expand "cmpdi" + [(set (cc0) + (compare (match_operand:DI 0 "register_operand" "") + (match_operand:DI 1 "reg_or_8bit_and_adjusted_operand" "")))] + "" + " +{ + ia64_compare_op0 = operands[0]; + ia64_compare_op1 = operands[1]; + DONE; +}") + +(define_expand "cmpsf" + [(set (cc0) + (compare (match_operand:SF 0 "reg_or_fp01_operand" "") + (match_operand:SF 1 "reg_or_fp01_operand" "")))] + "" + " +{ + ia64_compare_op0 = operands[0]; + ia64_compare_op1 = operands[1]; + DONE; +}") + +(define_expand "cmpdf" + [(set (cc0) + (compare (match_operand:DF 0 "reg_or_fp01_operand" "") + (match_operand:DF 1 "reg_or_fp01_operand" "")))] + "" + " +{ + ia64_compare_op0 = operands[0]; + ia64_compare_op1 = operands[1]; + DONE; +}") + +;; ??? Enable this for XFmode support. + +(define_expand "cmpxf" + [(set (cc0) + (compare (match_operand:XF 0 "reg_or_fp01_operand" "") + (match_operand:XF 1 "reg_or_fp01_operand" "")))] + "0" + " +{ + ia64_compare_op0 = operands[0]; + ia64_compare_op1 = operands[1]; + DONE; +}") + +(define_insn "*cmpsi_normal" + [(set (match_operand:CC 0 "register_operand" "=c") + (match_operator:CC 1 "normal_comparison_operator" + [(match_operand:SI 2 "register_operand" "r") + (match_operand:SI 3 "reg_or_8bit_operand" "rK")]))] + "" + "cmp4.%C1 %0, %I0 = %3, %2" + [(set_attr "type" "A")]) + +(define_insn "*cmpsi_adjusted" + [(set (match_operand:CC 0 "register_operand" "=c") + (match_operator:CC 1 "adjusted_comparison_operator" + [(match_operand:SI 2 "register_operand" "r") + (match_operand:SI 3 "reg_or_8bit_adjusted_operand" + "rL")]))] + "" + "cmp4.%C1 %0, %I0 = %3, %2" + [(set_attr "type" "A")]) + +(define_insn "*cmpdi_normal" + [(set (match_operand:CC 0 "register_operand" "=c") + (match_operator:CC 1 "normal_comparison_operator" + 
[(match_operand:DI 2 "register_operand" "r") + (match_operand:DI 3 "reg_or_8bit_operand" "rK")]))] + "" + "cmp.%C1 %0, %I0 = %3, %2" + [(set_attr "type" "A")]) + +(define_insn "*cmpdi_adjusted" + [(set (match_operand:CC 0 "register_operand" "=c") + (match_operator:CC 1 "adjusted_comparison_operator" + [(match_operand:DI 2 "register_operand" "r") + (match_operand:DI 3 "reg_or_8bit_adjusted_operand" + "rL")]))] + "" + "cmp.%C1 %0, %I0 = %3, %2" + [(set_attr "type" "A")]) + +(define_insn "*cmpsf_internal" + [(set (match_operand:CC 0 "register_operand" "=c") + (match_operator:CC 1 "comparison_operator" + [(match_operand:SF 2 "reg_or_fp01_operand" "fG") + (match_operand:SF 3 "reg_or_fp01_operand" "fG")]))] + "" + "fcmp.%D1 %0, %I0 = %F2, %F3" + [(set_attr "type" "F")]) + +(define_insn "*cmpdf_internal" + [(set (match_operand:CC 0 "register_operand" "=c") + (match_operator:CC 1 "comparison_operator" + [(match_operand:DF 2 "reg_or_fp01_operand" "fG") + (match_operand:DF 3 "reg_or_fp01_operand" "fG")]))] + "" + "fcmp.%D1 %0, %I0 = %F2, %F3" + [(set_attr "type" "F")]) + +;; ??? Can this pattern be generated? + +(define_insn "*bit_zero" + [(set (match_operand:CC 0 "register_operand" "=c") + (eq:CC (zero_extract:DI (match_operand:DI 1 "register_operand" "r") + (const_int 1) + (match_operand:DI 2 "immediate_operand" "n")) + (const_int 0)))] + "" + "tbit.z %0, %I0 = %1, %2" + [(set_attr "type" "I")]) + +(define_insn "*bit_one" + [(set (match_operand:CC 0 "register_operand" "=c") + (ne:CC (zero_extract:DI (match_operand:DI 1 "register_operand" "r") + (const_int 1) + (match_operand:DI 2 "immediate_operand" "n")) + (const_int 0)))] + "" + "tbit.nz %0, %I0 = %1, %2" + [(set_attr "type" "I")]) + +;; ??? We also need this if we run out of PR regs and need to spill some. + +;; ??? We need this if a CCmode value does not get allocated to a hard +;; register. This happens if we cse/gcse a CCmode value across a call, and the +;; function has a nonlocal goto. 
This is because global does not allocate +;; call crossing pseudos to hard registers when current_function_has_ +;; nonlocal_goto is true. This is relatively common for C++ programs that +;; use exceptions. See ia64_secondary_reload_class. + +;; We use a define_expand here so that cse/gcse/combine can't accidentally +;; create movcc insns. If this was a named define_insn, we would not be able +;; to make it conditional on reload. + +(define_expand "movcc" + [(set (match_operand:CC 0 "nonimmediate_operand" "") + (match_operand:CC 1 "move_operand" ""))] + "" + " +{ + if (! reload_in_progress && ! reload_completed) + FAIL; +}") + +(define_insn "*movcc_internal" + [(set (match_operand:CC 0 "nonimmediate_operand" "=r,c,r,m") + (match_operand:CC 1 "move_operand" "c,r,m,r"))] + "reload_in_progress || reload_completed" + "@ + # + cmp4.ne %0, %I0 = %1, r0 + ld4%O1 %0 = %1%P1 + st4%Q0 %0 = %1%P0" + [(set_attr "type" "unknown,A,M,M")]) + +(define_split + [(set (match_operand:CC 0 "register_operand" "") + (match_operand:CC 1 "register_operand" ""))] + "reload_completed + && GET_CODE (operands[0]) == REG && GR_REGNO_P (REGNO (operands[0])) + && GET_CODE (operands[1]) == REG && PR_REGNO_P (REGNO (operands[1]))" + [(set (match_dup 2) + (if_then_else:DI (ne:CC (match_dup 1) (const_int 0)) + (const_int 1) + (match_dup 2))) + (set (match_dup 2) + (if_then_else:DI (ne:CC (match_dup 1) (const_int 0)) + (match_dup 2) + (const_int 0)))] + "operands[2] = gen_rtx_SUBREG (DImode, operands[0], 0);") + + +;; :::::::::::::::::::: +;; :: +;; :: Branches +;; :: +;; :::::::::::::::::::: + +(define_expand "beq" + [(set (match_dup 1) + (eq:CC (match_dup 2) + (match_dup 3))) + (set (pc) + (if_then_else (ne:CC (match_dup 1) + (const_int 0)) + (label_ref (match_operand 0 "" "")) + (pc)))] + "" + " +{ + operands[1] = gen_reg_rtx (CCmode); + operands[2] = ia64_compare_op0; + operands[3] = ia64_compare_op1; +}") + +(define_expand "bne" + [(set (match_dup 1) + (ne:CC (match_dup 2) + (match_dup 3))) + 
(set (pc) + (if_then_else (ne:CC (match_dup 1) + (const_int 0)) + (label_ref (match_operand 0 "" "")) + (pc)))] + "" + " +{ + operands[1] = gen_reg_rtx (CCmode); + operands[2] = ia64_compare_op0; + operands[3] = ia64_compare_op1; +}") + +(define_expand "blt" + [(set (match_dup 1) + (lt:CC (match_dup 2) + (match_dup 3))) + (set (pc) + (if_then_else (ne:CC (match_dup 1) + (const_int 0)) + (label_ref (match_operand 0 "" "")) + (pc)))] + "" + " +{ + operands[1] = gen_reg_rtx (CCmode); + operands[2] = ia64_compare_op0; + operands[3] = ia64_compare_op1; +}") + +(define_expand "ble" + [(set (match_dup 1) + (le:CC (match_dup 2) + (match_dup 3))) + (set (pc) + (if_then_else (ne:CC (match_dup 1) + (const_int 0)) + (label_ref (match_operand 0 "" "")) + (pc)))] + "" + " +{ + operands[1] = gen_reg_rtx (CCmode); + operands[2] = ia64_compare_op0; + operands[3] = ia64_compare_op1; +}") + +(define_expand "bgt" + [(set (match_dup 1) + (gt:CC (match_dup 2) + (match_dup 3))) + (set (pc) + (if_then_else (ne:CC (match_dup 1) + (const_int 0)) + (label_ref (match_operand 0 "" "")) + (pc)))] + "" + " +{ + operands[1] = gen_reg_rtx (CCmode); + operands[2] = ia64_compare_op0; + operands[3] = ia64_compare_op1; +}") + +(define_expand "bge" + [(set (match_dup 1) + (ge:CC (match_dup 2) + (match_dup 3))) + (set (pc) + (if_then_else (ne:CC (match_dup 1) + (const_int 0)) + (label_ref (match_operand 0 "" "")) + (pc)))] + "" + " +{ + operands[1] = gen_reg_rtx (CCmode); + operands[2] = ia64_compare_op0; + operands[3] = ia64_compare_op1; +}") + +(define_expand "bltu" + [(set (match_dup 1) + (ltu:CC (match_dup 2) + (match_dup 3))) + (set (pc) + (if_then_else (ne:CC (match_dup 1) + (const_int 0)) + (label_ref (match_operand 0 "" "")) + (pc)))] + "" + " +{ + operands[1] = gen_reg_rtx (CCmode); + operands[2] = ia64_compare_op0; + operands[3] = ia64_compare_op1; +}") + +(define_expand "bleu" + [(set (match_dup 1) + (leu:CC (match_dup 2) + (match_dup 3))) + (set (pc) + (if_then_else (ne:CC (match_dup 1) + 
(const_int 0)) + (label_ref (match_operand 0 "" "")) + (pc)))] + "" + " +{ + operands[1] = gen_reg_rtx (CCmode); + operands[2] = ia64_compare_op0; + operands[3] = ia64_compare_op1; +}") + +(define_expand "bgtu" + [(set (match_dup 1) + (gtu:CC (match_dup 2) + (match_dup 3))) + (set (pc) + (if_then_else (ne:CC (match_dup 1) + (const_int 0)) + (label_ref (match_operand 0 "" "")) + (pc)))] + "" + " +{ + operands[1] = gen_reg_rtx (CCmode); + operands[2] = ia64_compare_op0; + operands[3] = ia64_compare_op1; +}") + +(define_expand "bgeu" + [(set (match_dup 1) + (geu:CC (match_dup 2) + (match_dup 3))) + (set (pc) + (if_then_else (ne:CC (match_dup 1) + (const_int 0)) + (label_ref (match_operand 0 "" "")) + (pc)))] + "" + " +{ + operands[1] = gen_reg_rtx (CCmode); + operands[2] = ia64_compare_op0; + operands[3] = ia64_compare_op1; +}") + +;; ??? Need a way to choose between dpnt and dptk. Currently, I assume that +;; equality tests will likely fail, and inequality tests will likely succeed. + +(define_insn "*beq_true" + [(set (pc) + (if_then_else (eq:CC (match_operand:CC 0 "register_operand" "c") + (const_int 0)) + (label_ref (match_operand 1 "" "")) + (pc)))] + "" + "(%I0) br.cond.dpnt %l1" + [(set_attr "type" "B")]) + +(define_insn "*beq_false" + [(set (pc) + (if_then_else (eq:CC (match_operand:CC 0 "register_operand" "c") + (const_int 0)) + (pc) + (label_ref (match_operand 1 "" ""))))] + "" + "(%0) br.cond.dptk %l1" + [(set_attr "type" "B")]) + +(define_insn "*bne_true" + [(set (pc) + (if_then_else (ne:CC (match_operand:CC 0 "register_operand" "c") + (const_int 0)) + (label_ref (match_operand 1 "" "")) + (pc)))] + "" + "(%0) br.cond.dptk %l1" + [(set_attr "type" "B")]) + +(define_insn "*bne_false" + [(set (pc) + (if_then_else (ne:CC (match_operand:CC 0 "register_operand" "c") + (const_int 0)) + (pc) + (label_ref (match_operand 1 "" ""))))] + "" + "(%I0) br.cond.dpnt %l1" + [(set_attr "type" "B")]) + + +;; :::::::::::::::::::: +;; :: +;; :: Set flag operations +;; :: +;; 
:::::::::::::::::::: + +(define_expand "seq" + [(set (match_dup 1) + (eq:CC (match_dup 2) + (match_dup 3))) + (set (match_operand:DI 0 "register_operand" "") + (ne:DI (match_dup 1) (const_int 0)))] + "" + " +{ + operands[1] = gen_reg_rtx (CCmode); + operands[2] = ia64_compare_op0; + operands[3] = ia64_compare_op1; +}") + +(define_expand "sne" + [(set (match_dup 1) + (ne:CC (match_dup 2) + (match_dup 3))) + (set (match_operand:DI 0 "register_operand" "") + (ne:DI (match_dup 1) (const_int 0)))] + "" + " +{ + operands[1] = gen_reg_rtx (CCmode); + operands[2] = ia64_compare_op0; + operands[3] = ia64_compare_op1; +}") + +(define_expand "slt" + [(set (match_dup 1) + (lt:CC (match_dup 2) + (match_dup 3))) + (set (match_operand:DI 0 "register_operand" "") + (ne:DI (match_dup 1) (const_int 0)))] + "" + " +{ + operands[1] = gen_reg_rtx (CCmode); + operands[2] = ia64_compare_op0; + operands[3] = ia64_compare_op1; +}") + +(define_expand "sle" + [(set (match_dup 1) + (le:CC (match_dup 2) + (match_dup 3))) + (set (match_operand:DI 0 "register_operand" "") + (ne:DI (match_dup 1) (const_int 0)))] + "" + " +{ + operands[1] = gen_reg_rtx (CCmode); + operands[2] = ia64_compare_op0; + operands[3] = ia64_compare_op1; +}") + +(define_expand "sgt" + [(set (match_dup 1) + (gt:CC (match_dup 2) + (match_dup 3))) + (set (match_operand:DI 0 "register_operand" "") + (ne:DI (match_dup 1) (const_int 0)))] + "" + " +{ + operands[1] = gen_reg_rtx (CCmode); + operands[2] = ia64_compare_op0; + operands[3] = ia64_compare_op1; +}") + +(define_expand "sge" + [(set (match_dup 1) + (ge:CC (match_dup 2) + (match_dup 3))) + (set (match_operand:DI 0 "register_operand" "") + (ne:DI (match_dup 1) (const_int 0)))] + "" + " +{ + operands[1] = gen_reg_rtx (CCmode); + operands[2] = ia64_compare_op0; + operands[3] = ia64_compare_op1; +}") + +(define_expand "sltu" + [(set (match_dup 1) + (ltu:CC (match_dup 2) + (match_dup 3))) + (set (match_operand:DI 0 "register_operand" "") + (ne:DI (match_dup 1) (const_int 0)))] 
+ "" + " +{ + operands[1] = gen_reg_rtx (CCmode); + operands[2] = ia64_compare_op0; + operands[3] = ia64_compare_op1; +}") + +(define_expand "sleu" + [(set (match_dup 1) + (leu:CC (match_dup 2) + (match_dup 3))) + (set (match_operand:DI 0 "register_operand" "") + (ne:DI (match_dup 1) (const_int 0)))] + "" + " +{ + operands[1] = gen_reg_rtx (CCmode); + operands[2] = ia64_compare_op0; + operands[3] = ia64_compare_op1; +}") + +(define_expand "sgtu" + [(set (match_dup 1) + (gtu:CC (match_dup 2) + (match_dup 3))) + (set (match_operand:DI 0 "register_operand" "") + (ne:DI (match_dup 1) (const_int 0)))] + "" + " +{ + operands[1] = gen_reg_rtx (CCmode); + operands[2] = ia64_compare_op0; + operands[3] = ia64_compare_op1; +}") + +(define_expand "sgeu" + [(set (match_dup 1) + (geu:CC (match_dup 2) + (match_dup 3))) + (set (match_operand:DI 0 "register_operand" "") + (ne:DI (match_dup 1) (const_int 0)))] + "" + " +{ + operands[1] = gen_reg_rtx (CCmode); + operands[2] = ia64_compare_op0; + operands[3] = ia64_compare_op1; +}") + +;; Don't allow memory as destination here, because cmov/cmov/st is more +;; efficient than mov/mov/cst/cst. + +(define_insn "*sne_internal" + [(set (match_operand:DI 0 "register_operand" "=r") + (ne:DI (match_operand:CC 1 "register_operand" "c") + (const_int 0)))] + "" + "#" + [(set_attr "type" "unknown")]) + +(define_split + [(set (match_operand:DI 0 "register_operand" "") + (ne:DI (match_operand:CC 1 "register_operand" "") + (const_int 0)))] + "reload_completed" + [(set (match_dup 0) + (if_then_else:DI (ne:CC (match_dup 1) (const_int 0)) + (const_int 1) + (match_dup 0))) + (set (match_dup 0) + (if_then_else:DI (ne:CC (match_dup 1) (const_int 0)) + (match_dup 0) + (const_int 0)))] + "") + +;; ??? Unknown if this can be matched. + +(define_insn "*seq_internal" + [(set (match_operand:DI 0 "register_operand" "=r") + (eq:DI (match_operand:CC 1 "register_operand" "c") + (const_int 0)))] + "" + "#" + [(set_attr "type" "unknown")]) + +;; ??? 
Unknown if this can be matched. + +(define_split + [(set (match_operand:DI 0 "register_operand" "") + (eq:DI (match_operand:CC 1 "register_operand" "") + (const_int 0)))] + "reload_completed" + [(set (match_dup 0) + (if_then_else:DI (eq:CC (match_dup 1) (const_int 0)) + (const_int 1) + (match_dup 0))) + (set (match_dup 0) + (if_then_else:DI (eq:CC (match_dup 1) (const_int 0)) + (match_dup 0) + (const_int 0)))] + "") + + +;; :::::::::::::::::::: +;; :: +;; :: Conditional move instructions. +;; :: +;; :::::::::::::::::::: + +;; ??? Add movXXcc patterns? + +;; ??? The predicates don't match the constraints. + +;; ??? r/c/m/m and m/c/r/r alternatives make sense, but won't work until the +;; predicates are fixed, because the define_splits won't recognize them. + +;; +;; DImode if_then_else patterns. +;; + +(define_insn "*cmovne_internal" + [(set (match_operand:DI 0 "register_operand" "=r,r,m,r,r,m,r") + (if_then_else:DI (ne:CC (match_operand:CC 1 "register_operand" "c,c,c,c,c,c,c") + (const_int 0)) + (match_operand:DI 2 "reg_or_22bit_operand" "0,0,0,rI,m,r,rI") + (match_operand:DI 3 "reg_or_22bit_operand" "rI,m,r,0,0,0,rI")))] + "" + "@ + (%I1) mov %0 = %3 + (%I1) ld8%O3 %0 = %3 + (%I1) st8%Q0 %0 = %3 + (%1) mov %0 = %2 + (%1) ld8%O2 %0 = %2 + (%1) st8%Q0 %0 = %2 + #" + [(set_attr "type" "A,M,M,A,M,M,unknown")]) + +(define_split + [(set (match_operand:DI 0 "register_operand" "") + (if_then_else:DI (ne:CC (match_operand:CC 1 "register_operand" "") + (const_int 0)) + (match_operand:DI 2 "reg_or_22bit_operand" "") + (match_operand:DI 3 "reg_or_22bit_operand" "")))] + "(reload_completed + && ! rtx_equal_p (operands[0], operands[2]) + && ! rtx_equal_p (operands[0], operands[3]))" + [(set (match_dup 0) + (if_then_else:DI (ne:CC (match_dup 1) (const_int 0)) + (match_dup 2) + (match_dup 0))) + (set (match_dup 0) + (if_then_else:DI (ne:CC (match_dup 1) (const_int 0)) + (match_dup 0) + (match_dup 3)))] + "") + +;; ??? Unknown if this can be matched. 
+ +(define_insn "*cmoveq_internal" + [(set (match_operand:DI 0 "register_operand" "=r,r,m,r,r,m,r") + (if_then_else:DI (eq:CC (match_operand:CC 1 "register_operand" "c,c,c,c,c,c,c") + (const_int 0)) + (match_operand:DI 2 "reg_or_22bit_operand" "0,0,0,rI,m,r,rI") + (match_operand:DI 3 "reg_or_22bit_operand" "rI,m,r,0,0,0,rI")))] + "" + "@ + (%1) mov %0 = %3 + (%1) ld8%O3 %0 = %3 + (%1) st8%Q0 %0 = %3 + (%I1) mov %0 = %2 + (%I1) ld8%O2 %0 = %2 + (%I1) st8%Q0 %0 = %2 + #" + [(set_attr "type" "A,M,M,A,M,M,unknown")]) + +;; ??? Unknown if this can be matched. + +(define_split + [(set (match_operand:DI 0 "register_operand" "") + (if_then_else:DI (eq:CC (match_operand:CC 1 "register_operand" "") + (const_int 0)) + (match_operand:DI 2 "reg_or_22bit_operand" "") + (match_operand:DI 3 "reg_or_22bit_operand" "")))] + "(reload_completed + && ! rtx_equal_p (operands[0], operands[2]) + && ! rtx_equal_p (operands[0], operands[3]))" + [(set (match_dup 0) + (if_then_else:DI (eq:CC (match_dup 1) (const_int 0)) + (match_dup 2) + (match_dup 0))) + (set (match_dup 0) + (if_then_else:DI (eq:CC (match_dup 1) (const_int 0)) + (match_dup 0) + (match_dup 3)))] + "") + +;; Absolute value pattern. + +(define_insn "*absdi2_internal" + [(set (match_operand:DI 0 "register_operand" "=r,r") + (if_then_else:DI (ne:CC (match_operand:CC 1 "register_operand" "c,c") + (const_int 0)) + (match_operand:DI 2 "reg_or_22bit_operand" "0,rI") + (neg:DI (match_operand:DI 3 "reg_or_22bit_operand" "rI,rI"))))] + "" + "@ + (%I1) sub %0 = r0, %3 + #" + [(set_attr "type" "A,unknown")]) + +(define_split + [(set (match_operand:DI 0 "register_operand" "") + (if_then_else:DI (ne:CC (match_operand:CC 1 "register_operand" "") + (const_int 0)) + (match_operand:DI 2 "reg_or_22bit_operand" "") + (neg:DI (match_operand:DI 3 "reg_or_22bit_operand" ""))))] + "reload_completed && ! 
rtx_equal_p (operands[0], operands[2])" + [(set (match_dup 0) + (if_then_else:DI (ne:CC (match_dup 1) (const_int 0)) + (match_dup 2) + (match_dup 0))) + (set (match_dup 0) + (if_then_else:DI (ne:CC (match_dup 1) (const_int 0)) + (match_dup 0) + (neg:DI (match_dup 3))))] + "") + +;; ??? Unknown if this can be generated. If so, then add a define_split as +;; above. + +(define_insn "*absdi2_not_internal" + [(set (match_operand:DI 0 "register_operand" "=r,r") + (if_then_else:DI (ne:CC (match_operand:CC 1 "register_operand" "c,c") + (const_int 0)) + (neg:DI (match_operand:DI 2 "reg_or_22bit_operand" "rI,rI")) + (match_operand:DI 3 "reg_or_22bit_operand" "0,rI")))] + "" + "*abort ();" + [(set_attr "type" "unknown")]) + +;; +;; SImode if_then_else patterns. +;; + +(define_insn "*cmovnesi_internal" + [(set (match_operand:SI 0 "register_operand" "=r,r,m,r,r,m,r") + (if_then_else:SI (ne:CC (match_operand:CC 1 "register_operand" "c,c,c,c,c,c,c") + (const_int 0)) + (match_operand:SI 2 "reg_or_22bit_operand" "0,0,0,rI,m,r,rI") + (match_operand:SI 3 "reg_or_22bit_operand" "rI,m,r,0,0,0,rI")))] + "" + "@ + (%I1) mov %0 = %3 + (%I1) ld4%O3 %0 = %3 + (%I1) st4%Q0 %0 = %3 + (%1) mov %0 = %2 + (%1) ld4%O2 %0 = %2 + (%1) st4%Q0 %0 = %2 + #" + [(set_attr "type" "A,M,M,A,M,M,unknown")]) + +(define_split + [(set (match_operand:SI 0 "register_operand" "") + (if_then_else:SI (ne:CC (match_operand:CC 1 "register_operand" "") + (const_int 0)) + (match_operand:SI 2 "reg_or_22bit_operand" "") + (match_operand:SI 3 "reg_or_22bit_operand" "")))] + "(reload_completed + && ! rtx_equal_p (operands[0], operands[2]) + && ! 
rtx_equal_p (operands[0], operands[3]))" + [(set (match_dup 0) + (if_then_else:SI (ne:CC (match_dup 1) (const_int 0)) + (match_dup 2) + (match_dup 0))) + (set (match_dup 0) + (if_then_else:SI (ne:CC (match_dup 1) (const_int 0)) + (match_dup 0) + (match_dup 3)))] + "") + +(define_insn "*abssi2_internal" + [(set (match_operand:SI 0 "register_operand" "=r,r") + (if_then_else:SI (ne:CC (match_operand:CC 1 "register_operand" "c,c") + (const_int 0)) + (match_operand:SI 2 "reg_or_22bit_operand" "0,rI") + (neg:SI (match_operand:SI 3 "reg_or_22bit_operand" "rI,rI"))))] + "" + "@ + (%I1) sub %0 = r0, %3 + #" + [(set_attr "type" "A,unknown")]) + +(define_split + [(set (match_operand:SI 0 "register_operand" "") + (if_then_else:SI (ne:CC (match_operand:CC 1 "register_operand" "") + (const_int 0)) + (match_operand:SI 2 "reg_or_22bit_operand" "") + (neg:SI (match_operand:SI 3 "reg_or_22bit_operand" ""))))] + "reload_completed && ! rtx_equal_p (operands[0], operands[2])" + [(set (match_dup 0) + (if_then_else:SI (ne:CC (match_dup 1) (const_int 0)) + (match_dup 2) + (match_dup 0))) + (set (match_dup 0) + (if_then_else:SI (ne:CC (match_dup 1) (const_int 0)) + (match_dup 0) + (neg:SI (match_dup 3))))] + "") + + +;; :::::::::::::::::::: +;; :: +;; :: Call and branch instructions +;; :: +;; :::::::::::::::::::: + +;; Subroutine call instruction returning no value. Operand 0 is the function +;; to call; operand 1 is the number of bytes of arguments pushed (in mode +;; `SImode', except it is normally a `const_int'); operand 2 is the number of +;; registers used as operands. + +;; On most machines, operand 2 is not actually stored into the RTL pattern. It +;; is supplied for the sake of some RISC machines which need to put this +;; information into the assembler code; they can put it in the RTL instead of +;; operand 1. 
+ +(define_expand "call" + [(use (match_operand:DI 0 "" "")) + (use (match_operand 1 "" "")) + (use (match_operand 2 "" "")) + (use (match_operand 3 "" ""))] + "" + " +{ + /* ??? Stripping off the MEM isn't correct. Will lose alias info. */ + rtx addr = XEXP (operands[0], 0); + enum machine_mode mode = GET_MODE (addr); + + if (TARGET_NO_PIC) + emit_call_insn (gen_call_internal (addr, operands[1], + gen_rtx_REG (DImode, R_BR (0)))); + + /* If this is an indirect call, then we have the address of a descriptor. */ + else if (! symbolic_operand (addr, mode)) + emit_insn (gen_indirect_call_pic (addr, operands[1])); + /* ??? This is an unsatisfying solution. Should rethink. */ + else if (setjmp_operand (addr, mode)) + emit_insn (gen_setjmp_call_pic (addr, operands[1])); + else + emit_insn (gen_call_pic (addr, operands[1])); + + DONE; +}") + +(define_expand "indirect_call_pic" + [(set (match_dup 2) (reg:DI 1)) + (set (match_dup 3) (mem:DI (match_operand 0 "" ""))) + (set (match_dup 4) (plus:DI (match_dup 0) (const_int 8))) + (set (reg:DI 1) (mem:DI (match_dup 4))) + (parallel [(call (mem:DI (match_dup 3)) (match_operand 1 "" "")) + (use (reg:DI 1)) + (clobber (reg:DI 320))]) + (set (reg:DI 1) (match_dup 2))] + "" + " +{ + operands[2] = gen_reg_rtx (DImode); + operands[3] = gen_reg_rtx (DImode); + operands[4] = gen_reg_rtx (DImode); +}") + +;; We can't save GP in a pseudo if we are calling setjmp, because pseudos +;; won't be restored by longjmp. For now, we save it in r4. + +;; ??? It would be more efficient to save this directly into a stack slot. +;; Unfortunately, the stack slot address gets cse'd across the setjmp call +;; because the NOTE_INSN_SETJMP note is in the wrong place. + +;; ??? This is an unsatisfying solution. Should rethink. 
+ +(define_expand "setjmp_call_pic" + [(set (match_dup 2) (reg:DI 1)) + (parallel [(call (mem:DI (match_operand 0 "" "")) (match_operand 1 "" "")) + (use (reg:DI 1)) + (clobber (reg:DI 320))]) + (set (reg:DI 1) (match_dup 2))] + "" + " +{ + operands[2] = gen_rtx_REG (DImode, GR_REG (4)); +}") + +;; ??? Saving/restoring the GP register is not needed if we are calling +;; a function in the same module. + +(define_expand "call_pic" + [(set (match_dup 2) (reg:DI 1)) + (parallel [(call (mem:DI (match_operand 0 "" "")) (match_operand 1 "" "")) + (use (reg:DI 1)) + (clobber (reg:DI 320))]) + (set (reg:DI 1) (match_dup 2))] + "" + " +{ + operands[2] = gen_reg_rtx (DImode); +}") + +;; ??? A call must end a group, otherwise, the assembler might pack it in +;; a group with a following branch, and then the function return goes to the +;; wrong place. We could perhaps handle this in emit_insn_group_barriers. + +(define_insn "call_internal" + [(call (mem:DI (match_operand:DI 0 "call_operand" "bi")) + (match_operand 1 "" "")) + (clobber (match_operand:DI 2 "register_operand" "=b"))] + "" + "br.call.sptk.many %2 = %0 ;;" + [(set_attr "type" "B")]) + +(define_insn "*call_internal1" + [(call (mem:DI (match_operand:DI 0 "call_operand" "bi")) + (match_operand 1 "" "")) + (use (reg:DI 1)) + (clobber (match_operand:DI 2 "register_operand" "=b"))] + "" + "br.call.sptk.many %2 = %0 ;;" + [(set_attr "type" "B")]) + +;; Subroutine call instruction returning a value. Operand 0 is the hard +;; register in which the value is returned. There are three more operands, the +;; same as the three operands of the `call' instruction (but with numbers +;; increased by one). + +;; Subroutines that return `BLKmode' objects use the `call' insn. + +(define_expand "call_value" + [(use (match_operand 0 "" "")) + (use (match_operand:DI 1 "" "")) + (use (match_operand 2 "" "")) + (use (match_operand 3 "" "")) + (use (match_operand 4 "" ""))] + "" + " +{ + /* ??? Stripping off the MEM isn't correct. 
Will lose alias info. */ + rtx addr = XEXP (operands[1], 0); + enum machine_mode mode = GET_MODE (addr); + + if (TARGET_NO_PIC) + emit_call_insn (gen_call_value_internal (operands[0], addr, operands[2], + gen_rtx_REG (DImode, R_BR (0)))); + + /* If this is an indirect call, then we have the address of a descriptor. */ + else if (! symbolic_operand (addr, mode)) + { + /* This is for HFA returns. */ + if (GET_CODE (operands[0]) == PARALLEL) + emit_insn (gen_indirect_call_multiple_values_pic (operands[0], addr, + operands[2])); + else + emit_insn (gen_indirect_call_value_pic (operands[0], addr, + operands[2])); + } + /* ??? This is an unsatisfying solution. Should rethink. */ + else if (setjmp_operand (addr, mode)) + emit_insn (gen_setjmp_call_value_pic (operands[0], addr, operands[2])); + /* This is for HFA returns. */ + else if (GET_CODE (operands[0]) == PARALLEL) + emit_insn (gen_call_multiple_values_pic (operands[0], addr, operands[2])); + else + emit_insn (gen_call_value_pic (operands[0], addr, operands[2])); + + DONE; +}") + +(define_expand "indirect_call_value_pic" + [(set (match_dup 3) (reg:DI 1)) + (set (match_dup 4) (mem:DI (match_operand 1 "" ""))) + (set (match_dup 5) (plus:DI (match_dup 1) (const_int 8))) + (set (reg:DI 1) (mem:DI (match_dup 5))) + (parallel [(set (match_operand 0 "" "") + (call (mem:DI (match_dup 4)) (match_operand 2 "" ""))) + (use (reg:DI 1)) + (clobber (reg:DI 320))]) + (set (reg:DI 1) (match_dup 3))] + "" + " +{ + operands[3] = gen_reg_rtx (DImode); + operands[4] = gen_reg_rtx (DImode); + operands[5] = gen_reg_rtx (DImode); +}") + +(define_expand "indirect_call_multiple_values_pic" + [(set (match_dup 3) (reg:DI 1)) + (set (match_dup 4) (mem:DI (match_operand 1 "" ""))) + (set (match_dup 5) (plus:DI (match_dup 1) (const_int 8))) + (set (reg:DI 1) (mem:DI (match_dup 5))) + (match_par_dup 6 [(set (match_operand 0 "" "") + (call (mem:DI (match_dup 4)) + (match_operand 2 "" ""))) + (use (reg:DI 1)) + (clobber (reg:DI 320))]) + (set 
(reg:DI 1) (match_dup 3))] + "" + " +{ + int count; + int i; + rtx call; + + operands[3] = gen_reg_rtx (DImode); + operands[4] = gen_reg_rtx (DImode); + operands[5] = gen_reg_rtx (DImode); + + /* This code is the same as the code in call_multiple_values_pic, except + that op3 was replaced with op6 and op1 was replaced with op4. */ + call = gen_rtx_CALL (VOIDmode, gen_rtx_MEM (DImode, operands[4]), + operands[2]); + + count = XVECLEN (operands[0], 0); + operands[6] = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (count + 2)); + + XVECEXP (operands[6], 0, 0) + = gen_rtx_SET (VOIDmode, XEXP (XVECEXP (operands[0], 0, 0), 0), call); + + XVECEXP (operands[6], 0, 1) + = gen_rtx_USE (DImode, gen_rtx_REG (DImode, GR_REG (1))); + XVECEXP (operands[6], 0, 2) + = gen_rtx_CLOBBER (DImode, gen_rtx_REG (DImode, BR_REG (0))); + + for (i = 1; i < count; i++) + XVECEXP (operands[6], 0, i + 2) + = gen_rtx_SET (VOIDmode, XEXP (XVECEXP (operands[0], 0, i), 0), call); + +}") + +;; We can't save GP in a pseudo if we are calling setjmp, because pseudos +;; won't be restored by longjmp. For now, we save it in r4. + +;; ??? It would be more efficient to save this directly into a stack slot. +;; Unfortunately, the stack slot address gets cse'd across the setjmp call +;; because the NOTE_INSN_SETJMP note is in the wrong place. + +;; ??? This is an unsatisfying solution. Should rethink. + +(define_expand "setjmp_call_value_pic" + [(set (match_dup 3) (reg:DI 1)) + (parallel [(set (match_operand 0 "" "") + (call (mem:DI (match_operand 1 "" "")) + (match_operand 2 "" ""))) + (use (reg:DI 1)) + (clobber (reg:DI 320))]) + (set (reg:DI 1) (match_dup 3))] + "" + " +{ + operands[3] = gen_rtx_REG (DImode, GR_REG (4)); +}") + +;; ??? Saving/restoring the GP register is not needed if we are calling +;; a function in the same module. 
+ +(define_expand "call_value_pic" + [(set (match_dup 3) (reg:DI 1)) + (parallel [(set (match_operand 0 "" "") + (call (mem:DI (match_operand 1 "" "")) + (match_operand 2 "" ""))) + (use (reg:DI 1)) + (clobber (reg:DI 320))]) + (set (reg:DI 1) (match_dup 3))] + "" + " +{ + operands[3] = gen_reg_rtx (DImode); +}") + +;; ??? Saving/restoring the GP register is not needed if we are calling +;; a function in the same module. + +(define_expand "call_multiple_values_pic" + [(set (match_dup 4) (reg:DI 1)) + (match_par_dup 3 [(set (match_operand 0 "" "") + (call (mem:DI (match_operand 1 "" "")) + (match_operand 2 "" ""))) + (use (reg:DI 1)) + (clobber (reg:DI 320))]) + (set (reg:DI 1) (match_dup 4))] + "" + " +{ + int count; + int i; + rtx call; + + operands[4] = gen_reg_rtx (DImode); + + call = gen_rtx_CALL (VOIDmode, gen_rtx_MEM (DImode, operands[1]), + operands[2]); + + count = XVECLEN (operands[0], 0); + operands[3] = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (count + 2)); + + XVECEXP (operands[3], 0, 0) + = gen_rtx_SET (VOIDmode, XEXP (XVECEXP (operands[0], 0, 0), 0), call); + + XVECEXP (operands[3], 0, 1) + = gen_rtx_USE (DImode, gen_rtx_REG (DImode, GR_REG (1))); + XVECEXP (operands[3], 0, 2) + = gen_rtx_CLOBBER (DImode, gen_rtx_REG (DImode, BR_REG (0))); + + for (i = 1; i < count; i++) + XVECEXP (operands[3], 0, i + 2) + = gen_rtx_SET (VOIDmode, XEXP (XVECEXP (operands[0], 0, i), 0), call); +}") + +;; ??? A call must end a group, otherwise, the assembler might pack it in +;; a group with a following branch, and then the function return goes to the +;; wrong place. We could perhaps handle this in emit_insn_group_barriers. 
+ +(define_insn "call_value_internal" + [(set (match_operand 0 "register_operand" "=rf") + (call (mem:DI (match_operand:DI 1 "call_operand" "bi")) + (match_operand 2 "" ""))) + (clobber (match_operand:DI 3 "register_operand" "=b"))] + "" + "br.call.sptk.many %3 = %1 ;;" + [(set_attr "type" "B")]) + +(define_insn "*call_value_internal1" + [(set (match_operand 0 "register_operand" "=rf") + (call (mem:DI (match_operand:DI 1 "call_operand" "bi")) + (match_operand 2 "" ""))) + (use (reg:DI 1)) + (clobber (match_operand:DI 3 "register_operand" "=b"))] + "" + "br.call.sptk.many %3 = %1 ;;" + [(set_attr "type" "B")]) + +(define_insn "*call_multiple_values_internal1" + [(match_parallel 0 "call_multiple_values_operation" + [(set (match_operand 1 "register_operand" "=rf") + (call (mem:DI (match_operand:DI 2 "call_operand" "bi")) + (match_operand 3 "" ""))) + (use (reg:DI 1)) + (clobber (match_operand:DI 4 "register_operand" "=b"))])] + "" + "br.call.sptk.many %4 = %2 ;;" + [(set_attr "type" "B")]) + +;; Call subroutine returning any type. + +(define_expand "untyped_call" + [(parallel [(call (match_operand 0 "" "") + (const_int 0)) + (match_operand 1 "" "") + (match_operand 2 "" "")])] + "" + " +{ + int i; + + emit_call_insn (gen_call (operands[0], const0_rtx, NULL, const0_rtx)); + + for (i = 0; i < XVECLEN (operands[2], 0); i++) + { + rtx set = XVECEXP (operands[2], 0, i); + emit_move_insn (SET_DEST (set), SET_SRC (set)); + } + + /* The optimizer does not know that the call sets the function value + registers we stored in the result block. We avoid problems by + claiming that all hard registers are used and clobbered at this + point. 
*/ + emit_insn (gen_blockage ()); + + DONE; +}") + +(define_insn "return_internal" + [(return) + (use (match_operand:DI 0 "register_operand" "b"))] + "" + "br.ret.sptk.many %0" + [(set_attr "type" "B")]) + +(define_insn "return" + [(return)] + "ia64_direct_return ()" + "br.ret.sptk.many rp" + [(set_attr "type" "B")]) + +(define_insn "*eq_return" + [(set (pc) + (if_then_else (eq:CC (match_operand:CC 0 "register_operand" "c") + (const_int 0)) + (return) + (pc)))] + "ia64_direct_return ()" + "(%I0) br.ret.sptk.many rp" + [(set_attr "type" "B")]) + +(define_insn "*eq_not_return" + [(set (pc) + (if_then_else (eq:CC (match_operand:CC 0 "register_operand" "c") + (const_int 0)) + (pc) + (return)))] + "ia64_direct_return ()" + "(%0) br.ret.sptk.many rp" + [(set_attr "type" "B")]) + +(define_insn "*ne_return" + [(set (pc) + (if_then_else (ne (match_operand:CC 0 "register_operand" "c") + (const_int 0)) + (return) + (pc)))] + "ia64_direct_return ()" + "(%0) br.ret.sptk.many rp" + [(set_attr "type" "B")]) + +(define_insn "*ne_not_return" + [(set (pc) + (if_then_else (ne (match_operand:CC 0 "register_operand" "c") + (const_int 0)) + (pc) + (return)))] + "ia64_direct_return ()" + "(%I0) br.ret.sptk.many rp" + [(set_attr "type" "B")]) + +(define_insn "jump" + [(set (pc) (label_ref (match_operand 0 "" "")))] + "" + "br %l0" + [(set_attr "type" "B")]) + +(define_insn "indirect_jump" + [(set (pc) (match_operand:DI 0 "register_operand" "b"))] + "" + "br %0" + [(set_attr "type" "B")]) + +(define_expand "tablejump" + [(match_operand:DI 0 "register_operand" "") + (match_operand 1 "" "")] + "" + " +{ + rtx tmp1 = gen_reg_rtx (DImode); + rtx tmp2 = gen_reg_rtx (DImode); + + emit_move_insn (tmp1, gen_rtx_LABEL_REF (Pmode, operands[1])); + emit_insn (gen_adddi3 (tmp2, operands[0], tmp1)); + emit_jump_insn (gen_tablejump_internal (tmp2, operands[1])); + DONE; +}") + +(define_insn "tablejump_internal" + [(set (pc) (match_operand:DI 0 "register_operand" "b")) + (use (label_ref (match_operand 1 
"" "")))] + "" + "br %0" + [(set_attr "type" "B")]) + + +;; :::::::::::::::::::: +;; :: +;; :: Prologue and Epilogue instructions +;; :: +;; :::::::::::::::::::: + +(define_expand "prologue" + [(const_int 1)] + "" + " +{ + ia64_expand_prologue (); + DONE; +}") + +(define_expand "epilogue" + [(const_int 2)] + "" + " +{ + ia64_expand_epilogue (); + DONE; +}") + +;; This prevents the scheduler from moving the SP decrement past FP-relative +;; stack accesses. This is the same as adddi3 plus the extra set. + +(define_insn "prologue_allocate_stack" + [(set (match_operand:DI 0 "register_operand" "=r,r,r") + (plus:DI (match_operand:DI 1 "register_operand" "%r,r,a") + (match_operand:DI 2 "reg_or_22bit_operand" "r,I,J"))) + (set (match_operand:DI 3 "register_operand" "=r,r,r") + (match_dup 3))] + "" + "@ + add %0 = %1, %2 + adds %0 = %2, %1 + addl %0 = %2, %1" + [(set_attr "type" "A")]) + +;; This prevents the scheduler from moving the SP restore past FP-relative +;; stack accesses. This is similar to movdi plus the extra set. + +(define_insn "epilogue_deallocate_stack" + [(set (match_operand:DI 0 "register_operand" "=r") + (match_operand:DI 1 "register_operand" "+r")) + (set (match_dup 1) (match_dup 1))] + "" + "mov %0 = %1" + [(set_attr "type" "A")]) + +;; Allocate a new register frame. 
+ +(define_insn "alloc" + [(set (match_operand:DI 0 "register_operand" "=r") + (unspec_volatile:DI [(const_int 0)] 0)) + (use (match_operand:DI 1 "const_int_operand" "i")) + (use (match_operand:DI 2 "const_int_operand" "i")) + (use (match_operand:DI 3 "const_int_operand" "i")) + (use (match_operand:DI 4 "const_int_operand" "i"))] + "" + "alloc %0 = ar.pfs, %1, %2, %3, %4" + [(set_attr "type" "M")]) + +(define_insn "gr_spill" + [(set (match_operand:DI 0 "memory_operand" "=m") + (unspec:DI [(match_operand:DI 1 "register_operand" "r")] 1))] + "" + "st8.spill %0 = %1%P0" + [(set_attr "type" "M")]) + +(define_insn "gr_restore" + [(set (match_operand:DI 0 "register_operand" "=r") + (unspec:DI [(match_operand:DI 1 "memory_operand" "m")] 2))] + "" + "ld8.fill %0 = %1%P1" + [(set_attr "type" "M")]) + +(define_insn "fr_spill" + [(set (match_operand:XF 0 "memory_operand" "=m") + (unspec:XF [(match_operand:XF 1 "register_operand" "f*e")] 3))] + "" + "stf.spill %0 = %1%P0" + [(set_attr "type" "M")]) + +(define_insn "fr_restore" + [(set (match_operand:XF 0 "register_operand" "=f*e") + (unspec:XF [(match_operand:XF 1 "memory_operand" "m")] 4))] + "" + "ldf.fill %0 = %1%P1" + [(set_attr "type" "M")]) + +(define_insn "pr_spill" + [(set (match_operand:DI 0 "register_operand" "=r") + (unspec:DI [(const_int 0)] 5))] + "" + "mov %0 = pr" + [(set_attr "type" "I")]) + +(define_insn "pr_restore" + [(unspec [(const_int 0)] 6) + (use (match_operand:DI 0 "register_operand" "r"))] + "" + "mov pr = %0, -1" + [(set_attr "type" "I")]) + +;; ??? This is volatile to prevent it from being moved before a call. +;; Should instead add a ar.pfs hard register which is call clobbered. 
+ +(define_insn "pfs_restore" + [(unspec_volatile [(const_int 0)] 4) + (use (match_operand:DI 0 "register_operand" "r"))] + "" + "mov ar.pfs = %0" + [(set_attr "type" "I")]) + +(define_insn "unat_spill" + [(set (match_operand:DI 0 "register_operand" "=r") + (unspec:DI [(const_int 0)] 9))] + "" + "mov %0 = ar.unat" + [(set_attr "type" "M")]) + +(define_insn "unat_restore" + [(unspec [(const_int 0)] 10) + (use (match_operand:DI 0 "register_operand" "r"))] + "" + "mov ar.unat = %0" + [(set_attr "type" "M")]) + + +;; :::::::::::::::::::: +;; :: +;; :: Miscellaneous instructions +;; :: +;; :::::::::::::::::::: + +;; ??? Emiting a NOP instruction isn't very useful. This should probably +;; be emitting ";;" to force a break in the instruction packing. + +;; No operation, needed in case the user uses -g but not -O. +(define_insn "nop" + [(const_int 0)] + "" + "nop 0" + [(set_attr "type" "unknown")]) + +;; Pseudo instruction that prevents the scheduler from moving code above this +;; point. +(define_insn "blockage" + [(unspec_volatile [(const_int 0)] 1)] + "" + "" + [(set_attr "type" "unknown")]) + +(define_insn "insn_group_barrier" + [(unspec_volatile [(const_int 0)] 2)] + "" + ";;" + [(set_attr "type" "S")]) + + +;; Non-local goto support. 
+ +(define_expand "save_stack_nonlocal" + [(use (match_operand:OI 0 "memory_operand" "")) + (use (match_operand:DI 1 "register_operand" ""))] + "" + " +{ + emit_library_call (gen_rtx_SYMBOL_REF (Pmode, + \"__ia64_save_stack_nonlocal\"), + 0, VOIDmode, 2, XEXP (operands[0], 0), Pmode, + operands[1], Pmode); + DONE; +}") + +(define_expand "nonlocal_goto" + [(use (match_operand 0 "general_operand" "")) + (use (match_operand 1 "general_operand" "")) + (use (match_operand 2 "general_operand" "")) + (use (match_operand 3 "general_operand" ""))] + "" + " +{ + if (GET_CODE (operands[0]) != REG) + operands[0] = force_reg (Pmode, operands[0]); + emit_move_insn (virtual_stack_vars_rtx, operands[0]); + emit_insn (gen_rtx_USE (VOIDmode, frame_pointer_rtx)); + emit_insn (gen_rtx_USE (VOIDmode, stack_pointer_rtx)); + emit_insn (gen_rtx_USE (VOIDmode, static_chain_rtx)); + emit_library_call (gen_rtx_SYMBOL_REF (Pmode, \"__ia64_nonlocal_goto\"), + 0, VOIDmode, 4, + operands[0], Pmode, operands[1], Pmode, + copy_to_reg (XEXP (operands[2], 0)), Pmode, + operands[3], Pmode); + emit_barrier (); + DONE; +}") + +;; ??? We need this because the function __ia64_nonlocal_goto can't easily +;; access the FP which is currently stored in a local register. Maybe move +;; the FP to a global register to avoid this problem? + +(define_expand "nonlocal_goto_receiver" + [(use (const_int 0))] + "" + " +{ + emit_move_insn (frame_pointer_rtx, gen_rtx_REG (DImode, GR_REG (7))); + DONE; +}") + +;; This flushes at least 64 bytes starting from the address pointed +;; to by operand[0]. + +;; ??? This should be a define expand. 
+ +(define_insn "flush_cache" + [(unspec_volatile [(match_operand:DI 0 "register_operand" "=&r")] 3)] + "" + "fc %0\;;;\;adds %0=31,%0\;;;\;fc %0\;;;\;sync.i\;srlz.i" + [(set_attr "type" "unknown")]) + +(define_insn "ccv_restore_si" + [(unspec [(const_int 0)] 11) + (use (match_operand:SI 0 "register_operand" "r"))] + "" + "mov ar.ccv = %0" + [(set_attr "type" "M")]) + +(define_insn "ccv_restore_di" + [(unspec [(const_int 0)] 11) + (use (match_operand:DI 0 "register_operand" "r"))] + "" + "mov ar.ccv = %0" + [(set_attr "type" "M")]) + +(define_insn "mf" + [(unspec [(match_operand:BLK 0 "memory_operand" "m")] 12)] + "" + "mf" + [(set_attr "type" "M")]) + +(define_insn "fetchadd_acq_si" + [(set (match_operand:SI 0 "register_operand" "=r") + (unspec:SI [(match_operand:SI 1 "memory_operand" "m") + (match_operand:SI 2 "fetchadd_operand" "n")] 19))] + "" + "fetchadd4.acq %0 = %1, %2" + [(set_attr "type" "M")]) + +(define_insn "fetchadd_acq_di" + [(set (match_operand:DI 0 "register_operand" "=r") + (unspec:DI [(match_operand:DI 1 "memory_operand" "m") + (match_operand:DI 2 "fetchadd_operand" "n")] 19))] + "" + "fetchadd8.acq %0 = %1, %2" + [(set_attr "type" "M")]) + +(define_insn "cmpxchg_acq_si" + [(set (match_operand:SI 0 "register_operand" "=r") + (unspec:SI [(match_operand:SI 1 "memory_operand" "m") + (match_operand:SI 2 "register_operand" "r")] 13))] + "" + "cmpxchg4.acq %0 = %1, %2, ar.ccv" + [(set_attr "type" "M")]) + +(define_insn "cmpxchg_acq_di" + [(set (match_operand:DI 0 "register_operand" "=r") + (unspec:DI [(match_operand:DI 1 "memory_operand" "m") + (match_operand:DI 2 "register_operand" "r")] 13))] + "" + "cmpxchg8.acq %0 = %1, %2, ar.ccv" + [(set_attr "type" "M")]) + +(define_expand "val_compare_and_swap_si" + [(set (match_operand:SI 0 "register_operand" "=r") + (unspec:SI [(match_operand:SI 1 "memory_operand" "m") + (match_operand:SI 2 "register_operand" "r") + (match_operand:SI 3 "register_operand" "r")] 14))] + "" + " +{ + rtx tmp_reg = gen_rtx_REG 
(DImode, GR_REG(0)); + rtx target = gen_rtx_MEM (BLKmode, tmp_reg); + RTX_UNCHANGING_P (target) = 1; + emit_insn (gen_ccv_restore_si (operands[2])); + emit_insn (gen_mf (target)); + emit_insn (gen_cmpxchg_acq_si (operands[0], operands[1], operands[3])); + DONE; +}") + +(define_expand "val_compare_and_swap_di" + [(set (match_operand:DI 0 "register_operand" "=r") + (unspec:DI [(match_operand:DI 1 "memory_operand" "m") + (match_operand:DI 2 "register_operand" "r") + (match_operand:DI 3 "register_operand" "r")] 14))] + "" + " +{ + rtx tmp_reg = gen_rtx_REG (DImode, GR_REG(0)); + rtx target = gen_rtx_MEM (BLKmode, tmp_reg); + RTX_UNCHANGING_P (target) = 1; + emit_insn (gen_ccv_restore_di (operands[2])); + emit_insn (gen_mf (target)); + emit_insn (gen_cmpxchg_acq_di (operands[0], operands[1], operands[3])); + DONE; +}") + +(define_insn "xchgsi" + [(set (match_operand:SI 0 "register_operand" "=r") + (match_operand:SI 1 "memory_operand" "+m")) + (set (match_dup 1) + (match_operand:SI 2 "register_operand" "r"))] + "" + "xchg4 %0 = %1, %2" + [(set_attr "type" "M")]) + +(define_insn "xchgdi" + [(set (match_operand:DI 0 "register_operand" "=r") + (match_operand:DI 1 "memory_operand" "+m")) + (set (match_dup 1) + (match_operand:DI 2 "register_operand" "r"))] + "" + "xchg8 %0 = %1, %2" + [(set_attr "type" "M")]) + +(define_expand "lock_test_and_set_si" + [(set (match_operand:SI 0 "register_operand" "r") + (unspec:SI [(match_operand:SI 1 "memory_operand" "m") + (match_operand:SI 2 "register_operand" "r")] 16))] + "" + " +{ + emit_insn (gen_xchgsi (operands[0], operands[1], operands[2])); + DONE; +}") + +(define_expand "lock_test_and_set_di" + [(set (match_operand:DI 0 "register_operand" "r") + (unspec:DI [(match_operand:DI 1 "memory_operand" "m") + (match_operand:DI 2 "register_operand" "r")] 16))] + "" + " +{ + emit_insn (gen_xchgdi (operands[0], operands[1], operands[2])); + DONE; +}") + +(define_expand "fetch_and_add_si" + [(set (match_operand:SI 0 "register_operand" "r") + 
(unspec:SI [(match_operand:SI 1 "memory_operand" "m") + (match_operand:SI 2 "nonmemory_operand" "")] 18))] + "" + " +{ + int x; + + if (GET_CODE (operands[2]) == CONST_INT) + { + x = INTVAL(operands[2]); + if (x == -16 || x == -8 || x == -4 || x == -1 || + x == 16 || x == 8 || x == 4 || x == 1) + { + emit_insn (gen_fetchadd_acq_si (operands[0], operands[1], operands[2])); + DONE; + } + } + + ia64_expand_fetch_and_op (IA64_ADD_OP, SImode, operands); + DONE; +}") + +(define_expand "fetch_and_sub_si" + [(set (match_operand:SI 0 "register_operand" "r") + (unspec:SI [(match_operand:SI 1 "memory_operand" "m") + (match_operand:SI 2 "register_operand" "r")] 18))] + "" + " +{ + ia64_expand_fetch_and_op (IA64_SUB_OP, SImode, operands); + DONE; +}") + +(define_expand "fetch_and_or_si" + [(set (match_operand:SI 0 "register_operand" "r") + (unspec:SI [(match_operand:SI 1 "memory_operand" "m") + (match_operand:SI 2 "register_operand" "r")] 18))] + "" + " +{ + ia64_expand_fetch_and_op (IA64_OR_OP, SImode, operands); + DONE; +}") + +(define_expand "fetch_and_and_si" + [(set (match_operand:SI 0 "register_operand" "r") + (unspec:SI [(match_operand:SI 1 "memory_operand" "m") + (match_operand:SI 2 "register_operand" "r")] 18))] + "" + " +{ + ia64_expand_fetch_and_op (IA64_AND_OP, SImode, operands); + DONE; +}") + +(define_expand "fetch_and_xor_si" + [(set (match_operand:SI 0 "register_operand" "r") + (unspec:SI [(match_operand:SI 1 "memory_operand" "m") + (match_operand:SI 2 "register_operand" "r")] 18))] + "" + " +{ + ia64_expand_fetch_and_op (IA64_XOR_OP, SImode, operands); + DONE; +}") + +(define_expand "fetch_and_nand_si" + [(set (match_operand:SI 0 "register_operand" "r") + (unspec:SI [(match_operand:SI 1 "memory_operand" "m") + (match_operand:SI 2 "register_operand" "r")] 18))] + "" + " +{ + ia64_expand_fetch_and_op (IA64_NAND_OP, SImode, operands); + DONE; +}") + +(define_expand "fetch_and_add_di" + [(set (match_operand:DI 0 "register_operand" "r") + (unspec:DI 
[(match_operand:DI 1 "memory_operand" "m") + (match_operand:DI 2 "nonmemory_operand" "")] 18))] + "" + " +{ + int x; + + if (GET_CODE (operands[2]) == CONST_INT) + { + x = INTVAL(operands[2]); + if (x == -16 || x == -8 || x == -4 || x == -1 || + x == 16 || x == 8 || x == 4 || x == 1) + { + emit_insn (gen_fetchadd_acq_di (operands[0], operands[1], operands[2])); + DONE; + } + } + + ia64_expand_fetch_and_op (IA64_ADD_OP, DImode, operands); + DONE; +}") + +(define_expand "fetch_and_sub_di" + [(set (match_operand:DI 0 "register_operand" "r") + (unspec:DI [(match_operand:DI 1 "memory_operand" "m") + (match_operand:DI 2 "register_operand" "r")] 18))] + "" + " +{ + ia64_expand_fetch_and_op (IA64_SUB_OP, DImode, operands); + DONE; +}") + +(define_expand "fetch_and_or_di" + [(set (match_operand:DI 0 "register_operand" "r") + (unspec:DI [(match_operand:DI 1 "memory_operand" "m") + (match_operand:DI 2 "register_operand" "r")] 18))] + "" + " +{ + ia64_expand_fetch_and_op (IA64_OR_OP, DImode, operands); + DONE; +}") + +(define_expand "fetch_and_and_di" + [(set (match_operand:DI 0 "register_operand" "r") + (unspec:DI [(match_operand:DI 1 "memory_operand" "m") + (match_operand:DI 2 "register_operand" "r")] 18))] + "" + " +{ + ia64_expand_fetch_and_op (IA64_AND_OP, DImode, operands); + DONE; +}") + +(define_expand "fetch_and_xor_di" + [(set (match_operand:DI 0 "register_operand" "r") + (unspec:DI [(match_operand:DI 1 "memory_operand" "m") + (match_operand:DI 2 "register_operand" "r")] 18))] + "" + " +{ + ia64_expand_fetch_and_op (IA64_XOR_OP, DImode, operands); + DONE; +}") + +(define_expand "fetch_and_nand_di" + [(set (match_operand:DI 0 "register_operand" "r") + (unspec:DI [(match_operand:DI 1 "memory_operand" "m") + (match_operand:DI 2 "register_operand" "r")] 18))] + "" + " +{ + ia64_expand_fetch_and_op (IA64_NAND_OP, DImode, operands); + DONE; +}") + +(define_expand "add_and_fetch_di" + [(set (match_operand:DI 0 "register_operand" "r") + (unspec:DI [(match_operand:DI 1 
"memory_operand" "m") + (match_operand:DI 2 "register_operand" "r")] 17))] + "" + " +{ + ia64_expand_op_and_fetch (IA64_ADD_OP, DImode, operands); + DONE; +}") + +(define_expand "sub_and_fetch_di" + [(set (match_operand:DI 0 "register_operand" "r") + (unspec:DI [(match_operand:DI 1 "memory_operand" "m") + (match_operand:DI 2 "register_operand" "r")] 17))] + "" + " +{ + ia64_expand_op_and_fetch (IA64_SUB_OP, DImode, operands); + DONE; +}") + +(define_expand "or_and_fetch_di" + [(set (match_operand:DI 0 "register_operand" "r") + (unspec:DI [(match_operand:DI 1 "memory_operand" "m") + (match_operand:DI 2 "register_operand" "r")] 17))] + "" + " +{ + ia64_expand_op_and_fetch (IA64_OR_OP, DImode, operands); + DONE; +}") + +(define_expand "and_and_fetch_di" + [(set (match_operand:DI 0 "register_operand" "r") + (unspec:DI [(match_operand:DI 1 "memory_operand" "m") + (match_operand:DI 2 "register_operand" "r")] 17))] + "" + " +{ + ia64_expand_op_and_fetch (IA64_AND_OP, DImode, operands); + DONE; +}") + +(define_expand "xor_and_fetch_di" + [(set (match_operand:DI 0 "register_operand" "r") + (unspec:DI [(match_operand:DI 1 "memory_operand" "m") + (match_operand:DI 2 "register_operand" "r")] 17))] + "" + " +{ + ia64_expand_op_and_fetch (IA64_XOR_OP, DImode, operands); + DONE; +}") + +(define_expand "nand_and_fetch_di" + [(set (match_operand:DI 0 "register_operand" "r") + (unspec:DI [(match_operand:DI 1 "memory_operand" "m") + (match_operand:DI 2 "register_operand" "r")] 17))] + "" + " +{ + ia64_expand_op_and_fetch (IA64_NAND_OP, DImode, operands); + DONE; +}") + +(define_expand "add_and_fetch_si" + [(set (match_operand:SI 0 "register_operand" "r") + (unspec:SI [(match_operand:SI 1 "memory_operand" "m") + (match_operand:SI 2 "register_operand" "r")] 17))] + "" + " +{ + ia64_expand_op_and_fetch (IA64_ADD_OP, SImode, operands); + DONE; +}") + +(define_expand "sub_and_fetch_si" + [(set (match_operand:SI 0 "register_operand" "r") + (unspec:SI [(match_operand:SI 1 "memory_operand" 
"m") + (match_operand:SI 2 "register_operand" "r")] 17))] + "" + " +{ + ia64_expand_op_and_fetch (IA64_SUB_OP, SImode, operands); + DONE; +}") + +(define_expand "or_and_fetch_si" + [(set (match_operand:SI 0 "register_operand" "r") + (unspec:SI [(match_operand:SI 1 "memory_operand" "m") + (match_operand:SI 2 "register_operand" "r")] 17))] + "" + " +{ + ia64_expand_op_and_fetch (IA64_OR_OP, SImode, operands); + DONE; +}") + +(define_expand "and_and_fetch_si" + [(set (match_operand:SI 0 "register_operand" "r") + (unspec:SI [(match_operand:SI 1 "memory_operand" "m") + (match_operand:SI 2 "register_operand" "r")] 17))] + "" + " +{ + ia64_expand_op_and_fetch (IA64_AND_OP, SImode, operands); + DONE; +}") + +(define_expand "xor_and_fetch_si" + [(set (match_operand:SI 0 "register_operand" "r") + (unspec:SI [(match_operand:SI 1 "memory_operand" "m") + (match_operand:SI 2 "register_operand" "r")] 17))] + "" + " +{ + ia64_expand_op_and_fetch (IA64_XOR_OP, SImode, operands); + DONE; +}") + +(define_expand "nand_and_fetch_si" + [(set (match_operand:SI 0 "register_operand" "r") + (unspec:SI [(match_operand:SI 1 "memory_operand" "m") + (match_operand:SI 2 "register_operand" "r")] 17))] + "" + " +{ + ia64_expand_op_and_fetch (IA64_NAND_OP, SImode, operands); + DONE; +}") diff --git a/gcc/config/ia64/ia64intrin.h b/gcc/config/ia64/ia64intrin.h new file mode 100644 index 0000000..c75f185 --- /dev/null +++ b/gcc/config/ia64/ia64intrin.h @@ -0,0 +1,60 @@ +#ifndef _IA64INTRIN_H_INCLUDED +#define _IA64INTRIN_H_INCLUDED + +void __sync_synchronize (void); + +int __sync_val_compare_and_swap_si (int *, int, int); +long __sync_val_compare_and_swap_di (long *, long, long); +#define __sync_val_compare_and_swap(A,B,C) ((sizeof (*(A)) == sizeof(int)) ? 
__sync_val_compare_and_swap_si((int *)(A),(int)(B),(int)(C)) : __sync_val_compare_and_swap_di((long *)(A),(long)(B),(long)(C))) + +int __sync_bool_compare_and_swap_si (int *, int, int); +long __sync_bool_compare_and_swap_di (long *, long, long); +#define __sync_bool_compare_and_swap(A,B,C) ((sizeof (*(A)) == sizeof(int)) ? __sync_bool_compare_and_swap_si((int *)(A),(int)(B),(int)(C)) : __sync_bool_compare_and_swap_di((long *)(A),(long)(B),(long)(C))) + +void __sync_lock_release_si (int *); +void __sync_lock_release_di (long *); +#define __sync_lock_release(A) ((sizeof (*(A)) == sizeof(int)) ? __sync_lock_release_si((int *)(A)) : __sync_lock_release_di((long *)(A))) + +int __sync_lock_test_and_set_si (int *, int); +long __sync_lock_test_and_set_di (long *, long); +#define __sync_lock_test_and_set(A,B) ((sizeof (*(A)) == sizeof(int)) ? __sync_lock_test_and_set_si((int *)(A),(int)(B)) : __sync_lock_test_and_set_di((long *)(A),(long)(B))) + +int __sync_fetch_and_add_si (int *, int); +int __sync_fetch_and_sub_si (int *, int); +int __sync_fetch_and_and_si (int *, int); +int __sync_fetch_and_or_si (int *, int); +int __sync_fetch_and_xor_si (int *, int); +int __sync_fetch_and_nand_si (int *, int); +long __sync_fetch_and_add_di (long *, long); +long __sync_fetch_and_sub_di (long *, long); +long __sync_fetch_and_and_di (long *, long); +long __sync_fetch_and_or_di (long *, long); +long __sync_fetch_and_xor_di (long *, long); +long __sync_fetch_and_nand_di (long *, long); +#define __sync_fetch_and_add(A,B) ((sizeof (*(A)) == sizeof(int)) ? __sync_fetch_and_add_si((int *)(A),(int)(B)) : __sync_fetch_and_add_di((long *)(A),(long)(B))) +#define __sync_fetch_and_sub(A,B) ((sizeof (*(A)) == sizeof(int)) ? __sync_fetch_and_sub_si((int *)(A),(int)(B)) : __sync_fetch_and_sub_di((long *)(A),(long)(B))) +#define __sync_fetch_and_and(A,B) ((sizeof (*(A)) == sizeof(int)) ? 
__sync_fetch_and_and_si((int *)(A),(int)(B)) : __sync_fetch_and_and_di((long *)(A),(long)(B))) +#define __sync_fetch_and_or(A,B) ((sizeof (*(A)) == sizeof(int)) ? __sync_fetch_and_or_si((int *)(A),(int)(B)) : __sync_fetch_and_or_di((long *)(A),(long)(B))) +#define __sync_fetch_and_xor(A,B) ((sizeof (*(A)) == sizeof(int)) ? __sync_fetch_and_xor_si((int *)(A),(int)(B)) : __sync_fetch_and_xor_di((long *)(A),(long)(B))) +#define __sync_fetch_and_nand(A,B) ((sizeof (*(A)) == sizeof(int)) ? __sync_fetch_and_nand_si((int *)(A),(int)(B)) : __sync_fetch_and_nand_di((long *)(A),(long)(B))) + +int __sync_add_and_fetch_si (int *, int); +int __sync_sub_and_fetch_si (int *, int); +int __sync_and_and_fetch_si (int *, int); +int __sync_or_and_fetch_si (int *, int); +int __sync_xor_and_fetch_si (int *, int); +int __sync_nand_and_fetch_si (int *, int); +long __sync_add_and_fetch_di (long *, long); +long __sync_sub_and_fetch_di (long *, long); +long __sync_and_and_fetch_di (long *, long); +long __sync_or_and_fetch_di (long *, long); +long __sync_xor_and_fetch_di (long *, long); +long __sync_nand_and_fetch_di (long *, long); +#define __sync_add_and_fetch(A,B) ((sizeof (*(A)) == sizeof(int)) ? __sync_add_and_fetch_si((int *)(A),(int)(B)) : __sync_add_and_fetch_di((long *)(A),(long)(B))) +#define __sync_sub_and_fetch(A,B) ((sizeof (*(A)) == sizeof(int)) ? __sync_sub_and_fetch_si((int *)(A),(int)(B)) : __sync_sub_and_fetch_di((long *)(A),(long)(B))) +#define __sync_and_and_fetch(A,B) ((sizeof (*(A)) == sizeof(int)) ? __sync_and_and_fetch_si((int *)(A),(int)(B)) : __sync_and_and_fetch_di((long *)(A),(long)(B))) +#define __sync_or_and_fetch(A,B) ((sizeof (*(A)) == sizeof(int)) ? __sync_or_and_fetch_si((int *)(A),(int)(B)) : __sync_or_and_fetch_di((long *)(A),(long)(B))) +#define __sync_xor_and_fetch(A,B) ((sizeof (*(A)) == sizeof(int)) ? 
__sync_xor_and_fetch_si((int *)(A),(int)(B)) : __sync_xor_and_fetch_di((long *)(A),(long)(B))) +#define __sync_nand_and_fetch(A,B) ((sizeof (*(A)) == sizeof(int)) ? __sync_nand_and_fetch_si((int *)(A),(int)(B)) : __sync_nand_and_fetch_di((long *)(A),(long)(B))) + +#endif diff --git a/gcc/config/ia64/lib1funcs.asm b/gcc/config/ia64/lib1funcs.asm new file mode 100644 index 0000000..d8af8db --- /dev/null +++ b/gcc/config/ia64/lib1funcs.asm @@ -0,0 +1,635 @@ +#ifdef L__divdf3 +// Compute a 64-bit IEEE double quotient. +// +// From the Intel IA-64 Optimization Guide, choose the minimum latency +// alternative. +// +// farg0 holds the dividend. farg1 holds the divisor. + + .text + .align 16 + .global __divdf3 + .proc __divdf3 +__divdf3: + frcpa f10, p6 = farg0, farg1 + ;; +(p6) fma.s1 f11 = farg0, f10, f0 +(p6) fnma.s1 f12 = farg1, f10, f1 + ;; +(p6) fma.s1 f11 = f12, f11, f11 +(p6) fma.s1 f13 = f12, f12, f0 +(p6) fma.s1 f10 = f12, f10, f10 + ;; +(p6) fma.s1 f11 = f13, f11, f11 +(p6) fma.s1 f12 = f13, f13, f0 +(p6) fma.s1 f10 = f13, f10, f10 + ;; +(p6) fma.d.s1 f11 = f12, f11, f11 +(p6) fma.s1 f10 = f12, f10, f10 + ;; +(p6) fnma.d.s1 f8 = farg1, f11, farg0 + ;; +(p6) fma.d f10 = f8, f10, f11 + ;; + mov fret0 = f10 + br.ret.sptk rp + ;; + .endp __divdf3 +#endif + +#ifdef L__divsf3 +// Compute a 32-bit IEEE float quotient. +// +// From the Intel IA-64 Optimization Guide, choose the minimum latency +// alternative. +// +// farg0 holds the dividend. farg1 holds the divisor. + + .text + .align 16 + .global __divsf3 + .proc __divsf3 +__divsf3: + frcpa f10, p6 = farg0, farg1 + ;; +(p6) fma.s1 f8 = farg0, f10, f0 +(p6) fnma.s1 f9 = farg1, f10, f1 + ;; +(p6) fma.s1 f8 = f9, f8, f8 +(p6) fma.s1 f9 = f9, f9, f0 + ;; +(p6) fma.s1 f8 = f9, f8, f8 +(p6) fma.s1 f9 = f9, f9, f0 + ;; +(p6) fma.d.s1 f8 = f9, f8, f8 + ;; +(p6) fma.s f10 = f8, f1, f0 + ;; + mov fret0 = f10 + br.ret.sptk rp + ;; + .endp __divsf3 +#endif + +#ifdef L__divdi3 +// Compute a 64-bit integer quotient. 
+// +// Use reciprocal approximation and Newton-Raphson iteration to compute the +// quotient. frcpa gives 8.6 significant bits, so we need 3 iterations +// to get more than the 64 bits of precision that we need for DImode. +// +// Must use max precision for the reciprocal computations to get 64 bits of +// precision. +// +// r32/f8 holds the dividend. r33/f9 holds the divisor. +// f10 holds the value 2.0. f11 holds the reciprocal approximation. +// f12 is a temporary. + + .text + .align 16 + .global __divdi3 + .proc __divdi3 +__divdi3: + .regstk 2,0,0,0 + // Transfer inputs to FP registers. + setf.sig f8 = in0 + setf.sig f9 = in1 + ;; + // Convert the inputs to FP, so that they won't be treated as unsigned. + fcvt.xf f8 = f8 + fcvt.xf f9 = f9 + ;; + // Compute the reciprocal approximation. + frcpa f10, p6 = f8, f9 + ;; + // 3 Newton-Raphson iterations. +(p6) fma.s1 f11 = farg0, f10, f0 +(p6) fnma.s1 f12 = farg1, f10, f1 + ;; +(p6) fma.s1 f11 = f12, f11, f11 +(p6) fma.s1 f13 = f12, f12, f0 +(p6) fma.s1 f10 = f12, f10, f10 + ;; +(p6) fma.s1 f11 = f13, f11, f11 +(p6) fma.s1 f12 = f13, f13, f0 +(p6) fma.s1 f10 = f13, f10, f10 + ;; +(p6) fma.s1 f11 = f12, f11, f11 +(p6) fma.s1 f10 = f12, f10, f10 + ;; +(p6) fnma.s1 f8 = f9, f11, f8 + ;; +(p6) fma f10 = f8, f10, f11 + ;; + // Round quotient to an integer. + fcvt.fx.trunc f8 = f10 + ;; + // Transfer result to GP registers. + getf.sig ret0 = f8 + br.ret.sptk rp + ;; + .endp __divdi3 +#endif + +#ifdef L__moddi3 +// Compute a 64-bit integer modulus. +// +// Use reciprocal approximation and Newton-Raphson iteration to compute the +// quotient. frcpa gives 8.6 significant bits, so we need 3 iterations +// to get more than the 64 bits of precision that we need for DImode. +// +// Must use max precision for the reciprocal computations to get 64 bits of +// precision. +// +// r32/f8 holds the dividend. r33/f9 holds the divisor. +// f10 holds the value 2.0. f11 holds the reciprocal approximation. +// f12 is a temporary. 
+ + .text + .align 16 + .global __moddi3 + .proc __moddi3 +__moddi3: + .regstk 2,0,0,0 + // Transfer inputs to FP registers. + setf.sig f8 = in0 + setf.sig f9 = in1 + ;; + // Convert the inputs to FP, so that they won't be treated as unsigned. + fcvt.xf f8 = f8 + fcvt.xf f9 = f9 + ;; + // Compute the reciprocal approximation. + frcpa f10, p6 = f8, f9 + ;; + // 3 Newton-Raphson iterations. +(p6) fma.s1 f11 = farg0, f10, f0 +(p6) fnma.s1 f12 = farg1, f10, f1 + ;; +(p6) fma.s1 f11 = f12, f11, f11 +(p6) fma.s1 f13 = f12, f12, f0 +(p6) fma.s1 f10 = f12, f10, f10 + ;; +(p6) fma.s1 f11 = f13, f11, f11 +(p6) fma.s1 f12 = f13, f13, f0 +(p6) fma.s1 f10 = f13, f10, f10 + ;; +(p6) fma.s1 f11 = f12, f11, f11 +(p6) fma.s1 f10 = f12, f10, f10 + ;; +(p6) fnma.s1 f12 = f9, f11, f8 + ;; +(p6) fma f10 = f12, f10, f11 + ;; + // Round quotient to an integer. + fcvt.fx.trunc f10 = f10 + ;; + // Renormalize. + fcvt.xf f10 = f10 + ;; + // Compute remainder. + fnma f8 = f10, f9, f8 + ;; + // Round remainder to an integer. + fcvt.fx.trunc f8 = f8 + ;; + // Transfer result to GP registers. + getf.sig ret0 = f8 + br.ret.sptk rp + ;; + .endp __moddi3 +#endif + +#ifdef L__udivdi3 +// Compute a 64-bit unsigned integer quotient. +// +// Use reciprocal approximation and Newton-Raphson iteration to compute the +// quotient. frcpa gives 8.6 significant bits, so we need 3 iterations +// to get more than the 64 bits of precision that we need for DImode. +// +// Must use max precision for the reciprocal computations to get 64 bits of +// precision. +// +// r32/f8 holds the dividend. r33/f9 holds the divisor. +// f10 holds the value 2.0. f11 holds the reciprocal approximation. +// f12 is a temporary. + + .text + .align 16 + .global __udivdi3 + .proc __udivdi3 +__udivdi3: + .regstk 2,0,0,0 + // Transfer inputs to FP registers. + setf.sig f8 = in0 + setf.sig f9 = in1 + ;; + // Convert the inputs to FP, to avoid FP software-assist faults. 
+ fcvt.xuf f8 = f8 + fcvt.xuf f9 = f9 + ;; + // Compute the reciprocal approximation. + frcpa f10, p6 = f8, f9 + ;; + // 3 Newton-Raphson iterations. +(p6) fma.s1 f11 = farg0, f10, f0 +(p6) fnma.s1 f12 = farg1, f10, f1 + ;; +(p6) fma.s1 f11 = f12, f11, f11 +(p6) fma.s1 f13 = f12, f12, f0 +(p6) fma.s1 f10 = f12, f10, f10 + ;; +(p6) fma.s1 f11 = f13, f11, f11 +(p6) fma.s1 f12 = f13, f13, f0 +(p6) fma.s1 f10 = f13, f10, f10 + ;; +(p6) fma.s1 f11 = f12, f11, f11 +(p6) fma.s1 f10 = f12, f10, f10 + ;; +(p6) fnma.s1 f8 = f9, f11, f8 + ;; +(p6) fma f10 = f8, f10, f11 + ;; + // Round quotient to an unsigned integer. + fcvt.fxu.trunc f8 = f10 + ;; + // Transfer result to GP registers. + getf.sig ret0 = f8 + br.ret.sptk rp + ;; + .endp __udivdi3 +#endif + +#ifdef L__umoddi3 +// Compute a 64-bit unsigned integer modulus. +// +// Use reciprocal approximation and Newton-Raphson iteration to compute the +// quotient. frcpa gives 8.6 significant bits, so we need 3 iterations +// to get more than the 64 bits of precision that we need for DImode. +// +// Must use max precision for the reciprocal computations to get 64 bits of +// precision. +// +// r32/f8 holds the dividend. r33/f9 holds the divisor. +// f10 holds the value 2.0. f11 holds the reciprocal approximation. +// f12 is a temporary. + + .text + .align 16 + .global __umoddi3 + .proc __umoddi3 +__umoddi3: + .regstk 2,0,0,0 + // Transfer inputs to FP registers. + setf.sig f8 = in0 + setf.sig f9 = in1 + ;; + // Convert the inputs to FP, to avoid FP software assist faults. + fcvt.xuf f8 = f8 + fcvt.xuf f9 = f9 + ;; + // Compute the reciprocal approximation. + frcpa f10, p6 = f8, f9 + ;; + // 3 Newton-Raphson iterations. 
+(p6) fma.s1 f11 = farg0, f10, f0 +(p6) fnma.s1 f12 = farg1, f10, f1 + ;; +(p6) fma.s1 f11 = f12, f11, f11 +(p6) fma.s1 f13 = f12, f12, f0 +(p6) fma.s1 f10 = f12, f10, f10 + ;; +(p6) fma.s1 f11 = f13, f11, f11 +(p6) fma.s1 f12 = f13, f13, f0 +(p6) fma.s1 f10 = f13, f10, f10 + ;; +(p6) fma.s1 f11 = f12, f11, f11 +(p6) fma.s1 f10 = f12, f10, f10 + ;; +(p6) fnma.s1 f12 = f9, f11, f8 + ;; +(p6) fma f10 = f12, f10, f11 + ;; + // Round quotient to an unsigned integer. + fcvt.fxu.trunc f10 = f10 + ;; + // Renormalize. + fcvt.xuf f10 = f10 + ;; + // Compute remainder. + fnma f8 = f10, f9, f8 + ;; + // Round remainder to an integer. + fcvt.fxu.trunc f8 = f8 + ;; + // Transfer result to GP registers. + getf.sig ret0 = f8 + br.ret.sptk rp + ;; + .endp __umoddi3 +#endif + +#ifdef L__divsi3 +// Compute a 32-bit integer quotient. +// +// Use reciprocal approximation and Newton-Raphson iteration to compute the +// quotient. frcpa gives 8.6 significant bits, so we need 2 iterations +// to get more than the 32 bits of precision that we need for SImode. +// +// ??? This is currently not used. It needs to be fixed to be more like the +// above DImode routines. +// +// ??? Check to see if the error is less than >.5ulp error. We may need +// some adjustment code to get precise enough results. +// +// ??? Should probably use max precision for the reciprocal computations. +// +// r32/f8 holds the dividend. r33/f9 holds the divisor. +// f10 holds the value 2.0. f11 holds the reciprocal approximation. +// f12 is a temporary. 
+ + .text + .align 16 + .global __divsi3 + .proc __divsi3 +__divsi3: + .regstk 2,0,0,0 + setf.sig f8 = in0 + setf.sig f9 = in1 + ;; + fcvt.xf f8 = f8 + fcvt.xf f9 = f9 + ;; + frcpa f11, p6 = f8, f9 + fadd f10 = f1, f1 + ;; + fnma f12 = f9, f11, f10 + ;; + fmpy f11 = f11, f12 + ;; + fnma f12 = f9, f11, f10 + ;; + fmpy f11 = f11, f12 + ;; + fmpy f8 = f8, f11 + ;; + fcvt.fx.trunc f8 = f8 + ;; + getf.sig ret0 = f8 + br.ret.sptk rp + ;; + .endp __divsi3 +#endif + +#ifdef L__modsi3 +// Compute a 32-bit integer modulus. +// +// Use reciprocal approximation and Newton-Raphson iteration to compute the +// quotient. frcpa gives 8.6 significant bits, so we need 2 iterations +// to get more than the 32 bits of precision that we need for SImode. +// +// ??? This is currently not used. It needs to be fixed to be more like the +// above DImode routines. +// +// ??? Check to see if the error is less than >.5ulp error. We may need +// some adjustment code to get precise enough results. +// +// ??? Should probably use max precision for the reciprocal computations. +// +// r32/f8 holds the dividend. r33/f9 holds the divisor. +// f10 holds the value 2.0. f11 holds the reciprocal approximation. +// f12 is a temporary. + + .text + .align 16 + .global __modsi3 + .proc __modsi3 +__modsi3: + .regstk 2,0,0,0 + setf.sig f8 = r32 + setf.sig f9 = r33 + ;; + fcvt.xf f8 = f8 + fcvt.xf f9 = f9 + ;; + frcpa f11, p6 = f8, f9 + fadd f10 = f1, f1 + ;; + fnma f12 = f9, f11, f10 + ;; + fmpy f11 = f11, f12 + ;; + fnma f12 = f9, f11, f10 + ;; + fmpy f11 = f11, f12 + ;; + fmpy f10 = f8, f11 + ;; + fcvt.fx.trunc f10 = f10 + ;; + fcvt.xf f10 = f10 + ;; + fnma f8 = f10, f9, f8 + ;; + fcvt.fx f8 = f8 + ;; + getf.sig r32 = f8 + br.ret.sptk rp + ;; + .endp __modsi3 +#endif + +#ifdef L__udivsi3 +// Compute a 32-bit unsigned integer quotient. +// +// Use reciprocal approximation and Newton-Raphson iteration to compute the +// quotient. 
frcpa gives 8.6 significant bits, so we need 2 iterations +// to get more than the 32 bits of precision that we need for SImode. +// +// ??? This is currently not used. It needs to be fixed to be more like the +// above DImode routines. +// +// ??? Check to see if the error is less than >.5ulp error. We may need +// some adjustment code to get precise enough results. +// +// ??? Should probably use max precision for the reciprocal computations. +// +// r32/f8 holds the dividend. r33/f9 holds the divisor. +// f10 holds the value 2.0. f11 holds the reciprocal approximation. +// f12 is a temporary. +// +// This is the same as divsi3, except that we don't need fcvt instructions +// before the frcpa. + + .text + .align 16 + .global __udivsi3 + .proc __udivsi3 +__udivsi3: + .regstk 2,0,0,0 + setf.sig f8 = r32 + setf.sig f9 = r33 + ;; + frcpa f11, p6 = f8, f9 + fadd f10 = f1, f1 + ;; + fnma f12 = f9, f11, f10 + ;; + fmpy f11 = f11, f12 + ;; + fnma f12 = f9, f11, f10 + ;; + fmpy f11 = f11, f12 + ;; + fmpy f8 = f8, f11 + ;; + fcvt.fxu.trunc f8 = f8 + ;; + getf.sig ret0 = f8 + br.ret.sptk rp + ;; + .endp __udivsi3 +#endif + +#ifdef L__umodsi3 +// Compute a 32-bit unsigned integer modulus. +// +// Use reciprocal approximation and Newton-Raphson iteration to compute the +// quotient. frcpa gives 8.6 significant bits, so we need 2 iterations +// to get more than the 32 bits of precision that we need for SImode. +// +// ??? This is currently not used. It needs to be fixed to be more like the +// above DImode routines. +// +// ??? Check to see if the error is less than >.5ulp error. We may need +// some adjustment code to get precise enough results. +// +// ??? Should probably use max precision for the reciprocal computations. +// +// r32/f8 holds the dividend. r33/f9 holds the divisor. +// f10 holds the value 2.0. f11 holds the reciprocal approximation. +// f12 is a temporary. +// +// This is the same as modsi3, except that we don't need fcvt instructions +// before the frcpa. 
+ + .text + .align 16 + .global __umodsi3 + .proc __umodsi3 +__umodsi3: + .regstk 2,0,0,0 + setf.sig f8 = r32 + setf.sig f9 = r33 + ;; + frcpa f11, p6 = f8, f9 + fadd f10 = f1, f1 + ;; + fnma f12 = f9, f11, f10 + ;; + fmpy f11 = f11, f12 + ;; + fnma f12 = f9, f11, f10 + ;; + fmpy f11 = f11, f12 + ;; + fmpy f10 = f8, f11 + ;; + fcvt.fxu.trunc f10 = f10 + ;; + fcvt.xuf f10 = f10 + ;; + fnma f8 = f10, f9, f8 + ;; + fcvt.fxu f8 = f8 + ;; + getf.sig r32 = f8 + br.ret.sptk rp + ;; + .endp __umodsi3 +#endif + +#ifdef L__save_stack_nonlocal +// Notes on save/restore stack nonlocal: We read ar.bsp but write +// ar.bspstore. This is because ar.bsp can be read at all times +// (independent of the RSE mode) but since it's read-only we need to +// restore the value via ar.bspstore. This is OK because +// ar.bsp==ar.bspstore after executing "flushrs". + +// void __ia64_save_stack_nonlocal(void *save_area, void *stack_pointer) + + .text + .align 16 + .global __ia64_save_stack_nonlocal + .proc __ia64_save_stack_nonlocal +__ia64_save_stack_nonlocal: + alloc r18=ar.pfs,2,0,0,0 + st8 [in0]=in1,8 + mov r19=ar.rsc + ;; + flushrs + and r19=0x1c,r19 + mov ar.pfs=r18 + ;; + mov ar.rsc=r19 + mov r16=ar.bsp + adds r2=16,in0 + ;; + mov r17=ar.rnat + st8 [in0]=r16,8 + or r19=0x3,r19 + ;; + st8 [in0]=r17 + mov ar.rsc=r19 + st8 [r2]=r18 + mov ar.pfs=r18 + br.ret.sptk.few rp + ;; + .endp __ia64_save_stack_nonlocal +#endif + +#ifdef L__nonlocal_goto +// void __ia64_nonlocal_goto(void *fp, void *target_label, void *save_area, +// void *static_chain); + + .text + .align 16 + .global __ia64_nonlocal_goto + .proc __ia64_nonlocal_goto +__ia64_nonlocal_goto: + alloc r20=ar.pfs,4,0,0,0 + mov r19=ar.rsc + adds r2=8,in2 + ld8 r12=[in2],16 + mov.ret.sptk.few.dc.dc rp = r33, .L0 +// ??? flushrs must be first instruction of a group. 
Gas is unfortunately +// putting the stop bit before the padding nop instead of after it, making +// flushrs the first instruction of its bundle, but the second instruction +// of its group. We explicitly add the nop to avoid this problem. + nop.i 0 + ;; + flushrs + ld8 r16=[r2],16 + and r19=0x1c,r19 + ld8 r17=[in2] + ;; + ld8 r18=[r2] + mov ar.rsc=r19 + ;; + mov ar.bspstore=r16 + ;; + mov ar.rnat=r17 + mov ar.pfs=r18 + or r19=0x3,r19 + ;; + loadrs + invala + mov r7=r32 +.L0: { + mov ar.rsc=r19 + mov r15=r35 + br.ret.sptk.few rp + } + ;; + .endp __ia64_nonlocal_goto +#endif diff --git a/gcc/config/ia64/linux.h b/gcc/config/ia64/linux.h new file mode 100644 index 0000000..08b002b --- /dev/null +++ b/gcc/config/ia64/linux.h @@ -0,0 +1,29 @@ +/* Definitions for ia64-linux target. */ +#include "ia64/ia64.h" +#include +#include "sysv4.h" + +/* ??? Maybe this should be in sysv4.h? */ +#define CPP_PREDEFINES "\ +-D__ia64 -D__ia64__ -D__linux -D__linux__ -D_LONGLONG -Dlinux -Dunix \ +-D__LP64__ -D__ELF__ -Asystem(linux) -Acpu(ia64) -Amachine(ia64)" + +/* ??? ia64 gas doesn't accept standard svr4 assembler options? */ +#undef ASM_SPEC + +/* Define this for shared library support because it isn't in the main + linux.h file. */ + +#undef LINK_SPEC +#define LINK_SPEC "\ + %{shared:-shared} \ + %{!shared: \ + %{!static: \ + %{rdynamic:-export-dynamic} \ + %{!dynamic-linker:-dynamic-linker /lib/ld-linux.so.2}} \ + %{static:-static}}" + + +#define DONT_USE_BUILTIN_SETJMP +#define JMP_BUF_SIZE (8 * 76) +/* End of linux.h */ diff --git a/gcc/config/ia64/sysv4.h b/gcc/config/ia64/sysv4.h new file mode 100644 index 0000000..6e5efdb --- /dev/null +++ b/gcc/config/ia64/sysv4.h @@ -0,0 +1,248 @@ +/* Override definitions in elfos.h/svr4.h to be correct for IA64. */ + +/* We want DWARF2 as specified by the IA64 ABI. */ +#undef PREFERRED_DEBUGGING_TYPE +#define PREFERRED_DEBUGGING_TYPE DWARF2_DEBUG + +/* Various pseudo-ops for which the Intel assembler uses non-standard + definitions. 
*/ + +#undef ASM_BYTE_OP +#define ASM_BYTE_OP "data1" + +#undef STRING_ASM_OP +#define STRING_ASM_OP "stringz" + +#undef SKIP_ASM_OP +#define SKIP_ASM_OP ".skip" + +#undef COMMON_ASM_OP +#define COMMON_ASM_OP ".common" + +#undef ASCII_DATA_ASM_OP +#define ASCII_DATA_ASM_OP "string" + +/* ??? Unfortunately, .lcomm doesn't work, because it puts things in either + .bss or .sbss, and we can't control the decision of which is used. When + I use .lcomm, I get a cryptic "Section group has no member" error from + the Intel simulator. So we must explicitly put variables in .bss + instead. This matters only if we care about the Intel assembler. */ + +/* This is asm_output_aligned_bss from varasm.c without the ASM_GLOBALIZE_LABEL + call at the beginning. */ + +/* This is for final.c, because it is used by ASM_DECLARE_OBJECT_NAME. */ +extern int size_directive_output; + +#undef ASM_OUTPUT_ALIGNED_LOCAL +#define ASM_OUTPUT_ALIGNED_DECL_LOCAL(FILE, DECL, NAME, SIZE, ALIGN) \ +do { \ + if (XSTR (XEXP (DECL_RTL (DECL), 0), 0)[0] == SDATA_NAME_FLAG_CHAR) \ + sbss_section (); \ + else \ + bss_section (); \ + ASM_OUTPUT_ALIGN (FILE, floor_log2 ((ALIGN) / BITS_PER_UNIT)); \ + ASM_DECLARE_OBJECT_NAME (FILE, NAME, DECL); \ + ASM_OUTPUT_SKIP (FILE, SIZE ? SIZE : 1); \ +} while (0) + +/* ??? Intel assembler does not allow "." in section names, so turn off + gnu.linkonce section support, but only when using the Intel assembler. */ +#undef UNIQUE_SECTION_P +#define UNIQUE_SECTION_P(DECL) (TARGET_GNU_AS ? DECL_ONE_ONLY (DECL) : 0) + +/* The # tells the Intel assembler that this is not a register name. + However, we can't emit the # in a label definition, so we set a variable + in ASM_OUTPUT_LABEL to control whether we want the postfix here or not. */ + +#undef ASM_OUTPUT_LABELREF +#define ASM_OUTPUT_LABELREF(STREAM, NAME) \ +do \ + { \ + const char *real_name; \ + STRIP_NAME_ENCODING (real_name, NAME); \ + asm_fprintf (STREAM, "%U%s%s", real_name, \ + (ia64_asm_output_label ? 
"" : "#")); \ + } \ +while (0) + +/* Intel assembler requires both flags and type if declaring a non-predefined + section. */ +#undef INIT_SECTION_ASM_OP +#define INIT_SECTION_ASM_OP ".section\t.init,\"ax\",\"progbits\"" +#undef FINI_SECTION_ASM_OP +#define FINI_SECTION_ASM_OP ".section\t.fini,\"ax\",\"progbits\"" +#undef CTORS_SECTION_ASM_OP +#define CTORS_SECTION_ASM_OP ".section\t.ctors,\"aw\",\"progbits\"" +#undef DTORS_SECTION_ASM_OP +#define DTORS_SECTION_ASM_OP ".section\t.dtors,\"aw\",\"progbits\"" + +/* A C statement (sans semicolon) to output an element in the table of + global constructors. */ +/* Must override this to get @fptr relocation. */ +#undef ASM_OUTPUT_CONSTRUCTOR +#define ASM_OUTPUT_CONSTRUCTOR(FILE,NAME) \ + do { \ + ctors_section (); \ + fputs ("\tdata8\t @fptr(", FILE); \ + assemble_name (FILE, NAME); \ + fputs (")\n", FILE); \ + } while (0) + +/* A C statement (sans semicolon) to output an element in the table of + global destructors. */ +/* Must override this to get @fptr relocation. */ +#undef ASM_OUTPUT_DESTRUCTOR +#define ASM_OUTPUT_DESTRUCTOR(FILE,NAME) \ + do { \ + dtors_section (); \ + fputs ("\tdata8\t @fptr(", FILE); \ + assemble_name (FILE, NAME); \ + fputs (")\n", FILE); \ + } while (0) + +/* svr4.h undefines this, so we need to define it here. */ +#define DBX_REGISTER_NUMBER(REGNO) \ + (IN_REGNO_P (REGNO) ? (32 + (REGNO) - IN_REG (0)) \ + : LOC_REGNO_P (REGNO) ? (32 + ia64_input_regs + \ + (REGNO) - LOC_REG (0)) \ + : OUT_REGNO_P (REGNO) ? (32 + ia64_input_regs + ia64_local_regs \ + + (REGNO) - OUT_REG (0)) \ + : (REGNO) == FRAME_POINTER_REGNUM ? ia64_fp_regno \ + : (REGNO)) + +/* Things that svr4.h defines to the wrong type, because it assumes 32 bit + ints and 32 bit longs. 
*/ + +#undef SIZE_TYPE +#define SIZE_TYPE "long unsigned int" + +#undef PTRDIFF_TYPE +#define PTRDIFF_TYPE "long int" + +#undef WCHAR_TYPE +#define WCHAR_TYPE "int" + +#undef WCHAR_TYPE_SIZE +#define WCHAR_TYPE_SIZE 32 + +/* We don't want any symbol at the beginning of the file. This is defined in + dbxelf.h which is included from elfos.h, so we need to undef/define it + here. */ + +#undef ASM_IDENTIFY_GCC +#define ASM_IDENTIFY_GCC(FILE) + +/* We redefine this to use the ia64 .proc pseudo-op. */ + +#undef ASM_DECLARE_FUNCTION_NAME +#define ASM_DECLARE_FUNCTION_NAME(FILE, NAME, DECL) \ +do { \ + fputs ("\t.proc ", FILE); \ + assemble_name (FILE, NAME); \ + fputc ('\n', FILE); \ + ASM_OUTPUT_LABEL (FILE, NAME); \ +} while (0) + +/* We redefine this to use the ia64 .endp pseudo-op. */ + +#undef ASM_DECLARE_FUNCTION_SIZE +#define ASM_DECLARE_FUNCTION_SIZE(FILE, NAME, DECL) \ +do { \ + fputs ("\t.endp ", FILE); \ + assemble_name (FILE, NAME); \ + fputc ('\n', FILE); \ +} while (0) + +/* A C expression which outputs to the stdio stream STREAM some appropriate + text to go at the start of an assembler file. */ + +/* ??? Looks like almost every port, except for a few original ones, get this + wrong. Must emit #NO_APP as first line of file to turn of special assembler + preprocessing of files. */ + +/* ??? Even worse, it doesn't work, because gas does not accept the tab chars + that dwarf2out.c emits when #NO_APP. */ + +/* ??? Unrelated, but dwarf2out.c emits unnecessary newlines after strings, + may as well fix at the same time. */ + +#if 0 +#undef ASM_FILE_START +#define ASM_FILE_START(STREAM) \ +do { \ + fputs (ASM_APP_OFF, STREAM); \ + output_file_directive (STREAM, main_input_filename); \ +} while (0) +#endif + +/* Case label alignment is handled by ADDR_VEC_ALIGN now. */ + +#undef ASM_OUTPUT_BEFORE_CASE_LABEL +#define ASM_OUTPUT_BEFORE_CASE_LABEL(FILE,PREFIX,NUM,TABLE) + +/* We override svr4.h so that we can support the sdata section. 
*/ + +#undef SELECT_SECTION +#define SELECT_SECTION(DECL,RELOC) \ +{ \ + if (TREE_CODE (DECL) == STRING_CST) \ + { \ + if (! flag_writable_strings) \ + const_section (); \ + else \ + data_section (); \ + } \ + else if (TREE_CODE (DECL) == VAR_DECL) \ + { \ + if (XSTR (XEXP (DECL_RTL (DECL), 0), 0)[0] \ + == SDATA_NAME_FLAG_CHAR) \ + sdata_section (); \ + /* ??? We need the extra ! RELOC check, because the default is to \ + only check RELOC if flag_pic is set, and we don't set flag_pic \ + (yet?). */ \ + else if (DECL_READONLY_SECTION (DECL, RELOC) && ! (RELOC)) \ + const_section (); \ + else \ + data_section (); \ + } \ + else \ + const_section (); \ +} + +#undef EXTRA_SECTIONS +#define EXTRA_SECTIONS in_const, in_ctors, in_dtors, in_sdata, in_sbss + +#undef EXTRA_SECTION_FUNCTIONS +#define EXTRA_SECTION_FUNCTIONS \ + CONST_SECTION_FUNCTION \ + CTORS_SECTION_FUNCTION \ + DTORS_SECTION_FUNCTION \ + SDATA_SECTION_FUNCTION \ + SBSS_SECTION_FUNCTION + +#define SDATA_SECTION_ASM_OP ".sdata" + +#define SDATA_SECTION_FUNCTION \ +void \ +sdata_section () \ +{ \ + if (in_section != in_sdata) \ + { \ + fprintf (asm_out_file, "%s\n", SDATA_SECTION_ASM_OP); \ + in_section = in_sdata; \ + } \ +} + +#define SBSS_SECTION_ASM_OP ".sbss" + +#define SBSS_SECTION_FUNCTION \ +void \ +sbss_section () \ +{ \ + if (in_section != in_sbss) \ + { \ + fprintf (asm_out_file, "%s\n", SBSS_SECTION_ASM_OP); \ + in_section = in_sbss; \ + } \ +} diff --git a/gcc/config/ia64/t-ia64 b/gcc/config/ia64/t-ia64 new file mode 100644 index 0000000..bf14c0a --- /dev/null +++ b/gcc/config/ia64/t-ia64 @@ -0,0 +1,41 @@ +# Name of assembly file containing libgcc1 functions. +# This entry must be present, but it can be empty if the target does +# not need any assembler functions to support its code generation. +CROSS_LIBGCC1 = libgcc1-asm.a +LIBGCC1 = libgcc1-asm.a +LIB1ASMSRC = ia64/lib1funcs.asm + +# ??? 
We change the names of the DImode div/mod files so that they won't +# accidentally be overridden by libgcc2.c files. We used to use __ia64 as +# a prefix, now we use __ as the prefix. +LIB1ASMFUNCS = __divdf3 __divsf3 \ + __divdi3 __moddi3 __udivdi3 __umoddi3 \ + __divsi3 __modsi3 __udivsi3 __umodsi3 __save_stack_nonlocal \ + __nonlocal_goto + +# ??? Hack to get -P option used when compiling lib1funcs.asm, because Intel +# assembler does not accept # line number as a comment. +# ??? This breaks C++ pragma interface/implementation, which is used in the +# C++ part of libgcc2, hence it had to be disabled. Must find some other way +# to support the Intel assembler. +#LIBGCC2_DEBUG_CFLAGS = -g1 -P + +# For svr4 we build crtbegin.o and crtend.o which serve to add begin and +# end labels to the .ctors and .dtors section when we link using gcc. + +EXTRA_PARTS=crtbegin.o crtend.o crtbeginS.o crtendS.o + +# Effectively disable the crtbegin/end rules using crtstuff.c +T = disable + +# Assemble startup files. +crtbegin.o: $(srcdir)/config/ia64/crtbegin.asm $(GCC_PASSES) + $(GCC_FOR_TARGET) -c -o crtbegin.o -x assembler-with-cpp $(srcdir)/config/ia64/crtbegin.asm +crtend.o: $(srcdir)/config/ia64/crtend.asm $(GCC_PASSES) + $(GCC_FOR_TARGET) -c -o crtend.o -x assembler-with-cpp $(srcdir)/config/ia64/crtend.asm +crtbeginS.o: $(srcdir)/config/ia64/crtbegin.asm $(GCC_PASSES) + $(GCC_FOR_TARGET) -DSHARED -c -o crtbeginS.o -x assembler-with-cpp $(srcdir)/config/ia64/crtbegin.asm +crtendS.o: $(srcdir)/config/ia64/crtend.asm $(GCC_PASSES) + $(GCC_FOR_TARGET) -DSHARED -c -o crtendS.o -x assembler-with-cpp $(srcdir)/config/ia64/crtend.asm + +EXTRA_HEADERS = $(srcdir)/config/ia64/ia64intrin.h diff --git a/gcc/config/ia64/xm-ia64.h b/gcc/config/ia64/xm-ia64.h new file mode 100644 index 0000000..541d39a --- /dev/null +++ b/gcc/config/ia64/xm-ia64.h @@ -0,0 +1,61 @@ +/* Definitions of target machine for IA64. + Copyright (C) 1999 Cygnus Solutions. + +This file is part of GNU CC. 
+ +GNU CC is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 2, or (at your option) +any later version. + +GNU CC is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with GNU CC; see the file COPYING. If not, write to +the Free Software Foundation, 59 Temple Place - Suite 330, +Boston, MA 02111-1307, USA. */ + +/* #defines that need visibility everywhere. */ +#define FALSE 0 +#define TRUE 1 + +/* A C expression for the status code to be returned when the compiler exits + after serious errors. */ +#define FATAL_EXIT_CODE 33 + +/* A C expression for the status code to be returned when the compiler exits + without serious errors. */ +#define SUCCESS_EXIT_CODE 0 + +/* Defined if the host machine stores words of multi-word values in big-endian + order. (GNU CC does not depend on the host byte ordering within a word.) */ +#ifdef __BIG_ENDIAN__ +#define HOST_WORDS_BIG_ENDIAN +#endif + +/* A C expression for the number of bits in `char' on the host machine. */ +#define HOST_BITS_PER_CHAR 8 + +/* A C expression for the number of bits in `short' on the host machine. */ +#define HOST_BITS_PER_SHORT 16 + +/* A C expression for the number of bits in `int' on the host machine. */ +#define HOST_BITS_PER_INT 32 + +/* ??? This depends on the as yet unimplemented ILP32 option. */ + +/* A C expression for the number of bits in `long' on the host machine. */ +#define HOST_BITS_PER_LONG 64 + +/* A C expression for the number of bits in `long long' on the host + machine. */ +#define HOST_BITS_PER_LONGLONG 64 + +/* target machine dependencies. + tm.h is a symbolic link to the actual target specific file. 
*/ +#include "tm.h" + +/* end of xm-ia64.h */ diff --git a/gcc/configure b/gcc/configure index 3b0b235..cdc381e 100755 --- a/gcc/configure +++ b/gcc/configure @@ -4489,6 +4489,27 @@ for machine in $build $host $target; do i960-*-*) # Default i960 environment. use_collect2=yes ;; + ia64*-*-elf*) + tm_file=ia64/elf.h + tmake_file="ia64/t-ia64" + target_cpu_default="0" + if test x$gas = xyes + then + target_cpu_default="${target_cpu_default}|MASK_GNU_AS" + fi + if test x$gnu_ld = xyes + then + target_cpu_default="${target_cpu_default}|MASK_GNU_LD" + fi + ;; + ia64*-*-linux*) + tm_file=ia64/linux.h + tmake_file="t-linux ia64/t-ia64" + target_cpu_default="MASK_GNU_AS|MASK_GNU_LD" + if test x$enable_threads = xyes; then + thread_file='posix' + fi + ;; m32r-*-elf*) extra_parts="crtinit.o crtfini.o" ;; diff --git a/gcc/configure.in b/gcc/configure.in index f34cf6e..ae5132b 100644 --- a/gcc/configure.in +++ b/gcc/configure.in @@ -1859,6 +1859,27 @@ changequote([,])dnl i960-*-*) # Default i960 environment. use_collect2=yes ;; + ia64*-*-elf*) + tm_file=ia64/elf.h + tmake_file="ia64/t-ia64" + target_cpu_default="0" + if test x$gas = xyes + then + target_cpu_default="${target_cpu_default}|MASK_GNU_AS" + fi + if test x$gnu_ld = xyes + then + target_cpu_default="${target_cpu_default}|MASK_GNU_LD" + fi + ;; + ia64*-*-linux*) + tm_file=ia64/linux.h + tmake_file="t-linux ia64/t-ia64" + target_cpu_default="MASK_GNU_AS|MASK_GNU_LD" + if test x$enable_threads = xyes; then + thread_file='posix' + fi + ;; m32r-*-elf*) extra_parts="crtinit.o crtfini.o" ;; -- 2.7.4