* config/xtensa/xtensa.md (fix_return_addr): Remove.
* config/xtensa/xtensa-protos.h (xtensa_initialize_trampoline): New.
(xtensa_trampoline_template): New.
* config/xtensa/xtensa.c (MIN_FRAME_SIZE): Moved here from xtensa.h.
(xtensa_return_addr): Expand to standard Xtensa insns instead of
fix_return_addr. Get high bits from a local label.
(xtensa_trampoline_template): New function with code moved from
TRAMPOLINE_TEMPLATE in xtensa.h. Use L32R instead of CALL0 except
when using CONST16 or absolute-mode literals.
(xtensa_initialize_trampoline): New function with code moved from
INITIALIZE_TRAMPOLINE in xtensa.h. Use different offsets depending
on which trampoline version is used.
* config/xtensa/lib2funcs.S (TRAMPOLINE_SIZE): Add comment.
* config/xtensa/xtensa.h (TARGET_ABSOLUTE_LITERALS): Define.
(MIN_FRAME_SIZE): Moved to xtensa.c.
(TRAMPOLINE_TEMPLATE): Use xtensa_trampoline_template.
(TRAMPOLINE_SIZE): Two versions of the trampoline have different sizes.
(INITIALIZE_TRAMPOLINE): Use xtensa_initialize_trampoline.
* config/xtensa/ieee754-df.S (XCHAL_NO_MUL): Define.
(__muldf3): Use CALL12 instead of CALL0 to invoke .Lmul_mulsi3
helper when not using the CALL0 ABI. Change .Lmul_mulsi3 to match.
* config/xtensa/lib1funcs.asm (__umulsidi3): Likewise.
* config/xtensa/ieee754-sf.S (__mulsf3): Likewise.
From-SVN: r131108
+2007-12-20 Bob Wilson <bob.wilson@acm.org>
+
+ * config/xtensa/xtensa.md (fix_return_addr): Remove.
+ * config/xtensa/xtensa-protos.h (xtensa_initialize_trampoline): New.
+ (xtensa_trampoline_template): New.
+ * config/xtensa/xtensa.c (MIN_FRAME_SIZE): Moved here from xtensa.h.
+ (xtensa_return_addr): Expand to standard Xtensa insns instead of
+ fix_return_addr. Get high bits from a local label.
+ (xtensa_trampoline_template): New function with code moved from
+ TRAMPOLINE_TEMPLATE in xtensa.h. Use L32R instead of CALL0 except
+ when using CONST16 or absolute-mode literals.
+ (xtensa_initialize_trampoline): New function with code moved from
+ INITIALIZE_TRAMPOLINE in xtensa.h. Use different offsets depending
+ on which trampoline version is used.
+ * config/xtensa/lib2funcs.S (TRAMPOLINE_SIZE): Add comment.
+ * config/xtensa/xtensa.h (TARGET_ABSOLUTE_LITERALS): Define.
+ (MIN_FRAME_SIZE): Moved to xtensa.c.
+ (TRAMPOLINE_TEMPLATE): Use xtensa_trampoline_template.
+ (TRAMPOLINE_SIZE): Two versions of the trampoline have different sizes.
+ (INITIALIZE_TRAMPOLINE): Use xtensa_initialize_trampoline.
+ * config/xtensa/ieee754-df.S (XCHAL_NO_MUL): Define.
+ (__muldf3): Use CALL12 instead of CALL0 to invoke .Lmul_mulsi3
+ helper when not using the CALL0 ABI. Change .Lmul_mulsi3 to match.
+ * config/xtensa/lib1funcs.asm (__umulsidi3): Likewise.
+ * config/xtensa/ieee754-sf.S (__mulsf3): Likewise.
+
2007-12-20 Jakub Jelinek <jakub@redhat.com>
PR c++/34459
/* IEEE-754 double-precision functions for Xtensa
- Copyright (C) 2006 Free Software Foundation, Inc.
+ Copyright (C) 2006, 2007 Free Software Foundation, Inc.
Contributed by Bob Wilson (bwilson@tensilica.com) at Tensilica.
This file is part of GCC.
#ifdef L_muldf3
/* Multiplication */
+#if !XCHAL_HAVE_MUL16 && !XCHAL_HAVE_MUL32 && !XCHAL_HAVE_MAC16
+#define XCHAL_NO_MUL 1
+#endif
+
__muldf3_aux:
/* Handle unusual cases (zeros, subnormals, NaNs and Infinities).
.global __muldf3
.type __muldf3, @function
__muldf3:
- leaf_entry sp, 32
#if __XTENSA_CALL0_ABI__
+ leaf_entry sp, 32
addi sp, sp, -32
s32i a12, sp, 16
s32i a13, sp, 20
s32i a14, sp, 24
s32i a15, sp, 28
+#elif XCHAL_NO_MUL
+ /* This is not really a leaf function; allocate enough stack space
+ to allow CALL12s to a helper function. */
+ leaf_entry sp, 64
+#else
+ leaf_entry sp, 32
#endif
movi a6, 0x7ff00000
muluh xh, xh, yh
add xh, xh, a9
-#else
+#else /* ! XCHAL_HAVE_MUL32_HIGH */
/* Break the inputs into 16-bit chunks and compute 16 32-bit partial
products. These partial products are:
/* Save a7 since it is needed to hold a temporary value. */
s32i a7, sp, 4
-#if !XCHAL_HAVE_MUL16 && !XCHAL_HAVE_MUL32 && !XCHAL_HAVE_MAC16
+#if __XTENSA_CALL0_ABI__ && XCHAL_NO_MUL
/* Calling a separate multiply function will clobber a0 and requires
use of a8 as a temporary, so save those values now. (The function
uses a custom ABI so nothing else needs to be saved.) */
#define set_arg_h(dst, src) \
srli dst, src, 16
+#if __XTENSA_CALL0_ABI__
#define do_mul(dst, xreg, xhalf, yreg, yhalf) \
set_arg_ ## xhalf (a13, xreg); \
set_arg_ ## yhalf (a14, yreg); \
call0 .Lmul_mulsi3; \
mov dst, a12
-#endif
+#else
+#define do_mul(dst, xreg, xhalf, yreg, yhalf) \
+ set_arg_ ## xhalf (a14, xreg); \
+ set_arg_ ## yhalf (a15, yreg); \
+ call12 .Lmul_mulsi3; \
+ mov dst, a14
+#endif /* __XTENSA_CALL0_ABI__ */
+
+#endif /* no multiply hardware */
/* Add pp1 and pp2 into a10 with carry-out in a9. */
do_mul(a10, xl, l, yl, h) /* pp 1 */
/* Restore values saved on the stack during the multiplication. */
l32i a7, sp, 4
-#if !XCHAL_HAVE_MUL16 && !XCHAL_HAVE_MUL32 && !XCHAL_HAVE_MAC16
+#if __XTENSA_CALL0_ABI__ && XCHAL_NO_MUL
l32i a0, sp, 0
l32i a8, sp, 8
#endif
-#endif
+#endif /* ! XCHAL_HAVE_MUL32_HIGH */
/* Shift left by 12 bits, unless there was a carry-out from the
multiply, in which case, shift by 11 bits and increment the
movi xl, 0
j .Lmul_done
-#if !XCHAL_HAVE_MUL16 && !XCHAL_HAVE_MUL32 && !XCHAL_HAVE_MAC16
+#if XCHAL_NO_MUL
/* For Xtensa processors with no multiply hardware, this simplified
version of _mulsi3 is used for multiplying 16-bit chunks of
- the floating-point mantissas. It uses a custom ABI: the inputs
- are passed in a13 and a14, the result is returned in a12, and
- a8 and a15 are clobbered. */
+ the floating-point mantissas. When using CALL0, this function
+ uses a custom ABI: the inputs are passed in a13 and a14, the
+ result is returned in a12, and a8 and a15 are clobbered. */
.align 4
.Lmul_mulsi3:
- movi a12, 0
-.Lmul_mult_loop:
- add a15, a14, a12
- extui a8, a13, 0, 1
- movnez a12, a15, a8
-
- do_addx2 a15, a14, a12, a15
- extui a8, a13, 1, 1
- movnez a12, a15, a8
-
- do_addx4 a15, a14, a12, a15
- extui a8, a13, 2, 1
- movnez a12, a15, a8
-
- do_addx8 a15, a14, a12, a15
- extui a8, a13, 3, 1
- movnez a12, a15, a8
-
- srli a13, a13, 4
- slli a14, a14, 4
- bnez a13, .Lmul_mult_loop
- ret
-#endif /* !MUL16 && !MUL32 && !MAC16 */
+ leaf_entry sp, 16
+ .macro mul_mulsi3_body dst, src1, src2, tmp1, tmp2
+ movi \dst, 0
+1: add \tmp1, \src2, \dst
+ extui \tmp2, \src1, 0, 1
+ movnez \dst, \tmp1, \tmp2
+
+ do_addx2 \tmp1, \src2, \dst, \tmp1
+ extui \tmp2, \src1, 1, 1
+ movnez \dst, \tmp1, \tmp2
+
+ do_addx4 \tmp1, \src2, \dst, \tmp1
+ extui \tmp2, \src1, 2, 1
+ movnez \dst, \tmp1, \tmp2
+
+ do_addx8 \tmp1, \src2, \dst, \tmp1
+ extui \tmp2, \src1, 3, 1
+ movnez \dst, \tmp1, \tmp2
+
+ srli \src1, \src1, 4
+ slli \src2, \src2, 4
+ bnez \src1, 1b
+ .endm
+#if __XTENSA_CALL0_ABI__
+ mul_mulsi3_body a12, a13, a14, a15, a8
+#else
+ /* The result will be written into a2, so save that argument in a4. */
+ mov a4, a2
+ mul_mulsi3_body a2, a4, a3, a5, a6
+#endif
+ leaf_return
+#endif /* XCHAL_NO_MUL */
#endif /* L_muldf3 */
#ifdef L_divdf3
/* IEEE-754 single-precision functions for Xtensa
- Copyright (C) 2006 Free Software Foundation, Inc.
+ Copyright (C) 2006, 2007 Free Software Foundation, Inc.
Contributed by Bob Wilson (bwilson@tensilica.com) at Tensilica.
This file is part of GCC.
#ifdef L_mulsf3
/* Multiplication */
+#if !XCHAL_HAVE_MUL16 && !XCHAL_HAVE_MUL32 && !XCHAL_HAVE_MAC16
+#define XCHAL_NO_MUL 1
+#endif
+
__mulsf3_aux:
/* Handle unusual cases (zeros, subnormals, NaNs and Infinities).
.global __mulsf3
.type __mulsf3, @function
__mulsf3:
- leaf_entry sp, 32
#if __XTENSA_CALL0_ABI__
+ leaf_entry sp, 32
addi sp, sp, -32
s32i a12, sp, 16
s32i a13, sp, 20
s32i a14, sp, 24
s32i a15, sp, 28
+#elif XCHAL_NO_MUL
+ /* This is not really a leaf function; allocate enough stack space
+ to allow CALL12s to a helper function. */
+ leaf_entry sp, 64
+#else
+ leaf_entry sp, 32
#endif
movi a6, 0x7f800000
chunks can be extracted when setting up the arguments to the
separate multiply function. */
-#if !XCHAL_HAVE_MUL16 && !XCHAL_HAVE_MUL32 && !XCHAL_HAVE_MAC16
+#if __XTENSA_CALL0_ABI__ && XCHAL_NO_MUL
/* Calling a separate multiply function will clobber a0 and requires
use of a8 as a temporary, so save those values now. (The function
uses a custom ABI so nothing else needs to be saved.) */
#define set_arg_h(dst, src) \
srli dst, src, 16
+#if __XTENSA_CALL0_ABI__
#define do_mul(dst, xreg, xhalf, yreg, yhalf) \
set_arg_ ## xhalf (a13, xreg); \
set_arg_ ## yhalf (a14, yreg); \
call0 .Lmul_mulsi3; \
mov dst, a12
-#endif
+#else
+#define do_mul(dst, xreg, xhalf, yreg, yhalf) \
+ set_arg_ ## xhalf (a14, xreg); \
+ set_arg_ ## yhalf (a15, yreg); \
+ call12 .Lmul_mulsi3; \
+ mov dst, a14
+#endif /* __XTENSA_CALL0_ABI__ */
+
+#endif /* no multiply hardware */
/* Add pp1 and pp2 into a6 with carry-out in a9. */
do_mul(a6, a2, l, a3, h) /* pp 1 */
do_mul(a2, a2, h, a3, h) /* pp 3 */
add a2, a2, a9
-#if !XCHAL_HAVE_MUL16 && !XCHAL_HAVE_MUL32 && !XCHAL_HAVE_MAC16
+#if __XTENSA_CALL0_ABI__ && XCHAL_NO_MUL
/* Restore values saved on the stack during the multiplication. */
l32i a0, sp, 0
l32i a8, sp, 4
#endif
-#endif
+#endif /* ! XCHAL_HAVE_MUL32_HIGH */
/* Shift left by 9 bits, unless there was a carry-out from the
multiply, in which case, shift by 8 bits and increment the
slli a2, a2, 31
j .Lmul_done
-#if !XCHAL_HAVE_MUL16 && !XCHAL_HAVE_MUL32 && !XCHAL_HAVE_MAC16
+#if XCHAL_NO_MUL
/* For Xtensa processors with no multiply hardware, this simplified
version of _mulsi3 is used for multiplying 16-bit chunks of
- the floating-point mantissas. It uses a custom ABI: the inputs
- are passed in a13 and a14, the result is returned in a12, and
- a8 and a15 are clobbered. */
+ the floating-point mantissas. When using CALL0, this function
+ uses a custom ABI: the inputs are passed in a13 and a14, the
+ result is returned in a12, and a8 and a15 are clobbered. */
.align 4
.Lmul_mulsi3:
- movi a12, 0
-.Lmul_mult_loop:
- add a15, a14, a12
- extui a8, a13, 0, 1
- movnez a12, a15, a8
-
- do_addx2 a15, a14, a12, a15
- extui a8, a13, 1, 1
- movnez a12, a15, a8
-
- do_addx4 a15, a14, a12, a15
- extui a8, a13, 2, 1
- movnez a12, a15, a8
-
- do_addx8 a15, a14, a12, a15
- extui a8, a13, 3, 1
- movnez a12, a15, a8
-
- srli a13, a13, 4
- slli a14, a14, 4
- bnez a13, .Lmul_mult_loop
- ret
-#endif /* !MUL16 && !MUL32 && !MAC16 */
+ leaf_entry sp, 16
+ .macro mul_mulsi3_body dst, src1, src2, tmp1, tmp2
+ movi \dst, 0
+1: add \tmp1, \src2, \dst
+ extui \tmp2, \src1, 0, 1
+ movnez \dst, \tmp1, \tmp2
+
+ do_addx2 \tmp1, \src2, \dst, \tmp1
+ extui \tmp2, \src1, 1, 1
+ movnez \dst, \tmp1, \tmp2
+
+ do_addx4 \tmp1, \src2, \dst, \tmp1
+ extui \tmp2, \src1, 2, 1
+ movnez \dst, \tmp1, \tmp2
+
+ do_addx8 \tmp1, \src2, \dst, \tmp1
+ extui \tmp2, \src1, 3, 1
+ movnez \dst, \tmp1, \tmp2
+
+ srli \src1, \src1, 4
+ slli \src2, \src2, 4
+ bnez \src1, 1b
+ .endm
+#if __XTENSA_CALL0_ABI__
+ mul_mulsi3_body a12, a13, a14, a15, a8
+#else
+ /* The result will be written into a2, so save that argument in a4. */
+ mov a4, a2
+ mul_mulsi3_body a2, a4, a3, a5, a6
+#endif
+ leaf_return
+#endif /* XCHAL_NO_MUL */
#endif /* L_mulsf3 */
#ifdef L_divsf3
#ifdef L_umulsidi3
+
+#if !XCHAL_HAVE_MUL16 && !XCHAL_HAVE_MUL32 && !XCHAL_HAVE_MAC16
+#define XCHAL_NO_MUL 1
+#endif
+
.align 4
.global __umulsidi3
.type __umulsidi3, @function
__umulsidi3:
- leaf_entry sp, 32
#if __XTENSA_CALL0_ABI__
+ leaf_entry sp, 32
addi sp, sp, -32
s32i a12, sp, 16
s32i a13, sp, 20
s32i a14, sp, 24
s32i a15, sp, 28
+#elif XCHAL_NO_MUL
+ /* This is not really a leaf function; allocate enough stack space
+ to allow CALL12s to a helper function. */
+ leaf_entry sp, 48
+#else
+ leaf_entry sp, 16
#endif
#ifdef __XTENSA_EB__
#else /* ! MUL32_HIGH */
-#if !XCHAL_HAVE_MUL16 && !XCHAL_HAVE_MUL32 && !XCHAL_HAVE_MAC16
+#if __XTENSA_CALL0_ABI__ && XCHAL_NO_MUL
/* a0 and a8 will be clobbered by calling the multiply function
but a8 is not used here and need not be saved. */
s32i a0, sp, 0
#define set_arg_h(dst, src) \
srli dst, src, 16
+#if __XTENSA_CALL0_ABI__
#define do_mul(dst, xreg, xhalf, yreg, yhalf) \
set_arg_ ## xhalf (a13, xreg); \
set_arg_ ## yhalf (a14, yreg); \
call0 .Lmul_mulsi3; \
mov dst, a12
-#endif
+#else
+#define do_mul(dst, xreg, xhalf, yreg, yhalf) \
+ set_arg_ ## xhalf (a14, xreg); \
+ set_arg_ ## yhalf (a15, yreg); \
+ call12 .Lmul_mulsi3; \
+ mov dst, a14
+#endif /* __XTENSA_CALL0_ABI__ */
+
+#endif /* no multiply hardware */
/* Add pp1 and pp2 into a6 with carry-out in a9. */
do_mul(a6, a2, l, a3, h) /* pp 1 */
#endif /* !MUL32_HIGH */
-#if !XCHAL_HAVE_MUL16 && !XCHAL_HAVE_MUL32 && !XCHAL_HAVE_MAC16
+#if __XTENSA_CALL0_ABI__ && XCHAL_NO_MUL
/* Restore the original return address. */
l32i a0, sp, 0
#endif
#endif
leaf_return
-#if !XCHAL_HAVE_MUL16 && !XCHAL_HAVE_MUL32 && !XCHAL_HAVE_MAC16
+#if XCHAL_NO_MUL
/* For Xtensa processors with no multiply hardware, this simplified
version of _mulsi3 is used for multiplying 16-bit chunks of
- the floating-point mantissas. It uses a custom ABI: the inputs
- are passed in a13 and a14, the result is returned in a12, and
- a8 and a15 are clobbered. */
+ the floating-point mantissas. When using CALL0, this function
+ uses a custom ABI: the inputs are passed in a13 and a14, the
+ result is returned in a12, and a8 and a15 are clobbered. */
.align 4
.Lmul_mulsi3:
- movi a12, 0
-.Lmul_mult_loop:
- add a15, a14, a12
- extui a8, a13, 0, 1
- movnez a12, a15, a8
-
- do_addx2 a15, a14, a12, a15
- extui a8, a13, 1, 1
- movnez a12, a15, a8
-
- do_addx4 a15, a14, a12, a15
- extui a8, a13, 2, 1
- movnez a12, a15, a8
-
- do_addx8 a15, a14, a12, a15
- extui a8, a13, 3, 1
- movnez a12, a15, a8
-
- srli a13, a13, 4
- slli a14, a14, 4
- bnez a13, .Lmul_mult_loop
- ret
-#endif /* !MUL16 && !MUL32 && !MAC16 */
+ leaf_entry sp, 16
+ .macro mul_mulsi3_body dst, src1, src2, tmp1, tmp2
+ movi \dst, 0
+1: add \tmp1, \src2, \dst
+ extui \tmp2, \src1, 0, 1
+ movnez \dst, \tmp1, \tmp2
+
+ do_addx2 \tmp1, \src2, \dst, \tmp1
+ extui \tmp2, \src1, 1, 1
+ movnez \dst, \tmp1, \tmp2
+
+ do_addx4 \tmp1, \src2, \dst, \tmp1
+ extui \tmp2, \src1, 2, 1
+ movnez \dst, \tmp1, \tmp2
+
+ do_addx8 \tmp1, \src2, \dst, \tmp1
+ extui \tmp2, \src1, 3, 1
+ movnez \dst, \tmp1, \tmp2
+
+ srli \src1, \src1, 4
+ slli \src2, \src2, 4
+ bnez \src1, 1b
+ .endm
+#if __XTENSA_CALL0_ABI__
+ mul_mulsi3_body a12, a13, a14, a15, a8
+#else
+ /* The result will be written into a2, so save that argument in a4. */
+ mov a4, a2
+ mul_mulsi3_body a2, a4, a3, a5, a6
+#endif
+ leaf_return
+#endif /* XCHAL_NO_MUL */
.size __umulsidi3, . - __umulsidi3
/* Assembly functions for libgcc2.
- Copyright (C) 2001, 2006 Free Software Foundation, Inc.
+ Copyright (C) 2001, 2006, 2007 Free Software Foundation, Inc.
Contributed by Bob Wilson (bwilson@tensilica.com) at Tensilica.
This file is part of GCC.
make sure that the modified instructions are loaded into the instruction
fetch buffer. */
+/* Use the maximum trampoline size. Flushing a bit extra is OK. */
#define TRAMPOLINE_SIZE 60
.text
extern enum reg_class xtensa_secondary_reload_class (enum reg_class,
enum machine_mode, rtx,
int);
+extern void xtensa_initialize_trampoline (rtx, rtx, rtx);
#endif /* RTX_CODE */
#ifdef TREE_CODE
extern int xtensa_frame_pointer_required (void);
extern void xtensa_expand_prologue (void);
extern void order_regs_for_local_alloc (void);
+extern void xtensa_trampoline_template (FILE *);
#endif /* !__XTENSA_PROTOS_H__ */
}
+/* Minimum frame = reg save area (4 words) plus static chain (1 word);
+   the total size must be a multiple of 128 bits.  */
+#define MIN_FRAME_SIZE (8 * UNITS_PER_WORD)
+
void
xtensa_expand_prologue (void)
{
rtx
xtensa_return_addr (int count, rtx frame)
{
- rtx result, retaddr;
+ rtx result, retaddr, curaddr, label;
if (count == -1)
retaddr = gen_rtx_REG (Pmode, A0_REG);
/* The 2 most-significant bits of the return address on Xtensa hold
the register window size. To get the real return address, these
- bits must be replaced with the high bits from the current PC. */
-
+ bits must be replaced with the high bits from some address in the
+ code. */
+
+ /* Get the 2 high bits of a local label in the code. */
+ curaddr = gen_reg_rtx (Pmode);
+ label = gen_label_rtx ();
+ emit_label (label);
+ LABEL_PRESERVE_P (label) = 1;
+ emit_move_insn (curaddr, gen_rtx_LABEL_REF (Pmode, label));
+ emit_insn (gen_lshrsi3 (curaddr, curaddr, GEN_INT (30)));
+ emit_insn (gen_ashlsi3 (curaddr, curaddr, GEN_INT (30)));
+
+ /* Clear the 2 high bits of the return address. */
result = gen_reg_rtx (Pmode);
- emit_insn (gen_fix_return_addr (result, retaddr));
+ emit_insn (gen_ashlsi3 (result, retaddr, GEN_INT (2)));
+ emit_insn (gen_lshrsi3 (result, result, GEN_INT (2)));
+
+ /* Combine them to get the result. */
+ emit_insn (gen_iorsi3 (result, result, curaddr));
return result;
}
> 4 * UNITS_PER_WORD);
}
+
+/* TRAMPOLINE_TEMPLATE: For Xtensa, the trampoline must perform an ENTRY
+ instruction with a minimal stack frame in order to get some free
+ registers. Once the actual call target is known, the proper stack frame
+ size is extracted from the ENTRY instruction at the target and the
+ current frame is adjusted to match. The trampoline then transfers
+ control to the instruction following the ENTRY at the target. Note:
+ this assumes that the target begins with an ENTRY instruction. */
+
+void
+xtensa_trampoline_template (FILE *stream)
+{
+ bool use_call0 = (TARGET_CONST16 || TARGET_ABSOLUTE_LITERALS);
+
+ fprintf (stream, "\t.begin no-transform\n");
+ fprintf (stream, "\tentry\tsp, %d\n", MIN_FRAME_SIZE);
+
+ if (use_call0)
+ {
+ /* Save the return address. */
+ fprintf (stream, "\tmov\ta10, a0\n");
+
+ /* Use a CALL0 instruction to skip past the constants and in the
+ process get the PC into A0. This allows PC-relative access to
+ the constants without relying on L32R. */
+ fprintf (stream, "\tcall0\t.Lskipconsts\n");
+ }
+ else
+ fprintf (stream, "\tj\t.Lskipconsts\n");
+
+ fprintf (stream, "\t.align\t4\n");
+ fprintf (stream, ".Lchainval:%s0\n", integer_asm_op (4, TRUE));
+ fprintf (stream, ".Lfnaddr:%s0\n", integer_asm_op (4, TRUE));
+ fprintf (stream, ".Lskipconsts:\n");
+
+ /* Load the static chain and function address from the trampoline. */
+ if (use_call0)
+ {
+ fprintf (stream, "\taddi\ta0, a0, 3\n");
+ fprintf (stream, "\tl32i\ta9, a0, 0\n");
+ fprintf (stream, "\tl32i\ta8, a0, 4\n");
+ }
+ else
+ {
+ fprintf (stream, "\tl32r\ta9, .Lchainval\n");
+ fprintf (stream, "\tl32r\ta8, .Lfnaddr\n");
+ }
+
+ /* Store the static chain. */
+ fprintf (stream, "\ts32i\ta9, sp, %d\n", MIN_FRAME_SIZE - 20);
+
+ /* Set the proper stack pointer value. */
+ fprintf (stream, "\tl32i\ta9, a8, 0\n");
+ fprintf (stream, "\textui\ta9, a9, %d, 12\n",
+ TARGET_BIG_ENDIAN ? 8 : 12);
+ fprintf (stream, "\tslli\ta9, a9, 3\n");
+ fprintf (stream, "\taddi\ta9, a9, %d\n", -MIN_FRAME_SIZE);
+ fprintf (stream, "\tsub\ta9, sp, a9\n");
+ fprintf (stream, "\tmovsp\tsp, a9\n");
+
+ if (use_call0)
+ /* Restore the return address. */
+ fprintf (stream, "\tmov\ta0, a10\n");
+
+ /* Jump to the instruction following the ENTRY. */
+ fprintf (stream, "\taddi\ta8, a8, 3\n");
+ fprintf (stream, "\tjx\ta8\n");
+
+ /* Pad size to a multiple of TRAMPOLINE_ALIGNMENT. */
+ if (use_call0)
+ fprintf (stream, "\t.byte\t0\n");
+ else
+ fprintf (stream, "\tnop\n");
+
+ fprintf (stream, "\t.end no-transform\n");
+}
+
+
+void
+xtensa_initialize_trampoline (rtx addr, rtx func, rtx chain)
+{
+ bool use_call0 = (TARGET_CONST16 || TARGET_ABSOLUTE_LITERALS);
+ int chain_off = use_call0 ? 12 : 8;
+ int func_off = use_call0 ? 16 : 12;
+ emit_move_insn (gen_rtx_MEM (SImode, plus_constant (addr, chain_off)), chain);
+ emit_move_insn (gen_rtx_MEM (SImode, plus_constant (addr, func_off)), func);
+ emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__xtensa_sync_caches"),
+ 0, VOIDmode, 1, addr, Pmode);
+}
+
+
#include "gt-xtensa.h"
#define TARGET_ADDX XCHAL_HAVE_ADDX
#define TARGET_RELEASE_SYNC XCHAL_HAVE_RELEASE_SYNC
#define TARGET_S32C1I XCHAL_HAVE_S32C1I
+#define TARGET_ABSOLUTE_LITERALS XSHAL_USE_ABSOLUTE_LITERALS
#define TARGET_DEFAULT ( \
(XCHAL_HAVE_L32R ? 0 : MASK_CONST16))
/* Stack pointer value doesn't matter at exit. */
#define EXIT_IGNORE_STACK 1
-/* A C statement to output, on the stream FILE, assembler code for a
- block of data that contains the constant parts of a trampoline.
- This code should not include a label--the label is taken care of
- automatically.
-
- For Xtensa, the trampoline must perform an entry instruction with a
- minimal stack frame in order to get some free registers. Once the
- actual call target is known, the proper stack frame size is extracted
- from the entry instruction at the target and the current frame is
- adjusted to match. The trampoline then transfers control to the
- instruction following the entry at the target. Note: this assumes
- that the target begins with an entry instruction. */
-
-/* minimum frame = reg save area (4 words) plus static chain (1 word)
- and the total number of words must be a multiple of 128 bits */
-#define MIN_FRAME_SIZE (8 * UNITS_PER_WORD)
-
-#define TRAMPOLINE_TEMPLATE(STREAM) \
- do { \
- fprintf (STREAM, "\t.begin no-transform\n"); \
- fprintf (STREAM, "\tentry\tsp, %d\n", MIN_FRAME_SIZE); \
- \
- /* save the return address */ \
- fprintf (STREAM, "\tmov\ta10, a0\n"); \
- \
- /* Use a CALL0 instruction to skip past the constants and in the \
- process get the PC into A0. This allows PC-relative access to \
- the constants without relying on L32R, which may not always be \
- available. */ \
- \
- fprintf (STREAM, "\tcall0\t.Lskipconsts\n"); \
- fprintf (STREAM, "\t.align\t4\n"); \
- fprintf (STREAM, ".Lchainval:%s0\n", integer_asm_op (4, TRUE)); \
- fprintf (STREAM, ".Lfnaddr:%s0\n", integer_asm_op (4, TRUE)); \
- fprintf (STREAM, ".Lskipconsts:\n"); \
- \
- /* store the static chain */ \
- fprintf (STREAM, "\taddi\ta0, a0, 3\n"); \
- fprintf (STREAM, "\tl32i\ta8, a0, 0\n"); \
- fprintf (STREAM, "\ts32i\ta8, sp, %d\n", MIN_FRAME_SIZE - 20); \
- \
- /* set the proper stack pointer value */ \
- fprintf (STREAM, "\tl32i\ta8, a0, 4\n"); \
- fprintf (STREAM, "\tl32i\ta9, a8, 0\n"); \
- fprintf (STREAM, "\textui\ta9, a9, %d, 12\n", \
- TARGET_BIG_ENDIAN ? 8 : 12); \
- fprintf (STREAM, "\tslli\ta9, a9, 3\n"); \
- fprintf (STREAM, "\taddi\ta9, a9, %d\n", -MIN_FRAME_SIZE); \
- fprintf (STREAM, "\tsub\ta9, sp, a9\n"); \
- fprintf (STREAM, "\tmovsp\tsp, a9\n"); \
- \
- /* restore the return address */ \
- fprintf (STREAM, "\tmov\ta0, a10\n"); \
- \
- /* jump to the instruction following the entry */ \
- fprintf (STREAM, "\taddi\ta8, a8, 3\n"); \
- fprintf (STREAM, "\tjx\ta8\n"); \
- fprintf (STREAM, "\t.byte\t0\n"); \
- fprintf (STREAM, "\t.end no-transform\n"); \
- } while (0)
+#define TRAMPOLINE_TEMPLATE(STREAM) xtensa_trampoline_template (STREAM)
/* Size in bytes of the trampoline, as an integer. Make sure this is
a multiple of TRAMPOLINE_ALIGNMENT to avoid -Wpadded warnings. */
-#define TRAMPOLINE_SIZE 60
+#define TRAMPOLINE_SIZE (TARGET_CONST16 || TARGET_ABSOLUTE_LITERALS ? 60 : 52)
/* Alignment required for trampolines, in bits. */
-#define TRAMPOLINE_ALIGNMENT (32)
+#define TRAMPOLINE_ALIGNMENT 32
/* A C statement to initialize the variable parts of a trampoline. */
#define INITIALIZE_TRAMPOLINE(ADDR, FUNC, CHAIN) \
- do { \
- rtx addr = ADDR; \
- emit_move_insn (gen_rtx_MEM (SImode, plus_constant (addr, 12)), CHAIN); \
- emit_move_insn (gen_rtx_MEM (SImode, plus_constant (addr, 16)), FUNC); \
- emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__xtensa_sync_caches"), \
- 0, VOIDmode, 1, addr, Pmode); \
- } while (0)
+ xtensa_initialize_trampoline (ADDR, FUNC, CHAIN)
+
/* If defined, a C expression that produces the machine-specific code
to setup the stack so that arbitrary frames can be accessed.
(set_attr "mode" "none")
(set_attr "length" "0")])
-;; The fix_return_addr pattern sets the high 2 bits of an address in a
-;; register to match the high bits of the current PC.
-(define_insn "fix_return_addr"
- [(set (match_operand:SI 0 "register_operand" "=a")
- (unspec:SI [(match_operand:SI 1 "register_operand" "r")]
- UNSPEC_RET_ADDR))
- (clobber (match_scratch:SI 2 "=r"))
- (clobber (match_scratch:SI 3 "=r"))]
- ""
- "mov\t%2, a0\;call0\t0f\;.align\t4\;0:\;mov\t%3, a0\;mov\ta0, %2\;\
-srli\t%3, %3, 30\;slli\t%0, %1, 2\;ssai\t2\;src\t%0, %3, %0"
- [(set_attr "type" "multi")
- (set_attr "mode" "SI")
- (set_attr "length" "24")])
-
\f
;; Instructions for the Xtensa "boolean" option.