1 /* Copyright (C) 2006, 2007, 2008, 2009, 2010, 2011
2 Free Software Foundation, Inc.
4 This file is free software; you can redistribute it and/or modify it under
5 the terms of the GNU General Public License as published by the Free
6 Software Foundation; either version 3 of the License, or (at your option)
9 This file is distributed in the hope that it will be useful, but WITHOUT
10 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
11 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
14 You should have received a copy of the GNU General Public License
15 along with GCC; see the file COPYING3. If not see
16 <http://www.gnu.org/licenses/>. */
20 #include "coretypes.h"
24 #include "hard-reg-set.h"
25 #include "insn-config.h"
26 #include "conditions.h"
27 #include "insn-attr.h"
37 #include "basic-block.h"
38 #include "integrate.h"
39 #include "diagnostic-core.h"
44 #include "target-def.h"
45 #include "langhooks.h"
47 #include "cfglayout.h"
48 #include "sched-int.h"
52 #include "tm-constrs.h"
58 /* Builtin types, data and prototypes. */
60 enum spu_builtin_type_index
62 SPU_BTI_END_OF_PARAMS,
64 /* We create new type nodes for these. */
76 /* A 16-byte type. (Implemented with V16QI_type_node) */
79 /* These all correspond to intSI_type_node */
93 /* These correspond to the standard types */
113 #define V16QI_type_node (spu_builtin_types[SPU_BTI_V16QI])
114 #define V8HI_type_node (spu_builtin_types[SPU_BTI_V8HI])
115 #define V4SI_type_node (spu_builtin_types[SPU_BTI_V4SI])
116 #define V2DI_type_node (spu_builtin_types[SPU_BTI_V2DI])
117 #define V4SF_type_node (spu_builtin_types[SPU_BTI_V4SF])
118 #define V2DF_type_node (spu_builtin_types[SPU_BTI_V2DF])
119 #define unsigned_V16QI_type_node (spu_builtin_types[SPU_BTI_UV16QI])
120 #define unsigned_V8HI_type_node (spu_builtin_types[SPU_BTI_UV8HI])
121 #define unsigned_V4SI_type_node (spu_builtin_types[SPU_BTI_UV4SI])
122 #define unsigned_V2DI_type_node (spu_builtin_types[SPU_BTI_UV2DI])
124 static GTY(()) tree spu_builtin_types[SPU_BTI_MAX];
126 struct spu_builtin_range
131 static struct spu_builtin_range spu_builtin_range[] = {
132 {-0x40ll, 0x7fll}, /* SPU_BTI_7 */
133 {-0x40ll, 0x3fll}, /* SPU_BTI_S7 */
134 {0ll, 0x7fll}, /* SPU_BTI_U7 */
135 {-0x200ll, 0x1ffll}, /* SPU_BTI_S10 */
136 {-0x2000ll, 0x1fffll}, /* SPU_BTI_S10_4 */
137 {0ll, 0x3fffll}, /* SPU_BTI_U14 */
138 {-0x8000ll, 0xffffll}, /* SPU_BTI_16 */
139 {-0x8000ll, 0x7fffll}, /* SPU_BTI_S16 */
140 {-0x20000ll, 0x1ffffll}, /* SPU_BTI_S16_2 */
141 {0ll, 0xffffll}, /* SPU_BTI_U16 */
142 {0ll, 0x3ffffll}, /* SPU_BTI_U16_2 */
143 {0ll, 0x3ffffll}, /* SPU_BTI_U18 */
147 /* Target specific attribute specifications. */
148 char regs_ever_allocated[FIRST_PSEUDO_REGISTER];
150 /* Prototypes and external defs. */
151 static void spu_option_override (void);
152 static void spu_init_builtins (void);
153 static tree spu_builtin_decl (unsigned, bool);
154 static bool spu_scalar_mode_supported_p (enum machine_mode mode);
155 static bool spu_vector_mode_supported_p (enum machine_mode mode);
156 static bool spu_legitimate_address_p (enum machine_mode, rtx, bool);
157 static bool spu_addr_space_legitimate_address_p (enum machine_mode, rtx,
159 static rtx adjust_operand (rtx op, HOST_WIDE_INT * start);
160 static rtx get_pic_reg (void);
161 static int need_to_save_reg (int regno, int saving);
162 static rtx frame_emit_store (int regno, rtx addr, HOST_WIDE_INT offset);
163 static rtx frame_emit_load (int regno, rtx addr, HOST_WIDE_INT offset);
164 static rtx frame_emit_add_imm (rtx dst, rtx src, HOST_WIDE_INT imm,
166 static void emit_nop_for_insn (rtx insn);
167 static bool insn_clobbers_hbr (rtx insn);
168 static void spu_emit_branch_hint (rtx before, rtx branch, rtx target,
169 int distance, sbitmap blocks);
170 static rtx spu_emit_vector_compare (enum rtx_code rcode, rtx op0, rtx op1,
171 enum machine_mode dmode);
172 static rtx get_branch_target (rtx branch);
173 static void spu_machine_dependent_reorg (void);
174 static int spu_sched_issue_rate (void);
175 static int spu_sched_variable_issue (FILE * dump, int verbose, rtx insn,
177 static int get_pipe (rtx insn);
178 static int spu_sched_adjust_cost (rtx insn, rtx link, rtx dep_insn, int cost);
179 static void spu_sched_init_global (FILE *, int, int);
180 static void spu_sched_init (FILE *, int, int);
181 static int spu_sched_reorder (FILE *, int, rtx *, int *, int);
182 static tree spu_handle_fndecl_attribute (tree * node, tree name, tree args,
185 static tree spu_handle_vector_attribute (tree * node, tree name, tree args,
188 static int spu_naked_function_p (tree func);
189 static bool spu_pass_by_reference (cumulative_args_t cum,
190 enum machine_mode mode,
191 const_tree type, bool named);
192 static rtx spu_function_arg (cumulative_args_t cum, enum machine_mode mode,
193 const_tree type, bool named);
194 static void spu_function_arg_advance (cumulative_args_t cum,
195 enum machine_mode mode,
196 const_tree type, bool named);
197 static tree spu_build_builtin_va_list (void);
198 static void spu_va_start (tree, rtx);
199 static tree spu_gimplify_va_arg_expr (tree valist, tree type,
200 gimple_seq * pre_p, gimple_seq * post_p);
201 static int store_with_one_insn_p (rtx mem);
202 static int mem_is_padded_component_ref (rtx x);
203 static int reg_aligned_for_addr (rtx x);
204 static bool spu_assemble_integer (rtx x, unsigned int size, int aligned_p);
205 static void spu_asm_globalize_label (FILE * file, const char *name);
206 static bool spu_rtx_costs (rtx x, int code, int outer_code, int opno,
207 int *total, bool speed);
208 static bool spu_function_ok_for_sibcall (tree decl, tree exp);
209 static void spu_init_libfuncs (void);
210 static bool spu_return_in_memory (const_tree type, const_tree fntype);
211 static void fix_range (const char *);
212 static void spu_encode_section_info (tree, rtx, int);
213 static rtx spu_legitimize_address (rtx, rtx, enum machine_mode);
214 static rtx spu_addr_space_legitimize_address (rtx, rtx, enum machine_mode,
216 static tree spu_builtin_mul_widen_even (tree);
217 static tree spu_builtin_mul_widen_odd (tree);
218 static tree spu_builtin_mask_for_load (void);
219 static int spu_builtin_vectorization_cost (enum vect_cost_for_stmt, tree, int);
220 static bool spu_vector_alignment_reachable (const_tree, bool);
221 static enum machine_mode spu_addr_space_pointer_mode (addr_space_t);
222 static enum machine_mode spu_addr_space_address_mode (addr_space_t);
223 static bool spu_addr_space_subset_p (addr_space_t, addr_space_t);
224 static rtx spu_addr_space_convert (rtx, tree, tree);
225 static int spu_sms_res_mii (struct ddg *g);
226 static unsigned int spu_section_type_flags (tree, const char *, int);
227 static section *spu_select_section (tree, int, unsigned HOST_WIDE_INT);
228 static void spu_unique_section (tree, int);
229 static rtx spu_expand_load (rtx, rtx, rtx, int);
230 static void spu_trampoline_init (rtx, tree, rtx);
231 static void spu_conditional_register_usage (void);
232 static bool spu_ref_may_alias_errno (ao_ref *);
233 static void spu_output_mi_thunk (FILE *, tree, HOST_WIDE_INT,
234 HOST_WIDE_INT, tree);
236 /* Which instruction set architecture to use. */
238 /* Which cpu are we tuning for. */
241 /* The hardware requires 8 insns between a hint and the branch it
242 effects. This variable describes how many rtl instructions the
243 compiler needs to see before inserting a hint, and then the compiler
244 will insert enough nops to make it at least 8 insns. The default is
245 for the compiler to allow up to 2 nops be emitted. The nops are
246 inserted in pairs, so we round down. */
247 int spu_hint_dist = (8*4) - (2*4);
262 IC_POOL, /* constant pool */
263 IC_IL1, /* one il* instruction */
264 IC_IL2, /* both ilhu and iohl instructions */
265 IC_IL1s, /* one il* instruction */
266 IC_IL2s, /* both ilhu and iohl instructions */
267 IC_FSMBI, /* the fsmbi instruction */
268 IC_CPAT, /* one of the c*d instructions */
269 IC_FSMBI2 /* fsmbi plus 1 other instruction */
272 static enum spu_immediate which_immediate_load (HOST_WIDE_INT val);
273 static enum spu_immediate which_logical_immediate (HOST_WIDE_INT val);
274 static int cpat_info(unsigned char *arr, int size, int *prun, int *pstart);
275 static enum immediate_class classify_immediate (rtx op,
276 enum machine_mode mode);
278 static enum machine_mode spu_unwind_word_mode (void);
280 static enum machine_mode
281 spu_libgcc_cmp_return_mode (void);
283 static enum machine_mode
284 spu_libgcc_shift_count_mode (void);
286 /* Pointer mode for __ea references. */
287 #define EAmode (spu_ea_model != 32 ? DImode : SImode)
290 /* Table of machine attributes. */
291 static const struct attribute_spec spu_attribute_table[] =
293 /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler,
294 affects_type_identity } */
295 { "naked", 0, 0, true, false, false, spu_handle_fndecl_attribute,
297 { "spu_vector", 0, 0, false, true, false, spu_handle_vector_attribute,
299 { NULL, 0, 0, false, false, false, NULL, false }
302 /* TARGET overrides. */
304 #undef TARGET_ADDR_SPACE_POINTER_MODE
305 #define TARGET_ADDR_SPACE_POINTER_MODE spu_addr_space_pointer_mode
307 #undef TARGET_ADDR_SPACE_ADDRESS_MODE
308 #define TARGET_ADDR_SPACE_ADDRESS_MODE spu_addr_space_address_mode
310 #undef TARGET_ADDR_SPACE_LEGITIMATE_ADDRESS_P
311 #define TARGET_ADDR_SPACE_LEGITIMATE_ADDRESS_P \
312 spu_addr_space_legitimate_address_p
314 #undef TARGET_ADDR_SPACE_LEGITIMIZE_ADDRESS
315 #define TARGET_ADDR_SPACE_LEGITIMIZE_ADDRESS spu_addr_space_legitimize_address
317 #undef TARGET_ADDR_SPACE_SUBSET_P
318 #define TARGET_ADDR_SPACE_SUBSET_P spu_addr_space_subset_p
320 #undef TARGET_ADDR_SPACE_CONVERT
321 #define TARGET_ADDR_SPACE_CONVERT spu_addr_space_convert
323 #undef TARGET_INIT_BUILTINS
324 #define TARGET_INIT_BUILTINS spu_init_builtins
325 #undef TARGET_BUILTIN_DECL
326 #define TARGET_BUILTIN_DECL spu_builtin_decl
328 #undef TARGET_EXPAND_BUILTIN
329 #define TARGET_EXPAND_BUILTIN spu_expand_builtin
331 #undef TARGET_UNWIND_WORD_MODE
332 #define TARGET_UNWIND_WORD_MODE spu_unwind_word_mode
334 #undef TARGET_LEGITIMIZE_ADDRESS
335 #define TARGET_LEGITIMIZE_ADDRESS spu_legitimize_address
337 /* The current assembler doesn't like .4byte foo@ppu, so use the normal .long
338 and .quad for the debugger. When it is known that the assembler is fixed,
339 these can be removed. */
340 #undef TARGET_ASM_UNALIGNED_SI_OP
341 #define TARGET_ASM_UNALIGNED_SI_OP "\t.long\t"
343 #undef TARGET_ASM_ALIGNED_DI_OP
344 #define TARGET_ASM_ALIGNED_DI_OP "\t.quad\t"
346 /* The .8byte directive doesn't seem to work well for a 32 bit
348 #undef TARGET_ASM_UNALIGNED_DI_OP
349 #define TARGET_ASM_UNALIGNED_DI_OP NULL
351 #undef TARGET_RTX_COSTS
352 #define TARGET_RTX_COSTS spu_rtx_costs
354 #undef TARGET_ADDRESS_COST
355 #define TARGET_ADDRESS_COST hook_int_rtx_bool_0
357 #undef TARGET_SCHED_ISSUE_RATE
358 #define TARGET_SCHED_ISSUE_RATE spu_sched_issue_rate
360 #undef TARGET_SCHED_INIT_GLOBAL
361 #define TARGET_SCHED_INIT_GLOBAL spu_sched_init_global
363 #undef TARGET_SCHED_INIT
364 #define TARGET_SCHED_INIT spu_sched_init
366 #undef TARGET_SCHED_VARIABLE_ISSUE
367 #define TARGET_SCHED_VARIABLE_ISSUE spu_sched_variable_issue
369 #undef TARGET_SCHED_REORDER
370 #define TARGET_SCHED_REORDER spu_sched_reorder
372 #undef TARGET_SCHED_REORDER2
373 #define TARGET_SCHED_REORDER2 spu_sched_reorder
375 #undef TARGET_SCHED_ADJUST_COST
376 #define TARGET_SCHED_ADJUST_COST spu_sched_adjust_cost
378 #undef TARGET_ATTRIBUTE_TABLE
379 #define TARGET_ATTRIBUTE_TABLE spu_attribute_table
381 #undef TARGET_ASM_INTEGER
382 #define TARGET_ASM_INTEGER spu_assemble_integer
384 #undef TARGET_SCALAR_MODE_SUPPORTED_P
385 #define TARGET_SCALAR_MODE_SUPPORTED_P spu_scalar_mode_supported_p
387 #undef TARGET_VECTOR_MODE_SUPPORTED_P
388 #define TARGET_VECTOR_MODE_SUPPORTED_P spu_vector_mode_supported_p
390 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
391 #define TARGET_FUNCTION_OK_FOR_SIBCALL spu_function_ok_for_sibcall
393 #undef TARGET_ASM_GLOBALIZE_LABEL
394 #define TARGET_ASM_GLOBALIZE_LABEL spu_asm_globalize_label
396 #undef TARGET_PASS_BY_REFERENCE
397 #define TARGET_PASS_BY_REFERENCE spu_pass_by_reference
399 #undef TARGET_FUNCTION_ARG
400 #define TARGET_FUNCTION_ARG spu_function_arg
402 #undef TARGET_FUNCTION_ARG_ADVANCE
403 #define TARGET_FUNCTION_ARG_ADVANCE spu_function_arg_advance
405 #undef TARGET_MUST_PASS_IN_STACK
406 #define TARGET_MUST_PASS_IN_STACK must_pass_in_stack_var_size
408 #undef TARGET_BUILD_BUILTIN_VA_LIST
409 #define TARGET_BUILD_BUILTIN_VA_LIST spu_build_builtin_va_list
411 #undef TARGET_EXPAND_BUILTIN_VA_START
412 #define TARGET_EXPAND_BUILTIN_VA_START spu_va_start
414 static void spu_setup_incoming_varargs (cumulative_args_t cum,
415 enum machine_mode mode,
416 tree type, int *pretend_size,
418 #undef TARGET_SETUP_INCOMING_VARARGS
419 #define TARGET_SETUP_INCOMING_VARARGS spu_setup_incoming_varargs
421 #undef TARGET_MACHINE_DEPENDENT_REORG
422 #define TARGET_MACHINE_DEPENDENT_REORG spu_machine_dependent_reorg
424 #undef TARGET_GIMPLIFY_VA_ARG_EXPR
425 #define TARGET_GIMPLIFY_VA_ARG_EXPR spu_gimplify_va_arg_expr
427 #undef TARGET_INIT_LIBFUNCS
428 #define TARGET_INIT_LIBFUNCS spu_init_libfuncs
430 #undef TARGET_RETURN_IN_MEMORY
431 #define TARGET_RETURN_IN_MEMORY spu_return_in_memory
433 #undef TARGET_ENCODE_SECTION_INFO
434 #define TARGET_ENCODE_SECTION_INFO spu_encode_section_info
436 #undef TARGET_VECTORIZE_BUILTIN_MUL_WIDEN_EVEN
437 #define TARGET_VECTORIZE_BUILTIN_MUL_WIDEN_EVEN spu_builtin_mul_widen_even
439 #undef TARGET_VECTORIZE_BUILTIN_MUL_WIDEN_ODD
440 #define TARGET_VECTORIZE_BUILTIN_MUL_WIDEN_ODD spu_builtin_mul_widen_odd
442 #undef TARGET_VECTORIZE_BUILTIN_MASK_FOR_LOAD
443 #define TARGET_VECTORIZE_BUILTIN_MASK_FOR_LOAD spu_builtin_mask_for_load
445 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST
446 #define TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST spu_builtin_vectorization_cost
448 #undef TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE
449 #define TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE spu_vector_alignment_reachable
451 #undef TARGET_LIBGCC_CMP_RETURN_MODE
452 #define TARGET_LIBGCC_CMP_RETURN_MODE spu_libgcc_cmp_return_mode
454 #undef TARGET_LIBGCC_SHIFT_COUNT_MODE
455 #define TARGET_LIBGCC_SHIFT_COUNT_MODE spu_libgcc_shift_count_mode
457 #undef TARGET_SCHED_SMS_RES_MII
458 #define TARGET_SCHED_SMS_RES_MII spu_sms_res_mii
460 #undef TARGET_SECTION_TYPE_FLAGS
461 #define TARGET_SECTION_TYPE_FLAGS spu_section_type_flags
463 #undef TARGET_ASM_SELECT_SECTION
464 #define TARGET_ASM_SELECT_SECTION spu_select_section
466 #undef TARGET_ASM_UNIQUE_SECTION
467 #define TARGET_ASM_UNIQUE_SECTION spu_unique_section
469 #undef TARGET_LEGITIMATE_ADDRESS_P
470 #define TARGET_LEGITIMATE_ADDRESS_P spu_legitimate_address_p
472 #undef TARGET_LEGITIMATE_CONSTANT_P
473 #define TARGET_LEGITIMATE_CONSTANT_P spu_legitimate_constant_p
475 #undef TARGET_TRAMPOLINE_INIT
476 #define TARGET_TRAMPOLINE_INIT spu_trampoline_init
478 #undef TARGET_OPTION_OVERRIDE
479 #define TARGET_OPTION_OVERRIDE spu_option_override
481 #undef TARGET_CONDITIONAL_REGISTER_USAGE
482 #define TARGET_CONDITIONAL_REGISTER_USAGE spu_conditional_register_usage
484 #undef TARGET_REF_MAY_ALIAS_ERRNO
485 #define TARGET_REF_MAY_ALIAS_ERRNO spu_ref_may_alias_errno
487 #undef TARGET_ASM_OUTPUT_MI_THUNK
488 #define TARGET_ASM_OUTPUT_MI_THUNK spu_output_mi_thunk
489 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
490 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK hook_bool_const_tree_hwi_hwi_const_tree_true
492 /* Variable tracking should be run after all optimizations which
493 change order of insns. It also needs a valid CFG. */
494 #undef TARGET_DELAY_VARTRACK
495 #define TARGET_DELAY_VARTRACK true
497 struct gcc_target targetm = TARGET_INITIALIZER;
499 /* Define the structure for the machine field in struct function. */
500 struct GTY(()) machine_function
502 /* Register to use for PIC accesses. */
506 /* How to allocate a 'struct machine_function'. */
507 static struct machine_function *
508 spu_init_machine_status (void)
510 return ggc_alloc_cleared_machine_function ();
513 /* Implement TARGET_OPTION_OVERRIDE. */
515 spu_option_override (void)
517 /* Set up function hooks. */
518 init_machine_status = spu_init_machine_status;
520 /* Small loops will be unpeeled at -O3. For SPU it is more important
521 to keep code small by default. */
522 if (!flag_unroll_loops && !flag_peel_loops)
523 maybe_set_param_value (PARAM_MAX_COMPLETELY_PEEL_TIMES, 4,
524 global_options.x_param_values,
525 global_options_set.x_param_values);
527 flag_omit_frame_pointer = 1;
529 /* Functions must be 8 byte aligned so we correctly handle dual issue */
530 if (align_functions < 8)
533 spu_hint_dist = 8*4 - spu_max_nops*4;
534 if (spu_hint_dist < 0)
537 if (spu_fixed_range_string)
538 fix_range (spu_fixed_range_string);
540 /* Determine processor architectural level. */
543 if (strcmp (&spu_arch_string[0], "cell") == 0)
544 spu_arch = PROCESSOR_CELL;
545 else if (strcmp (&spu_arch_string[0], "celledp") == 0)
546 spu_arch = PROCESSOR_CELLEDP;
548 error ("bad value (%s) for -march= switch", spu_arch_string);
551 /* Determine processor to tune for. */
554 if (strcmp (&spu_tune_string[0], "cell") == 0)
555 spu_tune = PROCESSOR_CELL;
556 else if (strcmp (&spu_tune_string[0], "celledp") == 0)
557 spu_tune = PROCESSOR_CELLEDP;
559 error ("bad value (%s) for -mtune= switch", spu_tune_string);
562 /* Change defaults according to the processor architecture. */
563 if (spu_arch == PROCESSOR_CELLEDP)
565 /* If no command line option has been otherwise specified, change
566 the default to -mno-safe-hints on celledp -- only the original
567 Cell/B.E. processors require this workaround. */
568 if (!(target_flags_explicit & MASK_SAFE_HINTS))
569 target_flags &= ~MASK_SAFE_HINTS;
572 REAL_MODE_FORMAT (SFmode) = &spu_single_format;
575 /* Handle an attribute requiring a FUNCTION_DECL; arguments as in
576 struct attribute_spec.handler. */
578 /* True if MODE is valid for the target. By "valid", we mean able to
579 be manipulated in non-trivial ways. In particular, this means all
580 the arithmetic is supported. */
582 spu_scalar_mode_supported_p (enum machine_mode mode)
600 /* Similarly for vector modes. "Supported" here is less strict. At
601 least some operations are supported; need to check optabs or builtins
602 for further details. */
604 spu_vector_mode_supported_p (enum machine_mode mode)
621 /* GCC assumes that in a paradoxical SUBREG the inner mode occupies the
622 least significant bytes of the outer mode. This function returns
623 TRUE for the SUBREG's where this is correct. */
625 valid_subreg (rtx op)
627 enum machine_mode om = GET_MODE (op);
628 enum machine_mode im = GET_MODE (SUBREG_REG (op));
629 return om != VOIDmode && im != VOIDmode
630 && (GET_MODE_SIZE (im) == GET_MODE_SIZE (om)
631 || (GET_MODE_SIZE (im) <= 4 && GET_MODE_SIZE (om) <= 4)
632 || (GET_MODE_SIZE (im) >= 16 && GET_MODE_SIZE (om) >= 16));
635 /* When insv and ext[sz]v ar passed a TI SUBREG, we want to strip it off
636 and adjust the start offset. */
638 adjust_operand (rtx op, HOST_WIDE_INT * start)
640 enum machine_mode mode;
642 /* Strip any paradoxical SUBREG. */
643 if (GET_CODE (op) == SUBREG
644 && (GET_MODE_BITSIZE (GET_MODE (op))
645 > GET_MODE_BITSIZE (GET_MODE (SUBREG_REG (op)))))
649 GET_MODE_BITSIZE (GET_MODE (op)) -
650 GET_MODE_BITSIZE (GET_MODE (SUBREG_REG (op)));
651 op = SUBREG_REG (op);
653 /* If it is smaller than SI, assure a SUBREG */
654 op_size = GET_MODE_BITSIZE (GET_MODE (op));
658 *start += 32 - op_size;
661 /* If it is not a MODE_INT (and/or it is smaller than SI) add a SUBREG. */
662 mode = mode_for_size (op_size, MODE_INT, 0);
663 if (mode != GET_MODE (op))
664 op = gen_rtx_SUBREG (mode, op, 0);
669 spu_expand_extv (rtx ops[], int unsignedp)
671 rtx dst = ops[0], src = ops[1];
672 HOST_WIDE_INT width = INTVAL (ops[2]);
673 HOST_WIDE_INT start = INTVAL (ops[3]);
674 HOST_WIDE_INT align_mask;
675 rtx s0, s1, mask, r0;
677 gcc_assert (REG_P (dst) && GET_MODE (dst) == TImode);
681 /* First, determine if we need 1 TImode load or 2. We need only 1
682 if the bits being extracted do not cross the alignment boundary
683 as determined by the MEM and its address. */
685 align_mask = -MEM_ALIGN (src);
686 if ((start & align_mask) == ((start + width - 1) & align_mask))
688 /* Alignment is sufficient for 1 load. */
689 s0 = gen_reg_rtx (TImode);
690 r0 = spu_expand_load (s0, 0, src, start / 8);
693 emit_insn (gen_rotqby_ti (s0, s0, r0));
698 s0 = gen_reg_rtx (TImode);
699 s1 = gen_reg_rtx (TImode);
700 r0 = spu_expand_load (s0, s1, src, start / 8);
703 gcc_assert (start + width <= 128);
706 rtx r1 = gen_reg_rtx (SImode);
707 mask = gen_reg_rtx (TImode);
708 emit_move_insn (mask, GEN_INT (-1));
709 emit_insn (gen_rotqby_ti (s0, s0, r0));
710 emit_insn (gen_rotqby_ti (s1, s1, r0));
711 if (GET_CODE (r0) == CONST_INT)
712 r1 = GEN_INT (INTVAL (r0) & 15);
714 emit_insn (gen_andsi3 (r1, r0, GEN_INT (15)));
715 emit_insn (gen_shlqby_ti (mask, mask, r1));
716 emit_insn (gen_selb (s0, s1, s0, mask));
721 else if (GET_CODE (src) == SUBREG)
723 rtx r = SUBREG_REG (src);
724 gcc_assert (REG_P (r) && SCALAR_INT_MODE_P (GET_MODE (r)));
725 s0 = gen_reg_rtx (TImode);
726 if (GET_MODE_SIZE (GET_MODE (r)) < GET_MODE_SIZE (TImode))
727 emit_insn (gen_rtx_SET (VOIDmode, s0, gen_rtx_ZERO_EXTEND (TImode, r)));
729 emit_move_insn (s0, src);
733 gcc_assert (REG_P (src) && GET_MODE (src) == TImode);
734 s0 = gen_reg_rtx (TImode);
735 emit_move_insn (s0, src);
738 /* Now s0 is TImode and contains the bits to extract at start. */
741 emit_insn (gen_rotlti3 (s0, s0, GEN_INT (start)));
744 s0 = expand_shift (RSHIFT_EXPR, TImode, s0, 128 - width, s0, unsignedp);
746 emit_move_insn (dst, s0);
750 spu_expand_insv (rtx ops[])
752 HOST_WIDE_INT width = INTVAL (ops[1]);
753 HOST_WIDE_INT start = INTVAL (ops[2]);
754 HOST_WIDE_INT maskbits;
755 enum machine_mode dst_mode;
756 rtx dst = ops[0], src = ops[3];
763 if (GET_CODE (ops[0]) == MEM)
764 dst = gen_reg_rtx (TImode);
766 dst = adjust_operand (dst, &start);
767 dst_mode = GET_MODE (dst);
768 dst_size = GET_MODE_BITSIZE (GET_MODE (dst));
770 if (CONSTANT_P (src))
772 enum machine_mode m =
773 (width <= 32 ? SImode : width <= 64 ? DImode : TImode);
774 src = force_reg (m, convert_to_mode (m, src, 0));
776 src = adjust_operand (src, 0);
778 mask = gen_reg_rtx (dst_mode);
779 shift_reg = gen_reg_rtx (dst_mode);
780 shift = dst_size - start - width;
782 /* It's not safe to use subreg here because the compiler assumes
783 that the SUBREG_REG is right justified in the SUBREG. */
784 convert_move (shift_reg, src, 1);
791 emit_insn (gen_ashlsi3 (shift_reg, shift_reg, GEN_INT (shift)));
794 emit_insn (gen_ashldi3 (shift_reg, shift_reg, GEN_INT (shift)));
797 emit_insn (gen_ashlti3 (shift_reg, shift_reg, GEN_INT (shift)));
809 maskbits = (-1ll << (32 - width - start));
811 maskbits += (1ll << (32 - start));
812 emit_move_insn (mask, GEN_INT (maskbits));
815 maskbits = (-1ll << (64 - width - start));
817 maskbits += (1ll << (64 - start));
818 emit_move_insn (mask, GEN_INT (maskbits));
822 unsigned char arr[16];
824 memset (arr, 0, sizeof (arr));
825 arr[i] = 0xff >> (start & 7);
826 for (i++; i <= (start + width - 1) / 8; i++)
828 arr[i - 1] &= 0xff << (7 - ((start + width - 1) & 7));
829 emit_move_insn (mask, array_to_constant (TImode, arr));
835 if (GET_CODE (ops[0]) == MEM)
837 rtx low = gen_reg_rtx (SImode);
838 rtx rotl = gen_reg_rtx (SImode);
839 rtx mask0 = gen_reg_rtx (TImode);
845 addr = force_reg (Pmode, XEXP (ops[0], 0));
846 addr0 = gen_rtx_AND (Pmode, addr, GEN_INT (-16));
847 emit_insn (gen_andsi3 (low, addr, GEN_INT (15)));
848 emit_insn (gen_negsi2 (rotl, low));
849 emit_insn (gen_rotqby_ti (shift_reg, shift_reg, rotl));
850 emit_insn (gen_rotqmby_ti (mask0, mask, rotl));
851 mem = change_address (ops[0], TImode, addr0);
852 set_mem_alias_set (mem, 0);
853 emit_move_insn (dst, mem);
854 emit_insn (gen_selb (dst, dst, shift_reg, mask0));
855 if (start + width > MEM_ALIGN (ops[0]))
857 rtx shl = gen_reg_rtx (SImode);
858 rtx mask1 = gen_reg_rtx (TImode);
859 rtx dst1 = gen_reg_rtx (TImode);
861 addr1 = plus_constant (Pmode, addr, 16);
862 addr1 = gen_rtx_AND (Pmode, addr1, GEN_INT (-16));
863 emit_insn (gen_subsi3 (shl, GEN_INT (16), low));
864 emit_insn (gen_shlqby_ti (mask1, mask, shl));
865 mem1 = change_address (ops[0], TImode, addr1);
866 set_mem_alias_set (mem1, 0);
867 emit_move_insn (dst1, mem1);
868 emit_insn (gen_selb (dst1, dst1, shift_reg, mask1));
869 emit_move_insn (mem1, dst1);
871 emit_move_insn (mem, dst);
874 emit_insn (gen_selb (dst, copy_rtx (dst), shift_reg, mask));
879 spu_expand_block_move (rtx ops[])
881 HOST_WIDE_INT bytes, align, offset;
882 rtx src, dst, sreg, dreg, target;
884 if (GET_CODE (ops[2]) != CONST_INT
885 || GET_CODE (ops[3]) != CONST_INT
886 || INTVAL (ops[2]) > (HOST_WIDE_INT) (MOVE_RATIO (optimize_insn_for_speed_p ()) * 8))
889 bytes = INTVAL (ops[2]);
890 align = INTVAL (ops[3]);
900 for (offset = 0; offset + 16 <= bytes; offset += 16)
902 dst = adjust_address (ops[0], V16QImode, offset);
903 src = adjust_address (ops[1], V16QImode, offset);
904 emit_move_insn (dst, src);
909 unsigned char arr[16] = { 0 };
910 for (i = 0; i < bytes - offset; i++)
912 dst = adjust_address (ops[0], V16QImode, offset);
913 src = adjust_address (ops[1], V16QImode, offset);
914 mask = gen_reg_rtx (V16QImode);
915 sreg = gen_reg_rtx (V16QImode);
916 dreg = gen_reg_rtx (V16QImode);
917 target = gen_reg_rtx (V16QImode);
918 emit_move_insn (mask, array_to_constant (V16QImode, arr));
919 emit_move_insn (dreg, dst);
920 emit_move_insn (sreg, src);
921 emit_insn (gen_selb (target, dreg, sreg, mask));
922 emit_move_insn (dst, target);
930 { SPU_EQ, SPU_GT, SPU_GTU };
932 int spu_comp_icode[12][3] = {
933 {CODE_FOR_ceq_qi, CODE_FOR_cgt_qi, CODE_FOR_clgt_qi},
934 {CODE_FOR_ceq_hi, CODE_FOR_cgt_hi, CODE_FOR_clgt_hi},
935 {CODE_FOR_ceq_si, CODE_FOR_cgt_si, CODE_FOR_clgt_si},
936 {CODE_FOR_ceq_di, CODE_FOR_cgt_di, CODE_FOR_clgt_di},
937 {CODE_FOR_ceq_ti, CODE_FOR_cgt_ti, CODE_FOR_clgt_ti},
938 {CODE_FOR_ceq_sf, CODE_FOR_cgt_sf, 0},
939 {CODE_FOR_ceq_df, CODE_FOR_cgt_df, 0},
940 {CODE_FOR_ceq_v16qi, CODE_FOR_cgt_v16qi, CODE_FOR_clgt_v16qi},
941 {CODE_FOR_ceq_v8hi, CODE_FOR_cgt_v8hi, CODE_FOR_clgt_v8hi},
942 {CODE_FOR_ceq_v4si, CODE_FOR_cgt_v4si, CODE_FOR_clgt_v4si},
943 {CODE_FOR_ceq_v4sf, CODE_FOR_cgt_v4sf, 0},
944 {CODE_FOR_ceq_v2df, CODE_FOR_cgt_v2df, 0},
947 /* Generate a compare for CODE. Return a brand-new rtx that represents
948 the result of the compare. GCC can figure this out too if we don't
949 provide all variations of compares, but GCC always wants to use
950 WORD_MODE, we can generate better code in most cases if we do it
953 spu_emit_branch_or_set (int is_set, rtx cmp, rtx operands[])
955 int reverse_compare = 0;
956 int reverse_test = 0;
957 rtx compare_result, eq_result;
958 rtx comp_rtx, eq_rtx;
959 enum machine_mode comp_mode;
960 enum machine_mode op_mode;
961 enum spu_comp_code scode, eq_code;
962 enum insn_code ior_code;
963 enum rtx_code code = GET_CODE (cmp);
964 rtx op0 = XEXP (cmp, 0);
965 rtx op1 = XEXP (cmp, 1);
969 /* When op1 is a CONST_INT change (X >= C) to (X > C-1),
970 and so on, to keep the constant in operand 1. */
971 if (GET_CODE (op1) == CONST_INT)
973 HOST_WIDE_INT val = INTVAL (op1) - 1;
974 if (trunc_int_for_mode (val, GET_MODE (op0)) == val)
998 /* However, if we generate an integer result, performing a reverse test
999 would require an extra negation, so avoid that where possible. */
1000 if (GET_CODE (op1) == CONST_INT && is_set == 1)
1002 HOST_WIDE_INT val = INTVAL (op1) + 1;
1003 if (trunc_int_for_mode (val, GET_MODE (op0)) == val)
1007 op1 = GEN_INT (val);
1011 op1 = GEN_INT (val);
1020 op_mode = GET_MODE (op0);
1026 if (HONOR_NANS (op_mode))
1028 reverse_compare = 0;
1035 reverse_compare = 1;
1041 if (HONOR_NANS (op_mode))
1043 reverse_compare = 1;
1050 reverse_compare = 0;
1055 reverse_compare = 1;
1060 reverse_compare = 1;
1065 reverse_compare = 0;
1070 reverse_compare = 1;
1075 reverse_compare = 0;
1121 comp_mode = op_mode;
1125 comp_mode = op_mode;
1129 comp_mode = op_mode;
1133 comp_mode = V4SImode;
1137 comp_mode = V2DImode;
1144 if (GET_MODE (op1) == DFmode
1145 && (scode != SPU_GT && scode != SPU_EQ))
1148 if (is_set == 0 && op1 == const0_rtx
1149 && (GET_MODE (op0) == SImode
1150 || GET_MODE (op0) == HImode
1151 || GET_MODE (op0) == QImode) && scode == SPU_EQ)
1153 /* Don't need to set a register with the result when we are
1154 comparing against zero and branching. */
1155 reverse_test = !reverse_test;
1156 compare_result = op0;
1160 compare_result = gen_reg_rtx (comp_mode);
1162 if (reverse_compare)
1169 if (spu_comp_icode[index][scode] == 0)
1172 if (!(*insn_data[spu_comp_icode[index][scode]].operand[1].predicate)
1174 op0 = force_reg (op_mode, op0);
1175 if (!(*insn_data[spu_comp_icode[index][scode]].operand[2].predicate)
1177 op1 = force_reg (op_mode, op1);
1178 comp_rtx = GEN_FCN (spu_comp_icode[index][scode]) (compare_result,
1182 emit_insn (comp_rtx);
1186 eq_result = gen_reg_rtx (comp_mode);
1187 eq_rtx = GEN_FCN (spu_comp_icode[index][eq_code]) (eq_result,
1192 ior_code = optab_handler (ior_optab, comp_mode);
1193 gcc_assert (ior_code != CODE_FOR_nothing);
1194 emit_insn (GEN_FCN (ior_code)
1195 (compare_result, compare_result, eq_result));
1204 /* We don't have branch on QI compare insns, so we convert the
1205 QI compare result to a HI result. */
1206 if (comp_mode == QImode)
1208 rtx old_res = compare_result;
1209 compare_result = gen_reg_rtx (HImode);
1211 emit_insn (gen_extendqihi2 (compare_result, old_res));
1215 bcomp = gen_rtx_EQ (comp_mode, compare_result, const0_rtx);
1217 bcomp = gen_rtx_NE (comp_mode, compare_result, const0_rtx);
1219 loc_ref = gen_rtx_LABEL_REF (VOIDmode, operands[3]);
1220 emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx,
1221 gen_rtx_IF_THEN_ELSE (VOIDmode, bcomp,
1224 else if (is_set == 2)
1226 rtx target = operands[0];
1227 int compare_size = GET_MODE_BITSIZE (comp_mode);
1228 int target_size = GET_MODE_BITSIZE (GET_MODE (target));
1229 enum machine_mode mode = mode_for_size (target_size, MODE_INT, 0);
1231 rtx op_t = operands[2];
1232 rtx op_f = operands[3];
1234 /* The result of the comparison can be SI, HI or QI mode. Create a
1235 mask based on that result. */
1236 if (target_size > compare_size)
1238 select_mask = gen_reg_rtx (mode);
1239 emit_insn (gen_extend_compare (select_mask, compare_result));
1241 else if (target_size < compare_size)
1243 gen_rtx_SUBREG (mode, compare_result,
1244 (compare_size - target_size) / BITS_PER_UNIT);
1245 else if (comp_mode != mode)
1246 select_mask = gen_rtx_SUBREG (mode, compare_result, 0);
1248 select_mask = compare_result;
1250 if (GET_MODE (target) != GET_MODE (op_t)
1251 || GET_MODE (target) != GET_MODE (op_f))
1255 emit_insn (gen_selb (target, op_t, op_f, select_mask));
1257 emit_insn (gen_selb (target, op_f, op_t, select_mask));
1261 rtx target = operands[0];
1263 emit_insn (gen_rtx_SET (VOIDmode, compare_result,
1264 gen_rtx_NOT (comp_mode, compare_result)));
1265 if (GET_MODE (target) == SImode && GET_MODE (compare_result) == HImode)
1266 emit_insn (gen_extendhisi2 (target, compare_result));
1267 else if (GET_MODE (target) == SImode
1268 && GET_MODE (compare_result) == QImode)
1269 emit_insn (gen_extend_compare (target, compare_result));
1271 emit_move_insn (target, compare_result);
1276 const_double_to_hwint (rtx x)
1280 if (GET_MODE (x) == SFmode)
1282 REAL_VALUE_FROM_CONST_DOUBLE (rv, x);
1283 REAL_VALUE_TO_TARGET_SINGLE (rv, val);
1285 else if (GET_MODE (x) == DFmode)
1288 REAL_VALUE_FROM_CONST_DOUBLE (rv, x);
1289 REAL_VALUE_TO_TARGET_DOUBLE (rv, l);
1291 val = (val << 32) | (l[1] & 0xffffffff);
1299 hwint_to_const_double (enum machine_mode mode, HOST_WIDE_INT v)
1303 gcc_assert (mode == SFmode || mode == DFmode);
1306 tv[0] = (v << 32) >> 32;
1307 else if (mode == DFmode)
1309 tv[1] = (v << 32) >> 32;
1312 real_from_target (&rv, tv, mode);
1313 return CONST_DOUBLE_FROM_REAL_VALUE (rv, mode);
1317 print_operand_address (FILE * file, register rtx addr)
1322 if (GET_CODE (addr) == AND
1323 && GET_CODE (XEXP (addr, 1)) == CONST_INT
1324 && INTVAL (XEXP (addr, 1)) == -16)
1325 addr = XEXP (addr, 0);
1327 switch (GET_CODE (addr))
1330 fprintf (file, "0(%s)", reg_names[REGNO (addr)]);
1334 reg = XEXP (addr, 0);
1335 offset = XEXP (addr, 1);
1336 if (GET_CODE (offset) == REG)
1338 fprintf (file, "%s,%s", reg_names[REGNO (reg)],
1339 reg_names[REGNO (offset)]);
1341 else if (GET_CODE (offset) == CONST_INT)
1343 fprintf (file, HOST_WIDE_INT_PRINT_DEC "(%s)",
1344 INTVAL (offset), reg_names[REGNO (reg)]);
1354 output_addr_const (file, addr);
1364 print_operand (FILE * file, rtx x, int code)
1366 enum machine_mode mode = GET_MODE (x);
1368 unsigned char arr[16];
1369 int xcode = GET_CODE (x);
1371 if (GET_MODE (x) == VOIDmode)
1374 case 'L': /* 128 bits, signed */
1375 case 'm': /* 128 bits, signed */
1376 case 'T': /* 128 bits, signed */
1377 case 't': /* 128 bits, signed */
1380 case 'K': /* 64 bits, signed */
1381 case 'k': /* 64 bits, signed */
1382 case 'D': /* 64 bits, signed */
1383 case 'd': /* 64 bits, signed */
1386 case 'J': /* 32 bits, signed */
1387 case 'j': /* 32 bits, signed */
1388 case 's': /* 32 bits, signed */
1389 case 'S': /* 32 bits, signed */
1396 case 'j': /* 32 bits, signed */
1397 case 'k': /* 64 bits, signed */
1398 case 'm': /* 128 bits, signed */
1399 if (xcode == CONST_INT
1400 || xcode == CONST_DOUBLE || xcode == CONST_VECTOR)
1402 gcc_assert (logical_immediate_p (x, mode));
1403 constant_to_array (mode, x, arr);
1404 val = (arr[0] << 24) | (arr[1] << 16) | (arr[2] << 8) | arr[3];
1405 val = trunc_int_for_mode (val, SImode);
1406 switch (which_logical_immediate (val))
1411 fprintf (file, "h");
1414 fprintf (file, "b");
1424 case 'J': /* 32 bits, signed */
1425 case 'K': /* 64 bits, signed */
1426 case 'L': /* 128 bits, signed */
1427 if (xcode == CONST_INT
1428 || xcode == CONST_DOUBLE || xcode == CONST_VECTOR)
1430 gcc_assert (logical_immediate_p (x, mode)
1431 || iohl_immediate_p (x, mode));
1432 constant_to_array (mode, x, arr);
1433 val = (arr[0] << 24) | (arr[1] << 16) | (arr[2] << 8) | arr[3];
1434 val = trunc_int_for_mode (val, SImode);
1435 switch (which_logical_immediate (val))
1441 val = trunc_int_for_mode (val, HImode);
1444 val = trunc_int_for_mode (val, QImode);
1449 fprintf (file, HOST_WIDE_INT_PRINT_DEC, val);
1455 case 't': /* 128 bits, signed */
1456 case 'd': /* 64 bits, signed */
1457 case 's': /* 32 bits, signed */
1460 enum immediate_class c = classify_immediate (x, mode);
1464 constant_to_array (mode, x, arr);
1465 val = (arr[0] << 24) | (arr[1] << 16) | (arr[2] << 8) | arr[3];
1466 val = trunc_int_for_mode (val, SImode);
1467 switch (which_immediate_load (val))
1472 fprintf (file, "a");
1475 fprintf (file, "h");
1478 fprintf (file, "hu");
1485 constant_to_array (mode, x, arr);
1486 cpat_info (arr, GET_MODE_SIZE (mode), &info, 0);
1488 fprintf (file, "b");
1490 fprintf (file, "h");
1492 fprintf (file, "w");
1494 fprintf (file, "d");
1497 if (xcode == CONST_VECTOR)
1499 x = CONST_VECTOR_ELT (x, 0);
1500 xcode = GET_CODE (x);
1502 if (xcode == SYMBOL_REF || xcode == LABEL_REF || xcode == CONST)
1503 fprintf (file, "a");
1504 else if (xcode == HIGH)
1505 fprintf (file, "hu");
1519 case 'T': /* 128 bits, signed */
1520 case 'D': /* 64 bits, signed */
1521 case 'S': /* 32 bits, signed */
1524 enum immediate_class c = classify_immediate (x, mode);
1528 constant_to_array (mode, x, arr);
1529 val = (arr[0] << 24) | (arr[1] << 16) | (arr[2] << 8) | arr[3];
1530 val = trunc_int_for_mode (val, SImode);
1531 switch (which_immediate_load (val))
1538 val = trunc_int_for_mode (((arr[0] << 8) | arr[1]), HImode);
1543 fprintf (file, HOST_WIDE_INT_PRINT_DEC, val);
1546 constant_to_array (mode, x, arr);
1548 for (i = 0; i < 16; i++)
1553 print_operand (file, GEN_INT (val), 0);
1556 constant_to_array (mode, x, arr);
1557 cpat_info (arr, GET_MODE_SIZE (mode), 0, &info);
1558 fprintf (file, HOST_WIDE_INT_PRINT_DEC, (HOST_WIDE_INT)info);
1563 if (GET_CODE (x) == CONST_VECTOR)
1564 x = CONST_VECTOR_ELT (x, 0);
1565 output_addr_const (file, x);
1567 fprintf (file, "@h");
1581 if (xcode == CONST_INT)
1583 /* Only 4 least significant bits are relevant for generate
1584 control word instructions. */
1585 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x) & 15);
1590 case 'M': /* print code for c*d */
1591 if (GET_CODE (x) == CONST_INT)
1595 fprintf (file, "b");
1598 fprintf (file, "h");
1601 fprintf (file, "w");
1604 fprintf (file, "d");
1613 case 'N': /* Negate the operand */
1614 if (xcode == CONST_INT)
1615 fprintf (file, HOST_WIDE_INT_PRINT_DEC, -INTVAL (x));
1616 else if (xcode == CONST_VECTOR)
1617 fprintf (file, HOST_WIDE_INT_PRINT_DEC,
1618 -INTVAL (CONST_VECTOR_ELT (x, 0)));
1621 case 'I': /* enable/disable interrupts */
1622 if (xcode == CONST_INT)
1623 fprintf (file, "%s", INTVAL (x) == 0 ? "d" : "e");
1626 case 'b': /* branch modifiers */
1628 fprintf (file, "%s", GET_MODE (x) == HImode ? "h" : "");
1629 else if (COMPARISON_P (x))
1630 fprintf (file, "%s", xcode == NE ? "n" : "");
1633 case 'i': /* indirect call */
1636 if (GET_CODE (XEXP (x, 0)) == REG)
1637 /* Used in indirect function calls. */
1638 fprintf (file, "%s", reg_names[REGNO (XEXP (x, 0))]);
1640 output_address (XEXP (x, 0));
1644 case 'p': /* load/store */
1648 xcode = GET_CODE (x);
1653 xcode = GET_CODE (x);
1656 fprintf (file, "d");
1657 else if (xcode == CONST_INT)
1658 fprintf (file, "a");
1659 else if (xcode == CONST || xcode == SYMBOL_REF || xcode == LABEL_REF)
1660 fprintf (file, "r");
1661 else if (xcode == PLUS || xcode == LO_SUM)
1663 if (GET_CODE (XEXP (x, 1)) == REG)
1664 fprintf (file, "x");
1666 fprintf (file, "d");
1671 val = xcode == CONST_INT ? INTVAL (x) : INTVAL (CONST_VECTOR_ELT (x, 0));
1673 output_addr_const (file, GEN_INT (val));
1677 val = xcode == CONST_INT ? INTVAL (x) : INTVAL (CONST_VECTOR_ELT (x, 0));
1679 output_addr_const (file, GEN_INT (val));
1683 val = xcode == CONST_INT ? INTVAL (x) : INTVAL (CONST_VECTOR_ELT (x, 0));
1685 output_addr_const (file, GEN_INT (val));
1689 val = xcode == CONST_INT ? INTVAL (x) : INTVAL (CONST_VECTOR_ELT (x, 0));
1690 val = (val >> 3) & 0x1f;
1691 output_addr_const (file, GEN_INT (val));
1695 val = xcode == CONST_INT ? INTVAL (x) : INTVAL (CONST_VECTOR_ELT (x, 0));
1698 output_addr_const (file, GEN_INT (val));
1702 val = xcode == CONST_INT ? INTVAL (x) : INTVAL (CONST_VECTOR_ELT (x, 0));
1705 output_addr_const (file, GEN_INT (val));
1709 val = xcode == CONST_INT ? INTVAL (x) : INTVAL (CONST_VECTOR_ELT (x, 0));
1712 output_addr_const (file, GEN_INT (val));
1716 val = xcode == CONST_INT ? INTVAL (x) : INTVAL (CONST_VECTOR_ELT (x, 0));
1717 val = -(val & -8ll);
1718 val = (val >> 3) & 0x1f;
1719 output_addr_const (file, GEN_INT (val));
1724 constant_to_array (mode, x, arr);
1725 val = (((arr[0] << 1) + (arr[1] >> 7)) & 0xff) - 127;
1726 output_addr_const (file, GEN_INT (code == 'w' ? -val : val));
1731 fprintf (file, "%s", reg_names[REGNO (x)]);
1732 else if (xcode == MEM)
1733 output_address (XEXP (x, 0));
1734 else if (xcode == CONST_VECTOR)
1735 print_operand (file, CONST_VECTOR_ELT (x, 0), 0);
1737 output_addr_const (file, x);
1744 output_operand_lossage ("invalid %%xn code");
1749 /* For PIC mode we've reserved PIC_OFFSET_TABLE_REGNUM, which is a
1750 caller saved register. For leaf functions it is more efficient to
1751 use a volatile register because we won't need to save and restore the
1752 pic register. This routine is only valid after register allocation
1753 is completed, so we can pick an unused register. */
1757 if (!reload_completed && !reload_in_progress)
1760 /* If we've already made the decision, we need to keep with it. Once we've
1761 decided to use LAST_ARG_REGNUM, future calls to df_regs_ever_live_p may
1762 return true since the register is now live; this should not cause us to
1763 "switch back" to using pic_offset_table_rtx. */
1764 if (!cfun->machine->pic_reg)
1766 if (current_function_is_leaf && !df_regs_ever_live_p (LAST_ARG_REGNUM))
1767 cfun->machine->pic_reg = gen_rtx_REG (SImode, LAST_ARG_REGNUM);
1769 cfun->machine->pic_reg = pic_offset_table_rtx;
1772 return cfun->machine->pic_reg;
1775 /* Split constant addresses to handle cases that are too large.
1776 Add in the pic register when in PIC mode.
1777 Split immediates that require more than 1 instruction. */
1779 spu_split_immediate (rtx * ops)
1781 enum machine_mode mode = GET_MODE (ops[0]);
1782 enum immediate_class c = classify_immediate (ops[1], mode);
1788 unsigned char arrhi[16];
1789 unsigned char arrlo[16];
1790 rtx to, temp, hi, lo;
1792 enum machine_mode imode = mode;
1793 /* We need to do reals as ints because the constant used in the
1794 IOR might not be a legitimate real constant. */
1795 imode = int_mode_for_mode (mode);
1796 constant_to_array (mode, ops[1], arrhi);
1798 to = simplify_gen_subreg (imode, ops[0], mode, 0);
1801 temp = !can_create_pseudo_p () ? to : gen_reg_rtx (imode);
1802 for (i = 0; i < 16; i += 4)
1804 arrlo[i + 2] = arrhi[i + 2];
1805 arrlo[i + 3] = arrhi[i + 3];
1806 arrlo[i + 0] = arrlo[i + 1] = 0;
1807 arrhi[i + 2] = arrhi[i + 3] = 0;
1809 hi = array_to_constant (imode, arrhi);
1810 lo = array_to_constant (imode, arrlo);
1811 emit_move_insn (temp, hi);
1812 emit_insn (gen_rtx_SET
1813 (VOIDmode, to, gen_rtx_IOR (imode, temp, lo)));
1818 unsigned char arr_fsmbi[16];
1819 unsigned char arr_andbi[16];
1820 rtx to, reg_fsmbi, reg_and;
1822 enum machine_mode imode = mode;
1823 /* We need to do reals as ints because the constant used in the
1824 * AND might not be a legitimate real constant. */
1825 imode = int_mode_for_mode (mode);
1826 constant_to_array (mode, ops[1], arr_fsmbi);
1828 to = simplify_gen_subreg(imode, ops[0], GET_MODE (ops[0]), 0);
1831 for (i = 0; i < 16; i++)
1832 if (arr_fsmbi[i] != 0)
1834 arr_andbi[0] = arr_fsmbi[i];
1835 arr_fsmbi[i] = 0xff;
1837 for (i = 1; i < 16; i++)
1838 arr_andbi[i] = arr_andbi[0];
1839 reg_fsmbi = array_to_constant (imode, arr_fsmbi);
1840 reg_and = array_to_constant (imode, arr_andbi);
1841 emit_move_insn (to, reg_fsmbi);
1842 emit_insn (gen_rtx_SET
1843 (VOIDmode, to, gen_rtx_AND (imode, to, reg_and)));
1847 if (reload_in_progress || reload_completed)
1849 rtx mem = force_const_mem (mode, ops[1]);
1850 if (TARGET_LARGE_MEM)
1852 rtx addr = gen_rtx_REG (Pmode, REGNO (ops[0]));
1853 emit_move_insn (addr, XEXP (mem, 0));
1854 mem = replace_equiv_address (mem, addr);
1856 emit_move_insn (ops[0], mem);
1862 if (reload_completed && GET_CODE (ops[1]) != HIGH)
1866 emit_move_insn (ops[0], gen_rtx_HIGH (mode, ops[1]));
1867 emit_move_insn (ops[0], gen_rtx_LO_SUM (mode, ops[0], ops[1]));
1870 emit_insn (gen_pic (ops[0], ops[1]));
1873 rtx pic_reg = get_pic_reg ();
1874 emit_insn (gen_addsi3 (ops[0], ops[0], pic_reg));
1876 return flag_pic || c == IC_IL2s;
1887 /* SAVING is TRUE when we are generating the actual load and store
1888 instructions for REGNO. When determining the size of the stack
1889 needed for saving register we must allocate enough space for the
1890 worst case, because we don't always have the information early enough
1891 to not allocate it. But we can at least eliminate the actual loads
1892 and stores during the prologue/epilogue. */
1894 need_to_save_reg (int regno, int saving)
1896 if (df_regs_ever_live_p (regno) && !call_used_regs[regno])
1899 && regno == PIC_OFFSET_TABLE_REGNUM
1900 && (!saving || cfun->machine->pic_reg == pic_offset_table_rtx))
1905 /* This function is only correct starting with local register
1908 spu_saved_regs_size (void)
1910 int reg_save_size = 0;
1913 for (regno = FIRST_PSEUDO_REGISTER - 1; regno >= 0; --regno)
1914 if (need_to_save_reg (regno, 0))
1915 reg_save_size += 0x10;
1916 return reg_save_size;
1920 frame_emit_store (int regno, rtx addr, HOST_WIDE_INT offset)
1922 rtx reg = gen_rtx_REG (V4SImode, regno);
1924 gen_frame_mem (V4SImode, gen_rtx_PLUS (Pmode, addr, GEN_INT (offset)));
1925 return emit_insn (gen_movv4si (mem, reg));
1929 frame_emit_load (int regno, rtx addr, HOST_WIDE_INT offset)
1931 rtx reg = gen_rtx_REG (V4SImode, regno);
1933 gen_frame_mem (V4SImode, gen_rtx_PLUS (Pmode, addr, GEN_INT (offset)));
1934 return emit_insn (gen_movv4si (reg, mem));
1937 /* This happens after reload, so we need to expand it. */
1939 frame_emit_add_imm (rtx dst, rtx src, HOST_WIDE_INT imm, rtx scratch)
1942 if (satisfies_constraint_K (GEN_INT (imm)))
1944 insn = emit_insn (gen_addsi3 (dst, src, GEN_INT (imm)));
1948 emit_insn (gen_movsi (scratch, gen_int_mode (imm, SImode)));
1949 insn = emit_insn (gen_addsi3 (dst, src, scratch));
1950 if (REGNO (src) == REGNO (scratch))
1956 /* Return nonzero if this function is known to have a null epilogue. */
1959 direct_return (void)
1961 if (reload_completed)
1963 if (cfun->static_chain_decl == 0
1964 && (spu_saved_regs_size ()
1966 + crtl->outgoing_args_size
1967 + crtl->args.pretend_args_size == 0)
1968 && current_function_is_leaf)
1975 The stack frame looks like this:
1979 AP -> +-------------+
1982 prev SP | back chain |
1985 | reg save | crtl->args.pretend_args_size bytes
1988 | saved regs | spu_saved_regs_size() bytes
1989 FP -> +-------------+
1991 | vars | get_frame_size() bytes
1992 HFP -> +-------------+
1995 | args | crtl->outgoing_args_size bytes
2001 SP -> +-------------+
2005 spu_expand_prologue (void)
2007 HOST_WIDE_INT size = get_frame_size (), offset, regno;
2008 HOST_WIDE_INT total_size;
2009 HOST_WIDE_INT saved_regs_size;
2010 rtx sp_reg = gen_rtx_REG (Pmode, STACK_POINTER_REGNUM);
2011 rtx scratch_reg_0, scratch_reg_1;
2014 if (flag_pic && optimize == 0 && !cfun->machine->pic_reg)
2015 cfun->machine->pic_reg = pic_offset_table_rtx;
2017 if (spu_naked_function_p (current_function_decl))
2020 scratch_reg_0 = gen_rtx_REG (SImode, LAST_ARG_REGNUM + 1);
2021 scratch_reg_1 = gen_rtx_REG (SImode, LAST_ARG_REGNUM + 2);
2023 saved_regs_size = spu_saved_regs_size ();
2024 total_size = size + saved_regs_size
2025 + crtl->outgoing_args_size
2026 + crtl->args.pretend_args_size;
2028 if (!current_function_is_leaf
2029 || cfun->calls_alloca || total_size > 0)
2030 total_size += STACK_POINTER_OFFSET;
2032 /* Save this first because code after this might use the link
2033 register as a scratch register. */
2034 if (!current_function_is_leaf)
2036 insn = frame_emit_store (LINK_REGISTER_REGNUM, sp_reg, 16);
2037 RTX_FRAME_RELATED_P (insn) = 1;
2042 offset = -crtl->args.pretend_args_size;
2043 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; ++regno)
2044 if (need_to_save_reg (regno, 1))
2047 insn = frame_emit_store (regno, sp_reg, offset);
2048 RTX_FRAME_RELATED_P (insn) = 1;
2052 if (flag_pic && cfun->machine->pic_reg)
2054 rtx pic_reg = cfun->machine->pic_reg;
2055 insn = emit_insn (gen_load_pic_offset (pic_reg, scratch_reg_0));
2056 insn = emit_insn (gen_subsi3 (pic_reg, pic_reg, scratch_reg_0));
2061 if (flag_stack_check)
2063 /* We compare against total_size-1 because
2064 ($sp >= total_size) <=> ($sp > total_size-1) */
2065 rtx scratch_v4si = gen_rtx_REG (V4SImode, REGNO (scratch_reg_0));
2066 rtx sp_v4si = gen_rtx_REG (V4SImode, STACK_POINTER_REGNUM);
2067 rtx size_v4si = spu_const (V4SImode, total_size - 1);
2068 if (!satisfies_constraint_K (GEN_INT (total_size - 1)))
2070 emit_move_insn (scratch_v4si, size_v4si);
2071 size_v4si = scratch_v4si;
2073 emit_insn (gen_cgt_v4si (scratch_v4si, sp_v4si, size_v4si));
2074 emit_insn (gen_vec_extractv4si
2075 (scratch_reg_0, scratch_v4si, GEN_INT (1)));
2076 emit_insn (gen_spu_heq (scratch_reg_0, GEN_INT (0)));
2079 /* Adjust the stack pointer, and make sure scratch_reg_0 contains
2080 the value of the previous $sp because we save it as the back
2082 if (total_size <= 2000)
2084 /* In this case we save the back chain first. */
2085 insn = frame_emit_store (STACK_POINTER_REGNUM, sp_reg, -total_size);
2087 frame_emit_add_imm (sp_reg, sp_reg, -total_size, scratch_reg_0);
2091 insn = emit_move_insn (scratch_reg_0, sp_reg);
2093 frame_emit_add_imm (sp_reg, sp_reg, -total_size, scratch_reg_1);
2095 RTX_FRAME_RELATED_P (insn) = 1;
2096 real = gen_addsi3 (sp_reg, sp_reg, GEN_INT (-total_size));
2097 add_reg_note (insn, REG_FRAME_RELATED_EXPR, real);
2099 if (total_size > 2000)
2101 /* Save the back chain ptr */
2102 insn = frame_emit_store (REGNO (scratch_reg_0), sp_reg, 0);
2105 if (frame_pointer_needed)
2107 rtx fp_reg = gen_rtx_REG (Pmode, HARD_FRAME_POINTER_REGNUM);
2108 HOST_WIDE_INT fp_offset = STACK_POINTER_OFFSET
2109 + crtl->outgoing_args_size;
2110 /* Set the new frame_pointer */
2111 insn = frame_emit_add_imm (fp_reg, sp_reg, fp_offset, scratch_reg_0);
2112 RTX_FRAME_RELATED_P (insn) = 1;
2113 real = gen_addsi3 (fp_reg, sp_reg, GEN_INT (fp_offset));
2114 add_reg_note (insn, REG_FRAME_RELATED_EXPR, real);
2115 REGNO_POINTER_ALIGN (HARD_FRAME_POINTER_REGNUM) = STACK_BOUNDARY;
2119 if (flag_stack_usage_info)
2120 current_function_static_stack_size = total_size;
2124 spu_expand_epilogue (bool sibcall_p)
2126 int size = get_frame_size (), offset, regno;
2127 HOST_WIDE_INT saved_regs_size, total_size;
2128 rtx sp_reg = gen_rtx_REG (Pmode, STACK_POINTER_REGNUM);
2131 if (spu_naked_function_p (current_function_decl))
2134 scratch_reg_0 = gen_rtx_REG (SImode, LAST_ARG_REGNUM + 1);
2136 saved_regs_size = spu_saved_regs_size ();
2137 total_size = size + saved_regs_size
2138 + crtl->outgoing_args_size
2139 + crtl->args.pretend_args_size;
2141 if (!current_function_is_leaf
2142 || cfun->calls_alloca || total_size > 0)
2143 total_size += STACK_POINTER_OFFSET;
2147 if (cfun->calls_alloca)
2148 frame_emit_load (STACK_POINTER_REGNUM, sp_reg, 0);
2150 frame_emit_add_imm (sp_reg, sp_reg, total_size, scratch_reg_0);
2153 if (saved_regs_size > 0)
2155 offset = -crtl->args.pretend_args_size;
2156 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; ++regno)
2157 if (need_to_save_reg (regno, 1))
2160 frame_emit_load (regno, sp_reg, offset);
2165 if (!current_function_is_leaf)
2166 frame_emit_load (LINK_REGISTER_REGNUM, sp_reg, 16);
2170 emit_use (gen_rtx_REG (SImode, LINK_REGISTER_REGNUM));
2171 emit_jump_insn (gen__return ());
2176 spu_return_addr (int count, rtx frame ATTRIBUTE_UNUSED)
2180 /* This is inefficient because it ends up copying to a save-register
2181 which then gets saved even though $lr has already been saved. But
2182 it does generate better code for leaf functions and we don't need
2183 to use RETURN_ADDRESS_POINTER_REGNUM to get it working. It's only
2184 used for __builtin_return_address anyway, so maybe we don't care if
2185 it's inefficient. */
2186 return get_hard_reg_initial_val (Pmode, LINK_REGISTER_REGNUM);
2190 /* Given VAL, generate a constant appropriate for MODE.
2191 If MODE is a vector mode, every element will be VAL.
2192 For TImode, VAL will be zero extended to 128 bits. */
2194 spu_const (enum machine_mode mode, HOST_WIDE_INT val)
2200 gcc_assert (GET_MODE_CLASS (mode) == MODE_INT
2201 || GET_MODE_CLASS (mode) == MODE_FLOAT
2202 || GET_MODE_CLASS (mode) == MODE_VECTOR_INT
2203 || GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT);
2205 if (GET_MODE_CLASS (mode) == MODE_INT)
2206 return immed_double_const (val, 0, mode);
2208 /* val is the bit representation of the float */
2209 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
2210 return hwint_to_const_double (mode, val);
2212 if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
2213 inner = immed_double_const (val, 0, GET_MODE_INNER (mode));
2215 inner = hwint_to_const_double (GET_MODE_INNER (mode), val);
2217 units = GET_MODE_NUNITS (mode);
2219 v = rtvec_alloc (units);
2221 for (i = 0; i < units; ++i)
2222 RTVEC_ELT (v, i) = inner;
2224 return gen_rtx_CONST_VECTOR (mode, v);
2227 /* Create a MODE vector constant from 4 ints. */
2229 spu_const_from_ints(enum machine_mode mode, int a, int b, int c, int d)
2231 unsigned char arr[16];
2232 arr[0] = (a >> 24) & 0xff;
2233 arr[1] = (a >> 16) & 0xff;
2234 arr[2] = (a >> 8) & 0xff;
2235 arr[3] = (a >> 0) & 0xff;
2236 arr[4] = (b >> 24) & 0xff;
2237 arr[5] = (b >> 16) & 0xff;
2238 arr[6] = (b >> 8) & 0xff;
2239 arr[7] = (b >> 0) & 0xff;
2240 arr[8] = (c >> 24) & 0xff;
2241 arr[9] = (c >> 16) & 0xff;
2242 arr[10] = (c >> 8) & 0xff;
2243 arr[11] = (c >> 0) & 0xff;
2244 arr[12] = (d >> 24) & 0xff;
2245 arr[13] = (d >> 16) & 0xff;
2246 arr[14] = (d >> 8) & 0xff;
2247 arr[15] = (d >> 0) & 0xff;
2248 return array_to_constant(mode, arr);
2251 /* branch hint stuff */
2253 /* An array of these is used to propagate hints to predecessor blocks. */
2256 rtx prop_jump; /* propagated from another block */
2257 int bb_index; /* the original block. */
2259 static struct spu_bb_info *spu_bb_info;
2261 #define STOP_HINT_P(INSN) \
2262 (GET_CODE(INSN) == CALL_INSN \
2263 || INSN_CODE(INSN) == CODE_FOR_divmodsi4 \
2264 || INSN_CODE(INSN) == CODE_FOR_udivmodsi4)
2266 /* 1 when RTX is a hinted branch or its target. We keep track of
2267 what has been hinted so the safe-hint code can test it easily. */
2268 #define HINTED_P(RTX) \
2269 (RTL_FLAG_CHECK3("HINTED_P", (RTX), CODE_LABEL, JUMP_INSN, CALL_INSN)->unchanging)
2271 /* 1 when RTX is an insn that must be scheduled on an even boundary. */
2272 #define SCHED_ON_EVEN_P(RTX) \
2273 (RTL_FLAG_CHECK2("SCHED_ON_EVEN_P", (RTX), JUMP_INSN, CALL_INSN)->in_struct)
2275 /* Emit a nop for INSN such that the two will dual issue. This assumes
2276 INSN is 8-byte aligned. When INSN is inline asm we emit an lnop.
2277 We check for TImode to handle a MULTI1 insn which has dual issued its
2278 first instruction. get_pipe returns -1 for MULTI0, inline asm, or
2281 emit_nop_for_insn (rtx insn)
2285 p = get_pipe (insn);
2286 if ((CALL_P (insn) || JUMP_P (insn)) && SCHED_ON_EVEN_P (insn))
2287 new_insn = emit_insn_after (gen_lnop (), insn);
2288 else if (p == 1 && GET_MODE (insn) == TImode)
2290 new_insn = emit_insn_before (gen_nopn (GEN_INT (127)), insn);
2291 PUT_MODE (new_insn, TImode);
2292 PUT_MODE (insn, VOIDmode);
2295 new_insn = emit_insn_after (gen_lnop (), insn);
2296 recog_memoized (new_insn);
2297 INSN_LOCATOR (new_insn) = INSN_LOCATOR (insn);
2300 /* Insert nops in basic blocks to meet dual issue alignment
2301 requirements. Also make sure hbrp and hint instructions are at least
2302 one cycle apart, possibly inserting a nop. */
2306 rtx insn, next_insn, prev_insn, hbr_insn = 0;
2310 /* This sets up INSN_ADDRESSES. */
2311 shorten_branches (get_insns ());
2313 /* Keep track of length added by nops. */
2317 insn = get_insns ();
2318 if (!active_insn_p (insn))
2319 insn = next_active_insn (insn);
2320 for (; insn; insn = next_insn)
2322 next_insn = next_active_insn (insn);
2323 if (INSN_CODE (insn) == CODE_FOR_iprefetch
2324 || INSN_CODE (insn) == CODE_FOR_hbr)
2328 int a0 = INSN_ADDRESSES (INSN_UID (hbr_insn));
2329 int a1 = INSN_ADDRESSES (INSN_UID (insn));
2330 if ((a1 - a0 == 8 && GET_MODE (insn) != TImode)
2333 prev_insn = emit_insn_before (gen_lnop (), insn);
2334 PUT_MODE (prev_insn, GET_MODE (insn));
2335 PUT_MODE (insn, TImode);
2336 INSN_LOCATOR (prev_insn) = INSN_LOCATOR (insn);
2342 if (INSN_CODE (insn) == CODE_FOR_blockage)
2344 if (GET_MODE (insn) == TImode)
2345 PUT_MODE (next_insn, TImode);
2347 next_insn = next_active_insn (insn);
2349 addr = INSN_ADDRESSES (INSN_UID (insn));
2350 if ((CALL_P (insn) || JUMP_P (insn)) && SCHED_ON_EVEN_P (insn))
2352 if (((addr + length) & 7) != 0)
2354 emit_nop_for_insn (prev_insn);
2358 else if (GET_MODE (insn) == TImode
2359 && ((next_insn && GET_MODE (next_insn) != TImode)
2360 || get_attr_type (insn) == TYPE_MULTI0)
2361 && ((addr + length) & 7) != 0)
2363 /* prev_insn will always be set because the first insn is
2364 always 8-byte aligned. */
2365 emit_nop_for_insn (prev_insn);
2373 /* Routines for branch hints. */
2376 spu_emit_branch_hint (rtx before, rtx branch, rtx target,
2377 int distance, sbitmap blocks)
2379 rtx branch_label = 0;
2384 if (before == 0 || branch == 0 || target == 0)
2387 /* While scheduling we require hints to be no further than 600, so
2388 we need to enforce that here too */
2392 /* If we have a Basic block note, emit it after the basic block note. */
2393 if (NOTE_INSN_BASIC_BLOCK_P (before))
2394 before = NEXT_INSN (before);
2396 branch_label = gen_label_rtx ();
2397 LABEL_NUSES (branch_label)++;
2398 LABEL_PRESERVE_P (branch_label) = 1;
2399 insn = emit_label_before (branch_label, branch);
2400 branch_label = gen_rtx_LABEL_REF (VOIDmode, branch_label);
2401 SET_BIT (blocks, BLOCK_FOR_INSN (branch)->index);
2403 hint = emit_insn_before (gen_hbr (branch_label, target), before);
2404 recog_memoized (hint);
2405 INSN_LOCATOR (hint) = INSN_LOCATOR (branch);
2406 HINTED_P (branch) = 1;
2408 if (GET_CODE (target) == LABEL_REF)
2409 HINTED_P (XEXP (target, 0)) = 1;
2410 else if (tablejump_p (branch, 0, &table))
2414 if (GET_CODE (PATTERN (table)) == ADDR_VEC)
2415 vec = XVEC (PATTERN (table), 0);
2417 vec = XVEC (PATTERN (table), 1);
2418 for (j = GET_NUM_ELEM (vec) - 1; j >= 0; --j)
2419 HINTED_P (XEXP (RTVEC_ELT (vec, j), 0)) = 1;
2422 if (distance >= 588)
2424 /* Make sure the hint isn't scheduled any earlier than this point,
2425 which could make it too far for the branch offest to fit */
2426 insn = emit_insn_before (gen_blockage (), hint);
2427 recog_memoized (insn);
2428 INSN_LOCATOR (insn) = INSN_LOCATOR (hint);
2430 else if (distance <= 8 * 4)
2432 /* To guarantee at least 8 insns between the hint and branch we
2435 for (d = distance; d < 8 * 4; d += 4)
2438 emit_insn_after (gen_nopn_nv (gen_rtx_REG (SImode, 127)), hint);
2439 recog_memoized (insn);
2440 INSN_LOCATOR (insn) = INSN_LOCATOR (hint);
2443 /* Make sure any nops inserted aren't scheduled before the hint. */
2444 insn = emit_insn_after (gen_blockage (), hint);
2445 recog_memoized (insn);
2446 INSN_LOCATOR (insn) = INSN_LOCATOR (hint);
2448 /* Make sure any nops inserted aren't scheduled after the call. */
2449 if (CALL_P (branch) && distance < 8 * 4)
2451 insn = emit_insn_before (gen_blockage (), branch);
2452 recog_memoized (insn);
2453 INSN_LOCATOR (insn) = INSN_LOCATOR (branch);
2458 /* Returns 0 if we don't want a hint for this branch. Otherwise return
2459 the rtx for the branch target. */
2461 get_branch_target (rtx branch)
2463 if (GET_CODE (branch) == JUMP_INSN)
2467 /* Return statements */
2468 if (GET_CODE (PATTERN (branch)) == RETURN)
2469 return gen_rtx_REG (SImode, LINK_REGISTER_REGNUM);
2472 if (GET_CODE (PATTERN (branch)) == ADDR_VEC
2473 || GET_CODE (PATTERN (branch)) == ADDR_DIFF_VEC)
2477 if (extract_asm_operands (PATTERN (branch)) != NULL)
2480 set = single_set (branch);
2481 src = SET_SRC (set);
2482 if (GET_CODE (SET_DEST (set)) != PC)
2485 if (GET_CODE (src) == IF_THEN_ELSE)
2488 rtx note = find_reg_note (branch, REG_BR_PROB, 0);
2491 /* If the more probable case is not a fall through, then
2492 try a branch hint. */
2493 HOST_WIDE_INT prob = INTVAL (XEXP (note, 0));
2494 if (prob > (REG_BR_PROB_BASE * 6 / 10)
2495 && GET_CODE (XEXP (src, 1)) != PC)
2496 lab = XEXP (src, 1);
2497 else if (prob < (REG_BR_PROB_BASE * 4 / 10)
2498 && GET_CODE (XEXP (src, 2)) != PC)
2499 lab = XEXP (src, 2);
2503 if (GET_CODE (lab) == RETURN)
2504 return gen_rtx_REG (SImode, LINK_REGISTER_REGNUM);
2512 else if (GET_CODE (branch) == CALL_INSN)
2515 /* All of our call patterns are in a PARALLEL and the CALL is
2516 the first pattern in the PARALLEL. */
2517 if (GET_CODE (PATTERN (branch)) != PARALLEL)
2519 call = XVECEXP (PATTERN (branch), 0, 0);
2520 if (GET_CODE (call) == SET)
2521 call = SET_SRC (call);
2522 if (GET_CODE (call) != CALL)
2524 return XEXP (XEXP (call, 0), 0);
2529 /* The special $hbr register is used to prevent the insn scheduler from
2530 moving hbr insns across instructions which invalidate them. It
2531 should only be used in a clobber, and this function searches for
2532 insns which clobber it. */
2534 insn_clobbers_hbr (rtx insn)
2537 && GET_CODE (PATTERN (insn)) == PARALLEL)
2539 rtx parallel = PATTERN (insn);
2542 for (j = XVECLEN (parallel, 0) - 1; j >= 0; j--)
2544 clobber = XVECEXP (parallel, 0, j);
2545 if (GET_CODE (clobber) == CLOBBER
2546 && GET_CODE (XEXP (clobber, 0)) == REG
2547 && REGNO (XEXP (clobber, 0)) == HBR_REGNUM)
2554 /* Search up to 32 insns starting at FIRST:
2555 - at any kind of hinted branch, just return
2556 - at any unconditional branch in the first 15 insns, just return
2557 - at a call or indirect branch, after the first 15 insns, force it to
2558 an even address and return
2559 - at any unconditional branch, after the first 15 insns, force it to
2561 At then end of the search, insert an hbrp within 4 insns of FIRST,
2562 and an hbrp within 16 instructions of FIRST.
2565 insert_hbrp_for_ilb_runout (rtx first)
2567 rtx insn, before_4 = 0, before_16 = 0;
2568 int addr = 0, length, first_addr = -1;
2569 int hbrp_addr0 = 128 * 4, hbrp_addr1 = 128 * 4;
2570 int insert_lnop_after = 0;
2571 for (insn = first; insn; insn = NEXT_INSN (insn))
2574 if (first_addr == -1)
2575 first_addr = INSN_ADDRESSES (INSN_UID (insn));
2576 addr = INSN_ADDRESSES (INSN_UID (insn)) - first_addr;
2577 length = get_attr_length (insn);
2579 if (before_4 == 0 && addr + length >= 4 * 4)
2581 /* We test for 14 instructions because the first hbrp will add
2582 up to 2 instructions. */
2583 if (before_16 == 0 && addr + length >= 14 * 4)
2586 if (INSN_CODE (insn) == CODE_FOR_hbr)
2588 /* Make sure an hbrp is at least 2 cycles away from a hint.
2589 Insert an lnop after the hbrp when necessary. */
2590 if (before_4 == 0 && addr > 0)
2593 insert_lnop_after |= 1;
2595 else if (before_4 && addr <= 4 * 4)
2596 insert_lnop_after |= 1;
2597 if (before_16 == 0 && addr > 10 * 4)
2600 insert_lnop_after |= 2;
2602 else if (before_16 && addr <= 14 * 4)
2603 insert_lnop_after |= 2;
2606 if (INSN_CODE (insn) == CODE_FOR_iprefetch)
2608 if (addr < hbrp_addr0)
2610 else if (addr < hbrp_addr1)
2614 if (CALL_P (insn) || JUMP_P (insn))
2616 if (HINTED_P (insn))
2619 /* Any branch after the first 15 insns should be on an even
2620 address to avoid a special case branch. There might be
2621 some nops and/or hbrps inserted, so we test after 10
2624 SCHED_ON_EVEN_P (insn) = 1;
2627 if (CALL_P (insn) || tablejump_p (insn, 0, 0))
2631 if (addr + length >= 32 * 4)
2633 gcc_assert (before_4 && before_16);
2634 if (hbrp_addr0 > 4 * 4)
2637 emit_insn_before (gen_iprefetch (GEN_INT (1)), before_4);
2638 recog_memoized (insn);
2639 INSN_LOCATOR (insn) = INSN_LOCATOR (before_4);
2640 INSN_ADDRESSES_NEW (insn,
2641 INSN_ADDRESSES (INSN_UID (before_4)));
2642 PUT_MODE (insn, GET_MODE (before_4));
2643 PUT_MODE (before_4, TImode);
2644 if (insert_lnop_after & 1)
2646 insn = emit_insn_before (gen_lnop (), before_4);
2647 recog_memoized (insn);
2648 INSN_LOCATOR (insn) = INSN_LOCATOR (before_4);
2649 INSN_ADDRESSES_NEW (insn,
2650 INSN_ADDRESSES (INSN_UID (before_4)));
2651 PUT_MODE (insn, TImode);
2654 if ((hbrp_addr0 <= 4 * 4 || hbrp_addr0 > 16 * 4)
2655 && hbrp_addr1 > 16 * 4)
2658 emit_insn_before (gen_iprefetch (GEN_INT (2)), before_16);
2659 recog_memoized (insn);
2660 INSN_LOCATOR (insn) = INSN_LOCATOR (before_16);
2661 INSN_ADDRESSES_NEW (insn,
2662 INSN_ADDRESSES (INSN_UID (before_16)));
2663 PUT_MODE (insn, GET_MODE (before_16));
2664 PUT_MODE (before_16, TImode);
2665 if (insert_lnop_after & 2)
2667 insn = emit_insn_before (gen_lnop (), before_16);
2668 recog_memoized (insn);
2669 INSN_LOCATOR (insn) = INSN_LOCATOR (before_16);
2670 INSN_ADDRESSES_NEW (insn,
2671 INSN_ADDRESSES (INSN_UID
2673 PUT_MODE (insn, TImode);
2679 else if (BARRIER_P (insn))
2684 /* The SPU might hang when it executes 48 inline instructions after a
2685 hinted branch jumps to its hinted target. The beginning of a
2686 function and the return from a call might have been hinted, and
2687 must be handled as well. To prevent a hang we insert 2 hbrps. The
2688 first should be within 6 insns of the branch target. The second
2689 should be within 22 insns of the branch target. When determining
2690 if hbrps are necessary, we look for only 32 inline instructions,
2691 because up to 12 nops and 4 hbrps could be inserted. Similarily,
2692 when inserting new hbrps, we insert them within 4 and 16 insns of
2698 if (TARGET_SAFE_HINTS)
2700 shorten_branches (get_insns ());
2701 /* Insert hbrp at beginning of function */
2702 insn = next_active_insn (get_insns ());
2704 insert_hbrp_for_ilb_runout (insn);
2705 /* Insert hbrp after hinted targets. */
2706 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
2707 if ((LABEL_P (insn) && HINTED_P (insn)) || CALL_P (insn))
2708 insert_hbrp_for_ilb_runout (next_active_insn (insn));
2712 static int in_spu_reorg;
2715 spu_var_tracking (void)
2717 if (flag_var_tracking)
2720 timevar_push (TV_VAR_TRACKING);
2721 variable_tracking_main ();
2722 timevar_pop (TV_VAR_TRACKING);
2723 df_finish_pass (false);
2727 /* Insert branch hints. There are no branch optimizations after this
2728 pass, so it's safe to set our branch hints now. */
2730 spu_machine_dependent_reorg (void)
2735 rtx branch_target = 0;
2736 int branch_addr = 0, insn_addr, required_dist = 0;
2740 if (!TARGET_BRANCH_HINTS || optimize == 0)
2742 /* We still do it for unoptimized code because an external
2743 function might have hinted a call or return. */
2744 compute_bb_for_insn ();
2747 spu_var_tracking ();
2748 free_bb_for_insn ();
2752 blocks = sbitmap_alloc (last_basic_block);
2753 sbitmap_zero (blocks);
2756 compute_bb_for_insn ();
2761 (struct spu_bb_info *) xcalloc (n_basic_blocks,
2762 sizeof (struct spu_bb_info));
2764 /* We need exact insn addresses and lengths. */
2765 shorten_branches (get_insns ());
2767 for (i = n_basic_blocks - 1; i >= 0; i--)
2769 bb = BASIC_BLOCK (i);
2771 if (spu_bb_info[i].prop_jump)
2773 branch = spu_bb_info[i].prop_jump;
2774 branch_target = get_branch_target (branch);
2775 branch_addr = INSN_ADDRESSES (INSN_UID (branch));
2776 required_dist = spu_hint_dist;
2778 /* Search from end of a block to beginning. In this loop, find
2779 jumps which need a branch and emit them only when:
2780 - it's an indirect branch and we're at the insn which sets
2782 - we're at an insn that will invalidate the hint. e.g., a
2783 call, another hint insn, inline asm that clobbers $hbr, and
2784 some inlined operations (divmodsi4). Don't consider jumps
2785 because they are only at the end of a block and are
2786 considered when we are deciding whether to propagate
2787 - we're getting too far away from the branch. The hbr insns
2788 only have a signed 10 bit offset
2789 We go back as far as possible so the branch will be considered
2790 for propagation when we get to the beginning of the block. */
2791 for (insn = BB_END (bb); insn; insn = PREV_INSN (insn))
2795 insn_addr = INSN_ADDRESSES (INSN_UID (insn));
2797 && ((GET_CODE (branch_target) == REG
2798 && set_of (branch_target, insn) != NULL_RTX)
2799 || insn_clobbers_hbr (insn)
2800 || branch_addr - insn_addr > 600))
2802 rtx next = NEXT_INSN (insn);
2803 int next_addr = INSN_ADDRESSES (INSN_UID (next));
2804 if (insn != BB_END (bb)
2805 && branch_addr - next_addr >= required_dist)
2809 "hint for %i in block %i before %i\n",
2810 INSN_UID (branch), bb->index,
2812 spu_emit_branch_hint (next, branch, branch_target,
2813 branch_addr - next_addr, blocks);
2818 /* JUMP_P will only be true at the end of a block. When
2819 branch is already set it means we've previously decided
2820 to propagate a hint for that branch into this block. */
2821 if (CALL_P (insn) || (JUMP_P (insn) && !branch))
2824 if ((branch_target = get_branch_target (insn)))
2827 branch_addr = insn_addr;
2828 required_dist = spu_hint_dist;
2832 if (insn == BB_HEAD (bb))
2838 /* If we haven't emitted a hint for this branch yet, it might
2839 be profitable to emit it in one of the predecessor blocks,
2840 especially for loops. */
2842 basic_block prev = 0, prop = 0, prev2 = 0;
2843 int loop_exit = 0, simple_loop = 0;
2844 int next_addr = INSN_ADDRESSES (INSN_UID (NEXT_INSN (insn)));
2846 for (j = 0; j < EDGE_COUNT (bb->preds); j++)
2847 if (EDGE_PRED (bb, j)->flags & EDGE_FALLTHRU)
2848 prev = EDGE_PRED (bb, j)->src;
2850 prev2 = EDGE_PRED (bb, j)->src;
2852 for (j = 0; j < EDGE_COUNT (bb->succs); j++)
2853 if (EDGE_SUCC (bb, j)->flags & EDGE_LOOP_EXIT)
2855 else if (EDGE_SUCC (bb, j)->dest == bb)
2858 /* If this branch is a loop exit then propagate to previous
2859 fallthru block. This catches the cases when it is a simple
2860 loop or when there is an initial branch into the loop. */
2861 if (prev && (loop_exit || simple_loop)
2862 && prev->loop_depth <= bb->loop_depth)
2865 /* If there is only one adjacent predecessor. Don't propagate
2866 outside this loop. This loop_depth test isn't perfect, but
2867 I'm not sure the loop_father member is valid at this point. */
2868 else if (prev && single_pred_p (bb)
2869 && prev->loop_depth == bb->loop_depth)
2872 /* If this is the JOIN block of a simple IF-THEN then
2873 propagate the hint to the HEADER block. */
2874 else if (prev && prev2
2875 && EDGE_COUNT (bb->preds) == 2
2876 && EDGE_COUNT (prev->preds) == 1
2877 && EDGE_PRED (prev, 0)->src == prev2
2878 && prev2->loop_depth == bb->loop_depth
2879 && GET_CODE (branch_target) != REG)
2882 /* Don't propagate when:
2883 - this is a simple loop and the hint would be too far
2884 - this is not a simple loop and there are 16 insns in
2886 - the predecessor block ends in a branch that will be
2888 - the predecessor block ends in an insn that invalidates
2892 && (bbend = BB_END (prop))
2893 && branch_addr - INSN_ADDRESSES (INSN_UID (bbend)) <
2894 (simple_loop ? 600 : 16 * 4) && get_branch_target (bbend) == 0
2895 && (JUMP_P (bbend) || !insn_clobbers_hbr (bbend)))
2898 fprintf (dump_file, "propagate from %i to %i (loop depth %i) "
2899 "for %i (loop_exit %i simple_loop %i dist %i)\n",
2900 bb->index, prop->index, bb->loop_depth,
2901 INSN_UID (branch), loop_exit, simple_loop,
2902 branch_addr - INSN_ADDRESSES (INSN_UID (bbend)));
2904 spu_bb_info[prop->index].prop_jump = branch;
2905 spu_bb_info[prop->index].bb_index = i;
2907 else if (branch_addr - next_addr >= required_dist)
2910 fprintf (dump_file, "hint for %i in block %i before %i\n",
2911 INSN_UID (branch), bb->index,
2912 INSN_UID (NEXT_INSN (insn)));
2913 spu_emit_branch_hint (NEXT_INSN (insn), branch, branch_target,
2914 branch_addr - next_addr, blocks);
2921 if (!sbitmap_empty_p (blocks))
2922 find_many_sub_basic_blocks (blocks);
2924 /* We have to schedule to make sure alignment is ok. */
2925 FOR_EACH_BB (bb) bb->flags &= ~BB_DISABLE_SCHEDULE;
2927 /* The hints need to be scheduled, so call it again. */
2929 df_finish_pass (true);
2935 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
2936 if (NONJUMP_INSN_P (insn) && INSN_CODE (insn) == CODE_FOR_hbr)
2938 /* Adjust the LABEL_REF in a hint when we have inserted a nop
2939 between its branch label and the branch . We don't move the
2940 label because GCC expects it at the beginning of the block. */
2941 rtx unspec = SET_SRC (XVECEXP (PATTERN (insn), 0, 0));
2942 rtx label_ref = XVECEXP (unspec, 0, 0);
2943 rtx label = XEXP (label_ref, 0);
2946 for (branch = NEXT_INSN (label);
2947 !JUMP_P (branch) && !CALL_P (branch);
2948 branch = NEXT_INSN (branch))
2949 if (NONJUMP_INSN_P (branch))
2950 offset += get_attr_length (branch);
2952 XVECEXP (unspec, 0, 0) = plus_constant (Pmode, label_ref, offset);
2955 spu_var_tracking ();
2957 free_bb_for_insn ();
2963 /* Insn scheduling routines, primarily for dual issue. */
2965 spu_sched_issue_rate (void)
2971 uses_ls_unit(rtx insn)
2973 rtx set = single_set (insn);
2975 && (GET_CODE (SET_DEST (set)) == MEM
2976 || GET_CODE (SET_SRC (set)) == MEM))
2985 /* Handle inline asm */
2986 if (INSN_CODE (insn) == -1)
2988 t = get_attr_type (insn);
3013 case TYPE_IPREFETCH:
3021 /* haifa-sched.c has a static variable that keeps track of the current
3022 cycle. It is passed to spu_sched_reorder, and we record it here for
3023 use by spu_sched_variable_issue. It won't be accurate if the
3024 scheduler updates it's clock_var between the two calls. */
3025 static int clock_var;
3027 /* This is used to keep track of insn alignment. Set to 0 at the
3028 beginning of each block and increased by the "length" attr of each
3030 static int spu_sched_length;
3032 /* Record when we've issued pipe0 and pipe1 insns so we can reorder the
3033 ready list appropriately in spu_sched_reorder(). */
3034 static int pipe0_clock;
3035 static int pipe1_clock;
3037 static int prev_clock_var;
3039 static int prev_priority;
3041 /* The SPU needs to load the next ilb sometime during the execution of
3042 the previous ilb. There is a potential conflict if every cycle has a
3043 load or store. To avoid the conflict we make sure the load/store
3044 unit is free for at least one cycle during the execution of insns in
3045 the previous ilb. */
3046 static int spu_ls_first;
3047 static int prev_ls_clock;
3050 spu_sched_init_global (FILE *file ATTRIBUTE_UNUSED, int verbose ATTRIBUTE_UNUSED,
3051 int max_ready ATTRIBUTE_UNUSED)
3053 spu_sched_length = 0;
3057 spu_sched_init (FILE *file ATTRIBUTE_UNUSED, int verbose ATTRIBUTE_UNUSED,
3058 int max_ready ATTRIBUTE_UNUSED)
3060 if (align_labels > 4 || align_loops > 4 || align_jumps > 4)
3062 /* When any block might be at least 8-byte aligned, assume they
3063 will all be at least 8-byte aligned to make sure dual issue
3064 works out correctly. */
3065 spu_sched_length = 0;
3067 spu_ls_first = INT_MAX;
3072 prev_clock_var = -1;
3077 spu_sched_variable_issue (FILE *file ATTRIBUTE_UNUSED,
3078 int verbose ATTRIBUTE_UNUSED, rtx insn, int more)
3082 if (GET_CODE (PATTERN (insn)) == USE
3083 || GET_CODE (PATTERN (insn)) == CLOBBER
3084 || (len = get_attr_length (insn)) == 0)
3087 spu_sched_length += len;
3089 /* Reset on inline asm */
3090 if (INSN_CODE (insn) == -1)
3092 spu_ls_first = INT_MAX;
3097 p = get_pipe (insn);
3099 pipe0_clock = clock_var;
3101 pipe1_clock = clock_var;
3105 if (clock_var - prev_ls_clock > 1
3106 || INSN_CODE (insn) == CODE_FOR_iprefetch)
3107 spu_ls_first = INT_MAX;
3108 if (uses_ls_unit (insn))
3110 if (spu_ls_first == INT_MAX)
3111 spu_ls_first = spu_sched_length;
3112 prev_ls_clock = clock_var;
3115 /* The scheduler hasn't inserted the nop, but we will later on.
3116 Include those nops in spu_sched_length. */
3117 if (prev_clock_var == clock_var && (spu_sched_length & 7))
3118 spu_sched_length += 4;
3119 prev_clock_var = clock_var;
3121 /* more is -1 when called from spu_sched_reorder for new insns
3122 that don't have INSN_PRIORITY */
3124 prev_priority = INSN_PRIORITY (insn);
3127 /* Always try issuing more insns. spu_sched_reorder will decide
3128 when the cycle should be advanced. */
3132 /* This function is called for both TARGET_SCHED_REORDER and
3133 TARGET_SCHED_REORDER2. */
3135 spu_sched_reorder (FILE *file ATTRIBUTE_UNUSED, int verbose ATTRIBUTE_UNUSED,
3136 rtx *ready, int *nreadyp, int clock)
3138 int i, nready = *nreadyp;
3139 int pipe_0, pipe_1, pipe_hbrp, pipe_ls, schedule_i;
3144 if (nready <= 0 || pipe1_clock >= clock)
3147 /* Find any rtl insns that don't generate assembly insns and schedule
3149 for (i = nready - 1; i >= 0; i--)
3152 if (INSN_CODE (insn) == -1
3153 || INSN_CODE (insn) == CODE_FOR_blockage
3154 || (INSN_P (insn) && get_attr_length (insn) == 0))
3156 ready[i] = ready[nready - 1];
3157 ready[nready - 1] = insn;
3162 pipe_0 = pipe_1 = pipe_hbrp = pipe_ls = schedule_i = -1;
3163 for (i = 0; i < nready; i++)
3164 if (INSN_CODE (ready[i]) != -1)
3167 switch (get_attr_type (insn))
3192 case TYPE_IPREFETCH:
3198 /* In the first scheduling phase, schedule loads and stores together
3199 to increase the chance they will get merged during postreload CSE. */
3200 if (!reload_completed && pipe_ls >= 0)
3202 insn = ready[pipe_ls];
3203 ready[pipe_ls] = ready[nready - 1];
3204 ready[nready - 1] = insn;
3208 /* If there is an hbrp ready, prefer it over other pipe 1 insns. */
3212 /* When we have loads/stores in every cycle of the last 15 insns and
3213 we are about to schedule another load/store, emit an hbrp insn
3216 && spu_sched_length - spu_ls_first >= 4 * 15
3217 && !(pipe0_clock < clock && pipe_0 >= 0) && pipe_1 == pipe_ls)
3219 insn = sched_emit_insn (gen_iprefetch (GEN_INT (3)));
3220 recog_memoized (insn);
3221 if (pipe0_clock < clock)
3222 PUT_MODE (insn, TImode);
3223 spu_sched_variable_issue (file, verbose, insn, -1);
3227 /* In general, we want to emit nops to increase dual issue, but dual
3228 issue isn't faster when one of the insns could be scheduled later
3229 without effecting the critical path. We look at INSN_PRIORITY to
3230 make a good guess, but it isn't perfect so -mdual-nops=n can be
3231 used to effect it. */
3232 if (in_spu_reorg && spu_dual_nops < 10)
3234 /* When we are at an even address and we are not issuing nops to
3235 improve scheduling then we need to advance the cycle. */
3236 if ((spu_sched_length & 7) == 0 && prev_clock_var == clock
3237 && (spu_dual_nops == 0
3240 INSN_PRIORITY (ready[pipe_1]) + spu_dual_nops)))
3243 /* When at an odd address, schedule the highest priority insn
3244 without considering pipeline. */
3245 if ((spu_sched_length & 7) == 4 && prev_clock_var != clock
3246 && (spu_dual_nops == 0
3248 INSN_PRIORITY (ready[nready - 1]) + spu_dual_nops)))
3253 /* We haven't issued a pipe0 insn yet this cycle, if there is a
3254 pipe0 insn in the ready list, schedule it. */
3255 if (pipe0_clock < clock && pipe_0 >= 0)
3256 schedule_i = pipe_0;
3258 /* Either we've scheduled a pipe0 insn already or there is no pipe0
3259 insn to schedule. Put a pipe1 insn at the front of the ready list. */
3261 schedule_i = pipe_1;
3263 if (schedule_i > -1)
3265 insn = ready[schedule_i];
3266 ready[schedule_i] = ready[nready - 1];
3267 ready[nready - 1] = insn;
3273 /* INSN is dependent on DEP_INSN. */
3275 spu_sched_adjust_cost (rtx insn, rtx link, rtx dep_insn, int cost)
3279 /* The blockage pattern is used to prevent instructions from being
3280 moved across it and has no cost. */
3281 if (INSN_CODE (insn) == CODE_FOR_blockage
3282 || INSN_CODE (dep_insn) == CODE_FOR_blockage)
3285 if ((INSN_P (insn) && get_attr_length (insn) == 0)
3286 || (INSN_P (dep_insn) && get_attr_length (dep_insn) == 0))
3289 /* Make sure hbrps are spread out. */
3290 if (INSN_CODE (insn) == CODE_FOR_iprefetch
3291 && INSN_CODE (dep_insn) == CODE_FOR_iprefetch)
3294 /* Make sure hints and hbrps are 2 cycles apart. */
3295 if ((INSN_CODE (insn) == CODE_FOR_iprefetch
3296 || INSN_CODE (insn) == CODE_FOR_hbr)
3297 && (INSN_CODE (dep_insn) == CODE_FOR_iprefetch
3298 || INSN_CODE (dep_insn) == CODE_FOR_hbr))
3301 /* An hbrp has no real dependency on other insns. */
3302 if (INSN_CODE (insn) == CODE_FOR_iprefetch
3303 || INSN_CODE (dep_insn) == CODE_FOR_iprefetch)
3306 /* Assuming that it is unlikely an argument register will be used in
3307 the first cycle of the called function, we reduce the cost for
3308 slightly better scheduling of dep_insn. When not hinted, the
3309 mispredicted branch would hide the cost as well. */
3312 rtx target = get_branch_target (insn);
3313 if (GET_CODE (target) != REG || !set_of (target, insn))
3318 /* And when returning from a function, let's assume the return values
3319 are completed sooner too. */
3320 if (CALL_P (dep_insn))
3323 /* Make sure an instruction that loads from the back chain is schedule
3324 away from the return instruction so a hint is more likely to get
3326 if (INSN_CODE (insn) == CODE_FOR__return
3327 && (set = single_set (dep_insn))
3328 && GET_CODE (SET_DEST (set)) == REG
3329 && REGNO (SET_DEST (set)) == LINK_REGISTER_REGNUM)
3332 /* The dfa scheduler sets cost to 0 for all anti-dependencies and the
3333 scheduler makes every insn in a block anti-dependent on the final
3334 jump_insn. We adjust here so higher cost insns will get scheduled
3336 if (JUMP_P (insn) && REG_NOTE_KIND (link) == REG_DEP_ANTI)
3337 return insn_cost (dep_insn) - 3;
3342 /* Create a CONST_DOUBLE from a string. */
3344 spu_float_const (const char *string, enum machine_mode mode)
3346 REAL_VALUE_TYPE value;
3347 value = REAL_VALUE_ATOF (string, mode);
3348 return CONST_DOUBLE_FROM_REAL_VALUE (value, mode);
3352 spu_constant_address_p (rtx x)
3354 return (GET_CODE (x) == LABEL_REF || GET_CODE (x) == SYMBOL_REF
3355 || GET_CODE (x) == CONST_INT || GET_CODE (x) == CONST
3356 || GET_CODE (x) == HIGH);
3359 static enum spu_immediate
3360 which_immediate_load (HOST_WIDE_INT val)
3362 gcc_assert (val == trunc_int_for_mode (val, SImode));
3364 if (val >= -0x8000 && val <= 0x7fff)
3366 if (val >= 0 && val <= 0x3ffff)
3368 if ((val & 0xffff) == ((val >> 16) & 0xffff))
3370 if ((val & 0xffff) == 0)
3376 /* Return true when OP can be loaded by one of the il instructions, or
3377 when flow2 is not completed and OP can be loaded using ilhu and iohl. */
3379 immediate_load_p (rtx op, enum machine_mode mode)
3381 if (CONSTANT_P (op))
3383 enum immediate_class c = classify_immediate (op, mode);
3384 return c == IC_IL1 || c == IC_IL1s
3385 || (!epilogue_completed && (c == IC_IL2 || c == IC_IL2s));
3390 /* Return true if the first SIZE bytes of arr is a constant that can be
3391 generated with cbd, chd, cwd or cdd. When non-NULL, PRUN and PSTART
3392 represent the size and offset of the instruction to use. */
3394 cpat_info(unsigned char *arr, int size, int *prun, int *pstart)
3396 int cpat, run, i, start;
3400 for (i = 0; i < size && cpat; i++)
3408 else if (arr[i] == 2 && arr[i+1] == 3)
3410 else if (arr[i] == 0)
3412 while (arr[i+run] == run && i+run < 16)
3414 if (run != 4 && run != 8)
3419 if ((i & (run-1)) != 0)
3426 if (cpat && (run || size < 16))
3433 *pstart = start == -1 ? 16-run : start;
3439 /* OP is a CONSTANT_P. Determine what instructions can be used to load
3440 it into a register. MODE is only valid when OP is a CONST_INT. */
3441 static enum immediate_class
3442 classify_immediate (rtx op, enum machine_mode mode)
3445 unsigned char arr[16];
3446 int i, j, repeated, fsmbi, repeat;
3448 gcc_assert (CONSTANT_P (op));
3450 if (GET_MODE (op) != VOIDmode)
3451 mode = GET_MODE (op);
3453 /* A V4SI const_vector with all identical symbols is ok. */
3456 && GET_CODE (op) == CONST_VECTOR
3457 && GET_CODE (CONST_VECTOR_ELT (op, 0)) != CONST_INT
3458 && GET_CODE (CONST_VECTOR_ELT (op, 0)) != CONST_DOUBLE
3459 && CONST_VECTOR_ELT (op, 0) == CONST_VECTOR_ELT (op, 1)
3460 && CONST_VECTOR_ELT (op, 1) == CONST_VECTOR_ELT (op, 2)
3461 && CONST_VECTOR_ELT (op, 2) == CONST_VECTOR_ELT (op, 3))
3462 op = CONST_VECTOR_ELT (op, 0);
3464 switch (GET_CODE (op))
3468 return TARGET_LARGE_MEM ? IC_IL2s : IC_IL1s;
3471 /* We can never know if the resulting address fits in 18 bits and can be
3472 loaded with ila. For now, assume the address will not overflow if
3473 the displacement is "small" (fits 'K' constraint). */
3474 if (!TARGET_LARGE_MEM && GET_CODE (XEXP (op, 0)) == PLUS)
3476 rtx sym = XEXP (XEXP (op, 0), 0);
3477 rtx cst = XEXP (XEXP (op, 0), 1);
3479 if (GET_CODE (sym) == SYMBOL_REF
3480 && GET_CODE (cst) == CONST_INT
3481 && satisfies_constraint_K (cst))
3490 for (i = 0; i < GET_MODE_NUNITS (mode); i++)
3491 if (GET_CODE (CONST_VECTOR_ELT (op, i)) != CONST_INT
3492 && GET_CODE (CONST_VECTOR_ELT (op, i)) != CONST_DOUBLE)
3498 constant_to_array (mode, op, arr);
3500 /* Check that each 4-byte slot is identical. */
3502 for (i = 4; i < 16; i += 4)
3503 for (j = 0; j < 4; j++)
3504 if (arr[j] != arr[i + j])
3509 val = (arr[0] << 24) | (arr[1] << 16) | (arr[2] << 8) | arr[3];
3510 val = trunc_int_for_mode (val, SImode);
3512 if (which_immediate_load (val) != SPU_NONE)
3516 /* Any mode of 2 bytes or smaller can be loaded with an il
3518 gcc_assert (GET_MODE_SIZE (mode) > 2);
3522 for (i = 0; i < 16 && fsmbi; i++)
3523 if (arr[i] != 0 && repeat == 0)
3525 else if (arr[i] != 0 && arr[i] != repeat)
3528 return repeat == 0xff ? IC_FSMBI : IC_FSMBI2;
3530 if (cpat_info (arr, GET_MODE_SIZE (mode), 0, 0))
3543 static enum spu_immediate
3544 which_logical_immediate (HOST_WIDE_INT val)
3546 gcc_assert (val == trunc_int_for_mode (val, SImode));
3548 if (val >= -0x200 && val <= 0x1ff)
3550 if (val >= 0 && val <= 0xffff)
3552 if ((val & 0xffff) == ((val >> 16) & 0xffff))
3554 val = trunc_int_for_mode (val, HImode);
3555 if (val >= -0x200 && val <= 0x1ff)
3557 if ((val & 0xff) == ((val >> 8) & 0xff))
3559 val = trunc_int_for_mode (val, QImode);
3560 if (val >= -0x200 && val <= 0x1ff)
3567 /* Return TRUE when X, a CONST_VECTOR, only contains CONST_INTs or
3570 const_vector_immediate_p (rtx x)
3573 gcc_assert (GET_CODE (x) == CONST_VECTOR);
3574 for (i = 0; i < GET_MODE_NUNITS (GET_MODE (x)); i++)
3575 if (GET_CODE (CONST_VECTOR_ELT (x, i)) != CONST_INT
3576 && GET_CODE (CONST_VECTOR_ELT (x, i)) != CONST_DOUBLE)
3582 logical_immediate_p (rtx op, enum machine_mode mode)
3585 unsigned char arr[16];
3588 gcc_assert (GET_CODE (op) == CONST_INT || GET_CODE (op) == CONST_DOUBLE
3589 || GET_CODE (op) == CONST_VECTOR);
3591 if (GET_CODE (op) == CONST_VECTOR
3592 && !const_vector_immediate_p (op))
3595 if (GET_MODE (op) != VOIDmode)
3596 mode = GET_MODE (op);
3598 constant_to_array (mode, op, arr);
3600 /* Check that bytes are repeated. */
3601 for (i = 4; i < 16; i += 4)
3602 for (j = 0; j < 4; j++)
3603 if (arr[j] != arr[i + j])
3606 val = (arr[0] << 24) | (arr[1] << 16) | (arr[2] << 8) | arr[3];
3607 val = trunc_int_for_mode (val, SImode);
3609 i = which_logical_immediate (val);
3610 return i != SPU_NONE && i != SPU_IOHL;
3614 iohl_immediate_p (rtx op, enum machine_mode mode)
3617 unsigned char arr[16];
3620 gcc_assert (GET_CODE (op) == CONST_INT || GET_CODE (op) == CONST_DOUBLE
3621 || GET_CODE (op) == CONST_VECTOR);
3623 if (GET_CODE (op) == CONST_VECTOR
3624 && !const_vector_immediate_p (op))
3627 if (GET_MODE (op) != VOIDmode)
3628 mode = GET_MODE (op);
3630 constant_to_array (mode, op, arr);
3632 /* Check that bytes are repeated. */
3633 for (i = 4; i < 16; i += 4)
3634 for (j = 0; j < 4; j++)
3635 if (arr[j] != arr[i + j])
3638 val = (arr[0] << 24) | (arr[1] << 16) | (arr[2] << 8) | arr[3];
3639 val = trunc_int_for_mode (val, SImode);
3641 return val >= 0 && val <= 0xffff;
3645 arith_immediate_p (rtx op, enum machine_mode mode,
3646 HOST_WIDE_INT low, HOST_WIDE_INT high)
3649 unsigned char arr[16];
3652 gcc_assert (GET_CODE (op) == CONST_INT || GET_CODE (op) == CONST_DOUBLE
3653 || GET_CODE (op) == CONST_VECTOR);
3655 if (GET_CODE (op) == CONST_VECTOR
3656 && !const_vector_immediate_p (op))
3659 if (GET_MODE (op) != VOIDmode)
3660 mode = GET_MODE (op);
3662 constant_to_array (mode, op, arr);
3664 if (VECTOR_MODE_P (mode))
3665 mode = GET_MODE_INNER (mode);
3667 bytes = GET_MODE_SIZE (mode);
3668 mode = mode_for_size (GET_MODE_BITSIZE (mode), MODE_INT, 0);
3670 /* Check that bytes are repeated. */
3671 for (i = bytes; i < 16; i += bytes)
3672 for (j = 0; j < bytes; j++)
3673 if (arr[j] != arr[i + j])
3677 for (j = 1; j < bytes; j++)
3678 val = (val << 8) | arr[j];
3680 val = trunc_int_for_mode (val, mode);
3682 return val >= low && val <= high;
3685 /* TRUE when op is an immediate and an exact power of 2, and given that
3686 OP is 2^scale, scale >= LOW && scale <= HIGH. When OP is a vector,
3687 all entries must be the same. */
3689 exp2_immediate_p (rtx op, enum machine_mode mode, int low, int high)
3691 enum machine_mode int_mode;
3693 unsigned char arr[16];
3696 gcc_assert (GET_CODE (op) == CONST_INT || GET_CODE (op) == CONST_DOUBLE
3697 || GET_CODE (op) == CONST_VECTOR);
3699 if (GET_CODE (op) == CONST_VECTOR
3700 && !const_vector_immediate_p (op))
3703 if (GET_MODE (op) != VOIDmode)
3704 mode = GET_MODE (op);
3706 constant_to_array (mode, op, arr);
3708 if (VECTOR_MODE_P (mode))
3709 mode = GET_MODE_INNER (mode);
3711 bytes = GET_MODE_SIZE (mode);
3712 int_mode = mode_for_size (GET_MODE_BITSIZE (mode), MODE_INT, 0);
3714 /* Check that bytes are repeated. */
3715 for (i = bytes; i < 16; i += bytes)
3716 for (j = 0; j < bytes; j++)
3717 if (arr[j] != arr[i + j])
3721 for (j = 1; j < bytes; j++)
3722 val = (val << 8) | arr[j];
3724 val = trunc_int_for_mode (val, int_mode);
3726 /* Currently, we only handle SFmode */
3727 gcc_assert (mode == SFmode);
3730 int exp = (val >> 23) - 127;
3731 return val > 0 && (val & 0x007fffff) == 0
3732 && exp >= low && exp <= high;
3737 /* Return true if X is a SYMBOL_REF to an __ea qualified variable. */
3740 ea_symbol_ref (rtx *px, void *data ATTRIBUTE_UNUSED)
3745 if (GET_CODE (x) == CONST && GET_CODE (XEXP (x, 0)) == PLUS)
3747 rtx plus = XEXP (x, 0);
3748 rtx op0 = XEXP (plus, 0);
3749 rtx op1 = XEXP (plus, 1);
3750 if (GET_CODE (op1) == CONST_INT)
3754 return (GET_CODE (x) == SYMBOL_REF
3755 && (decl = SYMBOL_REF_DECL (x)) != 0
3756 && TREE_CODE (decl) == VAR_DECL
3757 && TYPE_ADDR_SPACE (TREE_TYPE (decl)));
3761 - any 32-bit constant (SImode, SFmode)
3762 - any constant that can be generated with fsmbi (any mode)
3763 - a 64-bit constant where the high and low bits are identical
3765 - a 128-bit constant where the four 32-bit words match. */
3767 spu_legitimate_constant_p (enum machine_mode mode, rtx x)
3769 if (GET_CODE (x) == HIGH)
3772 /* Reject any __ea qualified reference. These can't appear in
3773 instructions but must be forced to the constant pool. */
3774 if (for_each_rtx (&x, ea_symbol_ref, 0))
3777 /* V4SI with all identical symbols is valid. */
3780 && (GET_CODE (CONST_VECTOR_ELT (x, 0)) == SYMBOL_REF
3781 || GET_CODE (CONST_VECTOR_ELT (x, 0)) == LABEL_REF
3782 || GET_CODE (CONST_VECTOR_ELT (x, 0)) == CONST))
3783 return CONST_VECTOR_ELT (x, 0) == CONST_VECTOR_ELT (x, 1)
3784 && CONST_VECTOR_ELT (x, 1) == CONST_VECTOR_ELT (x, 2)
3785 && CONST_VECTOR_ELT (x, 2) == CONST_VECTOR_ELT (x, 3);
3787 if (GET_CODE (x) == CONST_VECTOR
3788 && !const_vector_immediate_p (x))
3793 /* Valid address are:
3794 - symbol_ref, label_ref, const
3796 - reg + const_int, where const_int is 16 byte aligned
3797 - reg + reg, alignment doesn't matter
3798 The alignment matters in the reg+const case because lqd and stqd
3799 ignore the 4 least significant bits of the const. We only care about
3800 16 byte modes because the expand phase will change all smaller MEM
3801 references to TImode. */
3803 spu_legitimate_address_p (enum machine_mode mode,
3804 rtx x, bool reg_ok_strict)
3806 int aligned = GET_MODE_SIZE (mode) >= 16;
3808 && GET_CODE (x) == AND
3809 && GET_CODE (XEXP (x, 1)) == CONST_INT
3810 && INTVAL (XEXP (x, 1)) == (HOST_WIDE_INT) - 16)
3812 switch (GET_CODE (x))
3815 return !TARGET_LARGE_MEM;
3819 /* Keep __ea references until reload so that spu_expand_mov can see them
3821 if (ea_symbol_ref (&x, 0))
3822 return !reload_in_progress && !reload_completed;
3823 return !TARGET_LARGE_MEM;
3826 return INTVAL (x) >= 0 && INTVAL (x) <= 0x3ffff;
3834 return INT_REG_OK_FOR_BASE_P (x, reg_ok_strict);
3839 rtx op0 = XEXP (x, 0);
3840 rtx op1 = XEXP (x, 1);
3841 if (GET_CODE (op0) == SUBREG)
3842 op0 = XEXP (op0, 0);
3843 if (GET_CODE (op1) == SUBREG)
3844 op1 = XEXP (op1, 0);
3845 if (GET_CODE (op0) == REG
3846 && INT_REG_OK_FOR_BASE_P (op0, reg_ok_strict)
3847 && GET_CODE (op1) == CONST_INT
3848 && ((INTVAL (op1) >= -0x2000 && INTVAL (op1) <= 0x1fff)
3849 /* If virtual registers are involved, the displacement will
3850 change later on anyway, so checking would be premature.
3851 Reload will make sure the final displacement after
3852 register elimination is OK. */
3853 || op0 == arg_pointer_rtx
3854 || op0 == frame_pointer_rtx
3855 || op0 == virtual_stack_vars_rtx)
3856 && (!aligned || (INTVAL (op1) & 15) == 0))
3858 if (GET_CODE (op0) == REG
3859 && INT_REG_OK_FOR_BASE_P (op0, reg_ok_strict)
3860 && GET_CODE (op1) == REG
3861 && INT_REG_OK_FOR_INDEX_P (op1, reg_ok_strict))
3872 /* Like spu_legitimate_address_p, except with named addresses. */
3874 spu_addr_space_legitimate_address_p (enum machine_mode mode, rtx x,
3875 bool reg_ok_strict, addr_space_t as)
3877 if (as == ADDR_SPACE_EA)
3878 return (REG_P (x) && (GET_MODE (x) == EAmode));
3880 else if (as != ADDR_SPACE_GENERIC)
3883 return spu_legitimate_address_p (mode, x, reg_ok_strict);
3886 /* When the address is reg + const_int, force the const_int into a
3889 spu_legitimize_address (rtx x, rtx oldx ATTRIBUTE_UNUSED,
3890 enum machine_mode mode ATTRIBUTE_UNUSED)
3893 /* Make sure both operands are registers. */
3894 if (GET_CODE (x) == PLUS)
3898 if (ALIGNED_SYMBOL_REF_P (op0))
3900 op0 = force_reg (Pmode, op0);
3901 mark_reg_pointer (op0, 128);
3903 else if (GET_CODE (op0) != REG)
3904 op0 = force_reg (Pmode, op0);
3905 if (ALIGNED_SYMBOL_REF_P (op1))
3907 op1 = force_reg (Pmode, op1);
3908 mark_reg_pointer (op1, 128);
3910 else if (GET_CODE (op1) != REG)
3911 op1 = force_reg (Pmode, op1);
3912 x = gen_rtx_PLUS (Pmode, op0, op1);
3917 /* Like spu_legitimate_address, except with named address support. */
3919 spu_addr_space_legitimize_address (rtx x, rtx oldx, enum machine_mode mode,
3922 if (as != ADDR_SPACE_GENERIC)
3925 return spu_legitimize_address (x, oldx, mode);
3928 /* Reload reg + const_int for out-of-range displacements. */
3930 spu_legitimize_reload_address (rtx ad, enum machine_mode mode ATTRIBUTE_UNUSED,
3931 int opnum, int type)
3933 bool removed_and = false;
3935 if (GET_CODE (ad) == AND
3936 && CONST_INT_P (XEXP (ad, 1))
3937 && INTVAL (XEXP (ad, 1)) == (HOST_WIDE_INT) - 16)
3943 if (GET_CODE (ad) == PLUS
3944 && REG_P (XEXP (ad, 0))
3945 && CONST_INT_P (XEXP (ad, 1))
3946 && !(INTVAL (XEXP (ad, 1)) >= -0x2000
3947 && INTVAL (XEXP (ad, 1)) <= 0x1fff))
3949 /* Unshare the sum. */
3952 /* Reload the displacement. */
3953 push_reload (XEXP (ad, 1), NULL_RTX, &XEXP (ad, 1), NULL,
3954 BASE_REG_CLASS, GET_MODE (ad), VOIDmode, 0, 0,
3955 opnum, (enum reload_type) type);
3957 /* Add back AND for alignment if we stripped it. */
3959 ad = gen_rtx_AND (GET_MODE (ad), ad, GEN_INT (-16));
3967 /* Handle an attribute requiring a FUNCTION_DECL; arguments as in
3968 struct attribute_spec.handler. */
3970 spu_handle_fndecl_attribute (tree * node,
3972 tree args ATTRIBUTE_UNUSED,
3973 int flags ATTRIBUTE_UNUSED, bool * no_add_attrs)
3975 if (TREE_CODE (*node) != FUNCTION_DECL)
3977 warning (0, "%qE attribute only applies to functions",
3979 *no_add_attrs = true;
3985 /* Handle the "vector" attribute. */
3987 spu_handle_vector_attribute (tree * node, tree name,
3988 tree args ATTRIBUTE_UNUSED,
3989 int flags ATTRIBUTE_UNUSED, bool * no_add_attrs)
3991 tree type = *node, result = NULL_TREE;
3992 enum machine_mode mode;
3995 while (POINTER_TYPE_P (type)
3996 || TREE_CODE (type) == FUNCTION_TYPE
3997 || TREE_CODE (type) == METHOD_TYPE || TREE_CODE (type) == ARRAY_TYPE)
3998 type = TREE_TYPE (type);
4000 mode = TYPE_MODE (type);
4002 unsigned_p = TYPE_UNSIGNED (type);
4006 result = (unsigned_p ? unsigned_V2DI_type_node : V2DI_type_node);
4009 result = (unsigned_p ? unsigned_V4SI_type_node : V4SI_type_node);
4012 result = (unsigned_p ? unsigned_V8HI_type_node : V8HI_type_node);
4015 result = (unsigned_p ? unsigned_V16QI_type_node : V16QI_type_node);
4018 result = V4SF_type_node;
4021 result = V2DF_type_node;
4027 /* Propagate qualifiers attached to the element type
4028 onto the vector type. */
4029 if (result && result != type && TYPE_QUALS (type))
4030 result = build_qualified_type (result, TYPE_QUALS (type));
4032 *no_add_attrs = true; /* No need to hang on to the attribute. */
4035 warning (0, "%qE attribute ignored", name);
4037 *node = lang_hooks.types.reconstruct_complex_type (*node, result);
4042 /* Return nonzero if FUNC is a naked function. */
4044 spu_naked_function_p (tree func)
4048 if (TREE_CODE (func) != FUNCTION_DECL)
4051 a = lookup_attribute ("naked", DECL_ATTRIBUTES (func));
4052 return a != NULL_TREE;
4056 spu_initial_elimination_offset (int from, int to)
4058 int saved_regs_size = spu_saved_regs_size ();
4060 if (!current_function_is_leaf || crtl->outgoing_args_size
4061 || get_frame_size () || saved_regs_size)
4062 sp_offset = STACK_POINTER_OFFSET;
4063 if (from == FRAME_POINTER_REGNUM && to == STACK_POINTER_REGNUM)
4064 return get_frame_size () + crtl->outgoing_args_size + sp_offset;
4065 else if (from == FRAME_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
4066 return get_frame_size ();
4067 else if (from == ARG_POINTER_REGNUM && to == STACK_POINTER_REGNUM)
4068 return sp_offset + crtl->outgoing_args_size
4069 + get_frame_size () + saved_regs_size + STACK_POINTER_OFFSET;
4070 else if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
4071 return get_frame_size () + saved_regs_size + sp_offset;
4077 spu_function_value (const_tree type, const_tree func ATTRIBUTE_UNUSED)
4079 enum machine_mode mode = TYPE_MODE (type);
4080 int byte_size = ((mode == BLKmode)
4081 ? int_size_in_bytes (type) : GET_MODE_SIZE (mode));
4083 /* Make sure small structs are left justified in a register. */
4084 if ((mode == BLKmode || (type && AGGREGATE_TYPE_P (type)))
4085 && byte_size <= UNITS_PER_WORD * MAX_REGISTER_RETURN && byte_size > 0)
4087 enum machine_mode smode;
4090 int nregs = (byte_size + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
4091 int n = byte_size / UNITS_PER_WORD;
4092 v = rtvec_alloc (nregs);
4093 for (i = 0; i < n; i++)
4095 RTVEC_ELT (v, i) = gen_rtx_EXPR_LIST (VOIDmode,
4096 gen_rtx_REG (TImode,
4099 GEN_INT (UNITS_PER_WORD * i));
4100 byte_size -= UNITS_PER_WORD;
4108 smallest_mode_for_size (byte_size * BITS_PER_UNIT, MODE_INT);
4110 gen_rtx_EXPR_LIST (VOIDmode,
4111 gen_rtx_REG (smode, FIRST_RETURN_REGNUM + n),
4112 GEN_INT (UNITS_PER_WORD * n));
4114 return gen_rtx_PARALLEL (mode, v);
4116 return gen_rtx_REG (mode, FIRST_RETURN_REGNUM);
4120 spu_function_arg (cumulative_args_t cum_v,
4121 enum machine_mode mode,
4122 const_tree type, bool named ATTRIBUTE_UNUSED)
4124 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
4127 if (*cum >= MAX_REGISTER_ARGS)
4130 byte_size = ((mode == BLKmode)
4131 ? int_size_in_bytes (type) : GET_MODE_SIZE (mode));
4133 /* The ABI does not allow parameters to be passed partially in
4134 reg and partially in stack. */
4135 if ((*cum + (byte_size + 15) / 16) > MAX_REGISTER_ARGS)
4138 /* Make sure small structs are left justified in a register. */
4139 if ((mode == BLKmode || (type && AGGREGATE_TYPE_P (type)))
4140 && byte_size < UNITS_PER_WORD && byte_size > 0)
4142 enum machine_mode smode;
4146 smode = smallest_mode_for_size (byte_size * BITS_PER_UNIT, MODE_INT);
4147 gr_reg = gen_rtx_EXPR_LIST (VOIDmode,
4148 gen_rtx_REG (smode, FIRST_ARG_REGNUM + *cum),
4150 return gen_rtx_PARALLEL (mode, gen_rtvec (1, gr_reg));
4153 return gen_rtx_REG (mode, FIRST_ARG_REGNUM + *cum);
4157 spu_function_arg_advance (cumulative_args_t cum_v, enum machine_mode mode,
4158 const_tree type, bool named ATTRIBUTE_UNUSED)
4160 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
4162 *cum += (type && TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST
4165 ? ((int_size_in_bytes (type) + 15) / 16)
4168 : HARD_REGNO_NREGS (cum, mode));
4171 /* Variable sized types are passed by reference. */
4173 spu_pass_by_reference (cumulative_args_t cum ATTRIBUTE_UNUSED,
4174 enum machine_mode mode ATTRIBUTE_UNUSED,
4175 const_tree type, bool named ATTRIBUTE_UNUSED)
4177 return type && TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST;
4183 /* Create and return the va_list datatype.
4185 On SPU, va_list is an array type equivalent to
4187 typedef struct __va_list_tag
4189 void *__args __attribute__((__aligned(16)));
4190 void *__skip __attribute__((__aligned(16)));
4194 where __args points to the arg that will be returned by the next
4195 va_arg(), and __skip points to the previous stack frame such that
4196 when __args == __skip we should advance __args by 32 bytes. */
4198 spu_build_builtin_va_list (void)
4200 tree f_args, f_skip, record, type_decl;
4203 record = (*lang_hooks.types.make_type) (RECORD_TYPE);
4206 build_decl (BUILTINS_LOCATION,
4207 TYPE_DECL, get_identifier ("__va_list_tag"), record);
4209 f_args = build_decl (BUILTINS_LOCATION,
4210 FIELD_DECL, get_identifier ("__args"), ptr_type_node);
4211 f_skip = build_decl (BUILTINS_LOCATION,
4212 FIELD_DECL, get_identifier ("__skip"), ptr_type_node);
4214 DECL_FIELD_CONTEXT (f_args) = record;
4215 DECL_ALIGN (f_args) = 128;
4216 DECL_USER_ALIGN (f_args) = 1;
4218 DECL_FIELD_CONTEXT (f_skip) = record;
4219 DECL_ALIGN (f_skip) = 128;
4220 DECL_USER_ALIGN (f_skip) = 1;
4222 TYPE_STUB_DECL (record) = type_decl;
4223 TYPE_NAME (record) = type_decl;
4224 TYPE_FIELDS (record) = f_args;
4225 DECL_CHAIN (f_args) = f_skip;
4227 /* We know this is being padded and we want it too. It is an internal
4228 type so hide the warnings from the user. */
4230 warn_padded = false;
4232 layout_type (record);
4236 /* The correct type is an array type of one element. */
4237 return build_array_type (record, build_index_type (size_zero_node));
4240 /* Implement va_start by filling the va_list structure VALIST.
4241 NEXTARG points to the first anonymous stack argument.
4243 The following global variables are used to initialize
4244 the va_list structure:
4247 the CUMULATIVE_ARGS for this function
4249 crtl->args.arg_offset_rtx:
4250 holds the offset of the first anonymous stack argument
4251 (relative to the virtual arg pointer). */
4254 spu_va_start (tree valist, rtx nextarg)
4256 tree f_args, f_skip;
4259 f_args = TYPE_FIELDS (TREE_TYPE (va_list_type_node));
4260 f_skip = DECL_CHAIN (f_args);
4262 valist = build_simple_mem_ref (valist);
4264 build3 (COMPONENT_REF, TREE_TYPE (f_args), valist, f_args, NULL_TREE);
4266 build3 (COMPONENT_REF, TREE_TYPE (f_skip), valist, f_skip, NULL_TREE);
4268 /* Find the __args area. */
4269 t = make_tree (TREE_TYPE (args), nextarg);
4270 if (crtl->args.pretend_args_size > 0)
4271 t = fold_build_pointer_plus_hwi (t, -STACK_POINTER_OFFSET);
4272 t = build2 (MODIFY_EXPR, TREE_TYPE (args), args, t);
4273 TREE_SIDE_EFFECTS (t) = 1;
4274 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
4276 /* Find the __skip area. */
4277 t = make_tree (TREE_TYPE (skip), virtual_incoming_args_rtx);
4278 t = fold_build_pointer_plus_hwi (t, (crtl->args.pretend_args_size
4279 - STACK_POINTER_OFFSET));
4280 t = build2 (MODIFY_EXPR, TREE_TYPE (skip), skip, t);
4281 TREE_SIDE_EFFECTS (t) = 1;
4282 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
4285 /* Gimplify va_arg by updating the va_list structure
4286 VALIST as required to retrieve an argument of type
4287 TYPE, and returning that argument.
4289 ret = va_arg(VALIST, TYPE);
4291 generates code equivalent to:
4293 paddedsize = (sizeof(TYPE) + 15) & -16;
4294 if (VALIST.__args + paddedsize > VALIST.__skip
4295 && VALIST.__args <= VALIST.__skip)
4296 addr = VALIST.__skip + 32;
4298 addr = VALIST.__args;
4299 VALIST.__args = addr + paddedsize;
4300 ret = *(TYPE *)addr;
4303 spu_gimplify_va_arg_expr (tree valist, tree type, gimple_seq * pre_p,
4304 gimple_seq * post_p ATTRIBUTE_UNUSED)
4306 tree f_args, f_skip;
4308 HOST_WIDE_INT size, rsize;
4310 bool pass_by_reference_p;
4312 f_args = TYPE_FIELDS (TREE_TYPE (va_list_type_node));
4313 f_skip = DECL_CHAIN (f_args);
4315 valist = build_simple_mem_ref (valist);
4317 build3 (COMPONENT_REF, TREE_TYPE (f_args), valist, f_args, NULL_TREE);
4319 build3 (COMPONENT_REF, TREE_TYPE (f_skip), valist, f_skip, NULL_TREE);
4321 addr = create_tmp_var (ptr_type_node, "va_arg");
4323 /* if an object is dynamically sized, a pointer to it is passed
4324 instead of the object itself. */
4325 pass_by_reference_p = pass_by_reference (NULL, TYPE_MODE (type), type,
4327 if (pass_by_reference_p)
4328 type = build_pointer_type (type);
4329 size = int_size_in_bytes (type);
4330 rsize = ((size + UNITS_PER_WORD - 1) / UNITS_PER_WORD) * UNITS_PER_WORD;
4332 /* build conditional expression to calculate addr. The expression
4333 will be gimplified later. */
4334 tmp = fold_build_pointer_plus_hwi (unshare_expr (args), rsize);
4335 tmp = build2 (TRUTH_AND_EXPR, boolean_type_node,
4336 build2 (GT_EXPR, boolean_type_node, tmp, unshare_expr (skip)),
4337 build2 (LE_EXPR, boolean_type_node, unshare_expr (args),
4338 unshare_expr (skip)));
4340 tmp = build3 (COND_EXPR, ptr_type_node, tmp,
4341 fold_build_pointer_plus_hwi (unshare_expr (skip), 32),
4342 unshare_expr (args));
4344 gimplify_assign (addr, tmp, pre_p);
4346 /* update VALIST.__args */
4347 tmp = fold_build_pointer_plus_hwi (addr, rsize);
4348 gimplify_assign (unshare_expr (args), tmp, pre_p);
4350 addr = fold_convert (build_pointer_type_for_mode (type, ptr_mode, true),
4353 if (pass_by_reference_p)
4354 addr = build_va_arg_indirect_ref (addr);
4356 return build_va_arg_indirect_ref (addr);
4359 /* Save parameter registers starting with the register that corresponds
4360 to the first unnamed parameters. If the first unnamed parameter is
4361 in the stack then save no registers. Set pretend_args_size to the
4362 amount of space needed to save the registers. */
4364 spu_setup_incoming_varargs (cumulative_args_t cum, enum machine_mode mode,
4365 tree type, int *pretend_size, int no_rtl)
4372 int ncum = *get_cumulative_args (cum);
4374 /* cum currently points to the last named argument, we want to
4375 start at the next argument. */
4376 spu_function_arg_advance (pack_cumulative_args (&ncum), mode, type, true);
4378 offset = -STACK_POINTER_OFFSET;
4379 for (regno = ncum; regno < MAX_REGISTER_ARGS; regno++)
4381 tmp = gen_frame_mem (V4SImode,
4382 plus_constant (Pmode, virtual_incoming_args_rtx,
4384 emit_move_insn (tmp,
4385 gen_rtx_REG (V4SImode, FIRST_ARG_REGNUM + regno));
4388 *pretend_size = offset + STACK_POINTER_OFFSET;
4393 spu_conditional_register_usage (void)
4397 fixed_regs[PIC_OFFSET_TABLE_REGNUM] = 1;
4398 call_used_regs[PIC_OFFSET_TABLE_REGNUM] = 1;
4402 /* This is called any time we inspect the alignment of a register for
4405 reg_aligned_for_addr (rtx x)
4408 REGNO (x) < FIRST_PSEUDO_REGISTER ? ORIGINAL_REGNO (x) : REGNO (x);
4409 return REGNO_POINTER_ALIGN (regno) >= 128;
4412 /* Encode symbol attributes (local vs. global, tls model) of a SYMBOL_REF
4413 into its SYMBOL_REF_FLAGS. */
4415 spu_encode_section_info (tree decl, rtx rtl, int first)
4417 default_encode_section_info (decl, rtl, first);
4419 /* If a variable has a forced alignment to < 16 bytes, mark it with
4420 SYMBOL_FLAG_ALIGN1. */
4421 if (TREE_CODE (decl) == VAR_DECL
4422 && DECL_USER_ALIGN (decl) && DECL_ALIGN (decl) < 128)
4423 SYMBOL_REF_FLAGS (XEXP (rtl, 0)) |= SYMBOL_FLAG_ALIGN1;
4426 /* Return TRUE if we are certain the mem refers to a complete object
4427 which is both 16-byte aligned and padded to a 16-byte boundary. This
4428 would make it safe to store with a single instruction.
4429 We guarantee the alignment and padding for static objects by aligning
4430 all of them to 16-bytes. (DATA_ALIGNMENT and CONSTANT_ALIGNMENT.)
4431 FIXME: We currently cannot guarantee this for objects on the stack
4432 because assign_parm_setup_stack calls assign_stack_local with the
4433 alignment of the parameter mode and in that case the alignment never
4434 gets adjusted by LOCAL_ALIGNMENT. */
4436 store_with_one_insn_p (rtx mem)
4438 enum machine_mode mode = GET_MODE (mem);
4439 rtx addr = XEXP (mem, 0);
4440 if (mode == BLKmode)
4442 if (GET_MODE_SIZE (mode) >= 16)
4444 /* Only static objects. */
4445 if (GET_CODE (addr) == SYMBOL_REF)
4447 /* We use the associated declaration to make sure the access is
4448 referring to the whole object.
4449 We check both MEM_EXPR and SYMBOL_REF_DECL. I'm not sure
4450 if it is necessary. Will there be cases where one exists, and
4451 the other does not? Will there be cases where both exist, but
4452 have different types? */
4453 tree decl = MEM_EXPR (mem);
4455 && TREE_CODE (decl) == VAR_DECL
4456 && GET_MODE (mem) == TYPE_MODE (TREE_TYPE (decl)))
4458 decl = SYMBOL_REF_DECL (addr);
4460 && TREE_CODE (decl) == VAR_DECL
4461 && GET_MODE (mem) == TYPE_MODE (TREE_TYPE (decl)))
4467 /* Return 1 when the address is not valid for a simple load and store as
4468 required by the '_mov*' patterns. We could make this less strict
4469 for loads, but we prefer mem's to look the same so they are more
4470 likely to be merged. */
4472 address_needs_split (rtx mem)
4474 if (GET_MODE_SIZE (GET_MODE (mem)) < 16
4475 && (GET_MODE_SIZE (GET_MODE (mem)) < 4
4476 || !(store_with_one_insn_p (mem)
4477 || mem_is_padded_component_ref (mem))))
4483 static GTY(()) rtx cache_fetch; /* __cache_fetch function */
4484 static GTY(()) rtx cache_fetch_dirty; /* __cache_fetch_dirty function */
4485 static alias_set_type ea_alias_set = -1; /* alias set for __ea memory */
4487 /* MEM is known to be an __ea qualified memory access. Emit a call to
4488 fetch the ppu memory to local store, and return its address in local
4492 ea_load_store (rtx mem, bool is_store, rtx ea_addr, rtx data_addr)
4496 rtx ndirty = GEN_INT (GET_MODE_SIZE (GET_MODE (mem)));
4497 if (!cache_fetch_dirty)
4498 cache_fetch_dirty = init_one_libfunc ("__cache_fetch_dirty");
4499 emit_library_call_value (cache_fetch_dirty, data_addr, LCT_NORMAL, Pmode,
4500 2, ea_addr, EAmode, ndirty, SImode);
4505 cache_fetch = init_one_libfunc ("__cache_fetch");
4506 emit_library_call_value (cache_fetch, data_addr, LCT_NORMAL, Pmode,
4507 1, ea_addr, EAmode);
4511 /* Like ea_load_store, but do the cache tag comparison and, for stores,
4512 dirty bit marking, inline.
4514 The cache control data structure is an array of
4516 struct __cache_tag_array
4518 unsigned int tag_lo[4];
4519 unsigned int tag_hi[4];
4520 void *data_pointer[4];
4522 vector unsigned short dirty_bits[4];
4526 ea_load_store_inline (rtx mem, bool is_store, rtx ea_addr, rtx data_addr)
4530 rtx tag_size_sym = gen_rtx_SYMBOL_REF (Pmode, "__cache_tag_array_size");
4531 rtx tag_arr_sym = gen_rtx_SYMBOL_REF (Pmode, "__cache_tag_array");
4532 rtx index_mask = gen_reg_rtx (SImode);
4533 rtx tag_arr = gen_reg_rtx (Pmode);
4534 rtx splat_mask = gen_reg_rtx (TImode);
4535 rtx splat = gen_reg_rtx (V4SImode);
4536 rtx splat_hi = NULL_RTX;
4537 rtx tag_index = gen_reg_rtx (Pmode);
4538 rtx block_off = gen_reg_rtx (SImode);
4539 rtx tag_addr = gen_reg_rtx (Pmode);
4540 rtx tag = gen_reg_rtx (V4SImode);
4541 rtx cache_tag = gen_reg_rtx (V4SImode);
4542 rtx cache_tag_hi = NULL_RTX;
4543 rtx cache_ptrs = gen_reg_rtx (TImode);
4544 rtx cache_ptrs_si = gen_reg_rtx (SImode);
4545 rtx tag_equal = gen_reg_rtx (V4SImode);
4546 rtx tag_equal_hi = NULL_RTX;
4547 rtx tag_eq_pack = gen_reg_rtx (V4SImode);
4548 rtx tag_eq_pack_si = gen_reg_rtx (SImode);
4549 rtx eq_index = gen_reg_rtx (SImode);
4550 rtx bcomp, hit_label, hit_ref, cont_label, insn;
4552 if (spu_ea_model != 32)
4554 splat_hi = gen_reg_rtx (V4SImode);
4555 cache_tag_hi = gen_reg_rtx (V4SImode);
4556 tag_equal_hi = gen_reg_rtx (V4SImode);
4559 emit_move_insn (index_mask, plus_constant (Pmode, tag_size_sym, -128));
4560 emit_move_insn (tag_arr, tag_arr_sym);
4561 v = 0x0001020300010203LL;
4562 emit_move_insn (splat_mask, immed_double_const (v, v, TImode));
4563 ea_addr_si = ea_addr;
4564 if (spu_ea_model != 32)
4565 ea_addr_si = convert_to_mode (SImode, ea_addr, 1);
4567 /* tag_index = ea_addr & (tag_array_size - 128) */
4568 emit_insn (gen_andsi3 (tag_index, ea_addr_si, index_mask));
4570 /* splat ea_addr to all 4 slots. */
4571 emit_insn (gen_shufb (splat, ea_addr_si, ea_addr_si, splat_mask));
4572 /* Similarly for high 32 bits of ea_addr. */
4573 if (spu_ea_model != 32)
4574 emit_insn (gen_shufb (splat_hi, ea_addr, ea_addr, splat_mask));
4576 /* block_off = ea_addr & 127 */
4577 emit_insn (gen_andsi3 (block_off, ea_addr_si, spu_const (SImode, 127)));
4579 /* tag_addr = tag_arr + tag_index */
4580 emit_insn (gen_addsi3 (tag_addr, tag_arr, tag_index));
4582 /* Read cache tags. */
4583 emit_move_insn (cache_tag, gen_rtx_MEM (V4SImode, tag_addr));
4584 if (spu_ea_model != 32)
4585 emit_move_insn (cache_tag_hi, gen_rtx_MEM (V4SImode,
4586 plus_constant (Pmode,
4589 /* tag = ea_addr & -128 */
4590 emit_insn (gen_andv4si3 (tag, splat, spu_const (V4SImode, -128)));
4592 /* Read all four cache data pointers. */
4593 emit_move_insn (cache_ptrs, gen_rtx_MEM (TImode,
4594 plus_constant (Pmode,
4598 emit_insn (gen_ceq_v4si (tag_equal, tag, cache_tag));
4599 if (spu_ea_model != 32)
4601 emit_insn (gen_ceq_v4si (tag_equal_hi, splat_hi, cache_tag_hi));
4602 emit_insn (gen_andv4si3 (tag_equal, tag_equal, tag_equal_hi));
4605 /* At most one of the tags compare equal, so tag_equal has one
4606 32-bit slot set to all 1's, with the other slots all zero.
4607 gbb picks off low bit from each byte in the 128-bit registers,
4608 so tag_eq_pack is one of 0xf000, 0x0f00, 0x00f0, 0x000f, assuming
4610 emit_insn (gen_spu_gbb (tag_eq_pack, spu_gen_subreg (V16QImode, tag_equal)));
4611 emit_insn (gen_spu_convert (tag_eq_pack_si, tag_eq_pack));
4613 /* So counting leading zeros will set eq_index to 16, 20, 24 or 28. */
4614 emit_insn (gen_clzsi2 (eq_index, tag_eq_pack_si));
4616 /* Allowing us to rotate the corresponding cache data pointer to slot0.
4617 (rotating eq_index mod 16 bytes). */
4618 emit_insn (gen_rotqby_ti (cache_ptrs, cache_ptrs, eq_index));
4619 emit_insn (gen_spu_convert (cache_ptrs_si, cache_ptrs));
4621 /* Add block offset to form final data address. */
4622 emit_insn (gen_addsi3 (data_addr, cache_ptrs_si, block_off));
4624 /* Check that we did hit. */
4625 hit_label = gen_label_rtx ();
4626 hit_ref = gen_rtx_LABEL_REF (VOIDmode, hit_label);
4627 bcomp = gen_rtx_NE (SImode, tag_eq_pack_si, const0_rtx);
4628 insn = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx,
4629 gen_rtx_IF_THEN_ELSE (VOIDmode, bcomp,
4631 /* Say that this branch is very likely to happen. */
4632 v = REG_BR_PROB_BASE - REG_BR_PROB_BASE / 100 - 1;
4633 add_reg_note (insn, REG_BR_PROB, GEN_INT (v));
4635 ea_load_store (mem, is_store, ea_addr, data_addr);
4636 cont_label = gen_label_rtx ();
4637 emit_jump_insn (gen_jump (cont_label));
4640 emit_label (hit_label);
4645 rtx dirty_bits = gen_reg_rtx (TImode);
4646 rtx dirty_off = gen_reg_rtx (SImode);
4647 rtx dirty_128 = gen_reg_rtx (TImode);
4648 rtx neg_block_off = gen_reg_rtx (SImode);
4650 /* Set up mask with one dirty bit per byte of the mem we are
4651 writing, starting from top bit. */
4653 v <<= (128 - GET_MODE_SIZE (GET_MODE (mem))) & 63;
4654 if ((128 - GET_MODE_SIZE (GET_MODE (mem))) >= 64)
4659 emit_move_insn (dirty_bits, immed_double_const (v, v_hi, TImode));
4661 /* Form index into cache dirty_bits. eq_index is one of
4662 0x10, 0x14, 0x18 or 0x1c. Multiplying by 4 gives us
4663 0x40, 0x50, 0x60 or 0x70 which just happens to be the
4664 offset to each of the four dirty_bits elements. */
4665 emit_insn (gen_ashlsi3 (dirty_off, eq_index, spu_const (SImode, 2)));
4667 emit_insn (gen_spu_lqx (dirty_128, tag_addr, dirty_off));
4669 /* Rotate bit mask to proper bit. */
4670 emit_insn (gen_negsi2 (neg_block_off, block_off));
4671 emit_insn (gen_rotqbybi_ti (dirty_bits, dirty_bits, neg_block_off));
4672 emit_insn (gen_rotqbi_ti (dirty_bits, dirty_bits, neg_block_off));
4674 /* Or in the new dirty bits. */
4675 emit_insn (gen_iorti3 (dirty_128, dirty_bits, dirty_128));
4678 emit_insn (gen_spu_stqx (dirty_128, tag_addr, dirty_off));
4681 emit_label (cont_label);
4685 expand_ea_mem (rtx mem, bool is_store)
4688 rtx data_addr = gen_reg_rtx (Pmode);
4691 ea_addr = force_reg (EAmode, XEXP (mem, 0));
4692 if (optimize_size || optimize == 0)
4693 ea_load_store (mem, is_store, ea_addr, data_addr);
4695 ea_load_store_inline (mem, is_store, ea_addr, data_addr);
4697 if (ea_alias_set == -1)
4698 ea_alias_set = new_alias_set ();
4700 /* We generate a new MEM RTX to refer to the copy of the data
4701 in the cache. We do not copy memory attributes (except the
4702 alignment) from the original MEM, as they may no longer apply
4703 to the cache copy. */
4704 new_mem = gen_rtx_MEM (GET_MODE (mem), data_addr);
4705 set_mem_alias_set (new_mem, ea_alias_set);
4706 set_mem_align (new_mem, MIN (MEM_ALIGN (mem), 128 * 8));
4712 spu_expand_mov (rtx * ops, enum machine_mode mode)
4714 if (GET_CODE (ops[0]) == SUBREG && !valid_subreg (ops[0]))
4716 /* Perform the move in the destination SUBREG's inner mode. */
4717 ops[0] = SUBREG_REG (ops[0]);
4718 mode = GET_MODE (ops[0]);
4719 ops[1] = gen_lowpart_common (mode, ops[1]);
4720 gcc_assert (ops[1]);
4723 if (GET_CODE (ops[1]) == SUBREG && !valid_subreg (ops[1]))
4725 rtx from = SUBREG_REG (ops[1]);
4726 enum machine_mode imode = int_mode_for_mode (GET_MODE (from));
4728 gcc_assert (GET_MODE_CLASS (mode) == MODE_INT
4729 && GET_MODE_CLASS (imode) == MODE_INT
4730 && subreg_lowpart_p (ops[1]));
4732 if (GET_MODE_SIZE (imode) < 4)
4734 if (imode != GET_MODE (from))
4735 from = gen_rtx_SUBREG (imode, from, 0);
4737 if (GET_MODE_SIZE (mode) < GET_MODE_SIZE (imode))
4739 enum insn_code icode = convert_optab_handler (trunc_optab,
4741 emit_insn (GEN_FCN (icode) (ops[0], from));
4744 emit_insn (gen_extend_insn (ops[0], from, mode, imode, 1));
4748 /* At least one of the operands needs to be a register. */
4749 if ((reload_in_progress | reload_completed) == 0
4750 && !register_operand (ops[0], mode) && !register_operand (ops[1], mode))
4752 rtx temp = force_reg (mode, ops[1]);
4753 emit_move_insn (ops[0], temp);
4756 if (reload_in_progress || reload_completed)
4758 if (CONSTANT_P (ops[1]))
4759 return spu_split_immediate (ops);
4763 /* Catch the SImode immediates greater than 0x7fffffff, and sign
4765 if (GET_CODE (ops[1]) == CONST_INT)
4767 HOST_WIDE_INT val = trunc_int_for_mode (INTVAL (ops[1]), mode);
4768 if (val != INTVAL (ops[1]))
4770 emit_move_insn (ops[0], GEN_INT (val));
4776 if (MEM_ADDR_SPACE (ops[0]))
4777 ops[0] = expand_ea_mem (ops[0], true);
4778 return spu_split_store (ops);
4782 if (MEM_ADDR_SPACE (ops[1]))
4783 ops[1] = expand_ea_mem (ops[1], false);
4784 return spu_split_load (ops);
4791 spu_convert_move (rtx dst, rtx src)
4793 enum machine_mode mode = GET_MODE (dst);
4794 enum machine_mode int_mode = mode_for_size (GET_MODE_BITSIZE (mode), MODE_INT, 0);
4796 gcc_assert (GET_MODE (src) == TImode);
4797 reg = int_mode != mode ? gen_reg_rtx (int_mode) : dst;
4798 emit_insn (gen_rtx_SET (VOIDmode, reg,
4799 gen_rtx_TRUNCATE (int_mode,
4800 gen_rtx_LSHIFTRT (TImode, src,
4801 GEN_INT (int_mode == DImode ? 64 : 96)))));
4802 if (int_mode != mode)
4804 reg = simplify_gen_subreg (mode, reg, int_mode, 0);
4805 emit_move_insn (dst, reg);
4809 /* Load TImode values into DST0 and DST1 (when it is non-NULL) using
4810 the address from SRC and SRC+16. Return a REG or CONST_INT that
4811 specifies how many bytes to rotate the loaded registers, plus any
4812 extra from EXTRA_ROTQBY. The address and rotate amounts are
4813 normalized to improve merging of loads and rotate computations. */
4815 spu_expand_load (rtx dst0, rtx dst1, rtx src, int extra_rotby)
4817 rtx addr = XEXP (src, 0);
4818 rtx p0, p1, rot, addr0, addr1;
4824 if (MEM_ALIGN (src) >= 128)
4825 /* Address is already aligned; simply perform a TImode load. */ ;
4826 else if (GET_CODE (addr) == PLUS)
4829 aligned reg + aligned reg => lqx
4830 aligned reg + unaligned reg => lqx, rotqby
4831 aligned reg + aligned const => lqd
4832 aligned reg + unaligned const => lqd, rotqbyi
4833 unaligned reg + aligned reg => lqx, rotqby
4834 unaligned reg + unaligned reg => lqx, a, rotqby (1 scratch)
4835 unaligned reg + aligned const => lqd, rotqby
4836 unaligned reg + unaligned const -> not allowed by legitimate address
4838 p0 = XEXP (addr, 0);
4839 p1 = XEXP (addr, 1);
4840 if (!reg_aligned_for_addr (p0))
4842 if (REG_P (p1) && !reg_aligned_for_addr (p1))
4844 rot = gen_reg_rtx (SImode);
4845 emit_insn (gen_addsi3 (rot, p0, p1));
4847 else if (GET_CODE (p1) == CONST_INT && (INTVAL (p1) & 15))
4851 && INTVAL (p1) * BITS_PER_UNIT
4852 < REGNO_POINTER_ALIGN (REGNO (p0)))
4854 rot = gen_reg_rtx (SImode);
4855 emit_insn (gen_addsi3 (rot, p0, p1));
4860 rtx x = gen_reg_rtx (SImode);
4861 emit_move_insn (x, p1);
4862 if (!spu_arith_operand (p1, SImode))
4864 rot = gen_reg_rtx (SImode);
4865 emit_insn (gen_addsi3 (rot, p0, p1));
4866 addr = gen_rtx_PLUS (Pmode, p0, x);
4874 if (GET_CODE (p1) == CONST_INT && (INTVAL (p1) & 15))
4876 rot_amt = INTVAL (p1) & 15;
4877 if (INTVAL (p1) & -16)
4879 p1 = GEN_INT (INTVAL (p1) & -16);
4880 addr = gen_rtx_PLUS (SImode, p0, p1);
4885 else if (REG_P (p1) && !reg_aligned_for_addr (p1))
4889 else if (REG_P (addr))
4891 if (!reg_aligned_for_addr (addr))
4894 else if (GET_CODE (addr) == CONST)
4896 if (GET_CODE (XEXP (addr, 0)) == PLUS
4897 && ALIGNED_SYMBOL_REF_P (XEXP (XEXP (addr, 0), 0))
4898 && GET_CODE (XEXP (XEXP (addr, 0), 1)) == CONST_INT)
4900 rot_amt = INTVAL (XEXP (XEXP (addr, 0), 1));
4902 addr = gen_rtx_CONST (Pmode,
4903 gen_rtx_PLUS (Pmode,
4904 XEXP (XEXP (addr, 0), 0),
4905 GEN_INT (rot_amt & -16)));
4907 addr = XEXP (XEXP (addr, 0), 0);
4911 rot = gen_reg_rtx (Pmode);
4912 emit_move_insn (rot, addr);
4915 else if (GET_CODE (addr) == CONST_INT)
4917 rot_amt = INTVAL (addr);
4918 addr = GEN_INT (rot_amt & -16);
4920 else if (!ALIGNED_SYMBOL_REF_P (addr))
4922 rot = gen_reg_rtx (Pmode);
4923 emit_move_insn (rot, addr);
4926 rot_amt += extra_rotby;
4932 rtx x = gen_reg_rtx (SImode);
4933 emit_insn (gen_addsi3 (x, rot, GEN_INT (rot_amt)));
4937 if (!rot && rot_amt)
4938 rot = GEN_INT (rot_amt);
4940 addr0 = copy_rtx (addr);
4941 addr0 = gen_rtx_AND (SImode, copy_rtx (addr), GEN_INT (-16));
4942 emit_insn (gen__movti (dst0, change_address (src, TImode, addr0)));
4946 addr1 = plus_constant (SImode, copy_rtx (addr), 16);
4947 addr1 = gen_rtx_AND (SImode, addr1, GEN_INT (-16));
4948 emit_insn (gen__movti (dst1, change_address (src, TImode, addr1)));
4955 spu_split_load (rtx * ops)
4957 enum machine_mode mode = GET_MODE (ops[0]);
4958 rtx addr, load, rot;
4961 if (GET_MODE_SIZE (mode) >= 16)
4964 addr = XEXP (ops[1], 0);
4965 gcc_assert (GET_CODE (addr) != AND);
4967 if (!address_needs_split (ops[1]))
4969 ops[1] = change_address (ops[1], TImode, addr);
4970 load = gen_reg_rtx (TImode);
4971 emit_insn (gen__movti (load, ops[1]));
4972 spu_convert_move (ops[0], load);
4976 rot_amt = GET_MODE_SIZE (mode) < 4 ? GET_MODE_SIZE (mode) - 4 : 0;
4978 load = gen_reg_rtx (TImode);
4979 rot = spu_expand_load (load, 0, ops[1], rot_amt);
4982 emit_insn (gen_rotqby_ti (load, load, rot));
4984 spu_convert_move (ops[0], load);
4989 spu_split_store (rtx * ops)
4991 enum machine_mode mode = GET_MODE (ops[0]);
4993 rtx addr, p0, p1, p1_lo, smem;
4997 if (GET_MODE_SIZE (mode) >= 16)
5000 addr = XEXP (ops[0], 0);
5001 gcc_assert (GET_CODE (addr) != AND);
5003 if (!address_needs_split (ops[0]))
5005 reg = gen_reg_rtx (TImode);
5006 emit_insn (gen_spu_convert (reg, ops[1]));
5007 ops[0] = change_address (ops[0], TImode, addr);
5008 emit_move_insn (ops[0], reg);
5012 if (GET_CODE (addr) == PLUS)
5015 aligned reg + aligned reg => lqx, c?x, shuf, stqx
5016 aligned reg + unaligned reg => lqx, c?x, shuf, stqx
5017 aligned reg + aligned const => lqd, c?d, shuf, stqx
5018 aligned reg + unaligned const => lqd, c?d, shuf, stqx
5019 unaligned reg + aligned reg => lqx, c?x, shuf, stqx
5020 unaligned reg + unaligned reg => lqx, c?x, shuf, stqx
5021 unaligned reg + aligned const => lqd, c?d, shuf, stqx
5022 unaligned reg + unaligned const -> lqx, c?d, shuf, stqx
5025 p0 = XEXP (addr, 0);
5026 p1 = p1_lo = XEXP (addr, 1);
5027 if (REG_P (p0) && GET_CODE (p1) == CONST_INT)
5029 p1_lo = GEN_INT (INTVAL (p1) & 15);
5030 if (reg_aligned_for_addr (p0))
5032 p1 = GEN_INT (INTVAL (p1) & -16);
5033 if (p1 == const0_rtx)
5036 addr = gen_rtx_PLUS (SImode, p0, p1);
5040 rtx x = gen_reg_rtx (SImode);
5041 emit_move_insn (x, p1);
5042 addr = gen_rtx_PLUS (SImode, p0, x);
5046 else if (REG_P (addr))
5050 p1 = p1_lo = const0_rtx;
5055 p0 = gen_rtx_REG (SImode, STACK_POINTER_REGNUM);
5056 p1 = 0; /* aform doesn't use p1 */
5058 if (ALIGNED_SYMBOL_REF_P (addr))
5060 else if (GET_CODE (addr) == CONST
5061 && GET_CODE (XEXP (addr, 0)) == PLUS
5062 && ALIGNED_SYMBOL_REF_P (XEXP (XEXP (addr, 0), 0))
5063 && GET_CODE (XEXP (XEXP (addr, 0), 1)) == CONST_INT)
5065 HOST_WIDE_INT v = INTVAL (XEXP (XEXP (addr, 0), 1));
5067 addr = gen_rtx_CONST (Pmode,
5068 gen_rtx_PLUS (Pmode,
5069 XEXP (XEXP (addr, 0), 0),
5070 GEN_INT (v & -16)));
5072 addr = XEXP (XEXP (addr, 0), 0);
5073 p1_lo = GEN_INT (v & 15);
5075 else if (GET_CODE (addr) == CONST_INT)
5077 p1_lo = GEN_INT (INTVAL (addr) & 15);
5078 addr = GEN_INT (INTVAL (addr) & -16);
5082 p1_lo = gen_reg_rtx (SImode);
5083 emit_move_insn (p1_lo, addr);
5087 gcc_assert (aform == 0 || aform == 1);
5088 reg = gen_reg_rtx (TImode);
5090 scalar = store_with_one_insn_p (ops[0]);
5093 /* We could copy the flags from the ops[0] MEM to mem here,
5094 We don't because we want this load to be optimized away if
5095 possible, and copying the flags will prevent that in certain
5096 cases, e.g. consider the volatile flag. */
5098 rtx pat = gen_reg_rtx (TImode);
5099 rtx lmem = change_address (ops[0], TImode, copy_rtx (addr));
5100 set_mem_alias_set (lmem, 0);
5101 emit_insn (gen_movti (reg, lmem));
5103 if (!p0 || reg_aligned_for_addr (p0))
5104 p0 = stack_pointer_rtx;
5108 emit_insn (gen_cpat (pat, p0, p1_lo, GEN_INT (GET_MODE_SIZE (mode))));
5109 emit_insn (gen_shufb (reg, ops[1], reg, pat));
5113 if (GET_CODE (ops[1]) == REG)
5114 emit_insn (gen_spu_convert (reg, ops[1]));
5115 else if (GET_CODE (ops[1]) == SUBREG)
5116 emit_insn (gen_spu_convert (reg, SUBREG_REG (ops[1])));
5121 if (GET_MODE_SIZE (mode) < 4 && scalar)
5122 emit_insn (gen_ashlti3
5123 (reg, reg, GEN_INT (32 - GET_MODE_BITSIZE (mode))));
5125 smem = change_address (ops[0], TImode, copy_rtx (addr));
5126 /* We can't use the previous alias set because the memory has changed
5127 size and can potentially overlap objects of other types. */
5128 set_mem_alias_set (smem, 0);
5130 emit_insn (gen_movti (smem, reg));
5134 /* Return TRUE if X is MEM which is a struct member reference
5135 and the member can safely be loaded and stored with a single
5136 instruction because it is padded. */
5138 mem_is_padded_component_ref (rtx x)
5140 tree t = MEM_EXPR (x);
5142 if (!t || TREE_CODE (t) != COMPONENT_REF)
5144 t = TREE_OPERAND (t, 1);
5145 if (!t || TREE_CODE (t) != FIELD_DECL
5146 || DECL_ALIGN (t) < 128 || AGGREGATE_TYPE_P (TREE_TYPE (t)))
5148 /* Only do this for RECORD_TYPEs, not UNION_TYPEs. */
5149 r = DECL_FIELD_CONTEXT (t);
5150 if (!r || TREE_CODE (r) != RECORD_TYPE)
5152 /* Make sure they are the same mode */
5153 if (GET_MODE (x) != TYPE_MODE (TREE_TYPE (t)))
5155 /* If there are no following fields then the field alignment assures
5156 the structure is padded to the alignment which means this field is
5158 if (TREE_CHAIN (t) == 0)
5160 /* If the following field is also aligned then this field will be
5163 if (TREE_CODE (t) == FIELD_DECL && DECL_ALIGN (t) >= 128)
5168 /* Parse the -mfixed-range= option string. */
5170 fix_range (const char *const_str)
5173 char *str, *dash, *comma;
5175 /* str must be of the form REG1'-'REG2{,REG1'-'REG} where REG1 and
5176 REG2 are either register names or register numbers. The effect
5177 of this option is to mark the registers in the range from REG1 to
5178 REG2 as ``fixed'' so they won't be used by the compiler. */
5180 i = strlen (const_str);
5181 str = (char *) alloca (i + 1);
5182 memcpy (str, const_str, i + 1);
5186 dash = strchr (str, '-');
5189 warning (0, "value of -mfixed-range must have form REG1-REG2");
5193 comma = strchr (dash + 1, ',');
5197 first = decode_reg_name (str);
5200 warning (0, "unknown register name: %s", str);
5204 last = decode_reg_name (dash + 1);
5207 warning (0, "unknown register name: %s", dash + 1);
5215 warning (0, "%s-%s is an empty range", str, dash + 1);
5219 for (i = first; i <= last; ++i)
5220 fixed_regs[i] = call_used_regs[i] = 1;
5230 /* Return TRUE if x is a CONST_INT, CONST_DOUBLE or CONST_VECTOR that
5231 can be generated using the fsmbi instruction. */
5233 fsmbi_const_p (rtx x)
5237 /* We can always choose TImode for CONST_INT because the high bits
5238 of an SImode will always be all 1s, i.e., valid for fsmbi. */
5239 enum immediate_class c = classify_immediate (x, TImode);
5240 return c == IC_FSMBI || (!epilogue_completed && c == IC_FSMBI2);
5245 /* Return TRUE if x is a CONST_INT, CONST_DOUBLE or CONST_VECTOR that
5246 can be generated using the cbd, chd, cwd or cdd instruction. */
5248 cpat_const_p (rtx x, enum machine_mode mode)
5252 enum immediate_class c = classify_immediate (x, mode);
5253 return c == IC_CPAT;
5259 gen_cpat_const (rtx * ops)
5261 unsigned char dst[16];
5262 int i, offset, shift, isize;
5263 if (GET_CODE (ops[3]) != CONST_INT
5264 || GET_CODE (ops[2]) != CONST_INT
5265 || (GET_CODE (ops[1]) != CONST_INT
5266 && GET_CODE (ops[1]) != REG))
5268 if (GET_CODE (ops[1]) == REG
5269 && (!REG_POINTER (ops[1])
5270 || REGNO_POINTER_ALIGN (ORIGINAL_REGNO (ops[1])) < 128))
5273 for (i = 0; i < 16; i++)
5275 isize = INTVAL (ops[3]);
5278 else if (isize == 2)
5282 offset = (INTVAL (ops[2]) +
5283 (GET_CODE (ops[1]) ==
5284 CONST_INT ? INTVAL (ops[1]) : 0)) & 15;
5285 for (i = 0; i < isize; i++)
5286 dst[offset + i] = i + shift;
5287 return array_to_constant (TImode, dst);
5290 /* Convert a CONST_INT, CONST_DOUBLE, or CONST_VECTOR into a 16 byte
5291 array. Use MODE for CONST_INT's. When the constant's mode is smaller
5292 than 16 bytes, the value is repeated across the rest of the array. */
5294 constant_to_array (enum machine_mode mode, rtx x, unsigned char arr[16])
5299 memset (arr, 0, 16);
5300 mode = GET_MODE (x) != VOIDmode ? GET_MODE (x) : mode;
5301 if (GET_CODE (x) == CONST_INT
5302 || (GET_CODE (x) == CONST_DOUBLE
5303 && (mode == SFmode || mode == DFmode)))
5305 gcc_assert (mode != VOIDmode && mode != BLKmode);
5307 if (GET_CODE (x) == CONST_DOUBLE)
5308 val = const_double_to_hwint (x);
5311 first = GET_MODE_SIZE (mode) - 1;
5312 for (i = first; i >= 0; i--)
5314 arr[i] = val & 0xff;
5317 /* Splat the constant across the whole array. */
5318 for (j = 0, i = first + 1; i < 16; i++)
5321 j = (j == first) ? 0 : j + 1;
5324 else if (GET_CODE (x) == CONST_DOUBLE)
5326 val = CONST_DOUBLE_LOW (x);
5327 for (i = 15; i >= 8; i--)
5329 arr[i] = val & 0xff;
5332 val = CONST_DOUBLE_HIGH (x);
5333 for (i = 7; i >= 0; i--)
5335 arr[i] = val & 0xff;
5339 else if (GET_CODE (x) == CONST_VECTOR)
5343 mode = GET_MODE_INNER (mode);
5344 units = CONST_VECTOR_NUNITS (x);
5345 for (i = 0; i < units; i++)
5347 elt = CONST_VECTOR_ELT (x, i);
5348 if (GET_CODE (elt) == CONST_INT || GET_CODE (elt) == CONST_DOUBLE)
5350 if (GET_CODE (elt) == CONST_DOUBLE)
5351 val = const_double_to_hwint (elt);
5354 first = GET_MODE_SIZE (mode) - 1;
5355 if (first + i * GET_MODE_SIZE (mode) > 16)
5357 for (j = first; j >= 0; j--)
5359 arr[j + i * GET_MODE_SIZE (mode)] = val & 0xff;
5369 /* Convert a 16 byte array to a constant of mode MODE. When MODE is
5370 smaller than 16 bytes, use the bytes that would represent that value
5371 in a register, e.g., for QImode return the value of arr[3]. */
5373 array_to_constant (enum machine_mode mode, const unsigned char arr[16])
5375 enum machine_mode inner_mode;
5377 int units, size, i, j, k;
5380 if (GET_MODE_CLASS (mode) == MODE_INT
5381 && GET_MODE_BITSIZE (mode) <= HOST_BITS_PER_WIDE_INT)
5383 j = GET_MODE_SIZE (mode);
5384 i = j < 4 ? 4 - j : 0;
5385 for (val = 0; i < j; i++)
5386 val = (val << 8) | arr[i];
5387 val = trunc_int_for_mode (val, mode);
5388 return GEN_INT (val);
5394 for (i = high = 0; i < 8; i++)
5395 high = (high << 8) | arr[i];
5396 for (i = 8, val = 0; i < 16; i++)
5397 val = (val << 8) | arr[i];
5398 return immed_double_const (val, high, TImode);
5402 val = (arr[0] << 24) | (arr[1] << 16) | (arr[2] << 8) | arr[3];
5403 val = trunc_int_for_mode (val, SImode);
5404 return hwint_to_const_double (SFmode, val);
5408 for (i = 0, val = 0; i < 8; i++)
5409 val = (val << 8) | arr[i];
5410 return hwint_to_const_double (DFmode, val);
5413 if (!VECTOR_MODE_P (mode))
5416 units = GET_MODE_NUNITS (mode);
5417 size = GET_MODE_UNIT_SIZE (mode);
5418 inner_mode = GET_MODE_INNER (mode);
5419 v = rtvec_alloc (units);
5421 for (k = i = 0; i < units; ++i)
5424 for (j = 0; j < size; j++, k++)
5425 val = (val << 8) | arr[k];
5427 if (GET_MODE_CLASS (inner_mode) == MODE_FLOAT)
5428 RTVEC_ELT (v, i) = hwint_to_const_double (inner_mode, val);
5430 RTVEC_ELT (v, i) = GEN_INT (trunc_int_for_mode (val, inner_mode));
5435 return gen_rtx_CONST_VECTOR (mode, v);
5439 reloc_diagnostic (rtx x)
5442 if (!flag_pic || !(TARGET_WARN_RELOC || TARGET_ERROR_RELOC))
5445 if (GET_CODE (x) == SYMBOL_REF)
5446 decl = SYMBOL_REF_DECL (x);
5447 else if (GET_CODE (x) == CONST
5448 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF)
5449 decl = SYMBOL_REF_DECL (XEXP (XEXP (x, 0), 0));
5451 /* SYMBOL_REF_DECL is not necessarily a DECL. */
5452 if (decl && !DECL_P (decl))
5455 /* The decl could be a string constant. */
5456 if (decl && DECL_P (decl))
5459 /* We use last_assemble_variable_decl to get line information. It's
5460 not always going to be right and might not even be close, but will
5461 be right for the more common cases. */
5462 if (!last_assemble_variable_decl || in_section == ctors_section)
5463 loc = DECL_SOURCE_LOCATION (decl);
5465 loc = DECL_SOURCE_LOCATION (last_assemble_variable_decl);
5467 if (TARGET_WARN_RELOC)
5469 "creating run-time relocation for %qD", decl);
5472 "creating run-time relocation for %qD", decl);
5476 if (TARGET_WARN_RELOC)
5477 warning_at (input_location, 0, "creating run-time relocation");
5479 error_at (input_location, "creating run-time relocation");
5483 /* Hook into assemble_integer so we can generate an error for run-time
5484 relocations. The SPU ABI disallows them. */
5486 spu_assemble_integer (rtx x, unsigned int size, int aligned_p)
5488 /* By default run-time relocations aren't supported, but we allow them
5489 in case users support it in their own run-time loader. And we provide
5490 a warning for those users that don't. */
5491 if ((GET_CODE (x) == SYMBOL_REF)
5492 || GET_CODE (x) == LABEL_REF || GET_CODE (x) == CONST)
5493 reloc_diagnostic (x);
5495 return default_assemble_integer (x, size, aligned_p);
5499 spu_asm_globalize_label (FILE * file, const char *name)
5501 fputs ("\t.global\t", file);
5502 assemble_name (file, name);
5507 spu_rtx_costs (rtx x, int code, int outer_code ATTRIBUTE_UNUSED,
5508 int opno ATTRIBUTE_UNUSED, int *total,
5509 bool speed ATTRIBUTE_UNUSED)
5511 enum machine_mode mode = GET_MODE (x);
5512 int cost = COSTS_N_INSNS (2);
5514 /* Folding to a CONST_VECTOR will use extra space but there might
5515 be only a small savings in cycles. We'd like to use a CONST_VECTOR
5516 only if it allows us to fold away multiple insns. Changing the cost
5517 of a CONST_VECTOR here (or in CONST_COSTS) doesn't help though
5518 because this cost will only be compared against a single insn.
5519 if (code == CONST_VECTOR)
5520 return spu_legitimate_constant_p (mode, x) ? cost : COSTS_N_INSNS (6);
5523 /* Use defaults for float operations. Not accurate but good enough. */
5526 *total = COSTS_N_INSNS (13);
5531 *total = COSTS_N_INSNS (6);
5537 if (satisfies_constraint_K (x))
5539 else if (INTVAL (x) >= -0x80000000ll && INTVAL (x) <= 0xffffffffll)
5540 *total = COSTS_N_INSNS (1);
5542 *total = COSTS_N_INSNS (3);
5546 *total = COSTS_N_INSNS (3);
5551 *total = COSTS_N_INSNS (0);
5555 *total = COSTS_N_INSNS (5);
5559 case FLOAT_TRUNCATE:
5561 case UNSIGNED_FLOAT:
5564 *total = COSTS_N_INSNS (7);
5570 *total = COSTS_N_INSNS (9);
5577 GET_CODE (XEXP (x, 0)) ==
5578 REG ? COSTS_N_INSNS (12) : COSTS_N_INSNS (7);
5579 if (mode == SImode && GET_CODE (XEXP (x, 0)) == REG)
5581 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
5583 HOST_WIDE_INT val = INTVAL (XEXP (x, 1));
5584 cost = COSTS_N_INSNS (14);
5585 if ((val & 0xffff) == 0)
5586 cost = COSTS_N_INSNS (9);
5587 else if (val > 0 && val < 0x10000)
5588 cost = COSTS_N_INSNS (11);
5597 *total = COSTS_N_INSNS (20);
5604 *total = COSTS_N_INSNS (4);
5607 if (XINT (x, 1) == UNSPEC_CONVERT)
5608 *total = COSTS_N_INSNS (0);
5610 *total = COSTS_N_INSNS (4);
5613 /* Scale cost by mode size. Except when initializing (cfun->decl == 0). */
5614 if (GET_MODE_CLASS (mode) == MODE_INT
5615 && GET_MODE_SIZE (mode) > GET_MODE_SIZE (SImode) && cfun && cfun->decl)
5616 cost = cost * (GET_MODE_SIZE (mode) / GET_MODE_SIZE (SImode))
5617 * (GET_MODE_SIZE (mode) / GET_MODE_SIZE (SImode));
5622 static enum machine_mode
5623 spu_unwind_word_mode (void)
5628 /* Decide whether we can make a sibling call to a function. DECL is the
5629 declaration of the function being targeted by the call and EXP is the
5630 CALL_EXPR representing the call. */
5632 spu_function_ok_for_sibcall (tree decl, tree exp ATTRIBUTE_UNUSED)
5634 return decl && !TARGET_LARGE_MEM;
5637 /* We need to correctly update the back chain pointer and the Available
5638 Stack Size (which is in the second slot of the sp register.) */
5640 spu_allocate_stack (rtx op0, rtx op1)
5643 rtx chain = gen_reg_rtx (V4SImode);
5644 rtx stack_bot = gen_frame_mem (V4SImode, stack_pointer_rtx);
5645 rtx sp = gen_reg_rtx (V4SImode);
5646 rtx splatted = gen_reg_rtx (V4SImode);
5647 rtx pat = gen_reg_rtx (TImode);
5649 /* copy the back chain so we can save it back again. */
5650 emit_move_insn (chain, stack_bot);
5652 op1 = force_reg (SImode, op1);
5654 v = 0x1020300010203ll;
5655 emit_move_insn (pat, immed_double_const (v, v, TImode));
5656 emit_insn (gen_shufb (splatted, op1, op1, pat));
5658 emit_insn (gen_spu_convert (sp, stack_pointer_rtx));
5659 emit_insn (gen_subv4si3 (sp, sp, splatted));
5661 if (flag_stack_check)
5663 rtx avail = gen_reg_rtx(SImode);
5664 rtx result = gen_reg_rtx(SImode);
5665 emit_insn (gen_vec_extractv4si (avail, sp, GEN_INT (1)));
5666 emit_insn (gen_cgt_si(result, avail, GEN_INT (-1)));
5667 emit_insn (gen_spu_heq (result, GEN_INT(0) ));
5670 emit_insn (gen_spu_convert (stack_pointer_rtx, sp));
5672 emit_move_insn (stack_bot, chain);
5674 emit_move_insn (op0, virtual_stack_dynamic_rtx);
5678 spu_restore_stack_nonlocal (rtx op0 ATTRIBUTE_UNUSED, rtx op1)
5680 static unsigned char arr[16] =
5681 { 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3 };
5682 rtx temp = gen_reg_rtx (SImode);
5683 rtx temp2 = gen_reg_rtx (SImode);
5684 rtx temp3 = gen_reg_rtx (V4SImode);
5685 rtx temp4 = gen_reg_rtx (V4SImode);
5686 rtx pat = gen_reg_rtx (TImode);
5687 rtx sp = gen_rtx_REG (V4SImode, STACK_POINTER_REGNUM);
5689 /* Restore the backchain from the first word, sp from the second. */
5690 emit_move_insn (temp2, adjust_address_nv (op1, SImode, 0));
5691 emit_move_insn (temp, adjust_address_nv (op1, SImode, 4));
5693 emit_move_insn (pat, array_to_constant (TImode, arr));
5695 /* Compute Available Stack Size for sp */
5696 emit_insn (gen_subsi3 (temp, temp, stack_pointer_rtx));
5697 emit_insn (gen_shufb (temp3, temp, temp, pat));
5699 /* Compute Available Stack Size for back chain */
5700 emit_insn (gen_subsi3 (temp2, temp2, stack_pointer_rtx));
5701 emit_insn (gen_shufb (temp4, temp2, temp2, pat));
5702 emit_insn (gen_addv4si3 (temp4, sp, temp4));
5704 emit_insn (gen_addv4si3 (sp, sp, temp3));
5705 emit_move_insn (gen_frame_mem (V4SImode, stack_pointer_rtx), temp4);
5709 spu_init_libfuncs (void)
5711 set_optab_libfunc (smul_optab, DImode, "__muldi3");
5712 set_optab_libfunc (sdiv_optab, DImode, "__divdi3");
5713 set_optab_libfunc (smod_optab, DImode, "__moddi3");
5714 set_optab_libfunc (udiv_optab, DImode, "__udivdi3");
5715 set_optab_libfunc (umod_optab, DImode, "__umoddi3");
5716 set_optab_libfunc (udivmod_optab, DImode, "__udivmoddi4");
5717 set_optab_libfunc (ffs_optab, DImode, "__ffsdi2");
5718 set_optab_libfunc (clz_optab, DImode, "__clzdi2");
5719 set_optab_libfunc (ctz_optab, DImode, "__ctzdi2");
5720 set_optab_libfunc (clrsb_optab, DImode, "__clrsbdi2");
5721 set_optab_libfunc (popcount_optab, DImode, "__popcountdi2");
5722 set_optab_libfunc (parity_optab, DImode, "__paritydi2");
5724 set_conv_libfunc (ufloat_optab, DFmode, SImode, "__float_unssidf");
5725 set_conv_libfunc (ufloat_optab, DFmode, DImode, "__float_unsdidf");
5727 set_optab_libfunc (addv_optab, SImode, "__addvsi3");
5728 set_optab_libfunc (subv_optab, SImode, "__subvsi3");
5729 set_optab_libfunc (smulv_optab, SImode, "__mulvsi3");
5730 set_optab_libfunc (sdivv_optab, SImode, "__divvsi3");
5731 set_optab_libfunc (negv_optab, SImode, "__negvsi2");
5732 set_optab_libfunc (absv_optab, SImode, "__absvsi2");
5733 set_optab_libfunc (addv_optab, DImode, "__addvdi3");
5734 set_optab_libfunc (subv_optab, DImode, "__subvdi3");
5735 set_optab_libfunc (smulv_optab, DImode, "__mulvdi3");
5736 set_optab_libfunc (sdivv_optab, DImode, "__divvdi3");
5737 set_optab_libfunc (negv_optab, DImode, "__negvdi2");
5738 set_optab_libfunc (absv_optab, DImode, "__absvdi2");
5740 set_optab_libfunc (smul_optab, TImode, "__multi3");
5741 set_optab_libfunc (sdiv_optab, TImode, "__divti3");
5742 set_optab_libfunc (smod_optab, TImode, "__modti3");
5743 set_optab_libfunc (udiv_optab, TImode, "__udivti3");
5744 set_optab_libfunc (umod_optab, TImode, "__umodti3");
5745 set_optab_libfunc (udivmod_optab, TImode, "__udivmodti4");
5748 /* Make a subreg, stripping any existing subreg. We could possibly just
5749 call simplify_subreg, but in this case we know what we want. */
5751 spu_gen_subreg (enum machine_mode mode, rtx x)
5753 if (GET_CODE (x) == SUBREG)
5755 if (GET_MODE (x) == mode)
5757 return gen_rtx_SUBREG (mode, x, 0);
5761 spu_return_in_memory (const_tree type, const_tree fntype ATTRIBUTE_UNUSED)
5763 return (TYPE_MODE (type) == BLKmode
5765 || TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST
5766 || int_size_in_bytes (type) >
5767 (MAX_REGISTER_RETURN * UNITS_PER_WORD)));
5770 /* Create the built-in types and functions */
5772 enum spu_function_code
5774 #define DEF_BUILTIN(fcode, icode, name, type, params) fcode,
5775 #include "spu-builtins.def"
5780 extern GTY(()) struct spu_builtin_description spu_builtins[NUM_SPU_BUILTINS];
5782 struct spu_builtin_description spu_builtins[] = {
5783 #define DEF_BUILTIN(fcode, icode, name, type, params) \
5784 {fcode, icode, name, type, params},
5785 #include "spu-builtins.def"
5789 static GTY(()) tree spu_builtin_decls[NUM_SPU_BUILTINS];
5791 /* Returns the spu builtin decl for CODE. */
5794 spu_builtin_decl (unsigned code, bool initialize_p ATTRIBUTE_UNUSED)
5796 if (code >= NUM_SPU_BUILTINS)
5797 return error_mark_node;
5799 return spu_builtin_decls[code];
5804 spu_init_builtins (void)
5806 struct spu_builtin_description *d;
5809 V16QI_type_node = build_vector_type (intQI_type_node, 16);
5810 V8HI_type_node = build_vector_type (intHI_type_node, 8);
5811 V4SI_type_node = build_vector_type (intSI_type_node, 4);
5812 V2DI_type_node = build_vector_type (intDI_type_node, 2);
5813 V4SF_type_node = build_vector_type (float_type_node, 4);
5814 V2DF_type_node = build_vector_type (double_type_node, 2);
5816 unsigned_V16QI_type_node = build_vector_type (unsigned_intQI_type_node, 16);
5817 unsigned_V8HI_type_node = build_vector_type (unsigned_intHI_type_node, 8);
5818 unsigned_V4SI_type_node = build_vector_type (unsigned_intSI_type_node, 4);
5819 unsigned_V2DI_type_node = build_vector_type (unsigned_intDI_type_node, 2);
5821 spu_builtin_types[SPU_BTI_QUADWORD] = V16QI_type_node;
5823 spu_builtin_types[SPU_BTI_7] = global_trees[TI_INTSI_TYPE];
5824 spu_builtin_types[SPU_BTI_S7] = global_trees[TI_INTSI_TYPE];
5825 spu_builtin_types[SPU_BTI_U7] = global_trees[TI_INTSI_TYPE];
5826 spu_builtin_types[SPU_BTI_S10] = global_trees[TI_INTSI_TYPE];
5827 spu_builtin_types[SPU_BTI_S10_4] = global_trees[TI_INTSI_TYPE];
5828 spu_builtin_types[SPU_BTI_U14] = global_trees[TI_INTSI_TYPE];
5829 spu_builtin_types[SPU_BTI_16] = global_trees[TI_INTSI_TYPE];
5830 spu_builtin_types[SPU_BTI_S16] = global_trees[TI_INTSI_TYPE];
5831 spu_builtin_types[SPU_BTI_S16_2] = global_trees[TI_INTSI_TYPE];
5832 spu_builtin_types[SPU_BTI_U16] = global_trees[TI_INTSI_TYPE];
5833 spu_builtin_types[SPU_BTI_U16_2] = global_trees[TI_INTSI_TYPE];
5834 spu_builtin_types[SPU_BTI_U18] = global_trees[TI_INTSI_TYPE];
5836 spu_builtin_types[SPU_BTI_INTQI] = global_trees[TI_INTQI_TYPE];
5837 spu_builtin_types[SPU_BTI_INTHI] = global_trees[TI_INTHI_TYPE];
5838 spu_builtin_types[SPU_BTI_INTSI] = global_trees[TI_INTSI_TYPE];
5839 spu_builtin_types[SPU_BTI_INTDI] = global_trees[TI_INTDI_TYPE];
5840 spu_builtin_types[SPU_BTI_UINTQI] = global_trees[TI_UINTQI_TYPE];
5841 spu_builtin_types[SPU_BTI_UINTHI] = global_trees[TI_UINTHI_TYPE];
5842 spu_builtin_types[SPU_BTI_UINTSI] = global_trees[TI_UINTSI_TYPE];
5843 spu_builtin_types[SPU_BTI_UINTDI] = global_trees[TI_UINTDI_TYPE];
5845 spu_builtin_types[SPU_BTI_FLOAT] = global_trees[TI_FLOAT_TYPE];
5846 spu_builtin_types[SPU_BTI_DOUBLE] = global_trees[TI_DOUBLE_TYPE];
5848 spu_builtin_types[SPU_BTI_VOID] = global_trees[TI_VOID_TYPE];
5850 spu_builtin_types[SPU_BTI_PTR] =
5851 build_pointer_type (build_qualified_type
5853 TYPE_QUAL_CONST | TYPE_QUAL_VOLATILE));
5855 /* For each builtin we build a new prototype. The tree code will make
5856 sure nodes are shared. */
5857 for (i = 0, d = spu_builtins; i < NUM_SPU_BUILTINS; i++, d++)
5860 char name[64]; /* build_function will make a copy. */
5866 /* Find last parm. */
5867 for (parm = 1; d->parm[parm] != SPU_BTI_END_OF_PARAMS; parm++)
5872 p = tree_cons (NULL_TREE, spu_builtin_types[d->parm[--parm]], p);
5874 p = build_function_type (spu_builtin_types[d->parm[0]], p);
5876 sprintf (name, "__builtin_%s", d->name);
5877 spu_builtin_decls[i] =
5878 add_builtin_function (name, p, i, BUILT_IN_MD, NULL, NULL_TREE);
5879 if (d->fcode == SPU_MASK_FOR_LOAD)
5880 TREE_READONLY (spu_builtin_decls[i]) = 1;
5882 /* These builtins don't throw. */
5883 TREE_NOTHROW (spu_builtin_decls[i]) = 1;
5888 spu_restore_stack_block (rtx op0 ATTRIBUTE_UNUSED, rtx op1)
5890 static unsigned char arr[16] =
5891 { 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3 };
5893 rtx temp = gen_reg_rtx (Pmode);
5894 rtx temp2 = gen_reg_rtx (V4SImode);
5895 rtx temp3 = gen_reg_rtx (V4SImode);
5896 rtx pat = gen_reg_rtx (TImode);
5897 rtx sp = gen_rtx_REG (V4SImode, STACK_POINTER_REGNUM);
5899 emit_move_insn (pat, array_to_constant (TImode, arr));
5901 /* Restore the sp. */
5902 emit_move_insn (temp, op1);
5903 emit_move_insn (temp2, gen_frame_mem (V4SImode, stack_pointer_rtx));
5905 /* Compute available stack size for sp. */
5906 emit_insn (gen_subsi3 (temp, temp, stack_pointer_rtx));
5907 emit_insn (gen_shufb (temp3, temp, temp, pat));
5909 emit_insn (gen_addv4si3 (sp, sp, temp3));
5910 emit_move_insn (gen_frame_mem (V4SImode, stack_pointer_rtx), temp2);
5914 spu_safe_dma (HOST_WIDE_INT channel)
5916 return TARGET_SAFE_DMA && channel >= 21 && channel <= 27;
5920 spu_builtin_splats (rtx ops[])
5922 enum machine_mode mode = GET_MODE (ops[0]);
5923 if (GET_CODE (ops[1]) == CONST_INT || GET_CODE (ops[1]) == CONST_DOUBLE)
5925 unsigned char arr[16];
5926 constant_to_array (GET_MODE_INNER (mode), ops[1], arr);
5927 emit_move_insn (ops[0], array_to_constant (mode, arr));
5931 rtx reg = gen_reg_rtx (TImode);
5933 if (GET_CODE (ops[1]) != REG
5934 && GET_CODE (ops[1]) != SUBREG)
5935 ops[1] = force_reg (GET_MODE_INNER (mode), ops[1]);
5941 immed_double_const (0x0001020304050607ll, 0x1011121314151617ll,
5947 immed_double_const (0x0001020300010203ll, 0x0001020300010203ll,
5952 immed_double_const (0x0203020302030203ll, 0x0203020302030203ll,
5957 immed_double_const (0x0303030303030303ll, 0x0303030303030303ll,
5963 emit_move_insn (reg, shuf);
5964 emit_insn (gen_shufb (ops[0], ops[1], ops[1], reg));
5969 spu_builtin_extract (rtx ops[])
5971 enum machine_mode mode;
5974 mode = GET_MODE (ops[1]);
5976 if (GET_CODE (ops[2]) == CONST_INT)
5981 emit_insn (gen_vec_extractv16qi (ops[0], ops[1], ops[2]));
5984 emit_insn (gen_vec_extractv8hi (ops[0], ops[1], ops[2]));
5987 emit_insn (gen_vec_extractv4sf (ops[0], ops[1], ops[2]));
5990 emit_insn (gen_vec_extractv4si (ops[0], ops[1], ops[2]));
5993 emit_insn (gen_vec_extractv2di (ops[0], ops[1], ops[2]));
5996 emit_insn (gen_vec_extractv2df (ops[0], ops[1], ops[2]));
6004 from = spu_gen_subreg (TImode, ops[1]);
6005 rot = gen_reg_rtx (TImode);
6006 tmp = gen_reg_rtx (SImode);
6011 emit_insn (gen_addsi3 (tmp, ops[2], GEN_INT (-3)));
6014 emit_insn (gen_addsi3 (tmp, ops[2], ops[2]));
6015 emit_insn (gen_addsi3 (tmp, tmp, GEN_INT (-2)));
6019 emit_insn (gen_ashlsi3 (tmp, ops[2], GEN_INT (2)));
6023 emit_insn (gen_ashlsi3 (tmp, ops[2], GEN_INT (3)));
6028 emit_insn (gen_rotqby_ti (rot, from, tmp));
6030 emit_insn (gen_spu_convert (ops[0], rot));
6034 spu_builtin_insert (rtx ops[])
6036 enum machine_mode mode = GET_MODE (ops[0]);
6037 enum machine_mode imode = GET_MODE_INNER (mode);
6038 rtx mask = gen_reg_rtx (TImode);
6041 if (GET_CODE (ops[3]) == CONST_INT)
6042 offset = GEN_INT (INTVAL (ops[3]) * GET_MODE_SIZE (imode));
6045 offset = gen_reg_rtx (SImode);
6046 emit_insn (gen_mulsi3
6047 (offset, ops[3], GEN_INT (GET_MODE_SIZE (imode))));
6050 (mask, stack_pointer_rtx, offset,
6051 GEN_INT (GET_MODE_SIZE (imode))));
6052 emit_insn (gen_shufb (ops[0], ops[1], ops[2], mask));
6056 spu_builtin_promote (rtx ops[])
6058 enum machine_mode mode, imode;
6059 rtx rot, from, offset;
6062 mode = GET_MODE (ops[0]);
6063 imode = GET_MODE_INNER (mode);
6065 from = gen_reg_rtx (TImode);
6066 rot = spu_gen_subreg (TImode, ops[0]);
6068 emit_insn (gen_spu_convert (from, ops[1]));
6070 if (GET_CODE (ops[2]) == CONST_INT)
6072 pos = -GET_MODE_SIZE (imode) * INTVAL (ops[2]);
6073 if (GET_MODE_SIZE (imode) < 4)
6074 pos += 4 - GET_MODE_SIZE (imode);
6075 offset = GEN_INT (pos & 15);
6079 offset = gen_reg_rtx (SImode);
6083 emit_insn (gen_subsi3 (offset, GEN_INT (3), ops[2]));
6086 emit_insn (gen_subsi3 (offset, GEN_INT (1), ops[2]));
6087 emit_insn (gen_addsi3 (offset, offset, offset));
6091 emit_insn (gen_subsi3 (offset, GEN_INT (0), ops[2]));
6092 emit_insn (gen_ashlsi3 (offset, offset, GEN_INT (2)));
6096 emit_insn (gen_ashlsi3 (offset, ops[2], GEN_INT (3)));
6102 emit_insn (gen_rotqby_ti (rot, from, offset));
6106 spu_trampoline_init (rtx m_tramp, tree fndecl, rtx cxt)
6108 rtx fnaddr = XEXP (DECL_RTL (fndecl), 0);
6109 rtx shuf = gen_reg_rtx (V4SImode);
6110 rtx insn = gen_reg_rtx (V4SImode);
6115 fnaddr = force_reg (SImode, fnaddr);
6116 cxt = force_reg (SImode, cxt);
6118 if (TARGET_LARGE_MEM)
6120 rtx rotl = gen_reg_rtx (V4SImode);
6121 rtx mask = gen_reg_rtx (V4SImode);
6122 rtx bi = gen_reg_rtx (SImode);
6123 static unsigned char const shufa[16] = {
6124 2, 3, 0, 1, 18, 19, 16, 17,
6125 0, 1, 2, 3, 16, 17, 18, 19
6127 static unsigned char const insna[16] = {
6129 0x41, 0, 0, STATIC_CHAIN_REGNUM,
6131 0x60, 0x80, 0, STATIC_CHAIN_REGNUM
6134 shufc = force_reg (TImode, array_to_constant (TImode, shufa));
6135 insnc = force_reg (V4SImode, array_to_constant (V4SImode, insna));
6137 emit_insn (gen_shufb (shuf, fnaddr, cxt, shufc));
6138 emit_insn (gen_vrotlv4si3 (rotl, shuf, spu_const (V4SImode, 7)));
6139 emit_insn (gen_movv4si (mask, spu_const (V4SImode, 0xffff << 7)));
6140 emit_insn (gen_selb (insn, insnc, rotl, mask));
6142 mem = adjust_address (m_tramp, V4SImode, 0);
6143 emit_move_insn (mem, insn);
6145 emit_move_insn (bi, GEN_INT (0x35000000 + (79 << 7)));
6146 mem = adjust_address (m_tramp, Pmode, 16);
6147 emit_move_insn (mem, bi);
6151 rtx scxt = gen_reg_rtx (SImode);
6152 rtx sfnaddr = gen_reg_rtx (SImode);
6153 static unsigned char const insna[16] = {
6154 0x42, 0, 0, STATIC_CHAIN_REGNUM,
6160 shufc = gen_reg_rtx (TImode);
6161 insnc = force_reg (V4SImode, array_to_constant (V4SImode, insna));
6163 /* By or'ing all of cxt with the ila opcode we are assuming cxt
6164 fits 18 bits and the last 4 are zeros. This will be true if
6165 the stack pointer is initialized to 0x3fff0 at program start,
6166 otherwise the ila instruction will be garbage. */
6168 emit_insn (gen_ashlsi3 (scxt, cxt, GEN_INT (7)));
6169 emit_insn (gen_ashlsi3 (sfnaddr, fnaddr, GEN_INT (5)));
6171 (shufc, stack_pointer_rtx, GEN_INT (4), GEN_INT (4)));
6172 emit_insn (gen_shufb (shuf, sfnaddr, scxt, shufc));
6173 emit_insn (gen_iorv4si3 (insn, insnc, shuf));
6175 mem = adjust_address (m_tramp, V4SImode, 0);
6176 emit_move_insn (mem, insn);
6178 emit_insn (gen_sync ());
6182 spu_expand_sign_extend (rtx ops[])
6184 unsigned char arr[16];
6185 rtx pat = gen_reg_rtx (TImode);
6188 last = GET_MODE (ops[0]) == DImode ? 7 : 15;
6189 if (GET_MODE (ops[1]) == QImode)
6191 sign = gen_reg_rtx (HImode);
6192 emit_insn (gen_extendqihi2 (sign, ops[1]));
6193 for (i = 0; i < 16; i++)
6199 for (i = 0; i < 16; i++)
6201 switch (GET_MODE (ops[1]))
6204 sign = gen_reg_rtx (SImode);
6205 emit_insn (gen_extendhisi2 (sign, ops[1]));
6207 arr[last - 1] = 0x02;
6210 sign = gen_reg_rtx (SImode);
6211 emit_insn (gen_ashrsi3 (sign, ops[1], GEN_INT (31)));
6212 for (i = 0; i < 4; i++)
6213 arr[last - i] = 3 - i;
6216 sign = gen_reg_rtx (SImode);
6217 c = gen_reg_rtx (SImode);
6218 emit_insn (gen_spu_convert (c, ops[1]));
6219 emit_insn (gen_ashrsi3 (sign, c, GEN_INT (31)));
6220 for (i = 0; i < 8; i++)
6221 arr[last - i] = 7 - i;
6227 emit_move_insn (pat, array_to_constant (TImode, arr));
6228 emit_insn (gen_shufb (ops[0], ops[1], sign, pat));
6231 /* expand vector initialization. If there are any constant parts,
6232 load constant parts first. Then load any non-constant parts. */
6234 spu_expand_vector_init (rtx target, rtx vals)
6236 enum machine_mode mode = GET_MODE (target);
6237 int n_elts = GET_MODE_NUNITS (mode);
6239 bool all_same = true;
6240 rtx first, x = NULL_RTX, first_constant = NULL_RTX;
6243 first = XVECEXP (vals, 0, 0);
6244 for (i = 0; i < n_elts; ++i)
6246 x = XVECEXP (vals, 0, i);
6247 if (!(CONST_INT_P (x)
6248 || GET_CODE (x) == CONST_DOUBLE
6249 || GET_CODE (x) == CONST_FIXED))
6253 if (first_constant == NULL_RTX)
6256 if (i > 0 && !rtx_equal_p (x, first))
6260 /* if all elements are the same, use splats to repeat elements */
6263 if (!CONSTANT_P (first)
6264 && !register_operand (first, GET_MODE (x)))
6265 first = force_reg (GET_MODE (first), first);
6266 emit_insn (gen_spu_splats (target, first));
6270 /* load constant parts */
6271 if (n_var != n_elts)
6275 emit_move_insn (target,
6276 gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0)));
6280 rtx constant_parts_rtx = copy_rtx (vals);
6282 gcc_assert (first_constant != NULL_RTX);
6283 /* fill empty slots with the first constant, this increases
6284 our chance of using splats in the recursive call below. */
6285 for (i = 0; i < n_elts; ++i)
6287 x = XVECEXP (constant_parts_rtx, 0, i);
6288 if (!(CONST_INT_P (x)
6289 || GET_CODE (x) == CONST_DOUBLE
6290 || GET_CODE (x) == CONST_FIXED))
6291 XVECEXP (constant_parts_rtx, 0, i) = first_constant;
6294 spu_expand_vector_init (target, constant_parts_rtx);
6298 /* load variable parts */
6301 rtx insert_operands[4];
6303 insert_operands[0] = target;
6304 insert_operands[2] = target;
6305 for (i = 0; i < n_elts; ++i)
6307 x = XVECEXP (vals, 0, i);
6308 if (!(CONST_INT_P (x)
6309 || GET_CODE (x) == CONST_DOUBLE
6310 || GET_CODE (x) == CONST_FIXED))
6312 if (!register_operand (x, GET_MODE (x)))
6313 x = force_reg (GET_MODE (x), x);
6314 insert_operands[1] = x;
6315 insert_operands[3] = GEN_INT (i);
6316 spu_builtin_insert (insert_operands);
6322 /* Return insn index for the vector compare instruction for given CODE,
6323 and DEST_MODE, OP_MODE. Return -1 if valid insn is not available. */
6326 get_vec_cmp_insn (enum rtx_code code,
6327 enum machine_mode dest_mode,
6328 enum machine_mode op_mode)
6334 if (dest_mode == V16QImode && op_mode == V16QImode)
6335 return CODE_FOR_ceq_v16qi;
6336 if (dest_mode == V8HImode && op_mode == V8HImode)
6337 return CODE_FOR_ceq_v8hi;
6338 if (dest_mode == V4SImode && op_mode == V4SImode)
6339 return CODE_FOR_ceq_v4si;
6340 if (dest_mode == V4SImode && op_mode == V4SFmode)
6341 return CODE_FOR_ceq_v4sf;
6342 if (dest_mode == V2DImode && op_mode == V2DFmode)
6343 return CODE_FOR_ceq_v2df;
6346 if (dest_mode == V16QImode && op_mode == V16QImode)
6347 return CODE_FOR_cgt_v16qi;
6348 if (dest_mode == V8HImode && op_mode == V8HImode)
6349 return CODE_FOR_cgt_v8hi;
6350 if (dest_mode == V4SImode && op_mode == V4SImode)
6351 return CODE_FOR_cgt_v4si;
6352 if (dest_mode == V4SImode && op_mode == V4SFmode)
6353 return CODE_FOR_cgt_v4sf;
6354 if (dest_mode == V2DImode && op_mode == V2DFmode)
6355 return CODE_FOR_cgt_v2df;
6358 if (dest_mode == V16QImode && op_mode == V16QImode)
6359 return CODE_FOR_clgt_v16qi;
6360 if (dest_mode == V8HImode && op_mode == V8HImode)
6361 return CODE_FOR_clgt_v8hi;
6362 if (dest_mode == V4SImode && op_mode == V4SImode)
6363 return CODE_FOR_clgt_v4si;
6371 /* Emit vector compare for operands OP0 and OP1 using code RCODE.
6372 DMODE is expected destination mode. This is a recursive function. */
6375 spu_emit_vector_compare (enum rtx_code rcode,
6377 enum machine_mode dmode)
6381 enum machine_mode dest_mode;
6382 enum machine_mode op_mode = GET_MODE (op1);
6384 gcc_assert (GET_MODE (op0) == GET_MODE (op1));
6386 /* Floating point vector compare instructions uses destination V4SImode.
6387 Double floating point vector compare instructions uses destination V2DImode.
6388 Move destination to appropriate mode later. */
6389 if (dmode == V4SFmode)
6390 dest_mode = V4SImode;
6391 else if (dmode == V2DFmode)
6392 dest_mode = V2DImode;
6396 mask = gen_reg_rtx (dest_mode);
6397 vec_cmp_insn = get_vec_cmp_insn (rcode, dest_mode, op_mode);
6399 if (vec_cmp_insn == -1)
6401 bool swap_operands = false;
6402 bool try_again = false;
6407 swap_operands = true;
6412 swap_operands = true;
6422 /* Treat A != B as ~(A==B). */
6424 enum rtx_code rev_code;
6425 enum insn_code nor_code;
6428 rev_code = reverse_condition_maybe_unordered (rcode);
6429 rev_mask = spu_emit_vector_compare (rev_code, op0, op1, dest_mode);
6431 nor_code = optab_handler (one_cmpl_optab, dest_mode);
6432 gcc_assert (nor_code != CODE_FOR_nothing);
6433 emit_insn (GEN_FCN (nor_code) (mask, rev_mask));
6434 if (dmode != dest_mode)
6436 rtx temp = gen_reg_rtx (dest_mode);
6437 convert_move (temp, mask, 0);
6447 /* Try GT/GTU/LT/LTU OR EQ */
6450 enum insn_code ior_code;
6451 enum rtx_code new_code;
6455 case GE: new_code = GT; break;
6456 case GEU: new_code = GTU; break;
6457 case LE: new_code = LT; break;
6458 case LEU: new_code = LTU; break;
6463 c_rtx = spu_emit_vector_compare (new_code, op0, op1, dest_mode);
6464 eq_rtx = spu_emit_vector_compare (EQ, op0, op1, dest_mode);
6466 ior_code = optab_handler (ior_optab, dest_mode);
6467 gcc_assert (ior_code != CODE_FOR_nothing);
6468 emit_insn (GEN_FCN (ior_code) (mask, c_rtx, eq_rtx));
6469 if (dmode != dest_mode)
6471 rtx temp = gen_reg_rtx (dest_mode);
6472 convert_move (temp, mask, 0);
6482 enum insn_code ior_code;
6484 lt_rtx = spu_emit_vector_compare (LT, op0, op1, dest_mode);
6485 gt_rtx = spu_emit_vector_compare (GT, op0, op1, dest_mode);
6487 ior_code = optab_handler (ior_optab, dest_mode);
6488 gcc_assert (ior_code != CODE_FOR_nothing);
6489 emit_insn (GEN_FCN (ior_code) (mask, lt_rtx, gt_rtx));
6490 if (dmode != dest_mode)
6492 rtx temp = gen_reg_rtx (dest_mode);
6493 convert_move (temp, mask, 0);
6500 /* Implement as (A==A) & (B==B) */
6503 enum insn_code and_code;
6505 a_rtx = spu_emit_vector_compare (EQ, op0, op0, dest_mode);
6506 b_rtx = spu_emit_vector_compare (EQ, op1, op1, dest_mode);
6508 and_code = optab_handler (and_optab, dest_mode);
6509 gcc_assert (and_code != CODE_FOR_nothing);
6510 emit_insn (GEN_FCN (and_code) (mask, a_rtx, b_rtx));
6511 if (dmode != dest_mode)
6513 rtx temp = gen_reg_rtx (dest_mode);
6514 convert_move (temp, mask, 0);
6524 /* You only get two chances. */
6526 vec_cmp_insn = get_vec_cmp_insn (rcode, dest_mode, op_mode);
6528 gcc_assert (vec_cmp_insn != -1);
6539 emit_insn (GEN_FCN (vec_cmp_insn) (mask, op0, op1));
6540 if (dmode != dest_mode)
6542 rtx temp = gen_reg_rtx (dest_mode);
6543 convert_move (temp, mask, 0);
6550 /* Emit vector conditional expression.
6551 DEST is destination. OP1 and OP2 are two VEC_COND_EXPR operands.
6552 CC_OP0 and CC_OP1 are the two operands for the relation operation COND. */
6555 spu_emit_vector_cond_expr (rtx dest, rtx op1, rtx op2,
6556 rtx cond, rtx cc_op0, rtx cc_op1)
6558 enum machine_mode dest_mode = GET_MODE (dest);
6559 enum rtx_code rcode = GET_CODE (cond);
6562 /* Get the vector mask for the given relational operations. */
6563 mask = spu_emit_vector_compare (rcode, cc_op0, cc_op1, dest_mode);
6565 emit_insn(gen_selb (dest, op2, op1, mask));
6571 spu_force_reg (enum machine_mode mode, rtx op)
6574 if (GET_MODE (op) == VOIDmode || GET_MODE (op) == BLKmode)
6576 if ((SCALAR_INT_MODE_P (mode) && GET_CODE (op) == CONST_INT)
6577 || GET_MODE (op) == BLKmode)
6578 return force_reg (mode, convert_to_mode (mode, op, 0));
6582 r = force_reg (GET_MODE (op), op);
6583 if (GET_MODE_SIZE (GET_MODE (op)) == GET_MODE_SIZE (mode))
6585 x = simplify_gen_subreg (mode, r, GET_MODE (op), 0);
6590 x = gen_reg_rtx (mode);
6591 emit_insn (gen_spu_convert (x, r));
6596 spu_check_builtin_parm (struct spu_builtin_description *d, rtx op, int p)
6598 HOST_WIDE_INT v = 0;
6600 /* Check the range of immediate operands. */
6601 if (p >= SPU_BTI_7 && p <= SPU_BTI_U18)
6603 int range = p - SPU_BTI_7;
6605 if (!CONSTANT_P (op))
6606 error ("%s expects an integer literal in the range [%d, %d]",
6608 spu_builtin_range[range].low, spu_builtin_range[range].high);
6610 if (GET_CODE (op) == CONST
6611 && (GET_CODE (XEXP (op, 0)) == PLUS
6612 || GET_CODE (XEXP (op, 0)) == MINUS))
6614 v = INTVAL (XEXP (XEXP (op, 0), 1));
6615 op = XEXP (XEXP (op, 0), 0);
6617 else if (GET_CODE (op) == CONST_INT)
6619 else if (GET_CODE (op) == CONST_VECTOR
6620 && GET_CODE (CONST_VECTOR_ELT (op, 0)) == CONST_INT)
6621 v = INTVAL (CONST_VECTOR_ELT (op, 0));
6623 /* The default for v is 0 which is valid in every range. */
6624 if (v < spu_builtin_range[range].low
6625 || v > spu_builtin_range[range].high)
6626 error ("%s expects an integer literal in the range [%d, %d]. (%wd)",
6628 spu_builtin_range[range].low, spu_builtin_range[range].high,
6637 /* This is only used in lqa, and stqa. Even though the insns
6638 encode 16 bits of the address (all but the 2 least
6639 significant), only 14 bits are used because it is masked to
6640 be 16 byte aligned. */
6644 /* This is used for lqr and stqr. */
6651 if (GET_CODE (op) == LABEL_REF
6652 || (GET_CODE (op) == SYMBOL_REF
6653 && SYMBOL_REF_FUNCTION_P (op))
6654 || (v & ((1 << lsbits) - 1)) != 0)
6655 warning (0, "%d least significant bits of %s are ignored", lsbits,
6662 expand_builtin_args (struct spu_builtin_description *d, tree exp,
6663 rtx target, rtx ops[])
6665 enum insn_code icode = (enum insn_code) d->icode;
6668 /* Expand the arguments into rtl. */
6670 if (d->parm[0] != SPU_BTI_VOID)
6673 for (a = 0; d->parm[a+1] != SPU_BTI_END_OF_PARAMS; i++, a++)
6675 tree arg = CALL_EXPR_ARG (exp, a);
6678 ops[i] = expand_expr (arg, NULL_RTX, VOIDmode, EXPAND_NORMAL);
6681 gcc_assert (i == insn_data[icode].n_generator_args);
6686 spu_expand_builtin_1 (struct spu_builtin_description *d,
6687 tree exp, rtx target)
6691 enum insn_code icode = (enum insn_code) d->icode;
6692 enum machine_mode mode, tmode;
6697 /* Set up ops[] with values from arglist. */
6698 n_operands = expand_builtin_args (d, exp, target, ops);
6700 /* Handle the target operand which must be operand 0. */
6702 if (d->parm[0] != SPU_BTI_VOID)
6705 /* We prefer the mode specified for the match_operand otherwise
6706 use the mode from the builtin function prototype. */
6707 tmode = insn_data[d->icode].operand[0].mode;
6708 if (tmode == VOIDmode)
6709 tmode = TYPE_MODE (spu_builtin_types[d->parm[0]]);
6711 /* Try to use target because not using it can lead to extra copies
6712 and when we are using all of the registers extra copies leads
6714 if (target && GET_CODE (target) == REG && GET_MODE (target) == tmode)
6717 target = ops[0] = gen_reg_rtx (tmode);
6719 if (!(*insn_data[icode].operand[0].predicate) (ops[0], tmode))
6725 if (d->fcode == SPU_MASK_FOR_LOAD)
6727 enum machine_mode mode = insn_data[icode].operand[1].mode;
6732 arg = CALL_EXPR_ARG (exp, 0);
6733 gcc_assert (POINTER_TYPE_P (TREE_TYPE (arg)));
6734 op = expand_expr (arg, NULL_RTX, Pmode, EXPAND_NORMAL);
6735 addr = memory_address (mode, op);
6738 op = gen_reg_rtx (GET_MODE (addr));
6739 emit_insn (gen_rtx_SET (VOIDmode, op,
6740 gen_rtx_NEG (GET_MODE (addr), addr)));
6741 op = gen_rtx_MEM (mode, op);
6743 pat = GEN_FCN (icode) (target, op);
6750 /* Ignore align_hint, but still expand it's args in case they have
6752 if (icode == CODE_FOR_spu_align_hint)
6755 /* Handle the rest of the operands. */
6756 for (p = 1; i < n_operands; i++, p++)
6758 if (insn_data[d->icode].operand[i].mode != VOIDmode)
6759 mode = insn_data[d->icode].operand[i].mode;
6761 mode = TYPE_MODE (spu_builtin_types[d->parm[i]]);
6763 /* mode can be VOIDmode here for labels */
6765 /* For specific intrinsics with an immediate operand, e.g.,
6766 si_ai(), we sometimes need to convert the scalar argument to a
6767 vector argument by splatting the scalar. */
6768 if (VECTOR_MODE_P (mode)
6769 && (GET_CODE (ops[i]) == CONST_INT
6770 || GET_MODE_CLASS (GET_MODE (ops[i])) == MODE_INT
6771 || GET_MODE_CLASS (GET_MODE (ops[i])) == MODE_FLOAT))
6773 if (GET_CODE (ops[i]) == CONST_INT)
6774 ops[i] = spu_const (mode, INTVAL (ops[i]));
6777 rtx reg = gen_reg_rtx (mode);
6778 enum machine_mode imode = GET_MODE_INNER (mode);
6779 if (!spu_nonmem_operand (ops[i], GET_MODE (ops[i])))
6780 ops[i] = force_reg (GET_MODE (ops[i]), ops[i]);
6781 if (imode != GET_MODE (ops[i]))
6782 ops[i] = convert_to_mode (imode, ops[i],
6783 TYPE_UNSIGNED (spu_builtin_types
6785 emit_insn (gen_spu_splats (reg, ops[i]));
6790 spu_check_builtin_parm (d, ops[i], d->parm[p]);
6792 if (!(*insn_data[icode].operand[i].predicate) (ops[i], mode))
6793 ops[i] = spu_force_reg (mode, ops[i]);
6799 pat = GEN_FCN (icode) (0);
6802 pat = GEN_FCN (icode) (ops[0]);
6805 pat = GEN_FCN (icode) (ops[0], ops[1]);
6808 pat = GEN_FCN (icode) (ops[0], ops[1], ops[2]);
6811 pat = GEN_FCN (icode) (ops[0], ops[1], ops[2], ops[3]);
6814 pat = GEN_FCN (icode) (ops[0], ops[1], ops[2], ops[3], ops[4]);
6817 pat = GEN_FCN (icode) (ops[0], ops[1], ops[2], ops[3], ops[4], ops[5]);
6826 if (d->type == B_CALL || d->type == B_BISLED)
6827 emit_call_insn (pat);
6828 else if (d->type == B_JUMP)
6830 emit_jump_insn (pat);
6836 return_type = spu_builtin_types[d->parm[0]];
6837 if (d->parm[0] != SPU_BTI_VOID
6838 && GET_MODE (target) != TYPE_MODE (return_type))
6840 /* target is the return value. It should always be the mode of
6841 the builtin function prototype. */
6842 target = spu_force_reg (TYPE_MODE (return_type), target);
6849 spu_expand_builtin (tree exp,
6851 rtx subtarget ATTRIBUTE_UNUSED,
6852 enum machine_mode mode ATTRIBUTE_UNUSED,
6853 int ignore ATTRIBUTE_UNUSED)
6855 tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
6856 unsigned int fcode = DECL_FUNCTION_CODE (fndecl);
6857 struct spu_builtin_description *d;
6859 if (fcode < NUM_SPU_BUILTINS)
6861 d = &spu_builtins[fcode];
6863 return spu_expand_builtin_1 (d, exp, target);
6868 /* Implement targetm.vectorize.builtin_mul_widen_even. */
6870 spu_builtin_mul_widen_even (tree type)
6872 switch (TYPE_MODE (type))
6875 if (TYPE_UNSIGNED (type))
6876 return spu_builtin_decls[SPU_MULE_0];
6878 return spu_builtin_decls[SPU_MULE_1];
6885 /* Implement targetm.vectorize.builtin_mul_widen_odd. */
6887 spu_builtin_mul_widen_odd (tree type)
6889 switch (TYPE_MODE (type))
6892 if (TYPE_UNSIGNED (type))
6893 return spu_builtin_decls[SPU_MULO_1];
6895 return spu_builtin_decls[SPU_MULO_0];
6902 /* Implement targetm.vectorize.builtin_mask_for_load. */
6904 spu_builtin_mask_for_load (void)
6906 return spu_builtin_decls[SPU_MASK_FOR_LOAD];
6909 /* Implement targetm.vectorize.builtin_vectorization_cost. */
6911 spu_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost,
6912 tree vectype ATTRIBUTE_UNUSED,
6913 int misalign ATTRIBUTE_UNUSED)
6915 switch (type_of_cost)
6923 case cond_branch_not_taken:
6925 case vec_promote_demote:
6932 /* Load + rotate. */
6935 case unaligned_load:
6938 case cond_branch_taken:
6946 /* Return true iff, data reference of TYPE can reach vector alignment (16)
6947 after applying N number of iterations. This routine does not determine
6948 how may iterations are required to reach desired alignment. */
6951 spu_vector_alignment_reachable (const_tree type ATTRIBUTE_UNUSED, bool is_packed)
6956 /* All other types are naturally aligned. */
6960 /* Return the appropriate mode for a named address pointer. */
6961 static enum machine_mode
6962 spu_addr_space_pointer_mode (addr_space_t addrspace)
6966 case ADDR_SPACE_GENERIC:
6975 /* Return the appropriate mode for a named address address. */
6976 static enum machine_mode
6977 spu_addr_space_address_mode (addr_space_t addrspace)
6981 case ADDR_SPACE_GENERIC:
6990 /* Determine if one named address space is a subset of another. */
6993 spu_addr_space_subset_p (addr_space_t subset, addr_space_t superset)
6995 gcc_assert (subset == ADDR_SPACE_GENERIC || subset == ADDR_SPACE_EA);
6996 gcc_assert (superset == ADDR_SPACE_GENERIC || superset == ADDR_SPACE_EA);
6998 if (subset == superset)
7001 /* If we have -mno-address-space-conversion, treat __ea and generic as not
7002 being subsets but instead as disjoint address spaces. */
7003 else if (!TARGET_ADDRESS_SPACE_CONVERSION)
7007 return (subset == ADDR_SPACE_GENERIC && superset == ADDR_SPACE_EA);
7010 /* Convert from one address space to another. */
7012 spu_addr_space_convert (rtx op, tree from_type, tree to_type)
7014 addr_space_t from_as = TYPE_ADDR_SPACE (TREE_TYPE (from_type));
7015 addr_space_t to_as = TYPE_ADDR_SPACE (TREE_TYPE (to_type));
7017 gcc_assert (from_as == ADDR_SPACE_GENERIC || from_as == ADDR_SPACE_EA);
7018 gcc_assert (to_as == ADDR_SPACE_GENERIC || to_as == ADDR_SPACE_EA);
7020 if (to_as == ADDR_SPACE_GENERIC && from_as == ADDR_SPACE_EA)
7024 ls = gen_const_mem (DImode,
7025 gen_rtx_SYMBOL_REF (Pmode, "__ea_local_store"));
7026 set_mem_align (ls, 128);
7028 result = gen_reg_rtx (Pmode);
7029 ls = force_reg (Pmode, convert_modes (Pmode, DImode, ls, 1));
7030 op = force_reg (Pmode, convert_modes (Pmode, EAmode, op, 1));
7031 ls = emit_conditional_move (ls, NE, op, const0_rtx, Pmode,
7032 ls, const0_rtx, Pmode, 1);
7034 emit_insn (gen_subsi3 (result, op, ls));
7039 else if (to_as == ADDR_SPACE_EA && from_as == ADDR_SPACE_GENERIC)
7043 ls = gen_const_mem (DImode,
7044 gen_rtx_SYMBOL_REF (Pmode, "__ea_local_store"));
7045 set_mem_align (ls, 128);
7047 result = gen_reg_rtx (EAmode);
7048 ls = force_reg (EAmode, convert_modes (EAmode, DImode, ls, 1));
7049 op = force_reg (Pmode, op);
7050 ls = emit_conditional_move (ls, NE, op, const0_rtx, Pmode,
7051 ls, const0_rtx, EAmode, 1);
7052 op = force_reg (EAmode, convert_modes (EAmode, Pmode, op, 1));
7054 if (EAmode == SImode)
7055 emit_insn (gen_addsi3 (result, op, ls));
7057 emit_insn (gen_adddi3 (result, op, ls));
7067 /* Count the total number of instructions in each pipe and return the
7068 maximum, which is used as the Minimum Iteration Interval (MII)
7069 in the modulo scheduler. get_pipe() will return -2, -1, 0, or 1.
7070 -2 are instructions that can go in pipe0 or pipe1. */
7072 spu_sms_res_mii (struct ddg *g)
7075 unsigned t[4] = {0, 0, 0, 0};
7077 for (i = 0; i < g->num_nodes; i++)
7079 rtx insn = g->nodes[i].insn;
7080 int p = get_pipe (insn) + 2;
7082 gcc_assert (p >= 0);
7086 if (dump_file && INSN_P (insn))
7087 fprintf (dump_file, "i%d %s %d %d\n",
7089 insn_data[INSN_CODE(insn)].name,
7093 fprintf (dump_file, "%d %d %d %d\n", t[0], t[1], t[2], t[3]);
7095 return MAX ((t[0] + t[2] + t[3] + 1) / 2, MAX (t[2], t[3]));
7100 spu_init_expanders (void)
7105 /* HARD_FRAME_REGISTER is only 128 bit aligned when
7106 frame_pointer_needed is true. We don't know that until we're
7107 expanding the prologue. */
7108 REGNO_POINTER_ALIGN (HARD_FRAME_POINTER_REGNUM) = 8;
7110 /* A number of passes use LAST_VIRTUAL_REGISTER+1 and
7111 LAST_VIRTUAL_REGISTER+2 to test the back-end. We want them
7112 to be treated as aligned, so generate them here. */
7113 r0 = gen_reg_rtx (SImode);
7114 r1 = gen_reg_rtx (SImode);
7115 mark_reg_pointer (r0, 128);
7116 mark_reg_pointer (r1, 128);
7117 gcc_assert (REGNO (r0) == LAST_VIRTUAL_REGISTER + 1
7118 && REGNO (r1) == LAST_VIRTUAL_REGISTER + 2);
7122 static enum machine_mode
7123 spu_libgcc_cmp_return_mode (void)
7126 /* For SPU word mode is TI mode so it is better to use SImode
7127 for compare returns. */
7131 static enum machine_mode
7132 spu_libgcc_shift_count_mode (void)
7134 /* For SPU word mode is TI mode so it is better to use SImode
7135 for shift counts. */
7139 /* Implement targetm.section_type_flags. */
7141 spu_section_type_flags (tree decl, const char *name, int reloc)
7143 /* .toe needs to have type @nobits. */
7144 if (strcmp (name, ".toe") == 0)
7146 /* Don't load _ea into the current address space. */
7147 if (strcmp (name, "._ea") == 0)
7148 return SECTION_WRITE | SECTION_DEBUG;
7149 return default_section_type_flags (decl, name, reloc);
7152 /* Implement targetm.select_section. */
7154 spu_select_section (tree decl, int reloc, unsigned HOST_WIDE_INT align)
7156 /* Variables and constants defined in the __ea address space
7157 go into a special section named "._ea". */
7158 if (TREE_TYPE (decl) != error_mark_node
7159 && TYPE_ADDR_SPACE (TREE_TYPE (decl)) == ADDR_SPACE_EA)
7161 /* We might get called with string constants, but get_named_section
7162 doesn't like them as they are not DECLs. Also, we need to set
7163 flags in that case. */
7165 return get_section ("._ea", SECTION_WRITE | SECTION_DEBUG, NULL);
7167 return get_named_section (decl, "._ea", reloc);
7170 return default_elf_select_section (decl, reloc, align);
7173 /* Implement targetm.unique_section. */
7175 spu_unique_section (tree decl, int reloc)
7177 /* We don't support unique section names in the __ea address
7179 if (TREE_TYPE (decl) != error_mark_node
7180 && TYPE_ADDR_SPACE (TREE_TYPE (decl)) != 0)
7183 default_unique_section (decl, reloc);
7186 /* Generate a constant or register which contains 2^SCALE. We assume
7187 the result is valid for MODE. Currently, MODE must be V4SFmode and
7188 SCALE must be SImode. */
7190 spu_gen_exp2 (enum machine_mode mode, rtx scale)
7192 gcc_assert (mode == V4SFmode);
7193 gcc_assert (GET_MODE (scale) == SImode || GET_CODE (scale) == CONST_INT);
7194 if (GET_CODE (scale) != CONST_INT)
7196 /* unsigned int exp = (127 + scale) << 23;
7197 __vector float m = (__vector float) spu_splats (exp); */
7198 rtx reg = force_reg (SImode, scale);
7199 rtx exp = gen_reg_rtx (SImode);
7200 rtx mul = gen_reg_rtx (mode);
7201 emit_insn (gen_addsi3 (exp, reg, GEN_INT (127)));
7202 emit_insn (gen_ashlsi3 (exp, exp, GEN_INT (23)));
7203 emit_insn (gen_spu_splats (mul, gen_rtx_SUBREG (GET_MODE_INNER (mode), exp, 0)));
7208 HOST_WIDE_INT exp = 127 + INTVAL (scale);
7209 unsigned char arr[16];
7210 arr[0] = arr[4] = arr[8] = arr[12] = exp >> 1;
7211 arr[1] = arr[5] = arr[9] = arr[13] = exp << 7;
7212 arr[2] = arr[6] = arr[10] = arr[14] = 0;
7213 arr[3] = arr[7] = arr[11] = arr[15] = 0;
7214 return array_to_constant (mode, arr);
7218 /* After reload, just change the convert into a move instruction
7219 or a dead instruction. */
7221 spu_split_convert (rtx ops[])
7223 if (REGNO (ops[0]) == REGNO (ops[1]))
7224 emit_note (NOTE_INSN_DELETED);
7227 /* Use TImode always as this might help hard reg copyprop. */
7228 rtx op0 = gen_rtx_REG (TImode, REGNO (ops[0]));
7229 rtx op1 = gen_rtx_REG (TImode, REGNO (ops[1]));
7230 emit_insn (gen_move_insn (op0, op1));
7235 spu_function_profiler (FILE * file, int labelno ATTRIBUTE_UNUSED)
7237 fprintf (file, "# profile\n");
7238 fprintf (file, "brsl $75, _mcount\n");
7241 /* Implement targetm.ref_may_alias_errno. */
7243 spu_ref_may_alias_errno (ao_ref *ref)
7245 tree base = ao_ref_base (ref);
7247 /* With SPU newlib, errno is defined as something like
7249 The default implementation of this target macro does not
7250 recognize such expressions, so special-code for it here. */
7252 if (TREE_CODE (base) == VAR_DECL
7253 && !TREE_STATIC (base)
7254 && DECL_EXTERNAL (base)
7255 && TREE_CODE (TREE_TYPE (base)) == RECORD_TYPE
7256 && strcmp (IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (base)),
7257 "_impure_data") == 0
7258 /* _errno is the first member of _impure_data. */
7259 && ref->offset == 0)
7262 return default_ref_may_alias_errno (ref);
7265 /* Output thunk to FILE that implements a C++ virtual function call (with
7266 multiple inheritance) to FUNCTION. The thunk adjusts the this pointer
7267 by DELTA, and unless VCALL_OFFSET is zero, applies an additional adjustment
7268 stored at VCALL_OFFSET in the vtable whose address is located at offset 0
7269 relative to the resulting this pointer. */
7272 spu_output_mi_thunk (FILE *file, tree thunk ATTRIBUTE_UNUSED,
7273 HOST_WIDE_INT delta, HOST_WIDE_INT vcall_offset,
7278 /* Make sure unwind info is emitted for the thunk if needed. */
7279 final_start_function (emit_barrier (), file, 1);
7281 /* Operand 0 is the target function. */
7282 op[0] = XEXP (DECL_RTL (function), 0);
7284 /* Operand 1 is the 'this' pointer. */
7285 if (aggregate_value_p (TREE_TYPE (TREE_TYPE (function)), function))
7286 op[1] = gen_rtx_REG (Pmode, FIRST_ARG_REGNUM + 1);
7288 op[1] = gen_rtx_REG (Pmode, FIRST_ARG_REGNUM);
7290 /* Operands 2/3 are the low/high halfwords of delta. */
7291 op[2] = GEN_INT (trunc_int_for_mode (delta, HImode));
7292 op[3] = GEN_INT (trunc_int_for_mode (delta >> 16, HImode));
7294 /* Operands 4/5 are the low/high halfwords of vcall_offset. */
7295 op[4] = GEN_INT (trunc_int_for_mode (vcall_offset, HImode));
7296 op[5] = GEN_INT (trunc_int_for_mode (vcall_offset >> 16, HImode));
7298 /* Operands 6/7 are temporary registers. */
7299 op[6] = gen_rtx_REG (Pmode, 79);
7300 op[7] = gen_rtx_REG (Pmode, 78);
7302 /* Add DELTA to this pointer. */
7305 if (delta >= -0x200 && delta < 0x200)
7306 output_asm_insn ("ai\t%1,%1,%2", op);
7307 else if (delta >= -0x8000 && delta < 0x8000)
7309 output_asm_insn ("il\t%6,%2", op);
7310 output_asm_insn ("a\t%1,%1,%6", op);
7314 output_asm_insn ("ilhu\t%6,%3", op);
7315 output_asm_insn ("iohl\t%6,%2", op);
7316 output_asm_insn ("a\t%1,%1,%6", op);
7320 /* Perform vcall adjustment. */
7323 output_asm_insn ("lqd\t%7,0(%1)", op);
7324 output_asm_insn ("rotqby\t%7,%7,%1", op);
7326 if (vcall_offset >= -0x200 && vcall_offset < 0x200)
7327 output_asm_insn ("ai\t%7,%7,%4", op);
7328 else if (vcall_offset >= -0x8000 && vcall_offset < 0x8000)
7330 output_asm_insn ("il\t%6,%4", op);
7331 output_asm_insn ("a\t%7,%7,%6", op);
7335 output_asm_insn ("ilhu\t%6,%5", op);
7336 output_asm_insn ("iohl\t%6,%4", op);
7337 output_asm_insn ("a\t%7,%7,%6", op);
7340 output_asm_insn ("lqd\t%6,0(%7)", op);
7341 output_asm_insn ("rotqby\t%6,%6,%7", op);
7342 output_asm_insn ("a\t%1,%1,%6", op);
7345 /* Jump to target. */
7346 output_asm_insn ("br\t%0", op);
7348 final_end_function ();