/* Target Code for TI C6X
   Copyright (C) 2010, 2011, 2012 Free Software Foundation, Inc.
   Contributed by Andrew Jenner <andrew@codesourcery.com>
   Contributed by Bernd Schmidt <bernds@codesourcery.com>

   This file is part of GCC.

   GCC is free software; you can redistribute it and/or modify it
   under the terms of the GNU General Public License as published
   by the Free Software Foundation; either version 3, or (at your
   option) any later version.

   GCC is distributed in the hope that it will be useful, but WITHOUT
   ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
   or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public
   License for more details.

   You should have received a copy of the GNU General Public License
   along with GCC; see the file COPYING3.  If not see
   <http://www.gnu.org/licenses/>.  */
#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "tm.h"
#include "rtl.h"
#include "tree.h"
#include "insn-flags.h"
#include "output.h"
#include "insn-attr.h"
#include "insn-codes.h"
#include "expr.h"
#include "regs.h"
#include "optabs.h"
#include "recog.h"
#include "ggc.h"
#include "sched-int.h"
#include "tm-constrs.h"
#include "df.h"
#include "function.h"
#include "diagnostic-core.h"
#include "cgraph.h"
#include "cfglayout.h"
#include "langhooks.h"
#include "target.h"
#include "target-def.h"
#include "sel-sched.h"
#include "hw-doloop.h"
#include "regrename.h"
/* Table of supported architecture variants.  */
typedef struct
{
  const char *arch;
  enum c6x_cpu_type type;
  unsigned short features;
} c6x_arch_table;

/* A list of all ISAs, mapping each one to a representative device.
   Used for -march selection.  */
static const c6x_arch_table all_isas[] =
{
#define C6X_ISA(NAME,DEVICE,FLAGS) \
  { NAME, DEVICE, FLAGS },
#include "c6x-isas.def"
#undef C6X_ISA
  { NULL, C6X_CPU_C62X, 0 }
};
/* This is the parsed result of the "-march=" option, if given.  */
enum c6x_cpu_type c6x_arch = C6X_DEFAULT_ARCH;

/* A mask of insn types that are allowed by the architecture selected by
   the -march option.  */
unsigned long c6x_insn_mask = C6X_DEFAULT_INSN_MASK;

/* The instruction that is being output (as obtained from
   FINAL_PRESCAN_INSN).  */
static rtx c6x_current_insn = NULL_RTX;
/* A decl we build to access __c6xabi_DSBT_base.  */
static GTY(()) tree dsbt_decl;

/* Determines whether we run our final scheduling pass or not.  We always
   avoid the normal second scheduling pass.  */
static int c6x_flag_schedule_insns2;

/* Determines whether we run variable tracking in machine dependent
   reorganization.  */
static int c6x_flag_var_tracking;

/* Determines whether we use modulo scheduling.  */
static int c6x_flag_modulo_sched;

/* Record the state of flag_pic before we set it to 1 for DSBT.  */
int c6x_initial_flag_pic;
typedef struct
{
  /* We record the clock cycle for every insn during scheduling.  */
  int clock;
  /* After scheduling, we run assign_reservations to choose unit
     reservations for all insns.  These are recorded here.  */
  int reservation;
  /* Records the new condition for insns which must be made
     conditional after scheduling.  An entry of NULL_RTX means no such
     change is necessary.  */
  rtx new_cond;
  /* True for the first insn that was scheduled in an ebb.  */
  bool ebb_start;
  /* The scheduler state after the insn, transformed into a mask of UNIT_QID
     bits rather than storing the state.  Meaningful only for the last
     insn of a cycle.  */
  unsigned int unit_mask;
} c6x_sched_insn_info;

DEF_VEC_O(c6x_sched_insn_info);
DEF_VEC_ALLOC_O(c6x_sched_insn_info, heap);

/* Record a c6x_sched_insn_info structure for every insn in the function.  */
static VEC(c6x_sched_insn_info, heap) *insn_info;

#define INSN_INFO_LENGTH (VEC_length (c6x_sched_insn_info, insn_info))
#define INSN_INFO_ENTRY(N) (*VEC_index (c6x_sched_insn_info, insn_info, (N)))
/* True once we have emitted the .cfi_sections directive for this file.  */
static bool done_cfi_sections;

#define RESERVATION_FLAG_D 1
#define RESERVATION_FLAG_L 2
#define RESERVATION_FLAG_S 4
#define RESERVATION_FLAG_M 8
#define RESERVATION_FLAG_DL (RESERVATION_FLAG_D | RESERVATION_FLAG_L)
#define RESERVATION_FLAG_DS (RESERVATION_FLAG_D | RESERVATION_FLAG_S)
#define RESERVATION_FLAG_LS (RESERVATION_FLAG_L | RESERVATION_FLAG_S)
#define RESERVATION_FLAG_DLS (RESERVATION_FLAG_D | RESERVATION_FLAG_LS)
/* The DFA names of the units.  */
static const char *const c6x_unit_names[] =
{
  "d1", "l1", "s1", "m1", "fps1", "fpl1", "adddps1", "adddpl1",
  "d2", "l2", "s2", "m2", "fps2", "fpl2", "adddps2", "adddpl2"
};

/* The DFA unit number for each unit in c6x_unit_names[].  */
static int c6x_unit_codes[ARRAY_SIZE (c6x_unit_names)];

/* Unit query IDs.  */
#define UNIT_QID_D1 0
#define UNIT_QID_L1 1
#define UNIT_QID_S1 2
#define UNIT_QID_M1 3
#define UNIT_QID_FPS1 4
#define UNIT_QID_FPL1 5
#define UNIT_QID_ADDDPS1 6
#define UNIT_QID_ADDDPL1 7
#define UNIT_QID_SIDE_OFFSET 8

#define RESERVATION_S1 2
#define RESERVATION_S2 10
/* An enum for the unit requirements we count in the UNIT_REQS table.  */
enum unitreqs
{
  UNIT_REQ_D,
  UNIT_REQ_L,
  UNIT_REQ_S,
  UNIT_REQ_M,
  UNIT_REQ_DL,
  UNIT_REQ_DS,
  UNIT_REQ_LS,
  UNIT_REQ_DLS,
  UNIT_REQ_X,
  UNIT_REQ_T,
  UNIT_REQ_MAX
};

/* A table used to count unit requirements.  Used when computing minimum
   iteration intervals.  */
typedef int unit_req_table[2][UNIT_REQ_MAX];
static unit_req_table unit_reqs;
/* Register map for debugging.  */
int const dbx_register_map[FIRST_PSEUDO_REGISTER] =
{
  0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,	/* A0 - A15.  */
  37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49,		/* A16 - A32.  */
  50, 51, 52,
  16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28,		/* B0 - B15.  */
  29, 30, 31,
  53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65,		/* B16 - B32.  */
  66, 67, 68,
  -1, -1, -1							/* FP, ARGP, ILC.  */
};
/* Allocate a new, cleared machine_function structure.  */

static struct machine_function *
c6x_init_machine_status (void)
{
  return ggc_alloc_cleared_machine_function ();
}
/* Implement TARGET_OPTION_OVERRIDE.  */

static void
c6x_option_override (void)
{
  unsigned i;

  if (global_options_set.x_c6x_arch_option)
    {
      c6x_arch = all_isas[c6x_arch_option].type;
      c6x_insn_mask &= ~C6X_INSNS_ALL_CPU_BITS;
      c6x_insn_mask |= all_isas[c6x_arch_option].features;
    }

  c6x_flag_schedule_insns2 = flag_schedule_insns_after_reload;
  flag_schedule_insns_after_reload = 0;

  c6x_flag_modulo_sched = flag_modulo_sched;
  flag_modulo_sched = 0;

  init_machine_status = c6x_init_machine_status;

  for (i = 0; i < ARRAY_SIZE (c6x_unit_names); i++)
    c6x_unit_codes[i] = get_cpu_unit_code (c6x_unit_names[i]);

  if (flag_pic && !TARGET_DSBT)
    {
      error ("-fpic and -fPIC not supported without -mdsbt on this target");
      flag_pic = 0;
    }
  c6x_initial_flag_pic = flag_pic;
  if (TARGET_DSBT && !flag_pic)
    flag_pic = 1;
}
/* Implement the TARGET_CONDITIONAL_REGISTER_USAGE hook.  */

static void
c6x_conditional_register_usage (void)
{
  int i;
  if (c6x_arch == C6X_CPU_C62X || c6x_arch == C6X_CPU_C67X)
    for (i = 16; i < 32; i++)
      {
	fixed_regs[i] = 1;
	fixed_regs[32 + i] = 1;
      }
  if (TARGET_INSNS_64)
    {
      SET_HARD_REG_BIT (reg_class_contents[(int)PREDICATE_A_REGS],
			REG_A0);
      SET_HARD_REG_BIT (reg_class_contents[(int)PREDICATE_REGS],
			REG_A0);
      CLEAR_HARD_REG_BIT (reg_class_contents[(int)NONPREDICATE_A_REGS],
			  REG_A0);
      CLEAR_HARD_REG_BIT (reg_class_contents[(int)NONPREDICATE_REGS],
			  REG_A0);
    }
}
static GTY(()) rtx eqdf_libfunc;
static GTY(()) rtx nedf_libfunc;
static GTY(()) rtx ledf_libfunc;
static GTY(()) rtx ltdf_libfunc;
static GTY(()) rtx gedf_libfunc;
static GTY(()) rtx gtdf_libfunc;
static GTY(()) rtx eqsf_libfunc;
static GTY(()) rtx nesf_libfunc;
static GTY(()) rtx lesf_libfunc;
static GTY(()) rtx ltsf_libfunc;
static GTY(()) rtx gesf_libfunc;
static GTY(()) rtx gtsf_libfunc;
static GTY(()) rtx strasgi_libfunc;
static GTY(()) rtx strasgi64p_libfunc;
/* Implement the TARGET_INIT_LIBFUNCS macro.  We use this to rename library
   functions to match the C6x ABI.  */

static void
c6x_init_libfuncs (void)
{
  /* Double-precision floating-point arithmetic.  */
  set_optab_libfunc (add_optab, DFmode, "__c6xabi_addd");
  set_optab_libfunc (sdiv_optab, DFmode, "__c6xabi_divd");
  set_optab_libfunc (smul_optab, DFmode, "__c6xabi_mpyd");
  set_optab_libfunc (neg_optab, DFmode, "__c6xabi_negd");
  set_optab_libfunc (sub_optab, DFmode, "__c6xabi_subd");

  /* Single-precision floating-point arithmetic.  */
  set_optab_libfunc (add_optab, SFmode, "__c6xabi_addf");
  set_optab_libfunc (sdiv_optab, SFmode, "__c6xabi_divf");
  set_optab_libfunc (smul_optab, SFmode, "__c6xabi_mpyf");
  set_optab_libfunc (neg_optab, SFmode, "__c6xabi_negf");
  set_optab_libfunc (sub_optab, SFmode, "__c6xabi_subf");

  /* Floating-point comparisons.  */
  eqsf_libfunc = init_one_libfunc ("__c6xabi_eqf");
  nesf_libfunc = init_one_libfunc ("__c6xabi_neqf");
  lesf_libfunc = init_one_libfunc ("__c6xabi_lef");
  ltsf_libfunc = init_one_libfunc ("__c6xabi_ltf");
  gesf_libfunc = init_one_libfunc ("__c6xabi_gef");
  gtsf_libfunc = init_one_libfunc ("__c6xabi_gtf");
  eqdf_libfunc = init_one_libfunc ("__c6xabi_eqd");
  nedf_libfunc = init_one_libfunc ("__c6xabi_neqd");
  ledf_libfunc = init_one_libfunc ("__c6xabi_led");
  ltdf_libfunc = init_one_libfunc ("__c6xabi_ltd");
  gedf_libfunc = init_one_libfunc ("__c6xabi_ged");
  gtdf_libfunc = init_one_libfunc ("__c6xabi_gtd");

  set_optab_libfunc (eq_optab, SFmode, NULL);
  set_optab_libfunc (ne_optab, SFmode, "__c6xabi_neqf");
  set_optab_libfunc (gt_optab, SFmode, NULL);
  set_optab_libfunc (ge_optab, SFmode, NULL);
  set_optab_libfunc (lt_optab, SFmode, NULL);
  set_optab_libfunc (le_optab, SFmode, NULL);
  set_optab_libfunc (unord_optab, SFmode, "__c6xabi_unordf");
  set_optab_libfunc (eq_optab, DFmode, NULL);
  set_optab_libfunc (ne_optab, DFmode, "__c6xabi_neqd");
  set_optab_libfunc (gt_optab, DFmode, NULL);
  set_optab_libfunc (ge_optab, DFmode, NULL);
  set_optab_libfunc (lt_optab, DFmode, NULL);
  set_optab_libfunc (le_optab, DFmode, NULL);
  set_optab_libfunc (unord_optab, DFmode, "__c6xabi_unordd");

  /* Floating-point to integer conversions.  */
  set_conv_libfunc (sfix_optab, SImode, DFmode, "__c6xabi_fixdi");
  set_conv_libfunc (ufix_optab, SImode, DFmode, "__c6xabi_fixdu");
  set_conv_libfunc (sfix_optab, DImode, DFmode, "__c6xabi_fixdlli");
  set_conv_libfunc (ufix_optab, DImode, DFmode, "__c6xabi_fixdull");
  set_conv_libfunc (sfix_optab, SImode, SFmode, "__c6xabi_fixfi");
  set_conv_libfunc (ufix_optab, SImode, SFmode, "__c6xabi_fixfu");
  set_conv_libfunc (sfix_optab, DImode, SFmode, "__c6xabi_fixflli");
  set_conv_libfunc (ufix_optab, DImode, SFmode, "__c6xabi_fixfull");

  /* Conversions between floating types.  */
  set_conv_libfunc (trunc_optab, SFmode, DFmode, "__c6xabi_cvtdf");
  set_conv_libfunc (sext_optab, DFmode, SFmode, "__c6xabi_cvtfd");

  /* Integer to floating-point conversions.  */
  set_conv_libfunc (sfloat_optab, DFmode, SImode, "__c6xabi_fltid");
  set_conv_libfunc (ufloat_optab, DFmode, SImode, "__c6xabi_fltud");
  set_conv_libfunc (sfloat_optab, DFmode, DImode, "__c6xabi_fltllid");
  set_conv_libfunc (ufloat_optab, DFmode, DImode, "__c6xabi_fltulld");
  set_conv_libfunc (sfloat_optab, SFmode, SImode, "__c6xabi_fltif");
  set_conv_libfunc (ufloat_optab, SFmode, SImode, "__c6xabi_fltuf");
  set_conv_libfunc (sfloat_optab, SFmode, DImode, "__c6xabi_fltllif");
  set_conv_libfunc (ufloat_optab, SFmode, DImode, "__c6xabi_fltullf");

  /* Long long operations.  */
  set_optab_libfunc (smul_optab, DImode, "__c6xabi_mpyll");
  set_optab_libfunc (ashl_optab, DImode, "__c6xabi_llshl");
  set_optab_libfunc (lshr_optab, DImode, "__c6xabi_llshru");
  set_optab_libfunc (ashr_optab, DImode, "__c6xabi_llshr");

  set_optab_libfunc (sdiv_optab, SImode, "__c6xabi_divi");
  set_optab_libfunc (udiv_optab, SImode, "__c6xabi_divu");
  set_optab_libfunc (smod_optab, SImode, "__c6xabi_remi");
  set_optab_libfunc (umod_optab, SImode, "__c6xabi_remu");
  set_optab_libfunc (sdivmod_optab, SImode, "__c6xabi_divremi");
  set_optab_libfunc (udivmod_optab, SImode, "__c6xabi_divremu");
  set_optab_libfunc (sdiv_optab, DImode, "__c6xabi_divlli");
  set_optab_libfunc (udiv_optab, DImode, "__c6xabi_divull");
  set_optab_libfunc (smod_optab, DImode, "__c6xabi_remlli");
  set_optab_libfunc (umod_optab, DImode, "__c6xabi_remull");
  set_optab_libfunc (udivmod_optab, DImode, "__c6xabi_divremull");

  /* Block moves.  */
  strasgi_libfunc = init_one_libfunc ("__c6xabi_strasgi");
  strasgi64p_libfunc = init_one_libfunc ("__c6xabi_strasgi_64plus");
}
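/* Illustrative note (not in the original sources): with the registrations
   above, a DFmode division expands to a call to __c6xabi_divd rather than
   libgcc's default __divdf3, and a DImode unsigned modulus calls
   __c6xabi_remull rather than __umoddi3.  */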
/* Begin the assembly file.  */

static void
c6x_file_start (void)
{
  /* Variable tracking should be run after all optimizations which change order
     of insns.  It also needs a valid CFG.  This can't be done in
     c6x_override_options, because flag_var_tracking is finalized after
     that.  */
  c6x_flag_var_tracking = flag_var_tracking;
  flag_var_tracking = 0;

  done_cfi_sections = false;
  default_file_start ();

  /* Arrays are aligned to 8-byte boundaries.  */
  asm_fprintf (asm_out_file,
	       "\t.c6xabi_attribute Tag_ABI_array_object_alignment, 0\n");
  asm_fprintf (asm_out_file,
	       "\t.c6xabi_attribute Tag_ABI_array_object_align_expected, 0\n");

  /* Stack alignment is 8 bytes.  */
  asm_fprintf (asm_out_file,
	       "\t.c6xabi_attribute Tag_ABI_stack_align_needed, 0\n");
  asm_fprintf (asm_out_file,
	       "\t.c6xabi_attribute Tag_ABI_stack_align_preserved, 0\n");

#if 0 /* FIXME: Reenable when TI's tools are fixed.  */
  /* ??? Ideally we'd check flag_short_wchar somehow.  */
  asm_fprintf (asm_out_file, "\t.c6xabi_attribute Tag_ABI_wchar_t, %d\n", 2);
#endif

  /* We conform to version 1.0 of the ABI.  */
  asm_fprintf (asm_out_file,
	       "\t.c6xabi_attribute Tag_ABI_conformance, \"1.0\"\n");
}
/* The LTO frontend only enables exceptions when it sees a function that
   uses them.  This changes the return value of dwarf2out_do_frame, so we
   have to check before every function.  */

void
c6x_output_file_unwind (FILE * f)
{
  if (done_cfi_sections)
    return;

  /* Output a .cfi_sections directive.  */
  if (dwarf2out_do_frame ())
    {
      if (flag_unwind_tables || flag_exceptions)
	{
	  if (write_symbols == DWARF2_DEBUG
	      || write_symbols == VMS_AND_DWARF2_DEBUG)
	    asm_fprintf (f, "\t.cfi_sections .debug_frame, .c6xabi.exidx\n");
	  else
	    asm_fprintf (f, "\t.cfi_sections .c6xabi.exidx\n");
	}
      else
	asm_fprintf (f, "\t.cfi_sections .debug_frame\n");
      done_cfi_sections = true;
    }
}
/* Output unwind directives at the end of a function.  */

static void
c6x_output_fn_unwind (FILE * f)
{
  /* Return immediately if we are not generating unwinding tables.  */
  if (! (flag_unwind_tables || flag_exceptions))
    return;

  /* If this function will never be unwound, then mark it as such.  */
  if (!(flag_unwind_tables || crtl->uses_eh_lsda)
      && (TREE_NOTHROW (current_function_decl)
	  || crtl->all_throwers_are_sibcalls))
    fputs ("\t.cantunwind\n", f);

  fputs ("\t.endp\n", f);
}
/* Stack and Calling.  */

/* The registers used to pass arguments, in order.  */
int argument_registers[10] =
{
  REG_A4, REG_B4,
  REG_A6, REG_B6,
  REG_A8, REG_B8,
  REG_A10, REG_B10,
  REG_A12, REG_B12
};
/* Implements the macro INIT_CUMULATIVE_ARGS defined in c6x.h.  */

void
c6x_init_cumulative_args (CUMULATIVE_ARGS *cum, const_tree fntype, rtx libname,
			  int n_named_args ATTRIBUTE_UNUSED)
{
  cum->count = 0;
  cum->nregs = 10;
  if (!libname && fntype)
    {
      /* We need to find out the number of named arguments.  Unfortunately,
	 for incoming arguments, N_NAMED_ARGS is set to -1.  */
      if (stdarg_p (fntype))
	cum->nregs = type_num_arguments (fntype) - 1;
      if (cum->nregs > 10)
	cum->nregs = 10;
    }
}
/* Implements the macro FUNCTION_ARG defined in c6x.h.  */

static rtx
c6x_function_arg (cumulative_args_t cum_v, enum machine_mode mode,
		  const_tree type, bool named ATTRIBUTE_UNUSED)
{
  CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
  if (cum->count >= cum->nregs)
    return NULL_RTX;
  if (type)
    {
      HOST_WIDE_INT size = int_size_in_bytes (type);
      if (TARGET_BIG_ENDIAN && AGGREGATE_TYPE_P (type))
	{
	  if (size > 4)
	    {
	      rtx reg1 = gen_rtx_REG (SImode, argument_registers[cum->count] + 1);
	      rtx reg2 = gen_rtx_REG (SImode, argument_registers[cum->count]);
	      rtvec vec = gen_rtvec (2, gen_rtx_EXPR_LIST (VOIDmode, reg1, const0_rtx),
				     gen_rtx_EXPR_LIST (VOIDmode, reg2, GEN_INT (4)));
	      return gen_rtx_PARALLEL (mode, vec);
	    }
	}
    }
  return gen_rtx_REG (mode, argument_registers[cum->count]);
}
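/* Illustrative note (not in the original sources): for an 8-byte aggregate
   in the first argument slot on a big-endian target, the PARALLEL above
   places A5 at byte offset 0 and A4 at byte offset 4, so the register pair
   matches the in-memory layout of the aggregate.  */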
static void
c6x_function_arg_advance (cumulative_args_t cum_v,
			  enum machine_mode mode ATTRIBUTE_UNUSED,
			  const_tree type ATTRIBUTE_UNUSED,
			  bool named ATTRIBUTE_UNUSED)
{
  CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
  cum->count++;
}
/* Return true if BLOCK_REG_PADDING (MODE, TYPE, FIRST) should return
   upward rather than downward.  */

bool
c6x_block_reg_pad_upward (enum machine_mode mode ATTRIBUTE_UNUSED,
			  const_tree type, bool first)
{
  HOST_WIDE_INT size;

  if (!TARGET_BIG_ENDIAN)
    return true;
  if (!first)
    return true;
  if (!type)
    return true;
  size = int_size_in_bytes (type);
  return size == 3;
}
/* Implement TARGET_FUNCTION_ARG_BOUNDARY.  */

static unsigned int
c6x_function_arg_boundary (enum machine_mode mode, const_tree type)
{
  unsigned int boundary = type ? TYPE_ALIGN (type) : GET_MODE_BITSIZE (mode);

  if (boundary > BITS_PER_WORD)
    return 2 * BITS_PER_WORD;

  if (mode == BLKmode)
    {
      HOST_WIDE_INT size = int_size_in_bytes (type);
      if (size > 4)
	return 2 * BITS_PER_WORD;
      if (boundary < BITS_PER_WORD)
	{
	  if (size >= 3)
	    return BITS_PER_WORD;
	  if (size >= 2)
	    return 2 * BITS_PER_UNIT;
	}
    }
  return boundary;
}
/* Implement TARGET_FUNCTION_ARG_ROUND_BOUNDARY.  */
static unsigned int
c6x_function_arg_round_boundary (enum machine_mode mode, const_tree type)
{
  return c6x_function_arg_boundary (mode, type);
}
/* TARGET_FUNCTION_VALUE implementation.  Returns an RTX representing the place
   where function FUNC returns or receives a value of data type TYPE.  */

static rtx
c6x_function_value (const_tree type, const_tree func ATTRIBUTE_UNUSED,
		    bool outgoing ATTRIBUTE_UNUSED)
{
  /* Functions return values in register A4.  When returning aggregates, we may
     have to adjust for endianness.  */
  if (TARGET_BIG_ENDIAN && type && AGGREGATE_TYPE_P (type))
    {
      HOST_WIDE_INT size = int_size_in_bytes (type);
      if (size > 4)
	{
	  rtx reg1 = gen_rtx_REG (SImode, REG_A4 + 1);
	  rtx reg2 = gen_rtx_REG (SImode, REG_A4);
	  rtvec vec = gen_rtvec (2, gen_rtx_EXPR_LIST (VOIDmode, reg1, const0_rtx),
				 gen_rtx_EXPR_LIST (VOIDmode, reg2, GEN_INT (4)));
	  return gen_rtx_PARALLEL (TYPE_MODE (type), vec);
	}
    }
  return gen_rtx_REG (TYPE_MODE (type), REG_A4);
}
/* Implement TARGET_LIBCALL_VALUE.  */

static rtx
c6x_libcall_value (enum machine_mode mode, const_rtx fun ATTRIBUTE_UNUSED)
{
  return gen_rtx_REG (mode, REG_A4);
}
/* TARGET_STRUCT_VALUE_RTX implementation.  */

static rtx
c6x_struct_value_rtx (tree type ATTRIBUTE_UNUSED, int incoming ATTRIBUTE_UNUSED)
{
  return gen_rtx_REG (Pmode, REG_A3);
}
/* Implement TARGET_FUNCTION_VALUE_REGNO_P.  */

static bool
c6x_function_value_regno_p (const unsigned int regno)
{
  return regno == REG_A4;
}
/* Types larger than 64 bit, and variable sized types, are passed by
   reference.  The callee must copy them; see c6x_callee_copies.  */

static bool
c6x_pass_by_reference (cumulative_args_t cum_v ATTRIBUTE_UNUSED,
		       enum machine_mode mode, const_tree type,
		       bool named ATTRIBUTE_UNUSED)
{
  int size = -1;
  if (type)
    size = int_size_in_bytes (type);
  else if (mode != VOIDmode)
    size = GET_MODE_SIZE (mode);
  return size > 2 * UNITS_PER_WORD || size == -1;
}
/* Decide whether a type should be returned in memory (true)
   or in a register (false).  This is called by the macro
   TARGET_RETURN_IN_MEMORY.  */

static bool
c6x_return_in_memory (const_tree type, const_tree fntype ATTRIBUTE_UNUSED)
{
  int size = int_size_in_bytes (type);
  return size > 2 * UNITS_PER_WORD || size == -1;
}
/* Values which must be returned in the most-significant end of the return
   register.  */

static bool
c6x_return_in_msb (const_tree valtype)
{
  HOST_WIDE_INT size = int_size_in_bytes (valtype);
  return TARGET_BIG_ENDIAN && AGGREGATE_TYPE_P (valtype) && size == 3;
}
/* Implement TARGET_CALLEE_COPIES.  */

static bool
c6x_callee_copies (cumulative_args_t cum_v ATTRIBUTE_UNUSED,
		   enum machine_mode mode ATTRIBUTE_UNUSED,
		   const_tree type ATTRIBUTE_UNUSED,
		   bool named ATTRIBUTE_UNUSED)
{
  return true;
}
/* Return the type to use as __builtin_va_list.  */
static tree
c6x_build_builtin_va_list (void)
{
  return build_pointer_type (char_type_node);
}
/* Implement TARGET_ASM_TRAMPOLINE_TEMPLATE.  */
static void
c6x_asm_trampoline_template (FILE *f)
{
  fprintf (f, "\t.long\t0x0000002b\n"); /* mvkl .s2 fnlow,B0 */
  fprintf (f, "\t.long\t0x01000028\n"); /* || mvkl .s1 sclow,A2 */
  fprintf (f, "\t.long\t0x0000006b\n"); /* mvkh .s2 fnhigh,B0 */
  fprintf (f, "\t.long\t0x01000068\n"); /* || mvkh .s1 schigh,A2 */
  fprintf (f, "\t.long\t0x00000362\n"); /* b .s2 B0 */
  fprintf (f, "\t.long\t0x00008000\n"); /* nop 5 */
  fprintf (f, "\t.long\t0x00000000\n"); /* nop */
  fprintf (f, "\t.long\t0x00000000\n"); /* nop */
}
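/* Note (not in the original sources): the eight words above form the fixed
   part of the trampoline.  c6x_initialize_trampoline below patches the
   mvkl/mvkh pairs: the B0 pair receives the target function address and the
   A2 pair the static chain value.  Each 16-bit half is inserted into the
   instruction's constant field at bits 7..22, which is why the initializer
   uses the mask 0xffff << 7 and shifts of 7 (for the low halves) and 9 (for
   the high halves).  */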
/* Emit RTL insns to initialize the variable parts of a trampoline at
   TRAMP.  FNADDR is an RTX for the address of the function's pure
   code.  CXT is an RTX for the static chain value for the function.  */

static void
c6x_initialize_trampoline (rtx tramp, tree fndecl, rtx cxt)
{
  rtx fnaddr = XEXP (DECL_RTL (fndecl), 0);
  rtx t1 = copy_to_reg (fnaddr);
  rtx t2 = copy_to_reg (cxt);
  rtx mask = gen_reg_rtx (SImode);
  int i;

  emit_block_move (tramp, assemble_trampoline_template (),
		   GEN_INT (TRAMPOLINE_SIZE), BLOCK_OP_NORMAL);

  emit_move_insn (mask, GEN_INT (0xffff << 7));

  for (i = 0; i < 4; i++)
    {
      rtx mem = adjust_address (tramp, SImode, i * 4);
      rtx t = (i & 1) ? t2 : t1;
      rtx v1 = gen_reg_rtx (SImode);
      rtx v2 = gen_reg_rtx (SImode);
      emit_move_insn (v1, mem);
      if (i < 2)
	emit_insn (gen_ashlsi3 (v2, t, GEN_INT (7)));
      else
	emit_insn (gen_lshrsi3 (v2, t, GEN_INT (9)));
      emit_insn (gen_andsi3 (v2, v2, mask));
      emit_insn (gen_iorsi3 (v2, v2, v1));
      emit_move_insn (mem, v2);
    }
#ifdef CLEAR_INSN_CACHE
  tramp = XEXP (tramp, 0);
  emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__gnu_clear_cache"),
		     LCT_NORMAL, VOIDmode, 2, tramp, Pmode,
		     plus_constant (Pmode, tramp, TRAMPOLINE_SIZE),
		     Pmode);
#endif
}
/* Determine whether c6x_output_mi_thunk can succeed.  */

static bool
c6x_can_output_mi_thunk (const_tree thunk ATTRIBUTE_UNUSED,
			 HOST_WIDE_INT delta ATTRIBUTE_UNUSED,
			 HOST_WIDE_INT vcall_offset ATTRIBUTE_UNUSED,
			 const_tree function ATTRIBUTE_UNUSED)
{
  return !TARGET_LONG_CALLS;
}
/* Output the assembler code for a thunk function.  THUNK is the
   declaration for the thunk function itself, FUNCTION is the decl for
   the target function.  DELTA is an immediate constant offset to be
   added to THIS.  If VCALL_OFFSET is nonzero, the word at
   *(*this + vcall_offset) should be added to THIS.  */

static void
c6x_output_mi_thunk (FILE *file ATTRIBUTE_UNUSED,
		     tree thunk ATTRIBUTE_UNUSED, HOST_WIDE_INT delta,
		     HOST_WIDE_INT vcall_offset, tree function)
{
  rtx xops[5];
  /* The this parameter is passed as the first argument.  */
  rtx this_rtx = gen_rtx_REG (Pmode, REG_A4);

  c6x_current_insn = NULL_RTX;

  xops[4] = XEXP (DECL_RTL (function), 0);
  if (!vcall_offset)
    {
      output_asm_insn ("b .s2 \t%4", xops);
      if (!delta)
	output_asm_insn ("nop 5", xops);
    }

  /* Adjust the this parameter by a fixed constant.  */
  if (delta)
    {
      xops[0] = GEN_INT (delta);
      xops[1] = this_rtx;
      if (delta >= -16 && delta <= 15)
	{
	  output_asm_insn ("add .s1 %0, %1, %1", xops);
	  if (!vcall_offset)
	    output_asm_insn ("nop 4", xops);
	}
      else if (delta >= 16 && delta < 32)
	{
	  output_asm_insn ("add .d1 %0, %1, %1", xops);
	  if (!vcall_offset)
	    output_asm_insn ("nop 4", xops);
	}
      else if (delta >= -32768 && delta < 32768)
	{
	  output_asm_insn ("mvk .s1 %0, A0", xops);
	  output_asm_insn ("add .d1 %1, A0, %1", xops);
	  if (!vcall_offset)
	    output_asm_insn ("nop 3", xops);
	}
      else
	{
	  output_asm_insn ("mvkl .s1 %0, A0", xops);
	  output_asm_insn ("mvkh .s1 %0, A0", xops);
	  output_asm_insn ("add .d1 %1, A0, %1", xops);
	  if (!vcall_offset)
	    output_asm_insn ("nop 3", xops);
	}
    }

  /* Adjust the this parameter by a value stored in the vtable.  */
  if (vcall_offset)
    {
      rtx a0tmp = gen_rtx_REG (Pmode, REG_A0);
      rtx a3tmp = gen_rtx_REG (Pmode, REG_A3);

      xops[1] = a3tmp;
      xops[2] = a0tmp;
      xops[3] = gen_rtx_MEM (Pmode, a0tmp);
      output_asm_insn ("mv .s1 a4, %2", xops);
      output_asm_insn ("ldw .d1t1 %3, %2", xops);

      /* Adjust the this parameter.  */
      xops[0] = gen_rtx_MEM (Pmode, plus_constant (Pmode, a0tmp,
						   vcall_offset));
      if (!memory_operand (xops[0], Pmode))
	{
	  rtx tmp2 = gen_rtx_REG (Pmode, REG_A1);
	  xops[0] = GEN_INT (vcall_offset);
	  xops[1] = tmp2;
	  output_asm_insn ("mvkl .s1 %0, %1", xops);
	  output_asm_insn ("mvkh .s1 %0, %1", xops);
	  output_asm_insn ("nop 2", xops);
	  output_asm_insn ("add .d1 %2, %1, %2", xops);
	  xops[0] = gen_rtx_MEM (Pmode, a0tmp);
	}
      else
	output_asm_insn ("nop 4", xops);

      xops[2] = this_rtx;
      output_asm_insn ("ldw .d1t1 %0, %1", xops);
      output_asm_insn ("|| b .s2 \t%4", xops);
      output_asm_insn ("nop 4", xops);
      output_asm_insn ("add .d1 %2, %1, %2", xops);
    }
}
/* Return true if EXP goes in small data/bss.  */

static bool
c6x_in_small_data_p (const_tree exp)
{
  /* We want to merge strings, so we never consider them small data.  */
  if (TREE_CODE (exp) == STRING_CST)
    return false;

  /* Functions are never small data.  */
  if (TREE_CODE (exp) == FUNCTION_DECL)
    return false;

  if (TREE_CODE (exp) == VAR_DECL && DECL_WEAK (exp))
    return false;

  if (TREE_CODE (exp) == VAR_DECL && DECL_SECTION_NAME (exp))
    {
      const char *section = TREE_STRING_POINTER (DECL_SECTION_NAME (exp));

      if (strcmp (section, ".neardata") == 0
	  || strncmp (section, ".neardata.", 10) == 0
	  || strncmp (section, ".gnu.linkonce.s.", 16) == 0
	  || strcmp (section, ".bss") == 0
	  || strncmp (section, ".bss.", 5) == 0
	  || strncmp (section, ".gnu.linkonce.sb.", 17) == 0
	  || strcmp (section, ".rodata") == 0
	  || strncmp (section, ".rodata.", 8) == 0
	  || strncmp (section, ".gnu.linkonce.s2.", 17) == 0)
	return true;
    }
  else
    return PLACE_IN_SDATA_P (exp);

  return false;
}
/* Return a section for X.  The only special thing we do here is to
   honor small data.  We don't have a tree type, so we can't use the
   PLACE_IN_SDATA_P macro we use everywhere else; we choose to place
   everything sized 8 bytes or smaller into small data.  */

static section *
c6x_select_rtx_section (enum machine_mode mode, rtx x,
			unsigned HOST_WIDE_INT align)
{
  if (c6x_sdata_mode == C6X_SDATA_ALL
      || (c6x_sdata_mode != C6X_SDATA_NONE && GET_MODE_SIZE (mode) <= 8))
    /* ??? Consider using mergeable sdata sections.  */
    return sdata_section;
  else
    return default_elf_select_rtx_section (mode, x, align);
}
static section *
c6x_elf_select_section (tree decl, int reloc,
			unsigned HOST_WIDE_INT align)
{
  const char *sname = NULL;
  unsigned int flags = SECTION_WRITE;
  if (c6x_in_small_data_p (decl))
    {
      switch (categorize_decl_for_section (decl, reloc))
	{
	case SECCAT_SRODATA:
	  sname = ".rodata";
	  flags = 0;
	  break;
	case SECCAT_SDATA:
	  sname = ".neardata";
	  break;
	case SECCAT_SBSS:
	  sname = ".bss";
	  flags |= SECTION_BSS;
	default:
	  break;
	}
    }
  else
    {
      switch (categorize_decl_for_section (decl, reloc))
	{
	case SECCAT_DATA:
	  sname = ".fardata";
	  break;
	case SECCAT_DATA_REL:
	  sname = ".fardata.rel";
	  break;
	case SECCAT_DATA_REL_LOCAL:
	  sname = ".fardata.rel.local";
	  break;
	case SECCAT_DATA_REL_RO:
	  sname = ".fardata.rel.ro";
	  break;
	case SECCAT_DATA_REL_RO_LOCAL:
	  sname = ".fardata.rel.ro.local";
	  break;
	case SECCAT_BSS:
	  sname = ".far";
	  flags |= SECTION_BSS;
	  break;
	case SECCAT_RODATA:
	  sname = ".const";
	  flags = 0;
	  break;
	default:
	  break;
	}
    }
  if (sname)
    {
      /* We might get called with string constants, but get_named_section
	 doesn't like them as they are not DECLs.  Also, we need to set
	 flags in that case.  */
      if (!DECL_P (decl))
	return get_section (sname, flags, NULL);
      return get_named_section (decl, sname, reloc);
    }

  return default_elf_select_section (decl, reloc, align);
}
/* Build up a unique section name, expressed as a
   STRING_CST node, and assign it to DECL_SECTION_NAME (decl).
   RELOC indicates whether the initial value of EXP requires
   link-time relocations.  */

static void ATTRIBUTE_UNUSED
c6x_elf_unique_section (tree decl, int reloc)
{
  const char *prefix = NULL;
  /* We only need to use .gnu.linkonce if we don't have COMDAT groups.  */
  bool one_only = DECL_ONE_ONLY (decl) && !HAVE_COMDAT_GROUP;

  if (c6x_in_small_data_p (decl))
    {
      switch (categorize_decl_for_section (decl, reloc))
	{
	case SECCAT_SDATA:
	  prefix = one_only ? ".s" : ".neardata";
	  break;
	case SECCAT_SBSS:
	  prefix = one_only ? ".sb" : ".bss";
	  break;
	case SECCAT_SRODATA:
	  prefix = one_only ? ".s2" : ".rodata";
	  break;
	case SECCAT_RODATA_MERGE_STR:
	case SECCAT_RODATA_MERGE_STR_INIT:
	case SECCAT_RODATA_MERGE_CONST:
	case SECCAT_RODATA:
	case SECCAT_DATA:
	case SECCAT_DATA_REL:
	case SECCAT_DATA_REL_LOCAL:
	case SECCAT_DATA_REL_RO:
	case SECCAT_DATA_REL_RO_LOCAL:
	  gcc_unreachable ();
	default:
	  /* Everything else we place into default sections and hope for the
	     best.  */
	  break;
	}
    }
  else
    {
      switch (categorize_decl_for_section (decl, reloc))
	{
	case SECCAT_DATA:
	case SECCAT_DATA_REL:
	case SECCAT_DATA_REL_LOCAL:
	case SECCAT_DATA_REL_RO:
	case SECCAT_DATA_REL_RO_LOCAL:
	  prefix = one_only ? ".fd" : ".fardata";
	  break;
	case SECCAT_BSS:
	  prefix = one_only ? ".fb" : ".far";
	  break;
	case SECCAT_RODATA:
	case SECCAT_RODATA_MERGE_STR:
	case SECCAT_RODATA_MERGE_STR_INIT:
	case SECCAT_RODATA_MERGE_CONST:
	  prefix = one_only ? ".fr" : ".const";
	  break;
	case SECCAT_SRODATA:
	case SECCAT_SDATA:
	case SECCAT_SBSS:
	  gcc_unreachable ();
	default:
	  break;
	}
    }

  if (prefix)
    {
      const char *name, *linkonce;
      char *string;

      name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl));
      name = targetm.strip_name_encoding (name);

      /* If we're using one_only, then there needs to be a .gnu.linkonce
	 prefix to the section name.  */
      linkonce = one_only ? ".gnu.linkonce" : "";

      string = ACONCAT ((linkonce, prefix, ".", name, NULL));

      DECL_SECTION_NAME (decl) = build_string (strlen (string), string);
      return;
    }
  default_unique_section (decl, reloc);
}
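/* Illustrative example (not in the original sources): a variable "foo"
   categorized as SECCAT_DATA is placed in section .fardata.foo, or in
   .gnu.linkonce.fd.foo when it is one-only and COMDAT groups are not
   available.  */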
static unsigned int
c6x_section_type_flags (tree decl, const char *name, int reloc)
{
  unsigned int flags = 0;

  if (strcmp (name, ".far") == 0
      || strncmp (name, ".far.", 5) == 0)
    flags |= SECTION_BSS;

  flags |= default_section_type_flags (decl, name, reloc);

  return flags;
}
/* Checks whether the given CALL_EXPR would use a caller-saved
   register.  This is used to decide whether sibling call optimization
   could be performed on the respective function call.  */

static bool
c6x_call_saved_register_used (tree call_expr)
{
  CUMULATIVE_ARGS cum_v;
  cumulative_args_t cum;
  HARD_REG_SET call_saved_regset;
  tree parameter;
  enum machine_mode mode;
  tree type;
  rtx parm_rtx;
  int i;

  INIT_CUMULATIVE_ARGS (cum_v, NULL, NULL, 0, 0);
  cum = pack_cumulative_args (&cum_v);

  COMPL_HARD_REG_SET (call_saved_regset, call_used_reg_set);
  for (i = 0; i < call_expr_nargs (call_expr); i++)
    {
      parameter = CALL_EXPR_ARG (call_expr, i);
      gcc_assert (parameter);

      /* For an undeclared variable passed as parameter we will get
	 an ERROR_MARK node here.  */
      if (TREE_CODE (parameter) == ERROR_MARK)
	return true;

      type = TREE_TYPE (parameter);
      gcc_assert (type);

      mode = TYPE_MODE (type);
      gcc_assert (mode);

      if (pass_by_reference (&cum_v, mode, type, true))
	{
	  mode = Pmode;
	  type = build_pointer_type (type);
	}

      parm_rtx = c6x_function_arg (cum, mode, type, 0);

      c6x_function_arg_advance (cum, mode, type, 0);

      if (!parm_rtx)
	continue;

      if (REG_P (parm_rtx)
	  && overlaps_hard_reg_set_p (call_saved_regset, GET_MODE (parm_rtx),
				      REGNO (parm_rtx)))
	return true;
      if (GET_CODE (parm_rtx) == PARALLEL)
	{
	  int n = XVECLEN (parm_rtx, 0);
	  while (n-- > 0)
	    {
	      rtx x = XEXP (XVECEXP (parm_rtx, 0, n), 0);
	      if (REG_P (x)
		  && overlaps_hard_reg_set_p (call_saved_regset,
					      GET_MODE (x), REGNO (x)))
		return true;
	    }
	}
    }
  return false;
}
/* Decide whether we can make a sibling call to a function.  DECL is the
   declaration of the function being targeted by the call and EXP is the
   CALL_EXPR representing the call.  */

static bool
c6x_function_ok_for_sibcall (tree decl, tree exp)
{
  /* Registers A10, A12, B10 and B12 are available as argument registers
     but are unfortunately caller-saved.  This makes functions that need
     these registers for arguments unsuitable for sibcalls.  */
  if (c6x_call_saved_register_used (exp))
    return false;

  if (!flag_pic)
    return true;

  if (TARGET_DSBT)
    {
      /* When compiling for DSBT, the calling function must be local,
	 so that when we reload B14 in the sibcall epilogue, it will
	 not change its value.  */
      struct cgraph_local_info *this_func;

      if (!decl)
	/* Not enough information.  */
	return false;

      this_func = cgraph_local_info (current_function_decl);
      return this_func->local;
    }

  return true;
}
/* Return true if DECL is known to be linked into section SECTION.  */

static bool
c6x_function_in_section_p (tree decl, section *section)
{
  /* We can only be certain about functions defined in the same
     compilation unit.  */
  if (!TREE_STATIC (decl))
    return false;

  /* Make sure that SYMBOL always binds to the definition in this
     compilation unit.  */
  if (!targetm.binds_local_p (decl))
    return false;

  /* If DECL_SECTION_NAME is set, assume it is trustworthy.  */
  if (!DECL_SECTION_NAME (decl))
    {
      /* Make sure that we will not create a unique section for DECL.  */
      if (flag_function_sections || DECL_ONE_ONLY (decl))
	return false;
    }

  return function_section (decl) == section;
}
/* Return true if a call to OP, which is a SYMBOL_REF, must be expanded
   as a long call.  */
bool
c6x_long_call_p (rtx op)
{
  tree decl;

  if (!TARGET_LONG_CALLS)
    return false;

  decl = SYMBOL_REF_DECL (op);

  /* Try to determine whether the symbol is in the same section as the current
     function.  Be conservative, and only cater for cases in which the
     whole of the current function is placed in the same section.  */
  if (decl != NULL_TREE
      && !flag_reorder_blocks_and_partition
      && TREE_CODE (decl) == FUNCTION_DECL
      && c6x_function_in_section_p (decl, current_function_section ()))
    return false;

  return true;
}
/* Emit the sequence for a call.  */
void
c6x_expand_call (rtx retval, rtx address, bool sibcall)
{
  rtx callee = XEXP (address, 0);
  rtx call_insn;

  if (!c6x_call_operand (callee, Pmode))
    {
      callee = force_reg (Pmode, callee);
      address = change_address (address, Pmode, callee);
    }
  call_insn = gen_rtx_CALL (VOIDmode, address, const0_rtx);
  if (sibcall)
    {
      call_insn = emit_call_insn (call_insn);
      use_reg (&CALL_INSN_FUNCTION_USAGE (call_insn),
	       gen_rtx_REG (Pmode, REG_B3));
    }
  else
    {
      if (retval == NULL_RTX)
	call_insn = emit_call_insn (call_insn);
      else
	call_insn = emit_call_insn (gen_rtx_SET (GET_MODE (retval), retval,
						 call_insn));
    }
  if (flag_pic)
    use_reg (&CALL_INSN_FUNCTION_USAGE (call_insn), pic_offset_table_rtx);
}
/* Legitimize PIC addresses.  If the address is already position-independent,
   we return ORIG.  Newly generated position-independent addresses go into a
   reg.  This is REG if nonzero, otherwise we allocate register(s) as
   necessary.  PICREG is the register holding the pointer to the PIC offset
   table.  */

static rtx
legitimize_pic_address (rtx orig, rtx reg, rtx picreg)
{
  rtx addr = orig;
  rtx new_rtx = orig;

  if (GET_CODE (addr) == SYMBOL_REF || GET_CODE (addr) == LABEL_REF)
    {
      int unspec = UNSPEC_LOAD_GOT;
      rtx tmp;

      if (reg == 0)
	{
	  gcc_assert (can_create_pseudo_p ());
	  reg = gen_reg_rtx (Pmode);
	}
      if (flag_pic == 2)
	{
	  if (can_create_pseudo_p ())
	    tmp = gen_reg_rtx (Pmode);
	  else
	    tmp = reg;
	  emit_insn (gen_movsi_gotoff_high (tmp, addr));
	  emit_insn (gen_movsi_gotoff_lo_sum (tmp, tmp, addr));
	  emit_insn (gen_load_got_gotoff (reg, picreg, tmp));
	}
      else
	{
	  tmp = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), unspec);
	  new_rtx = gen_const_mem (Pmode, gen_rtx_PLUS (Pmode, picreg, tmp));

	  emit_move_insn (reg, new_rtx);
	}
      if (picreg == pic_offset_table_rtx)
	crtl->uses_pic_offset_table = 1;
      return reg;
    }
  else if (GET_CODE (addr) == CONST || GET_CODE (addr) == PLUS)
    {
      rtx base;

      if (GET_CODE (addr) == CONST)
	{
	  addr = XEXP (addr, 0);
	  gcc_assert (GET_CODE (addr) == PLUS);
	}

      if (XEXP (addr, 0) == picreg)
	return orig;

      if (reg == 0)
	{
	  gcc_assert (can_create_pseudo_p ());
	  reg = gen_reg_rtx (Pmode);
	}

      base = legitimize_pic_address (XEXP (addr, 0), reg, picreg);
      addr = legitimize_pic_address (XEXP (addr, 1),
				     base == reg ? NULL_RTX : reg,
				     picreg);

      if (GET_CODE (addr) == CONST_INT)
	{
	  gcc_assert (! reload_in_progress && ! reload_completed);
	  addr = force_reg (Pmode, addr);
	}

      if (GET_CODE (addr) == PLUS && CONSTANT_P (XEXP (addr, 1)))
	{
	  base = gen_rtx_PLUS (Pmode, base, XEXP (addr, 0));
	  addr = XEXP (addr, 1);
	}

      return gen_rtx_PLUS (Pmode, base, addr);
    }

  return new_rtx;
}
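/* Illustrative note (not in the original sources): with -fpic
   (flag_pic == 1), a reference to a global symbol "x" becomes a load from
   its GOT entry at *+B14($GOT(x)), where B14 (pic_offset_table_rtx) holds
   the data page pointer; with -fPIC (flag_pic == 2) the GOT offset is first
   materialized with a mvkl/mvkh pair via the gotoff patterns above.  */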
/* Expand a move operation in mode MODE.  The operands are in OPERANDS.
   Returns true if no further code must be generated, false if the caller
   should generate an insn to move OPERANDS[1] to OPERANDS[0].  */

bool
expand_move (rtx *operands, enum machine_mode mode)
{
  rtx dest = operands[0];
  rtx op = operands[1];

  if ((reload_in_progress | reload_completed) == 0
      && GET_CODE (dest) == MEM && GET_CODE (op) != REG)
    operands[1] = force_reg (mode, op);
  else if (mode == SImode && symbolic_operand (op, SImode))
    {
      if (flag_pic)
	{
	  if (sdata_symbolic_operand (op, SImode))
	    {
	      emit_insn (gen_load_sdata_pic (dest, pic_offset_table_rtx, op));
	      crtl->uses_pic_offset_table = 1;
	      return true;
	    }
	  else
	    {
	      rtx temp = (reload_completed || reload_in_progress
			  ? dest : gen_reg_rtx (Pmode));

	      operands[1] = legitimize_pic_address (op, temp,
						    pic_offset_table_rtx);
	    }
	}
      else if (reload_completed
	       && !sdata_symbolic_operand (op, SImode))
	{
	  emit_insn (gen_movsi_high (dest, op));
	  emit_insn (gen_movsi_lo_sum (dest, dest, op));
	  return true;
	}
    }
  return false;
}
/* This function is called when we're about to expand an integer compare
   operation which performs COMPARISON.  It examines the second operand,
   and if it is an integer constant that cannot be used directly on the
   current machine in a comparison insn, it returns true.  */
bool
c6x_force_op_for_comparison_p (enum rtx_code code, rtx op)
{
  if (!CONST_INT_P (op) || satisfies_constraint_Iu4 (op))
    return false;

  if ((code == EQ || code == LT || code == GT)
      && !satisfies_constraint_Is5 (op))
    return true;
  if ((code == GTU || code == LTU)
      && (!TARGET_INSNS_64 || !satisfies_constraint_Iu5 (op)))
    return true;

  return false;
}
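/* Illustrative note (not in the original sources): assuming the usual
   meanings of the c6x constraints (Iu4 = 4-bit unsigned, Is5 = 5-bit
   signed, Iu5 = 5-bit unsigned), a compare such as "x == 100" needs the
   constant forced into a register, while "x == 7" can use the constant
   directly in the compare insn.  */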
/* Emit comparison instruction if necessary, returning the expression
   that holds the compare result in the proper mode.  Return the comparison
   that should be used in the jump insn.  */

rtx
c6x_expand_compare (rtx comparison, enum machine_mode mode)
{
  enum rtx_code code = GET_CODE (comparison);
  rtx op0 = XEXP (comparison, 0);
  rtx op1 = XEXP (comparison, 1);
  rtx cmp;
  enum rtx_code jump_code = code;
  enum machine_mode op_mode = GET_MODE (op0);

  if (op_mode == DImode && (code == NE || code == EQ) && op1 == const0_rtx)
    {
      rtx t = gen_reg_rtx (SImode);
      emit_insn (gen_iorsi3 (t, gen_lowpart (SImode, op0),
			     gen_highpart (SImode, op0)));
      cmp = t;
    }
  else if (op_mode == DImode)
    {
      rtx lo[2], high[2];
      rtx cmp1, cmp2;

      if (code == NE || code == GEU || code == LEU || code == GE || code == LE)
	{
	  code = reverse_condition (code);
	  jump_code = EQ;
	}
      else
	jump_code = NE;

      split_di (&op0, 1, lo, high);
      split_di (&op1, 1, lo + 1, high + 1);

      if (c6x_force_op_for_comparison_p (code, high[1])
	  || c6x_force_op_for_comparison_p (EQ, high[1]))
	high[1] = force_reg (SImode, high[1]);

      cmp1 = gen_reg_rtx (SImode);
      cmp2 = gen_reg_rtx (SImode);
      emit_insn (gen_rtx_SET (VOIDmode, cmp1,
			      gen_rtx_fmt_ee (code, SImode, high[0], high[1])));
      if (code == EQ)
	{
	  if (c6x_force_op_for_comparison_p (code, lo[1]))
	    lo[1] = force_reg (SImode, lo[1]);
	  emit_insn (gen_rtx_SET (VOIDmode, cmp2,
				  gen_rtx_fmt_ee (code, SImode, lo[0], lo[1])));
	  emit_insn (gen_andsi3 (cmp1, cmp1, cmp2));
	}
      else
	{
	  emit_insn (gen_rtx_SET (VOIDmode, cmp2,
				  gen_rtx_EQ (SImode, high[0], high[1])));
	  if (code == GT)
	    code = GTU;
	  else if (code == LT)
	    code = LTU;
	  if (c6x_force_op_for_comparison_p (code, lo[1]))
	    lo[1] = force_reg (SImode, lo[1]);
	  emit_insn (gen_cmpsi_and (cmp2, gen_rtx_fmt_ee (code, SImode,
							  lo[0], lo[1]),
				    lo[0], lo[1], cmp2));
	  emit_insn (gen_iorsi3 (cmp1, cmp1, cmp2));
	}
      cmp = cmp1;
    }
  else if (TARGET_FP && !flag_finite_math_only
	   && (op_mode == DFmode || op_mode == SFmode)
	   && code != EQ && code != NE && code != LT && code != GT
	   && code != UNLE && code != UNGE)
    {
      enum rtx_code code1, code2, code3;
      rtx (*fn) (rtx, rtx, rtx, rtx, rtx);

      jump_code = NE;
      code3 = UNKNOWN;
      switch (code)
	{
	case LE:
	case GE:
	case UNLT:
	case UNGT:
	  if (code == UNLT || code == UNGT)
	    jump_code = EQ;
	  code1 = code == LE || code == UNGT ? LT : GT;
	  code2 = EQ;
	  break;
	case UNEQ:
	case LTGT:
	  if (code == UNEQ)
	    jump_code = EQ;
	  code1 = LT;
	  code2 = GT;
	  break;
	case ORDERED:
	case UNORDERED:
	  if (code == UNORDERED)
	    jump_code = EQ;
	  code1 = LT;
	  code2 = GT;
	  code3 = EQ;
	  break;
	default:
	  gcc_unreachable ();
	}

      cmp = gen_reg_rtx (SImode);
      emit_insn (gen_rtx_SET (VOIDmode, cmp,
			      gen_rtx_fmt_ee (code1, SImode, op0, op1)));
      fn = op_mode == DFmode ? gen_cmpdf_ior : gen_cmpsf_ior;
      emit_insn (fn (cmp, gen_rtx_fmt_ee (code2, SImode, op0, op1),
		     op0, op1, cmp));
      if (code3 != UNKNOWN)
	emit_insn (fn (cmp, gen_rtx_fmt_ee (code3, SImode, op0, op1),
		       op0, op1, cmp));
    }
  else if (op_mode == SImode && (code == NE || code == EQ) && op1 == const0_rtx)
    cmp = op0;
  else
    {
      bool is_fp_libfunc;

      is_fp_libfunc = !TARGET_FP && (op_mode == DFmode || op_mode == SFmode);

      if ((code == NE || code == GEU || code == LEU || code == GE || code == LE)
	  && !is_fp_libfunc)
	{
	  code = reverse_condition (code);
	  jump_code = EQ;
	}
      else if (code == UNGE)
	{
	  code = LT;
	  jump_code = EQ;
	}
      else if (code == UNLE)
	{
	  code = GT;
	  jump_code = EQ;
	}
      else
	jump_code = NE;

      if (is_fp_libfunc)
	{
	  rtx insns;
	  rtx libfunc;

	  switch (code)
	    {
	    case EQ:
	      libfunc = op_mode == DFmode ? eqdf_libfunc : eqsf_libfunc;
	      break;
	    case NE:
	      libfunc = op_mode == DFmode ? nedf_libfunc : nesf_libfunc;
	      break;
	    case GT:
	      libfunc = op_mode == DFmode ? gtdf_libfunc : gtsf_libfunc;
	      break;
	    case GE:
	      libfunc = op_mode == DFmode ? gedf_libfunc : gesf_libfunc;
	      break;
	    case LT:
	      libfunc = op_mode == DFmode ? ltdf_libfunc : ltsf_libfunc;
	      break;
	    case LE:
	      libfunc = op_mode == DFmode ? ledf_libfunc : lesf_libfunc;
	      break;
	    default:
	      gcc_unreachable ();
	    }
	  start_sequence ();

	  cmp = emit_library_call_value (libfunc, 0, LCT_CONST, SImode, 2,
					 op0, op_mode, op1, op_mode);
	  insns = get_insns ();
	  end_sequence ();

	  emit_libcall_block (insns, cmp, cmp,
			      gen_rtx_fmt_ee (code, SImode, op0, op1));
	}
      else
	{
	  cmp = gen_reg_rtx (SImode);
	  if (c6x_force_op_for_comparison_p (code, op1))
	    op1 = force_reg (SImode, op1);
	  emit_insn (gen_rtx_SET (VOIDmode, cmp,
				  gen_rtx_fmt_ee (code, SImode, op0, op1)));
	}
    }

  return gen_rtx_fmt_ee (jump_code, mode, cmp, const0_rtx);
}
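/* Illustrative note (not in the original sources): a DImode signed "a > b"
   is built from word compares as
   (hi(a) > hi(b)) | ((hi(a) == hi(b)) & (lo(a) GTU lo(b))),
   with the cmpsi_and pattern folding the equality test into the low-part
   compare; the jump then tests the accumulated SImode result against 0.  */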
/* Return one word of double-word value OP.  HIGH_P is true to select the
   high part, false to select the low part.  When encountering auto-increment
   addressing, we make the assumption that the low part is going to be accessed
   first.  */

rtx
c6x_subword (rtx op, bool high_p)
{
  unsigned int byte;
  enum machine_mode mode;

  mode = GET_MODE (op);
  if (mode == VOIDmode)
    mode = DImode;

  if (TARGET_BIG_ENDIAN ? !high_p : high_p)
    byte = UNITS_PER_WORD;
  else
    byte = 0;

  if (MEM_P (op))
    {
      rtx addr = XEXP (op, 0);
      if (GET_CODE (addr) == PLUS || REG_P (addr))
	return adjust_address (op, word_mode, byte);
      /* FIXME: should really support autoincrement addressing for
	 multi-word modes.  */
      gcc_unreachable ();
    }

  return simplify_gen_subreg (word_mode, op, mode, byte);
}
/* Split one or more DImode RTL references into pairs of SImode
   references.  The RTL can be REG, offsettable MEM, integer constant, or
   CONST_DOUBLE.  "operands" is a pointer to an array of DImode RTL to
   split and "num" is its length.  lo_half and hi_half are output arrays
   that parallel "operands".  */

void
split_di (rtx operands[], int num, rtx lo_half[], rtx hi_half[])
{
  while (num--)
    {
      rtx op = operands[num];

      lo_half[num] = c6x_subword (op, false);
      hi_half[num] = c6x_subword (op, true);
    }
}
/* Return true if VAL is a mask valid for a clr instruction.  */
bool
c6x_valid_mask_p (HOST_WIDE_INT val)
{
  int i;
  for (i = 0; i < 32; i++)
    if (!(val & ((unsigned HOST_WIDE_INT)1 << i)))
      break;
  for (; i < 32; i++)
    if (val & ((unsigned HOST_WIDE_INT)1 << i))
      break;
  for (; i < 32; i++)
    if (!(val & ((unsigned HOST_WIDE_INT)1 << i)))
      return false;
  return true;
}
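/* Illustrative examples (not in the original sources): 0xffff00ff is a
   valid clr mask (low ones, one contiguous run of zeros, ones up to bit
   31), while 0xff00ff00 is not, since it contains two separate runs of
   zeros.  */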
/* Expand a block move for a movmemM pattern.  */

bool
c6x_expand_movmem (rtx dst, rtx src, rtx count_exp, rtx align_exp,
		   rtx expected_align_exp ATTRIBUTE_UNUSED,
		   rtx expected_size_exp ATTRIBUTE_UNUSED)
{
  unsigned HOST_WIDE_INT align = 1;
  unsigned HOST_WIDE_INT src_mem_align, dst_mem_align, min_mem_align;
  unsigned HOST_WIDE_INT count = 0, offset = 0;
  unsigned int biggest_move = TARGET_STDW ? 8 : 4;

  if (CONST_INT_P (align_exp))
    align = INTVAL (align_exp);

  src_mem_align = MEM_ALIGN (src) / BITS_PER_UNIT;
  dst_mem_align = MEM_ALIGN (dst) / BITS_PER_UNIT;
  min_mem_align = MIN (src_mem_align, dst_mem_align);

  if (min_mem_align > align)
    align = min_mem_align;
  if (src_mem_align < align)
    src_mem_align = align;
  if (dst_mem_align < align)
    dst_mem_align = align;

  if (CONST_INT_P (count_exp))
    count = INTVAL (count_exp);
  else
    return false;

  /* Make sure we don't need to care about overflow later on.  */
  if (count > ((unsigned HOST_WIDE_INT) 1 << 30))
    return false;

  if (count >= 28 && (count & 3) == 0 && align >= 4)
    {
      tree dst_expr = MEM_EXPR (dst);
      tree src_expr = MEM_EXPR (src);
      rtx fn = TARGET_INSNS_64PLUS ? strasgi64p_libfunc : strasgi_libfunc;
      rtx srcreg = force_reg (Pmode, XEXP (src, 0));
      rtx dstreg = force_reg (Pmode, XEXP (dst, 0));

      if (src_expr)
	mark_addressable (src_expr);
      if (dst_expr)
	mark_addressable (dst_expr);
      emit_library_call (fn, LCT_NORMAL, VOIDmode, 3,
			 dstreg, Pmode, srcreg, Pmode, count_exp, SImode);
      return true;
    }

  if (biggest_move > align && !TARGET_INSNS_64)
    biggest_move = align;

  if (count / biggest_move > 7)
    return false;

  while (count > 0)
    {
      rtx reg, reg_lowpart;
      enum machine_mode srcmode, dstmode;
      unsigned HOST_WIDE_INT src_size, dst_size, src_left;
      int shift;
      rtx srcmem, dstmem;

      while (biggest_move > count)
	biggest_move /= 2;

      src_size = dst_size = biggest_move;
      if (src_size > src_mem_align && src_size == 2)
	src_size = 1;
      if (dst_size > dst_mem_align && dst_size == 2)
	dst_size = 1;

      if (dst_size > src_size)
	dst_size = src_size;

      srcmode = mode_for_size (src_size * BITS_PER_UNIT, MODE_INT, 0);
      dstmode = mode_for_size (dst_size * BITS_PER_UNIT, MODE_INT, 0);
      if (src_size >= 4)
	reg_lowpart = reg = gen_reg_rtx (srcmode);
      else
	{
	  reg = gen_reg_rtx (SImode);
	  reg_lowpart = gen_lowpart (srcmode, reg);
	}

      srcmem = adjust_address (copy_rtx (src), srcmode, offset);

      if (src_size > src_mem_align)
	{
	  enum insn_code icode = (srcmode == SImode ? CODE_FOR_movmisalignsi
				  : CODE_FOR_movmisaligndi);
	  emit_insn (GEN_FCN (icode) (reg_lowpart, srcmem));
	}
      else
	emit_move_insn (reg_lowpart, srcmem);

      src_left = src_size;
      shift = TARGET_BIG_ENDIAN ? (src_size - dst_size) * BITS_PER_UNIT : 0;
      while (src_left > 0)
	{
	  rtx dstreg = reg_lowpart;

	  if (src_size > dst_size)
	    {
	      rtx srcword = reg;
	      int shift_amount = shift & (BITS_PER_WORD - 1);
	      if (src_size > 4)
		srcword = operand_subword_force (srcword, src_left >= 4 ? 0 : 4,
						 srcmode);
	      if (shift_amount > 0)
		{
		  dstreg = gen_reg_rtx (SImode);
		  emit_insn (gen_lshrsi3 (dstreg, srcword,
					  GEN_INT (shift_amount)));
		}
	      else
		dstreg = srcword;
	      dstreg = gen_lowpart (dstmode, dstreg);
	    }

	  dstmem = adjust_address (copy_rtx (dst), dstmode, offset);
	  if (dst_size > dst_mem_align)
	    {
	      enum insn_code icode = (dstmode == SImode ? CODE_FOR_movmisalignsi
				      : CODE_FOR_movmisaligndi);
	      emit_insn (GEN_FCN (icode) (dstmem, dstreg));
	    }
	  else
	    emit_move_insn (dstmem, dstreg);

	  if (TARGET_BIG_ENDIAN)
	    shift -= dst_size * BITS_PER_UNIT;
	  else
	    shift += dst_size * BITS_PER_UNIT;
	  offset += dst_size;
	  src_left -= dst_size;
	}
      count -= src_size;
    }
  return true;
}
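/* Illustrative summary (not in the original sources): the expander only
   handles constant counts.  Word-aligned copies of 28 bytes or more become
   a call to __c6xabi_strasgi (or the 64+ variant); anything requiring more
   than 7 moves is rejected; otherwise the loop above emits at most 7
   load/store pairs of up to biggest_move bytes each, falling back to the
   movmisalign patterns where the known alignment is insufficient.  */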
/* Subroutine of print_address_operand, print a single address offset OFF for
   a memory access of mode MEM_MODE, choosing between normal form and scaled
   form depending on the type of the insn.  Misaligned memory references must
   use the scaled form.  */

static void
print_address_offset (FILE *file, rtx off, enum machine_mode mem_mode)
{
  rtx pat;

  if (c6x_current_insn != NULL_RTX)
    {
      pat = PATTERN (c6x_current_insn);
      if (GET_CODE (pat) == COND_EXEC)
	pat = COND_EXEC_CODE (pat);
      if (GET_CODE (pat) == PARALLEL)
	pat = XVECEXP (pat, 0, 0);

      if (GET_CODE (pat) == SET
	  && GET_CODE (SET_SRC (pat)) == UNSPEC
	  && XINT (SET_SRC (pat), 1) == UNSPEC_MISALIGNED_ACCESS)
	{
	  gcc_assert (CONST_INT_P (off)
		      && (INTVAL (off) & (GET_MODE_SIZE (mem_mode) - 1)) == 0);
	  fprintf (file, "[" HOST_WIDE_INT_PRINT_DEC "]",
		   INTVAL (off) / GET_MODE_SIZE (mem_mode));
	  return;
	}
    }
  fputs ("(", file);
  output_address (off);
  fputs (")", file);
}
/* Implement TARGET_PRINT_OPERAND_PUNCT_VALID_P.  */
static bool
c6x_print_operand_punct_valid_p (unsigned char c)
{
  return c == '$' || c == '.' || c == '|';
}
static void c6x_print_operand (FILE *, rtx, int);

/* Subroutine of c6x_print_operand; used to print a memory reference X to FILE.  */
static void
c6x_print_address_operand (FILE *file, rtx x, enum machine_mode mem_mode)
{
  rtx off;
  switch (GET_CODE (x))
    {
    case PRE_MODIFY:
    case POST_MODIFY:
      if (GET_CODE (x) == POST_MODIFY)
	output_address (XEXP (x, 0));
      off = XEXP (XEXP (x, 1), 1);
      if (XEXP (x, 0) == stack_pointer_rtx)
	{
	  if (GET_CODE (x) == PRE_MODIFY)
	    gcc_assert (INTVAL (off) > 0);
	  else
	    gcc_assert (INTVAL (off) < 0);
	}
      if (CONST_INT_P (off) && INTVAL (off) < 0)
	{
	  fprintf (file, "--");
	  off = GEN_INT (-INTVAL (off));
	}
      else
	fprintf (file, "++");
      if (GET_CODE (x) == PRE_MODIFY)
	output_address (XEXP (x, 0));
      print_address_offset (file, off, mem_mode);
      break;

    case PLUS:
      off = XEXP (x, 1);
      if (CONST_INT_P (off) && INTVAL (off) < 0)
	{
	  fprintf (file, "-");
	  off = GEN_INT (-INTVAL (off));
	}
      else
	fprintf (file, "+");
      output_address (XEXP (x, 0));
      print_address_offset (file, off, mem_mode);
      break;

    case PRE_DEC:
      gcc_assert (XEXP (x, 0) != stack_pointer_rtx);
      fprintf (file, "--");
      output_address (XEXP (x, 0));
      fprintf (file, "[1]");
      break;
    case PRE_INC:
      fprintf (file, "++");
      output_address (XEXP (x, 0));
      fprintf (file, "[1]");
      break;
    case POST_INC:
      gcc_assert (XEXP (x, 0) != stack_pointer_rtx);
      output_address (XEXP (x, 0));
      fprintf (file, "++[1]");
      break;
    case POST_DEC:
      output_address (XEXP (x, 0));
      fprintf (file, "--[1]");
      break;

    case SYMBOL_REF:
    case CONST:
    case LABEL_REF:
      gcc_assert (sdata_symbolic_operand (x, Pmode));
      fprintf (file, "+B14(");
      output_addr_const (file, x);
      fprintf (file, ")");
      break;

    case UNSPEC:
      switch (XINT (x, 1))
	{
	case UNSPEC_LOAD_GOT:
	  fputs ("$GOT(", file);
	  output_addr_const (file, XVECEXP (x, 0, 0));
	  fputs (")", file);
	  break;
	case UNSPEC_LOAD_SDATA:
	  output_addr_const (file, XVECEXP (x, 0, 0));
	  break;
	default:
	  gcc_unreachable ();
	}
      break;

    default:
      gcc_assert (GET_CODE (x) != MEM);
      c6x_print_operand (file, x, 0);
      break;
    }
}
/* Return a single character, which is either 'l', 's', 'd' or 'm', which
   specifies the functional unit used by INSN.  */

char
c6x_get_unit_specifier (rtx insn)
{
  enum attr_units units;

  if (insn_info)
    {
      int unit = INSN_INFO_ENTRY (INSN_UID (insn)).reservation;
      return c6x_unit_names[unit][0];
    }

  units = get_attr_units (insn);
  switch (units)
    {
    case UNITS_D:
    case UNITS_DL:
    case UNITS_DS:
    case UNITS_DLS:
    case UNITS_D_ADDR:
      return 'd';
    case UNITS_L:
    case UNITS_LS:
      return 'l';
    case UNITS_S:
      return 's';
    case UNITS_M:
      return 'm';
    default:
      gcc_unreachable ();
    }
}
/* Prints the unit specifier field.  */
static void
c6x_print_unit_specifier_field (FILE *file, rtx insn)
{
  enum attr_units units = get_attr_units (insn);
  enum attr_cross cross = get_attr_cross (insn);
  enum attr_dest_regfile rf = get_attr_dest_regfile (insn);
  int half;
  char unitspec;

  if (units == UNITS_D_ADDR)
    {
      enum attr_addr_regfile arf = get_attr_addr_regfile (insn);
      int t_half;
      gcc_assert (arf != ADDR_REGFILE_UNKNOWN);
      half = arf == ADDR_REGFILE_A ? 1 : 2;
      t_half = rf == DEST_REGFILE_A ? 1 : 2;
      fprintf (file, ".d%dt%d", half, t_half);
      return;
    }

  if (insn_info)
    {
      int unit = INSN_INFO_ENTRY (INSN_UID (insn)).reservation;
      fputs (".", file);
      fputs (c6x_unit_names[unit], file);
      if (cross == CROSS_Y)
	fputs ("x", file);
      return;
    }

  gcc_assert (rf != DEST_REGFILE_UNKNOWN);
  unitspec = c6x_get_unit_specifier (insn);
  half = rf == DEST_REGFILE_A ? 1 : 2;
  fprintf (file, ".%c%d%s", unitspec, half, cross == CROSS_Y ? "x" : "");
}
/* Output assembly language output for the address ADDR to FILE.  */
static void
c6x_print_operand_address (FILE *file, rtx addr)
{
  c6x_print_address_operand (file, addr, VOIDmode);
}
/* Print an operand, X, to FILE, with an optional modifier in CODE.

   Meaning of CODE:
   $ -- print the unit specifier field for the instruction.
   . -- print the predicate for the instruction or an empty string for an
        unconditional one.
   | -- print "||" if the insn should be issued in parallel with the previous
        one.

   C -- print an opcode suffix for a reversed condition
   d -- print either H, W or D as a suffix for ADDA, based on the factor given
        by the operand
   D -- print either B, H, W or D as a suffix for ADDA, based on the size of
        the operand
   J -- print a predicate
   j -- like J, but use reverse predicate
   k -- treat a CONST_INT as a register number and print it as a register
   K -- like k, but print out a doubleword register
   n -- print an integer operand, negated
   p -- print the low part of a DImode register
   P -- print the high part of a DImode register
   r -- print the absolute value of an integer operand, shifted right by 1
   R -- print the absolute value of an integer operand, shifted right by 2
   f -- the first clear bit in an integer operand assumed to be a mask for
        a clr instruction
   F -- the last clear bit in such a mask
   s -- the first set bit in an integer operand assumed to be a mask for
        a set instruction
   S -- the last set bit in such a mask
   U -- print either 1 or 2, depending on the side of the machine used by
        the operand  */
static void
c6x_print_operand (FILE *file, rtx x, int code)
{
  int i;
  HOST_WIDE_INT v;
  tree t;
  enum machine_mode mode;

  if (code == '|')
    {
      if (GET_MODE (c6x_current_insn) != TImode)
	fputs ("||", file);
      return;
    }
  if (code == '$')
    {
      c6x_print_unit_specifier_field (file, c6x_current_insn);
      return;
    }
  if (code == '.')
    {
      x = current_insn_predicate;
      if (x)
	{
	  unsigned int regno = REGNO (XEXP (x, 0));
	  fputs ("[", file);
	  if (GET_CODE (x) == EQ)
	    fputs ("!", file);
	  fputs (reg_names [regno], file);
	  fputs ("]", file);
	}
      return;
    }

  mode = GET_MODE (x);

  switch (code)
    {
    case 'C':
    case 'c':
      {
	enum rtx_code c = GET_CODE (x);
	if (code == 'C')
	  c = swap_condition (c);
	fputs (GET_RTX_NAME (c), file);
      }
      return;

    case 'J':
    case 'j':
      {
	unsigned int regno = REGNO (XEXP (x, 0));
	if ((GET_CODE (x) == EQ) == (code == 'J'))
	  fputs ("!", file);
	fputs (reg_names [regno], file);
      }
      return;

    case 'k':
      gcc_assert (GET_CODE (x) == CONST_INT);
      v = INTVAL (x);
      fprintf (file, "%s", reg_names[v]);
      return;
    case 'K':
      gcc_assert (GET_CODE (x) == CONST_INT);
      v = INTVAL (x);
      gcc_assert ((v & 1) == 0);
      fprintf (file, "%s:%s", reg_names[v + 1], reg_names[v]);
      return;

    case 's':
    case 'S':
    case 'f':
    case 'F':
      gcc_assert (GET_CODE (x) == CONST_INT);
      v = INTVAL (x);
      for (i = 0; i < 32; i++)
	{
	  HOST_WIDE_INT tst = v & 1;
	  if (((code == 'f' || code == 'F') && !tst)
	      || ((code == 's' || code == 'S') && tst))
	    break;
	  v >>= 1;
	}
      if (code == 'f' || code == 's')
	{
	  fprintf (file, "%d", i);
	  return;
	}
      for (; i < 32; i++)
	{
	  HOST_WIDE_INT tst = v & 1;
	  if ((code == 'F' && tst) || (code == 'S' && !tst))
	    break;
	  v >>= 1;
	}
      fprintf (file, "%d", i - 1);
      return;

    case 'n':
      gcc_assert (GET_CODE (x) == CONST_INT);
      output_addr_const (file, GEN_INT (-INTVAL (x)));
      return;

    case 'r':
      gcc_assert (GET_CODE (x) == CONST_INT);
      v = INTVAL (x);
      if (v < 0)
	v = -v;
      output_addr_const (file, GEN_INT (v >> 1));
      return;

    case 'R':
      gcc_assert (GET_CODE (x) == CONST_INT);
      v = INTVAL (x);
      if (v < 0)
	v = -v;
      output_addr_const (file, GEN_INT (v >> 2));
      return;

    case 'd':
      gcc_assert (GET_CODE (x) == CONST_INT);
      v = INTVAL (x);
      fputs (v == 2 ? "h" : v == 4 ? "w" : "d", file);
      return;

    case 'p':
    case 'P':
      gcc_assert (GET_CODE (x) == REG);
      v = REGNO (x);
      if (code == 'P')
	v++;
      fputs (reg_names[v], file);
      return;

    case 'D':
      v = 0;
      if (GET_CODE (x) == CONST)
	{
	  x = XEXP (x, 0);
	  gcc_assert (GET_CODE (x) == PLUS);
	  gcc_assert (GET_CODE (XEXP (x, 1)) == CONST_INT);
	  v = INTVAL (XEXP (x, 1));
	  x = XEXP (x, 0);
	}
      gcc_assert (GET_CODE (x) == SYMBOL_REF);

      t = SYMBOL_REF_DECL (x);
      if (DECL_P (t))
	v |= DECL_ALIGN_UNIT (t);
      else
	v |= TYPE_ALIGN_UNIT (TREE_TYPE (t));
      if (v & 1)
	fputs ("b", file);
      else if (v & 2)
	fputs ("h", file);
      else if (v & 4)
	fputs ("w", file);
      else
	fputs ("d", file);
      return;

    case 'U':
      if (MEM_P (x))
	{
	  x = XEXP (x, 0);
	  if (GET_CODE (x) == PLUS
	      || GET_RTX_CLASS (GET_CODE (x)) == RTX_AUTOINC)
	    x = XEXP (x, 0);
	  if (GET_CODE (x) == CONST || GET_CODE (x) == SYMBOL_REF)
	    {
	      gcc_assert (sdata_symbolic_operand (x, Pmode));
	      fputs ("2", file);
	      return;
	    }
	}
      gcc_assert (REG_P (x));
      if (A_REGNO_P (REGNO (x)))
	fputs ("1", file);
      if (B_REGNO_P (REGNO (x)))
	fputs ("2", file);
      return;

    default:
      switch (GET_CODE (x))
	{
	case REG:
	  if (GET_MODE_SIZE (mode) == 8)
	    fprintf (file, "%s:%s", reg_names[REGNO (x) + 1],
		     reg_names[REGNO (x)]);
	  else
	    fprintf (file, "%s", reg_names[REGNO (x)]);
	  break;

	case MEM:
	  fputc ('*', file);
	  gcc_assert (XEXP (x, 0) != stack_pointer_rtx);
	  c6x_print_address_operand (file, XEXP (x, 0), GET_MODE (x));
	  break;

	case SYMBOL_REF:
	case CONST:
	case LABEL_REF:
	  output_addr_const (file, x);
	  break;

	case CONST_INT:
	  output_addr_const (file, x);
	  break;

	case CONST_DOUBLE:
	  output_operand_lossage ("invalid const_double operand");
	  break;

	default:
	  output_addr_const (file, x);
	}
    }
}
/* Return TRUE if OP is a valid memory address with a base register of
   class C.  If SMALL_OFFSET is true, we disallow memory references which would
   require a long offset with B14/B15.  */

bool
c6x_mem_operand (rtx op, enum reg_class c, bool small_offset)
{
  enum machine_mode mode = GET_MODE (op);
  rtx base = XEXP (op, 0);
  switch (GET_CODE (base))
    {
    case REG:
      break;
    case PLUS:
      if (small_offset
	  && (XEXP (base, 0) == stack_pointer_rtx
	      || XEXP (base, 0) == pic_offset_table_rtx))
	{
	  if (!c6x_legitimate_address_p_1 (mode, base, true, true))
	    return false;
	}

      /* fall through */
    case PRE_INC:
    case PRE_DEC:
    case PRE_MODIFY:
    case POST_INC:
    case POST_DEC:
    case POST_MODIFY:
      base = XEXP (base, 0);
      break;

    case CONST:
    case LABEL_REF:
    case SYMBOL_REF:
      gcc_assert (sdata_symbolic_operand (base, Pmode));
      return !small_offset && c == B_REGS;

    default:
      return false;
    }
  return TEST_HARD_REG_BIT (reg_class_contents[ (int) (c)], REGNO (base));
}
/* Returns true if X is a valid address for use in a memory reference
   of mode MODE.  If STRICT is true, we do not allow pseudo registers
   in the address.  NO_LARGE_OFFSET is true if we are examining an
   address for use in a load or store misaligned instruction, or
   recursively examining an operand inside a PRE/POST_MODIFY.  */

bool
c6x_legitimate_address_p_1 (enum machine_mode mode, rtx x, bool strict,
			    bool no_large_offset)
{
  int size, size1;
  HOST_WIDE_INT off;
  enum rtx_code code = GET_CODE (x);

  switch (code)
    {
    case PRE_MODIFY:
    case POST_MODIFY:
      /* We can't split these into word-sized pieces yet.  */
      if (!TARGET_STDW && GET_MODE_SIZE (mode) > UNITS_PER_WORD)
	return false;
      if (GET_CODE (XEXP (x, 1)) != PLUS)
	return false;
      if (!c6x_legitimate_address_p_1 (mode, XEXP (x, 1), strict, true))
	return false;
      if (!rtx_equal_p (XEXP (x, 0), XEXP (XEXP (x, 1), 0)))
	return false;
      return true;

    case PRE_INC:
    case PRE_DEC:
    case POST_INC:
    case POST_DEC:
      /* We can't split these into word-sized pieces yet.  */
      if (!TARGET_STDW && GET_MODE_SIZE (mode) > UNITS_PER_WORD)
	return false;
      x = XEXP (x, 0);
      /* fall through */

    case REG:
      if (strict)
	return REGNO_OK_FOR_BASE_STRICT_P (REGNO (x));
      else
	return REGNO_OK_FOR_BASE_NONSTRICT_P (REGNO (x));

    case PLUS:
      if (!REG_P (XEXP (x, 0))
	  || !c6x_legitimate_address_p_1 (mode, XEXP (x, 0), strict, false))
	return false;
      /* We cannot ensure currently that both registers end up in the
	 same register file.  */
      if (REG_P (XEXP (x, 1)))
	return false;

      if (mode == BLKmode)
	size = 4;
      else if (mode == VOIDmode)
	/* ??? This can happen during ivopts.  */
	size = 1;
      else
	size = GET_MODE_SIZE (mode);

      if (flag_pic
	  && GET_CODE (XEXP (x, 1)) == UNSPEC
	  && XINT (XEXP (x, 1), 1) == UNSPEC_LOAD_SDATA
	  && XEXP (x, 0) == pic_offset_table_rtx
	  && sdata_symbolic_operand (XVECEXP (XEXP (x, 1), 0, 0), SImode))
	return !no_large_offset && size <= 4;
      if (flag_pic == 1
	  && GET_CODE (XEXP (x, 1)) == UNSPEC
	  && XINT (XEXP (x, 1), 1) == UNSPEC_LOAD_GOT
	  && XEXP (x, 0) == pic_offset_table_rtx
	  && (GET_CODE (XVECEXP (XEXP (x, 1), 0, 0)) == SYMBOL_REF
	      || GET_CODE (XVECEXP (XEXP (x, 1), 0, 0)) == LABEL_REF))
	return !no_large_offset;
      if (GET_CODE (XEXP (x, 1)) != CONST_INT)
	return false;

      off = INTVAL (XEXP (x, 1));

      /* If the machine does not have doubleword load/stores, we'll use
	 word size accesses.  */
      size1 = size;
      if (size == 2 * UNITS_PER_WORD && !TARGET_STDW)
	size = UNITS_PER_WORD;

      if (((HOST_WIDE_INT)size1 - 1) & off)
	return false;
      off /= size;
      if (off > -32 && off < (size1 == size ? 32 : 28))
	return true;
      if (no_large_offset || code != PLUS || XEXP (x, 0) != stack_pointer_rtx
	  || size1 > UNITS_PER_WORD)
	return false;

      return off >= 0 && off < 32768;

    case CONST:
    case SYMBOL_REF:
    case LABEL_REF:
      return (!no_large_offset
	      /* With -fpic, we must wrap it in an unspec to show the B14
		 dependency.  */
	      && !flag_pic
	      && GET_MODE_SIZE (mode) <= UNITS_PER_WORD
	      && sdata_symbolic_operand (x, Pmode));

    default:
      return false;
    }
}
static bool
c6x_legitimate_address_p (enum machine_mode mode, rtx x, bool strict)
{
  return c6x_legitimate_address_p_1 (mode, x, strict, false);
}

static bool
c6x_legitimate_constant_p (enum machine_mode mode ATTRIBUTE_UNUSED,
			   rtx x ATTRIBUTE_UNUSED)
{
  return true;
}
2472 /* Implements TARGET_PREFERRED_RENAME_CLASS. */
2474 c6x_preferred_rename_class (reg_class_t cl)
2477 return NONPREDICATE_A_REGS;
2479 return NONPREDICATE_B_REGS;
2480 if (cl == ALL_REGS || cl == GENERAL_REGS)
2481 return NONPREDICATE_REGS;
2485 /* Implements FINAL_PRESCAN_INSN. */
2487 c6x_final_prescan_insn (rtx insn, rtx *opvec ATTRIBUTE_UNUSED,
2488 int noperands ATTRIBUTE_UNUSED)
2490 c6x_current_insn = insn;
2493 /* A structure to describe the stack layout of a function. The layout is
2496 [saved frame pointer (or possibly padding0)]
2497 --> incoming stack pointer, new hard frame pointer
2498 [saved call-used regs]
2500 --> soft frame pointer
2502 [outgoing arguments]
2505 The structure members are laid out in this order. */
2510 /* Number of registers to save. */
2513 HOST_WIDE_INT frame;
2514 int outgoing_arguments_size;
2517 HOST_WIDE_INT to_allocate;
2518 /* The offsets relative to the incoming stack pointer (which
2519 becomes HARD_FRAME_POINTER). */
2520 HOST_WIDE_INT frame_pointer_offset;
2521 HOST_WIDE_INT b3_offset;
2523 /* True if we should call push_rts/pop_rts to save and restore
2524 the registers. */
2528 /* Return true if we need to save and modify the PIC register in the
2532 must_reload_pic_reg_p (void)
2534 struct cgraph_local_info *i = NULL;
2539 i = cgraph_local_info (current_function_decl);
2541 if ((crtl->uses_pic_offset_table || !current_function_is_leaf) && !i->local)
2546 /* Return 1 if we need to save REGNO. */
2548 c6x_save_reg (unsigned int regno)
2550 return ((df_regs_ever_live_p (regno)
2551 && !call_used_regs[regno]
2552 && !fixed_regs[regno])
2553 || (regno == RETURN_ADDR_REGNO
2554 && (df_regs_ever_live_p (regno)
2555 || !current_function_is_leaf))
2556 || (regno == PIC_OFFSET_TABLE_REGNUM && must_reload_pic_reg_p ()));
2559 /* Examine the number of regs NREGS we've determined we must save.
2560 Return true if we should use __c6xabi_push_rts/__c6xabi_pop_rts for
2561 prologue and epilogue. */
2564 use_push_rts_p (int nregs)
2566 if (TARGET_INSNS_64PLUS && optimize_function_for_size_p (cfun)
2567 && !cfun->machine->contains_sibcall
2568 && !cfun->returns_struct
2569 && !TARGET_LONG_CALLS
2570 && nregs >= 6 && !frame_pointer_needed)
2575 /* Return the number of saved general-purpose registers. */
2578 c6x_nsaved_regs (void)
2583 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
2584 if (c6x_save_reg (regno))
2589 /* The safe debug order mandated by the ABI. */
2590 static unsigned reg_save_order[] =
2592 REG_A10, REG_A11, REG_A12, REG_A13,
2594 REG_B10, REG_B11, REG_B12, REG_B13,
2598 #define N_SAVE_ORDER (sizeof reg_save_order / sizeof *reg_save_order)
2600 /* Compute the layout of the stack frame and store it in FRAME. */
2603 c6x_compute_frame_layout (struct c6x_frame *frame)
2605 HOST_WIDE_INT size = get_frame_size ();
2606 HOST_WIDE_INT offset;
2609 /* We use the four bytes which are technically inside the caller's frame,
2610 usually to save the frame pointer. */
2612 frame->padding0 = 0;
2613 nregs = c6x_nsaved_regs ();
2614 frame->push_rts = false;
2615 frame->b3_offset = 0;
2616 if (use_push_rts_p (nregs))
2618 frame->push_rts = true;
2619 frame->b3_offset = (TARGET_BIG_ENDIAN ? -12 : -13) * 4;
2622 else if (c6x_save_reg (REG_B3))
2625 for (idx = N_SAVE_ORDER - 1; reg_save_order[idx] != REG_B3; idx--)
2627 if (c6x_save_reg (reg_save_order[idx]))
2628 frame->b3_offset -= 4;
2631 frame->nregs = nregs;
2633 if (size == 0 && nregs == 0)
2635 frame->padding0 = 4;
2636 frame->padding1 = frame->padding2 = 0;
2637 frame->frame_pointer_offset = frame->to_allocate = 0;
2638 frame->outgoing_arguments_size = 0;
2642 if (!frame->push_rts)
2643 offset += frame->nregs * 4;
2645 if (offset == 0 && size == 0 && crtl->outgoing_args_size == 0
2646 && !current_function_is_leaf)
2647 /* Don't use the bottom of the caller's frame if we have no
2648 allocation of our own and call other functions. */
2649 frame->padding0 = frame->padding1 = 4;
2650 else if (offset & 4)
2651 frame->padding1 = 4;
2652 else
2653 frame->padding1 = 0;
2655 offset += frame->padding0 + frame->padding1;
2656 frame->frame_pointer_offset = offset;
2659 frame->outgoing_arguments_size = crtl->outgoing_args_size;
2660 offset += frame->outgoing_arguments_size;
2662 if ((offset & 4) == 0)
2663 frame->padding2 = 8;
2664 else
2665 frame->padding2 = 4;
2666 frame->to_allocate = offset + frame->padding2;
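/* Worked example (annotation, not in the original source): for a leaf
   function with an 8-byte frame, three saved registers and no outgoing
   arguments, and assuming the elided initialization above starts OFFSET
   at -4 (the word inside the caller's frame mentioned in the comment),
   the register saves give offset = -4 + 12 = 8; padding0 and padding1
   stay 0, so frame_pointer_offset = 8; adding the frame size gives 16,
   which is doubleword aligned, so padding2 = 8 and to_allocate = 24,
   keeping the stack pointer 8-byte aligned.  */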
2669 /* Return the offset between two registers, one to be eliminated, and the other
2670 its replacement, at the start of a routine. */
2673 c6x_initial_elimination_offset (int from, int to)
2675 struct c6x_frame frame;
2676 c6x_compute_frame_layout (&frame);
2678 if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
2680 else if (from == FRAME_POINTER_REGNUM
2681 && to == HARD_FRAME_POINTER_REGNUM)
2682 return -frame.frame_pointer_offset;
2685 gcc_assert (to == STACK_POINTER_REGNUM);
2687 if (from == ARG_POINTER_REGNUM)
2688 return frame.to_allocate + (frame.push_rts ? 56 : 0);
2690 gcc_assert (from == FRAME_POINTER_REGNUM);
2691 return frame.to_allocate - frame.frame_pointer_offset;
2695 /* Given FROM and TO register numbers, say whether this elimination is
2696 allowed. Frame pointer elimination is automatically handled. */
2699 c6x_can_eliminate (const int from ATTRIBUTE_UNUSED, const int to)
2701 if (to == STACK_POINTER_REGNUM)
2702 return !frame_pointer_needed;
2706 /* Emit insns to increment the stack pointer by OFFSET. If
2707 FRAME_RELATED_P, set the RTX_FRAME_RELATED_P flag on the insns.
2708 Does nothing if the offset is zero. */
2711 emit_add_sp_const (HOST_WIDE_INT offset, bool frame_related_p)
2713 rtx to_add = GEN_INT (offset);
2714 rtx orig_to_add = to_add;
2720 if (offset < -32768 || offset > 32767)
2722 rtx reg = gen_rtx_REG (SImode, REG_A0);
2723 rtx low = GEN_INT (trunc_int_for_mode (offset, HImode));
2725 insn = emit_insn (gen_movsi_high (reg, low));
2726 if (frame_related_p)
2727 RTX_FRAME_RELATED_P (insn) = 1;
2728 insn = emit_insn (gen_movsi_lo_sum (reg, reg, to_add));
2729 if (frame_related_p)
2730 RTX_FRAME_RELATED_P (insn) = 1;
2733 insn = emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
2735 if (frame_related_p)
2738 add_reg_note (insn, REG_FRAME_RELATED_EXPR,
2739 gen_rtx_SET (VOIDmode, stack_pointer_rtx,
2740 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
2743 RTX_FRAME_RELATED_P (insn) = 1;
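/* Usage sketch (illustrative; not from the original file): a frame
   adjustment outside the 16-bit signed immediate range takes the slow
   path above, materializing the constant in A0 with a high/lo_sum pair
   before the add, so the clobber of A0 matters to callers.  */
#if 0
emit_add_sp_const (-40000, true);   /* large: builds -40000 in A0 first */
emit_add_sp_const (-128, true);     /* small: a single add of the constant */
#endif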
2747 /* Prologue and epilogue. */
2749 c6x_expand_prologue (void)
2751 struct c6x_frame frame;
2754 HOST_WIDE_INT initial_offset, off, added_already;
2756 c6x_compute_frame_layout (&frame);
2758 if (flag_stack_usage_info)
2759 current_function_static_stack_size = frame.to_allocate;
2761 initial_offset = -frame.to_allocate;
2764 emit_insn (gen_push_rts ());
2765 nsaved = frame.nregs;
2768 /* If the offsets would be too large for the memory references we will
2769 create to save registers, do the stack allocation in two parts.
2770 Ensure by subtracting 8 that we don't store to the word pointed to
2771 by the stack pointer. */
2772 if (initial_offset < -32768)
2773 initial_offset = -frame.frame_pointer_offset - 8;
2775 if (frame.to_allocate > 0)
2776 gcc_assert (initial_offset != 0);
2778 off = -initial_offset + 4 - frame.padding0;
2780 mem = gen_frame_mem (Pmode, stack_pointer_rtx);
2783 if (frame_pointer_needed)
2785 rtx fp_reg = gen_rtx_REG (SImode, REG_A15);
2786 /* We go through some contortions here to both follow the ABI's
2787 recommendation that FP == incoming SP, and to avoid writing or
2788 reading the word pointed to by the stack pointer. */
2789 rtx addr = gen_rtx_POST_MODIFY (Pmode, stack_pointer_rtx,
2790 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
2792 insn = emit_move_insn (gen_frame_mem (Pmode, addr), fp_reg);
2793 RTX_FRAME_RELATED_P (insn) = 1;
2795 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx, stack_pointer_rtx,
2797 RTX_FRAME_RELATED_P (insn) = 1;
2802 emit_add_sp_const (initial_offset - added_already, true);
2804 if (nsaved < frame.nregs)
2808 for (i = 0; i < N_SAVE_ORDER; i++)
2810 int idx = N_SAVE_ORDER - i - 1;
2811 unsigned regno = reg_save_order[idx];
2813 enum machine_mode save_mode = SImode;
2815 if (regno == REG_A15 && frame_pointer_needed)
2816 /* Already saved. */
2818 if (!c6x_save_reg (regno))
2821 if (TARGET_STDW && (off & 4) == 0 && off <= 256
2823 && i + 1 < N_SAVE_ORDER
2824 && reg_save_order[idx - 1] == regno - 1
2825 && c6x_save_reg (regno - 1))
2831 reg = gen_rtx_REG (save_mode, regno);
2832 off -= GET_MODE_SIZE (save_mode);
2834 insn = emit_move_insn (adjust_address (mem, save_mode, off),
2836 RTX_FRAME_RELATED_P (insn) = 1;
2838 nsaved += HARD_REGNO_NREGS (regno, save_mode);
2841 gcc_assert (nsaved == frame.nregs);
2842 emit_add_sp_const (-frame.to_allocate - initial_offset, true);
2843 if (must_reload_pic_reg_p ())
2845 if (dsbt_decl == NULL)
2849 t = build_index_type (integer_one_node);
2850 t = build_array_type (integer_type_node, t);
2851 t = build_decl (BUILTINS_LOCATION, VAR_DECL,
2852 get_identifier ("__c6xabi_DSBT_BASE"), t);
2853 DECL_ARTIFICIAL (t) = 1;
2854 DECL_IGNORED_P (t) = 1;
2855 DECL_EXTERNAL (t) = 1;
2856 TREE_STATIC (t) = 1;
2857 TREE_PUBLIC (t) = 1;
2862 emit_insn (gen_setup_dsbt (pic_offset_table_rtx,
2863 XEXP (DECL_RTL (dsbt_decl), 0)));
2868 c6x_expand_epilogue (bool sibcall)
2871 struct c6x_frame frame;
2876 c6x_compute_frame_layout (&frame);
2878 mem = gen_frame_mem (Pmode, stack_pointer_rtx);
2880 /* Insert a dummy set/use of the stack pointer. This creates a
2881 scheduler barrier between the prologue saves and epilogue restores. */
2882 emit_insn (gen_epilogue_barrier (stack_pointer_rtx, stack_pointer_rtx));
2884 /* If the offsets would be too large for the memory references we will
2885 create to restore registers, do a preliminary stack adjustment here. */
2886 off = frame.to_allocate - frame.frame_pointer_offset + frame.padding1;
2889 nsaved = frame.nregs;
2893 if (frame.to_allocate > 32768)
2895 /* Don't add the entire offset so that we leave an unused word
2896 above the stack pointer. */
2897 emit_add_sp_const ((off - 16) & ~7, false);
2901 for (i = 0; i < N_SAVE_ORDER; i++)
2903 unsigned regno = reg_save_order[i];
2905 enum machine_mode save_mode = SImode;
2907 if (!c6x_save_reg (regno))
2909 if (regno == REG_A15 && frame_pointer_needed)
2912 if (TARGET_STDW && (off & 4) == 0 && off < 256
2914 && i + 1 < N_SAVE_ORDER
2915 && reg_save_order[i + 1] == regno + 1
2916 && c6x_save_reg (regno + 1))
2921 reg = gen_rtx_REG (save_mode, regno);
2923 emit_move_insn (reg, adjust_address (mem, save_mode, off));
2925 off += GET_MODE_SIZE (save_mode);
2926 nsaved += HARD_REGNO_NREGS (regno, save_mode);
2929 if (!frame_pointer_needed)
2930 emit_add_sp_const (off + frame.padding0 - 4, false);
2933 rtx fp_reg = gen_rtx_REG (SImode, REG_A15);
2934 rtx addr = gen_rtx_PRE_MODIFY (Pmode, stack_pointer_rtx,
2935 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
2937 emit_insn (gen_addsi3 (stack_pointer_rtx, hard_frame_pointer_rtx,
2939 emit_move_insn (fp_reg, gen_frame_mem (Pmode, addr));
2942 gcc_assert (nsaved == frame.nregs);
2946 emit_jump_insn (gen_pop_rts ());
2948 emit_jump_insn (gen_return_internal (gen_rtx_REG (SImode,
2949 RETURN_ADDR_REGNO)));
2953 /* Return the value of the return address for the frame COUNT steps up
2954 from the current frame, after the prologue.
2955 We punt for everything but the current frame by returning const0_rtx. */
2958 c6x_return_addr_rtx (int count)
2963 return get_hard_reg_initial_val (Pmode, RETURN_ADDR_REGNO);
2966 /* Return true iff TYPE is one of the shadow types. */
2968 shadow_type_p (enum attr_type type)
2970 return (type == TYPE_SHADOW || type == TYPE_LOAD_SHADOW
2971 || type == TYPE_MULT_SHADOW);
2974 /* Return true iff INSN is a shadow pattern. */
2978 if (!NONDEBUG_INSN_P (insn) || recog_memoized (insn) < 0)
2980 return shadow_type_p (get_attr_type (insn));
2983 /* Return true iff INSN is a shadow or blockage pattern. */
2985 shadow_or_blockage_p (rtx insn)
2987 enum attr_type type;
2988 if (!NONDEBUG_INSN_P (insn) || recog_memoized (insn) < 0)
2990 type = get_attr_type (insn);
2991 return shadow_type_p (type) || type == TYPE_BLOCKAGE;
2994 /* Translate UNITS into a bitmask of units we can reserve for this
2995 insn. */
2997 get_reservation_flags (enum attr_units units)
3003 return RESERVATION_FLAG_D;
3005 return RESERVATION_FLAG_L;
3007 return RESERVATION_FLAG_S;
3009 return RESERVATION_FLAG_M;
3011 return RESERVATION_FLAG_LS;
3013 return RESERVATION_FLAG_DL;
3015 return RESERVATION_FLAG_DS;
3017 return RESERVATION_FLAG_DLS;
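/* Annotation (not in the original file): the RESERVATION_FLAG_* values
   defined earlier in this file are one bit per functional unit, with the
   multi-unit flags assumed to be ORs of the single-unit ones (e.g.
   RESERVATION_FLAG_DLS = D | L | S).  That makes the power-of-two test
   used by assign_reservations below a "no choice of unit" check.  */
#if 0
static bool
single_unit_p_sketch (int rsrv_flags)
{
  /* True iff exactly one unit bit is set.  */
  return rsrv_flags != 0 && (rsrv_flags & (rsrv_flags - 1)) == 0;
}
#endif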
3023 /* Compute the side of the machine used by INSN, which reserves UNITS.
3024 This must match the reservations in the scheduling description. */
3026 get_insn_side (rtx insn, enum attr_units units)
3028 if (units == UNITS_D_ADDR)
3029 return (get_attr_addr_regfile (insn) == ADDR_REGFILE_A ? 0 : 1);
3032 enum attr_dest_regfile rf = get_attr_dest_regfile (insn);
3033 if (rf == DEST_REGFILE_ANY)
3034 return get_attr_type (insn) == TYPE_BRANCH ? 0 : 1;
3036 return rf == DEST_REGFILE_A ? 0 : 1;
3040 /* After scheduling, walk the insns between HEAD and END and assign unit
3041 reservations. */
3043 assign_reservations (rtx head, rtx end)
3046 for (insn = head; insn != NEXT_INSN (end); insn = NEXT_INSN (insn))
3048 unsigned int sched_mask, reserved;
3052 int rsrv_count[2][4];
3055 if (GET_MODE (insn) != TImode)
3060 /* Find the last insn in the packet. It has a state recorded for it,
3061 which we can use to determine the units we should be using. */
3063 (within != NEXT_INSN (end)
3064 && (within == insn || GET_MODE (within) != TImode));
3065 within = NEXT_INSN (within))
3068 if (!NONDEBUG_INSN_P (within))
3070 icode = recog_memoized (within);
3073 if (shadow_p (within))
3075 if (INSN_INFO_ENTRY (INSN_UID (within)).reservation != 0)
3076 reserved |= 1 << INSN_INFO_ENTRY (INSN_UID (within)).reservation;
3079 if (last == NULL_RTX)
3082 sched_mask = INSN_INFO_ENTRY (INSN_UID (last)).unit_mask;
3083 sched_mask &= ~reserved;
3085 memset (rsrv_count, 0, sizeof rsrv_count);
3086 rsrv[0] = rsrv[1] = ~0;
3087 for (i = 0; i < 8; i++)
3091 unsigned unit_bit = 1 << (unit + side * UNIT_QID_SIDE_OFFSET);
3092 /* Clear the bits which we expect to reserve in the following loop,
3093 leaving the ones set which aren't present in the scheduler's
3094 state and shouldn't be reserved. */
3095 if (sched_mask & unit_bit)
3096 rsrv[i / 4] &= ~(1 << unit);
3099 /* Walk through the insns that occur in the same cycle. We use multiple
3100 passes to assign units, assigning for insns with the most specific
3101 requirements first. */
3102 for (pass = 0; pass < 4; pass++)
3104 (within != NEXT_INSN (end)
3105 && (within == insn || GET_MODE (within) != TImode));
3106 within = NEXT_INSN (within))
3108 int uid = INSN_UID (within);
3109 int this_rsrv, side;
3111 enum attr_units units;
3112 enum attr_type type;
3115 if (!NONDEBUG_INSN_P (within))
3117 icode = recog_memoized (within);
3120 if (INSN_INFO_ENTRY (uid).reservation != 0)
3122 units = get_attr_units (within);
3123 type = get_attr_type (within);
3124 this_rsrv = get_reservation_flags (units);
3127 side = get_insn_side (within, units);
3129 /* Certain floating point instructions are treated specially. If
3130 an insn can choose between units it can reserve, and its
3131 reservation spans more than one cycle, the reservation contains
3132 special markers in the first cycle to help us reconstruct what
3133 the automaton chose. */
3134 if ((type == TYPE_ADDDP || type == TYPE_FP4)
3135 && units == UNITS_LS)
3137 int test1_code = ((type == TYPE_FP4 ? UNIT_QID_FPL1 : UNIT_QID_ADDDPL1)
3138 + side * UNIT_QID_SIDE_OFFSET);
3139 int test2_code = ((type == TYPE_FP4 ? UNIT_QID_FPS1 : UNIT_QID_ADDDPS1)
3140 + side * UNIT_QID_SIDE_OFFSET);
3141 if ((sched_mask & (1 << test1_code)) != 0)
3143 this_rsrv = RESERVATION_FLAG_L;
3144 sched_mask &= ~(1 << test1_code);
3146 else if ((sched_mask & (1 << test2_code)) != 0)
3148 this_rsrv = RESERVATION_FLAG_S;
3149 sched_mask &= ~(1 << test2_code);
3153 if ((this_rsrv & (this_rsrv - 1)) == 0)
3155 int t = exact_log2 (this_rsrv) + side * UNIT_QID_SIDE_OFFSET;
3156 rsrv[side] |= this_rsrv;
3157 INSN_INFO_ENTRY (uid).reservation = t;
3163 for (j = 0; j < 4; j++)
3164 if (this_rsrv & (1 << j))
3165 rsrv_count[side][j]++;
3168 if ((pass == 2 && this_rsrv != RESERVATION_FLAG_DLS)
3169 || (pass == 3 && this_rsrv == RESERVATION_FLAG_DLS))
3171 int best = -1, best_cost = INT_MAX;
3172 for (j = 0; j < 4; j++)
3173 if ((this_rsrv & (1 << j))
3174 && !(rsrv[side] & (1 << j))
3175 && rsrv_count[side][j] < best_cost)
3177 best_cost = rsrv_count[side][j];
3180 gcc_assert (best != -1);
3181 rsrv[side] |= 1 << best;
3182 for (j = 0; j < 4; j++)
3183 if ((this_rsrv & (1 << j)) && j != best)
3184 rsrv_count[side][j]--;
3186 INSN_INFO_ENTRY (uid).reservation
3187 = best + side * UNIT_QID_SIDE_OFFSET;
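/* Minimal standalone sketch (illustrative only) of the pass structure
   above: insns whose flag mask allows a single unit are placed first,
   flexible insns afterwards, each taking the first unit that is both
   allowed and still free.  Assumes four units per side, no shadow or
   reserved-unit handling, and that a free unit always exists.  */
#if 0
static void
greedy_assign_sketch (const int *rsrv_flags, int *chosen, int n)
{
  int taken = 0, pass, i, u;
  for (pass = 0; pass < 2; pass++)
    for (i = 0; i < n; i++)
      {
	int f = rsrv_flags[i];
	bool flexible = (f & (f - 1)) != 0;
	if (flexible != (pass == 1))
	  continue;
	for (u = 0; u < 4; u++)
	  if ((f & (1 << u)) && !(taken & (1 << u)))
	    break;
	chosen[i] = u;
	taken |= 1 << u;
      }
}
#endif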
3193 /* Return a factor by which to weight unit imbalances for a reservation
3194 R. */
3196 unit_req_factor (enum unitreqs r)
3218 /* Examine INSN, and store in REQ1/SIDE1 and REQ2/SIDE2 the unit
3219 requirements. Returns zero if INSN can't be handled, otherwise
3220 either one or two to show how many of the two pairs are in use.
3221 REQ1 is always used; it holds what is normally thought of as the
3222 instruction's reservation, e.g. UNIT_REQ_DL. REQ2 is used to either
3223 describe a cross path, or for loads/stores, the T unit. */
3225 get_unit_reqs (rtx insn, int *req1, int *side1, int *req2, int *side2)
3227 enum attr_units units;
3228 enum attr_cross cross;
3231 if (!NONDEBUG_INSN_P (insn) || recog_memoized (insn) < 0)
3233 units = get_attr_units (insn);
3234 if (units == UNITS_UNKNOWN)
3236 side = get_insn_side (insn, units);
3237 cross = get_attr_cross (insn);
3239 req = (units == UNITS_D ? UNIT_REQ_D
3240 : units == UNITS_D_ADDR ? UNIT_REQ_D
3241 : units == UNITS_DL ? UNIT_REQ_DL
3242 : units == UNITS_DS ? UNIT_REQ_DS
3243 : units == UNITS_L ? UNIT_REQ_L
3244 : units == UNITS_LS ? UNIT_REQ_LS
3245 : units == UNITS_S ? UNIT_REQ_S
3246 : units == UNITS_M ? UNIT_REQ_M
3247 : units == UNITS_DLS ? UNIT_REQ_DLS
3249 gcc_assert (req != -1);
3252 if (units == UNITS_D_ADDR)
3255 *side2 = side ^ (cross == CROSS_Y ? 1 : 0);
3258 else if (cross == CROSS_Y)
3267 /* Walk the insns between and including HEAD and TAIL, and mark the
3268 resource requirements in the unit_reqs table. */
3270 count_unit_reqs (unit_req_table reqs, rtx head, rtx tail)
3274 memset (reqs, 0, sizeof (unit_req_table));
3276 for (insn = head; insn != NEXT_INSN (tail); insn = NEXT_INSN (insn))
3278 int side1, side2, req1, req2;
3280 switch (get_unit_reqs (insn, &req1, &side1, &req2, &side2))
3283 reqs[side2][req2]++;
3286 reqs[side1][req1]++;
3292 /* Update the table REQS by merging more specific unit reservations into
3293 more general ones, i.e. counting (for example) UNIT_REQ_D also in
3294 UNIT_REQ_DL, DS, and DLS. */
3296 merge_unit_reqs (unit_req_table reqs)
3299 for (side = 0; side < 2; side++)
3301 int d = reqs[side][UNIT_REQ_D];
3302 int l = reqs[side][UNIT_REQ_L];
3303 int s = reqs[side][UNIT_REQ_S];
3304 int dl = reqs[side][UNIT_REQ_DL];
3305 int ls = reqs[side][UNIT_REQ_LS];
3306 int ds = reqs[side][UNIT_REQ_DS];
3308 reqs[side][UNIT_REQ_DL] += d;
3309 reqs[side][UNIT_REQ_DL] += l;
3310 reqs[side][UNIT_REQ_DS] += d;
3311 reqs[side][UNIT_REQ_DS] += s;
3312 reqs[side][UNIT_REQ_LS] += l;
3313 reqs[side][UNIT_REQ_LS] += s;
3314 reqs[side][UNIT_REQ_DLS] += ds + dl + ls + d + l + s;
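/* Worked example (annotation): with d = 2, l = 1, s = 0, dl = 1,
   ls = 0, ds = 0 on one side, the merged counts become
   DL = 1 + 2 + 1 = 4, DS = 0 + 2 + 0 = 2, LS = 0 + 1 + 0 = 1 and
   DLS = 0 + 1 + 0 + 2 + 1 + 0 = 4: each more general class counts
   every insn it could absorb.  */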
3318 /* Examine the table REQS and return a measure of unit imbalance by comparing
3319 the two sides of the machine. If, for example, D1 is used twice and D2
3320 used not at all, the return value should be 1 in the absence of other
3321 imbalances. */
3323 unit_req_imbalance (unit_req_table reqs)
3328 for (i = 0; i < UNIT_REQ_MAX; i++)
3330 int factor = unit_req_factor ((enum unitreqs) i);
3331 int diff = abs (reqs[0][i] - reqs[1][i]);
3332 val += (diff + factor - 1) / factor / 2;
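/* Worked example (annotation): if the D unit is required twice on side
   0 and never on side 1, diff = 2 with factor = 1, contributing
   (2 + 0) / 1 / 2 = 1, matching the comment above.  For the broader
   classes, whose factor (from the elided unit_req_factor body) is
   larger because several units can absorb the excess, the same diff
   contributes less.  */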
3337 /* Return the resource-constrained minimum iteration interval given the
3338 data in the REQS table. This must have been processed with
3339 merge_unit_reqs already. */
3341 res_mii (unit_req_table reqs)
3345 for (side = 0; side < 2; side++)
3346 for (req = 0; req < UNIT_REQ_MAX; req++)
3348 int factor = unit_req_factor ((enum unitreqs) req);
3349 worst = MAX ((reqs[side][req] + factor - 1) / factor, worst);
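/* Worked example (annotation): if one side requires the D unit seven
   times per iteration, (7 + 1 - 1) / 1 = 7, so no modulo schedule
   shorter than seven cycles exists; the function returns the worst
   such bound over both sides and all requirement classes.  */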
3355 /* Examine INSN, and store in PMASK1 and PMASK2 bitmasks that represent
3356 the operands that are involved in the (up to) two reservations, as
3357 found by get_unit_reqs. Return true if we did this successfully, false
3358 if we couldn't identify what to do with INSN. */
3360 get_unit_operand_masks (rtx insn, unsigned int *pmask1, unsigned int *pmask2)
3362 enum attr_op_pattern op_pat;
3364 if (recog_memoized (insn) < 0)
3366 if (GET_CODE (PATTERN (insn)) == COND_EXEC)
3368 extract_insn (insn);
3369 op_pat = get_attr_op_pattern (insn);
3370 if (op_pat == OP_PATTERN_DT)
3372 gcc_assert (recog_data.n_operands == 2);
3377 else if (op_pat == OP_PATTERN_TD)
3379 gcc_assert (recog_data.n_operands == 2);
3384 else if (op_pat == OP_PATTERN_SXS)
3386 gcc_assert (recog_data.n_operands == 3);
3387 *pmask1 = (1 << 0) | (1 << 2);
3391 else if (op_pat == OP_PATTERN_SX)
3393 gcc_assert (recog_data.n_operands == 2);
3398 else if (op_pat == OP_PATTERN_SSX)
3400 gcc_assert (recog_data.n_operands == 3);
3401 *pmask1 = (1 << 0) | (1 << 1);
3408 /* Try to replace a register in INSN, which has corresponding rename info
3409 from regrename_analyze in INFO. OP_MASK and ORIG_SIDE provide information
3410 about the operands that must be renamed and the side they are on.
3411 REQS is the table of unit reservations in the loop between HEAD and TAIL.
3412 We recompute this information locally after our transformation, and keep
3413 it only if we managed to improve the balance. */
3415 try_rename_operands (rtx head, rtx tail, unit_req_table reqs, rtx insn,
3416 insn_rr_info *info, unsigned int op_mask, int orig_side)
3418 enum reg_class super_class = orig_side == 0 ? B_REGS : A_REGS;
3419 HARD_REG_SET unavailable;
3420 du_head_p this_head;
3421 struct du_chain *chain;
3424 int best_reg, old_reg;
3425 VEC (du_head_p, heap) *involved_chains = NULL;
3426 unit_req_table new_reqs;
3428 for (i = 0, tmp_mask = op_mask; tmp_mask; i++)
3431 if ((tmp_mask & (1 << i)) == 0)
3433 if (info->op_info[i].n_chains != 1)
3435 op_chain = regrename_chain_from_id (info->op_info[i].heads[0]->id);
3436 VEC_safe_push (du_head_p, heap, involved_chains, op_chain);
3437 tmp_mask &= ~(1 << i);
3440 if (VEC_length (du_head_p, involved_chains) > 1)
3443 this_head = VEC_index (du_head_p, involved_chains, 0);
3444 if (this_head->cannot_rename)
3447 for (chain = this_head->first; chain; chain = chain->next_use)
3449 unsigned int mask1, mask2, mask_changed;
3450 int count, side1, side2, req1, req2;
3451 insn_rr_info *this_rr = VEC_index (insn_rr_info, insn_rr,
3452 INSN_UID (chain->insn));
3454 count = get_unit_reqs (chain->insn, &req1, &side1, &req2, &side2);
3459 if (!get_unit_operand_masks (chain->insn, &mask1, &mask2))
3462 extract_insn (chain->insn);
3465 for (i = 0; i < recog_data.n_operands; i++)
3468 int n_this_op = this_rr->op_info[i].n_chains;
3469 for (j = 0; j < n_this_op; j++)
3471 du_head_p other = this_rr->op_info[i].heads[j];
3472 if (regrename_chain_from_id (other->id) == this_head)
3480 mask_changed |= 1 << i;
3482 gcc_assert (mask_changed != 0);
3483 if (mask_changed != mask1 && mask_changed != mask2)
3487 /* If we get here, we can do the renaming. */
3488 COMPL_HARD_REG_SET (unavailable, reg_class_contents[(int) super_class]);
3490 old_reg = this_head->regno;
3491 best_reg = find_best_rename_reg (this_head, super_class, &unavailable, old_reg);
3493 regrename_do_replace (this_head, best_reg);
3495 count_unit_reqs (new_reqs, head, PREV_INSN (tail));
3496 merge_unit_reqs (new_reqs);
3499 fprintf (dump_file, "reshuffle for insn %d, op_mask %x, "
3500 "original side %d, new reg %d\n",
3501 INSN_UID (insn), op_mask, orig_side, best_reg);
3502 fprintf (dump_file, " imbalance %d -> %d\n",
3503 unit_req_imbalance (reqs), unit_req_imbalance (new_reqs));
3505 if (unit_req_imbalance (new_reqs) > unit_req_imbalance (reqs))
3506 regrename_do_replace (this_head, old_reg);
3507 else
3508 memcpy (reqs, new_reqs, sizeof (unit_req_table));
3511 VEC_free (du_head_p, heap, involved_chains);
3514 /* Find insns in LOOP which would, if shifted to the other side
3515 of the machine, reduce an imbalance in the unit reservations. */
3517 reshuffle_units (basic_block loop)
3519 rtx head = BB_HEAD (loop);
3520 rtx tail = BB_END (loop);
3522 unit_req_table reqs;
3527 count_unit_reqs (reqs, head, PREV_INSN (tail));
3528 merge_unit_reqs (reqs);
3530 regrename_init (true);
3532 bitmap_initialize (&bbs, &bitmap_default_obstack);
3534 FOR_EACH_EDGE (e, ei, loop->preds)
3535 bitmap_set_bit (&bbs, e->src->index);
3537 bitmap_set_bit (&bbs, loop->index);
3538 regrename_analyze (&bbs);
3540 for (insn = head; insn != NEXT_INSN (tail); insn = NEXT_INSN (insn))
3542 enum attr_units units;
3543 int count, side1, side2, req1, req2;
3544 unsigned int mask1, mask2;
3547 if (!NONDEBUG_INSN_P (insn))
3550 count = get_unit_reqs (insn, &req1, &side1, &req2, &side2);
3555 if (!get_unit_operand_masks (insn, &mask1, &mask2))
3558 info = VEC_index (insn_rr_info, insn_rr, INSN_UID (insn));
3559 if (info->op_info == NULL)
3562 if (reqs[side1][req1] > 1
3563 && reqs[side1][req1] > 2 * reqs[side1 ^ 1][req1])
3565 try_rename_operands (head, tail, reqs, insn, info, mask1, side1);
3568 units = get_attr_units (insn);
3569 if (units == UNITS_D_ADDR)
3571 gcc_assert (count == 2);
3572 if (reqs[side2][req2] > 1
3573 && reqs[side2][req2] > 2 * reqs[side2 ^ 1][req2])
3575 try_rename_operands (head, tail, reqs, insn, info, mask2, side2);
3579 regrename_finish ();
3582 /* Backend scheduling state. */
3583 typedef struct c6x_sched_context
3585 /* The current scheduler clock, saved in the sched_reorder hook. */
3586 int curr_sched_clock;
3588 /* Number of insns issued so far in this cycle. */
3589 int issued_this_cycle;
3591 /* We record the time at which each jump occurs in JUMP_CYCLES. The
3592 theoretical maximum for number of jumps in flight is 12: 2 every
3593 cycle, with a latency of 6 cycles each. This is a circular
3594 buffer; JUMP_CYCLE_INDEX is the pointer to the start. Earlier
3595 jumps have a higher index. This array should be accessed through
3596 the jump_cycle function. */
3597 int jump_cycles[12];
3598 int jump_cycle_index;
3600 /* In parallel with jump_cycles, this array records the opposite of
3601 the condition used in each pending jump. This is used to
3602 predicate insns that are scheduled in the jump's delay slots. If
3603 this is NULL_RTX, no such predication happens. */
3604 rtx jump_cond[12];
3606 /* Similar to the jump_cycles mechanism, but here we take into
3607 account all insns with delay slots, to avoid scheduling asms into
3608 the delay slots. */
3609 int delays_finished_at;
3611 /* The following variable value is the last issued insn. */
3612 rtx last_scheduled_insn;
3613 /* The last issued insn that isn't a shadow of another. */
3614 rtx last_scheduled_iter0;
3616 /* The following variable value is DFA state before issuing the
3617 first insn in the current clock cycle. We do not use this member
3618 of the structure directly; we copy the data in and out of
3619 prev_cycle_state. */
3620 state_t prev_cycle_state_ctx;
3622 int reg_n_accesses[FIRST_PSEUDO_REGISTER];
3623 int reg_n_xaccesses[FIRST_PSEUDO_REGISTER];
3624 int reg_set_in_cycle[FIRST_PSEUDO_REGISTER];
3626 int tmp_reg_n_accesses[FIRST_PSEUDO_REGISTER];
3627 int tmp_reg_n_xaccesses[FIRST_PSEUDO_REGISTER];
3628 } *c6x_sched_context_t;
3630 /* The current scheduling state. */
3631 static struct c6x_sched_context ss;
3633 /* The following variable value is DFA state before issuing the first insn
3634 in the current clock cycle. This is used in c6x_variable_issue for
3635 comparison with the state after issuing the last insn in a cycle. */
3636 static state_t prev_cycle_state;
3638 /* Set when we discover while processing an insn that it would lead to too
3639 many accesses of the same register. */
3640 static bool reg_access_stall;
3642 /* The highest insn uid after delayed insns were split, but before loop bodies
3643 were copied by the modulo scheduling code. */
3644 static int sploop_max_uid_iter0;
3646 /* Look up the jump cycle with index N. For an out-of-bounds N, we return 0,
3647 so the caller does not specifically have to test for it. */
3649 get_jump_cycle (int n)
3653 n += ss.jump_cycle_index;
3656 return ss.jump_cycles[n];
3659 /* Look up the jump condition with index N. */
3661 get_jump_cond (int n)
3665 n += ss.jump_cycle_index;
3668 return ss.jump_cond[n];
3671 /* Return the index of the first jump that occurs after CLOCK_VAR. If no jump
3672 has delay slots beyond CLOCK_VAR, return -1. */
3674 first_jump_index (int clock_var)
3680 int t = get_jump_cycle (n);
3689 /* Add a new entry in our scheduling state for a jump that occurs in CYCLE
3690 and has the opposite condition of COND. */
3692 record_jump (int cycle, rtx cond)
3694 if (ss.jump_cycle_index == 0)
3695 ss.jump_cycle_index = 11;
3697 ss.jump_cycle_index--;
3698 ss.jump_cycles[ss.jump_cycle_index] = cycle;
3699 ss.jump_cond[ss.jump_cycle_index] = cond;
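/* Usage sketch (illustrative; cond_a/cond_b are hypothetical condition
   rtxen): record_jump pushes at the front of the 12-entry circular
   buffer, so index 0 always names the most recently recorded jump and
   earlier jumps get higher indices, as the comment on jump_cycles says.  */
#if 0
record_jump (10, cond_a);
record_jump (14, cond_b);
/* Now get_jump_cycle (0) == 14 and get_jump_cycle (1) == 10;
   out-of-range indices return 0.  */
#endif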
3702 /* Set the clock cycle of INSN to CYCLE. Also clears the insn's entry in
3703 the insn_info vector. */
3705 insn_set_clock (rtx insn, int cycle)
3707 unsigned uid = INSN_UID (insn);
3709 if (uid >= INSN_INFO_LENGTH)
3710 VEC_safe_grow (c6x_sched_insn_info, heap, insn_info, uid * 5 / 4 + 10);
3712 INSN_INFO_ENTRY (uid).clock = cycle;
3713 INSN_INFO_ENTRY (uid).new_cond = NULL;
3714 INSN_INFO_ENTRY (uid).reservation = 0;
3715 INSN_INFO_ENTRY (uid).ebb_start = false;
3718 /* Return the clock cycle we set for the insn with uid UID. */
3720 insn_uid_get_clock (int uid)
3722 return INSN_INFO_ENTRY (uid).clock;
3725 /* Return the clock cycle we set for INSN. */
3727 insn_get_clock (rtx insn)
3729 return insn_uid_get_clock (INSN_UID (insn));
3732 /* Examine INSN, and if it is a conditional jump of any kind, return
3733 the opposite of the condition in which it branches. Otherwise,
3734 return NULL_RTX. */
3736 condjump_opposite_condition (rtx insn)
3738 rtx pat = PATTERN (insn);
3739 int icode = INSN_CODE (insn);
3742 if (icode == CODE_FOR_br_true || icode == CODE_FOR_br_false)
3744 x = XEXP (SET_SRC (pat), 0);
3745 if (icode == CODE_FOR_br_false)
3748 if (GET_CODE (pat) == COND_EXEC)
3750 rtx t = COND_EXEC_CODE (pat);
3751 if ((GET_CODE (t) == PARALLEL
3752 && GET_CODE (XVECEXP (t, 0, 0)) == RETURN)
3753 || (GET_CODE (t) == UNSPEC && XINT (t, 1) == UNSPEC_REAL_JUMP)
3754 || (GET_CODE (t) == SET && SET_DEST (t) == pc_rtx))
3755 x = COND_EXEC_TEST (pat);
3760 enum rtx_code code = GET_CODE (x);
3761 x = gen_rtx_fmt_ee (code == EQ ? NE : EQ,
3762 GET_MODE (x), XEXP (x, 0),
3768 /* Return true iff COND1 and COND2 are exactly opposite conditions
3769 one of them NE and the other EQ. */
3771 conditions_opposite_p (rtx cond1, rtx cond2)
3773 return (rtx_equal_p (XEXP (cond1, 0), XEXP (cond2, 0))
3774 && rtx_equal_p (XEXP (cond1, 1), XEXP (cond2, 1))
3775 && GET_CODE (cond1) == reverse_condition (GET_CODE (cond2)));
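/* Sketch (illustrative; assumes the REG_B0 macro from c6x.h): two
   comparisons of the same predicate register against the same operand,
   one EQ and one NE, are "exactly opposite" in the sense above.  */
#if 0
rtx r = gen_rtx_REG (SImode, REG_B0);
rtx c1 = gen_rtx_EQ (VOIDmode, r, const0_rtx);
rtx c2 = gen_rtx_NE (VOIDmode, r, const0_rtx);
/* conditions_opposite_p (c1, c2) is true.  */
#endif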
3778 /* Return true if we can add a predicate COND to INSN, or if INSN
3779 already has that predicate. If DOIT is true, also perform the
3780 modification. */
3782 predicate_insn (rtx insn, rtx cond, bool doit)
3785 if (cond == NULL_RTX)
3791 if (get_attr_predicable (insn) == PREDICABLE_YES
3792 && GET_CODE (PATTERN (insn)) != COND_EXEC)
3796 rtx newpat = gen_rtx_COND_EXEC (VOIDmode, cond, PATTERN (insn));
3797 PATTERN (insn) = newpat;
3798 INSN_CODE (insn) = -1;
3802 if (GET_CODE (PATTERN (insn)) == COND_EXEC
3803 && rtx_equal_p (COND_EXEC_TEST (PATTERN (insn)), cond))
3805 icode = INSN_CODE (insn);
3806 if (icode == CODE_FOR_real_jump
3807 || icode == CODE_FOR_jump
3808 || icode == CODE_FOR_indirect_jump)
3810 rtx pat = PATTERN (insn);
3811 rtx dest = (icode == CODE_FOR_real_jump ? XVECEXP (pat, 0, 0)
3812 : icode == CODE_FOR_jump ? XEXP (SET_SRC (pat), 0)
3818 newpat = gen_rtx_COND_EXEC (VOIDmode, cond, PATTERN (insn));
3820 newpat = gen_br_true (cond, XEXP (cond, 0), dest);
3821 PATTERN (insn) = newpat;
3822 INSN_CODE (insn) = -1;
3826 if (INSN_CODE (insn) == CODE_FOR_br_true)
3828 rtx br_cond = XEXP (SET_SRC (PATTERN (insn)), 0);
3829 return rtx_equal_p (br_cond, cond);
3831 if (INSN_CODE (insn) == CODE_FOR_br_false)
3833 rtx br_cond = XEXP (SET_SRC (PATTERN (insn)), 0);
3834 return conditions_opposite_p (br_cond, cond);
3839 /* Initialize SC. Used by c6x_init_sched_context and c6x_sched_init. */
3841 init_sched_state (c6x_sched_context_t sc)
3843 sc->last_scheduled_insn = NULL_RTX;
3844 sc->last_scheduled_iter0 = NULL_RTX;
3845 sc->issued_this_cycle = 0;
3846 memset (sc->jump_cycles, 0, sizeof sc->jump_cycles);
3847 memset (sc->jump_cond, 0, sizeof sc->jump_cond);
3848 sc->jump_cycle_index = 0;
3849 sc->delays_finished_at = 0;
3850 sc->curr_sched_clock = 0;
3852 sc->prev_cycle_state_ctx = xmalloc (dfa_state_size);
3854 memset (sc->reg_n_accesses, 0, sizeof sc->reg_n_accesses);
3855 memset (sc->reg_n_xaccesses, 0, sizeof sc->reg_n_xaccesses);
3856 memset (sc->reg_set_in_cycle, 0, sizeof sc->reg_set_in_cycle);
3858 state_reset (sc->prev_cycle_state_ctx);
3861 /* Allocate store for new scheduling context. */
3863 c6x_alloc_sched_context (void)
3865 return xmalloc (sizeof (struct c6x_sched_context));
3868 /* If CLEAN_P is true, initialize _SC with clean data;
3869 otherwise initialize it from the global context. */
3871 c6x_init_sched_context (void *_sc, bool clean_p)
3873 c6x_sched_context_t sc = (c6x_sched_context_t) _sc;
3877 init_sched_state (sc);
3882 sc->prev_cycle_state_ctx = xmalloc (dfa_state_size);
3883 memcpy (sc->prev_cycle_state_ctx, prev_cycle_state, dfa_state_size);
3887 /* Sets the global scheduling context to the one pointed to by _SC. */
3889 c6x_set_sched_context (void *_sc)
3891 c6x_sched_context_t sc = (c6x_sched_context_t) _sc;
3893 gcc_assert (sc != NULL);
3895 memcpy (prev_cycle_state, sc->prev_cycle_state_ctx, dfa_state_size);
3898 /* Clear data in _SC. */
3900 c6x_clear_sched_context (void *_sc)
3902 c6x_sched_context_t sc = (c6x_sched_context_t) _sc;
3903 gcc_assert (_sc != NULL);
3905 free (sc->prev_cycle_state_ctx);
3910 c6x_free_sched_context (void *_sc)
3915 /* Provide information about speculation capabilities, and set the
3916 DO_BACKTRACKING flag. */
3918 c6x_set_sched_flags (spec_info_t spec_info)
3920 unsigned int *flags = &(current_sched_info->flags);
3922 if (*flags & SCHED_EBB)
3924 *flags |= DO_BACKTRACKING | DO_PREDICATION;
3927 spec_info->mask = 0;
3930 /* Implement the TARGET_SCHED_ISSUE_RATE hook. */
3933 c6x_issue_rate (void)
3938 /* Used together with the collapse_ndfa option, this ensures that we reach a
3939 deterministic automaton state before trying to advance a cycle.
3940 With collapse_ndfa, genautomata creates advance cycle arcs only for
3941 such deterministic states. */
3944 c6x_sched_dfa_pre_cycle_insn (void)
3949 /* We're beginning a new block. Initialize data structures as necessary. */
3952 c6x_sched_init (FILE *dump ATTRIBUTE_UNUSED,
3953 int sched_verbose ATTRIBUTE_UNUSED,
3954 int max_ready ATTRIBUTE_UNUSED)
3956 if (prev_cycle_state == NULL)
3958 prev_cycle_state = xmalloc (dfa_state_size);
3960 init_sched_state (&ss);
3961 state_reset (prev_cycle_state);
3964 /* We are about to begin issuing INSN. Return nonzero if we cannot
3965 issue it on the given cycle CLOCK and return zero if we should not sort
3966 the ready queue on the next clock start.
3967 For C6X, we use this function just to copy the previous DFA state
3968 for comparison purposes. */
3971 c6x_dfa_new_cycle (FILE *dump ATTRIBUTE_UNUSED, int verbose ATTRIBUTE_UNUSED,
3972 rtx insn ATTRIBUTE_UNUSED, int last_clock ATTRIBUTE_UNUSED,
3973 int clock ATTRIBUTE_UNUSED, int *sort_p ATTRIBUTE_UNUSED)
3975 if (clock != last_clock)
3976 memcpy (prev_cycle_state, curr_state, dfa_state_size);
3981 c6x_mark_regno_read (int regno, bool cross)
3983 int t = ++ss.tmp_reg_n_accesses[regno];
3986 reg_access_stall = true;
3990 int set_cycle = ss.reg_set_in_cycle[regno];
3991 /* This must be done in this way rather than by tweaking things in
3992 adjust_cost, since the stall occurs even for insns with opposite
3993 predicates, and the scheduler may not even see a dependency. */
3994 if (set_cycle > 0 && set_cycle == ss.curr_sched_clock)
3995 reg_access_stall = true;
3996 /* This doesn't quite do anything yet as we're only modeling one
3997 side of the machine. */
3998 ++ss.tmp_reg_n_xaccesses[regno];
4002 /* Note that REG is read in the insn being examined. If CROSS, it
4003 means the access is through a cross path. Update the temporary reg
4004 access arrays, and set REG_ACCESS_STALL if the insn can't be issued
4005 in the current cycle. */
4008 c6x_mark_reg_read (rtx reg, bool cross)
4010 unsigned regno = REGNO (reg);
4011 unsigned nregs = hard_regno_nregs[regno][GET_MODE (reg)];
4014 c6x_mark_regno_read (regno + nregs, cross);
4017 /* Note that register REG is written in cycle CYCLES. */
4020 c6x_mark_reg_written (rtx reg, int cycles)
4022 unsigned regno = REGNO (reg);
4023 unsigned nregs = hard_regno_nregs[regno][GET_MODE (reg)];
4026 ss.reg_set_in_cycle[regno + nregs] = cycles;
4029 /* Update the register state information for an instruction whose
4030 body is X. Return true if the instruction has to be delayed until the
4031 next cycle. */
4034 c6x_registers_update (rtx insn)
4036 enum attr_cross cross;
4037 enum attr_dest_regfile destrf;
4041 if (!reload_completed || recog_memoized (insn) < 0)
4044 reg_access_stall = false;
4045 memcpy (ss.tmp_reg_n_accesses, ss.reg_n_accesses,
4046 sizeof ss.tmp_reg_n_accesses);
4047 memcpy (ss.tmp_reg_n_xaccesses, ss.reg_n_xaccesses,
4048 sizeof ss.tmp_reg_n_xaccesses);
4050 extract_insn (insn);
4052 cross = get_attr_cross (insn);
4053 destrf = get_attr_dest_regfile (insn);
4055 nops = recog_data.n_operands;
4057 if (GET_CODE (x) == COND_EXEC)
4059 c6x_mark_reg_read (XEXP (XEXP (x, 0), 0), false);
4063 for (i = 0; i < nops; i++)
4065 rtx op = recog_data.operand[i];
4066 if (recog_data.operand_type[i] == OP_OUT)
4070 bool this_cross = cross;
4071 if (destrf == DEST_REGFILE_A && A_REGNO_P (REGNO (op)))
4073 if (destrf == DEST_REGFILE_B && B_REGNO_P (REGNO (op)))
4075 c6x_mark_reg_read (op, this_cross);
4077 else if (MEM_P (op))
4080 switch (GET_CODE (op))
4089 c6x_mark_reg_read (op, false);
4094 gcc_assert (GET_CODE (op) == PLUS);
4097 c6x_mark_reg_read (XEXP (op, 0), false);
4098 if (REG_P (XEXP (op, 1)))
4099 c6x_mark_reg_read (XEXP (op, 1), false);
4104 c6x_mark_regno_read (REG_B14, false);
4110 else if (!CONSTANT_P (op) && strlen (recog_data.constraints[i]) > 0)
4113 return reg_access_stall;
4116 /* Helper function for the TARGET_SCHED_REORDER and
4117 TARGET_SCHED_REORDER2 hooks. If scheduling an insn would be unsafe
4118 in the current cycle, move it down in the ready list and return the
4119 number of non-unsafe insns. */
4122 c6x_sched_reorder_1 (rtx *ready, int *pn_ready, int clock_var)
4124 int n_ready = *pn_ready;
4125 rtx *e_ready = ready + n_ready;
4129 /* Keep track of conflicts due to a limited number of register accesses,
4130 and due to stalls incurred by too-early accesses of registers using
4131 cross paths. */
4133 for (insnp = ready; insnp < e_ready; insnp++)
4136 int icode = recog_memoized (insn);
4137 bool is_asm = (icode < 0
4138 && (GET_CODE (PATTERN (insn)) == ASM_INPUT
4139 || asm_noperands (PATTERN (insn)) >= 0));
4140 bool no_parallel = (is_asm || icode == CODE_FOR_sploop
4142 && get_attr_type (insn) == TYPE_ATOMIC));
4144 /* We delay asm insns until all delay slots are exhausted. We can't
4145 accurately tell how many cycles an asm takes, and the main scheduling
4146 code always assumes at least 1 cycle, which may be wrong. */
4148 && (ss.issued_this_cycle > 0 || clock_var < ss.delays_finished_at))
4149 || c6x_registers_update (insn)
4150 || (ss.issued_this_cycle > 0 && icode == CODE_FOR_sploop))
4152 memmove (ready + 1, ready, (insnp - ready) * sizeof (rtx));
4157 else if (shadow_p (insn))
4159 memmove (ready + 1, ready, (insnp - ready) * sizeof (rtx));
4164 /* Ensure that no other jump is scheduled in jump delay slots, since
4165 it would put the machine into the wrong state. Also, we must
4166 avoid scheduling insns that have a latency longer than the
4167 remaining jump delay slots, as the code at the jump destination
4168 won't be prepared for it.
4170 However, we can relax this condition somewhat. The rest of the
4171 scheduler will automatically avoid scheduling an insn on which
4172 the jump shadow depends so late that its side effect happens
4173 after the jump. This means that if we see an insn with a longer
4174 latency here, it can safely be scheduled if we can ensure that it
4175 has a predicate opposite of the previous jump: the side effect
4176 will happen in what we think of as the same basic block. In
4177 c6x_variable_issue, we will record the necessary predicate in
4178 new_conditions, and after scheduling is finished, we will modify
4181 Special care must be taken whenever there is more than one jump
4182 in flight. */
4184 first_jump = first_jump_index (clock_var);
4185 if (first_jump != -1)
4187 int first_cycle = get_jump_cycle (first_jump);
4188 rtx first_cond = get_jump_cond (first_jump);
4189 int second_cycle = 0;
4192 second_cycle = get_jump_cycle (first_jump - 1);
4194 for (insnp = ready; insnp < e_ready; insnp++)
4197 int icode = recog_memoized (insn);
4198 bool is_asm = (icode < 0
4199 && (GET_CODE (PATTERN (insn)) == ASM_INPUT
4200 || asm_noperands (PATTERN (insn)) >= 0));
4201 int this_cycles, rsrv_cycles;
4202 enum attr_type type;
4204 gcc_assert (!is_asm);
4207 this_cycles = get_attr_cycles (insn);
4208 rsrv_cycles = get_attr_reserve_cycles (insn);
4209 type = get_attr_type (insn);
4210 /* Treat branches specially; there is also a hazard if two jumps
4211 end at the same cycle. */
4212 if (type == TYPE_BRANCH || type == TYPE_CALL)
4214 if (clock_var + this_cycles <= first_cycle)
4216 if ((first_jump > 0 && clock_var + this_cycles > second_cycle)
4217 || clock_var + rsrv_cycles > first_cycle
4218 || !predicate_insn (insn, first_cond, false))
4220 memmove (ready + 1, ready, (insnp - ready) * sizeof (rtx));
4231 /* Implement the TARGET_SCHED_REORDER hook. We save the current clock
4232 for later and clear the register access information for the new
4233 cycle. We also move asm statements out of the way if they would be
4234 scheduled in a delay slot. */
4237 c6x_sched_reorder (FILE *dump ATTRIBUTE_UNUSED,
4238 int sched_verbose ATTRIBUTE_UNUSED,
4239 rtx *ready ATTRIBUTE_UNUSED,
4240 int *pn_ready ATTRIBUTE_UNUSED, int clock_var)
4242 ss.curr_sched_clock = clock_var;
4243 ss.issued_this_cycle = 0;
4244 memset (ss.reg_n_accesses, 0, sizeof ss.reg_n_accesses);
4245 memset (ss.reg_n_xaccesses, 0, sizeof ss.reg_n_xaccesses);
4250 return c6x_sched_reorder_1 (ready, pn_ready, clock_var);
4253 /* Implement the TARGET_SCHED_REORDER2 hook. We use this to record the clock
4254 cycle for every insn. */
4257 c6x_sched_reorder2 (FILE *dump ATTRIBUTE_UNUSED,
4258 int sched_verbose ATTRIBUTE_UNUSED,
4259 rtx *ready ATTRIBUTE_UNUSED,
4260 int *pn_ready ATTRIBUTE_UNUSED, int clock_var)
4262 /* FIXME: the assembler rejects labels inside an execute packet.
4263 This can occur if prologue insns are scheduled in parallel with
4264 others, so we avoid this here. Also make sure that nothing is
4265 scheduled in parallel with a TYPE_ATOMIC insn or after a jump. */
4266 if (RTX_FRAME_RELATED_P (ss.last_scheduled_insn)
4267 || JUMP_P (ss.last_scheduled_insn)
4268 || (recog_memoized (ss.last_scheduled_insn) >= 0
4269 && get_attr_type (ss.last_scheduled_insn) == TYPE_ATOMIC))
4271 int n_ready = *pn_ready;
4272 rtx *e_ready = ready + n_ready;
4275 for (insnp = ready; insnp < e_ready; insnp++)
4278 if (!shadow_p (insn))
4280 memmove (ready + 1, ready, (insnp - ready) * sizeof (rtx));
4289 return c6x_sched_reorder_1 (ready, pn_ready, clock_var);
4292 /* Subroutine of maybe_clobber_cond, called through note_stores. */
4295 clobber_cond_1 (rtx x, const_rtx pat ATTRIBUTE_UNUSED, void *data1)
4297 rtx *cond = (rtx *)data1;
4298 if (*cond != NULL_RTX && reg_overlap_mentioned_p (x, *cond))
4302 /* Examine INSN, and if it destroys the conditions we have recorded for
4303 any of the jumps in flight, clear that condition so that we don't
4304 predicate any more insns. CLOCK_VAR helps us limit the search to
4305 only those jumps which are still in flight. */
4308 maybe_clobber_cond (rtx insn, int clock_var)
4311 idx = ss.jump_cycle_index;
4312 for (n = 0; n < 12; n++, idx++)
4319 cycle = ss.jump_cycles[idx];
4320 if (cycle <= clock_var)
4323 cond = ss.jump_cond[idx];
4324 if (cond == NULL_RTX)
4329 ss.jump_cond[idx] = NULL_RTX;
4333 note_stores (PATTERN (insn), clobber_cond_1, ss.jump_cond + idx);
4334 for (link = REG_NOTES (insn); link; link = XEXP (link, 1))
4335 if (REG_NOTE_KIND (link) == REG_INC)
4336 clobber_cond_1 (XEXP (link, 0), NULL_RTX, ss.jump_cond + idx);
4340 /* Implement the TARGET_SCHED_VARIABLE_ISSUE hook. We are about to
4341 issue INSN. Return the number of insns left on the ready queue
4342 that can be issued this cycle.
4343 We use this hook to record clock cycles and reservations for every insn. */
4346 c6x_variable_issue (FILE *dump ATTRIBUTE_UNUSED,
4347 int sched_verbose ATTRIBUTE_UNUSED,
4348 rtx insn, int can_issue_more ATTRIBUTE_UNUSED)
4350 ss.last_scheduled_insn = insn;
4351 if (INSN_UID (insn) < sploop_max_uid_iter0 && !JUMP_P (insn))
4352 ss.last_scheduled_iter0 = insn;
4353 if (GET_CODE (PATTERN (insn)) != USE && GET_CODE (PATTERN (insn)) != CLOBBER)
4354 ss.issued_this_cycle++;
4357 state_t st_after = alloca (dfa_state_size);
4358 int curr_clock = ss.curr_sched_clock;
4359 int uid = INSN_UID (insn);
4360 int icode = recog_memoized (insn);
4362 int first, first_cycle;
4366 insn_set_clock (insn, curr_clock);
4367 INSN_INFO_ENTRY (uid).ebb_start
4368 = curr_clock == 0 && ss.issued_this_cycle == 1;
4370 first = first_jump_index (ss.curr_sched_clock);
4374 first_cond = NULL_RTX;
4378 first_cycle = get_jump_cycle (first);
4379 first_cond = get_jump_cond (first);
4382 && first_cycle > curr_clock
4383 && first_cond != NULL_RTX
4384 && (curr_clock + get_attr_cycles (insn) > first_cycle
4385 || get_attr_type (insn) == TYPE_BRANCH
4386 || get_attr_type (insn) == TYPE_CALL))
4387 INSN_INFO_ENTRY (uid).new_cond = first_cond;
4389 memcpy (st_after, curr_state, dfa_state_size);
4390 state_transition (st_after, const0_rtx);
4393 for (i = 0; i < 2 * UNIT_QID_SIDE_OFFSET; i++)
4394 if (cpu_unit_reservation_p (st_after, c6x_unit_codes[i])
4395 && !cpu_unit_reservation_p (prev_cycle_state, c6x_unit_codes[i]))
4397 INSN_INFO_ENTRY (uid).unit_mask = mask;
4399 maybe_clobber_cond (insn, curr_clock);
4405 c6x_registers_update (insn);
4406 memcpy (ss.reg_n_accesses, ss.tmp_reg_n_accesses,
4407 sizeof ss.reg_n_accesses);
4408 memcpy (ss.reg_n_xaccesses, ss.tmp_reg_n_xaccesses,
4409 sizeof ss.reg_n_xaccesses);
4411 cycles = get_attr_cycles (insn);
4412 if (ss.delays_finished_at < ss.curr_sched_clock + cycles)
4413 ss.delays_finished_at = ss.curr_sched_clock + cycles;
4414 if (get_attr_type (insn) == TYPE_BRANCH
4415 || get_attr_type (insn) == TYPE_CALL)
4417 rtx opposite = condjump_opposite_condition (insn);
4418 record_jump (ss.curr_sched_clock + cycles, opposite);
4421 /* Mark the cycles in which the destination registers are written.
4422 This is used for calculating stalls when using cross units. */
4423 extract_insn (insn);
4424 /* Cross-path stalls don't apply to results of load insns. */
4425 if (get_attr_type (insn) == TYPE_LOAD
4426 || get_attr_type (insn) == TYPE_LOADN
4427 || get_attr_type (insn) == TYPE_LOAD_SHADOW)
4429 for (i = 0; i < recog_data.n_operands; i++)
4431 rtx op = recog_data.operand[i];
4434 rtx addr = XEXP (op, 0);
4435 if (GET_RTX_CLASS (GET_CODE (addr)) == RTX_AUTOINC)
4436 c6x_mark_reg_written (XEXP (addr, 0),
4437 insn_uid_get_clock (uid) + 1);
4439 if (recog_data.operand_type[i] != OP_IN
4442 c6x_mark_reg_written (op,
4443 insn_uid_get_clock (uid) + cycles);
4448 return can_issue_more;
4451 /* Implement the TARGET_SCHED_ADJUST_COST hook. We need special handling for
4452 anti- and output dependencies. */
4455 c6x_adjust_cost (rtx insn, rtx link, rtx dep_insn, int cost)
4457 enum attr_type insn_type = TYPE_UNKNOWN, dep_insn_type = TYPE_UNKNOWN;
4458 int dep_insn_code_number, insn_code_number;
4459 int shadow_bonus = 0;
4461 dep_insn_code_number = recog_memoized (dep_insn);
4462 insn_code_number = recog_memoized (insn);
4464 if (dep_insn_code_number >= 0)
4465 dep_insn_type = get_attr_type (dep_insn);
4467 if (insn_code_number >= 0)
4468 insn_type = get_attr_type (insn);
4470 kind = REG_NOTE_KIND (link);
4473 /* If we have a dependency on a load, and it's not for the result of
4474 the load, it must be for an autoincrement. Reduce the cost in that
4475 case. */
4476 if (dep_insn_type == TYPE_LOAD)
4478 rtx set = PATTERN (dep_insn);
4479 if (GET_CODE (set) == COND_EXEC)
4480 set = COND_EXEC_CODE (set);
4481 if (GET_CODE (set) == UNSPEC)
4485 gcc_assert (GET_CODE (set) == SET);
4486 if (!reg_overlap_mentioned_p (SET_DEST (set), PATTERN (insn)))
4492 /* A jump shadow needs to have its latency decreased by one. Conceptually,
4493 it occurs in between two cycles, but we schedule it at the end of the
4494 first cycle. */
4495 if (shadow_type_p (insn_type))
4498 /* Anti and output dependencies usually have zero cost, but we want
4499 to insert a stall after a jump, and after certain floating point
4500 insns that take more than one cycle to read their inputs. In the
4501 future, we should try to find a better algorithm for scheduling
4502 jump delay slots. */
4505 /* We can get anti-dependencies against shadow insns. Treat these
4506 like output dependencies, so that the insn is entirely finished
4507 before the branch takes place. */
4508 if (kind == REG_DEP_ANTI && insn_type == TYPE_SHADOW)
4509 kind = REG_DEP_OUTPUT;
4510 switch (dep_insn_type)
4516 if (get_attr_has_shadow (dep_insn) == HAS_SHADOW_Y)
4517 /* This is a real_jump/real_call insn. These don't have
4518 outputs, and ensuring the validity of scheduling things
4519 in the delay slot is the job of
4520 c6x_sched_reorder_1. */
4522 /* Unsplit calls can happen - e.g. for divide insns. */
4527 if (kind == REG_DEP_OUTPUT)
4528 return 5 - shadow_bonus;
4532 if (kind == REG_DEP_OUTPUT)
4533 return 4 - shadow_bonus;
4536 if (kind == REG_DEP_OUTPUT)
4537 return 2 - shadow_bonus;
4540 if (kind == REG_DEP_OUTPUT)
4541 return 2 - shadow_bonus;
4545 if (kind == REG_DEP_OUTPUT)
4546 return 7 - shadow_bonus;
4549 if (kind == REG_DEP_OUTPUT)
4550 return 5 - shadow_bonus;
4553 if (kind == REG_DEP_OUTPUT)
4554 return 9 - shadow_bonus;
4558 if (kind == REG_DEP_OUTPUT)
4559 return 10 - shadow_bonus;
4563 if (insn_type == TYPE_SPKERNEL)
4565 if (kind == REG_DEP_OUTPUT)
4566 return 1 - shadow_bonus;
4572 return cost - shadow_bonus;
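/* Worked example (annotation): a REG_DEP_ANTI dependence against a
   TYPE_SHADOW insn is converted to REG_DEP_OUTPUT above, holding the
   depending insn back until the shadowed operation has fully completed;
   the elided case labels then pick one of the 1..10 cycle returns by
   the dependency's instruction class, each reduced by one for jump
   shadows via shadow_bonus.  */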
4575 /* Create a SEQUENCE rtx to replace the instructions in SLOT, of which there
4576 are N_FILLED. REAL_FIRST identifies the slot of the insn that appears
4577 first in the original stream. */
4580 gen_one_bundle (rtx *slot, int n_filled, int real_first)
4586 bundle = gen_rtx_SEQUENCE (VOIDmode, gen_rtvec_v (n_filled, slot));
4587 bundle = make_insn_raw (bundle);
4588 BLOCK_FOR_INSN (bundle) = BLOCK_FOR_INSN (slot[0]);
4589 INSN_LOCATOR (bundle) = INSN_LOCATOR (slot[0]);
4590 PREV_INSN (bundle) = PREV_INSN (slot[real_first]);
4594 for (i = 0; i < n_filled; i++)
4598 PREV_INSN (insn) = t ? t : PREV_INSN (bundle);
4600 NEXT_INSN (t) = insn;
4603 INSN_LOCATOR (slot[i]) = INSN_LOCATOR (bundle);
4606 NEXT_INSN (bundle) = NEXT_INSN (PREV_INSN (bundle));
4607 NEXT_INSN (t) = NEXT_INSN (bundle);
4608 NEXT_INSN (PREV_INSN (bundle)) = bundle;
4609 PREV_INSN (NEXT_INSN (bundle)) = bundle;
4612 /* Move all parallel instructions into SEQUENCEs, so that no subsequent passes
4613 try to insert labels in the middle. */
4616 c6x_gen_bundles (void)
4619 rtx insn, next, last_call;
4624 /* The machine is eight insns wide. We can have up to six shadow
4625 insns, plus an extra slot for merging the jump shadow. */
4630 for (insn = BB_HEAD (bb);; insn = next)
4633 rtx delete_this = NULL_RTX;
4635 if (NONDEBUG_INSN_P (insn))
4637 /* Put calls at the start of the sequence. */
4643 memmove (&slot[1], &slot[0],
4644 n_filled * sizeof (slot[0]));
4646 if (!shadow_p (insn))
4648 PUT_MODE (insn, TImode);
4650 PUT_MODE (slot[1], VOIDmode);
4657 slot[n_filled++] = insn;
4661 next = NEXT_INSN (insn);
4662 while (next && insn != BB_END (bb)
4663 && !(NONDEBUG_INSN_P (next)
4664 && GET_CODE (PATTERN (next)) != USE
4665 && GET_CODE (PATTERN (next)) != CLOBBER))
4668 next = NEXT_INSN (insn);
4671 at_end = insn == BB_END (bb);
4672 if (delete_this == NULL_RTX
4673 && (at_end || (GET_MODE (next) == TImode
4674 && !(shadow_p (next) && CALL_P (next)))))
4677 gen_one_bundle (slot, n_filled, first_slot);
4686 /* Bundling, and emitting nops, can separate
4687 NOTE_INSN_CALL_ARG_LOCATION from the corresponding calls. Fix
4688 that up here. */
4689 last_call = NULL_RTX;
4690 for (insn = get_insns (); insn; insn = next)
4692 next = NEXT_INSN (insn);
4694 || (INSN_P (insn) && GET_CODE (PATTERN (insn)) == SEQUENCE
4695 && CALL_P (XVECEXP (PATTERN (insn), 0, 0))))
4697 if (!NOTE_P (insn) || NOTE_KIND (insn) != NOTE_INSN_CALL_ARG_LOCATION)
4699 if (NEXT_INSN (last_call) == insn)
4701 NEXT_INSN (PREV_INSN (insn)) = NEXT_INSN (insn);
4702 PREV_INSN (NEXT_INSN (insn)) = PREV_INSN (insn);
4703 PREV_INSN (insn) = last_call;
4704 NEXT_INSN (insn) = NEXT_INSN (last_call);
4705 PREV_INSN (NEXT_INSN (insn)) = insn;
4706 NEXT_INSN (PREV_INSN (insn)) = insn;
4711 /* Emit a NOP instruction for CYCLES cycles after insn AFTER. Return it. */
4714 emit_nop_after (int cycles, rtx after)
4718 /* mpydp has 9 delay slots, and we may schedule a stall for a cross-path
4719 operation. We don't need the extra NOP since in this case, the hardware
4720 will automatically insert the required stall. */
4724 gcc_assert (cycles < 10);
4726 insn = emit_insn_after (gen_nop_count (GEN_INT (cycles)), after);
4727 PUT_MODE (insn, TImode);
4732 /* Determine whether INSN is a call that needs to have a return label
4733 placed. */
4736 returning_call_p (rtx insn)
4739 return (!SIBLING_CALL_P (insn)
4740 && get_attr_type (insn) != TYPE_CALLP
4741 && get_attr_type (insn) != TYPE_SHADOW);
4742 if (recog_memoized (insn) < 0)
4744 if (get_attr_type (insn) == TYPE_CALL)
4749 /* Determine whether INSN's pattern can be converted to use callp. */
4751 can_use_callp (rtx insn)
4753 int icode = recog_memoized (insn);
4754 if (!TARGET_INSNS_64PLUS
4756 || GET_CODE (PATTERN (insn)) == COND_EXEC)
4759 return ((icode == CODE_FOR_real_call
4760 || icode == CODE_FOR_call_internal
4761 || icode == CODE_FOR_call_value_internal)
4762 && get_attr_dest_regfile (insn) == DEST_REGFILE_ANY);
4765 /* Convert the pattern of INSN, which must be a CALL_INSN, into a callp. */
4767 convert_to_callp (rtx insn)
4770 extract_insn (insn);
4771 if (GET_CODE (PATTERN (insn)) == SET)
4773 rtx dest = recog_data.operand[0];
4774 lab = recog_data.operand[1];
4775 PATTERN (insn) = gen_callp_value (dest, lab);
4776 INSN_CODE (insn) = CODE_FOR_callp_value;
4780 lab = recog_data.operand[0];
4781 PATTERN (insn) = gen_callp (lab);
4782 INSN_CODE (insn) = CODE_FOR_callp;
4786 /* Scan forwards from INSN until we find the next insn that has mode TImode
4787 (indicating it starts a new cycle), and occurs in cycle CLOCK.
4788 Return it if we find such an insn, NULL_RTX otherwise. */
4790 find_next_cycle_insn (rtx insn, int clock)
4793 if (GET_MODE (t) == TImode)
4794 t = next_real_insn (t);
4795 while (t && GET_MODE (t) != TImode)
4796 t = next_real_insn (t);
4798 if (t && insn_get_clock (t) == clock)
4803 /* If COND_INSN has a COND_EXEC condition, wrap the same condition
4804 around PAT. Return PAT either unchanged or modified in this
4805 way. */
4807 duplicate_cond (rtx pat, rtx cond_insn)
4809 rtx cond_pat = PATTERN (cond_insn);
4810 if (GET_CODE (cond_pat) == COND_EXEC)
4811 pat = gen_rtx_COND_EXEC (VOIDmode, copy_rtx (COND_EXEC_TEST (cond_pat)),
4816 /* Walk forward from INSN to find the last insn that issues in the same clock
4817 cycle. */
4819 find_last_same_clock (rtx insn)
4822 rtx t = next_real_insn (insn);
4824 while (t && GET_MODE (t) != TImode)
4826 if (!DEBUG_INSN_P (t) && recog_memoized (t) >= 0)
4828 t = next_real_insn (t);
4833 /* For every call insn in the function, emit code to load the return
4834 address. For each call we create a return label and store it in
4835 CALL_LABELS. If we are not scheduling, we emit the labels here,
4836 otherwise the caller will do it later.
4837 This function is called after final insn scheduling, but before creating
4838 the SEQUENCEs that represent execute packets. */
4841 reorg_split_calls (rtx *call_labels)
4843 unsigned int reservation_mask = 0;
4844 rtx insn = get_insns ();
4845 gcc_assert (GET_CODE (insn) == NOTE);
4846 insn = next_real_insn (insn);
4850 rtx next = next_real_insn (insn);
4852 if (DEBUG_INSN_P (insn))
4855 if (GET_MODE (insn) == TImode)
4856 reservation_mask = 0;
4857 uid = INSN_UID (insn);
4858 if (c6x_flag_schedule_insns2 && recog_memoized (insn) >= 0)
4859 reservation_mask |= 1 << INSN_INFO_ENTRY (uid).reservation;
4861 if (returning_call_p (insn))
4863 rtx label = gen_label_rtx ();
4864 rtx labelref = gen_rtx_LABEL_REF (Pmode, label);
4865 rtx reg = gen_rtx_REG (SImode, RETURN_ADDR_REGNO);
4867 LABEL_NUSES (label) = 2;
4868 if (!c6x_flag_schedule_insns2)
4870 if (can_use_callp (insn))
4871 convert_to_callp (insn);
4876 emit_label_after (label, insn);
4878 /* Bundle the call and its delay slots into a single
4879 SEQUENCE.  While these do not issue in parallel,
4880 we need to group them into a single EH region. */
4882 PUT_MODE (insn, TImode);
4883 if (TARGET_INSNS_64)
4885 t = gen_addkpc (reg, labelref, GEN_INT (4));
4886 slot[1] = emit_insn_after (duplicate_cond (t, insn),
4888 PUT_MODE (slot[1], TImode);
4889 gen_one_bundle (slot, 2, 0);
4893 slot[3] = emit_insn_after (gen_nop_count (GEN_INT (3)),
4895 PUT_MODE (slot[3], TImode);
4896 t = gen_movsi_lo_sum (reg, reg, labelref);
4897 slot[2] = emit_insn_after (duplicate_cond (t, insn),
4899 PUT_MODE (slot[2], TImode);
4900 t = gen_movsi_high (reg, labelref);
4901 slot[1] = emit_insn_after (duplicate_cond (t, insn),
4903 PUT_MODE (slot[1], TImode);
4904 gen_one_bundle (slot, 4, 0);
4910 /* If we scheduled, we reserved the .S2 unit for one or two
4911 cycles after the call. Emit the insns in these slots,
4912 unless it's possible to create a CALLP insn.
4913 Note that this works because the dependencies ensure that
4914 no insn setting/using B3 is scheduled in the delay slots of
4915 a call.  */
4916 int this_clock = insn_get_clock (insn);
4917 rtx last_same_clock;
4920 call_labels[INSN_UID (insn)] = label;
4922 last_same_clock = find_last_same_clock (insn);
4924 if (can_use_callp (insn))
4926 /* Find the first insn of the next execute packet. If it
4927 is the shadow insn corresponding to this call, we may
4928 use a CALLP insn. */
4929 rtx shadow = next_nonnote_nondebug_insn (last_same_clock);
4932 && insn_get_clock (shadow) == this_clock + 5)
4934 convert_to_callp (shadow);
4935 insn_set_clock (shadow, this_clock);
4936 INSN_INFO_ENTRY (INSN_UID (shadow)).reservation
4938 INSN_INFO_ENTRY (INSN_UID (shadow)).unit_mask
4939 = INSN_INFO_ENTRY (INSN_UID (last_same_clock)).unit_mask;
4940 if (GET_MODE (insn) == TImode)
4942 rtx new_cycle_first = NEXT_INSN (insn);
4943 while (!NONDEBUG_INSN_P (new_cycle_first)
4944 || GET_CODE (PATTERN (new_cycle_first)) == USE
4945 || GET_CODE (PATTERN (new_cycle_first)) == CLOBBER)
4946 new_cycle_first = NEXT_INSN (new_cycle_first);
4947 PUT_MODE (new_cycle_first, TImode);
4948 if (new_cycle_first != shadow)
4949 PUT_MODE (shadow, VOIDmode);
4950 INSN_INFO_ENTRY (INSN_UID (new_cycle_first)).ebb_start
4951 = INSN_INFO_ENTRY (INSN_UID (insn)).ebb_start;
4954 PUT_MODE (shadow, VOIDmode);
4959 after1 = find_next_cycle_insn (last_same_clock, this_clock + 1);
4960 if (after1 == NULL_RTX)
4961 after1 = last_same_clock;
4962 else
4963 after1 = find_last_same_clock (after1);
4964 if (TARGET_INSNS_64)
4966 rtx x1 = gen_addkpc (reg, labelref, const0_rtx);
4967 x1 = emit_insn_after (duplicate_cond (x1, insn), after1);
4968 insn_set_clock (x1, this_clock + 1);
4969 INSN_INFO_ENTRY (INSN_UID (x1)).reservation = RESERVATION_S2;
4970 if (after1 == last_same_clock)
4971 PUT_MODE (x1, TImode);
4973 INSN_INFO_ENTRY (INSN_UID (x1)).unit_mask
4974 = INSN_INFO_ENTRY (INSN_UID (after1)).unit_mask;
4979 rtx after2 = find_next_cycle_insn (after1, this_clock + 2);
4980 if (after2 == NULL_RTX)
4981 after2 = after1;
4982 x2 = gen_movsi_lo_sum (reg, reg, labelref);
4983 x2 = emit_insn_after (duplicate_cond (x2, insn), after2);
4984 x1 = gen_movsi_high (reg, labelref);
4985 x1 = emit_insn_after (duplicate_cond (x1, insn), after1);
4986 insn_set_clock (x1, this_clock + 1);
4987 insn_set_clock (x2, this_clock + 2);
4988 INSN_INFO_ENTRY (INSN_UID (x1)).reservation = RESERVATION_S2;
4989 INSN_INFO_ENTRY (INSN_UID (x2)).reservation = RESERVATION_S2;
4990 if (after1 == last_same_clock)
4991 PUT_MODE (x1, TImode);
4993 INSN_INFO_ENTRY (INSN_UID (x1)).unit_mask
4994 = INSN_INFO_ENTRY (INSN_UID (after1)).unit_mask;
4995 if (after1 == after2)
4996 PUT_MODE (x2, TImode);
4998 INSN_INFO_ENTRY (INSN_UID (x2)).unit_mask
4999 = INSN_INFO_ENTRY (INSN_UID (after2)).unit_mask;
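
/* To summarize with a sketch: a call that cannot become a CALLP ends up
   as a bundle of
     1. the branch to the callee, which has 5 delay slots,
     2./3. the movsi_high/movsi_lo_sum pair that materializes the return
           label in B3,
     4. a NOP 3 filling the remaining delay slots,
   followed by the return label itself; with TARGET_INSNS_64 a single
   ADDKPC computes the return address and supplies 4 NOP cycles in one
   insn.  When scheduling, the same insns are instead placed on the .S2
   unit in the one or two cycles after the call, as above.  */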
5008 /* Called as part of c6x_reorg. This function emits multi-cycle NOP
5009 insns as required for correctness. CALL_LABELS is the array that
5010 holds the return labels for call insns; we emit these here if
5011 scheduling was run earlier. */
5014 reorg_emit_nops (rtx *call_labels)
5017 rtx prev, last_call;
5018 int prev_clock, earliest_bb_end;
5019 int prev_implicit_nops;
5020 rtx insn = get_insns ();
5022 /* We look at one insn (or bundle inside a sequence) in each iteration, storing
5023 its issue time in PREV_CLOCK for the next iteration. If there is a gap in
5024 clocks, we must insert a NOP.
5025 EARLIEST_BB_END tracks in which cycle all insns that have been issued in the
5026 current basic block will finish. We must not allow the next basic block to
5027 begin before this cycle.
5028 PREV_IMPLICIT_NOPS counts the NOP cycles implicitly contained in the last
5029 insn we saw (e.g. a CALLP).  The code is scheduled such that subsequent insns
5030 will show the cycle gap, but we needn't insert a real NOP instruction. */
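
/* Worked example (illustrative numbers): if the previous packet issued at
   cycle 10 and the next at cycle 14, then cycles = 4 and we call
   emit_nop_after (3, prev), i.e. emit a "NOP 3" covering cycles 11-13.
   If the previous insn was a CALLP (PREV_IMPLICIT_NOPS == 5), a gap of
   up to 6 cycles needs no NOP at all.  */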
5031 insn = next_real_insn (insn);
5032 last_call = prev = NULL_RTX;
5033 prev_clock = -1;
5034 earliest_bb_end = 0;
5035 prev_implicit_nops = 0;
5039 int this_clock = -1;
5043 next = next_real_insn (insn);
5045 if (DEBUG_INSN_P (insn)
5046 || GET_CODE (PATTERN (insn)) == USE
5047 || GET_CODE (PATTERN (insn)) == CLOBBER
5048 || shadow_or_blockage_p (insn)
5050 && (GET_CODE (PATTERN (insn)) == ADDR_DIFF_VEC
5051 || GET_CODE (PATTERN (insn)) == ADDR_VEC)))
5054 if (!c6x_flag_schedule_insns2)
5055 /* No scheduling; ensure that no parallel issue happens. */
5056 PUT_MODE (insn, TImode);
5061 this_clock = insn_get_clock (insn);
5062 if (this_clock != prev_clock)
5064 PUT_MODE (insn, TImode);
5068 cycles = this_clock - prev_clock;
5070 cycles -= prev_implicit_nops;
5071 if (cycles > 1)
5072 {
5073 rtx nop = emit_nop_after (cycles - 1, prev);
5074 insn_set_clock (nop, prev_clock + prev_implicit_nops + 1);
5077 prev_clock = this_clock;
5080 && insn_get_clock (last_call) + 6 <= this_clock)
5082 emit_label_before (call_labels[INSN_UID (last_call)], insn);
5083 last_call = NULL_RTX;
5085 prev_implicit_nops = 0;
5089 /* Examine how many cycles the current insn takes, and adjust
5090 LAST_CALL, EARLIEST_BB_END and PREV_IMPLICIT_NOPS. */
5091 if (recog_memoized (insn) >= 0
5092 /* If not scheduling, we've emitted NOPs after calls already. */
5093 && (c6x_flag_schedule_insns2 || !returning_call_p (insn)))
5095 max_cycles = get_attr_cycles (insn);
5096 if (get_attr_type (insn) == TYPE_CALLP)
5097 prev_implicit_nops = 5;
5101 if (returning_call_p (insn))
5104 if (c6x_flag_schedule_insns2)
5106 gcc_assert (this_clock >= 0);
5107 if (earliest_bb_end < this_clock + max_cycles)
5108 earliest_bb_end = this_clock + max_cycles;
5110 else if (max_cycles > 1)
5111 emit_nop_after (max_cycles - 1, insn);
5117 if (c6x_flag_schedule_insns2
5118 && (next == NULL_RTX
5119 || (GET_MODE (next) == TImode
5120 && INSN_INFO_ENTRY (INSN_UID (next)).ebb_start))
5121 && earliest_bb_end > 0)
5123 int cycles = earliest_bb_end - prev_clock;
5126 prev = emit_nop_after (cycles - 1, prev);
5127 insn_set_clock (prev, prev_clock + prev_implicit_nops + 1);
5129 earliest_bb_end = 0;
5134 emit_label_after (call_labels[INSN_UID (last_call)], prev);
5135 last_call = NULL_RTX;
5141 /* If possible, split INSN, which we know is either a jump or a call, into a real
5142 insn and its shadow. */
5144 split_delayed_branch (rtx insn)
5146 int code = recog_memoized (insn);
5148 rtx pat = PATTERN (insn);
5150 if (GET_CODE (pat) == COND_EXEC)
5151 pat = COND_EXEC_CODE (pat);
5155 rtx src = pat, dest = NULL_RTX;
5157 if (GET_CODE (pat) == SET)
5159 dest = SET_DEST (pat);
5160 src = SET_SRC (pat);
5162 callee = XEXP (XEXP (src, 0), 0);
5163 if (SIBLING_CALL_P (insn))
5166 newpat = gen_indirect_sibcall_shadow ();
5168 newpat = gen_sibcall_shadow (callee);
5169 pat = gen_real_jump (callee);
5171 else if (dest != NULL_RTX)
5174 newpat = gen_indirect_call_value_shadow (dest);
5176 newpat = gen_call_value_shadow (dest, callee);
5177 pat = gen_real_call (callee);
5182 newpat = gen_indirect_call_shadow ();
5184 newpat = gen_call_shadow (callee);
5185 pat = gen_real_call (callee);
5187 pat = duplicate_cond (pat, insn);
5188 newpat = duplicate_cond (newpat, insn);
5193 if (GET_CODE (pat) == PARALLEL
5194 && GET_CODE (XVECEXP (pat, 0, 0)) == RETURN)
5196 newpat = gen_return_shadow ();
5197 pat = gen_real_ret (XEXP (XVECEXP (pat, 0, 1), 0));
5198 newpat = duplicate_cond (newpat, insn);
5203 case CODE_FOR_br_true:
5204 case CODE_FOR_br_false:
5205 src = SET_SRC (pat);
5206 op = XEXP (src, code == CODE_FOR_br_true ? 1 : 2);
5207 newpat = gen_condjump_shadow (op);
5208 pat = gen_real_jump (op);
5209 if (code == CODE_FOR_br_true)
5210 pat = gen_rtx_COND_EXEC (VOIDmode, XEXP (src, 0), pat);
5212 pat = gen_rtx_COND_EXEC (VOIDmode,
5213 reversed_comparison (XEXP (src, 0),
5220 newpat = gen_jump_shadow (op);
5223 case CODE_FOR_indirect_jump:
5224 newpat = gen_indirect_jump_shadow ();
5227 case CODE_FOR_return_internal:
5228 newpat = gen_return_shadow ();
5229 pat = gen_real_ret (XEXP (XVECEXP (pat, 0, 1), 0));
5236 i1 = emit_insn_before (pat, insn);
5237 PATTERN (insn) = newpat;
5238 INSN_CODE (insn) = -1;
5239 record_delay_slot_pair (i1, insn, 5, 0);
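
/* Example (a sketch): a conditional branch

     (set (pc) (if_then_else (ne (reg:SI B0) (const_int 0))
                             (label_ref L) (pc)))

   becomes a real_jump, wrapped in the same condition, which occupies the
   functional unit and has the delay slots, plus a condjump_shadow left in
   the branch's place to mark when the pc actually changes;
   record_delay_slot_pair ties the two insns exactly 5 cycles apart for
   the scheduler.  */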
5242 /* If INSN is a multi-cycle insn that should be handled properly in
5243 modulo-scheduling, split it into a real insn and a shadow.
5244 Return true if we made a change.
5246 It is valid for us to fail to split an insn; the caller has to deal
5247 with the possibility.  Currently we handle loads and most mpy2 and
5248 mpy4 insns.  */
5250 split_delayed_nonbranch (rtx insn)
5252 int code = recog_memoized (insn);
5253 enum attr_type type;
5254 rtx i1, newpat, src, dest;
5255 rtx pat = PATTERN (insn);
5259 if (GET_CODE (pat) == COND_EXEC)
5260 pat = COND_EXEC_CODE (pat);
5262 if (code < 0 || GET_CODE (pat) != SET)
5264 src = SET_SRC (pat);
5265 dest = SET_DEST (pat);
5269 type = get_attr_type (insn);
5271 && (type == TYPE_LOAD
5272 || type == TYPE_LOADN))
5275 && (GET_CODE (src) != ZERO_EXTEND
5276 || !MEM_P (XEXP (src, 0))))
5279 if (GET_MODE_SIZE (GET_MODE (dest)) > 4
5280 && (GET_MODE_SIZE (GET_MODE (dest)) != 8 || !TARGET_LDDW))
5283 rtv = gen_rtvec (2, GEN_INT (REGNO (SET_DEST (pat))),
5285 newpat = gen_load_shadow (SET_DEST (pat));
5286 pat = gen_rtx_UNSPEC (VOIDmode, rtv, UNSPEC_REAL_LOAD);
5290 && (type == TYPE_MPY2
5291 || type == TYPE_MPY4))
5293 /* We don't handle floating point multiplies yet. */
5294 if (GET_MODE (dest) == SFmode)
5297 rtv = gen_rtvec (2, GEN_INT (REGNO (SET_DEST (pat))),
5299 newpat = gen_mult_shadow (SET_DEST (pat));
5300 pat = gen_rtx_UNSPEC (VOIDmode, rtv, UNSPEC_REAL_MULT);
5301 delay = type == TYPE_MPY2 ? 1 : 3;
5306 pat = duplicate_cond (pat, insn);
5307 newpat = duplicate_cond (newpat, insn);
5308 i1 = emit_insn_before (pat, insn);
5309 PATTERN (insn) = newpat;
5310 INSN_CODE (insn) = -1;
5311 recog_memoized (insn);
5312 recog_memoized (i1);
5313 record_delay_slot_pair (i1, insn, delay, 0);
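
/* Example (a sketch): an SImode load

     (set (reg:SI A5) (mem:SI (reg:SI A4)))

   becomes an UNSPEC_REAL_LOAD insn recording the destination regno and
   the source, paired with a load_shadow that performs the register write
   four cycles later (C6X loads have four delay slots), while 16-bit
   multiplies get a 1-cycle pair and 32-bit ones a 3-cycle pair, matching
   the delay computation above.  Register names are illustrative.  */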
5317 /* Examine if INSN is the result of splitting a load or multiply into a real
5318 insn and a shadow, and if so, undo the transformation. */
5320 undo_split_delayed_nonbranch (rtx insn)
5322 int icode = recog_memoized (insn);
5323 enum attr_type type;
5324 rtx prev_pat, insn_pat, prev;
5328 type = get_attr_type (insn);
5329 if (type != TYPE_LOAD_SHADOW && type != TYPE_MULT_SHADOW)
5331 prev = PREV_INSN (insn);
5332 prev_pat = PATTERN (prev);
5333 insn_pat = PATTERN (insn);
5334 if (GET_CODE (prev_pat) == COND_EXEC)
5336 prev_pat = COND_EXEC_CODE (prev_pat);
5337 insn_pat = COND_EXEC_CODE (insn_pat);
5340 gcc_assert (GET_CODE (prev_pat) == UNSPEC
5341 && ((XINT (prev_pat, 1) == UNSPEC_REAL_LOAD
5342 && type == TYPE_LOAD_SHADOW)
5343 || (XINT (prev_pat, 1) == UNSPEC_REAL_MULT
5344 && type == TYPE_MULT_SHADOW)));
5345 insn_pat = gen_rtx_SET (VOIDmode, SET_DEST (insn_pat),
5346 XVECEXP (prev_pat, 0, 1));
5347 insn_pat = duplicate_cond (insn_pat, prev);
5348 PATTERN (insn) = insn_pat;
5349 INSN_CODE (insn) = -1;
5353 /* Split every insn (i.e. jumps and calls) which can have delay slots into
5354 two parts: the first one is scheduled normally and emits the instruction,
5355 while the second one is a shadow insn which shows the side effect taking
5356 place. The second one is placed in the right cycle by the scheduler, but
5357 not emitted as an assembly instruction. */
5360 split_delayed_insns (void)
5363 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
5365 if (JUMP_P (insn) || CALL_P (insn))
5366 split_delayed_branch (insn);
5370 /* For every insn that has an entry in the new_conditions vector, give it
5371 the appropriate predicate. */
5373 conditionalize_after_sched (void)
5378 FOR_BB_INSNS (bb, insn)
5380 unsigned uid = INSN_UID (insn);
5382 if (!NONDEBUG_INSN_P (insn) || uid >= INSN_INFO_LENGTH)
5384 cond = INSN_INFO_ENTRY (uid).new_cond;
5385 if (cond == NULL_RTX)
5388 fprintf (dump_file, "Conditionalizing insn %d\n", uid);
5389 predicate_insn (insn, cond, true);
5393 /* A callback for the hw-doloop pass. This function examines INSN; if
5394 it is a loop_end pattern we recognize, return the reg rtx for the
5395 loop counter. Otherwise, return NULL_RTX. */
5398 hwloop_pattern_reg (rtx insn)
5402 if (!JUMP_P (insn) || recog_memoized (insn) != CODE_FOR_loop_end)
5405 pat = PATTERN (insn);
5406 reg = SET_DEST (XVECEXP (pat, 0, 1));
5412 /* Return the number of cycles taken by BB, as computed by scheduling,
5413 including the latencies of all insns with delay slots. IGNORE is
5414 an insn we should ignore in the calculation, usually the final
5415 branch.  */
5417 bb_earliest_end_cycle (basic_block bb, rtx ignore)
5422 FOR_BB_INSNS (bb, insn)
5424 int cycles, this_clock;
5426 if (LABEL_P (insn) || NOTE_P (insn) || DEBUG_INSN_P (insn)
5427 || GET_CODE (PATTERN (insn)) == USE
5428 || GET_CODE (PATTERN (insn)) == CLOBBER
5432 this_clock = insn_get_clock (insn);
5433 cycles = get_attr_cycles (insn);
5435 if (earliest < this_clock + cycles)
5436 earliest = this_clock + cycles;
5441 /* Examine the insns in BB and remove all which have a uid greater than or
5442 equal to MAX_UID. */
5444 filter_insns_above (basic_block bb, int max_uid)
5447 bool prev_ti = false;
5448 int prev_cycle = -1;
5450 FOR_BB_INSNS_SAFE (bb, insn, next)
5453 if (!NONDEBUG_INSN_P (insn))
5455 if (insn == BB_END (bb))
5457 this_cycle = insn_get_clock (insn);
5458 if (prev_ti && this_cycle == prev_cycle)
5460 gcc_assert (GET_MODE (insn) != TImode);
5461 PUT_MODE (insn, TImode);
5464 if (INSN_UID (insn) >= max_uid)
5466 if (GET_MODE (insn) == TImode)
5469 prev_cycle = this_cycle;
5476 /* Implement TARGET_ASM_EMIT_EXCEPT_PERSONALITY. */
5479 c6x_asm_emit_except_personality (rtx personality)
5481 fputs ("\t.personality\t", asm_out_file);
5482 output_addr_const (asm_out_file, personality);
5483 fputc ('\n', asm_out_file);
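
/* This produces a line such as (symbol purely illustrative):
        .personality    __c6xabi_unwind_cpp_pr0
   naming the personality routine for the function's unwind table.  */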
5486 /* Use a special assembly directive rather than a regular section for
5487 unwind table data. */
5490 c6x_asm_init_sections (void)
5492 exception_section = get_unnamed_section (0, output_section_asm_op,
5496 /* A callback for the hw-doloop pass. Called to optimize LOOP in a
5497 machine-specific fashion; returns true if successful and false if
5498 the hwloop_fail function should be called. */
5501 hwloop_optimize (hwloop_info loop)
5503 basic_block entry_bb, bb;
5504 rtx seq, insn, prev, entry_after, end_packet;
5505 rtx head_insn, tail_insn, new_insns, last_insn;
5507 int n_execute_packets;
5510 int max_uid_before, delayed_splits;
5511 int i, sp_ii, min_ii, max_ii, max_parallel, n_insns, n_real_insns, stages;
5516 if (!c6x_flag_modulo_sched || !c6x_flag_schedule_insns2
5517 || !TARGET_INSNS_64PLUS)
5520 if (loop->iter_reg_used || loop->depth > 1)
5522 if (loop->has_call || loop->has_asm)
5525 if (loop->head != loop->tail)
5528 gcc_assert (loop->incoming_dest == loop->head);
5531 FOR_EACH_VEC_ELT (edge, loop->incoming, i, entry_edge)
5532 if (entry_edge->flags & EDGE_FALLTHRU)
5534 if (entry_edge == NULL)
5537 reshuffle_units (loop->head);
5539 schedule_ebbs_init ();
5540 schedule_ebb (BB_HEAD (loop->tail), loop->loop_end, true);
5541 schedule_ebbs_finish ();
5544 loop_earliest = bb_earliest_end_cycle (bb, loop->loop_end) + 1;
5546 max_uid_before = get_max_uid ();
5548 /* Split all multi-cycle operations, such as loads. For normal
5549 scheduling, we only do this for branches, as the generated code
5550 would otherwise not be interrupt-safe. When using sploop, it is
5551 safe and beneficial to split them. If any multi-cycle operations
5552 remain after splitting (because we don't handle them yet), we
5553 cannot pipeline the loop. */
5555 FOR_BB_INSNS (bb, insn)
5557 if (NONDEBUG_INSN_P (insn))
5559 recog_memoized (insn);
5560 if (split_delayed_nonbranch (insn))
5562 else if (INSN_CODE (insn) >= 0
5563 && get_attr_cycles (insn) > 1)
5568 /* Count the number of insns as well as the number of real insns, and save
5569 the original sequence of insns in case we must restore it later. */
5570 n_insns = n_real_insns = 0;
5571 FOR_BB_INSNS (bb, insn)
5574 if (NONDEBUG_INSN_P (insn) && insn != loop->loop_end)
5577 orig_vec = XNEWVEC (rtx, n_insns);
5579 FOR_BB_INSNS (bb, insn)
5580 orig_vec[n_insns++] = insn;
5582 /* Count the unit reservations, and compute a minimum II from that
5583 table.  */
5584 count_unit_reqs (unit_reqs, loop->start_label,
5585 PREV_INSN (loop->loop_end));
5586 merge_unit_reqs (unit_reqs);
5588 min_ii = res_mii (unit_reqs);
5589 max_ii = loop_earliest < 15 ? loop_earliest : 14;
5591 /* Make copies of the loop body, up to a maximum number of stages we want
5592 to handle.  */
5593 max_parallel = loop_earliest / min_ii + 1;
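
/* Worked example (illustrative numbers): if one iteration of the loop
   takes loop_earliest = 12 cycles and the unit reservations give
   min_ii = 3, then up to max_parallel = 12/3 + 1 = 5 iterations can
   overlap, so that many copies of the body are made available to the
   scheduler below.  */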
5595 copies = XCNEWVEC (rtx, (max_parallel + 1) * n_real_insns);
5596 insn_copies = XNEWVEC (rtx *, max_parallel + 1);
5597 for (i = 0; i < max_parallel + 1; i++)
5598 insn_copies[i] = copies + i * n_real_insns;
5600 head_insn = next_nonnote_nondebug_insn (loop->start_label);
5601 tail_insn = prev_real_insn (BB_END (bb));
5604 FOR_BB_INSNS (bb, insn)
5605 if (NONDEBUG_INSN_P (insn) && insn != loop->loop_end)
5606 insn_copies[0][i++] = insn;
5608 sploop_max_uid_iter0 = get_max_uid ();
5610 /* Generate the copies of the loop body, and save them in the
5611 INSN_COPIES array. */
5613 for (i = 0; i < max_parallel; i++)
5618 this_iter = duplicate_insn_chain (head_insn, tail_insn);
5622 rtx prev_stage_insn = insn_copies[i][j];
5623 gcc_assert (INSN_CODE (this_iter) == INSN_CODE (prev_stage_insn));
5625 if (INSN_CODE (this_iter) >= 0
5626 && (get_attr_type (this_iter) == TYPE_LOAD_SHADOW
5627 || get_attr_type (this_iter) == TYPE_MULT_SHADOW))
5629 rtx prev = PREV_INSN (this_iter);
5630 record_delay_slot_pair (prev, this_iter,
5631 get_attr_cycles (prev) - 1, 0);
5634 record_delay_slot_pair (prev_stage_insn, this_iter, i, 1);
5636 insn_copies[i + 1][j] = this_iter;
5638 this_iter = next_nonnote_nondebug_insn (this_iter);
5641 new_insns = get_insns ();
5642 last_insn = insn_copies[max_parallel][n_real_insns - 1];
5644 emit_insn_before (new_insns, BB_END (bb));
5646 /* Try to schedule the loop using varying initiation intervals,
5647 starting with the smallest possible and incrementing it
5648 on failure.  */
5649 for (sp_ii = min_ii; sp_ii <= max_ii; sp_ii++)
5653 fprintf (dump_file, "Trying to schedule for II %d\n", sp_ii);
5655 df_clear_flags (DF_LR_RUN_DCE);
5657 schedule_ebbs_init ();
5658 set_modulo_params (sp_ii, max_parallel, n_real_insns,
5659 sploop_max_uid_iter0);
5660 tmp_bb = schedule_ebb (BB_HEAD (bb), last_insn, true);
5661 schedule_ebbs_finish ();
5666 fprintf (dump_file, "Found schedule with II %d\n", sp_ii);
5671 discard_delay_pairs_above (max_uid_before);
5676 stages = insn_get_clock (ss.last_scheduled_iter0) / sp_ii + 1;
5678 if (stages == 1 && sp_ii > 5)
5681 /* At this point, we know we've been successful, unless we find later that
5682 there are too many execute packets for the loop buffer to hold. */
5684 /* Assign reservations to the instructions in the loop. We must find
5685 the stage that contains the full loop kernel, and transfer the
5686 reservations of the instructions contained in it to the corresponding
5687 instructions from iteration 0, which are the only ones we'll keep. */
5688 assign_reservations (BB_HEAD (bb), ss.last_scheduled_insn);
5689 PREV_INSN (BB_END (bb)) = ss.last_scheduled_iter0;
5690 NEXT_INSN (ss.last_scheduled_iter0) = BB_END (bb);
5691 filter_insns_above (bb, sploop_max_uid_iter0);
5693 for (i = 0; i < n_real_insns; i++)
5695 rtx insn = insn_copies[0][i];
5696 int uid = INSN_UID (insn);
5697 int stage = insn_uid_get_clock (uid) / sp_ii;
5699 if (stage + 1 < stages)
5702 stage = stages - stage - 1;
5703 copy_uid = INSN_UID (insn_copies[stage][i]);
5704 INSN_INFO_ENTRY (uid).reservation
5705 = INSN_INFO_ENTRY (copy_uid).reservation;
5711 /* Compute the number of execute packets the pipelined form of the loop will
5712 require.  */
5714 n_execute_packets = 0;
5715 for (insn = loop->start_label; insn != loop->loop_end; insn = NEXT_INSN (insn))
5717 if (NONDEBUG_INSN_P (insn) && GET_MODE (insn) == TImode
5718 && !shadow_p (insn))
5720 n_execute_packets++;
5721 if (prev && insn_get_clock (prev) + 1 != insn_get_clock (insn))
5722 /* We need an extra NOP instruction. */
5723 n_execute_packets++;
5729 end_packet = ss.last_scheduled_iter0;
5730 while (!NONDEBUG_INSN_P (end_packet) || GET_MODE (end_packet) != TImode)
5731 end_packet = PREV_INSN (end_packet);
5733 /* The earliest cycle in which we can emit the SPKERNEL instruction. */
5734 loop_earliest = (stages - 1) * sp_ii;
5735 if (loop_earliest > insn_get_clock (end_packet))
5737 n_execute_packets++;
5738 end_packet = loop->loop_end;
5741 loop_earliest = insn_get_clock (end_packet);
5743 if (n_execute_packets > 14)
5746 /* Generate the spkernel instruction, and place it at the appropriate
5747 spot.  */
5748 PUT_MODE (end_packet, VOIDmode);
5750 insn = gen_spkernel (GEN_INT (stages - 1),
5751 const0_rtx, JUMP_LABEL (loop->loop_end));
5752 insn = emit_jump_insn_before (insn, end_packet);
5753 JUMP_LABEL (insn) = JUMP_LABEL (loop->loop_end);
5754 insn_set_clock (insn, loop_earliest);
5755 PUT_MODE (insn, TImode);
5756 INSN_INFO_ENTRY (INSN_UID (insn)).ebb_start = false;
5757 delete_insn (loop->loop_end);
5759 /* Place the mvc and sploop instructions before the loop. */
5760 entry_bb = entry_edge->src;
5764 insn = emit_insn (gen_mvilc (loop->iter_reg));
5765 insn = emit_insn (gen_sploop (GEN_INT (sp_ii)));
5769 if (!single_succ_p (entry_bb) || VEC_length (edge, loop->incoming) > 1)
5775 emit_insn_before (seq, BB_HEAD (loop->head));
5776 seq = emit_label_before (gen_label_rtx (), seq);
5778 new_bb = create_basic_block (seq, insn, entry_bb);
5779 FOR_EACH_EDGE (e, ei, loop->incoming)
5781 if (!(e->flags & EDGE_FALLTHRU))
5782 redirect_edge_and_branch_force (e, new_bb);
5783 else
5784 redirect_edge_succ (e, new_bb);
5786 make_edge (new_bb, loop->head, 0);
5790 entry_after = BB_END (entry_bb);
5791 while (DEBUG_INSN_P (entry_after)
5792 || (NOTE_P (entry_after)
5793 && NOTE_KIND (entry_after) != NOTE_INSN_BASIC_BLOCK))
5794 entry_after = PREV_INSN (entry_after);
5795 emit_insn_after (seq, entry_after);
5800 /* Make sure we don't try to schedule this loop again. */
5801 for (ix = 0; VEC_iterate (basic_block, loop->blocks, ix, bb); ix++)
5802 bb->flags |= BB_DISABLE_SCHEDULE;
5808 fprintf (dump_file, "Unable to pipeline loop.\n");
5810 for (i = 1; i < n_insns; i++)
5812 NEXT_INSN (orig_vec[i - 1]) = orig_vec[i];
5813 PREV_INSN (orig_vec[i]) = orig_vec[i - 1];
5815 PREV_INSN (orig_vec[0]) = PREV_INSN (BB_HEAD (bb));
5816 NEXT_INSN (PREV_INSN (BB_HEAD (bb))) = orig_vec[0];
5817 NEXT_INSN (orig_vec[n_insns - 1]) = NEXT_INSN (BB_END (bb));
5818 PREV_INSN (NEXT_INSN (BB_END (bb))) = orig_vec[n_insns - 1];
5819 BB_HEAD (bb) = orig_vec[0];
5820 BB_END (bb) = orig_vec[n_insns - 1];
5822 free_delay_pairs ();
5823 FOR_BB_INSNS (bb, insn)
5824 if (NONDEBUG_INSN_P (insn))
5825 undo_split_delayed_nonbranch (insn);
5829 /* A callback for the hw-doloop pass. Called when a loop we have discovered
5830 turns out not to be optimizable; we have to split the doloop_end pattern
5831 into a subtract and a test. */
5833 hwloop_fail (hwloop_info loop)
5835 rtx insn, test, testreg;
5838 fprintf (dump_file, "splitting doloop insn %d\n",
5839 INSN_UID (loop->loop_end));
5840 insn = gen_addsi3 (loop->iter_reg, loop->iter_reg, constm1_rtx);
5841 /* See if we can emit the add at the head of the loop rather than at the
5842 end.  */
5843 if (loop->head == NULL
5844 || loop->iter_reg_used_outside
5845 || loop->iter_reg_used
5846 || TEST_HARD_REG_BIT (loop->regs_set_in_loop, REGNO (loop->iter_reg))
5847 || loop->incoming_dest != loop->head
5848 || EDGE_COUNT (loop->head->preds) != 2)
5849 emit_insn_before (insn, loop->loop_end);
5852 rtx t = loop->start_label;
5853 while (!NOTE_P (t) || NOTE_KIND (t) != NOTE_INSN_BASIC_BLOCK)
5854 t = NEXT_INSN (t);
5855 emit_insn_after (insn, t);
5858 testreg = SET_DEST (XVECEXP (PATTERN (loop->loop_end), 0, 2));
5859 if (GET_CODE (testreg) == SCRATCH)
5860 testreg = loop->iter_reg;
5862 emit_insn_before (gen_movsi (testreg, loop->iter_reg), loop->loop_end);
5864 test = gen_rtx_NE (VOIDmode, testreg, const0_rtx);
5865 insn = emit_jump_insn_before (gen_cbranchsi4 (test, testreg, const0_rtx,
5869 JUMP_LABEL (insn) = loop->start_label;
5870 LABEL_NUSES (loop->start_label)++;
5871 delete_insn (loop->loop_end);
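
/* Net effect (a sketch): the recognized doloop_end jump

     loop: ...
           (loop_end)           ; decrement counter, branch if nonzero

   is replaced by ordinary insns

     loop: ...
           iter = iter - 1      ; possibly hoisted to the loop head
           if (iter != 0) goto loop

   so the loop still works without the hardware loop mechanism.  */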
5874 static struct hw_doloop_hooks c6x_doloop_hooks =
5881 /* Run the hw-doloop pass to modulo-schedule hardware loops, or split the
5882 doloop_end patterns where such optimizations are impossible. */
5887 reorg_loops (true, &c6x_doloop_hooks);
5890 /* Implement the TARGET_MACHINE_DEPENDENT_REORG pass. We split call insns here
5891 into a sequence that loads the return register and performs the call,
5892 and emit the return label.
5893 If scheduling after reload is requested, it happens here. */
5900 bool do_selsched = (c6x_flag_schedule_insns2 && flag_selective_scheduling2
5901 && !maybe_skip_selective_scheduling ());
5903 /* We are freeing block_for_insn in the toplev to keep compatibility
5904 with old MDEP_REORGS that are not CFG based. Recompute it now. */
5905 compute_bb_for_insn ();
5907 df_clear_flags (DF_LR_RUN_DCE);
5908 df_note_add_problem ();
5910 /* If optimizing, we'll have split before scheduling. */
5916 if (c6x_flag_schedule_insns2)
5918 int sz = get_max_uid () * 3 / 2 + 1;
5920 insn_info = VEC_alloc (c6x_sched_insn_info, heap, sz);
5923 /* Make sure the real-jump insns we create are not deleted. When modulo-
5924 scheduling, situations where a reg is only stored in a loop can also
5925 cause dead code when doing the initial unrolling. */
5926 sched_no_dce = true;
5930 if (c6x_flag_schedule_insns2)
5932 split_delayed_insns ();
5933 timevar_push (TV_SCHED2);
5934 if (do_selsched)
5935 run_selective_scheduling ();
5936 else
5937 schedule_ebbs ();
5938 conditionalize_after_sched ();
5939 timevar_pop (TV_SCHED2);
5941 free_delay_pairs ();
5943 sched_no_dce = false;
5945 call_labels = XCNEWVEC (rtx, get_max_uid () + 1);
5947 reorg_split_calls (call_labels);
5949 if (c6x_flag_schedule_insns2)
5952 if ((bb->flags & BB_DISABLE_SCHEDULE) == 0)
5953 assign_reservations (BB_HEAD (bb), BB_END (bb));
5956 if (c6x_flag_var_tracking)
5958 timevar_push (TV_VAR_TRACKING);
5959 variable_tracking_main ();
5960 timevar_pop (TV_VAR_TRACKING);
5963 reorg_emit_nops (call_labels);
5965 /* Post-process the schedule to move parallel insns into SEQUENCEs. */
5966 if (c6x_flag_schedule_insns2)
5968 free_delay_pairs ();
5972 df_finish_pass (false);
5975 /* Called when a function has been assembled. It should perform all the
5976 tasks of ASM_DECLARE_FUNCTION_SIZE in elfos.h, plus target-specific
5977 tasks.
5978 We free the reservation (and other scheduling) information here now that
5979 all insns have been output. */
5981 c6x_function_end (FILE *file, const char *fname)
5983 c6x_output_fn_unwind (file);
5986 VEC_free (c6x_sched_insn_info, heap, insn_info);
5989 if (!flag_inhibit_size_directive)
5990 ASM_OUTPUT_MEASURED_SIZE (file, fname);
5993 /* Determine whether X is a shift with code CODE and an integer amount
5994 AMOUNT.  */
5996 shift_p (rtx x, enum rtx_code code, int amount)
5998 return (GET_CODE (x) == code && GET_CODE (XEXP (x, 1)) == CONST_INT
5999 && INTVAL (XEXP (x, 1)) == amount);
6002 /* Compute a (partial) cost for rtx X. Return true if the complete
6003 cost has been computed, and false if subexpressions should be
6004 scanned. In either case, *TOTAL contains the cost result. */
6007 c6x_rtx_costs (rtx x, int code, int outer_code, int opno, int *total,
6010 int cost2 = COSTS_N_INSNS (1);
6016 if (outer_code == SET || outer_code == PLUS)
6017 *total = satisfies_constraint_IsB (x) ? 0 : cost2;
6018 else if (outer_code == AND || outer_code == IOR || outer_code == XOR
6019 || outer_code == MINUS)
6020 *total = satisfies_constraint_Is5 (x) ? 0 : cost2;
6021 else if (GET_RTX_CLASS (outer_code) == RTX_COMPARE
6022 || GET_RTX_CLASS (outer_code) == RTX_COMM_COMPARE)
6023 *total = satisfies_constraint_Iu4 (x) ? 0 : cost2;
6024 else if (outer_code == ASHIFT || outer_code == ASHIFTRT
6025 || outer_code == LSHIFTRT)
6026 *total = satisfies_constraint_Iu5 (x) ? 0 : cost2;
6035 *total = COSTS_N_INSNS (2);
6039 /* Recognize a mult_highpart operation. */
6040 if ((GET_MODE (x) == HImode || GET_MODE (x) == SImode)
6041 && GET_CODE (XEXP (x, 0)) == LSHIFTRT
6042 && GET_MODE (XEXP (x, 0)) == GET_MODE_2XWIDER_MODE (GET_MODE (x))
6043 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
6044 && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT
6045 && INTVAL (XEXP (XEXP (x, 0), 1)) == GET_MODE_BITSIZE (GET_MODE (x)))
6047 rtx mul = XEXP (XEXP (x, 0), 0);
6048 rtx op0 = XEXP (mul, 0);
6049 rtx op1 = XEXP (mul, 1);
6050 enum rtx_code code0 = GET_CODE (op0);
6051 enum rtx_code code1 = GET_CODE (op1);
6054 && (code0 == SIGN_EXTEND || code0 == ZERO_EXTEND))
6055 || (GET_MODE (x) == HImode
6056 && code0 == ZERO_EXTEND && code1 == SIGN_EXTEND))
6058 if (GET_MODE (x) == HImode)
6059 *total = COSTS_N_INSNS (2);
6061 *total = COSTS_N_INSNS (12);
6062 *total += rtx_cost (XEXP (op0, 0), code0, 0, speed);
6063 *total += rtx_cost (XEXP (op1, 0), code1, 0, speed);
6072 if (GET_MODE (x) == DImode)
6073 *total = COSTS_N_INSNS (CONSTANT_P (XEXP (x, 1)) ? 4 : 15);
6074 else
6075 *total = COSTS_N_INSNS (1);
6080 *total = COSTS_N_INSNS (1);
6081 op0 = code == PLUS ? XEXP (x, 0) : XEXP (x, 1);
6082 op1 = code == PLUS ? XEXP (x, 1) : XEXP (x, 0);
6083 if (GET_MODE_SIZE (GET_MODE (x)) <= UNITS_PER_WORD
6084 && INTEGRAL_MODE_P (GET_MODE (x))
6085 && GET_CODE (op0) == MULT
6086 && GET_CODE (XEXP (op0, 1)) == CONST_INT
6087 && (INTVAL (XEXP (op0, 1)) == 2
6088 || INTVAL (XEXP (op0, 1)) == 4
6089 || (code == PLUS && INTVAL (XEXP (op0, 1)) == 8)))
6091 *total += rtx_cost (XEXP (op0, 0), ASHIFT, 0, speed);
6092 *total += rtx_cost (op1, (enum rtx_code) code, 1, speed);
6100 if (GET_MODE (x) == DFmode)
6103 *total = COSTS_N_INSNS (speed ? 10 : 1);
6105 *total = COSTS_N_INSNS (speed ? 200 : 4);
6107 else if (GET_MODE (x) == SFmode)
6110 *total = COSTS_N_INSNS (speed ? 4 : 1);
6112 *total = COSTS_N_INSNS (speed ? 100 : 4);
6114 else if (GET_MODE (x) == DImode)
6117 && GET_CODE (op0) == GET_CODE (op1)
6118 && (GET_CODE (op0) == ZERO_EXTEND
6119 || GET_CODE (op0) == SIGN_EXTEND))
6121 *total = COSTS_N_INSNS (speed ? 2 : 1);
6122 op0 = XEXP (op0, 0);
6123 op1 = XEXP (op1, 0);
6126 /* Maybe improve this later. */
6127 *total = COSTS_N_INSNS (20);
6129 else if (GET_MODE (x) == SImode)
6131 if (((GET_CODE (op0) == ZERO_EXTEND
6132 || GET_CODE (op0) == SIGN_EXTEND
6133 || shift_p (op0, LSHIFTRT, 16))
6134 && (GET_CODE (op1) == SIGN_EXTEND
6135 || GET_CODE (op1) == ZERO_EXTEND
6136 || scst5_operand (op1, SImode)
6137 || shift_p (op1, ASHIFTRT, 16)
6138 || shift_p (op1, LSHIFTRT, 16)))
6139 || (shift_p (op0, ASHIFTRT, 16)
6140 && (GET_CODE (op1) == SIGN_EXTEND
6141 || shift_p (op1, ASHIFTRT, 16))))
6143 *total = COSTS_N_INSNS (speed ? 2 : 1);
6144 op0 = XEXP (op0, 0);
6145 if (scst5_operand (op1, SImode))
6148 op1 = XEXP (op1, 0);
6151 *total = COSTS_N_INSNS (1);
6152 else if (TARGET_MPY32)
6153 *total = COSTS_N_INSNS (4);
6155 *total = COSTS_N_INSNS (6);
6157 else if (GET_MODE (x) == HImode)
6158 *total = COSTS_N_INSNS (speed ? 2 : 1);
6160 if (GET_CODE (op0) != REG
6161 && (GET_CODE (op0) != SUBREG || GET_CODE (SUBREG_REG (op0)) != REG))
6162 *total += rtx_cost (op0, MULT, 0, speed);
6163 if (op1 && GET_CODE (op1) != REG
6164 && (GET_CODE (op1) != SUBREG || GET_CODE (SUBREG_REG (op1)) != REG))
6165 *total += rtx_cost (op1, MULT, 1, speed);
6170 /* This is a bit random; assuming on average there'll be 16 leading
6171 zeros. FIXME: estimate better for constant dividends. */
6172 *total = COSTS_N_INSNS (6 + 3 * 16);
6176 /* Recognize the cmp_and/ior patterns. */
6178 if ((GET_CODE (op0) == EQ || GET_CODE (op0) == NE)
6179 && REG_P (XEXP (op0, 0))
6180 && XEXP (op0, 1) == const0_rtx
6181 && rtx_equal_p (XEXP (x, 1), XEXP (op0, 0)))
6183 *total = rtx_cost (XEXP (x, 1), (enum rtx_code) outer_code,
6194 /* Implements target hook vector_mode_supported_p. */
6197 c6x_vector_mode_supported_p (enum machine_mode mode)
6212 /* Implements TARGET_VECTORIZE_PREFERRED_SIMD_MODE. */
6213 static enum machine_mode
6214 c6x_preferred_simd_mode (enum machine_mode mode)
6228 /* Implement TARGET_SCALAR_MODE_SUPPORTED_P. */
6231 c6x_scalar_mode_supported_p (enum machine_mode mode)
6233 if (ALL_FIXED_POINT_MODE_P (mode)
6234 && GET_MODE_PRECISION (mode) <= 2 * BITS_PER_WORD)
6235 return true;
6237 return default_scalar_mode_supported_p (mode);
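
/* For example, this accepts the fixed-point _Fract/_Accum modes (QQ, HQ,
   SQ, DQ, HA, SA, DA and their unsigned and saturating variants) up to a
   precision of 2 * BITS_PER_WORD = 64 bits, in addition to the modes the
   default hook accepts.  */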
6240 /* Output a reference from a function exception table to the type_info
6241 object X. Output these via a special assembly directive. */
6244 c6x_output_ttype (rtx x)
6246 /* Use special relocations for symbol references. */
6247 if (GET_CODE (x) != CONST_INT)
6248 fputs ("\t.ehtype\t", asm_out_file);
6249 else
6250 fputs ("\t.word\t", asm_out_file);
6251 output_addr_const (asm_out_file, x);
6252 fputc ('\n', asm_out_file);
6257 /* Modify the return address of the current function. */
6260 c6x_set_return_address (rtx source, rtx scratch)
6262 struct c6x_frame frame;
6264 HOST_WIDE_INT offset;
6266 c6x_compute_frame_layout (&frame);
6267 if (! c6x_save_reg (RETURN_ADDR_REGNO))
6268 emit_move_insn (gen_rtx_REG (Pmode, RETURN_ADDR_REGNO), source);
6272 if (frame_pointer_needed)
6274 addr = hard_frame_pointer_rtx;
6275 offset = frame.b3_offset;
6279 addr = stack_pointer_rtx;
6280 offset = frame.to_allocate - frame.b3_offset;
6283 /* TODO: Use base+offset loads where possible. */
6286 HOST_WIDE_INT low = trunc_int_for_mode (offset, HImode);
6288 emit_insn (gen_movsi_high (scratch, GEN_INT (low)));
6290 emit_insn (gen_movsi_lo_sum (scratch, scratch, GEN_INT (offset)));
6291 emit_insn (gen_addsi3 (scratch, addr, scratch));
6295 emit_move_insn (gen_frame_mem (Pmode, addr), source);
6299 /* We save pairs of registers using a DImode store. Describe the component
6300 registers for DWARF generation code. */
6303 c6x_dwarf_register_span (rtx rtl)
6306 unsigned real_regno;
6311 regno = REGNO (rtl);
6312 nregs = HARD_REGNO_NREGS (regno, GET_MODE (rtl));
6313 if (nregs == 1)
6314 return NULL_RTX;
6316 p = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (nregs));
6317 for (i = 0; i < nregs; i++)
6319 if (TARGET_BIG_ENDIAN)
6320 real_regno = regno + nregs - (i + 1);
6322 real_regno = regno + i;
6324 XVECEXP (p, 0, i) = gen_rtx_REG (SImode, real_regno);
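
/* Example (a sketch): for a DImode value in the register pair A5:A4,
   i.e. (reg:DI A4) with nregs == 2, the result is
     (parallel [(reg:SI A4) (reg:SI A5)])
   with the two halves swapped under TARGET_BIG_ENDIAN, so DWARF describes
   both registers of the pair.  */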
6330 /* Codes for all the C6X builtins. */
6365 static GTY(()) tree c6x_builtin_decls[C6X_BUILTIN_MAX];
6367 /* Return the C6X builtin for CODE. */
6369 c6x_builtin_decl (unsigned code, bool initialize_p ATTRIBUTE_UNUSED)
6371 if (code >= C6X_BUILTIN_MAX)
6372 return error_mark_node;
6374 return c6x_builtin_decls[code];
6377 #define def_builtin(NAME, TYPE, CODE) \
6380 bdecl = add_builtin_function ((NAME), (TYPE), (CODE), BUILT_IN_MD, \
6382 c6x_builtin_decls[CODE] = bdecl; \
6385 /* Set up all builtin functions for this target. */
6387 c6x_init_builtins (void)
6389 tree V4QI_type_node = build_vector_type (unsigned_intQI_type_node, 4);
6390 tree V2HI_type_node = build_vector_type (intHI_type_node, 2);
6391 tree V2SI_type_node = build_vector_type (intSI_type_node, 2);
6392 tree int_ftype_int
6393 = build_function_type_list (integer_type_node, integer_type_node,
6394 NULL_TREE);
6395 tree int_ftype_int_int
6396 = build_function_type_list (integer_type_node, integer_type_node,
6397 integer_type_node, NULL_TREE);
6398 tree v2hi_ftype_v2hi
6399 = build_function_type_list (V2HI_type_node, V2HI_type_node, NULL_TREE);
6400 tree v4qi_ftype_v4qi_v4qi
6401 = build_function_type_list (V4QI_type_node, V4QI_type_node,
6402 V4QI_type_node, NULL_TREE);
6403 tree v2hi_ftype_v2hi_v2hi
6404 = build_function_type_list (V2HI_type_node, V2HI_type_node,
6405 V2HI_type_node, NULL_TREE);
6406 tree v2si_ftype_v2hi_v2hi
6407 = build_function_type_list (V2SI_type_node, V2HI_type_node,
6408 V2HI_type_node, NULL_TREE);
6410 def_builtin ("__builtin_c6x_sadd", int_ftype_int_int,
6412 def_builtin ("__builtin_c6x_ssub", int_ftype_int_int,
6414 def_builtin ("__builtin_c6x_add2", v2hi_ftype_v2hi_v2hi,
6416 def_builtin ("__builtin_c6x_sub2", v2hi_ftype_v2hi_v2hi,
6418 def_builtin ("__builtin_c6x_add4", v4qi_ftype_v4qi_v4qi,
6420 def_builtin ("__builtin_c6x_sub4", v4qi_ftype_v4qi_v4qi,
6422 def_builtin ("__builtin_c6x_mpy2", v2si_ftype_v2hi_v2hi,
6424 def_builtin ("__builtin_c6x_sadd2", v2hi_ftype_v2hi_v2hi,
6426 def_builtin ("__builtin_c6x_ssub2", v2hi_ftype_v2hi_v2hi,
6428 def_builtin ("__builtin_c6x_saddu4", v4qi_ftype_v4qi_v4qi,
6429 C6X_BUILTIN_SADDU4);
6430 def_builtin ("__builtin_c6x_smpy2", v2si_ftype_v2hi_v2hi,
6433 def_builtin ("__builtin_c6x_smpy", int_ftype_int_int,
6435 def_builtin ("__builtin_c6x_smpyh", int_ftype_int_int,
6437 def_builtin ("__builtin_c6x_smpyhl", int_ftype_int_int,
6438 C6X_BUILTIN_SMPYHL);
6439 def_builtin ("__builtin_c6x_smpylh", int_ftype_int_int,
6440 C6X_BUILTIN_SMPYLH);
6442 def_builtin ("__builtin_c6x_sshl", int_ftype_int_int,
6444 def_builtin ("__builtin_c6x_subc", int_ftype_int_int,
6447 def_builtin ("__builtin_c6x_avg2", v2hi_ftype_v2hi_v2hi,
6449 def_builtin ("__builtin_c6x_avgu4", v4qi_ftype_v4qi_v4qi,
6452 def_builtin ("__builtin_c6x_clrr", int_ftype_int_int,
6454 def_builtin ("__builtin_c6x_extr", int_ftype_int_int,
6456 def_builtin ("__builtin_c6x_extru", int_ftype_int_int,
6459 def_builtin ("__builtin_c6x_abs", int_ftype_int, C6X_BUILTIN_ABS);
6460 def_builtin ("__builtin_c6x_abs2", v2hi_ftype_v2hi, C6X_BUILTIN_ABS2);
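
/* Illustrative use of the builtins defined above, as it might appear in
   user code compiled for C6X (a sketch, deliberately kept out of the
   build):  */
#if 0
typedef short v2hi __attribute__ ((vector_size (4)));

int
saturated_sum (int a, int b)
{
  return __builtin_c6x_sadd (a, b);     /* 32-bit saturating add.  */
}

v2hi
halfword_average (v2hi a, v2hi b)
{
  return __builtin_c6x_avg2 (a, b);     /* Packed 16-bit average.  */
}
#endif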
6464 struct builtin_description
6466 const enum insn_code icode;
6467 const char *const name;
6468 const enum c6x_builtins code;
6471 static const struct builtin_description bdesc_2arg[] =
6473 { CODE_FOR_saddsi3, "__builtin_c6x_sadd", C6X_BUILTIN_SADD },
6474 { CODE_FOR_ssubsi3, "__builtin_c6x_ssub", C6X_BUILTIN_SSUB },
6475 { CODE_FOR_addv2hi3, "__builtin_c6x_add2", C6X_BUILTIN_ADD2 },
6476 { CODE_FOR_subv2hi3, "__builtin_c6x_sub2", C6X_BUILTIN_SUB2 },
6477 { CODE_FOR_addv4qi3, "__builtin_c6x_add4", C6X_BUILTIN_ADD4 },
6478 { CODE_FOR_subv4qi3, "__builtin_c6x_sub4", C6X_BUILTIN_SUB4 },
6479 { CODE_FOR_ss_addv2hi3, "__builtin_c6x_sadd2", C6X_BUILTIN_SADD2 },
6480 { CODE_FOR_ss_subv2hi3, "__builtin_c6x_ssub2", C6X_BUILTIN_SSUB2 },
6481 { CODE_FOR_us_addv4qi3, "__builtin_c6x_saddu4", C6X_BUILTIN_SADDU4 },
6483 { CODE_FOR_subcsi3, "__builtin_c6x_subc", C6X_BUILTIN_SUBC },
6484 { CODE_FOR_ss_ashlsi3, "__builtin_c6x_sshl", C6X_BUILTIN_SSHL },
6486 { CODE_FOR_avgv2hi3, "__builtin_c6x_avg2", C6X_BUILTIN_AVG2 },
6487 { CODE_FOR_uavgv4qi3, "__builtin_c6x_avgu4", C6X_BUILTIN_AVGU4 },
6489 { CODE_FOR_mulhqsq3, "__builtin_c6x_smpy", C6X_BUILTIN_SMPY },
6490 { CODE_FOR_mulhqsq3_hh, "__builtin_c6x_smpyh", C6X_BUILTIN_SMPYH },
6491 { CODE_FOR_mulhqsq3_lh, "__builtin_c6x_smpylh", C6X_BUILTIN_SMPYLH },
6492 { CODE_FOR_mulhqsq3_hl, "__builtin_c6x_smpyhl", C6X_BUILTIN_SMPYHL },
6494 { CODE_FOR_mulv2hqv2sq3, "__builtin_c6x_smpy2", C6X_BUILTIN_SMPY2 },
6496 { CODE_FOR_clrr, "__builtin_c6x_clrr", C6X_BUILTIN_CLRR },
6497 { CODE_FOR_extr, "__builtin_c6x_extr", C6X_BUILTIN_EXTR },
6498 { CODE_FOR_extru, "__builtin_c6x_extru", C6X_BUILTIN_EXTRU }
6501 static const struct builtin_description bdesc_1arg[] =
6503 { CODE_FOR_ssabssi2, "__builtin_c6x_abs", C6X_BUILTIN_ABS },
6504 { CODE_FOR_ssabsv2hi2, "__builtin_c6x_abs2", C6X_BUILTIN_ABS2 }
6507 /* Errors in the source file can cause expand_expr to return const0_rtx
6508 where we expect a vector. To avoid crashing, use one of the vector
6509 clear instructions. */
6511 safe_vector_operand (rtx x, enum machine_mode mode)
6513 if (x != const0_rtx)
6514 return x;
6515 x = gen_reg_rtx (SImode);
6517 emit_insn (gen_movsi (x, CONST0_RTX (SImode)));
6518 return gen_lowpart (mode, x);
6521 /* Subroutine of c6x_expand_builtin to take care of binop insns.  MATCH_OP is
6522 true if the insn's output operand must also appear as an input (as for clrr). */
6525 c6x_expand_binop_builtin (enum insn_code icode, tree exp, rtx target,
6528 int offs = match_op ? 1 : 0;
6530 tree arg0 = CALL_EXPR_ARG (exp, 0);
6531 tree arg1 = CALL_EXPR_ARG (exp, 1);
6532 rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, EXPAND_NORMAL);
6533 rtx op1 = expand_expr (arg1, NULL_RTX, VOIDmode, EXPAND_NORMAL);
6534 enum machine_mode op0mode = GET_MODE (op0);
6535 enum machine_mode op1mode = GET_MODE (op1);
6536 enum machine_mode tmode = insn_data[icode].operand[0].mode;
6537 enum machine_mode mode0 = insn_data[icode].operand[1 + offs].mode;
6538 enum machine_mode mode1 = insn_data[icode].operand[2 + offs].mode;
6541 if (VECTOR_MODE_P (mode0))
6542 op0 = safe_vector_operand (op0, mode0);
6543 if (VECTOR_MODE_P (mode1))
6544 op1 = safe_vector_operand (op1, mode1);
6547 || GET_MODE (target) != tmode
6548 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
6550 if (tmode == SQmode || tmode == V2SQmode)
6552 ret = gen_reg_rtx (tmode == SQmode ? SImode : V2SImode);
6553 target = gen_lowpart (tmode, ret);
6556 target = gen_reg_rtx (tmode);
6559 if ((op0mode == V2HImode || op0mode == SImode || op0mode == VOIDmode)
6560 && (mode0 == V2HQmode || mode0 == HQmode || mode0 == SQmode))
6563 op0 = gen_lowpart (mode0, op0);
6565 if ((op1mode == V2HImode || op1mode == SImode || op1mode == VOIDmode)
6566 && (mode1 == V2HQmode || mode1 == HQmode || mode1 == SQmode))
6569 op1 = gen_lowpart (mode1, op1);
6571 /* In case the insn wants input operands in modes different from
6572 the result, abort. */
6573 gcc_assert ((op0mode == mode0 || op0mode == VOIDmode)
6574 && (op1mode == mode1 || op1mode == VOIDmode));
6576 if (! (*insn_data[icode].operand[1 + offs].predicate) (op0, mode0))
6577 op0 = copy_to_mode_reg (mode0, op0);
6578 if (! (*insn_data[icode].operand[2 + offs].predicate) (op1, mode1))
6579 op1 = copy_to_mode_reg (mode1, op1);
6581 if (match_op)
6582 pat = GEN_FCN (icode) (target, target, op0, op1);
6583 else
6584 pat = GEN_FCN (icode) (target, op0, op1);
6594 /* Subroutine of c6x_expand_builtin to take care of unop insns. */
6597 c6x_expand_unop_builtin (enum insn_code icode, tree exp,
6601 tree arg0 = CALL_EXPR_ARG (exp, 0);
6602 rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, EXPAND_NORMAL);
6603 enum machine_mode op0mode = GET_MODE (op0);
6604 enum machine_mode tmode = insn_data[icode].operand[0].mode;
6605 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
6608 || GET_MODE (target) != tmode
6609 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
6610 target = gen_reg_rtx (tmode);
6612 if (VECTOR_MODE_P (mode0))
6613 op0 = safe_vector_operand (op0, mode0);
6615 if (op0mode == SImode && mode0 == HImode)
6618 op0 = gen_lowpart (HImode, op0);
6620 gcc_assert (op0mode == mode0 || op0mode == VOIDmode);
6622 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
6623 op0 = copy_to_mode_reg (mode0, op0);
6625 pat = GEN_FCN (icode) (target, op0);
6632 /* Expand an expression EXP that calls a built-in function,
6633 with result going to TARGET if that's convenient
6634 (and in mode MODE if that's convenient).
6635 SUBTARGET may be used as the target for computing one of EXP's operands.
6636 IGNORE is nonzero if the value is to be ignored. */
6639 c6x_expand_builtin (tree exp, rtx target ATTRIBUTE_UNUSED,
6640 rtx subtarget ATTRIBUTE_UNUSED,
6641 enum machine_mode mode ATTRIBUTE_UNUSED,
6642 int ignore ATTRIBUTE_UNUSED)
6645 const struct builtin_description *d;
6646 tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
6647 unsigned int fcode = DECL_FUNCTION_CODE (fndecl);
6649 for (i = 0, d = bdesc_2arg; i < ARRAY_SIZE (bdesc_2arg); i++, d++)
6650 if (d->code == fcode)
6651 return c6x_expand_binop_builtin (d->icode, exp, target,
6652 fcode == C6X_BUILTIN_CLRR);
6654 for (i = 0, d = bdesc_1arg; i < ARRAY_SIZE (bdesc_1arg); i++, d++)
6655 if (d->code == fcode)
6656 return c6x_expand_unop_builtin (d->icode, exp, target);
6661 /* Target unwind frame info is generated from dwarf CFI directives, so
6662 always output dwarf2 unwind info. */
6664 static enum unwind_info_type
6665 c6x_debug_unwind_info (void)
6667 if (flag_unwind_tables || flag_exceptions)
6668 return UI_DWARF2;
6670 return default_debug_unwind_info ();
6673 /* Target Structure. */
6675 /* Initialize the GCC target structure. */
6676 #undef TARGET_FUNCTION_ARG
6677 #define TARGET_FUNCTION_ARG c6x_function_arg
6678 #undef TARGET_FUNCTION_ARG_ADVANCE
6679 #define TARGET_FUNCTION_ARG_ADVANCE c6x_function_arg_advance
6680 #undef TARGET_FUNCTION_ARG_BOUNDARY
6681 #define TARGET_FUNCTION_ARG_BOUNDARY c6x_function_arg_boundary
6682 #undef TARGET_FUNCTION_ARG_ROUND_BOUNDARY
6683 #define TARGET_FUNCTION_ARG_ROUND_BOUNDARY \
6684 c6x_function_arg_round_boundary
6685 #undef TARGET_FUNCTION_VALUE_REGNO_P
6686 #define TARGET_FUNCTION_VALUE_REGNO_P c6x_function_value_regno_p
6687 #undef TARGET_FUNCTION_VALUE
6688 #define TARGET_FUNCTION_VALUE c6x_function_value
6689 #undef TARGET_LIBCALL_VALUE
6690 #define TARGET_LIBCALL_VALUE c6x_libcall_value
6691 #undef TARGET_RETURN_IN_MEMORY
6692 #define TARGET_RETURN_IN_MEMORY c6x_return_in_memory
6693 #undef TARGET_RETURN_IN_MSB
6694 #define TARGET_RETURN_IN_MSB c6x_return_in_msb
6695 #undef TARGET_PASS_BY_REFERENCE
6696 #define TARGET_PASS_BY_REFERENCE c6x_pass_by_reference
6697 #undef TARGET_CALLEE_COPIES
6698 #define TARGET_CALLEE_COPIES c6x_callee_copies
6699 #undef TARGET_STRUCT_VALUE_RTX
6700 #define TARGET_STRUCT_VALUE_RTX c6x_struct_value_rtx
6701 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
6702 #define TARGET_FUNCTION_OK_FOR_SIBCALL c6x_function_ok_for_sibcall
6704 #undef TARGET_ASM_OUTPUT_MI_THUNK
6705 #define TARGET_ASM_OUTPUT_MI_THUNK c6x_output_mi_thunk
6706 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
6707 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK c6x_can_output_mi_thunk
6709 #undef TARGET_BUILD_BUILTIN_VA_LIST
6710 #define TARGET_BUILD_BUILTIN_VA_LIST c6x_build_builtin_va_list
6712 #undef TARGET_ASM_TRAMPOLINE_TEMPLATE
6713 #define TARGET_ASM_TRAMPOLINE_TEMPLATE c6x_asm_trampoline_template
6714 #undef TARGET_TRAMPOLINE_INIT
6715 #define TARGET_TRAMPOLINE_INIT c6x_initialize_trampoline
6717 #undef TARGET_LEGITIMATE_CONSTANT_P
6718 #define TARGET_LEGITIMATE_CONSTANT_P c6x_legitimate_constant_p
6719 #undef TARGET_LEGITIMATE_ADDRESS_P
6720 #define TARGET_LEGITIMATE_ADDRESS_P c6x_legitimate_address_p
6722 #undef TARGET_IN_SMALL_DATA_P
6723 #define TARGET_IN_SMALL_DATA_P c6x_in_small_data_p
6724 #undef TARGET_ASM_SELECT_RTX_SECTION
6725 #define TARGET_ASM_SELECT_RTX_SECTION c6x_select_rtx_section
6726 #undef TARGET_ASM_SELECT_SECTION
6727 #define TARGET_ASM_SELECT_SECTION c6x_elf_select_section
6728 #undef TARGET_ASM_UNIQUE_SECTION
6729 #define TARGET_ASM_UNIQUE_SECTION c6x_elf_unique_section
6730 #undef TARGET_SECTION_TYPE_FLAGS
6731 #define TARGET_SECTION_TYPE_FLAGS c6x_section_type_flags
6732 #undef TARGET_HAVE_SRODATA_SECTION
6733 #define TARGET_HAVE_SRODATA_SECTION true
6734 #undef TARGET_ASM_MERGEABLE_RODATA_PREFIX
6735 #define TARGET_ASM_MERGEABLE_RODATA_PREFIX ".const"
6737 #undef TARGET_OPTION_OVERRIDE
6738 #define TARGET_OPTION_OVERRIDE c6x_option_override
6739 #undef TARGET_CONDITIONAL_REGISTER_USAGE
6740 #define TARGET_CONDITIONAL_REGISTER_USAGE c6x_conditional_register_usage
6742 #undef TARGET_INIT_LIBFUNCS
6743 #define TARGET_INIT_LIBFUNCS c6x_init_libfuncs
6744 #undef TARGET_LIBFUNC_GNU_PREFIX
6745 #define TARGET_LIBFUNC_GNU_PREFIX true
6747 #undef TARGET_SCALAR_MODE_SUPPORTED_P
6748 #define TARGET_SCALAR_MODE_SUPPORTED_P c6x_scalar_mode_supported_p
6749 #undef TARGET_VECTOR_MODE_SUPPORTED_P
6750 #define TARGET_VECTOR_MODE_SUPPORTED_P c6x_vector_mode_supported_p
6751 #undef TARGET_VECTORIZE_PREFERRED_SIMD_MODE
6752 #define TARGET_VECTORIZE_PREFERRED_SIMD_MODE c6x_preferred_simd_mode
6754 #undef TARGET_RTX_COSTS
6755 #define TARGET_RTX_COSTS c6x_rtx_costs
6757 #undef TARGET_SCHED_INIT
6758 #define TARGET_SCHED_INIT c6x_sched_init
6759 #undef TARGET_SCHED_SET_SCHED_FLAGS
6760 #define TARGET_SCHED_SET_SCHED_FLAGS c6x_set_sched_flags
6761 #undef TARGET_SCHED_ADJUST_COST
6762 #define TARGET_SCHED_ADJUST_COST c6x_adjust_cost
6763 #undef TARGET_SCHED_ISSUE_RATE
6764 #define TARGET_SCHED_ISSUE_RATE c6x_issue_rate
6765 #undef TARGET_SCHED_VARIABLE_ISSUE
6766 #define TARGET_SCHED_VARIABLE_ISSUE c6x_variable_issue
6767 #undef TARGET_SCHED_REORDER
6768 #define TARGET_SCHED_REORDER c6x_sched_reorder
6769 #undef TARGET_SCHED_REORDER2
6770 #define TARGET_SCHED_REORDER2 c6x_sched_reorder2
6771 #undef TARGET_SCHED_DFA_NEW_CYCLE
6772 #define TARGET_SCHED_DFA_NEW_CYCLE c6x_dfa_new_cycle
6773 #undef TARGET_SCHED_DFA_PRE_CYCLE_INSN
6774 #define TARGET_SCHED_DFA_PRE_CYCLE_INSN c6x_sched_dfa_pre_cycle_insn
6775 #undef TARGET_SCHED_EXPOSED_PIPELINE
6776 #define TARGET_SCHED_EXPOSED_PIPELINE true
6778 #undef TARGET_SCHED_ALLOC_SCHED_CONTEXT
6779 #define TARGET_SCHED_ALLOC_SCHED_CONTEXT c6x_alloc_sched_context
6780 #undef TARGET_SCHED_INIT_SCHED_CONTEXT
6781 #define TARGET_SCHED_INIT_SCHED_CONTEXT c6x_init_sched_context
6782 #undef TARGET_SCHED_SET_SCHED_CONTEXT
6783 #define TARGET_SCHED_SET_SCHED_CONTEXT c6x_set_sched_context
6784 #undef TARGET_SCHED_CLEAR_SCHED_CONTEXT
6785 #define TARGET_SCHED_CLEAR_SCHED_CONTEXT c6x_clear_sched_context
6786 #undef TARGET_SCHED_FREE_SCHED_CONTEXT
6787 #define TARGET_SCHED_FREE_SCHED_CONTEXT c6x_free_sched_context
6789 #undef TARGET_CAN_ELIMINATE
6790 #define TARGET_CAN_ELIMINATE c6x_can_eliminate
6792 #undef TARGET_PREFERRED_RENAME_CLASS
6793 #define TARGET_PREFERRED_RENAME_CLASS c6x_preferred_rename_class
6795 #undef TARGET_MACHINE_DEPENDENT_REORG
6796 #define TARGET_MACHINE_DEPENDENT_REORG c6x_reorg
6798 #undef TARGET_ASM_FILE_START
6799 #define TARGET_ASM_FILE_START c6x_file_start
6801 #undef TARGET_PRINT_OPERAND
6802 #define TARGET_PRINT_OPERAND c6x_print_operand
6803 #undef TARGET_PRINT_OPERAND_ADDRESS
6804 #define TARGET_PRINT_OPERAND_ADDRESS c6x_print_operand_address
6805 #undef TARGET_PRINT_OPERAND_PUNCT_VALID_P
6806 #define TARGET_PRINT_OPERAND_PUNCT_VALID_P c6x_print_operand_punct_valid_p
6808 /* C6x unwinding tables use a different format for the typeinfo tables. */
6809 #undef TARGET_ASM_TTYPE
6810 #define TARGET_ASM_TTYPE c6x_output_ttype
6812 /* The C6x ABI follows the ARM EABI exception handling rules. */
6813 #undef TARGET_ARM_EABI_UNWINDER
6814 #define TARGET_ARM_EABI_UNWINDER true
6816 #undef TARGET_ASM_EMIT_EXCEPT_PERSONALITY
6817 #define TARGET_ASM_EMIT_EXCEPT_PERSONALITY c6x_asm_emit_except_personality
6819 #undef TARGET_ASM_INIT_SECTIONS
6820 #define TARGET_ASM_INIT_SECTIONS c6x_asm_init_sections
6822 #undef TARGET_DEBUG_UNWIND_INFO
6823 #define TARGET_DEBUG_UNWIND_INFO c6x_debug_unwind_info
6825 #undef TARGET_DWARF_REGISTER_SPAN
6826 #define TARGET_DWARF_REGISTER_SPAN c6x_dwarf_register_span
6828 #undef TARGET_INIT_BUILTINS
6829 #define TARGET_INIT_BUILTINS c6x_init_builtins
6830 #undef TARGET_EXPAND_BUILTIN
6831 #define TARGET_EXPAND_BUILTIN c6x_expand_builtin
6832 #undef TARGET_BUILTIN_DECL
6833 #define TARGET_BUILTIN_DECL c6x_builtin_decl
6835 struct gcc_target targetm = TARGET_INITIALIZER;