1 /* Subroutines for insn-output.c for SPARC.
2 Copyright (C) 1987-2013 Free Software Foundation, Inc.
3 Contributed by Michael Tiemann (tiemann@cygnus.com)
4 64-bit SPARC-V9 support by Michael Tiemann, Jim Wilson, and Doug Evans,
7 This file is part of GCC.
9 GCC is free software; you can redistribute it and/or modify
10 it under the terms of the GNU General Public License as published by
11 the Free Software Foundation; either version 3, or (at your option)
14 GCC is distributed in the hope that it will be useful,
15 but WITHOUT ANY WARRANTY; without even the implied warranty of
16 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17 GNU General Public License for more details.
19 You should have received a copy of the GNU General Public License
20 along with GCC; see the file COPYING3. If not see
21 <http://www.gnu.org/licenses/>. */
25 #include "coretypes.h"
30 #include "hard-reg-set.h"
31 #include "insn-config.h"
32 #include "insn-codes.h"
33 #include "conditions.h"
35 #include "insn-attr.h"
42 #include "diagnostic-core.h"
47 #include "target-def.h"
48 #include "common/common-target.h"
50 #include "langhooks.h"
55 #include "tree-pass.h"
59 struct processor_costs {
63 /* Integer signed load */
66 /* Integer zeroed load */
72 /* fmov, fneg, fabs */
76 const int float_plusminus;
82 const int float_cmove;
88 const int float_div_sf;
91 const int float_div_df;
94 const int float_sqrt_sf;
97 const int float_sqrt_df;
105 /* integer multiply cost for each bit set past the most
106 significant 3, so the formula for multiply cost becomes:
109 highest_bit = highest_clear_bit(rs1);
111 highest_bit = highest_set_bit(rs1);
114 cost = int_mul{,X} + ((highest_bit - 3) / int_mul_bit_factor);
116 A value of zero indicates that the multiply costs is fixed,
118 const int int_mul_bit_factor;
129 /* penalty for shifts, due to scheduling rules etc. */
130 const int shift_penalty;
134 struct processor_costs cypress_costs = {
135 COSTS_N_INSNS (2), /* int load */
136 COSTS_N_INSNS (2), /* int signed load */
137 COSTS_N_INSNS (2), /* int zeroed load */
138 COSTS_N_INSNS (2), /* float load */
139 COSTS_N_INSNS (5), /* fmov, fneg, fabs */
140 COSTS_N_INSNS (5), /* fadd, fsub */
141 COSTS_N_INSNS (1), /* fcmp */
142 COSTS_N_INSNS (1), /* fmov, fmovr */
143 COSTS_N_INSNS (7), /* fmul */
144 COSTS_N_INSNS (37), /* fdivs */
145 COSTS_N_INSNS (37), /* fdivd */
146 COSTS_N_INSNS (63), /* fsqrts */
147 COSTS_N_INSNS (63), /* fsqrtd */
148 COSTS_N_INSNS (1), /* imul */
149 COSTS_N_INSNS (1), /* imulX */
150 0, /* imul bit factor */
151 COSTS_N_INSNS (1), /* idiv */
152 COSTS_N_INSNS (1), /* idivX */
153 COSTS_N_INSNS (1), /* movcc/movr */
154 0, /* shift penalty */
158 struct processor_costs supersparc_costs = {
159 COSTS_N_INSNS (1), /* int load */
160 COSTS_N_INSNS (1), /* int signed load */
161 COSTS_N_INSNS (1), /* int zeroed load */
162 COSTS_N_INSNS (0), /* float load */
163 COSTS_N_INSNS (3), /* fmov, fneg, fabs */
164 COSTS_N_INSNS (3), /* fadd, fsub */
165 COSTS_N_INSNS (3), /* fcmp */
166 COSTS_N_INSNS (1), /* fmov, fmovr */
167 COSTS_N_INSNS (3), /* fmul */
168 COSTS_N_INSNS (6), /* fdivs */
169 COSTS_N_INSNS (9), /* fdivd */
170 COSTS_N_INSNS (12), /* fsqrts */
171 COSTS_N_INSNS (12), /* fsqrtd */
172 COSTS_N_INSNS (4), /* imul */
173 COSTS_N_INSNS (4), /* imulX */
174 0, /* imul bit factor */
175 COSTS_N_INSNS (4), /* idiv */
176 COSTS_N_INSNS (4), /* idivX */
177 COSTS_N_INSNS (1), /* movcc/movr */
178 1, /* shift penalty */
182 struct processor_costs hypersparc_costs = {
183 COSTS_N_INSNS (1), /* int load */
184 COSTS_N_INSNS (1), /* int signed load */
185 COSTS_N_INSNS (1), /* int zeroed load */
186 COSTS_N_INSNS (1), /* float load */
187 COSTS_N_INSNS (1), /* fmov, fneg, fabs */
188 COSTS_N_INSNS (1), /* fadd, fsub */
189 COSTS_N_INSNS (1), /* fcmp */
190 COSTS_N_INSNS (1), /* fmov, fmovr */
191 COSTS_N_INSNS (1), /* fmul */
192 COSTS_N_INSNS (8), /* fdivs */
193 COSTS_N_INSNS (12), /* fdivd */
194 COSTS_N_INSNS (17), /* fsqrts */
195 COSTS_N_INSNS (17), /* fsqrtd */
196 COSTS_N_INSNS (17), /* imul */
197 COSTS_N_INSNS (17), /* imulX */
198 0, /* imul bit factor */
199 COSTS_N_INSNS (17), /* idiv */
200 COSTS_N_INSNS (17), /* idivX */
201 COSTS_N_INSNS (1), /* movcc/movr */
202 0, /* shift penalty */
206 struct processor_costs leon_costs = {
207 COSTS_N_INSNS (1), /* int load */
208 COSTS_N_INSNS (1), /* int signed load */
209 COSTS_N_INSNS (1), /* int zeroed load */
210 COSTS_N_INSNS (1), /* float load */
211 COSTS_N_INSNS (1), /* fmov, fneg, fabs */
212 COSTS_N_INSNS (1), /* fadd, fsub */
213 COSTS_N_INSNS (1), /* fcmp */
214 COSTS_N_INSNS (1), /* fmov, fmovr */
215 COSTS_N_INSNS (1), /* fmul */
216 COSTS_N_INSNS (15), /* fdivs */
217 COSTS_N_INSNS (15), /* fdivd */
218 COSTS_N_INSNS (23), /* fsqrts */
219 COSTS_N_INSNS (23), /* fsqrtd */
220 COSTS_N_INSNS (5), /* imul */
221 COSTS_N_INSNS (5), /* imulX */
222 0, /* imul bit factor */
223 COSTS_N_INSNS (5), /* idiv */
224 COSTS_N_INSNS (5), /* idivX */
225 COSTS_N_INSNS (1), /* movcc/movr */
226 0, /* shift penalty */
230 struct processor_costs sparclet_costs = {
231 COSTS_N_INSNS (3), /* int load */
232 COSTS_N_INSNS (3), /* int signed load */
233 COSTS_N_INSNS (1), /* int zeroed load */
234 COSTS_N_INSNS (1), /* float load */
235 COSTS_N_INSNS (1), /* fmov, fneg, fabs */
236 COSTS_N_INSNS (1), /* fadd, fsub */
237 COSTS_N_INSNS (1), /* fcmp */
238 COSTS_N_INSNS (1), /* fmov, fmovr */
239 COSTS_N_INSNS (1), /* fmul */
240 COSTS_N_INSNS (1), /* fdivs */
241 COSTS_N_INSNS (1), /* fdivd */
242 COSTS_N_INSNS (1), /* fsqrts */
243 COSTS_N_INSNS (1), /* fsqrtd */
244 COSTS_N_INSNS (5), /* imul */
245 COSTS_N_INSNS (5), /* imulX */
246 0, /* imul bit factor */
247 COSTS_N_INSNS (5), /* idiv */
248 COSTS_N_INSNS (5), /* idivX */
249 COSTS_N_INSNS (1), /* movcc/movr */
250 0, /* shift penalty */
254 struct processor_costs ultrasparc_costs = {
255 COSTS_N_INSNS (2), /* int load */
256 COSTS_N_INSNS (3), /* int signed load */
257 COSTS_N_INSNS (2), /* int zeroed load */
258 COSTS_N_INSNS (2), /* float load */
259 COSTS_N_INSNS (1), /* fmov, fneg, fabs */
260 COSTS_N_INSNS (4), /* fadd, fsub */
261 COSTS_N_INSNS (1), /* fcmp */
262 COSTS_N_INSNS (2), /* fmov, fmovr */
263 COSTS_N_INSNS (4), /* fmul */
264 COSTS_N_INSNS (13), /* fdivs */
265 COSTS_N_INSNS (23), /* fdivd */
266 COSTS_N_INSNS (13), /* fsqrts */
267 COSTS_N_INSNS (23), /* fsqrtd */
268 COSTS_N_INSNS (4), /* imul */
269 COSTS_N_INSNS (4), /* imulX */
270 2, /* imul bit factor */
271 COSTS_N_INSNS (37), /* idiv */
272 COSTS_N_INSNS (68), /* idivX */
273 COSTS_N_INSNS (2), /* movcc/movr */
274 2, /* shift penalty */
278 struct processor_costs ultrasparc3_costs = {
279 COSTS_N_INSNS (2), /* int load */
280 COSTS_N_INSNS (3), /* int signed load */
281 COSTS_N_INSNS (3), /* int zeroed load */
282 COSTS_N_INSNS (2), /* float load */
283 COSTS_N_INSNS (3), /* fmov, fneg, fabs */
284 COSTS_N_INSNS (4), /* fadd, fsub */
285 COSTS_N_INSNS (5), /* fcmp */
286 COSTS_N_INSNS (3), /* fmov, fmovr */
287 COSTS_N_INSNS (4), /* fmul */
288 COSTS_N_INSNS (17), /* fdivs */
289 COSTS_N_INSNS (20), /* fdivd */
290 COSTS_N_INSNS (20), /* fsqrts */
291 COSTS_N_INSNS (29), /* fsqrtd */
292 COSTS_N_INSNS (6), /* imul */
293 COSTS_N_INSNS (6), /* imulX */
294 0, /* imul bit factor */
295 COSTS_N_INSNS (40), /* idiv */
296 COSTS_N_INSNS (71), /* idivX */
297 COSTS_N_INSNS (2), /* movcc/movr */
298 0, /* shift penalty */
302 struct processor_costs niagara_costs = {
303 COSTS_N_INSNS (3), /* int load */
304 COSTS_N_INSNS (3), /* int signed load */
305 COSTS_N_INSNS (3), /* int zeroed load */
306 COSTS_N_INSNS (9), /* float load */
307 COSTS_N_INSNS (8), /* fmov, fneg, fabs */
308 COSTS_N_INSNS (8), /* fadd, fsub */
309 COSTS_N_INSNS (26), /* fcmp */
310 COSTS_N_INSNS (8), /* fmov, fmovr */
311 COSTS_N_INSNS (29), /* fmul */
312 COSTS_N_INSNS (54), /* fdivs */
313 COSTS_N_INSNS (83), /* fdivd */
314 COSTS_N_INSNS (100), /* fsqrts - not implemented in hardware */
315 COSTS_N_INSNS (100), /* fsqrtd - not implemented in hardware */
316 COSTS_N_INSNS (11), /* imul */
317 COSTS_N_INSNS (11), /* imulX */
318 0, /* imul bit factor */
319 COSTS_N_INSNS (72), /* idiv */
320 COSTS_N_INSNS (72), /* idivX */
321 COSTS_N_INSNS (1), /* movcc/movr */
322 0, /* shift penalty */
326 struct processor_costs niagara2_costs = {
327 COSTS_N_INSNS (3), /* int load */
328 COSTS_N_INSNS (3), /* int signed load */
329 COSTS_N_INSNS (3), /* int zeroed load */
330 COSTS_N_INSNS (3), /* float load */
331 COSTS_N_INSNS (6), /* fmov, fneg, fabs */
332 COSTS_N_INSNS (6), /* fadd, fsub */
333 COSTS_N_INSNS (6), /* fcmp */
334 COSTS_N_INSNS (6), /* fmov, fmovr */
335 COSTS_N_INSNS (6), /* fmul */
336 COSTS_N_INSNS (19), /* fdivs */
337 COSTS_N_INSNS (33), /* fdivd */
338 COSTS_N_INSNS (19), /* fsqrts */
339 COSTS_N_INSNS (33), /* fsqrtd */
340 COSTS_N_INSNS (5), /* imul */
341 COSTS_N_INSNS (5), /* imulX */
342 0, /* imul bit factor */
343 COSTS_N_INSNS (26), /* idiv, average of 12 - 41 cycle range */
344 COSTS_N_INSNS (26), /* idivX, average of 12 - 41 cycle range */
345 COSTS_N_INSNS (1), /* movcc/movr */
346 0, /* shift penalty */
350 struct processor_costs niagara3_costs = {
351 COSTS_N_INSNS (3), /* int load */
352 COSTS_N_INSNS (3), /* int signed load */
353 COSTS_N_INSNS (3), /* int zeroed load */
354 COSTS_N_INSNS (3), /* float load */
355 COSTS_N_INSNS (9), /* fmov, fneg, fabs */
356 COSTS_N_INSNS (9), /* fadd, fsub */
357 COSTS_N_INSNS (9), /* fcmp */
358 COSTS_N_INSNS (9), /* fmov, fmovr */
359 COSTS_N_INSNS (9), /* fmul */
360 COSTS_N_INSNS (23), /* fdivs */
361 COSTS_N_INSNS (37), /* fdivd */
362 COSTS_N_INSNS (23), /* fsqrts */
363 COSTS_N_INSNS (37), /* fsqrtd */
364 COSTS_N_INSNS (9), /* imul */
365 COSTS_N_INSNS (9), /* imulX */
366 0, /* imul bit factor */
367 COSTS_N_INSNS (31), /* idiv, average of 17 - 45 cycle range */
368 COSTS_N_INSNS (30), /* idivX, average of 16 - 44 cycle range */
369 COSTS_N_INSNS (1), /* movcc/movr */
370 0, /* shift penalty */
374 struct processor_costs niagara4_costs = {
375 COSTS_N_INSNS (5), /* int load */
376 COSTS_N_INSNS (5), /* int signed load */
377 COSTS_N_INSNS (5), /* int zeroed load */
378 COSTS_N_INSNS (5), /* float load */
379 COSTS_N_INSNS (11), /* fmov, fneg, fabs */
380 COSTS_N_INSNS (11), /* fadd, fsub */
381 COSTS_N_INSNS (11), /* fcmp */
382 COSTS_N_INSNS (11), /* fmov, fmovr */
383 COSTS_N_INSNS (11), /* fmul */
384 COSTS_N_INSNS (24), /* fdivs */
385 COSTS_N_INSNS (37), /* fdivd */
386 COSTS_N_INSNS (24), /* fsqrts */
387 COSTS_N_INSNS (37), /* fsqrtd */
388 COSTS_N_INSNS (12), /* imul */
389 COSTS_N_INSNS (12), /* imulX */
390 0, /* imul bit factor */
391 COSTS_N_INSNS (50), /* idiv, average of 41 - 60 cycle range */
392 COSTS_N_INSNS (35), /* idivX, average of 26 - 44 cycle range */
393 COSTS_N_INSNS (1), /* movcc/movr */
394 0, /* shift penalty */
397 static const struct processor_costs *sparc_costs = &cypress_costs;
399 #ifdef HAVE_AS_RELAX_OPTION
400 /* If 'as' and 'ld' are relaxing tail call insns into branch always, use
401 "or %o7,%g0,X; call Y; or X,%g0,%o7" always, so that it can be optimized.
402 With sethi/jmp, neither 'as' nor 'ld' has an easy way how to find out if
403 somebody does not branch between the sethi and jmp. */
404 #define LEAF_SIBCALL_SLOT_RESERVED_P 1
406 #define LEAF_SIBCALL_SLOT_RESERVED_P \
407 ((TARGET_ARCH64 && !TARGET_CM_MEDLOW) || flag_pic)
410 /* Vector to say how input registers are mapped to output registers.
411 HARD_FRAME_POINTER_REGNUM cannot be remapped by this function to
412 eliminate it. You must use -fomit-frame-pointer to get that. */
413 char leaf_reg_remap[] =
414 { 0, 1, 2, 3, 4, 5, 6, 7,
415 -1, -1, -1, -1, -1, -1, 14, -1,
416 -1, -1, -1, -1, -1, -1, -1, -1,
417 8, 9, 10, 11, 12, 13, -1, 15,
419 32, 33, 34, 35, 36, 37, 38, 39,
420 40, 41, 42, 43, 44, 45, 46, 47,
421 48, 49, 50, 51, 52, 53, 54, 55,
422 56, 57, 58, 59, 60, 61, 62, 63,
423 64, 65, 66, 67, 68, 69, 70, 71,
424 72, 73, 74, 75, 76, 77, 78, 79,
425 80, 81, 82, 83, 84, 85, 86, 87,
426 88, 89, 90, 91, 92, 93, 94, 95,
427 96, 97, 98, 99, 100, 101, 102};
429 /* Vector, indexed by hard register number, which contains 1
430 for a register that is allowable in a candidate for leaf
431 function treatment. */
432 char sparc_leaf_regs[] =
433 { 1, 1, 1, 1, 1, 1, 1, 1,
434 0, 0, 0, 0, 0, 0, 1, 0,
435 0, 0, 0, 0, 0, 0, 0, 0,
436 1, 1, 1, 1, 1, 1, 0, 1,
437 1, 1, 1, 1, 1, 1, 1, 1,
438 1, 1, 1, 1, 1, 1, 1, 1,
439 1, 1, 1, 1, 1, 1, 1, 1,
440 1, 1, 1, 1, 1, 1, 1, 1,
441 1, 1, 1, 1, 1, 1, 1, 1,
442 1, 1, 1, 1, 1, 1, 1, 1,
443 1, 1, 1, 1, 1, 1, 1, 1,
444 1, 1, 1, 1, 1, 1, 1, 1,
445 1, 1, 1, 1, 1, 1, 1};
447 struct GTY(()) machine_function
449 /* Size of the frame of the function. */
450 HOST_WIDE_INT frame_size;
452 /* Size of the frame of the function minus the register window save area
453 and the outgoing argument area. */
454 HOST_WIDE_INT apparent_frame_size;
456 /* Register we pretend the frame pointer is allocated to. Normally, this
457 is %fp, but if we are in a leaf procedure, this is (%sp + offset). We
458 record "offset" separately as it may be too big for (reg + disp). */
460 HOST_WIDE_INT frame_base_offset;
462 /* Some local-dynamic TLS symbol name. */
463 const char *some_ld_name;
465 /* Number of global or FP registers to be saved (as 4-byte quantities). */
466 int n_global_fp_regs;
468 /* True if the current function is leaf and uses only leaf regs,
469 so that the SPARC leaf function optimization can be applied.
470 Private version of crtl->uses_only_leaf_regs, see
471 sparc_expand_prologue for the rationale. */
474 /* True if the prologue saves local or in registers. */
475 bool save_local_in_regs_p;
477 /* True if the data calculated by sparc_expand_prologue are valid. */
478 bool prologue_data_valid_p;
481 #define sparc_frame_size cfun->machine->frame_size
482 #define sparc_apparent_frame_size cfun->machine->apparent_frame_size
483 #define sparc_frame_base_reg cfun->machine->frame_base_reg
484 #define sparc_frame_base_offset cfun->machine->frame_base_offset
485 #define sparc_n_global_fp_regs cfun->machine->n_global_fp_regs
486 #define sparc_leaf_function_p cfun->machine->leaf_function_p
487 #define sparc_save_local_in_regs_p cfun->machine->save_local_in_regs_p
488 #define sparc_prologue_data_valid_p cfun->machine->prologue_data_valid_p
490 /* 1 if the next opcode is to be specially indented. */
491 int sparc_indent_opcode = 0;
493 static void sparc_option_override (void);
494 static void sparc_init_modes (void);
495 static void scan_record_type (const_tree, int *, int *, int *);
496 static int function_arg_slotno (const CUMULATIVE_ARGS *, enum machine_mode,
497 const_tree, bool, bool, int *, int *);
499 static int supersparc_adjust_cost (rtx, rtx, rtx, int);
500 static int hypersparc_adjust_cost (rtx, rtx, rtx, int);
502 static void sparc_emit_set_const32 (rtx, rtx);
503 static void sparc_emit_set_const64 (rtx, rtx);
504 static void sparc_output_addr_vec (rtx);
505 static void sparc_output_addr_diff_vec (rtx);
506 static void sparc_output_deferred_case_vectors (void);
507 static bool sparc_legitimate_address_p (enum machine_mode, rtx, bool);
508 static bool sparc_legitimate_constant_p (enum machine_mode, rtx);
509 static rtx sparc_builtin_saveregs (void);
510 static int epilogue_renumber (rtx *, int);
511 static bool sparc_assemble_integer (rtx, unsigned int, int);
512 static int set_extends (rtx);
513 static void sparc_asm_function_prologue (FILE *, HOST_WIDE_INT);
514 static void sparc_asm_function_epilogue (FILE *, HOST_WIDE_INT);
515 #ifdef TARGET_SOLARIS
516 static void sparc_solaris_elf_asm_named_section (const char *, unsigned int,
517 tree) ATTRIBUTE_UNUSED;
519 static int sparc_adjust_cost (rtx, rtx, rtx, int);
520 static int sparc_issue_rate (void);
521 static void sparc_sched_init (FILE *, int, int);
522 static int sparc_use_sched_lookahead (void);
524 static void emit_soft_tfmode_libcall (const char *, int, rtx *);
525 static void emit_soft_tfmode_binop (enum rtx_code, rtx *);
526 static void emit_soft_tfmode_unop (enum rtx_code, rtx *);
527 static void emit_soft_tfmode_cvt (enum rtx_code, rtx *);
528 static void emit_hard_tfmode_operation (enum rtx_code, rtx *);
530 static bool sparc_function_ok_for_sibcall (tree, tree);
531 static void sparc_init_libfuncs (void);
532 static void sparc_init_builtins (void);
533 static void sparc_vis_init_builtins (void);
534 static rtx sparc_expand_builtin (tree, rtx, rtx, enum machine_mode, int);
535 static tree sparc_fold_builtin (tree, int, tree *, bool);
536 static int sparc_vis_mul8x16 (int, int);
537 static void sparc_handle_vis_mul8x16 (tree *, int, tree, tree, tree);
538 static void sparc_output_mi_thunk (FILE *, tree, HOST_WIDE_INT,
539 HOST_WIDE_INT, tree);
540 static bool sparc_can_output_mi_thunk (const_tree, HOST_WIDE_INT,
541 HOST_WIDE_INT, const_tree);
542 static struct machine_function * sparc_init_machine_status (void);
543 static bool sparc_cannot_force_const_mem (enum machine_mode, rtx);
544 static rtx sparc_tls_get_addr (void);
545 static rtx sparc_tls_got (void);
546 static const char *get_some_local_dynamic_name (void);
547 static int get_some_local_dynamic_name_1 (rtx *, void *);
548 static int sparc_register_move_cost (enum machine_mode,
549 reg_class_t, reg_class_t);
550 static bool sparc_rtx_costs (rtx, int, int, int, int *, bool);
551 static rtx sparc_function_value (const_tree, const_tree, bool);
552 static rtx sparc_libcall_value (enum machine_mode, const_rtx);
553 static bool sparc_function_value_regno_p (const unsigned int);
554 static rtx sparc_struct_value_rtx (tree, int);
555 static enum machine_mode sparc_promote_function_mode (const_tree, enum machine_mode,
556 int *, const_tree, int);
557 static bool sparc_return_in_memory (const_tree, const_tree);
558 static bool sparc_strict_argument_naming (cumulative_args_t);
559 static void sparc_va_start (tree, rtx);
560 static tree sparc_gimplify_va_arg (tree, tree, gimple_seq *, gimple_seq *);
561 static bool sparc_vector_mode_supported_p (enum machine_mode);
562 static bool sparc_tls_referenced_p (rtx);
563 static rtx sparc_legitimize_tls_address (rtx);
564 static rtx sparc_legitimize_pic_address (rtx, rtx);
565 static rtx sparc_legitimize_address (rtx, rtx, enum machine_mode);
566 static rtx sparc_delegitimize_address (rtx);
567 static bool sparc_mode_dependent_address_p (const_rtx, addr_space_t);
568 static bool sparc_pass_by_reference (cumulative_args_t,
569 enum machine_mode, const_tree, bool);
570 static void sparc_function_arg_advance (cumulative_args_t,
571 enum machine_mode, const_tree, bool);
572 static rtx sparc_function_arg_1 (cumulative_args_t,
573 enum machine_mode, const_tree, bool, bool);
574 static rtx sparc_function_arg (cumulative_args_t,
575 enum machine_mode, const_tree, bool);
576 static rtx sparc_function_incoming_arg (cumulative_args_t,
577 enum machine_mode, const_tree, bool);
578 static unsigned int sparc_function_arg_boundary (enum machine_mode,
580 static int sparc_arg_partial_bytes (cumulative_args_t,
581 enum machine_mode, tree, bool);
582 static void sparc_output_dwarf_dtprel (FILE *, int, rtx) ATTRIBUTE_UNUSED;
583 static void sparc_file_end (void);
584 static bool sparc_frame_pointer_required (void);
585 static bool sparc_can_eliminate (const int, const int);
586 static rtx sparc_builtin_setjmp_frame_value (void);
587 static void sparc_conditional_register_usage (void);
588 #ifdef TARGET_ALTERNATE_LONG_DOUBLE_MANGLING
589 static const char *sparc_mangle_type (const_tree);
591 static void sparc_trampoline_init (rtx, tree, rtx);
592 static enum machine_mode sparc_preferred_simd_mode (enum machine_mode);
593 static reg_class_t sparc_preferred_reload_class (rtx x, reg_class_t rclass);
594 static bool sparc_print_operand_punct_valid_p (unsigned char);
595 static void sparc_print_operand (FILE *, rtx, int);
596 static void sparc_print_operand_address (FILE *, rtx);
597 static reg_class_t sparc_secondary_reload (bool, rtx, reg_class_t,
599 secondary_reload_info *);
600 static enum machine_mode sparc_cstore_mode (enum insn_code icode);
602 #ifdef SUBTARGET_ATTRIBUTE_TABLE
603 /* Table of valid machine attributes. */
604 static const struct attribute_spec sparc_attribute_table[] =
606 /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler,
608 SUBTARGET_ATTRIBUTE_TABLE,
609 { NULL, 0, 0, false, false, false, NULL, false }
613 /* Option handling. */
616 enum cmodel sparc_cmodel;
618 char sparc_hard_reg_printed[8];
620 /* Initialize the GCC target structure. */
622 /* The default is to use .half rather than .short for aligned HI objects. */
623 #undef TARGET_ASM_ALIGNED_HI_OP
624 #define TARGET_ASM_ALIGNED_HI_OP "\t.half\t"
626 #undef TARGET_ASM_UNALIGNED_HI_OP
627 #define TARGET_ASM_UNALIGNED_HI_OP "\t.uahalf\t"
628 #undef TARGET_ASM_UNALIGNED_SI_OP
629 #define TARGET_ASM_UNALIGNED_SI_OP "\t.uaword\t"
630 #undef TARGET_ASM_UNALIGNED_DI_OP
631 #define TARGET_ASM_UNALIGNED_DI_OP "\t.uaxword\t"
633 /* The target hook has to handle DI-mode values. */
634 #undef TARGET_ASM_INTEGER
635 #define TARGET_ASM_INTEGER sparc_assemble_integer
637 #undef TARGET_ASM_FUNCTION_PROLOGUE
638 #define TARGET_ASM_FUNCTION_PROLOGUE sparc_asm_function_prologue
639 #undef TARGET_ASM_FUNCTION_EPILOGUE
640 #define TARGET_ASM_FUNCTION_EPILOGUE sparc_asm_function_epilogue
642 #undef TARGET_SCHED_ADJUST_COST
643 #define TARGET_SCHED_ADJUST_COST sparc_adjust_cost
644 #undef TARGET_SCHED_ISSUE_RATE
645 #define TARGET_SCHED_ISSUE_RATE sparc_issue_rate
646 #undef TARGET_SCHED_INIT
647 #define TARGET_SCHED_INIT sparc_sched_init
648 #undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
649 #define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD sparc_use_sched_lookahead
651 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
652 #define TARGET_FUNCTION_OK_FOR_SIBCALL sparc_function_ok_for_sibcall
654 #undef TARGET_INIT_LIBFUNCS
655 #define TARGET_INIT_LIBFUNCS sparc_init_libfuncs
656 #undef TARGET_INIT_BUILTINS
657 #define TARGET_INIT_BUILTINS sparc_init_builtins
659 #undef TARGET_LEGITIMIZE_ADDRESS
660 #define TARGET_LEGITIMIZE_ADDRESS sparc_legitimize_address
661 #undef TARGET_DELEGITIMIZE_ADDRESS
662 #define TARGET_DELEGITIMIZE_ADDRESS sparc_delegitimize_address
663 #undef TARGET_MODE_DEPENDENT_ADDRESS_P
664 #define TARGET_MODE_DEPENDENT_ADDRESS_P sparc_mode_dependent_address_p
666 #undef TARGET_EXPAND_BUILTIN
667 #define TARGET_EXPAND_BUILTIN sparc_expand_builtin
668 #undef TARGET_FOLD_BUILTIN
669 #define TARGET_FOLD_BUILTIN sparc_fold_builtin
672 #undef TARGET_HAVE_TLS
673 #define TARGET_HAVE_TLS true
676 #undef TARGET_CANNOT_FORCE_CONST_MEM
677 #define TARGET_CANNOT_FORCE_CONST_MEM sparc_cannot_force_const_mem
679 #undef TARGET_ASM_OUTPUT_MI_THUNK
680 #define TARGET_ASM_OUTPUT_MI_THUNK sparc_output_mi_thunk
681 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
682 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK sparc_can_output_mi_thunk
684 #undef TARGET_RTX_COSTS
685 #define TARGET_RTX_COSTS sparc_rtx_costs
686 #undef TARGET_ADDRESS_COST
687 #define TARGET_ADDRESS_COST hook_int_rtx_mode_as_bool_0
688 #undef TARGET_REGISTER_MOVE_COST
689 #define TARGET_REGISTER_MOVE_COST sparc_register_move_cost
691 #undef TARGET_PROMOTE_FUNCTION_MODE
692 #define TARGET_PROMOTE_FUNCTION_MODE sparc_promote_function_mode
694 #undef TARGET_FUNCTION_VALUE
695 #define TARGET_FUNCTION_VALUE sparc_function_value
696 #undef TARGET_LIBCALL_VALUE
697 #define TARGET_LIBCALL_VALUE sparc_libcall_value
698 #undef TARGET_FUNCTION_VALUE_REGNO_P
699 #define TARGET_FUNCTION_VALUE_REGNO_P sparc_function_value_regno_p
701 #undef TARGET_STRUCT_VALUE_RTX
702 #define TARGET_STRUCT_VALUE_RTX sparc_struct_value_rtx
703 #undef TARGET_RETURN_IN_MEMORY
704 #define TARGET_RETURN_IN_MEMORY sparc_return_in_memory
705 #undef TARGET_MUST_PASS_IN_STACK
706 #define TARGET_MUST_PASS_IN_STACK must_pass_in_stack_var_size
707 #undef TARGET_PASS_BY_REFERENCE
708 #define TARGET_PASS_BY_REFERENCE sparc_pass_by_reference
709 #undef TARGET_ARG_PARTIAL_BYTES
710 #define TARGET_ARG_PARTIAL_BYTES sparc_arg_partial_bytes
711 #undef TARGET_FUNCTION_ARG_ADVANCE
712 #define TARGET_FUNCTION_ARG_ADVANCE sparc_function_arg_advance
713 #undef TARGET_FUNCTION_ARG
714 #define TARGET_FUNCTION_ARG sparc_function_arg
715 #undef TARGET_FUNCTION_INCOMING_ARG
716 #define TARGET_FUNCTION_INCOMING_ARG sparc_function_incoming_arg
717 #undef TARGET_FUNCTION_ARG_BOUNDARY
718 #define TARGET_FUNCTION_ARG_BOUNDARY sparc_function_arg_boundary
720 #undef TARGET_EXPAND_BUILTIN_SAVEREGS
721 #define TARGET_EXPAND_BUILTIN_SAVEREGS sparc_builtin_saveregs
722 #undef TARGET_STRICT_ARGUMENT_NAMING
723 #define TARGET_STRICT_ARGUMENT_NAMING sparc_strict_argument_naming
725 #undef TARGET_EXPAND_BUILTIN_VA_START
726 #define TARGET_EXPAND_BUILTIN_VA_START sparc_va_start
727 #undef TARGET_GIMPLIFY_VA_ARG_EXPR
728 #define TARGET_GIMPLIFY_VA_ARG_EXPR sparc_gimplify_va_arg
730 #undef TARGET_VECTOR_MODE_SUPPORTED_P
731 #define TARGET_VECTOR_MODE_SUPPORTED_P sparc_vector_mode_supported_p
733 #undef TARGET_VECTORIZE_PREFERRED_SIMD_MODE
734 #define TARGET_VECTORIZE_PREFERRED_SIMD_MODE sparc_preferred_simd_mode
736 #ifdef SUBTARGET_INSERT_ATTRIBUTES
737 #undef TARGET_INSERT_ATTRIBUTES
738 #define TARGET_INSERT_ATTRIBUTES SUBTARGET_INSERT_ATTRIBUTES
741 #ifdef SUBTARGET_ATTRIBUTE_TABLE
742 #undef TARGET_ATTRIBUTE_TABLE
743 #define TARGET_ATTRIBUTE_TABLE sparc_attribute_table
746 #undef TARGET_RELAXED_ORDERING
747 #define TARGET_RELAXED_ORDERING SPARC_RELAXED_ORDERING
749 #undef TARGET_OPTION_OVERRIDE
750 #define TARGET_OPTION_OVERRIDE sparc_option_override
752 #if TARGET_GNU_TLS && defined(HAVE_AS_SPARC_UA_PCREL)
753 #undef TARGET_ASM_OUTPUT_DWARF_DTPREL
754 #define TARGET_ASM_OUTPUT_DWARF_DTPREL sparc_output_dwarf_dtprel
757 #undef TARGET_ASM_FILE_END
758 #define TARGET_ASM_FILE_END sparc_file_end
760 #undef TARGET_FRAME_POINTER_REQUIRED
761 #define TARGET_FRAME_POINTER_REQUIRED sparc_frame_pointer_required
763 #undef TARGET_BUILTIN_SETJMP_FRAME_VALUE
764 #define TARGET_BUILTIN_SETJMP_FRAME_VALUE sparc_builtin_setjmp_frame_value
766 #undef TARGET_CAN_ELIMINATE
767 #define TARGET_CAN_ELIMINATE sparc_can_eliminate
769 #undef TARGET_PREFERRED_RELOAD_CLASS
770 #define TARGET_PREFERRED_RELOAD_CLASS sparc_preferred_reload_class
772 #undef TARGET_SECONDARY_RELOAD
773 #define TARGET_SECONDARY_RELOAD sparc_secondary_reload
775 #undef TARGET_CONDITIONAL_REGISTER_USAGE
776 #define TARGET_CONDITIONAL_REGISTER_USAGE sparc_conditional_register_usage
778 #ifdef TARGET_ALTERNATE_LONG_DOUBLE_MANGLING
779 #undef TARGET_MANGLE_TYPE
780 #define TARGET_MANGLE_TYPE sparc_mangle_type
783 #undef TARGET_LEGITIMATE_ADDRESS_P
784 #define TARGET_LEGITIMATE_ADDRESS_P sparc_legitimate_address_p
786 #undef TARGET_LEGITIMATE_CONSTANT_P
787 #define TARGET_LEGITIMATE_CONSTANT_P sparc_legitimate_constant_p
789 #undef TARGET_TRAMPOLINE_INIT
790 #define TARGET_TRAMPOLINE_INIT sparc_trampoline_init
792 #undef TARGET_PRINT_OPERAND_PUNCT_VALID_P
793 #define TARGET_PRINT_OPERAND_PUNCT_VALID_P sparc_print_operand_punct_valid_p
794 #undef TARGET_PRINT_OPERAND
795 #define TARGET_PRINT_OPERAND sparc_print_operand
796 #undef TARGET_PRINT_OPERAND_ADDRESS
797 #define TARGET_PRINT_OPERAND_ADDRESS sparc_print_operand_address
799 /* The value stored by LDSTUB. */
800 #undef TARGET_ATOMIC_TEST_AND_SET_TRUEVAL
801 #define TARGET_ATOMIC_TEST_AND_SET_TRUEVAL 0xff
803 #undef TARGET_CSTORE_MODE
804 #define TARGET_CSTORE_MODE sparc_cstore_mode
806 struct gcc_target targetm = TARGET_INITIALIZER;
808 /* We use a machine specific pass to enable workarounds for errata.
809 We need to have the (essentially) final form of the insn stream in order
810 to properly detect the various hazards. Therefore, this machine specific
811 pass runs as late as possible. The pass is inserted in the pass pipeline
812 at the end of sparc_options_override. */
815 sparc_gate_work_around_errata (void)
817 /* The only erratum we handle for now is that of the AT697F processor. */
818 return sparc_fix_at697f != 0;
822 sparc_do_work_around_errata (void)
826 /* Now look for specific patterns in the insn stream. */
827 for (insn = get_insns (); insn; insn = next)
829 bool insert_nop = false;
832 /* Look for a single-word load into an odd-numbered FP register. */
833 if (NONJUMP_INSN_P (insn)
834 && (set = single_set (insn)) != NULL_RTX
835 && GET_MODE_SIZE (GET_MODE (SET_SRC (set))) == 4
836 && MEM_P (SET_SRC (set))
837 && REG_P (SET_DEST (set))
838 && REGNO (SET_DEST (set)) > 31
839 && REGNO (SET_DEST (set)) % 2 != 0)
841 /* The wrong dependency is on the enclosing double register. */
842 unsigned int x = REGNO (SET_DEST (set)) - 1;
843 unsigned int src1, src2, dest;
846 /* If the insn has a delay slot, then it cannot be problematic. */
847 next = next_active_insn (insn);
848 if (NONJUMP_INSN_P (next) && GET_CODE (PATTERN (next)) == SEQUENCE)
853 code = INSN_CODE (next);
858 case CODE_FOR_adddf3:
859 case CODE_FOR_subdf3:
860 case CODE_FOR_muldf3:
861 case CODE_FOR_divdf3:
862 dest = REGNO (recog_data.operand[0]);
863 src1 = REGNO (recog_data.operand[1]);
864 src2 = REGNO (recog_data.operand[2]);
869 FPOPd %f{x,y}, %f{y,x}, %f{x,y} */
870 if ((src1 == x || src2 == x)
871 && (dest == src1 || dest == src2))
878 FPOPd %fx, %fx, %fx */
881 && (code == CODE_FOR_adddf3 || code == CODE_FOR_muldf3))
886 case CODE_FOR_sqrtdf2:
887 dest = REGNO (recog_data.operand[0]);
888 src1 = REGNO (recog_data.operand[1]);
892 if (src1 == x && dest == src1)
901 next = NEXT_INSN (insn);
904 emit_insn_after (gen_nop (), insn);
909 struct rtl_opt_pass pass_work_around_errata =
914 OPTGROUP_NONE, /* optinfo_flags */
915 sparc_gate_work_around_errata, /* gate */
916 sparc_do_work_around_errata, /* execute */
919 0, /* static_pass_number */
920 TV_MACH_DEP, /* tv_id */
921 0, /* properties_required */
922 0, /* properties_provided */
923 0, /* properties_destroyed */
924 0, /* todo_flags_start */
925 TODO_verify_rtl_sharing, /* todo_flags_finish */
929 struct register_pass_info insert_pass_work_around_errata =
931 &pass_work_around_errata.pass, /* pass */
932 "dbr", /* reference_pass_name */
933 1, /* ref_pass_instance_number */
934 PASS_POS_INSERT_AFTER /* po_op */
937 /* Helpers for TARGET_DEBUG_OPTIONS. */
939 dump_target_flag_bits (const int flags)
941 if (flags & MASK_64BIT)
942 fprintf (stderr, "64BIT ");
943 if (flags & MASK_APP_REGS)
944 fprintf (stderr, "APP_REGS ");
945 if (flags & MASK_FASTER_STRUCTS)
946 fprintf (stderr, "FASTER_STRUCTS ");
947 if (flags & MASK_FLAT)
948 fprintf (stderr, "FLAT ");
949 if (flags & MASK_FMAF)
950 fprintf (stderr, "FMAF ");
951 if (flags & MASK_FPU)
952 fprintf (stderr, "FPU ");
953 if (flags & MASK_HARD_QUAD)
954 fprintf (stderr, "HARD_QUAD ");
955 if (flags & MASK_POPC)
956 fprintf (stderr, "POPC ");
957 if (flags & MASK_PTR64)
958 fprintf (stderr, "PTR64 ");
959 if (flags & MASK_STACK_BIAS)
960 fprintf (stderr, "STACK_BIAS ");
961 if (flags & MASK_UNALIGNED_DOUBLES)
962 fprintf (stderr, "UNALIGNED_DOUBLES ");
963 if (flags & MASK_V8PLUS)
964 fprintf (stderr, "V8PLUS ");
965 if (flags & MASK_VIS)
966 fprintf (stderr, "VIS ");
967 if (flags & MASK_VIS2)
968 fprintf (stderr, "VIS2 ");
969 if (flags & MASK_VIS3)
970 fprintf (stderr, "VIS3 ");
971 if (flags & MASK_CBCOND)
972 fprintf (stderr, "CBCOND ");
973 if (flags & MASK_DEPRECATED_V8_INSNS)
974 fprintf (stderr, "DEPRECATED_V8_INSNS ");
975 if (flags & MASK_SPARCLET)
976 fprintf (stderr, "SPARCLET ");
977 if (flags & MASK_SPARCLITE)
978 fprintf (stderr, "SPARCLITE ");
980 fprintf (stderr, "V8 ");
982 fprintf (stderr, "V9 ");
986 dump_target_flags (const char *prefix, const int flags)
988 fprintf (stderr, "%s: (%08x) [ ", prefix, flags);
989 dump_target_flag_bits (flags);
990 fprintf(stderr, "]\n");
993 /* Validate and override various options, and do some machine dependent
997 sparc_option_override (void)
999 static struct code_model {
1000 const char *const name;
1001 const enum cmodel value;
1002 } const cmodels[] = {
1004 { "medlow", CM_MEDLOW },
1005 { "medmid", CM_MEDMID },
1006 { "medany", CM_MEDANY },
1007 { "embmedany", CM_EMBMEDANY },
1008 { NULL, (enum cmodel) 0 }
1010 const struct code_model *cmodel;
1011 /* Map TARGET_CPU_DEFAULT to value for -m{cpu,tune}=. */
1012 static struct cpu_default {
1014 const enum processor_type processor;
1015 } const cpu_default[] = {
1016 /* There must be one entry here for each TARGET_CPU value. */
1017 { TARGET_CPU_sparc, PROCESSOR_CYPRESS },
1018 { TARGET_CPU_v8, PROCESSOR_V8 },
1019 { TARGET_CPU_supersparc, PROCESSOR_SUPERSPARC },
1020 { TARGET_CPU_hypersparc, PROCESSOR_HYPERSPARC },
1021 { TARGET_CPU_leon, PROCESSOR_LEON },
1022 { TARGET_CPU_sparclite, PROCESSOR_F930 },
1023 { TARGET_CPU_sparclite86x, PROCESSOR_SPARCLITE86X },
1024 { TARGET_CPU_sparclet, PROCESSOR_TSC701 },
1025 { TARGET_CPU_v9, PROCESSOR_V9 },
1026 { TARGET_CPU_ultrasparc, PROCESSOR_ULTRASPARC },
1027 { TARGET_CPU_ultrasparc3, PROCESSOR_ULTRASPARC3 },
1028 { TARGET_CPU_niagara, PROCESSOR_NIAGARA },
1029 { TARGET_CPU_niagara2, PROCESSOR_NIAGARA2 },
1030 { TARGET_CPU_niagara3, PROCESSOR_NIAGARA3 },
1031 { TARGET_CPU_niagara4, PROCESSOR_NIAGARA4 },
1032 { -1, PROCESSOR_V7 }
1034 const struct cpu_default *def;
1035 /* Table of values for -m{cpu,tune}=. This must match the order of
1036 the PROCESSOR_* enumeration. */
1037 static struct cpu_table {
1038 const char *const name;
1041 } const cpu_table[] = {
1042 { "v7", MASK_ISA, 0 },
1043 { "cypress", MASK_ISA, 0 },
1044 { "v8", MASK_ISA, MASK_V8 },
1045 /* TI TMS390Z55 supersparc */
1046 { "supersparc", MASK_ISA, MASK_V8 },
1047 { "hypersparc", MASK_ISA, MASK_V8|MASK_FPU },
1049 { "leon", MASK_ISA, MASK_V8|MASK_FPU },
1050 { "sparclite", MASK_ISA, MASK_SPARCLITE },
1051 /* The Fujitsu MB86930 is the original sparclite chip, with no FPU. */
1052 { "f930", MASK_ISA|MASK_FPU, MASK_SPARCLITE },
1053 /* The Fujitsu MB86934 is the recent sparclite chip, with an FPU. */
1054 { "f934", MASK_ISA, MASK_SPARCLITE|MASK_FPU },
1055 { "sparclite86x", MASK_ISA|MASK_FPU, MASK_SPARCLITE },
1056 { "sparclet", MASK_ISA, MASK_SPARCLET },
1057 /* TEMIC sparclet */
1058 { "tsc701", MASK_ISA, MASK_SPARCLET },
1059 { "v9", MASK_ISA, MASK_V9 },
1060 /* UltraSPARC I, II, IIi */
1061 { "ultrasparc", MASK_ISA,
1062 /* Although insns using %y are deprecated, it is a clear win. */
1063 MASK_V9|MASK_DEPRECATED_V8_INSNS },
1064 /* UltraSPARC III */
1065 /* ??? Check if %y issue still holds true. */
1066 { "ultrasparc3", MASK_ISA,
1067 MASK_V9|MASK_DEPRECATED_V8_INSNS|MASK_VIS2 },
1069 { "niagara", MASK_ISA,
1070 MASK_V9|MASK_DEPRECATED_V8_INSNS },
1072 { "niagara2", MASK_ISA,
1073 MASK_V9|MASK_POPC|MASK_VIS2 },
1075 { "niagara3", MASK_ISA,
1076 MASK_V9|MASK_POPC|MASK_VIS2|MASK_VIS3|MASK_FMAF },
1078 { "niagara4", MASK_ISA,
1079 MASK_V9|MASK_POPC|MASK_VIS2|MASK_VIS3|MASK_FMAF|MASK_CBCOND },
1081 const struct cpu_table *cpu;
1085 if (sparc_debug_string != NULL)
1090 p = ASTRDUP (sparc_debug_string);
1091 while ((q = strtok (p, ",")) != NULL)
1105 if (! strcmp (q, "all"))
1106 mask = MASK_DEBUG_ALL;
1107 else if (! strcmp (q, "options"))
1108 mask = MASK_DEBUG_OPTIONS;
1110 error ("unknown -mdebug-%s switch", q);
1113 sparc_debug &= ~mask;
1115 sparc_debug |= mask;
1119 if (TARGET_DEBUG_OPTIONS)
1121 dump_target_flags("Initial target_flags", target_flags);
1122 dump_target_flags("target_flags_explicit", target_flags_explicit);
1125 #ifdef SUBTARGET_OVERRIDE_OPTIONS
1126 SUBTARGET_OVERRIDE_OPTIONS;
1129 #ifndef SPARC_BI_ARCH
1130 /* Check for unsupported architecture size. */
1131 if (! TARGET_64BIT != DEFAULT_ARCH32_P)
1132 error ("%s is not supported by this configuration",
1133 DEFAULT_ARCH32_P ? "-m64" : "-m32");
1136 /* We force all 64bit archs to use 128 bit long double */
1137 if (TARGET_64BIT && ! TARGET_LONG_DOUBLE_128)
1139 error ("-mlong-double-64 not allowed with -m64");
1140 target_flags |= MASK_LONG_DOUBLE_128;
1143 /* Code model selection. */
1144 sparc_cmodel = SPARC_DEFAULT_CMODEL;
1146 #ifdef SPARC_BI_ARCH
1148 sparc_cmodel = CM_32;
1151 if (sparc_cmodel_string != NULL)
1155 for (cmodel = &cmodels[0]; cmodel->name; cmodel++)
1156 if (strcmp (sparc_cmodel_string, cmodel->name) == 0)
1158 if (cmodel->name == NULL)
1159 error ("bad value (%s) for -mcmodel= switch", sparc_cmodel_string);
1161 sparc_cmodel = cmodel->value;
1164 error ("-mcmodel= is not supported on 32 bit systems");
1167 /* Check that -fcall-saved-REG wasn't specified for out registers. */
1168 for (i = 8; i < 16; i++)
1169 if (!call_used_regs [i])
1171 error ("-fcall-saved-REG is not supported for out registers");
1172 call_used_regs [i] = 1;
1175 fpu = target_flags & MASK_FPU; /* save current -mfpu status */
1177 /* Set the default CPU. */
1178 if (!global_options_set.x_sparc_cpu_and_features)
1180 for (def = &cpu_default[0]; def->cpu != -1; ++def)
1181 if (def->cpu == TARGET_CPU_DEFAULT)
1183 gcc_assert (def->cpu != -1);
1184 sparc_cpu_and_features = def->processor;
1187 if (!global_options_set.x_sparc_cpu)
1188 sparc_cpu = sparc_cpu_and_features;
1190 cpu = &cpu_table[(int) sparc_cpu_and_features];
1192 if (TARGET_DEBUG_OPTIONS)
1194 fprintf (stderr, "sparc_cpu_and_features: %s\n", cpu->name);
1195 fprintf (stderr, "sparc_cpu: %s\n",
1196 cpu_table[(int) sparc_cpu].name);
1197 dump_target_flags ("cpu->disable", cpu->disable);
1198 dump_target_flags ("cpu->enable", cpu->enable);
1201 target_flags &= ~cpu->disable;
1202 target_flags |= (cpu->enable
1203 #ifndef HAVE_AS_FMAF_HPC_VIS3
1204 & ~(MASK_FMAF | MASK_VIS3)
1206 #ifndef HAVE_AS_SPARC4
1211 /* If -mfpu or -mno-fpu was explicitly used, don't override with
1212 the processor default. */
1213 if (target_flags_explicit & MASK_FPU)
1214 target_flags = (target_flags & ~MASK_FPU) | fpu;
1216 /* -mvis2 implies -mvis */
1218 target_flags |= MASK_VIS;
1220 /* -mvis3 implies -mvis2 and -mvis */
1222 target_flags |= MASK_VIS2 | MASK_VIS;
1224 /* Don't allow -mvis, -mvis2, -mvis3, or -mfmaf if FPU is
1227 target_flags &= ~(MASK_VIS | MASK_VIS2 | MASK_VIS3 | MASK_FMAF);
1229 /* -mvis assumes UltraSPARC+, so we are sure v9 instructions
1231 -m64 also implies v9. */
1232 if (TARGET_VIS || TARGET_ARCH64)
1234 target_flags |= MASK_V9;
1235 target_flags &= ~(MASK_V8 | MASK_SPARCLET | MASK_SPARCLITE);
1238 /* -mvis also implies -mv8plus on 32-bit */
1239 if (TARGET_VIS && ! TARGET_ARCH64)
1240 target_flags |= MASK_V8PLUS;
1242 /* Use the deprecated v8 insns for sparc64 in 32 bit mode. */
1243 if (TARGET_V9 && TARGET_ARCH32)
1244 target_flags |= MASK_DEPRECATED_V8_INSNS;
1246 /* V8PLUS requires V9, makes no sense in 64 bit mode. */
1247 if (! TARGET_V9 || TARGET_ARCH64)
1248 target_flags &= ~MASK_V8PLUS;
1250 /* Don't use stack biasing in 32 bit mode. */
1252 target_flags &= ~MASK_STACK_BIAS;
1254 /* Supply a default value for align_functions. */
1255 if (align_functions == 0
1256 && (sparc_cpu == PROCESSOR_ULTRASPARC
1257 || sparc_cpu == PROCESSOR_ULTRASPARC3
1258 || sparc_cpu == PROCESSOR_NIAGARA
1259 || sparc_cpu == PROCESSOR_NIAGARA2
1260 || sparc_cpu == PROCESSOR_NIAGARA3
1261 || sparc_cpu == PROCESSOR_NIAGARA4))
1262 align_functions = 32;
1264 /* Validate PCC_STRUCT_RETURN. */
1265 if (flag_pcc_struct_return == DEFAULT_PCC_STRUCT_RETURN)
1266 flag_pcc_struct_return = (TARGET_ARCH64 ? 0 : 1);
1268 /* Only use .uaxword when compiling for a 64-bit target. */
1270 targetm.asm_out.unaligned_op.di = NULL;
1272 /* Do various machine dependent initializations. */
1273 sparc_init_modes ();
1275 /* Set up function hooks. */
1276 init_machine_status = sparc_init_machine_status;
1281 case PROCESSOR_CYPRESS:
1282 sparc_costs = &cypress_costs;
1285 case PROCESSOR_SPARCLITE:
1286 case PROCESSOR_SUPERSPARC:
1287 sparc_costs = &supersparc_costs;
1289 case PROCESSOR_F930:
1290 case PROCESSOR_F934:
1291 case PROCESSOR_HYPERSPARC:
1292 case PROCESSOR_SPARCLITE86X:
1293 sparc_costs = &hypersparc_costs;
1295 case PROCESSOR_LEON:
1296 sparc_costs = &leon_costs;
1298 case PROCESSOR_SPARCLET:
1299 case PROCESSOR_TSC701:
1300 sparc_costs = &sparclet_costs;
1303 case PROCESSOR_ULTRASPARC:
1304 sparc_costs = &ultrasparc_costs;
1306 case PROCESSOR_ULTRASPARC3:
1307 sparc_costs = &ultrasparc3_costs;
1309 case PROCESSOR_NIAGARA:
1310 sparc_costs = &niagara_costs;
1312 case PROCESSOR_NIAGARA2:
1313 sparc_costs = &niagara2_costs;
1315 case PROCESSOR_NIAGARA3:
1316 sparc_costs = &niagara3_costs;
1318 case PROCESSOR_NIAGARA4:
1319 sparc_costs = &niagara4_costs;
1321 case PROCESSOR_NATIVE:
1325 if (sparc_memory_model == SMM_DEFAULT)
1327 /* Choose the memory model for the operating system. */
1328 enum sparc_memory_model_type os_default = SUBTARGET_DEFAULT_MEMORY_MODEL;
1329 if (os_default != SMM_DEFAULT)
1330 sparc_memory_model = os_default;
1331 /* Choose the most relaxed model for the processor. */
1333 sparc_memory_model = SMM_RMO;
1335 sparc_memory_model = SMM_PSO;
1337 sparc_memory_model = SMM_SC;
1340 #ifdef TARGET_DEFAULT_LONG_DOUBLE_128
1341 if (!(target_flags_explicit & MASK_LONG_DOUBLE_128))
1342 target_flags |= MASK_LONG_DOUBLE_128;
1345 if (TARGET_DEBUG_OPTIONS)
1346 dump_target_flags ("Final target_flags", target_flags);
1348 maybe_set_param_value (PARAM_SIMULTANEOUS_PREFETCHES,
1349 ((sparc_cpu == PROCESSOR_ULTRASPARC
1350 || sparc_cpu == PROCESSOR_NIAGARA
1351 || sparc_cpu == PROCESSOR_NIAGARA2
1352 || sparc_cpu == PROCESSOR_NIAGARA3
1353 || sparc_cpu == PROCESSOR_NIAGARA4)
1355 : (sparc_cpu == PROCESSOR_ULTRASPARC3
1357 global_options.x_param_values,
1358 global_options_set.x_param_values);
1359 maybe_set_param_value (PARAM_L1_CACHE_LINE_SIZE,
1360 ((sparc_cpu == PROCESSOR_ULTRASPARC
1361 || sparc_cpu == PROCESSOR_ULTRASPARC3
1362 || sparc_cpu == PROCESSOR_NIAGARA
1363 || sparc_cpu == PROCESSOR_NIAGARA2
1364 || sparc_cpu == PROCESSOR_NIAGARA3
1365 || sparc_cpu == PROCESSOR_NIAGARA4)
1367 global_options.x_param_values,
1368 global_options_set.x_param_values);
1370 /* Disable save slot sharing for call-clobbered registers by default.
1371 The IRA sharing algorithm works on single registers only and this
1372 pessimizes for double floating-point registers. */
1373 if (!global_options_set.x_flag_ira_share_save_slots)
1374 flag_ira_share_save_slots = 0;
1376 /* We register a machine specific pass to work around errata, if any.
1377 The pass mut be scheduled as late as possible so that we have the
1378 (essentially) final form of the insn stream to work on.
1379 Registering the pass must be done at start up. It's convenient to
1381 register_pass (&insert_pass_work_around_errata);
1384 /* Miscellaneous utilities. */
1386 /* Nonzero if CODE, a comparison, is suitable for use in v9 conditional move
1387 or branch on register contents instructions. */
1390 v9_regcmp_p (enum rtx_code code)
1392 return (code == EQ || code == NE || code == GE || code == LT
1393 || code == LE || code == GT);
1396 /* Nonzero if OP is a floating point constant which can
1397 be loaded into an integer register using a single
1398 sethi instruction. */
1403 if (GET_CODE (op) == CONST_DOUBLE)
1408 REAL_VALUE_FROM_CONST_DOUBLE (r, op);
1409 REAL_VALUE_TO_TARGET_SINGLE (r, i);
1410 return !SPARC_SIMM13_P (i) && SPARC_SETHI_P (i);
1416 /* Nonzero if OP is a floating point constant which can
1417 be loaded into an integer register using a single
1423 if (GET_CODE (op) == CONST_DOUBLE)
1428 REAL_VALUE_FROM_CONST_DOUBLE (r, op);
1429 REAL_VALUE_TO_TARGET_SINGLE (r, i);
1430 return SPARC_SIMM13_P (i);
1436 /* Nonzero if OP is a floating point constant which can
1437 be loaded into an integer register using a high/losum
1438 instruction sequence. */
1441 fp_high_losum_p (rtx op)
1443 /* The constraints calling this should only be in
1444 SFmode move insns, so any constant which cannot
1445 be moved using a single insn will do. */
1446 if (GET_CODE (op) == CONST_DOUBLE)
1451 REAL_VALUE_FROM_CONST_DOUBLE (r, op);
1452 REAL_VALUE_TO_TARGET_SINGLE (r, i);
1453 return !SPARC_SIMM13_P (i) && !SPARC_SETHI_P (i);
1459 /* Return true if the address of LABEL can be loaded by means of the
1460 mov{si,di}_pic_label_ref patterns in PIC mode. */
1463 can_use_mov_pic_label_ref (rtx label)
1465 /* VxWorks does not impose a fixed gap between segments; the run-time
1466 gap can be different from the object-file gap. We therefore can't
1467 assume X - _GLOBAL_OFFSET_TABLE_ is a link-time constant unless we
1468 are absolutely sure that X is in the same segment as the GOT.
1469 Unfortunately, the flexibility of linker scripts means that we
1470 can't be sure of that in general, so assume that GOT-relative
1471 accesses are never valid on VxWorks. */
1472 if (TARGET_VXWORKS_RTP)
1475 /* Similarly, if the label is non-local, it might end up being placed
1476 in a different section than the current one; now mov_pic_label_ref
1477 requires the label and the code to be in the same section. */
1478 if (LABEL_REF_NONLOCAL_P (label))
1481 /* Finally, if we are reordering basic blocks and partition into hot
1482 and cold sections, this might happen for any label. */
1483 if (flag_reorder_blocks_and_partition)
1489 /* Expand a move instruction. Return true if all work is done. */
1492 sparc_expand_move (enum machine_mode mode, rtx *operands)
1494 /* Handle sets of MEM first. */
1495 if (GET_CODE (operands[0]) == MEM)
1497 /* 0 is a register (or a pair of registers) on SPARC. */
1498 if (register_or_zero_operand (operands[1], mode))
1501 if (!reload_in_progress)
1503 operands[0] = validize_mem (operands[0]);
1504 operands[1] = force_reg (mode, operands[1]);
1508 /* Fixup TLS cases. */
1510 && CONSTANT_P (operands[1])
1511 && sparc_tls_referenced_p (operands [1]))
1513 operands[1] = sparc_legitimize_tls_address (operands[1]);
1517 /* Fixup PIC cases. */
1518 if (flag_pic && CONSTANT_P (operands[1]))
1520 if (pic_address_needs_scratch (operands[1]))
1521 operands[1] = sparc_legitimize_pic_address (operands[1], NULL_RTX);
1523 /* We cannot use the mov{si,di}_pic_label_ref patterns in all cases. */
1524 if (GET_CODE (operands[1]) == LABEL_REF
1525 && can_use_mov_pic_label_ref (operands[1]))
1529 emit_insn (gen_movsi_pic_label_ref (operands[0], operands[1]));
1535 gcc_assert (TARGET_ARCH64);
1536 emit_insn (gen_movdi_pic_label_ref (operands[0], operands[1]));
1541 if (symbolic_operand (operands[1], mode))
1544 = sparc_legitimize_pic_address (operands[1],
1546 ? operands[0] : NULL_RTX);
1551 /* If we are trying to toss an integer constant into FP registers,
1552 or loading a FP or vector constant, force it into memory. */
1553 if (CONSTANT_P (operands[1])
1554 && REG_P (operands[0])
1555 && (SPARC_FP_REG_P (REGNO (operands[0]))
1556 || SCALAR_FLOAT_MODE_P (mode)
1557 || VECTOR_MODE_P (mode)))
1559 /* emit_group_store will send such bogosity to us when it is
1560 not storing directly into memory. So fix this up to avoid
1561 crashes in output_constant_pool. */
1562 if (operands [1] == const0_rtx)
1563 operands[1] = CONST0_RTX (mode);
1565 /* We can clear or set to all-ones FP registers if TARGET_VIS, and
1566 always other regs. */
1567 if ((TARGET_VIS || REGNO (operands[0]) < SPARC_FIRST_FP_REG)
1568 && (const_zero_operand (operands[1], mode)
1569 || const_all_ones_operand (operands[1], mode)))
1572 if (REGNO (operands[0]) < SPARC_FIRST_FP_REG
1573 /* We are able to build any SF constant in integer registers
1574 with at most 2 instructions. */
1576 /* And any DF constant in integer registers. */
1578 && ! can_create_pseudo_p ())))
1581 operands[1] = force_const_mem (mode, operands[1]);
1582 if (!reload_in_progress)
1583 operands[1] = validize_mem (operands[1]);
1587 /* Accept non-constants and valid constants unmodified. */
1588 if (!CONSTANT_P (operands[1])
1589 || GET_CODE (operands[1]) == HIGH
1590 || input_operand (operands[1], mode))
1596 /* All QImode constants require only one insn, so proceed. */
1601 sparc_emit_set_const32 (operands[0], operands[1]);
1605 /* input_operand should have filtered out 32-bit mode. */
1606 sparc_emit_set_const64 (operands[0], operands[1]);
1612 /* TImode isn't available in 32-bit mode. */
1613 split_double (operands[1], &high, &low);
1614 emit_insn (gen_movdi (operand_subword (operands[0], 0, 0, TImode),
1616 emit_insn (gen_movdi (operand_subword (operands[0], 1, 0, TImode),
1628 /* Load OP1, a 32-bit constant, into OP0, a register.
1629 We know it can't be done in one insn when we get
1630 here, the move expander guarantees this. */
1633 sparc_emit_set_const32 (rtx op0, rtx op1)
1635 enum machine_mode mode = GET_MODE (op0);
1638 if (can_create_pseudo_p ())
1639 temp = gen_reg_rtx (mode);
1641 if (GET_CODE (op1) == CONST_INT)
1643 gcc_assert (!small_int_operand (op1, mode)
1644 && !const_high_operand (op1, mode));
1646 /* Emit them as real moves instead of a HIGH/LO_SUM,
1647 this way CSE can see everything and reuse intermediate
1648 values if it wants. */
1649 emit_insn (gen_rtx_SET (VOIDmode, temp,
1650 GEN_INT (INTVAL (op1)
1651 & ~(HOST_WIDE_INT)0x3ff)));
1653 emit_insn (gen_rtx_SET (VOIDmode,
1655 gen_rtx_IOR (mode, temp,
1656 GEN_INT (INTVAL (op1) & 0x3ff))));
1660 /* A symbol, emit in the traditional way. */
1661 emit_insn (gen_rtx_SET (VOIDmode, temp,
1662 gen_rtx_HIGH (mode, op1)));
1663 emit_insn (gen_rtx_SET (VOIDmode,
1664 op0, gen_rtx_LO_SUM (mode, temp, op1)));
1668 /* Load OP1, a symbolic 64-bit constant, into OP0, a DImode register.
1669 If TEMP is nonzero, we are forbidden to use any other scratch
1670 registers. Otherwise, we are allowed to generate them as needed.
1672 Note that TEMP may have TImode if the code model is TARGET_CM_MEDANY
1673 or TARGET_CM_EMBMEDANY (see the reload_indi and reload_outdi patterns). */
1676 sparc_emit_set_symbolic_const64 (rtx op0, rtx op1, rtx temp)
1678 rtx temp1, temp2, temp3, temp4, temp5;
1681 if (temp && GET_MODE (temp) == TImode)
1684 temp = gen_rtx_REG (DImode, REGNO (temp));
1687 /* SPARC-V9 code-model support. */
1688 switch (sparc_cmodel)
1691 /* The range spanned by all instructions in the object is less
1692 than 2^31 bytes (2GB) and the distance from any instruction
1693 to the location of the label _GLOBAL_OFFSET_TABLE_ is less
1694 than 2^31 bytes (2GB).
1696 The executable must be in the low 4TB of the virtual address
1699 sethi %hi(symbol), %temp1
1700 or %temp1, %lo(symbol), %reg */
1702 temp1 = temp; /* op0 is allowed. */
1704 temp1 = gen_reg_rtx (DImode);
1706 emit_insn (gen_rtx_SET (VOIDmode, temp1, gen_rtx_HIGH (DImode, op1)));
1707 emit_insn (gen_rtx_SET (VOIDmode, op0, gen_rtx_LO_SUM (DImode, temp1, op1)));
1711 /* The range spanned by all instructions in the object is less
1712 than 2^31 bytes (2GB) and the distance from any instruction
1713 to the location of the label _GLOBAL_OFFSET_TABLE_ is less
1714 than 2^31 bytes (2GB).
1716 The executable must be in the low 16TB of the virtual address
1719 sethi %h44(symbol), %temp1
1720 or %temp1, %m44(symbol), %temp2
1721 sllx %temp2, 12, %temp3
1722 or %temp3, %l44(symbol), %reg */
1727 temp3 = temp; /* op0 is allowed. */
1731 temp1 = gen_reg_rtx (DImode);
1732 temp2 = gen_reg_rtx (DImode);
1733 temp3 = gen_reg_rtx (DImode);
1736 emit_insn (gen_seth44 (temp1, op1));
1737 emit_insn (gen_setm44 (temp2, temp1, op1));
1738 emit_insn (gen_rtx_SET (VOIDmode, temp3,
1739 gen_rtx_ASHIFT (DImode, temp2, GEN_INT (12))));
1740 emit_insn (gen_setl44 (op0, temp3, op1));
1744 /* The range spanned by all instructions in the object is less
1745 than 2^31 bytes (2GB) and the distance from any instruction
1746 to the location of the label _GLOBAL_OFFSET_TABLE_ is less
1747 than 2^31 bytes (2GB).
1749 The executable can be placed anywhere in the virtual address
1752 sethi %hh(symbol), %temp1
1753 sethi %lm(symbol), %temp2
1754 or %temp1, %hm(symbol), %temp3
1755 sllx %temp3, 32, %temp4
1756 or %temp4, %temp2, %temp5
1757 or %temp5, %lo(symbol), %reg */
1760 /* It is possible that one of the registers we got for operands[2]
1761 might coincide with that of operands[0] (which is why we made
1762 it TImode). Pick the other one to use as our scratch. */
1763 if (rtx_equal_p (temp, op0))
1765 gcc_assert (ti_temp);
1766 temp = gen_rtx_REG (DImode, REGNO (temp) + 1);
1769 temp2 = temp; /* op0 is _not_ allowed, see above. */
1776 temp1 = gen_reg_rtx (DImode);
1777 temp2 = gen_reg_rtx (DImode);
1778 temp3 = gen_reg_rtx (DImode);
1779 temp4 = gen_reg_rtx (DImode);
1780 temp5 = gen_reg_rtx (DImode);
1783 emit_insn (gen_sethh (temp1, op1));
1784 emit_insn (gen_setlm (temp2, op1));
1785 emit_insn (gen_sethm (temp3, temp1, op1));
1786 emit_insn (gen_rtx_SET (VOIDmode, temp4,
1787 gen_rtx_ASHIFT (DImode, temp3, GEN_INT (32))));
1788 emit_insn (gen_rtx_SET (VOIDmode, temp5,
1789 gen_rtx_PLUS (DImode, temp4, temp2)));
1790 emit_insn (gen_setlo (op0, temp5, op1));
1794 /* Old old old backwards compatibility kruft here.
1795 Essentially it is MEDLOW with a fixed 64-bit
1796 virtual base added to all data segment addresses.
1797 Text-segment stuff is computed like MEDANY, we can't
1798 reuse the code above because the relocation knobs
1801 Data segment: sethi %hi(symbol), %temp1
1802 add %temp1, EMBMEDANY_BASE_REG, %temp2
1803 or %temp2, %lo(symbol), %reg */
1804 if (data_segment_operand (op1, GET_MODE (op1)))
1808 temp1 = temp; /* op0 is allowed. */
1813 temp1 = gen_reg_rtx (DImode);
1814 temp2 = gen_reg_rtx (DImode);
1817 emit_insn (gen_embmedany_sethi (temp1, op1));
1818 emit_insn (gen_embmedany_brsum (temp2, temp1));
1819 emit_insn (gen_embmedany_losum (op0, temp2, op1));
1822 /* Text segment: sethi %uhi(symbol), %temp1
1823 sethi %hi(symbol), %temp2
1824 or %temp1, %ulo(symbol), %temp3
1825 sllx %temp3, 32, %temp4
1826 or %temp4, %temp2, %temp5
1827 or %temp5, %lo(symbol), %reg */
1832 /* It is possible that one of the registers we got for operands[2]
1833 might coincide with that of operands[0] (which is why we made
1834 it TImode). Pick the other one to use as our scratch. */
1835 if (rtx_equal_p (temp, op0))
1837 gcc_assert (ti_temp);
1838 temp = gen_rtx_REG (DImode, REGNO (temp) + 1);
1841 temp2 = temp; /* op0 is _not_ allowed, see above. */
1848 temp1 = gen_reg_rtx (DImode);
1849 temp2 = gen_reg_rtx (DImode);
1850 temp3 = gen_reg_rtx (DImode);
1851 temp4 = gen_reg_rtx (DImode);
1852 temp5 = gen_reg_rtx (DImode);
1855 emit_insn (gen_embmedany_textuhi (temp1, op1));
1856 emit_insn (gen_embmedany_texthi (temp2, op1));
1857 emit_insn (gen_embmedany_textulo (temp3, temp1, op1));
1858 emit_insn (gen_rtx_SET (VOIDmode, temp4,
1859 gen_rtx_ASHIFT (DImode, temp3, GEN_INT (32))));
1860 emit_insn (gen_rtx_SET (VOIDmode, temp5,
1861 gen_rtx_PLUS (DImode, temp4, temp2)));
1862 emit_insn (gen_embmedany_textlo (op0, temp5, op1));
1871 #if HOST_BITS_PER_WIDE_INT == 32
1873 sparc_emit_set_const64 (rtx op0 ATTRIBUTE_UNUSED, rtx op1 ATTRIBUTE_UNUSED)
1878 /* These avoid problems when cross compiling. If we do not
1879 go through all this hair then the optimizer will see
1880 invalid REG_EQUAL notes or in some cases none at all. */
1881 static rtx gen_safe_HIGH64 (rtx, HOST_WIDE_INT);
1882 static rtx gen_safe_SET64 (rtx, HOST_WIDE_INT);
1883 static rtx gen_safe_OR64 (rtx, HOST_WIDE_INT);
1884 static rtx gen_safe_XOR64 (rtx, HOST_WIDE_INT);
1886 /* The optimizer is not to assume anything about exactly
1887 which bits are set for a HIGH, they are unspecified.
1888 Unfortunately this leads to many missed optimizations
1889 during CSE. We mask out the non-HIGH bits, and matches
1890 a plain movdi, to alleviate this problem. */
1892 gen_safe_HIGH64 (rtx dest, HOST_WIDE_INT val)
1894 return gen_rtx_SET (VOIDmode, dest, GEN_INT (val & ~(HOST_WIDE_INT)0x3ff));
1898 gen_safe_SET64 (rtx dest, HOST_WIDE_INT val)
1900 return gen_rtx_SET (VOIDmode, dest, GEN_INT (val));
1904 gen_safe_OR64 (rtx src, HOST_WIDE_INT val)
1906 return gen_rtx_IOR (DImode, src, GEN_INT (val));
1910 gen_safe_XOR64 (rtx src, HOST_WIDE_INT val)
1912 return gen_rtx_XOR (DImode, src, GEN_INT (val));
1915 /* Worker routines for 64-bit constant formation on arch64.
1916 One of the key things to be doing in these emissions is
1917 to create as many temp REGs as possible. This makes it
1918 possible for half-built constants to be used later when
1919 such values are similar to something required later on.
1920 Without doing this, the optimizer cannot see such
1923 static void sparc_emit_set_const64_quick1 (rtx, rtx,
1924 unsigned HOST_WIDE_INT, int);
1927 sparc_emit_set_const64_quick1 (rtx op0, rtx temp,
1928 unsigned HOST_WIDE_INT low_bits, int is_neg)
1930 unsigned HOST_WIDE_INT high_bits;
1933 high_bits = (~low_bits) & 0xffffffff;
1935 high_bits = low_bits;
1937 emit_insn (gen_safe_HIGH64 (temp, high_bits));
1940 emit_insn (gen_rtx_SET (VOIDmode, op0,
1941 gen_safe_OR64 (temp, (high_bits & 0x3ff))));
1945 /* If we are XOR'ing with -1, then we should emit a one's complement
1946 instead. This way the combiner will notice logical operations
1947 such as ANDN later on and substitute. */
1948 if ((low_bits & 0x3ff) == 0x3ff)
1950 emit_insn (gen_rtx_SET (VOIDmode, op0,
1951 gen_rtx_NOT (DImode, temp)));
1955 emit_insn (gen_rtx_SET (VOIDmode, op0,
1956 gen_safe_XOR64 (temp,
1957 (-(HOST_WIDE_INT)0x400
1958 | (low_bits & 0x3ff)))));
1963 static void sparc_emit_set_const64_quick2 (rtx, rtx, unsigned HOST_WIDE_INT,
1964 unsigned HOST_WIDE_INT, int);
1967 sparc_emit_set_const64_quick2 (rtx op0, rtx temp,
1968 unsigned HOST_WIDE_INT high_bits,
1969 unsigned HOST_WIDE_INT low_immediate,
1974 if ((high_bits & 0xfffffc00) != 0)
1976 emit_insn (gen_safe_HIGH64 (temp, high_bits));
1977 if ((high_bits & ~0xfffffc00) != 0)
1978 emit_insn (gen_rtx_SET (VOIDmode, op0,
1979 gen_safe_OR64 (temp, (high_bits & 0x3ff))));
1985 emit_insn (gen_safe_SET64 (temp, high_bits));
1989 /* Now shift it up into place. */
1990 emit_insn (gen_rtx_SET (VOIDmode, op0,
1991 gen_rtx_ASHIFT (DImode, temp2,
1992 GEN_INT (shift_count))));
1994 /* If there is a low immediate part piece, finish up by
1995 putting that in as well. */
1996 if (low_immediate != 0)
1997 emit_insn (gen_rtx_SET (VOIDmode, op0,
1998 gen_safe_OR64 (op0, low_immediate)));
2001 static void sparc_emit_set_const64_longway (rtx, rtx, unsigned HOST_WIDE_INT,
2002 unsigned HOST_WIDE_INT);
2004 /* Full 64-bit constant decomposition. Even though this is the
2005 'worst' case, we still optimize a few things away. */
2007 sparc_emit_set_const64_longway (rtx op0, rtx temp,
2008 unsigned HOST_WIDE_INT high_bits,
2009 unsigned HOST_WIDE_INT low_bits)
2013 if (can_create_pseudo_p ())
2014 sub_temp = gen_reg_rtx (DImode);
2016 if ((high_bits & 0xfffffc00) != 0)
2018 emit_insn (gen_safe_HIGH64 (temp, high_bits));
2019 if ((high_bits & ~0xfffffc00) != 0)
2020 emit_insn (gen_rtx_SET (VOIDmode,
2022 gen_safe_OR64 (temp, (high_bits & 0x3ff))));
2028 emit_insn (gen_safe_SET64 (temp, high_bits));
2032 if (can_create_pseudo_p ())
2034 rtx temp2 = gen_reg_rtx (DImode);
2035 rtx temp3 = gen_reg_rtx (DImode);
2036 rtx temp4 = gen_reg_rtx (DImode);
2038 emit_insn (gen_rtx_SET (VOIDmode, temp4,
2039 gen_rtx_ASHIFT (DImode, sub_temp,
2042 emit_insn (gen_safe_HIGH64 (temp2, low_bits));
2043 if ((low_bits & ~0xfffffc00) != 0)
2045 emit_insn (gen_rtx_SET (VOIDmode, temp3,
2046 gen_safe_OR64 (temp2, (low_bits & 0x3ff))));
2047 emit_insn (gen_rtx_SET (VOIDmode, op0,
2048 gen_rtx_PLUS (DImode, temp4, temp3)));
2052 emit_insn (gen_rtx_SET (VOIDmode, op0,
2053 gen_rtx_PLUS (DImode, temp4, temp2)));
2058 rtx low1 = GEN_INT ((low_bits >> (32 - 12)) & 0xfff);
2059 rtx low2 = GEN_INT ((low_bits >> (32 - 12 - 12)) & 0xfff);
2060 rtx low3 = GEN_INT ((low_bits >> (32 - 12 - 12 - 8)) & 0x0ff);
2063 /* We are in the middle of reload, so this is really
2064 painful. However we do still make an attempt to
2065 avoid emitting truly stupid code. */
2066 if (low1 != const0_rtx)
2068 emit_insn (gen_rtx_SET (VOIDmode, op0,
2069 gen_rtx_ASHIFT (DImode, sub_temp,
2070 GEN_INT (to_shift))));
2071 emit_insn (gen_rtx_SET (VOIDmode, op0,
2072 gen_rtx_IOR (DImode, op0, low1)));
2080 if (low2 != const0_rtx)
2082 emit_insn (gen_rtx_SET (VOIDmode, op0,
2083 gen_rtx_ASHIFT (DImode, sub_temp,
2084 GEN_INT (to_shift))));
2085 emit_insn (gen_rtx_SET (VOIDmode, op0,
2086 gen_rtx_IOR (DImode, op0, low2)));
2094 emit_insn (gen_rtx_SET (VOIDmode, op0,
2095 gen_rtx_ASHIFT (DImode, sub_temp,
2096 GEN_INT (to_shift))));
2097 if (low3 != const0_rtx)
2098 emit_insn (gen_rtx_SET (VOIDmode, op0,
2099 gen_rtx_IOR (DImode, op0, low3)));
2104 /* Analyze a 64-bit constant for certain properties. */
2105 static void analyze_64bit_constant (unsigned HOST_WIDE_INT,
2106 unsigned HOST_WIDE_INT,
2107 int *, int *, int *);
2110 analyze_64bit_constant (unsigned HOST_WIDE_INT high_bits,
2111 unsigned HOST_WIDE_INT low_bits,
2112 int *hbsp, int *lbsp, int *abbasp)
2114 int lowest_bit_set, highest_bit_set, all_bits_between_are_set;
2117 lowest_bit_set = highest_bit_set = -1;
2121 if ((lowest_bit_set == -1)
2122 && ((low_bits >> i) & 1))
2124 if ((highest_bit_set == -1)
2125 && ((high_bits >> (32 - i - 1)) & 1))
2126 highest_bit_set = (64 - i - 1);
2129 && ((highest_bit_set == -1)
2130 || (lowest_bit_set == -1)));
2136 if ((lowest_bit_set == -1)
2137 && ((high_bits >> i) & 1))
2138 lowest_bit_set = i + 32;
2139 if ((highest_bit_set == -1)
2140 && ((low_bits >> (32 - i - 1)) & 1))
2141 highest_bit_set = 32 - i - 1;
2144 && ((highest_bit_set == -1)
2145 || (lowest_bit_set == -1)));
2147 /* If there are no bits set this should have gone out
2148 as one instruction! */
2149 gcc_assert (lowest_bit_set != -1 && highest_bit_set != -1);
2150 all_bits_between_are_set = 1;
2151 for (i = lowest_bit_set; i <= highest_bit_set; i++)
2155 if ((low_bits & (1 << i)) != 0)
2160 if ((high_bits & (1 << (i - 32))) != 0)
2163 all_bits_between_are_set = 0;
2166 *hbsp = highest_bit_set;
2167 *lbsp = lowest_bit_set;
2168 *abbasp = all_bits_between_are_set;
2171 static int const64_is_2insns (unsigned HOST_WIDE_INT, unsigned HOST_WIDE_INT);
2174 const64_is_2insns (unsigned HOST_WIDE_INT high_bits,
2175 unsigned HOST_WIDE_INT low_bits)
2177 int highest_bit_set, lowest_bit_set, all_bits_between_are_set;
2180 || high_bits == 0xffffffff)
2183 analyze_64bit_constant (high_bits, low_bits,
2184 &highest_bit_set, &lowest_bit_set,
2185 &all_bits_between_are_set);
2187 if ((highest_bit_set == 63
2188 || lowest_bit_set == 0)
2189 && all_bits_between_are_set != 0)
2192 if ((highest_bit_set - lowest_bit_set) < 21)
2198 static unsigned HOST_WIDE_INT create_simple_focus_bits (unsigned HOST_WIDE_INT,
2199 unsigned HOST_WIDE_INT,
2202 static unsigned HOST_WIDE_INT
2203 create_simple_focus_bits (unsigned HOST_WIDE_INT high_bits,
2204 unsigned HOST_WIDE_INT low_bits,
2205 int lowest_bit_set, int shift)
2207 HOST_WIDE_INT hi, lo;
2209 if (lowest_bit_set < 32)
2211 lo = (low_bits >> lowest_bit_set) << shift;
2212 hi = ((high_bits << (32 - lowest_bit_set)) << shift);
2217 hi = ((high_bits >> (lowest_bit_set - 32)) << shift);
2219 gcc_assert (! (hi & lo));
2223 /* Here we are sure to be arch64 and this is an integer constant
2224 being loaded into a register. Emit the most efficient
2225 insn sequence possible. Detection of all the 1-insn cases
2226 has been done already. */
2228 sparc_emit_set_const64 (rtx op0, rtx op1)
2230 unsigned HOST_WIDE_INT high_bits, low_bits;
2231 int lowest_bit_set, highest_bit_set;
2232 int all_bits_between_are_set;
2235 /* Sanity check that we know what we are working with. */
2236 gcc_assert (TARGET_ARCH64
2237 && (GET_CODE (op0) == SUBREG
2238 || (REG_P (op0) && ! SPARC_FP_REG_P (REGNO (op0)))));
2240 if (! can_create_pseudo_p ())
2243 if (GET_CODE (op1) != CONST_INT)
2245 sparc_emit_set_symbolic_const64 (op0, op1, temp);
2250 temp = gen_reg_rtx (DImode);
2252 high_bits = ((INTVAL (op1) >> 32) & 0xffffffff);
2253 low_bits = (INTVAL (op1) & 0xffffffff);
2255 /* low_bits bits 0 --> 31
2256 high_bits bits 32 --> 63 */
2258 analyze_64bit_constant (high_bits, low_bits,
2259 &highest_bit_set, &lowest_bit_set,
2260 &all_bits_between_are_set);
2262 /* First try for a 2-insn sequence. */
2264 /* These situations are preferred because the optimizer can
2265 * do more things with them:
2267 * sllx %reg, shift, %reg
2269 * srlx %reg, shift, %reg
2270 * 3) mov some_small_const, %reg
2271 * sllx %reg, shift, %reg
2273 if (((highest_bit_set == 63
2274 || lowest_bit_set == 0)
2275 && all_bits_between_are_set != 0)
2276 || ((highest_bit_set - lowest_bit_set) < 12))
2278 HOST_WIDE_INT the_const = -1;
2279 int shift = lowest_bit_set;
2281 if ((highest_bit_set != 63
2282 && lowest_bit_set != 0)
2283 || all_bits_between_are_set == 0)
2286 create_simple_focus_bits (high_bits, low_bits,
2289 else if (lowest_bit_set == 0)
2290 shift = -(63 - highest_bit_set);
2292 gcc_assert (SPARC_SIMM13_P (the_const));
2293 gcc_assert (shift != 0);
2295 emit_insn (gen_safe_SET64 (temp, the_const));
2297 emit_insn (gen_rtx_SET (VOIDmode,
2299 gen_rtx_ASHIFT (DImode,
2303 emit_insn (gen_rtx_SET (VOIDmode,
2305 gen_rtx_LSHIFTRT (DImode,
2307 GEN_INT (-shift))));
2311 /* Now a range of 22 or less bits set somewhere.
2312 * 1) sethi %hi(focus_bits), %reg
2313 * sllx %reg, shift, %reg
2314 * 2) sethi %hi(focus_bits), %reg
2315 * srlx %reg, shift, %reg
2317 if ((highest_bit_set - lowest_bit_set) < 21)
2319 unsigned HOST_WIDE_INT focus_bits =
2320 create_simple_focus_bits (high_bits, low_bits,
2321 lowest_bit_set, 10);
2323 gcc_assert (SPARC_SETHI_P (focus_bits));
2324 gcc_assert (lowest_bit_set != 10);
2326 emit_insn (gen_safe_HIGH64 (temp, focus_bits));
2328 /* If lowest_bit_set == 10 then a sethi alone could have done it. */
2329 if (lowest_bit_set < 10)
2330 emit_insn (gen_rtx_SET (VOIDmode,
2332 gen_rtx_LSHIFTRT (DImode, temp,
2333 GEN_INT (10 - lowest_bit_set))));
2334 else if (lowest_bit_set > 10)
2335 emit_insn (gen_rtx_SET (VOIDmode,
2337 gen_rtx_ASHIFT (DImode, temp,
2338 GEN_INT (lowest_bit_set - 10))));
2342 /* 1) sethi %hi(low_bits), %reg
2343 * or %reg, %lo(low_bits), %reg
2344 * 2) sethi %hi(~low_bits), %reg
2345 * xor %reg, %lo(-0x400 | (low_bits & 0x3ff)), %reg
2348 || high_bits == 0xffffffff)
2350 sparc_emit_set_const64_quick1 (op0, temp, low_bits,
2351 (high_bits == 0xffffffff));
2355 /* Now, try 3-insn sequences. */
2357 /* 1) sethi %hi(high_bits), %reg
2358 * or %reg, %lo(high_bits), %reg
2359 * sllx %reg, 32, %reg
2363 sparc_emit_set_const64_quick2 (op0, temp, high_bits, 0, 32);
2367 /* We may be able to do something quick
2368 when the constant is negated, so try that. */
2369 if (const64_is_2insns ((~high_bits) & 0xffffffff,
2370 (~low_bits) & 0xfffffc00))
2372 /* NOTE: The trailing bits get XOR'd so we need the
2373 non-negated bits, not the negated ones. */
2374 unsigned HOST_WIDE_INT trailing_bits = low_bits & 0x3ff;
2376 if ((((~high_bits) & 0xffffffff) == 0
2377 && ((~low_bits) & 0x80000000) == 0)
2378 || (((~high_bits) & 0xffffffff) == 0xffffffff
2379 && ((~low_bits) & 0x80000000) != 0))
2381 unsigned HOST_WIDE_INT fast_int = (~low_bits & 0xffffffff);
2383 if ((SPARC_SETHI_P (fast_int)
2384 && (~high_bits & 0xffffffff) == 0)
2385 || SPARC_SIMM13_P (fast_int))
2386 emit_insn (gen_safe_SET64 (temp, fast_int));
2388 sparc_emit_set_const64 (temp, GEN_INT (fast_int));
2393 negated_const = GEN_INT (((~low_bits) & 0xfffffc00) |
2394 (((HOST_WIDE_INT)((~high_bits) & 0xffffffff))<<32));
2395 sparc_emit_set_const64 (temp, negated_const);
2398 /* If we are XOR'ing with -1, then we should emit a one's complement
2399 instead. This way the combiner will notice logical operations
2400 such as ANDN later on and substitute. */
2401 if (trailing_bits == 0x3ff)
2403 emit_insn (gen_rtx_SET (VOIDmode, op0,
2404 gen_rtx_NOT (DImode, temp)));
2408 emit_insn (gen_rtx_SET (VOIDmode,
2410 gen_safe_XOR64 (temp,
2411 (-0x400 | trailing_bits))));
2416 /* 1) sethi %hi(xxx), %reg
2417 * or %reg, %lo(xxx), %reg
2418 * sllx %reg, yyy, %reg
2420 * ??? This is just a generalized version of the low_bits==0
2421 * thing above, FIXME...
2423 if ((highest_bit_set - lowest_bit_set) < 32)
2425 unsigned HOST_WIDE_INT focus_bits =
2426 create_simple_focus_bits (high_bits, low_bits,
2429 /* We can't get here in this state. */
2430 gcc_assert (highest_bit_set >= 32 && lowest_bit_set < 32);
2432 /* So what we know is that the set bits straddle the
2433 middle of the 64-bit word. */
2434 sparc_emit_set_const64_quick2 (op0, temp,
2440 /* 1) sethi %hi(high_bits), %reg
2441 * or %reg, %lo(high_bits), %reg
2442 * sllx %reg, 32, %reg
2443 * or %reg, low_bits, %reg
2445 if (SPARC_SIMM13_P(low_bits)
2446 && ((int)low_bits > 0))
2448 sparc_emit_set_const64_quick2 (op0, temp, high_bits, low_bits, 32);
2452 /* The easiest way when all else fails, is full decomposition. */
2453 sparc_emit_set_const64_longway (op0, temp, high_bits, low_bits);
2455 #endif /* HOST_BITS_PER_WIDE_INT == 32 */
2457 /* Given a comparison code (EQ, NE, etc.) and the first operand of a COMPARE,
2458 return the mode to be used for the comparison. For floating-point,
2459 CCFP[E]mode is used. CC_NOOVmode should be used when the first operand
2460 is a PLUS, MINUS, NEG, or ASHIFT. CCmode should be used when no special
2461 processing is needed. */
2464 select_cc_mode (enum rtx_code op, rtx x, rtx y ATTRIBUTE_UNUSED)
2466 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
2492 else if (GET_CODE (x) == PLUS || GET_CODE (x) == MINUS
2493 || GET_CODE (x) == NEG || GET_CODE (x) == ASHIFT)
2495 if (TARGET_ARCH64 && GET_MODE (x) == DImode)
2496 return CCX_NOOVmode;
2502 if (TARGET_ARCH64 && GET_MODE (x) == DImode)
2509 /* Emit the compare insn and return the CC reg for a CODE comparison
2510 with operands X and Y. */
2513 gen_compare_reg_1 (enum rtx_code code, rtx x, rtx y)
2515 enum machine_mode mode;
2518 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_CC)
2521 mode = SELECT_CC_MODE (code, x, y);
2523 /* ??? We don't have movcc patterns so we cannot generate pseudo regs for the
2524 fcc regs (cse can't tell they're really call clobbered regs and will
2525 remove a duplicate comparison even if there is an intervening function
2526 call - it will then try to reload the cc reg via an int reg which is why
2527 we need the movcc patterns). It is possible to provide the movcc
2528 patterns by using the ldxfsr/stxfsr v9 insns. I tried it: you need two
2529 registers (say %g1,%g5) and it takes about 6 insns. A better fix would be
2530 to tell cse that CCFPE mode registers (even pseudos) are call
2533 /* ??? This is an experiment. Rather than making changes to cse which may
2534 or may not be easy/clean, we do our own cse. This is possible because
2535 we will generate hard registers. Cse knows they're call clobbered (it
2536 doesn't know the same thing about pseudos). If we guess wrong, no big
2537 deal, but if we win, great! */
2539 if (TARGET_V9 && GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
2540 #if 1 /* experiment */
2543 /* We cycle through the registers to ensure they're all exercised. */
2544 static int next_fcc_reg = 0;
2545 /* Previous x,y for each fcc reg. */
2546 static rtx prev_args[4][2];
2548 /* Scan prev_args for x,y. */
2549 for (reg = 0; reg < 4; reg++)
2550 if (prev_args[reg][0] == x && prev_args[reg][1] == y)
2555 prev_args[reg][0] = x;
2556 prev_args[reg][1] = y;
2557 next_fcc_reg = (next_fcc_reg + 1) & 3;
2559 cc_reg = gen_rtx_REG (mode, reg + SPARC_FIRST_V9_FCC_REG);
2562 cc_reg = gen_reg_rtx (mode);
2563 #endif /* ! experiment */
2564 else if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
2565 cc_reg = gen_rtx_REG (mode, SPARC_FCC_REG);
2567 cc_reg = gen_rtx_REG (mode, SPARC_ICC_REG);
2569 /* We shouldn't get there for TFmode if !TARGET_HARD_QUAD. If we do, this
2570 will only result in an unrecognizable insn so no point in asserting. */
2571 emit_insn (gen_rtx_SET (VOIDmode, cc_reg, gen_rtx_COMPARE (mode, x, y)));
2577 /* Emit the compare insn and return the CC reg for the comparison in CMP. */
2580 gen_compare_reg (rtx cmp)
2582 return gen_compare_reg_1 (GET_CODE (cmp), XEXP (cmp, 0), XEXP (cmp, 1));
2585 /* This function is used for v9 only.
2586 DEST is the target of the Scc insn.
2587 CODE is the code for an Scc's comparison.
2588 X and Y are the values we compare.
2590 This function is needed to turn
2593 (gt (reg:CCX 100 %icc)
2597 (gt:DI (reg:CCX 100 %icc)
2600 IE: The instruction recognizer needs to see the mode of the comparison to
2601 find the right instruction. We could use "gt:DI" right in the
2602 define_expand, but leaving it out allows us to handle DI, SI, etc. */
2605 gen_v9_scc (rtx dest, enum rtx_code compare_code, rtx x, rtx y)
2608 && (GET_MODE (x) == DImode
2609 || GET_MODE (dest) == DImode))
2612 /* Try to use the movrCC insns. */
2614 && GET_MODE_CLASS (GET_MODE (x)) == MODE_INT
2616 && v9_regcmp_p (compare_code))
2621 /* Special case for op0 != 0. This can be done with one instruction if
2624 if (compare_code == NE
2625 && GET_MODE (dest) == DImode
2626 && rtx_equal_p (op0, dest))
2628 emit_insn (gen_rtx_SET (VOIDmode, dest,
2629 gen_rtx_IF_THEN_ELSE (DImode,
2630 gen_rtx_fmt_ee (compare_code, DImode,
2637 if (reg_overlap_mentioned_p (dest, op0))
2639 /* Handle the case where dest == x.
2640 We "early clobber" the result. */
2641 op0 = gen_reg_rtx (GET_MODE (x));
2642 emit_move_insn (op0, x);
2645 emit_insn (gen_rtx_SET (VOIDmode, dest, const0_rtx));
2646 if (GET_MODE (op0) != DImode)
2648 temp = gen_reg_rtx (DImode);
2649 convert_move (temp, op0, 0);
2653 emit_insn (gen_rtx_SET (VOIDmode, dest,
2654 gen_rtx_IF_THEN_ELSE (GET_MODE (dest),
2655 gen_rtx_fmt_ee (compare_code, DImode,
2663 x = gen_compare_reg_1 (compare_code, x, y);
2666 gcc_assert (GET_MODE (x) != CC_NOOVmode
2667 && GET_MODE (x) != CCX_NOOVmode);
2669 emit_insn (gen_rtx_SET (VOIDmode, dest, const0_rtx));
2670 emit_insn (gen_rtx_SET (VOIDmode, dest,
2671 gen_rtx_IF_THEN_ELSE (GET_MODE (dest),
2672 gen_rtx_fmt_ee (compare_code,
2673 GET_MODE (x), x, y),
2674 const1_rtx, dest)));
2680 /* Emit an scc insn. For seq, sne, sgeu, and sltu, we can do this
2681 without jumps using the addx/subx instructions. */
2684 emit_scc_insn (rtx operands[])
2691 /* The quad-word fp compare library routines all return nonzero to indicate
2692 true, which is different from the equivalent libgcc routines, so we must
2693 handle them specially here. */
2694 if (GET_MODE (operands[2]) == TFmode && ! TARGET_HARD_QUAD)
2696 operands[1] = sparc_emit_float_lib_cmp (operands[2], operands[3],
2697 GET_CODE (operands[1]));
2698 operands[2] = XEXP (operands[1], 0);
2699 operands[3] = XEXP (operands[1], 1);
2702 code = GET_CODE (operands[1]);
2706 /* For seq/sne on v9 we use the same code as v8 (the addx/subx method has
2707 more applications). The exception to this is "reg != 0" which can
2708 be done in one instruction on v9 (so we do it). */
2711 if (GET_MODE (x) == SImode)
2715 pat = gen_seqsidi_special (operands[0], x, y);
2717 pat = gen_seqsisi_special (operands[0], x, y);
2721 else if (GET_MODE (x) == DImode)
2723 rtx pat = gen_seqdi_special (operands[0], x, y);
2731 if (GET_MODE (x) == SImode)
2735 pat = gen_snesidi_special (operands[0], x, y);
2737 pat = gen_snesisi_special (operands[0], x, y);
2741 else if (GET_MODE (x) == DImode)
2745 pat = gen_snedi_special_vis3 (operands[0], x, y);
2747 pat = gen_snedi_special (operands[0], x, y);
2755 && GET_MODE (x) == DImode
2757 && (code == GTU || code == LTU))
2758 && gen_v9_scc (operands[0], code, x, y))
2761 /* We can do LTU and GEU using the addx/subx instructions too. And
2762 for GTU/LEU, if both operands are registers swap them and fall
2763 back to the easy case. */
2764 if (code == GTU || code == LEU)
2766 if ((GET_CODE (x) == REG || GET_CODE (x) == SUBREG)
2767 && (GET_CODE (y) == REG || GET_CODE (y) == SUBREG))
2772 code = swap_condition (code);
2777 || (!TARGET_VIS3 && code == GEU))
2779 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
2780 gen_rtx_fmt_ee (code, GET_MODE (operands[0]),
2781 gen_compare_reg_1 (code, x, y),
2786 /* All the posibilities to use addx/subx based sequences has been
2787 exhausted, try for a 3 instruction sequence using v9 conditional
2789 if (TARGET_V9 && gen_v9_scc (operands[0], code, x, y))
2792 /* Nope, do branches. */
2796 /* Emit a conditional jump insn for the v9 architecture using comparison code
2797 CODE and jump target LABEL.
2798 This function exists to take advantage of the v9 brxx insns. */
2801 emit_v9_brxx_insn (enum rtx_code code, rtx op0, rtx label)
2803 emit_jump_insn (gen_rtx_SET (VOIDmode,
2805 gen_rtx_IF_THEN_ELSE (VOIDmode,
2806 gen_rtx_fmt_ee (code, GET_MODE (op0),
2808 gen_rtx_LABEL_REF (VOIDmode, label),
2812 /* Emit a conditional jump insn for the UA2011 architecture using
2813 comparison code CODE and jump target LABEL. This function exists
2814 to take advantage of the UA2011 Compare and Branch insns. */
2817 emit_cbcond_insn (enum rtx_code code, rtx op0, rtx op1, rtx label)
2821 if_then_else = gen_rtx_IF_THEN_ELSE (VOIDmode,
2822 gen_rtx_fmt_ee(code, GET_MODE(op0),
2824 gen_rtx_LABEL_REF (VOIDmode, label),
2827 emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, if_then_else));
2831 emit_conditional_branch_insn (rtx operands[])
2833 /* The quad-word fp compare library routines all return nonzero to indicate
2834 true, which is different from the equivalent libgcc routines, so we must
2835 handle them specially here. */
2836 if (GET_MODE (operands[1]) == TFmode && ! TARGET_HARD_QUAD)
2838 operands[0] = sparc_emit_float_lib_cmp (operands[1], operands[2],
2839 GET_CODE (operands[0]));
2840 operands[1] = XEXP (operands[0], 0);
2841 operands[2] = XEXP (operands[0], 1);
2844 /* If we can tell early on that the comparison is against a constant
2845 that won't fit in the 5-bit signed immediate field of a cbcond,
2846 use one of the other v9 conditional branch sequences. */
2848 && GET_CODE (operands[1]) == REG
2849 && (GET_MODE (operands[1]) == SImode
2850 || (TARGET_ARCH64 && GET_MODE (operands[1]) == DImode))
2851 && (GET_CODE (operands[2]) != CONST_INT
2852 || SPARC_SIMM5_P (INTVAL (operands[2]))))
2854 emit_cbcond_insn (GET_CODE (operands[0]), operands[1], operands[2], operands[3]);
2858 if (TARGET_ARCH64 && operands[2] == const0_rtx
2859 && GET_CODE (operands[1]) == REG
2860 && GET_MODE (operands[1]) == DImode)
2862 emit_v9_brxx_insn (GET_CODE (operands[0]), operands[1], operands[3]);
2866 operands[1] = gen_compare_reg (operands[0]);
2867 operands[2] = const0_rtx;
2868 operands[0] = gen_rtx_fmt_ee (GET_CODE (operands[0]), VOIDmode,
2869 operands[1], operands[2]);
2870 emit_jump_insn (gen_cbranchcc4 (operands[0], operands[1], operands[2],
2875 /* Generate a DFmode part of a hard TFmode register.
2876 REG is the TFmode hard register, LOW is 1 for the
2877 low 64bit of the register and 0 otherwise.
2880 gen_df_reg (rtx reg, int low)
2882 int regno = REGNO (reg);
2884 if ((WORDS_BIG_ENDIAN == 0) ^ (low != 0))
2885 regno += (TARGET_ARCH64 && SPARC_INT_REG_P (regno)) ? 1 : 2;
2886 return gen_rtx_REG (DFmode, regno);
2889 /* Generate a call to FUNC with OPERANDS. Operand 0 is the return value.
2890 Unlike normal calls, TFmode operands are passed by reference. It is
2891 assumed that no more than 3 operands are required. */
2894 emit_soft_tfmode_libcall (const char *func_name, int nargs, rtx *operands)
2896 rtx ret_slot = NULL, arg[3], func_sym;
2899 /* We only expect to be called for conversions, unary, and binary ops. */
2900 gcc_assert (nargs == 2 || nargs == 3);
2902 for (i = 0; i < nargs; ++i)
2904 rtx this_arg = operands[i];
2907 /* TFmode arguments and return values are passed by reference. */
2908 if (GET_MODE (this_arg) == TFmode)
2910 int force_stack_temp;
2912 force_stack_temp = 0;
2913 if (TARGET_BUGGY_QP_LIB && i == 0)
2914 force_stack_temp = 1;
2916 if (GET_CODE (this_arg) == MEM
2917 && ! force_stack_temp)
2919 tree expr = MEM_EXPR (this_arg);
2921 mark_addressable (expr);
2922 this_arg = XEXP (this_arg, 0);
2924 else if (CONSTANT_P (this_arg)
2925 && ! force_stack_temp)
2927 this_slot = force_const_mem (TFmode, this_arg);
2928 this_arg = XEXP (this_slot, 0);
2932 this_slot = assign_stack_temp (TFmode, GET_MODE_SIZE (TFmode));
2934 /* Operand 0 is the return value. We'll copy it out later. */
2936 emit_move_insn (this_slot, this_arg);
2938 ret_slot = this_slot;
2940 this_arg = XEXP (this_slot, 0);
2947 func_sym = gen_rtx_SYMBOL_REF (Pmode, func_name);
2949 if (GET_MODE (operands[0]) == TFmode)
2952 emit_library_call (func_sym, LCT_NORMAL, VOIDmode, 2,
2953 arg[0], GET_MODE (arg[0]),
2954 arg[1], GET_MODE (arg[1]));
2956 emit_library_call (func_sym, LCT_NORMAL, VOIDmode, 3,
2957 arg[0], GET_MODE (arg[0]),
2958 arg[1], GET_MODE (arg[1]),
2959 arg[2], GET_MODE (arg[2]));
2962 emit_move_insn (operands[0], ret_slot);
2968 gcc_assert (nargs == 2);
2970 ret = emit_library_call_value (func_sym, operands[0], LCT_NORMAL,
2971 GET_MODE (operands[0]), 1,
2972 arg[1], GET_MODE (arg[1]));
2974 if (ret != operands[0])
2975 emit_move_insn (operands[0], ret);
2979 /* Expand soft-float TFmode calls to sparc abi routines. */
2982 emit_soft_tfmode_binop (enum rtx_code code, rtx *operands)
3004 emit_soft_tfmode_libcall (func, 3, operands);
3008 emit_soft_tfmode_unop (enum rtx_code code, rtx *operands)
3012 gcc_assert (code == SQRT);
3015 emit_soft_tfmode_libcall (func, 2, operands);
3019 emit_soft_tfmode_cvt (enum rtx_code code, rtx *operands)
3026 switch (GET_MODE (operands[1]))
3039 case FLOAT_TRUNCATE:
3040 switch (GET_MODE (operands[0]))
3054 switch (GET_MODE (operands[1]))
3059 operands[1] = gen_rtx_SIGN_EXTEND (DImode, operands[1]);
3069 case UNSIGNED_FLOAT:
3070 switch (GET_MODE (operands[1]))
3075 operands[1] = gen_rtx_ZERO_EXTEND (DImode, operands[1]);
3086 switch (GET_MODE (operands[0]))
3100 switch (GET_MODE (operands[0]))
3117 emit_soft_tfmode_libcall (func, 2, operands);
3120 /* Expand a hard-float tfmode operation. All arguments must be in
3124 emit_hard_tfmode_operation (enum rtx_code code, rtx *operands)
3128 if (GET_RTX_CLASS (code) == RTX_UNARY)
3130 operands[1] = force_reg (GET_MODE (operands[1]), operands[1]);
3131 op = gen_rtx_fmt_e (code, GET_MODE (operands[0]), operands[1]);
3135 operands[1] = force_reg (GET_MODE (operands[1]), operands[1]);
3136 operands[2] = force_reg (GET_MODE (operands[2]), operands[2]);
3137 op = gen_rtx_fmt_ee (code, GET_MODE (operands[0]),
3138 operands[1], operands[2]);
3141 if (register_operand (operands[0], VOIDmode))
3144 dest = gen_reg_rtx (GET_MODE (operands[0]));
3146 emit_insn (gen_rtx_SET (VOIDmode, dest, op));
3148 if (dest != operands[0])
3149 emit_move_insn (operands[0], dest);
3153 emit_tfmode_binop (enum rtx_code code, rtx *operands)
3155 if (TARGET_HARD_QUAD)
3156 emit_hard_tfmode_operation (code, operands);
3158 emit_soft_tfmode_binop (code, operands);
3162 emit_tfmode_unop (enum rtx_code code, rtx *operands)
3164 if (TARGET_HARD_QUAD)
3165 emit_hard_tfmode_operation (code, operands);
3167 emit_soft_tfmode_unop (code, operands);
3171 emit_tfmode_cvt (enum rtx_code code, rtx *operands)
3173 if (TARGET_HARD_QUAD)
3174 emit_hard_tfmode_operation (code, operands);
3176 emit_soft_tfmode_cvt (code, operands);
3179 /* Return nonzero if a branch/jump/call instruction will be emitting
3180 nop into its delay slot. */
3183 empty_delay_slot (rtx insn)
3187 /* If no previous instruction (should not happen), return true. */
3188 if (PREV_INSN (insn) == NULL)
3191 seq = NEXT_INSN (PREV_INSN (insn));
3192 if (GET_CODE (PATTERN (seq)) == SEQUENCE)
3198 /* Return nonzero if we should emit a nop after a cbcond instruction.
3199 The cbcond instruction does not have a delay slot, however there is
3200 a severe performance penalty if a control transfer appears right
3201 after a cbcond. Therefore we emit a nop when we detect this
3205 emit_cbcond_nop (rtx insn)
3207 rtx next = next_active_insn (insn);
3212 if (NONJUMP_INSN_P (next)
3213 && GET_CODE (PATTERN (next)) == SEQUENCE)
3214 next = XVECEXP (PATTERN (next), 0, 0);
3215 else if (CALL_P (next)
3216 && GET_CODE (PATTERN (next)) == PARALLEL)
3218 rtx delay = XVECEXP (PATTERN (next), 0, 1);
3220 if (GET_CODE (delay) == RETURN)
3222 /* It's a sibling call. Do not emit the nop if we're going
3223 to emit something other than the jump itself as the first
3224 instruction of the sibcall sequence. */
3225 if (sparc_leaf_function_p || TARGET_FLAT)
3230 if (NONJUMP_INSN_P (next))
3236 /* Return nonzero if TRIAL can go into the call delay slot. */
3239 tls_call_delay (rtx trial)
3244 call __tls_get_addr, %tgd_call (foo)
3245 add %l7, %o0, %o0, %tgd_add (foo)
3246 while Sun as/ld does not. */
3247 if (TARGET_GNU_TLS || !TARGET_TLS)
3250 pat = PATTERN (trial);
3252 /* We must reject tgd_add{32|64}, i.e.
3253 (set (reg) (plus (reg) (unspec [(reg) (symbol_ref)] UNSPEC_TLSGD)))
3254 and tldm_add{32|64}, i.e.
3255 (set (reg) (plus (reg) (unspec [(reg) (symbol_ref)] UNSPEC_TLSLDM)))
3257 if (GET_CODE (pat) == SET
3258 && GET_CODE (SET_SRC (pat)) == PLUS)
3260 rtx unspec = XEXP (SET_SRC (pat), 1);
3262 if (GET_CODE (unspec) == UNSPEC
3263 && (XINT (unspec, 1) == UNSPEC_TLSGD
3264 || XINT (unspec, 1) == UNSPEC_TLSLDM))
3271 /* Return nonzero if TRIAL, an insn, can be combined with a 'restore'
3272 instruction. RETURN_P is true if the v9 variant 'return' is to be
3273 considered in the test too.
3275 TRIAL must be a SET whose destination is a REG appropriate for the
3276 'restore' instruction or, if RETURN_P is true, for the 'return'
3280 eligible_for_restore_insn (rtx trial, bool return_p)
3282 rtx pat = PATTERN (trial);
3283 rtx src = SET_SRC (pat);
3284 bool src_is_freg = false;
3287 /* Since we now can do moves between float and integer registers when
3288 VIS3 is enabled, we have to catch this case. We can allow such
3289 moves when doing a 'return' however. */
3291 if (GET_CODE (src_reg) == SUBREG)
3292 src_reg = SUBREG_REG (src_reg);
3293 if (GET_CODE (src_reg) == REG
3294 && SPARC_FP_REG_P (REGNO (src_reg)))
3297 /* The 'restore src,%g0,dest' pattern for word mode and below. */
3298 if (GET_MODE_CLASS (GET_MODE (src)) != MODE_FLOAT
3299 && arith_operand (src, GET_MODE (src))
3303 return GET_MODE_SIZE (GET_MODE (src)) <= GET_MODE_SIZE (DImode);
3305 return GET_MODE_SIZE (GET_MODE (src)) <= GET_MODE_SIZE (SImode);
3308 /* The 'restore src,%g0,dest' pattern for double-word mode. */
3309 else if (GET_MODE_CLASS (GET_MODE (src)) != MODE_FLOAT
3310 && arith_double_operand (src, GET_MODE (src))
3312 return GET_MODE_SIZE (GET_MODE (src)) <= GET_MODE_SIZE (DImode);
3314 /* The 'restore src,%g0,dest' pattern for float if no FPU. */
3315 else if (! TARGET_FPU && register_operand (src, SFmode))
3318 /* The 'restore src,%g0,dest' pattern for double if no FPU. */
3319 else if (! TARGET_FPU && TARGET_ARCH64 && register_operand (src, DFmode))
3322 /* If we have the 'return' instruction, anything that does not use
3323 local or output registers and can go into a delay slot wins. */
3326 && !epilogue_renumber (&pat, 1)
3327 && get_attr_in_uncond_branch_delay (trial)
3328 == IN_UNCOND_BRANCH_DELAY_TRUE)
3331 /* The 'restore src1,src2,dest' pattern for SImode. */
3332 else if (GET_CODE (src) == PLUS
3333 && register_operand (XEXP (src, 0), SImode)
3334 && arith_operand (XEXP (src, 1), SImode))
3337 /* The 'restore src1,src2,dest' pattern for DImode. */
3338 else if (GET_CODE (src) == PLUS
3339 && register_operand (XEXP (src, 0), DImode)
3340 && arith_double_operand (XEXP (src, 1), DImode))
3343 /* The 'restore src1,%lo(src2),dest' pattern. */
3344 else if (GET_CODE (src) == LO_SUM
3345 && ! TARGET_CM_MEDMID
3346 && ((register_operand (XEXP (src, 0), SImode)
3347 && immediate_operand (XEXP (src, 1), SImode))
3349 && register_operand (XEXP (src, 0), DImode)
3350 && immediate_operand (XEXP (src, 1), DImode))))
3353 /* The 'restore src,src,dest' pattern. */
3354 else if (GET_CODE (src) == ASHIFT
3355 && (register_operand (XEXP (src, 0), SImode)
3356 || register_operand (XEXP (src, 0), DImode))
3357 && XEXP (src, 1) == const1_rtx)
3363 /* Return nonzero if TRIAL can go into the function return's delay slot. */
3366 eligible_for_return_delay (rtx trial)
3371 if (! NONJUMP_INSN_P (trial))
3374 if (get_attr_length (trial) != 1)
3377 /* If the function uses __builtin_eh_return, the eh_return machinery
3378 occupies the delay slot. */
3379 if (crtl->calls_eh_return)
3382 /* In the case of a leaf or flat function, anything can go into the slot. */
3383 if (sparc_leaf_function_p || TARGET_FLAT)
3385 get_attr_in_uncond_branch_delay (trial) == IN_UNCOND_BRANCH_DELAY_TRUE;
3387 pat = PATTERN (trial);
3388 if (GET_CODE (pat) == PARALLEL)
3394 for (i = XVECLEN (pat, 0) - 1; i >= 0; i--)
3396 rtx expr = XVECEXP (pat, 0, i);
3397 if (GET_CODE (expr) != SET)
3399 if (GET_CODE (SET_DEST (expr)) != REG)
3401 regno = REGNO (SET_DEST (expr));
3402 if (regno >= 8 && regno < 24)
3405 return !epilogue_renumber (&pat, 1)
3406 && (get_attr_in_uncond_branch_delay (trial)
3407 == IN_UNCOND_BRANCH_DELAY_TRUE);
3410 if (GET_CODE (pat) != SET)
3413 if (GET_CODE (SET_DEST (pat)) != REG)
3416 regno = REGNO (SET_DEST (pat));
3418 /* Otherwise, only operations which can be done in tandem with
3419 a `restore' or `return' insn can go into the delay slot. */
3420 if (regno >= 8 && regno < 24)
3423 /* If this instruction sets up floating point register and we have a return
3424 instruction, it can probably go in. But restore will not work
3426 if (! SPARC_INT_REG_P (regno))
3428 && !epilogue_renumber (&pat, 1)
3429 && get_attr_in_uncond_branch_delay (trial)
3430 == IN_UNCOND_BRANCH_DELAY_TRUE);
3432 return eligible_for_restore_insn (trial, true);
3435 /* Return nonzero if TRIAL can go into the sibling call's delay slot. */
3438 eligible_for_sibcall_delay (rtx trial)
3442 if (! NONJUMP_INSN_P (trial) || GET_CODE (PATTERN (trial)) != SET)
3445 if (get_attr_length (trial) != 1)
3448 pat = PATTERN (trial);
3450 if (sparc_leaf_function_p || TARGET_FLAT)
3452 /* If the tail call is done using the call instruction,
3453 we have to restore %o7 in the delay slot. */
3454 if (LEAF_SIBCALL_SLOT_RESERVED_P)
3457 /* %g1 is used to build the function address */
3458 if (reg_mentioned_p (gen_rtx_REG (Pmode, 1), pat))
3464 /* Otherwise, only operations which can be done in tandem with
3465 a `restore' insn can go into the delay slot. */
3466 if (GET_CODE (SET_DEST (pat)) != REG
3467 || (REGNO (SET_DEST (pat)) >= 8 && REGNO (SET_DEST (pat)) < 24)
3468 || ! SPARC_INT_REG_P (REGNO (SET_DEST (pat))))
3471 /* If it mentions %o7, it can't go in, because sibcall will clobber it
3473 if (reg_mentioned_p (gen_rtx_REG (Pmode, 15), pat))
3476 return eligible_for_restore_insn (trial, false);
3479 /* Determine if it's legal to put X into the constant pool. This
3480 is not possible if X contains the address of a symbol that is
3481 not constant (TLS) or not known at final link time (PIC). */
3484 sparc_cannot_force_const_mem (enum machine_mode mode, rtx x)
3486 switch (GET_CODE (x))
3491 /* Accept all non-symbolic constants. */
3495 /* Labels are OK iff we are non-PIC. */
3496 return flag_pic != 0;
3499 /* 'Naked' TLS symbol references are never OK,
3500 non-TLS symbols are OK iff we are non-PIC. */
3501 if (SYMBOL_REF_TLS_MODEL (x))
3504 return flag_pic != 0;
3507 return sparc_cannot_force_const_mem (mode, XEXP (x, 0));
3510 return sparc_cannot_force_const_mem (mode, XEXP (x, 0))
3511 || sparc_cannot_force_const_mem (mode, XEXP (x, 1));
3519 /* Global Offset Table support. */
3520 static GTY(()) rtx got_helper_rtx = NULL_RTX;
3521 static GTY(()) rtx global_offset_table_rtx = NULL_RTX;
3523 /* Return the SYMBOL_REF for the Global Offset Table. */
3525 static GTY(()) rtx sparc_got_symbol = NULL_RTX;
3530 if (!sparc_got_symbol)
3531 sparc_got_symbol = gen_rtx_SYMBOL_REF (Pmode, "_GLOBAL_OFFSET_TABLE_");
3533 return sparc_got_symbol;
3536 /* Ensure that we are not using patterns that are not OK with PIC. */
3546 op = recog_data.operand[i];
3547 gcc_assert (GET_CODE (op) != SYMBOL_REF
3548 && (GET_CODE (op) != CONST
3549 || (GET_CODE (XEXP (op, 0)) == MINUS
3550 && XEXP (XEXP (op, 0), 0) == sparc_got ()
3551 && GET_CODE (XEXP (XEXP (op, 0), 1)) == CONST)));
3558 /* Return true if X is an address which needs a temporary register when
3559 reloaded while generating PIC code. */
3562 pic_address_needs_scratch (rtx x)
3564 /* An address which is a symbolic plus a non SMALL_INT needs a temp reg. */
3565 if (GET_CODE (x) == CONST && GET_CODE (XEXP (x, 0)) == PLUS
3566 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF
3567 && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT
3568 && ! SMALL_INT (XEXP (XEXP (x, 0), 1)))
3574 /* Determine if a given RTX is a valid constant. We already know this
3575 satisfies CONSTANT_P. */
3578 sparc_legitimate_constant_p (enum machine_mode mode, rtx x)
3580 switch (GET_CODE (x))
3584 if (sparc_tls_referenced_p (x))
3589 if (GET_MODE (x) == VOIDmode)
3592 /* Floating point constants are generally not ok.
3593 The only exception is 0.0 and all-ones in VIS. */
3595 && SCALAR_FLOAT_MODE_P (mode)
3596 && (const_zero_operand (x, mode)
3597 || const_all_ones_operand (x, mode)))
3603 /* Vector constants are generally not ok.
3604 The only exception is 0 or -1 in VIS. */
3606 && (const_zero_operand (x, mode)
3607 || const_all_ones_operand (x, mode)))
3619 /* Determine if a given RTX is a valid constant address. */
3622 constant_address_p (rtx x)
3624 switch (GET_CODE (x))
3632 if (flag_pic && pic_address_needs_scratch (x))
3634 return sparc_legitimate_constant_p (Pmode, x);
3637 return !flag_pic && sparc_legitimate_constant_p (Pmode, x);
3644 /* Nonzero if the constant value X is a legitimate general operand
3645 when generating PIC code. It is given that flag_pic is on and
3646 that X satisfies CONSTANT_P or is a CONST_DOUBLE. */
3649 legitimate_pic_operand_p (rtx x)
3651 if (pic_address_needs_scratch (x))
3653 if (sparc_tls_referenced_p (x))
3658 #define RTX_OK_FOR_OFFSET_P(X, MODE) \
3660 && INTVAL (X) >= -0x1000 \
3661 && INTVAL (X) < (0x1000 - GET_MODE_SIZE (MODE)))
3663 #define RTX_OK_FOR_OLO10_P(X, MODE) \
3665 && INTVAL (X) >= -0x1000 \
3666 && INTVAL (X) < (0xc00 - GET_MODE_SIZE (MODE)))
3668 /* Handle the TARGET_LEGITIMATE_ADDRESS_P target hook.
3670 On SPARC, the actual legitimate addresses must be REG+REG or REG+SMALLINT
3671 ordinarily. This changes a bit when generating PIC. */
3674 sparc_legitimate_address_p (enum machine_mode mode, rtx addr, bool strict)
3676 rtx rs1 = NULL, rs2 = NULL, imm1 = NULL;
3678 if (REG_P (addr) || GET_CODE (addr) == SUBREG)
3680 else if (GET_CODE (addr) == PLUS)
3682 rs1 = XEXP (addr, 0);
3683 rs2 = XEXP (addr, 1);
3685 /* Canonicalize. REG comes first, if there are no regs,
3686 LO_SUM comes first. */
3688 && GET_CODE (rs1) != SUBREG
3690 || GET_CODE (rs2) == SUBREG
3691 || (GET_CODE (rs2) == LO_SUM && GET_CODE (rs1) != LO_SUM)))
3693 rs1 = XEXP (addr, 1);
3694 rs2 = XEXP (addr, 0);
3698 && rs1 == pic_offset_table_rtx
3700 && GET_CODE (rs2) != SUBREG
3701 && GET_CODE (rs2) != LO_SUM
3702 && GET_CODE (rs2) != MEM
3703 && !(GET_CODE (rs2) == SYMBOL_REF && SYMBOL_REF_TLS_MODEL (rs2))
3704 && (! symbolic_operand (rs2, VOIDmode) || mode == Pmode)
3705 && (GET_CODE (rs2) != CONST_INT || SMALL_INT (rs2)))
3707 || GET_CODE (rs1) == SUBREG)
3708 && RTX_OK_FOR_OFFSET_P (rs2, mode)))
3713 else if ((REG_P (rs1) || GET_CODE (rs1) == SUBREG)
3714 && (REG_P (rs2) || GET_CODE (rs2) == SUBREG))
3716 /* We prohibit REG + REG for TFmode when there are no quad move insns
3717 and we consequently need to split. We do this because REG+REG
3718 is not an offsettable address. If we get the situation in reload
3719 where source and destination of a movtf pattern are both MEMs with
3720 REG+REG address, then only one of them gets converted to an
3721 offsettable address. */
3723 && ! (TARGET_ARCH64 && TARGET_HARD_QUAD))
3726 /* Likewise for TImode, but in all cases. */
3730 /* We prohibit REG + REG on ARCH32 if not optimizing for
3731 DFmode/DImode because then mem_min_alignment is likely to be zero
3732 after reload and the forced split would lack a matching splitter
3734 if (TARGET_ARCH32 && !optimize
3735 && (mode == DFmode || mode == DImode))
3738 else if (USE_AS_OFFSETABLE_LO10
3739 && GET_CODE (rs1) == LO_SUM
3741 && ! TARGET_CM_MEDMID
3742 && RTX_OK_FOR_OLO10_P (rs2, mode))
3745 imm1 = XEXP (rs1, 1);
3746 rs1 = XEXP (rs1, 0);
3747 if (!CONSTANT_P (imm1)
3748 || (GET_CODE (rs1) == SYMBOL_REF && SYMBOL_REF_TLS_MODEL (rs1)))
3752 else if (GET_CODE (addr) == LO_SUM)
3754 rs1 = XEXP (addr, 0);
3755 imm1 = XEXP (addr, 1);
3757 if (!CONSTANT_P (imm1)
3758 || (GET_CODE (rs1) == SYMBOL_REF && SYMBOL_REF_TLS_MODEL (rs1)))
3761 /* We can't allow TFmode in 32-bit mode, because an offset greater
3762 than the alignment (8) may cause the LO_SUM to overflow. */
3763 if (mode == TFmode && TARGET_ARCH32)
3766 else if (GET_CODE (addr) == CONST_INT && SMALL_INT (addr))
3771 if (GET_CODE (rs1) == SUBREG)
3772 rs1 = SUBREG_REG (rs1);
3778 if (GET_CODE (rs2) == SUBREG)
3779 rs2 = SUBREG_REG (rs2);
3786 if (!REGNO_OK_FOR_BASE_P (REGNO (rs1))
3787 || (rs2 && !REGNO_OK_FOR_BASE_P (REGNO (rs2))))
3792 if ((! SPARC_INT_REG_P (REGNO (rs1))
3793 && REGNO (rs1) != FRAME_POINTER_REGNUM
3794 && REGNO (rs1) < FIRST_PSEUDO_REGISTER)
3796 && (! SPARC_INT_REG_P (REGNO (rs2))
3797 && REGNO (rs2) != FRAME_POINTER_REGNUM
3798 && REGNO (rs2) < FIRST_PSEUDO_REGISTER)))
3804 /* Return the SYMBOL_REF for the tls_get_addr function. */
3806 static GTY(()) rtx sparc_tls_symbol = NULL_RTX;
3809 sparc_tls_get_addr (void)
3811 if (!sparc_tls_symbol)
3812 sparc_tls_symbol = gen_rtx_SYMBOL_REF (Pmode, "__tls_get_addr");
3814 return sparc_tls_symbol;
3817 /* Return the Global Offset Table to be used in TLS mode. */
3820 sparc_tls_got (void)
3822 /* In PIC mode, this is just the PIC offset table. */
3825 crtl->uses_pic_offset_table = 1;
3826 return pic_offset_table_rtx;
3829 /* In non-PIC mode, Sun as (unlike GNU as) emits PC-relative relocations for
3830 the GOT symbol with the 32-bit ABI, so we reload the GOT register. */
3831 if (TARGET_SUN_TLS && TARGET_ARCH32)
3833 load_got_register ();
3834 return global_offset_table_rtx;
3837 /* In all other cases, we load a new pseudo with the GOT symbol. */
3838 return copy_to_reg (sparc_got ());
3841 /* Return true if X contains a thread-local symbol. */
3844 sparc_tls_referenced_p (rtx x)
3846 if (!TARGET_HAVE_TLS)
3849 if (GET_CODE (x) == CONST && GET_CODE (XEXP (x, 0)) == PLUS)
3850 x = XEXP (XEXP (x, 0), 0);
3852 if (GET_CODE (x) == SYMBOL_REF && SYMBOL_REF_TLS_MODEL (x))
3855 /* That's all we handle in sparc_legitimize_tls_address for now. */
3859 /* ADDR contains a thread-local SYMBOL_REF. Generate code to compute
3860 this (thread-local) address. */
3863 sparc_legitimize_tls_address (rtx addr)
3865 rtx temp1, temp2, temp3, ret, o0, got, insn;
3867 gcc_assert (can_create_pseudo_p ());
3869 if (GET_CODE (addr) == SYMBOL_REF)
3870 switch (SYMBOL_REF_TLS_MODEL (addr))
3872 case TLS_MODEL_GLOBAL_DYNAMIC:
3874 temp1 = gen_reg_rtx (SImode);
3875 temp2 = gen_reg_rtx (SImode);
3876 ret = gen_reg_rtx (Pmode);
3877 o0 = gen_rtx_REG (Pmode, 8);
3878 got = sparc_tls_got ();
3879 emit_insn (gen_tgd_hi22 (temp1, addr));
3880 emit_insn (gen_tgd_lo10 (temp2, temp1, addr));
3883 emit_insn (gen_tgd_add32 (o0, got, temp2, addr));
3884 insn = emit_call_insn (gen_tgd_call32 (o0, sparc_tls_get_addr (),
3889 emit_insn (gen_tgd_add64 (o0, got, temp2, addr));
3890 insn = emit_call_insn (gen_tgd_call64 (o0, sparc_tls_get_addr (),
3893 use_reg (&CALL_INSN_FUNCTION_USAGE (insn), o0);
3894 insn = get_insns ();
3896 emit_libcall_block (insn, ret, o0, addr);
3899 case TLS_MODEL_LOCAL_DYNAMIC:
3901 temp1 = gen_reg_rtx (SImode);
3902 temp2 = gen_reg_rtx (SImode);
3903 temp3 = gen_reg_rtx (Pmode);
3904 ret = gen_reg_rtx (Pmode);
3905 o0 = gen_rtx_REG (Pmode, 8);
3906 got = sparc_tls_got ();
3907 emit_insn (gen_tldm_hi22 (temp1));
3908 emit_insn (gen_tldm_lo10 (temp2, temp1));
3911 emit_insn (gen_tldm_add32 (o0, got, temp2));
3912 insn = emit_call_insn (gen_tldm_call32 (o0, sparc_tls_get_addr (),
3917 emit_insn (gen_tldm_add64 (o0, got, temp2));
3918 insn = emit_call_insn (gen_tldm_call64 (o0, sparc_tls_get_addr (),
3921 use_reg (&CALL_INSN_FUNCTION_USAGE (insn), o0);
3922 insn = get_insns ();
3924 emit_libcall_block (insn, temp3, o0,
3925 gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx),
3926 UNSPEC_TLSLD_BASE));
3927 temp1 = gen_reg_rtx (SImode);
3928 temp2 = gen_reg_rtx (SImode);
3929 emit_insn (gen_tldo_hix22 (temp1, addr));
3930 emit_insn (gen_tldo_lox10 (temp2, temp1, addr));
3932 emit_insn (gen_tldo_add32 (ret, temp3, temp2, addr));
3934 emit_insn (gen_tldo_add64 (ret, temp3, temp2, addr));
3937 case TLS_MODEL_INITIAL_EXEC:
3938 temp1 = gen_reg_rtx (SImode);
3939 temp2 = gen_reg_rtx (SImode);
3940 temp3 = gen_reg_rtx (Pmode);
3941 got = sparc_tls_got ();
3942 emit_insn (gen_tie_hi22 (temp1, addr));
3943 emit_insn (gen_tie_lo10 (temp2, temp1, addr));
3945 emit_insn (gen_tie_ld32 (temp3, got, temp2, addr));
3947 emit_insn (gen_tie_ld64 (temp3, got, temp2, addr));
3950 ret = gen_reg_rtx (Pmode);
3952 emit_insn (gen_tie_add32 (ret, gen_rtx_REG (Pmode, 7),
3955 emit_insn (gen_tie_add64 (ret, gen_rtx_REG (Pmode, 7),
3959 ret = gen_rtx_PLUS (Pmode, gen_rtx_REG (Pmode, 7), temp3);
3962 case TLS_MODEL_LOCAL_EXEC:
3963 temp1 = gen_reg_rtx (Pmode);
3964 temp2 = gen_reg_rtx (Pmode);
3967 emit_insn (gen_tle_hix22_sp32 (temp1, addr));
3968 emit_insn (gen_tle_lox10_sp32 (temp2, temp1, addr));
3972 emit_insn (gen_tle_hix22_sp64 (temp1, addr));
3973 emit_insn (gen_tle_lox10_sp64 (temp2, temp1, addr));
3975 ret = gen_rtx_PLUS (Pmode, gen_rtx_REG (Pmode, 7), temp2);
3982 else if (GET_CODE (addr) == CONST)
3986 gcc_assert (GET_CODE (XEXP (addr, 0)) == PLUS);
3988 base = sparc_legitimize_tls_address (XEXP (XEXP (addr, 0), 0));
3989 offset = XEXP (XEXP (addr, 0), 1);
3991 base = force_operand (base, NULL_RTX);
3992 if (!(GET_CODE (offset) == CONST_INT && SMALL_INT (offset)))
3993 offset = force_reg (Pmode, offset);
3994 ret = gen_rtx_PLUS (Pmode, base, offset);
3998 gcc_unreachable (); /* for now ... */
4003 /* Legitimize PIC addresses. If the address is already position-independent,
4004 we return ORIG. Newly generated position-independent addresses go into a
4005 reg. This is REG if nonzero, otherwise we allocate register(s) as
4009 sparc_legitimize_pic_address (rtx orig, rtx reg)
4011 bool gotdata_op = false;
4013 if (GET_CODE (orig) == SYMBOL_REF
4014 /* See the comment in sparc_expand_move. */
4015 || (GET_CODE (orig) == LABEL_REF && !can_use_mov_pic_label_ref (orig)))
4017 rtx pic_ref, address;
4022 gcc_assert (can_create_pseudo_p ());
4023 reg = gen_reg_rtx (Pmode);
4028 /* If not during reload, allocate another temp reg here for loading
4029 in the address, so that these instructions can be optimized
4031 rtx temp_reg = (! can_create_pseudo_p ()
4032 ? reg : gen_reg_rtx (Pmode));
4034 /* Must put the SYMBOL_REF inside an UNSPEC here so that cse
4035 won't get confused into thinking that these two instructions
4036 are loading in the true address of the symbol. If in the
4037 future a PIC rtx exists, that should be used instead. */
4040 emit_insn (gen_movdi_high_pic (temp_reg, orig));
4041 emit_insn (gen_movdi_lo_sum_pic (temp_reg, temp_reg, orig));
4045 emit_insn (gen_movsi_high_pic (temp_reg, orig));
4046 emit_insn (gen_movsi_lo_sum_pic (temp_reg, temp_reg, orig));
4054 crtl->uses_pic_offset_table = 1;
4058 insn = emit_insn (gen_movdi_pic_gotdata_op (reg,
4059 pic_offset_table_rtx,
4062 insn = emit_insn (gen_movsi_pic_gotdata_op (reg,
4063 pic_offset_table_rtx,
4069 = gen_const_mem (Pmode,
4070 gen_rtx_PLUS (Pmode,
4071 pic_offset_table_rtx, address));
4072 insn = emit_move_insn (reg, pic_ref);
4075 /* Put a REG_EQUAL note on this insn, so that it can be optimized
4077 set_unique_reg_note (insn, REG_EQUAL, orig);
4080 else if (GET_CODE (orig) == CONST)
4084 if (GET_CODE (XEXP (orig, 0)) == PLUS
4085 && XEXP (XEXP (orig, 0), 0) == pic_offset_table_rtx)
4090 gcc_assert (can_create_pseudo_p ());
4091 reg = gen_reg_rtx (Pmode);
4094 gcc_assert (GET_CODE (XEXP (orig, 0)) == PLUS);
4095 base = sparc_legitimize_pic_address (XEXP (XEXP (orig, 0), 0), reg);
4096 offset = sparc_legitimize_pic_address (XEXP (XEXP (orig, 0), 1),
4097 base == reg ? NULL_RTX : reg);
4099 if (GET_CODE (offset) == CONST_INT)
4101 if (SMALL_INT (offset))
4102 return plus_constant (Pmode, base, INTVAL (offset));
4103 else if (can_create_pseudo_p ())
4104 offset = force_reg (Pmode, offset);
4106 /* If we reach here, then something is seriously wrong. */
4109 return gen_rtx_PLUS (Pmode, base, offset);
4111 else if (GET_CODE (orig) == LABEL_REF)
4112 /* ??? We ought to be checking that the register is live instead, in case
4113 it is eliminated. */
4114 crtl->uses_pic_offset_table = 1;
4119 /* Try machine-dependent ways of modifying an illegitimate address X
4120 to be legitimate. If we find one, return the new, valid address.
4122 OLDX is the address as it was before break_out_memory_refs was called.
4123 In some cases it is useful to look at this to decide what needs to be done.
4125 MODE is the mode of the operand pointed to by X.
4127 On SPARC, change REG+N into REG+REG, and REG+(X*Y) into REG+REG. */
4130 sparc_legitimize_address (rtx x, rtx oldx ATTRIBUTE_UNUSED,
4131 enum machine_mode mode)
4135 if (GET_CODE (x) == PLUS && GET_CODE (XEXP (x, 0)) == MULT)
4136 x = gen_rtx_PLUS (Pmode, XEXP (x, 1),
4137 force_operand (XEXP (x, 0), NULL_RTX));
4138 if (GET_CODE (x) == PLUS && GET_CODE (XEXP (x, 1)) == MULT)
4139 x = gen_rtx_PLUS (Pmode, XEXP (x, 0),
4140 force_operand (XEXP (x, 1), NULL_RTX));
4141 if (GET_CODE (x) == PLUS && GET_CODE (XEXP (x, 0)) == PLUS)
4142 x = gen_rtx_PLUS (Pmode, force_operand (XEXP (x, 0), NULL_RTX),
4144 if (GET_CODE (x) == PLUS && GET_CODE (XEXP (x, 1)) == PLUS)
4145 x = gen_rtx_PLUS (Pmode, XEXP (x, 0),
4146 force_operand (XEXP (x, 1), NULL_RTX));
4148 if (x != orig_x && sparc_legitimate_address_p (mode, x, FALSE))
4151 if (sparc_tls_referenced_p (x))
4152 x = sparc_legitimize_tls_address (x);
4154 x = sparc_legitimize_pic_address (x, NULL_RTX);
4155 else if (GET_CODE (x) == PLUS && CONSTANT_ADDRESS_P (XEXP (x, 1)))
4156 x = gen_rtx_PLUS (Pmode, XEXP (x, 0),
4157 copy_to_mode_reg (Pmode, XEXP (x, 1)));
4158 else if (GET_CODE (x) == PLUS && CONSTANT_ADDRESS_P (XEXP (x, 0)))
4159 x = gen_rtx_PLUS (Pmode, XEXP (x, 1),
4160 copy_to_mode_reg (Pmode, XEXP (x, 0)));
4161 else if (GET_CODE (x) == SYMBOL_REF
4162 || GET_CODE (x) == CONST
4163 || GET_CODE (x) == LABEL_REF)
4164 x = copy_to_suggested_reg (x, NULL_RTX, Pmode);
4169 /* Delegitimize an address that was legitimized by the above function. */
4172 sparc_delegitimize_address (rtx x)
4174 x = delegitimize_mem_from_attrs (x);
4176 if (GET_CODE (x) == LO_SUM && GET_CODE (XEXP (x, 1)) == UNSPEC)
4177 switch (XINT (XEXP (x, 1), 1))
4179 case UNSPEC_MOVE_PIC:
4181 x = XVECEXP (XEXP (x, 1), 0, 0);
4182 gcc_assert (GET_CODE (x) == SYMBOL_REF);
4188 /* This is generated by mov{si,di}_pic_label_ref in PIC mode. */
4189 if (GET_CODE (x) == MINUS
4190 && REG_P (XEXP (x, 0))
4191 && REGNO (XEXP (x, 0)) == PIC_OFFSET_TABLE_REGNUM
4192 && GET_CODE (XEXP (x, 1)) == LO_SUM
4193 && GET_CODE (XEXP (XEXP (x, 1), 1)) == UNSPEC
4194 && XINT (XEXP (XEXP (x, 1), 1), 1) == UNSPEC_MOVE_PIC_LABEL)
4196 x = XVECEXP (XEXP (XEXP (x, 1), 1), 0, 0);
4197 gcc_assert (GET_CODE (x) == LABEL_REF);
4203 /* SPARC implementation of LEGITIMIZE_RELOAD_ADDRESS. Returns a value to
4204 replace the input X, or the original X if no replacement is called for.
4205 The output parameter *WIN is 1 if the calling macro should goto WIN,
4208 For SPARC, we wish to handle addresses by splitting them into
4209 HIGH+LO_SUM pairs, retaining the LO_SUM in the memory reference.
4210 This cuts the number of extra insns by one.
4212 Do nothing when generating PIC code and the address is a symbolic
4213 operand or requires a scratch register. */
4216 sparc_legitimize_reload_address (rtx x, enum machine_mode mode,
4217 int opnum, int type,
4218 int ind_levels ATTRIBUTE_UNUSED, int *win)
4220 /* Decompose SImode constants into HIGH+LO_SUM. */
4222 && (mode != TFmode || TARGET_ARCH64)
4223 && GET_MODE (x) == SImode
4224 && GET_CODE (x) != LO_SUM
4225 && GET_CODE (x) != HIGH
4226 && sparc_cmodel <= CM_MEDLOW
4228 && (symbolic_operand (x, Pmode) || pic_address_needs_scratch (x))))
4230 x = gen_rtx_LO_SUM (GET_MODE (x), gen_rtx_HIGH (GET_MODE (x), x), x);
4231 push_reload (XEXP (x, 0), NULL_RTX, &XEXP (x, 0), NULL,
4232 BASE_REG_CLASS, GET_MODE (x), VOIDmode, 0, 0,
4233 opnum, (enum reload_type)type);
4238 /* We have to recognize what we have already generated above. */
4239 if (GET_CODE (x) == LO_SUM && GET_CODE (XEXP (x, 0)) == HIGH)
4241 push_reload (XEXP (x, 0), NULL_RTX, &XEXP (x, 0), NULL,
4242 BASE_REG_CLASS, GET_MODE (x), VOIDmode, 0, 0,
4243 opnum, (enum reload_type)type);
4252 /* Return true if ADDR (a legitimate address expression)
4253 has an effect that depends on the machine mode it is used for.
4259 is not equivalent to
4261 (mem:QI [%l7+a]) (mem:QI [%l7+a+1])
4263 because [%l7+a+1] is interpreted as the address of (a+1). */
4267 sparc_mode_dependent_address_p (const_rtx addr,
4268 addr_space_t as ATTRIBUTE_UNUSED)
4270 if (flag_pic && GET_CODE (addr) == PLUS)
4272 rtx op0 = XEXP (addr, 0);
4273 rtx op1 = XEXP (addr, 1);
4274 if (op0 == pic_offset_table_rtx
4275 && symbolic_operand (op1, VOIDmode))
4282 #ifdef HAVE_GAS_HIDDEN
4283 # define USE_HIDDEN_LINKONCE 1
4285 # define USE_HIDDEN_LINKONCE 0
4289 get_pc_thunk_name (char name[32], unsigned int regno)
4291 const char *reg_name = reg_names[regno];
4293 /* Skip the leading '%' as that cannot be used in a
4297 if (USE_HIDDEN_LINKONCE)
4298 sprintf (name, "__sparc_get_pc_thunk.%s", reg_name);
4300 ASM_GENERATE_INTERNAL_LABEL (name, "LADDPC", regno);
4303 /* Wrapper around the load_pcrel_sym{si,di} patterns. */
4306 gen_load_pcrel_sym (rtx op0, rtx op1, rtx op2, rtx op3)
4308 int orig_flag_pic = flag_pic;
4311 /* The load_pcrel_sym{si,di} patterns require absolute addressing. */
4314 insn = gen_load_pcrel_symdi (op0, op1, op2, op3);
4316 insn = gen_load_pcrel_symsi (op0, op1, op2, op3);
4317 flag_pic = orig_flag_pic;
4322 /* Emit code to load the GOT register. */
4325 load_got_register (void)
4327 /* In PIC mode, this will retrieve pic_offset_table_rtx. */
4328 if (!global_offset_table_rtx)
4329 global_offset_table_rtx = gen_rtx_REG (Pmode, GLOBAL_OFFSET_TABLE_REGNUM);
4331 if (TARGET_VXWORKS_RTP)
4332 emit_insn (gen_vxworks_load_got ());
4335 /* The GOT symbol is subject to a PC-relative relocation so we need a
4336 helper function to add the PC value and thus get the final value. */
4337 if (!got_helper_rtx)
4340 get_pc_thunk_name (name, GLOBAL_OFFSET_TABLE_REGNUM);
4341 got_helper_rtx = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (name));
4344 emit_insn (gen_load_pcrel_sym (global_offset_table_rtx, sparc_got (),
4346 GEN_INT (GLOBAL_OFFSET_TABLE_REGNUM)));
4349 /* Need to emit this whether or not we obey regdecls,
4350 since setjmp/longjmp can cause life info to screw up.
4351 ??? In the case where we don't obey regdecls, this is not sufficient
4352 since we may not fall out the bottom. */
4353 emit_use (global_offset_table_rtx);
4356 /* Emit a call instruction with the pattern given by PAT. ADDR is the
4357 address of the call target. */
4360 sparc_emit_call_insn (rtx pat, rtx addr)
4364 insn = emit_call_insn (pat);
4366 /* The PIC register is live on entry to VxWorks PIC PLT entries. */
4367 if (TARGET_VXWORKS_RTP
4369 && GET_CODE (addr) == SYMBOL_REF
4370 && (SYMBOL_REF_DECL (addr)
4371 ? !targetm.binds_local_p (SYMBOL_REF_DECL (addr))
4372 : !SYMBOL_REF_LOCAL_P (addr)))
4374 use_reg (&CALL_INSN_FUNCTION_USAGE (insn), pic_offset_table_rtx);
4375 crtl->uses_pic_offset_table = 1;
4379 /* Return 1 if RTX is a MEM which is known to be aligned to at
4380 least a DESIRED byte boundary. */
4383 mem_min_alignment (rtx mem, int desired)
4385 rtx addr, base, offset;
4387 /* If it's not a MEM we can't accept it. */
4388 if (GET_CODE (mem) != MEM)
4392 if (!TARGET_UNALIGNED_DOUBLES
4393 && MEM_ALIGN (mem) / BITS_PER_UNIT >= (unsigned)desired)
4396 /* ??? The rest of the function predates MEM_ALIGN so
4397 there is probably a bit of redundancy. */
4398 addr = XEXP (mem, 0);
4399 base = offset = NULL_RTX;
4400 if (GET_CODE (addr) == PLUS)
4402 if (GET_CODE (XEXP (addr, 0)) == REG)
4404 base = XEXP (addr, 0);
4406 /* What we are saying here is that if the base
4407 REG is aligned properly, the compiler will make
4408 sure any REG based index upon it will be so
4410 if (GET_CODE (XEXP (addr, 1)) == CONST_INT)
4411 offset = XEXP (addr, 1);
4413 offset = const0_rtx;
4416 else if (GET_CODE (addr) == REG)
4419 offset = const0_rtx;
4422 if (base != NULL_RTX)
4424 int regno = REGNO (base);
4426 if (regno != HARD_FRAME_POINTER_REGNUM && regno != STACK_POINTER_REGNUM)
4428 /* Check if the compiler has recorded some information
4429 about the alignment of the base REG. If reload has
4430 completed, we already matched with proper alignments.
4431 If not running global_alloc, reload might give us
4432 unaligned pointer to local stack though. */
4434 && REGNO_POINTER_ALIGN (regno) >= desired * BITS_PER_UNIT)
4435 || (optimize && reload_completed))
4436 && (INTVAL (offset) & (desired - 1)) == 0)
4441 if (((INTVAL (offset) - SPARC_STACK_BIAS) & (desired - 1)) == 0)
4445 else if (! TARGET_UNALIGNED_DOUBLES
4446 || CONSTANT_P (addr)
4447 || GET_CODE (addr) == LO_SUM)
4449 /* Anything else we know is properly aligned unless TARGET_UNALIGNED_DOUBLES
4450 is true, in which case we can only assume that an access is aligned if
4451 it is to a constant address, or the address involves a LO_SUM. */
4455 /* An obviously unaligned address. */
4460 /* Vectors to keep interesting information about registers where it can easily
4461 be got. We used to use the actual mode value as the bit number, but there
4462 are more than 32 modes now. Instead we use two tables: one indexed by
4463 hard register number, and one indexed by mode. */
4465 /* The purpose of sparc_mode_class is to shrink the range of modes so that
4466 they all fit (as bit numbers) in a 32-bit word (again). Each real mode is
4467 mapped into one sparc_mode_class mode. */
4469 enum sparc_mode_class {
4470 H_MODE, S_MODE, D_MODE, T_MODE, O_MODE,
4471 SF_MODE, DF_MODE, TF_MODE, OF_MODE,
4475 /* Modes for single-word and smaller quantities. */
4477 ((1 << (int) H_MODE) | (1 << (int) S_MODE) | (1 << (int) SF_MODE))
4479 /* Modes for double-word and smaller quantities. */
4480 #define D_MODES (S_MODES | (1 << (int) D_MODE) | (1 << DF_MODE))
4482 /* Modes for quad-word and smaller quantities. */
4483 #define T_MODES (D_MODES | (1 << (int) T_MODE) | (1 << (int) TF_MODE))
4485 /* Modes for 8-word and smaller quantities. */
4486 #define O_MODES (T_MODES | (1 << (int) O_MODE) | (1 << (int) OF_MODE))
4488 /* Modes for single-float quantities. */
4489 #define SF_MODES ((1 << (int) S_MODE) | (1 << (int) SF_MODE))
4491 /* Modes for double-float and smaller quantities. */
4492 #define DF_MODES (SF_MODES | (1 << (int) D_MODE) | (1 << DF_MODE))
4494 /* Modes for quad-float and smaller quantities. */
4495 #define TF_MODES (DF_MODES | (1 << (int) TF_MODE))
4497 /* Modes for quad-float pairs and smaller quantities. */
4498 #define OF_MODES (TF_MODES | (1 << (int) OF_MODE))
4500 /* Modes for double-float only quantities. */
4501 #define DF_MODES_NO_S ((1 << (int) D_MODE) | (1 << (int) DF_MODE))
4503 /* Modes for quad-float and double-float only quantities. */
4504 #define TF_MODES_NO_S (DF_MODES_NO_S | (1 << (int) TF_MODE))
4506 /* Modes for quad-float pairs and double-float only quantities. */
4507 #define OF_MODES_NO_S (TF_MODES_NO_S | (1 << (int) OF_MODE))
4509 /* Modes for condition codes. */
4510 #define CC_MODES (1 << (int) CC_MODE)
4511 #define CCFP_MODES (1 << (int) CCFP_MODE)
4513 /* Value is 1 if register/mode pair is acceptable on sparc.
4514 The funny mixture of D and T modes is because integer operations
4515 do not specially operate on tetra quantities, so non-quad-aligned
4516 registers can hold quadword quantities (except %o4 and %i4 because
4517 they cross fixed registers). */
4519 /* This points to either the 32 bit or the 64 bit version. */
4520 const int *hard_regno_mode_classes;
4522 static const int hard_32bit_mode_classes[] = {
4523 S_MODES, S_MODES, T_MODES, S_MODES, T_MODES, S_MODES, D_MODES, S_MODES,
4524 T_MODES, S_MODES, T_MODES, S_MODES, D_MODES, S_MODES, D_MODES, S_MODES,
4525 T_MODES, S_MODES, T_MODES, S_MODES, T_MODES, S_MODES, D_MODES, S_MODES,
4526 T_MODES, S_MODES, T_MODES, S_MODES, D_MODES, S_MODES, D_MODES, S_MODES,
4528 OF_MODES, SF_MODES, DF_MODES, SF_MODES, OF_MODES, SF_MODES, DF_MODES, SF_MODES,
4529 OF_MODES, SF_MODES, DF_MODES, SF_MODES, OF_MODES, SF_MODES, DF_MODES, SF_MODES,
4530 OF_MODES, SF_MODES, DF_MODES, SF_MODES, OF_MODES, SF_MODES, DF_MODES, SF_MODES,
4531 OF_MODES, SF_MODES, DF_MODES, SF_MODES, TF_MODES, SF_MODES, DF_MODES, SF_MODES,
4533 /* FP regs f32 to f63. Only the even numbered registers actually exist,
4534 and none can hold SFmode/SImode values. */
4535 OF_MODES_NO_S, 0, DF_MODES_NO_S, 0, OF_MODES_NO_S, 0, DF_MODES_NO_S, 0,
4536 OF_MODES_NO_S, 0, DF_MODES_NO_S, 0, OF_MODES_NO_S, 0, DF_MODES_NO_S, 0,
4537 OF_MODES_NO_S, 0, DF_MODES_NO_S, 0, OF_MODES_NO_S, 0, DF_MODES_NO_S, 0,
4538 OF_MODES_NO_S, 0, DF_MODES_NO_S, 0, TF_MODES_NO_S, 0, DF_MODES_NO_S, 0,
4541 CCFP_MODES, CCFP_MODES, CCFP_MODES, CCFP_MODES,
4543 /* %icc, %sfp, %gsr */
4544 CC_MODES, 0, D_MODES
4547 static const int hard_64bit_mode_classes[] = {
4548 D_MODES, D_MODES, T_MODES, D_MODES, T_MODES, D_MODES, T_MODES, D_MODES,
4549 O_MODES, D_MODES, T_MODES, D_MODES, T_MODES, D_MODES, T_MODES, D_MODES,
4550 T_MODES, D_MODES, T_MODES, D_MODES, T_MODES, D_MODES, T_MODES, D_MODES,
4551 O_MODES, D_MODES, T_MODES, D_MODES, T_MODES, D_MODES, T_MODES, D_MODES,
4553 OF_MODES, SF_MODES, DF_MODES, SF_MODES, OF_MODES, SF_MODES, DF_MODES, SF_MODES,
4554 OF_MODES, SF_MODES, DF_MODES, SF_MODES, OF_MODES, SF_MODES, DF_MODES, SF_MODES,
4555 OF_MODES, SF_MODES, DF_MODES, SF_MODES, OF_MODES, SF_MODES, DF_MODES, SF_MODES,
4556 OF_MODES, SF_MODES, DF_MODES, SF_MODES, TF_MODES, SF_MODES, DF_MODES, SF_MODES,
4558 /* FP regs f32 to f63. Only the even numbered registers actually exist,
4559 and none can hold SFmode/SImode values. */
4560 OF_MODES_NO_S, 0, DF_MODES_NO_S, 0, OF_MODES_NO_S, 0, DF_MODES_NO_S, 0,
4561 OF_MODES_NO_S, 0, DF_MODES_NO_S, 0, OF_MODES_NO_S, 0, DF_MODES_NO_S, 0,
4562 OF_MODES_NO_S, 0, DF_MODES_NO_S, 0, OF_MODES_NO_S, 0, DF_MODES_NO_S, 0,
4563 OF_MODES_NO_S, 0, DF_MODES_NO_S, 0, TF_MODES_NO_S, 0, DF_MODES_NO_S, 0,
4566 CCFP_MODES, CCFP_MODES, CCFP_MODES, CCFP_MODES,
4568 /* %icc, %sfp, %gsr */
4569 CC_MODES, 0, D_MODES
4572 int sparc_mode_class [NUM_MACHINE_MODES];
4574 enum reg_class sparc_regno_reg_class[FIRST_PSEUDO_REGISTER];
4577 sparc_init_modes (void)
4581 for (i = 0; i < NUM_MACHINE_MODES; i++)
4583 switch (GET_MODE_CLASS (i))
4586 case MODE_PARTIAL_INT:
4587 case MODE_COMPLEX_INT:
4588 if (GET_MODE_SIZE (i) < 4)
4589 sparc_mode_class[i] = 1 << (int) H_MODE;
4590 else if (GET_MODE_SIZE (i) == 4)
4591 sparc_mode_class[i] = 1 << (int) S_MODE;
4592 else if (GET_MODE_SIZE (i) == 8)
4593 sparc_mode_class[i] = 1 << (int) D_MODE;
4594 else if (GET_MODE_SIZE (i) == 16)
4595 sparc_mode_class[i] = 1 << (int) T_MODE;
4596 else if (GET_MODE_SIZE (i) == 32)
4597 sparc_mode_class[i] = 1 << (int) O_MODE;
4599 sparc_mode_class[i] = 0;
4601 case MODE_VECTOR_INT:
4602 if (GET_MODE_SIZE (i) == 4)
4603 sparc_mode_class[i] = 1 << (int) SF_MODE;
4604 else if (GET_MODE_SIZE (i) == 8)
4605 sparc_mode_class[i] = 1 << (int) DF_MODE;
4607 sparc_mode_class[i] = 0;
4610 case MODE_COMPLEX_FLOAT:
4611 if (GET_MODE_SIZE (i) == 4)
4612 sparc_mode_class[i] = 1 << (int) SF_MODE;
4613 else if (GET_MODE_SIZE (i) == 8)
4614 sparc_mode_class[i] = 1 << (int) DF_MODE;
4615 else if (GET_MODE_SIZE (i) == 16)
4616 sparc_mode_class[i] = 1 << (int) TF_MODE;
4617 else if (GET_MODE_SIZE (i) == 32)
4618 sparc_mode_class[i] = 1 << (int) OF_MODE;
4620 sparc_mode_class[i] = 0;
4623 if (i == (int) CCFPmode || i == (int) CCFPEmode)
4624 sparc_mode_class[i] = 1 << (int) CCFP_MODE;
4626 sparc_mode_class[i] = 1 << (int) CC_MODE;
4629 sparc_mode_class[i] = 0;
4635 hard_regno_mode_classes = hard_64bit_mode_classes;
4637 hard_regno_mode_classes = hard_32bit_mode_classes;
4639 /* Initialize the array used by REGNO_REG_CLASS. */
4640 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
4642 if (i < 16 && TARGET_V8PLUS)
4643 sparc_regno_reg_class[i] = I64_REGS;
4644 else if (i < 32 || i == FRAME_POINTER_REGNUM)
4645 sparc_regno_reg_class[i] = GENERAL_REGS;
4647 sparc_regno_reg_class[i] = FP_REGS;
4649 sparc_regno_reg_class[i] = EXTRA_FP_REGS;
4651 sparc_regno_reg_class[i] = FPCC_REGS;
4653 sparc_regno_reg_class[i] = NO_REGS;
4657 /* Return whether REGNO, a global or FP register, must be saved/restored. */
4660 save_global_or_fp_reg_p (unsigned int regno,
4661 int leaf_function ATTRIBUTE_UNUSED)
4663 return !call_used_regs[regno] && df_regs_ever_live_p (regno);
4666 /* Return whether the return address register (%i7) is needed. */
4669 return_addr_reg_needed_p (int leaf_function)
4671 /* If it is live, for example because of __builtin_return_address (0). */
4672 if (df_regs_ever_live_p (RETURN_ADDR_REGNUM))
4675 /* Otherwise, it is needed as save register if %o7 is clobbered. */
4677 /* Loading the GOT register clobbers %o7. */
4678 || crtl->uses_pic_offset_table
4679 || df_regs_ever_live_p (INCOMING_RETURN_ADDR_REGNUM))
4685 /* Return whether REGNO, a local or in register, must be saved/restored. */
4688 save_local_or_in_reg_p (unsigned int regno, int leaf_function)
4690 /* General case: call-saved registers live at some point. */
4691 if (!call_used_regs[regno] && df_regs_ever_live_p (regno))
4694 /* Frame pointer register (%fp) if needed. */
4695 if (regno == HARD_FRAME_POINTER_REGNUM && frame_pointer_needed)
4698 /* Return address register (%i7) if needed. */
4699 if (regno == RETURN_ADDR_REGNUM && return_addr_reg_needed_p (leaf_function))
4702 /* GOT register (%l7) if needed. */
4703 if (regno == PIC_OFFSET_TABLE_REGNUM && crtl->uses_pic_offset_table)
4706 /* If the function accesses prior frames, the frame pointer and the return
4707 address of the previous frame must be saved on the stack. */
4708 if (crtl->accesses_prior_frames
4709 && (regno == HARD_FRAME_POINTER_REGNUM || regno == RETURN_ADDR_REGNUM))
4715 /* Compute the frame size required by the function. This function is called
4716 during the reload pass and also by sparc_expand_prologue. */
4719 sparc_compute_frame_size (HOST_WIDE_INT size, int leaf_function)
4721 HOST_WIDE_INT frame_size, apparent_frame_size;
4722 int args_size, n_global_fp_regs = 0;
4723 bool save_local_in_regs_p = false;
4726 /* If the function allocates dynamic stack space, the dynamic offset is
4727 computed early and contains REG_PARM_STACK_SPACE, so we need to cope. */
4728 if (leaf_function && !cfun->calls_alloca)
4731 args_size = crtl->outgoing_args_size + REG_PARM_STACK_SPACE (cfun->decl);
4733 /* Calculate space needed for global registers. */
4735 for (i = 0; i < 8; i++)
4736 if (save_global_or_fp_reg_p (i, 0))
4737 n_global_fp_regs += 2;
4739 for (i = 0; i < 8; i += 2)
4740 if (save_global_or_fp_reg_p (i, 0) || save_global_or_fp_reg_p (i + 1, 0))
4741 n_global_fp_regs += 2;
4743 /* In the flat window model, find out which local and in registers need to
4744 be saved. We don't reserve space in the current frame for them as they
4745 will be spilled into the register window save area of the caller's frame.
4746 However, as soon as we use this register window save area, we must create
4747 that of the current frame to make it the live one. */
4749 for (i = 16; i < 32; i++)
4750 if (save_local_or_in_reg_p (i, leaf_function))
4752 save_local_in_regs_p = true;
4756 /* Calculate space needed for FP registers. */
4757 for (i = 32; i < (TARGET_V9 ? 96 : 64); i += 2)
4758 if (save_global_or_fp_reg_p (i, 0) || save_global_or_fp_reg_p (i + 1, 0))
4759 n_global_fp_regs += 2;
4762 && n_global_fp_regs == 0
4764 && !save_local_in_regs_p)
4765 frame_size = apparent_frame_size = 0;
4768 /* We subtract STARTING_FRAME_OFFSET, remember it's negative. */
4769 apparent_frame_size = (size - STARTING_FRAME_OFFSET + 7) & -8;
4770 apparent_frame_size += n_global_fp_regs * 4;
4772 /* We need to add the size of the outgoing argument area. */
4773 frame_size = apparent_frame_size + ((args_size + 7) & -8);
4775 /* And that of the register window save area. */
4776 frame_size += FIRST_PARM_OFFSET (cfun->decl);
4778 /* Finally, bump to the appropriate alignment. */
4779 frame_size = SPARC_STACK_ALIGN (frame_size);
4782 /* Set up values for use in prologue and epilogue. */
4783 sparc_frame_size = frame_size;
4784 sparc_apparent_frame_size = apparent_frame_size;
4785 sparc_n_global_fp_regs = n_global_fp_regs;
4786 sparc_save_local_in_regs_p = save_local_in_regs_p;
4791 /* Implement the macro INITIAL_ELIMINATION_OFFSET, return the OFFSET. */
4794 sparc_initial_elimination_offset (int to)
4798 if (to == STACK_POINTER_REGNUM)
4799 offset = sparc_compute_frame_size (get_frame_size (), crtl->is_leaf);
4803 offset += SPARC_STACK_BIAS;
4807 /* Output any necessary .register pseudo-ops. */
4810 sparc_output_scratch_registers (FILE *file ATTRIBUTE_UNUSED)
4812 #ifdef HAVE_AS_REGISTER_PSEUDO_OP
4818 /* Check if %g[2367] were used without
4819 .register being printed for them already. */
4820 for (i = 2; i < 8; i++)
4822 if (df_regs_ever_live_p (i)
4823 && ! sparc_hard_reg_printed [i])
4825 sparc_hard_reg_printed [i] = 1;
4826 /* %g7 is used as TLS base register, use #ignore
4827 for it instead of #scratch. */
4828 fprintf (file, "\t.register\t%%g%d, #%s\n", i,
4829 i == 7 ? "ignore" : "scratch");
4836 #define PROBE_INTERVAL (1 << STACK_CHECK_PROBE_INTERVAL_EXP)
4838 #if PROBE_INTERVAL > 4096
4839 #error Cannot use indexed addressing mode for stack probing
4842 /* Emit code to probe a range of stack addresses from FIRST to FIRST+SIZE,
4843 inclusive. These are offsets from the current stack pointer.
4845 Note that we don't use the REG+REG addressing mode for the probes because
4846 of the stack bias in 64-bit mode. And it doesn't really buy us anything
4847 so the advantages of having a single code win here. */
4850 sparc_emit_probe_stack_range (HOST_WIDE_INT first, HOST_WIDE_INT size)
4852 rtx g1 = gen_rtx_REG (Pmode, 1);
4854 /* See if we have a constant small number of probes to generate. If so,
4855 that's the easy case. */
4856 if (size <= PROBE_INTERVAL)
4858 emit_move_insn (g1, GEN_INT (first));
4859 emit_insn (gen_rtx_SET (VOIDmode, g1,
4860 gen_rtx_MINUS (Pmode, stack_pointer_rtx, g1)));
4861 emit_stack_probe (plus_constant (Pmode, g1, -size));
4864 /* The run-time loop is made up of 10 insns in the generic case while the
4865 compile-time loop is made up of 4+2*(n-2) insns for n # of intervals. */
4866 else if (size <= 5 * PROBE_INTERVAL)
4870 emit_move_insn (g1, GEN_INT (first + PROBE_INTERVAL));
4871 emit_insn (gen_rtx_SET (VOIDmode, g1,
4872 gen_rtx_MINUS (Pmode, stack_pointer_rtx, g1)));
4873 emit_stack_probe (g1);
4875 /* Probe at FIRST + N * PROBE_INTERVAL for values of N from 2 until
4876 it exceeds SIZE. If only two probes are needed, this will not
4877 generate any code. Then probe at FIRST + SIZE. */
4878 for (i = 2 * PROBE_INTERVAL; i < size; i += PROBE_INTERVAL)
4880 emit_insn (gen_rtx_SET (VOIDmode, g1,
4881 plus_constant (Pmode, g1, -PROBE_INTERVAL)));
4882 emit_stack_probe (g1);
4885 emit_stack_probe (plus_constant (Pmode, g1,
4886 (i - PROBE_INTERVAL) - size));
4889 /* Otherwise, do the same as above, but in a loop. Note that we must be
4890 extra careful with variables wrapping around because we might be at
4891 the very top (or the very bottom) of the address space and we have
4892 to be able to handle this case properly; in particular, we use an
4893 equality test for the loop condition. */
4896 HOST_WIDE_INT rounded_size;
4897 rtx g4 = gen_rtx_REG (Pmode, 4);
4899 emit_move_insn (g1, GEN_INT (first));
4902 /* Step 1: round SIZE to the previous multiple of the interval. */
4904 rounded_size = size & -PROBE_INTERVAL;
4905 emit_move_insn (g4, GEN_INT (rounded_size));
4908 /* Step 2: compute initial and final value of the loop counter. */
4910 /* TEST_ADDR = SP + FIRST. */
4911 emit_insn (gen_rtx_SET (VOIDmode, g1,
4912 gen_rtx_MINUS (Pmode, stack_pointer_rtx, g1)));
4914 /* LAST_ADDR = SP + FIRST + ROUNDED_SIZE. */
4915 emit_insn (gen_rtx_SET (VOIDmode, g4, gen_rtx_MINUS (Pmode, g1, g4)));
4920 while (TEST_ADDR != LAST_ADDR)
4922 TEST_ADDR = TEST_ADDR + PROBE_INTERVAL
4926 probes at FIRST + N * PROBE_INTERVAL for values of N from 1
4927 until it is equal to ROUNDED_SIZE. */
4930 emit_insn (gen_probe_stack_rangedi (g1, g1, g4));
4932 emit_insn (gen_probe_stack_rangesi (g1, g1, g4));
4935 /* Step 4: probe at FIRST + SIZE if we cannot assert at compile-time
4936 that SIZE is equal to ROUNDED_SIZE. */
4938 if (size != rounded_size)
4939 emit_stack_probe (plus_constant (Pmode, g4, rounded_size - size));
4942 /* Make sure nothing is scheduled before we are done. */
4943 emit_insn (gen_blockage ());
4946 /* Probe a range of stack addresses from REG1 to REG2 inclusive. These are
4947 absolute addresses. */
4950 output_probe_stack_range (rtx reg1, rtx reg2)
4952 static int labelno = 0;
4953 char loop_lab[32], end_lab[32];
4956 ASM_GENERATE_INTERNAL_LABEL (loop_lab, "LPSRL", labelno);
4957 ASM_GENERATE_INTERNAL_LABEL (end_lab, "LPSRE", labelno++);
4959 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, loop_lab);
4961 /* Jump to END_LAB if TEST_ADDR == LAST_ADDR. */
4964 output_asm_insn ("cmp\t%0, %1", xops);
4966 fputs ("\tbe,pn\t%xcc,", asm_out_file);
4968 fputs ("\tbe\t", asm_out_file);
4969 assemble_name_raw (asm_out_file, end_lab);
4970 fputc ('\n', asm_out_file);
4972 /* TEST_ADDR = TEST_ADDR + PROBE_INTERVAL. */
4973 xops[1] = GEN_INT (-PROBE_INTERVAL);
4974 output_asm_insn (" add\t%0, %1, %0", xops);
4976 /* Probe at TEST_ADDR and branch. */
4978 fputs ("\tba,pt\t%xcc,", asm_out_file);
4980 fputs ("\tba\t", asm_out_file);
4981 assemble_name_raw (asm_out_file, loop_lab);
4982 fputc ('\n', asm_out_file);
4983 xops[1] = GEN_INT (SPARC_STACK_BIAS);
4984 output_asm_insn (" st\t%%g0, [%0+%1]", xops);
4986 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, end_lab);
4991 /* Emit code to save/restore registers from LOW to HIGH at BASE+OFFSET as
4992 needed. LOW is supposed to be double-word aligned for 32-bit registers.
4993 SAVE_P decides whether a register must be saved/restored. ACTION_TRUE
4994 is the action to be performed if SAVE_P returns true and ACTION_FALSE
4995 the action to be performed if it returns false. Return the new offset. */
4997 typedef bool (*sorr_pred_t) (unsigned int, int);
4998 typedef enum { SORR_NONE, SORR_ADVANCE, SORR_SAVE, SORR_RESTORE } sorr_act_t;
5001 emit_save_or_restore_regs (unsigned int low, unsigned int high, rtx base,
5002 int offset, int leaf_function, sorr_pred_t save_p,
5003 sorr_act_t action_true, sorr_act_t action_false)
5008 if (TARGET_ARCH64 && high <= 32)
5012 for (i = low; i < high; i++)
5014 if (save_p (i, leaf_function))
5016 mem = gen_frame_mem (DImode, plus_constant (Pmode,
5018 if (action_true == SORR_SAVE)
5020 insn = emit_move_insn (mem, gen_rtx_REG (DImode, i));
5021 RTX_FRAME_RELATED_P (insn) = 1;
5023 else /* action_true == SORR_RESTORE */
5025 /* The frame pointer must be restored last since its old
5026 value may be used as base address for the frame. This
5027 is problematic in 64-bit mode only because of the lack
5028 of double-word load instruction. */
5029 if (i == HARD_FRAME_POINTER_REGNUM)
5032 emit_move_insn (gen_rtx_REG (DImode, i), mem);
5036 else if (action_false == SORR_ADVANCE)
5042 mem = gen_frame_mem (DImode, plus_constant (Pmode, base, fp_offset));
5043 emit_move_insn (hard_frame_pointer_rtx, mem);
5048 for (i = low; i < high; i += 2)
5050 bool reg0 = save_p (i, leaf_function);
5051 bool reg1 = save_p (i + 1, leaf_function);
5052 enum machine_mode mode;
5057 mode = SPARC_INT_REG_P (i) ? DImode : DFmode;
5062 mode = SPARC_INT_REG_P (i) ? SImode : SFmode;
5067 mode = SPARC_INT_REG_P (i) ? SImode : SFmode;
5073 if (action_false == SORR_ADVANCE)
5078 mem = gen_frame_mem (mode, plus_constant (Pmode, base, offset));
5079 if (action_true == SORR_SAVE)
5081 insn = emit_move_insn (mem, gen_rtx_REG (mode, regno));
5082 RTX_FRAME_RELATED_P (insn) = 1;
5086 mem = gen_frame_mem (SImode, plus_constant (Pmode, base,
5088 set1 = gen_rtx_SET (VOIDmode, mem,
5089 gen_rtx_REG (SImode, regno));
5090 RTX_FRAME_RELATED_P (set1) = 1;
5092 = gen_frame_mem (SImode, plus_constant (Pmode, base,
5094 set2 = gen_rtx_SET (VOIDmode, mem,
5095 gen_rtx_REG (SImode, regno + 1));
5096 RTX_FRAME_RELATED_P (set2) = 1;
5097 add_reg_note (insn, REG_FRAME_RELATED_EXPR,
5098 gen_rtx_PARALLEL (VOIDmode,
5099 gen_rtvec (2, set1, set2)));
5102 else /* action_true == SORR_RESTORE */
5103 emit_move_insn (gen_rtx_REG (mode, regno), mem);
5105 /* Always preserve double-word alignment. */
5106 offset = (offset + 8) & -8;
5113 /* Emit code to adjust BASE to OFFSET. Return the new base. */
5116 emit_adjust_base_to_offset (rtx base, int offset)
5118 /* ??? This might be optimized a little as %g1 might already have a
5119 value close enough that a single add insn will do. */
5120 /* ??? Although, all of this is probably only a temporary fix because
5121 if %g1 can hold a function result, then sparc_expand_epilogue will
5122 lose (the result will be clobbered). */
5123 rtx new_base = gen_rtx_REG (Pmode, 1);
5124 emit_move_insn (new_base, GEN_INT (offset));
5125 emit_insn (gen_rtx_SET (VOIDmode,
5126 new_base, gen_rtx_PLUS (Pmode, base, new_base)));
5130 /* Emit code to save/restore call-saved global and FP registers. */
5133 emit_save_or_restore_global_fp_regs (rtx base, int offset, sorr_act_t action)
5135 if (offset < -4096 || offset + sparc_n_global_fp_regs * 4 > 4095)
5137 base = emit_adjust_base_to_offset (base, offset);
5142 = emit_save_or_restore_regs (0, 8, base, offset, 0,
5143 save_global_or_fp_reg_p, action, SORR_NONE);
5144 emit_save_or_restore_regs (32, TARGET_V9 ? 96 : 64, base, offset, 0,
5145 save_global_or_fp_reg_p, action, SORR_NONE);
5148 /* Emit code to save/restore call-saved local and in registers. */
5151 emit_save_or_restore_local_in_regs (rtx base, int offset, sorr_act_t action)
5153 if (offset < -4096 || offset + 16 * UNITS_PER_WORD > 4095)
5155 base = emit_adjust_base_to_offset (base, offset);
5159 emit_save_or_restore_regs (16, 32, base, offset, sparc_leaf_function_p,
5160 save_local_or_in_reg_p, action, SORR_ADVANCE);
5163 /* Emit a window_save insn. */
5166 emit_window_save (rtx increment)
5168 rtx insn = emit_insn (gen_window_save (increment));
5169 RTX_FRAME_RELATED_P (insn) = 1;
5171 /* The incoming return address (%o7) is saved in %i7. */
5172 add_reg_note (insn, REG_CFA_REGISTER,
5173 gen_rtx_SET (VOIDmode,
5174 gen_rtx_REG (Pmode, RETURN_ADDR_REGNUM),
5176 INCOMING_RETURN_ADDR_REGNUM)));
5178 /* The window save event. */
5179 add_reg_note (insn, REG_CFA_WINDOW_SAVE, const0_rtx);
5181 /* The CFA is %fp, the hard frame pointer. */
5182 add_reg_note (insn, REG_CFA_DEF_CFA,
5183 plus_constant (Pmode, hard_frame_pointer_rtx,
5184 INCOMING_FRAME_SP_OFFSET));
5189 /* Generate an increment for the stack pointer. */
5192 gen_stack_pointer_inc (rtx increment)
5194 return gen_rtx_SET (VOIDmode,
5196 gen_rtx_PLUS (Pmode,
5201 /* Expand the function prologue. The prologue is responsible for reserving
5202 storage for the frame, saving the call-saved registers and loading the
5203 GOT register if needed. */
5206 sparc_expand_prologue (void)
5211 /* Compute a snapshot of crtl->uses_only_leaf_regs. Relying
5212 on the final value of the flag means deferring the prologue/epilogue
5213 expansion until just before the second scheduling pass, which is too
5214 late to emit multiple epilogues or return insns.
5216 Of course we are making the assumption that the value of the flag
5217 will not change between now and its final value. Of the three parts
5218 of the formula, only the last one can reasonably vary. Let's take a
5219 closer look, after assuming that the first two ones are set to true
5220 (otherwise the last value is effectively silenced).
5222 If only_leaf_regs_used returns false, the global predicate will also
5223 be false so the actual frame size calculated below will be positive.
5224 As a consequence, the save_register_window insn will be emitted in
5225 the instruction stream; now this insn explicitly references %fp
5226 which is not a leaf register so only_leaf_regs_used will always
5227 return false subsequently.
5229 If only_leaf_regs_used returns true, we hope that the subsequent
5230 optimization passes won't cause non-leaf registers to pop up. For
5231 example, the regrename pass has special provisions to not rename to
5232 non-leaf registers in a leaf function. */
5233 sparc_leaf_function_p
5234 = optimize > 0 && crtl->is_leaf && only_leaf_regs_used ();
5236 size = sparc_compute_frame_size (get_frame_size(), sparc_leaf_function_p);
5238 if (flag_stack_usage_info)
5239 current_function_static_stack_size = size;
5241 if (flag_stack_check == STATIC_BUILTIN_STACK_CHECK && size)
5242 sparc_emit_probe_stack_range (STACK_CHECK_PROTECT, size);
5246 else if (sparc_leaf_function_p)
5248 rtx size_int_rtx = GEN_INT (-size);
5251 insn = emit_insn (gen_stack_pointer_inc (size_int_rtx));
5252 else if (size <= 8192)
5254 insn = emit_insn (gen_stack_pointer_inc (GEN_INT (-4096)));
5255 RTX_FRAME_RELATED_P (insn) = 1;
5257 /* %sp is still the CFA register. */
5258 insn = emit_insn (gen_stack_pointer_inc (GEN_INT (4096 - size)));
5262 rtx size_rtx = gen_rtx_REG (Pmode, 1);
5263 emit_move_insn (size_rtx, size_int_rtx);
5264 insn = emit_insn (gen_stack_pointer_inc (size_rtx));
5265 add_reg_note (insn, REG_FRAME_RELATED_EXPR,
5266 gen_stack_pointer_inc (size_int_rtx));
5269 RTX_FRAME_RELATED_P (insn) = 1;
5273 rtx size_int_rtx = GEN_INT (-size);
5276 emit_window_save (size_int_rtx);
5277 else if (size <= 8192)
5279 emit_window_save (GEN_INT (-4096));
5281 /* %sp is not the CFA register anymore. */
5282 emit_insn (gen_stack_pointer_inc (GEN_INT (4096 - size)));
5284 /* Make sure no %fp-based store is issued until after the frame is
5285 established. The offset between the frame pointer and the stack
5286 pointer is calculated relative to the value of the stack pointer
5287 at the end of the function prologue, and moving instructions that
5288 access the stack via the frame pointer between the instructions
5289 that decrement the stack pointer could result in accessing the
5290 register window save area, which is volatile. */
5291 emit_insn (gen_frame_blockage ());
5295 rtx size_rtx = gen_rtx_REG (Pmode, 1);
5296 emit_move_insn (size_rtx, size_int_rtx);
5297 emit_window_save (size_rtx);
5301 if (sparc_leaf_function_p)
5303 sparc_frame_base_reg = stack_pointer_rtx;
5304 sparc_frame_base_offset = size + SPARC_STACK_BIAS;
5308 sparc_frame_base_reg = hard_frame_pointer_rtx;
5309 sparc_frame_base_offset = SPARC_STACK_BIAS;
5312 if (sparc_n_global_fp_regs > 0)
5313 emit_save_or_restore_global_fp_regs (sparc_frame_base_reg,
5314 sparc_frame_base_offset
5315 - sparc_apparent_frame_size,
5318 /* Load the GOT register if needed. */
5319 if (crtl->uses_pic_offset_table)
5320 load_got_register ();
5322 /* Advertise that the data calculated just above are now valid. */
5323 sparc_prologue_data_valid_p = true;
5326 /* Expand the function prologue. The prologue is responsible for reserving
5327 storage for the frame, saving the call-saved registers and loading the
5328 GOT register if needed. */
5331 sparc_flat_expand_prologue (void)
5336 sparc_leaf_function_p = optimize > 0 && crtl->is_leaf;
5338 size = sparc_compute_frame_size (get_frame_size(), sparc_leaf_function_p);
5340 if (flag_stack_usage_info)
5341 current_function_static_stack_size = size;
5343 if (flag_stack_check == STATIC_BUILTIN_STACK_CHECK && size)
5344 sparc_emit_probe_stack_range (STACK_CHECK_PROTECT, size);
5346 if (sparc_save_local_in_regs_p)
5347 emit_save_or_restore_local_in_regs (stack_pointer_rtx, SPARC_STACK_BIAS,
5354 rtx size_int_rtx, size_rtx;
5356 size_rtx = size_int_rtx = GEN_INT (-size);
5358 /* We establish the frame (i.e. decrement the stack pointer) first, even
5359 if we use a frame pointer, because we cannot clobber any call-saved
5360 registers, including the frame pointer, if we haven't created a new
5361 register save area, for the sake of compatibility with the ABI. */
5363 insn = emit_insn (gen_stack_pointer_inc (size_int_rtx));
5364 else if (size <= 8192 && !frame_pointer_needed)
5366 insn = emit_insn (gen_stack_pointer_inc (GEN_INT (-4096)));
5367 RTX_FRAME_RELATED_P (insn) = 1;
5368 insn = emit_insn (gen_stack_pointer_inc (GEN_INT (4096 - size)));
5372 size_rtx = gen_rtx_REG (Pmode, 1);
5373 emit_move_insn (size_rtx, size_int_rtx);
5374 insn = emit_insn (gen_stack_pointer_inc (size_rtx));
5375 add_reg_note (insn, REG_CFA_ADJUST_CFA,
5376 gen_stack_pointer_inc (size_int_rtx));
5378 RTX_FRAME_RELATED_P (insn) = 1;
5380 /* Ensure nothing is scheduled until after the frame is established. */
5381 emit_insn (gen_blockage ());
5383 if (frame_pointer_needed)
5385 insn = emit_insn (gen_rtx_SET (VOIDmode, hard_frame_pointer_rtx,
5386 gen_rtx_MINUS (Pmode,
5389 RTX_FRAME_RELATED_P (insn) = 1;
5391 add_reg_note (insn, REG_CFA_ADJUST_CFA,
5392 gen_rtx_SET (VOIDmode, hard_frame_pointer_rtx,
5393 plus_constant (Pmode, stack_pointer_rtx,
5397 if (return_addr_reg_needed_p (sparc_leaf_function_p))
5399 rtx o7 = gen_rtx_REG (Pmode, INCOMING_RETURN_ADDR_REGNUM);
5400 rtx i7 = gen_rtx_REG (Pmode, RETURN_ADDR_REGNUM);
5402 insn = emit_move_insn (i7, o7);
5403 RTX_FRAME_RELATED_P (insn) = 1;
5405 add_reg_note (insn, REG_CFA_REGISTER,
5406 gen_rtx_SET (VOIDmode, i7, o7));
5408 /* Prevent this instruction from ever being considered dead,
5409 even if this function has no epilogue. */
5414 if (frame_pointer_needed)
5416 sparc_frame_base_reg = hard_frame_pointer_rtx;
5417 sparc_frame_base_offset = SPARC_STACK_BIAS;
5421 sparc_frame_base_reg = stack_pointer_rtx;
5422 sparc_frame_base_offset = size + SPARC_STACK_BIAS;
5425 if (sparc_n_global_fp_regs > 0)
5426 emit_save_or_restore_global_fp_regs (sparc_frame_base_reg,
5427 sparc_frame_base_offset
5428 - sparc_apparent_frame_size,
5431 /* Load the GOT register if needed. */
5432 if (crtl->uses_pic_offset_table)
5433 load_got_register ();
5435 /* Advertise that the data calculated just above are now valid. */
5436 sparc_prologue_data_valid_p = true;
5439 /* This function generates the assembly code for function entry, which boils
5440 down to emitting the necessary .register directives. */
5443 sparc_asm_function_prologue (FILE *file, HOST_WIDE_INT size ATTRIBUTE_UNUSED)
5445 /* Check that the assumption we made in sparc_expand_prologue is valid. */
5447 gcc_assert (sparc_leaf_function_p == crtl->uses_only_leaf_regs);
5449 sparc_output_scratch_registers (file);
5452 /* Expand the function epilogue, either normal or part of a sibcall.
5453 We emit all the instructions except the return or the call. */
5456 sparc_expand_epilogue (bool for_eh)
5458 HOST_WIDE_INT size = sparc_frame_size;
5460 if (sparc_n_global_fp_regs > 0)
5461 emit_save_or_restore_global_fp_regs (sparc_frame_base_reg,
5462 sparc_frame_base_offset
5463 - sparc_apparent_frame_size,
5466 if (size == 0 || for_eh)
5468 else if (sparc_leaf_function_p)
5471 emit_insn (gen_stack_pointer_inc (GEN_INT (size)));
5472 else if (size <= 8192)
5474 emit_insn (gen_stack_pointer_inc (GEN_INT (4096)));
5475 emit_insn (gen_stack_pointer_inc (GEN_INT (size - 4096)));
5479 rtx reg = gen_rtx_REG (Pmode, 1);
5480 emit_move_insn (reg, GEN_INT (size));
5481 emit_insn (gen_stack_pointer_inc (reg));
5486 /* Expand the function epilogue, either normal or part of a sibcall.
5487 We emit all the instructions except the return or the call. */
5490 sparc_flat_expand_epilogue (bool for_eh)
5492 HOST_WIDE_INT size = sparc_frame_size;
5494 if (sparc_n_global_fp_regs > 0)
5495 emit_save_or_restore_global_fp_regs (sparc_frame_base_reg,
5496 sparc_frame_base_offset
5497 - sparc_apparent_frame_size,
5500 /* If we have a frame pointer, we'll need both to restore it before the
5501 frame is destroyed and use its current value in destroying the frame.
5502 Since we don't have an atomic way to do that in the flat window model,
5503 we save the current value into a temporary register (%g1). */
5504 if (frame_pointer_needed && !for_eh)
5505 emit_move_insn (gen_rtx_REG (Pmode, 1), hard_frame_pointer_rtx);
5507 if (return_addr_reg_needed_p (sparc_leaf_function_p))
5508 emit_move_insn (gen_rtx_REG (Pmode, INCOMING_RETURN_ADDR_REGNUM),
5509 gen_rtx_REG (Pmode, RETURN_ADDR_REGNUM));
5511 if (sparc_save_local_in_regs_p)
5512 emit_save_or_restore_local_in_regs (sparc_frame_base_reg,
5513 sparc_frame_base_offset,
5516 if (size == 0 || for_eh)
5518 else if (frame_pointer_needed)
5520 /* Make sure the frame is destroyed after everything else is done. */
5521 emit_insn (gen_blockage ());
5523 emit_move_insn (stack_pointer_rtx, gen_rtx_REG (Pmode, 1));
5528 emit_insn (gen_blockage ());
5531 emit_insn (gen_stack_pointer_inc (GEN_INT (size)));
5532 else if (size <= 8192)
5534 emit_insn (gen_stack_pointer_inc (GEN_INT (4096)));
5535 emit_insn (gen_stack_pointer_inc (GEN_INT (size - 4096)));
5539 rtx reg = gen_rtx_REG (Pmode, 1);
5540 emit_move_insn (reg, GEN_INT (size));
5541 emit_insn (gen_stack_pointer_inc (reg));
5546 /* Return true if it is appropriate to emit `return' instructions in the
5547 body of a function. */
5550 sparc_can_use_return_insn_p (void)
5552 return sparc_prologue_data_valid_p
5553 && sparc_n_global_fp_regs == 0
5555 ? (sparc_frame_size == 0 && !sparc_save_local_in_regs_p)
5556 : (sparc_frame_size == 0 || !sparc_leaf_function_p);
5559 /* This function generates the assembly code for function exit. */
5562 sparc_asm_function_epilogue (FILE *file, HOST_WIDE_INT size ATTRIBUTE_UNUSED)
5564 /* If the last two instructions of a function are "call foo; dslot;"
5565 the return address might point to the first instruction in the next
5566 function and we have to output a dummy nop for the sake of sane
5567 backtraces in such cases. This is pointless for sibling calls since
5568 the return address is explicitly adjusted. */
5570 rtx insn, last_real_insn;
5572 insn = get_last_insn ();
5574 last_real_insn = prev_real_insn (insn);
5576 && NONJUMP_INSN_P (last_real_insn)
5577 && GET_CODE (PATTERN (last_real_insn)) == SEQUENCE)
5578 last_real_insn = XVECEXP (PATTERN (last_real_insn), 0, 0);
5581 && CALL_P (last_real_insn)
5582 && !SIBLING_CALL_P (last_real_insn))
5583 fputs("\tnop\n", file);
5585 sparc_output_deferred_case_vectors ();
5588 /* Output a 'restore' instruction. */
5591 output_restore (rtx pat)
5597 fputs ("\t restore\n", asm_out_file);
5601 gcc_assert (GET_CODE (pat) == SET);
5603 operands[0] = SET_DEST (pat);
5604 pat = SET_SRC (pat);
5606 switch (GET_CODE (pat))
5609 operands[1] = XEXP (pat, 0);
5610 operands[2] = XEXP (pat, 1);
5611 output_asm_insn (" restore %r1, %2, %Y0", operands);
5614 operands[1] = XEXP (pat, 0);
5615 operands[2] = XEXP (pat, 1);
5616 output_asm_insn (" restore %r1, %%lo(%a2), %Y0", operands);
5619 operands[1] = XEXP (pat, 0);
5620 gcc_assert (XEXP (pat, 1) == const1_rtx);
5621 output_asm_insn (" restore %r1, %r1, %Y0", operands);
5625 output_asm_insn (" restore %%g0, %1, %Y0", operands);
5630 /* Output a return. */
5633 output_return (rtx insn)
5635 if (crtl->calls_eh_return)
5637 /* If the function uses __builtin_eh_return, the eh_return
5638 machinery occupies the delay slot. */
5639 gcc_assert (!final_sequence);
5641 if (flag_delayed_branch)
5643 if (!TARGET_FLAT && TARGET_V9)
5644 fputs ("\treturn\t%i7+8\n", asm_out_file);
5648 fputs ("\trestore\n", asm_out_file);
5650 fputs ("\tjmp\t%o7+8\n", asm_out_file);
5653 fputs ("\t add\t%sp, %g1, %sp\n", asm_out_file);
5658 fputs ("\trestore\n", asm_out_file);
5660 fputs ("\tadd\t%sp, %g1, %sp\n", asm_out_file);
5661 fputs ("\tjmp\t%o7+8\n\t nop\n", asm_out_file);
5664 else if (sparc_leaf_function_p || TARGET_FLAT)
5666 /* This is a leaf or flat function so we don't have to bother restoring
5667 the register window, which frees us from dealing with the convoluted
5668 semantics of restore/return. We simply output the jump to the
5669 return address and the insn in the delay slot (if any). */
5671 return "jmp\t%%o7+%)%#";
5675 /* This is a regular function so we have to restore the register window.
5676 We may have a pending insn for the delay slot, which will be either
5677 combined with the 'restore' instruction or put in the delay slot of
5678 the 'return' instruction. */
5684 delay = NEXT_INSN (insn);
5687 pat = PATTERN (delay);
5689 if (TARGET_V9 && ! epilogue_renumber (&pat, 1))
5691 epilogue_renumber (&pat, 0);
5692 return "return\t%%i7+%)%#";
5696 output_asm_insn ("jmp\t%%i7+%)", NULL);
5697 output_restore (pat);
5698 PATTERN (delay) = gen_blockage ();
5699 INSN_CODE (delay) = -1;
5704 /* The delay slot is empty. */
5706 return "return\t%%i7+%)\n\t nop";
5707 else if (flag_delayed_branch)
5708 return "jmp\t%%i7+%)\n\t restore";
5710 return "restore\n\tjmp\t%%o7+%)\n\t nop";
5717 /* Output a sibling call. */
5720 output_sibcall (rtx insn, rtx call_operand)
5724 gcc_assert (flag_delayed_branch);
5726 operands[0] = call_operand;
5728 if (sparc_leaf_function_p || TARGET_FLAT)
5730 /* This is a leaf or flat function so we don't have to bother restoring
5731 the register window. We simply output the jump to the function and
5732 the insn in the delay slot (if any). */
5734 gcc_assert (!(LEAF_SIBCALL_SLOT_RESERVED_P && final_sequence));
5737 output_asm_insn ("sethi\t%%hi(%a0), %%g1\n\tjmp\t%%g1 + %%lo(%a0)%#",
5740 /* Use or with rs2 %%g0 instead of mov, so that as/ld can optimize
5741 it into branch if possible. */
5742 output_asm_insn ("or\t%%o7, %%g0, %%g1\n\tcall\t%a0, 0\n\t or\t%%g1, %%g0, %%o7",
5747 /* This is a regular function so we have to restore the register window.
5748 We may have a pending insn for the delay slot, which will be combined
5749 with the 'restore' instruction. */
5751 output_asm_insn ("call\t%a0, 0", operands);
5755 rtx delay = NEXT_INSN (insn);
5758 output_restore (PATTERN (delay));
5760 PATTERN (delay) = gen_blockage ();
5761 INSN_CODE (delay) = -1;
5764 output_restore (NULL_RTX);
5770 /* Functions for handling argument passing.
5772 For 32-bit, the first 6 args are normally in registers and the rest are
5773 pushed. Any arg that starts within the first 6 words is at least
5774 partially passed in a register unless its data type forbids.
5776 For 64-bit, the argument registers are laid out as an array of 16 elements
5777 and arguments are added sequentially. The first 6 int args and up to the
5778 first 16 fp args (depending on size) are passed in regs.
5780 Slot Stack Integral Float Float in structure Double Long Double
5781 ---- ----- -------- ----- ------------------ ------ -----------
5782 15 [SP+248] %f31 %f30,%f31 %d30
5783 14 [SP+240] %f29 %f28,%f29 %d28 %q28
5784 13 [SP+232] %f27 %f26,%f27 %d26
5785 12 [SP+224] %f25 %f24,%f25 %d24 %q24
5786 11 [SP+216] %f23 %f22,%f23 %d22
5787 10 [SP+208] %f21 %f20,%f21 %d20 %q20
5788 9 [SP+200] %f19 %f18,%f19 %d18
5789 8 [SP+192] %f17 %f16,%f17 %d16 %q16
5790 7 [SP+184] %f15 %f14,%f15 %d14
5791 6 [SP+176] %f13 %f12,%f13 %d12 %q12
5792 5 [SP+168] %o5 %f11 %f10,%f11 %d10
5793 4 [SP+160] %o4 %f9 %f8,%f9 %d8 %q8
5794 3 [SP+152] %o3 %f7 %f6,%f7 %d6
5795 2 [SP+144] %o2 %f5 %f4,%f5 %d4 %q4
5796 1 [SP+136] %o1 %f3 %f2,%f3 %d2
5797 0 [SP+128] %o0 %f1 %f0,%f1 %d0 %q0
5799 Here SP = %sp if -mno-stack-bias or %sp+stack_bias otherwise.
5801 Integral arguments are always passed as 64-bit quantities appropriately
5804 Passing of floating point values is handled as follows.
5805 If a prototype is in scope:
5806 If the value is in a named argument (i.e. not a stdarg function or a
5807 value not part of the `...') then the value is passed in the appropriate
5809 If the value is part of the `...' and is passed in one of the first 6
5810 slots then the value is passed in the appropriate int reg.
5811 If the value is part of the `...' and is not passed in one of the first 6
5812 slots then the value is passed in memory.
5813 If a prototype is not in scope:
5814 If the value is one of the first 6 arguments the value is passed in the
5815 appropriate integer reg and the appropriate fp reg.
5816 If the value is not one of the first 6 arguments the value is passed in
5817 the appropriate fp reg and in memory.
5820 Summary of the calling conventions implemented by GCC on the SPARC:
5823 size argument return value
5825 small integer <4 int. reg. int. reg.
5826 word 4 int. reg. int. reg.
5827 double word 8 int. reg. int. reg.
5829 _Complex small integer <8 int. reg. int. reg.
5830 _Complex word 8 int. reg. int. reg.
5831 _Complex double word 16 memory int. reg.
5833 vector integer <=8 int. reg. FP reg.
5834 vector integer >8 memory memory
5836 float 4 int. reg. FP reg.
5837 double 8 int. reg. FP reg.
5838 long double 16 memory memory
5840 _Complex float 8 memory FP reg.
5841 _Complex double 16 memory FP reg.
5842 _Complex long double 32 memory FP reg.
5844 vector float any memory memory
5846 aggregate any memory memory
5851 size argument return value
5853 small integer <8 int. reg. int. reg.
5854 word 8 int. reg. int. reg.
5855 double word 16 int. reg. int. reg.
5857 _Complex small integer <16 int. reg. int. reg.
5858 _Complex word 16 int. reg. int. reg.
5859 _Complex double word 32 memory int. reg.
5861 vector integer <=16 FP reg. FP reg.
5862 vector integer 16<s<=32 memory FP reg.
5863 vector integer >32 memory memory
5865 float 4 FP reg. FP reg.
5866 double 8 FP reg. FP reg.
5867 long double 16 FP reg. FP reg.
5869 _Complex float 8 FP reg. FP reg.
5870 _Complex double 16 FP reg. FP reg.
5871 _Complex long double 32 memory FP reg.
5873 vector float <=16 FP reg. FP reg.
5874 vector float 16<s<=32 memory FP reg.
5875 vector float >32 memory memory
5877 aggregate <=16 reg. reg.
5878 aggregate 16<s<=32 memory reg.
5879 aggregate >32 memory memory
5883 Note #1: complex floating-point types follow the extended SPARC ABIs as
5884 implemented by the Sun compiler.
5886 Note #2: integral vector types follow the scalar floating-point types
5887 conventions to match what is implemented by the Sun VIS SDK.
5889 Note #3: floating-point vector types follow the aggregate types
5893 /* Maximum number of int regs for args. */
5894 #define SPARC_INT_ARG_MAX 6
5895 /* Maximum number of fp regs for args. */
5896 #define SPARC_FP_ARG_MAX 16
5898 #define ROUND_ADVANCE(SIZE) (((SIZE) + UNITS_PER_WORD - 1) / UNITS_PER_WORD)
5900 /* Handle the INIT_CUMULATIVE_ARGS macro.
5901 Initialize a variable CUM of type CUMULATIVE_ARGS
5902 for a call to a function whose data type is FNTYPE.
5903 For a library call, FNTYPE is 0. */
5906 init_cumulative_args (struct sparc_args *cum, tree fntype,
5907 rtx libname ATTRIBUTE_UNUSED,
5908 tree fndecl ATTRIBUTE_UNUSED)
5911 cum->prototype_p = fntype && prototype_p (fntype);
5912 cum->libcall_p = fntype == 0;
5915 /* Handle promotion of pointer and integer arguments. */
5917 static enum machine_mode
5918 sparc_promote_function_mode (const_tree type,
5919 enum machine_mode mode,
5921 const_tree fntype ATTRIBUTE_UNUSED,
5922 int for_return ATTRIBUTE_UNUSED)
5924 if (type != NULL_TREE && POINTER_TYPE_P (type))
5926 *punsignedp = POINTERS_EXTEND_UNSIGNED;
5930 /* Integral arguments are passed as full words, as per the ABI. */
5931 if (GET_MODE_CLASS (mode) == MODE_INT
5932 && GET_MODE_SIZE (mode) < UNITS_PER_WORD)
5938 /* Handle the TARGET_STRICT_ARGUMENT_NAMING target hook. */
5941 sparc_strict_argument_naming (cumulative_args_t ca ATTRIBUTE_UNUSED)
5943 return TARGET_ARCH64 ? true : false;
5946 /* Scan the record type TYPE and return the following predicates:
5947 - INTREGS_P: the record contains at least one field or sub-field
5948 that is eligible for promotion in integer registers.
5949 - FP_REGS_P: the record contains at least one field or sub-field
5950 that is eligible for promotion in floating-point registers.
5951 - PACKED_P: the record contains at least one field that is packed.
5953 Sub-fields are not taken into account for the PACKED_P predicate. */
5956 scan_record_type (const_tree type, int *intregs_p, int *fpregs_p,
5961 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
5963 if (TREE_CODE (field) == FIELD_DECL)
5965 if (TREE_CODE (TREE_TYPE (field)) == RECORD_TYPE)
5966 scan_record_type (TREE_TYPE (field), intregs_p, fpregs_p, 0);
5967 else if ((FLOAT_TYPE_P (TREE_TYPE (field))
5968 || TREE_CODE (TREE_TYPE (field)) == VECTOR_TYPE)
5974 if (packed_p && DECL_PACKED (field))
5980 /* Compute the slot number to pass an argument in.
5981 Return the slot number or -1 if passing on the stack.
5983 CUM is a variable of type CUMULATIVE_ARGS which gives info about
5984 the preceding args and about the function being called.
5985 MODE is the argument's machine mode.
5986 TYPE is the data type of the argument (as a tree).
5987 This is null for libcalls where that information may
5989 NAMED is nonzero if this argument is a named parameter
5990 (otherwise it is an extra parameter matching an ellipsis).
5991 INCOMING_P is zero for FUNCTION_ARG, nonzero for FUNCTION_INCOMING_ARG.
5992 *PREGNO records the register number to use if scalar type.
5993 *PPADDING records the amount of padding needed in words. */
5996 function_arg_slotno (const struct sparc_args *cum, enum machine_mode mode,
5997 const_tree type, bool named, bool incoming_p,
5998 int *pregno, int *ppadding)
6000 int regbase = (incoming_p
6001 ? SPARC_INCOMING_INT_ARG_FIRST
6002 : SPARC_OUTGOING_INT_ARG_FIRST);
6003 int slotno = cum->words;
6004 enum mode_class mclass;
6009 if (type && TREE_ADDRESSABLE (type))
6015 && TYPE_ALIGN (type) % PARM_BOUNDARY != 0)
6018 /* For SPARC64, objects requiring 16-byte alignment get it. */
6020 && (type ? TYPE_ALIGN (type) : GET_MODE_ALIGNMENT (mode)) >= 128
6021 && (slotno & 1) != 0)
6022 slotno++, *ppadding = 1;
6024 mclass = GET_MODE_CLASS (mode);
6025 if (type && TREE_CODE (type) == VECTOR_TYPE)
6027 /* Vector types deserve special treatment because they are
6028 polymorphic wrt their mode, depending upon whether VIS
6029 instructions are enabled. */
6030 if (TREE_CODE (TREE_TYPE (type)) == REAL_TYPE)
6032 /* The SPARC port defines no floating-point vector modes. */
6033 gcc_assert (mode == BLKmode);
6037 /* Integral vector types should either have a vector
6038 mode or an integral mode, because we are guaranteed
6039 by pass_by_reference that their size is not greater
6040 than 16 bytes and TImode is 16-byte wide. */
6041 gcc_assert (mode != BLKmode);
6043 /* Vector integers are handled like floats according to
6045 mclass = MODE_FLOAT;
6052 case MODE_COMPLEX_FLOAT:
6053 case MODE_VECTOR_INT:
6054 if (TARGET_ARCH64 && TARGET_FPU && named)
6056 if (slotno >= SPARC_FP_ARG_MAX)
6058 regno = SPARC_FP_ARG_FIRST + slotno * 2;
6059 /* Arguments filling only one single FP register are
6060 right-justified in the outer double FP register. */
6061 if (GET_MODE_SIZE (mode) <= 4)
6068 case MODE_COMPLEX_INT:
6069 if (slotno >= SPARC_INT_ARG_MAX)
6071 regno = regbase + slotno;
6075 if (mode == VOIDmode)
6076 /* MODE is VOIDmode when generating the actual call. */
6079 gcc_assert (mode == BLKmode);
6083 || (TREE_CODE (type) != VECTOR_TYPE
6084 && TREE_CODE (type) != RECORD_TYPE))
6086 if (slotno >= SPARC_INT_ARG_MAX)
6088 regno = regbase + slotno;
6090 else /* TARGET_ARCH64 && type */
6092 int intregs_p = 0, fpregs_p = 0, packed_p = 0;
6094 /* First see what kinds of registers we would need. */
6095 if (TREE_CODE (type) == VECTOR_TYPE)
6098 scan_record_type (type, &intregs_p, &fpregs_p, &packed_p);
6100 /* The ABI obviously doesn't specify how packed structures
6101 are passed. These are defined to be passed in int regs
6102 if possible, otherwise memory. */
6103 if (packed_p || !named)
6104 fpregs_p = 0, intregs_p = 1;
6106 /* If all arg slots are filled, then must pass on stack. */
6107 if (fpregs_p && slotno >= SPARC_FP_ARG_MAX)
6110 /* If there are only int args and all int arg slots are filled,
6111 then must pass on stack. */
6112 if (!fpregs_p && intregs_p && slotno >= SPARC_INT_ARG_MAX)
6115 /* Note that even if all int arg slots are filled, fp members may
6116 still be passed in regs if such regs are available.
6117 *PREGNO isn't set because there may be more than one, it's up
6118 to the caller to compute them. */
6131 /* Handle recursive register counting for structure field layout. */
6133 struct function_arg_record_value_parms
6135 rtx ret; /* return expression being built. */
6136 int slotno; /* slot number of the argument. */
6137 int named; /* whether the argument is named. */
6138 int regbase; /* regno of the base register. */
6139 int stack; /* 1 if part of the argument is on the stack. */
6140 int intoffset; /* offset of the first pending integer field. */
6141 unsigned int nregs; /* number of words passed in registers. */
6144 static void function_arg_record_value_3
6145 (HOST_WIDE_INT, struct function_arg_record_value_parms *);
6146 static void function_arg_record_value_2
6147 (const_tree, HOST_WIDE_INT, struct function_arg_record_value_parms *, bool);
6148 static void function_arg_record_value_1
6149 (const_tree, HOST_WIDE_INT, struct function_arg_record_value_parms *, bool);
6150 static rtx function_arg_record_value (const_tree, enum machine_mode, int, int, int);
6151 static rtx function_arg_union_value (int, enum machine_mode, int, int);
6153 /* A subroutine of function_arg_record_value. Traverse the structure
6154 recursively and determine how many registers will be required. */
6157 function_arg_record_value_1 (const_tree type, HOST_WIDE_INT startbitpos,
6158 struct function_arg_record_value_parms *parms,
6163 /* We need to compute how many registers are needed so we can
6164 allocate the PARALLEL but before we can do that we need to know
6165 whether there are any packed fields. The ABI obviously doesn't
6166 specify how structures are passed in this case, so they are
6167 defined to be passed in int regs if possible, otherwise memory,
6168 regardless of whether there are fp values present. */
6171 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
6173 if (TREE_CODE (field) == FIELD_DECL && DECL_PACKED (field))
6180 /* Compute how many registers we need. */
6181 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
6183 if (TREE_CODE (field) == FIELD_DECL)
6185 HOST_WIDE_INT bitpos = startbitpos;
6187 if (DECL_SIZE (field) != 0)
6189 if (integer_zerop (DECL_SIZE (field)))
6192 if (host_integerp (bit_position (field), 1))
6193 bitpos += int_bit_position (field);
6196 /* ??? FIXME: else assume zero offset. */
6198 if (TREE_CODE (TREE_TYPE (field)) == RECORD_TYPE)
6199 function_arg_record_value_1 (TREE_TYPE (field),
6203 else if ((FLOAT_TYPE_P (TREE_TYPE (field))
6204 || TREE_CODE (TREE_TYPE (field)) == VECTOR_TYPE)
6209 if (parms->intoffset != -1)
6211 unsigned int startbit, endbit;
6212 int intslots, this_slotno;
6214 startbit = parms->intoffset & -BITS_PER_WORD;
6215 endbit = (bitpos + BITS_PER_WORD - 1) & -BITS_PER_WORD;
6217 intslots = (endbit - startbit) / BITS_PER_WORD;
6218 this_slotno = parms->slotno + parms->intoffset
6221 if (intslots > 0 && intslots > SPARC_INT_ARG_MAX - this_slotno)
6223 intslots = MAX (0, SPARC_INT_ARG_MAX - this_slotno);
6224 /* We need to pass this field on the stack. */
6228 parms->nregs += intslots;
6229 parms->intoffset = -1;
6232 /* There's no need to check this_slotno < SPARC_FP_ARG MAX.
6233 If it wasn't true we wouldn't be here. */
6234 if (TREE_CODE (TREE_TYPE (field)) == VECTOR_TYPE
6235 && DECL_MODE (field) == BLKmode)
6236 parms->nregs += TYPE_VECTOR_SUBPARTS (TREE_TYPE (field));
6237 else if (TREE_CODE (TREE_TYPE (field)) == COMPLEX_TYPE)
6244 if (parms->intoffset == -1)
6245 parms->intoffset = bitpos;
6251 /* A subroutine of function_arg_record_value. Assign the bits of the
6252 structure between parms->intoffset and bitpos to integer registers. */
6255 function_arg_record_value_3 (HOST_WIDE_INT bitpos,
6256 struct function_arg_record_value_parms *parms)
6258 enum machine_mode mode;
6260 unsigned int startbit, endbit;
6261 int this_slotno, intslots, intoffset;
6264 if (parms->intoffset == -1)
6267 intoffset = parms->intoffset;
6268 parms->intoffset = -1;
6270 startbit = intoffset & -BITS_PER_WORD;
6271 endbit = (bitpos + BITS_PER_WORD - 1) & -BITS_PER_WORD;
6272 intslots = (endbit - startbit) / BITS_PER_WORD;
6273 this_slotno = parms->slotno + intoffset / BITS_PER_WORD;
6275 intslots = MIN (intslots, SPARC_INT_ARG_MAX - this_slotno);
6279 /* If this is the trailing part of a word, only load that much into
6280 the register. Otherwise load the whole register. Note that in
6281 the latter case we may pick up unwanted bits. It's not a problem
6282 at the moment but may wish to revisit. */
6284 if (intoffset % BITS_PER_WORD != 0)
6285 mode = smallest_mode_for_size (BITS_PER_WORD - intoffset % BITS_PER_WORD,
6290 intoffset /= BITS_PER_UNIT;
6293 regno = parms->regbase + this_slotno;
6294 reg = gen_rtx_REG (mode, regno);
6295 XVECEXP (parms->ret, 0, parms->stack + parms->nregs)
6296 = gen_rtx_EXPR_LIST (VOIDmode, reg, GEN_INT (intoffset));
6299 intoffset = (intoffset | (UNITS_PER_WORD-1)) + 1;
6304 while (intslots > 0);
6307 /* A subroutine of function_arg_record_value. Traverse the structure
6308 recursively and assign bits to floating point registers. Track which
6309 bits in between need integer registers; invoke function_arg_record_value_3
6310 to make that happen. */
6313 function_arg_record_value_2 (const_tree type, HOST_WIDE_INT startbitpos,
6314 struct function_arg_record_value_parms *parms,
6320 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
6322 if (TREE_CODE (field) == FIELD_DECL && DECL_PACKED (field))
6329 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
6331 if (TREE_CODE (field) == FIELD_DECL)
6333 HOST_WIDE_INT bitpos = startbitpos;
6335 if (DECL_SIZE (field) != 0)
6337 if (integer_zerop (DECL_SIZE (field)))
6340 if (host_integerp (bit_position (field), 1))
6341 bitpos += int_bit_position (field);
6344 /* ??? FIXME: else assume zero offset. */
6346 if (TREE_CODE (TREE_TYPE (field)) == RECORD_TYPE)
6347 function_arg_record_value_2 (TREE_TYPE (field),
6351 else if ((FLOAT_TYPE_P (TREE_TYPE (field))
6352 || TREE_CODE (TREE_TYPE (field)) == VECTOR_TYPE)
6357 int this_slotno = parms->slotno + bitpos / BITS_PER_WORD;
6358 int regno, nregs, pos;
6359 enum machine_mode mode = DECL_MODE (field);
6362 function_arg_record_value_3 (bitpos, parms);
6364 if (TREE_CODE (TREE_TYPE (field)) == VECTOR_TYPE
6367 mode = TYPE_MODE (TREE_TYPE (TREE_TYPE (field)));
6368 nregs = TYPE_VECTOR_SUBPARTS (TREE_TYPE (field));
6370 else if (TREE_CODE (TREE_TYPE (field)) == COMPLEX_TYPE)
6372 mode = TYPE_MODE (TREE_TYPE (TREE_TYPE (field)));
6378 regno = SPARC_FP_ARG_FIRST + this_slotno * 2;
6379 if (GET_MODE_SIZE (mode) <= 4 && (bitpos & 32) != 0)
6381 reg = gen_rtx_REG (mode, regno);
6382 pos = bitpos / BITS_PER_UNIT;
6383 XVECEXP (parms->ret, 0, parms->stack + parms->nregs)
6384 = gen_rtx_EXPR_LIST (VOIDmode, reg, GEN_INT (pos));
6388 regno += GET_MODE_SIZE (mode) / 4;
6389 reg = gen_rtx_REG (mode, regno);
6390 pos += GET_MODE_SIZE (mode);
6391 XVECEXP (parms->ret, 0, parms->stack + parms->nregs)
6392 = gen_rtx_EXPR_LIST (VOIDmode, reg, GEN_INT (pos));
6398 if (parms->intoffset == -1)
6399 parms->intoffset = bitpos;
6405 /* Used by function_arg and sparc_function_value_1 to implement the complex
6406 conventions of the 64-bit ABI for passing and returning structures.
6407 Return an expression valid as a return value for the FUNCTION_ARG
6408 and TARGET_FUNCTION_VALUE.
6410 TYPE is the data type of the argument (as a tree).
6411 This is null for libcalls where that information may
6413 MODE is the argument's machine mode.
6414 SLOTNO is the index number of the argument's slot in the parameter array.
6415 NAMED is nonzero if this argument is a named parameter
6416 (otherwise it is an extra parameter matching an ellipsis).
6417 REGBASE is the regno of the base register for the parameter array. */
6420 function_arg_record_value (const_tree type, enum machine_mode mode,
6421 int slotno, int named, int regbase)
6423 HOST_WIDE_INT typesize = int_size_in_bytes (type);
6424 struct function_arg_record_value_parms parms;
6427 parms.ret = NULL_RTX;
6428 parms.slotno = slotno;
6429 parms.named = named;
6430 parms.regbase = regbase;
6433 /* Compute how many registers we need. */
6435 parms.intoffset = 0;
6436 function_arg_record_value_1 (type, 0, &parms, false);
6438 /* Take into account pending integer fields. */
6439 if (parms.intoffset != -1)
6441 unsigned int startbit, endbit;
6442 int intslots, this_slotno;
6444 startbit = parms.intoffset & -BITS_PER_WORD;
6445 endbit = (typesize*BITS_PER_UNIT + BITS_PER_WORD - 1) & -BITS_PER_WORD;
6446 intslots = (endbit - startbit) / BITS_PER_WORD;
6447 this_slotno = slotno + parms.intoffset / BITS_PER_WORD;
6449 if (intslots > 0 && intslots > SPARC_INT_ARG_MAX - this_slotno)
6451 intslots = MAX (0, SPARC_INT_ARG_MAX - this_slotno);
6452 /* We need to pass this field on the stack. */
6456 parms.nregs += intslots;
6458 nregs = parms.nregs;
6460 /* Allocate the vector and handle some annoying special cases. */
6463 /* ??? Empty structure has no value? Duh? */
6466 /* Though there's nothing really to store, return a word register
6467 anyway so the rest of gcc doesn't go nuts. Returning a PARALLEL
6468 leads to breakage due to the fact that there are zero bytes to
6470 return gen_rtx_REG (mode, regbase);
6474 /* ??? C++ has structures with no fields, and yet a size. Give up
6475 for now and pass everything back in integer registers. */
6476 nregs = (typesize + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
6478 if (nregs + slotno > SPARC_INT_ARG_MAX)
6479 nregs = SPARC_INT_ARG_MAX - slotno;
6481 gcc_assert (nregs != 0);
6483 parms.ret = gen_rtx_PARALLEL (mode, rtvec_alloc (parms.stack + nregs));
6485 /* If at least one field must be passed on the stack, generate
6486 (parallel [(expr_list (nil) ...) ...]) so that all fields will
6487 also be passed on the stack. We can't do much better because the
6488 semantics of TARGET_ARG_PARTIAL_BYTES doesn't handle the case
6489 of structures for which the fields passed exclusively in registers
6490 are not at the beginning of the structure. */
6492 XVECEXP (parms.ret, 0, 0)
6493 = gen_rtx_EXPR_LIST (VOIDmode, NULL_RTX, const0_rtx);
6495 /* Fill in the entries. */
6497 parms.intoffset = 0;
6498 function_arg_record_value_2 (type, 0, &parms, false);
6499 function_arg_record_value_3 (typesize * BITS_PER_UNIT, &parms);
6501 gcc_assert (parms.nregs == nregs);
6506 /* Used by function_arg and sparc_function_value_1 to implement the conventions
6507 of the 64-bit ABI for passing and returning unions.
6508 Return an expression valid as a return value for the FUNCTION_ARG
6509 and TARGET_FUNCTION_VALUE.
6511 SIZE is the size in bytes of the union.
6512 MODE is the argument's machine mode.
6513 REGNO is the hard register the union will be passed in. */
6516 function_arg_union_value (int size, enum machine_mode mode, int slotno,
6519 int nwords = ROUND_ADVANCE (size), i;
6522 /* See comment in previous function for empty structures. */
6524 return gen_rtx_REG (mode, regno);
6526 if (slotno == SPARC_INT_ARG_MAX - 1)
6529 regs = gen_rtx_PARALLEL (mode, rtvec_alloc (nwords));
6531 for (i = 0; i < nwords; i++)
6533 /* Unions are passed left-justified. */
6534 XVECEXP (regs, 0, i)
6535 = gen_rtx_EXPR_LIST (VOIDmode,
6536 gen_rtx_REG (word_mode, regno),
6537 GEN_INT (UNITS_PER_WORD * i));
6544 /* Used by function_arg and sparc_function_value_1 to implement the conventions
6545 for passing and returning large (BLKmode) vectors.
6546 Return an expression valid as a return value for the FUNCTION_ARG
6547 and TARGET_FUNCTION_VALUE.
6549 SIZE is the size in bytes of the vector (at least 8 bytes).
6550 REGNO is the FP hard register the vector will be passed in. */
6553 function_arg_vector_value (int size, int regno)
6555 int i, nregs = size / 8;
6558 regs = gen_rtx_PARALLEL (BLKmode, rtvec_alloc (nregs));
6560 for (i = 0; i < nregs; i++)
6562 XVECEXP (regs, 0, i)
6563 = gen_rtx_EXPR_LIST (VOIDmode,
6564 gen_rtx_REG (DImode, regno + 2*i),
6571 /* Determine where to put an argument to a function.
6572 Value is zero to push the argument on the stack,
6573 or a hard register in which to store the argument.
6575 CUM is a variable of type CUMULATIVE_ARGS which gives info about
6576 the preceding args and about the function being called.
6577 MODE is the argument's machine mode.
6578 TYPE is the data type of the argument (as a tree).
6579 This is null for libcalls where that information may
6581 NAMED is true if this argument is a named parameter
6582 (otherwise it is an extra parameter matching an ellipsis).
6583 INCOMING_P is false for TARGET_FUNCTION_ARG, true for
6584 TARGET_FUNCTION_INCOMING_ARG. */
6587 sparc_function_arg_1 (cumulative_args_t cum_v, enum machine_mode mode,
6588 const_tree type, bool named, bool incoming_p)
6590 const CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
6592 int regbase = (incoming_p
6593 ? SPARC_INCOMING_INT_ARG_FIRST
6594 : SPARC_OUTGOING_INT_ARG_FIRST);
6595 int slotno, regno, padding;
6596 enum mode_class mclass = GET_MODE_CLASS (mode);
6598 slotno = function_arg_slotno (cum, mode, type, named, incoming_p,
6603 /* Vector types deserve special treatment because they are polymorphic wrt
6604 their mode, depending upon whether VIS instructions are enabled. */
6605 if (type && TREE_CODE (type) == VECTOR_TYPE)
6607 HOST_WIDE_INT size = int_size_in_bytes (type);
6608 gcc_assert ((TARGET_ARCH32 && size <= 8)
6609 || (TARGET_ARCH64 && size <= 16));
6611 if (mode == BLKmode)
6612 return function_arg_vector_value (size,
6613 SPARC_FP_ARG_FIRST + 2*slotno);
6615 mclass = MODE_FLOAT;
6619 return gen_rtx_REG (mode, regno);
6621 /* Structures up to 16 bytes in size are passed in arg slots on the stack
6622 and are promoted to registers if possible. */
6623 if (type && TREE_CODE (type) == RECORD_TYPE)
6625 HOST_WIDE_INT size = int_size_in_bytes (type);
6626 gcc_assert (size <= 16);
6628 return function_arg_record_value (type, mode, slotno, named, regbase);
6631 /* Unions up to 16 bytes in size are passed in integer registers. */
6632 else if (type && TREE_CODE (type) == UNION_TYPE)
6634 HOST_WIDE_INT size = int_size_in_bytes (type);
6635 gcc_assert (size <= 16);
6637 return function_arg_union_value (size, mode, slotno, regno);
6640 /* v9 fp args in reg slots beyond the int reg slots get passed in regs
6641 but also have the slot allocated for them.
6642 If no prototype is in scope fp values in register slots get passed
6643 in two places, either fp regs and int regs or fp regs and memory. */
6644 else if ((mclass == MODE_FLOAT || mclass == MODE_COMPLEX_FLOAT)
6645 && SPARC_FP_REG_P (regno))
6647 rtx reg = gen_rtx_REG (mode, regno);
6648 if (cum->prototype_p || cum->libcall_p)
6650 /* "* 2" because fp reg numbers are recorded in 4 byte
6653 /* ??? This will cause the value to be passed in the fp reg and
6654 in the stack. When a prototype exists we want to pass the
6655 value in the reg but reserve space on the stack. That's an
6656 optimization, and is deferred [for a bit]. */
6657 if ((regno - SPARC_FP_ARG_FIRST) >= SPARC_INT_ARG_MAX * 2)
6658 return gen_rtx_PARALLEL (mode,
6660 gen_rtx_EXPR_LIST (VOIDmode,
6661 NULL_RTX, const0_rtx),
6662 gen_rtx_EXPR_LIST (VOIDmode,
6666 /* ??? It seems that passing back a register even when past
6667 the area declared by REG_PARM_STACK_SPACE will allocate
6668 space appropriately, and will not copy the data onto the
6669 stack, exactly as we desire.
6671 This is due to locate_and_pad_parm being called in
6672 expand_call whenever reg_parm_stack_space > 0, which
6673 while beneficial to our example here, would seem to be
6674 in error from what had been intended. Ho hum... -- r~ */
6682 if ((regno - SPARC_FP_ARG_FIRST) < SPARC_INT_ARG_MAX * 2)
6686 /* On incoming, we don't need to know that the value
6687 is passed in %f0 and %i0, and it confuses other parts
6688 causing needless spillage even on the simplest cases. */
6692 intreg = (SPARC_OUTGOING_INT_ARG_FIRST
6693 + (regno - SPARC_FP_ARG_FIRST) / 2);
6695 v0 = gen_rtx_EXPR_LIST (VOIDmode, reg, const0_rtx);
6696 v1 = gen_rtx_EXPR_LIST (VOIDmode, gen_rtx_REG (mode, intreg),
6698 return gen_rtx_PARALLEL (mode, gen_rtvec (2, v0, v1));
6702 v0 = gen_rtx_EXPR_LIST (VOIDmode, NULL_RTX, const0_rtx);
6703 v1 = gen_rtx_EXPR_LIST (VOIDmode, reg, const0_rtx);
6704 return gen_rtx_PARALLEL (mode, gen_rtvec (2, v0, v1));
6709 /* All other aggregate types are passed in an integer register in a mode
6710 corresponding to the size of the type. */
6711 else if (type && AGGREGATE_TYPE_P (type))
6713 HOST_WIDE_INT size = int_size_in_bytes (type);
6714 gcc_assert (size <= 16);
6716 mode = mode_for_size (size * BITS_PER_UNIT, MODE_INT, 0);
6719 return gen_rtx_REG (mode, regno);
6722 /* Handle the TARGET_FUNCTION_ARG target hook. */
6725 sparc_function_arg (cumulative_args_t cum, enum machine_mode mode,
6726 const_tree type, bool named)
6728 return sparc_function_arg_1 (cum, mode, type, named, false);
6731 /* Handle the TARGET_FUNCTION_INCOMING_ARG target hook. */
6734 sparc_function_incoming_arg (cumulative_args_t cum, enum machine_mode mode,
6735 const_tree type, bool named)
6737 return sparc_function_arg_1 (cum, mode, type, named, true);
6740 /* For sparc64, objects requiring 16 byte alignment are passed that way. */
6743 sparc_function_arg_boundary (enum machine_mode mode, const_tree type)
6745 return ((TARGET_ARCH64
6746 && (GET_MODE_ALIGNMENT (mode) == 128
6747 || (type && TYPE_ALIGN (type) == 128)))
6752 /* For an arg passed partly in registers and partly in memory,
6753 this is the number of bytes of registers used.
6754 For args passed entirely in registers or entirely in memory, zero.
6756 Any arg that starts in the first 6 regs but won't entirely fit in them
6757 needs partial registers on v8. On v9, structures with integer
6758 values in arg slots 5,6 will be passed in %o5 and SP+176, and complex fp
6759 values that begin in the last fp reg [where "last fp reg" varies with the
6760 mode] will be split between that reg and memory. */
6763 sparc_arg_partial_bytes (cumulative_args_t cum, enum machine_mode mode,
6764 tree type, bool named)
6766 int slotno, regno, padding;
6768 /* We pass false for incoming_p here, it doesn't matter. */
6769 slotno = function_arg_slotno (get_cumulative_args (cum), mode, type, named,
6770 false, ®no, &padding);
6777 if ((slotno + (mode == BLKmode
6778 ? ROUND_ADVANCE (int_size_in_bytes (type))
6779 : ROUND_ADVANCE (GET_MODE_SIZE (mode))))
6780 > SPARC_INT_ARG_MAX)
6781 return (SPARC_INT_ARG_MAX - slotno) * UNITS_PER_WORD;
6785 /* We are guaranteed by pass_by_reference that the size of the
6786 argument is not greater than 16 bytes, so we only need to return
6787 one word if the argument is partially passed in registers. */
6789 if (type && AGGREGATE_TYPE_P (type))
6791 int size = int_size_in_bytes (type);
6793 if (size > UNITS_PER_WORD
6794 && slotno == SPARC_INT_ARG_MAX - 1)
6795 return UNITS_PER_WORD;
6797 else if (GET_MODE_CLASS (mode) == MODE_COMPLEX_INT
6798 || (GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT
6799 && ! (TARGET_FPU && named)))
6801 /* The complex types are passed as packed types. */
6802 if (GET_MODE_SIZE (mode) > UNITS_PER_WORD
6803 && slotno == SPARC_INT_ARG_MAX - 1)
6804 return UNITS_PER_WORD;
6806 else if (GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT)
6808 if ((slotno + GET_MODE_SIZE (mode) / UNITS_PER_WORD)
6810 return UNITS_PER_WORD;
6817 /* Handle the TARGET_PASS_BY_REFERENCE target hook.
6818 Specify whether to pass the argument by reference. */
6821 sparc_pass_by_reference (cumulative_args_t cum ATTRIBUTE_UNUSED,
6822 enum machine_mode mode, const_tree type,
6823 bool named ATTRIBUTE_UNUSED)
6826 /* Original SPARC 32-bit ABI says that structures and unions,
6827 and quad-precision floats are passed by reference. For Pascal,
6828 also pass arrays by reference. All other base types are passed
6831 Extended ABI (as implemented by the Sun compiler) says that all
6832 complex floats are passed by reference. Pass complex integers
6833 in registers up to 8 bytes. More generally, enforce the 2-word
6834 cap for passing arguments in registers.
6836 Vector ABI (as implemented by the Sun VIS SDK) says that vector
6837 integers are passed like floats of the same size, that is in
6838 registers up to 8 bytes. Pass all vector floats by reference
6839 like structure and unions. */
6840 return ((type && (AGGREGATE_TYPE_P (type) || VECTOR_FLOAT_TYPE_P (type)))
6842 /* Catch CDImode, TFmode, DCmode and TCmode. */
6843 || GET_MODE_SIZE (mode) > 8
6845 && TREE_CODE (type) == VECTOR_TYPE
6846 && (unsigned HOST_WIDE_INT) int_size_in_bytes (type) > 8));
6848 /* Original SPARC 64-bit ABI says that structures and unions
6849 smaller than 16 bytes are passed in registers, as well as
6850 all other base types.
6852 Extended ABI (as implemented by the Sun compiler) says that
6853 complex floats are passed in registers up to 16 bytes. Pass
6854 all complex integers in registers up to 16 bytes. More generally,
6855 enforce the 2-word cap for passing arguments in registers.
6857 Vector ABI (as implemented by the Sun VIS SDK) says that vector
6858 integers are passed like floats of the same size, that is in
6859 registers (up to 16 bytes). Pass all vector floats like structure
6862 && (AGGREGATE_TYPE_P (type) || TREE_CODE (type) == VECTOR_TYPE)
6863 && (unsigned HOST_WIDE_INT) int_size_in_bytes (type) > 16)
6864 /* Catch CTImode and TCmode. */
6865 || GET_MODE_SIZE (mode) > 16);
6868 /* Handle the TARGET_FUNCTION_ARG_ADVANCE hook.
6869 Update the data in CUM to advance over an argument
6870 of mode MODE and data type TYPE.
6871 TYPE is null for libcalls where that information may not be available. */
6874 sparc_function_arg_advance (cumulative_args_t cum_v, enum machine_mode mode,
6875 const_tree type, bool named)
6877 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
6880 /* We pass false for incoming_p here, it doesn't matter. */
6881 function_arg_slotno (cum, mode, type, named, false, ®no, &padding);
6883 /* If argument requires leading padding, add it. */
6884 cum->words += padding;
6888 cum->words += (mode != BLKmode
6889 ? ROUND_ADVANCE (GET_MODE_SIZE (mode))
6890 : ROUND_ADVANCE (int_size_in_bytes (type)));
6894 if (type && AGGREGATE_TYPE_P (type))
6896 int size = int_size_in_bytes (type);
6900 else if (size <= 16)
6902 else /* passed by reference */
6907 cum->words += (mode != BLKmode
6908 ? ROUND_ADVANCE (GET_MODE_SIZE (mode))
6909 : ROUND_ADVANCE (int_size_in_bytes (type)));
6914 /* Handle the FUNCTION_ARG_PADDING macro.
6915 For the 64 bit ABI structs are always stored left shifted in their
6919 function_arg_padding (enum machine_mode mode, const_tree type)
6921 if (TARGET_ARCH64 && type != 0 && AGGREGATE_TYPE_P (type))
6924 /* Fall back to the default. */
6925 return DEFAULT_FUNCTION_ARG_PADDING (mode, type);
6928 /* Handle the TARGET_RETURN_IN_MEMORY target hook.
6929 Specify whether to return the return value in memory. */
6932 sparc_return_in_memory (const_tree type, const_tree fntype ATTRIBUTE_UNUSED)
6935 /* Original SPARC 32-bit ABI says that structures and unions,
6936 and quad-precision floats are returned in memory. All other
6937 base types are returned in registers.
6939 Extended ABI (as implemented by the Sun compiler) says that
6940 all complex floats are returned in registers (8 FP registers
6941 at most for '_Complex long double'). Return all complex integers
6942 in registers (4 at most for '_Complex long long').
6944 Vector ABI (as implemented by the Sun VIS SDK) says that vector
6945 integers are returned like floats of the same size, that is in
6946 registers up to 8 bytes and in memory otherwise. Return all
6947 vector floats in memory like structure and unions; note that
6948 they always have BLKmode like the latter. */
6949 return (TYPE_MODE (type) == BLKmode
6950 || TYPE_MODE (type) == TFmode
6951 || (TREE_CODE (type) == VECTOR_TYPE
6952 && (unsigned HOST_WIDE_INT) int_size_in_bytes (type) > 8));
6954 /* Original SPARC 64-bit ABI says that structures and unions
6955 smaller than 32 bytes are returned in registers, as well as
6956 all other base types.
6958 Extended ABI (as implemented by the Sun compiler) says that all
6959 complex floats are returned in registers (8 FP registers at most
6960 for '_Complex long double'). Return all complex integers in
6961 registers (4 at most for '_Complex TItype').
6963 Vector ABI (as implemented by the Sun VIS SDK) says that vector
6964 integers are returned like floats of the same size, that is in
6965 registers. Return all vector floats like structure and unions;
6966 note that they always have BLKmode like the latter. */
6967 return (TYPE_MODE (type) == BLKmode
6968 && (unsigned HOST_WIDE_INT) int_size_in_bytes (type) > 32);
6971 /* Handle the TARGET_STRUCT_VALUE target hook.
6972 Return where to find the structure return value address. */
6975 sparc_struct_value_rtx (tree fndecl, int incoming)
6984 mem = gen_frame_mem (Pmode, plus_constant (Pmode, frame_pointer_rtx,
6985 STRUCT_VALUE_OFFSET));
6987 mem = gen_frame_mem (Pmode, plus_constant (Pmode, stack_pointer_rtx,
6988 STRUCT_VALUE_OFFSET));
6990 /* Only follow the SPARC ABI for fixed-size structure returns.
6991 Variable size structure returns are handled per the normal
6992 procedures in GCC. This is enabled by -mstd-struct-return */
6994 && sparc_std_struct_return
6995 && TYPE_SIZE_UNIT (TREE_TYPE (fndecl))
6996 && TREE_CODE (TYPE_SIZE_UNIT (TREE_TYPE (fndecl))) == INTEGER_CST)
6998 /* We must check and adjust the return address, as it is
6999 optional as to whether the return object is really
7001 rtx ret_reg = gen_rtx_REG (Pmode, 31);
7002 rtx scratch = gen_reg_rtx (SImode);
7003 rtx endlab = gen_label_rtx ();
7005 /* Calculate the return object size */
7006 tree size = TYPE_SIZE_UNIT (TREE_TYPE (fndecl));
7007 rtx size_rtx = GEN_INT (TREE_INT_CST_LOW (size) & 0xfff);
7008 /* Construct a temporary return value */
7010 = assign_stack_local (Pmode, TREE_INT_CST_LOW (size), 0);
7012 /* Implement SPARC 32-bit psABI callee return struct checking:
7014 Fetch the instruction where we will return to and see if
7015 it's an unimp instruction (the most significant 10 bits
7017 emit_move_insn (scratch, gen_rtx_MEM (SImode,
7018 plus_constant (Pmode,
7020 /* Assume the size is valid and pre-adjust */
7021 emit_insn (gen_add3_insn (ret_reg, ret_reg, GEN_INT (4)));
7022 emit_cmp_and_jump_insns (scratch, size_rtx, EQ, const0_rtx, SImode,
7024 emit_insn (gen_sub3_insn (ret_reg, ret_reg, GEN_INT (4)));
7025 /* Write the address of the memory pointed to by temp_val into
7026 the memory pointed to by mem */
7027 emit_move_insn (mem, XEXP (temp_val, 0));
7028 emit_label (endlab);
7035 /* Handle TARGET_FUNCTION_VALUE, and TARGET_LIBCALL_VALUE target hook.
7036 For v9, function return values are subject to the same rules as arguments,
7037 except that up to 32 bytes may be returned in registers. */
7040 sparc_function_value_1 (const_tree type, enum machine_mode mode,
7043 /* Beware that the two values are swapped here wrt function_arg. */
7044 int regbase = (outgoing
7045 ? SPARC_INCOMING_INT_ARG_FIRST
7046 : SPARC_OUTGOING_INT_ARG_FIRST);
7047 enum mode_class mclass = GET_MODE_CLASS (mode);
7050 /* Vector types deserve special treatment because they are polymorphic wrt
7051 their mode, depending upon whether VIS instructions are enabled. */
7052 if (type && TREE_CODE (type) == VECTOR_TYPE)
7054 HOST_WIDE_INT size = int_size_in_bytes (type);
7055 gcc_assert ((TARGET_ARCH32 && size <= 8)
7056 || (TARGET_ARCH64 && size <= 32));
7058 if (mode == BLKmode)
7059 return function_arg_vector_value (size,
7060 SPARC_FP_ARG_FIRST);
7062 mclass = MODE_FLOAT;
7065 if (TARGET_ARCH64 && type)
7067 /* Structures up to 32 bytes in size are returned in registers. */
7068 if (TREE_CODE (type) == RECORD_TYPE)
7070 HOST_WIDE_INT size = int_size_in_bytes (type);
7071 gcc_assert (size <= 32);
7073 return function_arg_record_value (type, mode, 0, 1, regbase);
7076 /* Unions up to 32 bytes in size are returned in integer registers. */
7077 else if (TREE_CODE (type) == UNION_TYPE)
7079 HOST_WIDE_INT size = int_size_in_bytes (type);
7080 gcc_assert (size <= 32);
7082 return function_arg_union_value (size, mode, 0, regbase);
7085 /* Objects that require it are returned in FP registers. */
7086 else if (mclass == MODE_FLOAT || mclass == MODE_COMPLEX_FLOAT)
7089 /* All other aggregate types are returned in an integer register in a
7090 mode corresponding to the size of the type. */
7091 else if (AGGREGATE_TYPE_P (type))
7093 /* All other aggregate types are passed in an integer register
7094 in a mode corresponding to the size of the type. */
7095 HOST_WIDE_INT size = int_size_in_bytes (type);
7096 gcc_assert (size <= 32);
7098 mode = mode_for_size (size * BITS_PER_UNIT, MODE_INT, 0);
7100 /* ??? We probably should have made the same ABI change in
7101 3.4.0 as the one we made for unions. The latter was
7102 required by the SCD though, while the former is not
7103 specified, so we favored compatibility and efficiency.
7105 Now we're stuck for aggregates larger than 16 bytes,
7106 because OImode vanished in the meantime. Let's not
7107 try to be unduly clever, and simply follow the ABI
7108 for unions in that case. */
7109 if (mode == BLKmode)
7110 return function_arg_union_value (size, mode, 0, regbase);
7115 /* We should only have pointer and integer types at this point. This
7116 must match sparc_promote_function_mode. */
7117 else if (mclass == MODE_INT && GET_MODE_SIZE (mode) < UNITS_PER_WORD)
7121 /* We should only have pointer and integer types at this point. This must
7122 match sparc_promote_function_mode. */
7123 else if (TARGET_ARCH32
7124 && mclass == MODE_INT
7125 && GET_MODE_SIZE (mode) < UNITS_PER_WORD)
7128 if ((mclass == MODE_FLOAT || mclass == MODE_COMPLEX_FLOAT) && TARGET_FPU)
7129 regno = SPARC_FP_ARG_FIRST;
7133 return gen_rtx_REG (mode, regno);
7136 /* Handle TARGET_FUNCTION_VALUE.
7137 On the SPARC, the value is found in the first "output" register, but the
7138 called function leaves it in the first "input" register. */
7141 sparc_function_value (const_tree valtype,
7142 const_tree fn_decl_or_type ATTRIBUTE_UNUSED,
7145 return sparc_function_value_1 (valtype, TYPE_MODE (valtype), outgoing);
7148 /* Handle TARGET_LIBCALL_VALUE. */
7151 sparc_libcall_value (enum machine_mode mode,
7152 const_rtx fun ATTRIBUTE_UNUSED)
7154 return sparc_function_value_1 (NULL_TREE, mode, false);
7157 /* Handle FUNCTION_VALUE_REGNO_P.
7158 On the SPARC, the first "output" reg is used for integer values, and the
7159 first floating point register is used for floating point values. */
7162 sparc_function_value_regno_p (const unsigned int regno)
7164 return (regno == 8 || regno == 32);
7167 /* Do what is necessary for `va_start'. We look at the current function
7168 to determine if stdarg or varargs is used and return the address of
7169 the first unnamed parameter. */
7172 sparc_builtin_saveregs (void)
7174 int first_reg = crtl->args.info.words;
7178 for (regno = first_reg; regno < SPARC_INT_ARG_MAX; regno++)
7179 emit_move_insn (gen_rtx_MEM (word_mode,
7180 gen_rtx_PLUS (Pmode,
7182 GEN_INT (FIRST_PARM_OFFSET (0)
7185 gen_rtx_REG (word_mode,
7186 SPARC_INCOMING_INT_ARG_FIRST + regno));
7188 address = gen_rtx_PLUS (Pmode,
7190 GEN_INT (FIRST_PARM_OFFSET (0)
7191 + UNITS_PER_WORD * first_reg));
7196 /* Implement `va_start' for stdarg. */
7199 sparc_va_start (tree valist, rtx nextarg)
7201 nextarg = expand_builtin_saveregs ();
7202 std_expand_builtin_va_start (valist, nextarg);
7205 /* Implement `va_arg' for stdarg. */
7208 sparc_gimplify_va_arg (tree valist, tree type, gimple_seq *pre_p,
7211 HOST_WIDE_INT size, rsize, align;
7214 tree ptrtype = build_pointer_type (type);
7216 if (pass_by_reference (NULL, TYPE_MODE (type), type, false))
7219 size = rsize = UNITS_PER_WORD;
7225 size = int_size_in_bytes (type);
7226 rsize = (size + UNITS_PER_WORD - 1) & -UNITS_PER_WORD;
7231 /* For SPARC64, objects requiring 16-byte alignment get it. */
7232 if (TYPE_ALIGN (type) >= 2 * (unsigned) BITS_PER_WORD)
7233 align = 2 * UNITS_PER_WORD;
7235 /* SPARC-V9 ABI states that structures up to 16 bytes in size
7236 are left-justified in their slots. */
7237 if (AGGREGATE_TYPE_P (type))
7240 size = rsize = UNITS_PER_WORD;
7250 incr = fold_build_pointer_plus_hwi (incr, align - 1);
7251 incr = fold_convert (sizetype, incr);
7252 incr = fold_build2 (BIT_AND_EXPR, sizetype, incr,
7254 incr = fold_convert (ptr_type_node, incr);
7257 gimplify_expr (&incr, pre_p, post_p, is_gimple_val, fb_rvalue);
7260 if (BYTES_BIG_ENDIAN && size < rsize)
7261 addr = fold_build_pointer_plus_hwi (incr, rsize - size);
7265 addr = fold_convert (build_pointer_type (ptrtype), addr);
7266 addr = build_va_arg_indirect_ref (addr);
7269 /* If the address isn't aligned properly for the type, we need a temporary.
7270 FIXME: This is inefficient, usually we can do this in registers. */
7271 else if (align == 0 && TYPE_ALIGN (type) > BITS_PER_WORD)
7273 tree tmp = create_tmp_var (type, "va_arg_tmp");
7274 tree dest_addr = build_fold_addr_expr (tmp);
7275 tree copy = build_call_expr (builtin_decl_implicit (BUILT_IN_MEMCPY),
7276 3, dest_addr, addr, size_int (rsize));
7277 TREE_ADDRESSABLE (tmp) = 1;
7278 gimplify_and_add (copy, pre_p);
7283 addr = fold_convert (ptrtype, addr);
7285 incr = fold_build_pointer_plus_hwi (incr, rsize);
7286 gimplify_assign (valist, incr, post_p);
7288 return build_va_arg_indirect_ref (addr);
7291 /* Implement the TARGET_VECTOR_MODE_SUPPORTED_P target hook.
7292 Specify whether the vector mode is supported by the hardware. */
7295 sparc_vector_mode_supported_p (enum machine_mode mode)
7297 return TARGET_VIS && VECTOR_MODE_P (mode) ? true : false;
7300 /* Implement the TARGET_VECTORIZE_PREFERRED_SIMD_MODE target hook. */
7302 static enum machine_mode
7303 sparc_preferred_simd_mode (enum machine_mode mode)
7321 /* Return the string to output an unconditional branch to LABEL, which is
7322 the operand number of the label.
7324 DEST is the destination insn (i.e. the label), INSN is the source. */
7327 output_ubranch (rtx dest, rtx insn)
7329 static char string[64];
7330 bool v9_form = false;
7334 /* Even if we are trying to use cbcond for this, evaluate
7335 whether we can use V9 branches as our backup plan. */
7338 if (INSN_ADDRESSES_SET_P ())
7339 delta = (INSN_ADDRESSES (INSN_UID (dest))
7340 - INSN_ADDRESSES (INSN_UID (insn)));
7342 /* Leave some instructions for "slop". */
7343 if (TARGET_V9 && delta >= -260000 && delta < 260000)
7348 bool emit_nop = emit_cbcond_nop (insn);
7352 if (delta < -500 || delta > 500)
7358 rval = "ba,a,pt\t%%xcc, %l0";
7365 rval = "cwbe\t%%g0, %%g0, %l0\n\tnop";
7367 rval = "cwbe\t%%g0, %%g0, %l0";
7373 strcpy (string, "ba%*,pt\t%%xcc, ");
7375 strcpy (string, "b%*\t");
7377 p = strchr (string, '\0');
7388 /* Return the string to output a conditional branch to LABEL, which is
7389 the operand number of the label. OP is the conditional expression.
7390 XEXP (OP, 0) is assumed to be a condition code register (integer or
7391 floating point) and its mode specifies what kind of comparison we made.
7393 DEST is the destination insn (i.e. the label), INSN is the source.
7395 REVERSED is nonzero if we should reverse the sense of the comparison.
7397 ANNUL is nonzero if we should generate an annulling branch. */
7400 output_cbranch (rtx op, rtx dest, int label, int reversed, int annul,
7403 static char string[64];
7404 enum rtx_code code = GET_CODE (op);
7405 rtx cc_reg = XEXP (op, 0);
7406 enum machine_mode mode = GET_MODE (cc_reg);
7407 const char *labelno, *branch;
7408 int spaces = 8, far;
7411 /* v9 branches are limited to +-1MB. If it is too far away,
7424 fbne,a,pn %fcc2, .LC29
7432 far = TARGET_V9 && (get_attr_length (insn) >= 3);
7435 /* Reversal of FP compares takes care -- an ordered compare
7436 becomes an unordered compare and vice versa. */
7437 if (mode == CCFPmode || mode == CCFPEmode)
7438 code = reverse_condition_maybe_unordered (code);
7440 code = reverse_condition (code);
7443 /* Start by writing the branch condition. */
7444 if (mode == CCFPmode || mode == CCFPEmode)
7495 /* ??? !v9: FP branches cannot be preceded by another floating point
7496 insn. Because there is currently no concept of pre-delay slots,
7497 we can fix this only by always emitting a nop before a floating
7502 strcpy (string, "nop\n\t");
7503 strcat (string, branch);
7516 if (mode == CC_NOOVmode || mode == CCX_NOOVmode)
7528 if (mode == CC_NOOVmode || mode == CCX_NOOVmode)
7549 strcpy (string, branch);
7551 spaces -= strlen (branch);
7552 p = strchr (string, '\0');
7554 /* Now add the annulling, the label, and a possible noop. */
7567 if (! far && insn && INSN_ADDRESSES_SET_P ())
7569 int delta = (INSN_ADDRESSES (INSN_UID (dest))
7570 - INSN_ADDRESSES (INSN_UID (insn)));
7571 /* Leave some instructions for "slop". */
7572 if (delta < -260000 || delta >= 260000)
7576 if (mode == CCFPmode || mode == CCFPEmode)
7578 static char v9_fcc_labelno[] = "%%fccX, ";
7579 /* Set the char indicating the number of the fcc reg to use. */
7580 v9_fcc_labelno[5] = REGNO (cc_reg) - SPARC_FIRST_V9_FCC_REG + '0';
7581 labelno = v9_fcc_labelno;
7584 gcc_assert (REGNO (cc_reg) == SPARC_FCC_REG);
7588 else if (mode == CCXmode || mode == CCX_NOOVmode)
7590 labelno = "%%xcc, ";
7595 labelno = "%%icc, ";
7600 if (*labelno && insn && (note = find_reg_note (insn, REG_BR_PROB, NULL_RTX)))
7603 ((INTVAL (XEXP (note, 0)) >= REG_BR_PROB_BASE / 2) ^ far)
7616 strcpy (p, labelno);
7617 p = strchr (p, '\0');
7620 strcpy (p, ".+12\n\t nop\n\tb\t");
7621 /* Skip the next insn if requested or
7622 if we know that it will be a nop. */
7623 if (annul || ! final_sequence)
7637 /* Emit a library call comparison between floating point X and Y.
7638 COMPARISON is the operator to compare with (EQ, NE, GT, etc).
7639 Return the new operator to be used in the comparison sequence.
7641 TARGET_ARCH64 uses _Qp_* functions, which use pointers to TFmode
7642 values as arguments instead of the TFmode registers themselves,
7643 that's why we cannot call emit_float_lib_cmp. */
7646 sparc_emit_float_lib_cmp (rtx x, rtx y, enum rtx_code comparison)
7649 rtx slot0, slot1, result, tem, tem2, libfunc;
7650 enum machine_mode mode;
7651 enum rtx_code new_comparison;
7656 qpfunc = (TARGET_ARCH64 ? "_Qp_feq" : "_Q_feq");
7660 qpfunc = (TARGET_ARCH64 ? "_Qp_fne" : "_Q_fne");
7664 qpfunc = (TARGET_ARCH64 ? "_Qp_fgt" : "_Q_fgt");
7668 qpfunc = (TARGET_ARCH64 ? "_Qp_fge" : "_Q_fge");
7672 qpfunc = (TARGET_ARCH64 ? "_Qp_flt" : "_Q_flt");
7676 qpfunc = (TARGET_ARCH64 ? "_Qp_fle" : "_Q_fle");
7687 qpfunc = (TARGET_ARCH64 ? "_Qp_cmp" : "_Q_cmp");
7698 tree expr = MEM_EXPR (x);
7700 mark_addressable (expr);
7705 slot0 = assign_stack_temp (TFmode, GET_MODE_SIZE(TFmode));
7706 emit_move_insn (slot0, x);
7711 tree expr = MEM_EXPR (y);
7713 mark_addressable (expr);
7718 slot1 = assign_stack_temp (TFmode, GET_MODE_SIZE(TFmode));
7719 emit_move_insn (slot1, y);
7722 libfunc = gen_rtx_SYMBOL_REF (Pmode, qpfunc);
7723 emit_library_call (libfunc, LCT_NORMAL,
7725 XEXP (slot0, 0), Pmode,
7726 XEXP (slot1, 0), Pmode);
7731 libfunc = gen_rtx_SYMBOL_REF (Pmode, qpfunc);
7732 emit_library_call (libfunc, LCT_NORMAL,
7734 x, TFmode, y, TFmode);
7739 /* Immediately move the result of the libcall into a pseudo
7740 register so reload doesn't clobber the value if it needs
7741 the return register for a spill reg. */
7742 result = gen_reg_rtx (mode);
7743 emit_move_insn (result, hard_libcall_value (mode, libfunc));
7748 return gen_rtx_NE (VOIDmode, result, const0_rtx);
7751 new_comparison = (comparison == UNORDERED ? EQ : NE);
7752 return gen_rtx_fmt_ee (new_comparison, VOIDmode, result, GEN_INT(3));
7755 new_comparison = (comparison == UNGT ? GT : NE);
7756 return gen_rtx_fmt_ee (new_comparison, VOIDmode, result, const1_rtx);
7758 return gen_rtx_NE (VOIDmode, result, const2_rtx);
7760 tem = gen_reg_rtx (mode);
7762 emit_insn (gen_andsi3 (tem, result, const1_rtx));
7764 emit_insn (gen_anddi3 (tem, result, const1_rtx));
7765 return gen_rtx_NE (VOIDmode, tem, const0_rtx);
7768 tem = gen_reg_rtx (mode);
7770 emit_insn (gen_addsi3 (tem, result, const1_rtx));
7772 emit_insn (gen_adddi3 (tem, result, const1_rtx));
7773 tem2 = gen_reg_rtx (mode);
7775 emit_insn (gen_andsi3 (tem2, tem, const2_rtx));
7777 emit_insn (gen_anddi3 (tem2, tem, const2_rtx));
7778 new_comparison = (comparison == UNEQ ? EQ : NE);
7779 return gen_rtx_fmt_ee (new_comparison, VOIDmode, tem2, const0_rtx);
7785 /* Generate an unsigned DImode to FP conversion. This is the same code
7786 optabs would emit if we didn't have TFmode patterns. */
7789 sparc_emit_floatunsdi (rtx *operands, enum machine_mode mode)
7791 rtx neglab, donelab, i0, i1, f0, in, out;
7794 in = force_reg (DImode, operands[1]);
7795 neglab = gen_label_rtx ();
7796 donelab = gen_label_rtx ();
7797 i0 = gen_reg_rtx (DImode);
7798 i1 = gen_reg_rtx (DImode);
7799 f0 = gen_reg_rtx (mode);
7801 emit_cmp_and_jump_insns (in, const0_rtx, LT, const0_rtx, DImode, 0, neglab);
7803 emit_insn (gen_rtx_SET (VOIDmode, out, gen_rtx_FLOAT (mode, in)));
7804 emit_jump_insn (gen_jump (donelab));
7807 emit_label (neglab);
7809 emit_insn (gen_lshrdi3 (i0, in, const1_rtx));
7810 emit_insn (gen_anddi3 (i1, in, const1_rtx));
7811 emit_insn (gen_iordi3 (i0, i0, i1));
7812 emit_insn (gen_rtx_SET (VOIDmode, f0, gen_rtx_FLOAT (mode, i0)));
7813 emit_insn (gen_rtx_SET (VOIDmode, out, gen_rtx_PLUS (mode, f0, f0)));
7815 emit_label (donelab);
7818 /* Generate an FP to unsigned DImode conversion. This is the same code
7819 optabs would emit if we didn't have TFmode patterns. */
7822 sparc_emit_fixunsdi (rtx *operands, enum machine_mode mode)
7824 rtx neglab, donelab, i0, i1, f0, in, out, limit;
7827 in = force_reg (mode, operands[1]);
7828 neglab = gen_label_rtx ();
7829 donelab = gen_label_rtx ();
7830 i0 = gen_reg_rtx (DImode);
7831 i1 = gen_reg_rtx (DImode);
7832 limit = gen_reg_rtx (mode);
7833 f0 = gen_reg_rtx (mode);
7835 emit_move_insn (limit,
7836 CONST_DOUBLE_FROM_REAL_VALUE (
7837 REAL_VALUE_ATOF ("9223372036854775808.0", mode), mode));
7838 emit_cmp_and_jump_insns (in, limit, GE, NULL_RTX, mode, 0, neglab);
7840 emit_insn (gen_rtx_SET (VOIDmode,
7842 gen_rtx_FIX (DImode, gen_rtx_FIX (mode, in))));
7843 emit_jump_insn (gen_jump (donelab));
7846 emit_label (neglab);
7848 emit_insn (gen_rtx_SET (VOIDmode, f0, gen_rtx_MINUS (mode, in, limit)));
7849 emit_insn (gen_rtx_SET (VOIDmode,
7851 gen_rtx_FIX (DImode, gen_rtx_FIX (mode, f0))));
7852 emit_insn (gen_movdi (i1, const1_rtx));
7853 emit_insn (gen_ashldi3 (i1, i1, GEN_INT (63)));
7854 emit_insn (gen_xordi3 (out, i0, i1));
7856 emit_label (donelab);
7859 /* Return the string to output a compare and branch instruction to DEST.
7860 DEST is the destination insn (i.e. the label), INSN is the source,
7861 and OP is the conditional expression. */
7864 output_cbcond (rtx op, rtx dest, rtx insn)
7866 enum machine_mode mode = GET_MODE (XEXP (op, 0));
7867 enum rtx_code code = GET_CODE (op);
7868 const char *cond_str, *tmpl;
7869 int far, emit_nop, len;
7870 static char string[64];
7873 /* Compare and Branch is limited to +-2KB. If it is too far away,
7885 len = get_attr_length (insn);
7888 emit_nop = len == 2;
7891 code = reverse_condition (code);
7893 size_char = ((mode == SImode) ? 'w' : 'x');
7906 if (mode == CC_NOOVmode || mode == CCX_NOOVmode)
7921 if (mode == CC_NOOVmode || mode == CCX_NOOVmode)
7949 int veryfar = 1, delta;
7951 if (INSN_ADDRESSES_SET_P ())
7953 delta = (INSN_ADDRESSES (INSN_UID (dest))
7954 - INSN_ADDRESSES (INSN_UID (insn)));
7955 /* Leave some instructions for "slop". */
7956 if (delta >= -260000 && delta < 260000)
7961 tmpl = "c%cb%s\t%%1, %%2, .+16\n\tnop\n\tb\t%%3\n\tnop";
7963 tmpl = "c%cb%s\t%%1, %%2, .+16\n\tnop\n\tba,pt\t%%%%xcc, %%3\n\tnop";
7968 tmpl = "c%cb%s\t%%1, %%2, %%3\n\tnop";
7970 tmpl = "c%cb%s\t%%1, %%2, %%3";
7973 snprintf (string, sizeof(string), tmpl, size_char, cond_str);
7978 /* Return the string to output a conditional branch to LABEL, testing
7979 register REG. LABEL is the operand number of the label; REG is the
7980 operand number of the reg. OP is the conditional expression. The mode
7981 of REG says what kind of comparison we made.
7983 DEST is the destination insn (i.e. the label), INSN is the source.
7985 REVERSED is nonzero if we should reverse the sense of the comparison.
7987 ANNUL is nonzero if we should generate an annulling branch. */
7990 output_v9branch (rtx op, rtx dest, int reg, int label, int reversed,
7991 int annul, rtx insn)
7993 static char string[64];
7994 enum rtx_code code = GET_CODE (op);
7995 enum machine_mode mode = GET_MODE (XEXP (op, 0));
8000 /* branch on register are limited to +-128KB. If it is too far away,
8013 brgez,a,pn %o1, .LC29
8019 ba,pt %xcc, .LC29 */
8021 far = get_attr_length (insn) >= 3;
8023 /* If not floating-point or if EQ or NE, we can just reverse the code. */
8025 code = reverse_condition (code);
8027 /* Only 64 bit versions of these instructions exist. */
8028 gcc_assert (mode == DImode);
8030 /* Start by writing the branch condition. */
8035 strcpy (string, "brnz");
8039 strcpy (string, "brz");
8043 strcpy (string, "brgez");
8047 strcpy (string, "brlz");
8051 strcpy (string, "brlez");
8055 strcpy (string, "brgz");
8062 p = strchr (string, '\0');
8064 /* Now add the annulling, reg, label, and nop. */
8071 if (insn && (note = find_reg_note (insn, REG_BR_PROB, NULL_RTX)))
8074 ((INTVAL (XEXP (note, 0)) >= REG_BR_PROB_BASE / 2) ^ far)
8079 *p = p < string + 8 ? '\t' : ' ';
8087 int veryfar = 1, delta;
8089 if (INSN_ADDRESSES_SET_P ())
8091 delta = (INSN_ADDRESSES (INSN_UID (dest))
8092 - INSN_ADDRESSES (INSN_UID (insn)));
8093 /* Leave some instructions for "slop". */
8094 if (delta >= -260000 && delta < 260000)
8098 strcpy (p, ".+12\n\t nop\n\t");
8099 /* Skip the next insn if requested or
8100 if we know that it will be a nop. */
8101 if (annul || ! final_sequence)
8111 strcpy (p, "ba,pt\t%%xcc, ");
8125 /* Return 1, if any of the registers of the instruction are %l[0-7] or %o[0-7].
8126 Such instructions cannot be used in the delay slot of return insn on v9.
8127 If TEST is 0, also rename all %i[0-7] registers to their %o[0-7] counterparts.
8131 epilogue_renumber (register rtx *where, int test)
8133 register const char *fmt;
8135 register enum rtx_code code;
8140 code = GET_CODE (*where);
8145 if (REGNO (*where) >= 8 && REGNO (*where) < 24) /* oX or lX */
8147 if (! test && REGNO (*where) >= 24 && REGNO (*where) < 32)
8148 *where = gen_rtx_REG (GET_MODE (*where), OUTGOING_REGNO (REGNO(*where)));
8156 /* Do not replace the frame pointer with the stack pointer because
8157 it can cause the delayed instruction to load below the stack.
8158 This occurs when instructions like:
8160 (set (reg/i:SI 24 %i0)
8161 (mem/f:SI (plus:SI (reg/f:SI 30 %fp)
8162 (const_int -20 [0xffffffec])) 0))
8164 are in the return delayed slot. */
8166 if (GET_CODE (XEXP (*where, 0)) == REG
8167 && REGNO (XEXP (*where, 0)) == HARD_FRAME_POINTER_REGNUM
8168 && (GET_CODE (XEXP (*where, 1)) != CONST_INT
8169 || INTVAL (XEXP (*where, 1)) < SPARC_STACK_BIAS))
8174 if (SPARC_STACK_BIAS
8175 && GET_CODE (XEXP (*where, 0)) == REG
8176 && REGNO (XEXP (*where, 0)) == HARD_FRAME_POINTER_REGNUM)
8184 fmt = GET_RTX_FORMAT (code);
8186 for (i = GET_RTX_LENGTH (code) - 1; i >= 0; i--)
8191 for (j = XVECLEN (*where, i) - 1; j >= 0; j--)
8192 if (epilogue_renumber (&(XVECEXP (*where, i, j)), test))
8195 else if (fmt[i] == 'e'
8196 && epilogue_renumber (&(XEXP (*where, i)), test))
8202 /* Leaf functions and non-leaf functions have different needs. */
8205 reg_leaf_alloc_order[] = REG_LEAF_ALLOC_ORDER;
8208 reg_nonleaf_alloc_order[] = REG_ALLOC_ORDER;
8210 static const int *const reg_alloc_orders[] = {
8211 reg_leaf_alloc_order,
8212 reg_nonleaf_alloc_order};
8215 order_regs_for_local_alloc (void)
8217 static int last_order_nonleaf = 1;
8219 if (df_regs_ever_live_p (15) != last_order_nonleaf)
8221 last_order_nonleaf = !last_order_nonleaf;
8222 memcpy ((char *) reg_alloc_order,
8223 (const char *) reg_alloc_orders[last_order_nonleaf],
8224 FIRST_PSEUDO_REGISTER * sizeof (int));
8228 /* Return 1 if REG and MEM are legitimate enough to allow the various
8229 mem<-->reg splits to be run. */
8232 sparc_splitdi_legitimate (rtx reg, rtx mem)
8234 /* Punt if we are here by mistake. */
8235 gcc_assert (reload_completed);
8237 /* We must have an offsettable memory reference. */
8238 if (! offsettable_memref_p (mem))
8241 /* If we have legitimate args for ldd/std, we do not want
8242 the split to happen. */
8243 if ((REGNO (reg) % 2) == 0
8244 && mem_min_alignment (mem, 8))
8251 /* Like sparc_splitdi_legitimate but for REG <--> REG moves. */
8254 sparc_split_regreg_legitimate (rtx reg1, rtx reg2)
8258 if (GET_CODE (reg1) == SUBREG)
8259 reg1 = SUBREG_REG (reg1);
8260 if (GET_CODE (reg1) != REG)
8262 regno1 = REGNO (reg1);
8264 if (GET_CODE (reg2) == SUBREG)
8265 reg2 = SUBREG_REG (reg2);
8266 if (GET_CODE (reg2) != REG)
8268 regno2 = REGNO (reg2);
8270 if (SPARC_INT_REG_P (regno1) && SPARC_INT_REG_P (regno2))
8275 if ((SPARC_INT_REG_P (regno1) && SPARC_FP_REG_P (regno2))
8276 || (SPARC_FP_REG_P (regno1) && SPARC_INT_REG_P (regno2)))
8283 /* Return 1 if x and y are some kind of REG and they refer to
8284 different hard registers. This test is guaranteed to be
8285 run after reload. */
8288 sparc_absnegfloat_split_legitimate (rtx x, rtx y)
8290 if (GET_CODE (x) != REG)
8292 if (GET_CODE (y) != REG)
8294 if (REGNO (x) == REGNO (y))
8299 /* Return 1 if REGNO (reg1) is even and REGNO (reg1) == REGNO (reg2) - 1.
8300 This makes them candidates for using ldd and std insns.
8302 Note reg1 and reg2 *must* be hard registers. */
8305 registers_ok_for_ldd_peep (rtx reg1, rtx reg2)
8307 /* We might have been passed a SUBREG. */
8308 if (GET_CODE (reg1) != REG || GET_CODE (reg2) != REG)
8311 if (REGNO (reg1) % 2 != 0)
8314 /* Integer ldd is deprecated in SPARC V9 */
8315 if (TARGET_V9 && SPARC_INT_REG_P (REGNO (reg1)))
8318 return (REGNO (reg1) == REGNO (reg2) - 1);
8321 /* Return 1 if the addresses in mem1 and mem2 are suitable for use in
8324 This can only happen when addr1 and addr2, the addresses in mem1
8325 and mem2, are consecutive memory locations (addr1 + 4 == addr2).
8326 addr1 must also be aligned on a 64-bit boundary.
8328 Also iff dependent_reg_rtx is not null it should not be used to
8329 compute the address for mem1, i.e. we cannot optimize a sequence
8341 But, note that the transformation from:
8346 is perfectly fine. Thus, the peephole2 patterns always pass us
8347 the destination register of the first load, never the second one.
8349 For stores we don't have a similar problem, so dependent_reg_rtx is
8353 mems_ok_for_ldd_peep (rtx mem1, rtx mem2, rtx dependent_reg_rtx)
8357 HOST_WIDE_INT offset1;
8359 /* The mems cannot be volatile. */
8360 if (MEM_VOLATILE_P (mem1) || MEM_VOLATILE_P (mem2))
8363 /* MEM1 should be aligned on a 64-bit boundary. */
8364 if (MEM_ALIGN (mem1) < 64)
8367 addr1 = XEXP (mem1, 0);
8368 addr2 = XEXP (mem2, 0);
8370 /* Extract a register number and offset (if used) from the first addr. */
8371 if (GET_CODE (addr1) == PLUS)
8373 /* If not a REG, return zero. */
8374 if (GET_CODE (XEXP (addr1, 0)) != REG)
8378 reg1 = REGNO (XEXP (addr1, 0));
8379 /* The offset must be constant! */
8380 if (GET_CODE (XEXP (addr1, 1)) != CONST_INT)
8382 offset1 = INTVAL (XEXP (addr1, 1));
8385 else if (GET_CODE (addr1) != REG)
8389 reg1 = REGNO (addr1);
8390 /* This was a simple (mem (reg)) expression. Offset is 0. */
8394 /* Make sure the second address is a (mem (plus (reg) (const_int). */
8395 if (GET_CODE (addr2) != PLUS)
8398 if (GET_CODE (XEXP (addr2, 0)) != REG
8399 || GET_CODE (XEXP (addr2, 1)) != CONST_INT)
8402 if (reg1 != REGNO (XEXP (addr2, 0)))
8405 if (dependent_reg_rtx != NULL_RTX && reg1 == REGNO (dependent_reg_rtx))
8408 /* The first offset must be evenly divisible by 8 to ensure the
8409 address is 64 bit aligned. */
8410 if (offset1 % 8 != 0)
8413 /* The offset for the second addr must be 4 more than the first addr. */
8414 if (INTVAL (XEXP (addr2, 1)) != offset1 + 4)
8417 /* All the tests passed. addr1 and addr2 are valid for ldd and std
8422 /* Return 1 if reg is a pseudo, or is the first register in
8423 a hard register pair. This makes it suitable for use in
8424 ldd and std insns. */
8427 register_ok_for_ldd (rtx reg)
8429 /* We might have been passed a SUBREG. */
8433 if (REGNO (reg) < FIRST_PSEUDO_REGISTER)
8434 return (REGNO (reg) % 2 == 0);
8439 /* Return 1 if OP, a MEM, has an address which is known to be
8440 aligned to an 8-byte boundary. */
8443 memory_ok_for_ldd (rtx op)
8445 /* In 64-bit mode, we assume that the address is word-aligned. */
8446 if (TARGET_ARCH32 && !mem_min_alignment (op, 8))
8449 if (! can_create_pseudo_p ()
8450 && !strict_memory_address_p (Pmode, XEXP (op, 0)))
8456 /* Implement TARGET_PRINT_OPERAND_PUNCT_VALID_P. */
8459 sparc_print_operand_punct_valid_p (unsigned char code)
8472 /* Implement TARGET_PRINT_OPERAND.
8473 Print operand X (an rtx) in assembler syntax to file FILE.
8474 CODE is a letter or dot (`z' in `%z0') or 0 if no letter was specified.
8475 For `%' followed by punctuation, CODE is the punctuation and X is null. */
8478 sparc_print_operand (FILE *file, rtx x, int code)
8483 /* Output an insn in a delay slot. */
8485 sparc_indent_opcode = 1;
8487 fputs ("\n\t nop", file);
8490 /* Output an annul flag if there's nothing for the delay slot and we
8491 are optimizing. This is always used with '(' below.
8492 Sun OS 4.1.1 dbx can't handle an annulled unconditional branch;
8493 this is a dbx bug. So, we only do this when optimizing.
8494 On UltraSPARC, a branch in a delay slot causes a pipeline flush.
8495 Always emit a nop in case the next instruction is a branch. */
8496 if (! final_sequence && (optimize && (int)sparc_cpu < PROCESSOR_V9))
8500 /* Output a 'nop' if there's nothing for the delay slot and we are
8501 not optimizing. This is always used with '*' above. */
8502 if (! final_sequence && ! (optimize && (int)sparc_cpu < PROCESSOR_V9))
8503 fputs ("\n\t nop", file);
8504 else if (final_sequence)
8505 sparc_indent_opcode = 1;
8508 /* Output the right displacement from the saved PC on function return.
8509 The caller may have placed an "unimp" insn immediately after the call
8510 so we have to account for it. This insn is used in the 32-bit ABI
8511 when calling a function that returns a non zero-sized structure. The
8512 64-bit ABI doesn't have it. Be careful to have this test be the same
8513 as that for the call. The exception is when sparc_std_struct_return
8514 is enabled, the psABI is followed exactly and the adjustment is made
8515 by the code in sparc_struct_value_rtx. The call emitted is the same
8516 when sparc_std_struct_return is enabled. */
8518 && cfun->returns_struct
8519 && !sparc_std_struct_return
8520 && DECL_SIZE (DECL_RESULT (current_function_decl))
8521 && TREE_CODE (DECL_SIZE (DECL_RESULT (current_function_decl)))
8523 && !integer_zerop (DECL_SIZE (DECL_RESULT (current_function_decl))))
8529 /* Output the Embedded Medium/Anywhere code model base register. */
8530 fputs (EMBMEDANY_BASE_REG, file);
8533 /* Print some local dynamic TLS name. */
8534 assemble_name (file, get_some_local_dynamic_name ());
8538 /* Adjust the operand to take into account a RESTORE operation. */
8539 if (GET_CODE (x) == CONST_INT)
8541 else if (GET_CODE (x) != REG)
8542 output_operand_lossage ("invalid %%Y operand");
8543 else if (REGNO (x) < 8)
8544 fputs (reg_names[REGNO (x)], file);
8545 else if (REGNO (x) >= 24 && REGNO (x) < 32)
8546 fputs (reg_names[REGNO (x)-16], file);
8548 output_operand_lossage ("invalid %%Y operand");
8551 /* Print out the low order register name of a register pair. */
8552 if (WORDS_BIG_ENDIAN)
8553 fputs (reg_names[REGNO (x)+1], file);
8555 fputs (reg_names[REGNO (x)], file);
8558 /* Print out the high order register name of a register pair. */
8559 if (WORDS_BIG_ENDIAN)
8560 fputs (reg_names[REGNO (x)], file);
8562 fputs (reg_names[REGNO (x)+1], file);
8565 /* Print out the second register name of a register pair or quad.
8566 I.e., R (%o0) => %o1. */
8567 fputs (reg_names[REGNO (x)+1], file);
8570 /* Print out the third register name of a register quad.
8571 I.e., S (%o0) => %o2. */
8572 fputs (reg_names[REGNO (x)+2], file);
8575 /* Print out the fourth register name of a register quad.
8576 I.e., T (%o0) => %o3. */
8577 fputs (reg_names[REGNO (x)+3], file);
8580 /* Print a condition code register. */
8581 if (REGNO (x) == SPARC_ICC_REG)
8583 /* We don't handle CC[X]_NOOVmode because they're not supposed
8585 if (GET_MODE (x) == CCmode)
8586 fputs ("%icc", file);
8587 else if (GET_MODE (x) == CCXmode)
8588 fputs ("%xcc", file);
8593 /* %fccN register */
8594 fputs (reg_names[REGNO (x)], file);
8597 /* Print the operand's address only. */
8598 output_address (XEXP (x, 0));
8601 /* In this case we need a register. Use %g0 if the
8602 operand is const0_rtx. */
8604 || (GET_MODE (x) != VOIDmode && x == CONST0_RTX (GET_MODE (x))))
8606 fputs ("%g0", file);
8613 switch (GET_CODE (x))
8615 case IOR: fputs ("or", file); break;
8616 case AND: fputs ("and", file); break;
8617 case XOR: fputs ("xor", file); break;
8618 default: output_operand_lossage ("invalid %%A operand");
8623 switch (GET_CODE (x))
8625 case IOR: fputs ("orn", file); break;
8626 case AND: fputs ("andn", file); break;
8627 case XOR: fputs ("xnor", file); break;
8628 default: output_operand_lossage ("invalid %%B operand");
8632 /* This is used by the conditional move instructions. */
8635 enum rtx_code rc = GET_CODE (x);
8639 case NE: fputs ("ne", file); break;
8640 case EQ: fputs ("e", file); break;
8641 case GE: fputs ("ge", file); break;
8642 case GT: fputs ("g", file); break;
8643 case LE: fputs ("le", file); break;
8644 case LT: fputs ("l", file); break;
8645 case GEU: fputs ("geu", file); break;
8646 case GTU: fputs ("gu", file); break;
8647 case LEU: fputs ("leu", file); break;
8648 case LTU: fputs ("lu", file); break;
8649 case LTGT: fputs ("lg", file); break;
8650 case UNORDERED: fputs ("u", file); break;
8651 case ORDERED: fputs ("o", file); break;
8652 case UNLT: fputs ("ul", file); break;
8653 case UNLE: fputs ("ule", file); break;
8654 case UNGT: fputs ("ug", file); break;
8655 case UNGE: fputs ("uge", file); break;
8656 case UNEQ: fputs ("ue", file); break;
8657 default: output_operand_lossage ("invalid %%C operand");
8662 /* This are used by the movr instruction pattern. */
8665 enum rtx_code rc = GET_CODE (x);
8668 case NE: fputs ("ne", file); break;
8669 case EQ: fputs ("e", file); break;
8670 case GE: fputs ("gez", file); break;
8671 case LT: fputs ("lz", file); break;
8672 case LE: fputs ("lez", file); break;
8673 case GT: fputs ("gz", file); break;
8674 default: output_operand_lossage ("invalid %%D operand");
8681 /* Print a sign-extended character. */
8682 int i = trunc_int_for_mode (INTVAL (x), QImode);
8683 fprintf (file, "%d", i);
8688 /* Operand must be a MEM; write its address. */
8689 if (GET_CODE (x) != MEM)
8690 output_operand_lossage ("invalid %%f operand");
8691 output_address (XEXP (x, 0));
8696 /* Print a sign-extended 32-bit value. */
8698 if (GET_CODE(x) == CONST_INT)
8700 else if (GET_CODE(x) == CONST_DOUBLE)
8701 i = CONST_DOUBLE_LOW (x);
8704 output_operand_lossage ("invalid %%s operand");
8707 i = trunc_int_for_mode (i, SImode);
8708 fprintf (file, HOST_WIDE_INT_PRINT_DEC, i);
8713 /* Do nothing special. */
8717 /* Undocumented flag. */
8718 output_operand_lossage ("invalid operand output code");
8721 if (GET_CODE (x) == REG)
8722 fputs (reg_names[REGNO (x)], file);
8723 else if (GET_CODE (x) == MEM)
8726 /* Poor Sun assembler doesn't understand absolute addressing. */
8727 if (CONSTANT_P (XEXP (x, 0)))
8728 fputs ("%g0+", file);
8729 output_address (XEXP (x, 0));
8732 else if (GET_CODE (x) == HIGH)
8734 fputs ("%hi(", file);
8735 output_addr_const (file, XEXP (x, 0));
8738 else if (GET_CODE (x) == LO_SUM)
8740 sparc_print_operand (file, XEXP (x, 0), 0);
8741 if (TARGET_CM_MEDMID)
8742 fputs ("+%l44(", file);
8744 fputs ("+%lo(", file);
8745 output_addr_const (file, XEXP (x, 1));
8748 else if (GET_CODE (x) == CONST_DOUBLE
8749 && (GET_MODE (x) == VOIDmode
8750 || GET_MODE_CLASS (GET_MODE (x)) == MODE_INT))
8752 if (CONST_DOUBLE_HIGH (x) == 0)
8753 fprintf (file, "%u", (unsigned int) CONST_DOUBLE_LOW (x));
8754 else if (CONST_DOUBLE_HIGH (x) == -1
8755 && CONST_DOUBLE_LOW (x) < 0)
8756 fprintf (file, "%d", (int) CONST_DOUBLE_LOW (x));
8758 output_operand_lossage ("long long constant not a valid immediate operand");
8760 else if (GET_CODE (x) == CONST_DOUBLE)
8761 output_operand_lossage ("floating point constant not a valid immediate operand");
8762 else { output_addr_const (file, x); }
8765 /* Implement TARGET_PRINT_OPERAND_ADDRESS. */
8768 sparc_print_operand_address (FILE *file, rtx x)
8770 register rtx base, index = 0;
8772 register rtx addr = x;
8775 fputs (reg_names[REGNO (addr)], file);
8776 else if (GET_CODE (addr) == PLUS)
8778 if (CONST_INT_P (XEXP (addr, 0)))
8779 offset = INTVAL (XEXP (addr, 0)), base = XEXP (addr, 1);
8780 else if (CONST_INT_P (XEXP (addr, 1)))
8781 offset = INTVAL (XEXP (addr, 1)), base = XEXP (addr, 0);
8783 base = XEXP (addr, 0), index = XEXP (addr, 1);
8784 if (GET_CODE (base) == LO_SUM)
8786 gcc_assert (USE_AS_OFFSETABLE_LO10
8788 && ! TARGET_CM_MEDMID);
8789 output_operand (XEXP (base, 0), 0);
8790 fputs ("+%lo(", file);
8791 output_address (XEXP (base, 1));
8792 fprintf (file, ")+%d", offset);
8796 fputs (reg_names[REGNO (base)], file);
8798 fprintf (file, "%+d", offset);
8799 else if (REG_P (index))
8800 fprintf (file, "+%s", reg_names[REGNO (index)]);
8801 else if (GET_CODE (index) == SYMBOL_REF
8802 || GET_CODE (index) == LABEL_REF
8803 || GET_CODE (index) == CONST)
8804 fputc ('+', file), output_addr_const (file, index);
8805 else gcc_unreachable ();
8808 else if (GET_CODE (addr) == MINUS
8809 && GET_CODE (XEXP (addr, 1)) == LABEL_REF)
8811 output_addr_const (file, XEXP (addr, 0));
8813 output_addr_const (file, XEXP (addr, 1));
8814 fputs ("-.)", file);
8816 else if (GET_CODE (addr) == LO_SUM)
8818 output_operand (XEXP (addr, 0), 0);
8819 if (TARGET_CM_MEDMID)
8820 fputs ("+%l44(", file);
8822 fputs ("+%lo(", file);
8823 output_address (XEXP (addr, 1));
8827 && GET_CODE (addr) == CONST
8828 && GET_CODE (XEXP (addr, 0)) == MINUS
8829 && GET_CODE (XEXP (XEXP (addr, 0), 1)) == CONST
8830 && GET_CODE (XEXP (XEXP (XEXP (addr, 0), 1), 0)) == MINUS
8831 && XEXP (XEXP (XEXP (XEXP (addr, 0), 1), 0), 1) == pc_rtx)
8833 addr = XEXP (addr, 0);
8834 output_addr_const (file, XEXP (addr, 0));
8835 /* Group the args of the second CONST in parenthesis. */
8837 /* Skip past the second CONST--it does nothing for us. */
8838 output_addr_const (file, XEXP (XEXP (addr, 1), 0));
8839 /* Close the parenthesis. */
8844 output_addr_const (file, addr);
8848 /* Target hook for assembling integer objects. The sparc version has
8849 special handling for aligned DI-mode objects. */
8852 sparc_assemble_integer (rtx x, unsigned int size, int aligned_p)
8854 /* ??? We only output .xword's for symbols and only then in environments
8855 where the assembler can handle them. */
8856 if (aligned_p && size == 8
8857 && (GET_CODE (x) != CONST_INT && GET_CODE (x) != CONST_DOUBLE))
8861 assemble_integer_with_op ("\t.xword\t", x);
8866 assemble_aligned_integer (4, const0_rtx);
8867 assemble_aligned_integer (4, x);
8871 return default_assemble_integer (x, size, aligned_p);
8874 /* Return the value of a code used in the .proc pseudo-op that says
8875 what kind of result this function returns. For non-C types, we pick
8876 the closest C type. */
8878 #ifndef SHORT_TYPE_SIZE
8879 #define SHORT_TYPE_SIZE (BITS_PER_UNIT * 2)
8882 #ifndef INT_TYPE_SIZE
8883 #define INT_TYPE_SIZE BITS_PER_WORD
8886 #ifndef LONG_TYPE_SIZE
8887 #define LONG_TYPE_SIZE BITS_PER_WORD
8890 #ifndef LONG_LONG_TYPE_SIZE
8891 #define LONG_LONG_TYPE_SIZE (BITS_PER_WORD * 2)
8894 #ifndef FLOAT_TYPE_SIZE
8895 #define FLOAT_TYPE_SIZE BITS_PER_WORD
8898 #ifndef DOUBLE_TYPE_SIZE
8899 #define DOUBLE_TYPE_SIZE (BITS_PER_WORD * 2)
8902 #ifndef LONG_DOUBLE_TYPE_SIZE
8903 #define LONG_DOUBLE_TYPE_SIZE (BITS_PER_WORD * 2)
8907 sparc_type_code (register tree type)
8909 register unsigned long qualifiers = 0;
8910 register unsigned shift;
8912 /* Only the first 30 bits of the qualifier are valid. We must refrain from
8913 setting more, since some assemblers will give an error for this. Also,
8914 we must be careful to avoid shifts of 32 bits or more to avoid getting
8915 unpredictable results. */
8917 for (shift = 6; shift < 30; shift += 2, type = TREE_TYPE (type))
8919 switch (TREE_CODE (type))
8925 qualifiers |= (3 << shift);
8930 qualifiers |= (2 << shift);
8934 case REFERENCE_TYPE:
8936 qualifiers |= (1 << shift);
8940 return (qualifiers | 8);
8943 case QUAL_UNION_TYPE:
8944 return (qualifiers | 9);
8947 return (qualifiers | 10);
8950 return (qualifiers | 16);
8953 /* If this is a range type, consider it to be the underlying
8955 if (TREE_TYPE (type) != 0)
8958 /* Carefully distinguish all the standard types of C,
8959 without messing up if the language is not C. We do this by
8960 testing TYPE_PRECISION and TYPE_UNSIGNED. The old code used to
8961 look at both the names and the above fields, but that's redundant.
8962 Any type whose size is between two C types will be considered
8963 to be the wider of the two types. Also, we do not have a
8964 special code to use for "long long", so anything wider than
8965 long is treated the same. Note that we can't distinguish
8966 between "int" and "long" in this code if they are the same
8967 size, but that's fine, since neither can the assembler. */
8969 if (TYPE_PRECISION (type) <= CHAR_TYPE_SIZE)
8970 return (qualifiers | (TYPE_UNSIGNED (type) ? 12 : 2));
8972 else if (TYPE_PRECISION (type) <= SHORT_TYPE_SIZE)
8973 return (qualifiers | (TYPE_UNSIGNED (type) ? 13 : 3));
8975 else if (TYPE_PRECISION (type) <= INT_TYPE_SIZE)
8976 return (qualifiers | (TYPE_UNSIGNED (type) ? 14 : 4));
8979 return (qualifiers | (TYPE_UNSIGNED (type) ? 15 : 5));
8982 /* If this is a range type, consider it to be the underlying
8984 if (TREE_TYPE (type) != 0)
8987 /* Carefully distinguish all the standard types of C,
8988 without messing up if the language is not C. */
8990 if (TYPE_PRECISION (type) == FLOAT_TYPE_SIZE)
8991 return (qualifiers | 6);
8994 return (qualifiers | 7);
8996 case COMPLEX_TYPE: /* GNU Fortran COMPLEX type. */
8997 /* ??? We need to distinguish between double and float complex types,
8998 but I don't know how yet because I can't reach this code from
8999 existing front-ends. */
9000 return (qualifiers | 7); /* Who knows? */
9003 case BOOLEAN_TYPE: /* Boolean truth value type. */
9009 gcc_unreachable (); /* Not a type! */
9016 /* Nested function support. */
9018 /* Emit RTL insns to initialize the variable parts of a trampoline.
9019 FNADDR is an RTX for the address of the function's pure code.
9020 CXT is an RTX for the static chain value for the function.
9022 This takes 16 insns: 2 shifts & 2 ands (to split up addresses), 4 sethi
9023 (to load in opcodes), 4 iors (to merge address and opcodes), and 4 writes
9024 (to store insns). This is a bit excessive. Perhaps a different
9025 mechanism would be better here.
9027 Emit enough FLUSH insns to synchronize the data and instruction caches. */
9030 sparc32_initialize_trampoline (rtx m_tramp, rtx fnaddr, rtx cxt)
9032 /* SPARC 32-bit trampoline:
9035 sethi %hi(static), %g2
9037 or %g2, %lo(static), %g2
9039 SETHI i,r = 00rr rrr1 00ii iiii iiii iiii iiii iiii
9040 JMPL r+i,d = 10dd ddd1 1100 0rrr rr1i iiii iiii iiii
9044 (adjust_address (m_tramp, SImode, 0),
9045 expand_binop (SImode, ior_optab,
9046 expand_shift (RSHIFT_EXPR, SImode, fnaddr, 10, 0, 1),
9047 GEN_INT (trunc_int_for_mode (0x03000000, SImode)),
9048 NULL_RTX, 1, OPTAB_DIRECT));
9051 (adjust_address (m_tramp, SImode, 4),
9052 expand_binop (SImode, ior_optab,
9053 expand_shift (RSHIFT_EXPR, SImode, cxt, 10, 0, 1),
9054 GEN_INT (trunc_int_for_mode (0x05000000, SImode)),
9055 NULL_RTX, 1, OPTAB_DIRECT));
9058 (adjust_address (m_tramp, SImode, 8),
9059 expand_binop (SImode, ior_optab,
9060 expand_and (SImode, fnaddr, GEN_INT (0x3ff), NULL_RTX),
9061 GEN_INT (trunc_int_for_mode (0x81c06000, SImode)),
9062 NULL_RTX, 1, OPTAB_DIRECT));
9065 (adjust_address (m_tramp, SImode, 12),
9066 expand_binop (SImode, ior_optab,
9067 expand_and (SImode, cxt, GEN_INT (0x3ff), NULL_RTX),
9068 GEN_INT (trunc_int_for_mode (0x8410a000, SImode)),
9069 NULL_RTX, 1, OPTAB_DIRECT));
9071 /* On UltraSPARC a flush flushes an entire cache line. The trampoline is
9072 aligned on a 16 byte boundary so one flush clears it all. */
9073 emit_insn (gen_flush (validize_mem (adjust_address (m_tramp, SImode, 0))));
9074 if (sparc_cpu != PROCESSOR_ULTRASPARC
9075 && sparc_cpu != PROCESSOR_ULTRASPARC3
9076 && sparc_cpu != PROCESSOR_NIAGARA
9077 && sparc_cpu != PROCESSOR_NIAGARA2
9078 && sparc_cpu != PROCESSOR_NIAGARA3
9079 && sparc_cpu != PROCESSOR_NIAGARA4)
9080 emit_insn (gen_flush (validize_mem (adjust_address (m_tramp, SImode, 8))));
9082 /* Call __enable_execute_stack after writing onto the stack to make sure
9083 the stack address is accessible. */
9084 #ifdef HAVE_ENABLE_EXECUTE_STACK
9085 emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__enable_execute_stack"),
9086 LCT_NORMAL, VOIDmode, 1, XEXP (m_tramp, 0), Pmode);
9091 /* The 64-bit version is simpler because it makes more sense to load the
9092 values as "immediate" data out of the trampoline. It's also easier since
9093 we can read the PC without clobbering a register. */
9096 sparc64_initialize_trampoline (rtx m_tramp, rtx fnaddr, rtx cxt)
9098 /* SPARC 64-bit trampoline:
9107 emit_move_insn (adjust_address (m_tramp, SImode, 0),
9108 GEN_INT (trunc_int_for_mode (0x83414000, SImode)));
9109 emit_move_insn (adjust_address (m_tramp, SImode, 4),
9110 GEN_INT (trunc_int_for_mode (0xca586018, SImode)));
9111 emit_move_insn (adjust_address (m_tramp, SImode, 8),
9112 GEN_INT (trunc_int_for_mode (0x81c14000, SImode)));
9113 emit_move_insn (adjust_address (m_tramp, SImode, 12),
9114 GEN_INT (trunc_int_for_mode (0xca586010, SImode)));
9115 emit_move_insn (adjust_address (m_tramp, DImode, 16), cxt);
9116 emit_move_insn (adjust_address (m_tramp, DImode, 24), fnaddr);
9117 emit_insn (gen_flushdi (validize_mem (adjust_address (m_tramp, DImode, 0))));
9119 if (sparc_cpu != PROCESSOR_ULTRASPARC
9120 && sparc_cpu != PROCESSOR_ULTRASPARC3
9121 && sparc_cpu != PROCESSOR_NIAGARA
9122 && sparc_cpu != PROCESSOR_NIAGARA2
9123 && sparc_cpu != PROCESSOR_NIAGARA3
9124 && sparc_cpu != PROCESSOR_NIAGARA4)
9125 emit_insn (gen_flushdi (validize_mem (adjust_address (m_tramp, DImode, 8))));
9127 /* Call __enable_execute_stack after writing onto the stack to make sure
9128 the stack address is accessible. */
9129 #ifdef HAVE_ENABLE_EXECUTE_STACK
9130 emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__enable_execute_stack"),
9131 LCT_NORMAL, VOIDmode, 1, XEXP (m_tramp, 0), Pmode);
9135 /* Worker for TARGET_TRAMPOLINE_INIT. */
9138 sparc_trampoline_init (rtx m_tramp, tree fndecl, rtx cxt)
9140 rtx fnaddr = force_reg (Pmode, XEXP (DECL_RTL (fndecl), 0));
9141 cxt = force_reg (Pmode, cxt);
9143 sparc64_initialize_trampoline (m_tramp, fnaddr, cxt);
9145 sparc32_initialize_trampoline (m_tramp, fnaddr, cxt);
9148 /* Adjust the cost of a scheduling dependency. Return the new cost of
9149 a dependency LINK or INSN on DEP_INSN. COST is the current cost. */
9152 supersparc_adjust_cost (rtx insn, rtx link, rtx dep_insn, int cost)
9154 enum attr_type insn_type;
9156 if (! recog_memoized (insn))
9159 insn_type = get_attr_type (insn);
9161 if (REG_NOTE_KIND (link) == 0)
9163 /* Data dependency; DEP_INSN writes a register that INSN reads some
9166 /* if a load, then the dependence must be on the memory address;
9167 add an extra "cycle". Note that the cost could be two cycles
9168 if the reg was written late in an instruction group; we ca not tell
9170 if (insn_type == TYPE_LOAD || insn_type == TYPE_FPLOAD)
9173 /* Get the delay only if the address of the store is the dependence. */
9174 if (insn_type == TYPE_STORE || insn_type == TYPE_FPSTORE)
9176 rtx pat = PATTERN(insn);
9177 rtx dep_pat = PATTERN (dep_insn);
9179 if (GET_CODE (pat) != SET || GET_CODE (dep_pat) != SET)
9180 return cost; /* This should not happen! */
9182 /* The dependency between the two instructions was on the data that
9183 is being stored. Assume that this implies that the address of the
9184 store is not dependent. */
9185 if (rtx_equal_p (SET_DEST (dep_pat), SET_SRC (pat)))
9188 return cost + 3; /* An approximation. */
9191 /* A shift instruction cannot receive its data from an instruction
9192 in the same cycle; add a one cycle penalty. */
9193 if (insn_type == TYPE_SHIFT)
9194 return cost + 3; /* Split before cascade into shift. */
9198 /* Anti- or output- dependency; DEP_INSN reads/writes a register that
9199 INSN writes some cycles later. */
9201 /* These are only significant for the fpu unit; writing a fp reg before
9202 the fpu has finished with it stalls the processor. */
9204 /* Reusing an integer register causes no problems. */
9205 if (insn_type == TYPE_IALU || insn_type == TYPE_SHIFT)
9213 hypersparc_adjust_cost (rtx insn, rtx link, rtx dep_insn, int cost)
9215 enum attr_type insn_type, dep_type;
9216 rtx pat = PATTERN(insn);
9217 rtx dep_pat = PATTERN (dep_insn);
9219 if (recog_memoized (insn) < 0 || recog_memoized (dep_insn) < 0)
9222 insn_type = get_attr_type (insn);
9223 dep_type = get_attr_type (dep_insn);
9225 switch (REG_NOTE_KIND (link))
9228 /* Data dependency; DEP_INSN writes a register that INSN reads some
9235 /* Get the delay iff the address of the store is the dependence. */
9236 if (GET_CODE (pat) != SET || GET_CODE (dep_pat) != SET)
9239 if (rtx_equal_p (SET_DEST (dep_pat), SET_SRC (pat)))
9246 /* If a load, then the dependence must be on the memory address. If
9247 the addresses aren't equal, then it might be a false dependency */
9248 if (dep_type == TYPE_STORE || dep_type == TYPE_FPSTORE)
9250 if (GET_CODE (pat) != SET || GET_CODE (dep_pat) != SET
9251 || GET_CODE (SET_DEST (dep_pat)) != MEM
9252 || GET_CODE (SET_SRC (pat)) != MEM
9253 || ! rtx_equal_p (XEXP (SET_DEST (dep_pat), 0),
9254 XEXP (SET_SRC (pat), 0)))
9262 /* Compare to branch latency is 0. There is no benefit from
9263 separating compare and branch. */
9264 if (dep_type == TYPE_COMPARE)
9266 /* Floating point compare to branch latency is less than
9267 compare to conditional move. */
9268 if (dep_type == TYPE_FPCMP)
9277 /* Anti-dependencies only penalize the fpu unit. */
9278 if (insn_type == TYPE_IALU || insn_type == TYPE_SHIFT)
9290 sparc_adjust_cost(rtx insn, rtx link, rtx dep, int cost)
9294 case PROCESSOR_SUPERSPARC:
9295 cost = supersparc_adjust_cost (insn, link, dep, cost);
9297 case PROCESSOR_HYPERSPARC:
9298 case PROCESSOR_SPARCLITE86X:
9299 cost = hypersparc_adjust_cost (insn, link, dep, cost);
9308 sparc_sched_init (FILE *dump ATTRIBUTE_UNUSED,
9309 int sched_verbose ATTRIBUTE_UNUSED,
9310 int max_ready ATTRIBUTE_UNUSED)
9314 sparc_use_sched_lookahead (void)
9316 if (sparc_cpu == PROCESSOR_NIAGARA
9317 || sparc_cpu == PROCESSOR_NIAGARA2
9318 || sparc_cpu == PROCESSOR_NIAGARA3)
9320 if (sparc_cpu == PROCESSOR_NIAGARA4)
9322 if (sparc_cpu == PROCESSOR_ULTRASPARC
9323 || sparc_cpu == PROCESSOR_ULTRASPARC3)
9325 if ((1 << sparc_cpu) &
9326 ((1 << PROCESSOR_SUPERSPARC) | (1 << PROCESSOR_HYPERSPARC) |
9327 (1 << PROCESSOR_SPARCLITE86X)))
9333 sparc_issue_rate (void)
9337 case PROCESSOR_NIAGARA:
9338 case PROCESSOR_NIAGARA2:
9339 case PROCESSOR_NIAGARA3:
9342 case PROCESSOR_NIAGARA4:
9344 /* Assume V9 processors are capable of at least dual-issue. */
9346 case PROCESSOR_SUPERSPARC:
9348 case PROCESSOR_HYPERSPARC:
9349 case PROCESSOR_SPARCLITE86X:
9351 case PROCESSOR_ULTRASPARC:
9352 case PROCESSOR_ULTRASPARC3:
9358 set_extends (rtx insn)
9360 register rtx pat = PATTERN (insn);
9362 switch (GET_CODE (SET_SRC (pat)))
9364 /* Load and some shift instructions zero extend. */
9367 /* sethi clears the high bits */
9369 /* LO_SUM is used with sethi. sethi cleared the high
9370 bits and the values used with lo_sum are positive */
9372 /* Store flag stores 0 or 1 */
9382 rtx op0 = XEXP (SET_SRC (pat), 0);
9383 rtx op1 = XEXP (SET_SRC (pat), 1);
9384 if (GET_CODE (op1) == CONST_INT)
9385 return INTVAL (op1) >= 0;
9386 if (GET_CODE (op0) != REG)
9388 if (sparc_check_64 (op0, insn) == 1)
9390 return (GET_CODE (op1) == REG && sparc_check_64 (op1, insn) == 1);
9395 rtx op0 = XEXP (SET_SRC (pat), 0);
9396 rtx op1 = XEXP (SET_SRC (pat), 1);
9397 if (GET_CODE (op0) != REG || sparc_check_64 (op0, insn) <= 0)
9399 if (GET_CODE (op1) == CONST_INT)
9400 return INTVAL (op1) >= 0;
9401 return (GET_CODE (op1) == REG && sparc_check_64 (op1, insn) == 1);
9404 return GET_MODE (SET_SRC (pat)) == SImode;
9405 /* Positive integers leave the high bits zero. */
9407 return ! (CONST_DOUBLE_LOW (SET_SRC (pat)) & 0x80000000);
9409 return ! (INTVAL (SET_SRC (pat)) & 0x80000000);
9412 return - (GET_MODE (SET_SRC (pat)) == SImode);
9414 return sparc_check_64 (SET_SRC (pat), insn);
9420 /* We _ought_ to have only one kind per function, but... */
9421 static GTY(()) rtx sparc_addr_diff_list;
9422 static GTY(()) rtx sparc_addr_list;
9425 sparc_defer_case_vector (rtx lab, rtx vec, int diff)
9427 vec = gen_rtx_EXPR_LIST (VOIDmode, lab, vec);
9429 sparc_addr_diff_list
9430 = gen_rtx_EXPR_LIST (VOIDmode, vec, sparc_addr_diff_list);
9432 sparc_addr_list = gen_rtx_EXPR_LIST (VOIDmode, vec, sparc_addr_list);
9436 sparc_output_addr_vec (rtx vec)
9438 rtx lab = XEXP (vec, 0), body = XEXP (vec, 1);
9439 int idx, vlen = XVECLEN (body, 0);
9441 #ifdef ASM_OUTPUT_ADDR_VEC_START
9442 ASM_OUTPUT_ADDR_VEC_START (asm_out_file);
9445 #ifdef ASM_OUTPUT_CASE_LABEL
9446 ASM_OUTPUT_CASE_LABEL (asm_out_file, "L", CODE_LABEL_NUMBER (lab),
9449 (*targetm.asm_out.internal_label) (asm_out_file, "L", CODE_LABEL_NUMBER (lab));
9452 for (idx = 0; idx < vlen; idx++)
9454 ASM_OUTPUT_ADDR_VEC_ELT
9455 (asm_out_file, CODE_LABEL_NUMBER (XEXP (XVECEXP (body, 0, idx), 0)));
9458 #ifdef ASM_OUTPUT_ADDR_VEC_END
9459 ASM_OUTPUT_ADDR_VEC_END (asm_out_file);
9464 sparc_output_addr_diff_vec (rtx vec)
9466 rtx lab = XEXP (vec, 0), body = XEXP (vec, 1);
9467 rtx base = XEXP (XEXP (body, 0), 0);
9468 int idx, vlen = XVECLEN (body, 1);
9470 #ifdef ASM_OUTPUT_ADDR_VEC_START
9471 ASM_OUTPUT_ADDR_VEC_START (asm_out_file);
9474 #ifdef ASM_OUTPUT_CASE_LABEL
9475 ASM_OUTPUT_CASE_LABEL (asm_out_file, "L", CODE_LABEL_NUMBER (lab),
9478 (*targetm.asm_out.internal_label) (asm_out_file, "L", CODE_LABEL_NUMBER (lab));
9481 for (idx = 0; idx < vlen; idx++)
9483 ASM_OUTPUT_ADDR_DIFF_ELT
9486 CODE_LABEL_NUMBER (XEXP (XVECEXP (body, 1, idx), 0)),
9487 CODE_LABEL_NUMBER (base));
9490 #ifdef ASM_OUTPUT_ADDR_VEC_END
9491 ASM_OUTPUT_ADDR_VEC_END (asm_out_file);
9496 sparc_output_deferred_case_vectors (void)
9501 if (sparc_addr_list == NULL_RTX
9502 && sparc_addr_diff_list == NULL_RTX)
9505 /* Align to cache line in the function's code section. */
9506 switch_to_section (current_function_section ());
9508 align = floor_log2 (FUNCTION_BOUNDARY / BITS_PER_UNIT);
9510 ASM_OUTPUT_ALIGN (asm_out_file, align);
9512 for (t = sparc_addr_list; t ; t = XEXP (t, 1))
9513 sparc_output_addr_vec (XEXP (t, 0));
9514 for (t = sparc_addr_diff_list; t ; t = XEXP (t, 1))
9515 sparc_output_addr_diff_vec (XEXP (t, 0));
9517 sparc_addr_list = sparc_addr_diff_list = NULL_RTX;
9520 /* Return 0 if the high 32 bits of X (the low word of X, if DImode) are
9521 unknown. Return 1 if the high bits are zero, -1 if the register is
9524 sparc_check_64 (rtx x, rtx insn)
9526 /* If a register is set only once it is safe to ignore insns this
9527 code does not know how to handle. The loop will either recognize
9528 the single set and return the correct value or fail to recognize
9533 gcc_assert (GET_CODE (x) == REG);
9535 if (GET_MODE (x) == DImode)
9536 y = gen_rtx_REG (SImode, REGNO (x) + WORDS_BIG_ENDIAN);
9538 if (flag_expensive_optimizations
9539 && df && DF_REG_DEF_COUNT (REGNO (y)) == 1)
9545 insn = get_last_insn_anywhere ();
9550 while ((insn = PREV_INSN (insn)))
9552 switch (GET_CODE (insn))
9565 rtx pat = PATTERN (insn);
9566 if (GET_CODE (pat) != SET)
9568 if (rtx_equal_p (x, SET_DEST (pat)))
9569 return set_extends (insn);
9570 if (y && rtx_equal_p (y, SET_DEST (pat)))
9571 return set_extends (insn);
9572 if (reg_overlap_mentioned_p (SET_DEST (pat), y))
9580 /* Output a wide shift instruction in V8+ mode. INSN is the instruction,
9581 OPERANDS are its operands and OPCODE is the mnemonic to be used. */
9584 output_v8plus_shift (rtx insn, rtx *operands, const char *opcode)
9586 static char asm_code[60];
9588 /* The scratch register is only required when the destination
9589 register is not a 64-bit global or out register. */
9590 if (which_alternative != 2)
9591 operands[3] = operands[0];
9593 /* We can only shift by constants <= 63. */
9594 if (GET_CODE (operands[2]) == CONST_INT)
9595 operands[2] = GEN_INT (INTVAL (operands[2]) & 0x3f);
9597 if (GET_CODE (operands[1]) == CONST_INT)
9599 output_asm_insn ("mov\t%1, %3", operands);
9603 output_asm_insn ("sllx\t%H1, 32, %3", operands);
9604 if (sparc_check_64 (operands[1], insn) <= 0)
9605 output_asm_insn ("srl\t%L1, 0, %L1", operands);
9606 output_asm_insn ("or\t%L1, %3, %3", operands);
9609 strcpy (asm_code, opcode);
9611 if (which_alternative != 2)
9612 return strcat (asm_code, "\t%0, %2, %L0\n\tsrlx\t%L0, 32, %H0");
9615 strcat (asm_code, "\t%3, %2, %3\n\tsrlx\t%3, 32, %H0\n\tmov\t%3, %L0");
9618 /* Output rtl to increment the profiler label LABELNO
9619 for profiling a function entry. */
9622 sparc_profile_hook (int labelno)
9627 fun = gen_rtx_SYMBOL_REF (Pmode, MCOUNT_FUNCTION);
9628 if (NO_PROFILE_COUNTERS)
9630 emit_library_call (fun, LCT_NORMAL, VOIDmode, 0);
9634 ASM_GENERATE_INTERNAL_LABEL (buf, "LP", labelno);
9635 lab = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (buf));
9636 emit_library_call (fun, LCT_NORMAL, VOIDmode, 1, lab, Pmode);
9640 #ifdef TARGET_SOLARIS
9641 /* Solaris implementation of TARGET_ASM_NAMED_SECTION. */
9644 sparc_solaris_elf_asm_named_section (const char *name, unsigned int flags,
9645 tree decl ATTRIBUTE_UNUSED)
9647 if (HAVE_COMDAT_GROUP && flags & SECTION_LINKONCE)
9649 solaris_elf_asm_comdat_section (name, flags, decl);
9653 fprintf (asm_out_file, "\t.section\t\"%s\"", name);
9655 if (!(flags & SECTION_DEBUG))
9656 fputs (",#alloc", asm_out_file);
9657 if (flags & SECTION_WRITE)
9658 fputs (",#write", asm_out_file);
9659 if (flags & SECTION_TLS)
9660 fputs (",#tls", asm_out_file);
9661 if (flags & SECTION_CODE)
9662 fputs (",#execinstr", asm_out_file);
9664 /* Sun as only supports #nobits/#progbits since Solaris 10. */
9665 if (HAVE_AS_SPARC_NOBITS)
9667 if (flags & SECTION_BSS)
9668 fputs (",#nobits", asm_out_file);
9670 fputs (",#progbits", asm_out_file);
9673 fputc ('\n', asm_out_file);
9675 #endif /* TARGET_SOLARIS */
9677 /* We do not allow indirect calls to be optimized into sibling calls.
9679 We cannot use sibling calls when delayed branches are disabled
9680 because they will likely require the call delay slot to be filled.
9682 Also, on SPARC 32-bit we cannot emit a sibling call when the
9683 current function returns a structure. This is because the "unimp
9684 after call" convention would cause the callee to return to the
9685 wrong place. The generic code already disallows cases where the
9686 function being called returns a structure.
9688 It may seem strange how this last case could occur. Usually there
9689 is code after the call which jumps to epilogue code which dumps the
9690 return value into the struct return area. That ought to invalidate
9691 the sibling call right? Well, in the C++ case we can end up passing
9692 the pointer to the struct return area to a constructor (which returns
9693 void) and then nothing else happens. Such a sibling call would look
9694 valid without the added check here.
9696 VxWorks PIC PLT entries require the global pointer to be initialized
9697 on entry. We therefore can't emit sibling calls to them. */
9699 sparc_function_ok_for_sibcall (tree decl, tree exp ATTRIBUTE_UNUSED)
9702 && flag_delayed_branch
9703 && (TARGET_ARCH64 || ! cfun->returns_struct)
9704 && !(TARGET_VXWORKS_RTP
9706 && !targetm.binds_local_p (decl)));
9709 /* libfunc renaming. */
9712 sparc_init_libfuncs (void)
9716 /* Use the subroutines that Sun's library provides for integer
9717 multiply and divide. The `*' prevents an underscore from
9718 being prepended by the compiler. .umul is a little faster
9720 set_optab_libfunc (smul_optab, SImode, "*.umul");
9721 set_optab_libfunc (sdiv_optab, SImode, "*.div");
9722 set_optab_libfunc (udiv_optab, SImode, "*.udiv");
9723 set_optab_libfunc (smod_optab, SImode, "*.rem");
9724 set_optab_libfunc (umod_optab, SImode, "*.urem");
9726 /* TFmode arithmetic. These names are part of the SPARC 32bit ABI. */
9727 set_optab_libfunc (add_optab, TFmode, "_Q_add");
9728 set_optab_libfunc (sub_optab, TFmode, "_Q_sub");
9729 set_optab_libfunc (neg_optab, TFmode, "_Q_neg");
9730 set_optab_libfunc (smul_optab, TFmode, "_Q_mul");
9731 set_optab_libfunc (sdiv_optab, TFmode, "_Q_div");
9733 /* We can define the TFmode sqrt optab only if TARGET_FPU. This
9734 is because with soft-float, the SFmode and DFmode sqrt
9735 instructions will be absent, and the compiler will notice and
9736 try to use the TFmode sqrt instruction for calls to the
9737 builtin function sqrt, but this fails. */
9739 set_optab_libfunc (sqrt_optab, TFmode, "_Q_sqrt");
9741 set_optab_libfunc (eq_optab, TFmode, "_Q_feq");
9742 set_optab_libfunc (ne_optab, TFmode, "_Q_fne");
9743 set_optab_libfunc (gt_optab, TFmode, "_Q_fgt");
9744 set_optab_libfunc (ge_optab, TFmode, "_Q_fge");
9745 set_optab_libfunc (lt_optab, TFmode, "_Q_flt");
9746 set_optab_libfunc (le_optab, TFmode, "_Q_fle");
9748 set_conv_libfunc (sext_optab, TFmode, SFmode, "_Q_stoq");
9749 set_conv_libfunc (sext_optab, TFmode, DFmode, "_Q_dtoq");
9750 set_conv_libfunc (trunc_optab, SFmode, TFmode, "_Q_qtos");
9751 set_conv_libfunc (trunc_optab, DFmode, TFmode, "_Q_qtod");
9753 set_conv_libfunc (sfix_optab, SImode, TFmode, "_Q_qtoi");
9754 set_conv_libfunc (ufix_optab, SImode, TFmode, "_Q_qtou");
9755 set_conv_libfunc (sfloat_optab, TFmode, SImode, "_Q_itoq");
9756 set_conv_libfunc (ufloat_optab, TFmode, SImode, "_Q_utoq");
9758 if (DITF_CONVERSION_LIBFUNCS)
9760 set_conv_libfunc (sfix_optab, DImode, TFmode, "_Q_qtoll");
9761 set_conv_libfunc (ufix_optab, DImode, TFmode, "_Q_qtoull");
9762 set_conv_libfunc (sfloat_optab, TFmode, DImode, "_Q_lltoq");
9763 set_conv_libfunc (ufloat_optab, TFmode, DImode, "_Q_ulltoq");
9766 if (SUN_CONVERSION_LIBFUNCS)
9768 set_conv_libfunc (sfix_optab, DImode, SFmode, "__ftoll");
9769 set_conv_libfunc (ufix_optab, DImode, SFmode, "__ftoull");
9770 set_conv_libfunc (sfix_optab, DImode, DFmode, "__dtoll");
9771 set_conv_libfunc (ufix_optab, DImode, DFmode, "__dtoull");
9776 /* In the SPARC 64bit ABI, SImode multiply and divide functions
9777 do not exist in the library. Make sure the compiler does not
9778 emit calls to them by accident. (It should always use the
9779 hardware instructions.) */
9780 set_optab_libfunc (smul_optab, SImode, 0);
9781 set_optab_libfunc (sdiv_optab, SImode, 0);
9782 set_optab_libfunc (udiv_optab, SImode, 0);
9783 set_optab_libfunc (smod_optab, SImode, 0);
9784 set_optab_libfunc (umod_optab, SImode, 0);
9786 if (SUN_INTEGER_MULTIPLY_64)
9788 set_optab_libfunc (smul_optab, DImode, "__mul64");
9789 set_optab_libfunc (sdiv_optab, DImode, "__div64");
9790 set_optab_libfunc (udiv_optab, DImode, "__udiv64");
9791 set_optab_libfunc (smod_optab, DImode, "__rem64");
9792 set_optab_libfunc (umod_optab, DImode, "__urem64");
9795 if (SUN_CONVERSION_LIBFUNCS)
9797 set_conv_libfunc (sfix_optab, DImode, SFmode, "__ftol");
9798 set_conv_libfunc (ufix_optab, DImode, SFmode, "__ftoul");
9799 set_conv_libfunc (sfix_optab, DImode, DFmode, "__dtol");
9800 set_conv_libfunc (ufix_optab, DImode, DFmode, "__dtoul");
9805 static tree def_builtin(const char *name, int code, tree type)
9807 return add_builtin_function(name, type, code, BUILT_IN_MD, NULL,
9811 static tree def_builtin_const(const char *name, int code, tree type)
9813 tree t = def_builtin(name, code, type);
9816 TREE_READONLY (t) = 1;
9821 /* Implement the TARGET_INIT_BUILTINS target hook.
9822 Create builtin functions for special SPARC instructions. */
9825 sparc_init_builtins (void)
9828 sparc_vis_init_builtins ();
9831 /* Create builtin functions for VIS 1.0 instructions. */
9834 sparc_vis_init_builtins (void)
9836 tree v4qi = build_vector_type (unsigned_intQI_type_node, 4);
9837 tree v8qi = build_vector_type (unsigned_intQI_type_node, 8);
9838 tree v4hi = build_vector_type (intHI_type_node, 4);
9839 tree v2hi = build_vector_type (intHI_type_node, 2);
9840 tree v2si = build_vector_type (intSI_type_node, 2);
9841 tree v1si = build_vector_type (intSI_type_node, 1);
9843 tree v4qi_ftype_v4hi = build_function_type_list (v4qi, v4hi, 0);
9844 tree v8qi_ftype_v2si_v8qi = build_function_type_list (v8qi, v2si, v8qi, 0);
9845 tree v2hi_ftype_v2si = build_function_type_list (v2hi, v2si, 0);
9846 tree v4hi_ftype_v4qi = build_function_type_list (v4hi, v4qi, 0);
9847 tree v8qi_ftype_v4qi_v4qi = build_function_type_list (v8qi, v4qi, v4qi, 0);
9848 tree v4hi_ftype_v4qi_v4hi = build_function_type_list (v4hi, v4qi, v4hi, 0);
9849 tree v4hi_ftype_v4qi_v2hi = build_function_type_list (v4hi, v4qi, v2hi, 0);
9850 tree v2si_ftype_v4qi_v2hi = build_function_type_list (v2si, v4qi, v2hi, 0);
9851 tree v4hi_ftype_v8qi_v4hi = build_function_type_list (v4hi, v8qi, v4hi, 0);
9852 tree v4hi_ftype_v4hi_v4hi = build_function_type_list (v4hi, v4hi, v4hi, 0);
9853 tree v2si_ftype_v2si_v2si = build_function_type_list (v2si, v2si, v2si, 0);
9854 tree v8qi_ftype_v8qi_v8qi = build_function_type_list (v8qi, v8qi, v8qi, 0);
9855 tree v2hi_ftype_v2hi_v2hi = build_function_type_list (v2hi, v2hi, v2hi, 0);
9856 tree v1si_ftype_v1si_v1si = build_function_type_list (v1si, v1si, v1si, 0);
9857 tree di_ftype_v8qi_v8qi_di = build_function_type_list (intDI_type_node,
9859 intDI_type_node, 0);
9860 tree di_ftype_v8qi_v8qi = build_function_type_list (intDI_type_node,
9862 tree si_ftype_v8qi_v8qi = build_function_type_list (intSI_type_node,
9864 tree di_ftype_di_di = build_function_type_list (intDI_type_node,
9866 intDI_type_node, 0);
9867 tree si_ftype_si_si = build_function_type_list (intSI_type_node,
9869 intSI_type_node, 0);
9870 tree ptr_ftype_ptr_si = build_function_type_list (ptr_type_node,
9872 intSI_type_node, 0);
9873 tree ptr_ftype_ptr_di = build_function_type_list (ptr_type_node,
9875 intDI_type_node, 0);
9876 tree si_ftype_ptr_ptr = build_function_type_list (intSI_type_node,
9879 tree di_ftype_ptr_ptr = build_function_type_list (intDI_type_node,
9882 tree si_ftype_v4hi_v4hi = build_function_type_list (intSI_type_node,
9884 tree si_ftype_v2si_v2si = build_function_type_list (intSI_type_node,
9886 tree di_ftype_v4hi_v4hi = build_function_type_list (intDI_type_node,
9888 tree di_ftype_v2si_v2si = build_function_type_list (intDI_type_node,
9890 tree void_ftype_di = build_function_type_list (void_type_node,
9891 intDI_type_node, 0);
9892 tree di_ftype_void = build_function_type_list (intDI_type_node,
9894 tree void_ftype_si = build_function_type_list (void_type_node,
9895 intSI_type_node, 0);
9896 tree sf_ftype_sf_sf = build_function_type_list (float_type_node,
9898 float_type_node, 0);
9899 tree df_ftype_df_df = build_function_type_list (double_type_node,
9901 double_type_node, 0);
9903 /* Packing and expanding vectors. */
9904 def_builtin ("__builtin_vis_fpack16", CODE_FOR_fpack16_vis,
9906 def_builtin ("__builtin_vis_fpack32", CODE_FOR_fpack32_vis,
9907 v8qi_ftype_v2si_v8qi);
9908 def_builtin ("__builtin_vis_fpackfix", CODE_FOR_fpackfix_vis,
9910 def_builtin_const ("__builtin_vis_fexpand", CODE_FOR_fexpand_vis,
9912 def_builtin_const ("__builtin_vis_fpmerge", CODE_FOR_fpmerge_vis,
9913 v8qi_ftype_v4qi_v4qi);
9915 /* Multiplications. */
9916 def_builtin_const ("__builtin_vis_fmul8x16", CODE_FOR_fmul8x16_vis,
9917 v4hi_ftype_v4qi_v4hi);
9918 def_builtin_const ("__builtin_vis_fmul8x16au", CODE_FOR_fmul8x16au_vis,
9919 v4hi_ftype_v4qi_v2hi);
9920 def_builtin_const ("__builtin_vis_fmul8x16al", CODE_FOR_fmul8x16al_vis,
9921 v4hi_ftype_v4qi_v2hi);
9922 def_builtin_const ("__builtin_vis_fmul8sux16", CODE_FOR_fmul8sux16_vis,
9923 v4hi_ftype_v8qi_v4hi);
9924 def_builtin_const ("__builtin_vis_fmul8ulx16", CODE_FOR_fmul8ulx16_vis,
9925 v4hi_ftype_v8qi_v4hi);
9926 def_builtin_const ("__builtin_vis_fmuld8sux16", CODE_FOR_fmuld8sux16_vis,
9927 v2si_ftype_v4qi_v2hi);
9928 def_builtin_const ("__builtin_vis_fmuld8ulx16", CODE_FOR_fmuld8ulx16_vis,
9929 v2si_ftype_v4qi_v2hi);
9931 /* Data aligning. */
9932 def_builtin ("__builtin_vis_faligndatav4hi", CODE_FOR_faligndatav4hi_vis,
9933 v4hi_ftype_v4hi_v4hi);
9934 def_builtin ("__builtin_vis_faligndatav8qi", CODE_FOR_faligndatav8qi_vis,
9935 v8qi_ftype_v8qi_v8qi);
9936 def_builtin ("__builtin_vis_faligndatav2si", CODE_FOR_faligndatav2si_vis,
9937 v2si_ftype_v2si_v2si);
9938 def_builtin ("__builtin_vis_faligndatadi", CODE_FOR_faligndatav1di_vis,
9941 def_builtin ("__builtin_vis_write_gsr", CODE_FOR_wrgsr_vis,
9943 def_builtin ("__builtin_vis_read_gsr", CODE_FOR_rdgsr_vis,
9948 def_builtin ("__builtin_vis_alignaddr", CODE_FOR_alignaddrdi_vis,
9950 def_builtin ("__builtin_vis_alignaddrl", CODE_FOR_alignaddrldi_vis,
9955 def_builtin ("__builtin_vis_alignaddr", CODE_FOR_alignaddrsi_vis,
9957 def_builtin ("__builtin_vis_alignaddrl", CODE_FOR_alignaddrlsi_vis,
9961 /* Pixel distance. */
9962 def_builtin_const ("__builtin_vis_pdist", CODE_FOR_pdist_vis,
9963 di_ftype_v8qi_v8qi_di);
9965 /* Edge handling. */
9968 def_builtin_const ("__builtin_vis_edge8", CODE_FOR_edge8di_vis,
9970 def_builtin_const ("__builtin_vis_edge8l", CODE_FOR_edge8ldi_vis,
9972 def_builtin_const ("__builtin_vis_edge16", CODE_FOR_edge16di_vis,
9974 def_builtin_const ("__builtin_vis_edge16l", CODE_FOR_edge16ldi_vis,
9976 def_builtin_const ("__builtin_vis_edge32", CODE_FOR_edge32di_vis,
9978 def_builtin_const ("__builtin_vis_edge32l", CODE_FOR_edge32ldi_vis,
9982 def_builtin_const ("__builtin_vis_edge8n", CODE_FOR_edge8ndi_vis,
9984 def_builtin_const ("__builtin_vis_edge8ln", CODE_FOR_edge8lndi_vis,
9986 def_builtin_const ("__builtin_vis_edge16n", CODE_FOR_edge16ndi_vis,
9988 def_builtin_const ("__builtin_vis_edge16ln", CODE_FOR_edge16lndi_vis,
9990 def_builtin_const ("__builtin_vis_edge32n", CODE_FOR_edge32ndi_vis,
9992 def_builtin_const ("__builtin_vis_edge32ln", CODE_FOR_edge32lndi_vis,
9998 def_builtin_const ("__builtin_vis_edge8", CODE_FOR_edge8si_vis,
10000 def_builtin_const ("__builtin_vis_edge8l", CODE_FOR_edge8lsi_vis,
10002 def_builtin_const ("__builtin_vis_edge16", CODE_FOR_edge16si_vis,
10004 def_builtin_const ("__builtin_vis_edge16l", CODE_FOR_edge16lsi_vis,
10006 def_builtin_const ("__builtin_vis_edge32", CODE_FOR_edge32si_vis,
10008 def_builtin_const ("__builtin_vis_edge32l", CODE_FOR_edge32lsi_vis,
10012 def_builtin_const ("__builtin_vis_edge8n", CODE_FOR_edge8nsi_vis,
10014 def_builtin_const ("__builtin_vis_edge8ln", CODE_FOR_edge8lnsi_vis,
10016 def_builtin_const ("__builtin_vis_edge16n", CODE_FOR_edge16nsi_vis,
10018 def_builtin_const ("__builtin_vis_edge16ln", CODE_FOR_edge16lnsi_vis,
10020 def_builtin_const ("__builtin_vis_edge32n", CODE_FOR_edge32nsi_vis,
10022 def_builtin_const ("__builtin_vis_edge32ln", CODE_FOR_edge32lnsi_vis,
10027 /* Pixel compare. */
10030 def_builtin_const ("__builtin_vis_fcmple16", CODE_FOR_fcmple16di_vis,
10031 di_ftype_v4hi_v4hi);
10032 def_builtin_const ("__builtin_vis_fcmple32", CODE_FOR_fcmple32di_vis,
10033 di_ftype_v2si_v2si);
10034 def_builtin_const ("__builtin_vis_fcmpne16", CODE_FOR_fcmpne16di_vis,
10035 di_ftype_v4hi_v4hi);
10036 def_builtin_const ("__builtin_vis_fcmpne32", CODE_FOR_fcmpne32di_vis,
10037 di_ftype_v2si_v2si);
10038 def_builtin_const ("__builtin_vis_fcmpgt16", CODE_FOR_fcmpgt16di_vis,
10039 di_ftype_v4hi_v4hi);
10040 def_builtin_const ("__builtin_vis_fcmpgt32", CODE_FOR_fcmpgt32di_vis,
10041 di_ftype_v2si_v2si);
10042 def_builtin_const ("__builtin_vis_fcmpeq16", CODE_FOR_fcmpeq16di_vis,
10043 di_ftype_v4hi_v4hi);
10044 def_builtin_const ("__builtin_vis_fcmpeq32", CODE_FOR_fcmpeq32di_vis,
10045 di_ftype_v2si_v2si);
10049 def_builtin_const ("__builtin_vis_fcmple16", CODE_FOR_fcmple16si_vis,
10050 si_ftype_v4hi_v4hi);
10051 def_builtin_const ("__builtin_vis_fcmple32", CODE_FOR_fcmple32si_vis,
10052 si_ftype_v2si_v2si);
10053 def_builtin_const ("__builtin_vis_fcmpne16", CODE_FOR_fcmpne16si_vis,
10054 si_ftype_v4hi_v4hi);
10055 def_builtin_const ("__builtin_vis_fcmpne32", CODE_FOR_fcmpne32si_vis,
10056 si_ftype_v2si_v2si);
10057 def_builtin_const ("__builtin_vis_fcmpgt16", CODE_FOR_fcmpgt16si_vis,
10058 si_ftype_v4hi_v4hi);
10059 def_builtin_const ("__builtin_vis_fcmpgt32", CODE_FOR_fcmpgt32si_vis,
10060 si_ftype_v2si_v2si);
10061 def_builtin_const ("__builtin_vis_fcmpeq16", CODE_FOR_fcmpeq16si_vis,
10062 si_ftype_v4hi_v4hi);
10063 def_builtin_const ("__builtin_vis_fcmpeq32", CODE_FOR_fcmpeq32si_vis,
10064 si_ftype_v2si_v2si);
10067 /* Addition and subtraction. */
10068 def_builtin_const ("__builtin_vis_fpadd16", CODE_FOR_addv4hi3,
10069 v4hi_ftype_v4hi_v4hi);
10070 def_builtin_const ("__builtin_vis_fpadd16s", CODE_FOR_addv2hi3,
10071 v2hi_ftype_v2hi_v2hi);
10072 def_builtin_const ("__builtin_vis_fpadd32", CODE_FOR_addv2si3,
10073 v2si_ftype_v2si_v2si);
10074 def_builtin_const ("__builtin_vis_fpadd32s", CODE_FOR_addv1si3,
10075 v1si_ftype_v1si_v1si);
10076 def_builtin_const ("__builtin_vis_fpsub16", CODE_FOR_subv4hi3,
10077 v4hi_ftype_v4hi_v4hi);
10078 def_builtin_const ("__builtin_vis_fpsub16s", CODE_FOR_subv2hi3,
10079 v2hi_ftype_v2hi_v2hi);
10080 def_builtin_const ("__builtin_vis_fpsub32", CODE_FOR_subv2si3,
10081 v2si_ftype_v2si_v2si);
10082 def_builtin_const ("__builtin_vis_fpsub32s", CODE_FOR_subv1si3,
10083 v1si_ftype_v1si_v1si);
10085 /* Three-dimensional array addressing. */
10088 def_builtin_const ("__builtin_vis_array8", CODE_FOR_array8di_vis,
10090 def_builtin_const ("__builtin_vis_array16", CODE_FOR_array16di_vis,
10092 def_builtin_const ("__builtin_vis_array32", CODE_FOR_array32di_vis,
10097 def_builtin_const ("__builtin_vis_array8", CODE_FOR_array8si_vis,
10099 def_builtin_const ("__builtin_vis_array16", CODE_FOR_array16si_vis,
10101 def_builtin_const ("__builtin_vis_array32", CODE_FOR_array32si_vis,
10107 /* Byte mask and shuffle */
10109 def_builtin ("__builtin_vis_bmask", CODE_FOR_bmaskdi_vis,
10112 def_builtin ("__builtin_vis_bmask", CODE_FOR_bmasksi_vis,
10114 def_builtin ("__builtin_vis_bshufflev4hi", CODE_FOR_bshufflev4hi_vis,
10115 v4hi_ftype_v4hi_v4hi);
10116 def_builtin ("__builtin_vis_bshufflev8qi", CODE_FOR_bshufflev8qi_vis,
10117 v8qi_ftype_v8qi_v8qi);
10118 def_builtin ("__builtin_vis_bshufflev2si", CODE_FOR_bshufflev2si_vis,
10119 v2si_ftype_v2si_v2si);
10120 def_builtin ("__builtin_vis_bshuffledi", CODE_FOR_bshufflev1di_vis,
10128 def_builtin ("__builtin_vis_cmask8", CODE_FOR_cmask8di_vis,
10130 def_builtin ("__builtin_vis_cmask16", CODE_FOR_cmask16di_vis,
10132 def_builtin ("__builtin_vis_cmask32", CODE_FOR_cmask32di_vis,
10137 def_builtin ("__builtin_vis_cmask8", CODE_FOR_cmask8si_vis,
10139 def_builtin ("__builtin_vis_cmask16", CODE_FOR_cmask16si_vis,
10141 def_builtin ("__builtin_vis_cmask32", CODE_FOR_cmask32si_vis,
10145 def_builtin_const ("__builtin_vis_fchksm16", CODE_FOR_fchksm16_vis,
10146 v4hi_ftype_v4hi_v4hi);
10148 def_builtin_const ("__builtin_vis_fsll16", CODE_FOR_vashlv4hi3,
10149 v4hi_ftype_v4hi_v4hi);
10150 def_builtin_const ("__builtin_vis_fslas16", CODE_FOR_vssashlv4hi3,
10151 v4hi_ftype_v4hi_v4hi);
10152 def_builtin_const ("__builtin_vis_fsrl16", CODE_FOR_vlshrv4hi3,
10153 v4hi_ftype_v4hi_v4hi);
10154 def_builtin_const ("__builtin_vis_fsra16", CODE_FOR_vashrv4hi3,
10155 v4hi_ftype_v4hi_v4hi);
10156 def_builtin_const ("__builtin_vis_fsll32", CODE_FOR_vashlv2si3,
10157 v2si_ftype_v2si_v2si);
10158 def_builtin_const ("__builtin_vis_fslas32", CODE_FOR_vssashlv2si3,
10159 v2si_ftype_v2si_v2si);
10160 def_builtin_const ("__builtin_vis_fsrl32", CODE_FOR_vlshrv2si3,
10161 v2si_ftype_v2si_v2si);
10162 def_builtin_const ("__builtin_vis_fsra32", CODE_FOR_vashrv2si3,
10163 v2si_ftype_v2si_v2si);
10166 def_builtin_const ("__builtin_vis_pdistn", CODE_FOR_pdistndi_vis,
10167 di_ftype_v8qi_v8qi);
10169 def_builtin_const ("__builtin_vis_pdistn", CODE_FOR_pdistnsi_vis,
10170 si_ftype_v8qi_v8qi);
10172 def_builtin_const ("__builtin_vis_fmean16", CODE_FOR_fmean16_vis,
10173 v4hi_ftype_v4hi_v4hi);
10174 def_builtin_const ("__builtin_vis_fpadd64", CODE_FOR_fpadd64_vis,
10176 def_builtin_const ("__builtin_vis_fpsub64", CODE_FOR_fpsub64_vis,
10179 def_builtin_const ("__builtin_vis_fpadds16", CODE_FOR_ssaddv4hi3,
10180 v4hi_ftype_v4hi_v4hi);
10181 def_builtin_const ("__builtin_vis_fpadds16s", CODE_FOR_ssaddv2hi3,
10182 v2hi_ftype_v2hi_v2hi);
10183 def_builtin_const ("__builtin_vis_fpsubs16", CODE_FOR_sssubv4hi3,
10184 v4hi_ftype_v4hi_v4hi);
10185 def_builtin_const ("__builtin_vis_fpsubs16s", CODE_FOR_sssubv2hi3,
10186 v2hi_ftype_v2hi_v2hi);
10187 def_builtin_const ("__builtin_vis_fpadds32", CODE_FOR_ssaddv2si3,
10188 v2si_ftype_v2si_v2si);
10189 def_builtin_const ("__builtin_vis_fpadds32s", CODE_FOR_ssaddv1si3,
10190 v1si_ftype_v1si_v1si);
10191 def_builtin_const ("__builtin_vis_fpsubs32", CODE_FOR_sssubv2si3,
10192 v2si_ftype_v2si_v2si);
10193 def_builtin_const ("__builtin_vis_fpsubs32s", CODE_FOR_sssubv1si3,
10194 v1si_ftype_v1si_v1si);
10198 def_builtin_const ("__builtin_vis_fucmple8", CODE_FOR_fucmple8di_vis,
10199 di_ftype_v8qi_v8qi);
10200 def_builtin_const ("__builtin_vis_fucmpne8", CODE_FOR_fucmpne8di_vis,
10201 di_ftype_v8qi_v8qi);
10202 def_builtin_const ("__builtin_vis_fucmpgt8", CODE_FOR_fucmpgt8di_vis,
10203 di_ftype_v8qi_v8qi);
10204 def_builtin_const ("__builtin_vis_fucmpeq8", CODE_FOR_fucmpeq8di_vis,
10205 di_ftype_v8qi_v8qi);
10209 def_builtin_const ("__builtin_vis_fucmple8", CODE_FOR_fucmple8si_vis,
10210 si_ftype_v8qi_v8qi);
10211 def_builtin_const ("__builtin_vis_fucmpne8", CODE_FOR_fucmpne8si_vis,
10212 si_ftype_v8qi_v8qi);
10213 def_builtin_const ("__builtin_vis_fucmpgt8", CODE_FOR_fucmpgt8si_vis,
10214 si_ftype_v8qi_v8qi);
10215 def_builtin_const ("__builtin_vis_fucmpeq8", CODE_FOR_fucmpeq8si_vis,
10216 si_ftype_v8qi_v8qi);
10219 def_builtin_const ("__builtin_vis_fhadds", CODE_FOR_fhaddsf_vis,
10221 def_builtin_const ("__builtin_vis_fhaddd", CODE_FOR_fhadddf_vis,
10223 def_builtin_const ("__builtin_vis_fhsubs", CODE_FOR_fhsubsf_vis,
10225 def_builtin_const ("__builtin_vis_fhsubd", CODE_FOR_fhsubdf_vis,
10227 def_builtin_const ("__builtin_vis_fnhadds", CODE_FOR_fnhaddsf_vis,
10229 def_builtin_const ("__builtin_vis_fnhaddd", CODE_FOR_fnhadddf_vis,
10232 def_builtin_const ("__builtin_vis_umulxhi", CODE_FOR_umulxhi_vis,
10234 def_builtin_const ("__builtin_vis_xmulx", CODE_FOR_xmulx_vis,
10236 def_builtin_const ("__builtin_vis_xmulxhi", CODE_FOR_xmulxhi_vis,
10241 /* Handle TARGET_EXPAND_BUILTIN target hook.
10242 Expand builtin functions for sparc intrinsics. */
10245 sparc_expand_builtin (tree exp, rtx target,
10246 rtx subtarget ATTRIBUTE_UNUSED,
10247 enum machine_mode tmode ATTRIBUTE_UNUSED,
10248 int ignore ATTRIBUTE_UNUSED)
10251 call_expr_arg_iterator iter;
10252 tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
10253 unsigned int icode = DECL_FUNCTION_CODE (fndecl);
10258 nonvoid = TREE_TYPE (TREE_TYPE (fndecl)) != void_type_node;
10262 enum machine_mode tmode = insn_data[icode].operand[0].mode;
10264 || GET_MODE (target) != tmode
10265 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
10266 op[0] = gen_reg_rtx (tmode);
10270 FOR_EACH_CALL_EXPR_ARG (arg, iter, exp)
10272 const struct insn_operand_data *insn_op;
10275 if (arg == error_mark_node)
10279 idx = arg_count - !nonvoid;
10280 insn_op = &insn_data[icode].operand[idx];
10281 op[arg_count] = expand_normal (arg);
10283 if (insn_op->mode == V1DImode
10284 && GET_MODE (op[arg_count]) == DImode)
10285 op[arg_count] = gen_lowpart (V1DImode, op[arg_count]);
10286 else if (insn_op->mode == V1SImode
10287 && GET_MODE (op[arg_count]) == SImode)
10288 op[arg_count] = gen_lowpart (V1SImode, op[arg_count]);
10290 if (! (*insn_data[icode].operand[idx].predicate) (op[arg_count],
10292 op[arg_count] = copy_to_mode_reg (insn_op->mode, op[arg_count]);
10298 pat = GEN_FCN (icode) (op[0]);
10302 pat = GEN_FCN (icode) (op[0], op[1]);
10304 pat = GEN_FCN (icode) (op[1]);
10307 pat = GEN_FCN (icode) (op[0], op[1], op[2]);
10310 pat = GEN_FCN (icode) (op[0], op[1], op[2], op[3]);
10313 gcc_unreachable ();
10328 sparc_vis_mul8x16 (int e8, int e16)
10330 return (e8 * e16 + 128) / 256;
10333 /* Multiply the VECTOR_CSTs CST0 and CST1 as specified by FNCODE and put
10334 the result into the array N_ELTS, whose elements are of INNER_TYPE. */
10337 sparc_handle_vis_mul8x16 (tree *n_elts, int fncode, tree inner_type,
10338 tree cst0, tree cst1)
10340 unsigned i, num = VECTOR_CST_NELTS (cst0);
10345 case CODE_FOR_fmul8x16_vis:
10346 for (i = 0; i < num; ++i)
10349 = sparc_vis_mul8x16 (TREE_INT_CST_LOW (VECTOR_CST_ELT (cst0, i)),
10350 TREE_INT_CST_LOW (VECTOR_CST_ELT (cst1, i)));
10351 n_elts[i] = build_int_cst (inner_type, val);
10355 case CODE_FOR_fmul8x16au_vis:
10356 scale = TREE_INT_CST_LOW (VECTOR_CST_ELT (cst1, 0));
10358 for (i = 0; i < num; ++i)
10361 = sparc_vis_mul8x16 (TREE_INT_CST_LOW (VECTOR_CST_ELT (cst0, i)),
10363 n_elts[i] = build_int_cst (inner_type, val);
10367 case CODE_FOR_fmul8x16al_vis:
10368 scale = TREE_INT_CST_LOW (VECTOR_CST_ELT (cst1, 1));
10370 for (i = 0; i < num; ++i)
10373 = sparc_vis_mul8x16 (TREE_INT_CST_LOW (VECTOR_CST_ELT (cst0, i)),
10375 n_elts[i] = build_int_cst (inner_type, val);
10380 gcc_unreachable ();
10384 /* Handle TARGET_FOLD_BUILTIN target hook.
10385 Fold builtin functions for SPARC intrinsics. If IGNORE is true the
10386 result of the function call is ignored. NULL_TREE is returned if the
10387 function could not be folded. */
10390 sparc_fold_builtin (tree fndecl, int n_args ATTRIBUTE_UNUSED,
10391 tree *args, bool ignore)
10393 tree arg0, arg1, arg2;
10394 tree rtype = TREE_TYPE (TREE_TYPE (fndecl));
10395 enum insn_code icode = (enum insn_code) DECL_FUNCTION_CODE (fndecl);
10399 /* Note that a switch statement instead of the sequence of tests would
10400 be incorrect as many of the CODE_FOR values could be CODE_FOR_nothing
10401 and that would yield multiple alternatives with identical values. */
10402 if (icode == CODE_FOR_alignaddrsi_vis
10403 || icode == CODE_FOR_alignaddrdi_vis
10404 || icode == CODE_FOR_wrgsr_vis
10405 || icode == CODE_FOR_bmasksi_vis
10406 || icode == CODE_FOR_bmaskdi_vis
10407 || icode == CODE_FOR_cmask8si_vis
10408 || icode == CODE_FOR_cmask8di_vis
10409 || icode == CODE_FOR_cmask16si_vis
10410 || icode == CODE_FOR_cmask16di_vis
10411 || icode == CODE_FOR_cmask32si_vis
10412 || icode == CODE_FOR_cmask32di_vis)
10415 return build_zero_cst (rtype);
10420 case CODE_FOR_fexpand_vis:
10424 if (TREE_CODE (arg0) == VECTOR_CST)
10426 tree inner_type = TREE_TYPE (rtype);
10430 n_elts = XALLOCAVEC (tree, VECTOR_CST_NELTS (arg0));
10431 for (i = 0; i < VECTOR_CST_NELTS (arg0); ++i)
10432 n_elts[i] = build_int_cst (inner_type,
10434 (VECTOR_CST_ELT (arg0, i)) << 4);
10435 return build_vector (rtype, n_elts);
10439 case CODE_FOR_fmul8x16_vis:
10440 case CODE_FOR_fmul8x16au_vis:
10441 case CODE_FOR_fmul8x16al_vis:
10447 if (TREE_CODE (arg0) == VECTOR_CST && TREE_CODE (arg1) == VECTOR_CST)
10449 tree inner_type = TREE_TYPE (rtype);
10450 tree *n_elts = XALLOCAVEC (tree, VECTOR_CST_NELTS (arg0));
10451 sparc_handle_vis_mul8x16 (n_elts, icode, inner_type, arg0, arg1);
10452 return build_vector (rtype, n_elts);
10456 case CODE_FOR_fpmerge_vis:
10462 if (TREE_CODE (arg0) == VECTOR_CST && TREE_CODE (arg1) == VECTOR_CST)
10464 tree *n_elts = XALLOCAVEC (tree, 2 * VECTOR_CST_NELTS (arg0));
10466 for (i = 0; i < VECTOR_CST_NELTS (arg0); ++i)
10468 n_elts[2*i] = VECTOR_CST_ELT (arg0, i);
10469 n_elts[2*i+1] = VECTOR_CST_ELT (arg1, i);
10472 return build_vector (rtype, n_elts);
10476 case CODE_FOR_pdist_vis:
10484 if (TREE_CODE (arg0) == VECTOR_CST
10485 && TREE_CODE (arg1) == VECTOR_CST
10486 && TREE_CODE (arg2) == INTEGER_CST)
10488 bool overflow = false;
10489 double_int result = TREE_INT_CST (arg2);
10493 for (i = 0; i < VECTOR_CST_NELTS (arg0); ++i)
10495 double_int e0 = TREE_INT_CST (VECTOR_CST_ELT (arg0, i));
10496 double_int e1 = TREE_INT_CST (VECTOR_CST_ELT (arg1, i));
10498 bool neg1_ovf, neg2_ovf, add1_ovf, add2_ovf;
10500 tmp = e1.neg_with_overflow (&neg1_ovf);
10501 tmp = e0.add_with_sign (tmp, false, &add1_ovf);
10502 if (tmp.is_negative ())
10503 tmp = tmp.neg_with_overflow (&neg2_ovf);
10505 result = result.add_with_sign (tmp, false, &add2_ovf);
10506 overflow |= neg1_ovf | neg2_ovf | add1_ovf | add2_ovf;
10509 gcc_assert (!overflow);
10511 return build_int_cst_wide (rtype, result.low, result.high);
10521 /* ??? This duplicates information provided to the compiler by the
10522 ??? scheduler description. Some day, teach genautomata to output
10523 ??? the latencies and then CSE will just use that. */
10526 sparc_rtx_costs (rtx x, int code, int outer_code, int opno ATTRIBUTE_UNUSED,
10527 int *total, bool speed ATTRIBUTE_UNUSED)
10529 enum machine_mode mode = GET_MODE (x);
10530 bool float_mode_p = FLOAT_MODE_P (mode);
10535 if (INTVAL (x) < 0x1000 && INTVAL (x) >= -0x1000)
10553 if (GET_MODE (x) == VOIDmode
10554 && ((CONST_DOUBLE_HIGH (x) == 0
10555 && CONST_DOUBLE_LOW (x) < 0x1000)
10556 || (CONST_DOUBLE_HIGH (x) == -1
10557 && CONST_DOUBLE_LOW (x) < 0
10558 && CONST_DOUBLE_LOW (x) >= -0x1000)))
10565 /* If outer-code was a sign or zero extension, a cost
10566 of COSTS_N_INSNS (1) was already added in. This is
10567 why we are subtracting it back out. */
10568 if (outer_code == ZERO_EXTEND)
10570 *total = sparc_costs->int_zload - COSTS_N_INSNS (1);
10572 else if (outer_code == SIGN_EXTEND)
10574 *total = sparc_costs->int_sload - COSTS_N_INSNS (1);
10576 else if (float_mode_p)
10578 *total = sparc_costs->float_load;
10582 *total = sparc_costs->int_load;
10590 *total = sparc_costs->float_plusminus;
10592 *total = COSTS_N_INSNS (1);
10599 gcc_assert (float_mode_p);
10600 *total = sparc_costs->float_mul;
10603 if (GET_CODE (sub) == NEG)
10604 sub = XEXP (sub, 0);
10605 *total += rtx_cost (sub, FMA, 0, speed);
10608 if (GET_CODE (sub) == NEG)
10609 sub = XEXP (sub, 0);
10610 *total += rtx_cost (sub, FMA, 2, speed);
10616 *total = sparc_costs->float_mul;
10617 else if (! TARGET_HARD_MUL)
10618 *total = COSTS_N_INSNS (25);
10624 if (sparc_costs->int_mul_bit_factor)
10628 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
10630 unsigned HOST_WIDE_INT value = INTVAL (XEXP (x, 1));
10631 for (nbits = 0; value != 0; value &= value - 1)
10634 else if (GET_CODE (XEXP (x, 1)) == CONST_DOUBLE
10635 && GET_MODE (XEXP (x, 1)) == VOIDmode)
10637 rtx x1 = XEXP (x, 1);
10638 unsigned HOST_WIDE_INT value1 = CONST_DOUBLE_LOW (x1);
10639 unsigned HOST_WIDE_INT value2 = CONST_DOUBLE_HIGH (x1);
10641 for (nbits = 0; value1 != 0; value1 &= value1 - 1)
10643 for (; value2 != 0; value2 &= value2 - 1)
10651 bit_cost = (nbits - 3) / sparc_costs->int_mul_bit_factor;
10652 bit_cost = COSTS_N_INSNS (bit_cost);
10655 if (mode == DImode)
10656 *total = sparc_costs->int_mulX + bit_cost;
10658 *total = sparc_costs->int_mul + bit_cost;
10665 *total = COSTS_N_INSNS (1) + sparc_costs->shift_penalty;
10674 if (mode == DFmode)
10675 *total = sparc_costs->float_div_df;
10677 *total = sparc_costs->float_div_sf;
10681 if (mode == DImode)
10682 *total = sparc_costs->int_divX;
10684 *total = sparc_costs->int_div;
10689 if (! float_mode_p)
10691 *total = COSTS_N_INSNS (1);
10698 case UNSIGNED_FLOAT:
10702 case FLOAT_TRUNCATE:
10703 *total = sparc_costs->float_move;
10707 if (mode == DFmode)
10708 *total = sparc_costs->float_sqrt_df;
10710 *total = sparc_costs->float_sqrt_sf;
10715 *total = sparc_costs->float_cmp;
10717 *total = COSTS_N_INSNS (1);
10722 *total = sparc_costs->float_cmove;
10724 *total = sparc_costs->int_cmove;
10728 /* Handle the NAND vector patterns. */
10729 if (sparc_vector_mode_supported_p (GET_MODE (x))
10730 && GET_CODE (XEXP (x, 0)) == NOT
10731 && GET_CODE (XEXP (x, 1)) == NOT)
10733 *total = COSTS_N_INSNS (1);
10744 /* Return true if CLASS is either GENERAL_REGS or I64_REGS. */
10747 general_or_i64_p (reg_class_t rclass)
10749 return (rclass == GENERAL_REGS || rclass == I64_REGS);
10752 /* Implement TARGET_REGISTER_MOVE_COST. */
10755 sparc_register_move_cost (enum machine_mode mode ATTRIBUTE_UNUSED,
10756 reg_class_t from, reg_class_t to)
10758 bool need_memory = false;
10760 if (from == FPCC_REGS || to == FPCC_REGS)
10761 need_memory = true;
10762 else if ((FP_REG_CLASS_P (from) && general_or_i64_p (to))
10763 || (general_or_i64_p (from) && FP_REG_CLASS_P (to)))
10767 int size = GET_MODE_SIZE (mode);
10768 if (size == 8 || size == 4)
10770 if (! TARGET_ARCH32 || size == 4)
10776 need_memory = true;
10781 if (sparc_cpu == PROCESSOR_ULTRASPARC
10782 || sparc_cpu == PROCESSOR_ULTRASPARC3
10783 || sparc_cpu == PROCESSOR_NIAGARA
10784 || sparc_cpu == PROCESSOR_NIAGARA2
10785 || sparc_cpu == PROCESSOR_NIAGARA3
10786 || sparc_cpu == PROCESSOR_NIAGARA4)
10795 /* Emit the sequence of insns SEQ while preserving the registers REG and REG2.
10796 This is achieved by means of a manual dynamic stack space allocation in
10797 the current frame. We make the assumption that SEQ doesn't contain any
10798 function calls, with the possible exception of calls to the GOT helper. */
10801 emit_and_preserve (rtx seq, rtx reg, rtx reg2)
10803 /* We must preserve the lowest 16 words for the register save area. */
10804 HOST_WIDE_INT offset = 16*UNITS_PER_WORD;
10805 /* We really need only 2 words of fresh stack space. */
10806 HOST_WIDE_INT size = SPARC_STACK_ALIGN (offset + 2*UNITS_PER_WORD);
10809 = gen_rtx_MEM (word_mode, plus_constant (Pmode, stack_pointer_rtx,
10810 SPARC_STACK_BIAS + offset));
10812 emit_insn (gen_stack_pointer_inc (GEN_INT (-size)));
10813 emit_insn (gen_rtx_SET (VOIDmode, slot, reg));
10815 emit_insn (gen_rtx_SET (VOIDmode,
10816 adjust_address (slot, word_mode, UNITS_PER_WORD),
10820 emit_insn (gen_rtx_SET (VOIDmode,
10822 adjust_address (slot, word_mode, UNITS_PER_WORD)));
10823 emit_insn (gen_rtx_SET (VOIDmode, reg, slot));
10824 emit_insn (gen_stack_pointer_inc (GEN_INT (size)));
10827 /* Output the assembler code for a thunk function. THUNK_DECL is the
10828 declaration for the thunk function itself, FUNCTION is the decl for
10829 the target function. DELTA is an immediate constant offset to be
10830 added to THIS. If VCALL_OFFSET is nonzero, the word at address
10831 (*THIS + VCALL_OFFSET) should be additionally added to THIS. */
10834 sparc_output_mi_thunk (FILE *file, tree thunk_fndecl ATTRIBUTE_UNUSED,
10835 HOST_WIDE_INT delta, HOST_WIDE_INT vcall_offset,
10838 rtx this_rtx, insn, funexp;
10839 unsigned int int_arg_first;
10841 reload_completed = 1;
10842 epilogue_completed = 1;
10844 emit_note (NOTE_INSN_PROLOGUE_END);
10848 sparc_leaf_function_p = 1;
10850 int_arg_first = SPARC_OUTGOING_INT_ARG_FIRST;
10852 else if (flag_delayed_branch)
10854 /* We will emit a regular sibcall below, so we need to instruct
10855 output_sibcall that we are in a leaf function. */
10856 sparc_leaf_function_p = crtl->uses_only_leaf_regs = 1;
10858 /* This will cause final.c to invoke leaf_renumber_regs so we
10859 must behave as if we were in a not-yet-leafified function. */
10860 int_arg_first = SPARC_INCOMING_INT_ARG_FIRST;
10864 /* We will emit the sibcall manually below, so we will need to
10865 manually spill non-leaf registers. */
10866 sparc_leaf_function_p = crtl->uses_only_leaf_regs = 0;
10868 /* We really are in a leaf function. */
10869 int_arg_first = SPARC_OUTGOING_INT_ARG_FIRST;
10872 /* Find the "this" pointer. Normally in %o0, but in ARCH64 if the function
10873 returns a structure, the structure return pointer is there instead. */
10875 && aggregate_value_p (TREE_TYPE (TREE_TYPE (function)), function))
10876 this_rtx = gen_rtx_REG (Pmode, int_arg_first + 1);
10878 this_rtx = gen_rtx_REG (Pmode, int_arg_first);
10880 /* Add DELTA. When possible use a plain add, otherwise load it into
10881 a register first. */
10884 rtx delta_rtx = GEN_INT (delta);
10886 if (! SPARC_SIMM13_P (delta))
10888 rtx scratch = gen_rtx_REG (Pmode, 1);
10889 emit_move_insn (scratch, delta_rtx);
10890 delta_rtx = scratch;
10893 /* THIS_RTX += DELTA. */
10894 emit_insn (gen_add2_insn (this_rtx, delta_rtx));
10897 /* Add the word at address (*THIS_RTX + VCALL_OFFSET). */
10900 rtx vcall_offset_rtx = GEN_INT (vcall_offset);
10901 rtx scratch = gen_rtx_REG (Pmode, 1);
10903 gcc_assert (vcall_offset < 0);
10905 /* SCRATCH = *THIS_RTX. */
10906 emit_move_insn (scratch, gen_rtx_MEM (Pmode, this_rtx));
10908 /* Prepare for adding VCALL_OFFSET. The difficulty is that we
10909 may not have any available scratch register at this point. */
10910 if (SPARC_SIMM13_P (vcall_offset))
10912 /* This is the case if ARCH64 (unless -ffixed-g5 is passed). */
10913 else if (! fixed_regs[5]
10914 /* The below sequence is made up of at least 2 insns,
10915 while the default method may need only one. */
10916 && vcall_offset < -8192)
10918 rtx scratch2 = gen_rtx_REG (Pmode, 5);
10919 emit_move_insn (scratch2, vcall_offset_rtx);
10920 vcall_offset_rtx = scratch2;
10924 rtx increment = GEN_INT (-4096);
10926 /* VCALL_OFFSET is a negative number whose typical range can be
10927 estimated as -32768..0 in 32-bit mode. In almost all cases
10928 it is therefore cheaper to emit multiple add insns than
10929 spilling and loading the constant into a register (at least
10931 while (! SPARC_SIMM13_P (vcall_offset))
10933 emit_insn (gen_add2_insn (scratch, increment));
10934 vcall_offset += 4096;
10936 vcall_offset_rtx = GEN_INT (vcall_offset); /* cannot be 0 */
10939 /* SCRATCH = *(*THIS_RTX + VCALL_OFFSET). */
10940 emit_move_insn (scratch, gen_rtx_MEM (Pmode,
10941 gen_rtx_PLUS (Pmode,
10943 vcall_offset_rtx)));
10945 /* THIS_RTX += *(*THIS_RTX + VCALL_OFFSET). */
10946 emit_insn (gen_add2_insn (this_rtx, scratch));
10949 /* Generate a tail call to the target function. */
10950 if (! TREE_USED (function))
10952 assemble_external (function);
10953 TREE_USED (function) = 1;
10955 funexp = XEXP (DECL_RTL (function), 0);
10957 if (flag_delayed_branch)
10959 funexp = gen_rtx_MEM (FUNCTION_MODE, funexp);
10960 insn = emit_call_insn (gen_sibcall (funexp));
10961 SIBLING_CALL_P (insn) = 1;
10965 /* The hoops we have to jump through in order to generate a sibcall
10966 without using delay slots... */
10967 rtx spill_reg, seq, scratch = gen_rtx_REG (Pmode, 1);
10971 spill_reg = gen_rtx_REG (word_mode, 15); /* %o7 */
10973 load_got_register (); /* clobbers %o7 */
10974 scratch = sparc_legitimize_pic_address (funexp, scratch);
10975 seq = get_insns ();
10977 emit_and_preserve (seq, spill_reg, pic_offset_table_rtx);
10979 else if (TARGET_ARCH32)
10981 emit_insn (gen_rtx_SET (VOIDmode,
10983 gen_rtx_HIGH (SImode, funexp)));
10984 emit_insn (gen_rtx_SET (VOIDmode,
10986 gen_rtx_LO_SUM (SImode, scratch, funexp)));
10988 else /* TARGET_ARCH64 */
10990 switch (sparc_cmodel)
10994 /* The destination can serve as a temporary. */
10995 sparc_emit_set_symbolic_const64 (scratch, funexp, scratch);
11000 /* The destination cannot serve as a temporary. */
11001 spill_reg = gen_rtx_REG (DImode, 15); /* %o7 */
11003 sparc_emit_set_symbolic_const64 (scratch, funexp, spill_reg);
11004 seq = get_insns ();
11006 emit_and_preserve (seq, spill_reg, 0);
11010 gcc_unreachable ();
11014 emit_jump_insn (gen_indirect_jump (scratch));
11019 /* Run just enough of rest_of_compilation to get the insns emitted.
11020 There's not really enough bulk here to make other passes such as
11021 instruction scheduling worth while. Note that use_thunk calls
11022 assemble_start_function and assemble_end_function. */
11023 insn = get_insns ();
11024 shorten_branches (insn);
11025 final_start_function (insn, file, 1);
11026 final (insn, file, 1);
11027 final_end_function ();
11029 reload_completed = 0;
11030 epilogue_completed = 0;
11033 /* Return true if sparc_output_mi_thunk would be able to output the
11034 assembler code for the thunk function specified by the arguments
11035 it is passed, and false otherwise. */
11037 sparc_can_output_mi_thunk (const_tree thunk_fndecl ATTRIBUTE_UNUSED,
11038 HOST_WIDE_INT delta ATTRIBUTE_UNUSED,
11039 HOST_WIDE_INT vcall_offset,
11040 const_tree function ATTRIBUTE_UNUSED)
11042 /* Bound the loop used in the default method above. */
11043 return (vcall_offset >= -32768 || ! fixed_regs[5]);
11046 /* How to allocate a 'struct machine_function'. */
11048 static struct machine_function *
11049 sparc_init_machine_status (void)
11051 return ggc_alloc_cleared_machine_function ();
11054 /* Locate some local-dynamic symbol still in use by this function
11055 so that we can print its name in local-dynamic base patterns. */
11057 static const char *
11058 get_some_local_dynamic_name (void)
11062 if (cfun->machine->some_ld_name)
11063 return cfun->machine->some_ld_name;
11065 for (insn = get_insns (); insn ; insn = NEXT_INSN (insn))
11067 && for_each_rtx (&PATTERN (insn), get_some_local_dynamic_name_1, 0))
11068 return cfun->machine->some_ld_name;
11070 gcc_unreachable ();
11074 get_some_local_dynamic_name_1 (rtx *px, void *data ATTRIBUTE_UNUSED)
11079 && GET_CODE (x) == SYMBOL_REF
11080 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_DYNAMIC)
11082 cfun->machine->some_ld_name = XSTR (x, 0);
11089 /* This is called from dwarf2out.c via TARGET_ASM_OUTPUT_DWARF_DTPREL.
11090 We need to emit DTP-relative relocations. */
11093 sparc_output_dwarf_dtprel (FILE *file, int size, rtx x)
11098 fputs ("\t.word\t%r_tls_dtpoff32(", file);
11101 fputs ("\t.xword\t%r_tls_dtpoff64(", file);
11104 gcc_unreachable ();
11106 output_addr_const (file, x);
11110 /* Do whatever processing is required at the end of a file. */
11113 sparc_file_end (void)
11115 /* If we need to emit the special GOT helper function, do so now. */
11116 if (got_helper_rtx)
11118 const char *name = XSTR (got_helper_rtx, 0);
11119 const char *reg_name = reg_names[GLOBAL_OFFSET_TABLE_REGNUM];
11120 #ifdef DWARF2_UNWIND_INFO
11124 if (USE_HIDDEN_LINKONCE)
11126 tree decl = build_decl (BUILTINS_LOCATION, FUNCTION_DECL,
11127 get_identifier (name),
11128 build_function_type_list (void_type_node,
11130 DECL_RESULT (decl) = build_decl (BUILTINS_LOCATION, RESULT_DECL,
11131 NULL_TREE, void_type_node);
11132 TREE_PUBLIC (decl) = 1;
11133 TREE_STATIC (decl) = 1;
11134 make_decl_one_only (decl, DECL_ASSEMBLER_NAME (decl));
11135 DECL_VISIBILITY (decl) = VISIBILITY_HIDDEN;
11136 DECL_VISIBILITY_SPECIFIED (decl) = 1;
11137 resolve_unique_section (decl, 0, flag_function_sections);
11138 allocate_struct_function (decl, true);
11139 cfun->is_thunk = 1;
11140 current_function_decl = decl;
11141 init_varasm_status ();
11142 assemble_start_function (decl, name);
11146 const int align = floor_log2 (FUNCTION_BOUNDARY / BITS_PER_UNIT);
11147 switch_to_section (text_section);
11149 ASM_OUTPUT_ALIGN (asm_out_file, align);
11150 ASM_OUTPUT_LABEL (asm_out_file, name);
11153 #ifdef DWARF2_UNWIND_INFO
11154 do_cfi = dwarf2out_do_cfi_asm ();
11156 fprintf (asm_out_file, "\t.cfi_startproc\n");
11158 if (flag_delayed_branch)
11159 fprintf (asm_out_file, "\tjmp\t%%o7+8\n\t add\t%%o7, %s, %s\n",
11160 reg_name, reg_name);
11162 fprintf (asm_out_file, "\tadd\t%%o7, %s, %s\n\tjmp\t%%o7+8\n\t nop\n",
11163 reg_name, reg_name);
11164 #ifdef DWARF2_UNWIND_INFO
11166 fprintf (asm_out_file, "\t.cfi_endproc\n");
11170 if (NEED_INDICATE_EXEC_STACK)
11171 file_end_indicate_exec_stack ();
11173 #ifdef TARGET_SOLARIS
11174 solaris_file_end ();
11178 #ifdef TARGET_ALTERNATE_LONG_DOUBLE_MANGLING
11179 /* Implement TARGET_MANGLE_TYPE. */
11181 static const char *
11182 sparc_mangle_type (const_tree type)
11185 && TYPE_MAIN_VARIANT (type) == long_double_type_node
11186 && TARGET_LONG_DOUBLE_128)
11189 /* For all other types, use normal C++ mangling. */
11194 /* Expand a membar instruction for various use cases. Both the LOAD_STORE
11195 and BEFORE_AFTER arguments of the form X_Y. They are two-bit masks where
11196 bit 0 indicates that X is true, and bit 1 indicates Y is true. */
11199 sparc_emit_membar_for_model (enum memmodel model,
11200 int load_store, int before_after)
11202 /* Bits for the MEMBAR mmask field. */
11203 const int LoadLoad = 1;
11204 const int StoreLoad = 2;
11205 const int LoadStore = 4;
11206 const int StoreStore = 8;
11208 int mm = 0, implied = 0;
11210 switch (sparc_memory_model)
11213 /* Sequential Consistency. All memory transactions are immediately
11214 visible in sequential execution order. No barriers needed. */
11215 implied = LoadLoad | StoreLoad | LoadStore | StoreStore;
11219 /* Total Store Ordering: all memory transactions with store semantics
11220 are followed by an implied StoreStore. */
11221 implied |= StoreStore;
11225 /* Partial Store Ordering: all memory transactions with load semantics
11226 are followed by an implied LoadLoad | LoadStore. */
11227 implied |= LoadLoad | LoadStore;
11229 /* If we're not looking for a raw barrer (before+after), then atomic
11230 operations get the benefit of being both load and store. */
11231 if (load_store == 3 && before_after == 2)
11232 implied |= StoreLoad | StoreStore;
11236 /* Relaxed Memory Ordering: no implicit bits. */
11240 gcc_unreachable ();
11243 if (before_after & 1)
11245 if (model == MEMMODEL_RELEASE
11246 || model == MEMMODEL_ACQ_REL
11247 || model == MEMMODEL_SEQ_CST)
11249 if (load_store & 1)
11250 mm |= LoadLoad | StoreLoad;
11251 if (load_store & 2)
11252 mm |= LoadStore | StoreStore;
11255 if (before_after & 2)
11257 if (model == MEMMODEL_ACQUIRE
11258 || model == MEMMODEL_ACQ_REL
11259 || model == MEMMODEL_SEQ_CST)
11261 if (load_store & 1)
11262 mm |= LoadLoad | LoadStore;
11263 if (load_store & 2)
11264 mm |= StoreLoad | StoreStore;
11268 /* Remove the bits implied by the system memory model. */
11271 /* For raw barriers (before+after), always emit a barrier.
11272 This will become a compile-time barrier if needed. */
11273 if (mm || before_after == 3)
11274 emit_insn (gen_membar (GEN_INT (mm)));
11277 /* Expand code to perform a 8 or 16-bit compare and swap by doing 32-bit
11278 compare and swap on the word containing the byte or half-word. */
11281 sparc_expand_compare_and_swap_12 (rtx bool_result, rtx result, rtx mem,
11282 rtx oldval, rtx newval)
11284 rtx addr1 = force_reg (Pmode, XEXP (mem, 0));
11285 rtx addr = gen_reg_rtx (Pmode);
11286 rtx off = gen_reg_rtx (SImode);
11287 rtx oldv = gen_reg_rtx (SImode);
11288 rtx newv = gen_reg_rtx (SImode);
11289 rtx oldvalue = gen_reg_rtx (SImode);
11290 rtx newvalue = gen_reg_rtx (SImode);
11291 rtx res = gen_reg_rtx (SImode);
11292 rtx resv = gen_reg_rtx (SImode);
11293 rtx memsi, val, mask, end_label, loop_label, cc;
11295 emit_insn (gen_rtx_SET (VOIDmode, addr,
11296 gen_rtx_AND (Pmode, addr1, GEN_INT (-4))));
11298 if (Pmode != SImode)
11299 addr1 = gen_lowpart (SImode, addr1);
11300 emit_insn (gen_rtx_SET (VOIDmode, off,
11301 gen_rtx_AND (SImode, addr1, GEN_INT (3))));
11303 memsi = gen_rtx_MEM (SImode, addr);
11304 set_mem_alias_set (memsi, ALIAS_SET_MEMORY_BARRIER);
11305 MEM_VOLATILE_P (memsi) = MEM_VOLATILE_P (mem);
11307 val = copy_to_reg (memsi);
11309 emit_insn (gen_rtx_SET (VOIDmode, off,
11310 gen_rtx_XOR (SImode, off,
11311 GEN_INT (GET_MODE (mem) == QImode
11314 emit_insn (gen_rtx_SET (VOIDmode, off,
11315 gen_rtx_ASHIFT (SImode, off, GEN_INT (3))));
11317 if (GET_MODE (mem) == QImode)
11318 mask = force_reg (SImode, GEN_INT (0xff));
11320 mask = force_reg (SImode, GEN_INT (0xffff));
11322 emit_insn (gen_rtx_SET (VOIDmode, mask,
11323 gen_rtx_ASHIFT (SImode, mask, off)));
11325 emit_insn (gen_rtx_SET (VOIDmode, val,
11326 gen_rtx_AND (SImode, gen_rtx_NOT (SImode, mask),
11329 oldval = gen_lowpart (SImode, oldval);
11330 emit_insn (gen_rtx_SET (VOIDmode, oldv,
11331 gen_rtx_ASHIFT (SImode, oldval, off)));
11333 newval = gen_lowpart_common (SImode, newval);
11334 emit_insn (gen_rtx_SET (VOIDmode, newv,
11335 gen_rtx_ASHIFT (SImode, newval, off)));
11337 emit_insn (gen_rtx_SET (VOIDmode, oldv,
11338 gen_rtx_AND (SImode, oldv, mask)));
11340 emit_insn (gen_rtx_SET (VOIDmode, newv,
11341 gen_rtx_AND (SImode, newv, mask)));
11343 end_label = gen_label_rtx ();
11344 loop_label = gen_label_rtx ();
11345 emit_label (loop_label);
11347 emit_insn (gen_rtx_SET (VOIDmode, oldvalue,
11348 gen_rtx_IOR (SImode, oldv, val)));
11350 emit_insn (gen_rtx_SET (VOIDmode, newvalue,
11351 gen_rtx_IOR (SImode, newv, val)));
11353 emit_move_insn (bool_result, const1_rtx);
11355 emit_insn (gen_atomic_compare_and_swapsi_1 (res, memsi, oldvalue, newvalue));
11357 emit_cmp_and_jump_insns (res, oldvalue, EQ, NULL, SImode, 0, end_label);
11359 emit_insn (gen_rtx_SET (VOIDmode, resv,
11360 gen_rtx_AND (SImode, gen_rtx_NOT (SImode, mask),
11363 emit_move_insn (bool_result, const0_rtx);
11365 cc = gen_compare_reg_1 (NE, resv, val);
11366 emit_insn (gen_rtx_SET (VOIDmode, val, resv));
11368 /* Use cbranchcc4 to separate the compare and branch! */
11369 emit_jump_insn (gen_cbranchcc4 (gen_rtx_NE (VOIDmode, cc, const0_rtx),
11370 cc, const0_rtx, loop_label));
11372 emit_label (end_label);
11374 emit_insn (gen_rtx_SET (VOIDmode, res,
11375 gen_rtx_AND (SImode, res, mask)));
11377 emit_insn (gen_rtx_SET (VOIDmode, res,
11378 gen_rtx_LSHIFTRT (SImode, res, off)));
11380 emit_move_insn (result, gen_lowpart (GET_MODE (result), res));
11383 /* Expand code to perform a compare-and-swap. */
11386 sparc_expand_compare_and_swap (rtx operands[])
11388 rtx bval, retval, mem, oldval, newval;
11389 enum machine_mode mode;
11390 enum memmodel model;
11392 bval = operands[0];
11393 retval = operands[1];
11395 oldval = operands[3];
11396 newval = operands[4];
11397 model = (enum memmodel) INTVAL (operands[6]);
11398 mode = GET_MODE (mem);
11400 sparc_emit_membar_for_model (model, 3, 1);
11402 if (reg_overlap_mentioned_p (retval, oldval))
11403 oldval = copy_to_reg (oldval);
11405 if (mode == QImode || mode == HImode)
11406 sparc_expand_compare_and_swap_12 (bval, retval, mem, oldval, newval);
11409 rtx (*gen) (rtx, rtx, rtx, rtx);
11412 if (mode == SImode)
11413 gen = gen_atomic_compare_and_swapsi_1;
11415 gen = gen_atomic_compare_and_swapdi_1;
11416 emit_insn (gen (retval, mem, oldval, newval));
11418 x = emit_store_flag (bval, EQ, retval, oldval, mode, 1, 1);
11420 convert_move (bval, x, 1);
11423 sparc_emit_membar_for_model (model, 3, 2);
11427 sparc_expand_vec_perm_bmask (enum machine_mode vmode, rtx sel)
11431 sel = gen_lowpart (DImode, sel);
11435 /* inp = xxxxxxxAxxxxxxxB */
11436 t_1 = expand_simple_binop (DImode, LSHIFTRT, sel, GEN_INT (16),
11437 NULL_RTX, 1, OPTAB_DIRECT);
11438 /* t_1 = ....xxxxxxxAxxx. */
11439 sel = expand_simple_binop (SImode, AND, gen_lowpart (SImode, sel),
11440 GEN_INT (3), NULL_RTX, 1, OPTAB_DIRECT);
11441 t_1 = expand_simple_binop (SImode, AND, gen_lowpart (SImode, t_1),
11442 GEN_INT (0x30000), NULL_RTX, 1, OPTAB_DIRECT);
11443 /* sel = .......B */
11444 /* t_1 = ...A.... */
11445 sel = expand_simple_binop (SImode, IOR, sel, t_1, sel, 1, OPTAB_DIRECT);
11446 /* sel = ...A...B */
11447 sel = expand_mult (SImode, sel, GEN_INT (0x4444), sel, 1);
11448 /* sel = AAAABBBB * 4 */
11449 t_1 = force_reg (SImode, GEN_INT (0x01230123));
11450 /* sel = { A*4, A*4+1, A*4+2, ... } */
11454 /* inp = xxxAxxxBxxxCxxxD */
11455 t_1 = expand_simple_binop (DImode, LSHIFTRT, sel, GEN_INT (8),
11456 NULL_RTX, 1, OPTAB_DIRECT);
11457 t_2 = expand_simple_binop (DImode, LSHIFTRT, sel, GEN_INT (16),
11458 NULL_RTX, 1, OPTAB_DIRECT);
11459 t_3 = expand_simple_binop (DImode, LSHIFTRT, sel, GEN_INT (24),
11460 NULL_RTX, 1, OPTAB_DIRECT);
11461 /* t_1 = ..xxxAxxxBxxxCxx */
11462 /* t_2 = ....xxxAxxxBxxxC */
11463 /* t_3 = ......xxxAxxxBxx */
11464 sel = expand_simple_binop (SImode, AND, gen_lowpart (SImode, sel),
11466 NULL_RTX, 1, OPTAB_DIRECT);
11467 t_1 = expand_simple_binop (SImode, AND, gen_lowpart (SImode, t_1),
11469 NULL_RTX, 1, OPTAB_DIRECT);
11470 t_2 = expand_simple_binop (SImode, AND, gen_lowpart (SImode, t_2),
11471 GEN_INT (0x070000),
11472 NULL_RTX, 1, OPTAB_DIRECT);
11473 t_3 = expand_simple_binop (SImode, AND, gen_lowpart (SImode, t_3),
11474 GEN_INT (0x07000000),
11475 NULL_RTX, 1, OPTAB_DIRECT);
11476 /* sel = .......D */
11477 /* t_1 = .....C.. */
11478 /* t_2 = ...B.... */
11479 /* t_3 = .A...... */
11480 sel = expand_simple_binop (SImode, IOR, sel, t_1, sel, 1, OPTAB_DIRECT);
11481 t_2 = expand_simple_binop (SImode, IOR, t_2, t_3, t_2, 1, OPTAB_DIRECT);
11482 sel = expand_simple_binop (SImode, IOR, sel, t_2, sel, 1, OPTAB_DIRECT);
11483 /* sel = .A.B.C.D */
11484 sel = expand_mult (SImode, sel, GEN_INT (0x22), sel, 1);
11485 /* sel = AABBCCDD * 2 */
11486 t_1 = force_reg (SImode, GEN_INT (0x01010101));
11487 /* sel = { A*2, A*2+1, B*2, B*2+1, ... } */
11491 /* input = xAxBxCxDxExFxGxH */
11492 sel = expand_simple_binop (DImode, AND, sel,
11493 GEN_INT ((HOST_WIDE_INT)0x0f0f0f0f << 32
11495 NULL_RTX, 1, OPTAB_DIRECT);
11496 /* sel = .A.B.C.D.E.F.G.H */
11497 t_1 = expand_simple_binop (DImode, LSHIFTRT, sel, GEN_INT (4),
11498 NULL_RTX, 1, OPTAB_DIRECT);
11499 /* t_1 = ..A.B.C.D.E.F.G. */
11500 sel = expand_simple_binop (DImode, IOR, sel, t_1,
11501 NULL_RTX, 1, OPTAB_DIRECT);
11502 /* sel = .AABBCCDDEEFFGGH */
11503 sel = expand_simple_binop (DImode, AND, sel,
11504 GEN_INT ((HOST_WIDE_INT)0xff00ff << 32
11506 NULL_RTX, 1, OPTAB_DIRECT);
11507 /* sel = ..AB..CD..EF..GH */
11508 t_1 = expand_simple_binop (DImode, LSHIFTRT, sel, GEN_INT (8),
11509 NULL_RTX, 1, OPTAB_DIRECT);
11510 /* t_1 = ....AB..CD..EF.. */
11511 sel = expand_simple_binop (DImode, IOR, sel, t_1,
11512 NULL_RTX, 1, OPTAB_DIRECT);
11513 /* sel = ..ABABCDCDEFEFGH */
11514 sel = expand_simple_binop (DImode, AND, sel,
11515 GEN_INT ((HOST_WIDE_INT)0xffff << 32 | 0xffff),
11516 NULL_RTX, 1, OPTAB_DIRECT);
11517 /* sel = ....ABCD....EFGH */
11518 t_1 = expand_simple_binop (DImode, LSHIFTRT, sel, GEN_INT (16),
11519 NULL_RTX, 1, OPTAB_DIRECT);
11520 /* t_1 = ........ABCD.... */
11521 sel = gen_lowpart (SImode, sel);
11522 t_1 = gen_lowpart (SImode, t_1);
11526 gcc_unreachable ();
11529 /* Always perform the final addition/merge within the bmask insn. */
11530 emit_insn (gen_bmasksi_vis (gen_reg_rtx (SImode), sel, t_1));
11533 /* Implement TARGET_FRAME_POINTER_REQUIRED. */
11536 sparc_frame_pointer_required (void)
11538 /* If the stack pointer is dynamically modified in the function, it cannot
11539 serve as the frame pointer. */
11540 if (cfun->calls_alloca)
11543 /* If the function receives nonlocal gotos, it needs to save the frame
11544 pointer in the nonlocal_goto_save_area object. */
11545 if (cfun->has_nonlocal_label)
11548 /* In flat mode, that's it. */
11552 /* Otherwise, the frame pointer is required if the function isn't leaf. */
11553 return !(crtl->is_leaf && only_leaf_regs_used ());
11556 /* The way this is structured, we can't eliminate SFP in favor of SP
11557 if the frame pointer is required: we want to use the SFP->HFP elimination
11558 in that case. But the test in update_eliminables doesn't know we are
11559 assuming below that we only do the former elimination. */
11562 sparc_can_eliminate (const int from ATTRIBUTE_UNUSED, const int to)
11564 return to == HARD_FRAME_POINTER_REGNUM || !sparc_frame_pointer_required ();
11567 /* Return the hard frame pointer directly to bypass the stack bias. */
11570 sparc_builtin_setjmp_frame_value (void)
11572 return hard_frame_pointer_rtx;
11575 /* If !TARGET_FPU, then make the fp registers and fp cc regs fixed so that
11576 they won't be allocated. */
11579 sparc_conditional_register_usage (void)
11581 if (PIC_OFFSET_TABLE_REGNUM != INVALID_REGNUM)
11583 fixed_regs[PIC_OFFSET_TABLE_REGNUM] = 1;
11584 call_used_regs[PIC_OFFSET_TABLE_REGNUM] = 1;
11586 /* If the user has passed -f{fixed,call-{used,saved}}-g5 */
11587 /* then honor it. */
11588 if (TARGET_ARCH32 && fixed_regs[5])
11590 else if (TARGET_ARCH64 && fixed_regs[5] == 2)
11595 for (regno = SPARC_FIRST_V9_FP_REG;
11596 regno <= SPARC_LAST_V9_FP_REG;
11598 fixed_regs[regno] = 1;
11599 /* %fcc0 is used by v8 and v9. */
11600 for (regno = SPARC_FIRST_V9_FCC_REG + 1;
11601 regno <= SPARC_LAST_V9_FCC_REG;
11603 fixed_regs[regno] = 1;
11608 for (regno = 32; regno < SPARC_LAST_V9_FCC_REG; regno++)
11609 fixed_regs[regno] = 1;
11611 /* If the user has passed -f{fixed,call-{used,saved}}-g2 */
11612 /* then honor it. Likewise with g3 and g4. */
11613 if (fixed_regs[2] == 2)
11614 fixed_regs[2] = ! TARGET_APP_REGS;
11615 if (fixed_regs[3] == 2)
11616 fixed_regs[3] = ! TARGET_APP_REGS;
11617 if (TARGET_ARCH32 && fixed_regs[4] == 2)
11618 fixed_regs[4] = ! TARGET_APP_REGS;
11619 else if (TARGET_CM_EMBMEDANY)
11621 else if (fixed_regs[4] == 2)
11626 /* Disable leaf functions. */
11627 memset (sparc_leaf_regs, 0, FIRST_PSEUDO_REGISTER);
11628 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
11629 leaf_reg_remap [regno] = regno;
11632 global_regs[SPARC_GSR_REG] = 1;
11635 /* Implement TARGET_PREFERRED_RELOAD_CLASS:
11637 - We can't load constants into FP registers.
11638 - We can't load FP constants into integer registers when soft-float,
11639 because there is no soft-float pattern with a r/F constraint.
11640 - We can't load FP constants into integer registers for TFmode unless
11641 it is 0.0L, because there is no movtf pattern with a r/F constraint.
11642 - Try and reload integer constants (symbolic or otherwise) back into
11643 registers directly, rather than having them dumped to memory. */
11646 sparc_preferred_reload_class (rtx x, reg_class_t rclass)
11648 enum machine_mode mode = GET_MODE (x);
11649 if (CONSTANT_P (x))
11651 if (FP_REG_CLASS_P (rclass)
11652 || rclass == GENERAL_OR_FP_REGS
11653 || rclass == GENERAL_OR_EXTRA_FP_REGS
11654 || (GET_MODE_CLASS (mode) == MODE_FLOAT && ! TARGET_FPU)
11655 || (mode == TFmode && ! const_zero_operand (x, mode)))
11658 if (GET_MODE_CLASS (mode) == MODE_INT)
11659 return GENERAL_REGS;
11661 if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
11663 if (! FP_REG_CLASS_P (rclass)
11664 || !(const_zero_operand (x, mode)
11665 || const_all_ones_operand (x, mode)))
11672 && (rclass == EXTRA_FP_REGS
11673 || rclass == GENERAL_OR_EXTRA_FP_REGS))
11675 int regno = true_regnum (x);
11677 if (SPARC_INT_REG_P (regno))
11678 return (rclass == EXTRA_FP_REGS
11679 ? FP_REGS : GENERAL_OR_FP_REGS);
11685 /* Output a wide multiply instruction in V8+ mode. INSN is the instruction,
11686 OPERANDS are its operands and OPCODE is the mnemonic to be used. */
11689 output_v8plus_mult (rtx insn, rtx *operands, const char *opcode)
11693 gcc_assert (! TARGET_ARCH64);
11695 if (sparc_check_64 (operands[1], insn) <= 0)
11696 output_asm_insn ("srl\t%L1, 0, %L1", operands);
11697 if (which_alternative == 1)
11698 output_asm_insn ("sllx\t%H1, 32, %H1", operands);
11699 if (GET_CODE (operands[2]) == CONST_INT)
11701 if (which_alternative == 1)
11703 output_asm_insn ("or\t%L1, %H1, %H1", operands);
11704 sprintf (mulstr, "%s\t%%H1, %%2, %%L0", opcode);
11705 output_asm_insn (mulstr, operands);
11706 return "srlx\t%L0, 32, %H0";
11710 output_asm_insn ("sllx\t%H1, 32, %3", operands);
11711 output_asm_insn ("or\t%L1, %3, %3", operands);
11712 sprintf (mulstr, "%s\t%%3, %%2, %%3", opcode);
11713 output_asm_insn (mulstr, operands);
11714 output_asm_insn ("srlx\t%3, 32, %H0", operands);
11715 return "mov\t%3, %L0";
11718 else if (rtx_equal_p (operands[1], operands[2]))
11720 if (which_alternative == 1)
11722 output_asm_insn ("or\t%L1, %H1, %H1", operands);
11723 sprintf (mulstr, "%s\t%%H1, %%H1, %%L0", opcode);
11724 output_asm_insn (mulstr, operands);
11725 return "srlx\t%L0, 32, %H0";
11729 output_asm_insn ("sllx\t%H1, 32, %3", operands);
11730 output_asm_insn ("or\t%L1, %3, %3", operands);
11731 sprintf (mulstr, "%s\t%%3, %%3, %%3", opcode);
11732 output_asm_insn (mulstr, operands);
11733 output_asm_insn ("srlx\t%3, 32, %H0", operands);
11734 return "mov\t%3, %L0";
11737 if (sparc_check_64 (operands[2], insn) <= 0)
11738 output_asm_insn ("srl\t%L2, 0, %L2", operands);
11739 if (which_alternative == 1)
11741 output_asm_insn ("or\t%L1, %H1, %H1", operands);
11742 output_asm_insn ("sllx\t%H2, 32, %L1", operands);
11743 output_asm_insn ("or\t%L2, %L1, %L1", operands);
11744 sprintf (mulstr, "%s\t%%H1, %%L1, %%L0", opcode);
11745 output_asm_insn (mulstr, operands);
11746 return "srlx\t%L0, 32, %H0";
11750 output_asm_insn ("sllx\t%H1, 32, %3", operands);
11751 output_asm_insn ("sllx\t%H2, 32, %4", operands);
11752 output_asm_insn ("or\t%L1, %3, %3", operands);
11753 output_asm_insn ("or\t%L2, %4, %4", operands);
11754 sprintf (mulstr, "%s\t%%3, %%4, %%3", opcode);
11755 output_asm_insn (mulstr, operands);
11756 output_asm_insn ("srlx\t%3, 32, %H0", operands);
11757 return "mov\t%3, %L0";
11761 /* Subroutine of sparc_expand_vector_init. Emit code to initialize
11762 all fields of TARGET to ELT by means of VIS2 BSHUFFLE insn. MODE
11763 and INNER_MODE are the modes describing TARGET. */
11766 vector_init_bshuffle (rtx target, rtx elt, enum machine_mode mode,
11767 enum machine_mode inner_mode)
11769 rtx t1, final_insn;
11772 t1 = gen_reg_rtx (mode);
11774 elt = convert_modes (SImode, inner_mode, elt, true);
11775 emit_move_insn (gen_lowpart(SImode, t1), elt);
11780 final_insn = gen_bshufflev2si_vis (target, t1, t1);
11781 bmask = 0x45674567;
11784 final_insn = gen_bshufflev4hi_vis (target, t1, t1);
11785 bmask = 0x67676767;
11788 final_insn = gen_bshufflev8qi_vis (target, t1, t1);
11789 bmask = 0x77777777;
11792 gcc_unreachable ();
11795 emit_insn (gen_bmasksi_vis (gen_reg_rtx (SImode), CONST0_RTX (SImode),
11796 force_reg (SImode, GEN_INT (bmask))));
11797 emit_insn (final_insn);
11800 /* Subroutine of sparc_expand_vector_init. Emit code to initialize
11801 all fields of TARGET to ELT in V8QI by means of VIS FPMERGE insn. */
11804 vector_init_fpmerge (rtx target, rtx elt)
11806 rtx t1, t2, t2_low, t3, t3_low;
11808 t1 = gen_reg_rtx (V4QImode);
11809 elt = convert_modes (SImode, QImode, elt, true);
11810 emit_move_insn (gen_lowpart (SImode, t1), elt);
11812 t2 = gen_reg_rtx (V8QImode);
11813 t2_low = gen_lowpart (V4QImode, t2);
11814 emit_insn (gen_fpmerge_vis (t2, t1, t1));
11816 t3 = gen_reg_rtx (V8QImode);
11817 t3_low = gen_lowpart (V4QImode, t3);
11818 emit_insn (gen_fpmerge_vis (t3, t2_low, t2_low));
11820 emit_insn (gen_fpmerge_vis (target, t3_low, t3_low));
11823 /* Subroutine of sparc_expand_vector_init. Emit code to initialize
11824 all fields of TARGET to ELT in V4HI by means of VIS FALIGNDATA insn. */
11827 vector_init_faligndata (rtx target, rtx elt)
11829 rtx t1 = gen_reg_rtx (V4HImode);
11832 elt = convert_modes (SImode, HImode, elt, true);
11833 emit_move_insn (gen_lowpart (SImode, t1), elt);
11835 emit_insn (gen_alignaddrsi_vis (gen_reg_rtx (SImode),
11836 force_reg (SImode, GEN_INT (6)),
11839 for (i = 0; i < 4; i++)
11840 emit_insn (gen_faligndatav4hi_vis (target, t1, target));
11843 /* Emit code to initialize TARGET to values for individual fields VALS. */
11846 sparc_expand_vector_init (rtx target, rtx vals)
11848 const enum machine_mode mode = GET_MODE (target);
11849 const enum machine_mode inner_mode = GET_MODE_INNER (mode);
11850 const int n_elts = GET_MODE_NUNITS (mode);
11856 for (i = 0; i < n_elts; i++)
11858 rtx x = XVECEXP (vals, 0, i);
11859 if (!CONSTANT_P (x))
11862 if (i > 0 && !rtx_equal_p (x, XVECEXP (vals, 0, 0)))
11868 emit_move_insn (target, gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0)));
11872 if (GET_MODE_SIZE (inner_mode) == GET_MODE_SIZE (mode))
11874 if (GET_MODE_SIZE (inner_mode) == 4)
11876 emit_move_insn (gen_lowpart (SImode, target),
11877 gen_lowpart (SImode, XVECEXP (vals, 0, 0)));
11880 else if (GET_MODE_SIZE (inner_mode) == 8)
11882 emit_move_insn (gen_lowpart (DImode, target),
11883 gen_lowpart (DImode, XVECEXP (vals, 0, 0)));
11887 else if (GET_MODE_SIZE (inner_mode) == GET_MODE_SIZE (word_mode)
11888 && GET_MODE_SIZE (mode) == 2 * GET_MODE_SIZE (word_mode))
11890 emit_move_insn (gen_highpart (word_mode, target),
11891 gen_lowpart (word_mode, XVECEXP (vals, 0, 0)));
11892 emit_move_insn (gen_lowpart (word_mode, target),
11893 gen_lowpart (word_mode, XVECEXP (vals, 0, 1)));
11897 if (all_same && GET_MODE_SIZE (mode) == 8)
11901 vector_init_bshuffle (target, XVECEXP (vals, 0, 0), mode, inner_mode);
11904 if (mode == V8QImode)
11906 vector_init_fpmerge (target, XVECEXP (vals, 0, 0));
11909 if (mode == V4HImode)
11911 vector_init_faligndata (target, XVECEXP (vals, 0, 0));
11916 mem = assign_stack_temp (mode, GET_MODE_SIZE (mode));
11917 for (i = 0; i < n_elts; i++)
11918 emit_move_insn (adjust_address_nv (mem, inner_mode,
11919 i * GET_MODE_SIZE (inner_mode)),
11920 XVECEXP (vals, 0, i));
11921 emit_move_insn (target, mem);
11924 /* Implement TARGET_SECONDARY_RELOAD. */
11927 sparc_secondary_reload (bool in_p, rtx x, reg_class_t rclass_i,
11928 enum machine_mode mode, secondary_reload_info *sri)
11930 enum reg_class rclass = (enum reg_class) rclass_i;
11932 sri->icode = CODE_FOR_nothing;
11933 sri->extra_cost = 0;
11935 /* We need a temporary when loading/storing a HImode/QImode value
11936 between memory and the FPU registers. This can happen when combine puts
11937 a paradoxical subreg in a float/fix conversion insn. */
11938 if (FP_REG_CLASS_P (rclass)
11939 && (mode == HImode || mode == QImode)
11940 && (GET_CODE (x) == MEM
11941 || ((GET_CODE (x) == REG || GET_CODE (x) == SUBREG)
11942 && true_regnum (x) == -1)))
11943 return GENERAL_REGS;
11945 /* On 32-bit we need a temporary when loading/storing a DFmode value
11946 between unaligned memory and the upper FPU registers. */
11948 && rclass == EXTRA_FP_REGS
11950 && GET_CODE (x) == MEM
11951 && ! mem_min_alignment (x, 8))
11954 if (((TARGET_CM_MEDANY
11955 && symbolic_operand (x, mode))
11956 || (TARGET_CM_EMBMEDANY
11957 && text_segment_operand (x, mode)))
11961 sri->icode = direct_optab_handler (reload_in_optab, mode);
11963 sri->icode = direct_optab_handler (reload_out_optab, mode);
11967 if (TARGET_VIS3 && TARGET_ARCH32)
11969 int regno = true_regnum (x);
11971 /* When using VIS3 fp<-->int register moves, on 32-bit we have
11972 to move 8-byte values in 4-byte pieces. This only works via
11973 FP_REGS, and not via EXTRA_FP_REGS. Therefore if we try to
11974 move between EXTRA_FP_REGS and GENERAL_REGS, we will need
11975 an FP_REGS intermediate move. */
11976 if ((rclass == EXTRA_FP_REGS && SPARC_INT_REG_P (regno))
11977 || ((general_or_i64_p (rclass)
11978 || rclass == GENERAL_OR_FP_REGS)
11979 && SPARC_FP_REG_P (regno)))
11981 sri->extra_cost = 2;
11989 /* Emit code to conditionally move either OPERANDS[2] or OPERANDS[3] into
11990 OPERANDS[0] in MODE. OPERANDS[1] is the operator of the condition. */
11993 sparc_expand_conditional_move (enum machine_mode mode, rtx *operands)
11995 enum rtx_code rc = GET_CODE (operands[1]);
11996 enum machine_mode cmp_mode;
11997 rtx cc_reg, dst, cmp;
12000 if (GET_MODE (XEXP (cmp, 0)) == DImode && !TARGET_ARCH64)
12003 if (GET_MODE (XEXP (cmp, 0)) == TFmode && !TARGET_HARD_QUAD)
12004 cmp = sparc_emit_float_lib_cmp (XEXP (cmp, 0), XEXP (cmp, 1), rc);
12006 cmp_mode = GET_MODE (XEXP (cmp, 0));
12007 rc = GET_CODE (cmp);
12010 if (! rtx_equal_p (operands[2], dst)
12011 && ! rtx_equal_p (operands[3], dst))
12013 if (reg_overlap_mentioned_p (dst, cmp))
12014 dst = gen_reg_rtx (mode);
12016 emit_move_insn (dst, operands[3]);
12018 else if (operands[2] == dst)
12020 operands[2] = operands[3];
12022 if (GET_MODE_CLASS (cmp_mode) == MODE_FLOAT)
12023 rc = reverse_condition_maybe_unordered (rc);
12025 rc = reverse_condition (rc);
12028 if (XEXP (cmp, 1) == const0_rtx
12029 && GET_CODE (XEXP (cmp, 0)) == REG
12030 && cmp_mode == DImode
12031 && v9_regcmp_p (rc))
12032 cc_reg = XEXP (cmp, 0);
12034 cc_reg = gen_compare_reg_1 (rc, XEXP (cmp, 0), XEXP (cmp, 1));
12036 cmp = gen_rtx_fmt_ee (rc, GET_MODE (cc_reg), cc_reg, const0_rtx);
12038 emit_insn (gen_rtx_SET (VOIDmode, dst,
12039 gen_rtx_IF_THEN_ELSE (mode, cmp, operands[2], dst)));
12041 if (dst != operands[0])
12042 emit_move_insn (operands[0], dst);
12047 /* Emit code to conditionally move a combination of OPERANDS[1] and OPERANDS[2]
12048 into OPERANDS[0] in MODE, depending on the outcome of the comparison of
12049 OPERANDS[4] and OPERANDS[5]. OPERANDS[3] is the operator of the condition.
12050 FCODE is the machine code to be used for OPERANDS[3] and CCODE the machine
12051 code to be used for the condition mask. */
12054 sparc_expand_vcond (enum machine_mode mode, rtx *operands, int ccode, int fcode)
12056 rtx mask, cop0, cop1, fcmp, cmask, bshuf, gsr;
12057 enum rtx_code code = GET_CODE (operands[3]);
12059 mask = gen_reg_rtx (Pmode);
12060 cop0 = operands[4];
12061 cop1 = operands[5];
12062 if (code == LT || code == GE)
12066 code = swap_condition (code);
12067 t = cop0; cop0 = cop1; cop1 = t;
12070 gsr = gen_rtx_REG (DImode, SPARC_GSR_REG);
12072 fcmp = gen_rtx_UNSPEC (Pmode,
12073 gen_rtvec (1, gen_rtx_fmt_ee (code, mode, cop0, cop1)),
12076 cmask = gen_rtx_UNSPEC (DImode,
12077 gen_rtvec (2, mask, gsr),
12080 bshuf = gen_rtx_UNSPEC (mode,
12081 gen_rtvec (3, operands[1], operands[2], gsr),
12084 emit_insn (gen_rtx_SET (VOIDmode, mask, fcmp));
12085 emit_insn (gen_rtx_SET (VOIDmode, gsr, cmask));
12087 emit_insn (gen_rtx_SET (VOIDmode, operands[0], bshuf));
12090 /* On sparc, any mode which naturally allocates into the float
12091 registers should return 4 here. */
12094 sparc_regmode_natural_size (enum machine_mode mode)
12096 int size = UNITS_PER_WORD;
12100 enum mode_class mclass = GET_MODE_CLASS (mode);
12102 if (mclass == MODE_FLOAT || mclass == MODE_VECTOR_INT)
12109 /* Return TRUE if it is a good idea to tie two pseudo registers
12110 when one has mode MODE1 and one has mode MODE2.
12111 If HARD_REGNO_MODE_OK could produce different values for MODE1 and MODE2,
12112 for any hard reg, then this must be FALSE for correct output.
12114 For V9 we have to deal with the fact that only the lower 32 floating
12115 point registers are 32-bit addressable. */
12118 sparc_modes_tieable_p (enum machine_mode mode1, enum machine_mode mode2)
12120 enum mode_class mclass1, mclass2;
12121 unsigned short size1, size2;
12123 if (mode1 == mode2)
12126 mclass1 = GET_MODE_CLASS (mode1);
12127 mclass2 = GET_MODE_CLASS (mode2);
12128 if (mclass1 != mclass2)
12134 /* Classes are the same and we are V9 so we have to deal with upper
12135 vs. lower floating point registers. If one of the modes is a
12136 4-byte mode, and the other is not, we have to mark them as not
12137 tieable because only the lower 32 floating point register are
12138 addressable 32-bits at a time.
12140 We can't just test explicitly for SFmode, otherwise we won't
12141 cover the vector mode cases properly. */
12143 if (mclass1 != MODE_FLOAT && mclass1 != MODE_VECTOR_INT)
12146 size1 = GET_MODE_SIZE (mode1);
12147 size2 = GET_MODE_SIZE (mode2);
12148 if ((size1 > 4 && size2 == 4)
12149 || (size2 > 4 && size1 == 4))
12155 static enum machine_mode sparc_cstore_mode (enum insn_code icode ATTRIBUTE_UNUSED)
12157 return (TARGET_ARCH64 ? DImode : SImode);
12160 #include "gt-sparc.h"