1 /* Subroutines for insn-output.c for SPARC.
2 Copyright (C) 1987-2013 Free Software Foundation, Inc.
3 Contributed by Michael Tiemann (tiemann@cygnus.com)
4 64-bit SPARC-V9 support by Michael Tiemann, Jim Wilson, and Doug Evans,
7 This file is part of GCC.
9 GCC is free software; you can redistribute it and/or modify
10 it under the terms of the GNU General Public License as published by
11 the Free Software Foundation; either version 3, or (at your option)
14 GCC is distributed in the hope that it will be useful,
15 but WITHOUT ANY WARRANTY; without even the implied warranty of
16 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17 GNU General Public License for more details.
19 You should have received a copy of the GNU General Public License
20 along with GCC; see the file COPYING3. If not see
21 <http://www.gnu.org/licenses/>. */
25 #include "coretypes.h"
30 #include "hard-reg-set.h"
31 #include "insn-config.h"
32 #include "insn-codes.h"
33 #include "conditions.h"
35 #include "insn-attr.h"
42 #include "diagnostic-core.h"
47 #include "target-def.h"
48 #include "common/common-target.h"
50 #include "langhooks.h"
55 #include "tree-pass.h"
60 struct processor_costs {
64 /* Integer signed load */
67 /* Integer zeroed load */
73 /* fmov, fneg, fabs */
77 const int float_plusminus;
83 const int float_cmove;
89 const int float_div_sf;
92 const int float_div_df;
95 const int float_sqrt_sf;
98 const int float_sqrt_df;
106 /* integer multiply cost for each bit set past the most
107 significant 3, so the formula for multiply cost becomes:
110 highest_bit = highest_clear_bit(rs1);
112 highest_bit = highest_set_bit(rs1);
115 cost = int_mul{,X} + ((highest_bit - 3) / int_mul_bit_factor);
117 A value of zero indicates that the multiply costs is fixed,
119 const int int_mul_bit_factor;
130 /* penalty for shifts, due to scheduling rules etc. */
131 const int shift_penalty;
135 struct processor_costs cypress_costs = {
136 COSTS_N_INSNS (2), /* int load */
137 COSTS_N_INSNS (2), /* int signed load */
138 COSTS_N_INSNS (2), /* int zeroed load */
139 COSTS_N_INSNS (2), /* float load */
140 COSTS_N_INSNS (5), /* fmov, fneg, fabs */
141 COSTS_N_INSNS (5), /* fadd, fsub */
142 COSTS_N_INSNS (1), /* fcmp */
143 COSTS_N_INSNS (1), /* fmov, fmovr */
144 COSTS_N_INSNS (7), /* fmul */
145 COSTS_N_INSNS (37), /* fdivs */
146 COSTS_N_INSNS (37), /* fdivd */
147 COSTS_N_INSNS (63), /* fsqrts */
148 COSTS_N_INSNS (63), /* fsqrtd */
149 COSTS_N_INSNS (1), /* imul */
150 COSTS_N_INSNS (1), /* imulX */
151 0, /* imul bit factor */
152 COSTS_N_INSNS (1), /* idiv */
153 COSTS_N_INSNS (1), /* idivX */
154 COSTS_N_INSNS (1), /* movcc/movr */
155 0, /* shift penalty */
159 struct processor_costs supersparc_costs = {
160 COSTS_N_INSNS (1), /* int load */
161 COSTS_N_INSNS (1), /* int signed load */
162 COSTS_N_INSNS (1), /* int zeroed load */
163 COSTS_N_INSNS (0), /* float load */
164 COSTS_N_INSNS (3), /* fmov, fneg, fabs */
165 COSTS_N_INSNS (3), /* fadd, fsub */
166 COSTS_N_INSNS (3), /* fcmp */
167 COSTS_N_INSNS (1), /* fmov, fmovr */
168 COSTS_N_INSNS (3), /* fmul */
169 COSTS_N_INSNS (6), /* fdivs */
170 COSTS_N_INSNS (9), /* fdivd */
171 COSTS_N_INSNS (12), /* fsqrts */
172 COSTS_N_INSNS (12), /* fsqrtd */
173 COSTS_N_INSNS (4), /* imul */
174 COSTS_N_INSNS (4), /* imulX */
175 0, /* imul bit factor */
176 COSTS_N_INSNS (4), /* idiv */
177 COSTS_N_INSNS (4), /* idivX */
178 COSTS_N_INSNS (1), /* movcc/movr */
179 1, /* shift penalty */
183 struct processor_costs hypersparc_costs = {
184 COSTS_N_INSNS (1), /* int load */
185 COSTS_N_INSNS (1), /* int signed load */
186 COSTS_N_INSNS (1), /* int zeroed load */
187 COSTS_N_INSNS (1), /* float load */
188 COSTS_N_INSNS (1), /* fmov, fneg, fabs */
189 COSTS_N_INSNS (1), /* fadd, fsub */
190 COSTS_N_INSNS (1), /* fcmp */
191 COSTS_N_INSNS (1), /* fmov, fmovr */
192 COSTS_N_INSNS (1), /* fmul */
193 COSTS_N_INSNS (8), /* fdivs */
194 COSTS_N_INSNS (12), /* fdivd */
195 COSTS_N_INSNS (17), /* fsqrts */
196 COSTS_N_INSNS (17), /* fsqrtd */
197 COSTS_N_INSNS (17), /* imul */
198 COSTS_N_INSNS (17), /* imulX */
199 0, /* imul bit factor */
200 COSTS_N_INSNS (17), /* idiv */
201 COSTS_N_INSNS (17), /* idivX */
202 COSTS_N_INSNS (1), /* movcc/movr */
203 0, /* shift penalty */
207 struct processor_costs leon_costs = {
208 COSTS_N_INSNS (1), /* int load */
209 COSTS_N_INSNS (1), /* int signed load */
210 COSTS_N_INSNS (1), /* int zeroed load */
211 COSTS_N_INSNS (1), /* float load */
212 COSTS_N_INSNS (1), /* fmov, fneg, fabs */
213 COSTS_N_INSNS (1), /* fadd, fsub */
214 COSTS_N_INSNS (1), /* fcmp */
215 COSTS_N_INSNS (1), /* fmov, fmovr */
216 COSTS_N_INSNS (1), /* fmul */
217 COSTS_N_INSNS (15), /* fdivs */
218 COSTS_N_INSNS (15), /* fdivd */
219 COSTS_N_INSNS (23), /* fsqrts */
220 COSTS_N_INSNS (23), /* fsqrtd */
221 COSTS_N_INSNS (5), /* imul */
222 COSTS_N_INSNS (5), /* imulX */
223 0, /* imul bit factor */
224 COSTS_N_INSNS (5), /* idiv */
225 COSTS_N_INSNS (5), /* idivX */
226 COSTS_N_INSNS (1), /* movcc/movr */
227 0, /* shift penalty */
231 struct processor_costs leon3_costs = {
232 COSTS_N_INSNS (1), /* int load */
233 COSTS_N_INSNS (1), /* int signed load */
234 COSTS_N_INSNS (1), /* int zeroed load */
235 COSTS_N_INSNS (1), /* float load */
236 COSTS_N_INSNS (1), /* fmov, fneg, fabs */
237 COSTS_N_INSNS (1), /* fadd, fsub */
238 COSTS_N_INSNS (1), /* fcmp */
239 COSTS_N_INSNS (1), /* fmov, fmovr */
240 COSTS_N_INSNS (1), /* fmul */
241 COSTS_N_INSNS (14), /* fdivs */
242 COSTS_N_INSNS (15), /* fdivd */
243 COSTS_N_INSNS (22), /* fsqrts */
244 COSTS_N_INSNS (23), /* fsqrtd */
245 COSTS_N_INSNS (5), /* imul */
246 COSTS_N_INSNS (5), /* imulX */
247 0, /* imul bit factor */
248 COSTS_N_INSNS (35), /* idiv */
249 COSTS_N_INSNS (35), /* idivX */
250 COSTS_N_INSNS (1), /* movcc/movr */
251 0, /* shift penalty */
255 struct processor_costs sparclet_costs = {
256 COSTS_N_INSNS (3), /* int load */
257 COSTS_N_INSNS (3), /* int signed load */
258 COSTS_N_INSNS (1), /* int zeroed load */
259 COSTS_N_INSNS (1), /* float load */
260 COSTS_N_INSNS (1), /* fmov, fneg, fabs */
261 COSTS_N_INSNS (1), /* fadd, fsub */
262 COSTS_N_INSNS (1), /* fcmp */
263 COSTS_N_INSNS (1), /* fmov, fmovr */
264 COSTS_N_INSNS (1), /* fmul */
265 COSTS_N_INSNS (1), /* fdivs */
266 COSTS_N_INSNS (1), /* fdivd */
267 COSTS_N_INSNS (1), /* fsqrts */
268 COSTS_N_INSNS (1), /* fsqrtd */
269 COSTS_N_INSNS (5), /* imul */
270 COSTS_N_INSNS (5), /* imulX */
271 0, /* imul bit factor */
272 COSTS_N_INSNS (5), /* idiv */
273 COSTS_N_INSNS (5), /* idivX */
274 COSTS_N_INSNS (1), /* movcc/movr */
275 0, /* shift penalty */
279 struct processor_costs ultrasparc_costs = {
280 COSTS_N_INSNS (2), /* int load */
281 COSTS_N_INSNS (3), /* int signed load */
282 COSTS_N_INSNS (2), /* int zeroed load */
283 COSTS_N_INSNS (2), /* float load */
284 COSTS_N_INSNS (1), /* fmov, fneg, fabs */
285 COSTS_N_INSNS (4), /* fadd, fsub */
286 COSTS_N_INSNS (1), /* fcmp */
287 COSTS_N_INSNS (2), /* fmov, fmovr */
288 COSTS_N_INSNS (4), /* fmul */
289 COSTS_N_INSNS (13), /* fdivs */
290 COSTS_N_INSNS (23), /* fdivd */
291 COSTS_N_INSNS (13), /* fsqrts */
292 COSTS_N_INSNS (23), /* fsqrtd */
293 COSTS_N_INSNS (4), /* imul */
294 COSTS_N_INSNS (4), /* imulX */
295 2, /* imul bit factor */
296 COSTS_N_INSNS (37), /* idiv */
297 COSTS_N_INSNS (68), /* idivX */
298 COSTS_N_INSNS (2), /* movcc/movr */
299 2, /* shift penalty */
303 struct processor_costs ultrasparc3_costs = {
304 COSTS_N_INSNS (2), /* int load */
305 COSTS_N_INSNS (3), /* int signed load */
306 COSTS_N_INSNS (3), /* int zeroed load */
307 COSTS_N_INSNS (2), /* float load */
308 COSTS_N_INSNS (3), /* fmov, fneg, fabs */
309 COSTS_N_INSNS (4), /* fadd, fsub */
310 COSTS_N_INSNS (5), /* fcmp */
311 COSTS_N_INSNS (3), /* fmov, fmovr */
312 COSTS_N_INSNS (4), /* fmul */
313 COSTS_N_INSNS (17), /* fdivs */
314 COSTS_N_INSNS (20), /* fdivd */
315 COSTS_N_INSNS (20), /* fsqrts */
316 COSTS_N_INSNS (29), /* fsqrtd */
317 COSTS_N_INSNS (6), /* imul */
318 COSTS_N_INSNS (6), /* imulX */
319 0, /* imul bit factor */
320 COSTS_N_INSNS (40), /* idiv */
321 COSTS_N_INSNS (71), /* idivX */
322 COSTS_N_INSNS (2), /* movcc/movr */
323 0, /* shift penalty */
327 struct processor_costs niagara_costs = {
328 COSTS_N_INSNS (3), /* int load */
329 COSTS_N_INSNS (3), /* int signed load */
330 COSTS_N_INSNS (3), /* int zeroed load */
331 COSTS_N_INSNS (9), /* float load */
332 COSTS_N_INSNS (8), /* fmov, fneg, fabs */
333 COSTS_N_INSNS (8), /* fadd, fsub */
334 COSTS_N_INSNS (26), /* fcmp */
335 COSTS_N_INSNS (8), /* fmov, fmovr */
336 COSTS_N_INSNS (29), /* fmul */
337 COSTS_N_INSNS (54), /* fdivs */
338 COSTS_N_INSNS (83), /* fdivd */
339 COSTS_N_INSNS (100), /* fsqrts - not implemented in hardware */
340 COSTS_N_INSNS (100), /* fsqrtd - not implemented in hardware */
341 COSTS_N_INSNS (11), /* imul */
342 COSTS_N_INSNS (11), /* imulX */
343 0, /* imul bit factor */
344 COSTS_N_INSNS (72), /* idiv */
345 COSTS_N_INSNS (72), /* idivX */
346 COSTS_N_INSNS (1), /* movcc/movr */
347 0, /* shift penalty */
351 struct processor_costs niagara2_costs = {
352 COSTS_N_INSNS (3), /* int load */
353 COSTS_N_INSNS (3), /* int signed load */
354 COSTS_N_INSNS (3), /* int zeroed load */
355 COSTS_N_INSNS (3), /* float load */
356 COSTS_N_INSNS (6), /* fmov, fneg, fabs */
357 COSTS_N_INSNS (6), /* fadd, fsub */
358 COSTS_N_INSNS (6), /* fcmp */
359 COSTS_N_INSNS (6), /* fmov, fmovr */
360 COSTS_N_INSNS (6), /* fmul */
361 COSTS_N_INSNS (19), /* fdivs */
362 COSTS_N_INSNS (33), /* fdivd */
363 COSTS_N_INSNS (19), /* fsqrts */
364 COSTS_N_INSNS (33), /* fsqrtd */
365 COSTS_N_INSNS (5), /* imul */
366 COSTS_N_INSNS (5), /* imulX */
367 0, /* imul bit factor */
368 COSTS_N_INSNS (26), /* idiv, average of 12 - 41 cycle range */
369 COSTS_N_INSNS (26), /* idivX, average of 12 - 41 cycle range */
370 COSTS_N_INSNS (1), /* movcc/movr */
371 0, /* shift penalty */
375 struct processor_costs niagara3_costs = {
376 COSTS_N_INSNS (3), /* int load */
377 COSTS_N_INSNS (3), /* int signed load */
378 COSTS_N_INSNS (3), /* int zeroed load */
379 COSTS_N_INSNS (3), /* float load */
380 COSTS_N_INSNS (9), /* fmov, fneg, fabs */
381 COSTS_N_INSNS (9), /* fadd, fsub */
382 COSTS_N_INSNS (9), /* fcmp */
383 COSTS_N_INSNS (9), /* fmov, fmovr */
384 COSTS_N_INSNS (9), /* fmul */
385 COSTS_N_INSNS (23), /* fdivs */
386 COSTS_N_INSNS (37), /* fdivd */
387 COSTS_N_INSNS (23), /* fsqrts */
388 COSTS_N_INSNS (37), /* fsqrtd */
389 COSTS_N_INSNS (9), /* imul */
390 COSTS_N_INSNS (9), /* imulX */
391 0, /* imul bit factor */
392 COSTS_N_INSNS (31), /* idiv, average of 17 - 45 cycle range */
393 COSTS_N_INSNS (30), /* idivX, average of 16 - 44 cycle range */
394 COSTS_N_INSNS (1), /* movcc/movr */
395 0, /* shift penalty */
399 struct processor_costs niagara4_costs = {
400 COSTS_N_INSNS (5), /* int load */
401 COSTS_N_INSNS (5), /* int signed load */
402 COSTS_N_INSNS (5), /* int zeroed load */
403 COSTS_N_INSNS (5), /* float load */
404 COSTS_N_INSNS (11), /* fmov, fneg, fabs */
405 COSTS_N_INSNS (11), /* fadd, fsub */
406 COSTS_N_INSNS (11), /* fcmp */
407 COSTS_N_INSNS (11), /* fmov, fmovr */
408 COSTS_N_INSNS (11), /* fmul */
409 COSTS_N_INSNS (24), /* fdivs */
410 COSTS_N_INSNS (37), /* fdivd */
411 COSTS_N_INSNS (24), /* fsqrts */
412 COSTS_N_INSNS (37), /* fsqrtd */
413 COSTS_N_INSNS (12), /* imul */
414 COSTS_N_INSNS (12), /* imulX */
415 0, /* imul bit factor */
416 COSTS_N_INSNS (50), /* idiv, average of 41 - 60 cycle range */
417 COSTS_N_INSNS (35), /* idivX, average of 26 - 44 cycle range */
418 COSTS_N_INSNS (1), /* movcc/movr */
419 0, /* shift penalty */
422 static const struct processor_costs *sparc_costs = &cypress_costs;
424 #ifdef HAVE_AS_RELAX_OPTION
425 /* If 'as' and 'ld' are relaxing tail call insns into branch always, use
426 "or %o7,%g0,X; call Y; or X,%g0,%o7" always, so that it can be optimized.
427 With sethi/jmp, neither 'as' nor 'ld' has an easy way how to find out if
428 somebody does not branch between the sethi and jmp. */
429 #define LEAF_SIBCALL_SLOT_RESERVED_P 1
431 #define LEAF_SIBCALL_SLOT_RESERVED_P \
432 ((TARGET_ARCH64 && !TARGET_CM_MEDLOW) || flag_pic)
435 /* Vector to say how input registers are mapped to output registers.
436 HARD_FRAME_POINTER_REGNUM cannot be remapped by this function to
437 eliminate it. You must use -fomit-frame-pointer to get that. */
438 char leaf_reg_remap[] =
439 { 0, 1, 2, 3, 4, 5, 6, 7,
440 -1, -1, -1, -1, -1, -1, 14, -1,
441 -1, -1, -1, -1, -1, -1, -1, -1,
442 8, 9, 10, 11, 12, 13, -1, 15,
444 32, 33, 34, 35, 36, 37, 38, 39,
445 40, 41, 42, 43, 44, 45, 46, 47,
446 48, 49, 50, 51, 52, 53, 54, 55,
447 56, 57, 58, 59, 60, 61, 62, 63,
448 64, 65, 66, 67, 68, 69, 70, 71,
449 72, 73, 74, 75, 76, 77, 78, 79,
450 80, 81, 82, 83, 84, 85, 86, 87,
451 88, 89, 90, 91, 92, 93, 94, 95,
452 96, 97, 98, 99, 100, 101, 102};
454 /* Vector, indexed by hard register number, which contains 1
455 for a register that is allowable in a candidate for leaf
456 function treatment. */
457 char sparc_leaf_regs[] =
458 { 1, 1, 1, 1, 1, 1, 1, 1,
459 0, 0, 0, 0, 0, 0, 1, 0,
460 0, 0, 0, 0, 0, 0, 0, 0,
461 1, 1, 1, 1, 1, 1, 0, 1,
462 1, 1, 1, 1, 1, 1, 1, 1,
463 1, 1, 1, 1, 1, 1, 1, 1,
464 1, 1, 1, 1, 1, 1, 1, 1,
465 1, 1, 1, 1, 1, 1, 1, 1,
466 1, 1, 1, 1, 1, 1, 1, 1,
467 1, 1, 1, 1, 1, 1, 1, 1,
468 1, 1, 1, 1, 1, 1, 1, 1,
469 1, 1, 1, 1, 1, 1, 1, 1,
470 1, 1, 1, 1, 1, 1, 1};
472 struct GTY(()) machine_function
474 /* Size of the frame of the function. */
475 HOST_WIDE_INT frame_size;
477 /* Size of the frame of the function minus the register window save area
478 and the outgoing argument area. */
479 HOST_WIDE_INT apparent_frame_size;
481 /* Register we pretend the frame pointer is allocated to. Normally, this
482 is %fp, but if we are in a leaf procedure, this is (%sp + offset). We
483 record "offset" separately as it may be too big for (reg + disp). */
485 HOST_WIDE_INT frame_base_offset;
487 /* Some local-dynamic TLS symbol name. */
488 const char *some_ld_name;
490 /* Number of global or FP registers to be saved (as 4-byte quantities). */
491 int n_global_fp_regs;
493 /* True if the current function is leaf and uses only leaf regs,
494 so that the SPARC leaf function optimization can be applied.
495 Private version of crtl->uses_only_leaf_regs, see
496 sparc_expand_prologue for the rationale. */
499 /* True if the prologue saves local or in registers. */
500 bool save_local_in_regs_p;
502 /* True if the data calculated by sparc_expand_prologue are valid. */
503 bool prologue_data_valid_p;
506 #define sparc_frame_size cfun->machine->frame_size
507 #define sparc_apparent_frame_size cfun->machine->apparent_frame_size
508 #define sparc_frame_base_reg cfun->machine->frame_base_reg
509 #define sparc_frame_base_offset cfun->machine->frame_base_offset
510 #define sparc_n_global_fp_regs cfun->machine->n_global_fp_regs
511 #define sparc_leaf_function_p cfun->machine->leaf_function_p
512 #define sparc_save_local_in_regs_p cfun->machine->save_local_in_regs_p
513 #define sparc_prologue_data_valid_p cfun->machine->prologue_data_valid_p
515 /* 1 if the next opcode is to be specially indented. */
516 int sparc_indent_opcode = 0;
518 static void sparc_option_override (void);
519 static void sparc_init_modes (void);
520 static void scan_record_type (const_tree, int *, int *, int *);
521 static int function_arg_slotno (const CUMULATIVE_ARGS *, enum machine_mode,
522 const_tree, bool, bool, int *, int *);
524 static int supersparc_adjust_cost (rtx, rtx, rtx, int);
525 static int hypersparc_adjust_cost (rtx, rtx, rtx, int);
527 static void sparc_emit_set_const32 (rtx, rtx);
528 static void sparc_emit_set_const64 (rtx, rtx);
529 static void sparc_output_addr_vec (rtx);
530 static void sparc_output_addr_diff_vec (rtx);
531 static void sparc_output_deferred_case_vectors (void);
532 static bool sparc_legitimate_address_p (enum machine_mode, rtx, bool);
533 static bool sparc_legitimate_constant_p (enum machine_mode, rtx);
534 static rtx sparc_builtin_saveregs (void);
535 static int epilogue_renumber (rtx *, int);
536 static bool sparc_assemble_integer (rtx, unsigned int, int);
537 static int set_extends (rtx);
538 static void sparc_asm_function_prologue (FILE *, HOST_WIDE_INT);
539 static void sparc_asm_function_epilogue (FILE *, HOST_WIDE_INT);
540 #ifdef TARGET_SOLARIS
541 static void sparc_solaris_elf_asm_named_section (const char *, unsigned int,
542 tree) ATTRIBUTE_UNUSED;
544 static int sparc_adjust_cost (rtx, rtx, rtx, int);
545 static int sparc_issue_rate (void);
546 static void sparc_sched_init (FILE *, int, int);
547 static int sparc_use_sched_lookahead (void);
549 static void emit_soft_tfmode_libcall (const char *, int, rtx *);
550 static void emit_soft_tfmode_binop (enum rtx_code, rtx *);
551 static void emit_soft_tfmode_unop (enum rtx_code, rtx *);
552 static void emit_soft_tfmode_cvt (enum rtx_code, rtx *);
553 static void emit_hard_tfmode_operation (enum rtx_code, rtx *);
555 static bool sparc_function_ok_for_sibcall (tree, tree);
556 static void sparc_init_libfuncs (void);
557 static void sparc_init_builtins (void);
558 static void sparc_vis_init_builtins (void);
559 static rtx sparc_expand_builtin (tree, rtx, rtx, enum machine_mode, int);
560 static tree sparc_fold_builtin (tree, int, tree *, bool);
561 static int sparc_vis_mul8x16 (int, int);
562 static void sparc_handle_vis_mul8x16 (tree *, int, tree, tree, tree);
563 static void sparc_output_mi_thunk (FILE *, tree, HOST_WIDE_INT,
564 HOST_WIDE_INT, tree);
565 static bool sparc_can_output_mi_thunk (const_tree, HOST_WIDE_INT,
566 HOST_WIDE_INT, const_tree);
567 static struct machine_function * sparc_init_machine_status (void);
568 static bool sparc_cannot_force_const_mem (enum machine_mode, rtx);
569 static rtx sparc_tls_get_addr (void);
570 static rtx sparc_tls_got (void);
571 static const char *get_some_local_dynamic_name (void);
572 static int get_some_local_dynamic_name_1 (rtx *, void *);
573 static int sparc_register_move_cost (enum machine_mode,
574 reg_class_t, reg_class_t);
575 static bool sparc_rtx_costs (rtx, int, int, int, int *, bool);
576 static rtx sparc_function_value (const_tree, const_tree, bool);
577 static rtx sparc_libcall_value (enum machine_mode, const_rtx);
578 static bool sparc_function_value_regno_p (const unsigned int);
579 static rtx sparc_struct_value_rtx (tree, int);
580 static enum machine_mode sparc_promote_function_mode (const_tree, enum machine_mode,
581 int *, const_tree, int);
582 static bool sparc_return_in_memory (const_tree, const_tree);
583 static bool sparc_strict_argument_naming (cumulative_args_t);
584 static void sparc_va_start (tree, rtx);
585 static tree sparc_gimplify_va_arg (tree, tree, gimple_seq *, gimple_seq *);
586 static bool sparc_vector_mode_supported_p (enum machine_mode);
587 static bool sparc_tls_referenced_p (rtx);
588 static rtx sparc_legitimize_tls_address (rtx);
589 static rtx sparc_legitimize_pic_address (rtx, rtx);
590 static rtx sparc_legitimize_address (rtx, rtx, enum machine_mode);
591 static rtx sparc_delegitimize_address (rtx);
592 static bool sparc_mode_dependent_address_p (const_rtx, addr_space_t);
593 static bool sparc_pass_by_reference (cumulative_args_t,
594 enum machine_mode, const_tree, bool);
595 static void sparc_function_arg_advance (cumulative_args_t,
596 enum machine_mode, const_tree, bool);
597 static rtx sparc_function_arg_1 (cumulative_args_t,
598 enum machine_mode, const_tree, bool, bool);
599 static rtx sparc_function_arg (cumulative_args_t,
600 enum machine_mode, const_tree, bool);
601 static rtx sparc_function_incoming_arg (cumulative_args_t,
602 enum machine_mode, const_tree, bool);
603 static unsigned int sparc_function_arg_boundary (enum machine_mode,
605 static int sparc_arg_partial_bytes (cumulative_args_t,
606 enum machine_mode, tree, bool);
607 static void sparc_output_dwarf_dtprel (FILE *, int, rtx) ATTRIBUTE_UNUSED;
608 static void sparc_file_end (void);
609 static bool sparc_frame_pointer_required (void);
610 static bool sparc_can_eliminate (const int, const int);
611 static rtx sparc_builtin_setjmp_frame_value (void);
612 static void sparc_conditional_register_usage (void);
613 #ifdef TARGET_ALTERNATE_LONG_DOUBLE_MANGLING
614 static const char *sparc_mangle_type (const_tree);
616 static void sparc_trampoline_init (rtx, tree, rtx);
617 static enum machine_mode sparc_preferred_simd_mode (enum machine_mode);
618 static reg_class_t sparc_preferred_reload_class (rtx x, reg_class_t rclass);
619 static bool sparc_print_operand_punct_valid_p (unsigned char);
620 static void sparc_print_operand (FILE *, rtx, int);
621 static void sparc_print_operand_address (FILE *, rtx);
622 static reg_class_t sparc_secondary_reload (bool, rtx, reg_class_t,
624 secondary_reload_info *);
625 static enum machine_mode sparc_cstore_mode (enum insn_code icode);
627 #ifdef SUBTARGET_ATTRIBUTE_TABLE
628 /* Table of valid machine attributes. */
629 static const struct attribute_spec sparc_attribute_table[] =
631 /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler,
633 SUBTARGET_ATTRIBUTE_TABLE,
634 { NULL, 0, 0, false, false, false, NULL, false }
638 /* Option handling. */
641 enum cmodel sparc_cmodel;
643 char sparc_hard_reg_printed[8];
645 /* Initialize the GCC target structure. */
647 /* The default is to use .half rather than .short for aligned HI objects. */
648 #undef TARGET_ASM_ALIGNED_HI_OP
649 #define TARGET_ASM_ALIGNED_HI_OP "\t.half\t"
651 #undef TARGET_ASM_UNALIGNED_HI_OP
652 #define TARGET_ASM_UNALIGNED_HI_OP "\t.uahalf\t"
653 #undef TARGET_ASM_UNALIGNED_SI_OP
654 #define TARGET_ASM_UNALIGNED_SI_OP "\t.uaword\t"
655 #undef TARGET_ASM_UNALIGNED_DI_OP
656 #define TARGET_ASM_UNALIGNED_DI_OP "\t.uaxword\t"
658 /* The target hook has to handle DI-mode values. */
659 #undef TARGET_ASM_INTEGER
660 #define TARGET_ASM_INTEGER sparc_assemble_integer
662 #undef TARGET_ASM_FUNCTION_PROLOGUE
663 #define TARGET_ASM_FUNCTION_PROLOGUE sparc_asm_function_prologue
664 #undef TARGET_ASM_FUNCTION_EPILOGUE
665 #define TARGET_ASM_FUNCTION_EPILOGUE sparc_asm_function_epilogue
667 #undef TARGET_SCHED_ADJUST_COST
668 #define TARGET_SCHED_ADJUST_COST sparc_adjust_cost
669 #undef TARGET_SCHED_ISSUE_RATE
670 #define TARGET_SCHED_ISSUE_RATE sparc_issue_rate
671 #undef TARGET_SCHED_INIT
672 #define TARGET_SCHED_INIT sparc_sched_init
673 #undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
674 #define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD sparc_use_sched_lookahead
676 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
677 #define TARGET_FUNCTION_OK_FOR_SIBCALL sparc_function_ok_for_sibcall
679 #undef TARGET_INIT_LIBFUNCS
680 #define TARGET_INIT_LIBFUNCS sparc_init_libfuncs
681 #undef TARGET_INIT_BUILTINS
682 #define TARGET_INIT_BUILTINS sparc_init_builtins
684 #undef TARGET_LEGITIMIZE_ADDRESS
685 #define TARGET_LEGITIMIZE_ADDRESS sparc_legitimize_address
686 #undef TARGET_DELEGITIMIZE_ADDRESS
687 #define TARGET_DELEGITIMIZE_ADDRESS sparc_delegitimize_address
688 #undef TARGET_MODE_DEPENDENT_ADDRESS_P
689 #define TARGET_MODE_DEPENDENT_ADDRESS_P sparc_mode_dependent_address_p
691 #undef TARGET_EXPAND_BUILTIN
692 #define TARGET_EXPAND_BUILTIN sparc_expand_builtin
693 #undef TARGET_FOLD_BUILTIN
694 #define TARGET_FOLD_BUILTIN sparc_fold_builtin
697 #undef TARGET_HAVE_TLS
698 #define TARGET_HAVE_TLS true
701 #undef TARGET_CANNOT_FORCE_CONST_MEM
702 #define TARGET_CANNOT_FORCE_CONST_MEM sparc_cannot_force_const_mem
704 #undef TARGET_ASM_OUTPUT_MI_THUNK
705 #define TARGET_ASM_OUTPUT_MI_THUNK sparc_output_mi_thunk
706 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
707 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK sparc_can_output_mi_thunk
709 #undef TARGET_RTX_COSTS
710 #define TARGET_RTX_COSTS sparc_rtx_costs
711 #undef TARGET_ADDRESS_COST
712 #define TARGET_ADDRESS_COST hook_int_rtx_mode_as_bool_0
713 #undef TARGET_REGISTER_MOVE_COST
714 #define TARGET_REGISTER_MOVE_COST sparc_register_move_cost
716 #undef TARGET_PROMOTE_FUNCTION_MODE
717 #define TARGET_PROMOTE_FUNCTION_MODE sparc_promote_function_mode
719 #undef TARGET_FUNCTION_VALUE
720 #define TARGET_FUNCTION_VALUE sparc_function_value
721 #undef TARGET_LIBCALL_VALUE
722 #define TARGET_LIBCALL_VALUE sparc_libcall_value
723 #undef TARGET_FUNCTION_VALUE_REGNO_P
724 #define TARGET_FUNCTION_VALUE_REGNO_P sparc_function_value_regno_p
726 #undef TARGET_STRUCT_VALUE_RTX
727 #define TARGET_STRUCT_VALUE_RTX sparc_struct_value_rtx
728 #undef TARGET_RETURN_IN_MEMORY
729 #define TARGET_RETURN_IN_MEMORY sparc_return_in_memory
730 #undef TARGET_MUST_PASS_IN_STACK
731 #define TARGET_MUST_PASS_IN_STACK must_pass_in_stack_var_size
732 #undef TARGET_PASS_BY_REFERENCE
733 #define TARGET_PASS_BY_REFERENCE sparc_pass_by_reference
734 #undef TARGET_ARG_PARTIAL_BYTES
735 #define TARGET_ARG_PARTIAL_BYTES sparc_arg_partial_bytes
736 #undef TARGET_FUNCTION_ARG_ADVANCE
737 #define TARGET_FUNCTION_ARG_ADVANCE sparc_function_arg_advance
738 #undef TARGET_FUNCTION_ARG
739 #define TARGET_FUNCTION_ARG sparc_function_arg
740 #undef TARGET_FUNCTION_INCOMING_ARG
741 #define TARGET_FUNCTION_INCOMING_ARG sparc_function_incoming_arg
742 #undef TARGET_FUNCTION_ARG_BOUNDARY
743 #define TARGET_FUNCTION_ARG_BOUNDARY sparc_function_arg_boundary
745 #undef TARGET_EXPAND_BUILTIN_SAVEREGS
746 #define TARGET_EXPAND_BUILTIN_SAVEREGS sparc_builtin_saveregs
747 #undef TARGET_STRICT_ARGUMENT_NAMING
748 #define TARGET_STRICT_ARGUMENT_NAMING sparc_strict_argument_naming
750 #undef TARGET_EXPAND_BUILTIN_VA_START
751 #define TARGET_EXPAND_BUILTIN_VA_START sparc_va_start
752 #undef TARGET_GIMPLIFY_VA_ARG_EXPR
753 #define TARGET_GIMPLIFY_VA_ARG_EXPR sparc_gimplify_va_arg
755 #undef TARGET_VECTOR_MODE_SUPPORTED_P
756 #define TARGET_VECTOR_MODE_SUPPORTED_P sparc_vector_mode_supported_p
758 #undef TARGET_VECTORIZE_PREFERRED_SIMD_MODE
759 #define TARGET_VECTORIZE_PREFERRED_SIMD_MODE sparc_preferred_simd_mode
761 #ifdef SUBTARGET_INSERT_ATTRIBUTES
762 #undef TARGET_INSERT_ATTRIBUTES
763 #define TARGET_INSERT_ATTRIBUTES SUBTARGET_INSERT_ATTRIBUTES
766 #ifdef SUBTARGET_ATTRIBUTE_TABLE
767 #undef TARGET_ATTRIBUTE_TABLE
768 #define TARGET_ATTRIBUTE_TABLE sparc_attribute_table
771 #undef TARGET_RELAXED_ORDERING
772 #define TARGET_RELAXED_ORDERING SPARC_RELAXED_ORDERING
774 #undef TARGET_OPTION_OVERRIDE
775 #define TARGET_OPTION_OVERRIDE sparc_option_override
777 #if TARGET_GNU_TLS && defined(HAVE_AS_SPARC_UA_PCREL)
778 #undef TARGET_ASM_OUTPUT_DWARF_DTPREL
779 #define TARGET_ASM_OUTPUT_DWARF_DTPREL sparc_output_dwarf_dtprel
782 #undef TARGET_ASM_FILE_END
783 #define TARGET_ASM_FILE_END sparc_file_end
785 #undef TARGET_FRAME_POINTER_REQUIRED
786 #define TARGET_FRAME_POINTER_REQUIRED sparc_frame_pointer_required
788 #undef TARGET_BUILTIN_SETJMP_FRAME_VALUE
789 #define TARGET_BUILTIN_SETJMP_FRAME_VALUE sparc_builtin_setjmp_frame_value
791 #undef TARGET_CAN_ELIMINATE
792 #define TARGET_CAN_ELIMINATE sparc_can_eliminate
794 #undef TARGET_PREFERRED_RELOAD_CLASS
795 #define TARGET_PREFERRED_RELOAD_CLASS sparc_preferred_reload_class
797 #undef TARGET_SECONDARY_RELOAD
798 #define TARGET_SECONDARY_RELOAD sparc_secondary_reload
800 #undef TARGET_CONDITIONAL_REGISTER_USAGE
801 #define TARGET_CONDITIONAL_REGISTER_USAGE sparc_conditional_register_usage
803 #ifdef TARGET_ALTERNATE_LONG_DOUBLE_MANGLING
804 #undef TARGET_MANGLE_TYPE
805 #define TARGET_MANGLE_TYPE sparc_mangle_type
808 #undef TARGET_LEGITIMATE_ADDRESS_P
809 #define TARGET_LEGITIMATE_ADDRESS_P sparc_legitimate_address_p
811 #undef TARGET_LEGITIMATE_CONSTANT_P
812 #define TARGET_LEGITIMATE_CONSTANT_P sparc_legitimate_constant_p
814 #undef TARGET_TRAMPOLINE_INIT
815 #define TARGET_TRAMPOLINE_INIT sparc_trampoline_init
817 #undef TARGET_PRINT_OPERAND_PUNCT_VALID_P
818 #define TARGET_PRINT_OPERAND_PUNCT_VALID_P sparc_print_operand_punct_valid_p
819 #undef TARGET_PRINT_OPERAND
820 #define TARGET_PRINT_OPERAND sparc_print_operand
821 #undef TARGET_PRINT_OPERAND_ADDRESS
822 #define TARGET_PRINT_OPERAND_ADDRESS sparc_print_operand_address
824 /* The value stored by LDSTUB. */
825 #undef TARGET_ATOMIC_TEST_AND_SET_TRUEVAL
826 #define TARGET_ATOMIC_TEST_AND_SET_TRUEVAL 0xff
828 #undef TARGET_CSTORE_MODE
829 #define TARGET_CSTORE_MODE sparc_cstore_mode
831 struct gcc_target targetm = TARGET_INITIALIZER;
833 /* Return the memory reference contained in X if any, zero otherwise. */
838 if (GET_CODE (x) == SIGN_EXTEND || GET_CODE (x) == ZERO_EXTEND)
847 /* We use a machine specific pass to enable workarounds for errata.
848 We need to have the (essentially) final form of the insn stream in order
849 to properly detect the various hazards. Therefore, this machine specific
850 pass runs as late as possible. The pass is inserted in the pass pipeline
851 at the end of sparc_option_override. */
854 sparc_gate_work_around_errata (void)
856 /* The only errata we handle are those of the AT697F and UT699. */
857 return sparc_fix_at697f != 0 || sparc_fix_ut699 != 0;
861 sparc_do_work_around_errata (void)
865 /* Force all instructions to be split into their final form. */
866 split_all_insns_noflow ();
868 /* Now look for specific patterns in the insn stream. */
869 for (insn = get_insns (); insn; insn = next)
871 bool insert_nop = false;
874 /* Look into the instruction in a delay slot. */
875 if (NONJUMP_INSN_P (insn) && GET_CODE (PATTERN (insn)) == SEQUENCE)
876 insn = XVECEXP (PATTERN (insn), 0, 1);
878 /* Look for a single-word load into an odd-numbered FP register. */
880 && NONJUMP_INSN_P (insn)
881 && (set = single_set (insn)) != NULL_RTX
882 && GET_MODE_SIZE (GET_MODE (SET_SRC (set))) == 4
883 && MEM_P (SET_SRC (set))
884 && REG_P (SET_DEST (set))
885 && REGNO (SET_DEST (set)) > 31
886 && REGNO (SET_DEST (set)) % 2 != 0)
888 /* The wrong dependency is on the enclosing double register. */
889 unsigned int x = REGNO (SET_DEST (set)) - 1;
890 unsigned int src1, src2, dest;
893 /* If the insn has a delay slot, then it cannot be problematic. */
894 next = next_active_insn (insn);
897 if (NONJUMP_INSN_P (next) && GET_CODE (PATTERN (next)) == SEQUENCE)
901 code = INSN_CODE (next);
905 case CODE_FOR_adddf3:
906 case CODE_FOR_subdf3:
907 case CODE_FOR_muldf3:
908 case CODE_FOR_divdf3:
909 dest = REGNO (recog_data.operand[0]);
910 src1 = REGNO (recog_data.operand[1]);
911 src2 = REGNO (recog_data.operand[2]);
916 FPOPd %f{x,y}, %f{y,x}, %f{x,y} */
917 if ((src1 == x || src2 == x)
918 && (dest == src1 || dest == src2))
925 FPOPd %fx, %fx, %fx */
928 && (code == CODE_FOR_adddf3 || code == CODE_FOR_muldf3))
933 case CODE_FOR_sqrtdf2:
934 dest = REGNO (recog_data.operand[0]);
935 src1 = REGNO (recog_data.operand[1]);
939 if (src1 == x && dest == src1)
948 /* Look for a single-word load into an integer register. */
949 else if (sparc_fix_ut699
950 && NONJUMP_INSN_P (insn)
951 && (set = single_set (insn)) != NULL_RTX
952 && GET_MODE_SIZE (GET_MODE (SET_SRC (set))) <= 4
953 && mem_ref (SET_SRC (set)) != NULL_RTX
954 && REG_P (SET_DEST (set))
955 && REGNO (SET_DEST (set)) < 32)
957 /* There is no problem if the second memory access has a data
958 dependency on the first single-cycle load. */
959 rtx x = SET_DEST (set);
961 /* If the insn has a delay slot, then it cannot be problematic. */
962 next = next_active_insn (insn);
965 if (NONJUMP_INSN_P (next) && GET_CODE (PATTERN (next)) == SEQUENCE)
968 /* Look for a second memory access to/from an integer register. */
969 if ((set = single_set (next)) != NULL_RTX)
971 rtx src = SET_SRC (set);
972 rtx dest = SET_DEST (set);
975 /* LDD is affected. */
976 if ((mem = mem_ref (src)) != NULL_RTX
979 && !reg_mentioned_p (x, XEXP (mem, 0)))
982 /* STD is *not* affected. */
983 else if ((mem = mem_ref (dest)) != NULL_RTX
984 && GET_MODE_SIZE (GET_MODE (mem)) <= 4
985 && (src == const0_rtx
988 && REGNO (src) != REGNO (x)))
989 && !reg_mentioned_p (x, XEXP (mem, 0)))
995 next = NEXT_INSN (insn);
998 emit_insn_before (gen_nop (), next);
1006 const pass_data pass_data_work_around_errata =
1008 RTL_PASS, /* type */
1009 "errata", /* name */
1010 OPTGROUP_NONE, /* optinfo_flags */
1011 true, /* has_gate */
1012 true, /* has_execute */
1013 TV_MACH_DEP, /* tv_id */
1014 0, /* properties_required */
1015 0, /* properties_provided */
1016 0, /* properties_destroyed */
1017 0, /* todo_flags_start */
1018 TODO_verify_rtl_sharing, /* todo_flags_finish */
1021 class pass_work_around_errata : public rtl_opt_pass
1024 pass_work_around_errata(gcc::context *ctxt)
1025 : rtl_opt_pass(pass_data_work_around_errata, ctxt)
1028 /* opt_pass methods: */
1029 bool gate () { return sparc_gate_work_around_errata (); }
1030 unsigned int execute () { return sparc_do_work_around_errata (); }
1032 }; // class pass_work_around_errata
1037 make_pass_work_around_errata (gcc::context *ctxt)
1039 return new pass_work_around_errata (ctxt);
1042 /* Helpers for TARGET_DEBUG_OPTIONS. */
1044 dump_target_flag_bits (const int flags)
1046 if (flags & MASK_64BIT)
1047 fprintf (stderr, "64BIT ");
1048 if (flags & MASK_APP_REGS)
1049 fprintf (stderr, "APP_REGS ");
1050 if (flags & MASK_FASTER_STRUCTS)
1051 fprintf (stderr, "FASTER_STRUCTS ");
1052 if (flags & MASK_FLAT)
1053 fprintf (stderr, "FLAT ");
1054 if (flags & MASK_FMAF)
1055 fprintf (stderr, "FMAF ");
1056 if (flags & MASK_FPU)
1057 fprintf (stderr, "FPU ");
1058 if (flags & MASK_HARD_QUAD)
1059 fprintf (stderr, "HARD_QUAD ");
1060 if (flags & MASK_POPC)
1061 fprintf (stderr, "POPC ");
1062 if (flags & MASK_PTR64)
1063 fprintf (stderr, "PTR64 ");
1064 if (flags & MASK_STACK_BIAS)
1065 fprintf (stderr, "STACK_BIAS ");
1066 if (flags & MASK_UNALIGNED_DOUBLES)
1067 fprintf (stderr, "UNALIGNED_DOUBLES ");
1068 if (flags & MASK_V8PLUS)
1069 fprintf (stderr, "V8PLUS ");
1070 if (flags & MASK_VIS)
1071 fprintf (stderr, "VIS ");
1072 if (flags & MASK_VIS2)
1073 fprintf (stderr, "VIS2 ");
1074 if (flags & MASK_VIS3)
1075 fprintf (stderr, "VIS3 ");
1076 if (flags & MASK_CBCOND)
1077 fprintf (stderr, "CBCOND ");
1078 if (flags & MASK_DEPRECATED_V8_INSNS)
1079 fprintf (stderr, "DEPRECATED_V8_INSNS ");
1080 if (flags & MASK_SPARCLET)
1081 fprintf (stderr, "SPARCLET ");
1082 if (flags & MASK_SPARCLITE)
1083 fprintf (stderr, "SPARCLITE ");
1084 if (flags & MASK_V8)
1085 fprintf (stderr, "V8 ");
1086 if (flags & MASK_V9)
1087 fprintf (stderr, "V9 ");
1091 dump_target_flags (const char *prefix, const int flags)
1093 fprintf (stderr, "%s: (%08x) [ ", prefix, flags);
1094 dump_target_flag_bits (flags);
1095 fprintf(stderr, "]\n");
1098 /* Validate and override various options, and do some machine dependent
1102 sparc_option_override (void)
1104 static struct code_model {
1105 const char *const name;
1106 const enum cmodel value;
1107 } const cmodels[] = {
1109 { "medlow", CM_MEDLOW },
1110 { "medmid", CM_MEDMID },
1111 { "medany", CM_MEDANY },
1112 { "embmedany", CM_EMBMEDANY },
1113 { NULL, (enum cmodel) 0 }
1115 const struct code_model *cmodel;
1116 /* Map TARGET_CPU_DEFAULT to value for -m{cpu,tune}=. */
1117 static struct cpu_default {
1119 const enum processor_type processor;
1120 } const cpu_default[] = {
1121 /* There must be one entry here for each TARGET_CPU value. */
1122 { TARGET_CPU_sparc, PROCESSOR_CYPRESS },
1123 { TARGET_CPU_v8, PROCESSOR_V8 },
1124 { TARGET_CPU_supersparc, PROCESSOR_SUPERSPARC },
1125 { TARGET_CPU_hypersparc, PROCESSOR_HYPERSPARC },
1126 { TARGET_CPU_leon, PROCESSOR_LEON },
1127 { TARGET_CPU_leon3, PROCESSOR_LEON3 },
1128 { TARGET_CPU_sparclite, PROCESSOR_F930 },
1129 { TARGET_CPU_sparclite86x, PROCESSOR_SPARCLITE86X },
1130 { TARGET_CPU_sparclet, PROCESSOR_TSC701 },
1131 { TARGET_CPU_v9, PROCESSOR_V9 },
1132 { TARGET_CPU_ultrasparc, PROCESSOR_ULTRASPARC },
1133 { TARGET_CPU_ultrasparc3, PROCESSOR_ULTRASPARC3 },
1134 { TARGET_CPU_niagara, PROCESSOR_NIAGARA },
1135 { TARGET_CPU_niagara2, PROCESSOR_NIAGARA2 },
1136 { TARGET_CPU_niagara3, PROCESSOR_NIAGARA3 },
1137 { TARGET_CPU_niagara4, PROCESSOR_NIAGARA4 },
1138 { -1, PROCESSOR_V7 }
1140 const struct cpu_default *def;
1141 /* Table of values for -m{cpu,tune}=. This must match the order of
1142 the enum processor_type in sparc-opts.h. */
1143 static struct cpu_table {
1144 const char *const name;
1147 } const cpu_table[] = {
1148 { "v7", MASK_ISA, 0 },
1149 { "cypress", MASK_ISA, 0 },
1150 { "v8", MASK_ISA, MASK_V8 },
1151 /* TI TMS390Z55 supersparc */
1152 { "supersparc", MASK_ISA, MASK_V8 },
1153 { "hypersparc", MASK_ISA, MASK_V8|MASK_FPU },
1155 { "leon", MASK_ISA, MASK_V8|MASK_FPU },
1156 { "leon3", MASK_ISA, MASK_V8|MASK_FPU },
1157 { "sparclite", MASK_ISA, MASK_SPARCLITE },
1158 /* The Fujitsu MB86930 is the original sparclite chip, with no FPU. */
1159 { "f930", MASK_ISA|MASK_FPU, MASK_SPARCLITE },
1160 /* The Fujitsu MB86934 is the recent sparclite chip, with an FPU. */
1161 { "f934", MASK_ISA, MASK_SPARCLITE|MASK_FPU },
1162 { "sparclite86x", MASK_ISA|MASK_FPU, MASK_SPARCLITE },
1163 { "sparclet", MASK_ISA, MASK_SPARCLET },
1164 /* TEMIC sparclet */
1165 { "tsc701", MASK_ISA, MASK_SPARCLET },
1166 { "v9", MASK_ISA, MASK_V9 },
1167 /* UltraSPARC I, II, IIi */
1168 { "ultrasparc", MASK_ISA,
1169 /* Although insns using %y are deprecated, it is a clear win. */
1170 MASK_V9|MASK_DEPRECATED_V8_INSNS },
1171 /* UltraSPARC III */
1172 /* ??? Check if %y issue still holds true. */
1173 { "ultrasparc3", MASK_ISA,
1174 MASK_V9|MASK_DEPRECATED_V8_INSNS|MASK_VIS2 },
1176 { "niagara", MASK_ISA,
1177 MASK_V9|MASK_DEPRECATED_V8_INSNS },
1179 { "niagara2", MASK_ISA,
1180 MASK_V9|MASK_POPC|MASK_VIS2 },
1182 { "niagara3", MASK_ISA,
1183 MASK_V9|MASK_POPC|MASK_VIS2|MASK_VIS3|MASK_FMAF },
1185 { "niagara4", MASK_ISA,
1186 MASK_V9|MASK_POPC|MASK_VIS2|MASK_VIS3|MASK_FMAF|MASK_CBCOND },
1188 const struct cpu_table *cpu;
1192 if (sparc_debug_string != NULL)
1197 p = ASTRDUP (sparc_debug_string);
1198 while ((q = strtok (p, ",")) != NULL)
1212 if (! strcmp (q, "all"))
1213 mask = MASK_DEBUG_ALL;
1214 else if (! strcmp (q, "options"))
1215 mask = MASK_DEBUG_OPTIONS;
1217 error ("unknown -mdebug-%s switch", q);
1220 sparc_debug &= ~mask;
1222 sparc_debug |= mask;
1226 if (TARGET_DEBUG_OPTIONS)
1228 dump_target_flags("Initial target_flags", target_flags);
1229 dump_target_flags("target_flags_explicit", target_flags_explicit);
1232 #ifdef SUBTARGET_OVERRIDE_OPTIONS
1233 SUBTARGET_OVERRIDE_OPTIONS;
1236 #ifndef SPARC_BI_ARCH
1237 /* Check for unsupported architecture size. */
1238 if (! TARGET_64BIT != DEFAULT_ARCH32_P)
1239 error ("%s is not supported by this configuration",
1240 DEFAULT_ARCH32_P ? "-m64" : "-m32");
1243 /* We force all 64bit archs to use 128 bit long double */
1244 if (TARGET_64BIT && ! TARGET_LONG_DOUBLE_128)
1246 error ("-mlong-double-64 not allowed with -m64");
1247 target_flags |= MASK_LONG_DOUBLE_128;
1250 /* Code model selection. */
1251 sparc_cmodel = SPARC_DEFAULT_CMODEL;
1253 #ifdef SPARC_BI_ARCH
1255 sparc_cmodel = CM_32;
1258 if (sparc_cmodel_string != NULL)
1262 for (cmodel = &cmodels[0]; cmodel->name; cmodel++)
1263 if (strcmp (sparc_cmodel_string, cmodel->name) == 0)
1265 if (cmodel->name == NULL)
1266 error ("bad value (%s) for -mcmodel= switch", sparc_cmodel_string);
1268 sparc_cmodel = cmodel->value;
1271 error ("-mcmodel= is not supported on 32 bit systems");
1274 /* Check that -fcall-saved-REG wasn't specified for out registers. */
1275 for (i = 8; i < 16; i++)
1276 if (!call_used_regs [i])
1278 error ("-fcall-saved-REG is not supported for out registers");
1279 call_used_regs [i] = 1;
1282 fpu = target_flags & MASK_FPU; /* save current -mfpu status */
1284 /* Set the default CPU. */
1285 if (!global_options_set.x_sparc_cpu_and_features)
1287 for (def = &cpu_default[0]; def->cpu != -1; ++def)
1288 if (def->cpu == TARGET_CPU_DEFAULT)
1290 gcc_assert (def->cpu != -1);
1291 sparc_cpu_and_features = def->processor;
1294 if (!global_options_set.x_sparc_cpu)
1295 sparc_cpu = sparc_cpu_and_features;
1297 cpu = &cpu_table[(int) sparc_cpu_and_features];
1299 if (TARGET_DEBUG_OPTIONS)
1301 fprintf (stderr, "sparc_cpu_and_features: %s\n", cpu->name);
1302 fprintf (stderr, "sparc_cpu: %s\n",
1303 cpu_table[(int) sparc_cpu].name);
1304 dump_target_flags ("cpu->disable", cpu->disable);
1305 dump_target_flags ("cpu->enable", cpu->enable);
1308 target_flags &= ~cpu->disable;
1309 target_flags |= (cpu->enable
1310 #ifndef HAVE_AS_FMAF_HPC_VIS3
1311 & ~(MASK_FMAF | MASK_VIS3)
1313 #ifndef HAVE_AS_SPARC4
1318 /* If -mfpu or -mno-fpu was explicitly used, don't override with
1319 the processor default. */
1320 if (target_flags_explicit & MASK_FPU)
1321 target_flags = (target_flags & ~MASK_FPU) | fpu;
1323 /* -mvis2 implies -mvis */
1325 target_flags |= MASK_VIS;
1327 /* -mvis3 implies -mvis2 and -mvis */
1329 target_flags |= MASK_VIS2 | MASK_VIS;
1331 /* Don't allow -mvis, -mvis2, -mvis3, or -mfmaf if FPU is
1334 target_flags &= ~(MASK_VIS | MASK_VIS2 | MASK_VIS3 | MASK_FMAF);
1336 /* -mvis assumes UltraSPARC+, so we are sure v9 instructions
1338 -m64 also implies v9. */
1339 if (TARGET_VIS || TARGET_ARCH64)
1341 target_flags |= MASK_V9;
1342 target_flags &= ~(MASK_V8 | MASK_SPARCLET | MASK_SPARCLITE);
1345 /* -mvis also implies -mv8plus on 32-bit */
1346 if (TARGET_VIS && ! TARGET_ARCH64)
1347 target_flags |= MASK_V8PLUS;
1349 /* Use the deprecated v8 insns for sparc64 in 32 bit mode. */
1350 if (TARGET_V9 && TARGET_ARCH32)
1351 target_flags |= MASK_DEPRECATED_V8_INSNS;
1353 /* V8PLUS requires V9, makes no sense in 64 bit mode. */
1354 if (! TARGET_V9 || TARGET_ARCH64)
1355 target_flags &= ~MASK_V8PLUS;
1357 /* Don't use stack biasing in 32 bit mode. */
1359 target_flags &= ~MASK_STACK_BIAS;
1361 /* Supply a default value for align_functions. */
1362 if (align_functions == 0
1363 && (sparc_cpu == PROCESSOR_ULTRASPARC
1364 || sparc_cpu == PROCESSOR_ULTRASPARC3
1365 || sparc_cpu == PROCESSOR_NIAGARA
1366 || sparc_cpu == PROCESSOR_NIAGARA2
1367 || sparc_cpu == PROCESSOR_NIAGARA3
1368 || sparc_cpu == PROCESSOR_NIAGARA4))
1369 align_functions = 32;
1371 /* Validate PCC_STRUCT_RETURN. */
1372 if (flag_pcc_struct_return == DEFAULT_PCC_STRUCT_RETURN)
1373 flag_pcc_struct_return = (TARGET_ARCH64 ? 0 : 1);
1375 /* Only use .uaxword when compiling for a 64-bit target. */
1377 targetm.asm_out.unaligned_op.di = NULL;
1379 /* Do various machine dependent initializations. */
1380 sparc_init_modes ();
1382 /* Set up function hooks. */
1383 init_machine_status = sparc_init_machine_status;
1388 case PROCESSOR_CYPRESS:
1389 sparc_costs = &cypress_costs;
1392 case PROCESSOR_SPARCLITE:
1393 case PROCESSOR_SUPERSPARC:
1394 sparc_costs = &supersparc_costs;
1396 case PROCESSOR_F930:
1397 case PROCESSOR_F934:
1398 case PROCESSOR_HYPERSPARC:
1399 case PROCESSOR_SPARCLITE86X:
1400 sparc_costs = &hypersparc_costs;
1402 case PROCESSOR_LEON:
1403 sparc_costs = &leon_costs;
1405 case PROCESSOR_LEON3:
1406 sparc_costs = &leon3_costs;
1408 case PROCESSOR_SPARCLET:
1409 case PROCESSOR_TSC701:
1410 sparc_costs = &sparclet_costs;
1413 case PROCESSOR_ULTRASPARC:
1414 sparc_costs = &ultrasparc_costs;
1416 case PROCESSOR_ULTRASPARC3:
1417 sparc_costs = &ultrasparc3_costs;
1419 case PROCESSOR_NIAGARA:
1420 sparc_costs = &niagara_costs;
1422 case PROCESSOR_NIAGARA2:
1423 sparc_costs = &niagara2_costs;
1425 case PROCESSOR_NIAGARA3:
1426 sparc_costs = &niagara3_costs;
1428 case PROCESSOR_NIAGARA4:
1429 sparc_costs = &niagara4_costs;
1431 case PROCESSOR_NATIVE:
1435 if (sparc_memory_model == SMM_DEFAULT)
1437 /* Choose the memory model for the operating system. */
1438 enum sparc_memory_model_type os_default = SUBTARGET_DEFAULT_MEMORY_MODEL;
1439 if (os_default != SMM_DEFAULT)
1440 sparc_memory_model = os_default;
1441 /* Choose the most relaxed model for the processor. */
1443 sparc_memory_model = SMM_RMO;
1445 sparc_memory_model = SMM_PSO;
1447 sparc_memory_model = SMM_SC;
1450 #ifdef TARGET_DEFAULT_LONG_DOUBLE_128
1451 if (!(target_flags_explicit & MASK_LONG_DOUBLE_128))
1452 target_flags |= MASK_LONG_DOUBLE_128;
1455 if (TARGET_DEBUG_OPTIONS)
1456 dump_target_flags ("Final target_flags", target_flags);
1458 maybe_set_param_value (PARAM_SIMULTANEOUS_PREFETCHES,
1459 ((sparc_cpu == PROCESSOR_ULTRASPARC
1460 || sparc_cpu == PROCESSOR_NIAGARA
1461 || sparc_cpu == PROCESSOR_NIAGARA2
1462 || sparc_cpu == PROCESSOR_NIAGARA3
1463 || sparc_cpu == PROCESSOR_NIAGARA4)
1465 : (sparc_cpu == PROCESSOR_ULTRASPARC3
1467 global_options.x_param_values,
1468 global_options_set.x_param_values);
1469 maybe_set_param_value (PARAM_L1_CACHE_LINE_SIZE,
1470 ((sparc_cpu == PROCESSOR_ULTRASPARC
1471 || sparc_cpu == PROCESSOR_ULTRASPARC3
1472 || sparc_cpu == PROCESSOR_NIAGARA
1473 || sparc_cpu == PROCESSOR_NIAGARA2
1474 || sparc_cpu == PROCESSOR_NIAGARA3
1475 || sparc_cpu == PROCESSOR_NIAGARA4)
1477 global_options.x_param_values,
1478 global_options_set.x_param_values);
1480 /* Disable save slot sharing for call-clobbered registers by default.
1481 The IRA sharing algorithm works on single registers only and this
1482 pessimizes for double floating-point registers. */
1483 if (!global_options_set.x_flag_ira_share_save_slots)
1484 flag_ira_share_save_slots = 0;
1486 /* We register a machine specific pass to work around errata, if any.
1487 The pass mut be scheduled as late as possible so that we have the
1488 (essentially) final form of the insn stream to work on.
1489 Registering the pass must be done at start up. It's convenient to
1491 opt_pass *errata_pass = make_pass_work_around_errata (g);
1492 struct register_pass_info insert_pass_work_around_errata =
1494 errata_pass, /* pass */
1495 "dbr", /* reference_pass_name */
1496 1, /* ref_pass_instance_number */
1497 PASS_POS_INSERT_AFTER /* po_op */
1499 register_pass (&insert_pass_work_around_errata);
1502 /* Miscellaneous utilities. */
1504 /* Nonzero if CODE, a comparison, is suitable for use in v9 conditional move
1505 or branch on register contents instructions. */
1508 v9_regcmp_p (enum rtx_code code)
1510 return (code == EQ || code == NE || code == GE || code == LT
1511 || code == LE || code == GT);
1514 /* Nonzero if OP is a floating point constant which can
1515 be loaded into an integer register using a single
1516 sethi instruction. */
1521 if (GET_CODE (op) == CONST_DOUBLE)
1526 REAL_VALUE_FROM_CONST_DOUBLE (r, op);
1527 REAL_VALUE_TO_TARGET_SINGLE (r, i);
1528 return !SPARC_SIMM13_P (i) && SPARC_SETHI_P (i);
1534 /* Nonzero if OP is a floating point constant which can
1535 be loaded into an integer register using a single
1541 if (GET_CODE (op) == CONST_DOUBLE)
1546 REAL_VALUE_FROM_CONST_DOUBLE (r, op);
1547 REAL_VALUE_TO_TARGET_SINGLE (r, i);
1548 return SPARC_SIMM13_P (i);
1554 /* Nonzero if OP is a floating point constant which can
1555 be loaded into an integer register using a high/losum
1556 instruction sequence. */
1559 fp_high_losum_p (rtx op)
1561 /* The constraints calling this should only be in
1562 SFmode move insns, so any constant which cannot
1563 be moved using a single insn will do. */
1564 if (GET_CODE (op) == CONST_DOUBLE)
1569 REAL_VALUE_FROM_CONST_DOUBLE (r, op);
1570 REAL_VALUE_TO_TARGET_SINGLE (r, i);
1571 return !SPARC_SIMM13_P (i) && !SPARC_SETHI_P (i);
1577 /* Return true if the address of LABEL can be loaded by means of the
1578 mov{si,di}_pic_label_ref patterns in PIC mode. */
1581 can_use_mov_pic_label_ref (rtx label)
1583 /* VxWorks does not impose a fixed gap between segments; the run-time
1584 gap can be different from the object-file gap. We therefore can't
1585 assume X - _GLOBAL_OFFSET_TABLE_ is a link-time constant unless we
1586 are absolutely sure that X is in the same segment as the GOT.
1587 Unfortunately, the flexibility of linker scripts means that we
1588 can't be sure of that in general, so assume that GOT-relative
1589 accesses are never valid on VxWorks. */
1590 if (TARGET_VXWORKS_RTP)
1593 /* Similarly, if the label is non-local, it might end up being placed
1594 in a different section than the current one; now mov_pic_label_ref
1595 requires the label and the code to be in the same section. */
1596 if (LABEL_REF_NONLOCAL_P (label))
1599 /* Finally, if we are reordering basic blocks and partition into hot
1600 and cold sections, this might happen for any label. */
1601 if (flag_reorder_blocks_and_partition)
1607 /* Expand a move instruction. Return true if all work is done. */
1610 sparc_expand_move (enum machine_mode mode, rtx *operands)
1612 /* Handle sets of MEM first. */
1613 if (GET_CODE (operands[0]) == MEM)
1615 /* 0 is a register (or a pair of registers) on SPARC. */
1616 if (register_or_zero_operand (operands[1], mode))
1619 if (!reload_in_progress)
1621 operands[0] = validize_mem (operands[0]);
1622 operands[1] = force_reg (mode, operands[1]);
1626 /* Fixup TLS cases. */
1628 && CONSTANT_P (operands[1])
1629 && sparc_tls_referenced_p (operands [1]))
1631 operands[1] = sparc_legitimize_tls_address (operands[1]);
1635 /* Fixup PIC cases. */
1636 if (flag_pic && CONSTANT_P (operands[1]))
1638 if (pic_address_needs_scratch (operands[1]))
1639 operands[1] = sparc_legitimize_pic_address (operands[1], NULL_RTX);
1641 /* We cannot use the mov{si,di}_pic_label_ref patterns in all cases. */
1642 if (GET_CODE (operands[1]) == LABEL_REF
1643 && can_use_mov_pic_label_ref (operands[1]))
1647 emit_insn (gen_movsi_pic_label_ref (operands[0], operands[1]));
1653 gcc_assert (TARGET_ARCH64);
1654 emit_insn (gen_movdi_pic_label_ref (operands[0], operands[1]));
1659 if (symbolic_operand (operands[1], mode))
1662 = sparc_legitimize_pic_address (operands[1],
1664 ? operands[0] : NULL_RTX);
1669 /* If we are trying to toss an integer constant into FP registers,
1670 or loading a FP or vector constant, force it into memory. */
1671 if (CONSTANT_P (operands[1])
1672 && REG_P (operands[0])
1673 && (SPARC_FP_REG_P (REGNO (operands[0]))
1674 || SCALAR_FLOAT_MODE_P (mode)
1675 || VECTOR_MODE_P (mode)))
1677 /* emit_group_store will send such bogosity to us when it is
1678 not storing directly into memory. So fix this up to avoid
1679 crashes in output_constant_pool. */
1680 if (operands [1] == const0_rtx)
1681 operands[1] = CONST0_RTX (mode);
1683 /* We can clear or set to all-ones FP registers if TARGET_VIS, and
1684 always other regs. */
1685 if ((TARGET_VIS || REGNO (operands[0]) < SPARC_FIRST_FP_REG)
1686 && (const_zero_operand (operands[1], mode)
1687 || const_all_ones_operand (operands[1], mode)))
1690 if (REGNO (operands[0]) < SPARC_FIRST_FP_REG
1691 /* We are able to build any SF constant in integer registers
1692 with at most 2 instructions. */
1694 /* And any DF constant in integer registers. */
1696 && ! can_create_pseudo_p ())))
1699 operands[1] = force_const_mem (mode, operands[1]);
1700 if (!reload_in_progress)
1701 operands[1] = validize_mem (operands[1]);
1705 /* Accept non-constants and valid constants unmodified. */
1706 if (!CONSTANT_P (operands[1])
1707 || GET_CODE (operands[1]) == HIGH
1708 || input_operand (operands[1], mode))
1714 /* All QImode constants require only one insn, so proceed. */
1719 sparc_emit_set_const32 (operands[0], operands[1]);
1723 /* input_operand should have filtered out 32-bit mode. */
1724 sparc_emit_set_const64 (operands[0], operands[1]);
1730 /* TImode isn't available in 32-bit mode. */
1731 split_double (operands[1], &high, &low);
1732 emit_insn (gen_movdi (operand_subword (operands[0], 0, 0, TImode),
1734 emit_insn (gen_movdi (operand_subword (operands[0], 1, 0, TImode),
1746 /* Load OP1, a 32-bit constant, into OP0, a register.
1747 We know it can't be done in one insn when we get
1748 here, the move expander guarantees this. */
1751 sparc_emit_set_const32 (rtx op0, rtx op1)
1753 enum machine_mode mode = GET_MODE (op0);
1756 if (can_create_pseudo_p ())
1757 temp = gen_reg_rtx (mode);
1759 if (GET_CODE (op1) == CONST_INT)
1761 gcc_assert (!small_int_operand (op1, mode)
1762 && !const_high_operand (op1, mode));
1764 /* Emit them as real moves instead of a HIGH/LO_SUM,
1765 this way CSE can see everything and reuse intermediate
1766 values if it wants. */
1767 emit_insn (gen_rtx_SET (VOIDmode, temp,
1768 GEN_INT (INTVAL (op1)
1769 & ~(HOST_WIDE_INT)0x3ff)));
1771 emit_insn (gen_rtx_SET (VOIDmode,
1773 gen_rtx_IOR (mode, temp,
1774 GEN_INT (INTVAL (op1) & 0x3ff))));
1778 /* A symbol, emit in the traditional way. */
1779 emit_insn (gen_rtx_SET (VOIDmode, temp,
1780 gen_rtx_HIGH (mode, op1)));
1781 emit_insn (gen_rtx_SET (VOIDmode,
1782 op0, gen_rtx_LO_SUM (mode, temp, op1)));
1786 /* Load OP1, a symbolic 64-bit constant, into OP0, a DImode register.
1787 If TEMP is nonzero, we are forbidden to use any other scratch
1788 registers. Otherwise, we are allowed to generate them as needed.
1790 Note that TEMP may have TImode if the code model is TARGET_CM_MEDANY
1791 or TARGET_CM_EMBMEDANY (see the reload_indi and reload_outdi patterns). */
1794 sparc_emit_set_symbolic_const64 (rtx op0, rtx op1, rtx temp)
1796 rtx temp1, temp2, temp3, temp4, temp5;
1799 if (temp && GET_MODE (temp) == TImode)
1802 temp = gen_rtx_REG (DImode, REGNO (temp));
1805 /* SPARC-V9 code-model support. */
1806 switch (sparc_cmodel)
1809 /* The range spanned by all instructions in the object is less
1810 than 2^31 bytes (2GB) and the distance from any instruction
1811 to the location of the label _GLOBAL_OFFSET_TABLE_ is less
1812 than 2^31 bytes (2GB).
1814 The executable must be in the low 4TB of the virtual address
1817 sethi %hi(symbol), %temp1
1818 or %temp1, %lo(symbol), %reg */
1820 temp1 = temp; /* op0 is allowed. */
1822 temp1 = gen_reg_rtx (DImode);
1824 emit_insn (gen_rtx_SET (VOIDmode, temp1, gen_rtx_HIGH (DImode, op1)));
1825 emit_insn (gen_rtx_SET (VOIDmode, op0, gen_rtx_LO_SUM (DImode, temp1, op1)));
1829 /* The range spanned by all instructions in the object is less
1830 than 2^31 bytes (2GB) and the distance from any instruction
1831 to the location of the label _GLOBAL_OFFSET_TABLE_ is less
1832 than 2^31 bytes (2GB).
1834 The executable must be in the low 16TB of the virtual address
1837 sethi %h44(symbol), %temp1
1838 or %temp1, %m44(symbol), %temp2
1839 sllx %temp2, 12, %temp3
1840 or %temp3, %l44(symbol), %reg */
1845 temp3 = temp; /* op0 is allowed. */
1849 temp1 = gen_reg_rtx (DImode);
1850 temp2 = gen_reg_rtx (DImode);
1851 temp3 = gen_reg_rtx (DImode);
1854 emit_insn (gen_seth44 (temp1, op1));
1855 emit_insn (gen_setm44 (temp2, temp1, op1));
1856 emit_insn (gen_rtx_SET (VOIDmode, temp3,
1857 gen_rtx_ASHIFT (DImode, temp2, GEN_INT (12))));
1858 emit_insn (gen_setl44 (op0, temp3, op1));
1862 /* The range spanned by all instructions in the object is less
1863 than 2^31 bytes (2GB) and the distance from any instruction
1864 to the location of the label _GLOBAL_OFFSET_TABLE_ is less
1865 than 2^31 bytes (2GB).
1867 The executable can be placed anywhere in the virtual address
1870 sethi %hh(symbol), %temp1
1871 sethi %lm(symbol), %temp2
1872 or %temp1, %hm(symbol), %temp3
1873 sllx %temp3, 32, %temp4
1874 or %temp4, %temp2, %temp5
1875 or %temp5, %lo(symbol), %reg */
1878 /* It is possible that one of the registers we got for operands[2]
1879 might coincide with that of operands[0] (which is why we made
1880 it TImode). Pick the other one to use as our scratch. */
1881 if (rtx_equal_p (temp, op0))
1883 gcc_assert (ti_temp);
1884 temp = gen_rtx_REG (DImode, REGNO (temp) + 1);
1887 temp2 = temp; /* op0 is _not_ allowed, see above. */
1894 temp1 = gen_reg_rtx (DImode);
1895 temp2 = gen_reg_rtx (DImode);
1896 temp3 = gen_reg_rtx (DImode);
1897 temp4 = gen_reg_rtx (DImode);
1898 temp5 = gen_reg_rtx (DImode);
1901 emit_insn (gen_sethh (temp1, op1));
1902 emit_insn (gen_setlm (temp2, op1));
1903 emit_insn (gen_sethm (temp3, temp1, op1));
1904 emit_insn (gen_rtx_SET (VOIDmode, temp4,
1905 gen_rtx_ASHIFT (DImode, temp3, GEN_INT (32))));
1906 emit_insn (gen_rtx_SET (VOIDmode, temp5,
1907 gen_rtx_PLUS (DImode, temp4, temp2)));
1908 emit_insn (gen_setlo (op0, temp5, op1));
1912 /* Old old old backwards compatibility kruft here.
1913 Essentially it is MEDLOW with a fixed 64-bit
1914 virtual base added to all data segment addresses.
1915 Text-segment stuff is computed like MEDANY, we can't
1916 reuse the code above because the relocation knobs
1919 Data segment: sethi %hi(symbol), %temp1
1920 add %temp1, EMBMEDANY_BASE_REG, %temp2
1921 or %temp2, %lo(symbol), %reg */
1922 if (data_segment_operand (op1, GET_MODE (op1)))
1926 temp1 = temp; /* op0 is allowed. */
1931 temp1 = gen_reg_rtx (DImode);
1932 temp2 = gen_reg_rtx (DImode);
1935 emit_insn (gen_embmedany_sethi (temp1, op1));
1936 emit_insn (gen_embmedany_brsum (temp2, temp1));
1937 emit_insn (gen_embmedany_losum (op0, temp2, op1));
1940 /* Text segment: sethi %uhi(symbol), %temp1
1941 sethi %hi(symbol), %temp2
1942 or %temp1, %ulo(symbol), %temp3
1943 sllx %temp3, 32, %temp4
1944 or %temp4, %temp2, %temp5
1945 or %temp5, %lo(symbol), %reg */
1950 /* It is possible that one of the registers we got for operands[2]
1951 might coincide with that of operands[0] (which is why we made
1952 it TImode). Pick the other one to use as our scratch. */
1953 if (rtx_equal_p (temp, op0))
1955 gcc_assert (ti_temp);
1956 temp = gen_rtx_REG (DImode, REGNO (temp) + 1);
1959 temp2 = temp; /* op0 is _not_ allowed, see above. */
1966 temp1 = gen_reg_rtx (DImode);
1967 temp2 = gen_reg_rtx (DImode);
1968 temp3 = gen_reg_rtx (DImode);
1969 temp4 = gen_reg_rtx (DImode);
1970 temp5 = gen_reg_rtx (DImode);
1973 emit_insn (gen_embmedany_textuhi (temp1, op1));
1974 emit_insn (gen_embmedany_texthi (temp2, op1));
1975 emit_insn (gen_embmedany_textulo (temp3, temp1, op1));
1976 emit_insn (gen_rtx_SET (VOIDmode, temp4,
1977 gen_rtx_ASHIFT (DImode, temp3, GEN_INT (32))));
1978 emit_insn (gen_rtx_SET (VOIDmode, temp5,
1979 gen_rtx_PLUS (DImode, temp4, temp2)));
1980 emit_insn (gen_embmedany_textlo (op0, temp5, op1));
1989 #if HOST_BITS_PER_WIDE_INT == 32
1991 sparc_emit_set_const64 (rtx op0 ATTRIBUTE_UNUSED, rtx op1 ATTRIBUTE_UNUSED)
1996 /* These avoid problems when cross compiling. If we do not
1997 go through all this hair then the optimizer will see
1998 invalid REG_EQUAL notes or in some cases none at all. */
1999 static rtx gen_safe_HIGH64 (rtx, HOST_WIDE_INT);
2000 static rtx gen_safe_SET64 (rtx, HOST_WIDE_INT);
2001 static rtx gen_safe_OR64 (rtx, HOST_WIDE_INT);
2002 static rtx gen_safe_XOR64 (rtx, HOST_WIDE_INT);
2004 /* The optimizer is not to assume anything about exactly
2005 which bits are set for a HIGH, they are unspecified.
2006 Unfortunately this leads to many missed optimizations
2007 during CSE. We mask out the non-HIGH bits, and matches
2008 a plain movdi, to alleviate this problem. */
2010 gen_safe_HIGH64 (rtx dest, HOST_WIDE_INT val)
2012 return gen_rtx_SET (VOIDmode, dest, GEN_INT (val & ~(HOST_WIDE_INT)0x3ff));
2016 gen_safe_SET64 (rtx dest, HOST_WIDE_INT val)
2018 return gen_rtx_SET (VOIDmode, dest, GEN_INT (val));
2022 gen_safe_OR64 (rtx src, HOST_WIDE_INT val)
2024 return gen_rtx_IOR (DImode, src, GEN_INT (val));
2028 gen_safe_XOR64 (rtx src, HOST_WIDE_INT val)
2030 return gen_rtx_XOR (DImode, src, GEN_INT (val));
2033 /* Worker routines for 64-bit constant formation on arch64.
2034 One of the key things to be doing in these emissions is
2035 to create as many temp REGs as possible. This makes it
2036 possible for half-built constants to be used later when
2037 such values are similar to something required later on.
2038 Without doing this, the optimizer cannot see such
2041 static void sparc_emit_set_const64_quick1 (rtx, rtx,
2042 unsigned HOST_WIDE_INT, int);
2045 sparc_emit_set_const64_quick1 (rtx op0, rtx temp,
2046 unsigned HOST_WIDE_INT low_bits, int is_neg)
2048 unsigned HOST_WIDE_INT high_bits;
2051 high_bits = (~low_bits) & 0xffffffff;
2053 high_bits = low_bits;
2055 emit_insn (gen_safe_HIGH64 (temp, high_bits));
2058 emit_insn (gen_rtx_SET (VOIDmode, op0,
2059 gen_safe_OR64 (temp, (high_bits & 0x3ff))));
2063 /* If we are XOR'ing with -1, then we should emit a one's complement
2064 instead. This way the combiner will notice logical operations
2065 such as ANDN later on and substitute. */
2066 if ((low_bits & 0x3ff) == 0x3ff)
2068 emit_insn (gen_rtx_SET (VOIDmode, op0,
2069 gen_rtx_NOT (DImode, temp)));
2073 emit_insn (gen_rtx_SET (VOIDmode, op0,
2074 gen_safe_XOR64 (temp,
2075 (-(HOST_WIDE_INT)0x400
2076 | (low_bits & 0x3ff)))));
2081 static void sparc_emit_set_const64_quick2 (rtx, rtx, unsigned HOST_WIDE_INT,
2082 unsigned HOST_WIDE_INT, int);
2085 sparc_emit_set_const64_quick2 (rtx op0, rtx temp,
2086 unsigned HOST_WIDE_INT high_bits,
2087 unsigned HOST_WIDE_INT low_immediate,
2092 if ((high_bits & 0xfffffc00) != 0)
2094 emit_insn (gen_safe_HIGH64 (temp, high_bits));
2095 if ((high_bits & ~0xfffffc00) != 0)
2096 emit_insn (gen_rtx_SET (VOIDmode, op0,
2097 gen_safe_OR64 (temp, (high_bits & 0x3ff))));
2103 emit_insn (gen_safe_SET64 (temp, high_bits));
2107 /* Now shift it up into place. */
2108 emit_insn (gen_rtx_SET (VOIDmode, op0,
2109 gen_rtx_ASHIFT (DImode, temp2,
2110 GEN_INT (shift_count))));
2112 /* If there is a low immediate part piece, finish up by
2113 putting that in as well. */
2114 if (low_immediate != 0)
2115 emit_insn (gen_rtx_SET (VOIDmode, op0,
2116 gen_safe_OR64 (op0, low_immediate)));
2119 static void sparc_emit_set_const64_longway (rtx, rtx, unsigned HOST_WIDE_INT,
2120 unsigned HOST_WIDE_INT);
2122 /* Full 64-bit constant decomposition. Even though this is the
2123 'worst' case, we still optimize a few things away. */
2125 sparc_emit_set_const64_longway (rtx op0, rtx temp,
2126 unsigned HOST_WIDE_INT high_bits,
2127 unsigned HOST_WIDE_INT low_bits)
2131 if (can_create_pseudo_p ())
2132 sub_temp = gen_reg_rtx (DImode);
2134 if ((high_bits & 0xfffffc00) != 0)
2136 emit_insn (gen_safe_HIGH64 (temp, high_bits));
2137 if ((high_bits & ~0xfffffc00) != 0)
2138 emit_insn (gen_rtx_SET (VOIDmode,
2140 gen_safe_OR64 (temp, (high_bits & 0x3ff))));
2146 emit_insn (gen_safe_SET64 (temp, high_bits));
2150 if (can_create_pseudo_p ())
2152 rtx temp2 = gen_reg_rtx (DImode);
2153 rtx temp3 = gen_reg_rtx (DImode);
2154 rtx temp4 = gen_reg_rtx (DImode);
2156 emit_insn (gen_rtx_SET (VOIDmode, temp4,
2157 gen_rtx_ASHIFT (DImode, sub_temp,
2160 emit_insn (gen_safe_HIGH64 (temp2, low_bits));
2161 if ((low_bits & ~0xfffffc00) != 0)
2163 emit_insn (gen_rtx_SET (VOIDmode, temp3,
2164 gen_safe_OR64 (temp2, (low_bits & 0x3ff))));
2165 emit_insn (gen_rtx_SET (VOIDmode, op0,
2166 gen_rtx_PLUS (DImode, temp4, temp3)));
2170 emit_insn (gen_rtx_SET (VOIDmode, op0,
2171 gen_rtx_PLUS (DImode, temp4, temp2)));
2176 rtx low1 = GEN_INT ((low_bits >> (32 - 12)) & 0xfff);
2177 rtx low2 = GEN_INT ((low_bits >> (32 - 12 - 12)) & 0xfff);
2178 rtx low3 = GEN_INT ((low_bits >> (32 - 12 - 12 - 8)) & 0x0ff);
2181 /* We are in the middle of reload, so this is really
2182 painful. However we do still make an attempt to
2183 avoid emitting truly stupid code. */
2184 if (low1 != const0_rtx)
2186 emit_insn (gen_rtx_SET (VOIDmode, op0,
2187 gen_rtx_ASHIFT (DImode, sub_temp,
2188 GEN_INT (to_shift))));
2189 emit_insn (gen_rtx_SET (VOIDmode, op0,
2190 gen_rtx_IOR (DImode, op0, low1)));
2198 if (low2 != const0_rtx)
2200 emit_insn (gen_rtx_SET (VOIDmode, op0,
2201 gen_rtx_ASHIFT (DImode, sub_temp,
2202 GEN_INT (to_shift))));
2203 emit_insn (gen_rtx_SET (VOIDmode, op0,
2204 gen_rtx_IOR (DImode, op0, low2)));
2212 emit_insn (gen_rtx_SET (VOIDmode, op0,
2213 gen_rtx_ASHIFT (DImode, sub_temp,
2214 GEN_INT (to_shift))));
2215 if (low3 != const0_rtx)
2216 emit_insn (gen_rtx_SET (VOIDmode, op0,
2217 gen_rtx_IOR (DImode, op0, low3)));
2222 /* Analyze a 64-bit constant for certain properties. */
2223 static void analyze_64bit_constant (unsigned HOST_WIDE_INT,
2224 unsigned HOST_WIDE_INT,
2225 int *, int *, int *);
2228 analyze_64bit_constant (unsigned HOST_WIDE_INT high_bits,
2229 unsigned HOST_WIDE_INT low_bits,
2230 int *hbsp, int *lbsp, int *abbasp)
2232 int lowest_bit_set, highest_bit_set, all_bits_between_are_set;
2235 lowest_bit_set = highest_bit_set = -1;
2239 if ((lowest_bit_set == -1)
2240 && ((low_bits >> i) & 1))
2242 if ((highest_bit_set == -1)
2243 && ((high_bits >> (32 - i - 1)) & 1))
2244 highest_bit_set = (64 - i - 1);
2247 && ((highest_bit_set == -1)
2248 || (lowest_bit_set == -1)));
2254 if ((lowest_bit_set == -1)
2255 && ((high_bits >> i) & 1))
2256 lowest_bit_set = i + 32;
2257 if ((highest_bit_set == -1)
2258 && ((low_bits >> (32 - i - 1)) & 1))
2259 highest_bit_set = 32 - i - 1;
2262 && ((highest_bit_set == -1)
2263 || (lowest_bit_set == -1)));
2265 /* If there are no bits set this should have gone out
2266 as one instruction! */
2267 gcc_assert (lowest_bit_set != -1 && highest_bit_set != -1);
2268 all_bits_between_are_set = 1;
2269 for (i = lowest_bit_set; i <= highest_bit_set; i++)
2273 if ((low_bits & (1 << i)) != 0)
2278 if ((high_bits & (1 << (i - 32))) != 0)
2281 all_bits_between_are_set = 0;
2284 *hbsp = highest_bit_set;
2285 *lbsp = lowest_bit_set;
2286 *abbasp = all_bits_between_are_set;
2289 static int const64_is_2insns (unsigned HOST_WIDE_INT, unsigned HOST_WIDE_INT);
2292 const64_is_2insns (unsigned HOST_WIDE_INT high_bits,
2293 unsigned HOST_WIDE_INT low_bits)
2295 int highest_bit_set, lowest_bit_set, all_bits_between_are_set;
2298 || high_bits == 0xffffffff)
2301 analyze_64bit_constant (high_bits, low_bits,
2302 &highest_bit_set, &lowest_bit_set,
2303 &all_bits_between_are_set);
2305 if ((highest_bit_set == 63
2306 || lowest_bit_set == 0)
2307 && all_bits_between_are_set != 0)
2310 if ((highest_bit_set - lowest_bit_set) < 21)
2316 static unsigned HOST_WIDE_INT create_simple_focus_bits (unsigned HOST_WIDE_INT,
2317 unsigned HOST_WIDE_INT,
2320 static unsigned HOST_WIDE_INT
2321 create_simple_focus_bits (unsigned HOST_WIDE_INT high_bits,
2322 unsigned HOST_WIDE_INT low_bits,
2323 int lowest_bit_set, int shift)
2325 HOST_WIDE_INT hi, lo;
2327 if (lowest_bit_set < 32)
2329 lo = (low_bits >> lowest_bit_set) << shift;
2330 hi = ((high_bits << (32 - lowest_bit_set)) << shift);
2335 hi = ((high_bits >> (lowest_bit_set - 32)) << shift);
2337 gcc_assert (! (hi & lo));
2341 /* Here we are sure to be arch64 and this is an integer constant
2342 being loaded into a register. Emit the most efficient
2343 insn sequence possible. Detection of all the 1-insn cases
2344 has been done already. */
2346 sparc_emit_set_const64 (rtx op0, rtx op1)
2348 unsigned HOST_WIDE_INT high_bits, low_bits;
2349 int lowest_bit_set, highest_bit_set;
2350 int all_bits_between_are_set;
2353 /* Sanity check that we know what we are working with. */
2354 gcc_assert (TARGET_ARCH64
2355 && (GET_CODE (op0) == SUBREG
2356 || (REG_P (op0) && ! SPARC_FP_REG_P (REGNO (op0)))));
2358 if (! can_create_pseudo_p ())
2361 if (GET_CODE (op1) != CONST_INT)
2363 sparc_emit_set_symbolic_const64 (op0, op1, temp);
2368 temp = gen_reg_rtx (DImode);
2370 high_bits = ((INTVAL (op1) >> 32) & 0xffffffff);
2371 low_bits = (INTVAL (op1) & 0xffffffff);
2373 /* low_bits bits 0 --> 31
2374 high_bits bits 32 --> 63 */
2376 analyze_64bit_constant (high_bits, low_bits,
2377 &highest_bit_set, &lowest_bit_set,
2378 &all_bits_between_are_set);
2380 /* First try for a 2-insn sequence. */
2382 /* These situations are preferred because the optimizer can
2383 * do more things with them:
2385 * sllx %reg, shift, %reg
2387 * srlx %reg, shift, %reg
2388 * 3) mov some_small_const, %reg
2389 * sllx %reg, shift, %reg
2391 if (((highest_bit_set == 63
2392 || lowest_bit_set == 0)
2393 && all_bits_between_are_set != 0)
2394 || ((highest_bit_set - lowest_bit_set) < 12))
2396 HOST_WIDE_INT the_const = -1;
2397 int shift = lowest_bit_set;
2399 if ((highest_bit_set != 63
2400 && lowest_bit_set != 0)
2401 || all_bits_between_are_set == 0)
2404 create_simple_focus_bits (high_bits, low_bits,
2407 else if (lowest_bit_set == 0)
2408 shift = -(63 - highest_bit_set);
2410 gcc_assert (SPARC_SIMM13_P (the_const));
2411 gcc_assert (shift != 0);
2413 emit_insn (gen_safe_SET64 (temp, the_const));
2415 emit_insn (gen_rtx_SET (VOIDmode,
2417 gen_rtx_ASHIFT (DImode,
2421 emit_insn (gen_rtx_SET (VOIDmode,
2423 gen_rtx_LSHIFTRT (DImode,
2425 GEN_INT (-shift))));
2429 /* Now a range of 22 or less bits set somewhere.
2430 * 1) sethi %hi(focus_bits), %reg
2431 * sllx %reg, shift, %reg
2432 * 2) sethi %hi(focus_bits), %reg
2433 * srlx %reg, shift, %reg
2435 if ((highest_bit_set - lowest_bit_set) < 21)
2437 unsigned HOST_WIDE_INT focus_bits =
2438 create_simple_focus_bits (high_bits, low_bits,
2439 lowest_bit_set, 10);
2441 gcc_assert (SPARC_SETHI_P (focus_bits));
2442 gcc_assert (lowest_bit_set != 10);
2444 emit_insn (gen_safe_HIGH64 (temp, focus_bits));
2446 /* If lowest_bit_set == 10 then a sethi alone could have done it. */
2447 if (lowest_bit_set < 10)
2448 emit_insn (gen_rtx_SET (VOIDmode,
2450 gen_rtx_LSHIFTRT (DImode, temp,
2451 GEN_INT (10 - lowest_bit_set))));
2452 else if (lowest_bit_set > 10)
2453 emit_insn (gen_rtx_SET (VOIDmode,
2455 gen_rtx_ASHIFT (DImode, temp,
2456 GEN_INT (lowest_bit_set - 10))));
2460 /* 1) sethi %hi(low_bits), %reg
2461 * or %reg, %lo(low_bits), %reg
2462 * 2) sethi %hi(~low_bits), %reg
2463 * xor %reg, %lo(-0x400 | (low_bits & 0x3ff)), %reg
2466 || high_bits == 0xffffffff)
2468 sparc_emit_set_const64_quick1 (op0, temp, low_bits,
2469 (high_bits == 0xffffffff));
2473 /* Now, try 3-insn sequences. */
2475 /* 1) sethi %hi(high_bits), %reg
2476 * or %reg, %lo(high_bits), %reg
2477 * sllx %reg, 32, %reg
2481 sparc_emit_set_const64_quick2 (op0, temp, high_bits, 0, 32);
2485 /* We may be able to do something quick
2486 when the constant is negated, so try that. */
2487 if (const64_is_2insns ((~high_bits) & 0xffffffff,
2488 (~low_bits) & 0xfffffc00))
2490 /* NOTE: The trailing bits get XOR'd so we need the
2491 non-negated bits, not the negated ones. */
2492 unsigned HOST_WIDE_INT trailing_bits = low_bits & 0x3ff;
2494 if ((((~high_bits) & 0xffffffff) == 0
2495 && ((~low_bits) & 0x80000000) == 0)
2496 || (((~high_bits) & 0xffffffff) == 0xffffffff
2497 && ((~low_bits) & 0x80000000) != 0))
2499 unsigned HOST_WIDE_INT fast_int = (~low_bits & 0xffffffff);
2501 if ((SPARC_SETHI_P (fast_int)
2502 && (~high_bits & 0xffffffff) == 0)
2503 || SPARC_SIMM13_P (fast_int))
2504 emit_insn (gen_safe_SET64 (temp, fast_int));
2506 sparc_emit_set_const64 (temp, GEN_INT (fast_int));
2511 negated_const = GEN_INT (((~low_bits) & 0xfffffc00) |
2512 (((HOST_WIDE_INT)((~high_bits) & 0xffffffff))<<32));
2513 sparc_emit_set_const64 (temp, negated_const);
2516 /* If we are XOR'ing with -1, then we should emit a one's complement
2517 instead. This way the combiner will notice logical operations
2518 such as ANDN later on and substitute. */
2519 if (trailing_bits == 0x3ff)
2521 emit_insn (gen_rtx_SET (VOIDmode, op0,
2522 gen_rtx_NOT (DImode, temp)));
2526 emit_insn (gen_rtx_SET (VOIDmode,
2528 gen_safe_XOR64 (temp,
2529 (-0x400 | trailing_bits))));
2534 /* 1) sethi %hi(xxx), %reg
2535 * or %reg, %lo(xxx), %reg
2536 * sllx %reg, yyy, %reg
2538 * ??? This is just a generalized version of the low_bits==0
2539 * thing above, FIXME...
2541 if ((highest_bit_set - lowest_bit_set) < 32)
2543 unsigned HOST_WIDE_INT focus_bits =
2544 create_simple_focus_bits (high_bits, low_bits,
2547 /* We can't get here in this state. */
2548 gcc_assert (highest_bit_set >= 32 && lowest_bit_set < 32);
2550 /* So what we know is that the set bits straddle the
2551 middle of the 64-bit word. */
2552 sparc_emit_set_const64_quick2 (op0, temp,
2558 /* 1) sethi %hi(high_bits), %reg
2559 * or %reg, %lo(high_bits), %reg
2560 * sllx %reg, 32, %reg
2561 * or %reg, low_bits, %reg
2563 if (SPARC_SIMM13_P(low_bits)
2564 && ((int)low_bits > 0))
2566 sparc_emit_set_const64_quick2 (op0, temp, high_bits, low_bits, 32);
2570 /* The easiest way when all else fails, is full decomposition. */
2571 sparc_emit_set_const64_longway (op0, temp, high_bits, low_bits);
2573 #endif /* HOST_BITS_PER_WIDE_INT == 32 */
2575 /* Given a comparison code (EQ, NE, etc.) and the first operand of a COMPARE,
2576 return the mode to be used for the comparison. For floating-point,
2577 CCFP[E]mode is used. CC_NOOVmode should be used when the first operand
2578 is a PLUS, MINUS, NEG, or ASHIFT. CCmode should be used when no special
2579 processing is needed. */
2582 select_cc_mode (enum rtx_code op, rtx x, rtx y ATTRIBUTE_UNUSED)
2584 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
2610 else if (GET_CODE (x) == PLUS || GET_CODE (x) == MINUS
2611 || GET_CODE (x) == NEG || GET_CODE (x) == ASHIFT)
2613 if (TARGET_ARCH64 && GET_MODE (x) == DImode)
2614 return CCX_NOOVmode;
2620 if (TARGET_ARCH64 && GET_MODE (x) == DImode)
2627 /* Emit the compare insn and return the CC reg for a CODE comparison
2628 with operands X and Y. */
2631 gen_compare_reg_1 (enum rtx_code code, rtx x, rtx y)
2633 enum machine_mode mode;
2636 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_CC)
2639 mode = SELECT_CC_MODE (code, x, y);
2641 /* ??? We don't have movcc patterns so we cannot generate pseudo regs for the
2642 fcc regs (cse can't tell they're really call clobbered regs and will
2643 remove a duplicate comparison even if there is an intervening function
2644 call - it will then try to reload the cc reg via an int reg which is why
2645 we need the movcc patterns). It is possible to provide the movcc
2646 patterns by using the ldxfsr/stxfsr v9 insns. I tried it: you need two
2647 registers (say %g1,%g5) and it takes about 6 insns. A better fix would be
2648 to tell cse that CCFPE mode registers (even pseudos) are call
2651 /* ??? This is an experiment. Rather than making changes to cse which may
2652 or may not be easy/clean, we do our own cse. This is possible because
2653 we will generate hard registers. Cse knows they're call clobbered (it
2654 doesn't know the same thing about pseudos). If we guess wrong, no big
2655 deal, but if we win, great! */
2657 if (TARGET_V9 && GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
2658 #if 1 /* experiment */
2661 /* We cycle through the registers to ensure they're all exercised. */
2662 static int next_fcc_reg = 0;
2663 /* Previous x,y for each fcc reg. */
2664 static rtx prev_args[4][2];
2666 /* Scan prev_args for x,y. */
2667 for (reg = 0; reg < 4; reg++)
2668 if (prev_args[reg][0] == x && prev_args[reg][1] == y)
2673 prev_args[reg][0] = x;
2674 prev_args[reg][1] = y;
2675 next_fcc_reg = (next_fcc_reg + 1) & 3;
2677 cc_reg = gen_rtx_REG (mode, reg + SPARC_FIRST_V9_FCC_REG);
2680 cc_reg = gen_reg_rtx (mode);
2681 #endif /* ! experiment */
2682 else if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
2683 cc_reg = gen_rtx_REG (mode, SPARC_FCC_REG);
2685 cc_reg = gen_rtx_REG (mode, SPARC_ICC_REG);
2687 /* We shouldn't get there for TFmode if !TARGET_HARD_QUAD. If we do, this
2688 will only result in an unrecognizable insn so no point in asserting. */
2689 emit_insn (gen_rtx_SET (VOIDmode, cc_reg, gen_rtx_COMPARE (mode, x, y)));
2695 /* Emit the compare insn and return the CC reg for the comparison in CMP. */
2698 gen_compare_reg (rtx cmp)
2700 return gen_compare_reg_1 (GET_CODE (cmp), XEXP (cmp, 0), XEXP (cmp, 1));
2703 /* This function is used for v9 only.
2704 DEST is the target of the Scc insn.
2705 CODE is the code for an Scc's comparison.
2706 X and Y are the values we compare.
2708 This function is needed to turn
2711 (gt (reg:CCX 100 %icc)
2715 (gt:DI (reg:CCX 100 %icc)
2718 IE: The instruction recognizer needs to see the mode of the comparison to
2719 find the right instruction. We could use "gt:DI" right in the
2720 define_expand, but leaving it out allows us to handle DI, SI, etc. */
2723 gen_v9_scc (rtx dest, enum rtx_code compare_code, rtx x, rtx y)
2726 && (GET_MODE (x) == DImode
2727 || GET_MODE (dest) == DImode))
2730 /* Try to use the movrCC insns. */
2732 && GET_MODE_CLASS (GET_MODE (x)) == MODE_INT
2734 && v9_regcmp_p (compare_code))
2739 /* Special case for op0 != 0. This can be done with one instruction if
2742 if (compare_code == NE
2743 && GET_MODE (dest) == DImode
2744 && rtx_equal_p (op0, dest))
2746 emit_insn (gen_rtx_SET (VOIDmode, dest,
2747 gen_rtx_IF_THEN_ELSE (DImode,
2748 gen_rtx_fmt_ee (compare_code, DImode,
2755 if (reg_overlap_mentioned_p (dest, op0))
2757 /* Handle the case where dest == x.
2758 We "early clobber" the result. */
2759 op0 = gen_reg_rtx (GET_MODE (x));
2760 emit_move_insn (op0, x);
2763 emit_insn (gen_rtx_SET (VOIDmode, dest, const0_rtx));
2764 if (GET_MODE (op0) != DImode)
2766 temp = gen_reg_rtx (DImode);
2767 convert_move (temp, op0, 0);
2771 emit_insn (gen_rtx_SET (VOIDmode, dest,
2772 gen_rtx_IF_THEN_ELSE (GET_MODE (dest),
2773 gen_rtx_fmt_ee (compare_code, DImode,
2781 x = gen_compare_reg_1 (compare_code, x, y);
2784 gcc_assert (GET_MODE (x) != CC_NOOVmode
2785 && GET_MODE (x) != CCX_NOOVmode);
2787 emit_insn (gen_rtx_SET (VOIDmode, dest, const0_rtx));
2788 emit_insn (gen_rtx_SET (VOIDmode, dest,
2789 gen_rtx_IF_THEN_ELSE (GET_MODE (dest),
2790 gen_rtx_fmt_ee (compare_code,
2791 GET_MODE (x), x, y),
2792 const1_rtx, dest)));
2798 /* Emit an scc insn. For seq, sne, sgeu, and sltu, we can do this
2799 without jumps using the addx/subx instructions. */
2802 emit_scc_insn (rtx operands[])
2809 /* The quad-word fp compare library routines all return nonzero to indicate
2810 true, which is different from the equivalent libgcc routines, so we must
2811 handle them specially here. */
2812 if (GET_MODE (operands[2]) == TFmode && ! TARGET_HARD_QUAD)
2814 operands[1] = sparc_emit_float_lib_cmp (operands[2], operands[3],
2815 GET_CODE (operands[1]));
2816 operands[2] = XEXP (operands[1], 0);
2817 operands[3] = XEXP (operands[1], 1);
2820 code = GET_CODE (operands[1]);
2824 /* For seq/sne on v9 we use the same code as v8 (the addx/subx method has
2825 more applications). The exception to this is "reg != 0" which can
2826 be done in one instruction on v9 (so we do it). */
2829 if (GET_MODE (x) == SImode)
2833 pat = gen_seqsidi_special (operands[0], x, y);
2835 pat = gen_seqsisi_special (operands[0], x, y);
2839 else if (GET_MODE (x) == DImode)
2841 rtx pat = gen_seqdi_special (operands[0], x, y);
2849 if (GET_MODE (x) == SImode)
2853 pat = gen_snesidi_special (operands[0], x, y);
2855 pat = gen_snesisi_special (operands[0], x, y);
2859 else if (GET_MODE (x) == DImode)
2863 pat = gen_snedi_special_vis3 (operands[0], x, y);
2865 pat = gen_snedi_special (operands[0], x, y);
2873 && GET_MODE (x) == DImode
2875 && (code == GTU || code == LTU))
2876 && gen_v9_scc (operands[0], code, x, y))
2879 /* We can do LTU and GEU using the addx/subx instructions too. And
2880 for GTU/LEU, if both operands are registers swap them and fall
2881 back to the easy case. */
2882 if (code == GTU || code == LEU)
2884 if ((GET_CODE (x) == REG || GET_CODE (x) == SUBREG)
2885 && (GET_CODE (y) == REG || GET_CODE (y) == SUBREG))
2890 code = swap_condition (code);
2895 || (!TARGET_VIS3 && code == GEU))
2897 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
2898 gen_rtx_fmt_ee (code, GET_MODE (operands[0]),
2899 gen_compare_reg_1 (code, x, y),
2904 /* All the posibilities to use addx/subx based sequences has been
2905 exhausted, try for a 3 instruction sequence using v9 conditional
2907 if (TARGET_V9 && gen_v9_scc (operands[0], code, x, y))
2910 /* Nope, do branches. */
2914 /* Emit a conditional jump insn for the v9 architecture using comparison code
2915 CODE and jump target LABEL.
2916 This function exists to take advantage of the v9 brxx insns. */
2919 emit_v9_brxx_insn (enum rtx_code code, rtx op0, rtx label)
2921 emit_jump_insn (gen_rtx_SET (VOIDmode,
2923 gen_rtx_IF_THEN_ELSE (VOIDmode,
2924 gen_rtx_fmt_ee (code, GET_MODE (op0),
2926 gen_rtx_LABEL_REF (VOIDmode, label),
2930 /* Emit a conditional jump insn for the UA2011 architecture using
2931 comparison code CODE and jump target LABEL. This function exists
2932 to take advantage of the UA2011 Compare and Branch insns. */
2935 emit_cbcond_insn (enum rtx_code code, rtx op0, rtx op1, rtx label)
2939 if_then_else = gen_rtx_IF_THEN_ELSE (VOIDmode,
2940 gen_rtx_fmt_ee(code, GET_MODE(op0),
2942 gen_rtx_LABEL_REF (VOIDmode, label),
2945 emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, if_then_else));
2949 emit_conditional_branch_insn (rtx operands[])
2951 /* The quad-word fp compare library routines all return nonzero to indicate
2952 true, which is different from the equivalent libgcc routines, so we must
2953 handle them specially here. */
2954 if (GET_MODE (operands[1]) == TFmode && ! TARGET_HARD_QUAD)
2956 operands[0] = sparc_emit_float_lib_cmp (operands[1], operands[2],
2957 GET_CODE (operands[0]));
2958 operands[1] = XEXP (operands[0], 0);
2959 operands[2] = XEXP (operands[0], 1);
2962 /* If we can tell early on that the comparison is against a constant
2963 that won't fit in the 5-bit signed immediate field of a cbcond,
2964 use one of the other v9 conditional branch sequences. */
2966 && GET_CODE (operands[1]) == REG
2967 && (GET_MODE (operands[1]) == SImode
2968 || (TARGET_ARCH64 && GET_MODE (operands[1]) == DImode))
2969 && (GET_CODE (operands[2]) != CONST_INT
2970 || SPARC_SIMM5_P (INTVAL (operands[2]))))
2972 emit_cbcond_insn (GET_CODE (operands[0]), operands[1], operands[2], operands[3]);
2976 if (TARGET_ARCH64 && operands[2] == const0_rtx
2977 && GET_CODE (operands[1]) == REG
2978 && GET_MODE (operands[1]) == DImode)
2980 emit_v9_brxx_insn (GET_CODE (operands[0]), operands[1], operands[3]);
2984 operands[1] = gen_compare_reg (operands[0]);
2985 operands[2] = const0_rtx;
2986 operands[0] = gen_rtx_fmt_ee (GET_CODE (operands[0]), VOIDmode,
2987 operands[1], operands[2]);
2988 emit_jump_insn (gen_cbranchcc4 (operands[0], operands[1], operands[2],
2993 /* Generate a DFmode part of a hard TFmode register.
2994 REG is the TFmode hard register, LOW is 1 for the
2995 low 64bit of the register and 0 otherwise.
2998 gen_df_reg (rtx reg, int low)
3000 int regno = REGNO (reg);
3002 if ((WORDS_BIG_ENDIAN == 0) ^ (low != 0))
3003 regno += (TARGET_ARCH64 && SPARC_INT_REG_P (regno)) ? 1 : 2;
3004 return gen_rtx_REG (DFmode, regno);
3007 /* Generate a call to FUNC with OPERANDS. Operand 0 is the return value.
3008 Unlike normal calls, TFmode operands are passed by reference. It is
3009 assumed that no more than 3 operands are required. */
3012 emit_soft_tfmode_libcall (const char *func_name, int nargs, rtx *operands)
3014 rtx ret_slot = NULL, arg[3], func_sym;
3017 /* We only expect to be called for conversions, unary, and binary ops. */
3018 gcc_assert (nargs == 2 || nargs == 3);
3020 for (i = 0; i < nargs; ++i)
3022 rtx this_arg = operands[i];
3025 /* TFmode arguments and return values are passed by reference. */
3026 if (GET_MODE (this_arg) == TFmode)
3028 int force_stack_temp;
3030 force_stack_temp = 0;
3031 if (TARGET_BUGGY_QP_LIB && i == 0)
3032 force_stack_temp = 1;
3034 if (GET_CODE (this_arg) == MEM
3035 && ! force_stack_temp)
3037 tree expr = MEM_EXPR (this_arg);
3039 mark_addressable (expr);
3040 this_arg = XEXP (this_arg, 0);
3042 else if (CONSTANT_P (this_arg)
3043 && ! force_stack_temp)
3045 this_slot = force_const_mem (TFmode, this_arg);
3046 this_arg = XEXP (this_slot, 0);
3050 this_slot = assign_stack_temp (TFmode, GET_MODE_SIZE (TFmode));
3052 /* Operand 0 is the return value. We'll copy it out later. */
3054 emit_move_insn (this_slot, this_arg);
3056 ret_slot = this_slot;
3058 this_arg = XEXP (this_slot, 0);
3065 func_sym = gen_rtx_SYMBOL_REF (Pmode, func_name);
3067 if (GET_MODE (operands[0]) == TFmode)
3070 emit_library_call (func_sym, LCT_NORMAL, VOIDmode, 2,
3071 arg[0], GET_MODE (arg[0]),
3072 arg[1], GET_MODE (arg[1]));
3074 emit_library_call (func_sym, LCT_NORMAL, VOIDmode, 3,
3075 arg[0], GET_MODE (arg[0]),
3076 arg[1], GET_MODE (arg[1]),
3077 arg[2], GET_MODE (arg[2]));
3080 emit_move_insn (operands[0], ret_slot);
3086 gcc_assert (nargs == 2);
3088 ret = emit_library_call_value (func_sym, operands[0], LCT_NORMAL,
3089 GET_MODE (operands[0]), 1,
3090 arg[1], GET_MODE (arg[1]));
3092 if (ret != operands[0])
3093 emit_move_insn (operands[0], ret);
3097 /* Expand soft-float TFmode calls to sparc abi routines. */
3100 emit_soft_tfmode_binop (enum rtx_code code, rtx *operands)
3122 emit_soft_tfmode_libcall (func, 3, operands);
3126 emit_soft_tfmode_unop (enum rtx_code code, rtx *operands)
3130 gcc_assert (code == SQRT);
3133 emit_soft_tfmode_libcall (func, 2, operands);
3137 emit_soft_tfmode_cvt (enum rtx_code code, rtx *operands)
3144 switch (GET_MODE (operands[1]))
3157 case FLOAT_TRUNCATE:
3158 switch (GET_MODE (operands[0]))
3172 switch (GET_MODE (operands[1]))
3177 operands[1] = gen_rtx_SIGN_EXTEND (DImode, operands[1]);
3187 case UNSIGNED_FLOAT:
3188 switch (GET_MODE (operands[1]))
3193 operands[1] = gen_rtx_ZERO_EXTEND (DImode, operands[1]);
3204 switch (GET_MODE (operands[0]))
3218 switch (GET_MODE (operands[0]))
3235 emit_soft_tfmode_libcall (func, 2, operands);
3238 /* Expand a hard-float tfmode operation. All arguments must be in
3242 emit_hard_tfmode_operation (enum rtx_code code, rtx *operands)
3246 if (GET_RTX_CLASS (code) == RTX_UNARY)
3248 operands[1] = force_reg (GET_MODE (operands[1]), operands[1]);
3249 op = gen_rtx_fmt_e (code, GET_MODE (operands[0]), operands[1]);
3253 operands[1] = force_reg (GET_MODE (operands[1]), operands[1]);
3254 operands[2] = force_reg (GET_MODE (operands[2]), operands[2]);
3255 op = gen_rtx_fmt_ee (code, GET_MODE (operands[0]),
3256 operands[1], operands[2]);
3259 if (register_operand (operands[0], VOIDmode))
3262 dest = gen_reg_rtx (GET_MODE (operands[0]));
3264 emit_insn (gen_rtx_SET (VOIDmode, dest, op));
3266 if (dest != operands[0])
3267 emit_move_insn (operands[0], dest);
3271 emit_tfmode_binop (enum rtx_code code, rtx *operands)
3273 if (TARGET_HARD_QUAD)
3274 emit_hard_tfmode_operation (code, operands);
3276 emit_soft_tfmode_binop (code, operands);
3280 emit_tfmode_unop (enum rtx_code code, rtx *operands)
3282 if (TARGET_HARD_QUAD)
3283 emit_hard_tfmode_operation (code, operands);
3285 emit_soft_tfmode_unop (code, operands);
3289 emit_tfmode_cvt (enum rtx_code code, rtx *operands)
3291 if (TARGET_HARD_QUAD)
3292 emit_hard_tfmode_operation (code, operands);
3294 emit_soft_tfmode_cvt (code, operands);
3297 /* Return nonzero if a branch/jump/call instruction will be emitting
3298 nop into its delay slot. */
3301 empty_delay_slot (rtx insn)
3305 /* If no previous instruction (should not happen), return true. */
3306 if (PREV_INSN (insn) == NULL)
3309 seq = NEXT_INSN (PREV_INSN (insn));
3310 if (GET_CODE (PATTERN (seq)) == SEQUENCE)
3316 /* Return nonzero if we should emit a nop after a cbcond instruction.
3317 The cbcond instruction does not have a delay slot, however there is
3318 a severe performance penalty if a control transfer appears right
3319 after a cbcond. Therefore we emit a nop when we detect this
3323 emit_cbcond_nop (rtx insn)
3325 rtx next = next_active_insn (insn);
3330 if (NONJUMP_INSN_P (next)
3331 && GET_CODE (PATTERN (next)) == SEQUENCE)
3332 next = XVECEXP (PATTERN (next), 0, 0);
3333 else if (CALL_P (next)
3334 && GET_CODE (PATTERN (next)) == PARALLEL)
3336 rtx delay = XVECEXP (PATTERN (next), 0, 1);
3338 if (GET_CODE (delay) == RETURN)
3340 /* It's a sibling call. Do not emit the nop if we're going
3341 to emit something other than the jump itself as the first
3342 instruction of the sibcall sequence. */
3343 if (sparc_leaf_function_p || TARGET_FLAT)
3348 if (NONJUMP_INSN_P (next))
3354 /* Return nonzero if TRIAL can go into the call delay slot. */
3357 tls_call_delay (rtx trial)
3362 call __tls_get_addr, %tgd_call (foo)
3363 add %l7, %o0, %o0, %tgd_add (foo)
3364 while Sun as/ld does not. */
3365 if (TARGET_GNU_TLS || !TARGET_TLS)
3368 pat = PATTERN (trial);
3370 /* We must reject tgd_add{32|64}, i.e.
3371 (set (reg) (plus (reg) (unspec [(reg) (symbol_ref)] UNSPEC_TLSGD)))
3372 and tldm_add{32|64}, i.e.
3373 (set (reg) (plus (reg) (unspec [(reg) (symbol_ref)] UNSPEC_TLSLDM)))
3375 if (GET_CODE (pat) == SET
3376 && GET_CODE (SET_SRC (pat)) == PLUS)
3378 rtx unspec = XEXP (SET_SRC (pat), 1);
3380 if (GET_CODE (unspec) == UNSPEC
3381 && (XINT (unspec, 1) == UNSPEC_TLSGD
3382 || XINT (unspec, 1) == UNSPEC_TLSLDM))
3389 /* Return nonzero if TRIAL, an insn, can be combined with a 'restore'
3390 instruction. RETURN_P is true if the v9 variant 'return' is to be
3391 considered in the test too.
3393 TRIAL must be a SET whose destination is a REG appropriate for the
3394 'restore' instruction or, if RETURN_P is true, for the 'return'
3398 eligible_for_restore_insn (rtx trial, bool return_p)
3400 rtx pat = PATTERN (trial);
3401 rtx src = SET_SRC (pat);
3402 bool src_is_freg = false;
3405 /* Since we now can do moves between float and integer registers when
3406 VIS3 is enabled, we have to catch this case. We can allow such
3407 moves when doing a 'return' however. */
3409 if (GET_CODE (src_reg) == SUBREG)
3410 src_reg = SUBREG_REG (src_reg);
3411 if (GET_CODE (src_reg) == REG
3412 && SPARC_FP_REG_P (REGNO (src_reg)))
3415 /* The 'restore src,%g0,dest' pattern for word mode and below. */
3416 if (GET_MODE_CLASS (GET_MODE (src)) != MODE_FLOAT
3417 && arith_operand (src, GET_MODE (src))
3421 return GET_MODE_SIZE (GET_MODE (src)) <= GET_MODE_SIZE (DImode);
3423 return GET_MODE_SIZE (GET_MODE (src)) <= GET_MODE_SIZE (SImode);
3426 /* The 'restore src,%g0,dest' pattern for double-word mode. */
3427 else if (GET_MODE_CLASS (GET_MODE (src)) != MODE_FLOAT
3428 && arith_double_operand (src, GET_MODE (src))
3430 return GET_MODE_SIZE (GET_MODE (src)) <= GET_MODE_SIZE (DImode);
3432 /* The 'restore src,%g0,dest' pattern for float if no FPU. */
3433 else if (! TARGET_FPU && register_operand (src, SFmode))
3436 /* The 'restore src,%g0,dest' pattern for double if no FPU. */
3437 else if (! TARGET_FPU && TARGET_ARCH64 && register_operand (src, DFmode))
3440 /* If we have the 'return' instruction, anything that does not use
3441 local or output registers and can go into a delay slot wins. */
3444 && !epilogue_renumber (&pat, 1)
3445 && get_attr_in_uncond_branch_delay (trial)
3446 == IN_UNCOND_BRANCH_DELAY_TRUE)
3449 /* The 'restore src1,src2,dest' pattern for SImode. */
3450 else if (GET_CODE (src) == PLUS
3451 && register_operand (XEXP (src, 0), SImode)
3452 && arith_operand (XEXP (src, 1), SImode))
3455 /* The 'restore src1,src2,dest' pattern for DImode. */
3456 else if (GET_CODE (src) == PLUS
3457 && register_operand (XEXP (src, 0), DImode)
3458 && arith_double_operand (XEXP (src, 1), DImode))
3461 /* The 'restore src1,%lo(src2),dest' pattern. */
3462 else if (GET_CODE (src) == LO_SUM
3463 && ! TARGET_CM_MEDMID
3464 && ((register_operand (XEXP (src, 0), SImode)
3465 && immediate_operand (XEXP (src, 1), SImode))
3467 && register_operand (XEXP (src, 0), DImode)
3468 && immediate_operand (XEXP (src, 1), DImode))))
3471 /* The 'restore src,src,dest' pattern. */
3472 else if (GET_CODE (src) == ASHIFT
3473 && (register_operand (XEXP (src, 0), SImode)
3474 || register_operand (XEXP (src, 0), DImode))
3475 && XEXP (src, 1) == const1_rtx)
3481 /* Return nonzero if TRIAL can go into the function return's delay slot. */
3484 eligible_for_return_delay (rtx trial)
3489 if (! NONJUMP_INSN_P (trial))
3492 if (get_attr_length (trial) != 1)
3495 /* If the function uses __builtin_eh_return, the eh_return machinery
3496 occupies the delay slot. */
3497 if (crtl->calls_eh_return)
3500 /* In the case of a leaf or flat function, anything can go into the slot. */
3501 if (sparc_leaf_function_p || TARGET_FLAT)
3503 get_attr_in_uncond_branch_delay (trial) == IN_UNCOND_BRANCH_DELAY_TRUE;
3505 pat = PATTERN (trial);
3506 if (GET_CODE (pat) == PARALLEL)
3512 for (i = XVECLEN (pat, 0) - 1; i >= 0; i--)
3514 rtx expr = XVECEXP (pat, 0, i);
3515 if (GET_CODE (expr) != SET)
3517 if (GET_CODE (SET_DEST (expr)) != REG)
3519 regno = REGNO (SET_DEST (expr));
3520 if (regno >= 8 && regno < 24)
3523 return !epilogue_renumber (&pat, 1)
3524 && (get_attr_in_uncond_branch_delay (trial)
3525 == IN_UNCOND_BRANCH_DELAY_TRUE);
3528 if (GET_CODE (pat) != SET)
3531 if (GET_CODE (SET_DEST (pat)) != REG)
3534 regno = REGNO (SET_DEST (pat));
3536 /* Otherwise, only operations which can be done in tandem with
3537 a `restore' or `return' insn can go into the delay slot. */
3538 if (regno >= 8 && regno < 24)
3541 /* If this instruction sets up floating point register and we have a return
3542 instruction, it can probably go in. But restore will not work
3544 if (! SPARC_INT_REG_P (regno))
3546 && !epilogue_renumber (&pat, 1)
3547 && get_attr_in_uncond_branch_delay (trial)
3548 == IN_UNCOND_BRANCH_DELAY_TRUE);
3550 return eligible_for_restore_insn (trial, true);
3553 /* Return nonzero if TRIAL can go into the sibling call's delay slot. */
3556 eligible_for_sibcall_delay (rtx trial)
3560 if (! NONJUMP_INSN_P (trial) || GET_CODE (PATTERN (trial)) != SET)
3563 if (get_attr_length (trial) != 1)
3566 pat = PATTERN (trial);
3568 if (sparc_leaf_function_p || TARGET_FLAT)
3570 /* If the tail call is done using the call instruction,
3571 we have to restore %o7 in the delay slot. */
3572 if (LEAF_SIBCALL_SLOT_RESERVED_P)
3575 /* %g1 is used to build the function address */
3576 if (reg_mentioned_p (gen_rtx_REG (Pmode, 1), pat))
3582 /* Otherwise, only operations which can be done in tandem with
3583 a `restore' insn can go into the delay slot. */
3584 if (GET_CODE (SET_DEST (pat)) != REG
3585 || (REGNO (SET_DEST (pat)) >= 8 && REGNO (SET_DEST (pat)) < 24)
3586 || ! SPARC_INT_REG_P (REGNO (SET_DEST (pat))))
3589 /* If it mentions %o7, it can't go in, because sibcall will clobber it
3591 if (reg_mentioned_p (gen_rtx_REG (Pmode, 15), pat))
3594 return eligible_for_restore_insn (trial, false);
3597 /* Determine if it's legal to put X into the constant pool. This
3598 is not possible if X contains the address of a symbol that is
3599 not constant (TLS) or not known at final link time (PIC). */
3602 sparc_cannot_force_const_mem (enum machine_mode mode, rtx x)
3604 switch (GET_CODE (x))
3609 /* Accept all non-symbolic constants. */
3613 /* Labels are OK iff we are non-PIC. */
3614 return flag_pic != 0;
3617 /* 'Naked' TLS symbol references are never OK,
3618 non-TLS symbols are OK iff we are non-PIC. */
3619 if (SYMBOL_REF_TLS_MODEL (x))
3622 return flag_pic != 0;
3625 return sparc_cannot_force_const_mem (mode, XEXP (x, 0));
3628 return sparc_cannot_force_const_mem (mode, XEXP (x, 0))
3629 || sparc_cannot_force_const_mem (mode, XEXP (x, 1));
3637 /* Global Offset Table support. */
3638 static GTY(()) rtx got_helper_rtx = NULL_RTX;
3639 static GTY(()) rtx global_offset_table_rtx = NULL_RTX;
3641 /* Return the SYMBOL_REF for the Global Offset Table. */
3643 static GTY(()) rtx sparc_got_symbol = NULL_RTX;
3648 if (!sparc_got_symbol)
3649 sparc_got_symbol = gen_rtx_SYMBOL_REF (Pmode, "_GLOBAL_OFFSET_TABLE_");
3651 return sparc_got_symbol;
3654 /* Ensure that we are not using patterns that are not OK with PIC. */
3664 op = recog_data.operand[i];
3665 gcc_assert (GET_CODE (op) != SYMBOL_REF
3666 && (GET_CODE (op) != CONST
3667 || (GET_CODE (XEXP (op, 0)) == MINUS
3668 && XEXP (XEXP (op, 0), 0) == sparc_got ()
3669 && GET_CODE (XEXP (XEXP (op, 0), 1)) == CONST)));
3676 /* Return true if X is an address which needs a temporary register when
3677 reloaded while generating PIC code. */
3680 pic_address_needs_scratch (rtx x)
3682 /* An address which is a symbolic plus a non SMALL_INT needs a temp reg. */
3683 if (GET_CODE (x) == CONST && GET_CODE (XEXP (x, 0)) == PLUS
3684 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF
3685 && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT
3686 && ! SMALL_INT (XEXP (XEXP (x, 0), 1)))
3692 /* Determine if a given RTX is a valid constant. We already know this
3693 satisfies CONSTANT_P. */
3696 sparc_legitimate_constant_p (enum machine_mode mode, rtx x)
3698 switch (GET_CODE (x))
3702 if (sparc_tls_referenced_p (x))
3707 if (GET_MODE (x) == VOIDmode)
3710 /* Floating point constants are generally not ok.
3711 The only exception is 0.0 and all-ones in VIS. */
3713 && SCALAR_FLOAT_MODE_P (mode)
3714 && (const_zero_operand (x, mode)
3715 || const_all_ones_operand (x, mode)))
3721 /* Vector constants are generally not ok.
3722 The only exception is 0 or -1 in VIS. */
3724 && (const_zero_operand (x, mode)
3725 || const_all_ones_operand (x, mode)))
3737 /* Determine if a given RTX is a valid constant address. */
3740 constant_address_p (rtx x)
3742 switch (GET_CODE (x))
3750 if (flag_pic && pic_address_needs_scratch (x))
3752 return sparc_legitimate_constant_p (Pmode, x);
3755 return !flag_pic && sparc_legitimate_constant_p (Pmode, x);
3762 /* Nonzero if the constant value X is a legitimate general operand
3763 when generating PIC code. It is given that flag_pic is on and
3764 that X satisfies CONSTANT_P or is a CONST_DOUBLE. */
3767 legitimate_pic_operand_p (rtx x)
3769 if (pic_address_needs_scratch (x))
3771 if (sparc_tls_referenced_p (x))
3776 #define RTX_OK_FOR_OFFSET_P(X, MODE) \
3778 && INTVAL (X) >= -0x1000 \
3779 && INTVAL (X) < (0x1000 - GET_MODE_SIZE (MODE)))
3781 #define RTX_OK_FOR_OLO10_P(X, MODE) \
3783 && INTVAL (X) >= -0x1000 \
3784 && INTVAL (X) < (0xc00 - GET_MODE_SIZE (MODE)))
3786 /* Handle the TARGET_LEGITIMATE_ADDRESS_P target hook.
3788 On SPARC, the actual legitimate addresses must be REG+REG or REG+SMALLINT
3789 ordinarily. This changes a bit when generating PIC. */
3792 sparc_legitimate_address_p (enum machine_mode mode, rtx addr, bool strict)
3794 rtx rs1 = NULL, rs2 = NULL, imm1 = NULL;
3796 if (REG_P (addr) || GET_CODE (addr) == SUBREG)
3798 else if (GET_CODE (addr) == PLUS)
3800 rs1 = XEXP (addr, 0);
3801 rs2 = XEXP (addr, 1);
3803 /* Canonicalize. REG comes first, if there are no regs,
3804 LO_SUM comes first. */
3806 && GET_CODE (rs1) != SUBREG
3808 || GET_CODE (rs2) == SUBREG
3809 || (GET_CODE (rs2) == LO_SUM && GET_CODE (rs1) != LO_SUM)))
3811 rs1 = XEXP (addr, 1);
3812 rs2 = XEXP (addr, 0);
3816 && rs1 == pic_offset_table_rtx
3818 && GET_CODE (rs2) != SUBREG
3819 && GET_CODE (rs2) != LO_SUM
3820 && GET_CODE (rs2) != MEM
3821 && !(GET_CODE (rs2) == SYMBOL_REF && SYMBOL_REF_TLS_MODEL (rs2))
3822 && (! symbolic_operand (rs2, VOIDmode) || mode == Pmode)
3823 && (GET_CODE (rs2) != CONST_INT || SMALL_INT (rs2)))
3825 || GET_CODE (rs1) == SUBREG)
3826 && RTX_OK_FOR_OFFSET_P (rs2, mode)))
3831 else if ((REG_P (rs1) || GET_CODE (rs1) == SUBREG)
3832 && (REG_P (rs2) || GET_CODE (rs2) == SUBREG))
3834 /* We prohibit REG + REG for TFmode when there are no quad move insns
3835 and we consequently need to split. We do this because REG+REG
3836 is not an offsettable address. If we get the situation in reload
3837 where source and destination of a movtf pattern are both MEMs with
3838 REG+REG address, then only one of them gets converted to an
3839 offsettable address. */
3841 && ! (TARGET_ARCH64 && TARGET_HARD_QUAD))
3844 /* Likewise for TImode, but in all cases. */
3848 /* We prohibit REG + REG on ARCH32 if not optimizing for
3849 DFmode/DImode because then mem_min_alignment is likely to be zero
3850 after reload and the forced split would lack a matching splitter
3852 if (TARGET_ARCH32 && !optimize
3853 && (mode == DFmode || mode == DImode))
3856 else if (USE_AS_OFFSETABLE_LO10
3857 && GET_CODE (rs1) == LO_SUM
3859 && ! TARGET_CM_MEDMID
3860 && RTX_OK_FOR_OLO10_P (rs2, mode))
3863 imm1 = XEXP (rs1, 1);
3864 rs1 = XEXP (rs1, 0);
3865 if (!CONSTANT_P (imm1)
3866 || (GET_CODE (rs1) == SYMBOL_REF && SYMBOL_REF_TLS_MODEL (rs1)))
3870 else if (GET_CODE (addr) == LO_SUM)
3872 rs1 = XEXP (addr, 0);
3873 imm1 = XEXP (addr, 1);
3875 if (!CONSTANT_P (imm1)
3876 || (GET_CODE (rs1) == SYMBOL_REF && SYMBOL_REF_TLS_MODEL (rs1)))
3879 /* We can't allow TFmode in 32-bit mode, because an offset greater
3880 than the alignment (8) may cause the LO_SUM to overflow. */
3881 if (mode == TFmode && TARGET_ARCH32)
3884 else if (GET_CODE (addr) == CONST_INT && SMALL_INT (addr))
3889 if (GET_CODE (rs1) == SUBREG)
3890 rs1 = SUBREG_REG (rs1);
3896 if (GET_CODE (rs2) == SUBREG)
3897 rs2 = SUBREG_REG (rs2);
3904 if (!REGNO_OK_FOR_BASE_P (REGNO (rs1))
3905 || (rs2 && !REGNO_OK_FOR_BASE_P (REGNO (rs2))))
3910 if ((! SPARC_INT_REG_P (REGNO (rs1))
3911 && REGNO (rs1) != FRAME_POINTER_REGNUM
3912 && REGNO (rs1) < FIRST_PSEUDO_REGISTER)
3914 && (! SPARC_INT_REG_P (REGNO (rs2))
3915 && REGNO (rs2) != FRAME_POINTER_REGNUM
3916 && REGNO (rs2) < FIRST_PSEUDO_REGISTER)))
3922 /* Return the SYMBOL_REF for the tls_get_addr function. */
3924 static GTY(()) rtx sparc_tls_symbol = NULL_RTX;
3927 sparc_tls_get_addr (void)
3929 if (!sparc_tls_symbol)
3930 sparc_tls_symbol = gen_rtx_SYMBOL_REF (Pmode, "__tls_get_addr");
3932 return sparc_tls_symbol;
3935 /* Return the Global Offset Table to be used in TLS mode. */
3938 sparc_tls_got (void)
3940 /* In PIC mode, this is just the PIC offset table. */
3943 crtl->uses_pic_offset_table = 1;
3944 return pic_offset_table_rtx;
3947 /* In non-PIC mode, Sun as (unlike GNU as) emits PC-relative relocations for
3948 the GOT symbol with the 32-bit ABI, so we reload the GOT register. */
3949 if (TARGET_SUN_TLS && TARGET_ARCH32)
3951 load_got_register ();
3952 return global_offset_table_rtx;
3955 /* In all other cases, we load a new pseudo with the GOT symbol. */
3956 return copy_to_reg (sparc_got ());
3959 /* Return true if X contains a thread-local symbol. */
3962 sparc_tls_referenced_p (rtx x)
3964 if (!TARGET_HAVE_TLS)
3967 if (GET_CODE (x) == CONST && GET_CODE (XEXP (x, 0)) == PLUS)
3968 x = XEXP (XEXP (x, 0), 0);
3970 if (GET_CODE (x) == SYMBOL_REF && SYMBOL_REF_TLS_MODEL (x))
3973 /* That's all we handle in sparc_legitimize_tls_address for now. */
3977 /* ADDR contains a thread-local SYMBOL_REF. Generate code to compute
3978 this (thread-local) address. */
3981 sparc_legitimize_tls_address (rtx addr)
3983 rtx temp1, temp2, temp3, ret, o0, got, insn;
3985 gcc_assert (can_create_pseudo_p ());
3987 if (GET_CODE (addr) == SYMBOL_REF)
3988 switch (SYMBOL_REF_TLS_MODEL (addr))
3990 case TLS_MODEL_GLOBAL_DYNAMIC:
3992 temp1 = gen_reg_rtx (SImode);
3993 temp2 = gen_reg_rtx (SImode);
3994 ret = gen_reg_rtx (Pmode);
3995 o0 = gen_rtx_REG (Pmode, 8);
3996 got = sparc_tls_got ();
3997 emit_insn (gen_tgd_hi22 (temp1, addr));
3998 emit_insn (gen_tgd_lo10 (temp2, temp1, addr));
4001 emit_insn (gen_tgd_add32 (o0, got, temp2, addr));
4002 insn = emit_call_insn (gen_tgd_call32 (o0, sparc_tls_get_addr (),
4007 emit_insn (gen_tgd_add64 (o0, got, temp2, addr));
4008 insn = emit_call_insn (gen_tgd_call64 (o0, sparc_tls_get_addr (),
4011 use_reg (&CALL_INSN_FUNCTION_USAGE (insn), o0);
4012 insn = get_insns ();
4014 emit_libcall_block (insn, ret, o0, addr);
4017 case TLS_MODEL_LOCAL_DYNAMIC:
4019 temp1 = gen_reg_rtx (SImode);
4020 temp2 = gen_reg_rtx (SImode);
4021 temp3 = gen_reg_rtx (Pmode);
4022 ret = gen_reg_rtx (Pmode);
4023 o0 = gen_rtx_REG (Pmode, 8);
4024 got = sparc_tls_got ();
4025 emit_insn (gen_tldm_hi22 (temp1));
4026 emit_insn (gen_tldm_lo10 (temp2, temp1));
4029 emit_insn (gen_tldm_add32 (o0, got, temp2));
4030 insn = emit_call_insn (gen_tldm_call32 (o0, sparc_tls_get_addr (),
4035 emit_insn (gen_tldm_add64 (o0, got, temp2));
4036 insn = emit_call_insn (gen_tldm_call64 (o0, sparc_tls_get_addr (),
4039 use_reg (&CALL_INSN_FUNCTION_USAGE (insn), o0);
4040 insn = get_insns ();
4042 emit_libcall_block (insn, temp3, o0,
4043 gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx),
4044 UNSPEC_TLSLD_BASE));
4045 temp1 = gen_reg_rtx (SImode);
4046 temp2 = gen_reg_rtx (SImode);
4047 emit_insn (gen_tldo_hix22 (temp1, addr));
4048 emit_insn (gen_tldo_lox10 (temp2, temp1, addr));
4050 emit_insn (gen_tldo_add32 (ret, temp3, temp2, addr));
4052 emit_insn (gen_tldo_add64 (ret, temp3, temp2, addr));
4055 case TLS_MODEL_INITIAL_EXEC:
4056 temp1 = gen_reg_rtx (SImode);
4057 temp2 = gen_reg_rtx (SImode);
4058 temp3 = gen_reg_rtx (Pmode);
4059 got = sparc_tls_got ();
4060 emit_insn (gen_tie_hi22 (temp1, addr));
4061 emit_insn (gen_tie_lo10 (temp2, temp1, addr));
4063 emit_insn (gen_tie_ld32 (temp3, got, temp2, addr));
4065 emit_insn (gen_tie_ld64 (temp3, got, temp2, addr));
4068 ret = gen_reg_rtx (Pmode);
4070 emit_insn (gen_tie_add32 (ret, gen_rtx_REG (Pmode, 7),
4073 emit_insn (gen_tie_add64 (ret, gen_rtx_REG (Pmode, 7),
4077 ret = gen_rtx_PLUS (Pmode, gen_rtx_REG (Pmode, 7), temp3);
4080 case TLS_MODEL_LOCAL_EXEC:
4081 temp1 = gen_reg_rtx (Pmode);
4082 temp2 = gen_reg_rtx (Pmode);
4085 emit_insn (gen_tle_hix22_sp32 (temp1, addr));
4086 emit_insn (gen_tle_lox10_sp32 (temp2, temp1, addr));
4090 emit_insn (gen_tle_hix22_sp64 (temp1, addr));
4091 emit_insn (gen_tle_lox10_sp64 (temp2, temp1, addr));
4093 ret = gen_rtx_PLUS (Pmode, gen_rtx_REG (Pmode, 7), temp2);
4100 else if (GET_CODE (addr) == CONST)
4104 gcc_assert (GET_CODE (XEXP (addr, 0)) == PLUS);
4106 base = sparc_legitimize_tls_address (XEXP (XEXP (addr, 0), 0));
4107 offset = XEXP (XEXP (addr, 0), 1);
4109 base = force_operand (base, NULL_RTX);
4110 if (!(GET_CODE (offset) == CONST_INT && SMALL_INT (offset)))
4111 offset = force_reg (Pmode, offset);
4112 ret = gen_rtx_PLUS (Pmode, base, offset);
4116 gcc_unreachable (); /* for now ... */
4121 /* Legitimize PIC addresses. If the address is already position-independent,
4122 we return ORIG. Newly generated position-independent addresses go into a
4123 reg. This is REG if nonzero, otherwise we allocate register(s) as
4127 sparc_legitimize_pic_address (rtx orig, rtx reg)
4129 bool gotdata_op = false;
4131 if (GET_CODE (orig) == SYMBOL_REF
4132 /* See the comment in sparc_expand_move. */
4133 || (GET_CODE (orig) == LABEL_REF && !can_use_mov_pic_label_ref (orig)))
4135 rtx pic_ref, address;
4140 gcc_assert (can_create_pseudo_p ());
4141 reg = gen_reg_rtx (Pmode);
4146 /* If not during reload, allocate another temp reg here for loading
4147 in the address, so that these instructions can be optimized
4149 rtx temp_reg = (! can_create_pseudo_p ()
4150 ? reg : gen_reg_rtx (Pmode));
4152 /* Must put the SYMBOL_REF inside an UNSPEC here so that cse
4153 won't get confused into thinking that these two instructions
4154 are loading in the true address of the symbol. If in the
4155 future a PIC rtx exists, that should be used instead. */
4158 emit_insn (gen_movdi_high_pic (temp_reg, orig));
4159 emit_insn (gen_movdi_lo_sum_pic (temp_reg, temp_reg, orig));
4163 emit_insn (gen_movsi_high_pic (temp_reg, orig));
4164 emit_insn (gen_movsi_lo_sum_pic (temp_reg, temp_reg, orig));
4172 crtl->uses_pic_offset_table = 1;
4176 insn = emit_insn (gen_movdi_pic_gotdata_op (reg,
4177 pic_offset_table_rtx,
4180 insn = emit_insn (gen_movsi_pic_gotdata_op (reg,
4181 pic_offset_table_rtx,
4187 = gen_const_mem (Pmode,
4188 gen_rtx_PLUS (Pmode,
4189 pic_offset_table_rtx, address));
4190 insn = emit_move_insn (reg, pic_ref);
4193 /* Put a REG_EQUAL note on this insn, so that it can be optimized
4195 set_unique_reg_note (insn, REG_EQUAL, orig);
4198 else if (GET_CODE (orig) == CONST)
4202 if (GET_CODE (XEXP (orig, 0)) == PLUS
4203 && XEXP (XEXP (orig, 0), 0) == pic_offset_table_rtx)
4208 gcc_assert (can_create_pseudo_p ());
4209 reg = gen_reg_rtx (Pmode);
4212 gcc_assert (GET_CODE (XEXP (orig, 0)) == PLUS);
4213 base = sparc_legitimize_pic_address (XEXP (XEXP (orig, 0), 0), reg);
4214 offset = sparc_legitimize_pic_address (XEXP (XEXP (orig, 0), 1),
4215 base == reg ? NULL_RTX : reg);
4217 if (GET_CODE (offset) == CONST_INT)
4219 if (SMALL_INT (offset))
4220 return plus_constant (Pmode, base, INTVAL (offset));
4221 else if (can_create_pseudo_p ())
4222 offset = force_reg (Pmode, offset);
4224 /* If we reach here, then something is seriously wrong. */
4227 return gen_rtx_PLUS (Pmode, base, offset);
4229 else if (GET_CODE (orig) == LABEL_REF)
4230 /* ??? We ought to be checking that the register is live instead, in case
4231 it is eliminated. */
4232 crtl->uses_pic_offset_table = 1;
4237 /* Try machine-dependent ways of modifying an illegitimate address X
4238 to be legitimate. If we find one, return the new, valid address.
4240 OLDX is the address as it was before break_out_memory_refs was called.
4241 In some cases it is useful to look at this to decide what needs to be done.
4243 MODE is the mode of the operand pointed to by X.
4245 On SPARC, change REG+N into REG+REG, and REG+(X*Y) into REG+REG. */
4248 sparc_legitimize_address (rtx x, rtx oldx ATTRIBUTE_UNUSED,
4249 enum machine_mode mode)
4253 if (GET_CODE (x) == PLUS && GET_CODE (XEXP (x, 0)) == MULT)
4254 x = gen_rtx_PLUS (Pmode, XEXP (x, 1),
4255 force_operand (XEXP (x, 0), NULL_RTX));
4256 if (GET_CODE (x) == PLUS && GET_CODE (XEXP (x, 1)) == MULT)
4257 x = gen_rtx_PLUS (Pmode, XEXP (x, 0),
4258 force_operand (XEXP (x, 1), NULL_RTX));
4259 if (GET_CODE (x) == PLUS && GET_CODE (XEXP (x, 0)) == PLUS)
4260 x = gen_rtx_PLUS (Pmode, force_operand (XEXP (x, 0), NULL_RTX),
4262 if (GET_CODE (x) == PLUS && GET_CODE (XEXP (x, 1)) == PLUS)
4263 x = gen_rtx_PLUS (Pmode, XEXP (x, 0),
4264 force_operand (XEXP (x, 1), NULL_RTX));
4266 if (x != orig_x && sparc_legitimate_address_p (mode, x, FALSE))
4269 if (sparc_tls_referenced_p (x))
4270 x = sparc_legitimize_tls_address (x);
4272 x = sparc_legitimize_pic_address (x, NULL_RTX);
4273 else if (GET_CODE (x) == PLUS && CONSTANT_ADDRESS_P (XEXP (x, 1)))
4274 x = gen_rtx_PLUS (Pmode, XEXP (x, 0),
4275 copy_to_mode_reg (Pmode, XEXP (x, 1)));
4276 else if (GET_CODE (x) == PLUS && CONSTANT_ADDRESS_P (XEXP (x, 0)))
4277 x = gen_rtx_PLUS (Pmode, XEXP (x, 1),
4278 copy_to_mode_reg (Pmode, XEXP (x, 0)));
4279 else if (GET_CODE (x) == SYMBOL_REF
4280 || GET_CODE (x) == CONST
4281 || GET_CODE (x) == LABEL_REF)
4282 x = copy_to_suggested_reg (x, NULL_RTX, Pmode);
4287 /* Delegitimize an address that was legitimized by the above function. */
4290 sparc_delegitimize_address (rtx x)
4292 x = delegitimize_mem_from_attrs (x);
4294 if (GET_CODE (x) == LO_SUM && GET_CODE (XEXP (x, 1)) == UNSPEC)
4295 switch (XINT (XEXP (x, 1), 1))
4297 case UNSPEC_MOVE_PIC:
4299 x = XVECEXP (XEXP (x, 1), 0, 0);
4300 gcc_assert (GET_CODE (x) == SYMBOL_REF);
4306 /* This is generated by mov{si,di}_pic_label_ref in PIC mode. */
4307 if (GET_CODE (x) == MINUS
4308 && REG_P (XEXP (x, 0))
4309 && REGNO (XEXP (x, 0)) == PIC_OFFSET_TABLE_REGNUM
4310 && GET_CODE (XEXP (x, 1)) == LO_SUM
4311 && GET_CODE (XEXP (XEXP (x, 1), 1)) == UNSPEC
4312 && XINT (XEXP (XEXP (x, 1), 1), 1) == UNSPEC_MOVE_PIC_LABEL)
4314 x = XVECEXP (XEXP (XEXP (x, 1), 1), 0, 0);
4315 gcc_assert (GET_CODE (x) == LABEL_REF);
4321 /* SPARC implementation of LEGITIMIZE_RELOAD_ADDRESS. Returns a value to
4322 replace the input X, or the original X if no replacement is called for.
4323 The output parameter *WIN is 1 if the calling macro should goto WIN,
4326 For SPARC, we wish to handle addresses by splitting them into
4327 HIGH+LO_SUM pairs, retaining the LO_SUM in the memory reference.
4328 This cuts the number of extra insns by one.
4330 Do nothing when generating PIC code and the address is a symbolic
4331 operand or requires a scratch register. */
4334 sparc_legitimize_reload_address (rtx x, enum machine_mode mode,
4335 int opnum, int type,
4336 int ind_levels ATTRIBUTE_UNUSED, int *win)
4338 /* Decompose SImode constants into HIGH+LO_SUM. */
4340 && (mode != TFmode || TARGET_ARCH64)
4341 && GET_MODE (x) == SImode
4342 && GET_CODE (x) != LO_SUM
4343 && GET_CODE (x) != HIGH
4344 && sparc_cmodel <= CM_MEDLOW
4346 && (symbolic_operand (x, Pmode) || pic_address_needs_scratch (x))))
4348 x = gen_rtx_LO_SUM (GET_MODE (x), gen_rtx_HIGH (GET_MODE (x), x), x);
4349 push_reload (XEXP (x, 0), NULL_RTX, &XEXP (x, 0), NULL,
4350 BASE_REG_CLASS, GET_MODE (x), VOIDmode, 0, 0,
4351 opnum, (enum reload_type)type);
4356 /* We have to recognize what we have already generated above. */
4357 if (GET_CODE (x) == LO_SUM && GET_CODE (XEXP (x, 0)) == HIGH)
4359 push_reload (XEXP (x, 0), NULL_RTX, &XEXP (x, 0), NULL,
4360 BASE_REG_CLASS, GET_MODE (x), VOIDmode, 0, 0,
4361 opnum, (enum reload_type)type);
4370 /* Return true if ADDR (a legitimate address expression)
4371 has an effect that depends on the machine mode it is used for.
4377 is not equivalent to
4379 (mem:QI [%l7+a]) (mem:QI [%l7+a+1])
4381 because [%l7+a+1] is interpreted as the address of (a+1). */
4385 sparc_mode_dependent_address_p (const_rtx addr,
4386 addr_space_t as ATTRIBUTE_UNUSED)
4388 if (flag_pic && GET_CODE (addr) == PLUS)
4390 rtx op0 = XEXP (addr, 0);
4391 rtx op1 = XEXP (addr, 1);
4392 if (op0 == pic_offset_table_rtx
4393 && symbolic_operand (op1, VOIDmode))
4400 #ifdef HAVE_GAS_HIDDEN
4401 # define USE_HIDDEN_LINKONCE 1
4403 # define USE_HIDDEN_LINKONCE 0
4407 get_pc_thunk_name (char name[32], unsigned int regno)
4409 const char *reg_name = reg_names[regno];
4411 /* Skip the leading '%' as that cannot be used in a
4415 if (USE_HIDDEN_LINKONCE)
4416 sprintf (name, "__sparc_get_pc_thunk.%s", reg_name);
4418 ASM_GENERATE_INTERNAL_LABEL (name, "LADDPC", regno);
4421 /* Wrapper around the load_pcrel_sym{si,di} patterns. */
4424 gen_load_pcrel_sym (rtx op0, rtx op1, rtx op2, rtx op3)
4426 int orig_flag_pic = flag_pic;
4429 /* The load_pcrel_sym{si,di} patterns require absolute addressing. */
4432 insn = gen_load_pcrel_symdi (op0, op1, op2, op3);
4434 insn = gen_load_pcrel_symsi (op0, op1, op2, op3);
4435 flag_pic = orig_flag_pic;
4440 /* Emit code to load the GOT register. */
4443 load_got_register (void)
4445 /* In PIC mode, this will retrieve pic_offset_table_rtx. */
4446 if (!global_offset_table_rtx)
4447 global_offset_table_rtx = gen_rtx_REG (Pmode, GLOBAL_OFFSET_TABLE_REGNUM);
4449 if (TARGET_VXWORKS_RTP)
4450 emit_insn (gen_vxworks_load_got ());
4453 /* The GOT symbol is subject to a PC-relative relocation so we need a
4454 helper function to add the PC value and thus get the final value. */
4455 if (!got_helper_rtx)
4458 get_pc_thunk_name (name, GLOBAL_OFFSET_TABLE_REGNUM);
4459 got_helper_rtx = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (name));
4462 emit_insn (gen_load_pcrel_sym (global_offset_table_rtx, sparc_got (),
4464 GEN_INT (GLOBAL_OFFSET_TABLE_REGNUM)));
4467 /* Need to emit this whether or not we obey regdecls,
4468 since setjmp/longjmp can cause life info to screw up.
4469 ??? In the case where we don't obey regdecls, this is not sufficient
4470 since we may not fall out the bottom. */
4471 emit_use (global_offset_table_rtx);
4474 /* Emit a call instruction with the pattern given by PAT. ADDR is the
4475 address of the call target. */
4478 sparc_emit_call_insn (rtx pat, rtx addr)
4482 insn = emit_call_insn (pat);
4484 /* The PIC register is live on entry to VxWorks PIC PLT entries. */
4485 if (TARGET_VXWORKS_RTP
4487 && GET_CODE (addr) == SYMBOL_REF
4488 && (SYMBOL_REF_DECL (addr)
4489 ? !targetm.binds_local_p (SYMBOL_REF_DECL (addr))
4490 : !SYMBOL_REF_LOCAL_P (addr)))
4492 use_reg (&CALL_INSN_FUNCTION_USAGE (insn), pic_offset_table_rtx);
4493 crtl->uses_pic_offset_table = 1;
4497 /* Return 1 if RTX is a MEM which is known to be aligned to at
4498 least a DESIRED byte boundary. */
4501 mem_min_alignment (rtx mem, int desired)
4503 rtx addr, base, offset;
4505 /* If it's not a MEM we can't accept it. */
4506 if (GET_CODE (mem) != MEM)
4510 if (!TARGET_UNALIGNED_DOUBLES
4511 && MEM_ALIGN (mem) / BITS_PER_UNIT >= (unsigned)desired)
4514 /* ??? The rest of the function predates MEM_ALIGN so
4515 there is probably a bit of redundancy. */
4516 addr = XEXP (mem, 0);
4517 base = offset = NULL_RTX;
4518 if (GET_CODE (addr) == PLUS)
4520 if (GET_CODE (XEXP (addr, 0)) == REG)
4522 base = XEXP (addr, 0);
4524 /* What we are saying here is that if the base
4525 REG is aligned properly, the compiler will make
4526 sure any REG based index upon it will be so
4528 if (GET_CODE (XEXP (addr, 1)) == CONST_INT)
4529 offset = XEXP (addr, 1);
4531 offset = const0_rtx;
4534 else if (GET_CODE (addr) == REG)
4537 offset = const0_rtx;
4540 if (base != NULL_RTX)
4542 int regno = REGNO (base);
4544 if (regno != HARD_FRAME_POINTER_REGNUM && regno != STACK_POINTER_REGNUM)
4546 /* Check if the compiler has recorded some information
4547 about the alignment of the base REG. If reload has
4548 completed, we already matched with proper alignments.
4549 If not running global_alloc, reload might give us
4550 unaligned pointer to local stack though. */
4552 && REGNO_POINTER_ALIGN (regno) >= desired * BITS_PER_UNIT)
4553 || (optimize && reload_completed))
4554 && (INTVAL (offset) & (desired - 1)) == 0)
4559 if (((INTVAL (offset) - SPARC_STACK_BIAS) & (desired - 1)) == 0)
4563 else if (! TARGET_UNALIGNED_DOUBLES
4564 || CONSTANT_P (addr)
4565 || GET_CODE (addr) == LO_SUM)
4567 /* Anything else we know is properly aligned unless TARGET_UNALIGNED_DOUBLES
4568 is true, in which case we can only assume that an access is aligned if
4569 it is to a constant address, or the address involves a LO_SUM. */
4573 /* An obviously unaligned address. */
4578 /* Vectors to keep interesting information about registers where it can easily
4579 be got. We used to use the actual mode value as the bit number, but there
4580 are more than 32 modes now. Instead we use two tables: one indexed by
4581 hard register number, and one indexed by mode. */
4583 /* The purpose of sparc_mode_class is to shrink the range of modes so that
4584 they all fit (as bit numbers) in a 32-bit word (again). Each real mode is
4585 mapped into one sparc_mode_class mode. */
4587 enum sparc_mode_class {
4588 H_MODE, S_MODE, D_MODE, T_MODE, O_MODE,
4589 SF_MODE, DF_MODE, TF_MODE, OF_MODE,
4593 /* Modes for single-word and smaller quantities. */
4595 ((1 << (int) H_MODE) | (1 << (int) S_MODE) | (1 << (int) SF_MODE))
4597 /* Modes for double-word and smaller quantities. */
4598 #define D_MODES (S_MODES | (1 << (int) D_MODE) | (1 << DF_MODE))
4600 /* Modes for quad-word and smaller quantities. */
4601 #define T_MODES (D_MODES | (1 << (int) T_MODE) | (1 << (int) TF_MODE))
4603 /* Modes for 8-word and smaller quantities. */
4604 #define O_MODES (T_MODES | (1 << (int) O_MODE) | (1 << (int) OF_MODE))
4606 /* Modes for single-float quantities. */
4607 #define SF_MODES ((1 << (int) S_MODE) | (1 << (int) SF_MODE))
4609 /* Modes for double-float and smaller quantities. */
4610 #define DF_MODES (SF_MODES | (1 << (int) D_MODE) | (1 << DF_MODE))
4612 /* Modes for quad-float and smaller quantities. */
4613 #define TF_MODES (DF_MODES | (1 << (int) TF_MODE))
4615 /* Modes for quad-float pairs and smaller quantities. */
4616 #define OF_MODES (TF_MODES | (1 << (int) OF_MODE))
4618 /* Modes for double-float only quantities. */
4619 #define DF_MODES_NO_S ((1 << (int) D_MODE) | (1 << (int) DF_MODE))
4621 /* Modes for quad-float and double-float only quantities. */
4622 #define TF_MODES_NO_S (DF_MODES_NO_S | (1 << (int) TF_MODE))
4624 /* Modes for quad-float pairs and double-float only quantities. */
4625 #define OF_MODES_NO_S (TF_MODES_NO_S | (1 << (int) OF_MODE))
4627 /* Modes for condition codes. */
4628 #define CC_MODES (1 << (int) CC_MODE)
4629 #define CCFP_MODES (1 << (int) CCFP_MODE)
4631 /* Value is 1 if register/mode pair is acceptable on sparc.
4632 The funny mixture of D and T modes is because integer operations
4633 do not specially operate on tetra quantities, so non-quad-aligned
4634 registers can hold quadword quantities (except %o4 and %i4 because
4635 they cross fixed registers). */
4637 /* This points to either the 32 bit or the 64 bit version. */
4638 const int *hard_regno_mode_classes;
4640 static const int hard_32bit_mode_classes[] = {
4641 S_MODES, S_MODES, T_MODES, S_MODES, T_MODES, S_MODES, D_MODES, S_MODES,
4642 T_MODES, S_MODES, T_MODES, S_MODES, D_MODES, S_MODES, D_MODES, S_MODES,
4643 T_MODES, S_MODES, T_MODES, S_MODES, T_MODES, S_MODES, D_MODES, S_MODES,
4644 T_MODES, S_MODES, T_MODES, S_MODES, D_MODES, S_MODES, D_MODES, S_MODES,
4646 OF_MODES, SF_MODES, DF_MODES, SF_MODES, OF_MODES, SF_MODES, DF_MODES, SF_MODES,
4647 OF_MODES, SF_MODES, DF_MODES, SF_MODES, OF_MODES, SF_MODES, DF_MODES, SF_MODES,
4648 OF_MODES, SF_MODES, DF_MODES, SF_MODES, OF_MODES, SF_MODES, DF_MODES, SF_MODES,
4649 OF_MODES, SF_MODES, DF_MODES, SF_MODES, TF_MODES, SF_MODES, DF_MODES, SF_MODES,
4651 /* FP regs f32 to f63. Only the even numbered registers actually exist,
4652 and none can hold SFmode/SImode values. */
4653 OF_MODES_NO_S, 0, DF_MODES_NO_S, 0, OF_MODES_NO_S, 0, DF_MODES_NO_S, 0,
4654 OF_MODES_NO_S, 0, DF_MODES_NO_S, 0, OF_MODES_NO_S, 0, DF_MODES_NO_S, 0,
4655 OF_MODES_NO_S, 0, DF_MODES_NO_S, 0, OF_MODES_NO_S, 0, DF_MODES_NO_S, 0,
4656 OF_MODES_NO_S, 0, DF_MODES_NO_S, 0, TF_MODES_NO_S, 0, DF_MODES_NO_S, 0,
4659 CCFP_MODES, CCFP_MODES, CCFP_MODES, CCFP_MODES,
4661 /* %icc, %sfp, %gsr */
4662 CC_MODES, 0, D_MODES
4665 static const int hard_64bit_mode_classes[] = {
4666 D_MODES, D_MODES, T_MODES, D_MODES, T_MODES, D_MODES, T_MODES, D_MODES,
4667 O_MODES, D_MODES, T_MODES, D_MODES, T_MODES, D_MODES, T_MODES, D_MODES,
4668 T_MODES, D_MODES, T_MODES, D_MODES, T_MODES, D_MODES, T_MODES, D_MODES,
4669 O_MODES, D_MODES, T_MODES, D_MODES, T_MODES, D_MODES, T_MODES, D_MODES,
4671 OF_MODES, SF_MODES, DF_MODES, SF_MODES, OF_MODES, SF_MODES, DF_MODES, SF_MODES,
4672 OF_MODES, SF_MODES, DF_MODES, SF_MODES, OF_MODES, SF_MODES, DF_MODES, SF_MODES,
4673 OF_MODES, SF_MODES, DF_MODES, SF_MODES, OF_MODES, SF_MODES, DF_MODES, SF_MODES,
4674 OF_MODES, SF_MODES, DF_MODES, SF_MODES, TF_MODES, SF_MODES, DF_MODES, SF_MODES,
4676 /* FP regs f32 to f63. Only the even numbered registers actually exist,
4677 and none can hold SFmode/SImode values. */
4678 OF_MODES_NO_S, 0, DF_MODES_NO_S, 0, OF_MODES_NO_S, 0, DF_MODES_NO_S, 0,
4679 OF_MODES_NO_S, 0, DF_MODES_NO_S, 0, OF_MODES_NO_S, 0, DF_MODES_NO_S, 0,
4680 OF_MODES_NO_S, 0, DF_MODES_NO_S, 0, OF_MODES_NO_S, 0, DF_MODES_NO_S, 0,
4681 OF_MODES_NO_S, 0, DF_MODES_NO_S, 0, TF_MODES_NO_S, 0, DF_MODES_NO_S, 0,
4684 CCFP_MODES, CCFP_MODES, CCFP_MODES, CCFP_MODES,
4686 /* %icc, %sfp, %gsr */
4687 CC_MODES, 0, D_MODES
4690 int sparc_mode_class [NUM_MACHINE_MODES];
4692 enum reg_class sparc_regno_reg_class[FIRST_PSEUDO_REGISTER];
4695 sparc_init_modes (void)
4699 for (i = 0; i < NUM_MACHINE_MODES; i++)
4701 switch (GET_MODE_CLASS (i))
4704 case MODE_PARTIAL_INT:
4705 case MODE_COMPLEX_INT:
4706 if (GET_MODE_SIZE (i) < 4)
4707 sparc_mode_class[i] = 1 << (int) H_MODE;
4708 else if (GET_MODE_SIZE (i) == 4)
4709 sparc_mode_class[i] = 1 << (int) S_MODE;
4710 else if (GET_MODE_SIZE (i) == 8)
4711 sparc_mode_class[i] = 1 << (int) D_MODE;
4712 else if (GET_MODE_SIZE (i) == 16)
4713 sparc_mode_class[i] = 1 << (int) T_MODE;
4714 else if (GET_MODE_SIZE (i) == 32)
4715 sparc_mode_class[i] = 1 << (int) O_MODE;
4717 sparc_mode_class[i] = 0;
4719 case MODE_VECTOR_INT:
4720 if (GET_MODE_SIZE (i) == 4)
4721 sparc_mode_class[i] = 1 << (int) SF_MODE;
4722 else if (GET_MODE_SIZE (i) == 8)
4723 sparc_mode_class[i] = 1 << (int) DF_MODE;
4725 sparc_mode_class[i] = 0;
4728 case MODE_COMPLEX_FLOAT:
4729 if (GET_MODE_SIZE (i) == 4)
4730 sparc_mode_class[i] = 1 << (int) SF_MODE;
4731 else if (GET_MODE_SIZE (i) == 8)
4732 sparc_mode_class[i] = 1 << (int) DF_MODE;
4733 else if (GET_MODE_SIZE (i) == 16)
4734 sparc_mode_class[i] = 1 << (int) TF_MODE;
4735 else if (GET_MODE_SIZE (i) == 32)
4736 sparc_mode_class[i] = 1 << (int) OF_MODE;
4738 sparc_mode_class[i] = 0;
4741 if (i == (int) CCFPmode || i == (int) CCFPEmode)
4742 sparc_mode_class[i] = 1 << (int) CCFP_MODE;
4744 sparc_mode_class[i] = 1 << (int) CC_MODE;
4747 sparc_mode_class[i] = 0;
4753 hard_regno_mode_classes = hard_64bit_mode_classes;
4755 hard_regno_mode_classes = hard_32bit_mode_classes;
4757 /* Initialize the array used by REGNO_REG_CLASS. */
4758 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
4760 if (i < 16 && TARGET_V8PLUS)
4761 sparc_regno_reg_class[i] = I64_REGS;
4762 else if (i < 32 || i == FRAME_POINTER_REGNUM)
4763 sparc_regno_reg_class[i] = GENERAL_REGS;
4765 sparc_regno_reg_class[i] = FP_REGS;
4767 sparc_regno_reg_class[i] = EXTRA_FP_REGS;
4769 sparc_regno_reg_class[i] = FPCC_REGS;
4771 sparc_regno_reg_class[i] = NO_REGS;
4775 /* Return whether REGNO, a global or FP register, must be saved/restored. */
4778 save_global_or_fp_reg_p (unsigned int regno,
4779 int leaf_function ATTRIBUTE_UNUSED)
4781 return !call_used_regs[regno] && df_regs_ever_live_p (regno);
4784 /* Return whether the return address register (%i7) is needed. */
4787 return_addr_reg_needed_p (int leaf_function)
4789 /* If it is live, for example because of __builtin_return_address (0). */
4790 if (df_regs_ever_live_p (RETURN_ADDR_REGNUM))
4793 /* Otherwise, it is needed as save register if %o7 is clobbered. */
4795 /* Loading the GOT register clobbers %o7. */
4796 || crtl->uses_pic_offset_table
4797 || df_regs_ever_live_p (INCOMING_RETURN_ADDR_REGNUM))
4803 /* Return whether REGNO, a local or in register, must be saved/restored. */
4806 save_local_or_in_reg_p (unsigned int regno, int leaf_function)
4808 /* General case: call-saved registers live at some point. */
4809 if (!call_used_regs[regno] && df_regs_ever_live_p (regno))
4812 /* Frame pointer register (%fp) if needed. */
4813 if (regno == HARD_FRAME_POINTER_REGNUM && frame_pointer_needed)
4816 /* Return address register (%i7) if needed. */
4817 if (regno == RETURN_ADDR_REGNUM && return_addr_reg_needed_p (leaf_function))
4820 /* GOT register (%l7) if needed. */
4821 if (regno == PIC_OFFSET_TABLE_REGNUM && crtl->uses_pic_offset_table)
4824 /* If the function accesses prior frames, the frame pointer and the return
4825 address of the previous frame must be saved on the stack. */
4826 if (crtl->accesses_prior_frames
4827 && (regno == HARD_FRAME_POINTER_REGNUM || regno == RETURN_ADDR_REGNUM))
4833 /* Compute the frame size required by the function. This function is called
4834 during the reload pass and also by sparc_expand_prologue. */
4837 sparc_compute_frame_size (HOST_WIDE_INT size, int leaf_function)
4839 HOST_WIDE_INT frame_size, apparent_frame_size;
4840 int args_size, n_global_fp_regs = 0;
4841 bool save_local_in_regs_p = false;
4844 /* If the function allocates dynamic stack space, the dynamic offset is
4845 computed early and contains REG_PARM_STACK_SPACE, so we need to cope. */
4846 if (leaf_function && !cfun->calls_alloca)
4849 args_size = crtl->outgoing_args_size + REG_PARM_STACK_SPACE (cfun->decl);
4851 /* Calculate space needed for global registers. */
4853 for (i = 0; i < 8; i++)
4854 if (save_global_or_fp_reg_p (i, 0))
4855 n_global_fp_regs += 2;
4857 for (i = 0; i < 8; i += 2)
4858 if (save_global_or_fp_reg_p (i, 0) || save_global_or_fp_reg_p (i + 1, 0))
4859 n_global_fp_regs += 2;
4861 /* In the flat window model, find out which local and in registers need to
4862 be saved. We don't reserve space in the current frame for them as they
4863 will be spilled into the register window save area of the caller's frame.
4864 However, as soon as we use this register window save area, we must create
4865 that of the current frame to make it the live one. */
4867 for (i = 16; i < 32; i++)
4868 if (save_local_or_in_reg_p (i, leaf_function))
4870 save_local_in_regs_p = true;
4874 /* Calculate space needed for FP registers. */
4875 for (i = 32; i < (TARGET_V9 ? 96 : 64); i += 2)
4876 if (save_global_or_fp_reg_p (i, 0) || save_global_or_fp_reg_p (i + 1, 0))
4877 n_global_fp_regs += 2;
4880 && n_global_fp_regs == 0
4882 && !save_local_in_regs_p)
4883 frame_size = apparent_frame_size = 0;
4886 /* We subtract STARTING_FRAME_OFFSET, remember it's negative. */
4887 apparent_frame_size = (size - STARTING_FRAME_OFFSET + 7) & -8;
4888 apparent_frame_size += n_global_fp_regs * 4;
4890 /* We need to add the size of the outgoing argument area. */
4891 frame_size = apparent_frame_size + ((args_size + 7) & -8);
4893 /* And that of the register window save area. */
4894 frame_size += FIRST_PARM_OFFSET (cfun->decl);
4896 /* Finally, bump to the appropriate alignment. */
4897 frame_size = SPARC_STACK_ALIGN (frame_size);
4900 /* Set up values for use in prologue and epilogue. */
4901 sparc_frame_size = frame_size;
4902 sparc_apparent_frame_size = apparent_frame_size;
4903 sparc_n_global_fp_regs = n_global_fp_regs;
4904 sparc_save_local_in_regs_p = save_local_in_regs_p;
4909 /* Implement the macro INITIAL_ELIMINATION_OFFSET, return the OFFSET. */
4912 sparc_initial_elimination_offset (int to)
4916 if (to == STACK_POINTER_REGNUM)
4917 offset = sparc_compute_frame_size (get_frame_size (), crtl->is_leaf);
4921 offset += SPARC_STACK_BIAS;
4925 /* Output any necessary .register pseudo-ops. */
4928 sparc_output_scratch_registers (FILE *file ATTRIBUTE_UNUSED)
4930 #ifdef HAVE_AS_REGISTER_PSEUDO_OP
4936 /* Check if %g[2367] were used without
4937 .register being printed for them already. */
4938 for (i = 2; i < 8; i++)
4940 if (df_regs_ever_live_p (i)
4941 && ! sparc_hard_reg_printed [i])
4943 sparc_hard_reg_printed [i] = 1;
4944 /* %g7 is used as TLS base register, use #ignore
4945 for it instead of #scratch. */
4946 fprintf (file, "\t.register\t%%g%d, #%s\n", i,
4947 i == 7 ? "ignore" : "scratch");
4954 #define PROBE_INTERVAL (1 << STACK_CHECK_PROBE_INTERVAL_EXP)
4956 #if PROBE_INTERVAL > 4096
4957 #error Cannot use indexed addressing mode for stack probing
4960 /* Emit code to probe a range of stack addresses from FIRST to FIRST+SIZE,
4961 inclusive. These are offsets from the current stack pointer.
4963 Note that we don't use the REG+REG addressing mode for the probes because
4964 of the stack bias in 64-bit mode. And it doesn't really buy us anything
4965 so the advantages of having a single code win here. */
4968 sparc_emit_probe_stack_range (HOST_WIDE_INT first, HOST_WIDE_INT size)
4970 rtx g1 = gen_rtx_REG (Pmode, 1);
4972 /* See if we have a constant small number of probes to generate. If so,
4973 that's the easy case. */
4974 if (size <= PROBE_INTERVAL)
4976 emit_move_insn (g1, GEN_INT (first));
4977 emit_insn (gen_rtx_SET (VOIDmode, g1,
4978 gen_rtx_MINUS (Pmode, stack_pointer_rtx, g1)));
4979 emit_stack_probe (plus_constant (Pmode, g1, -size));
4982 /* The run-time loop is made up of 10 insns in the generic case while the
4983 compile-time loop is made up of 4+2*(n-2) insns for n # of intervals. */
4984 else if (size <= 5 * PROBE_INTERVAL)
4988 emit_move_insn (g1, GEN_INT (first + PROBE_INTERVAL));
4989 emit_insn (gen_rtx_SET (VOIDmode, g1,
4990 gen_rtx_MINUS (Pmode, stack_pointer_rtx, g1)));
4991 emit_stack_probe (g1);
4993 /* Probe at FIRST + N * PROBE_INTERVAL for values of N from 2 until
4994 it exceeds SIZE. If only two probes are needed, this will not
4995 generate any code. Then probe at FIRST + SIZE. */
4996 for (i = 2 * PROBE_INTERVAL; i < size; i += PROBE_INTERVAL)
4998 emit_insn (gen_rtx_SET (VOIDmode, g1,
4999 plus_constant (Pmode, g1, -PROBE_INTERVAL)));
5000 emit_stack_probe (g1);
5003 emit_stack_probe (plus_constant (Pmode, g1,
5004 (i - PROBE_INTERVAL) - size));
5007 /* Otherwise, do the same as above, but in a loop. Note that we must be
5008 extra careful with variables wrapping around because we might be at
5009 the very top (or the very bottom) of the address space and we have
5010 to be able to handle this case properly; in particular, we use an
5011 equality test for the loop condition. */
5014 HOST_WIDE_INT rounded_size;
5015 rtx g4 = gen_rtx_REG (Pmode, 4);
5017 emit_move_insn (g1, GEN_INT (first));
5020 /* Step 1: round SIZE to the previous multiple of the interval. */
5022 rounded_size = size & -PROBE_INTERVAL;
5023 emit_move_insn (g4, GEN_INT (rounded_size));
5026 /* Step 2: compute initial and final value of the loop counter. */
5028 /* TEST_ADDR = SP + FIRST. */
5029 emit_insn (gen_rtx_SET (VOIDmode, g1,
5030 gen_rtx_MINUS (Pmode, stack_pointer_rtx, g1)));
5032 /* LAST_ADDR = SP + FIRST + ROUNDED_SIZE. */
5033 emit_insn (gen_rtx_SET (VOIDmode, g4, gen_rtx_MINUS (Pmode, g1, g4)));
5038 while (TEST_ADDR != LAST_ADDR)
5040 TEST_ADDR = TEST_ADDR + PROBE_INTERVAL
5044 probes at FIRST + N * PROBE_INTERVAL for values of N from 1
5045 until it is equal to ROUNDED_SIZE. */
5048 emit_insn (gen_probe_stack_rangedi (g1, g1, g4));
5050 emit_insn (gen_probe_stack_rangesi (g1, g1, g4));
5053 /* Step 4: probe at FIRST + SIZE if we cannot assert at compile-time
5054 that SIZE is equal to ROUNDED_SIZE. */
5056 if (size != rounded_size)
5057 emit_stack_probe (plus_constant (Pmode, g4, rounded_size - size));
5060 /* Make sure nothing is scheduled before we are done. */
5061 emit_insn (gen_blockage ());
5064 /* Probe a range of stack addresses from REG1 to REG2 inclusive. These are
5065 absolute addresses. */
5068 output_probe_stack_range (rtx reg1, rtx reg2)
5070 static int labelno = 0;
5071 char loop_lab[32], end_lab[32];
5074 ASM_GENERATE_INTERNAL_LABEL (loop_lab, "LPSRL", labelno);
5075 ASM_GENERATE_INTERNAL_LABEL (end_lab, "LPSRE", labelno++);
5077 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, loop_lab);
5079 /* Jump to END_LAB if TEST_ADDR == LAST_ADDR. */
5082 output_asm_insn ("cmp\t%0, %1", xops);
5084 fputs ("\tbe,pn\t%xcc,", asm_out_file);
5086 fputs ("\tbe\t", asm_out_file);
5087 assemble_name_raw (asm_out_file, end_lab);
5088 fputc ('\n', asm_out_file);
5090 /* TEST_ADDR = TEST_ADDR + PROBE_INTERVAL. */
5091 xops[1] = GEN_INT (-PROBE_INTERVAL);
5092 output_asm_insn (" add\t%0, %1, %0", xops);
5094 /* Probe at TEST_ADDR and branch. */
5096 fputs ("\tba,pt\t%xcc,", asm_out_file);
5098 fputs ("\tba\t", asm_out_file);
5099 assemble_name_raw (asm_out_file, loop_lab);
5100 fputc ('\n', asm_out_file);
5101 xops[1] = GEN_INT (SPARC_STACK_BIAS);
5102 output_asm_insn (" st\t%%g0, [%0+%1]", xops);
5104 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, end_lab);
5109 /* Emit code to save/restore registers from LOW to HIGH at BASE+OFFSET as
5110 needed. LOW is supposed to be double-word aligned for 32-bit registers.
5111 SAVE_P decides whether a register must be saved/restored. ACTION_TRUE
5112 is the action to be performed if SAVE_P returns true and ACTION_FALSE
5113 the action to be performed if it returns false. Return the new offset. */
5115 typedef bool (*sorr_pred_t) (unsigned int, int);
5116 typedef enum { SORR_NONE, SORR_ADVANCE, SORR_SAVE, SORR_RESTORE } sorr_act_t;
5119 emit_save_or_restore_regs (unsigned int low, unsigned int high, rtx base,
5120 int offset, int leaf_function, sorr_pred_t save_p,
5121 sorr_act_t action_true, sorr_act_t action_false)
5126 if (TARGET_ARCH64 && high <= 32)
5130 for (i = low; i < high; i++)
5132 if (save_p (i, leaf_function))
5134 mem = gen_frame_mem (DImode, plus_constant (Pmode,
5136 if (action_true == SORR_SAVE)
5138 insn = emit_move_insn (mem, gen_rtx_REG (DImode, i));
5139 RTX_FRAME_RELATED_P (insn) = 1;
5141 else /* action_true == SORR_RESTORE */
5143 /* The frame pointer must be restored last since its old
5144 value may be used as base address for the frame. This
5145 is problematic in 64-bit mode only because of the lack
5146 of double-word load instruction. */
5147 if (i == HARD_FRAME_POINTER_REGNUM)
5150 emit_move_insn (gen_rtx_REG (DImode, i), mem);
5154 else if (action_false == SORR_ADVANCE)
5160 mem = gen_frame_mem (DImode, plus_constant (Pmode, base, fp_offset));
5161 emit_move_insn (hard_frame_pointer_rtx, mem);
5166 for (i = low; i < high; i += 2)
5168 bool reg0 = save_p (i, leaf_function);
5169 bool reg1 = save_p (i + 1, leaf_function);
5170 enum machine_mode mode;
5175 mode = SPARC_INT_REG_P (i) ? DImode : DFmode;
5180 mode = SPARC_INT_REG_P (i) ? SImode : SFmode;
5185 mode = SPARC_INT_REG_P (i) ? SImode : SFmode;
5191 if (action_false == SORR_ADVANCE)
5196 mem = gen_frame_mem (mode, plus_constant (Pmode, base, offset));
5197 if (action_true == SORR_SAVE)
5199 insn = emit_move_insn (mem, gen_rtx_REG (mode, regno));
5200 RTX_FRAME_RELATED_P (insn) = 1;
5204 mem = gen_frame_mem (SImode, plus_constant (Pmode, base,
5206 set1 = gen_rtx_SET (VOIDmode, mem,
5207 gen_rtx_REG (SImode, regno));
5208 RTX_FRAME_RELATED_P (set1) = 1;
5210 = gen_frame_mem (SImode, plus_constant (Pmode, base,
5212 set2 = gen_rtx_SET (VOIDmode, mem,
5213 gen_rtx_REG (SImode, regno + 1));
5214 RTX_FRAME_RELATED_P (set2) = 1;
5215 add_reg_note (insn, REG_FRAME_RELATED_EXPR,
5216 gen_rtx_PARALLEL (VOIDmode,
5217 gen_rtvec (2, set1, set2)));
5220 else /* action_true == SORR_RESTORE */
5221 emit_move_insn (gen_rtx_REG (mode, regno), mem);
5223 /* Always preserve double-word alignment. */
5224 offset = (offset + 8) & -8;
5231 /* Emit code to adjust BASE to OFFSET. Return the new base. */
5234 emit_adjust_base_to_offset (rtx base, int offset)
5236 /* ??? This might be optimized a little as %g1 might already have a
5237 value close enough that a single add insn will do. */
5238 /* ??? Although, all of this is probably only a temporary fix because
5239 if %g1 can hold a function result, then sparc_expand_epilogue will
5240 lose (the result will be clobbered). */
5241 rtx new_base = gen_rtx_REG (Pmode, 1);
5242 emit_move_insn (new_base, GEN_INT (offset));
5243 emit_insn (gen_rtx_SET (VOIDmode,
5244 new_base, gen_rtx_PLUS (Pmode, base, new_base)));
5248 /* Emit code to save/restore call-saved global and FP registers. */
5251 emit_save_or_restore_global_fp_regs (rtx base, int offset, sorr_act_t action)
5253 if (offset < -4096 || offset + sparc_n_global_fp_regs * 4 > 4095)
5255 base = emit_adjust_base_to_offset (base, offset);
5260 = emit_save_or_restore_regs (0, 8, base, offset, 0,
5261 save_global_or_fp_reg_p, action, SORR_NONE);
5262 emit_save_or_restore_regs (32, TARGET_V9 ? 96 : 64, base, offset, 0,
5263 save_global_or_fp_reg_p, action, SORR_NONE);
5266 /* Emit code to save/restore call-saved local and in registers. */
5269 emit_save_or_restore_local_in_regs (rtx base, int offset, sorr_act_t action)
5271 if (offset < -4096 || offset + 16 * UNITS_PER_WORD > 4095)
5273 base = emit_adjust_base_to_offset (base, offset);
5277 emit_save_or_restore_regs (16, 32, base, offset, sparc_leaf_function_p,
5278 save_local_or_in_reg_p, action, SORR_ADVANCE);
5281 /* Emit a window_save insn. */
5284 emit_window_save (rtx increment)
5286 rtx insn = emit_insn (gen_window_save (increment));
5287 RTX_FRAME_RELATED_P (insn) = 1;
5289 /* The incoming return address (%o7) is saved in %i7. */
5290 add_reg_note (insn, REG_CFA_REGISTER,
5291 gen_rtx_SET (VOIDmode,
5292 gen_rtx_REG (Pmode, RETURN_ADDR_REGNUM),
5294 INCOMING_RETURN_ADDR_REGNUM)));
5296 /* The window save event. */
5297 add_reg_note (insn, REG_CFA_WINDOW_SAVE, const0_rtx);
5299 /* The CFA is %fp, the hard frame pointer. */
5300 add_reg_note (insn, REG_CFA_DEF_CFA,
5301 plus_constant (Pmode, hard_frame_pointer_rtx,
5302 INCOMING_FRAME_SP_OFFSET));
5307 /* Generate an increment for the stack pointer. */
5310 gen_stack_pointer_inc (rtx increment)
5312 return gen_rtx_SET (VOIDmode,
5314 gen_rtx_PLUS (Pmode,
5319 /* Expand the function prologue. The prologue is responsible for reserving
5320 storage for the frame, saving the call-saved registers and loading the
5321 GOT register if needed. */
5324 sparc_expand_prologue (void)
5329 /* Compute a snapshot of crtl->uses_only_leaf_regs. Relying
5330 on the final value of the flag means deferring the prologue/epilogue
5331 expansion until just before the second scheduling pass, which is too
5332 late to emit multiple epilogues or return insns.
5334 Of course we are making the assumption that the value of the flag
5335 will not change between now and its final value. Of the three parts
5336 of the formula, only the last one can reasonably vary. Let's take a
5337 closer look, after assuming that the first two ones are set to true
5338 (otherwise the last value is effectively silenced).
5340 If only_leaf_regs_used returns false, the global predicate will also
5341 be false so the actual frame size calculated below will be positive.
5342 As a consequence, the save_register_window insn will be emitted in
5343 the instruction stream; now this insn explicitly references %fp
5344 which is not a leaf register so only_leaf_regs_used will always
5345 return false subsequently.
5347 If only_leaf_regs_used returns true, we hope that the subsequent
5348 optimization passes won't cause non-leaf registers to pop up. For
5349 example, the regrename pass has special provisions to not rename to
5350 non-leaf registers in a leaf function. */
5351 sparc_leaf_function_p
5352 = optimize > 0 && crtl->is_leaf && only_leaf_regs_used ();
5354 size = sparc_compute_frame_size (get_frame_size(), sparc_leaf_function_p);
5356 if (flag_stack_usage_info)
5357 current_function_static_stack_size = size;
5359 if (flag_stack_check == STATIC_BUILTIN_STACK_CHECK && size)
5360 sparc_emit_probe_stack_range (STACK_CHECK_PROTECT, size);
5364 else if (sparc_leaf_function_p)
5366 rtx size_int_rtx = GEN_INT (-size);
5369 insn = emit_insn (gen_stack_pointer_inc (size_int_rtx));
5370 else if (size <= 8192)
5372 insn = emit_insn (gen_stack_pointer_inc (GEN_INT (-4096)));
5373 RTX_FRAME_RELATED_P (insn) = 1;
5375 /* %sp is still the CFA register. */
5376 insn = emit_insn (gen_stack_pointer_inc (GEN_INT (4096 - size)));
5380 rtx size_rtx = gen_rtx_REG (Pmode, 1);
5381 emit_move_insn (size_rtx, size_int_rtx);
5382 insn = emit_insn (gen_stack_pointer_inc (size_rtx));
5383 add_reg_note (insn, REG_FRAME_RELATED_EXPR,
5384 gen_stack_pointer_inc (size_int_rtx));
5387 RTX_FRAME_RELATED_P (insn) = 1;
5391 rtx size_int_rtx = GEN_INT (-size);
5394 emit_window_save (size_int_rtx);
5395 else if (size <= 8192)
5397 emit_window_save (GEN_INT (-4096));
5399 /* %sp is not the CFA register anymore. */
5400 emit_insn (gen_stack_pointer_inc (GEN_INT (4096 - size)));
5402 /* Make sure no %fp-based store is issued until after the frame is
5403 established. The offset between the frame pointer and the stack
5404 pointer is calculated relative to the value of the stack pointer
5405 at the end of the function prologue, and moving instructions that
5406 access the stack via the frame pointer between the instructions
5407 that decrement the stack pointer could result in accessing the
5408 register window save area, which is volatile. */
5409 emit_insn (gen_frame_blockage ());
5413 rtx size_rtx = gen_rtx_REG (Pmode, 1);
5414 emit_move_insn (size_rtx, size_int_rtx);
5415 emit_window_save (size_rtx);
5419 if (sparc_leaf_function_p)
5421 sparc_frame_base_reg = stack_pointer_rtx;
5422 sparc_frame_base_offset = size + SPARC_STACK_BIAS;
5426 sparc_frame_base_reg = hard_frame_pointer_rtx;
5427 sparc_frame_base_offset = SPARC_STACK_BIAS;
5430 if (sparc_n_global_fp_regs > 0)
5431 emit_save_or_restore_global_fp_regs (sparc_frame_base_reg,
5432 sparc_frame_base_offset
5433 - sparc_apparent_frame_size,
5436 /* Load the GOT register if needed. */
5437 if (crtl->uses_pic_offset_table)
5438 load_got_register ();
5440 /* Advertise that the data calculated just above are now valid. */
5441 sparc_prologue_data_valid_p = true;
5444 /* Expand the function prologue. The prologue is responsible for reserving
5445 storage for the frame, saving the call-saved registers and loading the
5446 GOT register if needed. */
5449 sparc_flat_expand_prologue (void)
5454 sparc_leaf_function_p = optimize > 0 && crtl->is_leaf;
5456 size = sparc_compute_frame_size (get_frame_size(), sparc_leaf_function_p);
5458 if (flag_stack_usage_info)
5459 current_function_static_stack_size = size;
5461 if (flag_stack_check == STATIC_BUILTIN_STACK_CHECK && size)
5462 sparc_emit_probe_stack_range (STACK_CHECK_PROTECT, size);
5464 if (sparc_save_local_in_regs_p)
5465 emit_save_or_restore_local_in_regs (stack_pointer_rtx, SPARC_STACK_BIAS,
5472 rtx size_int_rtx, size_rtx;
5474 size_rtx = size_int_rtx = GEN_INT (-size);
5476 /* We establish the frame (i.e. decrement the stack pointer) first, even
5477 if we use a frame pointer, because we cannot clobber any call-saved
5478 registers, including the frame pointer, if we haven't created a new
5479 register save area, for the sake of compatibility with the ABI. */
5481 insn = emit_insn (gen_stack_pointer_inc (size_int_rtx));
5482 else if (size <= 8192 && !frame_pointer_needed)
5484 insn = emit_insn (gen_stack_pointer_inc (GEN_INT (-4096)));
5485 RTX_FRAME_RELATED_P (insn) = 1;
5486 insn = emit_insn (gen_stack_pointer_inc (GEN_INT (4096 - size)));
5490 size_rtx = gen_rtx_REG (Pmode, 1);
5491 emit_move_insn (size_rtx, size_int_rtx);
5492 insn = emit_insn (gen_stack_pointer_inc (size_rtx));
5493 add_reg_note (insn, REG_CFA_ADJUST_CFA,
5494 gen_stack_pointer_inc (size_int_rtx));
5496 RTX_FRAME_RELATED_P (insn) = 1;
5498 /* Ensure nothing is scheduled until after the frame is established. */
5499 emit_insn (gen_blockage ());
5501 if (frame_pointer_needed)
5503 insn = emit_insn (gen_rtx_SET (VOIDmode, hard_frame_pointer_rtx,
5504 gen_rtx_MINUS (Pmode,
5507 RTX_FRAME_RELATED_P (insn) = 1;
5509 add_reg_note (insn, REG_CFA_ADJUST_CFA,
5510 gen_rtx_SET (VOIDmode, hard_frame_pointer_rtx,
5511 plus_constant (Pmode, stack_pointer_rtx,
5515 if (return_addr_reg_needed_p (sparc_leaf_function_p))
5517 rtx o7 = gen_rtx_REG (Pmode, INCOMING_RETURN_ADDR_REGNUM);
5518 rtx i7 = gen_rtx_REG (Pmode, RETURN_ADDR_REGNUM);
5520 insn = emit_move_insn (i7, o7);
5521 RTX_FRAME_RELATED_P (insn) = 1;
5523 add_reg_note (insn, REG_CFA_REGISTER,
5524 gen_rtx_SET (VOIDmode, i7, o7));
5526 /* Prevent this instruction from ever being considered dead,
5527 even if this function has no epilogue. */
5532 if (frame_pointer_needed)
5534 sparc_frame_base_reg = hard_frame_pointer_rtx;
5535 sparc_frame_base_offset = SPARC_STACK_BIAS;
5539 sparc_frame_base_reg = stack_pointer_rtx;
5540 sparc_frame_base_offset = size + SPARC_STACK_BIAS;
5543 if (sparc_n_global_fp_regs > 0)
5544 emit_save_or_restore_global_fp_regs (sparc_frame_base_reg,
5545 sparc_frame_base_offset
5546 - sparc_apparent_frame_size,
5549 /* Load the GOT register if needed. */
5550 if (crtl->uses_pic_offset_table)
5551 load_got_register ();
5553 /* Advertise that the data calculated just above are now valid. */
5554 sparc_prologue_data_valid_p = true;
5557 /* This function generates the assembly code for function entry, which boils
5558 down to emitting the necessary .register directives. */
5561 sparc_asm_function_prologue (FILE *file, HOST_WIDE_INT size ATTRIBUTE_UNUSED)
5563 /* Check that the assumption we made in sparc_expand_prologue is valid. */
5565 gcc_assert (sparc_leaf_function_p == crtl->uses_only_leaf_regs);
5567 sparc_output_scratch_registers (file);
5570 /* Expand the function epilogue, either normal or part of a sibcall.
5571 We emit all the instructions except the return or the call. */
5574 sparc_expand_epilogue (bool for_eh)
5576 HOST_WIDE_INT size = sparc_frame_size;
5578 if (sparc_n_global_fp_regs > 0)
5579 emit_save_or_restore_global_fp_regs (sparc_frame_base_reg,
5580 sparc_frame_base_offset
5581 - sparc_apparent_frame_size,
5584 if (size == 0 || for_eh)
5586 else if (sparc_leaf_function_p)
5589 emit_insn (gen_stack_pointer_inc (GEN_INT (size)));
5590 else if (size <= 8192)
5592 emit_insn (gen_stack_pointer_inc (GEN_INT (4096)));
5593 emit_insn (gen_stack_pointer_inc (GEN_INT (size - 4096)));
5597 rtx reg = gen_rtx_REG (Pmode, 1);
5598 emit_move_insn (reg, GEN_INT (size));
5599 emit_insn (gen_stack_pointer_inc (reg));
5604 /* Expand the function epilogue, either normal or part of a sibcall.
5605 We emit all the instructions except the return or the call. */
5608 sparc_flat_expand_epilogue (bool for_eh)
5610 HOST_WIDE_INT size = sparc_frame_size;
5612 if (sparc_n_global_fp_regs > 0)
5613 emit_save_or_restore_global_fp_regs (sparc_frame_base_reg,
5614 sparc_frame_base_offset
5615 - sparc_apparent_frame_size,
5618 /* If we have a frame pointer, we'll need both to restore it before the
5619 frame is destroyed and use its current value in destroying the frame.
5620 Since we don't have an atomic way to do that in the flat window model,
5621 we save the current value into a temporary register (%g1). */
5622 if (frame_pointer_needed && !for_eh)
5623 emit_move_insn (gen_rtx_REG (Pmode, 1), hard_frame_pointer_rtx);
5625 if (return_addr_reg_needed_p (sparc_leaf_function_p))
5626 emit_move_insn (gen_rtx_REG (Pmode, INCOMING_RETURN_ADDR_REGNUM),
5627 gen_rtx_REG (Pmode, RETURN_ADDR_REGNUM));
5629 if (sparc_save_local_in_regs_p)
5630 emit_save_or_restore_local_in_regs (sparc_frame_base_reg,
5631 sparc_frame_base_offset,
5634 if (size == 0 || for_eh)
5636 else if (frame_pointer_needed)
5638 /* Make sure the frame is destroyed after everything else is done. */
5639 emit_insn (gen_blockage ());
5641 emit_move_insn (stack_pointer_rtx, gen_rtx_REG (Pmode, 1));
5646 emit_insn (gen_blockage ());
5649 emit_insn (gen_stack_pointer_inc (GEN_INT (size)));
5650 else if (size <= 8192)
5652 emit_insn (gen_stack_pointer_inc (GEN_INT (4096)));
5653 emit_insn (gen_stack_pointer_inc (GEN_INT (size - 4096)));
5657 rtx reg = gen_rtx_REG (Pmode, 1);
5658 emit_move_insn (reg, GEN_INT (size));
5659 emit_insn (gen_stack_pointer_inc (reg));
5664 /* Return true if it is appropriate to emit `return' instructions in the
5665 body of a function. */
5668 sparc_can_use_return_insn_p (void)
5670 return sparc_prologue_data_valid_p
5671 && sparc_n_global_fp_regs == 0
5673 ? (sparc_frame_size == 0 && !sparc_save_local_in_regs_p)
5674 : (sparc_frame_size == 0 || !sparc_leaf_function_p);
5677 /* This function generates the assembly code for function exit. */
5680 sparc_asm_function_epilogue (FILE *file, HOST_WIDE_INT size ATTRIBUTE_UNUSED)
5682 /* If the last two instructions of a function are "call foo; dslot;"
5683 the return address might point to the first instruction in the next
5684 function and we have to output a dummy nop for the sake of sane
5685 backtraces in such cases. This is pointless for sibling calls since
5686 the return address is explicitly adjusted. */
5688 rtx insn, last_real_insn;
5690 insn = get_last_insn ();
5692 last_real_insn = prev_real_insn (insn);
5694 && NONJUMP_INSN_P (last_real_insn)
5695 && GET_CODE (PATTERN (last_real_insn)) == SEQUENCE)
5696 last_real_insn = XVECEXP (PATTERN (last_real_insn), 0, 0);
5699 && CALL_P (last_real_insn)
5700 && !SIBLING_CALL_P (last_real_insn))
5701 fputs("\tnop\n", file);
5703 sparc_output_deferred_case_vectors ();
5706 /* Output a 'restore' instruction. */
5709 output_restore (rtx pat)
5715 fputs ("\t restore\n", asm_out_file);
5719 gcc_assert (GET_CODE (pat) == SET);
5721 operands[0] = SET_DEST (pat);
5722 pat = SET_SRC (pat);
5724 switch (GET_CODE (pat))
5727 operands[1] = XEXP (pat, 0);
5728 operands[2] = XEXP (pat, 1);
5729 output_asm_insn (" restore %r1, %2, %Y0", operands);
5732 operands[1] = XEXP (pat, 0);
5733 operands[2] = XEXP (pat, 1);
5734 output_asm_insn (" restore %r1, %%lo(%a2), %Y0", operands);
5737 operands[1] = XEXP (pat, 0);
5738 gcc_assert (XEXP (pat, 1) == const1_rtx);
5739 output_asm_insn (" restore %r1, %r1, %Y0", operands);
5743 output_asm_insn (" restore %%g0, %1, %Y0", operands);
5748 /* Output a return. */
5751 output_return (rtx insn)
5753 if (crtl->calls_eh_return)
5755 /* If the function uses __builtin_eh_return, the eh_return
5756 machinery occupies the delay slot. */
5757 gcc_assert (!final_sequence);
5759 if (flag_delayed_branch)
5761 if (!TARGET_FLAT && TARGET_V9)
5762 fputs ("\treturn\t%i7+8\n", asm_out_file);
5766 fputs ("\trestore\n", asm_out_file);
5768 fputs ("\tjmp\t%o7+8\n", asm_out_file);
5771 fputs ("\t add\t%sp, %g1, %sp\n", asm_out_file);
5776 fputs ("\trestore\n", asm_out_file);
5778 fputs ("\tadd\t%sp, %g1, %sp\n", asm_out_file);
5779 fputs ("\tjmp\t%o7+8\n\t nop\n", asm_out_file);
5782 else if (sparc_leaf_function_p || TARGET_FLAT)
5784 /* This is a leaf or flat function so we don't have to bother restoring
5785 the register window, which frees us from dealing with the convoluted
5786 semantics of restore/return. We simply output the jump to the
5787 return address and the insn in the delay slot (if any). */
5789 return "jmp\t%%o7+%)%#";
5793 /* This is a regular function so we have to restore the register window.
5794 We may have a pending insn for the delay slot, which will be either
5795 combined with the 'restore' instruction or put in the delay slot of
5796 the 'return' instruction. */
5802 delay = NEXT_INSN (insn);
5805 pat = PATTERN (delay);
5807 if (TARGET_V9 && ! epilogue_renumber (&pat, 1))
5809 epilogue_renumber (&pat, 0);
5810 return "return\t%%i7+%)%#";
5814 output_asm_insn ("jmp\t%%i7+%)", NULL);
5815 output_restore (pat);
5816 PATTERN (delay) = gen_blockage ();
5817 INSN_CODE (delay) = -1;
5822 /* The delay slot is empty. */
5824 return "return\t%%i7+%)\n\t nop";
5825 else if (flag_delayed_branch)
5826 return "jmp\t%%i7+%)\n\t restore";
5828 return "restore\n\tjmp\t%%o7+%)\n\t nop";
5835 /* Output a sibling call. */
5838 output_sibcall (rtx insn, rtx call_operand)
5842 gcc_assert (flag_delayed_branch);
5844 operands[0] = call_operand;
5846 if (sparc_leaf_function_p || TARGET_FLAT)
5848 /* This is a leaf or flat function so we don't have to bother restoring
5849 the register window. We simply output the jump to the function and
5850 the insn in the delay slot (if any). */
5852 gcc_assert (!(LEAF_SIBCALL_SLOT_RESERVED_P && final_sequence));
5855 output_asm_insn ("sethi\t%%hi(%a0), %%g1\n\tjmp\t%%g1 + %%lo(%a0)%#",
5858 /* Use or with rs2 %%g0 instead of mov, so that as/ld can optimize
5859 it into branch if possible. */
5860 output_asm_insn ("or\t%%o7, %%g0, %%g1\n\tcall\t%a0, 0\n\t or\t%%g1, %%g0, %%o7",
5865 /* This is a regular function so we have to restore the register window.
5866 We may have a pending insn for the delay slot, which will be combined
5867 with the 'restore' instruction. */
5869 output_asm_insn ("call\t%a0, 0", operands);
5873 rtx delay = NEXT_INSN (insn);
5876 output_restore (PATTERN (delay));
5878 PATTERN (delay) = gen_blockage ();
5879 INSN_CODE (delay) = -1;
5882 output_restore (NULL_RTX);
5888 /* Functions for handling argument passing.
5890 For 32-bit, the first 6 args are normally in registers and the rest are
5891 pushed. Any arg that starts within the first 6 words is at least
5892 partially passed in a register unless its data type forbids.
5894 For 64-bit, the argument registers are laid out as an array of 16 elements
5895 and arguments are added sequentially. The first 6 int args and up to the
5896 first 16 fp args (depending on size) are passed in regs.
5898 Slot Stack Integral Float Float in structure Double Long Double
5899 ---- ----- -------- ----- ------------------ ------ -----------
5900 15 [SP+248] %f31 %f30,%f31 %d30
5901 14 [SP+240] %f29 %f28,%f29 %d28 %q28
5902 13 [SP+232] %f27 %f26,%f27 %d26
5903 12 [SP+224] %f25 %f24,%f25 %d24 %q24
5904 11 [SP+216] %f23 %f22,%f23 %d22
5905 10 [SP+208] %f21 %f20,%f21 %d20 %q20
5906 9 [SP+200] %f19 %f18,%f19 %d18
5907 8 [SP+192] %f17 %f16,%f17 %d16 %q16
5908 7 [SP+184] %f15 %f14,%f15 %d14
5909 6 [SP+176] %f13 %f12,%f13 %d12 %q12
5910 5 [SP+168] %o5 %f11 %f10,%f11 %d10
5911 4 [SP+160] %o4 %f9 %f8,%f9 %d8 %q8
5912 3 [SP+152] %o3 %f7 %f6,%f7 %d6
5913 2 [SP+144] %o2 %f5 %f4,%f5 %d4 %q4
5914 1 [SP+136] %o1 %f3 %f2,%f3 %d2
5915 0 [SP+128] %o0 %f1 %f0,%f1 %d0 %q0
5917 Here SP = %sp if -mno-stack-bias or %sp+stack_bias otherwise.
5919 Integral arguments are always passed as 64-bit quantities appropriately
5922 Passing of floating point values is handled as follows.
5923 If a prototype is in scope:
5924 If the value is in a named argument (i.e. not a stdarg function or a
5925 value not part of the `...') then the value is passed in the appropriate
5927 If the value is part of the `...' and is passed in one of the first 6
5928 slots then the value is passed in the appropriate int reg.
5929 If the value is part of the `...' and is not passed in one of the first 6
5930 slots then the value is passed in memory.
5931 If a prototype is not in scope:
5932 If the value is one of the first 6 arguments the value is passed in the
5933 appropriate integer reg and the appropriate fp reg.
5934 If the value is not one of the first 6 arguments the value is passed in
5935 the appropriate fp reg and in memory.
5938 Summary of the calling conventions implemented by GCC on the SPARC:
5941 size argument return value
5943 small integer <4 int. reg. int. reg.
5944 word 4 int. reg. int. reg.
5945 double word 8 int. reg. int. reg.
5947 _Complex small integer <8 int. reg. int. reg.
5948 _Complex word 8 int. reg. int. reg.
5949 _Complex double word 16 memory int. reg.
5951 vector integer <=8 int. reg. FP reg.
5952 vector integer >8 memory memory
5954 float 4 int. reg. FP reg.
5955 double 8 int. reg. FP reg.
5956 long double 16 memory memory
5958 _Complex float 8 memory FP reg.
5959 _Complex double 16 memory FP reg.
5960 _Complex long double 32 memory FP reg.
5962 vector float any memory memory
5964 aggregate any memory memory
5969 size argument return value
5971 small integer <8 int. reg. int. reg.
5972 word 8 int. reg. int. reg.
5973 double word 16 int. reg. int. reg.
5975 _Complex small integer <16 int. reg. int. reg.
5976 _Complex word 16 int. reg. int. reg.
5977 _Complex double word 32 memory int. reg.
5979 vector integer <=16 FP reg. FP reg.
5980 vector integer 16<s<=32 memory FP reg.
5981 vector integer >32 memory memory
5983 float 4 FP reg. FP reg.
5984 double 8 FP reg. FP reg.
5985 long double 16 FP reg. FP reg.
5987 _Complex float 8 FP reg. FP reg.
5988 _Complex double 16 FP reg. FP reg.
5989 _Complex long double 32 memory FP reg.
5991 vector float <=16 FP reg. FP reg.
5992 vector float 16<s<=32 memory FP reg.
5993 vector float >32 memory memory
5995 aggregate <=16 reg. reg.
5996 aggregate 16<s<=32 memory reg.
5997 aggregate >32 memory memory
6001 Note #1: complex floating-point types follow the extended SPARC ABIs as
6002 implemented by the Sun compiler.
6004 Note #2: integral vector types follow the scalar floating-point types
6005 conventions to match what is implemented by the Sun VIS SDK.
6007 Note #3: floating-point vector types follow the aggregate types
6011 /* Maximum number of int regs for args. */
6012 #define SPARC_INT_ARG_MAX 6
6013 /* Maximum number of fp regs for args. */
6014 #define SPARC_FP_ARG_MAX 16
6016 #define ROUND_ADVANCE(SIZE) (((SIZE) + UNITS_PER_WORD - 1) / UNITS_PER_WORD)
6018 /* Handle the INIT_CUMULATIVE_ARGS macro.
6019 Initialize a variable CUM of type CUMULATIVE_ARGS
6020 for a call to a function whose data type is FNTYPE.
6021 For a library call, FNTYPE is 0. */
6024 init_cumulative_args (struct sparc_args *cum, tree fntype,
6025 rtx libname ATTRIBUTE_UNUSED,
6026 tree fndecl ATTRIBUTE_UNUSED)
6029 cum->prototype_p = fntype && prototype_p (fntype);
6030 cum->libcall_p = fntype == 0;
6033 /* Handle promotion of pointer and integer arguments. */
6035 static enum machine_mode
6036 sparc_promote_function_mode (const_tree type,
6037 enum machine_mode mode,
6039 const_tree fntype ATTRIBUTE_UNUSED,
6040 int for_return ATTRIBUTE_UNUSED)
6042 if (type != NULL_TREE && POINTER_TYPE_P (type))
6044 *punsignedp = POINTERS_EXTEND_UNSIGNED;
6048 /* Integral arguments are passed as full words, as per the ABI. */
6049 if (GET_MODE_CLASS (mode) == MODE_INT
6050 && GET_MODE_SIZE (mode) < UNITS_PER_WORD)
6056 /* Handle the TARGET_STRICT_ARGUMENT_NAMING target hook. */
6059 sparc_strict_argument_naming (cumulative_args_t ca ATTRIBUTE_UNUSED)
6061 return TARGET_ARCH64 ? true : false;
6064 /* Scan the record type TYPE and return the following predicates:
6065 - INTREGS_P: the record contains at least one field or sub-field
6066 that is eligible for promotion in integer registers.
6067 - FP_REGS_P: the record contains at least one field or sub-field
6068 that is eligible for promotion in floating-point registers.
6069 - PACKED_P: the record contains at least one field that is packed.
6071 Sub-fields are not taken into account for the PACKED_P predicate. */
6074 scan_record_type (const_tree type, int *intregs_p, int *fpregs_p,
6079 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
6081 if (TREE_CODE (field) == FIELD_DECL)
6083 if (TREE_CODE (TREE_TYPE (field)) == RECORD_TYPE)
6084 scan_record_type (TREE_TYPE (field), intregs_p, fpregs_p, 0);
6085 else if ((FLOAT_TYPE_P (TREE_TYPE (field))
6086 || TREE_CODE (TREE_TYPE (field)) == VECTOR_TYPE)
6092 if (packed_p && DECL_PACKED (field))
6098 /* Compute the slot number to pass an argument in.
6099 Return the slot number or -1 if passing on the stack.
6101 CUM is a variable of type CUMULATIVE_ARGS which gives info about
6102 the preceding args and about the function being called.
6103 MODE is the argument's machine mode.
6104 TYPE is the data type of the argument (as a tree).
6105 This is null for libcalls where that information may
6107 NAMED is nonzero if this argument is a named parameter
6108 (otherwise it is an extra parameter matching an ellipsis).
6109 INCOMING_P is zero for FUNCTION_ARG, nonzero for FUNCTION_INCOMING_ARG.
6110 *PREGNO records the register number to use if scalar type.
6111 *PPADDING records the amount of padding needed in words. */
6114 function_arg_slotno (const struct sparc_args *cum, enum machine_mode mode,
6115 const_tree type, bool named, bool incoming_p,
6116 int *pregno, int *ppadding)
6118 int regbase = (incoming_p
6119 ? SPARC_INCOMING_INT_ARG_FIRST
6120 : SPARC_OUTGOING_INT_ARG_FIRST);
6121 int slotno = cum->words;
6122 enum mode_class mclass;
6127 if (type && TREE_ADDRESSABLE (type))
6133 && TYPE_ALIGN (type) % PARM_BOUNDARY != 0)
6136 /* For SPARC64, objects requiring 16-byte alignment get it. */
6138 && (type ? TYPE_ALIGN (type) : GET_MODE_ALIGNMENT (mode)) >= 128
6139 && (slotno & 1) != 0)
6140 slotno++, *ppadding = 1;
6142 mclass = GET_MODE_CLASS (mode);
6143 if (type && TREE_CODE (type) == VECTOR_TYPE)
6145 /* Vector types deserve special treatment because they are
6146 polymorphic wrt their mode, depending upon whether VIS
6147 instructions are enabled. */
6148 if (TREE_CODE (TREE_TYPE (type)) == REAL_TYPE)
6150 /* The SPARC port defines no floating-point vector modes. */
6151 gcc_assert (mode == BLKmode);
6155 /* Integral vector types should either have a vector
6156 mode or an integral mode, because we are guaranteed
6157 by pass_by_reference that their size is not greater
6158 than 16 bytes and TImode is 16-byte wide. */
6159 gcc_assert (mode != BLKmode);
6161 /* Vector integers are handled like floats according to
6163 mclass = MODE_FLOAT;
6170 case MODE_COMPLEX_FLOAT:
6171 case MODE_VECTOR_INT:
6172 if (TARGET_ARCH64 && TARGET_FPU && named)
6174 if (slotno >= SPARC_FP_ARG_MAX)
6176 regno = SPARC_FP_ARG_FIRST + slotno * 2;
6177 /* Arguments filling only one single FP register are
6178 right-justified in the outer double FP register. */
6179 if (GET_MODE_SIZE (mode) <= 4)
6186 case MODE_COMPLEX_INT:
6187 if (slotno >= SPARC_INT_ARG_MAX)
6189 regno = regbase + slotno;
6193 if (mode == VOIDmode)
6194 /* MODE is VOIDmode when generating the actual call. */
6197 gcc_assert (mode == BLKmode);
6201 || (TREE_CODE (type) != VECTOR_TYPE
6202 && TREE_CODE (type) != RECORD_TYPE))
6204 if (slotno >= SPARC_INT_ARG_MAX)
6206 regno = regbase + slotno;
6208 else /* TARGET_ARCH64 && type */
6210 int intregs_p = 0, fpregs_p = 0, packed_p = 0;
6212 /* First see what kinds of registers we would need. */
6213 if (TREE_CODE (type) == VECTOR_TYPE)
6216 scan_record_type (type, &intregs_p, &fpregs_p, &packed_p);
6218 /* The ABI obviously doesn't specify how packed structures
6219 are passed. These are defined to be passed in int regs
6220 if possible, otherwise memory. */
6221 if (packed_p || !named)
6222 fpregs_p = 0, intregs_p = 1;
6224 /* If all arg slots are filled, then must pass on stack. */
6225 if (fpregs_p && slotno >= SPARC_FP_ARG_MAX)
6228 /* If there are only int args and all int arg slots are filled,
6229 then must pass on stack. */
6230 if (!fpregs_p && intregs_p && slotno >= SPARC_INT_ARG_MAX)
6233 /* Note that even if all int arg slots are filled, fp members may
6234 still be passed in regs if such regs are available.
6235 *PREGNO isn't set because there may be more than one, it's up
6236 to the caller to compute them. */
6249 /* Handle recursive register counting for structure field layout. */
6251 struct function_arg_record_value_parms
6253 rtx ret; /* return expression being built. */
6254 int slotno; /* slot number of the argument. */
6255 int named; /* whether the argument is named. */
6256 int regbase; /* regno of the base register. */
6257 int stack; /* 1 if part of the argument is on the stack. */
6258 int intoffset; /* offset of the first pending integer field. */
6259 unsigned int nregs; /* number of words passed in registers. */
6262 static void function_arg_record_value_3
6263 (HOST_WIDE_INT, struct function_arg_record_value_parms *);
6264 static void function_arg_record_value_2
6265 (const_tree, HOST_WIDE_INT, struct function_arg_record_value_parms *, bool);
6266 static void function_arg_record_value_1
6267 (const_tree, HOST_WIDE_INT, struct function_arg_record_value_parms *, bool);
6268 static rtx function_arg_record_value (const_tree, enum machine_mode, int, int, int);
6269 static rtx function_arg_union_value (int, enum machine_mode, int, int);
6271 /* A subroutine of function_arg_record_value. Traverse the structure
6272 recursively and determine how many registers will be required. */
6275 function_arg_record_value_1 (const_tree type, HOST_WIDE_INT startbitpos,
6276 struct function_arg_record_value_parms *parms,
6281 /* We need to compute how many registers are needed so we can
6282 allocate the PARALLEL but before we can do that we need to know
6283 whether there are any packed fields. The ABI obviously doesn't
6284 specify how structures are passed in this case, so they are
6285 defined to be passed in int regs if possible, otherwise memory,
6286 regardless of whether there are fp values present. */
6289 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
6291 if (TREE_CODE (field) == FIELD_DECL && DECL_PACKED (field))
6298 /* Compute how many registers we need. */
6299 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
6301 if (TREE_CODE (field) == FIELD_DECL)
6303 HOST_WIDE_INT bitpos = startbitpos;
6305 if (DECL_SIZE (field) != 0)
6307 if (integer_zerop (DECL_SIZE (field)))
6310 if (host_integerp (bit_position (field), 1))
6311 bitpos += int_bit_position (field);
6314 /* ??? FIXME: else assume zero offset. */
6316 if (TREE_CODE (TREE_TYPE (field)) == RECORD_TYPE)
6317 function_arg_record_value_1 (TREE_TYPE (field),
6321 else if ((FLOAT_TYPE_P (TREE_TYPE (field))
6322 || TREE_CODE (TREE_TYPE (field)) == VECTOR_TYPE)
6327 if (parms->intoffset != -1)
6329 unsigned int startbit, endbit;
6330 int intslots, this_slotno;
6332 startbit = parms->intoffset & -BITS_PER_WORD;
6333 endbit = (bitpos + BITS_PER_WORD - 1) & -BITS_PER_WORD;
6335 intslots = (endbit - startbit) / BITS_PER_WORD;
6336 this_slotno = parms->slotno + parms->intoffset
6339 if (intslots > 0 && intslots > SPARC_INT_ARG_MAX - this_slotno)
6341 intslots = MAX (0, SPARC_INT_ARG_MAX - this_slotno);
6342 /* We need to pass this field on the stack. */
6346 parms->nregs += intslots;
6347 parms->intoffset = -1;
6350 /* There's no need to check this_slotno < SPARC_FP_ARG MAX.
6351 If it wasn't true we wouldn't be here. */
6352 if (TREE_CODE (TREE_TYPE (field)) == VECTOR_TYPE
6353 && DECL_MODE (field) == BLKmode)
6354 parms->nregs += TYPE_VECTOR_SUBPARTS (TREE_TYPE (field));
6355 else if (TREE_CODE (TREE_TYPE (field)) == COMPLEX_TYPE)
6362 if (parms->intoffset == -1)
6363 parms->intoffset = bitpos;
6369 /* A subroutine of function_arg_record_value. Assign the bits of the
6370 structure between parms->intoffset and bitpos to integer registers. */
6373 function_arg_record_value_3 (HOST_WIDE_INT bitpos,
6374 struct function_arg_record_value_parms *parms)
6376 enum machine_mode mode;
6378 unsigned int startbit, endbit;
6379 int this_slotno, intslots, intoffset;
6382 if (parms->intoffset == -1)
6385 intoffset = parms->intoffset;
6386 parms->intoffset = -1;
6388 startbit = intoffset & -BITS_PER_WORD;
6389 endbit = (bitpos + BITS_PER_WORD - 1) & -BITS_PER_WORD;
6390 intslots = (endbit - startbit) / BITS_PER_WORD;
6391 this_slotno = parms->slotno + intoffset / BITS_PER_WORD;
6393 intslots = MIN (intslots, SPARC_INT_ARG_MAX - this_slotno);
6397 /* If this is the trailing part of a word, only load that much into
6398 the register. Otherwise load the whole register. Note that in
6399 the latter case we may pick up unwanted bits. It's not a problem
6400 at the moment but may wish to revisit. */
6402 if (intoffset % BITS_PER_WORD != 0)
6403 mode = smallest_mode_for_size (BITS_PER_WORD - intoffset % BITS_PER_WORD,
6408 intoffset /= BITS_PER_UNIT;
6411 regno = parms->regbase + this_slotno;
6412 reg = gen_rtx_REG (mode, regno);
6413 XVECEXP (parms->ret, 0, parms->stack + parms->nregs)
6414 = gen_rtx_EXPR_LIST (VOIDmode, reg, GEN_INT (intoffset));
6417 intoffset = (intoffset | (UNITS_PER_WORD-1)) + 1;
6422 while (intslots > 0);
6425 /* A subroutine of function_arg_record_value. Traverse the structure
6426 recursively and assign bits to floating point registers. Track which
6427 bits in between need integer registers; invoke function_arg_record_value_3
6428 to make that happen. */
6431 function_arg_record_value_2 (const_tree type, HOST_WIDE_INT startbitpos,
6432 struct function_arg_record_value_parms *parms,
6438 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
6440 if (TREE_CODE (field) == FIELD_DECL && DECL_PACKED (field))
6447 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
6449 if (TREE_CODE (field) == FIELD_DECL)
6451 HOST_WIDE_INT bitpos = startbitpos;
6453 if (DECL_SIZE (field) != 0)
6455 if (integer_zerop (DECL_SIZE (field)))
6458 if (host_integerp (bit_position (field), 1))
6459 bitpos += int_bit_position (field);
6462 /* ??? FIXME: else assume zero offset. */
6464 if (TREE_CODE (TREE_TYPE (field)) == RECORD_TYPE)
6465 function_arg_record_value_2 (TREE_TYPE (field),
6469 else if ((FLOAT_TYPE_P (TREE_TYPE (field))
6470 || TREE_CODE (TREE_TYPE (field)) == VECTOR_TYPE)
6475 int this_slotno = parms->slotno + bitpos / BITS_PER_WORD;
6476 int regno, nregs, pos;
6477 enum machine_mode mode = DECL_MODE (field);
6480 function_arg_record_value_3 (bitpos, parms);
6482 if (TREE_CODE (TREE_TYPE (field)) == VECTOR_TYPE
6485 mode = TYPE_MODE (TREE_TYPE (TREE_TYPE (field)));
6486 nregs = TYPE_VECTOR_SUBPARTS (TREE_TYPE (field));
6488 else if (TREE_CODE (TREE_TYPE (field)) == COMPLEX_TYPE)
6490 mode = TYPE_MODE (TREE_TYPE (TREE_TYPE (field)));
6496 regno = SPARC_FP_ARG_FIRST + this_slotno * 2;
6497 if (GET_MODE_SIZE (mode) <= 4 && (bitpos & 32) != 0)
6499 reg = gen_rtx_REG (mode, regno);
6500 pos = bitpos / BITS_PER_UNIT;
6501 XVECEXP (parms->ret, 0, parms->stack + parms->nregs)
6502 = gen_rtx_EXPR_LIST (VOIDmode, reg, GEN_INT (pos));
6506 regno += GET_MODE_SIZE (mode) / 4;
6507 reg = gen_rtx_REG (mode, regno);
6508 pos += GET_MODE_SIZE (mode);
6509 XVECEXP (parms->ret, 0, parms->stack + parms->nregs)
6510 = gen_rtx_EXPR_LIST (VOIDmode, reg, GEN_INT (pos));
6516 if (parms->intoffset == -1)
6517 parms->intoffset = bitpos;
6523 /* Used by function_arg and sparc_function_value_1 to implement the complex
6524 conventions of the 64-bit ABI for passing and returning structures.
6525 Return an expression valid as a return value for the FUNCTION_ARG
6526 and TARGET_FUNCTION_VALUE.
6528 TYPE is the data type of the argument (as a tree).
6529 This is null for libcalls where that information may
6531 MODE is the argument's machine mode.
6532 SLOTNO is the index number of the argument's slot in the parameter array.
6533 NAMED is nonzero if this argument is a named parameter
6534 (otherwise it is an extra parameter matching an ellipsis).
6535 REGBASE is the regno of the base register for the parameter array. */
6538 function_arg_record_value (const_tree type, enum machine_mode mode,
6539 int slotno, int named, int regbase)
6541 HOST_WIDE_INT typesize = int_size_in_bytes (type);
6542 struct function_arg_record_value_parms parms;
6545 parms.ret = NULL_RTX;
6546 parms.slotno = slotno;
6547 parms.named = named;
6548 parms.regbase = regbase;
6551 /* Compute how many registers we need. */
6553 parms.intoffset = 0;
6554 function_arg_record_value_1 (type, 0, &parms, false);
6556 /* Take into account pending integer fields. */
6557 if (parms.intoffset != -1)
6559 unsigned int startbit, endbit;
6560 int intslots, this_slotno;
6562 startbit = parms.intoffset & -BITS_PER_WORD;
6563 endbit = (typesize*BITS_PER_UNIT + BITS_PER_WORD - 1) & -BITS_PER_WORD;
6564 intslots = (endbit - startbit) / BITS_PER_WORD;
6565 this_slotno = slotno + parms.intoffset / BITS_PER_WORD;
6567 if (intslots > 0 && intslots > SPARC_INT_ARG_MAX - this_slotno)
6569 intslots = MAX (0, SPARC_INT_ARG_MAX - this_slotno);
6570 /* We need to pass this field on the stack. */
6574 parms.nregs += intslots;
6576 nregs = parms.nregs;
6578 /* Allocate the vector and handle some annoying special cases. */
6581 /* ??? Empty structure has no value? Duh? */
6584 /* Though there's nothing really to store, return a word register
6585 anyway so the rest of gcc doesn't go nuts. Returning a PARALLEL
6586 leads to breakage due to the fact that there are zero bytes to
6588 return gen_rtx_REG (mode, regbase);
6592 /* ??? C++ has structures with no fields, and yet a size. Give up
6593 for now and pass everything back in integer registers. */
6594 nregs = (typesize + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
6596 if (nregs + slotno > SPARC_INT_ARG_MAX)
6597 nregs = SPARC_INT_ARG_MAX - slotno;
6599 gcc_assert (nregs != 0);
6601 parms.ret = gen_rtx_PARALLEL (mode, rtvec_alloc (parms.stack + nregs));
6603 /* If at least one field must be passed on the stack, generate
6604 (parallel [(expr_list (nil) ...) ...]) so that all fields will
6605 also be passed on the stack. We can't do much better because the
6606 semantics of TARGET_ARG_PARTIAL_BYTES doesn't handle the case
6607 of structures for which the fields passed exclusively in registers
6608 are not at the beginning of the structure. */
6610 XVECEXP (parms.ret, 0, 0)
6611 = gen_rtx_EXPR_LIST (VOIDmode, NULL_RTX, const0_rtx);
6613 /* Fill in the entries. */
6615 parms.intoffset = 0;
6616 function_arg_record_value_2 (type, 0, &parms, false);
6617 function_arg_record_value_3 (typesize * BITS_PER_UNIT, &parms);
6619 gcc_assert (parms.nregs == nregs);
6624 /* Used by function_arg and sparc_function_value_1 to implement the conventions
6625 of the 64-bit ABI for passing and returning unions.
6626 Return an expression valid as a return value for the FUNCTION_ARG
6627 and TARGET_FUNCTION_VALUE.
6629 SIZE is the size in bytes of the union.
6630 MODE is the argument's machine mode.
6631 REGNO is the hard register the union will be passed in. */
6634 function_arg_union_value (int size, enum machine_mode mode, int slotno,
6637 int nwords = ROUND_ADVANCE (size), i;
6640 /* See comment in previous function for empty structures. */
6642 return gen_rtx_REG (mode, regno);
6644 if (slotno == SPARC_INT_ARG_MAX - 1)
6647 regs = gen_rtx_PARALLEL (mode, rtvec_alloc (nwords));
6649 for (i = 0; i < nwords; i++)
6651 /* Unions are passed left-justified. */
6652 XVECEXP (regs, 0, i)
6653 = gen_rtx_EXPR_LIST (VOIDmode,
6654 gen_rtx_REG (word_mode, regno),
6655 GEN_INT (UNITS_PER_WORD * i));
6662 /* Used by function_arg and sparc_function_value_1 to implement the conventions
6663 for passing and returning large (BLKmode) vectors.
6664 Return an expression valid as a return value for the FUNCTION_ARG
6665 and TARGET_FUNCTION_VALUE.
6667 SIZE is the size in bytes of the vector (at least 8 bytes).
6668 REGNO is the FP hard register the vector will be passed in. */
6671 function_arg_vector_value (int size, int regno)
6673 int i, nregs = size / 8;
6676 regs = gen_rtx_PARALLEL (BLKmode, rtvec_alloc (nregs));
6678 for (i = 0; i < nregs; i++)
6680 XVECEXP (regs, 0, i)
6681 = gen_rtx_EXPR_LIST (VOIDmode,
6682 gen_rtx_REG (DImode, regno + 2*i),
6689 /* Determine where to put an argument to a function.
6690 Value is zero to push the argument on the stack,
6691 or a hard register in which to store the argument.
6693 CUM is a variable of type CUMULATIVE_ARGS which gives info about
6694 the preceding args and about the function being called.
6695 MODE is the argument's machine mode.
6696 TYPE is the data type of the argument (as a tree).
6697 This is null for libcalls where that information may
6699 NAMED is true if this argument is a named parameter
6700 (otherwise it is an extra parameter matching an ellipsis).
6701 INCOMING_P is false for TARGET_FUNCTION_ARG, true for
6702 TARGET_FUNCTION_INCOMING_ARG. */
6705 sparc_function_arg_1 (cumulative_args_t cum_v, enum machine_mode mode,
6706 const_tree type, bool named, bool incoming_p)
6708 const CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
6710 int regbase = (incoming_p
6711 ? SPARC_INCOMING_INT_ARG_FIRST
6712 : SPARC_OUTGOING_INT_ARG_FIRST);
6713 int slotno, regno, padding;
6714 enum mode_class mclass = GET_MODE_CLASS (mode);
6716 slotno = function_arg_slotno (cum, mode, type, named, incoming_p,
6721 /* Vector types deserve special treatment because they are polymorphic wrt
6722 their mode, depending upon whether VIS instructions are enabled. */
6723 if (type && TREE_CODE (type) == VECTOR_TYPE)
6725 HOST_WIDE_INT size = int_size_in_bytes (type);
6726 gcc_assert ((TARGET_ARCH32 && size <= 8)
6727 || (TARGET_ARCH64 && size <= 16));
6729 if (mode == BLKmode)
6730 return function_arg_vector_value (size,
6731 SPARC_FP_ARG_FIRST + 2*slotno);
6733 mclass = MODE_FLOAT;
6737 return gen_rtx_REG (mode, regno);
6739 /* Structures up to 16 bytes in size are passed in arg slots on the stack
6740 and are promoted to registers if possible. */
6741 if (type && TREE_CODE (type) == RECORD_TYPE)
6743 HOST_WIDE_INT size = int_size_in_bytes (type);
6744 gcc_assert (size <= 16);
6746 return function_arg_record_value (type, mode, slotno, named, regbase);
6749 /* Unions up to 16 bytes in size are passed in integer registers. */
6750 else if (type && TREE_CODE (type) == UNION_TYPE)
6752 HOST_WIDE_INT size = int_size_in_bytes (type);
6753 gcc_assert (size <= 16);
6755 return function_arg_union_value (size, mode, slotno, regno);
6758 /* v9 fp args in reg slots beyond the int reg slots get passed in regs
6759 but also have the slot allocated for them.
6760 If no prototype is in scope fp values in register slots get passed
6761 in two places, either fp regs and int regs or fp regs and memory. */
6762 else if ((mclass == MODE_FLOAT || mclass == MODE_COMPLEX_FLOAT)
6763 && SPARC_FP_REG_P (regno))
6765 rtx reg = gen_rtx_REG (mode, regno);
6766 if (cum->prototype_p || cum->libcall_p)
6768 /* "* 2" because fp reg numbers are recorded in 4 byte
6771 /* ??? This will cause the value to be passed in the fp reg and
6772 in the stack. When a prototype exists we want to pass the
6773 value in the reg but reserve space on the stack. That's an
6774 optimization, and is deferred [for a bit]. */
6775 if ((regno - SPARC_FP_ARG_FIRST) >= SPARC_INT_ARG_MAX * 2)
6776 return gen_rtx_PARALLEL (mode,
6778 gen_rtx_EXPR_LIST (VOIDmode,
6779 NULL_RTX, const0_rtx),
6780 gen_rtx_EXPR_LIST (VOIDmode,
6784 /* ??? It seems that passing back a register even when past
6785 the area declared by REG_PARM_STACK_SPACE will allocate
6786 space appropriately, and will not copy the data onto the
6787 stack, exactly as we desire.
6789 This is due to locate_and_pad_parm being called in
6790 expand_call whenever reg_parm_stack_space > 0, which
6791 while beneficial to our example here, would seem to be
6792 in error from what had been intended. Ho hum... -- r~ */
6800 if ((regno - SPARC_FP_ARG_FIRST) < SPARC_INT_ARG_MAX * 2)
6804 /* On incoming, we don't need to know that the value
6805 is passed in %f0 and %i0, and it confuses other parts
6806 causing needless spillage even on the simplest cases. */
6810 intreg = (SPARC_OUTGOING_INT_ARG_FIRST
6811 + (regno - SPARC_FP_ARG_FIRST) / 2);
6813 v0 = gen_rtx_EXPR_LIST (VOIDmode, reg, const0_rtx);
6814 v1 = gen_rtx_EXPR_LIST (VOIDmode, gen_rtx_REG (mode, intreg),
6816 return gen_rtx_PARALLEL (mode, gen_rtvec (2, v0, v1));
6820 v0 = gen_rtx_EXPR_LIST (VOIDmode, NULL_RTX, const0_rtx);
6821 v1 = gen_rtx_EXPR_LIST (VOIDmode, reg, const0_rtx);
6822 return gen_rtx_PARALLEL (mode, gen_rtvec (2, v0, v1));
6827 /* All other aggregate types are passed in an integer register in a mode
6828 corresponding to the size of the type. */
6829 else if (type && AGGREGATE_TYPE_P (type))
6831 HOST_WIDE_INT size = int_size_in_bytes (type);
6832 gcc_assert (size <= 16);
6834 mode = mode_for_size (size * BITS_PER_UNIT, MODE_INT, 0);
6837 return gen_rtx_REG (mode, regno);
6840 /* Handle the TARGET_FUNCTION_ARG target hook. */
6843 sparc_function_arg (cumulative_args_t cum, enum machine_mode mode,
6844 const_tree type, bool named)
6846 return sparc_function_arg_1 (cum, mode, type, named, false);
6849 /* Handle the TARGET_FUNCTION_INCOMING_ARG target hook. */
6852 sparc_function_incoming_arg (cumulative_args_t cum, enum machine_mode mode,
6853 const_tree type, bool named)
6855 return sparc_function_arg_1 (cum, mode, type, named, true);
6858 /* For sparc64, objects requiring 16 byte alignment are passed that way. */
6861 sparc_function_arg_boundary (enum machine_mode mode, const_tree type)
6863 return ((TARGET_ARCH64
6864 && (GET_MODE_ALIGNMENT (mode) == 128
6865 || (type && TYPE_ALIGN (type) == 128)))
6870 /* For an arg passed partly in registers and partly in memory,
6871 this is the number of bytes of registers used.
6872 For args passed entirely in registers or entirely in memory, zero.
6874 Any arg that starts in the first 6 regs but won't entirely fit in them
6875 needs partial registers on v8. On v9, structures with integer
6876 values in arg slots 5,6 will be passed in %o5 and SP+176, and complex fp
6877 values that begin in the last fp reg [where "last fp reg" varies with the
6878 mode] will be split between that reg and memory. */
6881 sparc_arg_partial_bytes (cumulative_args_t cum, enum machine_mode mode,
6882 tree type, bool named)
6884 int slotno, regno, padding;
6886 /* We pass false for incoming_p here, it doesn't matter. */
6887 slotno = function_arg_slotno (get_cumulative_args (cum), mode, type, named,
6888 false, ®no, &padding);
6895 if ((slotno + (mode == BLKmode
6896 ? ROUND_ADVANCE (int_size_in_bytes (type))
6897 : ROUND_ADVANCE (GET_MODE_SIZE (mode))))
6898 > SPARC_INT_ARG_MAX)
6899 return (SPARC_INT_ARG_MAX - slotno) * UNITS_PER_WORD;
6903 /* We are guaranteed by pass_by_reference that the size of the
6904 argument is not greater than 16 bytes, so we only need to return
6905 one word if the argument is partially passed in registers. */
6907 if (type && AGGREGATE_TYPE_P (type))
6909 int size = int_size_in_bytes (type);
6911 if (size > UNITS_PER_WORD
6912 && slotno == SPARC_INT_ARG_MAX - 1)
6913 return UNITS_PER_WORD;
6915 else if (GET_MODE_CLASS (mode) == MODE_COMPLEX_INT
6916 || (GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT
6917 && ! (TARGET_FPU && named)))
6919 /* The complex types are passed as packed types. */
6920 if (GET_MODE_SIZE (mode) > UNITS_PER_WORD
6921 && slotno == SPARC_INT_ARG_MAX - 1)
6922 return UNITS_PER_WORD;
6924 else if (GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT)
6926 if ((slotno + GET_MODE_SIZE (mode) / UNITS_PER_WORD)
6928 return UNITS_PER_WORD;
6935 /* Handle the TARGET_PASS_BY_REFERENCE target hook.
6936 Specify whether to pass the argument by reference. */
6939 sparc_pass_by_reference (cumulative_args_t cum ATTRIBUTE_UNUSED,
6940 enum machine_mode mode, const_tree type,
6941 bool named ATTRIBUTE_UNUSED)
6944 /* Original SPARC 32-bit ABI says that structures and unions,
6945 and quad-precision floats are passed by reference. For Pascal,
6946 also pass arrays by reference. All other base types are passed
6949 Extended ABI (as implemented by the Sun compiler) says that all
6950 complex floats are passed by reference. Pass complex integers
6951 in registers up to 8 bytes. More generally, enforce the 2-word
6952 cap for passing arguments in registers.
6954 Vector ABI (as implemented by the Sun VIS SDK) says that vector
6955 integers are passed like floats of the same size, that is in
6956 registers up to 8 bytes. Pass all vector floats by reference
6957 like structure and unions. */
6958 return ((type && (AGGREGATE_TYPE_P (type) || VECTOR_FLOAT_TYPE_P (type)))
6960 /* Catch CDImode, TFmode, DCmode and TCmode. */
6961 || GET_MODE_SIZE (mode) > 8
6963 && TREE_CODE (type) == VECTOR_TYPE
6964 && (unsigned HOST_WIDE_INT) int_size_in_bytes (type) > 8));
6966 /* Original SPARC 64-bit ABI says that structures and unions
6967 smaller than 16 bytes are passed in registers, as well as
6968 all other base types.
6970 Extended ABI (as implemented by the Sun compiler) says that
6971 complex floats are passed in registers up to 16 bytes. Pass
6972 all complex integers in registers up to 16 bytes. More generally,
6973 enforce the 2-word cap for passing arguments in registers.
6975 Vector ABI (as implemented by the Sun VIS SDK) says that vector
6976 integers are passed like floats of the same size, that is in
6977 registers (up to 16 bytes). Pass all vector floats like structure
6980 && (AGGREGATE_TYPE_P (type) || TREE_CODE (type) == VECTOR_TYPE)
6981 && (unsigned HOST_WIDE_INT) int_size_in_bytes (type) > 16)
6982 /* Catch CTImode and TCmode. */
6983 || GET_MODE_SIZE (mode) > 16);
6986 /* Handle the TARGET_FUNCTION_ARG_ADVANCE hook.
6987 Update the data in CUM to advance over an argument
6988 of mode MODE and data type TYPE.
6989 TYPE is null for libcalls where that information may not be available. */
6992 sparc_function_arg_advance (cumulative_args_t cum_v, enum machine_mode mode,
6993 const_tree type, bool named)
6995 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
6998 /* We pass false for incoming_p here, it doesn't matter. */
6999 function_arg_slotno (cum, mode, type, named, false, ®no, &padding);
7001 /* If argument requires leading padding, add it. */
7002 cum->words += padding;
7006 cum->words += (mode != BLKmode
7007 ? ROUND_ADVANCE (GET_MODE_SIZE (mode))
7008 : ROUND_ADVANCE (int_size_in_bytes (type)));
7012 if (type && AGGREGATE_TYPE_P (type))
7014 int size = int_size_in_bytes (type);
7018 else if (size <= 16)
7020 else /* passed by reference */
7025 cum->words += (mode != BLKmode
7026 ? ROUND_ADVANCE (GET_MODE_SIZE (mode))
7027 : ROUND_ADVANCE (int_size_in_bytes (type)));
7032 /* Handle the FUNCTION_ARG_PADDING macro.
7033 For the 64 bit ABI structs are always stored left shifted in their
7037 function_arg_padding (enum machine_mode mode, const_tree type)
7039 if (TARGET_ARCH64 && type != 0 && AGGREGATE_TYPE_P (type))
7042 /* Fall back to the default. */
7043 return DEFAULT_FUNCTION_ARG_PADDING (mode, type);
7046 /* Handle the TARGET_RETURN_IN_MEMORY target hook.
7047 Specify whether to return the return value in memory. */
7050 sparc_return_in_memory (const_tree type, const_tree fntype ATTRIBUTE_UNUSED)
7053 /* Original SPARC 32-bit ABI says that structures and unions,
7054 and quad-precision floats are returned in memory. All other
7055 base types are returned in registers.
7057 Extended ABI (as implemented by the Sun compiler) says that
7058 all complex floats are returned in registers (8 FP registers
7059 at most for '_Complex long double'). Return all complex integers
7060 in registers (4 at most for '_Complex long long').
7062 Vector ABI (as implemented by the Sun VIS SDK) says that vector
7063 integers are returned like floats of the same size, that is in
7064 registers up to 8 bytes and in memory otherwise. Return all
7065 vector floats in memory like structure and unions; note that
7066 they always have BLKmode like the latter. */
7067 return (TYPE_MODE (type) == BLKmode
7068 || TYPE_MODE (type) == TFmode
7069 || (TREE_CODE (type) == VECTOR_TYPE
7070 && (unsigned HOST_WIDE_INT) int_size_in_bytes (type) > 8));
7072 /* Original SPARC 64-bit ABI says that structures and unions
7073 smaller than 32 bytes are returned in registers, as well as
7074 all other base types.
7076 Extended ABI (as implemented by the Sun compiler) says that all
7077 complex floats are returned in registers (8 FP registers at most
7078 for '_Complex long double'). Return all complex integers in
7079 registers (4 at most for '_Complex TItype').
7081 Vector ABI (as implemented by the Sun VIS SDK) says that vector
7082 integers are returned like floats of the same size, that is in
7083 registers. Return all vector floats like structure and unions;
7084 note that they always have BLKmode like the latter. */
7085 return (TYPE_MODE (type) == BLKmode
7086 && (unsigned HOST_WIDE_INT) int_size_in_bytes (type) > 32);
7089 /* Handle the TARGET_STRUCT_VALUE target hook.
7090 Return where to find the structure return value address. */
7093 sparc_struct_value_rtx (tree fndecl, int incoming)
7102 mem = gen_frame_mem (Pmode, plus_constant (Pmode, frame_pointer_rtx,
7103 STRUCT_VALUE_OFFSET));
7105 mem = gen_frame_mem (Pmode, plus_constant (Pmode, stack_pointer_rtx,
7106 STRUCT_VALUE_OFFSET));
7108 /* Only follow the SPARC ABI for fixed-size structure returns.
7109 Variable size structure returns are handled per the normal
7110 procedures in GCC. This is enabled by -mstd-struct-return */
7112 && sparc_std_struct_return
7113 && TYPE_SIZE_UNIT (TREE_TYPE (fndecl))
7114 && TREE_CODE (TYPE_SIZE_UNIT (TREE_TYPE (fndecl))) == INTEGER_CST)
7116 /* We must check and adjust the return address, as it is
7117 optional as to whether the return object is really
7119 rtx ret_reg = gen_rtx_REG (Pmode, 31);
7120 rtx scratch = gen_reg_rtx (SImode);
7121 rtx endlab = gen_label_rtx ();
7123 /* Calculate the return object size */
7124 tree size = TYPE_SIZE_UNIT (TREE_TYPE (fndecl));
7125 rtx size_rtx = GEN_INT (TREE_INT_CST_LOW (size) & 0xfff);
7126 /* Construct a temporary return value */
7128 = assign_stack_local (Pmode, TREE_INT_CST_LOW (size), 0);
7130 /* Implement SPARC 32-bit psABI callee return struct checking:
7132 Fetch the instruction where we will return to and see if
7133 it's an unimp instruction (the most significant 10 bits
7135 emit_move_insn (scratch, gen_rtx_MEM (SImode,
7136 plus_constant (Pmode,
7138 /* Assume the size is valid and pre-adjust */
7139 emit_insn (gen_add3_insn (ret_reg, ret_reg, GEN_INT (4)));
7140 emit_cmp_and_jump_insns (scratch, size_rtx, EQ, const0_rtx, SImode,
7142 emit_insn (gen_sub3_insn (ret_reg, ret_reg, GEN_INT (4)));
7143 /* Write the address of the memory pointed to by temp_val into
7144 the memory pointed to by mem */
7145 emit_move_insn (mem, XEXP (temp_val, 0));
7146 emit_label (endlab);
7153 /* Handle TARGET_FUNCTION_VALUE, and TARGET_LIBCALL_VALUE target hook.
7154 For v9, function return values are subject to the same rules as arguments,
7155 except that up to 32 bytes may be returned in registers. */
7158 sparc_function_value_1 (const_tree type, enum machine_mode mode,
7161 /* Beware that the two values are swapped here wrt function_arg. */
7162 int regbase = (outgoing
7163 ? SPARC_INCOMING_INT_ARG_FIRST
7164 : SPARC_OUTGOING_INT_ARG_FIRST);
7165 enum mode_class mclass = GET_MODE_CLASS (mode);
7168 /* Vector types deserve special treatment because they are polymorphic wrt
7169 their mode, depending upon whether VIS instructions are enabled. */
7170 if (type && TREE_CODE (type) == VECTOR_TYPE)
7172 HOST_WIDE_INT size = int_size_in_bytes (type);
7173 gcc_assert ((TARGET_ARCH32 && size <= 8)
7174 || (TARGET_ARCH64 && size <= 32));
7176 if (mode == BLKmode)
7177 return function_arg_vector_value (size,
7178 SPARC_FP_ARG_FIRST);
7180 mclass = MODE_FLOAT;
7183 if (TARGET_ARCH64 && type)
7185 /* Structures up to 32 bytes in size are returned in registers. */
7186 if (TREE_CODE (type) == RECORD_TYPE)
7188 HOST_WIDE_INT size = int_size_in_bytes (type);
7189 gcc_assert (size <= 32);
7191 return function_arg_record_value (type, mode, 0, 1, regbase);
7194 /* Unions up to 32 bytes in size are returned in integer registers. */
7195 else if (TREE_CODE (type) == UNION_TYPE)
7197 HOST_WIDE_INT size = int_size_in_bytes (type);
7198 gcc_assert (size <= 32);
7200 return function_arg_union_value (size, mode, 0, regbase);
7203 /* Objects that require it are returned in FP registers. */
7204 else if (mclass == MODE_FLOAT || mclass == MODE_COMPLEX_FLOAT)
7207 /* All other aggregate types are returned in an integer register in a
7208 mode corresponding to the size of the type. */
7209 else if (AGGREGATE_TYPE_P (type))
7211 /* All other aggregate types are passed in an integer register
7212 in a mode corresponding to the size of the type. */
7213 HOST_WIDE_INT size = int_size_in_bytes (type);
7214 gcc_assert (size <= 32);
7216 mode = mode_for_size (size * BITS_PER_UNIT, MODE_INT, 0);
7218 /* ??? We probably should have made the same ABI change in
7219 3.4.0 as the one we made for unions. The latter was
7220 required by the SCD though, while the former is not
7221 specified, so we favored compatibility and efficiency.
7223 Now we're stuck for aggregates larger than 16 bytes,
7224 because OImode vanished in the meantime. Let's not
7225 try to be unduly clever, and simply follow the ABI
7226 for unions in that case. */
7227 if (mode == BLKmode)
7228 return function_arg_union_value (size, mode, 0, regbase);
7233 /* We should only have pointer and integer types at this point. This
7234 must match sparc_promote_function_mode. */
7235 else if (mclass == MODE_INT && GET_MODE_SIZE (mode) < UNITS_PER_WORD)
7239 /* We should only have pointer and integer types at this point. This must
7240 match sparc_promote_function_mode. */
7241 else if (TARGET_ARCH32
7242 && mclass == MODE_INT
7243 && GET_MODE_SIZE (mode) < UNITS_PER_WORD)
7246 if ((mclass == MODE_FLOAT || mclass == MODE_COMPLEX_FLOAT) && TARGET_FPU)
7247 regno = SPARC_FP_ARG_FIRST;
7251 return gen_rtx_REG (mode, regno);
7254 /* Handle TARGET_FUNCTION_VALUE.
7255 On the SPARC, the value is found in the first "output" register, but the
7256 called function leaves it in the first "input" register. */
7259 sparc_function_value (const_tree valtype,
7260 const_tree fn_decl_or_type ATTRIBUTE_UNUSED,
7263 return sparc_function_value_1 (valtype, TYPE_MODE (valtype), outgoing);
7266 /* Handle TARGET_LIBCALL_VALUE. */
7269 sparc_libcall_value (enum machine_mode mode,
7270 const_rtx fun ATTRIBUTE_UNUSED)
7272 return sparc_function_value_1 (NULL_TREE, mode, false);
7275 /* Handle FUNCTION_VALUE_REGNO_P.
7276 On the SPARC, the first "output" reg is used for integer values, and the
7277 first floating point register is used for floating point values. */
7280 sparc_function_value_regno_p (const unsigned int regno)
7282 return (regno == 8 || regno == 32);
7285 /* Do what is necessary for `va_start'. We look at the current function
7286 to determine if stdarg or varargs is used and return the address of
7287 the first unnamed parameter. */
7290 sparc_builtin_saveregs (void)
7292 int first_reg = crtl->args.info.words;
7296 for (regno = first_reg; regno < SPARC_INT_ARG_MAX; regno++)
7297 emit_move_insn (gen_rtx_MEM (word_mode,
7298 gen_rtx_PLUS (Pmode,
7300 GEN_INT (FIRST_PARM_OFFSET (0)
7303 gen_rtx_REG (word_mode,
7304 SPARC_INCOMING_INT_ARG_FIRST + regno));
7306 address = gen_rtx_PLUS (Pmode,
7308 GEN_INT (FIRST_PARM_OFFSET (0)
7309 + UNITS_PER_WORD * first_reg));
7314 /* Implement `va_start' for stdarg. */
7317 sparc_va_start (tree valist, rtx nextarg)
7319 nextarg = expand_builtin_saveregs ();
7320 std_expand_builtin_va_start (valist, nextarg);
7323 /* Implement `va_arg' for stdarg. */
7326 sparc_gimplify_va_arg (tree valist, tree type, gimple_seq *pre_p,
7329 HOST_WIDE_INT size, rsize, align;
7332 tree ptrtype = build_pointer_type (type);
7334 if (pass_by_reference (NULL, TYPE_MODE (type), type, false))
7337 size = rsize = UNITS_PER_WORD;
7343 size = int_size_in_bytes (type);
7344 rsize = (size + UNITS_PER_WORD - 1) & -UNITS_PER_WORD;
7349 /* For SPARC64, objects requiring 16-byte alignment get it. */
7350 if (TYPE_ALIGN (type) >= 2 * (unsigned) BITS_PER_WORD)
7351 align = 2 * UNITS_PER_WORD;
7353 /* SPARC-V9 ABI states that structures up to 16 bytes in size
7354 are left-justified in their slots. */
7355 if (AGGREGATE_TYPE_P (type))
7358 size = rsize = UNITS_PER_WORD;
7368 incr = fold_build_pointer_plus_hwi (incr, align - 1);
7369 incr = fold_convert (sizetype, incr);
7370 incr = fold_build2 (BIT_AND_EXPR, sizetype, incr,
7372 incr = fold_convert (ptr_type_node, incr);
7375 gimplify_expr (&incr, pre_p, post_p, is_gimple_val, fb_rvalue);
7378 if (BYTES_BIG_ENDIAN && size < rsize)
7379 addr = fold_build_pointer_plus_hwi (incr, rsize - size);
7383 addr = fold_convert (build_pointer_type (ptrtype), addr);
7384 addr = build_va_arg_indirect_ref (addr);
7387 /* If the address isn't aligned properly for the type, we need a temporary.
7388 FIXME: This is inefficient, usually we can do this in registers. */
7389 else if (align == 0 && TYPE_ALIGN (type) > BITS_PER_WORD)
7391 tree tmp = create_tmp_var (type, "va_arg_tmp");
7392 tree dest_addr = build_fold_addr_expr (tmp);
7393 tree copy = build_call_expr (builtin_decl_implicit (BUILT_IN_MEMCPY),
7394 3, dest_addr, addr, size_int (rsize));
7395 TREE_ADDRESSABLE (tmp) = 1;
7396 gimplify_and_add (copy, pre_p);
7401 addr = fold_convert (ptrtype, addr);
7403 incr = fold_build_pointer_plus_hwi (incr, rsize);
7404 gimplify_assign (valist, incr, post_p);
7406 return build_va_arg_indirect_ref (addr);
7409 /* Implement the TARGET_VECTOR_MODE_SUPPORTED_P target hook.
7410 Specify whether the vector mode is supported by the hardware. */
7413 sparc_vector_mode_supported_p (enum machine_mode mode)
7415 return TARGET_VIS && VECTOR_MODE_P (mode) ? true : false;
7418 /* Implement the TARGET_VECTORIZE_PREFERRED_SIMD_MODE target hook. */
7420 static enum machine_mode
7421 sparc_preferred_simd_mode (enum machine_mode mode)
7439 /* Return the string to output an unconditional branch to LABEL, which is
7440 the operand number of the label.
7442 DEST is the destination insn (i.e. the label), INSN is the source. */
7445 output_ubranch (rtx dest, rtx insn)
7447 static char string[64];
7448 bool v9_form = false;
7452 /* Even if we are trying to use cbcond for this, evaluate
7453 whether we can use V9 branches as our backup plan. */
7456 if (INSN_ADDRESSES_SET_P ())
7457 delta = (INSN_ADDRESSES (INSN_UID (dest))
7458 - INSN_ADDRESSES (INSN_UID (insn)));
7460 /* Leave some instructions for "slop". */
7461 if (TARGET_V9 && delta >= -260000 && delta < 260000)
7466 bool emit_nop = emit_cbcond_nop (insn);
7470 if (delta < -500 || delta > 500)
7476 rval = "ba,a,pt\t%%xcc, %l0";
7483 rval = "cwbe\t%%g0, %%g0, %l0\n\tnop";
7485 rval = "cwbe\t%%g0, %%g0, %l0";
7491 strcpy (string, "ba%*,pt\t%%xcc, ");
7493 strcpy (string, "b%*\t");
7495 p = strchr (string, '\0');
7506 /* Return the string to output a conditional branch to LABEL, which is
7507 the operand number of the label. OP is the conditional expression.
7508 XEXP (OP, 0) is assumed to be a condition code register (integer or
7509 floating point) and its mode specifies what kind of comparison we made.
7511 DEST is the destination insn (i.e. the label), INSN is the source.
7513 REVERSED is nonzero if we should reverse the sense of the comparison.
7515 ANNUL is nonzero if we should generate an annulling branch. */
7518 output_cbranch (rtx op, rtx dest, int label, int reversed, int annul,
7521 static char string[64];
7522 enum rtx_code code = GET_CODE (op);
7523 rtx cc_reg = XEXP (op, 0);
7524 enum machine_mode mode = GET_MODE (cc_reg);
7525 const char *labelno, *branch;
7526 int spaces = 8, far;
7529 /* v9 branches are limited to +-1MB. If it is too far away,
7542 fbne,a,pn %fcc2, .LC29
7550 far = TARGET_V9 && (get_attr_length (insn) >= 3);
7553 /* Reversal of FP compares takes care -- an ordered compare
7554 becomes an unordered compare and vice versa. */
7555 if (mode == CCFPmode || mode == CCFPEmode)
7556 code = reverse_condition_maybe_unordered (code);
7558 code = reverse_condition (code);
7561 /* Start by writing the branch condition. */
7562 if (mode == CCFPmode || mode == CCFPEmode)
7613 /* ??? !v9: FP branches cannot be preceded by another floating point
7614 insn. Because there is currently no concept of pre-delay slots,
7615 we can fix this only by always emitting a nop before a floating
7620 strcpy (string, "nop\n\t");
7621 strcat (string, branch);
7634 if (mode == CC_NOOVmode || mode == CCX_NOOVmode)
7646 if (mode == CC_NOOVmode || mode == CCX_NOOVmode)
7667 strcpy (string, branch);
7669 spaces -= strlen (branch);
7670 p = strchr (string, '\0');
7672 /* Now add the annulling, the label, and a possible noop. */
7685 if (! far && insn && INSN_ADDRESSES_SET_P ())
7687 int delta = (INSN_ADDRESSES (INSN_UID (dest))
7688 - INSN_ADDRESSES (INSN_UID (insn)));
7689 /* Leave some instructions for "slop". */
7690 if (delta < -260000 || delta >= 260000)
7694 if (mode == CCFPmode || mode == CCFPEmode)
7696 static char v9_fcc_labelno[] = "%%fccX, ";
7697 /* Set the char indicating the number of the fcc reg to use. */
7698 v9_fcc_labelno[5] = REGNO (cc_reg) - SPARC_FIRST_V9_FCC_REG + '0';
7699 labelno = v9_fcc_labelno;
7702 gcc_assert (REGNO (cc_reg) == SPARC_FCC_REG);
7706 else if (mode == CCXmode || mode == CCX_NOOVmode)
7708 labelno = "%%xcc, ";
7713 labelno = "%%icc, ";
7718 if (*labelno && insn && (note = find_reg_note (insn, REG_BR_PROB, NULL_RTX)))
7721 ((INTVAL (XEXP (note, 0)) >= REG_BR_PROB_BASE / 2) ^ far)
7734 strcpy (p, labelno);
7735 p = strchr (p, '\0');
7738 strcpy (p, ".+12\n\t nop\n\tb\t");
7739 /* Skip the next insn if requested or
7740 if we know that it will be a nop. */
7741 if (annul || ! final_sequence)
7755 /* Emit a library call comparison between floating point X and Y.
7756 COMPARISON is the operator to compare with (EQ, NE, GT, etc).
7757 Return the new operator to be used in the comparison sequence.
7759 TARGET_ARCH64 uses _Qp_* functions, which use pointers to TFmode
7760 values as arguments instead of the TFmode registers themselves,
7761 that's why we cannot call emit_float_lib_cmp. */
7764 sparc_emit_float_lib_cmp (rtx x, rtx y, enum rtx_code comparison)
7767 rtx slot0, slot1, result, tem, tem2, libfunc;
7768 enum machine_mode mode;
7769 enum rtx_code new_comparison;
7774 qpfunc = (TARGET_ARCH64 ? "_Qp_feq" : "_Q_feq");
7778 qpfunc = (TARGET_ARCH64 ? "_Qp_fne" : "_Q_fne");
7782 qpfunc = (TARGET_ARCH64 ? "_Qp_fgt" : "_Q_fgt");
7786 qpfunc = (TARGET_ARCH64 ? "_Qp_fge" : "_Q_fge");
7790 qpfunc = (TARGET_ARCH64 ? "_Qp_flt" : "_Q_flt");
7794 qpfunc = (TARGET_ARCH64 ? "_Qp_fle" : "_Q_fle");
7805 qpfunc = (TARGET_ARCH64 ? "_Qp_cmp" : "_Q_cmp");
7816 tree expr = MEM_EXPR (x);
7818 mark_addressable (expr);
7823 slot0 = assign_stack_temp (TFmode, GET_MODE_SIZE(TFmode));
7824 emit_move_insn (slot0, x);
7829 tree expr = MEM_EXPR (y);
7831 mark_addressable (expr);
7836 slot1 = assign_stack_temp (TFmode, GET_MODE_SIZE(TFmode));
7837 emit_move_insn (slot1, y);
7840 libfunc = gen_rtx_SYMBOL_REF (Pmode, qpfunc);
7841 emit_library_call (libfunc, LCT_NORMAL,
7843 XEXP (slot0, 0), Pmode,
7844 XEXP (slot1, 0), Pmode);
7849 libfunc = gen_rtx_SYMBOL_REF (Pmode, qpfunc);
7850 emit_library_call (libfunc, LCT_NORMAL,
7852 x, TFmode, y, TFmode);
7857 /* Immediately move the result of the libcall into a pseudo
7858 register so reload doesn't clobber the value if it needs
7859 the return register for a spill reg. */
7860 result = gen_reg_rtx (mode);
7861 emit_move_insn (result, hard_libcall_value (mode, libfunc));
7866 return gen_rtx_NE (VOIDmode, result, const0_rtx);
7869 new_comparison = (comparison == UNORDERED ? EQ : NE);
7870 return gen_rtx_fmt_ee (new_comparison, VOIDmode, result, GEN_INT(3));
7873 new_comparison = (comparison == UNGT ? GT : NE);
7874 return gen_rtx_fmt_ee (new_comparison, VOIDmode, result, const1_rtx);
7876 return gen_rtx_NE (VOIDmode, result, const2_rtx);
7878 tem = gen_reg_rtx (mode);
7880 emit_insn (gen_andsi3 (tem, result, const1_rtx));
7882 emit_insn (gen_anddi3 (tem, result, const1_rtx));
7883 return gen_rtx_NE (VOIDmode, tem, const0_rtx);
7886 tem = gen_reg_rtx (mode);
7888 emit_insn (gen_addsi3 (tem, result, const1_rtx));
7890 emit_insn (gen_adddi3 (tem, result, const1_rtx));
7891 tem2 = gen_reg_rtx (mode);
7893 emit_insn (gen_andsi3 (tem2, tem, const2_rtx));
7895 emit_insn (gen_anddi3 (tem2, tem, const2_rtx));
7896 new_comparison = (comparison == UNEQ ? EQ : NE);
7897 return gen_rtx_fmt_ee (new_comparison, VOIDmode, tem2, const0_rtx);
7903 /* Generate an unsigned DImode to FP conversion. This is the same code
7904 optabs would emit if we didn't have TFmode patterns. */
7907 sparc_emit_floatunsdi (rtx *operands, enum machine_mode mode)
7909 rtx neglab, donelab, i0, i1, f0, in, out;
7912 in = force_reg (DImode, operands[1]);
7913 neglab = gen_label_rtx ();
7914 donelab = gen_label_rtx ();
7915 i0 = gen_reg_rtx (DImode);
7916 i1 = gen_reg_rtx (DImode);
7917 f0 = gen_reg_rtx (mode);
7919 emit_cmp_and_jump_insns (in, const0_rtx, LT, const0_rtx, DImode, 0, neglab);
7921 emit_insn (gen_rtx_SET (VOIDmode, out, gen_rtx_FLOAT (mode, in)));
7922 emit_jump_insn (gen_jump (donelab));
7925 emit_label (neglab);
7927 emit_insn (gen_lshrdi3 (i0, in, const1_rtx));
7928 emit_insn (gen_anddi3 (i1, in, const1_rtx));
7929 emit_insn (gen_iordi3 (i0, i0, i1));
7930 emit_insn (gen_rtx_SET (VOIDmode, f0, gen_rtx_FLOAT (mode, i0)));
7931 emit_insn (gen_rtx_SET (VOIDmode, out, gen_rtx_PLUS (mode, f0, f0)));
7933 emit_label (donelab);
7936 /* Generate an FP to unsigned DImode conversion. This is the same code
7937 optabs would emit if we didn't have TFmode patterns. */
7940 sparc_emit_fixunsdi (rtx *operands, enum machine_mode mode)
7942 rtx neglab, donelab, i0, i1, f0, in, out, limit;
7945 in = force_reg (mode, operands[1]);
7946 neglab = gen_label_rtx ();
7947 donelab = gen_label_rtx ();
7948 i0 = gen_reg_rtx (DImode);
7949 i1 = gen_reg_rtx (DImode);
7950 limit = gen_reg_rtx (mode);
7951 f0 = gen_reg_rtx (mode);
7953 emit_move_insn (limit,
7954 CONST_DOUBLE_FROM_REAL_VALUE (
7955 REAL_VALUE_ATOF ("9223372036854775808.0", mode), mode));
7956 emit_cmp_and_jump_insns (in, limit, GE, NULL_RTX, mode, 0, neglab);
7958 emit_insn (gen_rtx_SET (VOIDmode,
7960 gen_rtx_FIX (DImode, gen_rtx_FIX (mode, in))));
7961 emit_jump_insn (gen_jump (donelab));
7964 emit_label (neglab);
7966 emit_insn (gen_rtx_SET (VOIDmode, f0, gen_rtx_MINUS (mode, in, limit)));
7967 emit_insn (gen_rtx_SET (VOIDmode,
7969 gen_rtx_FIX (DImode, gen_rtx_FIX (mode, f0))));
7970 emit_insn (gen_movdi (i1, const1_rtx));
7971 emit_insn (gen_ashldi3 (i1, i1, GEN_INT (63)));
7972 emit_insn (gen_xordi3 (out, i0, i1));
7974 emit_label (donelab);
7977 /* Return the string to output a compare and branch instruction to DEST.
7978 DEST is the destination insn (i.e. the label), INSN is the source,
7979 and OP is the conditional expression. */
7982 output_cbcond (rtx op, rtx dest, rtx insn)
7984 enum machine_mode mode = GET_MODE (XEXP (op, 0));
7985 enum rtx_code code = GET_CODE (op);
7986 const char *cond_str, *tmpl;
7987 int far, emit_nop, len;
7988 static char string[64];
7991 /* Compare and Branch is limited to +-2KB. If it is too far away,
8003 len = get_attr_length (insn);
8006 emit_nop = len == 2;
8009 code = reverse_condition (code);
8011 size_char = ((mode == SImode) ? 'w' : 'x');
8024 if (mode == CC_NOOVmode || mode == CCX_NOOVmode)
8039 if (mode == CC_NOOVmode || mode == CCX_NOOVmode)
8067 int veryfar = 1, delta;
8069 if (INSN_ADDRESSES_SET_P ())
8071 delta = (INSN_ADDRESSES (INSN_UID (dest))
8072 - INSN_ADDRESSES (INSN_UID (insn)));
8073 /* Leave some instructions for "slop". */
8074 if (delta >= -260000 && delta < 260000)
8079 tmpl = "c%cb%s\t%%1, %%2, .+16\n\tnop\n\tb\t%%3\n\tnop";
8081 tmpl = "c%cb%s\t%%1, %%2, .+16\n\tnop\n\tba,pt\t%%%%xcc, %%3\n\tnop";
8086 tmpl = "c%cb%s\t%%1, %%2, %%3\n\tnop";
8088 tmpl = "c%cb%s\t%%1, %%2, %%3";
8091 snprintf (string, sizeof(string), tmpl, size_char, cond_str);
8096 /* Return the string to output a conditional branch to LABEL, testing
8097 register REG. LABEL is the operand number of the label; REG is the
8098 operand number of the reg. OP is the conditional expression. The mode
8099 of REG says what kind of comparison we made.
8101 DEST is the destination insn (i.e. the label), INSN is the source.
8103 REVERSED is nonzero if we should reverse the sense of the comparison.
8105 ANNUL is nonzero if we should generate an annulling branch. */
8108 output_v9branch (rtx op, rtx dest, int reg, int label, int reversed,
8109 int annul, rtx insn)
8111 static char string[64];
8112 enum rtx_code code = GET_CODE (op);
8113 enum machine_mode mode = GET_MODE (XEXP (op, 0));
8118 /* branch on register are limited to +-128KB. If it is too far away,
8131 brgez,a,pn %o1, .LC29
8137 ba,pt %xcc, .LC29 */
8139 far = get_attr_length (insn) >= 3;
8141 /* If not floating-point or if EQ or NE, we can just reverse the code. */
8143 code = reverse_condition (code);
8145 /* Only 64 bit versions of these instructions exist. */
8146 gcc_assert (mode == DImode);
8148 /* Start by writing the branch condition. */
8153 strcpy (string, "brnz");
8157 strcpy (string, "brz");
8161 strcpy (string, "brgez");
8165 strcpy (string, "brlz");
8169 strcpy (string, "brlez");
8173 strcpy (string, "brgz");
8180 p = strchr (string, '\0');
8182 /* Now add the annulling, reg, label, and nop. */
8189 if (insn && (note = find_reg_note (insn, REG_BR_PROB, NULL_RTX)))
8192 ((INTVAL (XEXP (note, 0)) >= REG_BR_PROB_BASE / 2) ^ far)
8197 *p = p < string + 8 ? '\t' : ' ';
8205 int veryfar = 1, delta;
8207 if (INSN_ADDRESSES_SET_P ())
8209 delta = (INSN_ADDRESSES (INSN_UID (dest))
8210 - INSN_ADDRESSES (INSN_UID (insn)));
8211 /* Leave some instructions for "slop". */
8212 if (delta >= -260000 && delta < 260000)
8216 strcpy (p, ".+12\n\t nop\n\t");
8217 /* Skip the next insn if requested or
8218 if we know that it will be a nop. */
8219 if (annul || ! final_sequence)
8229 strcpy (p, "ba,pt\t%%xcc, ");
8243 /* Return 1, if any of the registers of the instruction are %l[0-7] or %o[0-7].
8244 Such instructions cannot be used in the delay slot of return insn on v9.
8245 If TEST is 0, also rename all %i[0-7] registers to their %o[0-7] counterparts.
8249 epilogue_renumber (register rtx *where, int test)
8251 register const char *fmt;
8253 register enum rtx_code code;
8258 code = GET_CODE (*where);
8263 if (REGNO (*where) >= 8 && REGNO (*where) < 24) /* oX or lX */
8265 if (! test && REGNO (*where) >= 24 && REGNO (*where) < 32)
8266 *where = gen_rtx_REG (GET_MODE (*where), OUTGOING_REGNO (REGNO(*where)));
8274 /* Do not replace the frame pointer with the stack pointer because
8275 it can cause the delayed instruction to load below the stack.
8276 This occurs when instructions like:
8278 (set (reg/i:SI 24 %i0)
8279 (mem/f:SI (plus:SI (reg/f:SI 30 %fp)
8280 (const_int -20 [0xffffffec])) 0))
8282 are in the return delayed slot. */
8284 if (GET_CODE (XEXP (*where, 0)) == REG
8285 && REGNO (XEXP (*where, 0)) == HARD_FRAME_POINTER_REGNUM
8286 && (GET_CODE (XEXP (*where, 1)) != CONST_INT
8287 || INTVAL (XEXP (*where, 1)) < SPARC_STACK_BIAS))
8292 if (SPARC_STACK_BIAS
8293 && GET_CODE (XEXP (*where, 0)) == REG
8294 && REGNO (XEXP (*where, 0)) == HARD_FRAME_POINTER_REGNUM)
8302 fmt = GET_RTX_FORMAT (code);
8304 for (i = GET_RTX_LENGTH (code) - 1; i >= 0; i--)
8309 for (j = XVECLEN (*where, i) - 1; j >= 0; j--)
8310 if (epilogue_renumber (&(XVECEXP (*where, i, j)), test))
8313 else if (fmt[i] == 'e'
8314 && epilogue_renumber (&(XEXP (*where, i)), test))
8320 /* Leaf functions and non-leaf functions have different needs. */
8323 reg_leaf_alloc_order[] = REG_LEAF_ALLOC_ORDER;
8326 reg_nonleaf_alloc_order[] = REG_ALLOC_ORDER;
8328 static const int *const reg_alloc_orders[] = {
8329 reg_leaf_alloc_order,
8330 reg_nonleaf_alloc_order};
8333 order_regs_for_local_alloc (void)
8335 static int last_order_nonleaf = 1;
8337 if (df_regs_ever_live_p (15) != last_order_nonleaf)
8339 last_order_nonleaf = !last_order_nonleaf;
8340 memcpy ((char *) reg_alloc_order,
8341 (const char *) reg_alloc_orders[last_order_nonleaf],
8342 FIRST_PSEUDO_REGISTER * sizeof (int));
8346 /* Return 1 if REG and MEM are legitimate enough to allow the various
8347 mem<-->reg splits to be run. */
8350 sparc_splitdi_legitimate (rtx reg, rtx mem)
8352 /* Punt if we are here by mistake. */
8353 gcc_assert (reload_completed);
8355 /* We must have an offsettable memory reference. */
8356 if (! offsettable_memref_p (mem))
8359 /* If we have legitimate args for ldd/std, we do not want
8360 the split to happen. */
8361 if ((REGNO (reg) % 2) == 0
8362 && mem_min_alignment (mem, 8))
8369 /* Like sparc_splitdi_legitimate but for REG <--> REG moves. */
8372 sparc_split_regreg_legitimate (rtx reg1, rtx reg2)
8376 if (GET_CODE (reg1) == SUBREG)
8377 reg1 = SUBREG_REG (reg1);
8378 if (GET_CODE (reg1) != REG)
8380 regno1 = REGNO (reg1);
8382 if (GET_CODE (reg2) == SUBREG)
8383 reg2 = SUBREG_REG (reg2);
8384 if (GET_CODE (reg2) != REG)
8386 regno2 = REGNO (reg2);
8388 if (SPARC_INT_REG_P (regno1) && SPARC_INT_REG_P (regno2))
8393 if ((SPARC_INT_REG_P (regno1) && SPARC_FP_REG_P (regno2))
8394 || (SPARC_FP_REG_P (regno1) && SPARC_INT_REG_P (regno2)))
8401 /* Return 1 if x and y are some kind of REG and they refer to
8402 different hard registers. This test is guaranteed to be
8403 run after reload. */
8406 sparc_absnegfloat_split_legitimate (rtx x, rtx y)
8408 if (GET_CODE (x) != REG)
8410 if (GET_CODE (y) != REG)
8412 if (REGNO (x) == REGNO (y))
8417 /* Return 1 if REGNO (reg1) is even and REGNO (reg1) == REGNO (reg2) - 1.
8418 This makes them candidates for using ldd and std insns.
8420 Note reg1 and reg2 *must* be hard registers. */
8423 registers_ok_for_ldd_peep (rtx reg1, rtx reg2)
8425 /* We might have been passed a SUBREG. */
8426 if (GET_CODE (reg1) != REG || GET_CODE (reg2) != REG)
8429 if (REGNO (reg1) % 2 != 0)
8432 /* Integer ldd is deprecated in SPARC V9 */
8433 if (TARGET_V9 && SPARC_INT_REG_P (REGNO (reg1)))
8436 return (REGNO (reg1) == REGNO (reg2) - 1);
8439 /* Return 1 if the addresses in mem1 and mem2 are suitable for use in
8442 This can only happen when addr1 and addr2, the addresses in mem1
8443 and mem2, are consecutive memory locations (addr1 + 4 == addr2).
8444 addr1 must also be aligned on a 64-bit boundary.
8446 Also iff dependent_reg_rtx is not null it should not be used to
8447 compute the address for mem1, i.e. we cannot optimize a sequence
8459 But, note that the transformation from:
8464 is perfectly fine. Thus, the peephole2 patterns always pass us
8465 the destination register of the first load, never the second one.
8467 For stores we don't have a similar problem, so dependent_reg_rtx is
8471 mems_ok_for_ldd_peep (rtx mem1, rtx mem2, rtx dependent_reg_rtx)
8475 HOST_WIDE_INT offset1;
8477 /* The mems cannot be volatile. */
8478 if (MEM_VOLATILE_P (mem1) || MEM_VOLATILE_P (mem2))
8481 /* MEM1 should be aligned on a 64-bit boundary. */
8482 if (MEM_ALIGN (mem1) < 64)
8485 addr1 = XEXP (mem1, 0);
8486 addr2 = XEXP (mem2, 0);
8488 /* Extract a register number and offset (if used) from the first addr. */
8489 if (GET_CODE (addr1) == PLUS)
8491 /* If not a REG, return zero. */
8492 if (GET_CODE (XEXP (addr1, 0)) != REG)
8496 reg1 = REGNO (XEXP (addr1, 0));
8497 /* The offset must be constant! */
8498 if (GET_CODE (XEXP (addr1, 1)) != CONST_INT)
8500 offset1 = INTVAL (XEXP (addr1, 1));
8503 else if (GET_CODE (addr1) != REG)
8507 reg1 = REGNO (addr1);
8508 /* This was a simple (mem (reg)) expression. Offset is 0. */
8512 /* Make sure the second address is a (mem (plus (reg) (const_int). */
8513 if (GET_CODE (addr2) != PLUS)
8516 if (GET_CODE (XEXP (addr2, 0)) != REG
8517 || GET_CODE (XEXP (addr2, 1)) != CONST_INT)
8520 if (reg1 != REGNO (XEXP (addr2, 0)))
8523 if (dependent_reg_rtx != NULL_RTX && reg1 == REGNO (dependent_reg_rtx))
8526 /* The first offset must be evenly divisible by 8 to ensure the
8527 address is 64 bit aligned. */
8528 if (offset1 % 8 != 0)
8531 /* The offset for the second addr must be 4 more than the first addr. */
8532 if (INTVAL (XEXP (addr2, 1)) != offset1 + 4)
8535 /* All the tests passed. addr1 and addr2 are valid for ldd and std
8540 /* Return 1 if reg is a pseudo, or is the first register in
8541 a hard register pair. This makes it suitable for use in
8542 ldd and std insns. */
8545 register_ok_for_ldd (rtx reg)
8547 /* We might have been passed a SUBREG. */
8551 if (REGNO (reg) < FIRST_PSEUDO_REGISTER)
8552 return (REGNO (reg) % 2 == 0);
8557 /* Return 1 if OP, a MEM, has an address which is known to be
8558 aligned to an 8-byte boundary. */
8561 memory_ok_for_ldd (rtx op)
8563 /* In 64-bit mode, we assume that the address is word-aligned. */
8564 if (TARGET_ARCH32 && !mem_min_alignment (op, 8))
8567 if (! can_create_pseudo_p ()
8568 && !strict_memory_address_p (Pmode, XEXP (op, 0)))
8574 /* Implement TARGET_PRINT_OPERAND_PUNCT_VALID_P. */
8577 sparc_print_operand_punct_valid_p (unsigned char code)
8590 /* Implement TARGET_PRINT_OPERAND.
8591 Print operand X (an rtx) in assembler syntax to file FILE.
8592 CODE is a letter or dot (`z' in `%z0') or 0 if no letter was specified.
8593 For `%' followed by punctuation, CODE is the punctuation and X is null. */
8596 sparc_print_operand (FILE *file, rtx x, int code)
8601 /* Output an insn in a delay slot. */
8603 sparc_indent_opcode = 1;
8605 fputs ("\n\t nop", file);
8608 /* Output an annul flag if there's nothing for the delay slot and we
8609 are optimizing. This is always used with '(' below.
8610 Sun OS 4.1.1 dbx can't handle an annulled unconditional branch;
8611 this is a dbx bug. So, we only do this when optimizing.
8612 On UltraSPARC, a branch in a delay slot causes a pipeline flush.
8613 Always emit a nop in case the next instruction is a branch. */
8614 if (! final_sequence && (optimize && (int)sparc_cpu < PROCESSOR_V9))
8618 /* Output a 'nop' if there's nothing for the delay slot and we are
8619 not optimizing. This is always used with '*' above. */
8620 if (! final_sequence && ! (optimize && (int)sparc_cpu < PROCESSOR_V9))
8621 fputs ("\n\t nop", file);
8622 else if (final_sequence)
8623 sparc_indent_opcode = 1;
8626 /* Output the right displacement from the saved PC on function return.
8627 The caller may have placed an "unimp" insn immediately after the call
8628 so we have to account for it. This insn is used in the 32-bit ABI
8629 when calling a function that returns a non zero-sized structure. The
8630 64-bit ABI doesn't have it. Be careful to have this test be the same
8631 as that for the call. The exception is when sparc_std_struct_return
8632 is enabled, the psABI is followed exactly and the adjustment is made
8633 by the code in sparc_struct_value_rtx. The call emitted is the same
8634 when sparc_std_struct_return is enabled. */
8636 && cfun->returns_struct
8637 && !sparc_std_struct_return
8638 && DECL_SIZE (DECL_RESULT (current_function_decl))
8639 && TREE_CODE (DECL_SIZE (DECL_RESULT (current_function_decl)))
8641 && !integer_zerop (DECL_SIZE (DECL_RESULT (current_function_decl))))
8647 /* Output the Embedded Medium/Anywhere code model base register. */
8648 fputs (EMBMEDANY_BASE_REG, file);
8651 /* Print some local dynamic TLS name. */
8652 assemble_name (file, get_some_local_dynamic_name ());
8656 /* Adjust the operand to take into account a RESTORE operation. */
8657 if (GET_CODE (x) == CONST_INT)
8659 else if (GET_CODE (x) != REG)
8660 output_operand_lossage ("invalid %%Y operand");
8661 else if (REGNO (x) < 8)
8662 fputs (reg_names[REGNO (x)], file);
8663 else if (REGNO (x) >= 24 && REGNO (x) < 32)
8664 fputs (reg_names[REGNO (x)-16], file);
8666 output_operand_lossage ("invalid %%Y operand");
8669 /* Print out the low order register name of a register pair. */
8670 if (WORDS_BIG_ENDIAN)
8671 fputs (reg_names[REGNO (x)+1], file);
8673 fputs (reg_names[REGNO (x)], file);
8676 /* Print out the high order register name of a register pair. */
8677 if (WORDS_BIG_ENDIAN)
8678 fputs (reg_names[REGNO (x)], file);
8680 fputs (reg_names[REGNO (x)+1], file);
8683 /* Print out the second register name of a register pair or quad.
8684 I.e., R (%o0) => %o1. */
8685 fputs (reg_names[REGNO (x)+1], file);
8688 /* Print out the third register name of a register quad.
8689 I.e., S (%o0) => %o2. */
8690 fputs (reg_names[REGNO (x)+2], file);
8693 /* Print out the fourth register name of a register quad.
8694 I.e., T (%o0) => %o3. */
8695 fputs (reg_names[REGNO (x)+3], file);
8698 /* Print a condition code register. */
8699 if (REGNO (x) == SPARC_ICC_REG)
8701 /* We don't handle CC[X]_NOOVmode because they're not supposed
8703 if (GET_MODE (x) == CCmode)
8704 fputs ("%icc", file);
8705 else if (GET_MODE (x) == CCXmode)
8706 fputs ("%xcc", file);
8711 /* %fccN register */
8712 fputs (reg_names[REGNO (x)], file);
8715 /* Print the operand's address only. */
8716 output_address (XEXP (x, 0));
8719 /* In this case we need a register. Use %g0 if the
8720 operand is const0_rtx. */
8722 || (GET_MODE (x) != VOIDmode && x == CONST0_RTX (GET_MODE (x))))
8724 fputs ("%g0", file);
8731 switch (GET_CODE (x))
8733 case IOR: fputs ("or", file); break;
8734 case AND: fputs ("and", file); break;
8735 case XOR: fputs ("xor", file); break;
8736 default: output_operand_lossage ("invalid %%A operand");
8741 switch (GET_CODE (x))
8743 case IOR: fputs ("orn", file); break;
8744 case AND: fputs ("andn", file); break;
8745 case XOR: fputs ("xnor", file); break;
8746 default: output_operand_lossage ("invalid %%B operand");
8750 /* This is used by the conditional move instructions. */
8753 enum rtx_code rc = GET_CODE (x);
8757 case NE: fputs ("ne", file); break;
8758 case EQ: fputs ("e", file); break;
8759 case GE: fputs ("ge", file); break;
8760 case GT: fputs ("g", file); break;
8761 case LE: fputs ("le", file); break;
8762 case LT: fputs ("l", file); break;
8763 case GEU: fputs ("geu", file); break;
8764 case GTU: fputs ("gu", file); break;
8765 case LEU: fputs ("leu", file); break;
8766 case LTU: fputs ("lu", file); break;
8767 case LTGT: fputs ("lg", file); break;
8768 case UNORDERED: fputs ("u", file); break;
8769 case ORDERED: fputs ("o", file); break;
8770 case UNLT: fputs ("ul", file); break;
8771 case UNLE: fputs ("ule", file); break;
8772 case UNGT: fputs ("ug", file); break;
8773 case UNGE: fputs ("uge", file); break;
8774 case UNEQ: fputs ("ue", file); break;
8775 default: output_operand_lossage ("invalid %%C operand");
8780 /* This are used by the movr instruction pattern. */
8783 enum rtx_code rc = GET_CODE (x);
8786 case NE: fputs ("ne", file); break;
8787 case EQ: fputs ("e", file); break;
8788 case GE: fputs ("gez", file); break;
8789 case LT: fputs ("lz", file); break;
8790 case LE: fputs ("lez", file); break;
8791 case GT: fputs ("gz", file); break;
8792 default: output_operand_lossage ("invalid %%D operand");
8799 /* Print a sign-extended character. */
8800 int i = trunc_int_for_mode (INTVAL (x), QImode);
8801 fprintf (file, "%d", i);
8806 /* Operand must be a MEM; write its address. */
8807 if (GET_CODE (x) != MEM)
8808 output_operand_lossage ("invalid %%f operand");
8809 output_address (XEXP (x, 0));
8814 /* Print a sign-extended 32-bit value. */
8816 if (GET_CODE(x) == CONST_INT)
8818 else if (GET_CODE(x) == CONST_DOUBLE)
8819 i = CONST_DOUBLE_LOW (x);
8822 output_operand_lossage ("invalid %%s operand");
8825 i = trunc_int_for_mode (i, SImode);
8826 fprintf (file, HOST_WIDE_INT_PRINT_DEC, i);
8831 /* Do nothing special. */
8835 /* Undocumented flag. */
8836 output_operand_lossage ("invalid operand output code");
8839 if (GET_CODE (x) == REG)
8840 fputs (reg_names[REGNO (x)], file);
8841 else if (GET_CODE (x) == MEM)
8844 /* Poor Sun assembler doesn't understand absolute addressing. */
8845 if (CONSTANT_P (XEXP (x, 0)))
8846 fputs ("%g0+", file);
8847 output_address (XEXP (x, 0));
8850 else if (GET_CODE (x) == HIGH)
8852 fputs ("%hi(", file);
8853 output_addr_const (file, XEXP (x, 0));
8856 else if (GET_CODE (x) == LO_SUM)
8858 sparc_print_operand (file, XEXP (x, 0), 0);
8859 if (TARGET_CM_MEDMID)
8860 fputs ("+%l44(", file);
8862 fputs ("+%lo(", file);
8863 output_addr_const (file, XEXP (x, 1));
8866 else if (GET_CODE (x) == CONST_DOUBLE
8867 && (GET_MODE (x) == VOIDmode
8868 || GET_MODE_CLASS (GET_MODE (x)) == MODE_INT))
8870 if (CONST_DOUBLE_HIGH (x) == 0)
8871 fprintf (file, "%u", (unsigned int) CONST_DOUBLE_LOW (x));
8872 else if (CONST_DOUBLE_HIGH (x) == -1
8873 && CONST_DOUBLE_LOW (x) < 0)
8874 fprintf (file, "%d", (int) CONST_DOUBLE_LOW (x));
8876 output_operand_lossage ("long long constant not a valid immediate operand");
8878 else if (GET_CODE (x) == CONST_DOUBLE)
8879 output_operand_lossage ("floating point constant not a valid immediate operand");
8880 else { output_addr_const (file, x); }
8883 /* Implement TARGET_PRINT_OPERAND_ADDRESS. */
8886 sparc_print_operand_address (FILE *file, rtx x)
8888 register rtx base, index = 0;
8890 register rtx addr = x;
8893 fputs (reg_names[REGNO (addr)], file);
8894 else if (GET_CODE (addr) == PLUS)
8896 if (CONST_INT_P (XEXP (addr, 0)))
8897 offset = INTVAL (XEXP (addr, 0)), base = XEXP (addr, 1);
8898 else if (CONST_INT_P (XEXP (addr, 1)))
8899 offset = INTVAL (XEXP (addr, 1)), base = XEXP (addr, 0);
8901 base = XEXP (addr, 0), index = XEXP (addr, 1);
8902 if (GET_CODE (base) == LO_SUM)
8904 gcc_assert (USE_AS_OFFSETABLE_LO10
8906 && ! TARGET_CM_MEDMID);
8907 output_operand (XEXP (base, 0), 0);
8908 fputs ("+%lo(", file);
8909 output_address (XEXP (base, 1));
8910 fprintf (file, ")+%d", offset);
8914 fputs (reg_names[REGNO (base)], file);
8916 fprintf (file, "%+d", offset);
8917 else if (REG_P (index))
8918 fprintf (file, "+%s", reg_names[REGNO (index)]);
8919 else if (GET_CODE (index) == SYMBOL_REF
8920 || GET_CODE (index) == LABEL_REF
8921 || GET_CODE (index) == CONST)
8922 fputc ('+', file), output_addr_const (file, index);
8923 else gcc_unreachable ();
8926 else if (GET_CODE (addr) == MINUS
8927 && GET_CODE (XEXP (addr, 1)) == LABEL_REF)
8929 output_addr_const (file, XEXP (addr, 0));
8931 output_addr_const (file, XEXP (addr, 1));
8932 fputs ("-.)", file);
8934 else if (GET_CODE (addr) == LO_SUM)
8936 output_operand (XEXP (addr, 0), 0);
8937 if (TARGET_CM_MEDMID)
8938 fputs ("+%l44(", file);
8940 fputs ("+%lo(", file);
8941 output_address (XEXP (addr, 1));
8945 && GET_CODE (addr) == CONST
8946 && GET_CODE (XEXP (addr, 0)) == MINUS
8947 && GET_CODE (XEXP (XEXP (addr, 0), 1)) == CONST
8948 && GET_CODE (XEXP (XEXP (XEXP (addr, 0), 1), 0)) == MINUS
8949 && XEXP (XEXP (XEXP (XEXP (addr, 0), 1), 0), 1) == pc_rtx)
8951 addr = XEXP (addr, 0);
8952 output_addr_const (file, XEXP (addr, 0));
8953 /* Group the args of the second CONST in parenthesis. */
8955 /* Skip past the second CONST--it does nothing for us. */
8956 output_addr_const (file, XEXP (XEXP (addr, 1), 0));
8957 /* Close the parenthesis. */
8962 output_addr_const (file, addr);
8966 /* Target hook for assembling integer objects. The sparc version has
8967 special handling for aligned DI-mode objects. */
8970 sparc_assemble_integer (rtx x, unsigned int size, int aligned_p)
8972 /* ??? We only output .xword's for symbols and only then in environments
8973 where the assembler can handle them. */
8974 if (aligned_p && size == 8
8975 && (GET_CODE (x) != CONST_INT && GET_CODE (x) != CONST_DOUBLE))
8979 assemble_integer_with_op ("\t.xword\t", x);
8984 assemble_aligned_integer (4, const0_rtx);
8985 assemble_aligned_integer (4, x);
8989 return default_assemble_integer (x, size, aligned_p);
8992 /* Return the value of a code used in the .proc pseudo-op that says
8993 what kind of result this function returns. For non-C types, we pick
8994 the closest C type. */
8996 #ifndef SHORT_TYPE_SIZE
8997 #define SHORT_TYPE_SIZE (BITS_PER_UNIT * 2)
9000 #ifndef INT_TYPE_SIZE
9001 #define INT_TYPE_SIZE BITS_PER_WORD
9004 #ifndef LONG_TYPE_SIZE
9005 #define LONG_TYPE_SIZE BITS_PER_WORD
9008 #ifndef LONG_LONG_TYPE_SIZE
9009 #define LONG_LONG_TYPE_SIZE (BITS_PER_WORD * 2)
9012 #ifndef FLOAT_TYPE_SIZE
9013 #define FLOAT_TYPE_SIZE BITS_PER_WORD
9016 #ifndef DOUBLE_TYPE_SIZE
9017 #define DOUBLE_TYPE_SIZE (BITS_PER_WORD * 2)
9020 #ifndef LONG_DOUBLE_TYPE_SIZE
9021 #define LONG_DOUBLE_TYPE_SIZE (BITS_PER_WORD * 2)
9025 sparc_type_code (register tree type)
9027 register unsigned long qualifiers = 0;
9028 register unsigned shift;
9030 /* Only the first 30 bits of the qualifier are valid. We must refrain from
9031 setting more, since some assemblers will give an error for this. Also,
9032 we must be careful to avoid shifts of 32 bits or more to avoid getting
9033 unpredictable results. */
9035 for (shift = 6; shift < 30; shift += 2, type = TREE_TYPE (type))
9037 switch (TREE_CODE (type))
9043 qualifiers |= (3 << shift);
9048 qualifiers |= (2 << shift);
9052 case REFERENCE_TYPE:
9054 qualifiers |= (1 << shift);
9058 return (qualifiers | 8);
9061 case QUAL_UNION_TYPE:
9062 return (qualifiers | 9);
9065 return (qualifiers | 10);
9068 return (qualifiers | 16);
9071 /* If this is a range type, consider it to be the underlying
9073 if (TREE_TYPE (type) != 0)
9076 /* Carefully distinguish all the standard types of C,
9077 without messing up if the language is not C. We do this by
9078 testing TYPE_PRECISION and TYPE_UNSIGNED. The old code used to
9079 look at both the names and the above fields, but that's redundant.
9080 Any type whose size is between two C types will be considered
9081 to be the wider of the two types. Also, we do not have a
9082 special code to use for "long long", so anything wider than
9083 long is treated the same. Note that we can't distinguish
9084 between "int" and "long" in this code if they are the same
9085 size, but that's fine, since neither can the assembler. */
9087 if (TYPE_PRECISION (type) <= CHAR_TYPE_SIZE)
9088 return (qualifiers | (TYPE_UNSIGNED (type) ? 12 : 2));
9090 else if (TYPE_PRECISION (type) <= SHORT_TYPE_SIZE)
9091 return (qualifiers | (TYPE_UNSIGNED (type) ? 13 : 3));
9093 else if (TYPE_PRECISION (type) <= INT_TYPE_SIZE)
9094 return (qualifiers | (TYPE_UNSIGNED (type) ? 14 : 4));
9097 return (qualifiers | (TYPE_UNSIGNED (type) ? 15 : 5));
9100 /* If this is a range type, consider it to be the underlying
9102 if (TREE_TYPE (type) != 0)
9105 /* Carefully distinguish all the standard types of C,
9106 without messing up if the language is not C. */
9108 if (TYPE_PRECISION (type) == FLOAT_TYPE_SIZE)
9109 return (qualifiers | 6);
9112 return (qualifiers | 7);
9114 case COMPLEX_TYPE: /* GNU Fortran COMPLEX type. */
9115 /* ??? We need to distinguish between double and float complex types,
9116 but I don't know how yet because I can't reach this code from
9117 existing front-ends. */
9118 return (qualifiers | 7); /* Who knows? */
9121 case BOOLEAN_TYPE: /* Boolean truth value type. */
9127 gcc_unreachable (); /* Not a type! */
9134 /* Nested function support. */
9136 /* Emit RTL insns to initialize the variable parts of a trampoline.
9137 FNADDR is an RTX for the address of the function's pure code.
9138 CXT is an RTX for the static chain value for the function.
9140 This takes 16 insns: 2 shifts & 2 ands (to split up addresses), 4 sethi
9141 (to load in opcodes), 4 iors (to merge address and opcodes), and 4 writes
9142 (to store insns). This is a bit excessive. Perhaps a different
9143 mechanism would be better here.
9145 Emit enough FLUSH insns to synchronize the data and instruction caches. */
9148 sparc32_initialize_trampoline (rtx m_tramp, rtx fnaddr, rtx cxt)
9150 /* SPARC 32-bit trampoline:
9153 sethi %hi(static), %g2
9155 or %g2, %lo(static), %g2
9157 SETHI i,r = 00rr rrr1 00ii iiii iiii iiii iiii iiii
9158 JMPL r+i,d = 10dd ddd1 1100 0rrr rr1i iiii iiii iiii
9162 (adjust_address (m_tramp, SImode, 0),
9163 expand_binop (SImode, ior_optab,
9164 expand_shift (RSHIFT_EXPR, SImode, fnaddr, 10, 0, 1),
9165 GEN_INT (trunc_int_for_mode (0x03000000, SImode)),
9166 NULL_RTX, 1, OPTAB_DIRECT));
9169 (adjust_address (m_tramp, SImode, 4),
9170 expand_binop (SImode, ior_optab,
9171 expand_shift (RSHIFT_EXPR, SImode, cxt, 10, 0, 1),
9172 GEN_INT (trunc_int_for_mode (0x05000000, SImode)),
9173 NULL_RTX, 1, OPTAB_DIRECT));
9176 (adjust_address (m_tramp, SImode, 8),
9177 expand_binop (SImode, ior_optab,
9178 expand_and (SImode, fnaddr, GEN_INT (0x3ff), NULL_RTX),
9179 GEN_INT (trunc_int_for_mode (0x81c06000, SImode)),
9180 NULL_RTX, 1, OPTAB_DIRECT));
9183 (adjust_address (m_tramp, SImode, 12),
9184 expand_binop (SImode, ior_optab,
9185 expand_and (SImode, cxt, GEN_INT (0x3ff), NULL_RTX),
9186 GEN_INT (trunc_int_for_mode (0x8410a000, SImode)),
9187 NULL_RTX, 1, OPTAB_DIRECT));
9189 /* On UltraSPARC a flush flushes an entire cache line. The trampoline is
9190 aligned on a 16 byte boundary so one flush clears it all. */
9191 emit_insn (gen_flush (validize_mem (adjust_address (m_tramp, SImode, 0))));
9192 if (sparc_cpu != PROCESSOR_ULTRASPARC
9193 && sparc_cpu != PROCESSOR_ULTRASPARC3
9194 && sparc_cpu != PROCESSOR_NIAGARA
9195 && sparc_cpu != PROCESSOR_NIAGARA2
9196 && sparc_cpu != PROCESSOR_NIAGARA3
9197 && sparc_cpu != PROCESSOR_NIAGARA4)
9198 emit_insn (gen_flush (validize_mem (adjust_address (m_tramp, SImode, 8))));
9200 /* Call __enable_execute_stack after writing onto the stack to make sure
9201 the stack address is accessible. */
9202 #ifdef HAVE_ENABLE_EXECUTE_STACK
9203 emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__enable_execute_stack"),
9204 LCT_NORMAL, VOIDmode, 1, XEXP (m_tramp, 0), Pmode);
9209 /* The 64-bit version is simpler because it makes more sense to load the
9210 values as "immediate" data out of the trampoline. It's also easier since
9211 we can read the PC without clobbering a register. */
9214 sparc64_initialize_trampoline (rtx m_tramp, rtx fnaddr, rtx cxt)
9216 /* SPARC 64-bit trampoline:
9225 emit_move_insn (adjust_address (m_tramp, SImode, 0),
9226 GEN_INT (trunc_int_for_mode (0x83414000, SImode)));
9227 emit_move_insn (adjust_address (m_tramp, SImode, 4),
9228 GEN_INT (trunc_int_for_mode (0xca586018, SImode)));
9229 emit_move_insn (adjust_address (m_tramp, SImode, 8),
9230 GEN_INT (trunc_int_for_mode (0x81c14000, SImode)));
9231 emit_move_insn (adjust_address (m_tramp, SImode, 12),
9232 GEN_INT (trunc_int_for_mode (0xca586010, SImode)));
9233 emit_move_insn (adjust_address (m_tramp, DImode, 16), cxt);
9234 emit_move_insn (adjust_address (m_tramp, DImode, 24), fnaddr);
9235 emit_insn (gen_flushdi (validize_mem (adjust_address (m_tramp, DImode, 0))));
9237 if (sparc_cpu != PROCESSOR_ULTRASPARC
9238 && sparc_cpu != PROCESSOR_ULTRASPARC3
9239 && sparc_cpu != PROCESSOR_NIAGARA
9240 && sparc_cpu != PROCESSOR_NIAGARA2
9241 && sparc_cpu != PROCESSOR_NIAGARA3
9242 && sparc_cpu != PROCESSOR_NIAGARA4)
9243 emit_insn (gen_flushdi (validize_mem (adjust_address (m_tramp, DImode, 8))));
9245 /* Call __enable_execute_stack after writing onto the stack to make sure
9246 the stack address is accessible. */
9247 #ifdef HAVE_ENABLE_EXECUTE_STACK
9248 emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__enable_execute_stack"),
9249 LCT_NORMAL, VOIDmode, 1, XEXP (m_tramp, 0), Pmode);
9253 /* Worker for TARGET_TRAMPOLINE_INIT. */
9256 sparc_trampoline_init (rtx m_tramp, tree fndecl, rtx cxt)
9258 rtx fnaddr = force_reg (Pmode, XEXP (DECL_RTL (fndecl), 0));
9259 cxt = force_reg (Pmode, cxt);
9261 sparc64_initialize_trampoline (m_tramp, fnaddr, cxt);
9263 sparc32_initialize_trampoline (m_tramp, fnaddr, cxt);
9266 /* Adjust the cost of a scheduling dependency. Return the new cost of
9267 a dependency LINK or INSN on DEP_INSN. COST is the current cost. */
9270 supersparc_adjust_cost (rtx insn, rtx link, rtx dep_insn, int cost)
9272 enum attr_type insn_type;
9274 if (! recog_memoized (insn))
9277 insn_type = get_attr_type (insn);
9279 if (REG_NOTE_KIND (link) == 0)
9281 /* Data dependency; DEP_INSN writes a register that INSN reads some
9284 /* if a load, then the dependence must be on the memory address;
9285 add an extra "cycle". Note that the cost could be two cycles
9286 if the reg was written late in an instruction group; we ca not tell
9288 if (insn_type == TYPE_LOAD || insn_type == TYPE_FPLOAD)
9291 /* Get the delay only if the address of the store is the dependence. */
9292 if (insn_type == TYPE_STORE || insn_type == TYPE_FPSTORE)
9294 rtx pat = PATTERN(insn);
9295 rtx dep_pat = PATTERN (dep_insn);
9297 if (GET_CODE (pat) != SET || GET_CODE (dep_pat) != SET)
9298 return cost; /* This should not happen! */
9300 /* The dependency between the two instructions was on the data that
9301 is being stored. Assume that this implies that the address of the
9302 store is not dependent. */
9303 if (rtx_equal_p (SET_DEST (dep_pat), SET_SRC (pat)))
9306 return cost + 3; /* An approximation. */
9309 /* A shift instruction cannot receive its data from an instruction
9310 in the same cycle; add a one cycle penalty. */
9311 if (insn_type == TYPE_SHIFT)
9312 return cost + 3; /* Split before cascade into shift. */
9316 /* Anti- or output- dependency; DEP_INSN reads/writes a register that
9317 INSN writes some cycles later. */
9319 /* These are only significant for the fpu unit; writing a fp reg before
9320 the fpu has finished with it stalls the processor. */
9322 /* Reusing an integer register causes no problems. */
9323 if (insn_type == TYPE_IALU || insn_type == TYPE_SHIFT)
9331 hypersparc_adjust_cost (rtx insn, rtx link, rtx dep_insn, int cost)
9333 enum attr_type insn_type, dep_type;
9334 rtx pat = PATTERN(insn);
9335 rtx dep_pat = PATTERN (dep_insn);
9337 if (recog_memoized (insn) < 0 || recog_memoized (dep_insn) < 0)
9340 insn_type = get_attr_type (insn);
9341 dep_type = get_attr_type (dep_insn);
9343 switch (REG_NOTE_KIND (link))
9346 /* Data dependency; DEP_INSN writes a register that INSN reads some
9353 /* Get the delay iff the address of the store is the dependence. */
9354 if (GET_CODE (pat) != SET || GET_CODE (dep_pat) != SET)
9357 if (rtx_equal_p (SET_DEST (dep_pat), SET_SRC (pat)))
9364 /* If a load, then the dependence must be on the memory address. If
9365 the addresses aren't equal, then it might be a false dependency */
9366 if (dep_type == TYPE_STORE || dep_type == TYPE_FPSTORE)
9368 if (GET_CODE (pat) != SET || GET_CODE (dep_pat) != SET
9369 || GET_CODE (SET_DEST (dep_pat)) != MEM
9370 || GET_CODE (SET_SRC (pat)) != MEM
9371 || ! rtx_equal_p (XEXP (SET_DEST (dep_pat), 0),
9372 XEXP (SET_SRC (pat), 0)))
9380 /* Compare to branch latency is 0. There is no benefit from
9381 separating compare and branch. */
9382 if (dep_type == TYPE_COMPARE)
9384 /* Floating point compare to branch latency is less than
9385 compare to conditional move. */
9386 if (dep_type == TYPE_FPCMP)
9395 /* Anti-dependencies only penalize the fpu unit. */
9396 if (insn_type == TYPE_IALU || insn_type == TYPE_SHIFT)
9408 sparc_adjust_cost(rtx insn, rtx link, rtx dep, int cost)
9412 case PROCESSOR_SUPERSPARC:
9413 cost = supersparc_adjust_cost (insn, link, dep, cost);
9415 case PROCESSOR_HYPERSPARC:
9416 case PROCESSOR_SPARCLITE86X:
9417 cost = hypersparc_adjust_cost (insn, link, dep, cost);
9426 sparc_sched_init (FILE *dump ATTRIBUTE_UNUSED,
9427 int sched_verbose ATTRIBUTE_UNUSED,
9428 int max_ready ATTRIBUTE_UNUSED)
9432 sparc_use_sched_lookahead (void)
9434 if (sparc_cpu == PROCESSOR_NIAGARA
9435 || sparc_cpu == PROCESSOR_NIAGARA2
9436 || sparc_cpu == PROCESSOR_NIAGARA3)
9438 if (sparc_cpu == PROCESSOR_NIAGARA4)
9440 if (sparc_cpu == PROCESSOR_ULTRASPARC
9441 || sparc_cpu == PROCESSOR_ULTRASPARC3)
9443 if ((1 << sparc_cpu) &
9444 ((1 << PROCESSOR_SUPERSPARC) | (1 << PROCESSOR_HYPERSPARC) |
9445 (1 << PROCESSOR_SPARCLITE86X)))
9451 sparc_issue_rate (void)
9455 case PROCESSOR_NIAGARA:
9456 case PROCESSOR_NIAGARA2:
9457 case PROCESSOR_NIAGARA3:
9460 case PROCESSOR_NIAGARA4:
9462 /* Assume V9 processors are capable of at least dual-issue. */
9464 case PROCESSOR_SUPERSPARC:
9466 case PROCESSOR_HYPERSPARC:
9467 case PROCESSOR_SPARCLITE86X:
9469 case PROCESSOR_ULTRASPARC:
9470 case PROCESSOR_ULTRASPARC3:
9476 set_extends (rtx insn)
9478 register rtx pat = PATTERN (insn);
9480 switch (GET_CODE (SET_SRC (pat)))
9482 /* Load and some shift instructions zero extend. */
9485 /* sethi clears the high bits */
9487 /* LO_SUM is used with sethi. sethi cleared the high
9488 bits and the values used with lo_sum are positive */
9490 /* Store flag stores 0 or 1 */
9500 rtx op0 = XEXP (SET_SRC (pat), 0);
9501 rtx op1 = XEXP (SET_SRC (pat), 1);
9502 if (GET_CODE (op1) == CONST_INT)
9503 return INTVAL (op1) >= 0;
9504 if (GET_CODE (op0) != REG)
9506 if (sparc_check_64 (op0, insn) == 1)
9508 return (GET_CODE (op1) == REG && sparc_check_64 (op1, insn) == 1);
9513 rtx op0 = XEXP (SET_SRC (pat), 0);
9514 rtx op1 = XEXP (SET_SRC (pat), 1);
9515 if (GET_CODE (op0) != REG || sparc_check_64 (op0, insn) <= 0)
9517 if (GET_CODE (op1) == CONST_INT)
9518 return INTVAL (op1) >= 0;
9519 return (GET_CODE (op1) == REG && sparc_check_64 (op1, insn) == 1);
9522 return GET_MODE (SET_SRC (pat)) == SImode;
9523 /* Positive integers leave the high bits zero. */
9525 return ! (CONST_DOUBLE_LOW (SET_SRC (pat)) & 0x80000000);
9527 return ! (INTVAL (SET_SRC (pat)) & 0x80000000);
9530 return - (GET_MODE (SET_SRC (pat)) == SImode);
9532 return sparc_check_64 (SET_SRC (pat), insn);
9538 /* We _ought_ to have only one kind per function, but... */
9539 static GTY(()) rtx sparc_addr_diff_list;
9540 static GTY(()) rtx sparc_addr_list;
9543 sparc_defer_case_vector (rtx lab, rtx vec, int diff)
9545 vec = gen_rtx_EXPR_LIST (VOIDmode, lab, vec);
9547 sparc_addr_diff_list
9548 = gen_rtx_EXPR_LIST (VOIDmode, vec, sparc_addr_diff_list);
9550 sparc_addr_list = gen_rtx_EXPR_LIST (VOIDmode, vec, sparc_addr_list);
9554 sparc_output_addr_vec (rtx vec)
9556 rtx lab = XEXP (vec, 0), body = XEXP (vec, 1);
9557 int idx, vlen = XVECLEN (body, 0);
9559 #ifdef ASM_OUTPUT_ADDR_VEC_START
9560 ASM_OUTPUT_ADDR_VEC_START (asm_out_file);
9563 #ifdef ASM_OUTPUT_CASE_LABEL
9564 ASM_OUTPUT_CASE_LABEL (asm_out_file, "L", CODE_LABEL_NUMBER (lab),
9567 (*targetm.asm_out.internal_label) (asm_out_file, "L", CODE_LABEL_NUMBER (lab));
9570 for (idx = 0; idx < vlen; idx++)
9572 ASM_OUTPUT_ADDR_VEC_ELT
9573 (asm_out_file, CODE_LABEL_NUMBER (XEXP (XVECEXP (body, 0, idx), 0)));
9576 #ifdef ASM_OUTPUT_ADDR_VEC_END
9577 ASM_OUTPUT_ADDR_VEC_END (asm_out_file);
9582 sparc_output_addr_diff_vec (rtx vec)
9584 rtx lab = XEXP (vec, 0), body = XEXP (vec, 1);
9585 rtx base = XEXP (XEXP (body, 0), 0);
9586 int idx, vlen = XVECLEN (body, 1);
9588 #ifdef ASM_OUTPUT_ADDR_VEC_START
9589 ASM_OUTPUT_ADDR_VEC_START (asm_out_file);
9592 #ifdef ASM_OUTPUT_CASE_LABEL
9593 ASM_OUTPUT_CASE_LABEL (asm_out_file, "L", CODE_LABEL_NUMBER (lab),
9596 (*targetm.asm_out.internal_label) (asm_out_file, "L", CODE_LABEL_NUMBER (lab));
9599 for (idx = 0; idx < vlen; idx++)
9601 ASM_OUTPUT_ADDR_DIFF_ELT
9604 CODE_LABEL_NUMBER (XEXP (XVECEXP (body, 1, idx), 0)),
9605 CODE_LABEL_NUMBER (base));
9608 #ifdef ASM_OUTPUT_ADDR_VEC_END
9609 ASM_OUTPUT_ADDR_VEC_END (asm_out_file);
9614 sparc_output_deferred_case_vectors (void)
9619 if (sparc_addr_list == NULL_RTX
9620 && sparc_addr_diff_list == NULL_RTX)
9623 /* Align to cache line in the function's code section. */
9624 switch_to_section (current_function_section ());
9626 align = floor_log2 (FUNCTION_BOUNDARY / BITS_PER_UNIT);
9628 ASM_OUTPUT_ALIGN (asm_out_file, align);
9630 for (t = sparc_addr_list; t ; t = XEXP (t, 1))
9631 sparc_output_addr_vec (XEXP (t, 0));
9632 for (t = sparc_addr_diff_list; t ; t = XEXP (t, 1))
9633 sparc_output_addr_diff_vec (XEXP (t, 0));
9635 sparc_addr_list = sparc_addr_diff_list = NULL_RTX;
9638 /* Return 0 if the high 32 bits of X (the low word of X, if DImode) are
9639 unknown. Return 1 if the high bits are zero, -1 if the register is
9642 sparc_check_64 (rtx x, rtx insn)
9644 /* If a register is set only once it is safe to ignore insns this
9645 code does not know how to handle. The loop will either recognize
9646 the single set and return the correct value or fail to recognize
9651 gcc_assert (GET_CODE (x) == REG);
9653 if (GET_MODE (x) == DImode)
9654 y = gen_rtx_REG (SImode, REGNO (x) + WORDS_BIG_ENDIAN);
9656 if (flag_expensive_optimizations
9657 && df && DF_REG_DEF_COUNT (REGNO (y)) == 1)
9663 insn = get_last_insn_anywhere ();
9668 while ((insn = PREV_INSN (insn)))
9670 switch (GET_CODE (insn))
9683 rtx pat = PATTERN (insn);
9684 if (GET_CODE (pat) != SET)
9686 if (rtx_equal_p (x, SET_DEST (pat)))
9687 return set_extends (insn);
9688 if (y && rtx_equal_p (y, SET_DEST (pat)))
9689 return set_extends (insn);
9690 if (reg_overlap_mentioned_p (SET_DEST (pat), y))
9698 /* Output a wide shift instruction in V8+ mode. INSN is the instruction,
9699 OPERANDS are its operands and OPCODE is the mnemonic to be used. */
9702 output_v8plus_shift (rtx insn, rtx *operands, const char *opcode)
9704 static char asm_code[60];
9706 /* The scratch register is only required when the destination
9707 register is not a 64-bit global or out register. */
9708 if (which_alternative != 2)
9709 operands[3] = operands[0];
9711 /* We can only shift by constants <= 63. */
9712 if (GET_CODE (operands[2]) == CONST_INT)
9713 operands[2] = GEN_INT (INTVAL (operands[2]) & 0x3f);
9715 if (GET_CODE (operands[1]) == CONST_INT)
9717 output_asm_insn ("mov\t%1, %3", operands);
9721 output_asm_insn ("sllx\t%H1, 32, %3", operands);
9722 if (sparc_check_64 (operands[1], insn) <= 0)
9723 output_asm_insn ("srl\t%L1, 0, %L1", operands);
9724 output_asm_insn ("or\t%L1, %3, %3", operands);
9727 strcpy (asm_code, opcode);
9729 if (which_alternative != 2)
9730 return strcat (asm_code, "\t%0, %2, %L0\n\tsrlx\t%L0, 32, %H0");
9733 strcat (asm_code, "\t%3, %2, %3\n\tsrlx\t%3, 32, %H0\n\tmov\t%3, %L0");
9736 /* Output rtl to increment the profiler label LABELNO
9737 for profiling a function entry. */
9740 sparc_profile_hook (int labelno)
9745 fun = gen_rtx_SYMBOL_REF (Pmode, MCOUNT_FUNCTION);
9746 if (NO_PROFILE_COUNTERS)
9748 emit_library_call (fun, LCT_NORMAL, VOIDmode, 0);
9752 ASM_GENERATE_INTERNAL_LABEL (buf, "LP", labelno);
9753 lab = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (buf));
9754 emit_library_call (fun, LCT_NORMAL, VOIDmode, 1, lab, Pmode);
9758 #ifdef TARGET_SOLARIS
9759 /* Solaris implementation of TARGET_ASM_NAMED_SECTION. */
9762 sparc_solaris_elf_asm_named_section (const char *name, unsigned int flags,
9763 tree decl ATTRIBUTE_UNUSED)
9765 if (HAVE_COMDAT_GROUP && flags & SECTION_LINKONCE)
9767 solaris_elf_asm_comdat_section (name, flags, decl);
9771 fprintf (asm_out_file, "\t.section\t\"%s\"", name);
9773 if (!(flags & SECTION_DEBUG))
9774 fputs (",#alloc", asm_out_file);
9775 if (flags & SECTION_WRITE)
9776 fputs (",#write", asm_out_file);
9777 if (flags & SECTION_TLS)
9778 fputs (",#tls", asm_out_file);
9779 if (flags & SECTION_CODE)
9780 fputs (",#execinstr", asm_out_file);
9782 /* Sun as only supports #nobits/#progbits since Solaris 10. */
9783 if (HAVE_AS_SPARC_NOBITS)
9785 if (flags & SECTION_BSS)
9786 fputs (",#nobits", asm_out_file);
9788 fputs (",#progbits", asm_out_file);
9791 fputc ('\n', asm_out_file);
9793 #endif /* TARGET_SOLARIS */
9795 /* We do not allow indirect calls to be optimized into sibling calls.
9797 We cannot use sibling calls when delayed branches are disabled
9798 because they will likely require the call delay slot to be filled.
9800 Also, on SPARC 32-bit we cannot emit a sibling call when the
9801 current function returns a structure. This is because the "unimp
9802 after call" convention would cause the callee to return to the
9803 wrong place. The generic code already disallows cases where the
9804 function being called returns a structure.
9806 It may seem strange how this last case could occur. Usually there
9807 is code after the call which jumps to epilogue code which dumps the
9808 return value into the struct return area. That ought to invalidate
9809 the sibling call right? Well, in the C++ case we can end up passing
9810 the pointer to the struct return area to a constructor (which returns
9811 void) and then nothing else happens. Such a sibling call would look
9812 valid without the added check here.
9814 VxWorks PIC PLT entries require the global pointer to be initialized
9815 on entry. We therefore can't emit sibling calls to them. */
9817 sparc_function_ok_for_sibcall (tree decl, tree exp ATTRIBUTE_UNUSED)
9820 && flag_delayed_branch
9821 && (TARGET_ARCH64 || ! cfun->returns_struct)
9822 && !(TARGET_VXWORKS_RTP
9824 && !targetm.binds_local_p (decl)));
9827 /* libfunc renaming. */
9830 sparc_init_libfuncs (void)
9834 /* Use the subroutines that Sun's library provides for integer
9835 multiply and divide. The `*' prevents an underscore from
9836 being prepended by the compiler. .umul is a little faster
9838 set_optab_libfunc (smul_optab, SImode, "*.umul");
9839 set_optab_libfunc (sdiv_optab, SImode, "*.div");
9840 set_optab_libfunc (udiv_optab, SImode, "*.udiv");
9841 set_optab_libfunc (smod_optab, SImode, "*.rem");
9842 set_optab_libfunc (umod_optab, SImode, "*.urem");
9844 /* TFmode arithmetic. These names are part of the SPARC 32bit ABI. */
9845 set_optab_libfunc (add_optab, TFmode, "_Q_add");
9846 set_optab_libfunc (sub_optab, TFmode, "_Q_sub");
9847 set_optab_libfunc (neg_optab, TFmode, "_Q_neg");
9848 set_optab_libfunc (smul_optab, TFmode, "_Q_mul");
9849 set_optab_libfunc (sdiv_optab, TFmode, "_Q_div");
9851 /* We can define the TFmode sqrt optab only if TARGET_FPU. This
9852 is because with soft-float, the SFmode and DFmode sqrt
9853 instructions will be absent, and the compiler will notice and
9854 try to use the TFmode sqrt instruction for calls to the
9855 builtin function sqrt, but this fails. */
9857 set_optab_libfunc (sqrt_optab, TFmode, "_Q_sqrt");
9859 set_optab_libfunc (eq_optab, TFmode, "_Q_feq");
9860 set_optab_libfunc (ne_optab, TFmode, "_Q_fne");
9861 set_optab_libfunc (gt_optab, TFmode, "_Q_fgt");
9862 set_optab_libfunc (ge_optab, TFmode, "_Q_fge");
9863 set_optab_libfunc (lt_optab, TFmode, "_Q_flt");
9864 set_optab_libfunc (le_optab, TFmode, "_Q_fle");
9866 set_conv_libfunc (sext_optab, TFmode, SFmode, "_Q_stoq");
9867 set_conv_libfunc (sext_optab, TFmode, DFmode, "_Q_dtoq");
9868 set_conv_libfunc (trunc_optab, SFmode, TFmode, "_Q_qtos");
9869 set_conv_libfunc (trunc_optab, DFmode, TFmode, "_Q_qtod");
9871 set_conv_libfunc (sfix_optab, SImode, TFmode, "_Q_qtoi");
9872 set_conv_libfunc (ufix_optab, SImode, TFmode, "_Q_qtou");
9873 set_conv_libfunc (sfloat_optab, TFmode, SImode, "_Q_itoq");
9874 set_conv_libfunc (ufloat_optab, TFmode, SImode, "_Q_utoq");
9876 if (DITF_CONVERSION_LIBFUNCS)
9878 set_conv_libfunc (sfix_optab, DImode, TFmode, "_Q_qtoll");
9879 set_conv_libfunc (ufix_optab, DImode, TFmode, "_Q_qtoull");
9880 set_conv_libfunc (sfloat_optab, TFmode, DImode, "_Q_lltoq");
9881 set_conv_libfunc (ufloat_optab, TFmode, DImode, "_Q_ulltoq");
9884 if (SUN_CONVERSION_LIBFUNCS)
9886 set_conv_libfunc (sfix_optab, DImode, SFmode, "__ftoll");
9887 set_conv_libfunc (ufix_optab, DImode, SFmode, "__ftoull");
9888 set_conv_libfunc (sfix_optab, DImode, DFmode, "__dtoll");
9889 set_conv_libfunc (ufix_optab, DImode, DFmode, "__dtoull");
9894 /* In the SPARC 64bit ABI, SImode multiply and divide functions
9895 do not exist in the library. Make sure the compiler does not
9896 emit calls to them by accident. (It should always use the
9897 hardware instructions.) */
9898 set_optab_libfunc (smul_optab, SImode, 0);
9899 set_optab_libfunc (sdiv_optab, SImode, 0);
9900 set_optab_libfunc (udiv_optab, SImode, 0);
9901 set_optab_libfunc (smod_optab, SImode, 0);
9902 set_optab_libfunc (umod_optab, SImode, 0);
9904 if (SUN_INTEGER_MULTIPLY_64)
9906 set_optab_libfunc (smul_optab, DImode, "__mul64");
9907 set_optab_libfunc (sdiv_optab, DImode, "__div64");
9908 set_optab_libfunc (udiv_optab, DImode, "__udiv64");
9909 set_optab_libfunc (smod_optab, DImode, "__rem64");
9910 set_optab_libfunc (umod_optab, DImode, "__urem64");
9913 if (SUN_CONVERSION_LIBFUNCS)
9915 set_conv_libfunc (sfix_optab, DImode, SFmode, "__ftol");
9916 set_conv_libfunc (ufix_optab, DImode, SFmode, "__ftoul");
9917 set_conv_libfunc (sfix_optab, DImode, DFmode, "__dtol");
9918 set_conv_libfunc (ufix_optab, DImode, DFmode, "__dtoul");
9923 static tree def_builtin(const char *name, int code, tree type)
9925 return add_builtin_function(name, type, code, BUILT_IN_MD, NULL,
9929 static tree def_builtin_const(const char *name, int code, tree type)
9931 tree t = def_builtin(name, code, type);
9934 TREE_READONLY (t) = 1;
9939 /* Implement the TARGET_INIT_BUILTINS target hook.
9940 Create builtin functions for special SPARC instructions. */
9943 sparc_init_builtins (void)
9946 sparc_vis_init_builtins ();
9949 /* Create builtin functions for VIS 1.0 instructions. */
9952 sparc_vis_init_builtins (void)
9954 tree v4qi = build_vector_type (unsigned_intQI_type_node, 4);
9955 tree v8qi = build_vector_type (unsigned_intQI_type_node, 8);
9956 tree v4hi = build_vector_type (intHI_type_node, 4);
9957 tree v2hi = build_vector_type (intHI_type_node, 2);
9958 tree v2si = build_vector_type (intSI_type_node, 2);
9959 tree v1si = build_vector_type (intSI_type_node, 1);
9961 tree v4qi_ftype_v4hi = build_function_type_list (v4qi, v4hi, 0);
9962 tree v8qi_ftype_v2si_v8qi = build_function_type_list (v8qi, v2si, v8qi, 0);
9963 tree v2hi_ftype_v2si = build_function_type_list (v2hi, v2si, 0);
9964 tree v4hi_ftype_v4qi = build_function_type_list (v4hi, v4qi, 0);
9965 tree v8qi_ftype_v4qi_v4qi = build_function_type_list (v8qi, v4qi, v4qi, 0);
9966 tree v4hi_ftype_v4qi_v4hi = build_function_type_list (v4hi, v4qi, v4hi, 0);
9967 tree v4hi_ftype_v4qi_v2hi = build_function_type_list (v4hi, v4qi, v2hi, 0);
9968 tree v2si_ftype_v4qi_v2hi = build_function_type_list (v2si, v4qi, v2hi, 0);
9969 tree v4hi_ftype_v8qi_v4hi = build_function_type_list (v4hi, v8qi, v4hi, 0);
9970 tree v4hi_ftype_v4hi_v4hi = build_function_type_list (v4hi, v4hi, v4hi, 0);
9971 tree v2si_ftype_v2si_v2si = build_function_type_list (v2si, v2si, v2si, 0);
9972 tree v8qi_ftype_v8qi_v8qi = build_function_type_list (v8qi, v8qi, v8qi, 0);
9973 tree v2hi_ftype_v2hi_v2hi = build_function_type_list (v2hi, v2hi, v2hi, 0);
9974 tree v1si_ftype_v1si_v1si = build_function_type_list (v1si, v1si, v1si, 0);
9975 tree di_ftype_v8qi_v8qi_di = build_function_type_list (intDI_type_node,
9977 intDI_type_node, 0);
9978 tree di_ftype_v8qi_v8qi = build_function_type_list (intDI_type_node,
9980 tree si_ftype_v8qi_v8qi = build_function_type_list (intSI_type_node,
9982 tree di_ftype_di_di = build_function_type_list (intDI_type_node,
9984 intDI_type_node, 0);
9985 tree si_ftype_si_si = build_function_type_list (intSI_type_node,
9987 intSI_type_node, 0);
9988 tree ptr_ftype_ptr_si = build_function_type_list (ptr_type_node,
9990 intSI_type_node, 0);
9991 tree ptr_ftype_ptr_di = build_function_type_list (ptr_type_node,
9993 intDI_type_node, 0);
9994 tree si_ftype_ptr_ptr = build_function_type_list (intSI_type_node,
9997 tree di_ftype_ptr_ptr = build_function_type_list (intDI_type_node,
10000 tree si_ftype_v4hi_v4hi = build_function_type_list (intSI_type_node,
10002 tree si_ftype_v2si_v2si = build_function_type_list (intSI_type_node,
10004 tree di_ftype_v4hi_v4hi = build_function_type_list (intDI_type_node,
10006 tree di_ftype_v2si_v2si = build_function_type_list (intDI_type_node,
10008 tree void_ftype_di = build_function_type_list (void_type_node,
10009 intDI_type_node, 0);
10010 tree di_ftype_void = build_function_type_list (intDI_type_node,
10011 void_type_node, 0);
10012 tree void_ftype_si = build_function_type_list (void_type_node,
10013 intSI_type_node, 0);
10014 tree sf_ftype_sf_sf = build_function_type_list (float_type_node,
10016 float_type_node, 0);
10017 tree df_ftype_df_df = build_function_type_list (double_type_node,
10019 double_type_node, 0);
10021 /* Packing and expanding vectors. */
10022 def_builtin ("__builtin_vis_fpack16", CODE_FOR_fpack16_vis,
10024 def_builtin ("__builtin_vis_fpack32", CODE_FOR_fpack32_vis,
10025 v8qi_ftype_v2si_v8qi);
10026 def_builtin ("__builtin_vis_fpackfix", CODE_FOR_fpackfix_vis,
10028 def_builtin_const ("__builtin_vis_fexpand", CODE_FOR_fexpand_vis,
10030 def_builtin_const ("__builtin_vis_fpmerge", CODE_FOR_fpmerge_vis,
10031 v8qi_ftype_v4qi_v4qi);
10033 /* Multiplications. */
10034 def_builtin_const ("__builtin_vis_fmul8x16", CODE_FOR_fmul8x16_vis,
10035 v4hi_ftype_v4qi_v4hi);
10036 def_builtin_const ("__builtin_vis_fmul8x16au", CODE_FOR_fmul8x16au_vis,
10037 v4hi_ftype_v4qi_v2hi);
10038 def_builtin_const ("__builtin_vis_fmul8x16al", CODE_FOR_fmul8x16al_vis,
10039 v4hi_ftype_v4qi_v2hi);
10040 def_builtin_const ("__builtin_vis_fmul8sux16", CODE_FOR_fmul8sux16_vis,
10041 v4hi_ftype_v8qi_v4hi);
10042 def_builtin_const ("__builtin_vis_fmul8ulx16", CODE_FOR_fmul8ulx16_vis,
10043 v4hi_ftype_v8qi_v4hi);
10044 def_builtin_const ("__builtin_vis_fmuld8sux16", CODE_FOR_fmuld8sux16_vis,
10045 v2si_ftype_v4qi_v2hi);
10046 def_builtin_const ("__builtin_vis_fmuld8ulx16", CODE_FOR_fmuld8ulx16_vis,
10047 v2si_ftype_v4qi_v2hi);
10049 /* Data aligning. */
10050 def_builtin ("__builtin_vis_faligndatav4hi", CODE_FOR_faligndatav4hi_vis,
10051 v4hi_ftype_v4hi_v4hi);
10052 def_builtin ("__builtin_vis_faligndatav8qi", CODE_FOR_faligndatav8qi_vis,
10053 v8qi_ftype_v8qi_v8qi);
10054 def_builtin ("__builtin_vis_faligndatav2si", CODE_FOR_faligndatav2si_vis,
10055 v2si_ftype_v2si_v2si);
10056 def_builtin ("__builtin_vis_faligndatadi", CODE_FOR_faligndatav1di_vis,
10059 def_builtin ("__builtin_vis_write_gsr", CODE_FOR_wrgsr_vis,
10061 def_builtin ("__builtin_vis_read_gsr", CODE_FOR_rdgsr_vis,
10066 def_builtin ("__builtin_vis_alignaddr", CODE_FOR_alignaddrdi_vis,
10068 def_builtin ("__builtin_vis_alignaddrl", CODE_FOR_alignaddrldi_vis,
10073 def_builtin ("__builtin_vis_alignaddr", CODE_FOR_alignaddrsi_vis,
10075 def_builtin ("__builtin_vis_alignaddrl", CODE_FOR_alignaddrlsi_vis,
10079 /* Pixel distance. */
10080 def_builtin_const ("__builtin_vis_pdist", CODE_FOR_pdist_vis,
10081 di_ftype_v8qi_v8qi_di);
10083 /* Edge handling. */
10086 def_builtin_const ("__builtin_vis_edge8", CODE_FOR_edge8di_vis,
10088 def_builtin_const ("__builtin_vis_edge8l", CODE_FOR_edge8ldi_vis,
10090 def_builtin_const ("__builtin_vis_edge16", CODE_FOR_edge16di_vis,
10092 def_builtin_const ("__builtin_vis_edge16l", CODE_FOR_edge16ldi_vis,
10094 def_builtin_const ("__builtin_vis_edge32", CODE_FOR_edge32di_vis,
10096 def_builtin_const ("__builtin_vis_edge32l", CODE_FOR_edge32ldi_vis,
10100 def_builtin_const ("__builtin_vis_edge8n", CODE_FOR_edge8ndi_vis,
10102 def_builtin_const ("__builtin_vis_edge8ln", CODE_FOR_edge8lndi_vis,
10104 def_builtin_const ("__builtin_vis_edge16n", CODE_FOR_edge16ndi_vis,
10106 def_builtin_const ("__builtin_vis_edge16ln", CODE_FOR_edge16lndi_vis,
10108 def_builtin_const ("__builtin_vis_edge32n", CODE_FOR_edge32ndi_vis,
10110 def_builtin_const ("__builtin_vis_edge32ln", CODE_FOR_edge32lndi_vis,
10116 def_builtin_const ("__builtin_vis_edge8", CODE_FOR_edge8si_vis,
10118 def_builtin_const ("__builtin_vis_edge8l", CODE_FOR_edge8lsi_vis,
10120 def_builtin_const ("__builtin_vis_edge16", CODE_FOR_edge16si_vis,
10122 def_builtin_const ("__builtin_vis_edge16l", CODE_FOR_edge16lsi_vis,
10124 def_builtin_const ("__builtin_vis_edge32", CODE_FOR_edge32si_vis,
10126 def_builtin_const ("__builtin_vis_edge32l", CODE_FOR_edge32lsi_vis,
10130 def_builtin_const ("__builtin_vis_edge8n", CODE_FOR_edge8nsi_vis,
10132 def_builtin_const ("__builtin_vis_edge8ln", CODE_FOR_edge8lnsi_vis,
10134 def_builtin_const ("__builtin_vis_edge16n", CODE_FOR_edge16nsi_vis,
10136 def_builtin_const ("__builtin_vis_edge16ln", CODE_FOR_edge16lnsi_vis,
10138 def_builtin_const ("__builtin_vis_edge32n", CODE_FOR_edge32nsi_vis,
10140 def_builtin_const ("__builtin_vis_edge32ln", CODE_FOR_edge32lnsi_vis,
10145 /* Pixel compare. */
10148 def_builtin_const ("__builtin_vis_fcmple16", CODE_FOR_fcmple16di_vis,
10149 di_ftype_v4hi_v4hi);
10150 def_builtin_const ("__builtin_vis_fcmple32", CODE_FOR_fcmple32di_vis,
10151 di_ftype_v2si_v2si);
10152 def_builtin_const ("__builtin_vis_fcmpne16", CODE_FOR_fcmpne16di_vis,
10153 di_ftype_v4hi_v4hi);
10154 def_builtin_const ("__builtin_vis_fcmpne32", CODE_FOR_fcmpne32di_vis,
10155 di_ftype_v2si_v2si);
10156 def_builtin_const ("__builtin_vis_fcmpgt16", CODE_FOR_fcmpgt16di_vis,
10157 di_ftype_v4hi_v4hi);
10158 def_builtin_const ("__builtin_vis_fcmpgt32", CODE_FOR_fcmpgt32di_vis,
10159 di_ftype_v2si_v2si);
10160 def_builtin_const ("__builtin_vis_fcmpeq16", CODE_FOR_fcmpeq16di_vis,
10161 di_ftype_v4hi_v4hi);
10162 def_builtin_const ("__builtin_vis_fcmpeq32", CODE_FOR_fcmpeq32di_vis,
10163 di_ftype_v2si_v2si);
10167 def_builtin_const ("__builtin_vis_fcmple16", CODE_FOR_fcmple16si_vis,
10168 si_ftype_v4hi_v4hi);
10169 def_builtin_const ("__builtin_vis_fcmple32", CODE_FOR_fcmple32si_vis,
10170 si_ftype_v2si_v2si);
10171 def_builtin_const ("__builtin_vis_fcmpne16", CODE_FOR_fcmpne16si_vis,
10172 si_ftype_v4hi_v4hi);
10173 def_builtin_const ("__builtin_vis_fcmpne32", CODE_FOR_fcmpne32si_vis,
10174 si_ftype_v2si_v2si);
10175 def_builtin_const ("__builtin_vis_fcmpgt16", CODE_FOR_fcmpgt16si_vis,
10176 si_ftype_v4hi_v4hi);
10177 def_builtin_const ("__builtin_vis_fcmpgt32", CODE_FOR_fcmpgt32si_vis,
10178 si_ftype_v2si_v2si);
10179 def_builtin_const ("__builtin_vis_fcmpeq16", CODE_FOR_fcmpeq16si_vis,
10180 si_ftype_v4hi_v4hi);
10181 def_builtin_const ("__builtin_vis_fcmpeq32", CODE_FOR_fcmpeq32si_vis,
10182 si_ftype_v2si_v2si);
10185 /* Addition and subtraction. */
10186 def_builtin_const ("__builtin_vis_fpadd16", CODE_FOR_addv4hi3,
10187 v4hi_ftype_v4hi_v4hi);
10188 def_builtin_const ("__builtin_vis_fpadd16s", CODE_FOR_addv2hi3,
10189 v2hi_ftype_v2hi_v2hi);
10190 def_builtin_const ("__builtin_vis_fpadd32", CODE_FOR_addv2si3,
10191 v2si_ftype_v2si_v2si);
10192 def_builtin_const ("__builtin_vis_fpadd32s", CODE_FOR_addv1si3,
10193 v1si_ftype_v1si_v1si);
10194 def_builtin_const ("__builtin_vis_fpsub16", CODE_FOR_subv4hi3,
10195 v4hi_ftype_v4hi_v4hi);
10196 def_builtin_const ("__builtin_vis_fpsub16s", CODE_FOR_subv2hi3,
10197 v2hi_ftype_v2hi_v2hi);
10198 def_builtin_const ("__builtin_vis_fpsub32", CODE_FOR_subv2si3,
10199 v2si_ftype_v2si_v2si);
10200 def_builtin_const ("__builtin_vis_fpsub32s", CODE_FOR_subv1si3,
10201 v1si_ftype_v1si_v1si);
10203 /* Three-dimensional array addressing. */
10206 def_builtin_const ("__builtin_vis_array8", CODE_FOR_array8di_vis,
10208 def_builtin_const ("__builtin_vis_array16", CODE_FOR_array16di_vis,
10210 def_builtin_const ("__builtin_vis_array32", CODE_FOR_array32di_vis,
10215 def_builtin_const ("__builtin_vis_array8", CODE_FOR_array8si_vis,
10217 def_builtin_const ("__builtin_vis_array16", CODE_FOR_array16si_vis,
10219 def_builtin_const ("__builtin_vis_array32", CODE_FOR_array32si_vis,
10225 /* Byte mask and shuffle */
10227 def_builtin ("__builtin_vis_bmask", CODE_FOR_bmaskdi_vis,
10230 def_builtin ("__builtin_vis_bmask", CODE_FOR_bmasksi_vis,
10232 def_builtin ("__builtin_vis_bshufflev4hi", CODE_FOR_bshufflev4hi_vis,
10233 v4hi_ftype_v4hi_v4hi);
10234 def_builtin ("__builtin_vis_bshufflev8qi", CODE_FOR_bshufflev8qi_vis,
10235 v8qi_ftype_v8qi_v8qi);
10236 def_builtin ("__builtin_vis_bshufflev2si", CODE_FOR_bshufflev2si_vis,
10237 v2si_ftype_v2si_v2si);
10238 def_builtin ("__builtin_vis_bshuffledi", CODE_FOR_bshufflev1di_vis,
10246 def_builtin ("__builtin_vis_cmask8", CODE_FOR_cmask8di_vis,
10248 def_builtin ("__builtin_vis_cmask16", CODE_FOR_cmask16di_vis,
10250 def_builtin ("__builtin_vis_cmask32", CODE_FOR_cmask32di_vis,
10255 def_builtin ("__builtin_vis_cmask8", CODE_FOR_cmask8si_vis,
10257 def_builtin ("__builtin_vis_cmask16", CODE_FOR_cmask16si_vis,
10259 def_builtin ("__builtin_vis_cmask32", CODE_FOR_cmask32si_vis,
10263 def_builtin_const ("__builtin_vis_fchksm16", CODE_FOR_fchksm16_vis,
10264 v4hi_ftype_v4hi_v4hi);
10266 def_builtin_const ("__builtin_vis_fsll16", CODE_FOR_vashlv4hi3,
10267 v4hi_ftype_v4hi_v4hi);
10268 def_builtin_const ("__builtin_vis_fslas16", CODE_FOR_vssashlv4hi3,
10269 v4hi_ftype_v4hi_v4hi);
10270 def_builtin_const ("__builtin_vis_fsrl16", CODE_FOR_vlshrv4hi3,
10271 v4hi_ftype_v4hi_v4hi);
10272 def_builtin_const ("__builtin_vis_fsra16", CODE_FOR_vashrv4hi3,
10273 v4hi_ftype_v4hi_v4hi);
10274 def_builtin_const ("__builtin_vis_fsll32", CODE_FOR_vashlv2si3,
10275 v2si_ftype_v2si_v2si);
10276 def_builtin_const ("__builtin_vis_fslas32", CODE_FOR_vssashlv2si3,
10277 v2si_ftype_v2si_v2si);
10278 def_builtin_const ("__builtin_vis_fsrl32", CODE_FOR_vlshrv2si3,
10279 v2si_ftype_v2si_v2si);
10280 def_builtin_const ("__builtin_vis_fsra32", CODE_FOR_vashrv2si3,
10281 v2si_ftype_v2si_v2si);
10284 def_builtin_const ("__builtin_vis_pdistn", CODE_FOR_pdistndi_vis,
10285 di_ftype_v8qi_v8qi);
10287 def_builtin_const ("__builtin_vis_pdistn", CODE_FOR_pdistnsi_vis,
10288 si_ftype_v8qi_v8qi);
10290 def_builtin_const ("__builtin_vis_fmean16", CODE_FOR_fmean16_vis,
10291 v4hi_ftype_v4hi_v4hi);
10292 def_builtin_const ("__builtin_vis_fpadd64", CODE_FOR_fpadd64_vis,
10294 def_builtin_const ("__builtin_vis_fpsub64", CODE_FOR_fpsub64_vis,
10297 def_builtin_const ("__builtin_vis_fpadds16", CODE_FOR_ssaddv4hi3,
10298 v4hi_ftype_v4hi_v4hi);
10299 def_builtin_const ("__builtin_vis_fpadds16s", CODE_FOR_ssaddv2hi3,
10300 v2hi_ftype_v2hi_v2hi);
10301 def_builtin_const ("__builtin_vis_fpsubs16", CODE_FOR_sssubv4hi3,
10302 v4hi_ftype_v4hi_v4hi);
10303 def_builtin_const ("__builtin_vis_fpsubs16s", CODE_FOR_sssubv2hi3,
10304 v2hi_ftype_v2hi_v2hi);
10305 def_builtin_const ("__builtin_vis_fpadds32", CODE_FOR_ssaddv2si3,
10306 v2si_ftype_v2si_v2si);
10307 def_builtin_const ("__builtin_vis_fpadds32s", CODE_FOR_ssaddv1si3,
10308 v1si_ftype_v1si_v1si);
10309 def_builtin_const ("__builtin_vis_fpsubs32", CODE_FOR_sssubv2si3,
10310 v2si_ftype_v2si_v2si);
10311 def_builtin_const ("__builtin_vis_fpsubs32s", CODE_FOR_sssubv1si3,
10312 v1si_ftype_v1si_v1si);
10316 def_builtin_const ("__builtin_vis_fucmple8", CODE_FOR_fucmple8di_vis,
10317 di_ftype_v8qi_v8qi);
10318 def_builtin_const ("__builtin_vis_fucmpne8", CODE_FOR_fucmpne8di_vis,
10319 di_ftype_v8qi_v8qi);
10320 def_builtin_const ("__builtin_vis_fucmpgt8", CODE_FOR_fucmpgt8di_vis,
10321 di_ftype_v8qi_v8qi);
10322 def_builtin_const ("__builtin_vis_fucmpeq8", CODE_FOR_fucmpeq8di_vis,
10323 di_ftype_v8qi_v8qi);
10327 def_builtin_const ("__builtin_vis_fucmple8", CODE_FOR_fucmple8si_vis,
10328 si_ftype_v8qi_v8qi);
10329 def_builtin_const ("__builtin_vis_fucmpne8", CODE_FOR_fucmpne8si_vis,
10330 si_ftype_v8qi_v8qi);
10331 def_builtin_const ("__builtin_vis_fucmpgt8", CODE_FOR_fucmpgt8si_vis,
10332 si_ftype_v8qi_v8qi);
10333 def_builtin_const ("__builtin_vis_fucmpeq8", CODE_FOR_fucmpeq8si_vis,
10334 si_ftype_v8qi_v8qi);
10337 def_builtin_const ("__builtin_vis_fhadds", CODE_FOR_fhaddsf_vis,
10339 def_builtin_const ("__builtin_vis_fhaddd", CODE_FOR_fhadddf_vis,
10341 def_builtin_const ("__builtin_vis_fhsubs", CODE_FOR_fhsubsf_vis,
10343 def_builtin_const ("__builtin_vis_fhsubd", CODE_FOR_fhsubdf_vis,
10345 def_builtin_const ("__builtin_vis_fnhadds", CODE_FOR_fnhaddsf_vis,
10347 def_builtin_const ("__builtin_vis_fnhaddd", CODE_FOR_fnhadddf_vis,
10350 def_builtin_const ("__builtin_vis_umulxhi", CODE_FOR_umulxhi_vis,
10352 def_builtin_const ("__builtin_vis_xmulx", CODE_FOR_xmulx_vis,
10354 def_builtin_const ("__builtin_vis_xmulxhi", CODE_FOR_xmulxhi_vis,
10359 /* Handle TARGET_EXPAND_BUILTIN target hook.
10360 Expand builtin functions for sparc intrinsics. */
10363 sparc_expand_builtin (tree exp, rtx target,
10364 rtx subtarget ATTRIBUTE_UNUSED,
10365 enum machine_mode tmode ATTRIBUTE_UNUSED,
10366 int ignore ATTRIBUTE_UNUSED)
10369 call_expr_arg_iterator iter;
10370 tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
10371 unsigned int icode = DECL_FUNCTION_CODE (fndecl);
10376 nonvoid = TREE_TYPE (TREE_TYPE (fndecl)) != void_type_node;
10380 enum machine_mode tmode = insn_data[icode].operand[0].mode;
10382 || GET_MODE (target) != tmode
10383 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
10384 op[0] = gen_reg_rtx (tmode);
10388 FOR_EACH_CALL_EXPR_ARG (arg, iter, exp)
10390 const struct insn_operand_data *insn_op;
10393 if (arg == error_mark_node)
10397 idx = arg_count - !nonvoid;
10398 insn_op = &insn_data[icode].operand[idx];
10399 op[arg_count] = expand_normal (arg);
10401 if (insn_op->mode == V1DImode
10402 && GET_MODE (op[arg_count]) == DImode)
10403 op[arg_count] = gen_lowpart (V1DImode, op[arg_count]);
10404 else if (insn_op->mode == V1SImode
10405 && GET_MODE (op[arg_count]) == SImode)
10406 op[arg_count] = gen_lowpart (V1SImode, op[arg_count]);
10408 if (! (*insn_data[icode].operand[idx].predicate) (op[arg_count],
10410 op[arg_count] = copy_to_mode_reg (insn_op->mode, op[arg_count]);
10416 pat = GEN_FCN (icode) (op[0]);
10420 pat = GEN_FCN (icode) (op[0], op[1]);
10422 pat = GEN_FCN (icode) (op[1]);
10425 pat = GEN_FCN (icode) (op[0], op[1], op[2]);
10428 pat = GEN_FCN (icode) (op[0], op[1], op[2], op[3]);
10431 gcc_unreachable ();
10446 sparc_vis_mul8x16 (int e8, int e16)
10448 return (e8 * e16 + 128) / 256;
10451 /* Multiply the VECTOR_CSTs CST0 and CST1 as specified by FNCODE and put
10452 the result into the array N_ELTS, whose elements are of INNER_TYPE. */
10455 sparc_handle_vis_mul8x16 (tree *n_elts, int fncode, tree inner_type,
10456 tree cst0, tree cst1)
10458 unsigned i, num = VECTOR_CST_NELTS (cst0);
10463 case CODE_FOR_fmul8x16_vis:
10464 for (i = 0; i < num; ++i)
10467 = sparc_vis_mul8x16 (TREE_INT_CST_LOW (VECTOR_CST_ELT (cst0, i)),
10468 TREE_INT_CST_LOW (VECTOR_CST_ELT (cst1, i)));
10469 n_elts[i] = build_int_cst (inner_type, val);
10473 case CODE_FOR_fmul8x16au_vis:
10474 scale = TREE_INT_CST_LOW (VECTOR_CST_ELT (cst1, 0));
10476 for (i = 0; i < num; ++i)
10479 = sparc_vis_mul8x16 (TREE_INT_CST_LOW (VECTOR_CST_ELT (cst0, i)),
10481 n_elts[i] = build_int_cst (inner_type, val);
10485 case CODE_FOR_fmul8x16al_vis:
10486 scale = TREE_INT_CST_LOW (VECTOR_CST_ELT (cst1, 1));
10488 for (i = 0; i < num; ++i)
10491 = sparc_vis_mul8x16 (TREE_INT_CST_LOW (VECTOR_CST_ELT (cst0, i)),
10493 n_elts[i] = build_int_cst (inner_type, val);
10498 gcc_unreachable ();
10502 /* Handle TARGET_FOLD_BUILTIN target hook.
10503 Fold builtin functions for SPARC intrinsics. If IGNORE is true the
10504 result of the function call is ignored. NULL_TREE is returned if the
10505 function could not be folded. */
10508 sparc_fold_builtin (tree fndecl, int n_args ATTRIBUTE_UNUSED,
10509 tree *args, bool ignore)
10511 tree arg0, arg1, arg2;
10512 tree rtype = TREE_TYPE (TREE_TYPE (fndecl));
10513 enum insn_code icode = (enum insn_code) DECL_FUNCTION_CODE (fndecl);
10517 /* Note that a switch statement instead of the sequence of tests would
10518 be incorrect as many of the CODE_FOR values could be CODE_FOR_nothing
10519 and that would yield multiple alternatives with identical values. */
10520 if (icode == CODE_FOR_alignaddrsi_vis
10521 || icode == CODE_FOR_alignaddrdi_vis
10522 || icode == CODE_FOR_wrgsr_vis
10523 || icode == CODE_FOR_bmasksi_vis
10524 || icode == CODE_FOR_bmaskdi_vis
10525 || icode == CODE_FOR_cmask8si_vis
10526 || icode == CODE_FOR_cmask8di_vis
10527 || icode == CODE_FOR_cmask16si_vis
10528 || icode == CODE_FOR_cmask16di_vis
10529 || icode == CODE_FOR_cmask32si_vis
10530 || icode == CODE_FOR_cmask32di_vis)
10533 return build_zero_cst (rtype);
10538 case CODE_FOR_fexpand_vis:
10542 if (TREE_CODE (arg0) == VECTOR_CST)
10544 tree inner_type = TREE_TYPE (rtype);
10548 n_elts = XALLOCAVEC (tree, VECTOR_CST_NELTS (arg0));
10549 for (i = 0; i < VECTOR_CST_NELTS (arg0); ++i)
10550 n_elts[i] = build_int_cst (inner_type,
10552 (VECTOR_CST_ELT (arg0, i)) << 4);
10553 return build_vector (rtype, n_elts);
10557 case CODE_FOR_fmul8x16_vis:
10558 case CODE_FOR_fmul8x16au_vis:
10559 case CODE_FOR_fmul8x16al_vis:
10565 if (TREE_CODE (arg0) == VECTOR_CST && TREE_CODE (arg1) == VECTOR_CST)
10567 tree inner_type = TREE_TYPE (rtype);
10568 tree *n_elts = XALLOCAVEC (tree, VECTOR_CST_NELTS (arg0));
10569 sparc_handle_vis_mul8x16 (n_elts, icode, inner_type, arg0, arg1);
10570 return build_vector (rtype, n_elts);
10574 case CODE_FOR_fpmerge_vis:
10580 if (TREE_CODE (arg0) == VECTOR_CST && TREE_CODE (arg1) == VECTOR_CST)
10582 tree *n_elts = XALLOCAVEC (tree, 2 * VECTOR_CST_NELTS (arg0));
10584 for (i = 0; i < VECTOR_CST_NELTS (arg0); ++i)
10586 n_elts[2*i] = VECTOR_CST_ELT (arg0, i);
10587 n_elts[2*i+1] = VECTOR_CST_ELT (arg1, i);
10590 return build_vector (rtype, n_elts);
10594 case CODE_FOR_pdist_vis:
10602 if (TREE_CODE (arg0) == VECTOR_CST
10603 && TREE_CODE (arg1) == VECTOR_CST
10604 && TREE_CODE (arg2) == INTEGER_CST)
10606 bool overflow = false;
10607 double_int result = TREE_INT_CST (arg2);
10611 for (i = 0; i < VECTOR_CST_NELTS (arg0); ++i)
10613 double_int e0 = TREE_INT_CST (VECTOR_CST_ELT (arg0, i));
10614 double_int e1 = TREE_INT_CST (VECTOR_CST_ELT (arg1, i));
10616 bool neg1_ovf, neg2_ovf, add1_ovf, add2_ovf;
10618 tmp = e1.neg_with_overflow (&neg1_ovf);
10619 tmp = e0.add_with_sign (tmp, false, &add1_ovf);
10620 if (tmp.is_negative ())
10621 tmp = tmp.neg_with_overflow (&neg2_ovf);
10623 result = result.add_with_sign (tmp, false, &add2_ovf);
10624 overflow |= neg1_ovf | neg2_ovf | add1_ovf | add2_ovf;
10627 gcc_assert (!overflow);
10629 return build_int_cst_wide (rtype, result.low, result.high);
10639 /* ??? This duplicates information provided to the compiler by the
10640 ??? scheduler description. Some day, teach genautomata to output
10641 ??? the latencies and then CSE will just use that. */
10644 sparc_rtx_costs (rtx x, int code, int outer_code, int opno ATTRIBUTE_UNUSED,
10645 int *total, bool speed ATTRIBUTE_UNUSED)
10647 enum machine_mode mode = GET_MODE (x);
10648 bool float_mode_p = FLOAT_MODE_P (mode);
10653 if (INTVAL (x) < 0x1000 && INTVAL (x) >= -0x1000)
10671 if (GET_MODE (x) == VOIDmode
10672 && ((CONST_DOUBLE_HIGH (x) == 0
10673 && CONST_DOUBLE_LOW (x) < 0x1000)
10674 || (CONST_DOUBLE_HIGH (x) == -1
10675 && CONST_DOUBLE_LOW (x) < 0
10676 && CONST_DOUBLE_LOW (x) >= -0x1000)))
10683 /* If outer-code was a sign or zero extension, a cost
10684 of COSTS_N_INSNS (1) was already added in. This is
10685 why we are subtracting it back out. */
10686 if (outer_code == ZERO_EXTEND)
10688 *total = sparc_costs->int_zload - COSTS_N_INSNS (1);
10690 else if (outer_code == SIGN_EXTEND)
10692 *total = sparc_costs->int_sload - COSTS_N_INSNS (1);
10694 else if (float_mode_p)
10696 *total = sparc_costs->float_load;
10700 *total = sparc_costs->int_load;
10708 *total = sparc_costs->float_plusminus;
10710 *total = COSTS_N_INSNS (1);
10717 gcc_assert (float_mode_p);
10718 *total = sparc_costs->float_mul;
10721 if (GET_CODE (sub) == NEG)
10722 sub = XEXP (sub, 0);
10723 *total += rtx_cost (sub, FMA, 0, speed);
10726 if (GET_CODE (sub) == NEG)
10727 sub = XEXP (sub, 0);
10728 *total += rtx_cost (sub, FMA, 2, speed);
10734 *total = sparc_costs->float_mul;
10735 else if (! TARGET_HARD_MUL)
10736 *total = COSTS_N_INSNS (25);
10742 if (sparc_costs->int_mul_bit_factor)
10746 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
10748 unsigned HOST_WIDE_INT value = INTVAL (XEXP (x, 1));
10749 for (nbits = 0; value != 0; value &= value - 1)
10752 else if (GET_CODE (XEXP (x, 1)) == CONST_DOUBLE
10753 && GET_MODE (XEXP (x, 1)) == VOIDmode)
10755 rtx x1 = XEXP (x, 1);
10756 unsigned HOST_WIDE_INT value1 = CONST_DOUBLE_LOW (x1);
10757 unsigned HOST_WIDE_INT value2 = CONST_DOUBLE_HIGH (x1);
10759 for (nbits = 0; value1 != 0; value1 &= value1 - 1)
10761 for (; value2 != 0; value2 &= value2 - 1)
10769 bit_cost = (nbits - 3) / sparc_costs->int_mul_bit_factor;
10770 bit_cost = COSTS_N_INSNS (bit_cost);
10773 if (mode == DImode)
10774 *total = sparc_costs->int_mulX + bit_cost;
10776 *total = sparc_costs->int_mul + bit_cost;
10783 *total = COSTS_N_INSNS (1) + sparc_costs->shift_penalty;
10792 if (mode == DFmode)
10793 *total = sparc_costs->float_div_df;
10795 *total = sparc_costs->float_div_sf;
10799 if (mode == DImode)
10800 *total = sparc_costs->int_divX;
10802 *total = sparc_costs->int_div;
10807 if (! float_mode_p)
10809 *total = COSTS_N_INSNS (1);
10816 case UNSIGNED_FLOAT:
10820 case FLOAT_TRUNCATE:
10821 *total = sparc_costs->float_move;
10825 if (mode == DFmode)
10826 *total = sparc_costs->float_sqrt_df;
10828 *total = sparc_costs->float_sqrt_sf;
10833 *total = sparc_costs->float_cmp;
10835 *total = COSTS_N_INSNS (1);
10840 *total = sparc_costs->float_cmove;
10842 *total = sparc_costs->int_cmove;
10846 /* Handle the NAND vector patterns. */
10847 if (sparc_vector_mode_supported_p (GET_MODE (x))
10848 && GET_CODE (XEXP (x, 0)) == NOT
10849 && GET_CODE (XEXP (x, 1)) == NOT)
10851 *total = COSTS_N_INSNS (1);
10862 /* Return true if CLASS is either GENERAL_REGS or I64_REGS. */
10865 general_or_i64_p (reg_class_t rclass)
10867 return (rclass == GENERAL_REGS || rclass == I64_REGS);
10870 /* Implement TARGET_REGISTER_MOVE_COST. */
10873 sparc_register_move_cost (enum machine_mode mode ATTRIBUTE_UNUSED,
10874 reg_class_t from, reg_class_t to)
10876 bool need_memory = false;
10878 if (from == FPCC_REGS || to == FPCC_REGS)
10879 need_memory = true;
10880 else if ((FP_REG_CLASS_P (from) && general_or_i64_p (to))
10881 || (general_or_i64_p (from) && FP_REG_CLASS_P (to)))
10885 int size = GET_MODE_SIZE (mode);
10886 if (size == 8 || size == 4)
10888 if (! TARGET_ARCH32 || size == 4)
10894 need_memory = true;
10899 if (sparc_cpu == PROCESSOR_ULTRASPARC
10900 || sparc_cpu == PROCESSOR_ULTRASPARC3
10901 || sparc_cpu == PROCESSOR_NIAGARA
10902 || sparc_cpu == PROCESSOR_NIAGARA2
10903 || sparc_cpu == PROCESSOR_NIAGARA3
10904 || sparc_cpu == PROCESSOR_NIAGARA4)
10913 /* Emit the sequence of insns SEQ while preserving the registers REG and REG2.
10914 This is achieved by means of a manual dynamic stack space allocation in
10915 the current frame. We make the assumption that SEQ doesn't contain any
10916 function calls, with the possible exception of calls to the GOT helper. */
10919 emit_and_preserve (rtx seq, rtx reg, rtx reg2)
10921 /* We must preserve the lowest 16 words for the register save area. */
10922 HOST_WIDE_INT offset = 16*UNITS_PER_WORD;
10923 /* We really need only 2 words of fresh stack space. */
10924 HOST_WIDE_INT size = SPARC_STACK_ALIGN (offset + 2*UNITS_PER_WORD);
10927 = gen_rtx_MEM (word_mode, plus_constant (Pmode, stack_pointer_rtx,
10928 SPARC_STACK_BIAS + offset));
10930 emit_insn (gen_stack_pointer_inc (GEN_INT (-size)));
10931 emit_insn (gen_rtx_SET (VOIDmode, slot, reg));
10933 emit_insn (gen_rtx_SET (VOIDmode,
10934 adjust_address (slot, word_mode, UNITS_PER_WORD),
10938 emit_insn (gen_rtx_SET (VOIDmode,
10940 adjust_address (slot, word_mode, UNITS_PER_WORD)));
10941 emit_insn (gen_rtx_SET (VOIDmode, reg, slot));
10942 emit_insn (gen_stack_pointer_inc (GEN_INT (size)));
10945 /* Output the assembler code for a thunk function. THUNK_DECL is the
10946 declaration for the thunk function itself, FUNCTION is the decl for
10947 the target function. DELTA is an immediate constant offset to be
10948 added to THIS. If VCALL_OFFSET is nonzero, the word at address
10949 (*THIS + VCALL_OFFSET) should be additionally added to THIS. */
10952 sparc_output_mi_thunk (FILE *file, tree thunk_fndecl ATTRIBUTE_UNUSED,
10953 HOST_WIDE_INT delta, HOST_WIDE_INT vcall_offset,
10956 rtx this_rtx, insn, funexp;
10957 unsigned int int_arg_first;
10959 reload_completed = 1;
10960 epilogue_completed = 1;
10962 emit_note (NOTE_INSN_PROLOGUE_END);
10966 sparc_leaf_function_p = 1;
10968 int_arg_first = SPARC_OUTGOING_INT_ARG_FIRST;
10970 else if (flag_delayed_branch)
10972 /* We will emit a regular sibcall below, so we need to instruct
10973 output_sibcall that we are in a leaf function. */
10974 sparc_leaf_function_p = crtl->uses_only_leaf_regs = 1;
10976 /* This will cause final.c to invoke leaf_renumber_regs so we
10977 must behave as if we were in a not-yet-leafified function. */
10978 int_arg_first = SPARC_INCOMING_INT_ARG_FIRST;
10982 /* We will emit the sibcall manually below, so we will need to
10983 manually spill non-leaf registers. */
10984 sparc_leaf_function_p = crtl->uses_only_leaf_regs = 0;
10986 /* We really are in a leaf function. */
10987 int_arg_first = SPARC_OUTGOING_INT_ARG_FIRST;
10990 /* Find the "this" pointer. Normally in %o0, but in ARCH64 if the function
10991 returns a structure, the structure return pointer is there instead. */
10993 && aggregate_value_p (TREE_TYPE (TREE_TYPE (function)), function))
10994 this_rtx = gen_rtx_REG (Pmode, int_arg_first + 1);
10996 this_rtx = gen_rtx_REG (Pmode, int_arg_first);
10998 /* Add DELTA. When possible use a plain add, otherwise load it into
10999 a register first. */
11002 rtx delta_rtx = GEN_INT (delta);
11004 if (! SPARC_SIMM13_P (delta))
11006 rtx scratch = gen_rtx_REG (Pmode, 1);
11007 emit_move_insn (scratch, delta_rtx);
11008 delta_rtx = scratch;
11011 /* THIS_RTX += DELTA. */
11012 emit_insn (gen_add2_insn (this_rtx, delta_rtx));
11015 /* Add the word at address (*THIS_RTX + VCALL_OFFSET). */
11018 rtx vcall_offset_rtx = GEN_INT (vcall_offset);
11019 rtx scratch = gen_rtx_REG (Pmode, 1);
11021 gcc_assert (vcall_offset < 0);
11023 /* SCRATCH = *THIS_RTX. */
11024 emit_move_insn (scratch, gen_rtx_MEM (Pmode, this_rtx));
11026 /* Prepare for adding VCALL_OFFSET. The difficulty is that we
11027 may not have any available scratch register at this point. */
11028 if (SPARC_SIMM13_P (vcall_offset))
11030 /* This is the case if ARCH64 (unless -ffixed-g5 is passed). */
11031 else if (! fixed_regs[5]
11032 /* The below sequence is made up of at least 2 insns,
11033 while the default method may need only one. */
11034 && vcall_offset < -8192)
11036 rtx scratch2 = gen_rtx_REG (Pmode, 5);
11037 emit_move_insn (scratch2, vcall_offset_rtx);
11038 vcall_offset_rtx = scratch2;
11042 rtx increment = GEN_INT (-4096);
11044 /* VCALL_OFFSET is a negative number whose typical range can be
11045 estimated as -32768..0 in 32-bit mode. In almost all cases
11046 it is therefore cheaper to emit multiple add insns than
11047 spilling and loading the constant into a register (at least
11049 while (! SPARC_SIMM13_P (vcall_offset))
11051 emit_insn (gen_add2_insn (scratch, increment));
11052 vcall_offset += 4096;
11054 vcall_offset_rtx = GEN_INT (vcall_offset); /* cannot be 0 */
11057 /* SCRATCH = *(*THIS_RTX + VCALL_OFFSET). */
11058 emit_move_insn (scratch, gen_rtx_MEM (Pmode,
11059 gen_rtx_PLUS (Pmode,
11061 vcall_offset_rtx)));
11063 /* THIS_RTX += *(*THIS_RTX + VCALL_OFFSET). */
11064 emit_insn (gen_add2_insn (this_rtx, scratch));
11067 /* Generate a tail call to the target function. */
11068 if (! TREE_USED (function))
11070 assemble_external (function);
11071 TREE_USED (function) = 1;
11073 funexp = XEXP (DECL_RTL (function), 0);
11075 if (flag_delayed_branch)
11077 funexp = gen_rtx_MEM (FUNCTION_MODE, funexp);
11078 insn = emit_call_insn (gen_sibcall (funexp));
11079 SIBLING_CALL_P (insn) = 1;
11083 /* The hoops we have to jump through in order to generate a sibcall
11084 without using delay slots... */
11085 rtx spill_reg, seq, scratch = gen_rtx_REG (Pmode, 1);
11089 spill_reg = gen_rtx_REG (word_mode, 15); /* %o7 */
11091 load_got_register (); /* clobbers %o7 */
11092 scratch = sparc_legitimize_pic_address (funexp, scratch);
11093 seq = get_insns ();
11095 emit_and_preserve (seq, spill_reg, pic_offset_table_rtx);
11097 else if (TARGET_ARCH32)
11099 emit_insn (gen_rtx_SET (VOIDmode,
11101 gen_rtx_HIGH (SImode, funexp)));
11102 emit_insn (gen_rtx_SET (VOIDmode,
11104 gen_rtx_LO_SUM (SImode, scratch, funexp)));
11106 else /* TARGET_ARCH64 */
11108 switch (sparc_cmodel)
11112 /* The destination can serve as a temporary. */
11113 sparc_emit_set_symbolic_const64 (scratch, funexp, scratch);
11118 /* The destination cannot serve as a temporary. */
11119 spill_reg = gen_rtx_REG (DImode, 15); /* %o7 */
11121 sparc_emit_set_symbolic_const64 (scratch, funexp, spill_reg);
11122 seq = get_insns ();
11124 emit_and_preserve (seq, spill_reg, 0);
11128 gcc_unreachable ();
11132 emit_jump_insn (gen_indirect_jump (scratch));
11137 /* Run just enough of rest_of_compilation to get the insns emitted.
11138 There's not really enough bulk here to make other passes such as
11139 instruction scheduling worth while. Note that use_thunk calls
11140 assemble_start_function and assemble_end_function. */
11141 insn = get_insns ();
11142 shorten_branches (insn);
11143 final_start_function (insn, file, 1);
11144 final (insn, file, 1);
11145 final_end_function ();
11147 reload_completed = 0;
11148 epilogue_completed = 0;
11151 /* Return true if sparc_output_mi_thunk would be able to output the
11152 assembler code for the thunk function specified by the arguments
11153 it is passed, and false otherwise. */
11155 sparc_can_output_mi_thunk (const_tree thunk_fndecl ATTRIBUTE_UNUSED,
11156 HOST_WIDE_INT delta ATTRIBUTE_UNUSED,
11157 HOST_WIDE_INT vcall_offset,
11158 const_tree function ATTRIBUTE_UNUSED)
11160 /* Bound the loop used in the default method above. */
11161 return (vcall_offset >= -32768 || ! fixed_regs[5]);
11164 /* How to allocate a 'struct machine_function'. */
11166 static struct machine_function *
11167 sparc_init_machine_status (void)
11169 return ggc_alloc_cleared_machine_function ();
11172 /* Locate some local-dynamic symbol still in use by this function
11173 so that we can print its name in local-dynamic base patterns. */
11175 static const char *
11176 get_some_local_dynamic_name (void)
11180 if (cfun->machine->some_ld_name)
11181 return cfun->machine->some_ld_name;
11183 for (insn = get_insns (); insn ; insn = NEXT_INSN (insn))
11185 && for_each_rtx (&PATTERN (insn), get_some_local_dynamic_name_1, 0))
11186 return cfun->machine->some_ld_name;
11188 gcc_unreachable ();
11192 get_some_local_dynamic_name_1 (rtx *px, void *data ATTRIBUTE_UNUSED)
11197 && GET_CODE (x) == SYMBOL_REF
11198 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_DYNAMIC)
11200 cfun->machine->some_ld_name = XSTR (x, 0);
11207 /* This is called from dwarf2out.c via TARGET_ASM_OUTPUT_DWARF_DTPREL.
11208 We need to emit DTP-relative relocations. */
11211 sparc_output_dwarf_dtprel (FILE *file, int size, rtx x)
11216 fputs ("\t.word\t%r_tls_dtpoff32(", file);
11219 fputs ("\t.xword\t%r_tls_dtpoff64(", file);
11222 gcc_unreachable ();
11224 output_addr_const (file, x);
11228 /* Do whatever processing is required at the end of a file. */
11231 sparc_file_end (void)
11233 /* If we need to emit the special GOT helper function, do so now. */
11234 if (got_helper_rtx)
11236 const char *name = XSTR (got_helper_rtx, 0);
11237 const char *reg_name = reg_names[GLOBAL_OFFSET_TABLE_REGNUM];
11238 #ifdef DWARF2_UNWIND_INFO
11242 if (USE_HIDDEN_LINKONCE)
11244 tree decl = build_decl (BUILTINS_LOCATION, FUNCTION_DECL,
11245 get_identifier (name),
11246 build_function_type_list (void_type_node,
11248 DECL_RESULT (decl) = build_decl (BUILTINS_LOCATION, RESULT_DECL,
11249 NULL_TREE, void_type_node);
11250 TREE_PUBLIC (decl) = 1;
11251 TREE_STATIC (decl) = 1;
11252 make_decl_one_only (decl, DECL_ASSEMBLER_NAME (decl));
11253 DECL_VISIBILITY (decl) = VISIBILITY_HIDDEN;
11254 DECL_VISIBILITY_SPECIFIED (decl) = 1;
11255 resolve_unique_section (decl, 0, flag_function_sections);
11256 allocate_struct_function (decl, true);
11257 cfun->is_thunk = 1;
11258 current_function_decl = decl;
11259 init_varasm_status ();
11260 assemble_start_function (decl, name);
11264 const int align = floor_log2 (FUNCTION_BOUNDARY / BITS_PER_UNIT);
11265 switch_to_section (text_section);
11267 ASM_OUTPUT_ALIGN (asm_out_file, align);
11268 ASM_OUTPUT_LABEL (asm_out_file, name);
11271 #ifdef DWARF2_UNWIND_INFO
11272 do_cfi = dwarf2out_do_cfi_asm ();
11274 fprintf (asm_out_file, "\t.cfi_startproc\n");
11276 if (flag_delayed_branch)
11277 fprintf (asm_out_file, "\tjmp\t%%o7+8\n\t add\t%%o7, %s, %s\n",
11278 reg_name, reg_name);
11280 fprintf (asm_out_file, "\tadd\t%%o7, %s, %s\n\tjmp\t%%o7+8\n\t nop\n",
11281 reg_name, reg_name);
11282 #ifdef DWARF2_UNWIND_INFO
11284 fprintf (asm_out_file, "\t.cfi_endproc\n");
11288 if (NEED_INDICATE_EXEC_STACK)
11289 file_end_indicate_exec_stack ();
11291 #ifdef TARGET_SOLARIS
11292 solaris_file_end ();
11296 #ifdef TARGET_ALTERNATE_LONG_DOUBLE_MANGLING
11297 /* Implement TARGET_MANGLE_TYPE. */
11299 static const char *
11300 sparc_mangle_type (const_tree type)
11303 && TYPE_MAIN_VARIANT (type) == long_double_type_node
11304 && TARGET_LONG_DOUBLE_128)
11307 /* For all other types, use normal C++ mangling. */
11312 /* Expand a membar instruction for various use cases. Both the LOAD_STORE
11313 and BEFORE_AFTER arguments of the form X_Y. They are two-bit masks where
11314 bit 0 indicates that X is true, and bit 1 indicates Y is true. */
11317 sparc_emit_membar_for_model (enum memmodel model,
11318 int load_store, int before_after)
11320 /* Bits for the MEMBAR mmask field. */
11321 const int LoadLoad = 1;
11322 const int StoreLoad = 2;
11323 const int LoadStore = 4;
11324 const int StoreStore = 8;
11326 int mm = 0, implied = 0;
11328 switch (sparc_memory_model)
11331 /* Sequential Consistency. All memory transactions are immediately
11332 visible in sequential execution order. No barriers needed. */
11333 implied = LoadLoad | StoreLoad | LoadStore | StoreStore;
11337 /* Total Store Ordering: all memory transactions with store semantics
11338 are followed by an implied StoreStore. */
11339 implied |= StoreStore;
11341 /* If we're not looking for a raw barrer (before+after), then atomic
11342 operations get the benefit of being both load and store. */
11343 if (load_store == 3 && before_after == 1)
11344 implied |= StoreLoad;
11348 /* Partial Store Ordering: all memory transactions with load semantics
11349 are followed by an implied LoadLoad | LoadStore. */
11350 implied |= LoadLoad | LoadStore;
11352 /* If we're not looking for a raw barrer (before+after), then atomic
11353 operations get the benefit of being both load and store. */
11354 if (load_store == 3 && before_after == 2)
11355 implied |= StoreLoad | StoreStore;
11359 /* Relaxed Memory Ordering: no implicit bits. */
11363 gcc_unreachable ();
11366 if (before_after & 1)
11368 if (model == MEMMODEL_RELEASE
11369 || model == MEMMODEL_ACQ_REL
11370 || model == MEMMODEL_SEQ_CST)
11372 if (load_store & 1)
11373 mm |= LoadLoad | StoreLoad;
11374 if (load_store & 2)
11375 mm |= LoadStore | StoreStore;
11378 if (before_after & 2)
11380 if (model == MEMMODEL_ACQUIRE
11381 || model == MEMMODEL_ACQ_REL
11382 || model == MEMMODEL_SEQ_CST)
11384 if (load_store & 1)
11385 mm |= LoadLoad | LoadStore;
11386 if (load_store & 2)
11387 mm |= StoreLoad | StoreStore;
11391 /* Remove the bits implied by the system memory model. */
11394 /* For raw barriers (before+after), always emit a barrier.
11395 This will become a compile-time barrier if needed. */
11396 if (mm || before_after == 3)
11397 emit_insn (gen_membar (GEN_INT (mm)));
11400 /* Expand code to perform a 8 or 16-bit compare and swap by doing 32-bit
11401 compare and swap on the word containing the byte or half-word. */
11404 sparc_expand_compare_and_swap_12 (rtx bool_result, rtx result, rtx mem,
11405 rtx oldval, rtx newval)
11407 rtx addr1 = force_reg (Pmode, XEXP (mem, 0));
11408 rtx addr = gen_reg_rtx (Pmode);
11409 rtx off = gen_reg_rtx (SImode);
11410 rtx oldv = gen_reg_rtx (SImode);
11411 rtx newv = gen_reg_rtx (SImode);
11412 rtx oldvalue = gen_reg_rtx (SImode);
11413 rtx newvalue = gen_reg_rtx (SImode);
11414 rtx res = gen_reg_rtx (SImode);
11415 rtx resv = gen_reg_rtx (SImode);
11416 rtx memsi, val, mask, end_label, loop_label, cc;
11418 emit_insn (gen_rtx_SET (VOIDmode, addr,
11419 gen_rtx_AND (Pmode, addr1, GEN_INT (-4))));
11421 if (Pmode != SImode)
11422 addr1 = gen_lowpart (SImode, addr1);
11423 emit_insn (gen_rtx_SET (VOIDmode, off,
11424 gen_rtx_AND (SImode, addr1, GEN_INT (3))));
11426 memsi = gen_rtx_MEM (SImode, addr);
11427 set_mem_alias_set (memsi, ALIAS_SET_MEMORY_BARRIER);
11428 MEM_VOLATILE_P (memsi) = MEM_VOLATILE_P (mem);
11430 val = copy_to_reg (memsi);
11432 emit_insn (gen_rtx_SET (VOIDmode, off,
11433 gen_rtx_XOR (SImode, off,
11434 GEN_INT (GET_MODE (mem) == QImode
11437 emit_insn (gen_rtx_SET (VOIDmode, off,
11438 gen_rtx_ASHIFT (SImode, off, GEN_INT (3))));
11440 if (GET_MODE (mem) == QImode)
11441 mask = force_reg (SImode, GEN_INT (0xff));
11443 mask = force_reg (SImode, GEN_INT (0xffff));
11445 emit_insn (gen_rtx_SET (VOIDmode, mask,
11446 gen_rtx_ASHIFT (SImode, mask, off)));
11448 emit_insn (gen_rtx_SET (VOIDmode, val,
11449 gen_rtx_AND (SImode, gen_rtx_NOT (SImode, mask),
11452 oldval = gen_lowpart (SImode, oldval);
11453 emit_insn (gen_rtx_SET (VOIDmode, oldv,
11454 gen_rtx_ASHIFT (SImode, oldval, off)));
11456 newval = gen_lowpart_common (SImode, newval);
11457 emit_insn (gen_rtx_SET (VOIDmode, newv,
11458 gen_rtx_ASHIFT (SImode, newval, off)));
11460 emit_insn (gen_rtx_SET (VOIDmode, oldv,
11461 gen_rtx_AND (SImode, oldv, mask)));
11463 emit_insn (gen_rtx_SET (VOIDmode, newv,
11464 gen_rtx_AND (SImode, newv, mask)));
11466 end_label = gen_label_rtx ();
11467 loop_label = gen_label_rtx ();
11468 emit_label (loop_label);
11470 emit_insn (gen_rtx_SET (VOIDmode, oldvalue,
11471 gen_rtx_IOR (SImode, oldv, val)));
11473 emit_insn (gen_rtx_SET (VOIDmode, newvalue,
11474 gen_rtx_IOR (SImode, newv, val)));
11476 emit_move_insn (bool_result, const1_rtx);
11478 emit_insn (gen_atomic_compare_and_swapsi_1 (res, memsi, oldvalue, newvalue));
11480 emit_cmp_and_jump_insns (res, oldvalue, EQ, NULL, SImode, 0, end_label);
11482 emit_insn (gen_rtx_SET (VOIDmode, resv,
11483 gen_rtx_AND (SImode, gen_rtx_NOT (SImode, mask),
11486 emit_move_insn (bool_result, const0_rtx);
11488 cc = gen_compare_reg_1 (NE, resv, val);
11489 emit_insn (gen_rtx_SET (VOIDmode, val, resv));
11491 /* Use cbranchcc4 to separate the compare and branch! */
11492 emit_jump_insn (gen_cbranchcc4 (gen_rtx_NE (VOIDmode, cc, const0_rtx),
11493 cc, const0_rtx, loop_label));
11495 emit_label (end_label);
11497 emit_insn (gen_rtx_SET (VOIDmode, res,
11498 gen_rtx_AND (SImode, res, mask)));
11500 emit_insn (gen_rtx_SET (VOIDmode, res,
11501 gen_rtx_LSHIFTRT (SImode, res, off)));
11503 emit_move_insn (result, gen_lowpart (GET_MODE (result), res));
11506 /* Expand code to perform a compare-and-swap. */
11509 sparc_expand_compare_and_swap (rtx operands[])
11511 rtx bval, retval, mem, oldval, newval;
11512 enum machine_mode mode;
11513 enum memmodel model;
11515 bval = operands[0];
11516 retval = operands[1];
11518 oldval = operands[3];
11519 newval = operands[4];
11520 model = (enum memmodel) INTVAL (operands[6]);
11521 mode = GET_MODE (mem);
11523 sparc_emit_membar_for_model (model, 3, 1);
11525 if (reg_overlap_mentioned_p (retval, oldval))
11526 oldval = copy_to_reg (oldval);
11528 if (mode == QImode || mode == HImode)
11529 sparc_expand_compare_and_swap_12 (bval, retval, mem, oldval, newval);
11532 rtx (*gen) (rtx, rtx, rtx, rtx);
11535 if (mode == SImode)
11536 gen = gen_atomic_compare_and_swapsi_1;
11538 gen = gen_atomic_compare_and_swapdi_1;
11539 emit_insn (gen (retval, mem, oldval, newval));
11541 x = emit_store_flag (bval, EQ, retval, oldval, mode, 1, 1);
11543 convert_move (bval, x, 1);
11546 sparc_emit_membar_for_model (model, 3, 2);
11550 sparc_expand_vec_perm_bmask (enum machine_mode vmode, rtx sel)
11554 sel = gen_lowpart (DImode, sel);
11558 /* inp = xxxxxxxAxxxxxxxB */
11559 t_1 = expand_simple_binop (DImode, LSHIFTRT, sel, GEN_INT (16),
11560 NULL_RTX, 1, OPTAB_DIRECT);
11561 /* t_1 = ....xxxxxxxAxxx. */
11562 sel = expand_simple_binop (SImode, AND, gen_lowpart (SImode, sel),
11563 GEN_INT (3), NULL_RTX, 1, OPTAB_DIRECT);
11564 t_1 = expand_simple_binop (SImode, AND, gen_lowpart (SImode, t_1),
11565 GEN_INT (0x30000), NULL_RTX, 1, OPTAB_DIRECT);
11566 /* sel = .......B */
11567 /* t_1 = ...A.... */
11568 sel = expand_simple_binop (SImode, IOR, sel, t_1, sel, 1, OPTAB_DIRECT);
11569 /* sel = ...A...B */
11570 sel = expand_mult (SImode, sel, GEN_INT (0x4444), sel, 1);
11571 /* sel = AAAABBBB * 4 */
11572 t_1 = force_reg (SImode, GEN_INT (0x01230123));
11573 /* sel = { A*4, A*4+1, A*4+2, ... } */
11577 /* inp = xxxAxxxBxxxCxxxD */
11578 t_1 = expand_simple_binop (DImode, LSHIFTRT, sel, GEN_INT (8),
11579 NULL_RTX, 1, OPTAB_DIRECT);
11580 t_2 = expand_simple_binop (DImode, LSHIFTRT, sel, GEN_INT (16),
11581 NULL_RTX, 1, OPTAB_DIRECT);
11582 t_3 = expand_simple_binop (DImode, LSHIFTRT, sel, GEN_INT (24),
11583 NULL_RTX, 1, OPTAB_DIRECT);
11584 /* t_1 = ..xxxAxxxBxxxCxx */
11585 /* t_2 = ....xxxAxxxBxxxC */
11586 /* t_3 = ......xxxAxxxBxx */
11587 sel = expand_simple_binop (SImode, AND, gen_lowpart (SImode, sel),
11589 NULL_RTX, 1, OPTAB_DIRECT);
11590 t_1 = expand_simple_binop (SImode, AND, gen_lowpart (SImode, t_1),
11592 NULL_RTX, 1, OPTAB_DIRECT);
11593 t_2 = expand_simple_binop (SImode, AND, gen_lowpart (SImode, t_2),
11594 GEN_INT (0x070000),
11595 NULL_RTX, 1, OPTAB_DIRECT);
11596 t_3 = expand_simple_binop (SImode, AND, gen_lowpart (SImode, t_3),
11597 GEN_INT (0x07000000),
11598 NULL_RTX, 1, OPTAB_DIRECT);
11599 /* sel = .......D */
11600 /* t_1 = .....C.. */
11601 /* t_2 = ...B.... */
11602 /* t_3 = .A...... */
11603 sel = expand_simple_binop (SImode, IOR, sel, t_1, sel, 1, OPTAB_DIRECT);
11604 t_2 = expand_simple_binop (SImode, IOR, t_2, t_3, t_2, 1, OPTAB_DIRECT);
11605 sel = expand_simple_binop (SImode, IOR, sel, t_2, sel, 1, OPTAB_DIRECT);
11606 /* sel = .A.B.C.D */
11607 sel = expand_mult (SImode, sel, GEN_INT (0x22), sel, 1);
11608 /* sel = AABBCCDD * 2 */
11609 t_1 = force_reg (SImode, GEN_INT (0x01010101));
11610 /* sel = { A*2, A*2+1, B*2, B*2+1, ... } */
11614 /* input = xAxBxCxDxExFxGxH */
11615 sel = expand_simple_binop (DImode, AND, sel,
11616 GEN_INT ((HOST_WIDE_INT)0x0f0f0f0f << 32
11618 NULL_RTX, 1, OPTAB_DIRECT);
11619 /* sel = .A.B.C.D.E.F.G.H */
11620 t_1 = expand_simple_binop (DImode, LSHIFTRT, sel, GEN_INT (4),
11621 NULL_RTX, 1, OPTAB_DIRECT);
11622 /* t_1 = ..A.B.C.D.E.F.G. */
11623 sel = expand_simple_binop (DImode, IOR, sel, t_1,
11624 NULL_RTX, 1, OPTAB_DIRECT);
11625 /* sel = .AABBCCDDEEFFGGH */
11626 sel = expand_simple_binop (DImode, AND, sel,
11627 GEN_INT ((HOST_WIDE_INT)0xff00ff << 32
11629 NULL_RTX, 1, OPTAB_DIRECT);
11630 /* sel = ..AB..CD..EF..GH */
11631 t_1 = expand_simple_binop (DImode, LSHIFTRT, sel, GEN_INT (8),
11632 NULL_RTX, 1, OPTAB_DIRECT);
11633 /* t_1 = ....AB..CD..EF.. */
11634 sel = expand_simple_binop (DImode, IOR, sel, t_1,
11635 NULL_RTX, 1, OPTAB_DIRECT);
11636 /* sel = ..ABABCDCDEFEFGH */
11637 sel = expand_simple_binop (DImode, AND, sel,
11638 GEN_INT ((HOST_WIDE_INT)0xffff << 32 | 0xffff),
11639 NULL_RTX, 1, OPTAB_DIRECT);
11640 /* sel = ....ABCD....EFGH */
11641 t_1 = expand_simple_binop (DImode, LSHIFTRT, sel, GEN_INT (16),
11642 NULL_RTX, 1, OPTAB_DIRECT);
11643 /* t_1 = ........ABCD.... */
11644 sel = gen_lowpart (SImode, sel);
11645 t_1 = gen_lowpart (SImode, t_1);
11649 gcc_unreachable ();
11652 /* Always perform the final addition/merge within the bmask insn. */
11653 emit_insn (gen_bmasksi_vis (gen_rtx_REG (SImode, 0), sel, t_1));
11656 /* Implement TARGET_FRAME_POINTER_REQUIRED. */
11659 sparc_frame_pointer_required (void)
11661 /* If the stack pointer is dynamically modified in the function, it cannot
11662 serve as the frame pointer. */
11663 if (cfun->calls_alloca)
11666 /* If the function receives nonlocal gotos, it needs to save the frame
11667 pointer in the nonlocal_goto_save_area object. */
11668 if (cfun->has_nonlocal_label)
11671 /* In flat mode, that's it. */
11675 /* Otherwise, the frame pointer is required if the function isn't leaf. */
11676 return !(crtl->is_leaf && only_leaf_regs_used ());
11679 /* The way this is structured, we can't eliminate SFP in favor of SP
11680 if the frame pointer is required: we want to use the SFP->HFP elimination
11681 in that case. But the test in update_eliminables doesn't know we are
11682 assuming below that we only do the former elimination. */
11685 sparc_can_eliminate (const int from ATTRIBUTE_UNUSED, const int to)
11687 return to == HARD_FRAME_POINTER_REGNUM || !sparc_frame_pointer_required ();
11690 /* Return the hard frame pointer directly to bypass the stack bias. */
11693 sparc_builtin_setjmp_frame_value (void)
11695 return hard_frame_pointer_rtx;
11698 /* If !TARGET_FPU, then make the fp registers and fp cc regs fixed so that
11699 they won't be allocated. */
11702 sparc_conditional_register_usage (void)
11704 if (PIC_OFFSET_TABLE_REGNUM != INVALID_REGNUM)
11706 fixed_regs[PIC_OFFSET_TABLE_REGNUM] = 1;
11707 call_used_regs[PIC_OFFSET_TABLE_REGNUM] = 1;
11709 /* If the user has passed -f{fixed,call-{used,saved}}-g5 */
11710 /* then honor it. */
11711 if (TARGET_ARCH32 && fixed_regs[5])
11713 else if (TARGET_ARCH64 && fixed_regs[5] == 2)
11718 for (regno = SPARC_FIRST_V9_FP_REG;
11719 regno <= SPARC_LAST_V9_FP_REG;
11721 fixed_regs[regno] = 1;
11722 /* %fcc0 is used by v8 and v9. */
11723 for (regno = SPARC_FIRST_V9_FCC_REG + 1;
11724 regno <= SPARC_LAST_V9_FCC_REG;
11726 fixed_regs[regno] = 1;
11731 for (regno = 32; regno < SPARC_LAST_V9_FCC_REG; regno++)
11732 fixed_regs[regno] = 1;
11734 /* If the user has passed -f{fixed,call-{used,saved}}-g2 */
11735 /* then honor it. Likewise with g3 and g4. */
11736 if (fixed_regs[2] == 2)
11737 fixed_regs[2] = ! TARGET_APP_REGS;
11738 if (fixed_regs[3] == 2)
11739 fixed_regs[3] = ! TARGET_APP_REGS;
11740 if (TARGET_ARCH32 && fixed_regs[4] == 2)
11741 fixed_regs[4] = ! TARGET_APP_REGS;
11742 else if (TARGET_CM_EMBMEDANY)
11744 else if (fixed_regs[4] == 2)
11749 /* Disable leaf functions. */
11750 memset (sparc_leaf_regs, 0, FIRST_PSEUDO_REGISTER);
11751 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
11752 leaf_reg_remap [regno] = regno;
11755 global_regs[SPARC_GSR_REG] = 1;
11758 /* Implement TARGET_PREFERRED_RELOAD_CLASS:
11760 - We can't load constants into FP registers.
11761 - We can't load FP constants into integer registers when soft-float,
11762 because there is no soft-float pattern with a r/F constraint.
11763 - We can't load FP constants into integer registers for TFmode unless
11764 it is 0.0L, because there is no movtf pattern with a r/F constraint.
11765 - Try and reload integer constants (symbolic or otherwise) back into
11766 registers directly, rather than having them dumped to memory. */
11769 sparc_preferred_reload_class (rtx x, reg_class_t rclass)
11771 enum machine_mode mode = GET_MODE (x);
11772 if (CONSTANT_P (x))
11774 if (FP_REG_CLASS_P (rclass)
11775 || rclass == GENERAL_OR_FP_REGS
11776 || rclass == GENERAL_OR_EXTRA_FP_REGS
11777 || (GET_MODE_CLASS (mode) == MODE_FLOAT && ! TARGET_FPU)
11778 || (mode == TFmode && ! const_zero_operand (x, mode)))
11781 if (GET_MODE_CLASS (mode) == MODE_INT)
11782 return GENERAL_REGS;
11784 if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
11786 if (! FP_REG_CLASS_P (rclass)
11787 || !(const_zero_operand (x, mode)
11788 || const_all_ones_operand (x, mode)))
11795 && (rclass == EXTRA_FP_REGS
11796 || rclass == GENERAL_OR_EXTRA_FP_REGS))
11798 int regno = true_regnum (x);
11800 if (SPARC_INT_REG_P (regno))
11801 return (rclass == EXTRA_FP_REGS
11802 ? FP_REGS : GENERAL_OR_FP_REGS);
11808 /* Output a wide multiply instruction in V8+ mode. INSN is the instruction,
11809 OPERANDS are its operands and OPCODE is the mnemonic to be used. */
11812 output_v8plus_mult (rtx insn, rtx *operands, const char *opcode)
11816 gcc_assert (! TARGET_ARCH64);
11818 if (sparc_check_64 (operands[1], insn) <= 0)
11819 output_asm_insn ("srl\t%L1, 0, %L1", operands);
11820 if (which_alternative == 1)
11821 output_asm_insn ("sllx\t%H1, 32, %H1", operands);
11822 if (GET_CODE (operands[2]) == CONST_INT)
11824 if (which_alternative == 1)
11826 output_asm_insn ("or\t%L1, %H1, %H1", operands);
11827 sprintf (mulstr, "%s\t%%H1, %%2, %%L0", opcode);
11828 output_asm_insn (mulstr, operands);
11829 return "srlx\t%L0, 32, %H0";
11833 output_asm_insn ("sllx\t%H1, 32, %3", operands);
11834 output_asm_insn ("or\t%L1, %3, %3", operands);
11835 sprintf (mulstr, "%s\t%%3, %%2, %%3", opcode);
11836 output_asm_insn (mulstr, operands);
11837 output_asm_insn ("srlx\t%3, 32, %H0", operands);
11838 return "mov\t%3, %L0";
11841 else if (rtx_equal_p (operands[1], operands[2]))
11843 if (which_alternative == 1)
11845 output_asm_insn ("or\t%L1, %H1, %H1", operands);
11846 sprintf (mulstr, "%s\t%%H1, %%H1, %%L0", opcode);
11847 output_asm_insn (mulstr, operands);
11848 return "srlx\t%L0, 32, %H0";
11852 output_asm_insn ("sllx\t%H1, 32, %3", operands);
11853 output_asm_insn ("or\t%L1, %3, %3", operands);
11854 sprintf (mulstr, "%s\t%%3, %%3, %%3", opcode);
11855 output_asm_insn (mulstr, operands);
11856 output_asm_insn ("srlx\t%3, 32, %H0", operands);
11857 return "mov\t%3, %L0";
11860 if (sparc_check_64 (operands[2], insn) <= 0)
11861 output_asm_insn ("srl\t%L2, 0, %L2", operands);
11862 if (which_alternative == 1)
11864 output_asm_insn ("or\t%L1, %H1, %H1", operands);
11865 output_asm_insn ("sllx\t%H2, 32, %L1", operands);
11866 output_asm_insn ("or\t%L2, %L1, %L1", operands);
11867 sprintf (mulstr, "%s\t%%H1, %%L1, %%L0", opcode);
11868 output_asm_insn (mulstr, operands);
11869 return "srlx\t%L0, 32, %H0";
11873 output_asm_insn ("sllx\t%H1, 32, %3", operands);
11874 output_asm_insn ("sllx\t%H2, 32, %4", operands);
11875 output_asm_insn ("or\t%L1, %3, %3", operands);
11876 output_asm_insn ("or\t%L2, %4, %4", operands);
11877 sprintf (mulstr, "%s\t%%3, %%4, %%3", opcode);
11878 output_asm_insn (mulstr, operands);
11879 output_asm_insn ("srlx\t%3, 32, %H0", operands);
11880 return "mov\t%3, %L0";
11884 /* Subroutine of sparc_expand_vector_init. Emit code to initialize
11885 all fields of TARGET to ELT by means of VIS2 BSHUFFLE insn. MODE
11886 and INNER_MODE are the modes describing TARGET. */
11889 vector_init_bshuffle (rtx target, rtx elt, enum machine_mode mode,
11890 enum machine_mode inner_mode)
11892 rtx t1, final_insn, sel;
11895 t1 = gen_reg_rtx (mode);
11897 elt = convert_modes (SImode, inner_mode, elt, true);
11898 emit_move_insn (gen_lowpart(SImode, t1), elt);
11903 final_insn = gen_bshufflev2si_vis (target, t1, t1);
11904 bmask = 0x45674567;
11907 final_insn = gen_bshufflev4hi_vis (target, t1, t1);
11908 bmask = 0x67676767;
11911 final_insn = gen_bshufflev8qi_vis (target, t1, t1);
11912 bmask = 0x77777777;
11915 gcc_unreachable ();
11918 sel = force_reg (SImode, GEN_INT (bmask));
11919 emit_insn (gen_bmasksi_vis (gen_rtx_REG (SImode, 0), sel, const0_rtx));
11920 emit_insn (final_insn);
11923 /* Subroutine of sparc_expand_vector_init. Emit code to initialize
11924 all fields of TARGET to ELT in V8QI by means of VIS FPMERGE insn. */
11927 vector_init_fpmerge (rtx target, rtx elt)
11929 rtx t1, t2, t2_low, t3, t3_low;
11931 t1 = gen_reg_rtx (V4QImode);
11932 elt = convert_modes (SImode, QImode, elt, true);
11933 emit_move_insn (gen_lowpart (SImode, t1), elt);
11935 t2 = gen_reg_rtx (V8QImode);
11936 t2_low = gen_lowpart (V4QImode, t2);
11937 emit_insn (gen_fpmerge_vis (t2, t1, t1));
11939 t3 = gen_reg_rtx (V8QImode);
11940 t3_low = gen_lowpart (V4QImode, t3);
11941 emit_insn (gen_fpmerge_vis (t3, t2_low, t2_low));
11943 emit_insn (gen_fpmerge_vis (target, t3_low, t3_low));
11946 /* Subroutine of sparc_expand_vector_init. Emit code to initialize
11947 all fields of TARGET to ELT in V4HI by means of VIS FALIGNDATA insn. */
11950 vector_init_faligndata (rtx target, rtx elt)
11952 rtx t1 = gen_reg_rtx (V4HImode);
11955 elt = convert_modes (SImode, HImode, elt, true);
11956 emit_move_insn (gen_lowpart (SImode, t1), elt);
11958 emit_insn (gen_alignaddrsi_vis (gen_reg_rtx (SImode),
11959 force_reg (SImode, GEN_INT (6)),
11962 for (i = 0; i < 4; i++)
11963 emit_insn (gen_faligndatav4hi_vis (target, t1, target));
11966 /* Emit code to initialize TARGET to values for individual fields VALS. */
11969 sparc_expand_vector_init (rtx target, rtx vals)
11971 const enum machine_mode mode = GET_MODE (target);
11972 const enum machine_mode inner_mode = GET_MODE_INNER (mode);
11973 const int n_elts = GET_MODE_NUNITS (mode);
11979 for (i = 0; i < n_elts; i++)
11981 rtx x = XVECEXP (vals, 0, i);
11982 if (!CONSTANT_P (x))
11985 if (i > 0 && !rtx_equal_p (x, XVECEXP (vals, 0, 0)))
11991 emit_move_insn (target, gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0)));
11995 if (GET_MODE_SIZE (inner_mode) == GET_MODE_SIZE (mode))
11997 if (GET_MODE_SIZE (inner_mode) == 4)
11999 emit_move_insn (gen_lowpart (SImode, target),
12000 gen_lowpart (SImode, XVECEXP (vals, 0, 0)));
12003 else if (GET_MODE_SIZE (inner_mode) == 8)
12005 emit_move_insn (gen_lowpart (DImode, target),
12006 gen_lowpart (DImode, XVECEXP (vals, 0, 0)));
12010 else if (GET_MODE_SIZE (inner_mode) == GET_MODE_SIZE (word_mode)
12011 && GET_MODE_SIZE (mode) == 2 * GET_MODE_SIZE (word_mode))
12013 emit_move_insn (gen_highpart (word_mode, target),
12014 gen_lowpart (word_mode, XVECEXP (vals, 0, 0)));
12015 emit_move_insn (gen_lowpart (word_mode, target),
12016 gen_lowpart (word_mode, XVECEXP (vals, 0, 1)));
12020 if (all_same && GET_MODE_SIZE (mode) == 8)
12024 vector_init_bshuffle (target, XVECEXP (vals, 0, 0), mode, inner_mode);
12027 if (mode == V8QImode)
12029 vector_init_fpmerge (target, XVECEXP (vals, 0, 0));
12032 if (mode == V4HImode)
12034 vector_init_faligndata (target, XVECEXP (vals, 0, 0));
12039 mem = assign_stack_temp (mode, GET_MODE_SIZE (mode));
12040 for (i = 0; i < n_elts; i++)
12041 emit_move_insn (adjust_address_nv (mem, inner_mode,
12042 i * GET_MODE_SIZE (inner_mode)),
12043 XVECEXP (vals, 0, i));
12044 emit_move_insn (target, mem);
12047 /* Implement TARGET_SECONDARY_RELOAD. */
12050 sparc_secondary_reload (bool in_p, rtx x, reg_class_t rclass_i,
12051 enum machine_mode mode, secondary_reload_info *sri)
12053 enum reg_class rclass = (enum reg_class) rclass_i;
12055 sri->icode = CODE_FOR_nothing;
12056 sri->extra_cost = 0;
12058 /* We need a temporary when loading/storing a HImode/QImode value
12059 between memory and the FPU registers. This can happen when combine puts
12060 a paradoxical subreg in a float/fix conversion insn. */
12061 if (FP_REG_CLASS_P (rclass)
12062 && (mode == HImode || mode == QImode)
12063 && (GET_CODE (x) == MEM
12064 || ((GET_CODE (x) == REG || GET_CODE (x) == SUBREG)
12065 && true_regnum (x) == -1)))
12066 return GENERAL_REGS;
12068 /* On 32-bit we need a temporary when loading/storing a DFmode value
12069 between unaligned memory and the upper FPU registers. */
12071 && rclass == EXTRA_FP_REGS
12073 && GET_CODE (x) == MEM
12074 && ! mem_min_alignment (x, 8))
12077 if (((TARGET_CM_MEDANY
12078 && symbolic_operand (x, mode))
12079 || (TARGET_CM_EMBMEDANY
12080 && text_segment_operand (x, mode)))
12084 sri->icode = direct_optab_handler (reload_in_optab, mode);
12086 sri->icode = direct_optab_handler (reload_out_optab, mode);
12090 if (TARGET_VIS3 && TARGET_ARCH32)
12092 int regno = true_regnum (x);
12094 /* When using VIS3 fp<-->int register moves, on 32-bit we have
12095 to move 8-byte values in 4-byte pieces. This only works via
12096 FP_REGS, and not via EXTRA_FP_REGS. Therefore if we try to
12097 move between EXTRA_FP_REGS and GENERAL_REGS, we will need
12098 an FP_REGS intermediate move. */
12099 if ((rclass == EXTRA_FP_REGS && SPARC_INT_REG_P (regno))
12100 || ((general_or_i64_p (rclass)
12101 || rclass == GENERAL_OR_FP_REGS)
12102 && SPARC_FP_REG_P (regno)))
12104 sri->extra_cost = 2;
12112 /* Emit code to conditionally move either OPERANDS[2] or OPERANDS[3] into
12113 OPERANDS[0] in MODE. OPERANDS[1] is the operator of the condition. */
12116 sparc_expand_conditional_move (enum machine_mode mode, rtx *operands)
12118 enum rtx_code rc = GET_CODE (operands[1]);
12119 enum machine_mode cmp_mode;
12120 rtx cc_reg, dst, cmp;
12123 if (GET_MODE (XEXP (cmp, 0)) == DImode && !TARGET_ARCH64)
12126 if (GET_MODE (XEXP (cmp, 0)) == TFmode && !TARGET_HARD_QUAD)
12127 cmp = sparc_emit_float_lib_cmp (XEXP (cmp, 0), XEXP (cmp, 1), rc);
12129 cmp_mode = GET_MODE (XEXP (cmp, 0));
12130 rc = GET_CODE (cmp);
12133 if (! rtx_equal_p (operands[2], dst)
12134 && ! rtx_equal_p (operands[3], dst))
12136 if (reg_overlap_mentioned_p (dst, cmp))
12137 dst = gen_reg_rtx (mode);
12139 emit_move_insn (dst, operands[3]);
12141 else if (operands[2] == dst)
12143 operands[2] = operands[3];
12145 if (GET_MODE_CLASS (cmp_mode) == MODE_FLOAT)
12146 rc = reverse_condition_maybe_unordered (rc);
12148 rc = reverse_condition (rc);
12151 if (XEXP (cmp, 1) == const0_rtx
12152 && GET_CODE (XEXP (cmp, 0)) == REG
12153 && cmp_mode == DImode
12154 && v9_regcmp_p (rc))
12155 cc_reg = XEXP (cmp, 0);
12157 cc_reg = gen_compare_reg_1 (rc, XEXP (cmp, 0), XEXP (cmp, 1));
12159 cmp = gen_rtx_fmt_ee (rc, GET_MODE (cc_reg), cc_reg, const0_rtx);
12161 emit_insn (gen_rtx_SET (VOIDmode, dst,
12162 gen_rtx_IF_THEN_ELSE (mode, cmp, operands[2], dst)));
12164 if (dst != operands[0])
12165 emit_move_insn (operands[0], dst);
12170 /* Emit code to conditionally move a combination of OPERANDS[1] and OPERANDS[2]
12171 into OPERANDS[0] in MODE, depending on the outcome of the comparison of
12172 OPERANDS[4] and OPERANDS[5]. OPERANDS[3] is the operator of the condition.
12173 FCODE is the machine code to be used for OPERANDS[3] and CCODE the machine
12174 code to be used for the condition mask. */
12177 sparc_expand_vcond (enum machine_mode mode, rtx *operands, int ccode, int fcode)
12179 rtx mask, cop0, cop1, fcmp, cmask, bshuf, gsr;
12180 enum rtx_code code = GET_CODE (operands[3]);
12182 mask = gen_reg_rtx (Pmode);
12183 cop0 = operands[4];
12184 cop1 = operands[5];
12185 if (code == LT || code == GE)
12189 code = swap_condition (code);
12190 t = cop0; cop0 = cop1; cop1 = t;
12193 gsr = gen_rtx_REG (DImode, SPARC_GSR_REG);
12195 fcmp = gen_rtx_UNSPEC (Pmode,
12196 gen_rtvec (1, gen_rtx_fmt_ee (code, mode, cop0, cop1)),
12199 cmask = gen_rtx_UNSPEC (DImode,
12200 gen_rtvec (2, mask, gsr),
12203 bshuf = gen_rtx_UNSPEC (mode,
12204 gen_rtvec (3, operands[1], operands[2], gsr),
12207 emit_insn (gen_rtx_SET (VOIDmode, mask, fcmp));
12208 emit_insn (gen_rtx_SET (VOIDmode, gsr, cmask));
12210 emit_insn (gen_rtx_SET (VOIDmode, operands[0], bshuf));
12213 /* On sparc, any mode which naturally allocates into the float
12214 registers should return 4 here. */
12217 sparc_regmode_natural_size (enum machine_mode mode)
12219 int size = UNITS_PER_WORD;
12223 enum mode_class mclass = GET_MODE_CLASS (mode);
12225 if (mclass == MODE_FLOAT || mclass == MODE_VECTOR_INT)
12232 /* Return TRUE if it is a good idea to tie two pseudo registers
12233 when one has mode MODE1 and one has mode MODE2.
12234 If HARD_REGNO_MODE_OK could produce different values for MODE1 and MODE2,
12235 for any hard reg, then this must be FALSE for correct output.
12237 For V9 we have to deal with the fact that only the lower 32 floating
12238 point registers are 32-bit addressable. */
12241 sparc_modes_tieable_p (enum machine_mode mode1, enum machine_mode mode2)
12243 enum mode_class mclass1, mclass2;
12244 unsigned short size1, size2;
12246 if (mode1 == mode2)
12249 mclass1 = GET_MODE_CLASS (mode1);
12250 mclass2 = GET_MODE_CLASS (mode2);
12251 if (mclass1 != mclass2)
12257 /* Classes are the same and we are V9 so we have to deal with upper
12258 vs. lower floating point registers. If one of the modes is a
12259 4-byte mode, and the other is not, we have to mark them as not
12260 tieable because only the lower 32 floating point register are
12261 addressable 32-bits at a time.
12263 We can't just test explicitly for SFmode, otherwise we won't
12264 cover the vector mode cases properly. */
12266 if (mclass1 != MODE_FLOAT && mclass1 != MODE_VECTOR_INT)
12269 size1 = GET_MODE_SIZE (mode1);
12270 size2 = GET_MODE_SIZE (mode2);
12271 if ((size1 > 4 && size2 == 4)
12272 || (size2 > 4 && size1 == 4))
12278 static enum machine_mode sparc_cstore_mode (enum insn_code icode ATTRIBUTE_UNUSED)
12280 return (TARGET_ARCH64 ? DImode : SImode);
12283 #include "gt-sparc.h"