1 /* Subroutines for insn-output.c for SPARC.
2 Copyright (C) 1987-2013 Free Software Foundation, Inc.
3 Contributed by Michael Tiemann (tiemann@cygnus.com)
4 64-bit SPARC-V9 support by Michael Tiemann, Jim Wilson, and Doug Evans,
7 This file is part of GCC.
9 GCC is free software; you can redistribute it and/or modify
10 it under the terms of the GNU General Public License as published by
11 the Free Software Foundation; either version 3, or (at your option)
14 GCC is distributed in the hope that it will be useful,
15 but WITHOUT ANY WARRANTY; without even the implied warranty of
16 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17 GNU General Public License for more details.
19 You should have received a copy of the GNU General Public License
20 along with GCC; see the file COPYING3. If not see
21 <http://www.gnu.org/licenses/>. */
25 #include "coretypes.h"
30 #include "hard-reg-set.h"
31 #include "insn-config.h"
32 #include "insn-codes.h"
33 #include "conditions.h"
35 #include "insn-attr.h"
42 #include "diagnostic-core.h"
47 #include "target-def.h"
48 #include "common/common-target.h"
50 #include "langhooks.h"
55 #include "tree-pass.h"
59 struct processor_costs {
63 /* Integer signed load */
66 /* Integer zeroed load */
72 /* fmov, fneg, fabs */
76 const int float_plusminus;
82 const int float_cmove;
88 const int float_div_sf;
91 const int float_div_df;
94 const int float_sqrt_sf;
97 const int float_sqrt_df;
105 /* integer multiply cost for each bit set past the most
106 significant 3, so the formula for multiply cost becomes:
109 highest_bit = highest_clear_bit(rs1);
111 highest_bit = highest_set_bit(rs1);
114 cost = int_mul{,X} + ((highest_bit - 3) / int_mul_bit_factor);
116 A value of zero indicates that the multiply costs is fixed,
118 const int int_mul_bit_factor;
129 /* penalty for shifts, due to scheduling rules etc. */
130 const int shift_penalty;
134 struct processor_costs cypress_costs = {
135 COSTS_N_INSNS (2), /* int load */
136 COSTS_N_INSNS (2), /* int signed load */
137 COSTS_N_INSNS (2), /* int zeroed load */
138 COSTS_N_INSNS (2), /* float load */
139 COSTS_N_INSNS (5), /* fmov, fneg, fabs */
140 COSTS_N_INSNS (5), /* fadd, fsub */
141 COSTS_N_INSNS (1), /* fcmp */
142 COSTS_N_INSNS (1), /* fmov, fmovr */
143 COSTS_N_INSNS (7), /* fmul */
144 COSTS_N_INSNS (37), /* fdivs */
145 COSTS_N_INSNS (37), /* fdivd */
146 COSTS_N_INSNS (63), /* fsqrts */
147 COSTS_N_INSNS (63), /* fsqrtd */
148 COSTS_N_INSNS (1), /* imul */
149 COSTS_N_INSNS (1), /* imulX */
150 0, /* imul bit factor */
151 COSTS_N_INSNS (1), /* idiv */
152 COSTS_N_INSNS (1), /* idivX */
153 COSTS_N_INSNS (1), /* movcc/movr */
154 0, /* shift penalty */
158 struct processor_costs supersparc_costs = {
159 COSTS_N_INSNS (1), /* int load */
160 COSTS_N_INSNS (1), /* int signed load */
161 COSTS_N_INSNS (1), /* int zeroed load */
162 COSTS_N_INSNS (0), /* float load */
163 COSTS_N_INSNS (3), /* fmov, fneg, fabs */
164 COSTS_N_INSNS (3), /* fadd, fsub */
165 COSTS_N_INSNS (3), /* fcmp */
166 COSTS_N_INSNS (1), /* fmov, fmovr */
167 COSTS_N_INSNS (3), /* fmul */
168 COSTS_N_INSNS (6), /* fdivs */
169 COSTS_N_INSNS (9), /* fdivd */
170 COSTS_N_INSNS (12), /* fsqrts */
171 COSTS_N_INSNS (12), /* fsqrtd */
172 COSTS_N_INSNS (4), /* imul */
173 COSTS_N_INSNS (4), /* imulX */
174 0, /* imul bit factor */
175 COSTS_N_INSNS (4), /* idiv */
176 COSTS_N_INSNS (4), /* idivX */
177 COSTS_N_INSNS (1), /* movcc/movr */
178 1, /* shift penalty */
182 struct processor_costs hypersparc_costs = {
183 COSTS_N_INSNS (1), /* int load */
184 COSTS_N_INSNS (1), /* int signed load */
185 COSTS_N_INSNS (1), /* int zeroed load */
186 COSTS_N_INSNS (1), /* float load */
187 COSTS_N_INSNS (1), /* fmov, fneg, fabs */
188 COSTS_N_INSNS (1), /* fadd, fsub */
189 COSTS_N_INSNS (1), /* fcmp */
190 COSTS_N_INSNS (1), /* fmov, fmovr */
191 COSTS_N_INSNS (1), /* fmul */
192 COSTS_N_INSNS (8), /* fdivs */
193 COSTS_N_INSNS (12), /* fdivd */
194 COSTS_N_INSNS (17), /* fsqrts */
195 COSTS_N_INSNS (17), /* fsqrtd */
196 COSTS_N_INSNS (17), /* imul */
197 COSTS_N_INSNS (17), /* imulX */
198 0, /* imul bit factor */
199 COSTS_N_INSNS (17), /* idiv */
200 COSTS_N_INSNS (17), /* idivX */
201 COSTS_N_INSNS (1), /* movcc/movr */
202 0, /* shift penalty */
206 struct processor_costs leon_costs = {
207 COSTS_N_INSNS (1), /* int load */
208 COSTS_N_INSNS (1), /* int signed load */
209 COSTS_N_INSNS (1), /* int zeroed load */
210 COSTS_N_INSNS (1), /* float load */
211 COSTS_N_INSNS (1), /* fmov, fneg, fabs */
212 COSTS_N_INSNS (1), /* fadd, fsub */
213 COSTS_N_INSNS (1), /* fcmp */
214 COSTS_N_INSNS (1), /* fmov, fmovr */
215 COSTS_N_INSNS (1), /* fmul */
216 COSTS_N_INSNS (15), /* fdivs */
217 COSTS_N_INSNS (15), /* fdivd */
218 COSTS_N_INSNS (23), /* fsqrts */
219 COSTS_N_INSNS (23), /* fsqrtd */
220 COSTS_N_INSNS (5), /* imul */
221 COSTS_N_INSNS (5), /* imulX */
222 0, /* imul bit factor */
223 COSTS_N_INSNS (5), /* idiv */
224 COSTS_N_INSNS (5), /* idivX */
225 COSTS_N_INSNS (1), /* movcc/movr */
226 0, /* shift penalty */
230 struct processor_costs leon3_costs = {
231 COSTS_N_INSNS (1), /* int load */
232 COSTS_N_INSNS (1), /* int signed load */
233 COSTS_N_INSNS (1), /* int zeroed load */
234 COSTS_N_INSNS (1), /* float load */
235 COSTS_N_INSNS (1), /* fmov, fneg, fabs */
236 COSTS_N_INSNS (1), /* fadd, fsub */
237 COSTS_N_INSNS (1), /* fcmp */
238 COSTS_N_INSNS (1), /* fmov, fmovr */
239 COSTS_N_INSNS (1), /* fmul */
240 COSTS_N_INSNS (14), /* fdivs */
241 COSTS_N_INSNS (15), /* fdivd */
242 COSTS_N_INSNS (22), /* fsqrts */
243 COSTS_N_INSNS (23), /* fsqrtd */
244 COSTS_N_INSNS (5), /* imul */
245 COSTS_N_INSNS (5), /* imulX */
246 0, /* imul bit factor */
247 COSTS_N_INSNS (35), /* idiv */
248 COSTS_N_INSNS (35), /* idivX */
249 COSTS_N_INSNS (1), /* movcc/movr */
250 0, /* shift penalty */
254 struct processor_costs sparclet_costs = {
255 COSTS_N_INSNS (3), /* int load */
256 COSTS_N_INSNS (3), /* int signed load */
257 COSTS_N_INSNS (1), /* int zeroed load */
258 COSTS_N_INSNS (1), /* float load */
259 COSTS_N_INSNS (1), /* fmov, fneg, fabs */
260 COSTS_N_INSNS (1), /* fadd, fsub */
261 COSTS_N_INSNS (1), /* fcmp */
262 COSTS_N_INSNS (1), /* fmov, fmovr */
263 COSTS_N_INSNS (1), /* fmul */
264 COSTS_N_INSNS (1), /* fdivs */
265 COSTS_N_INSNS (1), /* fdivd */
266 COSTS_N_INSNS (1), /* fsqrts */
267 COSTS_N_INSNS (1), /* fsqrtd */
268 COSTS_N_INSNS (5), /* imul */
269 COSTS_N_INSNS (5), /* imulX */
270 0, /* imul bit factor */
271 COSTS_N_INSNS (5), /* idiv */
272 COSTS_N_INSNS (5), /* idivX */
273 COSTS_N_INSNS (1), /* movcc/movr */
274 0, /* shift penalty */
278 struct processor_costs ultrasparc_costs = {
279 COSTS_N_INSNS (2), /* int load */
280 COSTS_N_INSNS (3), /* int signed load */
281 COSTS_N_INSNS (2), /* int zeroed load */
282 COSTS_N_INSNS (2), /* float load */
283 COSTS_N_INSNS (1), /* fmov, fneg, fabs */
284 COSTS_N_INSNS (4), /* fadd, fsub */
285 COSTS_N_INSNS (1), /* fcmp */
286 COSTS_N_INSNS (2), /* fmov, fmovr */
287 COSTS_N_INSNS (4), /* fmul */
288 COSTS_N_INSNS (13), /* fdivs */
289 COSTS_N_INSNS (23), /* fdivd */
290 COSTS_N_INSNS (13), /* fsqrts */
291 COSTS_N_INSNS (23), /* fsqrtd */
292 COSTS_N_INSNS (4), /* imul */
293 COSTS_N_INSNS (4), /* imulX */
294 2, /* imul bit factor */
295 COSTS_N_INSNS (37), /* idiv */
296 COSTS_N_INSNS (68), /* idivX */
297 COSTS_N_INSNS (2), /* movcc/movr */
298 2, /* shift penalty */
302 struct processor_costs ultrasparc3_costs = {
303 COSTS_N_INSNS (2), /* int load */
304 COSTS_N_INSNS (3), /* int signed load */
305 COSTS_N_INSNS (3), /* int zeroed load */
306 COSTS_N_INSNS (2), /* float load */
307 COSTS_N_INSNS (3), /* fmov, fneg, fabs */
308 COSTS_N_INSNS (4), /* fadd, fsub */
309 COSTS_N_INSNS (5), /* fcmp */
310 COSTS_N_INSNS (3), /* fmov, fmovr */
311 COSTS_N_INSNS (4), /* fmul */
312 COSTS_N_INSNS (17), /* fdivs */
313 COSTS_N_INSNS (20), /* fdivd */
314 COSTS_N_INSNS (20), /* fsqrts */
315 COSTS_N_INSNS (29), /* fsqrtd */
316 COSTS_N_INSNS (6), /* imul */
317 COSTS_N_INSNS (6), /* imulX */
318 0, /* imul bit factor */
319 COSTS_N_INSNS (40), /* idiv */
320 COSTS_N_INSNS (71), /* idivX */
321 COSTS_N_INSNS (2), /* movcc/movr */
322 0, /* shift penalty */
326 struct processor_costs niagara_costs = {
327 COSTS_N_INSNS (3), /* int load */
328 COSTS_N_INSNS (3), /* int signed load */
329 COSTS_N_INSNS (3), /* int zeroed load */
330 COSTS_N_INSNS (9), /* float load */
331 COSTS_N_INSNS (8), /* fmov, fneg, fabs */
332 COSTS_N_INSNS (8), /* fadd, fsub */
333 COSTS_N_INSNS (26), /* fcmp */
334 COSTS_N_INSNS (8), /* fmov, fmovr */
335 COSTS_N_INSNS (29), /* fmul */
336 COSTS_N_INSNS (54), /* fdivs */
337 COSTS_N_INSNS (83), /* fdivd */
338 COSTS_N_INSNS (100), /* fsqrts - not implemented in hardware */
339 COSTS_N_INSNS (100), /* fsqrtd - not implemented in hardware */
340 COSTS_N_INSNS (11), /* imul */
341 COSTS_N_INSNS (11), /* imulX */
342 0, /* imul bit factor */
343 COSTS_N_INSNS (72), /* idiv */
344 COSTS_N_INSNS (72), /* idivX */
345 COSTS_N_INSNS (1), /* movcc/movr */
346 0, /* shift penalty */
350 struct processor_costs niagara2_costs = {
351 COSTS_N_INSNS (3), /* int load */
352 COSTS_N_INSNS (3), /* int signed load */
353 COSTS_N_INSNS (3), /* int zeroed load */
354 COSTS_N_INSNS (3), /* float load */
355 COSTS_N_INSNS (6), /* fmov, fneg, fabs */
356 COSTS_N_INSNS (6), /* fadd, fsub */
357 COSTS_N_INSNS (6), /* fcmp */
358 COSTS_N_INSNS (6), /* fmov, fmovr */
359 COSTS_N_INSNS (6), /* fmul */
360 COSTS_N_INSNS (19), /* fdivs */
361 COSTS_N_INSNS (33), /* fdivd */
362 COSTS_N_INSNS (19), /* fsqrts */
363 COSTS_N_INSNS (33), /* fsqrtd */
364 COSTS_N_INSNS (5), /* imul */
365 COSTS_N_INSNS (5), /* imulX */
366 0, /* imul bit factor */
367 COSTS_N_INSNS (26), /* idiv, average of 12 - 41 cycle range */
368 COSTS_N_INSNS (26), /* idivX, average of 12 - 41 cycle range */
369 COSTS_N_INSNS (1), /* movcc/movr */
370 0, /* shift penalty */
374 struct processor_costs niagara3_costs = {
375 COSTS_N_INSNS (3), /* int load */
376 COSTS_N_INSNS (3), /* int signed load */
377 COSTS_N_INSNS (3), /* int zeroed load */
378 COSTS_N_INSNS (3), /* float load */
379 COSTS_N_INSNS (9), /* fmov, fneg, fabs */
380 COSTS_N_INSNS (9), /* fadd, fsub */
381 COSTS_N_INSNS (9), /* fcmp */
382 COSTS_N_INSNS (9), /* fmov, fmovr */
383 COSTS_N_INSNS (9), /* fmul */
384 COSTS_N_INSNS (23), /* fdivs */
385 COSTS_N_INSNS (37), /* fdivd */
386 COSTS_N_INSNS (23), /* fsqrts */
387 COSTS_N_INSNS (37), /* fsqrtd */
388 COSTS_N_INSNS (9), /* imul */
389 COSTS_N_INSNS (9), /* imulX */
390 0, /* imul bit factor */
391 COSTS_N_INSNS (31), /* idiv, average of 17 - 45 cycle range */
392 COSTS_N_INSNS (30), /* idivX, average of 16 - 44 cycle range */
393 COSTS_N_INSNS (1), /* movcc/movr */
394 0, /* shift penalty */
398 struct processor_costs niagara4_costs = {
399 COSTS_N_INSNS (5), /* int load */
400 COSTS_N_INSNS (5), /* int signed load */
401 COSTS_N_INSNS (5), /* int zeroed load */
402 COSTS_N_INSNS (5), /* float load */
403 COSTS_N_INSNS (11), /* fmov, fneg, fabs */
404 COSTS_N_INSNS (11), /* fadd, fsub */
405 COSTS_N_INSNS (11), /* fcmp */
406 COSTS_N_INSNS (11), /* fmov, fmovr */
407 COSTS_N_INSNS (11), /* fmul */
408 COSTS_N_INSNS (24), /* fdivs */
409 COSTS_N_INSNS (37), /* fdivd */
410 COSTS_N_INSNS (24), /* fsqrts */
411 COSTS_N_INSNS (37), /* fsqrtd */
412 COSTS_N_INSNS (12), /* imul */
413 COSTS_N_INSNS (12), /* imulX */
414 0, /* imul bit factor */
415 COSTS_N_INSNS (50), /* idiv, average of 41 - 60 cycle range */
416 COSTS_N_INSNS (35), /* idivX, average of 26 - 44 cycle range */
417 COSTS_N_INSNS (1), /* movcc/movr */
418 0, /* shift penalty */
421 static const struct processor_costs *sparc_costs = &cypress_costs;
423 #ifdef HAVE_AS_RELAX_OPTION
424 /* If 'as' and 'ld' are relaxing tail call insns into branch always, use
425 "or %o7,%g0,X; call Y; or X,%g0,%o7" always, so that it can be optimized.
426 With sethi/jmp, neither 'as' nor 'ld' has an easy way how to find out if
427 somebody does not branch between the sethi and jmp. */
428 #define LEAF_SIBCALL_SLOT_RESERVED_P 1
430 #define LEAF_SIBCALL_SLOT_RESERVED_P \
431 ((TARGET_ARCH64 && !TARGET_CM_MEDLOW) || flag_pic)
434 /* Vector to say how input registers are mapped to output registers.
435 HARD_FRAME_POINTER_REGNUM cannot be remapped by this function to
436 eliminate it. You must use -fomit-frame-pointer to get that. */
437 char leaf_reg_remap[] =
438 { 0, 1, 2, 3, 4, 5, 6, 7,
439 -1, -1, -1, -1, -1, -1, 14, -1,
440 -1, -1, -1, -1, -1, -1, -1, -1,
441 8, 9, 10, 11, 12, 13, -1, 15,
443 32, 33, 34, 35, 36, 37, 38, 39,
444 40, 41, 42, 43, 44, 45, 46, 47,
445 48, 49, 50, 51, 52, 53, 54, 55,
446 56, 57, 58, 59, 60, 61, 62, 63,
447 64, 65, 66, 67, 68, 69, 70, 71,
448 72, 73, 74, 75, 76, 77, 78, 79,
449 80, 81, 82, 83, 84, 85, 86, 87,
450 88, 89, 90, 91, 92, 93, 94, 95,
451 96, 97, 98, 99, 100, 101, 102};
453 /* Vector, indexed by hard register number, which contains 1
454 for a register that is allowable in a candidate for leaf
455 function treatment. */
456 char sparc_leaf_regs[] =
457 { 1, 1, 1, 1, 1, 1, 1, 1,
458 0, 0, 0, 0, 0, 0, 1, 0,
459 0, 0, 0, 0, 0, 0, 0, 0,
460 1, 1, 1, 1, 1, 1, 0, 1,
461 1, 1, 1, 1, 1, 1, 1, 1,
462 1, 1, 1, 1, 1, 1, 1, 1,
463 1, 1, 1, 1, 1, 1, 1, 1,
464 1, 1, 1, 1, 1, 1, 1, 1,
465 1, 1, 1, 1, 1, 1, 1, 1,
466 1, 1, 1, 1, 1, 1, 1, 1,
467 1, 1, 1, 1, 1, 1, 1, 1,
468 1, 1, 1, 1, 1, 1, 1, 1,
469 1, 1, 1, 1, 1, 1, 1};
471 struct GTY(()) machine_function
473 /* Size of the frame of the function. */
474 HOST_WIDE_INT frame_size;
476 /* Size of the frame of the function minus the register window save area
477 and the outgoing argument area. */
478 HOST_WIDE_INT apparent_frame_size;
480 /* Register we pretend the frame pointer is allocated to. Normally, this
481 is %fp, but if we are in a leaf procedure, this is (%sp + offset). We
482 record "offset" separately as it may be too big for (reg + disp). */
484 HOST_WIDE_INT frame_base_offset;
486 /* Some local-dynamic TLS symbol name. */
487 const char *some_ld_name;
489 /* Number of global or FP registers to be saved (as 4-byte quantities). */
490 int n_global_fp_regs;
492 /* True if the current function is leaf and uses only leaf regs,
493 so that the SPARC leaf function optimization can be applied.
494 Private version of crtl->uses_only_leaf_regs, see
495 sparc_expand_prologue for the rationale. */
498 /* True if the prologue saves local or in registers. */
499 bool save_local_in_regs_p;
501 /* True if the data calculated by sparc_expand_prologue are valid. */
502 bool prologue_data_valid_p;
505 #define sparc_frame_size cfun->machine->frame_size
506 #define sparc_apparent_frame_size cfun->machine->apparent_frame_size
507 #define sparc_frame_base_reg cfun->machine->frame_base_reg
508 #define sparc_frame_base_offset cfun->machine->frame_base_offset
509 #define sparc_n_global_fp_regs cfun->machine->n_global_fp_regs
510 #define sparc_leaf_function_p cfun->machine->leaf_function_p
511 #define sparc_save_local_in_regs_p cfun->machine->save_local_in_regs_p
512 #define sparc_prologue_data_valid_p cfun->machine->prologue_data_valid_p
514 /* 1 if the next opcode is to be specially indented. */
515 int sparc_indent_opcode = 0;
517 static void sparc_option_override (void);
518 static void sparc_init_modes (void);
519 static void scan_record_type (const_tree, int *, int *, int *);
520 static int function_arg_slotno (const CUMULATIVE_ARGS *, enum machine_mode,
521 const_tree, bool, bool, int *, int *);
523 static int supersparc_adjust_cost (rtx, rtx, rtx, int);
524 static int hypersparc_adjust_cost (rtx, rtx, rtx, int);
526 static void sparc_emit_set_const32 (rtx, rtx);
527 static void sparc_emit_set_const64 (rtx, rtx);
528 static void sparc_output_addr_vec (rtx);
529 static void sparc_output_addr_diff_vec (rtx);
530 static void sparc_output_deferred_case_vectors (void);
531 static bool sparc_legitimate_address_p (enum machine_mode, rtx, bool);
532 static bool sparc_legitimate_constant_p (enum machine_mode, rtx);
533 static rtx sparc_builtin_saveregs (void);
534 static int epilogue_renumber (rtx *, int);
535 static bool sparc_assemble_integer (rtx, unsigned int, int);
536 static int set_extends (rtx);
537 static void sparc_asm_function_prologue (FILE *, HOST_WIDE_INT);
538 static void sparc_asm_function_epilogue (FILE *, HOST_WIDE_INT);
539 #ifdef TARGET_SOLARIS
540 static void sparc_solaris_elf_asm_named_section (const char *, unsigned int,
541 tree) ATTRIBUTE_UNUSED;
543 static int sparc_adjust_cost (rtx, rtx, rtx, int);
544 static int sparc_issue_rate (void);
545 static void sparc_sched_init (FILE *, int, int);
546 static int sparc_use_sched_lookahead (void);
548 static void emit_soft_tfmode_libcall (const char *, int, rtx *);
549 static void emit_soft_tfmode_binop (enum rtx_code, rtx *);
550 static void emit_soft_tfmode_unop (enum rtx_code, rtx *);
551 static void emit_soft_tfmode_cvt (enum rtx_code, rtx *);
552 static void emit_hard_tfmode_operation (enum rtx_code, rtx *);
554 static bool sparc_function_ok_for_sibcall (tree, tree);
555 static void sparc_init_libfuncs (void);
556 static void sparc_init_builtins (void);
557 static void sparc_vis_init_builtins (void);
558 static rtx sparc_expand_builtin (tree, rtx, rtx, enum machine_mode, int);
559 static tree sparc_fold_builtin (tree, int, tree *, bool);
560 static int sparc_vis_mul8x16 (int, int);
561 static void sparc_handle_vis_mul8x16 (tree *, int, tree, tree, tree);
562 static void sparc_output_mi_thunk (FILE *, tree, HOST_WIDE_INT,
563 HOST_WIDE_INT, tree);
564 static bool sparc_can_output_mi_thunk (const_tree, HOST_WIDE_INT,
565 HOST_WIDE_INT, const_tree);
566 static struct machine_function * sparc_init_machine_status (void);
567 static bool sparc_cannot_force_const_mem (enum machine_mode, rtx);
568 static rtx sparc_tls_get_addr (void);
569 static rtx sparc_tls_got (void);
570 static const char *get_some_local_dynamic_name (void);
571 static int get_some_local_dynamic_name_1 (rtx *, void *);
572 static int sparc_register_move_cost (enum machine_mode,
573 reg_class_t, reg_class_t);
574 static bool sparc_rtx_costs (rtx, int, int, int, int *, bool);
575 static rtx sparc_function_value (const_tree, const_tree, bool);
576 static rtx sparc_libcall_value (enum machine_mode, const_rtx);
577 static bool sparc_function_value_regno_p (const unsigned int);
578 static rtx sparc_struct_value_rtx (tree, int);
579 static enum machine_mode sparc_promote_function_mode (const_tree, enum machine_mode,
580 int *, const_tree, int);
581 static bool sparc_return_in_memory (const_tree, const_tree);
582 static bool sparc_strict_argument_naming (cumulative_args_t);
583 static void sparc_va_start (tree, rtx);
584 static tree sparc_gimplify_va_arg (tree, tree, gimple_seq *, gimple_seq *);
585 static bool sparc_vector_mode_supported_p (enum machine_mode);
586 static bool sparc_tls_referenced_p (rtx);
587 static rtx sparc_legitimize_tls_address (rtx);
588 static rtx sparc_legitimize_pic_address (rtx, rtx);
589 static rtx sparc_legitimize_address (rtx, rtx, enum machine_mode);
590 static rtx sparc_delegitimize_address (rtx);
591 static bool sparc_mode_dependent_address_p (const_rtx, addr_space_t);
592 static bool sparc_pass_by_reference (cumulative_args_t,
593 enum machine_mode, const_tree, bool);
594 static void sparc_function_arg_advance (cumulative_args_t,
595 enum machine_mode, const_tree, bool);
596 static rtx sparc_function_arg_1 (cumulative_args_t,
597 enum machine_mode, const_tree, bool, bool);
598 static rtx sparc_function_arg (cumulative_args_t,
599 enum machine_mode, const_tree, bool);
600 static rtx sparc_function_incoming_arg (cumulative_args_t,
601 enum machine_mode, const_tree, bool);
602 static unsigned int sparc_function_arg_boundary (enum machine_mode,
604 static int sparc_arg_partial_bytes (cumulative_args_t,
605 enum machine_mode, tree, bool);
606 static void sparc_output_dwarf_dtprel (FILE *, int, rtx) ATTRIBUTE_UNUSED;
607 static void sparc_file_end (void);
608 static bool sparc_frame_pointer_required (void);
609 static bool sparc_can_eliminate (const int, const int);
610 static rtx sparc_builtin_setjmp_frame_value (void);
611 static void sparc_conditional_register_usage (void);
612 #ifdef TARGET_ALTERNATE_LONG_DOUBLE_MANGLING
613 static const char *sparc_mangle_type (const_tree);
615 static void sparc_trampoline_init (rtx, tree, rtx);
616 static enum machine_mode sparc_preferred_simd_mode (enum machine_mode);
617 static reg_class_t sparc_preferred_reload_class (rtx x, reg_class_t rclass);
618 static bool sparc_print_operand_punct_valid_p (unsigned char);
619 static void sparc_print_operand (FILE *, rtx, int);
620 static void sparc_print_operand_address (FILE *, rtx);
621 static reg_class_t sparc_secondary_reload (bool, rtx, reg_class_t,
623 secondary_reload_info *);
624 static enum machine_mode sparc_cstore_mode (enum insn_code icode);
626 #ifdef SUBTARGET_ATTRIBUTE_TABLE
627 /* Table of valid machine attributes. */
628 static const struct attribute_spec sparc_attribute_table[] =
630 /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler,
632 SUBTARGET_ATTRIBUTE_TABLE,
633 { NULL, 0, 0, false, false, false, NULL, false }
637 /* Option handling. */
640 enum cmodel sparc_cmodel;
642 char sparc_hard_reg_printed[8];
644 /* Initialize the GCC target structure. */
646 /* The default is to use .half rather than .short for aligned HI objects. */
647 #undef TARGET_ASM_ALIGNED_HI_OP
648 #define TARGET_ASM_ALIGNED_HI_OP "\t.half\t"
650 #undef TARGET_ASM_UNALIGNED_HI_OP
651 #define TARGET_ASM_UNALIGNED_HI_OP "\t.uahalf\t"
652 #undef TARGET_ASM_UNALIGNED_SI_OP
653 #define TARGET_ASM_UNALIGNED_SI_OP "\t.uaword\t"
654 #undef TARGET_ASM_UNALIGNED_DI_OP
655 #define TARGET_ASM_UNALIGNED_DI_OP "\t.uaxword\t"
657 /* The target hook has to handle DI-mode values. */
658 #undef TARGET_ASM_INTEGER
659 #define TARGET_ASM_INTEGER sparc_assemble_integer
661 #undef TARGET_ASM_FUNCTION_PROLOGUE
662 #define TARGET_ASM_FUNCTION_PROLOGUE sparc_asm_function_prologue
663 #undef TARGET_ASM_FUNCTION_EPILOGUE
664 #define TARGET_ASM_FUNCTION_EPILOGUE sparc_asm_function_epilogue
666 #undef TARGET_SCHED_ADJUST_COST
667 #define TARGET_SCHED_ADJUST_COST sparc_adjust_cost
668 #undef TARGET_SCHED_ISSUE_RATE
669 #define TARGET_SCHED_ISSUE_RATE sparc_issue_rate
670 #undef TARGET_SCHED_INIT
671 #define TARGET_SCHED_INIT sparc_sched_init
672 #undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
673 #define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD sparc_use_sched_lookahead
675 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
676 #define TARGET_FUNCTION_OK_FOR_SIBCALL sparc_function_ok_for_sibcall
678 #undef TARGET_INIT_LIBFUNCS
679 #define TARGET_INIT_LIBFUNCS sparc_init_libfuncs
680 #undef TARGET_INIT_BUILTINS
681 #define TARGET_INIT_BUILTINS sparc_init_builtins
683 #undef TARGET_LEGITIMIZE_ADDRESS
684 #define TARGET_LEGITIMIZE_ADDRESS sparc_legitimize_address
685 #undef TARGET_DELEGITIMIZE_ADDRESS
686 #define TARGET_DELEGITIMIZE_ADDRESS sparc_delegitimize_address
687 #undef TARGET_MODE_DEPENDENT_ADDRESS_P
688 #define TARGET_MODE_DEPENDENT_ADDRESS_P sparc_mode_dependent_address_p
690 #undef TARGET_EXPAND_BUILTIN
691 #define TARGET_EXPAND_BUILTIN sparc_expand_builtin
692 #undef TARGET_FOLD_BUILTIN
693 #define TARGET_FOLD_BUILTIN sparc_fold_builtin
696 #undef TARGET_HAVE_TLS
697 #define TARGET_HAVE_TLS true
700 #undef TARGET_CANNOT_FORCE_CONST_MEM
701 #define TARGET_CANNOT_FORCE_CONST_MEM sparc_cannot_force_const_mem
703 #undef TARGET_ASM_OUTPUT_MI_THUNK
704 #define TARGET_ASM_OUTPUT_MI_THUNK sparc_output_mi_thunk
705 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
706 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK sparc_can_output_mi_thunk
708 #undef TARGET_RTX_COSTS
709 #define TARGET_RTX_COSTS sparc_rtx_costs
710 #undef TARGET_ADDRESS_COST
711 #define TARGET_ADDRESS_COST hook_int_rtx_mode_as_bool_0
712 #undef TARGET_REGISTER_MOVE_COST
713 #define TARGET_REGISTER_MOVE_COST sparc_register_move_cost
715 #undef TARGET_PROMOTE_FUNCTION_MODE
716 #define TARGET_PROMOTE_FUNCTION_MODE sparc_promote_function_mode
718 #undef TARGET_FUNCTION_VALUE
719 #define TARGET_FUNCTION_VALUE sparc_function_value
720 #undef TARGET_LIBCALL_VALUE
721 #define TARGET_LIBCALL_VALUE sparc_libcall_value
722 #undef TARGET_FUNCTION_VALUE_REGNO_P
723 #define TARGET_FUNCTION_VALUE_REGNO_P sparc_function_value_regno_p
725 #undef TARGET_STRUCT_VALUE_RTX
726 #define TARGET_STRUCT_VALUE_RTX sparc_struct_value_rtx
727 #undef TARGET_RETURN_IN_MEMORY
728 #define TARGET_RETURN_IN_MEMORY sparc_return_in_memory
729 #undef TARGET_MUST_PASS_IN_STACK
730 #define TARGET_MUST_PASS_IN_STACK must_pass_in_stack_var_size
731 #undef TARGET_PASS_BY_REFERENCE
732 #define TARGET_PASS_BY_REFERENCE sparc_pass_by_reference
733 #undef TARGET_ARG_PARTIAL_BYTES
734 #define TARGET_ARG_PARTIAL_BYTES sparc_arg_partial_bytes
735 #undef TARGET_FUNCTION_ARG_ADVANCE
736 #define TARGET_FUNCTION_ARG_ADVANCE sparc_function_arg_advance
737 #undef TARGET_FUNCTION_ARG
738 #define TARGET_FUNCTION_ARG sparc_function_arg
739 #undef TARGET_FUNCTION_INCOMING_ARG
740 #define TARGET_FUNCTION_INCOMING_ARG sparc_function_incoming_arg
741 #undef TARGET_FUNCTION_ARG_BOUNDARY
742 #define TARGET_FUNCTION_ARG_BOUNDARY sparc_function_arg_boundary
744 #undef TARGET_EXPAND_BUILTIN_SAVEREGS
745 #define TARGET_EXPAND_BUILTIN_SAVEREGS sparc_builtin_saveregs
746 #undef TARGET_STRICT_ARGUMENT_NAMING
747 #define TARGET_STRICT_ARGUMENT_NAMING sparc_strict_argument_naming
749 #undef TARGET_EXPAND_BUILTIN_VA_START
750 #define TARGET_EXPAND_BUILTIN_VA_START sparc_va_start
751 #undef TARGET_GIMPLIFY_VA_ARG_EXPR
752 #define TARGET_GIMPLIFY_VA_ARG_EXPR sparc_gimplify_va_arg
754 #undef TARGET_VECTOR_MODE_SUPPORTED_P
755 #define TARGET_VECTOR_MODE_SUPPORTED_P sparc_vector_mode_supported_p
757 #undef TARGET_VECTORIZE_PREFERRED_SIMD_MODE
758 #define TARGET_VECTORIZE_PREFERRED_SIMD_MODE sparc_preferred_simd_mode
760 #ifdef SUBTARGET_INSERT_ATTRIBUTES
761 #undef TARGET_INSERT_ATTRIBUTES
762 #define TARGET_INSERT_ATTRIBUTES SUBTARGET_INSERT_ATTRIBUTES
765 #ifdef SUBTARGET_ATTRIBUTE_TABLE
766 #undef TARGET_ATTRIBUTE_TABLE
767 #define TARGET_ATTRIBUTE_TABLE sparc_attribute_table
770 #undef TARGET_RELAXED_ORDERING
771 #define TARGET_RELAXED_ORDERING SPARC_RELAXED_ORDERING
773 #undef TARGET_OPTION_OVERRIDE
774 #define TARGET_OPTION_OVERRIDE sparc_option_override
776 #if TARGET_GNU_TLS && defined(HAVE_AS_SPARC_UA_PCREL)
777 #undef TARGET_ASM_OUTPUT_DWARF_DTPREL
778 #define TARGET_ASM_OUTPUT_DWARF_DTPREL sparc_output_dwarf_dtprel
781 #undef TARGET_ASM_FILE_END
782 #define TARGET_ASM_FILE_END sparc_file_end
784 #undef TARGET_FRAME_POINTER_REQUIRED
785 #define TARGET_FRAME_POINTER_REQUIRED sparc_frame_pointer_required
787 #undef TARGET_BUILTIN_SETJMP_FRAME_VALUE
788 #define TARGET_BUILTIN_SETJMP_FRAME_VALUE sparc_builtin_setjmp_frame_value
790 #undef TARGET_CAN_ELIMINATE
791 #define TARGET_CAN_ELIMINATE sparc_can_eliminate
793 #undef TARGET_PREFERRED_RELOAD_CLASS
794 #define TARGET_PREFERRED_RELOAD_CLASS sparc_preferred_reload_class
796 #undef TARGET_SECONDARY_RELOAD
797 #define TARGET_SECONDARY_RELOAD sparc_secondary_reload
799 #undef TARGET_CONDITIONAL_REGISTER_USAGE
800 #define TARGET_CONDITIONAL_REGISTER_USAGE sparc_conditional_register_usage
802 #ifdef TARGET_ALTERNATE_LONG_DOUBLE_MANGLING
803 #undef TARGET_MANGLE_TYPE
804 #define TARGET_MANGLE_TYPE sparc_mangle_type
807 #undef TARGET_LEGITIMATE_ADDRESS_P
808 #define TARGET_LEGITIMATE_ADDRESS_P sparc_legitimate_address_p
810 #undef TARGET_LEGITIMATE_CONSTANT_P
811 #define TARGET_LEGITIMATE_CONSTANT_P sparc_legitimate_constant_p
813 #undef TARGET_TRAMPOLINE_INIT
814 #define TARGET_TRAMPOLINE_INIT sparc_trampoline_init
816 #undef TARGET_PRINT_OPERAND_PUNCT_VALID_P
817 #define TARGET_PRINT_OPERAND_PUNCT_VALID_P sparc_print_operand_punct_valid_p
818 #undef TARGET_PRINT_OPERAND
819 #define TARGET_PRINT_OPERAND sparc_print_operand
820 #undef TARGET_PRINT_OPERAND_ADDRESS
821 #define TARGET_PRINT_OPERAND_ADDRESS sparc_print_operand_address
823 /* The value stored by LDSTUB. */
824 #undef TARGET_ATOMIC_TEST_AND_SET_TRUEVAL
825 #define TARGET_ATOMIC_TEST_AND_SET_TRUEVAL 0xff
827 #undef TARGET_CSTORE_MODE
828 #define TARGET_CSTORE_MODE sparc_cstore_mode
830 struct gcc_target targetm = TARGET_INITIALIZER;
832 /* Return the memory reference contained in X if any, zero otherwise. */
837 if (GET_CODE (x) == SIGN_EXTEND || GET_CODE (x) == ZERO_EXTEND)
846 /* We use a machine specific pass to enable workarounds for errata.
847 We need to have the (essentially) final form of the insn stream in order
848 to properly detect the various hazards. Therefore, this machine specific
849 pass runs as late as possible. The pass is inserted in the pass pipeline
850 at the end of sparc_option_override. */
853 sparc_gate_work_around_errata (void)
855 /* The only errata we handle are those of the AT697F and UT699. */
856 return sparc_fix_at697f != 0 || sparc_fix_ut699 != 0;
860 sparc_do_work_around_errata (void)
864 /* Force all instructions to be split into their final form. */
865 split_all_insns_noflow ();
867 /* Now look for specific patterns in the insn stream. */
868 for (insn = get_insns (); insn; insn = next)
870 bool insert_nop = false;
873 /* Look into the instruction in a delay slot. */
874 if (NONJUMP_INSN_P (insn) && GET_CODE (PATTERN (insn)) == SEQUENCE)
875 insn = XVECEXP (PATTERN (insn), 0, 1);
877 /* Look for a single-word load into an odd-numbered FP register. */
879 && NONJUMP_INSN_P (insn)
880 && (set = single_set (insn)) != NULL_RTX
881 && GET_MODE_SIZE (GET_MODE (SET_SRC (set))) == 4
882 && MEM_P (SET_SRC (set))
883 && REG_P (SET_DEST (set))
884 && REGNO (SET_DEST (set)) > 31
885 && REGNO (SET_DEST (set)) % 2 != 0)
887 /* The wrong dependency is on the enclosing double register. */
888 unsigned int x = REGNO (SET_DEST (set)) - 1;
889 unsigned int src1, src2, dest;
892 /* If the insn has a delay slot, then it cannot be problematic. */
893 next = next_active_insn (insn);
896 if (NONJUMP_INSN_P (next) && GET_CODE (PATTERN (next)) == SEQUENCE)
900 code = INSN_CODE (next);
904 case CODE_FOR_adddf3:
905 case CODE_FOR_subdf3:
906 case CODE_FOR_muldf3:
907 case CODE_FOR_divdf3:
908 dest = REGNO (recog_data.operand[0]);
909 src1 = REGNO (recog_data.operand[1]);
910 src2 = REGNO (recog_data.operand[2]);
915 FPOPd %f{x,y}, %f{y,x}, %f{x,y} */
916 if ((src1 == x || src2 == x)
917 && (dest == src1 || dest == src2))
924 FPOPd %fx, %fx, %fx */
927 && (code == CODE_FOR_adddf3 || code == CODE_FOR_muldf3))
932 case CODE_FOR_sqrtdf2:
933 dest = REGNO (recog_data.operand[0]);
934 src1 = REGNO (recog_data.operand[1]);
938 if (src1 == x && dest == src1)
947 /* Look for a single-word load into an integer register. */
948 else if (sparc_fix_ut699
949 && NONJUMP_INSN_P (insn)
950 && (set = single_set (insn)) != NULL_RTX
951 && GET_MODE_SIZE (GET_MODE (SET_SRC (set))) <= 4
952 && mem_ref (SET_SRC (set)) != NULL_RTX
953 && REG_P (SET_DEST (set))
954 && REGNO (SET_DEST (set)) < 32)
956 /* There is no problem if the second memory access has a data
957 dependency on the first single-cycle load. */
958 rtx x = SET_DEST (set);
960 /* If the insn has a delay slot, then it cannot be problematic. */
961 next = next_active_insn (insn);
964 if (NONJUMP_INSN_P (next) && GET_CODE (PATTERN (next)) == SEQUENCE)
967 /* Look for a second memory access to/from an integer register. */
968 if ((set = single_set (next)) != NULL_RTX)
970 rtx src = SET_SRC (set);
971 rtx dest = SET_DEST (set);
974 /* LDD is affected. */
975 if ((mem = mem_ref (src)) != NULL_RTX
978 && !reg_mentioned_p (x, XEXP (mem, 0)))
981 /* STD is *not* affected. */
982 else if ((mem = mem_ref (dest)) != NULL_RTX
983 && GET_MODE_SIZE (GET_MODE (mem)) <= 4
984 && (src == const0_rtx
987 && REGNO (src) != REGNO (x)))
988 && !reg_mentioned_p (x, XEXP (mem, 0)))
994 next = NEXT_INSN (insn);
997 emit_insn_before (gen_nop (), next);
1003 struct rtl_opt_pass pass_work_around_errata =
1007 "errata", /* name */
1008 OPTGROUP_NONE, /* optinfo_flags */
1009 sparc_gate_work_around_errata, /* gate */
1010 sparc_do_work_around_errata, /* execute */
1013 0, /* static_pass_number */
1014 TV_MACH_DEP, /* tv_id */
1015 0, /* properties_required */
1016 0, /* properties_provided */
1017 0, /* properties_destroyed */
1018 0, /* todo_flags_start */
1019 TODO_verify_rtl_sharing, /* todo_flags_finish */
1023 struct register_pass_info insert_pass_work_around_errata =
1025 &pass_work_around_errata.pass, /* pass */
1026 "dbr", /* reference_pass_name */
1027 1, /* ref_pass_instance_number */
1028 PASS_POS_INSERT_AFTER /* po_op */
1031 /* Helpers for TARGET_DEBUG_OPTIONS. */
1033 dump_target_flag_bits (const int flags)
1035 if (flags & MASK_64BIT)
1036 fprintf (stderr, "64BIT ");
1037 if (flags & MASK_APP_REGS)
1038 fprintf (stderr, "APP_REGS ");
1039 if (flags & MASK_FASTER_STRUCTS)
1040 fprintf (stderr, "FASTER_STRUCTS ");
1041 if (flags & MASK_FLAT)
1042 fprintf (stderr, "FLAT ");
1043 if (flags & MASK_FMAF)
1044 fprintf (stderr, "FMAF ");
1045 if (flags & MASK_FPU)
1046 fprintf (stderr, "FPU ");
1047 if (flags & MASK_HARD_QUAD)
1048 fprintf (stderr, "HARD_QUAD ");
1049 if (flags & MASK_POPC)
1050 fprintf (stderr, "POPC ");
1051 if (flags & MASK_PTR64)
1052 fprintf (stderr, "PTR64 ");
1053 if (flags & MASK_STACK_BIAS)
1054 fprintf (stderr, "STACK_BIAS ");
1055 if (flags & MASK_UNALIGNED_DOUBLES)
1056 fprintf (stderr, "UNALIGNED_DOUBLES ");
1057 if (flags & MASK_V8PLUS)
1058 fprintf (stderr, "V8PLUS ");
1059 if (flags & MASK_VIS)
1060 fprintf (stderr, "VIS ");
1061 if (flags & MASK_VIS2)
1062 fprintf (stderr, "VIS2 ");
1063 if (flags & MASK_VIS3)
1064 fprintf (stderr, "VIS3 ");
1065 if (flags & MASK_CBCOND)
1066 fprintf (stderr, "CBCOND ");
1067 if (flags & MASK_DEPRECATED_V8_INSNS)
1068 fprintf (stderr, "DEPRECATED_V8_INSNS ");
1069 if (flags & MASK_SPARCLET)
1070 fprintf (stderr, "SPARCLET ");
1071 if (flags & MASK_SPARCLITE)
1072 fprintf (stderr, "SPARCLITE ");
1073 if (flags & MASK_V8)
1074 fprintf (stderr, "V8 ");
1075 if (flags & MASK_V9)
1076 fprintf (stderr, "V9 ");
1080 dump_target_flags (const char *prefix, const int flags)
1082 fprintf (stderr, "%s: (%08x) [ ", prefix, flags);
1083 dump_target_flag_bits (flags);
1084 fprintf(stderr, "]\n");
1087 /* Validate and override various options, and do some machine dependent
1091 sparc_option_override (void)
1093 static struct code_model {
1094 const char *const name;
1095 const enum cmodel value;
1096 } const cmodels[] = {
1098 { "medlow", CM_MEDLOW },
1099 { "medmid", CM_MEDMID },
1100 { "medany", CM_MEDANY },
1101 { "embmedany", CM_EMBMEDANY },
1102 { NULL, (enum cmodel) 0 }
1104 const struct code_model *cmodel;
1105 /* Map TARGET_CPU_DEFAULT to value for -m{cpu,tune}=. */
1106 static struct cpu_default {
1108 const enum processor_type processor;
1109 } const cpu_default[] = {
1110 /* There must be one entry here for each TARGET_CPU value. */
1111 { TARGET_CPU_sparc, PROCESSOR_CYPRESS },
1112 { TARGET_CPU_v8, PROCESSOR_V8 },
1113 { TARGET_CPU_supersparc, PROCESSOR_SUPERSPARC },
1114 { TARGET_CPU_hypersparc, PROCESSOR_HYPERSPARC },
1115 { TARGET_CPU_leon, PROCESSOR_LEON },
1116 { TARGET_CPU_leon3, PROCESSOR_LEON3 },
1117 { TARGET_CPU_sparclite, PROCESSOR_F930 },
1118 { TARGET_CPU_sparclite86x, PROCESSOR_SPARCLITE86X },
1119 { TARGET_CPU_sparclet, PROCESSOR_TSC701 },
1120 { TARGET_CPU_v9, PROCESSOR_V9 },
1121 { TARGET_CPU_ultrasparc, PROCESSOR_ULTRASPARC },
1122 { TARGET_CPU_ultrasparc3, PROCESSOR_ULTRASPARC3 },
1123 { TARGET_CPU_niagara, PROCESSOR_NIAGARA },
1124 { TARGET_CPU_niagara2, PROCESSOR_NIAGARA2 },
1125 { TARGET_CPU_niagara3, PROCESSOR_NIAGARA3 },
1126 { TARGET_CPU_niagara4, PROCESSOR_NIAGARA4 },
1127 { -1, PROCESSOR_V7 }
1129 const struct cpu_default *def;
1130 /* Table of values for -m{cpu,tune}=. This must match the order of
1131 the enum processor_type in sparc-opts.h. */
1132 static struct cpu_table {
1133 const char *const name;
1136 } const cpu_table[] = {
1137 { "v7", MASK_ISA, 0 },
1138 { "cypress", MASK_ISA, 0 },
1139 { "v8", MASK_ISA, MASK_V8 },
1140 /* TI TMS390Z55 supersparc */
1141 { "supersparc", MASK_ISA, MASK_V8 },
1142 { "hypersparc", MASK_ISA, MASK_V8|MASK_FPU },
1144 { "leon", MASK_ISA, MASK_V8|MASK_FPU },
1145 { "leon3", MASK_ISA, MASK_V8|MASK_FPU },
1146 { "sparclite", MASK_ISA, MASK_SPARCLITE },
1147 /* The Fujitsu MB86930 is the original sparclite chip, with no FPU. */
1148 { "f930", MASK_ISA|MASK_FPU, MASK_SPARCLITE },
1149 /* The Fujitsu MB86934 is the recent sparclite chip, with an FPU. */
1150 { "f934", MASK_ISA, MASK_SPARCLITE|MASK_FPU },
1151 { "sparclite86x", MASK_ISA|MASK_FPU, MASK_SPARCLITE },
1152 { "sparclet", MASK_ISA, MASK_SPARCLET },
1153 /* TEMIC sparclet */
1154 { "tsc701", MASK_ISA, MASK_SPARCLET },
1155 { "v9", MASK_ISA, MASK_V9 },
1156 /* UltraSPARC I, II, IIi */
1157 { "ultrasparc", MASK_ISA,
1158 /* Although insns using %y are deprecated, it is a clear win. */
1159 MASK_V9|MASK_DEPRECATED_V8_INSNS },
1160 /* UltraSPARC III */
1161 /* ??? Check if %y issue still holds true. */
1162 { "ultrasparc3", MASK_ISA,
1163 MASK_V9|MASK_DEPRECATED_V8_INSNS|MASK_VIS2 },
1165 { "niagara", MASK_ISA,
1166 MASK_V9|MASK_DEPRECATED_V8_INSNS },
1168 { "niagara2", MASK_ISA,
1169 MASK_V9|MASK_POPC|MASK_VIS2 },
1171 { "niagara3", MASK_ISA,
1172 MASK_V9|MASK_POPC|MASK_VIS2|MASK_VIS3|MASK_FMAF },
1174 { "niagara4", MASK_ISA,
1175 MASK_V9|MASK_POPC|MASK_VIS2|MASK_VIS3|MASK_FMAF|MASK_CBCOND },
1177 const struct cpu_table *cpu;
1181 if (sparc_debug_string != NULL)
1186 p = ASTRDUP (sparc_debug_string);
1187 while ((q = strtok (p, ",")) != NULL)
1201 if (! strcmp (q, "all"))
1202 mask = MASK_DEBUG_ALL;
1203 else if (! strcmp (q, "options"))
1204 mask = MASK_DEBUG_OPTIONS;
1206 error ("unknown -mdebug-%s switch", q);
1209 sparc_debug &= ~mask;
1211 sparc_debug |= mask;
1215 if (TARGET_DEBUG_OPTIONS)
1217 dump_target_flags("Initial target_flags", target_flags);
1218 dump_target_flags("target_flags_explicit", target_flags_explicit);
1221 #ifdef SUBTARGET_OVERRIDE_OPTIONS
1222 SUBTARGET_OVERRIDE_OPTIONS;
1225 #ifndef SPARC_BI_ARCH
1226 /* Check for unsupported architecture size. */
1227 if (! TARGET_64BIT != DEFAULT_ARCH32_P)
1228 error ("%s is not supported by this configuration",
1229 DEFAULT_ARCH32_P ? "-m64" : "-m32");
1232 /* We force all 64bit archs to use 128 bit long double */
1233 if (TARGET_64BIT && ! TARGET_LONG_DOUBLE_128)
1235 error ("-mlong-double-64 not allowed with -m64");
1236 target_flags |= MASK_LONG_DOUBLE_128;
1239 /* Code model selection. */
1240 sparc_cmodel = SPARC_DEFAULT_CMODEL;
1242 #ifdef SPARC_BI_ARCH
1244 sparc_cmodel = CM_32;
1247 if (sparc_cmodel_string != NULL)
1251 for (cmodel = &cmodels[0]; cmodel->name; cmodel++)
1252 if (strcmp (sparc_cmodel_string, cmodel->name) == 0)
1254 if (cmodel->name == NULL)
1255 error ("bad value (%s) for -mcmodel= switch", sparc_cmodel_string);
1257 sparc_cmodel = cmodel->value;
1260 error ("-mcmodel= is not supported on 32 bit systems");
1263 /* Check that -fcall-saved-REG wasn't specified for out registers. */
1264 for (i = 8; i < 16; i++)
1265 if (!call_used_regs [i])
1267 error ("-fcall-saved-REG is not supported for out registers");
1268 call_used_regs [i] = 1;
1271 fpu = target_flags & MASK_FPU; /* save current -mfpu status */
1273 /* Set the default CPU. */
1274 if (!global_options_set.x_sparc_cpu_and_features)
1276 for (def = &cpu_default[0]; def->cpu != -1; ++def)
1277 if (def->cpu == TARGET_CPU_DEFAULT)
1279 gcc_assert (def->cpu != -1);
1280 sparc_cpu_and_features = def->processor;
1283 if (!global_options_set.x_sparc_cpu)
1284 sparc_cpu = sparc_cpu_and_features;
1286 cpu = &cpu_table[(int) sparc_cpu_and_features];
1288 if (TARGET_DEBUG_OPTIONS)
1290 fprintf (stderr, "sparc_cpu_and_features: %s\n", cpu->name);
1291 fprintf (stderr, "sparc_cpu: %s\n",
1292 cpu_table[(int) sparc_cpu].name);
1293 dump_target_flags ("cpu->disable", cpu->disable);
1294 dump_target_flags ("cpu->enable", cpu->enable);
1297 target_flags &= ~cpu->disable;
1298 target_flags |= (cpu->enable
1299 #ifndef HAVE_AS_FMAF_HPC_VIS3
1300 & ~(MASK_FMAF | MASK_VIS3)
1302 #ifndef HAVE_AS_SPARC4
1307 /* If -mfpu or -mno-fpu was explicitly used, don't override with
1308 the processor default. */
1309 if (target_flags_explicit & MASK_FPU)
1310 target_flags = (target_flags & ~MASK_FPU) | fpu;
1312 /* -mvis2 implies -mvis */
1314 target_flags |= MASK_VIS;
1316 /* -mvis3 implies -mvis2 and -mvis */
1318 target_flags |= MASK_VIS2 | MASK_VIS;
1320 /* Don't allow -mvis, -mvis2, -mvis3, or -mfmaf if FPU is
1323 target_flags &= ~(MASK_VIS | MASK_VIS2 | MASK_VIS3 | MASK_FMAF);
1325 /* -mvis assumes UltraSPARC+, so we are sure v9 instructions
1327 -m64 also implies v9. */
1328 if (TARGET_VIS || TARGET_ARCH64)
1330 target_flags |= MASK_V9;
1331 target_flags &= ~(MASK_V8 | MASK_SPARCLET | MASK_SPARCLITE);
1334 /* -mvis also implies -mv8plus on 32-bit */
1335 if (TARGET_VIS && ! TARGET_ARCH64)
1336 target_flags |= MASK_V8PLUS;
1338 /* Use the deprecated v8 insns for sparc64 in 32 bit mode. */
1339 if (TARGET_V9 && TARGET_ARCH32)
1340 target_flags |= MASK_DEPRECATED_V8_INSNS;
1342 /* V8PLUS requires V9, makes no sense in 64 bit mode. */
1343 if (! TARGET_V9 || TARGET_ARCH64)
1344 target_flags &= ~MASK_V8PLUS;
1346 /* Don't use stack biasing in 32 bit mode. */
1348 target_flags &= ~MASK_STACK_BIAS;
1350 /* Supply a default value for align_functions. */
1351 if (align_functions == 0
1352 && (sparc_cpu == PROCESSOR_ULTRASPARC
1353 || sparc_cpu == PROCESSOR_ULTRASPARC3
1354 || sparc_cpu == PROCESSOR_NIAGARA
1355 || sparc_cpu == PROCESSOR_NIAGARA2
1356 || sparc_cpu == PROCESSOR_NIAGARA3
1357 || sparc_cpu == PROCESSOR_NIAGARA4))
1358 align_functions = 32;
1360 /* Validate PCC_STRUCT_RETURN. */
1361 if (flag_pcc_struct_return == DEFAULT_PCC_STRUCT_RETURN)
1362 flag_pcc_struct_return = (TARGET_ARCH64 ? 0 : 1);
1364 /* Only use .uaxword when compiling for a 64-bit target. */
1366 targetm.asm_out.unaligned_op.di = NULL;
1368 /* Do various machine dependent initializations. */
1369 sparc_init_modes ();
1371 /* Set up function hooks. */
1372 init_machine_status = sparc_init_machine_status;
1377 case PROCESSOR_CYPRESS:
1378 sparc_costs = &cypress_costs;
1381 case PROCESSOR_SPARCLITE:
1382 case PROCESSOR_SUPERSPARC:
1383 sparc_costs = &supersparc_costs;
1385 case PROCESSOR_F930:
1386 case PROCESSOR_F934:
1387 case PROCESSOR_HYPERSPARC:
1388 case PROCESSOR_SPARCLITE86X:
1389 sparc_costs = &hypersparc_costs;
1391 case PROCESSOR_LEON:
1392 sparc_costs = &leon_costs;
1394 case PROCESSOR_LEON3:
1395 sparc_costs = &leon3_costs;
1397 case PROCESSOR_SPARCLET:
1398 case PROCESSOR_TSC701:
1399 sparc_costs = &sparclet_costs;
1402 case PROCESSOR_ULTRASPARC:
1403 sparc_costs = &ultrasparc_costs;
1405 case PROCESSOR_ULTRASPARC3:
1406 sparc_costs = &ultrasparc3_costs;
1408 case PROCESSOR_NIAGARA:
1409 sparc_costs = &niagara_costs;
1411 case PROCESSOR_NIAGARA2:
1412 sparc_costs = &niagara2_costs;
1414 case PROCESSOR_NIAGARA3:
1415 sparc_costs = &niagara3_costs;
1417 case PROCESSOR_NIAGARA4:
1418 sparc_costs = &niagara4_costs;
1420 case PROCESSOR_NATIVE:
1424 if (sparc_memory_model == SMM_DEFAULT)
1426 /* Choose the memory model for the operating system. */
1427 enum sparc_memory_model_type os_default = SUBTARGET_DEFAULT_MEMORY_MODEL;
1428 if (os_default != SMM_DEFAULT)
1429 sparc_memory_model = os_default;
1430 /* Choose the most relaxed model for the processor. */
1432 sparc_memory_model = SMM_RMO;
1434 sparc_memory_model = SMM_PSO;
1436 sparc_memory_model = SMM_SC;
1439 #ifdef TARGET_DEFAULT_LONG_DOUBLE_128
1440 if (!(target_flags_explicit & MASK_LONG_DOUBLE_128))
1441 target_flags |= MASK_LONG_DOUBLE_128;
1444 if (TARGET_DEBUG_OPTIONS)
1445 dump_target_flags ("Final target_flags", target_flags);
1447 maybe_set_param_value (PARAM_SIMULTANEOUS_PREFETCHES,
1448 ((sparc_cpu == PROCESSOR_ULTRASPARC
1449 || sparc_cpu == PROCESSOR_NIAGARA
1450 || sparc_cpu == PROCESSOR_NIAGARA2
1451 || sparc_cpu == PROCESSOR_NIAGARA3
1452 || sparc_cpu == PROCESSOR_NIAGARA4)
1454 : (sparc_cpu == PROCESSOR_ULTRASPARC3
1456 global_options.x_param_values,
1457 global_options_set.x_param_values);
1458 maybe_set_param_value (PARAM_L1_CACHE_LINE_SIZE,
1459 ((sparc_cpu == PROCESSOR_ULTRASPARC
1460 || sparc_cpu == PROCESSOR_ULTRASPARC3
1461 || sparc_cpu == PROCESSOR_NIAGARA
1462 || sparc_cpu == PROCESSOR_NIAGARA2
1463 || sparc_cpu == PROCESSOR_NIAGARA3
1464 || sparc_cpu == PROCESSOR_NIAGARA4)
1466 global_options.x_param_values,
1467 global_options_set.x_param_values);
1469 /* Disable save slot sharing for call-clobbered registers by default.
1470 The IRA sharing algorithm works on single registers only and this
1471 pessimizes for double floating-point registers. */
1472 if (!global_options_set.x_flag_ira_share_save_slots)
1473 flag_ira_share_save_slots = 0;
1475 /* We register a machine specific pass to work around errata, if any.
1476 The pass mut be scheduled as late as possible so that we have the
1477 (essentially) final form of the insn stream to work on.
1478 Registering the pass must be done at start up. It's convenient to
1480 register_pass (&insert_pass_work_around_errata);
1483 /* Miscellaneous utilities. */
1485 /* Nonzero if CODE, a comparison, is suitable for use in v9 conditional move
1486 or branch on register contents instructions. */
1489 v9_regcmp_p (enum rtx_code code)
1491 return (code == EQ || code == NE || code == GE || code == LT
1492 || code == LE || code == GT);
1495 /* Nonzero if OP is a floating point constant which can
1496 be loaded into an integer register using a single
1497 sethi instruction. */
1502 if (GET_CODE (op) == CONST_DOUBLE)
1507 REAL_VALUE_FROM_CONST_DOUBLE (r, op);
1508 REAL_VALUE_TO_TARGET_SINGLE (r, i);
1509 return !SPARC_SIMM13_P (i) && SPARC_SETHI_P (i);
1515 /* Nonzero if OP is a floating point constant which can
1516 be loaded into an integer register using a single
1522 if (GET_CODE (op) == CONST_DOUBLE)
1527 REAL_VALUE_FROM_CONST_DOUBLE (r, op);
1528 REAL_VALUE_TO_TARGET_SINGLE (r, i);
1529 return SPARC_SIMM13_P (i);
1535 /* Nonzero if OP is a floating point constant which can
1536 be loaded into an integer register using a high/losum
1537 instruction sequence. */
1540 fp_high_losum_p (rtx op)
1542 /* The constraints calling this should only be in
1543 SFmode move insns, so any constant which cannot
1544 be moved using a single insn will do. */
1545 if (GET_CODE (op) == CONST_DOUBLE)
1550 REAL_VALUE_FROM_CONST_DOUBLE (r, op);
1551 REAL_VALUE_TO_TARGET_SINGLE (r, i);
1552 return !SPARC_SIMM13_P (i) && !SPARC_SETHI_P (i);
1558 /* Return true if the address of LABEL can be loaded by means of the
1559 mov{si,di}_pic_label_ref patterns in PIC mode. */
1562 can_use_mov_pic_label_ref (rtx label)
1564 /* VxWorks does not impose a fixed gap between segments; the run-time
1565 gap can be different from the object-file gap. We therefore can't
1566 assume X - _GLOBAL_OFFSET_TABLE_ is a link-time constant unless we
1567 are absolutely sure that X is in the same segment as the GOT.
1568 Unfortunately, the flexibility of linker scripts means that we
1569 can't be sure of that in general, so assume that GOT-relative
1570 accesses are never valid on VxWorks. */
1571 if (TARGET_VXWORKS_RTP)
1574 /* Similarly, if the label is non-local, it might end up being placed
1575 in a different section than the current one; now mov_pic_label_ref
1576 requires the label and the code to be in the same section. */
1577 if (LABEL_REF_NONLOCAL_P (label))
1580 /* Finally, if we are reordering basic blocks and partition into hot
1581 and cold sections, this might happen for any label. */
1582 if (flag_reorder_blocks_and_partition)
1588 /* Expand a move instruction. Return true if all work is done. */
1591 sparc_expand_move (enum machine_mode mode, rtx *operands)
1593 /* Handle sets of MEM first. */
1594 if (GET_CODE (operands[0]) == MEM)
1596 /* 0 is a register (or a pair of registers) on SPARC. */
1597 if (register_or_zero_operand (operands[1], mode))
1600 if (!reload_in_progress)
1602 operands[0] = validize_mem (operands[0]);
1603 operands[1] = force_reg (mode, operands[1]);
1607 /* Fixup TLS cases. */
1609 && CONSTANT_P (operands[1])
1610 && sparc_tls_referenced_p (operands [1]))
1612 operands[1] = sparc_legitimize_tls_address (operands[1]);
1616 /* Fixup PIC cases. */
1617 if (flag_pic && CONSTANT_P (operands[1]))
1619 if (pic_address_needs_scratch (operands[1]))
1620 operands[1] = sparc_legitimize_pic_address (operands[1], NULL_RTX);
1622 /* We cannot use the mov{si,di}_pic_label_ref patterns in all cases. */
1623 if (GET_CODE (operands[1]) == LABEL_REF
1624 && can_use_mov_pic_label_ref (operands[1]))
1628 emit_insn (gen_movsi_pic_label_ref (operands[0], operands[1]));
1634 gcc_assert (TARGET_ARCH64);
1635 emit_insn (gen_movdi_pic_label_ref (operands[0], operands[1]));
1640 if (symbolic_operand (operands[1], mode))
1643 = sparc_legitimize_pic_address (operands[1],
1645 ? operands[0] : NULL_RTX);
1650 /* If we are trying to toss an integer constant into FP registers,
1651 or loading a FP or vector constant, force it into memory. */
1652 if (CONSTANT_P (operands[1])
1653 && REG_P (operands[0])
1654 && (SPARC_FP_REG_P (REGNO (operands[0]))
1655 || SCALAR_FLOAT_MODE_P (mode)
1656 || VECTOR_MODE_P (mode)))
1658 /* emit_group_store will send such bogosity to us when it is
1659 not storing directly into memory. So fix this up to avoid
1660 crashes in output_constant_pool. */
1661 if (operands [1] == const0_rtx)
1662 operands[1] = CONST0_RTX (mode);
1664 /* We can clear or set to all-ones FP registers if TARGET_VIS, and
1665 always other regs. */
1666 if ((TARGET_VIS || REGNO (operands[0]) < SPARC_FIRST_FP_REG)
1667 && (const_zero_operand (operands[1], mode)
1668 || const_all_ones_operand (operands[1], mode)))
1671 if (REGNO (operands[0]) < SPARC_FIRST_FP_REG
1672 /* We are able to build any SF constant in integer registers
1673 with at most 2 instructions. */
1675 /* And any DF constant in integer registers. */
1677 && ! can_create_pseudo_p ())))
1680 operands[1] = force_const_mem (mode, operands[1]);
1681 if (!reload_in_progress)
1682 operands[1] = validize_mem (operands[1]);
1686 /* Accept non-constants and valid constants unmodified. */
1687 if (!CONSTANT_P (operands[1])
1688 || GET_CODE (operands[1]) == HIGH
1689 || input_operand (operands[1], mode))
1695 /* All QImode constants require only one insn, so proceed. */
1700 sparc_emit_set_const32 (operands[0], operands[1]);
1704 /* input_operand should have filtered out 32-bit mode. */
1705 sparc_emit_set_const64 (operands[0], operands[1]);
1711 /* TImode isn't available in 32-bit mode. */
1712 split_double (operands[1], &high, &low);
1713 emit_insn (gen_movdi (operand_subword (operands[0], 0, 0, TImode),
1715 emit_insn (gen_movdi (operand_subword (operands[0], 1, 0, TImode),
1727 /* Load OP1, a 32-bit constant, into OP0, a register.
1728 We know it can't be done in one insn when we get
1729 here, the move expander guarantees this. */
1732 sparc_emit_set_const32 (rtx op0, rtx op1)
1734 enum machine_mode mode = GET_MODE (op0);
1737 if (can_create_pseudo_p ())
1738 temp = gen_reg_rtx (mode);
1740 if (GET_CODE (op1) == CONST_INT)
1742 gcc_assert (!small_int_operand (op1, mode)
1743 && !const_high_operand (op1, mode));
1745 /* Emit them as real moves instead of a HIGH/LO_SUM,
1746 this way CSE can see everything and reuse intermediate
1747 values if it wants. */
1748 emit_insn (gen_rtx_SET (VOIDmode, temp,
1749 GEN_INT (INTVAL (op1)
1750 & ~(HOST_WIDE_INT)0x3ff)));
1752 emit_insn (gen_rtx_SET (VOIDmode,
1754 gen_rtx_IOR (mode, temp,
1755 GEN_INT (INTVAL (op1) & 0x3ff))));
1759 /* A symbol, emit in the traditional way. */
1760 emit_insn (gen_rtx_SET (VOIDmode, temp,
1761 gen_rtx_HIGH (mode, op1)));
1762 emit_insn (gen_rtx_SET (VOIDmode,
1763 op0, gen_rtx_LO_SUM (mode, temp, op1)));
1767 /* Load OP1, a symbolic 64-bit constant, into OP0, a DImode register.
1768 If TEMP is nonzero, we are forbidden to use any other scratch
1769 registers. Otherwise, we are allowed to generate them as needed.
1771 Note that TEMP may have TImode if the code model is TARGET_CM_MEDANY
1772 or TARGET_CM_EMBMEDANY (see the reload_indi and reload_outdi patterns). */
1775 sparc_emit_set_symbolic_const64 (rtx op0, rtx op1, rtx temp)
1777 rtx temp1, temp2, temp3, temp4, temp5;
1780 if (temp && GET_MODE (temp) == TImode)
1783 temp = gen_rtx_REG (DImode, REGNO (temp));
1786 /* SPARC-V9 code-model support. */
1787 switch (sparc_cmodel)
1790 /* The range spanned by all instructions in the object is less
1791 than 2^31 bytes (2GB) and the distance from any instruction
1792 to the location of the label _GLOBAL_OFFSET_TABLE_ is less
1793 than 2^31 bytes (2GB).
1795 The executable must be in the low 4TB of the virtual address
1798 sethi %hi(symbol), %temp1
1799 or %temp1, %lo(symbol), %reg */
1801 temp1 = temp; /* op0 is allowed. */
1803 temp1 = gen_reg_rtx (DImode);
1805 emit_insn (gen_rtx_SET (VOIDmode, temp1, gen_rtx_HIGH (DImode, op1)));
1806 emit_insn (gen_rtx_SET (VOIDmode, op0, gen_rtx_LO_SUM (DImode, temp1, op1)));
1810 /* The range spanned by all instructions in the object is less
1811 than 2^31 bytes (2GB) and the distance from any instruction
1812 to the location of the label _GLOBAL_OFFSET_TABLE_ is less
1813 than 2^31 bytes (2GB).
1815 The executable must be in the low 16TB of the virtual address
1818 sethi %h44(symbol), %temp1
1819 or %temp1, %m44(symbol), %temp2
1820 sllx %temp2, 12, %temp3
1821 or %temp3, %l44(symbol), %reg */
1826 temp3 = temp; /* op0 is allowed. */
1830 temp1 = gen_reg_rtx (DImode);
1831 temp2 = gen_reg_rtx (DImode);
1832 temp3 = gen_reg_rtx (DImode);
1835 emit_insn (gen_seth44 (temp1, op1));
1836 emit_insn (gen_setm44 (temp2, temp1, op1));
1837 emit_insn (gen_rtx_SET (VOIDmode, temp3,
1838 gen_rtx_ASHIFT (DImode, temp2, GEN_INT (12))));
1839 emit_insn (gen_setl44 (op0, temp3, op1));
1843 /* The range spanned by all instructions in the object is less
1844 than 2^31 bytes (2GB) and the distance from any instruction
1845 to the location of the label _GLOBAL_OFFSET_TABLE_ is less
1846 than 2^31 bytes (2GB).
1848 The executable can be placed anywhere in the virtual address
1851 sethi %hh(symbol), %temp1
1852 sethi %lm(symbol), %temp2
1853 or %temp1, %hm(symbol), %temp3
1854 sllx %temp3, 32, %temp4
1855 or %temp4, %temp2, %temp5
1856 or %temp5, %lo(symbol), %reg */
1859 /* It is possible that one of the registers we got for operands[2]
1860 might coincide with that of operands[0] (which is why we made
1861 it TImode). Pick the other one to use as our scratch. */
1862 if (rtx_equal_p (temp, op0))
1864 gcc_assert (ti_temp);
1865 temp = gen_rtx_REG (DImode, REGNO (temp) + 1);
1868 temp2 = temp; /* op0 is _not_ allowed, see above. */
1875 temp1 = gen_reg_rtx (DImode);
1876 temp2 = gen_reg_rtx (DImode);
1877 temp3 = gen_reg_rtx (DImode);
1878 temp4 = gen_reg_rtx (DImode);
1879 temp5 = gen_reg_rtx (DImode);
1882 emit_insn (gen_sethh (temp1, op1));
1883 emit_insn (gen_setlm (temp2, op1));
1884 emit_insn (gen_sethm (temp3, temp1, op1));
1885 emit_insn (gen_rtx_SET (VOIDmode, temp4,
1886 gen_rtx_ASHIFT (DImode, temp3, GEN_INT (32))));
1887 emit_insn (gen_rtx_SET (VOIDmode, temp5,
1888 gen_rtx_PLUS (DImode, temp4, temp2)));
1889 emit_insn (gen_setlo (op0, temp5, op1));
1893 /* Old old old backwards compatibility kruft here.
1894 Essentially it is MEDLOW with a fixed 64-bit
1895 virtual base added to all data segment addresses.
1896 Text-segment stuff is computed like MEDANY, we can't
1897 reuse the code above because the relocation knobs
1900 Data segment: sethi %hi(symbol), %temp1
1901 add %temp1, EMBMEDANY_BASE_REG, %temp2
1902 or %temp2, %lo(symbol), %reg */
1903 if (data_segment_operand (op1, GET_MODE (op1)))
1907 temp1 = temp; /* op0 is allowed. */
1912 temp1 = gen_reg_rtx (DImode);
1913 temp2 = gen_reg_rtx (DImode);
1916 emit_insn (gen_embmedany_sethi (temp1, op1));
1917 emit_insn (gen_embmedany_brsum (temp2, temp1));
1918 emit_insn (gen_embmedany_losum (op0, temp2, op1));
1921 /* Text segment: sethi %uhi(symbol), %temp1
1922 sethi %hi(symbol), %temp2
1923 or %temp1, %ulo(symbol), %temp3
1924 sllx %temp3, 32, %temp4
1925 or %temp4, %temp2, %temp5
1926 or %temp5, %lo(symbol), %reg */
1931 /* It is possible that one of the registers we got for operands[2]
1932 might coincide with that of operands[0] (which is why we made
1933 it TImode). Pick the other one to use as our scratch. */
1934 if (rtx_equal_p (temp, op0))
1936 gcc_assert (ti_temp);
1937 temp = gen_rtx_REG (DImode, REGNO (temp) + 1);
1940 temp2 = temp; /* op0 is _not_ allowed, see above. */
1947 temp1 = gen_reg_rtx (DImode);
1948 temp2 = gen_reg_rtx (DImode);
1949 temp3 = gen_reg_rtx (DImode);
1950 temp4 = gen_reg_rtx (DImode);
1951 temp5 = gen_reg_rtx (DImode);
1954 emit_insn (gen_embmedany_textuhi (temp1, op1));
1955 emit_insn (gen_embmedany_texthi (temp2, op1));
1956 emit_insn (gen_embmedany_textulo (temp3, temp1, op1));
1957 emit_insn (gen_rtx_SET (VOIDmode, temp4,
1958 gen_rtx_ASHIFT (DImode, temp3, GEN_INT (32))));
1959 emit_insn (gen_rtx_SET (VOIDmode, temp5,
1960 gen_rtx_PLUS (DImode, temp4, temp2)));
1961 emit_insn (gen_embmedany_textlo (op0, temp5, op1));
1970 #if HOST_BITS_PER_WIDE_INT == 32
1972 sparc_emit_set_const64 (rtx op0 ATTRIBUTE_UNUSED, rtx op1 ATTRIBUTE_UNUSED)
1977 /* These avoid problems when cross compiling. If we do not
1978 go through all this hair then the optimizer will see
1979 invalid REG_EQUAL notes or in some cases none at all. */
1980 static rtx gen_safe_HIGH64 (rtx, HOST_WIDE_INT);
1981 static rtx gen_safe_SET64 (rtx, HOST_WIDE_INT);
1982 static rtx gen_safe_OR64 (rtx, HOST_WIDE_INT);
1983 static rtx gen_safe_XOR64 (rtx, HOST_WIDE_INT);
1985 /* The optimizer is not to assume anything about exactly
1986 which bits are set for a HIGH, they are unspecified.
1987 Unfortunately this leads to many missed optimizations
1988 during CSE. We mask out the non-HIGH bits, and matches
1989 a plain movdi, to alleviate this problem. */
1991 gen_safe_HIGH64 (rtx dest, HOST_WIDE_INT val)
1993 return gen_rtx_SET (VOIDmode, dest, GEN_INT (val & ~(HOST_WIDE_INT)0x3ff));
1997 gen_safe_SET64 (rtx dest, HOST_WIDE_INT val)
1999 return gen_rtx_SET (VOIDmode, dest, GEN_INT (val));
2003 gen_safe_OR64 (rtx src, HOST_WIDE_INT val)
2005 return gen_rtx_IOR (DImode, src, GEN_INT (val));
2009 gen_safe_XOR64 (rtx src, HOST_WIDE_INT val)
2011 return gen_rtx_XOR (DImode, src, GEN_INT (val));
2014 /* Worker routines for 64-bit constant formation on arch64.
2015 One of the key things to be doing in these emissions is
2016 to create as many temp REGs as possible. This makes it
2017 possible for half-built constants to be used later when
2018 such values are similar to something required later on.
2019 Without doing this, the optimizer cannot see such
2022 static void sparc_emit_set_const64_quick1 (rtx, rtx,
2023 unsigned HOST_WIDE_INT, int);
2026 sparc_emit_set_const64_quick1 (rtx op0, rtx temp,
2027 unsigned HOST_WIDE_INT low_bits, int is_neg)
2029 unsigned HOST_WIDE_INT high_bits;
2032 high_bits = (~low_bits) & 0xffffffff;
2034 high_bits = low_bits;
2036 emit_insn (gen_safe_HIGH64 (temp, high_bits));
2039 emit_insn (gen_rtx_SET (VOIDmode, op0,
2040 gen_safe_OR64 (temp, (high_bits & 0x3ff))));
2044 /* If we are XOR'ing with -1, then we should emit a one's complement
2045 instead. This way the combiner will notice logical operations
2046 such as ANDN later on and substitute. */
2047 if ((low_bits & 0x3ff) == 0x3ff)
2049 emit_insn (gen_rtx_SET (VOIDmode, op0,
2050 gen_rtx_NOT (DImode, temp)));
2054 emit_insn (gen_rtx_SET (VOIDmode, op0,
2055 gen_safe_XOR64 (temp,
2056 (-(HOST_WIDE_INT)0x400
2057 | (low_bits & 0x3ff)))));
2062 static void sparc_emit_set_const64_quick2 (rtx, rtx, unsigned HOST_WIDE_INT,
2063 unsigned HOST_WIDE_INT, int);
2066 sparc_emit_set_const64_quick2 (rtx op0, rtx temp,
2067 unsigned HOST_WIDE_INT high_bits,
2068 unsigned HOST_WIDE_INT low_immediate,
2073 if ((high_bits & 0xfffffc00) != 0)
2075 emit_insn (gen_safe_HIGH64 (temp, high_bits));
2076 if ((high_bits & ~0xfffffc00) != 0)
2077 emit_insn (gen_rtx_SET (VOIDmode, op0,
2078 gen_safe_OR64 (temp, (high_bits & 0x3ff))));
2084 emit_insn (gen_safe_SET64 (temp, high_bits));
2088 /* Now shift it up into place. */
2089 emit_insn (gen_rtx_SET (VOIDmode, op0,
2090 gen_rtx_ASHIFT (DImode, temp2,
2091 GEN_INT (shift_count))));
2093 /* If there is a low immediate part piece, finish up by
2094 putting that in as well. */
2095 if (low_immediate != 0)
2096 emit_insn (gen_rtx_SET (VOIDmode, op0,
2097 gen_safe_OR64 (op0, low_immediate)));
2100 static void sparc_emit_set_const64_longway (rtx, rtx, unsigned HOST_WIDE_INT,
2101 unsigned HOST_WIDE_INT);
2103 /* Full 64-bit constant decomposition. Even though this is the
2104 'worst' case, we still optimize a few things away. */
2106 sparc_emit_set_const64_longway (rtx op0, rtx temp,
2107 unsigned HOST_WIDE_INT high_bits,
2108 unsigned HOST_WIDE_INT low_bits)
2112 if (can_create_pseudo_p ())
2113 sub_temp = gen_reg_rtx (DImode);
2115 if ((high_bits & 0xfffffc00) != 0)
2117 emit_insn (gen_safe_HIGH64 (temp, high_bits));
2118 if ((high_bits & ~0xfffffc00) != 0)
2119 emit_insn (gen_rtx_SET (VOIDmode,
2121 gen_safe_OR64 (temp, (high_bits & 0x3ff))));
2127 emit_insn (gen_safe_SET64 (temp, high_bits));
2131 if (can_create_pseudo_p ())
2133 rtx temp2 = gen_reg_rtx (DImode);
2134 rtx temp3 = gen_reg_rtx (DImode);
2135 rtx temp4 = gen_reg_rtx (DImode);
2137 emit_insn (gen_rtx_SET (VOIDmode, temp4,
2138 gen_rtx_ASHIFT (DImode, sub_temp,
2141 emit_insn (gen_safe_HIGH64 (temp2, low_bits));
2142 if ((low_bits & ~0xfffffc00) != 0)
2144 emit_insn (gen_rtx_SET (VOIDmode, temp3,
2145 gen_safe_OR64 (temp2, (low_bits & 0x3ff))));
2146 emit_insn (gen_rtx_SET (VOIDmode, op0,
2147 gen_rtx_PLUS (DImode, temp4, temp3)));
2151 emit_insn (gen_rtx_SET (VOIDmode, op0,
2152 gen_rtx_PLUS (DImode, temp4, temp2)));
2157 rtx low1 = GEN_INT ((low_bits >> (32 - 12)) & 0xfff);
2158 rtx low2 = GEN_INT ((low_bits >> (32 - 12 - 12)) & 0xfff);
2159 rtx low3 = GEN_INT ((low_bits >> (32 - 12 - 12 - 8)) & 0x0ff);
2162 /* We are in the middle of reload, so this is really
2163 painful. However we do still make an attempt to
2164 avoid emitting truly stupid code. */
2165 if (low1 != const0_rtx)
2167 emit_insn (gen_rtx_SET (VOIDmode, op0,
2168 gen_rtx_ASHIFT (DImode, sub_temp,
2169 GEN_INT (to_shift))));
2170 emit_insn (gen_rtx_SET (VOIDmode, op0,
2171 gen_rtx_IOR (DImode, op0, low1)));
2179 if (low2 != const0_rtx)
2181 emit_insn (gen_rtx_SET (VOIDmode, op0,
2182 gen_rtx_ASHIFT (DImode, sub_temp,
2183 GEN_INT (to_shift))));
2184 emit_insn (gen_rtx_SET (VOIDmode, op0,
2185 gen_rtx_IOR (DImode, op0, low2)));
2193 emit_insn (gen_rtx_SET (VOIDmode, op0,
2194 gen_rtx_ASHIFT (DImode, sub_temp,
2195 GEN_INT (to_shift))));
2196 if (low3 != const0_rtx)
2197 emit_insn (gen_rtx_SET (VOIDmode, op0,
2198 gen_rtx_IOR (DImode, op0, low3)));
2203 /* Analyze a 64-bit constant for certain properties. */
2204 static void analyze_64bit_constant (unsigned HOST_WIDE_INT,
2205 unsigned HOST_WIDE_INT,
2206 int *, int *, int *);
2209 analyze_64bit_constant (unsigned HOST_WIDE_INT high_bits,
2210 unsigned HOST_WIDE_INT low_bits,
2211 int *hbsp, int *lbsp, int *abbasp)
2213 int lowest_bit_set, highest_bit_set, all_bits_between_are_set;
2216 lowest_bit_set = highest_bit_set = -1;
2220 if ((lowest_bit_set == -1)
2221 && ((low_bits >> i) & 1))
2223 if ((highest_bit_set == -1)
2224 && ((high_bits >> (32 - i - 1)) & 1))
2225 highest_bit_set = (64 - i - 1);
2228 && ((highest_bit_set == -1)
2229 || (lowest_bit_set == -1)));
2235 if ((lowest_bit_set == -1)
2236 && ((high_bits >> i) & 1))
2237 lowest_bit_set = i + 32;
2238 if ((highest_bit_set == -1)
2239 && ((low_bits >> (32 - i - 1)) & 1))
2240 highest_bit_set = 32 - i - 1;
2243 && ((highest_bit_set == -1)
2244 || (lowest_bit_set == -1)));
2246 /* If there are no bits set this should have gone out
2247 as one instruction! */
2248 gcc_assert (lowest_bit_set != -1 && highest_bit_set != -1);
2249 all_bits_between_are_set = 1;
2250 for (i = lowest_bit_set; i <= highest_bit_set; i++)
2254 if ((low_bits & (1 << i)) != 0)
2259 if ((high_bits & (1 << (i - 32))) != 0)
2262 all_bits_between_are_set = 0;
2265 *hbsp = highest_bit_set;
2266 *lbsp = lowest_bit_set;
2267 *abbasp = all_bits_between_are_set;
2270 static int const64_is_2insns (unsigned HOST_WIDE_INT, unsigned HOST_WIDE_INT);
2273 const64_is_2insns (unsigned HOST_WIDE_INT high_bits,
2274 unsigned HOST_WIDE_INT low_bits)
2276 int highest_bit_set, lowest_bit_set, all_bits_between_are_set;
2279 || high_bits == 0xffffffff)
2282 analyze_64bit_constant (high_bits, low_bits,
2283 &highest_bit_set, &lowest_bit_set,
2284 &all_bits_between_are_set);
2286 if ((highest_bit_set == 63
2287 || lowest_bit_set == 0)
2288 && all_bits_between_are_set != 0)
2291 if ((highest_bit_set - lowest_bit_set) < 21)
2297 static unsigned HOST_WIDE_INT create_simple_focus_bits (unsigned HOST_WIDE_INT,
2298 unsigned HOST_WIDE_INT,
2301 static unsigned HOST_WIDE_INT
2302 create_simple_focus_bits (unsigned HOST_WIDE_INT high_bits,
2303 unsigned HOST_WIDE_INT low_bits,
2304 int lowest_bit_set, int shift)
2306 HOST_WIDE_INT hi, lo;
2308 if (lowest_bit_set < 32)
2310 lo = (low_bits >> lowest_bit_set) << shift;
2311 hi = ((high_bits << (32 - lowest_bit_set)) << shift);
2316 hi = ((high_bits >> (lowest_bit_set - 32)) << shift);
2318 gcc_assert (! (hi & lo));
2322 /* Here we are sure to be arch64 and this is an integer constant
2323 being loaded into a register. Emit the most efficient
2324 insn sequence possible. Detection of all the 1-insn cases
2325 has been done already. */
2327 sparc_emit_set_const64 (rtx op0, rtx op1)
2329 unsigned HOST_WIDE_INT high_bits, low_bits;
2330 int lowest_bit_set, highest_bit_set;
2331 int all_bits_between_are_set;
2334 /* Sanity check that we know what we are working with. */
2335 gcc_assert (TARGET_ARCH64
2336 && (GET_CODE (op0) == SUBREG
2337 || (REG_P (op0) && ! SPARC_FP_REG_P (REGNO (op0)))));
2339 if (! can_create_pseudo_p ())
2342 if (GET_CODE (op1) != CONST_INT)
2344 sparc_emit_set_symbolic_const64 (op0, op1, temp);
2349 temp = gen_reg_rtx (DImode);
2351 high_bits = ((INTVAL (op1) >> 32) & 0xffffffff);
2352 low_bits = (INTVAL (op1) & 0xffffffff);
2354 /* low_bits bits 0 --> 31
2355 high_bits bits 32 --> 63 */
2357 analyze_64bit_constant (high_bits, low_bits,
2358 &highest_bit_set, &lowest_bit_set,
2359 &all_bits_between_are_set);
2361 /* First try for a 2-insn sequence. */
2363 /* These situations are preferred because the optimizer can
2364 * do more things with them:
2366 * sllx %reg, shift, %reg
2368 * srlx %reg, shift, %reg
2369 * 3) mov some_small_const, %reg
2370 * sllx %reg, shift, %reg
2372 if (((highest_bit_set == 63
2373 || lowest_bit_set == 0)
2374 && all_bits_between_are_set != 0)
2375 || ((highest_bit_set - lowest_bit_set) < 12))
2377 HOST_WIDE_INT the_const = -1;
2378 int shift = lowest_bit_set;
2380 if ((highest_bit_set != 63
2381 && lowest_bit_set != 0)
2382 || all_bits_between_are_set == 0)
2385 create_simple_focus_bits (high_bits, low_bits,
2388 else if (lowest_bit_set == 0)
2389 shift = -(63 - highest_bit_set);
2391 gcc_assert (SPARC_SIMM13_P (the_const));
2392 gcc_assert (shift != 0);
2394 emit_insn (gen_safe_SET64 (temp, the_const));
2396 emit_insn (gen_rtx_SET (VOIDmode,
2398 gen_rtx_ASHIFT (DImode,
2402 emit_insn (gen_rtx_SET (VOIDmode,
2404 gen_rtx_LSHIFTRT (DImode,
2406 GEN_INT (-shift))));
2410 /* Now a range of 22 or less bits set somewhere.
2411 * 1) sethi %hi(focus_bits), %reg
2412 * sllx %reg, shift, %reg
2413 * 2) sethi %hi(focus_bits), %reg
2414 * srlx %reg, shift, %reg
2416 if ((highest_bit_set - lowest_bit_set) < 21)
2418 unsigned HOST_WIDE_INT focus_bits =
2419 create_simple_focus_bits (high_bits, low_bits,
2420 lowest_bit_set, 10);
2422 gcc_assert (SPARC_SETHI_P (focus_bits));
2423 gcc_assert (lowest_bit_set != 10);
2425 emit_insn (gen_safe_HIGH64 (temp, focus_bits));
2427 /* If lowest_bit_set == 10 then a sethi alone could have done it. */
2428 if (lowest_bit_set < 10)
2429 emit_insn (gen_rtx_SET (VOIDmode,
2431 gen_rtx_LSHIFTRT (DImode, temp,
2432 GEN_INT (10 - lowest_bit_set))));
2433 else if (lowest_bit_set > 10)
2434 emit_insn (gen_rtx_SET (VOIDmode,
2436 gen_rtx_ASHIFT (DImode, temp,
2437 GEN_INT (lowest_bit_set - 10))));
2441 /* 1) sethi %hi(low_bits), %reg
2442 * or %reg, %lo(low_bits), %reg
2443 * 2) sethi %hi(~low_bits), %reg
2444 * xor %reg, %lo(-0x400 | (low_bits & 0x3ff)), %reg
2447 || high_bits == 0xffffffff)
2449 sparc_emit_set_const64_quick1 (op0, temp, low_bits,
2450 (high_bits == 0xffffffff));
2454 /* Now, try 3-insn sequences. */
2456 /* 1) sethi %hi(high_bits), %reg
2457 * or %reg, %lo(high_bits), %reg
2458 * sllx %reg, 32, %reg
2462 sparc_emit_set_const64_quick2 (op0, temp, high_bits, 0, 32);
2466 /* We may be able to do something quick
2467 when the constant is negated, so try that. */
2468 if (const64_is_2insns ((~high_bits) & 0xffffffff,
2469 (~low_bits) & 0xfffffc00))
2471 /* NOTE: The trailing bits get XOR'd so we need the
2472 non-negated bits, not the negated ones. */
2473 unsigned HOST_WIDE_INT trailing_bits = low_bits & 0x3ff;
2475 if ((((~high_bits) & 0xffffffff) == 0
2476 && ((~low_bits) & 0x80000000) == 0)
2477 || (((~high_bits) & 0xffffffff) == 0xffffffff
2478 && ((~low_bits) & 0x80000000) != 0))
2480 unsigned HOST_WIDE_INT fast_int = (~low_bits & 0xffffffff);
2482 if ((SPARC_SETHI_P (fast_int)
2483 && (~high_bits & 0xffffffff) == 0)
2484 || SPARC_SIMM13_P (fast_int))
2485 emit_insn (gen_safe_SET64 (temp, fast_int));
2487 sparc_emit_set_const64 (temp, GEN_INT (fast_int));
2492 negated_const = GEN_INT (((~low_bits) & 0xfffffc00) |
2493 (((HOST_WIDE_INT)((~high_bits) & 0xffffffff))<<32));
2494 sparc_emit_set_const64 (temp, negated_const);
2497 /* If we are XOR'ing with -1, then we should emit a one's complement
2498 instead. This way the combiner will notice logical operations
2499 such as ANDN later on and substitute. */
2500 if (trailing_bits == 0x3ff)
2502 emit_insn (gen_rtx_SET (VOIDmode, op0,
2503 gen_rtx_NOT (DImode, temp)));
2507 emit_insn (gen_rtx_SET (VOIDmode,
2509 gen_safe_XOR64 (temp,
2510 (-0x400 | trailing_bits))));
2515 /* 1) sethi %hi(xxx), %reg
2516 * or %reg, %lo(xxx), %reg
2517 * sllx %reg, yyy, %reg
2519 * ??? This is just a generalized version of the low_bits==0
2520 * thing above, FIXME...
2522 if ((highest_bit_set - lowest_bit_set) < 32)
2524 unsigned HOST_WIDE_INT focus_bits =
2525 create_simple_focus_bits (high_bits, low_bits,
2528 /* We can't get here in this state. */
2529 gcc_assert (highest_bit_set >= 32 && lowest_bit_set < 32);
2531 /* So what we know is that the set bits straddle the
2532 middle of the 64-bit word. */
2533 sparc_emit_set_const64_quick2 (op0, temp,
2539 /* 1) sethi %hi(high_bits), %reg
2540 * or %reg, %lo(high_bits), %reg
2541 * sllx %reg, 32, %reg
2542 * or %reg, low_bits, %reg
2544 if (SPARC_SIMM13_P(low_bits)
2545 && ((int)low_bits > 0))
2547 sparc_emit_set_const64_quick2 (op0, temp, high_bits, low_bits, 32);
2551 /* The easiest way when all else fails, is full decomposition. */
2552 sparc_emit_set_const64_longway (op0, temp, high_bits, low_bits);
2554 #endif /* HOST_BITS_PER_WIDE_INT == 32 */
2556 /* Given a comparison code (EQ, NE, etc.) and the first operand of a COMPARE,
2557 return the mode to be used for the comparison. For floating-point,
2558 CCFP[E]mode is used. CC_NOOVmode should be used when the first operand
2559 is a PLUS, MINUS, NEG, or ASHIFT. CCmode should be used when no special
2560 processing is needed. */
2563 select_cc_mode (enum rtx_code op, rtx x, rtx y ATTRIBUTE_UNUSED)
2565 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
2591 else if (GET_CODE (x) == PLUS || GET_CODE (x) == MINUS
2592 || GET_CODE (x) == NEG || GET_CODE (x) == ASHIFT)
2594 if (TARGET_ARCH64 && GET_MODE (x) == DImode)
2595 return CCX_NOOVmode;
2601 if (TARGET_ARCH64 && GET_MODE (x) == DImode)
2608 /* Emit the compare insn and return the CC reg for a CODE comparison
2609 with operands X and Y. */
2612 gen_compare_reg_1 (enum rtx_code code, rtx x, rtx y)
2614 enum machine_mode mode;
2617 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_CC)
2620 mode = SELECT_CC_MODE (code, x, y);
2622 /* ??? We don't have movcc patterns so we cannot generate pseudo regs for the
2623 fcc regs (cse can't tell they're really call clobbered regs and will
2624 remove a duplicate comparison even if there is an intervening function
2625 call - it will then try to reload the cc reg via an int reg which is why
2626 we need the movcc patterns). It is possible to provide the movcc
2627 patterns by using the ldxfsr/stxfsr v9 insns. I tried it: you need two
2628 registers (say %g1,%g5) and it takes about 6 insns. A better fix would be
2629 to tell cse that CCFPE mode registers (even pseudos) are call
2632 /* ??? This is an experiment. Rather than making changes to cse which may
2633 or may not be easy/clean, we do our own cse. This is possible because
2634 we will generate hard registers. Cse knows they're call clobbered (it
2635 doesn't know the same thing about pseudos). If we guess wrong, no big
2636 deal, but if we win, great! */
2638 if (TARGET_V9 && GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
2639 #if 1 /* experiment */
2642 /* We cycle through the registers to ensure they're all exercised. */
2643 static int next_fcc_reg = 0;
2644 /* Previous x,y for each fcc reg. */
2645 static rtx prev_args[4][2];
2647 /* Scan prev_args for x,y. */
2648 for (reg = 0; reg < 4; reg++)
2649 if (prev_args[reg][0] == x && prev_args[reg][1] == y)
2654 prev_args[reg][0] = x;
2655 prev_args[reg][1] = y;
2656 next_fcc_reg = (next_fcc_reg + 1) & 3;
2658 cc_reg = gen_rtx_REG (mode, reg + SPARC_FIRST_V9_FCC_REG);
2661 cc_reg = gen_reg_rtx (mode);
2662 #endif /* ! experiment */
2663 else if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
2664 cc_reg = gen_rtx_REG (mode, SPARC_FCC_REG);
2666 cc_reg = gen_rtx_REG (mode, SPARC_ICC_REG);
2668 /* We shouldn't get there for TFmode if !TARGET_HARD_QUAD. If we do, this
2669 will only result in an unrecognizable insn so no point in asserting. */
2670 emit_insn (gen_rtx_SET (VOIDmode, cc_reg, gen_rtx_COMPARE (mode, x, y)));
2676 /* Emit the compare insn and return the CC reg for the comparison in CMP. */
2679 gen_compare_reg (rtx cmp)
2681 return gen_compare_reg_1 (GET_CODE (cmp), XEXP (cmp, 0), XEXP (cmp, 1));
2684 /* This function is used for v9 only.
2685 DEST is the target of the Scc insn.
2686 CODE is the code for an Scc's comparison.
2687 X and Y are the values we compare.
2689 This function is needed to turn
2692 (gt (reg:CCX 100 %icc)
2696 (gt:DI (reg:CCX 100 %icc)
2699 IE: The instruction recognizer needs to see the mode of the comparison to
2700 find the right instruction. We could use "gt:DI" right in the
2701 define_expand, but leaving it out allows us to handle DI, SI, etc. */
2704 gen_v9_scc (rtx dest, enum rtx_code compare_code, rtx x, rtx y)
2707 && (GET_MODE (x) == DImode
2708 || GET_MODE (dest) == DImode))
2711 /* Try to use the movrCC insns. */
2713 && GET_MODE_CLASS (GET_MODE (x)) == MODE_INT
2715 && v9_regcmp_p (compare_code))
2720 /* Special case for op0 != 0. This can be done with one instruction if
2723 if (compare_code == NE
2724 && GET_MODE (dest) == DImode
2725 && rtx_equal_p (op0, dest))
2727 emit_insn (gen_rtx_SET (VOIDmode, dest,
2728 gen_rtx_IF_THEN_ELSE (DImode,
2729 gen_rtx_fmt_ee (compare_code, DImode,
2736 if (reg_overlap_mentioned_p (dest, op0))
2738 /* Handle the case where dest == x.
2739 We "early clobber" the result. */
2740 op0 = gen_reg_rtx (GET_MODE (x));
2741 emit_move_insn (op0, x);
2744 emit_insn (gen_rtx_SET (VOIDmode, dest, const0_rtx));
2745 if (GET_MODE (op0) != DImode)
2747 temp = gen_reg_rtx (DImode);
2748 convert_move (temp, op0, 0);
2752 emit_insn (gen_rtx_SET (VOIDmode, dest,
2753 gen_rtx_IF_THEN_ELSE (GET_MODE (dest),
2754 gen_rtx_fmt_ee (compare_code, DImode,
2762 x = gen_compare_reg_1 (compare_code, x, y);
2765 gcc_assert (GET_MODE (x) != CC_NOOVmode
2766 && GET_MODE (x) != CCX_NOOVmode);
2768 emit_insn (gen_rtx_SET (VOIDmode, dest, const0_rtx));
2769 emit_insn (gen_rtx_SET (VOIDmode, dest,
2770 gen_rtx_IF_THEN_ELSE (GET_MODE (dest),
2771 gen_rtx_fmt_ee (compare_code,
2772 GET_MODE (x), x, y),
2773 const1_rtx, dest)));
2779 /* Emit an scc insn. For seq, sne, sgeu, and sltu, we can do this
2780 without jumps using the addx/subx instructions. */
2783 emit_scc_insn (rtx operands[])
2790 /* The quad-word fp compare library routines all return nonzero to indicate
2791 true, which is different from the equivalent libgcc routines, so we must
2792 handle them specially here. */
2793 if (GET_MODE (operands[2]) == TFmode && ! TARGET_HARD_QUAD)
2795 operands[1] = sparc_emit_float_lib_cmp (operands[2], operands[3],
2796 GET_CODE (operands[1]));
2797 operands[2] = XEXP (operands[1], 0);
2798 operands[3] = XEXP (operands[1], 1);
2801 code = GET_CODE (operands[1]);
2805 /* For seq/sne on v9 we use the same code as v8 (the addx/subx method has
2806 more applications). The exception to this is "reg != 0" which can
2807 be done in one instruction on v9 (so we do it). */
2810 if (GET_MODE (x) == SImode)
2814 pat = gen_seqsidi_special (operands[0], x, y);
2816 pat = gen_seqsisi_special (operands[0], x, y);
2820 else if (GET_MODE (x) == DImode)
2822 rtx pat = gen_seqdi_special (operands[0], x, y);
2830 if (GET_MODE (x) == SImode)
2834 pat = gen_snesidi_special (operands[0], x, y);
2836 pat = gen_snesisi_special (operands[0], x, y);
2840 else if (GET_MODE (x) == DImode)
2844 pat = gen_snedi_special_vis3 (operands[0], x, y);
2846 pat = gen_snedi_special (operands[0], x, y);
2854 && GET_MODE (x) == DImode
2856 && (code == GTU || code == LTU))
2857 && gen_v9_scc (operands[0], code, x, y))
2860 /* We can do LTU and GEU using the addx/subx instructions too. And
2861 for GTU/LEU, if both operands are registers swap them and fall
2862 back to the easy case. */
2863 if (code == GTU || code == LEU)
2865 if ((GET_CODE (x) == REG || GET_CODE (x) == SUBREG)
2866 && (GET_CODE (y) == REG || GET_CODE (y) == SUBREG))
2871 code = swap_condition (code);
2876 || (!TARGET_VIS3 && code == GEU))
2878 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
2879 gen_rtx_fmt_ee (code, GET_MODE (operands[0]),
2880 gen_compare_reg_1 (code, x, y),
2885 /* All the posibilities to use addx/subx based sequences has been
2886 exhausted, try for a 3 instruction sequence using v9 conditional
2888 if (TARGET_V9 && gen_v9_scc (operands[0], code, x, y))
2891 /* Nope, do branches. */
2895 /* Emit a conditional jump insn for the v9 architecture using comparison code
2896 CODE and jump target LABEL.
2897 This function exists to take advantage of the v9 brxx insns. */
2900 emit_v9_brxx_insn (enum rtx_code code, rtx op0, rtx label)
2902 emit_jump_insn (gen_rtx_SET (VOIDmode,
2904 gen_rtx_IF_THEN_ELSE (VOIDmode,
2905 gen_rtx_fmt_ee (code, GET_MODE (op0),
2907 gen_rtx_LABEL_REF (VOIDmode, label),
2911 /* Emit a conditional jump insn for the UA2011 architecture using
2912 comparison code CODE and jump target LABEL. This function exists
2913 to take advantage of the UA2011 Compare and Branch insns. */
2916 emit_cbcond_insn (enum rtx_code code, rtx op0, rtx op1, rtx label)
2920 if_then_else = gen_rtx_IF_THEN_ELSE (VOIDmode,
2921 gen_rtx_fmt_ee(code, GET_MODE(op0),
2923 gen_rtx_LABEL_REF (VOIDmode, label),
2926 emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, if_then_else));
2930 emit_conditional_branch_insn (rtx operands[])
2932 /* The quad-word fp compare library routines all return nonzero to indicate
2933 true, which is different from the equivalent libgcc routines, so we must
2934 handle them specially here. */
2935 if (GET_MODE (operands[1]) == TFmode && ! TARGET_HARD_QUAD)
2937 operands[0] = sparc_emit_float_lib_cmp (operands[1], operands[2],
2938 GET_CODE (operands[0]));
2939 operands[1] = XEXP (operands[0], 0);
2940 operands[2] = XEXP (operands[0], 1);
2943 /* If we can tell early on that the comparison is against a constant
2944 that won't fit in the 5-bit signed immediate field of a cbcond,
2945 use one of the other v9 conditional branch sequences. */
2947 && GET_CODE (operands[1]) == REG
2948 && (GET_MODE (operands[1]) == SImode
2949 || (TARGET_ARCH64 && GET_MODE (operands[1]) == DImode))
2950 && (GET_CODE (operands[2]) != CONST_INT
2951 || SPARC_SIMM5_P (INTVAL (operands[2]))))
2953 emit_cbcond_insn (GET_CODE (operands[0]), operands[1], operands[2], operands[3]);
2957 if (TARGET_ARCH64 && operands[2] == const0_rtx
2958 && GET_CODE (operands[1]) == REG
2959 && GET_MODE (operands[1]) == DImode)
2961 emit_v9_brxx_insn (GET_CODE (operands[0]), operands[1], operands[3]);
2965 operands[1] = gen_compare_reg (operands[0]);
2966 operands[2] = const0_rtx;
2967 operands[0] = gen_rtx_fmt_ee (GET_CODE (operands[0]), VOIDmode,
2968 operands[1], operands[2]);
2969 emit_jump_insn (gen_cbranchcc4 (operands[0], operands[1], operands[2],
2974 /* Generate a DFmode part of a hard TFmode register.
2975 REG is the TFmode hard register, LOW is 1 for the
2976 low 64bit of the register and 0 otherwise.
2979 gen_df_reg (rtx reg, int low)
2981 int regno = REGNO (reg);
2983 if ((WORDS_BIG_ENDIAN == 0) ^ (low != 0))
2984 regno += (TARGET_ARCH64 && SPARC_INT_REG_P (regno)) ? 1 : 2;
2985 return gen_rtx_REG (DFmode, regno);
2988 /* Generate a call to FUNC with OPERANDS. Operand 0 is the return value.
2989 Unlike normal calls, TFmode operands are passed by reference. It is
2990 assumed that no more than 3 operands are required. */
2993 emit_soft_tfmode_libcall (const char *func_name, int nargs, rtx *operands)
2995 rtx ret_slot = NULL, arg[3], func_sym;
2998 /* We only expect to be called for conversions, unary, and binary ops. */
2999 gcc_assert (nargs == 2 || nargs == 3);
3001 for (i = 0; i < nargs; ++i)
3003 rtx this_arg = operands[i];
3006 /* TFmode arguments and return values are passed by reference. */
3007 if (GET_MODE (this_arg) == TFmode)
3009 int force_stack_temp;
3011 force_stack_temp = 0;
3012 if (TARGET_BUGGY_QP_LIB && i == 0)
3013 force_stack_temp = 1;
3015 if (GET_CODE (this_arg) == MEM
3016 && ! force_stack_temp)
3018 tree expr = MEM_EXPR (this_arg);
3020 mark_addressable (expr);
3021 this_arg = XEXP (this_arg, 0);
3023 else if (CONSTANT_P (this_arg)
3024 && ! force_stack_temp)
3026 this_slot = force_const_mem (TFmode, this_arg);
3027 this_arg = XEXP (this_slot, 0);
3031 this_slot = assign_stack_temp (TFmode, GET_MODE_SIZE (TFmode));
3033 /* Operand 0 is the return value. We'll copy it out later. */
3035 emit_move_insn (this_slot, this_arg);
3037 ret_slot = this_slot;
3039 this_arg = XEXP (this_slot, 0);
3046 func_sym = gen_rtx_SYMBOL_REF (Pmode, func_name);
3048 if (GET_MODE (operands[0]) == TFmode)
3051 emit_library_call (func_sym, LCT_NORMAL, VOIDmode, 2,
3052 arg[0], GET_MODE (arg[0]),
3053 arg[1], GET_MODE (arg[1]));
3055 emit_library_call (func_sym, LCT_NORMAL, VOIDmode, 3,
3056 arg[0], GET_MODE (arg[0]),
3057 arg[1], GET_MODE (arg[1]),
3058 arg[2], GET_MODE (arg[2]));
3061 emit_move_insn (operands[0], ret_slot);
3067 gcc_assert (nargs == 2);
3069 ret = emit_library_call_value (func_sym, operands[0], LCT_NORMAL,
3070 GET_MODE (operands[0]), 1,
3071 arg[1], GET_MODE (arg[1]));
3073 if (ret != operands[0])
3074 emit_move_insn (operands[0], ret);
3078 /* Expand soft-float TFmode calls to sparc abi routines. */
3081 emit_soft_tfmode_binop (enum rtx_code code, rtx *operands)
3103 emit_soft_tfmode_libcall (func, 3, operands);
3107 emit_soft_tfmode_unop (enum rtx_code code, rtx *operands)
3111 gcc_assert (code == SQRT);
3114 emit_soft_tfmode_libcall (func, 2, operands);
3118 emit_soft_tfmode_cvt (enum rtx_code code, rtx *operands)
3125 switch (GET_MODE (operands[1]))
3138 case FLOAT_TRUNCATE:
3139 switch (GET_MODE (operands[0]))
3153 switch (GET_MODE (operands[1]))
3158 operands[1] = gen_rtx_SIGN_EXTEND (DImode, operands[1]);
3168 case UNSIGNED_FLOAT:
3169 switch (GET_MODE (operands[1]))
3174 operands[1] = gen_rtx_ZERO_EXTEND (DImode, operands[1]);
3185 switch (GET_MODE (operands[0]))
3199 switch (GET_MODE (operands[0]))
3216 emit_soft_tfmode_libcall (func, 2, operands);
3219 /* Expand a hard-float tfmode operation. All arguments must be in
3223 emit_hard_tfmode_operation (enum rtx_code code, rtx *operands)
3227 if (GET_RTX_CLASS (code) == RTX_UNARY)
3229 operands[1] = force_reg (GET_MODE (operands[1]), operands[1]);
3230 op = gen_rtx_fmt_e (code, GET_MODE (operands[0]), operands[1]);
3234 operands[1] = force_reg (GET_MODE (operands[1]), operands[1]);
3235 operands[2] = force_reg (GET_MODE (operands[2]), operands[2]);
3236 op = gen_rtx_fmt_ee (code, GET_MODE (operands[0]),
3237 operands[1], operands[2]);
3240 if (register_operand (operands[0], VOIDmode))
3243 dest = gen_reg_rtx (GET_MODE (operands[0]));
3245 emit_insn (gen_rtx_SET (VOIDmode, dest, op));
3247 if (dest != operands[0])
3248 emit_move_insn (operands[0], dest);
3252 emit_tfmode_binop (enum rtx_code code, rtx *operands)
3254 if (TARGET_HARD_QUAD)
3255 emit_hard_tfmode_operation (code, operands);
3257 emit_soft_tfmode_binop (code, operands);
3261 emit_tfmode_unop (enum rtx_code code, rtx *operands)
3263 if (TARGET_HARD_QUAD)
3264 emit_hard_tfmode_operation (code, operands);
3266 emit_soft_tfmode_unop (code, operands);
3270 emit_tfmode_cvt (enum rtx_code code, rtx *operands)
3272 if (TARGET_HARD_QUAD)
3273 emit_hard_tfmode_operation (code, operands);
3275 emit_soft_tfmode_cvt (code, operands);
3278 /* Return nonzero if a branch/jump/call instruction will be emitting
3279 nop into its delay slot. */
3282 empty_delay_slot (rtx insn)
3286 /* If no previous instruction (should not happen), return true. */
3287 if (PREV_INSN (insn) == NULL)
3290 seq = NEXT_INSN (PREV_INSN (insn));
3291 if (GET_CODE (PATTERN (seq)) == SEQUENCE)
3297 /* Return nonzero if we should emit a nop after a cbcond instruction.
3298 The cbcond instruction does not have a delay slot, however there is
3299 a severe performance penalty if a control transfer appears right
3300 after a cbcond. Therefore we emit a nop when we detect this
3304 emit_cbcond_nop (rtx insn)
3306 rtx next = next_active_insn (insn);
3311 if (NONJUMP_INSN_P (next)
3312 && GET_CODE (PATTERN (next)) == SEQUENCE)
3313 next = XVECEXP (PATTERN (next), 0, 0);
3314 else if (CALL_P (next)
3315 && GET_CODE (PATTERN (next)) == PARALLEL)
3317 rtx delay = XVECEXP (PATTERN (next), 0, 1);
3319 if (GET_CODE (delay) == RETURN)
3321 /* It's a sibling call. Do not emit the nop if we're going
3322 to emit something other than the jump itself as the first
3323 instruction of the sibcall sequence. */
3324 if (sparc_leaf_function_p || TARGET_FLAT)
3329 if (NONJUMP_INSN_P (next))
3335 /* Return nonzero if TRIAL can go into the call delay slot. */
3338 tls_call_delay (rtx trial)
3343 call __tls_get_addr, %tgd_call (foo)
3344 add %l7, %o0, %o0, %tgd_add (foo)
3345 while Sun as/ld does not. */
3346 if (TARGET_GNU_TLS || !TARGET_TLS)
3349 pat = PATTERN (trial);
3351 /* We must reject tgd_add{32|64}, i.e.
3352 (set (reg) (plus (reg) (unspec [(reg) (symbol_ref)] UNSPEC_TLSGD)))
3353 and tldm_add{32|64}, i.e.
3354 (set (reg) (plus (reg) (unspec [(reg) (symbol_ref)] UNSPEC_TLSLDM)))
3356 if (GET_CODE (pat) == SET
3357 && GET_CODE (SET_SRC (pat)) == PLUS)
3359 rtx unspec = XEXP (SET_SRC (pat), 1);
3361 if (GET_CODE (unspec) == UNSPEC
3362 && (XINT (unspec, 1) == UNSPEC_TLSGD
3363 || XINT (unspec, 1) == UNSPEC_TLSLDM))
3370 /* Return nonzero if TRIAL, an insn, can be combined with a 'restore'
3371 instruction. RETURN_P is true if the v9 variant 'return' is to be
3372 considered in the test too.
3374 TRIAL must be a SET whose destination is a REG appropriate for the
3375 'restore' instruction or, if RETURN_P is true, for the 'return'
3379 eligible_for_restore_insn (rtx trial, bool return_p)
3381 rtx pat = PATTERN (trial);
3382 rtx src = SET_SRC (pat);
3383 bool src_is_freg = false;
3386 /* Since we now can do moves between float and integer registers when
3387 VIS3 is enabled, we have to catch this case. We can allow such
3388 moves when doing a 'return' however. */
3390 if (GET_CODE (src_reg) == SUBREG)
3391 src_reg = SUBREG_REG (src_reg);
3392 if (GET_CODE (src_reg) == REG
3393 && SPARC_FP_REG_P (REGNO (src_reg)))
3396 /* The 'restore src,%g0,dest' pattern for word mode and below. */
3397 if (GET_MODE_CLASS (GET_MODE (src)) != MODE_FLOAT
3398 && arith_operand (src, GET_MODE (src))
3402 return GET_MODE_SIZE (GET_MODE (src)) <= GET_MODE_SIZE (DImode);
3404 return GET_MODE_SIZE (GET_MODE (src)) <= GET_MODE_SIZE (SImode);
3407 /* The 'restore src,%g0,dest' pattern for double-word mode. */
3408 else if (GET_MODE_CLASS (GET_MODE (src)) != MODE_FLOAT
3409 && arith_double_operand (src, GET_MODE (src))
3411 return GET_MODE_SIZE (GET_MODE (src)) <= GET_MODE_SIZE (DImode);
3413 /* The 'restore src,%g0,dest' pattern for float if no FPU. */
3414 else if (! TARGET_FPU && register_operand (src, SFmode))
3417 /* The 'restore src,%g0,dest' pattern for double if no FPU. */
3418 else if (! TARGET_FPU && TARGET_ARCH64 && register_operand (src, DFmode))
3421 /* If we have the 'return' instruction, anything that does not use
3422 local or output registers and can go into a delay slot wins. */
3425 && !epilogue_renumber (&pat, 1)
3426 && get_attr_in_uncond_branch_delay (trial)
3427 == IN_UNCOND_BRANCH_DELAY_TRUE)
3430 /* The 'restore src1,src2,dest' pattern for SImode. */
3431 else if (GET_CODE (src) == PLUS
3432 && register_operand (XEXP (src, 0), SImode)
3433 && arith_operand (XEXP (src, 1), SImode))
3436 /* The 'restore src1,src2,dest' pattern for DImode. */
3437 else if (GET_CODE (src) == PLUS
3438 && register_operand (XEXP (src, 0), DImode)
3439 && arith_double_operand (XEXP (src, 1), DImode))
3442 /* The 'restore src1,%lo(src2),dest' pattern. */
3443 else if (GET_CODE (src) == LO_SUM
3444 && ! TARGET_CM_MEDMID
3445 && ((register_operand (XEXP (src, 0), SImode)
3446 && immediate_operand (XEXP (src, 1), SImode))
3448 && register_operand (XEXP (src, 0), DImode)
3449 && immediate_operand (XEXP (src, 1), DImode))))
3452 /* The 'restore src,src,dest' pattern. */
3453 else if (GET_CODE (src) == ASHIFT
3454 && (register_operand (XEXP (src, 0), SImode)
3455 || register_operand (XEXP (src, 0), DImode))
3456 && XEXP (src, 1) == const1_rtx)
3462 /* Return nonzero if TRIAL can go into the function return's delay slot. */
3465 eligible_for_return_delay (rtx trial)
3470 if (! NONJUMP_INSN_P (trial))
3473 if (get_attr_length (trial) != 1)
3476 /* If the function uses __builtin_eh_return, the eh_return machinery
3477 occupies the delay slot. */
3478 if (crtl->calls_eh_return)
3481 /* In the case of a leaf or flat function, anything can go into the slot. */
3482 if (sparc_leaf_function_p || TARGET_FLAT)
3484 get_attr_in_uncond_branch_delay (trial) == IN_UNCOND_BRANCH_DELAY_TRUE;
3486 pat = PATTERN (trial);
3487 if (GET_CODE (pat) == PARALLEL)
3493 for (i = XVECLEN (pat, 0) - 1; i >= 0; i--)
3495 rtx expr = XVECEXP (pat, 0, i);
3496 if (GET_CODE (expr) != SET)
3498 if (GET_CODE (SET_DEST (expr)) != REG)
3500 regno = REGNO (SET_DEST (expr));
3501 if (regno >= 8 && regno < 24)
3504 return !epilogue_renumber (&pat, 1)
3505 && (get_attr_in_uncond_branch_delay (trial)
3506 == IN_UNCOND_BRANCH_DELAY_TRUE);
3509 if (GET_CODE (pat) != SET)
3512 if (GET_CODE (SET_DEST (pat)) != REG)
3515 regno = REGNO (SET_DEST (pat));
3517 /* Otherwise, only operations which can be done in tandem with
3518 a `restore' or `return' insn can go into the delay slot. */
3519 if (regno >= 8 && regno < 24)
3522 /* If this instruction sets up floating point register and we have a return
3523 instruction, it can probably go in. But restore will not work
3525 if (! SPARC_INT_REG_P (regno))
3527 && !epilogue_renumber (&pat, 1)
3528 && get_attr_in_uncond_branch_delay (trial)
3529 == IN_UNCOND_BRANCH_DELAY_TRUE);
3531 return eligible_for_restore_insn (trial, true);
3534 /* Return nonzero if TRIAL can go into the sibling call's delay slot. */
3537 eligible_for_sibcall_delay (rtx trial)
3541 if (! NONJUMP_INSN_P (trial) || GET_CODE (PATTERN (trial)) != SET)
3544 if (get_attr_length (trial) != 1)
3547 pat = PATTERN (trial);
3549 if (sparc_leaf_function_p || TARGET_FLAT)
3551 /* If the tail call is done using the call instruction,
3552 we have to restore %o7 in the delay slot. */
3553 if (LEAF_SIBCALL_SLOT_RESERVED_P)
3556 /* %g1 is used to build the function address */
3557 if (reg_mentioned_p (gen_rtx_REG (Pmode, 1), pat))
3563 /* Otherwise, only operations which can be done in tandem with
3564 a `restore' insn can go into the delay slot. */
3565 if (GET_CODE (SET_DEST (pat)) != REG
3566 || (REGNO (SET_DEST (pat)) >= 8 && REGNO (SET_DEST (pat)) < 24)
3567 || ! SPARC_INT_REG_P (REGNO (SET_DEST (pat))))
3570 /* If it mentions %o7, it can't go in, because sibcall will clobber it
3572 if (reg_mentioned_p (gen_rtx_REG (Pmode, 15), pat))
3575 return eligible_for_restore_insn (trial, false);
3578 /* Determine if it's legal to put X into the constant pool. This
3579 is not possible if X contains the address of a symbol that is
3580 not constant (TLS) or not known at final link time (PIC). */
3583 sparc_cannot_force_const_mem (enum machine_mode mode, rtx x)
3585 switch (GET_CODE (x))
3590 /* Accept all non-symbolic constants. */
3594 /* Labels are OK iff we are non-PIC. */
3595 return flag_pic != 0;
3598 /* 'Naked' TLS symbol references are never OK,
3599 non-TLS symbols are OK iff we are non-PIC. */
3600 if (SYMBOL_REF_TLS_MODEL (x))
3603 return flag_pic != 0;
3606 return sparc_cannot_force_const_mem (mode, XEXP (x, 0));
3609 return sparc_cannot_force_const_mem (mode, XEXP (x, 0))
3610 || sparc_cannot_force_const_mem (mode, XEXP (x, 1));
3618 /* Global Offset Table support. */
3619 static GTY(()) rtx got_helper_rtx = NULL_RTX;
3620 static GTY(()) rtx global_offset_table_rtx = NULL_RTX;
3622 /* Return the SYMBOL_REF for the Global Offset Table. */
3624 static GTY(()) rtx sparc_got_symbol = NULL_RTX;
3629 if (!sparc_got_symbol)
3630 sparc_got_symbol = gen_rtx_SYMBOL_REF (Pmode, "_GLOBAL_OFFSET_TABLE_");
3632 return sparc_got_symbol;
3635 /* Ensure that we are not using patterns that are not OK with PIC. */
3645 op = recog_data.operand[i];
3646 gcc_assert (GET_CODE (op) != SYMBOL_REF
3647 && (GET_CODE (op) != CONST
3648 || (GET_CODE (XEXP (op, 0)) == MINUS
3649 && XEXP (XEXP (op, 0), 0) == sparc_got ()
3650 && GET_CODE (XEXP (XEXP (op, 0), 1)) == CONST)));
3657 /* Return true if X is an address which needs a temporary register when
3658 reloaded while generating PIC code. */
3661 pic_address_needs_scratch (rtx x)
3663 /* An address which is a symbolic plus a non SMALL_INT needs a temp reg. */
3664 if (GET_CODE (x) == CONST && GET_CODE (XEXP (x, 0)) == PLUS
3665 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF
3666 && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT
3667 && ! SMALL_INT (XEXP (XEXP (x, 0), 1)))
3673 /* Determine if a given RTX is a valid constant. We already know this
3674 satisfies CONSTANT_P. */
3677 sparc_legitimate_constant_p (enum machine_mode mode, rtx x)
3679 switch (GET_CODE (x))
3683 if (sparc_tls_referenced_p (x))
3688 if (GET_MODE (x) == VOIDmode)
3691 /* Floating point constants are generally not ok.
3692 The only exception is 0.0 and all-ones in VIS. */
3694 && SCALAR_FLOAT_MODE_P (mode)
3695 && (const_zero_operand (x, mode)
3696 || const_all_ones_operand (x, mode)))
3702 /* Vector constants are generally not ok.
3703 The only exception is 0 or -1 in VIS. */
3705 && (const_zero_operand (x, mode)
3706 || const_all_ones_operand (x, mode)))
3718 /* Determine if a given RTX is a valid constant address. */
3721 constant_address_p (rtx x)
3723 switch (GET_CODE (x))
3731 if (flag_pic && pic_address_needs_scratch (x))
3733 return sparc_legitimate_constant_p (Pmode, x);
3736 return !flag_pic && sparc_legitimate_constant_p (Pmode, x);
3743 /* Nonzero if the constant value X is a legitimate general operand
3744 when generating PIC code. It is given that flag_pic is on and
3745 that X satisfies CONSTANT_P or is a CONST_DOUBLE. */
3748 legitimate_pic_operand_p (rtx x)
3750 if (pic_address_needs_scratch (x))
3752 if (sparc_tls_referenced_p (x))
3757 #define RTX_OK_FOR_OFFSET_P(X, MODE) \
3759 && INTVAL (X) >= -0x1000 \
3760 && INTVAL (X) < (0x1000 - GET_MODE_SIZE (MODE)))
3762 #define RTX_OK_FOR_OLO10_P(X, MODE) \
3764 && INTVAL (X) >= -0x1000 \
3765 && INTVAL (X) < (0xc00 - GET_MODE_SIZE (MODE)))
3767 /* Handle the TARGET_LEGITIMATE_ADDRESS_P target hook.
3769 On SPARC, the actual legitimate addresses must be REG+REG or REG+SMALLINT
3770 ordinarily. This changes a bit when generating PIC. */
3773 sparc_legitimate_address_p (enum machine_mode mode, rtx addr, bool strict)
3775 rtx rs1 = NULL, rs2 = NULL, imm1 = NULL;
3777 if (REG_P (addr) || GET_CODE (addr) == SUBREG)
3779 else if (GET_CODE (addr) == PLUS)
3781 rs1 = XEXP (addr, 0);
3782 rs2 = XEXP (addr, 1);
3784 /* Canonicalize. REG comes first, if there are no regs,
3785 LO_SUM comes first. */
3787 && GET_CODE (rs1) != SUBREG
3789 || GET_CODE (rs2) == SUBREG
3790 || (GET_CODE (rs2) == LO_SUM && GET_CODE (rs1) != LO_SUM)))
3792 rs1 = XEXP (addr, 1);
3793 rs2 = XEXP (addr, 0);
3797 && rs1 == pic_offset_table_rtx
3799 && GET_CODE (rs2) != SUBREG
3800 && GET_CODE (rs2) != LO_SUM
3801 && GET_CODE (rs2) != MEM
3802 && !(GET_CODE (rs2) == SYMBOL_REF && SYMBOL_REF_TLS_MODEL (rs2))
3803 && (! symbolic_operand (rs2, VOIDmode) || mode == Pmode)
3804 && (GET_CODE (rs2) != CONST_INT || SMALL_INT (rs2)))
3806 || GET_CODE (rs1) == SUBREG)
3807 && RTX_OK_FOR_OFFSET_P (rs2, mode)))
3812 else if ((REG_P (rs1) || GET_CODE (rs1) == SUBREG)
3813 && (REG_P (rs2) || GET_CODE (rs2) == SUBREG))
3815 /* We prohibit REG + REG for TFmode when there are no quad move insns
3816 and we consequently need to split. We do this because REG+REG
3817 is not an offsettable address. If we get the situation in reload
3818 where source and destination of a movtf pattern are both MEMs with
3819 REG+REG address, then only one of them gets converted to an
3820 offsettable address. */
3822 && ! (TARGET_ARCH64 && TARGET_HARD_QUAD))
3825 /* Likewise for TImode, but in all cases. */
3829 /* We prohibit REG + REG on ARCH32 if not optimizing for
3830 DFmode/DImode because then mem_min_alignment is likely to be zero
3831 after reload and the forced split would lack a matching splitter
3833 if (TARGET_ARCH32 && !optimize
3834 && (mode == DFmode || mode == DImode))
3837 else if (USE_AS_OFFSETABLE_LO10
3838 && GET_CODE (rs1) == LO_SUM
3840 && ! TARGET_CM_MEDMID
3841 && RTX_OK_FOR_OLO10_P (rs2, mode))
3844 imm1 = XEXP (rs1, 1);
3845 rs1 = XEXP (rs1, 0);
3846 if (!CONSTANT_P (imm1)
3847 || (GET_CODE (rs1) == SYMBOL_REF && SYMBOL_REF_TLS_MODEL (rs1)))
3851 else if (GET_CODE (addr) == LO_SUM)
3853 rs1 = XEXP (addr, 0);
3854 imm1 = XEXP (addr, 1);
3856 if (!CONSTANT_P (imm1)
3857 || (GET_CODE (rs1) == SYMBOL_REF && SYMBOL_REF_TLS_MODEL (rs1)))
3860 /* We can't allow TFmode in 32-bit mode, because an offset greater
3861 than the alignment (8) may cause the LO_SUM to overflow. */
3862 if (mode == TFmode && TARGET_ARCH32)
3865 else if (GET_CODE (addr) == CONST_INT && SMALL_INT (addr))
3870 if (GET_CODE (rs1) == SUBREG)
3871 rs1 = SUBREG_REG (rs1);
3877 if (GET_CODE (rs2) == SUBREG)
3878 rs2 = SUBREG_REG (rs2);
3885 if (!REGNO_OK_FOR_BASE_P (REGNO (rs1))
3886 || (rs2 && !REGNO_OK_FOR_BASE_P (REGNO (rs2))))
3891 if ((! SPARC_INT_REG_P (REGNO (rs1))
3892 && REGNO (rs1) != FRAME_POINTER_REGNUM
3893 && REGNO (rs1) < FIRST_PSEUDO_REGISTER)
3895 && (! SPARC_INT_REG_P (REGNO (rs2))
3896 && REGNO (rs2) != FRAME_POINTER_REGNUM
3897 && REGNO (rs2) < FIRST_PSEUDO_REGISTER)))
3903 /* Return the SYMBOL_REF for the tls_get_addr function. */
3905 static GTY(()) rtx sparc_tls_symbol = NULL_RTX;
3908 sparc_tls_get_addr (void)
3910 if (!sparc_tls_symbol)
3911 sparc_tls_symbol = gen_rtx_SYMBOL_REF (Pmode, "__tls_get_addr");
3913 return sparc_tls_symbol;
3916 /* Return the Global Offset Table to be used in TLS mode. */
3919 sparc_tls_got (void)
3921 /* In PIC mode, this is just the PIC offset table. */
3924 crtl->uses_pic_offset_table = 1;
3925 return pic_offset_table_rtx;
3928 /* In non-PIC mode, Sun as (unlike GNU as) emits PC-relative relocations for
3929 the GOT symbol with the 32-bit ABI, so we reload the GOT register. */
3930 if (TARGET_SUN_TLS && TARGET_ARCH32)
3932 load_got_register ();
3933 return global_offset_table_rtx;
3936 /* In all other cases, we load a new pseudo with the GOT symbol. */
3937 return copy_to_reg (sparc_got ());
3940 /* Return true if X contains a thread-local symbol. */
3943 sparc_tls_referenced_p (rtx x)
3945 if (!TARGET_HAVE_TLS)
3948 if (GET_CODE (x) == CONST && GET_CODE (XEXP (x, 0)) == PLUS)
3949 x = XEXP (XEXP (x, 0), 0);
3951 if (GET_CODE (x) == SYMBOL_REF && SYMBOL_REF_TLS_MODEL (x))
3954 /* That's all we handle in sparc_legitimize_tls_address for now. */
3958 /* ADDR contains a thread-local SYMBOL_REF. Generate code to compute
3959 this (thread-local) address. */
3962 sparc_legitimize_tls_address (rtx addr)
3964 rtx temp1, temp2, temp3, ret, o0, got, insn;
3966 gcc_assert (can_create_pseudo_p ());
3968 if (GET_CODE (addr) == SYMBOL_REF)
3969 switch (SYMBOL_REF_TLS_MODEL (addr))
3971 case TLS_MODEL_GLOBAL_DYNAMIC:
3973 temp1 = gen_reg_rtx (SImode);
3974 temp2 = gen_reg_rtx (SImode);
3975 ret = gen_reg_rtx (Pmode);
3976 o0 = gen_rtx_REG (Pmode, 8);
3977 got = sparc_tls_got ();
3978 emit_insn (gen_tgd_hi22 (temp1, addr));
3979 emit_insn (gen_tgd_lo10 (temp2, temp1, addr));
3982 emit_insn (gen_tgd_add32 (o0, got, temp2, addr));
3983 insn = emit_call_insn (gen_tgd_call32 (o0, sparc_tls_get_addr (),
3988 emit_insn (gen_tgd_add64 (o0, got, temp2, addr));
3989 insn = emit_call_insn (gen_tgd_call64 (o0, sparc_tls_get_addr (),
3992 use_reg (&CALL_INSN_FUNCTION_USAGE (insn), o0);
3993 insn = get_insns ();
3995 emit_libcall_block (insn, ret, o0, addr);
3998 case TLS_MODEL_LOCAL_DYNAMIC:
4000 temp1 = gen_reg_rtx (SImode);
4001 temp2 = gen_reg_rtx (SImode);
4002 temp3 = gen_reg_rtx (Pmode);
4003 ret = gen_reg_rtx (Pmode);
4004 o0 = gen_rtx_REG (Pmode, 8);
4005 got = sparc_tls_got ();
4006 emit_insn (gen_tldm_hi22 (temp1));
4007 emit_insn (gen_tldm_lo10 (temp2, temp1));
4010 emit_insn (gen_tldm_add32 (o0, got, temp2));
4011 insn = emit_call_insn (gen_tldm_call32 (o0, sparc_tls_get_addr (),
4016 emit_insn (gen_tldm_add64 (o0, got, temp2));
4017 insn = emit_call_insn (gen_tldm_call64 (o0, sparc_tls_get_addr (),
4020 use_reg (&CALL_INSN_FUNCTION_USAGE (insn), o0);
4021 insn = get_insns ();
4023 emit_libcall_block (insn, temp3, o0,
4024 gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx),
4025 UNSPEC_TLSLD_BASE));
4026 temp1 = gen_reg_rtx (SImode);
4027 temp2 = gen_reg_rtx (SImode);
4028 emit_insn (gen_tldo_hix22 (temp1, addr));
4029 emit_insn (gen_tldo_lox10 (temp2, temp1, addr));
4031 emit_insn (gen_tldo_add32 (ret, temp3, temp2, addr));
4033 emit_insn (gen_tldo_add64 (ret, temp3, temp2, addr));
4036 case TLS_MODEL_INITIAL_EXEC:
4037 temp1 = gen_reg_rtx (SImode);
4038 temp2 = gen_reg_rtx (SImode);
4039 temp3 = gen_reg_rtx (Pmode);
4040 got = sparc_tls_got ();
4041 emit_insn (gen_tie_hi22 (temp1, addr));
4042 emit_insn (gen_tie_lo10 (temp2, temp1, addr));
4044 emit_insn (gen_tie_ld32 (temp3, got, temp2, addr));
4046 emit_insn (gen_tie_ld64 (temp3, got, temp2, addr));
4049 ret = gen_reg_rtx (Pmode);
4051 emit_insn (gen_tie_add32 (ret, gen_rtx_REG (Pmode, 7),
4054 emit_insn (gen_tie_add64 (ret, gen_rtx_REG (Pmode, 7),
4058 ret = gen_rtx_PLUS (Pmode, gen_rtx_REG (Pmode, 7), temp3);
4061 case TLS_MODEL_LOCAL_EXEC:
4062 temp1 = gen_reg_rtx (Pmode);
4063 temp2 = gen_reg_rtx (Pmode);
4066 emit_insn (gen_tle_hix22_sp32 (temp1, addr));
4067 emit_insn (gen_tle_lox10_sp32 (temp2, temp1, addr));
4071 emit_insn (gen_tle_hix22_sp64 (temp1, addr));
4072 emit_insn (gen_tle_lox10_sp64 (temp2, temp1, addr));
4074 ret = gen_rtx_PLUS (Pmode, gen_rtx_REG (Pmode, 7), temp2);
4081 else if (GET_CODE (addr) == CONST)
4085 gcc_assert (GET_CODE (XEXP (addr, 0)) == PLUS);
4087 base = sparc_legitimize_tls_address (XEXP (XEXP (addr, 0), 0));
4088 offset = XEXP (XEXP (addr, 0), 1);
4090 base = force_operand (base, NULL_RTX);
4091 if (!(GET_CODE (offset) == CONST_INT && SMALL_INT (offset)))
4092 offset = force_reg (Pmode, offset);
4093 ret = gen_rtx_PLUS (Pmode, base, offset);
4097 gcc_unreachable (); /* for now ... */
4102 /* Legitimize PIC addresses. If the address is already position-independent,
4103 we return ORIG. Newly generated position-independent addresses go into a
4104 reg. This is REG if nonzero, otherwise we allocate register(s) as
4108 sparc_legitimize_pic_address (rtx orig, rtx reg)
4110 bool gotdata_op = false;
4112 if (GET_CODE (orig) == SYMBOL_REF
4113 /* See the comment in sparc_expand_move. */
4114 || (GET_CODE (orig) == LABEL_REF && !can_use_mov_pic_label_ref (orig)))
4116 rtx pic_ref, address;
4121 gcc_assert (can_create_pseudo_p ());
4122 reg = gen_reg_rtx (Pmode);
4127 /* If not during reload, allocate another temp reg here for loading
4128 in the address, so that these instructions can be optimized
4130 rtx temp_reg = (! can_create_pseudo_p ()
4131 ? reg : gen_reg_rtx (Pmode));
4133 /* Must put the SYMBOL_REF inside an UNSPEC here so that cse
4134 won't get confused into thinking that these two instructions
4135 are loading in the true address of the symbol. If in the
4136 future a PIC rtx exists, that should be used instead. */
4139 emit_insn (gen_movdi_high_pic (temp_reg, orig));
4140 emit_insn (gen_movdi_lo_sum_pic (temp_reg, temp_reg, orig));
4144 emit_insn (gen_movsi_high_pic (temp_reg, orig));
4145 emit_insn (gen_movsi_lo_sum_pic (temp_reg, temp_reg, orig));
4153 crtl->uses_pic_offset_table = 1;
4157 insn = emit_insn (gen_movdi_pic_gotdata_op (reg,
4158 pic_offset_table_rtx,
4161 insn = emit_insn (gen_movsi_pic_gotdata_op (reg,
4162 pic_offset_table_rtx,
4168 = gen_const_mem (Pmode,
4169 gen_rtx_PLUS (Pmode,
4170 pic_offset_table_rtx, address));
4171 insn = emit_move_insn (reg, pic_ref);
4174 /* Put a REG_EQUAL note on this insn, so that it can be optimized
4176 set_unique_reg_note (insn, REG_EQUAL, orig);
4179 else if (GET_CODE (orig) == CONST)
4183 if (GET_CODE (XEXP (orig, 0)) == PLUS
4184 && XEXP (XEXP (orig, 0), 0) == pic_offset_table_rtx)
4189 gcc_assert (can_create_pseudo_p ());
4190 reg = gen_reg_rtx (Pmode);
4193 gcc_assert (GET_CODE (XEXP (orig, 0)) == PLUS);
4194 base = sparc_legitimize_pic_address (XEXP (XEXP (orig, 0), 0), reg);
4195 offset = sparc_legitimize_pic_address (XEXP (XEXP (orig, 0), 1),
4196 base == reg ? NULL_RTX : reg);
4198 if (GET_CODE (offset) == CONST_INT)
4200 if (SMALL_INT (offset))
4201 return plus_constant (Pmode, base, INTVAL (offset));
4202 else if (can_create_pseudo_p ())
4203 offset = force_reg (Pmode, offset);
4205 /* If we reach here, then something is seriously wrong. */
4208 return gen_rtx_PLUS (Pmode, base, offset);
4210 else if (GET_CODE (orig) == LABEL_REF)
4211 /* ??? We ought to be checking that the register is live instead, in case
4212 it is eliminated. */
4213 crtl->uses_pic_offset_table = 1;
4218 /* Try machine-dependent ways of modifying an illegitimate address X
4219 to be legitimate. If we find one, return the new, valid address.
4221 OLDX is the address as it was before break_out_memory_refs was called.
4222 In some cases it is useful to look at this to decide what needs to be done.
4224 MODE is the mode of the operand pointed to by X.
4226 On SPARC, change REG+N into REG+REG, and REG+(X*Y) into REG+REG. */
4229 sparc_legitimize_address (rtx x, rtx oldx ATTRIBUTE_UNUSED,
4230 enum machine_mode mode)
4234 if (GET_CODE (x) == PLUS && GET_CODE (XEXP (x, 0)) == MULT)
4235 x = gen_rtx_PLUS (Pmode, XEXP (x, 1),
4236 force_operand (XEXP (x, 0), NULL_RTX));
4237 if (GET_CODE (x) == PLUS && GET_CODE (XEXP (x, 1)) == MULT)
4238 x = gen_rtx_PLUS (Pmode, XEXP (x, 0),
4239 force_operand (XEXP (x, 1), NULL_RTX));
4240 if (GET_CODE (x) == PLUS && GET_CODE (XEXP (x, 0)) == PLUS)
4241 x = gen_rtx_PLUS (Pmode, force_operand (XEXP (x, 0), NULL_RTX),
4243 if (GET_CODE (x) == PLUS && GET_CODE (XEXP (x, 1)) == PLUS)
4244 x = gen_rtx_PLUS (Pmode, XEXP (x, 0),
4245 force_operand (XEXP (x, 1), NULL_RTX));
4247 if (x != orig_x && sparc_legitimate_address_p (mode, x, FALSE))
4250 if (sparc_tls_referenced_p (x))
4251 x = sparc_legitimize_tls_address (x);
4253 x = sparc_legitimize_pic_address (x, NULL_RTX);
4254 else if (GET_CODE (x) == PLUS && CONSTANT_ADDRESS_P (XEXP (x, 1)))
4255 x = gen_rtx_PLUS (Pmode, XEXP (x, 0),
4256 copy_to_mode_reg (Pmode, XEXP (x, 1)));
4257 else if (GET_CODE (x) == PLUS && CONSTANT_ADDRESS_P (XEXP (x, 0)))
4258 x = gen_rtx_PLUS (Pmode, XEXP (x, 1),
4259 copy_to_mode_reg (Pmode, XEXP (x, 0)));
4260 else if (GET_CODE (x) == SYMBOL_REF
4261 || GET_CODE (x) == CONST
4262 || GET_CODE (x) == LABEL_REF)
4263 x = copy_to_suggested_reg (x, NULL_RTX, Pmode);
4268 /* Delegitimize an address that was legitimized by the above function. */
4271 sparc_delegitimize_address (rtx x)
4273 x = delegitimize_mem_from_attrs (x);
4275 if (GET_CODE (x) == LO_SUM && GET_CODE (XEXP (x, 1)) == UNSPEC)
4276 switch (XINT (XEXP (x, 1), 1))
4278 case UNSPEC_MOVE_PIC:
4280 x = XVECEXP (XEXP (x, 1), 0, 0);
4281 gcc_assert (GET_CODE (x) == SYMBOL_REF);
4287 /* This is generated by mov{si,di}_pic_label_ref in PIC mode. */
4288 if (GET_CODE (x) == MINUS
4289 && REG_P (XEXP (x, 0))
4290 && REGNO (XEXP (x, 0)) == PIC_OFFSET_TABLE_REGNUM
4291 && GET_CODE (XEXP (x, 1)) == LO_SUM
4292 && GET_CODE (XEXP (XEXP (x, 1), 1)) == UNSPEC
4293 && XINT (XEXP (XEXP (x, 1), 1), 1) == UNSPEC_MOVE_PIC_LABEL)
4295 x = XVECEXP (XEXP (XEXP (x, 1), 1), 0, 0);
4296 gcc_assert (GET_CODE (x) == LABEL_REF);
4302 /* SPARC implementation of LEGITIMIZE_RELOAD_ADDRESS. Returns a value to
4303 replace the input X, or the original X if no replacement is called for.
4304 The output parameter *WIN is 1 if the calling macro should goto WIN,
4307 For SPARC, we wish to handle addresses by splitting them into
4308 HIGH+LO_SUM pairs, retaining the LO_SUM in the memory reference.
4309 This cuts the number of extra insns by one.
4311 Do nothing when generating PIC code and the address is a symbolic
4312 operand or requires a scratch register. */
4315 sparc_legitimize_reload_address (rtx x, enum machine_mode mode,
4316 int opnum, int type,
4317 int ind_levels ATTRIBUTE_UNUSED, int *win)
4319 /* Decompose SImode constants into HIGH+LO_SUM. */
4321 && (mode != TFmode || TARGET_ARCH64)
4322 && GET_MODE (x) == SImode
4323 && GET_CODE (x) != LO_SUM
4324 && GET_CODE (x) != HIGH
4325 && sparc_cmodel <= CM_MEDLOW
4327 && (symbolic_operand (x, Pmode) || pic_address_needs_scratch (x))))
4329 x = gen_rtx_LO_SUM (GET_MODE (x), gen_rtx_HIGH (GET_MODE (x), x), x);
4330 push_reload (XEXP (x, 0), NULL_RTX, &XEXP (x, 0), NULL,
4331 BASE_REG_CLASS, GET_MODE (x), VOIDmode, 0, 0,
4332 opnum, (enum reload_type)type);
4337 /* We have to recognize what we have already generated above. */
4338 if (GET_CODE (x) == LO_SUM && GET_CODE (XEXP (x, 0)) == HIGH)
4340 push_reload (XEXP (x, 0), NULL_RTX, &XEXP (x, 0), NULL,
4341 BASE_REG_CLASS, GET_MODE (x), VOIDmode, 0, 0,
4342 opnum, (enum reload_type)type);
4351 /* Return true if ADDR (a legitimate address expression)
4352 has an effect that depends on the machine mode it is used for.
4358 is not equivalent to
4360 (mem:QI [%l7+a]) (mem:QI [%l7+a+1])
4362 because [%l7+a+1] is interpreted as the address of (a+1). */
4366 sparc_mode_dependent_address_p (const_rtx addr,
4367 addr_space_t as ATTRIBUTE_UNUSED)
4369 if (flag_pic && GET_CODE (addr) == PLUS)
4371 rtx op0 = XEXP (addr, 0);
4372 rtx op1 = XEXP (addr, 1);
4373 if (op0 == pic_offset_table_rtx
4374 && symbolic_operand (op1, VOIDmode))
4381 #ifdef HAVE_GAS_HIDDEN
4382 # define USE_HIDDEN_LINKONCE 1
4384 # define USE_HIDDEN_LINKONCE 0
4388 get_pc_thunk_name (char name[32], unsigned int regno)
4390 const char *reg_name = reg_names[regno];
4392 /* Skip the leading '%' as that cannot be used in a
4396 if (USE_HIDDEN_LINKONCE)
4397 sprintf (name, "__sparc_get_pc_thunk.%s", reg_name);
4399 ASM_GENERATE_INTERNAL_LABEL (name, "LADDPC", regno);
4402 /* Wrapper around the load_pcrel_sym{si,di} patterns. */
4405 gen_load_pcrel_sym (rtx op0, rtx op1, rtx op2, rtx op3)
4407 int orig_flag_pic = flag_pic;
4410 /* The load_pcrel_sym{si,di} patterns require absolute addressing. */
4413 insn = gen_load_pcrel_symdi (op0, op1, op2, op3);
4415 insn = gen_load_pcrel_symsi (op0, op1, op2, op3);
4416 flag_pic = orig_flag_pic;
4421 /* Emit code to load the GOT register. */
4424 load_got_register (void)
4426 /* In PIC mode, this will retrieve pic_offset_table_rtx. */
4427 if (!global_offset_table_rtx)
4428 global_offset_table_rtx = gen_rtx_REG (Pmode, GLOBAL_OFFSET_TABLE_REGNUM);
4430 if (TARGET_VXWORKS_RTP)
4431 emit_insn (gen_vxworks_load_got ());
4434 /* The GOT symbol is subject to a PC-relative relocation so we need a
4435 helper function to add the PC value and thus get the final value. */
4436 if (!got_helper_rtx)
4439 get_pc_thunk_name (name, GLOBAL_OFFSET_TABLE_REGNUM);
4440 got_helper_rtx = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (name));
4443 emit_insn (gen_load_pcrel_sym (global_offset_table_rtx, sparc_got (),
4445 GEN_INT (GLOBAL_OFFSET_TABLE_REGNUM)));
4448 /* Need to emit this whether or not we obey regdecls,
4449 since setjmp/longjmp can cause life info to screw up.
4450 ??? In the case where we don't obey regdecls, this is not sufficient
4451 since we may not fall out the bottom. */
4452 emit_use (global_offset_table_rtx);
4455 /* Emit a call instruction with the pattern given by PAT. ADDR is the
4456 address of the call target. */
4459 sparc_emit_call_insn (rtx pat, rtx addr)
4463 insn = emit_call_insn (pat);
4465 /* The PIC register is live on entry to VxWorks PIC PLT entries. */
4466 if (TARGET_VXWORKS_RTP
4468 && GET_CODE (addr) == SYMBOL_REF
4469 && (SYMBOL_REF_DECL (addr)
4470 ? !targetm.binds_local_p (SYMBOL_REF_DECL (addr))
4471 : !SYMBOL_REF_LOCAL_P (addr)))
4473 use_reg (&CALL_INSN_FUNCTION_USAGE (insn), pic_offset_table_rtx);
4474 crtl->uses_pic_offset_table = 1;
4478 /* Return 1 if RTX is a MEM which is known to be aligned to at
4479 least a DESIRED byte boundary. */
4482 mem_min_alignment (rtx mem, int desired)
4484 rtx addr, base, offset;
4486 /* If it's not a MEM we can't accept it. */
4487 if (GET_CODE (mem) != MEM)
4491 if (!TARGET_UNALIGNED_DOUBLES
4492 && MEM_ALIGN (mem) / BITS_PER_UNIT >= (unsigned)desired)
4495 /* ??? The rest of the function predates MEM_ALIGN so
4496 there is probably a bit of redundancy. */
4497 addr = XEXP (mem, 0);
4498 base = offset = NULL_RTX;
4499 if (GET_CODE (addr) == PLUS)
4501 if (GET_CODE (XEXP (addr, 0)) == REG)
4503 base = XEXP (addr, 0);
4505 /* What we are saying here is that if the base
4506 REG is aligned properly, the compiler will make
4507 sure any REG based index upon it will be so
4509 if (GET_CODE (XEXP (addr, 1)) == CONST_INT)
4510 offset = XEXP (addr, 1);
4512 offset = const0_rtx;
4515 else if (GET_CODE (addr) == REG)
4518 offset = const0_rtx;
4521 if (base != NULL_RTX)
4523 int regno = REGNO (base);
4525 if (regno != HARD_FRAME_POINTER_REGNUM && regno != STACK_POINTER_REGNUM)
4527 /* Check if the compiler has recorded some information
4528 about the alignment of the base REG. If reload has
4529 completed, we already matched with proper alignments.
4530 If not running global_alloc, reload might give us
4531 unaligned pointer to local stack though. */
4533 && REGNO_POINTER_ALIGN (regno) >= desired * BITS_PER_UNIT)
4534 || (optimize && reload_completed))
4535 && (INTVAL (offset) & (desired - 1)) == 0)
4540 if (((INTVAL (offset) - SPARC_STACK_BIAS) & (desired - 1)) == 0)
4544 else if (! TARGET_UNALIGNED_DOUBLES
4545 || CONSTANT_P (addr)
4546 || GET_CODE (addr) == LO_SUM)
4548 /* Anything else we know is properly aligned unless TARGET_UNALIGNED_DOUBLES
4549 is true, in which case we can only assume that an access is aligned if
4550 it is to a constant address, or the address involves a LO_SUM. */
4554 /* An obviously unaligned address. */
4559 /* Vectors to keep interesting information about registers where it can easily
4560 be got. We used to use the actual mode value as the bit number, but there
4561 are more than 32 modes now. Instead we use two tables: one indexed by
4562 hard register number, and one indexed by mode. */
4564 /* The purpose of sparc_mode_class is to shrink the range of modes so that
4565 they all fit (as bit numbers) in a 32-bit word (again). Each real mode is
4566 mapped into one sparc_mode_class mode. */
4568 enum sparc_mode_class {
4569 H_MODE, S_MODE, D_MODE, T_MODE, O_MODE,
4570 SF_MODE, DF_MODE, TF_MODE, OF_MODE,
4574 /* Modes for single-word and smaller quantities. */
4576 ((1 << (int) H_MODE) | (1 << (int) S_MODE) | (1 << (int) SF_MODE))
4578 /* Modes for double-word and smaller quantities. */
4579 #define D_MODES (S_MODES | (1 << (int) D_MODE) | (1 << DF_MODE))
4581 /* Modes for quad-word and smaller quantities. */
4582 #define T_MODES (D_MODES | (1 << (int) T_MODE) | (1 << (int) TF_MODE))
4584 /* Modes for 8-word and smaller quantities. */
4585 #define O_MODES (T_MODES | (1 << (int) O_MODE) | (1 << (int) OF_MODE))
4587 /* Modes for single-float quantities. */
4588 #define SF_MODES ((1 << (int) S_MODE) | (1 << (int) SF_MODE))
4590 /* Modes for double-float and smaller quantities. */
4591 #define DF_MODES (SF_MODES | (1 << (int) D_MODE) | (1 << DF_MODE))
4593 /* Modes for quad-float and smaller quantities. */
4594 #define TF_MODES (DF_MODES | (1 << (int) TF_MODE))
4596 /* Modes for quad-float pairs and smaller quantities. */
4597 #define OF_MODES (TF_MODES | (1 << (int) OF_MODE))
4599 /* Modes for double-float only quantities. */
4600 #define DF_MODES_NO_S ((1 << (int) D_MODE) | (1 << (int) DF_MODE))
4602 /* Modes for quad-float and double-float only quantities. */
4603 #define TF_MODES_NO_S (DF_MODES_NO_S | (1 << (int) TF_MODE))
4605 /* Modes for quad-float pairs and double-float only quantities. */
4606 #define OF_MODES_NO_S (TF_MODES_NO_S | (1 << (int) OF_MODE))
4608 /* Modes for condition codes. */
4609 #define CC_MODES (1 << (int) CC_MODE)
4610 #define CCFP_MODES (1 << (int) CCFP_MODE)
4612 /* Value is 1 if register/mode pair is acceptable on sparc.
4613 The funny mixture of D and T modes is because integer operations
4614 do not specially operate on tetra quantities, so non-quad-aligned
4615 registers can hold quadword quantities (except %o4 and %i4 because
4616 they cross fixed registers). */
4618 /* This points to either the 32 bit or the 64 bit version. */
4619 const int *hard_regno_mode_classes;
4621 static const int hard_32bit_mode_classes[] = {
4622 S_MODES, S_MODES, T_MODES, S_MODES, T_MODES, S_MODES, D_MODES, S_MODES,
4623 T_MODES, S_MODES, T_MODES, S_MODES, D_MODES, S_MODES, D_MODES, S_MODES,
4624 T_MODES, S_MODES, T_MODES, S_MODES, T_MODES, S_MODES, D_MODES, S_MODES,
4625 T_MODES, S_MODES, T_MODES, S_MODES, D_MODES, S_MODES, D_MODES, S_MODES,
4627 OF_MODES, SF_MODES, DF_MODES, SF_MODES, OF_MODES, SF_MODES, DF_MODES, SF_MODES,
4628 OF_MODES, SF_MODES, DF_MODES, SF_MODES, OF_MODES, SF_MODES, DF_MODES, SF_MODES,
4629 OF_MODES, SF_MODES, DF_MODES, SF_MODES, OF_MODES, SF_MODES, DF_MODES, SF_MODES,
4630 OF_MODES, SF_MODES, DF_MODES, SF_MODES, TF_MODES, SF_MODES, DF_MODES, SF_MODES,
4632 /* FP regs f32 to f63. Only the even numbered registers actually exist,
4633 and none can hold SFmode/SImode values. */
4634 OF_MODES_NO_S, 0, DF_MODES_NO_S, 0, OF_MODES_NO_S, 0, DF_MODES_NO_S, 0,
4635 OF_MODES_NO_S, 0, DF_MODES_NO_S, 0, OF_MODES_NO_S, 0, DF_MODES_NO_S, 0,
4636 OF_MODES_NO_S, 0, DF_MODES_NO_S, 0, OF_MODES_NO_S, 0, DF_MODES_NO_S, 0,
4637 OF_MODES_NO_S, 0, DF_MODES_NO_S, 0, TF_MODES_NO_S, 0, DF_MODES_NO_S, 0,
4640 CCFP_MODES, CCFP_MODES, CCFP_MODES, CCFP_MODES,
4642 /* %icc, %sfp, %gsr */
4643 CC_MODES, 0, D_MODES
4646 static const int hard_64bit_mode_classes[] = {
4647 D_MODES, D_MODES, T_MODES, D_MODES, T_MODES, D_MODES, T_MODES, D_MODES,
4648 O_MODES, D_MODES, T_MODES, D_MODES, T_MODES, D_MODES, T_MODES, D_MODES,
4649 T_MODES, D_MODES, T_MODES, D_MODES, T_MODES, D_MODES, T_MODES, D_MODES,
4650 O_MODES, D_MODES, T_MODES, D_MODES, T_MODES, D_MODES, T_MODES, D_MODES,
4652 OF_MODES, SF_MODES, DF_MODES, SF_MODES, OF_MODES, SF_MODES, DF_MODES, SF_MODES,
4653 OF_MODES, SF_MODES, DF_MODES, SF_MODES, OF_MODES, SF_MODES, DF_MODES, SF_MODES,
4654 OF_MODES, SF_MODES, DF_MODES, SF_MODES, OF_MODES, SF_MODES, DF_MODES, SF_MODES,
4655 OF_MODES, SF_MODES, DF_MODES, SF_MODES, TF_MODES, SF_MODES, DF_MODES, SF_MODES,
4657 /* FP regs f32 to f63. Only the even numbered registers actually exist,
4658 and none can hold SFmode/SImode values. */
4659 OF_MODES_NO_S, 0, DF_MODES_NO_S, 0, OF_MODES_NO_S, 0, DF_MODES_NO_S, 0,
4660 OF_MODES_NO_S, 0, DF_MODES_NO_S, 0, OF_MODES_NO_S, 0, DF_MODES_NO_S, 0,
4661 OF_MODES_NO_S, 0, DF_MODES_NO_S, 0, OF_MODES_NO_S, 0, DF_MODES_NO_S, 0,
4662 OF_MODES_NO_S, 0, DF_MODES_NO_S, 0, TF_MODES_NO_S, 0, DF_MODES_NO_S, 0,
4665 CCFP_MODES, CCFP_MODES, CCFP_MODES, CCFP_MODES,
4667 /* %icc, %sfp, %gsr */
4668 CC_MODES, 0, D_MODES
4671 int sparc_mode_class [NUM_MACHINE_MODES];
4673 enum reg_class sparc_regno_reg_class[FIRST_PSEUDO_REGISTER];
4676 sparc_init_modes (void)
4680 for (i = 0; i < NUM_MACHINE_MODES; i++)
4682 switch (GET_MODE_CLASS (i))
4685 case MODE_PARTIAL_INT:
4686 case MODE_COMPLEX_INT:
4687 if (GET_MODE_SIZE (i) < 4)
4688 sparc_mode_class[i] = 1 << (int) H_MODE;
4689 else if (GET_MODE_SIZE (i) == 4)
4690 sparc_mode_class[i] = 1 << (int) S_MODE;
4691 else if (GET_MODE_SIZE (i) == 8)
4692 sparc_mode_class[i] = 1 << (int) D_MODE;
4693 else if (GET_MODE_SIZE (i) == 16)
4694 sparc_mode_class[i] = 1 << (int) T_MODE;
4695 else if (GET_MODE_SIZE (i) == 32)
4696 sparc_mode_class[i] = 1 << (int) O_MODE;
4698 sparc_mode_class[i] = 0;
4700 case MODE_VECTOR_INT:
4701 if (GET_MODE_SIZE (i) == 4)
4702 sparc_mode_class[i] = 1 << (int) SF_MODE;
4703 else if (GET_MODE_SIZE (i) == 8)
4704 sparc_mode_class[i] = 1 << (int) DF_MODE;
4706 sparc_mode_class[i] = 0;
4709 case MODE_COMPLEX_FLOAT:
4710 if (GET_MODE_SIZE (i) == 4)
4711 sparc_mode_class[i] = 1 << (int) SF_MODE;
4712 else if (GET_MODE_SIZE (i) == 8)
4713 sparc_mode_class[i] = 1 << (int) DF_MODE;
4714 else if (GET_MODE_SIZE (i) == 16)
4715 sparc_mode_class[i] = 1 << (int) TF_MODE;
4716 else if (GET_MODE_SIZE (i) == 32)
4717 sparc_mode_class[i] = 1 << (int) OF_MODE;
4719 sparc_mode_class[i] = 0;
4722 if (i == (int) CCFPmode || i == (int) CCFPEmode)
4723 sparc_mode_class[i] = 1 << (int) CCFP_MODE;
4725 sparc_mode_class[i] = 1 << (int) CC_MODE;
4728 sparc_mode_class[i] = 0;
4734 hard_regno_mode_classes = hard_64bit_mode_classes;
4736 hard_regno_mode_classes = hard_32bit_mode_classes;
4738 /* Initialize the array used by REGNO_REG_CLASS. */
4739 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
4741 if (i < 16 && TARGET_V8PLUS)
4742 sparc_regno_reg_class[i] = I64_REGS;
4743 else if (i < 32 || i == FRAME_POINTER_REGNUM)
4744 sparc_regno_reg_class[i] = GENERAL_REGS;
4746 sparc_regno_reg_class[i] = FP_REGS;
4748 sparc_regno_reg_class[i] = EXTRA_FP_REGS;
4750 sparc_regno_reg_class[i] = FPCC_REGS;
4752 sparc_regno_reg_class[i] = NO_REGS;
4756 /* Return whether REGNO, a global or FP register, must be saved/restored. */
4759 save_global_or_fp_reg_p (unsigned int regno,
4760 int leaf_function ATTRIBUTE_UNUSED)
4762 return !call_used_regs[regno] && df_regs_ever_live_p (regno);
4765 /* Return whether the return address register (%i7) is needed. */
4768 return_addr_reg_needed_p (int leaf_function)
4770 /* If it is live, for example because of __builtin_return_address (0). */
4771 if (df_regs_ever_live_p (RETURN_ADDR_REGNUM))
4774 /* Otherwise, it is needed as save register if %o7 is clobbered. */
4776 /* Loading the GOT register clobbers %o7. */
4777 || crtl->uses_pic_offset_table
4778 || df_regs_ever_live_p (INCOMING_RETURN_ADDR_REGNUM))
4784 /* Return whether REGNO, a local or in register, must be saved/restored. */
4787 save_local_or_in_reg_p (unsigned int regno, int leaf_function)
4789 /* General case: call-saved registers live at some point. */
4790 if (!call_used_regs[regno] && df_regs_ever_live_p (regno))
4793 /* Frame pointer register (%fp) if needed. */
4794 if (regno == HARD_FRAME_POINTER_REGNUM && frame_pointer_needed)
4797 /* Return address register (%i7) if needed. */
4798 if (regno == RETURN_ADDR_REGNUM && return_addr_reg_needed_p (leaf_function))
4801 /* GOT register (%l7) if needed. */
4802 if (regno == PIC_OFFSET_TABLE_REGNUM && crtl->uses_pic_offset_table)
4805 /* If the function accesses prior frames, the frame pointer and the return
4806 address of the previous frame must be saved on the stack. */
4807 if (crtl->accesses_prior_frames
4808 && (regno == HARD_FRAME_POINTER_REGNUM || regno == RETURN_ADDR_REGNUM))
4814 /* Compute the frame size required by the function. This function is called
4815 during the reload pass and also by sparc_expand_prologue. */
4818 sparc_compute_frame_size (HOST_WIDE_INT size, int leaf_function)
4820 HOST_WIDE_INT frame_size, apparent_frame_size;
4821 int args_size, n_global_fp_regs = 0;
4822 bool save_local_in_regs_p = false;
4825 /* If the function allocates dynamic stack space, the dynamic offset is
4826 computed early and contains REG_PARM_STACK_SPACE, so we need to cope. */
4827 if (leaf_function && !cfun->calls_alloca)
4830 args_size = crtl->outgoing_args_size + REG_PARM_STACK_SPACE (cfun->decl);
4832 /* Calculate space needed for global registers. */
4834 for (i = 0; i < 8; i++)
4835 if (save_global_or_fp_reg_p (i, 0))
4836 n_global_fp_regs += 2;
4838 for (i = 0; i < 8; i += 2)
4839 if (save_global_or_fp_reg_p (i, 0) || save_global_or_fp_reg_p (i + 1, 0))
4840 n_global_fp_regs += 2;
4842 /* In the flat window model, find out which local and in registers need to
4843 be saved. We don't reserve space in the current frame for them as they
4844 will be spilled into the register window save area of the caller's frame.
4845 However, as soon as we use this register window save area, we must create
4846 that of the current frame to make it the live one. */
4848 for (i = 16; i < 32; i++)
4849 if (save_local_or_in_reg_p (i, leaf_function))
4851 save_local_in_regs_p = true;
4855 /* Calculate space needed for FP registers. */
4856 for (i = 32; i < (TARGET_V9 ? 96 : 64); i += 2)
4857 if (save_global_or_fp_reg_p (i, 0) || save_global_or_fp_reg_p (i + 1, 0))
4858 n_global_fp_regs += 2;
4861 && n_global_fp_regs == 0
4863 && !save_local_in_regs_p)
4864 frame_size = apparent_frame_size = 0;
4867 /* We subtract STARTING_FRAME_OFFSET, remember it's negative. */
4868 apparent_frame_size = (size - STARTING_FRAME_OFFSET + 7) & -8;
4869 apparent_frame_size += n_global_fp_regs * 4;
4871 /* We need to add the size of the outgoing argument area. */
4872 frame_size = apparent_frame_size + ((args_size + 7) & -8);
4874 /* And that of the register window save area. */
4875 frame_size += FIRST_PARM_OFFSET (cfun->decl);
4877 /* Finally, bump to the appropriate alignment. */
4878 frame_size = SPARC_STACK_ALIGN (frame_size);
4881 /* Set up values for use in prologue and epilogue. */
4882 sparc_frame_size = frame_size;
4883 sparc_apparent_frame_size = apparent_frame_size;
4884 sparc_n_global_fp_regs = n_global_fp_regs;
4885 sparc_save_local_in_regs_p = save_local_in_regs_p;
4890 /* Implement the macro INITIAL_ELIMINATION_OFFSET, return the OFFSET. */
4893 sparc_initial_elimination_offset (int to)
4897 if (to == STACK_POINTER_REGNUM)
4898 offset = sparc_compute_frame_size (get_frame_size (), crtl->is_leaf);
4902 offset += SPARC_STACK_BIAS;
4906 /* Output any necessary .register pseudo-ops. */
4909 sparc_output_scratch_registers (FILE *file ATTRIBUTE_UNUSED)
4911 #ifdef HAVE_AS_REGISTER_PSEUDO_OP
4917 /* Check if %g[2367] were used without
4918 .register being printed for them already. */
4919 for (i = 2; i < 8; i++)
4921 if (df_regs_ever_live_p (i)
4922 && ! sparc_hard_reg_printed [i])
4924 sparc_hard_reg_printed [i] = 1;
4925 /* %g7 is used as TLS base register, use #ignore
4926 for it instead of #scratch. */
4927 fprintf (file, "\t.register\t%%g%d, #%s\n", i,
4928 i == 7 ? "ignore" : "scratch");
4935 #define PROBE_INTERVAL (1 << STACK_CHECK_PROBE_INTERVAL_EXP)
4937 #if PROBE_INTERVAL > 4096
4938 #error Cannot use indexed addressing mode for stack probing
4941 /* Emit code to probe a range of stack addresses from FIRST to FIRST+SIZE,
4942 inclusive. These are offsets from the current stack pointer.
4944 Note that we don't use the REG+REG addressing mode for the probes because
4945 of the stack bias in 64-bit mode. And it doesn't really buy us anything
4946 so the advantages of having a single code win here. */
4949 sparc_emit_probe_stack_range (HOST_WIDE_INT first, HOST_WIDE_INT size)
4951 rtx g1 = gen_rtx_REG (Pmode, 1);
4953 /* See if we have a constant small number of probes to generate. If so,
4954 that's the easy case. */
4955 if (size <= PROBE_INTERVAL)
4957 emit_move_insn (g1, GEN_INT (first));
4958 emit_insn (gen_rtx_SET (VOIDmode, g1,
4959 gen_rtx_MINUS (Pmode, stack_pointer_rtx, g1)));
4960 emit_stack_probe (plus_constant (Pmode, g1, -size));
4963 /* The run-time loop is made up of 10 insns in the generic case while the
4964 compile-time loop is made up of 4+2*(n-2) insns for n # of intervals. */
4965 else if (size <= 5 * PROBE_INTERVAL)
4969 emit_move_insn (g1, GEN_INT (first + PROBE_INTERVAL));
4970 emit_insn (gen_rtx_SET (VOIDmode, g1,
4971 gen_rtx_MINUS (Pmode, stack_pointer_rtx, g1)));
4972 emit_stack_probe (g1);
4974 /* Probe at FIRST + N * PROBE_INTERVAL for values of N from 2 until
4975 it exceeds SIZE. If only two probes are needed, this will not
4976 generate any code. Then probe at FIRST + SIZE. */
4977 for (i = 2 * PROBE_INTERVAL; i < size; i += PROBE_INTERVAL)
4979 emit_insn (gen_rtx_SET (VOIDmode, g1,
4980 plus_constant (Pmode, g1, -PROBE_INTERVAL)));
4981 emit_stack_probe (g1);
4984 emit_stack_probe (plus_constant (Pmode, g1,
4985 (i - PROBE_INTERVAL) - size));
4988 /* Otherwise, do the same as above, but in a loop. Note that we must be
4989 extra careful with variables wrapping around because we might be at
4990 the very top (or the very bottom) of the address space and we have
4991 to be able to handle this case properly; in particular, we use an
4992 equality test for the loop condition. */
4995 HOST_WIDE_INT rounded_size;
4996 rtx g4 = gen_rtx_REG (Pmode, 4);
4998 emit_move_insn (g1, GEN_INT (first));
5001 /* Step 1: round SIZE to the previous multiple of the interval. */
5003 rounded_size = size & -PROBE_INTERVAL;
5004 emit_move_insn (g4, GEN_INT (rounded_size));
5007 /* Step 2: compute initial and final value of the loop counter. */
5009 /* TEST_ADDR = SP + FIRST. */
5010 emit_insn (gen_rtx_SET (VOIDmode, g1,
5011 gen_rtx_MINUS (Pmode, stack_pointer_rtx, g1)));
5013 /* LAST_ADDR = SP + FIRST + ROUNDED_SIZE. */
5014 emit_insn (gen_rtx_SET (VOIDmode, g4, gen_rtx_MINUS (Pmode, g1, g4)));
5019 while (TEST_ADDR != LAST_ADDR)
5021 TEST_ADDR = TEST_ADDR + PROBE_INTERVAL
5025 probes at FIRST + N * PROBE_INTERVAL for values of N from 1
5026 until it is equal to ROUNDED_SIZE. */
5029 emit_insn (gen_probe_stack_rangedi (g1, g1, g4));
5031 emit_insn (gen_probe_stack_rangesi (g1, g1, g4));
5034 /* Step 4: probe at FIRST + SIZE if we cannot assert at compile-time
5035 that SIZE is equal to ROUNDED_SIZE. */
5037 if (size != rounded_size)
5038 emit_stack_probe (plus_constant (Pmode, g4, rounded_size - size));
5041 /* Make sure nothing is scheduled before we are done. */
5042 emit_insn (gen_blockage ());
5045 /* Probe a range of stack addresses from REG1 to REG2 inclusive. These are
5046 absolute addresses. */
5049 output_probe_stack_range (rtx reg1, rtx reg2)
5051 static int labelno = 0;
5052 char loop_lab[32], end_lab[32];
5055 ASM_GENERATE_INTERNAL_LABEL (loop_lab, "LPSRL", labelno);
5056 ASM_GENERATE_INTERNAL_LABEL (end_lab, "LPSRE", labelno++);
5058 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, loop_lab);
5060 /* Jump to END_LAB if TEST_ADDR == LAST_ADDR. */
5063 output_asm_insn ("cmp\t%0, %1", xops);
5065 fputs ("\tbe,pn\t%xcc,", asm_out_file);
5067 fputs ("\tbe\t", asm_out_file);
5068 assemble_name_raw (asm_out_file, end_lab);
5069 fputc ('\n', asm_out_file);
5071 /* TEST_ADDR = TEST_ADDR + PROBE_INTERVAL. */
5072 xops[1] = GEN_INT (-PROBE_INTERVAL);
5073 output_asm_insn (" add\t%0, %1, %0", xops);
5075 /* Probe at TEST_ADDR and branch. */
5077 fputs ("\tba,pt\t%xcc,", asm_out_file);
5079 fputs ("\tba\t", asm_out_file);
5080 assemble_name_raw (asm_out_file, loop_lab);
5081 fputc ('\n', asm_out_file);
5082 xops[1] = GEN_INT (SPARC_STACK_BIAS);
5083 output_asm_insn (" st\t%%g0, [%0+%1]", xops);
5085 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, end_lab);
5090 /* Emit code to save/restore registers from LOW to HIGH at BASE+OFFSET as
5091 needed. LOW is supposed to be double-word aligned for 32-bit registers.
5092 SAVE_P decides whether a register must be saved/restored. ACTION_TRUE
5093 is the action to be performed if SAVE_P returns true and ACTION_FALSE
5094 the action to be performed if it returns false. Return the new offset. */
5096 typedef bool (*sorr_pred_t) (unsigned int, int);
5097 typedef enum { SORR_NONE, SORR_ADVANCE, SORR_SAVE, SORR_RESTORE } sorr_act_t;
5100 emit_save_or_restore_regs (unsigned int low, unsigned int high, rtx base,
5101 int offset, int leaf_function, sorr_pred_t save_p,
5102 sorr_act_t action_true, sorr_act_t action_false)
5107 if (TARGET_ARCH64 && high <= 32)
5111 for (i = low; i < high; i++)
5113 if (save_p (i, leaf_function))
5115 mem = gen_frame_mem (DImode, plus_constant (Pmode,
5117 if (action_true == SORR_SAVE)
5119 insn = emit_move_insn (mem, gen_rtx_REG (DImode, i));
5120 RTX_FRAME_RELATED_P (insn) = 1;
5122 else /* action_true == SORR_RESTORE */
5124 /* The frame pointer must be restored last since its old
5125 value may be used as base address for the frame. This
5126 is problematic in 64-bit mode only because of the lack
5127 of double-word load instruction. */
5128 if (i == HARD_FRAME_POINTER_REGNUM)
5131 emit_move_insn (gen_rtx_REG (DImode, i), mem);
5135 else if (action_false == SORR_ADVANCE)
5141 mem = gen_frame_mem (DImode, plus_constant (Pmode, base, fp_offset));
5142 emit_move_insn (hard_frame_pointer_rtx, mem);
5147 for (i = low; i < high; i += 2)
5149 bool reg0 = save_p (i, leaf_function);
5150 bool reg1 = save_p (i + 1, leaf_function);
5151 enum machine_mode mode;
5156 mode = SPARC_INT_REG_P (i) ? DImode : DFmode;
5161 mode = SPARC_INT_REG_P (i) ? SImode : SFmode;
5166 mode = SPARC_INT_REG_P (i) ? SImode : SFmode;
5172 if (action_false == SORR_ADVANCE)
5177 mem = gen_frame_mem (mode, plus_constant (Pmode, base, offset));
5178 if (action_true == SORR_SAVE)
5180 insn = emit_move_insn (mem, gen_rtx_REG (mode, regno));
5181 RTX_FRAME_RELATED_P (insn) = 1;
5185 mem = gen_frame_mem (SImode, plus_constant (Pmode, base,
5187 set1 = gen_rtx_SET (VOIDmode, mem,
5188 gen_rtx_REG (SImode, regno));
5189 RTX_FRAME_RELATED_P (set1) = 1;
5191 = gen_frame_mem (SImode, plus_constant (Pmode, base,
5193 set2 = gen_rtx_SET (VOIDmode, mem,
5194 gen_rtx_REG (SImode, regno + 1));
5195 RTX_FRAME_RELATED_P (set2) = 1;
5196 add_reg_note (insn, REG_FRAME_RELATED_EXPR,
5197 gen_rtx_PARALLEL (VOIDmode,
5198 gen_rtvec (2, set1, set2)));
5201 else /* action_true == SORR_RESTORE */
5202 emit_move_insn (gen_rtx_REG (mode, regno), mem);
5204 /* Always preserve double-word alignment. */
5205 offset = (offset + 8) & -8;
5212 /* Emit code to adjust BASE to OFFSET. Return the new base. */
5215 emit_adjust_base_to_offset (rtx base, int offset)
5217 /* ??? This might be optimized a little as %g1 might already have a
5218 value close enough that a single add insn will do. */
5219 /* ??? Although, all of this is probably only a temporary fix because
5220 if %g1 can hold a function result, then sparc_expand_epilogue will
5221 lose (the result will be clobbered). */
5222 rtx new_base = gen_rtx_REG (Pmode, 1);
5223 emit_move_insn (new_base, GEN_INT (offset));
5224 emit_insn (gen_rtx_SET (VOIDmode,
5225 new_base, gen_rtx_PLUS (Pmode, base, new_base)));
5229 /* Emit code to save/restore call-saved global and FP registers. */
5232 emit_save_or_restore_global_fp_regs (rtx base, int offset, sorr_act_t action)
5234 if (offset < -4096 || offset + sparc_n_global_fp_regs * 4 > 4095)
5236 base = emit_adjust_base_to_offset (base, offset);
5241 = emit_save_or_restore_regs (0, 8, base, offset, 0,
5242 save_global_or_fp_reg_p, action, SORR_NONE);
5243 emit_save_or_restore_regs (32, TARGET_V9 ? 96 : 64, base, offset, 0,
5244 save_global_or_fp_reg_p, action, SORR_NONE);
5247 /* Emit code to save/restore call-saved local and in registers. */
5250 emit_save_or_restore_local_in_regs (rtx base, int offset, sorr_act_t action)
5252 if (offset < -4096 || offset + 16 * UNITS_PER_WORD > 4095)
5254 base = emit_adjust_base_to_offset (base, offset);
5258 emit_save_or_restore_regs (16, 32, base, offset, sparc_leaf_function_p,
5259 save_local_or_in_reg_p, action, SORR_ADVANCE);
5262 /* Emit a window_save insn. */
5265 emit_window_save (rtx increment)
5267 rtx insn = emit_insn (gen_window_save (increment));
5268 RTX_FRAME_RELATED_P (insn) = 1;
5270 /* The incoming return address (%o7) is saved in %i7. */
5271 add_reg_note (insn, REG_CFA_REGISTER,
5272 gen_rtx_SET (VOIDmode,
5273 gen_rtx_REG (Pmode, RETURN_ADDR_REGNUM),
5275 INCOMING_RETURN_ADDR_REGNUM)));
5277 /* The window save event. */
5278 add_reg_note (insn, REG_CFA_WINDOW_SAVE, const0_rtx);
5280 /* The CFA is %fp, the hard frame pointer. */
5281 add_reg_note (insn, REG_CFA_DEF_CFA,
5282 plus_constant (Pmode, hard_frame_pointer_rtx,
5283 INCOMING_FRAME_SP_OFFSET));
5288 /* Generate an increment for the stack pointer. */
5291 gen_stack_pointer_inc (rtx increment)
5293 return gen_rtx_SET (VOIDmode,
5295 gen_rtx_PLUS (Pmode,
5300 /* Expand the function prologue. The prologue is responsible for reserving
5301 storage for the frame, saving the call-saved registers and loading the
5302 GOT register if needed. */
5305 sparc_expand_prologue (void)
5310 /* Compute a snapshot of crtl->uses_only_leaf_regs. Relying
5311 on the final value of the flag means deferring the prologue/epilogue
5312 expansion until just before the second scheduling pass, which is too
5313 late to emit multiple epilogues or return insns.
5315 Of course we are making the assumption that the value of the flag
5316 will not change between now and its final value. Of the three parts
5317 of the formula, only the last one can reasonably vary. Let's take a
5318 closer look, after assuming that the first two ones are set to true
5319 (otherwise the last value is effectively silenced).
5321 If only_leaf_regs_used returns false, the global predicate will also
5322 be false so the actual frame size calculated below will be positive.
5323 As a consequence, the save_register_window insn will be emitted in
5324 the instruction stream; now this insn explicitly references %fp
5325 which is not a leaf register so only_leaf_regs_used will always
5326 return false subsequently.
5328 If only_leaf_regs_used returns true, we hope that the subsequent
5329 optimization passes won't cause non-leaf registers to pop up. For
5330 example, the regrename pass has special provisions to not rename to
5331 non-leaf registers in a leaf function. */
5332 sparc_leaf_function_p
5333 = optimize > 0 && crtl->is_leaf && only_leaf_regs_used ();
5335 size = sparc_compute_frame_size (get_frame_size(), sparc_leaf_function_p);
5337 if (flag_stack_usage_info)
5338 current_function_static_stack_size = size;
5340 if (flag_stack_check == STATIC_BUILTIN_STACK_CHECK && size)
5341 sparc_emit_probe_stack_range (STACK_CHECK_PROTECT, size);
5345 else if (sparc_leaf_function_p)
5347 rtx size_int_rtx = GEN_INT (-size);
5350 insn = emit_insn (gen_stack_pointer_inc (size_int_rtx));
5351 else if (size <= 8192)
5353 insn = emit_insn (gen_stack_pointer_inc (GEN_INT (-4096)));
5354 RTX_FRAME_RELATED_P (insn) = 1;
5356 /* %sp is still the CFA register. */
5357 insn = emit_insn (gen_stack_pointer_inc (GEN_INT (4096 - size)));
5361 rtx size_rtx = gen_rtx_REG (Pmode, 1);
5362 emit_move_insn (size_rtx, size_int_rtx);
5363 insn = emit_insn (gen_stack_pointer_inc (size_rtx));
5364 add_reg_note (insn, REG_FRAME_RELATED_EXPR,
5365 gen_stack_pointer_inc (size_int_rtx));
5368 RTX_FRAME_RELATED_P (insn) = 1;
5372 rtx size_int_rtx = GEN_INT (-size);
5375 emit_window_save (size_int_rtx);
5376 else if (size <= 8192)
5378 emit_window_save (GEN_INT (-4096));
5380 /* %sp is not the CFA register anymore. */
5381 emit_insn (gen_stack_pointer_inc (GEN_INT (4096 - size)));
5383 /* Make sure no %fp-based store is issued until after the frame is
5384 established. The offset between the frame pointer and the stack
5385 pointer is calculated relative to the value of the stack pointer
5386 at the end of the function prologue, and moving instructions that
5387 access the stack via the frame pointer between the instructions
5388 that decrement the stack pointer could result in accessing the
5389 register window save area, which is volatile. */
5390 emit_insn (gen_frame_blockage ());
5394 rtx size_rtx = gen_rtx_REG (Pmode, 1);
5395 emit_move_insn (size_rtx, size_int_rtx);
5396 emit_window_save (size_rtx);
5400 if (sparc_leaf_function_p)
5402 sparc_frame_base_reg = stack_pointer_rtx;
5403 sparc_frame_base_offset = size + SPARC_STACK_BIAS;
5407 sparc_frame_base_reg = hard_frame_pointer_rtx;
5408 sparc_frame_base_offset = SPARC_STACK_BIAS;
5411 if (sparc_n_global_fp_regs > 0)
5412 emit_save_or_restore_global_fp_regs (sparc_frame_base_reg,
5413 sparc_frame_base_offset
5414 - sparc_apparent_frame_size,
5417 /* Load the GOT register if needed. */
5418 if (crtl->uses_pic_offset_table)
5419 load_got_register ();
5421 /* Advertise that the data calculated just above are now valid. */
5422 sparc_prologue_data_valid_p = true;
5425 /* Expand the function prologue. The prologue is responsible for reserving
5426 storage for the frame, saving the call-saved registers and loading the
5427 GOT register if needed. */
5430 sparc_flat_expand_prologue (void)
5435 sparc_leaf_function_p = optimize > 0 && crtl->is_leaf;
5437 size = sparc_compute_frame_size (get_frame_size(), sparc_leaf_function_p);
5439 if (flag_stack_usage_info)
5440 current_function_static_stack_size = size;
5442 if (flag_stack_check == STATIC_BUILTIN_STACK_CHECK && size)
5443 sparc_emit_probe_stack_range (STACK_CHECK_PROTECT, size);
5445 if (sparc_save_local_in_regs_p)
5446 emit_save_or_restore_local_in_regs (stack_pointer_rtx, SPARC_STACK_BIAS,
5453 rtx size_int_rtx, size_rtx;
5455 size_rtx = size_int_rtx = GEN_INT (-size);
5457 /* We establish the frame (i.e. decrement the stack pointer) first, even
5458 if we use a frame pointer, because we cannot clobber any call-saved
5459 registers, including the frame pointer, if we haven't created a new
5460 register save area, for the sake of compatibility with the ABI. */
5462 insn = emit_insn (gen_stack_pointer_inc (size_int_rtx));
5463 else if (size <= 8192 && !frame_pointer_needed)
5465 insn = emit_insn (gen_stack_pointer_inc (GEN_INT (-4096)));
5466 RTX_FRAME_RELATED_P (insn) = 1;
5467 insn = emit_insn (gen_stack_pointer_inc (GEN_INT (4096 - size)));
5471 size_rtx = gen_rtx_REG (Pmode, 1);
5472 emit_move_insn (size_rtx, size_int_rtx);
5473 insn = emit_insn (gen_stack_pointer_inc (size_rtx));
5474 add_reg_note (insn, REG_CFA_ADJUST_CFA,
5475 gen_stack_pointer_inc (size_int_rtx));
5477 RTX_FRAME_RELATED_P (insn) = 1;
5479 /* Ensure nothing is scheduled until after the frame is established. */
5480 emit_insn (gen_blockage ());
5482 if (frame_pointer_needed)
5484 insn = emit_insn (gen_rtx_SET (VOIDmode, hard_frame_pointer_rtx,
5485 gen_rtx_MINUS (Pmode,
5488 RTX_FRAME_RELATED_P (insn) = 1;
5490 add_reg_note (insn, REG_CFA_ADJUST_CFA,
5491 gen_rtx_SET (VOIDmode, hard_frame_pointer_rtx,
5492 plus_constant (Pmode, stack_pointer_rtx,
5496 if (return_addr_reg_needed_p (sparc_leaf_function_p))
5498 rtx o7 = gen_rtx_REG (Pmode, INCOMING_RETURN_ADDR_REGNUM);
5499 rtx i7 = gen_rtx_REG (Pmode, RETURN_ADDR_REGNUM);
5501 insn = emit_move_insn (i7, o7);
5502 RTX_FRAME_RELATED_P (insn) = 1;
5504 add_reg_note (insn, REG_CFA_REGISTER,
5505 gen_rtx_SET (VOIDmode, i7, o7));
5507 /* Prevent this instruction from ever being considered dead,
5508 even if this function has no epilogue. */
5513 if (frame_pointer_needed)
5515 sparc_frame_base_reg = hard_frame_pointer_rtx;
5516 sparc_frame_base_offset = SPARC_STACK_BIAS;
5520 sparc_frame_base_reg = stack_pointer_rtx;
5521 sparc_frame_base_offset = size + SPARC_STACK_BIAS;
5524 if (sparc_n_global_fp_regs > 0)
5525 emit_save_or_restore_global_fp_regs (sparc_frame_base_reg,
5526 sparc_frame_base_offset
5527 - sparc_apparent_frame_size,
5530 /* Load the GOT register if needed. */
5531 if (crtl->uses_pic_offset_table)
5532 load_got_register ();
5534 /* Advertise that the data calculated just above are now valid. */
5535 sparc_prologue_data_valid_p = true;
5538 /* This function generates the assembly code for function entry, which boils
5539 down to emitting the necessary .register directives. */
5542 sparc_asm_function_prologue (FILE *file, HOST_WIDE_INT size ATTRIBUTE_UNUSED)
5544 /* Check that the assumption we made in sparc_expand_prologue is valid. */
5546 gcc_assert (sparc_leaf_function_p == crtl->uses_only_leaf_regs);
5548 sparc_output_scratch_registers (file);
5551 /* Expand the function epilogue, either normal or part of a sibcall.
5552 We emit all the instructions except the return or the call. */
5555 sparc_expand_epilogue (bool for_eh)
5557 HOST_WIDE_INT size = sparc_frame_size;
5559 if (sparc_n_global_fp_regs > 0)
5560 emit_save_or_restore_global_fp_regs (sparc_frame_base_reg,
5561 sparc_frame_base_offset
5562 - sparc_apparent_frame_size,
5565 if (size == 0 || for_eh)
5567 else if (sparc_leaf_function_p)
5570 emit_insn (gen_stack_pointer_inc (GEN_INT (size)));
5571 else if (size <= 8192)
5573 emit_insn (gen_stack_pointer_inc (GEN_INT (4096)));
5574 emit_insn (gen_stack_pointer_inc (GEN_INT (size - 4096)));
5578 rtx reg = gen_rtx_REG (Pmode, 1);
5579 emit_move_insn (reg, GEN_INT (size));
5580 emit_insn (gen_stack_pointer_inc (reg));
5585 /* Expand the function epilogue, either normal or part of a sibcall.
5586 We emit all the instructions except the return or the call. */
5589 sparc_flat_expand_epilogue (bool for_eh)
5591 HOST_WIDE_INT size = sparc_frame_size;
5593 if (sparc_n_global_fp_regs > 0)
5594 emit_save_or_restore_global_fp_regs (sparc_frame_base_reg,
5595 sparc_frame_base_offset
5596 - sparc_apparent_frame_size,
5599 /* If we have a frame pointer, we'll need both to restore it before the
5600 frame is destroyed and use its current value in destroying the frame.
5601 Since we don't have an atomic way to do that in the flat window model,
5602 we save the current value into a temporary register (%g1). */
5603 if (frame_pointer_needed && !for_eh)
5604 emit_move_insn (gen_rtx_REG (Pmode, 1), hard_frame_pointer_rtx);
5606 if (return_addr_reg_needed_p (sparc_leaf_function_p))
5607 emit_move_insn (gen_rtx_REG (Pmode, INCOMING_RETURN_ADDR_REGNUM),
5608 gen_rtx_REG (Pmode, RETURN_ADDR_REGNUM));
5610 if (sparc_save_local_in_regs_p)
5611 emit_save_or_restore_local_in_regs (sparc_frame_base_reg,
5612 sparc_frame_base_offset,
5615 if (size == 0 || for_eh)
5617 else if (frame_pointer_needed)
5619 /* Make sure the frame is destroyed after everything else is done. */
5620 emit_insn (gen_blockage ());
5622 emit_move_insn (stack_pointer_rtx, gen_rtx_REG (Pmode, 1));
5627 emit_insn (gen_blockage ());
5630 emit_insn (gen_stack_pointer_inc (GEN_INT (size)));
5631 else if (size <= 8192)
5633 emit_insn (gen_stack_pointer_inc (GEN_INT (4096)));
5634 emit_insn (gen_stack_pointer_inc (GEN_INT (size - 4096)));
5638 rtx reg = gen_rtx_REG (Pmode, 1);
5639 emit_move_insn (reg, GEN_INT (size));
5640 emit_insn (gen_stack_pointer_inc (reg));
5645 /* Return true if it is appropriate to emit `return' instructions in the
5646 body of a function. */
5649 sparc_can_use_return_insn_p (void)
5651 return sparc_prologue_data_valid_p
5652 && sparc_n_global_fp_regs == 0
5654 ? (sparc_frame_size == 0 && !sparc_save_local_in_regs_p)
5655 : (sparc_frame_size == 0 || !sparc_leaf_function_p);
5658 /* This function generates the assembly code for function exit. */
5661 sparc_asm_function_epilogue (FILE *file, HOST_WIDE_INT size ATTRIBUTE_UNUSED)
5663 /* If the last two instructions of a function are "call foo; dslot;"
5664 the return address might point to the first instruction in the next
5665 function and we have to output a dummy nop for the sake of sane
5666 backtraces in such cases. This is pointless for sibling calls since
5667 the return address is explicitly adjusted. */
5669 rtx insn, last_real_insn;
5671 insn = get_last_insn ();
5673 last_real_insn = prev_real_insn (insn);
5675 && NONJUMP_INSN_P (last_real_insn)
5676 && GET_CODE (PATTERN (last_real_insn)) == SEQUENCE)
5677 last_real_insn = XVECEXP (PATTERN (last_real_insn), 0, 0);
5680 && CALL_P (last_real_insn)
5681 && !SIBLING_CALL_P (last_real_insn))
5682 fputs("\tnop\n", file);
5684 sparc_output_deferred_case_vectors ();
5687 /* Output a 'restore' instruction. */
5690 output_restore (rtx pat)
5696 fputs ("\t restore\n", asm_out_file);
5700 gcc_assert (GET_CODE (pat) == SET);
5702 operands[0] = SET_DEST (pat);
5703 pat = SET_SRC (pat);
5705 switch (GET_CODE (pat))
5708 operands[1] = XEXP (pat, 0);
5709 operands[2] = XEXP (pat, 1);
5710 output_asm_insn (" restore %r1, %2, %Y0", operands);
5713 operands[1] = XEXP (pat, 0);
5714 operands[2] = XEXP (pat, 1);
5715 output_asm_insn (" restore %r1, %%lo(%a2), %Y0", operands);
5718 operands[1] = XEXP (pat, 0);
5719 gcc_assert (XEXP (pat, 1) == const1_rtx);
5720 output_asm_insn (" restore %r1, %r1, %Y0", operands);
5724 output_asm_insn (" restore %%g0, %1, %Y0", operands);
5729 /* Output a return. */
5732 output_return (rtx insn)
5734 if (crtl->calls_eh_return)
5736 /* If the function uses __builtin_eh_return, the eh_return
5737 machinery occupies the delay slot. */
5738 gcc_assert (!final_sequence);
5740 if (flag_delayed_branch)
5742 if (!TARGET_FLAT && TARGET_V9)
5743 fputs ("\treturn\t%i7+8\n", asm_out_file);
5747 fputs ("\trestore\n", asm_out_file);
5749 fputs ("\tjmp\t%o7+8\n", asm_out_file);
5752 fputs ("\t add\t%sp, %g1, %sp\n", asm_out_file);
5757 fputs ("\trestore\n", asm_out_file);
5759 fputs ("\tadd\t%sp, %g1, %sp\n", asm_out_file);
5760 fputs ("\tjmp\t%o7+8\n\t nop\n", asm_out_file);
5763 else if (sparc_leaf_function_p || TARGET_FLAT)
5765 /* This is a leaf or flat function so we don't have to bother restoring
5766 the register window, which frees us from dealing with the convoluted
5767 semantics of restore/return. We simply output the jump to the
5768 return address and the insn in the delay slot (if any). */
5770 return "jmp\t%%o7+%)%#";
5774 /* This is a regular function so we have to restore the register window.
5775 We may have a pending insn for the delay slot, which will be either
5776 combined with the 'restore' instruction or put in the delay slot of
5777 the 'return' instruction. */
5783 delay = NEXT_INSN (insn);
5786 pat = PATTERN (delay);
5788 if (TARGET_V9 && ! epilogue_renumber (&pat, 1))
5790 epilogue_renumber (&pat, 0);
5791 return "return\t%%i7+%)%#";
5795 output_asm_insn ("jmp\t%%i7+%)", NULL);
5796 output_restore (pat);
5797 PATTERN (delay) = gen_blockage ();
5798 INSN_CODE (delay) = -1;
5803 /* The delay slot is empty. */
5805 return "return\t%%i7+%)\n\t nop";
5806 else if (flag_delayed_branch)
5807 return "jmp\t%%i7+%)\n\t restore";
5809 return "restore\n\tjmp\t%%o7+%)\n\t nop";
5816 /* Output a sibling call. */
5819 output_sibcall (rtx insn, rtx call_operand)
5823 gcc_assert (flag_delayed_branch);
5825 operands[0] = call_operand;
5827 if (sparc_leaf_function_p || TARGET_FLAT)
5829 /* This is a leaf or flat function so we don't have to bother restoring
5830 the register window. We simply output the jump to the function and
5831 the insn in the delay slot (if any). */
5833 gcc_assert (!(LEAF_SIBCALL_SLOT_RESERVED_P && final_sequence));
5836 output_asm_insn ("sethi\t%%hi(%a0), %%g1\n\tjmp\t%%g1 + %%lo(%a0)%#",
5839 /* Use or with rs2 %%g0 instead of mov, so that as/ld can optimize
5840 it into branch if possible. */
5841 output_asm_insn ("or\t%%o7, %%g0, %%g1\n\tcall\t%a0, 0\n\t or\t%%g1, %%g0, %%o7",
5846 /* This is a regular function so we have to restore the register window.
5847 We may have a pending insn for the delay slot, which will be combined
5848 with the 'restore' instruction. */
5850 output_asm_insn ("call\t%a0, 0", operands);
5854 rtx delay = NEXT_INSN (insn);
5857 output_restore (PATTERN (delay));
5859 PATTERN (delay) = gen_blockage ();
5860 INSN_CODE (delay) = -1;
5863 output_restore (NULL_RTX);
5869 /* Functions for handling argument passing.
5871 For 32-bit, the first 6 args are normally in registers and the rest are
5872 pushed. Any arg that starts within the first 6 words is at least
5873 partially passed in a register unless its data type forbids.
5875 For 64-bit, the argument registers are laid out as an array of 16 elements
5876 and arguments are added sequentially. The first 6 int args and up to the
5877 first 16 fp args (depending on size) are passed in regs.
5879 Slot Stack Integral Float Float in structure Double Long Double
5880 ---- ----- -------- ----- ------------------ ------ -----------
5881 15 [SP+248] %f31 %f30,%f31 %d30
5882 14 [SP+240] %f29 %f28,%f29 %d28 %q28
5883 13 [SP+232] %f27 %f26,%f27 %d26
5884 12 [SP+224] %f25 %f24,%f25 %d24 %q24
5885 11 [SP+216] %f23 %f22,%f23 %d22
5886 10 [SP+208] %f21 %f20,%f21 %d20 %q20
5887 9 [SP+200] %f19 %f18,%f19 %d18
5888 8 [SP+192] %f17 %f16,%f17 %d16 %q16
5889 7 [SP+184] %f15 %f14,%f15 %d14
5890 6 [SP+176] %f13 %f12,%f13 %d12 %q12
5891 5 [SP+168] %o5 %f11 %f10,%f11 %d10
5892 4 [SP+160] %o4 %f9 %f8,%f9 %d8 %q8
5893 3 [SP+152] %o3 %f7 %f6,%f7 %d6
5894 2 [SP+144] %o2 %f5 %f4,%f5 %d4 %q4
5895 1 [SP+136] %o1 %f3 %f2,%f3 %d2
5896 0 [SP+128] %o0 %f1 %f0,%f1 %d0 %q0
5898 Here SP = %sp if -mno-stack-bias or %sp+stack_bias otherwise.
5900 Integral arguments are always passed as 64-bit quantities appropriately
5903 Passing of floating point values is handled as follows.
5904 If a prototype is in scope:
5905 If the value is in a named argument (i.e. not a stdarg function or a
5906 value not part of the `...') then the value is passed in the appropriate
5908 If the value is part of the `...' and is passed in one of the first 6
5909 slots then the value is passed in the appropriate int reg.
5910 If the value is part of the `...' and is not passed in one of the first 6
5911 slots then the value is passed in memory.
5912 If a prototype is not in scope:
5913 If the value is one of the first 6 arguments the value is passed in the
5914 appropriate integer reg and the appropriate fp reg.
5915 If the value is not one of the first 6 arguments the value is passed in
5916 the appropriate fp reg and in memory.
5919 Summary of the calling conventions implemented by GCC on the SPARC:
5922 size argument return value
5924 small integer <4 int. reg. int. reg.
5925 word 4 int. reg. int. reg.
5926 double word 8 int. reg. int. reg.
5928 _Complex small integer <8 int. reg. int. reg.
5929 _Complex word 8 int. reg. int. reg.
5930 _Complex double word 16 memory int. reg.
5932 vector integer <=8 int. reg. FP reg.
5933 vector integer >8 memory memory
5935 float 4 int. reg. FP reg.
5936 double 8 int. reg. FP reg.
5937 long double 16 memory memory
5939 _Complex float 8 memory FP reg.
5940 _Complex double 16 memory FP reg.
5941 _Complex long double 32 memory FP reg.
5943 vector float any memory memory
5945 aggregate any memory memory
5950 size argument return value
5952 small integer <8 int. reg. int. reg.
5953 word 8 int. reg. int. reg.
5954 double word 16 int. reg. int. reg.
5956 _Complex small integer <16 int. reg. int. reg.
5957 _Complex word 16 int. reg. int. reg.
5958 _Complex double word 32 memory int. reg.
5960 vector integer <=16 FP reg. FP reg.
5961 vector integer 16<s<=32 memory FP reg.
5962 vector integer >32 memory memory
5964 float 4 FP reg. FP reg.
5965 double 8 FP reg. FP reg.
5966 long double 16 FP reg. FP reg.
5968 _Complex float 8 FP reg. FP reg.
5969 _Complex double 16 FP reg. FP reg.
5970 _Complex long double 32 memory FP reg.
5972 vector float <=16 FP reg. FP reg.
5973 vector float 16<s<=32 memory FP reg.
5974 vector float >32 memory memory
5976 aggregate <=16 reg. reg.
5977 aggregate 16<s<=32 memory reg.
5978 aggregate >32 memory memory
5982 Note #1: complex floating-point types follow the extended SPARC ABIs as
5983 implemented by the Sun compiler.
5985 Note #2: integral vector types follow the scalar floating-point types
5986 conventions to match what is implemented by the Sun VIS SDK.
5988 Note #3: floating-point vector types follow the aggregate types
5992 /* Maximum number of int regs for args. */
5993 #define SPARC_INT_ARG_MAX 6
5994 /* Maximum number of fp regs for args. */
5995 #define SPARC_FP_ARG_MAX 16
5997 #define ROUND_ADVANCE(SIZE) (((SIZE) + UNITS_PER_WORD - 1) / UNITS_PER_WORD)
5999 /* Handle the INIT_CUMULATIVE_ARGS macro.
6000 Initialize a variable CUM of type CUMULATIVE_ARGS
6001 for a call to a function whose data type is FNTYPE.
6002 For a library call, FNTYPE is 0. */
6005 init_cumulative_args (struct sparc_args *cum, tree fntype,
6006 rtx libname ATTRIBUTE_UNUSED,
6007 tree fndecl ATTRIBUTE_UNUSED)
6010 cum->prototype_p = fntype && prototype_p (fntype);
6011 cum->libcall_p = fntype == 0;
6014 /* Handle promotion of pointer and integer arguments. */
6016 static enum machine_mode
6017 sparc_promote_function_mode (const_tree type,
6018 enum machine_mode mode,
6020 const_tree fntype ATTRIBUTE_UNUSED,
6021 int for_return ATTRIBUTE_UNUSED)
6023 if (type != NULL_TREE && POINTER_TYPE_P (type))
6025 *punsignedp = POINTERS_EXTEND_UNSIGNED;
6029 /* Integral arguments are passed as full words, as per the ABI. */
6030 if (GET_MODE_CLASS (mode) == MODE_INT
6031 && GET_MODE_SIZE (mode) < UNITS_PER_WORD)
6037 /* Handle the TARGET_STRICT_ARGUMENT_NAMING target hook. */
6040 sparc_strict_argument_naming (cumulative_args_t ca ATTRIBUTE_UNUSED)
6042 return TARGET_ARCH64 ? true : false;
6045 /* Scan the record type TYPE and return the following predicates:
6046 - INTREGS_P: the record contains at least one field or sub-field
6047 that is eligible for promotion in integer registers.
6048 - FP_REGS_P: the record contains at least one field or sub-field
6049 that is eligible for promotion in floating-point registers.
6050 - PACKED_P: the record contains at least one field that is packed.
6052 Sub-fields are not taken into account for the PACKED_P predicate. */
6055 scan_record_type (const_tree type, int *intregs_p, int *fpregs_p,
6060 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
6062 if (TREE_CODE (field) == FIELD_DECL)
6064 if (TREE_CODE (TREE_TYPE (field)) == RECORD_TYPE)
6065 scan_record_type (TREE_TYPE (field), intregs_p, fpregs_p, 0);
6066 else if ((FLOAT_TYPE_P (TREE_TYPE (field))
6067 || TREE_CODE (TREE_TYPE (field)) == VECTOR_TYPE)
6073 if (packed_p && DECL_PACKED (field))
6079 /* Compute the slot number to pass an argument in.
6080 Return the slot number or -1 if passing on the stack.
6082 CUM is a variable of type CUMULATIVE_ARGS which gives info about
6083 the preceding args and about the function being called.
6084 MODE is the argument's machine mode.
6085 TYPE is the data type of the argument (as a tree).
6086 This is null for libcalls where that information may
6088 NAMED is nonzero if this argument is a named parameter
6089 (otherwise it is an extra parameter matching an ellipsis).
6090 INCOMING_P is zero for FUNCTION_ARG, nonzero for FUNCTION_INCOMING_ARG.
6091 *PREGNO records the register number to use if scalar type.
6092 *PPADDING records the amount of padding needed in words. */
6095 function_arg_slotno (const struct sparc_args *cum, enum machine_mode mode,
6096 const_tree type, bool named, bool incoming_p,
6097 int *pregno, int *ppadding)
6099 int regbase = (incoming_p
6100 ? SPARC_INCOMING_INT_ARG_FIRST
6101 : SPARC_OUTGOING_INT_ARG_FIRST);
6102 int slotno = cum->words;
6103 enum mode_class mclass;
6108 if (type && TREE_ADDRESSABLE (type))
6114 && TYPE_ALIGN (type) % PARM_BOUNDARY != 0)
6117 /* For SPARC64, objects requiring 16-byte alignment get it. */
6119 && (type ? TYPE_ALIGN (type) : GET_MODE_ALIGNMENT (mode)) >= 128
6120 && (slotno & 1) != 0)
6121 slotno++, *ppadding = 1;
6123 mclass = GET_MODE_CLASS (mode);
6124 if (type && TREE_CODE (type) == VECTOR_TYPE)
6126 /* Vector types deserve special treatment because they are
6127 polymorphic wrt their mode, depending upon whether VIS
6128 instructions are enabled. */
6129 if (TREE_CODE (TREE_TYPE (type)) == REAL_TYPE)
6131 /* The SPARC port defines no floating-point vector modes. */
6132 gcc_assert (mode == BLKmode);
6136 /* Integral vector types should either have a vector
6137 mode or an integral mode, because we are guaranteed
6138 by pass_by_reference that their size is not greater
6139 than 16 bytes and TImode is 16-byte wide. */
6140 gcc_assert (mode != BLKmode);
6142 /* Vector integers are handled like floats according to
6144 mclass = MODE_FLOAT;
6151 case MODE_COMPLEX_FLOAT:
6152 case MODE_VECTOR_INT:
6153 if (TARGET_ARCH64 && TARGET_FPU && named)
6155 if (slotno >= SPARC_FP_ARG_MAX)
6157 regno = SPARC_FP_ARG_FIRST + slotno * 2;
6158 /* Arguments filling only one single FP register are
6159 right-justified in the outer double FP register. */
6160 if (GET_MODE_SIZE (mode) <= 4)
6167 case MODE_COMPLEX_INT:
6168 if (slotno >= SPARC_INT_ARG_MAX)
6170 regno = regbase + slotno;
6174 if (mode == VOIDmode)
6175 /* MODE is VOIDmode when generating the actual call. */
6178 gcc_assert (mode == BLKmode);
6182 || (TREE_CODE (type) != VECTOR_TYPE
6183 && TREE_CODE (type) != RECORD_TYPE))
6185 if (slotno >= SPARC_INT_ARG_MAX)
6187 regno = regbase + slotno;
6189 else /* TARGET_ARCH64 && type */
6191 int intregs_p = 0, fpregs_p = 0, packed_p = 0;
6193 /* First see what kinds of registers we would need. */
6194 if (TREE_CODE (type) == VECTOR_TYPE)
6197 scan_record_type (type, &intregs_p, &fpregs_p, &packed_p);
6199 /* The ABI obviously doesn't specify how packed structures
6200 are passed. These are defined to be passed in int regs
6201 if possible, otherwise memory. */
6202 if (packed_p || !named)
6203 fpregs_p = 0, intregs_p = 1;
6205 /* If all arg slots are filled, then must pass on stack. */
6206 if (fpregs_p && slotno >= SPARC_FP_ARG_MAX)
6209 /* If there are only int args and all int arg slots are filled,
6210 then must pass on stack. */
6211 if (!fpregs_p && intregs_p && slotno >= SPARC_INT_ARG_MAX)
6214 /* Note that even if all int arg slots are filled, fp members may
6215 still be passed in regs if such regs are available.
6216 *PREGNO isn't set because there may be more than one, it's up
6217 to the caller to compute them. */
6230 /* Handle recursive register counting for structure field layout. */
6232 struct function_arg_record_value_parms
6234 rtx ret; /* return expression being built. */
6235 int slotno; /* slot number of the argument. */
6236 int named; /* whether the argument is named. */
6237 int regbase; /* regno of the base register. */
6238 int stack; /* 1 if part of the argument is on the stack. */
6239 int intoffset; /* offset of the first pending integer field. */
6240 unsigned int nregs; /* number of words passed in registers. */
6243 static void function_arg_record_value_3
6244 (HOST_WIDE_INT, struct function_arg_record_value_parms *);
6245 static void function_arg_record_value_2
6246 (const_tree, HOST_WIDE_INT, struct function_arg_record_value_parms *, bool);
6247 static void function_arg_record_value_1
6248 (const_tree, HOST_WIDE_INT, struct function_arg_record_value_parms *, bool);
6249 static rtx function_arg_record_value (const_tree, enum machine_mode, int, int, int);
6250 static rtx function_arg_union_value (int, enum machine_mode, int, int);
6252 /* A subroutine of function_arg_record_value. Traverse the structure
6253 recursively and determine how many registers will be required. */
6256 function_arg_record_value_1 (const_tree type, HOST_WIDE_INT startbitpos,
6257 struct function_arg_record_value_parms *parms,
6262 /* We need to compute how many registers are needed so we can
6263 allocate the PARALLEL but before we can do that we need to know
6264 whether there are any packed fields. The ABI obviously doesn't
6265 specify how structures are passed in this case, so they are
6266 defined to be passed in int regs if possible, otherwise memory,
6267 regardless of whether there are fp values present. */
6270 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
6272 if (TREE_CODE (field) == FIELD_DECL && DECL_PACKED (field))
6279 /* Compute how many registers we need. */
6280 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
6282 if (TREE_CODE (field) == FIELD_DECL)
6284 HOST_WIDE_INT bitpos = startbitpos;
6286 if (DECL_SIZE (field) != 0)
6288 if (integer_zerop (DECL_SIZE (field)))
6291 if (host_integerp (bit_position (field), 1))
6292 bitpos += int_bit_position (field);
6295 /* ??? FIXME: else assume zero offset. */
6297 if (TREE_CODE (TREE_TYPE (field)) == RECORD_TYPE)
6298 function_arg_record_value_1 (TREE_TYPE (field),
6302 else if ((FLOAT_TYPE_P (TREE_TYPE (field))
6303 || TREE_CODE (TREE_TYPE (field)) == VECTOR_TYPE)
6308 if (parms->intoffset != -1)
6310 unsigned int startbit, endbit;
6311 int intslots, this_slotno;
6313 startbit = parms->intoffset & -BITS_PER_WORD;
6314 endbit = (bitpos + BITS_PER_WORD - 1) & -BITS_PER_WORD;
6316 intslots = (endbit - startbit) / BITS_PER_WORD;
6317 this_slotno = parms->slotno + parms->intoffset
6320 if (intslots > 0 && intslots > SPARC_INT_ARG_MAX - this_slotno)
6322 intslots = MAX (0, SPARC_INT_ARG_MAX - this_slotno);
6323 /* We need to pass this field on the stack. */
6327 parms->nregs += intslots;
6328 parms->intoffset = -1;
6331 /* There's no need to check this_slotno < SPARC_FP_ARG MAX.
6332 If it wasn't true we wouldn't be here. */
6333 if (TREE_CODE (TREE_TYPE (field)) == VECTOR_TYPE
6334 && DECL_MODE (field) == BLKmode)
6335 parms->nregs += TYPE_VECTOR_SUBPARTS (TREE_TYPE (field));
6336 else if (TREE_CODE (TREE_TYPE (field)) == COMPLEX_TYPE)
6343 if (parms->intoffset == -1)
6344 parms->intoffset = bitpos;
6350 /* A subroutine of function_arg_record_value. Assign the bits of the
6351 structure between parms->intoffset and bitpos to integer registers. */
6354 function_arg_record_value_3 (HOST_WIDE_INT bitpos,
6355 struct function_arg_record_value_parms *parms)
6357 enum machine_mode mode;
6359 unsigned int startbit, endbit;
6360 int this_slotno, intslots, intoffset;
6363 if (parms->intoffset == -1)
6366 intoffset = parms->intoffset;
6367 parms->intoffset = -1;
6369 startbit = intoffset & -BITS_PER_WORD;
6370 endbit = (bitpos + BITS_PER_WORD - 1) & -BITS_PER_WORD;
6371 intslots = (endbit - startbit) / BITS_PER_WORD;
6372 this_slotno = parms->slotno + intoffset / BITS_PER_WORD;
6374 intslots = MIN (intslots, SPARC_INT_ARG_MAX - this_slotno);
6378 /* If this is the trailing part of a word, only load that much into
6379 the register. Otherwise load the whole register. Note that in
6380 the latter case we may pick up unwanted bits. It's not a problem
6381 at the moment but may wish to revisit. */
6383 if (intoffset % BITS_PER_WORD != 0)
6384 mode = smallest_mode_for_size (BITS_PER_WORD - intoffset % BITS_PER_WORD,
6389 intoffset /= BITS_PER_UNIT;
6392 regno = parms->regbase + this_slotno;
6393 reg = gen_rtx_REG (mode, regno);
6394 XVECEXP (parms->ret, 0, parms->stack + parms->nregs)
6395 = gen_rtx_EXPR_LIST (VOIDmode, reg, GEN_INT (intoffset));
6398 intoffset = (intoffset | (UNITS_PER_WORD-1)) + 1;
6403 while (intslots > 0);
6406 /* A subroutine of function_arg_record_value. Traverse the structure
6407 recursively and assign bits to floating point registers. Track which
6408 bits in between need integer registers; invoke function_arg_record_value_3
6409 to make that happen. */
6412 function_arg_record_value_2 (const_tree type, HOST_WIDE_INT startbitpos,
6413 struct function_arg_record_value_parms *parms,
6419 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
6421 if (TREE_CODE (field) == FIELD_DECL && DECL_PACKED (field))
6428 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
6430 if (TREE_CODE (field) == FIELD_DECL)
6432 HOST_WIDE_INT bitpos = startbitpos;
6434 if (DECL_SIZE (field) != 0)
6436 if (integer_zerop (DECL_SIZE (field)))
6439 if (host_integerp (bit_position (field), 1))
6440 bitpos += int_bit_position (field);
6443 /* ??? FIXME: else assume zero offset. */
6445 if (TREE_CODE (TREE_TYPE (field)) == RECORD_TYPE)
6446 function_arg_record_value_2 (TREE_TYPE (field),
6450 else if ((FLOAT_TYPE_P (TREE_TYPE (field))
6451 || TREE_CODE (TREE_TYPE (field)) == VECTOR_TYPE)
6456 int this_slotno = parms->slotno + bitpos / BITS_PER_WORD;
6457 int regno, nregs, pos;
6458 enum machine_mode mode = DECL_MODE (field);
6461 function_arg_record_value_3 (bitpos, parms);
6463 if (TREE_CODE (TREE_TYPE (field)) == VECTOR_TYPE
6466 mode = TYPE_MODE (TREE_TYPE (TREE_TYPE (field)));
6467 nregs = TYPE_VECTOR_SUBPARTS (TREE_TYPE (field));
6469 else if (TREE_CODE (TREE_TYPE (field)) == COMPLEX_TYPE)
6471 mode = TYPE_MODE (TREE_TYPE (TREE_TYPE (field)));
6477 regno = SPARC_FP_ARG_FIRST + this_slotno * 2;
6478 if (GET_MODE_SIZE (mode) <= 4 && (bitpos & 32) != 0)
6480 reg = gen_rtx_REG (mode, regno);
6481 pos = bitpos / BITS_PER_UNIT;
6482 XVECEXP (parms->ret, 0, parms->stack + parms->nregs)
6483 = gen_rtx_EXPR_LIST (VOIDmode, reg, GEN_INT (pos));
6487 regno += GET_MODE_SIZE (mode) / 4;
6488 reg = gen_rtx_REG (mode, regno);
6489 pos += GET_MODE_SIZE (mode);
6490 XVECEXP (parms->ret, 0, parms->stack + parms->nregs)
6491 = gen_rtx_EXPR_LIST (VOIDmode, reg, GEN_INT (pos));
6497 if (parms->intoffset == -1)
6498 parms->intoffset = bitpos;
6504 /* Used by function_arg and sparc_function_value_1 to implement the complex
6505 conventions of the 64-bit ABI for passing and returning structures.
6506 Return an expression valid as a return value for the FUNCTION_ARG
6507 and TARGET_FUNCTION_VALUE.
6509 TYPE is the data type of the argument (as a tree).
6510 This is null for libcalls where that information may
6512 MODE is the argument's machine mode.
6513 SLOTNO is the index number of the argument's slot in the parameter array.
6514 NAMED is nonzero if this argument is a named parameter
6515 (otherwise it is an extra parameter matching an ellipsis).
6516 REGBASE is the regno of the base register for the parameter array. */
6519 function_arg_record_value (const_tree type, enum machine_mode mode,
6520 int slotno, int named, int regbase)
6522 HOST_WIDE_INT typesize = int_size_in_bytes (type);
6523 struct function_arg_record_value_parms parms;
6526 parms.ret = NULL_RTX;
6527 parms.slotno = slotno;
6528 parms.named = named;
6529 parms.regbase = regbase;
6532 /* Compute how many registers we need. */
6534 parms.intoffset = 0;
6535 function_arg_record_value_1 (type, 0, &parms, false);
6537 /* Take into account pending integer fields. */
6538 if (parms.intoffset != -1)
6540 unsigned int startbit, endbit;
6541 int intslots, this_slotno;
6543 startbit = parms.intoffset & -BITS_PER_WORD;
6544 endbit = (typesize*BITS_PER_UNIT + BITS_PER_WORD - 1) & -BITS_PER_WORD;
6545 intslots = (endbit - startbit) / BITS_PER_WORD;
6546 this_slotno = slotno + parms.intoffset / BITS_PER_WORD;
6548 if (intslots > 0 && intslots > SPARC_INT_ARG_MAX - this_slotno)
6550 intslots = MAX (0, SPARC_INT_ARG_MAX - this_slotno);
6551 /* We need to pass this field on the stack. */
6555 parms.nregs += intslots;
6557 nregs = parms.nregs;
6559 /* Allocate the vector and handle some annoying special cases. */
6562 /* ??? Empty structure has no value? Duh? */
6565 /* Though there's nothing really to store, return a word register
6566 anyway so the rest of gcc doesn't go nuts. Returning a PARALLEL
6567 leads to breakage due to the fact that there are zero bytes to
6569 return gen_rtx_REG (mode, regbase);
6573 /* ??? C++ has structures with no fields, and yet a size. Give up
6574 for now and pass everything back in integer registers. */
6575 nregs = (typesize + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
6577 if (nregs + slotno > SPARC_INT_ARG_MAX)
6578 nregs = SPARC_INT_ARG_MAX - slotno;
6580 gcc_assert (nregs != 0);
6582 parms.ret = gen_rtx_PARALLEL (mode, rtvec_alloc (parms.stack + nregs));
6584 /* If at least one field must be passed on the stack, generate
6585 (parallel [(expr_list (nil) ...) ...]) so that all fields will
6586 also be passed on the stack. We can't do much better because the
6587 semantics of TARGET_ARG_PARTIAL_BYTES doesn't handle the case
6588 of structures for which the fields passed exclusively in registers
6589 are not at the beginning of the structure. */
6591 XVECEXP (parms.ret, 0, 0)
6592 = gen_rtx_EXPR_LIST (VOIDmode, NULL_RTX, const0_rtx);
6594 /* Fill in the entries. */
6596 parms.intoffset = 0;
6597 function_arg_record_value_2 (type, 0, &parms, false);
6598 function_arg_record_value_3 (typesize * BITS_PER_UNIT, &parms);
6600 gcc_assert (parms.nregs == nregs);
6605 /* Used by function_arg and sparc_function_value_1 to implement the conventions
6606 of the 64-bit ABI for passing and returning unions.
6607 Return an expression valid as a return value for the FUNCTION_ARG
6608 and TARGET_FUNCTION_VALUE.
6610 SIZE is the size in bytes of the union.
6611 MODE is the argument's machine mode.
6612 REGNO is the hard register the union will be passed in. */
6615 function_arg_union_value (int size, enum machine_mode mode, int slotno,
6618 int nwords = ROUND_ADVANCE (size), i;
6621 /* See comment in previous function for empty structures. */
6623 return gen_rtx_REG (mode, regno);
6625 if (slotno == SPARC_INT_ARG_MAX - 1)
6628 regs = gen_rtx_PARALLEL (mode, rtvec_alloc (nwords));
6630 for (i = 0; i < nwords; i++)
6632 /* Unions are passed left-justified. */
6633 XVECEXP (regs, 0, i)
6634 = gen_rtx_EXPR_LIST (VOIDmode,
6635 gen_rtx_REG (word_mode, regno),
6636 GEN_INT (UNITS_PER_WORD * i));
6643 /* Used by function_arg and sparc_function_value_1 to implement the conventions
6644 for passing and returning large (BLKmode) vectors.
6645 Return an expression valid as a return value for the FUNCTION_ARG
6646 and TARGET_FUNCTION_VALUE.
6648 SIZE is the size in bytes of the vector (at least 8 bytes).
6649 REGNO is the FP hard register the vector will be passed in. */
6652 function_arg_vector_value (int size, int regno)
6654 int i, nregs = size / 8;
6657 regs = gen_rtx_PARALLEL (BLKmode, rtvec_alloc (nregs));
6659 for (i = 0; i < nregs; i++)
6661 XVECEXP (regs, 0, i)
6662 = gen_rtx_EXPR_LIST (VOIDmode,
6663 gen_rtx_REG (DImode, regno + 2*i),
6670 /* Determine where to put an argument to a function.
6671 Value is zero to push the argument on the stack,
6672 or a hard register in which to store the argument.
6674 CUM is a variable of type CUMULATIVE_ARGS which gives info about
6675 the preceding args and about the function being called.
6676 MODE is the argument's machine mode.
6677 TYPE is the data type of the argument (as a tree).
6678 This is null for libcalls where that information may
6680 NAMED is true if this argument is a named parameter
6681 (otherwise it is an extra parameter matching an ellipsis).
6682 INCOMING_P is false for TARGET_FUNCTION_ARG, true for
6683 TARGET_FUNCTION_INCOMING_ARG. */
6686 sparc_function_arg_1 (cumulative_args_t cum_v, enum machine_mode mode,
6687 const_tree type, bool named, bool incoming_p)
6689 const CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
6691 int regbase = (incoming_p
6692 ? SPARC_INCOMING_INT_ARG_FIRST
6693 : SPARC_OUTGOING_INT_ARG_FIRST);
6694 int slotno, regno, padding;
6695 enum mode_class mclass = GET_MODE_CLASS (mode);
6697 slotno = function_arg_slotno (cum, mode, type, named, incoming_p,
6702 /* Vector types deserve special treatment because they are polymorphic wrt
6703 their mode, depending upon whether VIS instructions are enabled. */
6704 if (type && TREE_CODE (type) == VECTOR_TYPE)
6706 HOST_WIDE_INT size = int_size_in_bytes (type);
6707 gcc_assert ((TARGET_ARCH32 && size <= 8)
6708 || (TARGET_ARCH64 && size <= 16));
6710 if (mode == BLKmode)
6711 return function_arg_vector_value (size,
6712 SPARC_FP_ARG_FIRST + 2*slotno);
6714 mclass = MODE_FLOAT;
6718 return gen_rtx_REG (mode, regno);
6720 /* Structures up to 16 bytes in size are passed in arg slots on the stack
6721 and are promoted to registers if possible. */
6722 if (type && TREE_CODE (type) == RECORD_TYPE)
6724 HOST_WIDE_INT size = int_size_in_bytes (type);
6725 gcc_assert (size <= 16);
6727 return function_arg_record_value (type, mode, slotno, named, regbase);
6730 /* Unions up to 16 bytes in size are passed in integer registers. */
6731 else if (type && TREE_CODE (type) == UNION_TYPE)
6733 HOST_WIDE_INT size = int_size_in_bytes (type);
6734 gcc_assert (size <= 16);
6736 return function_arg_union_value (size, mode, slotno, regno);
6739 /* v9 fp args in reg slots beyond the int reg slots get passed in regs
6740 but also have the slot allocated for them.
6741 If no prototype is in scope fp values in register slots get passed
6742 in two places, either fp regs and int regs or fp regs and memory. */
6743 else if ((mclass == MODE_FLOAT || mclass == MODE_COMPLEX_FLOAT)
6744 && SPARC_FP_REG_P (regno))
6746 rtx reg = gen_rtx_REG (mode, regno);
6747 if (cum->prototype_p || cum->libcall_p)
6749 /* "* 2" because fp reg numbers are recorded in 4 byte
6752 /* ??? This will cause the value to be passed in the fp reg and
6753 in the stack. When a prototype exists we want to pass the
6754 value in the reg but reserve space on the stack. That's an
6755 optimization, and is deferred [for a bit]. */
6756 if ((regno - SPARC_FP_ARG_FIRST) >= SPARC_INT_ARG_MAX * 2)
6757 return gen_rtx_PARALLEL (mode,
6759 gen_rtx_EXPR_LIST (VOIDmode,
6760 NULL_RTX, const0_rtx),
6761 gen_rtx_EXPR_LIST (VOIDmode,
6765 /* ??? It seems that passing back a register even when past
6766 the area declared by REG_PARM_STACK_SPACE will allocate
6767 space appropriately, and will not copy the data onto the
6768 stack, exactly as we desire.
6770 This is due to locate_and_pad_parm being called in
6771 expand_call whenever reg_parm_stack_space > 0, which
6772 while beneficial to our example here, would seem to be
6773 in error from what had been intended. Ho hum... -- r~ */
6781 if ((regno - SPARC_FP_ARG_FIRST) < SPARC_INT_ARG_MAX * 2)
6785 /* On incoming, we don't need to know that the value
6786 is passed in %f0 and %i0, and it confuses other parts
6787 causing needless spillage even on the simplest cases. */
6791 intreg = (SPARC_OUTGOING_INT_ARG_FIRST
6792 + (regno - SPARC_FP_ARG_FIRST) / 2);
6794 v0 = gen_rtx_EXPR_LIST (VOIDmode, reg, const0_rtx);
6795 v1 = gen_rtx_EXPR_LIST (VOIDmode, gen_rtx_REG (mode, intreg),
6797 return gen_rtx_PARALLEL (mode, gen_rtvec (2, v0, v1));
6801 v0 = gen_rtx_EXPR_LIST (VOIDmode, NULL_RTX, const0_rtx);
6802 v1 = gen_rtx_EXPR_LIST (VOIDmode, reg, const0_rtx);
6803 return gen_rtx_PARALLEL (mode, gen_rtvec (2, v0, v1));
6808 /* All other aggregate types are passed in an integer register in a mode
6809 corresponding to the size of the type. */
6810 else if (type && AGGREGATE_TYPE_P (type))
6812 HOST_WIDE_INT size = int_size_in_bytes (type);
6813 gcc_assert (size <= 16);
6815 mode = mode_for_size (size * BITS_PER_UNIT, MODE_INT, 0);
6818 return gen_rtx_REG (mode, regno);
6821 /* Handle the TARGET_FUNCTION_ARG target hook. */
6824 sparc_function_arg (cumulative_args_t cum, enum machine_mode mode,
6825 const_tree type, bool named)
6827 return sparc_function_arg_1 (cum, mode, type, named, false);
6830 /* Handle the TARGET_FUNCTION_INCOMING_ARG target hook. */
6833 sparc_function_incoming_arg (cumulative_args_t cum, enum machine_mode mode,
6834 const_tree type, bool named)
6836 return sparc_function_arg_1 (cum, mode, type, named, true);
6839 /* For sparc64, objects requiring 16 byte alignment are passed that way. */
6842 sparc_function_arg_boundary (enum machine_mode mode, const_tree type)
6844 return ((TARGET_ARCH64
6845 && (GET_MODE_ALIGNMENT (mode) == 128
6846 || (type && TYPE_ALIGN (type) == 128)))
6851 /* For an arg passed partly in registers and partly in memory,
6852 this is the number of bytes of registers used.
6853 For args passed entirely in registers or entirely in memory, zero.
6855 Any arg that starts in the first 6 regs but won't entirely fit in them
6856 needs partial registers on v8. On v9, structures with integer
6857 values in arg slots 5,6 will be passed in %o5 and SP+176, and complex fp
6858 values that begin in the last fp reg [where "last fp reg" varies with the
6859 mode] will be split between that reg and memory. */
6862 sparc_arg_partial_bytes (cumulative_args_t cum, enum machine_mode mode,
6863 tree type, bool named)
6865 int slotno, regno, padding;
6867 /* We pass false for incoming_p here, it doesn't matter. */
6868 slotno = function_arg_slotno (get_cumulative_args (cum), mode, type, named,
6869 false, ®no, &padding);
6876 if ((slotno + (mode == BLKmode
6877 ? ROUND_ADVANCE (int_size_in_bytes (type))
6878 : ROUND_ADVANCE (GET_MODE_SIZE (mode))))
6879 > SPARC_INT_ARG_MAX)
6880 return (SPARC_INT_ARG_MAX - slotno) * UNITS_PER_WORD;
6884 /* We are guaranteed by pass_by_reference that the size of the
6885 argument is not greater than 16 bytes, so we only need to return
6886 one word if the argument is partially passed in registers. */
6888 if (type && AGGREGATE_TYPE_P (type))
6890 int size = int_size_in_bytes (type);
6892 if (size > UNITS_PER_WORD
6893 && slotno == SPARC_INT_ARG_MAX - 1)
6894 return UNITS_PER_WORD;
6896 else if (GET_MODE_CLASS (mode) == MODE_COMPLEX_INT
6897 || (GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT
6898 && ! (TARGET_FPU && named)))
6900 /* The complex types are passed as packed types. */
6901 if (GET_MODE_SIZE (mode) > UNITS_PER_WORD
6902 && slotno == SPARC_INT_ARG_MAX - 1)
6903 return UNITS_PER_WORD;
6905 else if (GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT)
6907 if ((slotno + GET_MODE_SIZE (mode) / UNITS_PER_WORD)
6909 return UNITS_PER_WORD;
6916 /* Handle the TARGET_PASS_BY_REFERENCE target hook.
6917 Specify whether to pass the argument by reference. */
6920 sparc_pass_by_reference (cumulative_args_t cum ATTRIBUTE_UNUSED,
6921 enum machine_mode mode, const_tree type,
6922 bool named ATTRIBUTE_UNUSED)
6925 /* Original SPARC 32-bit ABI says that structures and unions,
6926 and quad-precision floats are passed by reference. For Pascal,
6927 also pass arrays by reference. All other base types are passed
6930 Extended ABI (as implemented by the Sun compiler) says that all
6931 complex floats are passed by reference. Pass complex integers
6932 in registers up to 8 bytes. More generally, enforce the 2-word
6933 cap for passing arguments in registers.
6935 Vector ABI (as implemented by the Sun VIS SDK) says that vector
6936 integers are passed like floats of the same size, that is in
6937 registers up to 8 bytes. Pass all vector floats by reference
6938 like structure and unions. */
6939 return ((type && (AGGREGATE_TYPE_P (type) || VECTOR_FLOAT_TYPE_P (type)))
6941 /* Catch CDImode, TFmode, DCmode and TCmode. */
6942 || GET_MODE_SIZE (mode) > 8
6944 && TREE_CODE (type) == VECTOR_TYPE
6945 && (unsigned HOST_WIDE_INT) int_size_in_bytes (type) > 8));
6947 /* Original SPARC 64-bit ABI says that structures and unions
6948 smaller than 16 bytes are passed in registers, as well as
6949 all other base types.
6951 Extended ABI (as implemented by the Sun compiler) says that
6952 complex floats are passed in registers up to 16 bytes. Pass
6953 all complex integers in registers up to 16 bytes. More generally,
6954 enforce the 2-word cap for passing arguments in registers.
6956 Vector ABI (as implemented by the Sun VIS SDK) says that vector
6957 integers are passed like floats of the same size, that is in
6958 registers (up to 16 bytes). Pass all vector floats like structure
6961 && (AGGREGATE_TYPE_P (type) || TREE_CODE (type) == VECTOR_TYPE)
6962 && (unsigned HOST_WIDE_INT) int_size_in_bytes (type) > 16)
6963 /* Catch CTImode and TCmode. */
6964 || GET_MODE_SIZE (mode) > 16);
6967 /* Handle the TARGET_FUNCTION_ARG_ADVANCE hook.
6968 Update the data in CUM to advance over an argument
6969 of mode MODE and data type TYPE.
6970 TYPE is null for libcalls where that information may not be available. */
6973 sparc_function_arg_advance (cumulative_args_t cum_v, enum machine_mode mode,
6974 const_tree type, bool named)
6976 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
6979 /* We pass false for incoming_p here, it doesn't matter. */
6980 function_arg_slotno (cum, mode, type, named, false, ®no, &padding);
6982 /* If argument requires leading padding, add it. */
6983 cum->words += padding;
6987 cum->words += (mode != BLKmode
6988 ? ROUND_ADVANCE (GET_MODE_SIZE (mode))
6989 : ROUND_ADVANCE (int_size_in_bytes (type)));
6993 if (type && AGGREGATE_TYPE_P (type))
6995 int size = int_size_in_bytes (type);
6999 else if (size <= 16)
7001 else /* passed by reference */
7006 cum->words += (mode != BLKmode
7007 ? ROUND_ADVANCE (GET_MODE_SIZE (mode))
7008 : ROUND_ADVANCE (int_size_in_bytes (type)));
7013 /* Handle the FUNCTION_ARG_PADDING macro.
7014 For the 64 bit ABI structs are always stored left shifted in their
7018 function_arg_padding (enum machine_mode mode, const_tree type)
7020 if (TARGET_ARCH64 && type != 0 && AGGREGATE_TYPE_P (type))
7023 /* Fall back to the default. */
7024 return DEFAULT_FUNCTION_ARG_PADDING (mode, type);
7027 /* Handle the TARGET_RETURN_IN_MEMORY target hook.
7028 Specify whether to return the return value in memory. */
7031 sparc_return_in_memory (const_tree type, const_tree fntype ATTRIBUTE_UNUSED)
7034 /* Original SPARC 32-bit ABI says that structures and unions,
7035 and quad-precision floats are returned in memory. All other
7036 base types are returned in registers.
7038 Extended ABI (as implemented by the Sun compiler) says that
7039 all complex floats are returned in registers (8 FP registers
7040 at most for '_Complex long double'). Return all complex integers
7041 in registers (4 at most for '_Complex long long').
7043 Vector ABI (as implemented by the Sun VIS SDK) says that vector
7044 integers are returned like floats of the same size, that is in
7045 registers up to 8 bytes and in memory otherwise. Return all
7046 vector floats in memory like structure and unions; note that
7047 they always have BLKmode like the latter. */
7048 return (TYPE_MODE (type) == BLKmode
7049 || TYPE_MODE (type) == TFmode
7050 || (TREE_CODE (type) == VECTOR_TYPE
7051 && (unsigned HOST_WIDE_INT) int_size_in_bytes (type) > 8));
7053 /* Original SPARC 64-bit ABI says that structures and unions
7054 smaller than 32 bytes are returned in registers, as well as
7055 all other base types.
7057 Extended ABI (as implemented by the Sun compiler) says that all
7058 complex floats are returned in registers (8 FP registers at most
7059 for '_Complex long double'). Return all complex integers in
7060 registers (4 at most for '_Complex TItype').
7062 Vector ABI (as implemented by the Sun VIS SDK) says that vector
7063 integers are returned like floats of the same size, that is in
7064 registers. Return all vector floats like structure and unions;
7065 note that they always have BLKmode like the latter. */
7066 return (TYPE_MODE (type) == BLKmode
7067 && (unsigned HOST_WIDE_INT) int_size_in_bytes (type) > 32);
7070 /* Handle the TARGET_STRUCT_VALUE target hook.
7071 Return where to find the structure return value address. */
7074 sparc_struct_value_rtx (tree fndecl, int incoming)
7083 mem = gen_frame_mem (Pmode, plus_constant (Pmode, frame_pointer_rtx,
7084 STRUCT_VALUE_OFFSET));
7086 mem = gen_frame_mem (Pmode, plus_constant (Pmode, stack_pointer_rtx,
7087 STRUCT_VALUE_OFFSET));
7089 /* Only follow the SPARC ABI for fixed-size structure returns.
7090 Variable size structure returns are handled per the normal
7091 procedures in GCC. This is enabled by -mstd-struct-return */
7093 && sparc_std_struct_return
7094 && TYPE_SIZE_UNIT (TREE_TYPE (fndecl))
7095 && TREE_CODE (TYPE_SIZE_UNIT (TREE_TYPE (fndecl))) == INTEGER_CST)
7097 /* We must check and adjust the return address, as it is
7098 optional as to whether the return object is really
7100 rtx ret_reg = gen_rtx_REG (Pmode, 31);
7101 rtx scratch = gen_reg_rtx (SImode);
7102 rtx endlab = gen_label_rtx ();
7104 /* Calculate the return object size */
7105 tree size = TYPE_SIZE_UNIT (TREE_TYPE (fndecl));
7106 rtx size_rtx = GEN_INT (TREE_INT_CST_LOW (size) & 0xfff);
7107 /* Construct a temporary return value */
7109 = assign_stack_local (Pmode, TREE_INT_CST_LOW (size), 0);
7111 /* Implement SPARC 32-bit psABI callee return struct checking:
7113 Fetch the instruction where we will return to and see if
7114 it's an unimp instruction (the most significant 10 bits
7116 emit_move_insn (scratch, gen_rtx_MEM (SImode,
7117 plus_constant (Pmode,
7119 /* Assume the size is valid and pre-adjust */
7120 emit_insn (gen_add3_insn (ret_reg, ret_reg, GEN_INT (4)));
7121 emit_cmp_and_jump_insns (scratch, size_rtx, EQ, const0_rtx, SImode,
7123 emit_insn (gen_sub3_insn (ret_reg, ret_reg, GEN_INT (4)));
7124 /* Write the address of the memory pointed to by temp_val into
7125 the memory pointed to by mem */
7126 emit_move_insn (mem, XEXP (temp_val, 0));
7127 emit_label (endlab);
7134 /* Handle TARGET_FUNCTION_VALUE, and TARGET_LIBCALL_VALUE target hook.
7135 For v9, function return values are subject to the same rules as arguments,
7136 except that up to 32 bytes may be returned in registers. */
7139 sparc_function_value_1 (const_tree type, enum machine_mode mode,
7142 /* Beware that the two values are swapped here wrt function_arg. */
7143 int regbase = (outgoing
7144 ? SPARC_INCOMING_INT_ARG_FIRST
7145 : SPARC_OUTGOING_INT_ARG_FIRST);
7146 enum mode_class mclass = GET_MODE_CLASS (mode);
7149 /* Vector types deserve special treatment because they are polymorphic wrt
7150 their mode, depending upon whether VIS instructions are enabled. */
7151 if (type && TREE_CODE (type) == VECTOR_TYPE)
7153 HOST_WIDE_INT size = int_size_in_bytes (type);
7154 gcc_assert ((TARGET_ARCH32 && size <= 8)
7155 || (TARGET_ARCH64 && size <= 32));
7157 if (mode == BLKmode)
7158 return function_arg_vector_value (size,
7159 SPARC_FP_ARG_FIRST);
7161 mclass = MODE_FLOAT;
7164 if (TARGET_ARCH64 && type)
7166 /* Structures up to 32 bytes in size are returned in registers. */
7167 if (TREE_CODE (type) == RECORD_TYPE)
7169 HOST_WIDE_INT size = int_size_in_bytes (type);
7170 gcc_assert (size <= 32);
7172 return function_arg_record_value (type, mode, 0, 1, regbase);
7175 /* Unions up to 32 bytes in size are returned in integer registers. */
7176 else if (TREE_CODE (type) == UNION_TYPE)
7178 HOST_WIDE_INT size = int_size_in_bytes (type);
7179 gcc_assert (size <= 32);
7181 return function_arg_union_value (size, mode, 0, regbase);
7184 /* Objects that require it are returned in FP registers. */
7185 else if (mclass == MODE_FLOAT || mclass == MODE_COMPLEX_FLOAT)
7188 /* All other aggregate types are returned in an integer register in a
7189 mode corresponding to the size of the type. */
7190 else if (AGGREGATE_TYPE_P (type))
7192 /* All other aggregate types are passed in an integer register
7193 in a mode corresponding to the size of the type. */
7194 HOST_WIDE_INT size = int_size_in_bytes (type);
7195 gcc_assert (size <= 32);
7197 mode = mode_for_size (size * BITS_PER_UNIT, MODE_INT, 0);
7199 /* ??? We probably should have made the same ABI change in
7200 3.4.0 as the one we made for unions. The latter was
7201 required by the SCD though, while the former is not
7202 specified, so we favored compatibility and efficiency.
7204 Now we're stuck for aggregates larger than 16 bytes,
7205 because OImode vanished in the meantime. Let's not
7206 try to be unduly clever, and simply follow the ABI
7207 for unions in that case. */
7208 if (mode == BLKmode)
7209 return function_arg_union_value (size, mode, 0, regbase);
7214 /* We should only have pointer and integer types at this point. This
7215 must match sparc_promote_function_mode. */
7216 else if (mclass == MODE_INT && GET_MODE_SIZE (mode) < UNITS_PER_WORD)
7220 /* We should only have pointer and integer types at this point. This must
7221 match sparc_promote_function_mode. */
7222 else if (TARGET_ARCH32
7223 && mclass == MODE_INT
7224 && GET_MODE_SIZE (mode) < UNITS_PER_WORD)
7227 if ((mclass == MODE_FLOAT || mclass == MODE_COMPLEX_FLOAT) && TARGET_FPU)
7228 regno = SPARC_FP_ARG_FIRST;
7232 return gen_rtx_REG (mode, regno);
7235 /* Handle TARGET_FUNCTION_VALUE.
7236 On the SPARC, the value is found in the first "output" register, but the
7237 called function leaves it in the first "input" register. */
7240 sparc_function_value (const_tree valtype,
7241 const_tree fn_decl_or_type ATTRIBUTE_UNUSED,
7244 return sparc_function_value_1 (valtype, TYPE_MODE (valtype), outgoing);
7247 /* Handle TARGET_LIBCALL_VALUE. */
7250 sparc_libcall_value (enum machine_mode mode,
7251 const_rtx fun ATTRIBUTE_UNUSED)
7253 return sparc_function_value_1 (NULL_TREE, mode, false);
7256 /* Handle FUNCTION_VALUE_REGNO_P.
7257 On the SPARC, the first "output" reg is used for integer values, and the
7258 first floating point register is used for floating point values. */
7261 sparc_function_value_regno_p (const unsigned int regno)
7263 return (regno == 8 || regno == 32);
7266 /* Do what is necessary for `va_start'. We look at the current function
7267 to determine if stdarg or varargs is used and return the address of
7268 the first unnamed parameter. */
7271 sparc_builtin_saveregs (void)
7273 int first_reg = crtl->args.info.words;
7277 for (regno = first_reg; regno < SPARC_INT_ARG_MAX; regno++)
7278 emit_move_insn (gen_rtx_MEM (word_mode,
7279 gen_rtx_PLUS (Pmode,
7281 GEN_INT (FIRST_PARM_OFFSET (0)
7284 gen_rtx_REG (word_mode,
7285 SPARC_INCOMING_INT_ARG_FIRST + regno));
7287 address = gen_rtx_PLUS (Pmode,
7289 GEN_INT (FIRST_PARM_OFFSET (0)
7290 + UNITS_PER_WORD * first_reg));
7295 /* Implement `va_start' for stdarg. */
7298 sparc_va_start (tree valist, rtx nextarg)
7300 nextarg = expand_builtin_saveregs ();
7301 std_expand_builtin_va_start (valist, nextarg);
7304 /* Implement `va_arg' for stdarg. */
7307 sparc_gimplify_va_arg (tree valist, tree type, gimple_seq *pre_p,
7310 HOST_WIDE_INT size, rsize, align;
7313 tree ptrtype = build_pointer_type (type);
7315 if (pass_by_reference (NULL, TYPE_MODE (type), type, false))
7318 size = rsize = UNITS_PER_WORD;
7324 size = int_size_in_bytes (type);
7325 rsize = (size + UNITS_PER_WORD - 1) & -UNITS_PER_WORD;
7330 /* For SPARC64, objects requiring 16-byte alignment get it. */
7331 if (TYPE_ALIGN (type) >= 2 * (unsigned) BITS_PER_WORD)
7332 align = 2 * UNITS_PER_WORD;
7334 /* SPARC-V9 ABI states that structures up to 16 bytes in size
7335 are left-justified in their slots. */
7336 if (AGGREGATE_TYPE_P (type))
7339 size = rsize = UNITS_PER_WORD;
7349 incr = fold_build_pointer_plus_hwi (incr, align - 1);
7350 incr = fold_convert (sizetype, incr);
7351 incr = fold_build2 (BIT_AND_EXPR, sizetype, incr,
7353 incr = fold_convert (ptr_type_node, incr);
7356 gimplify_expr (&incr, pre_p, post_p, is_gimple_val, fb_rvalue);
7359 if (BYTES_BIG_ENDIAN && size < rsize)
7360 addr = fold_build_pointer_plus_hwi (incr, rsize - size);
7364 addr = fold_convert (build_pointer_type (ptrtype), addr);
7365 addr = build_va_arg_indirect_ref (addr);
7368 /* If the address isn't aligned properly for the type, we need a temporary.
7369 FIXME: This is inefficient, usually we can do this in registers. */
7370 else if (align == 0 && TYPE_ALIGN (type) > BITS_PER_WORD)
7372 tree tmp = create_tmp_var (type, "va_arg_tmp");
7373 tree dest_addr = build_fold_addr_expr (tmp);
7374 tree copy = build_call_expr (builtin_decl_implicit (BUILT_IN_MEMCPY),
7375 3, dest_addr, addr, size_int (rsize));
7376 TREE_ADDRESSABLE (tmp) = 1;
7377 gimplify_and_add (copy, pre_p);
7382 addr = fold_convert (ptrtype, addr);
7384 incr = fold_build_pointer_plus_hwi (incr, rsize);
7385 gimplify_assign (valist, incr, post_p);
7387 return build_va_arg_indirect_ref (addr);
7390 /* Implement the TARGET_VECTOR_MODE_SUPPORTED_P target hook.
7391 Specify whether the vector mode is supported by the hardware. */
7394 sparc_vector_mode_supported_p (enum machine_mode mode)
7396 return TARGET_VIS && VECTOR_MODE_P (mode) ? true : false;
7399 /* Implement the TARGET_VECTORIZE_PREFERRED_SIMD_MODE target hook. */
7401 static enum machine_mode
7402 sparc_preferred_simd_mode (enum machine_mode mode)
7420 /* Return the string to output an unconditional branch to LABEL, which is
7421 the operand number of the label.
7423 DEST is the destination insn (i.e. the label), INSN is the source. */
7426 output_ubranch (rtx dest, rtx insn)
7428 static char string[64];
7429 bool v9_form = false;
7433 /* Even if we are trying to use cbcond for this, evaluate
7434 whether we can use V9 branches as our backup plan. */
7437 if (INSN_ADDRESSES_SET_P ())
7438 delta = (INSN_ADDRESSES (INSN_UID (dest))
7439 - INSN_ADDRESSES (INSN_UID (insn)));
7441 /* Leave some instructions for "slop". */
7442 if (TARGET_V9 && delta >= -260000 && delta < 260000)
7447 bool emit_nop = emit_cbcond_nop (insn);
7451 if (delta < -500 || delta > 500)
7457 rval = "ba,a,pt\t%%xcc, %l0";
7464 rval = "cwbe\t%%g0, %%g0, %l0\n\tnop";
7466 rval = "cwbe\t%%g0, %%g0, %l0";
7472 strcpy (string, "ba%*,pt\t%%xcc, ");
7474 strcpy (string, "b%*\t");
7476 p = strchr (string, '\0');
7487 /* Return the string to output a conditional branch to LABEL, which is
7488 the operand number of the label. OP is the conditional expression.
7489 XEXP (OP, 0) is assumed to be a condition code register (integer or
7490 floating point) and its mode specifies what kind of comparison we made.
7492 DEST is the destination insn (i.e. the label), INSN is the source.
7494 REVERSED is nonzero if we should reverse the sense of the comparison.
7496 ANNUL is nonzero if we should generate an annulling branch. */
7499 output_cbranch (rtx op, rtx dest, int label, int reversed, int annul,
7502 static char string[64];
7503 enum rtx_code code = GET_CODE (op);
7504 rtx cc_reg = XEXP (op, 0);
7505 enum machine_mode mode = GET_MODE (cc_reg);
7506 const char *labelno, *branch;
7507 int spaces = 8, far;
7510 /* v9 branches are limited to +-1MB. If it is too far away,
7523 fbne,a,pn %fcc2, .LC29
7531 far = TARGET_V9 && (get_attr_length (insn) >= 3);
7534 /* Reversal of FP compares takes care -- an ordered compare
7535 becomes an unordered compare and vice versa. */
7536 if (mode == CCFPmode || mode == CCFPEmode)
7537 code = reverse_condition_maybe_unordered (code);
7539 code = reverse_condition (code);
7542 /* Start by writing the branch condition. */
7543 if (mode == CCFPmode || mode == CCFPEmode)
7594 /* ??? !v9: FP branches cannot be preceded by another floating point
7595 insn. Because there is currently no concept of pre-delay slots,
7596 we can fix this only by always emitting a nop before a floating
7601 strcpy (string, "nop\n\t");
7602 strcat (string, branch);
7615 if (mode == CC_NOOVmode || mode == CCX_NOOVmode)
7627 if (mode == CC_NOOVmode || mode == CCX_NOOVmode)
7648 strcpy (string, branch);
7650 spaces -= strlen (branch);
7651 p = strchr (string, '\0');
7653 /* Now add the annulling, the label, and a possible noop. */
7666 if (! far && insn && INSN_ADDRESSES_SET_P ())
7668 int delta = (INSN_ADDRESSES (INSN_UID (dest))
7669 - INSN_ADDRESSES (INSN_UID (insn)));
7670 /* Leave some instructions for "slop". */
7671 if (delta < -260000 || delta >= 260000)
7675 if (mode == CCFPmode || mode == CCFPEmode)
7677 static char v9_fcc_labelno[] = "%%fccX, ";
7678 /* Set the char indicating the number of the fcc reg to use. */
7679 v9_fcc_labelno[5] = REGNO (cc_reg) - SPARC_FIRST_V9_FCC_REG + '0';
7680 labelno = v9_fcc_labelno;
7683 gcc_assert (REGNO (cc_reg) == SPARC_FCC_REG);
7687 else if (mode == CCXmode || mode == CCX_NOOVmode)
7689 labelno = "%%xcc, ";
7694 labelno = "%%icc, ";
7699 if (*labelno && insn && (note = find_reg_note (insn, REG_BR_PROB, NULL_RTX)))
7702 ((INTVAL (XEXP (note, 0)) >= REG_BR_PROB_BASE / 2) ^ far)
7715 strcpy (p, labelno);
7716 p = strchr (p, '\0');
7719 strcpy (p, ".+12\n\t nop\n\tb\t");
7720 /* Skip the next insn if requested or
7721 if we know that it will be a nop. */
7722 if (annul || ! final_sequence)
7736 /* Emit a library call comparison between floating point X and Y.
7737 COMPARISON is the operator to compare with (EQ, NE, GT, etc).
7738 Return the new operator to be used in the comparison sequence.
7740 TARGET_ARCH64 uses _Qp_* functions, which use pointers to TFmode
7741 values as arguments instead of the TFmode registers themselves,
7742 that's why we cannot call emit_float_lib_cmp. */
7745 sparc_emit_float_lib_cmp (rtx x, rtx y, enum rtx_code comparison)
7748 rtx slot0, slot1, result, tem, tem2, libfunc;
7749 enum machine_mode mode;
7750 enum rtx_code new_comparison;
7755 qpfunc = (TARGET_ARCH64 ? "_Qp_feq" : "_Q_feq");
7759 qpfunc = (TARGET_ARCH64 ? "_Qp_fne" : "_Q_fne");
7763 qpfunc = (TARGET_ARCH64 ? "_Qp_fgt" : "_Q_fgt");
7767 qpfunc = (TARGET_ARCH64 ? "_Qp_fge" : "_Q_fge");
7771 qpfunc = (TARGET_ARCH64 ? "_Qp_flt" : "_Q_flt");
7775 qpfunc = (TARGET_ARCH64 ? "_Qp_fle" : "_Q_fle");
7786 qpfunc = (TARGET_ARCH64 ? "_Qp_cmp" : "_Q_cmp");
7797 tree expr = MEM_EXPR (x);
7799 mark_addressable (expr);
7804 slot0 = assign_stack_temp (TFmode, GET_MODE_SIZE(TFmode));
7805 emit_move_insn (slot0, x);
7810 tree expr = MEM_EXPR (y);
7812 mark_addressable (expr);
7817 slot1 = assign_stack_temp (TFmode, GET_MODE_SIZE(TFmode));
7818 emit_move_insn (slot1, y);
7821 libfunc = gen_rtx_SYMBOL_REF (Pmode, qpfunc);
7822 emit_library_call (libfunc, LCT_NORMAL,
7824 XEXP (slot0, 0), Pmode,
7825 XEXP (slot1, 0), Pmode);
7830 libfunc = gen_rtx_SYMBOL_REF (Pmode, qpfunc);
7831 emit_library_call (libfunc, LCT_NORMAL,
7833 x, TFmode, y, TFmode);
7838 /* Immediately move the result of the libcall into a pseudo
7839 register so reload doesn't clobber the value if it needs
7840 the return register for a spill reg. */
7841 result = gen_reg_rtx (mode);
7842 emit_move_insn (result, hard_libcall_value (mode, libfunc));
7847 return gen_rtx_NE (VOIDmode, result, const0_rtx);
7850 new_comparison = (comparison == UNORDERED ? EQ : NE);
7851 return gen_rtx_fmt_ee (new_comparison, VOIDmode, result, GEN_INT(3));
7854 new_comparison = (comparison == UNGT ? GT : NE);
7855 return gen_rtx_fmt_ee (new_comparison, VOIDmode, result, const1_rtx);
7857 return gen_rtx_NE (VOIDmode, result, const2_rtx);
7859 tem = gen_reg_rtx (mode);
7861 emit_insn (gen_andsi3 (tem, result, const1_rtx));
7863 emit_insn (gen_anddi3 (tem, result, const1_rtx));
7864 return gen_rtx_NE (VOIDmode, tem, const0_rtx);
7867 tem = gen_reg_rtx (mode);
7869 emit_insn (gen_addsi3 (tem, result, const1_rtx));
7871 emit_insn (gen_adddi3 (tem, result, const1_rtx));
7872 tem2 = gen_reg_rtx (mode);
7874 emit_insn (gen_andsi3 (tem2, tem, const2_rtx));
7876 emit_insn (gen_anddi3 (tem2, tem, const2_rtx));
7877 new_comparison = (comparison == UNEQ ? EQ : NE);
7878 return gen_rtx_fmt_ee (new_comparison, VOIDmode, tem2, const0_rtx);
7884 /* Generate an unsigned DImode to FP conversion. This is the same code
7885 optabs would emit if we didn't have TFmode patterns. */
7888 sparc_emit_floatunsdi (rtx *operands, enum machine_mode mode)
7890 rtx neglab, donelab, i0, i1, f0, in, out;
7893 in = force_reg (DImode, operands[1]);
7894 neglab = gen_label_rtx ();
7895 donelab = gen_label_rtx ();
7896 i0 = gen_reg_rtx (DImode);
7897 i1 = gen_reg_rtx (DImode);
7898 f0 = gen_reg_rtx (mode);
7900 emit_cmp_and_jump_insns (in, const0_rtx, LT, const0_rtx, DImode, 0, neglab);
7902 emit_insn (gen_rtx_SET (VOIDmode, out, gen_rtx_FLOAT (mode, in)));
7903 emit_jump_insn (gen_jump (donelab));
7906 emit_label (neglab);
7908 emit_insn (gen_lshrdi3 (i0, in, const1_rtx));
7909 emit_insn (gen_anddi3 (i1, in, const1_rtx));
7910 emit_insn (gen_iordi3 (i0, i0, i1));
7911 emit_insn (gen_rtx_SET (VOIDmode, f0, gen_rtx_FLOAT (mode, i0)));
7912 emit_insn (gen_rtx_SET (VOIDmode, out, gen_rtx_PLUS (mode, f0, f0)));
7914 emit_label (donelab);
7917 /* Generate an FP to unsigned DImode conversion. This is the same code
7918 optabs would emit if we didn't have TFmode patterns. */
7921 sparc_emit_fixunsdi (rtx *operands, enum machine_mode mode)
7923 rtx neglab, donelab, i0, i1, f0, in, out, limit;
7926 in = force_reg (mode, operands[1]);
7927 neglab = gen_label_rtx ();
7928 donelab = gen_label_rtx ();
7929 i0 = gen_reg_rtx (DImode);
7930 i1 = gen_reg_rtx (DImode);
7931 limit = gen_reg_rtx (mode);
7932 f0 = gen_reg_rtx (mode);
7934 emit_move_insn (limit,
7935 CONST_DOUBLE_FROM_REAL_VALUE (
7936 REAL_VALUE_ATOF ("9223372036854775808.0", mode), mode));
7937 emit_cmp_and_jump_insns (in, limit, GE, NULL_RTX, mode, 0, neglab);
7939 emit_insn (gen_rtx_SET (VOIDmode,
7941 gen_rtx_FIX (DImode, gen_rtx_FIX (mode, in))));
7942 emit_jump_insn (gen_jump (donelab));
7945 emit_label (neglab);
7947 emit_insn (gen_rtx_SET (VOIDmode, f0, gen_rtx_MINUS (mode, in, limit)));
7948 emit_insn (gen_rtx_SET (VOIDmode,
7950 gen_rtx_FIX (DImode, gen_rtx_FIX (mode, f0))));
7951 emit_insn (gen_movdi (i1, const1_rtx));
7952 emit_insn (gen_ashldi3 (i1, i1, GEN_INT (63)));
7953 emit_insn (gen_xordi3 (out, i0, i1));
7955 emit_label (donelab);
7958 /* Return the string to output a compare and branch instruction to DEST.
7959 DEST is the destination insn (i.e. the label), INSN is the source,
7960 and OP is the conditional expression. */
7963 output_cbcond (rtx op, rtx dest, rtx insn)
7965 enum machine_mode mode = GET_MODE (XEXP (op, 0));
7966 enum rtx_code code = GET_CODE (op);
7967 const char *cond_str, *tmpl;
7968 int far, emit_nop, len;
7969 static char string[64];
7972 /* Compare and Branch is limited to +-2KB. If it is too far away,
7984 len = get_attr_length (insn);
7987 emit_nop = len == 2;
7990 code = reverse_condition (code);
7992 size_char = ((mode == SImode) ? 'w' : 'x');
8005 if (mode == CC_NOOVmode || mode == CCX_NOOVmode)
8020 if (mode == CC_NOOVmode || mode == CCX_NOOVmode)
8048 int veryfar = 1, delta;
8050 if (INSN_ADDRESSES_SET_P ())
8052 delta = (INSN_ADDRESSES (INSN_UID (dest))
8053 - INSN_ADDRESSES (INSN_UID (insn)));
8054 /* Leave some instructions for "slop". */
8055 if (delta >= -260000 && delta < 260000)
8060 tmpl = "c%cb%s\t%%1, %%2, .+16\n\tnop\n\tb\t%%3\n\tnop";
8062 tmpl = "c%cb%s\t%%1, %%2, .+16\n\tnop\n\tba,pt\t%%%%xcc, %%3\n\tnop";
8067 tmpl = "c%cb%s\t%%1, %%2, %%3\n\tnop";
8069 tmpl = "c%cb%s\t%%1, %%2, %%3";
8072 snprintf (string, sizeof(string), tmpl, size_char, cond_str);
8077 /* Return the string to output a conditional branch to LABEL, testing
8078 register REG. LABEL is the operand number of the label; REG is the
8079 operand number of the reg. OP is the conditional expression. The mode
8080 of REG says what kind of comparison we made.
8082 DEST is the destination insn (i.e. the label), INSN is the source.
8084 REVERSED is nonzero if we should reverse the sense of the comparison.
8086 ANNUL is nonzero if we should generate an annulling branch. */
8089 output_v9branch (rtx op, rtx dest, int reg, int label, int reversed,
8090 int annul, rtx insn)
8092 static char string[64];
8093 enum rtx_code code = GET_CODE (op);
8094 enum machine_mode mode = GET_MODE (XEXP (op, 0));
8099 /* branch on register are limited to +-128KB. If it is too far away,
8112 brgez,a,pn %o1, .LC29
8118 ba,pt %xcc, .LC29 */
8120 far = get_attr_length (insn) >= 3;
8122 /* If not floating-point or if EQ or NE, we can just reverse the code. */
8124 code = reverse_condition (code);
8126 /* Only 64 bit versions of these instructions exist. */
8127 gcc_assert (mode == DImode);
8129 /* Start by writing the branch condition. */
8134 strcpy (string, "brnz");
8138 strcpy (string, "brz");
8142 strcpy (string, "brgez");
8146 strcpy (string, "brlz");
8150 strcpy (string, "brlez");
8154 strcpy (string, "brgz");
8161 p = strchr (string, '\0');
8163 /* Now add the annulling, reg, label, and nop. */
8170 if (insn && (note = find_reg_note (insn, REG_BR_PROB, NULL_RTX)))
8173 ((INTVAL (XEXP (note, 0)) >= REG_BR_PROB_BASE / 2) ^ far)
8178 *p = p < string + 8 ? '\t' : ' ';
8186 int veryfar = 1, delta;
8188 if (INSN_ADDRESSES_SET_P ())
8190 delta = (INSN_ADDRESSES (INSN_UID (dest))
8191 - INSN_ADDRESSES (INSN_UID (insn)));
8192 /* Leave some instructions for "slop". */
8193 if (delta >= -260000 && delta < 260000)
8197 strcpy (p, ".+12\n\t nop\n\t");
8198 /* Skip the next insn if requested or
8199 if we know that it will be a nop. */
8200 if (annul || ! final_sequence)
8210 strcpy (p, "ba,pt\t%%xcc, ");
8224 /* Return 1, if any of the registers of the instruction are %l[0-7] or %o[0-7].
8225 Such instructions cannot be used in the delay slot of return insn on v9.
8226 If TEST is 0, also rename all %i[0-7] registers to their %o[0-7] counterparts.
8230 epilogue_renumber (register rtx *where, int test)
8232 register const char *fmt;
8234 register enum rtx_code code;
8239 code = GET_CODE (*where);
8244 if (REGNO (*where) >= 8 && REGNO (*where) < 24) /* oX or lX */
8246 if (! test && REGNO (*where) >= 24 && REGNO (*where) < 32)
8247 *where = gen_rtx_REG (GET_MODE (*where), OUTGOING_REGNO (REGNO(*where)));
8255 /* Do not replace the frame pointer with the stack pointer because
8256 it can cause the delayed instruction to load below the stack.
8257 This occurs when instructions like:
8259 (set (reg/i:SI 24 %i0)
8260 (mem/f:SI (plus:SI (reg/f:SI 30 %fp)
8261 (const_int -20 [0xffffffec])) 0))
8263 are in the return delayed slot. */
8265 if (GET_CODE (XEXP (*where, 0)) == REG
8266 && REGNO (XEXP (*where, 0)) == HARD_FRAME_POINTER_REGNUM
8267 && (GET_CODE (XEXP (*where, 1)) != CONST_INT
8268 || INTVAL (XEXP (*where, 1)) < SPARC_STACK_BIAS))
8273 if (SPARC_STACK_BIAS
8274 && GET_CODE (XEXP (*where, 0)) == REG
8275 && REGNO (XEXP (*where, 0)) == HARD_FRAME_POINTER_REGNUM)
8283 fmt = GET_RTX_FORMAT (code);
8285 for (i = GET_RTX_LENGTH (code) - 1; i >= 0; i--)
8290 for (j = XVECLEN (*where, i) - 1; j >= 0; j--)
8291 if (epilogue_renumber (&(XVECEXP (*where, i, j)), test))
8294 else if (fmt[i] == 'e'
8295 && epilogue_renumber (&(XEXP (*where, i)), test))
8301 /* Leaf functions and non-leaf functions have different needs. */
8304 reg_leaf_alloc_order[] = REG_LEAF_ALLOC_ORDER;
8307 reg_nonleaf_alloc_order[] = REG_ALLOC_ORDER;
8309 static const int *const reg_alloc_orders[] = {
8310 reg_leaf_alloc_order,
8311 reg_nonleaf_alloc_order};
8314 order_regs_for_local_alloc (void)
8316 static int last_order_nonleaf = 1;
8318 if (df_regs_ever_live_p (15) != last_order_nonleaf)
8320 last_order_nonleaf = !last_order_nonleaf;
8321 memcpy ((char *) reg_alloc_order,
8322 (const char *) reg_alloc_orders[last_order_nonleaf],
8323 FIRST_PSEUDO_REGISTER * sizeof (int));
8327 /* Return 1 if REG and MEM are legitimate enough to allow the various
8328 mem<-->reg splits to be run. */
8331 sparc_splitdi_legitimate (rtx reg, rtx mem)
8333 /* Punt if we are here by mistake. */
8334 gcc_assert (reload_completed);
8336 /* We must have an offsettable memory reference. */
8337 if (! offsettable_memref_p (mem))
8340 /* If we have legitimate args for ldd/std, we do not want
8341 the split to happen. */
8342 if ((REGNO (reg) % 2) == 0
8343 && mem_min_alignment (mem, 8))
8350 /* Like sparc_splitdi_legitimate but for REG <--> REG moves. */
8353 sparc_split_regreg_legitimate (rtx reg1, rtx reg2)
8357 if (GET_CODE (reg1) == SUBREG)
8358 reg1 = SUBREG_REG (reg1);
8359 if (GET_CODE (reg1) != REG)
8361 regno1 = REGNO (reg1);
8363 if (GET_CODE (reg2) == SUBREG)
8364 reg2 = SUBREG_REG (reg2);
8365 if (GET_CODE (reg2) != REG)
8367 regno2 = REGNO (reg2);
8369 if (SPARC_INT_REG_P (regno1) && SPARC_INT_REG_P (regno2))
8374 if ((SPARC_INT_REG_P (regno1) && SPARC_FP_REG_P (regno2))
8375 || (SPARC_FP_REG_P (regno1) && SPARC_INT_REG_P (regno2)))
8382 /* Return 1 if x and y are some kind of REG and they refer to
8383 different hard registers. This test is guaranteed to be
8384 run after reload. */
8387 sparc_absnegfloat_split_legitimate (rtx x, rtx y)
8389 if (GET_CODE (x) != REG)
8391 if (GET_CODE (y) != REG)
8393 if (REGNO (x) == REGNO (y))
8398 /* Return 1 if REGNO (reg1) is even and REGNO (reg1) == REGNO (reg2) - 1.
8399 This makes them candidates for using ldd and std insns.
8401 Note reg1 and reg2 *must* be hard registers. */
8404 registers_ok_for_ldd_peep (rtx reg1, rtx reg2)
8406 /* We might have been passed a SUBREG. */
8407 if (GET_CODE (reg1) != REG || GET_CODE (reg2) != REG)
8410 if (REGNO (reg1) % 2 != 0)
8413 /* Integer ldd is deprecated in SPARC V9 */
8414 if (TARGET_V9 && SPARC_INT_REG_P (REGNO (reg1)))
8417 return (REGNO (reg1) == REGNO (reg2) - 1);
8420 /* Return 1 if the addresses in mem1 and mem2 are suitable for use in
8423 This can only happen when addr1 and addr2, the addresses in mem1
8424 and mem2, are consecutive memory locations (addr1 + 4 == addr2).
8425 addr1 must also be aligned on a 64-bit boundary.
8427 Also iff dependent_reg_rtx is not null it should not be used to
8428 compute the address for mem1, i.e. we cannot optimize a sequence
8440 But, note that the transformation from:
8445 is perfectly fine. Thus, the peephole2 patterns always pass us
8446 the destination register of the first load, never the second one.
8448 For stores we don't have a similar problem, so dependent_reg_rtx is
8452 mems_ok_for_ldd_peep (rtx mem1, rtx mem2, rtx dependent_reg_rtx)
8456 HOST_WIDE_INT offset1;
8458 /* The mems cannot be volatile. */
8459 if (MEM_VOLATILE_P (mem1) || MEM_VOLATILE_P (mem2))
8462 /* MEM1 should be aligned on a 64-bit boundary. */
8463 if (MEM_ALIGN (mem1) < 64)
8466 addr1 = XEXP (mem1, 0);
8467 addr2 = XEXP (mem2, 0);
8469 /* Extract a register number and offset (if used) from the first addr. */
8470 if (GET_CODE (addr1) == PLUS)
8472 /* If not a REG, return zero. */
8473 if (GET_CODE (XEXP (addr1, 0)) != REG)
8477 reg1 = REGNO (XEXP (addr1, 0));
8478 /* The offset must be constant! */
8479 if (GET_CODE (XEXP (addr1, 1)) != CONST_INT)
8481 offset1 = INTVAL (XEXP (addr1, 1));
8484 else if (GET_CODE (addr1) != REG)
8488 reg1 = REGNO (addr1);
8489 /* This was a simple (mem (reg)) expression. Offset is 0. */
8493 /* Make sure the second address is a (mem (plus (reg) (const_int). */
8494 if (GET_CODE (addr2) != PLUS)
8497 if (GET_CODE (XEXP (addr2, 0)) != REG
8498 || GET_CODE (XEXP (addr2, 1)) != CONST_INT)
8501 if (reg1 != REGNO (XEXP (addr2, 0)))
8504 if (dependent_reg_rtx != NULL_RTX && reg1 == REGNO (dependent_reg_rtx))
8507 /* The first offset must be evenly divisible by 8 to ensure the
8508 address is 64 bit aligned. */
8509 if (offset1 % 8 != 0)
8512 /* The offset for the second addr must be 4 more than the first addr. */
8513 if (INTVAL (XEXP (addr2, 1)) != offset1 + 4)
8516 /* All the tests passed. addr1 and addr2 are valid for ldd and std
8521 /* Return 1 if reg is a pseudo, or is the first register in
8522 a hard register pair. This makes it suitable for use in
8523 ldd and std insns. */
8526 register_ok_for_ldd (rtx reg)
8528 /* We might have been passed a SUBREG. */
8532 if (REGNO (reg) < FIRST_PSEUDO_REGISTER)
8533 return (REGNO (reg) % 2 == 0);
8538 /* Return 1 if OP, a MEM, has an address which is known to be
8539 aligned to an 8-byte boundary. */
8542 memory_ok_for_ldd (rtx op)
8544 /* In 64-bit mode, we assume that the address is word-aligned. */
8545 if (TARGET_ARCH32 && !mem_min_alignment (op, 8))
8548 if (! can_create_pseudo_p ()
8549 && !strict_memory_address_p (Pmode, XEXP (op, 0)))
8555 /* Implement TARGET_PRINT_OPERAND_PUNCT_VALID_P. */
8558 sparc_print_operand_punct_valid_p (unsigned char code)
8571 /* Implement TARGET_PRINT_OPERAND.
8572 Print operand X (an rtx) in assembler syntax to file FILE.
8573 CODE is a letter or dot (`z' in `%z0') or 0 if no letter was specified.
8574 For `%' followed by punctuation, CODE is the punctuation and X is null. */
8577 sparc_print_operand (FILE *file, rtx x, int code)
8582 /* Output an insn in a delay slot. */
8584 sparc_indent_opcode = 1;
8586 fputs ("\n\t nop", file);
8589 /* Output an annul flag if there's nothing for the delay slot and we
8590 are optimizing. This is always used with '(' below.
8591 Sun OS 4.1.1 dbx can't handle an annulled unconditional branch;
8592 this is a dbx bug. So, we only do this when optimizing.
8593 On UltraSPARC, a branch in a delay slot causes a pipeline flush.
8594 Always emit a nop in case the next instruction is a branch. */
8595 if (! final_sequence && (optimize && (int)sparc_cpu < PROCESSOR_V9))
8599 /* Output a 'nop' if there's nothing for the delay slot and we are
8600 not optimizing. This is always used with '*' above. */
8601 if (! final_sequence && ! (optimize && (int)sparc_cpu < PROCESSOR_V9))
8602 fputs ("\n\t nop", file);
8603 else if (final_sequence)
8604 sparc_indent_opcode = 1;
8607 /* Output the right displacement from the saved PC on function return.
8608 The caller may have placed an "unimp" insn immediately after the call
8609 so we have to account for it. This insn is used in the 32-bit ABI
8610 when calling a function that returns a non zero-sized structure. The
8611 64-bit ABI doesn't have it. Be careful to have this test be the same
8612 as that for the call. The exception is when sparc_std_struct_return
8613 is enabled, the psABI is followed exactly and the adjustment is made
8614 by the code in sparc_struct_value_rtx. The call emitted is the same
8615 when sparc_std_struct_return is enabled. */
8617 && cfun->returns_struct
8618 && !sparc_std_struct_return
8619 && DECL_SIZE (DECL_RESULT (current_function_decl))
8620 && TREE_CODE (DECL_SIZE (DECL_RESULT (current_function_decl)))
8622 && !integer_zerop (DECL_SIZE (DECL_RESULT (current_function_decl))))
8628 /* Output the Embedded Medium/Anywhere code model base register. */
8629 fputs (EMBMEDANY_BASE_REG, file);
8632 /* Print some local dynamic TLS name. */
8633 assemble_name (file, get_some_local_dynamic_name ());
8637 /* Adjust the operand to take into account a RESTORE operation. */
8638 if (GET_CODE (x) == CONST_INT)
8640 else if (GET_CODE (x) != REG)
8641 output_operand_lossage ("invalid %%Y operand");
8642 else if (REGNO (x) < 8)
8643 fputs (reg_names[REGNO (x)], file);
8644 else if (REGNO (x) >= 24 && REGNO (x) < 32)
8645 fputs (reg_names[REGNO (x)-16], file);
8647 output_operand_lossage ("invalid %%Y operand");
8650 /* Print out the low order register name of a register pair. */
8651 if (WORDS_BIG_ENDIAN)
8652 fputs (reg_names[REGNO (x)+1], file);
8654 fputs (reg_names[REGNO (x)], file);
8657 /* Print out the high order register name of a register pair. */
8658 if (WORDS_BIG_ENDIAN)
8659 fputs (reg_names[REGNO (x)], file);
8661 fputs (reg_names[REGNO (x)+1], file);
8664 /* Print out the second register name of a register pair or quad.
8665 I.e., R (%o0) => %o1. */
8666 fputs (reg_names[REGNO (x)+1], file);
8669 /* Print out the third register name of a register quad.
8670 I.e., S (%o0) => %o2. */
8671 fputs (reg_names[REGNO (x)+2], file);
8674 /* Print out the fourth register name of a register quad.
8675 I.e., T (%o0) => %o3. */
8676 fputs (reg_names[REGNO (x)+3], file);
8679 /* Print a condition code register. */
8680 if (REGNO (x) == SPARC_ICC_REG)
8682 /* We don't handle CC[X]_NOOVmode because they're not supposed
8684 if (GET_MODE (x) == CCmode)
8685 fputs ("%icc", file);
8686 else if (GET_MODE (x) == CCXmode)
8687 fputs ("%xcc", file);
8692 /* %fccN register */
8693 fputs (reg_names[REGNO (x)], file);
8696 /* Print the operand's address only. */
8697 output_address (XEXP (x, 0));
8700 /* In this case we need a register. Use %g0 if the
8701 operand is const0_rtx. */
8703 || (GET_MODE (x) != VOIDmode && x == CONST0_RTX (GET_MODE (x))))
8705 fputs ("%g0", file);
8712 switch (GET_CODE (x))
8714 case IOR: fputs ("or", file); break;
8715 case AND: fputs ("and", file); break;
8716 case XOR: fputs ("xor", file); break;
8717 default: output_operand_lossage ("invalid %%A operand");
8722 switch (GET_CODE (x))
8724 case IOR: fputs ("orn", file); break;
8725 case AND: fputs ("andn", file); break;
8726 case XOR: fputs ("xnor", file); break;
8727 default: output_operand_lossage ("invalid %%B operand");
8731 /* This is used by the conditional move instructions. */
8734 enum rtx_code rc = GET_CODE (x);
8738 case NE: fputs ("ne", file); break;
8739 case EQ: fputs ("e", file); break;
8740 case GE: fputs ("ge", file); break;
8741 case GT: fputs ("g", file); break;
8742 case LE: fputs ("le", file); break;
8743 case LT: fputs ("l", file); break;
8744 case GEU: fputs ("geu", file); break;
8745 case GTU: fputs ("gu", file); break;
8746 case LEU: fputs ("leu", file); break;
8747 case LTU: fputs ("lu", file); break;
8748 case LTGT: fputs ("lg", file); break;
8749 case UNORDERED: fputs ("u", file); break;
8750 case ORDERED: fputs ("o", file); break;
8751 case UNLT: fputs ("ul", file); break;
8752 case UNLE: fputs ("ule", file); break;
8753 case UNGT: fputs ("ug", file); break;
8754 case UNGE: fputs ("uge", file); break;
8755 case UNEQ: fputs ("ue", file); break;
8756 default: output_operand_lossage ("invalid %%C operand");
8761 /* This are used by the movr instruction pattern. */
8764 enum rtx_code rc = GET_CODE (x);
8767 case NE: fputs ("ne", file); break;
8768 case EQ: fputs ("e", file); break;
8769 case GE: fputs ("gez", file); break;
8770 case LT: fputs ("lz", file); break;
8771 case LE: fputs ("lez", file); break;
8772 case GT: fputs ("gz", file); break;
8773 default: output_operand_lossage ("invalid %%D operand");
8780 /* Print a sign-extended character. */
8781 int i = trunc_int_for_mode (INTVAL (x), QImode);
8782 fprintf (file, "%d", i);
8787 /* Operand must be a MEM; write its address. */
8788 if (GET_CODE (x) != MEM)
8789 output_operand_lossage ("invalid %%f operand");
8790 output_address (XEXP (x, 0));
8795 /* Print a sign-extended 32-bit value. */
8797 if (GET_CODE(x) == CONST_INT)
8799 else if (GET_CODE(x) == CONST_DOUBLE)
8800 i = CONST_DOUBLE_LOW (x);
8803 output_operand_lossage ("invalid %%s operand");
8806 i = trunc_int_for_mode (i, SImode);
8807 fprintf (file, HOST_WIDE_INT_PRINT_DEC, i);
8812 /* Do nothing special. */
8816 /* Undocumented flag. */
8817 output_operand_lossage ("invalid operand output code");
8820 if (GET_CODE (x) == REG)
8821 fputs (reg_names[REGNO (x)], file);
8822 else if (GET_CODE (x) == MEM)
8825 /* Poor Sun assembler doesn't understand absolute addressing. */
8826 if (CONSTANT_P (XEXP (x, 0)))
8827 fputs ("%g0+", file);
8828 output_address (XEXP (x, 0));
8831 else if (GET_CODE (x) == HIGH)
8833 fputs ("%hi(", file);
8834 output_addr_const (file, XEXP (x, 0));
8837 else if (GET_CODE (x) == LO_SUM)
8839 sparc_print_operand (file, XEXP (x, 0), 0);
8840 if (TARGET_CM_MEDMID)
8841 fputs ("+%l44(", file);
8843 fputs ("+%lo(", file);
8844 output_addr_const (file, XEXP (x, 1));
8847 else if (GET_CODE (x) == CONST_DOUBLE
8848 && (GET_MODE (x) == VOIDmode
8849 || GET_MODE_CLASS (GET_MODE (x)) == MODE_INT))
8851 if (CONST_DOUBLE_HIGH (x) == 0)
8852 fprintf (file, "%u", (unsigned int) CONST_DOUBLE_LOW (x));
8853 else if (CONST_DOUBLE_HIGH (x) == -1
8854 && CONST_DOUBLE_LOW (x) < 0)
8855 fprintf (file, "%d", (int) CONST_DOUBLE_LOW (x));
8857 output_operand_lossage ("long long constant not a valid immediate operand");
8859 else if (GET_CODE (x) == CONST_DOUBLE)
8860 output_operand_lossage ("floating point constant not a valid immediate operand");
8861 else { output_addr_const (file, x); }
8864 /* Implement TARGET_PRINT_OPERAND_ADDRESS. */
8867 sparc_print_operand_address (FILE *file, rtx x)
8869 register rtx base, index = 0;
8871 register rtx addr = x;
8874 fputs (reg_names[REGNO (addr)], file);
8875 else if (GET_CODE (addr) == PLUS)
8877 if (CONST_INT_P (XEXP (addr, 0)))
8878 offset = INTVAL (XEXP (addr, 0)), base = XEXP (addr, 1);
8879 else if (CONST_INT_P (XEXP (addr, 1)))
8880 offset = INTVAL (XEXP (addr, 1)), base = XEXP (addr, 0);
8882 base = XEXP (addr, 0), index = XEXP (addr, 1);
8883 if (GET_CODE (base) == LO_SUM)
8885 gcc_assert (USE_AS_OFFSETABLE_LO10
8887 && ! TARGET_CM_MEDMID);
8888 output_operand (XEXP (base, 0), 0);
8889 fputs ("+%lo(", file);
8890 output_address (XEXP (base, 1));
8891 fprintf (file, ")+%d", offset);
8895 fputs (reg_names[REGNO (base)], file);
8897 fprintf (file, "%+d", offset);
8898 else if (REG_P (index))
8899 fprintf (file, "+%s", reg_names[REGNO (index)]);
8900 else if (GET_CODE (index) == SYMBOL_REF
8901 || GET_CODE (index) == LABEL_REF
8902 || GET_CODE (index) == CONST)
8903 fputc ('+', file), output_addr_const (file, index);
8904 else gcc_unreachable ();
8907 else if (GET_CODE (addr) == MINUS
8908 && GET_CODE (XEXP (addr, 1)) == LABEL_REF)
8910 output_addr_const (file, XEXP (addr, 0));
8912 output_addr_const (file, XEXP (addr, 1));
8913 fputs ("-.)", file);
8915 else if (GET_CODE (addr) == LO_SUM)
8917 output_operand (XEXP (addr, 0), 0);
8918 if (TARGET_CM_MEDMID)
8919 fputs ("+%l44(", file);
8921 fputs ("+%lo(", file);
8922 output_address (XEXP (addr, 1));
8926 && GET_CODE (addr) == CONST
8927 && GET_CODE (XEXP (addr, 0)) == MINUS
8928 && GET_CODE (XEXP (XEXP (addr, 0), 1)) == CONST
8929 && GET_CODE (XEXP (XEXP (XEXP (addr, 0), 1), 0)) == MINUS
8930 && XEXP (XEXP (XEXP (XEXP (addr, 0), 1), 0), 1) == pc_rtx)
8932 addr = XEXP (addr, 0);
8933 output_addr_const (file, XEXP (addr, 0));
8934 /* Group the args of the second CONST in parenthesis. */
8936 /* Skip past the second CONST--it does nothing for us. */
8937 output_addr_const (file, XEXP (XEXP (addr, 1), 0));
8938 /* Close the parenthesis. */
8943 output_addr_const (file, addr);
8947 /* Target hook for assembling integer objects. The sparc version has
8948 special handling for aligned DI-mode objects. */
8951 sparc_assemble_integer (rtx x, unsigned int size, int aligned_p)
8953 /* ??? We only output .xword's for symbols and only then in environments
8954 where the assembler can handle them. */
8955 if (aligned_p && size == 8
8956 && (GET_CODE (x) != CONST_INT && GET_CODE (x) != CONST_DOUBLE))
8960 assemble_integer_with_op ("\t.xword\t", x);
8965 assemble_aligned_integer (4, const0_rtx);
8966 assemble_aligned_integer (4, x);
8970 return default_assemble_integer (x, size, aligned_p);
8973 /* Return the value of a code used in the .proc pseudo-op that says
8974 what kind of result this function returns. For non-C types, we pick
8975 the closest C type. */
8977 #ifndef SHORT_TYPE_SIZE
8978 #define SHORT_TYPE_SIZE (BITS_PER_UNIT * 2)
8981 #ifndef INT_TYPE_SIZE
8982 #define INT_TYPE_SIZE BITS_PER_WORD
8985 #ifndef LONG_TYPE_SIZE
8986 #define LONG_TYPE_SIZE BITS_PER_WORD
8989 #ifndef LONG_LONG_TYPE_SIZE
8990 #define LONG_LONG_TYPE_SIZE (BITS_PER_WORD * 2)
8993 #ifndef FLOAT_TYPE_SIZE
8994 #define FLOAT_TYPE_SIZE BITS_PER_WORD
8997 #ifndef DOUBLE_TYPE_SIZE
8998 #define DOUBLE_TYPE_SIZE (BITS_PER_WORD * 2)
9001 #ifndef LONG_DOUBLE_TYPE_SIZE
9002 #define LONG_DOUBLE_TYPE_SIZE (BITS_PER_WORD * 2)
9006 sparc_type_code (register tree type)
9008 register unsigned long qualifiers = 0;
9009 register unsigned shift;
9011 /* Only the first 30 bits of the qualifier are valid. We must refrain from
9012 setting more, since some assemblers will give an error for this. Also,
9013 we must be careful to avoid shifts of 32 bits or more to avoid getting
9014 unpredictable results. */
9016 for (shift = 6; shift < 30; shift += 2, type = TREE_TYPE (type))
9018 switch (TREE_CODE (type))
9024 qualifiers |= (3 << shift);
9029 qualifiers |= (2 << shift);
9033 case REFERENCE_TYPE:
9035 qualifiers |= (1 << shift);
9039 return (qualifiers | 8);
9042 case QUAL_UNION_TYPE:
9043 return (qualifiers | 9);
9046 return (qualifiers | 10);
9049 return (qualifiers | 16);
9052 /* If this is a range type, consider it to be the underlying
9054 if (TREE_TYPE (type) != 0)
9057 /* Carefully distinguish all the standard types of C,
9058 without messing up if the language is not C. We do this by
9059 testing TYPE_PRECISION and TYPE_UNSIGNED. The old code used to
9060 look at both the names and the above fields, but that's redundant.
9061 Any type whose size is between two C types will be considered
9062 to be the wider of the two types. Also, we do not have a
9063 special code to use for "long long", so anything wider than
9064 long is treated the same. Note that we can't distinguish
9065 between "int" and "long" in this code if they are the same
9066 size, but that's fine, since neither can the assembler. */
9068 if (TYPE_PRECISION (type) <= CHAR_TYPE_SIZE)
9069 return (qualifiers | (TYPE_UNSIGNED (type) ? 12 : 2));
9071 else if (TYPE_PRECISION (type) <= SHORT_TYPE_SIZE)
9072 return (qualifiers | (TYPE_UNSIGNED (type) ? 13 : 3));
9074 else if (TYPE_PRECISION (type) <= INT_TYPE_SIZE)
9075 return (qualifiers | (TYPE_UNSIGNED (type) ? 14 : 4));
9078 return (qualifiers | (TYPE_UNSIGNED (type) ? 15 : 5));
9081 /* If this is a range type, consider it to be the underlying
9083 if (TREE_TYPE (type) != 0)
9086 /* Carefully distinguish all the standard types of C,
9087 without messing up if the language is not C. */
9089 if (TYPE_PRECISION (type) == FLOAT_TYPE_SIZE)
9090 return (qualifiers | 6);
9093 return (qualifiers | 7);
9095 case COMPLEX_TYPE: /* GNU Fortran COMPLEX type. */
9096 /* ??? We need to distinguish between double and float complex types,
9097 but I don't know how yet because I can't reach this code from
9098 existing front-ends. */
9099 return (qualifiers | 7); /* Who knows? */
9102 case BOOLEAN_TYPE: /* Boolean truth value type. */
9108 gcc_unreachable (); /* Not a type! */
9115 /* Nested function support. */
9117 /* Emit RTL insns to initialize the variable parts of a trampoline.
9118 FNADDR is an RTX for the address of the function's pure code.
9119 CXT is an RTX for the static chain value for the function.
9121 This takes 16 insns: 2 shifts & 2 ands (to split up addresses), 4 sethi
9122 (to load in opcodes), 4 iors (to merge address and opcodes), and 4 writes
9123 (to store insns). This is a bit excessive. Perhaps a different
9124 mechanism would be better here.
9126 Emit enough FLUSH insns to synchronize the data and instruction caches. */
9129 sparc32_initialize_trampoline (rtx m_tramp, rtx fnaddr, rtx cxt)
9131 /* SPARC 32-bit trampoline:
9134 sethi %hi(static), %g2
9136 or %g2, %lo(static), %g2
9138 SETHI i,r = 00rr rrr1 00ii iiii iiii iiii iiii iiii
9139 JMPL r+i,d = 10dd ddd1 1100 0rrr rr1i iiii iiii iiii
9143 (adjust_address (m_tramp, SImode, 0),
9144 expand_binop (SImode, ior_optab,
9145 expand_shift (RSHIFT_EXPR, SImode, fnaddr, 10, 0, 1),
9146 GEN_INT (trunc_int_for_mode (0x03000000, SImode)),
9147 NULL_RTX, 1, OPTAB_DIRECT));
9150 (adjust_address (m_tramp, SImode, 4),
9151 expand_binop (SImode, ior_optab,
9152 expand_shift (RSHIFT_EXPR, SImode, cxt, 10, 0, 1),
9153 GEN_INT (trunc_int_for_mode (0x05000000, SImode)),
9154 NULL_RTX, 1, OPTAB_DIRECT));
9157 (adjust_address (m_tramp, SImode, 8),
9158 expand_binop (SImode, ior_optab,
9159 expand_and (SImode, fnaddr, GEN_INT (0x3ff), NULL_RTX),
9160 GEN_INT (trunc_int_for_mode (0x81c06000, SImode)),
9161 NULL_RTX, 1, OPTAB_DIRECT));
9164 (adjust_address (m_tramp, SImode, 12),
9165 expand_binop (SImode, ior_optab,
9166 expand_and (SImode, cxt, GEN_INT (0x3ff), NULL_RTX),
9167 GEN_INT (trunc_int_for_mode (0x8410a000, SImode)),
9168 NULL_RTX, 1, OPTAB_DIRECT));
9170 /* On UltraSPARC a flush flushes an entire cache line. The trampoline is
9171 aligned on a 16 byte boundary so one flush clears it all. */
9172 emit_insn (gen_flush (validize_mem (adjust_address (m_tramp, SImode, 0))));
9173 if (sparc_cpu != PROCESSOR_ULTRASPARC
9174 && sparc_cpu != PROCESSOR_ULTRASPARC3
9175 && sparc_cpu != PROCESSOR_NIAGARA
9176 && sparc_cpu != PROCESSOR_NIAGARA2
9177 && sparc_cpu != PROCESSOR_NIAGARA3
9178 && sparc_cpu != PROCESSOR_NIAGARA4)
9179 emit_insn (gen_flush (validize_mem (adjust_address (m_tramp, SImode, 8))));
9181 /* Call __enable_execute_stack after writing onto the stack to make sure
9182 the stack address is accessible. */
9183 #ifdef HAVE_ENABLE_EXECUTE_STACK
9184 emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__enable_execute_stack"),
9185 LCT_NORMAL, VOIDmode, 1, XEXP (m_tramp, 0), Pmode);
9190 /* The 64-bit version is simpler because it makes more sense to load the
9191 values as "immediate" data out of the trampoline. It's also easier since
9192 we can read the PC without clobbering a register. */
9195 sparc64_initialize_trampoline (rtx m_tramp, rtx fnaddr, rtx cxt)
9197 /* SPARC 64-bit trampoline:
9206 emit_move_insn (adjust_address (m_tramp, SImode, 0),
9207 GEN_INT (trunc_int_for_mode (0x83414000, SImode)));
9208 emit_move_insn (adjust_address (m_tramp, SImode, 4),
9209 GEN_INT (trunc_int_for_mode (0xca586018, SImode)));
9210 emit_move_insn (adjust_address (m_tramp, SImode, 8),
9211 GEN_INT (trunc_int_for_mode (0x81c14000, SImode)));
9212 emit_move_insn (adjust_address (m_tramp, SImode, 12),
9213 GEN_INT (trunc_int_for_mode (0xca586010, SImode)));
9214 emit_move_insn (adjust_address (m_tramp, DImode, 16), cxt);
9215 emit_move_insn (adjust_address (m_tramp, DImode, 24), fnaddr);
9216 emit_insn (gen_flushdi (validize_mem (adjust_address (m_tramp, DImode, 0))));
9218 if (sparc_cpu != PROCESSOR_ULTRASPARC
9219 && sparc_cpu != PROCESSOR_ULTRASPARC3
9220 && sparc_cpu != PROCESSOR_NIAGARA
9221 && sparc_cpu != PROCESSOR_NIAGARA2
9222 && sparc_cpu != PROCESSOR_NIAGARA3
9223 && sparc_cpu != PROCESSOR_NIAGARA4)
9224 emit_insn (gen_flushdi (validize_mem (adjust_address (m_tramp, DImode, 8))));
9226 /* Call __enable_execute_stack after writing onto the stack to make sure
9227 the stack address is accessible. */
9228 #ifdef HAVE_ENABLE_EXECUTE_STACK
9229 emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__enable_execute_stack"),
9230 LCT_NORMAL, VOIDmode, 1, XEXP (m_tramp, 0), Pmode);
9234 /* Worker for TARGET_TRAMPOLINE_INIT. */
9237 sparc_trampoline_init (rtx m_tramp, tree fndecl, rtx cxt)
9239 rtx fnaddr = force_reg (Pmode, XEXP (DECL_RTL (fndecl), 0));
9240 cxt = force_reg (Pmode, cxt);
9242 sparc64_initialize_trampoline (m_tramp, fnaddr, cxt);
9244 sparc32_initialize_trampoline (m_tramp, fnaddr, cxt);
9247 /* Adjust the cost of a scheduling dependency. Return the new cost of
9248 a dependency LINK or INSN on DEP_INSN. COST is the current cost. */
9251 supersparc_adjust_cost (rtx insn, rtx link, rtx dep_insn, int cost)
9253 enum attr_type insn_type;
9255 if (! recog_memoized (insn))
9258 insn_type = get_attr_type (insn);
9260 if (REG_NOTE_KIND (link) == 0)
9262 /* Data dependency; DEP_INSN writes a register that INSN reads some
9265 /* if a load, then the dependence must be on the memory address;
9266 add an extra "cycle". Note that the cost could be two cycles
9267 if the reg was written late in an instruction group; we ca not tell
9269 if (insn_type == TYPE_LOAD || insn_type == TYPE_FPLOAD)
9272 /* Get the delay only if the address of the store is the dependence. */
9273 if (insn_type == TYPE_STORE || insn_type == TYPE_FPSTORE)
9275 rtx pat = PATTERN(insn);
9276 rtx dep_pat = PATTERN (dep_insn);
9278 if (GET_CODE (pat) != SET || GET_CODE (dep_pat) != SET)
9279 return cost; /* This should not happen! */
9281 /* The dependency between the two instructions was on the data that
9282 is being stored. Assume that this implies that the address of the
9283 store is not dependent. */
9284 if (rtx_equal_p (SET_DEST (dep_pat), SET_SRC (pat)))
9287 return cost + 3; /* An approximation. */
9290 /* A shift instruction cannot receive its data from an instruction
9291 in the same cycle; add a one cycle penalty. */
9292 if (insn_type == TYPE_SHIFT)
9293 return cost + 3; /* Split before cascade into shift. */
9297 /* Anti- or output- dependency; DEP_INSN reads/writes a register that
9298 INSN writes some cycles later. */
9300 /* These are only significant for the fpu unit; writing a fp reg before
9301 the fpu has finished with it stalls the processor. */
9303 /* Reusing an integer register causes no problems. */
9304 if (insn_type == TYPE_IALU || insn_type == TYPE_SHIFT)
9312 hypersparc_adjust_cost (rtx insn, rtx link, rtx dep_insn, int cost)
9314 enum attr_type insn_type, dep_type;
9315 rtx pat = PATTERN(insn);
9316 rtx dep_pat = PATTERN (dep_insn);
9318 if (recog_memoized (insn) < 0 || recog_memoized (dep_insn) < 0)
9321 insn_type = get_attr_type (insn);
9322 dep_type = get_attr_type (dep_insn);
9324 switch (REG_NOTE_KIND (link))
9327 /* Data dependency; DEP_INSN writes a register that INSN reads some
9334 /* Get the delay iff the address of the store is the dependence. */
9335 if (GET_CODE (pat) != SET || GET_CODE (dep_pat) != SET)
9338 if (rtx_equal_p (SET_DEST (dep_pat), SET_SRC (pat)))
9345 /* If a load, then the dependence must be on the memory address. If
9346 the addresses aren't equal, then it might be a false dependency */
9347 if (dep_type == TYPE_STORE || dep_type == TYPE_FPSTORE)
9349 if (GET_CODE (pat) != SET || GET_CODE (dep_pat) != SET
9350 || GET_CODE (SET_DEST (dep_pat)) != MEM
9351 || GET_CODE (SET_SRC (pat)) != MEM
9352 || ! rtx_equal_p (XEXP (SET_DEST (dep_pat), 0),
9353 XEXP (SET_SRC (pat), 0)))
9361 /* Compare to branch latency is 0. There is no benefit from
9362 separating compare and branch. */
9363 if (dep_type == TYPE_COMPARE)
9365 /* Floating point compare to branch latency is less than
9366 compare to conditional move. */
9367 if (dep_type == TYPE_FPCMP)
9376 /* Anti-dependencies only penalize the fpu unit. */
9377 if (insn_type == TYPE_IALU || insn_type == TYPE_SHIFT)
9389 sparc_adjust_cost(rtx insn, rtx link, rtx dep, int cost)
9393 case PROCESSOR_SUPERSPARC:
9394 cost = supersparc_adjust_cost (insn, link, dep, cost);
9396 case PROCESSOR_HYPERSPARC:
9397 case PROCESSOR_SPARCLITE86X:
9398 cost = hypersparc_adjust_cost (insn, link, dep, cost);
9407 sparc_sched_init (FILE *dump ATTRIBUTE_UNUSED,
9408 int sched_verbose ATTRIBUTE_UNUSED,
9409 int max_ready ATTRIBUTE_UNUSED)
9413 sparc_use_sched_lookahead (void)
9415 if (sparc_cpu == PROCESSOR_NIAGARA
9416 || sparc_cpu == PROCESSOR_NIAGARA2
9417 || sparc_cpu == PROCESSOR_NIAGARA3)
9419 if (sparc_cpu == PROCESSOR_NIAGARA4)
9421 if (sparc_cpu == PROCESSOR_ULTRASPARC
9422 || sparc_cpu == PROCESSOR_ULTRASPARC3)
9424 if ((1 << sparc_cpu) &
9425 ((1 << PROCESSOR_SUPERSPARC) | (1 << PROCESSOR_HYPERSPARC) |
9426 (1 << PROCESSOR_SPARCLITE86X)))
9432 sparc_issue_rate (void)
9436 case PROCESSOR_NIAGARA:
9437 case PROCESSOR_NIAGARA2:
9438 case PROCESSOR_NIAGARA3:
9441 case PROCESSOR_NIAGARA4:
9443 /* Assume V9 processors are capable of at least dual-issue. */
9445 case PROCESSOR_SUPERSPARC:
9447 case PROCESSOR_HYPERSPARC:
9448 case PROCESSOR_SPARCLITE86X:
9450 case PROCESSOR_ULTRASPARC:
9451 case PROCESSOR_ULTRASPARC3:
9457 set_extends (rtx insn)
9459 register rtx pat = PATTERN (insn);
9461 switch (GET_CODE (SET_SRC (pat)))
9463 /* Load and some shift instructions zero extend. */
9466 /* sethi clears the high bits */
9468 /* LO_SUM is used with sethi. sethi cleared the high
9469 bits and the values used with lo_sum are positive */
9471 /* Store flag stores 0 or 1 */
9481 rtx op0 = XEXP (SET_SRC (pat), 0);
9482 rtx op1 = XEXP (SET_SRC (pat), 1);
9483 if (GET_CODE (op1) == CONST_INT)
9484 return INTVAL (op1) >= 0;
9485 if (GET_CODE (op0) != REG)
9487 if (sparc_check_64 (op0, insn) == 1)
9489 return (GET_CODE (op1) == REG && sparc_check_64 (op1, insn) == 1);
9494 rtx op0 = XEXP (SET_SRC (pat), 0);
9495 rtx op1 = XEXP (SET_SRC (pat), 1);
9496 if (GET_CODE (op0) != REG || sparc_check_64 (op0, insn) <= 0)
9498 if (GET_CODE (op1) == CONST_INT)
9499 return INTVAL (op1) >= 0;
9500 return (GET_CODE (op1) == REG && sparc_check_64 (op1, insn) == 1);
9503 return GET_MODE (SET_SRC (pat)) == SImode;
9504 /* Positive integers leave the high bits zero. */
9506 return ! (CONST_DOUBLE_LOW (SET_SRC (pat)) & 0x80000000);
9508 return ! (INTVAL (SET_SRC (pat)) & 0x80000000);
9511 return - (GET_MODE (SET_SRC (pat)) == SImode);
9513 return sparc_check_64 (SET_SRC (pat), insn);
9519 /* We _ought_ to have only one kind per function, but... */
9520 static GTY(()) rtx sparc_addr_diff_list;
9521 static GTY(()) rtx sparc_addr_list;
9524 sparc_defer_case_vector (rtx lab, rtx vec, int diff)
9526 vec = gen_rtx_EXPR_LIST (VOIDmode, lab, vec);
9528 sparc_addr_diff_list
9529 = gen_rtx_EXPR_LIST (VOIDmode, vec, sparc_addr_diff_list);
9531 sparc_addr_list = gen_rtx_EXPR_LIST (VOIDmode, vec, sparc_addr_list);
9535 sparc_output_addr_vec (rtx vec)
9537 rtx lab = XEXP (vec, 0), body = XEXP (vec, 1);
9538 int idx, vlen = XVECLEN (body, 0);
9540 #ifdef ASM_OUTPUT_ADDR_VEC_START
9541 ASM_OUTPUT_ADDR_VEC_START (asm_out_file);
9544 #ifdef ASM_OUTPUT_CASE_LABEL
9545 ASM_OUTPUT_CASE_LABEL (asm_out_file, "L", CODE_LABEL_NUMBER (lab),
9548 (*targetm.asm_out.internal_label) (asm_out_file, "L", CODE_LABEL_NUMBER (lab));
9551 for (idx = 0; idx < vlen; idx++)
9553 ASM_OUTPUT_ADDR_VEC_ELT
9554 (asm_out_file, CODE_LABEL_NUMBER (XEXP (XVECEXP (body, 0, idx), 0)));
9557 #ifdef ASM_OUTPUT_ADDR_VEC_END
9558 ASM_OUTPUT_ADDR_VEC_END (asm_out_file);
9563 sparc_output_addr_diff_vec (rtx vec)
9565 rtx lab = XEXP (vec, 0), body = XEXP (vec, 1);
9566 rtx base = XEXP (XEXP (body, 0), 0);
9567 int idx, vlen = XVECLEN (body, 1);
9569 #ifdef ASM_OUTPUT_ADDR_VEC_START
9570 ASM_OUTPUT_ADDR_VEC_START (asm_out_file);
9573 #ifdef ASM_OUTPUT_CASE_LABEL
9574 ASM_OUTPUT_CASE_LABEL (asm_out_file, "L", CODE_LABEL_NUMBER (lab),
9577 (*targetm.asm_out.internal_label) (asm_out_file, "L", CODE_LABEL_NUMBER (lab));
9580 for (idx = 0; idx < vlen; idx++)
9582 ASM_OUTPUT_ADDR_DIFF_ELT
9585 CODE_LABEL_NUMBER (XEXP (XVECEXP (body, 1, idx), 0)),
9586 CODE_LABEL_NUMBER (base));
9589 #ifdef ASM_OUTPUT_ADDR_VEC_END
9590 ASM_OUTPUT_ADDR_VEC_END (asm_out_file);
9595 sparc_output_deferred_case_vectors (void)
9600 if (sparc_addr_list == NULL_RTX
9601 && sparc_addr_diff_list == NULL_RTX)
9604 /* Align to cache line in the function's code section. */
9605 switch_to_section (current_function_section ());
9607 align = floor_log2 (FUNCTION_BOUNDARY / BITS_PER_UNIT);
9609 ASM_OUTPUT_ALIGN (asm_out_file, align);
9611 for (t = sparc_addr_list; t ; t = XEXP (t, 1))
9612 sparc_output_addr_vec (XEXP (t, 0));
9613 for (t = sparc_addr_diff_list; t ; t = XEXP (t, 1))
9614 sparc_output_addr_diff_vec (XEXP (t, 0));
9616 sparc_addr_list = sparc_addr_diff_list = NULL_RTX;
9619 /* Return 0 if the high 32 bits of X (the low word of X, if DImode) are
9620 unknown. Return 1 if the high bits are zero, -1 if the register is
9623 sparc_check_64 (rtx x, rtx insn)
9625 /* If a register is set only once it is safe to ignore insns this
9626 code does not know how to handle. The loop will either recognize
9627 the single set and return the correct value or fail to recognize
9632 gcc_assert (GET_CODE (x) == REG);
9634 if (GET_MODE (x) == DImode)
9635 y = gen_rtx_REG (SImode, REGNO (x) + WORDS_BIG_ENDIAN);
9637 if (flag_expensive_optimizations
9638 && df && DF_REG_DEF_COUNT (REGNO (y)) == 1)
9644 insn = get_last_insn_anywhere ();
9649 while ((insn = PREV_INSN (insn)))
9651 switch (GET_CODE (insn))
9664 rtx pat = PATTERN (insn);
9665 if (GET_CODE (pat) != SET)
9667 if (rtx_equal_p (x, SET_DEST (pat)))
9668 return set_extends (insn);
9669 if (y && rtx_equal_p (y, SET_DEST (pat)))
9670 return set_extends (insn);
9671 if (reg_overlap_mentioned_p (SET_DEST (pat), y))
9679 /* Output a wide shift instruction in V8+ mode. INSN is the instruction,
9680 OPERANDS are its operands and OPCODE is the mnemonic to be used. */
9683 output_v8plus_shift (rtx insn, rtx *operands, const char *opcode)
9685 static char asm_code[60];
9687 /* The scratch register is only required when the destination
9688 register is not a 64-bit global or out register. */
9689 if (which_alternative != 2)
9690 operands[3] = operands[0];
9692 /* We can only shift by constants <= 63. */
9693 if (GET_CODE (operands[2]) == CONST_INT)
9694 operands[2] = GEN_INT (INTVAL (operands[2]) & 0x3f);
9696 if (GET_CODE (operands[1]) == CONST_INT)
9698 output_asm_insn ("mov\t%1, %3", operands);
9702 output_asm_insn ("sllx\t%H1, 32, %3", operands);
9703 if (sparc_check_64 (operands[1], insn) <= 0)
9704 output_asm_insn ("srl\t%L1, 0, %L1", operands);
9705 output_asm_insn ("or\t%L1, %3, %3", operands);
9708 strcpy (asm_code, opcode);
9710 if (which_alternative != 2)
9711 return strcat (asm_code, "\t%0, %2, %L0\n\tsrlx\t%L0, 32, %H0");
9714 strcat (asm_code, "\t%3, %2, %3\n\tsrlx\t%3, 32, %H0\n\tmov\t%3, %L0");
9717 /* Output rtl to increment the profiler label LABELNO
9718 for profiling a function entry. */
9721 sparc_profile_hook (int labelno)
9726 fun = gen_rtx_SYMBOL_REF (Pmode, MCOUNT_FUNCTION);
9727 if (NO_PROFILE_COUNTERS)
9729 emit_library_call (fun, LCT_NORMAL, VOIDmode, 0);
9733 ASM_GENERATE_INTERNAL_LABEL (buf, "LP", labelno);
9734 lab = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (buf));
9735 emit_library_call (fun, LCT_NORMAL, VOIDmode, 1, lab, Pmode);
9739 #ifdef TARGET_SOLARIS
9740 /* Solaris implementation of TARGET_ASM_NAMED_SECTION. */
9743 sparc_solaris_elf_asm_named_section (const char *name, unsigned int flags,
9744 tree decl ATTRIBUTE_UNUSED)
9746 if (HAVE_COMDAT_GROUP && flags & SECTION_LINKONCE)
9748 solaris_elf_asm_comdat_section (name, flags, decl);
9752 fprintf (asm_out_file, "\t.section\t\"%s\"", name);
9754 if (!(flags & SECTION_DEBUG))
9755 fputs (",#alloc", asm_out_file);
9756 if (flags & SECTION_WRITE)
9757 fputs (",#write", asm_out_file);
9758 if (flags & SECTION_TLS)
9759 fputs (",#tls", asm_out_file);
9760 if (flags & SECTION_CODE)
9761 fputs (",#execinstr", asm_out_file);
9763 /* Sun as only supports #nobits/#progbits since Solaris 10. */
9764 if (HAVE_AS_SPARC_NOBITS)
9766 if (flags & SECTION_BSS)
9767 fputs (",#nobits", asm_out_file);
9769 fputs (",#progbits", asm_out_file);
9772 fputc ('\n', asm_out_file);
9774 #endif /* TARGET_SOLARIS */
9776 /* We do not allow indirect calls to be optimized into sibling calls.
9778 We cannot use sibling calls when delayed branches are disabled
9779 because they will likely require the call delay slot to be filled.
9781 Also, on SPARC 32-bit we cannot emit a sibling call when the
9782 current function returns a structure. This is because the "unimp
9783 after call" convention would cause the callee to return to the
9784 wrong place. The generic code already disallows cases where the
9785 function being called returns a structure.
9787 It may seem strange how this last case could occur. Usually there
9788 is code after the call which jumps to epilogue code which dumps the
9789 return value into the struct return area. That ought to invalidate
9790 the sibling call right? Well, in the C++ case we can end up passing
9791 the pointer to the struct return area to a constructor (which returns
9792 void) and then nothing else happens. Such a sibling call would look
9793 valid without the added check here.
9795 VxWorks PIC PLT entries require the global pointer to be initialized
9796 on entry. We therefore can't emit sibling calls to them. */
9798 sparc_function_ok_for_sibcall (tree decl, tree exp ATTRIBUTE_UNUSED)
9801 && flag_delayed_branch
9802 && (TARGET_ARCH64 || ! cfun->returns_struct)
9803 && !(TARGET_VXWORKS_RTP
9805 && !targetm.binds_local_p (decl)));
9808 /* libfunc renaming. */
9811 sparc_init_libfuncs (void)
9815 /* Use the subroutines that Sun's library provides for integer
9816 multiply and divide. The `*' prevents an underscore from
9817 being prepended by the compiler. .umul is a little faster
9819 set_optab_libfunc (smul_optab, SImode, "*.umul");
9820 set_optab_libfunc (sdiv_optab, SImode, "*.div");
9821 set_optab_libfunc (udiv_optab, SImode, "*.udiv");
9822 set_optab_libfunc (smod_optab, SImode, "*.rem");
9823 set_optab_libfunc (umod_optab, SImode, "*.urem");
9825 /* TFmode arithmetic. These names are part of the SPARC 32bit ABI. */
9826 set_optab_libfunc (add_optab, TFmode, "_Q_add");
9827 set_optab_libfunc (sub_optab, TFmode, "_Q_sub");
9828 set_optab_libfunc (neg_optab, TFmode, "_Q_neg");
9829 set_optab_libfunc (smul_optab, TFmode, "_Q_mul");
9830 set_optab_libfunc (sdiv_optab, TFmode, "_Q_div");
9832 /* We can define the TFmode sqrt optab only if TARGET_FPU. This
9833 is because with soft-float, the SFmode and DFmode sqrt
9834 instructions will be absent, and the compiler will notice and
9835 try to use the TFmode sqrt instruction for calls to the
9836 builtin function sqrt, but this fails. */
9838 set_optab_libfunc (sqrt_optab, TFmode, "_Q_sqrt");
9840 set_optab_libfunc (eq_optab, TFmode, "_Q_feq");
9841 set_optab_libfunc (ne_optab, TFmode, "_Q_fne");
9842 set_optab_libfunc (gt_optab, TFmode, "_Q_fgt");
9843 set_optab_libfunc (ge_optab, TFmode, "_Q_fge");
9844 set_optab_libfunc (lt_optab, TFmode, "_Q_flt");
9845 set_optab_libfunc (le_optab, TFmode, "_Q_fle");
9847 set_conv_libfunc (sext_optab, TFmode, SFmode, "_Q_stoq");
9848 set_conv_libfunc (sext_optab, TFmode, DFmode, "_Q_dtoq");
9849 set_conv_libfunc (trunc_optab, SFmode, TFmode, "_Q_qtos");
9850 set_conv_libfunc (trunc_optab, DFmode, TFmode, "_Q_qtod");
9852 set_conv_libfunc (sfix_optab, SImode, TFmode, "_Q_qtoi");
9853 set_conv_libfunc (ufix_optab, SImode, TFmode, "_Q_qtou");
9854 set_conv_libfunc (sfloat_optab, TFmode, SImode, "_Q_itoq");
9855 set_conv_libfunc (ufloat_optab, TFmode, SImode, "_Q_utoq");
9857 if (DITF_CONVERSION_LIBFUNCS)
9859 set_conv_libfunc (sfix_optab, DImode, TFmode, "_Q_qtoll");
9860 set_conv_libfunc (ufix_optab, DImode, TFmode, "_Q_qtoull");
9861 set_conv_libfunc (sfloat_optab, TFmode, DImode, "_Q_lltoq");
9862 set_conv_libfunc (ufloat_optab, TFmode, DImode, "_Q_ulltoq");
9865 if (SUN_CONVERSION_LIBFUNCS)
9867 set_conv_libfunc (sfix_optab, DImode, SFmode, "__ftoll");
9868 set_conv_libfunc (ufix_optab, DImode, SFmode, "__ftoull");
9869 set_conv_libfunc (sfix_optab, DImode, DFmode, "__dtoll");
9870 set_conv_libfunc (ufix_optab, DImode, DFmode, "__dtoull");
9875 /* In the SPARC 64bit ABI, SImode multiply and divide functions
9876 do not exist in the library. Make sure the compiler does not
9877 emit calls to them by accident. (It should always use the
9878 hardware instructions.) */
9879 set_optab_libfunc (smul_optab, SImode, 0);
9880 set_optab_libfunc (sdiv_optab, SImode, 0);
9881 set_optab_libfunc (udiv_optab, SImode, 0);
9882 set_optab_libfunc (smod_optab, SImode, 0);
9883 set_optab_libfunc (umod_optab, SImode, 0);
9885 if (SUN_INTEGER_MULTIPLY_64)
9887 set_optab_libfunc (smul_optab, DImode, "__mul64");
9888 set_optab_libfunc (sdiv_optab, DImode, "__div64");
9889 set_optab_libfunc (udiv_optab, DImode, "__udiv64");
9890 set_optab_libfunc (smod_optab, DImode, "__rem64");
9891 set_optab_libfunc (umod_optab, DImode, "__urem64");
9894 if (SUN_CONVERSION_LIBFUNCS)
9896 set_conv_libfunc (sfix_optab, DImode, SFmode, "__ftol");
9897 set_conv_libfunc (ufix_optab, DImode, SFmode, "__ftoul");
9898 set_conv_libfunc (sfix_optab, DImode, DFmode, "__dtol");
9899 set_conv_libfunc (ufix_optab, DImode, DFmode, "__dtoul");
9904 static tree def_builtin(const char *name, int code, tree type)
9906 return add_builtin_function(name, type, code, BUILT_IN_MD, NULL,
9910 static tree def_builtin_const(const char *name, int code, tree type)
9912 tree t = def_builtin(name, code, type);
9915 TREE_READONLY (t) = 1;
9920 /* Implement the TARGET_INIT_BUILTINS target hook.
9921 Create builtin functions for special SPARC instructions. */
9924 sparc_init_builtins (void)
9927 sparc_vis_init_builtins ();
9930 /* Create builtin functions for VIS 1.0 instructions. */
9933 sparc_vis_init_builtins (void)
9935 tree v4qi = build_vector_type (unsigned_intQI_type_node, 4);
9936 tree v8qi = build_vector_type (unsigned_intQI_type_node, 8);
9937 tree v4hi = build_vector_type (intHI_type_node, 4);
9938 tree v2hi = build_vector_type (intHI_type_node, 2);
9939 tree v2si = build_vector_type (intSI_type_node, 2);
9940 tree v1si = build_vector_type (intSI_type_node, 1);
9942 tree v4qi_ftype_v4hi = build_function_type_list (v4qi, v4hi, 0);
9943 tree v8qi_ftype_v2si_v8qi = build_function_type_list (v8qi, v2si, v8qi, 0);
9944 tree v2hi_ftype_v2si = build_function_type_list (v2hi, v2si, 0);
9945 tree v4hi_ftype_v4qi = build_function_type_list (v4hi, v4qi, 0);
9946 tree v8qi_ftype_v4qi_v4qi = build_function_type_list (v8qi, v4qi, v4qi, 0);
9947 tree v4hi_ftype_v4qi_v4hi = build_function_type_list (v4hi, v4qi, v4hi, 0);
9948 tree v4hi_ftype_v4qi_v2hi = build_function_type_list (v4hi, v4qi, v2hi, 0);
9949 tree v2si_ftype_v4qi_v2hi = build_function_type_list (v2si, v4qi, v2hi, 0);
9950 tree v4hi_ftype_v8qi_v4hi = build_function_type_list (v4hi, v8qi, v4hi, 0);
9951 tree v4hi_ftype_v4hi_v4hi = build_function_type_list (v4hi, v4hi, v4hi, 0);
9952 tree v2si_ftype_v2si_v2si = build_function_type_list (v2si, v2si, v2si, 0);
9953 tree v8qi_ftype_v8qi_v8qi = build_function_type_list (v8qi, v8qi, v8qi, 0);
9954 tree v2hi_ftype_v2hi_v2hi = build_function_type_list (v2hi, v2hi, v2hi, 0);
9955 tree v1si_ftype_v1si_v1si = build_function_type_list (v1si, v1si, v1si, 0);
9956 tree di_ftype_v8qi_v8qi_di = build_function_type_list (intDI_type_node,
9958 intDI_type_node, 0);
9959 tree di_ftype_v8qi_v8qi = build_function_type_list (intDI_type_node,
9961 tree si_ftype_v8qi_v8qi = build_function_type_list (intSI_type_node,
9963 tree di_ftype_di_di = build_function_type_list (intDI_type_node,
9965 intDI_type_node, 0);
9966 tree si_ftype_si_si = build_function_type_list (intSI_type_node,
9968 intSI_type_node, 0);
9969 tree ptr_ftype_ptr_si = build_function_type_list (ptr_type_node,
9971 intSI_type_node, 0);
9972 tree ptr_ftype_ptr_di = build_function_type_list (ptr_type_node,
9974 intDI_type_node, 0);
9975 tree si_ftype_ptr_ptr = build_function_type_list (intSI_type_node,
9978 tree di_ftype_ptr_ptr = build_function_type_list (intDI_type_node,
9981 tree si_ftype_v4hi_v4hi = build_function_type_list (intSI_type_node,
9983 tree si_ftype_v2si_v2si = build_function_type_list (intSI_type_node,
9985 tree di_ftype_v4hi_v4hi = build_function_type_list (intDI_type_node,
9987 tree di_ftype_v2si_v2si = build_function_type_list (intDI_type_node,
9989 tree void_ftype_di = build_function_type_list (void_type_node,
9990 intDI_type_node, 0);
9991 tree di_ftype_void = build_function_type_list (intDI_type_node,
9993 tree void_ftype_si = build_function_type_list (void_type_node,
9994 intSI_type_node, 0);
9995 tree sf_ftype_sf_sf = build_function_type_list (float_type_node,
9997 float_type_node, 0);
9998 tree df_ftype_df_df = build_function_type_list (double_type_node,
10000 double_type_node, 0);
10002 /* Packing and expanding vectors. */
10003 def_builtin ("__builtin_vis_fpack16", CODE_FOR_fpack16_vis,
10005 def_builtin ("__builtin_vis_fpack32", CODE_FOR_fpack32_vis,
10006 v8qi_ftype_v2si_v8qi);
10007 def_builtin ("__builtin_vis_fpackfix", CODE_FOR_fpackfix_vis,
10009 def_builtin_const ("__builtin_vis_fexpand", CODE_FOR_fexpand_vis,
10011 def_builtin_const ("__builtin_vis_fpmerge", CODE_FOR_fpmerge_vis,
10012 v8qi_ftype_v4qi_v4qi);
10014 /* Multiplications. */
10015 def_builtin_const ("__builtin_vis_fmul8x16", CODE_FOR_fmul8x16_vis,
10016 v4hi_ftype_v4qi_v4hi);
10017 def_builtin_const ("__builtin_vis_fmul8x16au", CODE_FOR_fmul8x16au_vis,
10018 v4hi_ftype_v4qi_v2hi);
10019 def_builtin_const ("__builtin_vis_fmul8x16al", CODE_FOR_fmul8x16al_vis,
10020 v4hi_ftype_v4qi_v2hi);
10021 def_builtin_const ("__builtin_vis_fmul8sux16", CODE_FOR_fmul8sux16_vis,
10022 v4hi_ftype_v8qi_v4hi);
10023 def_builtin_const ("__builtin_vis_fmul8ulx16", CODE_FOR_fmul8ulx16_vis,
10024 v4hi_ftype_v8qi_v4hi);
10025 def_builtin_const ("__builtin_vis_fmuld8sux16", CODE_FOR_fmuld8sux16_vis,
10026 v2si_ftype_v4qi_v2hi);
10027 def_builtin_const ("__builtin_vis_fmuld8ulx16", CODE_FOR_fmuld8ulx16_vis,
10028 v2si_ftype_v4qi_v2hi);
10030 /* Data aligning. */
10031 def_builtin ("__builtin_vis_faligndatav4hi", CODE_FOR_faligndatav4hi_vis,
10032 v4hi_ftype_v4hi_v4hi);
10033 def_builtin ("__builtin_vis_faligndatav8qi", CODE_FOR_faligndatav8qi_vis,
10034 v8qi_ftype_v8qi_v8qi);
10035 def_builtin ("__builtin_vis_faligndatav2si", CODE_FOR_faligndatav2si_vis,
10036 v2si_ftype_v2si_v2si);
10037 def_builtin ("__builtin_vis_faligndatadi", CODE_FOR_faligndatav1di_vis,
10040 def_builtin ("__builtin_vis_write_gsr", CODE_FOR_wrgsr_vis,
10042 def_builtin ("__builtin_vis_read_gsr", CODE_FOR_rdgsr_vis,
10047 def_builtin ("__builtin_vis_alignaddr", CODE_FOR_alignaddrdi_vis,
10049 def_builtin ("__builtin_vis_alignaddrl", CODE_FOR_alignaddrldi_vis,
10054 def_builtin ("__builtin_vis_alignaddr", CODE_FOR_alignaddrsi_vis,
10056 def_builtin ("__builtin_vis_alignaddrl", CODE_FOR_alignaddrlsi_vis,
10060 /* Pixel distance. */
10061 def_builtin_const ("__builtin_vis_pdist", CODE_FOR_pdist_vis,
10062 di_ftype_v8qi_v8qi_di);
10064 /* Edge handling. */
10067 def_builtin_const ("__builtin_vis_edge8", CODE_FOR_edge8di_vis,
10069 def_builtin_const ("__builtin_vis_edge8l", CODE_FOR_edge8ldi_vis,
10071 def_builtin_const ("__builtin_vis_edge16", CODE_FOR_edge16di_vis,
10073 def_builtin_const ("__builtin_vis_edge16l", CODE_FOR_edge16ldi_vis,
10075 def_builtin_const ("__builtin_vis_edge32", CODE_FOR_edge32di_vis,
10077 def_builtin_const ("__builtin_vis_edge32l", CODE_FOR_edge32ldi_vis,
10081 def_builtin_const ("__builtin_vis_edge8n", CODE_FOR_edge8ndi_vis,
10083 def_builtin_const ("__builtin_vis_edge8ln", CODE_FOR_edge8lndi_vis,
10085 def_builtin_const ("__builtin_vis_edge16n", CODE_FOR_edge16ndi_vis,
10087 def_builtin_const ("__builtin_vis_edge16ln", CODE_FOR_edge16lndi_vis,
10089 def_builtin_const ("__builtin_vis_edge32n", CODE_FOR_edge32ndi_vis,
10091 def_builtin_const ("__builtin_vis_edge32ln", CODE_FOR_edge32lndi_vis,
10097 def_builtin_const ("__builtin_vis_edge8", CODE_FOR_edge8si_vis,
10099 def_builtin_const ("__builtin_vis_edge8l", CODE_FOR_edge8lsi_vis,
10101 def_builtin_const ("__builtin_vis_edge16", CODE_FOR_edge16si_vis,
10103 def_builtin_const ("__builtin_vis_edge16l", CODE_FOR_edge16lsi_vis,
10105 def_builtin_const ("__builtin_vis_edge32", CODE_FOR_edge32si_vis,
10107 def_builtin_const ("__builtin_vis_edge32l", CODE_FOR_edge32lsi_vis,
10111 def_builtin_const ("__builtin_vis_edge8n", CODE_FOR_edge8nsi_vis,
10113 def_builtin_const ("__builtin_vis_edge8ln", CODE_FOR_edge8lnsi_vis,
10115 def_builtin_const ("__builtin_vis_edge16n", CODE_FOR_edge16nsi_vis,
10117 def_builtin_const ("__builtin_vis_edge16ln", CODE_FOR_edge16lnsi_vis,
10119 def_builtin_const ("__builtin_vis_edge32n", CODE_FOR_edge32nsi_vis,
10121 def_builtin_const ("__builtin_vis_edge32ln", CODE_FOR_edge32lnsi_vis,
10126 /* Pixel compare. */
10129 def_builtin_const ("__builtin_vis_fcmple16", CODE_FOR_fcmple16di_vis,
10130 di_ftype_v4hi_v4hi);
10131 def_builtin_const ("__builtin_vis_fcmple32", CODE_FOR_fcmple32di_vis,
10132 di_ftype_v2si_v2si);
10133 def_builtin_const ("__builtin_vis_fcmpne16", CODE_FOR_fcmpne16di_vis,
10134 di_ftype_v4hi_v4hi);
10135 def_builtin_const ("__builtin_vis_fcmpne32", CODE_FOR_fcmpne32di_vis,
10136 di_ftype_v2si_v2si);
10137 def_builtin_const ("__builtin_vis_fcmpgt16", CODE_FOR_fcmpgt16di_vis,
10138 di_ftype_v4hi_v4hi);
10139 def_builtin_const ("__builtin_vis_fcmpgt32", CODE_FOR_fcmpgt32di_vis,
10140 di_ftype_v2si_v2si);
10141 def_builtin_const ("__builtin_vis_fcmpeq16", CODE_FOR_fcmpeq16di_vis,
10142 di_ftype_v4hi_v4hi);
10143 def_builtin_const ("__builtin_vis_fcmpeq32", CODE_FOR_fcmpeq32di_vis,
10144 di_ftype_v2si_v2si);
10148 def_builtin_const ("__builtin_vis_fcmple16", CODE_FOR_fcmple16si_vis,
10149 si_ftype_v4hi_v4hi);
10150 def_builtin_const ("__builtin_vis_fcmple32", CODE_FOR_fcmple32si_vis,
10151 si_ftype_v2si_v2si);
10152 def_builtin_const ("__builtin_vis_fcmpne16", CODE_FOR_fcmpne16si_vis,
10153 si_ftype_v4hi_v4hi);
10154 def_builtin_const ("__builtin_vis_fcmpne32", CODE_FOR_fcmpne32si_vis,
10155 si_ftype_v2si_v2si);
10156 def_builtin_const ("__builtin_vis_fcmpgt16", CODE_FOR_fcmpgt16si_vis,
10157 si_ftype_v4hi_v4hi);
10158 def_builtin_const ("__builtin_vis_fcmpgt32", CODE_FOR_fcmpgt32si_vis,
10159 si_ftype_v2si_v2si);
10160 def_builtin_const ("__builtin_vis_fcmpeq16", CODE_FOR_fcmpeq16si_vis,
10161 si_ftype_v4hi_v4hi);
10162 def_builtin_const ("__builtin_vis_fcmpeq32", CODE_FOR_fcmpeq32si_vis,
10163 si_ftype_v2si_v2si);
10166 /* Addition and subtraction. */
10167 def_builtin_const ("__builtin_vis_fpadd16", CODE_FOR_addv4hi3,
10168 v4hi_ftype_v4hi_v4hi);
10169 def_builtin_const ("__builtin_vis_fpadd16s", CODE_FOR_addv2hi3,
10170 v2hi_ftype_v2hi_v2hi);
10171 def_builtin_const ("__builtin_vis_fpadd32", CODE_FOR_addv2si3,
10172 v2si_ftype_v2si_v2si);
10173 def_builtin_const ("__builtin_vis_fpadd32s", CODE_FOR_addv1si3,
10174 v1si_ftype_v1si_v1si);
10175 def_builtin_const ("__builtin_vis_fpsub16", CODE_FOR_subv4hi3,
10176 v4hi_ftype_v4hi_v4hi);
10177 def_builtin_const ("__builtin_vis_fpsub16s", CODE_FOR_subv2hi3,
10178 v2hi_ftype_v2hi_v2hi);
10179 def_builtin_const ("__builtin_vis_fpsub32", CODE_FOR_subv2si3,
10180 v2si_ftype_v2si_v2si);
10181 def_builtin_const ("__builtin_vis_fpsub32s", CODE_FOR_subv1si3,
10182 v1si_ftype_v1si_v1si);
10184 /* Three-dimensional array addressing. */
10187 def_builtin_const ("__builtin_vis_array8", CODE_FOR_array8di_vis,
10189 def_builtin_const ("__builtin_vis_array16", CODE_FOR_array16di_vis,
10191 def_builtin_const ("__builtin_vis_array32", CODE_FOR_array32di_vis,
10196 def_builtin_const ("__builtin_vis_array8", CODE_FOR_array8si_vis,
10198 def_builtin_const ("__builtin_vis_array16", CODE_FOR_array16si_vis,
10200 def_builtin_const ("__builtin_vis_array32", CODE_FOR_array32si_vis,
10206 /* Byte mask and shuffle */
10208 def_builtin ("__builtin_vis_bmask", CODE_FOR_bmaskdi_vis,
10211 def_builtin ("__builtin_vis_bmask", CODE_FOR_bmasksi_vis,
10213 def_builtin ("__builtin_vis_bshufflev4hi", CODE_FOR_bshufflev4hi_vis,
10214 v4hi_ftype_v4hi_v4hi);
10215 def_builtin ("__builtin_vis_bshufflev8qi", CODE_FOR_bshufflev8qi_vis,
10216 v8qi_ftype_v8qi_v8qi);
10217 def_builtin ("__builtin_vis_bshufflev2si", CODE_FOR_bshufflev2si_vis,
10218 v2si_ftype_v2si_v2si);
10219 def_builtin ("__builtin_vis_bshuffledi", CODE_FOR_bshufflev1di_vis,
10227 def_builtin ("__builtin_vis_cmask8", CODE_FOR_cmask8di_vis,
10229 def_builtin ("__builtin_vis_cmask16", CODE_FOR_cmask16di_vis,
10231 def_builtin ("__builtin_vis_cmask32", CODE_FOR_cmask32di_vis,
10236 def_builtin ("__builtin_vis_cmask8", CODE_FOR_cmask8si_vis,
10238 def_builtin ("__builtin_vis_cmask16", CODE_FOR_cmask16si_vis,
10240 def_builtin ("__builtin_vis_cmask32", CODE_FOR_cmask32si_vis,
10244 def_builtin_const ("__builtin_vis_fchksm16", CODE_FOR_fchksm16_vis,
10245 v4hi_ftype_v4hi_v4hi);
10247 def_builtin_const ("__builtin_vis_fsll16", CODE_FOR_vashlv4hi3,
10248 v4hi_ftype_v4hi_v4hi);
10249 def_builtin_const ("__builtin_vis_fslas16", CODE_FOR_vssashlv4hi3,
10250 v4hi_ftype_v4hi_v4hi);
10251 def_builtin_const ("__builtin_vis_fsrl16", CODE_FOR_vlshrv4hi3,
10252 v4hi_ftype_v4hi_v4hi);
10253 def_builtin_const ("__builtin_vis_fsra16", CODE_FOR_vashrv4hi3,
10254 v4hi_ftype_v4hi_v4hi);
10255 def_builtin_const ("__builtin_vis_fsll32", CODE_FOR_vashlv2si3,
10256 v2si_ftype_v2si_v2si);
10257 def_builtin_const ("__builtin_vis_fslas32", CODE_FOR_vssashlv2si3,
10258 v2si_ftype_v2si_v2si);
10259 def_builtin_const ("__builtin_vis_fsrl32", CODE_FOR_vlshrv2si3,
10260 v2si_ftype_v2si_v2si);
10261 def_builtin_const ("__builtin_vis_fsra32", CODE_FOR_vashrv2si3,
10262 v2si_ftype_v2si_v2si);
10265 def_builtin_const ("__builtin_vis_pdistn", CODE_FOR_pdistndi_vis,
10266 di_ftype_v8qi_v8qi);
10268 def_builtin_const ("__builtin_vis_pdistn", CODE_FOR_pdistnsi_vis,
10269 si_ftype_v8qi_v8qi);
10271 def_builtin_const ("__builtin_vis_fmean16", CODE_FOR_fmean16_vis,
10272 v4hi_ftype_v4hi_v4hi);
10273 def_builtin_const ("__builtin_vis_fpadd64", CODE_FOR_fpadd64_vis,
10275 def_builtin_const ("__builtin_vis_fpsub64", CODE_FOR_fpsub64_vis,
10278 def_builtin_const ("__builtin_vis_fpadds16", CODE_FOR_ssaddv4hi3,
10279 v4hi_ftype_v4hi_v4hi);
10280 def_builtin_const ("__builtin_vis_fpadds16s", CODE_FOR_ssaddv2hi3,
10281 v2hi_ftype_v2hi_v2hi);
10282 def_builtin_const ("__builtin_vis_fpsubs16", CODE_FOR_sssubv4hi3,
10283 v4hi_ftype_v4hi_v4hi);
10284 def_builtin_const ("__builtin_vis_fpsubs16s", CODE_FOR_sssubv2hi3,
10285 v2hi_ftype_v2hi_v2hi);
10286 def_builtin_const ("__builtin_vis_fpadds32", CODE_FOR_ssaddv2si3,
10287 v2si_ftype_v2si_v2si);
10288 def_builtin_const ("__builtin_vis_fpadds32s", CODE_FOR_ssaddv1si3,
10289 v1si_ftype_v1si_v1si);
10290 def_builtin_const ("__builtin_vis_fpsubs32", CODE_FOR_sssubv2si3,
10291 v2si_ftype_v2si_v2si);
10292 def_builtin_const ("__builtin_vis_fpsubs32s", CODE_FOR_sssubv1si3,
10293 v1si_ftype_v1si_v1si);
10297 def_builtin_const ("__builtin_vis_fucmple8", CODE_FOR_fucmple8di_vis,
10298 di_ftype_v8qi_v8qi);
10299 def_builtin_const ("__builtin_vis_fucmpne8", CODE_FOR_fucmpne8di_vis,
10300 di_ftype_v8qi_v8qi);
10301 def_builtin_const ("__builtin_vis_fucmpgt8", CODE_FOR_fucmpgt8di_vis,
10302 di_ftype_v8qi_v8qi);
10303 def_builtin_const ("__builtin_vis_fucmpeq8", CODE_FOR_fucmpeq8di_vis,
10304 di_ftype_v8qi_v8qi);
10308 def_builtin_const ("__builtin_vis_fucmple8", CODE_FOR_fucmple8si_vis,
10309 si_ftype_v8qi_v8qi);
10310 def_builtin_const ("__builtin_vis_fucmpne8", CODE_FOR_fucmpne8si_vis,
10311 si_ftype_v8qi_v8qi);
10312 def_builtin_const ("__builtin_vis_fucmpgt8", CODE_FOR_fucmpgt8si_vis,
10313 si_ftype_v8qi_v8qi);
10314 def_builtin_const ("__builtin_vis_fucmpeq8", CODE_FOR_fucmpeq8si_vis,
10315 si_ftype_v8qi_v8qi);
10318 def_builtin_const ("__builtin_vis_fhadds", CODE_FOR_fhaddsf_vis,
10320 def_builtin_const ("__builtin_vis_fhaddd", CODE_FOR_fhadddf_vis,
10322 def_builtin_const ("__builtin_vis_fhsubs", CODE_FOR_fhsubsf_vis,
10324 def_builtin_const ("__builtin_vis_fhsubd", CODE_FOR_fhsubdf_vis,
10326 def_builtin_const ("__builtin_vis_fnhadds", CODE_FOR_fnhaddsf_vis,
10328 def_builtin_const ("__builtin_vis_fnhaddd", CODE_FOR_fnhadddf_vis,
10331 def_builtin_const ("__builtin_vis_umulxhi", CODE_FOR_umulxhi_vis,
10333 def_builtin_const ("__builtin_vis_xmulx", CODE_FOR_xmulx_vis,
10335 def_builtin_const ("__builtin_vis_xmulxhi", CODE_FOR_xmulxhi_vis,
10340 /* Handle TARGET_EXPAND_BUILTIN target hook.
10341 Expand builtin functions for sparc intrinsics. */
10344 sparc_expand_builtin (tree exp, rtx target,
10345 rtx subtarget ATTRIBUTE_UNUSED,
10346 enum machine_mode tmode ATTRIBUTE_UNUSED,
10347 int ignore ATTRIBUTE_UNUSED)
10350 call_expr_arg_iterator iter;
10351 tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
10352 unsigned int icode = DECL_FUNCTION_CODE (fndecl);
10357 nonvoid = TREE_TYPE (TREE_TYPE (fndecl)) != void_type_node;
10361 enum machine_mode tmode = insn_data[icode].operand[0].mode;
10363 || GET_MODE (target) != tmode
10364 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
10365 op[0] = gen_reg_rtx (tmode);
10369 FOR_EACH_CALL_EXPR_ARG (arg, iter, exp)
10371 const struct insn_operand_data *insn_op;
10374 if (arg == error_mark_node)
10378 idx = arg_count - !nonvoid;
10379 insn_op = &insn_data[icode].operand[idx];
10380 op[arg_count] = expand_normal (arg);
10382 if (insn_op->mode == V1DImode
10383 && GET_MODE (op[arg_count]) == DImode)
10384 op[arg_count] = gen_lowpart (V1DImode, op[arg_count]);
10385 else if (insn_op->mode == V1SImode
10386 && GET_MODE (op[arg_count]) == SImode)
10387 op[arg_count] = gen_lowpart (V1SImode, op[arg_count]);
10389 if (! (*insn_data[icode].operand[idx].predicate) (op[arg_count],
10391 op[arg_count] = copy_to_mode_reg (insn_op->mode, op[arg_count]);
10397 pat = GEN_FCN (icode) (op[0]);
10401 pat = GEN_FCN (icode) (op[0], op[1]);
10403 pat = GEN_FCN (icode) (op[1]);
10406 pat = GEN_FCN (icode) (op[0], op[1], op[2]);
10409 pat = GEN_FCN (icode) (op[0], op[1], op[2], op[3]);
10412 gcc_unreachable ();
10427 sparc_vis_mul8x16 (int e8, int e16)
10429 return (e8 * e16 + 128) / 256;
10432 /* Multiply the VECTOR_CSTs CST0 and CST1 as specified by FNCODE and put
10433 the result into the array N_ELTS, whose elements are of INNER_TYPE. */
10436 sparc_handle_vis_mul8x16 (tree *n_elts, int fncode, tree inner_type,
10437 tree cst0, tree cst1)
10439 unsigned i, num = VECTOR_CST_NELTS (cst0);
10444 case CODE_FOR_fmul8x16_vis:
10445 for (i = 0; i < num; ++i)
10448 = sparc_vis_mul8x16 (TREE_INT_CST_LOW (VECTOR_CST_ELT (cst0, i)),
10449 TREE_INT_CST_LOW (VECTOR_CST_ELT (cst1, i)));
10450 n_elts[i] = build_int_cst (inner_type, val);
10454 case CODE_FOR_fmul8x16au_vis:
10455 scale = TREE_INT_CST_LOW (VECTOR_CST_ELT (cst1, 0));
10457 for (i = 0; i < num; ++i)
10460 = sparc_vis_mul8x16 (TREE_INT_CST_LOW (VECTOR_CST_ELT (cst0, i)),
10462 n_elts[i] = build_int_cst (inner_type, val);
10466 case CODE_FOR_fmul8x16al_vis:
10467 scale = TREE_INT_CST_LOW (VECTOR_CST_ELT (cst1, 1));
10469 for (i = 0; i < num; ++i)
10472 = sparc_vis_mul8x16 (TREE_INT_CST_LOW (VECTOR_CST_ELT (cst0, i)),
10474 n_elts[i] = build_int_cst (inner_type, val);
10479 gcc_unreachable ();
10483 /* Handle TARGET_FOLD_BUILTIN target hook.
10484 Fold builtin functions for SPARC intrinsics. If IGNORE is true the
10485 result of the function call is ignored. NULL_TREE is returned if the
10486 function could not be folded. */
10489 sparc_fold_builtin (tree fndecl, int n_args ATTRIBUTE_UNUSED,
10490 tree *args, bool ignore)
10492 tree arg0, arg1, arg2;
10493 tree rtype = TREE_TYPE (TREE_TYPE (fndecl));
10494 enum insn_code icode = (enum insn_code) DECL_FUNCTION_CODE (fndecl);
10498 /* Note that a switch statement instead of the sequence of tests would
10499 be incorrect as many of the CODE_FOR values could be CODE_FOR_nothing
10500 and that would yield multiple alternatives with identical values. */
10501 if (icode == CODE_FOR_alignaddrsi_vis
10502 || icode == CODE_FOR_alignaddrdi_vis
10503 || icode == CODE_FOR_wrgsr_vis
10504 || icode == CODE_FOR_bmasksi_vis
10505 || icode == CODE_FOR_bmaskdi_vis
10506 || icode == CODE_FOR_cmask8si_vis
10507 || icode == CODE_FOR_cmask8di_vis
10508 || icode == CODE_FOR_cmask16si_vis
10509 || icode == CODE_FOR_cmask16di_vis
10510 || icode == CODE_FOR_cmask32si_vis
10511 || icode == CODE_FOR_cmask32di_vis)
10514 return build_zero_cst (rtype);
10519 case CODE_FOR_fexpand_vis:
10523 if (TREE_CODE (arg0) == VECTOR_CST)
10525 tree inner_type = TREE_TYPE (rtype);
10529 n_elts = XALLOCAVEC (tree, VECTOR_CST_NELTS (arg0));
10530 for (i = 0; i < VECTOR_CST_NELTS (arg0); ++i)
10531 n_elts[i] = build_int_cst (inner_type,
10533 (VECTOR_CST_ELT (arg0, i)) << 4);
10534 return build_vector (rtype, n_elts);
10538 case CODE_FOR_fmul8x16_vis:
10539 case CODE_FOR_fmul8x16au_vis:
10540 case CODE_FOR_fmul8x16al_vis:
10546 if (TREE_CODE (arg0) == VECTOR_CST && TREE_CODE (arg1) == VECTOR_CST)
10548 tree inner_type = TREE_TYPE (rtype);
10549 tree *n_elts = XALLOCAVEC (tree, VECTOR_CST_NELTS (arg0));
10550 sparc_handle_vis_mul8x16 (n_elts, icode, inner_type, arg0, arg1);
10551 return build_vector (rtype, n_elts);
10555 case CODE_FOR_fpmerge_vis:
10561 if (TREE_CODE (arg0) == VECTOR_CST && TREE_CODE (arg1) == VECTOR_CST)
10563 tree *n_elts = XALLOCAVEC (tree, 2 * VECTOR_CST_NELTS (arg0));
10565 for (i = 0; i < VECTOR_CST_NELTS (arg0); ++i)
10567 n_elts[2*i] = VECTOR_CST_ELT (arg0, i);
10568 n_elts[2*i+1] = VECTOR_CST_ELT (arg1, i);
10571 return build_vector (rtype, n_elts);
10575 case CODE_FOR_pdist_vis:
10583 if (TREE_CODE (arg0) == VECTOR_CST
10584 && TREE_CODE (arg1) == VECTOR_CST
10585 && TREE_CODE (arg2) == INTEGER_CST)
10587 bool overflow = false;
10588 double_int result = TREE_INT_CST (arg2);
10592 for (i = 0; i < VECTOR_CST_NELTS (arg0); ++i)
10594 double_int e0 = TREE_INT_CST (VECTOR_CST_ELT (arg0, i));
10595 double_int e1 = TREE_INT_CST (VECTOR_CST_ELT (arg1, i));
10597 bool neg1_ovf, neg2_ovf, add1_ovf, add2_ovf;
10599 tmp = e1.neg_with_overflow (&neg1_ovf);
10600 tmp = e0.add_with_sign (tmp, false, &add1_ovf);
10601 if (tmp.is_negative ())
10602 tmp = tmp.neg_with_overflow (&neg2_ovf);
10604 result = result.add_with_sign (tmp, false, &add2_ovf);
10605 overflow |= neg1_ovf | neg2_ovf | add1_ovf | add2_ovf;
10608 gcc_assert (!overflow);
10610 return build_int_cst_wide (rtype, result.low, result.high);
10620 /* ??? This duplicates information provided to the compiler by the
10621 ??? scheduler description. Some day, teach genautomata to output
10622 ??? the latencies and then CSE will just use that. */
10625 sparc_rtx_costs (rtx x, int code, int outer_code, int opno ATTRIBUTE_UNUSED,
10626 int *total, bool speed ATTRIBUTE_UNUSED)
10628 enum machine_mode mode = GET_MODE (x);
10629 bool float_mode_p = FLOAT_MODE_P (mode);
10634 if (INTVAL (x) < 0x1000 && INTVAL (x) >= -0x1000)
10652 if (GET_MODE (x) == VOIDmode
10653 && ((CONST_DOUBLE_HIGH (x) == 0
10654 && CONST_DOUBLE_LOW (x) < 0x1000)
10655 || (CONST_DOUBLE_HIGH (x) == -1
10656 && CONST_DOUBLE_LOW (x) < 0
10657 && CONST_DOUBLE_LOW (x) >= -0x1000)))
10664 /* If outer-code was a sign or zero extension, a cost
10665 of COSTS_N_INSNS (1) was already added in. This is
10666 why we are subtracting it back out. */
10667 if (outer_code == ZERO_EXTEND)
10669 *total = sparc_costs->int_zload - COSTS_N_INSNS (1);
10671 else if (outer_code == SIGN_EXTEND)
10673 *total = sparc_costs->int_sload - COSTS_N_INSNS (1);
10675 else if (float_mode_p)
10677 *total = sparc_costs->float_load;
10681 *total = sparc_costs->int_load;
10689 *total = sparc_costs->float_plusminus;
10691 *total = COSTS_N_INSNS (1);
10698 gcc_assert (float_mode_p);
10699 *total = sparc_costs->float_mul;
10702 if (GET_CODE (sub) == NEG)
10703 sub = XEXP (sub, 0);
10704 *total += rtx_cost (sub, FMA, 0, speed);
10707 if (GET_CODE (sub) == NEG)
10708 sub = XEXP (sub, 0);
10709 *total += rtx_cost (sub, FMA, 2, speed);
10715 *total = sparc_costs->float_mul;
10716 else if (! TARGET_HARD_MUL)
10717 *total = COSTS_N_INSNS (25);
10723 if (sparc_costs->int_mul_bit_factor)
10727 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
10729 unsigned HOST_WIDE_INT value = INTVAL (XEXP (x, 1));
10730 for (nbits = 0; value != 0; value &= value - 1)
10733 else if (GET_CODE (XEXP (x, 1)) == CONST_DOUBLE
10734 && GET_MODE (XEXP (x, 1)) == VOIDmode)
10736 rtx x1 = XEXP (x, 1);
10737 unsigned HOST_WIDE_INT value1 = CONST_DOUBLE_LOW (x1);
10738 unsigned HOST_WIDE_INT value2 = CONST_DOUBLE_HIGH (x1);
10740 for (nbits = 0; value1 != 0; value1 &= value1 - 1)
10742 for (; value2 != 0; value2 &= value2 - 1)
10750 bit_cost = (nbits - 3) / sparc_costs->int_mul_bit_factor;
10751 bit_cost = COSTS_N_INSNS (bit_cost);
10754 if (mode == DImode)
10755 *total = sparc_costs->int_mulX + bit_cost;
10757 *total = sparc_costs->int_mul + bit_cost;
10764 *total = COSTS_N_INSNS (1) + sparc_costs->shift_penalty;
10773 if (mode == DFmode)
10774 *total = sparc_costs->float_div_df;
10776 *total = sparc_costs->float_div_sf;
10780 if (mode == DImode)
10781 *total = sparc_costs->int_divX;
10783 *total = sparc_costs->int_div;
10788 if (! float_mode_p)
10790 *total = COSTS_N_INSNS (1);
10797 case UNSIGNED_FLOAT:
10801 case FLOAT_TRUNCATE:
10802 *total = sparc_costs->float_move;
10806 if (mode == DFmode)
10807 *total = sparc_costs->float_sqrt_df;
10809 *total = sparc_costs->float_sqrt_sf;
10814 *total = sparc_costs->float_cmp;
10816 *total = COSTS_N_INSNS (1);
10821 *total = sparc_costs->float_cmove;
10823 *total = sparc_costs->int_cmove;
10827 /* Handle the NAND vector patterns. */
10828 if (sparc_vector_mode_supported_p (GET_MODE (x))
10829 && GET_CODE (XEXP (x, 0)) == NOT
10830 && GET_CODE (XEXP (x, 1)) == NOT)
10832 *total = COSTS_N_INSNS (1);
10843 /* Return true if CLASS is either GENERAL_REGS or I64_REGS. */
10846 general_or_i64_p (reg_class_t rclass)
10848 return (rclass == GENERAL_REGS || rclass == I64_REGS);
10851 /* Implement TARGET_REGISTER_MOVE_COST. */
10854 sparc_register_move_cost (enum machine_mode mode ATTRIBUTE_UNUSED,
10855 reg_class_t from, reg_class_t to)
10857 bool need_memory = false;
10859 if (from == FPCC_REGS || to == FPCC_REGS)
10860 need_memory = true;
10861 else if ((FP_REG_CLASS_P (from) && general_or_i64_p (to))
10862 || (general_or_i64_p (from) && FP_REG_CLASS_P (to)))
10866 int size = GET_MODE_SIZE (mode);
10867 if (size == 8 || size == 4)
10869 if (! TARGET_ARCH32 || size == 4)
10875 need_memory = true;
10880 if (sparc_cpu == PROCESSOR_ULTRASPARC
10881 || sparc_cpu == PROCESSOR_ULTRASPARC3
10882 || sparc_cpu == PROCESSOR_NIAGARA
10883 || sparc_cpu == PROCESSOR_NIAGARA2
10884 || sparc_cpu == PROCESSOR_NIAGARA3
10885 || sparc_cpu == PROCESSOR_NIAGARA4)
10894 /* Emit the sequence of insns SEQ while preserving the registers REG and REG2.
10895 This is achieved by means of a manual dynamic stack space allocation in
10896 the current frame. We make the assumption that SEQ doesn't contain any
10897 function calls, with the possible exception of calls to the GOT helper. */
10900 emit_and_preserve (rtx seq, rtx reg, rtx reg2)
10902 /* We must preserve the lowest 16 words for the register save area. */
10903 HOST_WIDE_INT offset = 16*UNITS_PER_WORD;
10904 /* We really need only 2 words of fresh stack space. */
10905 HOST_WIDE_INT size = SPARC_STACK_ALIGN (offset + 2*UNITS_PER_WORD);
10908 = gen_rtx_MEM (word_mode, plus_constant (Pmode, stack_pointer_rtx,
10909 SPARC_STACK_BIAS + offset));
10911 emit_insn (gen_stack_pointer_inc (GEN_INT (-size)));
10912 emit_insn (gen_rtx_SET (VOIDmode, slot, reg));
10914 emit_insn (gen_rtx_SET (VOIDmode,
10915 adjust_address (slot, word_mode, UNITS_PER_WORD),
10919 emit_insn (gen_rtx_SET (VOIDmode,
10921 adjust_address (slot, word_mode, UNITS_PER_WORD)));
10922 emit_insn (gen_rtx_SET (VOIDmode, reg, slot));
10923 emit_insn (gen_stack_pointer_inc (GEN_INT (size)));
10926 /* Output the assembler code for a thunk function. THUNK_DECL is the
10927 declaration for the thunk function itself, FUNCTION is the decl for
10928 the target function. DELTA is an immediate constant offset to be
10929 added to THIS. If VCALL_OFFSET is nonzero, the word at address
10930 (*THIS + VCALL_OFFSET) should be additionally added to THIS. */
10933 sparc_output_mi_thunk (FILE *file, tree thunk_fndecl ATTRIBUTE_UNUSED,
10934 HOST_WIDE_INT delta, HOST_WIDE_INT vcall_offset,
10937 rtx this_rtx, insn, funexp;
10938 unsigned int int_arg_first;
10940 reload_completed = 1;
10941 epilogue_completed = 1;
10943 emit_note (NOTE_INSN_PROLOGUE_END);
10947 sparc_leaf_function_p = 1;
10949 int_arg_first = SPARC_OUTGOING_INT_ARG_FIRST;
10951 else if (flag_delayed_branch)
10953 /* We will emit a regular sibcall below, so we need to instruct
10954 output_sibcall that we are in a leaf function. */
10955 sparc_leaf_function_p = crtl->uses_only_leaf_regs = 1;
10957 /* This will cause final.c to invoke leaf_renumber_regs so we
10958 must behave as if we were in a not-yet-leafified function. */
10959 int_arg_first = SPARC_INCOMING_INT_ARG_FIRST;
10963 /* We will emit the sibcall manually below, so we will need to
10964 manually spill non-leaf registers. */
10965 sparc_leaf_function_p = crtl->uses_only_leaf_regs = 0;
10967 /* We really are in a leaf function. */
10968 int_arg_first = SPARC_OUTGOING_INT_ARG_FIRST;
10971 /* Find the "this" pointer. Normally in %o0, but in ARCH64 if the function
10972 returns a structure, the structure return pointer is there instead. */
10974 && aggregate_value_p (TREE_TYPE (TREE_TYPE (function)), function))
10975 this_rtx = gen_rtx_REG (Pmode, int_arg_first + 1);
10977 this_rtx = gen_rtx_REG (Pmode, int_arg_first);
10979 /* Add DELTA. When possible use a plain add, otherwise load it into
10980 a register first. */
10983 rtx delta_rtx = GEN_INT (delta);
10985 if (! SPARC_SIMM13_P (delta))
10987 rtx scratch = gen_rtx_REG (Pmode, 1);
10988 emit_move_insn (scratch, delta_rtx);
10989 delta_rtx = scratch;
10992 /* THIS_RTX += DELTA. */
10993 emit_insn (gen_add2_insn (this_rtx, delta_rtx));
10996 /* Add the word at address (*THIS_RTX + VCALL_OFFSET). */
10999 rtx vcall_offset_rtx = GEN_INT (vcall_offset);
11000 rtx scratch = gen_rtx_REG (Pmode, 1);
11002 gcc_assert (vcall_offset < 0);
11004 /* SCRATCH = *THIS_RTX. */
11005 emit_move_insn (scratch, gen_rtx_MEM (Pmode, this_rtx));
11007 /* Prepare for adding VCALL_OFFSET. The difficulty is that we
11008 may not have any available scratch register at this point. */
11009 if (SPARC_SIMM13_P (vcall_offset))
11011 /* This is the case if ARCH64 (unless -ffixed-g5 is passed). */
11012 else if (! fixed_regs[5]
11013 /* The below sequence is made up of at least 2 insns,
11014 while the default method may need only one. */
11015 && vcall_offset < -8192)
11017 rtx scratch2 = gen_rtx_REG (Pmode, 5);
11018 emit_move_insn (scratch2, vcall_offset_rtx);
11019 vcall_offset_rtx = scratch2;
11023 rtx increment = GEN_INT (-4096);
11025 /* VCALL_OFFSET is a negative number whose typical range can be
11026 estimated as -32768..0 in 32-bit mode. In almost all cases
11027 it is therefore cheaper to emit multiple add insns than
11028 spilling and loading the constant into a register (at least
11030 while (! SPARC_SIMM13_P (vcall_offset))
11032 emit_insn (gen_add2_insn (scratch, increment));
11033 vcall_offset += 4096;
11035 vcall_offset_rtx = GEN_INT (vcall_offset); /* cannot be 0 */
11038 /* SCRATCH = *(*THIS_RTX + VCALL_OFFSET). */
11039 emit_move_insn (scratch, gen_rtx_MEM (Pmode,
11040 gen_rtx_PLUS (Pmode,
11042 vcall_offset_rtx)));
11044 /* THIS_RTX += *(*THIS_RTX + VCALL_OFFSET). */
11045 emit_insn (gen_add2_insn (this_rtx, scratch));
11048 /* Generate a tail call to the target function. */
11049 if (! TREE_USED (function))
11051 assemble_external (function);
11052 TREE_USED (function) = 1;
11054 funexp = XEXP (DECL_RTL (function), 0);
11056 if (flag_delayed_branch)
11058 funexp = gen_rtx_MEM (FUNCTION_MODE, funexp);
11059 insn = emit_call_insn (gen_sibcall (funexp));
11060 SIBLING_CALL_P (insn) = 1;
11064 /* The hoops we have to jump through in order to generate a sibcall
11065 without using delay slots... */
11066 rtx spill_reg, seq, scratch = gen_rtx_REG (Pmode, 1);
11070 spill_reg = gen_rtx_REG (word_mode, 15); /* %o7 */
11072 load_got_register (); /* clobbers %o7 */
11073 scratch = sparc_legitimize_pic_address (funexp, scratch);
11074 seq = get_insns ();
11076 emit_and_preserve (seq, spill_reg, pic_offset_table_rtx);
11078 else if (TARGET_ARCH32)
11080 emit_insn (gen_rtx_SET (VOIDmode,
11082 gen_rtx_HIGH (SImode, funexp)));
11083 emit_insn (gen_rtx_SET (VOIDmode,
11085 gen_rtx_LO_SUM (SImode, scratch, funexp)));
11087 else /* TARGET_ARCH64 */
11089 switch (sparc_cmodel)
11093 /* The destination can serve as a temporary. */
11094 sparc_emit_set_symbolic_const64 (scratch, funexp, scratch);
11099 /* The destination cannot serve as a temporary. */
11100 spill_reg = gen_rtx_REG (DImode, 15); /* %o7 */
11102 sparc_emit_set_symbolic_const64 (scratch, funexp, spill_reg);
11103 seq = get_insns ();
11105 emit_and_preserve (seq, spill_reg, 0);
11109 gcc_unreachable ();
11113 emit_jump_insn (gen_indirect_jump (scratch));
11118 /* Run just enough of rest_of_compilation to get the insns emitted.
11119 There's not really enough bulk here to make other passes such as
11120 instruction scheduling worth while. Note that use_thunk calls
11121 assemble_start_function and assemble_end_function. */
11122 insn = get_insns ();
11123 shorten_branches (insn);
11124 final_start_function (insn, file, 1);
11125 final (insn, file, 1);
11126 final_end_function ();
11128 reload_completed = 0;
11129 epilogue_completed = 0;
11132 /* Return true if sparc_output_mi_thunk would be able to output the
11133 assembler code for the thunk function specified by the arguments
11134 it is passed, and false otherwise. */
11136 sparc_can_output_mi_thunk (const_tree thunk_fndecl ATTRIBUTE_UNUSED,
11137 HOST_WIDE_INT delta ATTRIBUTE_UNUSED,
11138 HOST_WIDE_INT vcall_offset,
11139 const_tree function ATTRIBUTE_UNUSED)
11141 /* Bound the loop used in the default method above. */
11142 return (vcall_offset >= -32768 || ! fixed_regs[5]);
11145 /* How to allocate a 'struct machine_function'. */
11147 static struct machine_function *
11148 sparc_init_machine_status (void)
11150 return ggc_alloc_cleared_machine_function ();
11153 /* Locate some local-dynamic symbol still in use by this function
11154 so that we can print its name in local-dynamic base patterns. */
11156 static const char *
11157 get_some_local_dynamic_name (void)
11161 if (cfun->machine->some_ld_name)
11162 return cfun->machine->some_ld_name;
11164 for (insn = get_insns (); insn ; insn = NEXT_INSN (insn))
11166 && for_each_rtx (&PATTERN (insn), get_some_local_dynamic_name_1, 0))
11167 return cfun->machine->some_ld_name;
11169 gcc_unreachable ();
11173 get_some_local_dynamic_name_1 (rtx *px, void *data ATTRIBUTE_UNUSED)
11178 && GET_CODE (x) == SYMBOL_REF
11179 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_DYNAMIC)
11181 cfun->machine->some_ld_name = XSTR (x, 0);
11188 /* This is called from dwarf2out.c via TARGET_ASM_OUTPUT_DWARF_DTPREL.
11189 We need to emit DTP-relative relocations. */
11192 sparc_output_dwarf_dtprel (FILE *file, int size, rtx x)
11197 fputs ("\t.word\t%r_tls_dtpoff32(", file);
11200 fputs ("\t.xword\t%r_tls_dtpoff64(", file);
11203 gcc_unreachable ();
11205 output_addr_const (file, x);
11209 /* Do whatever processing is required at the end of a file. */
11212 sparc_file_end (void)
11214 /* If we need to emit the special GOT helper function, do so now. */
11215 if (got_helper_rtx)
11217 const char *name = XSTR (got_helper_rtx, 0);
11218 const char *reg_name = reg_names[GLOBAL_OFFSET_TABLE_REGNUM];
11219 #ifdef DWARF2_UNWIND_INFO
11223 if (USE_HIDDEN_LINKONCE)
11225 tree decl = build_decl (BUILTINS_LOCATION, FUNCTION_DECL,
11226 get_identifier (name),
11227 build_function_type_list (void_type_node,
11229 DECL_RESULT (decl) = build_decl (BUILTINS_LOCATION, RESULT_DECL,
11230 NULL_TREE, void_type_node);
11231 TREE_PUBLIC (decl) = 1;
11232 TREE_STATIC (decl) = 1;
11233 make_decl_one_only (decl, DECL_ASSEMBLER_NAME (decl));
11234 DECL_VISIBILITY (decl) = VISIBILITY_HIDDEN;
11235 DECL_VISIBILITY_SPECIFIED (decl) = 1;
11236 resolve_unique_section (decl, 0, flag_function_sections);
11237 allocate_struct_function (decl, true);
11238 cfun->is_thunk = 1;
11239 current_function_decl = decl;
11240 init_varasm_status ();
11241 assemble_start_function (decl, name);
11245 const int align = floor_log2 (FUNCTION_BOUNDARY / BITS_PER_UNIT);
11246 switch_to_section (text_section);
11248 ASM_OUTPUT_ALIGN (asm_out_file, align);
11249 ASM_OUTPUT_LABEL (asm_out_file, name);
11252 #ifdef DWARF2_UNWIND_INFO
11253 do_cfi = dwarf2out_do_cfi_asm ();
11255 fprintf (asm_out_file, "\t.cfi_startproc\n");
11257 if (flag_delayed_branch)
11258 fprintf (asm_out_file, "\tjmp\t%%o7+8\n\t add\t%%o7, %s, %s\n",
11259 reg_name, reg_name);
11261 fprintf (asm_out_file, "\tadd\t%%o7, %s, %s\n\tjmp\t%%o7+8\n\t nop\n",
11262 reg_name, reg_name);
11263 #ifdef DWARF2_UNWIND_INFO
11265 fprintf (asm_out_file, "\t.cfi_endproc\n");
11269 if (NEED_INDICATE_EXEC_STACK)
11270 file_end_indicate_exec_stack ();
11272 #ifdef TARGET_SOLARIS
11273 solaris_file_end ();
11277 #ifdef TARGET_ALTERNATE_LONG_DOUBLE_MANGLING
11278 /* Implement TARGET_MANGLE_TYPE. */
11280 static const char *
11281 sparc_mangle_type (const_tree type)
11284 && TYPE_MAIN_VARIANT (type) == long_double_type_node
11285 && TARGET_LONG_DOUBLE_128)
11288 /* For all other types, use normal C++ mangling. */
11293 /* Expand a membar instruction for various use cases. Both the LOAD_STORE
11294 and BEFORE_AFTER arguments of the form X_Y. They are two-bit masks where
11295 bit 0 indicates that X is true, and bit 1 indicates Y is true. */
11298 sparc_emit_membar_for_model (enum memmodel model,
11299 int load_store, int before_after)
11301 /* Bits for the MEMBAR mmask field. */
11302 const int LoadLoad = 1;
11303 const int StoreLoad = 2;
11304 const int LoadStore = 4;
11305 const int StoreStore = 8;
11307 int mm = 0, implied = 0;
11309 switch (sparc_memory_model)
11312 /* Sequential Consistency. All memory transactions are immediately
11313 visible in sequential execution order. No barriers needed. */
11314 implied = LoadLoad | StoreLoad | LoadStore | StoreStore;
11318 /* Total Store Ordering: all memory transactions with store semantics
11319 are followed by an implied StoreStore. */
11320 implied |= StoreStore;
11324 /* Partial Store Ordering: all memory transactions with load semantics
11325 are followed by an implied LoadLoad | LoadStore. */
11326 implied |= LoadLoad | LoadStore;
11328 /* If we're not looking for a raw barrer (before+after), then atomic
11329 operations get the benefit of being both load and store. */
11330 if (load_store == 3 && before_after == 2)
11331 implied |= StoreLoad | StoreStore;
11335 /* Relaxed Memory Ordering: no implicit bits. */
11339 gcc_unreachable ();
11342 if (before_after & 1)
11344 if (model == MEMMODEL_RELEASE
11345 || model == MEMMODEL_ACQ_REL
11346 || model == MEMMODEL_SEQ_CST)
11348 if (load_store & 1)
11349 mm |= LoadLoad | StoreLoad;
11350 if (load_store & 2)
11351 mm |= LoadStore | StoreStore;
11354 if (before_after & 2)
11356 if (model == MEMMODEL_ACQUIRE
11357 || model == MEMMODEL_ACQ_REL
11358 || model == MEMMODEL_SEQ_CST)
11360 if (load_store & 1)
11361 mm |= LoadLoad | LoadStore;
11362 if (load_store & 2)
11363 mm |= StoreLoad | StoreStore;
11367 /* Remove the bits implied by the system memory model. */
11370 /* For raw barriers (before+after), always emit a barrier.
11371 This will become a compile-time barrier if needed. */
11372 if (mm || before_after == 3)
11373 emit_insn (gen_membar (GEN_INT (mm)));
11376 /* Expand code to perform a 8 or 16-bit compare and swap by doing 32-bit
11377 compare and swap on the word containing the byte or half-word. */
11380 sparc_expand_compare_and_swap_12 (rtx bool_result, rtx result, rtx mem,
11381 rtx oldval, rtx newval)
11383 rtx addr1 = force_reg (Pmode, XEXP (mem, 0));
11384 rtx addr = gen_reg_rtx (Pmode);
11385 rtx off = gen_reg_rtx (SImode);
11386 rtx oldv = gen_reg_rtx (SImode);
11387 rtx newv = gen_reg_rtx (SImode);
11388 rtx oldvalue = gen_reg_rtx (SImode);
11389 rtx newvalue = gen_reg_rtx (SImode);
11390 rtx res = gen_reg_rtx (SImode);
11391 rtx resv = gen_reg_rtx (SImode);
11392 rtx memsi, val, mask, end_label, loop_label, cc;
11394 emit_insn (gen_rtx_SET (VOIDmode, addr,
11395 gen_rtx_AND (Pmode, addr1, GEN_INT (-4))));
11397 if (Pmode != SImode)
11398 addr1 = gen_lowpart (SImode, addr1);
11399 emit_insn (gen_rtx_SET (VOIDmode, off,
11400 gen_rtx_AND (SImode, addr1, GEN_INT (3))));
11402 memsi = gen_rtx_MEM (SImode, addr);
11403 set_mem_alias_set (memsi, ALIAS_SET_MEMORY_BARRIER);
11404 MEM_VOLATILE_P (memsi) = MEM_VOLATILE_P (mem);
11406 val = copy_to_reg (memsi);
11408 emit_insn (gen_rtx_SET (VOIDmode, off,
11409 gen_rtx_XOR (SImode, off,
11410 GEN_INT (GET_MODE (mem) == QImode
11413 emit_insn (gen_rtx_SET (VOIDmode, off,
11414 gen_rtx_ASHIFT (SImode, off, GEN_INT (3))));
11416 if (GET_MODE (mem) == QImode)
11417 mask = force_reg (SImode, GEN_INT (0xff));
11419 mask = force_reg (SImode, GEN_INT (0xffff));
11421 emit_insn (gen_rtx_SET (VOIDmode, mask,
11422 gen_rtx_ASHIFT (SImode, mask, off)));
11424 emit_insn (gen_rtx_SET (VOIDmode, val,
11425 gen_rtx_AND (SImode, gen_rtx_NOT (SImode, mask),
11428 oldval = gen_lowpart (SImode, oldval);
11429 emit_insn (gen_rtx_SET (VOIDmode, oldv,
11430 gen_rtx_ASHIFT (SImode, oldval, off)));
11432 newval = gen_lowpart_common (SImode, newval);
11433 emit_insn (gen_rtx_SET (VOIDmode, newv,
11434 gen_rtx_ASHIFT (SImode, newval, off)));
11436 emit_insn (gen_rtx_SET (VOIDmode, oldv,
11437 gen_rtx_AND (SImode, oldv, mask)));
11439 emit_insn (gen_rtx_SET (VOIDmode, newv,
11440 gen_rtx_AND (SImode, newv, mask)));
11442 end_label = gen_label_rtx ();
11443 loop_label = gen_label_rtx ();
11444 emit_label (loop_label);
11446 emit_insn (gen_rtx_SET (VOIDmode, oldvalue,
11447 gen_rtx_IOR (SImode, oldv, val)));
11449 emit_insn (gen_rtx_SET (VOIDmode, newvalue,
11450 gen_rtx_IOR (SImode, newv, val)));
11452 emit_move_insn (bool_result, const1_rtx);
11454 emit_insn (gen_atomic_compare_and_swapsi_1 (res, memsi, oldvalue, newvalue));
11456 emit_cmp_and_jump_insns (res, oldvalue, EQ, NULL, SImode, 0, end_label);
11458 emit_insn (gen_rtx_SET (VOIDmode, resv,
11459 gen_rtx_AND (SImode, gen_rtx_NOT (SImode, mask),
11462 emit_move_insn (bool_result, const0_rtx);
11464 cc = gen_compare_reg_1 (NE, resv, val);
11465 emit_insn (gen_rtx_SET (VOIDmode, val, resv));
11467 /* Use cbranchcc4 to separate the compare and branch! */
11468 emit_jump_insn (gen_cbranchcc4 (gen_rtx_NE (VOIDmode, cc, const0_rtx),
11469 cc, const0_rtx, loop_label));
11471 emit_label (end_label);
11473 emit_insn (gen_rtx_SET (VOIDmode, res,
11474 gen_rtx_AND (SImode, res, mask)));
11476 emit_insn (gen_rtx_SET (VOIDmode, res,
11477 gen_rtx_LSHIFTRT (SImode, res, off)));
11479 emit_move_insn (result, gen_lowpart (GET_MODE (result), res));
11482 /* Expand code to perform a compare-and-swap. */
11485 sparc_expand_compare_and_swap (rtx operands[])
11487 rtx bval, retval, mem, oldval, newval;
11488 enum machine_mode mode;
11489 enum memmodel model;
11491 bval = operands[0];
11492 retval = operands[1];
11494 oldval = operands[3];
11495 newval = operands[4];
11496 model = (enum memmodel) INTVAL (operands[6]);
11497 mode = GET_MODE (mem);
11499 sparc_emit_membar_for_model (model, 3, 1);
11501 if (reg_overlap_mentioned_p (retval, oldval))
11502 oldval = copy_to_reg (oldval);
11504 if (mode == QImode || mode == HImode)
11505 sparc_expand_compare_and_swap_12 (bval, retval, mem, oldval, newval);
11508 rtx (*gen) (rtx, rtx, rtx, rtx);
11511 if (mode == SImode)
11512 gen = gen_atomic_compare_and_swapsi_1;
11514 gen = gen_atomic_compare_and_swapdi_1;
11515 emit_insn (gen (retval, mem, oldval, newval));
11517 x = emit_store_flag (bval, EQ, retval, oldval, mode, 1, 1);
11519 convert_move (bval, x, 1);
11522 sparc_emit_membar_for_model (model, 3, 2);
11526 sparc_expand_vec_perm_bmask (enum machine_mode vmode, rtx sel)
11530 sel = gen_lowpart (DImode, sel);
11534 /* inp = xxxxxxxAxxxxxxxB */
11535 t_1 = expand_simple_binop (DImode, LSHIFTRT, sel, GEN_INT (16),
11536 NULL_RTX, 1, OPTAB_DIRECT);
11537 /* t_1 = ....xxxxxxxAxxx. */
11538 sel = expand_simple_binop (SImode, AND, gen_lowpart (SImode, sel),
11539 GEN_INT (3), NULL_RTX, 1, OPTAB_DIRECT);
11540 t_1 = expand_simple_binop (SImode, AND, gen_lowpart (SImode, t_1),
11541 GEN_INT (0x30000), NULL_RTX, 1, OPTAB_DIRECT);
11542 /* sel = .......B */
11543 /* t_1 = ...A.... */
11544 sel = expand_simple_binop (SImode, IOR, sel, t_1, sel, 1, OPTAB_DIRECT);
11545 /* sel = ...A...B */
11546 sel = expand_mult (SImode, sel, GEN_INT (0x4444), sel, 1);
11547 /* sel = AAAABBBB * 4 */
11548 t_1 = force_reg (SImode, GEN_INT (0x01230123));
11549 /* sel = { A*4, A*4+1, A*4+2, ... } */
11553 /* inp = xxxAxxxBxxxCxxxD */
11554 t_1 = expand_simple_binop (DImode, LSHIFTRT, sel, GEN_INT (8),
11555 NULL_RTX, 1, OPTAB_DIRECT);
11556 t_2 = expand_simple_binop (DImode, LSHIFTRT, sel, GEN_INT (16),
11557 NULL_RTX, 1, OPTAB_DIRECT);
11558 t_3 = expand_simple_binop (DImode, LSHIFTRT, sel, GEN_INT (24),
11559 NULL_RTX, 1, OPTAB_DIRECT);
11560 /* t_1 = ..xxxAxxxBxxxCxx */
11561 /* t_2 = ....xxxAxxxBxxxC */
11562 /* t_3 = ......xxxAxxxBxx */
11563 sel = expand_simple_binop (SImode, AND, gen_lowpart (SImode, sel),
11565 NULL_RTX, 1, OPTAB_DIRECT);
11566 t_1 = expand_simple_binop (SImode, AND, gen_lowpart (SImode, t_1),
11568 NULL_RTX, 1, OPTAB_DIRECT);
11569 t_2 = expand_simple_binop (SImode, AND, gen_lowpart (SImode, t_2),
11570 GEN_INT (0x070000),
11571 NULL_RTX, 1, OPTAB_DIRECT);
11572 t_3 = expand_simple_binop (SImode, AND, gen_lowpart (SImode, t_3),
11573 GEN_INT (0x07000000),
11574 NULL_RTX, 1, OPTAB_DIRECT);
11575 /* sel = .......D */
11576 /* t_1 = .....C.. */
11577 /* t_2 = ...B.... */
11578 /* t_3 = .A...... */
11579 sel = expand_simple_binop (SImode, IOR, sel, t_1, sel, 1, OPTAB_DIRECT);
11580 t_2 = expand_simple_binop (SImode, IOR, t_2, t_3, t_2, 1, OPTAB_DIRECT);
11581 sel = expand_simple_binop (SImode, IOR, sel, t_2, sel, 1, OPTAB_DIRECT);
11582 /* sel = .A.B.C.D */
11583 sel = expand_mult (SImode, sel, GEN_INT (0x22), sel, 1);
11584 /* sel = AABBCCDD * 2 */
11585 t_1 = force_reg (SImode, GEN_INT (0x01010101));
11586 /* sel = { A*2, A*2+1, B*2, B*2+1, ... } */
11590 /* input = xAxBxCxDxExFxGxH */
11591 sel = expand_simple_binop (DImode, AND, sel,
11592 GEN_INT ((HOST_WIDE_INT)0x0f0f0f0f << 32
11594 NULL_RTX, 1, OPTAB_DIRECT);
11595 /* sel = .A.B.C.D.E.F.G.H */
11596 t_1 = expand_simple_binop (DImode, LSHIFTRT, sel, GEN_INT (4),
11597 NULL_RTX, 1, OPTAB_DIRECT);
11598 /* t_1 = ..A.B.C.D.E.F.G. */
11599 sel = expand_simple_binop (DImode, IOR, sel, t_1,
11600 NULL_RTX, 1, OPTAB_DIRECT);
11601 /* sel = .AABBCCDDEEFFGGH */
11602 sel = expand_simple_binop (DImode, AND, sel,
11603 GEN_INT ((HOST_WIDE_INT)0xff00ff << 32
11605 NULL_RTX, 1, OPTAB_DIRECT);
11606 /* sel = ..AB..CD..EF..GH */
11607 t_1 = expand_simple_binop (DImode, LSHIFTRT, sel, GEN_INT (8),
11608 NULL_RTX, 1, OPTAB_DIRECT);
11609 /* t_1 = ....AB..CD..EF.. */
11610 sel = expand_simple_binop (DImode, IOR, sel, t_1,
11611 NULL_RTX, 1, OPTAB_DIRECT);
11612 /* sel = ..ABABCDCDEFEFGH */
11613 sel = expand_simple_binop (DImode, AND, sel,
11614 GEN_INT ((HOST_WIDE_INT)0xffff << 32 | 0xffff),
11615 NULL_RTX, 1, OPTAB_DIRECT);
11616 /* sel = ....ABCD....EFGH */
11617 t_1 = expand_simple_binop (DImode, LSHIFTRT, sel, GEN_INT (16),
11618 NULL_RTX, 1, OPTAB_DIRECT);
11619 /* t_1 = ........ABCD.... */
11620 sel = gen_lowpart (SImode, sel);
11621 t_1 = gen_lowpart (SImode, t_1);
11625 gcc_unreachable ();
11628 /* Always perform the final addition/merge within the bmask insn. */
11629 emit_insn (gen_bmasksi_vis (gen_rtx_REG (SImode, 0), sel, t_1));
11632 /* Implement TARGET_FRAME_POINTER_REQUIRED. */
11635 sparc_frame_pointer_required (void)
11637 /* If the stack pointer is dynamically modified in the function, it cannot
11638 serve as the frame pointer. */
11639 if (cfun->calls_alloca)
11642 /* If the function receives nonlocal gotos, it needs to save the frame
11643 pointer in the nonlocal_goto_save_area object. */
11644 if (cfun->has_nonlocal_label)
11647 /* In flat mode, that's it. */
11651 /* Otherwise, the frame pointer is required if the function isn't leaf. */
11652 return !(crtl->is_leaf && only_leaf_regs_used ());
11655 /* The way this is structured, we can't eliminate SFP in favor of SP
11656 if the frame pointer is required: we want to use the SFP->HFP elimination
11657 in that case. But the test in update_eliminables doesn't know we are
11658 assuming below that we only do the former elimination. */
11661 sparc_can_eliminate (const int from ATTRIBUTE_UNUSED, const int to)
11663 return to == HARD_FRAME_POINTER_REGNUM || !sparc_frame_pointer_required ();
11666 /* Return the hard frame pointer directly to bypass the stack bias. */
11669 sparc_builtin_setjmp_frame_value (void)
11671 return hard_frame_pointer_rtx;
11674 /* If !TARGET_FPU, then make the fp registers and fp cc regs fixed so that
11675 they won't be allocated. */
11678 sparc_conditional_register_usage (void)
11680 if (PIC_OFFSET_TABLE_REGNUM != INVALID_REGNUM)
11682 fixed_regs[PIC_OFFSET_TABLE_REGNUM] = 1;
11683 call_used_regs[PIC_OFFSET_TABLE_REGNUM] = 1;
11685 /* If the user has passed -f{fixed,call-{used,saved}}-g5 */
11686 /* then honor it. */
11687 if (TARGET_ARCH32 && fixed_regs[5])
11689 else if (TARGET_ARCH64 && fixed_regs[5] == 2)
11694 for (regno = SPARC_FIRST_V9_FP_REG;
11695 regno <= SPARC_LAST_V9_FP_REG;
11697 fixed_regs[regno] = 1;
11698 /* %fcc0 is used by v8 and v9. */
11699 for (regno = SPARC_FIRST_V9_FCC_REG + 1;
11700 regno <= SPARC_LAST_V9_FCC_REG;
11702 fixed_regs[regno] = 1;
11707 for (regno = 32; regno < SPARC_LAST_V9_FCC_REG; regno++)
11708 fixed_regs[regno] = 1;
11710 /* If the user has passed -f{fixed,call-{used,saved}}-g2 */
11711 /* then honor it. Likewise with g3 and g4. */
11712 if (fixed_regs[2] == 2)
11713 fixed_regs[2] = ! TARGET_APP_REGS;
11714 if (fixed_regs[3] == 2)
11715 fixed_regs[3] = ! TARGET_APP_REGS;
11716 if (TARGET_ARCH32 && fixed_regs[4] == 2)
11717 fixed_regs[4] = ! TARGET_APP_REGS;
11718 else if (TARGET_CM_EMBMEDANY)
11720 else if (fixed_regs[4] == 2)
11725 /* Disable leaf functions. */
11726 memset (sparc_leaf_regs, 0, FIRST_PSEUDO_REGISTER);
11727 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
11728 leaf_reg_remap [regno] = regno;
11731 global_regs[SPARC_GSR_REG] = 1;
11734 /* Implement TARGET_PREFERRED_RELOAD_CLASS:
11736 - We can't load constants into FP registers.
11737 - We can't load FP constants into integer registers when soft-float,
11738 because there is no soft-float pattern with a r/F constraint.
11739 - We can't load FP constants into integer registers for TFmode unless
11740 it is 0.0L, because there is no movtf pattern with a r/F constraint.
11741 - Try and reload integer constants (symbolic or otherwise) back into
11742 registers directly, rather than having them dumped to memory. */
11745 sparc_preferred_reload_class (rtx x, reg_class_t rclass)
11747 enum machine_mode mode = GET_MODE (x);
11748 if (CONSTANT_P (x))
11750 if (FP_REG_CLASS_P (rclass)
11751 || rclass == GENERAL_OR_FP_REGS
11752 || rclass == GENERAL_OR_EXTRA_FP_REGS
11753 || (GET_MODE_CLASS (mode) == MODE_FLOAT && ! TARGET_FPU)
11754 || (mode == TFmode && ! const_zero_operand (x, mode)))
11757 if (GET_MODE_CLASS (mode) == MODE_INT)
11758 return GENERAL_REGS;
11760 if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
11762 if (! FP_REG_CLASS_P (rclass)
11763 || !(const_zero_operand (x, mode)
11764 || const_all_ones_operand (x, mode)))
11771 && (rclass == EXTRA_FP_REGS
11772 || rclass == GENERAL_OR_EXTRA_FP_REGS))
11774 int regno = true_regnum (x);
11776 if (SPARC_INT_REG_P (regno))
11777 return (rclass == EXTRA_FP_REGS
11778 ? FP_REGS : GENERAL_OR_FP_REGS);
11784 /* Output a wide multiply instruction in V8+ mode. INSN is the instruction,
11785 OPERANDS are its operands and OPCODE is the mnemonic to be used. */
11788 output_v8plus_mult (rtx insn, rtx *operands, const char *opcode)
11792 gcc_assert (! TARGET_ARCH64);
11794 if (sparc_check_64 (operands[1], insn) <= 0)
11795 output_asm_insn ("srl\t%L1, 0, %L1", operands);
11796 if (which_alternative == 1)
11797 output_asm_insn ("sllx\t%H1, 32, %H1", operands);
11798 if (GET_CODE (operands[2]) == CONST_INT)
11800 if (which_alternative == 1)
11802 output_asm_insn ("or\t%L1, %H1, %H1", operands);
11803 sprintf (mulstr, "%s\t%%H1, %%2, %%L0", opcode);
11804 output_asm_insn (mulstr, operands);
11805 return "srlx\t%L0, 32, %H0";
11809 output_asm_insn ("sllx\t%H1, 32, %3", operands);
11810 output_asm_insn ("or\t%L1, %3, %3", operands);
11811 sprintf (mulstr, "%s\t%%3, %%2, %%3", opcode);
11812 output_asm_insn (mulstr, operands);
11813 output_asm_insn ("srlx\t%3, 32, %H0", operands);
11814 return "mov\t%3, %L0";
11817 else if (rtx_equal_p (operands[1], operands[2]))
11819 if (which_alternative == 1)
11821 output_asm_insn ("or\t%L1, %H1, %H1", operands);
11822 sprintf (mulstr, "%s\t%%H1, %%H1, %%L0", opcode);
11823 output_asm_insn (mulstr, operands);
11824 return "srlx\t%L0, 32, %H0";
11828 output_asm_insn ("sllx\t%H1, 32, %3", operands);
11829 output_asm_insn ("or\t%L1, %3, %3", operands);
11830 sprintf (mulstr, "%s\t%%3, %%3, %%3", opcode);
11831 output_asm_insn (mulstr, operands);
11832 output_asm_insn ("srlx\t%3, 32, %H0", operands);
11833 return "mov\t%3, %L0";
11836 if (sparc_check_64 (operands[2], insn) <= 0)
11837 output_asm_insn ("srl\t%L2, 0, %L2", operands);
11838 if (which_alternative == 1)
11840 output_asm_insn ("or\t%L1, %H1, %H1", operands);
11841 output_asm_insn ("sllx\t%H2, 32, %L1", operands);
11842 output_asm_insn ("or\t%L2, %L1, %L1", operands);
11843 sprintf (mulstr, "%s\t%%H1, %%L1, %%L0", opcode);
11844 output_asm_insn (mulstr, operands);
11845 return "srlx\t%L0, 32, %H0";
11849 output_asm_insn ("sllx\t%H1, 32, %3", operands);
11850 output_asm_insn ("sllx\t%H2, 32, %4", operands);
11851 output_asm_insn ("or\t%L1, %3, %3", operands);
11852 output_asm_insn ("or\t%L2, %4, %4", operands);
11853 sprintf (mulstr, "%s\t%%3, %%4, %%3", opcode);
11854 output_asm_insn (mulstr, operands);
11855 output_asm_insn ("srlx\t%3, 32, %H0", operands);
11856 return "mov\t%3, %L0";
11860 /* Subroutine of sparc_expand_vector_init. Emit code to initialize
11861 all fields of TARGET to ELT by means of VIS2 BSHUFFLE insn. MODE
11862 and INNER_MODE are the modes describing TARGET. */
11865 vector_init_bshuffle (rtx target, rtx elt, enum machine_mode mode,
11866 enum machine_mode inner_mode)
11868 rtx t1, final_insn, sel;
11871 t1 = gen_reg_rtx (mode);
11873 elt = convert_modes (SImode, inner_mode, elt, true);
11874 emit_move_insn (gen_lowpart(SImode, t1), elt);
11879 final_insn = gen_bshufflev2si_vis (target, t1, t1);
11880 bmask = 0x45674567;
11883 final_insn = gen_bshufflev4hi_vis (target, t1, t1);
11884 bmask = 0x67676767;
11887 final_insn = gen_bshufflev8qi_vis (target, t1, t1);
11888 bmask = 0x77777777;
11891 gcc_unreachable ();
11894 sel = force_reg (SImode, GEN_INT (bmask));
11895 emit_insn (gen_bmasksi_vis (gen_rtx_REG (SImode, 0), sel, const0_rtx));
11896 emit_insn (final_insn);
11899 /* Subroutine of sparc_expand_vector_init. Emit code to initialize
11900 all fields of TARGET to ELT in V8QI by means of VIS FPMERGE insn. */
11903 vector_init_fpmerge (rtx target, rtx elt)
11905 rtx t1, t2, t2_low, t3, t3_low;
11907 t1 = gen_reg_rtx (V4QImode);
11908 elt = convert_modes (SImode, QImode, elt, true);
11909 emit_move_insn (gen_lowpart (SImode, t1), elt);
11911 t2 = gen_reg_rtx (V8QImode);
11912 t2_low = gen_lowpart (V4QImode, t2);
11913 emit_insn (gen_fpmerge_vis (t2, t1, t1));
11915 t3 = gen_reg_rtx (V8QImode);
11916 t3_low = gen_lowpart (V4QImode, t3);
11917 emit_insn (gen_fpmerge_vis (t3, t2_low, t2_low));
11919 emit_insn (gen_fpmerge_vis (target, t3_low, t3_low));
11922 /* Subroutine of sparc_expand_vector_init. Emit code to initialize
11923 all fields of TARGET to ELT in V4HI by means of VIS FALIGNDATA insn. */
11926 vector_init_faligndata (rtx target, rtx elt)
11928 rtx t1 = gen_reg_rtx (V4HImode);
11931 elt = convert_modes (SImode, HImode, elt, true);
11932 emit_move_insn (gen_lowpart (SImode, t1), elt);
11934 emit_insn (gen_alignaddrsi_vis (gen_reg_rtx (SImode),
11935 force_reg (SImode, GEN_INT (6)),
11938 for (i = 0; i < 4; i++)
11939 emit_insn (gen_faligndatav4hi_vis (target, t1, target));
11942 /* Emit code to initialize TARGET to values for individual fields VALS. */
11945 sparc_expand_vector_init (rtx target, rtx vals)
11947 const enum machine_mode mode = GET_MODE (target);
11948 const enum machine_mode inner_mode = GET_MODE_INNER (mode);
11949 const int n_elts = GET_MODE_NUNITS (mode);
11955 for (i = 0; i < n_elts; i++)
11957 rtx x = XVECEXP (vals, 0, i);
11958 if (!CONSTANT_P (x))
11961 if (i > 0 && !rtx_equal_p (x, XVECEXP (vals, 0, 0)))
11967 emit_move_insn (target, gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0)));
11971 if (GET_MODE_SIZE (inner_mode) == GET_MODE_SIZE (mode))
11973 if (GET_MODE_SIZE (inner_mode) == 4)
11975 emit_move_insn (gen_lowpart (SImode, target),
11976 gen_lowpart (SImode, XVECEXP (vals, 0, 0)));
11979 else if (GET_MODE_SIZE (inner_mode) == 8)
11981 emit_move_insn (gen_lowpart (DImode, target),
11982 gen_lowpart (DImode, XVECEXP (vals, 0, 0)));
11986 else if (GET_MODE_SIZE (inner_mode) == GET_MODE_SIZE (word_mode)
11987 && GET_MODE_SIZE (mode) == 2 * GET_MODE_SIZE (word_mode))
11989 emit_move_insn (gen_highpart (word_mode, target),
11990 gen_lowpart (word_mode, XVECEXP (vals, 0, 0)));
11991 emit_move_insn (gen_lowpart (word_mode, target),
11992 gen_lowpart (word_mode, XVECEXP (vals, 0, 1)));
11996 if (all_same && GET_MODE_SIZE (mode) == 8)
12000 vector_init_bshuffle (target, XVECEXP (vals, 0, 0), mode, inner_mode);
12003 if (mode == V8QImode)
12005 vector_init_fpmerge (target, XVECEXP (vals, 0, 0));
12008 if (mode == V4HImode)
12010 vector_init_faligndata (target, XVECEXP (vals, 0, 0));
12015 mem = assign_stack_temp (mode, GET_MODE_SIZE (mode));
12016 for (i = 0; i < n_elts; i++)
12017 emit_move_insn (adjust_address_nv (mem, inner_mode,
12018 i * GET_MODE_SIZE (inner_mode)),
12019 XVECEXP (vals, 0, i));
12020 emit_move_insn (target, mem);
12023 /* Implement TARGET_SECONDARY_RELOAD. */
12026 sparc_secondary_reload (bool in_p, rtx x, reg_class_t rclass_i,
12027 enum machine_mode mode, secondary_reload_info *sri)
12029 enum reg_class rclass = (enum reg_class) rclass_i;
12031 sri->icode = CODE_FOR_nothing;
12032 sri->extra_cost = 0;
12034 /* We need a temporary when loading/storing a HImode/QImode value
12035 between memory and the FPU registers. This can happen when combine puts
12036 a paradoxical subreg in a float/fix conversion insn. */
12037 if (FP_REG_CLASS_P (rclass)
12038 && (mode == HImode || mode == QImode)
12039 && (GET_CODE (x) == MEM
12040 || ((GET_CODE (x) == REG || GET_CODE (x) == SUBREG)
12041 && true_regnum (x) == -1)))
12042 return GENERAL_REGS;
12044 /* On 32-bit we need a temporary when loading/storing a DFmode value
12045 between unaligned memory and the upper FPU registers. */
12047 && rclass == EXTRA_FP_REGS
12049 && GET_CODE (x) == MEM
12050 && ! mem_min_alignment (x, 8))
12053 if (((TARGET_CM_MEDANY
12054 && symbolic_operand (x, mode))
12055 || (TARGET_CM_EMBMEDANY
12056 && text_segment_operand (x, mode)))
12060 sri->icode = direct_optab_handler (reload_in_optab, mode);
12062 sri->icode = direct_optab_handler (reload_out_optab, mode);
12066 if (TARGET_VIS3 && TARGET_ARCH32)
12068 int regno = true_regnum (x);
12070 /* When using VIS3 fp<-->int register moves, on 32-bit we have
12071 to move 8-byte values in 4-byte pieces. This only works via
12072 FP_REGS, and not via EXTRA_FP_REGS. Therefore if we try to
12073 move between EXTRA_FP_REGS and GENERAL_REGS, we will need
12074 an FP_REGS intermediate move. */
12075 if ((rclass == EXTRA_FP_REGS && SPARC_INT_REG_P (regno))
12076 || ((general_or_i64_p (rclass)
12077 || rclass == GENERAL_OR_FP_REGS)
12078 && SPARC_FP_REG_P (regno)))
12080 sri->extra_cost = 2;
12088 /* Emit code to conditionally move either OPERANDS[2] or OPERANDS[3] into
12089 OPERANDS[0] in MODE. OPERANDS[1] is the operator of the condition. */
12092 sparc_expand_conditional_move (enum machine_mode mode, rtx *operands)
12094 enum rtx_code rc = GET_CODE (operands[1]);
12095 enum machine_mode cmp_mode;
12096 rtx cc_reg, dst, cmp;
12099 if (GET_MODE (XEXP (cmp, 0)) == DImode && !TARGET_ARCH64)
12102 if (GET_MODE (XEXP (cmp, 0)) == TFmode && !TARGET_HARD_QUAD)
12103 cmp = sparc_emit_float_lib_cmp (XEXP (cmp, 0), XEXP (cmp, 1), rc);
12105 cmp_mode = GET_MODE (XEXP (cmp, 0));
12106 rc = GET_CODE (cmp);
12109 if (! rtx_equal_p (operands[2], dst)
12110 && ! rtx_equal_p (operands[3], dst))
12112 if (reg_overlap_mentioned_p (dst, cmp))
12113 dst = gen_reg_rtx (mode);
12115 emit_move_insn (dst, operands[3]);
12117 else if (operands[2] == dst)
12119 operands[2] = operands[3];
12121 if (GET_MODE_CLASS (cmp_mode) == MODE_FLOAT)
12122 rc = reverse_condition_maybe_unordered (rc);
12124 rc = reverse_condition (rc);
12127 if (XEXP (cmp, 1) == const0_rtx
12128 && GET_CODE (XEXP (cmp, 0)) == REG
12129 && cmp_mode == DImode
12130 && v9_regcmp_p (rc))
12131 cc_reg = XEXP (cmp, 0);
12133 cc_reg = gen_compare_reg_1 (rc, XEXP (cmp, 0), XEXP (cmp, 1));
12135 cmp = gen_rtx_fmt_ee (rc, GET_MODE (cc_reg), cc_reg, const0_rtx);
12137 emit_insn (gen_rtx_SET (VOIDmode, dst,
12138 gen_rtx_IF_THEN_ELSE (mode, cmp, operands[2], dst)));
12140 if (dst != operands[0])
12141 emit_move_insn (operands[0], dst);
12146 /* Emit code to conditionally move a combination of OPERANDS[1] and OPERANDS[2]
12147 into OPERANDS[0] in MODE, depending on the outcome of the comparison of
12148 OPERANDS[4] and OPERANDS[5]. OPERANDS[3] is the operator of the condition.
12149 FCODE is the machine code to be used for OPERANDS[3] and CCODE the machine
12150 code to be used for the condition mask. */
12153 sparc_expand_vcond (enum machine_mode mode, rtx *operands, int ccode, int fcode)
12155 rtx mask, cop0, cop1, fcmp, cmask, bshuf, gsr;
12156 enum rtx_code code = GET_CODE (operands[3]);
12158 mask = gen_reg_rtx (Pmode);
12159 cop0 = operands[4];
12160 cop1 = operands[5];
12161 if (code == LT || code == GE)
12165 code = swap_condition (code);
12166 t = cop0; cop0 = cop1; cop1 = t;
12169 gsr = gen_rtx_REG (DImode, SPARC_GSR_REG);
12171 fcmp = gen_rtx_UNSPEC (Pmode,
12172 gen_rtvec (1, gen_rtx_fmt_ee (code, mode, cop0, cop1)),
12175 cmask = gen_rtx_UNSPEC (DImode,
12176 gen_rtvec (2, mask, gsr),
12179 bshuf = gen_rtx_UNSPEC (mode,
12180 gen_rtvec (3, operands[1], operands[2], gsr),
12183 emit_insn (gen_rtx_SET (VOIDmode, mask, fcmp));
12184 emit_insn (gen_rtx_SET (VOIDmode, gsr, cmask));
12186 emit_insn (gen_rtx_SET (VOIDmode, operands[0], bshuf));
12189 /* On sparc, any mode which naturally allocates into the float
12190 registers should return 4 here. */
12193 sparc_regmode_natural_size (enum machine_mode mode)
12195 int size = UNITS_PER_WORD;
12199 enum mode_class mclass = GET_MODE_CLASS (mode);
12201 if (mclass == MODE_FLOAT || mclass == MODE_VECTOR_INT)
12208 /* Return TRUE if it is a good idea to tie two pseudo registers
12209 when one has mode MODE1 and one has mode MODE2.
12210 If HARD_REGNO_MODE_OK could produce different values for MODE1 and MODE2,
12211 for any hard reg, then this must be FALSE for correct output.
12213 For V9 we have to deal with the fact that only the lower 32 floating
12214 point registers are 32-bit addressable. */
12217 sparc_modes_tieable_p (enum machine_mode mode1, enum machine_mode mode2)
12219 enum mode_class mclass1, mclass2;
12220 unsigned short size1, size2;
12222 if (mode1 == mode2)
12225 mclass1 = GET_MODE_CLASS (mode1);
12226 mclass2 = GET_MODE_CLASS (mode2);
12227 if (mclass1 != mclass2)
12233 /* Classes are the same and we are V9 so we have to deal with upper
12234 vs. lower floating point registers. If one of the modes is a
12235 4-byte mode, and the other is not, we have to mark them as not
12236 tieable because only the lower 32 floating point register are
12237 addressable 32-bits at a time.
12239 We can't just test explicitly for SFmode, otherwise we won't
12240 cover the vector mode cases properly. */
12242 if (mclass1 != MODE_FLOAT && mclass1 != MODE_VECTOR_INT)
12245 size1 = GET_MODE_SIZE (mode1);
12246 size2 = GET_MODE_SIZE (mode2);
12247 if ((size1 > 4 && size2 == 4)
12248 || (size2 > 4 && size1 == 4))
12254 static enum machine_mode sparc_cstore_mode (enum insn_code icode ATTRIBUTE_UNUSED)
12256 return (TARGET_ARCH64 ? DImode : SImode);
12259 #include "gt-sparc.h"