1 /* Copyright (C) 2006-2014 Free Software Foundation, Inc.
3 This file is free software; you can redistribute it and/or modify it under
4 the terms of the GNU General Public License as published by the Free
5 Software Foundation; either version 3 of the License, or (at your option)
8 This file is distributed in the hope that it will be useful, but WITHOUT
9 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
10 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
13 You should have received a copy of the GNU General Public License
14 along with GCC; see the file COPYING3. If not see
15 <http://www.gnu.org/licenses/>. */
19 #include "coretypes.h"
23 #include "hard-reg-set.h"
24 #include "insn-config.h"
25 #include "conditions.h"
26 #include "insn-attr.h"
31 #include "stringpool.h"
32 #include "stor-layout.h"
40 #include "basic-block.h"
41 #include "diagnostic-core.h"
46 #include "target-def.h"
47 #include "langhooks.h"
49 #include "sched-int.h"
52 #include "hash-table.h"
53 #include "tree-ssa-alias.h"
54 #include "internal-fn.h"
55 #include "gimple-fold.h"
57 #include "gimple-expr.h"
61 #include "tm-constrs.h"
70 /* Builtin types, data and prototypes. */
72 enum spu_builtin_type_index
74 SPU_BTI_END_OF_PARAMS,
76 /* We create new type nodes for these. */
88 /* A 16-byte type. (Implemented with V16QI_type_node) */
91 /* These all correspond to intSI_type_node */
105 /* These correspond to the standard types */
125 #define V16QI_type_node (spu_builtin_types[SPU_BTI_V16QI])
126 #define V8HI_type_node (spu_builtin_types[SPU_BTI_V8HI])
127 #define V4SI_type_node (spu_builtin_types[SPU_BTI_V4SI])
128 #define V2DI_type_node (spu_builtin_types[SPU_BTI_V2DI])
129 #define V4SF_type_node (spu_builtin_types[SPU_BTI_V4SF])
130 #define V2DF_type_node (spu_builtin_types[SPU_BTI_V2DF])
131 #define unsigned_V16QI_type_node (spu_builtin_types[SPU_BTI_UV16QI])
132 #define unsigned_V8HI_type_node (spu_builtin_types[SPU_BTI_UV8HI])
133 #define unsigned_V4SI_type_node (spu_builtin_types[SPU_BTI_UV4SI])
134 #define unsigned_V2DI_type_node (spu_builtin_types[SPU_BTI_UV2DI])
136 static GTY(()) tree spu_builtin_types[SPU_BTI_MAX];
138 struct spu_builtin_range
143 static struct spu_builtin_range spu_builtin_range[] = {
144 {-0x40ll, 0x7fll}, /* SPU_BTI_7 */
145 {-0x40ll, 0x3fll}, /* SPU_BTI_S7 */
146 {0ll, 0x7fll}, /* SPU_BTI_U7 */
147 {-0x200ll, 0x1ffll}, /* SPU_BTI_S10 */
148 {-0x2000ll, 0x1fffll}, /* SPU_BTI_S10_4 */
149 {0ll, 0x3fffll}, /* SPU_BTI_U14 */
150 {-0x8000ll, 0xffffll}, /* SPU_BTI_16 */
151 {-0x8000ll, 0x7fffll}, /* SPU_BTI_S16 */
152 {-0x20000ll, 0x1ffffll}, /* SPU_BTI_S16_2 */
153 {0ll, 0xffffll}, /* SPU_BTI_U16 */
154 {0ll, 0x3ffffll}, /* SPU_BTI_U16_2 */
155 {0ll, 0x3ffffll}, /* SPU_BTI_U18 */
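/* Reading the table: for example, SPU_BTI_S10 above accepts -0x200..0x1ff,
   i.e. any signed 10-bit value, and SPU_BTI_U7 accepts 0..0x7f, any
   unsigned 7-bit value.  */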
159 /* Target specific attribute specifications. */
160 char regs_ever_allocated[FIRST_PSEUDO_REGISTER];
162 /* Prototypes and external defs. */
163 static int get_pipe (rtx_insn *insn);
164 static int spu_naked_function_p (tree func);
165 static int mem_is_padded_component_ref (rtx x);
166 static void fix_range (const char *);
167 static rtx spu_expand_load (rtx, rtx, rtx, int);
169 /* Which instruction set architecture to use. */
171 /* Which cpu are we tuning for. */
174 /* The hardware requires 8 insns between a hint and the branch it
175 affects. This variable describes how many rtl instructions the
176 compiler needs to see before inserting a hint, and then the compiler
177 will insert enough nops to make it at least 8 insns. The default is
178 for the compiler to allow up to 2 nops to be emitted. The nops are
179 inserted in pairs, so we round down. */
180 int spu_hint_dist = (8*4) - (2*4);
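/* Worked example of the default above: 8 insns * 4 bytes = 32 bytes between
   hint and branch; allowing up to 2 nops (8 bytes) to be filled in leaves a
   required distance of 24 bytes, i.e. the hint must already be at least
   6 insns ahead of the branch before nop padding is considered.  */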
195 IC_POOL, /* constant pool */
196 IC_IL1, /* one il* instruction */
197 IC_IL2, /* both ilhu and iohl instructions */
198 IC_IL1s, /* one il* instruction */
199 IC_IL2s, /* both ilhu and iohl instructions */
200 IC_FSMBI, /* the fsmbi instruction */
201 IC_CPAT, /* one of the c*d instructions */
202 IC_FSMBI2 /* fsmbi plus 1 other instruction */
205 static enum spu_immediate which_immediate_load (HOST_WIDE_INT val);
206 static enum spu_immediate which_logical_immediate (HOST_WIDE_INT val);
207 static int cpat_info(unsigned char *arr, int size, int *prun, int *pstart);
208 static enum immediate_class classify_immediate (rtx op,
209 enum machine_mode mode);
211 /* Pointer mode for __ea references. */
212 #define EAmode (spu_ea_model != 32 ? DImode : SImode)
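/* That is, presumably DImode when __ea pointers are 64 bits (-mea64) and
   SImode for the default 32-bit model.  */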
215 /* Define the structure for the machine field in struct function. */
216 struct GTY(()) machine_function
218 /* Register to use for PIC accesses. */
222 /* How to allocate a 'struct machine_function'. */
223 static struct machine_function *
224 spu_init_machine_status (void)
226 return ggc_cleared_alloc<machine_function> ();
229 /* Implement TARGET_OPTION_OVERRIDE. */
231 spu_option_override (void)
233 /* Set up function hooks. */
234 init_machine_status = spu_init_machine_status;
236 /* Small loops will be completely peeled at -O3. For SPU it is more important
237 to keep code small by default. */
238 if (!flag_unroll_loops && !flag_peel_loops)
239 maybe_set_param_value (PARAM_MAX_COMPLETELY_PEEL_TIMES, 4,
240 global_options.x_param_values,
241 global_options_set.x_param_values);
243 flag_omit_frame_pointer = 1;
245 /* Functions must be 8 byte aligned so we correctly handle dual issue */
246 if (align_functions < 8)
249 spu_hint_dist = 8*4 - spu_max_nops*4;
250 if (spu_hint_dist < 0)
253 if (spu_fixed_range_string)
254 fix_range (spu_fixed_range_string);
256 /* Determine processor architectural level. */
259 if (strcmp (&spu_arch_string[0], "cell") == 0)
260 spu_arch = PROCESSOR_CELL;
261 else if (strcmp (&spu_arch_string[0], "celledp") == 0)
262 spu_arch = PROCESSOR_CELLEDP;
264 error ("bad value (%s) for -march= switch", spu_arch_string);
267 /* Determine processor to tune for. */
270 if (strcmp (&spu_tune_string[0], "cell") == 0)
271 spu_tune = PROCESSOR_CELL;
272 else if (strcmp (&spu_tune_string[0], "celledp") == 0)
273 spu_tune = PROCESSOR_CELLEDP;
275 error ("bad value (%s) for -mtune= switch", spu_tune_string);
278 /* Change defaults according to the processor architecture. */
279 if (spu_arch == PROCESSOR_CELLEDP)
281 /* If no command line option has been otherwise specified, change
282 the default to -mno-safe-hints on celledp -- only the original
283 Cell/B.E. processors require this workaround. */
284 if (!(target_flags_explicit & MASK_SAFE_HINTS))
285 target_flags &= ~MASK_SAFE_HINTS;
288 REAL_MODE_FORMAT (SFmode) = &spu_single_format;
291 /* Handle an attribute requiring a FUNCTION_DECL; arguments as in
292 struct attribute_spec.handler. */
294 /* True if MODE is valid for the target. By "valid", we mean able to
295 be manipulated in non-trivial ways. In particular, this means all
296 the arithmetic is supported. */
298 spu_scalar_mode_supported_p (enum machine_mode mode)
316 /* Similarly for vector modes. "Supported" here is less strict. At
317 least some operations are supported; need to check optabs or builtins
318 for further details. */
320 spu_vector_mode_supported_p (enum machine_mode mode)
337 /* GCC assumes that in a paradoxical SUBREG the inner mode occupies the
338 least significant bytes of the outer mode. This function returns
339 TRUE for the SUBREGs where this is correct. */
341 valid_subreg (rtx op)
343 enum machine_mode om = GET_MODE (op);
344 enum machine_mode im = GET_MODE (SUBREG_REG (op));
345 return om != VOIDmode && im != VOIDmode
346 && (GET_MODE_SIZE (im) == GET_MODE_SIZE (om)
347 || (GET_MODE_SIZE (im) <= 4 && GET_MODE_SIZE (om) <= 4)
348 || (GET_MODE_SIZE (im) >= 16 && GET_MODE_SIZE (om) >= 16));
351 /* When insv and ext[sz]v are passed a TI SUBREG, we want to strip it off
352 and adjust the start offset. */
354 adjust_operand (rtx op, HOST_WIDE_INT * start)
356 enum machine_mode mode;
358 /* Strip any paradoxical SUBREG. */
359 if (GET_CODE (op) == SUBREG
360 && (GET_MODE_BITSIZE (GET_MODE (op))
361 > GET_MODE_BITSIZE (GET_MODE (SUBREG_REG (op)))))
365 GET_MODE_BITSIZE (GET_MODE (op)) -
366 GET_MODE_BITSIZE (GET_MODE (SUBREG_REG (op)));
367 op = SUBREG_REG (op);
369 /* If it is smaller than SI, ensure a SUBREG is used (widening to SImode). */
370 op_size = GET_MODE_BITSIZE (GET_MODE (op));
374 *start += 32 - op_size;
377 /* If it is not a MODE_INT (and/or it is smaller than SI) add a SUBREG. */
378 mode = mode_for_size (op_size, MODE_INT, 0);
379 if (mode != GET_MODE (op))
380 op = gen_rtx_SUBREG (mode, op, 0);
385 spu_expand_extv (rtx ops[], int unsignedp)
387 rtx dst = ops[0], src = ops[1];
388 HOST_WIDE_INT width = INTVAL (ops[2]);
389 HOST_WIDE_INT start = INTVAL (ops[3]);
390 HOST_WIDE_INT align_mask;
391 rtx s0, s1, mask, r0;
393 gcc_assert (REG_P (dst) && GET_MODE (dst) == TImode);
397 /* First, determine if we need 1 TImode load or 2. We need only 1
398 if the bits being extracted do not cross the alignment boundary
399 as determined by the MEM and its address. */
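/* For instance, when MEM_ALIGN (src) is 128 (a quadword-aligned MEM),
   align_mask below is -128 and the test checks whether bit offsets START
   and START + WIDTH - 1 fall in the same 128-bit quadword, in which case
   a single TImode load is enough.  */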
401 align_mask = -MEM_ALIGN (src);
402 if ((start & align_mask) == ((start + width - 1) & align_mask))
404 /* Alignment is sufficient for 1 load. */
405 s0 = gen_reg_rtx (TImode);
406 r0 = spu_expand_load (s0, 0, src, start / 8);
409 emit_insn (gen_rotqby_ti (s0, s0, r0));
414 s0 = gen_reg_rtx (TImode);
415 s1 = gen_reg_rtx (TImode);
416 r0 = spu_expand_load (s0, s1, src, start / 8);
419 gcc_assert (start + width <= 128);
422 rtx r1 = gen_reg_rtx (SImode);
423 mask = gen_reg_rtx (TImode);
424 emit_move_insn (mask, GEN_INT (-1));
425 emit_insn (gen_rotqby_ti (s0, s0, r0));
426 emit_insn (gen_rotqby_ti (s1, s1, r0));
427 if (GET_CODE (r0) == CONST_INT)
428 r1 = GEN_INT (INTVAL (r0) & 15);
430 emit_insn (gen_andsi3 (r1, r0, GEN_INT (15)));
431 emit_insn (gen_shlqby_ti (mask, mask, r1));
432 emit_insn (gen_selb (s0, s1, s0, mask));
437 else if (GET_CODE (src) == SUBREG)
439 rtx r = SUBREG_REG (src);
440 gcc_assert (REG_P (r) && SCALAR_INT_MODE_P (GET_MODE (r)));
441 s0 = gen_reg_rtx (TImode);
442 if (GET_MODE_SIZE (GET_MODE (r)) < GET_MODE_SIZE (TImode))
443 emit_insn (gen_rtx_SET (VOIDmode, s0, gen_rtx_ZERO_EXTEND (TImode, r)));
445 emit_move_insn (s0, src);
449 gcc_assert (REG_P (src) && GET_MODE (src) == TImode);
450 s0 = gen_reg_rtx (TImode);
451 emit_move_insn (s0, src);
454 /* Now s0 is TImode and contains the bits to extract at start. */
457 emit_insn (gen_rotlti3 (s0, s0, GEN_INT (start)));
460 s0 = expand_shift (RSHIFT_EXPR, TImode, s0, 128 - width, s0, unsignedp);
462 emit_move_insn (dst, s0);
466 spu_expand_insv (rtx ops[])
468 HOST_WIDE_INT width = INTVAL (ops[1]);
469 HOST_WIDE_INT start = INTVAL (ops[2]);
470 HOST_WIDE_INT maskbits;
471 enum machine_mode dst_mode;
472 rtx dst = ops[0], src = ops[3];
479 if (GET_CODE (ops[0]) == MEM)
480 dst = gen_reg_rtx (TImode);
482 dst = adjust_operand (dst, &start);
483 dst_mode = GET_MODE (dst);
484 dst_size = GET_MODE_BITSIZE (GET_MODE (dst));
486 if (CONSTANT_P (src))
488 enum machine_mode m =
489 (width <= 32 ? SImode : width <= 64 ? DImode : TImode);
490 src = force_reg (m, convert_to_mode (m, src, 0));
492 src = adjust_operand (src, 0);
494 mask = gen_reg_rtx (dst_mode);
495 shift_reg = gen_reg_rtx (dst_mode);
496 shift = dst_size - start - width;
498 /* It's not safe to use subreg here because the compiler assumes
499 that the SUBREG_REG is right justified in the SUBREG. */
500 convert_move (shift_reg, src, 1);
507 emit_insn (gen_ashlsi3 (shift_reg, shift_reg, GEN_INT (shift)));
510 emit_insn (gen_ashldi3 (shift_reg, shift_reg, GEN_INT (shift)));
513 emit_insn (gen_ashlti3 (shift_reg, shift_reg, GEN_INT (shift)));
525 maskbits = (-1ll << (32 - width - start));
527 maskbits += (1ll << (32 - start));
528 emit_move_insn (mask, GEN_INT (maskbits));
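/* Worked example for the SImode case above: with START == 4 and
   WIDTH == 8, the shift gives 0x...fff00000 and adding 1 << 28 leaves
   maskbits == 0x0ff00000, i.e. exactly the WIDTH bits starting START
   bits below the MSB.  */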
531 maskbits = (-1ll << (64 - width - start));
533 maskbits += (1ll << (64 - start));
534 emit_move_insn (mask, GEN_INT (maskbits));
538 unsigned char arr[16];
540 memset (arr, 0, sizeof (arr));
541 arr[i] = 0xff >> (start & 7);
542 for (i++; i <= (start + width - 1) / 8; i++)
544 arr[i - 1] &= 0xff << (7 - ((start + width - 1) & 7));
545 emit_move_insn (mask, array_to_constant (TImode, arr));
551 if (GET_CODE (ops[0]) == MEM)
553 rtx low = gen_reg_rtx (SImode);
554 rtx rotl = gen_reg_rtx (SImode);
555 rtx mask0 = gen_reg_rtx (TImode);
561 addr = force_reg (Pmode, XEXP (ops[0], 0));
562 addr0 = gen_rtx_AND (Pmode, addr, GEN_INT (-16));
563 emit_insn (gen_andsi3 (low, addr, GEN_INT (15)));
564 emit_insn (gen_negsi2 (rotl, low));
565 emit_insn (gen_rotqby_ti (shift_reg, shift_reg, rotl));
566 emit_insn (gen_rotqmby_ti (mask0, mask, rotl));
567 mem = change_address (ops[0], TImode, addr0);
568 set_mem_alias_set (mem, 0);
569 emit_move_insn (dst, mem);
570 emit_insn (gen_selb (dst, dst, shift_reg, mask0));
571 if (start + width > MEM_ALIGN (ops[0]))
573 rtx shl = gen_reg_rtx (SImode);
574 rtx mask1 = gen_reg_rtx (TImode);
575 rtx dst1 = gen_reg_rtx (TImode);
577 addr1 = plus_constant (Pmode, addr, 16);
578 addr1 = gen_rtx_AND (Pmode, addr1, GEN_INT (-16));
579 emit_insn (gen_subsi3 (shl, GEN_INT (16), low));
580 emit_insn (gen_shlqby_ti (mask1, mask, shl));
581 mem1 = change_address (ops[0], TImode, addr1);
582 set_mem_alias_set (mem1, 0);
583 emit_move_insn (dst1, mem1);
584 emit_insn (gen_selb (dst1, dst1, shift_reg, mask1));
585 emit_move_insn (mem1, dst1);
587 emit_move_insn (mem, dst);
590 emit_insn (gen_selb (dst, copy_rtx (dst), shift_reg, mask));
595 spu_expand_block_move (rtx ops[])
597 HOST_WIDE_INT bytes, align, offset;
598 rtx src, dst, sreg, dreg, target;
600 if (GET_CODE (ops[2]) != CONST_INT
601 || GET_CODE (ops[3]) != CONST_INT
602 || INTVAL (ops[2]) > (HOST_WIDE_INT) (MOVE_RATIO (optimize_insn_for_speed_p ()) * 8))
605 bytes = INTVAL (ops[2]);
606 align = INTVAL (ops[3]);
616 for (offset = 0; offset + 16 <= bytes; offset += 16)
618 dst = adjust_address (ops[0], V16QImode, offset);
619 src = adjust_address (ops[1], V16QImode, offset);
620 emit_move_insn (dst, src);
625 unsigned char arr[16] = { 0 };
626 for (i = 0; i < bytes - offset; i++)
628 dst = adjust_address (ops[0], V16QImode, offset);
629 src = adjust_address (ops[1], V16QImode, offset);
630 mask = gen_reg_rtx (V16QImode);
631 sreg = gen_reg_rtx (V16QImode);
632 dreg = gen_reg_rtx (V16QImode);
633 target = gen_reg_rtx (V16QImode);
634 emit_move_insn (mask, array_to_constant (V16QImode, arr));
635 emit_move_insn (dreg, dst);
636 emit_move_insn (sreg, src);
637 emit_insn (gen_selb (target, dreg, sreg, mask));
638 emit_move_insn (dst, target);
646 { SPU_EQ, SPU_GT, SPU_GTU };
648 int spu_comp_icode[12][3] = {
649 {CODE_FOR_ceq_qi, CODE_FOR_cgt_qi, CODE_FOR_clgt_qi},
650 {CODE_FOR_ceq_hi, CODE_FOR_cgt_hi, CODE_FOR_clgt_hi},
651 {CODE_FOR_ceq_si, CODE_FOR_cgt_si, CODE_FOR_clgt_si},
652 {CODE_FOR_ceq_di, CODE_FOR_cgt_di, CODE_FOR_clgt_di},
653 {CODE_FOR_ceq_ti, CODE_FOR_cgt_ti, CODE_FOR_clgt_ti},
654 {CODE_FOR_ceq_sf, CODE_FOR_cgt_sf, 0},
655 {CODE_FOR_ceq_df, CODE_FOR_cgt_df, 0},
656 {CODE_FOR_ceq_v16qi, CODE_FOR_cgt_v16qi, CODE_FOR_clgt_v16qi},
657 {CODE_FOR_ceq_v8hi, CODE_FOR_cgt_v8hi, CODE_FOR_clgt_v8hi},
658 {CODE_FOR_ceq_v4si, CODE_FOR_cgt_v4si, CODE_FOR_clgt_v4si},
659 {CODE_FOR_ceq_v4sf, CODE_FOR_cgt_v4sf, 0},
660 {CODE_FOR_ceq_v2df, CODE_FOR_cgt_v2df, 0},
663 /* Generate a compare for CODE. Return a brand-new rtx that represents
664 the result of the compare. GCC can figure this out too if we don't
665 provide all variations of compares, but since GCC always wants to use
666 WORD_MODE, we can generate better code in most cases if we do it ourselves. */
669 spu_emit_branch_or_set (int is_set, rtx cmp, rtx operands[])
671 int reverse_compare = 0;
672 int reverse_test = 0;
673 rtx compare_result, eq_result;
674 rtx comp_rtx, eq_rtx;
675 enum machine_mode comp_mode;
676 enum machine_mode op_mode;
677 enum spu_comp_code scode, eq_code;
678 enum insn_code ior_code;
679 enum rtx_code code = GET_CODE (cmp);
680 rtx op0 = XEXP (cmp, 0);
681 rtx op1 = XEXP (cmp, 1);
685 /* When op1 is a CONST_INT change (X >= C) to (X > C-1),
686 and so on, to keep the constant in operand 1. */
687 if (GET_CODE (op1) == CONST_INT)
689 HOST_WIDE_INT val = INTVAL (op1) - 1;
690 if (trunc_int_for_mode (val, GET_MODE (op0)) == val)
714 /* However, if we generate an integer result, performing a reverse test
715 would require an extra negation, so avoid that where possible. */
716 if (GET_CODE (op1) == CONST_INT && is_set == 1)
718 HOST_WIDE_INT val = INTVAL (op1) + 1;
719 if (trunc_int_for_mode (val, GET_MODE (op0)) == val)
736 op_mode = GET_MODE (op0);
742 if (HONOR_NANS (op_mode))
757 if (HONOR_NANS (op_mode))
849 comp_mode = V4SImode;
853 comp_mode = V2DImode;
860 if (GET_MODE (op1) == DFmode
861 && (scode != SPU_GT && scode != SPU_EQ))
864 if (is_set == 0 && op1 == const0_rtx
865 && (GET_MODE (op0) == SImode
866 || GET_MODE (op0) == HImode
867 || GET_MODE (op0) == QImode) && scode == SPU_EQ)
869 /* Don't need to set a register with the result when we are
870 comparing against zero and branching. */
871 reverse_test = !reverse_test;
872 compare_result = op0;
876 compare_result = gen_reg_rtx (comp_mode);
885 if (spu_comp_icode[index][scode] == 0)
888 if (!(*insn_data[spu_comp_icode[index][scode]].operand[1].predicate)
890 op0 = force_reg (op_mode, op0);
891 if (!(*insn_data[spu_comp_icode[index][scode]].operand[2].predicate)
893 op1 = force_reg (op_mode, op1);
894 comp_rtx = GEN_FCN (spu_comp_icode[index][scode]) (compare_result,
898 emit_insn (comp_rtx);
902 eq_result = gen_reg_rtx (comp_mode);
903 eq_rtx = GEN_FCN (spu_comp_icode[index][eq_code]) (eq_result,
908 ior_code = optab_handler (ior_optab, comp_mode);
909 gcc_assert (ior_code != CODE_FOR_nothing);
910 emit_insn (GEN_FCN (ior_code)
911 (compare_result, compare_result, eq_result));
920 /* We don't have branch on QI compare insns, so we convert the
921 QI compare result to a HI result. */
922 if (comp_mode == QImode)
924 rtx old_res = compare_result;
925 compare_result = gen_reg_rtx (HImode);
927 emit_insn (gen_extendqihi2 (compare_result, old_res));
931 bcomp = gen_rtx_EQ (comp_mode, compare_result, const0_rtx);
933 bcomp = gen_rtx_NE (comp_mode, compare_result, const0_rtx);
935 loc_ref = gen_rtx_LABEL_REF (VOIDmode, operands[3]);
936 emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx,
937 gen_rtx_IF_THEN_ELSE (VOIDmode, bcomp,
940 else if (is_set == 2)
942 rtx target = operands[0];
943 int compare_size = GET_MODE_BITSIZE (comp_mode);
944 int target_size = GET_MODE_BITSIZE (GET_MODE (target));
945 enum machine_mode mode = mode_for_size (target_size, MODE_INT, 0);
947 rtx op_t = operands[2];
948 rtx op_f = operands[3];
950 /* The result of the comparison can be SI, HI or QI mode. Create a
951 mask based on that result. */
952 if (target_size > compare_size)
954 select_mask = gen_reg_rtx (mode);
955 emit_insn (gen_extend_compare (select_mask, compare_result));
957 else if (target_size < compare_size)
959 gen_rtx_SUBREG (mode, compare_result,
960 (compare_size - target_size) / BITS_PER_UNIT);
961 else if (comp_mode != mode)
962 select_mask = gen_rtx_SUBREG (mode, compare_result, 0);
964 select_mask = compare_result;
966 if (GET_MODE (target) != GET_MODE (op_t)
967 || GET_MODE (target) != GET_MODE (op_f))
971 emit_insn (gen_selb (target, op_t, op_f, select_mask));
973 emit_insn (gen_selb (target, op_f, op_t, select_mask));
977 rtx target = operands[0];
979 emit_insn (gen_rtx_SET (VOIDmode, compare_result,
980 gen_rtx_NOT (comp_mode, compare_result)));
981 if (GET_MODE (target) == SImode && GET_MODE (compare_result) == HImode)
982 emit_insn (gen_extendhisi2 (target, compare_result));
983 else if (GET_MODE (target) == SImode
984 && GET_MODE (compare_result) == QImode)
985 emit_insn (gen_extend_compare (target, compare_result));
987 emit_move_insn (target, compare_result);
992 const_double_to_hwint (rtx x)
996 if (GET_MODE (x) == SFmode)
998 REAL_VALUE_FROM_CONST_DOUBLE (rv, x);
999 REAL_VALUE_TO_TARGET_SINGLE (rv, val);
1001 else if (GET_MODE (x) == DFmode)
1004 REAL_VALUE_FROM_CONST_DOUBLE (rv, x);
1005 REAL_VALUE_TO_TARGET_DOUBLE (rv, l);
1007 val = (val << 32) | (l[1] & 0xffffffff);
1015 hwint_to_const_double (enum machine_mode mode, HOST_WIDE_INT v)
1019 gcc_assert (mode == SFmode || mode == DFmode);
1022 tv[0] = (v << 32) >> 32;
1023 else if (mode == DFmode)
1025 tv[1] = (v << 32) >> 32;
1028 real_from_target (&rv, tv, mode);
1029 return CONST_DOUBLE_FROM_REAL_VALUE (rv, mode);
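/* These two helpers are inverses of each other.  For example,
   const_double_to_hwint on the SFmode constant 1.0f yields 0x3f800000
   (its IEEE-754 single-precision bit pattern), and
   hwint_to_const_double (SFmode, 0x3f800000) rebuilds the CONST_DOUBLE
   for 1.0f.  */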
1033 print_operand_address (FILE * file, register rtx addr)
1038 if (GET_CODE (addr) == AND
1039 && GET_CODE (XEXP (addr, 1)) == CONST_INT
1040 && INTVAL (XEXP (addr, 1)) == -16)
1041 addr = XEXP (addr, 0);
1043 switch (GET_CODE (addr))
1046 fprintf (file, "0(%s)", reg_names[REGNO (addr)]);
1050 reg = XEXP (addr, 0);
1051 offset = XEXP (addr, 1);
1052 if (GET_CODE (offset) == REG)
1054 fprintf (file, "%s,%s", reg_names[REGNO (reg)],
1055 reg_names[REGNO (offset)]);
1057 else if (GET_CODE (offset) == CONST_INT)
1059 fprintf (file, HOST_WIDE_INT_PRINT_DEC "(%s)",
1060 INTVAL (offset), reg_names[REGNO (reg)]);
1070 output_addr_const (file, addr);
1080 print_operand (FILE * file, rtx x, int code)
1082 enum machine_mode mode = GET_MODE (x);
1084 unsigned char arr[16];
1085 int xcode = GET_CODE (x);
1087 if (GET_MODE (x) == VOIDmode)
1090 case 'L': /* 128 bits, signed */
1091 case 'm': /* 128 bits, signed */
1092 case 'T': /* 128 bits, signed */
1093 case 't': /* 128 bits, signed */
1096 case 'K': /* 64 bits, signed */
1097 case 'k': /* 64 bits, signed */
1098 case 'D': /* 64 bits, signed */
1099 case 'd': /* 64 bits, signed */
1102 case 'J': /* 32 bits, signed */
1103 case 'j': /* 32 bits, signed */
1104 case 's': /* 32 bits, signed */
1105 case 'S': /* 32 bits, signed */
1112 case 'j': /* 32 bits, signed */
1113 case 'k': /* 64 bits, signed */
1114 case 'm': /* 128 bits, signed */
1115 if (xcode == CONST_INT
1116 || xcode == CONST_DOUBLE || xcode == CONST_VECTOR)
1118 gcc_assert (logical_immediate_p (x, mode));
1119 constant_to_array (mode, x, arr);
1120 val = (arr[0] << 24) | (arr[1] << 16) | (arr[2] << 8) | arr[3];
1121 val = trunc_int_for_mode (val, SImode);
1122 switch (which_logical_immediate (val))
1127 fprintf (file, "h");
1130 fprintf (file, "b");
1140 case 'J': /* 32 bits, signed */
1141 case 'K': /* 64 bits, signed */
1142 case 'L': /* 128 bits, signed */
1143 if (xcode == CONST_INT
1144 || xcode == CONST_DOUBLE || xcode == CONST_VECTOR)
1146 gcc_assert (logical_immediate_p (x, mode)
1147 || iohl_immediate_p (x, mode));
1148 constant_to_array (mode, x, arr);
1149 val = (arr[0] << 24) | (arr[1] << 16) | (arr[2] << 8) | arr[3];
1150 val = trunc_int_for_mode (val, SImode);
1151 switch (which_logical_immediate (val))
1157 val = trunc_int_for_mode (val, HImode);
1160 val = trunc_int_for_mode (val, QImode);
1165 fprintf (file, HOST_WIDE_INT_PRINT_DEC, val);
1171 case 't': /* 128 bits, signed */
1172 case 'd': /* 64 bits, signed */
1173 case 's': /* 32 bits, signed */
1176 enum immediate_class c = classify_immediate (x, mode);
1180 constant_to_array (mode, x, arr);
1181 val = (arr[0] << 24) | (arr[1] << 16) | (arr[2] << 8) | arr[3];
1182 val = trunc_int_for_mode (val, SImode);
1183 switch (which_immediate_load (val))
1188 fprintf (file, "a");
1191 fprintf (file, "h");
1194 fprintf (file, "hu");
1201 constant_to_array (mode, x, arr);
1202 cpat_info (arr, GET_MODE_SIZE (mode), &info, 0);
1204 fprintf (file, "b");
1206 fprintf (file, "h");
1208 fprintf (file, "w");
1210 fprintf (file, "d");
1213 if (xcode == CONST_VECTOR)
1215 x = CONST_VECTOR_ELT (x, 0);
1216 xcode = GET_CODE (x);
1218 if (xcode == SYMBOL_REF || xcode == LABEL_REF || xcode == CONST)
1219 fprintf (file, "a");
1220 else if (xcode == HIGH)
1221 fprintf (file, "hu");
1235 case 'T': /* 128 bits, signed */
1236 case 'D': /* 64 bits, signed */
1237 case 'S': /* 32 bits, signed */
1240 enum immediate_class c = classify_immediate (x, mode);
1244 constant_to_array (mode, x, arr);
1245 val = (arr[0] << 24) | (arr[1] << 16) | (arr[2] << 8) | arr[3];
1246 val = trunc_int_for_mode (val, SImode);
1247 switch (which_immediate_load (val))
1254 val = trunc_int_for_mode (((arr[0] << 8) | arr[1]), HImode);
1259 fprintf (file, HOST_WIDE_INT_PRINT_DEC, val);
1262 constant_to_array (mode, x, arr);
1264 for (i = 0; i < 16; i++)
1269 print_operand (file, GEN_INT (val), 0);
1272 constant_to_array (mode, x, arr);
1273 cpat_info (arr, GET_MODE_SIZE (mode), 0, &info);
1274 fprintf (file, HOST_WIDE_INT_PRINT_DEC, (HOST_WIDE_INT)info);
1279 if (GET_CODE (x) == CONST_VECTOR)
1280 x = CONST_VECTOR_ELT (x, 0);
1281 output_addr_const (file, x);
1283 fprintf (file, "@h");
1297 if (xcode == CONST_INT)
1299 /* Only the 4 least significant bits are relevant for the generate
1300 control word instructions. */
1301 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x) & 15);
1306 case 'M': /* print code for c*d */
1307 if (GET_CODE (x) == CONST_INT)
1311 fprintf (file, "b");
1314 fprintf (file, "h");
1317 fprintf (file, "w");
1320 fprintf (file, "d");
1329 case 'N': /* Negate the operand */
1330 if (xcode == CONST_INT)
1331 fprintf (file, HOST_WIDE_INT_PRINT_DEC, -INTVAL (x));
1332 else if (xcode == CONST_VECTOR)
1333 fprintf (file, HOST_WIDE_INT_PRINT_DEC,
1334 -INTVAL (CONST_VECTOR_ELT (x, 0)));
1337 case 'I': /* enable/disable interrupts */
1338 if (xcode == CONST_INT)
1339 fprintf (file, "%s", INTVAL (x) == 0 ? "d" : "e");
1342 case 'b': /* branch modifiers */
1344 fprintf (file, "%s", GET_MODE (x) == HImode ? "h" : "");
1345 else if (COMPARISON_P (x))
1346 fprintf (file, "%s", xcode == NE ? "n" : "");
1349 case 'i': /* indirect call */
1352 if (GET_CODE (XEXP (x, 0)) == REG)
1353 /* Used in indirect function calls. */
1354 fprintf (file, "%s", reg_names[REGNO (XEXP (x, 0))]);
1356 output_address (XEXP (x, 0));
1360 case 'p': /* load/store */
1364 xcode = GET_CODE (x);
1369 xcode = GET_CODE (x);
1372 fprintf (file, "d");
1373 else if (xcode == CONST_INT)
1374 fprintf (file, "a");
1375 else if (xcode == CONST || xcode == SYMBOL_REF || xcode == LABEL_REF)
1376 fprintf (file, "r");
1377 else if (xcode == PLUS || xcode == LO_SUM)
1379 if (GET_CODE (XEXP (x, 1)) == REG)
1380 fprintf (file, "x");
1382 fprintf (file, "d");
1387 val = xcode == CONST_INT ? INTVAL (x) : INTVAL (CONST_VECTOR_ELT (x, 0));
1389 output_addr_const (file, GEN_INT (val));
1393 val = xcode == CONST_INT ? INTVAL (x) : INTVAL (CONST_VECTOR_ELT (x, 0));
1395 output_addr_const (file, GEN_INT (val));
1399 val = xcode == CONST_INT ? INTVAL (x) : INTVAL (CONST_VECTOR_ELT (x, 0));
1401 output_addr_const (file, GEN_INT (val));
1405 val = xcode == CONST_INT ? INTVAL (x) : INTVAL (CONST_VECTOR_ELT (x, 0));
1406 val = (val >> 3) & 0x1f;
1407 output_addr_const (file, GEN_INT (val));
1411 val = xcode == CONST_INT ? INTVAL (x) : INTVAL (CONST_VECTOR_ELT (x, 0));
1414 output_addr_const (file, GEN_INT (val));
1418 val = xcode == CONST_INT ? INTVAL (x) : INTVAL (CONST_VECTOR_ELT (x, 0));
1421 output_addr_const (file, GEN_INT (val));
1425 val = xcode == CONST_INT ? INTVAL (x) : INTVAL (CONST_VECTOR_ELT (x, 0));
1428 output_addr_const (file, GEN_INT (val));
1432 val = xcode == CONST_INT ? INTVAL (x) : INTVAL (CONST_VECTOR_ELT (x, 0));
1433 val = -(val & -8ll);
1434 val = (val >> 3) & 0x1f;
1435 output_addr_const (file, GEN_INT (val));
1440 constant_to_array (mode, x, arr);
1441 val = (((arr[0] << 1) + (arr[1] >> 7)) & 0xff) - 127;
1442 output_addr_const (file, GEN_INT (code == 'w' ? -val : val));
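/* The expression above recovers the unbiased IEEE-754 single-precision
   exponent from the two most significant bytes of the constant: for 1.0f
   (0x3f800000), arr[0] is 0x3f and arr[1] is 0x80, giving
   (0x7e + 1) - 127 == 0.  */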
1447 fprintf (file, "%s", reg_names[REGNO (x)]);
1448 else if (xcode == MEM)
1449 output_address (XEXP (x, 0));
1450 else if (xcode == CONST_VECTOR)
1451 print_operand (file, CONST_VECTOR_ELT (x, 0), 0);
1453 output_addr_const (file, x);
1460 output_operand_lossage ("invalid %%xn code");
1465 /* For PIC mode we've reserved PIC_OFFSET_TABLE_REGNUM, which is a
1466 caller saved register. For leaf functions it is more efficient to
1467 use a volatile register because we won't need to save and restore the
1468 pic register. This routine is only valid after register allocation
1469 is completed, so we can pick an unused register. */
1473 if (!reload_completed && !reload_in_progress)
1476 /* If we've already made the decision, we need to stick with it. Once we've
1477 decided to use LAST_ARG_REGNUM, future calls to df_regs_ever_live_p may
1478 return true since the register is now live; this should not cause us to
1479 "switch back" to using pic_offset_table_rtx. */
1480 if (!cfun->machine->pic_reg)
1482 if (crtl->is_leaf && !df_regs_ever_live_p (LAST_ARG_REGNUM))
1483 cfun->machine->pic_reg = gen_rtx_REG (SImode, LAST_ARG_REGNUM);
1485 cfun->machine->pic_reg = pic_offset_table_rtx;
1488 return cfun->machine->pic_reg;
1491 /* Split constant addresses to handle cases that are too large.
1492 Add in the pic register when in PIC mode.
1493 Split immediates that require more than 1 instruction. */
1495 spu_split_immediate (rtx * ops)
1497 enum machine_mode mode = GET_MODE (ops[0]);
1498 enum immediate_class c = classify_immediate (ops[1], mode);
1504 unsigned char arrhi[16];
1505 unsigned char arrlo[16];
1506 rtx to, temp, hi, lo;
1508 enum machine_mode imode = mode;
1509 /* We need to do reals as ints because the constant used in the
1510 IOR might not be a legitimate real constant. */
1511 imode = int_mode_for_mode (mode);
1512 constant_to_array (mode, ops[1], arrhi);
1514 to = simplify_gen_subreg (imode, ops[0], mode, 0);
1517 temp = !can_create_pseudo_p () ? to : gen_reg_rtx (imode);
1518 for (i = 0; i < 16; i += 4)
1520 arrlo[i + 2] = arrhi[i + 2];
1521 arrlo[i + 3] = arrhi[i + 3];
1522 arrlo[i + 0] = arrlo[i + 1] = 0;
1523 arrhi[i + 2] = arrhi[i + 3] = 0;
1525 hi = array_to_constant (imode, arrhi);
1526 lo = array_to_constant (imode, arrlo);
1527 emit_move_insn (temp, hi);
1528 emit_insn (gen_rtx_SET
1529 (VOIDmode, to, gen_rtx_IOR (imode, temp, lo)));
1534 unsigned char arr_fsmbi[16];
1535 unsigned char arr_andbi[16];
1536 rtx to, reg_fsmbi, reg_and;
1538 enum machine_mode imode = mode;
1539 /* We need to do reals as ints because the constant used in the
1540 * AND might not be a legitimate real constant. */
1541 imode = int_mode_for_mode (mode);
1542 constant_to_array (mode, ops[1], arr_fsmbi);
1544 to = simplify_gen_subreg(imode, ops[0], GET_MODE (ops[0]), 0);
1547 for (i = 0; i < 16; i++)
1548 if (arr_fsmbi[i] != 0)
1550 arr_andbi[0] = arr_fsmbi[i];
1551 arr_fsmbi[i] = 0xff;
1553 for (i = 1; i < 16; i++)
1554 arr_andbi[i] = arr_andbi[0];
1555 reg_fsmbi = array_to_constant (imode, arr_fsmbi);
1556 reg_and = array_to_constant (imode, arr_andbi);
1557 emit_move_insn (to, reg_fsmbi);
1558 emit_insn (gen_rtx_SET
1559 (VOIDmode, to, gen_rtx_AND (imode, to, reg_and)));
1563 if (reload_in_progress || reload_completed)
1565 rtx mem = force_const_mem (mode, ops[1]);
1566 if (TARGET_LARGE_MEM)
1568 rtx addr = gen_rtx_REG (Pmode, REGNO (ops[0]));
1569 emit_move_insn (addr, XEXP (mem, 0));
1570 mem = replace_equiv_address (mem, addr);
1572 emit_move_insn (ops[0], mem);
1578 if (reload_completed && GET_CODE (ops[1]) != HIGH)
1582 emit_move_insn (ops[0], gen_rtx_HIGH (mode, ops[1]));
1583 emit_move_insn (ops[0], gen_rtx_LO_SUM (mode, ops[0], ops[1]));
1586 emit_insn (gen_pic (ops[0], ops[1]));
1589 rtx pic_reg = get_pic_reg ();
1590 emit_insn (gen_addsi3 (ops[0], ops[0], pic_reg));
1592 return flag_pic || c == IC_IL2s;
1603 /* SAVING is TRUE when we are generating the actual load and store
1604 instructions for REGNO. When determining the size of the stack
1605 needed for saving registers, we must allocate enough space for the
1606 worst case, because we don't always have the information early enough
1607 to not allocate it. But we can at least eliminate the actual loads
1608 and stores during the prologue/epilogue. */
1610 need_to_save_reg (int regno, int saving)
1612 if (df_regs_ever_live_p (regno) && !call_used_regs[regno])
1615 && regno == PIC_OFFSET_TABLE_REGNUM
1616 && (!saving || cfun->machine->pic_reg == pic_offset_table_rtx))
1621 /* This function is only correct starting with local register allocation. */
1624 spu_saved_regs_size (void)
1626 int reg_save_size = 0;
1629 for (regno = FIRST_PSEUDO_REGISTER - 1; regno >= 0; --regno)
1630 if (need_to_save_reg (regno, 0))
1631 reg_save_size += 0x10;
1632 return reg_save_size;
1636 frame_emit_store (int regno, rtx addr, HOST_WIDE_INT offset)
1638 rtx reg = gen_rtx_REG (V4SImode, regno);
1640 gen_frame_mem (V4SImode, gen_rtx_PLUS (Pmode, addr, GEN_INT (offset)));
1641 return emit_insn (gen_movv4si (mem, reg));
1645 frame_emit_load (int regno, rtx addr, HOST_WIDE_INT offset)
1647 rtx reg = gen_rtx_REG (V4SImode, regno);
1649 gen_frame_mem (V4SImode, gen_rtx_PLUS (Pmode, addr, GEN_INT (offset)));
1650 return emit_insn (gen_movv4si (reg, mem));
1653 /* This happens after reload, so we need to expand it. */
1655 frame_emit_add_imm (rtx dst, rtx src, HOST_WIDE_INT imm, rtx scratch)
1658 if (satisfies_constraint_K (GEN_INT (imm)))
1660 insn = emit_insn (gen_addsi3 (dst, src, GEN_INT (imm)));
1664 emit_insn (gen_movsi (scratch, gen_int_mode (imm, SImode)));
1665 insn = emit_insn (gen_addsi3 (dst, src, scratch));
1666 if (REGNO (src) == REGNO (scratch))
1672 /* Return nonzero if this function is known to have a null epilogue. */
1675 direct_return (void)
1677 if (reload_completed)
1679 if (cfun->static_chain_decl == 0
1680 && (spu_saved_regs_size ()
1682 + crtl->outgoing_args_size
1683 + crtl->args.pretend_args_size == 0)
1691 The stack frame looks like this:
1695 AP -> +-------------+
1698 prev SP | back chain |
1701 | reg save | crtl->args.pretend_args_size bytes
1704 | saved regs | spu_saved_regs_size() bytes
1705 FP -> +-------------+
1707 | vars | get_frame_size() bytes
1708 HFP -> +-------------+
1711 | args | crtl->outgoing_args_size bytes
1717 SP -> +-------------+
1721 spu_expand_prologue (void)
1723 HOST_WIDE_INT size = get_frame_size (), offset, regno;
1724 HOST_WIDE_INT total_size;
1725 HOST_WIDE_INT saved_regs_size;
1726 rtx sp_reg = gen_rtx_REG (Pmode, STACK_POINTER_REGNUM);
1727 rtx scratch_reg_0, scratch_reg_1;
1731 if (flag_pic && optimize == 0 && !cfun->machine->pic_reg)
1732 cfun->machine->pic_reg = pic_offset_table_rtx;
1734 if (spu_naked_function_p (current_function_decl))
1737 scratch_reg_0 = gen_rtx_REG (SImode, LAST_ARG_REGNUM + 1);
1738 scratch_reg_1 = gen_rtx_REG (SImode, LAST_ARG_REGNUM + 2);
1740 saved_regs_size = spu_saved_regs_size ();
1741 total_size = size + saved_regs_size
1742 + crtl->outgoing_args_size
1743 + crtl->args.pretend_args_size;
1746 || cfun->calls_alloca || total_size > 0)
1747 total_size += STACK_POINTER_OFFSET;
1749 /* Save this first because code after this might use the link
1750 register as a scratch register. */
1753 insn = frame_emit_store (LINK_REGISTER_REGNUM, sp_reg, 16);
1754 RTX_FRAME_RELATED_P (insn) = 1;
1759 offset = -crtl->args.pretend_args_size;
1760 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; ++regno)
1761 if (need_to_save_reg (regno, 1))
1764 insn = frame_emit_store (regno, sp_reg, offset);
1765 RTX_FRAME_RELATED_P (insn) = 1;
1769 if (flag_pic && cfun->machine->pic_reg)
1771 rtx pic_reg = cfun->machine->pic_reg;
1772 insn = emit_insn (gen_load_pic_offset (pic_reg, scratch_reg_0));
1773 insn = emit_insn (gen_subsi3 (pic_reg, pic_reg, scratch_reg_0));
1778 if (flag_stack_check)
1780 /* We compare against total_size-1 because
1781 ($sp >= total_size) <=> ($sp > total_size-1) */
1782 rtx scratch_v4si = gen_rtx_REG (V4SImode, REGNO (scratch_reg_0));
1783 rtx sp_v4si = gen_rtx_REG (V4SImode, STACK_POINTER_REGNUM);
1784 rtx size_v4si = spu_const (V4SImode, total_size - 1);
1785 if (!satisfies_constraint_K (GEN_INT (total_size - 1)))
1787 emit_move_insn (scratch_v4si, size_v4si);
1788 size_v4si = scratch_v4si;
1790 emit_insn (gen_cgt_v4si (scratch_v4si, sp_v4si, size_v4si));
1791 emit_insn (gen_vec_extractv4si
1792 (scratch_reg_0, scratch_v4si, GEN_INT (1)));
1793 emit_insn (gen_spu_heq (scratch_reg_0, GEN_INT (0)));
1796 /* Adjust the stack pointer, and make sure scratch_reg_0 contains
1797 the value of the previous $sp because we save it as the back chain. */
1799 if (total_size <= 2000)
1801 /* In this case we save the back chain first. */
1802 insn = frame_emit_store (STACK_POINTER_REGNUM, sp_reg, -total_size);
1804 frame_emit_add_imm (sp_reg, sp_reg, -total_size, scratch_reg_0);
1808 insn = emit_move_insn (scratch_reg_0, sp_reg);
1810 frame_emit_add_imm (sp_reg, sp_reg, -total_size, scratch_reg_1);
1812 RTX_FRAME_RELATED_P (insn) = 1;
1813 real = gen_addsi3 (sp_reg, sp_reg, GEN_INT (-total_size));
1814 add_reg_note (insn, REG_FRAME_RELATED_EXPR, real);
1816 if (total_size > 2000)
1818 /* Save the back chain ptr */
1819 insn = frame_emit_store (REGNO (scratch_reg_0), sp_reg, 0);
1822 if (frame_pointer_needed)
1824 rtx fp_reg = gen_rtx_REG (Pmode, HARD_FRAME_POINTER_REGNUM);
1825 HOST_WIDE_INT fp_offset = STACK_POINTER_OFFSET
1826 + crtl->outgoing_args_size;
1827 /* Set the new frame_pointer */
1828 insn = frame_emit_add_imm (fp_reg, sp_reg, fp_offset, scratch_reg_0);
1829 RTX_FRAME_RELATED_P (insn) = 1;
1830 real = gen_addsi3 (fp_reg, sp_reg, GEN_INT (fp_offset));
1831 add_reg_note (insn, REG_FRAME_RELATED_EXPR, real);
1832 REGNO_POINTER_ALIGN (HARD_FRAME_POINTER_REGNUM) = STACK_BOUNDARY;
1836 if (flag_stack_usage_info)
1837 current_function_static_stack_size = total_size;
1841 spu_expand_epilogue (bool sibcall_p)
1843 int size = get_frame_size (), offset, regno;
1844 HOST_WIDE_INT saved_regs_size, total_size;
1845 rtx sp_reg = gen_rtx_REG (Pmode, STACK_POINTER_REGNUM);
1848 if (spu_naked_function_p (current_function_decl))
1851 scratch_reg_0 = gen_rtx_REG (SImode, LAST_ARG_REGNUM + 1);
1853 saved_regs_size = spu_saved_regs_size ();
1854 total_size = size + saved_regs_size
1855 + crtl->outgoing_args_size
1856 + crtl->args.pretend_args_size;
1859 || cfun->calls_alloca || total_size > 0)
1860 total_size += STACK_POINTER_OFFSET;
1864 if (cfun->calls_alloca)
1865 frame_emit_load (STACK_POINTER_REGNUM, sp_reg, 0);
1867 frame_emit_add_imm (sp_reg, sp_reg, total_size, scratch_reg_0);
1870 if (saved_regs_size > 0)
1872 offset = -crtl->args.pretend_args_size;
1873 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; ++regno)
1874 if (need_to_save_reg (regno, 1))
1877 frame_emit_load (regno, sp_reg, offset);
1883 frame_emit_load (LINK_REGISTER_REGNUM, sp_reg, 16);
1887 emit_use (gen_rtx_REG (SImode, LINK_REGISTER_REGNUM));
1888 emit_jump_insn (gen__return ());
1893 spu_return_addr (int count, rtx frame ATTRIBUTE_UNUSED)
1897 /* This is inefficient because it ends up copying to a save-register
1898 which then gets saved even though $lr has already been saved. But
1899 it does generate better code for leaf functions and we don't need
1900 to use RETURN_ADDRESS_POINTER_REGNUM to get it working. It's only
1901 used for __builtin_return_address anyway, so maybe we don't care if
1902 it's inefficient. */
1903 return get_hard_reg_initial_val (Pmode, LINK_REGISTER_REGNUM);
1907 /* Given VAL, generate a constant appropriate for MODE.
1908 If MODE is a vector mode, every element will be VAL.
1909 For TImode, VAL will be zero extended to 128 bits. */
1911 spu_const (enum machine_mode mode, HOST_WIDE_INT val)
1917 gcc_assert (GET_MODE_CLASS (mode) == MODE_INT
1918 || GET_MODE_CLASS (mode) == MODE_FLOAT
1919 || GET_MODE_CLASS (mode) == MODE_VECTOR_INT
1920 || GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT);
1922 if (GET_MODE_CLASS (mode) == MODE_INT)
1923 return immed_double_const (val, 0, mode);
1925 /* val is the bit representation of the float */
1926 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
1927 return hwint_to_const_double (mode, val);
1929 if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
1930 inner = immed_double_const (val, 0, GET_MODE_INNER (mode));
1932 inner = hwint_to_const_double (GET_MODE_INNER (mode), val);
1934 units = GET_MODE_NUNITS (mode);
1936 v = rtvec_alloc (units);
1938 for (i = 0; i < units; ++i)
1939 RTVEC_ELT (v, i) = inner;
1941 return gen_rtx_CONST_VECTOR (mode, v);
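/* For example, spu_const (V4SImode, 1) returns a V4SI CONST_VECTOR whose
   four elements are all (const_int 1); and because VAL is the bit
   representation for float modes, spu_const (SFmode, 0x3f800000) returns
   the CONST_DOUBLE for 1.0f.  */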
1944 /* Create a MODE vector constant from 4 ints. */
1946 spu_const_from_ints(enum machine_mode mode, int a, int b, int c, int d)
1948 unsigned char arr[16];
1949 arr[0] = (a >> 24) & 0xff;
1950 arr[1] = (a >> 16) & 0xff;
1951 arr[2] = (a >> 8) & 0xff;
1952 arr[3] = (a >> 0) & 0xff;
1953 arr[4] = (b >> 24) & 0xff;
1954 arr[5] = (b >> 16) & 0xff;
1955 arr[6] = (b >> 8) & 0xff;
1956 arr[7] = (b >> 0) & 0xff;
1957 arr[8] = (c >> 24) & 0xff;
1958 arr[9] = (c >> 16) & 0xff;
1959 arr[10] = (c >> 8) & 0xff;
1960 arr[11] = (c >> 0) & 0xff;
1961 arr[12] = (d >> 24) & 0xff;
1962 arr[13] = (d >> 16) & 0xff;
1963 arr[14] = (d >> 8) & 0xff;
1964 arr[15] = (d >> 0) & 0xff;
1965 return array_to_constant(mode, arr);
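/* For example, spu_const_from_ints (V4SImode, 0x00010203, 0x04050607,
   0x08090a0b, 0x0c0d0e0f) builds the big-endian byte image
   00 01 02 03 ... 0e 0f and hands it to array_to_constant to form the
   V4SI constant vector.  */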
1968 /* branch hint stuff */
1970 /* An array of these is used to propagate hints to predecessor blocks. */
1973 rtx_insn *prop_jump; /* propagated from another block */
1974 int bb_index; /* the original block. */
1976 static struct spu_bb_info *spu_bb_info;
1978 #define STOP_HINT_P(INSN) \
1980 || INSN_CODE(INSN) == CODE_FOR_divmodsi4 \
1981 || INSN_CODE(INSN) == CODE_FOR_udivmodsi4)
1983 /* 1 when RTX is a hinted branch or its target. We keep track of
1984 what has been hinted so the safe-hint code can test it easily. */
1985 #define HINTED_P(RTX) \
1986 (RTL_FLAG_CHECK3("HINTED_P", (RTX), CODE_LABEL, JUMP_INSN, CALL_INSN)->unchanging)
1988 /* 1 when RTX is an insn that must be scheduled on an even boundary. */
1989 #define SCHED_ON_EVEN_P(RTX) \
1990 (RTL_FLAG_CHECK2("SCHED_ON_EVEN_P", (RTX), JUMP_INSN, CALL_INSN)->in_struct)
1992 /* Emit a nop for INSN such that the two will dual issue. This assumes
1993 INSN is 8-byte aligned. When INSN is inline asm we emit an lnop.
1994 We check for TImode to handle a MULTI1 insn which has dual issued its
1995 first instruction. get_pipe returns -1 for MULTI0 or inline asm. */
1997 emit_nop_for_insn (rtx_insn *insn)
2002 /* We need to handle JUMP_TABLE_DATA separately. */
2003 if (JUMP_TABLE_DATA_P (insn))
2005 new_insn = emit_insn_after (gen_lnop(), insn);
2006 recog_memoized (new_insn);
2007 INSN_LOCATION (new_insn) = UNKNOWN_LOCATION;
2011 p = get_pipe (insn);
2012 if ((CALL_P (insn) || JUMP_P (insn)) && SCHED_ON_EVEN_P (insn))
2013 new_insn = emit_insn_after (gen_lnop (), insn);
2014 else if (p == 1 && GET_MODE (insn) == TImode)
2016 new_insn = emit_insn_before (gen_nopn (GEN_INT (127)), insn);
2017 PUT_MODE (new_insn, TImode);
2018 PUT_MODE (insn, VOIDmode);
2021 new_insn = emit_insn_after (gen_lnop (), insn);
2022 recog_memoized (new_insn);
2023 INSN_LOCATION (new_insn) = INSN_LOCATION (insn);
2026 /* Insert nops in basic blocks to meet dual issue alignment
2027 requirements. Also make sure hbrp and hint instructions are at least
2028 one cycle apart, possibly inserting a nop. */
2032 rtx_insn *insn, *next_insn, *prev_insn, *hbr_insn = 0;
2036 /* This sets up INSN_ADDRESSES. */
2037 shorten_branches (get_insns ());
2039 /* Keep track of length added by nops. */
2043 insn = get_insns ();
2044 if (!active_insn_p (insn))
2045 insn = next_active_insn (insn);
2046 for (; insn; insn = next_insn)
2048 next_insn = next_active_insn (insn);
2049 if (INSN_CODE (insn) == CODE_FOR_iprefetch
2050 || INSN_CODE (insn) == CODE_FOR_hbr)
2054 int a0 = INSN_ADDRESSES (INSN_UID (hbr_insn));
2055 int a1 = INSN_ADDRESSES (INSN_UID (insn));
2056 if ((a1 - a0 == 8 && GET_MODE (insn) != TImode)
2059 prev_insn = emit_insn_before (gen_lnop (), insn);
2060 PUT_MODE (prev_insn, GET_MODE (insn));
2061 PUT_MODE (insn, TImode);
2062 INSN_LOCATION (prev_insn) = INSN_LOCATION (insn);
2068 if (INSN_CODE (insn) == CODE_FOR_blockage && next_insn)
2070 if (GET_MODE (insn) == TImode)
2071 PUT_MODE (next_insn, TImode);
2073 next_insn = next_active_insn (insn);
2075 addr = INSN_ADDRESSES (INSN_UID (insn));
2076 if ((CALL_P (insn) || JUMP_P (insn)) && SCHED_ON_EVEN_P (insn))
2078 if (((addr + length) & 7) != 0)
2080 emit_nop_for_insn (prev_insn);
2084 else if (GET_MODE (insn) == TImode
2085 && ((next_insn && GET_MODE (next_insn) != TImode)
2086 || get_attr_type (insn) == TYPE_MULTI0)
2087 && ((addr + length) & 7) != 0)
2089 /* prev_insn will always be set because the first insn is
2090 always 8-byte aligned. */
2091 emit_nop_for_insn (prev_insn);
2099 /* Routines for branch hints. */
2102 spu_emit_branch_hint (rtx_insn *before, rtx_insn *branch, rtx target,
2103 int distance, sbitmap blocks)
2105 rtx branch_label = 0;
2108 rtx_jump_table_data *table;
2110 if (before == 0 || branch == 0 || target == 0)
2113 /* While scheduling we require hints to be no further than 600 bytes, so
2114 we need to enforce that here too */
2118 /* If BEFORE is a basic block note, emit the hint after that note. */
2119 if (NOTE_INSN_BASIC_BLOCK_P (before))
2120 before = NEXT_INSN (before);
2122 branch_label = gen_label_rtx ();
2123 LABEL_NUSES (branch_label)++;
2124 LABEL_PRESERVE_P (branch_label) = 1;
2125 insn = emit_label_before (branch_label, branch);
2126 branch_label = gen_rtx_LABEL_REF (VOIDmode, branch_label);
2127 bitmap_set_bit (blocks, BLOCK_FOR_INSN (branch)->index);
2129 hint = emit_insn_before (gen_hbr (branch_label, target), before);
2130 recog_memoized (hint);
2131 INSN_LOCATION (hint) = INSN_LOCATION (branch);
2132 HINTED_P (branch) = 1;
2134 if (GET_CODE (target) == LABEL_REF)
2135 HINTED_P (XEXP (target, 0)) = 1;
2136 else if (tablejump_p (branch, 0, &table))
2140 if (GET_CODE (PATTERN (table)) == ADDR_VEC)
2141 vec = XVEC (PATTERN (table), 0);
2143 vec = XVEC (PATTERN (table), 1);
2144 for (j = GET_NUM_ELEM (vec) - 1; j >= 0; --j)
2145 HINTED_P (XEXP (RTVEC_ELT (vec, j), 0)) = 1;
2148 if (distance >= 588)
2150 /* Make sure the hint isn't scheduled any earlier than this point,
2151 which could make it too far for the branch offset to fit */
2152 insn = emit_insn_before (gen_blockage (), hint);
2153 recog_memoized (insn);
2154 INSN_LOCATION (insn) = INSN_LOCATION (hint);
2156 else if (distance <= 8 * 4)
2158 /* To guarantee at least 8 insns between the hint and branch we insert nops. */
2161 for (d = distance; d < 8 * 4; d += 4)
2164 emit_insn_after (gen_nopn_nv (gen_rtx_REG (SImode, 127)), hint);
2165 recog_memoized (insn);
2166 INSN_LOCATION (insn) = INSN_LOCATION (hint);
2169 /* Make sure any nops inserted aren't scheduled before the hint. */
2170 insn = emit_insn_after (gen_blockage (), hint);
2171 recog_memoized (insn);
2172 INSN_LOCATION (insn) = INSN_LOCATION (hint);
2174 /* Make sure any nops inserted aren't scheduled after the call. */
2175 if (CALL_P (branch) && distance < 8 * 4)
2177 insn = emit_insn_before (gen_blockage (), branch);
2178 recog_memoized (insn);
2179 INSN_LOCATION (insn) = INSN_LOCATION (branch);
2184 /* Returns 0 if we don't want a hint for this branch. Otherwise return
2185 the rtx for the branch target. */
2187 get_branch_target (rtx_insn *branch)
2189 if (JUMP_P (branch))
2193 /* Return statements */
2194 if (GET_CODE (PATTERN (branch)) == RETURN)
2195 return gen_rtx_REG (SImode, LINK_REGISTER_REGNUM);
2198 if (extract_asm_operands (PATTERN (branch)) != NULL)
2201 set = single_set (branch);
2202 src = SET_SRC (set);
2203 if (GET_CODE (SET_DEST (set)) != PC)
2206 if (GET_CODE (src) == IF_THEN_ELSE)
2209 rtx note = find_reg_note (branch, REG_BR_PROB, 0);
2212 /* If the more probable case is not a fall through, then
2213 try a branch hint. */
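/* REG_BR_PROB_BASE is 10000, so the comparisons below only pick a target
   to hint when the more likely edge has a probability above 60%; branches
   close to 50/50 are left unhinted.  */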
2214 int prob = XINT (note, 0);
2215 if (prob > (REG_BR_PROB_BASE * 6 / 10)
2216 && GET_CODE (XEXP (src, 1)) != PC)
2217 lab = XEXP (src, 1);
2218 else if (prob < (REG_BR_PROB_BASE * 4 / 10)
2219 && GET_CODE (XEXP (src, 2)) != PC)
2220 lab = XEXP (src, 2);
2224 if (GET_CODE (lab) == RETURN)
2225 return gen_rtx_REG (SImode, LINK_REGISTER_REGNUM);
2233 else if (CALL_P (branch))
2236 /* All of our call patterns are in a PARALLEL and the CALL is
2237 the first pattern in the PARALLEL. */
2238 if (GET_CODE (PATTERN (branch)) != PARALLEL)
2240 call = XVECEXP (PATTERN (branch), 0, 0);
2241 if (GET_CODE (call) == SET)
2242 call = SET_SRC (call);
2243 if (GET_CODE (call) != CALL)
2245 return XEXP (XEXP (call, 0), 0);
2250 /* The special $hbr register is used to prevent the insn scheduler from
2251 moving hbr insns across instructions which invalidate them. It
2252 should only be used in a clobber, and this function searches for
2253 insns which clobber it. */
2255 insn_clobbers_hbr (rtx_insn *insn)
2258 && GET_CODE (PATTERN (insn)) == PARALLEL)
2260 rtx parallel = PATTERN (insn);
2263 for (j = XVECLEN (parallel, 0) - 1; j >= 0; j--)
2265 clobber = XVECEXP (parallel, 0, j);
2266 if (GET_CODE (clobber) == CLOBBER
2267 && GET_CODE (XEXP (clobber, 0)) == REG
2268 && REGNO (XEXP (clobber, 0)) == HBR_REGNUM)
2275 /* Search up to 32 insns starting at FIRST:
2276 - at any kind of hinted branch, just return
2277 - at any unconditional branch in the first 15 insns, just return
2278 - at a call or indirect branch, after the first 15 insns, force it to
2279 an even address and return
2280 - at any unconditional branch, after the first 15 insns, force it to an even address and return
2282 At the end of the search, insert an hbrp within 4 insns of FIRST,
2283 and an hbrp within 16 instructions of FIRST.
2286 insert_hbrp_for_ilb_runout (rtx_insn *first)
2288 rtx_insn *insn, *before_4 = 0, *before_16 = 0;
2289 int addr = 0, length, first_addr = -1;
2290 int hbrp_addr0 = 128 * 4, hbrp_addr1 = 128 * 4;
2291 int insert_lnop_after = 0;
2292 for (insn = first; insn; insn = NEXT_INSN (insn))
2295 if (first_addr == -1)
2296 first_addr = INSN_ADDRESSES (INSN_UID (insn));
2297 addr = INSN_ADDRESSES (INSN_UID (insn)) - first_addr;
2298 length = get_attr_length (insn);
2300 if (before_4 == 0 && addr + length >= 4 * 4)
2302 /* We test for 14 instructions because the first hbrp will add
2303 up to 2 instructions. */
2304 if (before_16 == 0 && addr + length >= 14 * 4)
2307 if (INSN_CODE (insn) == CODE_FOR_hbr)
2309 /* Make sure an hbrp is at least 2 cycles away from a hint.
2310 Insert an lnop after the hbrp when necessary. */
2311 if (before_4 == 0 && addr > 0)
2314 insert_lnop_after |= 1;
2316 else if (before_4 && addr <= 4 * 4)
2317 insert_lnop_after |= 1;
2318 if (before_16 == 0 && addr > 10 * 4)
2321 insert_lnop_after |= 2;
2323 else if (before_16 && addr <= 14 * 4)
2324 insert_lnop_after |= 2;
2327 if (INSN_CODE (insn) == CODE_FOR_iprefetch)
2329 if (addr < hbrp_addr0)
2331 else if (addr < hbrp_addr1)
2335 if (CALL_P (insn) || JUMP_P (insn))
2337 if (HINTED_P (insn))
2340 /* Any branch after the first 15 insns should be on an even
2341 address to avoid a special case branch. There might be
2342 some nops and/or hbrps inserted, so we test after 10 insns. */
2345 SCHED_ON_EVEN_P (insn) = 1;
2348 if (CALL_P (insn) || tablejump_p (insn, 0, 0))
2352 if (addr + length >= 32 * 4)
2354 gcc_assert (before_4 && before_16);
2355 if (hbrp_addr0 > 4 * 4)
2358 emit_insn_before (gen_iprefetch (GEN_INT (1)), before_4);
2359 recog_memoized (insn);
2360 INSN_LOCATION (insn) = INSN_LOCATION (before_4);
2361 INSN_ADDRESSES_NEW (insn,
2362 INSN_ADDRESSES (INSN_UID (before_4)));
2363 PUT_MODE (insn, GET_MODE (before_4));
2364 PUT_MODE (before_4, TImode);
2365 if (insert_lnop_after & 1)
2367 insn = emit_insn_before (gen_lnop (), before_4);
2368 recog_memoized (insn);
2369 INSN_LOCATION (insn) = INSN_LOCATION (before_4);
2370 INSN_ADDRESSES_NEW (insn,
2371 INSN_ADDRESSES (INSN_UID (before_4)));
2372 PUT_MODE (insn, TImode);
2375 if ((hbrp_addr0 <= 4 * 4 || hbrp_addr0 > 16 * 4)
2376 && hbrp_addr1 > 16 * 4)
2379 emit_insn_before (gen_iprefetch (GEN_INT (2)), before_16);
2380 recog_memoized (insn);
2381 INSN_LOCATION (insn) = INSN_LOCATION (before_16);
2382 INSN_ADDRESSES_NEW (insn,
2383 INSN_ADDRESSES (INSN_UID (before_16)));
2384 PUT_MODE (insn, GET_MODE (before_16));
2385 PUT_MODE (before_16, TImode);
2386 if (insert_lnop_after & 2)
2388 insn = emit_insn_before (gen_lnop (), before_16);
2389 recog_memoized (insn);
2390 INSN_LOCATION (insn) = INSN_LOCATION (before_16);
2391 INSN_ADDRESSES_NEW (insn,
2392 INSN_ADDRESSES (INSN_UID
2394 PUT_MODE (insn, TImode);
2400 else if (BARRIER_P (insn))
2405 /* The SPU might hang when it executes 48 inline instructions after a
2406 hinted branch jumps to its hinted target. The beginning of a
2407 function and the return from a call might have been hinted, and
2408 must be handled as well. To prevent a hang we insert 2 hbrps. The
2409 first should be within 6 insns of the branch target. The second
2410 should be within 22 insns of the branch target. When determining
2411 if hbrps are necessary, we look for only 32 inline instructions,
2412 because up to 12 nops and 4 hbrps could be inserted. Similarly,
2413 when inserting new hbrps, we insert them within 4 and 16 insns of the target. */
2419 if (TARGET_SAFE_HINTS)
2421 shorten_branches (get_insns ());
2422 /* Insert hbrp at beginning of function */
2423 insn = next_active_insn (get_insns ());
2425 insert_hbrp_for_ilb_runout (insn);
2426 /* Insert hbrp after hinted targets. */
2427 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
2428 if ((LABEL_P (insn) && HINTED_P (insn)) || CALL_P (insn))
2429 insert_hbrp_for_ilb_runout (next_active_insn (insn));
2433 static int in_spu_reorg;
2436 spu_var_tracking (void)
2438 if (flag_var_tracking)
2441 timevar_push (TV_VAR_TRACKING);
2442 variable_tracking_main ();
2443 timevar_pop (TV_VAR_TRACKING);
2444 df_finish_pass (false);
2448 /* Insert branch hints. There are no branch optimizations after this
2449 pass, so it's safe to set our branch hints now. */
2451 spu_machine_dependent_reorg (void)
2455 rtx_insn *branch, *insn;
2456 rtx branch_target = 0;
2457 int branch_addr = 0, insn_addr, required_dist = 0;
2461 if (!TARGET_BRANCH_HINTS || optimize == 0)
2463 /* We still do it for unoptimized code because an external
2464 function might have hinted a call or return. */
2465 compute_bb_for_insn ();
2468 spu_var_tracking ();
2469 free_bb_for_insn ();
2473 blocks = sbitmap_alloc (last_basic_block_for_fn (cfun));
2474 bitmap_clear (blocks);
2477 compute_bb_for_insn ();
2479 /* (Re-)discover loops so that bb->loop_father can be used
2480 in the analysis below. */
2481 loop_optimizer_init (AVOID_CFG_MODIFICATIONS);
2486 (struct spu_bb_info *) xcalloc (n_basic_blocks_for_fn (cfun),
2487 sizeof (struct spu_bb_info));
2489 /* We need exact insn addresses and lengths. */
2490 shorten_branches (get_insns ());
2492 for (i = n_basic_blocks_for_fn (cfun) - 1; i >= 0; i--)
2494 bb = BASIC_BLOCK_FOR_FN (cfun, i);
2496 if (spu_bb_info[i].prop_jump)
2498 branch = spu_bb_info[i].prop_jump;
2499 branch_target = get_branch_target (branch);
2500 branch_addr = INSN_ADDRESSES (INSN_UID (branch));
2501 required_dist = spu_hint_dist;
2503 /* Search from end of a block to beginning. In this loop, find
2504 jumps which need a branch hint and emit them only when:
2505 - it's an indirect branch and we're at the insn which sets the register
2507 - we're at an insn that will invalidate the hint. e.g., a
2508 call, another hint insn, inline asm that clobbers $hbr, and
2509 some inlined operations (divmodsi4). Don't consider jumps
2510 because they are only at the end of a block and are
2511 considered when we are deciding whether to propagate
2512 - we're getting too far away from the branch. The hbr insns
2513 only have a signed 10 bit offset
2514 We go back as far as possible so the branch will be considered
2515 for propagation when we get to the beginning of the block. */
2516 for (insn = BB_END (bb); insn; insn = PREV_INSN (insn))
2520 insn_addr = INSN_ADDRESSES (INSN_UID (insn));
2522 && ((GET_CODE (branch_target) == REG
2523 && set_of (branch_target, insn) != NULL_RTX)
2524 || insn_clobbers_hbr (insn)
2525 || branch_addr - insn_addr > 600))
2527 rtx_insn *next = NEXT_INSN (insn);
2528 int next_addr = INSN_ADDRESSES (INSN_UID (next));
2529 if (insn != BB_END (bb)
2530 && branch_addr - next_addr >= required_dist)
2534 "hint for %i in block %i before %i\n",
2535 INSN_UID (branch), bb->index,
2537 spu_emit_branch_hint (next, branch, branch_target,
2538 branch_addr - next_addr, blocks);
2543 /* JUMP_P will only be true at the end of a block. When
2544 branch is already set it means we've previously decided
2545 to propagate a hint for that branch into this block. */
2546 if (CALL_P (insn) || (JUMP_P (insn) && !branch))
2549 if ((branch_target = get_branch_target (insn)))
2552 branch_addr = insn_addr;
2553 required_dist = spu_hint_dist;
2557 if (insn == BB_HEAD (bb))
2563 /* If we haven't emitted a hint for this branch yet, it might
2564 be profitable to emit it in one of the predecessor blocks,
2565 especially for loops. */
2567 basic_block prev = 0, prop = 0, prev2 = 0;
2568 int loop_exit = 0, simple_loop = 0;
2569 int next_addr = INSN_ADDRESSES (INSN_UID (NEXT_INSN (insn)));
2571 for (j = 0; j < EDGE_COUNT (bb->preds); j++)
2572 if (EDGE_PRED (bb, j)->flags & EDGE_FALLTHRU)
2573 prev = EDGE_PRED (bb, j)->src;
2575 prev2 = EDGE_PRED (bb, j)->src;
2577 for (j = 0; j < EDGE_COUNT (bb->succs); j++)
2578 if (EDGE_SUCC (bb, j)->flags & EDGE_LOOP_EXIT)
2580 else if (EDGE_SUCC (bb, j)->dest == bb)
2583 /* If this branch is a loop exit then propagate to previous
2584 fallthru block. This catches the cases when it is a simple
2585 loop or when there is an initial branch into the loop. */
2586 if (prev && (loop_exit || simple_loop)
2587 && bb_loop_depth (prev) <= bb_loop_depth (bb))
2590 /* If there is only one adjacent predecessor, don't propagate
2591 outside this loop. */
2592 else if (prev && single_pred_p (bb)
2593 && prev->loop_father == bb->loop_father)
2596 /* If this is the JOIN block of a simple IF-THEN then
2597 propagate the hint to the HEADER block. */
2598 else if (prev && prev2
2599 && EDGE_COUNT (bb->preds) == 2
2600 && EDGE_COUNT (prev->preds) == 1
2601 && EDGE_PRED (prev, 0)->src == prev2
2602 && prev2->loop_father == bb->loop_father
2603 && GET_CODE (branch_target) != REG)
2606 /* Don't propagate when:
2607 - this is a simple loop and the hint would be too far
2608 - this is not a simple loop and there are 16 insns in
2610 - the predecessor block ends in a branch that will be
2612 - the predecessor block ends in an insn that invalidates
2616 && (bbend = BB_END (prop))
2617 && branch_addr - INSN_ADDRESSES (INSN_UID (bbend)) <
2618 (simple_loop ? 600 : 16 * 4) && get_branch_target (bbend) == 0
2619 && (JUMP_P (bbend) || !insn_clobbers_hbr (bbend)))
2622 fprintf (dump_file, "propagate from %i to %i (loop depth %i) "
2623 "for %i (loop_exit %i simple_loop %i dist %i)\n",
2624 bb->index, prop->index, bb_loop_depth (bb),
2625 INSN_UID (branch), loop_exit, simple_loop,
2626 branch_addr - INSN_ADDRESSES (INSN_UID (bbend)));
2628 spu_bb_info[prop->index].prop_jump = branch;
2629 spu_bb_info[prop->index].bb_index = i;
2631 else if (branch_addr - next_addr >= required_dist)
2634 fprintf (dump_file, "hint for %i in block %i before %i\n",
2635 INSN_UID (branch), bb->index,
2636 INSN_UID (NEXT_INSN (insn)));
2637 spu_emit_branch_hint (NEXT_INSN (insn), branch, branch_target,
2638 branch_addr - next_addr, blocks);
2645 if (!bitmap_empty_p (blocks))
2646 find_many_sub_basic_blocks (blocks);
2648 /* We have to schedule to make sure alignment is ok. */
2649 FOR_EACH_BB_FN (bb, cfun) bb->flags &= ~BB_DISABLE_SCHEDULE;
2651 /* The hints need to be scheduled, so call it again. */
2653 df_finish_pass (true);
2659 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
2660 if (NONJUMP_INSN_P (insn) && INSN_CODE (insn) == CODE_FOR_hbr)
2662 /* Adjust the LABEL_REF in a hint when we have inserted a nop
2663 between its branch label and the branch. We don't move the
2664 label because GCC expects it at the beginning of the block. */
2665 rtx unspec = SET_SRC (XVECEXP (PATTERN (insn), 0, 0));
2666 rtx label_ref = XVECEXP (unspec, 0, 0);
2667 rtx label = XEXP (label_ref, 0);
2670 for (branch = NEXT_INSN (label);
2671 !JUMP_P (branch) && !CALL_P (branch);
2672 branch = NEXT_INSN (branch))
2673 if (NONJUMP_INSN_P (branch))
2674 offset += get_attr_length (branch);
2676 XVECEXP (unspec, 0, 0) = plus_constant (Pmode, label_ref, offset);
2679 spu_var_tracking ();
2681 loop_optimizer_finalize ();
2683 free_bb_for_insn ();
2689 /* Insn scheduling routines, primarily for dual issue. */
2691 spu_sched_issue_rate (void)
2697 uses_ls_unit(rtx_insn *insn)
2699 rtx set = single_set (insn);
2701 && (GET_CODE (SET_DEST (set)) == MEM
2702 || GET_CODE (SET_SRC (set)) == MEM))
2708 get_pipe (rtx_insn *insn)
2711 /* Handle inline asm */
2712 if (INSN_CODE (insn) == -1)
2714 t = get_attr_type (insn);
2739 case TYPE_IPREFETCH:
2747 /* haifa-sched.c has a static variable that keeps track of the current
2748 cycle. It is passed to spu_sched_reorder, and we record it here for
2749 use by spu_sched_variable_issue. It won't be accurate if the
2750 scheduler updates its clock_var between the two calls. */
2751 static int clock_var;
2753 /* This is used to keep track of insn alignment. Set to 0 at the
2754 beginning of each block and increased by the "length" attr of each
2756 static int spu_sched_length;
2758 /* Record when we've issued pipe0 and pipe1 insns so we can reorder the
2759 ready list appropriately in spu_sched_reorder(). */
2760 static int pipe0_clock;
2761 static int pipe1_clock;
2763 static int prev_clock_var;
2765 static int prev_priority;
2767 /* The SPU needs to load the next ilb sometime during the execution of
2768 the previous ilb. There is a potential conflict if every cycle has a
2769 load or store. To avoid the conflict we make sure the load/store
2770 unit is free for at least one cycle during the execution of insns in
2771 the previous ilb. */
2772 static int spu_ls_first;
2773 static int prev_ls_clock;
2776 spu_sched_init_global (FILE *file ATTRIBUTE_UNUSED, int verbose ATTRIBUTE_UNUSED,
2777 int max_ready ATTRIBUTE_UNUSED)
2779 spu_sched_length = 0;
2783 spu_sched_init (FILE *file ATTRIBUTE_UNUSED, int verbose ATTRIBUTE_UNUSED,
2784 int max_ready ATTRIBUTE_UNUSED)
2786 if (align_labels > 4 || align_loops > 4 || align_jumps > 4)
2788 /* When any block might be at least 8-byte aligned, assume all
2789 blocks will be at least 8-byte aligned to make sure dual issue
2790 works out correctly. */
2791 spu_sched_length = 0;
2793 spu_ls_first = INT_MAX;
2798 prev_clock_var = -1;
2803 spu_sched_variable_issue (FILE *file ATTRIBUTE_UNUSED,
2804 int verbose ATTRIBUTE_UNUSED,
2805 rtx uncast_insn, int more)
2809 rtx_insn *insn = as_a <rtx_insn *> (uncast_insn);
2810 if (GET_CODE (PATTERN (insn)) == USE
2811 || GET_CODE (PATTERN (insn)) == CLOBBER
2812 || (len = get_attr_length (insn)) == 0)
2815 spu_sched_length += len;
2817 /* Reset on inline asm */
2818 if (INSN_CODE (insn) == -1)
2820 spu_ls_first = INT_MAX;
2825 p = get_pipe (insn);
2827 pipe0_clock = clock_var;
2829 pipe1_clock = clock_var;
2833 if (clock_var - prev_ls_clock > 1
2834 || INSN_CODE (insn) == CODE_FOR_iprefetch)
2835 spu_ls_first = INT_MAX;
2836 if (uses_ls_unit (insn))
2838 if (spu_ls_first == INT_MAX)
2839 spu_ls_first = spu_sched_length;
2840 prev_ls_clock = clock_var;
2843 /* The scheduler hasn't inserted the nop, but we will later on.
2844 Include those nops in spu_sched_length. */
2845 if (prev_clock_var == clock_var && (spu_sched_length & 7))
2846 spu_sched_length += 4;
2847 prev_clock_var = clock_var;
2849 /* more is -1 when called from spu_sched_reorder for new insns
2850 that don't have INSN_PRIORITY */
2852 prev_priority = INSN_PRIORITY (insn);
2855 /* Always try issuing more insns. spu_sched_reorder will decide
2856 when the cycle should be advanced. */
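/* Illustrative sketch of the byte accounting above, assuming every SPU
   insn is 4 bytes and a fetch pair is 8 bytes:

     spu_sched_length 0, 8, 16, ...  -> "even" address, first slot of a
                                        potential dual-issue pair
     spu_sched_length 4, 12, 20, ... -> "odd" address, second slot of
                                        the pair

   When prev_clock_var == clock_var and (spu_sched_length & 7) is
   nonzero, the nop that the later reorg pass will insert to realign
   the pair is counted here, so the running length keeps matching the
   eventual insn addresses.  */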
2860 /* This function is called for both TARGET_SCHED_REORDER and
2861 TARGET_SCHED_REORDER2. */
2863 spu_sched_reorder (FILE *file ATTRIBUTE_UNUSED, int verbose ATTRIBUTE_UNUSED,
2864 rtx_insn **ready, int *nreadyp, int clock)
2866 int i, nready = *nreadyp;
2867 int pipe_0, pipe_1, pipe_hbrp, pipe_ls, schedule_i;
2872 if (nready <= 0 || pipe1_clock >= clock)
2875 /* Find any rtl insns that don't generate assembly insns and schedule
2877 for (i = nready - 1; i >= 0; i--)
2880 if (INSN_CODE (insn) == -1
2881 || INSN_CODE (insn) == CODE_FOR_blockage
2882 || (INSN_P (insn) && get_attr_length (insn) == 0))
2884 ready[i] = ready[nready - 1];
2885 ready[nready - 1] = insn;
2890 pipe_0 = pipe_1 = pipe_hbrp = pipe_ls = schedule_i = -1;
2891 for (i = 0; i < nready; i++)
2892 if (INSN_CODE (ready[i]) != -1)
2895 switch (get_attr_type (insn))
2920 case TYPE_IPREFETCH:
2926 /* In the first scheduling phase, schedule loads and stores together
2927 to increase the chance they will get merged during postreload CSE. */
2928 if (!reload_completed && pipe_ls >= 0)
2930 insn = ready[pipe_ls];
2931 ready[pipe_ls] = ready[nready - 1];
2932 ready[nready - 1] = insn;
2936 /* If there is an hbrp ready, prefer it over other pipe 1 insns. */
2940 /* When we have loads/stores in every cycle of the last 15 insns and
2941 we are about to schedule another load/store, emit an hbrp insn
2944 && spu_sched_length - spu_ls_first >= 4 * 15
2945 && !(pipe0_clock < clock && pipe_0 >= 0) && pipe_1 == pipe_ls)
2947 insn = sched_emit_insn (gen_iprefetch (GEN_INT (3)));
2948 recog_memoized (insn);
2949 if (pipe0_clock < clock)
2950 PUT_MODE (insn, TImode);
2951 spu_sched_variable_issue (file, verbose, insn, -1);
2955 /* In general, we want to emit nops to increase dual issue, but dual
2956 issue isn't faster when one of the insns could be scheduled later
2957 without affecting the critical path. We look at INSN_PRIORITY to
2958 make a good guess, but it isn't perfect so -mdual-nops=n can be
2959 used to affect it. */
2960 if (in_spu_reorg && spu_dual_nops < 10)
2962 /* When we are at an even address and we are not issuing nops to
2963 improve scheduling then we need to advance the cycle. */
2964 if ((spu_sched_length & 7) == 0 && prev_clock_var == clock
2965 && (spu_dual_nops == 0
2968 INSN_PRIORITY (ready[pipe_1]) + spu_dual_nops)))
2971 /* When at an odd address, schedule the highest priority insn
2972 without considering pipeline. */
2973 if ((spu_sched_length & 7) == 4 && prev_clock_var != clock
2974 && (spu_dual_nops == 0
2976 INSN_PRIORITY (ready[nready - 1]) + spu_dual_nops)))
2981 /* We haven't issued a pipe0 insn yet this cycle; if there is a
2982 pipe0 insn in the ready list, schedule it. */
2983 if (pipe0_clock < clock && pipe_0 >= 0)
2984 schedule_i = pipe_0;
2986 /* Either we've scheduled a pipe0 insn already or there is no pipe0
2987 insn to schedule. Put a pipe1 insn at the front of the ready list. */
2989 schedule_i = pipe_1;
2991 if (schedule_i > -1)
2993 insn = ready[schedule_i];
2994 ready[schedule_i] = ready[nready - 1];
2995 ready[nready - 1] = insn;
3001 /* INSN is dependent on DEP_INSN. */
3003 spu_sched_adjust_cost (rtx uncast_insn, rtx link, rtx uncast_dep_insn, int cost)
3006 rtx_insn *insn = as_a <rtx_insn *> (uncast_insn);
3007 rtx_insn *dep_insn = as_a <rtx_insn *> (uncast_dep_insn);
3009 /* The blockage pattern is used to prevent instructions from being
3010 moved across it and has no cost. */
3011 if (INSN_CODE (insn) == CODE_FOR_blockage
3012 || INSN_CODE (dep_insn) == CODE_FOR_blockage)
3015 if ((INSN_P (insn) && get_attr_length (insn) == 0)
3016 || (INSN_P (dep_insn) && get_attr_length (dep_insn) == 0))
3019 /* Make sure hbrps are spread out. */
3020 if (INSN_CODE (insn) == CODE_FOR_iprefetch
3021 && INSN_CODE (dep_insn) == CODE_FOR_iprefetch)
3024 /* Make sure hints and hbrps are 2 cycles apart. */
3025 if ((INSN_CODE (insn) == CODE_FOR_iprefetch
3026 || INSN_CODE (insn) == CODE_FOR_hbr)
3027 && (INSN_CODE (dep_insn) == CODE_FOR_iprefetch
3028 || INSN_CODE (dep_insn) == CODE_FOR_hbr))
3031 /* An hbrp has no real dependency on other insns. */
3032 if (INSN_CODE (insn) == CODE_FOR_iprefetch
3033 || INSN_CODE (dep_insn) == CODE_FOR_iprefetch)
3036 /* Assuming that it is unlikely an argument register will be used in
3037 the first cycle of the called function, we reduce the cost for
3038 slightly better scheduling of dep_insn. When not hinted, the
3039 mispredicted branch would hide the cost as well. */
3042 rtx target = get_branch_target (insn);
3043 if (GET_CODE (target) != REG || !set_of (target, insn))
3048 /* And when returning from a function, let's assume the return values
3049 are completed sooner too. */
3050 if (CALL_P (dep_insn))
3053 /* Make sure an instruction that loads from the back chain is scheduled
3054 away from the return instruction so a hint is more likely to get
3056 if (INSN_CODE (insn) == CODE_FOR__return
3057 && (set = single_set (dep_insn))
3058 && GET_CODE (SET_DEST (set)) == REG
3059 && REGNO (SET_DEST (set)) == LINK_REGISTER_REGNUM)
3062 /* The dfa scheduler sets cost to 0 for all anti-dependencies and the
3063 scheduler makes every insn in a block anti-dependent on the final
3064 jump_insn. We adjust here so higher cost insns will get scheduled
3066 if (JUMP_P (insn) && REG_NOTE_KIND (link) == REG_DEP_ANTI)
3067 return insn_cost (dep_insn) - 3;
3072 /* Create a CONST_DOUBLE from a string. */
3074 spu_float_const (const char *string, enum machine_mode mode)
3076 REAL_VALUE_TYPE value;
3077 value = REAL_VALUE_ATOF (string, mode);
3078 return CONST_DOUBLE_FROM_REAL_VALUE (value, mode);
3082 spu_constant_address_p (rtx x)
3084 return (GET_CODE (x) == LABEL_REF || GET_CODE (x) == SYMBOL_REF
3085 || GET_CODE (x) == CONST_INT || GET_CODE (x) == CONST
3086 || GET_CODE (x) == HIGH);
3089 static enum spu_immediate
3090 which_immediate_load (HOST_WIDE_INT val)
3092 gcc_assert (val == trunc_int_for_mode (val, SImode));
3094 if (val >= -0x8000 && val <= 0x7fff)
3096 if (val >= 0 && val <= 0x3ffff)
3098 if ((val & 0xffff) == ((val >> 16) & 0xffff))
3100 if ((val & 0xffff) == 0)
3106 /* Return true when OP can be loaded by one of the il instructions, or
3107 when flow2 is not completed and OP can be loaded using ilhu and iohl. */
3109 immediate_load_p (rtx op, enum machine_mode mode)
3111 if (CONSTANT_P (op))
3113 enum immediate_class c = classify_immediate (op, mode);
3114 return c == IC_IL1 || c == IC_IL1s
3115 || (!epilogue_completed && (c == IC_IL2 || c == IC_IL2s));
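/* Illustrative examples for which_immediate_load above, assuming the
   elided returns map its four range checks to il, ila, ilh and ilhu in
   that order:

     0x00001234  fits the signed 16-bit range    -> il
     0x00023456  fits the unsigned 18-bit range  -> ila
     0x12341234  both halfwords identical        -> ilh
     0x12340000  low halfword zero               -> ilhu
     0x12345678  none of the above               -> SPU_NONE, loadable
                 only via the two-insn ilhu/iohl sequence (hence the
                 !epilogue_completed test above).  */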
3120 /* Return true if the first SIZE bytes of arr is a constant that can be
3121 generated with cbd, chd, cwd or cdd. When non-NULL, PRUN and PSTART
3122 represent the size and offset of the instruction to use. */
3124 cpat_info(unsigned char *arr, int size, int *prun, int *pstart)
3126 int cpat, run, i, start;
3130 for (i = 0; i < size && cpat; i++)
3138 else if (arr[i] == 2 && arr[i+1] == 3)
3140 else if (arr[i] == 0)
3142 while (arr[i+run] == run && i+run < 16)
3144 if (run != 4 && run != 8)
3149 if ((i & (run-1)) != 0)
3156 if (cpat && (run || size < 16))
3163 *pstart = start == -1 ? 16-run : start;
3169 /* OP is a CONSTANT_P. Determine what instructions can be used to load
3170 it into a register. MODE is only valid when OP is a CONST_INT. */
3171 static enum immediate_class
3172 classify_immediate (rtx op, enum machine_mode mode)
3175 unsigned char arr[16];
3176 int i, j, repeated, fsmbi, repeat;
3178 gcc_assert (CONSTANT_P (op));
3180 if (GET_MODE (op) != VOIDmode)
3181 mode = GET_MODE (op);
3183 /* A V4SI const_vector with all identical symbols is ok. */
3186 && GET_CODE (op) == CONST_VECTOR
3187 && GET_CODE (CONST_VECTOR_ELT (op, 0)) != CONST_INT
3188 && GET_CODE (CONST_VECTOR_ELT (op, 0)) != CONST_DOUBLE
3189 && CONST_VECTOR_ELT (op, 0) == CONST_VECTOR_ELT (op, 1)
3190 && CONST_VECTOR_ELT (op, 1) == CONST_VECTOR_ELT (op, 2)
3191 && CONST_VECTOR_ELT (op, 2) == CONST_VECTOR_ELT (op, 3))
3192 op = CONST_VECTOR_ELT (op, 0);
3194 switch (GET_CODE (op))
3198 return TARGET_LARGE_MEM ? IC_IL2s : IC_IL1s;
3201 /* We can never know if the resulting address fits in 18 bits and can be
3202 loaded with ila. For now, assume the address will not overflow if
3203 the displacement is "small" (fits 'K' constraint). */
3204 if (!TARGET_LARGE_MEM && GET_CODE (XEXP (op, 0)) == PLUS)
3206 rtx sym = XEXP (XEXP (op, 0), 0);
3207 rtx cst = XEXP (XEXP (op, 0), 1);
3209 if (GET_CODE (sym) == SYMBOL_REF
3210 && GET_CODE (cst) == CONST_INT
3211 && satisfies_constraint_K (cst))
3220 for (i = 0; i < GET_MODE_NUNITS (mode); i++)
3221 if (GET_CODE (CONST_VECTOR_ELT (op, i)) != CONST_INT
3222 && GET_CODE (CONST_VECTOR_ELT (op, i)) != CONST_DOUBLE)
3228 constant_to_array (mode, op, arr);
3230 /* Check that each 4-byte slot is identical. */
3232 for (i = 4; i < 16; i += 4)
3233 for (j = 0; j < 4; j++)
3234 if (arr[j] != arr[i + j])
3239 val = (arr[0] << 24) | (arr[1] << 16) | (arr[2] << 8) | arr[3];
3240 val = trunc_int_for_mode (val, SImode);
3242 if (which_immediate_load (val) != SPU_NONE)
3246 /* Any mode of 2 bytes or smaller can be loaded with an il
3248 gcc_assert (GET_MODE_SIZE (mode) > 2);
3252 for (i = 0; i < 16 && fsmbi; i++)
3253 if (arr[i] != 0 && repeat == 0)
3255 else if (arr[i] != 0 && arr[i] != repeat)
3258 return repeat == 0xff ? IC_FSMBI : IC_FSMBI2;
3260 if (cpat_info (arr, GET_MODE_SIZE (mode), 0, 0))
3273 static enum spu_immediate
3274 which_logical_immediate (HOST_WIDE_INT val)
3276 gcc_assert (val == trunc_int_for_mode (val, SImode));
3278 if (val >= -0x200 && val <= 0x1ff)
3280 if (val >= 0 && val <= 0xffff)
3282 if ((val & 0xffff) == ((val >> 16) & 0xffff))
3284 val = trunc_int_for_mode (val, HImode);
3285 if (val >= -0x200 && val <= 0x1ff)
3287 if ((val & 0xff) == ((val >> 8) & 0xff))
3289 val = trunc_int_for_mode (val, QImode);
3290 if (val >= -0x200 && val <= 0x1ff)
3297 /* Return TRUE when X, a CONST_VECTOR, only contains CONST_INTs or
3300 const_vector_immediate_p (rtx x)
3303 gcc_assert (GET_CODE (x) == CONST_VECTOR);
3304 for (i = 0; i < GET_MODE_NUNITS (GET_MODE (x)); i++)
3305 if (GET_CODE (CONST_VECTOR_ELT (x, i)) != CONST_INT
3306 && GET_CODE (CONST_VECTOR_ELT (x, i)) != CONST_DOUBLE)
3312 logical_immediate_p (rtx op, enum machine_mode mode)
3315 unsigned char arr[16];
3318 gcc_assert (GET_CODE (op) == CONST_INT || GET_CODE (op) == CONST_DOUBLE
3319 || GET_CODE (op) == CONST_VECTOR);
3321 if (GET_CODE (op) == CONST_VECTOR
3322 && !const_vector_immediate_p (op))
3325 if (GET_MODE (op) != VOIDmode)
3326 mode = GET_MODE (op);
3328 constant_to_array (mode, op, arr);
3330 /* Check that bytes are repeated. */
3331 for (i = 4; i < 16; i += 4)
3332 for (j = 0; j < 4; j++)
3333 if (arr[j] != arr[i + j])
3336 val = (arr[0] << 24) | (arr[1] << 16) | (arr[2] << 8) | arr[3];
3337 val = trunc_int_for_mode (val, SImode);
3339 i = which_logical_immediate (val);
3340 return i != SPU_NONE && i != SPU_IOHL;
3344 iohl_immediate_p (rtx op, enum machine_mode mode)
3347 unsigned char arr[16];
3350 gcc_assert (GET_CODE (op) == CONST_INT || GET_CODE (op) == CONST_DOUBLE
3351 || GET_CODE (op) == CONST_VECTOR);
3353 if (GET_CODE (op) == CONST_VECTOR
3354 && !const_vector_immediate_p (op))
3357 if (GET_MODE (op) != VOIDmode)
3358 mode = GET_MODE (op);
3360 constant_to_array (mode, op, arr);
3362 /* Check that bytes are repeated. */
3363 for (i = 4; i < 16; i += 4)
3364 for (j = 0; j < 4; j++)
3365 if (arr[j] != arr[i + j])
3368 val = (arr[0] << 24) | (arr[1] << 16) | (arr[2] << 8) | arr[3];
3369 val = trunc_int_for_mode (val, SImode);
3371 return val >= 0 && val <= 0xffff;
3375 arith_immediate_p (rtx op, enum machine_mode mode,
3376 HOST_WIDE_INT low, HOST_WIDE_INT high)
3379 unsigned char arr[16];
3382 gcc_assert (GET_CODE (op) == CONST_INT || GET_CODE (op) == CONST_DOUBLE
3383 || GET_CODE (op) == CONST_VECTOR);
3385 if (GET_CODE (op) == CONST_VECTOR
3386 && !const_vector_immediate_p (op))
3389 if (GET_MODE (op) != VOIDmode)
3390 mode = GET_MODE (op);
3392 constant_to_array (mode, op, arr);
3394 if (VECTOR_MODE_P (mode))
3395 mode = GET_MODE_INNER (mode);
3397 bytes = GET_MODE_SIZE (mode);
3398 mode = mode_for_size (GET_MODE_BITSIZE (mode), MODE_INT, 0);
3400 /* Check that bytes are repeated. */
3401 for (i = bytes; i < 16; i += bytes)
3402 for (j = 0; j < bytes; j++)
3403 if (arr[j] != arr[i + j])
3407 for (j = 1; j < bytes; j++)
3408 val = (val << 8) | arr[j];
3410 val = trunc_int_for_mode (val, mode);
3412 return val >= low && val <= high;
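/* Illustrative example for arith_immediate_p above: a splatted V8HI
   constant such as { 7, 7, 7, 7, 7, 7, 7, 7 } fills arr with the
   repeating byte pair 0x00 0x07, passes the repetition check with
   bytes == 2, and yields val == 7 in HImode, so it is accepted
   whenever LOW <= 7 <= HIGH (e.g. a 10-bit signed immediate range).  */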
3415 /* TRUE when op is an immediate and an exact power of 2, and given that
3416 OP is 2^scale, scale >= LOW && scale <= HIGH. When OP is a vector,
3417 all entries must be the same. */
3419 exp2_immediate_p (rtx op, enum machine_mode mode, int low, int high)
3421 enum machine_mode int_mode;
3423 unsigned char arr[16];
3426 gcc_assert (GET_CODE (op) == CONST_INT || GET_CODE (op) == CONST_DOUBLE
3427 || GET_CODE (op) == CONST_VECTOR);
3429 if (GET_CODE (op) == CONST_VECTOR
3430 && !const_vector_immediate_p (op))
3433 if (GET_MODE (op) != VOIDmode)
3434 mode = GET_MODE (op);
3436 constant_to_array (mode, op, arr);
3438 if (VECTOR_MODE_P (mode))
3439 mode = GET_MODE_INNER (mode);
3441 bytes = GET_MODE_SIZE (mode);
3442 int_mode = mode_for_size (GET_MODE_BITSIZE (mode), MODE_INT, 0);
3444 /* Check that bytes are repeated. */
3445 for (i = bytes; i < 16; i += bytes)
3446 for (j = 0; j < bytes; j++)
3447 if (arr[j] != arr[i + j])
3451 for (j = 1; j < bytes; j++)
3452 val = (val << 8) | arr[j];
3454 val = trunc_int_for_mode (val, int_mode);
3456 /* Currently, we only handle SFmode */
3457 gcc_assert (mode == SFmode);
3460 int exp = (val >> 23) - 127;
3461 return val > 0 && (val & 0x007fffff) == 0
3462 && exp >= low && exp <= high;
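/* Illustrative example for the SFmode check above: 8.0f is encoded as
   0x41000000, so val > 0, the mantissa bits (val & 0x007fffff) are all
   zero, and exp = (0x41000000 >> 23) - 127 = 130 - 127 = 3; the value
   is therefore accepted exactly when LOW <= 3 <= HIGH.  */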
3467 /* Return true if X is a SYMBOL_REF to an __ea qualified variable. */
3470 ea_symbol_ref (rtx *px, void *data ATTRIBUTE_UNUSED)
3475 if (GET_CODE (x) == CONST && GET_CODE (XEXP (x, 0)) == PLUS)
3477 rtx plus = XEXP (x, 0);
3478 rtx op0 = XEXP (plus, 0);
3479 rtx op1 = XEXP (plus, 1);
3480 if (GET_CODE (op1) == CONST_INT)
3484 return (GET_CODE (x) == SYMBOL_REF
3485 && (decl = SYMBOL_REF_DECL (x)) != 0
3486 && TREE_CODE (decl) == VAR_DECL
3487 && TYPE_ADDR_SPACE (TREE_TYPE (decl)));
3491 - any 32-bit constant (SImode, SFmode)
3492 - any constant that can be generated with fsmbi (any mode)
3493 - a 64-bit constant where the high and low bits are identical
3495 - a 128-bit constant where the four 32-bit words match. */
3497 spu_legitimate_constant_p (enum machine_mode mode, rtx x)
3499 if (GET_CODE (x) == HIGH)
3502 /* Reject any __ea qualified reference. These can't appear in
3503 instructions but must be forced to the constant pool. */
3504 if (for_each_rtx (&x, ea_symbol_ref, 0))
3507 /* V4SI with all identical symbols is valid. */
3510 && (GET_CODE (CONST_VECTOR_ELT (x, 0)) == SYMBOL_REF
3511 || GET_CODE (CONST_VECTOR_ELT (x, 0)) == LABEL_REF
3512 || GET_CODE (CONST_VECTOR_ELT (x, 0)) == CONST))
3513 return CONST_VECTOR_ELT (x, 0) == CONST_VECTOR_ELT (x, 1)
3514 && CONST_VECTOR_ELT (x, 1) == CONST_VECTOR_ELT (x, 2)
3515 && CONST_VECTOR_ELT (x, 2) == CONST_VECTOR_ELT (x, 3);
3517 if (GET_CODE (x) == CONST_VECTOR
3518 && !const_vector_immediate_p (x))
3523 /* Valid addresses are:
3524 - symbol_ref, label_ref, const
3526 - reg + const_int, where const_int is 16 byte aligned
3527 - reg + reg, alignment doesn't matter
3528 The alignment matters in the reg+const case because lqd and stqd
3529 ignore the 4 least significant bits of the const. We only care about
3530 16 byte modes because the expand phase will change all smaller MEM
3531 references to TImode. */
3533 spu_legitimate_address_p (enum machine_mode mode,
3534 rtx x, bool reg_ok_strict)
3536 int aligned = GET_MODE_SIZE (mode) >= 16;
3538 && GET_CODE (x) == AND
3539 && GET_CODE (XEXP (x, 1)) == CONST_INT
3540 && INTVAL (XEXP (x, 1)) == (HOST_WIDE_INT) - 16)
3542 switch (GET_CODE (x))
3545 return !TARGET_LARGE_MEM;
3549 /* Keep __ea references until reload so that spu_expand_mov can see them
3551 if (ea_symbol_ref (&x, 0))
3552 return !reload_in_progress && !reload_completed;
3553 return !TARGET_LARGE_MEM;
3556 return INTVAL (x) >= 0 && INTVAL (x) <= 0x3ffff;
3564 return INT_REG_OK_FOR_BASE_P (x, reg_ok_strict);
3569 rtx op0 = XEXP (x, 0);
3570 rtx op1 = XEXP (x, 1);
3571 if (GET_CODE (op0) == SUBREG)
3572 op0 = XEXP (op0, 0);
3573 if (GET_CODE (op1) == SUBREG)
3574 op1 = XEXP (op1, 0);
3575 if (GET_CODE (op0) == REG
3576 && INT_REG_OK_FOR_BASE_P (op0, reg_ok_strict)
3577 && GET_CODE (op1) == CONST_INT
3578 && ((INTVAL (op1) >= -0x2000 && INTVAL (op1) <= 0x1fff)
3579 /* If virtual registers are involved, the displacement will
3580 change later on anyway, so checking would be premature.
3581 Reload will make sure the final displacement after
3582 register elimination is OK. */
3583 || op0 == arg_pointer_rtx
3584 || op0 == frame_pointer_rtx
3585 || op0 == virtual_stack_vars_rtx)
3586 && (!aligned || (INTVAL (op1) & 15) == 0))
3588 if (GET_CODE (op0) == REG
3589 && INT_REG_OK_FOR_BASE_P (op0, reg_ok_strict)
3590 && GET_CODE (op1) == REG
3591 && INT_REG_OK_FOR_INDEX_P (op1, reg_ok_strict))
3602 /* Like spu_legitimate_address_p, except with named addresses. */
3604 spu_addr_space_legitimate_address_p (enum machine_mode mode, rtx x,
3605 bool reg_ok_strict, addr_space_t as)
3607 if (as == ADDR_SPACE_EA)
3608 return (REG_P (x) && (GET_MODE (x) == EAmode));
3610 else if (as != ADDR_SPACE_GENERIC)
3613 return spu_legitimate_address_p (mode, x, reg_ok_strict);
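/* Illustrative examples of the reg + const_int rule enforced by
   spu_legitimate_address_p above for a 16-byte (TImode) access, where
   lqd/stqd silently drop the low four bits of the displacement:

     (plus reg (const_int 32))  accepted: 32 is a multiple of 16
     (plus reg (const_int 8))   rejected: lqd would access reg+0, not reg+8
     (plus reg reg)             accepted: the lqx/stqx form needs no
                                displacement alignment  */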
3616 /* When the address is reg + const_int, force the const_int into a
3619 spu_legitimize_address (rtx x, rtx oldx ATTRIBUTE_UNUSED,
3620 enum machine_mode mode ATTRIBUTE_UNUSED)
3623 /* Make sure both operands are registers. */
3624 if (GET_CODE (x) == PLUS)
3628 if (ALIGNED_SYMBOL_REF_P (op0))
3630 op0 = force_reg (Pmode, op0);
3631 mark_reg_pointer (op0, 128);
3633 else if (GET_CODE (op0) != REG)
3634 op0 = force_reg (Pmode, op0);
3635 if (ALIGNED_SYMBOL_REF_P (op1))
3637 op1 = force_reg (Pmode, op1);
3638 mark_reg_pointer (op1, 128);
3640 else if (GET_CODE (op1) != REG)
3641 op1 = force_reg (Pmode, op1);
3642 x = gen_rtx_PLUS (Pmode, op0, op1);
3647 /* Like spu_legitimize_address, except with named address support. */
3649 spu_addr_space_legitimize_address (rtx x, rtx oldx, enum machine_mode mode,
3652 if (as != ADDR_SPACE_GENERIC)
3655 return spu_legitimize_address (x, oldx, mode);
3658 /* Reload reg + const_int for out-of-range displacements. */
3660 spu_legitimize_reload_address (rtx ad, enum machine_mode mode ATTRIBUTE_UNUSED,
3661 int opnum, int type)
3663 bool removed_and = false;
3665 if (GET_CODE (ad) == AND
3666 && CONST_INT_P (XEXP (ad, 1))
3667 && INTVAL (XEXP (ad, 1)) == (HOST_WIDE_INT) - 16)
3673 if (GET_CODE (ad) == PLUS
3674 && REG_P (XEXP (ad, 0))
3675 && CONST_INT_P (XEXP (ad, 1))
3676 && !(INTVAL (XEXP (ad, 1)) >= -0x2000
3677 && INTVAL (XEXP (ad, 1)) <= 0x1fff))
3679 /* Unshare the sum. */
3682 /* Reload the displacement. */
3683 push_reload (XEXP (ad, 1), NULL_RTX, &XEXP (ad, 1), NULL,
3684 BASE_REG_CLASS, GET_MODE (ad), VOIDmode, 0, 0,
3685 opnum, (enum reload_type) type);
3687 /* Add back AND for alignment if we stripped it. */
3689 ad = gen_rtx_AND (GET_MODE (ad), ad, GEN_INT (-16));
3697 /* Handle an attribute requiring a FUNCTION_DECL; arguments as in
3698 struct attribute_spec.handler. */
3700 spu_handle_fndecl_attribute (tree * node,
3702 tree args ATTRIBUTE_UNUSED,
3703 int flags ATTRIBUTE_UNUSED, bool * no_add_attrs)
3705 if (TREE_CODE (*node) != FUNCTION_DECL)
3707 warning (0, "%qE attribute only applies to functions",
3709 *no_add_attrs = true;
3715 /* Handle the "vector" attribute. */
3717 spu_handle_vector_attribute (tree * node, tree name,
3718 tree args ATTRIBUTE_UNUSED,
3719 int flags ATTRIBUTE_UNUSED, bool * no_add_attrs)
3721 tree type = *node, result = NULL_TREE;
3722 enum machine_mode mode;
3725 while (POINTER_TYPE_P (type)
3726 || TREE_CODE (type) == FUNCTION_TYPE
3727 || TREE_CODE (type) == METHOD_TYPE || TREE_CODE (type) == ARRAY_TYPE)
3728 type = TREE_TYPE (type);
3730 mode = TYPE_MODE (type);
3732 unsigned_p = TYPE_UNSIGNED (type);
3736 result = (unsigned_p ? unsigned_V2DI_type_node : V2DI_type_node);
3739 result = (unsigned_p ? unsigned_V4SI_type_node : V4SI_type_node);
3742 result = (unsigned_p ? unsigned_V8HI_type_node : V8HI_type_node);
3745 result = (unsigned_p ? unsigned_V16QI_type_node : V16QI_type_node);
3748 result = V4SF_type_node;
3751 result = V2DF_type_node;
3757 /* Propagate qualifiers attached to the element type
3758 onto the vector type. */
3759 if (result && result != type && TYPE_QUALS (type))
3760 result = build_qualified_type (result, TYPE_QUALS (type));
3762 *no_add_attrs = true; /* No need to hang on to the attribute. */
3765 warning (0, "%qE attribute ignored", name);
3767 *node = lang_hooks.types.reconstruct_complex_type (*node, result);
3772 /* Return nonzero if FUNC is a naked function. */
3774 spu_naked_function_p (tree func)
3778 if (TREE_CODE (func) != FUNCTION_DECL)
3781 a = lookup_attribute ("naked", DECL_ATTRIBUTES (func));
3782 return a != NULL_TREE;
3786 spu_initial_elimination_offset (int from, int to)
3788 int saved_regs_size = spu_saved_regs_size ();
3790 if (!crtl->is_leaf || crtl->outgoing_args_size
3791 || get_frame_size () || saved_regs_size)
3792 sp_offset = STACK_POINTER_OFFSET;
3793 if (from == FRAME_POINTER_REGNUM && to == STACK_POINTER_REGNUM)
3794 return get_frame_size () + crtl->outgoing_args_size + sp_offset;
3795 else if (from == FRAME_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
3796 return get_frame_size ();
3797 else if (from == ARG_POINTER_REGNUM && to == STACK_POINTER_REGNUM)
3798 return sp_offset + crtl->outgoing_args_size
3799 + get_frame_size () + saved_regs_size + STACK_POINTER_OFFSET;
3800 else if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
3801 return get_frame_size () + saved_regs_size + sp_offset;
3807 spu_function_value (const_tree type, const_tree func ATTRIBUTE_UNUSED)
3809 enum machine_mode mode = TYPE_MODE (type);
3810 int byte_size = ((mode == BLKmode)
3811 ? int_size_in_bytes (type) : GET_MODE_SIZE (mode));
3813 /* Make sure small structs are left justified in a register. */
3814 if ((mode == BLKmode || (type && AGGREGATE_TYPE_P (type)))
3815 && byte_size <= UNITS_PER_WORD * MAX_REGISTER_RETURN && byte_size > 0)
3817 enum machine_mode smode;
3820 int nregs = (byte_size + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
3821 int n = byte_size / UNITS_PER_WORD;
3822 v = rtvec_alloc (nregs);
3823 for (i = 0; i < n; i++)
3825 RTVEC_ELT (v, i) = gen_rtx_EXPR_LIST (VOIDmode,
3826 gen_rtx_REG (TImode,
3829 GEN_INT (UNITS_PER_WORD * i));
3830 byte_size -= UNITS_PER_WORD;
3838 smallest_mode_for_size (byte_size * BITS_PER_UNIT, MODE_INT);
3840 gen_rtx_EXPR_LIST (VOIDmode,
3841 gen_rtx_REG (smode, FIRST_RETURN_REGNUM + n),
3842 GEN_INT (UNITS_PER_WORD * n));
3844 return gen_rtx_PARALLEL (mode, v);
3846 return gen_rtx_REG (mode, FIRST_RETURN_REGNUM);
3850 spu_function_arg (cumulative_args_t cum_v,
3851 enum machine_mode mode,
3852 const_tree type, bool named ATTRIBUTE_UNUSED)
3854 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
3857 if (*cum >= MAX_REGISTER_ARGS)
3860 byte_size = ((mode == BLKmode)
3861 ? int_size_in_bytes (type) : GET_MODE_SIZE (mode));
3863 /* The ABI does not allow parameters to be passed partially in
3864 reg and partially in stack. */
3865 if ((*cum + (byte_size + 15) / 16) > MAX_REGISTER_ARGS)
3868 /* Make sure small structs are left justified in a register. */
3869 if ((mode == BLKmode || (type && AGGREGATE_TYPE_P (type)))
3870 && byte_size < UNITS_PER_WORD && byte_size > 0)
3872 enum machine_mode smode;
3876 smode = smallest_mode_for_size (byte_size * BITS_PER_UNIT, MODE_INT);
3877 gr_reg = gen_rtx_EXPR_LIST (VOIDmode,
3878 gen_rtx_REG (smode, FIRST_ARG_REGNUM + *cum),
3880 return gen_rtx_PARALLEL (mode, gen_rtvec (1, gr_reg));
3883 return gen_rtx_REG (mode, FIRST_ARG_REGNUM + *cum);
3887 spu_function_arg_advance (cumulative_args_t cum_v, enum machine_mode mode,
3888 const_tree type, bool named ATTRIBUTE_UNUSED)
3890 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
3892 *cum += (type && TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST
3895 ? ((int_size_in_bytes (type) + 15) / 16)
3898 : HARD_REGNO_NREGS (cum, mode));
3901 /* Variable sized types are passed by reference. */
3903 spu_pass_by_reference (cumulative_args_t cum ATTRIBUTE_UNUSED,
3904 enum machine_mode mode ATTRIBUTE_UNUSED,
3905 const_tree type, bool named ATTRIBUTE_UNUSED)
3907 return type && TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST;
3913 /* Create and return the va_list datatype.
3915 On SPU, va_list is an array type equivalent to
3917 typedef struct __va_list_tag
3919 void *__args __attribute__((__aligned(16)));
3920 void *__skip __attribute__((__aligned(16)));
3924 where __args points to the arg that will be returned by the next
3925 va_arg(), and __skip points to the previous stack frame such that
3926 when __args == __skip we should advance __args by 32 bytes. */
3928 spu_build_builtin_va_list (void)
3930 tree f_args, f_skip, record, type_decl;
3933 record = (*lang_hooks.types.make_type) (RECORD_TYPE);
3936 build_decl (BUILTINS_LOCATION,
3937 TYPE_DECL, get_identifier ("__va_list_tag"), record);
3939 f_args = build_decl (BUILTINS_LOCATION,
3940 FIELD_DECL, get_identifier ("__args"), ptr_type_node);
3941 f_skip = build_decl (BUILTINS_LOCATION,
3942 FIELD_DECL, get_identifier ("__skip"), ptr_type_node);
3944 DECL_FIELD_CONTEXT (f_args) = record;
3945 DECL_ALIGN (f_args) = 128;
3946 DECL_USER_ALIGN (f_args) = 1;
3948 DECL_FIELD_CONTEXT (f_skip) = record;
3949 DECL_ALIGN (f_skip) = 128;
3950 DECL_USER_ALIGN (f_skip) = 1;
3952 TYPE_STUB_DECL (record) = type_decl;
3953 TYPE_NAME (record) = type_decl;
3954 TYPE_FIELDS (record) = f_args;
3955 DECL_CHAIN (f_args) = f_skip;
3957 /* We know this is being padded and we want it to be. It is an internal
3958 type so hide the warnings from the user. */
3960 warn_padded = false;
3962 layout_type (record);
3966 /* The correct type is an array type of one element. */
3967 return build_array_type (record, build_index_type (size_zero_node));
3970 /* Implement va_start by filling the va_list structure VALIST.
3971 NEXTARG points to the first anonymous stack argument.
3973 The following global variables are used to initialize
3974 the va_list structure:
3977 the CUMULATIVE_ARGS for this function
3979 crtl->args.arg_offset_rtx:
3980 holds the offset of the first anonymous stack argument
3981 (relative to the virtual arg pointer). */
3984 spu_va_start (tree valist, rtx nextarg)
3986 tree f_args, f_skip;
3989 f_args = TYPE_FIELDS (TREE_TYPE (va_list_type_node));
3990 f_skip = DECL_CHAIN (f_args);
3992 valist = build_simple_mem_ref (valist);
3994 build3 (COMPONENT_REF, TREE_TYPE (f_args), valist, f_args, NULL_TREE);
3996 build3 (COMPONENT_REF, TREE_TYPE (f_skip), valist, f_skip, NULL_TREE);
3998 /* Find the __args area. */
3999 t = make_tree (TREE_TYPE (args), nextarg);
4000 if (crtl->args.pretend_args_size > 0)
4001 t = fold_build_pointer_plus_hwi (t, -STACK_POINTER_OFFSET);
4002 t = build2 (MODIFY_EXPR, TREE_TYPE (args), args, t);
4003 TREE_SIDE_EFFECTS (t) = 1;
4004 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
4006 /* Find the __skip area. */
4007 t = make_tree (TREE_TYPE (skip), virtual_incoming_args_rtx);
4008 t = fold_build_pointer_plus_hwi (t, (crtl->args.pretend_args_size
4009 - STACK_POINTER_OFFSET));
4010 t = build2 (MODIFY_EXPR, TREE_TYPE (skip), skip, t);
4011 TREE_SIDE_EFFECTS (t) = 1;
4012 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
4015 /* Gimplify va_arg by updating the va_list structure
4016 VALIST as required to retrieve an argument of type
4017 TYPE, and returning that argument.
4019 ret = va_arg(VALIST, TYPE);
4021 generates code equivalent to:
4023 paddedsize = (sizeof(TYPE) + 15) & -16;
4024 if (VALIST.__args + paddedsize > VALIST.__skip
4025 && VALIST.__args <= VALIST.__skip)
4026 addr = VALIST.__skip + 32;
4028 addr = VALIST.__args;
4029 VALIST.__args = addr + paddedsize;
4030 ret = *(TYPE *)addr;
4033 spu_gimplify_va_arg_expr (tree valist, tree type, gimple_seq * pre_p,
4034 gimple_seq * post_p ATTRIBUTE_UNUSED)
4036 tree f_args, f_skip;
4038 HOST_WIDE_INT size, rsize;
4040 bool pass_by_reference_p;
4042 f_args = TYPE_FIELDS (TREE_TYPE (va_list_type_node));
4043 f_skip = DECL_CHAIN (f_args);
4045 valist = build_simple_mem_ref (valist);
4047 build3 (COMPONENT_REF, TREE_TYPE (f_args), valist, f_args, NULL_TREE);
4049 build3 (COMPONENT_REF, TREE_TYPE (f_skip), valist, f_skip, NULL_TREE);
4051 addr = create_tmp_var (ptr_type_node, "va_arg");
4053 /* if an object is dynamically sized, a pointer to it is passed
4054 instead of the object itself. */
4055 pass_by_reference_p = pass_by_reference (NULL, TYPE_MODE (type), type,
4057 if (pass_by_reference_p)
4058 type = build_pointer_type (type);
4059 size = int_size_in_bytes (type);
4060 rsize = ((size + UNITS_PER_WORD - 1) / UNITS_PER_WORD) * UNITS_PER_WORD;
4062 /* build conditional expression to calculate addr. The expression
4063 will be gimplified later. */
4064 tmp = fold_build_pointer_plus_hwi (unshare_expr (args), rsize);
4065 tmp = build2 (TRUTH_AND_EXPR, boolean_type_node,
4066 build2 (GT_EXPR, boolean_type_node, tmp, unshare_expr (skip)),
4067 build2 (LE_EXPR, boolean_type_node, unshare_expr (args),
4068 unshare_expr (skip)));
4070 tmp = build3 (COND_EXPR, ptr_type_node, tmp,
4071 fold_build_pointer_plus_hwi (unshare_expr (skip), 32),
4072 unshare_expr (args));
4074 gimplify_assign (addr, tmp, pre_p);
4076 /* update VALIST.__args */
4077 tmp = fold_build_pointer_plus_hwi (addr, rsize);
4078 gimplify_assign (unshare_expr (args), tmp, pre_p);
4080 addr = fold_convert (build_pointer_type_for_mode (type, ptr_mode, true),
4083 if (pass_by_reference_p)
4084 addr = build_va_arg_indirect_ref (addr);
4086 return build_va_arg_indirect_ref (addr);
4089 /* Save parameter registers starting with the register that corresponds
4090 to the first unnamed parameter. If the first unnamed parameter is
4091 in the stack then save no registers. Set pretend_args_size to the
4092 amount of space needed to save the registers. */
4094 spu_setup_incoming_varargs (cumulative_args_t cum, enum machine_mode mode,
4095 tree type, int *pretend_size, int no_rtl)
4102 int ncum = *get_cumulative_args (cum);
4104 /* cum currently points to the last named argument; we want to
4105 start at the next argument. */
4106 spu_function_arg_advance (pack_cumulative_args (&ncum), mode, type, true);
4108 offset = -STACK_POINTER_OFFSET;
4109 for (regno = ncum; regno < MAX_REGISTER_ARGS; regno++)
4111 tmp = gen_frame_mem (V4SImode,
4112 plus_constant (Pmode, virtual_incoming_args_rtx,
4114 emit_move_insn (tmp,
4115 gen_rtx_REG (V4SImode, FIRST_ARG_REGNUM + regno));
4118 *pretend_size = offset + STACK_POINTER_OFFSET;
4123 spu_conditional_register_usage (void)
4127 fixed_regs[PIC_OFFSET_TABLE_REGNUM] = 1;
4128 call_used_regs[PIC_OFFSET_TABLE_REGNUM] = 1;
4132 /* This is called any time we inspect the alignment of a register for
4135 reg_aligned_for_addr (rtx x)
4138 REGNO (x) < FIRST_PSEUDO_REGISTER ? ORIGINAL_REGNO (x) : REGNO (x);
4139 return REGNO_POINTER_ALIGN (regno) >= 128;
4142 /* Encode symbol attributes (local vs. global, tls model) of a SYMBOL_REF
4143 into its SYMBOL_REF_FLAGS. */
4145 spu_encode_section_info (tree decl, rtx rtl, int first)
4147 default_encode_section_info (decl, rtl, first);
4149 /* If a variable has a forced alignment to < 16 bytes, mark it with
4150 SYMBOL_FLAG_ALIGN1. */
4151 if (TREE_CODE (decl) == VAR_DECL
4152 && DECL_USER_ALIGN (decl) && DECL_ALIGN (decl) < 128)
4153 SYMBOL_REF_FLAGS (XEXP (rtl, 0)) |= SYMBOL_FLAG_ALIGN1;
4156 /* Return TRUE if we are certain the mem refers to a complete object
4157 which is both 16-byte aligned and padded to a 16-byte boundary. This
4158 would make it safe to store with a single instruction.
4159 We guarantee the alignment and padding for static objects by aligning
4160 all of them to 16 bytes. (DATA_ALIGNMENT and CONSTANT_ALIGNMENT.)
4161 FIXME: We currently cannot guarantee this for objects on the stack
4162 because assign_parm_setup_stack calls assign_stack_local with the
4163 alignment of the parameter mode and in that case the alignment never
4164 gets adjusted by LOCAL_ALIGNMENT. */
4166 store_with_one_insn_p (rtx mem)
4168 enum machine_mode mode = GET_MODE (mem);
4169 rtx addr = XEXP (mem, 0);
4170 if (mode == BLKmode)
4172 if (GET_MODE_SIZE (mode) >= 16)
4174 /* Only static objects. */
4175 if (GET_CODE (addr) == SYMBOL_REF)
4177 /* We use the associated declaration to make sure the access is
4178 referring to the whole object.
4179 We check both MEM_EXPR and SYMBOL_REF_DECL. I'm not sure
4180 if it is necessary. Will there be cases where one exists, and
4181 the other does not? Will there be cases where both exist, but
4182 have different types? */
4183 tree decl = MEM_EXPR (mem);
4185 && TREE_CODE (decl) == VAR_DECL
4186 && GET_MODE (mem) == TYPE_MODE (TREE_TYPE (decl)))
4188 decl = SYMBOL_REF_DECL (addr);
4190 && TREE_CODE (decl) == VAR_DECL
4191 && GET_MODE (mem) == TYPE_MODE (TREE_TYPE (decl)))
4197 /* Return 1 when the address is not valid for a simple load and store as
4198 required by the '_mov*' patterns. We could make this less strict
4199 for loads, but we prefer MEMs to look the same so they are more
4200 likely to be merged. */
4202 address_needs_split (rtx mem)
4204 if (GET_MODE_SIZE (GET_MODE (mem)) < 16
4205 && (GET_MODE_SIZE (GET_MODE (mem)) < 4
4206 || !(store_with_one_insn_p (mem)
4207 || mem_is_padded_component_ref (mem))))
4213 static GTY(()) rtx cache_fetch; /* __cache_fetch function */
4214 static GTY(()) rtx cache_fetch_dirty; /* __cache_fetch_dirty function */
4215 static alias_set_type ea_alias_set = -1; /* alias set for __ea memory */
4217 /* MEM is known to be an __ea qualified memory access. Emit a call to
4218 fetch the ppu memory to local store, and return its address in local
4222 ea_load_store (rtx mem, bool is_store, rtx ea_addr, rtx data_addr)
4226 rtx ndirty = GEN_INT (GET_MODE_SIZE (GET_MODE (mem)));
4227 if (!cache_fetch_dirty)
4228 cache_fetch_dirty = init_one_libfunc ("__cache_fetch_dirty");
4229 emit_library_call_value (cache_fetch_dirty, data_addr, LCT_NORMAL, Pmode,
4230 2, ea_addr, EAmode, ndirty, SImode);
4235 cache_fetch = init_one_libfunc ("__cache_fetch");
4236 emit_library_call_value (cache_fetch, data_addr, LCT_NORMAL, Pmode,
4237 1, ea_addr, EAmode);
4241 /* Like ea_load_store, but do the cache tag comparison and, for stores,
4242 dirty bit marking, inline.
4244 The cache control data structure is an array of
4246 struct __cache_tag_array
4248 unsigned int tag_lo[4];
4249 unsigned int tag_hi[4];
4250 void *data_pointer[4];
4252 vector unsigned short dirty_bits[4];
4256 ea_load_store_inline (rtx mem, bool is_store, rtx ea_addr, rtx data_addr)
4260 rtx tag_size_sym = gen_rtx_SYMBOL_REF (Pmode, "__cache_tag_array_size");
4261 rtx tag_arr_sym = gen_rtx_SYMBOL_REF (Pmode, "__cache_tag_array");
4262 rtx index_mask = gen_reg_rtx (SImode);
4263 rtx tag_arr = gen_reg_rtx (Pmode);
4264 rtx splat_mask = gen_reg_rtx (TImode);
4265 rtx splat = gen_reg_rtx (V4SImode);
4266 rtx splat_hi = NULL_RTX;
4267 rtx tag_index = gen_reg_rtx (Pmode);
4268 rtx block_off = gen_reg_rtx (SImode);
4269 rtx tag_addr = gen_reg_rtx (Pmode);
4270 rtx tag = gen_reg_rtx (V4SImode);
4271 rtx cache_tag = gen_reg_rtx (V4SImode);
4272 rtx cache_tag_hi = NULL_RTX;
4273 rtx cache_ptrs = gen_reg_rtx (TImode);
4274 rtx cache_ptrs_si = gen_reg_rtx (SImode);
4275 rtx tag_equal = gen_reg_rtx (V4SImode);
4276 rtx tag_equal_hi = NULL_RTX;
4277 rtx tag_eq_pack = gen_reg_rtx (V4SImode);
4278 rtx tag_eq_pack_si = gen_reg_rtx (SImode);
4279 rtx eq_index = gen_reg_rtx (SImode);
4280 rtx bcomp, hit_label, hit_ref, cont_label;
4283 if (spu_ea_model != 32)
4285 splat_hi = gen_reg_rtx (V4SImode);
4286 cache_tag_hi = gen_reg_rtx (V4SImode);
4287 tag_equal_hi = gen_reg_rtx (V4SImode);
4290 emit_move_insn (index_mask, plus_constant (Pmode, tag_size_sym, -128));
4291 emit_move_insn (tag_arr, tag_arr_sym);
4292 v = 0x0001020300010203LL;
4293 emit_move_insn (splat_mask, immed_double_const (v, v, TImode));
4294 ea_addr_si = ea_addr;
4295 if (spu_ea_model != 32)
4296 ea_addr_si = convert_to_mode (SImode, ea_addr, 1);
4298 /* tag_index = ea_addr & (tag_array_size - 128) */
4299 emit_insn (gen_andsi3 (tag_index, ea_addr_si, index_mask));
4301 /* splat ea_addr to all 4 slots. */
4302 emit_insn (gen_shufb (splat, ea_addr_si, ea_addr_si, splat_mask));
4303 /* Similarly for high 32 bits of ea_addr. */
4304 if (spu_ea_model != 32)
4305 emit_insn (gen_shufb (splat_hi, ea_addr, ea_addr, splat_mask));
4307 /* block_off = ea_addr & 127 */
4308 emit_insn (gen_andsi3 (block_off, ea_addr_si, spu_const (SImode, 127)));
4310 /* tag_addr = tag_arr + tag_index */
4311 emit_insn (gen_addsi3 (tag_addr, tag_arr, tag_index));
4313 /* Read cache tags. */
4314 emit_move_insn (cache_tag, gen_rtx_MEM (V4SImode, tag_addr));
4315 if (spu_ea_model != 32)
4316 emit_move_insn (cache_tag_hi, gen_rtx_MEM (V4SImode,
4317 plus_constant (Pmode,
4320 /* tag = ea_addr & -128 */
4321 emit_insn (gen_andv4si3 (tag, splat, spu_const (V4SImode, -128)));
4323 /* Read all four cache data pointers. */
4324 emit_move_insn (cache_ptrs, gen_rtx_MEM (TImode,
4325 plus_constant (Pmode,
4329 emit_insn (gen_ceq_v4si (tag_equal, tag, cache_tag));
4330 if (spu_ea_model != 32)
4332 emit_insn (gen_ceq_v4si (tag_equal_hi, splat_hi, cache_tag_hi));
4333 emit_insn (gen_andv4si3 (tag_equal, tag_equal, tag_equal_hi));
4336 /* At most one of the tags compares equal, so tag_equal has one
4337 32-bit slot set to all 1's, with the other slots all zero.
4338 gbb picks off low bit from each byte in the 128-bit registers,
4339 so tag_eq_pack is one of 0xf000, 0x0f00, 0x00f0, 0x000f, assuming
4341 emit_insn (gen_spu_gbb (tag_eq_pack, spu_gen_subreg (V16QImode, tag_equal)));
4342 emit_insn (gen_spu_convert (tag_eq_pack_si, tag_eq_pack));
4344 /* So counting leading zeros will set eq_index to 16, 20, 24 or 28. */
4345 emit_insn (gen_clzsi2 (eq_index, tag_eq_pack_si));
4347 /* Allowing us to rotate the corresponding cache data pointer to slot0.
4348 (rotating eq_index mod 16 bytes). */
4349 emit_insn (gen_rotqby_ti (cache_ptrs, cache_ptrs, eq_index));
4350 emit_insn (gen_spu_convert (cache_ptrs_si, cache_ptrs));
4352 /* Add block offset to form final data address. */
4353 emit_insn (gen_addsi3 (data_addr, cache_ptrs_si, block_off));
4355 /* Check that we did hit. */
4356 hit_label = gen_label_rtx ();
4357 hit_ref = gen_rtx_LABEL_REF (VOIDmode, hit_label);
4358 bcomp = gen_rtx_NE (SImode, tag_eq_pack_si, const0_rtx);
4359 insn = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx,
4360 gen_rtx_IF_THEN_ELSE (VOIDmode, bcomp,
4362 /* Say that this branch is very likely to happen. */
4363 v = REG_BR_PROB_BASE - REG_BR_PROB_BASE / 100 - 1;
4364 add_int_reg_note (insn, REG_BR_PROB, v);
4366 ea_load_store (mem, is_store, ea_addr, data_addr);
4367 cont_label = gen_label_rtx ();
4368 emit_jump_insn (gen_jump (cont_label));
4371 emit_label (hit_label);
4376 rtx dirty_bits = gen_reg_rtx (TImode);
4377 rtx dirty_off = gen_reg_rtx (SImode);
4378 rtx dirty_128 = gen_reg_rtx (TImode);
4379 rtx neg_block_off = gen_reg_rtx (SImode);
4381 /* Set up mask with one dirty bit per byte of the mem we are
4382 writing, starting from top bit. */
4384 v <<= (128 - GET_MODE_SIZE (GET_MODE (mem))) & 63;
4385 if ((128 - GET_MODE_SIZE (GET_MODE (mem))) >= 64)
4390 emit_move_insn (dirty_bits, immed_double_const (v, v_hi, TImode));
4392 /* Form index into cache dirty_bits. eq_index is one of
4393 0x10, 0x14, 0x18 or 0x1c. Multiplying by 4 gives us
4394 0x40, 0x50, 0x60 or 0x70 which just happens to be the
4395 offset to each of the four dirty_bits elements. */
4396 emit_insn (gen_ashlsi3 (dirty_off, eq_index, spu_const (SImode, 2)));
4398 emit_insn (gen_spu_lqx (dirty_128, tag_addr, dirty_off));
4400 /* Rotate bit mask to proper bit. */
4401 emit_insn (gen_negsi2 (neg_block_off, block_off));
4402 emit_insn (gen_rotqbybi_ti (dirty_bits, dirty_bits, neg_block_off));
4403 emit_insn (gen_rotqbi_ti (dirty_bits, dirty_bits, neg_block_off));
4405 /* Or in the new dirty bits. */
4406 emit_insn (gen_iorti3 (dirty_128, dirty_bits, dirty_128));
4409 emit_insn (gen_spu_stqx (dirty_128, tag_addr, dirty_off));
4412 emit_label (cont_label);
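/* Illustrative walk-through of the hit path above, assuming the
   matching tag sits in 32-bit slot 2: tag_equal is then { 0, 0, -1, 0 },
   gbb packs it to 0x00f0, clz of 0x000000f0 gives eq_index = 24, and
   rotating cache_ptrs by 24 & 15 = 8 bytes moves the third
   data_pointer element (byte offset 8) into the preferred slot before
   block_off is added to form data_addr.  */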
4416 expand_ea_mem (rtx mem, bool is_store)
4419 rtx data_addr = gen_reg_rtx (Pmode);
4422 ea_addr = force_reg (EAmode, XEXP (mem, 0));
4423 if (optimize_size || optimize == 0)
4424 ea_load_store (mem, is_store, ea_addr, data_addr);
4426 ea_load_store_inline (mem, is_store, ea_addr, data_addr);
4428 if (ea_alias_set == -1)
4429 ea_alias_set = new_alias_set ();
4431 /* We generate a new MEM RTX to refer to the copy of the data
4432 in the cache. We do not copy memory attributes (except the
4433 alignment) from the original MEM, as they may no longer apply
4434 to the cache copy. */
4435 new_mem = gen_rtx_MEM (GET_MODE (mem), data_addr);
4436 set_mem_alias_set (new_mem, ea_alias_set);
4437 set_mem_align (new_mem, MIN (MEM_ALIGN (mem), 128 * 8));
4443 spu_expand_mov (rtx * ops, enum machine_mode mode)
4445 if (GET_CODE (ops[0]) == SUBREG && !valid_subreg (ops[0]))
4447 /* Perform the move in the destination SUBREG's inner mode. */
4448 ops[0] = SUBREG_REG (ops[0]);
4449 mode = GET_MODE (ops[0]);
4450 ops[1] = gen_lowpart_common (mode, ops[1]);
4451 gcc_assert (ops[1]);
4454 if (GET_CODE (ops[1]) == SUBREG && !valid_subreg (ops[1]))
4456 rtx from = SUBREG_REG (ops[1]);
4457 enum machine_mode imode = int_mode_for_mode (GET_MODE (from));
4459 gcc_assert (GET_MODE_CLASS (mode) == MODE_INT
4460 && GET_MODE_CLASS (imode) == MODE_INT
4461 && subreg_lowpart_p (ops[1]));
4463 if (GET_MODE_SIZE (imode) < 4)
4465 if (imode != GET_MODE (from))
4466 from = gen_rtx_SUBREG (imode, from, 0);
4468 if (GET_MODE_SIZE (mode) < GET_MODE_SIZE (imode))
4470 enum insn_code icode = convert_optab_handler (trunc_optab,
4472 emit_insn (GEN_FCN (icode) (ops[0], from));
4475 emit_insn (gen_extend_insn (ops[0], from, mode, imode, 1));
4479 /* At least one of the operands needs to be a register. */
4480 if ((reload_in_progress | reload_completed) == 0
4481 && !register_operand (ops[0], mode) && !register_operand (ops[1], mode))
4483 rtx temp = force_reg (mode, ops[1]);
4484 emit_move_insn (ops[0], temp);
4487 if (reload_in_progress || reload_completed)
4489 if (CONSTANT_P (ops[1]))
4490 return spu_split_immediate (ops);
4494 /* Catch the SImode immediates greater than 0x7fffffff, and sign
4496 if (GET_CODE (ops[1]) == CONST_INT)
4498 HOST_WIDE_INT val = trunc_int_for_mode (INTVAL (ops[1]), mode);
4499 if (val != INTVAL (ops[1]))
4501 emit_move_insn (ops[0], GEN_INT (val));
4507 if (MEM_ADDR_SPACE (ops[0]))
4508 ops[0] = expand_ea_mem (ops[0], true);
4509 return spu_split_store (ops);
4513 if (MEM_ADDR_SPACE (ops[1]))
4514 ops[1] = expand_ea_mem (ops[1], false);
4515 return spu_split_load (ops);
4522 spu_convert_move (rtx dst, rtx src)
4524 enum machine_mode mode = GET_MODE (dst);
4525 enum machine_mode int_mode = mode_for_size (GET_MODE_BITSIZE (mode), MODE_INT, 0);
4527 gcc_assert (GET_MODE (src) == TImode);
4528 reg = int_mode != mode ? gen_reg_rtx (int_mode) : dst;
4529 emit_insn (gen_rtx_SET (VOIDmode, reg,
4530 gen_rtx_TRUNCATE (int_mode,
4531 gen_rtx_LSHIFTRT (TImode, src,
4532 GEN_INT (int_mode == DImode ? 64 : 96)))));
4533 if (int_mode != mode)
4535 reg = simplify_gen_subreg (mode, reg, int_mode, 0);
4536 emit_move_insn (dst, reg);
4540 /* Load TImode values into DST0 and DST1 (when it is non-NULL) using
4541 the address from SRC and SRC+16. Return a REG or CONST_INT that
4542 specifies how many bytes to rotate the loaded registers, plus any
4543 extra from EXTRA_ROTQBY. The address and rotate amounts are
4544 normalized to improve merging of loads and rotate computations. */
4546 spu_expand_load (rtx dst0, rtx dst1, rtx src, int extra_rotby)
4548 rtx addr = XEXP (src, 0);
4549 rtx p0, p1, rot, addr0, addr1;
4555 if (MEM_ALIGN (src) >= 128)
4556 /* Address is already aligned; simply perform a TImode load. */ ;
4557 else if (GET_CODE (addr) == PLUS)
4560 aligned reg + aligned reg => lqx
4561 aligned reg + unaligned reg => lqx, rotqby
4562 aligned reg + aligned const => lqd
4563 aligned reg + unaligned const => lqd, rotqbyi
4564 unaligned reg + aligned reg => lqx, rotqby
4565 unaligned reg + unaligned reg => lqx, a, rotqby (1 scratch)
4566 unaligned reg + aligned const => lqd, rotqby
4567 unaligned reg + unaligned const -> not allowed by legitimate address
4569 p0 = XEXP (addr, 0);
4570 p1 = XEXP (addr, 1);
4571 if (!reg_aligned_for_addr (p0))
4573 if (REG_P (p1) && !reg_aligned_for_addr (p1))
4575 rot = gen_reg_rtx (SImode);
4576 emit_insn (gen_addsi3 (rot, p0, p1));
4578 else if (GET_CODE (p1) == CONST_INT && (INTVAL (p1) & 15))
4582 && INTVAL (p1) * BITS_PER_UNIT
4583 < REGNO_POINTER_ALIGN (REGNO (p0)))
4585 rot = gen_reg_rtx (SImode);
4586 emit_insn (gen_addsi3 (rot, p0, p1));
4591 rtx x = gen_reg_rtx (SImode);
4592 emit_move_insn (x, p1);
4593 if (!spu_arith_operand (p1, SImode))
4595 rot = gen_reg_rtx (SImode);
4596 emit_insn (gen_addsi3 (rot, p0, p1));
4597 addr = gen_rtx_PLUS (Pmode, p0, x);
4605 if (GET_CODE (p1) == CONST_INT && (INTVAL (p1) & 15))
4607 rot_amt = INTVAL (p1) & 15;
4608 if (INTVAL (p1) & -16)
4610 p1 = GEN_INT (INTVAL (p1) & -16);
4611 addr = gen_rtx_PLUS (SImode, p0, p1);
4616 else if (REG_P (p1) && !reg_aligned_for_addr (p1))
4620 else if (REG_P (addr))
4622 if (!reg_aligned_for_addr (addr))
4625 else if (GET_CODE (addr) == CONST)
4627 if (GET_CODE (XEXP (addr, 0)) == PLUS
4628 && ALIGNED_SYMBOL_REF_P (XEXP (XEXP (addr, 0), 0))
4629 && GET_CODE (XEXP (XEXP (addr, 0), 1)) == CONST_INT)
4631 rot_amt = INTVAL (XEXP (XEXP (addr, 0), 1));
4633 addr = gen_rtx_CONST (Pmode,
4634 gen_rtx_PLUS (Pmode,
4635 XEXP (XEXP (addr, 0), 0),
4636 GEN_INT (rot_amt & -16)));
4638 addr = XEXP (XEXP (addr, 0), 0);
4642 rot = gen_reg_rtx (Pmode);
4643 emit_move_insn (rot, addr);
4646 else if (GET_CODE (addr) == CONST_INT)
4648 rot_amt = INTVAL (addr);
4649 addr = GEN_INT (rot_amt & -16);
4651 else if (!ALIGNED_SYMBOL_REF_P (addr))
4653 rot = gen_reg_rtx (Pmode);
4654 emit_move_insn (rot, addr);
4657 rot_amt += extra_rotby;
4663 rtx x = gen_reg_rtx (SImode);
4664 emit_insn (gen_addsi3 (x, rot, GEN_INT (rot_amt)));
4668 if (!rot && rot_amt)
4669 rot = GEN_INT (rot_amt);
4671 addr0 = copy_rtx (addr);
4672 addr0 = gen_rtx_AND (SImode, copy_rtx (addr), GEN_INT (-16));
4673 emit_insn (gen__movti (dst0, change_address (src, TImode, addr0)));
4677 addr1 = plus_constant (SImode, copy_rtx (addr), 16);
4678 addr1 = gen_rtx_AND (SImode, addr1, GEN_INT (-16));
4679 emit_insn (gen__movti (dst1, change_address (src, TImode, addr1)));
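/* Illustrative example for spu_expand_load above: loading an SImode
   value from ALIGNED_REG + 6 emits, roughly,

     lqd     rt, 0(ALIGNED_REG)    # 6 & -16 == 0, load the quadword
     rotqbyi rt, rt, 6             # rotate bytes 6..9 down to bytes 0..3

   so the requested word lands in the preferred slot; spu_split_load
   below then extracts it with spu_convert_move.  */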
4686 spu_split_load (rtx * ops)
4688 enum machine_mode mode = GET_MODE (ops[0]);
4689 rtx addr, load, rot;
4692 if (GET_MODE_SIZE (mode) >= 16)
4695 addr = XEXP (ops[1], 0);
4696 gcc_assert (GET_CODE (addr) != AND);
4698 if (!address_needs_split (ops[1]))
4700 ops[1] = change_address (ops[1], TImode, addr);
4701 load = gen_reg_rtx (TImode);
4702 emit_insn (gen__movti (load, ops[1]));
4703 spu_convert_move (ops[0], load);
4707 rot_amt = GET_MODE_SIZE (mode) < 4 ? GET_MODE_SIZE (mode) - 4 : 0;
4709 load = gen_reg_rtx (TImode);
4710 rot = spu_expand_load (load, 0, ops[1], rot_amt);
4713 emit_insn (gen_rotqby_ti (load, load, rot));
4715 spu_convert_move (ops[0], load);
4720 spu_split_store (rtx * ops)
4722 enum machine_mode mode = GET_MODE (ops[0]);
4724 rtx addr, p0, p1, p1_lo, smem;
4728 if (GET_MODE_SIZE (mode) >= 16)
4731 addr = XEXP (ops[0], 0);
4732 gcc_assert (GET_CODE (addr) != AND);
4734 if (!address_needs_split (ops[0]))
4736 reg = gen_reg_rtx (TImode);
4737 emit_insn (gen_spu_convert (reg, ops[1]));
4738 ops[0] = change_address (ops[0], TImode, addr);
4739 emit_move_insn (ops[0], reg);
4743 if (GET_CODE (addr) == PLUS)
4746 aligned reg + aligned reg => lqx, c?x, shuf, stqx
4747 aligned reg + unaligned reg => lqx, c?x, shuf, stqx
4748 aligned reg + aligned const => lqd, c?d, shuf, stqx
4749 aligned reg + unaligned const => lqd, c?d, shuf, stqx
4750 unaligned reg + aligned reg => lqx, c?x, shuf, stqx
4751 unaligned reg + unaligned reg => lqx, c?x, shuf, stqx
4752 unaligned reg + aligned const => lqd, c?d, shuf, stqx
4753 unaligned reg + unaligned const -> lqx, c?d, shuf, stqx
4756 p0 = XEXP (addr, 0);
4757 p1 = p1_lo = XEXP (addr, 1);
4758 if (REG_P (p0) && GET_CODE (p1) == CONST_INT)
4760 p1_lo = GEN_INT (INTVAL (p1) & 15);
4761 if (reg_aligned_for_addr (p0))
4763 p1 = GEN_INT (INTVAL (p1) & -16);
4764 if (p1 == const0_rtx)
4767 addr = gen_rtx_PLUS (SImode, p0, p1);
4771 rtx x = gen_reg_rtx (SImode);
4772 emit_move_insn (x, p1);
4773 addr = gen_rtx_PLUS (SImode, p0, x);
4777 else if (REG_P (addr))
4781 p1 = p1_lo = const0_rtx;
4786 p0 = gen_rtx_REG (SImode, STACK_POINTER_REGNUM);
4787 p1 = 0; /* aform doesn't use p1 */
4789 if (ALIGNED_SYMBOL_REF_P (addr))
4791 else if (GET_CODE (addr) == CONST
4792 && GET_CODE (XEXP (addr, 0)) == PLUS
4793 && ALIGNED_SYMBOL_REF_P (XEXP (XEXP (addr, 0), 0))
4794 && GET_CODE (XEXP (XEXP (addr, 0), 1)) == CONST_INT)
4796 HOST_WIDE_INT v = INTVAL (XEXP (XEXP (addr, 0), 1));
4798 addr = gen_rtx_CONST (Pmode,
4799 gen_rtx_PLUS (Pmode,
4800 XEXP (XEXP (addr, 0), 0),
4801 GEN_INT (v & -16)));
4803 addr = XEXP (XEXP (addr, 0), 0);
4804 p1_lo = GEN_INT (v & 15);
4806 else if (GET_CODE (addr) == CONST_INT)
4808 p1_lo = GEN_INT (INTVAL (addr) & 15);
4809 addr = GEN_INT (INTVAL (addr) & -16);
4813 p1_lo = gen_reg_rtx (SImode);
4814 emit_move_insn (p1_lo, addr);
4818 gcc_assert (aform == 0 || aform == 1);
4819 reg = gen_reg_rtx (TImode);
4821 scalar = store_with_one_insn_p (ops[0]);
4824 /* We could copy the flags from the ops[0] MEM to lmem here.
4825 We don't because we want this load to be optimized away if
4826 possible, and copying the flags will prevent that in certain
4827 cases, e.g. consider the volatile flag. */
4829 rtx pat = gen_reg_rtx (TImode);
4830 rtx lmem = change_address (ops[0], TImode, copy_rtx (addr));
4831 set_mem_alias_set (lmem, 0);
4832 emit_insn (gen_movti (reg, lmem));
4834 if (!p0 || reg_aligned_for_addr (p0))
4835 p0 = stack_pointer_rtx;
4839 emit_insn (gen_cpat (pat, p0, p1_lo, GEN_INT (GET_MODE_SIZE (mode))));
4840 emit_insn (gen_shufb (reg, ops[1], reg, pat));
4844 if (GET_CODE (ops[1]) == REG)
4845 emit_insn (gen_spu_convert (reg, ops[1]));
4846 else if (GET_CODE (ops[1]) == SUBREG)
4847 emit_insn (gen_spu_convert (reg, SUBREG_REG (ops[1])));
4852 if (GET_MODE_SIZE (mode) < 4 && scalar)
4853 emit_insn (gen_ashlti3
4854 (reg, reg, GEN_INT (32 - GET_MODE_BITSIZE (mode))));
4856 smem = change_address (ops[0], TImode, copy_rtx (addr));
4857 /* We can't use the previous alias set because the memory has changed
4858 size and can potentially overlap objects of other types. */
4859 set_mem_alias_set (smem, 0);
4861 emit_insn (gen_movti (smem, reg));
4865 /* Return TRUE if X is MEM which is a struct member reference
4866 and the member can safely be loaded and stored with a single
4867 instruction because it is padded. */
4869 mem_is_padded_component_ref (rtx x)
4871 tree t = MEM_EXPR (x);
4873 if (!t || TREE_CODE (t) != COMPONENT_REF)
4875 t = TREE_OPERAND (t, 1);
4876 if (!t || TREE_CODE (t) != FIELD_DECL
4877 || DECL_ALIGN (t) < 128 || AGGREGATE_TYPE_P (TREE_TYPE (t)))
4879 /* Only do this for RECORD_TYPEs, not UNION_TYPEs. */
4880 r = DECL_FIELD_CONTEXT (t);
4881 if (!r || TREE_CODE (r) != RECORD_TYPE)
4883 /* Make sure they are the same mode */
4884 if (GET_MODE (x) != TYPE_MODE (TREE_TYPE (t)))
4886 /* If there are no following fields then the field alignment assures
4887 the structure is padded to the alignment, which means this field is
4888 padded too. */
4889 if (TREE_CHAIN (t) == 0)
4891 /* If the following field is also aligned then this field will be
4892 padded too. */
4893 t = TREE_CHAIN (t);
4894 if (TREE_CODE (t) == FIELD_DECL && DECL_ALIGN (t) >= 128)
4899 /* Parse the -mfixed-range= option string. */
4901 fix_range (const char *const_str)
4904 char *str, *dash, *comma;
4906 /* str must be of the form REG1'-'REG2{,REG1'-'REG2} where REG1 and
4907 REG2 are either register names or register numbers. The effect
4908 of this option is to mark the registers in the range from REG1 to
4909 REG2 as ``fixed'' so they won't be used by the compiler. */
4911 i = strlen (const_str);
4912 str = (char *) alloca (i + 1);
4913 memcpy (str, const_str, i + 1);
4917 dash = strchr (str, '-');
4920 warning (0, "value of -mfixed-range must have form REG1-REG2");
4924 comma = strchr (dash + 1, ',');
4928 first = decode_reg_name (str);
4931 warning (0, "unknown register name: %s", str);
4935 last = decode_reg_name (dash + 1);
4938 warning (0, "unknown register name: %s", dash + 1);
4946 warning (0, "%s-%s is an empty range", str, dash + 1);
4950 for (i = first; i <= last; ++i)
4951 fixed_regs[i] = call_used_regs[i] = 1;
4961 /* Return TRUE if x is a CONST_INT, CONST_DOUBLE or CONST_VECTOR that
4962 can be generated using the fsmbi instruction. */
4964 fsmbi_const_p (rtx x)
4968 /* We can always choose TImode for CONST_INT because the high bits
4969 of an SImode will always be all 1s, i.e., valid for fsmbi. */
4970 enum immediate_class c = classify_immediate (x, TImode);
4971 return c == IC_FSMBI || (!epilogue_completed && c == IC_FSMBI2);
4976 /* Return TRUE if x is a CONST_INT, CONST_DOUBLE or CONST_VECTOR that
4977 can be generated using the cbd, chd, cwd or cdd instruction. */
4979 cpat_const_p (rtx x, enum machine_mode mode)
4983 enum immediate_class c = classify_immediate (x, mode);
4984 return c == IC_CPAT;
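/* Build the TImode shuffle-control constant that a cbd/chd/cwd/cdd
   instruction would produce for inserting an ops[3]-byte element at
   byte offset ops[1] + ops[2].  Return 0 when the element position is
   not known to be constant and 16-byte aligned at compile time.  */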
4990 gen_cpat_const (rtx * ops)
4992 unsigned char dst[16];
4993 int i, offset, shift, isize;
4994 if (GET_CODE (ops[3]) != CONST_INT
4995 || GET_CODE (ops[2]) != CONST_INT
4996 || (GET_CODE (ops[1]) != CONST_INT
4997 && GET_CODE (ops[1]) != REG))
4999 if (GET_CODE (ops[1]) == REG
5000 && (!REG_POINTER (ops[1])
5001 || REGNO_POINTER_ALIGN (ORIGINAL_REGNO (ops[1])) < 128))
5004 for (i = 0; i < 16; i++)
5006 isize = INTVAL (ops[3]);
5009 else if (isize == 2)
5013 offset = (INTVAL (ops[2]) +
5014 (GET_CODE (ops[1]) ==
5015 CONST_INT ? INTVAL (ops[1]) : 0)) & 15;
5016 for (i = 0; i < isize; i++)
5017 dst[offset + i] = i + shift;
5018 return array_to_constant (TImode, dst);
5021 /* Convert a CONST_INT, CONST_DOUBLE, or CONST_VECTOR into a 16 byte
5022 array. Use MODE for CONST_INT's. When the constant's mode is smaller
5023 than 16 bytes, the value is repeated across the rest of the array. */
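/* For example, an SImode constant 0x12345678 is returned as the byte
   array { 0x12, 0x34, 0x56, 0x78 } repeated four times.  */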
5025 constant_to_array (enum machine_mode mode, rtx x, unsigned char arr[16])
5030 memset (arr, 0, 16);
5031 mode = GET_MODE (x) != VOIDmode ? GET_MODE (x) : mode;
5032 if (GET_CODE (x) == CONST_INT
5033 || (GET_CODE (x) == CONST_DOUBLE
5034 && (mode == SFmode || mode == DFmode)))
5036 gcc_assert (mode != VOIDmode && mode != BLKmode);
5038 if (GET_CODE (x) == CONST_DOUBLE)
5039 val = const_double_to_hwint (x);
5042 first = GET_MODE_SIZE (mode) - 1;
5043 for (i = first; i >= 0; i--)
5045 arr[i] = val & 0xff;
5048 /* Splat the constant across the whole array. */
5049 for (j = 0, i = first + 1; i < 16; i++)
5052 j = (j == first) ? 0 : j + 1;
5055 else if (GET_CODE (x) == CONST_DOUBLE)
5057 val = CONST_DOUBLE_LOW (x);
5058 for (i = 15; i >= 8; i--)
5060 arr[i] = val & 0xff;
5063 val = CONST_DOUBLE_HIGH (x);
5064 for (i = 7; i >= 0; i--)
5066 arr[i] = val & 0xff;
5070 else if (GET_CODE (x) == CONST_VECTOR)
5074 mode = GET_MODE_INNER (mode);
5075 units = CONST_VECTOR_NUNITS (x);
5076 for (i = 0; i < units; i++)
5078 elt = CONST_VECTOR_ELT (x, i);
5079 if (GET_CODE (elt) == CONST_INT || GET_CODE (elt) == CONST_DOUBLE)
5081 if (GET_CODE (elt) == CONST_DOUBLE)
5082 val = const_double_to_hwint (elt);
5085 first = GET_MODE_SIZE (mode) - 1;
5086 if (first + i * GET_MODE_SIZE (mode) > 16)
5088 for (j = first; j >= 0; j--)
5090 arr[j + i * GET_MODE_SIZE (mode)] = val & 0xff;
5100 /* Convert a 16 byte array to a constant of mode MODE. When MODE is
5101 smaller than 16 bytes, use the bytes that would represent that value
5102 in a register, e.g., for QImode return the value of arr[3]. */
5104 array_to_constant (enum machine_mode mode, const unsigned char arr[16])
5106 enum machine_mode inner_mode;
5108 int units, size, i, j, k;
5111 if (GET_MODE_CLASS (mode) == MODE_INT
5112 && GET_MODE_BITSIZE (mode) <= HOST_BITS_PER_WIDE_INT)
5114 j = GET_MODE_SIZE (mode);
5115 i = j < 4 ? 4 - j : 0;
5116 for (val = 0; i < j; i++)
5117 val = (val << 8) | arr[i];
5118 val = trunc_int_for_mode (val, mode);
5119 return GEN_INT (val);
5125 for (i = high = 0; i < 8; i++)
5126 high = (high << 8) | arr[i];
5127 for (i = 8, val = 0; i < 16; i++)
5128 val = (val << 8) | arr[i];
5129 return immed_double_const (val, high, TImode);
5133 val = (arr[0] << 24) | (arr[1] << 16) | (arr[2] << 8) | arr[3];
5134 val = trunc_int_for_mode (val, SImode);
5135 return hwint_to_const_double (SFmode, val);
5139 for (i = 0, val = 0; i < 8; i++)
5140 val = (val << 8) | arr[i];
5141 return hwint_to_const_double (DFmode, val);
5144 if (!VECTOR_MODE_P (mode))
5147 units = GET_MODE_NUNITS (mode);
5148 size = GET_MODE_UNIT_SIZE (mode);
5149 inner_mode = GET_MODE_INNER (mode);
5150 v = rtvec_alloc (units);
5152 for (k = i = 0; i < units; ++i)
5155 for (j = 0; j < size; j++, k++)
5156 val = (val << 8) | arr[k];
5158 if (GET_MODE_CLASS (inner_mode) == MODE_FLOAT)
5159 RTVEC_ELT (v, i) = hwint_to_const_double (inner_mode, val);
5161 RTVEC_ELT (v, i) = GEN_INT (trunc_int_for_mode (val, inner_mode));
5166 return gen_rtx_CONST_VECTOR (mode, v);
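/* Diagnose a constant X that would need a run-time relocation: emit a
   warning or an error depending on TARGET_WARN_RELOC / TARGET_ERROR_RELOC,
   naming the referenced decl when one can be found.  */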
5170 reloc_diagnostic (rtx x)
5173 if (!flag_pic || !(TARGET_WARN_RELOC || TARGET_ERROR_RELOC))
5176 if (GET_CODE (x) == SYMBOL_REF)
5177 decl = SYMBOL_REF_DECL (x);
5178 else if (GET_CODE (x) == CONST
5179 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF)
5180 decl = SYMBOL_REF_DECL (XEXP (XEXP (x, 0), 0));
5182 /* SYMBOL_REF_DECL is not necessarily a DECL. */
5183 if (decl && !DECL_P (decl))
5186 /* The decl could be a string constant. */
5187 if (decl && DECL_P (decl))
5190 /* We use last_assemble_variable_decl to get line information. It's
5191 not always going to be right and might not even be close, but will
5192 be right for the more common cases. */
5193 if (!last_assemble_variable_decl || in_section == ctors_section)
5194 loc = DECL_SOURCE_LOCATION (decl);
5196 loc = DECL_SOURCE_LOCATION (last_assemble_variable_decl);
5198 if (TARGET_WARN_RELOC)
5200 "creating run-time relocation for %qD", decl);
5203 "creating run-time relocation for %qD", decl);
5207 if (TARGET_WARN_RELOC)
5208 warning_at (input_location, 0, "creating run-time relocation");
5210 error_at (input_location, "creating run-time relocation");
5214 /* Hook into assemble_integer so we can generate an error for run-time
5215 relocations. The SPU ABI disallows them. */
5217 spu_assemble_integer (rtx x, unsigned int size, int aligned_p)
5219 /* By default run-time relocations aren't supported, but we allow them
5220 in case users support it in their own run-time loader. And we provide
5221 a warning for those users that don't. */
5222 if ((GET_CODE (x) == SYMBOL_REF)
5223 || GET_CODE (x) == LABEL_REF || GET_CODE (x) == CONST)
5224 reloc_diagnostic (x);
5226 return default_assemble_integer (x, size, aligned_p);
5230 spu_asm_globalize_label (FILE * file, const char *name)
5232 fputs ("\t.global\t", file);
5233 assemble_name (file, name);
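/* Implement TARGET_RTX_COSTS.  The values below are rough instruction
   counts for the SPU, scaled by mode size for multi-word integer modes.  */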
5238 spu_rtx_costs (rtx x, int code, int outer_code ATTRIBUTE_UNUSED,
5239 int opno ATTRIBUTE_UNUSED, int *total,
5240 bool speed ATTRIBUTE_UNUSED)
5242 enum machine_mode mode = GET_MODE (x);
5243 int cost = COSTS_N_INSNS (2);
5245 /* Folding to a CONST_VECTOR will use extra space but there might
5246 be only a small savings in cycles. We'd like to use a CONST_VECTOR
5247 only if it allows us to fold away multiple insns. Changing the cost
5248 of a CONST_VECTOR here (or in CONST_COSTS) doesn't help though
5249 because this cost will only be compared against a single insn.
5250 if (code == CONST_VECTOR)
5251 return spu_legitimate_constant_p (mode, x) ? cost : COSTS_N_INSNS (6);
5252 */
5254 /* Use defaults for float operations. Not accurate but good enough. */
5257 *total = COSTS_N_INSNS (13);
5262 *total = COSTS_N_INSNS (6);
5268 if (satisfies_constraint_K (x))
5270 else if (INTVAL (x) >= -0x80000000ll && INTVAL (x) <= 0xffffffffll)
5271 *total = COSTS_N_INSNS (1);
5273 *total = COSTS_N_INSNS (3);
5277 *total = COSTS_N_INSNS (3);
5282 *total = COSTS_N_INSNS (0);
5286 *total = COSTS_N_INSNS (5);
5290 case FLOAT_TRUNCATE:
5292 case UNSIGNED_FLOAT:
5295 *total = COSTS_N_INSNS (7);
5301 *total = COSTS_N_INSNS (9);
5308 GET_CODE (XEXP (x, 0)) ==
5309 REG ? COSTS_N_INSNS (12) : COSTS_N_INSNS (7);
5310 if (mode == SImode && GET_CODE (XEXP (x, 0)) == REG)
5312 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
5314 HOST_WIDE_INT val = INTVAL (XEXP (x, 1));
5315 cost = COSTS_N_INSNS (14);
5316 if ((val & 0xffff) == 0)
5317 cost = COSTS_N_INSNS (9);
5318 else if (val > 0 && val < 0x10000)
5319 cost = COSTS_N_INSNS (11);
5328 *total = COSTS_N_INSNS (20);
5335 *total = COSTS_N_INSNS (4);
5338 if (XINT (x, 1) == UNSPEC_CONVERT)
5339 *total = COSTS_N_INSNS (0);
5341 *total = COSTS_N_INSNS (4);
5344 /* Scale cost by mode size. Except when initializing (cfun->decl == 0). */
5345 if (GET_MODE_CLASS (mode) == MODE_INT
5346 && GET_MODE_SIZE (mode) > GET_MODE_SIZE (SImode) && cfun && cfun->decl)
5347 cost = cost * (GET_MODE_SIZE (mode) / GET_MODE_SIZE (SImode))
5348 * (GET_MODE_SIZE (mode) / GET_MODE_SIZE (SImode));
5353 static enum machine_mode
5354 spu_unwind_word_mode (void)
5359 /* Decide whether we can make a sibling call to a function. DECL is the
5360 declaration of the function being targeted by the call and EXP is the
5361 CALL_EXPR representing the call. */
5363 spu_function_ok_for_sibcall (tree decl, tree exp ATTRIBUTE_UNUSED)
5365 return decl && !TARGET_LARGE_MEM;
5368 /* We need to correctly update the back chain pointer and the Available
5369 Stack Size (which is in the second slot of the sp register). */
5371 spu_allocate_stack (rtx op0, rtx op1)
5374 rtx chain = gen_reg_rtx (V4SImode);
5375 rtx stack_bot = gen_frame_mem (V4SImode, stack_pointer_rtx);
5376 rtx sp = gen_reg_rtx (V4SImode);
5377 rtx splatted = gen_reg_rtx (V4SImode);
5378 rtx pat = gen_reg_rtx (TImode);
5380 /* copy the back chain so we can save it back again. */
5381 emit_move_insn (chain, stack_bot);
5383 op1 = force_reg (SImode, op1);
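/* The shuffle pattern below (bytes 00 01 02 03 repeated) copies the
   preferred-slot SImode value in op1 into every word slot, so the single
   vector subtract that follows adjusts both the stack pointer (slot 0)
   and the Available Stack Size (slot 1) at once.  */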
5385 v = 0x1020300010203ll;
5386 emit_move_insn (pat, immed_double_const (v, v, TImode));
5387 emit_insn (gen_shufb (splatted, op1, op1, pat));
5389 emit_insn (gen_spu_convert (sp, stack_pointer_rtx));
5390 emit_insn (gen_subv4si3 (sp, sp, splatted));
5392 if (flag_stack_check)
5394 rtx avail = gen_reg_rtx(SImode);
5395 rtx result = gen_reg_rtx(SImode);
5396 emit_insn (gen_vec_extractv4si (avail, sp, GEN_INT (1)));
5397 emit_insn (gen_cgt_si(result, avail, GEN_INT (-1)));
5398 emit_insn (gen_spu_heq (result, GEN_INT(0) ));
5401 emit_insn (gen_spu_convert (stack_pointer_rtx, sp));
5403 emit_move_insn (stack_bot, chain);
5405 emit_move_insn (op0, virtual_stack_dynamic_rtx);
5409 spu_restore_stack_nonlocal (rtx op0 ATTRIBUTE_UNUSED, rtx op1)
5411 static unsigned char arr[16] =
5412 { 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3 };
5413 rtx temp = gen_reg_rtx (SImode);
5414 rtx temp2 = gen_reg_rtx (SImode);
5415 rtx temp3 = gen_reg_rtx (V4SImode);
5416 rtx temp4 = gen_reg_rtx (V4SImode);
5417 rtx pat = gen_reg_rtx (TImode);
5418 rtx sp = gen_rtx_REG (V4SImode, STACK_POINTER_REGNUM);
5420 /* Restore the backchain from the first word, sp from the second. */
5421 emit_move_insn (temp2, adjust_address_nv (op1, SImode, 0));
5422 emit_move_insn (temp, adjust_address_nv (op1, SImode, 4));
5424 emit_move_insn (pat, array_to_constant (TImode, arr));
5426 /* Compute Available Stack Size for sp */
5427 emit_insn (gen_subsi3 (temp, temp, stack_pointer_rtx));
5428 emit_insn (gen_shufb (temp3, temp, temp, pat));
5430 /* Compute Available Stack Size for back chain */
5431 emit_insn (gen_subsi3 (temp2, temp2, stack_pointer_rtx));
5432 emit_insn (gen_shufb (temp4, temp2, temp2, pat));
5433 emit_insn (gen_addv4si3 (temp4, sp, temp4));
5435 emit_insn (gen_addv4si3 (sp, sp, temp3));
5436 emit_move_insn (gen_frame_mem (V4SImode, stack_pointer_rtx), temp4);
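/* Implement TARGET_INIT_LIBFUNCS.  Register the SPU-specific library
   routines for DImode/TImode arithmetic, bit counting, overflow-checking
   arithmetic and unsigned int-to-double conversions.  */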
5440 spu_init_libfuncs (void)
5442 set_optab_libfunc (smul_optab, DImode, "__muldi3");
5443 set_optab_libfunc (sdiv_optab, DImode, "__divdi3");
5444 set_optab_libfunc (smod_optab, DImode, "__moddi3");
5445 set_optab_libfunc (udiv_optab, DImode, "__udivdi3");
5446 set_optab_libfunc (umod_optab, DImode, "__umoddi3");
5447 set_optab_libfunc (udivmod_optab, DImode, "__udivmoddi4");
5448 set_optab_libfunc (ffs_optab, DImode, "__ffsdi2");
5449 set_optab_libfunc (clz_optab, DImode, "__clzdi2");
5450 set_optab_libfunc (ctz_optab, DImode, "__ctzdi2");
5451 set_optab_libfunc (clrsb_optab, DImode, "__clrsbdi2");
5452 set_optab_libfunc (popcount_optab, DImode, "__popcountdi2");
5453 set_optab_libfunc (parity_optab, DImode, "__paritydi2");
5455 set_conv_libfunc (ufloat_optab, DFmode, SImode, "__float_unssidf");
5456 set_conv_libfunc (ufloat_optab, DFmode, DImode, "__float_unsdidf");
5458 set_optab_libfunc (addv_optab, SImode, "__addvsi3");
5459 set_optab_libfunc (subv_optab, SImode, "__subvsi3");
5460 set_optab_libfunc (smulv_optab, SImode, "__mulvsi3");
5461 set_optab_libfunc (sdivv_optab, SImode, "__divvsi3");
5462 set_optab_libfunc (negv_optab, SImode, "__negvsi2");
5463 set_optab_libfunc (absv_optab, SImode, "__absvsi2");
5464 set_optab_libfunc (addv_optab, DImode, "__addvdi3");
5465 set_optab_libfunc (subv_optab, DImode, "__subvdi3");
5466 set_optab_libfunc (smulv_optab, DImode, "__mulvdi3");
5467 set_optab_libfunc (sdivv_optab, DImode, "__divvdi3");
5468 set_optab_libfunc (negv_optab, DImode, "__negvdi2");
5469 set_optab_libfunc (absv_optab, DImode, "__absvdi2");
5471 set_optab_libfunc (smul_optab, TImode, "__multi3");
5472 set_optab_libfunc (sdiv_optab, TImode, "__divti3");
5473 set_optab_libfunc (smod_optab, TImode, "__modti3");
5474 set_optab_libfunc (udiv_optab, TImode, "__udivti3");
5475 set_optab_libfunc (umod_optab, TImode, "__umodti3");
5476 set_optab_libfunc (udivmod_optab, TImode, "__udivmodti4");
5479 /* Make a subreg, stripping any existing subreg. We could possibly just
5480 call simplify_subreg, but in this case we know what we want. */
5482 spu_gen_subreg (enum machine_mode mode, rtx x)
5484 if (GET_CODE (x) == SUBREG)
5486 if (GET_MODE (x) == mode)
5488 return gen_rtx_SUBREG (mode, x, 0);
5492 spu_return_in_memory (const_tree type, const_tree fntype ATTRIBUTE_UNUSED)
5494 return (TYPE_MODE (type) == BLKmode
5496 || TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST
5497 || int_size_in_bytes (type) >
5498 (MAX_REGISTER_RETURN * UNITS_PER_WORD)));
5501 /* Create the built-in types and functions */
5503 enum spu_function_code
5505 #define DEF_BUILTIN(fcode, icode, name, type, params) fcode,
5506 #include "spu-builtins.def"
5511 extern GTY(()) struct spu_builtin_description spu_builtins[NUM_SPU_BUILTINS];
5513 struct spu_builtin_description spu_builtins[] = {
5514 #define DEF_BUILTIN(fcode, icode, name, type, params) \
5515 {fcode, icode, name, type, params},
5516 #include "spu-builtins.def"
5520 static GTY(()) tree spu_builtin_decls[NUM_SPU_BUILTINS];
5522 /* Returns the spu builtin decl for CODE. */
5525 spu_builtin_decl (unsigned code, bool initialize_p ATTRIBUTE_UNUSED)
5527 if (code >= NUM_SPU_BUILTINS)
5528 return error_mark_node;
5530 return spu_builtin_decls[code];
5535 spu_init_builtins (void)
5537 struct spu_builtin_description *d;
5540 V16QI_type_node = build_vector_type (intQI_type_node, 16);
5541 V8HI_type_node = build_vector_type (intHI_type_node, 8);
5542 V4SI_type_node = build_vector_type (intSI_type_node, 4);
5543 V2DI_type_node = build_vector_type (intDI_type_node, 2);
5544 V4SF_type_node = build_vector_type (float_type_node, 4);
5545 V2DF_type_node = build_vector_type (double_type_node, 2);
5547 unsigned_V16QI_type_node = build_vector_type (unsigned_intQI_type_node, 16);
5548 unsigned_V8HI_type_node = build_vector_type (unsigned_intHI_type_node, 8);
5549 unsigned_V4SI_type_node = build_vector_type (unsigned_intSI_type_node, 4);
5550 unsigned_V2DI_type_node = build_vector_type (unsigned_intDI_type_node, 2);
5552 spu_builtin_types[SPU_BTI_QUADWORD] = V16QI_type_node;
5554 spu_builtin_types[SPU_BTI_7] = global_trees[TI_INTSI_TYPE];
5555 spu_builtin_types[SPU_BTI_S7] = global_trees[TI_INTSI_TYPE];
5556 spu_builtin_types[SPU_BTI_U7] = global_trees[TI_INTSI_TYPE];
5557 spu_builtin_types[SPU_BTI_S10] = global_trees[TI_INTSI_TYPE];
5558 spu_builtin_types[SPU_BTI_S10_4] = global_trees[TI_INTSI_TYPE];
5559 spu_builtin_types[SPU_BTI_U14] = global_trees[TI_INTSI_TYPE];
5560 spu_builtin_types[SPU_BTI_16] = global_trees[TI_INTSI_TYPE];
5561 spu_builtin_types[SPU_BTI_S16] = global_trees[TI_INTSI_TYPE];
5562 spu_builtin_types[SPU_BTI_S16_2] = global_trees[TI_INTSI_TYPE];
5563 spu_builtin_types[SPU_BTI_U16] = global_trees[TI_INTSI_TYPE];
5564 spu_builtin_types[SPU_BTI_U16_2] = global_trees[TI_INTSI_TYPE];
5565 spu_builtin_types[SPU_BTI_U18] = global_trees[TI_INTSI_TYPE];
5567 spu_builtin_types[SPU_BTI_INTQI] = global_trees[TI_INTQI_TYPE];
5568 spu_builtin_types[SPU_BTI_INTHI] = global_trees[TI_INTHI_TYPE];
5569 spu_builtin_types[SPU_BTI_INTSI] = global_trees[TI_INTSI_TYPE];
5570 spu_builtin_types[SPU_BTI_INTDI] = global_trees[TI_INTDI_TYPE];
5571 spu_builtin_types[SPU_BTI_UINTQI] = global_trees[TI_UINTQI_TYPE];
5572 spu_builtin_types[SPU_BTI_UINTHI] = global_trees[TI_UINTHI_TYPE];
5573 spu_builtin_types[SPU_BTI_UINTSI] = global_trees[TI_UINTSI_TYPE];
5574 spu_builtin_types[SPU_BTI_UINTDI] = global_trees[TI_UINTDI_TYPE];
5576 spu_builtin_types[SPU_BTI_FLOAT] = global_trees[TI_FLOAT_TYPE];
5577 spu_builtin_types[SPU_BTI_DOUBLE] = global_trees[TI_DOUBLE_TYPE];
5579 spu_builtin_types[SPU_BTI_VOID] = global_trees[TI_VOID_TYPE];
5581 spu_builtin_types[SPU_BTI_PTR] =
5582 build_pointer_type (build_qualified_type
5584 TYPE_QUAL_CONST | TYPE_QUAL_VOLATILE));
5586 /* For each builtin we build a new prototype. The tree code will make
5587 sure nodes are shared. */
5588 for (i = 0, d = spu_builtins; i < NUM_SPU_BUILTINS; i++, d++)
5591 char name[64]; /* build_function will make a copy. */
5597 /* Find last parm. */
5598 for (parm = 1; d->parm[parm] != SPU_BTI_END_OF_PARAMS; parm++)
5603 p = tree_cons (NULL_TREE, spu_builtin_types[d->parm[--parm]], p);
5605 p = build_function_type (spu_builtin_types[d->parm[0]], p);
5607 sprintf (name, "__builtin_%s", d->name);
5608 spu_builtin_decls[i] =
5609 add_builtin_function (name, p, i, BUILT_IN_MD, NULL, NULL_TREE);
5610 if (d->fcode == SPU_MASK_FOR_LOAD)
5611 TREE_READONLY (spu_builtin_decls[i]) = 1;
5613 /* These builtins don't throw. */
5614 TREE_NOTHROW (spu_builtin_decls[i]) = 1;
5619 spu_restore_stack_block (rtx op0 ATTRIBUTE_UNUSED, rtx op1)
5621 static unsigned char arr[16] =
5622 { 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3 };
5624 rtx temp = gen_reg_rtx (Pmode);
5625 rtx temp2 = gen_reg_rtx (V4SImode);
5626 rtx temp3 = gen_reg_rtx (V4SImode);
5627 rtx pat = gen_reg_rtx (TImode);
5628 rtx sp = gen_rtx_REG (V4SImode, STACK_POINTER_REGNUM);
5630 emit_move_insn (pat, array_to_constant (TImode, arr));
5632 /* Restore the sp. */
5633 emit_move_insn (temp, op1);
5634 emit_move_insn (temp2, gen_frame_mem (V4SImode, stack_pointer_rtx));
5636 /* Compute available stack size for sp. */
5637 emit_insn (gen_subsi3 (temp, temp, stack_pointer_rtx));
5638 emit_insn (gen_shufb (temp3, temp, temp, pat));
5640 emit_insn (gen_addv4si3 (sp, sp, temp3));
5641 emit_move_insn (gen_frame_mem (V4SImode, stack_pointer_rtx), temp2);
5645 spu_safe_dma (HOST_WIDE_INT channel)
5647 return TARGET_SAFE_DMA && channel >= 21 && channel <= 27;
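/* Expand a splat: replicate the scalar ops[1] into every element of the
   vector ops[0].  Constants are emitted directly as a CONST_VECTOR;
   otherwise a shufb with a mode-dependent byte pattern broadcasts the
   value from the preferred slot.  */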
5651 spu_builtin_splats (rtx ops[])
5653 enum machine_mode mode = GET_MODE (ops[0]);
5654 if (GET_CODE (ops[1]) == CONST_INT || GET_CODE (ops[1]) == CONST_DOUBLE)
5656 unsigned char arr[16];
5657 constant_to_array (GET_MODE_INNER (mode), ops[1], arr);
5658 emit_move_insn (ops[0], array_to_constant (mode, arr));
5662 rtx reg = gen_reg_rtx (TImode);
5664 if (GET_CODE (ops[1]) != REG
5665 && GET_CODE (ops[1]) != SUBREG)
5666 ops[1] = force_reg (GET_MODE_INNER (mode), ops[1]);
5672 immed_double_const (0x0001020304050607ll, 0x1011121314151617ll,
5678 immed_double_const (0x0001020300010203ll, 0x0001020300010203ll,
5683 immed_double_const (0x0203020302030203ll, 0x0203020302030203ll,
5688 immed_double_const (0x0303030303030303ll, 0x0303030303030303ll,
5694 emit_move_insn (reg, shuf);
5695 emit_insn (gen_shufb (ops[0], ops[1], ops[1], reg));
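/* Expand an extract: move element ops[2] of the vector ops[1] into the
   scalar ops[0].  Constant element numbers use the vec_extract patterns;
   a variable element number is turned into a byte rotate that brings the
   element into the preferred slot.  */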
5700 spu_builtin_extract (rtx ops[])
5702 enum machine_mode mode;
5705 mode = GET_MODE (ops[1]);
5707 if (GET_CODE (ops[2]) == CONST_INT)
5712 emit_insn (gen_vec_extractv16qi (ops[0], ops[1], ops[2]));
5715 emit_insn (gen_vec_extractv8hi (ops[0], ops[1], ops[2]));
5718 emit_insn (gen_vec_extractv4sf (ops[0], ops[1], ops[2]));
5721 emit_insn (gen_vec_extractv4si (ops[0], ops[1], ops[2]));
5724 emit_insn (gen_vec_extractv2di (ops[0], ops[1], ops[2]));
5727 emit_insn (gen_vec_extractv2df (ops[0], ops[1], ops[2]));
5735 from = spu_gen_subreg (TImode, ops[1]);
5736 rot = gen_reg_rtx (TImode);
5737 tmp = gen_reg_rtx (SImode);
5742 emit_insn (gen_addsi3 (tmp, ops[2], GEN_INT (-3)));
5745 emit_insn (gen_addsi3 (tmp, ops[2], ops[2]));
5746 emit_insn (gen_addsi3 (tmp, tmp, GEN_INT (-2)));
5750 emit_insn (gen_ashlsi3 (tmp, ops[2], GEN_INT (2)));
5754 emit_insn (gen_ashlsi3 (tmp, ops[2], GEN_INT (3)));
5759 emit_insn (gen_rotqby_ti (rot, from, tmp));
5761 emit_insn (gen_spu_convert (ops[0], rot));
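/* Expand an insert: place the scalar ops[2] into element ops[3] of the
   vector ops[1], leaving the result in ops[0].  A cpat-generated mask
   drives a shufb that merges the new element into the old vector.  */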
5765 spu_builtin_insert (rtx ops[])
5767 enum machine_mode mode = GET_MODE (ops[0]);
5768 enum machine_mode imode = GET_MODE_INNER (mode);
5769 rtx mask = gen_reg_rtx (TImode);
5772 if (GET_CODE (ops[3]) == CONST_INT)
5773 offset = GEN_INT (INTVAL (ops[3]) * GET_MODE_SIZE (imode));
5776 offset = gen_reg_rtx (SImode);
5777 emit_insn (gen_mulsi3
5778 (offset, ops[3], GEN_INT (GET_MODE_SIZE (imode))));
5781 (mask, stack_pointer_rtx, offset,
5782 GEN_INT (GET_MODE_SIZE (imode))));
5783 emit_insn (gen_shufb (ops[0], ops[1], ops[2], mask));
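/* Expand a promote: move the scalar ops[1] into element ops[2] of the
   vector ops[0] by rotating a quadword so the value lands at the
   required byte offset; the remaining elements are left unspecified.  */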
5787 spu_builtin_promote (rtx ops[])
5789 enum machine_mode mode, imode;
5790 rtx rot, from, offset;
5793 mode = GET_MODE (ops[0]);
5794 imode = GET_MODE_INNER (mode);
5796 from = gen_reg_rtx (TImode);
5797 rot = spu_gen_subreg (TImode, ops[0]);
5799 emit_insn (gen_spu_convert (from, ops[1]));
5801 if (GET_CODE (ops[2]) == CONST_INT)
5803 pos = -GET_MODE_SIZE (imode) * INTVAL (ops[2]);
5804 if (GET_MODE_SIZE (imode) < 4)
5805 pos += 4 - GET_MODE_SIZE (imode);
5806 offset = GEN_INT (pos & 15);
5810 offset = gen_reg_rtx (SImode);
5814 emit_insn (gen_subsi3 (offset, GEN_INT (3), ops[2]));
5817 emit_insn (gen_subsi3 (offset, GEN_INT (1), ops[2]));
5818 emit_insn (gen_addsi3 (offset, offset, offset));
5822 emit_insn (gen_subsi3 (offset, GEN_INT (0), ops[2]));
5823 emit_insn (gen_ashlsi3 (offset, offset, GEN_INT (2)));
5827 emit_insn (gen_ashlsi3 (offset, ops[2], GEN_INT (3)));
5833 emit_insn (gen_rotqby_ti (rot, from, offset));
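/* Initialize the trampoline at M_TRAMP: assemble a short instruction
   sequence that loads the static chain register with CXT and branches to
   FNDECL, patching the precomputed instruction words with the actual
   values.  */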
5837 spu_trampoline_init (rtx m_tramp, tree fndecl, rtx cxt)
5839 rtx fnaddr = XEXP (DECL_RTL (fndecl), 0);
5840 rtx shuf = gen_reg_rtx (V4SImode);
5841 rtx insn = gen_reg_rtx (V4SImode);
5846 fnaddr = force_reg (SImode, fnaddr);
5847 cxt = force_reg (SImode, cxt);
5849 if (TARGET_LARGE_MEM)
5851 rtx rotl = gen_reg_rtx (V4SImode);
5852 rtx mask = gen_reg_rtx (V4SImode);
5853 rtx bi = gen_reg_rtx (SImode);
5854 static unsigned char const shufa[16] = {
5855 2, 3, 0, 1, 18, 19, 16, 17,
5856 0, 1, 2, 3, 16, 17, 18, 19
5858 static unsigned char const insna[16] = {
5860 0x41, 0, 0, STATIC_CHAIN_REGNUM,
5862 0x60, 0x80, 0, STATIC_CHAIN_REGNUM
5865 shufc = force_reg (TImode, array_to_constant (TImode, shufa));
5866 insnc = force_reg (V4SImode, array_to_constant (V4SImode, insna));
5868 emit_insn (gen_shufb (shuf, fnaddr, cxt, shufc));
5869 emit_insn (gen_vrotlv4si3 (rotl, shuf, spu_const (V4SImode, 7)));
5870 emit_insn (gen_movv4si (mask, spu_const (V4SImode, 0xffff << 7)));
5871 emit_insn (gen_selb (insn, insnc, rotl, mask));
5873 mem = adjust_address (m_tramp, V4SImode, 0);
5874 emit_move_insn (mem, insn);
5876 emit_move_insn (bi, GEN_INT (0x35000000 + (79 << 7)));
5877 mem = adjust_address (m_tramp, Pmode, 16);
5878 emit_move_insn (mem, bi);
5882 rtx scxt = gen_reg_rtx (SImode);
5883 rtx sfnaddr = gen_reg_rtx (SImode);
5884 static unsigned char const insna[16] = {
5885 0x42, 0, 0, STATIC_CHAIN_REGNUM,
5891 shufc = gen_reg_rtx (TImode);
5892 insnc = force_reg (V4SImode, array_to_constant (V4SImode, insna));
5894 /* By or'ing all of cxt with the ila opcode we are assuming cxt
5895 fits 18 bits and the last 4 are zeros. This will be true if
5896 the stack pointer is initialized to 0x3fff0 at program start,
5897 otherwise the ila instruction will be garbage. */
5899 emit_insn (gen_ashlsi3 (scxt, cxt, GEN_INT (7)));
5900 emit_insn (gen_ashlsi3 (sfnaddr, fnaddr, GEN_INT (5)));
5902 (shufc, stack_pointer_rtx, GEN_INT (4), GEN_INT (4)));
5903 emit_insn (gen_shufb (shuf, sfnaddr, scxt, shufc));
5904 emit_insn (gen_iorv4si3 (insn, insnc, shuf));
5906 mem = adjust_address (m_tramp, V4SImode, 0);
5907 emit_move_insn (mem, insn);
5909 emit_insn (gen_sync ());
5913 spu_warn_func_return (tree decl)
5915 /* Naked functions are implemented entirely in assembly, including the
5916 return sequence, so suppress warnings about this. */
5917 return !spu_naked_function_p (decl);
5921 spu_expand_sign_extend (rtx ops[])
5923 unsigned char arr[16];
5924 rtx pat = gen_reg_rtx (TImode);
5927 last = GET_MODE (ops[0]) == DImode ? 7 : 15;
5928 if (GET_MODE (ops[1]) == QImode)
5930 sign = gen_reg_rtx (HImode);
5931 emit_insn (gen_extendqihi2 (sign, ops[1]));
5932 for (i = 0; i < 16; i++)
5938 for (i = 0; i < 16; i++)
5940 switch (GET_MODE (ops[1]))
5943 sign = gen_reg_rtx (SImode);
5944 emit_insn (gen_extendhisi2 (sign, ops[1]));
5946 arr[last - 1] = 0x02;
5949 sign = gen_reg_rtx (SImode);
5950 emit_insn (gen_ashrsi3 (sign, ops[1], GEN_INT (31)));
5951 for (i = 0; i < 4; i++)
5952 arr[last - i] = 3 - i;
5955 sign = gen_reg_rtx (SImode);
5956 c = gen_reg_rtx (SImode);
5957 emit_insn (gen_spu_convert (c, ops[1]));
5958 emit_insn (gen_ashrsi3 (sign, c, GEN_INT (31)));
5959 for (i = 0; i < 8; i++)
5960 arr[last - i] = 7 - i;
5966 emit_move_insn (pat, array_to_constant (TImode, arr));
5967 emit_insn (gen_shufb (ops[0], ops[1], sign, pat));
5970 /* Expand vector initialization. If there are any constant parts,
5971 load constant parts first. Then load any non-constant parts. */
5973 spu_expand_vector_init (rtx target, rtx vals)
5975 enum machine_mode mode = GET_MODE (target);
5976 int n_elts = GET_MODE_NUNITS (mode);
5978 bool all_same = true;
5979 rtx first, x = NULL_RTX, first_constant = NULL_RTX;
5982 first = XVECEXP (vals, 0, 0);
5983 for (i = 0; i < n_elts; ++i)
5985 x = XVECEXP (vals, 0, i);
5986 if (!(CONST_INT_P (x)
5987 || GET_CODE (x) == CONST_DOUBLE
5988 || GET_CODE (x) == CONST_FIXED))
5992 if (first_constant == NULL_RTX)
5995 if (i > 0 && !rtx_equal_p (x, first))
5999 /* if all elements are the same, use splats to repeat elements */
6002 if (!CONSTANT_P (first)
6003 && !register_operand (first, GET_MODE (x)))
6004 first = force_reg (GET_MODE (first), first);
6005 emit_insn (gen_spu_splats (target, first));
6009 /* load constant parts */
6010 if (n_var != n_elts)
6014 emit_move_insn (target,
6015 gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0)));
6019 rtx constant_parts_rtx = copy_rtx (vals);
6021 gcc_assert (first_constant != NULL_RTX);
6022 /* fill empty slots with the first constant, this increases
6023 our chance of using splats in the recursive call below. */
6024 for (i = 0; i < n_elts; ++i)
6026 x = XVECEXP (constant_parts_rtx, 0, i);
6027 if (!(CONST_INT_P (x)
6028 || GET_CODE (x) == CONST_DOUBLE
6029 || GET_CODE (x) == CONST_FIXED))
6030 XVECEXP (constant_parts_rtx, 0, i) = first_constant;
6033 spu_expand_vector_init (target, constant_parts_rtx);
6037 /* load variable parts */
6040 rtx insert_operands[4];
6042 insert_operands[0] = target;
6043 insert_operands[2] = target;
6044 for (i = 0; i < n_elts; ++i)
6046 x = XVECEXP (vals, 0, i);
6047 if (!(CONST_INT_P (x)
6048 || GET_CODE (x) == CONST_DOUBLE
6049 || GET_CODE (x) == CONST_FIXED))
6051 if (!register_operand (x, GET_MODE (x)))
6052 x = force_reg (GET_MODE (x), x);
6053 insert_operands[1] = x;
6054 insert_operands[3] = GEN_INT (i);
6055 spu_builtin_insert (insert_operands);
6061 /* Return insn index for the vector compare instruction for given CODE,
6062 and DEST_MODE, OP_MODE. Return -1 if valid insn is not available. */
6065 get_vec_cmp_insn (enum rtx_code code,
6066 enum machine_mode dest_mode,
6067 enum machine_mode op_mode)
6073 if (dest_mode == V16QImode && op_mode == V16QImode)
6074 return CODE_FOR_ceq_v16qi;
6075 if (dest_mode == V8HImode && op_mode == V8HImode)
6076 return CODE_FOR_ceq_v8hi;
6077 if (dest_mode == V4SImode && op_mode == V4SImode)
6078 return CODE_FOR_ceq_v4si;
6079 if (dest_mode == V4SImode && op_mode == V4SFmode)
6080 return CODE_FOR_ceq_v4sf;
6081 if (dest_mode == V2DImode && op_mode == V2DFmode)
6082 return CODE_FOR_ceq_v2df;
6085 if (dest_mode == V16QImode && op_mode == V16QImode)
6086 return CODE_FOR_cgt_v16qi;
6087 if (dest_mode == V8HImode && op_mode == V8HImode)
6088 return CODE_FOR_cgt_v8hi;
6089 if (dest_mode == V4SImode && op_mode == V4SImode)
6090 return CODE_FOR_cgt_v4si;
6091 if (dest_mode == V4SImode && op_mode == V4SFmode)
6092 return CODE_FOR_cgt_v4sf;
6093 if (dest_mode == V2DImode && op_mode == V2DFmode)
6094 return CODE_FOR_cgt_v2df;
6097 if (dest_mode == V16QImode && op_mode == V16QImode)
6098 return CODE_FOR_clgt_v16qi;
6099 if (dest_mode == V8HImode && op_mode == V8HImode)
6100 return CODE_FOR_clgt_v8hi;
6101 if (dest_mode == V4SImode && op_mode == V4SImode)
6102 return CODE_FOR_clgt_v4si;
6110 /* Emit vector compare for operands OP0 and OP1 using code RCODE.
6111 DMODE is expected destination mode. This is a recursive function. */
6114 spu_emit_vector_compare (enum rtx_code rcode,
6116 enum machine_mode dmode)
6120 enum machine_mode dest_mode;
6121 enum machine_mode op_mode = GET_MODE (op1);
6123 gcc_assert (GET_MODE (op0) == GET_MODE (op1));
6125 /* Floating point vector compare instructions use destination V4SImode.
6126 Double floating point vector compare instructions use destination V2DImode.
6127 Move the destination to the appropriate mode later. */
6128 if (dmode == V4SFmode)
6129 dest_mode = V4SImode;
6130 else if (dmode == V2DFmode)
6131 dest_mode = V2DImode;
6135 mask = gen_reg_rtx (dest_mode);
6136 vec_cmp_insn = get_vec_cmp_insn (rcode, dest_mode, op_mode);
6138 if (vec_cmp_insn == -1)
6140 bool swap_operands = false;
6141 bool try_again = false;
6146 swap_operands = true;
6151 swap_operands = true;
6161 /* Treat A != B as ~(A==B). */
6163 enum rtx_code rev_code;
6164 enum insn_code nor_code;
6167 rev_code = reverse_condition_maybe_unordered (rcode);
6168 rev_mask = spu_emit_vector_compare (rev_code, op0, op1, dest_mode);
6170 nor_code = optab_handler (one_cmpl_optab, dest_mode);
6171 gcc_assert (nor_code != CODE_FOR_nothing);
6172 emit_insn (GEN_FCN (nor_code) (mask, rev_mask));
6173 if (dmode != dest_mode)
6175 rtx temp = gen_reg_rtx (dest_mode);
6176 convert_move (temp, mask, 0);
6186 /* Try GT/GTU/LT/LTU OR EQ */
6189 enum insn_code ior_code;
6190 enum rtx_code new_code;
6194 case GE: new_code = GT; break;
6195 case GEU: new_code = GTU; break;
6196 case LE: new_code = LT; break;
6197 case LEU: new_code = LTU; break;
6202 c_rtx = spu_emit_vector_compare (new_code, op0, op1, dest_mode);
6203 eq_rtx = spu_emit_vector_compare (EQ, op0, op1, dest_mode);
6205 ior_code = optab_handler (ior_optab, dest_mode);
6206 gcc_assert (ior_code != CODE_FOR_nothing);
6207 emit_insn (GEN_FCN (ior_code) (mask, c_rtx, eq_rtx));
6208 if (dmode != dest_mode)
6210 rtx temp = gen_reg_rtx (dest_mode);
6211 convert_move (temp, mask, 0);
6221 enum insn_code ior_code;
6223 lt_rtx = spu_emit_vector_compare (LT, op0, op1, dest_mode);
6224 gt_rtx = spu_emit_vector_compare (GT, op0, op1, dest_mode);
6226 ior_code = optab_handler (ior_optab, dest_mode);
6227 gcc_assert (ior_code != CODE_FOR_nothing);
6228 emit_insn (GEN_FCN (ior_code) (mask, lt_rtx, gt_rtx));
6229 if (dmode != dest_mode)
6231 rtx temp = gen_reg_rtx (dest_mode);
6232 convert_move (temp, mask, 0);
6239 /* Implement as (A==A) & (B==B) */
6242 enum insn_code and_code;
6244 a_rtx = spu_emit_vector_compare (EQ, op0, op0, dest_mode);
6245 b_rtx = spu_emit_vector_compare (EQ, op1, op1, dest_mode);
6247 and_code = optab_handler (and_optab, dest_mode);
6248 gcc_assert (and_code != CODE_FOR_nothing);
6249 emit_insn (GEN_FCN (and_code) (mask, a_rtx, b_rtx));
6250 if (dmode != dest_mode)
6252 rtx temp = gen_reg_rtx (dest_mode);
6253 convert_move (temp, mask, 0);
6263 /* You only get two chances. */
6265 vec_cmp_insn = get_vec_cmp_insn (rcode, dest_mode, op_mode);
6267 gcc_assert (vec_cmp_insn != -1);
6278 emit_insn (GEN_FCN (vec_cmp_insn) (mask, op0, op1));
6279 if (dmode != dest_mode)
6281 rtx temp = gen_reg_rtx (dest_mode);
6282 convert_move (temp, mask, 0);
6289 /* Emit vector conditional expression.
6290 DEST is destination. OP1 and OP2 are two VEC_COND_EXPR operands.
6291 CC_OP0 and CC_OP1 are the two operands for the relation operation COND. */
6294 spu_emit_vector_cond_expr (rtx dest, rtx op1, rtx op2,
6295 rtx cond, rtx cc_op0, rtx cc_op1)
6297 enum machine_mode dest_mode = GET_MODE (dest);
6298 enum rtx_code rcode = GET_CODE (cond);
6301 /* Get the vector mask for the given relational operations. */
6302 mask = spu_emit_vector_compare (rcode, cc_op0, cc_op1, dest_mode);
6304 emit_insn(gen_selb (dest, op2, op1, mask));
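/* Copy OP into a register of mode MODE, converting integer constants
   and values of a different-sized mode as needed.  */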
6310 spu_force_reg (enum machine_mode mode, rtx op)
6313 if (GET_MODE (op) == VOIDmode || GET_MODE (op) == BLKmode)
6315 if ((SCALAR_INT_MODE_P (mode) && GET_CODE (op) == CONST_INT)
6316 || GET_MODE (op) == BLKmode)
6317 return force_reg (mode, convert_to_mode (mode, op, 0));
6321 r = force_reg (GET_MODE (op), op);
6322 if (GET_MODE_SIZE (GET_MODE (op)) == GET_MODE_SIZE (mode))
6324 x = simplify_gen_subreg (mode, r, GET_MODE (op), 0);
6329 x = gen_reg_rtx (mode);
6330 emit_insn (gen_spu_convert (x, r));
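/* Check that OP is a valid argument for parameter class P of builtin D:
   immediate operands must be literals within the class's range, and
   addresses whose low bits will be ignored draw a warning.  */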
6335 spu_check_builtin_parm (struct spu_builtin_description *d, rtx op, int p)
6337 HOST_WIDE_INT v = 0;
6339 /* Check the range of immediate operands. */
6340 if (p >= SPU_BTI_7 && p <= SPU_BTI_U18)
6342 int range = p - SPU_BTI_7;
6344 if (!CONSTANT_P (op))
6345 error ("%s expects an integer literal in the range [%d, %d]",
6347 spu_builtin_range[range].low, spu_builtin_range[range].high);
6349 if (GET_CODE (op) == CONST
6350 && (GET_CODE (XEXP (op, 0)) == PLUS
6351 || GET_CODE (XEXP (op, 0)) == MINUS))
6353 v = INTVAL (XEXP (XEXP (op, 0), 1));
6354 op = XEXP (XEXP (op, 0), 0);
6356 else if (GET_CODE (op) == CONST_INT)
6358 else if (GET_CODE (op) == CONST_VECTOR
6359 && GET_CODE (CONST_VECTOR_ELT (op, 0)) == CONST_INT)
6360 v = INTVAL (CONST_VECTOR_ELT (op, 0));
6362 /* The default for v is 0 which is valid in every range. */
6363 if (v < spu_builtin_range[range].low
6364 || v > spu_builtin_range[range].high)
6365 error ("%s expects an integer literal in the range [%d, %d]. (%wd)",
6367 spu_builtin_range[range].low, spu_builtin_range[range].high,
6376 /* This is only used in lqa and stqa. Even though the insns
6377 encode 16 bits of the address (all but the 2 least
6378 significant), only 14 bits are used because it is masked to
6379 be 16 byte aligned. */
6383 /* This is used for lqr and stqr. */
6390 if (GET_CODE (op) == LABEL_REF
6391 || (GET_CODE (op) == SYMBOL_REF
6392 && SYMBOL_REF_FUNCTION_P (op))
6393 || (v & ((1 << lsbits) - 1)) != 0)
6394 warning (0, "%d least significant bits of %s are ignored", lsbits,
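/* Expand the arguments of CALL_EXPR EXP for builtin D into OPS[],
   reserving ops[0] for the result when the builtin returns a value, and
   return the number of operands filled in.  */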
6401 expand_builtin_args (struct spu_builtin_description *d, tree exp,
6402 rtx target, rtx ops[])
6404 enum insn_code icode = (enum insn_code) d->icode;
6407 /* Expand the arguments into rtl. */
6409 if (d->parm[0] != SPU_BTI_VOID)
6412 for (a = 0; d->parm[a+1] != SPU_BTI_END_OF_PARAMS; i++, a++)
6414 tree arg = CALL_EXPR_ARG (exp, a);
6417 ops[i] = expand_expr (arg, NULL_RTX, VOIDmode, EXPAND_NORMAL);
6420 gcc_assert (i == insn_data[icode].n_generator_args);
6425 spu_expand_builtin_1 (struct spu_builtin_description *d,
6426 tree exp, rtx target)
6430 enum insn_code icode = (enum insn_code) d->icode;
6431 enum machine_mode mode, tmode;
6436 /* Set up ops[] with values from arglist. */
6437 n_operands = expand_builtin_args (d, exp, target, ops);
6439 /* Handle the target operand which must be operand 0. */
6441 if (d->parm[0] != SPU_BTI_VOID)
6444 /* We prefer the mode specified for the match_operand otherwise
6445 use the mode from the builtin function prototype. */
6446 tmode = insn_data[d->icode].operand[0].mode;
6447 if (tmode == VOIDmode)
6448 tmode = TYPE_MODE (spu_builtin_types[d->parm[0]]);
6450 /* Try to use target, because not using it can lead to extra copies,
6451 and when we are using all of the registers extra copies lead
6452 to spilling. */
6453 if (target && GET_CODE (target) == REG && GET_MODE (target) == tmode)
6456 target = ops[0] = gen_reg_rtx (tmode);
6458 if (!(*insn_data[icode].operand[0].predicate) (ops[0], tmode))
6464 if (d->fcode == SPU_MASK_FOR_LOAD)
6466 enum machine_mode mode = insn_data[icode].operand[1].mode;
6471 arg = CALL_EXPR_ARG (exp, 0);
6472 gcc_assert (POINTER_TYPE_P (TREE_TYPE (arg)));
6473 op = expand_expr (arg, NULL_RTX, Pmode, EXPAND_NORMAL);
6474 addr = memory_address (mode, op);
6477 op = gen_reg_rtx (GET_MODE (addr));
6478 emit_insn (gen_rtx_SET (VOIDmode, op,
6479 gen_rtx_NEG (GET_MODE (addr), addr)));
6480 op = gen_rtx_MEM (mode, op);
6482 pat = GEN_FCN (icode) (target, op);
6489 /* Ignore align_hint, but still expand its args in case they have
6490 side effects. */
6491 if (icode == CODE_FOR_spu_align_hint)
6494 /* Handle the rest of the operands. */
6495 for (p = 1; i < n_operands; i++, p++)
6497 if (insn_data[d->icode].operand[i].mode != VOIDmode)
6498 mode = insn_data[d->icode].operand[i].mode;
6500 mode = TYPE_MODE (spu_builtin_types[d->parm[i]]);
6502 /* mode can be VOIDmode here for labels */
6504 /* For specific intrinsics with an immediate operand, e.g.,
6505 si_ai(), we sometimes need to convert the scalar argument to a
6506 vector argument by splatting the scalar. */
6507 if (VECTOR_MODE_P (mode)
6508 && (GET_CODE (ops[i]) == CONST_INT
6509 || GET_MODE_CLASS (GET_MODE (ops[i])) == MODE_INT
6510 || GET_MODE_CLASS (GET_MODE (ops[i])) == MODE_FLOAT))
6512 if (GET_CODE (ops[i]) == CONST_INT)
6513 ops[i] = spu_const (mode, INTVAL (ops[i]));
6516 rtx reg = gen_reg_rtx (mode);
6517 enum machine_mode imode = GET_MODE_INNER (mode);
6518 if (!spu_nonmem_operand (ops[i], GET_MODE (ops[i])))
6519 ops[i] = force_reg (GET_MODE (ops[i]), ops[i]);
6520 if (imode != GET_MODE (ops[i]))
6521 ops[i] = convert_to_mode (imode, ops[i],
6522 TYPE_UNSIGNED (spu_builtin_types
6524 emit_insn (gen_spu_splats (reg, ops[i]));
6529 spu_check_builtin_parm (d, ops[i], d->parm[p]);
6531 if (!(*insn_data[icode].operand[i].predicate) (ops[i], mode))
6532 ops[i] = spu_force_reg (mode, ops[i]);
6538 pat = GEN_FCN (icode) (0);
6541 pat = GEN_FCN (icode) (ops[0]);
6544 pat = GEN_FCN (icode) (ops[0], ops[1]);
6547 pat = GEN_FCN (icode) (ops[0], ops[1], ops[2]);
6550 pat = GEN_FCN (icode) (ops[0], ops[1], ops[2], ops[3]);
6553 pat = GEN_FCN (icode) (ops[0], ops[1], ops[2], ops[3], ops[4]);
6556 pat = GEN_FCN (icode) (ops[0], ops[1], ops[2], ops[3], ops[4], ops[5]);
6565 if (d->type == B_CALL || d->type == B_BISLED)
6566 emit_call_insn (pat);
6567 else if (d->type == B_JUMP)
6569 emit_jump_insn (pat);
6575 return_type = spu_builtin_types[d->parm[0]];
6576 if (d->parm[0] != SPU_BTI_VOID
6577 && GET_MODE (target) != TYPE_MODE (return_type))
6579 /* target is the return value. It should always have the mode of
6580 the builtin function prototype. */
6581 target = spu_force_reg (TYPE_MODE (return_type), target);
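/* Implement TARGET_EXPAND_BUILTIN.  Look up the builtin by function
   code and hand it to spu_expand_builtin_1.  */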
6588 spu_expand_builtin (tree exp,
6590 rtx subtarget ATTRIBUTE_UNUSED,
6591 enum machine_mode mode ATTRIBUTE_UNUSED,
6592 int ignore ATTRIBUTE_UNUSED)
6594 tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
6595 unsigned int fcode = DECL_FUNCTION_CODE (fndecl);
6596 struct spu_builtin_description *d;
6598 if (fcode < NUM_SPU_BUILTINS)
6600 d = &spu_builtins[fcode];
6602 return spu_expand_builtin_1 (d, exp, target);
6607 /* Implement targetm.vectorize.builtin_mask_for_load. */
6609 spu_builtin_mask_for_load (void)
6611 return spu_builtin_decls[SPU_MASK_FOR_LOAD];
6614 /* Implement targetm.vectorize.builtin_vectorization_cost. */
6616 spu_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost,
6618 int misalign ATTRIBUTE_UNUSED)
6622 switch (type_of_cost)
6630 case cond_branch_not_taken:
6632 case vec_promote_demote:
6639 /* Load + rotate. */
6642 case unaligned_load:
6645 case cond_branch_taken:
6649 elements = TYPE_VECTOR_SUBPARTS (vectype);
6650 return elements / 2 + 1;
6657 /* Implement targetm.vectorize.init_cost. */
6660 spu_init_cost (struct loop *loop_info ATTRIBUTE_UNUSED)
6662 unsigned *cost = XNEWVEC (unsigned, 3);
6663 cost[vect_prologue] = cost[vect_body] = cost[vect_epilogue] = 0;
6667 /* Implement targetm.vectorize.add_stmt_cost. */
6670 spu_add_stmt_cost (void *data, int count, enum vect_cost_for_stmt kind,
6671 struct _stmt_vec_info *stmt_info, int misalign,
6672 enum vect_cost_model_location where)
6674 unsigned *cost = (unsigned *) data;
6675 unsigned retval = 0;
6677 if (flag_vect_cost_model)
6679 tree vectype = stmt_info ? stmt_vectype (stmt_info) : NULL_TREE;
6680 int stmt_cost = spu_builtin_vectorization_cost (kind, vectype, misalign);
6682 /* Statements in an inner loop relative to the loop being
6683 vectorized are weighted more heavily. The value here is
6684 arbitrary and could potentially be improved with analysis. */
6685 if (where == vect_body && stmt_info && stmt_in_inner_loop_p (stmt_info))
6686 count *= 50; /* FIXME. */
6688 retval = (unsigned) (count * stmt_cost);
6689 cost[where] += retval;
6695 /* Implement targetm.vectorize.finish_cost. */
6698 spu_finish_cost (void *data, unsigned *prologue_cost,
6699 unsigned *body_cost, unsigned *epilogue_cost)
6701 unsigned *cost = (unsigned *) data;
6702 *prologue_cost = cost[vect_prologue];
6703 *body_cost = cost[vect_body];
6704 *epilogue_cost = cost[vect_epilogue];
6707 /* Implement targetm.vectorize.destroy_cost_data. */
6710 spu_destroy_cost_data (void *data)
6715 /* Return true iff the data reference of TYPE can reach vector alignment (16)
6716 after applying N iterations. This routine does not determine
6717 how many iterations are required to reach the desired alignment. */
6720 spu_vector_alignment_reachable (const_tree type ATTRIBUTE_UNUSED, bool is_packed)
6725 /* All other types are naturally aligned. */
6729 /* Return the appropriate mode for a named address pointer. */
6730 static enum machine_mode
6731 spu_addr_space_pointer_mode (addr_space_t addrspace)
6735 case ADDR_SPACE_GENERIC:
6744 /* Return the appropriate mode for a named address address. */
6745 static enum machine_mode
6746 spu_addr_space_address_mode (addr_space_t addrspace)
6750 case ADDR_SPACE_GENERIC:
6759 /* Determine if one named address space is a subset of another. */
6762 spu_addr_space_subset_p (addr_space_t subset, addr_space_t superset)
6764 gcc_assert (subset == ADDR_SPACE_GENERIC || subset == ADDR_SPACE_EA);
6765 gcc_assert (superset == ADDR_SPACE_GENERIC || superset == ADDR_SPACE_EA);
6767 if (subset == superset)
6770 /* If we have -mno-address-space-conversion, treat __ea and generic as not
6771 being subsets but instead as disjoint address spaces. */
6772 else if (!TARGET_ADDRESS_SPACE_CONVERSION)
6776 return (subset == ADDR_SPACE_GENERIC && superset == ADDR_SPACE_EA);
6779 /* Convert from one address space to another. */
6781 spu_addr_space_convert (rtx op, tree from_type, tree to_type)
6783 addr_space_t from_as = TYPE_ADDR_SPACE (TREE_TYPE (from_type));
6784 addr_space_t to_as = TYPE_ADDR_SPACE (TREE_TYPE (to_type));
6786 gcc_assert (from_as == ADDR_SPACE_GENERIC || from_as == ADDR_SPACE_EA);
6787 gcc_assert (to_as == ADDR_SPACE_GENERIC || to_as == ADDR_SPACE_EA);
6789 if (to_as == ADDR_SPACE_GENERIC && from_as == ADDR_SPACE_EA)
6793 ls = gen_const_mem (DImode,
6794 gen_rtx_SYMBOL_REF (Pmode, "__ea_local_store"));
6795 set_mem_align (ls, 128);
6797 result = gen_reg_rtx (Pmode);
6798 ls = force_reg (Pmode, convert_modes (Pmode, DImode, ls, 1));
6799 op = force_reg (Pmode, convert_modes (Pmode, EAmode, op, 1));
6800 ls = emit_conditional_move (ls, NE, op, const0_rtx, Pmode,
6801 ls, const0_rtx, Pmode, 1);
6803 emit_insn (gen_subsi3 (result, op, ls));
6808 else if (to_as == ADDR_SPACE_EA && from_as == ADDR_SPACE_GENERIC)
6812 ls = gen_const_mem (DImode,
6813 gen_rtx_SYMBOL_REF (Pmode, "__ea_local_store"));
6814 set_mem_align (ls, 128);
6816 result = gen_reg_rtx (EAmode);
6817 ls = force_reg (EAmode, convert_modes (EAmode, DImode, ls, 1));
6818 op = force_reg (Pmode, op);
6819 ls = emit_conditional_move (ls, NE, op, const0_rtx, Pmode,
6820 ls, const0_rtx, EAmode, 1);
6821 op = force_reg (EAmode, convert_modes (EAmode, Pmode, op, 1));
6823 if (EAmode == SImode)
6824 emit_insn (gen_addsi3 (result, op, ls));
6826 emit_insn (gen_adddi3 (result, op, ls));
6836 /* Count the total number of instructions in each pipe and return the
6837 maximum, which is used as the Minimum Iteration Interval (MII)
6838 in the modulo scheduler. get_pipe() will return -2, -1, 0, or 1.
6839 -2 are instructions that can go in pipe0 or pipe1. */
6841 spu_sms_res_mii (struct ddg *g)
6844 unsigned t[4] = {0, 0, 0, 0};
6846 for (i = 0; i < g->num_nodes; i++)
6848 rtx_insn *insn = g->nodes[i].insn;
6849 int p = get_pipe (insn) + 2;
6851 gcc_assert (p >= 0);
6855 if (dump_file && INSN_P (insn))
6856 fprintf (dump_file, "i%d %s %d %d\n",
6858 insn_data[INSN_CODE(insn)].name,
6862 fprintf (dump_file, "%d %d %d %d\n", t[0], t[1], t[2], t[3]);
6864 return MAX ((t[0] + t[2] + t[3] + 1) / 2, MAX (t[2], t[3]));
6869 spu_init_expanders (void)
6874 /* The hard frame pointer register is only 128-bit aligned when
6875 frame_pointer_needed is true. We don't know that until we're
6876 expanding the prologue. */
6877 REGNO_POINTER_ALIGN (HARD_FRAME_POINTER_REGNUM) = 8;
6879 /* A number of passes use LAST_VIRTUAL_REGISTER+1 and
6880 LAST_VIRTUAL_REGISTER+2 to test the back-end. We want them
6881 to be treated as aligned, so generate them here. */
6882 r0 = gen_reg_rtx (SImode);
6883 r1 = gen_reg_rtx (SImode);
6884 mark_reg_pointer (r0, 128);
6885 mark_reg_pointer (r1, 128);
6886 gcc_assert (REGNO (r0) == LAST_VIRTUAL_REGISTER + 1
6887 && REGNO (r1) == LAST_VIRTUAL_REGISTER + 2);
6891 static enum machine_mode
6892 spu_libgcc_cmp_return_mode (void)
6895 /* For the SPU, word mode is TImode, so it is better to use SImode
6896 for compare returns. */
6900 static enum machine_mode
6901 spu_libgcc_shift_count_mode (void)
6903 /* For the SPU, word mode is TImode, so it is better to use SImode
6904 for shift counts. */
6908 /* Implement targetm.section_type_flags. */
6910 spu_section_type_flags (tree decl, const char *name, int reloc)
6912 /* .toe needs to have type @nobits. */
6913 if (strcmp (name, ".toe") == 0)
6915 /* Don't load _ea into the current address space. */
6916 if (strcmp (name, "._ea") == 0)
6917 return SECTION_WRITE | SECTION_DEBUG;
6918 return default_section_type_flags (decl, name, reloc);
6921 /* Implement targetm.select_section. */
6923 spu_select_section (tree decl, int reloc, unsigned HOST_WIDE_INT align)
6925 /* Variables and constants defined in the __ea address space
6926 go into a special section named "._ea". */
6927 if (TREE_TYPE (decl) != error_mark_node
6928 && TYPE_ADDR_SPACE (TREE_TYPE (decl)) == ADDR_SPACE_EA)
6930 /* We might get called with string constants, but get_named_section
6931 doesn't like them as they are not DECLs. Also, we need to set
6932 flags in that case. */
6934 return get_section ("._ea", SECTION_WRITE | SECTION_DEBUG, NULL);
6936 return get_named_section (decl, "._ea", reloc);
6939 return default_elf_select_section (decl, reloc, align);
6942 /* Implement targetm.unique_section. */
6944 spu_unique_section (tree decl, int reloc)
6946 /* We don't support unique section names in the __ea address space. */
6948 if (TREE_TYPE (decl) != error_mark_node
6949 && TYPE_ADDR_SPACE (TREE_TYPE (decl)) != 0)
6952 default_unique_section (decl, reloc);
6955 /* Generate a constant or register which contains 2^SCALE. We assume
6956 the result is valid for MODE. Currently, MODE must be V4SFmode and
6957 SCALE must be SImode. */
6959 spu_gen_exp2 (enum machine_mode mode, rtx scale)
6961 gcc_assert (mode == V4SFmode);
6962 gcc_assert (GET_MODE (scale) == SImode || GET_CODE (scale) == CONST_INT);
6963 if (GET_CODE (scale) != CONST_INT)
6965 /* unsigned int exp = (127 + scale) << 23;
6966 __vector float m = (__vector float) spu_splats (exp); */
6967 rtx reg = force_reg (SImode, scale);
6968 rtx exp = gen_reg_rtx (SImode);
6969 rtx mul = gen_reg_rtx (mode);
6970 emit_insn (gen_addsi3 (exp, reg, GEN_INT (127)));
6971 emit_insn (gen_ashlsi3 (exp, exp, GEN_INT (23)));
6972 emit_insn (gen_spu_splats (mul, gen_rtx_SUBREG (GET_MODE_INNER (mode), exp, 0)));
6977 HOST_WIDE_INT exp = 127 + INTVAL (scale);
6978 unsigned char arr[16];
6979 arr[0] = arr[4] = arr[8] = arr[12] = exp >> 1;
6980 arr[1] = arr[5] = arr[9] = arr[13] = exp << 7;
6981 arr[2] = arr[6] = arr[10] = arr[14] = 0;
6982 arr[3] = arr[7] = arr[11] = arr[15] = 0;
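/* For example, SCALE = 0 gives exp = 127, so each word is 0x3f800000,
   i.e. 1.0f; SCALE = 1 gives exp = 128 and words of 0x40000000, i.e. 2.0f.  */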
6983 return array_to_constant (mode, arr);
6987 /* After reload, just change the convert into a move instruction
6988 or a dead instruction. */
6990 spu_split_convert (rtx ops[])
6992 if (REGNO (ops[0]) == REGNO (ops[1]))
6993 emit_note (NOTE_INSN_DELETED);
6996 /* Use TImode always as this might help hard reg copyprop. */
6997 rtx op0 = gen_rtx_REG (TImode, REGNO (ops[0]));
6998 rtx op1 = gen_rtx_REG (TImode, REGNO (ops[1]));
6999 emit_insn (gen_move_insn (op0, op1));
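/* Emit the profiling prologue: a branch-and-set-link to _mcount.  */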
7004 spu_function_profiler (FILE * file, int labelno ATTRIBUTE_UNUSED)
7006 fprintf (file, "# profile\n");
7007 fprintf (file, "brsl $75, _mcount\n");
7010 /* Implement targetm.ref_may_alias_errno. */
7012 spu_ref_may_alias_errno (ao_ref *ref)
7014 tree base = ao_ref_base (ref);
7016 /* With SPU newlib, errno is defined as a member of the external
7017 _impure_data structure.
7018 The default implementation of this target
7019 hook does not recognize such expressions, so special-case them here. */
7021 if (TREE_CODE (base) == VAR_DECL
7022 && !TREE_STATIC (base)
7023 && DECL_EXTERNAL (base)
7024 && TREE_CODE (TREE_TYPE (base)) == RECORD_TYPE
7025 && strcmp (IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (base)),
7026 "_impure_data") == 0
7027 /* _errno is the first member of _impure_data. */
7028 && ref->offset == 0)
7031 return default_ref_may_alias_errno (ref);
7034 /* Output thunk to FILE that implements a C++ virtual function call (with
7035 multiple inheritance) to FUNCTION. The thunk adjusts the this pointer
7036 by DELTA, and unless VCALL_OFFSET is zero, applies an additional adjustment
7037 stored at VCALL_OFFSET in the vtable whose address is located at offset 0
7038 relative to the resulting this pointer. */
7041 spu_output_mi_thunk (FILE *file, tree thunk ATTRIBUTE_UNUSED,
7042 HOST_WIDE_INT delta, HOST_WIDE_INT vcall_offset,
7047 /* Make sure unwind info is emitted for the thunk if needed. */
7048 final_start_function (emit_barrier (), file, 1);
7050 /* Operand 0 is the target function. */
7051 op[0] = XEXP (DECL_RTL (function), 0);
7053 /* Operand 1 is the 'this' pointer. */
7054 if (aggregate_value_p (TREE_TYPE (TREE_TYPE (function)), function))
7055 op[1] = gen_rtx_REG (Pmode, FIRST_ARG_REGNUM + 1);
7057 op[1] = gen_rtx_REG (Pmode, FIRST_ARG_REGNUM);
7059 /* Operands 2/3 are the low/high halfwords of delta. */
7060 op[2] = GEN_INT (trunc_int_for_mode (delta, HImode));
7061 op[3] = GEN_INT (trunc_int_for_mode (delta >> 16, HImode));
7063 /* Operands 4/5 are the low/high halfwords of vcall_offset. */
7064 op[4] = GEN_INT (trunc_int_for_mode (vcall_offset, HImode));
7065 op[5] = GEN_INT (trunc_int_for_mode (vcall_offset >> 16, HImode));
7067 /* Operands 6/7 are temporary registers. */
7068 op[6] = gen_rtx_REG (Pmode, 79);
7069 op[7] = gen_rtx_REG (Pmode, 78);
7071 /* Add DELTA to this pointer. */
7074 if (delta >= -0x200 && delta < 0x200)
7075 output_asm_insn ("ai\t%1,%1,%2", op);
7076 else if (delta >= -0x8000 && delta < 0x8000)
7078 output_asm_insn ("il\t%6,%2", op);
7079 output_asm_insn ("a\t%1,%1,%6", op);
7083 output_asm_insn ("ilhu\t%6,%3", op);
7084 output_asm_insn ("iohl\t%6,%2", op);
7085 output_asm_insn ("a\t%1,%1,%6", op);
7089 /* Perform vcall adjustment. */
7092 output_asm_insn ("lqd\t%7,0(%1)", op);
7093 output_asm_insn ("rotqby\t%7,%7,%1", op);
7095 if (vcall_offset >= -0x200 && vcall_offset < 0x200)
7096 output_asm_insn ("ai\t%7,%7,%4", op);
7097 else if (vcall_offset >= -0x8000 && vcall_offset < 0x8000)
7099 output_asm_insn ("il\t%6,%4", op);
7100 output_asm_insn ("a\t%7,%7,%6", op);
7104 output_asm_insn ("ilhu\t%6,%5", op);
7105 output_asm_insn ("iohl\t%6,%4", op);
7106 output_asm_insn ("a\t%7,%7,%6", op);
7109 output_asm_insn ("lqd\t%6,0(%7)", op);
7110 output_asm_insn ("rotqby\t%6,%6,%7", op);
7111 output_asm_insn ("a\t%1,%1,%6", op);
7114 /* Jump to target. */
7115 output_asm_insn ("br\t%0", op);
7117 final_end_function ();
7120 /* Canonicalize a comparison from one we don't have to one we do have. */
7122 spu_canonicalize_comparison (int *code, rtx *op0, rtx *op1,
7123 bool op0_preserve_value)
7125 if (!op0_preserve_value
7126 && (*code == LE || *code == LT || *code == LEU || *code == LTU))
7131 *code = (int)swap_condition ((enum rtx_code)*code);
7135 /* Table of machine attributes. */
7136 static const struct attribute_spec spu_attribute_table[] =
7138 /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler,
7139 affects_type_identity } */
7140 { "naked", 0, 0, true, false, false, spu_handle_fndecl_attribute,
7142 { "spu_vector", 0, 0, false, true, false, spu_handle_vector_attribute,
7144 { NULL, 0, 0, false, false, false, NULL, false }
7147 /* TARGET overrides. */
7149 #undef TARGET_ADDR_SPACE_POINTER_MODE
7150 #define TARGET_ADDR_SPACE_POINTER_MODE spu_addr_space_pointer_mode
7152 #undef TARGET_ADDR_SPACE_ADDRESS_MODE
7153 #define TARGET_ADDR_SPACE_ADDRESS_MODE spu_addr_space_address_mode
7155 #undef TARGET_ADDR_SPACE_LEGITIMATE_ADDRESS_P
7156 #define TARGET_ADDR_SPACE_LEGITIMATE_ADDRESS_P \
7157 spu_addr_space_legitimate_address_p
7159 #undef TARGET_ADDR_SPACE_LEGITIMIZE_ADDRESS
7160 #define TARGET_ADDR_SPACE_LEGITIMIZE_ADDRESS spu_addr_space_legitimize_address
7162 #undef TARGET_ADDR_SPACE_SUBSET_P
7163 #define TARGET_ADDR_SPACE_SUBSET_P spu_addr_space_subset_p
7165 #undef TARGET_ADDR_SPACE_CONVERT
7166 #define TARGET_ADDR_SPACE_CONVERT spu_addr_space_convert
7168 #undef TARGET_INIT_BUILTINS
7169 #define TARGET_INIT_BUILTINS spu_init_builtins
7170 #undef TARGET_BUILTIN_DECL
7171 #define TARGET_BUILTIN_DECL spu_builtin_decl
7173 #undef TARGET_EXPAND_BUILTIN
7174 #define TARGET_EXPAND_BUILTIN spu_expand_builtin
7176 #undef TARGET_UNWIND_WORD_MODE
7177 #define TARGET_UNWIND_WORD_MODE spu_unwind_word_mode
7179 #undef TARGET_LEGITIMIZE_ADDRESS
7180 #define TARGET_LEGITIMIZE_ADDRESS spu_legitimize_address
7182 /* The current assembler doesn't like .4byte foo@ppu, so use the normal .long
7183 and .quad for the debugger. When it is known that the assembler is fixed,
7184 these can be removed. */
7185 #undef TARGET_ASM_UNALIGNED_SI_OP
7186 #define TARGET_ASM_UNALIGNED_SI_OP "\t.long\t"
7188 #undef TARGET_ASM_ALIGNED_DI_OP
7189 #define TARGET_ASM_ALIGNED_DI_OP "\t.quad\t"
7191 /* The .8byte directive doesn't seem to work well for a 32 bit
7192 architecture. */
7193 #undef TARGET_ASM_UNALIGNED_DI_OP
7194 #define TARGET_ASM_UNALIGNED_DI_OP NULL

#undef TARGET_RTX_COSTS
#define TARGET_RTX_COSTS spu_rtx_costs

#undef TARGET_ADDRESS_COST
#define TARGET_ADDRESS_COST hook_int_rtx_mode_as_bool_0

#undef TARGET_SCHED_ISSUE_RATE
#define TARGET_SCHED_ISSUE_RATE spu_sched_issue_rate

#undef TARGET_SCHED_INIT_GLOBAL
#define TARGET_SCHED_INIT_GLOBAL spu_sched_init_global

#undef TARGET_SCHED_INIT
#define TARGET_SCHED_INIT spu_sched_init

#undef TARGET_SCHED_VARIABLE_ISSUE
#define TARGET_SCHED_VARIABLE_ISSUE spu_sched_variable_issue

#undef TARGET_SCHED_REORDER
#define TARGET_SCHED_REORDER spu_sched_reorder

#undef TARGET_SCHED_REORDER2
#define TARGET_SCHED_REORDER2 spu_sched_reorder

#undef TARGET_SCHED_ADJUST_COST
#define TARGET_SCHED_ADJUST_COST spu_sched_adjust_cost

#undef TARGET_ATTRIBUTE_TABLE
#define TARGET_ATTRIBUTE_TABLE spu_attribute_table

#undef TARGET_ASM_INTEGER
#define TARGET_ASM_INTEGER spu_assemble_integer

#undef TARGET_SCALAR_MODE_SUPPORTED_P
#define TARGET_SCALAR_MODE_SUPPORTED_P spu_scalar_mode_supported_p

#undef TARGET_VECTOR_MODE_SUPPORTED_P
#define TARGET_VECTOR_MODE_SUPPORTED_P spu_vector_mode_supported_p

#undef TARGET_FUNCTION_OK_FOR_SIBCALL
#define TARGET_FUNCTION_OK_FOR_SIBCALL spu_function_ok_for_sibcall

#undef TARGET_ASM_GLOBALIZE_LABEL
#define TARGET_ASM_GLOBALIZE_LABEL spu_asm_globalize_label

#undef TARGET_PASS_BY_REFERENCE
#define TARGET_PASS_BY_REFERENCE spu_pass_by_reference

#undef TARGET_FUNCTION_ARG
#define TARGET_FUNCTION_ARG spu_function_arg

#undef TARGET_FUNCTION_ARG_ADVANCE
#define TARGET_FUNCTION_ARG_ADVANCE spu_function_arg_advance

#undef TARGET_MUST_PASS_IN_STACK
#define TARGET_MUST_PASS_IN_STACK must_pass_in_stack_var_size

#undef TARGET_BUILD_BUILTIN_VA_LIST
#define TARGET_BUILD_BUILTIN_VA_LIST spu_build_builtin_va_list

#undef TARGET_EXPAND_BUILTIN_VA_START
#define TARGET_EXPAND_BUILTIN_VA_START spu_va_start

#undef TARGET_SETUP_INCOMING_VARARGS
#define TARGET_SETUP_INCOMING_VARARGS spu_setup_incoming_varargs

#undef TARGET_MACHINE_DEPENDENT_REORG
#define TARGET_MACHINE_DEPENDENT_REORG spu_machine_dependent_reorg

#undef TARGET_GIMPLIFY_VA_ARG_EXPR
#define TARGET_GIMPLIFY_VA_ARG_EXPR spu_gimplify_va_arg_expr

#undef TARGET_INIT_LIBFUNCS
#define TARGET_INIT_LIBFUNCS spu_init_libfuncs

#undef TARGET_RETURN_IN_MEMORY
#define TARGET_RETURN_IN_MEMORY spu_return_in_memory

#undef TARGET_ENCODE_SECTION_INFO
#define TARGET_ENCODE_SECTION_INFO spu_encode_section_info

#undef TARGET_VECTORIZE_BUILTIN_MASK_FOR_LOAD
#define TARGET_VECTORIZE_BUILTIN_MASK_FOR_LOAD spu_builtin_mask_for_load

#undef TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST
#define TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST spu_builtin_vectorization_cost

#undef TARGET_VECTORIZE_INIT_COST
#define TARGET_VECTORIZE_INIT_COST spu_init_cost

#undef TARGET_VECTORIZE_ADD_STMT_COST
#define TARGET_VECTORIZE_ADD_STMT_COST spu_add_stmt_cost

#undef TARGET_VECTORIZE_FINISH_COST
#define TARGET_VECTORIZE_FINISH_COST spu_finish_cost

#undef TARGET_VECTORIZE_DESTROY_COST_DATA
#define TARGET_VECTORIZE_DESTROY_COST_DATA spu_destroy_cost_data

#undef TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE
#define TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE spu_vector_alignment_reachable

#undef TARGET_LIBGCC_CMP_RETURN_MODE
#define TARGET_LIBGCC_CMP_RETURN_MODE spu_libgcc_cmp_return_mode

#undef TARGET_LIBGCC_SHIFT_COUNT_MODE
#define TARGET_LIBGCC_SHIFT_COUNT_MODE spu_libgcc_shift_count_mode

#undef TARGET_SCHED_SMS_RES_MII
#define TARGET_SCHED_SMS_RES_MII spu_sms_res_mii

#undef TARGET_SECTION_TYPE_FLAGS
#define TARGET_SECTION_TYPE_FLAGS spu_section_type_flags

#undef TARGET_ASM_SELECT_SECTION
#define TARGET_ASM_SELECT_SECTION spu_select_section

#undef TARGET_ASM_UNIQUE_SECTION
#define TARGET_ASM_UNIQUE_SECTION spu_unique_section

#undef TARGET_LEGITIMATE_ADDRESS_P
#define TARGET_LEGITIMATE_ADDRESS_P spu_legitimate_address_p

#undef TARGET_LEGITIMATE_CONSTANT_P
#define TARGET_LEGITIMATE_CONSTANT_P spu_legitimate_constant_p

#undef TARGET_TRAMPOLINE_INIT
#define TARGET_TRAMPOLINE_INIT spu_trampoline_init

#undef TARGET_WARN_FUNC_RETURN
#define TARGET_WARN_FUNC_RETURN spu_warn_func_return

#undef TARGET_OPTION_OVERRIDE
#define TARGET_OPTION_OVERRIDE spu_option_override

#undef TARGET_CONDITIONAL_REGISTER_USAGE
#define TARGET_CONDITIONAL_REGISTER_USAGE spu_conditional_register_usage

#undef TARGET_REF_MAY_ALIAS_ERRNO
#define TARGET_REF_MAY_ALIAS_ERRNO spu_ref_may_alias_errno

#undef TARGET_ASM_OUTPUT_MI_THUNK
#define TARGET_ASM_OUTPUT_MI_THUNK spu_output_mi_thunk
#undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
#define TARGET_ASM_CAN_OUTPUT_MI_THUNK hook_bool_const_tree_hwi_hwi_const_tree_true

/* Variable tracking should be run after all optimizations which
   change order of insns.  It also needs a valid CFG.  */
#undef TARGET_DELAY_VARTRACK
#define TARGET_DELAY_VARTRACK true

#undef TARGET_CANONICALIZE_COMPARISON
#define TARGET_CANONICALIZE_COMPARISON spu_canonicalize_comparison

#undef TARGET_CAN_USE_DOLOOP_P
#define TARGET_CAN_USE_DOLOOP_P can_use_doloop_if_innermost

struct gcc_target targetm = TARGET_INITIALIZER;