#include "tm-preds.h"
#include "gt-bfin.h"
#include "basic-block.h"
+#include "timevar.h"
/* A C structure for machine-specific, per-function data.
This is added to the cfun structure. */
/* Nonzero if -mshared-library-id was given. */
static int bfin_lib_id_given;
+/* Nonzero if -fschedule-insns2 was given. We override it and
+ call the scheduler ourselves during reorg. */
+static int bfin_flag_schedule_insns2;
+
+/* Determines whether we run variable tracking in machine dependent
+ reorganization. */
+static int bfin_flag_var_tracking;
+
+int splitting_for_sched;
+
static void
bfin_globalize_label (FILE *stream, const char *name)
{
FILE *file = asm_out_file;
int i;
+ /* Variable tracking should be run after all optimizations which change order
+ of insns. It also needs a valid CFG. This can't be done in
+ override_options, because flag_var_tracking is finalized after
+ that. */
+ bfin_flag_var_tracking = flag_var_tracking;
+ flag_var_tracking = 0;
+
fprintf (file, ".file \"%s\";\n", input_filename);
for (i = 0; arg_regs[i] >= 0; i++)
return;
/* Choose whether to use a sequence using a temporary register, or
- a sequence with multiple adds. We can add a signed 7 bit value
+ a sequence with multiple adds. We can add a signed 7-bit value
in one instruction. */
if (value > 120 || value < -120)
{
/* This predicate is used to compute the length of a load/store insn.
OP is a MEM rtx, we return nonzero if its addressing mode requires a
- 32 bit instruction. */
+ 32-bit instruction. */
int
effective_address_32bit_p (rtx op, enum machine_mode mode)
return 0;
}
+ if (GET_CODE (XEXP (op, 1)) == UNSPEC)
+ return 1;
+
offset = INTVAL (XEXP (op, 1));
- /* All byte loads use a 16 bit offset. */
+ /* All byte loads use a 16-bit offset. */
if (GET_MODE_SIZE (mode) == 1)
return 1;
void
print_operand (FILE *file, rtx x, char code)
{
- enum machine_mode mode = GET_MODE (x);
+ enum machine_mode mode;
+
+ if (code == '!')
+ {
+ if (GET_MODE (current_output_insn) == SImode)
+ fprintf (file, " ||");
+ else
+ fprintf (file, ";");
+ return;
+ }
+
+ mode = GET_MODE (x);
switch (code)
{
x = GEN_INT ((INTVAL (x) >> 16) & 0xffff);
else if (code == 'h')
x = GEN_INT (INTVAL (x) & 0xffff);
+ else if (code == 'N')
+ x = GEN_INT (-INTVAL (x));
else if (code == 'X')
x = GEN_INT (exact_log2 (0xffffffff & INTVAL (x)));
else if (code == 'Y')
if (mode == PDImode || mode == V2PDImode)
return regno == REG_A0 || regno == REG_A1;
- /* Allow all normal 32 bit regs, except REG_M3, in case regclass ever comes
+ /* Allow all normal 32-bit regs, except REG_M3, in case regclass ever comes
up with a bad register class (such as ALL_REGS) for DImode. */
if (mode == DImode)
return regno < REG_M3;
flag_schedule_insns = 0;
+ /* Passes after sched2 can break the helpful TImode annotations that
+ haifa-sched puts on every insn. Just do scheduling in reorg. */
+ bfin_flag_schedule_insns2 = flag_schedule_insns_after_reload;
+ flag_schedule_insns_after_reload = 0;
+
init_machine_status = bfin_init_machine_status;
}
}
\f
/* Return nonzero iff C has exactly one bit set if it is interpreted
- as a 32 bit constant. */
+ as a 32-bit constant. */
int
log2constp (unsigned HOST_WIDE_INT c)
bfin_rtx_costs (rtx x, int code, int outer_code, int *total)
{
int cost2 = COSTS_N_INSNS (1);
+ rtx op0, op1;
switch (code)
{
return true;
case PLUS:
- if (GET_MODE (x) == Pmode)
+ op0 = XEXP (x, 0);
+ op1 = XEXP (x, 1);
+ if (GET_MODE (x) == SImode)
{
- if (GET_CODE (XEXP (x, 0)) == MULT
- && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT)
+ if (GET_CODE (op0) == MULT
+ && GET_CODE (XEXP (op0, 1)) == CONST_INT)
{
- HOST_WIDE_INT val = INTVAL (XEXP (XEXP (x, 0), 1));
+ HOST_WIDE_INT val = INTVAL (XEXP (op0, 1));
if (val == 2 || val == 4)
{
*total = cost2;
- *total += rtx_cost (XEXP (XEXP (x, 0), 0), outer_code);
- *total += rtx_cost (XEXP (x, 1), outer_code);
+ *total += rtx_cost (XEXP (op0, 0), outer_code);
+ *total += rtx_cost (op1, outer_code);
return true;
}
}
+ *total = cost2;
+ if (GET_CODE (op0) != REG
+ && (GET_CODE (op0) != SUBREG || GET_CODE (SUBREG_REG (op0)) != REG))
+ *total += rtx_cost (op0, SET);
+#if 0 /* We'd like to do this for accuracy, but it biases the loop optimizer
+ towards creating too many induction variables. */
+ if (!reg_or_7bit_operand (op1, SImode))
+ *total += rtx_cost (op1, SET);
+#endif
}
-
- /* fall through */
+ else if (GET_MODE (x) == DImode)
+ {
+ *total = 6 * cost2;
+ if (GET_CODE (op1) != CONST_INT
+ || !CONST_7BIT_IMM_P (INTVAL (op1)))
+ *total += rtx_cost (op1, PLUS);
+ if (GET_CODE (op0) != REG
+ && (GET_CODE (op0) != SUBREG || GET_CODE (SUBREG_REG (op0)) != REG))
+ *total += rtx_cost (op0, PLUS);
+ }
+ return true;
case MINUS:
+ if (GET_MODE (x) == DImode)
+ *total = 6 * cost2;
+ else
+ *total = cost2;
+ return true;
+
case ASHIFT:
case ASHIFTRT:
case LSHIFTRT:
if (GET_MODE (x) == DImode)
*total = 6 * cost2;
- return false;
+ else
+ *total = cost2;
+
+ op0 = XEXP (x, 0);
+ op1 = XEXP (x, 1);
+ if (GET_CODE (op0) != REG
+ && (GET_CODE (op0) != SUBREG || GET_CODE (SUBREG_REG (op0)) != REG))
+ *total += rtx_cost (op0, code);
+
+ return true;
- case AND:
case IOR:
+ case AND:
case XOR:
+ op0 = XEXP (x, 0);
+ op1 = XEXP (x, 1);
+
+ /* Handle special cases of IOR: rotates, ALIGN insns, movstricthi_high. */
+ if (code == IOR)
+ {
+ if ((GET_CODE (op0) == LSHIFTRT && GET_CODE (op1) == ASHIFT)
+ || (GET_CODE (op0) == ASHIFT && GET_CODE (op1) == ZERO_EXTEND)
+ || (GET_CODE (op0) == ASHIFT && GET_CODE (op1) == LSHIFTRT)
+ || (GET_CODE (op0) == AND && GET_CODE (op1) == CONST_INT))
+ {
+ *total = cost2;
+ return true;
+ }
+ }
+
+ if (GET_CODE (op0) != REG
+ && (GET_CODE (op0) != SUBREG || GET_CODE (SUBREG_REG (op0)) != REG))
+ *total += rtx_cost (op0, code);
+
if (GET_MODE (x) == DImode)
- *total = 2 * cost2;
- return false;
+ {
+ *total = 2 * cost2;
+ return true;
+ }
+ *total = cost2;
+ if (GET_MODE (x) != SImode)
+ return true;
+
+ if (code == AND)
+ {
+ if (! rhs_andsi3_operand (XEXP (x, 1), SImode))
+ *total += rtx_cost (XEXP (x, 1), code);
+ }
+ else
+ {
+ if (! regorlog2_operand (XEXP (x, 1), SImode))
+ *total += rtx_cost (XEXP (x, 1), code);
+ }
+
+ return true;
+
+ case ZERO_EXTRACT:
+ case SIGN_EXTRACT:
+ if (outer_code == SET
+ && XEXP (x, 1) == const1_rtx
+ && GET_CODE (XEXP (x, 2)) == CONST_INT)
+ {
+ *total = 2 * cost2;
+ return true;
+ }
+ /* fall through */
+
+ case SIGN_EXTEND:
+ case ZERO_EXTEND:
+ *total = cost2;
+ return true;
case MULT:
- if (GET_MODE_SIZE (GET_MODE (x)) <= UNITS_PER_WORD)
- *total = COSTS_N_INSNS (3);
- return false;
+ {
+ op0 = XEXP (x, 0);
+ op1 = XEXP (x, 1);
+ if (GET_CODE (op0) == GET_CODE (op1)
+ && (GET_CODE (op0) == ZERO_EXTEND
+ || GET_CODE (op0) == SIGN_EXTEND))
+ {
+ *total = COSTS_N_INSNS (1);
+ op0 = XEXP (op0, 0);
+ op1 = XEXP (op1, 0);
+ }
+ else if (optimize_size)
+ *total = COSTS_N_INSNS (1);
+ else
+ *total = COSTS_N_INSNS (3);
+
+ if (GET_CODE (op0) != REG
+ && (GET_CODE (op0) != SUBREG || GET_CODE (SUBREG_REG (op0)) != REG))
+ *total += rtx_cost (op0, MULT);
+ if (GET_CODE (op1) != REG
+ && (GET_CODE (op1) != SUBREG || GET_CODE (SUBREG_REG (op1)) != REG))
+ *total += rtx_cost (op1, MULT);
+ }
+ return true;
case UDIV:
case UMOD:
rtx pat = PATTERN (dep_insn);
rtx dest = SET_DEST (pat);
rtx src = SET_SRC (pat);
- if (! ADDRESS_REGNO_P (REGNO (dest)) || ! D_REGNO_P (REGNO (src)))
+ if (! ADDRESS_REGNO_P (REGNO (dest))
+ || ! (MEM_P (src) || D_REGNO_P (REGNO (src))))
return cost;
return cost + (dep_insn_type == TYPE_MOVE ? 4 : 3);
}
}
}
else if (CALL_P (last_insn)
- || get_attr_type (last_insn) == TYPE_SYNC
+ || (GET_CODE (PATTERN (last_insn)) != SEQUENCE
+ && get_attr_type (last_insn) == TYPE_SYNC)
|| recog_memoized (last_insn) == CODE_FOR_return_internal)
{
if (dump_file)
if (GET_CODE (PATTERN (last_insn)) == ASM_INPUT
|| asm_noperands (PATTERN (last_insn)) >= 0
- || get_attr_seq_insns (last_insn) == SEQ_INSNS_MULTI)
+ || (GET_CODE (PATTERN (last_insn)) != SEQUENCE
+ && get_attr_seq_insns (last_insn) == SEQ_INSNS_MULTI))
{
nop_insn = emit_insn_after (gen_nop (), last_insn);
last_insn = nop_insn;
if (dump_file)
print_rtl (dump_file, get_insns ());
+
+ FOR_EACH_BB (bb)
+ bb->aux = NULL;
}
+\f
+/* Possibly generate a SEQUENCE out of three insns found in SLOT.
+ Returns true if we modified the insn chain, false otherwise. */
+static bool
+gen_one_bundle (rtx slot[3])
+{
+ rtx bundle;
+
+ gcc_assert (slot[1] != NULL_RTX);
+
+ /* Verify that we really can do the multi-issue. */
+ if (slot[0])
+ {
+ rtx t = NEXT_INSN (slot[0]);
+ while (t != slot[1])
+ {
+ if (GET_CODE (t) != NOTE
+ || NOTE_LINE_NUMBER (t) != NOTE_INSN_DELETED)
+ return false;
+ t = NEXT_INSN (t);
+ }
+ }
+ if (slot[2])
+ {
+ rtx t = NEXT_INSN (slot[1]);
+ while (t != slot[2])
+ {
+ if (GET_CODE (t) != NOTE
+ || NOTE_LINE_NUMBER (t) != NOTE_INSN_DELETED)
+ return false;
+ t = NEXT_INSN (t);
+ }
+ }
+
+ if (slot[0] == NULL_RTX)
+ slot[0] = emit_insn_before (gen_mnop (), slot[1]);
+ if (slot[2] == NULL_RTX)
+ slot[2] = emit_insn_after (gen_nop (), slot[1]);
+
+ /* Avoid line number information being printed inside one bundle. */
+ if (INSN_LOCATOR (slot[1])
+ && INSN_LOCATOR (slot[1]) != INSN_LOCATOR (slot[0]))
+ INSN_LOCATOR (slot[1]) = INSN_LOCATOR (slot[0]);
+ if (INSN_LOCATOR (slot[2])
+ && INSN_LOCATOR (slot[2]) != INSN_LOCATOR (slot[0]))
+ INSN_LOCATOR (slot[2]) = INSN_LOCATOR (slot[0]);
+
+ /* Terminate them with "|| " instead of ";" in the output. */
+ PUT_MODE (slot[0], SImode);
+ PUT_MODE (slot[1], SImode);
+
+ /* This is a cheat to avoid emit_insn's special handling of SEQUENCEs.
+ Generating a PARALLEL first and changing its code later is the
+ easiest way to emit a SEQUENCE insn. */
+ bundle = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (3, slot[0], slot[1], slot[2]));
+ emit_insn_before (bundle, slot[0]);
+ remove_insn (slot[0]);
+ remove_insn (slot[1]);
+ remove_insn (slot[2]);
+ PUT_CODE (bundle, SEQUENCE);
+
+ return true;
+}
+
+/* Go through all insns, and use the information generated during scheduling
+ to generate SEQUENCEs to represent bundles of instructions issued
+ simultaneously. */
+static void
+bfin_gen_bundles (void)
+{
+ basic_block bb;
+ FOR_EACH_BB (bb)
+ {
+ rtx insn, next;
+ rtx slot[3];
+ int n_filled = 0;
+
+ slot[0] = slot[1] = slot[2] = NULL_RTX;
+ for (insn = BB_HEAD (bb);; insn = next)
+ {
+ int at_end;
+ if (INSN_P (insn))
+ {
+ if (get_attr_type (insn) == TYPE_DSP32)
+ slot[0] = insn;
+ else if (slot[1] == NULL_RTX)
+ slot[1] = insn;
+ else
+ slot[2] = insn;
+ n_filled++;
+ }
+
+ next = NEXT_INSN (insn);
+ while (next && insn != BB_END (bb)
+ && !(INSN_P (next)
+ && GET_CODE (PATTERN (next)) != USE
+ && GET_CODE (PATTERN (next)) != CLOBBER))
+ {
+ insn = next;
+ next = NEXT_INSN (insn);
+ }
+
+ /* BB_END can change due to emitting extra NOPs, so check here. */
+ at_end = insn == BB_END (bb);
+ if (at_end || GET_MODE (next) == TImode)
+ {
+ if ((n_filled < 2
+ || !gen_one_bundle (slot))
+ && slot[0] != NULL_RTX)
+ {
+ rtx pat = PATTERN (slot[0]);
+ if (GET_CODE (pat) == SET
+ && GET_CODE (SET_SRC (pat)) == UNSPEC
+ && XINT (SET_SRC (pat), 1) == UNSPEC_32BIT)
+ {
+ SET_SRC (pat) = XVECEXP (SET_SRC (pat), 0, 0);
+ INSN_CODE (slot[0]) = -1;
+ }
+ }
+ n_filled = 0;
+ slot[0] = slot[1] = slot[2] = NULL_RTX;
+ }
+ if (at_end)
+ break;
+ }
+ }
+}
\f
+/* Return an insn type for INSN that can be used by the caller for anomaly
+ workarounds. This differs from plain get_attr_type in that it handles
+ SEQUENCEs. */
+
+static enum attr_type
+type_for_anomaly (rtx insn)
+{
+ rtx pat = PATTERN (insn);
+ if (GET_CODE (pat) == SEQUENCE)
+ {
+ enum attr_type t;
+ t = get_attr_type (XVECEXP (pat, 0, 1));
+ if (t == TYPE_MCLD)
+ return t;
+ t = get_attr_type (XVECEXP (pat, 0, 2));
+ if (t == TYPE_MCLD)
+ return t;
+ return TYPE_MCST;
+ }
+ else
+ return get_attr_type (insn);
+}
+
+/* Return nonzero if INSN contains any loads that may trap. It handles
+ SEQUENCEs correctly. */
+
+static bool
+trapping_loads_p (rtx insn)
+{
+ rtx pat = PATTERN (insn);
+ if (GET_CODE (pat) == SEQUENCE)
+ {
+ enum attr_type t;
+ t = get_attr_type (XVECEXP (pat, 0, 1));
+ if (t == TYPE_MCLD
+ && may_trap_p (SET_SRC (PATTERN (XVECEXP (pat, 0, 1)))))
+ return true;
+ t = get_attr_type (XVECEXP (pat, 0, 2));
+ if (t == TYPE_MCLD
+ && may_trap_p (SET_SRC (PATTERN (XVECEXP (pat, 0, 2)))))
+ return true;
+ return false;
+ }
+ else
+ return may_trap_p (SET_SRC (single_set (insn)));
+}
+
/* We use the machine specific reorg pass for emitting CSYNC instructions
after conditional branches as needed.
rtx insn, last_condjump = NULL_RTX;
int cycles_since_jump = INT_MAX;
+ /* We are freeing block_for_insn in the toplev to keep compatibility
+ with old MDEP_REORGS that are not CFG based. Recompute it now. */
+ compute_bb_for_insn ();
+
+ if (bfin_flag_schedule_insns2)
+ {
+ splitting_for_sched = 1;
+ split_all_insns (0);
+ splitting_for_sched = 0;
+
+ update_life_info (NULL, UPDATE_LIFE_GLOBAL_RM_NOTES, PROP_DEATH_NOTES);
+
+ timevar_push (TV_SCHED2);
+ schedule_insns ();
+ timevar_pop (TV_SCHED2);
+
+ /* Examine the schedule and insert nops as necessary for 64-bit parallel
+ instructions. */
+ bfin_gen_bundles ();
+ }
+
/* Doloop optimization */
if (cfun->machine->has_hardware_loops)
bfin_reorg_loops (dump_file);
}
else if (INSN_P (insn))
{
- enum attr_type type = get_attr_type (insn);
+ enum attr_type type = type_for_anomaly (insn);
int delay_needed = 0;
if (cycles_since_jump < INT_MAX)
cycles_since_jump++;
if (type == TYPE_MCLD && TARGET_SPECLD_ANOMALY)
{
- rtx pat = single_set (insn);
- if (may_trap_p (SET_SRC (pat)))
+ if (trapping_loads_p (insn))
delay_needed = 3;
}
else if (type == TYPE_SYNC && TARGET_CSYNC_ANOMALY)
if (INSN_P (target))
{
- enum attr_type type = get_attr_type (target);
+ enum attr_type type = type_for_anomaly (target);
int delay_needed = 0;
if (cycles_since_jump < INT_MAX)
cycles_since_jump++;
}
}
}
+
+ if (bfin_flag_var_tracking)
+ {
+ timevar_push (TV_VAR_TRACKING);
+ variable_tracking_main ();
+ timevar_pop (TV_VAR_TRACKING);
+ }
}
\f
/* Handle interrupt_handler, exception_handler and nmi_handler function
if this is a normal binary op, or one of the MACFLAG_xxx constants. */
static rtx
-bfin_expand_binop_builtin (enum insn_code icode, tree arglist, rtx target,
+bfin_expand_binop_builtin (enum insn_code icode, tree exp, rtx target,
int macflag)
{
rtx pat;
- tree arg0 = TREE_VALUE (arglist);
- tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
+ tree arg0 = CALL_EXPR_ARG (exp, 0);
+ tree arg1 = CALL_EXPR_ARG (exp, 1);
rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
rtx op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
enum machine_mode op0mode = GET_MODE (op0);
/* Subroutine of bfin_expand_builtin to take care of unop insns. */
static rtx
-bfin_expand_unop_builtin (enum insn_code icode, tree arglist,
+bfin_expand_unop_builtin (enum insn_code icode, tree exp,
rtx target)
{
rtx pat;
- tree arg0 = TREE_VALUE (arglist);
+ tree arg0 = CALL_EXPR_ARG (exp, 0);
rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
enum machine_mode op0mode = GET_MODE (op0);
enum machine_mode tmode = insn_data[icode].operand[0].mode;
size_t i;
enum insn_code icode;
const struct builtin_description *d;
- tree fndecl = TREE_OPERAND (TREE_OPERAND (exp, 0), 0);
- tree arglist = TREE_OPERAND (exp, 1);
+ tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
unsigned int fcode = DECL_FUNCTION_CODE (fndecl);
tree arg0, arg1, arg2;
rtx op0, op1, op2, accvec, pat, tmp1, tmp2;
case BFIN_BUILTIN_DIFFHL_2X16:
case BFIN_BUILTIN_DIFFLH_2X16:
- arg0 = TREE_VALUE (arglist);
+ arg0 = CALL_EXPR_ARG (exp, 0);
op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
icode = (fcode == BFIN_BUILTIN_DIFFHL_2X16
? CODE_FOR_subhilov2hi3 : CODE_FOR_sublohiv2hi3);
return target;
case BFIN_BUILTIN_CPLX_MUL_16:
- arg0 = TREE_VALUE (arglist);
- arg1 = TREE_VALUE (TREE_CHAIN (arglist));
+ arg0 = CALL_EXPR_ARG (exp, 0);
+ arg1 = CALL_EXPR_ARG (exp, 1);
op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
accvec = gen_reg_rtx (V2PDImode);
case BFIN_BUILTIN_CPLX_MAC_16:
case BFIN_BUILTIN_CPLX_MSU_16:
- arg0 = TREE_VALUE (arglist);
- arg1 = TREE_VALUE (TREE_CHAIN (arglist));
- arg2 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
+ arg0 = CALL_EXPR_ARG (exp, 0);
+ arg1 = CALL_EXPR_ARG (exp, 1);
+ arg2 = CALL_EXPR_ARG (exp, 2);
op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
op2 = expand_expr (arg2, NULL_RTX, VOIDmode, 0);
for (i = 0, d = bdesc_2arg; i < ARRAY_SIZE (bdesc_2arg); i++, d++)
if (d->code == fcode)
- return bfin_expand_binop_builtin (d->icode, arglist, target,
+ return bfin_expand_binop_builtin (d->icode, exp, target,
d->macflag);
for (i = 0, d = bdesc_1arg; i < ARRAY_SIZE (bdesc_1arg); i++, d++)
if (d->code == fcode)
- return bfin_expand_unop_builtin (d->icode, arglist, target);
+ return bfin_expand_unop_builtin (d->icode, exp, target);
gcc_unreachable ();
}