2005-04-01  Richard Earnshaw  <richard.earnshaw@arm.com>
+ * arm.c (adjacent_mem_locations): Reject volatile memory refs.
+ Also reject cases where this pattern will cause load delay stalls
+ unless optimizing for size and it will produce a shorter sequence.
+ * arm.md (arith_adjacent_mem): Make better use of ldm addressing
+ variants to avoid pre-adjusting the base when possible.
+
+2005-04-01  Richard Earnshaw  <richard.earnshaw@arm.com>
+
* arm.md (minmax_arithsi): Reject all eliminable registers, not just
the frame and argument pointers.
(strqi_preinc, strqi_predec, loadqi_preinc, loadqi_predec): Likewise.
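(For reference, and not text from the patch: the ldm addressing variants
mentioned above differ only in where the two words sit relative to the
base register; the register names here are placeholders.)

	ldmia	rN, {ra, rb}	@ ra = [rN],   rb = [rN+4]
	ldmib	rN, {ra, rb}	@ ra = [rN+4], rb = [rN+8]
	ldmda	rN, {ra, rb}	@ ra = [rN-4], rb = [rN]
	ldmdb	rN, {ra, rb}	@ ra = [rN-8], rb = [rN-4]

Selecting ib or da directly, where the offsets allow it, is what lets the
pattern below drop the add that previously pre-adjusted the base.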
int
adjacent_mem_locations (rtx a, rtx b)
{
+ /* We don't guarantee to preserve the order of these memory refs. */
+ if (volatile_refs_p (a) || volatile_refs_p (b))
+ return 0;
+
  if ((GET_CODE (XEXP (a, 0)) != REG
       && (GET_CODE (XEXP (a, 0)) != PLUS
	   || GET_CODE (XEXP (XEXP (a, 0), 1)) != CONST_INT))
      || (GET_CODE (XEXP (b, 0)) != REG
	  && (GET_CODE (XEXP (b, 0)) != PLUS
	      || GET_CODE (XEXP (XEXP (b, 0), 1)) != CONST_INT)))
    return 0;

  /* ... reg0/val0 and reg1/val1 are extracted from a and b here ... */
  val_diff = val1 - val0;
+
+ if (arm_ld_sched)
+ {
+ /* If the target has load delay slots, then there's no benefit
+ to using an ldm instruction unless the offset is zero and
+ we are optimizing for size. */
+ return (optimize_size && (REGNO (reg0) == REGNO (reg1))
+ && (val0 == 0 || val1 == 0 || val0 == 4 || val1 == 4)
+ && (val_diff == 4 || val_diff == -4));
+ }
+
return ((REGNO (reg0) == REGNO (reg1))
&& (val_diff == 4 || val_diff == -4));
}
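As a rough illustration (mine, not from the patch), source such as the
following produces two adjacent loads feeding a single ALU operation,
which the arith_adjacent_mem pattern below can fuse:

	/* Hypothetical example: p[0] and p[1] are adjacent memory refs,
	   so the pattern may emit, e.g.:
		ldmia	r0, {r1, r2}
		add	r0, r1, r2  */
	int
	sum_pair (int *p)
	{
	  return p[0] + p[1];
	}

With the check above, a core that has load delay slots (arm_ld_sched)
now only accepts the ldm form when optimizing for size, since using both
loaded values back to back would otherwise stall.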
{
rtx ldm[3];
rtx arith[4];
- int val1 = 0, val2 = 0;
+ rtx base_reg;
+ HOST_WIDE_INT val1 = 0, val2 = 0;
  if (REGNO (operands[0]) > REGNO (operands[4]))
    {
      ldm[1] = operands[4];
      ldm[2] = operands[0];
    }
  else
    {
      ldm[1] = operands[0];
      ldm[2] = operands[4];
    }
- if (GET_CODE (XEXP (operands[2], 0)) != REG)
- val1 = INTVAL (XEXP (XEXP (operands[2], 0), 1));
- if (GET_CODE (XEXP (operands[3], 0)) != REG)
+
+ base_reg = XEXP (operands[2], 0);
+
+ if (!REG_P (base_reg))
+ {
+ val1 = INTVAL (XEXP (base_reg, 1));
+ base_reg = XEXP (base_reg, 0);
+ }
+
+ if (!REG_P (XEXP (operands[3], 0)))
val2 = INTVAL (XEXP (XEXP (operands[3], 0), 1));
+
arith[0] = operands[0];
arith[3] = operands[1];
+
  if (val1 < val2)
    {
      arith[1] = ldm[1];
      arith[2] = ldm[2];
    }
  else
    {
      arith[1] = ldm[2];
      arith[2] = ldm[1];
    }
- if (val1 && val2)
+
+ ldm[0] = base_reg;
+ if (val1 != 0 && val2 != 0)
{
- rtx ops[3];
- ldm[0] = ops[0] = operands[4];
- ops[1] = XEXP (XEXP (operands[2], 0), 0);
- ops[2] = XEXP (XEXP (operands[2], 0), 1);
- output_add_immediate (ops);
- if (val1 < val2)
- output_asm_insn (\"ldm%?ia\\t%0, {%1, %2}\", ldm);
+ if (val1 == 4 || val2 == 4)
+ /* Other val must be 8, since we know they are adjacent and neither
+ is zero. */
+ output_asm_insn (\"ldm%?ib\\t%0, {%1, %2}\", ldm);
else
- output_asm_insn (\"ldm%?da\\t%0, {%1, %2}\", ldm);
+ {
+ rtx ops[3];
+
+ ldm[0] = ops[0] = operands[4];
+ ops[1] = base_reg;
+ ops[2] = GEN_INT (val1);
+ output_add_immediate (ops);
+ if (val1 < val2)
+ output_asm_insn (\"ldm%?ia\\t%0, {%1, %2}\", ldm);
+ else
+ output_asm_insn (\"ldm%?da\\t%0, {%1, %2}\", ldm);
+ }
}
- else if (val1)
+ else if (val1 != 0)
{
- ldm[0] = XEXP (operands[3], 0);
if (val1 < val2)
output_asm_insn (\"ldm%?da\\t%0, {%1, %2}\", ldm);
	else
	  output_asm_insn (\"ldm%?ia\\t%0, {%1, %2}\", ldm);
      }
else
{
- ldm[0] = XEXP (operands[2], 0);
if (val1 < val2)
output_asm_insn (\"ldm%?ia\\t%0, {%1, %2}\", ldm);
	else
	  output_asm_insn (\"ldm%?da\\t%0, {%1, %2}\", ldm);
      }
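Summarizing the dispatch above (my reading of the new template, not text
from the patch): with ldm[0] initially the unadjusted base register, the
adjacent offset pairs map to

	{0, 4}          ->  ldmia  base
	{-4, 0}         ->  ldmda  base
	{4, 8}          ->  ldmib  base
	other {n, n+4}  ->  add scratch, base, #val1, then ldmia/ldmda on scratch

so only the last case still pays for the pre-adjusting add emitted by
output_add_immediate.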