+2013-11-25 Terry Guo <terry.guo@arm.com>
+
+ * doc/invoke.texi (-mslow-flash-data): Document new option.
+ * config/arm/arm.opt (mslow-flash-data): New option.
+ * config/arm/arm-protos.h (arm_max_const_double_inline_cost): Declare
+ it.
+ * config/arm/arm.h (TARGET_USE_MOVT): Always true when literal pools
+ are disabled.
+ (arm_disable_literal_pool): Declare it.
+ * config/arm/arm.c (arm_disable_literal_pool): New variable.
+ (arm_option_override): Handle new option.
+ (thumb2_legitimate_address_p): Don't allow symbol references when
+ literal pools are disabled.
+ (arm_max_const_double_inline_cost): New function.
+ * config/arm/arm.md (types.md): Include it before ...
+ (use_literal_pool): New attribute.
+ (enabled): Use new attribute.
+ (split pattern): Replace symbol+offset with MOVW/MOVT.
+
2013-11-24 Steven Bosscher <steven@gcc.gnu.org>
PR bootstrap/59279
extern rtx arm_gen_return_addr_mask (void);
extern void arm_reload_in_hi (rtx *);
extern void arm_reload_out_hi (rtx *);
+extern int arm_max_const_double_inline_cost (void);
extern int arm_const_double_inline_cost (rtx);
extern bool arm_const_double_by_parts (rtx);
extern bool arm_const_double_by_immediates (rtx);
than core registers. */
int prefer_neon_for_64bits = 0;
+/* Nonzero if we shouldn't use literal pools. */
+bool arm_disable_literal_pool = false;
+
/* In case of a PRE_INC, POST_INC, PRE_DEC, POST_DEC memory reference,
we must report the mode of the memory reference from
TARGET_PRINT_OPERAND to TARGET_PRINT_OPERAND_ADDRESS. */
if (TARGET_APCS_FRAME)
flag_shrink_wrap = false;
+ /* We only support -mslow-flash-data on armv7-m targets. */
+ if (target_slow_flash_data
+ && ((!(arm_arch7 && !arm_arch_notm) && !arm_arch7em)
+ || (TARGET_THUMB1 || flag_pic || TARGET_NEON)))
+ error ("-mslow-flash-data only supports non-pic code on armv7-m targets");
+
+ /* Currently, for slow flash data, we just disable literal pools. */
+ if (target_slow_flash_data)
+ arm_disable_literal_pool = true;
+
/* Register global variables with the garbage collector. */
arm_add_gc_roots ();
}
&& thumb2_legitimate_index_p (mode, xop0, strict_p)));
}
+ /* Normally we can assign constant values to target registers without
+ the help of constant pool. But there are cases we have to use constant
+ pool like:
+ 1) assign a label to register.
+ 2) sign-extend an 8-bit value to 32 bits and then assign it to a register.
+
+ Constant pool access in format:
+ (set (reg r0) (mem (symbol_ref (".LC0"))))
+ will cause the use of literal pool (later in function arm_reorg).
+ So here we mark such format as an invalid format, then the compiler
+ will adjust it into:
+ (set (reg r0) (symbol_ref (".LC0")))
+ (set (reg r0) (mem (reg r0))).
+ No extra register is required, and (mem (reg r0)) won't cause the use
+ of literal pools. */
+ else if (arm_disable_literal_pool && code == SYMBOL_REF
+ && CONSTANT_POOL_ADDRESS_P (x))
+ return 0;
+
else if (GET_MODE_CLASS (mode) != MODE_FLOAT
&& code == SYMBOL_REF
&& CONSTANT_POOL_ADDRESS_P (x)
minipool_fix_tail = fix;
}
+/* Return the maximum cost (in insn count) up to which a 64-bit constant
+   should be synthesized inline rather than loaded from the literal pool.
+   Returns 99 -- effectively "always synthesize" -- when literal pools
+   are disabled.  Compared against arm_const_double_inline_cost by the
+   DImode constant split pattern in arm.md.  */
+int
+arm_max_const_double_inline_cost ()
+{
+  /* Let the value get synthesized to avoid the use of literal pools.  */
+  if (arm_disable_literal_pool)
+    return 99;
+
+  return ((optimize_size || arm_ld_sched) ? 3 : 4);
+}
+
/* Return the cost of synthesizing a 64-bit constant VAL inline.
Returns the number of insns needed, or 99 if we don't know how to
do it. */
/* Should MOVW/MOVT be used in preference to a constant pool. */
#define TARGET_USE_MOVT \
- (arm_arch_thumb2 && !optimize_size && !current_tune->prefer_constant_pool)
+ (arm_arch_thumb2 \
+ && (arm_disable_literal_pool \
+ || (!optimize_size && !current_tune->prefer_constant_pool)))
/* We could use unified syntax for arm mode, but for now we just use it
for Thumb-2. */
than core registers. */
extern int prefer_neon_for_64bits;
+/* Nonzero if we shouldn't use literal pools. */
+#ifndef USED_FOR_TARGET
+extern bool arm_disable_literal_pool;
+#endif
+
#ifndef TARGET_DEFAULT
#define TARGET_DEFAULT (MASK_APCS_FRAME)
#endif
;; Processor type. This is created automatically from arm-cores.def.
(include "arm-tune.md")
+;; Instruction classification types
+(include "types.md")
+
; IS_THUMB is set to 'yes' when we are generating Thumb code, and 'no' when
; generating ARM code. This is used to control the length of some insn
; patterns that share the same RTL in both ARM and Thumb code.
(const_string "yes")]
(const_string "no")))
+;; Set to "yes" for alternatives that would read a constant from the
+;; literal pool -- currently FP loads whose source is a constant.  The
+;; "enabled" attribute uses this to disable such alternatives when
+;; arm_disable_literal_pool is set.
+(define_attr "use_literal_pool" "no,yes"
+  (cond [(and (eq_attr "type" "f_loads,f_loadd")
+	      (match_test "CONSTANT_P (operands[1])"))
+	 (const_string "yes")]
+	(const_string "no")))
+
; Allows an insn to disable certain alternatives for reasons other than
; arch support.
(define_attr "insn_enabled" "no,yes"
(match_test "arm_restrict_it"))
(const_string "no")
+ (and (eq_attr "use_literal_pool" "yes")
+ (match_test "arm_disable_literal_pool"))
+ (const_string "no")
+
(eq_attr "arch_enabled" "no")
(const_string "no")
(set_attr "length" "4")
(set_attr "pool_range" "250")])
-;; Instruction classification types
-(include "types.md")
-
; Load scheduling, set from the arm_ld_sched variable
; initialized by arm_option_override()
(define_attr "ldsched" "no,yes" (const (symbol_ref "arm_ld_sched")))
"TARGET_32BIT
&& reload_completed
&& (arm_const_double_inline_cost (operands[1])
- <= ((optimize_size || arm_ld_sched) ? 3 : 4))"
+ <= arm_max_const_double_inline_cost ())"
[(const_int 0)]
"
arm_split_constant (SET, SImode, curr_insn,
"
)
+;; A normal way to do (symbol + offset) requires three instructions at least
+;; (depends on how big the offset is) as below:
+;; movw r0, #:lower16:g
+;; movt r0, #:upper16:g
+;; adds r0, #4
+;;
+;; A better way would be:
+;; movw r0, #:lower16:g+4
+;; movt r0, #:upper16:g+4
+;;
+;; The limitation of this way is that the length of the offset should be a
+;; 16-bit signed value, because the current assembler only supports REL type
+;; relocation for such a case.  If the more powerful RELA type is supported
+;; in future, we should update this pattern to use the better way.
+(define_split
+  [(set (match_operand:SI 0 "arm_general_register_operand" "")
+	(const:SI (plus:SI (match_operand:SI 1 "general_operand" "")
+			   (match_operand:SI 2 "const_int_operand" ""))))]
+  "TARGET_THUMB2
+   && arm_disable_literal_pool
+   && reload_completed
+   && GET_CODE (operands[1]) == SYMBOL_REF"
+  [(clobber (const_int 0))]
+  "
+    int offset = INTVAL (operands[2]);
+
+    if (offset < -0x8000 || offset > 0x7fff)
+      {
+	arm_emit_movpair (operands[0], operands[1]);
+	emit_insn (gen_rtx_SET (SImode, operands[0],
+				gen_rtx_PLUS (SImode, operands[0], operands[2])));
+      }
+    else
+      {
+	rtx op = gen_rtx_CONST (SImode,
+				gen_rtx_PLUS (SImode, operands[1], operands[2]));
+	arm_emit_movpair (operands[0], op);
+      }
+  "
+)
+
;; Split symbol_refs at the later stage (after cprop), instead of generating
;; movt/movw pair directly at expand. Otherwise corresponding high_sum
;; and lo_sum would be merged back into memory load at cprop. However,
mneon-for-64bits
Target Report RejectNegative Var(use_neon_for_64bits) Init(0)
Use Neon to perform 64-bits operations rather than core registers.
+
+mslow-flash-data
+Target Report Var(target_slow_flash_data) Init(0)
+Assume loading data from flash is slower than fetching instructions.
-mfix-cortex-m3-ldrd @gol
-munaligned-access @gol
-mneon-for-64bits @gol
+-mslow-flash-data @gol
-mrestrict-it}
@emph{AVR Options}
disabled by default since the cost of moving data from core registers
to Neon is high.
+@item -mslow-flash-data
+@opindex mslow-flash-data
+Assume loading data from flash is slower than fetching instructions.
+Therefore literal loads are minimized for better performance.
+This option is only supported when compiling for ARMv7 M-profile and
+is off by default.
+
@item -mrestrict-it
@opindex mrestrict-it
Restricts generation of IT blocks to conform to the rules of ARMv8.
+2013-11-25 Terry Guo <terry.guo@arm.com>
+
+ * gcc.target/arm/thumb2-slow-flash-data.c: New.
+
2013-11-23 Uros Bizjak <ubizjak@gmail.com>
* gcc.dg/float-exact-1.c: Use dg-add-options ieee.
--- /dev/null
+/* The option -mslow-flash-data is just for performance tuning; it
+   doesn't totally disable the use of literal pools.  But for the
+   simple cases below, the use of the literal pool should be replaced
+   by movw/movt or a read-only constant pool.  */
+
+/* { dg-do compile } */
+/* { dg-require-effective-target arm_cortex_m } */
+/* { dg-require-effective-target arm_thumb2_ok } */
+/* { dg-options "-O2 -mthumb -mslow-flash-data" } */
+
+float sf;
+double df;
+long long l;
+static char *p = "Hello World";
+
+/* Float constants: with -mslow-flash-data these should be materialized
+   via movw/movt (or a read-only constant pool) rather than a literal
+   pool load.  */
+float
+testsf (float *p)
+{
+  if (*p > 1.1234f)
+    return 2.1234f;
+  else
+    return 3.1234f;
+}
+
+/* Double constants: same expectation as testsf, for DFmode values.  */
+double
+testdf (double *p)
+{
+  if (*p > 4.1234)
+    return 2.1234;
+  else
+    return 3.1234;
+}
+
+/* 64-bit integer constants: expected to be synthesized inline with
+   movw/movt pairs instead of being loaded from a literal pool.  */
+long long
+testll (long long *p)
+{
+  if (*p > 0x123456789ABCDEFll)
+    return 0x111111111ll;
+  else
+    return 0x222222222ll;
+}
+
+/* Address of a static string plus a constant offset: exercises the
+   (symbol + offset) split -- the final scan expects movw/movt of
+   ".LC0+4".  */
+char *
+testchar ()
+{
+  return p + 4;
+}
+
+/* Takes the address of a local label (GNU computed goto), forcing the
+   label's address into a register -- one of the cases where a constant
+   pool entry is still required.
+   NOTE(review): 'i' is read while uninitialized and 'volatile *labelref'
+   relies on implicit int; tolerable only because this is a compile-only
+   test (dg-do compile), but worth cleaning up.  */
+int
+foo (int a, int b)
+{
+  int i;
+  volatile *labelref = &&label1;
+
+  if (a > b)
+    {
+      while (i < b)
+	{
+	  a += *labelref;
+	  i += 1;
+	}
+      goto *labelref;
+    }
+  else
+    b = b + 3;
+
+  a = a * b;
+
+label1:
+  return a + b;
+}
+
+/* { dg-final { scan-assembler-times "movt" 13 } } */
+/* { dg-final { scan-assembler-times "movt.*LC0\\+4" 1 } } */