2013-01-29 Greta Yorsh <Greta.Yorsh@arm.com>
+ * config/arm/arm-protos.h (arm_mac_accumulator_is_result): New
+ declaration.
+ * config/arm/arm.c (arm_mac_accumulator_is_result): New function.
+ * config/arm/cortex-a7.md: New bypasses using
+ arm_mac_accumulator_is_result.
+
+2013-01-29 Greta Yorsh <Greta.Yorsh@arm.com>
+
* config/arm/cortex-a7.md (cortex_a7_neon_mul): New reservation.
(cortex_a7_neon_mla): Likewise.
(cortex_a7_fpfmad): New reservation.
return TARGET_AAPCS_BASED ? integer_type_node : long_long_integer_type_node;
}
+/* Return non-zero iff CONSUMER (a multiply-accumulate or a
+   multiply-subtract instruction) depends on the result of PRODUCER
+   through its accumulator operand and through neither of its
+   multiplication operands.  PRODUCER itself is not checked to be a
+   multiply-accumulate instruction.  */
+int
+arm_mac_accumulator_is_result (rtx producer, rtx consumer)
+{
+  rtx prod_pat = PATTERN (producer);
+  rtx cons_pat = PATTERN (consumer);
+  rtx dest, src, mult, acc;
+
+  /* Look through conditional-execution wrappers on either insn.  */
+  if (GET_CODE (prod_pat) == COND_EXEC)
+    prod_pat = COND_EXEC_CODE (prod_pat);
+  if (GET_CODE (cons_pat) == COND_EXEC)
+    cons_pat = COND_EXEC_CODE (cons_pat);
+
+  /* Both patterns must be plain SETs.  */
+  if (GET_CODE (prod_pat) != SET || GET_CODE (cons_pat) != SET)
+    return 0;
+
+  dest = XEXP (prod_pat, 0);
+  src = XEXP (cons_pat, 1);
+
+  /* Pick the multiplication and the accumulator out of the consumer's
+     source, which must be either
+       (plus (mult ...) (...))
+     or
+       (minus (...) (mult ...)).  */
+  switch (GET_CODE (src))
+    {
+    case PLUS:
+      mult = XEXP (src, 0);
+      acc = XEXP (src, 1);
+      break;
+
+    case MINUS:
+      acc = XEXP (src, 0);
+      mult = XEXP (src, 1);
+      break;
+
+    default:
+      return 0;
+    }
+
+  if (GET_CODE (mult) != MULT)
+    return 0;
+
+  /* The producer's result must feed the accumulator...  */
+  if (!reg_overlap_mentioned_p (dest, acc))
+    return 0;
+
+  /* ...and must not feed either multiplication operand.  */
+  if (reg_overlap_mentioned_p (dest, XEXP (mult, 0)))
+    return 0;
+
+  return !reg_overlap_mentioned_p (dest, XEXP (mult, 1));
+}
+
/* Return non-zero if the consumer (a multiply-accumulate instruction)
has an accumulator dependency on the result of the producer (a
multiplication instruction) and no other dependency on that result. */
(eq_attr "neon_type" "none")))
"cortex_a7_both")
+;; Forward the result of a multiply operation to the accumulator
+;; of the following multiply and accumulate instruction.
+(define_bypass 1 "cortex_a7_mul"
+ "cortex_a7_mul"
+ "arm_mac_accumulator_is_result")
+
;; The latency depends on the operands, so we use an estimate here.
(define_insn_reservation "cortex_a7_idiv" 5
(and (eq_attr "tune" "cortexa7")
neon_fp_vmla_qqq_scalar"))
"cortex_a7_both+cortex_a7_fpmul_pipe")
+;; When arm_mac_accumulator_is_result holds, i.e. the consumer uses the
+;; producer's result only as its accumulator, the latency between the
+;; reservations listed below is 4.
+(define_bypass 4 "cortex_a7_fpmacs,cortex_a7_neon_mla"
+ "cortex_a7_fpmacs,cortex_a7_neon_mla"
+ "arm_mac_accumulator_is_result")
+
;; Non-multiply instructions can issue between two cycles of a
;; double-precision multiply.
(eq_attr "neon_type" "none")))
"cortex_a7_ex1+cortex_a7_fpmul_pipe, cortex_a7_fpmul_pipe*4")
+;; When arm_mac_accumulator_is_result holds, i.e. the consumer uses the
+;; producer's result only as its accumulator, the latency from
+;; cortex_a7_fpmacd to the consumer reservations listed below is 7.
+(define_bypass 7 "cortex_a7_fpmacd"
+ "cortex_a7_fpmacd,cortex_a7_fpfmad"
+ "arm_mac_accumulator_is_result")
+
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Floating-point divide/square root instructions.
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;