+2010-11-04 Richard Guenther <rguenther@suse.de>
+ Richard Henderson <rth@redhat.com>
+
+ * tree.def (FMA_EXPR): New tree code.
+ * expr.c (expand_expr_real_2): Add FMA_EXPR expansion code.
+ * gimple.c (gimple_rhs_class_table): FMA_EXPR is a GIMPLE_TERNARY_RHS.
+ * tree-cfg.c (verify_gimple_assign_ternary): Verify FMA_EXPR types.
+ * tree-inline.c (estimate_operator_cost): Handle FMA_EXPR.
+ * gimple-pretty-print.c (dump_ternary_rhs): Likewise.
+ * tree-ssa-math-opts.c (convert_mult_to_fma): New function.
+ (execute_optimize_widening_mul): Call it. Reorganize to allow
+ dead stmt removal. Move TODO flags ...
+ (pass_optimize_widening_mul): ... here.
+ * flag-types.h (enum fp_contract_mode): New enum.
+ * common.opt (flag_fp_contract_mode): New variable.
+ (-ffp-contract): New option.
+ * opts.c (common_handle_option): Handle it.
+ * doc/invoke.texi (-ffp-contract): Document.
+ * tree.h (fold_fma): Declare.
+ * builtins.c (fold_fma): New function.
+ (fold_builtin_fma): Likewise.
+ (fold_builtin_3): Call it for fma.
+ * fold-const.c (fold_ternary_loc): Fold FMA_EXPR.
+ * optabs.c (optab_for_tree_code): Handle FMA_EXPR.
+ * config/i386/sse.md (fms<mode>4, fnma<mode>4, fnms<mode>4):
+ New expanders.
+ * doc/md.texi (fms<mode>4, fnma<mode>4, fnms<mode>4): Document new
+ named patterns.
+ * genopinit.c (optabs): Initialize fms_optab, fnma_optab and fnms_optab.
+ * optabs.h (enum optab_index): Add OTI_fms, OTI_fnma and OTI_fnms.
+ (fms_optab, fnma_optab, fnms_optab): New defines.
+ * gimplify.c (gimplify_expr): Handle binary truth expressions
+ explicitly. Handle FMA_EXPR.
+ * tree-vect-stmts.c (vectorizable_operation): Handle ternary
+ operations.
+
2010-11-04 Artjoms Sinkarovs <artyom.shinakroff@gmail.com>
Richard Guenther <rguenther@suse.de>
return fold_build1_loc (loc, ABS_EXPR, type, arg);
}
+/* Try to fold the fused multiply-add ARG0 * ARG1 + ARG2 of type TYPE.
+   Folding is only attempted when all three operands are REAL_CSTs, in
+   which case the result is whatever do_mpfr_arg3 yields for mpfr_fma.
+   Return NULL_TREE when nothing can be folded.  LOC is unused.  */
+
+tree
+fold_fma (location_t loc ATTRIBUTE_UNUSED,
+          tree type, tree arg0, tree arg1, tree arg2)
+{
+  /* Anything other than a triple of real constants is left alone.  */
+  if (TREE_CODE (arg0) != REAL_CST
+      || TREE_CODE (arg1) != REAL_CST
+      || TREE_CODE (arg2) != REAL_CST)
+    return NULL_TREE;
+
+  return do_mpfr_arg3 (arg0, arg1, arg2, type, mpfr_fma);
+}
+
+/* Fold a call to fma, fmaf, or fmal with arguments ARG[012] and result
+   type TYPE.  Constant operands are folded through fold_fma; otherwise
+   the call becomes an FMA_EXPR, but only when fma_optab has a handler
+   for the mode of TYPE.  Return NULL_TREE if the call is left as is.  */
+
+static tree
+fold_builtin_fma (location_t loc, tree arg0, tree arg1, tree arg2, tree type)
+{
+  tree folded;
+
+  /* All three arguments must be of real type.  */
+  if (!validate_arg (arg0, REAL_TYPE)
+      || !validate_arg (arg1, REAL_TYPE)
+      || !validate_arg (arg2, REAL_TYPE))
+    return NULL_TREE;
+
+  folded = fold_fma (loc, type, arg0, arg1, arg2);
+  if (folded)
+    return folded;
+
+  /* ??? Only expand to FMA_EXPR if it's directly supported.  */
+  if (optab_handler (fma_optab, TYPE_MODE (type)) == CODE_FOR_nothing)
+    return NULL_TREE;
+
+  return fold_build3_loc (loc, FMA_EXPR, type, arg0, arg1, arg2);
+}
+
/* Fold a call to builtin fmin or fmax. */
static tree
return fold_builtin_sincos (loc, arg0, arg1, arg2);
CASE_FLT_FN (BUILT_IN_FMA):
- if (validate_arg (arg0, REAL_TYPE)
- && validate_arg(arg1, REAL_TYPE)
- && validate_arg(arg2, REAL_TYPE))
- return do_mpfr_arg3 (arg0, arg1, arg2, type, mpfr_fma);
+ return fold_builtin_fma (loc, arg0, arg1, arg2, type);
break;
CASE_FLT_FN (BUILT_IN_REMQUO):
Variable
int *param_values
+; Floating-point contraction mode, fast by default.
+Variable
+enum fp_contract_mode flag_fp_contract_mode = FP_CONTRACT_FAST
+
###
Driver
Common Report Var(flag_forward_propagate) Optimization
Perform a forward propagation pass on RTL
+ffp-contract=
+Common Joined RejectNegative
+-ffp-contract=[off|on|fast] Perform floating-point expression contraction.
+
; Nonzero means don't put addresses of constant functions in registers.
; Used for compiling the Unix kernel, where strange substitutions are
; done on the assembly output.
;; Intrinsic FMA operations.
-;; The standard name for fma is only available with SSE math enabled.
+;; The standard names for fma are only available with SSE math enabled.
(define_expand "fma<mode>4"
[(set (match_operand:FMAMODE 0 "register_operand")
(fma:FMAMODE
"(TARGET_FMA || TARGET_FMA4) && TARGET_SSE_MATH"
"")
+;; Fused multiply-subtract: operand 0 = operand 1 * operand 2 - operand 3,
+;; written as an fma whose addend (operand 3) is negated.  Only available
+;; when FMA or FMA4 is enabled together with SSE math.
+(define_expand "fms<mode>4"
+ [(set (match_operand:FMAMODE 0 "register_operand")
+ (fma:FMAMODE
+ (match_operand:FMAMODE 1 "nonimmediate_operand")
+ (match_operand:FMAMODE 2 "nonimmediate_operand")
+ (neg:FMAMODE (match_operand:FMAMODE 3 "nonimmediate_operand"))))]
+ "(TARGET_FMA || TARGET_FMA4) && TARGET_SSE_MATH"
+ "")
+
+;; Negated fused multiply-add: operand 0 = -(operand 1 * operand 2) + operand 3,
+;; written as an fma whose first multiplicand (operand 1) is negated.  Only
+;; available when FMA or FMA4 is enabled together with SSE math.
+(define_expand "fnma<mode>4"
+ [(set (match_operand:FMAMODE 0 "register_operand")
+ (fma:FMAMODE
+ (neg:FMAMODE (match_operand:FMAMODE 1 "nonimmediate_operand"))
+ (match_operand:FMAMODE 2 "nonimmediate_operand")
+ (match_operand:FMAMODE 3 "nonimmediate_operand")))]
+ "(TARGET_FMA || TARGET_FMA4) && TARGET_SSE_MATH"
+ "")
+
+;; Negated fused multiply-subtract: operand 0 = -(operand 1 * operand 2)
+;; - operand 3, written as an fma with both operand 1 and operand 3 negated.
+;; Only available when FMA or FMA4 is enabled together with SSE math.
+(define_expand "fnms<mode>4"
+ [(set (match_operand:FMAMODE 0 "register_operand")
+ (fma:FMAMODE
+ (neg:FMAMODE (match_operand:FMAMODE 1 "nonimmediate_operand"))
+ (match_operand:FMAMODE 2 "nonimmediate_operand")
+ (neg:FMAMODE (match_operand:FMAMODE 3 "nonimmediate_operand"))))]
+ "(TARGET_FMA || TARGET_FMA4) && TARGET_SSE_MATH"
+ "")
+
;; The builtin for fma4intrin.h is not constrained by SSE math enabled.
(define_expand "fma4i_fmadd_<mode>"
[(set (match_operand:FMAMODE 0 "register_operand")
-fdelayed-branch -fdelete-null-pointer-checks -fdse -fdse @gol
-fearly-inlining -fipa-sra -fexpensive-optimizations -ffast-math @gol
-ffinite-math-only -ffloat-store -fexcess-precision=@var{style} @gol
--fforward-propagate -ffunction-sections @gol
+-fforward-propagate -ffp-contract=@var{style} -ffunction-sections @gol
-fgcse -fgcse-after-reload -fgcse-las -fgcse-lm -fgraphite-identity @gol
-fgcse-sm -fif-conversion -fif-conversion2 -findirect-inlining @gol
-finline-functions -finline-functions-called-once -finline-limit=@var{n} @gol
This option is enabled by default at optimization levels @option{-O},
@option{-O2}, @option{-O3}, @option{-Os}.
+@item -ffp-contract=@var{style}
+@opindex ffp-contract
+@option{-ffp-contract=off} disables floating-point expression contraction.
+@option{-ffp-contract=fast} enables floating-point expression contraction
+such as forming fused multiply-add operations if the target has
+native support for them.
+@option{-ffp-contract=on} enables floating-point expression contraction
+if allowed by the language standard. This is currently not implemented
+and is treated the same as @option{-ffp-contract=off}.
+
+The default is @option{-ffp-contract=fast}.
+
@item -fomit-frame-pointer
@opindex fomit-frame-pointer
Don't keep the frame pointer in a register for functions that
multiply followed by the add if the machine does not perform a
rounding step between the operations.
+@cindex @code{fms@var{m}4} instruction pattern
+@item @samp{fms@var{m}4}
+Like @code{fma@var{m}4}, except operand 3 is subtracted from the
+product instead of added to the product. This is represented
+in the rtl as
+
+@smallexample
+(fma:@var{m} @var{op1} @var{op2} (neg:@var{m} @var{op3}))
+@end smallexample
+
+@cindex @code{fnma@var{m}4} instruction pattern
+@item @samp{fnma@var{m}4}
+Like @code{fma@var{m}4} except that the intermediate product
+is negated before being added to operand 3. This is represented
+in the rtl as
+
+@smallexample
+(fma:@var{m} (neg:@var{m} @var{op1}) @var{op2} @var{op3})
+@end smallexample
+
+@cindex @code{fnms@var{m}4} instruction pattern
+@item @samp{fnms@var{m}4}
+Like @code{fms@var{m}4} except that the intermediate product
+is negated before subtracting operand 3. This is represented
+in the rtl as
+
+@smallexample
+(fma:@var{m} (neg:@var{m} @var{op1}) @var{op2} (neg:@var{m} @var{op3}))
+@end smallexample
+
@cindex @code{min@var{m}3} instruction pattern
@cindex @code{max@var{m}3} instruction pattern
@item @samp{smin@var{m}3}, @samp{smax@var{m}3}
int ignore;
bool reduce_bit_field;
location_t loc = ops->location;
- tree treeop0, treeop1;
+ tree treeop0, treeop1, treeop2;
#define REDUCE_BIT_FIELD(expr) (reduce_bit_field \
? reduce_to_bit_field_precision ((expr), \
target, \
treeop0 = ops->op0;
treeop1 = ops->op1;
+ treeop2 = ops->op2;
/* We should be called only on simple (binary or unary) expressions,
exactly those that are valid in gimple expressions that aren't
case WIDEN_MULT_PLUS_EXPR:
case WIDEN_MULT_MINUS_EXPR:
expand_operands (treeop0, treeop1, NULL_RTX, &op0, &op1, EXPAND_NORMAL);
- op2 = expand_normal (ops->op2);
+ op2 = expand_normal (treeop2);
target = expand_widen_pattern_expr (ops, op0, op1, op2,
target, unsignedp);
return target;
expand_operands (treeop0, treeop1, subtarget, &op0, &op1, EXPAND_NORMAL);
return REDUCE_BIT_FIELD (expand_mult (mode, op0, op1, target, unsignedp));
+ case FMA_EXPR:
+ {
+ optab opt = fma_optab;
+ gimple def0, def2;
+
+ def0 = get_def_for_expr (treeop0, NEGATE_EXPR);
+ def2 = get_def_for_expr (treeop2, NEGATE_EXPR);
+
+ op0 = op2 = NULL;
+
+ if (def0 && def2
+ && optab_handler (fnms_optab, mode) != CODE_FOR_nothing)
+ {
+ opt = fnms_optab;
+ op0 = expand_normal (gimple_assign_rhs1 (def0));
+ op2 = expand_normal (gimple_assign_rhs1 (def2));
+ }
+ else if (def0
+ && optab_handler (fnma_optab, mode) != CODE_FOR_nothing)
+ {
+ opt = fnma_optab;
+ op0 = expand_normal (gimple_assign_rhs1 (def0));
+ }
+ else if (def2
+ && optab_handler (fms_optab, mode) != CODE_FOR_nothing)
+ {
+ opt = fms_optab;
+ op2 = expand_normal (gimple_assign_rhs1 (def2));
+ }
+
+ if (op0 == NULL)
+ op0 = expand_expr (treeop0, subtarget, VOIDmode, EXPAND_NORMAL);
+ if (op2 == NULL)
+ op2 = expand_normal (treeop2);
+ op1 = expand_normal (treeop1);
+
+ return expand_ternary_op (TYPE_MODE (type), opt,
+ op0, op1, op2, target, 0);
+ }
+
case MULT_EXPR:
/* If this is a fixed-point operation, then we cannot use the code
below because "expand_mult" doesn't support sat/no-sat fixed-point
WARN_STRICT_OVERFLOW_MAGNITUDE = 5
};
+/* Floating-point contraction mode, as selected by -ffp-contract=.  */
+enum fp_contract_mode {
+ /* Floating-point expression contraction is disabled.  */
+ FP_CONTRACT_OFF = 0,
+ /* Contraction as allowed by the language standard.  Not implemented:
+    the option handler currently maps -ffp-contract=on to
+    FP_CONTRACT_OFF instead of this value.  */
+ FP_CONTRACT_ON = 1,
+ /* Contraction such as forming fused multiply-add operations when the
+    target supports them.  This is the default mode.  */
+ FP_CONTRACT_FAST = 2
+};
+
#endif /* ! GCC_FLAG_TYPES_H */
tree
fold_ternary_loc (location_t loc, enum tree_code code, tree type,
- tree op0, tree op1, tree op2)
+ tree op0, tree op1, tree op2)
{
tree tem;
- tree arg0 = NULL_TREE, arg1 = NULL_TREE;
+ tree arg0 = NULL_TREE, arg1 = NULL_TREE, arg2 = NULL_TREE;
enum tree_code_class kind = TREE_CODE_CLASS (code);
gcc_assert (IS_EXPR_CODE_CLASS (kind)
STRIP_NOPS (arg1);
}
+ if (op2)
+ {
+ arg2 = op2;
+ STRIP_NOPS (arg2);
+ }
+
switch (code)
{
case COMPONENT_REF:
return NULL_TREE;
+ case FMA_EXPR:
+ /* For integers we can decompose the FMA if possible. */
+ if (TREE_CODE (arg0) == INTEGER_CST
+ && TREE_CODE (arg1) == INTEGER_CST)
+ return fold_build2_loc (loc, PLUS_EXPR, type,
+ const_binop (MULT_EXPR, arg0, arg1), arg2);
+ if (integer_zerop (arg2))
+ return fold_build2_loc (loc, MULT_EXPR, type, arg0, arg1);
+
+ return fold_fma (loc, type, arg0, arg1, arg2);
+
default:
return NULL_TREE;
} /* switch (code) */
"set_optab_handler (floor_optab, $A, CODE_FOR_$(floor$a2$))",
"set_convert_optab_handler (lfloor_optab, $B, $A, CODE_FOR_$(lfloor$F$a$I$b2$))",
"set_optab_handler (fma_optab, $A, CODE_FOR_$(fma$a4$))",
+ "set_optab_handler (fms_optab, $A, CODE_FOR_$(fms$a4$))",
+ "set_optab_handler (fnma_optab, $A, CODE_FOR_$(fnma$a4$))",
+ "set_optab_handler (fnms_optab, $A, CODE_FOR_$(fnms$a4$))",
"set_optab_handler (ceil_optab, $A, CODE_FOR_$(ceil$a2$))",
"set_convert_optab_handler (lceil_optab, $B, $A, CODE_FOR_$(lceil$F$a$I$b2$))",
"set_optab_handler (round_optab, $A, CODE_FOR_$(round$a2$))",
pp_character (buffer, '>');
break;
+ case FMA_EXPR:
+ dump_generic_node (buffer, gimple_assign_rhs1 (gs), spc, flags, false);
+ pp_string (buffer, " * ");
+ dump_generic_node (buffer, gimple_assign_rhs2 (gs), spc, flags, false);
+ pp_string (buffer, " + ");
+ dump_generic_node (buffer, gimple_assign_rhs3 (gs), spc, flags, false);
+ break;
+
default:
gcc_unreachable ();
}
|| (SYM) == TRUTH_XOR_EXPR) ? GIMPLE_BINARY_RHS \
: (SYM) == TRUTH_NOT_EXPR ? GIMPLE_UNARY_RHS \
: ((SYM) == WIDEN_MULT_PLUS_EXPR \
- || (SYM) == WIDEN_MULT_MINUS_EXPR) ? GIMPLE_TERNARY_RHS \
+ || (SYM) == WIDEN_MULT_MINUS_EXPR \
+ || (SYM) == FMA_EXPR) ? GIMPLE_TERNARY_RHS \
: ((SYM) == COND_EXPR \
|| (SYM) == CONSTRUCTOR \
|| (SYM) == OBJ_TYPE_REF \
ret = gimplify_omp_atomic (expr_p, pre_p);
break;
+ case TRUTH_AND_EXPR:
+ case TRUTH_OR_EXPR:
+ case TRUTH_XOR_EXPR:
+ /* Classified as tcc_expression. */
+ goto expr_2;
+
+ case FMA_EXPR:
+ /* Classified as tcc_expression. */
+ goto expr_3;
+
case POINTER_PLUS_EXPR:
/* Convert ((type *)A)+offset into &A->field_of_type_and_offset.
The second is gimple immediate saving a need for extra statement.
break;
}
+ expr_3:
+ {
+ enum gimplify_status r0, r1, r2;
+
+ r0 = gimplify_expr (&TREE_OPERAND (*expr_p, 0), pre_p,
+ post_p, is_gimple_val, fb_rvalue);
+ r1 = gimplify_expr (&TREE_OPERAND (*expr_p, 1), pre_p,
+ post_p, is_gimple_val, fb_rvalue);
+ r2 = gimplify_expr (&TREE_OPERAND (*expr_p, 2), pre_p,
+ post_p, is_gimple_val, fb_rvalue);
+
+ ret = MIN (MIN (r0, r1), r2);
+ break;
+ }
+
case tcc_declaration:
case tcc_constant:
ret = GS_ALL_DONE;
goto dont_recalculate;
default:
- gcc_assert (TREE_CODE (*expr_p) == TRUTH_AND_EXPR
- || TREE_CODE (*expr_p) == TRUTH_OR_EXPR
- || TREE_CODE (*expr_p) == TRUTH_XOR_EXPR);
- goto expr_2;
+ gcc_unreachable ();
}
recalculate_side_effects (*expr_p);
: (TYPE_SATURATING (type)
? ssmsub_widen_optab : smsub_widen_optab));
+ case FMA_EXPR:
+ return fma_optab;
+
case REDUC_MAX_EXPR:
return TYPE_UNSIGNED (type) ? reduc_umax_optab : reduc_smax_optab;
OTI_atan2,
/* Floating multiply/add */
OTI_fma,
+ OTI_fms,
+ OTI_fnma,
+ OTI_fnms,
/* Move instruction. */
OTI_mov,
#define pow_optab (&optab_table[OTI_pow])
#define atan2_optab (&optab_table[OTI_atan2])
#define fma_optab (&optab_table[OTI_fma])
+#define fms_optab (&optab_table[OTI_fms])
+#define fnma_optab (&optab_table[OTI_fnma])
+#define fnms_optab (&optab_table[OTI_fnms])
#define mov_optab (&optab_table[OTI_mov])
#define movstrict_optab (&optab_table[OTI_movstrict])
return false;
break;
+ case OPT_ffp_contract_:
+ if (!strcmp (arg, "on"))
+ /* Not implemented, fall back to conservative FP_CONTRACT_OFF. */
+ flag_fp_contract_mode = FP_CONTRACT_OFF;
+ else if (!strcmp (arg, "off"))
+ flag_fp_contract_mode = FP_CONTRACT_OFF;
+ else if (!strcmp (arg, "fast"))
+ flag_fp_contract_mode = FP_CONTRACT_FAST;
+ else
+ error ("unknown floating point contraction style \"%s\"", arg);
+ break;
+
case OPT_fexcess_precision_:
if (!strcmp (arg, "fast"))
flag_excess_precision_cmdline = EXCESS_PRECISION_FAST;
+2010-11-04 Richard Guenther <rguenther@suse.de>
+ Richard Henderson <rth@redhat.com>
+
+ * gcc.target/i386/fma4-vector-2.c: New testcase.
+
2010-11-04 Artjoms Sinkarovs <artyom.shinakroff@gmail.com>
Richard Guenther <rguenther@suse.de>
--- /dev/null
+/* { dg-do compile } */
+/* { dg-require-effective-target lp64 } */
+/* { dg-options "-O2 -mfma4 -ftree-vectorize -mtune=generic" } */
+
+float r[256], s[256];
+float x[256];
+float y[256];
+float z[256];
+
+/* Each iteration computes both x*y - z and x*y + z over float arrays;
+   the dg-final directives of this test expect the two forms to show up
+   as vfmsubps and vfmaddps in the generated assembly.  */
+void foo (void)
+{
+ int i;
+ for (i = 0; i < 256; ++i)
+ {
+ r[i] = x[i] * y[i] - z[i];
+ s[i] = x[i] * y[i] + z[i];
+ }
+}
+
+/* { dg-final { scan-assembler "vfmaddps" } } */
+/* { dg-final { scan-assembler "vfmsubps" } } */
}
break;
+ case FMA_EXPR:
+ if (!useless_type_conversion_p (lhs_type, rhs1_type)
+ || !useless_type_conversion_p (lhs_type, rhs2_type)
+ || !useless_type_conversion_p (lhs_type, rhs3_type))
+ {
+ error ("type mismatch in fused multiply-add expression");
+ debug_generic_expr (lhs_type);
+ debug_generic_expr (rhs1_type);
+ debug_generic_expr (rhs2_type);
+ debug_generic_expr (rhs3_type);
+ return true;
+ }
+ break;
+
default:
gcc_unreachable ();
}
case POINTER_PLUS_EXPR:
case MINUS_EXPR:
case MULT_EXPR:
+ case FMA_EXPR:
case ADDR_SPACE_CONVERT_EXPR:
case FIXED_CONVERT_EXPR:
return true;
}
+/* Combine the multiplication at MUL_STMT with uses in additions and
+   subtractions to form fused multiply-add operations.  Returns true
+   if successful and MUL_STMT should be removed.
+
+   The transformation is all-or-nothing: a first pass over the uses of
+   the product checks that every one of them can become an FMA_EXPR,
+   and only then does a second pass rewrite them.  Debug statements
+   that use the product are ignored in both passes so that the decision
+   does not depend on whether debug information is being generated.  */
+
+static bool
+convert_mult_to_fma (gimple mul_stmt)
+{
+  tree mul_result = gimple_assign_lhs (mul_stmt);
+  tree type = TREE_TYPE (mul_result);
+  gimple use_stmt, fma_stmt;
+  use_operand_p use_p;
+  imm_use_iterator imm_iter;
+
+  if (FLOAT_TYPE_P (type)
+      && flag_fp_contract_mode == FP_CONTRACT_OFF)
+    return false;
+
+  /* We don't want to do bitfield reduction ops.  */
+  if (INTEGRAL_TYPE_P (type)
+      && (TYPE_PRECISION (type)
+          != GET_MODE_PRECISION (TYPE_MODE (type))))
+    return false;
+
+  /* If the target doesn't support it, don't generate it.  We assume that
+     if fma isn't available then fms, fnma or fnms are not either.  */
+  if (optab_handler (fma_optab, TYPE_MODE (type)) == CODE_FOR_nothing)
+    return false;
+
+  /* Make sure that the multiplication statement becomes dead after
+     the transformation, thus that all uses are transformed to FMAs.
+     This means we assume that an FMA operation has the same cost
+     as an addition.  */
+  FOR_EACH_IMM_USE_FAST (use_p, imm_iter, mul_result)
+    {
+      enum tree_code use_code;
+
+      use_stmt = USE_STMT (use_p);
+
+      /* Debug statements must not influence code generation.  */
+      if (is_gimple_debug (use_stmt))
+        continue;
+
+      if (!is_gimple_assign (use_stmt))
+        return false;
+      use_code = gimple_assign_rhs_code (use_stmt);
+      /* ??? We need to handle NEGATE_EXPR to eventually form fnms.  */
+      if (use_code != PLUS_EXPR
+          && use_code != MINUS_EXPR)
+        return false;
+
+      /* For now restrict this operation to single basic blocks.  In theory
+         we would want to support sinking the multiplication in
+         m = a*b;
+         if ()
+           ma = m + c;
+         else
+           d = m;
+         to form a fma in the then block and sink the multiplication to the
+         else block.  */
+      if (gimple_bb (use_stmt) != gimple_bb (mul_stmt))
+        return false;
+
+      /* We can't handle a * b + a * b.  */
+      if (gimple_assign_rhs1 (use_stmt) == gimple_assign_rhs2 (use_stmt))
+        return false;
+
+      /* If the target doesn't support a * b - c then drop the ball.  */
+      if (gimple_assign_rhs1 (use_stmt) == mul_result
+          && use_code == MINUS_EXPR
+          && optab_handler (fms_optab, TYPE_MODE (type)) == CODE_FOR_nothing)
+        return false;
+
+      /* If the target doesn't support -a * b + c then drop the ball.  */
+      if (gimple_assign_rhs2 (use_stmt) == mul_result
+          && use_code == MINUS_EXPR
+          && optab_handler (fnma_optab, TYPE_MODE (type)) == CODE_FOR_nothing)
+        return false;
+
+      /* We don't generate -a * b - c below yet.  */
+    }
+
+  FOR_EACH_IMM_USE_STMT (use_stmt, imm_iter, mul_result)
+    {
+      tree addop, mulop1;
+      gimple_stmt_iterator gsi;
+
+      /* Leave debug uses untouched; they were not validated above.
+         NOTE(review): the caller releases the defs of MUL_STMT after
+         removing it, which is expected to take care of these uses --
+         confirm.  */
+      if (is_gimple_debug (use_stmt))
+        continue;
+
+      gsi = gsi_for_stmt (use_stmt);
+
+      mulop1 = gimple_assign_rhs1 (mul_stmt);
+      if (gimple_assign_rhs1 (use_stmt) == mul_result)
+        {
+          addop = gimple_assign_rhs2 (use_stmt);
+          /* a * b - c -> a * b + (-c)  */
+          if (gimple_assign_rhs_code (use_stmt) == MINUS_EXPR)
+            addop = force_gimple_operand_gsi (&gsi,
+                                              build1 (NEGATE_EXPR,
+                                                      type, addop),
+                                              true, NULL_TREE, true,
+                                              GSI_SAME_STMT);
+        }
+      else
+        {
+          addop = gimple_assign_rhs1 (use_stmt);
+          /* a - b * c -> (-b) * c + a  */
+          if (gimple_assign_rhs_code (use_stmt) == MINUS_EXPR)
+            mulop1 = force_gimple_operand_gsi (&gsi,
+                                               build1 (NEGATE_EXPR,
+                                                       type, mulop1),
+                                               true, NULL_TREE, true,
+                                               GSI_SAME_STMT);
+        }
+
+      /* Replace the addition/subtraction in place, keeping its lhs.  */
+      fma_stmt = gimple_build_assign_with_ops3 (FMA_EXPR,
+                                                gimple_assign_lhs (use_stmt),
+                                                mulop1,
+                                                gimple_assign_rhs2 (mul_stmt),
+                                                addop);
+      gsi_replace (&gsi, fma_stmt, true);
+    }
+
+  return true;
+}
+
/* Find integer multiplications where the operands are extended from
smaller types, and replace the MULT_EXPR with a WIDEN_MULT_EXPR
where appropriate. */
static unsigned int
execute_optimize_widening_mul (void)
{
- bool changed = false;
basic_block bb;
FOR_EACH_BB (bb)
{
gimple_stmt_iterator gsi;
- for (gsi = gsi_after_labels (bb); !gsi_end_p (gsi); gsi_next (&gsi))
+ for (gsi = gsi_after_labels (bb); !gsi_end_p (gsi);)
{
gimple stmt = gsi_stmt (gsi);
enum tree_code code;
- if (!is_gimple_assign (stmt))
- continue;
+ if (is_gimple_assign (stmt))
+ {
+ code = gimple_assign_rhs_code (stmt);
+ switch (code)
+ {
+ case MULT_EXPR:
+ if (!convert_mult_to_widen (stmt)
+ && convert_mult_to_fma (stmt))
+ {
+ gsi_remove (&gsi, true);
+ release_defs (stmt);
+ continue;
+ }
+ break;
+
+ case PLUS_EXPR:
+ case MINUS_EXPR:
+ convert_plusminus_to_widen (&gsi, stmt, code);
+ break;
- code = gimple_assign_rhs_code (stmt);
- if (code == MULT_EXPR)
- changed |= convert_mult_to_widen (stmt);
- else if (code == PLUS_EXPR || code == MINUS_EXPR)
- changed |= convert_plusminus_to_widen (&gsi, stmt, code);
+ default:;
+ }
+ }
+ gsi_next (&gsi);
}
}
- return (changed ? TODO_dump_func | TODO_update_ssa | TODO_verify_ssa
- | TODO_verify_stmts : 0);
+ return 0;
}
static bool
0, /* properties_provided */
0, /* properties_destroyed */
0, /* todo_flags_start */
- 0 /* todo_flags_finish */
+ TODO_verify_ssa
+ | TODO_verify_stmts
+ | TODO_dump_func
+ | TODO_update_ssa /* todo_flags_finish */
}
};
/* Function vectorizable_operation.
- Check if STMT performs a binary or unary operation that can be vectorized.
+ Check if STMT performs a binary, unary or ternary operation that can
+ be vectorized.
If VEC_STMT is also passed, vectorize the STMT: create a vectorized
stmt to replace it, put it in VEC_STMT, and insert it at BSI.
Return FALSE if not a vectorizable STMT, TRUE otherwise. */
{
tree vec_dest;
tree scalar_dest;
- tree op0, op1 = NULL;
+ tree op0, op1 = NULL_TREE, op2 = NULL_TREE;
stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
tree vectype;
loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
int icode;
tree def;
gimple def_stmt;
- enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type};
+ enum vect_def_type dt[3]
+ = {vect_unknown_def_type, vect_unknown_def_type, vect_unknown_def_type};
gimple new_stmt = NULL;
stmt_vec_info prev_stmt_info;
int nunits_in;
tree vectype_out;
int ncopies;
int j, i;
- VEC(tree,heap) *vec_oprnds0 = NULL, *vec_oprnds1 = NULL;
- tree vop0, vop1;
+ VEC(tree,heap) *vec_oprnds0 = NULL, *vec_oprnds1 = NULL, *vec_oprnds2 = NULL;
+ tree vop0, vop1, vop2;
bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
int vf;
/* Support only unary or binary operations. */
op_type = TREE_CODE_LENGTH (code);
- if (op_type != unary_op && op_type != binary_op)
+ if (op_type != unary_op && op_type != binary_op && op_type != ternary_op)
{
if (vect_print_dump_info (REPORT_DETAILS))
- fprintf (vect_dump, "num. args = %d (not unary/binary op).", op_type);
+ fprintf (vect_dump, "num. args = %d (not unary/binary/ternary op).",
+ op_type);
return false;
}
if (nunits_out != nunits_in)
return false;
- if (op_type == binary_op)
+ if (op_type == binary_op || op_type == ternary_op)
{
op1 = gimple_assign_rhs2 (stmt);
if (!vect_is_simple_use (op1, loop_vinfo, bb_vinfo, &def_stmt, &def,
return false;
}
}
+ if (op_type == ternary_op)
+ {
+ op2 = gimple_assign_rhs3 (stmt);
+ if (!vect_is_simple_use (op2, loop_vinfo, bb_vinfo, &def_stmt, &def,
+ &dt[2]))
+ {
+ if (vect_print_dump_info (REPORT_DETAILS))
+ fprintf (vect_dump, "use not simple.");
+ return false;
+ }
+ }
if (loop_vinfo)
vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
|| code == RROTATE_EXPR)
return false;
- optab = optab_for_tree_code (code, vectype, optab_default);
+ optab = optab_for_tree_code (code, vectype, optab_default);
/* Supportable by target? */
if (!optab)
if (!slp_node)
{
vec_oprnds0 = VEC_alloc (tree, heap, 1);
- if (op_type == binary_op)
+ if (op_type == binary_op || op_type == ternary_op)
vec_oprnds1 = VEC_alloc (tree, heap, 1);
+ if (op_type == ternary_op)
+ vec_oprnds2 = VEC_alloc (tree, heap, 1);
}
/* In case the vectorization factor (VF) is bigger than the number
/* Handle uses. */
if (j == 0)
{
- if (op_type == binary_op)
+ if (op_type == binary_op || op_type == ternary_op)
vect_get_vec_defs (op0, op1, stmt, &vec_oprnds0, &vec_oprnds1,
slp_node);
else
vect_get_vec_defs (op0, NULL_TREE, stmt, &vec_oprnds0, NULL,
slp_node);
+ if (op_type == ternary_op)
+ {
+ vec_oprnds2 = VEC_alloc (tree, heap, 1);
+ VEC_quick_push (tree, vec_oprnds2,
+ vect_get_vec_def_for_operand (op2, stmt, NULL));
+ }
}
else
- vect_get_vec_defs_for_stmt_copy (dt, &vec_oprnds0, &vec_oprnds1);
+ {
+ vect_get_vec_defs_for_stmt_copy (dt, &vec_oprnds0, &vec_oprnds1);
+ if (op_type == ternary_op)
+ {
+ tree vec_oprnd = VEC_pop (tree, vec_oprnds2);
+ VEC_quick_push (tree, vec_oprnds2,
+ vect_get_vec_def_for_stmt_copy (dt[2],
+ vec_oprnd));
+ }
+ }
/* Arguments are ready. Create the new vector stmt. */
FOR_EACH_VEC_ELT (tree, vec_oprnds0, i, vop0)
{
- vop1 = ((op_type == binary_op)
- ? VEC_index (tree, vec_oprnds1, i) : NULL);
- new_stmt = gimple_build_assign_with_ops (code, vec_dest, vop0, vop1);
+ vop1 = ((op_type == binary_op || op_type == ternary_op)
+ ? VEC_index (tree, vec_oprnds1, i) : NULL_TREE);
+ vop2 = ((op_type == ternary_op)
+ ? VEC_index (tree, vec_oprnds2, i) : NULL_TREE);
+ new_stmt = gimple_build_assign_with_ops3 (code, vec_dest,
+ vop0, vop1, vop2);
new_temp = make_ssa_name (vec_dest, new_stmt);
gimple_assign_set_lhs (new_stmt, new_temp);
vect_finish_stmt_generation (stmt, new_stmt, gsi);
VEC_free (tree, heap, vec_oprnds0);
if (vec_oprnds1)
VEC_free (tree, heap, vec_oprnds1);
+ if (vec_oprnds2)
+ VEC_free (tree, heap, vec_oprnds2);
return true;
}
is subtracted from t3. */
DEFTREECODE (WIDEN_MULT_MINUS_EXPR, "widen_mult_plus_expr", tcc_expression, 3)
+/* Fused multiply-add.
+ All operands and the result are of the same type. No intermediate
+ rounding is performed after multiplying operand one with operand two
+ before adding operand three. */
+DEFTREECODE (FMA_EXPR, "fma_expr", tcc_expression, 3)
+
/* Whole vector left/right shift in bits.
Operand 0 is a vector to be shifted.
Operand 1 is an integer shift amount in bits. */
extern void fold_undefer_overflow_warnings (bool, const_gimple, int);
extern void fold_undefer_and_ignore_overflow_warnings (void);
extern bool fold_deferring_overflow_warnings_p (void);
+extern tree fold_fma (location_t, tree, tree, tree, tree);
enum operand_equal_flag
{