From 36d387f2f098d6d1e1e77d19e38f3b6221ccde2b Mon Sep 17 00:00:00 2001 From: Uros Bizjak Date: Mon, 7 Oct 2019 20:41:17 +0200 Subject: [PATCH] i386-expand.c (ix86_expand_floorceildf_32, [...]): Reorder functions. * config/i386/i386-expand.c (ix86_expand_floorceildf_32, ix86_expand_rounddf_32): Reorder functions. * config/i386/i386-protos.h: Update. From-SVN: r276668 --- gcc/ChangeLog | 14 ++- gcc/config/i386/i386-expand.c | 220 +++++++++++++++++++++--------------------- gcc/config/i386/i386-protos.h | 6 +- 3 files changed, 124 insertions(+), 116 deletions(-) diff --git a/gcc/ChangeLog b/gcc/ChangeLog index 74afcaa..bbdce86 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,3 +1,9 @@ +2019-10-07 Uroš Bizjak + + * config/i386/i386-expand.c (ix86_expand_floorceildf_32, + ix86_expand_rounddf_32): Reorder functions. + * config/i386/i386-protos.h: Update. + 2019-10-07 Jozef Lawrynowicz * config.in: Regenerate. @@ -20,14 +26,14 @@ ".lower" prefix if -m{code,data}-region=lower have been passed. (msp430_output_aligned_decl_common): Emit common symbols when -mdata-region=lower is passed unless TARGET_USE_LOWER_REGION_PREFIX is - set. + set. (TARGET_ASM_FILE_END): Define. (msp430_file_end): New function. (msp430_do_not_relax_short_jumps): Allow relaxation when function will be in the lower region. (msp430_op_not_in_high_mem): New function. (msp430_print_operand): Check "msp430_op_not_in_high_mem" for - the 'X' operand selector. + the 'X' operand selector. Clarify comment for 'x' operand selector. * config/msp430/msp430.h (LINK_SPEC): Propagate -m{code,data}-region to the linker via spec function @@ -74,9 +80,9 @@ (mdata-region=): Likewise. (muse-lower-region-prefix): New option. * config/msp430/t-msp430 (MULTILIB_OPTIONS): Add - mdata-region=none multilib. + mdata-region=none multilib. (MULTILIB_MATCHES): Set mdata-region={upper,either} to match - mdata-region=none multilib. + mdata-region=none multilib. MULTILIB_EXCEPTIONS: Remove. MULTILIB_REQUIRED: Define. 
* configure: Regenerate. diff --git a/gcc/config/i386/i386-expand.c b/gcc/config/i386/i386-expand.c index 5e377d6..6d3d14c 100644 --- a/gcc/config/i386/i386-expand.c +++ b/gcc/config/i386/i386-expand.c @@ -15903,71 +15903,8 @@ ix86_expand_rint (rtx operand0, rtx operand1) emit_move_insn (operand0, res); } -/* Expand SSE2 sequence for computing floor or ceil from OPERAND1 storing - into OPERAND0. */ -void -ix86_expand_floorceildf_32 (rtx operand0, rtx operand1, bool do_floor) -{ - /* C code for the stuff we expand below. - double xa = fabs (x), x2; - if (!isless (xa, TWO52)) - return x; - xa = xa + TWO52 - TWO52; - x2 = copysign (xa, x); - Compensate. Floor: - if (x2 > x) - x2 -= 1; - Compensate. Ceil: - if (x2 < x) - x2 += 1; - if (HONOR_SIGNED_ZEROS (mode)) - x2 = copysign (x2, x); - return x2; - */ - machine_mode mode = GET_MODE (operand0); - rtx xa, TWO52, tmp, one, res, mask; - rtx_code_label *label; - - TWO52 = ix86_gen_TWO52 (mode); - - /* Temporary for holding the result, initialized to the input - operand to ease control flow. */ - res = gen_reg_rtx (mode); - emit_move_insn (res, operand1); - - /* xa = abs (operand1) */ - xa = ix86_expand_sse_fabs (res, &mask); - - /* if (!isless (xa, TWO52)) goto label; */ - label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false); - - /* xa = xa + TWO52 - TWO52; */ - xa = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT); - xa = expand_simple_binop (mode, MINUS, xa, TWO52, xa, 0, OPTAB_DIRECT); - - /* xa = copysign (xa, operand1) */ - ix86_sse_copysign_to_positive (xa, xa, res, mask); - - /* generate 1.0 */ - one = force_reg (mode, const_double_from_real_value (dconst1, mode)); - - /* Compensate: xa = xa - (xa > operand1 ? 1 : 0) */ - tmp = ix86_expand_sse_compare_mask (UNGT, xa, res, !do_floor); - emit_insn (gen_rtx_SET (tmp, gen_rtx_AND (mode, one, tmp))); - tmp = expand_simple_binop (mode, do_floor ? 
MINUS : PLUS, - xa, tmp, NULL_RTX, 0, OPTAB_DIRECT); - if (!do_floor && HONOR_SIGNED_ZEROS (mode)) - ix86_sse_copysign_to_positive (tmp, tmp, res, mask); - emit_move_insn (res, tmp); - - emit_label (label); - LABEL_NUSES (label) = 1; - - emit_move_insn (operand0, res); -} - -/* Expand SSE2 sequence for computing floor or ceil from OPERAND1 storing - into OPERAND0. */ +/* Expand SSE2 sequence for computing floor or ceil + from OPERAND1 storing into OPERAND0. */ void ix86_expand_floorceil (rtx operand0, rtx operand1, bool do_floor) { @@ -16027,30 +15964,30 @@ ix86_expand_floorceil (rtx operand0, rtx operand1, bool do_floor) emit_move_insn (operand0, res); } -/* Expand SSE sequence for computing round from OPERAND1 storing - into OPERAND0. Sequence that works without relying on DImode truncation - via cvttsd2siq that is only available on 64bit targets. */ +/* Expand SSE2 sequence for computing floor or ceil from OPERAND1 storing + into OPERAND0 without relying on DImode truncation via cvttsd2siq + that is only available on 64bit targets. */ void -ix86_expand_rounddf_32 (rtx operand0, rtx operand1) +ix86_expand_floorceildf_32 (rtx operand0, rtx operand1, bool do_floor) { /* C code for the stuff we expand below. - double xa = fabs (x), xa2, x2; + double xa = fabs (x), x2; if (!isless (xa, TWO52)) return x; - Using the absolute value and copying back sign makes - -0.0 -> -0.0 correct. - xa2 = xa + TWO52 - TWO52; - Compensate. - dxa = xa2 - xa; - if (dxa <= -0.5) - xa2 += 1; - else if (dxa > 0.5) - xa2 -= 1; - x2 = copysign (xa2, x); - return x2; + xa = xa + TWO52 - TWO52; + x2 = copysign (xa, x); + Compensate. Floor: + if (x2 > x) + x2 -= 1; + Compensate. 
Ceil: + if (x2 < x) + x2 += 1; + if (HONOR_SIGNED_ZEROS (mode)) + x2 = copysign (x2, x); + return x2; */ machine_mode mode = GET_MODE (operand0); - rtx xa, xa2, dxa, TWO52, tmp, half, mhalf, one, res, mask; + rtx xa, TWO52, tmp, one, res, mask; rtx_code_label *label; TWO52 = ix86_gen_TWO52 (mode); @@ -16066,31 +16003,24 @@ ix86_expand_rounddf_32 (rtx operand0, rtx operand1) /* if (!isless (xa, TWO52)) goto label; */ label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false); - /* xa2 = xa + TWO52 - TWO52; */ - xa2 = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT); - xa2 = expand_simple_binop (mode, MINUS, xa2, TWO52, xa2, 0, OPTAB_DIRECT); - - /* dxa = xa2 - xa; */ - dxa = expand_simple_binop (mode, MINUS, xa2, xa, NULL_RTX, 0, OPTAB_DIRECT); + /* xa = xa + TWO52 - TWO52; */ + xa = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT); + xa = expand_simple_binop (mode, MINUS, xa, TWO52, xa, 0, OPTAB_DIRECT); - /* generate 0.5, 1.0 and -0.5 */ - half = force_reg (mode, const_double_from_real_value (dconsthalf, mode)); - one = expand_simple_binop (mode, PLUS, half, half, NULL_RTX, 0, OPTAB_DIRECT); - mhalf = expand_simple_binop (mode, MINUS, half, one, NULL_RTX, - 0, OPTAB_DIRECT); + /* xa = copysign (xa, operand1) */ + ix86_sse_copysign_to_positive (xa, xa, res, mask); - /* Compensate. */ - /* xa2 = xa2 - (dxa > 0.5 ? 1 : 0) */ - tmp = ix86_expand_sse_compare_mask (UNGT, dxa, half, false); - emit_insn (gen_rtx_SET (tmp, gen_rtx_AND (mode, tmp, one))); - xa2 = expand_simple_binop (mode, MINUS, xa2, tmp, NULL_RTX, 0, OPTAB_DIRECT); - /* xa2 = xa2 + (dxa <= -0.5 ? 
1 : 0) */ - tmp = ix86_expand_sse_compare_mask (UNGE, mhalf, dxa, false); - emit_insn (gen_rtx_SET (tmp, gen_rtx_AND (mode, tmp, one))); - xa2 = expand_simple_binop (mode, PLUS, xa2, tmp, NULL_RTX, 0, OPTAB_DIRECT); + /* generate 1.0 */ + one = force_reg (mode, const_double_from_real_value (dconst1, mode)); - /* res = copysign (xa2, operand1) */ - ix86_sse_copysign_to_positive (res, xa2, force_reg (mode, operand1), mask); + /* Compensate: xa = xa - (xa > operand1 ? 1 : 0) */ + tmp = ix86_expand_sse_compare_mask (UNGT, xa, res, !do_floor); + emit_insn (gen_rtx_SET (tmp, gen_rtx_AND (mode, one, tmp))); + tmp = expand_simple_binop (mode, do_floor ? MINUS : PLUS, + xa, tmp, NULL_RTX, 0, OPTAB_DIRECT); + if (!do_floor && HONOR_SIGNED_ZEROS (mode)) + ix86_sse_copysign_to_positive (tmp, tmp, res, mask); + emit_move_insn (res, tmp); emit_label (label); LABEL_NUSES (label) = 1; @@ -16098,8 +16028,8 @@ ix86_expand_rounddf_32 (rtx operand0, rtx operand1) emit_move_insn (operand0, res); } -/* Expand SSE sequence for computing trunc from OPERAND1 storing - into OPERAND0. */ +/* Expand SSE sequence for computing trunc + from OPERAND1 storing into OPERAND0. */ void ix86_expand_trunc (rtx operand0, rtx operand1) { @@ -16144,7 +16074,8 @@ ix86_expand_trunc (rtx operand0, rtx operand1) } /* Expand SSE sequence for computing trunc from OPERAND1 storing - into OPERAND0. */ + into OPERAND0 without relying on DImode truncation via cvttsd2siq + that is only available on 64bit targets. */ void ix86_expand_truncdf_32 (rtx operand0, rtx operand1) { @@ -16201,8 +16132,8 @@ ix86_expand_truncdf_32 (rtx operand0, rtx operand1) emit_move_insn (operand0, res); } -/* Expand SSE sequence for computing round from OPERAND1 storing - into OPERAND0. */ +/* Expand SSE sequence for computing round + from OPERAND1 storing into OPERAND0. 
*/ void ix86_expand_round (rtx operand0, rtx operand1) { @@ -16251,6 +16182,77 @@ ix86_expand_round (rtx operand0, rtx operand1) emit_move_insn (operand0, res); } +/* Expand SSE sequence for computing round from OPERAND1 storing + into OPERAND0 without relying on DImode truncation via cvttsd2siq + that is only available on 64bit targets. */ +void +ix86_expand_rounddf_32 (rtx operand0, rtx operand1) +{ + /* C code for the stuff we expand below. + double xa = fabs (x), xa2, x2; + if (!isless (xa, TWO52)) + return x; + Using the absolute value and copying back sign makes + -0.0 -> -0.0 correct. + xa2 = xa + TWO52 - TWO52; + Compensate. + dxa = xa2 - xa; + if (dxa <= -0.5) + xa2 += 1; + else if (dxa > 0.5) + xa2 -= 1; + x2 = copysign (xa2, x); + return x2; + */ + machine_mode mode = GET_MODE (operand0); + rtx xa, xa2, dxa, TWO52, tmp, half, mhalf, one, res, mask; + rtx_code_label *label; + + TWO52 = ix86_gen_TWO52 (mode); + + /* Temporary for holding the result, initialized to the input + operand to ease control flow. */ + res = gen_reg_rtx (mode); + emit_move_insn (res, operand1); + + /* xa = abs (operand1) */ + xa = ix86_expand_sse_fabs (res, &mask); + + /* if (!isless (xa, TWO52)) goto label; */ + label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false); + + /* xa2 = xa + TWO52 - TWO52; */ + xa2 = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT); + xa2 = expand_simple_binop (mode, MINUS, xa2, TWO52, xa2, 0, OPTAB_DIRECT); + + /* dxa = xa2 - xa; */ + dxa = expand_simple_binop (mode, MINUS, xa2, xa, NULL_RTX, 0, OPTAB_DIRECT); + + /* generate 0.5, 1.0 and -0.5 */ + half = force_reg (mode, const_double_from_real_value (dconsthalf, mode)); + one = expand_simple_binop (mode, PLUS, half, half, NULL_RTX, 0, OPTAB_DIRECT); + mhalf = expand_simple_binop (mode, MINUS, half, one, NULL_RTX, + 0, OPTAB_DIRECT); + + /* Compensate. */ + /* xa2 = xa2 - (dxa > 0.5 ? 
1 : 0) */ + tmp = ix86_expand_sse_compare_mask (UNGT, dxa, half, false); + emit_insn (gen_rtx_SET (tmp, gen_rtx_AND (mode, tmp, one))); + xa2 = expand_simple_binop (mode, MINUS, xa2, tmp, NULL_RTX, 0, OPTAB_DIRECT); + /* xa2 = xa2 + (dxa <= -0.5 ? 1 : 0) */ + tmp = ix86_expand_sse_compare_mask (UNGE, mhalf, dxa, false); + emit_insn (gen_rtx_SET (tmp, gen_rtx_AND (mode, tmp, one))); + xa2 = expand_simple_binop (mode, PLUS, xa2, tmp, NULL_RTX, 0, OPTAB_DIRECT); + + /* res = copysign (xa2, operand1) */ + ix86_sse_copysign_to_positive (res, xa2, force_reg (mode, operand1), mask); + + emit_label (label); + LABEL_NUSES (label) = 1; + + emit_move_insn (operand0, res); +} + /* Expand SSE sequence for computing round from OP1 storing into OP0 using sse4 round insn. */ void diff --git a/gcc/config/i386/i386-protos.h b/gcc/config/i386/i386-protos.h index 4d6e76d..c07dfe5 100644 --- a/gcc/config/i386/i386-protos.h +++ b/gcc/config/i386/i386-protos.h @@ -193,11 +193,11 @@ extern void ix86_expand_lfloorceil (rtx, rtx, bool); extern void ix86_expand_rint (rtx, rtx); extern void ix86_expand_floorceil (rtx, rtx, bool); extern void ix86_expand_floorceildf_32 (rtx, rtx, bool); -extern void ix86_expand_round_sse4 (rtx, rtx); -extern void ix86_expand_round (rtx, rtx); -extern void ix86_expand_rounddf_32 (rtx, rtx); extern void ix86_expand_trunc (rtx, rtx); extern void ix86_expand_truncdf_32 (rtx, rtx); +extern void ix86_expand_round (rtx, rtx); +extern void ix86_expand_rounddf_32 (rtx, rtx); +extern void ix86_expand_round_sse4 (rtx, rtx); extern void ix86_expand_vecop_qihi (enum rtx_code, rtx, rtx, rtx); -- 2.7.4