From 76405cce3bd251fc742938b1b9cb6c5c7d81d566 Mon Sep 17 00:00:00 2001 From: rth Date: Thu, 14 Apr 2005 17:46:08 +0000 Subject: [PATCH] * config/i386/i386.c (ix86_expand_sse_cmp): Split out from ... (ix86_expand_sse_movcc): ... here. Take cmp as a pre-computed register. (ix86_expand_fp_movcc): Update to match. (ix86_expand_fp_vcond, ix86_expand_int_vcond): New. * config/i386/i386-protos.h: Update. * config/i386/sse.md (vcondv4sf, vcondv2df): New. (vcond<mode>, vcondu<mode>): New. * lib/target-supports.exp (check_effective_target_vect_condition): Add ia64, i?86, and x86_64. git-svn-id: svn+ssh://gcc.gnu.org/svn/gcc/trunk@98146 138bc75d-0d04-0410-961f-82ee72b054a4 --- gcc/ChangeLog | 11 +++ gcc/config/i386/i386-protos.h | 2 + gcc/config/i386/i386.c | 162 ++++++++++++++++++++++++++++++---- gcc/config/i386/sse.md | 64 ++++++++++++++ gcc/testsuite/ChangeLog | 5 ++ gcc/testsuite/lib/target-supports.exp | 5 +- 6 files changed, 233 insertions(+), 16 deletions(-) diff --git a/gcc/ChangeLog b/gcc/ChangeLog index 7a08b47..ba76cb6 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,3 +1,14 @@ +2005-04-14 Richard Henderson + + * config/i386/i386.c (ix86_expand_sse_cmp): Split out from ... + (ix86_expand_sse_movcc): ... here. Take cmp as a pre-computed + register. + (ix86_expand_fp_movcc): Update to match. + (ix86_expand_fp_vcond, ix86_expand_int_vcond): New. + * config/i386/i386-protos.h: Update. + * config/i386/sse.md (vcondv4sf, vcondv2df): New. + (vcond<mode>, vcondu<mode>): New. + 2005-04-14 Joseph S. 
Myers * doc/cpp.texi, doc/install.texi: Change references to GCC 3.5 to diff --git a/gcc/config/i386/i386-protos.h b/gcc/config/i386/i386-protos.h index 41a70a8..4834071 100644 --- a/gcc/config/i386/i386-protos.h +++ b/gcc/config/i386/i386-protos.h @@ -150,6 +150,8 @@ extern void ix86_expand_branch (enum rtx_code, rtx); extern int ix86_expand_setcc (enum rtx_code, rtx); extern int ix86_expand_int_movcc (rtx[]); extern int ix86_expand_fp_movcc (rtx[]); +extern bool ix86_expand_fp_vcond (rtx[]); +extern bool ix86_expand_int_vcond (rtx[], bool); extern int ix86_expand_int_addcc (rtx[]); extern void ix86_expand_call (rtx, rtx, rtx, rtx, rtx, int); extern void x86_initialize_trampoline (rtx, rtx, rtx); diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c index e82cefc..2c68075 100644 --- a/gcc/config/i386/i386.c +++ b/gcc/config/i386/i386.c @@ -10141,12 +10141,14 @@ ix86_expand_sse_fp_minmax (rtx dest, enum rtx_code code, rtx cmp_op0, return true; } -static void -ix86_expand_sse_movcc (rtx dest, enum rtx_code code, rtx cmp_op0, rtx cmp_op1, - rtx op_true, rtx op_false) +/* Expand an sse vector comparison. Return the register with the result. */ + +static rtx +ix86_expand_sse_cmp (rtx dest, enum rtx_code code, rtx cmp_op0, rtx cmp_op1, + rtx op_true, rtx op_false) { enum machine_mode mode = GET_MODE (dest); - rtx t1, t2, t3, x; + rtx x; cmp_op0 = force_reg (mode, cmp_op0); if (!nonimmediate_operand (cmp_op1, mode)) @@ -10155,24 +10157,33 @@ ix86_expand_sse_movcc (rtx dest, enum rtx_code code, rtx cmp_op0, rtx cmp_op1, if (optimize || reg_overlap_mentioned_p (dest, op_true) || reg_overlap_mentioned_p (dest, op_false)) - t1 = gen_reg_rtx (mode); - else - t1 = dest; + dest = gen_reg_rtx (mode); x = gen_rtx_fmt_ee (code, mode, cmp_op0, cmp_op1); - gcc_assert (sse_comparison_operator (x, VOIDmode)); - emit_insn (gen_rtx_SET (VOIDmode, t1, x)); + emit_insn (gen_rtx_SET (VOIDmode, dest, x)); + + return dest; +} + +/* Expand DEST = CMP ? 
OP_TRUE : OP_FALSE into a sequence of logical + operations. This is used for both scalar and vector conditional moves. */ + +static void +ix86_expand_sse_movcc (rtx dest, rtx cmp, rtx op_true, rtx op_false) +{ + enum machine_mode mode = GET_MODE (dest); + rtx t2, t3, x; if (op_false == CONST0_RTX (mode)) { op_true = force_reg (mode, op_true); - x = gen_rtx_AND (mode, t1, op_true); + x = gen_rtx_AND (mode, cmp, op_true); emit_insn (gen_rtx_SET (VOIDmode, dest, x)); } else if (op_true == CONST0_RTX (mode)) { op_false = force_reg (mode, op_false); - x = gen_rtx_NOT (mode, t1); + x = gen_rtx_NOT (mode, cmp); x = gen_rtx_AND (mode, x, op_false); emit_insn (gen_rtx_SET (VOIDmode, dest, x)); } @@ -10187,10 +10198,10 @@ ix86_expand_sse_movcc (rtx dest, enum rtx_code code, rtx cmp_op0, rtx cmp_op1, else t3 = dest; - x = gen_rtx_AND (mode, op_true, t1); + x = gen_rtx_AND (mode, op_true, cmp); emit_insn (gen_rtx_SET (VOIDmode, t2, x)); - x = gen_rtx_NOT (mode, t1); + x = gen_rtx_NOT (mode, cmp); x = gen_rtx_AND (mode, x, op_false); emit_insn (gen_rtx_SET (VOIDmode, t3, x)); @@ -10199,6 +10210,8 @@ ix86_expand_sse_movcc (rtx dest, enum rtx_code code, rtx cmp_op0, rtx cmp_op1, } } +/* Expand a floating-point conditional move. Return true if successful. */ + int ix86_expand_fp_movcc (rtx operands[]) { @@ -10230,8 +10243,9 @@ ix86_expand_fp_movcc (rtx operands[]) operands[3])) return 1; - ix86_expand_sse_movcc (operands[0], code, ix86_compare_op0, - ix86_compare_op1, operands[2], operands[3]); + tmp = ix86_expand_sse_cmp (operands[0], code, ix86_compare_op0, + ix86_compare_op1, operands[2], operands[3]); + ix86_expand_sse_movcc (operands[0], tmp, operands[2], operands[3]); return 1; } @@ -10282,6 +10296,124 @@ ix86_expand_fp_movcc (rtx operands[]) return 1; } +/* Expand a floating-point vector conditional move; a vcond operation + rather than a movcc operation. 
*/ + +bool +ix86_expand_fp_vcond (rtx operands[]) +{ + enum rtx_code code = GET_CODE (operands[3]); + rtx cmp; + + code = ix86_prepare_sse_fp_compare_args (operands[0], code, + &operands[4], &operands[5]); + if (code == UNKNOWN) + return false; + + if (ix86_expand_sse_fp_minmax (operands[0], code, operands[4], + operands[5], operands[1], operands[2])) + return true; + + cmp = ix86_expand_sse_cmp (operands[0], code, operands[4], operands[5], + operands[1], operands[2]); + ix86_expand_sse_movcc (operands[0], cmp, operands[1], operands[2]); + return true; +} + +/* Expand a signed/unsigned integral vector conditional move. */ + +bool +ix86_expand_int_vcond (rtx operands[], bool unsignedp) +{ + enum machine_mode mode = GET_MODE (operands[0]); + enum rtx_code code = GET_CODE (operands[3]); + rtx cmp, x; + + if (unsignedp) + code = signed_condition (code); + if (code == NE || code == LE || code == GE) + { + /* Inverse of a supported code. */ + x = operands[1]; + operands[1] = operands[2]; + operands[2] = x; + code = reverse_condition (code); + } + if (code == LT) + { + /* Swap of a supported code. */ + x = operands[4]; + operands[4] = operands[5]; + operands[5] = x; + code = swap_condition (code); + } + gcc_assert (code == EQ || code == GT); + + /* Unlike floating-point, we can rely on the optimizers to have already + converted to MIN/MAX expressions, so we don't have to handle that. */ + + /* Unsigned GT is not directly supported. We can zero-extend QI and + HImode elements to the next wider element size, use a signed compare, + then repack. For three extra instructions, this is definitely a win. 
*/ + if (code == GT && unsignedp) + { + rtx o0l, o0h, o1l, o1h, cl, ch, zero; + enum machine_mode wider; + rtx (*unpackl) (rtx, rtx, rtx); + rtx (*unpackh) (rtx, rtx, rtx); + rtx (*pack) (rtx, rtx, rtx); + + switch (mode) + { + case V16QImode: + wider = V8HImode; + unpackl = gen_sse2_punpcklbw; + unpackh = gen_sse2_punpckhbw; + pack = gen_sse2_packsswb; + break; + case V8HImode: + wider = V4SImode; + unpackl = gen_sse2_punpcklwd; + unpackh = gen_sse2_punpckhwd; + pack = gen_sse2_packssdw; + break; + default: + gcc_unreachable (); + } + + operands[4] = force_reg (mode, operands[4]); + operands[5] = force_reg (mode, operands[5]); + + o0l = gen_reg_rtx (wider); + o0h = gen_reg_rtx (wider); + o1l = gen_reg_rtx (wider); + o1h = gen_reg_rtx (wider); + cl = gen_reg_rtx (wider); + ch = gen_reg_rtx (wider); + cmp = gen_reg_rtx (mode); + zero = force_reg (mode, CONST0_RTX (mode)); + + emit_insn (unpackl (gen_lowpart (mode, o0l), operands[4], zero)); + emit_insn (unpackh (gen_lowpart (mode, o0h), operands[4], zero)); + emit_insn (unpackl (gen_lowpart (mode, o1l), operands[5], zero)); + emit_insn (unpackh (gen_lowpart (mode, o1h), operands[5], zero)); + + x = gen_rtx_GT (wider, o0l, o1l); + emit_insn (gen_rtx_SET (VOIDmode, cl, x)); + + x = gen_rtx_GT (wider, o0h, o1h); + emit_insn (gen_rtx_SET (VOIDmode, ch, x)); + + emit_insn (pack (cmp, cl, ch)); + } + else + cmp = ix86_expand_sse_cmp (operands[0], code, operands[4], operands[5], + operands[1], operands[2]); + + ix86_expand_sse_movcc (operands[0], cmp, operands[1], operands[2]); + return true; +} + /* Expand conditional increment or decrement using adb/sbb instructions. The default case using setcc followed by the conditional move can be done by generic code. 
*/ diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md index eca5486..0f6c7ea 100644 --- a/gcc/config/i386/sse.md +++ b/gcc/config/i386/sse.md @@ -710,6 +710,22 @@ [(set_attr "type" "ssecomi") (set_attr "mode" "SF")]) +(define_expand "vcondv4sf" + [(set (match_operand:V4SF 0 "register_operand" "") + (if_then_else:V4SF + (match_operator 3 "" + [(match_operand:V4SF 4 "nonimmediate_operand" "") + (match_operand:V4SF 5 "nonimmediate_operand" "")]) + (match_operand:V4SF 1 "general_operand" "") + (match_operand:V4SF 2 "general_operand" "")))] + "TARGET_SSE" +{ + if (ix86_expand_fp_vcond (operands)) + DONE; + else + FAIL; +}) + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ;; ;; Parallel single-precision floating point logical operations @@ -1648,6 +1664,22 @@ [(set_attr "type" "ssecomi") (set_attr "mode" "DF")]) +(define_expand "vcondv2df" + [(set (match_operand:V2DF 0 "register_operand" "") + (if_then_else:V2DF + (match_operator 3 "" + [(match_operand:V2DF 4 "nonimmediate_operand" "") + (match_operand:V2DF 5 "nonimmediate_operand" "")]) + (match_operand:V2DF 1 "general_operand" "") + (match_operand:V2DF 2 "general_operand" "")))] + "TARGET_SSE2" +{ + if (ix86_expand_fp_vcond (operands)) + DONE; + else + FAIL; +}) + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ;; ;; Parallel double-precision floating point logical operations @@ -2602,6 +2634,38 @@ [(set_attr "type" "ssecmp") (set_attr "mode" "TI")]) +(define_expand "vcond<mode>" + [(set (match_operand:SSEMODE124 0 "register_operand" "") + (if_then_else:SSEMODE124 + (match_operator 3 "" + [(match_operand:SSEMODE124 4 "nonimmediate_operand" "") + (match_operand:SSEMODE124 5 "nonimmediate_operand" "")]) + (match_operand:SSEMODE124 1 "general_operand" "") + (match_operand:SSEMODE124 2 "general_operand" "")))] + "TARGET_SSE2" +{ + if (ix86_expand_int_vcond (operands, false)) + DONE; + else + FAIL; +}) + +(define_expand "vcondu<mode>" + [(set (match_operand:SSEMODE12 0 
"register_operand" "") + (if_then_else:SSEMODE12 + (match_operator 3 "" + [(match_operand:SSEMODE12 4 "nonimmediate_operand" "") + (match_operand:SSEMODE12 5 "nonimmediate_operand" "")]) + (match_operand:SSEMODE12 1 "general_operand" "") + (match_operand:SSEMODE12 2 "general_operand" "")))] + "TARGET_SSE2" +{ + if (ix86_expand_int_vcond (operands, true)) + DONE; + else + FAIL; +}) + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ;; ;; Parallel integral logical operations diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog index ef5ebd4..cb73cde 100644 --- a/gcc/testsuite/ChangeLog +++ b/gcc/testsuite/ChangeLog @@ -1,3 +1,8 @@ +2005-04-14 Richard Henderson + + * lib/target-supports.exp (check_effective_target_vect_condition): + Add ia64, i?86, and x86_64. + 2005-04-14 Steven G. Kargl * gfortran.dg/underflow.f90: Use tiny(x)/huge(x). diff --git a/gcc/testsuite/lib/target-supports.exp b/gcc/testsuite/lib/target-supports.exp index 52200e9..784c435 100644 --- a/gcc/testsuite/lib/target-supports.exp +++ b/gcc/testsuite/lib/target-supports.exp @@ -844,7 +844,10 @@ proc check_effective_target_vect_condition { } { verbose "check_effective_target_vect_cond: using cached result" 2 } else { set et_vect_cond_saved 0 - if { [istarget powerpc*-*-*] } { + if { [istarget powerpc*-*-*] + || [istarget ia64-*-*] + || [istarget i?86-*-*] + || [istarget x86_64-*-*] } { set et_vect_cond_saved 1 } } -- 2.7.4