+2009-07-14 Uros Bizjak <ubizjak@gmail.com>
+
+ * config/i386/sse.md (copysign<mode>3): New expander.
+ * config/i386/i386-protos.h (ix86_build_signbit_mask): New prototype.
+ * config/i386/i386.c (ix86_build_signbit_mask): Make public.
+ Use ix86_build_const_vector.
+ (enum ix86_builtins): Add IX86_BUILTIN_CPYSGNPS and
+ IX86_BUILTIN_CPYSGNPD.
+ (builtin_description): Add __builtin_ia32_copysignps and
+ __builtin_ia32_copysignpd.
+ (ix86_builtin_vectorized_function): Handle BUILT_IN_COPYSIGN
+ and BUILT_IN_COPYSIGNF.
+
2009-07-13 Jason Merrill <jason@redhat.com>
* builtins.c (can_trust_pointer_alignment): New fn.
* dwarf2.out.c (gen_type_die_with_usage): Added comment.
2009-07-14 Richard Guenther <rguenther@suse.de>
+ Andrey Belevantsev <abel@ispras.ru>
PR middle-end/40745
* cfgexpand.c (partition_stack_vars): Do not bother to update
2009-07-13 Ghassan Shobaki <ghassan.shobaki@amd.com>
- * haifa-sched.c
- (rank_for_schedule): Introduced flags to enable/disable
- individual scheduling heuristics.
- * common.opt: Introduced flags to enable/disable
- individual heuristics in the scheduler.
- * doc/invoke.texi: Introduced flags to enable/disable
- individual heuristics in the scheduler.
+ * haifa-sched.c (rank_for_schedule): Introduced flags to
+ enable/disable individual scheduling heuristics.
+ * common.opt: Introduced flags to enable/disable individual
+ heuristics in the scheduler.
+ * doc/invoke.texi: Introduced flags to enable/disable individual
+ heuristics in the scheduler.
2009-07-13 Kai Tietz <kai.tietz@onevision.com>
extern void ix86_expand_unary_operator (enum rtx_code, enum machine_mode,
rtx[]);
extern rtx ix86_build_const_vector (enum machine_mode, bool, rtx);
+extern rtx ix86_build_signbit_mask (enum machine_mode, bool, bool);
extern void ix86_split_convert_uns_si_sse (rtx[]);
extern void ix86_expand_convert_uns_didf_sse (rtx, rtx);
extern void ix86_expand_convert_uns_sixf_sse (rtx, rtx);
all elements of the vector register. If INVERT is true, then create
a mask excluding the sign bit. */
-static rtx
+rtx
ix86_build_signbit_mask (enum machine_mode mode, bool vect, bool invert)
{
enum machine_mode vec_mode, imode;
op0 = CONST0_RTX (vmode);
else
{
- rtvec v;
-
- if (mode == SFmode)
- v = gen_rtvec (4, op0, CONST0_RTX (SFmode),
- CONST0_RTX (SFmode), CONST0_RTX (SFmode));
- else
- v = gen_rtvec (2, op0, CONST0_RTX (DFmode));
+ rtx v = ix86_build_const_vector (mode, false, op0);
- op0 = force_reg (vmode, gen_rtx_CONST_VECTOR (vmode, v));
+ op0 = force_reg (vmode, v);
}
}
else if (op0 != CONST0_RTX (mode))
IX86_BUILTIN_FABSQ,
IX86_BUILTIN_COPYSIGNQ,
+ /* Vectorizer support builtins. */
+ IX86_BUILTIN_CPYSGNPS,
+ IX86_BUILTIN_CPYSGNPD,
+
/* SSE5 instructions */
IX86_BUILTIN_FMADDSS,
IX86_BUILTIN_FMADDSD,
{ OPTION_MASK_ISA_SSE, CODE_FOR_iorv4sf3, "__builtin_ia32_orps", IX86_BUILTIN_ORPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
{ OPTION_MASK_ISA_SSE, CODE_FOR_xorv4sf3, "__builtin_ia32_xorps", IX86_BUILTIN_XORPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
+ { OPTION_MASK_ISA_SSE, CODE_FOR_copysignv4sf3, "__builtin_ia32_copysignps", IX86_BUILTIN_CPYSGNPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
+
{ OPTION_MASK_ISA_SSE, CODE_FOR_sse_movss, "__builtin_ia32_movss", IX86_BUILTIN_MOVSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
{ OPTION_MASK_ISA_SSE, CODE_FOR_sse_movhlps_exp, "__builtin_ia32_movhlps", IX86_BUILTIN_MOVHLPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
{ OPTION_MASK_ISA_SSE, CODE_FOR_sse_movlhps_exp, "__builtin_ia32_movlhps", IX86_BUILTIN_MOVLHPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
{ OPTION_MASK_ISA_SSE2, CODE_FOR_iorv2df3, "__builtin_ia32_orpd", IX86_BUILTIN_ORPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
{ OPTION_MASK_ISA_SSE2, CODE_FOR_xorv2df3, "__builtin_ia32_xorpd", IX86_BUILTIN_XORPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
+ { OPTION_MASK_ISA_SSE2, CODE_FOR_copysignv2df3, "__builtin_ia32_copysignpd", IX86_BUILTIN_CPYSGNPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
+
{ OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movsd, "__builtin_ia32_movsd", IX86_BUILTIN_MOVSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
{ OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_unpckhpd_exp, "__builtin_ia32_unpckhpd", IX86_BUILTIN_UNPCKHPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
{ OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_unpcklpd_exp, "__builtin_ia32_unpcklpd", IX86_BUILTIN_UNPCKLPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
return ix86_builtins[IX86_BUILTIN_CVTPS2DQ];
break;
+ case BUILT_IN_COPYSIGN:
+ if (out_mode == DFmode && out_n == 2
+ && in_mode == DFmode && in_n == 2)
+ return ix86_builtins[IX86_BUILTIN_CPYSGNPD];
+ break;
+
+ case BUILT_IN_COPYSIGNF:
+ if (out_mode == SFmode && out_n == 4
+ && in_mode == SFmode && in_n == 4)
+ return ix86_builtins[IX86_BUILTIN_CPYSGNPS];
+ break;
+
default:
;
}
[(set_attr "type" "sselog")
(set_attr "mode" "<MODE>")])
+;; Vector copysign.  Operand 0 receives operand 1 ANDed with a mask that
+;; excludes the sign bit, ORed with operand 2 ANDed with the sign-bit mask,
+;; i.e. the magnitude of operand 1 combined with the sign of operand 2.
+(define_expand "copysign<mode>3"
+ [(set (match_dup 5)
+ (and:SSEMODEF2P (match_operand:SSEMODEF2P 1 "register_operand" "")
+ (match_dup 3)))
+ (set (match_dup 6)
+ (and:SSEMODEF2P (match_operand:SSEMODEF2P 2 "register_operand" "")
+ (match_dup 4)))
+ (set (match_operand:SSEMODEF2P 0 "register_operand" "")
+ (ior:SSEMODEF2P (match_dup 5) (match_dup 6)))]
+ "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
+{
+ /* Operands 5 and 6 are temporaries for the two masked values.  Only
+ these need fresh pseudos; operands 3 and 4 are supplied below by
+ ix86_build_signbit_mask.  */
+ operands[5] = gen_reg_rtx (<MODE>mode);
+ operands[6] = gen_reg_rtx (<MODE>mode);
+
+ /* Operand 3: vector mask with INVERT set, i.e. excluding the sign bit.
+ Operand 4: vector mask with INVERT clear, i.e. the sign bit itself.  */
+ operands[3] = ix86_build_signbit_mask (<ssescalarmode>mode, 1, 1);
+ operands[4] = ix86_build_signbit_mask (<ssescalarmode>mode, 1, 0);
+})
+
;; Also define scalar versions. These are used for abs, neg, and
;; conditional move. Using subregs into vector modes causes register
;; allocation lossage. These patterns do not allow memory operands
+2009-07-14 Uros Bizjak <ubizjak@gmail.com>
+
+ * gcc.target/i386/sse-copysignf-vec.c: New test.
+ * gcc.target/i386/sse2-copysign-vec.c: Ditto.
+
2009-07-14 Jason Merrill <jason@redhat.com>
PR c++/37276
--- /dev/null
+/* { dg-do run } */
+/* { dg-options "-O2 -ftree-vectorize -msse" } */
+
+#include "sse-check.h"
+
+extern float copysignf (float, float);
+
+#define N 16
+
+float a[N] = {-0.1f,-3.2f,-6.3f,-9.4f,-12.5f,-15.6f,-18.7f,-21.8f,24.9f,27.1f,30.2f,33.3f,36.4f,39.5f,42.6f,45.7f};
+float b[N] = {-1.2f,3.4f,-5.6f,7.8f,-9.0f,1.0f,-2.0f,3.0f,-4.0f,-5.0f,6.0f,7.0f,-8.0f,-9.0f,10.0f,11.0f};
+float r[N];
+/* Test body: fill R with copysignf (A[i], B[i]) for all N elements, then
+   recompute every element and abort on the first mismatch.  Presumably
+   called by the harness in sse-check.h -- confirm there.  */
+static void
+sse_test (void)
+{
+ int i;
+ /* Loop presumably meant to be vectorized (-ftree-vectorize) -- confirm.  */
+ for (i = 0; i < N; i++)
+ r[i] = copysignf (a[i], b[i]);
+
+ /* Check results: each stored element must equal a freshly computed
+ copysignf of the same inputs.  */
+ for (i = 0; i < N; i++)
+ if (r[i] != copysignf (a[i], b[i]))
+ abort ();
+}
+
--- /dev/null
+/* { dg-do run } */
+/* { dg-options "-O2 -ftree-vectorize -msse2" } */
+
+#include "sse2-check.h"
+
+extern double copysign (double, double);
+
+#define N 16
+
+double a[N] = {-0.1,-3.2,-6.3,-9.4,-12.5,-15.6,-18.7,-21.8,24.9,27.1,30.2,33.3,36.4,39.5,42.6,45.7};
+double b[N] = {-1.2,3.4,-5.6,7.8,-9.0,1.0,-2.0,3.0,-4.0,-5.0,6.0,7.0,-8.0,-9.0,10.0,11.0};
+double r[N];
+/* Test body: fill R with copysign (A[i], B[i]) for all N elements, then
+   recompute every element and abort on the first mismatch.  Presumably
+   called by the harness in sse2-check.h -- confirm there.  */
+static void
+sse2_test (void)
+{
+ int i;
+ /* Loop presumably meant to be vectorized (-ftree-vectorize) -- confirm.  */
+ for (i = 0; i < N; i++)
+ r[i] = copysign (a[i], b[i]);
+
+ /* Check results: each stored element must equal a freshly computed
+ copysign of the same inputs.  */
+ for (i = 0; i < N; i++)
+ if (r[i] != copysign (a[i], b[i]))
+ abort ();
+}
+