From 7e8d812e44fdbe205c18f462aa3977960d174728 Mon Sep 17 00:00:00 2001 From: rth Date: Thu, 5 Aug 2010 19:41:31 +0000 Subject: [PATCH] Replace exact_log2(x & -x) in favor of more direct computation. * toplev.h (ctz_hwi, clz_hwi, ffs_hwi): New. (floor_log2): Use clz_hwi. (exact_log2): Use ctz_hwi. * toplev.c (ctz_hwi, clz_hwi, ffs_hwi): New. * builtins.c (fold_builtin_bitop): Use them. * simplify-rtx.c (simplify_const_unary_operation): Likewise. * combine.c (get_pos_from_mask): Use ctz_hwi. * double-int.c (double_int_ctz): Likewise. * explow.c (force_reg): Likewise. * tree.h (SET_DECL_OFFSET_ALIGN): Use ffs_hwi. git-svn-id: svn+ssh://gcc.gnu.org/svn/gcc/trunk@162920 138bc75d-0d04-0410-961f-82ee72b054a4 --- gcc/ChangeLog | 13 +++++++++++++ gcc/builtins.c | 12 ++++++------ gcc/combine.c | 2 +- gcc/double-int.c | 10 +--------- gcc/explow.c | 10 +++++++--- gcc/simplify-rtx.c | 23 +++++++++-------------- gcc/toplev.c | 32 +++++++++++++++++++++++++++++--- gcc/toplev.h | 50 ++++++++++++++++++++++++++++++++++++++++++-------- gcc/tree.h | 4 +--- 9 files changed, 109 insertions(+), 47 deletions(-) diff --git a/gcc/ChangeLog b/gcc/ChangeLog index 0f58431..b7de0ed 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,5 +1,18 @@ 2010-08-05 Richard Henderson + * toplev.h (ctz_hwi, clz_hwi, ffs_hwi): New. + (floor_log2): Use clz_hwi. + (exact_log2): Use ctz_hwi. + * toplev.c (ctz_hwi, clz_hwi, ffs_hwi): New. + * builtins.c (fold_builtin_bitop): Use them. + * simplify-rtx.c (simplify_const_unary_operation): Likewise. + * combine.c (get_pos_from_mask): Use ctz_hwi. + * double-int.c (double_int_ctz): Likewise. + * explow.c (force_reg): Likewise. + * tree.h (SET_DECL_OFFSET_ALIGN): Use ffs_hwi. + +2010-08-05 Richard Henderson + PR target/45189 * config/i386/i386.c (ix86_emit_save_reg_using_mov): Make sure the alignment constant is properly sign-extended. diff --git a/gcc/builtins.c b/gcc/builtins.c index b20426c..096fec6 100644 --- a/gcc/builtins.c +++ b/gcc/builtins.c @@ -7676,9 +7676,9 @@ fold_builtin_bitop (tree fndecl, tree arg) { CASE_INT_FN (BUILT_IN_FFS): if (lo != 0) - result = exact_log2 (lo & -lo) + 1; + result = ffs_hwi (lo); else if (hi != 0) - result = HOST_BITS_PER_WIDE_INT + exact_log2 (hi & -hi) + 1; + result = HOST_BITS_PER_WIDE_INT + ffs_hwi (hi); else result = 0; break; @@ -7694,9 +7694,9 @@ fold_builtin_bitop (tree fndecl, tree arg) CASE_INT_FN (BUILT_IN_CTZ): if (lo != 0) - result = exact_log2 (lo & -lo); + result = ctz_hwi (lo); else if (hi != 0) - result = HOST_BITS_PER_WIDE_INT + exact_log2 (hi & -hi); + result = HOST_BITS_PER_WIDE_INT + ctz_hwi (hi); else if (! CTZ_DEFINED_VALUE_AT_ZERO (TYPE_MODE (type), result)) result = width; break; @@ -7706,7 +7706,7 @@ fold_builtin_bitop (tree fndecl, tree arg) while (lo) result++, lo &= lo - 1; while (hi) - result++, hi &= hi - 1; + result++, hi &= (unsigned HOST_WIDE_INT) hi - 1; break; CASE_INT_FN (BUILT_IN_PARITY): @@ -7714,7 +7714,7 @@ fold_builtin_bitop (tree fndecl, tree arg) while (lo) result++, lo &= lo - 1; while (hi) - result++, hi &= hi - 1; + result++, hi &= (unsigned HOST_WIDE_INT) hi - 1; result &= 1; break; diff --git a/gcc/combine.c b/gcc/combine.c index 0725c86..41a0ec1 100644 --- a/gcc/combine.c +++ b/gcc/combine.c @@ -7436,7 +7436,7 @@ static int get_pos_from_mask (unsigned HOST_WIDE_INT m, unsigned HOST_WIDE_INT *plen) { /* Get the bit number of the first 1 bit from the right, -1 if none. */ - int pos = exact_log2 (m & -m); + int pos = m ? ctz_hwi (m) : -1; int len = 0; if (pos >= 0) diff --git a/gcc/double-int.c b/gcc/double-int.c index 29e720b..cb63f85 100644 --- a/gcc/double-int.c +++ b/gcc/double-int.c @@ -859,15 +859,7 @@ double_int_ctz (double_int a) unsigned bits = a.low ? 0 : HOST_BITS_PER_WIDE_INT; if (!w) return HOST_BITS_PER_DOUBLE_INT; -#if (GCC_VERSION >= 3004) - bits += CTZ_HWI (w); -#else - while (!(w & 1)) - { - w >>= 1; - bits += 1; - } -#endif + bits += ctz_hwi (w); return bits; } diff --git a/gcc/explow.c b/gcc/explow.c index 09f786a..6f60b2e 100644 --- a/gcc/explow.c +++ b/gcc/explow.c @@ -707,9 +707,13 @@ force_reg (enum machine_mode mode, rtx x) if (SYMBOL_REF_DECL (s) && DECL_P (SYMBOL_REF_DECL (s))) sa = DECL_ALIGN (SYMBOL_REF_DECL (s)); - ca = exact_log2 (INTVAL (c) & -INTVAL (c)) * BITS_PER_UNIT; - - align = MIN (sa, ca); + if (INTVAL (c) == 0) + align = sa; + else + { + ca = ctz_hwi (INTVAL (c)) * BITS_PER_UNIT; + align = MIN (sa, ca); + } } if (align || (MEM_P (x) && MEM_POINTER (x))) diff --git a/gcc/simplify-rtx.c b/gcc/simplify-rtx.c index 86de77e..a7a91e5 100644 --- a/gcc/simplify-rtx.c +++ b/gcc/simplify-rtx.c @@ -1197,10 +1197,8 @@ simplify_const_unary_operation (enum rtx_code code, enum machine_mode mode, break; case FFS: - /* Don't use ffs here. Instead, get low order bit and then its - number. If arg0 is zero, this will return 0, as desired. */ arg0 &= GET_MODE_MASK (mode); - val = exact_log2 (arg0 & (- arg0)) + 1; + val = ffs_hwi (arg0); break; case CLZ: @@ -1221,7 +1219,7 @@ simplify_const_unary_operation (enum rtx_code code, enum machine_mode mode, val = GET_MODE_BITSIZE (mode); } else - val = exact_log2 (arg0 & -arg0); + val = ctz_hwi (arg0); break; case POPCOUNT: @@ -1351,15 +1349,12 @@ simplify_const_unary_operation (enum rtx_code code, enum machine_mode mode, case FFS: hv = 0; - if (l1 == 0) - { - if (h1 == 0) - lv = 0; - else - lv = HOST_BITS_PER_WIDE_INT + exact_log2 (h1 & -h1) + 1; - } + if (l1 != 0) + lv = ffs_hwi (l1); + else if (h1 != 0) + lv = HOST_BITS_PER_WIDE_INT + ffs_hwi (h1); else - lv = exact_log2 (l1 & -l1) + 1; + lv = 0; break; case CLZ: @@ -1376,9 +1371,9 @@ simplify_const_unary_operation (enum rtx_code code, enum machine_mode mode, case CTZ: hv = 0; if (l1 != 0) - lv = exact_log2 (l1 & -l1); + lv = ctz_hwi (l1); else if (h1 != 0) - lv = HOST_BITS_PER_WIDE_INT + exact_log2 (h1 & -h1); + lv = HOST_BITS_PER_WIDE_INT + ctz_hwi (h1); else if (! CTZ_DEFINED_VALUE_AT_ZERO (mode, lv)) lv = GET_MODE_BITSIZE (mode); break; diff --git a/gcc/toplev.c b/gcc/toplev.c index 3836d0c..ff82466 100644 --- a/gcc/toplev.c +++ b/gcc/toplev.c @@ -484,9 +484,9 @@ set_random_seed (const char *val) #if GCC_VERSION < 3004 -/* The functions floor_log2 and exact_log2 are defined as inline - functions in toplev.h if GCC_VERSION >= 3004. The definitions here - are used for older versions of gcc. */ +/* The functions clz_hwi, ctz_hwi, ffs_hwi, floor_log2 and exact_log2 + are defined as inline functions in toplev.h if GCC_VERSION >= 3004. + The definitions here are used for older versions of gcc. */ /* Given X, an unsigned number, return the largest int Y such that 2**Y <= X. If X is 0, return -1. */ @@ -530,6 +530,32 @@ exact_log2 (unsigned HOST_WIDE_INT x) return floor_log2 (x); } +/* Given X, an unsigned number, return the number of least significant bits + that are zero. When X == 0, the result is the word size. */ + +int +ctz_hwi (unsigned HOST_WIDE_INT x) +{ + return x ? floor_log2 (x & -x) : HOST_BITS_PER_WIDE_INT; +} + +/* Similarly for most significant bits. */ + +int +clz_hwi (unsigned HOST_WIDE_INT x) +{ + return HOST_BITS_PER_WIDE_INT - 1 - floor_log2(x); +} + +/* Similar to ctz_hwi, except that the least significant bit is numbered + starting from 1, and X == 0 yields 0. */ + +int +ffs_hwi (unsigned HOST_WIDE_INT x) +{ + return 1 + floor_log2 (x & -x); +} + #endif /* GCC_VERSION < 3004 */ /* Handler for fatal signals, such as SIGSEGV. These are transformed diff --git a/gcc/toplev.h b/gcc/toplev.h index 44920ed..6de27a0 100644 --- a/gcc/toplev.h +++ b/gcc/toplev.h @@ -111,6 +111,10 @@ extern bool fast_math_flags_struct_set_p (struct cl_optimization *); /* Inline versions of the above for speed. */ #if GCC_VERSION < 3004 +extern int clz_hwi (unsigned HOST_WIDE_INT x); +extern int ctz_hwi (unsigned HOST_WIDE_INT x); +extern int ffs_hwi (unsigned HOST_WIDE_INT x); + /* Return log2, or -1 if not exact. */ extern int exact_log2 (unsigned HOST_WIDE_INT); @@ -119,27 +123,57 @@ extern int floor_log2 (unsigned HOST_WIDE_INT); #else /* GCC_VERSION >= 3004 */ +/* For convenience, define 0 -> word_size. */ +static inline int +clz_hwi (unsigned HOST_WIDE_INT x) +{ + if (x == 0) + return HOST_BITS_PER_WIDE_INT; +# if HOST_BITS_PER_WIDE_INT == HOST_BITS_PER_LONG + return __builtin_clzl (x); +# elif HOST_BITS_PER_WIDE_INT == HOST_BITS_PER_LONGLONG + return __builtin_clzll (x); +# else + return __builtin_clz (x); +# endif +} + +static inline int +ctz_hwi (unsigned HOST_WIDE_INT x) +{ + if (x == 0) + return HOST_BITS_PER_WIDE_INT; +# if HOST_BITS_PER_WIDE_INT == HOST_BITS_PER_LONG + return __builtin_ctzl (x); +# elif HOST_BITS_PER_WIDE_INT == HOST_BITS_PER_LONGLONG + return __builtin_ctzll (x); +# else + return __builtin_ctz (x); +# endif +} + +static inline int +ffs_hwi (unsigned HOST_WIDE_INT x) +{ # if HOST_BITS_PER_WIDE_INT == HOST_BITS_PER_LONG -# define CLZ_HWI __builtin_clzl -# define CTZ_HWI __builtin_ctzl + return __builtin_ffsl (x); # elif HOST_BITS_PER_WIDE_INT == HOST_BITS_PER_LONGLONG -# define CLZ_HWI __builtin_clzll -# define CTZ_HWI __builtin_ctzll + return __builtin_ffsll (x); # else -# define CLZ_HWI __builtin_clz -# define CTZ_HWI __builtin_ctz + return __builtin_ffs (x); # endif +} static inline int floor_log2 (unsigned HOST_WIDE_INT x) { - return x ? HOST_BITS_PER_WIDE_INT - 1 - (int) CLZ_HWI (x) : -1; + return HOST_BITS_PER_WIDE_INT - 1 - clz_hwi (x); } static inline int exact_log2 (unsigned HOST_WIDE_INT x) { - return x == (x & -x) && x ? (int) CTZ_HWI (x) : -1; + return x == (x & -x) && x ? ctz_hwi (x) : -1; } #endif /* GCC_VERSION >= 3004 */ diff --git a/gcc/tree.h b/gcc/tree.h index af8345a..931155c 100644 --- a/gcc/tree.h +++ b/gcc/tree.h @@ -2850,9 +2850,7 @@ struct GTY(()) tree_decl_with_rtl { /* Specify that DECL_ALIGN(NODE) is a multiple of X. */ #define SET_DECL_OFFSET_ALIGN(NODE, X) \ - (FIELD_DECL_CHECK (NODE)->decl_common.off_align = exact_log2 ((X) & -(X))) -/* 1 if the alignment for this type was requested by "aligned" attribute, - 0 if it is the default for this type. */ + (FIELD_DECL_CHECK (NODE)->decl_common.off_align = ffs_hwi (X) - 1) /* For FIELD_DECLS, DECL_FCONTEXT is the *first* baseclass in which this FIELD_DECL is defined. This information is needed when -- 2.7.4