void
find_bswap_or_nop_finalize (struct symbolic_number *n, uint64_t *cmpxchg,
- uint64_t *cmpnop)
+ uint64_t *cmpnop, bool *cast64_to_32)
{
unsigned rsize;
uint64_t tmpn, mask;
according to the size of the symbolic number before using it. */
*cmpxchg = CMPXCHG;
*cmpnop = CMPNOP;
+ *cast64_to_32 = false;
/* Find real size of result (highest non-zero byte). */
if (n->base_addr)
if (n->range < (int) sizeof (int64_t))
{
mask = ((uint64_t) 1 << (n->range * BITS_PER_MARKER)) - 1;
- *cmpxchg >>= (64 / BITS_PER_MARKER - n->range) * BITS_PER_MARKER;
+ if (n->base_addr == NULL
+ && n->range == 4
+ && int_size_in_bytes (TREE_TYPE (n->src)) == 8)
+ {
+ /* If all bytes in n->n are either 0 or in [5..8] range, this
+ might be a candidate for (unsigned) __builtin_bswap64 (src).
+ It is not worth it for (unsigned short) __builtin_bswap64 (src)
+ or (unsigned short) __builtin_bswap32 (src). */
+ *cast64_to_32 = true;
+ for (tmpn = n->n; tmpn; tmpn >>= BITS_PER_MARKER)
+ if ((tmpn & MARKER_MASK)
+ && ((tmpn & MARKER_MASK) <= 4 || (tmpn & MARKER_MASK) > 8))
+ {
+ *cast64_to_32 = false;
+ break;
+ }
+ }
+ if (*cast64_to_32)
+ *cmpxchg &= mask;
+ else
+ *cmpxchg >>= (64 / BITS_PER_MARKER - n->range) * BITS_PER_MARKER;
*cmpnop &= mask;
}
n->range = rsize;
}
+ if (*cast64_to_32)
+ n->range = 8;
n->range *= BITS_PER_UNIT;
}
expression. */
gimple *
-find_bswap_or_nop (gimple *stmt, struct symbolic_number *n, bool *bswap)
+find_bswap_or_nop (gimple *stmt, struct symbolic_number *n, bool *bswap,
+ bool *cast64_to_32, uint64_t *mask)
{
tree type_size = TYPE_SIZE_UNIT (TREE_TYPE (gimple_get_lhs (stmt)));
if (!tree_fits_uhwi_p (type_size))
}
uint64_t cmpxchg, cmpnop;
- find_bswap_or_nop_finalize (n, &cmpxchg, &cmpnop);
+ find_bswap_or_nop_finalize (n, &cmpxchg, &cmpnop, cast64_to_32);
/* A complete byte swap should make the symbolic number to start with
the largest digit in the highest order byte. Unchanged symbolic
number indicates a read with same endianness as target architecture. */
+ *mask = ~(uint64_t) 0;
if (n->n == cmpnop)
*bswap = false;
else if (n->n == cmpxchg)
*bswap = true;
else
- return NULL;
+ {
+ int set = 0;
+ for (uint64_t msk = MARKER_MASK; msk; msk <<= BITS_PER_MARKER)
+ if ((n->n & msk) == 0)
+ *mask &= ~msk;
+ else if ((n->n & msk) == (cmpxchg & msk))
+ set++;
+ else
+ return NULL;
+ if (set < 2)
+ return NULL;
+ *bswap = true;
+ }
/* Useless bit manipulation performed by code. */
if (!n->base_addr && n->n == cmpnop && n->n_ops == 1)
tree
bswap_replace (gimple_stmt_iterator gsi, gimple *ins_stmt, tree fndecl,
tree bswap_type, tree load_type, struct symbolic_number *n,
- bool bswap)
+ bool bswap, uint64_t mask)
{
tree src, tmp, tgt = NULL_TREE;
- gimple *bswap_stmt;
+ gimple *bswap_stmt, *mask_stmt = NULL;
tree_code conv_code = NOP_EXPR;
gimple *cur_stmt = gsi_stmt (gsi);
tgt = make_ssa_name (bswap_type);
tmp = tgt;
+ if (mask != ~(uint64_t) 0)
+ {
+ tree m = build_int_cst (bswap_type, mask);
+ tmp = make_temp_ssa_name (bswap_type, NULL, "bswapdst");
+ gimple_set_lhs (bswap_stmt, tmp);
+ mask_stmt = gimple_build_assign (tgt, BIT_AND_EXPR, tmp, m);
+ tmp = tgt;
+ }
+
/* Convert the result if necessary. */
if (!useless_type_conversion_p (TREE_TYPE (tgt), bswap_type))
{
gsi_insert_after (&gsi, convert_stmt, GSI_SAME_STMT);
}
- gimple_set_lhs (bswap_stmt, tmp);
+ gimple_set_lhs (mask_stmt ? mask_stmt : bswap_stmt, tmp);
if (dump_file)
{
if (cur_stmt)
{
+ if (mask_stmt)
+ gsi_insert_after (&gsi, mask_stmt, GSI_SAME_STMT);
gsi_insert_after (&gsi, bswap_stmt, GSI_SAME_STMT);
gsi_remove (&gsi, true);
}
else
- gsi_insert_before (&gsi, bswap_stmt, GSI_SAME_STMT);
+ {
+ gsi_insert_before (&gsi, bswap_stmt, GSI_SAME_STMT);
+ if (mask_stmt)
+ gsi_insert_before (&gsi, mask_stmt, GSI_SAME_STMT);
+ }
return tgt;
}
return false;
}
- gimple *ins_stmt = find_bswap_or_nop (cur_stmt, &n, &bswap);
- if (!ins_stmt || n.range != (unsigned HOST_WIDE_INT) sz)
+ bool cast64_to_32;
+ uint64_t mask;
+ gimple *ins_stmt = find_bswap_or_nop (cur_stmt, &n, &bswap,
+ &cast64_to_32, &mask);
+ if (!ins_stmt
+ || n.range != (unsigned HOST_WIDE_INT) sz
+ || cast64_to_32
+ || mask != ~(uint64_t) 0)
return false;
if (bswap && !fndecl && n.range != 16)
memset (&nop_stats, 0, sizeof (nop_stats));
memset (&bswap_stats, 0, sizeof (bswap_stats));
return bswap_replace (gsi_for_stmt (cur_stmt), ins_stmt, fndecl,
- bswap_type, load_type, &n, bswap) != NULL_TREE;
+ bswap_type, load_type, &n, bswap, mask) != NULL_TREE;
}
/* Find manual byte swap implementations as well as load in a given
tree fndecl = NULL_TREE, bswap_type = NULL_TREE, load_type;
enum tree_code code;
struct symbolic_number n;
- bool bswap;
+ bool bswap, cast64_to_32;
+ uint64_t mask;
/* This gsi_prev (&gsi) is not part of the for loop because cur_stmt
might be moved to a different basic block by bswap_replace and gsi
continue;
}
- ins_stmt = find_bswap_or_nop (cur_stmt, &n, &bswap);
+ ins_stmt = find_bswap_or_nop (cur_stmt, &n, &bswap,
+ &cast64_to_32, &mask);
if (!ins_stmt)
continue;
continue;
if (bswap_replace (gsi_for_stmt (cur_stmt), ins_stmt, fndecl,
- bswap_type, load_type, &n, bswap))
+ bswap_type, load_type, &n, bswap, mask))
changed = true;
}
}
}
uint64_t cmpxchg, cmpnop;
- find_bswap_or_nop_finalize (&n, &cmpxchg, &cmpnop);
+ bool cast64_to_32;
+ find_bswap_or_nop_finalize (&n, &cmpxchg, &cmpnop, &cast64_to_32);
/* A complete byte swap should make the symbolic number to start with
the largest digit in the highest order byte. Unchanged symbolic
if (n.n != cmpnop && n.n != cmpxchg)
return false;
+ /* For now, do not handle the cast64_to_32 case here. */
+ if (cast64_to_32)
+ return false;
+
if (n.base_addr == NULL_TREE && !is_gimple_val (n.src))
return false;
n->vuse = gimple_vuse (ins_stmt);
}
bswap_res = bswap_replace (gsi_start (seq), ins_stmt, fndecl,
- bswap_type, load_type, n, bswap);
+ bswap_type, load_type, n, bswap,
+ ~(uint64_t) 0);
gcc_assert (bswap_res);
}
--- /dev/null
+/* PR tree-optimization/86723 */
+/* { dg-do run { target { ilp32 || lp64 } } } */
+/* { dg-options "-O2" } */
+
+__attribute__((noipa)) int
+foo (unsigned long long value) /* ORs together all eight bytes of VALUE in
+ reversed order (byte 0 = least significant); the 64-bit result is
+ converted to int by the return.
+ NOTE(review): noipa presumably keeps the caller from folding the
+ call -- confirm against the GCC attribute documentation.  */
+{
+ return (((value & 0x00000000000000ffull) << 56) /* byte 0 -> byte 7 */
+ | ((value & 0x000000000000ff00ull) << 40) /* byte 1 -> byte 6 */
+ | ((value & 0x0000000000ff0000ull) << 24) /* byte 2 -> byte 5 */
+ | ((value & 0x00000000ff000000ull) << 8) /* byte 3 -> byte 4 */
+ | ((value & 0x000000ff00000000ull) >> 8) /* byte 4 -> byte 3 */
+ | ((value & 0x0000ff0000000000ull) >> 24) /* byte 5 -> byte 2 */
+ | ((value & 0x00ff000000000000ull) >> 40) /* byte 6 -> byte 1 */
+ | ((value & 0xff00000000000000ull) >> 56)); /* byte 7 -> byte 0 */
+}
+
+__attribute__((noipa)) int
+bar (unsigned long long value) /* Moves only the upper four bytes of VALUE
+ into the lower four byte positions, in reversed order (byte 0 = least
+ significant); the upper four result bytes stay zero and the value is
+ converted to int by the return.
+ NOTE(review): noipa presumably keeps the caller from folding the
+ call -- confirm against the GCC attribute documentation.  */
+{
+ return (((value & 0x000000ff00000000ull) >> 8) /* byte 4 -> byte 3 */
+ | ((value & 0x0000ff0000000000ull) >> 24) /* byte 5 -> byte 2 */
+ | ((value & 0x00ff000000000000ull) >> 40) /* byte 6 -> byte 1 */
+ | ((value & 0xff00000000000000ull) >> 56)); /* byte 7 -> byte 0 */
+}
+
+__attribute__((noipa)) unsigned long long
+baz (unsigned long long value) /* Reverses the bytes of VALUE (byte 0 =
+ least significant) but has no term for source bytes 2 and 6, so result
+ bytes 5 and 1 are always zero.
+ NOTE(review): noipa presumably keeps the caller from folding the
+ call -- confirm against the GCC attribute documentation.  */
+{
+ return (((value & 0x00000000000000ffull) << 56) /* byte 0 -> byte 7 */
+ | ((value & 0x000000000000ff00ull) << 40) /* byte 1 -> byte 6 */
+ | ((value & 0x00000000ff000000ull) << 8) /* byte 3 -> byte 4 */
+ | ((value & 0x000000ff00000000ull) >> 8) /* byte 4 -> byte 3 */
+ | ((value & 0x0000ff0000000000ull) >> 24) /* byte 5 -> byte 2 */
+ | ((value & 0xff00000000000000ull) >> 56)); /* byte 7 -> byte 0 */
+}
+
+__attribute__((noipa)) unsigned int
+qux (unsigned int value) /* Reverses the bytes of a 32-bit VALUE (byte 0 =
+ least significant) but has no term for source byte 1, so result byte 2
+ is always zero.
+ NOTE(review): noipa presumably keeps the caller from folding the
+ call -- confirm against the GCC attribute documentation.  */
+{
+ return (((value & 0x000000ff) << 24) /* byte 0 -> byte 3 */
+ | ((value & 0x00ff0000) >> 8) /* byte 2 -> byte 1 */
+ | ((value & 0xff000000) >> 24)); /* byte 3 -> byte 0 */
+}
+
+__attribute__((noipa)) unsigned int
+corge (unsigned int value) /* Exchanges only the lowest and highest bytes
+ of a 32-bit VALUE (byte 0 = least significant); the two middle result
+ bytes are always zero.
+ NOTE(review): noipa presumably keeps the caller from folding the
+ call -- confirm against the GCC attribute documentation.  */
+{
+ return (((value & 0x000000ff) << 24) /* byte 0 -> byte 3 */
+ | ((value & 0xff000000) >> 24)); /* byte 3 -> byte 0 */
+}
+
+int
+main () /* Calls each helper once with a fixed input whose bytes are all
+ distinct and aborts if any result differs from the expected constant;
+ returns 0 otherwise.  */
+{
+ if (foo (0x0102030405060708ull) != 0x04030201 /* low four bytes of the reversed input */
+ || bar (0x0102030405060708ull) != 0x04030201 /* same value from the four-term form */
+ || baz (0x0102030405060708ull) != 0x0807000504030001ull /* result bytes 5 and 1 are zero */
+ || qux (0x01020304) != 0x04000201 /* result byte 2 is zero */
+ || corge (0x01020304) != 0x04000001) /* only the outer bytes are exchanged */
+ __builtin_abort ();
+ return 0;
+}
--- /dev/null
+/* PR tree-optimization/86723 */
+/* { dg-do compile { target lp64 } } */
+/* { dg-options "-O2" } */
+/* { dg-final { scan-assembler-times "\tbswap\t" 5 } } */
+
+int
+foo (unsigned long long value) /* ORs together all eight bytes of VALUE in
+ reversed order (byte 0 = least significant); the 64-bit result is
+ converted to int by the return.
+ NOTE(review): presumably expected to account for one of the five
+ bswap instructions counted by this test's dg-final directive --
+ confirm.  */
+{
+ return (((value & 0x00000000000000ffull) << 56) /* byte 0 -> byte 7 */
+ | ((value & 0x000000000000ff00ull) << 40) /* byte 1 -> byte 6 */
+ | ((value & 0x0000000000ff0000ull) << 24) /* byte 2 -> byte 5 */
+ | ((value & 0x00000000ff000000ull) << 8) /* byte 3 -> byte 4 */
+ | ((value & 0x000000ff00000000ull) >> 8) /* byte 4 -> byte 3 */
+ | ((value & 0x0000ff0000000000ull) >> 24) /* byte 5 -> byte 2 */
+ | ((value & 0x00ff000000000000ull) >> 40) /* byte 6 -> byte 1 */
+ | ((value & 0xff00000000000000ull) >> 56)); /* byte 7 -> byte 0 */
+}
+
+int
+bar (unsigned long long value) /* Moves only the upper four bytes of VALUE
+ into the lower four byte positions, in reversed order (byte 0 = least
+ significant); the upper four result bytes stay zero and the value is
+ converted to int by the return.
+ NOTE(review): presumably expected to account for one of the five
+ bswap instructions counted by this test's dg-final directive --
+ confirm.  */
+{
+ return (((value & 0x000000ff00000000ull) >> 8) /* byte 4 -> byte 3 */
+ | ((value & 0x0000ff0000000000ull) >> 24) /* byte 5 -> byte 2 */
+ | ((value & 0x00ff000000000000ull) >> 40) /* byte 6 -> byte 1 */
+ | ((value & 0xff00000000000000ull) >> 56)); /* byte 7 -> byte 0 */
+}
+
+unsigned long long
+baz (unsigned long long value) /* Reverses the bytes of VALUE (byte 0 =
+ least significant) but has no term for source bytes 2 and 6, so result
+ bytes 5 and 1 are always zero.
+ NOTE(review): presumably expected to account for one of the five
+ bswap instructions counted by this test's dg-final directive --
+ confirm.  */
+{
+ return (((value & 0x00000000000000ffull) << 56) /* byte 0 -> byte 7 */
+ | ((value & 0x000000000000ff00ull) << 40) /* byte 1 -> byte 6 */
+ | ((value & 0x00000000ff000000ull) << 8) /* byte 3 -> byte 4 */
+ | ((value & 0x000000ff00000000ull) >> 8) /* byte 4 -> byte 3 */
+ | ((value & 0x0000ff0000000000ull) >> 24) /* byte 5 -> byte 2 */
+ | ((value & 0xff00000000000000ull) >> 56)); /* byte 7 -> byte 0 */
+}
+
+unsigned int
+qux (unsigned int value) /* Reverses the bytes of a 32-bit VALUE (byte 0 =
+ least significant) but has no term for source byte 1, so result byte 2
+ is always zero.
+ NOTE(review): presumably expected to account for one of the five
+ bswap instructions counted by this test's dg-final directive --
+ confirm.  */
+{
+ return (((value & 0x000000ff) << 24) /* byte 0 -> byte 3 */
+ | ((value & 0x00ff0000) >> 8) /* byte 2 -> byte 1 */
+ | ((value & 0xff000000) >> 24)); /* byte 3 -> byte 0 */
+}
+
+unsigned int
+corge (unsigned int value) /* Exchanges only the lowest and highest bytes
+ of a 32-bit VALUE (byte 0 = least significant); the two middle result
+ bytes are always zero.
+ NOTE(review): presumably expected to account for one of the five
+ bswap instructions counted by this test's dg-final directive --
+ confirm.  */
+{
+ return (((value & 0x000000ff) << 24) /* byte 0 -> byte 3 */
+ | ((value & 0xff000000) >> 24)); /* byte 3 -> byte 0 */
+}