Improve integer bit test on __atomic_fetch_[or|and]_* returns
commit
adedd5c173388ae505470df152b9cb3947339566
Author: Jakub Jelinek <jakub@redhat.com>
Date: Tue May 3 13:37:25 2016 +0200
re PR target/49244 (__sync or __atomic builtins will not emit 'lock bts/btr/btc')
optimized bit test on __atomic_fetch_or_* and __atomic_fetch_and_* returns
with lock bts/btr/btc by turning
mask_2 = 1 << cnt_1;
_4 = __atomic_fetch_or_* (ptr_6, mask_2, _3);
_5 = _4 & mask_2;
into
_4 = ATOMIC_BIT_TEST_AND_SET (ptr_6, cnt_1, 0, _3);
_5 = _4;
and
mask_6 = 1 << bit_5(D);
_1 = ~mask_6;
_2 = __atomic_fetch_and_4 (v_8(D), _1, 0);
_3 = _2 & mask_6;
_4 = _3 != 0;
into
mask_6 = 1 << bit_5(D);
_1 = ~mask_6;
_11 = .ATOMIC_BIT_TEST_AND_RESET (v_8(D), bit_5(D), 1, 0);
_4 = _11 != 0;
But it failed to optimize many equivalent, but slighly different cases:
1.
_1 = __atomic_fetch_or_4 (ptr_6, 1, _3);
_4 = (_Bool) _1;
2.
_1 = __atomic_fetch_and_4 (ptr_6, ~1, _3);
_4 = (_Bool) _1;
3.
_1 = __atomic_fetch_or_4 (ptr_6, 1, _3);
_7 = ~_1;
_5 = (_Bool) _7;
4.
_1 = __atomic_fetch_and_4 (ptr_6, ~1, _3);
_7 = ~_1;
_5 = (_Bool) _7;
5.
_1 = __atomic_fetch_or_4 (ptr_6, 1, _3);
_2 = (int) _1;
_7 = ~_2;
_5 = (_Bool) _7;
6.
_1 = __atomic_fetch_and_4 (ptr_6, ~1, _3);
_2 = (int) _1;
_7 = ~_2;
_5 = (_Bool) _7;
7.
_1 = __atomic_fetch_or_4 (ptr_6, 0x80000000, _3);
_5 = (signed int) _1;
_4 = _5 < 0;
8.
_1 = __atomic_fetch_and_4 (ptr_6, 0x7fffffff, _3);
_5 = (signed int) _1;
_4 = _5 < 0;
9.
_1 = 1 << bit_4(D);
mask_5 = (unsigned int) _1;
_2 = __atomic_fetch_or_4 (v_7(D), mask_5, 0);
_3 = _2 & mask_5;
10.
mask_7 = 1 << bit_6(D);
_1 = ~mask_7;
_2 = (unsigned int) _1;
_3 = __atomic_fetch_and_4 (v_9(D), _2, 0);
_4 = (int) _3;
_5 = _4 & mask_7;
We make
mask_2 = 1 << cnt_1;
_4 = __atomic_fetch_or_* (ptr_6, mask_2, _3);
_5 = _4 & mask_2;
and
mask_6 = 1 << bit_5(D);
_1 = ~mask_6;
_2 = __atomic_fetch_and_4 (v_8(D), _1, 0);
_3 = _2 & mask_6;
_4 = _3 != 0;
the canonical forms for this optimization and transform cases 1-9 to the
equivalent canonical form. For cases 10 and 11, we simply remove the cast
before __atomic_fetch_or_4/__atomic_fetch_and_4 with
_1 = 1 << bit_4(D);
_2 = __atomic_fetch_or_4 (v_7(D), _1, 0);
_3 = _2 & _1;
and
mask_7 = 1 << bit_6(D);
_1 = ~mask_7;
_3 = __atomic_fetch_and_4 (v_9(D), _1, 0);
_6 = _3 & mask_7;
_5 = (int) _6;
2021-11-04 H.J. Lu <hongjiu.lu@intel.com>
Hongtao Liu <hongtao.liu@intel.com>
gcc/
PR middle-end/102566
* match.pd (nop_atomic_bit_test_and_p): New match.
* tree-ssa-ccp.c (convert_atomic_bit_not): New function.
(gimple_nop_atomic_bit_test_and_p): New prototype.
(optimize_atomic_bit_test_and): Transform equivalent, but slighly
different cases to their canonical forms.
gcc/testsuite/
PR middle-end/102566
* g++.target/i386/pr102566-1.C: New test.
* g++.target/i386/pr102566-2.C: Likewise.
* g++.target/i386/pr102566-3.C: Likewise.
* g++.target/i386/pr102566-4.C: Likewise.
* g++.target/i386/pr102566-5a.C: Likewise.
* g++.target/i386/pr102566-5b.C: Likewise.
* g++.target/i386/pr102566-6a.C: Likewise.
* g++.target/i386/pr102566-6b.C: Likewise.
* gcc.target/i386/pr102566-1a.c: Likewise.
* gcc.target/i386/pr102566-1b.c: Likewise.
* gcc.target/i386/pr102566-2.c: Likewise.
* gcc.target/i386/pr102566-3a.c: Likewise.
* gcc.target/i386/pr102566-3b.c: Likewise.
* gcc.target/i386/pr102566-4.c: Likewise.
* gcc.target/i386/pr102566-5.c: Likewise.
* gcc.target/i386/pr102566-6.c: Likewise.
* gcc.target/i386/pr102566-7.c: Likewise.
* gcc.target/i386/pr102566-8a.c: Likewise.
* gcc.target/i386/pr102566-8b.c: Likewise.
* gcc.target/i386/pr102566-9a.c: Likewise.
* gcc.target/i386/pr102566-9b.c: Likewise.
* gcc.target/i386/pr102566-10a.c: Likewise.
* gcc.target/i386/pr102566-10b.c: Likewise.
* gcc.target/i386/pr102566-11.c: Likewise.
* gcc.target/i386/pr102566-12.c: Likewise.
* gcc.target/i386/pr102566-13.c: New test.
* gcc.target/i386/pr102566-14.c: New test.