Enhance optimize_atomic_bit_test_and to handle truncation.
author liuhongt <hongtao.liu@intel.com>
Tue, 16 Nov 2021 05:36:36 +0000 (13:36 +0800)
committer liuhongt <hongtao.liu@intel.com>
Wed, 24 Nov 2021 01:00:58 +0000 (09:00 +0800)
r12-5102-gfb161782545224f5 improves the integer bit test on the return
value of __atomic_fetch_[or|and]_*, but only for nop_convert, i.e.,

transform

  mask_5 = 1 << bit_4(D);
  mask.0_1 = (unsigned int) mask_5;
  _2 = __atomic_fetch_or_4 (a_7(D), mask.0_1, 0);
  t1_9 = (int) _2;
  t2_10 = mask_5 & t1_9;

to

  mask_5 = 1 << n_4(D);
  mask.1_1 = (unsigned int) mask_5;
  _11 = .ATOMIC_BIT_TEST_AND_SET (&pscc_a_1_4, n_4(D), 0);
  _8 = (int) _11;

This patch extends the original patch to also handle truncation,
i.e.,

transform

  long int mask;
  mask_8 = 1 << n_7(D);
  mask.0_1 = (long unsigned int) mask_8;
  _2 = __sync_fetch_and_or_8 (&pscc_a_2_3, mask.0_1);
  _3 = (unsigned int) _2;
  _4 = (unsigned int) mask_8;
  _5 = _3 & _4;
  _6 = (int) _5;

to

  long int mask;
  mask_8 = 1 << n_7(D);
  mask.0_1 = (long unsigned int) mask_8;
  _14 = .ATOMIC_BIT_TEST_AND_SET (&pscc_a_2_3, n_7(D), 0);
  _5 = (unsigned int) _14;
  _6 = (int) _5;

2021-11-17  Hongtao Liu  <hongtao.liu@intel.com>
    H.J. Lu  <hongjiu.lu@intel.com>

gcc/ChangeLog:

PR tree-optimization/103194
* match.pd (gimple_nop_atomic_bit_test_and_p): Extended to
match truncation.
* tree-ssa-ccp.c (gimple_nop_convert): Declare.
(optimize_atomic_bit_test_and): Enhance
optimize_atomic_bit_test_and to handle truncation.

gcc/testsuite/ChangeLog:

* gcc.target/i386/pr103194-2.c: New test.
* gcc.target/i386/pr103194-3.c: New test.
* gcc.target/i386/pr103194-4.c: New test.
* gcc.target/i386/pr103194-5.c: New test.
* gcc.target/i386/pr103194.c: New test.

gcc/match.pd
gcc/testsuite/gcc.target/i386/pr103194-2.c [new file with mode: 0644]
gcc/testsuite/gcc.target/i386/pr103194-3.c [new file with mode: 0644]
gcc/testsuite/gcc.target/i386/pr103194-4.c [new file with mode: 0644]
gcc/testsuite/gcc.target/i386/pr103194-5.c [new file with mode: 0644]
gcc/testsuite/gcc.target/i386/pr103194.c [new file with mode: 0644]
gcc/tree-ssa-ccp.c

index 886f807..60b4ad5 100644 (file)
@@ -4044,39 +4044,43 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
 
 #if GIMPLE
 (match (nop_atomic_bit_test_and_p @0 @1 @4)
- (bit_and (nop_convert?@4 (ATOMIC_FETCH_OR_XOR_N @2 INTEGER_CST@0 @3))
+ (bit_and (convert?@4 (ATOMIC_FETCH_OR_XOR_N @2 INTEGER_CST@0 @3))
           INTEGER_CST@1)
  (with {
         int ibit = tree_log2 (@0);
         int ibit2 = tree_log2 (@1);
        }
   (if (ibit == ibit2
-      && ibit >= 0))))
+      && ibit >= 0
+      && TYPE_PRECISION (type) <= TYPE_PRECISION (TREE_TYPE (@2))))))
 
 (match (nop_atomic_bit_test_and_p @0 @1 @3)
- (bit_and (nop_convert?@3 (SYNC_FETCH_OR_XOR_N @2 INTEGER_CST@0))
+ (bit_and (convert?@3 (SYNC_FETCH_OR_XOR_N @2 INTEGER_CST@0))
          INTEGER_CST@1)
  (with {
         int ibit = tree_log2 (@0);
         int ibit2 = tree_log2 (@1);
        }
   (if (ibit == ibit2
-      && ibit >= 0))))
+      && ibit >= 0
+      && TYPE_PRECISION (type) <= TYPE_PRECISION (TREE_TYPE (@2))))))
 
 (match (nop_atomic_bit_test_and_p @0 @0 @4)
  (bit_and:c
-  (nop_convert?@4
+  (convert1?@4
    (ATOMIC_FETCH_OR_XOR_N @2 (nop_convert? (lshift@0 integer_onep@5 @6)) @3))
-  @0))
+  (convert2? @0))
+ (if (TYPE_PRECISION (type) <= TYPE_PRECISION (TREE_TYPE (@2)))))
 
 (match (nop_atomic_bit_test_and_p @0 @0 @4)
  (bit_and:c
-  (nop_convert?@4
+  (convert1?@4
    (SYNC_FETCH_OR_XOR_N @2 (nop_convert? (lshift@0 integer_onep@3 @5))))
-  @0))
+  (convert2? @0))
+ (if (TYPE_PRECISION (type) <= TYPE_PRECISION (TREE_TYPE (@2)))))
 
 (match (nop_atomic_bit_test_and_p @0 @1 @3)
- (bit_and@4 (nop_convert?@3 (ATOMIC_FETCH_AND_N @2 INTEGER_CST@0 @5))
+ (bit_and@4 (convert?@3 (ATOMIC_FETCH_AND_N @2 INTEGER_CST@0 @5))
            INTEGER_CST@1)
  (with {
         int ibit = wi::exact_log2 (wi::zext (wi::bit_not (wi::to_wide (@0)),
@@ -4084,11 +4088,12 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
         int ibit2 = tree_log2 (@1);
        }
   (if (ibit == ibit2
-      && ibit >= 0))))
+      && ibit >= 0
+      && TYPE_PRECISION (type) <= TYPE_PRECISION (TREE_TYPE (@2))))))
 
 (match (nop_atomic_bit_test_and_p @0 @1 @3)
  (bit_and@4
-  (nop_convert?@3 (SYNC_FETCH_AND_AND_N @2 INTEGER_CST@0))
+  (convert?@3 (SYNC_FETCH_AND_AND_N @2 INTEGER_CST@0))
   INTEGER_CST@1)
  (with {
         int ibit = wi::exact_log2 (wi::zext (wi::bit_not (wi::to_wide (@0)),
@@ -4096,19 +4101,22 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
         int ibit2 = tree_log2 (@1);
        }
   (if (ibit == ibit2
-      && ibit >= 0))))
+      && ibit >= 0
+      && TYPE_PRECISION (type) <= TYPE_PRECISION (TREE_TYPE (@2))))))
 
-(match (nop_atomic_bit_test_and_p @0 @0 @3)
+(match (nop_atomic_bit_test_and_p @4 @0 @3)
  (bit_and:c
-  (nop_convert?@3
-   (ATOMIC_FETCH_AND_N @2 (nop_convert? (bit_not (lshift@0 integer_onep@6 @7))) @5))
-   @0))
+  (convert1?@3
+   (ATOMIC_FETCH_AND_N @2 (nop_convert?@4 (bit_not (lshift@0 integer_onep@6 @7))) @5))
+  (convert2? @0))
+ (if (TYPE_PRECISION (type) <= TYPE_PRECISION (TREE_TYPE (@2)))))
 
-(match (nop_atomic_bit_test_and_p @0 @0 @3)
+(match (nop_atomic_bit_test_and_p @4 @0 @3)
  (bit_and:c
-  (nop_convert?@3
-   (SYNC_FETCH_AND_AND_N @2 (nop_convert? (bit_not (lshift@0 integer_onep@6 @7)))))
-   @0))
+  (convert1?@3
+   (SYNC_FETCH_AND_AND_N @2 (nop_convert?@4 (bit_not (lshift@0 integer_onep@6 @7)))))
+  (convert2? @0))
+  (if (TYPE_PRECISION (type) <= TYPE_PRECISION (TREE_TYPE (@2)))))
 
 #endif
 
diff --git a/gcc/testsuite/gcc.target/i386/pr103194-2.c b/gcc/testsuite/gcc.target/i386/pr103194-2.c
new file mode 100644 (file)
index 0000000..1a991fe
--- /dev/null
@@ -0,0 +1,64 @@
+/* { dg-do compile } */
+/* { dg-options "-O2" } */
+#include <stdatomic.h>
+#include <stdbool.h>
+
+#define FOO(RTYPE,TYPE,MASK)                                           \
+  __attribute__((noinline,noclone)) RTYPE                              \
+  atomic_fetch_or_##TYPE##_##MASK (_Atomic TYPE* a)                    \
+  {                                                                    \
+    TYPE mask = 1 << MASK;                                             \
+    return __atomic_fetch_or (a, mask, __ATOMIC_RELAXED) & mask;       \
+  }                                                                    \
+  __attribute__((noinline,noclone)) RTYPE                              \
+  atomic_fetch_xor_##TYPE##_##MASK (_Atomic TYPE* a)                   \
+  {                                                                    \
+    TYPE mask = 1 << MASK;                                             \
+    return __atomic_fetch_xor (a, mask, __ATOMIC_RELAXED) & mask;      \
+  }                                                                    \
+  __attribute__((noinline,noclone)) RTYPE                              \
+  atomic_xor_fetch_##TYPE##_##MASK (_Atomic TYPE* a)                   \
+  {                                                                    \
+    TYPE mask = 1 << MASK;                                             \
+    return __atomic_xor_fetch (a, mask, __ATOMIC_RELAXED) & mask;      \
+  }                                                                    \
+  __attribute__((noinline,noclone)) RTYPE                              \
+  atomic_fetch_and_##TYPE##_##MASK (_Atomic TYPE* a)                   \
+  {                                                                    \
+    TYPE mask = 1 << MASK;                                             \
+    return __atomic_fetch_and (a, ~mask, __ATOMIC_RELAXED) & mask;     \
+  }                                                                    \
+  __attribute__((noinline,noclone)) RTYPE                              \
+  sync_fetch_and_or_##TYPE##_##MASK (_Atomic TYPE* a)                  \
+  {                                                                    \
+    TYPE mask = 1 << MASK;                                             \
+    return __sync_fetch_and_or (a, mask) & mask;                       \
+  }                                                                    \
+  __attribute__((noinline,noclone)) RTYPE                              \
+  sync_fetch_and_xor_##TYPE##_##MASK (_Atomic TYPE* a)                 \
+  {                                                                    \
+    TYPE mask = 1 << MASK;                                             \
+    return __sync_fetch_and_xor (a, mask) & mask;                      \
+  }                                                                    \
+  __attribute__((noinline,noclone)) RTYPE                              \
+  sync_xor_and_fetch_##TYPE##_##MASK (_Atomic TYPE* a)                 \
+  {                                                                    \
+    TYPE mask = 1 << MASK;                                             \
+    return __sync_xor_and_fetch (a, mask) & mask;                      \
+  }                                                                    \
+  __attribute__((noinline,noclone)) RTYPE                              \
+  sync_fetch_and_and_##TYPE##_##MASK (_Atomic TYPE* a)                 \
+  {                                                                    \
+    TYPE mask = 1 << MASK;                                             \
+    return __sync_fetch_and_and (a, ~mask) & mask;                     \
+  }                                                                    \
+
+FOO(char, short, 0);
+FOO(char, short, 7);
+FOO(short, int, 0);
+FOO(short, int, 15);
+
+/* { dg-final { scan-assembler-times "lock;?\[ \t\]*bts" 8 } } */
+/* { dg-final { scan-assembler-times "lock;?\[ \t\]*btc" 16 } } */
+/* { dg-final { scan-assembler-times "lock;?\[ \t\]*btr" 8 } } */
+/* { dg-final { scan-assembler-not "cmpxchg" } } */
diff --git a/gcc/testsuite/gcc.target/i386/pr103194-3.c b/gcc/testsuite/gcc.target/i386/pr103194-3.c
new file mode 100644 (file)
index 0000000..4907598
--- /dev/null
@@ -0,0 +1,64 @@
+/* { dg-do compile { target { ! ia32 } } } */
+/* { dg-options "-O2" } */
+#include <stdatomic.h>
+#include <stdbool.h>
+typedef long long int64;
+
+#define FOO(RTYPE, TYPE,MASK)                                          \
+  __attribute__((noinline,noclone)) RTYPE                              \
+  atomic_fetch_or_##TYPE##_##MASK (_Atomic TYPE* a)                    \
+  {                                                                    \
+    TYPE mask = 1ll << MASK;                                           \
+    return __atomic_fetch_or (a, mask, __ATOMIC_RELAXED) & mask;       \
+  }                                                                    \
+  __attribute__((noinline,noclone)) RTYPE                              \
+  atomic_fetch_xor_##TYPE##_##MASK (_Atomic TYPE* a)                   \
+  {                                                                    \
+    TYPE mask = 1ll << MASK;                                           \
+    return __atomic_fetch_xor (a, mask, __ATOMIC_RELAXED) & mask;      \
+  }                                                                    \
+  __attribute__((noinline,noclone)) RTYPE                              \
+  atomic_xor_fetch_##TYPE##_##MASK (_Atomic TYPE* a)                   \
+  {                                                                    \
+    TYPE mask = 1ll << MASK;                                           \
+    return __atomic_xor_fetch (a, mask, __ATOMIC_RELAXED) & mask;      \
+  }                                                                    \
+  __attribute__((noinline,noclone)) RTYPE                              \
+  atomic_fetch_and_##TYPE##_##MASK (_Atomic TYPE* a)                   \
+  {                                                                    \
+    TYPE mask = 1ll << MASK;                                           \
+    return __atomic_fetch_and (a, ~mask, __ATOMIC_RELAXED) & mask;     \
+  }                                                                    \
+  __attribute__((noinline,noclone)) RTYPE                              \
+  sync_fetch_and_or_##TYPE##_##MASK (_Atomic TYPE* a)                  \
+  {                                                                    \
+    TYPE mask = 1ll << MASK;                                           \
+    return __sync_fetch_and_or (a, mask) & mask;                       \
+  }                                                                    \
+  __attribute__((noinline,noclone)) RTYPE                              \
+  sync_fetch_and_xor_##TYPE##_##MASK (_Atomic TYPE* a)                 \
+  {                                                                    \
+    TYPE mask = 1ll << MASK;                                           \
+    return __sync_fetch_and_xor (a, mask) & mask;                      \
+  }                                                                    \
+  __attribute__((noinline,noclone)) RTYPE                              \
+  sync_xor_and_fetch_##TYPE##_##MASK (_Atomic TYPE* a)                 \
+  {                                                                    \
+    TYPE mask = 1ll << MASK;                                           \
+    return __sync_xor_and_fetch (a, mask) & mask;                      \
+  }                                                                    \
+  __attribute__((noinline,noclone)) RTYPE                              \
+  sync_fetch_and_and_##TYPE##_##MASK (_Atomic TYPE* a)                 \
+  {                                                                    \
+    TYPE mask = 1ll << MASK;                                           \
+    return __sync_fetch_and_and (a, ~mask) & mask;                     \
+  }                                                                    \
+
+
+FOO(int, int64, 1);
+FOO(int, int64, 31);
+
+/* { dg-final { scan-assembler-times "lock;?\[ \t\]*bts" 4 } } */
+/* { dg-final { scan-assembler-times "lock;?\[ \t\]*btc" 8 } } */
+/* { dg-final { scan-assembler-times "lock;?\[ \t\]*btr" 4 } } */
+/* { dg-final { scan-assembler-not "cmpxchg" } } */
diff --git a/gcc/testsuite/gcc.target/i386/pr103194-4.c b/gcc/testsuite/gcc.target/i386/pr103194-4.c
new file mode 100644 (file)
index 0000000..8573016
--- /dev/null
@@ -0,0 +1,61 @@
+/* { dg-do compile } */
+/* { dg-options "-O2" } */
+#include <stdatomic.h>
+#include <stdbool.h>
+
+#define FOO(RTYPE,TYPE)                                                        \
+  __attribute__((noinline,noclone)) RTYPE                              \
+  atomic_fetch_or_##TYPE##_##MASK (_Atomic TYPE* a, TYPE MASK)         \
+  {                                                                    \
+    TYPE mask = 1 << MASK;                                             \
+    return __atomic_fetch_or (a, mask, __ATOMIC_RELAXED) & mask;       \
+  }                                                                    \
+  __attribute__((noinline,noclone)) RTYPE                              \
+  atomic_fetch_xor_##TYPE##_##MASK (_Atomic TYPE* a, TYPE MASK)                \
+  {                                                                    \
+    TYPE mask = 1 << MASK;                                             \
+    return __atomic_fetch_xor (a, mask, __ATOMIC_RELAXED) & mask;      \
+  }                                                                    \
+  __attribute__((noinline,noclone)) RTYPE                              \
+  atomic_xor_fetch_##TYPE##_##MASK (_Atomic TYPE* a, TYPE MASK)                \
+  {                                                                    \
+    TYPE mask = 1 << MASK;                                             \
+    return __atomic_xor_fetch (a, mask, __ATOMIC_RELAXED) & mask;      \
+  }                                                                    \
+  __attribute__((noinline,noclone)) RTYPE                              \
+  atomic_fetch_and_##TYPE##_##MASK (_Atomic TYPE* a, TYPE MASK)                \
+  {                                                                    \
+    TYPE mask = 1 << MASK;                                             \
+    return __atomic_fetch_and (a, ~mask, __ATOMIC_RELAXED) & mask;     \
+  }                                                                    \
+  __attribute__((noinline,noclone)) RTYPE                              \
+  sync_fetch_and_or_##TYPE##_##MASK (_Atomic TYPE* a, TYPE MASK)       \
+  {                                                                    \
+    TYPE mask = 1 << MASK;                                             \
+    return __sync_fetch_and_or (a, mask) & mask;                       \
+  }                                                                    \
+  __attribute__((noinline,noclone)) RTYPE                              \
+  sync_fetch_and_xor_##TYPE##_##MASK (_Atomic TYPE* a, TYPE MASK)      \
+  {                                                                    \
+    TYPE mask = 1 << MASK;                                             \
+    return __sync_fetch_and_xor (a, mask) & mask;                      \
+  }                                                                    \
+  __attribute__((noinline,noclone)) RTYPE                              \
+  sync_xor_and_fetch_##TYPE##_##MASK (_Atomic TYPE* a, TYPE MASK)      \
+  {                                                                    \
+    TYPE mask = 1 << MASK;                                             \
+    return __sync_xor_and_fetch (a, mask) & mask;                      \
+  }                                                                    \
+  __attribute__((noinline,noclone)) RTYPE                              \
+  sync_fetch_and_and_##TYPE##_##MASK (_Atomic TYPE* a, TYPE MASK)      \
+  {                                                                    \
+    TYPE mask = 1 << MASK;                                             \
+    return __sync_fetch_and_and (a, ~mask) & mask;                     \
+  }                                                                    \
+
+FOO(short, int);
+
+/* { dg-final { scan-assembler-times "lock;?\[ \t\]*bts" 2 } } */
+/* { dg-final { scan-assembler-times "lock;?\[ \t\]*btc" 4 } } */
+/* { dg-final { scan-assembler-times "lock;?\[ \t\]*btr" 2 } } */
+/* { dg-final { scan-assembler-not "cmpxchg" } } */
diff --git a/gcc/testsuite/gcc.target/i386/pr103194-5.c b/gcc/testsuite/gcc.target/i386/pr103194-5.c
new file mode 100644 (file)
index 0000000..dfaddf0
--- /dev/null
@@ -0,0 +1,61 @@
+/* { dg-do compile { target { ! ia32 } } } */
+/* { dg-options "-O2" } */
+#include <stdatomic.h>
+#include <stdbool.h>
+
+#define FOO(RTYPE,TYPE)                                                        \
+  __attribute__((noinline,noclone)) RTYPE                              \
+  atomic_fetch_or_##TYPE##_##MASK (_Atomic TYPE* a, TYPE MASK)         \
+  {                                                                    \
+    TYPE mask = 1ll << MASK;                                           \
+    return __atomic_fetch_or (a, mask, __ATOMIC_RELAXED) & mask;       \
+  }                                                                    \
+  __attribute__((noinline,noclone)) RTYPE                              \
+  atomic_fetch_xor_##TYPE##_##MASK (_Atomic TYPE* a, TYPE MASK)                \
+  {                                                                    \
+    TYPE mask = 1ll << MASK;                                           \
+    return __atomic_fetch_xor (a, mask, __ATOMIC_RELAXED) & mask;      \
+  }                                                                    \
+  __attribute__((noinline,noclone)) RTYPE                              \
+  atomic_xor_fetch_##TYPE##_##MASK (_Atomic TYPE* a, TYPE MASK)                \
+  {                                                                    \
+    TYPE mask = 1ll << MASK;                                           \
+    return __atomic_xor_fetch (a, mask, __ATOMIC_RELAXED) & mask;      \
+  }                                                                    \
+  __attribute__((noinline,noclone)) RTYPE                              \
+  atomic_fetch_and_##TYPE##_##MASK (_Atomic TYPE* a, TYPE MASK)                \
+  {                                                                    \
+    TYPE mask = 1ll << MASK;                                           \
+    return __atomic_fetch_and (a, ~mask, __ATOMIC_RELAXED) & mask;     \
+  }                                                                    \
+  __attribute__((noinline,noclone)) RTYPE                              \
+  sync_fetch_and_or_##TYPE##_##MASK (_Atomic TYPE* a, TYPE MASK)       \
+  {                                                                    \
+    TYPE mask = 1ll << MASK;                                           \
+    return __sync_fetch_and_or (a, mask) & mask;                       \
+  }                                                                    \
+  __attribute__((noinline,noclone)) RTYPE                              \
+  sync_fetch_and_xor_##TYPE##_##MASK (_Atomic TYPE* a, TYPE MASK)      \
+  {                                                                    \
+    TYPE mask = 1ll << MASK;                                           \
+    return __sync_fetch_and_xor (a, mask) & mask;                      \
+  }                                                                    \
+  __attribute__((noinline,noclone)) RTYPE                              \
+  sync_xor_and_fetch_##TYPE##_##MASK (_Atomic TYPE* a, TYPE MASK)      \
+  {                                                                    \
+    TYPE mask = 1ll << MASK;                                           \
+    return __sync_xor_and_fetch (a, mask) & mask;                      \
+  }                                                                    \
+  __attribute__((noinline,noclone)) RTYPE                              \
+  sync_fetch_and_and_##TYPE##_##MASK (_Atomic TYPE* a, TYPE MASK)      \
+  {                                                                    \
+    TYPE mask = 1ll << MASK;                                           \
+    return __sync_fetch_and_and (a, ~mask) & mask;                     \
+  }                                                                    \
+
+FOO(int, long);
+
+/* { dg-final { scan-assembler-times "lock;?\[ \t\]*bts" 2 } } */
+/* { dg-final { scan-assembler-times "lock;?\[ \t\]*btc" 4 } } */
+/* { dg-final { scan-assembler-times "lock;?\[ \t\]*btr" 2 } } */
+/* { dg-final { scan-assembler-not "cmpxchg" } } */
diff --git a/gcc/testsuite/gcc.target/i386/pr103194.c b/gcc/testsuite/gcc.target/i386/pr103194.c
new file mode 100644 (file)
index 0000000..a6d8433
--- /dev/null
@@ -0,0 +1,16 @@
+/* { dg-do compile } */
+/* { dg-options "-O2" } */
+long pscc_a_2_3;
+int pscc_a_1_4;
+void pscc()
+{
+  pscc_a_1_4 = __sync_fetch_and_and(&pscc_a_2_3, 1);
+}
+
+static int si;
+long
+test_types (long n)
+{
+  unsigned int u2 = __atomic_fetch_xor (&si, 0, 5);
+  return u2;
+}
index 18d5772..9e12da8 100644 (file)
@@ -3326,6 +3326,7 @@ convert_atomic_bit_not (enum internal_fn fn, gimple *use_stmt,
  */
 extern bool gimple_nop_atomic_bit_test_and_p (tree, tree *,
                                              tree (*) (tree));
+extern bool gimple_nop_convert (tree, tree*, tree (*) (tree));
 
 /* Optimize
      mask_2 = 1 << cnt_1;
@@ -3462,16 +3463,16 @@ optimize_atomic_bit_test_and (gimple_stmt_iterator *gsip,
          ibit = 0;
        }
       else if (TYPE_PRECISION (TREE_TYPE (use_lhs))
-              == TYPE_PRECISION (TREE_TYPE (use_rhs)))
+              <= TYPE_PRECISION (TREE_TYPE (use_rhs)))
        {
          gimple *use_nop_stmt;
          if (!single_imm_use (use_lhs, &use_p, &use_nop_stmt)
              || !is_gimple_assign (use_nop_stmt))
            return;
+         tree use_nop_lhs = gimple_assign_lhs (use_nop_stmt);
          rhs_code = gimple_assign_rhs_code (use_nop_stmt);
          if (rhs_code != BIT_AND_EXPR)
            {
-             tree use_nop_lhs = gimple_assign_lhs (use_nop_stmt);
              if (TREE_CODE (use_nop_lhs) == SSA_NAME
                  && SSA_NAME_OCCURS_IN_ABNORMAL_PHI (use_nop_lhs))
                return;
@@ -3584,24 +3585,23 @@ optimize_atomic_bit_test_and (gimple_stmt_iterator *gsip,
            }
          else
            {
-             tree and_expr = gimple_assign_lhs (use_nop_stmt);
              tree match_op[3];
              gimple *g;
-             if (!gimple_nop_atomic_bit_test_and_p (and_expr,
+             if (!gimple_nop_atomic_bit_test_and_p (use_nop_lhs,
                                                     &match_op[0], NULL)
                  || SSA_NAME_OCCURS_IN_ABNORMAL_PHI (match_op[2])
                  || !single_imm_use (match_op[2], &use_p, &g)
                  || !is_gimple_assign (g))
                return;
-             mask = match_op[1];
-             if (TREE_CODE (mask) == INTEGER_CST)
+             mask = match_op[0];
+             if (TREE_CODE (match_op[1]) == INTEGER_CST)
                {
-                 ibit = tree_log2 (mask);
+                 ibit = tree_log2 (match_op[1]);
                  gcc_assert (ibit >= 0);
                }
              else
                {
-                 g = SSA_NAME_DEF_STMT (mask);
+                 g = SSA_NAME_DEF_STMT (match_op[1]);
                  gcc_assert (is_gimple_assign (g));
                  bit = gimple_assign_rhs2 (g);
                }
@@ -3623,19 +3623,30 @@ optimize_atomic_bit_test_and (gimple_stmt_iterator *gsip,
                 _1 = __atomic_fetch_and_* (ptr_6, ~mask_7, _3);
                 _12 = _3 & mask_7;
                 _5 = (int) _12;
-              */
-             replace_uses_by (use_lhs, lhs);
-             tree use_nop_lhs = gimple_assign_lhs (use_nop_stmt);
-             var = make_ssa_name (TREE_TYPE (use_nop_lhs));
-             gimple_assign_set_lhs (use_nop_stmt, var);
+
+                and Convert
+                _1 = __atomic_fetch_and_4 (ptr_6, ~mask, _3);
+                _2 = (short int) _1;
+                _5 = _2 & mask;
+                to
+                _1 = __atomic_fetch_and_4 (ptr_6, ~mask, _3);
+                _8 = _1 & mask;
+                _5 = (short int) _8;
+             */
+             gimple_seq stmts = NULL;
+             match_op[1] = gimple_convert (&stmts,
+                                           TREE_TYPE (use_rhs),
+                                           match_op[1]);
+             var = gimple_build (&stmts, BIT_AND_EXPR,
+                                 TREE_TYPE (use_rhs), use_rhs, match_op[1]);
              gsi = gsi_for_stmt (use_stmt);
              gsi_remove (&gsi, true);
              release_defs (use_stmt);
-             gsi_remove (gsip, true);
-             g = gimple_build_assign (use_nop_lhs, NOP_EXPR, var);
+             use_stmt = gimple_seq_last_stmt (stmts);
              gsi = gsi_for_stmt (use_nop_stmt);
-             gsi_insert_after (&gsi, g, GSI_NEW_STMT);
-             use_stmt = use_nop_stmt;
+             gsi_insert_seq_before (&gsi, stmts, GSI_SAME_STMT);
+             gimple_assign_set_rhs_with_ops (&gsi, CONVERT_EXPR, var);
+             update_stmt (use_nop_stmt);
            }
        }
       else
@@ -3671,55 +3682,47 @@ optimize_atomic_bit_test_and (gimple_stmt_iterator *gsip,
       else if (TREE_CODE (mask) == SSA_NAME)
        {
          gimple *g = SSA_NAME_DEF_STMT (mask);
-         if (fn == IFN_ATOMIC_BIT_TEST_AND_RESET)
+         tree match_op;
+         if (gimple_nop_convert (mask, &match_op, NULL))
            {
-             if (!is_gimple_assign (g)
-                 || gimple_assign_rhs_code (g) != BIT_NOT_EXPR)
-               return;
-             mask = gimple_assign_rhs1 (g);
+             mask = match_op;
              if (TREE_CODE (mask) != SSA_NAME)
                return;
              g = SSA_NAME_DEF_STMT (mask);
            }
          if (!is_gimple_assign (g))
            return;
-         rhs_code = gimple_assign_rhs_code (g);
-         if (rhs_code != LSHIFT_EXPR)
-           {
-             if (rhs_code != NOP_EXPR)
-               return;
 
-             /* Handle
-                _1 = 1 << bit_4(D);
-                mask_5 = (unsigned int) _1;
-                _2 = __atomic_fetch_or_4 (v_7(D), mask_5, 0);
-                _3 = _2 & mask_5;
-                */
-             tree nop_lhs = gimple_assign_lhs (g);
-             tree nop_rhs = gimple_assign_rhs1 (g);
-             if (TYPE_PRECISION (TREE_TYPE (nop_lhs))
-                 != TYPE_PRECISION (TREE_TYPE (nop_rhs)))
+         if (fn == IFN_ATOMIC_BIT_TEST_AND_RESET)
+           {
+             if (gimple_assign_rhs_code (g) != BIT_NOT_EXPR)
                return;
-             g = SSA_NAME_DEF_STMT (nop_rhs);
-             if (!is_gimple_assign (g)
-                 || gimple_assign_rhs_code (g) != LSHIFT_EXPR)
+             mask = gimple_assign_rhs1 (g);
+             if (TREE_CODE (mask) != SSA_NAME)
                return;
+             g = SSA_NAME_DEF_STMT (mask);
            }
-         if (!integer_onep (gimple_assign_rhs1 (g)))
+
+         rhs_code = gimple_assign_rhs_code (g);
+         if (rhs_code != LSHIFT_EXPR
+             || !integer_onep (gimple_assign_rhs1 (g)))
            return;
          bit = gimple_assign_rhs2 (g);
        }
       else
        return;
 
+      tree cmp_mask;
       if (gimple_assign_rhs1 (use_stmt) == lhs)
-       {
-         if (!operand_equal_p (gimple_assign_rhs2 (use_stmt), mask, 0))
-           return;
-       }
-      else if (gimple_assign_rhs2 (use_stmt) != lhs
-              || !operand_equal_p (gimple_assign_rhs1 (use_stmt),
-                                   mask, 0))
+       cmp_mask = gimple_assign_rhs2 (use_stmt);
+      else
+       cmp_mask = gimple_assign_rhs1 (use_stmt);
+
+      tree match_op;
+      if (gimple_nop_convert (cmp_mask, &match_op, NULL))
+       cmp_mask = match_op;
+
+      if (!operand_equal_p (cmp_mask, mask, 0))
        return;
     }