i386: Introduce peephole2 to use flags from CMPXCHG more [PR96189]
author: Uros Bizjak <ubizjak@gmail.com>
Wed, 15 Jul 2020 19:27:00 +0000 (21:27 +0200)
committer: Uros Bizjak <ubizjak@gmail.com>
Wed, 15 Jul 2020 19:29:59 +0000 (21:29 +0200)
The CMPXCHG instruction sets the ZF flag if the values in the destination
operand and the EAX register are equal; otherwise the ZF flag is cleared and
the value from the destination operand is loaded into EAX. The following assembly:

        movl    %esi, %eax
        lock cmpxchgl   %edx, (%rdi)
        cmpl    %esi, %eax
        sete    %al

can be optimized by removing the unneeded comparison, since a set ZF flag
signals that EAX was not updated.

2020-07-15  Uroš Bizjak  <ubizjak@gmail.com>

gcc/ChangeLog:
PR target/96189
* config/i386/sync.md
(peephole2 to remove unneeded compare after CMPXCHG): New pattern.

gcc/testsuite/ChangeLog:
PR target/96189
* gcc.target/i386/pr96189.c: New test.

gcc/config/i386/sync.md
gcc/testsuite/gcc.target/i386/pr96189.c [new file with mode: 0644]

index 9ab5456..d203e9d 100644 (file)
   "TARGET_CMPXCHG"
   "lock{%;} %K4cmpxchg{<imodesuffix>}\t{%3, %1|%1, %3}")
 
+;; Remove a compare that follows CMPXCHG when it only re-tests what the
+;; instruction has already recorded in ZF.
+;;
+;; Matched sequence:
+;;   1. operand 0 (a register) is loaded from operand 1;
+;;   2. CMPXCHG on memory operand 2 with operand 0 as the expected value
+;;      and register operand 3 as the new value (operand 4 is a
+;;      const_int that is passed through unchanged);
+;;   3. a CCZ compare of operands 5 and 6.
+;;
+;; The condition requires the compare to test operand 0 against
+;; operand 1, in either operand order.  CMPXCHG sets ZF when the
+;; expected value equals the destination and otherwise clears ZF and
+;; loads the destination value into EAX, so ZF after the CMPXCHG already
+;; tells whether operand 0 still holds the value loaded from operand 1.
+;;
+;; NOTE(review): operand 1 is a general_operand, so it may be a memory
+;; reference -- confirm it cannot overlap operand 2, which the CMPXCHG
+;; may modify between the initial load and the removed compare.
+(define_peephole2
+  [(set (match_operand:SWI 0 "register_operand")
+       (match_operand:SWI 1 "general_operand"))
+   (parallel [(set (match_dup 0)
+                  (unspec_volatile:SWI
+                    [(match_operand:SWI 2 "memory_operand")
+                     (match_dup 0)
+                     (match_operand:SWI 3 "register_operand")
+                     (match_operand:SI 4 "const_int_operand")]
+                    UNSPECV_CMPXCHG))
+             (set (match_dup 2)
+                  (unspec_volatile:SWI [(const_int 0)] UNSPECV_CMPXCHG))
+             (set (reg:CCZ FLAGS_REG)
+                  (unspec_volatile:CCZ [(const_int 0)] UNSPECV_CMPXCHG))])
+   (set (reg:CCZ FLAGS_REG)
+       (compare:CCZ (match_operand:SWI 5 "register_operand")
+                    (match_operand:SWI 6 "general_operand")))]
+  "(rtx_equal_p (operands[0], operands[5])
+    && rtx_equal_p (operands[1], operands[6]))
+   || (rtx_equal_p (operands[0], operands[6])
+       && rtx_equal_p (operands[1], operands[5]))"
+  ;; Replacement: the load and the CMPXCHG are emitted again unchanged;
+  ;; only the trailing compare is omitted.
+  [(set (match_dup 0)
+       (match_dup 1))
+   (parallel [(set (match_dup 0)
+                  (unspec_volatile:SWI
+                    [(match_dup 2)
+                     (match_dup 0)
+                     (match_dup 3)
+                     (match_dup 4)]
+                    UNSPECV_CMPXCHG))
+             (set (match_dup 2)
+                  (unspec_volatile:SWI [(const_int 0)] UNSPECV_CMPXCHG))
+             (set (reg:CCZ FLAGS_REG)
+                  (unspec_volatile:CCZ [(const_int 0)] UNSPECV_CMPXCHG))])])
+
 ;; For operand 2 nonmemory_operand predicate is used instead of
 ;; register_operand to allow combiner to better optimize atomic
 ;; additions of constants.
diff --git a/gcc/testsuite/gcc.target/i386/pr96189.c b/gcc/testsuite/gcc.target/i386/pr96189.c
new file mode 100644 (file)
index 0000000..1505e48
--- /dev/null
@@ -0,0 +1,12 @@
+/* PR target/96189 - no separate byte compare should follow the atomic compare-exchange.  */
+/* { dg-do compile { target lp64 } } */
+/* { dg-options "-O2" } */
+/* { dg-final { scan-assembler-not "\tcmpb\t" } } */
+
+_Bool
+foo (unsigned char *x, unsigned char y, unsigned char z)
+{
+  unsigned char y_old = y;  /* Expected value, saved before the exchange may overwrite y.  */
+  __atomic_compare_exchange_n (x, &y, z, 0, __ATOMIC_RELAXED, __ATOMIC_RELAXED);
+  return y == y_old;  /* The comparison the dg-final scan expects to be done without a cmpb.  */
+}