re PR target/82498 (Missed optimization for x86 rotate instruction)
authorJakub Jelinek <jakub@redhat.com>
Fri, 13 Oct 2017 07:28:46 +0000 (09:28 +0200)
committerJakub Jelinek <jakub@gcc.gnu.org>
Fri, 13 Oct 2017 07:28:46 +0000 (09:28 +0200)
PR target/82498
* fold-const.c (fold_binary_loc) <bit_rotate>: Code cleanups,
instead of handling MINUS_EXPR twice (once for each argument),
canonicalize operand order and handle just once, use rtype where
possible.  Handle (A << B) | (A >> (-B & (Z - 1))).

* gcc.dg/tree-ssa/pr82498.c: New test.

From-SVN: r253709

gcc/ChangeLog
gcc/fold-const.c
gcc/testsuite/ChangeLog
gcc/testsuite/gcc.dg/tree-ssa/pr82498.c [new file with mode: 0644]

index 58923e8..b8346e3 100644 (file)
@@ -1,6 +1,12 @@
 2017-10-13  Jakub Jelinek  <jakub@redhat.com>
 
        PR target/82498
+       * fold-const.c (fold_binary_loc) <bit_rotate>: Code cleanups,
+       instead of handling MINUS_EXPR twice (once for each argument),
+       canonicalize operand order and handle just once, use rtype where
+       possible.  Handle (A << B) | (A >> (-B & (Z - 1))).
+
+       PR target/82498
        * config/i386/ia32intrin.h (__rold, __rord, __rolq, __rorq): Allow
        any values of __C while still being pattern recognizable as a simple
        rotate instruction.
index e22b02f..d1e6191 100644 (file)
@@ -9429,7 +9429,10 @@ fold_binary_loc (location_t loc,
       /* (A << C1) + (A >> C2) if A is unsigned and C1+C2 is the size of A
         is a rotate of A by C1 bits.  */
       /* (A << B) + (A >> (Z - B)) if A is unsigned and Z is the size of A
-        is a rotate of A by B bits.  */
+        is a rotate of A by B bits.
+        Similarly for (A << B) | (A >> (-B & C3)) where C3 is Z-1,
+        though in this case CODE must be | and not + or ^, otherwise
+        it doesn't return A when B is 0.  */
       {
        enum tree_code code0, code1;
        tree rtype;
@@ -9447,25 +9450,32 @@ fold_binary_loc (location_t loc,
                == GET_MODE_UNIT_PRECISION (TYPE_MODE (rtype))))
          {
            tree tree01, tree11;
+           tree orig_tree01, orig_tree11;
            enum tree_code code01, code11;
 
-           tree01 = TREE_OPERAND (arg0, 1);
-           tree11 = TREE_OPERAND (arg1, 1);
+           tree01 = orig_tree01 = TREE_OPERAND (arg0, 1);
+           tree11 = orig_tree11 = TREE_OPERAND (arg1, 1);
            STRIP_NOPS (tree01);
            STRIP_NOPS (tree11);
            code01 = TREE_CODE (tree01);
            code11 = TREE_CODE (tree11);
+           if (code11 != MINUS_EXPR
+               && (code01 == MINUS_EXPR || code01 == BIT_AND_EXPR))
+             {
+               std::swap (code0, code1);
+               std::swap (code01, code11);
+               std::swap (tree01, tree11);
+               std::swap (orig_tree01, orig_tree11);
+             }
            if (code01 == INTEGER_CST
                && code11 == INTEGER_CST
                && (wi::to_widest (tree01) + wi::to_widest (tree11)
-                   == element_precision (TREE_TYPE (TREE_OPERAND (arg0, 0)))))
+                   == element_precision (rtype)))
              {
                tem = build2_loc (loc, LROTATE_EXPR,
-                                 TREE_TYPE (TREE_OPERAND (arg0, 0)),
-                                 TREE_OPERAND (arg0, 0),
+                                 rtype, TREE_OPERAND (arg0, 0),
                                  code0 == LSHIFT_EXPR
-                                 ? TREE_OPERAND (arg0, 1)
-                                 : TREE_OPERAND (arg1, 1));
+                                 ? orig_tree01 : orig_tree11);
                return fold_convert_loc (loc, type, tem);
              }
            else if (code11 == MINUS_EXPR)
@@ -9477,39 +9487,37 @@ fold_binary_loc (location_t loc,
                STRIP_NOPS (tree111);
                if (TREE_CODE (tree110) == INTEGER_CST
                    && 0 == compare_tree_int (tree110,
-                                             element_precision
-                                             (TREE_TYPE (TREE_OPERAND
-                                                         (arg0, 0))))
+                                             element_precision (rtype))
                    && operand_equal_p (tree01, tree111, 0))
-                 return
-                   fold_convert_loc (loc, type,
-                                     build2 ((code0 == LSHIFT_EXPR
-                                              ? LROTATE_EXPR
-                                              : RROTATE_EXPR),
-                                             TREE_TYPE (TREE_OPERAND (arg0, 0)),
-                                             TREE_OPERAND (arg0, 0),
-                                             TREE_OPERAND (arg0, 1)));
+                 {
+                   tem = build2_loc (loc, (code0 == LSHIFT_EXPR
+                                           ? LROTATE_EXPR : RROTATE_EXPR),
+                                     rtype, TREE_OPERAND (arg0, 0),
+                                     orig_tree01);
+                   return fold_convert_loc (loc, type, tem);
+                 }
              }
-           else if (code01 == MINUS_EXPR)
+           else if (code == BIT_IOR_EXPR
+                    && code11 == BIT_AND_EXPR
+                    && pow2p_hwi (element_precision (rtype)))
              {
-               tree tree010, tree011;
-               tree010 = TREE_OPERAND (tree01, 0);
-               tree011 = TREE_OPERAND (tree01, 1);
-               STRIP_NOPS (tree010);
-               STRIP_NOPS (tree011);
-               if (TREE_CODE (tree010) == INTEGER_CST
-                   && 0 == compare_tree_int (tree010,
-                                             element_precision
-                                             (TREE_TYPE (TREE_OPERAND
-                                                         (arg0, 0))))
-                   && operand_equal_p (tree11, tree011, 0))
-                   return fold_convert_loc
-                     (loc, type,
-                      build2 ((code0 != LSHIFT_EXPR
-                               ? LROTATE_EXPR
-                               : RROTATE_EXPR),
-                              TREE_TYPE (TREE_OPERAND (arg0, 0)),
-                              TREE_OPERAND (arg0, 0), TREE_OPERAND (arg1, 1)));
+               tree tree110, tree111;
+               tree110 = TREE_OPERAND (tree11, 0);
+               tree111 = TREE_OPERAND (tree11, 1);
+               STRIP_NOPS (tree110);
+               STRIP_NOPS (tree111);
+               if (TREE_CODE (tree110) == NEGATE_EXPR
+                   && TREE_CODE (tree111) == INTEGER_CST
+                   && 0 == compare_tree_int (tree111,
+                                             element_precision (rtype) - 1)
+                   && operand_equal_p (tree01, TREE_OPERAND (tree110, 0), 0))
+                 {
+                   tem = build2_loc (loc, (code0 == LSHIFT_EXPR
+                                           ? LROTATE_EXPR : RROTATE_EXPR),
+                                     rtype, TREE_OPERAND (arg0, 0),
+                                     orig_tree01);
+                   return fold_convert_loc (loc, type, tem);
+                 }
              }
          }
       }
index 0a8246f..ff6d743 100644 (file)
@@ -1,6 +1,9 @@
 2017-10-13  Jakub Jelinek  <jakub@redhat.com>
 
        PR target/82498
+       * gcc.dg/tree-ssa/pr82498.c: New test.
+
+       PR target/82498
        * gcc.dg/ubsan/pr82498.c: New test.
 
 2017-10-13  Richard Biener  <rguenther@suse.de>
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/pr82498.c b/gcc/testsuite/gcc.dg/tree-ssa/pr82498.c
new file mode 100644 (file)
index 0000000..19a42f0
--- /dev/null
@@ -0,0 +1,53 @@
+/* PR target/82498 */
+/* { dg-do compile } */
+/* { dg-options "-O2 -fdump-tree-original" } */
+/* { dg-final { scan-tree-dump-times "x r<< y" 4 "original" { target int32 } } } */
+/* { dg-final { scan-tree-dump-times "x r>> y" 4 "original" { target int32 } } } */
+
+unsigned
+f1 (unsigned x, int y)
+{
+  return (x << y) | (x >> (__CHAR_BIT__ * __SIZEOF_INT__ - y));
+}
+
+unsigned
+f2 (unsigned x, int y)
+{
+  return (x << y) | (x >> (-y & (__CHAR_BIT__ * __SIZEOF_INT__ - 1)));
+}
+
+unsigned
+f3 (unsigned x, int y)
+{
+  return (x >> y) | (x << (__CHAR_BIT__ * __SIZEOF_INT__ - y));
+}
+
+unsigned
+f4 (unsigned x, int y)
+{
+  return (x >> y) | (x << (-y & (__CHAR_BIT__ * __SIZEOF_INT__ - 1)));
+}
+
+unsigned
+f5 (unsigned x, int y)
+{
+  return (x >> (__CHAR_BIT__ * __SIZEOF_INT__ - y)) | (x << y);
+}
+
+unsigned
+f6 (unsigned x, int y)
+{
+  return (x >> (-y & (__CHAR_BIT__ * __SIZEOF_INT__ - 1))) | (x << y);
+}
+
+unsigned
+f7 (unsigned x, int y)
+{
+  return (x << (__CHAR_BIT__ * __SIZEOF_INT__ - y)) | (x >> y);
+}
+
+unsigned
+f8 (unsigned x, int y)
+{
+  return (x << (-y & (__CHAR_BIT__ * __SIZEOF_INT__ - 1))) | (x >> y);
+}