xtensa: Improve shift operations more
authorTakayuki 'January June' Suwa <jjsuwa_sys3175@yahoo.co.jp>
Mon, 13 Jun 2022 16:38:31 +0000 (01:38 +0900)
committerMax Filippov <jcmvbkbc@gmail.com>
Tue, 14 Jun 2022 00:25:48 +0000 (17:25 -0700)
This patch introduces funnel shifter utilization, and rearranges existing
"per-byte shift" insn patterns.

gcc/ChangeLog:

* config/xtensa/predicates.md (logical_shift_operator,
xtensa_shift_per_byte_operator): New predicates.
* config/xtensa/xtensa-protos.h (xtensa_shlrd_which_direction):
New prototype.
* config/xtensa/xtensa.cc (xtensa_shlrd_which_direction):
New helper function for funnel shift patterns.
* config/xtensa/xtensa.md (ior_op): New code iterator.
(*ashlsi3_1): Replace with new split pattern.
(*shift_per_byte): Unify *ashlsi3_3x, *ashrsi3_3x and *lshrsi3_3x.
(*shift_per_byte_omit_AND_0, *shift_per_byte_omit_AND_1):
New insn-and-split patterns that redirect to *xtensa_shift_per_byte,
in order to omit unnecessary bitwise AND operation.
(*shlrd_reg_<code>, *shlrd_const_<code>, *shlrd_per_byte_<code>,
*shlrd_per_byte_<code>_omit_AND):
New insn patterns for funnel shifts.

gcc/testsuite/ChangeLog:

* gcc.target/xtensa/funnel_shifter.c: New.

gcc/config/xtensa/predicates.md
gcc/config/xtensa/xtensa-protos.h
gcc/config/xtensa/xtensa.cc
gcc/config/xtensa/xtensa.md
gcc/testsuite/gcc.target/xtensa/funnel_shifter.c [new file with mode: 0644]

index a912e6d..bcc83ad 100644 (file)
 (define_predicate "boolean_operator"
   (match_code "eq,ne"))
 
+(define_predicate "logical_shift_operator"
+  (match_code "ashift,lshiftrt"))
+
 (define_predicate "xtensa_cstoresi_operator"
   (match_code "eq,ne,gt,ge,lt,le"))
 
+(define_predicate "xtensa_shift_per_byte_operator"
+  (match_code "ashift,ashiftrt,lshiftrt"))
+
 (define_predicate "tls_symbol_operand"
   (and (match_code "symbol_ref")
        (match_test "SYMBOL_REF_TLS_MODEL (op) != 0")))
index c2fd750..2c08ed4 100644 (file)
@@ -56,6 +56,7 @@ extern char *xtensa_emit_bit_branch (bool, bool, rtx *);
 extern char *xtensa_emit_movcc (bool, bool, bool, rtx *);
 extern char *xtensa_emit_call (int, rtx *);
 extern bool xtensa_tls_referenced_p (rtx);
+extern enum rtx_code xtensa_shlrd_which_direction (rtx, rtx);
 
 #ifdef TREE_CODE
 extern void init_cumulative_args (CUMULATIVE_ARGS *, int);
index 3477e98..df78af6 100644 (file)
@@ -2403,6 +2403,20 @@ xtensa_tls_referenced_p (rtx x)
 }
 
 
+/* Helper function for "*shlrd_..." patterns.  */
+
+enum rtx_code
+xtensa_shlrd_which_direction (rtx op0, rtx op1)
+{
+  if (GET_CODE (op0) == ASHIFT && GET_CODE (op1) == LSHIFTRT)
+    return ASHIFT;     /* shld  */
+  if (GET_CODE (op0) == LSHIFTRT && GET_CODE (op1) == ASHIFT)
+    return LSHIFTRT;   /* shrd  */
+
+  return UNKNOWN;
+}
+
+
 /* Implement TARGET_CANNOT_FORCE_CONST_MEM.  */
 
 static bool
index d806d43..cd7ded0 100644 (file)
@@ -83,6 +83,9 @@
 ;; the same template.
 (define_mode_iterator HQI [HI QI])
 
+;; This code iterator is for *shlrd and its variants.
+(define_code_iterator ior_op [ior plus])
+
 \f
 ;; Attributes.
 
   operands[1] = xtensa_copy_incoming_a7 (operands[1]);
 })
 
-(define_insn "*ashlsi3_1"
-  [(set (match_operand:SI 0 "register_operand" "=a")
-       (ashift:SI (match_operand:SI 1 "register_operand" "r")
-                  (const_int 1)))]
-  "TARGET_DENSITY"
-  "add.n\t%0, %1, %1"
-  [(set_attr "type"    "arith")
-   (set_attr "mode"    "SI")
-   (set_attr "length"  "2")])
-
 (define_insn "ashlsi3_internal"
   [(set (match_operand:SI 0 "register_operand" "=a,a")
        (ashift:SI (match_operand:SI 1 "register_operand" "r,r")
    (set_attr "mode"    "SI")
    (set_attr "length"  "3,6")])
 
-(define_insn "*ashlsi3_3x"
-  [(set (match_operand:SI 0 "register_operand" "=a")
-       (ashift:SI (match_operand:SI 1 "register_operand" "r")
-                  (ashift:SI (match_operand:SI 2 "register_operand" "r")
-                             (const_int 3))))]
-  ""
-  "ssa8b\t%2\;sll\t%0, %1"
-  [(set_attr "type"    "arith")
-   (set_attr "mode"    "SI")
-   (set_attr "length"  "6")])
+(define_split
+  [(set (match_operand:SI 0 "register_operand")
+       (ashift:SI (match_operand:SI 1 "register_operand")
+                  (const_int 1)))]
+  "TARGET_DENSITY"
+  [(set (match_dup 0)
+       (plus:SI (match_dup 1)
+                (match_dup 1)))])
 
 (define_insn "ashrsi3"
   [(set (match_operand:SI 0 "register_operand" "=a,a")
    (set_attr "mode"    "SI")
    (set_attr "length"  "3,6")])
 
-(define_insn "*ashrsi3_3x"
-  [(set (match_operand:SI 0 "register_operand" "=a")
-       (ashiftrt:SI (match_operand:SI 1 "register_operand" "r")
-                    (ashift:SI (match_operand:SI 2 "register_operand" "r")
-                               (const_int 3))))]
-  ""
-  "ssa8l\t%2\;sra\t%0, %1"
-  [(set_attr "type"    "arith")
-   (set_attr "mode"    "SI")
-   (set_attr "length"  "6")])
-
 (define_insn "lshrsi3"
   [(set (match_operand:SI 0 "register_operand" "=a,a")
        (lshiftrt:SI (match_operand:SI 1 "register_operand" "r,r")
   if (which_alternative == 0)
     {
       if ((INTVAL (operands[2]) & 0x1f) < 16)
-        return "srli\t%0, %1, %R2";
+       return "srli\t%0, %1, %R2";
       else
-       return "extui\t%0, %1, %R2, %L2";
+       return "extui\t%0, %1, %R2, %L2";
     }
   return "ssr\t%2\;srl\t%0, %1";
 }
    (set_attr "mode"    "SI")
    (set_attr "length"  "3,6")])
 
-(define_insn "*lshrsi3_3x"
+(define_insn "*shift_per_byte"
+  [(set (match_operand:SI 0 "register_operand" "=a")
+       (match_operator:SI 3 "xtensa_shift_per_byte_operator"
+               [(match_operand:SI 1 "register_operand" "r")
+                (ashift:SI (match_operand:SI 2 "register_operand" "r")
+                           (const_int 3))]))]
+  "!optimize_debug && optimize"
+{
+  switch (GET_CODE (operands[3]))
+    {
+    case ASHIFT:       return "ssa8b\t%2\;sll\t%0, %1";
+    case ASHIFTRT:     return "ssa8l\t%2\;sra\t%0, %1";
+    case LSHIFTRT:     return "ssa8l\t%2\;srl\t%0, %1";
+    default:           gcc_unreachable ();
+    }
+}
+  [(set_attr "type"    "arith")
+   (set_attr "mode"    "SI")
+   (set_attr "length"  "6")])
+
+(define_insn_and_split "*shift_per_byte_omit_AND_0"
+  [(set (match_operand:SI 0 "register_operand" "=a")
+       (match_operator:SI 4 "xtensa_shift_per_byte_operator"
+               [(match_operand:SI 1 "register_operand" "r")
+                (and:SI (ashift:SI (match_operand:SI 2 "register_operand" "r")
+                                   (const_int 3))
+                        (match_operand:SI 3 "const_int_operand" "i"))]))]
+  "!optimize_debug && optimize
+   && (INTVAL (operands[3]) & 0x1f) == 3 << 3"
+  "#"
+  "&& 1"
+  [(set (match_dup 0)
+       (match_op_dup 4
+               [(match_dup 1)
+                (ashift:SI (match_dup 2)
+                           (const_int 3))]))]
+  ""
+  [(set_attr "type"    "arith")
+   (set_attr "mode"    "SI")
+   (set_attr "length"  "6")])
+
+(define_insn_and_split "*shift_per_byte_omit_AND_1"
+  [(set (match_operand:SI 0 "register_operand" "=a")
+       (match_operator:SI 4 "xtensa_shift_per_byte_operator"
+               [(match_operand:SI 1 "register_operand" "r")
+                (neg:SI (and:SI (ashift:SI (match_operand:SI 2 "register_operand" "r")
+                                           (const_int 3))
+                                (match_operand:SI 3 "const_int_operand" "i")))]))]
+  "!optimize_debug && optimize
+   && (INTVAL (operands[3]) & 0x1f) == 3 << 3"
+  "#"
+  "&& can_create_pseudo_p ()"
+  [(set (match_dup 5)
+       (neg:SI (match_dup 2)))
+   (set (match_dup 0)
+       (match_op_dup 4
+               [(match_dup 1)
+                (ashift:SI (match_dup 5)
+                           (const_int 3))]))]
+{
+  operands[5] = gen_reg_rtx (SImode);
+}
+  [(set_attr "type"    "arith")
+   (set_attr "mode"    "SI")
+   (set_attr "length"  "9")])
+
+(define_insn "*shlrd_reg_<code>"
+  [(set (match_operand:SI 0 "register_operand" "=a")
+       (ior_op:SI (match_operator:SI 4 "logical_shift_operator"
+                       [(match_operand:SI 1 "register_operand" "r")
+                        (match_operand:SI 2 "register_operand" "r")])
+                  (match_operator:SI 5 "logical_shift_operator"
+                       [(match_operand:SI 3 "register_operand" "r")
+                        (neg:SI (match_dup 2))])))]
+  "!optimize_debug && optimize
+   && xtensa_shlrd_which_direction (operands[4], operands[5]) != UNKNOWN"
+{
+  switch (xtensa_shlrd_which_direction (operands[4], operands[5]))
+    {
+    case ASHIFT:       return "ssl\t%2\;src\t%0, %1, %3";
+    case LSHIFTRT:     return "ssr\t%2\;src\t%0, %3, %1";
+    default:           gcc_unreachable ();
+    }
+}
+  [(set_attr "type"    "arith")
+   (set_attr "mode"    "SI")
+   (set_attr "length"  "6")])
+
+(define_insn "*shlrd_const_<code>"
   [(set (match_operand:SI 0 "register_operand" "=a")
-       (lshiftrt:SI (match_operand:SI 1 "register_operand" "r")
-                    (ashift:SI (match_operand:SI 2 "register_operand" "r")
-                               (const_int 3))))]
+       (ior_op:SI (match_operator:SI 5 "logical_shift_operator"
+                       [(match_operand:SI 1 "register_operand" "r")
+                        (match_operand:SI 3 "const_int_operand" "i")])
+                  (match_operator:SI 6 "logical_shift_operator"
+                       [(match_operand:SI 2 "register_operand" "r")
+                        (match_operand:SI 4 "const_int_operand" "i")])))]
+  "!optimize_debug && optimize
+   && xtensa_shlrd_which_direction (operands[5], operands[6]) != UNKNOWN
+   && IN_RANGE (INTVAL (operands[3]), 1, 31)
+   && IN_RANGE (INTVAL (operands[4]), 1, 31)
+   && INTVAL (operands[3]) + INTVAL (operands[4]) == 32"
+{
+  switch (xtensa_shlrd_which_direction (operands[5], operands[6]))
+    {
+    case ASHIFT:       return "ssai\t%L3\;src\t%0, %1, %2";
+    case LSHIFTRT:     return "ssai\t%R3\;src\t%0, %2, %1";
+    default:           gcc_unreachable ();
+    }
+}
+  [(set_attr "type"    "arith")
+   (set_attr "mode"    "SI")
+   (set_attr "length"  "6")])
+
+(define_insn "*shlrd_per_byte_<code>"
+  [(set (match_operand:SI 0 "register_operand" "=a")
+       (ior_op:SI (match_operator:SI 4 "logical_shift_operator"
+                       [(match_operand:SI 1 "register_operand" "r")
+                        (ashift:SI (match_operand:SI 2 "register_operand" "r")
+                                   (const_int 3))])
+                  (match_operator:SI 5 "logical_shift_operator"
+                       [(match_operand:SI 3 "register_operand" "r")
+                        (neg:SI (ashift:SI (match_dup 2)
+                                           (const_int 3)))])))]
+  "!optimize_debug && optimize
+   && xtensa_shlrd_which_direction (operands[4], operands[5]) != UNKNOWN"
+{
+  switch (xtensa_shlrd_which_direction (operands[4], operands[5]))
+    {
+    case ASHIFT:       return "ssa8b\t%2\;src\t%0, %1, %3";
+    case LSHIFTRT:     return "ssa8l\t%2\;src\t%0, %3, %1";
+    default:           gcc_unreachable ();
+    }
+}
+  [(set_attr "type"    "arith")
+   (set_attr "mode"    "SI")
+   (set_attr "length"  "6")])
+
+(define_insn_and_split "*shlrd_per_byte_<code>_omit_AND"
+  [(set (match_operand:SI 0 "register_operand" "=a")
+       (ior_op:SI (match_operator:SI 5 "logical_shift_operator"
+                       [(match_operand:SI 1 "register_operand" "r")
+                        (and:SI (ashift:SI (match_operand:SI 2 "register_operand" "r")
+                                           (const_int 3))
+                                (match_operand:SI 4 "const_int_operand" "i"))])
+                  (match_operator:SI 6 "logical_shift_operator"
+                       [(match_operand:SI 3 "register_operand" "r")
+                        (neg:SI (and:SI (ashift:SI (match_dup 2)
+                                                   (const_int 3))
+                                        (match_dup 4)))])))]
+  "!optimize_debug && optimize
+   && xtensa_shlrd_which_direction (operands[5], operands[6]) != UNKNOWN
+   && (INTVAL (operands[4]) & 0x1f) == 3 << 3"
+  "#"
+  "&& 1"
+  [(set (match_dup 0)
+       (ior_op:SI (match_op_dup 5
+                       [(match_dup 1)
+                        (ashift:SI (match_dup 2)
+                                   (const_int 3))])
+                  (match_op_dup 6
+                       [(match_dup 3)
+                        (neg:SI (ashift:SI (match_dup 2)
+                                           (const_int 3)))])))]
   ""
-  "ssa8l\t%2\;srl\t%0, %1"
   [(set_attr "type"    "arith")
    (set_attr "mode"    "SI")
    (set_attr "length"  "6")])
diff --git a/gcc/testsuite/gcc.target/xtensa/funnel_shifter.c b/gcc/testsuite/gcc.target/xtensa/funnel_shifter.c
new file mode 100644 (file)
index 0000000..c8f987c
--- /dev/null
@@ -0,0 +1,17 @@
+/* { dg-do compile } */
+/* { dg-options "-O2" } */
+
+unsigned int test_0(const void *addr)
+{
+  unsigned int n = (unsigned int)addr;
+  const unsigned int *a = (const unsigned int*)(n & ~3);
+  n = (n & 3) * 8;
+  return (a[0] >> n) | (a[1] << (32 - n));
+}
+
+unsigned int test_1(unsigned int a, unsigned int b)
+{
+  return (a >> 16) + (b << 16);
+}
+
+/* { dg-final { scan-assembler-times "src" 2 } } */