x86_64: Avoid rorx rotation instructions with -Os.
author Roger Sayle <roger@nextmovesoftware.com>
Tue, 16 Nov 2021 08:55:21 +0000 (08:55 +0000)
committer Roger Sayle <roger@nextmovesoftware.com>
Tue, 16 Nov 2021 08:55:21 +0000 (08:55 +0000)
This patch teaches the i386 backend to avoid using BMI2's rorx
instructions when optimizing for size.  The benefits are shown
with the following example:

unsigned int ror1(unsigned int x) { return (x >> 1) | (x << 31); }
unsigned int ror2(unsigned int x) { return (x >> 2) | (x << 30); }
unsigned int rol2(unsigned int x) { return (x >> 30) | (x << 2); }
unsigned int rol1(unsigned int x) { return (x >> 31) | (x << 1); }

which currently with -Os -march=cascadelake generates:

ror1: rorx    $1, %edi, %eax // 6 bytes
        ret
ror2: rorx    $2, %edi, %eax // 6 bytes
        ret
rol2: rorx    $30, %edi, %eax // 6 bytes
        ret
rol1: rorx    $31, %edi, %eax // 6 bytes
        ret

but with this patch now generates:

ror1: movl    %edi, %eax // 2 bytes
        rorl    %eax // 2 bytes
        ret
ror2: movl    %edi, %eax // 2 bytes
        rorl    $2, %eax // 3 bytes
        ret
rol2: movl    %edi, %eax // 2 bytes
        roll    $2, %eax // 3 bytes
        ret
rol1: movl    %edi, %eax // 2 bytes
        roll    %eax // 2 bytes
        ret

I've confirmed that this patch is a win on the CSiBE benchmark, even
though rotations are rare; for example, libmspack/test/md5.o shrinks
from 5824 bytes to 5632 bytes.

2021-11-16  Roger Sayle  <roger@nextmovesoftware.com>

gcc/ChangeLog
* config/i386/i386.md (*bmi2_rorx<mode>3_1): Make conditional
on !optimize_function_for_size_p.
(*<any_rotate><mode>3_1): Add preferred_for_size attribute.
(define_splits): Conditionalize on !optimize_function_for_size_p.
(*bmi2_rorxsi3_1_zext): Likewise.
(*<any_rotate>si3_1_zext): Add preferred_for_size attribute.
(define_splits): Conditionalize on !optimize_function_for_size_p.

gcc/config/i386/i386.md

index 6eb9de8..7394906 100644 (file)
        (rotatert:SWI48
          (match_operand:SWI48 1 "nonimmediate_operand" "rm")
          (match_operand:QI 2 "<rorx_immediate_operand>" "<S>")))]
-  "TARGET_BMI2"
+  "TARGET_BMI2 && !optimize_function_for_size_p (cfun)"
   "rorx\t{%2, %1, %0|%0, %1, %2}"
   [(set_attr "type" "rotatex")
    (set_attr "mode" "<MODE>")])
 }
   [(set_attr "isa" "*,bmi2")
    (set_attr "type" "rotate,rotatex")
+   (set (attr "preferred_for_size")
+     (cond [(eq_attr "alternative" "0")
+             (symbol_ref "true")]
+          (symbol_ref "false")))
    (set (attr "length_immediate")
      (if_then_else
        (and (eq_attr "type" "rotate")
        (rotate:SWI48 (match_operand:SWI48 1 "nonimmediate_operand")
                      (match_operand:QI 2 "const_int_operand")))
    (clobber (reg:CC FLAGS_REG))]
-  "TARGET_BMI2 && reload_completed"
+  "TARGET_BMI2 && reload_completed && !optimize_function_for_size_p (cfun)"
   [(set (match_dup 0)
        (rotatert:SWI48 (match_dup 1) (match_dup 2)))]
 {
        (rotatert:SWI48 (match_operand:SWI48 1 "nonimmediate_operand")
                        (match_operand:QI 2 "const_int_operand")))
    (clobber (reg:CC FLAGS_REG))]
-  "TARGET_BMI2 && reload_completed"
+  "TARGET_BMI2 && reload_completed && !optimize_function_for_size_p (cfun)"
   [(set (match_dup 0)
        (rotatert:SWI48 (match_dup 1) (match_dup 2)))])
 
        (zero_extend:DI
          (rotatert:SI (match_operand:SI 1 "nonimmediate_operand" "rm")
                       (match_operand:QI 2 "const_0_to_31_operand" "I"))))]
-  "TARGET_64BIT && TARGET_BMI2"
+  "TARGET_64BIT && TARGET_BMI2 && !optimize_function_for_size_p (cfun)"
   "rorx\t{%2, %1, %k0|%k0, %1, %2}"
   [(set_attr "type" "rotatex")
    (set_attr "mode" "SI")])
 }
   [(set_attr "isa" "*,bmi2")
    (set_attr "type" "rotate,rotatex")
+   (set (attr "preferred_for_size")
+     (cond [(eq_attr "alternative" "0")
+             (symbol_ref "true")]
+          (symbol_ref "false")))
    (set (attr "length_immediate")
      (if_then_else
        (and (eq_attr "type" "rotate")
          (rotate:SI (match_operand:SI 1 "nonimmediate_operand")
                     (match_operand:QI 2 "const_int_operand"))))
    (clobber (reg:CC FLAGS_REG))]
-  "TARGET_64BIT && TARGET_BMI2 && reload_completed"
+  "TARGET_64BIT && TARGET_BMI2 && reload_completed
+   && !optimize_function_for_size_p (cfun)"
   [(set (match_dup 0)
        (zero_extend:DI (rotatert:SI (match_dup 1) (match_dup 2))))]
 {
          (rotatert:SI (match_operand:SI 1 "nonimmediate_operand")
                       (match_operand:QI 2 "const_int_operand"))))
    (clobber (reg:CC FLAGS_REG))]
-  "TARGET_64BIT && TARGET_BMI2 && reload_completed"
+  "TARGET_64BIT && TARGET_BMI2 && reload_completed
+   && !optimize_function_for_size_p (cfun)"
   [(set (match_dup 0)
        (zero_extend:DI (rotatert:SI (match_dup 1) (match_dup 2))))])