i386.md (*movti_internal_rex64): Avoid MOVAPS size optimization for TARGET_AVX.
authorUros Bizjak <ubizjak@gmail.com>
Thu, 10 May 2012 23:27:55 +0000 (01:27 +0200)
committerUros Bizjak <uros@gcc.gnu.org>
Thu, 10 May 2012 23:27:55 +0000 (01:27 +0200)
* config/i386/i386.md (*movti_internal_rex64): Avoid MOVAPS size
optimization for TARGET_AVX.
(*movti_internal_sse): Ditto.
(*movdi_internal_rex64): Handle TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL.
(*movdi_internal): Ditto.
(*movsi_internal): Ditto.
(*movtf_internal): Avoid MOVAPS size optimization for TARGET_AVX.
(*movdf_internal_rex64): Ditto.
(*movdf_internal): Ditto.
(*movsf_internal): Ditto.
* config/i386/sse.md (mov<mode>): Handle TARGET_SSE_LOAD0_BY_PXOR.

From-SVN: r187386

gcc/ChangeLog
gcc/config/i386/i386.md
gcc/config/i386/sse.md

index 2a27835..6eb4d87 100644 (file)
@@ -1,3 +1,17 @@
+2012-05-11  Uros Bizjak  <ubizjak@gmail.com>
+
+       * config/i386/i386.md (*movti_internal_rex64): Avoid MOVAPS size
+       optimization for TARGET_AVX.
+       (*movti_internal_sse): Ditto.
+       (*movdi_internal_rex64): Handle TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL.
+       (*movdi_internal): Ditto.
+       (*movsi_internal): Ditto.
+       (*movtf_internal): Avoid MOVAPS size optimization for TARGET_AVX.
+       (*movdf_internal_rex64): Ditto.
+       (*movdf_internal): Ditto.
+       (*movsf_internal): Ditto.
+       * config/i386/sse.md (mov<mode>): Handle TARGET_SSE_LOAD0_BY_PXOR.
+
 2012-05-10  Eric Botcazou  <ebotcazou@adacore.com>
 
        * dwarf2out.c (add_byte_size_attribute) <RECORD_TYPE>: Handle variable
index 4a4a481..44eafe0 100644 (file)
    (set (attr "mode")
        (cond [(eq_attr "alternative" "0,1")
                 (const_string "DI")
-              (ior (match_test "TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL")
-                   (match_test "optimize_function_for_size_p (cfun)"))
+              (match_test "TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL")
                 (const_string "V4SF")
               (and (eq_attr "alternative" "4")
                    (match_test "TARGET_SSE_TYPELESS_STORES"))
                 (const_string "V4SF")
+              (match_test "TARGET_AVX")
+                (const_string "TI")
+              (match_test "optimize_function_for_size_p (cfun)")
+                (const_string "V4SF")
               ]
               (const_string "TI")))])
 
   [(set_attr "type" "sselog1,ssemov,ssemov")
    (set_attr "prefix" "maybe_vex")
    (set (attr "mode")
-       (cond [(ior (match_test "TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL")
-                   (match_test "optimize_function_for_size_p (cfun)"))
+       (cond [(match_test "TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL")
                 (const_string "V4SF")
               (and (eq_attr "alternative" "2")
                    (match_test "TARGET_SSE_TYPELESS_STORES"))
                 (const_string "V4SF")
-              (not (match_test "TARGET_SSE2"))
+              (match_test "TARGET_AVX")
+                (const_string "TI")
+              (ior (not (match_test "TARGET_SSE2"))
+                   (match_test "optimize_function_for_size_p (cfun)"))
                 (const_string "V4SF")
              ]
              (const_string "TI")))])
        return "movdq2q\t{%1, %0|%0, %1}";
 
     case TYPE_SSEMOV:
-      if (get_attr_mode (insn) == MODE_TI)
+      if (get_attr_mode (insn) == MODE_V4SF)
+       return "%vmovaps\t{%1, %0|%0, %1}";
+      else if (get_attr_mode (insn) == MODE_TI)
        return "%vmovdqa\t{%1, %0|%0, %1}";
+
       /* Handle broken assemblers that require movd instead of movq.  */
       if (GENERAL_REG_P (operands[0]) || GENERAL_REG_P (operands[1]))
        return "%vmovd\t{%1, %0|%0, %1}";
      (if_then_else (eq_attr "alternative" "10,11,12,13,14,15")
        (const_string "maybe_vex")
        (const_string "orig")))
-   (set_attr "mode" "SI,DI,DI,DI,SI,DI,DI,DI,DI,DI,TI,DI,TI,DI,DI,DI,DI,DI")])
+   (set (attr "mode")
+       (cond [(eq_attr "alternative" "0,4")
+                 (const_string "SI")
+              (eq_attr "alternative" "10,12")
+                 (cond [(match_test "TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL")
+                          (const_string "V4SF")
+                        (match_test "TARGET_AVX")
+                          (const_string "TI")
+                        (match_test "optimize_function_for_size_p (cfun)")
+                          (const_string "V4SF")
+                       ]
+                       (const_string "TI"))
+             ]
+             (const_string "DI")))])
 
 ;; Reload patterns to support multi-word load/store
 ;; with non-offsetable address.
        case MODE_DI:
           return "%vmovq\t{%1, %0|%0, %1}";
        case MODE_V4SF:
-         return "movaps\t{%1, %0|%0, %1}";
+         return "%vmovaps\t{%1, %0|%0, %1}";
        case MODE_V2SF:
          return "movlps\t{%1, %0|%0, %1}";
        default:
      (if_then_else (eq_attr "alternative" "5,6,7,8")
        (const_string "maybe_vex")
        (const_string "orig")))
-   (set_attr "mode" "DI,DI,DI,DI,DI,TI,DI,TI,DI,V4SF,V2SF,V4SF,V2SF,DI,DI")])
+   (set (attr "mode")
+       (cond [(eq_attr "alternative" "9,11")
+                 (const_string "V4SF")
+              (eq_attr "alternative" "10,12")
+                 (const_string "V2SF")
+              (eq_attr "alternative" "5,7")
+                 (cond [(match_test "TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL")
+                          (const_string "V4SF")
+                        (match_test "TARGET_AVX")
+                          (const_string "TI")
+                        (match_test "optimize_function_for_size_p (cfun)")
+                          (const_string "V4SF")
+                       ]
+                       (const_string "TI"))
+             ]
+             (const_string "DI")))])
 
 (define_split
   [(set (match_operand:DI 0 "nonimmediate_operand")
      (cond [(eq_attr "alternative" "2,3")
              (const_string "DI")
            (eq_attr "alternative" "6,7")
-             (if_then_else
-               (not (match_test "TARGET_SSE2"))
-               (const_string "V4SF")
-               (const_string "TI"))
+             (cond [(match_test "TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL")
+                      (const_string "V4SF")
+                    (match_test "TARGET_AVX")
+                      (const_string "TI")
+                    (ior (not (match_test "TARGET_SSE2"))
+                         (match_test "optimize_function_for_size_p (cfun)"))
+                      (const_string "V4SF")
+                   ]
+                   (const_string "TI"))
            (and (eq_attr "alternative" "8,9,10,11")
                 (not (match_test "TARGET_SSE2")))
              (const_string "SF")
    (set (attr "mode")
         (cond [(eq_attr "alternative" "3,4")
                 (const_string "DI")
-              (ior (match_test "TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL")
-                   (match_test "optimize_function_for_size_p (cfun)"))
+              (match_test "TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL")
                 (const_string "V4SF")
               (and (eq_attr "alternative" "2")
                    (match_test "TARGET_SSE_TYPELESS_STORES"))
                 (const_string "V4SF")
+              (match_test "TARGET_AVX")
+                (const_string "TI")
+              (match_test "optimize_function_for_size_p (cfun)")
+                (const_string "V4SF")
               ]
               (const_string "TI")))])
 
               (eq_attr "alternative" "3,4,5,6,11,12")
                 (const_string "DI")
 
-              /* xorps is one byte shorter.  */
+              /* xorps is one byte shorter for !TARGET_AVX.  */
               (eq_attr "alternative" "7")
-                (cond [(match_test "optimize_function_for_size_p (cfun)")
+                (cond [(match_test "TARGET_AVX")
+                         (const_string "V2DF")
+                       (match_test "optimize_function_for_size_p (cfun)")
                          (const_string "V4SF")
                        (match_test "TARGET_SSE_LOAD0_BY_PXOR")
                          (const_string "TI")
                  whole SSE registers use APD move to break dependency
                  chains, otherwise use short move to avoid extra work.
 
-                 movaps encodes one byte shorter.  */
+                 movaps encodes one byte shorter for !TARGET_AVX.  */
               (eq_attr "alternative" "8")
-                (cond
-                  [(ior (match_test "TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL")
-                        (match_test "optimize_function_for_size_p (cfun)"))
-                     (const_string "V4SF")
-                   (match_test "TARGET_SSE_PARTIAL_REG_DEPENDENCY")
-                     (const_string "V2DF")
+                (cond [(match_test "TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL")
+                         (const_string "V4SF")
+                       (match_test "TARGET_SSE_PARTIAL_REG_DEPENDENCY")
+                         (const_string "V2DF")
+                       (match_test "TARGET_AVX")
+                         (const_string "DF")
+                       (match_test "optimize_function_for_size_p (cfun)")
+                         (const_string "V4SF")
                   ]
                   (const_string "DF"))
               /* For architectures resolving dependencies on register
                   (const_string "V4SF")
                   (const_string "V2SF"))
 
-              /* xorps is one byte shorter.  */
+              /* xorps is one byte shorter for !TARGET_AVX.  */
               (eq_attr "alternative" "5,9")
-                (cond [(match_test "optimize_function_for_size_p (cfun)")
+                (cond [(match_test "TARGET_AVX")
+                         (const_string "V2DF")
+                       (match_test "optimize_function_for_size_p (cfun)")
                          (const_string "V4SF")
                        (match_test "TARGET_SSE_LOAD0_BY_PXOR")
                          (const_string "TI")
                  whole SSE registers use APD move to break dependency
                  chains, otherwise use short move to avoid extra work.
 
-                 movaps encodes one byte shorter.  */
+                 movaps encodes one byte shorter for !TARGET_AVX.  */
               (eq_attr "alternative" "6,10")
-                (cond
-                  [(ior (match_test "TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL")
-                        (match_test "optimize_function_for_size_p (cfun)"))
-                     (const_string "V4SF")
-                   (match_test "TARGET_SSE_PARTIAL_REG_DEPENDENCY")
-                     (const_string "V2DF")
+                (cond [(match_test "TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL")
+                         (const_string "V4SF")
+                       (match_test "TARGET_SSE_PARTIAL_REG_DEPENDENCY")
+                         (const_string "V2DF")
+                       (match_test "TARGET_AVX")
+                         (const_string "DF")
+                       (match_test "optimize_function_for_size_p (cfun)")
+                         (const_string "V4SF")
                   ]
                   (const_string "DF"))
+
               /* For architectures resolving dependencies on register
                  parts we may avoid extra work to zero out upper part
                  of register.  */
         (cond [(eq_attr "alternative" "3,4,9,10")
                 (const_string "SI")
               (eq_attr "alternative" "5")
-                (if_then_else
-                  (and (and (match_test "TARGET_SSE_LOAD0_BY_PXOR")
-                            (match_test "TARGET_SSE2"))
-                       (not (match_test "optimize_function_for_size_p (cfun)")))
-                  (const_string "TI")
-                  (const_string "V4SF"))
+                (cond [(match_test "TARGET_AVX")
+                         (const_string "V4SF")
+                       (ior (not (match_test "TARGET_SSE2"))
+                            (match_test "optimize_function_for_size_p (cfun)"))
+                         (const_string "V4SF")
+                       (match_test "TARGET_SSE_LOAD0_BY_PXOR")
+                         (const_string "TI")
+                      ]
+                      (const_string "V4SF"))
+
               /* For architectures resolving dependencies on
                  whole SSE registers use APS move to break dependency
                  chains, otherwise use short move to avoid extra work.
index a014f05..d4b3daa 100644 (file)
               (ior (not (match_test "TARGET_SSE2"))
                    (match_test "optimize_function_for_size_p (cfun)"))
                 (const_string "V4SF")
+              (and (eq_attr "alternative" "0")
+                   (match_test "TARGET_SSE_LOAD0_BY_PXOR"))
+                (const_string "TI")
              ]
              (const_string "<sseinsnmode>")))])