i386: Cleanup V2QI arithmetic instructions
author: Uros Bizjak <ubizjak@gmail.com>
Thu, 13 Jan 2022 18:11:41 +0000 (19:11 +0100)
committer: Uros Bizjak <ubizjak@gmail.com>
Thu, 13 Jan 2022 18:12:41 +0000 (19:12 +0100)
2022-01-13  Uroš Bizjak  <ubizjak@gmail.com>

gcc/ChangeLog:

* config/i386/mmx.md (negv2qi2): Disparage GPR alternative a bit.
Disable for TARGET_PARTIAL_REG_STALL unless optimizing for size.
(negv2qi2 splitters): Use lowpart_subreg instead of
gen_lowpart to create subreg.
(<plusminus:insn>v2qi3): Disparage GPR alternative a bit.
Disable for TARGET_PARTIAL_REG_STALL unless optimizing for size.
(<plusminus:insn>v2qi3 splitters): Use lowpart_subreg instead of
gen_lowpart to create subreg.
* config/i386/i386.md (*subqi_ext<mode>_2): Move in front of the
"Subtract with jump on overflow" patterns.

gcc/config/i386/i386.md
gcc/config/i386/mmx.md

index 9937643..bcaaa49 100644 (file)
   [(set_attr "type" "alu")
    (set_attr "mode" "SI")])
 
+(define_insn "*subqi_ext<mode>_2"
+  [(set (zero_extract:SWI248
+         (match_operand:SWI248 0 "register_operand" "+Q")
+         (const_int 8)
+         (const_int 8))
+       (subreg:SWI248
+         (minus:QI
+           (subreg:QI
+             (zero_extract:SWI248
+               (match_operand:SWI248 1 "register_operand" "0")
+               (const_int 8)
+               (const_int 8)) 0)
+           (subreg:QI
+             (zero_extract:SWI248
+               (match_operand:SWI248 2 "register_operand" "Q")
+               (const_int 8)
+               (const_int 8)) 0)) 0))
+  (clobber (reg:CC FLAGS_REG))]
+  "/* FIXME: without this LRA can't reload this pattern, see PR82524.  */
+   rtx_equal_p (operands[0], operands[1])"
+  "sub{b}\t{%h2, %h0|%h0, %h2}"
+  [(set_attr "type" "alu")
+   (set_attr "mode" "QI")])
+
 ;; Subtract with jump on overflow.
 (define_expand "subv<mode>4"
   [(parallel [(set (reg:CCO FLAGS_REG)
     operands[4] = gen_rtx_SIGN_EXTEND (<DPWI>mode, operands[2]);
 })
 
-(define_insn "*subqi_ext<mode>_2"
-  [(set (zero_extract:SWI248
-         (match_operand:SWI248 0 "register_operand" "+Q")
-         (const_int 8)
-         (const_int 8))
-       (subreg:SWI248
-         (minus:QI
-           (subreg:QI
-             (zero_extract:SWI248
-               (match_operand:SWI248 1 "register_operand" "0")
-               (const_int 8)
-               (const_int 8)) 0)
-           (subreg:QI
-             (zero_extract:SWI248
-               (match_operand:SWI248 2 "register_operand" "Q")
-               (const_int 8)
-               (const_int 8)) 0)) 0))
-  (clobber (reg:CC FLAGS_REG))]
-  "/* FIXME: without this LRA can't reload this pattern, see PR82524.  */
-   rtx_equal_p (operands[0], operands[1])"
-  "sub{b}\t{%h2, %h0|%h0, %h2}"
-  [(set_attr "type" "alu")
-   (set_attr "mode" "QI")])
-
 (define_insn "*subv<mode>4"
   [(set (reg:CCO FLAGS_REG)
        (eq:CCO (minus:<DWI>
index 295a132..3d99a5e 100644 (file)
   "TARGET_MMX_WITH_SSE"
   "operands[2] = force_reg (<MODE>mode, CONST0_RTX (<MODE>mode));")
 
+(define_expand "neg<mode>2"
+  [(set (match_operand:VI_32 0 "register_operand")
+       (minus:VI_32
+         (match_dup 2)
+         (match_operand:VI_32 1 "register_operand")))]
+  "TARGET_SSE2"
+  "operands[2] = force_reg (<MODE>mode, CONST0_RTX (<MODE>mode));")
+
 (define_insn "negv2qi2"
   [(set (match_operand:V2QI 0 "register_operand" "=?Q,&Yw")
         (neg:V2QI
          (match_operand:V2QI 1 "register_operand" "0,Yw")))
    (clobber (reg:CC FLAGS_REG))]
-  ""
+  "!TARGET_PARTIAL_REG_STALL || optimize_function_for_size_p (cfun)"
   "#"
   [(set_attr "isa" "*,sse2")
    (set_attr "type" "multi")
                                  (const_int 8)) 0)) 0))
       (clobber (reg:CC FLAGS_REG))])]
 {
-  operands[3] = gen_lowpart (HImode, operands[1]);
-  operands[2] = gen_lowpart (HImode, operands[0]);
-  operands[1] = gen_lowpart (QImode, operands[1]);
-  operands[0] = gen_lowpart (QImode, operands[0]);
+  operands[3] = lowpart_subreg (HImode, operands[1], V2QImode);
+  operands[2] = lowpart_subreg (HImode, operands[0], V2QImode);
+  operands[1] = lowpart_subreg (QImode, operands[1], V2QImode);
+  operands[0] = lowpart_subreg (QImode, operands[0], V2QImode);
 })
 
 (define_split
   "reload_completed"
   [(set (match_dup 0) (match_dup 2))
    (set (match_dup 0)
-       (minus:V4QI (match_dup 0) (match_dup 1)))]
+       (minus:V16QI (match_dup 0) (match_dup 1)))]
 {
-  operands[2] = CONST0_RTX (V4QImode);
-  operands[1] = gen_lowpart (V4QImode, operands[1]);
-  operands[0] = gen_lowpart (V4QImode, operands[0]);
+  operands[2] = CONST0_RTX (V16QImode);
+  operands[1] = lowpart_subreg (V16QImode, operands[1], V2QImode);
+  operands[0] = lowpart_subreg (V16QImode, operands[0], V2QImode);
 })
 
 (define_expand "mmx_<insn><mode>3"
    (set_attr "type" "mmxadd,sseadd,sseadd")
    (set_attr "mode" "DI,TI,TI")])
 
-(define_expand "neg<mode>2"
-  [(set (match_operand:VI_32 0 "register_operand")
-       (minus:VI_32
-         (match_dup 2)
-         (match_operand:VI_32 1 "register_operand")))]
-  "TARGET_SSE2"
-  "operands[2] = force_reg (<MODE>mode, CONST0_RTX (<MODE>mode));")
-
 (define_insn "<insn><mode>3"
   [(set (match_operand:VI_32 0 "register_operand" "=x,Yw")
         (plusminus:VI_32
          (match_operand:V2QI 1 "register_operand" "<comm>0,0,Yw")
          (match_operand:V2QI 2 "register_operand" "Q,x,Yw")))
    (clobber (reg:CC FLAGS_REG))]
-  ""
+  "!TARGET_PARTIAL_REG_STALL || optimize_function_for_size_p (cfun)"
   "#"
   [(set_attr "isa" "*,sse2_noavx,avx")
    (set_attr "type" "multi,sseadd,sseadd")
                                  (const_int 8)) 0)) 0))
       (clobber (reg:CC FLAGS_REG))])]
 {
-  operands[5] = gen_lowpart (HImode, operands[2]);
-  operands[4] = gen_lowpart (HImode, operands[1]);
-  operands[3] = gen_lowpart (HImode, operands[0]);
-  operands[2] = gen_lowpart (QImode, operands[2]);
-  operands[1] = gen_lowpart (QImode, operands[1]);
-  operands[0] = gen_lowpart (QImode, operands[0]);
+  operands[5] = lowpart_subreg (HImode, operands[2], V2QImode);
+  operands[4] = lowpart_subreg (HImode, operands[1], V2QImode);
+  operands[3] = lowpart_subreg (HImode, operands[0], V2QImode);
+  operands[2] = lowpart_subreg (QImode, operands[2], V2QImode);
+  operands[1] = lowpart_subreg (QImode, operands[1], V2QImode);
+  operands[0] = lowpart_subreg (QImode, operands[0], V2QImode);
 })
 
 (define_split
    (clobber (reg:CC FLAGS_REG))]
   "TARGET_SSE2 && reload_completed"
   [(set (match_dup 0)
-        (plusminus:V4QI (match_dup 1) (match_dup 2)))]
+        (plusminus:V16QI (match_dup 1) (match_dup 2)))]
 {
-  operands[2] = gen_lowpart (V4QImode, operands[2]);
-  operands[1] = gen_lowpart (V4QImode, operands[1]);
-  operands[0] = gen_lowpart (V4QImode, operands[0]);
+  operands[2] = lowpart_subreg (V16QImode, operands[2], V2QImode);
+  operands[1] = lowpart_subreg (V16QImode, operands[1], V2QImode);
+  operands[0] = lowpart_subreg (V16QImode, operands[0], V2QImode);
 })
 
 (define_expand "mmx_<insn><mode>3"