From 386b15d6ef2db6bcca1369650f5456728e2d42f1 Mon Sep 17 00:00:00 2001
From: Uros Bizjak
Date: Thu, 13 Jan 2022 19:11:41 +0100
Subject: [PATCH] i386: Cleanup V2QI arithmetic instructions
MIME-Version: 1.0
Content-Type: text/plain; charset=utf8
Content-Transfer-Encoding: 8bit

2022-01-13  Uroš Bizjak

gcc/ChangeLog:

	* config/i386/mmx.md (negv2qi): Disparage GPR alternative a bit.
	Disable for TARGET_PARTIAL_REG_STALL unless optimizing for size.
	(negv2qi splitters): Use lowpart_subreg instead of
	gen_lowpart to create subreg.
	(v2qi3): Disparage GPR alternative a bit.
	Disable for TARGET_PARTIAL_REG_STALL unless optimizing for size.
	(v2qi3 splitters): Use lowpart_subreg instead of
	gen_lowpart to create subreg.
	* config/i386/i386.md (*subqi_ext_2): Move.
---
 gcc/config/i386/i386.md | 48 +++++++++++++++++++++---------------------
 gcc/config/i386/mmx.md  | 56 ++++++++++++++++++++++++-------------------------
 2 files changed, 52 insertions(+), 52 deletions(-)

diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md
index 9937643..bcaaa49 100644
--- a/gcc/config/i386/i386.md
+++ b/gcc/config/i386/i386.md
@@ -6905,6 +6905,30 @@
   [(set_attr "type" "alu")
    (set_attr "mode" "SI")])
 
+(define_insn "*subqi_ext_2"
+  [(set (zero_extract:SWI248
+          (match_operand:SWI248 0 "register_operand" "+Q")
+          (const_int 8)
+          (const_int 8))
+        (subreg:SWI248
+          (minus:QI
+            (subreg:QI
+              (zero_extract:SWI248
+                (match_operand:SWI248 1 "register_operand" "0")
+                (const_int 8)
+                (const_int 8)) 0)
+            (subreg:QI
+              (zero_extract:SWI248
+                (match_operand:SWI248 2 "register_operand" "Q")
+                (const_int 8)
+                (const_int 8)) 0)) 0))
+   (clobber (reg:CC FLAGS_REG))]
+  "/* FIXME: without this LRA can't reload this pattern, see PR82524. */
+   rtx_equal_p (operands[0], operands[1])"
+  "sub{b}\t{%h2, %h0|%h0, %h2}"
+  [(set_attr "type" "alu")
+   (set_attr "mode" "QI")])
+
 ;; Subtract with jump on overflow.
 (define_expand "subv4"
   [(parallel [(set (reg:CCO FLAGS_REG)
@@ -6932,30 +6956,6 @@
   operands[4] = gen_rtx_SIGN_EXTEND (mode, operands[2]);
 })
 
-(define_insn "*subqi_ext_2"
-  [(set (zero_extract:SWI248
-          (match_operand:SWI248 0 "register_operand" "+Q")
-          (const_int 8)
-          (const_int 8))
-        (subreg:SWI248
-          (minus:QI
-            (subreg:QI
-              (zero_extract:SWI248
-                (match_operand:SWI248 1 "register_operand" "0")
-                (const_int 8)
-                (const_int 8)) 0)
-            (subreg:QI
-              (zero_extract:SWI248
-                (match_operand:SWI248 2 "register_operand" "Q")
-                (const_int 8)
-                (const_int 8)) 0)) 0))
-   (clobber (reg:CC FLAGS_REG))]
-  "/* FIXME: without this LRA can't reload this pattern, see PR82524. */
-   rtx_equal_p (operands[0], operands[1])"
-  "sub{b}\t{%h2, %h0|%h0, %h2}"
-  [(set_attr "type" "alu")
-   (set_attr "mode" "QI")])
-
 (define_insn "*subv4"
   [(set (reg:CCO FLAGS_REG)
         (eq:CCO (minus:
diff --git a/gcc/config/i386/mmx.md b/gcc/config/i386/mmx.md
index 295a132..3d99a5e 100644
--- a/gcc/config/i386/mmx.md
+++ b/gcc/config/i386/mmx.md
@@ -1633,12 +1633,20 @@
   "TARGET_MMX_WITH_SSE"
   "operands[2] = force_reg (mode, CONST0_RTX (mode));")
 
+(define_expand "neg2"
+  [(set (match_operand:VI_32 0 "register_operand")
+        (minus:VI_32
+          (match_dup 2)
+          (match_operand:VI_32 1 "register_operand")))]
+  "TARGET_SSE2"
+  "operands[2] = force_reg (mode, CONST0_RTX (mode));")
+
 (define_insn "negv2qi2"
   [(set (match_operand:V2QI 0 "register_operand" "=?Q,&Yw")
         (neg:V2QI
           (match_operand:V2QI 1 "register_operand" "0,Yw")))
    (clobber (reg:CC FLAGS_REG))]
-  ""
+  "!TARGET_PARTIAL_REG_STALL || optimize_function_for_size_p (cfun)"
   "#"
   [(set_attr "isa" "*,sse2")
    (set_attr "type" "multi")
@@ -1664,10 +1672,10 @@
                                   (const_int 8)) 0)) 0))
       (clobber (reg:CC FLAGS_REG))])]
 {
-  operands[3] = gen_lowpart (HImode, operands[1]);
-  operands[2] = gen_lowpart (HImode, operands[0]);
-  operands[1] = gen_lowpart (QImode, operands[1]);
-  operands[0] = gen_lowpart (QImode, operands[0]);
+  operands[3] = lowpart_subreg (HImode, operands[1], V2QImode);
+  operands[2] = lowpart_subreg (HImode, operands[0], V2QImode);
+  operands[1] = lowpart_subreg (QImode, operands[1], V2QImode);
+  operands[0] = lowpart_subreg (QImode, operands[0], V2QImode);
 })
 
 (define_split
@@ -1678,11 +1686,11 @@
   "reload_completed"
   [(set (match_dup 0) (match_dup 2))
    (set (match_dup 0)
-        (minus:V4QI (match_dup 0) (match_dup 1)))]
+        (minus:V16QI (match_dup 0) (match_dup 1)))]
 {
-  operands[2] = CONST0_RTX (V4QImode);
-  operands[1] = gen_lowpart (V4QImode, operands[1]);
-  operands[0] = gen_lowpart (V4QImode, operands[0]);
+  operands[2] = CONST0_RTX (V16QImode);
+  operands[1] = lowpart_subreg (V16QImode, operands[1], V2QImode);
+  operands[0] = lowpart_subreg (V16QImode, operands[0], V2QImode);
 })
 
 (define_expand "mmx_3"
@@ -1718,14 +1726,6 @@
    (set_attr "type" "mmxadd,sseadd,sseadd")
    (set_attr "mode" "DI,TI,TI")])
 
-(define_expand "neg2"
-  [(set (match_operand:VI_32 0 "register_operand")
-        (minus:VI_32
-          (match_dup 2)
-          (match_operand:VI_32 1 "register_operand")))]
-  "TARGET_SSE2"
-  "operands[2] = force_reg (mode, CONST0_RTX (mode));")
-
 (define_insn "3"
   [(set (match_operand:VI_32 0 "register_operand" "=x,Yw")
         (plusminus:VI_32
@@ -1745,7 +1745,7 @@
           (match_operand:V2QI 1 "register_operand" "0,0,Yw")
           (match_operand:V2QI 2 "register_operand" "Q,x,Yw")))
    (clobber (reg:CC FLAGS_REG))]
-  ""
+  "!TARGET_PARTIAL_REG_STALL || optimize_function_for_size_p (cfun)"
   "#"
   [(set_attr "isa" "*,sse2_noavx,avx")
    (set_attr "type" "multi,sseadd,sseadd")
@@ -1776,12 +1776,12 @@
                                   (const_int 8)) 0)) 0))
       (clobber (reg:CC FLAGS_REG))])]
 {
-  operands[5] = gen_lowpart (HImode, operands[2]);
-  operands[4] = gen_lowpart (HImode, operands[1]);
-  operands[3] = gen_lowpart (HImode, operands[0]);
-  operands[2] = gen_lowpart (QImode, operands[2]);
-  operands[1] = gen_lowpart (QImode, operands[1]);
-  operands[0] = gen_lowpart (QImode, operands[0]);
+  operands[5] = lowpart_subreg (HImode, operands[2], V2QImode);
+  operands[4] = lowpart_subreg (HImode, operands[1], V2QImode);
+  operands[3] = lowpart_subreg (HImode, operands[0], V2QImode);
+  operands[2] = lowpart_subreg (QImode, operands[2], V2QImode);
+  operands[1] = lowpart_subreg (QImode, operands[1], V2QImode);
+  operands[0] = lowpart_subreg (QImode, operands[0], V2QImode);
 })
 
 (define_split
@@ -1792,11 +1792,11 @@
    (clobber (reg:CC FLAGS_REG))]
   "TARGET_SSE2 && reload_completed"
   [(set (match_dup 0)
-        (plusminus:V4QI (match_dup 1) (match_dup 2)))]
+        (plusminus:V16QI (match_dup 1) (match_dup 2)))]
 {
-  operands[2] = gen_lowpart (V4QImode, operands[2]);
-  operands[1] = gen_lowpart (V4QImode, operands[1]);
-  operands[0] = gen_lowpart (V4QImode, operands[0]);
+  operands[2] = lowpart_subreg (V16QImode, operands[2], V2QImode);
+  operands[1] = lowpart_subreg (V16QImode, operands[1], V2QImode);
+  operands[0] = lowpart_subreg (V16QImode, operands[0], V2QImode);
 })
 
 (define_expand "mmx_3"
-- 
2.7.4