From c57b4c2208960c3b57abe148a5e9767f8f27747c Mon Sep 17 00:00:00 2001 From: Jakub Jelinek Date: Sun, 5 Jan 2020 13:52:24 +0100 Subject: [PATCH] re PR target/93141 (Missed optimization : Use of adc when checking overflow) PR target/93141 * config/i386/i386.md (SWIDWI): New mode iterator. (DWI, dwi): Add TImode variants. (addv4): Use SWIDWI iterator instead of SWI. Use instead of . Use CONST_SCALAR_INT_P instead of CONST_INT_P. (*addv4_1): Rename to ... (addv4_1): ... this. (QWI): New mode attribute. (*addv4_doubleword, *addv4_doubleword_1): New define_insn_and_split patterns. (*addv4_overflow_1, *addv4_overflow_2): New define_insn patterns. (uaddv4): Use SWIDWI iterator instead of SWI. Use instead of . (*addcarry_1): New define_insn. (*add3_doubleword_cc_overflow_1): New define_insn_and_split. * gcc.target/i386/pr93141-1.c: New test. * gcc.dg/pr67089-6.c: Expect 16 ADD_OVERFLOW calls even on ia32. From-SVN: r279887 --- gcc/ChangeLog | 20 ++ gcc/config/i386/i386.md | 295 ++++++++++++++++++++++++++++-- gcc/testsuite/ChangeLog | 4 + gcc/testsuite/gcc.dg/pr67089-6.c | 3 +- gcc/testsuite/gcc.target/i386/pr93141-1.c | 83 +++++++++ 5 files changed, 384 insertions(+), 21 deletions(-) create mode 100644 gcc/testsuite/gcc.target/i386/pr93141-1.c diff --git a/gcc/ChangeLog b/gcc/ChangeLog index 181584a..e0a623e 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,3 +1,23 @@ +2020-01-05 Jakub Jelinek + + PR target/93141 + * config/i386/i386.md (SWIDWI): New mode iterator. + (DWI, dwi): Add TImode variants. + (addv4): Use SWIDWI iterator instead of SWI. Use + instead of . Use + CONST_SCALAR_INT_P instead of CONST_INT_P. + (*addv4_1): Rename to ... + (addv4_1): ... this. + (QWI): New mode attribute. + (*addv4_doubleword, *addv4_doubleword_1): New + define_insn_and_split patterns. + (*addv4_overflow_1, *addv4_overflow_2): New define_insn + patterns. + (uaddv4): Use SWIDWI iterator instead of SWI. Use + instead of . + (*addcarry_1): New define_insn. + (*add3_doubleword_cc_overflow_1): New define_insn_and_split. + 2020-01-03 Konstantin Kharlamov * gdbinit.in (pr, prl, pt, pct, pgg, pgq, pgs, pge, pmz, pdd, pbs, pbm): diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md index 01c7d65..50fb99a 100644 --- a/gcc/config/i386/i386.md +++ b/gcc/config/i386/i386.md @@ -1036,6 +1036,9 @@ (define_mode_iterator DWI [(DI "!TARGET_64BIT") (TI "TARGET_64BIT")]) +;; SWI and DWI together. +(define_mode_iterator SWIDWI [QI HI SI DI (TI "TARGET_64BIT")]) + ;; GET_MODE_SIZE for selected modes. As GET_MODE_SIZE is not ;; compile time constant, it is faster to use than ;; GET_MODE_SIZE (mode). For XFmode which depends on @@ -1051,8 +1054,8 @@ (V4SF "16") (V8SF "32") (V16SF "64")]) ;; Double word integer modes as mode attribute. -(define_mode_attr DWI [(QI "HI") (HI "SI") (SI "DI") (DI "TI")]) -(define_mode_attr dwi [(QI "hi") (HI "si") (SI "di") (DI "ti")]) +(define_mode_attr DWI [(QI "HI") (HI "SI") (SI "DI") (DI "TI") (TI "OI")]) +(define_mode_attr dwi [(QI "hi") (HI "si") (SI "di") (DI "ti") (TI "oi")]) ;; LEA mode corresponding to an integer mode (define_mode_attr LEAMODE [(QI "SI") (HI "SI") (SI "SI") (DI "DI")]) @@ -6054,16 +6057,17 @@ ;; Add with jump on overflow. (define_expand "addv4" [(parallel [(set (reg:CCO FLAGS_REG) - (eq:CCO (plus: - (sign_extend: - (match_operand:SWI 1 "nonimmediate_operand")) - (match_dup 4)) - (sign_extend: - (plus:SWI (match_dup 1) - (match_operand:SWI 2 - ""))))) - (set (match_operand:SWI 0 "register_operand") - (plus:SWI (match_dup 1) (match_dup 2)))]) + (eq:CCO + (plus: + (sign_extend: + (match_operand:SWIDWI 1 "nonimmediate_operand")) + (match_dup 4)) + (sign_extend: + (plus:SWIDWI (match_dup 1) + (match_operand:SWIDWI 2 + ""))))) + (set (match_operand:SWIDWI 0 "register_operand") + (plus:SWIDWI (match_dup 1) (match_dup 2)))]) (set (pc) (if_then_else (eq (reg:CCO FLAGS_REG) (const_int 0)) (label_ref (match_operand 3)) @@ -6071,7 +6075,7 @@ "" { ix86_fixup_binary_operands_no_copy (PLUS, mode, operands); - if (CONST_INT_P (operands[2])) + if (CONST_SCALAR_INT_P (operands[2])) operands[4] = operands[2]; else operands[4] = gen_rtx_SIGN_EXTEND (mode, operands[2]); @@ -6093,7 +6097,7 @@ [(set_attr "type" "alu") (set_attr "mode" "")]) -(define_insn "*addv4_1" +(define_insn "addv4_1" [(set (reg:CCO FLAGS_REG) (eq:CCO (plus: (sign_extend: @@ -6118,15 +6122,178 @@ (const_string "4")] (const_string "")))]) +;; Quad word integer modes as mode attribute. +(define_mode_attr QWI [(SI "TI") (DI "OI")]) + +(define_insn_and_split "*addv4_doubleword" + [(set (reg:CCO FLAGS_REG) + (eq:CCO + (plus: + (sign_extend: + (match_operand: 1 "nonimmediate_operand" "%0,0")) + (sign_extend: + (match_operand: 2 "x86_64_hilo_general_operand" "r,o"))) + (sign_extend: + (plus: (match_dup 1) (match_dup 2))))) + (set (match_operand: 0 "nonimmediate_operand" "=ro,r") + (plus: (match_dup 1) (match_dup 2)))] + "ix86_binary_operator_ok (PLUS, mode, operands)" + "#" + "reload_completed" + [(parallel [(set (reg:CCC FLAGS_REG) + (compare:CCC + (plus:DWIH (match_dup 1) (match_dup 2)) + (match_dup 1))) + (set (match_dup 0) + (plus:DWIH (match_dup 1) (match_dup 2)))]) + (parallel [(set (reg:CCO FLAGS_REG) + (eq:CCO + (plus: + (plus: + (ltu: (reg:CC FLAGS_REG) (const_int 0)) + (sign_extend: (match_dup 4))) + (sign_extend: (match_dup 5))) + (sign_extend: + (plus:DWIH + (plus:DWIH + (ltu:DWIH (reg:CC FLAGS_REG) (const_int 0)) + (match_dup 4)) + (match_dup 5))))) + (set (match_dup 3) + (plus:DWIH + (plus:DWIH + (ltu:DWIH (reg:CC FLAGS_REG) (const_int 0)) + (match_dup 4)) + (match_dup 5)))])] +{ + split_double_mode (mode, &operands[0], 3, &operands[0], &operands[3]); +}) + +(define_insn_and_split "*addv4_doubleword_1" + [(set (reg:CCO FLAGS_REG) + (eq:CCO + (plus: + (sign_extend: + (match_operand: 1 "nonimmediate_operand" "%0")) + (match_operand: 3 "const_scalar_int_operand" "")) + (sign_extend: + (plus: + (match_dup 1) + (match_operand: 2 "x86_64_hilo_general_operand" ""))))) + (set (match_operand: 0 "nonimmediate_operand" "=ro") + (plus: (match_dup 1) (match_dup 2)))] + "ix86_binary_operator_ok (PLUS, mode, operands) + && CONST_SCALAR_INT_P (operands[2]) + && rtx_equal_p (operands[2], operands[3])" + "#" + "reload_completed" + [(parallel [(set (reg:CCC FLAGS_REG) + (compare:CCC + (plus:DWIH (match_dup 1) (match_dup 2)) + (match_dup 1))) + (set (match_dup 0) + (plus:DWIH (match_dup 1) (match_dup 2)))]) + (parallel [(set (reg:CCO FLAGS_REG) + (eq:CCO + (plus: + (plus: + (ltu: (reg:CC FLAGS_REG) (const_int 0)) + (sign_extend: (match_dup 4))) + (match_dup 5)) + (sign_extend: + (plus:DWIH + (plus:DWIH + (ltu:DWIH (reg:CC FLAGS_REG) (const_int 0)) + (match_dup 4)) + (match_dup 5))))) + (set (match_dup 3) + (plus:DWIH + (plus:DWIH + (ltu:DWIH (reg:CC FLAGS_REG) (const_int 0)) + (match_dup 4)) + (match_dup 5)))])] +{ + split_double_mode (mode, &operands[0], 3, &operands[0], &operands[3]); + if (operands[2] == const0_rtx) + { + emit_insn (gen_addv4_1 (operands[3], operands[4], operands[5], + operands[5])); + DONE; + } +}) + +(define_insn "*addv4_overflow_1" + [(set (reg:CCO FLAGS_REG) + (eq:CCO + (plus: + (plus: + (match_operator: 4 "ix86_carry_flag_operator" + [(match_operand 3 "flags_reg_operand") (const_int 0)]) + (sign_extend: + (match_operand:SWI 1 "nonimmediate_operand" "%0,0"))) + (sign_extend: + (match_operand:SWI 2 "" "rWe,m"))) + (sign_extend: + (plus:SWI + (plus:SWI + (match_operator:SWI 5 "ix86_carry_flag_operator" + [(match_dup 3) (const_int 0)]) + (match_dup 1)) + (match_dup 2))))) + (set (match_operand:SWI 0 "nonimmediate_operand" "=rm,r") + (plus:SWI + (plus:SWI + (match_op_dup 5 [(match_dup 3) (const_int 0)]) + (match_dup 1)) + (match_dup 2)))] + "ix86_binary_operator_ok (PLUS, mode, operands)" + "adc{}\t{%2, %0|%0, %2}" + [(set_attr "type" "alu") + (set_attr "mode" "")]) + +(define_insn "*addv4_overflow_2" + [(set (reg:CCO FLAGS_REG) + (eq:CCO + (plus: + (plus: + (match_operator: 4 "ix86_carry_flag_operator" + [(match_operand 3 "flags_reg_operand") (const_int 0)]) + (sign_extend: + (match_operand:SWI 1 "nonimmediate_operand" "%0"))) + (match_operand: 6 "const_int_operand" "")) + (sign_extend: + (plus:SWI + (plus:SWI + (match_operator:SWI 5 "ix86_carry_flag_operator" + [(match_dup 3) (const_int 0)]) + (match_dup 1)) + (match_operand:SWI 2 "x86_64_immediate_operand" "e"))))) + (set (match_operand:SWI 0 "nonimmediate_operand" "=rm") + (plus:SWI + (plus:SWI + (match_op_dup 5 [(match_dup 3) (const_int 0)]) + (match_dup 1)) + (match_dup 2)))] + "ix86_binary_operator_ok (PLUS, mode, operands) + && CONST_INT_P (operands[2]) + && INTVAL (operands[2]) == INTVAL (operands[6])" + "adc{}\t{%2, %0|%0, %2}" + [(set_attr "type" "alu") + (set_attr "mode" "") + (set (attr "length_immediate") + (if_then_else (match_test "IN_RANGE (INTVAL (operands[2]), -128, 127)") + (const_string "1") + (const_string "4")))]) + (define_expand "uaddv4" [(parallel [(set (reg:CCC FLAGS_REG) (compare:CCC - (plus:SWI - (match_operand:SWI 1 "nonimmediate_operand") - (match_operand:SWI 2 "")) + (plus:SWIDWI + (match_operand:SWIDWI 1 "nonimmediate_operand") + (match_operand:SWIDWI 2 "")) (match_dup 1))) - (set (match_operand:SWI 0 "register_operand") - (plus:SWI (match_dup 1) (match_dup 2)))]) + (set (match_operand:SWIDWI 0 "register_operand") + (plus:SWIDWI (match_dup 1) (match_dup 2)))]) (set (pc) (if_then_else (ltu (reg:CCC FLAGS_REG) (const_int 0)) (label_ref (match_operand 3)) @@ -6649,6 +6816,48 @@ (plus:SWI48 (match_dup 1) (match_dup 2)))])] "ix86_binary_operator_ok (PLUS, mode, operands)") +(define_insn "*addcarry_1" + [(set (reg:CCC FLAGS_REG) + (compare:CCC + (zero_extend: + (plus:SWI48 + (plus:SWI48 + (match_operator:SWI48 5 "ix86_carry_flag_operator" + [(match_operand 3 "flags_reg_operand") (const_int 0)]) + (match_operand:SWI48 1 "nonimmediate_operand" "%0")) + (match_operand:SWI48 2 "x86_64_immediate_operand" "e"))) + (plus: + (match_operand: 6 "const_scalar_int_operand" "") + (match_operator: 4 "ix86_carry_flag_operator" + [(match_dup 3) (const_int 0)])))) + (set (match_operand:SWI48 0 "register_operand" "=r") + (plus:SWI48 (plus:SWI48 (match_op_dup 5 + [(match_dup 3) (const_int 0)]) + (match_dup 1)) + (match_dup 2)))] + "ix86_binary_operator_ok (PLUS, mode, operands) + && CONST_INT_P (operands[2]) + /* Check that operands[6] is operands[2] zero extended from + mode to mode. */ + && ((mode == SImode || INTVAL (operands[2]) >= 0) + ? (CONST_INT_P (operands[6]) + && UINTVAL (operands[6]) == (UINTVAL (operands[2]) + & GET_MODE_MASK (mode))) + : (CONST_WIDE_INT_P (operands[6]) + && CONST_WIDE_INT_NUNITS (operands[6]) == 2 + && ((unsigned HOST_WIDE_INT) CONST_WIDE_INT_ELT (operands[6], 0) + == UINTVAL (operands[2])) + && CONST_WIDE_INT_ELT (operands[6], 1) == 0))" + "adc{}\t{%2, %0|%0, %2}" + [(set_attr "type" "alu") + (set_attr "use_carry" "1") + (set_attr "pent_pair" "pu") + (set_attr "mode" "") + (set (attr "length_immediate") + (if_then_else (match_test "IN_RANGE (INTVAL (operands[2]), -128, 127)") + (const_string "1") + (const_string "4")))]) + (define_insn "@sub3_carry" [(set (match_operand:SWI 0 "nonimmediate_operand" "=m,") (minus:SWI @@ -6885,6 +7094,54 @@ [(set_attr "type" "alu") (set_attr "mode" "SI")]) +(define_insn_and_split "*add3_doubleword_cc_overflow_1" + [(set (reg:CCC FLAGS_REG) + (compare:CCC + (plus: + (match_operand: 1 "nonimmediate_operand" "%0,0") + (match_operand: 2 "x86_64_hilo_general_operand" "r,o")) + (match_dup 1))) + (set (match_operand: 0 "nonimmediate_operand" "=ro,r") + (plus: (match_dup 1) (match_dup 2)))] + "ix86_binary_operator_ok (PLUS, mode, operands)" + "#" + "reload_completed" + [(parallel [(set (reg:CCC FLAGS_REG) + (compare:CCC + (plus:DWIH (match_dup 1) (match_dup 2)) + (match_dup 1))) + (set (match_dup 0) + (plus:DWIH (match_dup 1) (match_dup 2)))]) + (parallel [(set (reg:CCC FLAGS_REG) + (compare:CCC + (zero_extend: + (plus:DWIH + (plus:DWIH + (ltu:DWIH (reg:CC FLAGS_REG) (const_int 0)) + (match_dup 4)) + (match_dup 5))) + (plus: + (match_dup 6) + (ltu: (reg:CC FLAGS_REG) (const_int 0))))) + (set (match_dup 3) + (plus:DWIH + (plus:DWIH (ltu:DWIH (reg:CC FLAGS_REG) (const_int 0)) + (match_dup 4)) + (match_dup 5)))])] +{ + split_double_mode (mode, &operands[0], 3, &operands[0], &operands[3]); + if (operands[2] == const0_rtx) + { + emit_insn (gen_addcarry_0 (operands[3], operands[4], operands[5])); + DONE; + } + if (CONST_INT_P (operands[5])) + operands[6] = simplify_unary_operation (ZERO_EXTEND, mode, + operands[5], mode); + else + operands[6] = gen_rtx_ZERO_EXTEND (mode, operands[5]); +}) + ;; x == 0 with zero flag test can be done also as x < 1U with carry flag ;; test, where the latter is preferrable if we have some carry consuming ;; instruction. diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog index c3f541b..76a25cc 100644 --- a/gcc/testsuite/ChangeLog +++ b/gcc/testsuite/ChangeLog @@ -1,5 +1,9 @@ 2020-01-05 Jakub Jelinek + PR target/93141 + * gcc.target/i386/pr93141-1.c: New test. + * gcc.dg/pr67089-6.c: Expect 16 ADD_OVERFLOW calls even on ia32. + PR c++/93138 * g++.dg/warn/Wredundant-tags-2.C: New test. diff --git a/gcc/testsuite/gcc.dg/pr67089-6.c b/gcc/testsuite/gcc.dg/pr67089-6.c index 9b0708b..db2ffdf 100644 --- a/gcc/testsuite/gcc.dg/pr67089-6.c +++ b/gcc/testsuite/gcc.dg/pr67089-6.c @@ -56,7 +56,6 @@ T (24, unsigned long long, x + y, if (d || y > r) foo (0)) T (25, unsigned short, 2U - x, if (r > 2U) foo (0)) T (26, unsigned char, 2U - x, if (r <= 2U) foo (0)) -/* { dg-final { scan-tree-dump-times "ADD_OVERFLOW" 16 "widening_mul" { target { { i?86-*-* x86_64-*-* } && { ! ia32 } } } } } */ +/* { dg-final { scan-tree-dump-times "ADD_OVERFLOW" 16 "widening_mul" { target { i?86-*-* x86_64-*-* } } } } */ /* { dg-final { scan-tree-dump-times "SUB_OVERFLOW" 11 "widening_mul" { target { { i?86-*-* x86_64-*-* } && { ! ia32 } } } } } */ -/* { dg-final { scan-tree-dump-times "ADD_OVERFLOW" 12 "widening_mul" { target { { i?86-*-* x86_64-*-* } && ia32 } } } } */ /* { dg-final { scan-tree-dump-times "SUB_OVERFLOW" 9 "widening_mul" { target { { i?86-*-* x86_64-*-* } && ia32 } } } } */ diff --git a/gcc/testsuite/gcc.target/i386/pr93141-1.c b/gcc/testsuite/gcc.target/i386/pr93141-1.c new file mode 100644 index 0000000..64e4a10 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/pr93141-1.c @@ -0,0 +1,83 @@ +/* PR target/93141 */ +/* { dg-do compile } */ +/* { dg-options "-O2 -masm=att" } */ +/* { dg-final { scan-assembler-not "cmp\[lq]\t" } } */ +/* { dg-final { scan-assembler-times "setc\t%" 3 } } */ +/* { dg-final { scan-assembler-times "seto\t%" 5 } } */ +/* { dg-final { scan-assembler-times "adc\[lq]\t" 5 } } */ + +#ifdef __x86_64__ +typedef unsigned __int128 U; +typedef signed __int128 S; +#else +typedef unsigned long long U; +typedef signed long long S; +#endif +int o; + +U +foo (U x, U y) +{ + U z; + o = __builtin_add_overflow (x, y, &z); + return z; +} + +U +bar (U x) +{ + U z; + o = __builtin_add_overflow (x, ((U) 0xdeadbee) << (sizeof (U) * __CHAR_BIT__ / 2), &z); + return z; +} + +U +baz (U x) +{ + U z; + o = __builtin_add_overflow (x, (((U) 0xdeadbee) << (sizeof (U) * __CHAR_BIT__ / 2)) + | (U) 0xbeedead, &z); + return z; +} + +S +qux (S x, S y) +{ + S z; + o = __builtin_add_overflow (x, y, &z); + return z; +} + +S +quux (S x) +{ + S z; + o = __builtin_add_overflow (x, ((S) 0xdeadbee) << (sizeof (S) * __CHAR_BIT__ / 2), &z); + return z; +} + +S +corge (S x) +{ + S z; + o = __builtin_add_overflow (x, (((S) 0xdeadbee) << (sizeof (S) * __CHAR_BIT__ / 2)) + | (S) 0xbeedead, &z); + return z; +} + +S +grault (S x) +{ + S z; + o = __builtin_add_overflow (x, -((S) 0xdeadbee) << (sizeof (S) * __CHAR_BIT__ / 2), &z); + return z; +} + +S +garply (S x) +{ + S z; + o = __builtin_add_overflow (x, (-(((S) 0xdeadbee) << (sizeof (S) * __CHAR_BIT__ / 2))) + | (S) 0xbeedead, &z); + return z; +} -- 2.7.4