the division and modulo and if it emits any library calls or any
{,U}{DIV,MOD} rtxes throw it away and use a divmod optab or
divmod libcall. */
- struct separate_ops ops;
- ops.code = TRUNC_DIV_EXPR;
- ops.type = type;
- ops.op0 = make_tree (ops.type, op0);
- ops.op1 = arg1;
- ops.op2 = NULL_TREE;
- ops.location = gimple_location (call_stmt);
- start_sequence ();
- quotient = expand_expr_real_2 (&ops, NULL_RTX, mode, EXPAND_NORMAL);
- if (contains_call_div_mod (get_insns ()))
- quotient = NULL_RTX;
- else
+  scalar_int_mode int_mode;
+  /* Before falling back to a divmod optab/libcall, try the doubleword
+     divmod expansion: for a non-power-of-two constant divisor in a mode
+     exactly two words wide it can compute both quotient and remainder
+     without any library call.  */
+  if (remainder == NULL_RTX
+      && optimize
+      && CONST_INT_P (op1)
+      && !pow2p_hwi (INTVAL (op1))
+      && is_int_mode (TYPE_MODE (type), &int_mode)
+      && GET_MODE_SIZE (int_mode) == 2 * UNITS_PER_WORD
+      && optab_handler (and_optab, word_mode) != CODE_FOR_nothing
+      && optab_handler (add_optab, word_mode) != CODE_FOR_nothing
+      && optimize_insn_for_speed_p ())
+    {
+      rtx_insn *last = get_last_insn ();
+      remainder = NULL_RTX;
+      quotient = expand_doubleword_divmod (int_mode, op0, op1, &remainder,
+					   TYPE_UNSIGNED (type));
+      if (quotient != NULL_RTX)
+	{
+	  if (optab_handler (mov_optab, int_mode) != CODE_FOR_nothing)
+	    {
+	      /* Attach REG_EQUAL notes so later RTL passes know these
+		 registers hold op0 / op1 and op0 % op1.  */
+	      rtx_insn *move = emit_move_insn (quotient, quotient);
+	      set_dst_reg_note (move, REG_EQUAL,
+				gen_rtx_fmt_ee (TYPE_UNSIGNED (type)
+						? UDIV : DIV, int_mode,
+						copy_rtx (op0), op1),
+				quotient);
+	      move = emit_move_insn (remainder, remainder);
+	      /* The note for the remainder move must name the remainder
+		 register, not the quotient.  */
+	      set_dst_reg_note (move, REG_EQUAL,
+				gen_rtx_fmt_ee (TYPE_UNSIGNED (type)
+						? UMOD : MOD, int_mode,
+						copy_rtx (op0), op1),
+				remainder);
+	    }
+	}
+      else
+	delete_insns_since (last);
+    }
+
+ if (remainder == NULL_RTX)
{
- ops.code = TRUNC_MOD_EXPR;
- remainder = expand_expr_real_2 (&ops, NULL_RTX, mode, EXPAND_NORMAL);
+ struct separate_ops ops;
+ ops.code = TRUNC_DIV_EXPR;
+ ops.type = type;
+ ops.op0 = make_tree (ops.type, op0);
+ ops.op1 = arg1;
+ ops.op2 = NULL_TREE;
+ ops.location = gimple_location (call_stmt);
+ start_sequence ();
+ quotient = expand_expr_real_2 (&ops, NULL_RTX, mode, EXPAND_NORMAL);
if (contains_call_div_mod (get_insns ()))
- remainder = NULL_RTX;
+ quotient = NULL_RTX;
+ else
+ {
+ ops.code = TRUNC_MOD_EXPR;
+ remainder = expand_expr_real_2 (&ops, NULL_RTX, mode,
+ EXPAND_NORMAL);
+ if (contains_call_div_mod (get_insns ()))
+ remainder = NULL_RTX;
+ }
+ if (remainder)
+ insns = get_insns ();
+ end_sequence ();
}
- if (remainder)
- insns = get_insns ();
- end_sequence ();
}
if (remainder)
}
return NULL_RTX;
}
+
+/* Similarly to the above function, but compute both quotient and remainder.
+   Quotient can be computed from the remainder as:
+	rem = op0 % op1;  // Handled using expand_doubleword_mod
+	quot = (op0 - rem) * inv; // inv is multiplicative inverse of op1 modulo
+				  // 2 * BITS_PER_WORD
+
+   We can also handle cases where op1 is a multiple of power of two constant
+   and constant handled by expand_doubleword_mod.
+	op11 = 1 << __builtin_ctz (op1);
+	op12 = op1 / op11;
+	rem1 = op0 % op12;  // Handled using expand_doubleword_mod
+	quot1 = (op0 - rem1) * inv; // inv is multiplicative inverse of op12
+				    // modulo 2 * BITS_PER_WORD
+	rem = (quot1 % op11) * op12 + rem1;
+	quot = quot1 / op11;  */
+
+rtx
+expand_doubleword_divmod (machine_mode mode, rtx op0, rtx op1, rtx *rem,
+			  bool unsignedp)
+{
+  *rem = NULL_RTX;
+
+  /* Negative dividend should have been optimized into positive,
+     similarly modulo by 1 and modulo by power of two is optimized
+     differently too.  */
+  if (INTVAL (op1) <= 1 || pow2p_hwi (INTVAL (op1)))
+    return NULL_RTX;
+
+  /* Split op1 into op11 * op12 with op11 a power of two and op12 odd,
+     so that op12 has a multiplicative inverse modulo 2^(2*BITS_PER_WORD).  */
+  rtx op11 = const1_rtx;
+  rtx op12 = op1;
+  if ((INTVAL (op1) & 1) == 0)
+    {
+      int bit = ctz_hwi (INTVAL (op1));
+      op11 = GEN_INT (HOST_WIDE_INT_1 << bit);
+      op12 = GEN_INT (INTVAL (op1) >> bit);
+    }
+
+  /* rem1 = op0 % op12; punt if that can't be expanded inline.  */
+  rtx rem1 = expand_doubleword_mod (mode, op0, op12, unsignedp);
+  if (rem1 == NULL_RTX)
+    return NULL_RTX;
+
+  /* Compute inv, the multiplicative inverse of op12 modulo
+     2^(2*BITS_PER_WORD); it exists because op12 is odd.  */
+  int prec = 2 * BITS_PER_WORD;
+  wide_int a = wide_int::from (INTVAL (op12), prec + 1, UNSIGNED);
+  wide_int b = wi::shifted_mask (prec, 1, false, prec + 1);
+  wide_int m = wide_int::from (wi::mod_inv (a, b), prec, UNSIGNED);
+  rtx inv = immed_wide_int_const (m, mode);
+
+  /* quot1 = (op0 - rem1) * inv; op0 - rem1 is an exact multiple of op12,
+     so the truncated multiply by the inverse yields the exact quotient.  */
+  rtx_insn *last = get_last_insn ();
+  rtx quot1 = expand_simple_binop (mode, MINUS, op0, rem1,
+				   NULL_RTX, unsignedp, OPTAB_DIRECT);
+  if (quot1 == NULL_RTX)
+    return NULL_RTX;
+
+  quot1 = expand_simple_binop (mode, MULT, quot1, inv,
+			       NULL_RTX, unsignedp, OPTAB_DIRECT);
+  if (quot1 == NULL_RTX)
+    return NULL_RTX;
+
+  if (op11 != const1_rtx)
+    {
+      /* op1 had a power of two factor op11; fold it back in:
+	 rem = (quot1 % op11) * op12 + rem1 and quot = quot1 / op11,
+	 both by a power of two, hence cheap mask/shift expansions.  */
+      rtx rem2 = expand_divmod (1, TRUNC_MOD_EXPR, mode, quot1, op11,
+				NULL_RTX, unsignedp);
+      if (rem2 == NULL_RTX)
+	return NULL_RTX;
+
+      rem2 = expand_simple_binop (mode, MULT, rem2, op12, NULL_RTX,
+				  unsignedp, OPTAB_DIRECT);
+      if (rem2 == NULL_RTX)
+	return NULL_RTX;
+
+      rem2 = expand_simple_binop (mode, PLUS, rem2, rem1, NULL_RTX,
+				  unsignedp, OPTAB_DIRECT);
+      if (rem2 == NULL_RTX)
+	return NULL_RTX;
+
+      rtx quot2 = expand_divmod (0, TRUNC_DIV_EXPR, mode, quot1, op11,
+				 NULL_RTX, unsignedp);
+      if (quot2 == NULL_RTX)
+	return NULL_RTX;
+
+      rem1 = rem2;
+      quot1 = quot2;
+    }
+
+  /* Punt if we need any library calls.  */
+  for (; last; last = NEXT_INSN (last))
+    if (CALL_P (last))
+      return NULL_RTX;
+
+  *rem = rem1;
+  return quot1;
+}
\f
/* Wrapper around expand_binop which takes an rtx code to specify
the operation to perform, not an optab pointer. All other
}
/* Attempt to synthetize double word modulo by constant divisor. */
- if ((binoptab == umod_optab || binoptab == smod_optab)
+ if ((binoptab == umod_optab
+ || binoptab == smod_optab
+ || binoptab == udiv_optab
+ || binoptab == sdiv_optab)
&& optimize
&& CONST_INT_P (op1)
&& is_int_mode (mode, &int_mode)
&& optab_handler (add_optab, word_mode) != CODE_FOR_nothing
&& optimize_insn_for_speed_p ())
{
- rtx remainder = expand_doubleword_mod (int_mode, op0, op1,
- binoptab == umod_optab);
- if (remainder != NULL_RTX)
+ rtx res = NULL_RTX;
+ if ((binoptab == umod_optab || binoptab == smod_optab)
+ && (INTVAL (op1) & 1) == 0)
+ res = expand_doubleword_mod (int_mode, op0, op1,
+ binoptab == umod_optab);
+ else
+ {
+ rtx quot = expand_doubleword_divmod (int_mode, op0, op1, &res,
+ binoptab == umod_optab
+ || binoptab == udiv_optab);
+ if (quot == NULL_RTX)
+ res = NULL_RTX;
+ else if (binoptab == udiv_optab || binoptab == sdiv_optab)
+ res = quot;
+ }
+ if (res != NULL_RTX)
{
if (optab_handler (mov_optab, int_mode) != CODE_FOR_nothing)
{
- rtx_insn *move = emit_move_insn (target ? target : remainder,
- remainder);
- set_dst_reg_note (move,
- REG_EQUAL,
- gen_rtx_fmt_ee (UMOD, int_mode,
- copy_rtx (op0), op1),
- target ? target : remainder);
+ rtx_insn *move = emit_move_insn (target ? target : res,
+ res);
+ set_dst_reg_note (move, REG_EQUAL,
+ gen_rtx_fmt_ee (optab_to_code (binoptab),
+ int_mode, copy_rtx (op0), op1),
+ target ? target : res);
}
- return remainder;
+ return res;
}
else
delete_insns_since (last);
enum optab_methods);
extern rtx expand_vector_broadcast (machine_mode, rtx);
+extern rtx expand_doubleword_divmod (machine_mode, rtx, rtx, rtx *, bool);
+
/* Generate code for a simple binary or unary operation. "Simple" in
this case means "can be unambiguously described by a (mode, code)
pair and mapped to a single optab." */
#define C3(n) C2(n##0) C2(n##4) C2(n##9)
#define C4(n) C3(n##0) C3(n##3) C3(n##7)
#endif
-#define TESTS C4(1)
+#define TESTS C4(1) C1(10010) C1(10012) C1(16144)
TESTS
#define C3(n) C2(n##0) C2(n##4) C2(n##9)
#define C4(n) C3(n##0) C3(n##3) C3(n##7)
#endif
-#define TESTS C4(1)
+#define TESTS C4(1) C1(10010) C1(10012) C1(16144)
TESTS
--- /dev/null
+/* PR rtl-optimization/97459 */
+/* { dg-do run } */
+/* { dg-options "-O2" } */
+/* { dg-additional-options "-DEXPENSIVE" { target run_expensive_tests } } */
+
+/* Runtime test for double-word unsigned division by many constant
+   divisors.  Each foo##n divides by (n - 10000); the reference result
+   comes from foo, whose divisor is not a compile-time constant and so
+   cannot use the doubleword divmod expansion.  */
+
+#ifdef __SIZEOF_INT128__
+typedef __uint128_t T;
+#else
+typedef unsigned long long T;
+#endif
+
+T __attribute__((noipa)) foo (T x, T n) { return x / n; }
+#define C(n) T __attribute__((noipa)) foo##n (T x) { return x / (n - 10000); }
+
+/* Generate the battery of test functions; the EXPENSIVE variant covers
+   every divisor in the range, the default a sparse subset.  */
+#define C1(n) C(n##1) C(n##3) C(n##5) C(n##7) C(n##9)
+#define C2(n) C1(n##0) C1(n##1) C1(n##2) C1(n##3) C1(n##4) \
+	      C1(n##5) C1(n##6) C1(n##7) C1(n##8) C1(n##9)
+#ifdef EXPENSIVE
+#define C3(n) C2(n##0) C2(n##1) C2(n##2) C2(n##3) C2(n##4) \
+	      C2(n##5) C2(n##6) C2(n##7) C2(n##8) C2(n##9)
+#define C4(n) C3(n##0) C3(n##1) C3(n##2) C3(n##3) C3(n##4) \
+	      C3(n##5) C3(n##6) C3(n##7) C3(n##8) C3(n##9)
+#else
+#define C3(n) C2(n##0) C2(n##4) C2(n##9)
+#define C4(n) C3(n##0) C3(n##3) C3(n##7)
+#endif
+#define TESTS C4(1) C1(10010) C1(10012) C1(16144)
+
+TESTS
+
+struct S { T x; T (*foo) (T); };
+
+#undef C
+#define C(n) { n - 10000, foo##n },
+
+struct S tests[] = {
+TESTS
+  { 0, 0 }
+};
+
+int
+main ()
+{
+  int i, j, k;
+  /* Probe dividends just around each power of two, where carries in the
+     doubleword expansion are most likely to go wrong.  */
+  for (k = 0; tests[k].x; k++)
+    for (i = 0; i < sizeof (T) * __CHAR_BIT__; i++)
+      for (j = -5; j <= 5; j++)
+	{
+	  T x = ((T) 1 << i) + j;
+	  if (foo (x, tests[k].x) != tests[k].foo (x))
+	    __builtin_abort ();
+	}
+  return 0;
+}
--- /dev/null
+/* PR rtl-optimization/97459 */
+/* { dg-do run } */
+/* { dg-options "-O2" } */
+/* { dg-additional-options "-DEXPENSIVE" { target run_expensive_tests } } */
+
+/* Runtime test for double-word signed division by many constant divisors;
+   both positive and negative dividends are checked.  The reference result
+   comes from foo, whose divisor is not a compile-time constant.  */
+
+#ifdef __SIZEOF_INT128__
+typedef __int128_t T;
+typedef __uint128_t U;
+#else
+typedef long long T;
+typedef unsigned long long U;
+#endif
+
+T __attribute__((noipa)) foo (T x, T n) { return x / n; }
+#define C(n) T __attribute__((noipa)) foo##n (T x) { return x / (n - 10000); }
+
+/* Generate the battery of test functions; the EXPENSIVE variant covers
+   every divisor in the range, the default a sparse subset.  */
+#define C1(n) C(n##1) C(n##3) C(n##5) C(n##7) C(n##9)
+#define C2(n) C1(n##0) C1(n##1) C1(n##2) C1(n##3) C1(n##4) \
+	      C1(n##5) C1(n##6) C1(n##7) C1(n##8) C1(n##9)
+#ifdef EXPENSIVE
+#define C3(n) C2(n##0) C2(n##1) C2(n##2) C2(n##3) C2(n##4) \
+	      C2(n##5) C2(n##6) C2(n##7) C2(n##8) C2(n##9)
+#define C4(n) C3(n##0) C3(n##1) C3(n##2) C3(n##3) C3(n##4) \
+	      C3(n##5) C3(n##6) C3(n##7) C3(n##8) C3(n##9)
+#else
+#define C3(n) C2(n##0) C2(n##4) C2(n##9)
+#define C4(n) C3(n##0) C3(n##3) C3(n##7)
+#endif
+#define TESTS C4(1) C1(10010) C1(10012) C1(16144)
+
+TESTS
+
+struct S { T x; T (*foo) (T); };
+
+#undef C
+#define C(n) { n - 10000, foo##n },
+
+struct S tests[] = {
+TESTS
+  { 0, 0 }
+};
+
+int
+main ()
+{
+  int i, j, k;
+  /* Probe dividends just around each power of two (and their negations,
+     computed in the unsigned type U to avoid signed overflow).  */
+  for (k = 0; tests[k].x; k++)
+    for (i = 0; i < sizeof (T) * __CHAR_BIT__; i++)
+      for (j = -5; j <= 5; j++)
+	{
+	  U x = ((U) 1 << i) + j;
+	  if (foo ((T) x, tests[k].x) != tests[k].foo ((T) x)
+	      || foo ((T) -x, tests[k].x) != tests[k].foo ((T) -x))
+	    __builtin_abort ();
+	}
+  return 0;
+}
--- /dev/null
+/* PR rtl-optimization/97459 */
+/* { dg-do run } */
+/* { dg-options "-O2" } */
+/* { dg-additional-options "-DEXPENSIVE" { target run_expensive_tests } } */
+
+/* Runtime test for combined double-word unsigned division and modulo by
+   constant divisors (exercises the DIVMOD internal-fn path).  Quotient and
+   remainder are both checked against foo, whose divisor is not a
+   compile-time constant.  */
+
+#ifdef __SIZEOF_INT128__
+typedef __uint128_t T;
+#else
+typedef unsigned long long T;
+#endif
+
+T __attribute__((noipa)) foo (T x, T n, T *r) { *r = x % n; return x / n; }
+#define C(n) T __attribute__((noipa)) foo##n (T x, T *r) { *r = x % (n - 10000); return x / (n - 10000); }
+
+/* Generate the battery of test functions; the EXPENSIVE variant covers
+   every divisor in the range, the default a sparse subset.  */
+#define C1(n) C(n##1) C(n##3) C(n##5) C(n##7) C(n##9)
+#define C2(n) C1(n##0) C1(n##1) C1(n##2) C1(n##3) C1(n##4) \
+	      C1(n##5) C1(n##6) C1(n##7) C1(n##8) C1(n##9)
+#ifdef EXPENSIVE
+#define C3(n) C2(n##0) C2(n##1) C2(n##2) C2(n##3) C2(n##4) \
+	      C2(n##5) C2(n##6) C2(n##7) C2(n##8) C2(n##9)
+#define C4(n) C3(n##0) C3(n##1) C3(n##2) C3(n##3) C3(n##4) \
+	      C3(n##5) C3(n##6) C3(n##7) C3(n##8) C3(n##9)
+#else
+#define C3(n) C2(n##0) C2(n##4) C2(n##9)
+#define C4(n) C3(n##0) C3(n##3) C3(n##7)
+#endif
+#define TESTS C4(1) C1(10010) C1(10012) C1(16144)
+
+TESTS
+
+struct S { T x; T (*foo) (T, T *); };
+
+#undef C
+#define C(n) { n - 10000, foo##n },
+
+struct S tests[] = {
+TESTS
+  { 0, 0 }
+};
+
+int
+main ()
+{
+  int i, j, k;
+  /* Probe dividends just around each power of two, where carries in the
+     doubleword expansion are most likely to go wrong.  */
+  for (k = 0; tests[k].x; k++)
+    for (i = 0; i < sizeof (T) * __CHAR_BIT__; i++)
+      for (j = -5; j <= 5; j++)
+	{
+	  T x = ((T) 1 << i) + j;
+	  T r1, r2;
+	  if (foo (x, tests[k].x, &r1) != tests[k].foo (x, &r2)
+	      || r1 != r2)
+	    __builtin_abort ();
+	}
+  return 0;
+}
--- /dev/null
+/* PR rtl-optimization/97459 */
+/* { dg-do run } */
+/* { dg-options "-O2" } */
+/* { dg-additional-options "-DEXPENSIVE" { target run_expensive_tests } } */
+
+/* Runtime test for combined double-word signed division and modulo by
+   constant divisors (exercises the DIVMOD internal-fn path); positive and
+   negative dividends are both checked against foo, whose divisor is not a
+   compile-time constant.  */
+
+#ifdef __SIZEOF_INT128__
+typedef __int128_t T;
+typedef __uint128_t U;
+#else
+typedef long long T;
+typedef unsigned long long U;
+#endif
+
+T __attribute__((noipa)) foo (T x, T n, T *r) { *r = x % n; return x / n; }
+#define C(n) T __attribute__((noipa)) foo##n (T x, T *r) { *r = x % (n - 10000); return x / (n - 10000); }
+
+/* Generate the battery of test functions; the EXPENSIVE variant covers
+   every divisor in the range, the default a sparse subset.  */
+#define C1(n) C(n##1) C(n##3) C(n##5) C(n##7) C(n##9)
+#define C2(n) C1(n##0) C1(n##1) C1(n##2) C1(n##3) C1(n##4) \
+	      C1(n##5) C1(n##6) C1(n##7) C1(n##8) C1(n##9)
+#ifdef EXPENSIVE
+#define C3(n) C2(n##0) C2(n##1) C2(n##2) C2(n##3) C2(n##4) \
+	      C2(n##5) C2(n##6) C2(n##7) C2(n##8) C2(n##9)
+#define C4(n) C3(n##0) C3(n##1) C3(n##2) C3(n##3) C3(n##4) \
+	      C3(n##5) C3(n##6) C3(n##7) C3(n##8) C3(n##9)
+#else
+#define C3(n) C2(n##0) C2(n##4) C2(n##9)
+#define C4(n) C3(n##0) C3(n##3) C3(n##7)
+#endif
+#define TESTS C4(1) C1(10010) C1(10012) C1(16144)
+
+TESTS
+
+struct S { T x; T (*foo) (T, T *); };
+
+#undef C
+#define C(n) { n - 10000, foo##n },
+
+struct S tests[] = {
+TESTS
+  { 0, 0 }
+};
+
+int
+main ()
+{
+  int i, j, k;
+  /* Probe dividends just around each power of two (and their negations,
+     computed in the unsigned type U to avoid signed overflow).  */
+  for (k = 0; tests[k].x; k++)
+    for (i = 0; i < sizeof (T) * __CHAR_BIT__; i++)
+      for (j = -5; j <= 5; j++)
+	{
+	  U x = ((U) 1 << i) + j;
+	  T r1 = 0, r2 = 0;
+	  if (foo ((T) x, tests[k].x, &r1) != tests[k].foo ((T) x, &r2)
+	      || r1 != r2)
+	    __builtin_abort ();
+	  r1 = 0; r2 = 0;
+	  if (foo ((T) -x, tests[k].x, &r1) != tests[k].foo ((T) -x, &r2)
+	      || r1 != r2)
+	    __builtin_abort ();
+	}
+  return 0;
+}
unsigned long ret = 0;
while (x > 0)
{
- ret = ret + x % 10;
- x = x / 10;
+ ret = ret + x % 123456;
+ x = x / 123456;
}
return ret;
}