From b541196eb45d80f2dacd76e16828963760c3850d Mon Sep 17 00:00:00 2001 From: Fangrui Song Date: Fri, 10 Apr 2020 15:14:52 -0700 Subject: [PATCH] [builtins] Make __umodsi3/__udivdi3/__umoddi3 standalone (shift and subtract) @kamleshbhalui reported that when the Standard Extension M (Multiplication and Division) is disabled for RISC-V, `__udivdi3` will call __udivmodti4 which will in turn calls `__udivdi3`. This patch moves __udivsi3 (shift and subtract) to int_div_impl.inc `__udivXi3`, optimize a bit, add a `__umodXi3`, and use `__udivXi3` and `__umodXi3` to define `__udivsi3` `__umodsi3` `__udivdi3` `__umoddi3`. Reviewed By: kamleshbhalui Differential Revision: https://reviews.llvm.org/D77912 --- compiler-rt/lib/builtins/int_div_impl.inc | 58 +++++++++++++++++++++++++++++++ compiler-rt/lib/builtins/udivdi3.c | 6 +++- compiler-rt/lib/builtins/udivsi3.c | 47 ++++--------------------- compiler-rt/lib/builtins/umoddi3.c | 8 +++-- compiler-rt/lib/builtins/umodsi3.c | 6 +++- 5 files changed, 79 insertions(+), 46 deletions(-) create mode 100644 compiler-rt/lib/builtins/int_div_impl.inc diff --git a/compiler-rt/lib/builtins/int_div_impl.inc b/compiler-rt/lib/builtins/int_div_impl.inc new file mode 100644 index 0000000..972a108 --- /dev/null +++ b/compiler-rt/lib/builtins/int_div_impl.inc @@ -0,0 +1,58 @@ +#define clz(a) (sizeof(a) == 8 ? __builtin_clzll(a) : __builtin_clz(a)) + +// Adapted from Figure 3-40 of The PowerPC Compiler Writer's Guide +static __inline fixuint_t __udivXi3(fixuint_t n, fixuint_t d) { + const unsigned N = sizeof(fixuint_t) * CHAR_BIT; + // d == 0 cases are unspecified. + unsigned sr = (d ? clz(d) : N) - (n ? clz(n) : N); + // 0 <= sr <= N - 1 or sr is very large. + if (sr > N - 1) // n < d + return 0; + if (sr == N - 1) // d == 1 + return n; + ++sr; + // 1 <= sr <= N - 1. Shifts do not trigger UB. + fixuint_t r = n >> sr; + n <<= N - sr; + fixuint_t carry = 0; + for (; sr > 0; --sr) { + r = (r << 1) | (n >> (N - 1)); + n = (n << 1) | carry; + // Branch-less version of: + // carry = 0; + // if (r >= d) r -= d, carry = 1; + const fixint_t s = (fixint_t)(d - r - 1) >> (N - 1); + carry = s & 1; + r -= d & s; + } + n = (n << 1) | carry; + return n; +} + +// Mostly identical to __udivXi3 but the return values are different. +static __inline fixuint_t __umodXi3(fixuint_t n, fixuint_t d) { + const unsigned N = sizeof(fixuint_t) * CHAR_BIT; + // d == 0 cases are unspecified. + unsigned sr = (d ? clz(d) : N) - (n ? clz(n) : N); + // 0 <= sr <= N - 1 or sr is very large. + if (sr > N - 1) // n < d + return n; + if (sr == N - 1) // d == 1 + return 0; + ++sr; + // 1 <= sr <= N - 1. Shifts do not trigger UB. + fixuint_t r = n >> sr; + n <<= N - sr; + fixuint_t carry = 0; + for (; sr > 0; --sr) { + r = (r << 1) | (n >> (N - 1)); + n = (n << 1) | carry; + // Branch-less version of: + // carry = 0; + // if (r >= d) r -= d, carry = 1; + const fixint_t s = (fixint_t)(d - r - 1) >> (N - 1); + carry = s & 1; + r -= d & s; + } + return r; +} diff --git a/compiler-rt/lib/builtins/udivdi3.c b/compiler-rt/lib/builtins/udivdi3.c index a23139e..74319cb 100644 --- a/compiler-rt/lib/builtins/udivdi3.c +++ b/compiler-rt/lib/builtins/udivdi3.c @@ -12,8 +12,12 @@ #include "int_lib.h" +typedef du_int fixuint_t; +typedef di_int fixint_t; +#include "int_div_impl.inc" + // Returns: a / b COMPILER_RT_ABI du_int __udivdi3(du_int a, du_int b) { - return __udivmoddi4(a, b, 0); + return __udivXi3(a, b); } diff --git a/compiler-rt/lib/builtins/udivsi3.c b/compiler-rt/lib/builtins/udivsi3.c index 18cc96c..3894e15 100644 --- a/compiler-rt/lib/builtins/udivsi3.c +++ b/compiler-rt/lib/builtins/udivsi3.c @@ -12,49 +12,14 @@ #include "int_lib.h" -// Returns: a / b +typedef su_int fixuint_t; +typedef si_int fixint_t; +#include "int_div_impl.inc" -// Translated from Figure 3-40 of The PowerPC Compiler Writer's Guide +// Returns: a / b -// This function should not call __divsi3! -COMPILER_RT_ABI su_int __udivsi3(su_int n, su_int d) { - const unsigned n_uword_bits = sizeof(su_int) * CHAR_BIT; - su_int q; - su_int r; - unsigned sr; - // special cases - if (d == 0) - return 0; // ?! - if (n == 0) - return 0; - sr = __builtin_clz(d) - __builtin_clz(n); - // 0 <= sr <= n_uword_bits - 1 or sr large - if (sr > n_uword_bits - 1) // d > r - return 0; - if (sr == n_uword_bits - 1) // d == 1 - return n; - ++sr; - // 1 <= sr <= n_uword_bits - 1 - // Not a special case - q = n << (n_uword_bits - sr); - r = n >> sr; - su_int carry = 0; - for (; sr > 0; --sr) { - // r:q = ((r:q) << 1) | carry - r = (r << 1) | (q >> (n_uword_bits - 1)); - q = (q << 1) | carry; - // carry = 0; - // if (r.all >= d.all) - // { - // r.all -= d.all; - // carry = 1; - // } - const si_int s = (si_int)(d - r - 1) >> (n_uword_bits - 1); - carry = s & 1; - r -= d & s; - } - q = (q << 1) | carry; - return q; +COMPILER_RT_ABI su_int __udivsi3(su_int a, su_int b) { + return __udivXi3(a, b); } #if defined(__ARM_EABI__) diff --git a/compiler-rt/lib/builtins/umoddi3.c b/compiler-rt/lib/builtins/umoddi3.c index 965cf8f..e672da9 100644 --- a/compiler-rt/lib/builtins/umoddi3.c +++ b/compiler-rt/lib/builtins/umoddi3.c @@ -12,10 +12,12 @@ #include "int_lib.h" +typedef du_int fixuint_t; +typedef di_int fixint_t; +#include "int_div_impl.inc" + // Returns: a % b COMPILER_RT_ABI du_int __umoddi3(du_int a, du_int b) { - du_int r; - __udivmoddi4(a, b, &r); - return r; + return __umodXi3(a, b); } diff --git a/compiler-rt/lib/builtins/umodsi3.c b/compiler-rt/lib/builtins/umodsi3.c index ce9abcd..5383aea 100644 --- a/compiler-rt/lib/builtins/umodsi3.c +++ b/compiler-rt/lib/builtins/umodsi3.c @@ -12,8 +12,12 @@ #include "int_lib.h" +typedef su_int fixuint_t; +typedef si_int fixint_t; +#include "int_div_impl.inc" + // Returns: a % b COMPILER_RT_ABI su_int __umodsi3(su_int a, su_int b) { - return a - __udivsi3(a, b) * b; + return __umodXi3(a, b); } -- 2.7.4