From a9be8dddfedb1d19e43b900bdfd33731d3c390c4 Mon Sep 17 00:00:00 2001 From: =?utf8?q?Marek=20Ol=C5=A1=C3=A1k?= Date: Fri, 5 Oct 2018 20:42:16 -0500 Subject: [PATCH] util: Add power-of-two divisor support to compute_fast_udiv_info MIME-Version: 1.0 Content-Type: text/plain; charset=utf8 Content-Transfer-Encoding: 8bit Reviewed-by: Jason Ekstrand Reviewed-by: Marek Olšák --- src/util/fast_idiv_by_const.c | 21 +++++++++++++++++++++ src/util/fast_idiv_by_const.h | 4 ++-- 2 files changed, 23 insertions(+), 2 deletions(-) diff --git a/src/util/fast_idiv_by_const.c b/src/util/fast_idiv_by_const.c index 65a9e64..7b93316 100644 --- a/src/util/fast_idiv_by_const.c +++ b/src/util/fast_idiv_by_const.c @@ -52,6 +52,27 @@ util_compute_fast_udiv_info(uint64_t D, unsigned num_bits, unsigned UINT_BITS) /* The eventual result */ struct util_fast_udiv_info result; + if (util_is_power_of_two_or_zero64(D)) { + unsigned div_shift = util_logbase2_64(D); + + if (div_shift) { + /* Dividing by a power of two. */ + result.multiplier = 1ull << (UINT_BITS - div_shift); + result.pre_shift = 0; + result.post_shift = 0; + result.increment = 0; + return result; + } else { + /* Dividing by 1. */ + /* Assuming: floor((num + 1) * (2^32 - 1) / 2^32) = num */ + result.multiplier = UINT_BITS == 64 ? UINT64_MAX : + (1ull << UINT_BITS) - 1; + result.pre_shift = 0; + result.post_shift = 0; + result.increment = 1; + return result; + } + } /* The extra shift implicit in the difference between UINT_BITS and num_bits */ diff --git a/src/util/fast_idiv_by_const.h b/src/util/fast_idiv_by_const.h index 231311f..92a3ccd 100644 --- a/src/util/fast_idiv_by_const.h +++ b/src/util/fast_idiv_by_const.h @@ -98,8 +98,8 @@ util_compute_fast_sdiv_info(int64_t D, unsigned SINT_BITS); * emit("result >>>= UINT_BITS") * if m.post_shift > 0: emit("result >>>= m.post_shift") * - * The shifts by UINT_BITS may be "free" if the high half of the full multiply - * is put in a separate register. + * This second version works even if D is 1. The shifts by UINT_BITS may be + * "free" if the high half of the full multiply is put in a separate register. * * saturated_increment(n) means "increment n unless it would wrap to 0," i.e. * if n == (1 << UINT_BITS)-1: result = n -- 2.7.4