1 /* cairo - a vector graphics library with display and print output
3 * Copyright © 2004 Keith Packard
5 * This library is free software; you can redistribute it and/or
6 * modify it either under the terms of the GNU Lesser General Public
7 * License version 2.1 as published by the Free Software Foundation
8 * (the "LGPL") or, at your option, under the terms of the Mozilla
9 * Public License Version 1.1 (the "MPL"). If you do not alter this
10 * notice, a recipient may use your version of this file under either
11 * the MPL or the LGPL.
13 * You should have received a copy of the LGPL along with this library
14 * in the file COPYING-LGPL-2.1; if not, write to the Free Software
15 * Foundation, Inc., 51 Franklin Street, Suite 500, Boston, MA 02110-1335, USA
16 * You should have received a copy of the MPL along with this library
17 * in the file COPYING-MPL-1.1
19 * The contents of this file are subject to the Mozilla Public License
20 * Version 1.1 (the "License"); you may not use this file except in
21 * compliance with the License. You may obtain a copy of the License at
22 * http://www.mozilla.org/MPL/
24 * This software is distributed on an "AS IS" basis, WITHOUT WARRANTY
25 * OF ANY KIND, either express or implied. See the LGPL or the MPL for
26 * the specific language governing rights and limitations.
28 * The Original Code is the cairo graphics library.
30 * The Initial Developer of the Original Code is Keith Packard
33 * Keith R. Packard <keithp@keithp.com>
/* Native-uint64_t variants of the wide-integer helpers: when the platform
 * supplies a real 64-bit integer type, the 32-bit halves are extracted with
 * plain masks and shifts.
 * NOTE(review): the surrounding #ifdef HAVE_UINT64_T / #else guard lines are
 * missing from this extraction — confirm against upstream cairo-wideint.c. */
40 #define uint64_lo32(i) ((i) & 0xffffffff)
41 #define uint64_hi32(i) ((i) >> 32)
/* lo/hi: same halves, but kept in a 64-bit-wide result (used by the
 * 64x64->128 multiply below, which adds them into 64-bit accumulators). */
42 #define uint64_lo(i) ((i) & 0xffffffff)
43 #define uint64_hi(i) ((i) >> 32)
/* Move a value into the high 32-bit half; carry32 is 2^32, the unit carried
 * out of the low word. */
44 #define uint64_shift32(i) ((i) << 32)
45 #define uint64_carry32 (((uint64_t) 1) << 32)
/* Assemble a 64-bit value from a (high, low) pair of 32-bit words. */
47 #define _cairo_uint32s_to_uint64(h,l) ((uint64_t) (h) << 32 | (l))
/* Struct-based fallback for platforms without a native 64-bit type: a
 * cairo_uint64_t is a { lo, hi } pair of uint32_t and the half accessors
 * are simple field reads. */
51 #define uint64_lo32(i) ((i).lo)
52 #define uint64_hi32(i) ((i).hi)
/* Widening accessors and shift — signatures only; the function bodies are
 * missing from this extraction (presumably they build a cairo_uint64_t from
 * the selected half — confirm against upstream). */
55 uint64_lo (cairo_uint64_t i)
65 uint64_hi (cairo_uint64_t i)
75 uint64_shift32 (cairo_uint64_t i)
/* The constant 2^32 as a struct: lo = 0, hi = 1.
 * NOTE(review): assumes field order { lo, hi } in cairo_uint64_t — verify
 * against cairo-wideint-private.h. */
84 static const cairo_uint64_t uint64_carry32 = { 0, 1 };
/* double -> unsigned 64-bit: split i into radix-2^32 digits; hi gets the
 * truncated quotient i / 2^32, lo the remainder.  (Body fragment — the
 * declaration of q and the return are missing from this extraction.) */
87 _cairo_double_to_uint64 (double i)
91 q.hi = i * (1. / 4294967296.);
92 q.lo = i - q.hi * 4294967296.;
/* Inverse of the above: recombine the two radix-2^32 digits. */
97 _cairo_uint64_to_double (cairo_uint64_t i)
99 return i.hi * 4294967296. + i.lo;
/* double -> signed 64-bit.
 * NOTE(review): this signed pair uses INT32_MAX (2^31 - 1) as the radix
 * instead of 2^32.  That looks inconsistent with the unsigned pair above,
 * but it is self-consistent with _cairo_int64_to_double below, so the
 * round-trip holds; do not "fix" one without the other. */
103 _cairo_double_to_int64 (double i)
107 q.hi = i * (1. / INT32_MAX);
108 q.lo = i - q.hi * (double)INT32_MAX;
/* Inverse of _cairo_double_to_int64, using the same INT32_MAX radix. */
113 _cairo_int64_to_double (cairo_int64_t i)
115 return i.hi * INT32_MAX + i.lo;
/* Zero-extend a 32-bit value into the struct representation (body missing
 * from this extraction). */
119 _cairo_uint32_to_uint64 (uint32_t i)
/* Sign-extend a 32-bit value: the high word is all-ones for negatives. */
129 _cairo_int32_to_int64 (int32_t i)
134 q.hi = i < 0 ? -1 : 0;
/* Build a 64-bit value from a (high, low) pair — struct-based counterpart
 * of the _cairo_uint32s_to_uint64 macro in the native branch. */
138 static cairo_uint64_t
139 _cairo_uint32s_to_uint64 (uint32_t h, uint32_t l)
/* 64-bit add and subtract — signatures only here; bodies (word-wise
 * add/sub with carry/borrow propagation, presumably) are missing from
 * this extraction. */
149 _cairo_uint64_add (cairo_uint64_t a, cairo_uint64_t b)
161 _cairo_uint64_sub (cairo_uint64_t a, cairo_uint64_t b)
/* 16-bit half accessors for the 32x32->64 schoolbook multiply below;
 * carry16 is 2^16, the unit carried out of a 16-bit partial sum. */
172 #define uint32_lo(i) ((i) & 0xffff)
173 #define uint32_hi(i) ((i) >> 16)
174 #define uint32_carry16 ((1) << 16)
/* Full 32x32 -> 64-bit multiply built from four 16x16 -> 32 partial
 * products (a = ah*2^16 + al, b = bh*2^16 + bl).  Fragment: the assignments
 * of ah/al/bh/bl, the declaration of s, and the return are missing from
 * this extraction. */
177 _cairo_uint32x32_64_mul (uint32_t a, uint32_t b)
181 uint16_t ah, al, bh, bl;
182 uint32_t r0, r1, r2, r3;
/* The four partial products; the uint32_t casts force a 32-bit multiply
 * (avoids the int promotion of uint16_t operands overflowing as signed). */
189 r0 = (uint32_t) al * bl;
190 r1 = (uint32_t) al * bh;
191 r2 = (uint32_t) ah * bl;
192 r3 = (uint32_t) ah * bh;
/* Fold the cross terms; unsigned wrap-around (r1 < r2 after the add)
 * detects the carry into the top partial product. */
194 r1 += uint32_hi(r0); /* no carry possible */
195 r1 += r2; /* but this can carry */
196 if (r1 < r2) /* check */
197 r3 += uint32_carry16;
/* Recombine: high word gets r3 plus the upper half of r1; low word gets
 * the lower halves of r1 and r0. */
199 s.hi = r3 + uint32_hi(r1);
200 s.lo = (uint32_lo (r1) << 16) + uint32_lo (r0);
/* Signed 32x32 -> 64 multiply: done as the unsigned multiply, with a
 * correction for each negative operand (the correction lines are missing
 * from this extraction — confirm against upstream). */
205 _cairo_int32x32_64_mul (int32_t a, int32_t b)
208 s = _cairo_uint32x32_64_mul ((uint32_t) a, (uint32_t) b);
/* 64x64 -> low 64 bits: the low words' full product plus the two cross
 * products, which only affect the high word (mod 2^32 arithmetic). */
217 _cairo_uint64_mul (cairo_uint64_t a, cairo_uint64_t b)
221 s = _cairo_uint32x32_64_mul (a.lo, b.lo);
222 s.hi += a.lo * b.hi + a.hi * b.lo;
/* Logical shift left across the two words.  The visible lines handle
 * shift in (0, 32); the 0 and >= 32 special cases are missing from this
 * extraction (a.lo >> (32 - shift) would be UB at shift == 0). */
227 _cairo_uint64_lsl (cairo_uint64_t a, int shift)
237 a.hi = a.hi << shift | a.lo >> (32 - shift);
238 a.lo = a.lo << shift;
/* Logical shift right across the two words; same caveat about the missing
 * shift == 0 / shift >= 32 branches. */
244 _cairo_uint64_rsl (cairo_uint64_t a, int shift)
254 a.lo = a.lo >> shift | a.hi << (32 - shift);
255 a.hi = a.hi >> shift;
/* Arithmetic (sign-propagating) right shift of a 32-bit word.
 * NOTE(review): right-shifting a negative int32_t is implementation-
 * defined in C; cairo relies on the common arithmetic-shift behavior. */
260 #define _cairo_uint32_rsa(a,n) ((uint32_t) (((int32_t) (a)) >> (n)))
/* 64-bit arithmetic shift right.  The rsa(a.hi, 31) line presumably
 * belongs to the shift >= 32 branch (high word collapses to the sign);
 * the branch structure itself is missing from this extraction. */
263 _cairo_uint64_rsa (cairo_int64_t a, int shift)
268 a.hi = _cairo_uint32_rsa (a.hi, 31);
273 a.lo = a.lo >> shift | a.hi << (32 - shift);
274 a.hi = _cairo_uint32_rsa (a.hi, shift);
/* Unsigned 64-bit less-than: lexicographic compare of (hi, lo). */
280 _cairo_uint64_lt (cairo_uint64_t a, cairo_uint64_t b)
282 return (a.hi < b.hi ||
283 (a.hi == b.hi && a.lo < b.lo));
/* Word-wise equality. */
287 _cairo_uint64_eq (cairo_uint64_t a, cairo_uint64_t b)
289 return a.hi == b.hi && a.lo == b.lo;
/* Signed less-than: settle mixed signs first, then fall back to the
 * unsigned compare (valid because same-sign two's-complement values order
 * the same as their unsigned bit patterns).  The return statements of the
 * two sign branches are missing from this extraction. */
293 _cairo_int64_lt (cairo_int64_t a, cairo_int64_t b)
295 if (_cairo_int64_negative (a) && !_cairo_int64_negative (b))
297 if (!_cairo_int64_negative (a) && _cairo_int64_negative (b))
299 return _cairo_uint64_lt (a, b);
/* Three-way unsigned compare; fragment — the return values (-1/0/1,
 * presumably) are on lines missing from this extraction. */
303 _cairo_uint64_cmp (cairo_uint64_t a, cairo_uint64_t b)
307 else if (a.hi > b.hi)
309 else if (a.lo < b.lo)
311 else if (a.lo > b.lo)
/* Three-way signed compare: same mixed-sign pre-check as _cairo_int64_lt,
 * then delegate to the unsigned compare. */
318 _cairo_int64_cmp (cairo_int64_t a, cairo_int64_t b)
320 if (_cairo_int64_negative (a) && !_cairo_int64_negative (b))
322 if (!_cairo_int64_negative (a) && _cairo_int64_negative (b))
325 return _cairo_uint64_cmp (a, b);
/* Bitwise NOT and two's-complement negate — signatures only; bodies are
 * missing from this extraction. */
329 _cairo_uint64_not (cairo_uint64_t a)
337 _cairo_uint64_negate (cairo_uint64_t a)
347 * Simple bit-at-a-time divide.
/* Shift-and-subtract restoring division: align den under num (tracking the
 * alignment in `bit`), then walk back down subtracting where it fits.
 * Fragment: declarations of bit/quo, the loop braces, and the final
 * qr.quo/qr.rem assignments and return are missing from this extraction. */
350 _cairo_uint64_divrem (cairo_uint64_t num, cairo_uint64_t den)
352 cairo_uquorem64_t qr;
356 bit = _cairo_uint32_to_uint64 (1);
358 /* normalize to make den >= num, but not overflow */
/* The msb test (den.hi & 0x80000000) stops the doubling before den would
 * shift out its top bit. */
359 while (_cairo_uint64_lt (den, num) && (den.hi & 0x80000000) == 0)
361 bit = _cairo_uint64_lsl (bit, 1);
362 den = _cairo_uint64_lsl (den, 1);
364 quo = _cairo_uint32_to_uint64 (0);
366 /* generate quotient, one bit at a time */
/* Loop until the marker bit has been shifted out entirely. */
367 while (bit.hi | bit.lo)
369 if (_cairo_uint64_le (den, num))
371 num = _cairo_uint64_sub (num, den);
372 quo = _cairo_uint64_add (quo, bit);
374 bit = _cairo_uint64_rsl (bit, 1);
375 den = _cairo_uint64_rsl (den, 1);
382 #endif /* !HAVE_UINT64_T */
/* 128-bit divide — signature and local only; the entire body is missing
 * from this extraction (a second, fuller copy of a 128-bit divrem appears
 * further down in this file). */
386 _cairo_uint128_divrem (cairo_uint128_t num, cairo_uint128_t den)
388 cairo_uquorem128_t qr;
/* Zero-extend 32 -> 128 via the 64-bit helpers. */
398 _cairo_uint32_to_uint128 (uint32_t i)
402 q.lo = _cairo_uint32_to_uint64 (i);
403 q.hi = _cairo_uint32_to_uint64 (0);
/* Sign-extend 32 -> 128: high 64 bits become all-ones for negatives. */
408 _cairo_int32_to_int128 (int32_t i)
412 q.lo = _cairo_int32_to_int64 (i);
413 q.hi = _cairo_int32_to_int64 (i < 0 ? -1 : 0);
/* Zero-extend 64 -> 128 (the q.lo = i assignment is missing from this
 * extraction). */
418 _cairo_uint64_to_uint128 (cairo_uint64_t i)
423 q.hi = _cairo_uint32_to_uint64 (0);
/* Sign-extend 64 -> 128, keyed off the sign of the 64-bit input. */
428 _cairo_int64_to_int128 (cairo_int64_t i)
433 q.hi = _cairo_int32_to_int64 (_cairo_int64_negative(i) ? -1 : 0);
/* 128-bit add from two 64-bit halves; carry out of the low half is
 * detected by unsigned wrap-around (s.lo < a.lo). */
438 _cairo_uint128_add (cairo_uint128_t a, cairo_uint128_t b)
442 s.hi = _cairo_uint64_add (a.hi, b.hi);
443 s.lo = _cairo_uint64_add (a.lo, b.lo);
444 if (_cairo_uint64_lt (s.lo, a.lo))
445 s.hi = _cairo_uint64_add (s.hi, _cairo_uint32_to_uint64 (1));
/* 128-bit subtract; a borrow occurred iff the low result exceeds the
 * original low half. */
450 _cairo_uint128_sub (cairo_uint128_t a, cairo_uint128_t b)
454 s.hi = _cairo_uint64_sub (a.hi, b.hi);
455 s.lo = _cairo_uint64_sub (a.lo, b.lo);
456 if (_cairo_uint64_gt (s.lo, a.lo))
457 s.hi = _cairo_uint64_sub (s.hi, _cairo_uint32_to_uint64(1));
/* Full 64x64 -> 128-bit multiply: the same schoolbook scheme as
 * _cairo_uint32x32_64_mul, one level up (four 32x32 -> 64 partial
 * products).  Fragment: the s declaration, the second operand of the
 * final s.lo add (uint64_lo(r0), presumably), and the return are missing
 * from this extraction. */
462 _cairo_uint64x64_128_mul (cairo_uint64_t a, cairo_uint64_t b)
465 uint32_t ah, al, bh, bl;
466 cairo_uint64_t r0, r1, r2, r3;
468 al = uint64_lo32 (a);
469 ah = uint64_hi32 (a);
470 bl = uint64_lo32 (b);
471 bh = uint64_hi32 (b);
473 r0 = _cairo_uint32x32_64_mul (al, bl);
474 r1 = _cairo_uint32x32_64_mul (al, bh);
475 r2 = _cairo_uint32x32_64_mul (ah, bl);
476 r3 = _cairo_uint32x32_64_mul (ah, bh);
/* Cross-term folding with carry detection, mirroring the 32-bit version. */
478 r1 = _cairo_uint64_add (r1, uint64_hi (r0)); /* no carry possible */
479 r1 = _cairo_uint64_add (r1, r2); /* but this can carry */
480 if (_cairo_uint64_lt (r1, r2)) /* check */
481 r3 = _cairo_uint64_add (r3, uint64_carry32);
483 s.hi = _cairo_uint64_add (r3, uint64_hi(r1));
484 s.lo = _cairo_uint64_add (uint64_shift32 (r1),
/* Signed 64x64 -> 128: unsigned multiply, then subtract the other operand
 * from the high half for each negative input (standard two's-complement
 * correction). */
490 _cairo_int64x64_128_mul (cairo_int64_t a, cairo_int64_t b)
493 s = _cairo_uint64x64_128_mul (_cairo_int64_to_uint64(a),
494 _cairo_int64_to_uint64(b));
495 if (_cairo_int64_negative (a))
496 s.hi = _cairo_uint64_sub (s.hi,
497 _cairo_int64_to_uint64 (b));
498 if (_cairo_int64_negative (b))
499 s.hi = _cairo_uint64_sub (s.hi,
500 _cairo_int64_to_uint64 (a));
/* 128x128 -> low 128 bits: low-halves' full product plus the two cross
 * products folded into the high half (mod 2^64). */
505 _cairo_uint128_mul (cairo_uint128_t a, cairo_uint128_t b)
509 s = _cairo_uint64x64_128_mul (a.lo, b.lo);
510 s.hi = _cairo_uint64_add (s.hi,
511 _cairo_uint64_mul (a.lo, b.hi));
512 s.hi = _cairo_uint64_add (s.hi,
513 _cairo_uint64_mul (a.hi, b.lo));
/* 128-bit logical shift left.  The a.lo = 0 line presumably belongs to the
 * shift >= 64 branch; the branch structure and returns are missing from
 * this extraction. */
518 _cairo_uint128_lsl (cairo_uint128_t a, int shift)
523 a.lo = _cairo_uint32_to_uint64 (0);
528 a.hi = _cairo_uint64_add (_cairo_uint64_lsl (a.hi, shift),
529 a.lo = _cairo_uint64_lsl (a.lo, shift);
/* 128-bit logical shift right — mirror image of the left shift; the
 * a.hi = 0 line presumably handles shift >= 64. */
536 _cairo_uint128_rsl (cairo_uint128_t a, int shift)
541 a.hi = _cairo_uint32_to_uint64 (0);
546 a.lo = _cairo_uint64_add (_cairo_uint64_rsl (a.lo, shift),
547 _cairo_uint64_lsl (a.hi, (64 - shift)));
548 a.hi = _cairo_uint64_rsl (a.hi, shift);
/* 128-bit arithmetic shift right; rsa(a.hi, 63) presumably collapses the
 * high half to the sign in the shift >= 64 branch. */
554 _cairo_uint128_rsa (cairo_int128_t a, int shift)
559 a.hi = _cairo_uint64_rsa (a.hi, 64-1);
564 a.lo = _cairo_uint64_add (_cairo_uint64_rsl (a.lo, shift),
565 _cairo_uint64_lsl (a.hi, (64 - shift)));
566 a.hi = _cairo_uint64_rsa (a.hi, shift);
/* Unsigned 128-bit less-than: lexicographic on (hi, lo), built from the
 * 64-bit comparison helpers. */
572 _cairo_uint128_lt (cairo_uint128_t a, cairo_uint128_t b)
574 return (_cairo_uint64_lt (a.hi, b.hi) ||
575 (_cairo_uint64_eq (a.hi, b.hi) &&
576 _cairo_uint64_lt (a.lo, b.lo)));
/* Signed 128-bit less-than: resolve mixed signs, then unsigned compare
 * (the two sign-branch returns are missing from this extraction). */
580 _cairo_int128_lt (cairo_int128_t a, cairo_int128_t b)
582 if (_cairo_int128_negative (a) && !_cairo_int128_negative (b))
584 if (!_cairo_int128_negative (a) && _cairo_int128_negative (b))
586 return _cairo_uint128_lt (a, b);
/* Three-way unsigned compare: high halves decide unless equal. */
590 _cairo_uint128_cmp (cairo_uint128_t a, cairo_uint128_t b)
594 cmp = _cairo_uint64_cmp (a.hi, b.hi);
597 return _cairo_uint64_cmp (a.lo, b.lo);
/* Three-way signed compare with the usual mixed-sign pre-check. */
601 _cairo_int128_cmp (cairo_int128_t a, cairo_int128_t b)
603 if (_cairo_int128_negative (a) && !_cairo_int128_negative (b))
605 if (!_cairo_int128_negative (a) && _cairo_int128_negative (b))
608 return _cairo_uint128_cmp (a, b);
/* Word-wise 128-bit equality. */
612 _cairo_uint128_eq (cairo_uint128_t a, cairo_uint128_t b)
614 return (_cairo_uint64_eq (a.hi, b.hi) &&
615 _cairo_uint64_eq (a.lo, b.lo));
/* Top-bit test on a 64-bit quantity: native vs struct variants.
 * NOTE(review): the #if HAVE_UINT64_T / #else lines selecting between
 * these two definitions are missing from this extraction. */
619 #define _cairo_msbset64(q) (q & ((uint64_t) 1 << 63))
621 #define _cairo_msbset64(q) (q.hi & ((uint32_t) 1 << 31))
/* 128-bit shift-and-subtract restoring division — same algorithm as
 * _cairo_uint64_divrem, using the 128-bit helpers.  Fragment: bit/quo
 * declarations, loop braces, and the final qr assignments and return are
 * missing from this extraction. */
625 _cairo_uint128_divrem (cairo_uint128_t num, cairo_uint128_t den)
627 cairo_uquorem128_t qr;
631 bit = _cairo_uint32_to_uint128 (1);
633 /* normalize to make den >= num, but not overflow */
/* Stop doubling before den's top bit (of den.hi) would shift out. */
634 while (_cairo_uint128_lt (den, num) && !_cairo_msbset64(den.hi))
636 bit = _cairo_uint128_lsl (bit, 1);
637 den = _cairo_uint128_lsl (den, 1);
639 quo = _cairo_uint32_to_uint128 (0);
641 /* generate quotient, one bit at a time */
642 while (_cairo_uint128_ne (bit, _cairo_uint32_to_uint128(0)))
644 if (_cairo_uint128_le (den, num))
646 num = _cairo_uint128_sub (num, den);
647 quo = _cairo_uint128_add (quo, bit);
649 bit = _cairo_uint128_rsl (bit, 1);
650 den = _cairo_uint128_rsl (den, 1);
/* Two's-complement negate: ~a + 1, word-wise. */
658 _cairo_uint128_negate (cairo_uint128_t a)
660 a.lo = _cairo_uint64_not (a.lo);
661 a.hi = _cairo_uint64_not (a.hi);
662 return _cairo_uint128_add (a, _cairo_uint32_to_uint128 (1));
/* Bitwise NOT of both halves (the return is missing from this
 * extraction). */
666 _cairo_uint128_not (cairo_uint128_t a)
668 a.lo = _cairo_uint64_not (a.lo);
669 a.hi = _cairo_uint64_not (a.hi);
673 #endif /* !HAVE_UINT128_T */
/* Signed 128-bit divide: strip the signs, do an unsigned divrem, then
 * re-apply them — remainder takes the sign of the numerator, quotient is
 * negative iff the operand signs differ.  Fragment: the if-guards around
 * the negations and the return are missing from this extraction. */
676 _cairo_int128_divrem (cairo_int128_t num, cairo_int128_t den)
678 int num_neg = _cairo_int128_negative (num);
679 int den_neg = _cairo_int128_negative (den);
680 cairo_uquorem128_t uqr;
681 cairo_quorem128_t qr;
684 num = _cairo_int128_negate (num);
686 den = _cairo_int128_negate (den);
687 uqr = _cairo_uint128_divrem (num, den);
689 qr.rem = _cairo_int128_negate (uqr.rem);
692 if (num_neg != den_neg)
693 qr.quo = _cairo_int128_negate (uqr.quo);
700 * _cairo_uint_96by64_32x64_divrem:
702 * Compute a 32 bit quotient and 64 bit remainder of a 96 bit unsigned
703 * dividend and 64 bit divisor. If the quotient doesn't fit into 32
704 * bits then the returned remainder is equal to the divisor, and the
705 * quotient is the largest representable 64 bit integer. It is an
706 * error to call this function with the high 32 bits of @num being
/* (The tail of the doc comment — "non-zero", presumably — is missing
 * from this extraction.) */
710 _cairo_uint_96by64_32x64_divrem (cairo_uint128_t num,
713 cairo_uquorem64_t result;
/* B = 2^32, the radix used to split the numerator. */
714 cairo_uint64_t B = _cairo_uint32s_to_uint64 (1, 0);
716 /* These are the high 64 bits of the *96* bit numerator. We're
717 * going to represent the numerator as xB + y, where x is a 64,
718 * and y is a 32 bit number. */
719 cairo_uint64_t x = _cairo_uint128_to_uint64 (_cairo_uint128_rsl(num, 32));
721 /* Initialise the result to indicate overflow. */
722 result.quo = _cairo_uint32s_to_uint64 (-1U, -1U);
725 /* Don't bother if the quotient is going to overflow. */
/* x >= den means quo >= B, i.e. it cannot fit in 32 bits. */
726 if (_cairo_uint64_ge (x, den)) {
727 return /* overflow */ result;
730 if (_cairo_uint64_lt (x, B)) {
731 /* When the final quotient is known to fit in 32 bits, then
732 * num < 2^64 if and only if den < 2^32. */
/* Whole problem fits in a single 64/64 divide. */
733 return _cairo_uint64_divrem (_cairo_uint128_to_uint64 (num), den);
736 /* Denominator is >= 2^32. the numerator is >= 2^64, and the
737 * division won't overflow: need two divrems. Write the
738 * numerator and denominator as
740 * num = xB + y x : 64 bits, y : 32 bits
741 * den = uB + v u, v : 32 bits
743 uint32_t y = _cairo_uint128_to_uint32 (num);
744 uint32_t u = uint64_hi32 (den);
745 uint32_t v = _cairo_uint64_to_uint32 (den);
747 /* Compute a lower bound approximate quotient of num/den
748 * from x/(u+1). Then we have
750 * x = q(u+1) + r ; q : 32 bits, r <= u : 32 bits.
752 * xB + y = q(u+1)B + (rB+y)
753 * = q(uB + B + v - v) + (rB+y)
754 * = q(uB + v) + qB - qv + (rB+y)
755 * = q(uB + v) + q(B-v) + (rB+y)
757 * The true quotient of num/den then is q plus the
758 * contribution of q(B-v) + (rB+y). The main contribution
759 * comes from the term q(B-v), with the term (rB+y) only
760 * contributing at most one part.
762 * The term q(B-v) must fit into 64 bits, since q fits into 32
763 * bits on account of being a lower bound to the true
764 * quotient, and as B-v <= 2^32, we may safely use a single
765 * 64/64 bit division to find its contribution. */
767 cairo_uquorem64_t quorem;
768 cairo_uint64_t remainder; /* will contain final remainder */
769 uint32_t quotient; /* will contain final quotient. */
773 /* Approximate quotient by dividing the high 64 bits of num by
774 * u+1. Watch out for overflow of u+1. */
/* NOTE(review): the guard for u+1 == 0 (u == 0xffffffff) and the
 * quotient initialization lines are missing from this extraction. */
776 quorem = _cairo_uint64_divrem (x, _cairo_uint32_to_uint64 (u+1));
777 q = _cairo_uint64_to_uint32 (quorem.quo);
778 r = _cairo_uint64_to_uint32 (quorem.rem);
/* Presumably the u+1-overflow branch: the whole of x is the remainder. */
782 r = _cairo_uint64_to_uint32 (x);
786 /* Add the main term's contribution to quotient. Note B-v =
787 * -v as an uint32 (unless v = 0) */
789 quorem = _cairo_uint64_divrem (_cairo_uint32x32_64_mul (q, -v), den);
/* v == 0 branch (missing guard): q(B-v) = qB exactly. */
791 quorem = _cairo_uint64_divrem (_cairo_uint32s_to_uint64 (q, 0), den);
792 quotient += _cairo_uint64_to_uint32 (quorem.quo);
794 /* Add the contribution of the subterm and start computing the
/* (rest of this comment — "final remainder", presumably — missing) */
796 remainder = _cairo_uint32s_to_uint64 (r, y);
797 if (_cairo_uint64_ge (remainder, den)) {
798 remainder = _cairo_uint64_sub (remainder, den);
802 /* Add the contribution of the main term's remainder. The
803 * funky test here checks that remainder + main_rem >= den,
804 * taking into account overflow of the addition. */
805 remainder = _cairo_uint64_add (remainder, quorem.rem);
806 if (_cairo_uint64_ge (remainder, den) ||
807 _cairo_uint64_lt (remainder, quorem.rem))
809 remainder = _cairo_uint64_sub (remainder, den);
813 result.quo = _cairo_uint32_to_uint64 (quotient);
814 result.rem = remainder;
/* Signed wrapper around _cairo_uint_96by64_32x64_divrem: strip signs,
 * divide unsigned, detect overflow (signalled by rem == den), then
 * re-apply the signs.  This definition is truncated at the end of this
 * chunk — the remaining sign fix-ups and return are not visible. */
820 _cairo_int_96by64_32x64_divrem (cairo_int128_t num, cairo_int64_t den)
822 int num_neg = _cairo_int128_negative (num);
823 int den_neg = _cairo_int64_negative (den);
824 cairo_uint64_t nonneg_den;
825 cairo_uquorem64_t uqr;
829 num = _cairo_int128_negate (num);
831 nonneg_den = _cairo_int64_negate (den);
835 uqr = _cairo_uint_96by64_32x64_divrem (num, nonneg_den);
836 if (_cairo_uint64_eq (uqr.rem, nonneg_den)) {
837 /* bail on overflow. */
/* Saturate the quotient to INT64_MAX (0x7FFFFFFFFFFFFFFF) on overflow. */
838 qr.quo = _cairo_uint32s_to_uint64 (0x7FFFFFFF, -1U);
/* Remainder takes the numerator's sign; quotient is negated when the
 * operand signs differ. */
844 qr.rem = _cairo_int64_negate (uqr.rem);
847 if (num_neg != den_neg)
848 qr.quo = _cairo_int64_negate (uqr.quo);