src/base/ftcalc.c

   1 /****************************************************************************
   2  *
   3  * ftcalc.c
   4  *
   5  *   Arithmetic computations (body).
   6  *
   7  * Copyright (C) 1996-2023 by
   8  * David Turner, Robert Wilhelm, and Werner Lemberg.
   9  *
  10  * This file is part of the FreeType project, and may only be used,
  11  * modified, and distributed under the terms of the FreeType project
  12  * license, LICENSE.TXT.  By continuing to use, modify, or distribute
  13  * this file you indicate that you have read the license and
  14  * understand and accept it fully.
  15  *
  16  */
  17
  18   /**************************************************************************
  19    *
  20    * Support for 1-complement arithmetic has been totally dropped in this
  21    * release.  You can still write your own code if you need it.
  22    *
  23    */
  24
  25   /**************************************************************************
  26    *
  27    * Implementing basic computation routines.
  28    *
  29    * FT_MulDiv(), FT_MulFix(), FT_DivFix(), FT_RoundFix(), FT_CeilFix(),
  30    * and FT_FloorFix() are declared in freetype.h.
  31    *
  32    */
  33
  34
  35 #include <freetype/ftglyph.h>
  36 #include <freetype/fttrigon.h>
  37 #include <freetype/internal/ftcalc.h>
  38 #include <freetype/internal/ftdebug.h>
  39 #include <freetype/internal/ftobjs.h>
  40
  41
  42 #ifdef FT_MULFIX_ASSEMBLER
  43 #undef FT_MulFix
  44 #endif
  45
  46 /* we need to emulate a 64-bit data type if a real one isn't available */
  47
  48 #ifndef FT_INT64
  49
  50   typedef struct  FT_Int64_
  51   {
  52     FT_UInt32  lo;
  53     FT_UInt32  hi;
  54
  55   } FT_Int64;
  56
  57 #endif /* !FT_INT64 */
  58
  59
  60   /**************************************************************************
  61    *
  62    * The macro FT_COMPONENT is used in trace mode.  It is an implicit
  63    * parameter of the FT_TRACE() and FT_ERROR() macros, used to print/log
  64    * messages during execution.
  65    */
  66 #undef  FT_COMPONENT
  67 #define FT_COMPONENT  calc
  68
  69
  70   /* transfer sign, leaving a positive number;                        */
  71   /* we need an unsigned value to safely negate INT_MIN (or LONG_MIN) */
  72 #define FT_MOVE_SIGN( x, x_unsigned, s ) \
  73   FT_BEGIN_STMNT                         \
  74     if ( x < 0 )                         \
  75     {                                    \
  76       x_unsigned = 0U - (x_unsigned);    \
  77       s          = -s;                   \
  78     }                                    \
  79   FT_END_STMNT
  80
  81   /* The following three functions are available regardless of whether */
  82   /* FT_INT64 is defined.                                              */
  83
  84   /* documentation is in freetype.h */
  85
  86   FT_EXPORT_DEF( FT_Fixed )
  87   FT_RoundFix( FT_Fixed  a )
  88   {
  89     return ( ADD_LONG( a, 0x8000L - ( a < 0 ) ) ) & ~0xFFFFL;
  90   }
  91
  92
  93   /* documentation is in freetype.h */
  94
  95   FT_EXPORT_DEF( FT_Fixed )
  96   FT_CeilFix( FT_Fixed  a )
  97   {
  98     return ( ADD_LONG( a, 0xFFFFL ) ) & ~0xFFFFL;
  99   }
 100
 101
 102   /* documentation is in freetype.h */
 103
 104   FT_EXPORT_DEF( FT_Fixed )
 105   FT_FloorFix( FT_Fixed  a )
 106   {
 107     return a & ~0xFFFFL;
 108   }
 109
 110 #ifndef FT_MSB
 111
 112   FT_BASE_DEF( FT_Int )
 113   FT_MSB( FT_UInt32 z )
 114   {
 115     FT_Int  shift = 0;
 116
 117
 118     /* determine msb bit index in `shift' */
 119     if ( z & 0xFFFF0000UL )
 120     {
 121       z     >>= 16;
 122       shift  += 16;
 123     }
 124     if ( z & 0x0000FF00UL )
 125     {
 126       z     >>= 8;
 127       shift  += 8;
 128     }
 129     if ( z & 0x000000F0UL )
 130     {
 131       z     >>= 4;
 132       shift  += 4;
 133     }
 134     if ( z & 0x0000000CUL )
 135     {
 136       z     >>= 2;
 137       shift  += 2;
 138     }
 139     if ( z & 0x00000002UL )
 140     {
 141    /* z     >>= 1; */
 142       shift  += 1;
 143     }
 144
 145     return shift;
 146   }
 147
 148 #endif /* !FT_MSB */
 149
 150
 151   /* documentation is in ftcalc.h */
 152
 153   FT_BASE_DEF( FT_Fixed )
 154   FT_Hypot( FT_Fixed  x,
 155             FT_Fixed  y )
 156   {
 157     FT_Vector  v;
 158
 159
 160     v.x = x;
 161     v.y = y;
 162
 163     return FT_Vector_Length( &v );
 164   }
 165
 166
 167 #ifdef FT_INT64
 168
 169
 170   /* documentation is in freetype.h */
 171
 172   FT_EXPORT_DEF( FT_Long )
 173   FT_MulDiv( FT_Long  a_,
 174              FT_Long  b_,
 175              FT_Long  c_ )
 176   {
 177     FT_Int     s = 1;
 178     FT_UInt64  a, b, c, d;
 179     FT_Long    d_;
 180
 181
 182     a = (FT_UInt64)a_;
 183     b = (FT_UInt64)b_;
 184     c = (FT_UInt64)c_;
 185
 186     FT_MOVE_SIGN( a_, a, s );
 187     FT_MOVE_SIGN( b_, b, s );
 188     FT_MOVE_SIGN( c_, c, s );
 189
 190     d = c > 0 ? ( a * b + ( c >> 1 ) ) / c
 191               : 0x7FFFFFFFUL;
 192
 193     d_ = (FT_Long)d;
 194
 195     return s < 0 ? NEG_LONG( d_ ) : d_;
 196   }
 197
 198
 199   /* documentation is in ftcalc.h */
 200
 201   FT_BASE_DEF( FT_Long )
 202   FT_MulDiv_No_Round( FT_Long  a_,
 203                       FT_Long  b_,
 204                       FT_Long  c_ )
 205   {
 206     FT_Int     s = 1;
 207     FT_UInt64  a, b, c, d;
 208     FT_Long    d_;
 209
 210
 211     a = (FT_UInt64)a_;
 212     b = (FT_UInt64)b_;
 213     c = (FT_UInt64)c_;
 214
 215     FT_MOVE_SIGN( a_, a, s );
 216     FT_MOVE_SIGN( b_, b, s );
 217     FT_MOVE_SIGN( c_, c, s );
 218
 219     d = c > 0 ? a * b / c
 220               : 0x7FFFFFFFUL;
 221
 222     d_ = (FT_Long)d;
 223
 224     return s < 0 ? NEG_LONG( d_ ) : d_;
 225   }
 226
 227
 228   /* documentation is in freetype.h */
 229
 230   FT_EXPORT_DEF( FT_Long )
 231   FT_MulFix( FT_Long  a_,
 232              FT_Long  b_ )
 233   {
 234 #ifdef FT_MULFIX_ASSEMBLER
 235
 236     return FT_MULFIX_ASSEMBLER( (FT_Int32)a_, (FT_Int32)b_ );
 237
 238 #else
 239
 240     FT_Int64  ab = (FT_Int64)a_ * (FT_Int64)b_;
 241
 242     /* this requires arithmetic right shift of signed numbers */
 243     return (FT_Long)( ( ab + 0x8000L - ( ab < 0 ) ) >> 16 );
 244
 245 #endif /* FT_MULFIX_ASSEMBLER */
 246   }
 247
 248
 249   /* documentation is in freetype.h */
 250
 251   FT_EXPORT_DEF( FT_Long )
 252   FT_DivFix( FT_Long  a_,
 253              FT_Long  b_ )
 254   {
 255     FT_Int     s = 1;
 256     FT_UInt64  a, b, q;
 257     FT_Long    q_;
 258
 259
 260     a = (FT_UInt64)a_;
 261     b = (FT_UInt64)b_;
 262
 263     FT_MOVE_SIGN( a_, a, s );
 264     FT_MOVE_SIGN( b_, b, s );
 265
 266     q = b > 0 ? ( ( a << 16 ) + ( b >> 1 ) ) / b
 267               : 0x7FFFFFFFUL;
 268
 269     q_ = (FT_Long)q;
 270
 271     return s < 0 ? NEG_LONG( q_ ) : q_;
 272   }
 273
 274
 275 #else /* !FT_INT64 */
 276
 277
 278   static void
 279   ft_multo64( FT_UInt32  x,
 280               FT_UInt32  y,
 281               FT_Int64  *z )
 282   {
 283     FT_UInt32  lo1, hi1, lo2, hi2, lo, hi, i1, i2;
 284
 285
 286     lo1 = x & 0x0000FFFFU;  hi1 = x >> 16;
 287     lo2 = y & 0x0000FFFFU;  hi2 = y >> 16;
 288
 289     lo = lo1 * lo2;
 290     i1 = lo1 * hi2;
 291     i2 = lo2 * hi1;
 292     hi = hi1 * hi2;
 293
 294     /* Check carry overflow of i1 + i2 */
 295     i1 += i2;
 296     hi += (FT_UInt32)( i1 < i2 ) << 16;
 297
 298     hi += i1 >> 16;
 299     i1  = i1 << 16;
 300
 301     /* Check carry overflow of i1 + lo */
 302     lo += i1;
 303     hi += ( lo < i1 );
 304
 305     z->lo = lo;
 306     z->hi = hi;
 307   }
 308
 309
 310   static FT_UInt32
 311   ft_div64by32( FT_UInt32  hi,
 312                 FT_UInt32  lo,
 313                 FT_UInt32  y )
 314   {
 315     FT_UInt32  r, q;
 316     FT_Int     i;
 317
 318
 319     if ( hi >= y )
 320       return (FT_UInt32)0x7FFFFFFFL;
 321
 322     /* We shift as many bits as we can into the high register, perform     */
 323     /* 32-bit division with modulo there, then work through the remaining  */
 324     /* bits with long division. This optimization is especially noticeable */
 325     /* for smaller dividends that barely use the high register.            */
 326
 327     i = 31 - FT_MSB( hi );
 328     r = ( hi << i ) | ( lo >> ( 32 - i ) ); lo <<= i; /* left 64-bit shift */
 329     q = r / y;
 330     r -= q * y;   /* remainder */
 331
 332     i = 32 - i;   /* bits remaining in low register */
 333     do
 334     {
 335       q <<= 1;
 336       r   = ( r << 1 ) | ( lo >> 31 ); lo <<= 1;
 337
 338       if ( r >= y )
 339       {
 340         r -= y;
 341         q |= 1;
 342       }
 343     } while ( --i );
 344
 345     return q;
 346   }
 347
 348
 349   static void
 350   FT_Add64( FT_Int64*  x,
 351             FT_Int64*  y,
 352             FT_Int64  *z )
 353   {
 354     FT_UInt32  lo, hi;
 355
 356
 357     lo = x->lo + y->lo;
 358     hi = x->hi + y->hi + ( lo < x->lo );
 359
 360     z->lo = lo;
 361     z->hi = hi;
 362   }
 363
 364
 365   /*  The FT_MulDiv function has been optimized thanks to ideas from     */
 366   /*  Graham Asher and Alexei Podtelezhnikov.  The trick is to optimize  */
 367   /*  a rather common case when everything fits within 32-bits.          */
 368   /*                                                                     */
 369   /*  We compute 'a*b+c/2', then divide it by 'c' (all positive values). */
 370   /*                                                                     */
 371   /*  The product of two positive numbers never exceeds the square of    */
 372   /*  its mean values.  Therefore, we always avoid the overflow by       */
 373   /*  imposing                                                           */
 374   /*                                                                     */
 375   /*    (a + b) / 2 <= sqrt(X - c/2)    ,                                */
 376   /*                                                                     */
 377   /*  where X = 2^32 - 1, the maximum unsigned 32-bit value, and using   */
 378   /*  unsigned arithmetic.  Now we replace `sqrt' with a linear function */
 379   /*  that is smaller or equal for all values of c in the interval       */
 380   /*  [0;X/2]; it should be equal to sqrt(X) and sqrt(3X/4) at the       */
 381   /*  endpoints.  Substituting the linear solution and explicit numbers  */
 382   /*  we get                                                             */
 383   /*                                                                     */
 384   /*    a + b <= 131071.99 - c / 122291.84    .                          */
 385   /*                                                                     */
 386   /*  In practice, we should use a faster and even stronger inequality   */
 387   /*                                                                     */
 388   /*    a + b <= 131071 - (c >> 16)                                      */
 389   /*                                                                     */
 390   /*  or, alternatively,                                                 */
 391   /*                                                                     */
 392   /*    a + b <= 129894 - (c >> 17)    .                                 */
 393   /*                                                                     */
 394   /*  FT_MulFix, on the other hand, is optimized for a small value of    */
 395   /*  the first argument, when the second argument can be much larger.   */
 396   /*  This can be achieved by scaling the second argument and the limit  */
 397   /*  in the above inequalities.  For example,                           */
 398   /*                                                                     */
 399   /*    a + (b >> 8) <= (131071 >> 4)                                    */
 400   /*                                                                     */
 401   /*  covers the practical range of use. The actual test below is a bit  */
 402   /*  tighter to avoid the border case overflows.                        */
 403   /*                                                                     */
 404   /*  In the case of FT_DivFix, the exact overflow check                 */
 405   /*                                                                     */
 406   /*    a << 16 <= X - c/2                                               */
 407   /*                                                                     */
 408   /*  is scaled down by 2^16 and we use                                  */
 409   /*                                                                     */
 410   /*    a <= 65535 - (c >> 17)    .                                      */
 411
 412   /* documentation is in freetype.h */
 413
 414   FT_EXPORT_DEF( FT_Long )
 415   FT_MulDiv( FT_Long  a_,
 416              FT_Long  b_,
 417              FT_Long  c_ )
 418   {
 419     FT_Int     s = 1;
 420     FT_UInt32  a, b, c;
 421
 422
 423     /* XXX: this function does not allow 64-bit arguments */
 424
 425     a = (FT_UInt32)a_;
 426     b = (FT_UInt32)b_;
 427     c = (FT_UInt32)c_;
 428
 429     FT_MOVE_SIGN( a_, a, s );
 430     FT_MOVE_SIGN( b_, b, s );
 431     FT_MOVE_SIGN( c_, c, s );
 432
 433     if ( c == 0 )
 434       a = 0x7FFFFFFFUL;
 435
 436     else if ( a + b <= 129894UL - ( c >> 17 ) )
 437       a = ( a * b + ( c >> 1 ) ) / c;
 438
 439     else
 440     {
 441       FT_Int64  temp, temp2;
 442
 443
 444       ft_multo64( a, b, &temp );
 445
 446       temp2.hi = 0;
 447       temp2.lo = c >> 1;
 448
 449       FT_Add64( &temp, &temp2, &temp );
 450
 451       /* last attempt to ditch long division */
 452       a = ( temp.hi == 0 ) ? temp.lo / c
 453                            : ft_div64by32( temp.hi, temp.lo, c );
 454     }
 455
 456     a_ = (FT_Long)a;
 457
 458     return s < 0 ? NEG_LONG( a_ ) : a_;
 459   }
 460
 461
 462   FT_BASE_DEF( FT_Long )
 463   FT_MulDiv_No_Round( FT_Long  a_,
 464                       FT_Long  b_,
 465                       FT_Long  c_ )
 466   {
 467     FT_Int     s = 1;
 468     FT_UInt32  a, b, c;
 469
 470
 471     /* XXX: this function does not allow 64-bit arguments */
 472
 473     a = (FT_UInt32)a_;
 474     b = (FT_UInt32)b_;
 475     c = (FT_UInt32)c_;
 476
 477     FT_MOVE_SIGN( a_, a, s );
 478     FT_MOVE_SIGN( b_, b, s );
 479     FT_MOVE_SIGN( c_, c, s );
 480
 481     if ( c == 0 )
 482       a = 0x7FFFFFFFUL;
 483
 484     else if ( a + b <= 131071UL )
 485       a = a * b / c;
 486
 487     else
 488     {
 489       FT_Int64  temp;
 490
 491
 492       ft_multo64( a, b, &temp );
 493
 494       /* last attempt to ditch long division */
 495       a = ( temp.hi == 0 ) ? temp.lo / c
 496                            : ft_div64by32( temp.hi, temp.lo, c );
 497     }
 498
 499     a_ = (FT_Long)a;
 500
 501     return s < 0 ? NEG_LONG( a_ ) : a_;
 502   }
 503
 504
 505   /* documentation is in freetype.h */
 506
 507   FT_EXPORT_DEF( FT_Long )
 508   FT_MulFix( FT_Long  a_,
 509              FT_Long  b_ )
 510   {
 511 #ifdef FT_MULFIX_ASSEMBLER
 512
 513     return FT_MULFIX_ASSEMBLER( a_, b_ );
 514
 515 #elif 0
 516
 517     /*
 518      * This code is nonportable.  See comment below.
 519      *
 520      * However, on a platform where right-shift of a signed quantity fills
 521      * the leftmost bits by copying the sign bit, it might be faster.
 522      */
 523
 524     FT_Long    sa, sb;
 525     FT_UInt32  a, b;
 526
 527
 528     /*
 529      * This is a clever way of converting a signed number `a' into its
 530      * absolute value (stored back into `a') and its sign.  The sign is
 531      * stored in `sa'; 0 means `a' was positive or zero, and -1 means `a'
 532      * was negative.  (Similarly for `b' and `sb').
 533      *
 534      * Unfortunately, it doesn't work (at least not portably).
 535      *
 536      * It makes the assumption that right-shift on a negative signed value
 537      * fills the leftmost bits by copying the sign bit.  This is wrong.
 538      * According to K&R 2nd ed, section `A7.8 Shift Operators' on page 206,
 539      * the result of right-shift of a negative signed value is
 540      * implementation-defined.  At least one implementation fills the
 541      * leftmost bits with 0s (i.e., it is exactly the same as an unsigned
 542      * right shift).  This means that when `a' is negative, `sa' ends up
 543      * with the value 1 rather than -1.  After that, everything else goes
 544      * wrong.
 545      */
 546     sa = ( a_ >> ( sizeof ( a_ ) * 8 - 1 ) );
 547     a  = ( a_ ^ sa ) - sa;
 548     sb = ( b_ >> ( sizeof ( b_ ) * 8 - 1 ) );
 549     b  = ( b_ ^ sb ) - sb;
 550
 551     a = (FT_UInt32)a_;
 552     b = (FT_UInt32)b_;
 553
 554     if ( a + ( b >> 8 ) <= 8190UL )
 555       a = ( a * b + 0x8000U ) >> 16;
 556     else
 557     {
 558       FT_UInt32  al = a & 0xFFFFUL;
 559
 560
 561       a = ( a >> 16 ) * b + al * ( b >> 16 ) +
 562           ( ( al * ( b & 0xFFFFUL ) + 0x8000UL ) >> 16 );
 563     }
 564
 565     sa ^= sb;
 566     a   = ( a ^ sa ) - sa;
 567
 568     return (FT_Long)a;
 569
 570 #else /* 0 */
 571
 572     FT_Int     s = 1;
 573     FT_UInt32  a, b;
 574
 575
 576     /* XXX: this function does not allow 64-bit arguments */
 577
 578     a = (FT_UInt32)a_;
 579     b = (FT_UInt32)b_;
 580
 581     FT_MOVE_SIGN( a_, a, s );
 582     FT_MOVE_SIGN( b_, b, s );
 583
 584     if ( a + ( b >> 8 ) <= 8190UL )
 585       a = ( a * b + 0x8000UL ) >> 16;
 586     else
 587     {
 588       FT_UInt32  al = a & 0xFFFFUL;
 589
 590
 591       a = ( a >> 16 ) * b + al * ( b >> 16 ) +
 592           ( ( al * ( b & 0xFFFFUL ) + 0x8000UL ) >> 16 );
 593     }
 594
 595     a_ = (FT_Long)a;
 596
 597     return s < 0 ? NEG_LONG( a_ ) : a_;
 598
 599 #endif /* 0 */
 600
 601   }
 602
 603
 604   /* documentation is in freetype.h */
 605
 606   FT_EXPORT_DEF( FT_Long )
 607   FT_DivFix( FT_Long  a_,
 608              FT_Long  b_ )
 609   {
 610     FT_Int     s = 1;
 611     FT_UInt32  a, b, q;
 612     FT_Long    q_;
 613
 614
 615     /* XXX: this function does not allow 64-bit arguments */
 616
 617     a = (FT_UInt32)a_;
 618     b = (FT_UInt32)b_;
 619
 620     FT_MOVE_SIGN( a_, a, s );
 621     FT_MOVE_SIGN( b_, b, s );
 622
 623     if ( b == 0 )
 624     {
 625       /* check for division by 0 */
 626       q = 0x7FFFFFFFUL;
 627     }
 628     else if ( a <= 65535UL - ( b >> 17 ) )
 629     {
 630       /* compute result directly */
 631       q = ( ( a << 16 ) + ( b >> 1 ) ) / b;
 632     }
 633     else
 634     {
 635       /* we need more bits; we have to do it by hand */
 636       FT_Int64  temp, temp2;
 637
 638
 639       temp.hi  = a >> 16;
 640       temp.lo  = a << 16;
 641       temp2.hi = 0;
 642       temp2.lo = b >> 1;
 643
 644       FT_Add64( &temp, &temp2, &temp );
 645       q = ft_div64by32( temp.hi, temp.lo, b );
 646     }
 647
 648     q_ = (FT_Long)q;
 649
 650     return s < 0 ? NEG_LONG( q_ ) : q_;
 651   }
 652
 653
 654 #endif /* !FT_INT64 */
 655
 656
 657   /* documentation is in ftglyph.h */
 658
 659   FT_EXPORT_DEF( void )
 660   FT_Matrix_Multiply( const FT_Matrix*  a,
 661                       FT_Matrix        *b )
 662   {
 663     FT_Fixed  xx, xy, yx, yy;
 664
 665
 666     if ( !a || !b )
 667       return;
 668
 669     xx = ADD_LONG( FT_MulFix( a->xx, b->xx ),
 670                    FT_MulFix( a->xy, b->yx ) );
 671     xy = ADD_LONG( FT_MulFix( a->xx, b->xy ),
 672                    FT_MulFix( a->xy, b->yy ) );
 673     yx = ADD_LONG( FT_MulFix( a->yx, b->xx ),
 674                    FT_MulFix( a->yy, b->yx ) );
 675     yy = ADD_LONG( FT_MulFix( a->yx, b->xy ),
 676                    FT_MulFix( a->yy, b->yy ) );
 677
 678     b->xx = xx;
 679     b->xy = xy;
 680     b->yx = yx;
 681     b->yy = yy;
 682   }
 683
 684
 685   /* documentation is in ftglyph.h */
 686
 687   FT_EXPORT_DEF( FT_Error )
 688   FT_Matrix_Invert( FT_Matrix*  matrix )
 689   {
 690     FT_Pos  delta, xx, yy;
 691
 692
 693     if ( !matrix )
 694       return FT_THROW( Invalid_Argument );
 695
 696     /* compute discriminant */
 697     delta = FT_MulFix( matrix->xx, matrix->yy ) -
 698             FT_MulFix( matrix->xy, matrix->yx );
 699
 700     if ( !delta )
 701       return FT_THROW( Invalid_Argument );  /* matrix can't be inverted */
 702
 703     matrix->xy = -FT_DivFix( matrix->xy, delta );
 704     matrix->yx = -FT_DivFix( matrix->yx, delta );
 705
 706     xx = matrix->xx;
 707     yy = matrix->yy;
 708
 709     matrix->xx = FT_DivFix( yy, delta );
 710     matrix->yy = FT_DivFix( xx, delta );
 711
 712     return FT_Err_Ok;
 713   }
 714
 715
 716   /* documentation is in ftcalc.h */
 717
 718   FT_BASE_DEF( void )
 719   FT_Matrix_Multiply_Scaled( const FT_Matrix*  a,
 720                              FT_Matrix        *b,
 721                              FT_Long           scaling )
 722   {
 723     FT_Fixed  xx, xy, yx, yy;
 724
 725     FT_Long   val = 0x10000L * scaling;
 726
 727
 728     if ( !a || !b )
 729       return;
 730
 731     xx = ADD_LONG( FT_MulDiv( a->xx, b->xx, val ),
 732                    FT_MulDiv( a->xy, b->yx, val ) );
 733     xy = ADD_LONG( FT_MulDiv( a->xx, b->xy, val ),
 734                    FT_MulDiv( a->xy, b->yy, val ) );
 735     yx = ADD_LONG( FT_MulDiv( a->yx, b->xx, val ),
 736                    FT_MulDiv( a->yy, b->yx, val ) );
 737     yy = ADD_LONG( FT_MulDiv( a->yx, b->xy, val ),
 738                    FT_MulDiv( a->yy, b->yy, val ) );
 739
 740     b->xx = xx;
 741     b->xy = xy;
 742     b->yx = yx;
 743     b->yy = yy;
 744   }
 745
 746
 747   /* documentation is in ftcalc.h */
 748
 749   FT_BASE_DEF( FT_Bool )
 750   FT_Matrix_Check( const FT_Matrix*  matrix )
 751   {
 752     FT_Fixed  xx, xy, yx, yy;
 753     FT_Fixed  val;
 754     FT_Int    shift;
 755     FT_ULong  temp1, temp2;
 756
 757
 758     if ( !matrix )
 759       return 0;
 760
 761     xx  = matrix->xx;
 762     xy  = matrix->xy;
 763     yx  = matrix->yx;
 764     yy  = matrix->yy;
 765     val = FT_ABS( xx ) | FT_ABS( xy ) | FT_ABS( yx ) | FT_ABS( yy );
 766
 767     /* we only handle non-zero 32-bit values */
 768     if ( !val || val > 0x7FFFFFFFL )
 769       return 0;
 770
 771     /* Scale matrix to avoid the temp1 overflow, which is */
 772     /* more stringent than avoiding the temp2 overflow.   */
 773
 774     shift = FT_MSB( val ) - 12;
 775
 776     if ( shift > 0 )
 777     {
 778       xx >>= shift;
 779       xy >>= shift;
 780       yx >>= shift;
 781       yy >>= shift;
 782     }
 783
 784     temp1 = 32U * (FT_ULong)FT_ABS( xx * yy - xy * yx );
 785     temp2 = (FT_ULong)( xx * xx ) + (FT_ULong)( xy * xy ) +
 786             (FT_ULong)( yx * yx ) + (FT_ULong)( yy * yy );
 787
 788     if ( temp1 <= temp2 )
 789       return 0;
 790
 791     return 1;
 792   }
 793
 794
 795   /* documentation is in ftcalc.h */
 796
 797   FT_BASE_DEF( void )
 798   FT_Vector_Transform_Scaled( FT_Vector*        vector,
 799                               const FT_Matrix*  matrix,
 800                               FT_Long           scaling )
 801   {
 802     FT_Pos   xz, yz;
 803
 804     FT_Long  val = 0x10000L * scaling;
 805
 806
 807     if ( !vector || !matrix )
 808       return;
 809
 810     xz = ADD_LONG( FT_MulDiv( vector->x, matrix->xx, val ),
 811                    FT_MulDiv( vector->y, matrix->xy, val ) );
 812     yz = ADD_LONG( FT_MulDiv( vector->x, matrix->yx, val ),
 813                    FT_MulDiv( vector->y, matrix->yy, val ) );
 814
 815     vector->x = xz;
 816     vector->y = yz;
 817   }
 818
 819
 820   /* documentation is in ftcalc.h */
 821
 822   FT_BASE_DEF( FT_UInt32 )
 823   FT_Vector_NormLen( FT_Vector*  vector )
 824   {
 825     FT_Int32   x_ = vector->x;
 826     FT_Int32   y_ = vector->y;
 827     FT_Int32   b, z;
 828     FT_UInt32  x, y, u, v, l;
 829     FT_Int     sx = 1, sy = 1, shift;
 830
 831
 832     x = (FT_UInt32)x_;
 833     y = (FT_UInt32)y_;
 834
 835     FT_MOVE_SIGN( x_, x, sx );
 836     FT_MOVE_SIGN( y_, y, sy );
 837
 838     /* trivial cases */
 839     if ( x == 0 )
 840     {
 841       if ( y > 0 )
 842         vector->y = sy * 0x10000;
 843       return y;
 844     }
 845     else if ( y == 0 )
 846     {
 847       if ( x > 0 )
 848         vector->x = sx * 0x10000;
 849       return x;
 850     }
 851
 852     /* Estimate length and prenormalize by shifting so that */
 853     /* the new approximate length is between 2/3 and 4/3.   */
 854     /* The magic constant 0xAAAAAAAAUL (2/3 of 2^32) helps  */
 855     /* achieve this in 16.16 fixed-point representation.    */
 856     l = x > y ? x + ( y >> 1 )
 857               : y + ( x >> 1 );
 858
 859     shift  = 31 - FT_MSB( l );
 860     shift -= 15 + ( l >= ( 0xAAAAAAAAUL >> shift ) );
 861
 862     if ( shift > 0 )
 863     {
 864       x <<= shift;
 865       y <<= shift;
 866
 867       /* re-estimate length for tiny vectors */
 868       l = x > y ? x + ( y >> 1 )
 869                 : y + ( x >> 1 );
 870     }
 871     else
 872     {
 873       x >>= -shift;
 874       y >>= -shift;
 875       l >>= -shift;
 876     }
 877
 878     /* lower linear approximation for reciprocal length minus one */
 879     b = 0x10000 - (FT_Int32)l;
 880
 881     x_ = (FT_Int32)x;
 882     y_ = (FT_Int32)y;
 883
 884     /* Newton's iterations */
 885     do
 886     {
 887       u = (FT_UInt32)( x_ + ( x_ * b >> 16 ) );
 888       v = (FT_UInt32)( y_ + ( y_ * b >> 16 ) );
 889
 890       /* Normalized squared length in the parentheses approaches 2^32. */
 891       /* On two's complement systems, converting to signed gives the   */
 892       /* difference with 2^32 even if the expression wraps around.     */
 893       z = -(FT_Int32)( u * u + v * v ) / 0x200;
 894       z = z * ( ( 0x10000 + b ) >> 8 ) / 0x10000;
 895
 896       b += z;
 897
 898     } while ( z > 0 );
 899
 900     vector->x = sx < 0 ? -(FT_Pos)u : (FT_Pos)u;
 901     vector->y = sy < 0 ? -(FT_Pos)v : (FT_Pos)v;
 902
 903     /* Conversion to signed helps to recover from likely wrap around */
 904     /* in calculating the prenormalized length, because it gives the */
 905     /* correct difference with 2^32 on two's complement systems.     */
 906     l = (FT_UInt32)( 0x10000 + (FT_Int32)( u * x + v * y ) / 0x10000 );
 907     if ( shift > 0 )
 908       l = ( l + ( 1 << ( shift - 1 ) ) ) >> shift;
 909     else
 910       l <<= -shift;
 911
 912     return l;
 913   }
 914
 915
 916 #if 0
 917
 918   /* documentation is in ftcalc.h */
 919
 920   FT_BASE_DEF( FT_Int32 )
 921   FT_SqrtFixed( FT_Int32  x )
 922   {
 923     FT_UInt32  root, rem_hi, rem_lo, test_div;
 924     FT_Int     count;
 925
 926
 927     root = 0;
 928
 929     if ( x > 0 )
 930     {
 931       rem_hi = 0;
 932       rem_lo = (FT_UInt32)x;
 933       count  = 24;
 934       do
 935       {
 936         rem_hi   = ( rem_hi << 2 ) | ( rem_lo >> 30 );
 937         rem_lo <<= 2;
 938         root   <<= 1;
 939         test_div = ( root << 1 ) + 1;
 940
 941         if ( rem_hi >= test_div )
 942         {
 943           rem_hi -= test_div;
 944           root   += 1;
 945         }
 946       } while ( --count );
 947     }
 948
 949     return (FT_Int32)root;
 950   }
 951
 952 #endif /* 0 */
 953
 954
 955   /* documentation is in ftcalc.h */
 956
 957   FT_BASE_DEF( FT_Int )
 958   ft_corner_orientation( FT_Pos  in_x,
 959                          FT_Pos  in_y,
 960                          FT_Pos  out_x,
 961                          FT_Pos  out_y )
 962   {
 963     /* we silently ignore overflow errors since such large values */
 964     /* lead to even more (harmless) rendering errors later on     */
 965
 966 #ifdef FT_INT64
 967
 968     FT_Int64  delta = SUB_INT64( MUL_INT64( in_x, out_y ),
 969                                  MUL_INT64( in_y, out_x ) );
 970
 971
 972     return ( delta > 0 ) - ( delta < 0 );
 973
 974 #else
 975
 976     FT_Int  result;
 977
 978
 979     if ( ADD_LONG( FT_ABS( in_x ), FT_ABS( out_y ) ) <= 131071L &&
 980          ADD_LONG( FT_ABS( in_y ), FT_ABS( out_x ) ) <= 131071L )
 981     {
 982       FT_Long  z1 = MUL_LONG( in_x, out_y );
 983       FT_Long  z2 = MUL_LONG( in_y, out_x );
 984
 985
 986       if ( z1 > z2 )
 987         result = +1;
 988       else if ( z1 < z2 )
 989         result = -1;
 990       else
 991         result = 0;
 992     }
 993     else /* products might overflow 32 bits */
 994     {
 995       FT_Int64  z1, z2;
 996
 997
 998       /* XXX: this function does not allow 64-bit arguments */
 999       ft_multo64( (FT_UInt32)in_x, (FT_UInt32)out_y, &z1 );
1000       ft_multo64( (FT_UInt32)in_y, (FT_UInt32)out_x, &z2 );
1001
1002       if ( z1.hi > z2.hi )
1003         result = +1;
1004       else if ( z1.hi < z2.hi )
1005         result = -1;
1006       else if ( z1.lo > z2.lo )
1007         result = +1;
1008       else if ( z1.lo < z2.lo )
1009         result = -1;
1010       else
1011         result = 0;
1012     }
1013
1014     /* XXX: only the sign of return value, +1/0/-1 must be used */
1015     return result;
1016
1017 #endif
1018   }
1019
1020
1021   /* documentation is in ftcalc.h */
1022
1023   FT_BASE_DEF( FT_Int )
1024   ft_corner_is_flat( FT_Pos  in_x,
1025                      FT_Pos  in_y,
1026                      FT_Pos  out_x,
1027                      FT_Pos  out_y )
1028   {
1029     FT_Pos  ax = in_x + out_x;
1030     FT_Pos  ay = in_y + out_y;
1031
1032     FT_Pos  d_in, d_out, d_hypot;
1033
1034
1035     /* The idea of this function is to compare the length of the */
1036     /* hypotenuse with the `in' and `out' length.  The `corner'  */
1037     /* represented by `in' and `out' is flat if the hypotenuse's */
1038     /* length isn't too large.                                   */
1039     /*                                                           */
1040     /* This approach has the advantage that the angle between    */
1041     /* `in' and `out' is not checked.  In case one of the two    */
1042     /* vectors is `dominant', that is, much larger than the      */
1043     /* other vector, we thus always have a flat corner.          */
1044     /*                                                           */
1045     /*                hypotenuse                                 */
1046     /*       x---------------------------x                       */
1047     /*        \                      /                           */
1048     /*         \                /                                */
1049     /*      in  \          /  out                                */
1050     /*           \    /                                          */
1051     /*            o                                              */
1052     /*              Point                                        */
1053
1054     d_in    = FT_HYPOT(  in_x,  in_y );
1055     d_out   = FT_HYPOT( out_x, out_y );
1056     d_hypot = FT_HYPOT(    ax,    ay );
1057
1058     /* now do a simple length comparison: */
1059     /*                                    */
1060     /*   d_in + d_out < 17/16 d_hypot     */
1061
1062     return ( d_in + d_out - d_hypot ) < ( d_hypot >> 4 );
1063   }
1064
1065
1066   FT_BASE_DEF( FT_Int32 )
1067   FT_MulAddFix( FT_Fixed*  s,
1068                 FT_Int32*  f,
1069                 FT_UInt    count )
1070   {
1071     FT_UInt   i;
1072     FT_Int64  temp;
1073
1074
1075 #ifdef FT_INT64
1076     temp = 0;
1077
1078     for ( i = 0; i < count; ++i )
1079       temp += (FT_Int64)s[i] * f[i];
1080
1081     return (FT_Int32)( ( temp + 0x8000 ) >> 16 );
1082 #else
1083     temp.hi = 0;
1084     temp.lo = 0;
1085
1086     for ( i = 0; i < count; ++i )
1087     {
1088       FT_Int64  multResult;
1089
1090       FT_Int     sign  = 1;
1091       FT_UInt32  carry = 0;
1092
1093       FT_UInt32  scalar;
1094       FT_UInt32  factor;
1095
1096
1097       scalar = (FT_UInt32)s[i];
1098       factor = (FT_UInt32)f[i];
1099
1100       FT_MOVE_SIGN( s[i], scalar, sign );
1101       FT_MOVE_SIGN( f[i], factor, sign );
1102
1103       ft_multo64( scalar, factor, &multResult );
1104
1105       if ( sign < 0 )
1106       {
1107         /* Emulated `FT_Int64` negation. */
1108         carry = ( multResult.lo == 0 );
1109
1110         multResult.lo = ~multResult.lo + 1;
1111         multResult.hi = ~multResult.hi + carry;
1112       }
1113
1114       FT_Add64( &temp, &multResult, &temp );
1115     }
1116
1117     /* Shift and round value. */
1118     return (FT_Int32)( ( ( temp.hi << 16 ) | ( temp.lo >> 16 ) )
1119                                      + ( 1 & ( temp.lo >> 15 ) ) );
1120
1121
1122 #endif /* !FT_INT64 */
1123
1124   }
1125
1126
1127 /* END */