src/base/ftcalc.c

   1 /****************************************************************************
   2  *
   3  * ftcalc.c
   4  *
   5  *   Arithmetic computations (body).
   6  *
   7  * Copyright (C) 1996-2020 by
   8  * David Turner, Robert Wilhelm, and Werner Lemberg.
   9  *
  10  * This file is part of the FreeType project, and may only be used,
  11  * modified, and distributed under the terms of the FreeType project
  12  * license, LICENSE.TXT.  By continuing to use, modify, or distribute
  13  * this file you indicate that you have read the license and
  14  * understand and accept it fully.
  15  *
  16  */
  17
  18   /**************************************************************************
  19    *
  20    * Support for 1-complement arithmetic has been totally dropped in this
  21    * release.  You can still write your own code if you need it.
  22    *
  23    */
  24
  25   /**************************************************************************
  26    *
  27    * Implementing basic computation routines.
  28    *
  29    * FT_MulDiv(), FT_MulFix(), FT_DivFix(), FT_RoundFix(), FT_CeilFix(),
  30    * and FT_FloorFix() are declared in freetype.h.
  31    *
  32    */
  33
  34
  35 #include <freetype/ftglyph.h>
  36 #include <freetype/fttrigon.h>
  37 #include <freetype/internal/ftcalc.h>
  38 #include <freetype/internal/ftdebug.h>
  39 #include <freetype/internal/ftobjs.h>
  40
  41
  42 #ifdef FT_MULFIX_ASSEMBLER
  43 #undef FT_MulFix
  44 #endif
  45
  46 /* we need to emulate a 64-bit data type if a real one isn't available */
  47
  48 #ifndef FT_LONG64
  49
  50   typedef struct  FT_Int64_
  51   {
  52     FT_UInt32  lo;
  53     FT_UInt32  hi;
  54
  55   } FT_Int64;
  56
  57 #endif /* !FT_LONG64 */
  58
  59
  60   /**************************************************************************
  61    *
  62    * The macro FT_COMPONENT is used in trace mode.  It is an implicit
  63    * parameter of the FT_TRACE() and FT_ERROR() macros, used to print/log
  64    * messages during execution.
  65    */
  66 #undef  FT_COMPONENT
  67 #define FT_COMPONENT  calc
  68
  69
  70   /* transfer sign, leaving a positive number;                        */
  71   /* we need an unsigned value to safely negate INT_MIN (or LONG_MIN) */
  72 #define FT_MOVE_SIGN( x, x_unsigned, s ) \
  73   FT_BEGIN_STMNT                         \
  74     if ( x < 0 )                         \
  75     {                                    \
  76       x_unsigned = 0U - (x_unsigned);    \
  77       s          = -s;                   \
  78     }                                    \
  79   FT_END_STMNT
  80
  81   /* The following three functions are available regardless of whether */
  82   /* FT_LONG64 is defined.                                             */
  83
  84   /* documentation is in freetype.h */
  85
  86   FT_EXPORT_DEF( FT_Fixed )
  87   FT_RoundFix( FT_Fixed  a )
  88   {
  89     return ( ADD_LONG( a, 0x8000L - ( a < 0 ) ) ) & ~0xFFFFL;
  90   }
  91
  92
  93   /* documentation is in freetype.h */
  94
  95   FT_EXPORT_DEF( FT_Fixed )
  96   FT_CeilFix( FT_Fixed  a )
  97   {
  98     return ( ADD_LONG( a, 0xFFFFL ) ) & ~0xFFFFL;
  99   }
 100
 101
 102   /* documentation is in freetype.h */
 103
 104   FT_EXPORT_DEF( FT_Fixed )
 105   FT_FloorFix( FT_Fixed  a )
 106   {
 107     return a & ~0xFFFFL;
 108   }
 109
 110 #ifndef FT_MSB
 111
 112   FT_BASE_DEF ( FT_Int )
 113   FT_MSB( FT_UInt32 z )
 114   {
 115     FT_Int  shift = 0;
 116
 117
 118     /* determine msb bit index in `shift' */
 119     if ( z & 0xFFFF0000UL )
 120     {
 121       z     >>= 16;
 122       shift  += 16;
 123     }
 124     if ( z & 0x0000FF00UL )
 125     {
 126       z     >>= 8;
 127       shift  += 8;
 128     }
 129     if ( z & 0x000000F0UL )
 130     {
 131       z     >>= 4;
 132       shift  += 4;
 133     }
 134     if ( z & 0x0000000CUL )
 135     {
 136       z     >>= 2;
 137       shift  += 2;
 138     }
 139     if ( z & 0x00000002UL )
 140     {
 141    /* z     >>= 1; */
 142       shift  += 1;
 143     }
 144
 145     return shift;
 146   }
 147
 148 #endif /* !FT_MSB */
 149
 150
 151   /* documentation is in ftcalc.h */
 152
 153   FT_BASE_DEF( FT_Fixed )
 154   FT_Hypot( FT_Fixed  x,
 155             FT_Fixed  y )
 156   {
 157     FT_Vector  v;
 158
 159
 160     v.x = x;
 161     v.y = y;
 162
 163     return FT_Vector_Length( &v );
 164   }
 165
 166
 167 #ifdef FT_LONG64
 168
 169
 170   /* documentation is in freetype.h */
 171
 172   FT_EXPORT_DEF( FT_Long )
 173   FT_MulDiv( FT_Long  a_,
 174              FT_Long  b_,
 175              FT_Long  c_ )
 176   {
 177     FT_Int     s = 1;
 178     FT_UInt64  a, b, c, d;
 179     FT_Long    d_;
 180
 181
 182     a = (FT_UInt64)a_;
 183     b = (FT_UInt64)b_;
 184     c = (FT_UInt64)c_;
 185
 186     FT_MOVE_SIGN( a_, a, s );
 187     FT_MOVE_SIGN( b_, b, s );
 188     FT_MOVE_SIGN( c_, c, s );
 189
 190     d = c > 0 ? ( a * b + ( c >> 1 ) ) / c
 191               : 0x7FFFFFFFUL;
 192
 193     d_ = (FT_Long)d;
 194
 195     return s < 0 ? NEG_LONG( d_ ) : d_;
 196   }
 197
 198
 199   /* documentation is in ftcalc.h */
 200
 201   FT_BASE_DEF( FT_Long )
 202   FT_MulDiv_No_Round( FT_Long  a_,
 203                       FT_Long  b_,
 204                       FT_Long  c_ )
 205   {
 206     FT_Int     s = 1;
 207     FT_UInt64  a, b, c, d;
 208     FT_Long    d_;
 209
 210
 211     a = (FT_UInt64)a_;
 212     b = (FT_UInt64)b_;
 213     c = (FT_UInt64)c_;
 214
 215     FT_MOVE_SIGN( a_, a, s );
 216     FT_MOVE_SIGN( b_, b, s );
 217     FT_MOVE_SIGN( c_, c, s );
 218
 219     d = c > 0 ? a * b / c
 220               : 0x7FFFFFFFUL;
 221
 222     d_ = (FT_Long)d;
 223
 224     return s < 0 ? NEG_LONG( d_ ) : d_;
 225   }
 226
 227
 228   /* documentation is in freetype.h */
 229
 230   FT_EXPORT_DEF( FT_Long )
 231   FT_MulFix( FT_Long  a_,
 232              FT_Long  b_ )
 233   {
 234 #ifdef FT_MULFIX_ASSEMBLER
 235
 236     return FT_MULFIX_ASSEMBLER( (FT_Int32)a_, (FT_Int32)b_ );
 237
 238 #else
 239
 240     FT_Int64  ab = (FT_Int64)a_ * (FT_Int64)b_;
 241
 242     /* this requires arithmetic right shift of signed numbers */
 243     return (FT_Long)( ( ab + 0x8000L - ( ab < 0 ) ) >> 16 );
 244
 245 #endif /* FT_MULFIX_ASSEMBLER */
 246   }
 247
 248
 249   /* documentation is in freetype.h */
 250
 251   FT_EXPORT_DEF( FT_Long )
 252   FT_DivFix( FT_Long  a_,
 253              FT_Long  b_ )
 254   {
 255     FT_Int     s = 1;
 256     FT_UInt64  a, b, q;
 257     FT_Long    q_;
 258
 259
 260     a = (FT_UInt64)a_;
 261     b = (FT_UInt64)b_;
 262
 263     FT_MOVE_SIGN( a_, a, s );
 264     FT_MOVE_SIGN( b_, b, s );
 265
 266     q = b > 0 ? ( ( a << 16 ) + ( b >> 1 ) ) / b
 267               : 0x7FFFFFFFUL;
 268
 269     q_ = (FT_Long)q;
 270
 271     return s < 0 ? NEG_LONG( q_ ) : q_;
 272   }
 273
 274
 275 #else /* !FT_LONG64 */
 276
 277
 278   static void
 279   ft_multo64( FT_UInt32  x,
 280               FT_UInt32  y,
 281               FT_Int64  *z )
 282   {
 283     FT_UInt32  lo1, hi1, lo2, hi2, lo, hi, i1, i2;
 284
 285
 286     lo1 = x & 0x0000FFFFU;  hi1 = x >> 16;
 287     lo2 = y & 0x0000FFFFU;  hi2 = y >> 16;
 288
 289     lo = lo1 * lo2;
 290     i1 = lo1 * hi2;
 291     i2 = lo2 * hi1;
 292     hi = hi1 * hi2;
 293
 294     /* Check carry overflow of i1 + i2 */
 295     i1 += i2;
 296     hi += (FT_UInt32)( i1 < i2 ) << 16;
 297
 298     hi += i1 >> 16;
 299     i1  = i1 << 16;
 300
 301     /* Check carry overflow of i1 + lo */
 302     lo += i1;
 303     hi += ( lo < i1 );
 304
 305     z->lo = lo;
 306     z->hi = hi;
 307   }
 308
 309
 310   static FT_UInt32
 311   ft_div64by32( FT_UInt32  hi,
 312                 FT_UInt32  lo,
 313                 FT_UInt32  y )
 314   {
 315     FT_UInt32  r, q;
 316     FT_Int     i;
 317
 318
 319     if ( hi >= y )
 320       return (FT_UInt32)0x7FFFFFFFL;
 321
 322     /* We shift as many bits as we can into the high register, perform     */
 323     /* 32-bit division with modulo there, then work through the remaining  */
 324     /* bits with long division. This optimization is especially noticeable */
 325     /* for smaller dividends that barely use the high register.            */
 326
 327     i = 31 - FT_MSB( hi );
 328     r = ( hi << i ) | ( lo >> ( 32 - i ) ); lo <<= i; /* left 64-bit shift */
 329     q = r / y;
 330     r -= q * y;   /* remainder */
 331
 332     i = 32 - i;   /* bits remaining in low register */
 333     do
 334     {
 335       q <<= 1;
 336       r   = ( r << 1 ) | ( lo >> 31 ); lo <<= 1;
 337
 338       if ( r >= y )
 339       {
 340         r -= y;
 341         q |= 1;
 342       }
 343     } while ( --i );
 344
 345     return q;
 346   }
 347
 348
 349   static void
 350   FT_Add64( FT_Int64*  x,
 351             FT_Int64*  y,
 352             FT_Int64  *z )
 353   {
 354     FT_UInt32  lo, hi;
 355
 356
 357     lo = x->lo + y->lo;
 358     hi = x->hi + y->hi + ( lo < x->lo );
 359
 360     z->lo = lo;
 361     z->hi = hi;
 362   }
 363
 364
 365   /*  The FT_MulDiv function has been optimized thanks to ideas from     */
 366   /*  Graham Asher and Alexei Podtelezhnikov.  The trick is to optimize  */
 367   /*  a rather common case when everything fits within 32-bits.          */
 368   /*                                                                     */
 369   /*  We compute 'a*b+c/2', then divide it by 'c' (all positive values). */
 370   /*                                                                     */
 371   /*  The product of two positive numbers never exceeds the square of    */
 372   /*  its mean values.  Therefore, we always avoid the overflow by       */
 373   /*  imposing                                                           */
 374   /*                                                                     */
 375   /*    (a + b) / 2 <= sqrt(X - c/2)    ,                                */
 376   /*                                                                     */
 377   /*  where X = 2^32 - 1, the maximum unsigned 32-bit value, and using   */
 378   /*  unsigned arithmetic.  Now we replace `sqrt' with a linear function */
 379   /*  that is smaller or equal for all values of c in the interval       */
 380   /*  [0;X/2]; it should be equal to sqrt(X) and sqrt(3X/4) at the       */
 381   /*  endpoints.  Substituting the linear solution and explicit numbers  */
 382   /*  we get                                                             */
 383   /*                                                                     */
 384   /*    a + b <= 131071.99 - c / 122291.84    .                          */
 385   /*                                                                     */
 386   /*  In practice, we should use a faster and even stronger inequality   */
 387   /*                                                                     */
 388   /*    a + b <= 131071 - (c >> 16)                                      */
 389   /*                                                                     */
 390   /*  or, alternatively,                                                 */
 391   /*                                                                     */
 392   /*    a + b <= 129894 - (c >> 17)    .                                 */
 393   /*                                                                     */
 394   /*  FT_MulFix, on the other hand, is optimized for a small value of    */
 395   /*  the first argument, when the second argument can be much larger.   */
 396   /*  This can be achieved by scaling the second argument and the limit  */
 397   /*  in the above inequalities.  For example,                           */
 398   /*                                                                     */
 399   /*    a + (b >> 8) <= (131071 >> 4)                                    */
 400   /*                                                                     */
 401   /*  covers the practical range of use. The actual test below is a bit  */
 402   /*  tighter to avoid the border case overflows.                        */
 403   /*                                                                     */
 404   /*  In the case of FT_DivFix, the exact overflow check                 */
 405   /*                                                                     */
 406   /*    a << 16 <= X - c/2                                               */
 407   /*                                                                     */
 408   /*  is scaled down by 2^16 and we use                                  */
 409   /*                                                                     */
 410   /*    a <= 65535 - (c >> 17)    .                                      */
 411
 412   /* documentation is in freetype.h */
 413
 414   FT_EXPORT_DEF( FT_Long )
 415   FT_MulDiv( FT_Long  a_,
 416              FT_Long  b_,
 417              FT_Long  c_ )
 418   {
 419     FT_Int     s = 1;
 420     FT_UInt32  a, b, c;
 421
 422
 423     /* XXX: this function does not allow 64-bit arguments */
 424
 425     a = (FT_UInt32)a_;
 426     b = (FT_UInt32)b_;
 427     c = (FT_UInt32)c_;
 428
 429     FT_MOVE_SIGN( a_, a, s );
 430     FT_MOVE_SIGN( b_, b, s );
 431     FT_MOVE_SIGN( c_, c, s );
 432
 433     if ( c == 0 )
 434       a = 0x7FFFFFFFUL;
 435
 436     else if ( a + b <= 129894UL - ( c >> 17 ) )
 437       a = ( a * b + ( c >> 1 ) ) / c;
 438
 439     else
 440     {
 441       FT_Int64  temp, temp2;
 442
 443
 444       ft_multo64( a, b, &temp );
 445
 446       temp2.hi = 0;
 447       temp2.lo = c >> 1;
 448
 449       FT_Add64( &temp, &temp2, &temp );
 450
 451       /* last attempt to ditch long division */
 452       a = ( temp.hi == 0 ) ? temp.lo / c
 453                            : ft_div64by32( temp.hi, temp.lo, c );
 454     }
 455
 456     a_ = (FT_Long)a;
 457
 458     return s < 0 ? NEG_LONG( a_ ) : a_;
 459   }
 460
 461
 462   FT_BASE_DEF( FT_Long )
 463   FT_MulDiv_No_Round( FT_Long  a_,
 464                       FT_Long  b_,
 465                       FT_Long  c_ )
 466   {
 467     FT_Int     s = 1;
 468     FT_UInt32  a, b, c;
 469
 470
 471     /* XXX: this function does not allow 64-bit arguments */
 472
 473     a = (FT_UInt32)a_;
 474     b = (FT_UInt32)b_;
 475     c = (FT_UInt32)c_;
 476
 477     FT_MOVE_SIGN( a_, a, s );
 478     FT_MOVE_SIGN( b_, b, s );
 479     FT_MOVE_SIGN( c_, c, s );
 480
 481     if ( c == 0 )
 482       a = 0x7FFFFFFFUL;
 483
 484     else if ( a + b <= 131071UL )
 485       a = a * b / c;
 486
 487     else
 488     {
 489       FT_Int64  temp;
 490
 491
 492       ft_multo64( a, b, &temp );
 493
 494       /* last attempt to ditch long division */
 495       a = ( temp.hi == 0 ) ? temp.lo / c
 496                            : ft_div64by32( temp.hi, temp.lo, c );
 497     }
 498
 499     a_ = (FT_Long)a;
 500
 501     return s < 0 ? NEG_LONG( a_ ) : a_;
 502   }
 503
 504
 505   /* documentation is in freetype.h */
 506
 507   FT_EXPORT_DEF( FT_Long )
 508   FT_MulFix( FT_Long  a_,
 509              FT_Long  b_ )
 510   {
 511 #ifdef FT_MULFIX_ASSEMBLER
 512
 513     return FT_MULFIX_ASSEMBLER( a_, b_ );
 514
 515 #elif 0
 516
 517     /*
 518      * This code is nonportable.  See comment below.
 519      *
 520      * However, on a platform where right-shift of a signed quantity fills
 521      * the leftmost bits by copying the sign bit, it might be faster.
 522      */
 523
 524     FT_Long    sa, sb;
 525     FT_UInt32  a, b;
 526
 527
 528     /*
 529      * This is a clever way of converting a signed number `a' into its
 530      * absolute value (stored back into `a') and its sign.  The sign is
 531      * stored in `sa'; 0 means `a' was positive or zero, and -1 means `a'
 532      * was negative.  (Similarly for `b' and `sb').
 533      *
 534      * Unfortunately, it doesn't work (at least not portably).
 535      *
 536      * It makes the assumption that right-shift on a negative signed value
 537      * fills the leftmost bits by copying the sign bit.  This is wrong.
 538      * According to K&R 2nd ed, section `A7.8 Shift Operators' on page 206,
 539      * the result of right-shift of a negative signed value is
 540      * implementation-defined.  At least one implementation fills the
 541      * leftmost bits with 0s (i.e., it is exactly the same as an unsigned
 542      * right shift).  This means that when `a' is negative, `sa' ends up
 543      * with the value 1 rather than -1.  After that, everything else goes
 544      * wrong.
 545      */
 546     sa = ( a_ >> ( sizeof ( a_ ) * 8 - 1 ) );
 547     a  = ( a_ ^ sa ) - sa;
 548     sb = ( b_ >> ( sizeof ( b_ ) * 8 - 1 ) );
 549     b  = ( b_ ^ sb ) - sb;
 550
 551     a = (FT_UInt32)a_;
 552     b = (FT_UInt32)b_;
 553
 554     if ( a + ( b >> 8 ) <= 8190UL )
 555       a = ( a * b + 0x8000U ) >> 16;
 556     else
 557     {
 558       FT_UInt32  al = a & 0xFFFFUL;
 559
 560
 561       a = ( a >> 16 ) * b + al * ( b >> 16 ) +
 562           ( ( al * ( b & 0xFFFFUL ) + 0x8000UL ) >> 16 );
 563     }
 564
 565     sa ^= sb;
 566     a   = ( a ^ sa ) - sa;
 567
 568     return (FT_Long)a;
 569
 570 #else /* 0 */
 571
 572     FT_Int     s = 1;
 573     FT_UInt32  a, b;
 574
 575
 576     /* XXX: this function does not allow 64-bit arguments */
 577
 578     a = (FT_UInt32)a_;
 579     b = (FT_UInt32)b_;
 580
 581     FT_MOVE_SIGN( a_, a, s );
 582     FT_MOVE_SIGN( b_, b, s );
 583
 584     if ( a + ( b >> 8 ) <= 8190UL )
 585       a = ( a * b + 0x8000UL ) >> 16;
 586     else
 587     {
 588       FT_UInt32  al = a & 0xFFFFUL;
 589
 590
 591       a = ( a >> 16 ) * b + al * ( b >> 16 ) +
 592           ( ( al * ( b & 0xFFFFUL ) + 0x8000UL ) >> 16 );
 593     }
 594
 595     a_ = (FT_Long)a;
 596
 597     return s < 0 ? NEG_LONG( a_ ) : a_;
 598
 599 #endif /* 0 */
 600
 601   }
 602
 603
 604   /* documentation is in freetype.h */
 605
 606   FT_EXPORT_DEF( FT_Long )
 607   FT_DivFix( FT_Long  a_,
 608              FT_Long  b_ )
 609   {
 610     FT_Int     s = 1;
 611     FT_UInt32  a, b, q;
 612     FT_Long    q_;
 613
 614
 615     /* XXX: this function does not allow 64-bit arguments */
 616
 617     a = (FT_UInt32)a_;
 618     b = (FT_UInt32)b_;
 619
 620     FT_MOVE_SIGN( a_, a, s );
 621     FT_MOVE_SIGN( b_, b, s );
 622
 623     if ( b == 0 )
 624     {
 625       /* check for division by 0 */
 626       q = 0x7FFFFFFFUL;
 627     }
 628     else if ( a <= 65535UL - ( b >> 17 ) )
 629     {
 630       /* compute result directly */
 631       q = ( ( a << 16 ) + ( b >> 1 ) ) / b;
 632     }
 633     else
 634     {
 635       /* we need more bits; we have to do it by hand */
 636       FT_Int64  temp, temp2;
 637
 638
 639       temp.hi  = a >> 16;
 640       temp.lo  = a << 16;
 641       temp2.hi = 0;
 642       temp2.lo = b >> 1;
 643
 644       FT_Add64( &temp, &temp2, &temp );
 645       q = ft_div64by32( temp.hi, temp.lo, b );
 646     }
 647
 648     q_ = (FT_Long)q;
 649
 650     return s < 0 ? NEG_LONG( q_ ) : q_;
 651   }
 652
 653
 654 #endif /* !FT_LONG64 */
 655
 656
 657   /* documentation is in ftglyph.h */
 658
 659   FT_EXPORT_DEF( void )
 660   FT_Matrix_Multiply( const FT_Matrix*  a,
 661                       FT_Matrix        *b )
 662   {
 663     FT_Fixed  xx, xy, yx, yy;
 664
 665
 666     if ( !a || !b )
 667       return;
 668
 669     xx = ADD_LONG( FT_MulFix( a->xx, b->xx ),
 670                    FT_MulFix( a->xy, b->yx ) );
 671     xy = ADD_LONG( FT_MulFix( a->xx, b->xy ),
 672                    FT_MulFix( a->xy, b->yy ) );
 673     yx = ADD_LONG( FT_MulFix( a->yx, b->xx ),
 674                    FT_MulFix( a->yy, b->yx ) );
 675     yy = ADD_LONG( FT_MulFix( a->yx, b->xy ),
 676                    FT_MulFix( a->yy, b->yy ) );
 677
 678     b->xx = xx;
 679     b->xy = xy;
 680     b->yx = yx;
 681     b->yy = yy;
 682   }
 683
 684
 685   /* documentation is in ftglyph.h */
 686
 687   FT_EXPORT_DEF( FT_Error )
 688   FT_Matrix_Invert( FT_Matrix*  matrix )
 689   {
 690     FT_Pos  delta, xx, yy;
 691
 692
 693     if ( !matrix )
 694       return FT_THROW( Invalid_Argument );
 695
 696     /* compute discriminant */
 697     delta = FT_MulFix( matrix->xx, matrix->yy ) -
 698             FT_MulFix( matrix->xy, matrix->yx );
 699
 700     if ( !delta )
 701       return FT_THROW( Invalid_Argument );  /* matrix can't be inverted */
 702
 703     matrix->xy = -FT_DivFix( matrix->xy, delta );
 704     matrix->yx = -FT_DivFix( matrix->yx, delta );
 705
 706     xx = matrix->xx;
 707     yy = matrix->yy;
 708
 709     matrix->xx = FT_DivFix( yy, delta );
 710     matrix->yy = FT_DivFix( xx, delta );
 711
 712     return FT_Err_Ok;
 713   }
 714
 715
 716   /* documentation is in ftcalc.h */
 717
 718   FT_BASE_DEF( void )
 719   FT_Matrix_Multiply_Scaled( const FT_Matrix*  a,
 720                              FT_Matrix        *b,
 721                              FT_Long           scaling )
 722   {
 723     FT_Fixed  xx, xy, yx, yy;
 724
 725     FT_Long   val = 0x10000L * scaling;
 726
 727
 728     if ( !a || !b )
 729       return;
 730
 731     xx = ADD_LONG( FT_MulDiv( a->xx, b->xx, val ),
 732                    FT_MulDiv( a->xy, b->yx, val ) );
 733     xy = ADD_LONG( FT_MulDiv( a->xx, b->xy, val ),
 734                    FT_MulDiv( a->xy, b->yy, val ) );
 735     yx = ADD_LONG( FT_MulDiv( a->yx, b->xx, val ),
 736                    FT_MulDiv( a->yy, b->yx, val ) );
 737     yy = ADD_LONG( FT_MulDiv( a->yx, b->xy, val ),
 738                    FT_MulDiv( a->yy, b->yy, val ) );
 739
 740     b->xx = xx;
 741     b->xy = xy;
 742     b->yx = yx;
 743     b->yy = yy;
 744   }
 745
 746
 747   /* documentation is in ftcalc.h */
 748
 749   FT_BASE_DEF( FT_Bool )
 750   FT_Matrix_Check( const FT_Matrix*  matrix )
 751   {
 752     FT_Matrix  m;
 753     FT_Fixed   val[4];
 754     FT_Fixed   nonzero_minval, maxval;
 755     FT_Fixed   temp1, temp2;
 756     FT_UInt    i;
 757
 758
 759     if ( !matrix )
 760       return 0;
 761
 762     val[0] = FT_ABS( matrix->xx );
 763     val[1] = FT_ABS( matrix->xy );
 764     val[2] = FT_ABS( matrix->yx );
 765     val[3] = FT_ABS( matrix->yy );
 766
 767     /*
 768      * To avoid overflow, we ensure that each value is not larger than
 769      *
 770      *   int(sqrt(2^31 / 4)) = 23170  ;
 771      *
 772      * we also check that no value becomes zero if we have to scale.
 773      */
 774
 775     maxval         = 0;
 776     nonzero_minval = FT_LONG_MAX;
 777
 778     for ( i = 0; i < 4; i++ )
 779     {
 780       if ( val[i] > maxval )
 781         maxval = val[i];
 782       if ( val[i] && val[i] < nonzero_minval )
 783         nonzero_minval = val[i];
 784     }
 785
 786     /* we only handle 32bit values */
 787     if ( maxval > 0x7FFFFFFFL )
 788       return 0;
 789
 790     if ( maxval > 23170 )
 791     {
 792       FT_Fixed  scale = FT_DivFix( maxval, 23170 );
 793
 794
 795       if ( !FT_DivFix( nonzero_minval, scale ) )
 796         return 0;    /* value range too large */
 797
 798       m.xx = FT_DivFix( matrix->xx, scale );
 799       m.xy = FT_DivFix( matrix->xy, scale );
 800       m.yx = FT_DivFix( matrix->yx, scale );
 801       m.yy = FT_DivFix( matrix->yy, scale );
 802     }
 803     else
 804       m = *matrix;
 805
 806     temp1 = FT_ABS( m.xx * m.yy - m.xy * m.yx );
 807     temp2 = m.xx * m.xx + m.xy * m.xy + m.yx * m.yx + m.yy * m.yy;
 808
 809     if ( temp1 == 0         ||
 810          temp2 / temp1 > 50 )
 811       return 0;
 812
 813     return 1;
 814   }
 815
 816
 817   /* documentation is in ftcalc.h */
 818
 819   FT_BASE_DEF( void )
 820   FT_Vector_Transform_Scaled( FT_Vector*        vector,
 821                               const FT_Matrix*  matrix,
 822                               FT_Long           scaling )
 823   {
 824     FT_Pos   xz, yz;
 825
 826     FT_Long  val = 0x10000L * scaling;
 827
 828
 829     if ( !vector || !matrix )
 830       return;
 831
 832     xz = ADD_LONG( FT_MulDiv( vector->x, matrix->xx, val ),
 833                    FT_MulDiv( vector->y, matrix->xy, val ) );
 834     yz = ADD_LONG( FT_MulDiv( vector->x, matrix->yx, val ),
 835                    FT_MulDiv( vector->y, matrix->yy, val ) );
 836
 837     vector->x = xz;
 838     vector->y = yz;
 839   }
 840
 841
 842   /* documentation is in ftcalc.h */
 843
 844   FT_BASE_DEF( FT_UInt32 )
 845   FT_Vector_NormLen( FT_Vector*  vector )
 846   {
 847     FT_Int32   x_ = vector->x;
 848     FT_Int32   y_ = vector->y;
 849     FT_Int32   b, z;
 850     FT_UInt32  x, y, u, v, l;
 851     FT_Int     sx = 1, sy = 1, shift;
 852
 853
 854     x = (FT_UInt32)x_;
 855     y = (FT_UInt32)y_;
 856
 857     FT_MOVE_SIGN( x_, x, sx );
 858     FT_MOVE_SIGN( y_, y, sy );
 859
 860     /* trivial cases */
 861     if ( x == 0 )
 862     {
 863       if ( y > 0 )
 864         vector->y = sy * 0x10000;
 865       return y;
 866     }
 867     else if ( y == 0 )
 868     {
 869       if ( x > 0 )
 870         vector->x = sx * 0x10000;
 871       return x;
 872     }
 873
 874     /* Estimate length and prenormalize by shifting so that */
 875     /* the new approximate length is between 2/3 and 4/3.   */
 876     /* The magic constant 0xAAAAAAAAUL (2/3 of 2^32) helps  */
 877     /* achieve this in 16.16 fixed-point representation.    */
 878     l = x > y ? x + ( y >> 1 )
 879               : y + ( x >> 1 );
 880
 881     shift  = 31 - FT_MSB( l );
 882     shift -= 15 + ( l >= ( 0xAAAAAAAAUL >> shift ) );
 883
 884     if ( shift > 0 )
 885     {
 886       x <<= shift;
 887       y <<= shift;
 888
 889       /* re-estimate length for tiny vectors */
 890       l = x > y ? x + ( y >> 1 )
 891                 : y + ( x >> 1 );
 892     }
 893     else
 894     {
 895       x >>= -shift;
 896       y >>= -shift;
 897       l >>= -shift;
 898     }
 899
 900     /* lower linear approximation for reciprocal length minus one */
 901     b = 0x10000 - (FT_Int32)l;
 902
 903     x_ = (FT_Int32)x;
 904     y_ = (FT_Int32)y;
 905
 906     /* Newton's iterations */
 907     do
 908     {
 909       u = (FT_UInt32)( x_ + ( x_ * b >> 16 ) );
 910       v = (FT_UInt32)( y_ + ( y_ * b >> 16 ) );
 911
 912       /* Normalized squared length in the parentheses approaches 2^32. */
 913       /* On two's complement systems, converting to signed gives the   */
 914       /* difference with 2^32 even if the expression wraps around.     */
 915       z = -(FT_Int32)( u * u + v * v ) / 0x200;
 916       z = z * ( ( 0x10000 + b ) >> 8 ) / 0x10000;
 917
 918       b += z;
 919
 920     } while ( z > 0 );
 921
 922     vector->x = sx < 0 ? -(FT_Pos)u : (FT_Pos)u;
 923     vector->y = sy < 0 ? -(FT_Pos)v : (FT_Pos)v;
 924
 925     /* Conversion to signed helps to recover from likely wrap around */
 926     /* in calculating the prenormalized length, because it gives the */
 927     /* correct difference with 2^32 on two's complement systems.     */
 928     l = (FT_UInt32)( 0x10000 + (FT_Int32)( u * x + v * y ) / 0x10000 );
 929     if ( shift > 0 )
 930       l = ( l + ( 1 << ( shift - 1 ) ) ) >> shift;
 931     else
 932       l <<= -shift;
 933
 934     return l;
 935   }
 936
 937
 938 #if 0
 939
 940   /* documentation is in ftcalc.h */
 941
 942   FT_BASE_DEF( FT_Int32 )
 943   FT_SqrtFixed( FT_Int32  x )
 944   {
 945     FT_UInt32  root, rem_hi, rem_lo, test_div;
 946     FT_Int     count;
 947
 948
 949     root = 0;
 950
 951     if ( x > 0 )
 952     {
 953       rem_hi = 0;
 954       rem_lo = (FT_UInt32)x;
 955       count  = 24;
 956       do
 957       {
 958         rem_hi   = ( rem_hi << 2 ) | ( rem_lo >> 30 );
 959         rem_lo <<= 2;
 960         root   <<= 1;
 961         test_div = ( root << 1 ) + 1;
 962
 963         if ( rem_hi >= test_div )
 964         {
 965           rem_hi -= test_div;
 966           root   += 1;
 967         }
 968       } while ( --count );
 969     }
 970
 971     return (FT_Int32)root;
 972   }
 973
 974 #endif /* 0 */
 975
 976
 977   /* documentation is in ftcalc.h */
 978
 979   FT_BASE_DEF( FT_Int )
 980   ft_corner_orientation( FT_Pos  in_x,
 981                          FT_Pos  in_y,
 982                          FT_Pos  out_x,
 983                          FT_Pos  out_y )
 984   {
 985     /* we silently ignore overflow errors since such large values */
 986     /* lead to even more (harmless) rendering errors later on     */
 987
 988 #ifdef FT_LONG64
 989
 990     FT_Int64  delta = SUB_INT64( MUL_INT64( in_x, out_y ),
 991                                  MUL_INT64( in_y, out_x ) );
 992
 993
 994     return ( delta > 0 ) - ( delta < 0 );
 995
 996 #else
 997
 998     FT_Int  result;
 999
1000
1001     if ( ADD_LONG( FT_ABS( in_x ), FT_ABS( out_y ) ) <= 131071L &&
1002          ADD_LONG( FT_ABS( in_y ), FT_ABS( out_x ) ) <= 131071L )
1003     {
1004       FT_Long  z1 = MUL_LONG( in_x, out_y );
1005       FT_Long  z2 = MUL_LONG( in_y, out_x );
1006
1007
1008       if ( z1 > z2 )
1009         result = +1;
1010       else if ( z1 < z2 )
1011         result = -1;
1012       else
1013         result = 0;
1014     }
1015     else /* products might overflow 32 bits */
1016     {
1017       FT_Int64  z1, z2;
1018
1019
1020       /* XXX: this function does not allow 64-bit arguments */
1021       ft_multo64( (FT_UInt32)in_x, (FT_UInt32)out_y, &z1 );
1022       ft_multo64( (FT_UInt32)in_y, (FT_UInt32)out_x, &z2 );
1023
1024       if ( z1.hi > z2.hi )
1025         result = +1;
1026       else if ( z1.hi < z2.hi )
1027         result = -1;
1028       else if ( z1.lo > z2.lo )
1029         result = +1;
1030       else if ( z1.lo < z2.lo )
1031         result = -1;
1032       else
1033         result = 0;
1034     }
1035
1036     /* XXX: only the sign of return value, +1/0/-1 must be used */
1037     return result;
1038
1039 #endif
1040   }
1041
1042
1043   /* documentation is in ftcalc.h */
1044
1045   FT_BASE_DEF( FT_Int )
1046   ft_corner_is_flat( FT_Pos  in_x,
1047                      FT_Pos  in_y,
1048                      FT_Pos  out_x,
1049                      FT_Pos  out_y )
1050   {
1051     FT_Pos  ax = in_x + out_x;
1052     FT_Pos  ay = in_y + out_y;
1053
1054     FT_Pos  d_in, d_out, d_hypot;
1055
1056
1057     /* The idea of this function is to compare the length of the */
1058     /* hypotenuse with the `in' and `out' length.  The `corner'  */
1059     /* represented by `in' and `out' is flat if the hypotenuse's */
1060     /* length isn't too large.                                   */
1061     /*                                                           */
1062     /* This approach has the advantage that the angle between    */
1063     /* `in' and `out' is not checked.  In case one of the two    */
1064     /* vectors is `dominant', this is, much larger than the      */
1065     /* other vector, we thus always have a flat corner.          */
1066     /*                                                           */
1067     /*                hypotenuse                                 */
1068     /*       x---------------------------x                       */
1069     /*        \                      /                           */
1070     /*         \                /                                */
1071     /*      in  \          /  out                                */
1072     /*           \    /                                          */
1073     /*            o                                              */
1074     /*              Point                                        */
1075
1076     d_in    = FT_HYPOT(  in_x,  in_y );
1077     d_out   = FT_HYPOT( out_x, out_y );
1078     d_hypot = FT_HYPOT(    ax,    ay );
1079
1080     /* now do a simple length comparison: */
1081     /*                                    */
1082     /*   d_in + d_out < 17/16 d_hypot     */
1083
1084     return ( d_in + d_out - d_hypot ) < ( d_hypot >> 4 );
1085   }
1086
1087
1088 /* END */