include/freetype/internal/ftcalc.h

   1 /****************************************************************************
   2  *
   3  * ftcalc.h
   4  *
   5  *   Arithmetic computations (specification).
   6  *
   7  * Copyright (C) 1996-2023 by
   8  * David Turner, Robert Wilhelm, and Werner Lemberg.
   9  *
  10  * This file is part of the FreeType project, and may only be used,
  11  * modified, and distributed under the terms of the FreeType project
  12  * license, LICENSE.TXT.  By continuing to use, modify, or distribute
  13  * this file you indicate that you have read the license and
  14  * understand and accept it fully.
  15  *
  16  */
  17
  18
  19 #ifndef FTCALC_H_
  20 #define FTCALC_H_
  21
  22
  23 #include <freetype/freetype.h>
  24
  25 #include "compiler-macros.h"
  26
  27 FT_BEGIN_HEADER
  28
  29
  30   /**************************************************************************
  31    *
  32    * FT_MulDiv() and FT_MulFix() are declared in freetype.h.
  33    *
  34    */
  35
  36 #ifndef  FT_CONFIG_OPTION_NO_ASSEMBLER
  37   /* Provide assembler fragments for performance-critical functions. */
  38   /* These must be defined `static __inline__' with GCC.             */
  39
  40 #if defined( __CC_ARM ) || defined( __ARMCC__ )  /* RVCT */
  41
  42 #define FT_MULFIX_ASSEMBLER  FT_MulFix_arm
  43
  44   /* documentation is in freetype.h */
  45
  46   static __inline FT_Int32
  47   FT_MulFix_arm( FT_Int32  a,
  48                  FT_Int32  b )
  49   {
  50     FT_Int32  t, t2;
  51
  52
  53     __asm
  54     {
  55       smull t2, t,  b,  a           /* (lo=t2,hi=t) = a*b */
  56       mov   a,  t,  asr #31         /* a   = (hi >> 31) */
  57       add   a,  a,  #0x8000         /* a  += 0x8000 */
  58       adds  t2, t2, a               /* t2 += a */
  59       adc   t,  t,  #0              /* t  += carry */
  60       mov   a,  t2, lsr #16         /* a   = t2 >> 16 */
  61       orr   a,  a,  t,  lsl #16     /* a  |= t << 16 */
  62     }
  63     return a;
  64   }
  65
  66 #endif /* __CC_ARM || __ARMCC__ */
  67
  68
  69 #ifdef __GNUC__
  70
  71 #if defined( __arm__ )                                 && \
  72     ( !defined( __thumb__ ) || defined( __thumb2__ ) ) && \
  73     !( defined( __CC_ARM ) || defined( __ARMCC__ ) )
  74
  75 #define FT_MULFIX_ASSEMBLER  FT_MulFix_arm
  76
  77   /* documentation is in freetype.h */
  78
  79   static __inline__ FT_Int32
  80   FT_MulFix_arm( FT_Int32  a,
  81                  FT_Int32  b )
  82   {
  83     FT_Int32  t, t2;
  84
  85
  86     __asm__ __volatile__ (
  87       "smull  %1, %2, %4, %3\n\t"       /* (lo=%1,hi=%2) = a*b */
  88       "mov    %0, %2, asr #31\n\t"      /* %0  = (hi >> 31) */
  89 #if defined( __clang__ ) && defined( __thumb2__ )
  90       "add.w  %0, %0, #0x8000\n\t"      /* %0 += 0x8000 */
  91 #else
  92       "add    %0, %0, #0x8000\n\t"      /* %0 += 0x8000 */
  93 #endif
  94       "adds   %1, %1, %0\n\t"           /* %1 += %0 */
  95       "adc    %2, %2, #0\n\t"           /* %2 += carry */
  96       "mov    %0, %1, lsr #16\n\t"      /* %0  = %1 >> 16 */
  97       "orr    %0, %0, %2, lsl #16\n\t"  /* %0 |= %2 << 16 */
  98       : "=r"(a), "=&r"(t2), "=&r"(t)
  99       : "r"(a), "r"(b)
 100       : "cc" );
 101     return a;
 102   }
 103
 104 #endif /* __arm__                      && */
 105        /* ( __thumb2__ || !__thumb__ ) && */
 106        /* !( __CC_ARM || __ARMCC__ )      */
 107
 108
 109 #if defined( __i386__ )
 110
 111 #define FT_MULFIX_ASSEMBLER  FT_MulFix_i386
 112
 113   /* documentation is in freetype.h */
 114
 115   static __inline__ FT_Int32
 116   FT_MulFix_i386( FT_Int32  a,
 117                   FT_Int32  b )
 118   {
 119     FT_Int32  result;
 120
 121
 122     __asm__ __volatile__ (
 123       "imul  %%edx\n"
 124       "movl  %%edx, %%ecx\n"
 125       "sarl  $31, %%ecx\n"
 126       "addl  $0x8000, %%ecx\n"
 127       "addl  %%ecx, %%eax\n"
 128       "adcl  $0, %%edx\n"
 129       "shrl  $16, %%eax\n"
 130       "shll  $16, %%edx\n"
 131       "addl  %%edx, %%eax\n"
 132       : "=a"(result), "=d"(b)
 133       : "a"(a), "d"(b)
 134       : "%ecx", "cc" );
 135     return result;
 136   }
 137
 138 #endif /* i386 */
 139
 140 #endif /* __GNUC__ */
 141
 142
 143 #ifdef _MSC_VER /* Visual C++ */
 144
 145 #ifdef _M_IX86
 146
 147 #define FT_MULFIX_ASSEMBLER  FT_MulFix_i386
 148
 149   /* documentation is in freetype.h */
 150
 151   static __inline FT_Int32
 152   FT_MulFix_i386( FT_Int32  a,
 153                   FT_Int32  b )
 154   {
 155     FT_Int32  result;
 156
 157     __asm
 158     {
 159       mov eax, a
 160       mov edx, b
 161       imul edx
 162       mov ecx, edx
 163       sar ecx, 31
 164       add ecx, 8000h
 165       add eax, ecx
 166       adc edx, 0
 167       shr eax, 16
 168       shl edx, 16
 169       add eax, edx
 170       mov result, eax
 171     }
 172     return result;
 173   }
 174
 175 #endif /* _M_IX86 */
 176
 177 #endif /* _MSC_VER */
 178
 179
 180 #if defined( __GNUC__ ) && defined( __x86_64__ )
 181
 182 #define FT_MULFIX_ASSEMBLER  FT_MulFix_x86_64
 183
 184   static __inline__ FT_Int32
 185   FT_MulFix_x86_64( FT_Int32  a,
 186                     FT_Int32  b )
 187   {
 188     /* Temporarily disable the warning that C90 doesn't support */
 189     /* `long long'.                                             */
 190 #if __GNUC__ > 4 || ( __GNUC__ == 4 && __GNUC_MINOR__ >= 6 )
 191 #pragma GCC diagnostic push
 192 #pragma GCC diagnostic ignored "-Wlong-long"
 193 #endif
 194
 195 #if 1
 196     /* Technically not an assembly fragment, but GCC does a really good */
 197     /* job at inlining it and generating good machine code for it.      */
 198     long long  ret, tmp;
 199
 200
 201     ret  = (long long)a * b;
 202     tmp  = ret >> 63;
 203     ret += 0x8000 + tmp;
 204
 205     return (FT_Int32)( ret >> 16 );
 206 #else
 207
 208     /* For some reason, GCC 4.6 on Ubuntu 12.04 generates invalid machine  */
 209     /* code from the lines below.  The main issue is that `wide_a' is not  */
 210     /* properly initialized by sign-extending `a'.  Instead, the generated */
 211     /* machine code assumes that the register that contains `a' on input   */
 212     /* can be used directly as a 64-bit value, which is wrong most of the  */
 213     /* time.                                                               */
 214     long long  wide_a = (long long)a;
 215     long long  wide_b = (long long)b;
 216     long long  result;
 217
 218
 219     __asm__ __volatile__ (
 220       "imul %2, %1\n"
 221       "mov %1, %0\n"
 222       "sar $63, %0\n"
 223       "lea 0x8000(%1, %0), %0\n"
 224       "sar $16, %0\n"
 225       : "=&r"(result), "=&r"(wide_a)
 226       : "r"(wide_b)
 227       : "cc" );
 228
 229     return (FT_Int32)result;
 230 #endif
 231
 232 #if __GNUC__ > 4 || ( __GNUC__ == 4 && __GNUC_MINOR__ >= 6 )
 233 #pragma GCC diagnostic pop
 234 #endif
 235   }
 236
 237 #endif /* __GNUC__ && __x86_64__ */
 238
 239 #endif /* !FT_CONFIG_OPTION_NO_ASSEMBLER */
 240
 241
 242 #ifdef FT_CONFIG_OPTION_INLINE_MULFIX
 243 #ifdef FT_MULFIX_ASSEMBLER
 244 #define FT_MulFix( a, b )  FT_MULFIX_ASSEMBLER( (FT_Int32)(a), (FT_Int32)(b) )
 245 #endif
 246 #endif
 247
 248
 249   /**************************************************************************
 250    *
 251    * @function:
 252    *   FT_MulDiv_No_Round
 253    *
 254    * @description:
 255    *   A very simple function used to perform the computation '(a*b)/c'
 256    *   (without rounding) with maximum accuracy (it uses a 64-bit
 257    *   intermediate integer whenever necessary).
 258    *
 259    *   This function isn't necessarily as fast as some processor-specific
 260    *   operations, but is at least completely portable.
 261    *
 262    * @input:
 263    *   a ::
 264    *     The first multiplier.
 265    *   b ::
 266    *     The second multiplier.
 267    *   c ::
 268    *     The divisor.
 269    *
 270    * @return:
 271    *   The result of '(a*b)/c'.  This function never traps when trying to
 272    *   divide by zero; it simply returns 'MaxInt' or 'MinInt' depending on
 273    *   the signs of 'a' and 'b'.
 274    */
 275   FT_BASE( FT_Long )
 276   FT_MulDiv_No_Round( FT_Long  a,
 277                       FT_Long  b,
 278                       FT_Long  c );
 279
 280
 281   /**************************************************************************
 282    *
 283    * @function:
 284    *   FT_MulAddFix
 285    *
 286    * @description:
 287    *   Compute `(s[0] * f[0] + s[1] * f[1] + ...) / 0x10000`, where `s[n]` is
 288    *   usually a 16.16 scalar.
 289    *
 290    * @input:
 291    *   s ::
 292    *     The array of scalars.
 293    *   f ::
 294    *     The array of factors.
 295    *   count ::
 296    *     The number of entries in the array.
 297    *
 298    * @return:
 299    *   The result of `(s[0] * f[0] + s[1] * f[1] + ...) / 0x10000`.
 300    *
 301    * @note:
 302    *   This function is currently used for the scaled delta computation of
 303    *   variation stores.  It internally uses 64-bit data types when
 304    *   available, otherwise it emulates 64-bit math by using 32-bit
 305    *   operations, which produce a correct result but most likely at a slower
 306    *   performance in comparison to the implementation base on `int64_t`.
 307    *
 308    */
 309   FT_BASE( FT_Int32 )
 310   FT_MulAddFix( FT_Fixed*  s,
 311                 FT_Int32*  f,
 312                 FT_UInt    count );
 313
 314
 315   /*
 316    * A variant of FT_Matrix_Multiply which scales its result afterwards.  The
 317    * idea is that both `a' and `b' are scaled by factors of 10 so that the
 318    * values are as precise as possible to get a correct result during the
 319    * 64bit multiplication.  Let `sa' and `sb' be the scaling factors of `a'
 320    * and `b', respectively, then the scaling factor of the result is `sa*sb'.
 321    */
 322   FT_BASE( void )
 323   FT_Matrix_Multiply_Scaled( const FT_Matrix*  a,
 324                              FT_Matrix        *b,
 325                              FT_Long           scaling );
 326
 327
 328   /*
 329    * Check a matrix.  If the transformation would lead to extreme shear or
 330    * extreme scaling, for example, return 0.  If everything is OK, return 1.
 331    *
 332    * Based on geometric considerations we use the following inequality to
 333    * identify a degenerate matrix.
 334    *
 335    *   32 * abs(xx*yy - xy*yx) < xx^2 + xy^2 + yx^2 + yy^2
 336    *
 337    * Value 32 is heuristic.
 338    */
 339   FT_BASE( FT_Bool )
 340   FT_Matrix_Check( const FT_Matrix*  matrix );
 341
 342
 343   /*
 344    * A variant of FT_Vector_Transform.  See comments for
 345    * FT_Matrix_Multiply_Scaled.
 346    */
 347   FT_BASE( void )
 348   FT_Vector_Transform_Scaled( FT_Vector*        vector,
 349                               const FT_Matrix*  matrix,
 350                               FT_Long           scaling );
 351
 352
 353   /*
 354    * This function normalizes a vector and returns its original length.  The
 355    * normalized vector is a 16.16 fixed-point unit vector with length close
 356    * to 0x10000.  The accuracy of the returned length is limited to 16 bits
 357    * also.  The function utilizes quick inverse square root approximation
 358    * without divisions and square roots relying on Newton's iterations
 359    * instead.
 360    */
 361   FT_BASE( FT_UInt32 )
 362   FT_Vector_NormLen( FT_Vector*  vector );
 363
 364
 365   /*
 366    * Return -1, 0, or +1, depending on the orientation of a given corner.  We
 367    * use the Cartesian coordinate system, with positive vertical values going
 368    * upwards.  The function returns +1 if the corner turns to the left, -1 to
 369    * the right, and 0 for undecidable cases.
 370    */
 371   FT_BASE( FT_Int )
 372   ft_corner_orientation( FT_Pos  in_x,
 373                          FT_Pos  in_y,
 374                          FT_Pos  out_x,
 375                          FT_Pos  out_y );
 376
 377
 378   /*
 379    * Return TRUE if a corner is flat or nearly flat.  This is equivalent to
 380    * saying that the corner point is close to its neighbors, or inside an
 381    * ellipse defined by the neighbor focal points to be more precise.
 382    */
 383   FT_BASE( FT_Int )
 384   ft_corner_is_flat( FT_Pos  in_x,
 385                      FT_Pos  in_y,
 386                      FT_Pos  out_x,
 387                      FT_Pos  out_y );
 388
 389
 390   /*
 391    * Return the most significant bit index.
 392    */
 393
 394 #ifndef  FT_CONFIG_OPTION_NO_ASSEMBLER
 395
 396 #if defined( __clang__ ) || ( defined( __GNUC__ )                &&  \
 397     ( __GNUC__ > 3 || ( __GNUC__ == 3 && __GNUC_MINOR__ >= 4 ) ) )
 398
 399 #if FT_SIZEOF_INT == 4
 400
 401 #define FT_MSB( x )  ( 31 - __builtin_clz( x ) )
 402
 403 #elif FT_SIZEOF_LONG == 4
 404
 405 #define FT_MSB( x )  ( 31 - __builtin_clzl( x ) )
 406
 407 #endif
 408
 409 #elif defined( _MSC_VER ) && _MSC_VER >= 1400
 410
 411 #if defined( _WIN32_WCE )
 412
 413 #include <cmnintrin.h>
 414 #pragma intrinsic( _CountLeadingZeros )
 415
 416 #define FT_MSB( x )  ( 31 - _CountLeadingZeros( x ) )
 417
 418 #elif defined( _M_ARM64 ) || defined( _M_ARM )
 419
 420 #include <intrin.h>
 421 #pragma intrinsic( _CountLeadingZeros )
 422
 423 #define FT_MSB( x )  ( 31 - _CountLeadingZeros( x ) )
 424
 425 #elif defined( _M_IX86 ) || defined( _M_AMD64 ) || defined( _M_IA64 )
 426
 427 #include <intrin.h>
 428 #pragma intrinsic( _BitScanReverse )
 429
 430   static __inline FT_Int32
 431   FT_MSB_i386( FT_UInt32  x )
 432   {
 433     unsigned long  where;
 434
 435
 436     _BitScanReverse( &where, x );
 437
 438     return (FT_Int32)where;
 439   }
 440
 441 #define FT_MSB( x )  FT_MSB_i386( x )
 442
 443 #endif
 444
 445 #elif defined( __WATCOMC__ ) && defined( __386__ )
 446
 447   extern __inline FT_Int32
 448   FT_MSB_i386( FT_UInt32  x );
 449
 450 #pragma aux FT_MSB_i386 =             \
 451   "bsr eax, eax"                      \
 452   __parm [__eax] __nomemory           \
 453   __value [__eax]                     \
 454   __modify __exact [__eax] __nomemory;
 455
 456 #define FT_MSB( x )  FT_MSB_i386( x )
 457
 458 #elif defined( __DECC ) || defined( __DECCXX )
 459
 460 #include <builtins.h>
 461
 462 #define FT_MSB( x )  (FT_Int)( 63 - _leadz( x ) )
 463
 464 #elif defined( _CRAYC )
 465
 466 #include <intrinsics.h>
 467
 468 #define FT_MSB( x )  (FT_Int)( 31 - _leadz32( x ) )
 469
 470 #endif /* FT_MSB macro definitions */
 471
 472 #endif /* !FT_CONFIG_OPTION_NO_ASSEMBLER */
 473
 474
 475 #ifndef FT_MSB
 476
 477   FT_BASE( FT_Int )
 478   FT_MSB( FT_UInt32  z );
 479
 480 #endif
 481
 482
 483   /*
 484    * Return sqrt(x*x+y*y), which is the same as `FT_Vector_Length' but uses
 485    * two fixed-point arguments instead.
 486    */
 487   FT_BASE( FT_Fixed )
 488   FT_Hypot( FT_Fixed  x,
 489             FT_Fixed  y );
 490
 491
 492 #if 0
 493
 494   /**************************************************************************
 495    *
 496    * @function:
 497    *   FT_SqrtFixed
 498    *
 499    * @description:
 500    *   Computes the square root of a 16.16 fixed-point value.
 501    *
 502    * @input:
 503    *   x ::
 504    *     The value to compute the root for.
 505    *
 506    * @return:
 507    *   The result of 'sqrt(x)'.
 508    *
 509    * @note:
 510    *   This function is not very fast.
 511    */
 512   FT_BASE( FT_Int32 )
 513   FT_SqrtFixed( FT_Int32  x );
 514
 515 #endif /* 0 */
 516
 517
 518 #define INT_TO_F26DOT6( x )    ( (FT_Long)(x) * 64  )    /* << 6  */
 519 #define INT_TO_F2DOT14( x )    ( (FT_Long)(x) * 16384 )  /* << 14 */
 520 #define INT_TO_FIXED( x )      ( (FT_Long)(x) * 65536 )  /* << 16 */
 521 #define F2DOT14_TO_FIXED( x )  ( (FT_Long)(x) * 4 )      /* << 2  */
 522 #define FIXED_TO_INT( x )      ( FT_RoundFix( x ) >> 16 )
 523
 524 #define ROUND_F26DOT6( x )     ( ( (x) + 32 - ( x < 0 ) ) & -64 )
 525
 526   /*
 527    * The following macros have two purposes.
 528    *
 529    * - Tag places where overflow is expected and harmless.
 530    *
 531    * - Avoid run-time sanitizer errors.
 532    *
 533    * Use with care!
 534    */
 535 #define ADD_INT( a, b )                           \
 536           (FT_Int)( (FT_UInt)(a) + (FT_UInt)(b) )
 537 #define SUB_INT( a, b )                           \
 538           (FT_Int)( (FT_UInt)(a) - (FT_UInt)(b) )
 539 #define MUL_INT( a, b )                           \
 540           (FT_Int)( (FT_UInt)(a) * (FT_UInt)(b) )
 541 #define NEG_INT( a )                              \
 542           (FT_Int)( (FT_UInt)0 - (FT_UInt)(a) )
 543
 544 #define ADD_LONG( a, b )                             \
 545           (FT_Long)( (FT_ULong)(a) + (FT_ULong)(b) )
 546 #define SUB_LONG( a, b )                             \
 547           (FT_Long)( (FT_ULong)(a) - (FT_ULong)(b) )
 548 #define MUL_LONG( a, b )                             \
 549           (FT_Long)( (FT_ULong)(a) * (FT_ULong)(b) )
 550 #define NEG_LONG( a )                                \
 551           (FT_Long)( (FT_ULong)0 - (FT_ULong)(a) )
 552
 553 #define ADD_INT32( a, b )                               \
 554           (FT_Int32)( (FT_UInt32)(a) + (FT_UInt32)(b) )
 555 #define SUB_INT32( a, b )                               \
 556           (FT_Int32)( (FT_UInt32)(a) - (FT_UInt32)(b) )
 557 #define MUL_INT32( a, b )                               \
 558           (FT_Int32)( (FT_UInt32)(a) * (FT_UInt32)(b) )
 559 #define NEG_INT32( a )                                  \
 560           (FT_Int32)( (FT_UInt32)0 - (FT_UInt32)(a) )
 561
 562 #ifdef FT_INT64
 563
 564 #define ADD_INT64( a, b )                               \
 565           (FT_Int64)( (FT_UInt64)(a) + (FT_UInt64)(b) )
 566 #define SUB_INT64( a, b )                               \
 567           (FT_Int64)( (FT_UInt64)(a) - (FT_UInt64)(b) )
 568 #define MUL_INT64( a, b )                               \
 569           (FT_Int64)( (FT_UInt64)(a) * (FT_UInt64)(b) )
 570 #define NEG_INT64( a )                                  \
 571           (FT_Int64)( (FT_UInt64)0 - (FT_UInt64)(a) )
 572
 573 #endif /* FT_INT64 */
 574
 575
 576 FT_END_HEADER
 577
 578 #endif /* FTCALC_H_ */
 579
 580
 581 /* END */