1 /****************************************************************************
5 * Arithmetic computations (specification).
7 * Copyright (C) 1996-2023 by
8 * David Turner, Robert Wilhelm, and Werner Lemberg.
10 * This file is part of the FreeType project, and may only be used,
11 * modified, and distributed under the terms of the FreeType project
12 * license, LICENSE.TXT. By continuing to use, modify, or distribute
13 * this file you indicate that you have read the license and
14 * understand and accept it fully.
23 #include <freetype/freetype.h>
25 #include "compiler-macros.h"
30 /**************************************************************************
32 * FT_MulDiv() and FT_MulFix() are declared in freetype.h.
36 #ifndef FT_CONFIG_OPTION_NO_ASSEMBLER
37 /* Provide assembler fragments for performance-critical functions. */
38 /* These must be defined `static __inline__' with GCC. */
40 #if defined( __CC_ARM ) || defined( __ARMCC__ ) /* RVCT */
42 #define FT_MULFIX_ASSEMBLER FT_MulFix_arm
44 /* documentation is in freetype.h */
/* 16.16 fixed-point multiply with rounding for the ARM RVCT/armcc  */
/* compiler: computes the full 64-bit product, adds a rounding      */
/* bias, and returns bits 16..47.                                   */
/* NOTE(review): this fragment appears truncated by extraction --   */
/* the second parameter, braces, and the `__asm' block delimiters   */
/* are not visible here; confirm against the full source before     */
/* editing.  The leading numbers are source-line artifacts.         */
46 static __inline FT_Int32
47 FT_MulFix_arm( FT_Int32 a,
55 smull t2, t, b, a /* (lo=t2,hi=t) = a*b */
/* Rounding bias: `asr #31' yields 0 for non-negative products and  */
/* -1 for negative ones, so the bias is 0x8000 or 0x7FFF.           */
56 mov a, t, asr #31 /* a = (hi >> 31) */
57 add a, a, #0x8000 /* a += 0x8000 */
/* 64-bit addition of the bias: carry out of the low word is        */
/* propagated into the high word.                                   */
58 adds t2, t2, a /* t2 += a */
59 adc t, t, #0 /* t += carry */
/* Recombine: result = (hi:lo) >> 16. */
60 mov a, t2, lsr #16 /* a = t2 >> 16 */
61 orr a, a, t, lsl #16 /* a |= t << 16 */
66 #endif /* __CC_ARM || __ARMCC__ */
71 #if defined( __arm__ ) && \
72 ( !defined( __thumb__ ) || defined( __thumb2__ ) ) && \
73 !( defined( __CC_ARM ) || defined( __ARMCC__ ) )
75 #define FT_MULFIX_ASSEMBLER FT_MulFix_arm
77 /* documentation is in freetype.h */
/* 16.16 fixed-point multiply with rounding, GCC/Clang extended     */
/* inline asm for ARM (ARM state, or Thumb-2).  Same algorithm as   */
/* the RVCT variant: 64-bit product, sign-adjusted 0x8000 bias,     */
/* then bits 16..47 of the sum.                                     */
/* NOTE(review): truncated fragment -- the `#else' between the two  */
/* `add' encodings, the second parameter, braces, and the closing   */
/* input/clobber constraint lines are not visible in this extract.  */
79 static __inline__ FT_Int32
80 FT_MulFix_arm( FT_Int32 a,
86 __asm__ __volatile__ (
87 "smull %1, %2, %4, %3\n\t" /* (lo=%1,hi=%2) = a*b */
88 "mov %0, %2, asr #31\n\t" /* %0 = (hi >> 31) */
/* Clang in Thumb-2 mode needs the explicit wide encoding `add.w'   */
/* for this immediate -- presumably the narrow encoding cannot      */
/* express #0x8000; verify against clang's integrated assembler.    */
89 #if defined( __clang__ ) && defined( __thumb2__ )
90 "add.w %0, %0, #0x8000\n\t" /* %0 += 0x8000 */
92 "add %0, %0, #0x8000\n\t" /* %0 += 0x8000 */
94 "adds %1, %1, %0\n\t" /* %1 += %0 */
95 "adc %2, %2, #0\n\t" /* %2 += carry */
96 "mov %0, %1, lsr #16\n\t" /* %0 = %1 >> 16 */
97 "orr %0, %0, %2, lsl #16\n\t" /* %0 |= %2 << 16 */
/* `=&r' marks early-clobber outputs so the temporaries cannot      */
/* share registers with the inputs.                                 */
98 : "=r"(a), "=&r"(t2), "=&r"(t)
104 #endif /* __arm__ && */
105 /* ( __thumb2__ || !__thumb__ ) && */
106 /* !( __CC_ARM || __ARMCC__ ) */
109 #if defined( __i386__ )
111 #define FT_MULFIX_ASSEMBLER FT_MulFix_i386
113 /* documentation is in freetype.h */
/* 16.16 fixed-point multiply with rounding, GCC inline asm for     */
/* i386.  The visible steps copy the product's high word to `ecx',  */
/* add the 0x8000 rounding bias, and fold the result back into      */
/* `eax'.                                                           */
/* NOTE(review): truncated fragment -- the multiply instruction,    */
/* intermediate shift/adc steps, and the input/clobber constraints  */
/* are not visible here; confirm against the full source before     */
/* editing.                                                         */
115 static __inline__ FT_Int32
116 FT_MulFix_i386( FT_Int32 a,
122 __asm__ __volatile__ (
124 "movl %%edx, %%ecx\n"
126 "addl $0x8000, %%ecx\n"
127 "addl %%ecx, %%eax\n"
131 "addl %%edx, %%eax\n"
/* Outputs are pinned to specific registers: result in eax ("=a"),  */
/* `b' clobbered via edx ("=d").                                    */
132 : "=a"(result), "=d"(b)
140 #endif /* __GNUC__ */
143 #ifdef _MSC_VER /* Visual C++ */
147 #define FT_MULFIX_ASSEMBLER FT_MulFix_i386
149 /* documentation is in freetype.h */
/* 16.16 fixed-point multiply for Visual C++ on x86, declared       */
/* `static __inline' (MSVC spelling of the inline keyword).         */
/* NOTE(review): the entire MSVC `__asm' body is missing from this  */
/* extract; only the start of the signature survives.               */
151 static __inline FT_Int32
152 FT_MulFix_i386( FT_Int32 a,
177 #endif /* _MSC_VER */
180 #if defined( __GNUC__ ) && defined( __x86_64__ )
182 #define FT_MULFIX_ASSEMBLER FT_MulFix_x86_64
/* 16.16 fixed-point multiply with rounding for GCC on x86-64.      */
/* NOTE(review): this extract is missing several lines -- the       */
/* second parameter, braces, the `#else' separating the plain-C     */
/* and inline-asm variants, the rounding step of the plain-C        */
/* variant, and most of the asm template.  Confirm against the      */
/* full source before editing.                                      */
184 static __inline__ FT_Int32
185 FT_MulFix_x86_64( FT_Int32 a,
188 /* Temporarily disable the warning that C90 doesn't support `long long'. */
190 #if __GNUC__ > 4 || ( __GNUC__ == 4 && __GNUC_MINOR__ >= 6 )
191 #pragma GCC diagnostic push
192 #pragma GCC diagnostic ignored "-Wlong-long"
196 /* Technically not an assembly fragment, but GCC does a really good */
197 /* job at inlining it and generating good machine code for it. */
201 ret = (long long)a * b;
205 return (FT_Int32)( ret >> 16 );
208 /* For some reason, GCC 4.6 on Ubuntu 12.04 generates invalid machine */
209 /* code from the lines below. The main issue is that `wide_a' is not */
210 /* properly initialized by sign-extending `a'. Instead, the generated */
211 /* machine code assumes that the register that contains `a' on input */
212 /* can be used directly as a 64-bit value, which is wrong most of the */
/* Workaround: sign-extend both operands into explicit 64-bit       */
/* locals before the asm statement so the registers hold proper     */
/* 64-bit values.                                                   */
214 long long wide_a = (long long)a;
215 long long wide_b = (long long)b;
219 __asm__ __volatile__ (
/* `lea' adds the 0x8000 rounding bias and the other operand in a   */
/* single address computation (lea does not modify the flags).      */
223 "lea 0x8000(%1, %0), %0\n"
/* `=&r' early-clobber outputs: `result' and the reused `wide_a'.   */
225 : "=&r"(result), "=&r"(wide_a)
229 return (FT_Int32)result;
232 #if __GNUC__ > 4 || ( __GNUC__ == 4 && __GNUC_MINOR__ >= 6 )
233 #pragma GCC diagnostic pop
237 #endif /* __GNUC__ && __x86_64__ */
239 #endif /* !FT_CONFIG_OPTION_NO_ASSEMBLER */
242 #ifdef FT_CONFIG_OPTION_INLINE_MULFIX
243 #ifdef FT_MULFIX_ASSEMBLER
244 #define FT_MulFix( a, b ) FT_MULFIX_ASSEMBLER( (FT_Int32)(a), (FT_Int32)(b) )
249 /**************************************************************************
255 * A very simple function used to perform the computation '(a*b)/c'
256 * (without rounding) with maximum accuracy (it uses a 64-bit
257 * intermediate integer whenever necessary).
259 * This function isn't necessarily as fast as some processor-specific
260 * operations, but is at least completely portable.
264 * The first multiplier.
266 * The second multiplier.
271 * The result of '(a*b)/c'. This function never traps when trying to
272 * divide by zero; it simply returns 'MaxInt' or 'MinInt' depending on
273 * the signs of 'a' and 'b'.
276 FT_MulDiv_No_Round( FT_Long a,
281 /**************************************************************************
287 * Compute `(s[0] * f[0] + s[1] * f[1] + ...) / 0x10000`, where `s[n]` is
288 * usually a 16.16 scalar.
292 * The array of scalars.
294 * The array of factors.
296 * The number of entries in the array.
299 * The result of `(s[0] * f[0] + s[1] * f[1] + ...) / 0x10000`.
302 * This function is currently used for the scaled delta computation of
303 * variation stores. It internally uses 64-bit data types when
304 * available, otherwise it emulates 64-bit math by using 32-bit
305 * operations, which produce a correct result but most likely at a slower
306 * speed in comparison to the implementation based on `int64_t`.
310 FT_MulAddFix( FT_Fixed* s,
316 * A variant of FT_Matrix_Multiply which scales its result afterwards. The
317 * idea is that both `a' and `b' are scaled by factors of 10 so that the
318 * values are as precise as possible to get a correct result during the
319 * 64bit multiplication. Let `sa' and `sb' be the scaling factors of `a'
320 * and `b', respectively, then the scaling factor of the result is `sa*sb'.
323 FT_Matrix_Multiply_Scaled( const FT_Matrix* a,
329 * Check a matrix. If the transformation would lead to extreme shear or
330 * extreme scaling, for example, return 0. If everything is OK, return 1.
332 * Based on geometric considerations we use the following inequality to
333 * identify a degenerate matrix.
335 * 32 * abs(xx*yy - xy*yx) < xx^2 + xy^2 + yx^2 + yy^2
337 * Value 32 is heuristic.
340 FT_Matrix_Check( const FT_Matrix* matrix );
344 * A variant of FT_Vector_Transform. See comments for
345 * FT_Matrix_Multiply_Scaled.
348 FT_Vector_Transform_Scaled( FT_Vector* vector,
349 const FT_Matrix* matrix,
354 * This function normalizes a vector and returns its original length. The
355 * normalized vector is a 16.16 fixed-point unit vector with length close
356 * to 0x10000. The accuracy of the returned length is limited to 16 bits
357 * also. The function utilizes quick inverse square root approximation
358 * without divisions and square roots relying on Newton's iterations
362 FT_Vector_NormLen( FT_Vector* vector );
366 * Return -1, 0, or +1, depending on the orientation of a given corner. We
367 * use the Cartesian coordinate system, with positive vertical values going
368 * upwards. The function returns +1 if the corner turns to the left, -1 to
369 * the right, and 0 for undecidable cases.
372 ft_corner_orientation( FT_Pos in_x,
379 * Return TRUE if a corner is flat or nearly flat. This is equivalent to
380 * saying that the corner point is close to its neighbors, or inside an
381 * ellipse defined by the neighbor focal points to be more precise.
384 ft_corner_is_flat( FT_Pos in_x,
391 * Return the most significant bit index.
394 #ifndef FT_CONFIG_OPTION_NO_ASSEMBLER
396 #if defined( __clang__ ) || ( defined( __GNUC__ ) && \
397 ( __GNUC__ > 3 || ( __GNUC__ == 3 && __GNUC_MINOR__ >= 4 ) ) )
399 #if FT_SIZEOF_INT == 4
401 #define FT_MSB( x ) ( 31 - __builtin_clz( x ) )
403 #elif FT_SIZEOF_LONG == 4
405 #define FT_MSB( x ) ( 31 - __builtin_clzl( x ) )
409 #elif defined( _MSC_VER ) && _MSC_VER >= 1400
411 #if defined( _WIN32_WCE )
413 #include <cmnintrin.h>
414 #pragma intrinsic( _CountLeadingZeros )
416 #define FT_MSB( x ) ( 31 - _CountLeadingZeros( x ) )
418 #elif defined( _M_ARM64 ) || defined( _M_ARM )
421 #pragma intrinsic( _CountLeadingZeros )
423 #define FT_MSB( x ) ( 31 - _CountLeadingZeros( x ) )
425 #elif defined( _M_IX86 ) || defined( _M_AMD64 ) || defined( _M_IA64 )
428 #pragma intrinsic( _BitScanReverse )
/* Most-significant-bit index via the MSVC `_BitScanReverse'        */
/* intrinsic (x86 / x64 / IA-64 targets).                           */
/* NOTE(review): truncated fragment -- the braces and the local     */
/* `where' declaration (an `unsigned long', as required by the      */
/* intrinsic) are not visible here.  The result for x == 0 looks    */
/* unspecified, as with the other FT_MSB variants -- verify.        */
430 static __inline FT_Int32
431 FT_MSB_i386( FT_UInt32 x )
436 _BitScanReverse( &where, x );
438 return (FT_Int32)where;
441 #define FT_MSB( x ) FT_MSB_i386( x )
445 #elif defined( __WATCOMC__ ) && defined( __386__ )
/* Most-significant-bit index for Open Watcom on i386, bound to     */
/* inline machine code through `#pragma aux': the argument arrives  */
/* in eax (`__parm [__eax]') and only eax is modified.              */
/* NOTE(review): the instruction list of the pragma (presumably a   */
/* `bsr' on eax) is missing from this extract -- confirm against    */
/* the full source.                                                 */
447 extern __inline FT_Int32
448 FT_MSB_i386( FT_UInt32 x );
450 #pragma aux FT_MSB_i386 = \
452 __parm [__eax] __nomemory \
454 __modify __exact [__eax] __nomemory;
456 #define FT_MSB( x ) FT_MSB_i386( x )
458 #elif defined( __DECC ) || defined( __DECCXX )
460 #include <builtins.h>
462 #define FT_MSB( x ) (FT_Int)( 63 - _leadz( x ) )
464 #elif defined( _CRAYC )
466 #include <intrinsics.h>
468 #define FT_MSB( x ) (FT_Int)( 31 - _leadz32( x ) )
470 #endif /* FT_MSB macro definitions */
472 #endif /* !FT_CONFIG_OPTION_NO_ASSEMBLER */
478 FT_MSB( FT_UInt32 z );
484 * Return sqrt(x*x+y*y), which is the same as `FT_Vector_Length' but uses
485 * two fixed-point arguments instead.
488 FT_Hypot( FT_Fixed x,
494 /**************************************************************************
500 * Computes the square root of a 16.16 fixed-point value.
504 * The value to compute the root for.
507 * The result of 'sqrt(x)'.
510 * This function is not very fast.
513 FT_SqrtFixed( FT_Int32 x );
/* Conversion helpers between plain integers and the 26.6, 2.14,    */
/* and 16.16 fixed-point formats.  The scalings are written as      */
/* multiplications rather than left shifts because left-shifting a  */
/* negative value is undefined behaviour in C; the compiler emits   */
/* the same code.  (This edit removes stray line-number residue     */
/* that had been fused into the macro lines.)                       */
#define INT_TO_F26DOT6( x )    ( (FT_Long)(x) * 64 )     /* << 6  */
#define INT_TO_F2DOT14( x )    ( (FT_Long)(x) * 16384 )  /* << 14 */
#define INT_TO_FIXED( x )      ( (FT_Long)(x) * 65536 )  /* << 16 */
#define F2DOT14_TO_FIXED( x )  ( (FT_Long)(x) * 4 )      /* << 2  */
/* Round to nearest integer first, then drop the fractional bits.   */
#define FIXED_TO_INT( x )      ( FT_RoundFix( x ) >> 16 )
/* Round a 26.6 fixed-point value to the nearest integer multiple   */
/* of 64 (one pixel); halves round away from zero because the bias  */
/* is reduced by one for negative input via `- ( (x) < 0 )'.  The   */
/* mask with -64 assumes two's-complement arithmetic.               */
/*                                                                  */
/* Fix: the macro argument is now fully parenthesized -- the old    */
/* form used bare `x' inside `( x < 0 )', which mis-expands for     */
/* low-precedence argument expressions such as conditionals.        */
#define ROUND_F26DOT6( x )  ( ( (x) + 32 - ( (x) < 0 ) ) & -64 )
527 * The following macros have two purposes.
529 * - Tag places where overflow is expected and harmless.
531 * - Avoid run-time sanitizer errors.
535 #define ADD_INT( a, b ) \
536 (FT_Int)( (FT_UInt)(a) + (FT_UInt)(b) )
537 #define SUB_INT( a, b ) \
538 (FT_Int)( (FT_UInt)(a) - (FT_UInt)(b) )
539 #define MUL_INT( a, b ) \
540 (FT_Int)( (FT_UInt)(a) * (FT_UInt)(b) )
541 #define NEG_INT( a ) \
542 (FT_Int)( (FT_UInt)0 - (FT_UInt)(a) )
544 #define ADD_LONG( a, b ) \
545 (FT_Long)( (FT_ULong)(a) + (FT_ULong)(b) )
546 #define SUB_LONG( a, b ) \
547 (FT_Long)( (FT_ULong)(a) - (FT_ULong)(b) )
548 #define MUL_LONG( a, b ) \
549 (FT_Long)( (FT_ULong)(a) * (FT_ULong)(b) )
550 #define NEG_LONG( a ) \
551 (FT_Long)( (FT_ULong)0 - (FT_ULong)(a) )
553 #define ADD_INT32( a, b ) \
554 (FT_Int32)( (FT_UInt32)(a) + (FT_UInt32)(b) )
555 #define SUB_INT32( a, b ) \
556 (FT_Int32)( (FT_UInt32)(a) - (FT_UInt32)(b) )
557 #define MUL_INT32( a, b ) \
558 (FT_Int32)( (FT_UInt32)(a) * (FT_UInt32)(b) )
559 #define NEG_INT32( a ) \
560 (FT_Int32)( (FT_UInt32)0 - (FT_UInt32)(a) )
564 #define ADD_INT64( a, b ) \
565 (FT_Int64)( (FT_UInt64)(a) + (FT_UInt64)(b) )
566 #define SUB_INT64( a, b ) \
567 (FT_Int64)( (FT_UInt64)(a) - (FT_UInt64)(b) )
568 #define MUL_INT64( a, b ) \
569 (FT_Int64)( (FT_UInt64)(a) * (FT_UInt64)(b) )
570 #define NEG_INT64( a ) \
571 (FT_Int64)( (FT_UInt64)0 - (FT_UInt64)(a) )
573 #endif /* FT_INT64 */
578 #endif /* FTCALC_H_ */