1 /***************************************************************************/
5 /* Arithmetic computations (specification). */
7 /* Copyright 1996-2006, 2008, 2009, 2012-2014 by */
8 /* David Turner, Robert Wilhelm, and Werner Lemberg. */
10 /* This file is part of the FreeType project, and may only be used, */
11 /* modified, and distributed under the terms of the FreeType project */
12 /* license, LICENSE.TXT. By continuing to use, modify, or distribute */
13 /* this file you indicate that you have read the license and */
14 /* understand and accept it fully. */
16 /***************************************************************************/
24 #include FT_FREETYPE_H
30 /*************************************************************************/
32 /* FT_MulDiv() and FT_MulFix() are declared in freetype.h. */
34 /*************************************************************************/
36 #ifndef FT_CONFIG_OPTION_NO_ASSEMBLER
37 /* Provide assembler fragments for performance-critical functions. */
38 /* These must be defined `static __inline__' with GCC. */
40 #if defined( __CC_ARM ) || defined( __ARMCC__ ) /* RVCT */
42 #define FT_MULFIX_ASSEMBLER FT_MulFix_arm
44 /* documentation is in freetype.h */
46 static __inline FT_Int32
47 FT_MulFix_arm( FT_Int32 a,
50 register FT_Int32 t, t2;
55 smull t2, t, b, a /* (lo=t2,hi=t) = a*b */
56 mov a, t, asr #31 /* a = (hi >> 31) */
57 add a, a, #0x8000 /* a += 0x8000 */
58 adds t2, t2, a /* t2 += a */
59 adc t, t, #0 /* t += carry */
60 mov a, t2, lsr #16 /* a = t2 >> 16 */
61 orr a, a, t, lsl #16 /* a |= t << 16 */
66 #endif /* __CC_ARM || __ARMCC__ */
71 #if defined( __arm__ ) && \
72 ( !defined( __thumb__ ) || defined( __thumb2__ ) ) && \
73 !( defined( __CC_ARM ) || defined( __ARMCC__ ) )
75 #define FT_MULFIX_ASSEMBLER FT_MulFix_arm
77 /* documentation is in freetype.h */
79 static __inline__ FT_Int32
80 FT_MulFix_arm( FT_Int32 a,
83 register FT_Int32 t, t2;
86 __asm__ __volatile__ (
87 "smull %1, %2, %4, %3\n\t" /* (lo=%1,hi=%2) = a*b */
88 "mov %0, %2, asr #31\n\t" /* %0 = (hi >> 31) */
89 #if defined( __clang__ ) && defined( __thumb2__ )
90 "add.w %0, %0, #0x8000\n\t" /* %0 += 0x8000 */
92 "add %0, %0, #0x8000\n\t" /* %0 += 0x8000 */
94 "adds %1, %1, %0\n\t" /* %1 += %0 */
95 "adc %2, %2, #0\n\t" /* %2 += carry */
96 "mov %0, %1, lsr #16\n\t" /* %0 = %1 >> 16 */
97 "orr %0, %0, %2, lsl #16\n\t" /* %0 |= %2 << 16 */
98 : "=r"(a), "=&r"(t2), "=&r"(t)
104 #endif /* __arm__ && */
105 /* ( __thumb2__ || !__thumb__ ) && */
106 /* !( __CC_ARM || __ARMCC__ ) */
109 #if defined( __i386__ )
111 #define FT_MULFIX_ASSEMBLER FT_MulFix_i386
113 /* documentation is in freetype.h */
115 static __inline__ FT_Int32
116 FT_MulFix_i386( FT_Int32 a,
119 register FT_Int32 result;
122 __asm__ __volatile__ (
124 "movl %%edx, %%ecx\n"
126 "addl $0x8000, %%ecx\n"
127 "addl %%ecx, %%eax\n"
131 "addl %%edx, %%eax\n"
132 : "=a"(result), "=d"(b)
140 #endif /* __GNUC__ */
143 #ifdef _MSC_VER /* Visual C++ */
147 #define FT_MULFIX_ASSEMBLER FT_MulFix_i386
149 /* documentation is in freetype.h */
151 static __inline FT_Int32
152 FT_MulFix_i386( FT_Int32 a,
155 register FT_Int32 result;
177 #endif /* _MSC_VER */
180 #if defined( __GNUC__ ) && defined( __x86_64__ )
182 #define FT_MULFIX_ASSEMBLER FT_MulFix_x86_64
184 static __inline__ FT_Int32
185 FT_MulFix_x86_64( FT_Int32 a,
188 /* Temporarily disable the warning that C90 doesn't support */
190 #if __GNUC__ > 4 || ( __GNUC__ == 4 && __GNUC_MINOR__ >= 6 )
191 #pragma GCC diagnostic push
192 #pragma GCC diagnostic ignored "-Wlong-long"
196 /* Technically not an assembly fragment, but GCC does a really good */
197 /* job at inlining it and generating good machine code for it. */
201 ret = (long long)a * b;
205 return (FT_Int32)( ret >> 16 );
208 /* For some reason, GCC 4.6 on Ubuntu 12.04 generates invalid machine */
209 /* code from the lines below. The main issue is that `wide_a' is not */
210 /* properly initialized by sign-extending `a'. Instead, the generated */
211 /* machine code assumes that the register that contains `a' on input */
212 /* can be used directly as a 64-bit value, which is wrong most of the */
214 long long wide_a = (long long)a;
215 long long wide_b = (long long)b;
219 __asm__ __volatile__ (
223 "lea 0x8000(%1, %0), %0\n"
225 : "=&r"(result), "=&r"(wide_a)
229 return (FT_Int32)result;
232 #if __GNUC__ > 4 || ( __GNUC__ == 4 && __GNUC_MINOR__ >= 6 )
233 #pragma GCC diagnostic pop
237 #endif /* __GNUC__ && __x86_64__ */
239 #endif /* !FT_CONFIG_OPTION_NO_ASSEMBLER */
242 #ifdef FT_CONFIG_OPTION_INLINE_MULFIX
243 #ifdef FT_MULFIX_ASSEMBLER
244 #define FT_MulFix( a, b ) FT_MULFIX_ASSEMBLER( (FT_Int32)(a), (FT_Int32)(b) )
249 /*************************************************************************/
252 /* FT_MulDiv_No_Round */
255 /* A very simple function used to perform the computation `(a*b)/c' */
256 /* (without rounding) with maximum accuracy (it uses a 64-bit */
257 /* intermediate integer whenever necessary). */
259 /* This function isn't necessarily as fast as some processor specific */
260 /* operations, but is at least completely portable. */
263 /* a :: The first multiplier. */
264 /* b :: The second multiplier. */
265 /* c :: The divisor. */
268 /* The result of `(a*b)/c'. This function never traps when trying to */
269 /* divide by zero; it simply returns `MaxInt' or `MinInt' depending */
270 /* on the signs of `a' and `b'. */
273 FT_MulDiv_No_Round( FT_Long a,
279 * A variant of FT_Matrix_Multiply which scales its result afterwards.
280 * The idea is that both `a' and `b' are scaled by factors of 10 so that
281 * the values are as precise as possible to get a correct result during
282 * the 64bit multiplication. Let `sa' and `sb' be the scaling factors of
283 * `a' and `b', respectively, then the scaling factor of the result is
287 FT_Matrix_Multiply_Scaled( const FT_Matrix* a,
293 * A variant of FT_Vector_Transform. See comments for
294 * FT_Matrix_Multiply_Scaled.
297 FT_Vector_Transform_Scaled( FT_Vector* vector,
298 const FT_Matrix* matrix,
303 * Return -1, 0, or +1, depending on the orientation of a given corner.
304 * We use the Cartesian coordinate system, with positive vertical values
305 * going upwards. The function returns +1 if the corner turns to the
306 * left, -1 to the right, and 0 for undecidable cases.
309 ft_corner_orientation( FT_Pos in_x,
316 * Return TRUE if a corner is flat or nearly flat. This is equivalent to
317 * saying that the corner point is close to its neighbors, or inside an
318 * ellipse defined by the neighbor focal points to be more precise.
321 ft_corner_is_flat( FT_Pos in_x,
328 * Return the most significant bit index.
331 #ifndef FT_CONFIG_OPTION_NO_ASSEMBLER
332 #if defined( __GNUC__ ) && \
333 ( __GNUC__ > 3 || ( __GNUC__ == 3 && __GNUC_MINOR__ >= 4 ) )
335 #if FT_SIZEOF_INT == 4
337 #define FT_MSB( x ) ( 31 - __builtin_clz( x ) )
339 #elif FT_SIZEOF_LONG == 4
341 #define FT_MSB( x ) ( 31 - __builtin_clzl( x ) )
345 #endif /* __GNUC__ */
346 #endif /* !FT_CONFIG_OPTION_NO_ASSEMBLER */
351 FT_MSB( FT_UInt32 z );
357 * Return sqrt(x*x+y*y), which is the same as `FT_Vector_Length' but uses
358 * two fixed-point arguments instead.
361 FT_Hypot( FT_Fixed x,
367 /*************************************************************************/
373 /* Computes the square root of a 16.16 fixed-point value. */
376 /* x :: The value to compute the root for. */
379 /* The result of `sqrt(x)'. */
382 /* This function is not very fast. */
385 FT_SqrtFixed( FT_Int32 x );
390 #define INT_TO_F26DOT6( x ) ( (FT_Long)(x) << 6 )
391 #define INT_TO_F2DOT14( x ) ( (FT_Long)(x) << 14 )
392 #define INT_TO_FIXED( x ) ( (FT_Long)(x) << 16 )
393 #define F2DOT14_TO_FIXED( x ) ( (FT_Long)(x) << 2 )
394 #define FLOAT_TO_FIXED( x ) ( (FT_Long)( x * 65536.0 ) )
395 #define FIXED_TO_INT( x ) ( FT_RoundFix( x ) >> 16 )
397 #define ROUND_F26DOT6( x ) ( x >= 0 ? ( ( (x) + 32 ) & -64 ) \
398 : ( -( ( 32 - (x) ) & -64 ) ) )
403 #endif /* __FTCALC_H__ */