2 Copyright (c) 2012, Broadcom Europe Ltd
5 Redistribution and use in source and binary forms, with or without
6 modification, are permitted provided that the following conditions are met:
7 * Redistributions of source code must retain the above copyright
8 notice, this list of conditions and the following disclaimer.
9 * Redistributions in binary form must reproduce the above copyright
10 notice, this list of conditions and the following disclaimer in the
11 documentation and/or other materials provided with the distribution.
12 * Neither the name of the copyright holder nor the
13 names of its contributors may be used to endorse or promote products
14 derived from this software without specific prior written permission.
16 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
17 ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
18 WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
19 DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY
20 DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
21 (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
22 LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
23 ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
24 (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
25 SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
28 #ifndef KHRN_INT_UTIL_H
29 #define KHRN_INT_UTIL_H
35 #include "interface/khronos/common/khrn_int_common.h"
#if !defined(__VIDEOCORE__) && !defined(WIN32) // threadsx/nucleus define LONG which clashes
37 #include "interface/vcos/vcos.h"
40 /******************************************************************************
41 replacements for videocore intrinsics
42 ******************************************************************************/
45 #include <vc/intrinsics.h>
46 #define _minf(x,y) _min((float)(x),(float)(y))
47 #define _maxf(x,y) _max((float)(x),(float)(y))
/*
   keep the low (y & 0x1f) bits of x and clear all the others

   only the bottom five bits of y are used, so the field width is in [0, 31]
*/
static INLINE int32_t _bmask(int32_t x, int32_t y)
{
   /* build the mask unsigned: (1 << 31) overflows a signed int */
   uint32_t mask = ((uint32_t)1 << (y & 0x1f)) - 1;
   /* mask never has bit 31 set, so the result always fits in int32_t */
   return (int32_t)((uint32_t)x & mask);
}
54 static INLINE int32_t _min(int32_t x, int32_t y)
59 static INLINE int32_t _max(int32_t x, int32_t y)
65 static INLINE int32_t _msb(uint32_t x)
77 #elif defined __CC_ARM
78 static INLINE int32_t _msb(uint32_t x)
82 #elif defined(__GNUC__)
/*
   index of the most significant set bit of x (bit 0 is the lsb), or -1 when
   x is zero
*/
static INLINE int32_t _msb(uint32_t x)
{
   /* __builtin_clz is undefined for 0, so zero is handled separately */
   if (x == 0) {
      return -1;
   }
   return 31 - __builtin_clz(x);
}
88 static INLINE int32_t _msb(uint32_t x) /* unsigned to get lsr */
99 static INLINE uint32_t _count(uint32_t x)
109 #if defined __CC_ARM && __TARGET_ARCH_THUMB >= 4
110 static INLINE uint32_t _bitrev(uint32_t x, uint32_t y)
112 return __rbit(x) >> (32-y);
/*
   reverse the order of the low y bits of x (bit 0 becomes bit y - 1)

   bits of x at position y and above are ignored. y is expected to be in
   [0, 32]; y == 0 yields 0
*/
static INLINE uint32_t _bitrev(uint32_t x, uint32_t y)
{
   uint32_t bitrev = 0;
   uint32_t i;
   for (i = 0; i != y; ++i) {
      /* move bit i of x to the mirrored position y - i - 1 */
      bitrev |= ((x >> i) & 1) << (y - i - 1);
   }
   return bitrev;
}
127 static INLINE int32_t _adds(int32_t x, int32_t y)
132 static INLINE int32_t _subs(int32_t x, int32_t y)
137 static INLINE uint32_t _ror(uint32_t x, uint32_t y)
/*
   saturating signed add: returns x + y clamped to [-0x80000000, 0x7fffffff]
*/
static INLINE int32_t _adds(int32_t x, int32_t y)
{
   /* widen first: spotting overflow after a 32-bit signed add relies on undefined behaviour */
   int64_t z = (int64_t)x + (int64_t)y;
   if (z > (int64_t)0x7fffffff) { return (int32_t)0x7fffffff; }
   if (z < -(int64_t)0x7fffffff - 1) { return (int32_t)(-0x7fffffff - 1); }
   return (int32_t)z;
}
/*
   saturating signed subtract: returns x - y clamped to
   [-0x80000000, 0x7fffffff]
*/
static INLINE int32_t _subs(int32_t x, int32_t y)
{
   /* widen first: spotting overflow after a 32-bit signed subtract relies on undefined behaviour */
   int64_t z = (int64_t)x - (int64_t)y;
   if (z > (int64_t)0x7fffffff) { return (int32_t)0x7fffffff; }
   if (z < -(int64_t)0x7fffffff - 1) { return (int32_t)(-0x7fffffff - 1); }
   return (int32_t)z;
}
/*
   rotate x right by y bits

   only the low five bits of y are used, so a count of 0 or 32 returns x
   unchanged
*/
static INLINE uint32_t _ror(uint32_t x, uint32_t y)
{
   /* mask both counts: shifting a 32-bit value by 32 is undefined */
   y &= 31;
   return (x >> y) | (x << ((32 - y) & 31));
}
/*
   absolute value of x

   the most negative value has no positive counterpart; on two's-complement
   targets it is returned unchanged
*/
static INLINE int32_t _abs(int32_t x)
{
   /* negate in unsigned arithmetic: -x overflows for the most negative value */
   return (x > 0) ? x : (int32_t)(0u - (uint32_t)x);
}
/*
   smaller of two floats

   if the comparison is false (including when either operand is a NaN), y is
   returned
*/
static INLINE float _minf(float x, float y)
{
   if (x < y) {
      return x;
   }
   return y;
}
/*
   larger of two floats

   if the comparison is false (including when either operand is a NaN), y is
   returned
*/
static INLINE float _maxf(float x, float y)
{
   if (x > y) {
      return x;
   }
   return y;
}
175 #endif // !_VIDEOCORE
178 /******************************************************************************
180 ******************************************************************************/
182 #define ARR_COUNT(ARR) (sizeof(ARR) / sizeof(*(ARR)))
/*
   sign-extend 16-bit value with range [-0x4000, 0xbfff]

   the low 16 bits of x are mapped onto that window: the value is shifted down
   by 0x4000, wrapped to a signed 16-bit quantity, then shifted back up
*/
static INLINE int32_t s_ext_off16(int32_t x)
{
   int16_t wrapped = (int16_t)(x - 0x4000);
   return ((int32_t)wrapped) + 0x4000;
}
/*
   true iff exactly one bit of x is set (zero is not a power of two)
*/
static INLINE bool is_power_of_2(uint32_t x)
{
   if (x == 0) {
      return false;
   }
   /* x & (x - 1) clears the lowest set bit; nothing is left for a power of two */
   return (x & (x - 1)) == 0;
}
195 static INLINE uint32_t next_power_of_2(uint32_t x)
197 return is_power_of_2(x) ? x : (uint32_t)(1 << (_msb(x) + 1));
200 static INLINE uint32_t round_up(uint32_t x, uint32_t y)
202 vcos_assert(is_power_of_2(y));
203 return (x + (y - 1)) & ~(y - 1);
206 static INLINE void *round_up_ptr(void *x, uint32_t y)
208 vcos_assert(is_power_of_2(y));
209 return (void *)(((uintptr_t)x + (uintptr_t)(y - 1)) & ~(uintptr_t)(y - 1));
/*
   remainder of x / y, moved into [0, y) when the C remainder is negative

   y must be non-zero. the fix-up assumes y > 0
*/
static INLINE uint32_t mod(int32_t x, int32_t y)
{
   /* C's % takes the sign of the dividend, so m may be negative here */
   int32_t m = x % y;
   return (m < 0) ? (m + y) : m;
}
218 extern int khrn_get_type_size(int type /* GLenum*/);
220 static INLINE int find_max(int count, int size, const void *indices)
228 uint8_t *u = (uint8_t *)indices;
230 for (i = 0; i < count; i++)
231 max = _max( max, (int32_t) u[i]);
237 uint16_t *u = (uint16_t *)indices;
239 for (i = 0; i < count; i++)
240 max = _max( max, (int32_t) u[i]);
252 /******************************************************************************
253 for poking around inside floats (we assume ieee-754)
254 ******************************************************************************/
261 static INLINE uint32_t float_to_bits(float f)
268 static INLINE float float_from_bits(uint32_t bits)
275 /******************************************************************************
277 ******************************************************************************/
279 #include "interface/khronos/common/khrn_int_util_cr.h"
281 static INLINE void clean_floats(float *dst, const float *src, uint32_t count)
284 for (i = 0; i != count; ++i) {
285 dst[i] = clean_float(src[i]);
289 /******************************************************************************
290 float to int conversions
291 ******************************************************************************/
293 static INLINE float r2ni_to_r2n_bias(float f, int32_t shift)
295 vcos_assert((shift >= -129) && (shift <= 124));
296 return f + float_from_bits(((127 - (shift + 2)) << 23) | 0x7fffff);
300 convert float to integer value with shift
301 saturating, round to nearest
303 on videocore, we support shifts in [-32, 31]. we only need to support shifts
304 of 0 and 16 for client-side code
307 static INLINE int32_t float_to_int_shift(float f, int32_t shift)
310 /* floattouint is wrapping, round to negative infinity. shift should be in [-32, 31] */
311 vcos_assert((shift >= -32) && (shift <= 31));
312 f = r2ni_to_r2n_bias(f, shift);
313 if (f < float_from_bits((1 << 31) | ((127 + (31 - shift)) << 23))) { return 0x80000000; }
314 if (f > float_from_bits(((127 + (30 - shift)) << 23) | 0x7fffff)) { return 0x7fffffff; }
315 return _floattouint(f, shift);
317 vcos_assert((shift >= 0) && (shift <= 31));
318 f *= (float)(uint32_t)(1 << shift);
319 f += (f < 0.0f) ? -0.49999997f : 0.49999997f; /* assume float -> int conversion is round to zero */
320 if (f < -2.14748365e9f) { return 0x80000000; }
321 if (f > 2.14748352e9f) { return 0x7fffffff; }
327 convert float to 48-bit integer value with shift
328 saturating, round to nearest
330 this is only supported on videocore. shift should be in [-16, 31]
334 static INLINE int64_t float_to_int48_shift(float f, int32_t shift)
336 /* floattouint is wrapping, round to negative infinity. shift should be in [-32, 31] */
337 vcos_assert((shift >= -16) && (shift <= 31));
338 f = r2ni_to_r2n_bias(f, shift);
339 if (f < float_from_bits((1 << 31) | ((127 + (47 - shift)) << 23))) { return 0xffff800000000000ll; }
340 if (f > float_from_bits(((127 + (46 - shift)) << 23) | 0x7fffff)) { return 0x00007fffffffffffll; }
341 return ((int64_t)(int32_t)_floattouint(f, shift - 16) << 16) | _floattouint(f, shift);
346 convert float to integer value
347 saturating, round to nearest
350 static INLINE int32_t float_to_int(float f)
352 return float_to_int_shift(f, 0);
356 convert float to integer value
357 saturating, round to negative inf
360 static INLINE int32_t float_to_int_floor(float f)
363 special-case handling of small negative floats
364 this is so we return -1 for negative denormals (which the vg cts requires)
365 (we shouldn't need this if the fp library/hw properly handle denormals)
368 uint32_t u = float_to_bits(f);
369 if (((u & (1 << 31)) && (u + u)) && (f > -1.0f)) {
373 f = floorf(f); /* assume float -> int conversion is round to zero */
374 if (f < -2.14748365e9f) { return 0x80000000; }
375 if (f > 2.14748352e9f) { return 0x7fffffff; }
380 convert float to integer value
381 saturating, round to zero
/*
   convert float to integer value, saturating, round to zero

   values below -2^31 return the most negative int32_t and values of 2^31 or
   more return 0x7fffffff. a NaN fails both range checks and reaches the
   cast, whose result is not defined
*/
static INLINE int32_t float_to_int_zero(float f)
{
   /* assume float -> int conversion is round to zero */
   if (f < -2.14748365e9f) { return (int32_t)(-0x7fffffff - 1); }
   if (f > 2.14748352e9f) { return 0x7fffffff; }
   return (int32_t)f;
}
393 convert float to 16.16 fixed point value
394 saturating, round to nearest
396 Khronos documentation:
398 If a value is so large in magnitude that it cannot be represented with the
399 requested type, then the nearest value representable using the requested type
403 static INLINE int32_t float_to_fixed(float f)
405 return float_to_int_shift(f, 16);
408 /******************************************************************************
409 exact float tests (in case fp library/hw don't handle denormals correctly)
410 ******************************************************************************/
412 static INLINE bool floats_identical(float x, float y)
414 return float_to_bits(x) == float_to_bits(y);
417 static INLINE bool is_zero(float f)
419 uint32_t u = float_to_bits(f);
423 static INLINE bool is_le_zero(float f)
425 uint32_t u = float_to_bits(f);
426 return (u & (1 << 31)) || !u;
429 /******************************************************************************
431 ******************************************************************************/
434 #define alignof(T) __alignof(T)
435 #elif defined(__CC_ARM)
436 #define alignof(T) __alignof__(T)
438 #define alignof(T) (sizeof(struct { T t; char ch; }) - sizeof(T))
442 must use both ALIGNED and ALIGN_TO...
443 ALIGNED(16) int align_me[10];
444 ALIGN_TO(align_me, 16);
448 #define ALIGNED(ALIGNMENT) __declspec(align(ALIGNMENT))
449 #define ALIGN_TO(X, ALIGNMENT)
450 #elif defined(__GNUC__)
451 #define ALIGNED(ALIGNMENT) __attribute__ ((aligned(ALIGNMENT)))
452 #define ALIGN_TO(X, ALIGNMENT)
453 #elif defined(__HIGHC__)
454 #define ALIGNED(ALIGMENT)
455 #define ALIGN_TO(X, ALIGNMENT) pragma Align_to(ALIGNMENT, X)
457 /* leave undefined (will get error on use) */
460 /******************************************************************************
461 range/rect intersect stuff
462 ******************************************************************************/
464 extern void khrn_clip_range(
465 int32_t *x0, int32_t *l0,
466 int32_t x1, int32_t l1);
468 extern void khrn_clip_range2(
469 int32_t *ax0, int32_t *bx0, int32_t *l0,
470 int32_t ax1, int32_t al1,
471 int32_t bx1, int32_t bl1);
473 extern void khrn_clip_rect(
474 int32_t *x0, int32_t *y0, int32_t *w0, int32_t *h0,
475 int32_t x1, int32_t y1, int32_t w1, int32_t h1);
477 extern void khrn_clip_rect2(
478 int32_t *ax0, int32_t *ay0, int32_t *bx0, int32_t *by0, int32_t *w0, int32_t *h0,
479 int32_t ax1, int32_t ay1, int32_t aw1, int32_t ah1,
480 int32_t bx1, int32_t by1, int32_t bw1, int32_t bh1);
/*
   true iff the ranges [x0, x0 + l0) and [x1, x1 + l1) overlap

   (x0, l0) and (x1, l1) are each a start position and a length
*/
static INLINE bool khrn_ranges_intersect(
   int32_t x0, int32_t l0,
   int32_t x1, int32_t l1)
{
   /* end points are formed in 64 bits so that x + l cannot overflow */
   int64_t end0 = (int64_t)x0 + (int64_t)l0;
   int64_t end1 = (int64_t)x1 + (int64_t)l1;
   return ((int64_t)x0 < end1) && ((int64_t)x1 < end0);
}
489 static INLINE bool khrn_rects_intersect(
490 int32_t x0, int32_t y0, int32_t w0, int32_t h0,
491 int32_t x1, int32_t y1, int32_t w1, int32_t h1)
493 return khrn_ranges_intersect(x0, w0, x1, w1) && khrn_ranges_intersect(y0, h0, y1, h1);
496 /******************************************************************************
498 ******************************************************************************/
500 #ifdef KHRN_SINGLE_THREADED
501 /* everything is done in one thread, no need for barriers */
502 static INLINE void khrn_barrier(void) {}
503 #elif defined(_VIDEOCORE)
504 /* don't need a real memory barrier
505 * extern function should do as a compiler barrier, but todo: is there a better way? */
506 extern void khrn_barrier(void);
508 /* leave undefined (will get error on use) */