interface/khronos/common/khrn_int_util.h

   1 /*
   2 Copyright (c) 2012, Broadcom Europe Ltd
   3 All rights reserved.
   4
   5 Redistribution and use in source and binary forms, with or without
   6 modification, are permitted provided that the following conditions are met:
   7     * Redistributions of source code must retain the above copyright
   8       notice, this list of conditions and the following disclaimer.
   9     * Redistributions in binary form must reproduce the above copyright
  10       notice, this list of conditions and the following disclaimer in the
  11       documentation and/or other materials provided with the distribution.
  12     * Neither the name of the copyright holder nor the
  13       names of its contributors may be used to endorse or promote products
  14       derived from this software without specific prior written permission.
  15
  16 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
  17 ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
  18 WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
  19 DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY
  20 DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
  21 (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
  22 LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
  23 ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
  24 (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
  25 SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  26 */
  27
  28 #ifndef KHRN_INT_UTIL_H
  29 #define KHRN_INT_UTIL_H
  30
  31 #include <ctype.h>
  32 #include <float.h>
  33 #include <math.h>
  34
  35 #include "interface/khronos/common/khrn_int_common.h"
  36 #if !defined(__VIDEOCORE__) && !defined(WIN32)  // threadsx/nucleus define LONG which clashses
  37 #include "interface/vcos/vcos.h"
  38 #endif
  39
  40 /******************************************************************************
  41 replacements for videocore intrinsics
  42 ******************************************************************************/
  43
  44 #ifdef _VIDEOCORE
  45 #include <vc/intrinsics.h>
  46 #define _minf(x,y) _min((float)(x),(float)(y))
  47 #define _maxf(x,y) _max((float)(x),(float)(y))
  48 #else
  49 static INLINE int32_t _bmask(int32_t x, int32_t y)
  50 {
  51    return x & ((1 << (y & 0x1f)) - 1);
  52 }
  53
  54 static INLINE int32_t _min(int32_t x, int32_t y)
  55 {
  56    return x < y ? x : y;
  57 }
  58
  59 static INLINE int32_t _max(int32_t x, int32_t y)
  60 {
  61    return x > y ? x : y;
  62 }
  63
  64 #if defined(_MSC_VER)
  65 static INLINE int32_t _msb(uint32_t x)
  66 {
  67    int32_t l = -1;
  68
  69    if (x)
  70       __asm {
  71          bsr eax, x
  72          mov l, eax
  73       }
  74
  75    return l;
  76 }
  77 #elif defined __CC_ARM
  78 static INLINE int32_t _msb(uint32_t x)
  79 {
  80    return 31 - __clz(x);
  81 }
  82 #elif defined(__GNUC__)
  83 static INLINE int32_t _msb(uint32_t x)
  84 {
  85    return x ? (31 - __builtin_clz(x)) : -1;
  86 }
  87 #else
  88 static INLINE int32_t _msb(uint32_t x) /* unsigned to get lsr */
  89 {
  90    int32_t msb = -1;
  91    while (x != 0) {
  92       ++msb;
  93       x >>= 1;
  94    }
  95    return msb;
  96 }
  97 #endif
  98
  99 static INLINE uint32_t _count(uint32_t x)
 100 {
 101    uint32_t count = 0;
 102    while (x != 0) {
 103       x &= x - 1;
 104       ++count;
 105    }
 106    return count;
 107 }
 108
 109 #if defined __CC_ARM && __TARGET_ARCH_THUMB >= 4
 110 static INLINE uint32_t _bitrev(uint32_t x, uint32_t y)
 111 {
 112    return __rbit(x) >> (32-y);
 113 }
 114 #else
 115 static INLINE uint32_t _bitrev(uint32_t x, uint32_t y)
 116 {
 117    uint32_t bitrev = 0;
 118    uint32_t i;
 119    for (i = 0; i != y; ++i) {
 120       bitrev |= ((x >> i) & 1) << (y - i - 1);
 121    }
 122    return bitrev;
 123 }
 124 #endif
 125
 126 #ifdef __CC_ARM
 127 static INLINE int32_t _adds(int32_t x, int32_t y)
 128 {
 129    return __qadd(x, y);
 130 }
 131
 132 static INLINE int32_t _subs(int32_t x, int32_t y)
 133 {
 134    return __qsub(x, y);
 135 }
 136
 137 static INLINE uint32_t _ror(uint32_t x, uint32_t y)
 138 {
 139    return __ror(x, y);
 140 }
 141 #else
 142 static INLINE int32_t _adds(int32_t x, int32_t y)
 143 {
 144    int32_t z = x + y;
 145    return (y > 0) ? ((z < x) ? (int32_t)0x7fffffff : z) : ((z > x) ? (int32_t)0x80000000 : z);
 146 }
 147
 148 static INLINE int32_t _subs(int32_t x, int32_t y)
 149 {
 150    int32_t z = x - y;
 151    return (y > 0) ? ((z > x) ? (int32_t)0x80000000 : z) : ((z < x) ? (int32_t)0x7fffffff : z);
 152 }
 153
 154 static INLINE uint32_t _ror(uint32_t x, uint32_t y)
 155 {
 156    return (x << (32 - y)) | (x >> y);
 157 }
 158 #endif // __CC_ARM
 159
 160 static INLINE int32_t _abs(int32_t x)
 161 {
 162    return x > 0 ? x : -x;
 163 }
 164
 165 static INLINE float _minf(float x, float y)
 166 {
 167    return x < y ? x : y;
 168 }
 169
 170 static INLINE float _maxf(float x, float y)
 171 {
 172    return x > y ? x : y;
 173 }
 174
 175 #endif // !_VIDEOCORE
 176
 177
 178 /******************************************************************************
 179 misc stuff
 180 ******************************************************************************/
 181
 182 #define ARR_COUNT(ARR) (sizeof(ARR) / sizeof(*(ARR)))
 183
 184 /* sign-extend 16-bit value with range [-0x4000, 0xbfff] */
 185 static INLINE int32_t s_ext_off16(int32_t x)
 186 {
 187    return ((int32_t)(int16_t)(x - 0x4000)) + 0x4000;
 188 }
 189
 190 static INLINE bool is_power_of_2(uint32_t x)
 191 {
 192    return (x != 0) && ((x & (x - 1)) == 0);
 193 }
 194
 195 static INLINE uint32_t next_power_of_2(uint32_t x)
 196 {
 197    return is_power_of_2(x) ? x : (uint32_t)(1 << (_msb(x) + 1));
 198 }
 199
 200 static INLINE uint32_t round_up(uint32_t x, uint32_t y)
 201 {
 202    vcos_assert(is_power_of_2(y));
 203    return (x + (y - 1)) & ~(y - 1);
 204 }
 205
 206 static INLINE void *round_up_ptr(void *x, uint32_t y)
 207 {
 208    vcos_assert(is_power_of_2(y));
 209    return (void *)(((uintptr_t)x + (uintptr_t)(y - 1)) & ~(uintptr_t)(y - 1));
 210 }
 211
 212 static INLINE uint32_t mod(int32_t x, int32_t y)
 213 {
 214    int32_t m = x % y;
 215    return (m < 0) ? (m + y) : m;
 216 }
 217
 218 extern int khrn_get_type_size(int type /* GLenum*/);
 219
 220 static INLINE int find_max(int count, int size, const void *indices)
 221 {
 222    int i;
 223    int32_t max = -1;
 224
 225    switch (size) {
 226    case 1:
 227    {
 228       uint8_t *u = (uint8_t *)indices;
 229
 230       for (i = 0; i < count; i++)
 231          max = _max( max, (int32_t) u[i]);
 232
 233       break;
 234    }
 235    case 2:
 236    {
 237       uint16_t *u = (uint16_t *)indices;
 238
 239       for (i = 0; i < count; i++)
 240          max = _max( max, (int32_t) u[i]);
 241
 242       break;
 243    }
 244    default:
 245       UNREACHABLE();
 246       break;
 247    }
 248
 249    return (int) max;
 250 }
 251
 252 /******************************************************************************
 253 for poking around inside floats (we assume ieee-754)
 254 ******************************************************************************/
 255
 256 typedef union {
 257    float f;
 258    uint32_t bits;
 259 } KHRN_FLOAT_BITS_T;
 260
 261 static INLINE uint32_t float_to_bits(float f)
 262 {
 263    KHRN_FLOAT_BITS_T t;
 264    t.f = f;
 265    return t.bits;
 266 }
 267
 268 static INLINE float float_from_bits(uint32_t bits)
 269 {
 270    KHRN_FLOAT_BITS_T t;
 271    t.bits = bits;
 272    return t.f;
 273 }
 274
 275 /******************************************************************************
 276 input cleaning stuff
 277 ******************************************************************************/
 278
 279 #include "interface/khronos/common/khrn_int_util_cr.h"
 280
 281 static INLINE void clean_floats(float *dst, const float *src, uint32_t count)
 282 {
 283    uint32_t i;
 284    for (i = 0; i != count; ++i) {
 285       dst[i] = clean_float(src[i]);
 286    }
 287 }
 288
 289 /******************************************************************************
 290 float to int conversions
 291 ******************************************************************************/
 292
 293 static INLINE float r2ni_to_r2n_bias(float f, int32_t shift)
 294 {
 295    vcos_assert((shift >= -129) && (shift <= 124));
 296    return f + float_from_bits(((127 - (shift + 2)) << 23) | 0x7fffff);
 297 }
 298
 299 /*
 300    convert float to integer value with shift
 301    saturating, round to nearest
 302
 303    on videocore, we support shifts in [-32, 31]. we only need to support shifts
 304    of 0 and 16 for client-side code
 305 */
 306
 307 static INLINE int32_t float_to_int_shift(float f, int32_t shift)
 308 {
 309 #ifdef _VIDEOCORE
 310    /* floattouint is wrapping, round to negative infinity. shift should be in [-32, 31] */
 311    vcos_assert((shift >= -32) && (shift <= 31));
 312    f = r2ni_to_r2n_bias(f, shift);
 313    if (f < float_from_bits((1 << 31) | ((127 + (31 - shift)) << 23))) { return 0x80000000; }
 314    if (f > float_from_bits(((127 + (30 - shift)) << 23) | 0x7fffff))  { return 0x7fffffff; }
 315    return _floattouint(f, shift);
 316 #else
 317    vcos_assert((shift >= 0) && (shift <= 31));
 318    f *= (float)(uint32_t)(1 << shift);
 319    f += (f < 0.0f) ? -0.49999997f : 0.49999997f; /* assume float -> int conversion is round to zero */
 320    if (f < -2.14748365e9f) { return 0x80000000; }
 321    if (f > 2.14748352e9f)  { return 0x7fffffff; }
 322    return (int32_t)f;
 323 #endif
 324 }
 325
 326 /*
 327    convert float to 48-bit integer value with shift
 328    saturating, round to nearest
 329
 330    this is only supported on videocore. shift should be in [-16, 31]
 331 */
 332
 333 #ifdef _VIDEOCORE
 334 static INLINE int64_t float_to_int48_shift(float f, int32_t shift)
 335 {
 336    /* floattouint is wrapping, round to negative infinity. shift should be in [-32, 31] */
 337    vcos_assert((shift >= -16) && (shift <= 31));
 338    f = r2ni_to_r2n_bias(f, shift);
 339    if (f < float_from_bits((1 << 31) | ((127 + (47 - shift)) << 23))) { return 0xffff800000000000ll; }
 340    if (f > float_from_bits(((127 + (46 - shift)) << 23) | 0x7fffff))  { return 0x00007fffffffffffll; }
 341    return ((int64_t)(int32_t)_floattouint(f, shift - 16) << 16) | _floattouint(f, shift);
 342 }
 343 #endif
 344
 345 /*
 346    convert float to integer value
 347    saturating, round to nearest
 348 */
 349
 350 static INLINE int32_t float_to_int(float f)
 351 {
 352    return float_to_int_shift(f, 0);
 353 }
 354
 355 /*
 356    convert float to integer value
 357    saturating, round to negative inf
 358 */
 359
 360 static INLINE int32_t float_to_int_floor(float f)
 361 {
 362    /*
 363       special-case handling of small negative floats
 364       this is so we return -1 for negative denormals (which the vg cts requires)
 365       (we shouldn't need this if the fp library/hw properly handle denormals)
 366    */
 367
 368    uint32_t u = float_to_bits(f);
 369    if (((u & (1 << 31)) && (u + u)) && (f > -1.0f)) {
 370       return -1;
 371    }
 372
 373    f = floorf(f); /* assume float -> int conversion is round to zero */
 374    if (f < -2.14748365e9f) { return 0x80000000; }
 375    if (f > 2.14748352e9f) { return 0x7fffffff; }
 376    return (int32_t)f;
 377 }
 378
 379 /*
 380    convert float to integer value
 381    saturating, round to zero
 382 */
 383
 384 static INLINE int32_t float_to_int_zero(float f)
 385 {
 386    /* assume float -> int conversion is round to zero */
 387    if (f < -2.14748365e9f) { return 0x80000000; }
 388    if (f > 2.14748352e9f) { return 0x7fffffff; }
 389    return (int32_t)f;
 390 }
 391
 392 /*
 393    convert float to 16.16 fixed point value
 394    saturating, round to nearest
 395
 396    Khronos documentation:
 397
 398    If a value is so large in magnitude that it cannot be represented with the
 399    requested type, then the nearest value representable using the requested type
 400    is returned.
 401 */
 402
 403 static INLINE int32_t float_to_fixed(float f)
 404 {
 405    return float_to_int_shift(f, 16);
 406 }
 407
 408 /******************************************************************************
 409 exact float tests (in case fp library/hw don't handle denormals correctly)
 410 ******************************************************************************/
 411
 412 static INLINE bool floats_identical(float x, float y)
 413 {
 414    return float_to_bits(x) == float_to_bits(y);
 415 }
 416
 417 static INLINE bool is_zero(float f)
 418 {
 419    uint32_t u = float_to_bits(f);
 420    return !(u + u);
 421 }
 422
 423 static INLINE bool is_le_zero(float f)
 424 {
 425    uint32_t u = float_to_bits(f);
 426    return (u & (1 << 31)) || !u;
 427 }
 428
 429 /******************************************************************************
 430 alignment stuff
 431 ******************************************************************************/
 432
 433 #ifdef _MSC_VER
 434    #define alignof(T) __alignof(T)
 435 #elif defined(__CC_ARM)
 436    #define alignof(T) __alignof__(T)
 437 #else
 438    #define alignof(T) (sizeof(struct { T t; char ch; }) - sizeof(T))
 439 #endif
 440
 441 /*
 442    must use both ALIGNED and ALIGN_TO...
 443    ALIGNED(16) int align_me[10];
 444    ALIGN_TO(align_me, 16);
 445 */
 446
 447 #ifdef _MSC_VER
 448    #define ALIGNED(ALIGNMENT) __declspec(align(ALIGNMENT))
 449    #define ALIGN_TO(X, ALIGNMENT)
 450 #elif defined(__GNUC__)
 451    #define ALIGNED(ALIGNMENT) __attribute__ ((aligned(ALIGNMENT)))
 452    #define ALIGN_TO(X, ALIGNMENT)
 453 #elif defined(__HIGHC__)
 454    #define ALIGNED(ALIGMENT)
 455    #define ALIGN_TO(X, ALIGNMENT) pragma Align_to(ALIGNMENT, X)
 456 #else
 457    /* leave undefined (will get error on use) */
 458 #endif
 459
 460 /******************************************************************************
 461 range/rect intersect stuff
 462 ******************************************************************************/
 463
 464 extern void khrn_clip_range(
 465    int32_t *x0, int32_t *l0,
 466    int32_t x1, int32_t l1);
 467
 468 extern void khrn_clip_range2(
 469    int32_t *ax0, int32_t *bx0, int32_t *l0,
 470    int32_t ax1, int32_t al1,
 471    int32_t bx1, int32_t bl1);
 472
 473 extern void khrn_clip_rect(
 474    int32_t *x0, int32_t *y0, int32_t *w0, int32_t *h0,
 475    int32_t x1, int32_t y1, int32_t w1, int32_t h1);
 476
 477 extern void khrn_clip_rect2(
 478    int32_t *ax0, int32_t *ay0, int32_t *bx0, int32_t *by0, int32_t *w0, int32_t *h0,
 479    int32_t ax1, int32_t ay1, int32_t aw1, int32_t ah1,
 480    int32_t bx1, int32_t by1, int32_t bw1, int32_t bh1);
 481
 482 static INLINE bool khrn_ranges_intersect(
 483    int32_t x0, int32_t l0,
 484    int32_t x1, int32_t l1)
 485 {
 486    return (x0 < (x1 + l1)) && (x1 < (x0 + l0));
 487 }
 488
 489 static INLINE bool khrn_rects_intersect(
 490    int32_t x0, int32_t y0, int32_t w0, int32_t h0,
 491    int32_t x1, int32_t y1, int32_t w1, int32_t h1)
 492 {
 493    return khrn_ranges_intersect(x0, w0, x1, w1) && khrn_ranges_intersect(y0, h0, y1, h1);
 494 }
 495
 496 /******************************************************************************
 497 memory barrier
 498 ******************************************************************************/
 499
 500 #ifdef KHRN_SINGLE_THREADED
 501    /* everything is done in one thread, no need for barriers */
 502    static INLINE void khrn_barrier(void) {}
 503 #elif defined(_VIDEOCORE)
 504    /* don't need a real memory barrier
 505     * extern function should do as a compiler barrier, but todo: is there a better way? */
 506    extern void khrn_barrier(void);
 507 #else
 508    /* leave undefined (will get error on use) */
 509 #endif
 510
 511 #endif