modules/core/src/rand.cpp

   1 /*M///////////////////////////////////////////////////////////////////////////////////////
   2 //
   3 //  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
   4 //
   5 //  By downloading, copying, installing or using the software you agree to this license.
   6 //  If you do not agree to this license, do not download, install,
   7 //  copy or use the software.
   8 //
   9 //
  10 //                           License Agreement
  11 //                For Open Source Computer Vision Library
  12 //
  13 // Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
  14 // Copyright (C) 2009, Willow Garage Inc., all rights reserved.
  15 // Third party copyrights are property of their respective owners.
  16 //
  17 // Redistribution and use in source and binary forms, with or without modification,
  18 // are permitted provided that the following conditions are met:
  19 //
  20 //   * Redistribution's of source code must retain the above copyright notice,
  21 //     this list of conditions and the following disclaimer.
  22 //
  23 //   * Redistribution's in binary form must reproduce the above copyright notice,
  24 //     this list of conditions and the following disclaimer in the documentation
  25 //     and/or other materials provided with the distribution.
  26 //
  27 //   * The name of the copyright holders may not be used to endorse or promote products
  28 //     derived from this software without specific prior written permission.
  29 //
  30 // This software is provided by the copyright holders and contributors "as is" and
  31 // any express or implied warranties, including, but not limited to, the implied
  32 // warranties of merchantability and fitness for a particular purpose are disclaimed.
  33 // In no event shall the Intel Corporation or contributors be liable for any direct,
  34 // indirect, incidental, special, exemplary, or consequential damages
  35 // (including, but not limited to, procurement of substitute goods or services;
  36 // loss of use, data, or profits; or business interruption) however caused
  37 // and on any theory of liability, whether in contract, strict liability,
  38 // or tort (including negligence or otherwise) arising in any way out of
  39 // the use of this software, even if advised of the possibility of such damage.
  40 //
  41 //M*/
  42
  43 /* ////////////////////////////////////////////////////////////////////
  44 //
  45 //  Filling CvMat/IplImage instances with random numbers
  46 //
  47 // */
  48
  49 #include "precomp.hpp"
  50
  51 #if defined _WIN32 || defined WINCE
  52     #include <windows.h>
  53     #undef small
  54     #undef min
  55     #undef max
  56     #undef abs
  57 #endif
  58
  59 #if defined __SSE2__ || (defined _M_IX86_FP && 2 == _M_IX86_FP)
  60     #include "emmintrin.h"
  61 #endif
  62
  63 namespace cv
  64 {
  65
  66 ///////////////////////////// Functions Declaration //////////////////////////////////////
  67
  68 /*
  69    Multiply-with-carry generator is used here:
  70    temp = ( A*X(n) + carry )
  71    X(n+1) = temp mod (2^32)
  72    carry = temp / (2^32)
  73 */
  74
  75 #define  RNG_NEXT(x)    ((uint64)(unsigned)(x)*CV_RNG_COEFF + ((x) >> 32))
  76
  77 /***************************************************************************************\
  78 *                           Pseudo-Random Number Generators (PRNGs)                     *
  79 \***************************************************************************************/
  80
  81 template<typename T> static void
  82 randBits_( T* arr, int len, uint64* state, const Vec2i* p, bool small_flag )
  83 {
  84     uint64 temp = *state;
  85     int i;
  86
  87     if( !small_flag )
  88     {
  89         for( i = 0; i <= len - 4; i += 4 )
  90         {
  91             int t0, t1;
  92
  93             temp = RNG_NEXT(temp);
  94             t0 = ((int)temp & p[i][0]) + p[i][1];
  95             temp = RNG_NEXT(temp);
  96             t1 = ((int)temp & p[i+1][0]) + p[i+1][1];
  97             arr[i] = saturate_cast<T>(t0);
  98             arr[i+1] = saturate_cast<T>(t1);
  99
 100             temp = RNG_NEXT(temp);
 101             t0 = ((int)temp & p[i+2][0]) + p[i+2][1];
 102             temp = RNG_NEXT(temp);
 103             t1 = ((int)temp & p[i+3][0]) + p[i+3][1];
 104             arr[i+2] = saturate_cast<T>(t0);
 105             arr[i+3] = saturate_cast<T>(t1);
 106         }
 107     }
 108     else
 109     {
 110         for( i = 0; i <= len - 4; i += 4 )
 111         {
 112             int t0, t1, t;
 113             temp = RNG_NEXT(temp);
 114             t = (int)temp;
 115             t0 = (t & p[i][0]) + p[i][1];
 116             t1 = ((t >> 8) & p[i+1][0]) + p[i+1][1];
 117             arr[i] = saturate_cast<T>(t0);
 118             arr[i+1] = saturate_cast<T>(t1);
 119
 120             t0 = ((t >> 16) & p[i+2][0]) + p[i+2][1];
 121             t1 = ((t >> 24) & p[i+3][0]) + p[i+3][1];
 122             arr[i+2] = saturate_cast<T>(t0);
 123             arr[i+3] = saturate_cast<T>(t1);
 124         }
 125     }
 126
 127     for( ; i < len; i++ )
 128     {
 129         int t0;
 130         temp = RNG_NEXT(temp);
 131
 132         t0 = ((int)temp & p[i][0]) + p[i][1];
 133         arr[i] = saturate_cast<T>(t0);
 134     }
 135
 136     *state = temp;
 137 }
 138
 139 struct DivStruct
 140 {
 141     unsigned d;
 142     unsigned M;
 143     int sh1, sh2;
 144     int delta;
 145 };
 146
 147 template<typename T> static void
 148 randi_( T* arr, int len, uint64* state, const DivStruct* p )
 149 {
 150     uint64 temp = *state;
 151     int i = 0;
 152     unsigned t0, t1, v0, v1;
 153
 154     for( i = 0; i <= len - 4; i += 4 )
 155     {
 156         temp = RNG_NEXT(temp);
 157         t0 = (unsigned)temp;
 158         temp = RNG_NEXT(temp);
 159         t1 = (unsigned)temp;
 160         v0 = (unsigned)(((uint64)t0 * p[i].M) >> 32);
 161         v1 = (unsigned)(((uint64)t1 * p[i+1].M) >> 32);
 162         v0 = (v0 + ((t0 - v0) >> p[i].sh1)) >> p[i].sh2;
 163         v1 = (v1 + ((t1 - v1) >> p[i+1].sh1)) >> p[i+1].sh2;
 164         v0 = t0 - v0*p[i].d + p[i].delta;
 165         v1 = t1 - v1*p[i+1].d + p[i+1].delta;
 166         arr[i] = saturate_cast<T>((int)v0);
 167         arr[i+1] = saturate_cast<T>((int)v1);
 168
 169         temp = RNG_NEXT(temp);
 170         t0 = (unsigned)temp;
 171         temp = RNG_NEXT(temp);
 172         t1 = (unsigned)temp;
 173         v0 = (unsigned)(((uint64)t0 * p[i+2].M) >> 32);
 174         v1 = (unsigned)(((uint64)t1 * p[i+3].M) >> 32);
 175         v0 = (v0 + ((t0 - v0) >> p[i+2].sh1)) >> p[i+2].sh2;
 176         v1 = (v1 + ((t1 - v1) >> p[i+3].sh1)) >> p[i+3].sh2;
 177         v0 = t0 - v0*p[i+2].d + p[i+2].delta;
 178         v1 = t1 - v1*p[i+3].d + p[i+3].delta;
 179         arr[i+2] = saturate_cast<T>((int)v0);
 180         arr[i+3] = saturate_cast<T>((int)v1);
 181     }
 182
 183     for( ; i < len; i++ )
 184     {
 185         temp = RNG_NEXT(temp);
 186         t0 = (unsigned)temp;
 187         v0 = (unsigned)(((uint64)t0 * p[i].M) >> 32);
 188         v0 = (v0 + ((t0 - v0) >> p[i].sh1)) >> p[i].sh2;
 189         v0 = t0 - v0*p[i].d + p[i].delta;
 190         arr[i] = saturate_cast<T>((int)v0);
 191     }
 192
 193     *state = temp;
 194 }
 195
 196
 197 #define DEF_RANDI_FUNC(suffix, type) \
 198 static void randBits_##suffix(type* arr, int len, uint64* state, \
 199                               const Vec2i* p, bool small_flag) \
 200 { randBits_(arr, len, state, p, small_flag); } \
 201 \
 202 static void randi_##suffix(type* arr, int len, uint64* state, \
 203                            const DivStruct* p, bool ) \
 204 { randi_(arr, len, state, p); }
 205
 206 DEF_RANDI_FUNC(8u, uchar)
 207 DEF_RANDI_FUNC(8s, schar)
 208 DEF_RANDI_FUNC(16u, ushort)
 209 DEF_RANDI_FUNC(16s, short)
 210 DEF_RANDI_FUNC(32s, int)
 211
 212 static void randf_32f( float* arr, int len, uint64* state, const Vec2f* p, bool )
 213 {
 214     uint64 temp = *state;
 215     int i = 0;
 216
 217     for( ; i <= len - 4; i += 4 )
 218     {
 219         float f[4];
 220         f[0] = (float)(int)(temp = RNG_NEXT(temp));
 221         f[1] = (float)(int)(temp = RNG_NEXT(temp));
 222         f[2] = (float)(int)(temp = RNG_NEXT(temp));
 223         f[3] = (float)(int)(temp = RNG_NEXT(temp));
 224
 225         // handwritten SSE is required not for performance but for numerical stability!
 226         // both 32-bit gcc and MSVC compilers trend to generate double precision SSE
 227         // while 64-bit compilers generate single precision SIMD instructions
 228         // so manual vectorisation forces all compilers to the single precision
 229 #if defined __SSE2__ || (defined _M_IX86_FP && 2 == _M_IX86_FP)
 230         __m128 q0 = _mm_loadu_ps((const float*)(p + i));
 231         __m128 q1 = _mm_loadu_ps((const float*)(p + i + 2));
 232
 233         __m128 q01l = _mm_unpacklo_ps(q0, q1);
 234         __m128 q01h = _mm_unpackhi_ps(q0, q1);
 235
 236         __m128 p0 = _mm_unpacklo_ps(q01l, q01h);
 237         __m128 p1 = _mm_unpackhi_ps(q01l, q01h);
 238
 239         _mm_storeu_ps(arr + i, _mm_add_ps(_mm_mul_ps(_mm_loadu_ps(f), p0), p1));
 240 #elif defined __ARM_NEON && defined __aarch64__
 241         // handwritten NEON is required not for performance but for numerical stability!
 242         // 64bit gcc tends to use fmadd instead of separate multiply and add
 243         // use volatile to ensure to separate the multiply and add
 244         float32x4x2_t q = vld2q_f32((const float*)(p + i));
 245
 246         float32x4_t p0 = q.val[0];
 247         float32x4_t p1 = q.val[1];
 248
 249         volatile float32x4_t v0 = vmulq_f32(vld1q_f32(f), p0);
 250         vst1q_f32(arr+i, vaddq_f32(v0, p1));
 251 #else
 252         arr[i+0] = f[0]*p[i+0][0] + p[i+0][1];
 253         arr[i+1] = f[1]*p[i+1][0] + p[i+1][1];
 254         arr[i+2] = f[2]*p[i+2][0] + p[i+2][1];
 255         arr[i+3] = f[3]*p[i+3][0] + p[i+3][1];
 256 #endif
 257     }
 258
 259     for( ; i < len; i++ )
 260     {
 261         temp = RNG_NEXT(temp);
 262 #if defined __SSE2__ || (defined _M_IX86_FP && 2 == _M_IX86_FP)
 263         _mm_store_ss(arr + i, _mm_add_ss(
 264                 _mm_mul_ss(_mm_set_ss((float)(int)temp), _mm_set_ss(p[i][0])),
 265                 _mm_set_ss(p[i][1]))
 266                 );
 267 #elif defined __ARM_NEON && defined __aarch64__
 268         float32x2_t t = vadd_f32(vmul_f32(
 269                 vdup_n_f32((float)(int)temp), vdup_n_f32(p[i][0])),
 270                 vdup_n_f32(p[i][1]));
 271         arr[i] = vget_lane_f32(t, 0);
 272 #else
 273         arr[i] = (int)temp*p[i][0] + p[i][1];
 274 #endif
 275     }
 276
 277     *state = temp;
 278 }
 279
 280
 281 static void
 282 randf_64f( double* arr, int len, uint64* state, const Vec2d* p, bool )
 283 {
 284     uint64 temp = *state;
 285     int64 v = 0;
 286     int i;
 287
 288     for( i = 0; i <= len - 4; i += 4 )
 289     {
 290         double f0, f1;
 291
 292         temp = RNG_NEXT(temp);
 293         v = (temp >> 32)|(temp << 32);
 294         f0 = v*p[i][0] + p[i][1];
 295         temp = RNG_NEXT(temp);
 296         v = (temp >> 32)|(temp << 32);
 297         f1 = v*p[i+1][0] + p[i+1][1];
 298         arr[i] = f0; arr[i+1] = f1;
 299
 300         temp = RNG_NEXT(temp);
 301         v = (temp >> 32)|(temp << 32);
 302         f0 = v*p[i+2][0] + p[i+2][1];
 303         temp = RNG_NEXT(temp);
 304         v = (temp >> 32)|(temp << 32);
 305         f1 = v*p[i+3][0] + p[i+3][1];
 306         arr[i+2] = f0; arr[i+3] = f1;
 307     }
 308
 309     for( ; i < len; i++ )
 310     {
 311         temp = RNG_NEXT(temp);
 312         v = (temp >> 32)|(temp << 32);
 313         arr[i] = v*p[i][0] + p[i][1];
 314     }
 315
 316     *state = temp;
 317 }
 318
 319 typedef void (*RandFunc)(uchar* arr, int len, uint64* state, const void* p, bool small_flag);
 320
 321
 322 static RandFunc randTab[][8] =
 323 {
 324     {
 325         (RandFunc)randi_8u, (RandFunc)randi_8s, (RandFunc)randi_16u, (RandFunc)randi_16s,
 326         (RandFunc)randi_32s, (RandFunc)randf_32f, (RandFunc)randf_64f, 0
 327     },
 328     {
 329         (RandFunc)randBits_8u, (RandFunc)randBits_8s, (RandFunc)randBits_16u, (RandFunc)randBits_16s,
 330         (RandFunc)randBits_32s, 0, 0, 0
 331     }
 332 };
 333
 334 /*
 335    The code below implements the algorithm described in
 336    "The Ziggurat Method for Generating Random Variables"
 337    by Marsaglia and Tsang, Journal of Statistical Software.
 338 */
 339 static void
 340 randn_0_1_32f( float* arr, int len, uint64* state )
 341 {
 342     const float r = 3.442620f; // The start of the right tail
 343     const float rng_flt = 2.3283064365386962890625e-10f; // 2^-32
 344     static unsigned kn[128];
 345     static float wn[128], fn[128];
 346     uint64 temp = *state;
 347     static bool initialized=false;
 348     int i;
 349
 350     if( !initialized )
 351     {
 352         const double m1 = 2147483648.0;
 353         double dn = 3.442619855899, tn = dn, vn = 9.91256303526217e-3;
 354
 355         // Set up the tables
 356         double q = vn/std::exp(-.5*dn*dn);
 357         kn[0] = (unsigned)((dn/q)*m1);
 358         kn[1] = 0;
 359
 360         wn[0] = (float)(q/m1);
 361         wn[127] = (float)(dn/m1);
 362
 363         fn[0] = 1.f;
 364         fn[127] = (float)std::exp(-.5*dn*dn);
 365
 366         for(i=126;i>=1;i--)
 367         {
 368             dn = std::sqrt(-2.*std::log(vn/dn+std::exp(-.5*dn*dn)));
 369             kn[i+1] = (unsigned)((dn/tn)*m1);
 370             tn = dn;
 371             fn[i] = (float)std::exp(-.5*dn*dn);
 372             wn[i] = (float)(dn/m1);
 373         }
 374         initialized = true;
 375     }
 376
 377     for( i = 0; i < len; i++ )
 378     {
 379         float x, y;
 380         for(;;)
 381         {
 382             int hz = (int)temp;
 383             temp = RNG_NEXT(temp);
 384             int iz = hz & 127;
 385             x = hz*wn[iz];
 386             if( (unsigned)std::abs(hz) < kn[iz] )
 387                 break;
 388             if( iz == 0) // iz==0, handles the base strip
 389             {
 390                 do
 391                 {
 392                     x = (unsigned)temp*rng_flt;
 393                     temp = RNG_NEXT(temp);
 394                     y = (unsigned)temp*rng_flt;
 395                     temp = RNG_NEXT(temp);
 396                     x = (float)(-std::log(x+FLT_MIN)*0.2904764);
 397                     y = (float)-std::log(y+FLT_MIN);
 398                 }       // .2904764 is 1/r
 399                 while( y + y < x*x );
 400                 x = hz > 0 ? r + x : -r - x;
 401                 break;
 402             }
 403             // iz > 0, handle the wedges of other strips
 404             y = (unsigned)temp*rng_flt;
 405             temp = RNG_NEXT(temp);
 406             if( fn[iz] + y*(fn[iz - 1] - fn[iz]) < std::exp(-.5*x*x) )
 407                 break;
 408         }
 409         arr[i] = x;
 410     }
 411     *state = temp;
 412 }
 413
 414
 415 double RNG::gaussian(double sigma)
 416 {
 417     float temp;
 418     randn_0_1_32f( &temp, 1, &state );
 419     return temp*sigma;
 420 }
 421
 422
 423 template<typename T, typename PT> static void
 424 randnScale_( const float* src, T* dst, int len, int cn, const PT* mean, const PT* stddev, bool stdmtx )
 425 {
 426     int i, j, k;
 427     if( !stdmtx )
 428     {
 429         if( cn == 1 )
 430         {
 431             PT b = mean[0], a = stddev[0];
 432             for( i = 0; i < len; i++ )
 433                 dst[i] = saturate_cast<T>(src[i]*a + b);
 434         }
 435         else
 436         {
 437             for( i = 0; i < len; i++, src += cn, dst += cn )
 438                 for( k = 0; k < cn; k++ )
 439                     dst[k] = saturate_cast<T>(src[k]*stddev[k] + mean[k]);
 440         }
 441     }
 442     else
 443     {
 444         for( i = 0; i < len; i++, src += cn, dst += cn )
 445         {
 446             for( j = 0; j < cn; j++ )
 447             {
 448                 PT s = mean[j];
 449                 for( k = 0; k < cn; k++ )
 450                     s += src[k]*stddev[j*cn + k];
 451                 dst[j] = saturate_cast<T>(s);
 452             }
 453         }
 454     }
 455 }
 456
 457 static void randnScale_8u( const float* src, uchar* dst, int len, int cn,
 458                             const float* mean, const float* stddev, bool stdmtx )
 459 { randnScale_(src, dst, len, cn, mean, stddev, stdmtx); }
 460
 461 static void randnScale_8s( const float* src, schar* dst, int len, int cn,
 462                             const float* mean, const float* stddev, bool stdmtx )
 463 { randnScale_(src, dst, len, cn, mean, stddev, stdmtx); }
 464
 465 static void randnScale_16u( const float* src, ushort* dst, int len, int cn,
 466                              const float* mean, const float* stddev, bool stdmtx )
 467 { randnScale_(src, dst, len, cn, mean, stddev, stdmtx); }
 468
 469 static void randnScale_16s( const float* src, short* dst, int len, int cn,
 470                              const float* mean, const float* stddev, bool stdmtx )
 471 { randnScale_(src, dst, len, cn, mean, stddev, stdmtx); }
 472
 473 static void randnScale_32s( const float* src, int* dst, int len, int cn,
 474                              const float* mean, const float* stddev, bool stdmtx )
 475 { randnScale_(src, dst, len, cn, mean, stddev, stdmtx); }
 476
 477 static void randnScale_32f( const float* src, float* dst, int len, int cn,
 478                              const float* mean, const float* stddev, bool stdmtx )
 479 { randnScale_(src, dst, len, cn, mean, stddev, stdmtx); }
 480
 481 static void randnScale_64f( const float* src, double* dst, int len, int cn,
 482                              const double* mean, const double* stddev, bool stdmtx )
 483 { randnScale_(src, dst, len, cn, mean, stddev, stdmtx); }
 484
 485 typedef void (*RandnScaleFunc)(const float* src, uchar* dst, int len, int cn,
 486                                const uchar*, const uchar*, bool);
 487
 488 static RandnScaleFunc randnScaleTab[] =
 489 {
 490     (RandnScaleFunc)randnScale_8u, (RandnScaleFunc)randnScale_8s, (RandnScaleFunc)randnScale_16u,
 491     (RandnScaleFunc)randnScale_16s, (RandnScaleFunc)randnScale_32s, (RandnScaleFunc)randnScale_32f,
 492     (RandnScaleFunc)randnScale_64f, 0
 493 };
 494
 495 void RNG::fill( InputOutputArray _mat, int disttype,
 496                 InputArray _param1arg, InputArray _param2arg, bool saturateRange )
 497 {
 498     Mat mat = _mat.getMat(), _param1 = _param1arg.getMat(), _param2 = _param2arg.getMat();
 499     int depth = mat.depth(), cn = mat.channels();
 500     AutoBuffer<double> _parambuf;
 501     int j, k;
 502     bool fast_int_mode = false;
 503     bool smallFlag = true;
 504     RandFunc func = 0;
 505     RandnScaleFunc scaleFunc = 0;
 506
 507     CV_Assert(_param1.channels() == 1 && (_param1.rows == 1 || _param1.cols == 1) &&
 508               (_param1.rows + _param1.cols - 1 == cn || _param1.rows + _param1.cols - 1 == 1 ||
 509                (_param1.size() == Size(1, 4) && _param1.type() == CV_64F && cn <= 4)));
 510     CV_Assert( _param2.channels() == 1 &&
 511                (((_param2.rows == 1 || _param2.cols == 1) &&
 512                 (_param2.rows + _param2.cols - 1 == cn || _param2.rows + _param2.cols - 1 == 1 ||
 513                 (_param1.size() == Size(1, 4) && _param1.type() == CV_64F && cn <= 4))) ||
 514                 (_param2.rows == cn && _param2.cols == cn && disttype == NORMAL)));
 515
 516     Vec2i* ip = 0;
 517     Vec2d* dp = 0;
 518     Vec2f* fp = 0;
 519     DivStruct* ds = 0;
 520     uchar* mean = 0;
 521     uchar* stddev = 0;
 522     bool stdmtx = false;
 523     int n1 = (int)_param1.total();
 524     int n2 = (int)_param2.total();
 525
 526     if( disttype == UNIFORM )
 527     {
 528         _parambuf.allocate(cn*8 + n1 + n2);
 529         double* parambuf = _parambuf;
 530         double* p1 = _param1.ptr<double>();
 531         double* p2 = _param2.ptr<double>();
 532
 533         if( !_param1.isContinuous() || _param1.type() != CV_64F || n1 != cn )
 534         {
 535             Mat tmp(_param1.size(), CV_64F, parambuf);
 536             _param1.convertTo(tmp, CV_64F);
 537             p1 = parambuf;
 538             if( n1 < cn )
 539                 for( j = n1; j < cn; j++ )
 540                     p1[j] = p1[j-n1];
 541         }
 542
 543         if( !_param2.isContinuous() || _param2.type() != CV_64F || n2 != cn )
 544         {
 545             Mat tmp(_param2.size(), CV_64F, parambuf + cn);
 546             _param2.convertTo(tmp, CV_64F);
 547             p2 = parambuf + cn;
 548             if( n2 < cn )
 549                 for( j = n2; j < cn; j++ )
 550                     p2[j] = p2[j-n2];
 551         }
 552
 553         if( depth <= CV_32S )
 554         {
 555             ip = (Vec2i*)(parambuf + cn*2);
 556             for( j = 0, fast_int_mode = true; j < cn; j++ )
 557             {
 558                 double a = std::min(p1[j], p2[j]);
 559                 double b = std::max(p1[j], p2[j]);
 560                 if( saturateRange )
 561                 {
 562                     a = std::max(a, depth == CV_8U || depth == CV_16U ? 0. :
 563                             depth == CV_8S ? -128. : depth == CV_16S ? -32768. : (double)INT_MIN);
 564                     b = std::min(b, depth == CV_8U ? 256. : depth == CV_16U ? 65536. :
 565                             depth == CV_8S ? 128. : depth == CV_16S ? 32768. : (double)INT_MAX);
 566                 }
 567                 ip[j][1] = cvCeil(a);
 568                 int idiff = ip[j][0] = cvFloor(b) - ip[j][1] - 1;
 569                 double diff = b - a;
 570
 571                 fast_int_mode = fast_int_mode && diff <= 4294967296. && (idiff & (idiff+1)) == 0;
 572                 if( fast_int_mode )
 573                     smallFlag = smallFlag && (idiff <= 255);
 574                 else
 575                 {
 576                     if( diff > INT_MAX )
 577                         ip[j][0] = INT_MAX;
 578                     if( a < INT_MIN/2 )
 579                         ip[j][1] = INT_MIN/2;
 580                 }
 581             }
 582
 583             if( !fast_int_mode )
 584             {
 585                 ds = (DivStruct*)(ip + cn);
 586                 for( j = 0; j < cn; j++ )
 587                 {
 588                     ds[j].delta = ip[j][1];
 589                     unsigned d = ds[j].d = (unsigned)(ip[j][0]+1);
 590                     int l = 0;
 591                     while(((uint64)1 << l) < d)
 592                         l++;
 593                     ds[j].M = (unsigned)(((uint64)1 << 32)*(((uint64)1 << l) - d)/d) + 1;
 594                     ds[j].sh1 = std::min(l, 1);
 595                     ds[j].sh2 = std::max(l - 1, 0);
 596                 }
 597             }
 598
 599             func = randTab[fast_int_mode ? 1 : 0][depth];
 600         }
 601         else
 602         {
 603             double scale = depth == CV_64F ?
 604                 5.4210108624275221700372640043497e-20 : // 2**-64
 605                 2.3283064365386962890625e-10;           // 2**-32
 606             double maxdiff = saturateRange ? (double)FLT_MAX : DBL_MAX;
 607
 608             // for each channel i compute such dparam[0][i] & dparam[1][i],
 609             // so that a signed 32/64-bit integer X is transformed to
 610             // the range [param1.val[i], param2.val[i]) using
 611             // dparam[1][i]*X + dparam[0][i]
 612             if( depth == CV_32F )
 613             {
 614                 fp = (Vec2f*)(parambuf + cn*2);
 615                 for( j = 0; j < cn; j++ )
 616                 {
 617                     fp[j][0] = (float)(std::min(maxdiff, p2[j] - p1[j])*scale);
 618                     fp[j][1] = (float)((p2[j] + p1[j])*0.5);
 619                 }
 620             }
 621             else
 622             {
 623                 dp = (Vec2d*)(parambuf + cn*2);
 624                 for( j = 0; j < cn; j++ )
 625                 {
 626                     dp[j][0] = std::min(DBL_MAX, p2[j] - p1[j])*scale;
 627                     dp[j][1] = ((p2[j] + p1[j])*0.5);
 628                 }
 629             }
 630
 631             func = randTab[0][depth];
 632         }
 633         CV_Assert( func != 0 );
 634     }
 635     else if( disttype == CV_RAND_NORMAL )
 636     {
 637         _parambuf.allocate(MAX(n1, cn) + MAX(n2, cn));
 638         double* parambuf = _parambuf;
 639
 640         int ptype = depth == CV_64F ? CV_64F : CV_32F;
 641         int esz = (int)CV_ELEM_SIZE(ptype);
 642
 643         if( _param1.isContinuous() && _param1.type() == ptype && n1 >= cn)
 644             mean = _param1.ptr();
 645         else
 646         {
 647             Mat tmp(_param1.size(), ptype, parambuf);
 648             _param1.convertTo(tmp, ptype);
 649             mean = (uchar*)parambuf;
 650         }
 651
 652         if( n1 < cn )
 653             for( j = n1*esz; j < cn*esz; j++ )
 654                 mean[j] = mean[j - n1*esz];
 655
 656         if( _param2.isContinuous() && _param2.type() == ptype && n2 >= cn)
 657             stddev = _param2.ptr();
 658         else
 659         {
 660             Mat tmp(_param2.size(), ptype, parambuf + MAX(n1, cn));
 661             _param2.convertTo(tmp, ptype);
 662             stddev = (uchar*)(parambuf + MAX(n1, cn));
 663         }
 664
 665         if( n2 < cn )
 666             for( j = n2*esz; j < cn*esz; j++ )
 667                 stddev[j] = stddev[j - n2*esz];
 668
 669         stdmtx = _param2.rows == cn && _param2.cols == cn;
 670         scaleFunc = randnScaleTab[depth];
 671         CV_Assert( scaleFunc != 0 );
 672     }
 673     else
 674         CV_Error( CV_StsBadArg, "Unknown distribution type" );
 675
 676     const Mat* arrays[] = {&mat, 0};
 677     uchar* ptr;
 678     NAryMatIterator it(arrays, &ptr, 1);
 679     int total = (int)it.size, blockSize = std::min((BLOCK_SIZE + cn - 1)/cn, total);
 680     size_t esz = mat.elemSize();
 681     AutoBuffer<double> buf;
 682     uchar* param = 0;
 683     float* nbuf = 0;
 684
 685     if( disttype == UNIFORM )
 686     {
 687         buf.allocate(blockSize*cn*4);
 688         param = (uchar*)(double*)buf;
 689
 690         if( depth <= CV_32S )
 691         {
 692             if( !fast_int_mode )
 693             {
 694                 DivStruct* p = (DivStruct*)param;
 695                 for( j = 0; j < blockSize*cn; j += cn )
 696                     for( k = 0; k < cn; k++ )
 697                         p[j + k] = ds[k];
 698             }
 699             else
 700             {
 701                 Vec2i* p = (Vec2i*)param;
 702                 for( j = 0; j < blockSize*cn; j += cn )
 703                     for( k = 0; k < cn; k++ )
 704                         p[j + k] = ip[k];
 705             }
 706         }
 707         else if( depth == CV_32F )
 708         {
 709             Vec2f* p = (Vec2f*)param;
 710             for( j = 0; j < blockSize*cn; j += cn )
 711                 for( k = 0; k < cn; k++ )
 712                     p[j + k] = fp[k];
 713         }
 714         else
 715         {
 716             Vec2d* p = (Vec2d*)param;
 717             for( j = 0; j < blockSize*cn; j += cn )
 718                 for( k = 0; k < cn; k++ )
 719                     p[j + k] = dp[k];
 720         }
 721     }
 722     else
 723     {
 724         buf.allocate((blockSize*cn+1)/2);
 725         nbuf = (float*)(double*)buf;
 726     }
 727
 728     for( size_t i = 0; i < it.nplanes; i++, ++it )
 729     {
 730         for( j = 0; j < total; j += blockSize )
 731         {
 732             int len = std::min(total - j, blockSize);
 733
 734             if( disttype == CV_RAND_UNI )
 735                 func( ptr, len*cn, &state, param, smallFlag );
 736             else
 737             {
 738                 randn_0_1_32f(nbuf, len*cn, &state);
 739                 scaleFunc(nbuf, ptr, len, cn, mean, stddev, stdmtx);
 740             }
 741             ptr += len*esz;
 742         }
 743     }
 744 }
 745
 746 }
 747
 748 cv::RNG& cv::theRNG()
 749 {
 750     return getCoreTlsData().get()->rng;
 751 }
 752
 753 void cv::setRNGSeed(int seed)
 754 {
 755     theRNG() = RNG(static_cast<uint64>(seed));
 756 }
 757
 758
 759 void cv::randu(InputOutputArray dst, InputArray low, InputArray high)
 760 {
 761     CV_INSTRUMENT_REGION()
 762
 763     theRNG().fill(dst, RNG::UNIFORM, low, high);
 764 }
 765
 766 void cv::randn(InputOutputArray dst, InputArray mean, InputArray stddev)
 767 {
 768     CV_INSTRUMENT_REGION()
 769
 770     theRNG().fill(dst, RNG::NORMAL, mean, stddev);
 771 }
 772
 773 namespace cv
 774 {
 775
 776 template<typename T> static void
 777 randShuffle_( Mat& _arr, RNG& rng, double )
 778 {
 779     unsigned sz = (unsigned)_arr.total();
 780     if( _arr.isContinuous() )
 781     {
 782         T* arr = _arr.ptr<T>();
 783         for( unsigned i = 0; i < sz; i++ )
 784         {
 785             unsigned j = (unsigned)rng % sz;
 786             std::swap( arr[j], arr[i] );
 787         }
 788     }
 789     else
 790     {
 791         CV_Assert( _arr.dims <= 2 );
 792         uchar* data = _arr.ptr();
 793         size_t step = _arr.step;
 794         int rows = _arr.rows;
 795         int cols = _arr.cols;
 796         for( int i0 = 0; i0 < rows; i0++ )
 797         {
 798             T* p = _arr.ptr<T>(i0);
 799             for( int j0 = 0; j0 < cols; j0++ )
 800             {
 801                 unsigned k1 = (unsigned)rng % sz;
 802                 int i1 = (int)(k1 / cols);
 803                 int j1 = (int)(k1 - (unsigned)i1*(unsigned)cols);
 804                 std::swap( p[j0], ((T*)(data + step*i1))[j1] );
 805             }
 806         }
 807     }
 808 }
 809
 810 typedef void (*RandShuffleFunc)( Mat& dst, RNG& rng, double iterFactor );
 811
 812 }
 813
 814 void cv::randShuffle( InputOutputArray _dst, double iterFactor, RNG* _rng )
 815 {
 816     CV_INSTRUMENT_REGION()
 817
 818     RandShuffleFunc tab[] =
 819     {
 820         0,
 821         randShuffle_<uchar>, // 1
 822         randShuffle_<ushort>, // 2
 823         randShuffle_<Vec<uchar,3> >, // 3
 824         randShuffle_<int>, // 4
 825         0,
 826         randShuffle_<Vec<ushort,3> >, // 6
 827         0,
 828         randShuffle_<Vec<int,2> >, // 8
 829         0, 0, 0,
 830         randShuffle_<Vec<int,3> >, // 12
 831         0, 0, 0,
 832         randShuffle_<Vec<int,4> >, // 16
 833         0, 0, 0, 0, 0, 0, 0,
 834         randShuffle_<Vec<int,6> >, // 24
 835         0, 0, 0, 0, 0, 0, 0,
 836         randShuffle_<Vec<int,8> > // 32
 837     };
 838
 839     Mat dst = _dst.getMat();
 840     RNG& rng = _rng ? *_rng : theRNG();
 841     CV_Assert( dst.elemSize() <= 32 );
 842     RandShuffleFunc func = tab[dst.elemSize()];
 843     CV_Assert( func != 0 );
 844     func( dst, rng, iterFactor );
 845 }
 846
 847 CV_IMPL void
 848 cvRandArr( CvRNG* _rng, CvArr* arr, int disttype, CvScalar param1, CvScalar param2 )
 849 {
 850     cv::Mat mat = cv::cvarrToMat(arr);
 851     // !!! this will only work for current 64-bit MWC RNG !!!
 852     cv::RNG& rng = _rng ? (cv::RNG&)*_rng : cv::theRNG();
 853     rng.fill(mat, disttype == CV_RAND_NORMAL ?
 854         cv::RNG::NORMAL : cv::RNG::UNIFORM, cv::Scalar(param1), cv::Scalar(param2) );
 855 }
 856
 857 CV_IMPL void cvRandShuffle( CvArr* arr, CvRNG* _rng, double iter_factor )
 858 {
 859     cv::Mat dst = cv::cvarrToMat(arr);
 860     cv::RNG& rng = _rng ? (cv::RNG&)*_rng : cv::theRNG();
 861     cv::randShuffle( dst, iter_factor, &rng );
 862 }
 863
 864 // Mersenne Twister random number generator.
 865 // Inspired by http://www.math.sci.hiroshima-u.ac.jp/~m-mat/MT/MT2002/CODES/mt19937ar.c
 866
 867 /*
 868    A C-program for MT19937, with initialization improved 2002/1/26.
 869    Coded by Takuji Nishimura and Makoto Matsumoto.
 870
 871    Before using, initialize the state by using init_genrand(seed)
 872    or init_by_array(init_key, key_length).
 873
 874    Copyright (C) 1997 - 2002, Makoto Matsumoto and Takuji Nishimura,
 875    All rights reserved.
 876
 877    Redistribution and use in source and binary forms, with or without
 878    modification, are permitted provided that the following conditions
 879    are met:
 880
 881      1. Redistributions of source code must retain the above copyright
 882         notice, this list of conditions and the following disclaimer.
 883
 884      2. Redistributions in binary form must reproduce the above copyright
 885         notice, this list of conditions and the following disclaimer in the
 886         documentation and/or other materials provided with the distribution.
 887
 888      3. The names of its contributors may not be used to endorse or promote
 889         products derived from this software without specific prior written
 890         permission.
 891
 892    THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 893    "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 894    LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 895    A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
 896    CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
 897    EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
 898    PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
 899    PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
 900    LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
 901    NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
 902    SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 903
 904
 905    Any feedback is very welcome.
 906    http://www.math.sci.hiroshima-u.ac.jp/~m-mat/MT/emt.html
 907    email: m-mat @ math.sci.hiroshima-u.ac.jp (remove space)
 908 */
 909
 910 cv::RNG_MT19937::RNG_MT19937(unsigned s) { seed(s); }
 911
 912 cv::RNG_MT19937::RNG_MT19937() { seed(5489U); }
 913
 914 void cv::RNG_MT19937::seed(unsigned s)
 915 {
 916     state[0]= s;
 917     for (mti = 1; mti < N; mti++)
 918     {
 919         /* See Knuth TAOCP Vol2. 3rd Ed. P.106 for multiplier. */
 920         state[mti] = (1812433253U * (state[mti - 1] ^ (state[mti - 1] >> 30)) + mti);
 921     }
 922 }
 923
 924 unsigned cv::RNG_MT19937::next()
 925 {
 926     /* mag01[x] = x * MATRIX_A  for x=0,1 */
 927     static unsigned mag01[2] = { 0x0U, /*MATRIX_A*/ 0x9908b0dfU};
 928
 929     const unsigned UPPER_MASK = 0x80000000U;
 930     const unsigned LOWER_MASK = 0x7fffffffU;
 931
 932     /* generate N words at one time */
 933     if (mti >= N)
 934     {
 935         int kk = 0;
 936
 937         for (; kk < N - M; ++kk)
 938         {
 939             unsigned y = (state[kk] & UPPER_MASK) | (state[kk + 1] & LOWER_MASK);
 940             state[kk] = state[kk + M] ^ (y >> 1) ^ mag01[y & 0x1U];
 941         }
 942
 943         for (; kk < N - 1; ++kk)
 944         {
 945             unsigned y = (state[kk] & UPPER_MASK) | (state[kk + 1] & LOWER_MASK);
 946             state[kk] = state[kk + (M - N)] ^ (y >> 1) ^ mag01[y & 0x1U];
 947         }
 948
 949         unsigned y = (state[N - 1] & UPPER_MASK) | (state[0] & LOWER_MASK);
 950         state[N - 1] = state[M - 1] ^ (y >> 1) ^ mag01[y & 0x1U];
 951
 952         mti = 0;
 953     }
 954
 955     unsigned y = state[mti++];
 956
 957     /* Tempering */
 958     y ^= (y >> 11);
 959     y ^= (y <<  7) & 0x9d2c5680U;
 960     y ^= (y << 15) & 0xefc60000U;
 961     y ^= (y >> 18);
 962
 963     return y;
 964 }
 965
 966 cv::RNG_MT19937::operator unsigned() { return next(); }
 967
 968 cv::RNG_MT19937::operator int() { return (int)next();}
 969
 970 cv::RNG_MT19937::operator float() { return next() * (1.f / 4294967296.f); }
 971
 972 cv::RNG_MT19937::operator double()
 973 {
 974     unsigned a = next() >> 5;
 975     unsigned b = next() >> 6;
 976     return (a * 67108864.0 + b) * (1.0 / 9007199254740992.0);
 977 }
 978
 979 int cv::RNG_MT19937::uniform(int a, int b) { return (int)(next() % (b - a) + a); }
 980
 981 float cv::RNG_MT19937::uniform(float a, float b) { return ((float)*this)*(b - a) + a; }
 982
 983 double cv::RNG_MT19937::uniform(double a, double b) { return ((double)*this)*(b - a) + a; }
 984
 985 unsigned cv::RNG_MT19937::operator ()(unsigned b) { return next() % b; }
 986
 987 unsigned cv::RNG_MT19937::operator ()() { return next(); }
 988
 989 /* End of file. */