modules/core/src/rand.cpp

   1 /*M///////////////////////////////////////////////////////////////////////////////////////
   2 //
   3 //  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
   4 //
   5 //  By downloading, copying, installing or using the software you agree to this license.
   6 //  If you do not agree to this license, do not download, install,
   7 //  copy or use the software.
   8 //
   9 //
  10 //                           License Agreement
  11 //                For Open Source Computer Vision Library
  12 //
  13 // Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
  14 // Copyright (C) 2009, Willow Garage Inc., all rights reserved.
  15 // Third party copyrights are property of their respective owners.
  16 //
  17 // Redistribution and use in source and binary forms, with or without modification,
  18 // are permitted provided that the following conditions are met:
  19 //
  20 //   * Redistribution's of source code must retain the above copyright notice,
  21 //     this list of conditions and the following disclaimer.
  22 //
  23 //   * Redistribution's in binary form must reproduce the above copyright notice,
  24 //     this list of conditions and the following disclaimer in the documentation
  25 //     and/or other materials provided with the distribution.
  26 //
  27 //   * The name of the copyright holders may not be used to endorse or promote products
  28 //     derived from this software without specific prior written permission.
  29 //
  30 // This software is provided by the copyright holders and contributors "as is" and
  31 // any express or implied warranties, including, but not limited to, the implied
  32 // warranties of merchantability and fitness for a particular purpose are disclaimed.
  33 // In no event shall the Intel Corporation or contributors be liable for any direct,
  34 // indirect, incidental, special, exemplary, or consequential damages
  35 // (including, but not limited to, procurement of substitute goods or services;
  36 // loss of use, data, or profits; or business interruption) however caused
  37 // and on any theory of liability, whether in contract, strict liability,
  38 // or tort (including negligence or otherwise) arising in any way out of
  39 // the use of this software, even if advised of the possibility of such damage.
  40 //
  41 //M*/
  42
  43 /* ////////////////////////////////////////////////////////////////////
  44 //
  45 //  Filling CvMat/IplImage instances with random numbers
  46 //
  47 // */
  48
  49 #include "precomp.hpp"
  50
  51 #if defined WIN32 || defined WINCE
  52     #include <windows.h>
  53     #undef small
  54     #undef min
  55     #undef max
  56     #undef abs
  57 #endif
  58
  59 #if defined __SSE2__ || (defined _M_IX86_FP && 2 == _M_IX86_FP)
  60     #include "emmintrin.h"
  61 #endif
  62
  63 namespace cv
  64 {
  65
  66 ///////////////////////////// Functions Declaration //////////////////////////////////////
  67
  68 /*
  69    Multiply-with-carry generator is used here:
  70    temp = ( A*X(n) + carry )
  71    X(n+1) = temp mod (2^32)
  72    carry = temp / (2^32)
  73 */
  74
  75 #define  RNG_NEXT(x)    ((uint64)(unsigned)(x)*CV_RNG_COEFF + ((x) >> 32))
  76
  77 /***************************************************************************************\
  78 *                           Pseudo-Random Number Generators (PRNGs)                     *
  79 \***************************************************************************************/
  80
  81 template<typename T> static void
  82 randBits_( T* arr, int len, uint64* state, const Vec2i* p, bool small_flag )
  83 {
  84     uint64 temp = *state;
  85     int i;
  86
  87     if( !small_flag )
  88     {
  89         for( i = 0; i <= len - 4; i += 4 )
  90         {
  91             int t0, t1;
  92
  93             temp = RNG_NEXT(temp);
  94             t0 = ((int)temp & p[i][0]) + p[i][1];
  95             temp = RNG_NEXT(temp);
  96             t1 = ((int)temp & p[i+1][0]) + p[i+1][1];
  97             arr[i] = saturate_cast<T>(t0);
  98             arr[i+1] = saturate_cast<T>(t1);
  99
 100             temp = RNG_NEXT(temp);
 101             t0 = ((int)temp & p[i+2][0]) + p[i+2][1];
 102             temp = RNG_NEXT(temp);
 103             t1 = ((int)temp & p[i+3][0]) + p[i+3][1];
 104             arr[i+2] = saturate_cast<T>(t0);
 105             arr[i+3] = saturate_cast<T>(t1);
 106         }
 107     }
 108     else
 109     {
 110         for( i = 0; i <= len - 4; i += 4 )
 111         {
 112             int t0, t1, t;
 113             temp = RNG_NEXT(temp);
 114             t = (int)temp;
 115             t0 = (t & p[i][0]) + p[i][1];
 116             t1 = ((t >> 8) & p[i+1][0]) + p[i+1][1];
 117             arr[i] = saturate_cast<T>(t0);
 118             arr[i+1] = saturate_cast<T>(t1);
 119
 120             t0 = ((t >> 16) & p[i+2][0]) + p[i+2][1];
 121             t1 = ((t >> 24) & p[i+3][0]) + p[i+3][1];
 122             arr[i+2] = saturate_cast<T>(t0);
 123             arr[i+3] = saturate_cast<T>(t1);
 124         }
 125     }
 126
 127     for( ; i < len; i++ )
 128     {
 129         int t0;
 130         temp = RNG_NEXT(temp);
 131
 132         t0 = ((int)temp & p[i][0]) + p[i][1];
 133         arr[i] = saturate_cast<T>(t0);
 134     }
 135
 136     *state = temp;
 137 }
 138
 139 struct DivStruct
 140 {
 141     unsigned d;
 142     unsigned M;
 143     int sh1, sh2;
 144     int delta;
 145 };
 146
 147 template<typename T> static void
 148 randi_( T* arr, int len, uint64* state, const DivStruct* p )
 149 {
 150     uint64 temp = *state;
 151     int i = 0;
 152     unsigned t0, t1, v0, v1;
 153
 154     for( i = 0; i <= len - 4; i += 4 )
 155     {
 156         temp = RNG_NEXT(temp);
 157         t0 = (unsigned)temp;
 158         temp = RNG_NEXT(temp);
 159         t1 = (unsigned)temp;
 160         v0 = (unsigned)(((uint64)t0 * p[i].M) >> 32);
 161         v1 = (unsigned)(((uint64)t1 * p[i+1].M) >> 32);
 162         v0 = (v0 + ((t0 - v0) >> p[i].sh1)) >> p[i].sh2;
 163         v1 = (v1 + ((t1 - v1) >> p[i+1].sh1)) >> p[i+1].sh2;
 164         v0 = t0 - v0*p[i].d + p[i].delta;
 165         v1 = t1 - v1*p[i+1].d + p[i+1].delta;
 166         arr[i] = saturate_cast<T>((int)v0);
 167         arr[i+1] = saturate_cast<T>((int)v1);
 168
 169         temp = RNG_NEXT(temp);
 170         t0 = (unsigned)temp;
 171         temp = RNG_NEXT(temp);
 172         t1 = (unsigned)temp;
 173         v0 = (unsigned)(((uint64)t0 * p[i+2].M) >> 32);
 174         v1 = (unsigned)(((uint64)t1 * p[i+3].M) >> 32);
 175         v0 = (v0 + ((t0 - v0) >> p[i+2].sh1)) >> p[i+2].sh2;
 176         v1 = (v1 + ((t1 - v1) >> p[i+3].sh1)) >> p[i+3].sh2;
 177         v0 = t0 - v0*p[i+2].d + p[i+2].delta;
 178         v1 = t1 - v1*p[i+3].d + p[i+3].delta;
 179         arr[i+2] = saturate_cast<T>((int)v0);
 180         arr[i+3] = saturate_cast<T>((int)v1);
 181     }
 182
 183     for( ; i < len; i++ )
 184     {
 185         temp = RNG_NEXT(temp);
 186         t0 = (unsigned)temp;
 187         v0 = (unsigned)(((uint64)t0 * p[i].M) >> 32);
 188         v0 = (v0 + ((t0 - v0) >> p[i].sh1)) >> p[i].sh2;
 189         v0 = t0 - v0*p[i].d + p[i].delta;
 190         arr[i] = saturate_cast<T>((int)v0);
 191     }
 192
 193     *state = temp;
 194 }
 195
 196
 197 #define DEF_RANDI_FUNC(suffix, type) \
 198 static void randBits_##suffix(type* arr, int len, uint64* state, \
 199                               const Vec2i* p, bool small_flag) \
 200 { randBits_(arr, len, state, p, small_flag); } \
 201 \
 202 static void randi_##suffix(type* arr, int len, uint64* state, \
 203                            const DivStruct* p, bool ) \
 204 { randi_(arr, len, state, p); }
 205
 206 DEF_RANDI_FUNC(8u, uchar)
 207 DEF_RANDI_FUNC(8s, schar)
 208 DEF_RANDI_FUNC(16u, ushort)
 209 DEF_RANDI_FUNC(16s, short)
 210 DEF_RANDI_FUNC(32s, int)
 211
 212 static void randf_32f( float* arr, int len, uint64* state, const Vec2f* p, bool )
 213 {
 214     uint64 temp = *state;
 215     int i = 0;
 216
 217     for( ; i <= len - 4; i += 4 )
 218     {
 219         float f[4];
 220         f[0] = (float)(int)(temp = RNG_NEXT(temp));
 221         f[1] = (float)(int)(temp = RNG_NEXT(temp));
 222         f[2] = (float)(int)(temp = RNG_NEXT(temp));
 223         f[3] = (float)(int)(temp = RNG_NEXT(temp));
 224
 225         // handwritten SSE is required not for performance but for numerical stability!
 226         // both 32-bit gcc and MSVC compilers trend to generate double precision SSE
 227         // while 64-bit compilers generate single precision SIMD instructions
 228         // so manual vectorisation forces all compilers to the single precision
 229 #if defined __SSE2__ || (defined _M_IX86_FP && 2 == _M_IX86_FP)
 230         __m128 q0 = _mm_loadu_ps((const float*)(p + i));
 231         __m128 q1 = _mm_loadu_ps((const float*)(p + i + 2));
 232
 233         __m128 q01l = _mm_unpacklo_ps(q0, q1);
 234         __m128 q01h = _mm_unpackhi_ps(q0, q1);
 235
 236         __m128 p0 = _mm_unpacklo_ps(q01l, q01h);
 237         __m128 p1 = _mm_unpackhi_ps(q01l, q01h);
 238
 239         _mm_storeu_ps(arr + i, _mm_add_ps(_mm_mul_ps(_mm_loadu_ps(f), p0), p1));
 240 #else
 241         arr[i+0] = f[0]*p[i+0][0] + p[i+0][1];
 242         arr[i+1] = f[1]*p[i+1][0] + p[i+1][1];
 243         arr[i+2] = f[2]*p[i+2][0] + p[i+2][1];
 244         arr[i+3] = f[3]*p[i+3][0] + p[i+3][1];
 245 #endif
 246     }
 247
 248     for( ; i < len; i++ )
 249     {
 250         temp = RNG_NEXT(temp);
 251 #if defined __SSE2__ || (defined _M_IX86_FP && 2 == _M_IX86_FP)
 252         _mm_store_ss(arr + i, _mm_add_ss(
 253                 _mm_mul_ss(_mm_set_ss((float)(int)temp), _mm_set_ss(p[i][0])),
 254                 _mm_set_ss(p[i][1]))
 255                 );
 256 #else
 257         arr[i] = (int)temp*p[i][0] + p[i][1];
 258 #endif
 259     }
 260
 261     *state = temp;
 262 }
 263
 264
 265 static void
 266 randf_64f( double* arr, int len, uint64* state, const Vec2d* p, bool )
 267 {
 268     uint64 temp = *state;
 269     int64 v = 0;
 270     int i;
 271
 272     for( i = 0; i <= len - 4; i += 4 )
 273     {
 274         double f0, f1;
 275
 276         temp = RNG_NEXT(temp);
 277         v = (temp >> 32)|(temp << 32);
 278         f0 = v*p[i][0] + p[i][1];
 279         temp = RNG_NEXT(temp);
 280         v = (temp >> 32)|(temp << 32);
 281         f1 = v*p[i+1][0] + p[i+1][1];
 282         arr[i] = f0; arr[i+1] = f1;
 283
 284         temp = RNG_NEXT(temp);
 285         v = (temp >> 32)|(temp << 32);
 286         f0 = v*p[i+2][0] + p[i+2][1];
 287         temp = RNG_NEXT(temp);
 288         v = (temp >> 32)|(temp << 32);
 289         f1 = v*p[i+3][0] + p[i+3][1];
 290         arr[i+2] = f0; arr[i+3] = f1;
 291     }
 292
 293     for( ; i < len; i++ )
 294     {
 295         temp = RNG_NEXT(temp);
 296         v = (temp >> 32)|(temp << 32);
 297         arr[i] = v*p[i][0] + p[i][1];
 298     }
 299
 300     *state = temp;
 301 }
 302
 303 typedef void (*RandFunc)(uchar* arr, int len, uint64* state, const void* p, bool small_flag);
 304
 305
 306 static RandFunc randTab[][8] =
 307 {
 308     {
 309         (RandFunc)randi_8u, (RandFunc)randi_8s, (RandFunc)randi_16u, (RandFunc)randi_16s,
 310         (RandFunc)randi_32s, (RandFunc)randf_32f, (RandFunc)randf_64f, 0
 311     },
 312     {
 313         (RandFunc)randBits_8u, (RandFunc)randBits_8s, (RandFunc)randBits_16u, (RandFunc)randBits_16s,
 314         (RandFunc)randBits_32s, 0, 0, 0
 315     }
 316 };
 317
 318 /*
 319    The code below implements the algorithm described in
 320    "The Ziggurat Method for Generating Random Variables"
 321    by Marsaglia and Tsang, Journal of Statistical Software.
 322 */
 323 static void
 324 randn_0_1_32f( float* arr, int len, uint64* state )
 325 {
 326     const float r = 3.442620f; // The start of the right tail
 327     const float rng_flt = 2.3283064365386962890625e-10f; // 2^-32
 328     static unsigned kn[128];
 329     static float wn[128], fn[128];
 330     uint64 temp = *state;
 331     static bool initialized=false;
 332     int i;
 333
 334     if( !initialized )
 335     {
 336         const double m1 = 2147483648.0;
 337         double dn = 3.442619855899, tn = dn, vn = 9.91256303526217e-3;
 338
 339         // Set up the tables
 340         double q = vn/std::exp(-.5*dn*dn);
 341         kn[0] = (unsigned)((dn/q)*m1);
 342         kn[1] = 0;
 343
 344         wn[0] = (float)(q/m1);
 345         wn[127] = (float)(dn/m1);
 346
 347         fn[0] = 1.f;
 348         fn[127] = (float)std::exp(-.5*dn*dn);
 349
 350         for(i=126;i>=1;i--)
 351         {
 352             dn = std::sqrt(-2.*std::log(vn/dn+std::exp(-.5*dn*dn)));
 353             kn[i+1] = (unsigned)((dn/tn)*m1);
 354             tn = dn;
 355             fn[i] = (float)std::exp(-.5*dn*dn);
 356             wn[i] = (float)(dn/m1);
 357         }
 358         initialized = true;
 359     }
 360
 361     for( i = 0; i < len; i++ )
 362     {
 363         float x, y;
 364         for(;;)
 365         {
 366             int hz = (int)temp;
 367             temp = RNG_NEXT(temp);
 368             int iz = hz & 127;
 369             x = hz*wn[iz];
 370             if( (unsigned)std::abs(hz) < kn[iz] )
 371                 break;
 372             if( iz == 0) // iz==0, handles the base strip
 373             {
 374                 do
 375                 {
 376                     x = (unsigned)temp*rng_flt;
 377                     temp = RNG_NEXT(temp);
 378                     y = (unsigned)temp*rng_flt;
 379                     temp = RNG_NEXT(temp);
 380                     x = (float)(-std::log(x+FLT_MIN)*0.2904764);
 381                     y = (float)-std::log(y+FLT_MIN);
 382                 }       // .2904764 is 1/r
 383                 while( y + y < x*x );
 384                 x = hz > 0 ? r + x : -r - x;
 385                 break;
 386             }
 387             // iz > 0, handle the wedges of other strips
 388             y = (unsigned)temp*rng_flt;
 389             temp = RNG_NEXT(temp);
 390             if( fn[iz] + y*(fn[iz - 1] - fn[iz]) < std::exp(-.5*x*x) )
 391                 break;
 392         }
 393         arr[i] = x;
 394     }
 395     *state = temp;
 396 }
 397
 398
 399 double RNG::gaussian(double sigma)
 400 {
 401     float temp;
 402     randn_0_1_32f( &temp, 1, &state );
 403     return temp*sigma;
 404 }
 405
 406
 407 template<typename T, typename PT> static void
 408 randnScale_( const float* src, T* dst, int len, int cn, const PT* mean, const PT* stddev, bool stdmtx )
 409 {
 410     int i, j, k;
 411     if( !stdmtx )
 412     {
 413         if( cn == 1 )
 414         {
 415             PT b = mean[0], a = stddev[0];
 416             for( i = 0; i < len; i++ )
 417                 dst[i] = saturate_cast<T>(src[i]*a + b);
 418         }
 419         else
 420         {
 421             for( i = 0; i < len; i++, src += cn, dst += cn )
 422                 for( k = 0; k < cn; k++ )
 423                     dst[k] = saturate_cast<T>(src[k]*stddev[k] + mean[k]);
 424         }
 425     }
 426     else
 427     {
 428         for( i = 0; i < len; i++, src += cn, dst += cn )
 429         {
 430             for( j = 0; j < cn; j++ )
 431             {
 432                 PT s = mean[j];
 433                 for( k = 0; k < cn; k++ )
 434                     s += src[k]*stddev[j*cn + k];
 435                 dst[j] = saturate_cast<T>(s);
 436             }
 437         }
 438     }
 439 }
 440
 441 static void randnScale_8u( const float* src, uchar* dst, int len, int cn,
 442                             const float* mean, const float* stddev, bool stdmtx )
 443 { randnScale_(src, dst, len, cn, mean, stddev, stdmtx); }
 444
 445 static void randnScale_8s( const float* src, schar* dst, int len, int cn,
 446                             const float* mean, const float* stddev, bool stdmtx )
 447 { randnScale_(src, dst, len, cn, mean, stddev, stdmtx); }
 448
 449 static void randnScale_16u( const float* src, ushort* dst, int len, int cn,
 450                              const float* mean, const float* stddev, bool stdmtx )
 451 { randnScale_(src, dst, len, cn, mean, stddev, stdmtx); }
 452
 453 static void randnScale_16s( const float* src, short* dst, int len, int cn,
 454                              const float* mean, const float* stddev, bool stdmtx )
 455 { randnScale_(src, dst, len, cn, mean, stddev, stdmtx); }
 456
 457 static void randnScale_32s( const float* src, int* dst, int len, int cn,
 458                              const float* mean, const float* stddev, bool stdmtx )
 459 { randnScale_(src, dst, len, cn, mean, stddev, stdmtx); }
 460
 461 static void randnScale_32f( const float* src, float* dst, int len, int cn,
 462                              const float* mean, const float* stddev, bool stdmtx )
 463 { randnScale_(src, dst, len, cn, mean, stddev, stdmtx); }
 464
 465 static void randnScale_64f( const float* src, double* dst, int len, int cn,
 466                              const double* mean, const double* stddev, bool stdmtx )
 467 { randnScale_(src, dst, len, cn, mean, stddev, stdmtx); }
 468
 469 typedef void (*RandnScaleFunc)(const float* src, uchar* dst, int len, int cn,
 470                                const uchar*, const uchar*, bool);
 471
 472 static RandnScaleFunc randnScaleTab[] =
 473 {
 474     (RandnScaleFunc)randnScale_8u, (RandnScaleFunc)randnScale_8s, (RandnScaleFunc)randnScale_16u,
 475     (RandnScaleFunc)randnScale_16s, (RandnScaleFunc)randnScale_32s, (RandnScaleFunc)randnScale_32f,
 476     (RandnScaleFunc)randnScale_64f, 0
 477 };
 478
 479 void RNG::fill( InputOutputArray _mat, int disttype,
 480                 InputArray _param1arg, InputArray _param2arg, bool saturateRange )
 481 {
 482     Mat mat = _mat.getMat(), _param1 = _param1arg.getMat(), _param2 = _param2arg.getMat();
 483     int depth = mat.depth(), cn = mat.channels();
 484     AutoBuffer<double> _parambuf;
 485     int j, k, fast_int_mode = 0, smallFlag = 1;
 486     RandFunc func = 0;
 487     RandnScaleFunc scaleFunc = 0;
 488
 489     CV_Assert(_param1.channels() == 1 && (_param1.rows == 1 || _param1.cols == 1) &&
 490               (_param1.rows + _param1.cols - 1 == cn || _param1.rows + _param1.cols - 1 == 1 ||
 491                (_param1.size() == Size(1, 4) && _param1.type() == CV_64F && cn <= 4)));
 492     CV_Assert( _param2.channels() == 1 &&
 493                (((_param2.rows == 1 || _param2.cols == 1) &&
 494                 (_param2.rows + _param2.cols - 1 == cn || _param2.rows + _param2.cols - 1 == 1 ||
 495                 (_param1.size() == Size(1, 4) && _param1.type() == CV_64F && cn <= 4))) ||
 496                 (_param2.rows == cn && _param2.cols == cn && disttype == NORMAL)));
 497
 498     Vec2i* ip = 0;
 499     Vec2d* dp = 0;
 500     Vec2f* fp = 0;
 501     DivStruct* ds = 0;
 502     uchar* mean = 0;
 503     uchar* stddev = 0;
 504     bool stdmtx = false;
 505     int n1 = (int)_param1.total();
 506     int n2 = (int)_param2.total();
 507
 508     if( disttype == UNIFORM )
 509     {
 510         _parambuf.allocate(cn*8 + n1 + n2);
 511         double* parambuf = _parambuf;
 512         double* p1 = (double*)_param1.data;
 513         double* p2 = (double*)_param2.data;
 514
 515         if( !_param1.isContinuous() || _param1.type() != CV_64F || n1 != cn )
 516         {
 517             Mat tmp(_param1.size(), CV_64F, parambuf);
 518             _param1.convertTo(tmp, CV_64F);
 519             p1 = parambuf;
 520             if( n1 < cn )
 521                 for( j = n1; j < cn; j++ )
 522                     p1[j] = p1[j-n1];
 523         }
 524
 525         if( !_param2.isContinuous() || _param2.type() != CV_64F || n2 != cn )
 526         {
 527             Mat tmp(_param2.size(), CV_64F, parambuf + cn);
 528             _param2.convertTo(tmp, CV_64F);
 529             p2 = parambuf + cn;
 530             if( n2 < cn )
 531                 for( j = n2; j < cn; j++ )
 532                     p2[j] = p2[j-n2];
 533         }
 534
 535         if( depth <= CV_32S )
 536         {
 537             ip = (Vec2i*)(parambuf + cn*2);
 538             for( j = 0, fast_int_mode = 1; j < cn; j++ )
 539             {
 540                 double a = min(p1[j], p2[j]);
 541                 double b = max(p1[j], p2[j]);
 542                 if( saturateRange )
 543                 {
 544                     a = max(a, depth == CV_8U || depth == CV_16U ? 0. :
 545                             depth == CV_8S ? -128. : depth == CV_16S ? -32768. : (double)INT_MIN);
 546                     b = min(b, depth == CV_8U ? 256. : depth == CV_16U ? 65536. :
 547                             depth == CV_8S ? 128. : depth == CV_16S ? 32768. : (double)INT_MAX);
 548                 }
 549                 ip[j][1] = cvCeil(a);
 550                 int idiff = ip[j][0] = cvFloor(b) - ip[j][1] - 1;
 551                 double diff = b - a;
 552
 553                 fast_int_mode &= diff <= 4294967296. && (idiff & (idiff+1)) == 0;
 554                 if( fast_int_mode )
 555                     smallFlag &= idiff <= 255;
 556                 else
 557                 {
 558                     if( diff > INT_MAX )
 559                         ip[j][0] = INT_MAX;
 560                     if( a < INT_MIN/2 )
 561                         ip[j][1] = INT_MIN/2;
 562                 }
 563             }
 564
 565             if( !fast_int_mode )
 566             {
 567                 ds = (DivStruct*)(ip + cn);
 568                 for( j = 0; j < cn; j++ )
 569                 {
 570                     ds[j].delta = ip[j][1];
 571                     unsigned d = ds[j].d = (unsigned)(ip[j][0]+1);
 572                     int l = 0;
 573                     while(((uint64)1 << l) < d)
 574                         l++;
 575                     ds[j].M = (unsigned)(((uint64)1 << 32)*(((uint64)1 << l) - d)/d) + 1;
 576                     ds[j].sh1 = min(l, 1);
 577                     ds[j].sh2 = max(l - 1, 0);
 578                 }
 579             }
 580
 581             func = randTab[fast_int_mode][depth];
 582         }
 583         else
 584         {
 585             double scale = depth == CV_64F ?
 586                 5.4210108624275221700372640043497e-20 : // 2**-64
 587                 2.3283064365386962890625e-10;           // 2**-32
 588             double maxdiff = saturateRange ? (double)FLT_MAX : DBL_MAX;
 589
 590             // for each channel i compute such dparam[0][i] & dparam[1][i],
 591             // so that a signed 32/64-bit integer X is transformed to
 592             // the range [param1.val[i], param2.val[i]) using
 593             // dparam[1][i]*X + dparam[0][i]
 594             if( depth == CV_32F )
 595             {
 596                 fp = (Vec2f*)(parambuf + cn*2);
 597                 for( j = 0; j < cn; j++ )
 598                 {
 599                     fp[j][0] = (float)(std::min(maxdiff, p2[j] - p1[j])*scale);
 600                     fp[j][1] = (float)((p2[j] + p1[j])*0.5);
 601                 }
 602             }
 603             else
 604             {
 605                 dp = (Vec2d*)(parambuf + cn*2);
 606                 for( j = 0; j < cn; j++ )
 607                 {
 608                     dp[j][0] = std::min(DBL_MAX, p2[j] - p1[j])*scale;
 609                     dp[j][1] = ((p2[j] + p1[j])*0.5);
 610                 }
 611             }
 612
 613             func = randTab[0][depth];
 614         }
 615         CV_Assert( func != 0 );
 616     }
 617     else if( disttype == CV_RAND_NORMAL )
 618     {
 619         _parambuf.allocate(MAX(n1, cn) + MAX(n2, cn));
 620         double* parambuf = _parambuf;
 621
 622         int ptype = depth == CV_64F ? CV_64F : CV_32F;
 623         int esz = (int)CV_ELEM_SIZE(ptype);
 624
 625         if( _param1.isContinuous() && _param1.type() == ptype )
 626             mean = _param1.data;
 627         else
 628         {
 629             Mat tmp(_param1.size(), ptype, parambuf);
 630             _param1.convertTo(tmp, ptype);
 631             mean = (uchar*)parambuf;
 632         }
 633
 634         if( n1 < cn )
 635             for( j = n1*esz; j < cn*esz; j++ )
 636                 mean[j] = mean[j - n1*esz];
 637
 638         if( _param2.isContinuous() && _param2.type() == ptype )
 639             stddev = _param2.data;
 640         else
 641         {
 642             Mat tmp(_param2.size(), ptype, parambuf + cn);
 643             _param2.convertTo(tmp, ptype);
 644             stddev = (uchar*)(parambuf + cn);
 645         }
 646
 647         if( n1 < cn )
 648             for( j = n1*esz; j < cn*esz; j++ )
 649                 stddev[j] = stddev[j - n1*esz];
 650
 651         stdmtx = _param2.rows == cn && _param2.cols == cn;
 652         scaleFunc = randnScaleTab[depth];
 653         CV_Assert( scaleFunc != 0 );
 654     }
 655     else
 656         CV_Error( CV_StsBadArg, "Unknown distribution type" );
 657
 658     const Mat* arrays[] = {&mat, 0};
 659     uchar* ptr;
 660     NAryMatIterator it(arrays, &ptr);
 661     int total = (int)it.size, blockSize = std::min((BLOCK_SIZE + cn - 1)/cn, total);
 662     size_t esz = mat.elemSize();
 663     AutoBuffer<double> buf;
 664     uchar* param = 0;
 665     float* nbuf = 0;
 666
 667     if( disttype == UNIFORM )
 668     {
 669         buf.allocate(blockSize*cn*4);
 670         param = (uchar*)(double*)buf;
 671
 672         if( ip )
 673         {
 674             if( ds )
 675             {
 676                 DivStruct* p = (DivStruct*)param;
 677                 for( j = 0; j < blockSize*cn; j += cn )
 678                     for( k = 0; k < cn; k++ )
 679                         p[j + k] = ds[k];
 680             }
 681             else
 682             {
 683                 Vec2i* p = (Vec2i*)param;
 684                 for( j = 0; j < blockSize*cn; j += cn )
 685                     for( k = 0; k < cn; k++ )
 686                         p[j + k] = ip[k];
 687             }
 688         }
 689         else if( fp )
 690         {
 691             Vec2f* p = (Vec2f*)param;
 692             for( j = 0; j < blockSize*cn; j += cn )
 693                 for( k = 0; k < cn; k++ )
 694                     p[j + k] = fp[k];
 695         }
 696         else
 697         {
 698             Vec2d* p = (Vec2d*)param;
 699             for( j = 0; j < blockSize*cn; j += cn )
 700                 for( k = 0; k < cn; k++ )
 701                     p[j + k] = dp[k];
 702         }
 703     }
 704     else
 705     {
 706         buf.allocate((blockSize*cn+1)/2);
 707         nbuf = (float*)(double*)buf;
 708     }
 709
 710     for( size_t i = 0; i < it.nplanes; i++, ++it )
 711     {
 712         for( j = 0; j < total; j += blockSize )
 713         {
 714             int len = std::min(total - j, blockSize);
 715
 716             if( disttype == CV_RAND_UNI )
 717                 func( ptr, len*cn, &state, param, smallFlag != 0 );
 718             else
 719             {
 720                 randn_0_1_32f(nbuf, len*cn, &state);
 721                 scaleFunc(nbuf, ptr, len, cn, mean, stddev, stdmtx);
 722             }
 723             ptr += len*esz;
 724         }
 725     }
 726 }
 727
 728 #ifdef WIN32
 729 #ifdef WINCE
 730 #       define TLS_OUT_OF_INDEXES ((DWORD)0xFFFFFFFF)
 731 #endif
 732 static DWORD tlsRNGKey = TLS_OUT_OF_INDEXES;
 733
 734 void deleteThreadRNGData()
 735 {
 736     if( tlsRNGKey != TLS_OUT_OF_INDEXES )
 737         delete (RNG*)TlsGetValue( tlsRNGKey );
 738 }
 739
 740 RNG& theRNG()
 741 {
 742     if( tlsRNGKey == TLS_OUT_OF_INDEXES )
 743     {
 744         tlsRNGKey = TlsAlloc();
 745         CV_Assert(tlsRNGKey != TLS_OUT_OF_INDEXES);
 746     }
 747     RNG* rng = (RNG*)TlsGetValue( tlsRNGKey );
 748     if( !rng )
 749     {
 750         rng = new RNG;
 751         TlsSetValue( tlsRNGKey, rng );
 752     }
 753     return *rng;
 754 }
 755
 756 #else
 757
 758 static pthread_key_t tlsRNGKey = 0;
 759 static pthread_once_t tlsRNGKeyOnce = PTHREAD_ONCE_INIT;
 760
 761 static void deleteRNG(void* data)
 762 {
 763     delete (RNG*)data;
 764 }
 765
 766 static void makeRNGKey()
 767 {
 768     int errcode = pthread_key_create(&tlsRNGKey, deleteRNG);
 769     CV_Assert(errcode == 0);
 770 }
 771
 772 RNG& theRNG()
 773 {
 774     pthread_once(&tlsRNGKeyOnce, makeRNGKey);
 775     RNG* rng = (RNG*)pthread_getspecific(tlsRNGKey);
 776     if( !rng )
 777     {
 778         rng = new RNG;
 779         pthread_setspecific(tlsRNGKey, rng);
 780     }
 781     return *rng;
 782 }
 783
 784 #endif
 785
 786 }
 787
 788 void cv::randu(InputOutputArray dst, InputArray low, InputArray high)
 789 {
 790     theRNG().fill(dst, RNG::UNIFORM, low, high);
 791 }
 792
 793 void cv::randn(InputOutputArray dst, InputArray mean, InputArray stddev)
 794 {
 795     theRNG().fill(dst, RNG::NORMAL, mean, stddev);
 796 }
 797
 798 namespace cv
 799 {
 800
 801 template<typename T> static void
 802 randShuffle_( Mat& _arr, RNG& rng, double iterFactor )
 803 {
 804     int sz = _arr.rows*_arr.cols, iters = cvRound(iterFactor*sz);
 805     if( _arr.isContinuous() )
 806     {
 807         T* arr = (T*)_arr.data;
 808         for( int i = 0; i < iters; i++ )
 809         {
 810             int j = (unsigned)rng % sz, k = (unsigned)rng % sz;
 811             std::swap( arr[j], arr[k] );
 812         }
 813     }
 814     else
 815     {
 816         uchar* data = _arr.data;
 817         size_t step = _arr.step;
 818         int cols = _arr.cols;
 819         for( int i = 0; i < iters; i++ )
 820         {
 821             int j1 = (unsigned)rng % sz, k1 = (unsigned)rng % sz;
 822             int j0 = j1/cols, k0 = k1/cols;
 823             j1 -= j0*cols; k1 -= k0*cols;
 824             std::swap( ((T*)(data + step*j0))[j1], ((T*)(data + step*k0))[k1] );
 825         }
 826     }
 827 }
 828
 829 typedef void (*RandShuffleFunc)( Mat& dst, RNG& rng, double iterFactor );
 830
 831 }
 832
 833 void cv::randShuffle( InputOutputArray _dst, double iterFactor, RNG* _rng )
 834 {
 835     RandShuffleFunc tab[] =
 836     {
 837         0,
 838         randShuffle_<uchar>, // 1
 839         randShuffle_<ushort>, // 2
 840         randShuffle_<Vec<uchar,3> >, // 3
 841         randShuffle_<int>, // 4
 842         0,
 843         randShuffle_<Vec<ushort,3> >, // 6
 844         0,
 845         randShuffle_<Vec<int,2> >, // 8
 846         0, 0, 0,
 847         randShuffle_<Vec<int,3> >, // 12
 848         0, 0, 0,
 849         randShuffle_<Vec<int,4> >, // 16
 850         0, 0, 0, 0, 0, 0, 0,
 851         randShuffle_<Vec<int,6> >, // 24
 852         0, 0, 0, 0, 0, 0, 0,
 853         randShuffle_<Vec<int,8> > // 32
 854     };
 855
 856     Mat dst = _dst.getMat();
 857     RNG& rng = _rng ? *_rng : theRNG();
 858     CV_Assert( dst.elemSize() <= 32 );
 859     RandShuffleFunc func = tab[dst.elemSize()];
 860     CV_Assert( func != 0 );
 861     func( dst, rng, iterFactor );
 862 }
 863
 864 void cv::randShuffle_( InputOutputArray _dst, double iterFactor )
 865 {
 866     randShuffle(_dst, iterFactor);
 867 }
 868
 869 CV_IMPL void
 870 cvRandArr( CvRNG* _rng, CvArr* arr, int disttype, CvScalar param1, CvScalar param2 )
 871 {
 872     cv::Mat mat = cv::cvarrToMat(arr);
 873     // !!! this will only work for current 64-bit MWC RNG !!!
 874     cv::RNG& rng = _rng ? (cv::RNG&)*_rng : cv::theRNG();
 875     rng.fill(mat, disttype == CV_RAND_NORMAL ?
 876         cv::RNG::NORMAL : cv::RNG::UNIFORM, cv::Scalar(param1), cv::Scalar(param2) );
 877 }
 878
 879 CV_IMPL void cvRandShuffle( CvArr* arr, CvRNG* _rng, double iter_factor )
 880 {
 881     cv::Mat dst = cv::cvarrToMat(arr);
 882     cv::RNG& rng = _rng ? (cv::RNG&)*_rng : cv::theRNG();
 883     cv::randShuffle( dst, iter_factor, &rng );
 884 }
 885
 886 // Mersenne Twister random number generator.
 887 // Inspired by http://www.math.sci.hiroshima-u.ac.jp/~m-mat/MT/MT2002/CODES/mt19937ar.c
 888
 889 /*
 890    A C-program for MT19937, with initialization improved 2002/1/26.
 891    Coded by Takuji Nishimura and Makoto Matsumoto.
 892
 893    Before using, initialize the state by using init_genrand(seed)
 894    or init_by_array(init_key, key_length).
 895
 896    Copyright (C) 1997 - 2002, Makoto Matsumoto and Takuji Nishimura,
 897    All rights reserved.
 898
 899    Redistribution and use in source and binary forms, with or without
 900    modification, are permitted provided that the following conditions
 901    are met:
 902
 903      1. Redistributions of source code must retain the above copyright
 904         notice, this list of conditions and the following disclaimer.
 905
 906      2. Redistributions in binary form must reproduce the above copyright
 907         notice, this list of conditions and the following disclaimer in the
 908         documentation and/or other materials provided with the distribution.
 909
 910      3. The names of its contributors may not be used to endorse or promote
 911         products derived from this software without specific prior written
 912         permission.
 913
 914    THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 915    "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 916    LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 917    A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
 918    CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
 919    EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
 920    PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
 921    PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
 922    LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
 923    NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
 924    SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 925
 926
 927    Any feedback is very welcome.
 928    http://www.math.sci.hiroshima-u.ac.jp/~m-mat/MT/emt.html
 929    email: m-mat @ math.sci.hiroshima-u.ac.jp (remove space)
 930 */
 931
 932 cv::RNG_MT19937::RNG_MT19937(unsigned s) { seed(s); }
 933
 934 cv::RNG_MT19937::RNG_MT19937() { seed(5489U); }
 935
 936 void cv::RNG_MT19937::seed(unsigned s)
 937 {
 938     state[0]= s;
 939     for (mti = 1; mti < N; mti++)
 940     {
 941         /* See Knuth TAOCP Vol2. 3rd Ed. P.106 for multiplier. */
 942         state[mti] = (1812433253U * (state[mti - 1] ^ (state[mti - 1] >> 30)) + mti);
 943     }
 944 }
 945
 946 unsigned cv::RNG_MT19937::next()
 947 {
 948     /* mag01[x] = x * MATRIX_A  for x=0,1 */
 949     static unsigned mag01[2] = { 0x0U, /*MATRIX_A*/ 0x9908b0dfU};
 950
 951     const unsigned UPPER_MASK = 0x80000000U;
 952     const unsigned LOWER_MASK = 0x7fffffffU;
 953
 954     /* generate N words at one time */
 955     if (mti >= N)
 956     {
 957         int kk = 0;
 958
 959         for (; kk < N - M; ++kk)
 960         {
 961             unsigned y = (state[kk] & UPPER_MASK) | (state[kk + 1] & LOWER_MASK);
 962             state[kk] = state[kk + M] ^ (y >> 1) ^ mag01[y & 0x1U];
 963         }
 964
 965         for (; kk < N - 1; ++kk)
 966         {
 967             unsigned y = (state[kk] & UPPER_MASK) | (state[kk + 1] & LOWER_MASK);
 968             state[kk] = state[kk + (M - N)] ^ (y >> 1) ^ mag01[y & 0x1U];
 969         }
 970
 971         unsigned y = (state[N - 1] & UPPER_MASK) | (state[0] & LOWER_MASK);
 972         state[N - 1] = state[M - 1] ^ (y >> 1) ^ mag01[y & 0x1U];
 973
 974         mti = 0;
 975     }
 976
 977     unsigned y = state[mti++];
 978
 979     /* Tempering */
 980     y ^= (y >> 11);
 981     y ^= (y <<  7) & 0x9d2c5680U;
 982     y ^= (y << 15) & 0xefc60000U;
 983     y ^= (y >> 18);
 984
 985     return y;
 986 }
 987
 988 cv::RNG_MT19937::operator unsigned() { return next(); }
 989
 990 cv::RNG_MT19937::operator int() { return (int)next();}
 991
 992 cv::RNG_MT19937::operator float() { return next() * (1.f / 4294967296.f); }
 993
 994 cv::RNG_MT19937::operator double()
 995 {
 996     unsigned a = next() >> 5;
 997     unsigned b = next() >> 6;
 998     return (a * 67108864.0 + b) * (1.0 / 9007199254740992.0);
 999 }
1000
1001 int cv::RNG_MT19937::uniform(int a, int b) { return (int)(next() % (b - a) + a); }
1002
1003 float cv::RNG_MT19937::uniform(float a, float b) { return ((float)*this)*(b - a) + a; }
1004
1005 double cv::RNG_MT19937::uniform(double a, double b) { return ((double)*this)*(b - a) + a; }
1006
1007 unsigned cv::RNG_MT19937::operator ()(unsigned b) { return next() % b; }
1008
1009 unsigned cv::RNG_MT19937::operator ()() { return next(); }
1010
1011 /* End of file. */