modules/core/src/rand.cpp

   1 /*M///////////////////////////////////////////////////////////////////////////////////////
   2 //
   3 //  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
   4 //
   5 //  By downloading, copying, installing or using the software you agree to this license.
   6 //  If you do not agree to this license, do not download, install,
   7 //  copy or use the software.
   8 //
   9 //
  10 //                           License Agreement
  11 //                For Open Source Computer Vision Library
  12 //
  13 // Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
  14 // Copyright (C) 2009, Willow Garage Inc., all rights reserved.
  15 // Third party copyrights are property of their respective owners.
  16 //
  17 // Redistribution and use in source and binary forms, with or without modification,
  18 // are permitted provided that the following conditions are met:
  19 //
  20 //   * Redistribution's of source code must retain the above copyright notice,
  21 //     this list of conditions and the following disclaimer.
  22 //
  23 //   * Redistribution's in binary form must reproduce the above copyright notice,
  24 //     this list of conditions and the following disclaimer in the documentation
  25 //     and/or other materials provided with the distribution.
  26 //
  27 //   * The name of the copyright holders may not be used to endorse or promote products
  28 //     derived from this software without specific prior written permission.
  29 //
  30 // This software is provided by the copyright holders and contributors "as is" and
  31 // any express or implied warranties, including, but not limited to, the implied
  32 // warranties of merchantability and fitness for a particular purpose are disclaimed.
  33 // In no event shall the Intel Corporation or contributors be liable for any direct,
  34 // indirect, incidental, special, exemplary, or consequential damages
  35 // (including, but not limited to, procurement of substitute goods or services;
  36 // loss of use, data, or profits; or business interruption) however caused
  37 // and on any theory of liability, whether in contract, strict liability,
  38 // or tort (including negligence or otherwise) arising in any way out of
  39 // the use of this software, even if advised of the possibility of such damage.
  40 //
  41 //M*/
  42
  43 /* ////////////////////////////////////////////////////////////////////
  44 //
  45 //  Filling CvMat/IplImage instances with random numbers
  46 //
  47 // */
  48
  49 #include "precomp.hpp"
  50
  51 #if defined WIN32 || defined WINCE
  52     #include <windows.h>
  53     #undef small
  54     #undef min
  55     #undef max
  56     #undef abs
  57 #endif
  58
  59 #if defined __SSE2__ || (defined _M_IX86_FP && 2 == _M_IX86_FP)
  60     #include "emmintrin.h"
  61 #endif
  62
  63 namespace cv
  64 {
  65
  66 ///////////////////////////// Functions Declaration //////////////////////////////////////
  67
  68 /*
  69    Multiply-with-carry generator is used here:
  70    temp = ( A*X(n) + carry )
  71    X(n+1) = temp mod (2^32)
  72    carry = temp / (2^32)
  73 */
  74
  75 #define  RNG_NEXT(x)    ((uint64)(unsigned)(x)*CV_RNG_COEFF + ((x) >> 32))
  76
  77 /***************************************************************************************\
  78 *                           Pseudo-Random Number Generators (PRNGs)                     *
  79 \***************************************************************************************/
  80
  81 template<typename T> static void
  82 randBits_( T* arr, int len, uint64* state, const Vec2i* p, bool small_flag )
  83 {
  84     uint64 temp = *state;
  85     int i;
  86
  87     if( !small_flag )
  88     {
  89         for( i = 0; i <= len - 4; i += 4 )
  90         {
  91             int t0, t1;
  92
  93             temp = RNG_NEXT(temp);
  94             t0 = ((int)temp & p[i][0]) + p[i][1];
  95             temp = RNG_NEXT(temp);
  96             t1 = ((int)temp & p[i+1][0]) + p[i+1][1];
  97             arr[i] = saturate_cast<T>(t0);
  98             arr[i+1] = saturate_cast<T>(t1);
  99
 100             temp = RNG_NEXT(temp);
 101             t0 = ((int)temp & p[i+2][0]) + p[i+2][1];
 102             temp = RNG_NEXT(temp);
 103             t1 = ((int)temp & p[i+3][0]) + p[i+3][1];
 104             arr[i+2] = saturate_cast<T>(t0);
 105             arr[i+3] = saturate_cast<T>(t1);
 106         }
 107     }
 108     else
 109     {
 110         for( i = 0; i <= len - 4; i += 4 )
 111         {
 112             int t0, t1, t;
 113             temp = RNG_NEXT(temp);
 114             t = (int)temp;
 115             t0 = (t & p[i][0]) + p[i][1];
 116             t1 = ((t >> 8) & p[i+1][0]) + p[i+1][1];
 117             arr[i] = saturate_cast<T>(t0);
 118             arr[i+1] = saturate_cast<T>(t1);
 119
 120             t0 = ((t >> 16) & p[i+2][0]) + p[i+2][1];
 121             t1 = ((t >> 24) & p[i+3][0]) + p[i+3][1];
 122             arr[i+2] = saturate_cast<T>(t0);
 123             arr[i+3] = saturate_cast<T>(t1);
 124         }
 125     }
 126
 127     for( ; i < len; i++ )
 128     {
 129         int t0;
 130         temp = RNG_NEXT(temp);
 131
 132         t0 = ((int)temp & p[i][0]) + p[i][1];
 133         arr[i] = saturate_cast<T>(t0);
 134     }
 135
 136     *state = temp;
 137 }
 138
 139 struct DivStruct
 140 {
 141     unsigned d;
 142     unsigned M;
 143     int sh1, sh2;
 144     int delta;
 145 };
 146
 147 template<typename T> static void
 148 randi_( T* arr, int len, uint64* state, const DivStruct* p )
 149 {
 150     uint64 temp = *state;
 151     int i = 0;
 152     unsigned t0, t1, v0, v1;
 153
 154     for( i = 0; i <= len - 4; i += 4 )
 155     {
 156         temp = RNG_NEXT(temp);
 157         t0 = (unsigned)temp;
 158         temp = RNG_NEXT(temp);
 159         t1 = (unsigned)temp;
 160         v0 = (unsigned)(((uint64)t0 * p[i].M) >> 32);
 161         v1 = (unsigned)(((uint64)t1 * p[i+1].M) >> 32);
 162         v0 = (v0 + ((t0 - v0) >> p[i].sh1)) >> p[i].sh2;
 163         v1 = (v1 + ((t1 - v1) >> p[i+1].sh1)) >> p[i+1].sh2;
 164         v0 = t0 - v0*p[i].d + p[i].delta;
 165         v1 = t1 - v1*p[i+1].d + p[i+1].delta;
 166         arr[i] = saturate_cast<T>((int)v0);
 167         arr[i+1] = saturate_cast<T>((int)v1);
 168
 169         temp = RNG_NEXT(temp);
 170         t0 = (unsigned)temp;
 171         temp = RNG_NEXT(temp);
 172         t1 = (unsigned)temp;
 173         v0 = (unsigned)(((uint64)t0 * p[i+2].M) >> 32);
 174         v1 = (unsigned)(((uint64)t1 * p[i+3].M) >> 32);
 175         v0 = (v0 + ((t0 - v0) >> p[i+2].sh1)) >> p[i+2].sh2;
 176         v1 = (v1 + ((t1 - v1) >> p[i+3].sh1)) >> p[i+3].sh2;
 177         v0 = t0 - v0*p[i+2].d + p[i+2].delta;
 178         v1 = t1 - v1*p[i+3].d + p[i+3].delta;
 179         arr[i+2] = saturate_cast<T>((int)v0);
 180         arr[i+3] = saturate_cast<T>((int)v1);
 181     }
 182
 183     for( ; i < len; i++ )
 184     {
 185         temp = RNG_NEXT(temp);
 186         t0 = (unsigned)temp;
 187         v0 = (unsigned)(((uint64)t0 * p[i].M) >> 32);
 188         v0 = (v0 + ((t0 - v0) >> p[i].sh1)) >> p[i].sh2;
 189         v0 = t0 - v0*p[i].d + p[i].delta;
 190         arr[i] = saturate_cast<T>((int)v0);
 191     }
 192
 193     *state = temp;
 194 }
 195
 196
 197 #define DEF_RANDI_FUNC(suffix, type) \
 198 static void randBits_##suffix(type* arr, int len, uint64* state, \
 199                               const Vec2i* p, bool small_flag) \
 200 { randBits_(arr, len, state, p, small_flag); } \
 201 \
 202 static void randi_##suffix(type* arr, int len, uint64* state, \
 203                            const DivStruct* p, bool ) \
 204 { randi_(arr, len, state, p); }
 205
 206 DEF_RANDI_FUNC(8u, uchar)
 207 DEF_RANDI_FUNC(8s, schar)
 208 DEF_RANDI_FUNC(16u, ushort)
 209 DEF_RANDI_FUNC(16s, short)
 210 DEF_RANDI_FUNC(32s, int)
 211
 212 static void randf_32f( float* arr, int len, uint64* state, const Vec2f* p, bool )
 213 {
 214     uint64 temp = *state;
 215     int i = 0;
 216
 217     for( ; i <= len - 4; i += 4 )
 218     {
 219         float f[4];
 220         f[0] = (float)(int)(temp = RNG_NEXT(temp));
 221         f[1] = (float)(int)(temp = RNG_NEXT(temp));
 222         f[2] = (float)(int)(temp = RNG_NEXT(temp));
 223         f[3] = (float)(int)(temp = RNG_NEXT(temp));
 224
 225         // handwritten SSE is required not for performance but for numerical stability!
 226         // both 32-bit gcc and MSVC compilers trend to generate double precision SSE
 227         // while 64-bit compilers generate single precision SIMD instructions
 228         // so manual vectorisation forces all compilers to the single precision
 229 #if defined __SSE2__ || (defined _M_IX86_FP && 2 == _M_IX86_FP)
 230         __m128 q0 = _mm_loadu_ps((const float*)(p + i));
 231         __m128 q1 = _mm_loadu_ps((const float*)(p + i + 2));
 232
 233         __m128 q01l = _mm_unpacklo_ps(q0, q1);
 234         __m128 q01h = _mm_unpackhi_ps(q0, q1);
 235
 236         __m128 p0 = _mm_unpacklo_ps(q01l, q01h);
 237         __m128 p1 = _mm_unpackhi_ps(q01l, q01h);
 238
 239         _mm_storeu_ps(arr + i, _mm_add_ps(_mm_mul_ps(_mm_loadu_ps(f), p0), p1));
 240 #else
 241         arr[i+0] = f[0]*p[i+0][0] + p[i+0][1];
 242         arr[i+1] = f[1]*p[i+1][0] + p[i+1][1];
 243         arr[i+2] = f[2]*p[i+2][0] + p[i+2][1];
 244         arr[i+3] = f[3]*p[i+3][0] + p[i+3][1];
 245 #endif
 246     }
 247
 248     for( ; i < len; i++ )
 249     {
 250         temp = RNG_NEXT(temp);
 251 #if defined __SSE2__ || (defined _M_IX86_FP && 2 == _M_IX86_FP)
 252         _mm_store_ss(arr + i, _mm_add_ss(
 253                 _mm_mul_ss(_mm_set_ss((float)(int)temp), _mm_set_ss(p[i][0])),
 254                 _mm_set_ss(p[i][1]))
 255                 );
 256 #else
 257         arr[i] = (int)temp*p[i][0] + p[i][1];
 258 #endif
 259     }
 260
 261     *state = temp;
 262 }
 263
 264
 265 static void
 266 randf_64f( double* arr, int len, uint64* state, const Vec2d* p, bool )
 267 {
 268     uint64 temp = *state;
 269     int64 v = 0;
 270     int i;
 271
 272     for( i = 0; i <= len - 4; i += 4 )
 273     {
 274         double f0, f1;
 275
 276         temp = RNG_NEXT(temp);
 277         v = (temp >> 32)|(temp << 32);
 278         f0 = v*p[i][0] + p[i][1];
 279         temp = RNG_NEXT(temp);
 280         v = (temp >> 32)|(temp << 32);
 281         f1 = v*p[i+1][0] + p[i+1][1];
 282         arr[i] = f0; arr[i+1] = f1;
 283
 284         temp = RNG_NEXT(temp);
 285         v = (temp >> 32)|(temp << 32);
 286         f0 = v*p[i+2][0] + p[i+2][1];
 287         temp = RNG_NEXT(temp);
 288         v = (temp >> 32)|(temp << 32);
 289         f1 = v*p[i+3][0] + p[i+3][1];
 290         arr[i+2] = f0; arr[i+3] = f1;
 291     }
 292
 293     for( ; i < len; i++ )
 294     {
 295         temp = RNG_NEXT(temp);
 296         v = (temp >> 32)|(temp << 32);
 297         arr[i] = v*p[i][0] + p[i][1];
 298     }
 299
 300     *state = temp;
 301 }
 302
 303 typedef void (*RandFunc)(uchar* arr, int len, uint64* state, const void* p, bool small_flag);
 304
 305
 306 static RandFunc randTab[][8] =
 307 {
 308     {
 309         (RandFunc)randi_8u, (RandFunc)randi_8s, (RandFunc)randi_16u, (RandFunc)randi_16s,
 310         (RandFunc)randi_32s, (RandFunc)randf_32f, (RandFunc)randf_64f, 0
 311     },
 312     {
 313         (RandFunc)randBits_8u, (RandFunc)randBits_8s, (RandFunc)randBits_16u, (RandFunc)randBits_16s,
 314         (RandFunc)randBits_32s, 0, 0, 0
 315     }
 316 };
 317
 318 /*
 319    The code below implements the algorithm described in
 320    "The Ziggurat Method for Generating Random Variables"
 321    by Marsaglia and Tsang, Journal of Statistical Software.
 322 */
 323 static void
 324 randn_0_1_32f( float* arr, int len, uint64* state )
 325 {
 326     const float r = 3.442620f; // The start of the right tail
 327     const float rng_flt = 2.3283064365386962890625e-10f; // 2^-32
 328     static unsigned kn[128];
 329     static float wn[128], fn[128];
 330     uint64 temp = *state;
 331     static bool initialized=false;
 332     int i;
 333
 334     if( !initialized )
 335     {
 336         const double m1 = 2147483648.0;
 337         double dn = 3.442619855899, tn = dn, vn = 9.91256303526217e-3;
 338
 339         // Set up the tables
 340         double q = vn/std::exp(-.5*dn*dn);
 341         kn[0] = (unsigned)((dn/q)*m1);
 342         kn[1] = 0;
 343
 344         wn[0] = (float)(q/m1);
 345         wn[127] = (float)(dn/m1);
 346
 347         fn[0] = 1.f;
 348         fn[127] = (float)std::exp(-.5*dn*dn);
 349
 350         for(i=126;i>=1;i--)
 351         {
 352             dn = std::sqrt(-2.*std::log(vn/dn+std::exp(-.5*dn*dn)));
 353             kn[i+1] = (unsigned)((dn/tn)*m1);
 354             tn = dn;
 355             fn[i] = (float)std::exp(-.5*dn*dn);
 356             wn[i] = (float)(dn/m1);
 357         }
 358         initialized = true;
 359     }
 360
 361     for( i = 0; i < len; i++ )
 362     {
 363         float x, y;
 364         for(;;)
 365         {
 366             int hz = (int)temp;
 367             temp = RNG_NEXT(temp);
 368             int iz = hz & 127;
 369             x = hz*wn[iz];
 370             if( (unsigned)std::abs(hz) < kn[iz] )
 371                 break;
 372             if( iz == 0) // iz==0, handles the base strip
 373             {
 374                 do
 375                 {
 376                     x = (unsigned)temp*rng_flt;
 377                     temp = RNG_NEXT(temp);
 378                     y = (unsigned)temp*rng_flt;
 379                     temp = RNG_NEXT(temp);
 380                     x = (float)(-std::log(x+FLT_MIN)*0.2904764);
 381                     y = (float)-std::log(y+FLT_MIN);
 382                 }       // .2904764 is 1/r
 383                 while( y + y < x*x );
 384                 x = hz > 0 ? r + x : -r - x;
 385                 break;
 386             }
 387             // iz > 0, handle the wedges of other strips
 388             y = (unsigned)temp*rng_flt;
 389             temp = RNG_NEXT(temp);
 390             if( fn[iz] + y*(fn[iz - 1] - fn[iz]) < std::exp(-.5*x*x) )
 391                 break;
 392         }
 393         arr[i] = x;
 394     }
 395     *state = temp;
 396 }
 397
 398
 399 double RNG::gaussian(double sigma)
 400 {
 401     float temp;
 402     randn_0_1_32f( &temp, 1, &state );
 403     return temp*sigma;
 404 }
 405
 406
 407 template<typename T, typename PT> static void
 408 randnScale_( const float* src, T* dst, int len, int cn, const PT* mean, const PT* stddev, bool stdmtx )
 409 {
 410     int i, j, k;
 411     if( !stdmtx )
 412     {
 413         if( cn == 1 )
 414         {
 415             PT b = mean[0], a = stddev[0];
 416             for( i = 0; i < len; i++ )
 417                 dst[i] = saturate_cast<T>(src[i]*a + b);
 418         }
 419         else
 420         {
 421             for( i = 0; i < len; i++, src += cn, dst += cn )
 422                 for( k = 0; k < cn; k++ )
 423                     dst[k] = saturate_cast<T>(src[k]*stddev[k] + mean[k]);
 424         }
 425     }
 426     else
 427     {
 428         for( i = 0; i < len; i++, src += cn, dst += cn )
 429         {
 430             for( j = 0; j < cn; j++ )
 431             {
 432                 PT s = mean[j];
 433                 for( k = 0; k < cn; k++ )
 434                     s += src[k]*stddev[j*cn + k];
 435                 dst[j] = saturate_cast<T>(s);
 436             }
 437         }
 438     }
 439 }
 440
 441 static void randnScale_8u( const float* src, uchar* dst, int len, int cn,
 442                             const float* mean, const float* stddev, bool stdmtx )
 443 { randnScale_(src, dst, len, cn, mean, stddev, stdmtx); }
 444
 445 static void randnScale_8s( const float* src, schar* dst, int len, int cn,
 446                             const float* mean, const float* stddev, bool stdmtx )
 447 { randnScale_(src, dst, len, cn, mean, stddev, stdmtx); }
 448
 449 static void randnScale_16u( const float* src, ushort* dst, int len, int cn,
 450                              const float* mean, const float* stddev, bool stdmtx )
 451 { randnScale_(src, dst, len, cn, mean, stddev, stdmtx); }
 452
 453 static void randnScale_16s( const float* src, short* dst, int len, int cn,
 454                              const float* mean, const float* stddev, bool stdmtx )
 455 { randnScale_(src, dst, len, cn, mean, stddev, stdmtx); }
 456
 457 static void randnScale_32s( const float* src, int* dst, int len, int cn,
 458                              const float* mean, const float* stddev, bool stdmtx )
 459 { randnScale_(src, dst, len, cn, mean, stddev, stdmtx); }
 460
 461 static void randnScale_32f( const float* src, float* dst, int len, int cn,
 462                              const float* mean, const float* stddev, bool stdmtx )
 463 { randnScale_(src, dst, len, cn, mean, stddev, stdmtx); }
 464
 465 static void randnScale_64f( const float* src, double* dst, int len, int cn,
 466                              const double* mean, const double* stddev, bool stdmtx )
 467 { randnScale_(src, dst, len, cn, mean, stddev, stdmtx); }
 468
 469 typedef void (*RandnScaleFunc)(const float* src, uchar* dst, int len, int cn,
 470                                const uchar*, const uchar*, bool);
 471
 472 static RandnScaleFunc randnScaleTab[] =
 473 {
 474     (RandnScaleFunc)randnScale_8u, (RandnScaleFunc)randnScale_8s, (RandnScaleFunc)randnScale_16u,
 475     (RandnScaleFunc)randnScale_16s, (RandnScaleFunc)randnScale_32s, (RandnScaleFunc)randnScale_32f,
 476     (RandnScaleFunc)randnScale_64f, 0
 477 };
 478
 479 void RNG::fill( InputOutputArray _mat, int disttype,
 480                 InputArray _param1arg, InputArray _param2arg, bool saturateRange )
 481 {
 482     Mat mat = _mat.getMat(), _param1 = _param1arg.getMat(), _param2 = _param2arg.getMat();
 483     int depth = mat.depth(), cn = mat.channels();
 484     AutoBuffer<double> _parambuf;
 485     int j, k, fast_int_mode = 0, smallFlag = 1;
 486     RandFunc func = 0;
 487     RandnScaleFunc scaleFunc = 0;
 488
 489     CV_Assert(_param1.channels() == 1 && (_param1.rows == 1 || _param1.cols == 1) &&
 490               (_param1.rows + _param1.cols - 1 == cn || _param1.rows + _param1.cols - 1 == 1 ||
 491                (_param1.size() == Size(1, 4) && _param1.type() == CV_64F && cn <= 4)));
 492     CV_Assert( _param2.channels() == 1 &&
 493                (((_param2.rows == 1 || _param2.cols == 1) &&
 494                 (_param2.rows + _param2.cols - 1 == cn || _param2.rows + _param2.cols - 1 == 1 ||
 495                 (_param1.size() == Size(1, 4) && _param1.type() == CV_64F && cn <= 4))) ||
 496                 (_param2.rows == cn && _param2.cols == cn && disttype == NORMAL)));
 497
 498     Vec2i* ip = 0;
 499     Vec2d* dp = 0;
 500     Vec2f* fp = 0;
 501     DivStruct* ds = 0;
 502     uchar* mean = 0;
 503     uchar* stddev = 0;
 504     bool stdmtx = false;
 505     int n1 = (int)_param1.total();
 506     int n2 = (int)_param2.total();
 507
 508     if( disttype == UNIFORM )
 509     {
 510         _parambuf.allocate(cn*8 + n1 + n2);
 511         double* parambuf = _parambuf;
 512         double* p1 = (double*)_param1.data;
 513         double* p2 = (double*)_param2.data;
 514
 515         if( !_param1.isContinuous() || _param1.type() != CV_64F || n1 != cn )
 516         {
 517             Mat tmp(_param1.size(), CV_64F, parambuf);
 518             _param1.convertTo(tmp, CV_64F);
 519             p1 = parambuf;
 520             if( n1 < cn )
 521                 for( j = n1; j < cn; j++ )
 522                     p1[j] = p1[j-n1];
 523         }
 524
 525         if( !_param2.isContinuous() || _param2.type() != CV_64F || n2 != cn )
 526         {
 527             Mat tmp(_param2.size(), CV_64F, parambuf + cn);
 528             _param2.convertTo(tmp, CV_64F);
 529             p2 = parambuf + cn;
 530             if( n2 < cn )
 531                 for( j = n2; j < cn; j++ )
 532                     p2[j] = p2[j-n2];
 533         }
 534
 535         if( depth <= CV_32S )
 536         {
 537             ip = (Vec2i*)(parambuf + cn*2);
 538             for( j = 0, fast_int_mode = 1; j < cn; j++ )
 539             {
 540                 double a = min(p1[j], p2[j]);
 541                 double b = max(p1[j], p2[j]);
 542                 if( saturateRange )
 543                 {
 544                     a = max(a, depth == CV_8U || depth == CV_16U ? 0. :
 545                             depth == CV_8S ? -128. : depth == CV_16S ? -32768. : (double)INT_MIN);
 546                     b = min(b, depth == CV_8U ? 256. : depth == CV_16U ? 65536. :
 547                             depth == CV_8S ? 128. : depth == CV_16S ? 32768. : (double)INT_MAX);
 548                 }
 549                 ip[j][1] = cvCeil(a);
 550                 int idiff = ip[j][0] = cvFloor(b) - ip[j][1] - 1;
 551                 double diff = b - a;
 552
 553                 fast_int_mode &= diff <= 4294967296. && (idiff & (idiff+1)) == 0;
 554                 if( fast_int_mode )
 555                     smallFlag &= idiff <= 255;
 556                 else
 557                 {
 558                     if( diff > INT_MAX )
 559                         ip[j][0] = INT_MAX;
 560                     if( a < INT_MIN/2 )
 561                         ip[j][1] = INT_MIN/2;
 562                 }
 563             }
 564
 565             if( !fast_int_mode )
 566             {
 567                 ds = (DivStruct*)(ip + cn);
 568                 for( j = 0; j < cn; j++ )
 569                 {
 570                     ds[j].delta = ip[j][1];
 571                     unsigned d = ds[j].d = (unsigned)(ip[j][0]+1);
 572                     int l = 0;
 573                     while(((uint64)1 << l) < d)
 574                         l++;
 575                     ds[j].M = (unsigned)(((uint64)1 << 32)*(((uint64)1 << l) - d)/d) + 1;
 576                     ds[j].sh1 = min(l, 1);
 577                     ds[j].sh2 = max(l - 1, 0);
 578                 }
 579             }
 580
 581             func = randTab[fast_int_mode][depth];
 582         }
 583         else
 584         {
 585             double scale = depth == CV_64F ?
 586                 5.4210108624275221700372640043497e-20 : // 2**-64
 587                 2.3283064365386962890625e-10;           // 2**-32
 588             double maxdiff = saturateRange ? (double)FLT_MAX : DBL_MAX;
 589
 590             // for each channel i compute such dparam[0][i] & dparam[1][i],
 591             // so that a signed 32/64-bit integer X is transformed to
 592             // the range [param1.val[i], param2.val[i]) using
 593             // dparam[1][i]*X + dparam[0][i]
 594             if( depth == CV_32F )
 595             {
 596                 fp = (Vec2f*)(parambuf + cn*2);
 597                 for( j = 0; j < cn; j++ )
 598                 {
 599                     fp[j][0] = (float)(std::min(maxdiff, p2[j] - p1[j])*scale);
 600                     fp[j][1] = (float)((p2[j] + p1[j])*0.5);
 601                 }
 602             }
 603             else
 604             {
 605                 dp = (Vec2d*)(parambuf + cn*2);
 606                 for( j = 0; j < cn; j++ )
 607                 {
 608                     dp[j][0] = std::min(DBL_MAX, p2[j] - p1[j])*scale;
 609                     dp[j][1] = ((p2[j] + p1[j])*0.5);
 610                 }
 611             }
 612
 613             func = randTab[0][depth];
 614         }
 615         CV_Assert( func != 0 );
 616     }
 617     else if( disttype == CV_RAND_NORMAL )
 618     {
 619         _parambuf.allocate(MAX(n1, cn) + MAX(n2, cn));
 620         double* parambuf = _parambuf;
 621
 622         int ptype = depth == CV_64F ? CV_64F : CV_32F;
 623         int esz = (int)CV_ELEM_SIZE(ptype);
 624
 625         if( _param1.isContinuous() && _param1.type() == ptype )
 626             mean = _param1.data;
 627         else
 628         {
 629             Mat tmp(_param1.size(), ptype, parambuf);
 630             _param1.convertTo(tmp, ptype);
 631             mean = (uchar*)parambuf;
 632         }
 633
 634         if( n1 < cn )
 635             for( j = n1*esz; j < cn*esz; j++ )
 636                 mean[j] = mean[j - n1*esz];
 637
 638         if( _param2.isContinuous() && _param2.type() == ptype )
 639             stddev = _param2.data;
 640         else
 641         {
 642             Mat tmp(_param2.size(), ptype, parambuf + cn);
 643             _param2.convertTo(tmp, ptype);
 644             stddev = (uchar*)(parambuf + cn);
 645         }
 646
 647         if( n1 < cn )
 648             for( j = n1*esz; j < cn*esz; j++ )
 649                 stddev[j] = stddev[j - n1*esz];
 650
 651         stdmtx = _param2.rows == cn && _param2.cols == cn;
 652         scaleFunc = randnScaleTab[depth];
 653         CV_Assert( scaleFunc != 0 );
 654     }
 655     else
 656         CV_Error( CV_StsBadArg, "Unknown distribution type" );
 657
 658     const Mat* arrays[] = {&mat, 0};
 659     uchar* ptr;
 660     NAryMatIterator it(arrays, &ptr);
 661     int total = (int)it.size, blockSize = std::min((BLOCK_SIZE + cn - 1)/cn, total);
 662     size_t esz = mat.elemSize();
 663     AutoBuffer<double> buf;
 664     uchar* param = 0;
 665     float* nbuf = 0;
 666
 667     if( disttype == UNIFORM )
 668     {
 669         buf.allocate(blockSize*cn*4);
 670         param = (uchar*)(double*)buf;
 671
 672         if( ip )
 673         {
 674             if( ds )
 675             {
 676                 DivStruct* p = (DivStruct*)param;
 677                 for( j = 0; j < blockSize*cn; j += cn )
 678                     for( k = 0; k < cn; k++ )
 679                         p[j + k] = ds[k];
 680             }
 681             else
 682             {
 683                 Vec2i* p = (Vec2i*)param;
 684                 for( j = 0; j < blockSize*cn; j += cn )
 685                     for( k = 0; k < cn; k++ )
 686                         p[j + k] = ip[k];
 687             }
 688         }
 689         else if( fp )
 690         {
 691             Vec2f* p = (Vec2f*)param;
 692             for( j = 0; j < blockSize*cn; j += cn )
 693                 for( k = 0; k < cn; k++ )
 694                     p[j + k] = fp[k];
 695         }
 696         else
 697         {
 698             Vec2d* p = (Vec2d*)param;
 699             for( j = 0; j < blockSize*cn; j += cn )
 700                 for( k = 0; k < cn; k++ )
 701                     p[j + k] = dp[k];
 702         }
 703     }
 704     else
 705     {
 706         buf.allocate((blockSize*cn+1)/2);
 707         nbuf = (float*)(double*)buf;
 708     }
 709
 710     for( size_t i = 0; i < it.nplanes; i++, ++it )
 711     {
 712         for( j = 0; j < total; j += blockSize )
 713         {
 714             int len = std::min(total - j, blockSize);
 715
 716             if( disttype == CV_RAND_UNI )
 717                 func( ptr, len*cn, &state, param, smallFlag != 0 );
 718             else
 719             {
 720                 randn_0_1_32f(nbuf, len*cn, &state);
 721                 scaleFunc(nbuf, ptr, len, cn, mean, stddev, stdmtx);
 722             }
 723             ptr += len*esz;
 724         }
 725     }
 726 }
 727
 728 #ifdef WIN32
 729
 730
 731 #ifdef HAVE_WINRT
 732 // using C++11 thread attribute for local thread data
 733 __declspec( thread ) RNG* rng = NULL;
 734
 735  void deleteThreadRNGData()
 736  {
 737     if (rng)
 738         delete rng;
 739 }
 740
 741 RNG& theRNG()
 742 {
 743     if (!rng)
 744     {
 745         rng =  new RNG;
 746     }
 747     return *rng;
 748 }
 749 #else
 750 #ifdef WINCE
 751 #       define TLS_OUT_OF_INDEXES ((DWORD)0xFFFFFFFF)
 752 #endif
 753 static DWORD tlsRNGKey = TLS_OUT_OF_INDEXES;
 754
 755  void deleteThreadRNGData()
 756  {
 757      if( tlsRNGKey != TLS_OUT_OF_INDEXES )
 758          delete (RNG*)TlsGetValue( tlsRNGKey );
 759 }
 760
 761 RNG& theRNG()
 762 {
 763     if( tlsRNGKey == TLS_OUT_OF_INDEXES )
 764     {
 765        tlsRNGKey = TlsAlloc();
 766        CV_Assert(tlsRNGKey != TLS_OUT_OF_INDEXES);
 767     }
 768     RNG* rng = (RNG*)TlsGetValue( tlsRNGKey );
 769     if( !rng )
 770     {
 771        rng = new RNG;
 772        TlsSetValue( tlsRNGKey, rng );
 773     }
 774     return *rng;
 775 }
 776 #endif //HAVE_WINRT
 777 #else
 778
 779 static pthread_key_t tlsRNGKey = 0;
 780 static pthread_once_t tlsRNGKeyOnce = PTHREAD_ONCE_INIT;
 781
 782 static void deleteRNG(void* data)
 783 {
 784     delete (RNG*)data;
 785 }
 786
 787 static void makeRNGKey()
 788 {
 789     int errcode = pthread_key_create(&tlsRNGKey, deleteRNG);
 790     CV_Assert(errcode == 0);
 791 }
 792
 793 RNG& theRNG()
 794 {
 795     pthread_once(&tlsRNGKeyOnce, makeRNGKey);
 796     RNG* rng = (RNG*)pthread_getspecific(tlsRNGKey);
 797     if( !rng )
 798     {
 799         rng = new RNG;
 800         pthread_setspecific(tlsRNGKey, rng);
 801     }
 802     return *rng;
 803 }
 804
 805 #endif
 806
 807 }
 808
 809 void cv::randu(InputOutputArray dst, InputArray low, InputArray high)
 810 {
 811     theRNG().fill(dst, RNG::UNIFORM, low, high);
 812 }
 813
 814 void cv::randn(InputOutputArray dst, InputArray mean, InputArray stddev)
 815 {
 816     theRNG().fill(dst, RNG::NORMAL, mean, stddev);
 817 }
 818
 819 namespace cv
 820 {
 821
 822 template<typename T> static void
 823 randShuffle_( Mat& _arr, RNG& rng, double iterFactor )
 824 {
 825     int sz = _arr.rows*_arr.cols, iters = cvRound(iterFactor*sz);
 826     if( _arr.isContinuous() )
 827     {
 828         T* arr = (T*)_arr.data;
 829         for( int i = 0; i < iters; i++ )
 830         {
 831             int j = (unsigned)rng % sz, k = (unsigned)rng % sz;
 832             std::swap( arr[j], arr[k] );
 833         }
 834     }
 835     else
 836     {
 837         uchar* data = _arr.data;
 838         size_t step = _arr.step;
 839         int cols = _arr.cols;
 840         for( int i = 0; i < iters; i++ )
 841         {
 842             int j1 = (unsigned)rng % sz, k1 = (unsigned)rng % sz;
 843             int j0 = j1/cols, k0 = k1/cols;
 844             j1 -= j0*cols; k1 -= k0*cols;
 845             std::swap( ((T*)(data + step*j0))[j1], ((T*)(data + step*k0))[k1] );
 846         }
 847     }
 848 }
 849
 850 typedef void (*RandShuffleFunc)( Mat& dst, RNG& rng, double iterFactor );
 851
 852 }
 853
 854 void cv::randShuffle( InputOutputArray _dst, double iterFactor, RNG* _rng )
 855 {
 856     RandShuffleFunc tab[] =
 857     {
 858         0,
 859         randShuffle_<uchar>, // 1
 860         randShuffle_<ushort>, // 2
 861         randShuffle_<Vec<uchar,3> >, // 3
 862         randShuffle_<int>, // 4
 863         0,
 864         randShuffle_<Vec<ushort,3> >, // 6
 865         0,
 866         randShuffle_<Vec<int,2> >, // 8
 867         0, 0, 0,
 868         randShuffle_<Vec<int,3> >, // 12
 869         0, 0, 0,
 870         randShuffle_<Vec<int,4> >, // 16
 871         0, 0, 0, 0, 0, 0, 0,
 872         randShuffle_<Vec<int,6> >, // 24
 873         0, 0, 0, 0, 0, 0, 0,
 874         randShuffle_<Vec<int,8> > // 32
 875     };
 876
 877     Mat dst = _dst.getMat();
 878     RNG& rng = _rng ? *_rng : theRNG();
 879     CV_Assert( dst.elemSize() <= 32 );
 880     RandShuffleFunc func = tab[dst.elemSize()];
 881     CV_Assert( func != 0 );
 882     func( dst, rng, iterFactor );
 883 }
 884
 885 void cv::randShuffle_( InputOutputArray _dst, double iterFactor )
 886 {
 887     randShuffle(_dst, iterFactor);
 888 }
 889
 890 CV_IMPL void
 891 cvRandArr( CvRNG* _rng, CvArr* arr, int disttype, CvScalar param1, CvScalar param2 )
 892 {
 893     cv::Mat mat = cv::cvarrToMat(arr);
 894     // !!! this will only work for current 64-bit MWC RNG !!!
 895     cv::RNG& rng = _rng ? (cv::RNG&)*_rng : cv::theRNG();
 896     rng.fill(mat, disttype == CV_RAND_NORMAL ?
 897         cv::RNG::NORMAL : cv::RNG::UNIFORM, cv::Scalar(param1), cv::Scalar(param2) );
 898 }
 899
 900 CV_IMPL void cvRandShuffle( CvArr* arr, CvRNG* _rng, double iter_factor )
 901 {
 902     cv::Mat dst = cv::cvarrToMat(arr);
 903     cv::RNG& rng = _rng ? (cv::RNG&)*_rng : cv::theRNG();
 904     cv::randShuffle( dst, iter_factor, &rng );
 905 }
 906
 907 // Mersenne Twister random number generator.
 908 // Inspired by http://www.math.sci.hiroshima-u.ac.jp/~m-mat/MT/MT2002/CODES/mt19937ar.c
 909
 910 /*
 911    A C-program for MT19937, with initialization improved 2002/1/26.
 912    Coded by Takuji Nishimura and Makoto Matsumoto.
 913
 914    Before using, initialize the state by using init_genrand(seed)
 915    or init_by_array(init_key, key_length).
 916
 917    Copyright (C) 1997 - 2002, Makoto Matsumoto and Takuji Nishimura,
 918    All rights reserved.
 919
 920    Redistribution and use in source and binary forms, with or without
 921    modification, are permitted provided that the following conditions
 922    are met:
 923
 924      1. Redistributions of source code must retain the above copyright
 925         notice, this list of conditions and the following disclaimer.
 926
 927      2. Redistributions in binary form must reproduce the above copyright
 928         notice, this list of conditions and the following disclaimer in the
 929         documentation and/or other materials provided with the distribution.
 930
 931      3. The names of its contributors may not be used to endorse or promote
 932         products derived from this software without specific prior written
 933         permission.
 934
 935    THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 936    "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 937    LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 938    A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
 939    CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
 940    EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
 941    PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
 942    PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
 943    LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
 944    NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
 945    SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 946
 947
 948    Any feedback is very welcome.
 949    http://www.math.sci.hiroshima-u.ac.jp/~m-mat/MT/emt.html
 950    email: m-mat @ math.sci.hiroshima-u.ac.jp (remove space)
 951 */
 952
 953 cv::RNG_MT19937::RNG_MT19937(unsigned s) { seed(s); }
 954
 955 cv::RNG_MT19937::RNG_MT19937() { seed(5489U); }
 956
 957 void cv::RNG_MT19937::seed(unsigned s)
 958 {
 959     state[0]= s;
 960     for (mti = 1; mti < N; mti++)
 961     {
 962         /* See Knuth TAOCP Vol2. 3rd Ed. P.106 for multiplier. */
 963         state[mti] = (1812433253U * (state[mti - 1] ^ (state[mti - 1] >> 30)) + mti);
 964     }
 965 }
 966
 967 unsigned cv::RNG_MT19937::next()
 968 {
 969     /* mag01[x] = x * MATRIX_A  for x=0,1 */
 970     static unsigned mag01[2] = { 0x0U, /*MATRIX_A*/ 0x9908b0dfU};
 971
 972     const unsigned UPPER_MASK = 0x80000000U;
 973     const unsigned LOWER_MASK = 0x7fffffffU;
 974
 975     /* generate N words at one time */
 976     if (mti >= N)
 977     {
 978         int kk = 0;
 979
 980         for (; kk < N - M; ++kk)
 981         {
 982             unsigned y = (state[kk] & UPPER_MASK) | (state[kk + 1] & LOWER_MASK);
 983             state[kk] = state[kk + M] ^ (y >> 1) ^ mag01[y & 0x1U];
 984         }
 985
 986         for (; kk < N - 1; ++kk)
 987         {
 988             unsigned y = (state[kk] & UPPER_MASK) | (state[kk + 1] & LOWER_MASK);
 989             state[kk] = state[kk + (M - N)] ^ (y >> 1) ^ mag01[y & 0x1U];
 990         }
 991
 992         unsigned y = (state[N - 1] & UPPER_MASK) | (state[0] & LOWER_MASK);
 993         state[N - 1] = state[M - 1] ^ (y >> 1) ^ mag01[y & 0x1U];
 994
 995         mti = 0;
 996     }
 997
 998     unsigned y = state[mti++];
 999
1000     /* Tempering */
1001     y ^= (y >> 11);
1002     y ^= (y <<  7) & 0x9d2c5680U;
1003     y ^= (y << 15) & 0xefc60000U;
1004     y ^= (y >> 18);
1005
1006     return y;
1007 }
1008
1009 cv::RNG_MT19937::operator unsigned() { return next(); }
1010
1011 cv::RNG_MT19937::operator int() { return (int)next();}
1012
1013 cv::RNG_MT19937::operator float() { return next() * (1.f / 4294967296.f); }
1014
1015 cv::RNG_MT19937::operator double()
1016 {
1017     unsigned a = next() >> 5;
1018     unsigned b = next() >> 6;
1019     return (a * 67108864.0 + b) * (1.0 / 9007199254740992.0);
1020 }
1021
1022 int cv::RNG_MT19937::uniform(int a, int b) { return (int)(next() % (b - a) + a); }
1023
1024 float cv::RNG_MT19937::uniform(float a, float b) { return ((float)*this)*(b - a) + a; }
1025
1026 double cv::RNG_MT19937::uniform(double a, double b) { return ((double)*this)*(b - a) + a; }
1027
1028 unsigned cv::RNG_MT19937::operator ()(unsigned b) { return next() % b; }
1029
1030 unsigned cv::RNG_MT19937::operator ()() { return next(); }
1031
1032 /* End of file. */