modules/core/src/rand.cpp

   1 /*M///////////////////////////////////////////////////////////////////////////////////////
   2 //
   3 //  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
   4 //
   5 //  By downloading, copying, installing or using the software you agree to this license.
   6 //  If you do not agree to this license, do not download, install,
   7 //  copy or use the software.
   8 //
   9 //
  10 //                           License Agreement
  11 //                For Open Source Computer Vision Library
  12 //
  13 // Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
  14 // Copyright (C) 2009, Willow Garage Inc., all rights reserved.
  15 // Third party copyrights are property of their respective owners.
  16 //
  17 // Redistribution and use in source and binary forms, with or without modification,
  18 // are permitted provided that the following conditions are met:
  19 //
  20 //   * Redistribution's of source code must retain the above copyright notice,
  21 //     this list of conditions and the following disclaimer.
  22 //
  23 //   * Redistribution's in binary form must reproduce the above copyright notice,
  24 //     this list of conditions and the following disclaimer in the documentation
  25 //     and/or other materials provided with the distribution.
  26 //
  27 //   * The name of the copyright holders may not be used to endorse or promote products
  28 //     derived from this software without specific prior written permission.
  29 //
  30 // This software is provided by the copyright holders and contributors "as is" and
  31 // any express or implied warranties, including, but not limited to, the implied
  32 // warranties of merchantability and fitness for a particular purpose are disclaimed.
  33 // In no event shall the Intel Corporation or contributors be liable for any direct,
  34 // indirect, incidental, special, exemplary, or consequential damages
  35 // (including, but not limited to, procurement of substitute goods or services;
  36 // loss of use, data, or profits; or business interruption) however caused
  37 // and on any theory of liability, whether in contract, strict liability,
  38 // or tort (including negligence or otherwise) arising in any way out of
  39 // the use of this software, even if advised of the possibility of such damage.
  40 //
  41 //M*/
  42
  43 /* ////////////////////////////////////////////////////////////////////
  44 //
  45 //  Filling CvMat/IplImage instances with random numbers
  46 //
  47 // */
  48
  49 #include "precomp.hpp"
  50
  51 #if defined _WIN32 || defined WINCE
  52     #include <windows.h>
  53     #undef small
  54     #undef min
  55     #undef max
  56     #undef abs
  57 #endif
  58
  59 #if defined __SSE2__ || (defined _M_IX86_FP && 2 == _M_IX86_FP)
  60     #include "emmintrin.h"
  61 #endif
  62
  63 namespace cv
  64 {
  65
  66 ///////////////////////////// Functions Declaration //////////////////////////////////////
  67
  68 /*
  69    Multiply-with-carry generator is used here:
  70    temp = ( A*X(n) + carry )
  71    X(n+1) = temp mod (2^32)
  72    carry = temp / (2^32)
  73 */
  74
  75 #define  RNG_NEXT(x)    ((uint64)(unsigned)(x)*CV_RNG_COEFF + ((x) >> 32))
  76
  77 #ifdef __PPC64__
  78     #define PPC_MUL_ADD(ret, tmp, p0, p1)                           \
  79     asm volatile("fmuls %0,%1,%2\n\t fadds %0,%0,%3" : "=&f" (ret)  \
  80                 : "f" (tmp), "f" (p0), "f" (p1))
  81 #endif
  82
  83 /***************************************************************************************\
  84 *                           Pseudo-Random Number Generators (PRNGs)                     *
  85 \***************************************************************************************/
  86
  87 template<typename T> static void
  88 randBits_( T* arr, int len, uint64* state, const Vec2i* p, bool small_flag )
  89 {
  90     uint64 temp = *state;
  91     int i;
  92
  93     if( !small_flag )
  94     {
  95         for( i = 0; i <= len - 4; i += 4 )
  96         {
  97             int t0, t1;
  98
  99             temp = RNG_NEXT(temp);
 100             t0 = ((int)temp & p[i][0]) + p[i][1];
 101             temp = RNG_NEXT(temp);
 102             t1 = ((int)temp & p[i+1][0]) + p[i+1][1];
 103             arr[i] = saturate_cast<T>(t0);
 104             arr[i+1] = saturate_cast<T>(t1);
 105
 106             temp = RNG_NEXT(temp);
 107             t0 = ((int)temp & p[i+2][0]) + p[i+2][1];
 108             temp = RNG_NEXT(temp);
 109             t1 = ((int)temp & p[i+3][0]) + p[i+3][1];
 110             arr[i+2] = saturate_cast<T>(t0);
 111             arr[i+3] = saturate_cast<T>(t1);
 112         }
 113     }
 114     else
 115     {
 116         for( i = 0; i <= len - 4; i += 4 )
 117         {
 118             int t0, t1, t;
 119             temp = RNG_NEXT(temp);
 120             t = (int)temp;
 121             t0 = (t & p[i][0]) + p[i][1];
 122             t1 = ((t >> 8) & p[i+1][0]) + p[i+1][1];
 123             arr[i] = saturate_cast<T>(t0);
 124             arr[i+1] = saturate_cast<T>(t1);
 125
 126             t0 = ((t >> 16) & p[i+2][0]) + p[i+2][1];
 127             t1 = ((t >> 24) & p[i+3][0]) + p[i+3][1];
 128             arr[i+2] = saturate_cast<T>(t0);
 129             arr[i+3] = saturate_cast<T>(t1);
 130         }
 131     }
 132
 133     for( ; i < len; i++ )
 134     {
 135         int t0;
 136         temp = RNG_NEXT(temp);
 137
 138         t0 = ((int)temp & p[i][0]) + p[i][1];
 139         arr[i] = saturate_cast<T>(t0);
 140     }
 141
 142     *state = temp;
 143 }
 144
 145 struct DivStruct
 146 {
 147     unsigned d;
 148     unsigned M;
 149     int sh1, sh2;
 150     int delta;
 151 };
 152
 153 template<typename T> static void
 154 randi_( T* arr, int len, uint64* state, const DivStruct* p )
 155 {
 156     uint64 temp = *state;
 157     int i = 0;
 158     unsigned t0, t1, v0, v1;
 159
 160     for( i = 0; i <= len - 4; i += 4 )
 161     {
 162         temp = RNG_NEXT(temp);
 163         t0 = (unsigned)temp;
 164         temp = RNG_NEXT(temp);
 165         t1 = (unsigned)temp;
 166         v0 = (unsigned)(((uint64)t0 * p[i].M) >> 32);
 167         v1 = (unsigned)(((uint64)t1 * p[i+1].M) >> 32);
 168         v0 = (v0 + ((t0 - v0) >> p[i].sh1)) >> p[i].sh2;
 169         v1 = (v1 + ((t1 - v1) >> p[i+1].sh1)) >> p[i+1].sh2;
 170         v0 = t0 - v0*p[i].d + p[i].delta;
 171         v1 = t1 - v1*p[i+1].d + p[i+1].delta;
 172         arr[i] = saturate_cast<T>((int)v0);
 173         arr[i+1] = saturate_cast<T>((int)v1);
 174
 175         temp = RNG_NEXT(temp);
 176         t0 = (unsigned)temp;
 177         temp = RNG_NEXT(temp);
 178         t1 = (unsigned)temp;
 179         v0 = (unsigned)(((uint64)t0 * p[i+2].M) >> 32);
 180         v1 = (unsigned)(((uint64)t1 * p[i+3].M) >> 32);
 181         v0 = (v0 + ((t0 - v0) >> p[i+2].sh1)) >> p[i+2].sh2;
 182         v1 = (v1 + ((t1 - v1) >> p[i+3].sh1)) >> p[i+3].sh2;
 183         v0 = t0 - v0*p[i+2].d + p[i+2].delta;
 184         v1 = t1 - v1*p[i+3].d + p[i+3].delta;
 185         arr[i+2] = saturate_cast<T>((int)v0);
 186         arr[i+3] = saturate_cast<T>((int)v1);
 187     }
 188
 189     for( ; i < len; i++ )
 190     {
 191         temp = RNG_NEXT(temp);
 192         t0 = (unsigned)temp;
 193         v0 = (unsigned)(((uint64)t0 * p[i].M) >> 32);
 194         v0 = (v0 + ((t0 - v0) >> p[i].sh1)) >> p[i].sh2;
 195         v0 = t0 - v0*p[i].d + p[i].delta;
 196         arr[i] = saturate_cast<T>((int)v0);
 197     }
 198
 199     *state = temp;
 200 }
 201
 202
 203 #define DEF_RANDI_FUNC(suffix, type) \
 204 static void randBits_##suffix(type* arr, int len, uint64* state, \
 205                               const Vec2i* p, bool small_flag) \
 206 { randBits_(arr, len, state, p, small_flag); } \
 207 \
 208 static void randi_##suffix(type* arr, int len, uint64* state, \
 209                            const DivStruct* p, bool ) \
 210 { randi_(arr, len, state, p); }
 211
 212 DEF_RANDI_FUNC(8u, uchar)
 213 DEF_RANDI_FUNC(8s, schar)
 214 DEF_RANDI_FUNC(16u, ushort)
 215 DEF_RANDI_FUNC(16s, short)
 216 DEF_RANDI_FUNC(32s, int)
 217
 218 static void randf_32f( float* arr, int len, uint64* state, const Vec2f* p, bool )
 219 {
 220     uint64 temp = *state;
 221     int i = 0;
 222
 223     for( ; i <= len - 4; i += 4 )
 224     {
 225         float f[4];
 226         f[0] = (float)(int)(temp = RNG_NEXT(temp));
 227         f[1] = (float)(int)(temp = RNG_NEXT(temp));
 228         f[2] = (float)(int)(temp = RNG_NEXT(temp));
 229         f[3] = (float)(int)(temp = RNG_NEXT(temp));
 230
 231         // handwritten SSE is required not for performance but for numerical stability!
 232         // both 32-bit gcc and MSVC compilers trend to generate double precision SSE
 233         // while 64-bit compilers generate single precision SIMD instructions
 234         // so manual vectorisation forces all compilers to the single precision
 235 #if defined __SSE2__ || (defined _M_IX86_FP && 2 == _M_IX86_FP)
 236         __m128 q0 = _mm_loadu_ps((const float*)(p + i));
 237         __m128 q1 = _mm_loadu_ps((const float*)(p + i + 2));
 238
 239         __m128 q01l = _mm_unpacklo_ps(q0, q1);
 240         __m128 q01h = _mm_unpackhi_ps(q0, q1);
 241
 242         __m128 p0 = _mm_unpacklo_ps(q01l, q01h);
 243         __m128 p1 = _mm_unpackhi_ps(q01l, q01h);
 244
 245         _mm_storeu_ps(arr + i, _mm_add_ps(_mm_mul_ps(_mm_loadu_ps(f), p0), p1));
 246 #elif defined __ARM_NEON && defined __aarch64__
 247         // handwritten NEON is required not for performance but for numerical stability!
 248         // 64bit gcc tends to use fmadd instead of separate multiply and add
 249         // use volatile to ensure to separate the multiply and add
 250         float32x4x2_t q = vld2q_f32((const float*)(p + i));
 251
 252         float32x4_t p0 = q.val[0];
 253         float32x4_t p1 = q.val[1];
 254
 255         volatile float32x4_t v0 = vmulq_f32(vld1q_f32(f), p0);
 256         vst1q_f32(arr+i, vaddq_f32(v0, p1));
 257 #elif defined __PPC64__
 258         // inline asm is required for numerical stability!
 259         // compilers tends to use floating multiply-add single(fmadds)
 260         // instead of separate multiply and add
 261         PPC_MUL_ADD(arr[i+0], f[0], p[i+0][0], p[i+0][1]);
 262         PPC_MUL_ADD(arr[i+1], f[1], p[i+1][0], p[i+1][1]);
 263         PPC_MUL_ADD(arr[i+2], f[2], p[i+2][0], p[i+2][1]);
 264         PPC_MUL_ADD(arr[i+3], f[3], p[i+3][0], p[i+3][1]);
 265 #else
 266         arr[i+0] = f[0]*p[i+0][0] + p[i+0][1];
 267         arr[i+1] = f[1]*p[i+1][0] + p[i+1][1];
 268         arr[i+2] = f[2]*p[i+2][0] + p[i+2][1];
 269         arr[i+3] = f[3]*p[i+3][0] + p[i+3][1];
 270 #endif
 271     }
 272
 273     for( ; i < len; i++ )
 274     {
 275         temp = RNG_NEXT(temp);
 276 #if defined __SSE2__ || (defined _M_IX86_FP && 2 == _M_IX86_FP)
 277         _mm_store_ss(arr + i, _mm_add_ss(
 278                 _mm_mul_ss(_mm_set_ss((float)(int)temp), _mm_set_ss(p[i][0])),
 279                 _mm_set_ss(p[i][1]))
 280                 );
 281 #elif defined __ARM_NEON && defined __aarch64__
 282         float32x2_t t = vadd_f32(vmul_f32(
 283                 vdup_n_f32((float)(int)temp), vdup_n_f32(p[i][0])),
 284                 vdup_n_f32(p[i][1]));
 285         arr[i] = vget_lane_f32(t, 0);
 286 #elif defined __PPC64__
 287         PPC_MUL_ADD(arr[i], (float)(int)temp, p[i][0], p[i][1]);
 288 #else
 289         arr[i] = (int)temp*p[i][0] + p[i][1];
 290 #endif
 291     }
 292
 293     *state = temp;
 294 }
 295
 296
 297 static void
 298 randf_64f( double* arr, int len, uint64* state, const Vec2d* p, bool )
 299 {
 300     uint64 temp = *state;
 301     int64 v = 0;
 302     int i;
 303
 304     for( i = 0; i <= len - 4; i += 4 )
 305     {
 306         double f0, f1;
 307
 308         temp = RNG_NEXT(temp);
 309         v = (temp >> 32)|(temp << 32);
 310         f0 = v*p[i][0] + p[i][1];
 311         temp = RNG_NEXT(temp);
 312         v = (temp >> 32)|(temp << 32);
 313         f1 = v*p[i+1][0] + p[i+1][1];
 314         arr[i] = f0; arr[i+1] = f1;
 315
 316         temp = RNG_NEXT(temp);
 317         v = (temp >> 32)|(temp << 32);
 318         f0 = v*p[i+2][0] + p[i+2][1];
 319         temp = RNG_NEXT(temp);
 320         v = (temp >> 32)|(temp << 32);
 321         f1 = v*p[i+3][0] + p[i+3][1];
 322         arr[i+2] = f0; arr[i+3] = f1;
 323     }
 324
 325     for( ; i < len; i++ )
 326     {
 327         temp = RNG_NEXT(temp);
 328         v = (temp >> 32)|(temp << 32);
 329         arr[i] = v*p[i][0] + p[i][1];
 330     }
 331
 332     *state = temp;
 333 }
 334
 335 typedef void (*RandFunc)(uchar* arr, int len, uint64* state, const void* p, bool small_flag);
 336
 337
 338 static RandFunc randTab[][8] =
 339 {
 340     {
 341         (RandFunc)randi_8u, (RandFunc)randi_8s, (RandFunc)randi_16u, (RandFunc)randi_16s,
 342         (RandFunc)randi_32s, (RandFunc)randf_32f, (RandFunc)randf_64f, 0
 343     },
 344     {
 345         (RandFunc)randBits_8u, (RandFunc)randBits_8s, (RandFunc)randBits_16u, (RandFunc)randBits_16s,
 346         (RandFunc)randBits_32s, 0, 0, 0
 347     }
 348 };
 349
 350 /*
 351    The code below implements the algorithm described in
 352    "The Ziggurat Method for Generating Random Variables"
 353    by Marsaglia and Tsang, Journal of Statistical Software.
 354 */
 355 static void
 356 randn_0_1_32f( float* arr, int len, uint64* state )
 357 {
 358     const float r = 3.442620f; // The start of the right tail
 359     const float rng_flt = 2.3283064365386962890625e-10f; // 2^-32
 360     static unsigned kn[128];
 361     static float wn[128], fn[128];
 362     uint64 temp = *state;
 363     static bool initialized=false;
 364     int i;
 365
 366     if( !initialized )
 367     {
 368         const double m1 = 2147483648.0;
 369         double dn = 3.442619855899, tn = dn, vn = 9.91256303526217e-3;
 370
 371         // Set up the tables
 372         double q = vn/std::exp(-.5*dn*dn);
 373         kn[0] = (unsigned)((dn/q)*m1);
 374         kn[1] = 0;
 375
 376         wn[0] = (float)(q/m1);
 377         wn[127] = (float)(dn/m1);
 378
 379         fn[0] = 1.f;
 380         fn[127] = (float)std::exp(-.5*dn*dn);
 381
 382         for(i=126;i>=1;i--)
 383         {
 384             dn = std::sqrt(-2.*std::log(vn/dn+std::exp(-.5*dn*dn)));
 385             kn[i+1] = (unsigned)((dn/tn)*m1);
 386             tn = dn;
 387             fn[i] = (float)std::exp(-.5*dn*dn);
 388             wn[i] = (float)(dn/m1);
 389         }
 390         initialized = true;
 391     }
 392
 393     for( i = 0; i < len; i++ )
 394     {
 395         float x, y;
 396         for(;;)
 397         {
 398             int hz = (int)temp;
 399             temp = RNG_NEXT(temp);
 400             int iz = hz & 127;
 401             x = hz*wn[iz];
 402             if( (unsigned)std::abs(hz) < kn[iz] )
 403                 break;
 404             if( iz == 0) // iz==0, handles the base strip
 405             {
 406                 do
 407                 {
 408                     x = (unsigned)temp*rng_flt;
 409                     temp = RNG_NEXT(temp);
 410                     y = (unsigned)temp*rng_flt;
 411                     temp = RNG_NEXT(temp);
 412                     x = (float)(-std::log(x+FLT_MIN)*0.2904764);
 413                     y = (float)-std::log(y+FLT_MIN);
 414                 }       // .2904764 is 1/r
 415                 while( y + y < x*x );
 416                 x = hz > 0 ? r + x : -r - x;
 417                 break;
 418             }
 419             // iz > 0, handle the wedges of other strips
 420             y = (unsigned)temp*rng_flt;
 421             temp = RNG_NEXT(temp);
 422             if( fn[iz] + y*(fn[iz - 1] - fn[iz]) < std::exp(-.5*x*x) )
 423                 break;
 424         }
 425         arr[i] = x;
 426     }
 427     *state = temp;
 428 }
 429
 430
 431 double RNG::gaussian(double sigma)
 432 {
 433     float temp;
 434     randn_0_1_32f( &temp, 1, &state );
 435     return temp*sigma;
 436 }
 437
 438
 439 template<typename T, typename PT> static void
 440 randnScale_( const float* src, T* dst, int len, int cn, const PT* mean, const PT* stddev, bool stdmtx )
 441 {
 442     int i, j, k;
 443     if( !stdmtx )
 444     {
 445         if( cn == 1 )
 446         {
 447             PT b = mean[0], a = stddev[0];
 448             for( i = 0; i < len; i++ )
 449                 dst[i] = saturate_cast<T>(src[i]*a + b);
 450         }
 451         else
 452         {
 453             for( i = 0; i < len; i++, src += cn, dst += cn )
 454                 for( k = 0; k < cn; k++ )
 455                     dst[k] = saturate_cast<T>(src[k]*stddev[k] + mean[k]);
 456         }
 457     }
 458     else
 459     {
 460         for( i = 0; i < len; i++, src += cn, dst += cn )
 461         {
 462             for( j = 0; j < cn; j++ )
 463             {
 464                 PT s = mean[j];
 465                 for( k = 0; k < cn; k++ )
 466                     s += src[k]*stddev[j*cn + k];
 467                 dst[j] = saturate_cast<T>(s);
 468             }
 469         }
 470     }
 471 }
 472
 473 static void randnScale_8u( const float* src, uchar* dst, int len, int cn,
 474                             const float* mean, const float* stddev, bool stdmtx )
 475 { randnScale_(src, dst, len, cn, mean, stddev, stdmtx); }
 476
 477 static void randnScale_8s( const float* src, schar* dst, int len, int cn,
 478                             const float* mean, const float* stddev, bool stdmtx )
 479 { randnScale_(src, dst, len, cn, mean, stddev, stdmtx); }
 480
 481 static void randnScale_16u( const float* src, ushort* dst, int len, int cn,
 482                              const float* mean, const float* stddev, bool stdmtx )
 483 { randnScale_(src, dst, len, cn, mean, stddev, stdmtx); }
 484
 485 static void randnScale_16s( const float* src, short* dst, int len, int cn,
 486                              const float* mean, const float* stddev, bool stdmtx )
 487 { randnScale_(src, dst, len, cn, mean, stddev, stdmtx); }
 488
 489 static void randnScale_32s( const float* src, int* dst, int len, int cn,
 490                              const float* mean, const float* stddev, bool stdmtx )
 491 { randnScale_(src, dst, len, cn, mean, stddev, stdmtx); }
 492
 493 static void randnScale_32f( const float* src, float* dst, int len, int cn,
 494                              const float* mean, const float* stddev, bool stdmtx )
 495 { randnScale_(src, dst, len, cn, mean, stddev, stdmtx); }
 496
 497 static void randnScale_64f( const float* src, double* dst, int len, int cn,
 498                              const double* mean, const double* stddev, bool stdmtx )
 499 { randnScale_(src, dst, len, cn, mean, stddev, stdmtx); }
 500
 501 typedef void (*RandnScaleFunc)(const float* src, uchar* dst, int len, int cn,
 502                                const uchar*, const uchar*, bool);
 503
 504 static RandnScaleFunc randnScaleTab[] =
 505 {
 506     (RandnScaleFunc)randnScale_8u, (RandnScaleFunc)randnScale_8s, (RandnScaleFunc)randnScale_16u,
 507     (RandnScaleFunc)randnScale_16s, (RandnScaleFunc)randnScale_32s, (RandnScaleFunc)randnScale_32f,
 508     (RandnScaleFunc)randnScale_64f, 0
 509 };
 510
 511 void RNG::fill( InputOutputArray _mat, int disttype,
 512                 InputArray _param1arg, InputArray _param2arg, bool saturateRange )
 513 {
 514     CV_Assert(!_mat.empty());
 515
 516     Mat mat = _mat.getMat(), _param1 = _param1arg.getMat(), _param2 = _param2arg.getMat();
 517     int depth = mat.depth(), cn = mat.channels();
 518     AutoBuffer<double> _parambuf;
 519     int j, k;
 520     bool fast_int_mode = false;
 521     bool smallFlag = true;
 522     RandFunc func = 0;
 523     RandnScaleFunc scaleFunc = 0;
 524
 525     CV_Assert(_param1.channels() == 1 && (_param1.rows == 1 || _param1.cols == 1) &&
 526               (_param1.rows + _param1.cols - 1 == cn || _param1.rows + _param1.cols - 1 == 1 ||
 527                (_param1.size() == Size(1, 4) && _param1.type() == CV_64F && cn <= 4)));
 528     CV_Assert( _param2.channels() == 1 &&
 529                (((_param2.rows == 1 || _param2.cols == 1) &&
 530                 (_param2.rows + _param2.cols - 1 == cn || _param2.rows + _param2.cols - 1 == 1 ||
 531                 (_param1.size() == Size(1, 4) && _param1.type() == CV_64F && cn <= 4))) ||
 532                 (_param2.rows == cn && _param2.cols == cn && disttype == NORMAL)));
 533
 534     Vec2i* ip = 0;
 535     Vec2d* dp = 0;
 536     Vec2f* fp = 0;
 537     DivStruct* ds = 0;
 538     uchar* mean = 0;
 539     uchar* stddev = 0;
 540     bool stdmtx = false;
 541     int n1 = (int)_param1.total();
 542     int n2 = (int)_param2.total();
 543
 544     if( disttype == UNIFORM )
 545     {
 546         _parambuf.allocate(cn*8 + n1 + n2);
 547         double* parambuf = _parambuf.data();
 548         double* p1 = _param1.ptr<double>();
 549         double* p2 = _param2.ptr<double>();
 550
 551         if( !_param1.isContinuous() || _param1.type() != CV_64F || n1 != cn )
 552         {
 553             Mat tmp(_param1.size(), CV_64F, parambuf);
 554             _param1.convertTo(tmp, CV_64F);
 555             p1 = parambuf;
 556             if( n1 < cn )
 557                 for( j = n1; j < cn; j++ )
 558                     p1[j] = p1[j-n1];
 559         }
 560
 561         if( !_param2.isContinuous() || _param2.type() != CV_64F || n2 != cn )
 562         {
 563             Mat tmp(_param2.size(), CV_64F, parambuf + cn);
 564             _param2.convertTo(tmp, CV_64F);
 565             p2 = parambuf + cn;
 566             if( n2 < cn )
 567                 for( j = n2; j < cn; j++ )
 568                     p2[j] = p2[j-n2];
 569         }
 570
 571         if( depth <= CV_32S )
 572         {
 573             ip = (Vec2i*)(parambuf + cn*2);
 574             for( j = 0, fast_int_mode = true; j < cn; j++ )
 575             {
 576                 double a = std::min(p1[j], p2[j]);
 577                 double b = std::max(p1[j], p2[j]);
 578                 if( saturateRange )
 579                 {
 580                     a = std::max(a, depth == CV_8U || depth == CV_16U ? 0. :
 581                             depth == CV_8S ? -128. : depth == CV_16S ? -32768. : (double)INT_MIN);
 582                     b = std::min(b, depth == CV_8U ? 256. : depth == CV_16U ? 65536. :
 583                             depth == CV_8S ? 128. : depth == CV_16S ? 32768. : (double)INT_MAX);
 584                 }
 585                 ip[j][1] = cvCeil(a);
 586                 int idiff = ip[j][0] = cvFloor(b) - ip[j][1] - 1;
 587                 double diff = b - a;
 588
 589                 fast_int_mode = fast_int_mode && diff <= 4294967296. && (idiff & (idiff+1)) == 0;
 590                 if( fast_int_mode )
 591                     smallFlag = smallFlag && (idiff <= 255);
 592                 else
 593                 {
 594                     if( diff > INT_MAX )
 595                         ip[j][0] = INT_MAX;
 596                     if( a < INT_MIN/2 )
 597                         ip[j][1] = INT_MIN/2;
 598                 }
 599             }
 600
 601             if( !fast_int_mode )
 602             {
 603                 ds = (DivStruct*)(ip + cn);
 604                 for( j = 0; j < cn; j++ )
 605                 {
 606                     ds[j].delta = ip[j][1];
 607                     unsigned d = ds[j].d = (unsigned)(ip[j][0]+1);
 608                     int l = 0;
 609                     while(((uint64)1 << l) < d)
 610                         l++;
 611                     ds[j].M = (unsigned)(((uint64)1 << 32)*(((uint64)1 << l) - d)/d) + 1;
 612                     ds[j].sh1 = std::min(l, 1);
 613                     ds[j].sh2 = std::max(l - 1, 0);
 614                 }
 615             }
 616
 617             func = randTab[fast_int_mode ? 1 : 0][depth];
 618         }
 619         else
 620         {
 621             double scale = depth == CV_64F ?
 622                 5.4210108624275221700372640043497e-20 : // 2**-64
 623                 2.3283064365386962890625e-10;           // 2**-32
 624             double maxdiff = saturateRange ? (double)FLT_MAX : DBL_MAX;
 625
 626             // for each channel i compute such dparam[0][i] & dparam[1][i],
 627             // so that a signed 32/64-bit integer X is transformed to
 628             // the range [param1.val[i], param2.val[i]) using
 629             // dparam[1][i]*X + dparam[0][i]
 630             if( depth == CV_32F )
 631             {
 632                 fp = (Vec2f*)(parambuf + cn*2);
 633                 for( j = 0; j < cn; j++ )
 634                 {
 635                     fp[j][0] = (float)(std::min(maxdiff, p2[j] - p1[j])*scale);
 636                     fp[j][1] = (float)((p2[j] + p1[j])*0.5);
 637                 }
 638             }
 639             else
 640             {
 641                 dp = (Vec2d*)(parambuf + cn*2);
 642                 for( j = 0; j < cn; j++ )
 643                 {
 644                     dp[j][0] = std::min(DBL_MAX, p2[j] - p1[j])*scale;
 645                     dp[j][1] = ((p2[j] + p1[j])*0.5);
 646                 }
 647             }
 648
 649             func = randTab[0][depth];
 650         }
 651         CV_Assert( func != 0 );
 652     }
 653     else if( disttype == CV_RAND_NORMAL )
 654     {
 655         _parambuf.allocate(MAX(n1, cn) + MAX(n2, cn));
 656         double* parambuf = _parambuf.data();
 657
 658         int ptype = depth == CV_64F ? CV_64F : CV_32F;
 659         int esz = (int)CV_ELEM_SIZE(ptype);
 660
 661         if( _param1.isContinuous() && _param1.type() == ptype && n1 >= cn)
 662             mean = _param1.ptr();
 663         else
 664         {
 665             Mat tmp(_param1.size(), ptype, parambuf);
 666             _param1.convertTo(tmp, ptype);
 667             mean = (uchar*)parambuf;
 668         }
 669
 670         if( n1 < cn )
 671             for( j = n1*esz; j < cn*esz; j++ )
 672                 mean[j] = mean[j - n1*esz];
 673
 674         if( _param2.isContinuous() && _param2.type() == ptype && n2 >= cn)
 675             stddev = _param2.ptr();
 676         else
 677         {
 678             Mat tmp(_param2.size(), ptype, parambuf + MAX(n1, cn));
 679             _param2.convertTo(tmp, ptype);
 680             stddev = (uchar*)(parambuf + MAX(n1, cn));
 681         }
 682
 683         if( n2 < cn )
 684             for( j = n2*esz; j < cn*esz; j++ )
 685                 stddev[j] = stddev[j - n2*esz];
 686
 687         stdmtx = _param2.rows == cn && _param2.cols == cn;
 688         scaleFunc = randnScaleTab[depth];
 689         CV_Assert( scaleFunc != 0 );
 690     }
 691     else
 692         CV_Error( CV_StsBadArg, "Unknown distribution type" );
 693
 694     const Mat* arrays[] = {&mat, 0};
 695     uchar* ptr;
 696     NAryMatIterator it(arrays, &ptr, 1);
 697     int total = (int)it.size, blockSize = std::min((BLOCK_SIZE + cn - 1)/cn, total);
 698     size_t esz = mat.elemSize();
 699     AutoBuffer<double> buf;
 700     uchar* param = 0;
 701     float* nbuf = 0;
 702
 703     if( disttype == UNIFORM )
 704     {
 705         buf.allocate(blockSize*cn*4);
 706         param = (uchar*)(double*)buf.data();
 707
 708         if( depth <= CV_32S )
 709         {
 710             if( !fast_int_mode )
 711             {
 712                 DivStruct* p = (DivStruct*)param;
 713                 for( j = 0; j < blockSize*cn; j += cn )
 714                     for( k = 0; k < cn; k++ )
 715                         p[j + k] = ds[k];
 716             }
 717             else
 718             {
 719                 Vec2i* p = (Vec2i*)param;
 720                 for( j = 0; j < blockSize*cn; j += cn )
 721                     for( k = 0; k < cn; k++ )
 722                         p[j + k] = ip[k];
 723             }
 724         }
 725         else if( depth == CV_32F )
 726         {
 727             Vec2f* p = (Vec2f*)param;
 728             for( j = 0; j < blockSize*cn; j += cn )
 729                 for( k = 0; k < cn; k++ )
 730                     p[j + k] = fp[k];
 731         }
 732         else
 733         {
 734             Vec2d* p = (Vec2d*)param;
 735             for( j = 0; j < blockSize*cn; j += cn )
 736                 for( k = 0; k < cn; k++ )
 737                     p[j + k] = dp[k];
 738         }
 739     }
 740     else
 741     {
 742         buf.allocate((blockSize*cn+1)/2);
 743         nbuf = (float*)(double*)buf.data();
 744     }
 745
 746     for( size_t i = 0; i < it.nplanes; i++, ++it )
 747     {
 748         for( j = 0; j < total; j += blockSize )
 749         {
 750             int len = std::min(total - j, blockSize);
 751
 752             if( disttype == CV_RAND_UNI )
 753                 func( ptr, len*cn, &state, param, smallFlag );
 754             else
 755             {
 756                 randn_0_1_32f(nbuf, len*cn, &state);
 757                 scaleFunc(nbuf, ptr, len, cn, mean, stddev, stdmtx);
 758             }
 759             ptr += len*esz;
 760         }
 761     }
 762 }
 763
 764 }
 765
 766 cv::RNG& cv::theRNG()
 767 {
 768     return getCoreTlsData().get()->rng;
 769 }
 770
 771 void cv::setRNGSeed(int seed)
 772 {
 773     theRNG() = RNG(static_cast<uint64>(seed));
 774 }
 775
 776
 777 void cv::randu(InputOutputArray dst, InputArray low, InputArray high)
 778 {
 779     CV_INSTRUMENT_REGION()
 780
 781     theRNG().fill(dst, RNG::UNIFORM, low, high);
 782 }
 783
 784 void cv::randn(InputOutputArray dst, InputArray mean, InputArray stddev)
 785 {
 786     CV_INSTRUMENT_REGION()
 787
 788     theRNG().fill(dst, RNG::NORMAL, mean, stddev);
 789 }
 790
 791 namespace cv
 792 {
 793
 794 template<typename T> static void
 795 randShuffle_( Mat& _arr, RNG& rng, double )
 796 {
 797     unsigned sz = (unsigned)_arr.total();
 798     if( _arr.isContinuous() )
 799     {
 800         T* arr = _arr.ptr<T>();
 801         for( unsigned i = 0; i < sz; i++ )
 802         {
 803             unsigned j = (unsigned)rng % sz;
 804             std::swap( arr[j], arr[i] );
 805         }
 806     }
 807     else
 808     {
 809         CV_Assert( _arr.dims <= 2 );
 810         uchar* data = _arr.ptr();
 811         size_t step = _arr.step;
 812         int rows = _arr.rows;
 813         int cols = _arr.cols;
 814         for( int i0 = 0; i0 < rows; i0++ )
 815         {
 816             T* p = _arr.ptr<T>(i0);
 817             for( int j0 = 0; j0 < cols; j0++ )
 818             {
 819                 unsigned k1 = (unsigned)rng % sz;
 820                 int i1 = (int)(k1 / cols);
 821                 int j1 = (int)(k1 - (unsigned)i1*(unsigned)cols);
 822                 std::swap( p[j0], ((T*)(data + step*i1))[j1] );
 823             }
 824         }
 825     }
 826 }
 827
 828 typedef void (*RandShuffleFunc)( Mat& dst, RNG& rng, double iterFactor );
 829
 830 }
 831
 832 void cv::randShuffle( InputOutputArray _dst, double iterFactor, RNG* _rng )
 833 {
 834     CV_INSTRUMENT_REGION()
 835
 836     RandShuffleFunc tab[] =
 837     {
 838         0,
 839         randShuffle_<uchar>, // 1
 840         randShuffle_<ushort>, // 2
 841         randShuffle_<Vec<uchar,3> >, // 3
 842         randShuffle_<int>, // 4
 843         0,
 844         randShuffle_<Vec<ushort,3> >, // 6
 845         0,
 846         randShuffle_<Vec<int,2> >, // 8
 847         0, 0, 0,
 848         randShuffle_<Vec<int,3> >, // 12
 849         0, 0, 0,
 850         randShuffle_<Vec<int,4> >, // 16
 851         0, 0, 0, 0, 0, 0, 0,
 852         randShuffle_<Vec<int,6> >, // 24
 853         0, 0, 0, 0, 0, 0, 0,
 854         randShuffle_<Vec<int,8> > // 32
 855     };
 856
 857     Mat dst = _dst.getMat();
 858     RNG& rng = _rng ? *_rng : theRNG();
 859     CV_Assert( dst.elemSize() <= 32 );
 860     RandShuffleFunc func = tab[dst.elemSize()];
 861     CV_Assert( func != 0 );
 862     func( dst, rng, iterFactor );
 863 }
 864
 865 CV_IMPL void
 866 cvRandArr( CvRNG* _rng, CvArr* arr, int disttype, CvScalar param1, CvScalar param2 )
 867 {
 868     cv::Mat mat = cv::cvarrToMat(arr);
 869     // !!! this will only work for current 64-bit MWC RNG !!!
 870     cv::RNG& rng = _rng ? (cv::RNG&)*_rng : cv::theRNG();
 871     rng.fill(mat, disttype == CV_RAND_NORMAL ?
 872         cv::RNG::NORMAL : cv::RNG::UNIFORM, cv::Scalar(param1), cv::Scalar(param2) );
 873 }
 874
 875 CV_IMPL void cvRandShuffle( CvArr* arr, CvRNG* _rng, double iter_factor )
 876 {
 877     cv::Mat dst = cv::cvarrToMat(arr);
 878     cv::RNG& rng = _rng ? (cv::RNG&)*_rng : cv::theRNG();
 879     cv::randShuffle( dst, iter_factor, &rng );
 880 }
 881
 882 // Mersenne Twister random number generator.
 883 // Inspired by http://www.math.sci.hiroshima-u.ac.jp/~m-mat/MT/MT2002/CODES/mt19937ar.c
 884
 885 /*
 886    A C-program for MT19937, with initialization improved 2002/1/26.
 887    Coded by Takuji Nishimura and Makoto Matsumoto.
 888
 889    Before using, initialize the state by using init_genrand(seed)
 890    or init_by_array(init_key, key_length).
 891
 892    Copyright (C) 1997 - 2002, Makoto Matsumoto and Takuji Nishimura,
 893    All rights reserved.
 894
 895    Redistribution and use in source and binary forms, with or without
 896    modification, are permitted provided that the following conditions
 897    are met:
 898
 899      1. Redistributions of source code must retain the above copyright
 900         notice, this list of conditions and the following disclaimer.
 901
 902      2. Redistributions in binary form must reproduce the above copyright
 903         notice, this list of conditions and the following disclaimer in the
 904         documentation and/or other materials provided with the distribution.
 905
 906      3. The names of its contributors may not be used to endorse or promote
 907         products derived from this software without specific prior written
 908         permission.
 909
 910    THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 911    "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 912    LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 913    A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
 914    CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
 915    EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
 916    PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
 917    PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
 918    LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
 919    NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
 920    SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 921
 922
 923    Any feedback is very welcome.
 924    http://www.math.sci.hiroshima-u.ac.jp/~m-mat/MT/emt.html
 925    email: m-mat @ math.sci.hiroshima-u.ac.jp (remove space)
 926 */
 927
 928 cv::RNG_MT19937::RNG_MT19937(unsigned s) { seed(s); }
 929
 930 cv::RNG_MT19937::RNG_MT19937() { seed(5489U); }
 931
 932 void cv::RNG_MT19937::seed(unsigned s)
 933 {
 934     state[0]= s;
 935     for (mti = 1; mti < N; mti++)
 936     {
 937         /* See Knuth TAOCP Vol2. 3rd Ed. P.106 for multiplier. */
 938         state[mti] = (1812433253U * (state[mti - 1] ^ (state[mti - 1] >> 30)) + mti);
 939     }
 940 }
 941
 942 unsigned cv::RNG_MT19937::next()
 943 {
 944     /* mag01[x] = x * MATRIX_A  for x=0,1 */
 945     static unsigned mag01[2] = { 0x0U, /*MATRIX_A*/ 0x9908b0dfU};
 946
 947     const unsigned UPPER_MASK = 0x80000000U;
 948     const unsigned LOWER_MASK = 0x7fffffffU;
 949
 950     /* generate N words at one time */
 951     if (mti >= N)
 952     {
 953         int kk = 0;
 954
 955         for (; kk < N - M; ++kk)
 956         {
 957             unsigned y = (state[kk] & UPPER_MASK) | (state[kk + 1] & LOWER_MASK);
 958             state[kk] = state[kk + M] ^ (y >> 1) ^ mag01[y & 0x1U];
 959         }
 960
 961         for (; kk < N - 1; ++kk)
 962         {
 963             unsigned y = (state[kk] & UPPER_MASK) | (state[kk + 1] & LOWER_MASK);
 964             state[kk] = state[kk + (M - N)] ^ (y >> 1) ^ mag01[y & 0x1U];
 965         }
 966
 967         unsigned y = (state[N - 1] & UPPER_MASK) | (state[0] & LOWER_MASK);
 968         state[N - 1] = state[M - 1] ^ (y >> 1) ^ mag01[y & 0x1U];
 969
 970         mti = 0;
 971     }
 972
 973     unsigned y = state[mti++];
 974
 975     /* Tempering */
 976     y ^= (y >> 11);
 977     y ^= (y <<  7) & 0x9d2c5680U;
 978     y ^= (y << 15) & 0xefc60000U;
 979     y ^= (y >> 18);
 980
 981     return y;
 982 }
 983
 984 cv::RNG_MT19937::operator unsigned() { return next(); }
 985
 986 cv::RNG_MT19937::operator int() { return (int)next();}
 987
 988 cv::RNG_MT19937::operator float() { return next() * (1.f / 4294967296.f); }
 989
 990 cv::RNG_MT19937::operator double()
 991 {
 992     unsigned a = next() >> 5;
 993     unsigned b = next() >> 6;
 994     return (a * 67108864.0 + b) * (1.0 / 9007199254740992.0);
 995 }
 996
 997 int cv::RNG_MT19937::uniform(int a, int b) { return (int)(next() % (b - a) + a); }
 998
 999 float cv::RNG_MT19937::uniform(float a, float b) { return ((float)*this)*(b - a) + a; }
1000
1001 double cv::RNG_MT19937::uniform(double a, double b) { return ((double)*this)*(b - a) + a; }
1002
1003 unsigned cv::RNG_MT19937::operator ()(unsigned b) { return next() % b; }
1004
1005 unsigned cv::RNG_MT19937::operator ()() { return next(); }
1006
1007 /* End of file. */