Imported Upstream version 2.81
[platform/upstream/libbullet.git] / Extras / CDTestFramework / Opcode / Ice / _IceRevisitedRadix.cpp
1 /*
2  *      ICE / OPCODE - Optimized Collision Detection
3  * http://www.codercorner.com/Opcode.htm
4  * 
5  * Copyright (c) 2001-2008 Pierre Terdiman,  pierre@codercorner.com
6
7 This software is provided 'as-is', without any express or implied warranty.
8 In no event will the authors be held liable for any damages arising from the use of this software.
9 Permission is granted to anyone to use this software for any purpose, 
10 including commercial applications, and to alter it and redistribute it freely, 
11 subject to the following restrictions:
12
13 1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
14 2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
15 3. This notice may not be removed or altered from any source distribution.
16 */
17 ///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
18 /**
19  *      Contains source code from the article "Radix Sort Revisited".
20  *      \file           IceRevisitedRadix.cpp
21  *      \author         Pierre Terdiman
22  *      \date           April, 4, 2000
23  */
24 ///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
25
26 ///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
27 /**
28  *      Revisited Radix Sort.
29  *      This is my new radix routine:
30  *  - it uses indices and doesn't recopy the values anymore, hence wasting less ram
31  *  - it creates all the histograms in one run instead of four
32  *  - it sorts words faster than dwords and bytes faster than words
33  *  - it correctly sorts negative floating-point values by patching the offsets
34  *  - it automatically takes advantage of temporal coherence
35  *  - multiple keys support is a side effect of temporal coherence
36  *  - it may be worth recoding in asm... (mainly to use FCOMI, FCMOV, etc) [it's probably memory-bound anyway]
37  *
38  *      History:
39  *      - 08.15.98: very first version
40  *      - 04.04.00: recoded for the radix article
41  *      - 12.xx.00: code lifting
42  *      - 09.18.01: faster CHECK_PASS_VALIDITY thanks to Mark D. Shattuck (who provided other tips, not included here)
43  *      - 10.11.01: added local ram support
44  *      - 01.20.02: bugfix! In very particular cases the last pass was skipped in the float code-path, leading to incorrect sorting......
45  *      - 01.02.02:     - "mIndices" renamed => "mRanks". That's a rank sorter after all.
46  *                              - ranks are not "reset" anymore, but implicit on first calls
47  *      - 07.05.02:     - offsets rewritten with one less indirection.
48  *      - 11.03.02:     - "bool" replaced with RadixHint enum
49  *
50  *      \class          RadixSort
51  *      \author         Pierre Terdiman
52  *      \version        1.4
53  *      \date           August, 15, 1998
54  */
55 ///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
56
57 /*
58 To do:
59         - add an offset parameter between two input values (avoid some data recopy sometimes)
60         - unroll ? asm ?
61         - 11 bits trick & 3 passes as Michael did
62         - prefetch stuff the day I have a P3
63         - make a version with 16-bits indices ?
64 */
65
66 ///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
67 // Precompiled Header
68 #include "Stdafx.h"
69
70 using namespace IceCore;
71
72 #define INVALIDATE_RANKS        mCurrentSize|=0x80000000
73 #define VALIDATE_RANKS          mCurrentSize&=0x7fffffff
74 #define CURRENT_SIZE            (mCurrentSize&0x7fffffff)
75 #define INVALID_RANKS           (mCurrentSize&0x80000000)
76
77 #define CHECK_RESIZE(n)                                                                                                                                         \
78         if(n!=mPreviousSize)                                                                                                                                    \
79         {                                                                                                                                                                               \
80                                 if(n>mCurrentSize)      Resize(n);                                                                                              \
81                 else                                            ResetRanks();                                                                                   \
82                 mPreviousSize = n;                                                                                                                                      \
83         }
84
85 #define CREATE_HISTOGRAMS(type, buffer)                                                                                                         \
86         /* Clear counters/histograms */                                                                                                                 \
87         ZeroMemory(mHistogram, 256*4*sizeof(udword));                                                                                   \
88                                                                                                                                                                                         \
89         /* Prepare to count */                                                                                                                                  \
90         ubyte* p = (ubyte*)input;                                                                                                                               \
91         ubyte* pe = &p[nb*4];                                                                                                                                   \
92         udword* h0= &mHistogram[0];             /* Histogram for first pass (LSB)       */                                      \
93         udword* h1= &mHistogram[256];   /* Histogram for second pass            */                                      \
94         udword* h2= &mHistogram[512];   /* Histogram for third pass                     */                                      \
95         udword* h3= &mHistogram[768];   /* Histogram for last pass (MSB)        */                                      \
96                                                                                                                                                                                         \
97         bool AlreadySorted = true;      /* Optimism... */                                                                                       \
98                                                                                                                                                                                         \
99         if(INVALID_RANKS)                                                                                                                                               \
100         {                                                                                                                                                                               \
101                 /* Prepare for temporal coherence */                                                                                            \
102                 type* Running = (type*)buffer;                                                                                                          \
103                 type PrevVal = *Running;                                                                                                                        \
104                                                                                                                                                                                         \
105                 while(p!=pe)                                                                                                                                            \
106                 {                                                                                                                                                                       \
107                         /* Read input buffer in previous sorted order */                                                                \
108                         type Val = *Running++;                                                                                                                  \
109                         /* Check whether already sorted or not */                                                                               \
110                         if(Val<PrevVal) { AlreadySorted = false; break; } /* Early out */                               \
111                         /* Update for next iteration */                                                                                                 \
112                         PrevVal = Val;                                                                                                                                  \
113                                                                                                                                                                                         \
114                         /* Create histograms */                                                                                                                 \
115                         h0[*p++]++;     h1[*p++]++;     h2[*p++]++;     h3[*p++]++;                                                                     \
116                 }                                                                                                                                                                       \
117                                                                                                                                                                                         \
118                 /* If all input values are already sorted, we just have to return and leave the */      \
119                 /* previous list unchanged. That way the routine may take advantage of temporal */      \
120                 /* coherence, for example when used to sort transparent faces.                                  */      \
121                 if(AlreadySorted)                                                                                                                                       \
122                 {                                                                                                                                                                       \
123                         mNbHits++;                                                                                                                                              \
124                         for(udword i=0;i<nb;i++)        mRanks[i] = i;                                                                          \
125                         return *this;                                                                                                                                   \
126                 }                                                                                                                                                                       \
127         }                                                                                                                                                                               \
128         else                                                                                                                                                                    \
129         {                                                                                                                                                                               \
130                 /* Prepare for temporal coherence */                                                                                            \
131                 udword* Indices = mRanks;                                                                                                                       \
132                 type PrevVal = (type)buffer[*Indices];                                                                                          \
133                                                                                                                                                                                         \
134                 while(p!=pe)                                                                                                                                            \
135                 {                                                                                                                                                                       \
136                         /* Read input buffer in previous sorted order */                                                                \
137                         type Val = (type)buffer[*Indices++];                                                                                    \
138                         /* Check whether already sorted or not */                                                                               \
139                         if(Val<PrevVal) { AlreadySorted = false; break; } /* Early out */                               \
140                         /* Update for next iteration */                                                                                                 \
141                         PrevVal = Val;                                                                                                                                  \
142                                                                                                                                                                                         \
143                         /* Create histograms */                                                                                                                 \
144                         h0[*p++]++;     h1[*p++]++;     h2[*p++]++;     h3[*p++]++;                                                                     \
145                 }                                                                                                                                                                       \
146                                                                                                                                                                                         \
147                 /* If all input values are already sorted, we just have to return and leave the */      \
148                 /* previous list unchanged. That way the routine may take advantage of temporal */      \
149                 /* coherence, for example when used to sort transparent faces.                                  */      \
150                 if(AlreadySorted)       { mNbHits++; return *this;      }                                                                       \
151         }                                                                                                                                                                               \
152                                                                                                                                                                                         \
153         /* Else there has been an early out and we must finish computing the histograms */              \
154         while(p!=pe)                                                                                                                                                    \
155         {                                                                                                                                                                               \
156                 /* Create histograms without the previous overhead */                                                           \
157                 h0[*p++]++;     h1[*p++]++;     h2[*p++]++;     h3[*p++]++;                                                                             \
158         }
159
160 #define CHECK_PASS_VALIDITY(pass)                                                                                                                       \
161         /* Shortcut to current counters */                                                                                                              \
162         udword* CurCount = &mHistogram[pass<<8];                                                                                                \
163                                                                                                                                                                                         \
164         /* Reset flag. The sorting pass is supposed to be performed. (default) */                               \
165         bool PerformPass = true;                                                                                                                                \
166                                                                                                                                                                                         \
167         /* Check pass validity */                                                                                                                               \
168                                                                                                                                                                                         \
169         /* If all values have the same byte, sorting is useless. */                                                             \
170         /* It may happen when sorting bytes or words instead of dwords. */                                              \
171         /* This routine actually sorts words faster than dwords, and bytes */                                   \
172         /* faster than words. Standard running time (O(4*n))is reduced to O(2*n) */                             \
173         /* for words and O(n) for bytes. Running time for floats depends on actual values... */ \
174                                                                                                                                                                                         \
175         /* Get first byte */                                                                                                                                    \
176         ubyte UniqueVal = *(((ubyte*)input)+pass);                                                                                              \
177                                                                                                                                                                                         \
178         /* Check that byte's counter */                                                                                                                 \
179         if(CurCount[UniqueVal]==nb)     PerformPass=false;
180
181 ///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
182 /**
183  *      Constructor.
184  */
185 ///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
186 RadixSort::RadixSort() : mRanks(null), mRanks2(null), mCurrentSize(0), mTotalCalls(0), mNbHits(0)
187 {
188 #ifndef RADIX_LOCAL_RAM
189         // Allocate input-independent ram
190         mHistogram      = new udword[256*4];
191         mOffset         = new udword[256];
192 #endif
193         // Initialize indices
194         INVALIDATE_RANKS;
195 }
196
197 ///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
198 /**
199  *      Destructor.
200  */
201 ///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
202 RadixSort::~RadixSort()
203 {
204         // Release everything
205 #ifndef RADIX_LOCAL_RAM
206         DELETEARRAY(mOffset);
207         DELETEARRAY(mHistogram);
208 #endif
209         DELETEARRAY(mRanks2);
210         DELETEARRAY(mRanks);
211 }
212
213 ///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
214 /**
215  *      Resizes the inner lists.
216  *      \param          nb      [in] new size (number of dwords)
217  *      \return         true if success
218  */
219 ///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
220 bool RadixSort::Resize(udword nb)
221 {
222         // Free previously used ram
223         DELETEARRAY(mRanks2);
224         DELETEARRAY(mRanks);
225
226         // Get some fresh one
227         mRanks  = new udword[nb];       CHECKALLOC(mRanks);
228         mRanks2 = new udword[nb];       CHECKALLOC(mRanks2);
229
230         return true;
231 }
232
233 inline_ void RadixSort::CheckResize(udword nb)
234 {
235         udword CurSize = CURRENT_SIZE;
236         if(nb!=CurSize)
237         {
238                 if(nb>CurSize)  Resize(nb);
239                 mCurrentSize = nb;
240                 INVALIDATE_RANKS;
241         }
242 }
243
244 ///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
245 /**
246  *      Main sort routine.
247  *      This one is for integer values. After the call, mRanks contains a list of indices in sorted order, i.e. in the order you may process your data.
248  *      \param          input   [in] a list of integer values to sort
249  *      \param          nb              [in] number of values to sort, must be < 2^31
250  *      \param          hint    [in] RADIX_SIGNED to handle negative values, RADIX_UNSIGNED if you know your input buffer only contains positive values
251  *      \return         Self-Reference
252  */
253 ///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
254 RadixSort& RadixSort::Sort(const udword* input, udword nb, RadixHint hint)
255 {
256         // Checkings
257         if(!input || !nb || nb&0x80000000)      return *this;
258
259         // Stats
260         mTotalCalls++;
261
262         // Resize lists if needed
263         CheckResize(nb);
264
265 #ifdef RADIX_LOCAL_RAM
266         // Allocate histograms & offsets on the stack
267         udword mHistogram[256*4];
268 //      udword mOffset[256];
269         udword* mLink[256];
270 #endif
271
272         // Create histograms (counters). Counters for all passes are created in one run.
273         // Pros:        read input buffer once instead of four times
274         // Cons:        mHistogram is 4Kb instead of 1Kb
275         // We must take care of signed/unsigned values for temporal coherence.... I just
276         // have 2 code paths even if just a single opcode changes. Self-modifying code, someone?
277         if(hint==RADIX_UNSIGNED)        { CREATE_HISTOGRAMS(udword, input);     }
278         else                                            { CREATE_HISTOGRAMS(sdword, input);     }
279
280         // Compute #negative values involved if needed
281         udword NbNegativeValues = 0;
282         if(hint==RADIX_SIGNED)
283         {
284                 // An efficient way to compute the number of negatives values we'll have to deal with is simply to sum the 128
285                 // last values of the last histogram. Last histogram because that's the one for the Most Significant Byte,
286                 // responsible for the sign. 128 last values because the 128 first ones are related to positive numbers.
287                 udword* h3= &mHistogram[768];
288                 for(udword i=128;i<256;i++)     NbNegativeValues += h3[i];      // 768 for last histogram, 128 for negative part
289         }
290
291         // Radix sort, j is the pass number (0=LSB, 3=MSB)
292         for(udword j=0;j<4;j++)
293         {
294                 CHECK_PASS_VALIDITY(j);
295
296                 // Sometimes the fourth (negative) pass is skipped because all numbers are negative and the MSB is 0xFF (for example). This is
297                 // not a problem, numbers are correctly sorted anyway.
298                 if(PerformPass)
299                 {
300                         // Should we care about negative values?
301                         if(j!=3 || hint==RADIX_UNSIGNED)
302                         {
303                                 // Here we deal with positive values only
304
305                                 // Create offsets
306 //                              mOffset[0] = 0;
307 //                              for(udword i=1;i<256;i++)               mOffset[i] = mOffset[i-1] + CurCount[i-1];
308                                 mLink[0] = mRanks2;
309                                 for(udword i=1;i<256;i++)               mLink[i] = mLink[i-1] + CurCount[i-1];
310                         }
311                         else
312                         {
313                                 // This is a special case to correctly handle negative integers. They're sorted in the right order but at the wrong place.
314
315                                 // Create biased offsets, in order for negative numbers to be sorted as well
316 //                              mOffset[0] = NbNegativeValues;                                                                                          // First positive number takes place after the negative ones
317                                 mLink[0] = &mRanks2[NbNegativeValues];                                                                          // First positive number takes place after the negative ones
318 //                              for(udword i=1;i<128;i++)               mOffset[i] = mOffset[i-1] + CurCount[i-1];      // 1 to 128 for positive numbers
319                                 for(udword i=1;i<128;i++)               mLink[i] = mLink[i-1] + CurCount[i-1];          // 1 to 128 for positive numbers
320
321                                 // Fixing the wrong place for negative values
322 //                              mOffset[128] = 0;
323                                 mLink[128] = mRanks2;
324 //                              for(i=129;i<256;i++)                    mOffset[i] = mOffset[i-1] + CurCount[i-1];
325                                 for(udword i=129;i<256;i++)             mLink[i] = mLink[i-1] + CurCount[i-1];
326                         }
327
328                         // Perform Radix Sort
329                         ubyte* InputBytes       = (ubyte*)input;
330                         InputBytes += j;
331                         if(INVALID_RANKS)
332                         {
333 //                              for(udword i=0;i<nb;i++)        mRanks2[mOffset[InputBytes[i<<2]]++] = i;
334                                 for(udword i=0;i<nb;i++)        *mLink[InputBytes[i<<2]]++ = i;
335                                 VALIDATE_RANKS;
336                         }
337                         else
338                         {
339                                 udword* Indices         = mRanks;
340                                 udword* IndicesEnd      = &mRanks[nb];
341                                 while(Indices!=IndicesEnd)
342                                 {
343                                         udword id = *Indices++;
344 //                                      mRanks2[mOffset[InputBytes[id<<2]]++] = id;
345                                         *mLink[InputBytes[id<<2]]++ = id;
346                                 }
347                         }
348
349                         // Swap pointers for next pass. Valid indices - the most recent ones - are in mRanks after the swap.
350                         udword* Tmp     = mRanks;       mRanks = mRanks2; mRanks2 = Tmp;
351                 }
352         }
353         return *this;
354 }
355
356 ///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
357 /**
358  *      Main sort routine.
359  *      This one is for floating-point values. After the call, mRanks contains a list of indices in sorted order, i.e. in the order you may process your data.
360  *      \param          input                   [in] a list of floating-point values to sort
361  *      \param          nb                              [in] number of values to sort, must be < 2^31
362  *      \return         Self-Reference
363  *      \warning        only sorts IEEE floating-point values
364  */
365 ///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
366 RadixSort& RadixSort::Sort(const float* input2, udword nb)
367 {
368         // Checkings
369         if(!input2 || !nb || nb&0x80000000)     return *this;
370
371         // Stats
372         mTotalCalls++;
373
374         udword* input = (udword*)input2;
375
376         // Resize lists if needed
377         CheckResize(nb);
378
379 #ifdef RADIX_LOCAL_RAM
380         // Allocate histograms & offsets on the stack
381         udword mHistogram[256*4];
382 //      udword mOffset[256];
383         udword* mLink[256];
384 #endif
385
386         // Create histograms (counters). Counters for all passes are created in one run.
387         // Pros:        read input buffer once instead of four times
388         // Cons:        mHistogram is 4Kb instead of 1Kb
389         // Floating-point values are always supposed to be signed values, so there's only one code path there.
390         // Please note the floating point comparison needed for temporal coherence! Although the resulting asm code
391         // is dreadful, this is surprisingly not such a performance hit - well, I suppose that's a big one on first
392         // generation Pentiums....We can't make comparison on integer representations because, as Chris said, it just
393         // wouldn't work with mixed positive/negative values....
394         { CREATE_HISTOGRAMS(float, input2); }
395
396         // Compute #negative values involved if needed
397         udword NbNegativeValues = 0;
398         // An efficient way to compute the number of negatives values we'll have to deal with is simply to sum the 128
399         // last values of the last histogram. Last histogram because that's the one for the Most Significant Byte,
400         // responsible for the sign. 128 last values because the 128 first ones are related to positive numbers.
401         udword* h3= &mHistogram[768];
402         for(udword i=128;i<256;i++)     NbNegativeValues += h3[i];      // 768 for last histogram, 128 for negative part
403
404         // Radix sort, j is the pass number (0=LSB, 3=MSB)
405         for(udword j=0;j<4;j++)
406         {
407                 // Should we care about negative values?
408                 if(j!=3)
409                 {
410                         // Here we deal with positive values only
411                         CHECK_PASS_VALIDITY(j);
412
413                         if(PerformPass)
414                         {
415                                 // Create offsets
416 //                              mOffset[0] = 0;
417                                 mLink[0] = mRanks2;
418 //                              for(udword i=1;i<256;i++)               mOffset[i] = mOffset[i-1] + CurCount[i-1];
419                                 for(udword i=1;i<256;i++)               mLink[i] = mLink[i-1] + CurCount[i-1];
420
421                                 // Perform Radix Sort
422                                 ubyte* InputBytes = (ubyte*)input;
423                                 InputBytes += j;
424                                 if(INVALID_RANKS)
425                                 {
426 //                                      for(i=0;i<nb;i++)       mRanks2[mOffset[InputBytes[i<<2]]++] = i;
427                                         for(udword i=0;i<nb;i++)        *mLink[InputBytes[i<<2]]++ = i;
428                                         VALIDATE_RANKS;
429                                 }
430                                 else
431                                 {
432                                         udword* Indices         = mRanks;
433                                         udword* IndicesEnd      = &mRanks[nb];
434                                         while(Indices!=IndicesEnd)
435                                         {
436                                                 udword id = *Indices++;
437 //                                              mRanks2[mOffset[InputBytes[id<<2]]++] = id;
438                                                 *mLink[InputBytes[id<<2]]++ = id;
439                                         }
440                                 }
441
442                                 // Swap pointers for next pass. Valid indices - the most recent ones - are in mRanks after the swap.
443                                 udword* Tmp     = mRanks;       mRanks = mRanks2; mRanks2 = Tmp;
444                         }
445                 }
446                 else
447                 {
448                         // This is a special case to correctly handle negative values
449                         CHECK_PASS_VALIDITY(j);
450
451                         if(PerformPass)
452                         {
453                                 // Create biased offsets, in order for negative numbers to be sorted as well
454 //                              mOffset[0] = NbNegativeValues;                                                                                          // First positive number takes place after the negative ones
455                                 mLink[0] = &mRanks2[NbNegativeValues];                                                                          // First positive number takes place after the negative ones
456 //                              for(udword i=1;i<128;i++)               mOffset[i] = mOffset[i-1] + CurCount[i-1];      // 1 to 128 for positive numbers
457                                 for(udword i=1;i<128;i++)               mLink[i] = mLink[i-1] + CurCount[i-1];          // 1 to 128 for positive numbers
458
459                                 // We must reverse the sorting order for negative numbers!
460 //                              mOffset[255] = 0;
461                                 mLink[255] = mRanks2;
462 //                              for(i=0;i<127;i++)              mOffset[254-i] = mOffset[255-i] + CurCount[255-i];      // Fixing the wrong order for negative values
463                                 for(udword i=0;i<127;i++)       mLink[254-i] = mLink[255-i] + CurCount[255-i];          // Fixing the wrong order for negative values
464 //                              for(i=128;i<256;i++)    mOffset[i] += CurCount[i];                                                      // Fixing the wrong place for negative values
465                                 for(udword i=128;i<256;i++)     mLink[i] += CurCount[i];                                                        // Fixing the wrong place for negative values
466
467                                 // Perform Radix Sort
468                                 if(INVALID_RANKS)
469                                 {
470                                         for(udword i=0;i<nb;i++)
471                                         {
472                                                 udword Radix = input[i]>>24;                                                    // Radix byte, same as above. AND is useless here (udword).
473                                                 // ### cmp to be killed. Not good. Later.
474 //                                              if(Radix<128)           mRanks2[mOffset[Radix]++] = i;          // Number is positive, same as above
475 //                                              else                            mRanks2[--mOffset[Radix]] = i;          // Number is negative, flip the sorting order
476                                                 if(Radix<128)           *mLink[Radix]++ = i;            // Number is positive, same as above
477                                                 else                            *(--mLink[Radix]) = i;          // Number is negative, flip the sorting order
478                                         }
479                                         VALIDATE_RANKS;
480                                 }
481                                 else
482                                 {
483                                         for(udword i=0;i<nb;i++)
484                                         {
485                                                 udword Radix = input[mRanks[i]]>>24;                                                    // Radix byte, same as above. AND is useless here (udword).
486                                                 // ### cmp to be killed. Not good. Later.
487 //                                              if(Radix<128)           mRanks2[mOffset[Radix]++] = mRanks[i];          // Number is positive, same as above
488 //                                              else                            mRanks2[--mOffset[Radix]] = mRanks[i];          // Number is negative, flip the sorting order
489                                                 if(Radix<128)           *mLink[Radix]++ = mRanks[i];            // Number is positive, same as above
490                                                 else                            *(--mLink[Radix]) = mRanks[i];          // Number is negative, flip the sorting order
491                                         }
492                                 }
493                                 // Swap pointers for next pass. Valid indices - the most recent ones - are in mRanks after the swap.
494                                 udword* Tmp     = mRanks;       mRanks = mRanks2; mRanks2 = Tmp;
495                         }
496                         else
497                         {
498                                 // The pass is useless, yet we still have to reverse the order of current list if all values are negative.
499                                 if(UniqueVal>=128)
500                                 {
501                                         if(INVALID_RANKS)
502                                         {
503                                                 // ###Possible?
504                                                 for(udword i=0;i<nb;i++)        mRanks2[i] = nb-i-1;
505                                                 VALIDATE_RANKS;
506                                         }
507                                         else
508                                         {
509                                                 for(udword i=0;i<nb;i++)        mRanks2[i] = mRanks[nb-i-1];
510                                         }
511
512                                         // Swap pointers for next pass. Valid indices - the most recent ones - are in mRanks after the swap.
513                                         udword* Tmp     = mRanks;       mRanks = mRanks2; mRanks2 = Tmp;
514                                 }
515                         }
516                 }
517         }
518         return *this;
519 }
520
521 ///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
522 /**
523  *      Gets the ram used.
524  *      \return         memory used in bytes
525  */
526 ///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
527 udword RadixSort::GetUsedRam() const
528 {
529         udword UsedRam = sizeof(RadixSort);
530 #ifndef RADIX_LOCAL_RAM
531         UsedRam += 256*4*sizeof(udword);                        // Histograms
532         UsedRam += 256*sizeof(udword);                          // Offsets
533 #endif
534         UsedRam += 2*CURRENT_SIZE*sizeof(udword);       // 2 lists of indices
535         return UsedRam;
536 }