1 /* BwtSort.c -- BWT block sorting
\r
9 /* #define BLOCK_SORT_USE_HEAP_SORT */
\r
11 #define NO_INLINE MY_FAST_CALL
\r
13 /* Don't change it !!! */
\r
14 #define kNumHashBytes 2
\r
15 #define kNumHashValues (1 << (kNumHashBytes * 8))
\r
17 /* kNumRefBitsMax must be < (kNumHashBytes * 8) = 16 */
\r
18 #define kNumRefBitsMax 12
\r
20 #define BS_TEMP_SIZE kNumHashValues
\r
22 #ifdef BLOCK_SORT_EXTERNAL_FLAGS
\r
24 /* 32 Flags in UInt32 word */
\r
25 #define kNumFlagsBits 5
\r
26 #define kNumFlagsInWord (1 << kNumFlagsBits)
\r
27 #define kFlagsMask (kNumFlagsInWord - 1)
\r
28 #define kAllFlags 0xFFFFFFFF
\r
32 #define kNumBitsMax 20
\r
33 #define kIndexMask ((1 << kNumBitsMax) - 1)
\r
34 #define kNumExtraBits (32 - kNumBitsMax)
\r
35 #define kNumExtra0Bits (kNumExtraBits - 2)
\r
36 #define kNumExtra0Mask ((1 << kNumExtra0Bits) - 1)
\r
38 #define SetFinishedGroupSize(p, size) \
\r
39 { *(p) |= ((((size) - 1) & kNumExtra0Mask) << kNumBitsMax); \
\r
40 if ((size) > (1 << kNumExtra0Bits)) { \
\r
41 *(p) |= 0x40000000; *((p) + 1) |= ((((size) - 1)>> kNumExtra0Bits) << kNumBitsMax); } } \
\r
43 static void SetGroupSize(UInt32 *p, UInt32 size)
\r
47 *p |= 0x80000000 | ((size & kNumExtra0Mask) << kNumBitsMax);
\r
48 if (size >= (1 << kNumExtra0Bits))
\r
51 p[1] |= ((size >> kNumExtra0Bits) << kNumBitsMax);
\r
58 SortGroup - is recursive Range-Sort function with HeapSort optimization for small blocks
\r
59 "range" is not real range. It's only for optimization.
\r
60 returns: 1 - if there are groups, 0 - no more groups
\r
63 UInt32 NO_INLINE SortGroup(UInt32 BlockSize, UInt32 NumSortedBytes, UInt32 groupOffset, UInt32 groupSize, int NumRefBits, UInt32 *Indices
\r
64 #ifndef BLOCK_SORT_USE_HEAP_SORT
\r
65 , UInt32 left, UInt32 range
\r
69 UInt32 *ind2 = Indices + groupOffset;
\r
74 #ifndef BLOCK_SORT_EXTERNAL_FLAGS
\r
75 SetFinishedGroupSize(ind2, 1);
\r
80 Groups = Indices + BlockSize + BS_TEMP_SIZE;
\r
81 if (groupSize <= ((UInt32)1 << NumRefBits)
\r
82 #ifndef BLOCK_SORT_USE_HEAP_SORT
\r
83 && groupSize <= range
\r
87 UInt32 *temp = Indices + BlockSize;
\r
89 UInt32 mask, thereAreGroups, group, cg;
\r
94 UInt32 sp = ind2[0] + NumSortedBytes;
\r
95 if (sp >= BlockSize) sp -= BlockSize;
\r
97 temp[0] = (gPrev << NumRefBits);
\r
100 for (j = 1; j < groupSize; j++)
\r
102 UInt32 sp = ind2[j] + NumSortedBytes;
\r
104 if (sp >= BlockSize) sp -= BlockSize;
\r
106 temp[j] = (g << NumRefBits) | j;
\r
107 gRes |= (gPrev ^ g);
\r
111 #ifndef BLOCK_SORT_EXTERNAL_FLAGS
\r
112 SetGroupSize(ind2, groupSize);
\r
118 HeapSort(temp, groupSize);
\r
119 mask = ((1 << NumRefBits) - 1);
\r
120 thereAreGroups = 0;
\r
122 group = groupOffset;
\r
123 cg = (temp[0] >> NumRefBits);
\r
124 temp[0] = ind2[temp[0] & mask];
\r
127 #ifdef BLOCK_SORT_EXTERNAL_FLAGS
\r
128 UInt32 *Flags = Groups + BlockSize;
\r
130 UInt32 prevGroupStart = 0;
\r
133 for (j = 1; j < groupSize; j++)
\r
135 UInt32 val = temp[j];
\r
136 UInt32 cgCur = (val >> NumRefBits);
\r
141 group = groupOffset + j;
\r
143 #ifdef BLOCK_SORT_EXTERNAL_FLAGS
\r
145 UInt32 t = group - 1;
\r
146 Flags[t >> kNumFlagsBits] &= ~(1 << (t & kFlagsMask));
\r
149 SetGroupSize(temp + prevGroupStart, j - prevGroupStart);
\r
150 prevGroupStart = j;
\r
154 thereAreGroups = 1;
\r
156 UInt32 ind = ind2[val & mask];
\r
158 Groups[ind] = group;
\r
162 #ifndef BLOCK_SORT_EXTERNAL_FLAGS
\r
163 SetGroupSize(temp + prevGroupStart, j - prevGroupStart);
\r
167 for (j = 0; j < groupSize; j++)
\r
169 return thereAreGroups;
\r
172 /* Check that all strings are in one group (cannot sort) */
\r
175 UInt32 sp = ind2[0] + NumSortedBytes; if (sp >= BlockSize) sp -= BlockSize;
\r
176 group = Groups[sp];
\r
177 for (j = 1; j < groupSize; j++)
\r
179 sp = ind2[j] + NumSortedBytes; if (sp >= BlockSize) sp -= BlockSize;
\r
180 if (Groups[sp] != group)
\r
183 if (j == groupSize)
\r
185 #ifndef BLOCK_SORT_EXTERNAL_FLAGS
\r
186 SetGroupSize(ind2, groupSize);
\r
192 #ifndef BLOCK_SORT_USE_HEAP_SORT
\r
194 /* ---------- Range Sort ---------- */
\r
202 #ifndef BLOCK_SORT_EXTERNAL_FLAGS
\r
203 SetGroupSize(ind2, groupSize);
\r
207 mid = left + ((range + 1) >> 1);
\r
212 UInt32 sp = ind2[i] + NumSortedBytes; if (sp >= BlockSize) sp -= BlockSize;
\r
213 if (Groups[sp] >= mid)
\r
215 for (j--; j > i; j--)
\r
217 sp = ind2[j] + NumSortedBytes; if (sp >= BlockSize) sp -= BlockSize;
\r
218 if (Groups[sp] < mid)
\r
220 UInt32 temp = ind2[i]; ind2[i] = ind2[j]; ind2[j] = temp;
\r
231 range = range - (mid - left);
\r
234 else if (i == groupSize)
\r
235 range = (mid - left);
\r
240 #ifdef BLOCK_SORT_EXTERNAL_FLAGS
\r
242 UInt32 t = (groupOffset + i - 1);
\r
243 UInt32 *Flags = Groups + BlockSize;
\r
244 Flags[t >> kNumFlagsBits] &= ~(1 << (t & kFlagsMask));
\r
250 for (j = i; j < groupSize; j++)
\r
251 Groups[ind2[j]] = groupOffset + i;
\r
255 UInt32 res = SortGroup(BlockSize, NumSortedBytes, groupOffset, i, NumRefBits, Indices, left, mid - left);
\r
256 return res | SortGroup(BlockSize, NumSortedBytes, groupOffset + i, groupSize - i, NumRefBits, Indices, mid, range - (mid - left));
\r
263 /* ---------- Heap Sort ---------- */
\r
267 for (j = 0; j < groupSize; j++)
\r
269 UInt32 sp = ind2[j] + NumSortedBytes; if (sp >= BlockSize) sp -= BlockSize;
\r
273 HeapSortRef(ind2, Groups, groupSize);
\r
277 UInt32 sp = ind2[0];
\r
278 UInt32 group = Groups[sp];
\r
280 #ifdef BLOCK_SORT_EXTERNAL_FLAGS
\r
281 UInt32 *Flags = Groups + BlockSize;
\r
283 UInt32 prevGroupStart = 0;
\r
286 for (j = 1; j < groupSize; j++)
\r
289 if (Groups[sp] != group)
\r
291 group = Groups[sp];
\r
292 #ifdef BLOCK_SORT_EXTERNAL_FLAGS
\r
294 UInt32 t = groupOffset + j - 1;
\r
295 Flags[t >> kNumFlagsBits] &= ~(1 << (t & kFlagsMask));
\r
298 SetGroupSize(ind2 + prevGroupStart, j - prevGroupStart);
\r
299 prevGroupStart = j;
\r
304 #ifndef BLOCK_SORT_EXTERNAL_FLAGS
\r
305 SetGroupSize(ind2 + prevGroupStart, j - prevGroupStart);
\r
309 /* Write new Groups values and Check that there are groups */
\r
310 UInt32 thereAreGroups = 0;
\r
311 for (j = 0; j < groupSize; j++)
\r
313 UInt32 group = groupOffset + j;
\r
314 #ifndef BLOCK_SORT_EXTERNAL_FLAGS
\r
315 UInt32 subGroupSize = ((ind2[j] & ~0xC0000000) >> kNumBitsMax);
\r
316 if ((ind2[j] & 0x40000000) != 0)
\r
317 subGroupSize += ((ind2[j + 1] >> kNumBitsMax) << kNumExtra0Bits);
\r
321 UInt32 original = ind2[j];
\r
322 UInt32 sp = original & kIndexMask;
\r
323 if (sp < NumSortedBytes) sp += BlockSize; sp -= NumSortedBytes;
\r
324 ind2[j] = sp | (original & ~kIndexMask);
\r
325 Groups[sp] = group;
\r
326 if (--subGroupSize == 0)
\r
329 thereAreGroups = 1;
\r
332 UInt32 *Flags = Groups + BlockSize;
\r
335 UInt32 sp = ind2[j]; if (sp < NumSortedBytes) sp += BlockSize; sp -= NumSortedBytes;
\r
337 Groups[sp] = group;
\r
338 if ((Flags[(groupOffset + j) >> kNumFlagsBits] & (1 << ((groupOffset + j) & kFlagsMask))) == 0)
\r
341 thereAreGroups = 1;
\r
345 return thereAreGroups;
\r
351 /* conditions: blockSize > 0 */
\r
352 UInt32 BlockSort(UInt32 *Indices, const Byte *data, UInt32 blockSize)
\r
354 UInt32 *counters = Indices + blockSize;
\r
357 #ifdef BLOCK_SORT_EXTERNAL_FLAGS
\r
361 /* Radix-Sort for 2 bytes */
\r
362 for (i = 0; i < kNumHashValues; i++)
\r
364 for (i = 0; i < blockSize - 1; i++)
\r
365 counters[((UInt32)data[i] << 8) | data[i + 1]]++;
\r
366 counters[((UInt32)data[i] << 8) | data[0]]++;
\r
368 Groups = counters + BS_TEMP_SIZE;
\r
369 #ifdef BLOCK_SORT_EXTERNAL_FLAGS
\r
370 Flags = Groups + blockSize;
\r
372 UInt32 numWords = (blockSize + kFlagsMask) >> kNumFlagsBits;
\r
373 for (i = 0; i < numWords; i++)
\r
374 Flags[i] = kAllFlags;
\r
380 for (i = 0; i < kNumHashValues; i++)
\r
382 UInt32 groupSize = counters[i];
\r
385 #ifdef BLOCK_SORT_EXTERNAL_FLAGS
\r
386 UInt32 t = sum + groupSize - 1;
\r
387 Flags[t >> kNumFlagsBits] &= ~(1 << (t & kFlagsMask));
\r
391 counters[i] = sum - groupSize;
\r
394 for (i = 0; i < blockSize - 1; i++)
\r
395 Groups[i] = counters[((UInt32)data[i] << 8) | data[i + 1]];
\r
396 Groups[i] = counters[((UInt32)data[i] << 8) | data[0]];
\r
398 for (i = 0; i < blockSize - 1; i++)
\r
399 Indices[counters[((UInt32)data[i] << 8) | data[i + 1]]++] = i;
\r
400 Indices[counters[((UInt32)data[i] << 8) | data[0]]++] = i;
\r
402 #ifndef BLOCK_SORT_EXTERNAL_FLAGS
\r
405 for (i = 0; i < kNumHashValues; i++)
\r
407 UInt32 prevGroupSize = counters[i] - prev;
\r
408 if (prevGroupSize == 0)
\r
410 SetGroupSize(Indices + prev, prevGroupSize);
\r
411 prev = counters[i];
\r
419 UInt32 NumSortedBytes;
\r
420 for (NumRefBits = 0; ((blockSize - 1) >> NumRefBits) != 0; NumRefBits++);
\r
421 NumRefBits = 32 - NumRefBits;
\r
422 if (NumRefBits > kNumRefBitsMax)
\r
423 NumRefBits = kNumRefBitsMax;
\r
425 for (NumSortedBytes = kNumHashBytes; ; NumSortedBytes <<= 1)
\r
427 #ifndef BLOCK_SORT_EXTERNAL_FLAGS
\r
428 UInt32 finishedGroupSize = 0;
\r
430 UInt32 newLimit = 0;
\r
431 for (i = 0; i < blockSize;)
\r
434 #ifdef BLOCK_SORT_EXTERNAL_FLAGS
\r
436 if ((Flags[i >> kNumFlagsBits] & (1 << (i & kFlagsMask))) == 0)
\r
441 for (groupSize = 1;
\r
442 (Flags[(i + groupSize) >> kNumFlagsBits] & (1 << ((i + groupSize) & kFlagsMask))) != 0;
\r
449 groupSize = ((Indices[i] & ~0xC0000000) >> kNumBitsMax);
\r
451 Bool finishedGroup = ((Indices[i] & 0x80000000) == 0);
\r
452 if ((Indices[i] & 0x40000000) != 0)
\r
454 groupSize += ((Indices[i + 1] >> kNumBitsMax) << kNumExtra0Bits);
\r
455 Indices[i + 1] &= kIndexMask;
\r
457 Indices[i] &= kIndexMask;
\r
459 if (finishedGroup || groupSize == 1)
\r
461 Indices[i - finishedGroupSize] &= kIndexMask;
\r
462 if (finishedGroupSize > 1)
\r
463 Indices[i - finishedGroupSize + 1] &= kIndexMask;
\r
465 UInt32 newGroupSize = groupSize + finishedGroupSize;
\r
466 SetFinishedGroupSize(Indices + i - finishedGroupSize, newGroupSize);
\r
467 finishedGroupSize = newGroupSize;
\r
472 finishedGroupSize = 0;
\r
477 if (NumSortedBytes >= blockSize)
\r
480 for (j = 0; j < groupSize; j++)
\r
482 UInt32 t = (i + j);
\r
483 /* Flags[t >> kNumFlagsBits] &= ~(1 << (t & kFlagsMask)); */
\r
484 Groups[Indices[t]] = t;
\r
488 if (SortGroup(blockSize, NumSortedBytes, i, groupSize, NumRefBits, Indices
\r
489 #ifndef BLOCK_SORT_USE_HEAP_SORT
\r
493 newLimit = i + groupSize;
\r
500 #ifndef BLOCK_SORT_EXTERNAL_FLAGS
\r
501 for (i = 0; i < blockSize;)
\r
503 UInt32 groupSize = ((Indices[i] & ~0xC0000000) >> kNumBitsMax);
\r
504 if ((Indices[i] & 0x40000000) != 0)
\r
506 groupSize += ((Indices[i + 1] >> kNumBitsMax) << kNumExtra0Bits);
\r
507 Indices[i + 1] &= kIndexMask;
\r
509 Indices[i] &= kIndexMask;
\r