1 // Copyright (C) 2016 and later: Unicode, Inc. and others.
2 // License & terms of use: http://www.unicode.org/copyright.html
4 *******************************************************************************
5 * Copyright (C) 2013-2014, International Business Machines
6 * Corporation and others. All Rights Reserved.
7 *******************************************************************************
8 * collationrootelements.cpp
10 * created on: 2013mar05
11 * created by: Markus W. Scherer
14 #include "unicode/utypes.h"
16 #if !UCONFIG_NO_COLLATION
18 #include "collation.h"
19 #include "collationrootelements.h"
25 CollationRootElements::lastCEWithPrimaryBefore(uint32_t p) const {
26 if(p == 0) { return 0; }
27 U_ASSERT(p > elements[elements[IX_FIRST_PRIMARY_INDEX]]);
28 int32_t index = findP(p);
29 uint32_t q = elements[index];
31 if(p == (q & 0xffffff00)) {
32 // p == elements[index] is a root primary. Find the CE before it.
33 // We must not be in a primary range.
34 U_ASSERT((q & PRIMARY_STEP_MASK) == 0);
35 secTer = elements[index - 1];
36 if((secTer & SEC_TER_DELTA_FLAG) == 0) {
37 // Primary CE just before p.
38 p = secTer & 0xffffff00;
39 secTer = Collation::COMMON_SEC_AND_TER_CE;
41 // secTer = last secondary & tertiary for the previous primary
45 if((p & SEC_TER_DELTA_FLAG) == 0) {
53 // p > elements[index] which is the previous primary.
54 // Find the last secondary & tertiary weights for it.
56 secTer = Collation::COMMON_SEC_AND_TER_CE;
58 q = elements[++index];
59 if((q & SEC_TER_DELTA_FLAG) == 0) {
60 // We must not be in a primary range.
61 U_ASSERT((q & PRIMARY_STEP_MASK) == 0);
67 return ((int64_t)p << 32) | (secTer & ~SEC_TER_DELTA_FLAG);
71 CollationRootElements::firstCEWithPrimaryAtLeast(uint32_t p) const {
72 if(p == 0) { return 0; }
73 int32_t index = findP(p);
74 if(p != (elements[index] & 0xffffff00)) {
76 p = elements[++index];
77 if((p & SEC_TER_DELTA_FLAG) == 0) {
78 // First primary after p. We must not be in a primary range.
79 U_ASSERT((p & PRIMARY_STEP_MASK) == 0);
84 // The code above guarantees that p has at most 3 bytes: (p & 0xff) == 0.
85 return ((int64_t)p << 32) | Collation::COMMON_SEC_AND_TER_CE;
89 CollationRootElements::getPrimaryBefore(uint32_t p, UBool isCompressible) const {
90 int32_t index = findPrimary(p);
92 uint32_t q = elements[index];
93 if(p == (q & 0xffffff00)) {
94 // Found p itself. Return the previous primary.
95 // See if p is at the end of a previous range.
96 step = (int32_t)q & PRIMARY_STEP_MASK;
98 // p is not at the end of a range. Look for the previous primary.
100 p = elements[--index];
101 } while((p & SEC_TER_DELTA_FLAG) != 0);
102 return p & 0xffffff00;
105 // p is in a range, and not at the start.
106 uint32_t nextElement = elements[index + 1];
107 U_ASSERT(isEndOfPrimaryRange(nextElement));
108 step = (int32_t)nextElement & PRIMARY_STEP_MASK;
110 // Return the previous range primary.
111 if((p & 0xffff) == 0) {
112 return Collation::decTwoBytePrimaryByOneStep(p, isCompressible, step);
114 return Collation::decThreeBytePrimaryByOneStep(p, isCompressible, step);
119 CollationRootElements::getSecondaryBefore(uint32_t p, uint32_t s) const {
121 uint32_t previousSec, sec;
123 index = (int32_t)elements[IX_FIRST_SECONDARY_INDEX];
124 // Gap at the beginning of the secondary CE range.
126 sec = elements[index] >> 16;
128 index = findPrimary(p) + 1;
129 previousSec = Collation::BEFORE_WEIGHT16;
130 sec = getFirstSecTerForPrimary(index) >> 16;
135 U_ASSERT((elements[index] & SEC_TER_DELTA_FLAG) != 0);
136 sec = elements[index++] >> 16;
143 CollationRootElements::getTertiaryBefore(uint32_t p, uint32_t s, uint32_t t) const {
144 U_ASSERT((t & ~Collation::ONLY_TERTIARY_MASK) == 0);
146 uint32_t previousTer, secTer;
149 index = (int32_t)elements[IX_FIRST_TERTIARY_INDEX];
150 // Gap at the beginning of the tertiary CE range.
153 index = (int32_t)elements[IX_FIRST_SECONDARY_INDEX];
154 previousTer = Collation::BEFORE_WEIGHT16;
156 secTer = elements[index] & ~SEC_TER_DELTA_FLAG;
158 index = findPrimary(p) + 1;
159 previousTer = Collation::BEFORE_WEIGHT16;
160 secTer = getFirstSecTerForPrimary(index);
162 uint32_t st = (s << 16) | t;
164 if((secTer >> 16) == s) { previousTer = secTer; }
165 U_ASSERT((elements[index] & SEC_TER_DELTA_FLAG) != 0);
166 secTer = elements[index++] & ~SEC_TER_DELTA_FLAG;
168 U_ASSERT(secTer == st);
169 return previousTer & 0xffff;
173 CollationRootElements::getPrimaryAfter(uint32_t p, int32_t index, UBool isCompressible) const {
174 U_ASSERT(p == (elements[index] & 0xffffff00) || isEndOfPrimaryRange(elements[index + 1]));
175 uint32_t q = elements[++index];
177 if((q & SEC_TER_DELTA_FLAG) == 0 && (step = (int32_t)q & PRIMARY_STEP_MASK) != 0) {
178 // Return the next primary in this range.
179 if((p & 0xffff) == 0) {
180 return Collation::incTwoBytePrimaryByOffset(p, isCompressible, step);
182 return Collation::incThreeBytePrimaryByOffset(p, isCompressible, step);
185 // Return the next primary in the list.
186 while((q & SEC_TER_DELTA_FLAG) != 0) {
187 q = elements[++index];
189 U_ASSERT((q & PRIMARY_STEP_MASK) == 0);
195 CollationRootElements::getSecondaryAfter(int32_t index, uint32_t s) const {
201 index = (int32_t)elements[IX_FIRST_SECONDARY_INDEX];
202 secTer = elements[index];
203 // Gap at the end of the secondary CE range.
206 U_ASSERT(index >= (int32_t)elements[IX_FIRST_PRIMARY_INDEX]);
207 secTer = getFirstSecTerForPrimary(index + 1);
208 // If this is an explicit sec/ter unit, then it will be read once more.
209 // Gap for secondaries of primary CEs.
210 secLimit = getSecondaryBoundary();
213 uint32_t sec = secTer >> 16;
214 if(sec > s) { return sec; }
215 secTer = elements[++index];
216 if((secTer & SEC_TER_DELTA_FLAG) == 0) { return secLimit; }
221 CollationRootElements::getTertiaryAfter(int32_t index, uint32_t s, uint32_t t) const {
228 index = (int32_t)elements[IX_FIRST_TERTIARY_INDEX];
229 // Gap at the end of the tertiary CE range.
232 index = (int32_t)elements[IX_FIRST_SECONDARY_INDEX];
233 // Gap for tertiaries of primary/secondary CEs.
234 terLimit = getTertiaryBoundary();
236 secTer = elements[index] & ~SEC_TER_DELTA_FLAG;
238 U_ASSERT(index >= (int32_t)elements[IX_FIRST_PRIMARY_INDEX]);
239 secTer = getFirstSecTerForPrimary(index + 1);
240 // If this is an explicit sec/ter unit, then it will be read once more.
241 terLimit = getTertiaryBoundary();
243 uint32_t st = (s << 16) | t;
246 U_ASSERT((secTer >> 16) == s);
247 return secTer & 0xffff;
249 secTer = elements[++index];
250 // No tertiary greater than t for this primary+secondary.
251 if((secTer & SEC_TER_DELTA_FLAG) == 0 || (secTer >> 16) > s) { return terLimit; }
252 secTer &= ~SEC_TER_DELTA_FLAG;
257 CollationRootElements::getFirstSecTerForPrimary(int32_t index) const {
258 uint32_t secTer = elements[index];
259 if((secTer & SEC_TER_DELTA_FLAG) == 0) {
261 return Collation::COMMON_SEC_AND_TER_CE;
263 secTer &= ~SEC_TER_DELTA_FLAG;
264 if(secTer > Collation::COMMON_SEC_AND_TER_CE) {
266 return Collation::COMMON_SEC_AND_TER_CE;
268 // Explicit sec/ter below common/common.
273 CollationRootElements::findPrimary(uint32_t p) const {
274 // Requirement: p must occur as a root primary.
275 U_ASSERT((p & 0xff) == 0); // at most a 3-byte primary
276 int32_t index = findP(p);
277 // If p is in a range, then we just assume that p is an actual primary in this range.
278 // (Too cumbersome/expensive to check.)
279 // Otherwise, it must be an exact match.
280 U_ASSERT(isEndOfPrimaryRange(elements[index + 1]) || p == (elements[index] & 0xffffff00));
285 CollationRootElements::findP(uint32_t p) const {
286 // p need not occur as a root primary.
287 // For example, it might be a reordering group boundary.
288 U_ASSERT((p >> 24) != Collation::UNASSIGNED_IMPLICIT_BYTE);
289 // modified binary search
290 int32_t start = (int32_t)elements[IX_FIRST_PRIMARY_INDEX];
291 U_ASSERT(p >= elements[start]);
292 int32_t limit = length - 1;
293 U_ASSERT(elements[limit] >= PRIMARY_SENTINEL);
294 U_ASSERT(p < elements[limit]);
295 while((start + 1) < limit) {
296 // Invariant: elements[start] and elements[limit] are primaries,
297 // and elements[start]<=p<=elements[limit].
298 int32_t i = (start + limit) / 2;
299 uint32_t q = elements[i];
300 if((q & SEC_TER_DELTA_FLAG) != 0) {
301 // Find the next primary.
304 if(j == limit) { break; }
306 if((q & SEC_TER_DELTA_FLAG) == 0) {
312 if((q & SEC_TER_DELTA_FLAG) != 0) {
313 // Find the preceding primary.
316 if(j == start) { break; }
318 if((q & SEC_TER_DELTA_FLAG) == 0) {
324 if((q & SEC_TER_DELTA_FLAG) != 0) {
325 // No primary between start and limit.
330 if(p < (q & 0xffffff00)) { // Reset the "step" bits of a range end primary.
341 #endif // !UCONFIG_NO_COLLATION