Imported Upstream version 58.1
[platform/upstream/icu.git] / source / i18n / collationsettings.cpp
1 // Copyright (C) 2016 and later: Unicode, Inc. and others.
2 // License & terms of use: http://www.unicode.org/copyright.html
3 /*
4 *******************************************************************************
5 * Copyright (C) 2013-2015, International Business Machines
6 * Corporation and others.  All Rights Reserved.
7 *******************************************************************************
8 * collationsettings.cpp
9 *
10 * created on: 2013feb07
11 * created by: Markus W. Scherer
12 */
13
14 #include "unicode/utypes.h"
15
16 #if !UCONFIG_NO_COLLATION
17
18 #include "unicode/ucol.h"
19 #include "cmemory.h"
20 #include "collation.h"
21 #include "collationdata.h"
22 #include "collationsettings.h"
23 #include "sharedobject.h"
24 #include "uassert.h"
25 #include "umutex.h"
26 #include "uvectr32.h"
27
28 U_NAMESPACE_BEGIN
29
30 CollationSettings::CollationSettings(const CollationSettings &other)
31         : SharedObject(other),
32           options(other.options), variableTop(other.variableTop),
33           reorderTable(NULL),
34           minHighNoReorder(other.minHighNoReorder),
35           reorderRanges(NULL), reorderRangesLength(0),
36           reorderCodes(NULL), reorderCodesLength(0), reorderCodesCapacity(0),
37           fastLatinOptions(other.fastLatinOptions) {
38     UErrorCode errorCode = U_ZERO_ERROR;
39     copyReorderingFrom(other, errorCode);
40     if(fastLatinOptions >= 0) {
41         uprv_memcpy(fastLatinPrimaries, other.fastLatinPrimaries, sizeof(fastLatinPrimaries));
42     }
43 }
44
45 CollationSettings::~CollationSettings() {
46     if(reorderCodesCapacity != 0) {
47         uprv_free(const_cast<int32_t *>(reorderCodes));
48     }
49 }
50
51 UBool
52 CollationSettings::operator==(const CollationSettings &other) const {
53     if(options != other.options) { return FALSE; }
54     if((options & ALTERNATE_MASK) != 0 && variableTop != other.variableTop) { return FALSE; }
55     if(reorderCodesLength != other.reorderCodesLength) { return FALSE; }
56     for(int32_t i = 0; i < reorderCodesLength; ++i) {
57         if(reorderCodes[i] != other.reorderCodes[i]) { return FALSE; }
58     }
59     return TRUE;
60 }
61
62 int32_t
63 CollationSettings::hashCode() const {
64     int32_t h = options << 8;
65     if((options & ALTERNATE_MASK) != 0) { h ^= variableTop; }
66     h ^= reorderCodesLength;
67     for(int32_t i = 0; i < reorderCodesLength; ++i) {
68         h ^= (reorderCodes[i] << i);
69     }
70     return h;
71 }
72
73 void
74 CollationSettings::resetReordering() {
75     // When we turn off reordering, we want to set a NULL permutation
76     // rather than a no-op permutation.
77     // Keep the memory via reorderCodes and its capacity.
78     reorderTable = NULL;
79     minHighNoReorder = 0;
80     reorderRangesLength = 0;
81     reorderCodesLength = 0;
82 }
83
84 void
85 CollationSettings::aliasReordering(const CollationData &data, const int32_t *codes, int32_t length,
86                                    const uint32_t *ranges, int32_t rangesLength,
87                                    const uint8_t *table, UErrorCode &errorCode) {
88     if(U_FAILURE(errorCode)) { return; }
89     if(table != NULL &&
90             (rangesLength == 0 ?
91                     !reorderTableHasSplitBytes(table) :
92                     rangesLength >= 2 &&
93                     // The first offset must be 0. The last offset must not be 0.
94                     (ranges[0] & 0xffff) == 0 && (ranges[rangesLength - 1] & 0xffff) != 0)) {
95         // We need to release the memory before setting the alias pointer.
96         if(reorderCodesCapacity != 0) {
97             uprv_free(const_cast<int32_t *>(reorderCodes));
98             reorderCodesCapacity = 0;
99         }
100         reorderTable = table;
101         reorderCodes = codes;
102         reorderCodesLength = length;
103         // Drop ranges before the first split byte. They are reordered by the table.
104         // This then speeds up reordering of the remaining ranges.
105         int32_t firstSplitByteRangeIndex = 0;
106         while(firstSplitByteRangeIndex < rangesLength &&
107                 (ranges[firstSplitByteRangeIndex] & 0xff0000) == 0) {
108             // The second byte of the primary limit is 0.
109             ++firstSplitByteRangeIndex;
110         }
111         if(firstSplitByteRangeIndex == rangesLength) {
112             U_ASSERT(!reorderTableHasSplitBytes(table));
113             minHighNoReorder = 0;
114             reorderRanges = NULL;
115             reorderRangesLength = 0;
116         } else {
117             U_ASSERT(table[ranges[firstSplitByteRangeIndex] >> 24] == 0);
118             minHighNoReorder = ranges[rangesLength - 1] & 0xffff0000;
119             reorderRanges = ranges + firstSplitByteRangeIndex;
120             reorderRangesLength = rangesLength - firstSplitByteRangeIndex;
121         }
122         return;
123     }
124     // Regenerate missing data.
125     setReordering(data, codes, length, errorCode);
126 }
127
128 void
129 CollationSettings::setReordering(const CollationData &data,
130                                  const int32_t *codes, int32_t codesLength,
131                                  UErrorCode &errorCode) {
132     if(U_FAILURE(errorCode)) { return; }
133     if(codesLength == 0 || (codesLength == 1 && codes[0] == UCOL_REORDER_CODE_NONE)) {
134         resetReordering();
135         return;
136     }
137     UVector32 rangesList(errorCode);
138     data.makeReorderRanges(codes, codesLength, rangesList, errorCode);
139     if(U_FAILURE(errorCode)) { return; }
140     int32_t rangesLength = rangesList.size();
141     if(rangesLength == 0) {
142         resetReordering();
143         return;
144     }
145     const uint32_t *ranges = reinterpret_cast<uint32_t *>(rangesList.getBuffer());
146     // ranges[] contains at least two (limit, offset) pairs.
147     // The first offset must be 0. The last offset must not be 0.
148     // Separators (at the low end) and trailing weights (at the high end)
149     // are never reordered.
150     U_ASSERT(rangesLength >= 2);
151     U_ASSERT((ranges[0] & 0xffff) == 0 && (ranges[rangesLength - 1] & 0xffff) != 0);
152     minHighNoReorder = ranges[rangesLength - 1] & 0xffff0000;
153
154     // Write the lead byte permutation table.
155     // Set a 0 for each lead byte that has a range boundary in the middle.
156     uint8_t table[256];
157     int32_t b = 0;
158     int32_t firstSplitByteRangeIndex = -1;
159     for(int32_t i = 0; i < rangesLength; ++i) {
160         uint32_t pair = ranges[i];
161         int32_t limit1 = (int32_t)(pair >> 24);
162         while(b < limit1) {
163             table[b] = (uint8_t)(b + pair);
164             ++b;
165         }
166         // Check the second byte of the limit.
167         if((pair & 0xff0000) != 0) {
168             table[limit1] = 0;
169             b = limit1 + 1;
170             if(firstSplitByteRangeIndex < 0) {
171                 firstSplitByteRangeIndex = i;
172             }
173         }
174     }
175     while(b <= 0xff) {
176         table[b] = (uint8_t)b;
177         ++b;
178     }
179     if(firstSplitByteRangeIndex < 0) {
180         // The lead byte permutation table alone suffices for reordering.
181         rangesLength = 0;
182     } else {
183         // Remove the ranges below the first split byte.
184         ranges += firstSplitByteRangeIndex;
185         rangesLength -= firstSplitByteRangeIndex;
186     }
187     setReorderArrays(codes, codesLength, ranges, rangesLength, table, errorCode);
188 }
189
190 void
191 CollationSettings::setReorderArrays(const int32_t *codes, int32_t codesLength,
192                                     const uint32_t *ranges, int32_t rangesLength,
193                                     const uint8_t *table, UErrorCode &errorCode) {
194     if(U_FAILURE(errorCode)) { return; }
195     int32_t *ownedCodes;
196     int32_t totalLength = codesLength + rangesLength;
197     U_ASSERT(totalLength > 0);
198     if(totalLength <= reorderCodesCapacity) {
199         ownedCodes = const_cast<int32_t *>(reorderCodes);
200     } else {
201         // Allocate one memory block for the codes, the ranges, and the 16-aligned table.
202         int32_t capacity = (totalLength + 3) & ~3;  // round up to a multiple of 4 ints
203         ownedCodes = (int32_t *)uprv_malloc(capacity * 4 + 256);
204         if(ownedCodes == NULL) {
205             resetReordering();
206             errorCode = U_MEMORY_ALLOCATION_ERROR;
207             return;
208         }
209         if(reorderCodesCapacity != 0) {
210             uprv_free(const_cast<int32_t *>(reorderCodes));
211         }
212         reorderCodes = ownedCodes;
213         reorderCodesCapacity = capacity;
214     }
215     uprv_memcpy(ownedCodes + reorderCodesCapacity, table, 256);
216     uprv_memcpy(ownedCodes, codes, codesLength * 4);
217     uprv_memcpy(ownedCodes + codesLength, ranges, rangesLength * 4);
218     reorderTable = reinterpret_cast<const uint8_t *>(reorderCodes + reorderCodesCapacity);
219     reorderCodesLength = codesLength;
220     reorderRanges = reinterpret_cast<uint32_t *>(ownedCodes) + codesLength;
221     reorderRangesLength = rangesLength;
222 }
223
224 void
225 CollationSettings::copyReorderingFrom(const CollationSettings &other, UErrorCode &errorCode) {
226     if(U_FAILURE(errorCode)) { return; }
227     if(!other.hasReordering()) {
228         resetReordering();
229         return;
230     }
231     minHighNoReorder = other.minHighNoReorder;
232     if(other.reorderCodesCapacity == 0) {
233         // The reorder arrays are aliased to memory-mapped data.
234         reorderTable = other.reorderTable;
235         reorderRanges = other.reorderRanges;
236         reorderRangesLength = other.reorderRangesLength;
237         reorderCodes = other.reorderCodes;
238         reorderCodesLength = other.reorderCodesLength;
239     } else {
240         setReorderArrays(other.reorderCodes, other.reorderCodesLength,
241                          other.reorderRanges, other.reorderRangesLength,
242                          other.reorderTable, errorCode);
243     }
244 }
245
246 UBool
247 CollationSettings::reorderTableHasSplitBytes(const uint8_t table[256]) {
248     U_ASSERT(table[0] == 0);
249     for(int32_t i = 1; i < 256; ++i) {
250         if(table[i] == 0) {
251             return TRUE;
252         }
253     }
254     return FALSE;
255 }
256
257 uint32_t
258 CollationSettings::reorderEx(uint32_t p) const {
259     if(p >= minHighNoReorder) { return p; }
260     // Round up p so that its lower 16 bits are >= any offset bits.
261     // Then compare q directly with (limit, offset) pairs.
262     uint32_t q = p | 0xffff;
263     uint32_t r;
264     const uint32_t *ranges = reorderRanges;
265     while(q >= (r = *ranges)) { ++ranges; }
266     return p + (r << 24);
267 }
268
269 void
270 CollationSettings::setStrength(int32_t value, int32_t defaultOptions, UErrorCode &errorCode) {
271     if(U_FAILURE(errorCode)) { return; }
272     int32_t noStrength = options & ~STRENGTH_MASK;
273     switch(value) {
274     case UCOL_PRIMARY:
275     case UCOL_SECONDARY:
276     case UCOL_TERTIARY:
277     case UCOL_QUATERNARY:
278     case UCOL_IDENTICAL:
279         options = noStrength | (value << STRENGTH_SHIFT);
280         break;
281     case UCOL_DEFAULT:
282         options = noStrength | (defaultOptions & STRENGTH_MASK);
283         break;
284     default:
285         errorCode = U_ILLEGAL_ARGUMENT_ERROR;
286         break;
287     }
288 }
289
290 void
291 CollationSettings::setFlag(int32_t bit, UColAttributeValue value,
292                            int32_t defaultOptions, UErrorCode &errorCode) {
293     if(U_FAILURE(errorCode)) { return; }
294     switch(value) {
295     case UCOL_ON:
296         options |= bit;
297         break;
298     case UCOL_OFF:
299         options &= ~bit;
300         break;
301     case UCOL_DEFAULT:
302         options = (options & ~bit) | (defaultOptions & bit);
303         break;
304     default:
305         errorCode = U_ILLEGAL_ARGUMENT_ERROR;
306         break;
307     }
308 }
309
310 void
311 CollationSettings::setCaseFirst(UColAttributeValue value,
312                                 int32_t defaultOptions, UErrorCode &errorCode) {
313     if(U_FAILURE(errorCode)) { return; }
314     int32_t noCaseFirst = options & ~CASE_FIRST_AND_UPPER_MASK;
315     switch(value) {
316     case UCOL_OFF:
317         options = noCaseFirst;
318         break;
319     case UCOL_LOWER_FIRST:
320         options = noCaseFirst | CASE_FIRST;
321         break;
322     case UCOL_UPPER_FIRST:
323         options = noCaseFirst | CASE_FIRST_AND_UPPER_MASK;
324         break;
325     case UCOL_DEFAULT:
326         options = noCaseFirst | (defaultOptions & CASE_FIRST_AND_UPPER_MASK);
327         break;
328     default:
329         errorCode = U_ILLEGAL_ARGUMENT_ERROR;
330         break;
331     }
332 }
333
334 void
335 CollationSettings::setAlternateHandling(UColAttributeValue value,
336                                         int32_t defaultOptions, UErrorCode &errorCode) {
337     if(U_FAILURE(errorCode)) { return; }
338     int32_t noAlternate = options & ~ALTERNATE_MASK;
339     switch(value) {
340     case UCOL_NON_IGNORABLE:
341         options = noAlternate;
342         break;
343     case UCOL_SHIFTED:
344         options = noAlternate | SHIFTED;
345         break;
346     case UCOL_DEFAULT:
347         options = noAlternate | (defaultOptions & ALTERNATE_MASK);
348         break;
349     default:
350         errorCode = U_ILLEGAL_ARGUMENT_ERROR;
351         break;
352     }
353 }
354
355 void
356 CollationSettings::setMaxVariable(int32_t value, int32_t defaultOptions, UErrorCode &errorCode) {
357     if(U_FAILURE(errorCode)) { return; }
358     int32_t noMax = options & ~MAX_VARIABLE_MASK;
359     switch(value) {
360     case MAX_VAR_SPACE:
361     case MAX_VAR_PUNCT:
362     case MAX_VAR_SYMBOL:
363     case MAX_VAR_CURRENCY:
364         options = noMax | (value << MAX_VARIABLE_SHIFT);
365         break;
366     case UCOL_DEFAULT:
367         options = noMax | (defaultOptions & MAX_VARIABLE_MASK);
368         break;
369     default:
370         errorCode = U_ILLEGAL_ARGUMENT_ERROR;
371         break;
372     }
373 }
374
375 U_NAMESPACE_END
376
377 #endif  // !UCONFIG_NO_COLLATION