1 // Copyright (C) 2016 and later: Unicode, Inc. and others.
2 // License & terms of use: http://www.unicode.org/copyright.html
4 *******************************************************************************
5 * Copyright (C) 1996-2015, International Business Machines
6 * Corporation and others. All Rights Reserved.
7 *******************************************************************************
10 * tab size: 8 (not used)
13 * Modification history
15 * 1996-1999 various members of ICU team maintained C API for collation framework
16 * 02/16/2001 synwee Added internal method getPrevSpecialCE
17 * 03/01/2001 synwee Added maxexpansion functionality.
18 * 03/16/2001 weiv Collation framework is rewritten in C and made UCA compliant
19 * 2012-2014 markus Rewritten in C++ again.
22 #include "unicode/utypes.h"
24 #if !UCONFIG_NO_COLLATION
26 #include "unicode/coll.h"
27 #include "unicode/tblcoll.h"
28 #include "unicode/bytestream.h"
29 #include "unicode/coleitr.h"
30 #include "unicode/ucoleitr.h"
31 #include "unicode/ustring.h"
33 #include "collation.h"
// Opens a collator from a binary image and hands it back as a UCollator.
// Visible behaviour: returns NULL at once when *status already holds a
// failure; otherwise allocates a RuleBasedCollator with operator new, passing
// (among arguments not all visible here) the base collator unwrapped by
// rbcFromUCollator(base), and finally returns coll->toUCollator().
// *status is dereferenced without a NULL check.
// NOTE(review): the trailing status parameter, the remaining constructor
// arguments and the bodies of both error branches are missing from this
// extract; presumably U_MEMORY_ALLOCATION_ERROR is reported when the
// allocation yields NULL -- confirm against the full source.
41 U_CAPI UCollator* U_EXPORT2
42 ucol_openBinary(const uint8_t *bin, int32_t length,
43 const UCollator *base,
46 if(U_FAILURE(*status)) { return NULL; }
47 RuleBasedCollator *coll = new RuleBasedCollator(
49 RuleBasedCollator::rbcFromUCollator(base),
52 *status = U_MEMORY_ALLOCATION_ERROR;
// Second failure check, after construction.
55 if(U_FAILURE(*status)) {
59 return coll->toUCollator();
// Delegates to RuleBasedCollator::cloneBinary(buffer, capacity, *status) and
// returns that call's int32_t result.
// Sets *status to U_UNSUPPORTED_ERROR when coll is non-NULL but
// rbcFromUCollator(coll) yields NULL.
// NOTE(review): when coll itself is NULL the guard does not fire, so
// cloneBinary would be invoked through whatever rbcFromUCollator(NULL)
// returns -- presumably a deliberate choice not to check the object
// pointer; confirm. The values returned by the early exits are not visible
// in this extract.
62 U_CAPI int32_t U_EXPORT2
63 ucol_cloneBinary(const UCollator *coll,
64 uint8_t *buffer, int32_t capacity,
67 if(U_FAILURE(*status)) {
70 const RuleBasedCollator *rbc = RuleBasedCollator::rbcFromUCollator(coll);
71 if(rbc == NULL && coll != NULL) {
72 *status = U_UNSUPPORTED_ERROR;
75 return rbc->cloneBinary(buffer, capacity, *status);
// Clones a collator through Collator::clone() and returns the copy as a
// UCollator. The stack buffer parameter is unnamed and therefore unused.
// Visible behaviour:
//  - bails out when status is NULL or already holds a failure;
//  - reads *pBufferSize into inputSize when pBufferSize is non-NULL, and has
//    a return-NULL path labelled as preflighting for deprecated functionality;
//  - reports U_MEMORY_ALLOCATION_ERROR when clone() returns NULL;
//  - can set U_SAFECLONE_ALLOCATED_WARNING before returning the clone.
// NOTE(review): the conditions guarding U_ILLEGAL_ARGUMENT_ERROR, the
// preflight return and the warning are not visible in this extract.
78 U_CAPI UCollator* U_EXPORT2
79 ucol_safeClone(const UCollator *coll, void * /*stackBuffer*/, int32_t * pBufferSize, UErrorCode *status)
81 if (status == NULL || U_FAILURE(*status)){
85 *status = U_ILLEGAL_ARGUMENT_ERROR;
88 if (pBufferSize != NULL) {
89 int32_t inputSize = *pBufferSize;
92 return NULL; // preflighting for deprecated functionality
95 Collator *newColl = Collator::fromUCollator(coll)->clone();
96 if (newColl == NULL) {
97 *status = U_MEMORY_ALLOCATION_ERROR;
99 *status = U_SAFECLONE_ALLOCATED_WARNING;
101 return newColl->toUCollator();
// Destroys a collator: emits the UTRACE_UCOL_CLOSE trace entry, logs the
// pointer value, then deletes the Collator obtained from
// Collator::fromUCollator(coll).
104 U_CAPI void U_EXPORT2
105 ucol_close(UCollator *coll)
107 UTRACE_ENTRY_OC(UTRACE_UCOL_CLOSE);
108 UTRACE_DATA1(UTRACE_INFO, "coll = %p", coll);
110 delete Collator::fromUCollator(coll);
// Merges two sort keys level by level into dest and returns the number of
// bytes written, computed as p-dest at the end.
//
// Argument validation, as written in the first condition: each source must be
// non-NULL, its length must be -1 or positive, and when the length is
// positive the last byte must be 0; destCapacity must be non-negative and
// dest must be non-NULL whenever destCapacity is positive.
//
// Merge loop, as described by the inline comments: bytes >= 2 of the current
// level are copied from src1, a 02 merge separator is added, bytes >= 2 of
// the current level are copied from src2, and when both keys sit on a 01
// byte a 01 level separator is added and the loop continues. Whatever
// remains of the unfinished key is then appended, including its 0 byte.
//
// NOTE(review): the conditions around the two strlen computations (presumably
// taken when a length is negative), the return statements of the error and
// too-small-buffer paths, and the declarations of p and b are not visible in
// this extract.
115 U_CAPI int32_t U_EXPORT2
116 ucol_mergeSortkeys(const uint8_t *src1, int32_t src1Length,
117 const uint8_t *src2, int32_t src2Length,
118 uint8_t *dest, int32_t destCapacity) {
119 /* check arguments */
120 if( src1==NULL || src1Length<-1 || src1Length==0 || (src1Length>0 && src1[src1Length-1]!=0) ||
121 src2==NULL || src2Length<-1 || src2Length==0 || (src2Length>0 && src2[src2Length-1]!=0) ||
122 destCapacity<0 || (destCapacity>0 && dest==NULL)
124 /* error, attempt to write a zero byte and return 0 */
125 if(dest!=NULL && destCapacity>0) {
131 /* check lengths and capacity */
// Length including the terminating 0 byte, hence the +1.
133 src1Length=(int32_t)uprv_strlen((const char *)src1)+1;
136 src2Length=(int32_t)uprv_strlen((const char *)src2)+1;
// Upper bound for the merged key: both inputs including their terminators.
139 int32_t destLength=src1Length+src2Length;
140 if(destLength>destCapacity) {
141 /* the merged sort key does not fit into the destination */
145 /* merge the sort keys with the same number of levels */
148 /* copy level from src1 not including 00 or 01 */
150 while((b=*src1)>=2) {
155 /* add a 02 merge separator */
158 /* copy level from src2 not including 00 or 01 */
159 while((b=*src2)>=2) {
164 /* if both sort keys have another level, then add a 01 level separator and continue */
165 if(*src1==1 && *src2==1) {
175 * here, at least one sort key is finished now, but the other one
176 * might have some contents left from containing more levels;
177 * that contents is just appended to the result
180 /* src1 is not finished, therefore *src2==0, and src1 is appended */
183 /* append src2, "the other, unfinished sort key" */
// Copies up to and including the terminating 0 byte.
184 while((*p++=*src2++)!=0) {}
186 /* the actual length might be less than destLength if either sort key contained illegally embedded zero bytes */
187 return (int32_t)(p-dest);
// Computes a sort key by forwarding to
// Collator::getSortKey(source, sourceLength, result, resultLength) on the
// Collator obtained from fromUCollator(coll); the returned size is stored in
// keySize and reported through the exit trace.
// When verbose tracing is on, the source string is logged; a sourceLength of
// -1 with a non-NULL source is replaced by u_strlen(source) for that log.
// NOTE(review): the source and result parameter declarations and the final
// return statement are not visible in this extract.
190 U_CAPI int32_t U_EXPORT2
191 ucol_getSortKey(const UCollator *coll,
193 int32_t sourceLength,
195 int32_t resultLength)
197 UTRACE_ENTRY(UTRACE_UCOL_GET_SORTKEY);
198 if (UTRACE_LEVEL(UTRACE_VERBOSE)) {
199 UTRACE_DATA3(UTRACE_VERBOSE, "coll=%p, source string = %vh ", coll, source,
200 ((sourceLength==-1 && source!=NULL) ? u_strlen(source) : sourceLength));
203 int32_t keySize = Collator::fromUCollator(coll)->
204 getSortKey(source, sourceLength, result, resultLength);
206 UTRACE_DATA2(UTRACE_VERBOSE, "Sort Key = %vb", result, keySize);
207 UTRACE_EXIT_VALUE(keySize);
// Produces the next part of a sort key by forwarding to
// Collator::internalNextSortKeyPart(iter, state, dest, count, *status).
// Bails out first when status is NULL or already holds a failure.
// The trace calls read state[0] and state[1], so state is indexed as an array
// of at least two elements both before and after the call.
// NOTE(review): the iter and state parameter declarations, the value returned
// by the early exit and the final return statement are not visible in this
// extract.
211 U_CAPI int32_t U_EXPORT2
212 ucol_nextSortKeyPart(const UCollator *coll,
215 uint8_t *dest, int32_t count,
219 if(status==NULL || U_FAILURE(*status)) {
222 UTRACE_ENTRY(UTRACE_UCOL_NEXTSORTKEYPART);
223 UTRACE_DATA6(UTRACE_VERBOSE, "coll=%p, iter=%p, state=%d %d, dest=%p, count=%d",
224 coll, iter, state[0], state[1], dest, count);
226 int32_t i = Collator::fromUCollator(coll)->
227 internalNextSortKeyPart(iter, state, dest, count, *status);
229 // Return number of meaningful sortkey bytes.
230 UTRACE_DATA4(UTRACE_VERBOSE, "dest = %vb, state=%d %d",
231 dest,i, state[0], state[1]);
232 UTRACE_EXIT_VALUE_STATUS(i, *status);
237 * Produce a bound for a given sortkey and a number of levels.
// Builds a bound from a sort key prefix: scans source counting
// Collation::LEVEL_SEPARATOR_BYTE occurrences against noOfLevels, copies the
// first sourceIndex bytes into result, appends the bytes required by
// boundType (none for LOWER, one byte 2 for UPPER, two bytes 0xFF for
// UPPER_LONG) and terminates with a 0 byte.
// Status handling visible here: bails out when status is NULL or already a
// failure; U_SORT_KEY_TOO_SHORT_WARNING when the key ends before the scan is
// satisfied; U_ILLEGAL_ARGUMENT_ERROR on two paths whose conditions are not
// fully visible.
// When result is NULL or too small, the function returns
// sourceIndex+boundType+1 instead of writing.
// NOTE(review): the scan continues while the current byte is non-zero OR the
// index is below sourceLength -- verify that this cannot read past the key.
// NOTE(review): the write guard accepts resultLength == sourceIndex+boundType,
// while the copy, the bound bytes and the terminating 0 together appear to
// need sourceIndex+boundType+1 bytes (the very size returned on the other
// path) -- this looks like a possible one-byte overrun; confirm against the
// full source, since the loop header, the switch header and its break and
// return statements are not visible in this extract.
239 U_CAPI int32_t U_EXPORT2
240 ucol_getBound(const uint8_t *source,
241 int32_t sourceLength,
242 UColBoundMode boundType,
245 int32_t resultLength,
248 // consistency checks
249 if(status == NULL || U_FAILURE(*status)) {
253 *status = U_ILLEGAL_ARGUMENT_ERROR;
257 int32_t sourceIndex = 0;
258 // Scan the string until we skip enough of the key OR reach the end of the key
261 if(source[sourceIndex] == Collation::LEVEL_SEPARATOR_BYTE) {
264 } while (noOfLevels > 0
265 && (source[sourceIndex] != 0 || sourceIndex < sourceLength));
267 if((source[sourceIndex] == 0 || sourceIndex == sourceLength)
269 *status = U_SORT_KEY_TOO_SHORT_WARNING;
273 // READ ME: this code assumes that the values for boundType
274 // enum will not change. They are set so that the enum value
275 // corresponds to the number of extra bytes each bound type
277 if(result != NULL && resultLength >= sourceIndex+boundType) {
278 uprv_memcpy(result, source, sourceIndex);
280 // Lower bound just gets terminated. No extra bytes
281 case UCOL_BOUND_LOWER: // = 0
283 // Upper bound needs one extra byte
284 case UCOL_BOUND_UPPER: // = 1
285 result[sourceIndex++] = 2;
287 // Upper long bound needs two extra bytes
288 case UCOL_BOUND_UPPER_LONG: // = 2
289 result[sourceIndex++] = 0xFF;
290 result[sourceIndex++] = 0xFF;
293 *status = U_ILLEGAL_ARGUMENT_ERROR;
// Terminate the bound with a 0 byte.
296 result[sourceIndex++] = 0;
// Size reported when nothing was written.
300 return sourceIndex+boundType+1;
// Forwards to Collator::setMaxVariable(group, *pErrorCode) on the Collator
// obtained from fromUCollator(coll). Does nothing when *pErrorCode already
// holds a failure. Neither pErrorCode nor coll is checked for NULL here.
304 U_CAPI void U_EXPORT2
305 ucol_setMaxVariable(UCollator *coll, UColReorderCode group, UErrorCode *pErrorCode) {
306 if(U_FAILURE(*pErrorCode)) { return; }
307 Collator::fromUCollator(coll)->setMaxVariable(group, *pErrorCode);
// Returns Collator::getMaxVariable() of the Collator obtained from
// fromUCollator(coll). coll is not checked for NULL here.
310 U_CAPI UColReorderCode U_EXPORT2
311 ucol_getMaxVariable(const UCollator *coll) {
312 return Collator::fromUCollator(coll)->getMaxVariable();
// Forwards to Collator::setVariableTop(varTop, len, *status) and returns its
// uint32_t result. Takes the early exit when *status already holds a failure
// or coll is NULL.
// NOTE(review): the value returned by the early exit is not visible in this
// extract.
315 U_CAPI uint32_t U_EXPORT2
316 ucol_setVariableTop(UCollator *coll, const UChar *varTop, int32_t len, UErrorCode *status) {
317 if(U_FAILURE(*status) || coll == NULL) {
320 return Collator::fromUCollator(coll)->setVariableTop(varTop, len, *status);
// Returns Collator::getVariableTop(*status) of the wrapped Collator. Takes
// the early exit when *status already holds a failure or coll is NULL.
// NOTE(review): the value returned by the early exit is not visible in this
// extract.
323 U_CAPI uint32_t U_EXPORT2 ucol_getVariableTop(const UCollator *coll, UErrorCode *status) {
324 if(U_FAILURE(*status) || coll == NULL) {
327 return Collator::fromUCollator(coll)->getVariableTop(*status);
// Applies a previously obtained numeric variable-top value by calling the
// uint32_t overload Collator::setVariableTop(varTop, *status). Takes the
// early exit when *status already holds a failure or coll is NULL.
330 U_CAPI void U_EXPORT2
331 ucol_restoreVariableTop(UCollator *coll, const uint32_t varTop, UErrorCode *status) {
332 if(U_FAILURE(*status) || coll == NULL) {
335 Collator::fromUCollator(coll)->setVariableTop(varTop, *status);
// Forwards to Collator::setAttribute(attr, value, *status) on the wrapped
// Collator. Takes the early exit when *status already holds a failure or
// coll is NULL.
338 U_CAPI void U_EXPORT2
339 ucol_setAttribute(UCollator *coll, UColAttribute attr, UColAttributeValue value, UErrorCode *status) {
340 if(U_FAILURE(*status) || coll == NULL) {
344 Collator::fromUCollator(coll)->setAttribute(attr, value, *status);
// Returns Collator::getAttribute(attr, *status) of the wrapped Collator.
// Takes the early exit when *status already holds a failure or coll is NULL.
// NOTE(review): the value returned by the early exit is not visible in this
// extract.
347 U_CAPI UColAttributeValue U_EXPORT2
348 ucol_getAttribute(const UCollator *coll, UColAttribute attr, UErrorCode *status) {
349 if(U_FAILURE(*status) || coll == NULL) {
353 return Collator::fromUCollator(coll)->getAttribute(attr, *status);
// Convenience wrapper: sets the UCOL_STRENGTH attribute through
// ucol_setAttribute using a local status initialised to U_ZERO_ERROR.
// The function returns void and the local status is not passed back, so a
// failure reported by ucol_setAttribute is not visible to the caller.
356 U_CAPI void U_EXPORT2
357 ucol_setStrength( UCollator *coll,
358 UCollationStrength strength)
360 UErrorCode status = U_ZERO_ERROR;
361 ucol_setAttribute(coll, UCOL_STRENGTH, strength, &status);
// Convenience wrapper: returns the UCOL_STRENGTH attribute obtained through
// ucol_getAttribute using a local status initialised to U_ZERO_ERROR.
// The local status is not passed back to the caller.
364 U_CAPI UCollationStrength U_EXPORT2
365 ucol_getStrength(const UCollator *coll)
367 UErrorCode status = U_ZERO_ERROR;
368 return ucol_getAttribute(coll, UCOL_STRENGTH, &status);
// Returns Collator::getReorderCodes(dest, destCapacity, *status) of the
// wrapped Collator. Takes the early exit when *status already holds a
// failure; coll is not checked for NULL here.
// NOTE(review): the dest parameter declaration and the value returned by the
// early exit are not visible in this extract.
371 U_CAPI int32_t U_EXPORT2
372 ucol_getReorderCodes(const UCollator *coll,
374 int32_t destCapacity,
375 UErrorCode *status) {
376 if (U_FAILURE(*status)) {
380 return Collator::fromUCollator(coll)->getReorderCodes(dest, destCapacity, *status);
// Forwards to Collator::setReorderCodes(reorderCodes, reorderCodesLength,
// *status) on the wrapped Collator. Takes the early exit when *status
// already holds a failure; coll is not checked for NULL here.
383 U_CAPI void U_EXPORT2
384 ucol_setReorderCodes(UCollator* coll,
385 const int32_t* reorderCodes,
386 int32_t reorderCodesLength,
387 UErrorCode *status) {
388 if (U_FAILURE(*status)) {
392 Collator::fromUCollator(coll)->setReorderCodes(reorderCodes, reorderCodesLength, *status);
// Forwards directly to Collator::getEquivalentReorderCodes(reorderCode, dest,
// destCapacity, *pErrorCode) and returns its result. No collator instance is
// involved, and no failure check on *pErrorCode is made in this wrapper
// before the call.
// NOTE(review): the dest parameter declaration is not visible in this
// extract.
395 U_CAPI int32_t U_EXPORT2
396 ucol_getEquivalentReorderCodes(int32_t reorderCode,
398 int32_t destCapacity,
399 UErrorCode *pErrorCode) {
400 return Collator::getEquivalentReorderCodes(reorderCode, dest, destCapacity, *pErrorCode);
// Fills versionInfo by calling Collator::getVersion(versionInfo) on the
// wrapped Collator. coll is not checked for NULL here.
403 U_CAPI void U_EXPORT2
404 ucol_getVersion(const UCollator* coll,
405 UVersionInfo versionInfo)
407 Collator::fromUCollator(coll)->getVersion(versionInfo);
// Compares the text of two UCharIterators via
// Collator::compare(*sIter, *tIter, *status).
// Takes the early exit when status is NULL or already holds a failure.
// When sIter, tIter or coll is NULL, *status becomes
// U_ILLEGAL_ARGUMENT_ERROR and the exit trace records UCOL_EQUAL.
// NOTE(review): the status parameter declaration and the return statements
// are not visible in this extract.
410 U_CAPI UCollationResult U_EXPORT2
411 ucol_strcollIter( const UCollator *coll,
412 UCharIterator *sIter,
413 UCharIterator *tIter,
416 if(!status || U_FAILURE(*status)) {
420 UTRACE_ENTRY(UTRACE_UCOL_STRCOLLITER);
421 UTRACE_DATA3(UTRACE_VERBOSE, "coll=%p, sIter=%p, tIter=%p", coll, sIter, tIter);
423 if(sIter == NULL || tIter == NULL || coll == NULL) {
424 *status = U_ILLEGAL_ARGUMENT_ERROR;
425 UTRACE_EXIT_VALUE_STATUS(UCOL_EQUAL, *status);
429 UCollationResult result = Collator::fromUCollator(coll)->compare(*sIter, *tIter, *status);
431 UTRACE_EXIT_VALUE_STATUS(result, *status);
437 /* ucol_strcoll Main public API string comparison function */
// Compares two strings via Collator::compare(source, sourceLength, target,
// targetLength, status) on the wrapped Collator.
// The API takes no status argument: a local status initialised to
// U_ZERO_ERROR is used, and it reaches the caller only through the exit
// trace. coll is not checked for NULL here.
// NOTE(review): the source and target parameter declarations and the final
// return statement are not visible in this extract.
439 U_CAPI UCollationResult U_EXPORT2
440 ucol_strcoll( const UCollator *coll,
442 int32_t sourceLength,
444 int32_t targetLength)
446 UTRACE_ENTRY(UTRACE_UCOL_STRCOLL);
447 if (UTRACE_LEVEL(UTRACE_VERBOSE)) {
448 UTRACE_DATA3(UTRACE_VERBOSE, "coll=%p, source=%p, target=%p", coll, source, target);
449 UTRACE_DATA2(UTRACE_VERBOSE, "source string = %vh ", source, sourceLength);
450 UTRACE_DATA2(UTRACE_VERBOSE, "target string = %vh ", target, targetLength);
453 UErrorCode status = U_ZERO_ERROR;
454 UCollationResult returnVal = Collator::fromUCollator(coll)->
455 compare(source, sourceLength, target, targetLength, status);
456 UTRACE_EXIT_VALUE_STATUS(returnVal, status);
// String comparison that forwards to
// Collator::internalCompareUTF8(source, sourceLength, target, targetLength,
// *status) on the wrapped Collator.
// When *status already holds a failure, the exit trace records UCOL_EQUAL
// and the comparison is not reached (assuming the usual early return, which
// is not visible here). status is dereferenced without a NULL check.
// NOTE(review): the line carrying the function name is missing from this
// extract; judging by the trace id UTRACE_UCOL_STRCOLLUTF8 this is
// presumably ucol_strcollUTF8 -- confirm. The source, target and status
// parameter declarations and the return statements are likewise not visible.
460 U_CAPI UCollationResult U_EXPORT2
462 const UCollator *coll,
464 int32_t sourceLength,
466 int32_t targetLength,
469 UTRACE_ENTRY(UTRACE_UCOL_STRCOLLUTF8);
470 if (UTRACE_LEVEL(UTRACE_VERBOSE)) {
471 UTRACE_DATA3(UTRACE_VERBOSE, "coll=%p, source=%p, target=%p", coll, source, target);
472 UTRACE_DATA2(UTRACE_VERBOSE, "source string = %vb ", source, sourceLength);
473 UTRACE_DATA2(UTRACE_VERBOSE, "target string = %vb ", target, targetLength);
476 if (U_FAILURE(*status)) {
478 UTRACE_EXIT_VALUE_STATUS(UCOL_EQUAL, *status);
482 UCollationResult returnVal = Collator::fromUCollator(coll)->internalCompareUTF8(
483 source, sourceLength, target, targetLength, *status);
484 UTRACE_EXIT_VALUE_STATUS(returnVal, *status);
489 /* convenience function for comparing strings */
// Boolean convenience wrapper around ucol_strcoll(coll, source, sourceLength,
// target, targetLength).
// NOTE(review): the source and target parameter declarations and the
// right-hand side of the comparison applied to the ucol_strcoll result are
// not visible in this extract; presumably the result is tested against
// UCOL_GREATER -- confirm.
490 U_CAPI UBool U_EXPORT2
491 ucol_greater( const UCollator *coll,
493 int32_t sourceLength,
495 int32_t targetLength)
497 return (ucol_strcoll(coll, source, sourceLength, target, targetLength)
501 /* convenience function for comparing strings */
// Boolean convenience wrapper around ucol_strcoll(coll, source, sourceLength,
// target, targetLength).
// NOTE(review): the source and target parameter declarations and the
// right-hand side of the comparison applied to the ucol_strcoll result are
// not visible in this extract; presumably the result is tested for being
// anything but UCOL_LESS -- confirm.
502 U_CAPI UBool U_EXPORT2
503 ucol_greaterOrEqual( const UCollator *coll,
505 int32_t sourceLength,
507 int32_t targetLength)
509 return (ucol_strcoll(coll, source, sourceLength, target, targetLength)
513 /* convenience function for comparing strings */
// Boolean convenience wrapper around ucol_strcoll(coll, source, sourceLength,
// target, targetLength).
// NOTE(review): the source and target parameter declarations and the
// right-hand side of the comparison applied to the ucol_strcoll result are
// not visible in this extract; presumably the result is tested against
// UCOL_EQUAL -- confirm.
514 U_CAPI UBool U_EXPORT2
515 ucol_equal( const UCollator *coll,
517 int32_t sourceLength,
519 int32_t targetLength)
521 return (ucol_strcoll(coll, source, sourceLength, target, targetLength)
// Reports the UCA version of a collator into info. The visible code only
// unwraps the Collator with fromUCollator(coll); per the existing note below,
// the value is derived from how the implementation encodes the UCA version
// inside the overall getVersion() result.
// NOTE(review): the statements that actually fill info are not visible in
// this extract.
525 U_CAPI void U_EXPORT2
526 ucol_getUCAVersion(const UCollator* coll, UVersionInfo info) {
527 const Collator *c = Collator::fromUCollator(coll);
531 // Note: This is tied to how the current implementation encodes the UCA version
532 // in the overall getVersion().
533 // Alternatively, we could load the root collator and get at lower-level data from there.
534 // Either way, it will reflect the input collator's UCA version only
535 // if it is a known implementation.
536 // It would be cleaner to make this a virtual Collator method.
// Returns a pointer to the collator rule string and stores its length in
// the length out-parameter.
// When coll wraps a RuleBasedCollator, the buffer of rbc->getRules() is
// returned directly, so the pointer refers to storage owned by that
// UnicodeString; the assertion checks that the buffer is NUL-terminated.
// The same branch is deliberately taken when coll is NULL (see the existing
// comment: crashing on a NULL object pointer is accepted).
// A static NUL character exists for the remaining case, a non-NULL coll that
// is not rule-based.
// NOTE(review): the statements of that fallback path beyond the static
// declaration are not visible in this extract.
544 U_CAPI const UChar * U_EXPORT2
545 ucol_getRules(const UCollator *coll, int32_t *length) {
546 const RuleBasedCollator *rbc = RuleBasedCollator::rbcFromUCollator(coll);
547 // OK to crash if coll==NULL: We do not want to check "this" pointers.
548 if(rbc != NULL || coll == NULL) {
549 const UnicodeString &rules = rbc->getRules();
550 U_ASSERT(rules.getBuffer()[rules.length()] == 0);
551 *length = rules.length();
552 return rules.getBuffer();
554 static const UChar _NUL = 0;
// Retrieves the rules selected by delta through rbc->getRules(delta, rules).
// That call is made when coll wraps a RuleBasedCollator and also when coll
// is NULL, the same guard as in ucol_getRules.
// When buffer is non-NULL and bufferLen is positive, the rules are extracted
// into buffer and the value of rules.extract() is returned; the local
// errorCode from that extraction is not reported to the caller. Otherwise
// only rules.length() is returned.
// NOTE(review): the declaration of the local rules object is not visible in
// this extract.
559 U_CAPI int32_t U_EXPORT2
560 ucol_getRulesEx(const UCollator *coll, UColRuleOption delta, UChar *buffer, int32_t bufferLen) {
562 const RuleBasedCollator *rbc = RuleBasedCollator::rbcFromUCollator(coll);
563 if(rbc != NULL || coll == NULL) {
564 rbc->getRules(delta, rules);
566 if(buffer != NULL && bufferLen > 0) {
567 UErrorCode errorCode = U_ZERO_ERROR;
568 return rules.extract(buffer, bufferLen, errorCode);
570 return rules.length();
// Thin alias: returns ucol_getLocaleByType(coll, type, status) unchanged.
574 U_CAPI const char * U_EXPORT2
575 ucol_getLocale(const UCollator *coll, ULocDataLocaleType type, UErrorCode *status) {
576 return ucol_getLocaleByType(coll, type, status);
// Returns the locale ID of the requested type, obtained from
// RuleBasedCollator::internalGetLocaleID(type, *status), with entry and exit
// tracing.
// Takes the early exit when *status already holds a failure. Sets *status to
// U_UNSUPPORTED_ERROR when coll is non-NULL but does not wrap a
// RuleBasedCollator.
// NOTE(review): the declaration of result, the branch structure between the
// unsupported case and the internalGetLocaleID call, and the return
// statements are not visible in this extract.
579 U_CAPI const char * U_EXPORT2
580 ucol_getLocaleByType(const UCollator *coll, ULocDataLocaleType type, UErrorCode *status) {
581 if(U_FAILURE(*status)) {
584 UTRACE_ENTRY(UTRACE_UCOL_GETLOCALE);
585 UTRACE_DATA1(UTRACE_INFO, "coll=%p", coll);
588 const RuleBasedCollator *rbc = RuleBasedCollator::rbcFromUCollator(coll);
589 if(rbc == NULL && coll != NULL) {
590 *status = U_UNSUPPORTED_ERROR;
593 result = rbc->internalGetLocaleID(type, *status);
596 UTRACE_DATA1(UTRACE_INFO, "result = %s", result);
597 UTRACE_EXIT_STATUS(*status);
// Returns the set produced by Collator::getTailoredSet(*status), converted
// with toUSet().
// *status is checked for failure both before and after the call.
// NOTE(review): the bodies of both failure branches are not visible in this
// extract; presumably the second one releases the set before returning --
// confirm.
601 U_CAPI USet * U_EXPORT2
602 ucol_getTailoredSet(const UCollator *coll, UErrorCode *status) {
603 if(U_FAILURE(*status)) {
606 UnicodeSet *set = Collator::fromUCollator(coll)->getTailoredSet(*status);
607 if(U_FAILURE(*status)) {
611 return set->toUSet();
// Equality test for two collators: true when both handles are the same
// pointer, otherwise the result of operator== applied to the two unwrapped
// Collator objects. Distinct pointers are dereferenced without NULL checks.
614 U_CAPI UBool U_EXPORT2
615 ucol_equals(const UCollator *source, const UCollator *target) {
616 return source == target ||
617 (*Collator::fromUCollator(source)) == (*Collator::fromUCollator(target));
620 #endif /* #if !UCONFIG_NO_COLLATION */