/*
*******************************************************************************
-* Copyright (C) 1996-2010, International Business Machines
+* Copyright (C) 1996-2013, International Business Machines
* Corporation and others. All Rights Reserved.
*******************************************************************************
* file name: ucol_res.cpp
#include "putilimp.h"
#include "utracimp.h"
#include "cmemory.h"
+#include "uassert.h"
#include "uenumimp.h"
#include "ulist.h"
// static UCA. There is only one. Collators don't use it.
// It is referenced only in ucol_initUCA and ucol_cleanup
static UCollator* _staticUCA = NULL;
+static icu::UInitOnce gStaticUCAInitOnce = U_INITONCE_INITIALIZER; // init-once state handed to umtx_initOnce() in ucol_initUCA; reset() wherever _staticUCA is cleared
// static pointer to udata memory. Inited in ucol_initUCA
// used for cleanup in ucol_cleanup
static UDataMemory* UCA_DATA_MEM = NULL;
ucol_close(_staticUCA);
_staticUCA = NULL;
}
+ gStaticUCAInitOnce.reset();
return TRUE;
}
pInfo->dataFormat[1]==UCA_DATA_FORMAT_1 &&
pInfo->dataFormat[2]==UCA_DATA_FORMAT_2 &&
pInfo->dataFormat[3]==UCA_DATA_FORMAT_3 &&
- pInfo->formatVersion[0]==UCA_FORMAT_VERSION_0 &&
- pInfo->formatVersion[1]>=UCA_FORMAT_VERSION_1// &&
+ pInfo->formatVersion[0]==UCA_FORMAT_VERSION_0
+#if UCA_FORMAT_VERSION_1!=0
+ && pInfo->formatVersion[1]>=UCA_FORMAT_VERSION_1
+#endif
//pInfo->formatVersion[1]==UCA_FORMAT_VERSION_1 &&
//pInfo->formatVersion[2]==UCA_FORMAT_VERSION_2 && // Too harsh
//pInfo->formatVersion[3]==UCA_FORMAT_VERSION_3 && // Too harsh
) {
- UVersionInfo UCDVersion;
- u_getUnicodeVersion(UCDVersion);
- return (UBool)(pInfo->dataVersion[0]==UCDVersion[0]
- && pInfo->dataVersion[1]==UCDVersion[1]);
- //&& pInfo->dataVersion[2]==ucaDataInfo.dataVersion[2]
- //&& pInfo->dataVersion[3]==ucaDataInfo.dataVersion[3]);
+ return TRUE;
+ // Note: In ICU 51 and earlier,
+ // we used to check that the UCA data version (pInfo->dataVersion)
+ // matches the UCD version (u_getUnicodeVersion())
+ // but that complicated version updates, and
+ // a mismatch is "only" a problem for handling canonical equivalence.
+ // It need not be a fatal error.
} else {
return FALSE;
}
}
U_CDECL_END
-/* do not close UCA returned by ucol_initUCA! */
-UCollator *
-ucol_initUCA(UErrorCode *status) {
- if(U_FAILURE(*status)) {
- return NULL;
+static void U_CALLCONV ucol_initStaticUCA(UErrorCode &status) { // loader for _staticUCA and UCA_DATA_MEM; invoked through umtx_initOnce() in ucol_initUCA
+ U_ASSERT(_staticUCA == NULL);
+ U_ASSERT(UCA_DATA_MEM == NULL);
+ ucln_i18n_registerCleanup(UCLN_I18N_UCOL_RES, ucol_res_cleanup); // the cleanup hook is registered before any data is opened
+ // Open the UCA data item; isAcceptableUCA is the acceptance callback handed to udata_openChoice().
+ UDataMemory *result = udata_openChoice(U_ICUDATA_COLL, UCA_DATA_TYPE, UCA_DATA_NAME, isAcceptableUCA, NULL, &status);
+ if(U_FAILURE(status)){
+ udata_close(result);
+ return;
}
- UBool needsInit;
- UMTX_CHECK(NULL, (_staticUCA == NULL), needsInit);
- if(needsInit) {
- UDataMemory *result = udata_openChoice(U_ICUDATA_COLL, UCA_DATA_TYPE, UCA_DATA_NAME, isAcceptableUCA, NULL, status);
+ _staticUCA = ucol_initCollator((const UCATableHeader *)udata_getMemory(result), NULL, NULL, &status);
+ if(U_SUCCESS(status)){
+ // Initialize variables for implicit generation
+ uprv_uca_initImplicitConstants(&status); // NOTE(review): status is not re-checked after this call, so _staticUCA and UCA_DATA_MEM stay set even if it reports a failure - confirm this is intended
+ UCA_DATA_MEM = result; // keep the data handle; per the declaration comment it is used for cleanup
- if(U_SUCCESS(*status)){
- UCollator *newUCA = ucol_initCollator((const UCATableHeader *)udata_getMemory(result), NULL, NULL, status);
- if(U_SUCCESS(*status)){
- // Initalize variables for implicit generation
- uprv_uca_initImplicitConstants(status);
+ }else{
+ ucol_close(_staticUCA);
+ _staticUCA = NULL;
+ udata_close(result);
+ }
+}
- umtx_lock(NULL);
- if(_staticUCA == NULL) {
- UCA_DATA_MEM = result;
- _staticUCA = newUCA;
- newUCA = NULL;
- result = NULL;
- }
- umtx_unlock(NULL);
- ucln_i18n_registerCleanup(UCLN_I18N_UCOL_RES, ucol_res_cleanup);
- if(newUCA != NULL) {
- ucol_close(newUCA);
- udata_close(result);
- }
- }else{
- ucol_close(newUCA);
- udata_close(result);
- }
- }
- else {
- udata_close(result);
- }
- }
+/* do not close UCA returned by ucol_initUCA! The returned pointer is the file-static _staticUCA. */
+UCollator *
+ucol_initUCA(UErrorCode *status) {
+ umtx_initOnce(gStaticUCAInitOnce, &ucol_initStaticUCA, *status); // NOTE(review): the removed implementation returned NULL when *status was already a failure; this version returns _staticUCA unconditionally - confirm callers check *status
return _staticUCA;
}
{
_staticUCA = NULL;
UCA_DATA_MEM = NULL;
+ gStaticUCAInitOnce.reset();
}
/****************************************************************************/
if (U_SUCCESS(intStatus)) {
int32_t reorderCodesLen = 0;
const int32_t* reorderCodes = ures_getIntVector(reorderRes, &reorderCodesLen, status);
- ucol_setReorderCodes(result, reorderCodes, reorderCodesLen, status);
+ if (reorderCodesLen > 0) {
+ ucol_setReorderCodes(result, reorderCodes, reorderCodesLen, status);
+ // copy the reorder codes into the default reorder codes
+ result->defaultReorderCodesLength = result->reorderCodesLength;
+ result->defaultReorderCodes = (int32_t*) uprv_malloc(result->defaultReorderCodesLength * sizeof(int32_t));
+ uprv_memcpy(result->defaultReorderCodes, result->reorderCodes, result->defaultReorderCodesLength * sizeof(int32_t));
+ result->freeDefaultReorderCodesOnClose = TRUE;
+ }
if (U_FAILURE(*status)) {
goto clean;
}
goto cleanup;
}
- if(src.resultLen > 0 || src.removeSet != NULL) { /* we have a set of rules, let's make something of it */
+ /* if we have a set of rules, let's make something of it */
+ if(src.resultLen > 0 || src.removeSet != NULL) {
/* also, if we wanted to remove some contractions, we should make a tailoring */
table = ucol_assembleTailoringTable(&src, status);
if(U_SUCCESS(*status)) {
}
result->hasRealData = TRUE;
result->freeImageOnClose = TRUE;
+ } else {
+ goto cleanup;
}
} else { /* no rules, but no error either */
// must be only options
}
uprv_memcpy(opts, src.opts, sizeof(UColOptionSet));
ucol_setOptionsFromHeader(result, opts, status);
- ucol_setReorderCodesFromParser(result, &src, status);
result->freeOptionsOnClose = TRUE;
result->hasRealData = FALSE;
result->freeImageOnClose = FALSE;
}
+ ucol_setReorderCodesFromParser(result, &src, status);
+
if(U_SUCCESS(*status)) {
UChar *newRules;
result->dataVersion[0] = UCOL_BUILDER_VERSION;
if (U_FAILURE(*status)) {
return NULL;
}
- StringEnumeration *s = Collator::getAvailableLocales();
+ StringEnumeration *s = icu::Collator::getAvailableLocales();
if (s == NULL) {
*status = U_MEMORY_ALLOCATION_ERROR;
return NULL;
UTRACE_ENTRY(UTRACE_UCOL_GETLOCALE);
UTRACE_DATA1(UTRACE_INFO, "coll=%p", coll);
+ if(coll->delegate!=NULL) {
+ return ((const Collator*)coll->delegate)->getLocale(type, *status).getName();
+ }
switch(type) {
case ULOC_ACTUAL_LOCALE:
result = coll->actualLocale;
* Collation Reordering
*/
-static void ucol_setReorderCodesFromParser(UCollator *coll, UColTokenParser *parser, UErrorCode *status) {
+void ucol_setReorderCodesFromParser(UCollator *coll, UColTokenParser *parser, UErrorCode *status) { // copies parser->reorderCodes into both coll->defaultReorderCodes and coll->reorderCodes
if (U_FAILURE(*status)) {
return;
}
+ if (parser->reorderCodesLength == 0 || parser->reorderCodes == NULL) { // nothing parsed: return before touching the collator's reorder state
+ return;
+ }
+ // Drop the current codes; the buffer is freed only when freeReorderCodesOnClose marks it as owned.
coll->reorderCodesLength = 0;
- if (coll->reorderCodes != NULL) {
+ if (coll->reorderCodes != NULL && coll->freeReorderCodesOnClose == TRUE) {
uprv_free(coll->reorderCodes);
}
-
- if (parser->reorderCodesLength == 0 || parser->reorderCodes == NULL) {
+ coll->reorderCodes = NULL;
+ coll->freeReorderCodesOnClose = FALSE;
+ // Replace the default codes with a freshly allocated copy of the parser's codes.
+ if (coll->defaultReorderCodes != NULL && coll->freeDefaultReorderCodesOnClose == TRUE) {
+ uprv_free(coll->defaultReorderCodes);
+ }
+ coll->freeDefaultReorderCodesOnClose = FALSE;
+ coll->defaultReorderCodesLength = parser->reorderCodesLength;
+ coll->defaultReorderCodes = (int32_t*) uprv_malloc(coll->defaultReorderCodesLength * sizeof(int32_t));
+ if (coll->defaultReorderCodes == NULL) { // NOTE(review): defaultReorderCodesLength was assigned above and stays non-zero on this failure return - confirm callers tolerate that
+ *status = U_MEMORY_ALLOCATION_ERROR;
return;
}
+ uprv_memcpy(coll->defaultReorderCodes, parser->reorderCodes, coll->defaultReorderCodesLength * sizeof(int32_t));
+ coll->freeDefaultReorderCodesOnClose = TRUE;
coll->reorderCodesLength = parser->reorderCodesLength;
coll->reorderCodes = (int32_t*) uprv_malloc(coll->reorderCodesLength * sizeof(int32_t));
+ if (coll->reorderCodes == NULL) { // NOTE(review): reorderCodesLength was assigned before the allocation and stays non-zero on this failure return - confirm callers tolerate a NULL reorderCodes with a non-zero length
+ *status = U_MEMORY_ALLOCATION_ERROR;
+ return;
+ }
uprv_memcpy(coll->reorderCodes, parser->reorderCodes, coll->reorderCodesLength * sizeof(int32_t));
+ coll->freeReorderCodesOnClose = TRUE;
}
-static int ucol_getLeadBytesForReorderCode(UCollator *coll, int reorderCode, uint16_t* returnLeadBytes, int returnCapacity) {
- uint16_t reorderCodeIndexLength = *((uint16_t*) ((uint8_t *)coll->UCA->image + coll->UCA->image->scriptToLeadByte));
- uint16_t* reorderCodeIndex = (uint16_t*) ((uint8_t *)coll->UCA->image + coll->UCA->image->scriptToLeadByte + 2 *sizeof(uint16_t));
+/*
+ * Data is stored in the reorder code to lead byte table as:
+ * index count - unsigned short (2 bytes) - number of index entries
+ * data size - unsigned short (2 bytes) - number of unsigned short data elements
+ * index[index count] - array of 2 unsigned shorts (4 bytes each entry)
+ * - reorder code, offset
+ * - index is sorted by reorder code
+ * - if an offset has the high bit set then it is not an offset but a single data entry
+ * once the high bit is stripped off
+ * data[data size] - array of unsigned short (2 bytes each entry)
+ * - the data is an unsigned short count followed by count number
+ * of lead bytes stored in an unsigned short
+ */
+U_CFUNC int U_EXPORT2
+ucol_getLeadBytesForReorderCode(const UCollator *uca, int reorderCode, uint16_t* returnLeadBytes, int returnCapacity) {
+ uint16_t reorderCodeIndexLength = *((uint16_t*) ((uint8_t *)uca->image + uca->image->scriptToLeadByte));
+ uint16_t* reorderCodeIndex = (uint16_t*) ((uint8_t *)uca->image + uca->image->scriptToLeadByte + 2 *sizeof(uint16_t));
- // TODO - replace with a binary search
// reorder code index is 2 uint16_t's - reorder code + offset
for (int i = 0; i < reorderCodeIndexLength; i++) {
if (reorderCode == reorderCodeIndex[i*2]) {
return 0;
}
-static int ucol_getReorderCodesForLeadByte(UCollator *coll, int leadByte, int16_t* returnReorderCodes, int returnCapacity) {
- int leadByteIndexLength = *((uint16_t*) ((uint8_t *)coll->UCA->image + coll->UCA->image->leadByteToScript));
- uint16_t* leadByteIndex = (uint16_t*) ((uint8_t *)coll->UCA->image + coll->UCA->image->leadByteToScript + 2 *sizeof(uint16_t));
+/*
+ * Data is stored in the lead byte to reorder code table as:
+ * index count - unsigned short (2 bytes) - number of index entries
+ * data size - unsigned short (2 bytes) - number of unsigned short data elements
+ * index[index count] - array of unsigned short (2 bytes each entry)
+ * - index is sorted by lead byte
+ * - if an index has the high bit set then it is not an index but a single data entry
+ * once the high bit is stripped off
+ * data[data size] - array of unsigned short (2 bytes each entry)
+ * - the data is an unsigned short count followed by count number of reorder codes
+ *
+ * Looks up leadByte in the table found at uca->image + uca->image->leadByteToScript and
+ * writes its reorder codes to returnReorderCodes.
+ * A single inline entry is written only when returnCapacity >= 1; in the multi-entry
+ * case the stored count is clamped to returnCapacity before copying.
+ * Returns the number of codes written, or 0 when leadByte is at or beyond the index count.
+ * NOTE(review): a negative leadByte or a negative returnCapacity is not rejected here -
+ * confirm that callers never pass one.
+ */
+U_CFUNC int U_EXPORT2
+ucol_getReorderCodesForLeadByte(const UCollator *uca, int leadByte, int16_t* returnReorderCodes, int returnCapacity) {
+ uint16_t* leadByteTable = ((uint16_t*) ((uint8_t *)uca->image + uca->image->leadByteToScript));
+ uint16_t leadByteIndexLength = *leadByteTable; // first header short: number of index entries
if (leadByte >= leadByteIndexLength) {
return 0;
}
-
- if ((leadByteIndex[leadByte] & 0x8000) == 0x8000) {
+ uint16_t leadByteIndex = *(leadByteTable + (2 + leadByte)); // index entries start after the two header shorts
+
+ if ((leadByteIndex & 0x8000) == 0x8000) {
// offset isn't offset but instead is a single data element
if (returnCapacity >= 1) {
- returnReorderCodes[0] = leadByteIndex[leadByte] & ~0x8000;
+ returnReorderCodes[0] = leadByteIndex & ~0x8000;
return 1;
}
return 0;
}
- uint16_t* dataOffsetBase = (uint16_t*) ((uint8_t *)leadByteIndex + leadByteIndexLength * (2 * sizeof(uint16_t)));
- uint16_t reorderCodeCount = *(dataOffsetBase + leadByteIndex[leadByte]);
- reorderCodeCount = reorderCodeCount > returnCapacity ? returnCapacity : reorderCodeCount;
- uprv_memcpy(returnReorderCodes, dataOffsetBase + leadByteIndex[leadByte] + 1, reorderCodeCount * sizeof(uint16_t));
+ //uint16_t* dataOffsetBase = leadByteTable + (2 + leadByteIndexLength);
+ uint16_t* reorderCodeData = leadByteTable + (2 + leadByteIndexLength) + leadByteIndex; // data area follows the index; leadByteIndex is the offset into it
+ uint16_t reorderCodeCount = *reorderCodeData > returnCapacity ? returnCapacity : *reorderCodeData; // clamp the stored count to the caller's capacity
+ uprv_memcpy(returnReorderCodes, reorderCodeData + 1, reorderCodeCount * sizeof(uint16_t));
return reorderCodeCount;
}
// used to mark ignorable reorder code slots
static const int32_t UCOL_REORDER_CODE_IGNORE = UCOL_REORDER_CODE_LIMIT + 1;
-void ucol_buildPermutationTable(UCollator *coll, UErrorCode *status) {
+U_CFUNC void U_EXPORT2
+ucol_buildPermutationTable(UCollator *coll, UErrorCode *status) {
uint16_t leadBytesSize = 256;
uint16_t leadBytes[256];
- int32_t internalReorderCodesLength = coll->reorderCodesLength + (UCOL_REORDER_CODE_LIMIT - UCOL_REORDER_CODE_FIRST);
- int32_t* internalReorderCodes;
-
+
// The lowest byte that hasn't been assigned a mapping
int toBottom = 0x03;
// The highest byte that hasn't been assigned a mapping - don't include the special or trailing
// are we filling from the bottom?
bool fromTheBottom = true;
+ int32_t reorderCodesIndex = -1;
// lead bytes that have alread been assigned to the permutation table
bool newLeadByteUsed[256];
bool permutationSlotFilled[256];
// nothing to do
- if(U_FAILURE(*status) || coll == NULL || coll->reorderCodesLength == 0) {
- if (coll != NULL) {
- if (coll->leadBytePermutationTable != NULL) {
+ if(U_FAILURE(*status) || coll == NULL) {
+ return;
+ }
+
+ // clear the reordering
+ if (coll->reorderCodes == NULL || coll->reorderCodesLength == 0
+ || (coll->reorderCodesLength == 1 && coll->reorderCodes[0] == UCOL_REORDER_CODE_NONE)) {
+ if (coll->leadBytePermutationTable != NULL) {
+ if (coll->freeLeadBytePermutationTableOnClose) {
uprv_free(coll->leadBytePermutationTable);
- coll->leadBytePermutationTable = NULL;
}
+ coll->leadBytePermutationTable = NULL;
+ coll->freeLeadBytePermutationTableOnClose = FALSE;
coll->reorderCodesLength = 0;
}
return;
}
+ // set reordering to the default reordering
+ if (coll->reorderCodes[0] == UCOL_REORDER_CODE_DEFAULT) {
+ if (coll->reorderCodesLength != 1) {
+ *status = U_ILLEGAL_ARGUMENT_ERROR;
+ return;
+ }
+ if (coll->freeReorderCodesOnClose == TRUE) {
+ uprv_free(coll->reorderCodes);
+ }
+ coll->reorderCodes = NULL;
+ coll->freeReorderCodesOnClose = FALSE;
+
+ if (coll->leadBytePermutationTable != NULL && coll->freeLeadBytePermutationTableOnClose == TRUE) {
+ uprv_free(coll->leadBytePermutationTable);
+ }
+ coll->leadBytePermutationTable = NULL;
+ coll->freeLeadBytePermutationTableOnClose = FALSE;
+
+ if (coll->defaultReorderCodesLength == 0) {
+ return;
+ }
+
+ coll->reorderCodes = (int32_t*)uprv_malloc(coll->defaultReorderCodesLength * sizeof(int32_t));
+ if (coll->reorderCodes == NULL) {
+ *status = U_MEMORY_ALLOCATION_ERROR;
+ return;
+ }
+ coll->freeReorderCodesOnClose = TRUE;
+ coll->reorderCodesLength = coll->defaultReorderCodesLength;
+ uprv_memcpy(coll->reorderCodes, coll->defaultReorderCodes, coll->reorderCodesLength * sizeof(int32_t));
+ }
+
if (coll->leadBytePermutationTable == NULL) {
coll->leadBytePermutationTable = (uint8_t*)uprv_malloc(256*sizeof(uint8_t));
if (coll->leadBytePermutationTable == NULL) {
*status = U_MEMORY_ALLOCATION_ERROR;
return;
}
+ coll->freeLeadBytePermutationTableOnClose = TRUE;
}
- // prefill the reordering codes with the leading entries
- internalReorderCodes = (int32_t*)uprv_malloc(internalReorderCodesLength * sizeof(int32_t));
- if (internalReorderCodes == NULL) {
+ int32_t internalReorderCodesLength = coll->reorderCodesLength + (UCOL_REORDER_CODE_LIMIT - UCOL_REORDER_CODE_FIRST);
+ LocalMemory<int32_t> internalReorderCodes((int32_t*)uprv_malloc(internalReorderCodesLength * sizeof(int32_t)));
+ if (internalReorderCodes.isNull()) {
*status = U_MEMORY_ALLOCATION_ERROR;
- if (coll->leadBytePermutationTable != NULL) {
+ if (coll->leadBytePermutationTable != NULL && coll->freeLeadBytePermutationTableOnClose == TRUE) {
uprv_free(coll->leadBytePermutationTable);
- coll->leadBytePermutationTable = NULL;
}
+ coll->leadBytePermutationTable = NULL;
+ coll->freeLeadBytePermutationTableOnClose = FALSE;
return;
}
-
+
+ // prefill the reordering codes with the leading entries
for (uint32_t codeIndex = 0; codeIndex < (UCOL_REORDER_CODE_LIMIT - UCOL_REORDER_CODE_FIRST); codeIndex++) {
internalReorderCodes[codeIndex] = UCOL_REORDER_CODE_FIRST + codeIndex;
}
* possible location. At each step, we also need to make sure that any scripts
* that need to not be moved are copied to their same location in the final table.
*/
- for (int reorderCodesIndex = 0; reorderCodesIndex < internalReorderCodesLength; reorderCodesIndex++) {
+ for (int reorderCodesCount = 0; reorderCodesCount < internalReorderCodesLength; reorderCodesCount++) {
+ reorderCodesIndex += fromTheBottom ? 1 : -1;
int32_t next = internalReorderCodes[reorderCodesIndex];
if (next == UCOL_REORDER_CODE_IGNORE) {
continue;
if (fromTheBottom == false) {
// double turnaround
*status = U_ILLEGAL_ARGUMENT_ERROR;
- if (coll->leadBytePermutationTable != NULL) {
+ if (coll->leadBytePermutationTable != NULL && coll->freeLeadBytePermutationTableOnClose == TRUE) {
uprv_free(coll->leadBytePermutationTable);
- coll->leadBytePermutationTable = NULL;
}
+ coll->leadBytePermutationTable = NULL;
+ coll->freeLeadBytePermutationTableOnClose = FALSE;
coll->reorderCodesLength = 0;
- if (internalReorderCodes != NULL) {
- uprv_free(internalReorderCodes);
- }
return;
}
fromTheBottom = false;
+ reorderCodesIndex = internalReorderCodesLength;
continue;
}
- uint16_t leadByteCount = ucol_getLeadBytesForReorderCode(coll, next, leadBytes, leadBytesSize);
+ uint16_t leadByteCount = ucol_getLeadBytesForReorderCode(coll->UCA, next, leadBytes, leadBytesSize);
if (fromTheBottom) {
for (int leadByteIndex = 0; leadByteIndex < leadByteCount; leadByteIndex++) {
// don't place a lead byte twice in the permutation table
if (permutationSlotFilled[leadBytes[leadByteIndex]]) {
// lead byte already used
*status = U_ILLEGAL_ARGUMENT_ERROR;
- if (coll->leadBytePermutationTable != NULL) {
+ if (coll->leadBytePermutationTable != NULL && coll->freeLeadBytePermutationTableOnClose == TRUE) {
uprv_free(coll->leadBytePermutationTable);
- coll->leadBytePermutationTable = NULL;
}
+ coll->leadBytePermutationTable = NULL;
+ coll->freeLeadBytePermutationTableOnClose = FALSE;
coll->reorderCodesLength = 0;
- if (internalReorderCodes != NULL) {
- uprv_free(internalReorderCodes);
- }
return;
}
if (permutationSlotFilled[leadBytes[leadByteIndex]]) {
// lead byte already used
*status = U_ILLEGAL_ARGUMENT_ERROR;
- if (coll->leadBytePermutationTable != NULL) {
+ if (coll->leadBytePermutationTable != NULL && coll->freeLeadBytePermutationTableOnClose == TRUE) {
uprv_free(coll->leadBytePermutationTable);
- coll->leadBytePermutationTable = NULL;
}
+ coll->leadBytePermutationTable = NULL;
+ coll->freeLeadBytePermutationTableOnClose = FALSE;
coll->reorderCodesLength = 0;
- if (internalReorderCodes != NULL) {
- uprv_free(internalReorderCodes);
- }
return;
}
}
#endif
- if (internalReorderCodes != NULL) {
- uprv_free(internalReorderCodes);
- }
-
// force a regen of the latin one table since it is affected by the script reordering
coll->latinOneRegenTable = TRUE;
ucol_updateInternalState(coll, status);