+// Copyright (C) 2016 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html
/*
**********************************************************************
-* Copyright (C) 1999-2011, International Business Machines
+* Copyright (C) 1999-2015, International Business Machines
* Corporation and others. All Rights Reserved.
**********************************************************************
* file name: ustr_imp.h
#include "unicode/uiter.h"
#include "ucase.h"
-/** Simple declaration for u_strToTitle() to avoid including unicode/ubrk.h. */
+/** Simple declaration to avoid including unicode/ubrk.h. */
#ifndef UBRK_TYPEDEF_UBREAK_ITERATOR
# define UBRK_TYPEDEF_UBREAK_ITERATOR
typedef struct UBreakIterator UBreakIterator;
UErrorCode *pErrorCode);
/**
+ * Interanl API, used for detecting length of
+ * shared prefix case-insensitively.
+ * @param s1 input string 1
+ * @param length1 length of string 1, or -1 (NULL terminated)
+ * @param s2 input string 2
+ * @param length2 length of string 2, or -1 (NULL terminated)
+ * @param options compare options
+ * @param matchLen1 (output) length of partial prefix match in s1
+ * @param matchLen2 (output) length of partial prefix match in s2
+ * @param pErrorCode receives error status
+ */
+U_CAPI void
+u_caseInsensitivePrefixMatch(const UChar *s1, int32_t length1,
+ const UChar *s2, int32_t length2,
+ uint32_t options,
+ int32_t *matchLen1, int32_t *matchLen2,
+ UErrorCode *pErrorCode);
+
+/**
* Are the Unicode properties loaded?
* This must be used before internal functions are called that do
* not perform this check.
typedef struct UCaseMap UCaseMap;
#endif
-enum {
- TO_LOWER,
- TO_UPPER,
- TO_TITLE,
- FOLD_CASE
-};
+#if UCONFIG_NO_BREAK_ITERATION
+# define UCASEMAP_INITIALIZER { NULL, { 0 }, 0, 0 }
+#else
+# define UCASEMAP_INITIALIZER { NULL, NULL, { 0 }, 0, 0 }
+#endif
-U_CFUNC int32_t
-ustr_toLower(const UCaseProps *csp,
- UChar *dest, int32_t destCapacity,
- const UChar *src, int32_t srcLength,
- const char *locale,
- UErrorCode *pErrorCode);
+U_CFUNC void
+ustrcase_setTempCaseMapLocale(UCaseMap *csm, const char *locale);
-U_CFUNC int32_t
-ustr_toUpper(const UCaseProps *csp,
- UChar *dest, int32_t destCapacity,
- const UChar *src, int32_t srcLength,
- const char *locale,
- UErrorCode *pErrorCode);
+#ifndef U_STRING_CASE_MAPPER_DEFINED
+#define U_STRING_CASE_MAPPER_DEFINED
+
+/**
+ * String case mapping function type, used by ustrcase_map().
+ * All error checking must be done.
+ * The UCaseMap must be fully initialized, with locale and/or iter set as needed.
+ * src and dest must not overlap.
+ */
+typedef int32_t U_CALLCONV
+UStringCaseMapper(const UCaseMap *csm,
+ UChar *dest, int32_t destCapacity,
+ const UChar *src, int32_t srcLength,
+ UErrorCode *pErrorCode);
+
+#endif
+
+/** Implements UStringCaseMapper. */
+U_CFUNC int32_t U_CALLCONV
+ustrcase_internalToLower(const UCaseMap *csm,
+ UChar *dest, int32_t destCapacity,
+ const UChar *src, int32_t srcLength,
+ UErrorCode *pErrorCode);
+
+/** Implements UStringCaseMapper. */
+U_CFUNC int32_t U_CALLCONV
+ustrcase_internalToUpper(const UCaseMap *csm,
+ UChar *dest, int32_t destCapacity,
+ const UChar *src, int32_t srcLength,
+ UErrorCode *pErrorCode);
#if !UCONFIG_NO_BREAK_ITERATION
+/** Implements UStringCaseMapper. */
+U_CFUNC int32_t U_CALLCONV
+ustrcase_internalToTitle(const UCaseMap *csm,
+ UChar *dest, int32_t destCapacity,
+ const UChar *src, int32_t srcLength,
+ UErrorCode *pErrorCode);
+
+#endif
+
+/** Implements UStringCaseMapper. */
+U_CFUNC int32_t U_CALLCONV
+ustrcase_internalFold(const UCaseMap *csm,
+ UChar *dest, int32_t destCapacity,
+ const UChar *src, int32_t srcLength,
+ UErrorCode *pErrorCode);
+
+/**
+ * Implements argument checking and buffer handling
+ * for string case mapping as a common function.
+ */
U_CFUNC int32_t
-ustr_toTitle(const UCaseProps *csp,
+ustrcase_map(const UCaseMap *csm,
UChar *dest, int32_t destCapacity,
const UChar *src, int32_t srcLength,
- UBreakIterator *titleIter,
- const char *locale, uint32_t options,
+ UStringCaseMapper *stringCaseMapper,
UErrorCode *pErrorCode);
-#endif
+/**
+ * UTF-8 string case mapping function type, used by ucasemap_mapUTF8().
+ * UTF-8 version of UStringCaseMapper.
+ * All error checking must be done.
+ * The UCaseMap must be fully initialized, with locale and/or iter set as needed.
+ * src and dest must not overlap.
+ */
+typedef int32_t U_CALLCONV
+UTF8CaseMapper(const UCaseMap *csm,
+ uint8_t *dest, int32_t destCapacity,
+ const uint8_t *src, int32_t srcLength,
+ UErrorCode *pErrorCode);
+
+/** Implements UTF8CaseMapper. */
+U_CFUNC int32_t U_CALLCONV
+ucasemap_internalUTF8ToTitle(const UCaseMap *csm,
+ uint8_t *dest, int32_t destCapacity,
+ const uint8_t *src, int32_t srcLength,
+ UErrorCode *pErrorCode);
/**
- * Internal case folding function.
+ * Implements argument checking and buffer handling
+ * for UTF-8 string case mapping as a common function.
*/
U_CFUNC int32_t
-ustr_foldCase(const UCaseProps *csp,
- UChar *dest, int32_t destCapacity,
- const UChar *src, int32_t srcLength,
- uint32_t options,
- UErrorCode *pErrorCode);
+ucasemap_mapUTF8(const UCaseMap *csm,
+ uint8_t *dest, int32_t destCapacity,
+ const uint8_t *src, int32_t srcLength,
+ UTF8CaseMapper *stringCaseMapper,
+ UErrorCode *pErrorCode);
+
+#ifdef __cplusplus
+
+U_NAMESPACE_BEGIN
+namespace GreekUpper {
+
+// Data bits.
+static const uint32_t UPPER_MASK = 0x3ff;
+static const uint32_t HAS_VOWEL = 0x1000;
+static const uint32_t HAS_YPOGEGRAMMENI = 0x2000;
+static const uint32_t HAS_ACCENT = 0x4000;
+static const uint32_t HAS_DIALYTIKA = 0x8000;
+// Further bits during data building and processing, not stored in the data map.
+static const uint32_t HAS_COMBINING_DIALYTIKA = 0x10000;
+static const uint32_t HAS_OTHER_GREEK_DIACRITIC = 0x20000;
+
+static const uint32_t HAS_VOWEL_AND_ACCENT = HAS_VOWEL | HAS_ACCENT;
+static const uint32_t HAS_VOWEL_AND_ACCENT_AND_DIALYTIKA =
+ HAS_VOWEL_AND_ACCENT | HAS_DIALYTIKA;
+static const uint32_t HAS_EITHER_DIALYTIKA = HAS_DIALYTIKA | HAS_COMBINING_DIALYTIKA;
+
+// State bits.
+static const uint32_t AFTER_CASED = 1;
+static const uint32_t AFTER_VOWEL_WITH_ACCENT = 2;
+
+uint32_t getLetterData(UChar32 c);
+
+/**
+ * Returns a non-zero value for each of the Greek combining diacritics
+ * listed in The Unicode Standard, version 8, chapter 7.2 Greek,
+ * plus some perispomeni look-alikes.
+ */
+uint32_t getDiacriticData(UChar32 c);
+
+} // namespace GreekUpper
+U_NAMESPACE_END
+
+#endif // __cplusplus
+
+U_CAPI int32_t U_EXPORT2
+ustr_hashUCharsN(const UChar *str, int32_t length);
+
+U_CAPI int32_t U_EXPORT2
+ustr_hashCharsN(const char *str, int32_t length);
+
+U_CAPI int32_t U_EXPORT2
+ustr_hashICharsN(const char *str, int32_t length);
/**
* NUL-terminate a UChar * string if possible.