1 /* DO NOT EDIT! GENERATED AUTOMATICALLY! */
2 /* Unicode character case mappings.
3 Copyright (C) 2002, 2009-2014 Free Software Foundation, Inc.
5 This program is free software: you can redistribute it and/or modify it
6 under the terms of the GNU Lesser General Public License as published
7 by the Free Software Foundation; either version 3 of the License, or
8 (at your option) any later version.
10 This program is distributed in the hope that it will be useful,
11 but WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 Lesser General Public License for more details.
15 You should have received a copy of the GNU Lesser General Public License
16 along with this program. If not, see <http://www.gnu.org/licenses/>. */
24 #include <unistring/stdbool.h>
36 /* ========================================================================= */
/* Character case mappings.
   These mappings are locale and context independent.
   WARNING! These functions are not sufficient for languages such as German.
   Better use the functions below that treat an entire string at once and are
   language aware.  */
44 /* Return the uppercase mapping of a Unicode character. */
46 uc_toupper (ucs4_t uc)
49 /* Return the lowercase mapping of a Unicode character. */
51 uc_tolower (ucs4_t uc)
54 /* Return the titlecase mapping of a Unicode character. */
56 uc_totitle (ucs4_t uc)
59 /* ========================================================================= */
61 /* String case mappings. */
63 /* These functions are locale dependent. The iso639_language argument
64 identifies the language (e.g. "tr" for Turkish). NULL means to use
65 locale independent case mappings. */
/* Return the ISO 639 language code of the current locale.
   Return "" if it is unknown, or in the "C" locale.
   The result is suitable as the iso639_language argument of the string
   case mapping functions.  */
extern const char *
       uc_locale_language (void);
/* Conventions:

   All functions prefixed with u8_ operate on UTF-8 encoded strings.
   Their unit is an uint8_t (1 byte).

   All functions prefixed with u16_ operate on UTF-16 encoded strings.
   Their unit is an uint16_t (a 2-byte word).

   All functions prefixed with u32_ operate on UCS-4 encoded strings.
   Their unit is an uint32_t (a 4-byte word).

   All argument pairs (s, n) denote a Unicode string s[0..n-1] with exactly
   n units.

   Functions returning a string result take a (resultbuf, lengthp) argument
   pair.  If resultbuf is not NULL and the result fits into *lengthp units,
   it is put in resultbuf, and resultbuf is returned.  Otherwise, a freshly
   allocated string is returned.  In both cases, *lengthp is set to the
   length (number of units) of the returned string.  In case of error,
   NULL is returned and errno is set.  */
94 /* Return the uppercase mapping of a string.
95 The nf argument identifies the normalization form to apply after the
96 case-mapping. It can also be NULL, for no normalization. */
98 u8_toupper (const uint8_t *s, size_t n, const char *iso639_language,
100 uint8_t *resultbuf, size_t *lengthp);
102 u16_toupper (const uint16_t *s, size_t n, const char *iso639_language,
104 uint16_t *resultbuf, size_t *lengthp);
106 u32_toupper (const uint32_t *s, size_t n, const char *iso639_language,
108 uint32_t *resultbuf, size_t *lengthp);
110 /* Return the lowercase mapping of a string.
111 The nf argument identifies the normalization form to apply after the
112 case-mapping. It can also be NULL, for no normalization. */
114 u8_tolower (const uint8_t *s, size_t n, const char *iso639_language,
116 uint8_t *resultbuf, size_t *lengthp);
118 u16_tolower (const uint16_t *s, size_t n, const char *iso639_language,
120 uint16_t *resultbuf, size_t *lengthp);
122 u32_tolower (const uint32_t *s, size_t n, const char *iso639_language,
124 uint32_t *resultbuf, size_t *lengthp);
126 /* Return the titlecase mapping of a string.
127 The nf argument identifies the normalization form to apply after the
128 case-mapping. It can also be NULL, for no normalization. */
130 u8_totitle (const uint8_t *s, size_t n, const char *iso639_language,
132 uint8_t *resultbuf, size_t *lengthp);
134 u16_totitle (const uint16_t *s, size_t n, const char *iso639_language,
136 uint16_t *resultbuf, size_t *lengthp);
138 u32_totitle (const uint32_t *s, size_t n, const char *iso639_language,
140 uint32_t *resultbuf, size_t *lengthp);
/* The case-mapping context given by a prefix string.
   Values of this type are passed by value to the uN_ct_* functions and are
   obtained from the uN_casing_prefix_context functions or from
   unicase_empty_prefix_context.  */
typedef struct casing_prefix_context
        {
          /* These fields are private, undocumented.  */
          uint32_t last_char_except_ignorable;
          uint32_t last_char_normal_or_above;
        }
        casing_prefix_context_t;
150 /* The case-mapping context of the empty prefix string. */
151 extern LIBUNISTRING_DLL_VARIABLE const casing_prefix_context_t unicase_empty_prefix_context;
152 /* Return the case-mapping context of a given prefix string. */
153 extern casing_prefix_context_t
154 u8_casing_prefix_context (const uint8_t *s, size_t n);
155 extern casing_prefix_context_t
156 u16_casing_prefix_context (const uint16_t *s, size_t n);
157 extern casing_prefix_context_t
158 u32_casing_prefix_context (const uint32_t *s, size_t n);
159 /* Return the case-mapping context of the prefix concat(A, S), given the
160 case-mapping context of the prefix A. */
161 extern casing_prefix_context_t
162 u8_casing_prefixes_context (const uint8_t *s, size_t n,
163 casing_prefix_context_t a_context);
164 extern casing_prefix_context_t
165 u16_casing_prefixes_context (const uint16_t *s, size_t n,
166 casing_prefix_context_t a_context);
167 extern casing_prefix_context_t
168 u32_casing_prefixes_context (const uint32_t *s, size_t n,
169 casing_prefix_context_t a_context);
/* The case-mapping context given by a suffix string.
   Values of this type are passed by value to the uN_ct_* functions and are
   obtained from the uN_casing_suffix_context functions or from
   unicase_empty_suffix_context.  */
typedef struct casing_suffix_context
        {
          /* These fields are private, undocumented.  */
          uint32_t first_char_except_ignorable;
          uint32_t bits;
        }
        casing_suffix_context_t;
179 /* The case-mapping context of the empty suffix string. */
180 extern LIBUNISTRING_DLL_VARIABLE const casing_suffix_context_t unicase_empty_suffix_context;
181 /* Return the case-mapping context of a given suffix string. */
182 extern casing_suffix_context_t
183 u8_casing_suffix_context (const uint8_t *s, size_t n);
184 extern casing_suffix_context_t
185 u16_casing_suffix_context (const uint16_t *s, size_t n);
186 extern casing_suffix_context_t
187 u32_casing_suffix_context (const uint32_t *s, size_t n);
188 /* Return the case-mapping context of the suffix concat(S, A), given the
189 case-mapping context of the suffix A. */
190 extern casing_suffix_context_t
191 u8_casing_suffixes_context (const uint8_t *s, size_t n,
192 casing_suffix_context_t a_context);
193 extern casing_suffix_context_t
194 u16_casing_suffixes_context (const uint16_t *s, size_t n,
195 casing_suffix_context_t a_context);
196 extern casing_suffix_context_t
197 u32_casing_suffixes_context (const uint32_t *s, size_t n,
198 casing_suffix_context_t a_context);
200 /* Return the uppercase mapping of a string that is surrounded by a prefix
203 u8_ct_toupper (const uint8_t *s, size_t n,
204 casing_prefix_context_t prefix_context,
205 casing_suffix_context_t suffix_context,
206 const char *iso639_language,
208 uint8_t *resultbuf, size_t *lengthp);
210 u16_ct_toupper (const uint16_t *s, size_t n,
211 casing_prefix_context_t prefix_context,
212 casing_suffix_context_t suffix_context,
213 const char *iso639_language,
215 uint16_t *resultbuf, size_t *lengthp);
217 u32_ct_toupper (const uint32_t *s, size_t n,
218 casing_prefix_context_t prefix_context,
219 casing_suffix_context_t suffix_context,
220 const char *iso639_language,
222 uint32_t *resultbuf, size_t *lengthp);
224 /* Return the lowercase mapping of a string that is surrounded by a prefix
227 u8_ct_tolower (const uint8_t *s, size_t n,
228 casing_prefix_context_t prefix_context,
229 casing_suffix_context_t suffix_context,
230 const char *iso639_language,
232 uint8_t *resultbuf, size_t *lengthp);
234 u16_ct_tolower (const uint16_t *s, size_t n,
235 casing_prefix_context_t prefix_context,
236 casing_suffix_context_t suffix_context,
237 const char *iso639_language,
239 uint16_t *resultbuf, size_t *lengthp);
241 u32_ct_tolower (const uint32_t *s, size_t n,
242 casing_prefix_context_t prefix_context,
243 casing_suffix_context_t suffix_context,
244 const char *iso639_language,
246 uint32_t *resultbuf, size_t *lengthp);
248 /* Return the titlecase mapping of a string that is surrounded by a prefix
251 u8_ct_totitle (const uint8_t *s, size_t n,
252 casing_prefix_context_t prefix_context,
253 casing_suffix_context_t suffix_context,
254 const char *iso639_language,
256 uint8_t *resultbuf, size_t *lengthp);
258 u16_ct_totitle (const uint16_t *s, size_t n,
259 casing_prefix_context_t prefix_context,
260 casing_suffix_context_t suffix_context,
261 const char *iso639_language,
263 uint16_t *resultbuf, size_t *lengthp);
265 u32_ct_totitle (const uint32_t *s, size_t n,
266 casing_prefix_context_t prefix_context,
267 casing_suffix_context_t suffix_context,
268 const char *iso639_language,
270 uint32_t *resultbuf, size_t *lengthp);
272 /* Return the case folded string.
273 Comparing uN_casefold (S1) and uN_casefold (S2) with uN_cmp2() is equivalent
274 to comparing S1 and S2 with uN_casecmp().
275 The nf argument identifies the normalization form to apply after the
276 case-mapping. It can also be NULL, for no normalization. */
278 u8_casefold (const uint8_t *s, size_t n, const char *iso639_language,
280 uint8_t *resultbuf, size_t *lengthp);
282 u16_casefold (const uint16_t *s, size_t n, const char *iso639_language,
284 uint16_t *resultbuf, size_t *lengthp);
286 u32_casefold (const uint32_t *s, size_t n, const char *iso639_language,
288 uint32_t *resultbuf, size_t *lengthp);
289 /* Likewise, for a string that is surrounded by a prefix and a suffix. */
291 u8_ct_casefold (const uint8_t *s, size_t n,
292 casing_prefix_context_t prefix_context,
293 casing_suffix_context_t suffix_context,
294 const char *iso639_language,
296 uint8_t *resultbuf, size_t *lengthp);
298 u16_ct_casefold (const uint16_t *s, size_t n,
299 casing_prefix_context_t prefix_context,
300 casing_suffix_context_t suffix_context,
301 const char *iso639_language,
303 uint16_t *resultbuf, size_t *lengthp);
305 u32_ct_casefold (const uint32_t *s, size_t n,
306 casing_prefix_context_t prefix_context,
307 casing_suffix_context_t suffix_context,
308 const char *iso639_language,
310 uint32_t *resultbuf, size_t *lengthp);
312 /* Compare S1 and S2, ignoring differences in case and normalization.
313 The nf argument identifies the normalization form to apply after the
314 case-mapping. It can also be NULL, for no normalization.
315 If successful, set *RESULTP to -1 if S1 < S2, 0 if S1 = S2, 1 if S1 > S2, and
316 return 0. Upon failure, return -1 with errno set. */
318 u8_casecmp (const uint8_t *s1, size_t n1,
319 const uint8_t *s2, size_t n2,
320 const char *iso639_language, uninorm_t nf, int *resultp);
322 u16_casecmp (const uint16_t *s1, size_t n1,
323 const uint16_t *s2, size_t n2,
324 const char *iso639_language, uninorm_t nf, int *resultp);
326 u32_casecmp (const uint32_t *s1, size_t n1,
327 const uint32_t *s2, size_t n2,
328 const char *iso639_language, uninorm_t nf, int *resultp);
330 ulc_casecmp (const char *s1, size_t n1,
331 const char *s2, size_t n2,
332 const char *iso639_language, uninorm_t nf, int *resultp);
334 /* Convert the string S of length N to a NUL-terminated byte sequence, in such
335 a way that comparing uN_casexfrm (S1) and uN_casexfrm (S2) with the gnulib
336 function memcmp2() is equivalent to comparing S1 and S2 with uN_casecoll().
337 NF must be either UNINORM_NFC, UNINORM_NFKC, or NULL for no normalization. */
339 u8_casexfrm (const uint8_t *s, size_t n, const char *iso639_language,
340 uninorm_t nf, char *resultbuf, size_t *lengthp);
342 u16_casexfrm (const uint16_t *s, size_t n, const char *iso639_language,
343 uninorm_t nf, char *resultbuf, size_t *lengthp);
345 u32_casexfrm (const uint32_t *s, size_t n, const char *iso639_language,
346 uninorm_t nf, char *resultbuf, size_t *lengthp);
348 ulc_casexfrm (const char *s, size_t n, const char *iso639_language,
349 uninorm_t nf, char *resultbuf, size_t *lengthp);
351 /* Compare S1 and S2, ignoring differences in case and normalization, using the
352 collation rules of the current locale.
353 The nf argument identifies the normalization form to apply after the
354 case-mapping. It must be either UNINORM_NFC or UNINORM_NFKC. It can also
355 be NULL, for no normalization.
356 If successful, set *RESULTP to -1 if S1 < S2, 0 if S1 = S2, 1 if S1 > S2, and
357 return 0. Upon failure, return -1 with errno set. */
359 u8_casecoll (const uint8_t *s1, size_t n1,
360 const uint8_t *s2, size_t n2,
361 const char *iso639_language, uninorm_t nf, int *resultp);
363 u16_casecoll (const uint16_t *s1, size_t n1,
364 const uint16_t *s2, size_t n2,
365 const char *iso639_language, uninorm_t nf, int *resultp);
367 u32_casecoll (const uint32_t *s1, size_t n1,
368 const uint32_t *s2, size_t n2,
369 const char *iso639_language, uninorm_t nf, int *resultp);
371 ulc_casecoll (const char *s1, size_t n1,
372 const char *s2, size_t n2,
373 const char *iso639_language, uninorm_t nf, int *resultp);
/* Set *RESULTP to true if mapping NFD(S) to upper case is a no-op, or to false
   otherwise, and return 0.  Upon failure, return -1 with errno set.
   S is a string of N units.  ISO639_LANGUAGE identifies the language
   (e.g. "tr" for Turkish); NULL means to use locale independent case
   mappings.  */
extern int
       u8_is_uppercase (const uint8_t *s, size_t n,
                        const char *iso639_language,
                        bool *resultp);
extern int
       u16_is_uppercase (const uint16_t *s, size_t n,
                         const char *iso639_language,
                         bool *resultp);
extern int
       u32_is_uppercase (const uint32_t *s, size_t n,
                         const char *iso639_language,
                         bool *resultp);
/* Set *RESULTP to true if mapping NFD(S) to lower case is a no-op, or to false
   otherwise, and return 0.  Upon failure, return -1 with errno set.
   S is a string of N units.  ISO639_LANGUAGE identifies the language
   (e.g. "tr" for Turkish); NULL means to use locale independent case
   mappings.  */
extern int
       u8_is_lowercase (const uint8_t *s, size_t n,
                        const char *iso639_language,
                        bool *resultp);
extern int
       u16_is_lowercase (const uint16_t *s, size_t n,
                         const char *iso639_language,
                         bool *resultp);
extern int
       u32_is_lowercase (const uint32_t *s, size_t n,
                         const char *iso639_language,
                         bool *resultp);
/* Set *RESULTP to true if mapping NFD(S) to title case is a no-op, or to false
   otherwise, and return 0.  Upon failure, return -1 with errno set.
   S is a string of N units.  ISO639_LANGUAGE identifies the language
   (e.g. "tr" for Turkish); NULL means to use locale independent case
   mappings.  */
extern int
       u8_is_titlecase (const uint8_t *s, size_t n,
                        const char *iso639_language,
                        bool *resultp);
extern int
       u16_is_titlecase (const uint16_t *s, size_t n,
                         const char *iso639_language,
                         bool *resultp);
extern int
       u32_is_titlecase (const uint32_t *s, size_t n,
                         const char *iso639_language,
                         bool *resultp);
/* Set *RESULTP to true if applying case folding to NFD(S) is a no-op, or to
   false otherwise, and return 0.  Upon failure, return -1 with errno set.
   S is a string of N units.  ISO639_LANGUAGE identifies the language
   (e.g. "tr" for Turkish); NULL means to use locale independent case
   mappings.  */
extern int
       u8_is_casefolded (const uint8_t *s, size_t n,
                         const char *iso639_language,
                         bool *resultp);
extern int
       u16_is_casefolded (const uint16_t *s, size_t n,
                          const char *iso639_language,
                          bool *resultp);
extern int
       u32_is_casefolded (const uint32_t *s, size_t n,
                          const char *iso639_language,
                          bool *resultp);
/* Set *RESULTP to true if case matters for S, that is, if mapping NFD(S) to
   either upper case or lower case or title case is not a no-op.
   Set *RESULTP to false if NFD(S) maps to itself under the upper case mapping,
   under the lower case mapping, and under the title case mapping; in other
   words, when NFD(S) consists entirely of caseless characters.
   S is a string of N units.  ISO639_LANGUAGE identifies the language
   (e.g. "tr" for Turkish); NULL means to use locale independent case
   mappings.
   Return 0 if successful.  Upon failure, return -1 with errno set.  */
extern int
       u8_is_cased (const uint8_t *s, size_t n,
                    const char *iso639_language,
                    bool *resultp);
extern int
       u16_is_cased (const uint16_t *s, size_t n,
                     const char *iso639_language,
                     bool *resultp);
extern int
       u32_is_cased (const uint32_t *s, size_t n,
                     const char *iso639_language,
                     bool *resultp);
456 /* ========================================================================= */
462 #endif /* _UNICASE_H */