1 /* SPDX-License-Identifier: GPL-2.0-only */
3 * Copyright (c) 2014 SGI.
10 #include <linux/types.h>
11 #include <linux/export.h>
12 #include <linux/string.h>
13 #include <linux/module.h>
14 #include <linux/unicode.h>
/*
 * Declaration only; the definition is not in this header.
 * NOTE(review): judging by the name, this reports whether the given unicode
 * version is supported -- confirm the return convention at the definition.
 */
16 int utf8version_is_supported(unsigned int version);
19 * Look for the correct const struct utf8data for a unicode version.
20 * Returns NULL if the version requested is too new.
22 * Two normalization forms are supported: nfdi and nfdicf.
25 * - Apply unicode normalization form NFD.
26 * - Remove any Default_Ignorable_Code_Point.
29 * - Apply unicode normalization form NFD.
30 * - Remove any Default_Ignorable_Code_Point.
31 * - Apply a full casefold (C + F).
/* nfdi: NFD with Default_Ignorable_Code_Points removed; NULL if too new. */
33 const struct utf8data *utf8nfdi(unsigned int maxage);
/* nfdicf: the same as nfdi, followed by a full casefold (C + F). */
34 const struct utf8data *utf8nfdicf(unsigned int maxage);
37 * Determine the length of the normalized form of the string,
38 * excluding any terminating NUL byte.
39 * Returns 0 if only ignorable code points are present.
40 * Returns -1 if the input is not valid UTF-8.
/*
 * Length of the normalized string, terminator excluded.
 * 0 when only ignorable code points are present, -1 on invalid UTF-8.
 */
42 ssize_t utf8nlen(const struct utf8data *data, const char *s, size_t len);
44 /* Size of the hangul[] byte array in struct utf8cursor below. */
45 #define UTF8HANGULLEAF (12)
48 * Cursor structure used by the normalizer.
51 const struct utf8data *data;
60 unsigned char hangul[UTF8HANGULLEAF];
64 * Initialize a utf8cursor to normalize a string.
65 * Returns 0 on success.
66 * Returns -1 on failure.
68 extern int utf8cursor(struct utf8cursor *u8c, const struct utf8data *data,
70 int utf8ncursor(struct utf8cursor *u8c, const struct utf8data *data,
71 const char *s, size_t len);
74 * Get the next byte in the normalization.
75 * Returns a value > 0 && < 256 on success.
76 * Returns 0 when the end of the normalization is reached.
77 * Returns -1 if the string being normalized is not valid UTF-8.
/*
 * Fetch the next byte of the normalization: a value in 1..255 on success,
 * 0 once the end is reached, -1 if the input is not valid UTF-8.
 */
79 int utf8byte(struct utf8cursor *u8c);
81 #endif /* UTF8NORM_H */