#include <linux/export.h>
#include <linux/string.h>
#include <linux/module.h>
+#include <linux/unicode.h>
-/* Encoding a unicode version number as a single unsigned int. */
-#define UNICODE_MAJ_SHIFT (16)
-#define UNICODE_MIN_SHIFT (8)
-
-#define UNICODE_AGE(MAJ, MIN, REV) \
- (((unsigned int)(MAJ) << UNICODE_MAJ_SHIFT) | \
- ((unsigned int)(MIN) << UNICODE_MIN_SHIFT) | \
- ((unsigned int)(REV)))
-
-/* Highest unicode version supported by the data tables. */
-extern int utf8version_is_supported(u8 maj, u8 min, u8 rev);
-extern int utf8version_latest(void);
-
-/*
- * Look for the correct const struct utf8data for a unicode version.
- * Returns NULL if the version requested is too new.
- *
- * Two normalization forms are supported: nfdi and nfdicf.
- *
- * nfdi:
- * - Apply unicode normalization form NFD.
- * - Remove any Default_Ignorable_Code_Point.
- *
- * nfdicf:
- * - Apply unicode normalization form NFD.
- * - Remove any Default_Ignorable_Code_Point.
- * - Apply a full casefold (C + F).
- */
-extern const struct utf8data *utf8nfdi(unsigned int maxage);
-extern const struct utf8data *utf8nfdicf(unsigned int maxage);
-
-/*
- * Determine the maximum age of any unicode character in the string.
- * Returns 0 if only unassigned code points are present.
- * Returns -1 if the input is not valid UTF-8.
- */
-extern int utf8agemax(const struct utf8data *data, const char *s);
-extern int utf8nagemax(const struct utf8data *data, const char *s, size_t len);
-
-/*
- * Determine the minimum age of any unicode character in the string.
- * Returns 0 if any unassigned code points are present.
- * Returns -1 if the input is not valid UTF-8.
- */
-extern int utf8agemin(const struct utf8data *data, const char *s);
-extern int utf8nagemin(const struct utf8data *data, const char *s, size_t len);
+int utf8version_is_supported(const struct unicode_map *um, unsigned int version);
/*
* Determine the length of the normalized from of the string,
* Returns 0 if only ignorable code points are present.
* Returns -1 if the input is not valid UTF-8.
*/
-extern ssize_t utf8len(const struct utf8data *data, const char *s);
-extern ssize_t utf8nlen(const struct utf8data *data, const char *s, size_t len);
+ssize_t utf8nlen(const struct unicode_map *um, enum utf8_normalization n,
+ const char *s, size_t len);
/* Needed in struct utf8cursor below. */
#define UTF8HANGULLEAF (12)
* Cursor structure used by the normalizer.
*/
struct utf8cursor {
- const struct utf8data *data;
+ const struct unicode_map *um;
+ enum utf8_normalization n;
const char *s;
const char *p;
const char *ss;
* Returns 0 on success.
* Returns -1 on failure.
*/
-extern int utf8cursor(struct utf8cursor *u8c, const struct utf8data *data,
- const char *s);
-extern int utf8ncursor(struct utf8cursor *u8c, const struct utf8data *data,
- const char *s, size_t len);
+int utf8ncursor(struct utf8cursor *u8c, const struct unicode_map *um,
+ enum utf8_normalization n, const char *s, size_t len);
/*
* Get the next byte in the normalization.
*/
extern int utf8byte(struct utf8cursor *u8c);
+struct utf8data {
+ unsigned int maxage;
+ unsigned int offset;
+};
+
+struct utf8data_table {
+ const unsigned int *utf8agetab;
+ int utf8agetab_size;
+
+ const struct utf8data *utf8nfdicfdata;
+ int utf8nfdicfdata_size;
+
+ const struct utf8data *utf8nfdidata;
+ int utf8nfdidata_size;
+
+ const unsigned char *utf8data;
+};
+
+extern struct utf8data_table utf8_data_table;
+
#endif /* UTF8NORM_H */