4 #include <asm/unaligned.h>
6 static inline int utf8_to_utf16le(const char *s, __le16 *cp, unsigned len)
13 * this insists on correct encodings, though not minimal ones.
14 * BUT it currently rejects legit 4-byte UTF-8 code points,
15 * which need surrogate pairs. (Unicode 3.1 can use them.)
17 while (len != 0 && (c = (u8) *s++) != 0) {
21 * 00000yyyyyxxxxxx = 110yyyyy 10xxxxxx
23 if ((c & 0xe0) == 0xc0) {
24 uchar = (c & 0x1f) << 6;
27 if ((c & 0xc0) != 0x80)
33 * 3-byte sequence (most CJKV characters):
34 * zzzzyyyyyyxxxxxx = 1110zzzz 10yyyyyy 10xxxxxx
36 } else if ((c & 0xf0) == 0xe0) {
37 uchar = (c & 0x0f) << 12;
40 if ((c & 0xc0) != 0x80)
46 if ((c & 0xc0) != 0x80)
51 /* no bogus surrogates */
52 if (0xd800 <= uchar && uchar <= 0xdfff)
56 * 4-byte sequence (surrogate pairs, currently rare):
57 * 110110wwwwzzzzyy + 110111yyyyxxxxxx
58 * = 11110uuu 10uuzzzz 10yyyyyy 10xxxxxx
60 * FIXME accept the surrogate code points (only)
66 put_unaligned_le16(uchar, cp++);
75 #endif /* _LINUX_UTF_H */