1 // SPDX-License-Identifier: GPL-2.0+
3 * charset conversion utils
5 * Copyright (c) 2017 Rob Clark
10 #include <capitalization.h>
11 #include <efi_loader.h>
/*
 * Table consulted by utf_to_lower() and utf_to_upper(). The initializer is
 * chosen at build time: the Unicode table when
 * CONFIG_EFI_UNICODE_CAPITALIZATION is defined, the CP1250 table when
 * CONFIG_FAT_DEFAULT_CODEPAGE equals 1250.
 * NOTE(review): the CP437 table is presumably the fallback; the
 * corresponding #else/#endif lines are not visible in this excerpt.
 */
14 static struct capitalization_table capitalization_table[] =
15 #ifdef CONFIG_EFI_UNICODE_CAPITALIZATION
16 UNICODE_CAPITALIZATION_TABLE;
17 #elif CONFIG_FAT_DEFAULT_CODEPAGE == 1250
18 CP1250_CAPITALIZATION_TABLE;
20 CP437_CAPITALIZATION_TABLE;
24 * get_code() - read Unicode code point from UTF-8 stream
26 * @read_u8: - stream reader
27 * @data: - pointer passed to stream reader, optional
28 * Return: - Unicode code point
/*
 * Takes a byte-reader callback plus an opaque pointer for it. Only the
 * range checks of the body are visible in this excerpt; the notes below
 * describe those checks and nothing else.
 */
30 static int get_code(u8 (*read_u8)(void *data), void *data)
/* 0xc2..0xf4: bytes that can start a multi-byte UTF-8 sequence */
37 if (ch >= 0xc2 && ch <= 0xf4) {
/* test for "not a continuation byte" (continuation bytes are 0x80..0xbf) */
46 if (ch < 0x80 || ch > 0xbf)
/*
 * Surrogate range 0xD800..0xDFFF is tested here.
 * NOTE(review): presumably treated as an error; the rest of the
 * condition and the branch body are not visible.
 */
54 if ((code >= 0xD800 && code <= 0xDFFF) ||
/* same continuation-byte test as above for a further byte */
58 if (ch < 0x80 || ch > 0xbf)
61 /* 0xc0 - 0xdf or continuation byte (0x80 - 0xbf) */
65 if (ch < 0x80 || ch > 0xbf)
/*
 * Remaining bytes with the top bit set.
 * NOTE(review): presumably pairs with the lead-byte test above, i.e.
 * covers bytes that cannot start a sequence; confirm against full source.
 */
69 } else if (ch >= 0x80) {
78 * read_string() - read byte from character string
80 * @data: - pointer to string
83 * The string pointer is incremented if it does not point to '\0'.
/* Byte reader for in-memory strings; has the callback signature get_code() expects. */
85 static u8 read_string(void *data)
/* the opaque pointer is really a pointer to the caller's string pointer */
88 const char **src = (const char **)data;
/* no handle, no string, or string pointer already at the terminating '\0' */
91 if (!src || !*src || !**src)
99 * read_console() - read byte from console
101 * @data: - not used, needed to match interface
102 * Return: - byte read or 0 on error
/* Byte reader with the callback signature get_code() expects; passed to it by console_read_unicode(). */
104 static u8 read_console(void *data)
/*
 * Store one Unicode code point in *code. Decoding is delegated to
 * get_code() with read_console() as the byte source.
 * NOTE(review): the return values and the input-availability test are
 * not visible in this excerpt.
 */
114 int console_read_unicode(s32 *code)
117 /* No input available */
121 /* Read Unicode code */
/* read_console() ignores its argument, hence NULL */
122 *code = get_code(read_console, NULL);
/* Decode one code point from the UTF-8 string referenced by *src. */
126 s32 utf8_get(const char **src)
/* read_string() receives src itself, so it can advance the caller's pointer */
128 return get_code(read_string, src);
/*
 * Encode @code as UTF-8 through *dst. The visible lines select the lead
 * byte by value range and then emit six-bit continuation bytes.
 * NOTE(review): pointer increments and return values are not visible in
 * this excerpt.
 */
131 int utf8_put(s32 code, char **dst)
/* surrogates and values of 0x110000 or more are tested first */
135 if ((code >= 0xD800 && code <= 0xDFFF) || code >= 0x110000)
/* fits in a single byte */
137 if (code <= 0x007F) {
/* two-byte sequence: lead byte 110xxxxx */
140 if (code <= 0x07FF) {
141 **dst = code >> 6 | 0xC0;
/* three-byte sequence: lead byte 1110xxxx */
143 if (code < 0x10000) {
144 **dst = code >> 12 | 0xE0;
/* four-byte sequence: lead byte 11110xxx */
146 **dst = code >> 18 | 0xF0;
/* continuation bytes 10xxxxxx, six payload bits each, highest bits first */
148 **dst = (code >> 12 & 0x3F) | 0x80;
151 **dst = (code >> 6 & 0x3F) | 0x80;
154 **dst = (code & 0x3F) | 0x80;
/*
 * Walk at most @count code points of the UTF-8 string @src.
 * NOTE(review): presumably returns the number of u16 units the string
 * needs in UTF-16; the counting statements are not visible here.
 */
160 size_t utf8_utf16_strnlen(const char *src, size_t count)
/* one iteration per decoded code point; stops at '\0' or when count is used up */
164 for (; *src && count; --count) {
165 s32 code = utf8_get(&src);
170 /* Reserve space for a replacement character */
/* code points below 0x10000 are handled separately from larger ones */
172 } else if (code < 0x10000) {
/*
 * Convert at most @count code points of the UTF-8 string @src to UTF-16,
 * writing through *dst with utf16_put().
 * NOTE(review): return values and termination of the output are not
 * visible in this excerpt.
 */
181 int utf8_utf16_strncpy(u16 **dst, const char *src, size_t count)
/* all three pointers are checked before use */
183 if (!src || !dst || !*dst)
186 for (; count && *src; --count) {
187 s32 code = utf8_get(&src);
191 utf16_put(code, dst);
/*
 * Read one code point from the UTF-16 string referenced by *src.
 * Only the surrogate tests of the body are visible in this excerpt.
 */
197 s32 utf16_get(const u16 **src)
/* first unit is a low surrogate (0xDC00..0xDFFF) */
207 if (code >= 0xDC00 && code <= 0xDFFF)
/* first unit is a high surrogate (0xD800..0xDBFF); a second unit is examined */
209 if (code >= 0xD800 && code <= 0xDBFF) {
/*
 * The second unit is a low surrogate exactly when it lies in
 * 0xDC00..0xDFFF inclusive. The bounds are strict so that the boundary
 * values 0xDC00 and 0xDFFF themselves are accepted as low surrogates.
 */
217 if (code2 < 0xDC00 || code2 > 0xDFFF)
/*
 * Encode @code as UTF-16 through *dst.
 * NOTE(review): pointer increments and return values are not visible in
 * this excerpt.
 */
225 int utf16_put(s32 code, u16 **dst)
/* surrogates and values of 0x110000 or more are tested first */
229 if ((code >= 0xD800 && code <= 0xDFFF) || code >= 0x110000)
/* values below 0x10000 take a separate branch (body not visible) */
231 if (code < 0x10000) {
/*
 * High surrogate from the bits above bit 9.
 * NOTE(review): presumably 0x10000 has been subtracted from code on a
 * line not shown; confirm against the full source.
 */
235 **dst = code >> 10 | 0xD800;
/* low surrogate from the low ten bits */
237 **dst = (code & 0x3ff) | 0xDC00;
/*
 * Walk at most @count code points of the UTF-16 string @src.
 * NOTE(review): presumably returns the number of code points; the
 * counting statements are not visible in this excerpt.
 */
243 size_t utf16_strnlen(const u16 *src, size_t count)
/* one iteration per decoded code point; stops at a zero unit or when count is used up */
247 for (; *src && count; --count) {
248 s32 code = utf16_get(&src);
253 * In case of an illegal sequence still reserve space for a
254 * replacement character.
/*
 * Walk at most @count code points of the UTF-16 string @src and classify
 * each by value range.
 * NOTE(review): presumably returns the number of bytes the string needs
 * in UTF-8; the counting statements are not visible in this excerpt.
 */
261 size_t utf16_utf8_strnlen(const u16 *src, size_t count)
265 for (; *src && count; --count) {
266 s32 code = utf16_get(&src);
271 /* Reserve space for a replacement character */
/* the thresholds below match the range limits used by utf8_put() */
273 else if (code < 0x80)
275 else if (code < 0x800)
277 else if (code < 0x10000)
/*
 * Convert at most @count code points of the UTF-16 string @src, writing
 * through *dst.
 * NOTE(review): the encoding call and return values are not visible in
 * this excerpt.
 */
285 int utf16_utf8_strncpy(char **dst, const u16 *src, size_t count)
/* all three pointers are checked before use */
287 if (!src || !dst || !*dst)
290 for (; count && *src; --count) {
291 s32 code = utf16_get(&src);
/*
 * Lower-case conversion of a code point: 'A'..'Z' is tested directly,
 * other values are looked up in capitalization_table.
 * NOTE(review): the returned values are not visible in this excerpt.
 */
301 s32 utf_to_lower(const s32 code)
303 struct capitalization_table *pos = capitalization_table;
/* ASCII range tested without touching the table */
307 if (code >= 'A' && code <= 'Z')
/* linear search; an entry whose upper member is 0 ends the table */
311 for (; pos->upper; ++pos) {
312 if (pos->upper == code) {
/*
 * Upper-case conversion of a code point: 'a'..'z' is tested directly,
 * other values are looked up in capitalization_table.
 * NOTE(review): the returned values are not visible in this excerpt.
 */
320 s32 utf_to_upper(const s32 code)
322 struct capitalization_table *pos = capitalization_table;
/* ASCII range tested without touching the table */
326 if (code >= 'a' && code <= 'z')
/* linear search; an entry whose lower member is 0 ends the table */
330 for (; pos->lower; ++pos) {
331 if (pos->lower == code) {
340 * u16_strncmp() - compare two u16 strings
342 * @s1: first string to compare
343 * @s2: second string to compare
344 * @n: maximum number of u16 to compare
345 * Return: 0 if the first n u16 are the same in s1 and s2
346 * < 0 if the first different u16 in s1 is less than the
347 * corresponding u16 in s2
348 * > 0 if the first different u16 in s1 is greater than the
349 * corresponding u16 in s2
351 int u16_strncmp(const u16 *s1, const u16 *s2, size_t n)
/* advance through both strings in lockstep, at most n units */
355 for (; n; --n, ++s1, ++s2) {
/*
 * Length of a zero-terminated u16 string. The input is scanned bytewise,
 * two bytes per step, so it is never dereferenced as a u16.
 */
364 size_t u16_strlen(const void *in)
366 const char *pos = in;
/* stop at the first two-byte unit whose bytes are both zero */
369 for (; pos[0] || pos[1]; pos += 2)
/*
 * Distance from the start in bytes.
 * NOTE(review): the conversion from bytes to u16 units is presumably on
 * a line not visible in this excerpt.
 */
371 ret = pos - (char *)in;
/* Count the non-zero u16 units at the start of @in, looking at no more than @count units. */
376 size_t __efi_runtime u16_strnlen(const u16 *in, size_t count)
/* empty loop body on purpose: all the work happens in the loop header */
379 for (i = 0; count-- && in[i]; i++);
/* Size of a u16 string in bytes, including the terminating zero unit. */
383 size_t u16_strsize(const void *in)
/* +1 accounts for the terminator that u16_strlen() does not count */
385 return (u16_strlen(in) + 1) * sizeof(u16);
/*
 * Copy the u16 string @src to @dest.
 * NOTE(review): the loop body and the return value are not visible in
 * this excerpt.
 */
388 u16 *u16_strcpy(u16 *dest, const u16 *src)
/* no loop condition: termination has to come from inside the body */
392 for (;; dest++, src++) {
/*
 * Duplicate a u16 string.
 * NOTE(review): the allocation of the copy and its failure handling are
 * not visible in this excerpt.
 */
401 u16 *u16_strdup(const void *src)
/* size in bytes, terminating zero unit included */
408 len = (u16_strlen(src) + 1) * sizeof(u16);
/* the terminator is copied as part of the len bytes */
412 memcpy(new, src, len);
417 /* Convert UTF-16 to UTF-8. */
/*
 * Reads u16 units from @src and appends UTF-8 bytes at @dest.
 * NOTE(review): the loop over @size, the handling of code_high in the
 * surrogate branches and the return value are not visible in this
 * excerpt.
 */
418 uint8_t *utf16_to_utf8(uint8_t *dest, const uint16_t *src, size_t size)
/* NOTE(review): presumably holds a pending high surrogate, 0 meaning none */
420 uint32_t code_high = 0;
423 uint32_t code = *src++;
/* current unit is a low surrogate */
426 if (code >= 0xDC00 && code <= 0xDFFF) {
427 /* Surrogate pair. */
/* combine the stored high surrogate and this low surrogate into one code point */
428 code = ((code_high - 0xD800) << 10) + (code - 0xDC00) + 0x10000;
/* four-byte sequence: 11110xxx followed by three 10xxxxxx bytes */
430 *dest++ = (code >> 18) | 0xF0;
431 *dest++ = ((code >> 12) & 0x3F) | 0x80;
432 *dest++ = ((code >> 6) & 0x3F) | 0x80;
433 *dest++ = (code & 0x3F) | 0x80;
437 /* *src may be valid. Don't eat it. */
/* single byte range */
443 if (code <= 0x007F) {
/* two-byte sequence: 110xxxxx 10xxxxxx */
445 } else if (code <= 0x07FF) {
446 *dest++ = (code >> 6) | 0xC0;
447 *dest++ = (code & 0x3F) | 0x80;
/* high surrogate (branch body not visible) */
448 } else if (code >= 0xD800 && code <= 0xDBFF) {
/* low surrogate (branch body not visible) */
451 } else if (code >= 0xDC00 && code <= 0xDFFF) {
/* three-byte sequence: 1110xxxx 10xxxxxx 10xxxxxx */
454 } else if (code < 0x10000) {
455 *dest++ = (code >> 12) | 0xE0;
456 *dest++ = ((code >> 6) & 0x3F) | 0x80;
457 *dest++ = (code & 0x3F) | 0x80;
/* four-byte sequence for the remaining values */
459 *dest++ = (code >> 18) | 0xF0;
460 *dest++ = ((code >> 12) & 0x3F) | 0x80;
461 *dest++ = ((code >> 6) & 0x3F) | 0x80;
462 *dest++ = (code & 0x3F) | 0x80;