1 // SPDX-License-Identifier: GPL-2.0+
3 * charset conversion utils
5 * Copyright (c) 2017 Rob Clark
10 #include <capitalization.h>
12 #include <efi_loader.h>
17 * codepage_437 - Unicode to codepage 437 translation table
19 const u16 codepage_437[128] = CP437;
21 static struct capitalization_table capitalization_table[] =
22 #ifdef CONFIG_EFI_UNICODE_CAPITALIZATION
23 UNICODE_CAPITALIZATION_TABLE;
24 #elif CONFIG_FAT_DEFAULT_CODEPAGE == 1250
25 CP1250_CAPITALIZATION_TABLE;
27 CP437_CAPITALIZATION_TABLE;
31 * get_code() - read Unicode code point from UTF-8 stream
33 * @read_u8: - stream reader
34 * @data: - string buffer passed to stream reader, optional
35 * Return: - Unicode code point
37 static int get_code(u8 (*read_u8)(void *data), void *data)
44 if (ch >= 0xc2 && ch <= 0xf4) {
53 if (ch < 0x80 || ch > 0xbf)
61 if ((code >= 0xD800 && code <= 0xDFFF) ||
65 if (ch < 0x80 || ch > 0xbf)
68 /* 0xc0 - 0xdf or continuation byte (0x80 - 0xbf) */
72 if (ch < 0x80 || ch > 0xbf)
76 } else if (ch >= 0x80) {
85 * read_string() - read byte from character string
87 * @data: - pointer to string
90 * The string pointer is incremented if it does not point to '\0'.
92 static u8 read_string(void *data)
95 const char **src = (const char **)data;
98 if (!src || !*src || !**src)
106 * read_console() - read byte from console
108 * @data: - not used, needed to match interface
109 * Return: - byte read or 0 on error
111 static u8 read_console(void *data)
121 int console_read_unicode(s32 *code)
124 /* No input available */
128 /* Read Unicode code */
129 *code = get_code(read_console, NULL);
133 s32 utf8_get(const char **src)
135 return get_code(read_string, src);
138 int utf8_put(s32 code, char **dst)
142 if ((code >= 0xD800 && code <= 0xDFFF) || code >= 0x110000)
144 if (code <= 0x007F) {
147 if (code <= 0x07FF) {
148 **dst = code >> 6 | 0xC0;
150 if (code < 0x10000) {
151 **dst = code >> 12 | 0xE0;
153 **dst = code >> 18 | 0xF0;
155 **dst = (code >> 12 & 0x3F) | 0x80;
158 **dst = (code >> 6 & 0x3F) | 0x80;
161 **dst = (code & 0x3F) | 0x80;
167 size_t utf8_utf16_strnlen(const char *src, size_t count)
171 for (; *src && count; --count) {
172 s32 code = utf8_get(&src);
177 /* Reserve space for a replacement character */
179 } else if (code < 0x10000) {
188 int utf8_utf16_strncpy(u16 **dst, const char *src, size_t count)
190 if (!src || !dst || !*dst)
193 for (; count && *src; --count) {
194 s32 code = utf8_get(&src);
198 utf16_put(code, dst);
204 s32 utf16_get(const u16 **src)
214 if (code >= 0xDC00 && code <= 0xDFFF)
216 if (code >= 0xD800 && code <= 0xDBFF) {
/*
 * NOTE(review): this condition also matches code2 == 0xDC00 and
 * code2 == 0xDFFF, whereas the test on the first unit above uses the
 * inclusive range 0xDC00..0xDFFF and utf16_put() can emit exactly 0xDC00
 * as the second unit ((code & 0x3ff) | 0xDC00). This looks like an
 * off-by-one at both bounds ('<' and '>' may be intended) - confirm.
 */
224 if (code2 <= 0xDC00 || code2 >= 0xDFFF)
232 int utf16_put(s32 code, u16 **dst)
236 if ((code >= 0xD800 && code <= 0xDFFF) || code >= 0x110000)
238 if (code < 0x10000) {
242 **dst = code >> 10 | 0xD800;
244 **dst = (code & 0x3ff) | 0xDC00;
250 size_t utf16_strnlen(const u16 *src, size_t count)
254 for (; *src && count; --count) {
255 s32 code = utf16_get(&src);
260 * In case of an illegal sequence still reserve space for a
261 * replacement character.
268 size_t utf16_utf8_strnlen(const u16 *src, size_t count)
272 for (; *src && count; --count) {
273 s32 code = utf16_get(&src);
278 /* Reserve space for a replacement character */
280 else if (code < 0x80)
282 else if (code < 0x800)
284 else if (code < 0x10000)
292 int utf16_utf8_strncpy(char **dst, const u16 *src, size_t count)
294 if (!src || !dst || !*dst)
297 for (; count && *src; --count) {
298 s32 code = utf16_get(&src);
308 s32 utf_to_lower(const s32 code)
310 struct capitalization_table *pos = capitalization_table;
314 if (code >= 'A' && code <= 'Z')
318 for (; pos->upper; ++pos) {
319 if (pos->upper == code) {
327 s32 utf_to_upper(const s32 code)
329 struct capitalization_table *pos = capitalization_table;
333 if (code >= 'a' && code <= 'z')
337 for (; pos->lower; ++pos) {
338 if (pos->lower == code) {
347 * u16_strncmp() - compare two u16 strings
349 * @s1: first string to compare
350 * @s2: second string to compare
351 * @n: maximum number of u16 to compare
352 * Return: 0 if the first n u16 are the same in s1 and s2
353 * < 0 if the first different u16 in s1 is less than the
354 * corresponding u16 in s2
355 * > 0 if the first different u16 in s1 is greater than the
356 * corresponding u16 in s2
358 int u16_strncmp(const u16 *s1, const u16 *s2, size_t n)
362 for (; n; --n, ++s1, ++s2) {
371 size_t u16_strlen(const void *in)
373 const char *pos = in;
376 for (; pos[0] || pos[1]; pos += 2)
378 ret = pos - (char *)in;
383 size_t __efi_runtime u16_strnlen(const u16 *in, size_t count)
386 for (i = 0; count-- && in[i]; i++);
390 size_t u16_strsize(const void *in)
392 return (u16_strlen(in) + 1) * sizeof(u16);
395 u16 *u16_strcpy(u16 *dest, const u16 *src)
399 for (;; dest++, src++) {
408 u16 *u16_strdup(const void *src)
415 len = (u16_strlen(src) + 1) * sizeof(u16);
419 memcpy(new, src, len);
424 /* Convert UTF-16 to UTF-8. */
425 uint8_t *utf16_to_utf8(uint8_t *dest, const uint16_t *src, size_t size)
427 uint32_t code_high = 0;
430 uint32_t code = *src++;
433 if (code >= 0xDC00 && code <= 0xDFFF) {
434 /* Surrogate pair. */
435 code = ((code_high - 0xD800) << 10) + (code - 0xDC00) + 0x10000;
437 *dest++ = (code >> 18) | 0xF0;
438 *dest++ = ((code >> 12) & 0x3F) | 0x80;
439 *dest++ = ((code >> 6) & 0x3F) | 0x80;
440 *dest++ = (code & 0x3F) | 0x80;
444 /* *src may be valid. Don't eat it. */
450 if (code <= 0x007F) {
452 } else if (code <= 0x07FF) {
453 *dest++ = (code >> 6) | 0xC0;
454 *dest++ = (code & 0x3F) | 0x80;
455 } else if (code >= 0xD800 && code <= 0xDBFF) {
458 } else if (code >= 0xDC00 && code <= 0xDFFF) {
461 } else if (code < 0x10000) {
462 *dest++ = (code >> 12) | 0xE0;
463 *dest++ = ((code >> 6) & 0x3F) | 0x80;
464 *dest++ = (code & 0x3F) | 0x80;
466 *dest++ = (code >> 18) | 0xF0;
467 *dest++ = ((code >> 12) & 0x3F) | 0x80;
468 *dest++ = ((code >> 6) & 0x3F) | 0x80;
469 *dest++ = (code & 0x3F) | 0x80;
478 * utf_to_cp() - translate Unicode code point to 8bit codepage
480 * Codepoints that do not exist in the codepage are rendered as a question mark.
482 * @c: pointer to Unicode code point to be translated
483 * @codepage: Unicode to codepage translation table
484 * Return: 0 on success, -ENOENT if codepoint cannot be translated
486 int utf_to_cp(s32 *c, const u16 *codepage)
491 /* Look up codepage translation */
492 for (j = 0; j < 0x80; ++j) {
493 if (*c == codepage[j]) {