From 15c316bcbc23b777eae4e21f129526190aa20af1 Mon Sep 17 00:00:00 2001 From: Arvind Sankar Date: Mon, 18 May 2020 15:07:15 -0400 Subject: [PATCH] efi/libstub: Get the exact UTF-8 length efi_convert_cmdline currently overestimates the length of the equivalent UTF-8 encoding. snprintf can now be used to do the conversion to UTF-8, however, it does not have a way to specify the size of the UTF-16 string, only the size of the resulting UTF-8 string. So in order to use it, we need to precalculate the exact UTF-8 size. Signed-off-by: Arvind Sankar Link: https://lore.kernel.org/r/20200518190716.751506-24-nivedita@alum.mit.edu Signed-off-by: Ard Biesheuvel --- drivers/firmware/efi/libstub/efi-stub-helper.c | 44 +++++++++++++++++++------- 1 file changed, 32 insertions(+), 12 deletions(-) diff --git a/drivers/firmware/efi/libstub/efi-stub-helper.c b/drivers/firmware/efi/libstub/efi-stub-helper.c index 0d00073..4d544f39 100644 --- a/drivers/firmware/efi/libstub/efi-stub-helper.c +++ b/drivers/firmware/efi/libstub/efi-stub-helper.c @@ -206,15 +206,6 @@ efi_status_t efi_parse_options(char const *cmdline) } /* - * Get the number of UTF-8 bytes corresponding to an UTF-16 character. - * This overestimates for surrogates, but that is okay. - */ -static int efi_utf8_bytes(u16 c) -{ - return 1 + (c >= 0x80) + (c >= 0x800); -} - -/* * Convert an UTF-16 string, not necessarily null terminated, to UTF-8. */ static u8 *efi_utf16_to_utf8(u8 *dst, const u16 *src, int n) @@ -274,10 +265,39 @@ char *efi_convert_cmdline(efi_loaded_image_t *image, if (options) { s2 = options; - while (*s2 && *s2 != '\n' - && options_chars < load_options_chars) { - options_bytes += efi_utf8_bytes(*s2++); + while (options_chars < load_options_chars) { + u16 c = *s2++; + + if (c == L'\0' || c == L'\n') + break; + /* + * Get the number of UTF-8 bytes corresponding to a + * UTF-16 character. + * The first part handles everything in the BMP. + */ + options_bytes += 1 + (c >= 0x80) + (c >= 0x800); options_chars++; + /* + * Add one more byte for valid surrogate pairs. Invalid + * surrogates will be replaced with 0xfffd and take up + * only 3 bytes. + */ + if ((c & 0xfc00) == 0xd800) { + /* + * If the very last word is a high surrogate, + * we must ignore it since we can't access the + * low surrogate. + */ + if (options_chars == load_options_chars) { + options_bytes -= 3; + options_chars--; + break; + } else if ((*s2 & 0xfc00) == 0xdc00) { + options_bytes++; + options_chars++; + s2++; + } + } } } -- 2.7.4