X-Git-Url: http://review.tizen.org/git/?a=blobdiff_plain;f=src%2Futf8.c;h=d37ba8e301d7efb7a07dd63d2e26e29d33893836;hb=5b3774ace991a396752ff0a846fdfb5c38424551;hp=a7fa82e271f2bcdaf35c3f6d5c9abc9105145eb7;hpb=767fa86d42a5e25e7043622d189247e02a5ca379;p=platform%2Fupstream%2Flibxkbcommon.git diff --git a/src/utf8.c b/src/utf8.c index a7fa82e..d37ba8e 100644 --- a/src/utf8.c +++ b/src/utf8.c @@ -24,12 +24,19 @@ * Author: Rob Bradford */ +#include "config.h" + #include <stddef.h> #include <stdbool.h> #include <inttypes.h> #include "utf8.h" +/* Conformant encoding form conversion from UTF-32 to UTF-8. + * + * See https://www.unicode.org/versions/Unicode15.0.0/ch03.pdf#G28875 + * for further details. +*/ int utf32_to_utf8(uint32_t unichar, char *buffer) { @@ -45,21 +52,20 @@ utf32_to_utf8(uint32_t unichar, char *buffer) length = 2; head = 0xc0; } + /* Handle surrogates */ + else if (0xd800 <= unichar && unichar <= 0xdfff) { + goto ill_formed_code_unit_subsequence; + } else if (unichar <= 0xffff) { length = 3; head = 0xe0; } - else if (unichar <= 0x1fffff) { + else if (unichar <= 0x10ffff) { length = 4; head = 0xf0; } - else if (unichar <= 0x3ffffff) { - length = 5; - head = 0xf8; - } else { - length = 6; - head = 0xfc; + goto ill_formed_code_unit_subsequence; } for (count = length - 1, shift = 0; count > 0; count--, shift += 6) @@ -69,6 +75,10 @@ utf32_to_utf8(uint32_t unichar, char *buffer) buffer[length] = '\0'; return length + 1; + +ill_formed_code_unit_subsequence: + buffer[0] = '\0'; + return 0; } bool