From: Mikhail Zabaluev Date: Thu, 27 May 2010 18:00:12 +0000 (-0400) Subject: Optimized the overlong sequence check in g_utf8_get_char_extended() X-Git-Tag: 2.25.8~58 X-Git-Url: http://review.tizen.org/git/?a=commitdiff_plain;h=c3bc0f4f8fc5125a732f383671ee5bff939423d2;p=platform%2Fupstream%2Fglib.git Optimized the overlong sequence check in g_utf8_get_char_extended() Rather than branching a second time to derive the expected sequence length from the resulting character code, we can record the minimum valid code value during the initial branching on the lead byte. --- diff --git a/glib/gutf8.c b/glib/gutf8.c index 27b1e4c..d0badbb 100644 --- a/glib/gutf8.c +++ b/glib/gutf8.c @@ -704,20 +704,23 @@ g_utf8_strrchr (const char *p, /* Like g_utf8_get_char, but take a maximum length - * and return (gunichar)-2 on incomplete trailing character + * and return (gunichar)-2 on incomplete trailing character; + * also check for malformed or overlong sequences + * and return (gunichar)-1 in this case. */ static inline gunichar g_utf8_get_char_extended (const gchar *p, - gssize max_len) + gssize max_len) { guint i, len; + gunichar min_code; gunichar wc = (guchar) *p; if (wc < 0x80) { return wc; } - else if (wc < 0xc0) + else if (G_UNLIKELY (wc < 0xc0)) { return (gunichar)-1; } @@ -725,33 +728,38 @@ g_utf8_get_char_extended (const gchar *p, { len = 2; wc &= 0x1f; + min_code = 1 << 7; } else if (wc < 0xf0) { len = 3; wc &= 0x0f; + min_code = 1 << 11; } else if (wc < 0xf8) { len = 4; wc &= 0x07; + min_code = 1 << 16; } else if (wc < 0xfc) { len = 5; wc &= 0x03; + min_code = 1 << 21; } else if (wc < 0xfe) { len = 6; wc &= 0x01; + min_code = 1 << 26; } else { return (gunichar)-1; } - - if (max_len >= 0 && len > max_len) + + if (G_UNLIKELY (max_len >= 0 && len > max_len)) { for (i = 1; i < max_len; i++) { @@ -764,8 +772,8 @@ g_utf8_get_char_extended (const gchar *p, for (i = 1; i < len; ++i) { gunichar ch = ((guchar *)p)[i]; - - if ((ch & 0xc0) != 0x80) + + if (G_UNLIKELY ((ch & 0xc0) != 0x80)) { if (ch) return (gunichar)-1; @@ 
-777,9 +785,9 @@ g_utf8_get_char_extended (const gchar *p, wc |= (ch & 0x3f); } - if (UTF8_LENGTH(wc) != len) + if (G_UNLIKELY (wc < min_code)) return (gunichar)-1; - + return wc; }