/* Like g_utf8_get_char, but take a maximum length
- * and return (gunichar)-2 on incomplete trailing character
+ * and return (gunichar)-2 on incomplete trailing character;
+ * also check for malformed or overlong sequences
+ * and return (gunichar)-1 in this case.
*/
static inline gunichar
g_utf8_get_char_extended (const gchar *p,
- gssize max_len)
+ gssize max_len)
{
guint i, len;
+ gunichar min_code;
gunichar wc = (guchar) *p;
if (wc < 0x80)
{
return wc;
}
- else if (wc < 0xc0)
+ else if (G_UNLIKELY (wc < 0xc0))
{
return (gunichar)-1;
}
{
len = 2;
wc &= 0x1f;
+ min_code = 1 << 7;
}
else if (wc < 0xf0)
{
len = 3;
wc &= 0x0f;
+ min_code = 1 << 11;
}
else if (wc < 0xf8)
{
len = 4;
wc &= 0x07;
+ min_code = 1 << 16;
}
else if (wc < 0xfc)
{
len = 5;
wc &= 0x03;
+ min_code = 1 << 21;
}
else if (wc < 0xfe)
{
len = 6;
wc &= 0x01;
+ min_code = 1 << 26;
}
else
{
return (gunichar)-1;
}
-
- if (max_len >= 0 && len > max_len)
+
+ if (G_UNLIKELY (max_len >= 0 && len > max_len))
{
for (i = 1; i < max_len; i++)
{
for (i = 1; i < len; ++i)
{
gunichar ch = ((guchar *)p)[i];
-
- if ((ch & 0xc0) != 0x80)
+
+ if (G_UNLIKELY ((ch & 0xc0) != 0x80))
{
if (ch)
return (gunichar)-1;
wc |= (ch & 0x3f);
}
- if (UTF8_LENGTH(wc) != len)
+ if (G_UNLIKELY (wc < min_code))
return (gunichar)-1;
-
+
return wc;
}