[CVE-2021-3517] Validate UTF8 in xmlEncodeEntities 99/286699/1
author Joel Hockey <joel.hockey@gmail.com>
Mon, 17 Aug 2020 00:19:35 +0000 (17:19 -0700)
committer DongHun Kwak <dh0128.kwak@samsung.com>
Thu, 12 Jan 2023 01:07:45 +0000 (10:07 +0900)
The code currently assumes its input is UTF-8 without validating it.
Truncated UTF-8 input can cause out-of-bounds array access.

Adds further checks to partial fix in 50f06b3e.

Fixes #178

Change-Id: Ie12b322068d4550475a04fc5976a79e8a38231f9
Signed-off-by: DongHun Kwak <dh0128.kwak@samsung.com>
entities.c

index d575e9d..7cdbc4d 100644 (file)
@@ -666,11 +666,25 @@ xmlEncodeEntitiesInternal(xmlDocPtr doc, const xmlChar *input, int attr) {
            } else {
                /*
                 * We assume we have UTF-8 input.
+                * It must match either:
+                *   110xxxxx 10xxxxxx
+                *   1110xxxx 10xxxxxx 10xxxxxx
+                *   11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
+                * That is:
+                *   cur[0] is 11xxxxxx
+                *   cur[1] is 10xxxxxx
+                *   cur[2] is 10xxxxxx if cur[0] is 111xxxxx
+                *   cur[3] is 10xxxxxx if cur[0] is 1111xxxx
+                *   cur[0] is not 11111xxx
                 */
                char buf[11], *ptr;
                int val = 0, l = 1;
 
-               if (*cur < 0xC0) {
+               if (((cur[0] & 0xC0) != 0xC0) ||
+                   ((cur[1] & 0xC0) != 0x80) ||
+                   (((cur[0] & 0xE0) == 0xE0) && ((cur[2] & 0xC0) != 0x80)) ||
+                   (((cur[0] & 0xF0) == 0xF0) && ((cur[3] & 0xC0) != 0x80)) ||
+                   (((cur[0] & 0xF8) == 0xF8))) {
                    xmlEntitiesErr(XML_CHECK_NOT_UTF8,
                            "xmlEncodeEntities: input not UTF-8");
                    if (doc != NULL)