Fix HTML or feed algorithm to not go past resource_length
author Gustavo Noronha Silva <gustavo.noronha@collabora.co.uk>
Mon, 13 Jul 2009 14:36:10 +0000 (15:36 +0100)
committer Gustavo Noronha Silva <gns@gnome.org>
Mon, 13 Jul 2009 14:38:14 +0000 (15:38 +0100)
libsoup/soup-content-sniffer.c

index 7a39473..bf97136 100644 (file)
@@ -413,42 +413,68 @@ sniff_feed_or_html (SoupContentSniffer *sniffer, SoupMessage *msg, SoupBuffer *b
        int resource_length = MIN (512, buffer->length);
        int pos = 0;
 
+       if (resource_length < 3)
+               goto text_html;
+
        /* Skip a leading UTF-8 BOM */
        if (resource[0] == 0xEF && resource[1] == 0xBB && resource[2] == 0xBF)
                pos = 3;
 
  look_for_tag:
+       if (pos > resource_length)
+               goto text_html;
+
        /* Skip insignificant white space */
        while ((resource[pos] == '\x09') ||
               (resource[pos] == '\x20') ||
               (resource[pos] == '\x0A') ||
-              (resource[pos] == '\x0D'))
+              (resource[pos] == '\x0D')) {
                pos++;
 
+               if (pos > resource_length)
+                       goto text_html;
+       }
+
        /* != < */
        if (resource[pos] != '\x3C')
                return g_strdup ("text/html");
 
        pos++;
 
+       if ((pos + 2) > resource_length)
+               goto text_html;
+
        /* Skipping comments */
        if ((resource[pos] == '\x2D') ||
            (resource[pos+1] == '\x2D') ||
            (resource[pos+2] == '\x3E')) {
                pos = pos + 3;
 
+               if ((pos + 2) > resource_length)
+                       goto text_html;
+
                while ((resource[pos] != '\x2D') &&
                       (resource[pos+1] != '\x2D') &&
-                      (resource[pos+2] != '\x3E'))
+                      (resource[pos+2] != '\x3E')) {
                        pos++;
 
+                       if ((pos + 2) > resource_length)
+                               goto text_html;
+               }
+
                goto look_for_tag;
        }
 
+       if (pos > resource_length)
+               goto text_html;
+
        /* == ! */
        if (resource[pos] == '\x21') {
                do {
                        pos++;
+
+                       if (pos > resource_length)
+                               goto text_html;
                } while (resource[pos] != '\x3E');
 
                pos++;
@@ -457,6 +483,9 @@ sniff_feed_or_html (SoupContentSniffer *sniffer, SoupMessage *msg, SoupBuffer *b
        } else if (resource[pos] == '\x3F') { /* ? */
                do {
                        pos++;
+
+                       if ((pos + 1) > resource_length)
+                               goto text_html;
                } while ((resource[pos] != '\x3F') &&
                         (resource[pos+1] != '\x3E'));
 
@@ -465,17 +494,24 @@ sniff_feed_or_html (SoupContentSniffer *sniffer, SoupMessage *msg, SoupBuffer *b
                goto look_for_tag;
        }
 
+       if ((pos + 2) > resource_length)
+               goto text_html;
+
        if ((resource[pos] == '\x72') &&
            (resource[pos+1] == '\x73') &&
            (resource[pos+2] == '\x73'))
                return g_strdup ("application/rss+xml");
 
+       if ((pos + 3) > resource_length)
+               goto text_html;
+
        if ((resource[pos] == '\x66') &&
            (resource[pos+1] == '\x65') &&
            (resource[pos+2] == '\x65') &&
            (resource[pos+3] == '\x64'))
                return g_strdup ("application/atom+xml");
 
+ text_html:
        return g_strdup ("text/html");
 }