Imported Upstream version 2.9.1
[platform/upstream/libxml2.git] / HTMLparser.c
index 66ff17b..dd0c1ea 100644 (file)
@@ -44,6 +44,9 @@
 #include <libxml/globals.h>
 #include <libxml/uri.h>
 
+#include "buf.h"
+#include "enc.h"
+
 #define HTML_MAX_NAMELEN 1000
 #define HTML_PARSER_BIG_BUFFER_SIZE 1000
 #define HTML_PARSER_BUFFER_SIZE 100
@@ -1082,7 +1085,7 @@ static const char * const htmlStartClose[] = {
 "div",         "p", "head", NULL,
 "noscript",    "p", NULL,
 "center",      "font", "b", "i", "p", "head", NULL,
-"a",           "a", NULL,
+"a",           "a", "head", NULL,
 "caption",     "p", NULL,
 "colgroup",    "caption", "colgroup", "col", "p", NULL,
 "col",         "caption", "col", "p", NULL,
@@ -1100,6 +1103,43 @@ static const char * const htmlStartClose[] = {
 "option",      "option", NULL,
 "fieldset",    "legend", "p", "head", "h1", "h2", "h3", "h4", "h5", "h6",
                "pre", "listing", "xmp", "a", NULL,
+/* most tags in FONTSTYLE, PHRASE and SPECIAL should close <head> */
+"tt",          "head", NULL,
+"i",           "head", NULL,
+"b",           "head", NULL,
+"u",           "head", NULL,
+"s",           "head", NULL,
+"strike",      "head", NULL,
+"big",         "head", NULL,
+"small",       "head", NULL,
+
+"em",          "head", NULL,
+"strong",      "head", NULL,
+"dfn",         "head", NULL,
+"code",                "head", NULL,
+"samp",                "head", NULL,
+"kbd",         "head", NULL,
+"var",         "head", NULL,
+"cite",                "head", NULL,
+"abbr",                "head", NULL,
+"acronym",     "head", NULL,
+
+/* "a" */
+"img",         "head", NULL,
+/* "applet" */
+/* "embed" */
+/* "object" */
+"font",                "head", NULL,
+/* "basefont" */
+"br",          "head", NULL,
+/* "script" */
+"map",         "head", NULL,
+"q",           "head", NULL,
+"sub",         "head", NULL,
+"sup",         "head", NULL,
+"span",                "head", NULL,
+"bdo",         "head", NULL,
+"iframe",      "head", NULL,
 NULL
 };
 
@@ -2941,9 +2981,14 @@ htmlParseCharData(htmlParserCtxtPtr ctxt) {
             */
            if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
                if (areBlanks(ctxt, buf, nbchar)) {
-                   if (ctxt->sax->ignorableWhitespace != NULL)
-                       ctxt->sax->ignorableWhitespace(ctxt->userData,
-                                                      buf, nbchar);
+                   if (ctxt->keepBlanks) {
+                       if (ctxt->sax->characters != NULL)
+                           ctxt->sax->characters(ctxt->userData, buf, nbchar);
+                   } else {
+                       if (ctxt->sax->ignorableWhitespace != NULL)
+                           ctxt->sax->ignorableWhitespace(ctxt->userData,
+                                                          buf, nbchar);
+                   }
                } else {
                    htmlCheckParagraph(ctxt);
                    if (ctxt->sax->characters != NULL)
@@ -2974,8 +3019,14 @@ htmlParseCharData(htmlParserCtxtPtr ctxt) {
         */
        if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
            if (areBlanks(ctxt, buf, nbchar)) {
-               if (ctxt->sax->ignorableWhitespace != NULL)
-                   ctxt->sax->ignorableWhitespace(ctxt->userData, buf, nbchar);
+               if (ctxt->keepBlanks) {
+                   if (ctxt->sax->characters != NULL)
+                       ctxt->sax->characters(ctxt->userData, buf, nbchar);
+               } else {
+                   if (ctxt->sax->ignorableWhitespace != NULL)
+                       ctxt->sax->ignorableWhitespace(ctxt->userData,
+                                                      buf, nbchar);
+               }
            } else {
                htmlCheckParagraph(ctxt);
                if (ctxt->sax->characters != NULL)
@@ -3509,19 +3560,14 @@ htmlCheckEncodingDirect(htmlParserCtxtPtr ctxt, const xmlChar *encoding) {
             * convert as much as possible to the parser reading buffer.
             */
            processed = ctxt->input->cur - ctxt->input->base;
-           xmlBufferShrink(ctxt->input->buf->buffer, processed);
-           nbchars = xmlCharEncInFunc(ctxt->input->buf->encoder,
-                                      ctxt->input->buf->buffer,
-                                      ctxt->input->buf->raw);
+           xmlBufShrink(ctxt->input->buf->buffer, processed);
+           nbchars = xmlCharEncInput(ctxt->input->buf, 1);
            if (nbchars < 0) {
                htmlParseErr(ctxt, XML_ERR_INVALID_ENCODING,
                             "htmlCheckEncoding: encoder error\n",
                             NULL, NULL);
            }
-           ctxt->input->base =
-           ctxt->input->cur = ctxt->input->buf->buffer->content;
-            ctxt->input->end =
-                          &ctxt->input->base[ctxt->input->buf->buffer->use];
+            xmlBufResetInput(ctxt->input->buf->buffer, ctxt->input);
        }
     }
 }
@@ -4906,9 +4952,7 @@ htmlCreateMemoryParserCtxt(const char *buffer, int size) {
 
     input->filename = NULL;
     input->buf = buf;
-    input->base = input->buf->buffer->content;
-    input->cur = input->buf->buffer->content;
-    input->end = &input->buf->buffer->content[input->buf->buffer->use];
+    xmlBufResetInput(buf->buffer, input);
 
     inputPush(ctxt, input);
     return(ctxt);
@@ -5025,8 +5069,8 @@ htmlParseLookupSequence(htmlParserCtxtPtr ctxt, xmlChar first,
         buf = in->base;
         len = in->length;
     } else {
-        buf = in->buf->buffer->content;
-        len = in->buf->buffer->use;
+        buf = xmlBufContent(in->buf->buffer);
+        len = xmlBufUse(in->buf->buffer);
     }
 
     /* take into account the sequence length */
@@ -5118,13 +5162,13 @@ htmlParseLookupSequence(htmlParserCtxtPtr ctxt, xmlChar first,
  * @stop: Array of chars, which stop the lookup.
  * @stopLen: Length of stop-Array
  *
- * Try to find if any char of the stop-Array is available in the input 
+ * Try to find if any char of the stop-Array is available in the input
  * stream.
  * This function has a side effect of (possibly) incrementing ctxt->checkIndex
  * to avoid rescanning sequences of bytes, it DOES change the state of the
  * parser, do not use liberally.
  *
- * Returns the index to the current parsing point if a stopChar 
+ * Returns the index to the current parsing point if a stopChar
  *      is available, -1 otherwise.
  */
 static int
@@ -5152,8 +5196,8 @@ htmlParseLookupChars(htmlParserCtxtPtr ctxt, const xmlChar * stop,
         buf = in->base;
         len = in->length;
     } else {
-        buf = in->buf->buffer->content;
-        len = in->buf->buffer->use;
+        buf = xmlBufContent(in->buf->buffer);
+        len = xmlBufUse(in->buf->buffer);
     }
 
     for (; base < len; base++) {
@@ -5264,7 +5308,7 @@ htmlParseTryOrFinish(htmlParserCtxtPtr ctxt, int terminate) {
        if (in->buf == NULL)
            avail = in->length - (in->cur - in->base);
        else
-           avail = in->buf->buffer->use - (in->cur - in->base);
+           avail = xmlBufUse(in->buf->buffer) - (in->cur - in->base);
        if ((avail == 0) && (terminate)) {
            htmlAutoCloseOnEnd(ctxt);
            if ((ctxt->nameNr == 0) && (ctxt->instate != XML_PARSER_EOF)) {
@@ -5300,7 +5344,7 @@ htmlParseTryOrFinish(htmlParserCtxtPtr ctxt, int terminate) {
                    if (in->buf == NULL)
                        avail = in->length - (in->cur - in->base);
                    else
-                       avail = in->buf->buffer->use - (in->cur - in->base);
+                       avail = xmlBufUse(in->buf->buffer) - (in->cur - in->base);
                }
                if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
                    ctxt->sax->setDocumentLocator(ctxt->userData,
@@ -5342,7 +5386,7 @@ htmlParseTryOrFinish(htmlParserCtxtPtr ctxt, int terminate) {
                if (in->buf == NULL)
                    avail = in->length - (in->cur - in->base);
                else
-                   avail = in->buf->buffer->use - (in->cur - in->base);
+                   avail = xmlBufUse(in->buf->buffer) - (in->cur - in->base);
                /*
                 * no chars in buffer
                 */
@@ -5415,7 +5459,7 @@ htmlParseTryOrFinish(htmlParserCtxtPtr ctxt, int terminate) {
                if (in->buf == NULL)
                    avail = in->length - (in->cur - in->base);
                else
-                   avail = in->buf->buffer->use - (in->cur - in->base);
+                   avail = xmlBufUse(in->buf->buffer) - (in->cur - in->base);
                if (avail < 2)
                    goto done;
                cur = in->cur[0];
@@ -5456,7 +5500,7 @@ htmlParseTryOrFinish(htmlParserCtxtPtr ctxt, int terminate) {
                if (in->buf == NULL)
                    avail = in->length - (in->cur - in->base);
                else
-                   avail = in->buf->buffer->use - (in->cur - in->base);
+                   avail = xmlBufUse(in->buf->buffer) - (in->cur - in->base);
                if (avail < 1)
                    goto done;
                cur = in->cur[0];
@@ -5654,9 +5698,15 @@ htmlParseTryOrFinish(htmlParserCtxtPtr ctxt, int terminate) {
                    if ((cur != '<') && (cur != '&')) {
                        if (ctxt->sax != NULL) {
                            if (IS_BLANK_CH(cur)) {
-                               if (ctxt->sax->ignorableWhitespace != NULL)
-                                   ctxt->sax->ignorableWhitespace(
-                                           ctxt->userData, &cur, 1);
+                               if (ctxt->keepBlanks) {
+                                   if (ctxt->sax->characters != NULL)
+                                       ctxt->sax->characters(
+                                               ctxt->userData, &cur, 1);
+                               } else {
+                                   if (ctxt->sax->ignorableWhitespace != NULL)
+                                       ctxt->sax->ignorableWhitespace(
+                                               ctxt->userData, &cur, 1);
+                               }
                            } else {
                                htmlCheckParagraph(ctxt);
                                if (ctxt->sax->characters != NULL)
@@ -5979,8 +6029,8 @@ htmlParseChunk(htmlParserCtxtPtr ctxt, const char *chunk, int size,
     }
     if ((size > 0) && (chunk != NULL) && (ctxt->input != NULL) &&
         (ctxt->input->buf != NULL) && (ctxt->instate != XML_PARSER_EOF))  {
-       int base = ctxt->input->base - ctxt->input->buf->buffer->content;
-       int cur = ctxt->input->cur - ctxt->input->base;
+       size_t base = xmlBufGetInputBase(ctxt->input->buf->buffer, ctxt->input);
+       size_t cur = ctxt->input->cur - ctxt->input->base;
        int res;
 
        res = xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
@@ -5989,10 +6039,7 @@ htmlParseChunk(htmlParserCtxtPtr ctxt, const char *chunk, int size,
            ctxt->disableSAX = 1;
            return (XML_PARSER_EOF);
        }
-       ctxt->input->base = ctxt->input->buf->buffer->content + base;
-       ctxt->input->cur = ctxt->input->base + cur;
-       ctxt->input->end =
-         &ctxt->input->buf->buffer->content[ctxt->input->buf->buffer->use];
+        xmlBufSetInputBaseCur(ctxt->input->buf->buffer, ctxt->input, base, cur);
 #ifdef DEBUG_PUSH
        xmlGenericError(xmlGenericErrorContext, "HPP: pushed %d\n", size);
 #endif
@@ -6007,13 +6054,16 @@ htmlParseChunk(htmlParserCtxtPtr ctxt, const char *chunk, int size,
            if ((in->encoder != NULL) && (in->buffer != NULL) &&
                    (in->raw != NULL)) {
                int nbchars;
+               size_t base = xmlBufGetInputBase(in->buffer, ctxt->input);
+               size_t current = ctxt->input->cur - ctxt->input->base;
 
-               nbchars = xmlCharEncInFunc(in->encoder, in->buffer, in->raw);
+               nbchars = xmlCharEncInput(in, terminate);
                if (nbchars < 0) {
                    htmlParseErr(ctxt, XML_ERR_INVALID_ENCODING,
                                 "encoder error\n", NULL, NULL);
                    return(XML_ERR_INVALID_ENCODING);
                }
+               xmlBufSetInputBaseCur(in->buffer, ctxt->input, base, current);
            }
        }
     }
@@ -6107,24 +6157,18 @@ htmlCreatePushParserCtxt(htmlSAXHandlerPtr sax, void *user_data,
        inputStream->filename = (char *)
            xmlCanonicPath((const xmlChar *) filename);
     inputStream->buf = buf;
-    inputStream->base = inputStream->buf->buffer->content;
-    inputStream->cur = inputStream->buf->buffer->content;
-    inputStream->end =
-       &inputStream->buf->buffer->content[inputStream->buf->buffer->use];
+    xmlBufResetInput(buf->buffer, inputStream);
 
     inputPush(ctxt, inputStream);
 
     if ((size > 0) && (chunk != NULL) && (ctxt->input != NULL) &&
         (ctxt->input->buf != NULL))  {
-       int base = ctxt->input->base - ctxt->input->buf->buffer->content;
-       int cur = ctxt->input->cur - ctxt->input->base;
+       size_t base = xmlBufGetInputBase(ctxt->input->buf->buffer, ctxt->input);
+       size_t cur = ctxt->input->cur - ctxt->input->base;
 
        xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
 
-       ctxt->input->base = ctxt->input->buf->buffer->content + base;
-       ctxt->input->cur = ctxt->input->base + cur;
-       ctxt->input->end =
-           &ctxt->input->buf->buffer->content[ctxt->input->buf->buffer->use];
+        xmlBufSetInputBaseCur(ctxt->input->buf->buffer, ctxt->input, base, cur);
 #ifdef DEBUG_PUSH
        xmlGenericError(xmlGenericErrorContext, "HPP: pushed %d\n", size);
 #endif