*
* Handle a fatal parser error, i.e. violating Well-Formedness constraints
*/
-static void
+static void LIBXML_ATTR_FORMAT(3,0)
htmlParseErr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
const char *msg, const xmlChar *str1, const xmlChar *str2)
{
*
* Handle a fatal parser error, i.e. violating Well-Formedness constraints
*/
-static void
+static void LIBXML_ATTR_FORMAT(3,0)
htmlParseErrInt(xmlParserCtxtPtr ctxt, xmlParserErrors error,
const char *msg, int val)
{
#define UPP(val) (toupper(ctxt->input->cur[(val)]))
#define CUR_PTR ctxt->input->cur
+#define BASE_PTR ctxt->input->base
#define SHRINK if ((ctxt->input->cur - ctxt->input->base > 2 * INPUT_CHUNK) && \
(ctxt->input->end - ctxt->input->cur < 2 * INPUT_CHUNK)) \
(*in == '_') || (*in == '-') ||
(*in == ':') || (*in == '.'))
in++;
+
+ if (in == ctxt->input->end)
+ return(NULL);
+
if ((*in > 0) && (*in < 0x80)) {
count = in - ctxt->input->cur;
ret = xmlDictLookup(ctxt->dict, ctxt->input->cur, count);
int len = 0, l;
int c;
int count = 0;
+ const xmlChar *base = ctxt->input->base;
/*
* Handler for more complex cases
len += l;
NEXTL(l);
c = CUR_CHAR(l);
+ if (ctxt->input->base != base) {
+ /*
+ * We switched away from an unknown encoding, so the input
+ * buffer changed location; we had better start again.
+ */
+ return(htmlParseNameComplex(ctxt));
+ }
}
+
+ if (ctxt->input->base > ctxt->input->cur - len)
+ return(NULL);
+
return(xmlDictLookup(ctxt->dict, ctxt->input->cur - len, len));
}
static xmlChar *
htmlParseSystemLiteral(htmlParserCtxtPtr ctxt) {
- const xmlChar *q;
+ size_t len = 0, startPosition = 0;
xmlChar *ret = NULL;
if (CUR == '"') {
NEXT;
- q = CUR_PTR;
- while ((IS_CHAR_CH(CUR)) && (CUR != '"'))
+
+ if (CUR_PTR < BASE_PTR)
+ return(ret);
+ startPosition = CUR_PTR - BASE_PTR;
+
+ while ((IS_CHAR_CH(CUR)) && (CUR != '"')) {
NEXT;
+ len++;
+ }
if (!IS_CHAR_CH(CUR)) {
htmlParseErr(ctxt, XML_ERR_LITERAL_NOT_FINISHED,
"Unfinished SystemLiteral\n", NULL, NULL);
} else {
- ret = xmlStrndup(q, CUR_PTR - q);
+ ret = xmlStrndup((BASE_PTR+startPosition), len);
NEXT;
}
} else if (CUR == '\'') {
NEXT;
- q = CUR_PTR;
- while ((IS_CHAR_CH(CUR)) && (CUR != '\''))
+
+ if (CUR_PTR < BASE_PTR)
+ return(ret);
+ startPosition = CUR_PTR - BASE_PTR;
+
+ while ((IS_CHAR_CH(CUR)) && (CUR != '\'')) {
NEXT;
+ len++;
+ }
if (!IS_CHAR_CH(CUR)) {
htmlParseErr(ctxt, XML_ERR_LITERAL_NOT_FINISHED,
"Unfinished SystemLiteral\n", NULL, NULL);
} else {
- ret = xmlStrndup(q, CUR_PTR - q);
+ ret = xmlStrndup((BASE_PTR+startPosition), len);
NEXT;
}
} else {
static xmlChar *
htmlParsePubidLiteral(htmlParserCtxtPtr ctxt) {
- const xmlChar *q;
+ size_t len = 0, startPosition = 0;
xmlChar *ret = NULL;
/*
* Name ::= (Letter | '_') (NameChar)*
*/
if (CUR == '"') {
NEXT;
- q = CUR_PTR;
- while (IS_PUBIDCHAR_CH(CUR)) NEXT;
+
+ if (CUR_PTR < BASE_PTR)
+ return(ret);
+ startPosition = CUR_PTR - BASE_PTR;
+
+ while (IS_PUBIDCHAR_CH(CUR)) {
+ len++;
+ NEXT;
+ }
+
if (CUR != '"') {
htmlParseErr(ctxt, XML_ERR_LITERAL_NOT_FINISHED,
"Unfinished PubidLiteral\n", NULL, NULL);
} else {
- ret = xmlStrndup(q, CUR_PTR - q);
+ ret = xmlStrndup((BASE_PTR + startPosition), len);
NEXT;
}
} else if (CUR == '\'') {
NEXT;
- q = CUR_PTR;
- while ((IS_PUBIDCHAR_CH(CUR)) && (CUR != '\''))
- NEXT;
+
+ if (CUR_PTR < BASE_PTR)
+ return(ret);
+ startPosition = CUR_PTR - BASE_PTR;
+
+ while ((IS_PUBIDCHAR_CH(CUR)) && (CUR != '\'')){
+ len++;
+ NEXT;
+ }
+
if (CUR != '\'') {
htmlParseErr(ctxt, XML_ERR_LITERAL_NOT_FINISHED,
"Unfinished PubidLiteral\n", NULL, NULL);
} else {
- ret = xmlStrndup(q, CUR_PTR - q);
+ ret = xmlStrndup((BASE_PTR + startPosition), len);
NEXT;
}
} else {
/**
- * htmlParseCharData:
+ * htmlParseCharDataInternal:
* @ctxt: an HTML parser context
+ * @readahead: optional read-ahead character in the ASCII range (0 for none)
*
* parse a CharData section.
* if we are within a CDATA section ']]>' marks an end of section.
*/
static void
-htmlParseCharData(htmlParserCtxtPtr ctxt) {
- xmlChar buf[HTML_PARSER_BIG_BUFFER_SIZE + 5];
+htmlParseCharDataInternal(htmlParserCtxtPtr ctxt, int readahead) {
+ xmlChar buf[HTML_PARSER_BIG_BUFFER_SIZE + 6];
int nbchar = 0;
int cur, l;
int chunk = 0;
+ if (readahead)
+ buf[nbchar++] = readahead;
+
SHRINK;
cur = CUR_CHAR(l);
while (((cur != '<') || (ctxt->token == '<')) &&
}
/**
+ * htmlParseCharData:
+ * @ctxt: an HTML parser context
+ *
+ * Parse a CharData section. This is a thin wrapper that calls
+ * htmlParseCharDataInternal() with a read-ahead value of 0, i.e. with
+ * no read-ahead character.
+ * If we are within a CDATA section, ']]>' marks the end of the section.
+ *
+ * [14] CharData ::= [^<&]* - ([^<&]* ']]>' [^<&]*)
+ */
+
+static void
+htmlParseCharData(htmlParserCtxtPtr ctxt) {
+ htmlParseCharDataInternal(ctxt, 0);
+}
+
+/**
* htmlParseExternalID:
* @ctxt: an HTML parser context
* @publicID: a xmlChar** receiving PubidLiteral
ctxt->instate = state;
return;
}
+ len = 0;
+ buf[len] = 0;
q = CUR_CHAR(ql);
+ if (!IS_CHAR(q))
+ goto unfinished;
NEXTL(ql);
r = CUR_CHAR(rl);
+ if (!IS_CHAR(r))
+ goto unfinished;
NEXTL(rl);
cur = CUR_CHAR(l);
- len = 0;
while (IS_CHAR(cur) &&
((cur != '>') ||
(r != '-') || (q != '-'))) {
}
}
buf[len] = 0;
- if (!IS_CHAR(cur)) {
- htmlParseErr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
- "Comment not terminated \n<!--%.50s\n", buf, NULL);
- xmlFree(buf);
- } else {
+ if (IS_CHAR(cur)) {
NEXT;
if ((ctxt->sax != NULL) && (ctxt->sax->comment != NULL) &&
(!ctxt->disableSAX))
ctxt->sax->comment(ctxt->userData, buf);
xmlFree(buf);
+ ctxt->instate = state;
+ return;
}
- ctxt->instate = state;
+
+unfinished:
+ htmlParseErr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
+ "Comment not terminated \n<!--%.50s\n", buf, NULL);
+ xmlFree(buf);
}
/**
htmlParseErr(ctxt, XML_ERR_NAME_REQUIRED,
"htmlParseStartTag: invalid element name\n",
NULL, NULL);
+ /* in recovery mode, preserve the text of classic misconstructs */
+ if ((ctxt->recovery) && ((IS_BLANK_CH(CUR)) || (CUR == '<') ||
+ (CUR == '=') || (CUR == '>') || (((CUR >= '0') && (CUR <= '9'))))) {
+ htmlParseCharDataInternal(ctxt, '<');
+ return(-1);
+ }
+
+
/* Dump the bogus tag like browsers do */
while ((IS_CHAR_CH(CUR)) && (CUR != '>') &&
(ctxt->instate != XML_PARSER_EOF))
if (ctxt->keepBlanks) {
if (ctxt->sax->characters != NULL)
ctxt->sax->characters(
- ctxt->userData, &cur, 1);
+ ctxt->userData, &in->cur[0], 1);
} else {
if (ctxt->sax->ignorableWhitespace != NULL)
ctxt->sax->ignorableWhitespace(
- ctxt->userData, &cur, 1);
+ ctxt->userData, &in->cur[0], 1);
}
} else {
htmlCheckParagraph(ctxt);
if (ctxt->sax->characters != NULL)
ctxt->sax->characters(
- ctxt->userData, &cur, 1);
+ ctxt->userData, &in->cur[0], 1);
}
}
ctxt->token = 0;
* DICT_FREE:
* @str: a string
*
- * Free a string if it is not owned by the "dict" dictionnary in the
+ * Free a string if it is not owned by the "dict" dictionary in the
* current scope
*/
#define DICT_FREE(str) \