#include <libxml/globals.h>
#include <libxml/uri.h>
+#include "buf.h"
+
/************************************************************************
* *
- * Getting/Setting encoding meta tags *
+ * Getting/Setting encoding meta tags *
* *
************************************************************************/
/**
* htmlGetMetaEncoding:
* @doc: the document
- *
+ *
* Encoding definition lookup in the Meta tags
*
* Returns the current encoding as flagged in the HTML source
found_content:
encoding = xmlStrstr(content, BAD_CAST"charset=");
- if (encoding == NULL)
+ if (encoding == NULL)
encoding = xmlStrstr(content, BAD_CAST"Charset=");
- if (encoding == NULL)
+ if (encoding == NULL)
encoding = xmlStrstr(content, BAD_CAST"CHARSET=");
if (encoding != NULL) {
encoding += 8;
} else {
encoding = xmlStrstr(content, BAD_CAST"charset =");
- if (encoding == NULL)
+ if (encoding == NULL)
encoding = xmlStrstr(content, BAD_CAST"Charset =");
- if (encoding == NULL)
+ if (encoding == NULL)
encoding = xmlStrstr(content, BAD_CAST"CHARSET =");
if (encoding != NULL)
encoding += 9;
* htmlSetMetaEncoding:
* @doc: the document
* @encoding: the encoding string
- *
+ *
* Sets the current encoding in the Meta tags
* NOTE: this will not change the document content encoding, just
* the META flag associated.
*/
int
htmlSetMetaEncoding(htmlDocPtr doc, const xmlChar *encoding) {
- htmlNodePtr cur, meta;
- const xmlChar *content;
+ htmlNodePtr cur, meta = NULL, head = NULL;
+ const xmlChar *content = NULL;
char newcontent[100];
+ newcontent[0] = 0;
if (doc == NULL)
return(-1);
+ /* html isn't a real encoding it's just libxml2 way to get entities */
+ if (!xmlStrcasecmp(encoding, BAD_CAST "html"))
+ return(-1);
+
if (encoding != NULL) {
snprintf(newcontent, sizeof(newcontent), "text/html; charset=%s",
(char *)encoding);
if ((cur->type == XML_ELEMENT_NODE) && (cur->name != NULL)) {
if (xmlStrcasecmp(cur->name, BAD_CAST"head") == 0)
break;
- if (xmlStrcasecmp(cur->name, BAD_CAST"meta") == 0)
+ if (xmlStrcasecmp(cur->name, BAD_CAST"meta") == 0) {
+ head = cur->parent;
goto found_meta;
+ }
}
cur = cur->next;
}
if (cur == NULL)
return(-1);
found_head:
- if (cur->children == NULL) {
- if (encoding == NULL)
- return(0);
- meta = xmlNewDocNode(doc, NULL, BAD_CAST"meta", NULL);
- xmlAddChild(cur, meta);
- xmlNewProp(meta, BAD_CAST"http-equiv", BAD_CAST"Content-Type");
- xmlNewProp(meta, BAD_CAST"content", BAD_CAST newcontent);
- return(0);
- }
+ head = cur;
+ if (cur->children == NULL)
+ goto create;
cur = cur->children;
found_meta:
- if (encoding != NULL) {
- /*
- * Create a new Meta element with the right attributes
- */
-
- meta = xmlNewDocNode(doc, NULL, BAD_CAST"meta", NULL);
- xmlAddPrevSibling(cur, meta);
- xmlNewProp(meta, BAD_CAST"http-equiv", BAD_CAST"Content-Type");
- xmlNewProp(meta, BAD_CAST"content", BAD_CAST newcontent);
- }
-
/*
- * Search and destroy all the remaining the meta elements carrying
+ * Search and update all the remaining the meta elements carrying
* encoding informations
*/
while (cur != NULL) {
if ((!xmlStrcasecmp(attr->name, BAD_CAST"http-equiv"))
&& (!xmlStrcasecmp(value, BAD_CAST"Content-Type")))
http = 1;
- else
+ else
{
- if ((value != NULL) &&
- (!xmlStrcasecmp(attr->name, BAD_CAST"content")))
- content = value;
+ if ((value != NULL) &&
+ (!xmlStrcasecmp(attr->name, BAD_CAST"content")))
+ content = value;
}
if ((http != 0) && (content != NULL))
break;
}
if ((http != 0) && (content != NULL)) {
meta = cur;
- cur = cur->next;
- xmlUnlinkNode(meta);
- xmlFreeNode(meta);
- continue;
+ break;
}
}
}
cur = cur->next;
}
+create:
+ if (meta == NULL) {
+ if ((encoding != NULL) && (head != NULL)) {
+ /*
+ * Create a new Meta element with the right attributes
+ */
+
+ meta = xmlNewDocNode(doc, NULL, BAD_CAST"meta", NULL);
+ if (head->children == NULL)
+ xmlAddChild(head, meta);
+ else
+ xmlAddPrevSibling(head->children, meta);
+ xmlNewProp(meta, BAD_CAST"http-equiv", BAD_CAST"Content-Type");
+ xmlNewProp(meta, BAD_CAST"content", BAD_CAST newcontent);
+ }
+ } else {
+ /* remove the meta tag if NULL is passed */
+ if (encoding == NULL) {
+ xmlUnlinkNode(meta);
+ xmlFreeNode(meta);
+ }
+ /* change the document only if there is a real encoding change */
+ else if (xmlStrcasestr(content, encoding) == NULL) {
+ xmlSetProp(meta, BAD_CAST"content", BAD_CAST newcontent);
+ }
+ }
+
+
return(0);
}
* @name: the name of the attribute to check
*
* Determine if a given attribute is a boolean attribute.
- *
+ *
* returns: false if the attribute is not boolean, true otherwise.
*/
int
}
#ifdef LIBXML_OUTPUT_ENABLED
+/*
+ * private routine exported from xmlIO.c
+ */
+xmlOutputBufferPtr
+xmlAllocOutputBufferInternal(xmlCharEncodingHandlerPtr encoder);
/************************************************************************
* *
- * Output error handlers *
+ * Output error handlers *
* *
************************************************************************/
/**
/************************************************************************
* *
- * Dumping HTML tree content to a simple buffer *
+ * Dumping HTML tree content to a simple buffer *
* *
************************************************************************/
-static int
-htmlNodeDumpFormat(xmlBufferPtr buf, xmlDocPtr doc, xmlNodePtr cur,
- int format);
-
/**
- * htmlNodeDumpFormat:
- * @buf: the HTML buffer output
+ * htmlBufNodeDumpFormat:
+ * @buf: the xmlBufPtr output
* @doc: the document
* @cur: the current node
* @format: should formatting spaces been added
*
* Returns the number of byte written or -1 in case of error
*/
-static int
-htmlNodeDumpFormat(xmlBufferPtr buf, xmlDocPtr doc, xmlNodePtr cur,
+static size_t
+htmlBufNodeDumpFormat(xmlBufPtr buf, xmlDocPtr doc, xmlNodePtr cur,
int format) {
- unsigned int use;
+ size_t use;
int ret;
xmlOutputBufferPtr outbuf;
outbuf->context = NULL;
outbuf->written = 0;
- use = buf->use;
+ use = xmlBufUse(buf);
htmlNodeDumpFormatOutput(outbuf, doc, cur, NULL, format);
xmlFree(outbuf);
- ret = buf->use - use;
+ ret = xmlBufUse(buf) - use;
return (ret);
}
*/
int
htmlNodeDump(xmlBufferPtr buf, xmlDocPtr doc, xmlNodePtr cur) {
+ xmlBufPtr buffer;
+ size_t ret;
+
+ if ((buf == NULL) || (cur == NULL))
+ return(-1);
+
xmlInitParser();
+ buffer = xmlBufFromBuffer(buf);
+ if (buffer == NULL)
+ return(-1);
+
+ ret = htmlBufNodeDumpFormat(buffer, doc, cur, 1);
- return(htmlNodeDumpFormat(buf, doc, cur, 1));
+ xmlBufBackToBuffer(buffer);
+
+ if (ret > INT_MAX)
+ return(-1);
+ return((int) ret);
}
/**
if (enc != XML_CHAR_ENCODING_UTF8) {
handler = xmlFindCharEncodingHandler(encoding);
if (handler == NULL)
- return(-1);
+ htmlSaveErr(XML_SAVE_UNKNOWN_ENCODING, NULL, encoding);
}
}
if (handler == NULL)
handler = xmlFindCharEncodingHandler("ascii");
- /*
+ /*
* save the content to a temp buffer.
*/
buf = xmlOutputBufferCreateFile(out, handler);
}
handler = xmlFindCharEncodingHandler(encoding);
- if (handler == NULL) {
- *mem = NULL;
- *size = 0;
- return;
- }
+ if (handler == NULL)
+ htmlSaveErr(XML_SAVE_UNKNOWN_ENCODING, NULL, encoding);
+
} else {
handler = xmlFindCharEncodingHandler(encoding);
}
if (handler == NULL)
handler = xmlFindCharEncodingHandler("ascii");
- buf = xmlAllocOutputBuffer(handler);
+ buf = xmlAllocOutputBufferInternal(handler);
if (buf == NULL) {
*mem = NULL;
*size = 0;
return;
}
- htmlDocContentDumpFormatOutput(buf, cur, NULL, format);
+ htmlDocContentDumpFormatOutput(buf, cur, NULL, format);
xmlOutputBufferFlush(buf);
if (buf->conv != NULL) {
- *size = buf->conv->use;
- *mem = xmlStrndup(buf->conv->content, *size);
+ *size = xmlBufUse(buf->conv);
+ *mem = xmlStrndup(xmlBufContent(buf->conv), *size);
} else {
- *size = buf->buffer->use;
- *mem = xmlStrndup(buf->buffer->content, *size);
+ *size = xmlBufUse(buf->buffer);
+ *mem = xmlStrndup(xmlBufContent(buf->buffer), *size);
}
(void)xmlOutputBufferClose(buf);
}
/************************************************************************
* *
- * Dumping HTML tree content to an I/O output buffer *
+ * Dumping HTML tree content to an I/O output buffer *
* *
************************************************************************/
* @buf: the HTML buffer output
* @doc: the document
* @encoding: the encoding string
- *
+ *
* TODO: check whether encoding is needed
*
* Dump the HTML document DTD, if any.
xmlOutputBufferWriteString(buf, (const char *)cur->name);
if (cur->ExternalID != NULL) {
xmlOutputBufferWriteString(buf, " PUBLIC ");
- xmlBufferWriteQuotedString(buf->buffer, cur->ExternalID);
+ xmlBufWriteQuotedString(buf->buffer, cur->ExternalID);
if (cur->SystemID != NULL) {
xmlOutputBufferWriteString(buf, " ");
- xmlBufferWriteQuotedString(buf->buffer, cur->SystemID);
- }
- } else if (cur->SystemID != NULL) {
+ xmlBufWriteQuotedString(buf->buffer, cur->SystemID);
+ }
+ } else if (cur->SystemID != NULL &&
+ xmlStrcmp(cur->SystemID, BAD_CAST "about:legacy-compat")) {
xmlOutputBufferWriteString(buf, " SYSTEM ");
- xmlBufferWriteQuotedString(buf->buffer, cur->SystemID);
+ xmlBufWriteQuotedString(buf->buffer, cur->SystemID);
}
xmlOutputBufferWriteString(buf, ">\n");
}
xmlChar *value;
/*
- * TODO: The html output method should not escape a & character
- * occurring in an attribute value immediately followed by
- * a { character (see Section B.7.1 of the HTML 4.0 Recommendation).
+ * The html output method should not escape a & character
+ * occurring in an attribute value immediately followed by
+ * a { character (see Section B.7.1 of the HTML 4.0 Recommendation).
+ * This is implemented in xmlEncodeEntitiesReentrant
*/
if (cur == NULL) {
(!xmlStrcasecmp(cur->name, BAD_CAST "src")) ||
((!xmlStrcasecmp(cur->name, BAD_CAST "name")) &&
(!xmlStrcasecmp(cur->parent->name, BAD_CAST "a"))))) {
- xmlChar *escaped;
xmlChar *tmp = value;
+ /* xmlURIEscapeStr() escapes '"' so it can be safely used. */
+ xmlBufCCat(buf->buffer, "\"");
while (IS_BLANK_CH(*tmp)) tmp++;
- escaped = xmlURIEscapeStr(tmp, BAD_CAST"@/:=?;#%&,+");
- if (escaped != NULL) {
- xmlBufferWriteQuotedString(buf->buffer, escaped);
- xmlFree(escaped);
- } else {
- xmlBufferWriteQuotedString(buf->buffer, value);
+ /* URI Escape everything, except server side includes. */
+ for ( ; ; ) {
+ xmlChar *escaped;
+ xmlChar endChar;
+ xmlChar *end = NULL;
+ xmlChar *start = (xmlChar *)xmlStrstr(tmp, BAD_CAST "<!--");
+ if (start != NULL) {
+ end = (xmlChar *)xmlStrstr(tmp, BAD_CAST "-->");
+ if (end != NULL) {
+ *start = '\0';
+ }
+ }
+
+ /* Escape the whole string, or until start (set to '\0'). */
+ escaped = xmlURIEscapeStr(tmp, BAD_CAST"@/:=?;#%&,+");
+ if (escaped != NULL) {
+ xmlBufCat(buf->buffer, escaped);
+ xmlFree(escaped);
+ } else {
+ xmlBufCat(buf->buffer, tmp);
+ }
+
+ if (end == NULL) { /* Everything has been written. */
+ break;
+ }
+
+ /* Do not escape anything within server side includes. */
+ *start = '<'; /* Restore the first character of "<!--". */
+ end += 3; /* strlen("-->") */
+ endChar = *end;
+ *end = '\0';
+ xmlBufCat(buf->buffer, start);
+ *end = endChar;
+ tmp = end;
}
+
+ xmlBufCCat(buf->buffer, "\"");
} else {
- xmlBufferWriteQuotedString(buf->buffer, value);
+ xmlBufWriteQuotedString(buf->buffer, value);
}
xmlFree(value);
} else {
htmlDocContentDumpOutput(buf, (xmlDocPtr) cur, encoding);
return;
}
+ if (cur->type == XML_ATTRIBUTE_NODE) {
+ htmlAttrDumpOutput(buf, doc, (xmlAttrPtr) cur, encoding);
+ return;
+ }
if (cur->type == HTML_TEXT_NODE) {
if (cur->content != NULL) {
if (((cur->name == (const xmlChar *)xmlStringText) ||
handler = xmlFindCharEncodingHandler(encoding);
if (handler == NULL)
- return(-1);
+ htmlSaveErr(XML_SAVE_UNKNOWN_ENCODING, NULL, encoding);
} else {
handler = xmlFindCharEncodingHandler(encoding);
}
if ((cur == NULL) || (filename == NULL))
return(-1);
-
+
xmlInitParser();
encoding = (const char *) htmlGetMetaEncoding(cur);
handler = xmlFindCharEncodingHandler(encoding);
if (handler == NULL)
- return(-1);
+ htmlSaveErr(XML_SAVE_UNKNOWN_ENCODING, NULL, encoding);
}
}
if (handler == NULL)
handler = xmlFindCharEncodingHandler("ascii");
- /*
+ /*
* save the content to a temp buffer.
*/
buf = xmlOutputBufferCreateFilename(filename, handler, cur->compression);
* @encoding: the document encoding
*
* Dump an HTML document to a file using a given encoding.
- *
+ *
* returns: the number of byte written or -1 in case of failure.
*/
int
if ((cur == NULL) || (filename == NULL))
return(-1);
-
+
xmlInitParser();
if (encoding != NULL) {
handler = xmlFindCharEncodingHandler(encoding);
if (handler == NULL)
- return(-1);
- htmlSetMetaEncoding(cur, (const xmlChar *) encoding);
+ htmlSaveErr(XML_SAVE_UNKNOWN_ENCODING, NULL, encoding);
}
+ htmlSetMetaEncoding(cur, (const xmlChar *) encoding);
} else {
htmlSetMetaEncoding(cur, (const xmlChar *) "UTF-8");
}
if (handler == NULL)
handler = xmlFindCharEncodingHandler("ascii");
- /*
+ /*
* save the content to a temp buffer.
*/
buf = xmlOutputBufferCreateFilename(filename, handler, 0);
*
* Dump an HTML document to a file using a given encoding
* and formatting returns/spaces are added.
- *
+ *
* returns: the number of byte written or -1 in case of failure.
*/
int