2 * HTMLtree.c : implementation of access function for an HTML tree.
4 * See Copyright for the status of this software.
12 #ifdef LIBXML_HTML_ENABLED
14 #include <string.h> /* for memset() only ! */
23 #include <libxml/xmlmemory.h>
24 #include <libxml/HTMLparser.h>
25 #include <libxml/HTMLtree.h>
26 #include <libxml/entities.h>
27 #include <libxml/valid.h>
28 #include <libxml/xmlerror.h>
29 #include <libxml/parserInternals.h>
30 #include <libxml/globals.h>
31 #include <libxml/uri.h>
35 /************************************************************************
37 * Getting/Setting encoding meta tags *
39 ************************************************************************/
42 * htmlGetMetaEncoding:
45 * Encoding definition lookup in the Meta tags
47 * Returns the current encoding as flagged in the HTML source
/*
 * Look up the charset announced by a meta element that carries both an
 * http-equiv attribute equal to Content-Type and a content attribute.
 * NOTE(review): this listing omits several statements of the function
 * (tree-walking steps, returns, closing braces); the notes below describe
 * only the lines that are visible.
 */
50 htmlGetMetaEncoding(htmlDocPtr doc) {
52 const xmlChar *content;
53 const xmlChar *encoding;
/*
 * Element names are compared with xmlStrEqual here, whereas
 * htmlSetMetaEncoding compares the same names with xmlStrcasecmp.
 */
63 if ((cur->type == XML_ELEMENT_NODE) && (cur->name != NULL)) {
64 if (xmlStrEqual(cur->name, BAD_CAST"html"))
66 if (xmlStrEqual(cur->name, BAD_CAST"head"))
68 if (xmlStrEqual(cur->name, BAD_CAST"meta"))
/* Second scan: only head and meta are looked for. */
81 if ((cur->type == XML_ELEMENT_NODE) && (cur->name != NULL)) {
82 if (xmlStrEqual(cur->name, BAD_CAST"head"))
84 if (xmlStrEqual(cur->name, BAD_CAST"meta"))
95 * Search the meta elements
99 if ((cur->type == XML_ELEMENT_NODE) && (cur->name != NULL)) {
100 if (xmlStrEqual(cur->name, BAD_CAST"meta")) {
101 xmlAttrPtr attr = cur->properties;
103 const xmlChar *value;
107 while (attr != NULL) {
/* Only attributes whose value is exactly one text node are examined. */
108 if ((attr->children != NULL) &&
109 (attr->children->type == XML_TEXT_NODE) &&
110 (attr->children->next == NULL)) {
111 value = attr->children->content;
/* Attribute names and the Content-Type value are matched ignoring case. */
112 if ((!xmlStrcasecmp(attr->name, BAD_CAST"http-equiv"))
113 && (!xmlStrcasecmp(value, BAD_CAST"Content-Type")))
115 else if ((value != NULL)
116 && (!xmlStrcasecmp(attr->name, BAD_CAST"content")))
/* Both halves (http-equiv match and a content value) are required. */
118 if ((http != 0) && (content != NULL))
/*
 * The charset marker is searched with three fixed spellings (lower,
 * capitalized, upper); other mixed-case spellings are not tried here.
 */
130 encoding = xmlStrstr(content, BAD_CAST"charset=");
131 if (encoding == NULL)
132 encoding = xmlStrstr(content, BAD_CAST"Charset=");
133 if (encoding == NULL)
134 encoding = xmlStrstr(content, BAD_CAST"CHARSET=");
135 if (encoding != NULL) {
/* Same three spellings again, this time with one space before the '='. */
138 encoding = xmlStrstr(content, BAD_CAST"charset =");
139 if (encoding == NULL)
140 encoding = xmlStrstr(content, BAD_CAST"Charset =");
141 if (encoding == NULL)
142 encoding = xmlStrstr(content, BAD_CAST"CHARSET =");
143 if (encoding != NULL)
146 if (encoding != NULL) {
/* Step over spaces and tabs at the current position. */
147 while ((*encoding == ' ') || (*encoding == '\t')) encoding++;
153 * htmlSetMetaEncoding:
155 * @encoding: the encoding string
157 * Sets the current encoding in the Meta tags
158 * NOTE: this will not change the document content encoding, just
159 * the META flag associated.
161 * Returns 0 in case of success and -1 in case of error
/*
 * Update, create or remove the meta element that declares the document
 * encoding (http-equiv Content-Type plus a content attribute).
 * NOTE(review): parts of this function are missing from the listing; the
 * notes below describe only the visible lines.
 */
164 htmlSetMetaEncoding(htmlDocPtr doc, const xmlChar *encoding) {
165 htmlNodePtr cur, meta = NULL, head = NULL;
166 const xmlChar *content = NULL;
/* Fixed-size scratch buffer for the new content attribute value. */
167 char newcontent[100];
174 /* html isn't a real encoding it's just libxml2 way to get entities */
175 if (!xmlStrcasecmp(encoding, BAD_CAST "html"))
178 if (encoding != NULL) {
/*
 * snprintf is bounded by sizeof(newcontent), so an encoding name that
 * does not fit is cut short rather than overflowing the buffer.
 */
179 snprintf(newcontent, sizeof(newcontent), "text/html; charset=%s",
/* Explicit terminator in the last byte of the buffer. */
181 newcontent[sizeof(newcontent) - 1] = 0;
/* Element names are matched ignoring case throughout this function. */
189 while (cur != NULL) {
190 if ((cur->type == XML_ELEMENT_NODE) && (cur->name != NULL)) {
191 if (xmlStrcasecmp(cur->name, BAD_CAST"html") == 0)
193 if (xmlStrcasecmp(cur->name, BAD_CAST"head") == 0)
195 if (xmlStrcasecmp(cur->name, BAD_CAST"meta") == 0)
207 while (cur != NULL) {
208 if ((cur->type == XML_ELEMENT_NODE) && (cur->name != NULL)) {
209 if (xmlStrcasecmp(cur->name, BAD_CAST"head") == 0)
211 if (xmlStrcasecmp(cur->name, BAD_CAST"meta") == 0) {
222 if (cur->children == NULL)
228 * Search and update all the remaining the meta elements carrying
229 * encoding informations
231 while (cur != NULL) {
232 if ((cur->type == XML_ELEMENT_NODE) && (cur->name != NULL)) {
233 if (xmlStrcasecmp(cur->name, BAD_CAST"meta") == 0) {
234 xmlAttrPtr attr = cur->properties;
236 const xmlChar *value;
240 while (attr != NULL) {
/* Only attributes whose value is exactly one text node are examined. */
241 if ((attr->children != NULL) &&
242 (attr->children->type == XML_TEXT_NODE) &&
243 (attr->children->next == NULL)) {
244 value = attr->children->content;
245 if ((!xmlStrcasecmp(attr->name, BAD_CAST"http-equiv"))
246 && (!xmlStrcasecmp(value, BAD_CAST"Content-Type")))
250 if ((value != NULL) &&
251 (!xmlStrcasecmp(attr->name, BAD_CAST"content")))
254 if ((http != 0) && (content != NULL))
259 if ((http != 0) && (content != NULL)) {
/* This path needs both an encoding to declare and a head to hold it. */
270 if ((encoding != NULL) && (head != NULL)) {
272 * Create a new Meta element with the right attributes
275 meta = xmlNewDocNode(doc, NULL, BAD_CAST"meta", NULL);
/* The new meta becomes the first child of head. */
276 if (head->children == NULL)
277 xmlAddChild(head, meta);
279 xmlAddPrevSibling(head->children, meta);
280 xmlNewProp(meta, BAD_CAST"http-equiv", BAD_CAST"Content-Type");
281 xmlNewProp(meta, BAD_CAST"content", BAD_CAST newcontent);
284 /* remove the meta tag if NULL is passed */
285 if (encoding == NULL) {
289 /* change the document only if there is a real encoding change */
/*
 * The existing content is rewritten only when xmlStrcasestr does not
 * find the requested encoding name inside it.
 */
290 else if (xmlStrcasestr(content, encoding) == NULL) {
291 xmlSetProp(meta, BAD_CAST"content", BAD_CAST newcontent);
302 * These are the HTML attributes which will be output
303 * in minimized form, i.e. <option selected="selected"> will be
304 * output as <option selected>, as per XSLT 1.0 16.2 "HTML Output Method"
/*
 * Attribute names checked by htmlIsBooleanAttr, which walks the table
 * until it reaches a NULL entry. The tail of the initializer is not
 * visible in this listing.
 */
307 static const char* htmlBooleanAttrs[] = {
308 "checked", "compact", "declare", "defer", "disabled", "ismap",
309 "multiple", "nohref", "noresize", "noshade", "nowrap", "readonly",
316 * @name: the name of the attribute to check
318 * Determine if a given attribute is a boolean attribute.
320 * returns: false if the attribute is not boolean, true otherwise.
/*
 * Linear scan of htmlBooleanAttrs up to its NULL entry; each entry is
 * compared with @name ignoring case.
 */
323 htmlIsBooleanAttr(const xmlChar *name)
327 while (htmlBooleanAttrs[i] != NULL) {
328 if (xmlStrcasecmp((const xmlChar *)htmlBooleanAttrs[i], name) == 0)
335 #ifdef LIBXML_OUTPUT_ENABLED
337 * private routine exported from xmlIO.c
/*
 * Prototype only; htmlDocDumpMemoryFormat calls it with the selected
 * encoding handler to obtain its output buffer.
 */
340 xmlAllocOutputBufferInternal(xmlCharEncodingHandlerPtr encoder);
341 /************************************************************************
343 * Output error handlers *
345 ************************************************************************/
348 * @extra: extra information
350 * Handle an out of memory condition
/*
 * Report XML_ERR_NO_MEMORY in the XML_FROM_OUTPUT domain; @extra is passed
 * through to __xmlSimpleError and no node or message is supplied.
 */
353 htmlSaveErrMemory(const char *extra)
355 __xmlSimpleError(XML_FROM_OUTPUT, XML_ERR_NO_MEMORY, NULL, NULL, extra);
360 * @code: the error number
361 * @node: the location of the error.
362 * @extra: extra information
364 * Handle an HTML save error: select the message for @code and report it
/*
 * Translate a save error code into a message and report it through
 * __xmlSimpleError in the XML_FROM_OUTPUT domain, attached to @node.
 */
367 htmlSaveErr(int code, xmlNodePtr node, const char *extra)
369 const char *msg = NULL;
372 case XML_SAVE_NOT_UTF8:
373 msg = "string is not in UTF-8\n";
375 case XML_SAVE_CHAR_INVALID:
376 msg = "invalid character value\n";
378 case XML_SAVE_UNKNOWN_ENCODING:
/* The only message with a %s placeholder; @extra is passed with it below. */
379 msg = "unknown encoding %s\n";
381 case XML_SAVE_NO_DOCTYPE:
382 msg = "HTML has no DOCTYPE\n";
/* NOTE(review): presumably the default case; its label is not visible here. */
385 msg = "unexpected error number\n";
387 __xmlSimpleError(XML_FROM_OUTPUT, code, node, msg, extra);
390 /************************************************************************
392 * Dumping HTML tree content to a simple buffer *
394 ************************************************************************/
397 * htmlBufNodeDumpFormat:
398 * @buf: the xmlBufPtr output
400 * @cur: the current node
401 * @format: should formatting spaces be added
403 * Dump an HTML node, recursive behaviour, children are printed too.
405 * Returns the number of bytes written or -1 in case of error
/*
 * Serialize @cur into @buf by wrapping @buf in a temporary xmlOutputBuffer
 * that has no encoder, no callbacks and no context.
 */
408 htmlBufNodeDumpFormat(xmlBufPtr buf, xmlDocPtr doc, xmlNodePtr cur,
412 xmlOutputBufferPtr outbuf;
420 outbuf = (xmlOutputBufferPtr) xmlMalloc(sizeof(xmlOutputBuffer));
421 if (outbuf == NULL) {
422 htmlSaveErrMemory("allocating HTML output buffer");
/* Zero the whole wrapper, then point it at the caller-provided buffer. */
425 memset(outbuf, 0, (size_t) sizeof(xmlOutputBuffer));
426 outbuf->buffer = buf;
427 outbuf->encoder = NULL;
428 outbuf->writecallback = NULL;
429 outbuf->closecallback = NULL;
430 outbuf->context = NULL;
/* The result is the growth of @buf across the dump. */
433 use = xmlBufUse(buf);
434 htmlNodeDumpFormatOutput(outbuf, doc, cur, NULL, format);
436 ret = xmlBufUse(buf) - use;
442 * @buf: the HTML buffer output
444 * @cur: the current node
446 * Dump an HTML node, recursive behaviour, children are printed too,
447 * and formatting returns are added.
449 * Returns the number of bytes written or -1 in case of error
/*
 * xmlBuffer front-end: converts @buf with xmlBufFromBuffer, dumps with
 * formatting enabled (format argument 1), then calls xmlBufBackToBuffer.
 */
452 htmlNodeDump(xmlBufferPtr buf, xmlDocPtr doc, xmlNodePtr cur) {
/* @doc is not part of this check. */
456 if ((buf == NULL) || (cur == NULL))
460 buffer = xmlBufFromBuffer(buf);
464 ret = htmlBufNodeDumpFormat(buffer, doc, cur, 1);
466 xmlBufBackToBuffer(buffer);
474 * htmlNodeDumpFileFormat:
475 * @out: the FILE pointer
477 * @cur: the current node
478 * @encoding: the document encoding
479 * @format: should formatting spaces be added
481 * Dump an HTML node, recursive behaviour, children are printed too.
483 * TODO: if encoding == NULL try to save in the doc encoding
485 * returns: the number of bytes written or -1 in case of failure.
/*
 * Write @cur to the FILE @out through an output buffer created with the
 * encoding handler selected below.
 */
488 htmlNodeDumpFileFormat(FILE *out, xmlDocPtr doc,
489 xmlNodePtr cur, const char *encoding, int format) {
490 xmlOutputBufferPtr buf;
491 xmlCharEncodingHandlerPtr handler = NULL;
496 if (encoding != NULL) {
499 enc = xmlParseCharEncoding(encoding);
/* A handler is looked up only when the requested encoding is not UTF-8. */
500 if (enc != XML_CHAR_ENCODING_UTF8) {
501 handler = xmlFindCharEncodingHandler(encoding);
503 htmlSaveErr(XML_SAVE_UNKNOWN_ENCODING, NULL, encoding);
508 * Fallback to HTML or ASCII when the encoding is unspecified
511 handler = xmlFindCharEncodingHandler("HTML");
513 handler = xmlFindCharEncodingHandler("ascii");
516 * save the content to a temp buffer.
518 buf = xmlOutputBufferCreateFile(out, handler);
/*
 * NOTE(review): this failure path returns 0, while htmlDocDump returns -1
 * when the same call fails and the header comment promises -1 on failure.
 */
519 if (buf == NULL) return(0);
521 htmlNodeDumpFormatOutput(buf, doc, cur, encoding, format);
/* The value produced by closing the buffer is kept in ret. */
523 ret = xmlOutputBufferClose(buf);
529 * @out: the FILE pointer
531 * @cur: the current node
533 * Dump an HTML node, recursive behaviour, children are printed too,
534 * and formatting returns are added.
/*
 * Convenience wrapper: no explicit encoding (NULL) and formatting on (1).
 * The result of htmlNodeDumpFileFormat is not used on the visible line.
 */
537 htmlNodeDumpFile(FILE *out, xmlDocPtr doc, xmlNodePtr cur) {
538 htmlNodeDumpFileFormat(out, doc, cur, NULL, 1);
542 * htmlDocDumpMemoryFormat:
544 * @mem: OUT: the memory pointer
545 * @size: OUT: the memory length
546 * @format: should formatting spaces be added
548 * Dump an HTML document in memory and return the xmlChar * and its size.
549 * It's up to the caller to free the memory.
/*
 * Serialize the whole document into a newly duplicated string stored in
 * *mem, with its length in *size.
 */
552 htmlDocDumpMemoryFormat(xmlDocPtr cur, xmlChar**mem, int *size, int format) {
553 xmlOutputBufferPtr buf;
554 xmlCharEncodingHandlerPtr handler = NULL;
555 const char *encoding;
559 if ((mem == NULL) || (size == NULL))
/* The output encoding comes from the meta declaration of the document. */
567 encoding = (const char *) htmlGetMetaEncoding(cur);
569 if (encoding != NULL) {
572 enc = xmlParseCharEncoding(encoding);
573 if (enc != cur->charset) {
574 if (cur->charset != XML_CHAR_ENCODING_UTF8) {
583 handler = xmlFindCharEncodingHandler(encoding);
585 htmlSaveErr(XML_SAVE_UNKNOWN_ENCODING, NULL, encoding);
588 handler = xmlFindCharEncodingHandler(encoding);
593 * Fallback to HTML or ASCII when the encoding is unspecified
596 handler = xmlFindCharEncodingHandler("HTML");
598 handler = xmlFindCharEncodingHandler("ascii");
600 buf = xmlAllocOutputBufferInternal(handler);
/* NULL is passed as the encoding argument of the dump itself. */
607 htmlDocContentDumpFormatOutput(buf, cur, NULL, format);
/* Flush first, then copy from conv when it is set, else from buffer. */
609 xmlOutputBufferFlush(buf);
610 if (buf->conv != NULL) {
611 *size = xmlBufUse(buf->conv);
612 *mem = xmlStrndup(xmlBufContent(buf->conv), *size);
614 *size = xmlBufUse(buf->buffer);
615 *mem = xmlStrndup(xmlBufContent(buf->buffer), *size);
/* *mem is a copy made by xmlStrndup; the close result is discarded. */
617 (void)xmlOutputBufferClose(buf);
623 * @mem: OUT: the memory pointer
624 * @size: OUT: the memory length
626 * Dump an HTML document in memory and return the xmlChar * and its size.
627 * It's up to the caller to free the memory.
/* Convenience wrapper around htmlDocDumpMemoryFormat with format set to 1. */
630 htmlDocDumpMemory(xmlDocPtr cur, xmlChar**mem, int *size) {
631 htmlDocDumpMemoryFormat(cur, mem, size, 1);
635 /************************************************************************
637 * Dumping HTML tree content to an I/O output buffer *
639 ************************************************************************/
/*
 * Prototype only; htmlNodeDumpFormatOutput calls it with the nsDef list of
 * an element. The definition is not part of the visible listing.
 */
641 void xmlNsListDumpOutput(xmlOutputBufferPtr buf, xmlNsPtr cur);
645 * @buf: the HTML buffer output
647 * @encoding: the encoding string
649 * TODO: check whether encoding is needed
651 * Dump the HTML document DTD, if any.
/*
 * Write the DOCTYPE declaration taken from the internal subset of @doc.
 * @encoding is marked ATTRIBUTE_UNUSED.
 */
654 htmlDtdDumpOutput(xmlOutputBufferPtr buf, xmlDocPtr doc,
655 const char *encoding ATTRIBUTE_UNUSED) {
/* @doc is dereferenced here without a NULL check. */
656 xmlDtdPtr cur = doc->intSubset;
659 htmlSaveErr(XML_SAVE_NO_DOCTYPE, (xmlNodePtr) doc, NULL);
662 xmlOutputBufferWriteString(buf, "<!DOCTYPE ");
663 xmlOutputBufferWriteString(buf, (const char *)cur->name);
/* With a public identifier: PUBLIC, the quoted id, then any system id. */
664 if (cur->ExternalID != NULL) {
665 xmlOutputBufferWriteString(buf, " PUBLIC ");
666 xmlBufWriteQuotedString(buf->buffer, cur->ExternalID);
667 if (cur->SystemID != NULL) {
668 xmlOutputBufferWriteString(buf, " ");
669 xmlBufWriteQuotedString(buf->buffer, cur->SystemID);
/*
 * System identifier alone: it is written only when it differs from
 * about:legacy-compat, so that value yields a bare DOCTYPE.
 */
671 } else if (cur->SystemID != NULL &&
672 xmlStrcmp(cur->SystemID, BAD_CAST "about:legacy-compat")) {
673 xmlOutputBufferWriteString(buf, " SYSTEM ");
674 xmlBufWriteQuotedString(buf->buffer, cur->SystemID);
676 xmlOutputBufferWriteString(buf, ">\n");
680 * htmlAttrDumpOutput:
681 * @buf: the HTML buffer output
683 * @cur: the attribute pointer
684 * @encoding: the encoding string
686 * Dump an HTML attribute
/*
 * Write one attribute as a space, an optional prefix and colon, the name,
 * and (unless the attribute is boolean or has no children) its value.
 * @encoding is marked ATTRIBUTE_UNUSED.
 * NOTE(review): loop and cleanup statements are missing from this listing.
 */
689 htmlAttrDumpOutput(xmlOutputBufferPtr buf, xmlDocPtr doc, xmlAttrPtr cur,
690 const char *encoding ATTRIBUTE_UNUSED) {
694 * The html output method should not escape a & character
695 * occurring in an attribute value immediately followed by
696 * a { character (see Section B.7.1 of the HTML 4.0 Recommendation).
697 * This is implemented in xmlEncodeEntitiesReentrant
703 xmlOutputBufferWriteString(buf, " ");
704 if ((cur->ns != NULL) && (cur->ns->prefix != NULL)) {
705 xmlOutputBufferWriteString(buf, (const char *)cur->ns->prefix);
706 xmlOutputBufferWriteString(buf, ":");
708 xmlOutputBufferWriteString(buf, (const char *)cur->name);
/* Boolean attributes get the name only (minimized form). */
709 if ((cur->children != NULL) && (!htmlIsBooleanAttr(cur->name))) {
710 value = xmlNodeListGetString(doc, cur->children, 0);
712 xmlOutputBufferWriteString(buf, "=");
/*
 * URI-valued attributes get URI escaping: href, action, src, and name
 * when the parent element is a; the attribute and its parent must both
 * be outside any namespace. Names are matched ignoring case.
 */
713 if ((cur->ns == NULL) && (cur->parent != NULL) &&
714 (cur->parent->ns == NULL) &&
715 ((!xmlStrcasecmp(cur->name, BAD_CAST "href")) ||
716 (!xmlStrcasecmp(cur->name, BAD_CAST "action")) ||
717 (!xmlStrcasecmp(cur->name, BAD_CAST "src")) ||
718 ((!xmlStrcasecmp(cur->name, BAD_CAST "name")) &&
719 (!xmlStrcasecmp(cur->parent->name, BAD_CAST "a"))))) {
720 xmlChar *tmp = value;
721 /* xmlURIEscapeStr() escapes '"' so it can be safely used. */
722 xmlBufCCat(buf->buffer, "\"");
/* Leading blanks of the value are skipped before escaping. */
724 while (IS_BLANK_CH(*tmp)) tmp++;
726 /* URI Escape everything, except server side includes. */
731 xmlChar *start = (xmlChar *)xmlStrstr(tmp, BAD_CAST "<!--");
733 end = (xmlChar *)xmlStrstr(tmp, BAD_CAST "-->");
739 /* Escape the whole string, or until start (set to '\0'). */
/*
 * NOTE(review): the second argument is presumably the set of characters
 * xmlURIEscapeStr leaves untouched - confirm against its documentation.
 */
740 escaped = xmlURIEscapeStr(tmp, BAD_CAST"@/:=?;#%&,+");
741 if (escaped != NULL) {
742 xmlBufCat(buf->buffer, escaped);
/* The unescaped text is written in the other branch. */
745 xmlBufCat(buf->buffer, tmp);
748 if (end == NULL) { /* Everything has been written. */
752 /* Do not escape anything within server side includes. */
753 *start = '<'; /* Restore the first character of "<!--". */
754 end += 3; /* strlen("-->") */
757 xmlBufCat(buf->buffer, start);
762 xmlBufCCat(buf->buffer, "\"");
/* Every other attribute goes through xmlBufWriteQuotedString. */
764 xmlBufWriteQuotedString(buf->buffer, value);
/* An empty quoted value is written on this path. */
768 xmlOutputBufferWriteString(buf, "=\"\"");
774 * htmlAttrListDumpOutput:
775 * @buf: the HTML buffer output
777 * @cur: the first attribute pointer
778 * @encoding: the encoding string
780 * Dump a list of HTML attributes
/*
 * Call htmlAttrDumpOutput for each attribute while @cur is non-NULL.
 * NOTE(review): the statement advancing @cur is not visible here.
 */
783 htmlAttrListDumpOutput(xmlOutputBufferPtr buf, xmlDocPtr doc, xmlAttrPtr cur, const char *encoding) {
787 while (cur != NULL) {
788 htmlAttrDumpOutput(buf, doc, cur, encoding);
796 * htmlNodeListDumpOutput:
797 * @buf: the HTML buffer output
799 * @cur: the first node
800 * @encoding: the encoding string
801 * @format: should formatting spaces be added
803 * Dump an HTML node list, recursive behaviour, children are printed too.
/*
 * Call htmlNodeDumpFormatOutput for each node while @cur is non-NULL.
 * NOTE(review): the statement advancing @cur is not visible here.
 */
806 htmlNodeListDumpOutput(xmlOutputBufferPtr buf, xmlDocPtr doc,
807 xmlNodePtr cur, const char *encoding, int format) {
811 while (cur != NULL) {
812 htmlNodeDumpFormatOutput(buf, doc, cur, encoding, format);
818 * htmlNodeDumpFormatOutput:
819 * @buf: the HTML buffer output
821 * @cur: the current node
822 * @encoding: the encoding string
823 * @format: should formatting spaces be added
825 * Dump an HTML node, recursive behaviour, children are printed too.
/*
 * Serialize one node to @buf, dispatching on cur->type. Non-element node
 * kinds are handled first; elements are written as start tag, attributes,
 * children and end tag, with optional newlines when @format is set.
 * NOTE(review): returns, closing braces and some statements are missing
 * from this listing; the notes below describe only the visible lines.
 */
828 htmlNodeDumpFormatOutput(xmlOutputBufferPtr buf, xmlDocPtr doc,
829 xmlNodePtr cur, const char *encoding, int format) {
830 const htmlElemDesc * info;
834 if ((cur == NULL) || (buf == NULL)) {
840 if (cur->type == XML_DTD_NODE)
/*
 * Document nodes are delegated to htmlDocContentDumpOutput, which passes
 * a fixed format of 1, so @format is not forwarded on this path.
 */
842 if ((cur->type == XML_HTML_DOCUMENT_NODE) ||
843 (cur->type == XML_DOCUMENT_NODE)){
844 htmlDocContentDumpOutput(buf, (xmlDocPtr) cur, encoding);
847 if (cur->type == XML_ATTRIBUTE_NODE) {
848 htmlAttrDumpOutput(buf, doc, (xmlAttrPtr) cur, encoding);
851 if (cur->type == HTML_TEXT_NODE) {
852 if (cur->content != NULL) {
/*
 * Text is entity-encoded unless the node is marked no-encode or its
 * parent is script or style (matched ignoring case).
 * NOTE(review): if xmlStringText and xmlStringTextNoenc are distinct
 * objects, the first comparison is implied by the second.
 */
853 if (((cur->name == (const xmlChar *)xmlStringText) ||
854 (cur->name != (const xmlChar *)xmlStringTextNoenc)) &&
855 ((cur->parent == NULL) ||
856 ((xmlStrcasecmp(cur->parent->name, BAD_CAST "script")) &&
857 (xmlStrcasecmp(cur->parent->name, BAD_CAST "style"))))) {
860 buffer = xmlEncodeEntitiesReentrant(doc, cur->content);
861 if (buffer != NULL) {
862 xmlOutputBufferWriteString(buf, (const char *)buffer);
/* Raw path: the content is written as stored. */
866 xmlOutputBufferWriteString(buf, (const char *)cur->content);
/* Comments with NULL content produce no output from these lines. */
871 if (cur->type == HTML_COMMENT_NODE) {
872 if (cur->content != NULL) {
873 xmlOutputBufferWriteString(buf, "<!--");
874 xmlOutputBufferWriteString(buf, (const char *)cur->content);
875 xmlOutputBufferWriteString(buf, "-->");
/* Processing instructions are closed with a single > character. */
879 if (cur->type == HTML_PI_NODE) {
880 if (cur->name == NULL)
882 xmlOutputBufferWriteString(buf, "<?");
883 xmlOutputBufferWriteString(buf, (const char *)cur->name);
884 if (cur->content != NULL) {
885 xmlOutputBufferWriteString(buf, " ");
886 xmlOutputBufferWriteString(buf, (const char *)cur->content);
888 xmlOutputBufferWriteString(buf, ">");
891 if (cur->type == HTML_ENTITY_REF_NODE) {
892 xmlOutputBufferWriteString(buf, "&");
893 xmlOutputBufferWriteString(buf, (const char *)cur->name);
894 xmlOutputBufferWriteString(buf, ";");
/* Preserve nodes are copied through without any escaping. */
897 if (cur->type == HTML_PRESERVE_NODE) {
898 if (cur->content != NULL) {
899 xmlOutputBufferWriteString(buf, (const char *)cur->content);
905 * Get specific HTML info for that node.
/* info may be NULL; each later use is guarded by a NULL test. */
908 info = htmlTagLookup(cur->name);
/* Start tag: optional prefix, name, namespace declarations, attributes. */
912 xmlOutputBufferWriteString(buf, "<");
913 if ((cur->ns != NULL) && (cur->ns->prefix != NULL)) {
914 xmlOutputBufferWriteString(buf, (const char *)cur->ns->prefix);
915 xmlOutputBufferWriteString(buf, ":");
917 xmlOutputBufferWriteString(buf, (const char *)cur->name);
919 xmlNsListDumpOutput(buf, cur->nsDef);
920 if (cur->properties != NULL)
921 htmlAttrListDumpOutput(buf, doc, cur->properties, encoding);
/* Elements flagged empty in the tag table get no end tag. */
923 if ((info != NULL) && (info->empty)) {
924 xmlOutputBufferWriteString(buf, ">");
/*
 * Newline rule used throughout: skipped before text or entity reference
 * siblings and when the parent name starts with the letter p. The test
 * looks at the first character only, so it covers every such name.
 */
925 if ((format) && (!info->isinline) && (cur->next != NULL)) {
926 if ((cur->next->type != HTML_TEXT_NODE) &&
927 (cur->next->type != HTML_ENTITY_REF_NODE) &&
928 (cur->parent != NULL) &&
929 (cur->parent->name != NULL) &&
930 (cur->parent->name[0] != 'p')) /* p, pre, param */
931 xmlOutputBufferWriteString(buf, "\n");
/* Element without children. */
935 if (((cur->type == XML_ELEMENT_NODE) || (cur->content == NULL)) &&
936 (cur->children == NULL)) {
/*
 * When the tag table sets saveEndTag and the tag is neither html nor
 * body, only the start tag is closed here.
 */
937 if ((info != NULL) && (info->saveEndTag != 0) &&
938 (xmlStrcmp(BAD_CAST info->name, BAD_CAST "html")) &&
939 (xmlStrcmp(BAD_CAST info->name, BAD_CAST "body"))) {
940 xmlOutputBufferWriteString(buf, ">");
/* Otherwise an explicit end tag follows immediately. */
942 xmlOutputBufferWriteString(buf, "></");
943 if ((cur->ns != NULL) && (cur->ns->prefix != NULL)) {
944 xmlOutputBufferWriteString(buf, (const char *)cur->ns->prefix);
945 xmlOutputBufferWriteString(buf, ":");
947 xmlOutputBufferWriteString(buf, (const char *)cur->name);
948 xmlOutputBufferWriteString(buf, ">");
950 if ((format) && (cur->next != NULL) &&
951 (info != NULL) && (!info->isinline)) {
952 if ((cur->next->type != HTML_TEXT_NODE) &&
953 (cur->next->type != HTML_ENTITY_REF_NODE) &&
954 (cur->parent != NULL) &&
955 (cur->parent->name != NULL) &&
956 (cur->parent->name[0] != 'p')) /* p, pre, param */
957 xmlOutputBufferWriteString(buf, "\n");
/* Element with content or children. */
961 xmlOutputBufferWriteString(buf, ">");
962 if ((cur->type != XML_ELEMENT_NODE) &&
963 (cur->content != NULL)) {
965 * Uses the OutputBuffer property to automatically convert
966 * invalids to charrefs
969 xmlOutputBufferWriteString(buf, (const char *) cur->content);
971 if (cur->children != NULL) {
/*
 * A newline is added around the children only for a non-inline element
 * with more than one child whose own name does not start with p.
 */
972 if ((format) && (info != NULL) && (!info->isinline) &&
973 (cur->children->type != HTML_TEXT_NODE) &&
974 (cur->children->type != HTML_ENTITY_REF_NODE) &&
975 (cur->children != cur->last) &&
976 (cur->name != NULL) &&
977 (cur->name[0] != 'p')) /* p, pre, param */
978 xmlOutputBufferWriteString(buf, "\n");
979 htmlNodeListDumpOutput(buf, doc, cur->children, encoding, format);
980 if ((format) && (info != NULL) && (!info->isinline) &&
981 (cur->last->type != HTML_TEXT_NODE) &&
982 (cur->last->type != HTML_ENTITY_REF_NODE) &&
983 (cur->children != cur->last) &&
984 (cur->name != NULL) &&
985 (cur->name[0] != 'p')) /* p, pre, param */
986 xmlOutputBufferWriteString(buf, "\n");
/* End tag. */
988 xmlOutputBufferWriteString(buf, "</");
989 if ((cur->ns != NULL) && (cur->ns->prefix != NULL)) {
990 xmlOutputBufferWriteString(buf, (const char *)cur->ns->prefix);
991 xmlOutputBufferWriteString(buf, ":");
993 xmlOutputBufferWriteString(buf, (const char *)cur->name);
994 xmlOutputBufferWriteString(buf, ">");
995 if ((format) && (info != NULL) && (!info->isinline) &&
996 (cur->next != NULL)) {
997 if ((cur->next->type != HTML_TEXT_NODE) &&
998 (cur->next->type != HTML_ENTITY_REF_NODE) &&
999 (cur->parent != NULL) &&
1000 (cur->parent->name != NULL) &&
1001 (cur->parent->name[0] != 'p')) /* p, pre, param */
1002 xmlOutputBufferWriteString(buf, "\n");
1007 * htmlNodeDumpOutput:
1008 * @buf: the HTML buffer output
1009 * @doc: the document
1010 * @cur: the current node
1011 * @encoding: the encoding string
1013 * Dump an HTML node, recursive behaviour, children are printed too,
1014 * and formatting returns/spaces are added.
/* Convenience wrapper around htmlNodeDumpFormatOutput with format set to 1. */
1017 htmlNodeDumpOutput(xmlOutputBufferPtr buf, xmlDocPtr doc,
1018 xmlNodePtr cur, const char *encoding) {
1019 htmlNodeDumpFormatOutput(buf, doc, cur, encoding, 1);
1023 * htmlDocContentDumpFormatOutput:
1024 * @buf: the HTML buffer output
1025 * @cur: the document
1026 * @encoding: the encoding string
1027 * @format: should formatting spaces be added
1029 * Dump an HTML document.
/*
 * Write the whole document: the DOCTYPE when an internal subset exists,
 * then every top-level node, then a final newline.
 */
1032 htmlDocContentDumpFormatOutput(xmlOutputBufferPtr buf, xmlDocPtr cur,
1033 const char *encoding, int format) {
1038 if ((buf == NULL) || (cur == NULL))
1042 * force to output the stuff as HTML, especially for entities
/*
 * The node type is switched to XML_HTML_DOCUMENT_NODE for the dump and
 * set back from the local named type on the last line of this block.
 */
1045 cur->type = XML_HTML_DOCUMENT_NODE;
1046 if (cur->intSubset != NULL) {
/* NULL is passed as the encoding, which htmlDtdDumpOutput marks unused. */
1047 htmlDtdDumpOutput(buf, cur, NULL);
1049 if (cur->children != NULL) {
1050 htmlNodeListDumpOutput(buf, cur, cur->children, encoding, format);
1052 xmlOutputBufferWriteString(buf, "\n");
1053 cur->type = (xmlElementType) type;
1057 * htmlDocContentDumpOutput:
1058 * @buf: the HTML buffer output
1059 * @cur: the document
1060 * @encoding: the encoding string
1062 * Dump an HTML document. Formatting returns/spaces are added.
/* Convenience wrapper around htmlDocContentDumpFormatOutput with format 1. */
1065 htmlDocContentDumpOutput(xmlOutputBufferPtr buf, xmlDocPtr cur,
1066 const char *encoding) {
1067 htmlDocContentDumpFormatOutput(buf, cur, encoding, 1);
1070 /************************************************************************
1072 * Saving functions front-ends *
1074 ************************************************************************/
1079 * @cur: the document
1081 * Dump an HTML document to an open FILE.
1083 * returns: the number of bytes written or -1 in case of failure.
/*
 * Write the document to the open FILE @f, using the encoding declared in
 * its meta element when a handler for it can be found.
 */
1086 htmlDocDump(FILE *f, xmlDocPtr cur) {
1087 xmlOutputBufferPtr buf;
1088 xmlCharEncodingHandlerPtr handler = NULL;
1089 const char *encoding;
1094 if ((cur == NULL) || (f == NULL)) {
1098 encoding = (const char *) htmlGetMetaEncoding(cur);
1100 if (encoding != NULL) {
1101 xmlCharEncoding enc;
1103 enc = xmlParseCharEncoding(encoding);
/* The declared encoding is compared with the charset of the document. */
1104 if (enc != cur->charset) {
1105 if (cur->charset != XML_CHAR_ENCODING_UTF8) {
1112 handler = xmlFindCharEncodingHandler(encoding);
1113 if (handler == NULL)
1114 htmlSaveErr(XML_SAVE_UNKNOWN_ENCODING, NULL, encoding);
1116 handler = xmlFindCharEncodingHandler(encoding);
1121 * Fallback to HTML or ASCII when the encoding is unspecified
1123 if (handler == NULL)
1124 handler = xmlFindCharEncodingHandler("HTML");
1125 if (handler == NULL)
1126 handler = xmlFindCharEncodingHandler("ascii");
1128 buf = xmlOutputBufferCreateFile(f, handler);
/* Buffer creation failure is reported as -1. */
1129 if (buf == NULL) return(-1);
/* NULL is passed as the encoding argument of the dump itself. */
1130 htmlDocContentDumpOutput(buf, cur, NULL);
1132 ret = xmlOutputBufferClose(buf);
1138 * @filename: the filename (or URL)
1139 * @cur: the document
1141 * Dump an HTML document to a file. If @filename is "-" the stdout file is
1143 * returns: the number of bytes written or -1 in case of failure.
/*
 * Write the document to @filename, using the encoding declared in its meta
 * element when a handler for it can be found, and the compression setting
 * stored in the document.
 */
1146 htmlSaveFile(const char *filename, xmlDocPtr cur) {
1147 xmlOutputBufferPtr buf;
1148 xmlCharEncodingHandlerPtr handler = NULL;
1149 const char *encoding;
1152 if ((cur == NULL) || (filename == NULL))
1157 encoding = (const char *) htmlGetMetaEncoding(cur);
1159 if (encoding != NULL) {
1160 xmlCharEncoding enc;
1162 enc = xmlParseCharEncoding(encoding);
1163 if (enc != cur->charset) {
1164 if (cur->charset != XML_CHAR_ENCODING_UTF8) {
1171 handler = xmlFindCharEncodingHandler(encoding);
1172 if (handler == NULL)
1173 htmlSaveErr(XML_SAVE_UNKNOWN_ENCODING, NULL, encoding);
1178 * Fallback to HTML or ASCII when the encoding is unspecified
1180 if (handler == NULL)
1181 handler = xmlFindCharEncodingHandler("HTML");
1182 if (handler == NULL)
1183 handler = xmlFindCharEncodingHandler("ascii");
1186 * save the content to a temp buffer.
1188 buf = xmlOutputBufferCreateFilename(filename, handler, cur->compression);
/*
 * NOTE(review): this failure path returns 0, while htmlDocDump returns -1
 * in the same situation and the header comment promises -1 on failure.
 */
1189 if (buf == NULL) return(0);
/* NULL is passed as the encoding argument of the dump itself. */
1191 htmlDocContentDumpOutput(buf, cur, NULL);
1193 ret = xmlOutputBufferClose(buf);
1198 * htmlSaveFileFormat:
1199 * @filename: the filename
1200 * @cur: the document
1201 * @format: should formatting spaces be added
1202 * @encoding: the document encoding
1204 * Dump an HTML document to a file using a given encoding.
1206 * returns: the number of bytes written or -1 in case of failure.
/*
 * Write the document to @filename with a caller-chosen encoding. The meta
 * declaration of the document is rewritten through htmlSetMetaEncoding
 * before the dump, so the document itself is modified by this call.
 */
1209 htmlSaveFileFormat(const char *filename, xmlDocPtr cur,
1210 const char *encoding, int format) {
1211 xmlOutputBufferPtr buf;
1212 xmlCharEncodingHandlerPtr handler = NULL;
1215 if ((cur == NULL) || (filename == NULL))
1220 if (encoding != NULL) {
1221 xmlCharEncoding enc;
1223 enc = xmlParseCharEncoding(encoding);
1224 if (enc != cur->charset) {
1225 if (cur->charset != XML_CHAR_ENCODING_UTF8) {
1232 handler = xmlFindCharEncodingHandler(encoding);
1233 if (handler == NULL)
1234 htmlSaveErr(XML_SAVE_UNKNOWN_ENCODING, NULL, encoding);
/* The meta declaration is set to the requested encoding ... */
1236 htmlSetMetaEncoding(cur, (const xmlChar *) encoding);
/* ... or to UTF-8 on the other path (presumably when @encoding is NULL). */
1238 htmlSetMetaEncoding(cur, (const xmlChar *) "UTF-8");
1242 * Fallback to HTML or ASCII when the encoding is unspecified
1244 if (handler == NULL)
1245 handler = xmlFindCharEncodingHandler("HTML");
1246 if (handler == NULL)
1247 handler = xmlFindCharEncodingHandler("ascii");
1250 * save the content to a temp buffer.
/* Compression argument is a literal 0 here; htmlSaveFile passes cur->compression. */
1252 buf = xmlOutputBufferCreateFilename(filename, handler, 0);
/*
 * NOTE(review): this failure path returns 0, while htmlDocDump returns -1
 * in the same situation and the header comment promises -1 on failure.
 */
1253 if (buf == NULL) return(0);
1255 htmlDocContentDumpFormatOutput(buf, cur, encoding, format);
1257 ret = xmlOutputBufferClose(buf);
1263 * @filename: the filename
1264 * @cur: the document
1265 * @encoding: the document encoding
1267 * Dump an HTML document to a file using a given encoding
1268 * and formatting returns/spaces are added.
1270 * returns: the number of bytes written or -1 in case of failure.
/* Convenience wrapper around htmlSaveFileFormat with format set to 1. */
1273 htmlSaveFileEnc(const char *filename, xmlDocPtr cur, const char *encoding) {
1274 return(htmlSaveFileFormat(filename, cur, encoding, 1));
1277 #endif /* LIBXML_OUTPUT_ENABLED */
1279 #define bottom_HTMLtree
1280 #include "elfgcchack.h"
1281 #endif /* LIBXML_HTML_ENABLED */