2 * HTMLtree.c : implementation of access function for an HTML tree.
4 * See Copyright for the status of this software.
12 #ifdef LIBXML_HTML_ENABLED
14 #include <string.h> /* for memset() only ! */
23 #include <libxml/xmlmemory.h>
24 #include <libxml/HTMLparser.h>
25 #include <libxml/HTMLtree.h>
26 #include <libxml/entities.h>
27 #include <libxml/valid.h>
28 #include <libxml/xmlerror.h>
29 #include <libxml/parserInternals.h>
30 #include <libxml/globals.h>
31 #include <libxml/uri.h>
33 /************************************************************************
35 * Getting/Setting encoding meta tags *
37 ************************************************************************/
40 * htmlGetMetaEncoding:
43 * Encoding definition lookup in the Meta tags
45 * Returns the current encoding as flagged in the HTML source
48 htmlGetMetaEncoding(htmlDocPtr doc) {
50 const xmlChar *content;
51 const xmlChar *encoding;
61 if ((cur->type == XML_ELEMENT_NODE) && (cur->name != NULL)) {
62 if (xmlStrEqual(cur->name, BAD_CAST"html"))
64 if (xmlStrEqual(cur->name, BAD_CAST"head"))
66 if (xmlStrEqual(cur->name, BAD_CAST"meta"))
79 if ((cur->type == XML_ELEMENT_NODE) && (cur->name != NULL)) {
80 if (xmlStrEqual(cur->name, BAD_CAST"head"))
82 if (xmlStrEqual(cur->name, BAD_CAST"meta"))
93 * Search the meta elements
97 if ((cur->type == XML_ELEMENT_NODE) && (cur->name != NULL)) {
98 if (xmlStrEqual(cur->name, BAD_CAST"meta")) {
99 xmlAttrPtr attr = cur->properties;
101 const xmlChar *value;
105 while (attr != NULL) {
106 if ((attr->children != NULL) &&
107 (attr->children->type == XML_TEXT_NODE) &&
108 (attr->children->next == NULL)) {
109 value = attr->children->content;
110 if ((!xmlStrcasecmp(attr->name, BAD_CAST"http-equiv"))
111 && (!xmlStrcasecmp(value, BAD_CAST"Content-Type")))
113 else if ((value != NULL)
114 && (!xmlStrcasecmp(attr->name, BAD_CAST"content")))
116 if ((http != 0) && (content != NULL))
128 encoding = xmlStrstr(content, BAD_CAST"charset=");
129 if (encoding == NULL)
130 encoding = xmlStrstr(content, BAD_CAST"Charset=");
131 if (encoding == NULL)
132 encoding = xmlStrstr(content, BAD_CAST"CHARSET=");
133 if (encoding != NULL) {
136 encoding = xmlStrstr(content, BAD_CAST"charset =");
137 if (encoding == NULL)
138 encoding = xmlStrstr(content, BAD_CAST"Charset =");
139 if (encoding == NULL)
140 encoding = xmlStrstr(content, BAD_CAST"CHARSET =");
141 if (encoding != NULL)
144 if (encoding != NULL) {
145 while ((*encoding == ' ') || (*encoding == '\t')) encoding++;
151 * htmlSetMetaEncoding:
153 * @encoding: the encoding string
155 * Sets the current encoding in the Meta tags
156 * NOTE: this will not change the document content encoding, just
157 * the META flag associated.
159 * Returns 0 in case of success and -1 in case of error
162 htmlSetMetaEncoding(htmlDocPtr doc, const xmlChar *encoding) {
163 htmlNodePtr cur, meta;
164 const xmlChar *content;
165 char newcontent[100];
171 if (encoding != NULL) {
172 snprintf(newcontent, sizeof(newcontent), "text/html; charset=%s",
174 newcontent[sizeof(newcontent) - 1] = 0;
182 while (cur != NULL) {
183 if ((cur->type == XML_ELEMENT_NODE) && (cur->name != NULL)) {
184 if (xmlStrcasecmp(cur->name, BAD_CAST"html") == 0)
186 if (xmlStrcasecmp(cur->name, BAD_CAST"head") == 0)
188 if (xmlStrcasecmp(cur->name, BAD_CAST"meta") == 0)
200 while (cur != NULL) {
201 if ((cur->type == XML_ELEMENT_NODE) && (cur->name != NULL)) {
202 if (xmlStrcasecmp(cur->name, BAD_CAST"head") == 0)
204 if (xmlStrcasecmp(cur->name, BAD_CAST"meta") == 0)
212 if (cur->children == NULL) {
213 if (encoding == NULL)
215 meta = xmlNewDocNode(doc, NULL, BAD_CAST"meta", NULL);
216 xmlAddChild(cur, meta);
217 xmlNewProp(meta, BAD_CAST"http-equiv", BAD_CAST"Content-Type");
218 xmlNewProp(meta, BAD_CAST"content", BAD_CAST newcontent);
224 if (encoding != NULL) {
226 * Create a new Meta element with the right attributes
229 meta = xmlNewDocNode(doc, NULL, BAD_CAST"meta", NULL);
230 xmlAddPrevSibling(cur, meta);
231 xmlNewProp(meta, BAD_CAST"http-equiv", BAD_CAST"Content-Type");
232 xmlNewProp(meta, BAD_CAST"content", BAD_CAST newcontent);
236 * Search and destroy all the remaining meta elements carrying
237 * encoding information
239 while (cur != NULL) {
240 if ((cur->type == XML_ELEMENT_NODE) && (cur->name != NULL)) {
241 if (xmlStrcasecmp(cur->name, BAD_CAST"meta") == 0) {
242 xmlAttrPtr attr = cur->properties;
244 const xmlChar *value;
248 while (attr != NULL) {
249 if ((attr->children != NULL) &&
250 (attr->children->type == XML_TEXT_NODE) &&
251 (attr->children->next == NULL)) {
252 value = attr->children->content;
253 if ((!xmlStrcasecmp(attr->name, BAD_CAST"http-equiv"))
254 && (!xmlStrcasecmp(value, BAD_CAST"Content-Type")))
258 if ((value != NULL) &&
259 (!xmlStrcasecmp(attr->name, BAD_CAST"content")))
262 if ((http != 0) && (content != NULL))
267 if ((http != 0) && (content != NULL)) {
285 * These are the HTML attributes which will be output
286 * in minimized form, i.e. <option selected="selected"> will be
287 * output as <option selected>, as per XSLT 1.0 16.2 "HTML Output Method"
290 static const char* htmlBooleanAttrs[] = {
291 "checked", "compact", "declare", "defer", "disabled", "ismap",
292 "multiple", "nohref", "noresize", "noshade", "nowrap", "readonly",
299 * @name: the name of the attribute to check
301 * Determine if a given attribute is a boolean attribute.
303 * returns: false if the attribute is not boolean, true otherwise.
306 htmlIsBooleanAttr(const xmlChar *name)
310 while (htmlBooleanAttrs[i] != NULL) {
311 if (xmlStrcasecmp((const xmlChar *)htmlBooleanAttrs[i], name) == 0)
318 #ifdef LIBXML_OUTPUT_ENABLED
319 /************************************************************************
321 * Output error handlers *
323 ************************************************************************/
326 * @extra: extra information
328 * Handle an out of memory condition
331 htmlSaveErrMemory(const char *extra)
333 __xmlSimpleError(XML_FROM_OUTPUT, XML_ERR_NO_MEMORY, NULL, NULL, extra);
338 * @code: the error number
339 * @node: the location of the error.
340 * @extra: extra information
342 * Handle an output (save) error identified by @code
345 htmlSaveErr(int code, xmlNodePtr node, const char *extra)
347 const char *msg = NULL;
350 case XML_SAVE_NOT_UTF8:
351 msg = "string is not in UTF-8\n";
353 case XML_SAVE_CHAR_INVALID:
354 msg = "invalid character value\n";
356 case XML_SAVE_UNKNOWN_ENCODING:
357 msg = "unknown encoding %s\n";
359 case XML_SAVE_NO_DOCTYPE:
360 msg = "HTML has no DOCTYPE\n";
363 msg = "unexpected error number\n";
365 __xmlSimpleError(XML_FROM_OUTPUT, code, node, msg, extra);
368 /************************************************************************
370 * Dumping HTML tree content to a simple buffer *
372 ************************************************************************/
375 htmlNodeDumpFormat(xmlBufferPtr buf, xmlDocPtr doc, xmlNodePtr cur,
379 * htmlNodeDumpFormat:
380 * @buf: the HTML buffer output
382 * @cur: the current node
383 * @format: should formatting spaces be added
385 * Dump an HTML node, recursive behaviour, children are printed too.
387 * Returns the number of bytes written or -1 in case of error
390 htmlNodeDumpFormat(xmlBufferPtr buf, xmlDocPtr doc, xmlNodePtr cur,
394 xmlOutputBufferPtr outbuf;
402 outbuf = (xmlOutputBufferPtr) xmlMalloc(sizeof(xmlOutputBuffer));
403 if (outbuf == NULL) {
404 htmlSaveErrMemory("allocating HTML output buffer");
407 memset(outbuf, 0, (size_t) sizeof(xmlOutputBuffer));
408 outbuf->buffer = buf;
409 outbuf->encoder = NULL;
410 outbuf->writecallback = NULL;
411 outbuf->closecallback = NULL;
412 outbuf->context = NULL;
416 htmlNodeDumpFormatOutput(outbuf, doc, cur, NULL, format);
418 ret = buf->use - use;
424 * @buf: the HTML buffer output
426 * @cur: the current node
428 * Dump an HTML node, recursive behaviour, children are printed too,
429 * and formatting returns are added.
431 * Returns the number of bytes written or -1 in case of error
434 htmlNodeDump(xmlBufferPtr buf, xmlDocPtr doc, xmlNodePtr cur) {
437 return(htmlNodeDumpFormat(buf, doc, cur, 1));
441 * htmlNodeDumpFileFormat:
442 * @out: the FILE pointer
444 * @cur: the current node
445 * @encoding: the document encoding
446 * @format: should formatting spaces be added
448 * Dump an HTML node, recursive behaviour, children are printed too.
450 * TODO: if encoding == NULL try to save in the doc encoding
452 * returns: the number of bytes written or -1 in case of failure.
455 htmlNodeDumpFileFormat(FILE *out, xmlDocPtr doc,
456 xmlNodePtr cur, const char *encoding, int format) {
457 xmlOutputBufferPtr buf;
458 xmlCharEncodingHandlerPtr handler = NULL;
463 if (encoding != NULL) {
466 enc = xmlParseCharEncoding(encoding);
467 if (enc != XML_CHAR_ENCODING_UTF8) {
468 handler = xmlFindCharEncodingHandler(encoding);
475 * Fallback to HTML or ASCII when the encoding is unspecified
478 handler = xmlFindCharEncodingHandler("HTML");
480 handler = xmlFindCharEncodingHandler("ascii");
483 * save the content to a temp buffer.
485 buf = xmlOutputBufferCreateFile(out, handler);
486 if (buf == NULL) return(0);
488 htmlNodeDumpFormatOutput(buf, doc, cur, encoding, format);
490 ret = xmlOutputBufferClose(buf);
496 * @out: the FILE pointer
498 * @cur: the current node
500 * Dump an HTML node, recursive behaviour, children are printed too,
501 * and formatting returns are added.
504 htmlNodeDumpFile(FILE *out, xmlDocPtr doc, xmlNodePtr cur) {
505 htmlNodeDumpFileFormat(out, doc, cur, NULL, 1);
509 * htmlDocDumpMemoryFormat:
511 * @mem: OUT: the memory pointer
512 * @size: OUT: the memory length
513 * @format: should formatting spaces be added
515 * Dump an HTML document in memory and return the xmlChar * and its size.
516 * It's up to the caller to free the memory.
519 htmlDocDumpMemoryFormat(xmlDocPtr cur, xmlChar**mem, int *size, int format) {
520 xmlOutputBufferPtr buf;
521 xmlCharEncodingHandlerPtr handler = NULL;
522 const char *encoding;
526 if ((mem == NULL) || (size == NULL))
534 encoding = (const char *) htmlGetMetaEncoding(cur);
536 if (encoding != NULL) {
539 enc = xmlParseCharEncoding(encoding);
540 if (enc != cur->charset) {
541 if (cur->charset != XML_CHAR_ENCODING_UTF8) {
550 handler = xmlFindCharEncodingHandler(encoding);
551 if (handler == NULL) {
557 handler = xmlFindCharEncodingHandler(encoding);
562 * Fallback to HTML or ASCII when the encoding is unspecified
565 handler = xmlFindCharEncodingHandler("HTML");
567 handler = xmlFindCharEncodingHandler("ascii");
569 buf = xmlAllocOutputBuffer(handler);
576 htmlDocContentDumpFormatOutput(buf, cur, NULL, format);
578 xmlOutputBufferFlush(buf);
579 if (buf->conv != NULL) {
580 *size = buf->conv->use;
581 *mem = xmlStrndup(buf->conv->content, *size);
583 *size = buf->buffer->use;
584 *mem = xmlStrndup(buf->buffer->content, *size);
586 (void)xmlOutputBufferClose(buf);
592 * @mem: OUT: the memory pointer
593 * @size: OUT: the memory length
595 * Dump an HTML document in memory and return the xmlChar * and its size.
596 * It's up to the caller to free the memory.
599 htmlDocDumpMemory(xmlDocPtr cur, xmlChar**mem, int *size) {
600 htmlDocDumpMemoryFormat(cur, mem, size, 1);
604 /************************************************************************
606 * Dumping HTML tree content to an I/O output buffer *
608 ************************************************************************/
610 void xmlNsListDumpOutput(xmlOutputBufferPtr buf, xmlNsPtr cur);
614 * @buf: the HTML buffer output
616 * @encoding: the encoding string
618 * TODO: check whether encoding is needed
620 * Dump the HTML document DTD, if any.
623 htmlDtdDumpOutput(xmlOutputBufferPtr buf, xmlDocPtr doc,
624 const char *encoding ATTRIBUTE_UNUSED) {
625 xmlDtdPtr cur = doc->intSubset;
628 htmlSaveErr(XML_SAVE_NO_DOCTYPE, (xmlNodePtr) doc, NULL);
631 xmlOutputBufferWriteString(buf, "<!DOCTYPE ");
632 xmlOutputBufferWriteString(buf, (const char *)cur->name);
633 if (cur->ExternalID != NULL) {
634 xmlOutputBufferWriteString(buf, " PUBLIC ");
635 xmlBufferWriteQuotedString(buf->buffer, cur->ExternalID);
636 if (cur->SystemID != NULL) {
637 xmlOutputBufferWriteString(buf, " ");
638 xmlBufferWriteQuotedString(buf->buffer, cur->SystemID);
640 } else if (cur->SystemID != NULL) {
641 xmlOutputBufferWriteString(buf, " SYSTEM ");
642 xmlBufferWriteQuotedString(buf->buffer, cur->SystemID);
644 xmlOutputBufferWriteString(buf, ">\n");
648 * htmlAttrDumpOutput:
649 * @buf: the HTML buffer output
651 * @cur: the attribute pointer
652 * @encoding: the encoding string
654 * Dump an HTML attribute
657 htmlAttrDumpOutput(xmlOutputBufferPtr buf, xmlDocPtr doc, xmlAttrPtr cur,
658 const char *encoding ATTRIBUTE_UNUSED) {
662 * TODO: The html output method should not escape a & character
663 * occurring in an attribute value immediately followed by
664 * a { character (see Section B.7.1 of the HTML 4.0 Recommendation).
670 xmlOutputBufferWriteString(buf, " ");
671 if ((cur->ns != NULL) && (cur->ns->prefix != NULL)) {
672 xmlOutputBufferWriteString(buf, (const char *)cur->ns->prefix);
673 xmlOutputBufferWriteString(buf, ":");
675 xmlOutputBufferWriteString(buf, (const char *)cur->name);
676 if ((cur->children != NULL) && (!htmlIsBooleanAttr(cur->name))) {
677 value = xmlNodeListGetString(doc, cur->children, 0);
679 xmlOutputBufferWriteString(buf, "=");
680 if ((cur->ns == NULL) && (cur->parent != NULL) &&
681 (cur->parent->ns == NULL) &&
682 ((!xmlStrcasecmp(cur->name, BAD_CAST "href")) ||
683 (!xmlStrcasecmp(cur->name, BAD_CAST "action")) ||
684 (!xmlStrcasecmp(cur->name, BAD_CAST "src")) ||
685 ((!xmlStrcasecmp(cur->name, BAD_CAST "name")) &&
686 (!xmlStrcasecmp(cur->parent->name, BAD_CAST "a"))))) {
688 xmlChar *tmp = value;
690 while (IS_BLANK_CH(*tmp)) tmp++;
692 escaped = xmlURIEscapeStr(tmp, BAD_CAST"@/:=?;#%&,+");
693 if (escaped != NULL) {
694 xmlBufferWriteQuotedString(buf->buffer, escaped);
697 xmlBufferWriteQuotedString(buf->buffer, value);
700 xmlBufferWriteQuotedString(buf->buffer, value);
704 xmlOutputBufferWriteString(buf, "=\"\"");
710 * htmlAttrListDumpOutput:
711 * @buf: the HTML buffer output
713 * @cur: the first attribute pointer
714 * @encoding: the encoding string
716 * Dump a list of HTML attributes
719 htmlAttrListDumpOutput(xmlOutputBufferPtr buf, xmlDocPtr doc, xmlAttrPtr cur, const char *encoding) {
723 while (cur != NULL) {
724 htmlAttrDumpOutput(buf, doc, cur, encoding);
732 * htmlNodeListDumpOutput:
733 * @buf: the HTML buffer output
735 * @cur: the first node
736 * @encoding: the encoding string
737 * @format: should formatting spaces be added
739 * Dump an HTML node list, recursive behaviour, children are printed too.
742 htmlNodeListDumpOutput(xmlOutputBufferPtr buf, xmlDocPtr doc,
743 xmlNodePtr cur, const char *encoding, int format) {
747 while (cur != NULL) {
748 htmlNodeDumpFormatOutput(buf, doc, cur, encoding, format);
754 * htmlNodeDumpFormatOutput:
755 * @buf: the HTML buffer output
757 * @cur: the current node
758 * @encoding: the encoding string
759 * @format: should formatting spaces be added
761 * Dump an HTML node, recursive behaviour, children are printed too.
764 htmlNodeDumpFormatOutput(xmlOutputBufferPtr buf, xmlDocPtr doc,
765 xmlNodePtr cur, const char *encoding, int format) {
766 const htmlElemDesc * info;
770 if ((cur == NULL) || (buf == NULL)) {
776 if (cur->type == XML_DTD_NODE)
778 if ((cur->type == XML_HTML_DOCUMENT_NODE) ||
779 (cur->type == XML_DOCUMENT_NODE)){
780 htmlDocContentDumpOutput(buf, (xmlDocPtr) cur, encoding);
783 if (cur->type == HTML_TEXT_NODE) {
784 if (cur->content != NULL) {
785 if (((cur->name == (const xmlChar *)xmlStringText) ||
786 (cur->name != (const xmlChar *)xmlStringTextNoenc)) &&
787 ((cur->parent == NULL) ||
788 ((xmlStrcasecmp(cur->parent->name, BAD_CAST "script")) &&
789 (xmlStrcasecmp(cur->parent->name, BAD_CAST "style"))))) {
792 buffer = xmlEncodeEntitiesReentrant(doc, cur->content);
793 if (buffer != NULL) {
794 xmlOutputBufferWriteString(buf, (const char *)buffer);
798 xmlOutputBufferWriteString(buf, (const char *)cur->content);
803 if (cur->type == HTML_COMMENT_NODE) {
804 if (cur->content != NULL) {
805 xmlOutputBufferWriteString(buf, "<!--");
806 xmlOutputBufferWriteString(buf, (const char *)cur->content);
807 xmlOutputBufferWriteString(buf, "-->");
811 if (cur->type == HTML_PI_NODE) {
812 if (cur->name == NULL)
814 xmlOutputBufferWriteString(buf, "<?");
815 xmlOutputBufferWriteString(buf, (const char *)cur->name);
816 if (cur->content != NULL) {
817 xmlOutputBufferWriteString(buf, " ");
818 xmlOutputBufferWriteString(buf, (const char *)cur->content);
820 xmlOutputBufferWriteString(buf, ">");
823 if (cur->type == HTML_ENTITY_REF_NODE) {
824 xmlOutputBufferWriteString(buf, "&");
825 xmlOutputBufferWriteString(buf, (const char *)cur->name);
826 xmlOutputBufferWriteString(buf, ";");
829 if (cur->type == HTML_PRESERVE_NODE) {
830 if (cur->content != NULL) {
831 xmlOutputBufferWriteString(buf, (const char *)cur->content);
837 * Get specific HTML info for that node.
840 info = htmlTagLookup(cur->name);
844 xmlOutputBufferWriteString(buf, "<");
845 if ((cur->ns != NULL) && (cur->ns->prefix != NULL)) {
846 xmlOutputBufferWriteString(buf, (const char *)cur->ns->prefix);
847 xmlOutputBufferWriteString(buf, ":");
849 xmlOutputBufferWriteString(buf, (const char *)cur->name);
851 xmlNsListDumpOutput(buf, cur->nsDef);
852 if (cur->properties != NULL)
853 htmlAttrListDumpOutput(buf, doc, cur->properties, encoding);
855 if ((info != NULL) && (info->empty)) {
856 xmlOutputBufferWriteString(buf, ">");
857 if ((format) && (!info->isinline) && (cur->next != NULL)) {
858 if ((cur->next->type != HTML_TEXT_NODE) &&
859 (cur->next->type != HTML_ENTITY_REF_NODE) &&
860 (cur->parent != NULL) &&
861 (cur->parent->name != NULL) &&
862 (cur->parent->name[0] != 'p')) /* p, pre, param */
863 xmlOutputBufferWriteString(buf, "\n");
867 if (((cur->type == XML_ELEMENT_NODE) || (cur->content == NULL)) &&
868 (cur->children == NULL)) {
869 if ((info != NULL) && (info->saveEndTag != 0) &&
870 (xmlStrcmp(BAD_CAST info->name, BAD_CAST "html")) &&
871 (xmlStrcmp(BAD_CAST info->name, BAD_CAST "body"))) {
872 xmlOutputBufferWriteString(buf, ">");
874 xmlOutputBufferWriteString(buf, "></");
875 if ((cur->ns != NULL) && (cur->ns->prefix != NULL)) {
876 xmlOutputBufferWriteString(buf, (const char *)cur->ns->prefix);
877 xmlOutputBufferWriteString(buf, ":");
879 xmlOutputBufferWriteString(buf, (const char *)cur->name);
880 xmlOutputBufferWriteString(buf, ">");
882 if ((format) && (cur->next != NULL) &&
883 (info != NULL) && (!info->isinline)) {
884 if ((cur->next->type != HTML_TEXT_NODE) &&
885 (cur->next->type != HTML_ENTITY_REF_NODE) &&
886 (cur->parent != NULL) &&
887 (cur->parent->name != NULL) &&
888 (cur->parent->name[0] != 'p')) /* p, pre, param */
889 xmlOutputBufferWriteString(buf, "\n");
893 xmlOutputBufferWriteString(buf, ">");
894 if ((cur->type != XML_ELEMENT_NODE) &&
895 (cur->content != NULL)) {
897 * Uses the OutputBuffer property to automatically convert
898 * invalids to charrefs
901 xmlOutputBufferWriteString(buf, (const char *) cur->content);
903 if (cur->children != NULL) {
904 if ((format) && (info != NULL) && (!info->isinline) &&
905 (cur->children->type != HTML_TEXT_NODE) &&
906 (cur->children->type != HTML_ENTITY_REF_NODE) &&
907 (cur->children != cur->last) &&
908 (cur->name != NULL) &&
909 (cur->name[0] != 'p')) /* p, pre, param */
910 xmlOutputBufferWriteString(buf, "\n");
911 htmlNodeListDumpOutput(buf, doc, cur->children, encoding, format);
912 if ((format) && (info != NULL) && (!info->isinline) &&
913 (cur->last->type != HTML_TEXT_NODE) &&
914 (cur->last->type != HTML_ENTITY_REF_NODE) &&
915 (cur->children != cur->last) &&
916 (cur->name != NULL) &&
917 (cur->name[0] != 'p')) /* p, pre, param */
918 xmlOutputBufferWriteString(buf, "\n");
920 xmlOutputBufferWriteString(buf, "</");
921 if ((cur->ns != NULL) && (cur->ns->prefix != NULL)) {
922 xmlOutputBufferWriteString(buf, (const char *)cur->ns->prefix);
923 xmlOutputBufferWriteString(buf, ":");
925 xmlOutputBufferWriteString(buf, (const char *)cur->name);
926 xmlOutputBufferWriteString(buf, ">");
927 if ((format) && (info != NULL) && (!info->isinline) &&
928 (cur->next != NULL)) {
929 if ((cur->next->type != HTML_TEXT_NODE) &&
930 (cur->next->type != HTML_ENTITY_REF_NODE) &&
931 (cur->parent != NULL) &&
932 (cur->parent->name != NULL) &&
933 (cur->parent->name[0] != 'p')) /* p, pre, param */
934 xmlOutputBufferWriteString(buf, "\n");
939 * htmlNodeDumpOutput:
940 * @buf: the HTML buffer output
942 * @cur: the current node
943 * @encoding: the encoding string
945 * Dump an HTML node, recursive behaviour, children are printed too,
946 * and formatting returns/spaces are added.
949 htmlNodeDumpOutput(xmlOutputBufferPtr buf, xmlDocPtr doc,
950 xmlNodePtr cur, const char *encoding) {
951 htmlNodeDumpFormatOutput(buf, doc, cur, encoding, 1);
955 * htmlDocContentDumpFormatOutput:
956 * @buf: the HTML buffer output
958 * @encoding: the encoding string
959 * @format: should formatting spaces be added
961 * Dump an HTML document.
964 htmlDocContentDumpFormatOutput(xmlOutputBufferPtr buf, xmlDocPtr cur,
965 const char *encoding, int format) {
970 if ((buf == NULL) || (cur == NULL))
974 * force to output the stuff as HTML, especially for entities
977 cur->type = XML_HTML_DOCUMENT_NODE;
978 if (cur->intSubset != NULL) {
979 htmlDtdDumpOutput(buf, cur, NULL);
981 if (cur->children != NULL) {
982 htmlNodeListDumpOutput(buf, cur, cur->children, encoding, format);
984 xmlOutputBufferWriteString(buf, "\n");
985 cur->type = (xmlElementType) type;
989 * htmlDocContentDumpOutput:
990 * @buf: the HTML buffer output
992 * @encoding: the encoding string
994 * Dump an HTML document. Formatting returns/spaces are added.
997 htmlDocContentDumpOutput(xmlOutputBufferPtr buf, xmlDocPtr cur,
998 const char *encoding) {
999 htmlDocContentDumpFormatOutput(buf, cur, encoding, 1);
1002 /************************************************************************
1004 * Saving functions front-ends *
1006 ************************************************************************/
1011 * @cur: the document
1013 * Dump an HTML document to an open FILE.
1015 * returns: the number of bytes written or -1 in case of failure.
1018 htmlDocDump(FILE *f, xmlDocPtr cur) {
1019 xmlOutputBufferPtr buf;
1020 xmlCharEncodingHandlerPtr handler = NULL;
1021 const char *encoding;
1026 if ((cur == NULL) || (f == NULL)) {
1030 encoding = (const char *) htmlGetMetaEncoding(cur);
1032 if (encoding != NULL) {
1033 xmlCharEncoding enc;
1035 enc = xmlParseCharEncoding(encoding);
1036 if (enc != cur->charset) {
1037 if (cur->charset != XML_CHAR_ENCODING_UTF8) {
1044 handler = xmlFindCharEncodingHandler(encoding);
1045 if (handler == NULL)
1048 handler = xmlFindCharEncodingHandler(encoding);
1053 * Fallback to HTML or ASCII when the encoding is unspecified
1055 if (handler == NULL)
1056 handler = xmlFindCharEncodingHandler("HTML");
1057 if (handler == NULL)
1058 handler = xmlFindCharEncodingHandler("ascii");
1060 buf = xmlOutputBufferCreateFile(f, handler);
1061 if (buf == NULL) return(-1);
1062 htmlDocContentDumpOutput(buf, cur, NULL);
1064 ret = xmlOutputBufferClose(buf);
1070 * @filename: the filename (or URL)
1071 * @cur: the document
1073 * Dump an HTML document to a file. If @filename is "-" the stdout file is
1075 * returns: the number of bytes written or -1 in case of failure.
1078 htmlSaveFile(const char *filename, xmlDocPtr cur) {
1079 xmlOutputBufferPtr buf;
1080 xmlCharEncodingHandlerPtr handler = NULL;
1081 const char *encoding;
1084 if ((cur == NULL) || (filename == NULL))
1089 encoding = (const char *) htmlGetMetaEncoding(cur);
1091 if (encoding != NULL) {
1092 xmlCharEncoding enc;
1094 enc = xmlParseCharEncoding(encoding);
1095 if (enc != cur->charset) {
1096 if (cur->charset != XML_CHAR_ENCODING_UTF8) {
1103 handler = xmlFindCharEncodingHandler(encoding);
1104 if (handler == NULL)
1110 * Fallback to HTML or ASCII when the encoding is unspecified
1112 if (handler == NULL)
1113 handler = xmlFindCharEncodingHandler("HTML");
1114 if (handler == NULL)
1115 handler = xmlFindCharEncodingHandler("ascii");
1118 * save the content to a temp buffer.
1120 buf = xmlOutputBufferCreateFilename(filename, handler, cur->compression);
1121 if (buf == NULL) return(0);
1123 htmlDocContentDumpOutput(buf, cur, NULL);
1125 ret = xmlOutputBufferClose(buf);
1130 * htmlSaveFileFormat:
1131 * @filename: the filename
1132 * @cur: the document
1133 * @format: should formatting spaces be added
1134 * @encoding: the document encoding
1136 * Dump an HTML document to a file using a given encoding.
1138 * returns: the number of bytes written or -1 in case of failure.
1141 htmlSaveFileFormat(const char *filename, xmlDocPtr cur,
1142 const char *encoding, int format) {
1143 xmlOutputBufferPtr buf;
1144 xmlCharEncodingHandlerPtr handler = NULL;
1147 if ((cur == NULL) || (filename == NULL))
1152 if (encoding != NULL) {
1153 xmlCharEncoding enc;
1155 enc = xmlParseCharEncoding(encoding);
1156 if (enc != cur->charset) {
1157 if (cur->charset != XML_CHAR_ENCODING_UTF8) {
1164 handler = xmlFindCharEncodingHandler(encoding);
1165 if (handler == NULL)
1167 htmlSetMetaEncoding(cur, (const xmlChar *) encoding);
1170 htmlSetMetaEncoding(cur, (const xmlChar *) "UTF-8");
1174 * Fallback to HTML or ASCII when the encoding is unspecified
1176 if (handler == NULL)
1177 handler = xmlFindCharEncodingHandler("HTML");
1178 if (handler == NULL)
1179 handler = xmlFindCharEncodingHandler("ascii");
1182 * save the content to a temp buffer.
1184 buf = xmlOutputBufferCreateFilename(filename, handler, 0);
1185 if (buf == NULL) return(0);
1187 htmlDocContentDumpFormatOutput(buf, cur, encoding, format);
1189 ret = xmlOutputBufferClose(buf);
1195 * @filename: the filename
1196 * @cur: the document
1197 * @encoding: the document encoding
1199 * Dump an HTML document to a file using a given encoding
1200 * and formatting returns/spaces are added.
1202 * returns: the number of bytes written or -1 in case of failure.
1205 htmlSaveFileEnc(const char *filename, xmlDocPtr cur, const char *encoding) {
1206 return(htmlSaveFileFormat(filename, cur, encoding, 1));
1209 #endif /* LIBXML_OUTPUT_ENABLED */
1211 #define bottom_HTMLtree
1212 #include "elfgcchack.h"
1213 #endif /* LIBXML_HTML_ENABLED */