2 * HTMLtree.c : implementation of access function for an HTML tree.
4 * See Copyright for the status of this software.
12 #ifdef LIBXML_HTML_ENABLED
14 #include <string.h> /* for memset() only ! */
23 #include <libxml/xmlmemory.h>
24 #include <libxml/HTMLparser.h>
25 #include <libxml/HTMLtree.h>
26 #include <libxml/entities.h>
27 #include <libxml/valid.h>
28 #include <libxml/xmlerror.h>
29 #include <libxml/parserInternals.h>
30 #include <libxml/globals.h>
31 #include <libxml/uri.h>
33 /************************************************************************
35 * Getting/Setting encoding meta tags *
37 ************************************************************************/
40 * htmlGetMetaEncoding:
43 * Encoding definition lookup in the Meta tags
45 * Returns the current encoding as flagged in the HTML source
/*
 * Looks for the charset declared through a meta element whose http-equiv
 * attribute is Content-Type. Element names are matched with xmlStrEqual;
 * attribute names and the http-equiv value are matched with xmlStrcasecmp.
 * NOTE(review): the embedded line numbers jump, so the traversal
 * assignments and the return statements are not visible here.
 */
48 htmlGetMetaEncoding(htmlDocPtr doc) {
50 const xmlChar *content;
51 const xmlChar *encoding;
/* First group of checks: an element named html, head or meta. */
61 if ((cur->type == XML_ELEMENT_NODE) && (cur->name != NULL)) {
62 if (xmlStrEqual(cur->name, BAD_CAST"html"))
64 if (xmlStrEqual(cur->name, BAD_CAST"head"))
66 if (xmlStrEqual(cur->name, BAD_CAST"meta"))
/* Second group of checks: only head or meta are of interest. */
79 if ((cur->type == XML_ELEMENT_NODE) && (cur->name != NULL)) {
80 if (xmlStrEqual(cur->name, BAD_CAST"head"))
82 if (xmlStrEqual(cur->name, BAD_CAST"meta"))
93 * Search the meta elements
97 if ((cur->type == XML_ELEMENT_NODE) && (cur->name != NULL)) {
98 if (xmlStrEqual(cur->name, BAD_CAST"meta")) {
99 xmlAttrPtr attr = cur->properties;
101 const xmlChar *value;
/* Only attributes whose value is exactly one text node are examined. */
105 while (attr != NULL) {
106 if ((attr->children != NULL) &&
107 (attr->children->type == XML_TEXT_NODE) &&
108 (attr->children->next == NULL)) {
109 value = attr->children->content;
110 if ((!xmlStrcasecmp(attr->name, BAD_CAST"http-equiv"))
111 && (!xmlStrcasecmp(value, BAD_CAST"Content-Type")))
113 else if ((value != NULL)
114 && (!xmlStrcasecmp(attr->name, BAD_CAST"content")))
116 if ((http != 0) && (content != NULL))
/*
 * Locate the charset key inside the content value. Three capitalisations
 * of "charset=" are tried through xmlStrstr, then the same three with a
 * space before the equal sign; other spellings such as "charSet=" are
 * not among the literals tried.
 */
128 encoding = xmlStrstr(content, BAD_CAST"charset=");
129 if (encoding == NULL)
130 encoding = xmlStrstr(content, BAD_CAST"Charset=");
131 if (encoding == NULL)
132 encoding = xmlStrstr(content, BAD_CAST"CHARSET=");
133 if (encoding != NULL) {
136 encoding = xmlStrstr(content, BAD_CAST"charset =");
137 if (encoding == NULL)
138 encoding = xmlStrstr(content, BAD_CAST"Charset =");
139 if (encoding == NULL)
140 encoding = xmlStrstr(content, BAD_CAST"CHARSET =");
141 if (encoding != NULL)
/* Skip spaces and tabs in front of the value. */
144 if (encoding != NULL) {
145 while ((*encoding == ' ') || (*encoding == '\t')) encoding++;
151 * htmlSetMetaEncoding:
153 * @encoding: the encoding string
155 * Sets the current encoding in the Meta tags
156 * NOTE: this will not change the document content encoding, just
157 * the META flag associated.
159 * Returns 0 in case of success and -1 in case of error
/*
 * Maintains the meta element carrying the Content-Type declaration.
 * The visible lines build the replacement value
 * "text/html; charset=<encoding>", look for html/head/meta elements
 * (names matched case-insensitively with xmlStrcasecmp), create a new
 * meta element under head, or rewrite the content attribute of an
 * existing one.
 * NOTE(review): lines are missing from this listing (returns, the body
 * of the NULL-encoding branch, some enclosing conditions).
 */
162 htmlSetMetaEncoding(htmlDocPtr doc, const xmlChar *encoding) {
163 htmlNodePtr cur, meta = NULL, head = NULL;
164 const xmlChar *content = NULL;
165 char newcontent[100];
172 /* html isn't a real encoding it's just libxml2 way to get entities */
173 if (!xmlStrcasecmp(encoding, BAD_CAST "html"))
/*
 * The value is formatted into the fixed 100-byte newcontent buffer;
 * snprintf bounds the write, so an overlong encoding name would be
 * truncated. The last byte is also forced to 0 afterwards.
 */
176 if (encoding != NULL) {
177 snprintf(newcontent, sizeof(newcontent), "text/html; charset=%s",
179 newcontent[sizeof(newcontent) - 1] = 0;
/* First group of checks: an element named html, head or meta. */
187 while (cur != NULL) {
188 if ((cur->type == XML_ELEMENT_NODE) && (cur->name != NULL)) {
189 if (xmlStrcasecmp(cur->name, BAD_CAST"html") == 0)
191 if (xmlStrcasecmp(cur->name, BAD_CAST"head") == 0)
193 if (xmlStrcasecmp(cur->name, BAD_CAST"meta") == 0)
/* Second group of checks: only head or meta are of interest. */
205 while (cur != NULL) {
206 if ((cur->type == XML_ELEMENT_NODE) && (cur->name != NULL)) {
207 if (xmlStrcasecmp(cur->name, BAD_CAST"head") == 0)
209 if (xmlStrcasecmp(cur->name, BAD_CAST"meta") == 0) {
220 if (cur->children == NULL)
226 * Search and update all the remaining the meta elements carrying
227 * encoding informations
229 while (cur != NULL) {
230 if ((cur->type == XML_ELEMENT_NODE) && (cur->name != NULL)) {
231 if (xmlStrcasecmp(cur->name, BAD_CAST"meta") == 0) {
232 xmlAttrPtr attr = cur->properties;
234 const xmlChar *value;
/* Only attributes whose value is exactly one text node are examined. */
238 while (attr != NULL) {
239 if ((attr->children != NULL) &&
240 (attr->children->type == XML_TEXT_NODE) &&
241 (attr->children->next == NULL)) {
242 value = attr->children->content;
243 if ((!xmlStrcasecmp(attr->name, BAD_CAST"http-equiv"))
244 && (!xmlStrcasecmp(value, BAD_CAST"Content-Type")))
248 if ((value != NULL) &&
249 (!xmlStrcasecmp(attr->name, BAD_CAST"content")))
252 if ((http != 0) && (content != NULL))
257 if ((http != 0) && (content != NULL)) {
/*
 * Creation path (any enclosing condition is not visible here): the new
 * meta element becomes the first child of head, appended when head has
 * no children and placed before the current first child otherwise.
 */
268 if ((encoding != NULL) && (head != NULL)) {
270 * Create a new Meta element with the right attributes
273 meta = xmlNewDocNode(doc, NULL, BAD_CAST"meta", NULL);
274 if (head->children == NULL)
275 xmlAddChild(head, meta);
277 xmlAddPrevSibling(head->children, meta);
278 xmlNewProp(meta, BAD_CAST"http-equiv", BAD_CAST"Content-Type");
279 xmlNewProp(meta, BAD_CAST"content", BAD_CAST newcontent);
282 /* remove the meta tag if NULL is passed */
283 if (encoding == NULL) {
287 /* change the document only if there is a real encoding change */
/*
 * The content attribute is rewritten only when the current content value
 * does not already contain the requested encoding (xmlStrcasestr test).
 */
288 else if (xmlStrcasestr(content, encoding) == NULL) {
289 xmlSetProp(meta, BAD_CAST"content", BAD_CAST newcontent);
300 * These are the HTML attributes which will be output
301 * in minimized form, i.e. <option selected="selected"> will be
302 * output as <option selected>, as per XSLT 1.0 16.2 "HTML Output Method"
/*
 * Name table scanned by htmlIsBooleanAttr, whose loop stops at a NULL
 * entry. The remaining entries, the terminator and the closing brace are
 * not visible in this listing.
 */
305 static const char* htmlBooleanAttrs[] = {
306 "checked", "compact", "declare", "defer", "disabled", "ismap",
307 "multiple", "nohref", "noresize", "noshade", "nowrap", "readonly",
314 * @name: the name of the attribute to check
316 * Determine if a given attribute is a boolean attribute.
318 * returns: false if the attribute is not boolean, true otherwise.
/*
 * Compares @name case-insensitively (xmlStrcasecmp) with each entry of
 * htmlBooleanAttrs until a NULL entry ends the scan. The statements that
 * produce the result are not visible in this listing.
 */
321 htmlIsBooleanAttr(const xmlChar *name)
325 while (htmlBooleanAttrs[i] != NULL) {
326 if (xmlStrcasecmp((const xmlChar *)htmlBooleanAttrs[i], name) == 0)
333 #ifdef LIBXML_OUTPUT_ENABLED
335 * private routine exported from xmlIO.c
338 xmlAllocOutputBufferInternal(xmlCharEncodingHandlerPtr encoder);
339 /************************************************************************
341 * Output error handlers *
343 ************************************************************************/
346 * @extra: extra information
348 * Handle an out of memory condition
/*
 * Reports XML_ERR_NO_MEMORY in the XML_FROM_OUTPUT domain through
 * __xmlSimpleError, with no node and no message, passing @extra along.
 */
351 htmlSaveErrMemory(const char *extra)
353 __xmlSimpleError(XML_FROM_OUTPUT, XML_ERR_NO_MEMORY, NULL, NULL, extra);
358 * @code: the error number
359 * @node: the location of the error.
360 * @extra: extra information
362 * Handle an error raised while saving (serializing) HTML output
/*
 * Picks a message for the given save error code and reports it in the
 * XML_FROM_OUTPUT domain through __xmlSimpleError, together with @node
 * and @extra. Only the XML_SAVE_UNKNOWN_ENCODING message contains a %s
 * placeholder.
 */
365 htmlSaveErr(int code, xmlNodePtr node, const char *extra)
367 const char *msg = NULL;
370 case XML_SAVE_NOT_UTF8:
371 msg = "string is not in UTF-8\n";
373 case XML_SAVE_CHAR_INVALID:
374 msg = "invalid character value\n";
376 case XML_SAVE_UNKNOWN_ENCODING:
377 msg = "unknown encoding %s\n";
379 case XML_SAVE_NO_DOCTYPE:
380 msg = "HTML has no DOCTYPE\n";
/* Generic message; presumably the default case, its label is not visible here. */
383 msg = "unexpected error number\n";
385 __xmlSimpleError(XML_FROM_OUTPUT, code, node, msg, extra);
388 /************************************************************************
390 * Dumping HTML tree content to a simple buffer *
392 ************************************************************************/
395 htmlNodeDumpFormat(xmlBufferPtr buf, xmlDocPtr doc, xmlNodePtr cur,
399 * htmlNodeDumpFormat:
400 * @buf: the HTML buffer output
402 * @cur: the current node
403 * @format: should formatting spaces be added
405 * Dump an HTML node, recursive behaviour, children are printed too.
407 * Returns the number of bytes written or -1 in case of error
/*
 * Dumps a node into the caller's xmlBuffer. A temporary xmlOutputBuffer
 * is allocated with xmlMalloc, zeroed, pointed at @buf and given no
 * encoder, callbacks or context, then handed to htmlNodeDumpFormatOutput
 * with a NULL encoding. The result is computed as buf->use minus the
 * value held in use (set on a line not visible in this listing).
 */
410 htmlNodeDumpFormat(xmlBufferPtr buf, xmlDocPtr doc, xmlNodePtr cur,
414 xmlOutputBufferPtr outbuf;
422 outbuf = (xmlOutputBufferPtr) xmlMalloc(sizeof(xmlOutputBuffer));
423 if (outbuf == NULL) {
424 htmlSaveErrMemory("allocating HTML output buffer");
427 memset(outbuf, 0, (size_t) sizeof(xmlOutputBuffer));
428 outbuf->buffer = buf;
429 outbuf->encoder = NULL;
430 outbuf->writecallback = NULL;
431 outbuf->closecallback = NULL;
432 outbuf->context = NULL;
436 htmlNodeDumpFormatOutput(outbuf, doc, cur, NULL, format);
438 ret = buf->use - use;
444 * @buf: the HTML buffer output
446 * @cur: the current node
448 * Dump an HTML node, recursive behaviour, children are printed too,
449 * and formatting returns are added.
451 * Returns the number of bytes written or -1 in case of error
/* Forwards to htmlNodeDumpFormat with format fixed to 1 and returns its result. */
454 htmlNodeDump(xmlBufferPtr buf, xmlDocPtr doc, xmlNodePtr cur) {
457 return(htmlNodeDumpFormat(buf, doc, cur, 1));
461 * htmlNodeDumpFileFormat:
462 * @out: the FILE pointer
464 * @cur: the current node
465 * @encoding: the document encoding
466 * @format: should formatting spaces be added
468 * Dump an HTML node, recursive behaviour, children are printed too.
470 * TODO: if encoding == NULL try to save in the doc encoding
472 * returns: the number of bytes written or -1 in case of failure.
/*
 * Dumps a node to a FILE through an xmlOutputBuffer. When @encoding is
 * given and does not parse as UTF-8, a conversion handler is looked up
 * for it; the "HTML" and "ascii" handlers serve as fallbacks (their
 * guarding conditions are not visible in this listing). ret receives the
 * value of xmlOutputBufferClose.
 */
475 htmlNodeDumpFileFormat(FILE *out, xmlDocPtr doc,
476 xmlNodePtr cur, const char *encoding, int format) {
477 xmlOutputBufferPtr buf;
478 xmlCharEncodingHandlerPtr handler = NULL;
483 if (encoding != NULL) {
486 enc = xmlParseCharEncoding(encoding);
487 if (enc != XML_CHAR_ENCODING_UTF8) {
488 handler = xmlFindCharEncodingHandler(encoding);
490 htmlSaveErr(XML_SAVE_UNKNOWN_ENCODING, NULL, encoding);
495 * Fallback to HTML or ASCII when the encoding is unspecified
498 handler = xmlFindCharEncodingHandler("HTML");
500 handler = xmlFindCharEncodingHandler("ascii");
503 * save the content to a temp buffer.
505 buf = xmlOutputBufferCreateFile(out, handler);
/*
 * NOTE(review): yields 0 when the output buffer cannot be created, while
 * the function documentation announces -1 on failure -- confirm which
 * value callers expect.
 */
506 if (buf == NULL) return(0);
508 htmlNodeDumpFormatOutput(buf, doc, cur, encoding, format);
510 ret = xmlOutputBufferClose(buf);
516 * @out: the FILE pointer
518 * @cur: the current node
520 * Dump an HTML node, recursive behaviour, children are printed too,
521 * and formatting returns are added.
/*
 * Forwards to htmlNodeDumpFileFormat with a NULL encoding and format 1;
 * the value returned by that call is not used on the visible line.
 */
524 htmlNodeDumpFile(FILE *out, xmlDocPtr doc, xmlNodePtr cur) {
525 htmlNodeDumpFileFormat(out, doc, cur, NULL, 1);
529 * htmlDocDumpMemoryFormat:
531 * @mem: OUT: the memory pointer
532 * @size: OUT: the memory length
533 * @format: should formatting spaces be added
535 * Dump an HTML document in memory and return the xmlChar * and its size.
536 * It's up to the caller to free the memory.
/*
 * Serialises the document into an in-memory output buffer and stores a
 * copy of the bytes in *mem and their count in *size. The encoding name
 * comes from htmlGetMetaEncoding; "HTML" and "ascii" handlers serve as
 * fallbacks (their guarding conditions are not visible in this listing).
 */
539 htmlDocDumpMemoryFormat(xmlDocPtr cur, xmlChar**mem, int *size, int format) {
540 xmlOutputBufferPtr buf;
541 xmlCharEncodingHandlerPtr handler = NULL;
542 const char *encoding;
546 if ((mem == NULL) || (size == NULL))
554 encoding = (const char *) htmlGetMetaEncoding(cur);
556 if (encoding != NULL) {
559 enc = xmlParseCharEncoding(encoding);
560 if (enc != cur->charset) {
561 if (cur->charset != XML_CHAR_ENCODING_UTF8) {
570 handler = xmlFindCharEncodingHandler(encoding);
572 htmlSaveErr(XML_SAVE_UNKNOWN_ENCODING, NULL, encoding);
575 handler = xmlFindCharEncodingHandler(encoding);
580 * Fallback to HTML or ASCII when the encoding is unspecified
583 handler = xmlFindCharEncodingHandler("HTML");
585 handler = xmlFindCharEncodingHandler("ascii");
587 buf = xmlAllocOutputBufferInternal(handler);
/* The content dump is called with a NULL encoding argument. */
594 htmlDocContentDumpFormatOutput(buf, cur, NULL, format);
596 xmlOutputBufferFlush(buf);
/*
 * Copy from the conversion buffer when buf->conv exists; the second pair
 * of assignments copies from the raw buffer (the else line is not
 * visible here).
 * NOTE(review): the xmlStrndup results are not checked on the visible
 * lines -- confirm how callers detect an allocation failure.
 */
597 if (buf->conv != NULL) {
598 *size = buf->conv->use;
599 *mem = xmlStrndup(buf->conv->content, *size);
601 *size = buf->buffer->use;
602 *mem = xmlStrndup(buf->buffer->content, *size);
604 (void)xmlOutputBufferClose(buf);
610 * @mem: OUT: the memory pointer
611 * @size: OUT: the memory length
613 * Dump an HTML document in memory and return the xmlChar * and its size.
614 * It's up to the caller to free the memory.
/* Forwards to htmlDocDumpMemoryFormat with format fixed to 1. */
617 htmlDocDumpMemory(xmlDocPtr cur, xmlChar**mem, int *size) {
618 htmlDocDumpMemoryFormat(cur, mem, size, 1);
622 /************************************************************************
624 * Dumping HTML tree content to an I/O output buffer *
626 ************************************************************************/
628 void xmlNsListDumpOutput(xmlOutputBufferPtr buf, xmlNsPtr cur);
632 * @buf: the HTML buffer output
634 * @encoding: the encoding string
636 * TODO: check whether encoding is needed
638 * Dump the HTML document DTD, if any.
/*
 * Writes the DOCTYPE declaration of doc->intSubset: the name, then either
 * PUBLIC with the external identifier (plus the system identifier when
 * present) or SYSTEM with the system identifier, then ">" and a newline.
 * XML_SAVE_NO_DOCTYPE is reported through htmlSaveErr (its guarding
 * condition is not visible in this listing). @encoding is unused.
 */
641 htmlDtdDumpOutput(xmlOutputBufferPtr buf, xmlDocPtr doc,
642 const char *encoding ATTRIBUTE_UNUSED) {
643 xmlDtdPtr cur = doc->intSubset;
646 htmlSaveErr(XML_SAVE_NO_DOCTYPE, (xmlNodePtr) doc, NULL);
649 xmlOutputBufferWriteString(buf, "<!DOCTYPE ");
650 xmlOutputBufferWriteString(buf, (const char *)cur->name);
/*
 * The identifiers are written with xmlBufferWriteQuotedString directly
 * on buf->buffer, unlike the surrounding xmlOutputBufferWriteString calls.
 */
651 if (cur->ExternalID != NULL) {
652 xmlOutputBufferWriteString(buf, " PUBLIC ");
653 xmlBufferWriteQuotedString(buf->buffer, cur->ExternalID);
654 if (cur->SystemID != NULL) {
655 xmlOutputBufferWriteString(buf, " ");
656 xmlBufferWriteQuotedString(buf->buffer, cur->SystemID);
658 } else if (cur->SystemID != NULL) {
659 xmlOutputBufferWriteString(buf, " SYSTEM ");
660 xmlBufferWriteQuotedString(buf->buffer, cur->SystemID);
662 xmlOutputBufferWriteString(buf, ">\n");
666 * htmlAttrDumpOutput:
667 * @buf: the HTML buffer output
669 * @cur: the attribute pointer
670 * @encoding: the encoding string
672 * Dump an HTML attribute
/*
 * Writes one attribute as a space, an optional "prefix:" and the name.
 * A value is looked up only when the attribute has children and
 * htmlIsBooleanAttr rejects its name, so boolean attributes are emitted
 * as the bare name. @encoding is unused.
 */
675 htmlAttrDumpOutput(xmlOutputBufferPtr buf, xmlDocPtr doc, xmlAttrPtr cur,
676 const char *encoding ATTRIBUTE_UNUSED) {
680 * TODO: The html output method should not escape a & character
681 * occurring in an attribute value immediately followed by
682 * a { character (see Section B.7.1 of the HTML 4.0 Recommendation).
688 xmlOutputBufferWriteString(buf, " ");
689 if ((cur->ns != NULL) && (cur->ns->prefix != NULL)) {
690 xmlOutputBufferWriteString(buf, (const char *)cur->ns->prefix);
691 xmlOutputBufferWriteString(buf, ":");
693 xmlOutputBufferWriteString(buf, (const char *)cur->name);
694 if ((cur->children != NULL) && (!htmlIsBooleanAttr(cur->name))) {
695 value = xmlNodeListGetString(doc, cur->children, 0);
697 xmlOutputBufferWriteString(buf, "=");
/*
 * URI-valued attributes: href, action and src, plus name when the parent
 * element is named a, and only when neither the attribute nor its parent
 * carries a namespace. Leading blanks are skipped and the rest goes
 * through xmlURIEscapeStr with the listed characters passed as its
 * second argument (presumably the set left unescaped -- confirm against
 * the libxml2 uri documentation). The raw value is written when escaping
 * yields NULL.
 */
698 if ((cur->ns == NULL) && (cur->parent != NULL) &&
699 (cur->parent->ns == NULL) &&
700 ((!xmlStrcasecmp(cur->name, BAD_CAST "href")) ||
701 (!xmlStrcasecmp(cur->name, BAD_CAST "action")) ||
702 (!xmlStrcasecmp(cur->name, BAD_CAST "src")) ||
703 ((!xmlStrcasecmp(cur->name, BAD_CAST "name")) &&
704 (!xmlStrcasecmp(cur->parent->name, BAD_CAST "a"))))) {
706 xmlChar *tmp = value;
708 while (IS_BLANK_CH(*tmp)) tmp++;
710 escaped = xmlURIEscapeStr(tmp, BAD_CAST"@/:=?;#%&,+");
711 if (escaped != NULL) {
712 xmlBufferWriteQuotedString(buf->buffer, escaped);
715 xmlBufferWriteQuotedString(buf->buffer, value);
718 xmlBufferWriteQuotedString(buf->buffer, value);
/* Writes an explicit empty value; the guarding condition is not visible here. */
722 xmlOutputBufferWriteString(buf, "=\"\"");
728 * htmlAttrListDumpOutput:
729 * @buf: the HTML buffer output
731 * @cur: the first attribute pointer
732 * @encoding: the encoding string
734 * Dump a list of HTML attributes
/*
 * Calls htmlAttrDumpOutput for each attribute while cur is non-NULL; the
 * statement advancing cur is not visible in this listing.
 */
737 htmlAttrListDumpOutput(xmlOutputBufferPtr buf, xmlDocPtr doc, xmlAttrPtr cur, const char *encoding) {
741 while (cur != NULL) {
742 htmlAttrDumpOutput(buf, doc, cur, encoding);
750 * htmlNodeListDumpOutput:
751 * @buf: the HTML buffer output
753 * @cur: the first node
754 * @encoding: the encoding string
755 * @format: should formatting spaces be added
757 * Dump an HTML node list, recursive behaviour, children are printed too.
/*
 * Calls htmlNodeDumpFormatOutput for each node while cur is non-NULL; the
 * statement advancing cur is not visible in this listing.
 */
760 htmlNodeListDumpOutput(xmlOutputBufferPtr buf, xmlDocPtr doc,
761 xmlNodePtr cur, const char *encoding, int format) {
765 while (cur != NULL) {
766 htmlNodeDumpFormatOutput(buf, doc, cur, encoding, format);
772 * htmlNodeDumpFormatOutput:
773 * @buf: the HTML buffer output
775 * @cur: the current node
776 * @encoding: the encoding string
777 * @format: should formatting spaces be added
779 * Dump an HTML node, recursive behaviour, children are printed too.
/*
 * Serialises one node, dispatching on cur->type: document, attribute,
 * text, comment, processing instruction, entity reference and preserve
 * nodes each have a dedicated branch; anything else is written as an
 * element with start tag, namespace definitions, attributes, children
 * and end tag. When @format is set, newlines are added around elements
 * that htmlTagLookup does not flag as inline.
 * NOTE(review): returns and closing braces between the branches are not
 * visible in this listing.
 */
782 htmlNodeDumpFormatOutput(xmlOutputBufferPtr buf, xmlDocPtr doc,
783 xmlNodePtr cur, const char *encoding, int format) {
784 const htmlElemDesc * info;
788 if ((cur == NULL) || (buf == NULL)) {
794 if (cur->type == XML_DTD_NODE)
/* Document nodes are delegated to htmlDocContentDumpOutput. */
796 if ((cur->type == XML_HTML_DOCUMENT_NODE) ||
797 (cur->type == XML_DOCUMENT_NODE)){
798 htmlDocContentDumpOutput(buf, (xmlDocPtr) cur, encoding);
801 if (cur->type == XML_ATTRIBUTE_NODE) {
802 htmlAttrDumpOutput(buf, doc, (xmlAttrPtr) cur, encoding);
/*
 * Text: entity-encoded through xmlEncodeEntitiesReentrant unless the
 * name test fails or the parent is named script or style; the other
 * write emits the content verbatim.
 * NOTE(review): the name test reads (name == xmlStringText) ||
 * (name != xmlStringTextNoenc); assuming the two constants are distinct
 * objects, the first half is implied by the second.
 */
805 if (cur->type == HTML_TEXT_NODE) {
806 if (cur->content != NULL) {
807 if (((cur->name == (const xmlChar *)xmlStringText) ||
808 (cur->name != (const xmlChar *)xmlStringTextNoenc)) &&
809 ((cur->parent == NULL) ||
810 ((xmlStrcasecmp(cur->parent->name, BAD_CAST "script")) &&
811 (xmlStrcasecmp(cur->parent->name, BAD_CAST "style"))))) {
814 buffer = xmlEncodeEntitiesReentrant(doc, cur->content);
815 if (buffer != NULL) {
816 xmlOutputBufferWriteString(buf, (const char *)buffer);
820 xmlOutputBufferWriteString(buf, (const char *)cur->content);
/* Comment: content wrapped in the comment delimiters, written verbatim. */
825 if (cur->type == HTML_COMMENT_NODE) {
826 if (cur->content != NULL) {
827 xmlOutputBufferWriteString(buf, "<!--");
828 xmlOutputBufferWriteString(buf, (const char *)cur->content);
829 xmlOutputBufferWriteString(buf, "-->");
/* Processing instruction: opened with "<?" and closed with a bare ">". */
833 if (cur->type == HTML_PI_NODE) {
834 if (cur->name == NULL)
836 xmlOutputBufferWriteString(buf, "<?");
837 xmlOutputBufferWriteString(buf, (const char *)cur->name);
838 if (cur->content != NULL) {
839 xmlOutputBufferWriteString(buf, " ");
840 xmlOutputBufferWriteString(buf, (const char *)cur->content);
842 xmlOutputBufferWriteString(buf, ">");
/* Entity reference: ampersand, name, semicolon. */
845 if (cur->type == HTML_ENTITY_REF_NODE) {
846 xmlOutputBufferWriteString(buf, "&");
847 xmlOutputBufferWriteString(buf, (const char *)cur->name);
848 xmlOutputBufferWriteString(buf, ";");
/* Preserve node: content written verbatim. */
851 if (cur->type == HTML_PRESERVE_NODE) {
852 if (cur->content != NULL) {
853 xmlOutputBufferWriteString(buf, (const char *)cur->content);
859 * Get specific HTML info for that node.
862 info = htmlTagLookup(cur->name);
/* Start tag: optional prefix, name, namespace definitions, attributes. */
866 xmlOutputBufferWriteString(buf, "<");
867 if ((cur->ns != NULL) && (cur->ns->prefix != NULL)) {
868 xmlOutputBufferWriteString(buf, (const char *)cur->ns->prefix);
869 xmlOutputBufferWriteString(buf, ":");
871 xmlOutputBufferWriteString(buf, (const char *)cur->name);
873 xmlNsListDumpOutput(buf, cur->nsDef);
874 if (cur->properties != NULL)
875 htmlAttrListDumpOutput(buf, doc, cur->properties, encoding);
/*
 * Elements flagged empty by the lookup: the start tag is closed here.
 * A newline follows only when formatting, the element is not inline, the
 * next sibling is neither text nor an entity reference, and the parent
 * name does not start with p.
 */
877 if ((info != NULL) && (info->empty)) {
878 xmlOutputBufferWriteString(buf, ">");
879 if ((format) && (!info->isinline) && (cur->next != NULL)) {
880 if ((cur->next->type != HTML_TEXT_NODE) &&
881 (cur->next->type != HTML_ENTITY_REF_NODE) &&
882 (cur->parent != NULL) &&
883 (cur->parent->name != NULL) &&
884 (cur->parent->name[0] != 'p')) /* p, pre, param */
885 xmlOutputBufferWriteString(buf, "\n");
/*
 * No children and (element node or no content): when info->saveEndTag is
 * non-zero and the tag is neither html nor body, only ">" is written;
 * the other write sequence emits an immediate end tag (the else between
 * them is not visible here).
 */
889 if (((cur->type == XML_ELEMENT_NODE) || (cur->content == NULL)) &&
890 (cur->children == NULL)) {
891 if ((info != NULL) && (info->saveEndTag != 0) &&
892 (xmlStrcmp(BAD_CAST info->name, BAD_CAST "html")) &&
893 (xmlStrcmp(BAD_CAST info->name, BAD_CAST "body"))) {
894 xmlOutputBufferWriteString(buf, ">");
896 xmlOutputBufferWriteString(buf, "></");
897 if ((cur->ns != NULL) && (cur->ns->prefix != NULL)) {
898 xmlOutputBufferWriteString(buf, (const char *)cur->ns->prefix);
899 xmlOutputBufferWriteString(buf, ":");
901 xmlOutputBufferWriteString(buf, (const char *)cur->name);
902 xmlOutputBufferWriteString(buf, ">");
904 if ((format) && (cur->next != NULL) &&
905 (info != NULL) && (!info->isinline)) {
906 if ((cur->next->type != HTML_TEXT_NODE) &&
907 (cur->next->type != HTML_ENTITY_REF_NODE) &&
908 (cur->parent != NULL) &&
909 (cur->parent->name != NULL) &&
910 (cur->parent->name[0] != 'p')) /* p, pre, param */
911 xmlOutputBufferWriteString(buf, "\n");
/* General case: close the start tag, then content and/or children. */
915 xmlOutputBufferWriteString(buf, ">");
916 if ((cur->type != XML_ELEMENT_NODE) &&
917 (cur->content != NULL)) {
919 * Uses the OutputBuffer property to automatically convert
920 * invalids to charrefs
923 xmlOutputBufferWriteString(buf, (const char *) cur->content);
/*
 * Children: a newline is written before the first and after the last
 * child only when formatting, the element is known and not inline, that
 * child is neither text nor an entity reference, there is more than one
 * child, and the element name does not start with p.
 */
925 if (cur->children != NULL) {
926 if ((format) && (info != NULL) && (!info->isinline) &&
927 (cur->children->type != HTML_TEXT_NODE) &&
928 (cur->children->type != HTML_ENTITY_REF_NODE) &&
929 (cur->children != cur->last) &&
930 (cur->name != NULL) &&
931 (cur->name[0] != 'p')) /* p, pre, param */
932 xmlOutputBufferWriteString(buf, "\n");
933 htmlNodeListDumpOutput(buf, doc, cur->children, encoding, format);
934 if ((format) && (info != NULL) && (!info->isinline) &&
935 (cur->last->type != HTML_TEXT_NODE) &&
936 (cur->last->type != HTML_ENTITY_REF_NODE) &&
937 (cur->children != cur->last) &&
938 (cur->name != NULL) &&
939 (cur->name[0] != 'p')) /* p, pre, param */
940 xmlOutputBufferWriteString(buf, "\n");
/* End tag, followed by the same newline rule as for empty elements. */
942 xmlOutputBufferWriteString(buf, "</");
943 if ((cur->ns != NULL) && (cur->ns->prefix != NULL)) {
944 xmlOutputBufferWriteString(buf, (const char *)cur->ns->prefix);
945 xmlOutputBufferWriteString(buf, ":");
947 xmlOutputBufferWriteString(buf, (const char *)cur->name);
948 xmlOutputBufferWriteString(buf, ">");
949 if ((format) && (info != NULL) && (!info->isinline) &&
950 (cur->next != NULL)) {
951 if ((cur->next->type != HTML_TEXT_NODE) &&
952 (cur->next->type != HTML_ENTITY_REF_NODE) &&
953 (cur->parent != NULL) &&
954 (cur->parent->name != NULL) &&
955 (cur->parent->name[0] != 'p')) /* p, pre, param */
956 xmlOutputBufferWriteString(buf, "\n");
961 * htmlNodeDumpOutput:
962 * @buf: the HTML buffer output
964 * @cur: the current node
965 * @encoding: the encoding string
967 * Dump an HTML node, recursive behaviour, children are printed too,
968 * and formatting returns/spaces are added.
/* Forwards to htmlNodeDumpFormatOutput with format fixed to 1. */
971 htmlNodeDumpOutput(xmlOutputBufferPtr buf, xmlDocPtr doc,
972 xmlNodePtr cur, const char *encoding) {
973 htmlNodeDumpFormatOutput(buf, doc, cur, encoding, 1);
977 * htmlDocContentDumpFormatOutput:
978 * @buf: the HTML buffer output
980 * @encoding: the encoding string
981 * @format: should formatting spaces be added
983 * Dump an HTML document.
/*
 * Dumps a whole document: the internal subset through htmlDtdDumpOutput
 * (always called with a NULL encoding), then the child list, then a
 * final newline. cur->type is set to XML_HTML_DOCUMENT_NODE for the
 * duration of the dump and reassigned from type at the end (type is set
 * on a line not visible in this listing).
 */
986 htmlDocContentDumpFormatOutput(xmlOutputBufferPtr buf, xmlDocPtr cur,
987 const char *encoding, int format) {
992 if ((buf == NULL) || (cur == NULL))
996 * force to output the stuff as HTML, especially for entities
999 cur->type = XML_HTML_DOCUMENT_NODE;
1000 if (cur->intSubset != NULL) {
1001 htmlDtdDumpOutput(buf, cur, NULL);
1003 if (cur->children != NULL) {
1004 htmlNodeListDumpOutput(buf, cur, cur->children, encoding, format);
1006 xmlOutputBufferWriteString(buf, "\n");
1007 cur->type = (xmlElementType) type;
1011 * htmlDocContentDumpOutput:
1012 * @buf: the HTML buffer output
1013 * @cur: the document
1014 * @encoding: the encoding string
1016 * Dump an HTML document. Formatting returns/spaces are added.
/* Forwards to htmlDocContentDumpFormatOutput with format fixed to 1. */
1019 htmlDocContentDumpOutput(xmlOutputBufferPtr buf, xmlDocPtr cur,
1020 const char *encoding) {
1021 htmlDocContentDumpFormatOutput(buf, cur, encoding, 1);
1024 /************************************************************************
1026 * Saving functions front-ends *
1028 ************************************************************************/
1033 * @cur: the document
1035 * Dump an HTML document to an open FILE.
1037 * returns: the number of bytes written or -1 in case of failure.
/*
 * Dumps a document to an open FILE. The encoding name comes from
 * htmlGetMetaEncoding; a handler is looked up for it, with "HTML" and
 * then "ascii" as fallbacks when no handler was found. The content dump
 * is called with a NULL encoding argument and ret receives the value of
 * xmlOutputBufferClose.
 * NOTE(review): the statements inside the charset-mismatch branch are
 * not all visible in this listing.
 */
1040 htmlDocDump(FILE *f, xmlDocPtr cur) {
1041 xmlOutputBufferPtr buf;
1042 xmlCharEncodingHandlerPtr handler = NULL;
1043 const char *encoding;
1048 if ((cur == NULL) || (f == NULL)) {
1052 encoding = (const char *) htmlGetMetaEncoding(cur);
1054 if (encoding != NULL) {
1055 xmlCharEncoding enc;
1057 enc = xmlParseCharEncoding(encoding);
1058 if (enc != cur->charset) {
1059 if (cur->charset != XML_CHAR_ENCODING_UTF8) {
1066 handler = xmlFindCharEncodingHandler(encoding);
1067 if (handler == NULL)
1068 htmlSaveErr(XML_SAVE_UNKNOWN_ENCODING, NULL, encoding);
1070 handler = xmlFindCharEncodingHandler(encoding);
1075 * Fallback to HTML or ASCII when the encoding is unspecified
1077 if (handler == NULL)
1078 handler = xmlFindCharEncodingHandler("HTML");
1079 if (handler == NULL)
1080 handler = xmlFindCharEncodingHandler("ascii");
1082 buf = xmlOutputBufferCreateFile(f, handler);
/* Buffer creation failure yields -1. */
1083 if (buf == NULL) return(-1);
1084 htmlDocContentDumpOutput(buf, cur, NULL);
1086 ret = xmlOutputBufferClose(buf);
1092 * @filename: the filename (or URL)
1093 * @cur: the document
1095 * Dump an HTML document to a file. If @filename is "-" the stdout file is
1097 * returns: the number of bytes written or -1 in case of failure.
/*
 * Dumps a document to the named file. The encoding name comes from
 * htmlGetMetaEncoding; a handler is looked up for it, with "HTML" and
 * then "ascii" as fallbacks when no handler was found. The output buffer
 * is created with the compression setting stored in cur->compression,
 * and ret receives the value of xmlOutputBufferClose.
 */
1100 htmlSaveFile(const char *filename, xmlDocPtr cur) {
1101 xmlOutputBufferPtr buf;
1102 xmlCharEncodingHandlerPtr handler = NULL;
1103 const char *encoding;
1106 if ((cur == NULL) || (filename == NULL))
1111 encoding = (const char *) htmlGetMetaEncoding(cur);
1113 if (encoding != NULL) {
1114 xmlCharEncoding enc;
1116 enc = xmlParseCharEncoding(encoding);
1117 if (enc != cur->charset) {
1118 if (cur->charset != XML_CHAR_ENCODING_UTF8) {
1125 handler = xmlFindCharEncodingHandler(encoding);
1126 if (handler == NULL)
1127 htmlSaveErr(XML_SAVE_UNKNOWN_ENCODING, NULL, encoding);
1132 * Fallback to HTML or ASCII when the encoding is unspecified
1134 if (handler == NULL)
1135 handler = xmlFindCharEncodingHandler("HTML");
1136 if (handler == NULL)
1137 handler = xmlFindCharEncodingHandler("ascii");
1140 * save the content to a temp buffer.
1142 buf = xmlOutputBufferCreateFilename(filename, handler, cur->compression);
/*
 * NOTE(review): yields 0 when the output buffer cannot be created, while
 * the function documentation announces -1 on failure -- confirm which
 * value callers expect.
 */
1143 if (buf == NULL) return(0);
1145 htmlDocContentDumpOutput(buf, cur, NULL);
1147 ret = xmlOutputBufferClose(buf);
1152 * htmlSaveFileFormat:
1153 * @filename: the filename
1154 * @cur: the document
1155 * @format: should formatting spaces be added
1156 * @encoding: the document encoding
1158 * Dump an HTML document to a file using a given encoding.
1160 * returns: the number of bytes written or -1 in case of failure.
/*
 * Dumps a document to the named file using the caller-supplied encoding.
 * A handler is looked up for @encoding, with "HTML" and then "ascii" as
 * fallbacks when no handler was found. The meta declaration of the
 * document is updated through htmlSetMetaEncoding, either with @encoding
 * or with "UTF-8" (the conditions choosing between the two calls are not
 * all visible in this listing). The output buffer is created with
 * compression 0 and ret receives the value of xmlOutputBufferClose.
 */
1163 htmlSaveFileFormat(const char *filename, xmlDocPtr cur,
1164 const char *encoding, int format) {
1165 xmlOutputBufferPtr buf;
1166 xmlCharEncodingHandlerPtr handler = NULL;
1169 if ((cur == NULL) || (filename == NULL))
1174 if (encoding != NULL) {
1175 xmlCharEncoding enc;
1177 enc = xmlParseCharEncoding(encoding);
1178 if (enc != cur->charset) {
1179 if (cur->charset != XML_CHAR_ENCODING_UTF8) {
1186 handler = xmlFindCharEncodingHandler(encoding);
1187 if (handler == NULL)
1188 htmlSaveErr(XML_SAVE_UNKNOWN_ENCODING, NULL, encoding);
1190 htmlSetMetaEncoding(cur, (const xmlChar *) encoding);
1192 htmlSetMetaEncoding(cur, (const xmlChar *) "UTF-8");
1196 * Fallback to HTML or ASCII when the encoding is unspecified
1198 if (handler == NULL)
1199 handler = xmlFindCharEncodingHandler("HTML");
1200 if (handler == NULL)
1201 handler = xmlFindCharEncodingHandler("ascii");
1204 * save the content to a temp buffer.
1206 buf = xmlOutputBufferCreateFilename(filename, handler, 0);
/*
 * NOTE(review): yields 0 when the output buffer cannot be created, while
 * the function documentation announces -1 on failure -- confirm which
 * value callers expect.
 */
1207 if (buf == NULL) return(0);
1209 htmlDocContentDumpFormatOutput(buf, cur, encoding, format);
1211 ret = xmlOutputBufferClose(buf);
1217 * @filename: the filename
1218 * @cur: the document
1219 * @encoding: the document encoding
1221 * Dump an HTML document to a file using a given encoding
1222 * and formatting returns/spaces are added.
1224 * returns: the number of bytes written or -1 in case of failure.
/* Forwards to htmlSaveFileFormat with format fixed to 1 and returns its result. */
1227 htmlSaveFileEnc(const char *filename, xmlDocPtr cur, const char *encoding) {
1228 return(htmlSaveFileFormat(filename, cur, encoding, 1));
1231 #endif /* LIBXML_OUTPUT_ENABLED */
1233 #define bottom_HTMLtree
1234 #include "elfgcchack.h"
1235 #endif /* LIBXML_HTML_ENABLED */