gnulib-local/lib/libxml/HTMLtree.c

   1 /*
   2  * HTMLtree.c : implementation of access function for an HTML tree.
   3  *
   4  * See Copyright for the status of this software.
   5  *
   6  * daniel@veillard.com
   7  */
   8
   9
  10 #define IN_LIBXML
  11 #include "libxml.h"
  12 #ifdef LIBXML_HTML_ENABLED
  13
  14 #include <string.h> /* for memset() only ! */
  15
  16 #ifdef HAVE_CTYPE_H
  17 #include <ctype.h>
  18 #endif
  19 #ifdef HAVE_STDLIB_H
  20 #include <stdlib.h>
  21 #endif
  22
  23 #include <libxml/xmlmemory.h>
  24 #include <libxml/HTMLparser.h>
  25 #include <libxml/HTMLtree.h>
  26 #include <libxml/entities.h>
  27 #include <libxml/valid.h>
  28 #include <libxml/xmlerror.h>
  29 #include <libxml/parserInternals.h>
  30 #include <libxml/globals.h>
  31 #include <libxml/uri.h>
  32
  33 /************************************************************************
  34  *                                                                      *
  35  *              Getting/Setting encoding meta tags                      *
  36  *                                                                      *
  37  ************************************************************************/
  38
  39 /**
  40  * htmlGetMetaEncoding:
  41  * @doc:  the document
  42  *
  43  * Encoding definition lookup in the Meta tags
  44  *
  45  * Returns the current encoding as flagged in the HTML source
  46  */
  47 const xmlChar *
  48 htmlGetMetaEncoding(htmlDocPtr doc) {
  49     htmlNodePtr cur;
  50     const xmlChar *content;
  51     const xmlChar *encoding;
  52
  53     if (doc == NULL)
  54         return(NULL);
  55     cur = doc->children;
  56
  57     /*
  58      * Search the html
  59      */
  60     while (cur != NULL) {
  61         if ((cur->type == XML_ELEMENT_NODE) && (cur->name != NULL)) {
  62             if (xmlStrEqual(cur->name, BAD_CAST"html"))
  63                 break;
  64             if (xmlStrEqual(cur->name, BAD_CAST"head"))
  65                 goto found_head;
  66             if (xmlStrEqual(cur->name, BAD_CAST"meta"))
  67                 goto found_meta;
  68         }
  69         cur = cur->next;
  70     }
  71     if (cur == NULL)
  72         return(NULL);
  73     cur = cur->children;
  74
  75     /*
  76      * Search the head
  77      */
  78     while (cur != NULL) {
  79         if ((cur->type == XML_ELEMENT_NODE) && (cur->name != NULL)) {
  80             if (xmlStrEqual(cur->name, BAD_CAST"head"))
  81                 break;
  82             if (xmlStrEqual(cur->name, BAD_CAST"meta"))
  83                 goto found_meta;
  84         }
  85         cur = cur->next;
  86     }
  87     if (cur == NULL)
  88         return(NULL);
  89 found_head:
  90     cur = cur->children;
  91
  92     /*
  93      * Search the meta elements
  94      */
  95 found_meta:
  96     while (cur != NULL) {
  97         if ((cur->type == XML_ELEMENT_NODE) && (cur->name != NULL)) {
  98             if (xmlStrEqual(cur->name, BAD_CAST"meta")) {
  99                 xmlAttrPtr attr = cur->properties;
 100                 int http;
 101                 const xmlChar *value;
 102
 103                 content = NULL;
 104                 http = 0;
 105                 while (attr != NULL) {
 106                     if ((attr->children != NULL) &&
 107                         (attr->children->type == XML_TEXT_NODE) &&
 108                         (attr->children->next == NULL)) {
 109                         value = attr->children->content;
 110                         if ((!xmlStrcasecmp(attr->name, BAD_CAST"http-equiv"))
 111                          && (!xmlStrcasecmp(value, BAD_CAST"Content-Type")))
 112                             http = 1;
 113                         else if ((value != NULL)
 114                          && (!xmlStrcasecmp(attr->name, BAD_CAST"content")))
 115                             content = value;
 116                         if ((http != 0) && (content != NULL))
 117                             goto found_content;
 118                     }
 119                     attr = attr->next;
 120                 }
 121             }
 122         }
 123         cur = cur->next;
 124     }
 125     return(NULL);
 126
 127 found_content:
 128     encoding = xmlStrstr(content, BAD_CAST"charset=");
 129     if (encoding == NULL)
 130         encoding = xmlStrstr(content, BAD_CAST"Charset=");
 131     if (encoding == NULL)
 132         encoding = xmlStrstr(content, BAD_CAST"CHARSET=");
 133     if (encoding != NULL) {
 134         encoding += 8;
 135     } else {
 136         encoding = xmlStrstr(content, BAD_CAST"charset =");
 137         if (encoding == NULL)
 138             encoding = xmlStrstr(content, BAD_CAST"Charset =");
 139         if (encoding == NULL)
 140             encoding = xmlStrstr(content, BAD_CAST"CHARSET =");
 141         if (encoding != NULL)
 142             encoding += 9;
 143     }
 144     if (encoding != NULL) {
 145         while ((*encoding == ' ') || (*encoding == '\t')) encoding++;
 146     }
 147     return(encoding);
 148 }
 149
 150 /**
 151  * htmlSetMetaEncoding:
 152  * @doc:  the document
 153  * @encoding:  the encoding string
 154  *
 155  * Sets the current encoding in the Meta tags
 156  * NOTE: this will not change the document content encoding, just
 157  * the META flag associated.
 158  *
 159  * Returns 0 in case of success and -1 in case of error
 160  */
 161 int
 162 htmlSetMetaEncoding(htmlDocPtr doc, const xmlChar *encoding) {
 163     htmlNodePtr cur, meta;
 164     const xmlChar *content;
 165     char newcontent[100];
 166
 167
 168     if (doc == NULL)
 169         return(-1);
 170
 171     if (encoding != NULL) {
 172         snprintf(newcontent, sizeof(newcontent), "text/html; charset=%s",
 173                 (char *)encoding);
 174         newcontent[sizeof(newcontent) - 1] = 0;
 175     }
 176
 177     cur = doc->children;
 178
 179     /*
 180      * Search the html
 181      */
 182     while (cur != NULL) {
 183         if ((cur->type == XML_ELEMENT_NODE) && (cur->name != NULL)) {
 184             if (xmlStrcasecmp(cur->name, BAD_CAST"html") == 0)
 185                 break;
 186             if (xmlStrcasecmp(cur->name, BAD_CAST"head") == 0)
 187                 goto found_head;
 188             if (xmlStrcasecmp(cur->name, BAD_CAST"meta") == 0)
 189                 goto found_meta;
 190         }
 191         cur = cur->next;
 192     }
 193     if (cur == NULL)
 194         return(-1);
 195     cur = cur->children;
 196
 197     /*
 198      * Search the head
 199      */
 200     while (cur != NULL) {
 201         if ((cur->type == XML_ELEMENT_NODE) && (cur->name != NULL)) {
 202             if (xmlStrcasecmp(cur->name, BAD_CAST"head") == 0)
 203                 break;
 204             if (xmlStrcasecmp(cur->name, BAD_CAST"meta") == 0)
 205                 goto found_meta;
 206         }
 207         cur = cur->next;
 208     }
 209     if (cur == NULL)
 210         return(-1);
 211 found_head:
 212     if (cur->children == NULL) {
 213         if (encoding == NULL)
 214             return(0);
 215         meta = xmlNewDocNode(doc, NULL, BAD_CAST"meta", NULL);
 216         xmlAddChild(cur, meta);
 217         xmlNewProp(meta, BAD_CAST"http-equiv", BAD_CAST"Content-Type");
 218         xmlNewProp(meta, BAD_CAST"content", BAD_CAST newcontent);
 219         return(0);
 220     }
 221     cur = cur->children;
 222
 223 found_meta:
 224     if (encoding != NULL) {
 225         /*
 226          * Create a new Meta element with the right attributes
 227          */
 228
 229         meta = xmlNewDocNode(doc, NULL, BAD_CAST"meta", NULL);
 230         xmlAddPrevSibling(cur, meta);
 231         xmlNewProp(meta, BAD_CAST"http-equiv", BAD_CAST"Content-Type");
 232         xmlNewProp(meta, BAD_CAST"content", BAD_CAST newcontent);
 233     }
 234
 235     /*
 236      * Search and destroy all the remaining the meta elements carrying
 237      * encoding informations
 238      */
 239     while (cur != NULL) {
 240         if ((cur->type == XML_ELEMENT_NODE) && (cur->name != NULL)) {
 241             if (xmlStrcasecmp(cur->name, BAD_CAST"meta") == 0) {
 242                 xmlAttrPtr attr = cur->properties;
 243                 int http;
 244                 const xmlChar *value;
 245
 246                 content = NULL;
 247                 http = 0;
 248                 while (attr != NULL) {
 249                     if ((attr->children != NULL) &&
 250                         (attr->children->type == XML_TEXT_NODE) &&
 251                         (attr->children->next == NULL)) {
 252                         value = attr->children->content;
 253                         if ((!xmlStrcasecmp(attr->name, BAD_CAST"http-equiv"))
 254                          && (!xmlStrcasecmp(value, BAD_CAST"Content-Type")))
 255                             http = 1;
 256                         else
 257                         {
 258                            if ((value != NULL) &&
 259                                 (!xmlStrcasecmp(attr->name, BAD_CAST"content")))
 260                               content = value;
 261                         }
 262                         if ((http != 0) && (content != NULL))
 263                             break;
 264                     }
 265                     attr = attr->next;
 266                 }
 267                 if ((http != 0) && (content != NULL)) {
 268                     meta = cur;
 269                     cur = cur->next;
 270                     xmlUnlinkNode(meta);
 271                     xmlFreeNode(meta);
 272                     continue;
 273                 }
 274
 275             }
 276         }
 277         cur = cur->next;
 278     }
 279     return(0);
 280 }
 281
 282 /**
 283  * booleanHTMLAttrs:
 284  *
 285  * These are the HTML attributes which will be output
 286  * in minimized form, i.e. <option selected="selected"> will be
 287  * output as <option selected>, as per XSLT 1.0 16.2 "HTML Output Method"
 288  *
 289  */
 290 static const char* htmlBooleanAttrs[] = {
 291   "checked", "compact", "declare", "defer", "disabled", "ismap",
 292   "multiple", "nohref", "noresize", "noshade", "nowrap", "readonly",
 293   "selected", NULL
 294 };
 295
 296
 297 /**
 298  * htmlIsBooleanAttr:
 299  * @name:  the name of the attribute to check
 300  *
 301  * Determine if a given attribute is a boolean attribute.
 302  *
 303  * returns: false if the attribute is not boolean, true otherwise.
 304  */
 305 int
 306 htmlIsBooleanAttr(const xmlChar *name)
 307 {
 308     int i = 0;
 309
 310     while (htmlBooleanAttrs[i] != NULL) {
 311         if (xmlStrcasecmp((const xmlChar *)htmlBooleanAttrs[i], name) == 0)
 312             return 1;
 313         i++;
 314     }
 315     return 0;
 316 }
 317
 318 #ifdef LIBXML_OUTPUT_ENABLED
 319 /************************************************************************
 320  *                                                                      *
 321  *                      Output error handlers                           *
 322  *                                                                      *
 323  ************************************************************************/
 324 /**
 325  * htmlSaveErrMemory:
 326  * @extra:  extra informations
 327  *
 328  * Handle an out of memory condition
 329  */
 330 static void
 331 htmlSaveErrMemory(const char *extra)
 332 {
 333     __xmlSimpleError(XML_FROM_OUTPUT, XML_ERR_NO_MEMORY, NULL, NULL, extra);
 334 }
 335
 336 /**
 337  * htmlSaveErr:
 338  * @code:  the error number
 339  * @node:  the location of the error.
 340  * @extra:  extra informations
 341  *
 342  * Handle an out of memory condition
 343  */
 344 static void
 345 htmlSaveErr(int code, xmlNodePtr node, const char *extra)
 346 {
 347     const char *msg = NULL;
 348
 349     switch(code) {
 350         case XML_SAVE_NOT_UTF8:
 351             msg = "string is not in UTF-8\n";
 352             break;
 353         case XML_SAVE_CHAR_INVALID:
 354             msg = "invalid character value\n";
 355             break;
 356         case XML_SAVE_UNKNOWN_ENCODING:
 357             msg = "unknown encoding %s\n";
 358             break;
 359         case XML_SAVE_NO_DOCTYPE:
 360             msg = "HTML has no DOCTYPE\n";
 361             break;
 362         default:
 363             msg = "unexpected error number\n";
 364     }
 365     __xmlSimpleError(XML_FROM_OUTPUT, code, node, msg, extra);
 366 }
 367
 368 /************************************************************************
 369  *                                                                      *
 370  *              Dumping HTML tree content to a simple buffer            *
 371  *                                                                      *
 372  ************************************************************************/
 373
 374 static int
 375 htmlNodeDumpFormat(xmlBufferPtr buf, xmlDocPtr doc, xmlNodePtr cur,
 376                    int format);
 377
 378 /**
 379  * htmlNodeDumpFormat:
 380  * @buf:  the HTML buffer output
 381  * @doc:  the document
 382  * @cur:  the current node
 383  * @format:  should formatting spaces been added
 384  *
 385  * Dump an HTML node, recursive behaviour,children are printed too.
 386  *
 387  * Returns the number of byte written or -1 in case of error
 388  */
 389 static int
 390 htmlNodeDumpFormat(xmlBufferPtr buf, xmlDocPtr doc, xmlNodePtr cur,
 391                    int format) {
 392     unsigned int use;
 393     int ret;
 394     xmlOutputBufferPtr outbuf;
 395
 396     if (cur == NULL) {
 397         return (-1);
 398     }
 399     if (buf == NULL) {
 400         return (-1);
 401     }
 402     outbuf = (xmlOutputBufferPtr) xmlMalloc(sizeof(xmlOutputBuffer));
 403     if (outbuf == NULL) {
 404         htmlSaveErrMemory("allocating HTML output buffer");
 405         return (-1);
 406     }
 407     memset(outbuf, 0, (size_t) sizeof(xmlOutputBuffer));
 408     outbuf->buffer = buf;
 409     outbuf->encoder = NULL;
 410     outbuf->writecallback = NULL;
 411     outbuf->closecallback = NULL;
 412     outbuf->context = NULL;
 413     outbuf->written = 0;
 414
 415     use = buf->use;
 416     htmlNodeDumpFormatOutput(outbuf, doc, cur, NULL, format);
 417     xmlFree(outbuf);
 418     ret = buf->use - use;
 419     return (ret);
 420 }
 421
 422 /**
 423  * htmlNodeDump:
 424  * @buf:  the HTML buffer output
 425  * @doc:  the document
 426  * @cur:  the current node
 427  *
 428  * Dump an HTML node, recursive behaviour,children are printed too,
 429  * and formatting returns are added.
 430  *
 431  * Returns the number of byte written or -1 in case of error
 432  */
 433 int
 434 htmlNodeDump(xmlBufferPtr buf, xmlDocPtr doc, xmlNodePtr cur) {
 435     xmlInitParser();
 436
 437     return(htmlNodeDumpFormat(buf, doc, cur, 1));
 438 }
 439
 440 /**
 441  * htmlNodeDumpFileFormat:
 442  * @out:  the FILE pointer
 443  * @doc:  the document
 444  * @cur:  the current node
 445  * @encoding: the document encoding
 446  * @format:  should formatting spaces been added
 447  *
 448  * Dump an HTML node, recursive behaviour,children are printed too.
 449  *
 450  * TODO: if encoding == NULL try to save in the doc encoding
 451  *
 452  * returns: the number of byte written or -1 in case of failure.
 453  */
 454 int
 455 htmlNodeDumpFileFormat(FILE *out, xmlDocPtr doc,
 456                        xmlNodePtr cur, const char *encoding, int format) {
 457     xmlOutputBufferPtr buf;
 458     xmlCharEncodingHandlerPtr handler = NULL;
 459     int ret;
 460
 461     xmlInitParser();
 462
 463     if (encoding != NULL) {
 464         xmlCharEncoding enc;
 465
 466         enc = xmlParseCharEncoding(encoding);
 467         if (enc != XML_CHAR_ENCODING_UTF8) {
 468             handler = xmlFindCharEncodingHandler(encoding);
 469             if (handler == NULL)
 470                 return(-1);
 471         }
 472     }
 473
 474     /*
 475      * Fallback to HTML or ASCII when the encoding is unspecified
 476      */
 477     if (handler == NULL)
 478         handler = xmlFindCharEncodingHandler("HTML");
 479     if (handler == NULL)
 480         handler = xmlFindCharEncodingHandler("ascii");
 481
 482     /*
 483      * save the content to a temp buffer.
 484      */
 485     buf = xmlOutputBufferCreateFile(out, handler);
 486     if (buf == NULL) return(0);
 487
 488     htmlNodeDumpFormatOutput(buf, doc, cur, encoding, format);
 489
 490     ret = xmlOutputBufferClose(buf);
 491     return(ret);
 492 }
 493
 494 /**
 495  * htmlNodeDumpFile:
 496  * @out:  the FILE pointer
 497  * @doc:  the document
 498  * @cur:  the current node
 499  *
 500  * Dump an HTML node, recursive behaviour,children are printed too,
 501  * and formatting returns are added.
 502  */
 503 void
 504 htmlNodeDumpFile(FILE *out, xmlDocPtr doc, xmlNodePtr cur) {
 505     htmlNodeDumpFileFormat(out, doc, cur, NULL, 1);
 506 }
 507
 508 /**
 509  * htmlDocDumpMemoryFormat:
 510  * @cur:  the document
 511  * @mem:  OUT: the memory pointer
 512  * @size:  OUT: the memory length
 513  * @format:  should formatting spaces been added
 514  *
 515  * Dump an HTML document in memory and return the xmlChar * and it's size.
 516  * It's up to the caller to free the memory.
 517  */
 518 void
 519 htmlDocDumpMemoryFormat(xmlDocPtr cur, xmlChar**mem, int *size, int format) {
 520     xmlOutputBufferPtr buf;
 521     xmlCharEncodingHandlerPtr handler = NULL;
 522     const char *encoding;
 523
 524     xmlInitParser();
 525
 526     if ((mem == NULL) || (size == NULL))
 527         return;
 528     if (cur == NULL) {
 529         *mem = NULL;
 530         *size = 0;
 531         return;
 532     }
 533
 534     encoding = (const char *) htmlGetMetaEncoding(cur);
 535
 536     if (encoding != NULL) {
 537         xmlCharEncoding enc;
 538
 539         enc = xmlParseCharEncoding(encoding);
 540         if (enc != cur->charset) {
 541             if (cur->charset != XML_CHAR_ENCODING_UTF8) {
 542                 /*
 543                  * Not supported yet
 544                  */
 545                 *mem = NULL;
 546                 *size = 0;
 547                 return;
 548             }
 549
 550             handler = xmlFindCharEncodingHandler(encoding);
 551             if (handler == NULL) {
 552                 *mem = NULL;
 553                 *size = 0;
 554                 return;
 555             }
 556         } else {
 557             handler = xmlFindCharEncodingHandler(encoding);
 558         }
 559     }
 560
 561     /*
 562      * Fallback to HTML or ASCII when the encoding is unspecified
 563      */
 564     if (handler == NULL)
 565         handler = xmlFindCharEncodingHandler("HTML");
 566     if (handler == NULL)
 567         handler = xmlFindCharEncodingHandler("ascii");
 568
 569     buf = xmlAllocOutputBuffer(handler);
 570     if (buf == NULL) {
 571         *mem = NULL;
 572         *size = 0;
 573         return;
 574     }
 575
 576         htmlDocContentDumpFormatOutput(buf, cur, NULL, format);
 577
 578     xmlOutputBufferFlush(buf);
 579     if (buf->conv != NULL) {
 580         *size = buf->conv->use;
 581         *mem = xmlStrndup(buf->conv->content, *size);
 582     } else {
 583         *size = buf->buffer->use;
 584         *mem = xmlStrndup(buf->buffer->content, *size);
 585     }
 586     (void)xmlOutputBufferClose(buf);
 587 }
 588
 589 /**
 590  * htmlDocDumpMemory:
 591  * @cur:  the document
 592  * @mem:  OUT: the memory pointer
 593  * @size:  OUT: the memory length
 594  *
 595  * Dump an HTML document in memory and return the xmlChar * and it's size.
 596  * It's up to the caller to free the memory.
 597  */
 598 void
 599 htmlDocDumpMemory(xmlDocPtr cur, xmlChar**mem, int *size) {
 600         htmlDocDumpMemoryFormat(cur, mem, size, 1);
 601 }
 602
 603
 604 /************************************************************************
 605  *                                                                      *
 606  *              Dumping HTML tree content to an I/O output buffer       *
 607  *                                                                      *
 608  ************************************************************************/
 609
 610 void xmlNsListDumpOutput(xmlOutputBufferPtr buf, xmlNsPtr cur);
 611
 612 /**
 613  * htmlDtdDumpOutput:
 614  * @buf:  the HTML buffer output
 615  * @doc:  the document
 616  * @encoding:  the encoding string
 617  *
 618  * TODO: check whether encoding is needed
 619  *
 620  * Dump the HTML document DTD, if any.
 621  */
 622 static void
 623 htmlDtdDumpOutput(xmlOutputBufferPtr buf, xmlDocPtr doc,
 624                   const char *encoding ATTRIBUTE_UNUSED) {
 625     xmlDtdPtr cur = doc->intSubset;
 626
 627     if (cur == NULL) {
 628         htmlSaveErr(XML_SAVE_NO_DOCTYPE, (xmlNodePtr) doc, NULL);
 629         return;
 630     }
 631     xmlOutputBufferWriteString(buf, "<!DOCTYPE ");
 632     xmlOutputBufferWriteString(buf, (const char *)cur->name);
 633     if (cur->ExternalID != NULL) {
 634         xmlOutputBufferWriteString(buf, " PUBLIC ");
 635         xmlBufferWriteQuotedString(buf->buffer, cur->ExternalID);
 636         if (cur->SystemID != NULL) {
 637             xmlOutputBufferWriteString(buf, " ");
 638             xmlBufferWriteQuotedString(buf->buffer, cur->SystemID);
 639         }
 640     }  else if (cur->SystemID != NULL) {
 641         xmlOutputBufferWriteString(buf, " SYSTEM ");
 642         xmlBufferWriteQuotedString(buf->buffer, cur->SystemID);
 643     }
 644     xmlOutputBufferWriteString(buf, ">\n");
 645 }
 646
 647 /**
 648  * htmlAttrDumpOutput:
 649  * @buf:  the HTML buffer output
 650  * @doc:  the document
 651  * @cur:  the attribute pointer
 652  * @encoding:  the encoding string
 653  *
 654  * Dump an HTML attribute
 655  */
 656 static void
 657 htmlAttrDumpOutput(xmlOutputBufferPtr buf, xmlDocPtr doc, xmlAttrPtr cur,
 658                    const char *encoding ATTRIBUTE_UNUSED) {
 659     xmlChar *value;
 660
 661     /*
 662      * TODO: The html output method should not escape a & character
 663      *       occurring in an attribute value immediately followed by
 664      *       a { character (see Section B.7.1 of the HTML 4.0 Recommendation).
 665      */
 666
 667     if (cur == NULL) {
 668         return;
 669     }
 670     xmlOutputBufferWriteString(buf, " ");
 671     if ((cur->ns != NULL) && (cur->ns->prefix != NULL)) {
 672         xmlOutputBufferWriteString(buf, (const char *)cur->ns->prefix);
 673         xmlOutputBufferWriteString(buf, ":");
 674     }
 675     xmlOutputBufferWriteString(buf, (const char *)cur->name);
 676     if ((cur->children != NULL) && (!htmlIsBooleanAttr(cur->name))) {
 677         value = xmlNodeListGetString(doc, cur->children, 0);
 678         if (value) {
 679             xmlOutputBufferWriteString(buf, "=");
 680             if ((cur->ns == NULL) && (cur->parent != NULL) &&
 681                 (cur->parent->ns == NULL) &&
 682                 ((!xmlStrcasecmp(cur->name, BAD_CAST "href")) ||
 683                  (!xmlStrcasecmp(cur->name, BAD_CAST "action")) ||
 684                  (!xmlStrcasecmp(cur->name, BAD_CAST "src")) ||
 685                  ((!xmlStrcasecmp(cur->name, BAD_CAST "name")) &&
 686                   (!xmlStrcasecmp(cur->parent->name, BAD_CAST "a"))))) {
 687                 xmlChar *escaped;
 688                 xmlChar *tmp = value;
 689
 690                 while (IS_BLANK_CH(*tmp)) tmp++;
 691
 692                 escaped = xmlURIEscapeStr(tmp, BAD_CAST"@/:=?;#%&,+");
 693                 if (escaped != NULL) {
 694                     xmlBufferWriteQuotedString(buf->buffer, escaped);
 695                     xmlFree(escaped);
 696                 } else {
 697                     xmlBufferWriteQuotedString(buf->buffer, value);
 698                 }
 699             } else {
 700                 xmlBufferWriteQuotedString(buf->buffer, value);
 701             }
 702             xmlFree(value);
 703         } else  {
 704             xmlOutputBufferWriteString(buf, "=\"\"");
 705         }
 706     }
 707 }
 708
 709 /**
 710  * htmlAttrListDumpOutput:
 711  * @buf:  the HTML buffer output
 712  * @doc:  the document
 713  * @cur:  the first attribute pointer
 714  * @encoding:  the encoding string
 715  *
 716  * Dump a list of HTML attributes
 717  */
 718 static void
 719 htmlAttrListDumpOutput(xmlOutputBufferPtr buf, xmlDocPtr doc, xmlAttrPtr cur, const char *encoding) {
 720     if (cur == NULL) {
 721         return;
 722     }
 723     while (cur != NULL) {
 724         htmlAttrDumpOutput(buf, doc, cur, encoding);
 725         cur = cur->next;
 726     }
 727 }
 728
 729
 730
 731 /**
 732  * htmlNodeListDumpOutput:
 733  * @buf:  the HTML buffer output
 734  * @doc:  the document
 735  * @cur:  the first node
 736  * @encoding:  the encoding string
 737  * @format:  should formatting spaces been added
 738  *
 739  * Dump an HTML node list, recursive behaviour,children are printed too.
 740  */
 741 static void
 742 htmlNodeListDumpOutput(xmlOutputBufferPtr buf, xmlDocPtr doc,
 743                        xmlNodePtr cur, const char *encoding, int format) {
 744     if (cur == NULL) {
 745         return;
 746     }
 747     while (cur != NULL) {
 748         htmlNodeDumpFormatOutput(buf, doc, cur, encoding, format);
 749         cur = cur->next;
 750     }
 751 }
 752
 753 /**
 754  * htmlNodeDumpFormatOutput:
 755  * @buf:  the HTML buffer output
 756  * @doc:  the document
 757  * @cur:  the current node
 758  * @encoding:  the encoding string
 759  * @format:  should formatting spaces been added
 760  *
 761  * Dump an HTML node, recursive behaviour,children are printed too.
 762  */
 763 void
 764 htmlNodeDumpFormatOutput(xmlOutputBufferPtr buf, xmlDocPtr doc,
 765                          xmlNodePtr cur, const char *encoding, int format) {
 766     const htmlElemDesc * info;
 767
 768     xmlInitParser();
 769
 770     if ((cur == NULL) || (buf == NULL)) {
 771         return;
 772     }
 773     /*
 774      * Special cases.
 775      */
 776     if (cur->type == XML_DTD_NODE)
 777         return;
 778     if ((cur->type == XML_HTML_DOCUMENT_NODE) ||
 779         (cur->type == XML_DOCUMENT_NODE)){
 780         htmlDocContentDumpOutput(buf, (xmlDocPtr) cur, encoding);
 781         return;
 782     }
 783     if (cur->type == HTML_TEXT_NODE) {
 784         if (cur->content != NULL) {
 785             if (((cur->name == (const xmlChar *)xmlStringText) ||
 786                  (cur->name != (const xmlChar *)xmlStringTextNoenc)) &&
 787                 ((cur->parent == NULL) ||
 788                  ((xmlStrcasecmp(cur->parent->name, BAD_CAST "script")) &&
 789                   (xmlStrcasecmp(cur->parent->name, BAD_CAST "style"))))) {
 790                 xmlChar *buffer;
 791
 792                 buffer = xmlEncodeEntitiesReentrant(doc, cur->content);
 793                 if (buffer != NULL) {
 794                     xmlOutputBufferWriteString(buf, (const char *)buffer);
 795                     xmlFree(buffer);
 796                 }
 797             } else {
 798                 xmlOutputBufferWriteString(buf, (const char *)cur->content);
 799             }
 800         }
 801         return;
 802     }
 803     if (cur->type == HTML_COMMENT_NODE) {
 804         if (cur->content != NULL) {
 805             xmlOutputBufferWriteString(buf, "<!--");
 806             xmlOutputBufferWriteString(buf, (const char *)cur->content);
 807             xmlOutputBufferWriteString(buf, "-->");
 808         }
 809         return;
 810     }
 811     if (cur->type == HTML_PI_NODE) {
 812         if (cur->name == NULL)
 813             return;
 814         xmlOutputBufferWriteString(buf, "<?");
 815         xmlOutputBufferWriteString(buf, (const char *)cur->name);
 816         if (cur->content != NULL) {
 817             xmlOutputBufferWriteString(buf, " ");
 818             xmlOutputBufferWriteString(buf, (const char *)cur->content);
 819         }
 820         xmlOutputBufferWriteString(buf, ">");
 821         return;
 822     }
 823     if (cur->type == HTML_ENTITY_REF_NODE) {
 824         xmlOutputBufferWriteString(buf, "&");
 825         xmlOutputBufferWriteString(buf, (const char *)cur->name);
 826         xmlOutputBufferWriteString(buf, ";");
 827         return;
 828     }
 829     if (cur->type == HTML_PRESERVE_NODE) {
 830         if (cur->content != NULL) {
 831             xmlOutputBufferWriteString(buf, (const char *)cur->content);
 832         }
 833         return;
 834     }
 835
 836     /*
 837      * Get specific HTML info for that node.
 838      */
 839     if (cur->ns == NULL)
 840         info = htmlTagLookup(cur->name);
 841     else
 842         info = NULL;
 843
 844     xmlOutputBufferWriteString(buf, "<");
 845     if ((cur->ns != NULL) && (cur->ns->prefix != NULL)) {
 846         xmlOutputBufferWriteString(buf, (const char *)cur->ns->prefix);
 847         xmlOutputBufferWriteString(buf, ":");
 848     }
 849     xmlOutputBufferWriteString(buf, (const char *)cur->name);
 850     if (cur->nsDef)
 851         xmlNsListDumpOutput(buf, cur->nsDef);
 852     if (cur->properties != NULL)
 853         htmlAttrListDumpOutput(buf, doc, cur->properties, encoding);
 854
 855     if ((info != NULL) && (info->empty)) {
 856         xmlOutputBufferWriteString(buf, ">");
 857         if ((format) && (!info->isinline) && (cur->next != NULL)) {
 858             if ((cur->next->type != HTML_TEXT_NODE) &&
 859                 (cur->next->type != HTML_ENTITY_REF_NODE) &&
 860                 (cur->parent != NULL) &&
 861                 (cur->parent->name != NULL) &&
 862                 (cur->parent->name[0] != 'p')) /* p, pre, param */
 863                 xmlOutputBufferWriteString(buf, "\n");
 864         }
 865         return;
 866     }
 867     if (((cur->type == XML_ELEMENT_NODE) || (cur->content == NULL)) &&
 868         (cur->children == NULL)) {
 869         if ((info != NULL) && (info->saveEndTag != 0) &&
 870             (xmlStrcmp(BAD_CAST info->name, BAD_CAST "html")) &&
 871             (xmlStrcmp(BAD_CAST info->name, BAD_CAST "body"))) {
 872             xmlOutputBufferWriteString(buf, ">");
 873         } else {
 874             xmlOutputBufferWriteString(buf, "></");
 875             if ((cur->ns != NULL) && (cur->ns->prefix != NULL)) {
 876                 xmlOutputBufferWriteString(buf, (const char *)cur->ns->prefix);
 877                 xmlOutputBufferWriteString(buf, ":");
 878             }
 879             xmlOutputBufferWriteString(buf, (const char *)cur->name);
 880             xmlOutputBufferWriteString(buf, ">");
 881         }
 882         if ((format) && (cur->next != NULL) &&
 883             (info != NULL) && (!info->isinline)) {
 884             if ((cur->next->type != HTML_TEXT_NODE) &&
 885                 (cur->next->type != HTML_ENTITY_REF_NODE) &&
 886                 (cur->parent != NULL) &&
 887                 (cur->parent->name != NULL) &&
 888                 (cur->parent->name[0] != 'p')) /* p, pre, param */
 889                 xmlOutputBufferWriteString(buf, "\n");
 890         }
 891         return;
 892     }
 893     xmlOutputBufferWriteString(buf, ">");
 894     if ((cur->type != XML_ELEMENT_NODE) &&
 895         (cur->content != NULL)) {
 896             /*
 897              * Uses the OutputBuffer property to automatically convert
 898              * invalids to charrefs
 899              */
 900
 901             xmlOutputBufferWriteString(buf, (const char *) cur->content);
 902     }
 903     if (cur->children != NULL) {
 904         if ((format) && (info != NULL) && (!info->isinline) &&
 905             (cur->children->type != HTML_TEXT_NODE) &&
 906             (cur->children->type != HTML_ENTITY_REF_NODE) &&
 907             (cur->children != cur->last) &&
 908             (cur->name != NULL) &&
 909             (cur->name[0] != 'p')) /* p, pre, param */
 910             xmlOutputBufferWriteString(buf, "\n");
 911         htmlNodeListDumpOutput(buf, doc, cur->children, encoding, format);
 912         if ((format) && (info != NULL) && (!info->isinline) &&
 913             (cur->last->type != HTML_TEXT_NODE) &&
 914             (cur->last->type != HTML_ENTITY_REF_NODE) &&
 915             (cur->children != cur->last) &&
 916             (cur->name != NULL) &&
 917             (cur->name[0] != 'p')) /* p, pre, param */
 918             xmlOutputBufferWriteString(buf, "\n");
 919     }
 920     xmlOutputBufferWriteString(buf, "</");
 921     if ((cur->ns != NULL) && (cur->ns->prefix != NULL)) {
 922         xmlOutputBufferWriteString(buf, (const char *)cur->ns->prefix);
 923         xmlOutputBufferWriteString(buf, ":");
 924     }
 925     xmlOutputBufferWriteString(buf, (const char *)cur->name);
 926     xmlOutputBufferWriteString(buf, ">");
 927     if ((format) && (info != NULL) && (!info->isinline) &&
 928         (cur->next != NULL)) {
 929         if ((cur->next->type != HTML_TEXT_NODE) &&
 930             (cur->next->type != HTML_ENTITY_REF_NODE) &&
 931             (cur->parent != NULL) &&
 932             (cur->parent->name != NULL) &&
 933             (cur->parent->name[0] != 'p')) /* p, pre, param */
 934             xmlOutputBufferWriteString(buf, "\n");
 935     }
 936 }
 937
 938 /**
 939  * htmlNodeDumpOutput:
 940  * @buf:  the HTML buffer output
 941  * @doc:  the document
 942  * @cur:  the current node
 943  * @encoding:  the encoding string
 944  *
 945  * Dump an HTML node, recursive behaviour,children are printed too,
 946  * and formatting returns/spaces are added.
 947  */
 948 void
 949 htmlNodeDumpOutput(xmlOutputBufferPtr buf, xmlDocPtr doc,
 950                    xmlNodePtr cur, const char *encoding) {
 951     htmlNodeDumpFormatOutput(buf, doc, cur, encoding, 1);
 952 }
 953
 954 /**
 955  * htmlDocContentDumpFormatOutput:
 956  * @buf:  the HTML buffer output
 957  * @cur:  the document
 958  * @encoding:  the encoding string
 959  * @format:  should formatting spaces been added
 960  *
 961  * Dump an HTML document.
 962  */
 963 void
 964 htmlDocContentDumpFormatOutput(xmlOutputBufferPtr buf, xmlDocPtr cur,
 965                                const char *encoding, int format) {
 966     int type;
 967
 968     xmlInitParser();
 969
 970     if ((buf == NULL) || (cur == NULL))
 971         return;
 972
 973     /*
 974      * force to output the stuff as HTML, especially for entities
 975      */
 976     type = cur->type;
 977     cur->type = XML_HTML_DOCUMENT_NODE;
 978     if (cur->intSubset != NULL) {
 979         htmlDtdDumpOutput(buf, cur, NULL);
 980     }
 981     if (cur->children != NULL) {
 982         htmlNodeListDumpOutput(buf, cur, cur->children, encoding, format);
 983     }
 984     xmlOutputBufferWriteString(buf, "\n");
 985     cur->type = (xmlElementType) type;
 986 }
 987
 988 /**
 989  * htmlDocContentDumpOutput:
 990  * @buf:  the HTML buffer output
 991  * @cur:  the document
 992  * @encoding:  the encoding string
 993  *
 994  * Dump an HTML document. Formating return/spaces are added.
 995  */
 996 void
 997 htmlDocContentDumpOutput(xmlOutputBufferPtr buf, xmlDocPtr cur,
 998                          const char *encoding) {
 999     htmlDocContentDumpFormatOutput(buf, cur, encoding, 1);
1000 }
1001
1002 /************************************************************************
1003  *                                                                      *
1004  *              Saving functions front-ends                             *
1005  *                                                                      *
1006  ************************************************************************/
1007
1008 /**
1009  * htmlDocDump:
1010  * @f:  the FILE*
1011  * @cur:  the document
1012  *
1013  * Dump an HTML document to an open FILE.
1014  *
1015  * returns: the number of byte written or -1 in case of failure.
1016  */
1017 int
1018 htmlDocDump(FILE *f, xmlDocPtr cur) {
1019     xmlOutputBufferPtr buf;
1020     xmlCharEncodingHandlerPtr handler = NULL;
1021     const char *encoding;
1022     int ret;
1023
1024     xmlInitParser();
1025
1026     if ((cur == NULL) || (f == NULL)) {
1027         return(-1);
1028     }
1029
1030     encoding = (const char *) htmlGetMetaEncoding(cur);
1031
1032     if (encoding != NULL) {
1033         xmlCharEncoding enc;
1034
1035         enc = xmlParseCharEncoding(encoding);
1036         if (enc != cur->charset) {
1037             if (cur->charset != XML_CHAR_ENCODING_UTF8) {
1038                 /*
1039                  * Not supported yet
1040                  */
1041                 return(-1);
1042             }
1043
1044             handler = xmlFindCharEncodingHandler(encoding);
1045             if (handler == NULL)
1046                 return(-1);
1047         } else {
1048             handler = xmlFindCharEncodingHandler(encoding);
1049         }
1050     }
1051
1052     /*
1053      * Fallback to HTML or ASCII when the encoding is unspecified
1054      */
1055     if (handler == NULL)
1056         handler = xmlFindCharEncodingHandler("HTML");
1057     if (handler == NULL)
1058         handler = xmlFindCharEncodingHandler("ascii");
1059
1060     buf = xmlOutputBufferCreateFile(f, handler);
1061     if (buf == NULL) return(-1);
1062     htmlDocContentDumpOutput(buf, cur, NULL);
1063
1064     ret = xmlOutputBufferClose(buf);
1065     return(ret);
1066 }
1067
1068 /**
1069  * htmlSaveFile:
1070  * @filename:  the filename (or URL)
1071  * @cur:  the document
1072  *
1073  * Dump an HTML document to a file. If @filename is "-" the stdout file is
1074  * used.
1075  * returns: the number of byte written or -1 in case of failure.
1076  */
1077 int
1078 htmlSaveFile(const char *filename, xmlDocPtr cur) {
1079     xmlOutputBufferPtr buf;
1080     xmlCharEncodingHandlerPtr handler = NULL;
1081     const char *encoding;
1082     int ret;
1083
1084     if ((cur == NULL) || (filename == NULL))
1085         return(-1);
1086
1087     xmlInitParser();
1088
1089     encoding = (const char *) htmlGetMetaEncoding(cur);
1090
1091     if (encoding != NULL) {
1092         xmlCharEncoding enc;
1093
1094         enc = xmlParseCharEncoding(encoding);
1095         if (enc != cur->charset) {
1096             if (cur->charset != XML_CHAR_ENCODING_UTF8) {
1097                 /*
1098                  * Not supported yet
1099                  */
1100                 return(-1);
1101             }
1102
1103             handler = xmlFindCharEncodingHandler(encoding);
1104             if (handler == NULL)
1105                 return(-1);
1106         }
1107     }
1108
1109     /*
1110      * Fallback to HTML or ASCII when the encoding is unspecified
1111      */
1112     if (handler == NULL)
1113         handler = xmlFindCharEncodingHandler("HTML");
1114     if (handler == NULL)
1115         handler = xmlFindCharEncodingHandler("ascii");
1116
1117     /*
1118      * save the content to a temp buffer.
1119      */
1120     buf = xmlOutputBufferCreateFilename(filename, handler, cur->compression);
1121     if (buf == NULL) return(0);
1122
1123     htmlDocContentDumpOutput(buf, cur, NULL);
1124
1125     ret = xmlOutputBufferClose(buf);
1126     return(ret);
1127 }
1128
1129 /**
1130  * htmlSaveFileFormat:
1131  * @filename:  the filename
1132  * @cur:  the document
1133  * @format:  should formatting spaces been added
1134  * @encoding: the document encoding
1135  *
1136  * Dump an HTML document to a file using a given encoding.
1137  *
1138  * returns: the number of byte written or -1 in case of failure.
1139  */
1140 int
1141 htmlSaveFileFormat(const char *filename, xmlDocPtr cur,
1142                    const char *encoding, int format) {
1143     xmlOutputBufferPtr buf;
1144     xmlCharEncodingHandlerPtr handler = NULL;
1145     int ret;
1146
1147     if ((cur == NULL) || (filename == NULL))
1148         return(-1);
1149
1150     xmlInitParser();
1151
1152     if (encoding != NULL) {
1153         xmlCharEncoding enc;
1154
1155         enc = xmlParseCharEncoding(encoding);
1156         if (enc != cur->charset) {
1157             if (cur->charset != XML_CHAR_ENCODING_UTF8) {
1158                 /*
1159                  * Not supported yet
1160                  */
1161                 return(-1);
1162             }
1163
1164             handler = xmlFindCharEncodingHandler(encoding);
1165             if (handler == NULL)
1166                 return(-1);
1167             htmlSetMetaEncoding(cur, (const xmlChar *) encoding);
1168         }
1169     } else {
1170         htmlSetMetaEncoding(cur, (const xmlChar *) "UTF-8");
1171     }
1172
1173     /*
1174      * Fallback to HTML or ASCII when the encoding is unspecified
1175      */
1176     if (handler == NULL)
1177         handler = xmlFindCharEncodingHandler("HTML");
1178     if (handler == NULL)
1179         handler = xmlFindCharEncodingHandler("ascii");
1180
1181     /*
1182      * save the content to a temp buffer.
1183      */
1184     buf = xmlOutputBufferCreateFilename(filename, handler, 0);
1185     if (buf == NULL) return(0);
1186
1187     htmlDocContentDumpFormatOutput(buf, cur, encoding, format);
1188
1189     ret = xmlOutputBufferClose(buf);
1190     return(ret);
1191 }
1192
1193 /**
1194  * htmlSaveFileEnc:
1195  * @filename:  the filename
1196  * @cur:  the document
1197  * @encoding: the document encoding
1198  *
1199  * Dump an HTML document to a file using a given encoding
1200  * and formatting returns/spaces are added.
1201  *
1202  * returns: the number of byte written or -1 in case of failure.
1203  */
1204 int
1205 htmlSaveFileEnc(const char *filename, xmlDocPtr cur, const char *encoding) {
1206     return(htmlSaveFileFormat(filename, cur, encoding, 1));
1207 }
1208
1209 #endif /* LIBXML_OUTPUT_ENABLED */
1210
1211 #define bottom_HTMLtree
1212 #include "elfgcchack.h"
1213 #endif /* LIBXML_HTML_ENABLED */