HTMLtree.c

   1 /*
   2  * HTMLtree.c : implementation of access function for an HTML tree.
   3  *
   4  * See Copyright for the status of this software.
   5  *
   6  * daniel@veillard.com
   7  */
   8
   9
  10 #define IN_LIBXML
  11 #include "libxml.h"
  12 #ifdef LIBXML_HTML_ENABLED
  13
  14 #include <string.h> /* for memset() only ! */
  15
  16 #ifdef HAVE_CTYPE_H
  17 #include <ctype.h>
  18 #endif
  19 #ifdef HAVE_STDLIB_H
  20 #include <stdlib.h>
  21 #endif
  22
  23 #include <libxml/xmlmemory.h>
  24 #include <libxml/HTMLparser.h>
  25 #include <libxml/HTMLtree.h>
  26 #include <libxml/entities.h>
  27 #include <libxml/valid.h>
  28 #include <libxml/xmlerror.h>
  29 #include <libxml/parserInternals.h>
  30 #include <libxml/globals.h>
  31 #include <libxml/uri.h>
  32
  33 /************************************************************************
  34  *                                                                      *
  35  *              Getting/Setting encoding meta tags                      *
  36  *                                                                      *
  37  ************************************************************************/
  38
  39 /**
  40  * htmlGetMetaEncoding:
  41  * @doc:  the document
  42  *
  43  * Encoding definition lookup in the Meta tags
  44  *
  45  * Returns the current encoding as flagged in the HTML source
  46  */
  47 const xmlChar *
  48 htmlGetMetaEncoding(htmlDocPtr doc) {
  49     htmlNodePtr cur;
  50     const xmlChar *content;
  51     const xmlChar *encoding;
  52
  53     if (doc == NULL)
  54         return(NULL);
  55     cur = doc->children;
  56
  57     /*
  58      * Search the html
  59      */
  60     while (cur != NULL) {
  61         if ((cur->type == XML_ELEMENT_NODE) && (cur->name != NULL)) {
  62             if (xmlStrEqual(cur->name, BAD_CAST"html"))
  63                 break;
  64             if (xmlStrEqual(cur->name, BAD_CAST"head"))
  65                 goto found_head;
  66             if (xmlStrEqual(cur->name, BAD_CAST"meta"))
  67                 goto found_meta;
  68         }
  69         cur = cur->next;
  70     }
  71     if (cur == NULL)
  72         return(NULL);
  73     cur = cur->children;
  74
  75     /*
  76      * Search the head
  77      */
  78     while (cur != NULL) {
  79         if ((cur->type == XML_ELEMENT_NODE) && (cur->name != NULL)) {
  80             if (xmlStrEqual(cur->name, BAD_CAST"head"))
  81                 break;
  82             if (xmlStrEqual(cur->name, BAD_CAST"meta"))
  83                 goto found_meta;
  84         }
  85         cur = cur->next;
  86     }
  87     if (cur == NULL)
  88         return(NULL);
  89 found_head:
  90     cur = cur->children;
  91
  92     /*
  93      * Search the meta elements
  94      */
  95 found_meta:
  96     while (cur != NULL) {
  97         if ((cur->type == XML_ELEMENT_NODE) && (cur->name != NULL)) {
  98             if (xmlStrEqual(cur->name, BAD_CAST"meta")) {
  99                 xmlAttrPtr attr = cur->properties;
 100                 int http;
 101                 const xmlChar *value;
 102
 103                 content = NULL;
 104                 http = 0;
 105                 while (attr != NULL) {
 106                     if ((attr->children != NULL) &&
 107                         (attr->children->type == XML_TEXT_NODE) &&
 108                         (attr->children->next == NULL)) {
 109                         value = attr->children->content;
 110                         if ((!xmlStrcasecmp(attr->name, BAD_CAST"http-equiv"))
 111                          && (!xmlStrcasecmp(value, BAD_CAST"Content-Type")))
 112                             http = 1;
 113                         else if ((value != NULL)
 114                          && (!xmlStrcasecmp(attr->name, BAD_CAST"content")))
 115                             content = value;
 116                         if ((http != 0) && (content != NULL))
 117                             goto found_content;
 118                     }
 119                     attr = attr->next;
 120                 }
 121             }
 122         }
 123         cur = cur->next;
 124     }
 125     return(NULL);
 126
 127 found_content:
 128     encoding = xmlStrstr(content, BAD_CAST"charset=");
 129     if (encoding == NULL)
 130         encoding = xmlStrstr(content, BAD_CAST"Charset=");
 131     if (encoding == NULL)
 132         encoding = xmlStrstr(content, BAD_CAST"CHARSET=");
 133     if (encoding != NULL) {
 134         encoding += 8;
 135     } else {
 136         encoding = xmlStrstr(content, BAD_CAST"charset =");
 137         if (encoding == NULL)
 138             encoding = xmlStrstr(content, BAD_CAST"Charset =");
 139         if (encoding == NULL)
 140             encoding = xmlStrstr(content, BAD_CAST"CHARSET =");
 141         if (encoding != NULL)
 142             encoding += 9;
 143     }
 144     if (encoding != NULL) {
 145         while ((*encoding == ' ') || (*encoding == '\t')) encoding++;
 146     }
 147     return(encoding);
 148 }
 149
 150 /**
 151  * htmlSetMetaEncoding:
 152  * @doc:  the document
 153  * @encoding:  the encoding string
 154  *
 155  * Sets the current encoding in the Meta tags
 156  * NOTE: this will not change the document content encoding, just
 157  * the META flag associated.
 158  *
 159  * Returns 0 in case of success and -1 in case of error
 160  */
 161 int
 162 htmlSetMetaEncoding(htmlDocPtr doc, const xmlChar *encoding) {
 163     htmlNodePtr cur, meta = NULL, head = NULL;
 164     const xmlChar *content = NULL;
 165     char newcontent[100];
 166
 167     newcontent[0] = 0;
 168
 169     if (doc == NULL)
 170         return(-1);
 171
 172     /* html isn't a real encoding it's just libxml2 way to get entities */
 173     if (!xmlStrcasecmp(encoding, BAD_CAST "html"))
 174         return(-1);
 175
 176     if (encoding != NULL) {
 177         snprintf(newcontent, sizeof(newcontent), "text/html; charset=%s",
 178                 (char *)encoding);
 179         newcontent[sizeof(newcontent) - 1] = 0;
 180     }
 181
 182     cur = doc->children;
 183
 184     /*
 185      * Search the html
 186      */
 187     while (cur != NULL) {
 188         if ((cur->type == XML_ELEMENT_NODE) && (cur->name != NULL)) {
 189             if (xmlStrcasecmp(cur->name, BAD_CAST"html") == 0)
 190                 break;
 191             if (xmlStrcasecmp(cur->name, BAD_CAST"head") == 0)
 192                 goto found_head;
 193             if (xmlStrcasecmp(cur->name, BAD_CAST"meta") == 0)
 194                 goto found_meta;
 195         }
 196         cur = cur->next;
 197     }
 198     if (cur == NULL)
 199         return(-1);
 200     cur = cur->children;
 201
 202     /*
 203      * Search the head
 204      */
 205     while (cur != NULL) {
 206         if ((cur->type == XML_ELEMENT_NODE) && (cur->name != NULL)) {
 207             if (xmlStrcasecmp(cur->name, BAD_CAST"head") == 0)
 208                 break;
 209             if (xmlStrcasecmp(cur->name, BAD_CAST"meta") == 0) {
 210                 head = cur->parent;
 211                 goto found_meta;
 212             }
 213         }
 214         cur = cur->next;
 215     }
 216     if (cur == NULL)
 217         return(-1);
 218 found_head:
 219     head = cur;
 220     if (cur->children == NULL)
 221         goto create;
 222     cur = cur->children;
 223
 224 found_meta:
 225     /*
 226      * Search and update all the remaining the meta elements carrying
 227      * encoding informations
 228      */
 229     while (cur != NULL) {
 230         if ((cur->type == XML_ELEMENT_NODE) && (cur->name != NULL)) {
 231             if (xmlStrcasecmp(cur->name, BAD_CAST"meta") == 0) {
 232                 xmlAttrPtr attr = cur->properties;
 233                 int http;
 234                 const xmlChar *value;
 235
 236                 content = NULL;
 237                 http = 0;
 238                 while (attr != NULL) {
 239                     if ((attr->children != NULL) &&
 240                         (attr->children->type == XML_TEXT_NODE) &&
 241                         (attr->children->next == NULL)) {
 242                         value = attr->children->content;
 243                         if ((!xmlStrcasecmp(attr->name, BAD_CAST"http-equiv"))
 244                          && (!xmlStrcasecmp(value, BAD_CAST"Content-Type")))
 245                             http = 1;
 246                         else
 247                         {
 248                            if ((value != NULL) &&
 249                                (!xmlStrcasecmp(attr->name, BAD_CAST"content")))
 250                                content = value;
 251                         }
 252                         if ((http != 0) && (content != NULL))
 253                             break;
 254                     }
 255                     attr = attr->next;
 256                 }
 257                 if ((http != 0) && (content != NULL)) {
 258                     meta = cur;
 259                     break;
 260                 }
 261
 262             }
 263         }
 264         cur = cur->next;
 265     }
 266 create:
 267     if (meta == NULL) {
 268         if ((encoding != NULL) && (head != NULL)) {
 269             /*
 270              * Create a new Meta element with the right attributes
 271              */
 272
 273             meta = xmlNewDocNode(doc, NULL, BAD_CAST"meta", NULL);
 274             if (head->children == NULL)
 275                 xmlAddChild(head, meta);
 276             else
 277                 xmlAddPrevSibling(head->children, meta);
 278             xmlNewProp(meta, BAD_CAST"http-equiv", BAD_CAST"Content-Type");
 279             xmlNewProp(meta, BAD_CAST"content", BAD_CAST newcontent);
 280         }
 281     } else {
 282         /* remove the meta tag if NULL is passed */
 283         if (encoding == NULL) {
 284             xmlUnlinkNode(meta);
 285             xmlFreeNode(meta);
 286         }
 287         /* change the document only if there is a real encoding change */
 288         else if (xmlStrcasestr(content, encoding) == NULL) {
 289             xmlSetProp(meta, BAD_CAST"content", BAD_CAST newcontent);
 290         }
 291     }
 292
 293
 294     return(0);
 295 }
 296
 297 /**
 298  * booleanHTMLAttrs:
 299  *
 300  * These are the HTML attributes which will be output
 301  * in minimized form, i.e. <option selected="selected"> will be
 302  * output as <option selected>, as per XSLT 1.0 16.2 "HTML Output Method"
 303  *
 304  */
 305 static const char* htmlBooleanAttrs[] = {
 306   "checked", "compact", "declare", "defer", "disabled", "ismap",
 307   "multiple", "nohref", "noresize", "noshade", "nowrap", "readonly",
 308   "selected", NULL
 309 };
 310
 311
 312 /**
 313  * htmlIsBooleanAttr:
 314  * @name:  the name of the attribute to check
 315  *
 316  * Determine if a given attribute is a boolean attribute.
 317  *
 318  * returns: false if the attribute is not boolean, true otherwise.
 319  */
 320 int
 321 htmlIsBooleanAttr(const xmlChar *name)
 322 {
 323     int i = 0;
 324
 325     while (htmlBooleanAttrs[i] != NULL) {
 326         if (xmlStrcasecmp((const xmlChar *)htmlBooleanAttrs[i], name) == 0)
 327             return 1;
 328         i++;
 329     }
 330     return 0;
 331 }
 332
 333 #ifdef LIBXML_OUTPUT_ENABLED
 334 /*
 335  * private routine exported from xmlIO.c
 336  */
 337 xmlOutputBufferPtr
 338 xmlAllocOutputBufferInternal(xmlCharEncodingHandlerPtr encoder);
 339 /************************************************************************
 340  *                                                                      *
 341  *                      Output error handlers                           *
 342  *                                                                      *
 343  ************************************************************************/
 344 /**
 345  * htmlSaveErrMemory:
 346  * @extra:  extra informations
 347  *
 348  * Handle an out of memory condition
 349  */
 350 static void
 351 htmlSaveErrMemory(const char *extra)
 352 {
 353     __xmlSimpleError(XML_FROM_OUTPUT, XML_ERR_NO_MEMORY, NULL, NULL, extra);
 354 }
 355
 356 /**
 357  * htmlSaveErr:
 358  * @code:  the error number
 359  * @node:  the location of the error.
 360  * @extra:  extra informations
 361  *
 362  * Handle an out of memory condition
 363  */
 364 static void
 365 htmlSaveErr(int code, xmlNodePtr node, const char *extra)
 366 {
 367     const char *msg = NULL;
 368
 369     switch(code) {
 370         case XML_SAVE_NOT_UTF8:
 371             msg = "string is not in UTF-8\n";
 372             break;
 373         case XML_SAVE_CHAR_INVALID:
 374             msg = "invalid character value\n";
 375             break;
 376         case XML_SAVE_UNKNOWN_ENCODING:
 377             msg = "unknown encoding %s\n";
 378             break;
 379         case XML_SAVE_NO_DOCTYPE:
 380             msg = "HTML has no DOCTYPE\n";
 381             break;
 382         default:
 383             msg = "unexpected error number\n";
 384     }
 385     __xmlSimpleError(XML_FROM_OUTPUT, code, node, msg, extra);
 386 }
 387
 388 /************************************************************************
 389  *                                                                      *
 390  *              Dumping HTML tree content to a simple buffer            *
 391  *                                                                      *
 392  ************************************************************************/
 393
 394 static int
 395 htmlNodeDumpFormat(xmlBufferPtr buf, xmlDocPtr doc, xmlNodePtr cur,
 396                    int format);
 397
 398 /**
 399  * htmlNodeDumpFormat:
 400  * @buf:  the HTML buffer output
 401  * @doc:  the document
 402  * @cur:  the current node
 403  * @format:  should formatting spaces been added
 404  *
 405  * Dump an HTML node, recursive behaviour,children are printed too.
 406  *
 407  * Returns the number of byte written or -1 in case of error
 408  */
 409 static int
 410 htmlNodeDumpFormat(xmlBufferPtr buf, xmlDocPtr doc, xmlNodePtr cur,
 411                    int format) {
 412     unsigned int use;
 413     int ret;
 414     xmlOutputBufferPtr outbuf;
 415
 416     if (cur == NULL) {
 417         return (-1);
 418     }
 419     if (buf == NULL) {
 420         return (-1);
 421     }
 422     outbuf = (xmlOutputBufferPtr) xmlMalloc(sizeof(xmlOutputBuffer));
 423     if (outbuf == NULL) {
 424         htmlSaveErrMemory("allocating HTML output buffer");
 425         return (-1);
 426     }
 427     memset(outbuf, 0, (size_t) sizeof(xmlOutputBuffer));
 428     outbuf->buffer = buf;
 429     outbuf->encoder = NULL;
 430     outbuf->writecallback = NULL;
 431     outbuf->closecallback = NULL;
 432     outbuf->context = NULL;
 433     outbuf->written = 0;
 434
 435     use = buf->use;
 436     htmlNodeDumpFormatOutput(outbuf, doc, cur, NULL, format);
 437     xmlFree(outbuf);
 438     ret = buf->use - use;
 439     return (ret);
 440 }
 441
 442 /**
 443  * htmlNodeDump:
 444  * @buf:  the HTML buffer output
 445  * @doc:  the document
 446  * @cur:  the current node
 447  *
 448  * Dump an HTML node, recursive behaviour,children are printed too,
 449  * and formatting returns are added.
 450  *
 451  * Returns the number of byte written or -1 in case of error
 452  */
 453 int
 454 htmlNodeDump(xmlBufferPtr buf, xmlDocPtr doc, xmlNodePtr cur) {
 455     xmlInitParser();
 456
 457     return(htmlNodeDumpFormat(buf, doc, cur, 1));
 458 }
 459
 460 /**
 461  * htmlNodeDumpFileFormat:
 462  * @out:  the FILE pointer
 463  * @doc:  the document
 464  * @cur:  the current node
 465  * @encoding: the document encoding
 466  * @format:  should formatting spaces been added
 467  *
 468  * Dump an HTML node, recursive behaviour,children are printed too.
 469  *
 470  * TODO: if encoding == NULL try to save in the doc encoding
 471  *
 472  * returns: the number of byte written or -1 in case of failure.
 473  */
 474 int
 475 htmlNodeDumpFileFormat(FILE *out, xmlDocPtr doc,
 476                        xmlNodePtr cur, const char *encoding, int format) {
 477     xmlOutputBufferPtr buf;
 478     xmlCharEncodingHandlerPtr handler = NULL;
 479     int ret;
 480
 481     xmlInitParser();
 482
 483     if (encoding != NULL) {
 484         xmlCharEncoding enc;
 485
 486         enc = xmlParseCharEncoding(encoding);
 487         if (enc != XML_CHAR_ENCODING_UTF8) {
 488             handler = xmlFindCharEncodingHandler(encoding);
 489             if (handler == NULL)
 490                 htmlSaveErr(XML_SAVE_UNKNOWN_ENCODING, NULL, encoding);
 491         }
 492     }
 493
 494     /*
 495      * Fallback to HTML or ASCII when the encoding is unspecified
 496      */
 497     if (handler == NULL)
 498         handler = xmlFindCharEncodingHandler("HTML");
 499     if (handler == NULL)
 500         handler = xmlFindCharEncodingHandler("ascii");
 501
 502     /*
 503      * save the content to a temp buffer.
 504      */
 505     buf = xmlOutputBufferCreateFile(out, handler);
 506     if (buf == NULL) return(0);
 507
 508     htmlNodeDumpFormatOutput(buf, doc, cur, encoding, format);
 509
 510     ret = xmlOutputBufferClose(buf);
 511     return(ret);
 512 }
 513
 514 /**
 515  * htmlNodeDumpFile:
 516  * @out:  the FILE pointer
 517  * @doc:  the document
 518  * @cur:  the current node
 519  *
 520  * Dump an HTML node, recursive behaviour,children are printed too,
 521  * and formatting returns are added.
 522  */
 523 void
 524 htmlNodeDumpFile(FILE *out, xmlDocPtr doc, xmlNodePtr cur) {
 525     htmlNodeDumpFileFormat(out, doc, cur, NULL, 1);
 526 }
 527
 528 /**
 529  * htmlDocDumpMemoryFormat:
 530  * @cur:  the document
 531  * @mem:  OUT: the memory pointer
 532  * @size:  OUT: the memory length
 533  * @format:  should formatting spaces been added
 534  *
 535  * Dump an HTML document in memory and return the xmlChar * and it's size.
 536  * It's up to the caller to free the memory.
 537  */
 538 void
 539 htmlDocDumpMemoryFormat(xmlDocPtr cur, xmlChar**mem, int *size, int format) {
 540     xmlOutputBufferPtr buf;
 541     xmlCharEncodingHandlerPtr handler = NULL;
 542     const char *encoding;
 543
 544     xmlInitParser();
 545
 546     if ((mem == NULL) || (size == NULL))
 547         return;
 548     if (cur == NULL) {
 549         *mem = NULL;
 550         *size = 0;
 551         return;
 552     }
 553
 554     encoding = (const char *) htmlGetMetaEncoding(cur);
 555
 556     if (encoding != NULL) {
 557         xmlCharEncoding enc;
 558
 559         enc = xmlParseCharEncoding(encoding);
 560         if (enc != cur->charset) {
 561             if (cur->charset != XML_CHAR_ENCODING_UTF8) {
 562                 /*
 563                  * Not supported yet
 564                  */
 565                 *mem = NULL;
 566                 *size = 0;
 567                 return;
 568             }
 569
 570             handler = xmlFindCharEncodingHandler(encoding);
 571             if (handler == NULL)
 572                 htmlSaveErr(XML_SAVE_UNKNOWN_ENCODING, NULL, encoding);
 573
 574         } else {
 575             handler = xmlFindCharEncodingHandler(encoding);
 576         }
 577     }
 578
 579     /*
 580      * Fallback to HTML or ASCII when the encoding is unspecified
 581      */
 582     if (handler == NULL)
 583         handler = xmlFindCharEncodingHandler("HTML");
 584     if (handler == NULL)
 585         handler = xmlFindCharEncodingHandler("ascii");
 586
 587     buf = xmlAllocOutputBufferInternal(handler);
 588     if (buf == NULL) {
 589         *mem = NULL;
 590         *size = 0;
 591         return;
 592     }
 593
 594     htmlDocContentDumpFormatOutput(buf, cur, NULL, format);
 595
 596     xmlOutputBufferFlush(buf);
 597     if (buf->conv != NULL) {
 598         *size = buf->conv->use;
 599         *mem = xmlStrndup(buf->conv->content, *size);
 600     } else {
 601         *size = buf->buffer->use;
 602         *mem = xmlStrndup(buf->buffer->content, *size);
 603     }
 604     (void)xmlOutputBufferClose(buf);
 605 }
 606
 607 /**
 608  * htmlDocDumpMemory:
 609  * @cur:  the document
 610  * @mem:  OUT: the memory pointer
 611  * @size:  OUT: the memory length
 612  *
 613  * Dump an HTML document in memory and return the xmlChar * and it's size.
 614  * It's up to the caller to free the memory.
 615  */
 616 void
 617 htmlDocDumpMemory(xmlDocPtr cur, xmlChar**mem, int *size) {
 618         htmlDocDumpMemoryFormat(cur, mem, size, 1);
 619 }
 620
 621
 622 /************************************************************************
 623  *                                                                      *
 624  *              Dumping HTML tree content to an I/O output buffer       *
 625  *                                                                      *
 626  ************************************************************************/
 627
 628 void xmlNsListDumpOutput(xmlOutputBufferPtr buf, xmlNsPtr cur);
 629
 630 /**
 631  * htmlDtdDumpOutput:
 632  * @buf:  the HTML buffer output
 633  * @doc:  the document
 634  * @encoding:  the encoding string
 635  *
 636  * TODO: check whether encoding is needed
 637  *
 638  * Dump the HTML document DTD, if any.
 639  */
 640 static void
 641 htmlDtdDumpOutput(xmlOutputBufferPtr buf, xmlDocPtr doc,
 642                   const char *encoding ATTRIBUTE_UNUSED) {
 643     xmlDtdPtr cur = doc->intSubset;
 644
 645     if (cur == NULL) {
 646         htmlSaveErr(XML_SAVE_NO_DOCTYPE, (xmlNodePtr) doc, NULL);
 647         return;
 648     }
 649     xmlOutputBufferWriteString(buf, "<!DOCTYPE ");
 650     xmlOutputBufferWriteString(buf, (const char *)cur->name);
 651     if (cur->ExternalID != NULL) {
 652         xmlOutputBufferWriteString(buf, " PUBLIC ");
 653         xmlBufferWriteQuotedString(buf->buffer, cur->ExternalID);
 654         if (cur->SystemID != NULL) {
 655             xmlOutputBufferWriteString(buf, " ");
 656             xmlBufferWriteQuotedString(buf->buffer, cur->SystemID);
 657         }
 658     }  else if (cur->SystemID != NULL) {
 659         xmlOutputBufferWriteString(buf, " SYSTEM ");
 660         xmlBufferWriteQuotedString(buf->buffer, cur->SystemID);
 661     }
 662     xmlOutputBufferWriteString(buf, ">\n");
 663 }
 664
 665 /**
 666  * htmlAttrDumpOutput:
 667  * @buf:  the HTML buffer output
 668  * @doc:  the document
 669  * @cur:  the attribute pointer
 670  * @encoding:  the encoding string
 671  *
 672  * Dump an HTML attribute
 673  */
 674 static void
 675 htmlAttrDumpOutput(xmlOutputBufferPtr buf, xmlDocPtr doc, xmlAttrPtr cur,
 676                    const char *encoding ATTRIBUTE_UNUSED) {
 677     xmlChar *value;
 678
 679     /*
 680      * TODO: The html output method should not escape a & character
 681      *       occurring in an attribute value immediately followed by
 682      *       a { character (see Section B.7.1 of the HTML 4.0 Recommendation).
 683      */
 684
 685     if (cur == NULL) {
 686         return;
 687     }
 688     xmlOutputBufferWriteString(buf, " ");
 689     if ((cur->ns != NULL) && (cur->ns->prefix != NULL)) {
 690         xmlOutputBufferWriteString(buf, (const char *)cur->ns->prefix);
 691         xmlOutputBufferWriteString(buf, ":");
 692     }
 693     xmlOutputBufferWriteString(buf, (const char *)cur->name);
 694     if ((cur->children != NULL) && (!htmlIsBooleanAttr(cur->name))) {
 695         value = xmlNodeListGetString(doc, cur->children, 0);
 696         if (value) {
 697             xmlOutputBufferWriteString(buf, "=");
 698             if ((cur->ns == NULL) && (cur->parent != NULL) &&
 699                 (cur->parent->ns == NULL) &&
 700                 ((!xmlStrcasecmp(cur->name, BAD_CAST "href")) ||
 701                  (!xmlStrcasecmp(cur->name, BAD_CAST "action")) ||
 702                  (!xmlStrcasecmp(cur->name, BAD_CAST "src")) ||
 703                  ((!xmlStrcasecmp(cur->name, BAD_CAST "name")) &&
 704                   (!xmlStrcasecmp(cur->parent->name, BAD_CAST "a"))))) {
 705                 xmlChar *escaped;
 706                 xmlChar *tmp = value;
 707
 708                 while (IS_BLANK_CH(*tmp)) tmp++;
 709
 710                 escaped = xmlURIEscapeStr(tmp, BAD_CAST"@/:=?;#%&,+");
 711                 if (escaped != NULL) {
 712                     xmlBufferWriteQuotedString(buf->buffer, escaped);
 713                     xmlFree(escaped);
 714                 } else {
 715                     xmlBufferWriteQuotedString(buf->buffer, value);
 716                 }
 717             } else {
 718                 xmlBufferWriteQuotedString(buf->buffer, value);
 719             }
 720             xmlFree(value);
 721         } else  {
 722             xmlOutputBufferWriteString(buf, "=\"\"");
 723         }
 724     }
 725 }
 726
 727 /**
 728  * htmlAttrListDumpOutput:
 729  * @buf:  the HTML buffer output
 730  * @doc:  the document
 731  * @cur:  the first attribute pointer
 732  * @encoding:  the encoding string
 733  *
 734  * Dump a list of HTML attributes
 735  */
 736 static void
 737 htmlAttrListDumpOutput(xmlOutputBufferPtr buf, xmlDocPtr doc, xmlAttrPtr cur, const char *encoding) {
 738     if (cur == NULL) {
 739         return;
 740     }
 741     while (cur != NULL) {
 742         htmlAttrDumpOutput(buf, doc, cur, encoding);
 743         cur = cur->next;
 744     }
 745 }
 746
 747
 748
 749 /**
 750  * htmlNodeListDumpOutput:
 751  * @buf:  the HTML buffer output
 752  * @doc:  the document
 753  * @cur:  the first node
 754  * @encoding:  the encoding string
 755  * @format:  should formatting spaces been added
 756  *
 757  * Dump an HTML node list, recursive behaviour,children are printed too.
 758  */
 759 static void
 760 htmlNodeListDumpOutput(xmlOutputBufferPtr buf, xmlDocPtr doc,
 761                        xmlNodePtr cur, const char *encoding, int format) {
 762     if (cur == NULL) {
 763         return;
 764     }
 765     while (cur != NULL) {
 766         htmlNodeDumpFormatOutput(buf, doc, cur, encoding, format);
 767         cur = cur->next;
 768     }
 769 }
 770
 771 /**
 772  * htmlNodeDumpFormatOutput:
 773  * @buf:  the HTML buffer output
 774  * @doc:  the document
 775  * @cur:  the current node
 776  * @encoding:  the encoding string
 777  * @format:  should formatting spaces been added
 778  *
 779  * Dump an HTML node, recursive behaviour,children are printed too.
 780  */
 781 void
 782 htmlNodeDumpFormatOutput(xmlOutputBufferPtr buf, xmlDocPtr doc,
 783                          xmlNodePtr cur, const char *encoding, int format) {
 784     const htmlElemDesc * info;
 785
 786     xmlInitParser();
 787
 788     if ((cur == NULL) || (buf == NULL)) {
 789         return;
 790     }
 791     /*
 792      * Special cases.
 793      */
 794     if (cur->type == XML_DTD_NODE)
 795         return;
 796     if ((cur->type == XML_HTML_DOCUMENT_NODE) ||
 797         (cur->type == XML_DOCUMENT_NODE)){
 798         htmlDocContentDumpOutput(buf, (xmlDocPtr) cur, encoding);
 799         return;
 800     }
 801     if (cur->type == XML_ATTRIBUTE_NODE) {
 802         htmlAttrDumpOutput(buf, doc, (xmlAttrPtr) cur, encoding);
 803         return;
 804     }
 805     if (cur->type == HTML_TEXT_NODE) {
 806         if (cur->content != NULL) {
 807             if (((cur->name == (const xmlChar *)xmlStringText) ||
 808                  (cur->name != (const xmlChar *)xmlStringTextNoenc)) &&
 809                 ((cur->parent == NULL) ||
 810                  ((xmlStrcasecmp(cur->parent->name, BAD_CAST "script")) &&
 811                   (xmlStrcasecmp(cur->parent->name, BAD_CAST "style"))))) {
 812                 xmlChar *buffer;
 813
 814                 buffer = xmlEncodeEntitiesReentrant(doc, cur->content);
 815                 if (buffer != NULL) {
 816                     xmlOutputBufferWriteString(buf, (const char *)buffer);
 817                     xmlFree(buffer);
 818                 }
 819             } else {
 820                 xmlOutputBufferWriteString(buf, (const char *)cur->content);
 821             }
 822         }
 823         return;
 824     }
 825     if (cur->type == HTML_COMMENT_NODE) {
 826         if (cur->content != NULL) {
 827             xmlOutputBufferWriteString(buf, "<!--");
 828             xmlOutputBufferWriteString(buf, (const char *)cur->content);
 829             xmlOutputBufferWriteString(buf, "-->");
 830         }
 831         return;
 832     }
 833     if (cur->type == HTML_PI_NODE) {
 834         if (cur->name == NULL)
 835             return;
 836         xmlOutputBufferWriteString(buf, "<?");
 837         xmlOutputBufferWriteString(buf, (const char *)cur->name);
 838         if (cur->content != NULL) {
 839             xmlOutputBufferWriteString(buf, " ");
 840             xmlOutputBufferWriteString(buf, (const char *)cur->content);
 841         }
 842         xmlOutputBufferWriteString(buf, ">");
 843         return;
 844     }
 845     if (cur->type == HTML_ENTITY_REF_NODE) {
 846         xmlOutputBufferWriteString(buf, "&");
 847         xmlOutputBufferWriteString(buf, (const char *)cur->name);
 848         xmlOutputBufferWriteString(buf, ";");
 849         return;
 850     }
 851     if (cur->type == HTML_PRESERVE_NODE) {
 852         if (cur->content != NULL) {
 853             xmlOutputBufferWriteString(buf, (const char *)cur->content);
 854         }
 855         return;
 856     }
 857
 858     /*
 859      * Get specific HTML info for that node.
 860      */
 861     if (cur->ns == NULL)
 862         info = htmlTagLookup(cur->name);
 863     else
 864         info = NULL;
 865
 866     xmlOutputBufferWriteString(buf, "<");
 867     if ((cur->ns != NULL) && (cur->ns->prefix != NULL)) {
 868         xmlOutputBufferWriteString(buf, (const char *)cur->ns->prefix);
 869         xmlOutputBufferWriteString(buf, ":");
 870     }
 871     xmlOutputBufferWriteString(buf, (const char *)cur->name);
 872     if (cur->nsDef)
 873         xmlNsListDumpOutput(buf, cur->nsDef);
 874     if (cur->properties != NULL)
 875         htmlAttrListDumpOutput(buf, doc, cur->properties, encoding);
 876
 877     if ((info != NULL) && (info->empty)) {
 878         xmlOutputBufferWriteString(buf, ">");
 879         if ((format) && (!info->isinline) && (cur->next != NULL)) {
 880             if ((cur->next->type != HTML_TEXT_NODE) &&
 881                 (cur->next->type != HTML_ENTITY_REF_NODE) &&
 882                 (cur->parent != NULL) &&
 883                 (cur->parent->name != NULL) &&
 884                 (cur->parent->name[0] != 'p')) /* p, pre, param */
 885                 xmlOutputBufferWriteString(buf, "\n");
 886         }
 887         return;
 888     }
 889     if (((cur->type == XML_ELEMENT_NODE) || (cur->content == NULL)) &&
 890         (cur->children == NULL)) {
 891         if ((info != NULL) && (info->saveEndTag != 0) &&
 892             (xmlStrcmp(BAD_CAST info->name, BAD_CAST "html")) &&
 893             (xmlStrcmp(BAD_CAST info->name, BAD_CAST "body"))) {
 894             xmlOutputBufferWriteString(buf, ">");
 895         } else {
 896             xmlOutputBufferWriteString(buf, "></");
 897             if ((cur->ns != NULL) && (cur->ns->prefix != NULL)) {
 898                 xmlOutputBufferWriteString(buf, (const char *)cur->ns->prefix);
 899                 xmlOutputBufferWriteString(buf, ":");
 900             }
 901             xmlOutputBufferWriteString(buf, (const char *)cur->name);
 902             xmlOutputBufferWriteString(buf, ">");
 903         }
 904         if ((format) && (cur->next != NULL) &&
 905             (info != NULL) && (!info->isinline)) {
 906             if ((cur->next->type != HTML_TEXT_NODE) &&
 907                 (cur->next->type != HTML_ENTITY_REF_NODE) &&
 908                 (cur->parent != NULL) &&
 909                 (cur->parent->name != NULL) &&
 910                 (cur->parent->name[0] != 'p')) /* p, pre, param */
 911                 xmlOutputBufferWriteString(buf, "\n");
 912         }
 913         return;
 914     }
 915     xmlOutputBufferWriteString(buf, ">");
 916     if ((cur->type != XML_ELEMENT_NODE) &&
 917         (cur->content != NULL)) {
 918             /*
 919              * Uses the OutputBuffer property to automatically convert
 920              * invalids to charrefs
 921              */
 922
 923             xmlOutputBufferWriteString(buf, (const char *) cur->content);
 924     }
 925     if (cur->children != NULL) {
 926         if ((format) && (info != NULL) && (!info->isinline) &&
 927             (cur->children->type != HTML_TEXT_NODE) &&
 928             (cur->children->type != HTML_ENTITY_REF_NODE) &&
 929             (cur->children != cur->last) &&
 930             (cur->name != NULL) &&
 931             (cur->name[0] != 'p')) /* p, pre, param */
 932             xmlOutputBufferWriteString(buf, "\n");
 933         htmlNodeListDumpOutput(buf, doc, cur->children, encoding, format);
 934         if ((format) && (info != NULL) && (!info->isinline) &&
 935             (cur->last->type != HTML_TEXT_NODE) &&
 936             (cur->last->type != HTML_ENTITY_REF_NODE) &&
 937             (cur->children != cur->last) &&
 938             (cur->name != NULL) &&
 939             (cur->name[0] != 'p')) /* p, pre, param */
 940             xmlOutputBufferWriteString(buf, "\n");
 941     }
 942     xmlOutputBufferWriteString(buf, "</");
 943     if ((cur->ns != NULL) && (cur->ns->prefix != NULL)) {
 944         xmlOutputBufferWriteString(buf, (const char *)cur->ns->prefix);
 945         xmlOutputBufferWriteString(buf, ":");
 946     }
 947     xmlOutputBufferWriteString(buf, (const char *)cur->name);
 948     xmlOutputBufferWriteString(buf, ">");
 949     if ((format) && (info != NULL) && (!info->isinline) &&
 950         (cur->next != NULL)) {
 951         if ((cur->next->type != HTML_TEXT_NODE) &&
 952             (cur->next->type != HTML_ENTITY_REF_NODE) &&
 953             (cur->parent != NULL) &&
 954             (cur->parent->name != NULL) &&
 955             (cur->parent->name[0] != 'p')) /* p, pre, param */
 956             xmlOutputBufferWriteString(buf, "\n");
 957     }
 958 }
 959
 960 /**
 961  * htmlNodeDumpOutput:
 962  * @buf:  the HTML buffer output
 963  * @doc:  the document
 964  * @cur:  the current node
 965  * @encoding:  the encoding string
 966  *
 967  * Dump an HTML node, recursive behaviour,children are printed too,
 968  * and formatting returns/spaces are added.
 969  */
 970 void
 971 htmlNodeDumpOutput(xmlOutputBufferPtr buf, xmlDocPtr doc,
 972                    xmlNodePtr cur, const char *encoding) {
 973     htmlNodeDumpFormatOutput(buf, doc, cur, encoding, 1);
 974 }
 975
 976 /**
 977  * htmlDocContentDumpFormatOutput:
 978  * @buf:  the HTML buffer output
 979  * @cur:  the document
 980  * @encoding:  the encoding string
 981  * @format:  should formatting spaces been added
 982  *
 983  * Dump an HTML document.
 984  */
 985 void
 986 htmlDocContentDumpFormatOutput(xmlOutputBufferPtr buf, xmlDocPtr cur,
 987                                const char *encoding, int format) {
 988     int type;
 989
 990     xmlInitParser();
 991
 992     if ((buf == NULL) || (cur == NULL))
 993         return;
 994
 995     /*
 996      * force to output the stuff as HTML, especially for entities
 997      */
 998     type = cur->type;
 999     cur->type = XML_HTML_DOCUMENT_NODE;
1000     if (cur->intSubset != NULL) {
1001         htmlDtdDumpOutput(buf, cur, NULL);
1002     }
1003     if (cur->children != NULL) {
1004         htmlNodeListDumpOutput(buf, cur, cur->children, encoding, format);
1005     }
1006     xmlOutputBufferWriteString(buf, "\n");
1007     cur->type = (xmlElementType) type;
1008 }
1009
1010 /**
1011  * htmlDocContentDumpOutput:
1012  * @buf:  the HTML buffer output
1013  * @cur:  the document
1014  * @encoding:  the encoding string
1015  *
1016  * Dump an HTML document. Formating return/spaces are added.
1017  */
1018 void
1019 htmlDocContentDumpOutput(xmlOutputBufferPtr buf, xmlDocPtr cur,
1020                          const char *encoding) {
1021     htmlDocContentDumpFormatOutput(buf, cur, encoding, 1);
1022 }
1023
1024 /************************************************************************
1025  *                                                                      *
1026  *              Saving functions front-ends                             *
1027  *                                                                      *
1028  ************************************************************************/
1029
1030 /**
1031  * htmlDocDump:
1032  * @f:  the FILE*
1033  * @cur:  the document
1034  *
1035  * Dump an HTML document to an open FILE.
1036  *
1037  * returns: the number of byte written or -1 in case of failure.
1038  */
1039 int
1040 htmlDocDump(FILE *f, xmlDocPtr cur) {
1041     xmlOutputBufferPtr buf;
1042     xmlCharEncodingHandlerPtr handler = NULL;
1043     const char *encoding;
1044     int ret;
1045
1046     xmlInitParser();
1047
1048     if ((cur == NULL) || (f == NULL)) {
1049         return(-1);
1050     }
1051
1052     encoding = (const char *) htmlGetMetaEncoding(cur);
1053
1054     if (encoding != NULL) {
1055         xmlCharEncoding enc;
1056
1057         enc = xmlParseCharEncoding(encoding);
1058         if (enc != cur->charset) {
1059             if (cur->charset != XML_CHAR_ENCODING_UTF8) {
1060                 /*
1061                  * Not supported yet
1062                  */
1063                 return(-1);
1064             }
1065
1066             handler = xmlFindCharEncodingHandler(encoding);
1067             if (handler == NULL)
1068                 htmlSaveErr(XML_SAVE_UNKNOWN_ENCODING, NULL, encoding);
1069         } else {
1070             handler = xmlFindCharEncodingHandler(encoding);
1071         }
1072     }
1073
1074     /*
1075      * Fallback to HTML or ASCII when the encoding is unspecified
1076      */
1077     if (handler == NULL)
1078         handler = xmlFindCharEncodingHandler("HTML");
1079     if (handler == NULL)
1080         handler = xmlFindCharEncodingHandler("ascii");
1081
1082     buf = xmlOutputBufferCreateFile(f, handler);
1083     if (buf == NULL) return(-1);
1084     htmlDocContentDumpOutput(buf, cur, NULL);
1085
1086     ret = xmlOutputBufferClose(buf);
1087     return(ret);
1088 }
1089
1090 /**
1091  * htmlSaveFile:
1092  * @filename:  the filename (or URL)
1093  * @cur:  the document
1094  *
1095  * Dump an HTML document to a file. If @filename is "-" the stdout file is
1096  * used.
1097  * returns: the number of byte written or -1 in case of failure.
1098  */
1099 int
1100 htmlSaveFile(const char *filename, xmlDocPtr cur) {
1101     xmlOutputBufferPtr buf;
1102     xmlCharEncodingHandlerPtr handler = NULL;
1103     const char *encoding;
1104     int ret;
1105
1106     if ((cur == NULL) || (filename == NULL))
1107         return(-1);
1108
1109     xmlInitParser();
1110
1111     encoding = (const char *) htmlGetMetaEncoding(cur);
1112
1113     if (encoding != NULL) {
1114         xmlCharEncoding enc;
1115
1116         enc = xmlParseCharEncoding(encoding);
1117         if (enc != cur->charset) {
1118             if (cur->charset != XML_CHAR_ENCODING_UTF8) {
1119                 /*
1120                  * Not supported yet
1121                  */
1122                 return(-1);
1123             }
1124
1125             handler = xmlFindCharEncodingHandler(encoding);
1126             if (handler == NULL)
1127                 htmlSaveErr(XML_SAVE_UNKNOWN_ENCODING, NULL, encoding);
1128         }
1129     }
1130
1131     /*
1132      * Fallback to HTML or ASCII when the encoding is unspecified
1133      */
1134     if (handler == NULL)
1135         handler = xmlFindCharEncodingHandler("HTML");
1136     if (handler == NULL)
1137         handler = xmlFindCharEncodingHandler("ascii");
1138
1139     /*
1140      * save the content to a temp buffer.
1141      */
1142     buf = xmlOutputBufferCreateFilename(filename, handler, cur->compression);
1143     if (buf == NULL) return(0);
1144
1145     htmlDocContentDumpOutput(buf, cur, NULL);
1146
1147     ret = xmlOutputBufferClose(buf);
1148     return(ret);
1149 }
1150
1151 /**
1152  * htmlSaveFileFormat:
1153  * @filename:  the filename
1154  * @cur:  the document
1155  * @format:  should formatting spaces been added
1156  * @encoding: the document encoding
1157  *
1158  * Dump an HTML document to a file using a given encoding.
1159  *
1160  * returns: the number of byte written or -1 in case of failure.
1161  */
1162 int
1163 htmlSaveFileFormat(const char *filename, xmlDocPtr cur,
1164                    const char *encoding, int format) {
1165     xmlOutputBufferPtr buf;
1166     xmlCharEncodingHandlerPtr handler = NULL;
1167     int ret;
1168
1169     if ((cur == NULL) || (filename == NULL))
1170         return(-1);
1171
1172     xmlInitParser();
1173
1174     if (encoding != NULL) {
1175         xmlCharEncoding enc;
1176
1177         enc = xmlParseCharEncoding(encoding);
1178         if (enc != cur->charset) {
1179             if (cur->charset != XML_CHAR_ENCODING_UTF8) {
1180                 /*
1181                  * Not supported yet
1182                  */
1183                 return(-1);
1184             }
1185
1186             handler = xmlFindCharEncodingHandler(encoding);
1187             if (handler == NULL)
1188                 htmlSaveErr(XML_SAVE_UNKNOWN_ENCODING, NULL, encoding);
1189         }
1190         htmlSetMetaEncoding(cur, (const xmlChar *) encoding);
1191     } else {
1192         htmlSetMetaEncoding(cur, (const xmlChar *) "UTF-8");
1193     }
1194
1195     /*
1196      * Fallback to HTML or ASCII when the encoding is unspecified
1197      */
1198     if (handler == NULL)
1199         handler = xmlFindCharEncodingHandler("HTML");
1200     if (handler == NULL)
1201         handler = xmlFindCharEncodingHandler("ascii");
1202
1203     /*
1204      * save the content to a temp buffer.
1205      */
1206     buf = xmlOutputBufferCreateFilename(filename, handler, 0);
1207     if (buf == NULL) return(0);
1208
1209     htmlDocContentDumpFormatOutput(buf, cur, encoding, format);
1210
1211     ret = xmlOutputBufferClose(buf);
1212     return(ret);
1213 }
1214
1215 /**
1216  * htmlSaveFileEnc:
1217  * @filename:  the filename
1218  * @cur:  the document
1219  * @encoding: the document encoding
1220  *
1221  * Dump an HTML document to a file using a given encoding
1222  * and formatting returns/spaces are added.
1223  *
1224  * returns: the number of byte written or -1 in case of failure.
1225  */
1226 int
1227 htmlSaveFileEnc(const char *filename, xmlDocPtr cur, const char *encoding) {
1228     return(htmlSaveFileFormat(filename, cur, encoding, 1));
1229 }
1230
1231 #endif /* LIBXML_OUTPUT_ENABLED */
1232
1233 #define bottom_HTMLtree
1234 #include "elfgcchack.h"
1235 #endif /* LIBXML_HTML_ENABLED */