parser.c

   1 /*
   2  * parser.c : an XML 1.0 parser, namespaces and validity support are mostly
   3  *            implemented on top of the SAX interfaces
   4  *
   5  * References:
   6  *   The XML specification:
   7  *     http://www.w3.org/TR/REC-xml
   8  *   Original 1.0 version:
   9  *     http://www.w3.org/TR/1998/REC-xml-19980210
  10  *   XML second edition working draft
  11  *     http://www.w3.org/TR/2000/WD-xml-2e-20000814
  12  *
  13  * Okay this is a big file, the parser core is around 7000 lines, then it
  14  * is followed by the progressive parser top routines, then the various
  15  * high level APIs to call the parser and a few miscellaneous functions.
  16  * A number of helper functions and deprecated ones have been moved to
  17  * parserInternals.c to reduce this file size.
  18  * As much as possible the functions are associated with their relative
  19  * production in the XML specification. A few productions defining the
  20  * different ranges of character are actually implemented either in
  21  * parserInternals.h or parserInternals.c
  22  * The DOM tree build is realized from the default SAX callbacks in
  23  * the module SAX.c.
  24  * The routines doing the validation checks are in valid.c and called either
  25  * from the SAX callbacks or as standalone functions using a preparsed
  26  * document.
  27  *
  28  * See Copyright for the status of this software.
  29  *
  30  * daniel@veillard.com
  31  */
  32
  33 #define IN_LIBXML
  34 #include "libxml.h"
  35
  36 #if defined(WIN32) && !defined (__CYGWIN__)
  37 #define XML_DIR_SEP '\\'
  38 #else
  39 #define XML_DIR_SEP '/'
  40 #endif
  41
  42 #include <stdlib.h>
  43 #include <string.h>
  44 #include <stdarg.h>
  45 #include <libxml/xmlmemory.h>
  46 #include <libxml/threads.h>
  47 #include <libxml/globals.h>
  48 #include <libxml/tree.h>
  49 #include <libxml/parser.h>
  50 #include <libxml/parserInternals.h>
  51 #include <libxml/valid.h>
  52 #include <libxml/entities.h>
  53 #include <libxml/xmlerror.h>
  54 #include <libxml/encoding.h>
  55 #include <libxml/xmlIO.h>
  56 #include <libxml/uri.h>
  57 #ifdef LIBXML_CATALOG_ENABLED
  58 #include <libxml/catalog.h>
  59 #endif
  60 #ifdef LIBXML_SCHEMAS_ENABLED
  61 #include <libxml/xmlschemastypes.h>
  62 #include <libxml/relaxng.h>
  63 #endif
  64 #ifdef HAVE_CTYPE_H
  65 #include <ctype.h>
  66 #endif
  67 #ifdef HAVE_STDLIB_H
  68 #include <stdlib.h>
  69 #endif
  70 #ifdef HAVE_SYS_STAT_H
  71 #include <sys/stat.h>
  72 #endif
  73 #ifdef HAVE_FCNTL_H
  74 #include <fcntl.h>
  75 #endif
  76 #ifdef HAVE_UNISTD_H
  77 #include <unistd.h>
  78 #endif
  79 #ifdef HAVE_ZLIB_H
  80 #include <zlib.h>
  81 #endif
  82
  83 static void
  84 xmlFatalErr(xmlParserCtxtPtr ctxt, xmlParserErrors error, const char *info);
  85
  86 static xmlParserCtxtPtr
  87 xmlCreateEntityParserCtxtInternal(const xmlChar *URL, const xmlChar *ID,
  88                           const xmlChar *base, xmlParserCtxtPtr pctx);
  89
  90 /************************************************************************
  91  *                                                                      *
  92  *      Arbitrary limits set in the parser. See XML_PARSE_HUGE          *
  93  *                                                                      *
  94  ************************************************************************/
  95
  96 #define XML_PARSER_BIG_ENTITY 1000
  97 #define XML_PARSER_LOT_ENTITY 5000
  98
  99 /*
 100  * XML_PARSER_NON_LINEAR is the threshold where the ratio of parsed entity
 101  *    replacement over the size in byte of the input indicates that you have
 102  *    an exponential behaviour. A value of 10 corresponds to at least 3 entity
 103  *    replacements per byte of input.
 104  */
 105 #define XML_PARSER_NON_LINEAR 10
 106
 107 /*
 108  * xmlParserEntityCheck
 109  *
 110  * Function to check non-linear entity expansion behaviour
 111  * This is here to detect and stop exponential linear entity expansion
 112  * This is not a limitation of the parser but a safety
 113  * boundary feature. It can be disabled with the XML_PARSE_HUGE
 114  * parser option.
 115  */
 116 static int
 117 xmlParserEntityCheck(xmlParserCtxtPtr ctxt, unsigned long size,
 118                      xmlEntityPtr ent)
 119 {
 120     unsigned long consumed = 0;
 121
 122     if ((ctxt == NULL) || (ctxt->options & XML_PARSE_HUGE))
 123         return (0);
 124     if (ctxt->lastError.code == XML_ERR_ENTITY_LOOP)
 125         return (1);
 126     if (size != 0) {
 127         /*
 128          * Do the check based on the replacement size of the entity
 129          */
 130         if (size < XML_PARSER_BIG_ENTITY)
 131             return(0);
 132
 133         /*
 134          * A limit on the amount of text data reasonably used
 135          */
 136         if (ctxt->input != NULL) {
 137             consumed = ctxt->input->consumed +
 138                 (ctxt->input->cur - ctxt->input->base);
 139         }
 140         consumed += ctxt->sizeentities;
 141
 142         if ((size < XML_PARSER_NON_LINEAR * consumed) &&
 143             (ctxt->nbentities * 3 < XML_PARSER_NON_LINEAR * consumed))
 144             return (0);
 145     } else if (ent != NULL) {
 146         /*
 147          * use the number of parsed entities in the replacement
 148          */
 149         size = ent->checked;
 150
 151         /*
 152          * The amount of data parsed counting entities size only once
 153          */
 154         if (ctxt->input != NULL) {
 155             consumed = ctxt->input->consumed +
 156                 (ctxt->input->cur - ctxt->input->base);
 157         }
 158         consumed += ctxt->sizeentities;
 159
 160         /*
 161          * Check the density of entities for the amount of data
 162          * knowing an entity reference will take at least 3 bytes
 163          */
 164         if (size * 3 < consumed * XML_PARSER_NON_LINEAR)
 165             return (0);
 166     } else {
 167         /*
 168          * strange we got no data for checking just return
 169          */
 170         return (0);
 171     }
 172
 173     xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
 174     return (1);
 175 }
 176
 177 /**
 178  * xmlParserMaxDepth:
 179  *
 180  * arbitrary depth limit for the XML documents that we allow to
 181  * process. This is not a limitation of the parser but a safety
 182  * boundary feature. It can be disabled with the XML_PARSE_HUGE
 183  * parser option.
 184  */
 185 unsigned int xmlParserMaxDepth = 256;
 186
 187
 188
 189 #define SAX2 1
 190 #define XML_PARSER_BIG_BUFFER_SIZE 300
 191 #define XML_PARSER_BUFFER_SIZE 100
 192 #define SAX_COMPAT_MODE BAD_CAST "SAX compatibility mode document"
 193
 194 /*
 195  * List of XML prefixed PI allowed by W3C specs
 196  */
 197
 198 static const char *xmlW3CPIs[] = {
 199     "xml-stylesheet",
 200     NULL
 201 };
 202
 203
 204 /* DEPR void xmlParserHandleReference(xmlParserCtxtPtr ctxt); */
 205 static xmlEntityPtr xmlParseStringPEReference(xmlParserCtxtPtr ctxt,
 206                                               const xmlChar **str);
 207
 208 static xmlParserErrors
 209 xmlParseExternalEntityPrivate(xmlDocPtr doc, xmlParserCtxtPtr oldctxt,
 210                       xmlSAXHandlerPtr sax,
 211                       void *user_data, int depth, const xmlChar *URL,
 212                       const xmlChar *ID, xmlNodePtr *list);
 213
 214 static int
 215 xmlCtxtUseOptionsInternal(xmlParserCtxtPtr ctxt, int options,
 216                           const char *encoding);
 217 #ifdef LIBXML_LEGACY_ENABLED
 218 static void
 219 xmlAddEntityReference(xmlEntityPtr ent, xmlNodePtr firstNode,
 220                       xmlNodePtr lastNode);
 221 #endif /* LIBXML_LEGACY_ENABLED */
 222
 223 static xmlParserErrors
 224 xmlParseBalancedChunkMemoryInternal(xmlParserCtxtPtr oldctxt,
 225                       const xmlChar *string, void *user_data, xmlNodePtr *lst);
 226
 227 static int
 228 xmlLoadEntityContent(xmlParserCtxtPtr ctxt, xmlEntityPtr entity);
 229
 230 /************************************************************************
 231  *                                                                      *
 232  *              Some factorized error routines                          *
 233  *                                                                      *
 234  ************************************************************************/
 235
 236 /**
 237  * xmlErrAttributeDup:
 238  * @ctxt:  an XML parser context
 239  * @prefix:  the attribute prefix
 240  * @localname:  the attribute localname
 241  *
 242  * Handle a redefinition of attribute error
 243  */
 244 static void
 245 xmlErrAttributeDup(xmlParserCtxtPtr ctxt, const xmlChar * prefix,
 246                    const xmlChar * localname)
 247 {
 248     if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
 249         (ctxt->instate == XML_PARSER_EOF))
 250         return;
 251     if (ctxt != NULL)
 252         ctxt->errNo = XML_ERR_ATTRIBUTE_REDEFINED;
 253
 254     if (prefix == NULL)
 255         __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER,
 256                         XML_ERR_ATTRIBUTE_REDEFINED, XML_ERR_FATAL, NULL, 0,
 257                         (const char *) localname, NULL, NULL, 0, 0,
 258                         "Attribute %s redefined\n", localname);
 259     else
 260         __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER,
 261                         XML_ERR_ATTRIBUTE_REDEFINED, XML_ERR_FATAL, NULL, 0,
 262                         (const char *) prefix, (const char *) localname,
 263                         NULL, 0, 0, "Attribute %s:%s redefined\n", prefix,
 264                         localname);
 265     if (ctxt != NULL) {
 266         ctxt->wellFormed = 0;
 267         if (ctxt->recovery == 0)
 268             ctxt->disableSAX = 1;
 269     }
 270 }
 271
 272 /**
 273  * xmlFatalErr:
 274  * @ctxt:  an XML parser context
 275  * @error:  the error number
 276  * @extra:  extra information string
 277  *
 278  * Handle a fatal parser error, i.e. violating Well-Formedness constraints
 279  */
 280 static void
 281 xmlFatalErr(xmlParserCtxtPtr ctxt, xmlParserErrors error, const char *info)
 282 {
 283     const char *errmsg;
 284
 285     if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
 286         (ctxt->instate == XML_PARSER_EOF))
 287         return;
 288     switch (error) {
 289         case XML_ERR_INVALID_HEX_CHARREF:
 290             errmsg = "CharRef: invalid hexadecimal value\n";
 291             break;
 292         case XML_ERR_INVALID_DEC_CHARREF:
 293             errmsg = "CharRef: invalid decimal value\n";
 294             break;
 295         case XML_ERR_INVALID_CHARREF:
 296             errmsg = "CharRef: invalid value\n";
 297             break;
 298         case XML_ERR_INTERNAL_ERROR:
 299             errmsg = "internal error";
 300             break;
 301         case XML_ERR_PEREF_AT_EOF:
 302             errmsg = "PEReference at end of document\n";
 303             break;
 304         case XML_ERR_PEREF_IN_PROLOG:
 305             errmsg = "PEReference in prolog\n";
 306             break;
 307         case XML_ERR_PEREF_IN_EPILOG:
 308             errmsg = "PEReference in epilog\n";
 309             break;
 310         case XML_ERR_PEREF_NO_NAME:
 311             errmsg = "PEReference: no name\n";
 312             break;
 313         case XML_ERR_PEREF_SEMICOL_MISSING:
 314             errmsg = "PEReference: expecting ';'\n";
 315             break;
 316         case XML_ERR_ENTITY_LOOP:
 317             errmsg = "Detected an entity reference loop\n";
 318             break;
 319         case XML_ERR_ENTITY_NOT_STARTED:
 320             errmsg = "EntityValue: \" or ' expected\n";
 321             break;
 322         case XML_ERR_ENTITY_PE_INTERNAL:
 323             errmsg = "PEReferences forbidden in internal subset\n";
 324             break;
 325         case XML_ERR_ENTITY_NOT_FINISHED:
 326             errmsg = "EntityValue: \" or ' expected\n";
 327             break;
 328         case XML_ERR_ATTRIBUTE_NOT_STARTED:
 329             errmsg = "AttValue: \" or ' expected\n";
 330             break;
 331         case XML_ERR_LT_IN_ATTRIBUTE:
 332             errmsg = "Unescaped '<' not allowed in attributes values\n";
 333             break;
 334         case XML_ERR_LITERAL_NOT_STARTED:
 335             errmsg = "SystemLiteral \" or ' expected\n";
 336             break;
 337         case XML_ERR_LITERAL_NOT_FINISHED:
 338             errmsg = "Unfinished System or Public ID \" or ' expected\n";
 339             break;
 340         case XML_ERR_MISPLACED_CDATA_END:
 341             errmsg = "Sequence ']]>' not allowed in content\n";
 342             break;
 343         case XML_ERR_URI_REQUIRED:
 344             errmsg = "SYSTEM or PUBLIC, the URI is missing\n";
 345             break;
 346         case XML_ERR_PUBID_REQUIRED:
 347             errmsg = "PUBLIC, the Public Identifier is missing\n";
 348             break;
 349         case XML_ERR_HYPHEN_IN_COMMENT:
 350             errmsg = "Comment must not contain '--' (double-hyphen)\n";
 351             break;
 352         case XML_ERR_PI_NOT_STARTED:
 353             errmsg = "xmlParsePI : no target name\n";
 354             break;
 355         case XML_ERR_RESERVED_XML_NAME:
 356             errmsg = "Invalid PI name\n";
 357             break;
 358         case XML_ERR_NOTATION_NOT_STARTED:
 359             errmsg = "NOTATION: Name expected here\n";
 360             break;
 361         case XML_ERR_NOTATION_NOT_FINISHED:
 362             errmsg = "'>' required to close NOTATION declaration\n";
 363             break;
 364         case XML_ERR_VALUE_REQUIRED:
 365             errmsg = "Entity value required\n";
 366             break;
 367         case XML_ERR_URI_FRAGMENT:
 368             errmsg = "Fragment not allowed";
 369             break;
 370         case XML_ERR_ATTLIST_NOT_STARTED:
 371             errmsg = "'(' required to start ATTLIST enumeration\n";
 372             break;
 373         case XML_ERR_NMTOKEN_REQUIRED:
 374             errmsg = "NmToken expected in ATTLIST enumeration\n";
 375             break;
 376         case XML_ERR_ATTLIST_NOT_FINISHED:
 377             errmsg = "')' required to finish ATTLIST enumeration\n";
 378             break;
 379         case XML_ERR_MIXED_NOT_STARTED:
 380             errmsg = "MixedContentDecl : '|' or ')*' expected\n";
 381             break;
 382         case XML_ERR_PCDATA_REQUIRED:
 383             errmsg = "MixedContentDecl : '#PCDATA' expected\n";
 384             break;
 385         case XML_ERR_ELEMCONTENT_NOT_STARTED:
 386             errmsg = "ContentDecl : Name or '(' expected\n";
 387             break;
 388         case XML_ERR_ELEMCONTENT_NOT_FINISHED:
 389             errmsg = "ContentDecl : ',' '|' or ')' expected\n";
 390             break;
 391         case XML_ERR_PEREF_IN_INT_SUBSET:
 392             errmsg =
 393                 "PEReference: forbidden within markup decl in internal subset\n";
 394             break;
 395         case XML_ERR_GT_REQUIRED:
 396             errmsg = "expected '>'\n";
 397             break;
 398         case XML_ERR_CONDSEC_INVALID:
 399             errmsg = "XML conditional section '[' expected\n";
 400             break;
 401         case XML_ERR_EXT_SUBSET_NOT_FINISHED:
 402             errmsg = "Content error in the external subset\n";
 403             break;
 404         case XML_ERR_CONDSEC_INVALID_KEYWORD:
 405             errmsg =
 406                 "conditional section INCLUDE or IGNORE keyword expected\n";
 407             break;
 408         case XML_ERR_CONDSEC_NOT_FINISHED:
 409             errmsg = "XML conditional section not closed\n";
 410             break;
 411         case XML_ERR_XMLDECL_NOT_STARTED:
 412             errmsg = "Text declaration '<?xml' required\n";
 413             break;
 414         case XML_ERR_XMLDECL_NOT_FINISHED:
 415             errmsg = "parsing XML declaration: '?>' expected\n";
 416             break;
 417         case XML_ERR_EXT_ENTITY_STANDALONE:
 418             errmsg = "external parsed entities cannot be standalone\n";
 419             break;
 420         case XML_ERR_ENTITYREF_SEMICOL_MISSING:
 421             errmsg = "EntityRef: expecting ';'\n";
 422             break;
 423         case XML_ERR_DOCTYPE_NOT_FINISHED:
 424             errmsg = "DOCTYPE improperly terminated\n";
 425             break;
 426         case XML_ERR_LTSLASH_REQUIRED:
 427             errmsg = "EndTag: '</' not found\n";
 428             break;
 429         case XML_ERR_EQUAL_REQUIRED:
 430             errmsg = "expected '='\n";
 431             break;
 432         case XML_ERR_STRING_NOT_CLOSED:
 433             errmsg = "String not closed expecting \" or '\n";
 434             break;
 435         case XML_ERR_STRING_NOT_STARTED:
 436             errmsg = "String not started expecting ' or \"\n";
 437             break;
 438         case XML_ERR_ENCODING_NAME:
 439             errmsg = "Invalid XML encoding name\n";
 440             break;
 441         case XML_ERR_STANDALONE_VALUE:
 442             errmsg = "standalone accepts only 'yes' or 'no'\n";
 443             break;
 444         case XML_ERR_DOCUMENT_EMPTY:
 445             errmsg = "Document is empty\n";
 446             break;
 447         case XML_ERR_DOCUMENT_END:
 448             errmsg = "Extra content at the end of the document\n";
 449             break;
 450         case XML_ERR_NOT_WELL_BALANCED:
 451             errmsg = "chunk is not well balanced\n";
 452             break;
 453         case XML_ERR_EXTRA_CONTENT:
 454             errmsg = "extra content at the end of well balanced chunk\n";
 455             break;
 456         case XML_ERR_VERSION_MISSING:
 457             errmsg = "Malformed declaration expecting version\n";
 458             break;
 459 #if 0
 460         case:
 461             errmsg = "\n";
 462             break;
 463 #endif
 464         default:
 465             errmsg = "Unregistered error message\n";
 466     }
 467     if (ctxt != NULL)
 468         ctxt->errNo = error;
 469     __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER, error,
 470                     XML_ERR_FATAL, NULL, 0, info, NULL, NULL, 0, 0, errmsg,
 471                     info);
 472     if (ctxt != NULL) {
 473         ctxt->wellFormed = 0;
 474         if (ctxt->recovery == 0)
 475             ctxt->disableSAX = 1;
 476     }
 477 }
 478
 479 /**
 480  * xmlFatalErrMsg:
 481  * @ctxt:  an XML parser context
 482  * @error:  the error number
 483  * @msg:  the error message
 484  *
 485  * Handle a fatal parser error, i.e. violating Well-Formedness constraints
 486  */
 487 static void
 488 xmlFatalErrMsg(xmlParserCtxtPtr ctxt, xmlParserErrors error,
 489                const char *msg)
 490 {
 491     if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
 492         (ctxt->instate == XML_PARSER_EOF))
 493         return;
 494     if (ctxt != NULL)
 495         ctxt->errNo = error;
 496     __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER, error,
 497                     XML_ERR_FATAL, NULL, 0, NULL, NULL, NULL, 0, 0, "%s", msg);
 498     if (ctxt != NULL) {
 499         ctxt->wellFormed = 0;
 500         if (ctxt->recovery == 0)
 501             ctxt->disableSAX = 1;
 502     }
 503 }
 504
 505 /**
 506  * xmlWarningMsg:
 507  * @ctxt:  an XML parser context
 508  * @error:  the error number
 509  * @msg:  the error message
 510  * @str1:  extra data
 511  * @str2:  extra data
 512  *
 513  * Handle a warning.
 514  */
 515 static void
 516 xmlWarningMsg(xmlParserCtxtPtr ctxt, xmlParserErrors error,
 517               const char *msg, const xmlChar *str1, const xmlChar *str2)
 518 {
 519     xmlStructuredErrorFunc schannel = NULL;
 520
 521     if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
 522         (ctxt->instate == XML_PARSER_EOF))
 523         return;
 524     if ((ctxt != NULL) && (ctxt->sax != NULL) &&
 525         (ctxt->sax->initialized == XML_SAX2_MAGIC))
 526         schannel = ctxt->sax->serror;
 527     if (ctxt != NULL) {
 528         __xmlRaiseError(schannel,
 529                     (ctxt->sax) ? ctxt->sax->warning : NULL,
 530                     ctxt->userData,
 531                     ctxt, NULL, XML_FROM_PARSER, error,
 532                     XML_ERR_WARNING, NULL, 0,
 533                     (const char *) str1, (const char *) str2, NULL, 0, 0,
 534                     msg, (const char *) str1, (const char *) str2);
 535     } else {
 536         __xmlRaiseError(schannel, NULL, NULL,
 537                     ctxt, NULL, XML_FROM_PARSER, error,
 538                     XML_ERR_WARNING, NULL, 0,
 539                     (const char *) str1, (const char *) str2, NULL, 0, 0,
 540                     msg, (const char *) str1, (const char *) str2);
 541     }
 542 }
 543
 544 /**
 545  * xmlValidityError:
 546  * @ctxt:  an XML parser context
 547  * @error:  the error number
 548  * @msg:  the error message
 549  * @str1:  extra data
 550  *
 551  * Handle a validity error.
 552  */
 553 static void
 554 xmlValidityError(xmlParserCtxtPtr ctxt, xmlParserErrors error,
 555               const char *msg, const xmlChar *str1, const xmlChar *str2)
 556 {
 557     xmlStructuredErrorFunc schannel = NULL;
 558
 559     if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
 560         (ctxt->instate == XML_PARSER_EOF))
 561         return;
 562     if (ctxt != NULL) {
 563         ctxt->errNo = error;
 564         if ((ctxt->sax != NULL) && (ctxt->sax->initialized == XML_SAX2_MAGIC))
 565             schannel = ctxt->sax->serror;
 566     }
 567     if (ctxt != NULL) {
 568         __xmlRaiseError(schannel,
 569                     ctxt->vctxt.error, ctxt->vctxt.userData,
 570                     ctxt, NULL, XML_FROM_DTD, error,
 571                     XML_ERR_ERROR, NULL, 0, (const char *) str1,
 572                     (const char *) str2, NULL, 0, 0,
 573                     msg, (const char *) str1, (const char *) str2);
 574         ctxt->valid = 0;
 575     } else {
 576         __xmlRaiseError(schannel, NULL, NULL,
 577                     ctxt, NULL, XML_FROM_DTD, error,
 578                     XML_ERR_ERROR, NULL, 0, (const char *) str1,
 579                     (const char *) str2, NULL, 0, 0,
 580                     msg, (const char *) str1, (const char *) str2);
 581     }
 582 }
 583
 584 /**
 585  * xmlFatalErrMsgInt:
 586  * @ctxt:  an XML parser context
 587  * @error:  the error number
 588  * @msg:  the error message
 589  * @val:  an integer value
 590  *
 591  * Handle a fatal parser error, i.e. violating Well-Formedness constraints
 592  */
 593 static void
 594 xmlFatalErrMsgInt(xmlParserCtxtPtr ctxt, xmlParserErrors error,
 595                   const char *msg, int val)
 596 {
 597     if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
 598         (ctxt->instate == XML_PARSER_EOF))
 599         return;
 600     if (ctxt != NULL)
 601         ctxt->errNo = error;
 602     __xmlRaiseError(NULL, NULL, NULL,
 603                     ctxt, NULL, XML_FROM_PARSER, error, XML_ERR_FATAL,
 604                     NULL, 0, NULL, NULL, NULL, val, 0, msg, val);
 605     if (ctxt != NULL) {
 606         ctxt->wellFormed = 0;
 607         if (ctxt->recovery == 0)
 608             ctxt->disableSAX = 1;
 609     }
 610 }
 611
 612 /**
 613  * xmlFatalErrMsgStrIntStr:
 614  * @ctxt:  an XML parser context
 615  * @error:  the error number
 616  * @msg:  the error message
 617  * @str1:  an string info
 618  * @val:  an integer value
 619  * @str2:  an string info
 620  *
 621  * Handle a fatal parser error, i.e. violating Well-Formedness constraints
 622  */
 623 static void
 624 xmlFatalErrMsgStrIntStr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
 625                   const char *msg, const xmlChar *str1, int val,
 626                   const xmlChar *str2)
 627 {
 628     if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
 629         (ctxt->instate == XML_PARSER_EOF))
 630         return;
 631     if (ctxt != NULL)
 632         ctxt->errNo = error;
 633     __xmlRaiseError(NULL, NULL, NULL,
 634                     ctxt, NULL, XML_FROM_PARSER, error, XML_ERR_FATAL,
 635                     NULL, 0, (const char *) str1, (const char *) str2,
 636                     NULL, val, 0, msg, str1, val, str2);
 637     if (ctxt != NULL) {
 638         ctxt->wellFormed = 0;
 639         if (ctxt->recovery == 0)
 640             ctxt->disableSAX = 1;
 641     }
 642 }
 643
 644 /**
 645  * xmlFatalErrMsgStr:
 646  * @ctxt:  an XML parser context
 647  * @error:  the error number
 648  * @msg:  the error message
 649  * @val:  a string value
 650  *
 651  * Handle a fatal parser error, i.e. violating Well-Formedness constraints
 652  */
 653 static void
 654 xmlFatalErrMsgStr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
 655                   const char *msg, const xmlChar * val)
 656 {
 657     if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
 658         (ctxt->instate == XML_PARSER_EOF))
 659         return;
 660     if (ctxt != NULL)
 661         ctxt->errNo = error;
 662     __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL,
 663                     XML_FROM_PARSER, error, XML_ERR_FATAL,
 664                     NULL, 0, (const char *) val, NULL, NULL, 0, 0, msg,
 665                     val);
 666     if (ctxt != NULL) {
 667         ctxt->wellFormed = 0;
 668         if (ctxt->recovery == 0)
 669             ctxt->disableSAX = 1;
 670     }
 671 }
 672
 673 /**
 674  * xmlErrMsgStr:
 675  * @ctxt:  an XML parser context
 676  * @error:  the error number
 677  * @msg:  the error message
 678  * @val:  a string value
 679  *
 680  * Handle a non fatal parser error
 681  */
 682 static void
 683 xmlErrMsgStr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
 684                   const char *msg, const xmlChar * val)
 685 {
 686     if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
 687         (ctxt->instate == XML_PARSER_EOF))
 688         return;
 689     if (ctxt != NULL)
 690         ctxt->errNo = error;
 691     __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL,
 692                     XML_FROM_PARSER, error, XML_ERR_ERROR,
 693                     NULL, 0, (const char *) val, NULL, NULL, 0, 0, msg,
 694                     val);
 695 }
 696
 697 /**
 698  * xmlNsErr:
 699  * @ctxt:  an XML parser context
 700  * @error:  the error number
 701  * @msg:  the message
 702  * @info1:  extra information string
 703  * @info2:  extra information string
 704  *
 705  * Handle a fatal parser error, i.e. violating Well-Formedness constraints
 706  */
 707 static void
 708 xmlNsErr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
 709          const char *msg,
 710          const xmlChar * info1, const xmlChar * info2,
 711          const xmlChar * info3)
 712 {
 713     if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
 714         (ctxt->instate == XML_PARSER_EOF))
 715         return;
 716     if (ctxt != NULL)
 717         ctxt->errNo = error;
 718     __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_NAMESPACE, error,
 719                     XML_ERR_ERROR, NULL, 0, (const char *) info1,
 720                     (const char *) info2, (const char *) info3, 0, 0, msg,
 721                     info1, info2, info3);
 722     if (ctxt != NULL)
 723         ctxt->nsWellFormed = 0;
 724 }
 725
 726 /**
 727  * xmlNsWarn
 728  * @ctxt:  an XML parser context
 729  * @error:  the error number
 730  * @msg:  the message
 731  * @info1:  extra information string
 732  * @info2:  extra information string
 733  *
 734  * Handle a fatal parser error, i.e. violating Well-Formedness constraints
 735  */
 736 static void
 737 xmlNsWarn(xmlParserCtxtPtr ctxt, xmlParserErrors error,
 738          const char *msg,
 739          const xmlChar * info1, const xmlChar * info2,
 740          const xmlChar * info3)
 741 {
 742     if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
 743         (ctxt->instate == XML_PARSER_EOF))
 744         return;
 745     __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_NAMESPACE, error,
 746                     XML_ERR_WARNING, NULL, 0, (const char *) info1,
 747                     (const char *) info2, (const char *) info3, 0, 0, msg,
 748                     info1, info2, info3);
 749 }
 750
 751 /************************************************************************
 752  *                                                                      *
 753  *              Library wide options                                    *
 754  *                                                                      *
 755  ************************************************************************/
 756
 757 /**
 758   * xmlHasFeature:
 759   * @feature: the feature to be examined
 760   *
 761   * Examines if the library has been compiled with a given feature.
 762   *
 763   * Returns a non-zero value if the feature exists, otherwise zero.
 764   * Zero (0) is also returned when an unknown feature is
 765   * requested.
 766   */
 767 int
 768 xmlHasFeature(xmlFeature feature)
 769 {
 770     switch (feature) {
 771         case XML_WITH_THREAD:
 772 #ifdef LIBXML_THREAD_ENABLED
 773             return(1);
 774 #else
 775             return(0);
 776 #endif
 777         case XML_WITH_TREE:
 778 #ifdef LIBXML_TREE_ENABLED
 779             return(1);
 780 #else
 781             return(0);
 782 #endif
 783         case XML_WITH_OUTPUT:
 784 #ifdef LIBXML_OUTPUT_ENABLED
 785             return(1);
 786 #else
 787             return(0);
 788 #endif
 789         case XML_WITH_PUSH:
 790 #ifdef LIBXML_PUSH_ENABLED
 791             return(1);
 792 #else
 793             return(0);
 794 #endif
 795         case XML_WITH_READER:
 796 #ifdef LIBXML_READER_ENABLED
 797             return(1);
 798 #else
 799             return(0);
 800 #endif
 801         case XML_WITH_PATTERN:
 802 #ifdef LIBXML_PATTERN_ENABLED
 803             return(1);
 804 #else
 805             return(0);
 806 #endif
 807         case XML_WITH_WRITER:
 808 #ifdef LIBXML_WRITER_ENABLED
 809             return(1);
 810 #else
 811             return(0);
 812 #endif
 813         case XML_WITH_SAX1:
 814 #ifdef LIBXML_SAX1_ENABLED
 815             return(1);
 816 #else
 817             return(0);
 818 #endif
 819         case XML_WITH_FTP:
 820 #ifdef LIBXML_FTP_ENABLED
 821             return(1);
 822 #else
 823             return(0);
 824 #endif
 825         case XML_WITH_HTTP:
 826 #ifdef LIBXML_HTTP_ENABLED
 827             return(1);
 828 #else
 829             return(0);
 830 #endif
 831         case XML_WITH_VALID:
 832 #ifdef LIBXML_VALID_ENABLED
 833             return(1);
 834 #else
 835             return(0);
 836 #endif
 837         case XML_WITH_HTML:
 838 #ifdef LIBXML_HTML_ENABLED
 839             return(1);
 840 #else
 841             return(0);
 842 #endif
 843         case XML_WITH_LEGACY:
 844 #ifdef LIBXML_LEGACY_ENABLED
 845             return(1);
 846 #else
 847             return(0);
 848 #endif
 849         case XML_WITH_C14N:
 850 #ifdef LIBXML_C14N_ENABLED
 851             return(1);
 852 #else
 853             return(0);
 854 #endif
 855         case XML_WITH_CATALOG:
 856 #ifdef LIBXML_CATALOG_ENABLED
 857             return(1);
 858 #else
 859             return(0);
 860 #endif
 861         case XML_WITH_XPATH:
 862 #ifdef LIBXML_XPATH_ENABLED
 863             return(1);
 864 #else
 865             return(0);
 866 #endif
 867         case XML_WITH_XPTR:
 868 #ifdef LIBXML_XPTR_ENABLED
 869             return(1);
 870 #else
 871             return(0);
 872 #endif
 873         case XML_WITH_XINCLUDE:
 874 #ifdef LIBXML_XINCLUDE_ENABLED
 875             return(1);
 876 #else
 877             return(0);
 878 #endif
 879         case XML_WITH_ICONV:
 880 #ifdef LIBXML_ICONV_ENABLED
 881             return(1);
 882 #else
 883             return(0);
 884 #endif
 885         case XML_WITH_ISO8859X:
 886 #ifdef LIBXML_ISO8859X_ENABLED
 887             return(1);
 888 #else
 889             return(0);
 890 #endif
 891         case XML_WITH_UNICODE:
 892 #ifdef LIBXML_UNICODE_ENABLED
 893             return(1);
 894 #else
 895             return(0);
 896 #endif
 897         case XML_WITH_REGEXP:
 898 #ifdef LIBXML_REGEXP_ENABLED
 899             return(1);
 900 #else
 901             return(0);
 902 #endif
 903         case XML_WITH_AUTOMATA:
 904 #ifdef LIBXML_AUTOMATA_ENABLED
 905             return(1);
 906 #else
 907             return(0);
 908 #endif
 909         case XML_WITH_EXPR:
 910 #ifdef LIBXML_EXPR_ENABLED
 911             return(1);
 912 #else
 913             return(0);
 914 #endif
 915         case XML_WITH_SCHEMAS:
 916 #ifdef LIBXML_SCHEMAS_ENABLED
 917             return(1);
 918 #else
 919             return(0);
 920 #endif
 921         case XML_WITH_SCHEMATRON:
 922 #ifdef LIBXML_SCHEMATRON_ENABLED
 923             return(1);
 924 #else
 925             return(0);
 926 #endif
 927         case XML_WITH_MODULES:
 928 #ifdef LIBXML_MODULES_ENABLED
 929             return(1);
 930 #else
 931             return(0);
 932 #endif
 933         case XML_WITH_DEBUG:
 934 #ifdef LIBXML_DEBUG_ENABLED
 935             return(1);
 936 #else
 937             return(0);
 938 #endif
 939         case XML_WITH_DEBUG_MEM:
 940 #ifdef DEBUG_MEMORY_LOCATION
 941             return(1);
 942 #else
 943             return(0);
 944 #endif
 945         case XML_WITH_DEBUG_RUN:
 946 #ifdef LIBXML_DEBUG_RUNTIME
 947             return(1);
 948 #else
 949             return(0);
 950 #endif
 951         case XML_WITH_ZLIB:
 952 #ifdef LIBXML_ZLIB_ENABLED
 953             return(1);
 954 #else
 955             return(0);
 956 #endif
 957         case XML_WITH_ICU:
 958 #ifdef LIBXML_ICU_ENABLED
 959             return(1);
 960 #else
 961             return(0);
 962 #endif
 963         default:
 964             break;
 965      }
 966      return(0);
 967 }
 968
 969 /************************************************************************
 970  *                                                                      *
 971  *              SAX2 defaulted attributes handling                      *
 972  *                                                                      *
 973  ************************************************************************/
 974
 975 /**
 976  * xmlDetectSAX2:
 977  * @ctxt:  an XML parser context
 978  *
 979  * Do the SAX2 detection and specific intialization
 980  */
 981 static void
 982 xmlDetectSAX2(xmlParserCtxtPtr ctxt) {
 983     if (ctxt == NULL) return;
 984 #ifdef LIBXML_SAX1_ENABLED
 985     if ((ctxt->sax) &&  (ctxt->sax->initialized == XML_SAX2_MAGIC) &&
 986         ((ctxt->sax->startElementNs != NULL) ||
 987          (ctxt->sax->endElementNs != NULL))) ctxt->sax2 = 1;
 988 #else
 989     ctxt->sax2 = 1;
 990 #endif /* LIBXML_SAX1_ENABLED */
 991
 992     ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3);
 993     ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5);
 994     ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36);
 995     if ((ctxt->str_xml==NULL) || (ctxt->str_xmlns==NULL) ||
 996                 (ctxt->str_xml_ns == NULL)) {
 997         xmlErrMemory(ctxt, NULL);
 998     }
 999 }
1000
1001 typedef struct _xmlDefAttrs xmlDefAttrs;
1002 typedef xmlDefAttrs *xmlDefAttrsPtr;
1003 struct _xmlDefAttrs {
1004     int nbAttrs;        /* number of defaulted attributes on that element */
1005     int maxAttrs;       /* the size of the array */
1006     const xmlChar *values[5]; /* array of localname/prefix/values/external */
1007 };
1008
1009 /**
1010  * xmlAttrNormalizeSpace:
1011  * @src: the source string
1012  * @dst: the target string
1013  *
1014  * Normalize the space in non CDATA attribute values:
1015  * If the attribute type is not CDATA, then the XML processor MUST further
1016  * process the normalized attribute value by discarding any leading and
1017  * trailing space (#x20) characters, and by replacing sequences of space
1018  * (#x20) characters by a single space (#x20) character.
1019  * Note that the size of dst need to be at least src, and if one doesn't need
1020  * to preserve dst (and it doesn't come from a dictionary or read-only) then
1021  * passing src as dst is just fine.
1022  *
1023  * Returns a pointer to the normalized value (dst) or NULL if no conversion
1024  *         is needed.
1025  */
1026 static xmlChar *
1027 xmlAttrNormalizeSpace(const xmlChar *src, xmlChar *dst)
1028 {
1029     if ((src == NULL) || (dst == NULL))
1030         return(NULL);
1031
1032     while (*src == 0x20) src++;
1033     while (*src != 0) {
1034         if (*src == 0x20) {
1035             while (*src == 0x20) src++;
1036             if (*src != 0)
1037                 *dst++ = 0x20;
1038         } else {
1039             *dst++ = *src++;
1040         }
1041     }
1042     *dst = 0;
1043     if (dst == src)
1044        return(NULL);
1045     return(dst);
1046 }
1047
1048 /**
1049  * xmlAttrNormalizeSpace2:
1050  * @src: the source string
1051  *
1052  * Normalize the space in non CDATA attribute values, a slightly more complex
1053  * front end to avoid allocation problems when running on attribute values
1054  * coming from the input.
1055  *
1056  * Returns a pointer to the normalized value (dst) or NULL if no conversion
1057  *         is needed.
1058  */
1059 static const xmlChar *
1060 xmlAttrNormalizeSpace2(xmlParserCtxtPtr ctxt, xmlChar *src, int *len)
1061 {
1062     int i;
1063     int remove_head = 0;
1064     int need_realloc = 0;
1065     const xmlChar *cur;
1066
1067     if ((ctxt == NULL) || (src == NULL) || (len == NULL))
1068         return(NULL);
1069     i = *len;
1070     if (i <= 0)
1071         return(NULL);
1072
1073     cur = src;
1074     while (*cur == 0x20) {
1075         cur++;
1076         remove_head++;
1077     }
1078     while (*cur != 0) {
1079         if (*cur == 0x20) {
1080             cur++;
1081             if ((*cur == 0x20) || (*cur == 0)) {
1082                 need_realloc = 1;
1083                 break;
1084             }
1085         } else
1086             cur++;
1087     }
1088     if (need_realloc) {
1089         xmlChar *ret;
1090
1091         ret = xmlStrndup(src + remove_head, i - remove_head + 1);
1092         if (ret == NULL) {
1093             xmlErrMemory(ctxt, NULL);
1094             return(NULL);
1095         }
1096         xmlAttrNormalizeSpace(ret, ret);
1097         *len = (int) strlen((const char *)ret);
1098         return(ret);
1099     } else if (remove_head) {
1100         *len -= remove_head;
1101         memmove(src, src + remove_head, 1 + *len);
1102         return(src);
1103     }
1104     return(NULL);
1105 }
1106
1107 /**
1108  * xmlAddDefAttrs:
1109  * @ctxt:  an XML parser context
1110  * @fullname:  the element fullname
1111  * @fullattr:  the attribute fullname
1112  * @value:  the attribute value
1113  *
1114  * Add a defaulted attribute for an element
1115  */
1116 static void
1117 xmlAddDefAttrs(xmlParserCtxtPtr ctxt,
1118                const xmlChar *fullname,
1119                const xmlChar *fullattr,
1120                const xmlChar *value) {
1121     xmlDefAttrsPtr defaults;
1122     int len;
1123     const xmlChar *name;
1124     const xmlChar *prefix;
1125
1126     /*
1127      * Allows to detect attribute redefinitions
1128      */
1129     if (ctxt->attsSpecial != NULL) {
1130         if (xmlHashLookup2(ctxt->attsSpecial, fullname, fullattr) != NULL)
1131             return;
1132     }
1133
1134     if (ctxt->attsDefault == NULL) {
1135         ctxt->attsDefault = xmlHashCreateDict(10, ctxt->dict);
1136         if (ctxt->attsDefault == NULL)
1137             goto mem_error;
1138     }
1139
1140     /*
1141      * split the element name into prefix:localname , the string found
1142      * are within the DTD and then not associated to namespace names.
1143      */
1144     name = xmlSplitQName3(fullname, &len);
1145     if (name == NULL) {
1146         name = xmlDictLookup(ctxt->dict, fullname, -1);
1147         prefix = NULL;
1148     } else {
1149         name = xmlDictLookup(ctxt->dict, name, -1);
1150         prefix = xmlDictLookup(ctxt->dict, fullname, len);
1151     }
1152
1153     /*
1154      * make sure there is some storage
1155      */
1156     defaults = xmlHashLookup2(ctxt->attsDefault, name, prefix);
1157     if (defaults == NULL) {
1158         defaults = (xmlDefAttrsPtr) xmlMalloc(sizeof(xmlDefAttrs) +
1159                            (4 * 5) * sizeof(const xmlChar *));
1160         if (defaults == NULL)
1161             goto mem_error;
1162         defaults->nbAttrs = 0;
1163         defaults->maxAttrs = 4;
1164         if (xmlHashUpdateEntry2(ctxt->attsDefault, name, prefix,
1165                                 defaults, NULL) < 0) {
1166             xmlFree(defaults);
1167             goto mem_error;
1168         }
1169     } else if (defaults->nbAttrs >= defaults->maxAttrs) {
1170         xmlDefAttrsPtr temp;
1171
1172         temp = (xmlDefAttrsPtr) xmlRealloc(defaults, sizeof(xmlDefAttrs) +
1173                        (2 * defaults->maxAttrs * 5) * sizeof(const xmlChar *));
1174         if (temp == NULL)
1175             goto mem_error;
1176         defaults = temp;
1177         defaults->maxAttrs *= 2;
1178         if (xmlHashUpdateEntry2(ctxt->attsDefault, name, prefix,
1179                                 defaults, NULL) < 0) {
1180             xmlFree(defaults);
1181             goto mem_error;
1182         }
1183     }
1184
1185     /*
1186      * Split the element name into prefix:localname , the string found
1187      * are within the DTD and hen not associated to namespace names.
1188      */
1189     name = xmlSplitQName3(fullattr, &len);
1190     if (name == NULL) {
1191         name = xmlDictLookup(ctxt->dict, fullattr, -1);
1192         prefix = NULL;
1193     } else {
1194         name = xmlDictLookup(ctxt->dict, name, -1);
1195         prefix = xmlDictLookup(ctxt->dict, fullattr, len);
1196     }
1197
1198     defaults->values[5 * defaults->nbAttrs] = name;
1199     defaults->values[5 * defaults->nbAttrs + 1] = prefix;
1200     /* intern the string and precompute the end */
1201     len = xmlStrlen(value);
1202     value = xmlDictLookup(ctxt->dict, value, len);
1203     defaults->values[5 * defaults->nbAttrs + 2] = value;
1204     defaults->values[5 * defaults->nbAttrs + 3] = value + len;
1205     if (ctxt->external)
1206         defaults->values[5 * defaults->nbAttrs + 4] = BAD_CAST "external";
1207     else
1208         defaults->values[5 * defaults->nbAttrs + 4] = NULL;
1209     defaults->nbAttrs++;
1210
1211     return;
1212
1213 mem_error:
1214     xmlErrMemory(ctxt, NULL);
1215     return;
1216 }
1217
1218 /**
1219  * xmlAddSpecialAttr:
1220  * @ctxt:  an XML parser context
1221  * @fullname:  the element fullname
1222  * @fullattr:  the attribute fullname
1223  * @type:  the attribute type
1224  *
1225  * Register this attribute type
1226  */
1227 static void
1228 xmlAddSpecialAttr(xmlParserCtxtPtr ctxt,
1229                   const xmlChar *fullname,
1230                   const xmlChar *fullattr,
1231                   int type)
1232 {
1233     if (ctxt->attsSpecial == NULL) {
1234         ctxt->attsSpecial = xmlHashCreateDict(10, ctxt->dict);
1235         if (ctxt->attsSpecial == NULL)
1236             goto mem_error;
1237     }
1238
1239     if (xmlHashLookup2(ctxt->attsSpecial, fullname, fullattr) != NULL)
1240         return;
1241
1242     xmlHashAddEntry2(ctxt->attsSpecial, fullname, fullattr,
1243                      (void *) (long) type);
1244     return;
1245
1246 mem_error:
1247     xmlErrMemory(ctxt, NULL);
1248     return;
1249 }
1250
1251 /**
1252  * xmlCleanSpecialAttrCallback:
1253  *
1254  * Removes CDATA attributes from the special attribute table
1255  */
1256 static void
1257 xmlCleanSpecialAttrCallback(void *payload, void *data,
1258                             const xmlChar *fullname, const xmlChar *fullattr,
1259                             const xmlChar *unused ATTRIBUTE_UNUSED) {
1260     xmlParserCtxtPtr ctxt = (xmlParserCtxtPtr) data;
1261
1262     if (((long) payload) == XML_ATTRIBUTE_CDATA) {
1263         xmlHashRemoveEntry2(ctxt->attsSpecial, fullname, fullattr, NULL);
1264     }
1265 }
1266
1267 /**
1268  * xmlCleanSpecialAttr:
1269  * @ctxt:  an XML parser context
1270  *
1271  * Trim the list of attributes defined to remove all those of type
1272  * CDATA as they are not special. This call should be done when finishing
1273  * to parse the DTD and before starting to parse the document root.
1274  */
1275 static void
1276 xmlCleanSpecialAttr(xmlParserCtxtPtr ctxt)
1277 {
1278     if (ctxt->attsSpecial == NULL)
1279         return;
1280
1281     xmlHashScanFull(ctxt->attsSpecial, xmlCleanSpecialAttrCallback, ctxt);
1282
1283     if (xmlHashSize(ctxt->attsSpecial) == 0) {
1284         xmlHashFree(ctxt->attsSpecial, NULL);
1285         ctxt->attsSpecial = NULL;
1286     }
1287     return;
1288 }
1289
1290 /**
1291  * xmlCheckLanguageID:
1292  * @lang:  pointer to the string value
1293  *
1294  * Checks that the value conforms to the LanguageID production:
1295  *
1296  * NOTE: this is somewhat deprecated, those productions were removed from
1297  *       the XML Second edition.
1298  *
1299  * [33] LanguageID ::= Langcode ('-' Subcode)*
1300  * [34] Langcode ::= ISO639Code |  IanaCode |  UserCode
1301  * [35] ISO639Code ::= ([a-z] | [A-Z]) ([a-z] | [A-Z])
1302  * [36] IanaCode ::= ('i' | 'I') '-' ([a-z] | [A-Z])+
1303  * [37] UserCode ::= ('x' | 'X') '-' ([a-z] | [A-Z])+
1304  * [38] Subcode ::= ([a-z] | [A-Z])+
1305  *
1306  * The current REC reference the sucessors of RFC 1766, currently 5646
1307  *
1308  * http://www.rfc-editor.org/rfc/rfc5646.txt
1309  * langtag       = language
1310  *                 ["-" script]
1311  *                 ["-" region]
1312  *                 *("-" variant)
1313  *                 *("-" extension)
1314  *                 ["-" privateuse]
1315  * language      = 2*3ALPHA            ; shortest ISO 639 code
1316  *                 ["-" extlang]       ; sometimes followed by
1317  *                                     ; extended language subtags
1318  *               / 4ALPHA              ; or reserved for future use
1319  *               / 5*8ALPHA            ; or registered language subtag
1320  *
1321  * extlang       = 3ALPHA              ; selected ISO 639 codes
1322  *                 *2("-" 3ALPHA)      ; permanently reserved
1323  *
1324  * script        = 4ALPHA              ; ISO 15924 code
1325  *
1326  * region        = 2ALPHA              ; ISO 3166-1 code
1327  *               / 3DIGIT              ; UN M.49 code
1328  *
1329  * variant       = 5*8alphanum         ; registered variants
1330  *               / (DIGIT 3alphanum)
1331  *
1332  * extension     = singleton 1*("-" (2*8alphanum))
1333  *
1334  *                                     ; Single alphanumerics
1335  *                                     ; "x" reserved for private use
1336  * singleton     = DIGIT               ; 0 - 9
1337  *               / %x41-57             ; A - W
1338  *               / %x59-5A             ; Y - Z
1339  *               / %x61-77             ; a - w
1340  *               / %x79-7A             ; y - z
1341  *
1342  * it sounds right to still allow Irregular i-xxx IANA and user codes too
1343  * The parser below doesn't try to cope with extension or privateuse
1344  * that could be added but that's not interoperable anyway
1345  *
1346  * Returns 1 if correct 0 otherwise
1347  **/
1348 int
1349 xmlCheckLanguageID(const xmlChar * lang)
1350 {
1351     const xmlChar *cur = lang, *nxt;
1352
1353     if (cur == NULL)
1354         return (0);
1355     if (((cur[0] == 'i') && (cur[1] == '-')) ||
1356         ((cur[0] == 'I') && (cur[1] == '-')) ||
1357         ((cur[0] == 'x') && (cur[1] == '-')) ||
1358         ((cur[0] == 'X') && (cur[1] == '-'))) {
1359         /*
1360          * Still allow IANA code and user code which were coming
1361          * from the previous version of the XML-1.0 specification
1362          * it's deprecated but we should not fail
1363          */
1364         cur += 2;
1365         while (((cur[0] >= 'A') && (cur[0] <= 'Z')) ||
1366                ((cur[0] >= 'a') && (cur[0] <= 'z')))
1367             cur++;
1368         return(cur[0] == 0);
1369     }
1370     nxt = cur;
1371     while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) ||
1372            ((nxt[0] >= 'a') && (nxt[0] <= 'z')))
1373            nxt++;
1374     if (nxt - cur >= 4) {
1375         /*
1376          * Reserved
1377          */
1378         if ((nxt - cur > 8) || (nxt[0] != 0))
1379             return(0);
1380         return(1);
1381     }
1382     if (nxt - cur < 2)
1383         return(0);
1384     /* we got an ISO 639 code */
1385     if (nxt[0] == 0)
1386         return(1);
1387     if (nxt[0] != '-')
1388         return(0);
1389
1390     nxt++;
1391     cur = nxt;
1392     /* now we can have extlang or script or region or variant */
1393     if ((nxt[0] >= '0') && (nxt[0] <= '9'))
1394         goto region_m49;
1395
1396     while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) ||
1397            ((nxt[0] >= 'a') && (nxt[0] <= 'z')))
1398            nxt++;
1399     if (nxt - cur == 4)
1400         goto script;
1401     if (nxt - cur == 2)
1402         goto region;
1403     if ((nxt - cur >= 5) && (nxt - cur <= 8))
1404         goto variant;
1405     if (nxt - cur != 3)
1406         return(0);
1407     /* we parsed an extlang */
1408     if (nxt[0] == 0)
1409         return(1);
1410     if (nxt[0] != '-')
1411         return(0);
1412
1413     nxt++;
1414     cur = nxt;
1415     /* now we can have script or region or variant */
1416     if ((nxt[0] >= '0') && (nxt[0] <= '9'))
1417         goto region_m49;
1418
1419     while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) ||
1420            ((nxt[0] >= 'a') && (nxt[0] <= 'z')))
1421            nxt++;
1422     if (nxt - cur == 2)
1423         goto region;
1424     if ((nxt - cur >= 5) && (nxt - cur <= 8))
1425         goto variant;
1426     if (nxt - cur != 4)
1427         return(0);
1428     /* we parsed a script */
1429 script:
1430     if (nxt[0] == 0)
1431         return(1);
1432     if (nxt[0] != '-')
1433         return(0);
1434
1435     nxt++;
1436     cur = nxt;
1437     /* now we can have region or variant */
1438     if ((nxt[0] >= '0') && (nxt[0] <= '9'))
1439         goto region_m49;
1440
1441     while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) ||
1442            ((nxt[0] >= 'a') && (nxt[0] <= 'z')))
1443            nxt++;
1444
1445     if ((nxt - cur >= 5) && (nxt - cur <= 8))
1446         goto variant;
1447     if (nxt - cur != 2)
1448         return(0);
1449     /* we parsed a region */
1450 region:
1451     if (nxt[0] == 0)
1452         return(1);
1453     if (nxt[0] != '-')
1454         return(0);
1455
1456     nxt++;
1457     cur = nxt;
1458     /* now we can just have a variant */
1459     while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) ||
1460            ((nxt[0] >= 'a') && (nxt[0] <= 'z')))
1461            nxt++;
1462
1463     if ((nxt - cur < 5) || (nxt - cur > 8))
1464         return(0);
1465
1466     /* we parsed a variant */
1467 variant:
1468     if (nxt[0] == 0)
1469         return(1);
1470     if (nxt[0] != '-')
1471         return(0);
1472     /* extensions and private use subtags not checked */
1473     return (1);
1474
1475 region_m49:
1476     if (((nxt[1] >= '0') && (nxt[1] <= '9')) &&
1477         ((nxt[2] >= '0') && (nxt[2] <= '9'))) {
1478         nxt += 3;
1479         goto region;
1480     }
1481     return(0);
1482 }
1483
1484 /************************************************************************
1485  *                                                                      *
1486  *              Parser stacks related functions and macros              *
1487  *                                                                      *
1488  ************************************************************************/
1489
/*
 * Forward declaration only; the definition of this routine is not part
 * of this section of the file.
 */
static xmlEntityPtr xmlParseStringEntityRef(xmlParserCtxtPtr ctxt,
                                            const xmlChar ** str);
1492
1493 #ifdef SAX2
1494 /**
1495  * nsPush:
1496  * @ctxt:  an XML parser context
1497  * @prefix:  the namespace prefix or NULL
1498  * @URL:  the namespace name
1499  *
1500  * Pushes a new parser namespace on top of the ns stack
1501  *
1502  * Returns -1 in case of error, -2 if the namespace should be discarded
1503  *         and the index in the stack otherwise.
1504  */
1505 static int
1506 nsPush(xmlParserCtxtPtr ctxt, const xmlChar *prefix, const xmlChar *URL)
1507 {
1508     if (ctxt->options & XML_PARSE_NSCLEAN) {
1509         int i;
1510         for (i = 0;i < ctxt->nsNr;i += 2) {
1511             if (ctxt->nsTab[i] == prefix) {
1512                 /* in scope */
1513                 if (ctxt->nsTab[i + 1] == URL)
1514                     return(-2);
1515                 /* out of scope keep it */
1516                 break;
1517             }
1518         }
1519     }
1520     if ((ctxt->nsMax == 0) || (ctxt->nsTab == NULL)) {
1521         ctxt->nsMax = 10;
1522         ctxt->nsNr = 0;
1523         ctxt->nsTab = (const xmlChar **)
1524                       xmlMalloc(ctxt->nsMax * sizeof(xmlChar *));
1525         if (ctxt->nsTab == NULL) {
1526             xmlErrMemory(ctxt, NULL);
1527             ctxt->nsMax = 0;
1528             return (-1);
1529         }
1530     } else if (ctxt->nsNr >= ctxt->nsMax) {
1531         const xmlChar ** tmp;
1532         ctxt->nsMax *= 2;
1533         tmp = (const xmlChar **) xmlRealloc((char *) ctxt->nsTab,
1534                                     ctxt->nsMax * sizeof(ctxt->nsTab[0]));
1535         if (tmp == NULL) {
1536             xmlErrMemory(ctxt, NULL);
1537             ctxt->nsMax /= 2;
1538             return (-1);
1539         }
1540         ctxt->nsTab = tmp;
1541     }
1542     ctxt->nsTab[ctxt->nsNr++] = prefix;
1543     ctxt->nsTab[ctxt->nsNr++] = URL;
1544     return (ctxt->nsNr);
1545 }
1546 /**
1547  * nsPop:
1548  * @ctxt: an XML parser context
1549  * @nr:  the number to pop
1550  *
1551  * Pops the top @nr parser prefix/namespace from the ns stack
1552  *
1553  * Returns the number of namespaces removed
1554  */
1555 static int
1556 nsPop(xmlParserCtxtPtr ctxt, int nr)
1557 {
1558     int i;
1559
1560     if (ctxt->nsTab == NULL) return(0);
1561     if (ctxt->nsNr < nr) {
1562         xmlGenericError(xmlGenericErrorContext, "Pbm popping %d NS\n", nr);
1563         nr = ctxt->nsNr;
1564     }
1565     if (ctxt->nsNr <= 0)
1566         return (0);
1567
1568     for (i = 0;i < nr;i++) {
1569          ctxt->nsNr--;
1570          ctxt->nsTab[ctxt->nsNr] = NULL;
1571     }
1572     return(nr);
1573 }
1574 #endif
1575
1576 static int
1577 xmlCtxtGrowAttrs(xmlParserCtxtPtr ctxt, int nr) {
1578     const xmlChar **atts;
1579     int *attallocs;
1580     int maxatts;
1581
1582     if (ctxt->atts == NULL) {
1583         maxatts = 55; /* allow for 10 attrs by default */
1584         atts = (const xmlChar **)
1585                xmlMalloc(maxatts * sizeof(xmlChar *));
1586         if (atts == NULL) goto mem_error;
1587         ctxt->atts = atts;
1588         attallocs = (int *) xmlMalloc((maxatts / 5) * sizeof(int));
1589         if (attallocs == NULL) goto mem_error;
1590         ctxt->attallocs = attallocs;
1591         ctxt->maxatts = maxatts;
1592     } else if (nr + 5 > ctxt->maxatts) {
1593         maxatts = (nr + 5) * 2;
1594         atts = (const xmlChar **) xmlRealloc((void *) ctxt->atts,
1595                                      maxatts * sizeof(const xmlChar *));
1596         if (atts == NULL) goto mem_error;
1597         ctxt->atts = atts;
1598         attallocs = (int *) xmlRealloc((void *) ctxt->attallocs,
1599                                      (maxatts / 5) * sizeof(int));
1600         if (attallocs == NULL) goto mem_error;
1601         ctxt->attallocs = attallocs;
1602         ctxt->maxatts = maxatts;
1603     }
1604     return(ctxt->maxatts);
1605 mem_error:
1606     xmlErrMemory(ctxt, NULL);
1607     return(-1);
1608 }
1609
1610 /**
1611  * inputPush:
1612  * @ctxt:  an XML parser context
1613  * @value:  the parser input
1614  *
1615  * Pushes a new parser input on top of the input stack
1616  *
1617  * Returns -1 in case of error, the index in the stack otherwise
1618  */
1619 int
1620 inputPush(xmlParserCtxtPtr ctxt, xmlParserInputPtr value)
1621 {
1622     if ((ctxt == NULL) || (value == NULL))
1623         return(-1);
1624     if (ctxt->inputNr >= ctxt->inputMax) {
1625         ctxt->inputMax *= 2;
1626         ctxt->inputTab =
1627             (xmlParserInputPtr *) xmlRealloc(ctxt->inputTab,
1628                                              ctxt->inputMax *
1629                                              sizeof(ctxt->inputTab[0]));
1630         if (ctxt->inputTab == NULL) {
1631             xmlErrMemory(ctxt, NULL);
1632             xmlFreeInputStream(value);
1633             ctxt->inputMax /= 2;
1634             value = NULL;
1635             return (-1);
1636         }
1637     }
1638     ctxt->inputTab[ctxt->inputNr] = value;
1639     ctxt->input = value;
1640     return (ctxt->inputNr++);
1641 }
1642 /**
1643  * inputPop:
1644  * @ctxt: an XML parser context
1645  *
1646  * Pops the top parser input from the input stack
1647  *
1648  * Returns the input just removed
1649  */
1650 xmlParserInputPtr
1651 inputPop(xmlParserCtxtPtr ctxt)
1652 {
1653     xmlParserInputPtr ret;
1654
1655     if (ctxt == NULL)
1656         return(NULL);
1657     if (ctxt->inputNr <= 0)
1658         return (NULL);
1659     ctxt->inputNr--;
1660     if (ctxt->inputNr > 0)
1661         ctxt->input = ctxt->inputTab[ctxt->inputNr - 1];
1662     else
1663         ctxt->input = NULL;
1664     ret = ctxt->inputTab[ctxt->inputNr];
1665     ctxt->inputTab[ctxt->inputNr] = NULL;
1666     return (ret);
1667 }
1668 /**
1669  * nodePush:
1670  * @ctxt:  an XML parser context
1671  * @value:  the element node
1672  *
1673  * Pushes a new element node on top of the node stack
1674  *
1675  * Returns -1 in case of error, the index in the stack otherwise
1676  */
1677 int
1678 nodePush(xmlParserCtxtPtr ctxt, xmlNodePtr value)
1679 {
1680     if (ctxt == NULL) return(0);
1681     if (ctxt->nodeNr >= ctxt->nodeMax) {
1682         xmlNodePtr *tmp;
1683
1684         tmp = (xmlNodePtr *) xmlRealloc(ctxt->nodeTab,
1685                                       ctxt->nodeMax * 2 *
1686                                       sizeof(ctxt->nodeTab[0]));
1687         if (tmp == NULL) {
1688             xmlErrMemory(ctxt, NULL);
1689             return (-1);
1690         }
1691         ctxt->nodeTab = tmp;
1692         ctxt->nodeMax *= 2;
1693     }
1694     if ((((unsigned int) ctxt->nodeNr) > xmlParserMaxDepth) &&
1695         ((ctxt->options & XML_PARSE_HUGE) == 0)) {
1696         xmlFatalErrMsgInt(ctxt, XML_ERR_INTERNAL_ERROR,
1697                  "Excessive depth in document: %d use XML_PARSE_HUGE option\n",
1698                           xmlParserMaxDepth);
1699         ctxt->instate = XML_PARSER_EOF;
1700         return(-1);
1701     }
1702     ctxt->nodeTab[ctxt->nodeNr] = value;
1703     ctxt->node = value;
1704     return (ctxt->nodeNr++);
1705 }
1706
1707 /**
1708  * nodePop:
1709  * @ctxt: an XML parser context
1710  *
1711  * Pops the top element node from the node stack
1712  *
1713  * Returns the node just removed
1714  */
1715 xmlNodePtr
1716 nodePop(xmlParserCtxtPtr ctxt)
1717 {
1718     xmlNodePtr ret;
1719
1720     if (ctxt == NULL) return(NULL);
1721     if (ctxt->nodeNr <= 0)
1722         return (NULL);
1723     ctxt->nodeNr--;
1724     if (ctxt->nodeNr > 0)
1725         ctxt->node = ctxt->nodeTab[ctxt->nodeNr - 1];
1726     else
1727         ctxt->node = NULL;
1728     ret = ctxt->nodeTab[ctxt->nodeNr];
1729     ctxt->nodeTab[ctxt->nodeNr] = NULL;
1730     return (ret);
1731 }
1732
1733 #ifdef LIBXML_PUSH_ENABLED
1734 /**
1735  * nameNsPush:
1736  * @ctxt:  an XML parser context
1737  * @value:  the element name
1738  * @prefix:  the element prefix
1739  * @URI:  the element namespace name
1740  *
1741  * Pushes a new element name/prefix/URL on top of the name stack
1742  *
1743  * Returns -1 in case of error, the index in the stack otherwise
1744  */
1745 static int
1746 nameNsPush(xmlParserCtxtPtr ctxt, const xmlChar * value,
1747            const xmlChar *prefix, const xmlChar *URI, int nsNr)
1748 {
1749     if (ctxt->nameNr >= ctxt->nameMax) {
1750         const xmlChar * *tmp;
1751         void **tmp2;
1752         ctxt->nameMax *= 2;
1753         tmp = (const xmlChar * *) xmlRealloc((xmlChar * *)ctxt->nameTab,
1754                                     ctxt->nameMax *
1755                                     sizeof(ctxt->nameTab[0]));
1756         if (tmp == NULL) {
1757             ctxt->nameMax /= 2;
1758             goto mem_error;
1759         }
1760         ctxt->nameTab = tmp;
1761         tmp2 = (void **) xmlRealloc((void * *)ctxt->pushTab,
1762                                     ctxt->nameMax * 3 *
1763                                     sizeof(ctxt->pushTab[0]));
1764         if (tmp2 == NULL) {
1765             ctxt->nameMax /= 2;
1766             goto mem_error;
1767         }
1768         ctxt->pushTab = tmp2;
1769     }
1770     ctxt->nameTab[ctxt->nameNr] = value;
1771     ctxt->name = value;
1772     ctxt->pushTab[ctxt->nameNr * 3] = (void *) prefix;
1773     ctxt->pushTab[ctxt->nameNr * 3 + 1] = (void *) URI;
1774     ctxt->pushTab[ctxt->nameNr * 3 + 2] = (void *) (long) nsNr;
1775     return (ctxt->nameNr++);
1776 mem_error:
1777     xmlErrMemory(ctxt, NULL);
1778     return (-1);
1779 }
1780 /**
1781  * nameNsPop:
1782  * @ctxt: an XML parser context
1783  *
1784  * Pops the top element/prefix/URI name from the name stack
1785  *
1786  * Returns the name just removed
1787  */
1788 static const xmlChar *
1789 nameNsPop(xmlParserCtxtPtr ctxt)
1790 {
1791     const xmlChar *ret;
1792
1793     if (ctxt->nameNr <= 0)
1794         return (NULL);
1795     ctxt->nameNr--;
1796     if (ctxt->nameNr > 0)
1797         ctxt->name = ctxt->nameTab[ctxt->nameNr - 1];
1798     else
1799         ctxt->name = NULL;
1800     ret = ctxt->nameTab[ctxt->nameNr];
1801     ctxt->nameTab[ctxt->nameNr] = NULL;
1802     return (ret);
1803 }
1804 #endif /* LIBXML_PUSH_ENABLED */
1805
1806 /**
1807  * namePush:
1808  * @ctxt:  an XML parser context
1809  * @value:  the element name
1810  *
1811  * Pushes a new element name on top of the name stack
1812  *
1813  * Returns -1 in case of error, the index in the stack otherwise
1814  */
1815 int
1816 namePush(xmlParserCtxtPtr ctxt, const xmlChar * value)
1817 {
1818     if (ctxt == NULL) return (-1);
1819
1820     if (ctxt->nameNr >= ctxt->nameMax) {
1821         const xmlChar * *tmp;
1822         ctxt->nameMax *= 2;
1823         tmp = (const xmlChar * *) xmlRealloc((xmlChar * *)ctxt->nameTab,
1824                                     ctxt->nameMax *
1825                                     sizeof(ctxt->nameTab[0]));
1826         if (tmp == NULL) {
1827             ctxt->nameMax /= 2;
1828             goto mem_error;
1829         }
1830         ctxt->nameTab = tmp;
1831     }
1832     ctxt->nameTab[ctxt->nameNr] = value;
1833     ctxt->name = value;
1834     return (ctxt->nameNr++);
1835 mem_error:
1836     xmlErrMemory(ctxt, NULL);
1837     return (-1);
1838 }
1839 /**
1840  * namePop:
1841  * @ctxt: an XML parser context
1842  *
1843  * Pops the top element name from the name stack
1844  *
1845  * Returns the name just removed
1846  */
1847 const xmlChar *
1848 namePop(xmlParserCtxtPtr ctxt)
1849 {
1850     const xmlChar *ret;
1851
1852     if ((ctxt == NULL) || (ctxt->nameNr <= 0))
1853         return (NULL);
1854     ctxt->nameNr--;
1855     if (ctxt->nameNr > 0)
1856         ctxt->name = ctxt->nameTab[ctxt->nameNr - 1];
1857     else
1858         ctxt->name = NULL;
1859     ret = ctxt->nameTab[ctxt->nameNr];
1860     ctxt->nameTab[ctxt->nameNr] = NULL;
1861     return (ret);
1862 }
1863
1864 static int spacePush(xmlParserCtxtPtr ctxt, int val) {
1865     if (ctxt->spaceNr >= ctxt->spaceMax) {
1866         int *tmp;
1867
1868         ctxt->spaceMax *= 2;
1869         tmp = (int *) xmlRealloc(ctxt->spaceTab,
1870                                  ctxt->spaceMax * sizeof(ctxt->spaceTab[0]));
1871         if (tmp == NULL) {
1872             xmlErrMemory(ctxt, NULL);
1873             ctxt->spaceMax /=2;
1874             return(-1);
1875         }
1876         ctxt->spaceTab = tmp;
1877     }
1878     ctxt->spaceTab[ctxt->spaceNr] = val;
1879     ctxt->space = &ctxt->spaceTab[ctxt->spaceNr];
1880     return(ctxt->spaceNr++);
1881 }
1882
1883 static int spacePop(xmlParserCtxtPtr ctxt) {
1884     int ret;
1885     if (ctxt->spaceNr <= 0) return(0);
1886     ctxt->spaceNr--;
1887     if (ctxt->spaceNr > 0)
1888         ctxt->space = &ctxt->spaceTab[ctxt->spaceNr - 1];
1889     else
1890         ctxt->space = &ctxt->spaceTab[0];
1891     ret = ctxt->spaceTab[ctxt->spaceNr];
1892     ctxt->spaceTab[ctxt->spaceNr] = -1;
1893     return(ret);
1894 }
1895
1896 /*
1897  * Macros for accessing the content. Those should be used only by the parser,
1898  * and not exported.
1899  *
1900  * Dirty macros, i.e. one often need to make assumption on the context to
1901  * use them
1902  *
1903  *   CUR_PTR return the current pointer to the xmlChar to be parsed.
1904  *           To be used with extreme caution since operations consuming
1905  *           characters may move the input buffer to a different location !
1906  *   CUR     returns the current xmlChar value, i.e. an 8-bit value.
1907  *           This should be used internally by the parser
1908  *           only to compare to ASCII values, otherwise it would break when
1909  *           running with UTF-8 encoding.
1910  *   RAW     same as CUR but in the input buffer, bypass any token
1911  *           extraction that may have been done
1912  *   NXT(n)  returns the n'th next xmlChar. Same as CUR, it should be used only
1913  *           to compare on ASCII based substring.
1914  *   SKIP(n) Skip n xmlChar, and must also be used only to skip ASCII defined
1915  *           strings without newlines within the parser.
1916  *   NEXT1   Skip 1 xmlChar, and must also be used only to skip 1 non-newline ASCII
1917  *           defined char within the parser.
1918  * Clean macros, not dependent of an ASCII context, expect UTF-8 encoding
1919  *
1920  *   NEXT    Skip to the next character; this does the proper decoding
1921  *           in UTF-8 mode. It also pops up unfinished entities on the fly.
1922  *   NEXTL(l) Skip the current unicode character of l xmlChars long.
1923  *   CUR_CHAR(l) returns the current unicode character (int), set l
1924  *           to the number of xmlChars used for the encoding [0-5].
1925  *   CUR_SCHAR  same but operate on a string instead of the context
1926  *   COPY_BUF  copy the current unicode char to the target buffer, increment
1927  *            the index
1928  *   GROW, SHRINK  handling of input buffers
1929  */
1930
/*
 * Raw accessors on the current input. All of them expand to expressions
 * on a variable named `ctxt`, which must be in scope at the point of use.
 *   RAW, CUR   both expand to the same expression: the xmlChar at the
 *              input cursor.
 *   NXT(val)   the xmlChar `val` positions after the cursor; the macro
 *              itself performs no bounds check.
 *   CUR_PTR    the input cursor pointer itself.
 */
1931 #define RAW (*ctxt->input->cur)
1932 #define CUR (*ctxt->input->cur)
1933 #define NXT(val) ctxt->input->cur[(val)]
1934 #define CUR_PTR ctxt->input->cur
1935
/*
 * CMPn(s, c1, ..., cn): evaluates to non-zero when the first n bytes
 * found at s are equal to c1 ... cn, in that order. The bytes are read
 * as unsigned char and the comparison stops at the first mismatch.
 *
 * Every parameter is parenthesized in the expansion so that arguments
 * which are themselves expressions (pointer arithmetic, conditionals,
 * bit operations) cannot re-associate with the surrounding operators.
 * The cast is to a const pointer so const-qualified input can be passed
 * without discarding the qualifier.
 *
 * s is expanded once per compared byte and therefore must not have
 * side effects.
 */
#define CMP4( s, c1, c2, c3, c4 ) \
  ( ((const unsigned char *) (s))[ 0 ] == (c1) && \
    ((const unsigned char *) (s))[ 1 ] == (c2) && \
    ((const unsigned char *) (s))[ 2 ] == (c3) && \
    ((const unsigned char *) (s))[ 3 ] == (c4) )
#define CMP5( s, c1, c2, c3, c4, c5 ) \
  ( CMP4( s, c1, c2, c3, c4 ) && \
    ((const unsigned char *) (s))[ 4 ] == (c5) )
#define CMP6( s, c1, c2, c3, c4, c5, c6 ) \
  ( CMP5( s, c1, c2, c3, c4, c5 ) && \
    ((const unsigned char *) (s))[ 5 ] == (c6) )
#define CMP7( s, c1, c2, c3, c4, c5, c6, c7 ) \
  ( CMP6( s, c1, c2, c3, c4, c5, c6 ) && \
    ((const unsigned char *) (s))[ 6 ] == (c7) )
#define CMP8( s, c1, c2, c3, c4, c5, c6, c7, c8 ) \
  ( CMP7( s, c1, c2, c3, c4, c5, c6, c7 ) && \
    ((const unsigned char *) (s))[ 7 ] == (c8) )
#define CMP9( s, c1, c2, c3, c4, c5, c6, c7, c8, c9 ) \
  ( CMP8( s, c1, c2, c3, c4, c5, c6, c7, c8 ) && \
    ((const unsigned char *) (s))[ 8 ] == (c9) )
#define CMP10( s, c1, c2, c3, c4, c5, c6, c7, c8, c9, c10 ) \
  ( CMP9( s, c1, c2, c3, c4, c5, c6, c7, c8, c9 ) && \
    ((const unsigned char *) (s))[ 9 ] == (c10) )
1953
/*
 * SKIP(val): advance nbChars, the input cursor and the column counter by
 * val. The line counter is not touched. val is expanded three times, so
 * it must be free of side effects.
 * After the move:
 *   - if the byte at the cursor is '%', xmlParserHandlePEReference() is
 *     called;
 *   - if the byte at the cursor is 0 and xmlParserInputGrow() returns a
 *     value <= 0, xmlPopInput() is called.
 * Requires a variable named `ctxt` in scope.
 */
1954 #define SKIP(val) do {                                                  \
1955     ctxt->nbChars += (val),ctxt->input->cur += (val),ctxt->input->col+=(val);                   \
1956     if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt);     \
1957     if ((*ctxt->input->cur == 0) &&                                     \
1958         (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))            \
1959             xmlPopInput(ctxt);                                          \
1960   } while (0)
1961
/*
 * SKIPL(val): advance the input cursor by val xmlChars one at a time.
 * For each one, a '\n' under the cursor bumps the line counter and
 * resets the column to 1; any other byte bumps the column. nbChars is
 * incremented per step.
 * After the loop the same '%' and end-of-buffer handling as in SKIP is
 * applied (xmlParserHandlePEReference / xmlParserInputGrow + xmlPopInput).
 * val is re-evaluated on every loop test and is not parenthesized in the
 * expansion. The macro declares a local named `skipl`.
 */
1962 #define SKIPL(val) do {                                                 \
1963     int skipl;                                                          \
1964     for(skipl=0; skipl<val; skipl++) {                                  \
1965         if (*(ctxt->input->cur) == '\n') {                              \
1966         ctxt->input->line++; ctxt->input->col = 1;                      \
1967         } else ctxt->input->col++;                                      \
1968         ctxt->nbChars++;                                                \
1969         ctxt->input->cur++;                                             \
1970     }                                                                   \
1971     if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt);     \
1972     if ((*ctxt->input->cur == 0) &&                                     \
1973         (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))            \
1974             xmlPopInput(ctxt);                                          \
1975   } while (0)
1976
/*
 * SHRINK: call xmlSHRINK(ctxt) when all of the following hold:
 *   - ctxt->progressive is 0;
 *   - more than 2 * INPUT_CHUNK bytes lie between the input base and the
 *     cursor;
 *   - fewer than 2 * INPUT_CHUNK bytes lie between the cursor and the
 *     input end.
 * The expansion is a complete `if` statement that already ends with a
 * semicolon.
 * NOTE(review): being a bare `if`, it would capture a following `else`;
 * do not use it as the unbraced body of an if/else.
 */
1977 #define SHRINK if ((ctxt->progressive == 0) &&                          \
1978                    (ctxt->input->cur - ctxt->input->base > 2 * INPUT_CHUNK) && \
1979                    (ctxt->input->end - ctxt->input->cur < 2 * INPUT_CHUNK)) \
1980         xmlSHRINK (ctxt);
1981
1982 static void xmlSHRINK (xmlParserCtxtPtr ctxt) {
1983     xmlParserInputShrink(ctxt->input);
1984     if ((*ctxt->input->cur == 0) &&
1985         (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))
1986             xmlPopInput(ctxt);
1987   }
1988
/*
 * GROW: call xmlGROW(ctxt) when ctxt->progressive is 0 and fewer than
 * INPUT_CHUNK bytes lie between the input cursor and the input end.
 * The expansion is a complete `if` statement that already ends with a
 * semicolon, so it is valid with or without a trailing ';' at the point
 * of use.
 * NOTE(review): being a bare `if`, it would capture a following `else`;
 * do not use it as the unbraced body of an if/else.
 */
1989 #define GROW if ((ctxt->progressive == 0) &&                            \
1990                  (ctxt->input->end - ctxt->input->cur < INPUT_CHUNK))   \
1991         xmlGROW (ctxt);
1992
1993 static void xmlGROW (xmlParserCtxtPtr ctxt) {
1994     xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
1995     if ((ctxt->input->cur != NULL) && (*ctxt->input->cur == 0) &&
1996         (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))
1997             xmlPopInput(ctxt);
1998 }
1999
/*
 * SKIP_BLANKS  expands to a call to xmlSkipBlankChars(ctxt).
 * NEXT         expands to a call to xmlNextChar(ctxt).
 * NEXT1        advances column, cursor and nbChars by exactly one, then
 *              calls xmlParserInputGrow() if the byte now under the
 *              cursor is 0 (its result is ignored). It does no line
 *              accounting, no '%' handling and never pops the input.
 *              The expansion is a brace block, not a do/while(0).
 * All three require a variable named `ctxt` in scope.
 */
2000 #define SKIP_BLANKS xmlSkipBlankChars(ctxt)
2001
2002 #define NEXT xmlNextChar(ctxt)
2003
2004 #define NEXT1 {                                                         \
2005         ctxt->input->col++;                                             \
2006         ctxt->input->cur++;                                             \
2007         ctxt->nbChars++;                                                \
2008         if (*ctxt->input->cur == 0)                                     \
2009             xmlParserInputGrow(ctxt->input, INPUT_CHUNK);               \
2010     }
2011
/*
 * NEXTL(l): step over the current character, which occupies l xmlChars.
 * A '\n' under the cursor bumps the line counter and resets the column
 * to 1; anything else bumps the column by one. The cursor then moves
 * forward by l and, if it lands on '%', xmlParserHandlePEReference() is
 * called.
 * Unlike SKIP/SKIPL/NEXT1 this macro does not update nbChars and does
 * not try to grow or pop the input. l is not parenthesized in the
 * expansion.
 */
2012 #define NEXTL(l) do {                                                   \
2013     if (*(ctxt->input->cur) == '\n') {                                  \
2014         ctxt->input->line++; ctxt->input->col = 1;                      \
2015     } else ctxt->input->col++;                                          \
2016     ctxt->input->cur += l;                              \
2017     if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt);     \
2018   } while (0)
2019
/*
 * CUR_CHAR(l)      expands to xmlCurrentChar(ctxt, &l); l must be an
 *                  lvalue since its address is taken.
 * CUR_SCHAR(s, l)  expands to xmlStringCurrentChar(ctxt, s, &l); same
 *                  requirement on l.
 * COPY_BUF(l,b,i,v) appends v to buffer b at index i: when l == 1 it is
 *                  stored as a single xmlChar and i is incremented,
 *                  otherwise xmlCopyCharMultiByte() writes it and i is
 *                  advanced by that function's return value.
 *                  The expansion is an unbraced if/else whose final
 *                  statement has no semicolon; the caller supplies it.
 */
2020 #define CUR_CHAR(l) xmlCurrentChar(ctxt, &l)
2021 #define CUR_SCHAR(s, l) xmlStringCurrentChar(ctxt, s, &l)
2022
2023 #define COPY_BUF(l,b,i,v)                                               \
2024     if (l == 1) b[i++] = (xmlChar) v;                                   \
2025     else i += xmlCopyCharMultiByte(&b[i],v)
2026
2027 /**
2028  * xmlSkipBlankChars:
2029  * @ctxt:  the XML parser context
2030  *
2031  * skip all blanks character found at that point in the input streams.
2032  * It pops up finished entities in the process if allowable at that point.
2033  *
2034  * Returns the number of space chars skipped
2035  */
2036
2037 int
2038 xmlSkipBlankChars(xmlParserCtxtPtr ctxt) {
2039     int res = 0;
2040
2041     /*
2042      * It's Okay to use CUR/NEXT here since all the blanks are on
2043      * the ASCII range.
2044      */
2045     if ((ctxt->inputNr == 1) && (ctxt->instate != XML_PARSER_DTD)) {
2046         const xmlChar *cur;
2047         /*
2048          * if we are in the document content, go really fast
2049          */
2050         cur = ctxt->input->cur;
2051         while (IS_BLANK_CH(*cur)) {
2052             if (*cur == '\n') {
2053                 ctxt->input->line++; ctxt->input->col = 1;
2054             }
2055             cur++;
2056             res++;
2057             if (*cur == 0) {
2058                 ctxt->input->cur = cur;
2059                 xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
2060                 cur = ctxt->input->cur;
2061             }
2062         }
2063         ctxt->input->cur = cur;
2064     } else {
2065         int cur;
2066         do {
2067             cur = CUR;
2068             while (IS_BLANK_CH(cur)) { /* CHECKED tstblanks.xml */
2069                 NEXT;
2070                 cur = CUR;
2071                 res++;
2072             }
2073             while ((cur == 0) && (ctxt->inputNr > 1) &&
2074                    (ctxt->instate != XML_PARSER_COMMENT)) {
2075                 xmlPopInput(ctxt);
2076                 cur = CUR;
2077             }
2078             /*
2079              * Need to handle support of entities branching here
2080              */
2081             if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt);
2082         } while (IS_BLANK(cur)); /* CHECKED tstblanks.xml */
2083     }
2084     return(res);
2085 }
2086
2087 /************************************************************************
2088  *                                                                      *
2089  *              Commodity functions to handle entities                  *
2090  *                                                                      *
2091  ************************************************************************/
2092
2093 /**
2094  * xmlPopInput:
2095  * @ctxt:  an XML parser context
2096  *
2097  * xmlPopInput: the current input pointed by ctxt->input came to an end
2098  *          pop it and return the next char.
2099  *
2100  * Returns the current xmlChar in the parser context
2101  */
2102 xmlChar
2103 xmlPopInput(xmlParserCtxtPtr ctxt) {
2104     if ((ctxt == NULL) || (ctxt->inputNr <= 1)) return(0);
2105     if (xmlParserDebugEntities)
2106         xmlGenericError(xmlGenericErrorContext,
2107                 "Popping input %d\n", ctxt->inputNr);
2108     xmlFreeInputStream(inputPop(ctxt));
2109     if ((*ctxt->input->cur == 0) &&
2110         (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))
2111             return(xmlPopInput(ctxt));
2112     return(CUR);
2113 }
2114
2115 /**
2116  * xmlPushInput:
2117  * @ctxt:  an XML parser context
2118  * @input:  an XML parser input fragment (entity, XML fragment ...).
2119  *
2120  * xmlPushInput: switch to a new input stream which is stacked on top
2121  *               of the previous one(s).
2122  * Returns -1 in case of error or the index in the input stack
2123  */
2124 int
2125 xmlPushInput(xmlParserCtxtPtr ctxt, xmlParserInputPtr input) {
2126     int ret;
2127     if (input == NULL) return(-1);
2128
2129     if (xmlParserDebugEntities) {
2130         if ((ctxt->input != NULL) && (ctxt->input->filename))
2131             xmlGenericError(xmlGenericErrorContext,
2132                     "%s(%d): ", ctxt->input->filename,
2133                     ctxt->input->line);
2134         xmlGenericError(xmlGenericErrorContext,
2135                 "Pushing input %d : %.30s\n", ctxt->inputNr+1, input->cur);
2136     }
2137     ret = inputPush(ctxt, input);
2138     GROW;
2139     return(ret);
2140 }
2141
2142 /**
2143  * xmlParseCharRef:
2144  * @ctxt:  an XML parser context
2145  *
2146  * parse Reference declarations
2147  *
2148  * [66] CharRef ::= '&#' [0-9]+ ';' |
2149  *                  '&#x' [0-9a-fA-F]+ ';'
2150  *
2151  * [ WFC: Legal Character ]
2152  * Characters referred to using character references must match the
2153  * production for Char.
2154  *
2155  * Returns the value parsed (as an int), 0 in case of error
2156  */
2157 int
2158 xmlParseCharRef(xmlParserCtxtPtr ctxt) {
2159     unsigned int val = 0;
2160     int count = 0;
2161     unsigned int outofrange = 0;
2162
2163     /*
2164      * Using RAW/CUR/NEXT is okay since we are working on ASCII range here
2165      */
2166     if ((RAW == '&') && (NXT(1) == '#') &&
2167         (NXT(2) == 'x')) {
2168         SKIP(3);
2169         GROW;
2170         while (RAW != ';') { /* loop blocked by count */
2171             if (count++ > 20) {
2172                 count = 0;
2173                 GROW;
2174             }
2175             if ((RAW >= '0') && (RAW <= '9'))
2176                 val = val * 16 + (CUR - '0');
2177             else if ((RAW >= 'a') && (RAW <= 'f') && (count < 20))
2178                 val = val * 16 + (CUR - 'a') + 10;
2179             else if ((RAW >= 'A') && (RAW <= 'F') && (count < 20))
2180                 val = val * 16 + (CUR - 'A') + 10;
2181             else {
2182                 xmlFatalErr(ctxt, XML_ERR_INVALID_HEX_CHARREF, NULL);
2183                 val = 0;
2184                 break;
2185             }
2186             if (val > 0x10FFFF)
2187                 outofrange = val;
2188
2189             NEXT;
2190             count++;
2191         }
2192         if (RAW == ';') {
2193             /* on purpose to avoid reentrancy problems with NEXT and SKIP */
2194             ctxt->input->col++;
2195             ctxt->nbChars ++;
2196             ctxt->input->cur++;
2197         }
2198     } else if  ((RAW == '&') && (NXT(1) == '#')) {
2199         SKIP(2);
2200         GROW;
2201         while (RAW != ';') { /* loop blocked by count */
2202             if (count++ > 20) {
2203                 count = 0;
2204                 GROW;
2205             }
2206             if ((RAW >= '0') && (RAW <= '9'))
2207                 val = val * 10 + (CUR - '0');
2208             else {
2209                 xmlFatalErr(ctxt, XML_ERR_INVALID_DEC_CHARREF, NULL);
2210                 val = 0;
2211                 break;
2212             }
2213             if (val > 0x10FFFF)
2214                 outofrange = val;
2215
2216             NEXT;
2217             count++;
2218         }
2219         if (RAW == ';') {
2220             /* on purpose to avoid reentrancy problems with NEXT and SKIP */
2221             ctxt->input->col++;
2222             ctxt->nbChars ++;
2223             ctxt->input->cur++;
2224         }
2225     } else {
2226         xmlFatalErr(ctxt, XML_ERR_INVALID_CHARREF, NULL);
2227     }
2228
2229     /*
2230      * [ WFC: Legal Character ]
2231      * Characters referred to using character references must match the
2232      * production for Char.
2233      */
2234     if ((IS_CHAR(val) && (outofrange == 0))) {
2235         return(val);
2236     } else {
2237         xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
2238                           "xmlParseCharRef: invalid xmlChar value %d\n",
2239                           val);
2240     }
2241     return(0);
2242 }
2243
2244 /**
2245  * xmlParseStringCharRef:
2246  * @ctxt:  an XML parser context
2247  * @str:  a pointer to an index in the string
2248  *
2249  * parse Reference declarations, variant parsing from a string rather
2250  * than an an input flow.
2251  *
2252  * [66] CharRef ::= '&#' [0-9]+ ';' |
2253  *                  '&#x' [0-9a-fA-F]+ ';'
2254  *
2255  * [ WFC: Legal Character ]
2256  * Characters referred to using character references must match the
2257  * production for Char.
2258  *
2259  * Returns the value parsed (as an int), 0 in case of error, str will be
2260  *         updated to the current value of the index
2261  */
2262 static int
2263 xmlParseStringCharRef(xmlParserCtxtPtr ctxt, const xmlChar **str) {
2264     const xmlChar *ptr;
2265     xmlChar cur;
2266     unsigned int val = 0;
2267     unsigned int outofrange = 0;
2268
2269     if ((str == NULL) || (*str == NULL)) return(0);
2270     ptr = *str;
2271     cur = *ptr;
2272     if ((cur == '&') && (ptr[1] == '#') && (ptr[2] == 'x')) {
2273         ptr += 3;
2274         cur = *ptr;
2275         while (cur != ';') { /* Non input consuming loop */
2276             if ((cur >= '0') && (cur <= '9'))
2277                 val = val * 16 + (cur - '0');
2278             else if ((cur >= 'a') && (cur <= 'f'))
2279                 val = val * 16 + (cur - 'a') + 10;
2280             else if ((cur >= 'A') && (cur <= 'F'))
2281                 val = val * 16 + (cur - 'A') + 10;
2282             else {
2283                 xmlFatalErr(ctxt, XML_ERR_INVALID_HEX_CHARREF, NULL);
2284                 val = 0;
2285                 break;
2286             }
2287             if (val > 0x10FFFF)
2288                 outofrange = val;
2289
2290             ptr++;
2291             cur = *ptr;
2292         }
2293         if (cur == ';')
2294             ptr++;
2295     } else if  ((cur == '&') && (ptr[1] == '#')){
2296         ptr += 2;
2297         cur = *ptr;
2298         while (cur != ';') { /* Non input consuming loops */
2299             if ((cur >= '0') && (cur <= '9'))
2300                 val = val * 10 + (cur - '0');
2301             else {
2302                 xmlFatalErr(ctxt, XML_ERR_INVALID_DEC_CHARREF, NULL);
2303                 val = 0;
2304                 break;
2305             }
2306             if (val > 0x10FFFF)
2307                 outofrange = val;
2308
2309             ptr++;
2310             cur = *ptr;
2311         }
2312         if (cur == ';')
2313             ptr++;
2314     } else {
2315         xmlFatalErr(ctxt, XML_ERR_INVALID_CHARREF, NULL);
2316         return(0);
2317     }
2318     *str = ptr;
2319
2320     /*
2321      * [ WFC: Legal Character ]
2322      * Characters referred to using character references must match the
2323      * production for Char.
2324      */
2325     if ((IS_CHAR(val) && (outofrange == 0))) {
2326         return(val);
2327     } else {
2328         xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
2329                           "xmlParseStringCharRef: invalid xmlChar value %d\n",
2330                           val);
2331     }
2332     return(0);
2333 }
2334
2335 /**
2336  * xmlNewBlanksWrapperInputStream:
2337  * @ctxt:  an XML parser context
2338  * @entity:  an Entity pointer
2339  *
2340  * Create a new input stream for wrapping
2341  * blanks around a PEReference
2342  *
2343  * Returns the new input stream or NULL
2344  */
2345
2346 static void deallocblankswrapper (xmlChar *str) {xmlFree(str);}
2347
2348 static xmlParserInputPtr
2349 xmlNewBlanksWrapperInputStream(xmlParserCtxtPtr ctxt, xmlEntityPtr entity) {
2350     xmlParserInputPtr input;
2351     xmlChar *buffer;
2352     size_t length;
2353     if (entity == NULL) {
2354         xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
2355                     "xmlNewBlanksWrapperInputStream entity\n");
2356         return(NULL);
2357     }
2358     if (xmlParserDebugEntities)
2359         xmlGenericError(xmlGenericErrorContext,
2360                 "new blanks wrapper for entity: %s\n", entity->name);
2361     input = xmlNewInputStream(ctxt);
2362     if (input == NULL) {
2363         return(NULL);
2364     }
2365     length = xmlStrlen(entity->name) + 5;
2366     buffer = xmlMallocAtomic(length);
2367     if (buffer == NULL) {
2368         xmlErrMemory(ctxt, NULL);
2369         xmlFree(input);
2370         return(NULL);
2371     }
2372     buffer [0] = ' ';
2373     buffer [1] = '%';
2374     buffer [length-3] = ';';
2375     buffer [length-2] = ' ';
2376     buffer [length-1] = 0;
2377     memcpy(buffer + 2, entity->name, length - 5);
2378     input->free = deallocblankswrapper;
2379     input->base = buffer;
2380     input->cur = buffer;
2381     input->length = length;
2382     input->end = &buffer[length];
2383     return(input);
2384 }
2385
2386 /**
2387  * xmlParserHandlePEReference:
2388  * @ctxt:  the parser context
2389  *
2390  * [69] PEReference ::= '%' Name ';'
2391  *
2392  * [ WFC: No Recursion ]
2393  * A parsed entity must not contain a recursive
2394  * reference to itself, either directly or indirectly.
2395  *
2396  * [ WFC: Entity Declared ]
2397  * In a document without any DTD, a document with only an internal DTD
2398  * subset which contains no parameter entity references, or a document
2399  * with "standalone='yes'", ...  ... The declaration of a parameter
2400  * entity must precede any reference to it...
2401  *
2402  * [ VC: Entity Declared ]
2403  * In a document with an external subset or external parameter entities
2404  * with "standalone='no'", ...  ... The declaration of a parameter entity
2405  * must precede any reference to it...
2406  *
2407  * [ WFC: In DTD ]
2408  * Parameter-entity references may only appear in the DTD.
2409  * NOTE: misleading but this is handled.
2410  *
2411  * A PEReference may have been detected in the current input stream
2412  * the handling is done accordingly to
2413  *      http://www.w3.org/TR/REC-xml#entproc
2414  * i.e.
2415  *   - Included in literal in entity values
2416  *   - Included as Parameter Entity reference within DTDs
2417  */
2418 void
2419 xmlParserHandlePEReference(xmlParserCtxtPtr ctxt) {
2420     const xmlChar *name;
2421     xmlEntityPtr entity = NULL;
2422     xmlParserInputPtr input;
2423
    /* Nothing to do unless the cursor sits on a '%'. */
2424     if (RAW != '%') return;
    /*
     * Decide from the parser state whether the reference is expanded.
     * Only XML_PARSER_DTD can leave this switch through `break`; every
     * other listed state returns, three of them (EOF, prolog-like
     * states, epilog) after raising a fatal error.
     */
2425     switch(ctxt->instate) {
2426         case XML_PARSER_CDATA_SECTION:
2427             return;
2428         case XML_PARSER_COMMENT:
2429             return;
2430         case XML_PARSER_START_TAG:
2431             return;
2432         case XML_PARSER_END_TAG:
2433             return;
2434         case XML_PARSER_EOF:
2435             xmlFatalErr(ctxt, XML_ERR_PEREF_AT_EOF, NULL);
2436             return;
2437         case XML_PARSER_PROLOG:
2438         case XML_PARSER_START:
2439         case XML_PARSER_MISC:
2440             xmlFatalErr(ctxt, XML_ERR_PEREF_IN_PROLOG, NULL);
2441             return;
2442         case XML_PARSER_ENTITY_DECL:
2443         case XML_PARSER_CONTENT:
2444         case XML_PARSER_ATTRIBUTE_VALUE:
2445         case XML_PARSER_PI:
2446         case XML_PARSER_SYSTEM_LITERAL:
2447         case XML_PARSER_PUBLIC_LITERAL:
2448             /* we just ignore it there */
2449             return;
2450         case XML_PARSER_EPILOG:
2451             xmlFatalErr(ctxt, XML_ERR_PEREF_IN_EPILOG, NULL);
2452             return;
2453         case XML_PARSER_ENTITY_VALUE:
2454             /*
2455              * NOTE: in the case of entity values, we don't do the
2456              *       substitution here since we need the literal
2457              *       entity value to be able to save the internal
2458              *       subset of the document.
2459              *       This will be handled by xmlStringDecodeEntities
2460              */
2461             return;
2462         case XML_PARSER_DTD:
2463             /*
2464              * [WFC: Well-Formedness Constraint: PEs in Internal Subset]
2465              * In the internal DTD subset, parameter-entity references
2466              * can occur only where markup declarations can occur, not
2467              * within markup declarations.
2468              * In that case this is handled in xmlParseMarkupDecl
2469              */
2470             if ((ctxt->external == 0) && (ctxt->inputNr == 1))
2471                 return;
            /* A '%' followed by a blank or by the end of data is not a reference. */
2472             if (IS_BLANK_CH(NXT(1)) || NXT(1) == 0)
2473                 return;
2474             break;
2475         case XML_PARSER_IGNORE:
2476             return;
2477     }
2478
    /* Consume the '%' and read the entity name that follows it. */
2479     NEXT;
2480     name = xmlParseName(ctxt);
    /* NOTE(review): name may still be NULL here; it is only checked after this debug trace. */
2481     if (xmlParserDebugEntities)
2482         xmlGenericError(xmlGenericErrorContext,
2483                 "PEReference: %s\n", name);
2484     if (name == NULL) {
2485         xmlFatalErr(ctxt, XML_ERR_PEREF_NO_NAME, NULL);
2486     } else {
2487         if (RAW == ';') {
2488             NEXT;
            /* Look the entity up through the SAX handler, when one is installed. */
2489             if ((ctxt->sax != NULL) && (ctxt->sax->getParameterEntity != NULL))
2490                 entity = ctxt->sax->getParameterEntity(ctxt->userData, name);
2491             if (entity == NULL) {
2492
2493                 /*
2494                  * [ WFC: Entity Declared ]
2495                  * In a document without any DTD, a document with only an
2496                  * internal DTD subset which contains no parameter entity
2497                  * references, or a document with "standalone='yes'", ...
2498                  * ... The declaration of a parameter entity must precede
2499                  * any reference to it...
2500                  */
2501                 if ((ctxt->standalone == 1) ||
2502                     ((ctxt->hasExternalSubset == 0) &&
2503                      (ctxt->hasPErefs == 0))) {
2504                     xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
2505                          "PEReference: %%%s; not found\n", name);
2506                 } else {
2507                     /*
2508                      * [ VC: Entity Declared ]
2509                      * In a document with an external subset or external
2510                      * parameter entities with "standalone='no'", ...
2511                      * ... The declaration of a parameter entity must precede
2512                      * any reference to it...
2513                      */
2514                     if ((ctxt->validate) && (ctxt->vctxt.error != NULL)) {
2515                         xmlValidityError(ctxt, XML_WAR_UNDECLARED_ENTITY,
2516                                          "PEReference: %%%s; not found\n",
2517                                          name, NULL);
2518                     } else
2519                         xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
2520                                       "PEReference: %%%s; not found\n",
2521                                       name, NULL);
2522                     ctxt->valid = 0;
2523                 }
2524             } else if (ctxt->input->free != deallocblankswrapper) {
                    /*
                     * The current input is not a blanks wrapper: push a
                     * wrapper stream (text " %name; ") instead of the
                     * entity itself. The entity content is pushed by the
                     * branch below once the reference is met again from
                     * inside that wrapper.
                     */
2525                     input = xmlNewBlanksWrapperInputStream(ctxt, entity);
2526                     if (xmlPushInput(ctxt, input) < 0)
2527                         return;
2528             } else {
                /* The current input is a blanks wrapper: push the entity content now. */
2529                 if ((entity->etype == XML_INTERNAL_PARAMETER_ENTITY) ||
2530                     (entity->etype == XML_EXTERNAL_PARAMETER_ENTITY)) {
2531                     xmlChar start[4];
2532                     xmlCharEncoding enc;
2533
2534                     /*
2535                      * handle the extra spaces added before and after
2536                      * c.f. http://www.w3.org/TR/REC-xml#as-PE
2537                      * this is done independently.
2538                      */
2539                     input = xmlNewEntityInputStream(ctxt, entity);
2540                     if (xmlPushInput(ctxt, input) < 0)
2541                         return;
2542
2543                     /*
2544                      * Get the 4 first bytes and decode the charset
2545                      * if enc != XML_CHAR_ENCODING_NONE
2546                      * plug some encoding conversion routines.
2547                      * Note that, since we may have some non-UTF8
2548                      * encoding (like UTF16, bug 135229), the 'length'
2549                      * is not known, but we can calculate based upon
2550                      * the amount of data in the buffer.
2551                      */
                    /* No ';' needed: the GROW macro expands to a complete statement. */
2552                     GROW
2553                     if ((ctxt->input->end - ctxt->input->cur)>=4) {
2554                         start[0] = RAW;
2555                         start[1] = NXT(1);
2556                         start[2] = NXT(2);
2557                         start[3] = NXT(3);
2558                         enc = xmlDetectCharEncoding(start, 4);
2559                         if (enc != XML_CHAR_ENCODING_NONE) {
2560                             xmlSwitchEncoding(ctxt, enc);
2561                         }
2562                     }
2563
                    /* An external parameter entity may start with a text declaration ("<?xml" + blank). */
2564                     if ((entity->etype == XML_EXTERNAL_PARAMETER_ENTITY) &&
2565                         (CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l' )) &&
2566                         (IS_BLANK_CH(NXT(5)))) {
2567                         xmlParseTextDecl(ctxt);
2568                     }
2569                 } else {
2570                     xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_PARAMETER,
2571                              "PEReference: %s is not a parameter entity\n",
2572                                       name);
2573                 }
2574             }
2575         } else {
2576             xmlFatalErr(ctxt, XML_ERR_PEREF_SEMICOL_MISSING, NULL);
2577         }
2578     }
2579 }
2580
2581 /*
2582  * Macro used to grow the current buffer.
2583  */
/*
 * growBuffer(buffer, n): enlarge `buffer` with xmlRealloc().
 * The macro relies on two things being present in the enclosing
 * function:
 *   - a size variable named <buffer>_size (built by token pasting),
 *     which is updated to 2 * old size + n;
 *   - a label named mem_error, jumped to when the reallocation fails.
 * On failure `buffer` still points at the old block, but <buffer>_size
 * has already been updated to the new value.
 * The expansion is a brace block, not a do/while(0).
 */
2584 #define growBuffer(buffer, n) {                                         \
2585     xmlChar *tmp;                                                       \
2586     buffer##_size *= 2;                                                 \
2587     buffer##_size += n;                                                 \
2588     tmp = (xmlChar *)                                                   \
2589                 xmlRealloc(buffer, buffer##_size * sizeof(xmlChar));    \
2590     if (tmp == NULL) goto mem_error;                                    \
2591     buffer = tmp;                                                       \
2592 }
2593
2594 /**
2595  * xmlStringLenDecodeEntities:
2596  * @ctxt:  the parser context
2597  * @str:  the input string
2598  * @len: the string length
2599  * @what:  combination of XML_SUBSTITUTE_REF and XML_SUBSTITUTE_PEREF
2600  * @end:  an end marker xmlChar, 0 if none
2601  * @end2:  an end marker xmlChar, 0 if none
2602  * @end3:  an end marker xmlChar, 0 if none
2603  *
2604  * Takes a entity string content and process to do the adequate substitutions.
2605  *
2606  * [67] Reference ::= EntityRef | CharRef
2607  *
2608  * [69] PEReference ::= '%' Name ';'
2609  *
2610  * Returns A newly allocated string with the substitution done. The caller
2611  *      must deallocate it !
2612  */
2613 xmlChar *
2614 xmlStringLenDecodeEntities(xmlParserCtxtPtr ctxt, const xmlChar *str, int len,
2615                       int what, xmlChar end, xmlChar  end2, xmlChar end3) {
2616     xmlChar *buffer = NULL;
2617     int buffer_size = 0;
2618
2619     xmlChar *current = NULL;
2620     xmlChar *rep = NULL;
2621     const xmlChar *last;
2622     xmlEntityPtr ent;
2623     int c,l;
2624     int nbchars = 0;
2625
2626     if ((ctxt == NULL) || (str == NULL) || (len < 0))
2627         return(NULL);
2628     last = str + len;
2629
2630     if (((ctxt->depth > 40) &&
2631          ((ctxt->options & XML_PARSE_HUGE) == 0)) ||
2632         (ctxt->depth > 1024)) {
2633         xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
2634         return(NULL);
2635     }
2636
2637     /*
2638      * allocate a translation buffer.
2639      */
2640     buffer_size = XML_PARSER_BIG_BUFFER_SIZE;
2641     buffer = (xmlChar *) xmlMallocAtomic(buffer_size * sizeof(xmlChar));
2642     if (buffer == NULL) goto mem_error;
2643
2644     /*
2645      * OK loop until we reach one of the ending char or a size limit.
2646      * we are operating on already parsed values.
2647      */
2648     if (str < last)
2649         c = CUR_SCHAR(str, l);
2650     else
2651         c = 0;
2652     while ((c != 0) && (c != end) && /* non input consuming loop */
2653            (c != end2) && (c != end3)) {
2654
2655         if (c == 0) break;
2656         if ((c == '&') && (str[1] == '#')) {
2657             int val = xmlParseStringCharRef(ctxt, &str);
2658             if (val != 0) {
2659                 COPY_BUF(0,buffer,nbchars,val);
2660             }
2661             if (nbchars > buffer_size - XML_PARSER_BUFFER_SIZE) {
2662                 growBuffer(buffer, XML_PARSER_BUFFER_SIZE);
2663             }
2664         } else if ((c == '&') && (what & XML_SUBSTITUTE_REF)) {
2665             if (xmlParserDebugEntities)
2666                 xmlGenericError(xmlGenericErrorContext,
2667                         "String decoding Entity Reference: %.30s\n",
2668                         str);
2669             ent = xmlParseStringEntityRef(ctxt, &str);
2670             if ((ctxt->lastError.code == XML_ERR_ENTITY_LOOP) ||
2671                 (ctxt->lastError.code == XML_ERR_INTERNAL_ERROR))
2672                 goto int_error;
2673             if (ent != NULL)
2674                 ctxt->nbentities += ent->checked;
2675             if ((ent != NULL) &&
2676                 (ent->etype == XML_INTERNAL_PREDEFINED_ENTITY)) {
2677                 if (ent->content != NULL) {
2678                     COPY_BUF(0,buffer,nbchars,ent->content[0]);
2679                     if (nbchars > buffer_size - XML_PARSER_BUFFER_SIZE) {
2680                         growBuffer(buffer, XML_PARSER_BUFFER_SIZE);
2681                     }
2682                 } else {
2683                     xmlFatalErrMsg(ctxt, XML_ERR_INTERNAL_ERROR,
2684                             "predefined entity has no content\n");
2685                 }
2686             } else if ((ent != NULL) && (ent->content != NULL)) {
2687                 ctxt->depth++;
2688                 rep = xmlStringDecodeEntities(ctxt, ent->content, what,
2689                                               0, 0, 0);
2690                 ctxt->depth--;
2691
2692                 if (rep != NULL) {
2693                     current = rep;
2694                     while (*current != 0) { /* non input consuming loop */
2695                         buffer[nbchars++] = *current++;
2696                         if (nbchars >
2697                             buffer_size - XML_PARSER_BUFFER_SIZE) {
2698                             if (xmlParserEntityCheck(ctxt, nbchars, ent))
2699                                 goto int_error;
2700                             growBuffer(buffer, XML_PARSER_BUFFER_SIZE);
2701                         }
2702                     }
2703                     xmlFree(rep);
2704                     rep = NULL;
2705                 }
2706             } else if (ent != NULL) {
2707                 int i = xmlStrlen(ent->name);
2708                 const xmlChar *cur = ent->name;
2709
2710                 buffer[nbchars++] = '&';
2711                 if (nbchars > buffer_size - i - XML_PARSER_BUFFER_SIZE) {
2712                     growBuffer(buffer, XML_PARSER_BUFFER_SIZE);
2713                 }
2714                 for (;i > 0;i--)
2715                     buffer[nbchars++] = *cur++;
2716                 buffer[nbchars++] = ';';
2717             }
2718         } else if (c == '%' && (what & XML_SUBSTITUTE_PEREF)) {
2719             if (xmlParserDebugEntities)
2720                 xmlGenericError(xmlGenericErrorContext,
2721                         "String decoding PE Reference: %.30s\n", str);
2722             ent = xmlParseStringPEReference(ctxt, &str);
2723             if (ctxt->lastError.code == XML_ERR_ENTITY_LOOP)
2724                 goto int_error;
2725             if (ent != NULL)
2726                 ctxt->nbentities += ent->checked;
2727             if (ent != NULL) {
2728                 if (ent->content == NULL) {
2729                     xmlLoadEntityContent(ctxt, ent);
2730                 }
2731                 ctxt->depth++;
2732                 rep = xmlStringDecodeEntities(ctxt, ent->content, what,
2733                                               0, 0, 0);
2734                 ctxt->depth--;
2735                 if (rep != NULL) {
2736                     current = rep;
2737                     while (*current != 0) { /* non input consuming loop */
2738                         buffer[nbchars++] = *current++;
2739                         if (nbchars >
2740                             buffer_size - XML_PARSER_BUFFER_SIZE) {
2741                             if (xmlParserEntityCheck(ctxt, nbchars, ent))
2742                                 goto int_error;
2743                             growBuffer(buffer, XML_PARSER_BUFFER_SIZE);
2744                         }
2745                     }
2746                     xmlFree(rep);
2747                     rep = NULL;
2748                 }
2749             }
2750         } else {
2751             COPY_BUF(l,buffer,nbchars,c);
2752             str += l;
2753             if (nbchars > buffer_size - XML_PARSER_BUFFER_SIZE) {
2754               growBuffer(buffer, XML_PARSER_BUFFER_SIZE);
2755             }
2756         }
2757         if (str < last)
2758             c = CUR_SCHAR(str, l);
2759         else
2760             c = 0;
2761     }
2762     buffer[nbchars] = 0;
2763     return(buffer);
2764
2765 mem_error:
2766     xmlErrMemory(ctxt, NULL);
2767 int_error:
2768     if (rep != NULL)
2769         xmlFree(rep);
2770     if (buffer != NULL)
2771         xmlFree(buffer);
2772     return(NULL);
2773 }
2774
2775 /**
2776  * xmlStringDecodeEntities:
2777  * @ctxt:  the parser context
2778  * @str:  the input string
2779  * @what:  combination of XML_SUBSTITUTE_REF and XML_SUBSTITUTE_PEREF
2780  * @end:  an end marker xmlChar, 0 if none
2781  * @end2:  an end marker xmlChar, 0 if none
2782  * @end3:  an end marker xmlChar, 0 if none
2783  *
2784  * Takes a entity string content and process to do the adequate substitutions.
2785  *
2786  * [67] Reference ::= EntityRef | CharRef
2787  *
2788  * [69] PEReference ::= '%' Name ';'
2789  *
2790  * Returns A newly allocated string with the substitution done. The caller
2791  *      must deallocate it !
2792  */
2793 xmlChar *
2794 xmlStringDecodeEntities(xmlParserCtxtPtr ctxt, const xmlChar *str, int what,
2795                         xmlChar end, xmlChar  end2, xmlChar end3) {
2796     if ((ctxt == NULL) || (str == NULL)) return(NULL);
2797     return(xmlStringLenDecodeEntities(ctxt, str, xmlStrlen(str), what,
2798            end, end2, end3));
2799 }
2800
2801 /************************************************************************
2802  *                                                                      *
2803  *              Commodity functions, cleanup needed ?                   *
2804  *                                                                      *
2805  ************************************************************************/
2806
2807 /**
2808  * areBlanks:
2809  * @ctxt:  an XML parser context
2810  * @str:  a xmlChar *
2811  * @len:  the size of @str
2812  * @blank_chars: we know the chars are blanks
2813  *
2814  * Is this a sequence of blank chars that one can ignore ?
2815  *
2816  * Returns 1 if ignorable 0 otherwise.
2817  */
2818
2819 static int areBlanks(xmlParserCtxtPtr ctxt, const xmlChar *str, int len,
2820                      int blank_chars) {
2821     int i, ret;
2822     xmlNodePtr lastChild;
2823
2824     /*
2825      * Don't spend time trying to differentiate them, the same callback is
2826      * used !
2827      */
2828     if (ctxt->sax->ignorableWhitespace == ctxt->sax->characters)
2829         return(0);
2830
2831     /*
2832      * Check for xml:space value.
2833      */
2834     if ((ctxt->space == NULL) || (*(ctxt->space) == 1) ||
2835         (*(ctxt->space) == -2))
2836         return(0);
2837
2838     /*
2839      * Check that the string is made of blanks
2840      */
2841     if (blank_chars == 0) {
2842         for (i = 0;i < len;i++)
2843             if (!(IS_BLANK_CH(str[i]))) return(0);
2844     }
2845
2846     /*
2847      * Look if the element is mixed content in the DTD if available
2848      */
2849     if (ctxt->node == NULL) return(0);
2850     if (ctxt->myDoc != NULL) {
2851         ret = xmlIsMixedElement(ctxt->myDoc, ctxt->node->name);
2852         if (ret == 0) return(1);
2853         if (ret == 1) return(0);
2854     }
2855
2856     /*
2857      * Otherwise, heuristic :-\
2858      */
2859     if ((RAW != '<') && (RAW != 0xD)) return(0);
2860     if ((ctxt->node->children == NULL) &&
2861         (RAW == '<') && (NXT(1) == '/')) return(0);
2862
2863     lastChild = xmlGetLastChild(ctxt->node);
2864     if (lastChild == NULL) {
2865         if ((ctxt->node->type != XML_ELEMENT_NODE) &&
2866             (ctxt->node->content != NULL)) return(0);
2867     } else if (xmlNodeIsText(lastChild))
2868         return(0);
2869     else if ((ctxt->node->children != NULL) &&
2870              (xmlNodeIsText(ctxt->node->children)))
2871         return(0);
2872     return(1);
2873 }
2874
2875 /************************************************************************
2876  *                                                                      *
2877  *              Extra stuff for namespace support                       *
2878  *      Relates to http://www.w3.org/TR/WD-xml-names                    *
2879  *                                                                      *
2880  ************************************************************************/
2881
2882 /**
2883  * xmlSplitQName:
2884  * @ctxt:  an XML parser context
2885  * @name:  an XML parser context
2886  * @prefix:  a xmlChar **
2887  *
2888  * parse an UTF8 encoded XML qualified name string
2889  *
2890  * [NS 5] QName ::= (Prefix ':')? LocalPart
2891  *
2892  * [NS 6] Prefix ::= NCName
2893  *
2894  * [NS 7] LocalPart ::= NCName
2895  *
2896  * Returns the local part, and prefix is updated
2897  *   to get the Prefix if any.
2898  */
2899
2900 xmlChar *
2901 xmlSplitQName(xmlParserCtxtPtr ctxt, const xmlChar *name, xmlChar **prefix) {
2902     xmlChar buf[XML_MAX_NAMELEN + 5];
2903     xmlChar *buffer = NULL;
2904     int len = 0;
2905     int max = XML_MAX_NAMELEN;
2906     xmlChar *ret = NULL;
2907     const xmlChar *cur = name;
2908     int c;
2909
2910     if (prefix == NULL) return(NULL);
2911     *prefix = NULL;
2912
2913     if (cur == NULL) return(NULL);
2914
2915 #ifndef XML_XML_NAMESPACE
2916     /* xml: prefix is not really a namespace */
2917     if ((cur[0] == 'x') && (cur[1] == 'm') &&
2918         (cur[2] == 'l') && (cur[3] == ':'))
2919         return(xmlStrdup(name));
2920 #endif
2921
2922     /* nasty but well=formed */
2923     if (cur[0] == ':')
2924         return(xmlStrdup(name));
2925
2926     c = *cur++;
2927     while ((c != 0) && (c != ':') && (len < max)) { /* tested bigname.xml */
2928         buf[len++] = c;
2929         c = *cur++;
2930     }
2931     if (len >= max) {
2932         /*
2933          * Okay someone managed to make a huge name, so he's ready to pay
2934          * for the processing speed.
2935          */
2936         max = len * 2;
2937
2938         buffer = (xmlChar *) xmlMallocAtomic(max * sizeof(xmlChar));
2939         if (buffer == NULL) {
2940             xmlErrMemory(ctxt, NULL);
2941             return(NULL);
2942         }
2943         memcpy(buffer, buf, len);
2944         while ((c != 0) && (c != ':')) { /* tested bigname.xml */
2945             if (len + 10 > max) {
2946                 xmlChar *tmp;
2947
2948                 max *= 2;
2949                 tmp = (xmlChar *) xmlRealloc(buffer,
2950                                                 max * sizeof(xmlChar));
2951                 if (tmp == NULL) {
2952                     xmlFree(buffer);
2953                     xmlErrMemory(ctxt, NULL);
2954                     return(NULL);
2955                 }
2956                 buffer = tmp;
2957             }
2958             buffer[len++] = c;
2959             c = *cur++;
2960         }
2961         buffer[len] = 0;
2962     }
2963
2964     if ((c == ':') && (*cur == 0)) {
2965         if (buffer != NULL)
2966             xmlFree(buffer);
2967         *prefix = NULL;
2968         return(xmlStrdup(name));
2969     }
2970
2971     if (buffer == NULL)
2972         ret = xmlStrndup(buf, len);
2973     else {
2974         ret = buffer;
2975         buffer = NULL;
2976         max = XML_MAX_NAMELEN;
2977     }
2978
2979
2980     if (c == ':') {
2981         c = *cur;
2982         *prefix = ret;
2983         if (c == 0) {
2984             return(xmlStrndup(BAD_CAST "", 0));
2985         }
2986         len = 0;
2987
2988         /*
2989          * Check that the first character is proper to start
2990          * a new name
2991          */
2992         if (!(((c >= 0x61) && (c <= 0x7A)) ||
2993               ((c >= 0x41) && (c <= 0x5A)) ||
2994               (c == '_') || (c == ':'))) {
2995             int l;
2996             int first = CUR_SCHAR(cur, l);
2997
2998             if (!IS_LETTER(first) && (first != '_')) {
2999                 xmlFatalErrMsgStr(ctxt, XML_NS_ERR_QNAME,
3000                             "Name %s is not XML Namespace compliant\n",
3001                                   name);
3002             }
3003         }
3004         cur++;
3005
3006         while ((c != 0) && (len < max)) { /* tested bigname2.xml */
3007             buf[len++] = c;
3008             c = *cur++;
3009         }
3010         if (len >= max) {
3011             /*
3012              * Okay someone managed to make a huge name, so he's ready to pay
3013              * for the processing speed.
3014              */
3015             max = len * 2;
3016
3017             buffer = (xmlChar *) xmlMallocAtomic(max * sizeof(xmlChar));
3018             if (buffer == NULL) {
3019                 xmlErrMemory(ctxt, NULL);
3020                 return(NULL);
3021             }
3022             memcpy(buffer, buf, len);
3023             while (c != 0) { /* tested bigname2.xml */
3024                 if (len + 10 > max) {
3025                     xmlChar *tmp;
3026
3027                     max *= 2;
3028                     tmp = (xmlChar *) xmlRealloc(buffer,
3029                                                     max * sizeof(xmlChar));
3030                     if (tmp == NULL) {
3031                         xmlErrMemory(ctxt, NULL);
3032                         xmlFree(buffer);
3033                         return(NULL);
3034                     }
3035                     buffer = tmp;
3036                 }
3037                 buffer[len++] = c;
3038                 c = *cur++;
3039             }
3040             buffer[len] = 0;
3041         }
3042
3043         if (buffer == NULL)
3044             ret = xmlStrndup(buf, len);
3045         else {
3046             ret = buffer;
3047         }
3048     }
3049
3050     return(ret);
3051 }
3052
3053 /************************************************************************
3054  *                                                                      *
3055  *                      The parser itself                               *
3056  *      Relates to http://www.w3.org/TR/REC-xml                         *
3057  *                                                                      *
3058  ************************************************************************/
3059
3060 /************************************************************************
3061  *                                                                      *
3062  *      Routines to parse Name, NCName and NmToken                      *
3063  *                                                                      *
3064  ************************************************************************/
3065 #ifdef DEBUG /* call counters, only compiled in when DEBUG is defined */
3066 static unsigned long nbParseName = 0; /* incremented by xmlParseName() */
3067 static unsigned long nbParseNmToken = 0; /* incremented by xmlParseNmtoken() */
3068 static unsigned long nbParseNCName = 0; /* incremented by xmlParseNCName() */
3069 static unsigned long nbParseNCNameComplex = 0; /* incremented by xmlParseNCNameComplex() */
3070 static unsigned long nbParseNameComplex = 0; /* incremented by xmlParseNameComplex() */
3071 static unsigned long nbParseStringName = 0; /* incremented by xmlParseStringName() */
3072 #endif
3073
3074 /*
3075  * The two following functions are related to the change of accepted
3076  * characters for Name and NmToken in the Revision 5 of XML-1.0
3077  * They correspond to the modified production [4] and the new production [4a]
3078  * changes in that revision. Also note that the macros used for the
3079  * productions Letter, Digit, CombiningChar and Extender are not needed
3080  * anymore.
3081  * We still keep compatibility to pre-revision5 parsing semantic if the
3082  * new XML_PARSE_OLD10 option is given to the parser.
3083  */
3084 static int
3085 xmlIsNameStartChar(xmlParserCtxtPtr ctxt, int c) {
3086     if ((ctxt->options & XML_PARSE_OLD10) == 0) {
3087         /*
3088          * Use the new checks of production [4] [4a] amd [5] of the
3089          * Update 5 of XML-1.0
3090          */
3091         if ((c != ' ') && (c != '>') && (c != '/') && /* accelerators */
3092             (((c >= 'a') && (c <= 'z')) ||
3093              ((c >= 'A') && (c <= 'Z')) ||
3094              (c == '_') || (c == ':') ||
3095              ((c >= 0xC0) && (c <= 0xD6)) ||
3096              ((c >= 0xD8) && (c <= 0xF6)) ||
3097              ((c >= 0xF8) && (c <= 0x2FF)) ||
3098              ((c >= 0x370) && (c <= 0x37D)) ||
3099              ((c >= 0x37F) && (c <= 0x1FFF)) ||
3100              ((c >= 0x200C) && (c <= 0x200D)) ||
3101              ((c >= 0x2070) && (c <= 0x218F)) ||
3102              ((c >= 0x2C00) && (c <= 0x2FEF)) ||
3103              ((c >= 0x3001) && (c <= 0xD7FF)) ||
3104              ((c >= 0xF900) && (c <= 0xFDCF)) ||
3105              ((c >= 0xFDF0) && (c <= 0xFFFD)) ||
3106              ((c >= 0x10000) && (c <= 0xEFFFF))))
3107             return(1);
3108     } else {
3109         if (IS_LETTER(c) || (c == '_') || (c == ':'))
3110             return(1);
3111     }
3112     return(0);
3113 }
3114
3115 static int
3116 xmlIsNameChar(xmlParserCtxtPtr ctxt, int c) {
3117     if ((ctxt->options & XML_PARSE_OLD10) == 0) {
3118         /*
3119          * Use the new checks of production [4] [4a] amd [5] of the
3120          * Update 5 of XML-1.0
3121          */
3122         if ((c != ' ') && (c != '>') && (c != '/') && /* accelerators */
3123             (((c >= 'a') && (c <= 'z')) ||
3124              ((c >= 'A') && (c <= 'Z')) ||
3125              ((c >= '0') && (c <= '9')) || /* !start */
3126              (c == '_') || (c == ':') ||
3127              (c == '-') || (c == '.') || (c == 0xB7) || /* !start */
3128              ((c >= 0xC0) && (c <= 0xD6)) ||
3129              ((c >= 0xD8) && (c <= 0xF6)) ||
3130              ((c >= 0xF8) && (c <= 0x2FF)) ||
3131              ((c >= 0x300) && (c <= 0x36F)) || /* !start */
3132              ((c >= 0x370) && (c <= 0x37D)) ||
3133              ((c >= 0x37F) && (c <= 0x1FFF)) ||
3134              ((c >= 0x200C) && (c <= 0x200D)) ||
3135              ((c >= 0x203F) && (c <= 0x2040)) || /* !start */
3136              ((c >= 0x2070) && (c <= 0x218F)) ||
3137              ((c >= 0x2C00) && (c <= 0x2FEF)) ||
3138              ((c >= 0x3001) && (c <= 0xD7FF)) ||
3139              ((c >= 0xF900) && (c <= 0xFDCF)) ||
3140              ((c >= 0xFDF0) && (c <= 0xFFFD)) ||
3141              ((c >= 0x10000) && (c <= 0xEFFFF))))
3142              return(1);
3143     } else {
3144         if ((IS_LETTER(c)) || (IS_DIGIT(c)) ||
3145             (c == '.') || (c == '-') ||
3146             (c == '_') || (c == ':') ||
3147             (IS_COMBINING(c)) ||
3148             (IS_EXTENDER(c)))
3149             return(1);
3150     }
3151     return(0);
3152 }
3153
3154 static xmlChar * xmlParseAttValueInternal(xmlParserCtxtPtr ctxt,
3155                                           int *len, int *alloc, int normalize); /* forward declaration only; the definition is not in this part of the file */
3156
3157 static const xmlChar *
3158 xmlParseNameComplex(xmlParserCtxtPtr ctxt) {
3159     int len = 0, l;
3160     int c;
3161     int count = 0;
3162
3163 #ifdef DEBUG
3164     nbParseNameComplex++;
3165 #endif
3166
3167     /*
3168      * Handler for more complex cases
3169      */
3170     GROW;
3171     c = CUR_CHAR(l);
3172     if ((ctxt->options & XML_PARSE_OLD10) == 0) {
3173         /*
3174          * Use the new checks of production [4] [4a] amd [5] of the
3175          * Update 5 of XML-1.0
3176          */
3177         if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */
3178             (!(((c >= 'a') && (c <= 'z')) ||
3179                ((c >= 'A') && (c <= 'Z')) ||
3180                (c == '_') || (c == ':') ||
3181                ((c >= 0xC0) && (c <= 0xD6)) ||
3182                ((c >= 0xD8) && (c <= 0xF6)) ||
3183                ((c >= 0xF8) && (c <= 0x2FF)) ||
3184                ((c >= 0x370) && (c <= 0x37D)) ||
3185                ((c >= 0x37F) && (c <= 0x1FFF)) ||
3186                ((c >= 0x200C) && (c <= 0x200D)) ||
3187                ((c >= 0x2070) && (c <= 0x218F)) ||
3188                ((c >= 0x2C00) && (c <= 0x2FEF)) ||
3189                ((c >= 0x3001) && (c <= 0xD7FF)) ||
3190                ((c >= 0xF900) && (c <= 0xFDCF)) ||
3191                ((c >= 0xFDF0) && (c <= 0xFFFD)) ||
3192                ((c >= 0x10000) && (c <= 0xEFFFF))))) {
3193             return(NULL);
3194         }
3195         len += l;
3196         NEXTL(l);
3197         c = CUR_CHAR(l);
3198         while ((c != ' ') && (c != '>') && (c != '/') && /* accelerators */
3199                (((c >= 'a') && (c <= 'z')) ||
3200                 ((c >= 'A') && (c <= 'Z')) ||
3201                 ((c >= '0') && (c <= '9')) || /* !start */
3202                 (c == '_') || (c == ':') ||
3203                 (c == '-') || (c == '.') || (c == 0xB7) || /* !start */
3204                 ((c >= 0xC0) && (c <= 0xD6)) ||
3205                 ((c >= 0xD8) && (c <= 0xF6)) ||
3206                 ((c >= 0xF8) && (c <= 0x2FF)) ||
3207                 ((c >= 0x300) && (c <= 0x36F)) || /* !start */
3208                 ((c >= 0x370) && (c <= 0x37D)) ||
3209                 ((c >= 0x37F) && (c <= 0x1FFF)) ||
3210                 ((c >= 0x200C) && (c <= 0x200D)) ||
3211                 ((c >= 0x203F) && (c <= 0x2040)) || /* !start */
3212                 ((c >= 0x2070) && (c <= 0x218F)) ||
3213                 ((c >= 0x2C00) && (c <= 0x2FEF)) ||
3214                 ((c >= 0x3001) && (c <= 0xD7FF)) ||
3215                 ((c >= 0xF900) && (c <= 0xFDCF)) ||
3216                 ((c >= 0xFDF0) && (c <= 0xFFFD)) ||
3217                 ((c >= 0x10000) && (c <= 0xEFFFF))
3218                 )) {
3219             if (count++ > 100) {
3220                 count = 0;
3221                 GROW;
3222             }
3223             len += l;
3224             NEXTL(l);
3225             c = CUR_CHAR(l);
3226         }
3227     } else {
3228         if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */
3229             (!IS_LETTER(c) && (c != '_') &&
3230              (c != ':'))) {
3231             return(NULL);
3232         }
3233         len += l;
3234         NEXTL(l);
3235         c = CUR_CHAR(l);
3236
3237         while ((c != ' ') && (c != '>') && (c != '/') && /* test bigname.xml */
3238                ((IS_LETTER(c)) || (IS_DIGIT(c)) ||
3239                 (c == '.') || (c == '-') ||
3240                 (c == '_') || (c == ':') ||
3241                 (IS_COMBINING(c)) ||
3242                 (IS_EXTENDER(c)))) {
3243             if (count++ > 100) {
3244                 count = 0;
3245                 GROW;
3246             }
3247             len += l;
3248             NEXTL(l);
3249             c = CUR_CHAR(l);
3250         }
3251     }
3252     if ((*ctxt->input->cur == '\n') && (ctxt->input->cur[-1] == '\r'))
3253         return(xmlDictLookup(ctxt->dict, ctxt->input->cur - (len + 1), len));
3254     return(xmlDictLookup(ctxt->dict, ctxt->input->cur - len, len));
3255 }
3256
3257 /**
3258  * xmlParseName:
3259  * @ctxt:  an XML parser context
3260  *
3261  * parse an XML name.
3262  *
3263  * [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' |
3264  *                  CombiningChar | Extender
3265  *
3266  * [5] Name ::= (Letter | '_' | ':') (NameChar)*
3267  *
3268  * [6] Names ::= Name (#x20 Name)*
3269  *
3270  * Returns the Name parsed or NULL
3271  */
3272
3273 const xmlChar *
3274 xmlParseName(xmlParserCtxtPtr ctxt) {
3275     const xmlChar *in;
3276     const xmlChar *ret;
3277     int count = 0;
3278
3279     GROW;
3280
3281 #ifdef DEBUG
3282     nbParseName++;
3283 #endif
3284
3285     /*
3286      * Accelerator for simple ASCII names
3287      */
3288     in = ctxt->input->cur;
3289     if (((*in >= 0x61) && (*in <= 0x7A)) ||
3290         ((*in >= 0x41) && (*in <= 0x5A)) ||
3291         (*in == '_') || (*in == ':')) {
3292         in++;
3293         while (((*in >= 0x61) && (*in <= 0x7A)) ||
3294                ((*in >= 0x41) && (*in <= 0x5A)) ||
3295                ((*in >= 0x30) && (*in <= 0x39)) ||
3296                (*in == '_') || (*in == '-') ||
3297                (*in == ':') || (*in == '.'))
3298             in++;
3299         if ((*in > 0) && (*in < 0x80)) {
3300             count = in - ctxt->input->cur;
3301             ret = xmlDictLookup(ctxt->dict, ctxt->input->cur, count);
3302             ctxt->input->cur = in;
3303             ctxt->nbChars += count;
3304             ctxt->input->col += count;
3305             if (ret == NULL)
3306                 xmlErrMemory(ctxt, NULL);
3307             return(ret);
3308         }
3309     }
3310     /* accelerator for special cases */
3311     return(xmlParseNameComplex(ctxt));
3312 }
3313
3314 static const xmlChar *
3315 xmlParseNCNameComplex(xmlParserCtxtPtr ctxt) {
3316     int len = 0, l;
3317     int c;
3318     int count = 0;
3319
3320 #ifdef DEBUG
3321     nbParseNCNameComplex++;
3322 #endif
3323
3324     /*
3325      * Handler for more complex cases
3326      */
3327     GROW;
3328     c = CUR_CHAR(l);
3329     if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */
3330         (!xmlIsNameStartChar(ctxt, c) || (c == ':'))) {
3331         return(NULL);
3332     }
3333
3334     while ((c != ' ') && (c != '>') && (c != '/') && /* test bigname.xml */
3335            (xmlIsNameChar(ctxt, c) && (c != ':'))) {
3336         if (count++ > 100) {
3337             count = 0;
3338             GROW;
3339         }
3340         len += l;
3341         NEXTL(l);
3342         c = CUR_CHAR(l);
3343     }
3344     return(xmlDictLookup(ctxt->dict, ctxt->input->cur - len, len));
3345 }
3346
3347 /**
3348  * xmlParseNCName:
3349  * @ctxt:  an XML parser context
3350  * @len:  lenght of the string parsed
3351  *
3352  * parse an XML name.
3353  *
3354  * [4NS] NCNameChar ::= Letter | Digit | '.' | '-' | '_' |
3355  *                      CombiningChar | Extender
3356  *
3357  * [5NS] NCName ::= (Letter | '_') (NCNameChar)*
3358  *
3359  * Returns the Name parsed or NULL
3360  */
3361
3362 static const xmlChar *
3363 xmlParseNCName(xmlParserCtxtPtr ctxt) {
3364     const xmlChar *in;
3365     const xmlChar *ret;
3366     int count = 0;
3367
3368 #ifdef DEBUG
3369     nbParseNCName++;
3370 #endif
3371
3372     /*
3373      * Accelerator for simple ASCII names
3374      */
3375     in = ctxt->input->cur;
3376     if (((*in >= 0x61) && (*in <= 0x7A)) ||
3377         ((*in >= 0x41) && (*in <= 0x5A)) ||
3378         (*in == '_')) {
3379         in++;
3380         while (((*in >= 0x61) && (*in <= 0x7A)) ||
3381                ((*in >= 0x41) && (*in <= 0x5A)) ||
3382                ((*in >= 0x30) && (*in <= 0x39)) ||
3383                (*in == '_') || (*in == '-') ||
3384                (*in == '.'))
3385             in++;
3386         if ((*in > 0) && (*in < 0x80)) {
3387             count = in - ctxt->input->cur;
3388             ret = xmlDictLookup(ctxt->dict, ctxt->input->cur, count);
3389             ctxt->input->cur = in;
3390             ctxt->nbChars += count;
3391             ctxt->input->col += count;
3392             if (ret == NULL) {
3393                 xmlErrMemory(ctxt, NULL);
3394             }
3395             return(ret);
3396         }
3397     }
3398     return(xmlParseNCNameComplex(ctxt));
3399 }
3400
3401 /**
3402  * xmlParseNameAndCompare:
3403  * @ctxt:  an XML parser context
3404  *
3405  * parse an XML name and compares for match
3406  * (specialized for endtag parsing)
3407  *
3408  * Returns NULL for an illegal name, (xmlChar*) 1 for success
3409  * and the name for mismatch
3410  */
3411
3412 static const xmlChar *
3413 xmlParseNameAndCompare(xmlParserCtxtPtr ctxt, xmlChar const *other) {
3414     register const xmlChar *cmp = other;
3415     register const xmlChar *in;
3416     const xmlChar *ret;
3417
3418     GROW;
3419
3420     in = ctxt->input->cur;
3421     while (*in != 0 && *in == *cmp) {
3422         ++in;
3423         ++cmp;
3424         ctxt->input->col++;
3425     }
3426     if (*cmp == 0 && (*in == '>' || IS_BLANK_CH (*in))) {
3427         /* success */
3428         ctxt->input->cur = in;
3429         return (const xmlChar*) 1;
3430     }
3431     /* failure (or end of input buffer), check with full function */
3432     ret = xmlParseName (ctxt);
3433     /* strings coming from the dictionnary direct compare possible */
3434     if (ret == other) {
3435         return (const xmlChar*) 1;
3436     }
3437     return ret;
3438 }
3439
3440 /**
3441  * xmlParseStringName:
3442  * @ctxt:  an XML parser context
3443  * @str:  a pointer to the string pointer (IN/OUT)
3444  *
3445  * parse an XML name.
3446  *
3447  * [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' |
3448  *                  CombiningChar | Extender
3449  *
3450  * [5] Name ::= (Letter | '_' | ':') (NameChar)*
3451  *
3452  * [6] Names ::= Name (#x20 Name)*
3453  *
3454  * Returns the Name parsed or NULL. The @str pointer
3455  * is updated to the current location in the string.
3456  */
3457
3458 static xmlChar *
3459 xmlParseStringName(xmlParserCtxtPtr ctxt, const xmlChar** str) {
3460     xmlChar buf[XML_MAX_NAMELEN + 5];
3461     const xmlChar *cur = *str;
3462     int len = 0, l;
3463     int c;
3464
3465 #ifdef DEBUG
3466     nbParseStringName++;
3467 #endif
3468
3469     c = CUR_SCHAR(cur, l);
3470     if (!xmlIsNameStartChar(ctxt, c)) {
3471         return(NULL);
3472     }
3473
3474     COPY_BUF(l,buf,len,c);
3475     cur += l;
3476     c = CUR_SCHAR(cur, l);
3477     while (xmlIsNameChar(ctxt, c)) {
3478         COPY_BUF(l,buf,len,c);
3479         cur += l;
3480         c = CUR_SCHAR(cur, l);
3481         if (len >= XML_MAX_NAMELEN) { /* test bigentname.xml */
3482             /*
3483              * Okay someone managed to make a huge name, so he's ready to pay
3484              * for the processing speed.
3485              */
3486             xmlChar *buffer;
3487             int max = len * 2;
3488
3489             buffer = (xmlChar *) xmlMallocAtomic(max * sizeof(xmlChar));
3490             if (buffer == NULL) {
3491                 xmlErrMemory(ctxt, NULL);
3492                 return(NULL);
3493             }
3494             memcpy(buffer, buf, len);
3495             while (xmlIsNameChar(ctxt, c)) {
3496                 if (len + 10 > max) {
3497                     xmlChar *tmp;
3498                     max *= 2;
3499                     tmp = (xmlChar *) xmlRealloc(buffer,
3500                                                     max * sizeof(xmlChar));
3501                     if (tmp == NULL) {
3502                         xmlErrMemory(ctxt, NULL);
3503                         xmlFree(buffer);
3504                         return(NULL);
3505                     }
3506                     buffer = tmp;
3507                 }
3508                 COPY_BUF(l,buffer,len,c);
3509                 cur += l;
3510                 c = CUR_SCHAR(cur, l);
3511             }
3512             buffer[len] = 0;
3513             *str = cur;
3514             return(buffer);
3515         }
3516     }
3517     *str = cur;
3518     return(xmlStrndup(buf, len));
3519 }
3520
3521 /**
3522  * xmlParseNmtoken:
3523  * @ctxt:  an XML parser context
3524  *
3525  * parse an XML Nmtoken.
3526  *
3527  * [7] Nmtoken ::= (NameChar)+
3528  *
3529  * [8] Nmtokens ::= Nmtoken (#x20 Nmtoken)*
3530  *
3531  * Returns the Nmtoken parsed or NULL
3532  */
3533
3534 xmlChar *
3535 xmlParseNmtoken(xmlParserCtxtPtr ctxt) {
3536     xmlChar buf[XML_MAX_NAMELEN + 5];
3537     int len = 0, l;
3538     int c;
3539     int count = 0;
3540
3541 #ifdef DEBUG
3542     nbParseNmToken++;
3543 #endif
3544
3545     GROW;
3546     c = CUR_CHAR(l);
3547
3548     while (xmlIsNameChar(ctxt, c)) {
3549         if (count++ > 100) {
3550             count = 0;
3551             GROW;
3552         }
3553         COPY_BUF(l,buf,len,c);
3554         NEXTL(l);
3555         c = CUR_CHAR(l);
3556         if (len >= XML_MAX_NAMELEN) {
3557             /*
3558              * Okay someone managed to make a huge token, so he's ready to pay
3559              * for the processing speed.
3560              */
3561             xmlChar *buffer;
3562             int max = len * 2;
3563
3564             buffer = (xmlChar *) xmlMallocAtomic(max * sizeof(xmlChar));
3565             if (buffer == NULL) {
3566                 xmlErrMemory(ctxt, NULL);
3567                 return(NULL);
3568             }
3569             memcpy(buffer, buf, len);
3570             while (xmlIsNameChar(ctxt, c)) {
3571                 if (count++ > 100) {
3572                     count = 0;
3573                     GROW;
3574                 }
3575                 if (len + 10 > max) {
3576                     xmlChar *tmp;
3577
3578                     max *= 2;
3579                     tmp = (xmlChar *) xmlRealloc(buffer,
3580                                                     max * sizeof(xmlChar));
3581                     if (tmp == NULL) {
3582                         xmlErrMemory(ctxt, NULL);
3583                         xmlFree(buffer);
3584                         return(NULL);
3585                     }
3586                     buffer = tmp;
3587                 }
3588                 COPY_BUF(l,buffer,len,c);
3589                 NEXTL(l);
3590                 c = CUR_CHAR(l);
3591             }
3592             buffer[len] = 0;
3593             return(buffer);
3594         }
3595     }
3596     if (len == 0)
3597         return(NULL);
3598     return(xmlStrndup(buf, len));
3599 }
3600
3601 /**
3602  * xmlParseEntityValue:
3603  * @ctxt:  an XML parser context
3604  * @orig:  if non-NULL store a copy of the original entity value
3605  *
3606  * parse a value for ENTITY declarations
3607  *
3608  * [9] EntityValue ::= '"' ([^%&"] | PEReference | Reference)* '"' |
3609  *                     "'" ([^%&'] | PEReference | Reference)* "'"
3610  *
3611  * Returns the EntityValue parsed with reference substituted or NULL
3612  */
3613
3614 xmlChar *
3615 xmlParseEntityValue(xmlParserCtxtPtr ctxt, xmlChar **orig) {
3616     xmlChar *buf = NULL;
3617     int len = 0;
3618     int size = XML_PARSER_BUFFER_SIZE;
3619     int c, l;
3620     xmlChar stop;
3621     xmlChar *ret = NULL;
3622     const xmlChar *cur = NULL;
3623     xmlParserInputPtr input;
3624
3625     if (RAW == '"') stop = '"';
3626     else if (RAW == '\'') stop = '\'';
3627     else {
3628         xmlFatalErr(ctxt, XML_ERR_ENTITY_NOT_STARTED, NULL);
3629         return(NULL);
3630     }
3631     buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
3632     if (buf == NULL) {
3633         xmlErrMemory(ctxt, NULL);
3634         return(NULL);
3635     }
3636
3637     /*
3638      * The content of the entity definition is copied in a buffer.
3639      */
3640
3641     ctxt->instate = XML_PARSER_ENTITY_VALUE;
3642     input = ctxt->input;
3643     GROW;
3644     NEXT;
3645     c = CUR_CHAR(l);
3646     /*
3647      * NOTE: 4.4.5 Included in Literal
3648      * When a parameter entity reference appears in a literal entity
3649      * value, ... a single or double quote character in the replacement
3650      * text is always treated as a normal data character and will not
3651      * terminate the literal.
3652      * In practice it means we stop the loop only when back at parsing
3653      * the initial entity and the quote is found
3654      */
3655     while ((IS_CHAR(c)) && ((c != stop) || /* checked */
3656            (ctxt->input != input))) {
3657         if (len + 5 >= size) {
3658             xmlChar *tmp;
3659
3660             size *= 2;
3661             tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
3662             if (tmp == NULL) {
3663                 xmlErrMemory(ctxt, NULL);
3664                 xmlFree(buf);
3665                 return(NULL);
3666             }
3667             buf = tmp;
3668         }
3669         COPY_BUF(l,buf,len,c);
3670         NEXTL(l);
3671         /*
3672          * Pop-up of finished entities.
3673          */
3674         while ((RAW == 0) && (ctxt->inputNr > 1)) /* non input consuming */
3675             xmlPopInput(ctxt);
3676
3677         GROW;
3678         c = CUR_CHAR(l);
3679         if (c == 0) {
3680             GROW;
3681             c = CUR_CHAR(l);
3682         }
3683     }
3684     buf[len] = 0;
3685
3686     /*
3687      * Raise problem w.r.t. '&' and '%' being used in non-entities
3688      * reference constructs. Note Charref will be handled in
3689      * xmlStringDecodeEntities()
3690      */
3691     cur = buf;
3692     while (*cur != 0) { /* non input consuming */
3693         if ((*cur == '%') || ((*cur == '&') && (cur[1] != '#'))) {
3694             xmlChar *name;
3695             xmlChar tmp = *cur;
3696
3697             cur++;
3698             name = xmlParseStringName(ctxt, &cur);
3699             if ((name == NULL) || (*cur != ';')) {
3700                 xmlFatalErrMsgInt(ctxt, XML_ERR_ENTITY_CHAR_ERROR,
3701             "EntityValue: '%c' forbidden except for entities references\n",
3702                                   tmp);
3703             }
3704             if ((tmp == '%') && (ctxt->inSubset == 1) &&
3705                 (ctxt->inputNr == 1)) {
3706                 xmlFatalErr(ctxt, XML_ERR_ENTITY_PE_INTERNAL, NULL);
3707             }
3708             if (name != NULL)
3709                 xmlFree(name);
3710             if (*cur == 0)
3711                 break;
3712         }
3713         cur++;
3714     }
3715
3716     /*
3717      * Then PEReference entities are substituted.
3718      */
3719     if (c != stop) {
3720         xmlFatalErr(ctxt, XML_ERR_ENTITY_NOT_FINISHED, NULL);
3721         xmlFree(buf);
3722     } else {
3723         NEXT;
3724         /*
3725          * NOTE: 4.4.7 Bypassed
3726          * When a general entity reference appears in the EntityValue in
3727          * an entity declaration, it is bypassed and left as is.
3728          * so XML_SUBSTITUTE_REF is not set here.
3729          */
3730         ret = xmlStringDecodeEntities(ctxt, buf, XML_SUBSTITUTE_PEREF,
3731                                       0, 0, 0);
3732         if (orig != NULL)
3733             *orig = buf;
3734         else
3735             xmlFree(buf);
3736     }
3737
3738     return(ret);
3739 }
3740
3741 /**
3742  * xmlParseAttValueComplex:
3743  * @ctxt:  an XML parser context
3744  * @len:   the resulting attribute len
3745  * @normalize:  wether to apply the inner normalization
3746  *
3747  * parse a value for an attribute, this is the fallback function
3748  * of xmlParseAttValue() when the attribute parsing requires handling
3749  * of non-ASCII characters, or normalization compaction.
3750  *
3751  * Returns the AttValue parsed or NULL. The value has to be freed by the caller.
3752  */
3753 static xmlChar *
3754 xmlParseAttValueComplex(xmlParserCtxtPtr ctxt, int *attlen, int normalize) {
3755     xmlChar limit = 0;
3756     xmlChar *buf = NULL;
3757     xmlChar *rep = NULL;
3758     int len = 0;
3759     int buf_size = 0;
3760     int c, l, in_space = 0;
3761     xmlChar *current = NULL;
3762     xmlEntityPtr ent;
3763
3764     if (NXT(0) == '"') {
3765         ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
3766         limit = '"';
3767         NEXT;
3768     } else if (NXT(0) == '\'') {
3769         limit = '\'';
3770         ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
3771         NEXT;
3772     } else {
3773         xmlFatalErr(ctxt, XML_ERR_ATTRIBUTE_NOT_STARTED, NULL);
3774         return(NULL);
3775     }
3776
3777     /*
3778      * allocate a translation buffer.
3779      */
3780     buf_size = XML_PARSER_BUFFER_SIZE;
3781     buf = (xmlChar *) xmlMallocAtomic(buf_size * sizeof(xmlChar));
3782     if (buf == NULL) goto mem_error;
3783
3784     /*
3785      * OK loop until we reach one of the ending char or a size limit.
3786      */
3787     c = CUR_CHAR(l);
3788     while ((NXT(0) != limit) && /* checked */
3789            (IS_CHAR(c)) && (c != '<')) {
3790         if (c == 0) break;
3791         if (c == '&') {
3792             in_space = 0;
3793             if (NXT(1) == '#') {
3794                 int val = xmlParseCharRef(ctxt);
3795
3796                 if (val == '&') {
3797                     if (ctxt->replaceEntities) {
3798                         if (len > buf_size - 10) {
3799                             growBuffer(buf, 10);
3800                         }
3801                         buf[len++] = '&';
3802                     } else {
3803                         /*
3804                          * The reparsing will be done in xmlStringGetNodeList()
3805                          * called by the attribute() function in SAX.c
3806                          */
3807                         if (len > buf_size - 10) {
3808                             growBuffer(buf, 10);
3809                         }
3810                         buf[len++] = '&';
3811                         buf[len++] = '#';
3812                         buf[len++] = '3';
3813                         buf[len++] = '8';
3814                         buf[len++] = ';';
3815                     }
3816                 } else if (val != 0) {
3817                     if (len > buf_size - 10) {
3818                         growBuffer(buf, 10);
3819                     }
3820                     len += xmlCopyChar(0, &buf[len], val);
3821                 }
3822             } else {
3823                 ent = xmlParseEntityRef(ctxt);
3824                 ctxt->nbentities++;
3825                 if (ent != NULL)
3826                     ctxt->nbentities += ent->owner;
3827                 if ((ent != NULL) &&
3828                     (ent->etype == XML_INTERNAL_PREDEFINED_ENTITY)) {
3829                     if (len > buf_size - 10) {
3830                         growBuffer(buf, 10);
3831                     }
3832                     if ((ctxt->replaceEntities == 0) &&
3833                         (ent->content[0] == '&')) {
3834                         buf[len++] = '&';
3835                         buf[len++] = '#';
3836                         buf[len++] = '3';
3837                         buf[len++] = '8';
3838                         buf[len++] = ';';
3839                     } else {
3840                         buf[len++] = ent->content[0];
3841                     }
3842                 } else if ((ent != NULL) &&
3843                            (ctxt->replaceEntities != 0)) {
3844                     if (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) {
3845                         rep = xmlStringDecodeEntities(ctxt, ent->content,
3846                                                       XML_SUBSTITUTE_REF,
3847                                                       0, 0, 0);
3848                         if (rep != NULL) {
3849                             current = rep;
3850                             while (*current != 0) { /* non input consuming */
3851                                 if ((*current == 0xD) || (*current == 0xA) ||
3852                                     (*current == 0x9)) {
3853                                     buf[len++] = 0x20;
3854                                     current++;
3855                                 } else
3856                                     buf[len++] = *current++;
3857                                 if (len > buf_size - 10) {
3858                                     growBuffer(buf, 10);
3859                                 }
3860                             }
3861                             xmlFree(rep);
3862                             rep = NULL;
3863                         }
3864                     } else {
3865                         if (len > buf_size - 10) {
3866                             growBuffer(buf, 10);
3867                         }
3868                         if (ent->content != NULL)
3869                             buf[len++] = ent->content[0];
3870                     }
3871                 } else if (ent != NULL) {
3872                     int i = xmlStrlen(ent->name);
3873                     const xmlChar *cur = ent->name;
3874
3875                     /*
3876                      * This may look absurd but is needed to detect
3877                      * entities problems
3878                      */
3879                     if ((ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) &&
3880                         (ent->content != NULL)) {
3881                         rep = xmlStringDecodeEntities(ctxt, ent->content,
3882                                                   XML_SUBSTITUTE_REF, 0, 0, 0);
3883                         if (rep != NULL) {
3884                             xmlFree(rep);
3885                             rep = NULL;
3886                         }
3887                     }
3888
3889                     /*
3890                      * Just output the reference
3891                      */
3892                     buf[len++] = '&';
3893                     while (len > buf_size - i - 10) {
3894                         growBuffer(buf, i + 10);
3895                     }
3896                     for (;i > 0;i--)
3897                         buf[len++] = *cur++;
3898                     buf[len++] = ';';
3899                 }
3900             }
3901         } else {
3902             if ((c == 0x20) || (c == 0xD) || (c == 0xA) || (c == 0x9)) {
3903                 if ((len != 0) || (!normalize)) {
3904                     if ((!normalize) || (!in_space)) {
3905                         COPY_BUF(l,buf,len,0x20);
3906                         while (len > buf_size - 10) {
3907                             growBuffer(buf, 10);
3908                         }
3909                     }
3910                     in_space = 1;
3911                 }
3912             } else {
3913                 in_space = 0;
3914                 COPY_BUF(l,buf,len,c);
3915                 if (len > buf_size - 10) {
3916                     growBuffer(buf, 10);
3917                 }
3918             }
3919             NEXTL(l);
3920         }
3921         GROW;
3922         c = CUR_CHAR(l);
3923     }
3924     if ((in_space) && (normalize)) {
3925         while (buf[len - 1] == 0x20) len--;
3926     }
3927     buf[len] = 0;
3928     if (RAW == '<') {
3929         xmlFatalErr(ctxt, XML_ERR_LT_IN_ATTRIBUTE, NULL);
3930     } else if (RAW != limit) {
3931         if ((c != 0) && (!IS_CHAR(c))) {
3932             xmlFatalErrMsg(ctxt, XML_ERR_INVALID_CHAR,
3933                            "invalid character in attribute value\n");
3934         } else {
3935             xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
3936                            "AttValue: ' expected\n");
3937         }
3938     } else
3939         NEXT;
3940     if (attlen != NULL) *attlen = len;
3941     return(buf);
3942
3943 mem_error:
3944     xmlErrMemory(ctxt, NULL);
3945     if (buf != NULL)
3946         xmlFree(buf);
3947     if (rep != NULL)
3948         xmlFree(rep);
3949     return(NULL);
3950 }
3951
3952 /**
3953  * xmlParseAttValue:
3954  * @ctxt:  an XML parser context
3955  *
3956  * parse a value for an attribute
3957  * Note: the parser won't do substitution of entities here, this
3958  * will be handled later in xmlStringGetNodeList
3959  *
3960  * [10] AttValue ::= '"' ([^<&"] | Reference)* '"' |
3961  *                   "'" ([^<&'] | Reference)* "'"
3962  *
3963  * 3.3.3 Attribute-Value Normalization:
3964  * Before the value of an attribute is passed to the application or
3965  * checked for validity, the XML processor must normalize it as follows:
3966  * - a character reference is processed by appending the referenced
3967  *   character to the attribute value
3968  * - an entity reference is processed by recursively processing the
3969  *   replacement text of the entity
3970  * - a whitespace character (#x20, #xD, #xA, #x9) is processed by
3971  *   appending #x20 to the normalized value, except that only a single
3972  *   #x20 is appended for a "#xD#xA" sequence that is part of an external
3973  *   parsed entity or the literal entity value of an internal parsed entity
3974  * - other characters are processed by appending them to the normalized value
3975  * If the declared value is not CDATA, then the XML processor must further
3976  * process the normalized attribute value by discarding any leading and
3977  * trailing space (#x20) characters, and by replacing sequences of space
3978  * (#x20) characters by a single space (#x20) character.
3979  * All attributes for which no declaration has been read should be treated
3980  * by a non-validating parser as if declared CDATA.
3981  *
3982  * Returns the AttValue parsed or NULL. The value has to be freed by the caller.
3983  */
3984
3985
3986 xmlChar *
3987 xmlParseAttValue(xmlParserCtxtPtr ctxt) {
3988     if ((ctxt == NULL) || (ctxt->input == NULL)) return(NULL);
3989     return(xmlParseAttValueInternal(ctxt, NULL, NULL, 0));
3990 }
3991
3992 /**
3993  * xmlParseSystemLiteral:
3994  * @ctxt:  an XML parser context
3995  *
3996  * parse an XML Literal
3997  *
3998  * [11] SystemLiteral ::= ('"' [^"]* '"') | ("'" [^']* "'")
3999  *
4000  * Returns the SystemLiteral parsed or NULL
4001  */
4002
4003 xmlChar *
4004 xmlParseSystemLiteral(xmlParserCtxtPtr ctxt) {
4005     xmlChar *buf = NULL;
4006     int len = 0;
4007     int size = XML_PARSER_BUFFER_SIZE;
4008     int cur, l;
4009     xmlChar stop;
4010     int state = ctxt->instate;
4011     int count = 0;
4012
4013     SHRINK;
4014     if (RAW == '"') {
4015         NEXT;
4016         stop = '"';
4017     } else if (RAW == '\'') {
4018         NEXT;
4019         stop = '\'';
4020     } else {
4021         xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_STARTED, NULL);
4022         return(NULL);
4023     }
4024
4025     buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
4026     if (buf == NULL) {
4027         xmlErrMemory(ctxt, NULL);
4028         return(NULL);
4029     }
4030     ctxt->instate = XML_PARSER_SYSTEM_LITERAL;
4031     cur = CUR_CHAR(l);
4032     while ((IS_CHAR(cur)) && (cur != stop)) { /* checked */
4033         if (len + 5 >= size) {
4034             xmlChar *tmp;
4035
4036             size *= 2;
4037             tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
4038             if (tmp == NULL) {
4039                 xmlFree(buf);
4040                 xmlErrMemory(ctxt, NULL);
4041                 ctxt->instate = (xmlParserInputState) state;
4042                 return(NULL);
4043             }
4044             buf = tmp;
4045         }
4046         count++;
4047         if (count > 50) {
4048             GROW;
4049             count = 0;
4050         }
4051         COPY_BUF(l,buf,len,cur);
4052         NEXTL(l);
4053         cur = CUR_CHAR(l);
4054         if (cur == 0) {
4055             GROW;
4056             SHRINK;
4057             cur = CUR_CHAR(l);
4058         }
4059     }
4060     buf[len] = 0;
4061     ctxt->instate = (xmlParserInputState) state;
4062     if (!IS_CHAR(cur)) {
4063         xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_FINISHED, NULL);
4064     } else {
4065         NEXT;
4066     }
4067     return(buf);
4068 }
4069
4070 /**
4071  * xmlParsePubidLiteral:
4072  * @ctxt:  an XML parser context
4073  *
4074  * parse an XML public literal
4075  *
4076  * [12] PubidLiteral ::= '"' PubidChar* '"' | "'" (PubidChar - "'")* "'"
4077  *
4078  * Returns the PubidLiteral parsed or NULL.
4079  */
4080
4081 xmlChar *
4082 xmlParsePubidLiteral(xmlParserCtxtPtr ctxt) {
4083     xmlChar *buf = NULL;
4084     int len = 0;
4085     int size = XML_PARSER_BUFFER_SIZE;
4086     xmlChar cur;
4087     xmlChar stop;
4088     int count = 0;
4089     xmlParserInputState oldstate = ctxt->instate;
4090
4091     SHRINK;
4092     if (RAW == '"') {
4093         NEXT;
4094         stop = '"';
4095     } else if (RAW == '\'') {
4096         NEXT;
4097         stop = '\'';
4098     } else {
4099         xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_STARTED, NULL);
4100         return(NULL);
4101     }
4102     buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
4103     if (buf == NULL) {
4104         xmlErrMemory(ctxt, NULL);
4105         return(NULL);
4106     }
4107     ctxt->instate = XML_PARSER_PUBLIC_LITERAL;
4108     cur = CUR;
4109     while ((IS_PUBIDCHAR_CH(cur)) && (cur != stop)) { /* checked */
4110         if (len + 1 >= size) {
4111             xmlChar *tmp;
4112
4113             size *= 2;
4114             tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
4115             if (tmp == NULL) {
4116                 xmlErrMemory(ctxt, NULL);
4117                 xmlFree(buf);
4118                 return(NULL);
4119             }
4120             buf = tmp;
4121         }
4122         buf[len++] = cur;
4123         count++;
4124         if (count > 50) {
4125             GROW;
4126             count = 0;
4127         }
4128         NEXT;
4129         cur = CUR;
4130         if (cur == 0) {
4131             GROW;
4132             SHRINK;
4133             cur = CUR;
4134         }
4135     }
4136     buf[len] = 0;
4137     if (cur != stop) {
4138         xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_FINISHED, NULL);
4139     } else {
4140         NEXT;
4141     }
4142     ctxt->instate = oldstate;
4143     return(buf);
4144 }
4145
4146 static void xmlParseCharDataComplex(xmlParserCtxtPtr ctxt, int cdata);
4147
/*
 * Lookup table used for the test in the inner loop of the char data
 * scanning. It is indexed by byte value: an entry equal to its own index
 * lets the scan continue, a zero entry stops it. The zero entries are the
 * control characters other than TAB (so CR and LF are handled separately),
 * '&', '<', ']' and every non-ASCII byte.
 */
static const unsigned char test_char_data[256] = {
    /* 0x00-0x0F: only TAB (0x09) is accepted */
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x09, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    /* 0x10-0x1F */
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    /* 0x20-0x2F: '&' (0x26) stops the scan */
    0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0x00, 0x27,
    0x28, 0x29, 0x2A, 0x2B, 0x2C, 0x2D, 0x2E, 0x2F,
    /* 0x30-0x3F: '<' (0x3C) stops the scan */
    0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37,
    0x38, 0x39, 0x3A, 0x3B, 0x00, 0x3D, 0x3E, 0x3F,
    /* 0x40-0x4F */
    0x40, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47,
    0x48, 0x49, 0x4A, 0x4B, 0x4C, 0x4D, 0x4E, 0x4F,
    /* 0x50-0x5F: ']' (0x5D) stops the scan */
    0x50, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57,
    0x58, 0x59, 0x5A, 0x5B, 0x5C, 0x00, 0x5E, 0x5F,
    /* 0x60-0x6F */
    0x60, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67,
    0x68, 0x69, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F,
    /* 0x70-0x7F */
    0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77,
    0x78, 0x79, 0x7A, 0x7B, 0x7C, 0x7D, 0x7E, 0x7F
    /* 0x80-0xFF (non-ascii): left out, implicitly initialised to zero */
};
4185
/**
 * xmlParseCharData:
 * @ctxt:  an XML parser context
 * @cdata:  int indicating whether we are within a CDATA section
 *
 * parse a CharData section.
 * if we are within a CDATA section ']]>' marks an end of section.
 *
 * The right angle bracket (>) may be represented using the string "&gt;",
 * and must, for compatibility, be escaped using "&gt;" or a character
 * reference when it appears in the string "]]>" in content, when that
 * string is not marking the end of a CDATA section.
 *
 * [14] CharData ::= [^<&]* - ([^<&]* ']]>' [^<&]*)
 *
 * When @cdata is zero a fast path scans the input buffer in place and
 * hands runs of plain ASCII text directly to the SAX characters /
 * ignorableWhitespace callbacks. As soon as the next byte is outside
 * 0x20..0x7F and is not a TAB, or when @cdata is non-zero, the work is
 * delegated to xmlParseCharDataComplex().
 */

void
xmlParseCharData(xmlParserCtxtPtr ctxt, int cdata) {
    const xmlChar *in;
    int nbchar = 0;
    /*
     * Position as of the last chunk delivered to SAX; written back to the
     * input before falling back to xmlParseCharDataComplex().
     */
    int line = ctxt->input->line;
    int col = ctxt->input->col;
    int ccol;

    SHRINK;
    GROW;
    /*
     * Accelerated common case where input doesn't need to be
     * modified before passing it to the handler.
     */
    if (!cdata) {
        in = ctxt->input->cur;
        do {
            /* Skip runs of spaces and line feeds, tracking line/column. */
get_more_space:
            while (*in == 0x20) { in++; ctxt->input->col++; }
            if (*in == 0xA) {
                do {
                    ctxt->input->line++; ctxt->input->col = 1;
                    in++;
                } while (*in == 0xA);
                goto get_more_space;
            }
            /*
             * A '<' right after that whitespace: deliver the pending bytes
             * (if any) and return, leaving input->cur on the '<'.
             */
            if (*in == '<') {
                nbchar = in - ctxt->input->cur;
                if (nbchar > 0) {
                    const xmlChar *tmp = ctxt->input->cur;
                    ctxt->input->cur = in;

                    /*
                     * Only consult areBlanks() when the two callbacks
                     * differ; otherwise everything goes to characters().
                     */
                    if ((ctxt->sax != NULL) &&
                        (ctxt->sax->ignorableWhitespace !=
                         ctxt->sax->characters)) {
                        if (areBlanks(ctxt, tmp, nbchar, 1)) {
                            if (ctxt->sax->ignorableWhitespace != NULL)
                                ctxt->sax->ignorableWhitespace(ctxt->userData,
                                                       tmp, nbchar);
                        } else {
                            if (ctxt->sax->characters != NULL)
                                ctxt->sax->characters(ctxt->userData,
                                                      tmp, nbchar);
                            if (*ctxt->space == -1)
                                *ctxt->space = -2;
                        }
                    } else if ((ctxt->sax != NULL) &&
                               (ctxt->sax->characters != NULL)) {
                        ctxt->sax->characters(ctxt->userData,
                                              tmp, nbchar);
                    }
                }
                return;
            }

            /*
             * Advance over every byte whose test_char_data entry is
             * non-zero, counting columns in a local and storing the
             * result once the run ends.
             */
get_more:
            ccol = ctxt->input->col;
            while (test_char_data[*in]) {
                in++;
                ccol++;
            }
            ctxt->input->col = ccol;
            /* Line feeds stay part of the run; only line/col are updated. */
            if (*in == 0xA) {
                do {
                    ctxt->input->line++; ctxt->input->col = 1;
                    in++;
                } while (*in == 0xA);
                goto get_more;
            }
            /*
             * A ']' is ordinary text unless it starts "]]>", which is an
             * error outside of a CDATA section.
             */
            if (*in == ']') {
                if ((in[1] == ']') && (in[2] == '>')) {
                    xmlFatalErr(ctxt, XML_ERR_MISPLACED_CDATA_END, NULL);
                    ctxt->input->cur = in;
                    return;
                }
                in++;
                ctxt->input->col++;
                goto get_more;
            }
            /* Deliver the bytes scanned since input->cur, if any. */
            nbchar = in - ctxt->input->cur;
            if (nbchar > 0) {
                /*
                 * areBlanks() is only consulted when the callbacks differ
                 * and the chunk starts with a blank character.
                 */
                if ((ctxt->sax != NULL) &&
                    (ctxt->sax->ignorableWhitespace !=
                     ctxt->sax->characters) &&
                    (IS_BLANK_CH(*ctxt->input->cur))) {
                    const xmlChar *tmp = ctxt->input->cur;
                    ctxt->input->cur = in;

                    if (areBlanks(ctxt, tmp, nbchar, 0)) {
                        if (ctxt->sax->ignorableWhitespace != NULL)
                            ctxt->sax->ignorableWhitespace(ctxt->userData,
                                                           tmp, nbchar);
                    } else {
                        if (ctxt->sax->characters != NULL)
                            ctxt->sax->characters(ctxt->userData,
                                                  tmp, nbchar);
                        if (*ctxt->space == -1)
                            *ctxt->space = -2;
                    }
                    line = ctxt->input->line;
                    col = ctxt->input->col;
                } else if (ctxt->sax != NULL) {
                    if (ctxt->sax->characters != NULL)
                        ctxt->sax->characters(ctxt->userData,
                                              ctxt->input->cur, nbchar);
                    line = ctxt->input->line;
                    col = ctxt->input->col;
                }
                /* something really bad happened in the SAX callback */
                if (ctxt->instate != XML_PARSER_CONTENT)
                    return;
            }
            ctxt->input->cur = in;
            /*
             * CR immediately followed by LF: restart the pending text at
             * the LF, so the CR is left out of what gets delivered next.
             * A lone CR is left in place (in is moved back onto it).
             */
            if (*in == 0xD) {
                in++;
                if (*in == 0xA) {
                    ctxt->input->cur = in;
                    in++;
                    ctxt->input->line++; ctxt->input->col = 1;
                    continue; /* while */
                }
                in--;
            }
            /* Markup or a reference starts here: the caller takes over. */
            if (*in == '<') {
                return;
            }
            if (*in == '&') {
                return;
            }
            SHRINK;
            GROW;
            /* Re-read the cursor after the SHRINK/GROW calls above. */
            in = ctxt->input->cur;
            /* Stay on the fast path only for 0x20..0x7F or TAB. */
        } while (((*in >= 0x20) && (*in <= 0x7F)) || (*in == 0x09));
        nbchar = 0;
    }
    /* Restore the position of the last delivered chunk, then fall back. */
    ctxt->input->line = line;
    ctxt->input->col = col;
    xmlParseCharDataComplex(ctxt, cdata);
}
4341
4342 /**
4343  * xmlParseCharDataComplex:
4344  * @ctxt:  an XML parser context
4345  * @cdata:  int indicating whether we are within a CDATA section
4346  *
 * parse a CharData section. This is the fallback function
4348  * of xmlParseCharData() when the parsing requires handling
4349  * of non-ASCII characters.
4350  */
4351 static void
4352 xmlParseCharDataComplex(xmlParserCtxtPtr ctxt, int cdata) {
4353     xmlChar buf[XML_PARSER_BIG_BUFFER_SIZE + 5];
4354     int nbchar = 0;
4355     int cur, l;
4356     int count = 0;
4357
4358     SHRINK;
4359     GROW;
4360     cur = CUR_CHAR(l);
4361     while ((cur != '<') && /* checked */
4362            (cur != '&') &&
4363            (IS_CHAR(cur))) /* test also done in xmlCurrentChar() */ {
4364         if ((cur == ']') && (NXT(1) == ']') &&
4365             (NXT(2) == '>')) {
4366             if (cdata) break;
4367             else {
4368                 xmlFatalErr(ctxt, XML_ERR_MISPLACED_CDATA_END, NULL);
4369             }
4370         }
4371         COPY_BUF(l,buf,nbchar,cur);
4372         if (nbchar >= XML_PARSER_BIG_BUFFER_SIZE) {
4373             buf[nbchar] = 0;
4374
4375             /*
4376              * OK the segment is to be consumed as chars.
4377              */
4378             if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
4379                 if (areBlanks(ctxt, buf, nbchar, 0)) {
4380                     if (ctxt->sax->ignorableWhitespace != NULL)
4381                         ctxt->sax->ignorableWhitespace(ctxt->userData,
4382                                                        buf, nbchar);
4383                 } else {
4384                     if (ctxt->sax->characters != NULL)
4385                         ctxt->sax->characters(ctxt->userData, buf, nbchar);
4386                     if ((ctxt->sax->characters !=
4387                          ctxt->sax->ignorableWhitespace) &&
4388                         (*ctxt->space == -1))
4389                         *ctxt->space = -2;
4390                 }
4391             }
4392             nbchar = 0;
4393             /* something really bad happened in the SAX callback */
4394             if (ctxt->instate != XML_PARSER_CONTENT)
4395                 return;
4396         }
4397         count++;
4398         if (count > 50) {
4399             GROW;
4400             count = 0;
4401         }
4402         NEXTL(l);
4403         cur = CUR_CHAR(l);
4404     }
4405     if (nbchar != 0) {
4406         buf[nbchar] = 0;
4407         /*
4408          * OK the segment is to be consumed as chars.
4409          */
4410         if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
4411             if (areBlanks(ctxt, buf, nbchar, 0)) {
4412                 if (ctxt->sax->ignorableWhitespace != NULL)
4413                     ctxt->sax->ignorableWhitespace(ctxt->userData, buf, nbchar);
4414             } else {
4415                 if (ctxt->sax->characters != NULL)
4416                     ctxt->sax->characters(ctxt->userData, buf, nbchar);
4417                 if ((ctxt->sax->characters != ctxt->sax->ignorableWhitespace) &&
4418                     (*ctxt->space == -1))
4419                     *ctxt->space = -2;
4420             }
4421         }
4422     }
4423     if ((cur != 0) && (!IS_CHAR(cur))) {
4424         /* Generate the error and skip the offending character */
4425         xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
4426                           "PCDATA invalid Char value %d\n",
4427                           cur);
4428         NEXTL(l);
4429     }
4430 }
4431
4432 /**
4433  * xmlParseExternalID:
4434  * @ctxt:  an XML parser context
4435  * @publicID:  a xmlChar** receiving PubidLiteral
4436  * @strict: indicate whether we should restrict parsing to only
4437  *          production [75], see NOTE below
4438  *
4439  * Parse an External ID or a Public ID
4440  *
4441  * NOTE: Productions [75] and [83] interact badly since [75] can generate
4442  *       'PUBLIC' S PubidLiteral S SystemLiteral
4443  *
4444  * [75] ExternalID ::= 'SYSTEM' S SystemLiteral
4445  *                   | 'PUBLIC' S PubidLiteral S SystemLiteral
4446  *
4447  * [83] PublicID ::= 'PUBLIC' S PubidLiteral
4448  *
4449  * Returns the function returns SystemLiteral and in the second
4450  *                case publicID receives PubidLiteral, is strict is off
4451  *                it is possible to return NULL and have publicID set.
4452  */
4453
4454 xmlChar *
4455 xmlParseExternalID(xmlParserCtxtPtr ctxt, xmlChar **publicID, int strict) {
4456     xmlChar *URI = NULL;
4457
4458     SHRINK;
4459
4460     *publicID = NULL;
4461     if (CMP6(CUR_PTR, 'S', 'Y', 'S', 'T', 'E', 'M')) {
4462         SKIP(6);
4463         if (!IS_BLANK_CH(CUR)) {
4464             xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4465                            "Space required after 'SYSTEM'\n");
4466         }
4467         SKIP_BLANKS;
4468         URI = xmlParseSystemLiteral(ctxt);
4469         if (URI == NULL) {
4470             xmlFatalErr(ctxt, XML_ERR_URI_REQUIRED, NULL);
4471         }
4472     } else if (CMP6(CUR_PTR, 'P', 'U', 'B', 'L', 'I', 'C')) {
4473         SKIP(6);
4474         if (!IS_BLANK_CH(CUR)) {
4475             xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4476                     "Space required after 'PUBLIC'\n");
4477         }
4478         SKIP_BLANKS;
4479         *publicID = xmlParsePubidLiteral(ctxt);
4480         if (*publicID == NULL) {
4481             xmlFatalErr(ctxt, XML_ERR_PUBID_REQUIRED, NULL);
4482         }
4483         if (strict) {
4484             /*
4485              * We don't handle [83] so "S SystemLiteral" is required.
4486              */
4487             if (!IS_BLANK_CH(CUR)) {
4488                 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4489                         "Space required after the Public Identifier\n");
4490             }
4491         } else {
4492             /*
4493              * We handle [83] so we return immediately, if
4494              * "S SystemLiteral" is not detected. From a purely parsing
4495              * point of view that's a nice mess.
4496              */
4497             const xmlChar *ptr;
4498             GROW;
4499
4500             ptr = CUR_PTR;
4501             if (!IS_BLANK_CH(*ptr)) return(NULL);
4502
4503             while (IS_BLANK_CH(*ptr)) ptr++; /* TODO: dangerous, fix ! */
4504             if ((*ptr != '\'') && (*ptr != '"')) return(NULL);
4505         }
4506         SKIP_BLANKS;
4507         URI = xmlParseSystemLiteral(ctxt);
4508         if (URI == NULL) {
4509             xmlFatalErr(ctxt, XML_ERR_URI_REQUIRED, NULL);
4510         }
4511     }
4512     return(URI);
4513 }
4514
4515 /**
4516  * xmlParseCommentComplex:
4517  * @ctxt:  an XML parser context
4518  * @buf:  the already parsed part of the buffer
4519  * @len:  number of bytes filles in the buffer
4520  * @size:  allocated size of the buffer
4521  *
4522  * Skip an XML (SGML) comment <!-- .... -->
4523  *  The spec says that "For compatibility, the string "--" (double-hyphen)
4524  *  must not occur within comments. "
4525  * This is the slow routine in case the accelerator for ascii didn't work
4526  *
4527  * [15] Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->'
4528  */
4529 static void
4530 xmlParseCommentComplex(xmlParserCtxtPtr ctxt, xmlChar *buf, int len, int size) {
4531     int q, ql;
4532     int r, rl;
4533     int cur, l;
4534     int count = 0;
4535     int inputid;
4536
4537     inputid = ctxt->input->id;
4538
4539     if (buf == NULL) {
4540         len = 0;
4541         size = XML_PARSER_BUFFER_SIZE;
4542         buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
4543         if (buf == NULL) {
4544             xmlErrMemory(ctxt, NULL);
4545             return;
4546         }
4547     }
4548     GROW;       /* Assure there's enough input data */
4549     q = CUR_CHAR(ql);
4550     if (q == 0)
4551         goto not_terminated;
4552     if (!IS_CHAR(q)) {
4553         xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
4554                           "xmlParseComment: invalid xmlChar value %d\n",
4555                           q);
4556         xmlFree (buf);
4557         return;
4558     }
4559     NEXTL(ql);
4560     r = CUR_CHAR(rl);
4561     if (r == 0)
4562         goto not_terminated;
4563     if (!IS_CHAR(r)) {
4564         xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
4565                           "xmlParseComment: invalid xmlChar value %d\n",
4566                           q);
4567         xmlFree (buf);
4568         return;
4569     }
4570     NEXTL(rl);
4571     cur = CUR_CHAR(l);
4572     if (cur == 0)
4573         goto not_terminated;
4574     while (IS_CHAR(cur) && /* checked */
4575            ((cur != '>') ||
4576             (r != '-') || (q != '-'))) {
4577         if ((r == '-') && (q == '-')) {
4578             xmlFatalErr(ctxt, XML_ERR_HYPHEN_IN_COMMENT, NULL);
4579         }
4580         if (len + 5 >= size) {
4581             xmlChar *new_buf;
4582             size *= 2;
4583             new_buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
4584             if (new_buf == NULL) {
4585                 xmlFree (buf);
4586                 xmlErrMemory(ctxt, NULL);
4587                 return;
4588             }
4589             buf = new_buf;
4590         }
4591         COPY_BUF(ql,buf,len,q);
4592         q = r;
4593         ql = rl;
4594         r = cur;
4595         rl = l;
4596
4597         count++;
4598         if (count > 50) {
4599             GROW;
4600             count = 0;
4601         }
4602         NEXTL(l);
4603         cur = CUR_CHAR(l);
4604         if (cur == 0) {
4605             SHRINK;
4606             GROW;
4607             cur = CUR_CHAR(l);
4608         }
4609     }
4610     buf[len] = 0;
4611     if (cur == 0) {
4612         xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
4613                              "Comment not terminated \n<!--%.50s\n", buf);
4614     } else if (!IS_CHAR(cur)) {
4615         xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
4616                           "xmlParseComment: invalid xmlChar value %d\n",
4617                           cur);
4618     } else {
4619         if (inputid != ctxt->input->id) {
4620             xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
4621                 "Comment doesn't start and stop in the same entity\n");
4622         }
4623         NEXT;
4624         if ((ctxt->sax != NULL) && (ctxt->sax->comment != NULL) &&
4625             (!ctxt->disableSAX))
4626             ctxt->sax->comment(ctxt->userData, buf);
4627     }
4628     xmlFree(buf);
4629     return;
4630 not_terminated:
4631     xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
4632                          "Comment not terminated\n", NULL);
4633     xmlFree(buf);
4634     return;
4635 }
4636
/**
 * xmlParseComment:
 * @ctxt:  an XML parser context
 *
 * Skip an XML (SGML) comment <!-- .... -->
 *  The spec says that "For compatibility, the string "--" (double-hyphen)
 *  must not occur within comments. "
 *
 * Does nothing unless the input is positioned on "<!--". A fast path
 * copies runs of plain ASCII straight from the input buffer; as soon as
 * a byte outside that set is met the rest of the comment is handed to
 * xmlParseCommentComplex() together with the text gathered so far.
 * ctxt->instate is set to XML_PARSER_COMMENT while parsing and restored
 * to its previous value on every exit path.
 *
 * [15] Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->'
 */
void
xmlParseComment(xmlParserCtxtPtr ctxt) {
    xmlChar *buf = NULL;    /* accumulated comment text, allocated lazily */
    int size = XML_PARSER_BUFFER_SIZE;  /* allocated size of buf */
    int len = 0;            /* bytes currently stored in buf */
    xmlParserInputState state;  /* parser state to restore on exit */
    const xmlChar *in;      /* scan cursor, runs ahead of input->cur */
    int nbchar = 0, ccol;
    int inputid;            /* id of the input the comment started in */

    /*
     * Check that there is a comment right here.
     */
    if ((RAW != '<') || (NXT(1) != '!') ||
        (NXT(2) != '-') || (NXT(3) != '-')) return;
    state = ctxt->instate;
    ctxt->instate = XML_PARSER_COMMENT;
    inputid = ctxt->input->id;
    SKIP(4);
    SHRINK;
    GROW;

    /*
     * Accelerated common case where the input doesn't need to be
     * modified before passing it to the handler.
     */
    in = ctxt->input->cur;
    do {
        /* leading line feeds: only line/column bookkeeping is needed */
        if (*in == 0xA) {
            do {
                ctxt->input->line++; ctxt->input->col = 1;
                in++;
            } while (*in == 0xA);
        }
get_more:
        ccol = ctxt->input->col;
        /* advance over bytes 0x20..0x7F other than '-', and over tabs */
        while (((*in > '-') && (*in <= 0x7F)) ||
               ((*in >= 0x20) && (*in < '-')) ||
               (*in == 0x09)) {
                    in++;
                    ccol++;
        }
        ctxt->input->col = ccol;
        if (*in == 0xA) {
            do {
                ctxt->input->line++; ctxt->input->col = 1;
                in++;
            } while (*in == 0xA);
            goto get_more;
        }
        /* number of bytes scanned since the last committed position */
        nbchar = in - ctxt->input->cur;
        /*
         * save current set of data
         */
        if (nbchar > 0) {
            /* text is only kept when a comment handler will receive it */
            if ((ctxt->sax != NULL) &&
                (ctxt->sax->comment != NULL)) {
                if (buf == NULL) {
                    /*
                     * When "--" follows directly, allocate just this run
                     * plus the terminator; otherwise leave extra room.
                     */
                    if ((*in == '-') && (in[1] == '-'))
                        size = nbchar + 1;
                    else
                        size = XML_PARSER_BUFFER_SIZE + nbchar;
                    buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
                    if (buf == NULL) {
                        xmlErrMemory(ctxt, NULL);
                        ctxt->instate = state;
                        return;
                    }
                    len = 0;
                } else if (len + nbchar + 1 >= size) {
                    xmlChar *new_buf;
                    size  += len + nbchar + XML_PARSER_BUFFER_SIZE;
                    new_buf = (xmlChar *) xmlRealloc(buf,
                                                     size * sizeof(xmlChar));
                    if (new_buf == NULL) {
                        xmlFree (buf);
                        xmlErrMemory(ctxt, NULL);
                        ctxt->instate = state;
                        return;
                    }
                    buf = new_buf;
                }
                memcpy(&buf[len], ctxt->input->cur, nbchar);
                len += nbchar;
                buf[len] = 0;
            }
        }
        /* commit the scanned run */
        ctxt->input->cur = in;
        if (*in == 0xA) {
            in++;
            ctxt->input->line++; ctxt->input->col = 1;
        }
        if (*in == 0xD) {
            in++;
            if (*in == 0xA) {
                /*
                 * CR LF: commit past the CR only, then re-test the loop
                 * condition (continue in a do/while jumps to the test).
                 */
                ctxt->input->cur = in;
                in++;
                ctxt->input->line++; ctxt->input->col = 1;
                continue; /* while */
            }
            /* lone CR: step back and leave it to the code below */
            in--;
        }
        /* refresh the input buffer, then resume from the committed spot */
        SHRINK;
        GROW;
        in = ctxt->input->cur;
        if (*in == '-') {
            if (in[1] == '-') {
                if (in[2] == '>') {
                    /* "-->" found: the comment is complete */
                    if (ctxt->input->id != inputid) {
                        xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
                        "comment doesn't start and stop in the same entity\n");
                    }
                    SKIP(3);
                    if ((ctxt->sax != NULL) && (ctxt->sax->comment != NULL) &&
                        (!ctxt->disableSAX)) {
                        /* an empty comment is reported as "" */
                        if (buf != NULL)
                            ctxt->sax->comment(ctxt->userData, buf);
                        else
                            ctxt->sax->comment(ctxt->userData, BAD_CAST "");
                    }
                    if (buf != NULL)
                        xmlFree(buf);
                    ctxt->instate = state;
                    return;
                }
                /* "--" not followed by '>': report it and keep scanning */
                if (buf != NULL)
                    xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
                                      "Comment not terminated \n<!--%.50s\n",
                                      buf);
                else
                    xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
                                      "Comment not terminated \n", NULL);
                in++;
                ctxt->input->col++;
            }
            /*
             * Step over the hyphen; input->cur is not moved, so the
             * skipped hyphen(s) are part of the next saved run.
             */
            in++;
            ctxt->input->col++;
            goto get_more;
        }
    } while (((*in >= 0x20) && (*in <= 0x7F)) || (*in == 0x09));
    /* a byte the fast path cannot handle: finish in the slow routine */
    xmlParseCommentComplex(ctxt, buf, len, size);
    ctxt->instate = state;
    return;
}
4791
4792
4793 /**
4794  * xmlParsePITarget:
4795  * @ctxt:  an XML parser context
4796  *
4797  * parse the name of a PI
4798  *
4799  * [17] PITarget ::= Name - (('X' | 'x') ('M' | 'm') ('L' | 'l'))
4800  *
4801  * Returns the PITarget name or NULL
4802  */
4803
4804 const xmlChar *
4805 xmlParsePITarget(xmlParserCtxtPtr ctxt) {
4806     const xmlChar *name;
4807
4808     name = xmlParseName(ctxt);
4809     if ((name != NULL) &&
4810         ((name[0] == 'x') || (name[0] == 'X')) &&
4811         ((name[1] == 'm') || (name[1] == 'M')) &&
4812         ((name[2] == 'l') || (name[2] == 'L'))) {
4813         int i;
4814         if ((name[0] == 'x') && (name[1] == 'm') &&
4815             (name[2] == 'l') && (name[3] == 0)) {
4816             xmlFatalErrMsg(ctxt, XML_ERR_RESERVED_XML_NAME,
4817                  "XML declaration allowed only at the start of the document\n");
4818             return(name);
4819         } else if (name[3] == 0) {
4820             xmlFatalErr(ctxt, XML_ERR_RESERVED_XML_NAME, NULL);
4821             return(name);
4822         }
4823         for (i = 0;;i++) {
4824             if (xmlW3CPIs[i] == NULL) break;
4825             if (xmlStrEqual(name, (const xmlChar *)xmlW3CPIs[i]))
4826                 return(name);
4827         }
4828         xmlWarningMsg(ctxt, XML_ERR_RESERVED_XML_NAME,
4829                       "xmlParsePITarget: invalid name prefix 'xml'\n",
4830                       NULL, NULL);
4831     }
4832     if ((name != NULL) && (xmlStrchr(name, ':') != NULL)) {
4833         xmlNsErr(ctxt, XML_NS_ERR_COLON,
4834                  "colon are forbidden from PI names '%s'\n", name, NULL, NULL);
4835     }
4836     return(name);
4837 }
4838
#ifdef LIBXML_CATALOG_ENABLED
/**
 * xmlParseCatalogPI:
 * @ctxt:  an XML parser context
 * @catalog:  the PI value string
 *
 * parse an XML Catalog Processing Instruction.
 *
 * <?oasis-xml-catalog catalog="http://example.com/catalog.xml"?>
 *
 * Occurs only if allowed by the user and if happening in the Misc
 * part of the document before any doctype information.
 * The quoted value is extracted and added to the parsing context with
 * xmlCatalogAddLocal() in order to be used if there is a resolution
 * need further down in the document. Syntax problems produce an
 * XML_WAR_CATALOG_PI warning, except for a missing '=' after the
 * "catalog" keyword, where the PI is ignored without any message.
 */

static void
xmlParseCatalogPI(xmlParserCtxtPtr ctxt, const xmlChar *catalog) {
    const xmlChar *p = catalog;
    const xmlChar *valueStart;
    xmlChar *URL = NULL;
    xmlChar quote;

    /* S? "catalog" */
    for (; IS_BLANK_CH(*p); p++)
        ;
    if (xmlStrncmp(p, BAD_CAST"catalog", 7) != 0)
        goto error;

    /* S? '=' -- anything else is silently ignored */
    for (p += 7; IS_BLANK_CH(*p); p++)
        ;
    if (*p != '=')
        return;

    /* S? opening quote */
    for (p++; IS_BLANK_CH(*p); p++)
        ;
    quote = *p;
    if (!((quote == '\'') || (quote == '"')))
        goto error;

    /* value up to the matching quote */
    valueStart = ++p;
    for (; (*p != 0) && (*p != quote); p++)
        ;
    if (*p == 0)
        goto error;
    URL = xmlStrndup(valueStart, p - valueStart);

    /* only blanks may follow the closing quote */
    for (p++; IS_BLANK_CH(*p); p++)
        ;
    if (*p != 0)
        goto error;

    if (URL != NULL) {
        ctxt->catalogs = xmlCatalogAddLocal(ctxt->catalogs, URL);
        xmlFree(URL);
    }
    return;

error:
    xmlWarningMsg(ctxt, XML_WAR_CATALOG_PI,
                  "Catalog PI syntax error: %s\n",
                  catalog, NULL);
    if (URL != NULL)
        xmlFree(URL);
}
#endif
4900
4901 /**
4902  * xmlParsePI:
4903  * @ctxt:  an XML parser context
4904  *
4905  * parse an XML Processing Instruction.
4906  *
4907  * [16] PI ::= '<?' PITarget (S (Char* - (Char* '?>' Char*)))? '?>'
4908  *
4909  * The processing is transfered to SAX once parsed.
4910  */
4911
4912 void
4913 xmlParsePI(xmlParserCtxtPtr ctxt) {
4914     xmlChar *buf = NULL;
4915     int len = 0;
4916     int size = XML_PARSER_BUFFER_SIZE;
4917     int cur, l;
4918     const xmlChar *target;
4919     xmlParserInputState state;
4920     int count = 0;
4921
4922     if ((RAW == '<') && (NXT(1) == '?')) {
4923         xmlParserInputPtr input = ctxt->input;
4924         state = ctxt->instate;
4925         ctxt->instate = XML_PARSER_PI;
4926         /*
4927          * this is a Processing Instruction.
4928          */
4929         SKIP(2);
4930         SHRINK;
4931
4932         /*
4933          * Parse the target name and check for special support like
4934          * namespace.
4935          */
4936         target = xmlParsePITarget(ctxt);
4937         if (target != NULL) {
4938             if ((RAW == '?') && (NXT(1) == '>')) {
4939                 if (input != ctxt->input) {
4940                     xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
4941             "PI declaration doesn't start and stop in the same entity\n");
4942                 }
4943                 SKIP(2);
4944
4945                 /*
4946                  * SAX: PI detected.
4947                  */
4948                 if ((ctxt->sax) && (!ctxt->disableSAX) &&
4949                     (ctxt->sax->processingInstruction != NULL))
4950                     ctxt->sax->processingInstruction(ctxt->userData,
4951                                                      target, NULL);
4952                 ctxt->instate = state;
4953                 return;
4954             }
4955             buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
4956             if (buf == NULL) {
4957                 xmlErrMemory(ctxt, NULL);
4958                 ctxt->instate = state;
4959                 return;
4960             }
4961             cur = CUR;
4962             if (!IS_BLANK(cur)) {
4963                 xmlFatalErrMsgStr(ctxt, XML_ERR_SPACE_REQUIRED,
4964                           "ParsePI: PI %s space expected\n", target);
4965             }
4966             SKIP_BLANKS;
4967             cur = CUR_CHAR(l);
4968             while (IS_CHAR(cur) && /* checked */
4969                    ((cur != '?') || (NXT(1) != '>'))) {
4970                 if (len + 5 >= size) {
4971                     xmlChar *tmp;
4972
4973                     size *= 2;
4974                     tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
4975                     if (tmp == NULL) {
4976                         xmlErrMemory(ctxt, NULL);
4977                         xmlFree(buf);
4978                         ctxt->instate = state;
4979                         return;
4980                     }
4981                     buf = tmp;
4982                 }
4983                 count++;
4984                 if (count > 50) {
4985                     GROW;
4986                     count = 0;
4987                 }
4988                 COPY_BUF(l,buf,len,cur);
4989                 NEXTL(l);
4990                 cur = CUR_CHAR(l);
4991                 if (cur == 0) {
4992                     SHRINK;
4993                     GROW;
4994                     cur = CUR_CHAR(l);
4995                 }
4996             }
4997             buf[len] = 0;
4998             if (cur != '?') {
4999                 xmlFatalErrMsgStr(ctxt, XML_ERR_PI_NOT_FINISHED,
5000                       "ParsePI: PI %s never end ...\n", target);
5001             } else {
5002                 if (input != ctxt->input) {
5003                     xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5004             "PI declaration doesn't start and stop in the same entity\n");
5005                 }
5006                 SKIP(2);
5007
5008 #ifdef LIBXML_CATALOG_ENABLED
5009                 if (((state == XML_PARSER_MISC) ||
5010                      (state == XML_PARSER_START)) &&
5011                     (xmlStrEqual(target, XML_CATALOG_PI))) {
5012                     xmlCatalogAllow allow = xmlCatalogGetDefaults();
5013                     if ((allow == XML_CATA_ALLOW_DOCUMENT) ||
5014                         (allow == XML_CATA_ALLOW_ALL))
5015                         xmlParseCatalogPI(ctxt, buf);
5016                 }
5017 #endif
5018
5019
5020                 /*
5021                  * SAX: PI detected.
5022                  */
5023                 if ((ctxt->sax) && (!ctxt->disableSAX) &&
5024                     (ctxt->sax->processingInstruction != NULL))
5025                     ctxt->sax->processingInstruction(ctxt->userData,
5026                                                      target, buf);
5027             }
5028             xmlFree(buf);
5029         } else {
5030             xmlFatalErr(ctxt, XML_ERR_PI_NOT_STARTED, NULL);
5031         }
5032         ctxt->instate = state;
5033     }
5034 }
5035
5036 /**
5037  * xmlParseNotationDecl:
5038  * @ctxt:  an XML parser context
5039  *
5040  * parse a notation declaration
5041  *
5042  * [82] NotationDecl ::= '<!NOTATION' S Name S (ExternalID |  PublicID) S? '>'
5043  *
5044  * Hence there is actually 3 choices:
5045  *     'PUBLIC' S PubidLiteral
5046  *     'PUBLIC' S PubidLiteral S SystemLiteral
5047  * and 'SYSTEM' S SystemLiteral
5048  *
5049  * See the NOTE on xmlParseExternalID().
5050  */
5051
5052 void
5053 xmlParseNotationDecl(xmlParserCtxtPtr ctxt) {
5054     const xmlChar *name;
5055     xmlChar *Pubid;
5056     xmlChar *Systemid;
5057
5058     if (CMP10(CUR_PTR, '<', '!', 'N', 'O', 'T', 'A', 'T', 'I', 'O', 'N')) {
5059         xmlParserInputPtr input = ctxt->input;
5060         SHRINK;
5061         SKIP(10);
5062         if (!IS_BLANK_CH(CUR)) {
5063             xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5064                            "Space required after '<!NOTATION'\n");
5065             return;
5066         }
5067         SKIP_BLANKS;
5068
5069         name = xmlParseName(ctxt);
5070         if (name == NULL) {
5071             xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_STARTED, NULL);
5072             return;
5073         }
5074         if (!IS_BLANK_CH(CUR)) {
5075             xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5076                      "Space required after the NOTATION name'\n");
5077             return;
5078         }
5079         if (xmlStrchr(name, ':') != NULL) {
5080             xmlNsErr(ctxt, XML_NS_ERR_COLON,
5081                      "colon are forbidden from notation names '%s'\n",
5082                      name, NULL, NULL);
5083         }
5084         SKIP_BLANKS;
5085
5086         /*
5087          * Parse the IDs.
5088          */
5089         Systemid = xmlParseExternalID(ctxt, &Pubid, 0);
5090         SKIP_BLANKS;
5091
5092         if (RAW == '>') {
5093             if (input != ctxt->input) {
5094                 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5095         "Notation declaration doesn't start and stop in the same entity\n");
5096             }
5097             NEXT;
5098             if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
5099                 (ctxt->sax->notationDecl != NULL))
5100                 ctxt->sax->notationDecl(ctxt->userData, name, Pubid, Systemid);
5101         } else {
5102             xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_FINISHED, NULL);
5103         }
5104         if (Systemid != NULL) xmlFree(Systemid);
5105         if (Pubid != NULL) xmlFree(Pubid);
5106     }
5107 }
5108
5109 /**
5110  * xmlParseEntityDecl:
5111  * @ctxt:  an XML parser context
5112  *
5113  * parse <!ENTITY declarations
5114  *
5115  * [70] EntityDecl ::= GEDecl | PEDecl
5116  *
5117  * [71] GEDecl ::= '<!ENTITY' S Name S EntityDef S? '>'
5118  *
5119  * [72] PEDecl ::= '<!ENTITY' S '%' S Name S PEDef S? '>'
5120  *
5121  * [73] EntityDef ::= EntityValue | (ExternalID NDataDecl?)
5122  *
5123  * [74] PEDef ::= EntityValue | ExternalID
5124  *
5125  * [76] NDataDecl ::= S 'NDATA' S Name
5126  *
5127  * [ VC: Notation Declared ]
5128  * The Name must match the declared name of a notation.
5129  */
5130
5131 void
5132 xmlParseEntityDecl(xmlParserCtxtPtr ctxt) {
5133     const xmlChar *name = NULL;
5134     xmlChar *value = NULL;
5135     xmlChar *URI = NULL, *literal = NULL;
5136     const xmlChar *ndata = NULL;
5137     int isParameter = 0;
5138     xmlChar *orig = NULL;
5139     int skipped;
5140
5141     /* GROW; done in the caller */
5142     if (CMP8(CUR_PTR, '<', '!', 'E', 'N', 'T', 'I', 'T', 'Y')) {
5143         xmlParserInputPtr input = ctxt->input;
5144         SHRINK;
5145         SKIP(8);
5146         skipped = SKIP_BLANKS;
5147         if (skipped == 0) {
5148             xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5149                            "Space required after '<!ENTITY'\n");
5150         }
5151
5152         if (RAW == '%') {
5153             NEXT;
5154             skipped = SKIP_BLANKS;
5155             if (skipped == 0) {
5156                 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5157                                "Space required after '%'\n");
5158             }
5159             isParameter = 1;
5160         }
5161
5162         name = xmlParseName(ctxt);
5163         if (name == NULL) {
5164             xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
5165                            "xmlParseEntityDecl: no name\n");
5166             return;
5167         }
5168         if (xmlStrchr(name, ':') != NULL) {
5169             xmlNsErr(ctxt, XML_NS_ERR_COLON,
5170                      "colon are forbidden from entities names '%s'\n",
5171                      name, NULL, NULL);
5172         }
5173         skipped = SKIP_BLANKS;
5174         if (skipped == 0) {
5175             xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5176                            "Space required after the entity name\n");
5177         }
5178
5179         ctxt->instate = XML_PARSER_ENTITY_DECL;
5180         /*
5181          * handle the various case of definitions...
5182          */
5183         if (isParameter) {
5184             if ((RAW == '"') || (RAW == '\'')) {
5185                 value = xmlParseEntityValue(ctxt, &orig);
5186                 if (value) {
5187                     if ((ctxt->sax != NULL) &&
5188                         (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
5189                         ctxt->sax->entityDecl(ctxt->userData, name,
5190                                     XML_INTERNAL_PARAMETER_ENTITY,
5191                                     NULL, NULL, value);
5192                 }
5193             } else {
5194                 URI = xmlParseExternalID(ctxt, &literal, 1);
5195                 if ((URI == NULL) && (literal == NULL)) {
5196                     xmlFatalErr(ctxt, XML_ERR_VALUE_REQUIRED, NULL);
5197                 }
5198                 if (URI) {
5199                     xmlURIPtr uri;
5200
5201                     uri = xmlParseURI((const char *) URI);
5202                     if (uri == NULL) {
5203                         xmlErrMsgStr(ctxt, XML_ERR_INVALID_URI,
5204                                      "Invalid URI: %s\n", URI);
5205                         /*
5206                          * This really ought to be a well formedness error
5207                          * but the XML Core WG decided otherwise c.f. issue
5208                          * E26 of the XML erratas.
5209                          */
5210                     } else {
5211                         if (uri->fragment != NULL) {
5212                             /*
5213                              * Okay this is foolish to block those but not
5214                              * invalid URIs.
5215                              */
5216                             xmlFatalErr(ctxt, XML_ERR_URI_FRAGMENT, NULL);
5217                         } else {
5218                             if ((ctxt->sax != NULL) &&
5219                                 (!ctxt->disableSAX) &&
5220                                 (ctxt->sax->entityDecl != NULL))
5221                                 ctxt->sax->entityDecl(ctxt->userData, name,
5222                                             XML_EXTERNAL_PARAMETER_ENTITY,
5223                                             literal, URI, NULL);
5224                         }
5225                         xmlFreeURI(uri);
5226                     }
5227                 }
5228             }
5229         } else {
5230             if ((RAW == '"') || (RAW == '\'')) {
5231                 value = xmlParseEntityValue(ctxt, &orig);
5232                 if ((ctxt->sax != NULL) &&
5233                     (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
5234                     ctxt->sax->entityDecl(ctxt->userData, name,
5235                                 XML_INTERNAL_GENERAL_ENTITY,
5236                                 NULL, NULL, value);
5237                 /*
5238                  * For expat compatibility in SAX mode.
5239                  */
5240                 if ((ctxt->myDoc == NULL) ||
5241                     (xmlStrEqual(ctxt->myDoc->version, SAX_COMPAT_MODE))) {
5242                     if (ctxt->myDoc == NULL) {
5243                         ctxt->myDoc = xmlNewDoc(SAX_COMPAT_MODE);
5244                         if (ctxt->myDoc == NULL) {
5245                             xmlErrMemory(ctxt, "New Doc failed");
5246                             return;
5247                         }
5248                         ctxt->myDoc->properties = XML_DOC_INTERNAL;
5249                     }
5250                     if (ctxt->myDoc->intSubset == NULL)
5251                         ctxt->myDoc->intSubset = xmlNewDtd(ctxt->myDoc,
5252                                             BAD_CAST "fake", NULL, NULL);
5253
5254                     xmlSAX2EntityDecl(ctxt, name, XML_INTERNAL_GENERAL_ENTITY,
5255                                       NULL, NULL, value);
5256                 }
5257             } else {
5258                 URI = xmlParseExternalID(ctxt, &literal, 1);
5259                 if ((URI == NULL) && (literal == NULL)) {
5260                     xmlFatalErr(ctxt, XML_ERR_VALUE_REQUIRED, NULL);
5261                 }
5262                 if (URI) {
5263                     xmlURIPtr uri;
5264
5265                     uri = xmlParseURI((const char *)URI);
5266                     if (uri == NULL) {
5267                         xmlErrMsgStr(ctxt, XML_ERR_INVALID_URI,
5268                                      "Invalid URI: %s\n", URI);
5269                         /*
5270                          * This really ought to be a well formedness error
5271                          * but the XML Core WG decided otherwise c.f. issue
5272                          * E26 of the XML erratas.
5273                          */
5274                     } else {
5275                         if (uri->fragment != NULL) {
5276                             /*
5277                              * Okay this is foolish to block those but not
5278                              * invalid URIs.
5279                              */
5280                             xmlFatalErr(ctxt, XML_ERR_URI_FRAGMENT, NULL);
5281                         }
5282                         xmlFreeURI(uri);
5283                     }
5284                 }
5285                 if ((RAW != '>') && (!IS_BLANK_CH(CUR))) {
5286                     xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5287                                    "Space required before 'NDATA'\n");
5288                 }
5289                 SKIP_BLANKS;
5290                 if (CMP5(CUR_PTR, 'N', 'D', 'A', 'T', 'A')) {
5291                     SKIP(5);
5292                     if (!IS_BLANK_CH(CUR)) {
5293                         xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5294                                        "Space required after 'NDATA'\n");
5295                     }
5296                     SKIP_BLANKS;
5297                     ndata = xmlParseName(ctxt);
5298                     if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
5299                         (ctxt->sax->unparsedEntityDecl != NULL))
5300                         ctxt->sax->unparsedEntityDecl(ctxt->userData, name,
5301                                     literal, URI, ndata);
5302                 } else {
5303                     if ((ctxt->sax != NULL) &&
5304                         (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
5305                         ctxt->sax->entityDecl(ctxt->userData, name,
5306                                     XML_EXTERNAL_GENERAL_PARSED_ENTITY,
5307                                     literal, URI, NULL);
5308                     /*
5309                      * For expat compatibility in SAX mode.
5310                      * assuming the entity repalcement was asked for
5311                      */
5312                     if ((ctxt->replaceEntities != 0) &&
5313                         ((ctxt->myDoc == NULL) ||
5314                         (xmlStrEqual(ctxt->myDoc->version, SAX_COMPAT_MODE)))) {
5315                         if (ctxt->myDoc == NULL) {
5316                             ctxt->myDoc = xmlNewDoc(SAX_COMPAT_MODE);
5317                             if (ctxt->myDoc == NULL) {
5318                                 xmlErrMemory(ctxt, "New Doc failed");
5319                                 return;
5320                             }
5321                             ctxt->myDoc->properties = XML_DOC_INTERNAL;
5322                         }
5323
5324                         if (ctxt->myDoc->intSubset == NULL)
5325                             ctxt->myDoc->intSubset = xmlNewDtd(ctxt->myDoc,
5326                                                 BAD_CAST "fake", NULL, NULL);
5327                         xmlSAX2EntityDecl(ctxt, name,
5328                                           XML_EXTERNAL_GENERAL_PARSED_ENTITY,
5329                                           literal, URI, NULL);
5330                     }
5331                 }
5332             }
5333         }
5334         SKIP_BLANKS;
5335         if (RAW != '>') {
5336             xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_NOT_FINISHED,
5337                     "xmlParseEntityDecl: entity %s not terminated\n", name);
5338         } else {
5339             if (input != ctxt->input) {
5340                 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
5341         "Entity declaration doesn't start and stop in the same entity\n");
5342             }
5343             NEXT;
5344         }
5345         if (orig != NULL) {
5346             /*
5347              * Ugly mechanism to save the raw entity value.
5348              */
5349             xmlEntityPtr cur = NULL;
5350
5351             if (isParameter) {
5352                 if ((ctxt->sax != NULL) &&
5353                     (ctxt->sax->getParameterEntity != NULL))
5354                     cur = ctxt->sax->getParameterEntity(ctxt->userData, name);
5355             } else {
5356                 if ((ctxt->sax != NULL) &&
5357                     (ctxt->sax->getEntity != NULL))
5358                     cur = ctxt->sax->getEntity(ctxt->userData, name);
5359                 if ((cur == NULL) && (ctxt->userData==ctxt)) {
5360                     cur = xmlSAX2GetEntity(ctxt, name);
5361                 }
5362             }
5363             if (cur != NULL) {
5364                 if (cur->orig != NULL)
5365                     xmlFree(orig);
5366                 else
5367                     cur->orig = orig;
5368             } else
5369                 xmlFree(orig);
5370         }
5371         if (value != NULL) xmlFree(value);
5372         if (URI != NULL) xmlFree(URI);
5373         if (literal != NULL) xmlFree(literal);
5374     }
5375 }
5376
5377 /**
5378  * xmlParseDefaultDecl:
5379  * @ctxt:  an XML parser context
5380  * @value:  Receive a possible fixed default value for the attribute
5381  *
5382  * Parse an attribute default declaration
5383  *
5384  * [60] DefaultDecl ::= '#REQUIRED' | '#IMPLIED' | (('#FIXED' S)? AttValue)
5385  *
5386  * [ VC: Required Attribute ]
5387  * if the default declaration is the keyword #REQUIRED, then the
5388  * attribute must be specified for all elements of the type in the
5389  * attribute-list declaration.
5390  *
5391  * [ VC: Attribute Default Legal ]
5392  * The declared default value must meet the lexical constraints of
5393  * the declared attribute type c.f. xmlValidateAttributeDecl()
5394  *
5395  * [ VC: Fixed Attribute Default ]
5396  * if an attribute has a default value declared with the #FIXED
5397  * keyword, instances of that attribute must match the default value.
5398  *
5399  * [ WFC: No < in Attribute Values ]
5400  * handled in xmlParseAttValue()
5401  *
5402  * returns: XML_ATTRIBUTE_NONE, XML_ATTRIBUTE_REQUIRED, XML_ATTRIBUTE_IMPLIED
5403  *          or XML_ATTRIBUTE_FIXED.
5404  */
5405
5406 int
5407 xmlParseDefaultDecl(xmlParserCtxtPtr ctxt, xmlChar **value) {
5408     int val;
5409     xmlChar *ret;
5410
5411     *value = NULL;
5412     if (CMP9(CUR_PTR, '#', 'R', 'E', 'Q', 'U', 'I', 'R', 'E', 'D')) {
5413         SKIP(9);
5414         return(XML_ATTRIBUTE_REQUIRED);
5415     }
5416     if (CMP8(CUR_PTR, '#', 'I', 'M', 'P', 'L', 'I', 'E', 'D')) {
5417         SKIP(8);
5418         return(XML_ATTRIBUTE_IMPLIED);
5419     }
5420     val = XML_ATTRIBUTE_NONE;
5421     if (CMP6(CUR_PTR, '#', 'F', 'I', 'X', 'E', 'D')) {
5422         SKIP(6);
5423         val = XML_ATTRIBUTE_FIXED;
5424         if (!IS_BLANK_CH(CUR)) {
5425             xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5426                            "Space required after '#FIXED'\n");
5427         }
5428         SKIP_BLANKS;
5429     }
5430     ret = xmlParseAttValue(ctxt);
5431     ctxt->instate = XML_PARSER_DTD;
5432     if (ret == NULL) {
5433         xmlFatalErrMsg(ctxt, (xmlParserErrors)ctxt->errNo,
5434                        "Attribute default value declaration error\n");
5435     } else
5436         *value = ret;
5437     return(val);
5438 }
5439
5440 /**
5441  * xmlParseNotationType:
5442  * @ctxt:  an XML parser context
5443  *
5444  * parse an Notation attribute type.
5445  *
5446  * Note: the leading 'NOTATION' S part has already being parsed...
5447  *
5448  * [58] NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? ')'
5449  *
5450  * [ VC: Notation Attributes ]
5451  * Values of this type must match one of the notation names included
5452  * in the declaration; all notation names in the declaration must be declared.
5453  *
5454  * Returns: the notation attribute tree built while parsing
5455  */
5456
5457 xmlEnumerationPtr
5458 xmlParseNotationType(xmlParserCtxtPtr ctxt) {
5459     const xmlChar *name;
5460     xmlEnumerationPtr ret = NULL, last = NULL, cur, tmp;
5461
5462     if (RAW != '(') {
5463         xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_STARTED, NULL);
5464         return(NULL);
5465     }
5466     SHRINK;
5467     do {
5468         NEXT;
5469         SKIP_BLANKS;
5470         name = xmlParseName(ctxt);
5471         if (name == NULL) {
5472             xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
5473                            "Name expected in NOTATION declaration\n");
5474             xmlFreeEnumeration(ret);
5475             return(NULL);
5476         }
5477         tmp = ret;
5478         while (tmp != NULL) {
5479             if (xmlStrEqual(name, tmp->name)) {
5480                 xmlValidityError(ctxt, XML_DTD_DUP_TOKEN,
5481           "standalone: attribute notation value token %s duplicated\n",
5482                                  name, NULL);
5483                 if (!xmlDictOwns(ctxt->dict, name))
5484                     xmlFree((xmlChar *) name);
5485                 break;
5486             }
5487             tmp = tmp->next;
5488         }
5489         if (tmp == NULL) {
5490             cur = xmlCreateEnumeration(name);
5491             if (cur == NULL) {
5492                 xmlFreeEnumeration(ret);
5493                 return(NULL);
5494             }
5495             if (last == NULL) ret = last = cur;
5496             else {
5497                 last->next = cur;
5498                 last = cur;
5499             }
5500         }
5501         SKIP_BLANKS;
5502     } while (RAW == '|');
5503     if (RAW != ')') {
5504         xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_FINISHED, NULL);
5505         xmlFreeEnumeration(ret);
5506         return(NULL);
5507     }
5508     NEXT;
5509     return(ret);
5510 }
5511
5512 /**
5513  * xmlParseEnumerationType:
5514  * @ctxt:  an XML parser context
5515  *
5516  * parse an Enumeration attribute type.
5517  *
5518  * [59] Enumeration ::= '(' S? Nmtoken (S? '|' S? Nmtoken)* S? ')'
5519  *
5520  * [ VC: Enumeration ]
5521  * Values of this type must match one of the Nmtoken tokens in
5522  * the declaration
5523  *
5524  * Returns: the enumeration attribute tree built while parsing
5525  */
5526
5527 xmlEnumerationPtr
5528 xmlParseEnumerationType(xmlParserCtxtPtr ctxt) {
5529     xmlChar *name;
5530     xmlEnumerationPtr ret = NULL, last = NULL, cur, tmp;
5531
5532     if (RAW != '(') {
5533         xmlFatalErr(ctxt, XML_ERR_ATTLIST_NOT_STARTED, NULL);
5534         return(NULL);
5535     }
5536     SHRINK;
5537     do {
5538         NEXT;
5539         SKIP_BLANKS;
5540         name = xmlParseNmtoken(ctxt);
5541         if (name == NULL) {
5542             xmlFatalErr(ctxt, XML_ERR_NMTOKEN_REQUIRED, NULL);
5543             return(ret);
5544         }
5545         tmp = ret;
5546         while (tmp != NULL) {
5547             if (xmlStrEqual(name, tmp->name)) {
5548                 xmlValidityError(ctxt, XML_DTD_DUP_TOKEN,
5549           "standalone: attribute enumeration value token %s duplicated\n",
5550                                  name, NULL);
5551                 if (!xmlDictOwns(ctxt->dict, name))
5552                     xmlFree(name);
5553                 break;
5554             }
5555             tmp = tmp->next;
5556         }
5557         if (tmp == NULL) {
5558             cur = xmlCreateEnumeration(name);
5559             if (!xmlDictOwns(ctxt->dict, name))
5560                 xmlFree(name);
5561             if (cur == NULL) {
5562                 xmlFreeEnumeration(ret);
5563                 return(NULL);
5564             }
5565             if (last == NULL) ret = last = cur;
5566             else {
5567                 last->next = cur;
5568                 last = cur;
5569             }
5570         }
5571         SKIP_BLANKS;
5572     } while (RAW == '|');
5573     if (RAW != ')') {
5574         xmlFatalErr(ctxt, XML_ERR_ATTLIST_NOT_FINISHED, NULL);
5575         return(ret);
5576     }
5577     NEXT;
5578     return(ret);
5579 }
5580
5581 /**
5582  * xmlParseEnumeratedType:
5583  * @ctxt:  an XML parser context
5584  * @tree:  the enumeration tree built while parsing
5585  *
5586  * parse an Enumerated attribute type.
5587  *
5588  * [57] EnumeratedType ::= NotationType | Enumeration
5589  *
5590  * [58] NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? ')'
5591  *
5592  *
5593  * Returns: XML_ATTRIBUTE_ENUMERATION or XML_ATTRIBUTE_NOTATION
5594  */
5595
5596 int
5597 xmlParseEnumeratedType(xmlParserCtxtPtr ctxt, xmlEnumerationPtr *tree) {
5598     if (CMP8(CUR_PTR, 'N', 'O', 'T', 'A', 'T', 'I', 'O', 'N')) {
5599         SKIP(8);
5600         if (!IS_BLANK_CH(CUR)) {
5601             xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5602                            "Space required after 'NOTATION'\n");
5603             return(0);
5604         }
5605         SKIP_BLANKS;
5606         *tree = xmlParseNotationType(ctxt);
5607         if (*tree == NULL) return(0);
5608         return(XML_ATTRIBUTE_NOTATION);
5609     }
5610     *tree = xmlParseEnumerationType(ctxt);
5611     if (*tree == NULL) return(0);
5612     return(XML_ATTRIBUTE_ENUMERATION);
5613 }
5614
5615 /**
5616  * xmlParseAttributeType:
5617  * @ctxt:  an XML parser context
5618  * @tree:  the enumeration tree built while parsing
5619  *
5620  * parse the Attribute list def for an element
5621  *
5622  * [54] AttType ::= StringType | TokenizedType | EnumeratedType
5623  *
5624  * [55] StringType ::= 'CDATA'
5625  *
5626  * [56] TokenizedType ::= 'ID' | 'IDREF' | 'IDREFS' | 'ENTITY' |
5627  *                        'ENTITIES' | 'NMTOKEN' | 'NMTOKENS'
5628  *
5629  * Validity constraints for attribute values syntax are checked in
5630  * xmlValidateAttributeValue()
5631  *
5632  * [ VC: ID ]
5633  * Values of type ID must match the Name production. A name must not
5634  * appear more than once in an XML document as a value of this type;
5635  * i.e., ID values must uniquely identify the elements which bear them.
5636  *
5637  * [ VC: One ID per Element Type ]
5638  * No element type may have more than one ID attribute specified.
5639  *
5640  * [ VC: ID Attribute Default ]
5641  * An ID attribute must have a declared default of #IMPLIED or #REQUIRED.
5642  *
5643  * [ VC: IDREF ]
5644  * Values of type IDREF must match the Name production, and values
5645  * of type IDREFS must match Names; each IDREF Name must match the value
5646  * of an ID attribute on some element in the XML document; i.e. IDREF
5647  * values must match the value of some ID attribute.
5648  *
5649  * [ VC: Entity Name ]
5650  * Values of type ENTITY must match the Name production, values
5651  * of type ENTITIES must match Names; each Entity Name must match the
5652  * name of an unparsed entity declared in the DTD.
5653  *
5654  * [ VC: Name Token ]
5655  * Values of type NMTOKEN must match the Nmtoken production; values
5656  * of type NMTOKENS must match Nmtokens.
5657  *
5658  * Returns the attribute type
5659  */
5660 int
5661 xmlParseAttributeType(xmlParserCtxtPtr ctxt, xmlEnumerationPtr *tree) {
5662     SHRINK;
5663     if (CMP5(CUR_PTR, 'C', 'D', 'A', 'T', 'A')) {
5664         SKIP(5);
5665         return(XML_ATTRIBUTE_CDATA);
5666      } else if (CMP6(CUR_PTR, 'I', 'D', 'R', 'E', 'F', 'S')) {
5667         SKIP(6);
5668         return(XML_ATTRIBUTE_IDREFS);
5669      } else if (CMP5(CUR_PTR, 'I', 'D', 'R', 'E', 'F')) {
5670         SKIP(5);
5671         return(XML_ATTRIBUTE_IDREF);
5672      } else if ((RAW == 'I') && (NXT(1) == 'D')) {
5673         SKIP(2);
5674         return(XML_ATTRIBUTE_ID);
5675      } else if (CMP6(CUR_PTR, 'E', 'N', 'T', 'I', 'T', 'Y')) {
5676         SKIP(6);
5677         return(XML_ATTRIBUTE_ENTITY);
5678      } else if (CMP8(CUR_PTR, 'E', 'N', 'T', 'I', 'T', 'I', 'E', 'S')) {
5679         SKIP(8);
5680         return(XML_ATTRIBUTE_ENTITIES);
5681      } else if (CMP8(CUR_PTR, 'N', 'M', 'T', 'O', 'K', 'E', 'N', 'S')) {
5682         SKIP(8);
5683         return(XML_ATTRIBUTE_NMTOKENS);
5684      } else if (CMP7(CUR_PTR, 'N', 'M', 'T', 'O', 'K', 'E', 'N')) {
5685         SKIP(7);
5686         return(XML_ATTRIBUTE_NMTOKEN);
5687      }
5688      return(xmlParseEnumeratedType(ctxt, tree));
5689 }
5690
5691 /**
5692  * xmlParseAttributeListDecl:
5693  * @ctxt:  an XML parser context
5694  *
5695  * : parse the Attribute list def for an element
5696  *
5697  * [52] AttlistDecl ::= '<!ATTLIST' S Name AttDef* S? '>'
5698  *
5699  * [53] AttDef ::= S Name S AttType S DefaultDecl
5700  *
5701  */
5702 void
5703 xmlParseAttributeListDecl(xmlParserCtxtPtr ctxt) {
5704     const xmlChar *elemName;
5705     const xmlChar *attrName;
5706     xmlEnumerationPtr tree;
5707
5708     if (CMP9(CUR_PTR, '<', '!', 'A', 'T', 'T', 'L', 'I', 'S', 'T')) {
5709         xmlParserInputPtr input = ctxt->input;
5710
5711         SKIP(9);
5712         if (!IS_BLANK_CH(CUR)) {
5713             xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5714                                  "Space required after '<!ATTLIST'\n");
5715         }
5716         SKIP_BLANKS;
5717         elemName = xmlParseName(ctxt);
5718         if (elemName == NULL) {
5719             xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
5720                            "ATTLIST: no name for Element\n");
5721             return;
5722         }
5723         SKIP_BLANKS;
5724         GROW;
5725         while (RAW != '>') {
5726             const xmlChar *check = CUR_PTR;
5727             int type;
5728             int def;
5729             xmlChar *defaultValue = NULL;
5730
5731             GROW;
5732             tree = NULL;
5733             attrName = xmlParseName(ctxt);
5734             if (attrName == NULL) {
5735                 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
5736                                "ATTLIST: no name for Attribute\n");
5737                 break;
5738             }
5739             GROW;
5740             if (!IS_BLANK_CH(CUR)) {
5741                 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5742                         "Space required after the attribute name\n");
5743                 break;
5744             }
5745             SKIP_BLANKS;
5746
5747             type = xmlParseAttributeType(ctxt, &tree);
5748             if (type <= 0) {
5749                 break;
5750             }
5751
5752             GROW;
5753             if (!IS_BLANK_CH(CUR)) {
5754                 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5755                                "Space required after the attribute type\n");
5756                 if (tree != NULL)
5757                     xmlFreeEnumeration(tree);
5758                 break;
5759             }
5760             SKIP_BLANKS;
5761
5762             def = xmlParseDefaultDecl(ctxt, &defaultValue);
5763             if (def <= 0) {
5764                 if (defaultValue != NULL)
5765                     xmlFree(defaultValue);
5766                 if (tree != NULL)
5767                     xmlFreeEnumeration(tree);
5768                 break;
5769             }
5770             if ((type != XML_ATTRIBUTE_CDATA) && (defaultValue != NULL))
5771                 xmlAttrNormalizeSpace(defaultValue, defaultValue);
5772
5773             GROW;
5774             if (RAW != '>') {
5775                 if (!IS_BLANK_CH(CUR)) {
5776                     xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5777                         "Space required after the attribute default value\n");
5778                     if (defaultValue != NULL)
5779                         xmlFree(defaultValue);
5780                     if (tree != NULL)
5781                         xmlFreeEnumeration(tree);
5782                     break;
5783                 }
5784                 SKIP_BLANKS;
5785             }
5786             if (check == CUR_PTR) {
5787                 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
5788                             "in xmlParseAttributeListDecl\n");
5789                 if (defaultValue != NULL)
5790                     xmlFree(defaultValue);
5791                 if (tree != NULL)
5792                     xmlFreeEnumeration(tree);
5793                 break;
5794             }
5795             if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
5796                 (ctxt->sax->attributeDecl != NULL))
5797                 ctxt->sax->attributeDecl(ctxt->userData, elemName, attrName,
5798                                 type, def, defaultValue, tree);
5799             else if (tree != NULL)
5800                 xmlFreeEnumeration(tree);
5801
5802             if ((ctxt->sax2) && (defaultValue != NULL) &&
5803                 (def != XML_ATTRIBUTE_IMPLIED) &&
5804                 (def != XML_ATTRIBUTE_REQUIRED)) {
5805                 xmlAddDefAttrs(ctxt, elemName, attrName, defaultValue);
5806             }
5807             if (ctxt->sax2) {
5808                 xmlAddSpecialAttr(ctxt, elemName, attrName, type);
5809             }
5810             if (defaultValue != NULL)
5811                 xmlFree(defaultValue);
5812             GROW;
5813         }
5814         if (RAW == '>') {
5815             if (input != ctxt->input) {
5816                 xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY,
5817     "Attribute list declaration doesn't start and stop in the same entity\n",
5818                                  NULL, NULL);
5819             }
5820             NEXT;
5821         }
5822     }
5823 }
5824
5825 /**
5826  * xmlParseElementMixedContentDecl:
5827  * @ctxt:  an XML parser context
5828  * @inputchk:  the input used for the current entity, needed for boundary checks
5829  *
5830  * parse the declaration for a Mixed Element content
5831  * The leading '(' and spaces have been skipped in xmlParseElementContentDecl
5832  *
5833  * [51] Mixed ::= '(' S? '#PCDATA' (S? '|' S? Name)* S? ')*' |
5834  *                '(' S? '#PCDATA' S? ')'
5835  *
5836  * [ VC: Proper Group/PE Nesting ] applies to [51] too (see [49])
5837  *
5838  * [ VC: No Duplicate Types ]
5839  * The same name must not appear more than once in a single
5840  * mixed-content declaration.
5841  *
5842  * returns: the list of the xmlElementContentPtr describing the element choices
5843  */
5844 xmlElementContentPtr
5845 xmlParseElementMixedContentDecl(xmlParserCtxtPtr ctxt, int inputchk) {
5846     xmlElementContentPtr ret = NULL, cur = NULL, n;
5847     const xmlChar *elem = NULL;
5848
5849     GROW;
5850     if (CMP7(CUR_PTR, '#', 'P', 'C', 'D', 'A', 'T', 'A')) {
5851         SKIP(7);
5852         SKIP_BLANKS;
5853         SHRINK;
5854         if (RAW == ')') {
5855             if ((ctxt->validate) && (ctxt->input->id != inputchk)) {
5856                 xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY,
5857 "Element content declaration doesn't start and stop in the same entity\n",
5858                                  NULL, NULL);
5859             }
5860             NEXT;
5861             ret = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_PCDATA);
5862             if (ret == NULL)
5863                 return(NULL);
5864             if (RAW == '*') {
5865                 ret->ocur = XML_ELEMENT_CONTENT_MULT;
5866                 NEXT;
5867             }
5868             return(ret);
5869         }
5870         if ((RAW == '(') || (RAW == '|')) {
5871             ret = cur = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_PCDATA);
5872             if (ret == NULL) return(NULL);
5873         }
5874         while (RAW == '|') {
5875             NEXT;
5876             if (elem == NULL) {
5877                 ret = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_OR);
5878                 if (ret == NULL) return(NULL);
5879                 ret->c1 = cur;
5880                 if (cur != NULL)
5881                     cur->parent = ret;
5882                 cur = ret;
5883             } else {
5884                 n = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_OR);
5885                 if (n == NULL) return(NULL);
5886                 n->c1 = xmlNewDocElementContent(ctxt->myDoc, elem, XML_ELEMENT_CONTENT_ELEMENT);
5887                 if (n->c1 != NULL)
5888                     n->c1->parent = n;
5889                 cur->c2 = n;
5890                 if (n != NULL)
5891                     n->parent = cur;
5892                 cur = n;
5893             }
5894             SKIP_BLANKS;
5895             elem = xmlParseName(ctxt);
5896             if (elem == NULL) {
5897                 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
5898                         "xmlParseElementMixedContentDecl : Name expected\n");
5899                 xmlFreeDocElementContent(ctxt->myDoc, cur);
5900                 return(NULL);
5901             }
5902             SKIP_BLANKS;
5903             GROW;
5904         }
5905         if ((RAW == ')') && (NXT(1) == '*')) {
5906             if (elem != NULL) {
5907                 cur->c2 = xmlNewDocElementContent(ctxt->myDoc, elem,
5908                                                XML_ELEMENT_CONTENT_ELEMENT);
5909                 if (cur->c2 != NULL)
5910                     cur->c2->parent = cur;
5911             }
5912             if (ret != NULL)
5913                 ret->ocur = XML_ELEMENT_CONTENT_MULT;
5914             if ((ctxt->validate) && (ctxt->input->id != inputchk)) {
5915                 xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY,
5916 "Element content declaration doesn't start and stop in the same entity\n",
5917                                  NULL, NULL);
5918             }
5919             SKIP(2);
5920         } else {
5921             xmlFreeDocElementContent(ctxt->myDoc, ret);
5922             xmlFatalErr(ctxt, XML_ERR_MIXED_NOT_STARTED, NULL);
5923             return(NULL);
5924         }
5925
5926     } else {
5927         xmlFatalErr(ctxt, XML_ERR_PCDATA_REQUIRED, NULL);
5928     }
5929     return(ret);
5930 }
5931
5932 /**
5933  * xmlParseElementChildrenContentDeclPriv:
5934  * @ctxt:  an XML parser context
5935  * @inputchk:  the input used for the current entity, needed for boundary checks
5936  * @depth: the level of recursion
5937  *
5938  * parse the declaration for an Element children content
5939  * The leading '(' and spaces have been skipped in xmlParseElementContentDecl
5940  *
5941  *
5942  * [47] children ::= (choice | seq) ('?' | '*' | '+')?
5943  *
5944  * [48] cp ::= (Name | choice | seq) ('?' | '*' | '+')?
5945  *
5946  * [49] choice ::= '(' S? cp ( S? '|' S? cp )* S? ')'
5947  *
5948  * [50] seq ::= '(' S? cp ( S? ',' S? cp )* S? ')'
5949  *
5950  * [ VC: Proper Group/PE Nesting ] applies to [49] and [50]
5951  * TODO Parameter-entity replacement text must be properly nested
5952  *      with parenthesized groups. That is to say, if either of the
5953  *      opening or closing parentheses in a choice, seq, or Mixed
5954  *      construct is contained in the replacement text for a parameter
5955  *      entity, both must be contained in the same replacement text. For
5956  *      interoperability, if a parameter-entity reference appears in a
5957  *      choice, seq, or Mixed construct, its replacement text should not
5958  *      be empty, and neither the first nor last non-blank character of
5959  *      the replacement text should be a connector (| or ,).
5960  *
5961  * Returns the tree of xmlElementContentPtr describing the element
5962  *          hierarchy.
5963  */
5964 static xmlElementContentPtr
5965 xmlParseElementChildrenContentDeclPriv(xmlParserCtxtPtr ctxt, int inputchk,
5966                                        int depth) {
5967     xmlElementContentPtr ret = NULL, cur = NULL, last = NULL, op = NULL;
5968     const xmlChar *elem;
5969     xmlChar type = 0;
5970
5971     if (((depth > 128) && ((ctxt->options & XML_PARSE_HUGE) == 0)) ||
5972         (depth >  2048)) {
5973         xmlFatalErrMsgInt(ctxt, XML_ERR_ELEMCONTENT_NOT_FINISHED,
5974 "xmlParseElementChildrenContentDecl : depth %d too deep, use XML_PARSE_HUGE\n",
5975                           depth);
5976         return(NULL);
5977     }
5978     SKIP_BLANKS;
5979     GROW;
5980     if (RAW == '(') {
5981         int inputid = ctxt->input->id;
5982
5983         /* Recurse on first child */
5984         NEXT;
5985         SKIP_BLANKS;
5986         cur = ret = xmlParseElementChildrenContentDeclPriv(ctxt, inputid,
5987                                                            depth + 1);
5988         SKIP_BLANKS;
5989         GROW;
5990     } else {
5991         elem = xmlParseName(ctxt);
5992         if (elem == NULL) {
5993             xmlFatalErr(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED, NULL);
5994             return(NULL);
5995         }
5996         cur = ret = xmlNewDocElementContent(ctxt->myDoc, elem, XML_ELEMENT_CONTENT_ELEMENT);
5997         if (cur == NULL) {
5998             xmlErrMemory(ctxt, NULL);
5999             return(NULL);
6000         }
6001         GROW;
6002         if (RAW == '?') {
6003             cur->ocur = XML_ELEMENT_CONTENT_OPT;
6004             NEXT;
6005         } else if (RAW == '*') {
6006             cur->ocur = XML_ELEMENT_CONTENT_MULT;
6007             NEXT;
6008         } else if (RAW == '+') {
6009             cur->ocur = XML_ELEMENT_CONTENT_PLUS;
6010             NEXT;
6011         } else {
6012             cur->ocur = XML_ELEMENT_CONTENT_ONCE;
6013         }
6014         GROW;
6015     }
6016     SKIP_BLANKS;
6017     SHRINK;
6018     while (RAW != ')') {
6019         /*
6020          * Each loop we parse one separator and one element.
6021          */
6022         if (RAW == ',') {
6023             if (type == 0) type = CUR;
6024
6025             /*
6026              * Detect "Name | Name , Name" error
6027              */
6028             else if (type != CUR) {
6029                 xmlFatalErrMsgInt(ctxt, XML_ERR_SEPARATOR_REQUIRED,
6030                     "xmlParseElementChildrenContentDecl : '%c' expected\n",
6031                                   type);
6032                 if ((last != NULL) && (last != ret))
6033                     xmlFreeDocElementContent(ctxt->myDoc, last);
6034                 if (ret != NULL)
6035                     xmlFreeDocElementContent(ctxt->myDoc, ret);
6036                 return(NULL);
6037             }
6038             NEXT;
6039
6040             op = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_SEQ);
6041             if (op == NULL) {
6042                 if ((last != NULL) && (last != ret))
6043                     xmlFreeDocElementContent(ctxt->myDoc, last);
6044                 xmlFreeDocElementContent(ctxt->myDoc, ret);
6045                 return(NULL);
6046             }
6047             if (last == NULL) {
6048                 op->c1 = ret;
6049                 if (ret != NULL)
6050                     ret->parent = op;
6051                 ret = cur = op;
6052             } else {
6053                 cur->c2 = op;
6054                 if (op != NULL)
6055                     op->parent = cur;
6056                 op->c1 = last;
6057                 if (last != NULL)
6058                     last->parent = op;
6059                 cur =op;
6060                 last = NULL;
6061             }
6062         } else if (RAW == '|') {
6063             if (type == 0) type = CUR;
6064
6065             /*
6066              * Detect "Name , Name | Name" error
6067              */
6068             else if (type != CUR) {
6069                 xmlFatalErrMsgInt(ctxt, XML_ERR_SEPARATOR_REQUIRED,
6070                     "xmlParseElementChildrenContentDecl : '%c' expected\n",
6071                                   type);
6072                 if ((last != NULL) && (last != ret))
6073                     xmlFreeDocElementContent(ctxt->myDoc, last);
6074                 if (ret != NULL)
6075                     xmlFreeDocElementContent(ctxt->myDoc, ret);
6076                 return(NULL);
6077             }
6078             NEXT;
6079
6080             op = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_OR);
6081             if (op == NULL) {
6082                 if ((last != NULL) && (last != ret))
6083                     xmlFreeDocElementContent(ctxt->myDoc, last);
6084                 if (ret != NULL)
6085                     xmlFreeDocElementContent(ctxt->myDoc, ret);
6086                 return(NULL);
6087             }
6088             if (last == NULL) {
6089                 op->c1 = ret;
6090                 if (ret != NULL)
6091                     ret->parent = op;
6092                 ret = cur = op;
6093             } else {
6094                 cur->c2 = op;
6095                 if (op != NULL)
6096                     op->parent = cur;
6097                 op->c1 = last;
6098                 if (last != NULL)
6099                     last->parent = op;
6100                 cur =op;
6101                 last = NULL;
6102             }
6103         } else {
6104             xmlFatalErr(ctxt, XML_ERR_ELEMCONTENT_NOT_FINISHED, NULL);
6105             if ((last != NULL) && (last != ret))
6106                 xmlFreeDocElementContent(ctxt->myDoc, last);
6107             if (ret != NULL)
6108                 xmlFreeDocElementContent(ctxt->myDoc, ret);
6109             return(NULL);
6110         }
6111         GROW;
6112         SKIP_BLANKS;
6113         GROW;
6114         if (RAW == '(') {
6115             int inputid = ctxt->input->id;
6116             /* Recurse on second child */
6117             NEXT;
6118             SKIP_BLANKS;
6119             last = xmlParseElementChildrenContentDeclPriv(ctxt, inputid,
6120                                                           depth + 1);
6121             SKIP_BLANKS;
6122         } else {
6123             elem = xmlParseName(ctxt);
6124             if (elem == NULL) {
6125                 xmlFatalErr(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED, NULL);
6126                 if (ret != NULL)
6127                     xmlFreeDocElementContent(ctxt->myDoc, ret);
6128                 return(NULL);
6129             }
6130             last = xmlNewDocElementContent(ctxt->myDoc, elem, XML_ELEMENT_CONTENT_ELEMENT);
6131             if (last == NULL) {
6132                 if (ret != NULL)
6133                     xmlFreeDocElementContent(ctxt->myDoc, ret);
6134                 return(NULL);
6135             }
6136             if (RAW == '?') {
6137                 last->ocur = XML_ELEMENT_CONTENT_OPT;
6138                 NEXT;
6139             } else if (RAW == '*') {
6140                 last->ocur = XML_ELEMENT_CONTENT_MULT;
6141                 NEXT;
6142             } else if (RAW == '+') {
6143                 last->ocur = XML_ELEMENT_CONTENT_PLUS;
6144                 NEXT;
6145             } else {
6146                 last->ocur = XML_ELEMENT_CONTENT_ONCE;
6147             }
6148         }
6149         SKIP_BLANKS;
6150         GROW;
6151     }
6152     if ((cur != NULL) && (last != NULL)) {
6153         cur->c2 = last;
6154         if (last != NULL)
6155             last->parent = cur;
6156     }
6157     if ((ctxt->validate) && (ctxt->input->id != inputchk)) {
6158         xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY,
6159 "Element content declaration doesn't start and stop in the same entity\n",
6160                          NULL, NULL);
6161     }
6162     NEXT;
6163     if (RAW == '?') {
6164         if (ret != NULL) {
6165             if ((ret->ocur == XML_ELEMENT_CONTENT_PLUS) ||
6166                 (ret->ocur == XML_ELEMENT_CONTENT_MULT))
6167                 ret->ocur = XML_ELEMENT_CONTENT_MULT;
6168             else
6169                 ret->ocur = XML_ELEMENT_CONTENT_OPT;
6170         }
6171         NEXT;
6172     } else if (RAW == '*') {
6173         if (ret != NULL) {
6174             ret->ocur = XML_ELEMENT_CONTENT_MULT;
6175             cur = ret;
6176             /*
6177              * Some normalization:
6178              * (a | b* | c?)* == (a | b | c)*
6179              */
6180             while ((cur != NULL) && (cur->type == XML_ELEMENT_CONTENT_OR)) {
6181                 if ((cur->c1 != NULL) &&
6182                     ((cur->c1->ocur == XML_ELEMENT_CONTENT_OPT) ||
6183                      (cur->c1->ocur == XML_ELEMENT_CONTENT_MULT)))
6184                     cur->c1->ocur = XML_ELEMENT_CONTENT_ONCE;
6185                 if ((cur->c2 != NULL) &&
6186                     ((cur->c2->ocur == XML_ELEMENT_CONTENT_OPT) ||
6187                      (cur->c2->ocur == XML_ELEMENT_CONTENT_MULT)))
6188                     cur->c2->ocur = XML_ELEMENT_CONTENT_ONCE;
6189                 cur = cur->c2;
6190             }
6191         }
6192         NEXT;
6193     } else if (RAW == '+') {
6194         if (ret != NULL) {
6195             int found = 0;
6196
6197             if ((ret->ocur == XML_ELEMENT_CONTENT_OPT) ||
6198                 (ret->ocur == XML_ELEMENT_CONTENT_MULT))
6199                 ret->ocur = XML_ELEMENT_CONTENT_MULT;
6200             else
6201                 ret->ocur = XML_ELEMENT_CONTENT_PLUS;
6202             /*
6203              * Some normalization:
6204              * (a | b*)+ == (a | b)*
6205              * (a | b?)+ == (a | b)*
6206              */
6207             while ((cur != NULL) && (cur->type == XML_ELEMENT_CONTENT_OR)) {
6208                 if ((cur->c1 != NULL) &&
6209                     ((cur->c1->ocur == XML_ELEMENT_CONTENT_OPT) ||
6210                      (cur->c1->ocur == XML_ELEMENT_CONTENT_MULT))) {
6211                     cur->c1->ocur = XML_ELEMENT_CONTENT_ONCE;
6212                     found = 1;
6213                 }
6214                 if ((cur->c2 != NULL) &&
6215                     ((cur->c2->ocur == XML_ELEMENT_CONTENT_OPT) ||
6216                      (cur->c2->ocur == XML_ELEMENT_CONTENT_MULT))) {
6217                     cur->c2->ocur = XML_ELEMENT_CONTENT_ONCE;
6218                     found = 1;
6219                 }
6220                 cur = cur->c2;
6221             }
6222             if (found)
6223                 ret->ocur = XML_ELEMENT_CONTENT_MULT;
6224         }
6225         NEXT;
6226     }
6227     return(ret);
6228 }
6229
6230 /**
6231  * xmlParseElementChildrenContentDecl:
6232  * @ctxt:  an XML parser context
6233  * @inputchk:  id of the input in which the opening '(' was read, needed for
      *             the entity boundary check done at the closing ')'
6234  *
6235  * Parse the declaration of a children (element-only) content model.
6236  * The leading '(' and spaces have been skipped in xmlParseElementContentDecl
      *
      * This public entry point only forwards to
      * xmlParseElementChildrenContentDeclPriv(), starting at nesting depth 1.
6237  *
6238  * [47] children ::= (choice | seq) ('?' | '*' | '+')?
6239  *
6240  * [48] cp ::= (Name | choice | seq) ('?' | '*' | '+')?
6241  *
6242  * [49] choice ::= '(' S? cp ( S? '|' S? cp )* S? ')'
6243  *
6244  * [50] seq ::= '(' S? cp ( S? ',' S? cp )* S? ')'
6245  *
6246  * [ VC: Proper Group/PE Nesting ] applies to [49] and [50]
6247  * TODO Parameter-entity replacement text must be properly nested
6248  *      with parenthesized groups. That is to say, if either of the
6249  *      opening or closing parentheses in a choice, seq, or Mixed
6250  *      construct is contained in the replacement text for a parameter
6251  *      entity, both must be contained in the same replacement text. For
6252  *      interoperability, if a parameter-entity reference appears in a
6253  *      choice, seq, or Mixed construct, its replacement text should not
6254  *      be empty, and neither the first nor last non-blank character of
6255  *      the replacement text should be a connector (| or ,).
6256  *
6257  * Returns the tree of xmlElementContentPtr describing the element
6258  *          hierarchy, or NULL if the content model could not be parsed.
6259  */
6260 xmlElementContentPtr
6261 xmlParseElementChildrenContentDecl(xmlParserCtxtPtr ctxt, int inputchk) {
6262     /* stub left for API/ABI compat; the private parser is entered at depth 1 */
6263     return(xmlParseElementChildrenContentDeclPriv(ctxt, inputchk, 1));
6264 }
6265
6266 /**
6267  * xmlParseElementContentDecl:
6268  * @ctxt:  an XML parser context
6269  * @name:  the name of the element being defined (only used in the error
      *         message emitted when '(' is missing)
6270  * @result:  the Element Content pointer will be stored here if any; it is
      *           reset to NULL on entry
6271  *
6272  * Parse the declaration of an element content, either Mixed or Children;
6273  * the cases EMPTY and ANY are handled directly in xmlParseElementDecl.
      * The current character must be the opening '('. It is consumed here and
      * the content is then handed to the Mixed parser when "#PCDATA" follows,
      * or to the children parser otherwise.
6274  *
6275  * [46] contentspec ::= 'EMPTY' | 'ANY' | Mixed | children
6276  *
6277  * Returns XML_ELEMENT_TYPE_MIXED or XML_ELEMENT_TYPE_ELEMENT depending on
      * the branch taken, or -1 if the current character is not '('.
      * The type is returned even when the sub-parser produced no tree, so
      * *result may be NULL on a non-negative return.
6278  */
6279
6280 int
6281 xmlParseElementContentDecl(xmlParserCtxtPtr ctxt, const xmlChar *name,
6282                            xmlElementContentPtr *result) {
6283
6284     xmlElementContentPtr tree = NULL;
         /* id of the input holding the '(' ; passed down for boundary checks */
6285     int inputid = ctxt->input->id;
6286     int res;
6287
6288     *result = NULL;
6289
6290     if (RAW != '(') {
6291         xmlFatalErrMsgStr(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED,
6292                 "xmlParseElementContentDecl : %s '(' expected\n", name);
6293         return(-1);
6294     }
6295     NEXT;
6296     GROW;
6297     SKIP_BLANKS;
         /* "#PCDATA" right after the parenthesis selects the Mixed production */
6298     if (CMP7(CUR_PTR, '#', 'P', 'C', 'D', 'A', 'T', 'A')) {
6299         tree = xmlParseElementMixedContentDecl(ctxt, inputid);
6300         res = XML_ELEMENT_TYPE_MIXED;
6301     } else {
             /* top-level group of a children content model: depth starts at 1 */
6302         tree = xmlParseElementChildrenContentDeclPriv(ctxt, inputid, 1);
6303         res = XML_ELEMENT_TYPE_ELEMENT;
6304     }
6305     SKIP_BLANKS;
6306     *result = tree;
6307     return(res);
6308 }
6309
6310 /**
6311  * xmlParseElementDecl:
6312  * @ctxt:  an XML parser context
6313  *
6314  * Parse an Element declaration and, when it is terminated by '>', report
      * it through the SAX elementDecl callback (if SAX is enabled and the
      * callback is set).
6315  *
6316  * [45] elementdecl ::= '<!ELEMENT' S Name S contentspec S? '>'
6317  *
6318  * [ VC: Unique Element Type Declaration ]
6319  * No element type may be declared more than once
6320  *
6321  * Returns the type of the element, or -1 in case of error.
      * -1 is returned when the input does not start with "<!ELEMENT" (nothing
      * is consumed then), when the element name is missing, or when none of
      * 'EMPTY', 'ANY' or '(' follows the name. A missing closing '>' raises
      * XML_ERR_GT_REQUIRED and frees the content tree, but the type parsed so
      * far is still returned.
6322  */
6323 int
6324 xmlParseElementDecl(xmlParserCtxtPtr ctxt) {
6325     const xmlChar *name;
6326     int ret = -1;
6327     xmlElementContentPtr content  = NULL;
6328
6329     /* GROW; done in the caller */
6330     if (CMP9(CUR_PTR, '<', '!', 'E', 'L', 'E', 'M', 'E', 'N', 'T')) {
             /* remembered to verify the declaration ends in the same input */
6331         xmlParserInputPtr input = ctxt->input;
6332
6333         SKIP(9);
6334         if (!IS_BLANK_CH(CUR)) {
6335             xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6336                            "Space required after 'ELEMENT'\n");
6337         }
6338         SKIP_BLANKS;
6339         name = xmlParseName(ctxt);
6340         if (name == NULL) {
6341             xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
6342                            "xmlParseElementDecl: no name for Element\n");
6343             return(-1);
6344         }
             /* leave inputs that are exhausted while others remain stacked */
6345         while ((RAW == 0) && (ctxt->inputNr > 1))
6346             xmlPopInput(ctxt);
6347         if (!IS_BLANK_CH(CUR)) {
6348             xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6349                            "Space required after the element name\n");
6350         }
6351         SKIP_BLANKS;
6352         if (CMP5(CUR_PTR, 'E', 'M', 'P', 'T', 'Y')) {
6353             SKIP(5);
6354             /*
6355              * Element must always be empty.
6356              */
6357             ret = XML_ELEMENT_TYPE_EMPTY;
6358         } else if ((RAW == 'A') && (NXT(1) == 'N') &&
6359                    (NXT(2) == 'Y')) {
6360             SKIP(3);
6361             /*
6362              * Element is a generic container.
6363              */
6364             ret = XML_ELEMENT_TYPE_ANY;
6365         } else if (RAW == '(') {
                 /* Mixed or children content; content receives the tree */
6366             ret = xmlParseElementContentDecl(ctxt, name, &content);
6367         } else {
6368             /*
6369              * [ WFC: PEs in Internal Subset ] error handling.
6370              */
6371             if ((RAW == '%') && (ctxt->external == 0) &&
6372                 (ctxt->inputNr == 1)) {
6373                 xmlFatalErrMsg(ctxt, XML_ERR_PEREF_IN_INT_SUBSET,
6374           "PEReference: forbidden within markup decl in internal subset\n");
6375             } else {
6376                 xmlFatalErrMsg(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED,
6377                       "xmlParseElementDecl: 'EMPTY', 'ANY' or '(' expected\n");
6378             }
6379             return(-1);
6380         }
6381
6382         SKIP_BLANKS;
6383         /*
6384          * Pop-up of finished entities.
6385          */
6386         while ((RAW == 0) && (ctxt->inputNr > 1))
6387             xmlPopInput(ctxt);
6388         SKIP_BLANKS;
6389
6390         if (RAW != '>') {
                 /* error path: drop the tree; ret keeps the parsed type */
6391             xmlFatalErr(ctxt, XML_ERR_GT_REQUIRED, NULL);
6392             if (content != NULL) {
6393                 xmlFreeDocElementContent(ctxt->myDoc, content);
6394             }
6395         } else {
6396             if (input != ctxt->input) {
6397                 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
6398     "Element declaration doesn't start and stop in the same entity\n");
6399             }
6400
6401             NEXT;
6402             if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
6403                 (ctxt->sax->elementDecl != NULL)) {
                     /*
                      * parent is cleared before the callback and inspected
                      * afterwards: a tree whose parent is still NULL is freed
                      * here, presumably because the callback did not adopt it.
                      */
6404                 if (content != NULL)
6405                     content->parent = NULL;
6406                 ctxt->sax->elementDecl(ctxt->userData, name, ret,
6407                                        content);
6408                 if ((content != NULL) && (content->parent == NULL)) {
6409                     /*
6410                      * this is a trick: if xmlAddElementDecl is called,
6411                      * instead of copying the full tree it is plugged directly
6412                      * if called from the parser. Avoid duplicating the
6413                      * interfaces or change the API/ABI
6414                      */
6415                     xmlFreeDocElementContent(ctxt->myDoc, content);
6416                 }
6417             } else if (content != NULL) {
                     /* nobody to hand the tree to */
6418                 xmlFreeDocElementContent(ctxt->myDoc, content);
6419             }
6420         }
6421     }
6422     return(ret);
6423 }
6424
6425 /**
6426  * xmlParseConditionalSections
6427  * @ctxt:  an XML parser context
6428  *
      * Parse one conditional section. The function starts by skipping three
      * characters, so it is expected to be entered with the input positioned
      * on "<![" (the callers visible in this file check for it first).
      *
      * INCLUDE: the content is parsed like external subset declarations
      * (nested conditional sections, blanks, PE references, markup
      * declarations) until "]]>" or the end of the input is reached.
      *
      * IGNORE: the content is skipped character by character while counting
      * nested "<![" / "]]>" pairs; SAX events are disabled in the meantime
      * unless the parser is in recovery mode.
      *
6429  * [61] conditionalSect ::= includeSect | ignoreSect
6430  * [62] includeSect ::= '<![' S? 'INCLUDE' S? '[' extSubsetDecl ']]>'
6431  * [63] ignoreSect ::= '<![' S? 'IGNORE' S? '[' ignoreSectContents* ']]>'
6432  * [64] ignoreSectContents ::= Ignore ('<![' ignoreSectContents ']]>' Ignore)*
6433  * [65] Ignore ::= Char* - (Char* ('<![' | ']]>') Char*)
6434  */
6435
6436 static void
6437 xmlParseConditionalSections(xmlParserCtxtPtr ctxt) {
         /* id of the input where the section starts, for boundary checks */
6438     int id = ctxt->input->id;
6439
6440     SKIP(3);
6441     SKIP_BLANKS;
6442     if (CMP7(CUR_PTR, 'I', 'N', 'C', 'L', 'U', 'D', 'E')) {
6443         SKIP(7);
6444         SKIP_BLANKS;
6445         if (RAW != '[') {
                 /* reported, but parsing of the section content still goes on */
6446             xmlFatalErr(ctxt, XML_ERR_CONDSEC_INVALID, NULL);
6447         } else {
6448             if (ctxt->input->id != id) {
6449                 xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY,
6450             "All markup of the conditional section is not in the same entity\n",
6451                                      NULL, NULL);
6452             }
6453             NEXT;
6454         }
6455         if (xmlParserDebugEntities) {
6456             if ((ctxt->input != NULL) && (ctxt->input->filename))
6457                 xmlGenericError(xmlGenericErrorContext,
6458                         "%s(%d): ", ctxt->input->filename,
6459                         ctxt->input->line);
6460             xmlGenericError(xmlGenericErrorContext,
6461                     "Entering INCLUDE Conditional Section\n");
6462         }
6463
             /* loop until "]]>" or the end of the input */
6464         while ((RAW != 0) && ((RAW != ']') || (NXT(1) != ']') ||
6465                (NXT(2) != '>'))) {
                 /*
                  * Snapshot of the position, used below to detect an iteration
                  * that made no progress.
                  * NOTE(review): cons is an unsigned int copy of
                  * input->consumed; if that field is wider (TODO confirm its
                  * declared type) the comparison uses a truncated value.
                  */
6466             const xmlChar *check = CUR_PTR;
6467             unsigned int cons = ctxt->input->consumed;
6468
6469             if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
6470                 xmlParseConditionalSections(ctxt);
6471             } else if (IS_BLANK_CH(CUR)) {
6472                 NEXT;
6473             } else if (RAW == '%') {
6474                 xmlParsePEReference(ctxt);
6475             } else
6476                 xmlParseMarkupDecl(ctxt);
6477
6478             /*
6479              * Pop-up of finished entities.
6480              */
6481             while ((RAW == 0) && (ctxt->inputNr > 1))
6482                 xmlPopInput(ctxt);
6483
                 /* nothing was consumed: bail out instead of looping forever */
6484             if ((CUR_PTR == check) && (cons == ctxt->input->consumed)) {
6485                 xmlFatalErr(ctxt, XML_ERR_EXT_SUBSET_NOT_FINISHED, NULL);
6486                 break;
6487             }
6488         }
6489         if (xmlParserDebugEntities) {
6490             if ((ctxt->input != NULL) && (ctxt->input->filename))
6491                 xmlGenericError(xmlGenericErrorContext,
6492                         "%s(%d): ", ctxt->input->filename,
6493                         ctxt->input->line);
6494             xmlGenericError(xmlGenericErrorContext,
6495                     "Leaving INCLUDE Conditional Section\n");
6496         }
6497
6498     } else if (CMP6(CUR_PTR, 'I', 'G', 'N', 'O', 'R', 'E')) {
             /* saved disableSAX / instate values, restored after the scan */
6499         int state;
6500         xmlParserInputState instate;
             /* nesting level of "<![" inside the ignored content */
6501         int depth = 0;
6502
6503         SKIP(6);
6504         SKIP_BLANKS;
6505         if (RAW != '[') {
6506             xmlFatalErr(ctxt, XML_ERR_CONDSEC_INVALID, NULL);
6507         } else {
6508             if (ctxt->input->id != id) {
6509                 xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY,
6510             "All markup of the conditional section is not in the same entity\n",
6511                                      NULL, NULL);
6512             }
6513             NEXT;
6514         }
6515         if (xmlParserDebugEntities) {
6516             if ((ctxt->input != NULL) && (ctxt->input->filename))
6517                 xmlGenericError(xmlGenericErrorContext,
6518                         "%s(%d): ", ctxt->input->filename,
6519                         ctxt->input->line);
6520             xmlGenericError(xmlGenericErrorContext,
6521                     "Entering IGNORE Conditional Section\n");
6522         }
6523
6524         /*
6525          * Parse up to the end of the conditional section
6526          * But disable SAX event generating DTD building in the meantime
6527          */
6528         state = ctxt->disableSAX;
6529         instate = ctxt->instate;
6530         if (ctxt->recovery == 0) ctxt->disableSAX = 1;
6531         ctxt->instate = XML_PARSER_IGNORE;
6532
             /*
              * "<![" opens a nested level, "]]>" closes one. The "]]>" that
              * drops depth below zero is NOT skipped here: it ends the loop
              * and is consumed by the SKIP(3) at the end of the function.
              */
6533         while ((depth >= 0) && (RAW != 0)) {
6534           if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
6535             depth++;
6536             SKIP(3);
6537             continue;
6538           }
6539           if ((RAW == ']') && (NXT(1) == ']') && (NXT(2) == '>')) {
6540             if (--depth >= 0) SKIP(3);
6541             continue;
6542           }
6543           NEXT;
6544           continue;
6545         }
6546
6547         ctxt->disableSAX = state;
6548         ctxt->instate = instate;
6549
6550         if (xmlParserDebugEntities) {
6551             if ((ctxt->input != NULL) && (ctxt->input->filename))
6552                 xmlGenericError(xmlGenericErrorContext,
6553                         "%s(%d): ", ctxt->input->filename,
6554                         ctxt->input->line);
6555             xmlGenericError(xmlGenericErrorContext,
6556                     "Leaving IGNORE Conditional Section\n");
6557         }
6558
6559     } else {
             /* neither INCLUDE nor IGNORE follows "<![" */
6560         xmlFatalErr(ctxt, XML_ERR_CONDSEC_INVALID_KEYWORD, NULL);
6561     }
6562
6563     if (RAW == 0)
6564         SHRINK;
6565
         /*
          * End of input means the section was never closed; otherwise skip
          * the closing "]]>".
          * NOTE(review): the SKIP(3) below runs whenever RAW != 0, which
          * includes the invalid-keyword branch and the no-progress break of
          * the INCLUDE loop, where the input is not known to be on "]]>".
          */
6566     if (RAW == 0) {
6567         xmlFatalErr(ctxt, XML_ERR_CONDSEC_NOT_FINISHED, NULL);
6568     } else {
6569         if (ctxt->input->id != id) {
6570             xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY,
6571         "All markup of the conditional section is not in the same entity\n",
6572                                  NULL, NULL);
6573         }
6574         SKIP(3);
6575     }
6576 }
6577
6578 /**
6579  * xmlParseMarkupDecl:
6580  * @ctxt:  an XML parser context
6581  *
6582  * Parse Markup declarations.
      *
      * The declaration parser is selected from the characters following '<':
      * "<!EL" element, "<!EN" entity, "<!A" attribute list, "<!N" notation,
      * "<!-" comment and "<?" processing instruction. Anything else is left
      * untouched here. Afterwards a PE reference is parsed when in the
      * internal subset with a single input, and a conditional section is
      * parsed when in the internal subset with stacked inputs.
      * ctxt->instate is set to XML_PARSER_DTD before returning.
6583  *
6584  * [29] markupdecl ::= elementdecl | AttlistDecl | EntityDecl |
6585  *                     NotationDecl | PI | Comment
6586  *
6587  * [ VC: Proper Declaration/PE Nesting ]
6588  * Parameter-entity replacement text must be properly nested with
6589  * markup declarations. That is to say, if either the first character
6590  * or the last character of a markup declaration (markupdecl above) is
6591  * contained in the replacement text for a parameter-entity reference,
6592  * both must be contained in the same replacement text.
6593  *
6594  * [ WFC: PEs in Internal Subset ]
6595  * In the internal DTD subset, parameter-entity references can occur
6596  * only where markup declarations can occur, not within markup declarations.
6597  * (This does not apply to references that occur in external parameter
6598  * entities or to the external subset.)
6599  */
6600 void
6601 xmlParseMarkupDecl(xmlParserCtxtPtr ctxt) {
6602     GROW;
6603     if (CUR == '<') {
6604         if (NXT(1) == '!') {
                 /* only one or two characters after "<!" pick the parser */
6605             switch (NXT(2)) {
6606                 case 'E':
6607                     if (NXT(3) == 'L')
6608                         xmlParseElementDecl(ctxt);
6609                     else if (NXT(3) == 'N')
6610                         xmlParseEntityDecl(ctxt);
6611                     break;
6612                 case 'A':
6613                     xmlParseAttributeListDecl(ctxt);
6614                     break;
6615                 case 'N':
6616                     xmlParseNotationDecl(ctxt);
6617                     break;
6618                 case '-':
6619                     xmlParseComment(ctxt);
6620                     break;
6621                 default:
6622                     /* there is an error but it will be detected later */
6623                     break;
6624             }
6625         } else if (NXT(1) == '?') {
6626             xmlParsePI(ctxt);
6627         }
6628     }
6629     /*
6630      * This is only for internal subset. On external entities,
6631      * the replacement is done before parsing stage
6632      */
6633     if ((ctxt->external == 0) && (ctxt->inputNr == 1))
6634         xmlParsePEReference(ctxt);
6635
6636     /*
6637      * Conditional sections are allowed from entities included
6638      * by PE References in the internal subset.
6639      */
6640     if ((ctxt->external == 0) && (ctxt->inputNr > 1)) {
6641         if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
6642             xmlParseConditionalSections(ctxt);
6643         }
6644     }
6645
6646     ctxt->instate = XML_PARSER_DTD;
6647 }
6648
6649 /**
6650  * xmlParseTextDecl:
6651  * @ctxt:  an XML parser context
6652  *
6653  * Parse an XML declaration header for external entities.
      *
      * The input must start with "<?xml" followed by a blank, otherwise
      * XML_ERR_XMLDECL_NOT_STARTED is raised and nothing is consumed.
      * The version string (XML_DEFAULT_VERSION when no VersionInfo is present)
      * is stored in ctxt->input->version. Parsing stops right after the
      * encoding declaration when ctxt->errNo is XML_ERR_UNSUPPORTED_ENCODING.
6654  *
6655  * [77] TextDecl ::= '<?xml' VersionInfo? EncodingDecl S? '?>'
6656  */
6657
6658 void
6659 xmlParseTextDecl(xmlParserCtxtPtr ctxt) {
6660     xmlChar *version;
6661     const xmlChar *encoding;
6662
6663     /*
6664      * We know that '<?xml' is here.
6665      */
6666     if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
6667         SKIP(5);
6668     } else {
6669         xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_STARTED, NULL);
6670         return;
6671     }
6672
         /*
          * NOTE(review): NXT(5) was tested for a blank just above, so this
          * check presumably never fires - confirm against SKIP()'s definition.
          */
6673     if (!IS_BLANK_CH(CUR)) {
6674         xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6675                        "Space needed after '<?xml'\n");
6676     }
6677     SKIP_BLANKS;
6678
6679     /*
6680      * We may have the VersionInfo here.
6681      */
6682     version = xmlParseVersionInfo(ctxt);
6683     if (version == NULL)
6684         version = xmlCharStrdup(XML_DEFAULT_VERSION);
6685     else {
6686         if (!IS_BLANK_CH(CUR)) {
6687             xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6688                            "Space needed here\n");
6689         }
6690     }
         /* the input takes the version string from here on */
6691     ctxt->input->version = version;
6692
6693     /*
6694      * We must have the encoding declaration
6695      */
6696     encoding = xmlParseEncodingDecl(ctxt);
6697     if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
6698         /*
6699          * The XML REC instructs us to stop parsing right here
6700          */
6701         return;
6702     }
         /* only complain about the encoding if no earlier error was recorded */
6703     if ((encoding == NULL) && (ctxt->errNo == XML_ERR_OK)) {
6704         xmlFatalErrMsg(ctxt, XML_ERR_MISSING_ENCODING,
6705                        "Missing encoding in text declaration\n");
6706     }
6707
6708     SKIP_BLANKS;
6709     if ((RAW == '?') && (NXT(1) == '>')) {
6710         SKIP(2);
6711     } else if (RAW == '>') {
6712         /* Deprecated old WD ... */
6713         xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
6714         NEXT;
6715     } else {
             /* resynchronize on the next end of tag, then step over it */
6716         xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
6717         MOVETO_ENDTAG(CUR_PTR);
6718         NEXT;
6719     }
6720 }
6721
6722 /**
6723  * xmlParseExternalSubset:
6724  * @ctxt:  an XML parser context
6725  * @ExternalID: the external identifier
6726  * @SystemID: the system identifier (or URL)
6727  *
6728  * Parse Markup declarations from an external subset.
      *
      * Steps, in order: optional encoding detection from the first four bytes
      * (only when ctxt->encoding is NULL), optional text declaration, creation
      * of ctxt->myDoc and of its internal subset node when they are missing,
      * then the declaration loop. ctxt->external is set to 1 and is not reset
      * by this function. XML_ERR_EXT_SUBSET_NOT_FINISHED is raised if input
      * remains once the loop stops.
6729  *
6730  * [30] extSubset ::= textDecl? extSubsetDecl
6731  *
6732  * [31] extSubsetDecl ::= (markupdecl | conditionalSect | PEReference | S) *
6733  */
6734 void
6735 xmlParseExternalSubset(xmlParserCtxtPtr ctxt, const xmlChar *ExternalID,
6736                        const xmlChar *SystemID) {
6737     xmlDetectSAX2(ctxt);
6738     GROW;
6739
         /* no encoding known yet: guess it from the first four bytes */
6740     if ((ctxt->encoding == NULL) &&
6741         (ctxt->input->end - ctxt->input->cur >= 4)) {
6742         xmlChar start[4];
6743         xmlCharEncoding enc;
6744
6745         start[0] = RAW;
6746         start[1] = NXT(1);
6747         start[2] = NXT(2);
6748         start[3] = NXT(3);
6749         enc = xmlDetectCharEncoding(start, 4);
6750         if (enc != XML_CHAR_ENCODING_NONE)
6751             xmlSwitchEncoding(ctxt, enc);
6752     }
6753
6754     if (CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) {
6755         xmlParseTextDecl(ctxt);
6756         if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
6757             /*
6758              * The XML REC instructs us to stop parsing right here
6759              */
6760             ctxt->instate = XML_PARSER_EOF;
6761             return;
6762         }
6763     }
         /* the subset needs a document to attach to: create one if necessary */
6764     if (ctxt->myDoc == NULL) {
6765         ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
6766         if (ctxt->myDoc == NULL) {
6767             xmlErrMemory(ctxt, "New Doc failed");
6768             return;
6769         }
6770         ctxt->myDoc->properties = XML_DOC_INTERNAL;
6771     }
6772     if ((ctxt->myDoc != NULL) && (ctxt->myDoc->intSubset == NULL))
6773         xmlCreateIntSubset(ctxt->myDoc, NULL, ExternalID, SystemID);
6774
6775     ctxt->instate = XML_PARSER_DTD;
6776     ctxt->external = 1;
         /* keep going while the input starts with "<?", "<!", '%' or a blank */
6777     while (((RAW == '<') && (NXT(1) == '?')) ||
6778            ((RAW == '<') && (NXT(1) == '!')) ||
6779            (RAW == '%') || IS_BLANK_CH(CUR)) {
             /*
              * Snapshot of the position, used below to detect an iteration
              * that made no progress.
              * NOTE(review): cons is an unsigned int copy of input->consumed;
              * if that field is wider (TODO confirm its declared type) the
              * comparison uses a truncated value.
              */
6780         const xmlChar *check = CUR_PTR;
6781         unsigned int cons = ctxt->input->consumed;
6782
6783         GROW;
6784         if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
6785             xmlParseConditionalSections(ctxt);
6786         } else if (IS_BLANK_CH(CUR)) {
6787             NEXT;
6788         } else if (RAW == '%') {
6789             xmlParsePEReference(ctxt);
6790         } else
6791             xmlParseMarkupDecl(ctxt);
6792
6793         /*
6794          * Pop-up of finished entities.
6795          */
6796         while ((RAW == 0) && (ctxt->inputNr > 1))
6797             xmlPopInput(ctxt);
6798
             /* nothing was consumed: bail out instead of looping forever */
6799         if ((CUR_PTR == check) && (cons == ctxt->input->consumed)) {
6800             xmlFatalErr(ctxt, XML_ERR_EXT_SUBSET_NOT_FINISHED, NULL);
6801             break;
6802         }
6803     }
6804
         /* anything left over is content the loop above could not start on */
6805     if (RAW != 0) {
6806         xmlFatalErr(ctxt, XML_ERR_EXT_SUBSET_NOT_FINISHED, NULL);
6807     }
6808
6809 }
6810
6811 /**
6812  * xmlParseReference:
6813  * @ctxt:  an XML parser context
6814  *
6815  * Parse and handle entity references in content. Depending on the SAX
6816  * interface, this may end up in a call to characters() if this is a
6817  * CharRef, a predefined entity, if there is no reference() callback,
6818  * or if the parser was asked to switch to that mode.
6819  *
6820  * [67] Reference ::= EntityRef | CharRef
6821  */
6822 void
6823 xmlParseReference(xmlParserCtxtPtr ctxt) {
6824     xmlEntityPtr ent;
6825     xmlChar *val;
6826     int was_checked;
6827     xmlNodePtr list = NULL;
6828     xmlParserErrors ret = XML_ERR_OK;
6829
6830
6831     if (RAW != '&')
6832         return;
6833
6834     /*
6835      * Simple case of a CharRef
6836      */
6837     if (NXT(1) == '#') {
6838         int i = 0;
6839         xmlChar out[10];
6840         int hex = NXT(2);
6841         int value = xmlParseCharRef(ctxt);
6842
6843         if (value == 0)
6844             return;
6845         if (ctxt->charset != XML_CHAR_ENCODING_UTF8) {
6846             /*
6847              * So we are using non-UTF-8 buffers
6848              * Check that the char fit on 8bits, if not
6849              * generate a CharRef.
6850              */
6851             if (value <= 0xFF) {
6852                 out[0] = value;
6853                 out[1] = 0;
6854                 if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
6855                     (!ctxt->disableSAX))
6856                     ctxt->sax->characters(ctxt->userData, out, 1);
6857             } else {
6858                 if ((hex == 'x') || (hex == 'X'))
6859                     snprintf((char *)out, sizeof(out), "#x%X", value);
6860                 else
6861                     snprintf((char *)out, sizeof(out), "#%d", value);
6862                 if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) &&
6863                     (!ctxt->disableSAX))
6864                     ctxt->sax->reference(ctxt->userData, out);
6865             }
6866         } else {
6867             /*
6868              * Just encode the value in UTF-8
6869              */
6870             COPY_BUF(0 ,out, i, value);
6871             out[i] = 0;
6872             if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
6873                 (!ctxt->disableSAX))
6874                 ctxt->sax->characters(ctxt->userData, out, i);
6875         }
6876         return;
6877     }
6878
6879     /*
6880      * We are seeing an entity reference
6881      */
6882     ent = xmlParseEntityRef(ctxt);
6883     if (ent == NULL) return;
6884     if (!ctxt->wellFormed)
6885         return;
6886     was_checked = ent->checked;
6887
6888     /* special case of predefined entities */
6889     if ((ent->name == NULL) ||
6890         (ent->etype == XML_INTERNAL_PREDEFINED_ENTITY)) {
6891         val = ent->content;
6892         if (val == NULL) return;
6893         /*
6894          * inline the entity.
6895          */
6896         if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
6897             (!ctxt->disableSAX))
6898             ctxt->sax->characters(ctxt->userData, val, xmlStrlen(val));
6899         return;
6900     }
6901
6902     /*
6903      * The first reference to the entity trigger a parsing phase
6904      * where the ent->children is filled with the result from
6905      * the parsing.
6906      */
6907     if (ent->checked == 0) {
6908         unsigned long oldnbent = ctxt->nbentities;
6909
6910         /*
6911          * This is a bit hackish but this seems the best
6912          * way to make sure both SAX and DOM entity support
6913          * behaves okay.
6914          */
6915         void *user_data;
6916         if (ctxt->userData == ctxt)
6917             user_data = NULL;
6918         else
6919             user_data = ctxt->userData;
6920
6921         /*
6922          * Check that this entity is well formed
6923          * 4.3.2: An internal general parsed entity is well-formed
6924          * if its replacement text matches the production labeled
6925          * content.
6926          */
6927         if (ent->etype == XML_INTERNAL_GENERAL_ENTITY) {
6928             ctxt->depth++;
6929             ret = xmlParseBalancedChunkMemoryInternal(ctxt, ent->content,
6930                                                       user_data, &list);
6931             ctxt->depth--;
6932
6933         } else if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY) {
6934             ctxt->depth++;
6935             ret = xmlParseExternalEntityPrivate(ctxt->myDoc, ctxt, ctxt->sax,
6936                                            user_data, ctxt->depth, ent->URI,
6937                                            ent->ExternalID, &list);
6938             ctxt->depth--;
6939         } else {
6940             ret = XML_ERR_ENTITY_PE_INTERNAL;
6941             xmlErrMsgStr(ctxt, XML_ERR_INTERNAL_ERROR,
6942                          "invalid entity type found\n", NULL);
6943         }
6944
6945         /*
6946          * Store the number of entities needing parsing for this entity
6947          * content and do checkings
6948          */
6949         ent->checked = ctxt->nbentities - oldnbent;
6950         if (ret == XML_ERR_ENTITY_LOOP) {
6951             xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
6952             xmlFreeNodeList(list);
6953             return;
6954         }
6955         if (xmlParserEntityCheck(ctxt, 0, ent)) {
6956             xmlFreeNodeList(list);
6957             return;
6958         }
6959
6960         if ((ret == XML_ERR_OK) && (list != NULL)) {
6961             if (((ent->etype == XML_INTERNAL_GENERAL_ENTITY) ||
6962              (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY))&&
6963                 (ent->children == NULL)) {
6964                 ent->children = list;
6965                 if (ctxt->replaceEntities) {
6966                     /*
6967                      * Prune it directly in the generated document
6968                      * except for single text nodes.
6969                      */
6970                     if (((list->type == XML_TEXT_NODE) &&
6971                          (list->next == NULL)) ||
6972                         (ctxt->parseMode == XML_PARSE_READER)) {
6973                         list->parent = (xmlNodePtr) ent;
6974                         list = NULL;
6975                         ent->owner = 1;
6976                     } else {
6977                         ent->owner = 0;
6978                         while (list != NULL) {
6979                             list->parent = (xmlNodePtr) ctxt->node;
6980                             list->doc = ctxt->myDoc;
6981                             if (list->next == NULL)
6982                                 ent->last = list;
6983                             list = list->next;
6984                         }
6985                         list = ent->children;
6986 #ifdef LIBXML_LEGACY_ENABLED
6987                         if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)
6988                           xmlAddEntityReference(ent, list, NULL);
6989 #endif /* LIBXML_LEGACY_ENABLED */
6990                     }
6991                 } else {
6992                     ent->owner = 1;
6993                     while (list != NULL) {
6994                         list->parent = (xmlNodePtr) ent;
6995                         if (list->next == NULL)
6996                             ent->last = list;
6997                         list = list->next;
6998                     }
6999                 }
7000             } else {
7001                 xmlFreeNodeList(list);
7002                 list = NULL;
7003             }
7004         } else if ((ret != XML_ERR_OK) &&
7005                    (ret != XML_WAR_UNDECLARED_ENTITY)) {
7006             xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
7007                      "Entity '%s' failed to parse\n", ent->name);
7008         } else if (list != NULL) {
7009             xmlFreeNodeList(list);
7010             list = NULL;
7011         }
7012         if (ent->checked == 0)
7013             ent->checked = 1;
7014     } else if (ent->checked != 1) {
7015         ctxt->nbentities += ent->checked;
7016     }
7017
7018     /*
7019      * Now that the entity content has been gathered
7020      * provide it to the application, this can take different forms based
7021      * on the parsing modes.
7022      */
7023     if (ent->children == NULL) {
7024         /*
7025          * Probably running in SAX mode and the callbacks don't
7026          * build the entity content. So unless we already went
7027          * though parsing for first checking go though the entity
7028          * content to generate callbacks associated to the entity
7029          */
7030         if (was_checked != 0) {
7031             void *user_data;
7032             /*
7033              * This is a bit hackish but this seems the best
7034              * way to make sure both SAX and DOM entity support
7035              * behaves okay.
7036              */
7037             if (ctxt->userData == ctxt)
7038                 user_data = NULL;
7039             else
7040                 user_data = ctxt->userData;
7041
7042             if (ent->etype == XML_INTERNAL_GENERAL_ENTITY) {
7043                 ctxt->depth++;
7044                 ret = xmlParseBalancedChunkMemoryInternal(ctxt,
7045                                    ent->content, user_data, NULL);
7046                 ctxt->depth--;
7047             } else if (ent->etype ==
7048                        XML_EXTERNAL_GENERAL_PARSED_ENTITY) {
7049                 ctxt->depth++;
7050                 ret = xmlParseExternalEntityPrivate(ctxt->myDoc, ctxt,
7051                            ctxt->sax, user_data, ctxt->depth,
7052                            ent->URI, ent->ExternalID, NULL);
7053                 ctxt->depth--;
7054             } else {
7055                 ret = XML_ERR_ENTITY_PE_INTERNAL;
7056                 xmlErrMsgStr(ctxt, XML_ERR_INTERNAL_ERROR,
7057                              "invalid entity type found\n", NULL);
7058             }
7059             if (ret == XML_ERR_ENTITY_LOOP) {
7060                 xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
7061                 return;
7062             }
7063         }
7064         if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) &&
7065             (ctxt->replaceEntities == 0) && (!ctxt->disableSAX)) {
7066             /*
7067              * Entity reference callback comes second, it's somewhat
7068              * superfluous but a compatibility to historical behaviour
7069              */
7070             ctxt->sax->reference(ctxt->userData, ent->name);
7071         }
7072         return;
7073     }
7074
7075     /*
7076      * If we didn't get any children for the entity being built
7077      */
7078     if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) &&
7079         (ctxt->replaceEntities == 0) && (!ctxt->disableSAX)) {
7080         /*
7081          * Create a node.
7082          */
7083         ctxt->sax->reference(ctxt->userData, ent->name);
7084         return;
7085     }
7086
7087     if ((ctxt->replaceEntities) || (ent->children == NULL))  {
7088         /*
7089          * There is a problem on the handling of _private for entities
7090          * (bug 155816): Should we copy the content of the field from
7091          * the entity (possibly overwriting some value set by the user
7092          * when a copy is created), should we leave it alone, or should
7093          * we try to take care of different situations?  The problem
7094          * is exacerbated by the usage of this field by the xmlReader.
7095          * To fix this bug, we look at _private on the created node
7096          * and, if it's NULL, we copy in whatever was in the entity.
7097          * If it's not NULL we leave it alone.  This is somewhat of a
7098          * hack - maybe we should have further tests to determine
7099          * what to do.
7100          */
7101         if ((ctxt->node != NULL) && (ent->children != NULL)) {
7102             /*
7103              * Seems we are generating the DOM content, do
7104              * a simple tree copy for all references except the first
7105              * In the first occurrence list contains the replacement.
7106              * progressive == 2 means we are operating on the Reader
7107              * and since nodes are discarded we must copy all the time.
7108              */
7109             if (((list == NULL) && (ent->owner == 0)) ||
7110                 (ctxt->parseMode == XML_PARSE_READER)) {
7111                 xmlNodePtr nw = NULL, cur, firstChild = NULL;
7112
7113                 /*
7114                  * when operating on a reader, the entities definitions
7115                  * are always owning the entities subtree.
7116                 if (ctxt->parseMode == XML_PARSE_READER)
7117                     ent->owner = 1;
7118                  */
7119
7120                 cur = ent->children;
7121                 while (cur != NULL) {
7122                     nw = xmlDocCopyNode(cur, ctxt->myDoc, 1);
7123                     if (nw != NULL) {
7124                         if (nw->_private == NULL)
7125                             nw->_private = cur->_private;
7126                         if (firstChild == NULL){
7127                             firstChild = nw;
7128                         }
7129                         nw = xmlAddChild(ctxt->node, nw);
7130                     }
7131                     if (cur == ent->last) {
7132                         /*
7133                          * needed to detect some strange empty
7134                          * node cases in the reader tests
7135                          */
7136                         if ((ctxt->parseMode == XML_PARSE_READER) &&
7137                             (nw != NULL) &&
7138                             (nw->type == XML_ELEMENT_NODE) &&
7139                             (nw->children == NULL))
7140                             nw->extra = 1;
7141
7142                         break;
7143                     }
7144                     cur = cur->next;
7145                 }
7146 #ifdef LIBXML_LEGACY_ENABLED
7147                 if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)
7148                   xmlAddEntityReference(ent, firstChild, nw);
7149 #endif /* LIBXML_LEGACY_ENABLED */
7150             } else if (list == NULL) {
7151                 xmlNodePtr nw = NULL, cur, next, last,
7152                            firstChild = NULL;
7153                 /*
7154                  * Copy the entity child list and make it the new
7155                  * entity child list. The goal is to make sure any
7156                  * ID or REF referenced will be the one from the
7157                  * document content and not the entity copy.
7158                  */
7159                 cur = ent->children;
7160                 ent->children = NULL;
7161                 last = ent->last;
7162                 ent->last = NULL;
7163                 while (cur != NULL) {
7164                     next = cur->next;
7165                     cur->next = NULL;
7166                     cur->parent = NULL;
7167                     nw = xmlDocCopyNode(cur, ctxt->myDoc, 1);
7168                     if (nw != NULL) {
7169                         if (nw->_private == NULL)
7170                             nw->_private = cur->_private;
7171                         if (firstChild == NULL){
7172                             firstChild = cur;
7173                         }
7174                         xmlAddChild((xmlNodePtr) ent, nw);
7175                         xmlAddChild(ctxt->node, cur);
7176                     }
7177                     if (cur == last)
7178                         break;
7179                     cur = next;
7180                 }
7181                 if (ent->owner == 0)
7182                     ent->owner = 1;
7183 #ifdef LIBXML_LEGACY_ENABLED
7184                 if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)
7185                   xmlAddEntityReference(ent, firstChild, nw);
7186 #endif /* LIBXML_LEGACY_ENABLED */
7187             } else {
7188                 const xmlChar *nbktext;
7189
7190                 /*
7191                  * the name change is to avoid coalescing of the
7192                  * node with a possible previous text one which
7193                  * would make ent->children a dangling pointer
7194                  */
7195                 nbktext = xmlDictLookup(ctxt->dict, BAD_CAST "nbktext",
7196                                         -1);
7197                 if (ent->children->type == XML_TEXT_NODE)
7198                     ent->children->name = nbktext;
7199                 if ((ent->last != ent->children) &&
7200                     (ent->last->type == XML_TEXT_NODE))
7201                     ent->last->name = nbktext;
7202                 xmlAddChildList(ctxt->node, ent->children);
7203             }
7204
7205             /*
7206              * This is to avoid a nasty side effect, see
7207              * characters() in SAX.c
7208              */
7209             ctxt->nodemem = 0;
7210             ctxt->nodelen = 0;
7211             return;
7212         }
7213     }
7214 }
7215
7216 /**
7217  * xmlParseEntityRef:
7218  * @ctxt:  an XML parser context
7219  *
7220  * parse ENTITY references declarations
7221  *
7222  * [68] EntityRef ::= '&' Name ';'
7223  *
7224  * [ WFC: Entity Declared ]
7225  * In a document without any DTD, a document with only an internal DTD
7226  * subset which contains no parameter entity references, or a document
7227  * with "standalone='yes'", the Name given in the entity reference
7228  * must match that in an entity declaration, except that well-formed
7229  * documents need not declare any of the following entities: amp, lt,
7230  * gt, apos, quot.  The declaration of a parameter entity must precede
7231  * any reference to it.  Similarly, the declaration of a general entity
7232  * must precede any reference to it which appears in a default value in an
7233  * attribute-list declaration. Note that if entities are declared in the
7234  * external subset or in external parameter entities, a non-validating
7235  * processor is not obligated to read and process their declarations;
7236  * for such documents, the rule that an entity must be declared is a
7237  * well-formedness constraint only if standalone='yes'.
7238  *
7239  * [ WFC: Parsed Entity ]
7240  * An entity reference must not contain the name of an unparsed entity
7241  *
7242  * Returns the xmlEntityPtr if found, or NULL otherwise.
7243  */
7244 xmlEntityPtr
7245 xmlParseEntityRef(xmlParserCtxtPtr ctxt) {
7246     const xmlChar *name;
7247     xmlEntityPtr ent = NULL;
7248
7249     GROW;
7250
7251     if (RAW != '&')
7252         return(NULL);
7253     NEXT;
7254     name = xmlParseName(ctxt);
7255     if (name == NULL) {
7256         xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
7257                        "xmlParseEntityRef: no name\n");
7258         return(NULL);
7259     }
7260     if (RAW != ';') {
7261         xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL);
7262         return(NULL);
7263     }
7264     NEXT;
7265
7266     /*
7267      * Predefined entites override any extra definition
7268      */
7269     if ((ctxt->options & XML_PARSE_OLDSAX) == 0) {
7270         ent = xmlGetPredefinedEntity(name);
7271         if (ent != NULL)
7272             return(ent);
7273     }
7274
7275     /*
7276      * Increate the number of entity references parsed
7277      */
7278     ctxt->nbentities++;
7279
7280     /*
7281      * Ask first SAX for entity resolution, otherwise try the
7282      * entities which may have stored in the parser context.
7283      */
7284     if (ctxt->sax != NULL) {
7285         if (ctxt->sax->getEntity != NULL)
7286             ent = ctxt->sax->getEntity(ctxt->userData, name);
7287         if ((ctxt->wellFormed == 1 ) && (ent == NULL) &&
7288             (ctxt->options & XML_PARSE_OLDSAX))
7289             ent = xmlGetPredefinedEntity(name);
7290         if ((ctxt->wellFormed == 1 ) && (ent == NULL) &&
7291             (ctxt->userData==ctxt)) {
7292             ent = xmlSAX2GetEntity(ctxt, name);
7293         }
7294     }
7295     /*
7296      * [ WFC: Entity Declared ]
7297      * In a document without any DTD, a document with only an
7298      * internal DTD subset which contains no parameter entity
7299      * references, or a document with "standalone='yes'", the
7300      * Name given in the entity reference must match that in an
7301      * entity declaration, except that well-formed documents
7302      * need not declare any of the following entities: amp, lt,
7303      * gt, apos, quot.
7304      * The declaration of a parameter entity must precede any
7305      * reference to it.
7306      * Similarly, the declaration of a general entity must
7307      * precede any reference to it which appears in a default
7308      * value in an attribute-list declaration. Note that if
7309      * entities are declared in the external subset or in
7310      * external parameter entities, a non-validating processor
7311      * is not obligated to read and process their declarations;
7312      * for such documents, the rule that an entity must be
7313      * declared is a well-formedness constraint only if
7314      * standalone='yes'.
7315      */
7316     if (ent == NULL) {
7317         if ((ctxt->standalone == 1) ||
7318             ((ctxt->hasExternalSubset == 0) &&
7319              (ctxt->hasPErefs == 0))) {
7320             xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
7321                      "Entity '%s' not defined\n", name);
7322         } else {
7323             xmlErrMsgStr(ctxt, XML_WAR_UNDECLARED_ENTITY,
7324                      "Entity '%s' not defined\n", name);
7325             if ((ctxt->inSubset == 0) &&
7326                 (ctxt->sax != NULL) &&
7327                 (ctxt->sax->reference != NULL)) {
7328                 ctxt->sax->reference(ctxt->userData, name);
7329             }
7330         }
7331         ctxt->valid = 0;
7332     }
7333
7334     /*
7335      * [ WFC: Parsed Entity ]
7336      * An entity reference must not contain the name of an
7337      * unparsed entity
7338      */
7339     else if (ent->etype == XML_EXTERNAL_GENERAL_UNPARSED_ENTITY) {
7340         xmlFatalErrMsgStr(ctxt, XML_ERR_UNPARSED_ENTITY,
7341                  "Entity reference to unparsed entity %s\n", name);
7342     }
7343
7344     /*
7345      * [ WFC: No External Entity References ]
7346      * Attribute values cannot contain direct or indirect
7347      * entity references to external entities.
7348      */
7349     else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
7350              (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)) {
7351         xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_EXTERNAL,
7352              "Attribute references external entity '%s'\n", name);
7353     }
7354     /*
7355      * [ WFC: No < in Attribute Values ]
7356      * The replacement text of any entity referred to directly or
7357      * indirectly in an attribute value (other than "&lt;") must
7358      * not contain a <.
7359      */
7360     else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
7361              (ent != NULL) && (ent->content != NULL) &&
7362              (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) &&
7363              (xmlStrchr(ent->content, '<'))) {
7364         xmlFatalErrMsgStr(ctxt, XML_ERR_LT_IN_ATTRIBUTE,
7365     "'<' in entity '%s' is not allowed in attributes values\n", name);
7366     }
7367
7368     /*
7369      * Internal check, no parameter entities here ...
7370      */
7371     else {
7372         switch (ent->etype) {
7373             case XML_INTERNAL_PARAMETER_ENTITY:
7374             case XML_EXTERNAL_PARAMETER_ENTITY:
7375             xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_PARAMETER,
7376              "Attempt to reference the parameter entity '%s'\n",
7377                               name);
7378             break;
7379             default:
7380             break;
7381         }
7382     }
7383
7384     /*
7385      * [ WFC: No Recursion ]
7386      * A parsed entity must not contain a recursive reference
7387      * to itself, either directly or indirectly.
7388      * Done somewhere else
7389      */
7390     return(ent);
7391 }
7392
7393 /**
7394  * xmlParseStringEntityRef:
7395  * @ctxt:  an XML parser context
7396  * @str:  a pointer to an index in the string
7397  *
7398  * parse ENTITY references declarations, but this version parses it from
7399  * a string value.
7400  *
7401  * [68] EntityRef ::= '&' Name ';'
7402  *
7403  * [ WFC: Entity Declared ]
7404  * In a document without any DTD, a document with only an internal DTD
7405  * subset which contains no parameter entity references, or a document
7406  * with "standalone='yes'", the Name given in the entity reference
7407  * must match that in an entity declaration, except that well-formed
7408  * documents need not declare any of the following entities: amp, lt,
7409  * gt, apos, quot.  The declaration of a parameter entity must precede
7410  * any reference to it.  Similarly, the declaration of a general entity
7411  * must precede any reference to it which appears in a default value in an
7412  * attribute-list declaration. Note that if entities are declared in the
7413  * external subset or in external parameter entities, a non-validating
7414  * processor is not obligated to read and process their declarations;
7415  * for such documents, the rule that an entity must be declared is a
7416  * well-formedness constraint only if standalone='yes'.
7417  *
7418  * [ WFC: Parsed Entity ]
7419  * An entity reference must not contain the name of an unparsed entity
7420  *
7421  * Returns the xmlEntityPtr if found, or NULL otherwise. The str pointer
7422  * is updated to the current location in the string.
7423  */
7424 static xmlEntityPtr
7425 xmlParseStringEntityRef(xmlParserCtxtPtr ctxt, const xmlChar ** str) {
7426     xmlChar *name;
7427     const xmlChar *ptr;
7428     xmlChar cur;
7429     xmlEntityPtr ent = NULL;
7430
7431     if ((str == NULL) || (*str == NULL))
7432         return(NULL);
7433     ptr = *str;
7434     cur = *ptr;
7435     if (cur != '&')
7436         return(NULL);
7437
7438     ptr++;
7439     name = xmlParseStringName(ctxt, &ptr);
7440     if (name == NULL) {
7441         xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
7442                        "xmlParseStringEntityRef: no name\n");
7443         *str = ptr;
7444         return(NULL);
7445     }
7446     if (*ptr != ';') {
7447         xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL);
7448         xmlFree(name);
7449         *str = ptr;
7450         return(NULL);
7451     }
7452     ptr++;
7453
7454
7455     /*
7456      * Predefined entites override any extra definition
7457      */
7458     if ((ctxt->options & XML_PARSE_OLDSAX) == 0) {
7459         ent = xmlGetPredefinedEntity(name);
7460         if (ent != NULL) {
7461             xmlFree(name);
7462             *str = ptr;
7463             return(ent);
7464         }
7465     }
7466
7467     /*
7468      * Increate the number of entity references parsed
7469      */
7470     ctxt->nbentities++;
7471
7472     /*
7473      * Ask first SAX for entity resolution, otherwise try the
7474      * entities which may have stored in the parser context.
7475      */
7476     if (ctxt->sax != NULL) {
7477         if (ctxt->sax->getEntity != NULL)
7478             ent = ctxt->sax->getEntity(ctxt->userData, name);
7479         if ((ent == NULL) && (ctxt->options & XML_PARSE_OLDSAX))
7480             ent = xmlGetPredefinedEntity(name);
7481         if ((ent == NULL) && (ctxt->userData==ctxt)) {
7482             ent = xmlSAX2GetEntity(ctxt, name);
7483         }
7484     }
7485
7486     /*
7487      * [ WFC: Entity Declared ]
7488      * In a document without any DTD, a document with only an
7489      * internal DTD subset which contains no parameter entity
7490      * references, or a document with "standalone='yes'", the
7491      * Name given in the entity reference must match that in an
7492      * entity declaration, except that well-formed documents
7493      * need not declare any of the following entities: amp, lt,
7494      * gt, apos, quot.
7495      * The declaration of a parameter entity must precede any
7496      * reference to it.
7497      * Similarly, the declaration of a general entity must
7498      * precede any reference to it which appears in a default
7499      * value in an attribute-list declaration. Note that if
7500      * entities are declared in the external subset or in
7501      * external parameter entities, a non-validating processor
7502      * is not obligated to read and process their declarations;
7503      * for such documents, the rule that an entity must be
7504      * declared is a well-formedness constraint only if
7505      * standalone='yes'.
7506      */
7507     if (ent == NULL) {
7508         if ((ctxt->standalone == 1) ||
7509             ((ctxt->hasExternalSubset == 0) &&
7510              (ctxt->hasPErefs == 0))) {
7511             xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
7512                      "Entity '%s' not defined\n", name);
7513         } else {
7514             xmlErrMsgStr(ctxt, XML_WAR_UNDECLARED_ENTITY,
7515                           "Entity '%s' not defined\n",
7516                           name);
7517         }
7518         /* TODO ? check regressions ctxt->valid = 0; */
7519     }
7520
7521     /*
7522      * [ WFC: Parsed Entity ]
7523      * An entity reference must not contain the name of an
7524      * unparsed entity
7525      */
7526     else if (ent->etype == XML_EXTERNAL_GENERAL_UNPARSED_ENTITY) {
7527         xmlFatalErrMsgStr(ctxt, XML_ERR_UNPARSED_ENTITY,
7528                  "Entity reference to unparsed entity %s\n", name);
7529     }
7530
7531     /*
7532      * [ WFC: No External Entity References ]
7533      * Attribute values cannot contain direct or indirect
7534      * entity references to external entities.
7535      */
7536     else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
7537              (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)) {
7538         xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_EXTERNAL,
7539          "Attribute references external entity '%s'\n", name);
7540     }
7541     /*
7542      * [ WFC: No < in Attribute Values ]
7543      * The replacement text of any entity referred to directly or
7544      * indirectly in an attribute value (other than "&lt;") must
7545      * not contain a <.
7546      */
7547     else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
7548              (ent != NULL) && (ent->content != NULL) &&
7549              (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) &&
7550              (xmlStrchr(ent->content, '<'))) {
7551         xmlFatalErrMsgStr(ctxt, XML_ERR_LT_IN_ATTRIBUTE,
7552      "'<' in entity '%s' is not allowed in attributes values\n",
7553                           name);
7554     }
7555
7556     /*
7557      * Internal check, no parameter entities here ...
7558      */
7559     else {
7560         switch (ent->etype) {
7561             case XML_INTERNAL_PARAMETER_ENTITY:
7562             case XML_EXTERNAL_PARAMETER_ENTITY:
7563                 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_PARAMETER,
7564              "Attempt to reference the parameter entity '%s'\n",
7565                                   name);
7566             break;
7567             default:
7568             break;
7569         }
7570     }
7571
7572     /*
7573      * [ WFC: No Recursion ]
7574      * A parsed entity must not contain a recursive reference
7575      * to itself, either directly or indirectly.
7576      * Done somewhere else
7577      */
7578
7579     xmlFree(name);
7580     *str = ptr;
7581     return(ent);
7582 }
7583
7584 /**
7585  * xmlParsePEReference:
7586  * @ctxt:  an XML parser context
7587  *
7588  * parse PEReference declarations
7589  * The entity content is handled directly by pushing it's content as
7590  * a new input stream.
7591  *
7592  * [69] PEReference ::= '%' Name ';'
7593  *
7594  * [ WFC: No Recursion ]
7595  * A parsed entity must not contain a recursive
7596  * reference to itself, either directly or indirectly.
7597  *
7598  * [ WFC: Entity Declared ]
7599  * In a document without any DTD, a document with only an internal DTD
7600  * subset which contains no parameter entity references, or a document
7601  * with "standalone='yes'", ...  ... The declaration of a parameter
7602  * entity must precede any reference to it...
7603  *
7604  * [ VC: Entity Declared ]
7605  * In a document with an external subset or external parameter entities
7606  * with "standalone='no'", ...  ... The declaration of a parameter entity
7607  * must precede any reference to it...
7608  *
7609  * [ WFC: In DTD ]
7610  * Parameter-entity references may only appear in the DTD.
7611  * NOTE: misleading but this is handled.
7612  */
7613 void
7614 xmlParsePEReference(xmlParserCtxtPtr ctxt)
7615 {
7616     const xmlChar *name;
7617     xmlEntityPtr entity = NULL;
7618     xmlParserInputPtr input;
7619
7620     if (RAW != '%')
7621         return;
7622     NEXT;
7623     name = xmlParseName(ctxt);
7624     if (name == NULL) {
7625         xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
7626                        "xmlParsePEReference: no name\n");
7627         return;
7628     }
7629     if (RAW != ';') {
7630         xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL);
7631         return;
7632     }
7633
7634     NEXT;
7635
7636     /*
7637      * Increate the number of entity references parsed
7638      */
7639     ctxt->nbentities++;
7640
7641     /*
7642      * Request the entity from SAX
7643      */
7644     if ((ctxt->sax != NULL) &&
7645         (ctxt->sax->getParameterEntity != NULL))
7646         entity = ctxt->sax->getParameterEntity(ctxt->userData,
7647                                                name);
7648     if (entity == NULL) {
7649         /*
7650          * [ WFC: Entity Declared ]
7651          * In a document without any DTD, a document with only an
7652          * internal DTD subset which contains no parameter entity
7653          * references, or a document with "standalone='yes'", ...
7654          * ... The declaration of a parameter entity must precede
7655          * any reference to it...
7656          */
7657         if ((ctxt->standalone == 1) ||
7658             ((ctxt->hasExternalSubset == 0) &&
7659              (ctxt->hasPErefs == 0))) {
7660             xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
7661                               "PEReference: %%%s; not found\n",
7662                               name);
7663         } else {
7664             /*
7665              * [ VC: Entity Declared ]
7666              * In a document with an external subset or external
7667              * parameter entities with "standalone='no'", ...
7668              * ... The declaration of a parameter entity must
7669              * precede any reference to it...
7670              */
7671             xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
7672                           "PEReference: %%%s; not found\n",
7673                           name, NULL);
7674             ctxt->valid = 0;
7675         }
7676     } else {
7677         /*
7678          * Internal checking in case the entity quest barfed
7679          */
7680         if ((entity->etype != XML_INTERNAL_PARAMETER_ENTITY) &&
7681             (entity->etype != XML_EXTERNAL_PARAMETER_ENTITY)) {
7682             xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
7683                   "Internal: %%%s; is not a parameter entity\n",
7684                           name, NULL);
7685         } else if (ctxt->input->free != deallocblankswrapper) {
7686             input = xmlNewBlanksWrapperInputStream(ctxt, entity);
7687             if (xmlPushInput(ctxt, input) < 0)
7688                 return;
7689         } else {
7690             /*
7691              * TODO !!!
7692              * handle the extra spaces added before and after
7693              * c.f. http://www.w3.org/TR/REC-xml#as-PE
7694              */
7695             input = xmlNewEntityInputStream(ctxt, entity);
7696             if (xmlPushInput(ctxt, input) < 0)
7697                 return;
7698             if ((entity->etype == XML_EXTERNAL_PARAMETER_ENTITY) &&
7699                 (CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) &&
7700                 (IS_BLANK_CH(NXT(5)))) {
7701                 xmlParseTextDecl(ctxt);
7702                 if (ctxt->errNo ==
7703                     XML_ERR_UNSUPPORTED_ENCODING) {
7704                     /*
7705                      * The XML REC instructs us to stop parsing
7706                      * right here
7707                      */
7708                     ctxt->instate = XML_PARSER_EOF;
7709                     return;
7710                 }
7711             }
7712         }
7713     }
7714     ctxt->hasPErefs = 1;
7715 }
7716
7717 /**
7718  * xmlLoadEntityContent:
7719  * @ctxt:  an XML parser context
7720  * @entity: an unloaded system entity
7721  *
7722  * Load the original content of the given system entity from the
7723  * ExternalID/SystemID given. This is to be used for Included in Literal
7724  * http://www.w3.org/TR/REC-xml/#inliteral processing of entities references
7725  *
7726  * Returns 0 in case of success and -1 in case of failure
7727  */
7728 static int
7729 xmlLoadEntityContent(xmlParserCtxtPtr ctxt, xmlEntityPtr entity) {
7730     xmlParserInputPtr input;
7731     xmlBufferPtr buf;
7732     int l, c;
7733     int count = 0;
7734
7735     if ((ctxt == NULL) || (entity == NULL) ||
7736         ((entity->etype != XML_EXTERNAL_PARAMETER_ENTITY) &&
7737          (entity->etype != XML_EXTERNAL_GENERAL_PARSED_ENTITY)) ||
7738         (entity->content != NULL)) {
7739         xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
7740                     "xmlLoadEntityContent parameter error");
7741         return(-1);
7742     }
7743
7744     if (xmlParserDebugEntities)
7745         xmlGenericError(xmlGenericErrorContext,
7746                 "Reading %s entity content input\n", entity->name);
7747
7748     buf = xmlBufferCreate();
7749     if (buf == NULL) {
7750         xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
7751                     "xmlLoadEntityContent parameter error");
7752         return(-1);
7753     }
7754
7755     input = xmlNewEntityInputStream(ctxt, entity);
7756     if (input == NULL) {
7757         xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
7758                     "xmlLoadEntityContent input error");
7759         xmlBufferFree(buf);
7760         return(-1);
7761     }
7762
7763     /*
7764      * Push the entity as the current input, read char by char
7765      * saving to the buffer until the end of the entity or an error
7766      */
7767     if (xmlPushInput(ctxt, input) < 0) {
7768         xmlBufferFree(buf);
7769         return(-1);
7770     }
7771
7772     GROW;
7773     c = CUR_CHAR(l);
7774     while ((ctxt->input == input) && (ctxt->input->cur < ctxt->input->end) &&
7775            (IS_CHAR(c))) {
7776         xmlBufferAdd(buf, ctxt->input->cur, l);
7777         if (count++ > 100) {
7778             count = 0;
7779             GROW;
7780         }
7781         NEXTL(l);
7782         c = CUR_CHAR(l);
7783     }
7784
7785     if ((ctxt->input == input) && (ctxt->input->cur >= ctxt->input->end)) {
7786         xmlPopInput(ctxt);
7787     } else if (!IS_CHAR(c)) {
7788         xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
7789                           "xmlLoadEntityContent: invalid char value %d\n",
7790                           c);
7791         xmlBufferFree(buf);
7792         return(-1);
7793     }
7794     entity->content = buf->content;
7795     buf->content = NULL;
7796     xmlBufferFree(buf);
7797
7798     return(0);
7799 }
7800
7801 /**
7802  * xmlParseStringPEReference:
7803  * @ctxt:  an XML parser context
7804  * @str:  a pointer to an index in the string
7805  *
7806  * parse PEReference declarations
7807  *
7808  * [69] PEReference ::= '%' Name ';'
7809  *
7810  * [ WFC: No Recursion ]
7811  * A parsed entity must not contain a recursive
7812  * reference to itself, either directly or indirectly.
7813  *
7814  * [ WFC: Entity Declared ]
7815  * In a document without any DTD, a document with only an internal DTD
7816  * subset which contains no parameter entity references, or a document
7817  * with "standalone='yes'", ...  ... The declaration of a parameter
7818  * entity must precede any reference to it...
7819  *
7820  * [ VC: Entity Declared ]
7821  * In a document with an external subset or external parameter entities
7822  * with "standalone='no'", ...  ... The declaration of a parameter entity
7823  * must precede any reference to it...
7824  *
7825  * [ WFC: In DTD ]
7826  * Parameter-entity references may only appear in the DTD.
7827  * NOTE: misleading but this is handled.
7828  *
7829  * Returns the string of the entity content.
7830  *         str is updated to the current value of the index
7831  */
7832 static xmlEntityPtr
7833 xmlParseStringPEReference(xmlParserCtxtPtr ctxt, const xmlChar **str) {
7834     const xmlChar *ptr;
7835     xmlChar cur;
7836     xmlChar *name;
7837     xmlEntityPtr entity = NULL;
7838
7839     if ((str == NULL) || (*str == NULL)) return(NULL);
7840     ptr = *str;
7841     cur = *ptr;
7842     if (cur != '%')
7843         return(NULL);
7844     ptr++;
7845     name = xmlParseStringName(ctxt, &ptr);
7846     if (name == NULL) {
7847         xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
7848                        "xmlParseStringPEReference: no name\n");
7849         *str = ptr;
7850         return(NULL);
7851     }
7852     cur = *ptr;
7853     if (cur != ';') {
7854         xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL);
7855         xmlFree(name);
7856         *str = ptr;
7857         return(NULL);
7858     }
7859     ptr++;
7860
7861     /*
7862      * Increate the number of entity references parsed
7863      */
7864     ctxt->nbentities++;
7865
7866     /*
7867      * Request the entity from SAX
7868      */
7869     if ((ctxt->sax != NULL) &&
7870         (ctxt->sax->getParameterEntity != NULL))
7871         entity = ctxt->sax->getParameterEntity(ctxt->userData,
7872                                                name);
7873     if (entity == NULL) {
7874         /*
7875          * [ WFC: Entity Declared ]
7876          * In a document without any DTD, a document with only an
7877          * internal DTD subset which contains no parameter entity
7878          * references, or a document with "standalone='yes'", ...
7879          * ... The declaration of a parameter entity must precede
7880          * any reference to it...
7881          */
7882         if ((ctxt->standalone == 1) ||
7883             ((ctxt->hasExternalSubset == 0) && (ctxt->hasPErefs == 0))) {
7884             xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
7885                  "PEReference: %%%s; not found\n", name);
7886         } else {
7887             /*
7888              * [ VC: Entity Declared ]
7889              * In a document with an external subset or external
7890              * parameter entities with "standalone='no'", ...
7891              * ... The declaration of a parameter entity must
7892              * precede any reference to it...
7893              */
7894             xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
7895                           "PEReference: %%%s; not found\n",
7896                           name, NULL);
7897             ctxt->valid = 0;
7898         }
7899     } else {
7900         /*
7901          * Internal checking in case the entity quest barfed
7902          */
7903         if ((entity->etype != XML_INTERNAL_PARAMETER_ENTITY) &&
7904             (entity->etype != XML_EXTERNAL_PARAMETER_ENTITY)) {
7905             xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
7906                           "%%%s; is not a parameter entity\n",
7907                           name, NULL);
7908         }
7909     }
7910     ctxt->hasPErefs = 1;
7911     xmlFree(name);
7912     *str = ptr;
7913     return(entity);
7914 }
7915
7916 /**
7917  * xmlParseDocTypeDecl:
7918  * @ctxt:  an XML parser context
7919  *
7920  * parse a DOCTYPE declaration
7921  *
7922  * [28] doctypedecl ::= '<!DOCTYPE' S Name (S ExternalID)? S?
7923  *                      ('[' (markupdecl | PEReference | S)* ']' S?)? '>'
7924  *
7925  * [ VC: Root Element Type ]
7926  * The Name in the document type declaration must match the element
7927  * type of the root element.
7928  */
7929
7930 void
7931 xmlParseDocTypeDecl(xmlParserCtxtPtr ctxt) {
7932     const xmlChar *name = NULL;
7933     xmlChar *ExternalID = NULL;
7934     xmlChar *URI = NULL;
7935
7936     /*
7937      * We know that '<!DOCTYPE' has been detected.
7938      */
7939     SKIP(9);
7940
7941     SKIP_BLANKS;
7942
7943     /*
7944      * Parse the DOCTYPE name.
7945      */
7946     name = xmlParseName(ctxt);
7947     if (name == NULL) {
7948         xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
7949                        "xmlParseDocTypeDecl : no DOCTYPE name !\n");
7950     }
7951     ctxt->intSubName = name;
7952
7953     SKIP_BLANKS;
7954
7955     /*
7956      * Check for SystemID and ExternalID
7957      */
7958     URI = xmlParseExternalID(ctxt, &ExternalID, 1);
7959
7960     if ((URI != NULL) || (ExternalID != NULL)) {
7961         ctxt->hasExternalSubset = 1;
7962     }
7963     ctxt->extSubURI = URI;
7964     ctxt->extSubSystem = ExternalID;
7965
7966     SKIP_BLANKS;
7967
7968     /*
7969      * Create and update the internal subset.
7970      */
7971     if ((ctxt->sax != NULL) && (ctxt->sax->internalSubset != NULL) &&
7972         (!ctxt->disableSAX))
7973         ctxt->sax->internalSubset(ctxt->userData, name, ExternalID, URI);
7974
7975     /*
7976      * Is there any internal subset declarations ?
7977      * they are handled separately in xmlParseInternalSubset()
7978      */
7979     if (RAW == '[')
7980         return;
7981
7982     /*
7983      * We should be at the end of the DOCTYPE declaration.
7984      */
7985     if (RAW != '>') {
7986         xmlFatalErr(ctxt, XML_ERR_DOCTYPE_NOT_FINISHED, NULL);
7987     }
7988     NEXT;
7989 }
7990
7991 /**
7992  * xmlParseInternalSubset:
7993  * @ctxt:  an XML parser context
7994  *
7995  * parse the internal subset declaration
7996  *
7997  * [28 end] ('[' (markupdecl | PEReference | S)* ']' S?)? '>'
7998  */
7999
8000 static void
8001 xmlParseInternalSubset(xmlParserCtxtPtr ctxt) {
8002     /*
8003      * Is there any DTD definition ?
8004      */
8005     if (RAW == '[') {
8006         ctxt->instate = XML_PARSER_DTD;
8007         NEXT;
8008         /*
8009          * Parse the succession of Markup declarations and
8010          * PEReferences.
8011          * Subsequence (markupdecl | PEReference | S)*
8012          */
8013         while (RAW != ']') {
8014             const xmlChar *check = CUR_PTR;
8015             unsigned int cons = ctxt->input->consumed;
8016
8017             SKIP_BLANKS;
8018             xmlParseMarkupDecl(ctxt);
8019             xmlParsePEReference(ctxt);
8020
8021             /*
8022              * Pop-up of finished entities.
8023              */
8024             while ((RAW == 0) && (ctxt->inputNr > 1))
8025                 xmlPopInput(ctxt);
8026
8027             if ((CUR_PTR == check) && (cons == ctxt->input->consumed)) {
8028                 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
8029              "xmlParseInternalSubset: error detected in Markup declaration\n");
8030                 break;
8031             }
8032         }
8033         if (RAW == ']') {
8034             NEXT;
8035             SKIP_BLANKS;
8036         }
8037     }
8038
8039     /*
8040      * We should be at the end of the DOCTYPE declaration.
8041      */
8042     if (RAW != '>') {
8043         xmlFatalErr(ctxt, XML_ERR_DOCTYPE_NOT_FINISHED, NULL);
8044     }
8045     NEXT;
8046 }
8047
8048 #ifdef LIBXML_SAX1_ENABLED
8049 /**
8050  * xmlParseAttribute:
8051  * @ctxt:  an XML parser context
8052  * @value:  a xmlChar ** used to store the value of the attribute
8053  *
8054  * parse an attribute
8055  *
8056  * [41] Attribute ::= Name Eq AttValue
8057  *
8058  * [ WFC: No External Entity References ]
8059  * Attribute values cannot contain direct or indirect entity references
8060  * to external entities.
8061  *
8062  * [ WFC: No < in Attribute Values ]
8063  * The replacement text of any entity referred to directly or indirectly in
8064  * an attribute value (other than "&lt;") must not contain a <.
8065  *
8066  * [ VC: Attribute Value Type ]
8067  * The attribute must have been declared; the value must be of the type
8068  * declared for it.
8069  *
8070  * [25] Eq ::= S? '=' S?
8071  *
8072  * With namespace:
8073  *
8074  * [NS 11] Attribute ::= QName Eq AttValue
8075  *
8076  * Also the case QName == xmlns:??? is handled independently as a namespace
8077  * definition.
8078  *
8079  * Returns the attribute name, and the value in *value.
8080  */
8081
8082 const xmlChar *
8083 xmlParseAttribute(xmlParserCtxtPtr ctxt, xmlChar **value) {
8084     const xmlChar *name;
8085     xmlChar *val;
8086
8087     *value = NULL;
8088     GROW;
8089     name = xmlParseName(ctxt);
8090     if (name == NULL) {
8091         xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
8092                        "error parsing attribute name\n");
8093         return(NULL);
8094     }
8095
8096     /*
8097      * read the value
8098      */
8099     SKIP_BLANKS;
8100     if (RAW == '=') {
8101         NEXT;
8102         SKIP_BLANKS;
8103         val = xmlParseAttValue(ctxt);
8104         ctxt->instate = XML_PARSER_CONTENT;
8105     } else {
8106         xmlFatalErrMsgStr(ctxt, XML_ERR_ATTRIBUTE_WITHOUT_VALUE,
8107                "Specification mandate value for attribute %s\n", name);
8108         return(NULL);
8109     }
8110
8111     /*
8112      * Check that xml:lang conforms to the specification
8113      * No more registered as an error, just generate a warning now
8114      * since this was deprecated in XML second edition
8115      */
8116     if ((ctxt->pedantic) && (xmlStrEqual(name, BAD_CAST "xml:lang"))) {
8117         if (!xmlCheckLanguageID(val)) {
8118             xmlWarningMsg(ctxt, XML_WAR_LANG_VALUE,
8119                           "Malformed value for xml:lang : %s\n",
8120                           val, NULL);
8121         }
8122     }
8123
8124     /*
8125      * Check that xml:space conforms to the specification
8126      */
8127     if (xmlStrEqual(name, BAD_CAST "xml:space")) {
8128         if (xmlStrEqual(val, BAD_CAST "default"))
8129             *(ctxt->space) = 0;
8130         else if (xmlStrEqual(val, BAD_CAST "preserve"))
8131             *(ctxt->space) = 1;
8132         else {
8133                 xmlWarningMsg(ctxt, XML_WAR_SPACE_VALUE,
8134 "Invalid value \"%s\" for xml:space : \"default\" or \"preserve\" expected\n",
8135                                  val, NULL);
8136         }
8137     }
8138
8139     *value = val;
8140     return(name);
8141 }
8142
8143 /**
8144  * xmlParseStartTag:
8145  * @ctxt:  an XML parser context
8146  *
8147  * parse a start of tag either for rule element or
8148  * EmptyElement. In both case we don't parse the tag closing chars.
8149  *
8150  * [40] STag ::= '<' Name (S Attribute)* S? '>'
8151  *
8152  * [ WFC: Unique Att Spec ]
8153  * No attribute name may appear more than once in the same start-tag or
8154  * empty-element tag.
8155  *
8156  * [44] EmptyElemTag ::= '<' Name (S Attribute)* S? '/>'
8157  *
8158  * [ WFC: Unique Att Spec ]
8159  * No attribute name may appear more than once in the same start-tag or
8160  * empty-element tag.
8161  *
8162  * With namespace:
8163  *
8164  * [NS 8] STag ::= '<' QName (S Attribute)* S? '>'
8165  *
8166  * [NS 10] EmptyElement ::= '<' QName (S Attribute)* S? '/>'
8167  *
8168  * Returns the element name parsed
8169  */
8170
8171 const xmlChar *
8172 xmlParseStartTag(xmlParserCtxtPtr ctxt) {
8173     const xmlChar *name;
8174     const xmlChar *attname;
8175     xmlChar *attvalue;
8176     const xmlChar **atts = ctxt->atts;
8177     int nbatts = 0;
8178     int maxatts = ctxt->maxatts;
8179     int i;
8180
8181     if (RAW != '<') return(NULL);
8182     NEXT1;
8183
8184     name = xmlParseName(ctxt);
8185     if (name == NULL) {
8186         xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
8187              "xmlParseStartTag: invalid element name\n");
8188         return(NULL);
8189     }
8190
8191     /*
8192      * Now parse the attributes, it ends up with the ending
8193      *
8194      * (S Attribute)* S?
8195      */
8196     SKIP_BLANKS;
8197     GROW;
8198
8199     while ((RAW != '>') &&
8200            ((RAW != '/') || (NXT(1) != '>')) &&
8201            (IS_BYTE_CHAR(RAW))) {
8202         const xmlChar *q = CUR_PTR;
8203         unsigned int cons = ctxt->input->consumed;
8204
8205         attname = xmlParseAttribute(ctxt, &attvalue);
8206         if ((attname != NULL) && (attvalue != NULL)) {
8207             /*
8208              * [ WFC: Unique Att Spec ]
8209              * No attribute name may appear more than once in the same
8210              * start-tag or empty-element tag.
8211              */
8212             for (i = 0; i < nbatts;i += 2) {
8213                 if (xmlStrEqual(atts[i], attname)) {
8214                     xmlErrAttributeDup(ctxt, NULL, attname);
8215                     xmlFree(attvalue);
8216                     goto failed;
8217                 }
8218             }
8219             /*
8220              * Add the pair to atts
8221              */
8222             if (atts == NULL) {
8223                 maxatts = 22; /* allow for 10 attrs by default */
8224                 atts = (const xmlChar **)
8225                        xmlMalloc(maxatts * sizeof(xmlChar *));
8226                 if (atts == NULL) {
8227                     xmlErrMemory(ctxt, NULL);
8228                     if (attvalue != NULL)
8229                         xmlFree(attvalue);
8230                     goto failed;
8231                 }
8232                 ctxt->atts = atts;
8233                 ctxt->maxatts = maxatts;
8234             } else if (nbatts + 4 > maxatts) {
8235                 const xmlChar **n;
8236
8237                 maxatts *= 2;
8238                 n = (const xmlChar **) xmlRealloc((void *) atts,
8239                                              maxatts * sizeof(const xmlChar *));
8240                 if (n == NULL) {
8241                     xmlErrMemory(ctxt, NULL);
8242                     if (attvalue != NULL)
8243                         xmlFree(attvalue);
8244                     goto failed;
8245                 }
8246                 atts = n;
8247                 ctxt->atts = atts;
8248                 ctxt->maxatts = maxatts;
8249             }
8250             atts[nbatts++] = attname;
8251             atts[nbatts++] = attvalue;
8252             atts[nbatts] = NULL;
8253             atts[nbatts + 1] = NULL;
8254         } else {
8255             if (attvalue != NULL)
8256                 xmlFree(attvalue);
8257         }
8258
8259 failed:
8260
8261         GROW
8262         if ((RAW == '>') || (((RAW == '/') && (NXT(1) == '>'))))
8263             break;
8264         if (!IS_BLANK_CH(RAW)) {
8265             xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
8266                            "attributes construct error\n");
8267         }
8268         SKIP_BLANKS;
8269         if ((cons == ctxt->input->consumed) && (q == CUR_PTR) &&
8270             (attname == NULL) && (attvalue == NULL)) {
8271             xmlFatalErrMsg(ctxt, XML_ERR_INTERNAL_ERROR,
8272                            "xmlParseStartTag: problem parsing attributes\n");
8273             break;
8274         }
8275         SHRINK;
8276         GROW;
8277     }
8278
8279     /*
8280      * SAX: Start of Element !
8281      */
8282     if ((ctxt->sax != NULL) && (ctxt->sax->startElement != NULL) &&
8283         (!ctxt->disableSAX)) {
8284         if (nbatts > 0)
8285             ctxt->sax->startElement(ctxt->userData, name, atts);
8286         else
8287             ctxt->sax->startElement(ctxt->userData, name, NULL);
8288     }
8289
8290     if (atts != NULL) {
8291         /* Free only the content strings */
8292         for (i = 1;i < nbatts;i+=2)
8293             if (atts[i] != NULL)
8294                xmlFree((xmlChar *) atts[i]);
8295     }
8296     return(name);
8297 }
8298
8299 /**
8300  * xmlParseEndTag1:
8301  * @ctxt:  an XML parser context
8302  * @line:  line of the start tag
8303  * @nsNr:  number of namespaces on the start tag
8304  *
8305  * parse an end of tag
8306  *
8307  * [42] ETag ::= '</' Name S? '>'
8308  *
8309  * With namespace
8310  *
8311  * [NS 9] ETag ::= '</' QName S? '>'
8312  */
8313
8314 static void
8315 xmlParseEndTag1(xmlParserCtxtPtr ctxt, int line) {
8316     const xmlChar *name;
8317
8318     GROW;
8319     if ((RAW != '<') || (NXT(1) != '/')) {
8320         xmlFatalErrMsg(ctxt, XML_ERR_LTSLASH_REQUIRED,
8321                        "xmlParseEndTag: '</' not found\n");
8322         return;
8323     }
8324     SKIP(2);
8325
8326     name = xmlParseNameAndCompare(ctxt,ctxt->name);
8327
8328     /*
8329      * We should definitely be at the ending "S? '>'" part
8330      */
8331     GROW;
8332     SKIP_BLANKS;
8333     if ((!IS_BYTE_CHAR(RAW)) || (RAW != '>')) {
8334         xmlFatalErr(ctxt, XML_ERR_GT_REQUIRED, NULL);
8335     } else
8336         NEXT1;
8337
8338     /*
8339      * [ WFC: Element Type Match ]
8340      * The Name in an element's end-tag must match the element type in the
8341      * start-tag.
8342      *
8343      */
8344     if (name != (xmlChar*)1) {
8345         if (name == NULL) name = BAD_CAST "unparseable";
8346         xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NAME_MISMATCH,
8347                      "Opening and ending tag mismatch: %s line %d and %s\n",
8348                                 ctxt->name, line, name);
8349     }
8350
8351     /*
8352      * SAX: End of Tag
8353      */
8354     if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL) &&
8355         (!ctxt->disableSAX))
8356         ctxt->sax->endElement(ctxt->userData, ctxt->name);
8357
8358     namePop(ctxt);
8359     spacePop(ctxt);
8360     return;
8361 }
8362
8363 /**
8364  * xmlParseEndTag:
8365  * @ctxt:  an XML parser context
8366  *
8367  * parse an end of tag
8368  *
8369  * [42] ETag ::= '</' Name S? '>'
8370  *
8371  * With namespace
8372  *
8373  * [NS 9] ETag ::= '</' QName S? '>'
8374  */
8375
8376 void
8377 xmlParseEndTag(xmlParserCtxtPtr ctxt) {
8378     xmlParseEndTag1(ctxt, 0);
8379 }
8380 #endif /* LIBXML_SAX1_ENABLED */
8381
8382 /************************************************************************
8383  *                                                                      *
8384  *                    SAX 2 specific operations                         *
8385  *                                                                      *
8386  ************************************************************************/
8387
8388 /*
8389  * xmlGetNamespace:
8390  * @ctxt:  an XML parser context
8391  * @prefix:  the prefix to lookup
8392  *
8393  * Lookup the namespace name for the @prefix (which ca be NULL)
8394  * The prefix must come from the @ctxt->dict dictionnary
8395  *
8396  * Returns the namespace name or NULL if not bound
8397  */
8398 static const xmlChar *
8399 xmlGetNamespace(xmlParserCtxtPtr ctxt, const xmlChar *prefix) {
8400     int i;
8401
8402     if (prefix == ctxt->str_xml) return(ctxt->str_xml_ns);
8403     for (i = ctxt->nsNr - 2;i >= 0;i-=2)
8404         if (ctxt->nsTab[i] == prefix) {
8405             if ((prefix == NULL) && (*ctxt->nsTab[i + 1] == 0))
8406                 return(NULL);
8407             return(ctxt->nsTab[i + 1]);
8408         }
8409     return(NULL);
8410 }
8411
8412 /**
8413  * xmlParseQName:
8414  * @ctxt:  an XML parser context
8415  * @prefix:  pointer to store the prefix part
8416  *
8417  * parse an XML Namespace QName
8418  *
8419  * [6]  QName  ::= (Prefix ':')? LocalPart
8420  * [7]  Prefix  ::= NCName
8421  * [8]  LocalPart  ::= NCName
8422  *
8423  * Returns the Name parsed or NULL
8424  */
8425
8426 static const xmlChar *
8427 xmlParseQName(xmlParserCtxtPtr ctxt, const xmlChar **prefix) {
8428     const xmlChar *l, *p;
8429
8430     GROW;
8431
8432     l = xmlParseNCName(ctxt);
8433     if (l == NULL) {
8434         if (CUR == ':') {
8435             l = xmlParseName(ctxt);
8436             if (l != NULL) {
8437                 xmlNsErr(ctxt, XML_NS_ERR_QNAME,
8438                          "Failed to parse QName '%s'\n", l, NULL, NULL);
8439                 *prefix = NULL;
8440                 return(l);
8441             }
8442         }
8443         return(NULL);
8444     }
8445     if (CUR == ':') {
8446         NEXT;
8447         p = l;
8448         l = xmlParseNCName(ctxt);
8449         if (l == NULL) {
8450             xmlChar *tmp;
8451
8452             xmlNsErr(ctxt, XML_NS_ERR_QNAME,
8453                      "Failed to parse QName '%s:'\n", p, NULL, NULL);
8454             l = xmlParseNmtoken(ctxt);
8455             if (l == NULL)
8456                 tmp = xmlBuildQName(BAD_CAST "", p, NULL, 0);
8457             else {
8458                 tmp = xmlBuildQName(l, p, NULL, 0);
8459                 xmlFree((char *)l);
8460             }
8461             p = xmlDictLookup(ctxt->dict, tmp, -1);
8462             if (tmp != NULL) xmlFree(tmp);
8463             *prefix = NULL;
8464             return(p);
8465         }
8466         if (CUR == ':') {
8467             xmlChar *tmp;
8468
8469             xmlNsErr(ctxt, XML_NS_ERR_QNAME,
8470                      "Failed to parse QName '%s:%s:'\n", p, l, NULL);
8471             NEXT;
8472             tmp = (xmlChar *) xmlParseName(ctxt);
8473             if (tmp != NULL) {
8474                 tmp = xmlBuildQName(tmp, l, NULL, 0);
8475                 l = xmlDictLookup(ctxt->dict, tmp, -1);
8476                 if (tmp != NULL) xmlFree(tmp);
8477                 *prefix = p;
8478                 return(l);
8479             }
8480             tmp = xmlBuildQName(BAD_CAST "", l, NULL, 0);
8481             l = xmlDictLookup(ctxt->dict, tmp, -1);
8482             if (tmp != NULL) xmlFree(tmp);
8483             *prefix = p;
8484             return(l);
8485         }
8486         *prefix = p;
8487     } else
8488         *prefix = NULL;
8489     return(l);
8490 }
8491
8492 /**
8493  * xmlParseQNameAndCompare:
8494  * @ctxt:  an XML parser context
8495  * @name:  the localname
8496  * @prefix:  the prefix, if any.
8497  *
8498  * parse an XML name and compares for match
8499  * (specialized for endtag parsing)
8500  *
8501  * Returns NULL for an illegal name, (xmlChar*) 1 for success
8502  * and the name for mismatch
8503  */
8504
8505 static const xmlChar *
8506 xmlParseQNameAndCompare(xmlParserCtxtPtr ctxt, xmlChar const *name,
8507                         xmlChar const *prefix) {
8508     const xmlChar *cmp;
8509     const xmlChar *in;
8510     const xmlChar *ret;
8511     const xmlChar *prefix2;
8512
8513     if (prefix == NULL) return(xmlParseNameAndCompare(ctxt, name));
8514
8515     GROW;
8516     in = ctxt->input->cur;
8517
8518     cmp = prefix;
8519     while (*in != 0 && *in == *cmp) {
8520         ++in;
8521         ++cmp;
8522     }
8523     if ((*cmp == 0) && (*in == ':')) {
8524         in++;
8525         cmp = name;
8526         while (*in != 0 && *in == *cmp) {
8527             ++in;
8528             ++cmp;
8529         }
8530         if (*cmp == 0 && (*in == '>' || IS_BLANK_CH (*in))) {
8531             /* success */
8532             ctxt->input->cur = in;
8533             return((const xmlChar*) 1);
8534         }
8535     }
8536     /*
8537      * all strings coms from the dictionary, equality can be done directly
8538      */
8539     ret = xmlParseQName (ctxt, &prefix2);
8540     if ((ret == name) && (prefix == prefix2))
8541         return((const xmlChar*) 1);
8542     return ret;
8543 }
8544
8545 /**
8546  * xmlParseAttValueInternal:
8547  * @ctxt:  an XML parser context
8548  * @len:  attribute len result
8549  * @alloc:  whether the attribute was reallocated as a new string
8550  * @normalize:  if 1 then further non-CDATA normalization must be done
8551  *
8552  * parse a value for an attribute.
8553  * NOTE: if no normalization is needed, the routine will return pointers
8554  *       directly from the data buffer.
8555  *
8556  * 3.3.3 Attribute-Value Normalization:
8557  * Before the value of an attribute is passed to the application or
8558  * checked for validity, the XML processor must normalize it as follows:
8559  * - a character reference is processed by appending the referenced
8560  *   character to the attribute value
8561  * - an entity reference is processed by recursively processing the
8562  *   replacement text of the entity
8563  * - a whitespace character (#x20, #xD, #xA, #x9) is processed by
8564  *   appending #x20 to the normalized value, except that only a single
8565  *   #x20 is appended for a "#xD#xA" sequence that is part of an external
8566  *   parsed entity or the literal entity value of an internal parsed entity
8567  * - other characters are processed by appending them to the normalized value
8568  * If the declared value is not CDATA, then the XML processor must further
8569  * process the normalized attribute value by discarding any leading and
8570  * trailing space (#x20) characters, and by replacing sequences of space
8571  * (#x20) characters by a single space (#x20) character.
8572  * All attributes for which no declaration has been read should be treated
8573  * by a non-validating parser as if declared CDATA.
8574  *
8575  * Returns the AttValue parsed or NULL. The value has to be freed by the
8576  *     caller if it was copied, this can be detected by val[*len] == 0.
8577  */
8578
8579 static xmlChar *
8580 xmlParseAttValueInternal(xmlParserCtxtPtr ctxt, int *len, int *alloc,
8581                          int normalize)
8582 {
8583     xmlChar limit = 0;
8584     const xmlChar *in = NULL, *start, *end, *last;
8585     xmlChar *ret = NULL;
8586
8587     GROW;
8588     in = (xmlChar *) CUR_PTR;
8589     if (*in != '"' && *in != '\'') {
8590         xmlFatalErr(ctxt, XML_ERR_ATTRIBUTE_NOT_STARTED, NULL);
8591         return (NULL);
8592     }
8593     ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
8594
8595     /*
8596      * try to handle in this routine the most common case where no
8597      * allocation of a new string is required and where content is
8598      * pure ASCII.
8599      */
8600     limit = *in++;
8601     end = ctxt->input->end;
8602     start = in;
8603     if (in >= end) {
8604         const xmlChar *oldbase = ctxt->input->base;
8605         GROW;
8606         if (oldbase != ctxt->input->base) {
8607             long delta = ctxt->input->base - oldbase;
8608             start = start + delta;
8609             in = in + delta;
8610         }
8611         end = ctxt->input->end;
8612     }
8613     if (normalize) {
8614         /*
8615          * Skip any leading spaces
8616          */
8617         while ((in < end) && (*in != limit) &&
8618                ((*in == 0x20) || (*in == 0x9) ||
8619                 (*in == 0xA) || (*in == 0xD))) {
8620             in++;
8621             start = in;
8622             if (in >= end) {
8623                 const xmlChar *oldbase = ctxt->input->base;
8624                 GROW;
8625                 if (oldbase != ctxt->input->base) {
8626                     long delta = ctxt->input->base - oldbase;
8627                     start = start + delta;
8628                     in = in + delta;
8629                 }
8630                 end = ctxt->input->end;
8631             }
8632         }
8633         while ((in < end) && (*in != limit) && (*in >= 0x20) &&
8634                (*in <= 0x7f) && (*in != '&') && (*in != '<')) {
8635             if ((*in++ == 0x20) && (*in == 0x20)) break;
8636             if (in >= end) {
8637                 const xmlChar *oldbase = ctxt->input->base;
8638                 GROW;
8639                 if (oldbase != ctxt->input->base) {
8640                     long delta = ctxt->input->base - oldbase;
8641                     start = start + delta;
8642                     in = in + delta;
8643                 }
8644                 end = ctxt->input->end;
8645             }
8646         }
8647         last = in;
8648         /*
8649          * skip the trailing blanks
8650          */
8651         while ((last[-1] == 0x20) && (last > start)) last--;
8652         while ((in < end) && (*in != limit) &&
8653                ((*in == 0x20) || (*in == 0x9) ||
8654                 (*in == 0xA) || (*in == 0xD))) {
8655             in++;
8656             if (in >= end) {
8657                 const xmlChar *oldbase = ctxt->input->base;
8658                 GROW;
8659                 if (oldbase != ctxt->input->base) {
8660                     long delta = ctxt->input->base - oldbase;
8661                     start = start + delta;
8662                     in = in + delta;
8663                     last = last + delta;
8664                 }
8665                 end = ctxt->input->end;
8666             }
8667         }
8668         if (*in != limit) goto need_complex;
8669     } else {
8670         while ((in < end) && (*in != limit) && (*in >= 0x20) &&
8671                (*in <= 0x7f) && (*in != '&') && (*in != '<')) {
8672             in++;
8673             if (in >= end) {
8674                 const xmlChar *oldbase = ctxt->input->base;
8675                 GROW;
8676                 if (oldbase != ctxt->input->base) {
8677                     long delta = ctxt->input->base - oldbase;
8678                     start = start + delta;
8679                     in = in + delta;
8680                 }
8681                 end = ctxt->input->end;
8682             }
8683         }
8684         last = in;
8685         if (*in != limit) goto need_complex;
8686     }
8687     in++;
8688     if (len != NULL) {
8689         *len = last - start;
8690         ret = (xmlChar *) start;
8691     } else {
8692         if (alloc) *alloc = 1;
8693         ret = xmlStrndup(start, last - start);
8694     }
8695     CUR_PTR = in;
8696     if (alloc) *alloc = 0;
8697     return ret;
8698 need_complex:
8699     if (alloc) *alloc = 1;
8700     return xmlParseAttValueComplex(ctxt, len, normalize);
8701 }
8702
8703 /**
8704  * xmlParseAttribute2:
8705  * @ctxt:  an XML parser context
8706  * @pref:  the element prefix
8707  * @elem:  the element name
8708  * @prefix:  a xmlChar ** used to store the value of the attribute prefix
8709  * @value:  a xmlChar ** used to store the value of the attribute
8710  * @len:  an int * to save the length of the attribute
8711  * @alloc:  an int * to indicate if the attribute was allocated
8712  *
8713  * parse an attribute in the new SAX2 framework.
8714  *
8715  * Returns the attribute name, and the value in *value, .
8716  */
8717
8718 static const xmlChar *
8719 xmlParseAttribute2(xmlParserCtxtPtr ctxt,
8720                    const xmlChar * pref, const xmlChar * elem,
8721                    const xmlChar ** prefix, xmlChar ** value,
8722                    int *len, int *alloc)
8723 {
8724     const xmlChar *name;
8725     xmlChar *val, *internal_val = NULL;
8726     int normalize = 0;
8727
8728     *value = NULL;
8729     GROW;
8730     name = xmlParseQName(ctxt, prefix);
8731     if (name == NULL) {
8732         xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
8733                        "error parsing attribute name\n");
8734         return (NULL);
8735     }
8736
8737     /*
8738      * get the type if needed
8739      */
8740     if (ctxt->attsSpecial != NULL) {
8741         int type;
8742
8743         type = (int) (long) xmlHashQLookup2(ctxt->attsSpecial,
8744                                             pref, elem, *prefix, name);
8745         if (type != 0)
8746             normalize = 1;
8747     }
8748
8749     /*
8750      * read the value
8751      */
8752     SKIP_BLANKS;
8753     if (RAW == '=') {
8754         NEXT;
8755         SKIP_BLANKS;
8756         val = xmlParseAttValueInternal(ctxt, len, alloc, normalize);
8757         if (normalize) {
8758             /*
8759              * Sometimes a second normalisation pass for spaces is needed
8760              * but that only happens if charrefs or entities refernces
8761              * have been used in the attribute value, i.e. the attribute
8762              * value have been extracted in an allocated string already.
8763              */
8764             if (*alloc) {
8765                 const xmlChar *val2;
8766
8767                 val2 = xmlAttrNormalizeSpace2(ctxt, val, len);
8768                 if ((val2 != NULL) && (val2 != val)) {
8769                     xmlFree(val);
8770                     val = (xmlChar *) val2;
8771                 }
8772             }
8773         }
8774         ctxt->instate = XML_PARSER_CONTENT;
8775     } else {
8776         xmlFatalErrMsgStr(ctxt, XML_ERR_ATTRIBUTE_WITHOUT_VALUE,
8777                           "Specification mandate value for attribute %s\n",
8778                           name);
8779         return (NULL);
8780     }
8781
8782     if (*prefix == ctxt->str_xml) {
8783         /*
8784          * Check that xml:lang conforms to the specification
8785          * No more registered as an error, just generate a warning now
8786          * since this was deprecated in XML second edition
8787          */
8788         if ((ctxt->pedantic) && (xmlStrEqual(name, BAD_CAST "lang"))) {
8789             internal_val = xmlStrndup(val, *len);
8790             if (!xmlCheckLanguageID(internal_val)) {
8791                 xmlWarningMsg(ctxt, XML_WAR_LANG_VALUE,
8792                               "Malformed value for xml:lang : %s\n",
8793                               internal_val, NULL);
8794             }
8795         }
8796
8797         /*
8798          * Check that xml:space conforms to the specification
8799          */
8800         if (xmlStrEqual(name, BAD_CAST "space")) {
8801             internal_val = xmlStrndup(val, *len);
8802             if (xmlStrEqual(internal_val, BAD_CAST "default"))
8803                 *(ctxt->space) = 0;
8804             else if (xmlStrEqual(internal_val, BAD_CAST "preserve"))
8805                 *(ctxt->space) = 1;
8806             else {
8807                 xmlWarningMsg(ctxt, XML_WAR_SPACE_VALUE,
8808                               "Invalid value \"%s\" for xml:space : \"default\" or \"preserve\" expected\n",
8809                               internal_val, NULL);
8810             }
8811         }
8812         if (internal_val) {
8813             xmlFree(internal_val);
8814         }
8815     }
8816
8817     *value = val;
8818     return (name);
8819 }
8820 /**
8821  * xmlParseStartTag2:
8822  * @ctxt:  an XML parser context
8823  *
8824  * parse a start of tag either for rule element or
8825  * EmptyElement. In both case we don't parse the tag closing chars.
8826  * This routine is called when running SAX2 parsing
8827  *
8828  * [40] STag ::= '<' Name (S Attribute)* S? '>'
8829  *
8830  * [ WFC: Unique Att Spec ]
8831  * No attribute name may appear more than once in the same start-tag or
8832  * empty-element tag.
8833  *
8834  * [44] EmptyElemTag ::= '<' Name (S Attribute)* S? '/>'
8835  *
8836  * [ WFC: Unique Att Spec ]
8837  * No attribute name may appear more than once in the same start-tag or
8838  * empty-element tag.
8839  *
8840  * With namespace:
8841  *
8842  * [NS 8] STag ::= '<' QName (S Attribute)* S? '>'
8843  *
8844  * [NS 10] EmptyElement ::= '<' QName (S Attribute)* S? '/>'
8845  *
8846  * Returns the element name parsed
8847  */
8848
8849 static const xmlChar *
8850 xmlParseStartTag2(xmlParserCtxtPtr ctxt, const xmlChar **pref,
8851                   const xmlChar **URI, int *tlen) {
8852     const xmlChar *localname;
8853     const xmlChar *prefix;
8854     const xmlChar *attname;
8855     const xmlChar *aprefix;
8856     const xmlChar *nsname;
8857     xmlChar *attvalue;
8858     const xmlChar **atts = ctxt->atts;
8859     int maxatts = ctxt->maxatts;
8860     int nratts, nbatts, nbdef;
8861     int i, j, nbNs, attval, oldline, oldcol;
8862     const xmlChar *base;
8863     unsigned long cur;
8864     int nsNr = ctxt->nsNr;
8865
8866     if (RAW != '<') return(NULL);
8867     NEXT1;
8868
8869     /*
8870      * NOTE: it is crucial with the SAX2 API to never call SHRINK beyond that
8871      *       point since the attribute values may be stored as pointers to
8872      *       the buffer and calling SHRINK would destroy them !
8873      *       The Shrinking is only possible once the full set of attribute
8874      *       callbacks have been done.
8875      */
8876 reparse:
8877     SHRINK;
8878     base = ctxt->input->base;
8879     cur = ctxt->input->cur - ctxt->input->base;
8880     oldline = ctxt->input->line;
8881     oldcol = ctxt->input->col;
8882     nbatts = 0;
8883     nratts = 0;
8884     nbdef = 0;
8885     nbNs = 0;
8886     attval = 0;
8887     /* Forget any namespaces added during an earlier parse of this element. */
8888     ctxt->nsNr = nsNr;
8889
8890     localname = xmlParseQName(ctxt, &prefix);
8891     if (localname == NULL) {
8892         xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
8893                        "StartTag: invalid element name\n");
8894         return(NULL);
8895     }
8896     *tlen = ctxt->input->cur - ctxt->input->base - cur;
8897
8898     /*
8899      * Now parse the attributes, it ends up with the ending
8900      *
8901      * (S Attribute)* S?
8902      */
8903     SKIP_BLANKS;
8904     GROW;
8905     if (ctxt->input->base != base) goto base_changed;
8906
8907     while ((RAW != '>') &&
8908            ((RAW != '/') || (NXT(1) != '>')) &&
8909            (IS_BYTE_CHAR(RAW))) {
8910         const xmlChar *q = CUR_PTR;
8911         unsigned int cons = ctxt->input->consumed;
8912         int len = -1, alloc = 0;
8913
8914         attname = xmlParseAttribute2(ctxt, prefix, localname,
8915                                      &aprefix, &attvalue, &len, &alloc);
8916         if (ctxt->input->base != base) {
8917             if ((attvalue != NULL) && (alloc != 0))
8918                 xmlFree(attvalue);
8919             attvalue = NULL;
8920             goto base_changed;
8921         }
8922         if ((attname != NULL) && (attvalue != NULL)) {
8923             if (len < 0) len = xmlStrlen(attvalue);
8924             if ((attname == ctxt->str_xmlns) && (aprefix == NULL)) {
8925                 const xmlChar *URL = xmlDictLookup(ctxt->dict, attvalue, len);
8926                 xmlURIPtr uri;
8927
8928                 if (*URL != 0) {
8929                     uri = xmlParseURI((const char *) URL);
8930                     if (uri == NULL) {
8931                         xmlNsErr(ctxt, XML_WAR_NS_URI,
8932                                  "xmlns: '%s' is not a valid URI\n",
8933                                            URL, NULL, NULL);
8934                     } else {
8935                         if (uri->scheme == NULL) {
8936                             xmlNsWarn(ctxt, XML_WAR_NS_URI_RELATIVE,
8937                                       "xmlns: URI %s is not absolute\n",
8938                                       URL, NULL, NULL);
8939                         }
8940                         xmlFreeURI(uri);
8941                     }
8942                     if (URL == ctxt->str_xml_ns) {
8943                         if (attname != ctxt->str_xml) {
8944                             xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
8945                          "xml namespace URI cannot be the default namespace\n",
8946                                      NULL, NULL, NULL);
8947                         }
8948                         goto skip_default_ns;
8949                     }
8950                     if ((len == 29) &&
8951                         (xmlStrEqual(URL,
8952                                  BAD_CAST "http://www.w3.org/2000/xmlns/"))) {
8953                         xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
8954                              "reuse of the xmlns namespace name is forbidden\n",
8955                                  NULL, NULL, NULL);
8956                         goto skip_default_ns;
8957                     }
8958                 }
8959                 /*
8960                  * check that it's not a defined namespace
8961                  */
8962                 for (j = 1;j <= nbNs;j++)
8963                     if (ctxt->nsTab[ctxt->nsNr - 2 * j] == NULL)
8964                         break;
8965                 if (j <= nbNs)
8966                     xmlErrAttributeDup(ctxt, NULL, attname);
8967                 else
8968                     if (nsPush(ctxt, NULL, URL) > 0) nbNs++;
8969 skip_default_ns:
8970                 if (alloc != 0) xmlFree(attvalue);
8971                 SKIP_BLANKS;
8972                 continue;
8973             }
8974             if (aprefix == ctxt->str_xmlns) {
8975                 const xmlChar *URL = xmlDictLookup(ctxt->dict, attvalue, len);
8976                 xmlURIPtr uri;
8977
8978                 if (attname == ctxt->str_xml) {
8979                     if (URL != ctxt->str_xml_ns) {
8980                         xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
8981                                  "xml namespace prefix mapped to wrong URI\n",
8982                                  NULL, NULL, NULL);
8983                     }
8984                     /*
8985                      * Do not keep a namespace definition node
8986                      */
8987                     goto skip_ns;
8988                 }
8989                 if (URL == ctxt->str_xml_ns) {
8990                     if (attname != ctxt->str_xml) {
8991                         xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
8992                                  "xml namespace URI mapped to wrong prefix\n",
8993                                  NULL, NULL, NULL);
8994                     }
8995                     goto skip_ns;
8996                 }
8997                 if (attname == ctxt->str_xmlns) {
8998                     xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
8999                              "redefinition of the xmlns prefix is forbidden\n",
9000                              NULL, NULL, NULL);
9001                     goto skip_ns;
9002                 }
9003                 if ((len == 29) &&
9004                     (xmlStrEqual(URL,
9005                                  BAD_CAST "http://www.w3.org/2000/xmlns/"))) {
9006                     xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9007                              "reuse of the xmlns namespace name is forbidden\n",
9008                              NULL, NULL, NULL);
9009                     goto skip_ns;
9010                 }
9011                 if ((URL == NULL) || (URL[0] == 0)) {
9012                     xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9013                              "xmlns:%s: Empty XML namespace is not allowed\n",
9014                                   attname, NULL, NULL);
9015                     goto skip_ns;
9016                 } else {
9017                     uri = xmlParseURI((const char *) URL);
9018                     if (uri == NULL) {
9019                         xmlNsErr(ctxt, XML_WAR_NS_URI,
9020                              "xmlns:%s: '%s' is not a valid URI\n",
9021                                            attname, URL, NULL);
9022                     } else {
9023                         if ((ctxt->pedantic) && (uri->scheme == NULL)) {
9024                             xmlNsWarn(ctxt, XML_WAR_NS_URI_RELATIVE,
9025                                       "xmlns:%s: URI %s is not absolute\n",
9026                                       attname, URL, NULL);
9027                         }
9028                         xmlFreeURI(uri);
9029                     }
9030                 }
9031
9032                 /*
9033                  * check that it's not a defined namespace
9034                  */
9035                 for (j = 1;j <= nbNs;j++)
9036                     if (ctxt->nsTab[ctxt->nsNr - 2 * j] == attname)
9037                         break;
9038                 if (j <= nbNs)
9039                     xmlErrAttributeDup(ctxt, aprefix, attname);
9040                 else
9041                     if (nsPush(ctxt, attname, URL) > 0) nbNs++;
9042 skip_ns:
9043                 if (alloc != 0) xmlFree(attvalue);
9044                 SKIP_BLANKS;
9045                 if (ctxt->input->base != base) goto base_changed;
9046                 continue;
9047             }
9048
9049             /*
9050              * Add the pair to atts
9051              */
9052             if ((atts == NULL) || (nbatts + 5 > maxatts)) {
9053                 if (xmlCtxtGrowAttrs(ctxt, nbatts + 5) < 0) {
9054                     if (attvalue[len] == 0)
9055                         xmlFree(attvalue);
9056                     goto failed;
9057                 }
9058                 maxatts = ctxt->maxatts;
9059                 atts = ctxt->atts;
9060             }
9061             ctxt->attallocs[nratts++] = alloc;
9062             atts[nbatts++] = attname;
9063             atts[nbatts++] = aprefix;
9064             atts[nbatts++] = NULL; /* the URI will be fetched later */
9065             atts[nbatts++] = attvalue;
9066             attvalue += len;
9067             atts[nbatts++] = attvalue;
9068             /*
9069              * tag if some deallocation is needed
9070              */
9071             if (alloc != 0) attval = 1;
9072         } else {
9073             if ((attvalue != NULL) && (attvalue[len] == 0))
9074                 xmlFree(attvalue);
9075         }
9076
9077 failed:
9078
9079         GROW
9080         if (ctxt->input->base != base) goto base_changed;
9081         if ((RAW == '>') || (((RAW == '/') && (NXT(1) == '>'))))
9082             break;
9083         if (!IS_BLANK_CH(RAW)) {
9084             xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
9085                            "attributes construct error\n");
9086             break;
9087         }
9088         SKIP_BLANKS;
9089         if ((cons == ctxt->input->consumed) && (q == CUR_PTR) &&
9090             (attname == NULL) && (attvalue == NULL)) {
9091             xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
9092                  "xmlParseStartTag: problem parsing attributes\n");
9093             break;
9094         }
9095         GROW;
9096         if (ctxt->input->base != base) goto base_changed;
9097     }
9098
9099     /*
9100      * The attributes defaulting
9101      */
9102     if (ctxt->attsDefault != NULL) {
9103         xmlDefAttrsPtr defaults;
9104
9105         defaults = xmlHashLookup2(ctxt->attsDefault, localname, prefix);
9106         if (defaults != NULL) {
9107             for (i = 0;i < defaults->nbAttrs;i++) {
9108                 attname = defaults->values[5 * i];
9109                 aprefix = defaults->values[5 * i + 1];
9110
9111                 /*
9112                  * special work for namespaces defaulted defs
9113                  */
9114                 if ((attname == ctxt->str_xmlns) && (aprefix == NULL)) {
9115                     /*
9116                      * check that it's not a defined namespace
9117                      */
9118                     for (j = 1;j <= nbNs;j++)
9119                         if (ctxt->nsTab[ctxt->nsNr - 2 * j] == NULL)
9120                             break;
9121                     if (j <= nbNs) continue;
9122
9123                     nsname = xmlGetNamespace(ctxt, NULL);
9124                     if (nsname != defaults->values[5 * i + 2]) {
9125                         if (nsPush(ctxt, NULL,
9126                                    defaults->values[5 * i + 2]) > 0)
9127                             nbNs++;
9128                     }
9129                 } else if (aprefix == ctxt->str_xmlns) {
9130                     /*
9131                      * check that it's not a defined namespace
9132                      */
9133                     for (j = 1;j <= nbNs;j++)
9134                         if (ctxt->nsTab[ctxt->nsNr - 2 * j] == attname)
9135                             break;
9136                     if (j <= nbNs) continue;
9137
9138                     nsname = xmlGetNamespace(ctxt, attname);
9139                     if (nsname != defaults->values[2]) {
9140                         if (nsPush(ctxt, attname,
9141                                    defaults->values[5 * i + 2]) > 0)
9142                             nbNs++;
9143                     }
9144                 } else {
9145                     /*
9146                      * check that it's not a defined attribute
9147                      */
9148                     for (j = 0;j < nbatts;j+=5) {
9149                         if ((attname == atts[j]) && (aprefix == atts[j+1]))
9150                             break;
9151                     }
9152                     if (j < nbatts) continue;
9153
9154                     if ((atts == NULL) || (nbatts + 5 > maxatts)) {
9155                         if (xmlCtxtGrowAttrs(ctxt, nbatts + 5) < 0) {
9156                             return(NULL);
9157                         }
9158                         maxatts = ctxt->maxatts;
9159                         atts = ctxt->atts;
9160                     }
9161                     atts[nbatts++] = attname;
9162                     atts[nbatts++] = aprefix;
9163                     if (aprefix == NULL)
9164                         atts[nbatts++] = NULL;
9165                     else
9166                         atts[nbatts++] = xmlGetNamespace(ctxt, aprefix);
9167                     atts[nbatts++] = defaults->values[5 * i + 2];
9168                     atts[nbatts++] = defaults->values[5 * i + 3];
9169                     if ((ctxt->standalone == 1) &&
9170                         (defaults->values[5 * i + 4] != NULL)) {
9171                         xmlValidityError(ctxt, XML_DTD_STANDALONE_DEFAULTED,
9172           "standalone: attribute %s on %s defaulted from external subset\n",
9173                                          attname, localname);
9174                     }
9175                     nbdef++;
9176                 }
9177             }
9178         }
9179     }
9180
9181     /*
9182      * The attributes checkings
9183      */
9184     for (i = 0; i < nbatts;i += 5) {
9185         /*
9186         * The default namespace does not apply to attribute names.
9187         */
9188         if (atts[i + 1] != NULL) {
9189             nsname = xmlGetNamespace(ctxt, atts[i + 1]);
9190             if (nsname == NULL) {
9191                 xmlNsErr(ctxt, XML_NS_ERR_UNDEFINED_NAMESPACE,
9192                     "Namespace prefix %s for %s on %s is not defined\n",
9193                     atts[i + 1], atts[i], localname);
9194             }
9195             atts[i + 2] = nsname;
9196         } else
9197             nsname = NULL;
9198         /*
9199          * [ WFC: Unique Att Spec ]
9200          * No attribute name may appear more than once in the same
9201          * start-tag or empty-element tag.
9202          * As extended by the Namespace in XML REC.
9203          */
9204         for (j = 0; j < i;j += 5) {
9205             if (atts[i] == atts[j]) {
9206                 if (atts[i+1] == atts[j+1]) {
9207                     xmlErrAttributeDup(ctxt, atts[i+1], atts[i]);
9208                     break;
9209                 }
9210                 if ((nsname != NULL) && (atts[j + 2] == nsname)) {
9211                     xmlNsErr(ctxt, XML_NS_ERR_ATTRIBUTE_REDEFINED,
9212                              "Namespaced Attribute %s in '%s' redefined\n",
9213                              atts[i], nsname, NULL);
9214                     break;
9215                 }
9216             }
9217         }
9218     }
9219
9220     nsname = xmlGetNamespace(ctxt, prefix);
9221     if ((prefix != NULL) && (nsname == NULL)) {
9222         xmlNsErr(ctxt, XML_NS_ERR_UNDEFINED_NAMESPACE,
9223                  "Namespace prefix %s on %s is not defined\n",
9224                  prefix, localname, NULL);
9225     }
9226     *pref = prefix;
9227     *URI = nsname;
9228
9229     /*
9230      * SAX: Start of Element !
9231      */
9232     if ((ctxt->sax != NULL) && (ctxt->sax->startElementNs != NULL) &&
9233         (!ctxt->disableSAX)) {
9234         if (nbNs > 0)
9235             ctxt->sax->startElementNs(ctxt->userData, localname, prefix,
9236                           nsname, nbNs, &ctxt->nsTab[ctxt->nsNr - 2 * nbNs],
9237                           nbatts / 5, nbdef, atts);
9238         else
9239             ctxt->sax->startElementNs(ctxt->userData, localname, prefix,
9240                           nsname, 0, NULL, nbatts / 5, nbdef, atts);
9241     }
9242
9243     /*
9244      * Free up attribute allocated strings if needed
9245      */
9246     if (attval != 0) {
9247         for (i = 3,j = 0; j < nratts;i += 5,j++)
9248             if ((ctxt->attallocs[j] != 0) && (atts[i] != NULL))
9249                 xmlFree((xmlChar *) atts[i]);
9250     }
9251
9252     return(localname);
9253
9254 base_changed:
9255     /*
9256      * the attribute strings are valid iif the base didn't changed
9257      */
9258     if (attval != 0) {
9259         for (i = 3,j = 0; j < nratts;i += 5,j++)
9260             if ((ctxt->attallocs[j] != 0) && (atts[i] != NULL))
9261                 xmlFree((xmlChar *) atts[i]);
9262     }
9263     ctxt->input->cur = ctxt->input->base + cur;
9264     ctxt->input->line = oldline;
9265     ctxt->input->col = oldcol;
9266     if (ctxt->wellFormed == 1) {
9267         goto reparse;
9268     }
9269     return(NULL);
9270 }
9271
9272 /**
9273  * xmlParseEndTag2:
9274  * @ctxt:  an XML parser context
9275  * @line:  line of the start tag
9276  * @nsNr:  number of namespaces on the start tag
9277  *
9278  * parse an end of tag
9279  *
9280  * [42] ETag ::= '</' Name S? '>'
9281  *
9282  * With namespace
9283  *
9284  * [NS 9] ETag ::= '</' QName S? '>'
9285  */
9286
9287 static void
9288 xmlParseEndTag2(xmlParserCtxtPtr ctxt, const xmlChar *prefix,
9289                 const xmlChar *URI, int line, int nsNr, int tlen) {
9290     const xmlChar *name;
9291
9292     GROW;
9293     if ((RAW != '<') || (NXT(1) != '/')) {
9294         xmlFatalErr(ctxt, XML_ERR_LTSLASH_REQUIRED, NULL);
9295         return;
9296     }
9297     SKIP(2);
9298
9299     if ((tlen > 0) && (xmlStrncmp(ctxt->input->cur, ctxt->name, tlen) == 0)) {
9300         if (ctxt->input->cur[tlen] == '>') {
9301             ctxt->input->cur += tlen + 1;
9302             goto done;
9303         }
9304         ctxt->input->cur += tlen;
9305         name = (xmlChar*)1;
9306     } else {
9307         if (prefix == NULL)
9308             name = xmlParseNameAndCompare(ctxt, ctxt->name);
9309         else
9310             name = xmlParseQNameAndCompare(ctxt, ctxt->name, prefix);
9311     }
9312
9313     /*
9314      * We should definitely be at the ending "S? '>'" part
9315      */
9316     GROW;
9317     SKIP_BLANKS;
9318     if ((!IS_BYTE_CHAR(RAW)) || (RAW != '>')) {
9319         xmlFatalErr(ctxt, XML_ERR_GT_REQUIRED, NULL);
9320     } else
9321         NEXT1;
9322
9323     /*
9324      * [ WFC: Element Type Match ]
9325      * The Name in an element's end-tag must match the element type in the
9326      * start-tag.
9327      *
9328      */
9329     if (name != (xmlChar*)1) {
9330         if (name == NULL) name = BAD_CAST "unparseable";
9331         if ((line == 0) && (ctxt->node != NULL))
9332             line = ctxt->node->line;
9333         xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NAME_MISMATCH,
9334                      "Opening and ending tag mismatch: %s line %d and %s\n",
9335                                 ctxt->name, line, name);
9336     }
9337
9338     /*
9339      * SAX: End of Tag
9340      */
9341 done:
9342     if ((ctxt->sax != NULL) && (ctxt->sax->endElementNs != NULL) &&
9343         (!ctxt->disableSAX))
9344         ctxt->sax->endElementNs(ctxt->userData, ctxt->name, prefix, URI);
9345
9346     spacePop(ctxt);
9347     if (nsNr != 0)
9348         nsPop(ctxt, nsNr);
9349     return;
9350 }
9351
9352 /**
9353  * xmlParseCDSect:
9354  * @ctxt:  an XML parser context
9355  *
9356  * Parse escaped pure raw content.
9357  *
9358  * [18] CDSect ::= CDStart CData CDEnd
9359  *
9360  * [19] CDStart ::= '<![CDATA['
9361  *
9362  * [20] Data ::= (Char* - (Char* ']]>' Char*))
9363  *
9364  * [21] CDEnd ::= ']]>'
9365  */
9366 void
9367 xmlParseCDSect(xmlParserCtxtPtr ctxt) {
9368     xmlChar *buf = NULL;
9369     int len = 0;
9370     int size = XML_PARSER_BUFFER_SIZE;
9371     int r, rl;
9372     int s, sl;
9373     int cur, l;
9374     int count = 0;
9375
9376     /* Check 2.6.0 was NXT(0) not RAW */
9377     if (CMP9(CUR_PTR, '<', '!', '[', 'C', 'D', 'A', 'T', 'A', '[')) {
9378         SKIP(9);
9379     } else
9380         return;
9381
9382     ctxt->instate = XML_PARSER_CDATA_SECTION;
9383     r = CUR_CHAR(rl);
9384     if (!IS_CHAR(r)) {
9385         xmlFatalErr(ctxt, XML_ERR_CDATA_NOT_FINISHED, NULL);
9386         ctxt->instate = XML_PARSER_CONTENT;
9387         return;
9388     }
9389     NEXTL(rl);
9390     s = CUR_CHAR(sl);
9391     if (!IS_CHAR(s)) {
9392         xmlFatalErr(ctxt, XML_ERR_CDATA_NOT_FINISHED, NULL);
9393         ctxt->instate = XML_PARSER_CONTENT;
9394         return;
9395     }
9396     NEXTL(sl);
9397     cur = CUR_CHAR(l);
9398     buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
9399     if (buf == NULL) {
9400         xmlErrMemory(ctxt, NULL);
9401         return;
9402     }
9403     while (IS_CHAR(cur) &&
9404            ((r != ']') || (s != ']') || (cur != '>'))) {
9405         if (len + 5 >= size) {
9406             xmlChar *tmp;
9407
9408             size *= 2;
9409             tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
9410             if (tmp == NULL) {
9411                 xmlFree(buf);
9412                 xmlErrMemory(ctxt, NULL);
9413                 return;
9414             }
9415             buf = tmp;
9416         }
9417         COPY_BUF(rl,buf,len,r);
9418         r = s;
9419         rl = sl;
9420         s = cur;
9421         sl = l;
9422         count++;
9423         if (count > 50) {
9424             GROW;
9425             count = 0;
9426         }
9427         NEXTL(l);
9428         cur = CUR_CHAR(l);
9429     }
9430     buf[len] = 0;
9431     ctxt->instate = XML_PARSER_CONTENT;
9432     if (cur != '>') {
9433         xmlFatalErrMsgStr(ctxt, XML_ERR_CDATA_NOT_FINISHED,
9434                              "CData section not finished\n%.50s\n", buf);
9435         xmlFree(buf);
9436         return;
9437     }
9438     NEXTL(l);
9439
9440     /*
9441      * OK the buffer is to be consumed as cdata.
9442      */
9443     if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
9444         if (ctxt->sax->cdataBlock != NULL)
9445             ctxt->sax->cdataBlock(ctxt->userData, buf, len);
9446         else if (ctxt->sax->characters != NULL)
9447             ctxt->sax->characters(ctxt->userData, buf, len);
9448     }
9449     xmlFree(buf);
9450 }
9451
9452 /**
9453  * xmlParseContent:
9454  * @ctxt:  an XML parser context
9455  *
9456  * Parse a content:
9457  *
9458  * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
9459  */
9460
9461 void
9462 xmlParseContent(xmlParserCtxtPtr ctxt) {
9463     GROW;
9464     while ((RAW != 0) &&
9465            ((RAW != '<') || (NXT(1) != '/')) &&
9466            (ctxt->instate != XML_PARSER_EOF)) {
9467         const xmlChar *test = CUR_PTR;
9468         unsigned int cons = ctxt->input->consumed;
9469         const xmlChar *cur = ctxt->input->cur;
9470
9471         /*
9472          * First case : a Processing Instruction.
9473          */
9474         if ((*cur == '<') && (cur[1] == '?')) {
9475             xmlParsePI(ctxt);
9476         }
9477
9478         /*
9479          * Second case : a CDSection
9480          */
9481         /* 2.6.0 test was *cur not RAW */
9482         else if (CMP9(CUR_PTR, '<', '!', '[', 'C', 'D', 'A', 'T', 'A', '[')) {
9483             xmlParseCDSect(ctxt);
9484         }
9485
9486         /*
9487          * Third case :  a comment
9488          */
9489         else if ((*cur == '<') && (NXT(1) == '!') &&
9490                  (NXT(2) == '-') && (NXT(3) == '-')) {
9491             xmlParseComment(ctxt);
9492             ctxt->instate = XML_PARSER_CONTENT;
9493         }
9494
9495         /*
9496          * Fourth case :  a sub-element.
9497          */
9498         else if (*cur == '<') {
9499             xmlParseElement(ctxt);
9500         }
9501
9502         /*
9503          * Fifth case : a reference. If if has not been resolved,
9504          *    parsing returns it's Name, create the node
9505          */
9506
9507         else if (*cur == '&') {
9508             xmlParseReference(ctxt);
9509         }
9510
9511         /*
9512          * Last case, text. Note that References are handled directly.
9513          */
9514         else {
9515             xmlParseCharData(ctxt, 0);
9516         }
9517
9518         GROW;
9519         /*
9520          * Pop-up of finished entities.
9521          */
9522         while ((RAW == 0) && (ctxt->inputNr > 1))
9523             xmlPopInput(ctxt);
9524         SHRINK;
9525
9526         if ((cons == ctxt->input->consumed) && (test == CUR_PTR)) {
9527             xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
9528                         "detected an error in element content\n");
9529             ctxt->instate = XML_PARSER_EOF;
9530             break;
9531         }
9532     }
9533 }
9534
9535 /**
9536  * xmlParseElement:
9537  * @ctxt:  an XML parser context
9538  *
9539  * parse an XML element, this is highly recursive
9540  *
9541  * [39] element ::= EmptyElemTag | STag content ETag
9542  *
9543  * [ WFC: Element Type Match ]
9544  * The Name in an element's end-tag must match the element type in the
9545  * start-tag.
9546  *
9547  */
9548
9549 void
9550 xmlParseElement(xmlParserCtxtPtr ctxt) {
9551     const xmlChar *name;
9552     const xmlChar *prefix = NULL;
9553     const xmlChar *URI = NULL;
9554     xmlParserNodeInfo node_info;
9555     int line, tlen;
9556     xmlNodePtr ret;
9557     int nsNr = ctxt->nsNr;
9558
9559     if (((unsigned int) ctxt->nameNr > xmlParserMaxDepth) &&
9560         ((ctxt->options & XML_PARSE_HUGE) == 0)) {
9561         xmlFatalErrMsgInt(ctxt, XML_ERR_INTERNAL_ERROR,
9562                  "Excessive depth in document: %d use XML_PARSE_HUGE option\n",
9563                           xmlParserMaxDepth);
9564         ctxt->instate = XML_PARSER_EOF;
9565         return;
9566     }
9567
9568     /* Capture start position */
9569     if (ctxt->record_info) {
9570         node_info.begin_pos = ctxt->input->consumed +
9571                           (CUR_PTR - ctxt->input->base);
9572         node_info.begin_line = ctxt->input->line;
9573     }
9574
9575     if (ctxt->spaceNr == 0)
9576         spacePush(ctxt, -1);
9577     else if (*ctxt->space == -2)
9578         spacePush(ctxt, -1);
9579     else
9580         spacePush(ctxt, *ctxt->space);
9581
9582     line = ctxt->input->line;
9583 #ifdef LIBXML_SAX1_ENABLED
9584     if (ctxt->sax2)
9585 #endif /* LIBXML_SAX1_ENABLED */
9586         name = xmlParseStartTag2(ctxt, &prefix, &URI, &tlen);
9587 #ifdef LIBXML_SAX1_ENABLED
9588     else
9589         name = xmlParseStartTag(ctxt);
9590 #endif /* LIBXML_SAX1_ENABLED */
9591     if (name == NULL) {
9592         spacePop(ctxt);
9593         return;
9594     }
9595     namePush(ctxt, name);
9596     ret = ctxt->node;
9597
9598 #ifdef LIBXML_VALID_ENABLED
9599     /*
9600      * [ VC: Root Element Type ]
9601      * The Name in the document type declaration must match the element
9602      * type of the root element.
9603      */
9604     if (ctxt->validate && ctxt->wellFormed && ctxt->myDoc &&
9605         ctxt->node && (ctxt->node == ctxt->myDoc->children))
9606         ctxt->valid &= xmlValidateRoot(&ctxt->vctxt, ctxt->myDoc);
9607 #endif /* LIBXML_VALID_ENABLED */
9608
9609     /*
9610      * Check for an Empty Element.
9611      */
9612     if ((RAW == '/') && (NXT(1) == '>')) {
9613         SKIP(2);
9614         if (ctxt->sax2) {
9615             if ((ctxt->sax != NULL) && (ctxt->sax->endElementNs != NULL) &&
9616                 (!ctxt->disableSAX))
9617                 ctxt->sax->endElementNs(ctxt->userData, name, prefix, URI);
9618 #ifdef LIBXML_SAX1_ENABLED
9619         } else {
9620             if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL) &&
9621                 (!ctxt->disableSAX))
9622                 ctxt->sax->endElement(ctxt->userData, name);
9623 #endif /* LIBXML_SAX1_ENABLED */
9624         }
9625         namePop(ctxt);
9626         spacePop(ctxt);
9627         if (nsNr != ctxt->nsNr)
9628             nsPop(ctxt, ctxt->nsNr - nsNr);
9629         if ( ret != NULL && ctxt->record_info ) {
9630            node_info.end_pos = ctxt->input->consumed +
9631                               (CUR_PTR - ctxt->input->base);
9632            node_info.end_line = ctxt->input->line;
9633            node_info.node = ret;
9634            xmlParserAddNodeInfo(ctxt, &node_info);
9635         }
9636         return;
9637     }
9638     if (RAW == '>') {
9639         NEXT1;
9640     } else {
9641         xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_GT_REQUIRED,
9642                      "Couldn't find end of Start Tag %s line %d\n",
9643                                 name, line, NULL);
9644
9645         /*
9646          * end of parsing of this node.
9647          */
9648         nodePop(ctxt);
9649         namePop(ctxt);
9650         spacePop(ctxt);
9651         if (nsNr != ctxt->nsNr)
9652             nsPop(ctxt, ctxt->nsNr - nsNr);
9653
9654         /*
9655          * Capture end position and add node
9656          */
9657         if ( ret != NULL && ctxt->record_info ) {
9658            node_info.end_pos = ctxt->input->consumed +
9659                               (CUR_PTR - ctxt->input->base);
9660            node_info.end_line = ctxt->input->line;
9661            node_info.node = ret;
9662            xmlParserAddNodeInfo(ctxt, &node_info);
9663         }
9664         return;
9665     }
9666
9667     /*
9668      * Parse the content of the element:
9669      */
9670     xmlParseContent(ctxt);
9671     if (!IS_BYTE_CHAR(RAW)) {
9672         xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NOT_FINISHED,
9673          "Premature end of data in tag %s line %d\n",
9674                                 name, line, NULL);
9675
9676         /*
9677          * end of parsing of this node.
9678          */
9679         nodePop(ctxt);
9680         namePop(ctxt);
9681         spacePop(ctxt);
9682         if (nsNr != ctxt->nsNr)
9683             nsPop(ctxt, ctxt->nsNr - nsNr);
9684         return;
9685     }
9686
9687     /*
9688      * parse the end of tag: '</' should be here.
9689      */
9690     if (ctxt->sax2) {
9691         xmlParseEndTag2(ctxt, prefix, URI, line, ctxt->nsNr - nsNr, tlen);
9692         namePop(ctxt);
9693     }
9694 #ifdef LIBXML_SAX1_ENABLED
9695       else
9696         xmlParseEndTag1(ctxt, line);
9697 #endif /* LIBXML_SAX1_ENABLED */
9698
9699     /*
9700      * Capture end position and add node
9701      */
9702     if ( ret != NULL && ctxt->record_info ) {
9703        node_info.end_pos = ctxt->input->consumed +
9704                           (CUR_PTR - ctxt->input->base);
9705        node_info.end_line = ctxt->input->line;
9706        node_info.node = ret;
9707        xmlParserAddNodeInfo(ctxt, &node_info);
9708     }
9709 }
9710
9711 /**
9712  * xmlParseVersionNum:
9713  * @ctxt:  an XML parser context
9714  *
9715  * parse the XML version value.
9716  *
9717  * [26] VersionNum ::= '1.' [0-9]+
9718  *
9719  * In practice allow [0-9].[0-9]+ at that level
9720  *
9721  * Returns the string giving the XML version number, or NULL
9722  */
9723 xmlChar *
9724 xmlParseVersionNum(xmlParserCtxtPtr ctxt) {
9725     xmlChar *buf = NULL;
9726     int len = 0;
9727     int size = 10;
9728     xmlChar cur;
9729
9730     buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
9731     if (buf == NULL) {
9732         xmlErrMemory(ctxt, NULL);
9733         return(NULL);
9734     }
9735     cur = CUR;
9736     if (!((cur >= '0') && (cur <= '9'))) {
9737         xmlFree(buf);
9738         return(NULL);
9739     }
9740     buf[len++] = cur;
9741     NEXT;
9742     cur=CUR;
9743     if (cur != '.') {
9744         xmlFree(buf);
9745         return(NULL);
9746     }
9747     buf[len++] = cur;
9748     NEXT;
9749     cur=CUR;
9750     while ((cur >= '0') && (cur <= '9')) {
9751         if (len + 1 >= size) {
9752             xmlChar *tmp;
9753
9754             size *= 2;
9755             tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
9756             if (tmp == NULL) {
9757                 xmlFree(buf);
9758                 xmlErrMemory(ctxt, NULL);
9759                 return(NULL);
9760             }
9761             buf = tmp;
9762         }
9763         buf[len++] = cur;
9764         NEXT;
9765         cur=CUR;
9766     }
9767     buf[len] = 0;
9768     return(buf);
9769 }
9770
9771 /**
9772  * xmlParseVersionInfo:
9773  * @ctxt:  an XML parser context
9774  *
9775  * parse the XML version.
9776  *
9777  * [24] VersionInfo ::= S 'version' Eq (' VersionNum ' | " VersionNum ")
9778  *
9779  * [25] Eq ::= S? '=' S?
9780  *
9781  * Returns the version string, e.g. "1.0"
9782  */
9783
9784 xmlChar *
9785 xmlParseVersionInfo(xmlParserCtxtPtr ctxt) {
9786     xmlChar *version = NULL;
9787
9788     if (CMP7(CUR_PTR, 'v', 'e', 'r', 's', 'i', 'o', 'n')) {
9789         SKIP(7);
9790         SKIP_BLANKS;
9791         if (RAW != '=') {
9792             xmlFatalErr(ctxt, XML_ERR_EQUAL_REQUIRED, NULL);
9793             return(NULL);
9794         }
9795         NEXT;
9796         SKIP_BLANKS;
9797         if (RAW == '"') {
9798             NEXT;
9799             version = xmlParseVersionNum(ctxt);
9800             if (RAW != '"') {
9801                 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
9802             } else
9803                 NEXT;
9804         } else if (RAW == '\''){
9805             NEXT;
9806             version = xmlParseVersionNum(ctxt);
9807             if (RAW != '\'') {
9808                 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
9809             } else
9810                 NEXT;
9811         } else {
9812             xmlFatalErr(ctxt, XML_ERR_STRING_NOT_STARTED, NULL);
9813         }
9814     }
9815     return(version);
9816 }
9817
9818 /**
9819  * xmlParseEncName:
9820  * @ctxt:  an XML parser context
9821  *
9822  * parse the XML encoding name
9823  *
9824  * [81] EncName ::= [A-Za-z] ([A-Za-z0-9._] | '-')*
9825  *
9826  * Returns the encoding name value or NULL
9827  */
9828 xmlChar *
9829 xmlParseEncName(xmlParserCtxtPtr ctxt) {
9830     xmlChar *buf = NULL;
9831     int len = 0;
9832     int size = 10;
9833     xmlChar cur;
9834
9835     cur = CUR;
9836     if (((cur >= 'a') && (cur <= 'z')) ||
9837         ((cur >= 'A') && (cur <= 'Z'))) {
9838         buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
9839         if (buf == NULL) {
9840             xmlErrMemory(ctxt, NULL);
9841             return(NULL);
9842         }
9843
9844         buf[len++] = cur;
9845         NEXT;
9846         cur = CUR;
9847         while (((cur >= 'a') && (cur <= 'z')) ||
9848                ((cur >= 'A') && (cur <= 'Z')) ||
9849                ((cur >= '0') && (cur <= '9')) ||
9850                (cur == '.') || (cur == '_') ||
9851                (cur == '-')) {
9852             if (len + 1 >= size) {
9853                 xmlChar *tmp;
9854
9855                 size *= 2;
9856                 tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
9857                 if (tmp == NULL) {
9858                     xmlErrMemory(ctxt, NULL);
9859                     xmlFree(buf);
9860                     return(NULL);
9861                 }
9862                 buf = tmp;
9863             }
9864             buf[len++] = cur;
9865             NEXT;
9866             cur = CUR;
9867             if (cur == 0) {
9868                 SHRINK;
9869                 GROW;
9870                 cur = CUR;
9871             }
9872         }
9873         buf[len] = 0;
9874     } else {
9875         xmlFatalErr(ctxt, XML_ERR_ENCODING_NAME, NULL);
9876     }
9877     return(buf);
9878 }
9879
9880 /**
9881  * xmlParseEncodingDecl:
9882  * @ctxt:  an XML parser context
9883  *
9884  * parse the XML encoding declaration
9885  *
9886  * [80] EncodingDecl ::= S 'encoding' Eq ('"' EncName '"' |  "'" EncName "'")
9887  *
9888  * this setups the conversion filters.
9889  *
9890  * Returns the encoding value or NULL
9891  */
9892
9893 const xmlChar *
9894 xmlParseEncodingDecl(xmlParserCtxtPtr ctxt) {
9895     xmlChar *encoding = NULL;
9896
9897     SKIP_BLANKS;
9898     if (CMP8(CUR_PTR, 'e', 'n', 'c', 'o', 'd', 'i', 'n', 'g')) {
9899         SKIP(8);
9900         SKIP_BLANKS;
9901         if (RAW != '=') {
9902             xmlFatalErr(ctxt, XML_ERR_EQUAL_REQUIRED, NULL);
9903             return(NULL);
9904         }
9905         NEXT;
9906         SKIP_BLANKS;
9907         if (RAW == '"') {
9908             NEXT;
9909             encoding = xmlParseEncName(ctxt);
9910             if (RAW != '"') {
9911                 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
9912             } else
9913                 NEXT;
9914         } else if (RAW == '\''){
9915             NEXT;
9916             encoding = xmlParseEncName(ctxt);
9917             if (RAW != '\'') {
9918                 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
9919             } else
9920                 NEXT;
9921         } else {
9922             xmlFatalErr(ctxt, XML_ERR_STRING_NOT_STARTED, NULL);
9923         }
9924         /*
9925          * UTF-16 encoding stwich has already taken place at this stage,
9926          * more over the little-endian/big-endian selection is already done
9927          */
9928         if ((encoding != NULL) &&
9929             ((!xmlStrcasecmp(encoding, BAD_CAST "UTF-16")) ||
9930              (!xmlStrcasecmp(encoding, BAD_CAST "UTF16")))) {
9931             /*
9932              * If no encoding was passed to the parser, that we are
9933              * using UTF-16 and no decoder is present i.e. the
9934              * document is apparently UTF-8 compatible, then raise an
9935              * encoding mismatch fatal error
9936              */
9937             if ((ctxt->encoding == NULL) &&
9938                 (ctxt->input->buf != NULL) &&
9939                 (ctxt->input->buf->encoder == NULL)) {
9940                 xmlFatalErrMsg(ctxt, XML_ERR_INVALID_ENCODING,
9941                   "Document labelled UTF-16 but has UTF-8 content\n");
9942             }
9943             if (ctxt->encoding != NULL)
9944                 xmlFree((xmlChar *) ctxt->encoding);
9945             ctxt->encoding = encoding;
9946         }
9947         /*
9948          * UTF-8 encoding is handled natively
9949          */
9950         else if ((encoding != NULL) &&
9951             ((!xmlStrcasecmp(encoding, BAD_CAST "UTF-8")) ||
9952              (!xmlStrcasecmp(encoding, BAD_CAST "UTF8")))) {
9953             if (ctxt->encoding != NULL)
9954                 xmlFree((xmlChar *) ctxt->encoding);
9955             ctxt->encoding = encoding;
9956         }
9957         else if (encoding != NULL) {
9958             xmlCharEncodingHandlerPtr handler;
9959
9960             if (ctxt->input->encoding != NULL)
9961                 xmlFree((xmlChar *) ctxt->input->encoding);
9962             ctxt->input->encoding = encoding;
9963
9964             handler = xmlFindCharEncodingHandler((const char *) encoding);
9965             if (handler != NULL) {
9966                 xmlSwitchToEncoding(ctxt, handler);
9967             } else {
9968                 xmlFatalErrMsgStr(ctxt, XML_ERR_UNSUPPORTED_ENCODING,
9969                         "Unsupported encoding %s\n", encoding);
9970                 return(NULL);
9971             }
9972         }
9973     }
9974     return(encoding);
9975 }
9976
9977 /**
9978  * xmlParseSDDecl:
9979  * @ctxt:  an XML parser context
9980  *
9981  * parse the XML standalone declaration
9982  *
9983  * [32] SDDecl ::= S 'standalone' Eq
9984  *                 (("'" ('yes' | 'no') "'") | ('"' ('yes' | 'no')'"'))
9985  *
9986  * [ VC: Standalone Document Declaration ]
9987  * TODO The standalone document declaration must have the value "no"
9988  * if any external markup declarations contain declarations of:
9989  *  - attributes with default values, if elements to which these
9990  *    attributes apply appear in the document without specifications
9991  *    of values for these attributes, or
9992  *  - entities (other than amp, lt, gt, apos, quot), if references
9993  *    to those entities appear in the document, or
9994  *  - attributes with values subject to normalization, where the
9995  *    attribute appears in the document with a value which will change
9996  *    as a result of normalization, or
9997  *  - element types with element content, if white space occurs directly
9998  *    within any instance of those types.
9999  *
10000  * Returns:
10001  *   1 if standalone="yes"
10002  *   0 if standalone="no"
10003  *  -2 if standalone attribute is missing or invalid
10004  *        (A standalone value of -2 means that the XML declaration was found,
10005  *         but no value was specified for the standalone attribute).
10006  */
10007
10008 int
10009 xmlParseSDDecl(xmlParserCtxtPtr ctxt) {
10010     int standalone = -2;
10011
10012     SKIP_BLANKS;
10013     if (CMP10(CUR_PTR, 's', 't', 'a', 'n', 'd', 'a', 'l', 'o', 'n', 'e')) {
10014         SKIP(10);
10015         SKIP_BLANKS;
10016         if (RAW != '=') {
10017             xmlFatalErr(ctxt, XML_ERR_EQUAL_REQUIRED, NULL);
10018             return(standalone);
10019         }
10020         NEXT;
10021         SKIP_BLANKS;
10022         if (RAW == '\''){
10023             NEXT;
10024             if ((RAW == 'n') && (NXT(1) == 'o')) {
10025                 standalone = 0;
10026                 SKIP(2);
10027             } else if ((RAW == 'y') && (NXT(1) == 'e') &&
10028                        (NXT(2) == 's')) {
10029                 standalone = 1;
10030                 SKIP(3);
10031             } else {
10032                 xmlFatalErr(ctxt, XML_ERR_STANDALONE_VALUE, NULL);
10033             }
10034             if (RAW != '\'') {
10035                 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
10036             } else
10037                 NEXT;
10038         } else if (RAW == '"'){
10039             NEXT;
10040             if ((RAW == 'n') && (NXT(1) == 'o')) {
10041                 standalone = 0;
10042                 SKIP(2);
10043             } else if ((RAW == 'y') && (NXT(1) == 'e') &&
10044                        (NXT(2) == 's')) {
10045                 standalone = 1;
10046                 SKIP(3);
10047             } else {
10048                 xmlFatalErr(ctxt, XML_ERR_STANDALONE_VALUE, NULL);
10049             }
10050             if (RAW != '"') {
10051                 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
10052             } else
10053                 NEXT;
10054         } else {
10055             xmlFatalErr(ctxt, XML_ERR_STRING_NOT_STARTED, NULL);
10056         }
10057     }
10058     return(standalone);
10059 }
10060
10061 /**
10062  * xmlParseXMLDecl:
10063  * @ctxt:  an XML parser context
10064  *
10065  * parse an XML declaration header
10066  *
10067  * [23] XMLDecl ::= '<?xml' VersionInfo EncodingDecl? SDDecl? S? '?>'
10068  */
10069
10070 void
10071 xmlParseXMLDecl(xmlParserCtxtPtr ctxt) {
10072     xmlChar *version;
10073
10074     /*
10075      * This value for standalone indicates that the document has an
10076      * XML declaration but it does not have a standalone attribute.
10077      * It will be overwritten later if a standalone attribute is found.
10078      */
10079     ctxt->input->standalone = -2;
10080
10081     /*
10082      * We know that '<?xml' is here.
10083      */
10084     SKIP(5);
10085
10086     if (!IS_BLANK_CH(RAW)) {
10087         xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
10088                        "Blank needed after '<?xml'\n");
10089     }
10090     SKIP_BLANKS;
10091
10092     /*
10093      * We must have the VersionInfo here.
10094      */
10095     version = xmlParseVersionInfo(ctxt);
10096     if (version == NULL) {
10097         xmlFatalErr(ctxt, XML_ERR_VERSION_MISSING, NULL);
10098     } else {
10099         if (!xmlStrEqual(version, (const xmlChar *) XML_DEFAULT_VERSION)) {
10100             /*
10101              * Changed here for XML-1.0 5th edition
10102              */
10103             if (ctxt->options & XML_PARSE_OLD10) {
10104                 xmlFatalErrMsgStr(ctxt, XML_ERR_UNKNOWN_VERSION,
10105                                   "Unsupported version '%s'\n",
10106                                   version);
10107             } else {
10108                 if ((version[0] == '1') && ((version[1] == '.'))) {
10109                     xmlWarningMsg(ctxt, XML_WAR_UNKNOWN_VERSION,
10110                                   "Unsupported version '%s'\n",
10111                                   version, NULL);
10112                 } else {
10113                     xmlFatalErrMsgStr(ctxt, XML_ERR_UNKNOWN_VERSION,
10114                                       "Unsupported version '%s'\n",
10115                                       version);
10116                 }
10117             }
10118         }
10119         if (ctxt->version != NULL)
10120             xmlFree((void *) ctxt->version);
10121         ctxt->version = version;
10122     }
10123
10124     /*
10125      * We may have the encoding declaration
10126      */
10127     if (!IS_BLANK_CH(RAW)) {
10128         if ((RAW == '?') && (NXT(1) == '>')) {
10129             SKIP(2);
10130             return;
10131         }
10132         xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, "Blank needed here\n");
10133     }
10134     xmlParseEncodingDecl(ctxt);
10135     if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
10136         /*
10137          * The XML REC instructs us to stop parsing right here
10138          */
10139         return;
10140     }
10141
10142     /*
10143      * We may have the standalone status.
10144      */
10145     if ((ctxt->input->encoding != NULL) && (!IS_BLANK_CH(RAW))) {
10146         if ((RAW == '?') && (NXT(1) == '>')) {
10147             SKIP(2);
10148             return;
10149         }
10150         xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, "Blank needed here\n");
10151     }
10152
10153     /*
10154      * We can grow the input buffer freely at that point
10155      */
10156     GROW;
10157
10158     SKIP_BLANKS;
10159     ctxt->input->standalone = xmlParseSDDecl(ctxt);
10160
10161     SKIP_BLANKS;
10162     if ((RAW == '?') && (NXT(1) == '>')) {
10163         SKIP(2);
10164     } else if (RAW == '>') {
10165         /* Deprecated old WD ... */
10166         xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
10167         NEXT;
10168     } else {
10169         xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
10170         MOVETO_ENDTAG(CUR_PTR);
10171         NEXT;
10172     }
10173 }
10174
10175 /**
10176  * xmlParseMisc:
10177  * @ctxt:  an XML parser context
10178  *
10179  * parse an XML Misc* optional field.
10180  *
10181  * [27] Misc ::= Comment | PI |  S
10182  */
10183
10184 void
10185 xmlParseMisc(xmlParserCtxtPtr ctxt) {
10186     while (((RAW == '<') && (NXT(1) == '?')) ||
10187            (CMP4(CUR_PTR, '<', '!', '-', '-')) ||
10188            IS_BLANK_CH(CUR)) {
10189         if ((RAW == '<') && (NXT(1) == '?')) {
10190             xmlParsePI(ctxt);
10191         } else if (IS_BLANK_CH(CUR)) {
10192             NEXT;
10193         } else
10194             xmlParseComment(ctxt);
10195     }
10196 }
10197
10198 /**
10199  * xmlParseDocument:
10200  * @ctxt:  an XML parser context
10201  *
10202  * parse an XML document (and build a tree if using the standard SAX
10203  * interface).
10204  *
10205  * [1] document ::= prolog element Misc*
10206  *
10207  * [22] prolog ::= XMLDecl? Misc* (doctypedecl Misc*)?
10208  *
10209  * Returns 0, -1 in case of error. the parser context is augmented
10210  *                as a result of the parsing.
10211  */
10212
10213 int
10214 xmlParseDocument(xmlParserCtxtPtr ctxt) {
10215     xmlChar start[4];
10216     xmlCharEncoding enc;
10217
10218     xmlInitParser();
10219
10220     if ((ctxt == NULL) || (ctxt->input == NULL))
10221         return(-1);
10222
10223     GROW;
10224
10225     /*
10226      * SAX: detecting the level.
10227      */
10228     xmlDetectSAX2(ctxt);
10229
10230     /*
10231      * SAX: beginning of the document processing.
10232      */
10233     if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
10234         ctxt->sax->setDocumentLocator(ctxt->userData, &xmlDefaultSAXLocator);
10235
10236     if ((ctxt->encoding == NULL) &&
10237         ((ctxt->input->end - ctxt->input->cur) >= 4)) {
10238         /*
10239          * Get the 4 first bytes and decode the charset
10240          * if enc != XML_CHAR_ENCODING_NONE
10241          * plug some encoding conversion routines.
10242          */
10243         start[0] = RAW;
10244         start[1] = NXT(1);
10245         start[2] = NXT(2);
10246         start[3] = NXT(3);
10247         enc = xmlDetectCharEncoding(&start[0], 4);
10248         if (enc != XML_CHAR_ENCODING_NONE) {
10249             xmlSwitchEncoding(ctxt, enc);
10250         }
10251     }
10252
10253
10254     if (CUR == 0) {
10255         xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
10256     }
10257
10258     /*
10259      * Check for the XMLDecl in the Prolog.
10260      * do not GROW here to avoid the detected encoder to decode more
10261      * than just the first line, unless the amount of data is really
10262      * too small to hold "<?xml version="1.0" encoding="foo"
10263      */
10264     if ((ctxt->input->end - ctxt->input->cur) < 35) {
10265        GROW;
10266     }
10267     if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
10268
10269         /*
10270          * Note that we will switch encoding on the fly.
10271          */
10272         xmlParseXMLDecl(ctxt);
10273         if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
10274             /*
10275              * The XML REC instructs us to stop parsing right here
10276              */
10277             return(-1);
10278         }
10279         ctxt->standalone = ctxt->input->standalone;
10280         SKIP_BLANKS;
10281     } else {
10282         ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
10283     }
10284     if ((ctxt->sax) && (ctxt->sax->startDocument) && (!ctxt->disableSAX))
10285         ctxt->sax->startDocument(ctxt->userData);
10286
10287     /*
10288      * The Misc part of the Prolog
10289      */
10290     GROW;
10291     xmlParseMisc(ctxt);
10292
10293     /*
10294      * Then possibly doc type declaration(s) and more Misc
10295      * (doctypedecl Misc*)?
10296      */
10297     GROW;
10298     if (CMP9(CUR_PTR, '<', '!', 'D', 'O', 'C', 'T', 'Y', 'P', 'E')) {
10299
10300         ctxt->inSubset = 1;
10301         xmlParseDocTypeDecl(ctxt);
10302         if (RAW == '[') {
10303             ctxt->instate = XML_PARSER_DTD;
10304             xmlParseInternalSubset(ctxt);
10305         }
10306
10307         /*
10308          * Create and update the external subset.
10309          */
10310         ctxt->inSubset = 2;
10311         if ((ctxt->sax != NULL) && (ctxt->sax->externalSubset != NULL) &&
10312             (!ctxt->disableSAX))
10313             ctxt->sax->externalSubset(ctxt->userData, ctxt->intSubName,
10314                                       ctxt->extSubSystem, ctxt->extSubURI);
10315         ctxt->inSubset = 0;
10316
10317         xmlCleanSpecialAttr(ctxt);
10318
10319         ctxt->instate = XML_PARSER_PROLOG;
10320         xmlParseMisc(ctxt);
10321     }
10322
10323     /*
10324      * Time to start parsing the tree itself
10325      */
10326     GROW;
10327     if (RAW != '<') {
10328         xmlFatalErrMsg(ctxt, XML_ERR_DOCUMENT_EMPTY,
10329                        "Start tag expected, '<' not found\n");
10330     } else {
10331         ctxt->instate = XML_PARSER_CONTENT;
10332         xmlParseElement(ctxt);
10333         ctxt->instate = XML_PARSER_EPILOG;
10334
10335
10336         /*
10337          * The Misc part at the end
10338          */
10339         xmlParseMisc(ctxt);
10340
10341         if (RAW != 0) {
10342             xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
10343         }
10344         ctxt->instate = XML_PARSER_EOF;
10345     }
10346
10347     /*
10348      * SAX: end of the document processing.
10349      */
10350     if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
10351         ctxt->sax->endDocument(ctxt->userData);
10352
10353     /*
10354      * Remove locally kept entity definitions if the tree was not built
10355      */
10356     if ((ctxt->myDoc != NULL) &&
10357         (xmlStrEqual(ctxt->myDoc->version, SAX_COMPAT_MODE))) {
10358         xmlFreeDoc(ctxt->myDoc);
10359         ctxt->myDoc = NULL;
10360     }
10361
10362     if ((ctxt->wellFormed) && (ctxt->myDoc != NULL)) {
10363         ctxt->myDoc->properties |= XML_DOC_WELLFORMED;
10364         if (ctxt->valid)
10365             ctxt->myDoc->properties |= XML_DOC_DTDVALID;
10366         if (ctxt->nsWellFormed)
10367             ctxt->myDoc->properties |= XML_DOC_NSVALID;
10368         if (ctxt->options & XML_PARSE_OLD10)
10369             ctxt->myDoc->properties |= XML_DOC_OLD10;
10370     }
10371     if (! ctxt->wellFormed) {
10372         ctxt->valid = 0;
10373         return(-1);
10374     }
10375     return(0);
10376 }
10377
10378 /**
10379  * xmlParseExtParsedEnt:
10380  * @ctxt:  an XML parser context
10381  *
10382  * parse a general parsed entity
10383  * An external general parsed entity is well-formed if it matches the
10384  * production labeled extParsedEnt.
10385  *
10386  * [78] extParsedEnt ::= TextDecl? content
10387  *
10388  * Returns 0, -1 in case of error. the parser context is augmented
10389  *                as a result of the parsing.
10390  */
10391
10392 int
10393 xmlParseExtParsedEnt(xmlParserCtxtPtr ctxt) {
10394     xmlChar start[4];
10395     xmlCharEncoding enc;
10396
10397     if ((ctxt == NULL) || (ctxt->input == NULL))
10398         return(-1);
10399
10400     xmlDefaultSAXHandlerInit();
10401
10402     xmlDetectSAX2(ctxt);
10403
10404     GROW;
10405
10406     /*
10407      * SAX: beginning of the document processing.
10408      */
10409     if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
10410         ctxt->sax->setDocumentLocator(ctxt->userData, &xmlDefaultSAXLocator);
10411
10412     /*
10413      * Get the 4 first bytes and decode the charset
10414      * if enc != XML_CHAR_ENCODING_NONE
10415      * plug some encoding conversion routines.
10416      */
10417     if ((ctxt->input->end - ctxt->input->cur) >= 4) {
10418         start[0] = RAW;
10419         start[1] = NXT(1);
10420         start[2] = NXT(2);
10421         start[3] = NXT(3);
10422         enc = xmlDetectCharEncoding(start, 4);
10423         if (enc != XML_CHAR_ENCODING_NONE) {
10424             xmlSwitchEncoding(ctxt, enc);
10425         }
10426     }
10427
10428
10429     if (CUR == 0) {
10430         xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
10431     }
10432
10433     /*
10434      * Check for the XMLDecl in the Prolog.
10435      */
10436     GROW;
10437     if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
10438
10439         /*
10440          * Note that we will switch encoding on the fly.
10441          */
10442         xmlParseXMLDecl(ctxt);
10443         if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
10444             /*
10445              * The XML REC instructs us to stop parsing right here
10446              */
10447             return(-1);
10448         }
10449         SKIP_BLANKS;
10450     } else {
10451         ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
10452     }
10453     if ((ctxt->sax) && (ctxt->sax->startDocument) && (!ctxt->disableSAX))
10454         ctxt->sax->startDocument(ctxt->userData);
10455
10456     /*
10457      * Doing validity checking on chunk doesn't make sense
10458      */
10459     ctxt->instate = XML_PARSER_CONTENT;
10460     ctxt->validate = 0;
10461     ctxt->loadsubset = 0;
10462     ctxt->depth = 0;
10463
10464     xmlParseContent(ctxt);
10465
10466     if ((RAW == '<') && (NXT(1) == '/')) {
10467         xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
10468     } else if (RAW != 0) {
10469         xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
10470     }
10471
10472     /*
10473      * SAX: end of the document processing.
10474      */
10475     if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
10476         ctxt->sax->endDocument(ctxt->userData);
10477
10478     if (! ctxt->wellFormed) return(-1);
10479     return(0);
10480 }
10481
10482 #ifdef LIBXML_PUSH_ENABLED
10483 /************************************************************************
10484  *                                                                      *
10485  *              Progressive parsing interfaces                          *
10486  *                                                                      *
10487  ************************************************************************/
10488
10489 /**
10490  * xmlParseLookupSequence:
10491  * @ctxt:  an XML parser context
10492  * @first:  the first char to lookup
10493  * @next:  the next char to lookup or zero
10494  * @third:  the next char to lookup or zero
10495  *
10496  * Try to find if a sequence (first, next, third) or  just (first next) or
10497  * (first) is available in the input stream.
10498  * This function has a side effect of (possibly) incrementing ctxt->checkIndex
10499  * to avoid rescanning sequences of bytes, it DOES change the state of the
10500  * parser, do not use liberally.
10501  *
10502  * Returns the index to the current parsing point if the full sequence
10503  *      is available, -1 otherwise.
10504  */
10505 static int
10506 xmlParseLookupSequence(xmlParserCtxtPtr ctxt, xmlChar first,
10507                        xmlChar next, xmlChar third) {
10508     int base, len;
10509     xmlParserInputPtr in;
10510     const xmlChar *buf;
10511
10512     in = ctxt->input;
10513     if (in == NULL) return(-1);
10514     base = in->cur - in->base;
10515     if (base < 0) return(-1);
10516     if (ctxt->checkIndex > base)
10517         base = ctxt->checkIndex;
10518     if (in->buf == NULL) {
10519         buf = in->base;
10520         len = in->length;
10521     } else {
10522         buf = in->buf->buffer->content;
10523         len = in->buf->buffer->use;
10524     }
10525     /* take into account the sequence length */
10526     if (third) len -= 2;
10527     else if (next) len --;
10528     for (;base < len;base++) {
10529         if (buf[base] == first) {
10530             if (third != 0) {
10531                 if ((buf[base + 1] != next) ||
10532                     (buf[base + 2] != third)) continue;
10533             } else if (next != 0) {
10534                 if (buf[base + 1] != next) continue;
10535             }
10536             ctxt->checkIndex = 0;
10537 #ifdef DEBUG_PUSH
10538             if (next == 0)
10539                 xmlGenericError(xmlGenericErrorContext,
10540                         "PP: lookup '%c' found at %d\n",
10541                         first, base);
10542             else if (third == 0)
10543                 xmlGenericError(xmlGenericErrorContext,
10544                         "PP: lookup '%c%c' found at %d\n",
10545                         first, next, base);
10546             else
10547                 xmlGenericError(xmlGenericErrorContext,
10548                         "PP: lookup '%c%c%c' found at %d\n",
10549                         first, next, third, base);
10550 #endif
10551             return(base - (in->cur - in->base));
10552         }
10553     }
10554     ctxt->checkIndex = base;
10555 #ifdef DEBUG_PUSH
10556     if (next == 0)
10557         xmlGenericError(xmlGenericErrorContext,
10558                 "PP: lookup '%c' failed\n", first);
10559     else if (third == 0)
10560         xmlGenericError(xmlGenericErrorContext,
10561                 "PP: lookup '%c%c' failed\n", first, next);
10562     else
10563         xmlGenericError(xmlGenericErrorContext,
10564                 "PP: lookup '%c%c%c' failed\n", first, next, third);
10565 #endif
10566     return(-1);
10567 }
10568
10569 /**
10570  * xmlParseGetLasts:
10571  * @ctxt:  an XML parser context
10572  * @lastlt:  pointer to store the last '<' from the input
10573  * @lastgt:  pointer to store the last '>' from the input
10574  *
10575  * Lookup the last < and > in the current chunk
10576  */
10577 static void
10578 xmlParseGetLasts(xmlParserCtxtPtr ctxt, const xmlChar **lastlt,
10579                  const xmlChar **lastgt) {
10580     const xmlChar *tmp;
10581
10582     if ((ctxt == NULL) || (lastlt == NULL) || (lastgt == NULL)) {
10583         xmlGenericError(xmlGenericErrorContext,
10584                     "Internal error: xmlParseGetLasts\n");
10585         return;
10586     }
10587     if ((ctxt->progressive != 0) && (ctxt->inputNr == 1)) {
10588         tmp = ctxt->input->end;
10589         tmp--;
10590         while ((tmp >= ctxt->input->base) && (*tmp != '<')) tmp--;
10591         if (tmp < ctxt->input->base) {
10592             *lastlt = NULL;
10593             *lastgt = NULL;
10594         } else {
10595             *lastlt = tmp;
10596             tmp++;
10597             while ((tmp < ctxt->input->end) && (*tmp != '>')) {
10598                 if (*tmp == '\'') {
10599                     tmp++;
10600                     while ((tmp < ctxt->input->end) && (*tmp != '\'')) tmp++;
10601                     if (tmp < ctxt->input->end) tmp++;
10602                 } else if (*tmp == '"') {
10603                     tmp++;
10604                     while ((tmp < ctxt->input->end) && (*tmp != '"')) tmp++;
10605                     if (tmp < ctxt->input->end) tmp++;
10606                 } else
10607                     tmp++;
10608             }
10609             if (tmp < ctxt->input->end)
10610                 *lastgt = tmp;
10611             else {
10612                 tmp = *lastlt;
10613                 tmp--;
10614                 while ((tmp >= ctxt->input->base) && (*tmp != '>')) tmp--;
10615                 if (tmp >= ctxt->input->base)
10616                     *lastgt = tmp;
10617                 else
10618                     *lastgt = NULL;
10619             }
10620         }
10621     } else {
10622         *lastlt = NULL;
10623         *lastgt = NULL;
10624     }
10625 }
10626 /**
10627  * xmlCheckCdataPush:
10628  * @cur: pointer to the bock of characters
10629  * @len: length of the block in bytes
10630  *
10631  * Check that the block of characters is okay as SCdata content [20]
10632  *
10633  * Returns the number of bytes to pass if okay, a negative index where an
10634  *         UTF-8 error occured otherwise
10635  */
10636 static int
10637 xmlCheckCdataPush(const xmlChar *utf, int len) {
10638     int ix;
10639     unsigned char c;
10640     int codepoint;
10641
10642     if ((utf == NULL) || (len <= 0))
10643         return(0);
10644
10645     for (ix = 0; ix < len;) {      /* string is 0-terminated */
10646         c = utf[ix];
10647         if ((c & 0x80) == 0x00) {       /* 1-byte code, starts with 10 */
10648             if (c >= 0x20)
10649                 ix++;
10650             else if ((c == 0xA) || (c == 0xD) || (c == 0x9))
10651                 ix++;
10652             else
10653                 return(-ix);
10654         } else if ((c & 0xe0) == 0xc0) {/* 2-byte code, starts with 110 */
10655             if (ix + 2 > len) return(ix);
10656             if ((utf[ix+1] & 0xc0 ) != 0x80)
10657                 return(-ix);
10658             codepoint = (utf[ix] & 0x1f) << 6;
10659             codepoint |= utf[ix+1] & 0x3f;
10660             if (!xmlIsCharQ(codepoint))
10661                 return(-ix);
10662             ix += 2;
10663         } else if ((c & 0xf0) == 0xe0) {/* 3-byte code, starts with 1110 */
10664             if (ix + 3 > len) return(ix);
10665             if (((utf[ix+1] & 0xc0) != 0x80) ||
10666                 ((utf[ix+2] & 0xc0) != 0x80))
10667                     return(-ix);
10668             codepoint = (utf[ix] & 0xf) << 12;
10669             codepoint |= (utf[ix+1] & 0x3f) << 6;
10670             codepoint |= utf[ix+2] & 0x3f;
10671             if (!xmlIsCharQ(codepoint))
10672                 return(-ix);
10673             ix += 3;
10674         } else if ((c & 0xf8) == 0xf0) {/* 4-byte code, starts with 11110 */
10675             if (ix + 4 > len) return(ix);
10676             if (((utf[ix+1] & 0xc0) != 0x80) ||
10677                 ((utf[ix+2] & 0xc0) != 0x80) ||
10678                 ((utf[ix+3] & 0xc0) != 0x80))
10679                     return(-ix);
10680             codepoint = (utf[ix] & 0x7) << 18;
10681             codepoint |= (utf[ix+1] & 0x3f) << 12;
10682             codepoint |= (utf[ix+2] & 0x3f) << 6;
10683             codepoint |= utf[ix+3] & 0x3f;
10684             if (!xmlIsCharQ(codepoint))
10685                 return(-ix);
10686             ix += 4;
10687         } else                          /* unknown encoding */
10688             return(-ix);
10689       }
10690       return(ix);
10691 }
10692
10693 /**
10694  * xmlParseTryOrFinish:
10695  * @ctxt:  an XML parser context
10696  * @terminate:  last chunk indicator
10697  *
10698  * Try to progress on parsing
10699  *
10700  * Returns zero if no parsing was possible
10701  */
10702 static int
10703 xmlParseTryOrFinish(xmlParserCtxtPtr ctxt, int terminate) {
10704     int ret = 0;
10705     int avail, tlen;
10706     xmlChar cur, next;
10707     const xmlChar *lastlt, *lastgt;
10708
10709     if (ctxt->input == NULL)
10710         return(0);
10711
10712 #ifdef DEBUG_PUSH
10713     switch (ctxt->instate) {
10714         case XML_PARSER_EOF:
10715             xmlGenericError(xmlGenericErrorContext,
10716                     "PP: try EOF\n"); break;
10717         case XML_PARSER_START:
10718             xmlGenericError(xmlGenericErrorContext,
10719                     "PP: try START\n"); break;
10720         case XML_PARSER_MISC:
10721             xmlGenericError(xmlGenericErrorContext,
10722                     "PP: try MISC\n");break;
10723         case XML_PARSER_COMMENT:
10724             xmlGenericError(xmlGenericErrorContext,
10725                     "PP: try COMMENT\n");break;
10726         case XML_PARSER_PROLOG:
10727             xmlGenericError(xmlGenericErrorContext,
10728                     "PP: try PROLOG\n");break;
10729         case XML_PARSER_START_TAG:
10730             xmlGenericError(xmlGenericErrorContext,
10731                     "PP: try START_TAG\n");break;
10732         case XML_PARSER_CONTENT:
10733             xmlGenericError(xmlGenericErrorContext,
10734                     "PP: try CONTENT\n");break;
10735         case XML_PARSER_CDATA_SECTION:
10736             xmlGenericError(xmlGenericErrorContext,
10737                     "PP: try CDATA_SECTION\n");break;
10738         case XML_PARSER_END_TAG:
10739             xmlGenericError(xmlGenericErrorContext,
10740                     "PP: try END_TAG\n");break;
10741         case XML_PARSER_ENTITY_DECL:
10742             xmlGenericError(xmlGenericErrorContext,
10743                     "PP: try ENTITY_DECL\n");break;
10744         case XML_PARSER_ENTITY_VALUE:
10745             xmlGenericError(xmlGenericErrorContext,
10746                     "PP: try ENTITY_VALUE\n");break;
10747         case XML_PARSER_ATTRIBUTE_VALUE:
10748             xmlGenericError(xmlGenericErrorContext,
10749                     "PP: try ATTRIBUTE_VALUE\n");break;
10750         case XML_PARSER_DTD:
10751             xmlGenericError(xmlGenericErrorContext,
10752                     "PP: try DTD\n");break;
10753         case XML_PARSER_EPILOG:
10754             xmlGenericError(xmlGenericErrorContext,
10755                     "PP: try EPILOG\n");break;
10756         case XML_PARSER_PI:
10757             xmlGenericError(xmlGenericErrorContext,
10758                     "PP: try PI\n");break;
10759         case XML_PARSER_IGNORE:
10760             xmlGenericError(xmlGenericErrorContext,
10761                     "PP: try IGNORE\n");break;
10762     }
10763 #endif
10764
10765     if ((ctxt->input != NULL) &&
10766         (ctxt->input->cur - ctxt->input->base > 4096)) {
10767         xmlSHRINK(ctxt);
10768         ctxt->checkIndex = 0;
10769     }
10770     xmlParseGetLasts(ctxt, &lastlt, &lastgt);
10771
10772     while (1) {
10773         if ((ctxt->errNo != XML_ERR_OK) && (ctxt->disableSAX == 1))
10774             return(0);
10775
10776
10777         /*
10778          * Pop-up of finished entities.
10779          */
10780         while ((RAW == 0) && (ctxt->inputNr > 1))
10781             xmlPopInput(ctxt);
10782
10783         if (ctxt->input == NULL) break;
10784         if (ctxt->input->buf == NULL)
10785             avail = ctxt->input->length -
10786                     (ctxt->input->cur - ctxt->input->base);
10787         else {
10788             /*
10789              * If we are operating on converted input, try to flush
10790              * remainng chars to avoid them stalling in the non-converted
10791              * buffer.
10792              */
10793             if ((ctxt->input->buf->raw != NULL) &&
10794                 (ctxt->input->buf->raw->use > 0)) {
10795                 int base = ctxt->input->base -
10796                            ctxt->input->buf->buffer->content;
10797                 int current = ctxt->input->cur - ctxt->input->base;
10798
10799                 xmlParserInputBufferPush(ctxt->input->buf, 0, "");
10800                 ctxt->input->base = ctxt->input->buf->buffer->content + base;
10801                 ctxt->input->cur = ctxt->input->base + current;
10802                 ctxt->input->end =
10803                     &ctxt->input->buf->buffer->content[
10804                                        ctxt->input->buf->buffer->use];
10805             }
10806             avail = ctxt->input->buf->buffer->use -
10807                     (ctxt->input->cur - ctxt->input->base);
10808         }
10809         if (avail < 1)
10810             goto done;
10811         switch (ctxt->instate) {
10812             case XML_PARSER_EOF:
10813                 /*
10814                  * Document parsing is done !
10815                  */
10816                 goto done;
10817             case XML_PARSER_START:
10818                 if (ctxt->charset == XML_CHAR_ENCODING_NONE) {
10819                     xmlChar start[4];
10820                     xmlCharEncoding enc;
10821
10822                     /*
10823                      * Very first chars read from the document flow.
10824                      */
10825                     if (avail < 4)
10826                         goto done;
10827
10828                     /*
10829                      * Get the 4 first bytes and decode the charset
10830                      * if enc != XML_CHAR_ENCODING_NONE
10831                      * plug some encoding conversion routines,
10832                      * else xmlSwitchEncoding will set to (default)
10833                      * UTF8.
10834                      */
10835                     start[0] = RAW;
10836                     start[1] = NXT(1);
10837                     start[2] = NXT(2);
10838                     start[3] = NXT(3);
10839                     enc = xmlDetectCharEncoding(start, 4);
10840                     xmlSwitchEncoding(ctxt, enc);
10841                     break;
10842                 }
10843
10844                 if (avail < 2)
10845                     goto done;
10846                 cur = ctxt->input->cur[0];
10847                 next = ctxt->input->cur[1];
10848                 if (cur == 0) {
10849                     if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
10850                         ctxt->sax->setDocumentLocator(ctxt->userData,
10851                                                       &xmlDefaultSAXLocator);
10852                     xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
10853                     ctxt->instate = XML_PARSER_EOF;
10854 #ifdef DEBUG_PUSH
10855                     xmlGenericError(xmlGenericErrorContext,
10856                             "PP: entering EOF\n");
10857 #endif
10858                     if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
10859                         ctxt->sax->endDocument(ctxt->userData);
10860                     goto done;
10861                 }
10862                 if ((cur == '<') && (next == '?')) {
10863                     /* PI or XML decl */
10864                     if (avail < 5) return(ret);
10865                     if ((!terminate) &&
10866                         (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
10867                         return(ret);
10868                     if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
10869                         ctxt->sax->setDocumentLocator(ctxt->userData,
10870                                                       &xmlDefaultSAXLocator);
10871                     if ((ctxt->input->cur[2] == 'x') &&
10872                         (ctxt->input->cur[3] == 'm') &&
10873                         (ctxt->input->cur[4] == 'l') &&
10874                         (IS_BLANK_CH(ctxt->input->cur[5]))) {
10875                         ret += 5;
10876 #ifdef DEBUG_PUSH
10877                         xmlGenericError(xmlGenericErrorContext,
10878                                 "PP: Parsing XML Decl\n");
10879 #endif
10880                         xmlParseXMLDecl(ctxt);
10881                         if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
10882                             /*
10883                              * The XML REC instructs us to stop parsing right
10884                              * here
10885                              */
10886                             ctxt->instate = XML_PARSER_EOF;
10887                             return(0);
10888                         }
10889                         ctxt->standalone = ctxt->input->standalone;
10890                         if ((ctxt->encoding == NULL) &&
10891                             (ctxt->input->encoding != NULL))
10892                             ctxt->encoding = xmlStrdup(ctxt->input->encoding);
10893                         if ((ctxt->sax) && (ctxt->sax->startDocument) &&
10894                             (!ctxt->disableSAX))
10895                             ctxt->sax->startDocument(ctxt->userData);
10896                         ctxt->instate = XML_PARSER_MISC;
10897 #ifdef DEBUG_PUSH
10898                         xmlGenericError(xmlGenericErrorContext,
10899                                 "PP: entering MISC\n");
10900 #endif
10901                     } else {
10902                         ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
10903                         if ((ctxt->sax) && (ctxt->sax->startDocument) &&
10904                             (!ctxt->disableSAX))
10905                             ctxt->sax->startDocument(ctxt->userData);
10906                         ctxt->instate = XML_PARSER_MISC;
10907 #ifdef DEBUG_PUSH
10908                         xmlGenericError(xmlGenericErrorContext,
10909                                 "PP: entering MISC\n");
10910 #endif
10911                     }
10912                 } else {
10913                     if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
10914                         ctxt->sax->setDocumentLocator(ctxt->userData,
10915                                                       &xmlDefaultSAXLocator);
10916                     ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
10917                     if (ctxt->version == NULL) {
10918                         xmlErrMemory(ctxt, NULL);
10919                         break;
10920                     }
10921                     if ((ctxt->sax) && (ctxt->sax->startDocument) &&
10922                         (!ctxt->disableSAX))
10923                         ctxt->sax->startDocument(ctxt->userData);
10924                     ctxt->instate = XML_PARSER_MISC;
10925 #ifdef DEBUG_PUSH
10926                     xmlGenericError(xmlGenericErrorContext,
10927                             "PP: entering MISC\n");
10928 #endif
10929                 }
10930                 break;
10931             case XML_PARSER_START_TAG: {
10932                 const xmlChar *name;
10933                 const xmlChar *prefix = NULL;
10934                 const xmlChar *URI = NULL;
10935                 int nsNr = ctxt->nsNr;
10936
10937                 if ((avail < 2) && (ctxt->inputNr == 1))
10938                     goto done;
10939                 cur = ctxt->input->cur[0];
10940                 if (cur != '<') {
10941                     xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
10942                     ctxt->instate = XML_PARSER_EOF;
10943                     if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
10944                         ctxt->sax->endDocument(ctxt->userData);
10945                     goto done;
10946                 }
10947                 if (!terminate) {
10948                     if (ctxt->progressive) {
10949                         /* > can be found unescaped in attribute values */
10950                         if ((lastgt == NULL) || (ctxt->input->cur >= lastgt))
10951                             goto done;
10952                     } else if (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0) {
10953                         goto done;
10954                     }
10955                 }
10956                 if (ctxt->spaceNr == 0)
10957                     spacePush(ctxt, -1);
10958                 else if (*ctxt->space == -2)
10959                     spacePush(ctxt, -1);
10960                 else
10961                     spacePush(ctxt, *ctxt->space);
10962 #ifdef LIBXML_SAX1_ENABLED
10963                 if (ctxt->sax2)
10964 #endif /* LIBXML_SAX1_ENABLED */
10965                     name = xmlParseStartTag2(ctxt, &prefix, &URI, &tlen);
10966 #ifdef LIBXML_SAX1_ENABLED
10967                 else
10968                     name = xmlParseStartTag(ctxt);
10969 #endif /* LIBXML_SAX1_ENABLED */
10970                 if (name == NULL) {
10971                     spacePop(ctxt);
10972                     ctxt->instate = XML_PARSER_EOF;
10973                     if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
10974                         ctxt->sax->endDocument(ctxt->userData);
10975                     goto done;
10976                 }
10977 #ifdef LIBXML_VALID_ENABLED
10978                 /*
10979                  * [ VC: Root Element Type ]
10980                  * The Name in the document type declaration must match
10981                  * the element type of the root element.
10982                  */
10983                 if (ctxt->validate && ctxt->wellFormed && ctxt->myDoc &&
10984                     ctxt->node && (ctxt->node == ctxt->myDoc->children))
10985                     ctxt->valid &= xmlValidateRoot(&ctxt->vctxt, ctxt->myDoc);
10986 #endif /* LIBXML_VALID_ENABLED */
10987
10988                 /*
10989                  * Check for an Empty Element.
10990                  */
10991                 if ((RAW == '/') && (NXT(1) == '>')) {
10992                     SKIP(2);
10993
10994                     if (ctxt->sax2) {
10995                         if ((ctxt->sax != NULL) &&
10996                             (ctxt->sax->endElementNs != NULL) &&
10997                             (!ctxt->disableSAX))
10998                             ctxt->sax->endElementNs(ctxt->userData, name,
10999                                                     prefix, URI);
11000                         if (ctxt->nsNr - nsNr > 0)
11001                             nsPop(ctxt, ctxt->nsNr - nsNr);
11002 #ifdef LIBXML_SAX1_ENABLED
11003                     } else {
11004                         if ((ctxt->sax != NULL) &&
11005                             (ctxt->sax->endElement != NULL) &&
11006                             (!ctxt->disableSAX))
11007                             ctxt->sax->endElement(ctxt->userData, name);
11008 #endif /* LIBXML_SAX1_ENABLED */
11009                     }
11010                     spacePop(ctxt);
11011                     if (ctxt->nameNr == 0) {
11012                         ctxt->instate = XML_PARSER_EPILOG;
11013                     } else {
11014                         ctxt->instate = XML_PARSER_CONTENT;
11015                     }
11016                     break;
11017                 }
11018                 if (RAW == '>') {
11019                     NEXT;
11020                 } else {
11021                     xmlFatalErrMsgStr(ctxt, XML_ERR_GT_REQUIRED,
11022                                          "Couldn't find end of Start Tag %s\n",
11023                                          name);
11024                     nodePop(ctxt);
11025                     spacePop(ctxt);
11026                 }
11027                 if (ctxt->sax2)
11028                     nameNsPush(ctxt, name, prefix, URI, ctxt->nsNr - nsNr);
11029 #ifdef LIBXML_SAX1_ENABLED
11030                 else
11031                     namePush(ctxt, name);
11032 #endif /* LIBXML_SAX1_ENABLED */
11033
11034                 ctxt->instate = XML_PARSER_CONTENT;
11035                 break;
11036             }
11037             case XML_PARSER_CONTENT: {
11038                 const xmlChar *test;
11039                 unsigned int cons;
11040                 if ((avail < 2) && (ctxt->inputNr == 1))
11041                     goto done;
11042                 cur = ctxt->input->cur[0];
11043                 next = ctxt->input->cur[1];
11044
11045                 test = CUR_PTR;
11046                 cons = ctxt->input->consumed;
11047                 if ((cur == '<') && (next == '/')) {
11048                     ctxt->instate = XML_PARSER_END_TAG;
11049                     break;
11050                 } else if ((cur == '<') && (next == '?')) {
11051                     if ((!terminate) &&
11052                         (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
11053                         goto done;
11054                     xmlParsePI(ctxt);
11055                 } else if ((cur == '<') && (next != '!')) {
11056                     ctxt->instate = XML_PARSER_START_TAG;
11057                     break;
11058                 } else if ((cur == '<') && (next == '!') &&
11059                            (ctxt->input->cur[2] == '-') &&
11060                            (ctxt->input->cur[3] == '-')) {
11061                     int term;
11062
11063                     if (avail < 4)
11064                         goto done;
11065                     ctxt->input->cur += 4;
11066                     term = xmlParseLookupSequence(ctxt, '-', '-', '>');
11067                     ctxt->input->cur -= 4;
11068                     if ((!terminate) && (term < 0))
11069                         goto done;
11070                     xmlParseComment(ctxt);
11071                     ctxt->instate = XML_PARSER_CONTENT;
11072                 } else if ((cur == '<') && (ctxt->input->cur[1] == '!') &&
11073                     (ctxt->input->cur[2] == '[') &&
11074                     (ctxt->input->cur[3] == 'C') &&
11075                     (ctxt->input->cur[4] == 'D') &&
11076                     (ctxt->input->cur[5] == 'A') &&
11077                     (ctxt->input->cur[6] == 'T') &&
11078                     (ctxt->input->cur[7] == 'A') &&
11079                     (ctxt->input->cur[8] == '[')) {
11080                     SKIP(9);
11081                     ctxt->instate = XML_PARSER_CDATA_SECTION;
11082                     break;
11083                 } else if ((cur == '<') && (next == '!') &&
11084                            (avail < 9)) {
11085                     goto done;
11086                 } else if (cur == '&') {
11087                     if ((!terminate) &&
11088                         (xmlParseLookupSequence(ctxt, ';', 0, 0) < 0))
11089                         goto done;
11090                     xmlParseReference(ctxt);
11091                 } else {
11092                     /* TODO Avoid the extra copy, handle directly !!! */
11093                     /*
11094                      * Goal of the following test is:
11095                      *  - minimize calls to the SAX 'character' callback
11096                      *    when they are mergeable
11097                      *  - handle an problem for isBlank when we only parse
11098                      *    a sequence of blank chars and the next one is
11099                      *    not available to check against '<' presence.
11100                      *  - tries to homogenize the differences in SAX
11101                      *    callbacks between the push and pull versions
11102                      *    of the parser.
11103                      */
11104                     if ((ctxt->inputNr == 1) &&
11105                         (avail < XML_PARSER_BIG_BUFFER_SIZE)) {
11106                         if (!terminate) {
11107                             if (ctxt->progressive) {
11108                                 if ((lastlt == NULL) ||
11109                                     (ctxt->input->cur > lastlt))
11110                                     goto done;
11111                             } else if (xmlParseLookupSequence(ctxt,
11112                                                               '<', 0, 0) < 0) {
11113                                 goto done;
11114                             }
11115                         }
11116                     }
11117                     ctxt->checkIndex = 0;
11118                     xmlParseCharData(ctxt, 0);
11119                 }
11120                 /*
11121                  * Pop-up of finished entities.
11122                  */
11123                 while ((RAW == 0) && (ctxt->inputNr > 1))
11124                     xmlPopInput(ctxt);
11125                 if ((cons == ctxt->input->consumed) && (test == CUR_PTR)) {
11126                     xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
11127                                 "detected an error in element content\n");
11128                     ctxt->instate = XML_PARSER_EOF;
11129                     break;
11130                 }
11131                 break;
11132             }
11133             case XML_PARSER_END_TAG:
11134                 if (avail < 2)
11135                     goto done;
11136                 if (!terminate) {
11137                     if (ctxt->progressive) {
11138                         /* > can be found unescaped in attribute values */
11139                         if ((lastgt == NULL) || (ctxt->input->cur >= lastgt))
11140                             goto done;
11141                     } else if (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0) {
11142                         goto done;
11143                     }
11144                 }
11145                 if (ctxt->sax2) {
11146                     xmlParseEndTag2(ctxt,
11147                            (void *) ctxt->pushTab[ctxt->nameNr * 3 - 3],
11148                            (void *) ctxt->pushTab[ctxt->nameNr * 3 - 2], 0,
11149                        (int) (long) ctxt->pushTab[ctxt->nameNr * 3 - 1], 0);
11150                     nameNsPop(ctxt);
11151                 }
11152 #ifdef LIBXML_SAX1_ENABLED
11153                   else
11154                     xmlParseEndTag1(ctxt, 0);
11155 #endif /* LIBXML_SAX1_ENABLED */
11156                 if (ctxt->nameNr == 0) {
11157                     ctxt->instate = XML_PARSER_EPILOG;
11158                 } else {
11159                     ctxt->instate = XML_PARSER_CONTENT;
11160                 }
11161                 break;
11162             case XML_PARSER_CDATA_SECTION: {
11163                 /*
11164                  * The Push mode need to have the SAX callback for
11165                  * cdataBlock merge back contiguous callbacks.
11166                  */
11167                 int base;
11168
11169                 base = xmlParseLookupSequence(ctxt, ']', ']', '>');
11170                 if (base < 0) {
11171                     if (avail >= XML_PARSER_BIG_BUFFER_SIZE + 2) {
11172                         int tmp;
11173
11174                         tmp = xmlCheckCdataPush(ctxt->input->cur,
11175                                                 XML_PARSER_BIG_BUFFER_SIZE);
11176                         if (tmp < 0) {
11177                             tmp = -tmp;
11178                             ctxt->input->cur += tmp;
11179                             goto encoding_error;
11180                         }
11181                         if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
11182                             if (ctxt->sax->cdataBlock != NULL)
11183                                 ctxt->sax->cdataBlock(ctxt->userData,
11184                                                       ctxt->input->cur, tmp);
11185                             else if (ctxt->sax->characters != NULL)
11186                                 ctxt->sax->characters(ctxt->userData,
11187                                                       ctxt->input->cur, tmp);
11188                         }
11189                         SKIPL(tmp);
11190                         ctxt->checkIndex = 0;
11191                     }
11192                     goto done;
11193                 } else {
11194                     int tmp;
11195
11196                     tmp = xmlCheckCdataPush(ctxt->input->cur, base);
11197                     if ((tmp < 0) || (tmp != base)) {
11198                         tmp = -tmp;
11199                         ctxt->input->cur += tmp;
11200                         goto encoding_error;
11201                     }
11202                     if ((ctxt->sax != NULL) && (base == 0) &&
11203                         (ctxt->sax->cdataBlock != NULL) &&
11204                         (!ctxt->disableSAX)) {
11205                         /*
11206                          * Special case to provide identical behaviour
11207                          * between pull and push parsers on enpty CDATA
11208                          * sections
11209                          */
11210                          if ((ctxt->input->cur - ctxt->input->base >= 9) &&
11211                              (!strncmp((const char *)&ctxt->input->cur[-9],
11212                                        "<![CDATA[", 9)))
11213                              ctxt->sax->cdataBlock(ctxt->userData,
11214                                                    BAD_CAST "", 0);
11215                     } else if ((ctxt->sax != NULL) && (base > 0) &&
11216                         (!ctxt->disableSAX)) {
11217                         if (ctxt->sax->cdataBlock != NULL)
11218                             ctxt->sax->cdataBlock(ctxt->userData,
11219                                                   ctxt->input->cur, base);
11220                         else if (ctxt->sax->characters != NULL)
11221                             ctxt->sax->characters(ctxt->userData,
11222                                                   ctxt->input->cur, base);
11223                     }
11224                     SKIPL(base + 3);
11225                     ctxt->checkIndex = 0;
11226                     ctxt->instate = XML_PARSER_CONTENT;
11227 #ifdef DEBUG_PUSH
11228                     xmlGenericError(xmlGenericErrorContext,
11229                             "PP: entering CONTENT\n");
11230 #endif
11231                 }
11232                 break;
11233             }
11234             case XML_PARSER_MISC:
11235                 SKIP_BLANKS;
11236                 if (ctxt->input->buf == NULL)
11237                     avail = ctxt->input->length -
11238                             (ctxt->input->cur - ctxt->input->base);
11239                 else
11240                     avail = ctxt->input->buf->buffer->use -
11241                             (ctxt->input->cur - ctxt->input->base);
11242                 if (avail < 2)
11243                     goto done;
11244                 cur = ctxt->input->cur[0];
11245                 next = ctxt->input->cur[1];
11246                 if ((cur == '<') && (next == '?')) {
11247                     if ((!terminate) &&
11248                         (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
11249                         goto done;
11250 #ifdef DEBUG_PUSH
11251                     xmlGenericError(xmlGenericErrorContext,
11252                             "PP: Parsing PI\n");
11253 #endif
11254                     xmlParsePI(ctxt);
11255                     ctxt->checkIndex = 0;
11256                 } else if ((cur == '<') && (next == '!') &&
11257                     (ctxt->input->cur[2] == '-') &&
11258                     (ctxt->input->cur[3] == '-')) {
11259                     if ((!terminate) &&
11260                         (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0))
11261                         goto done;
11262 #ifdef DEBUG_PUSH
11263                     xmlGenericError(xmlGenericErrorContext,
11264                             "PP: Parsing Comment\n");
11265 #endif
11266                     xmlParseComment(ctxt);
11267                     ctxt->instate = XML_PARSER_MISC;
11268                     ctxt->checkIndex = 0;
11269                 } else if ((cur == '<') && (next == '!') &&
11270                     (ctxt->input->cur[2] == 'D') &&
11271                     (ctxt->input->cur[3] == 'O') &&
11272                     (ctxt->input->cur[4] == 'C') &&
11273                     (ctxt->input->cur[5] == 'T') &&
11274                     (ctxt->input->cur[6] == 'Y') &&
11275                     (ctxt->input->cur[7] == 'P') &&
11276                     (ctxt->input->cur[8] == 'E')) {
11277                     if ((!terminate) &&
11278                         (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0))
11279                         goto done;
11280 #ifdef DEBUG_PUSH
11281                     xmlGenericError(xmlGenericErrorContext,
11282                             "PP: Parsing internal subset\n");
11283 #endif
11284                     ctxt->inSubset = 1;
11285                     xmlParseDocTypeDecl(ctxt);
11286                     if (RAW == '[') {
11287                         ctxt->instate = XML_PARSER_DTD;
11288 #ifdef DEBUG_PUSH
11289                         xmlGenericError(xmlGenericErrorContext,
11290                                 "PP: entering DTD\n");
11291 #endif
11292                     } else {
11293                         /*
11294                          * Create and update the external subset.
11295                          */
11296                         ctxt->inSubset = 2;
11297                         if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
11298                             (ctxt->sax->externalSubset != NULL))
11299                             ctxt->sax->externalSubset(ctxt->userData,
11300                                     ctxt->intSubName, ctxt->extSubSystem,
11301                                     ctxt->extSubURI);
11302                         ctxt->inSubset = 0;
11303                         xmlCleanSpecialAttr(ctxt);
11304                         ctxt->instate = XML_PARSER_PROLOG;
11305 #ifdef DEBUG_PUSH
11306                         xmlGenericError(xmlGenericErrorContext,
11307                                 "PP: entering PROLOG\n");
11308 #endif
11309                     }
11310                 } else if ((cur == '<') && (next == '!') &&
11311                            (avail < 9)) {
11312                     goto done;
11313                 } else {
11314                     ctxt->instate = XML_PARSER_START_TAG;
11315                     ctxt->progressive = 1;
11316                     xmlParseGetLasts(ctxt, &lastlt, &lastgt);
11317 #ifdef DEBUG_PUSH
11318                     xmlGenericError(xmlGenericErrorContext,
11319                             "PP: entering START_TAG\n");
11320 #endif
11321                 }
11322                 break;
11323             case XML_PARSER_PROLOG:
11324                 SKIP_BLANKS;
11325                 if (ctxt->input->buf == NULL)
11326                     avail = ctxt->input->length - (ctxt->input->cur - ctxt->input->base);
11327                 else
11328                     avail = ctxt->input->buf->buffer->use - (ctxt->input->cur - ctxt->input->base);
11329                 if (avail < 2)
11330                     goto done;
11331                 cur = ctxt->input->cur[0];
11332                 next = ctxt->input->cur[1];
11333                 if ((cur == '<') && (next == '?')) {
11334                     if ((!terminate) &&
11335                         (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
11336                         goto done;
11337 #ifdef DEBUG_PUSH
11338                     xmlGenericError(xmlGenericErrorContext,
11339                             "PP: Parsing PI\n");
11340 #endif
11341                     xmlParsePI(ctxt);
11342                 } else if ((cur == '<') && (next == '!') &&
11343                     (ctxt->input->cur[2] == '-') && (ctxt->input->cur[3] == '-')) {
11344                     if ((!terminate) &&
11345                         (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0))
11346                         goto done;
11347 #ifdef DEBUG_PUSH
11348                     xmlGenericError(xmlGenericErrorContext,
11349                             "PP: Parsing Comment\n");
11350 #endif
11351                     xmlParseComment(ctxt);
11352                     ctxt->instate = XML_PARSER_PROLOG;
11353                 } else if ((cur == '<') && (next == '!') &&
11354                            (avail < 4)) {
11355                     goto done;
11356                 } else {
11357                     ctxt->instate = XML_PARSER_START_TAG;
11358                     if (ctxt->progressive == 0)
11359                         ctxt->progressive = 1;
11360                     xmlParseGetLasts(ctxt, &lastlt, &lastgt);
11361 #ifdef DEBUG_PUSH
11362                     xmlGenericError(xmlGenericErrorContext,
11363                             "PP: entering START_TAG\n");
11364 #endif
11365                 }
11366                 break;
11367             case XML_PARSER_EPILOG:
11368                 SKIP_BLANKS;
11369                 if (ctxt->input->buf == NULL)
11370                     avail = ctxt->input->length - (ctxt->input->cur - ctxt->input->base);
11371                 else
11372                     avail = ctxt->input->buf->buffer->use - (ctxt->input->cur - ctxt->input->base);
11373                 if (avail < 2)
11374                     goto done;
11375                 cur = ctxt->input->cur[0];
11376                 next = ctxt->input->cur[1];
11377                 if ((cur == '<') && (next == '?')) {
11378                     if ((!terminate) &&
11379                         (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
11380                         goto done;
11381 #ifdef DEBUG_PUSH
11382                     xmlGenericError(xmlGenericErrorContext,
11383                             "PP: Parsing PI\n");
11384 #endif
11385                     xmlParsePI(ctxt);
11386                     ctxt->instate = XML_PARSER_EPILOG;
11387                 } else if ((cur == '<') && (next == '!') &&
11388                     (ctxt->input->cur[2] == '-') && (ctxt->input->cur[3] == '-')) {
11389                     if ((!terminate) &&
11390                         (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0))
11391                         goto done;
11392 #ifdef DEBUG_PUSH
11393                     xmlGenericError(xmlGenericErrorContext,
11394                             "PP: Parsing Comment\n");
11395 #endif
11396                     xmlParseComment(ctxt);
11397                     ctxt->instate = XML_PARSER_EPILOG;
11398                 } else if ((cur == '<') && (next == '!') &&
11399                            (avail < 4)) {
11400                     goto done;
11401                 } else {
11402                     xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
11403                     ctxt->instate = XML_PARSER_EOF;
11404 #ifdef DEBUG_PUSH
11405                     xmlGenericError(xmlGenericErrorContext,
11406                             "PP: entering EOF\n");
11407 #endif
11408                     if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
11409                         ctxt->sax->endDocument(ctxt->userData);
11410                     goto done;
11411                 }
11412                 break;
11413             case XML_PARSER_DTD: {
11414                 /*
11415                  * Sorry but progressive parsing of the internal subset
11416                  * is not expected to be supported. We first check that
11417                  * the full content of the internal subset is available and
11418                  * the parsing is launched only at that point.
11419                  * Internal subset ends up with "']' S? '>'" in an unescaped
11420                  * section and not in a ']]>' sequence which are conditional
11421                  * sections (whoever argued to keep that crap in XML deserve
11422                  * a place in hell !).
11423                  */
11424                 int base, i;
11425                 xmlChar *buf;
11426                 xmlChar quote = 0;
11427
11428                 base = ctxt->input->cur - ctxt->input->base;
11429                 if (base < 0) return(0);
11430                 if (ctxt->checkIndex > base)
11431                     base = ctxt->checkIndex;
11432                 buf = ctxt->input->buf->buffer->content;
11433                 for (;(unsigned int) base < ctxt->input->buf->buffer->use;
11434                      base++) {
11435                     if (quote != 0) {
11436                         if (buf[base] == quote)
11437                             quote = 0;
11438                         continue;
11439                     }
11440                     if ((quote == 0) && (buf[base] == '<')) {
11441                         int found  = 0;
11442                         /* special handling of comments */
11443                         if (((unsigned int) base + 4 <
11444                              ctxt->input->buf->buffer->use) &&
11445                             (buf[base + 1] == '!') &&
11446                             (buf[base + 2] == '-') &&
11447                             (buf[base + 3] == '-')) {
11448                             for (;(unsigned int) base + 3 <
11449                                   ctxt->input->buf->buffer->use; base++) {
11450                                 if ((buf[base] == '-') &&
11451                                     (buf[base + 1] == '-') &&
11452                                     (buf[base + 2] == '>')) {
11453                                     found = 1;
11454                                     base += 2;
11455                                     break;
11456                                 }
11457                             }
11458                             if (!found) {
11459 #if 0
11460                                 fprintf(stderr, "unfinished comment\n");
11461 #endif
11462                                 break; /* for */
11463                             }
11464                             continue;
11465                         }
11466                     }
11467                     if (buf[base] == '"') {
11468                         quote = '"';
11469                         continue;
11470                     }
11471                     if (buf[base] == '\'') {
11472                         quote = '\'';
11473                         continue;
11474                     }
11475                     if (buf[base] == ']') {
11476 #if 0
11477                         fprintf(stderr, "%c%c%c%c: ", buf[base],
11478                                 buf[base + 1], buf[base + 2], buf[base + 3]);
11479 #endif
11480                         if ((unsigned int) base +1 >=
11481                             ctxt->input->buf->buffer->use)
11482                             break;
11483                         if (buf[base + 1] == ']') {
11484                             /* conditional crap, skip both ']' ! */
11485                             base++;
11486                             continue;
11487                         }
11488                         for (i = 1;
11489                      (unsigned int) base + i < ctxt->input->buf->buffer->use;
11490                              i++) {
11491                             if (buf[base + i] == '>') {
11492 #if 0
11493                                 fprintf(stderr, "found\n");
11494 #endif
11495                                 goto found_end_int_subset;
11496                             }
11497                             if (!IS_BLANK_CH(buf[base + i])) {
11498 #if 0
11499                                 fprintf(stderr, "not found\n");
11500 #endif
11501                                 goto not_end_of_int_subset;
11502                             }
11503                         }
11504 #if 0
11505                         fprintf(stderr, "end of stream\n");
11506 #endif
11507                         break;
11508
11509                     }
11510 not_end_of_int_subset:
11511                     continue; /* for */
11512                 }
11513                 /*
11514                  * We didn't found the end of the Internal subset
11515                  */
11516 #ifdef DEBUG_PUSH
11517                 if (next == 0)
11518                     xmlGenericError(xmlGenericErrorContext,
11519                             "PP: lookup of int subset end filed\n");
11520 #endif
11521                 goto done;
11522
11523 found_end_int_subset:
11524                 xmlParseInternalSubset(ctxt);
11525                 ctxt->inSubset = 2;
11526                 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
11527                     (ctxt->sax->externalSubset != NULL))
11528                     ctxt->sax->externalSubset(ctxt->userData, ctxt->intSubName,
11529                             ctxt->extSubSystem, ctxt->extSubURI);
11530                 ctxt->inSubset = 0;
11531                 xmlCleanSpecialAttr(ctxt);
11532                 ctxt->instate = XML_PARSER_PROLOG;
11533                 ctxt->checkIndex = 0;
11534 #ifdef DEBUG_PUSH
11535                 xmlGenericError(xmlGenericErrorContext,
11536                         "PP: entering PROLOG\n");
11537 #endif
11538                 break;
11539             }
11540             case XML_PARSER_COMMENT:
11541                 xmlGenericError(xmlGenericErrorContext,
11542                         "PP: internal error, state == COMMENT\n");
11543                 ctxt->instate = XML_PARSER_CONTENT;
11544 #ifdef DEBUG_PUSH
11545                 xmlGenericError(xmlGenericErrorContext,
11546                         "PP: entering CONTENT\n");
11547 #endif
11548                 break;
11549             case XML_PARSER_IGNORE:
11550                 xmlGenericError(xmlGenericErrorContext,
11551                         "PP: internal error, state == IGNORE");
11552                 ctxt->instate = XML_PARSER_DTD;
11553 #ifdef DEBUG_PUSH
11554                 xmlGenericError(xmlGenericErrorContext,
11555                         "PP: entering DTD\n");
11556 #endif
11557                 break;
11558             case XML_PARSER_PI:
11559                 xmlGenericError(xmlGenericErrorContext,
11560                         "PP: internal error, state == PI\n");
11561                 ctxt->instate = XML_PARSER_CONTENT;
11562 #ifdef DEBUG_PUSH
11563                 xmlGenericError(xmlGenericErrorContext,
11564                         "PP: entering CONTENT\n");
11565 #endif
11566                 break;
11567             case XML_PARSER_ENTITY_DECL:
11568                 xmlGenericError(xmlGenericErrorContext,
11569                         "PP: internal error, state == ENTITY_DECL\n");
11570                 ctxt->instate = XML_PARSER_DTD;
11571 #ifdef DEBUG_PUSH
11572                 xmlGenericError(xmlGenericErrorContext,
11573                         "PP: entering DTD\n");
11574 #endif
11575                 break;
11576             case XML_PARSER_ENTITY_VALUE:
11577                 xmlGenericError(xmlGenericErrorContext,
11578                         "PP: internal error, state == ENTITY_VALUE\n");
11579                 ctxt->instate = XML_PARSER_CONTENT;
11580 #ifdef DEBUG_PUSH
11581                 xmlGenericError(xmlGenericErrorContext,
11582                         "PP: entering DTD\n");
11583 #endif
11584                 break;
11585             case XML_PARSER_ATTRIBUTE_VALUE:
11586                 xmlGenericError(xmlGenericErrorContext,
11587                         "PP: internal error, state == ATTRIBUTE_VALUE\n");
11588                 ctxt->instate = XML_PARSER_START_TAG;
11589 #ifdef DEBUG_PUSH
11590                 xmlGenericError(xmlGenericErrorContext,
11591                         "PP: entering START_TAG\n");
11592 #endif
11593                 break;
11594             case XML_PARSER_SYSTEM_LITERAL:
11595                 xmlGenericError(xmlGenericErrorContext,
11596                         "PP: internal error, state == SYSTEM_LITERAL\n");
11597                 ctxt->instate = XML_PARSER_START_TAG;
11598 #ifdef DEBUG_PUSH
11599                 xmlGenericError(xmlGenericErrorContext,
11600                         "PP: entering START_TAG\n");
11601 #endif
11602                 break;
11603             case XML_PARSER_PUBLIC_LITERAL:
11604                 xmlGenericError(xmlGenericErrorContext,
11605                         "PP: internal error, state == PUBLIC_LITERAL\n");
11606                 ctxt->instate = XML_PARSER_START_TAG;
11607 #ifdef DEBUG_PUSH
11608                 xmlGenericError(xmlGenericErrorContext,
11609                         "PP: entering START_TAG\n");
11610 #endif
11611                 break;
11612         }
11613     }
11614 done:
11615 #ifdef DEBUG_PUSH
11616     xmlGenericError(xmlGenericErrorContext, "PP: done %d\n", ret);
11617 #endif
11618     return(ret);
11619 encoding_error:
11620     {
11621         char buffer[150];
11622
11623         snprintf(buffer, 149, "Bytes: 0x%02X 0x%02X 0x%02X 0x%02X\n",
11624                         ctxt->input->cur[0], ctxt->input->cur[1],
11625                         ctxt->input->cur[2], ctxt->input->cur[3]);
11626         __xmlErrEncoding(ctxt, XML_ERR_INVALID_CHAR,
11627                      "Input is not proper UTF-8, indicate encoding !\n%s",
11628                      BAD_CAST buffer, NULL);
11629     }
11630     return(0);
11631 }
11632
11633 /**
11634  * xmlParseChunk:
11635  * @ctxt:  an XML parser context
11636  * @chunk:  an char array
11637  * @size:  the size in byte of the chunk
11638  * @terminate:  last chunk indicator
11639  *
11640  * Parse a Chunk of memory
11641  *
11642  * Returns zero if no error, the xmlParserErrors otherwise.
11643  */
11644 int
11645 xmlParseChunk(xmlParserCtxtPtr ctxt, const char *chunk, int size,
11646               int terminate) {
11647     int end_in_lf = 0;
11648     int remain = 0;
11649
11650     if (ctxt == NULL)
11651         return(XML_ERR_INTERNAL_ERROR);
11652     if ((ctxt->errNo != XML_ERR_OK) && (ctxt->disableSAX == 1))
11653         return(ctxt->errNo);
11654     if (ctxt->instate == XML_PARSER_START)
11655         xmlDetectSAX2(ctxt);
11656     if ((size > 0) && (chunk != NULL) && (!terminate) &&
11657         (chunk[size - 1] == '\r')) {
11658         end_in_lf = 1;
11659         size--;
11660     }
11661
11662 xmldecl_done:
11663
11664     if ((size > 0) && (chunk != NULL) && (ctxt->input != NULL) &&
11665         (ctxt->input->buf != NULL) && (ctxt->instate != XML_PARSER_EOF))  {
11666         int base = ctxt->input->base - ctxt->input->buf->buffer->content;
11667         int cur = ctxt->input->cur - ctxt->input->base;
11668         int res;
11669
11670         /*
11671          * Specific handling if we autodetected an encoding, we should not
11672          * push more than the first line ... which depend on the encoding
11673          * And only push the rest once the final encoding was detected
11674          */
11675         if ((ctxt->instate == XML_PARSER_START) && (ctxt->input != NULL) &&
11676             (ctxt->input->buf != NULL) && (ctxt->input->buf->encoder != NULL)) {
11677             unsigned int len = 45;
11678
11679             if ((xmlStrcasestr(BAD_CAST ctxt->input->buf->encoder->name,
11680                                BAD_CAST "UTF-16")) ||
11681                 (xmlStrcasestr(BAD_CAST ctxt->input->buf->encoder->name,
11682                                BAD_CAST "UTF16")))
11683                 len = 90;
11684             else if ((xmlStrcasestr(BAD_CAST ctxt->input->buf->encoder->name,
11685                                     BAD_CAST "UCS-4")) ||
11686                      (xmlStrcasestr(BAD_CAST ctxt->input->buf->encoder->name,
11687                                     BAD_CAST "UCS4")))
11688                 len = 180;
11689
11690             if (ctxt->input->buf->rawconsumed < len)
11691                 len -= ctxt->input->buf->rawconsumed;
11692
11693             /*
11694              * Change size for reading the initial declaration only
11695              * if size is greater than len. Otherwise, memmove in xmlBufferAdd
11696              * will blindly copy extra bytes from memory.
11697              */
11698             if ((unsigned int) size > len) {
11699                 remain = size - len;
11700                 size = len;
11701             } else {
11702                 remain = 0;
11703             }
11704         }
11705         res =xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
11706         if (res < 0) {
11707             ctxt->errNo = XML_PARSER_EOF;
11708             ctxt->disableSAX = 1;
11709             return (XML_PARSER_EOF);
11710         }
11711         ctxt->input->base = ctxt->input->buf->buffer->content + base;
11712         ctxt->input->cur = ctxt->input->base + cur;
11713         ctxt->input->end =
11714             &ctxt->input->buf->buffer->content[ctxt->input->buf->buffer->use];
11715 #ifdef DEBUG_PUSH
11716         xmlGenericError(xmlGenericErrorContext, "PP: pushed %d\n", size);
11717 #endif
11718
11719     } else if (ctxt->instate != XML_PARSER_EOF) {
11720         if ((ctxt->input != NULL) && ctxt->input->buf != NULL) {
11721             xmlParserInputBufferPtr in = ctxt->input->buf;
11722             if ((in->encoder != NULL) && (in->buffer != NULL) &&
11723                     (in->raw != NULL)) {
11724                 int nbchars;
11725
11726                 nbchars = xmlCharEncInFunc(in->encoder, in->buffer, in->raw);
11727                 if (nbchars < 0) {
11728                     /* TODO 2.6.0 */
11729                     xmlGenericError(xmlGenericErrorContext,
11730                                     "xmlParseChunk: encoder error\n");
11731                     return(XML_ERR_INVALID_ENCODING);
11732                 }
11733             }
11734         }
11735     }
11736     if (remain != 0)
11737         xmlParseTryOrFinish(ctxt, 0);
11738     else
11739         xmlParseTryOrFinish(ctxt, terminate);
11740     if ((ctxt->errNo != XML_ERR_OK) && (ctxt->disableSAX == 1))
11741         return(ctxt->errNo);
11742
11743     if (remain != 0) {
11744         chunk += size;
11745         size = remain;
11746         remain = 0;
11747         goto xmldecl_done;
11748     }
11749     if ((end_in_lf == 1) && (ctxt->input != NULL) &&
11750         (ctxt->input->buf != NULL)) {
11751         xmlParserInputBufferPush(ctxt->input->buf, 1, "\r");
11752     }
11753     if (terminate) {
11754         /*
11755          * Check for termination
11756          */
11757         int avail = 0;
11758
11759         if (ctxt->input != NULL) {
11760             if (ctxt->input->buf == NULL)
11761                 avail = ctxt->input->length -
11762                         (ctxt->input->cur - ctxt->input->base);
11763             else
11764                 avail = ctxt->input->buf->buffer->use -
11765                         (ctxt->input->cur - ctxt->input->base);
11766         }
11767
11768         if ((ctxt->instate != XML_PARSER_EOF) &&
11769             (ctxt->instate != XML_PARSER_EPILOG)) {
11770             xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
11771         }
11772         if ((ctxt->instate == XML_PARSER_EPILOG) && (avail > 0)) {
11773             xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
11774         }
11775         if (ctxt->instate != XML_PARSER_EOF) {
11776             if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
11777                 ctxt->sax->endDocument(ctxt->userData);
11778         }
11779         ctxt->instate = XML_PARSER_EOF;
11780     }
11781     return((xmlParserErrors) ctxt->errNo);
11782 }
11783
11784 /************************************************************************
11785  *                                                                      *
11786  *              I/O front end functions to the parser                   *
11787  *                                                                      *
11788  ************************************************************************/
11789
11790 /**
11791  * xmlCreatePushParserCtxt:
11792  * @sax:  a SAX handler
11793  * @user_data:  The user data returned on SAX callbacks
11794  * @chunk:  a pointer to an array of chars
11795  * @size:  number of chars in the array
11796  * @filename:  an optional file name or URI
11797  *
11798  * Create a parser context for using the XML parser in push mode.
11799  * If @buffer and @size are non-NULL, the data is used to detect
11800  * the encoding.  The remaining characters will be parsed so they
11801  * don't need to be fed in again through xmlParseChunk.
11802  * To allow content encoding detection, @size should be >= 4
11803  * The value of @filename is used for fetching external entities
11804  * and error/warning reports.
11805  *
11806  * Returns the new parser context or NULL
11807  */
11808
11809 xmlParserCtxtPtr
11810 xmlCreatePushParserCtxt(xmlSAXHandlerPtr sax, void *user_data,
11811                         const char *chunk, int size, const char *filename) {
11812     xmlParserCtxtPtr ctxt;
11813     xmlParserInputPtr inputStream;
11814     xmlParserInputBufferPtr buf;
11815     xmlCharEncoding enc = XML_CHAR_ENCODING_NONE;
11816
11817     /*
11818      * plug some encoding conversion routines
11819      */
11820     if ((chunk != NULL) && (size >= 4))
11821         enc = xmlDetectCharEncoding((const xmlChar *) chunk, size);
11822
11823     buf = xmlAllocParserInputBuffer(enc);
11824     if (buf == NULL) return(NULL);
11825
11826     ctxt = xmlNewParserCtxt();
11827     if (ctxt == NULL) {
11828         xmlErrMemory(NULL, "creating parser: out of memory\n");
11829         xmlFreeParserInputBuffer(buf);
11830         return(NULL);
11831     }
11832     ctxt->dictNames = 1;
11833     ctxt->pushTab = (void **) xmlMalloc(ctxt->nameMax * 3 * sizeof(xmlChar *));
11834     if (ctxt->pushTab == NULL) {
11835         xmlErrMemory(ctxt, NULL);
11836         xmlFreeParserInputBuffer(buf);
11837         xmlFreeParserCtxt(ctxt);
11838         return(NULL);
11839     }
11840     if (sax != NULL) {
11841 #ifdef LIBXML_SAX1_ENABLED
11842         if (ctxt->sax != (xmlSAXHandlerPtr) &xmlDefaultSAXHandler)
11843 #endif /* LIBXML_SAX1_ENABLED */
11844             xmlFree(ctxt->sax);
11845         ctxt->sax = (xmlSAXHandlerPtr) xmlMalloc(sizeof(xmlSAXHandler));
11846         if (ctxt->sax == NULL) {
11847             xmlErrMemory(ctxt, NULL);
11848             xmlFreeParserInputBuffer(buf);
11849             xmlFreeParserCtxt(ctxt);
11850             return(NULL);
11851         }
11852         memset(ctxt->sax, 0, sizeof(xmlSAXHandler));
11853         if (sax->initialized == XML_SAX2_MAGIC)
11854             memcpy(ctxt->sax, sax, sizeof(xmlSAXHandler));
11855         else
11856             memcpy(ctxt->sax, sax, sizeof(xmlSAXHandlerV1));
11857         if (user_data != NULL)
11858             ctxt->userData = user_data;
11859     }
11860     if (filename == NULL) {
11861         ctxt->directory = NULL;
11862     } else {
11863         ctxt->directory = xmlParserGetDirectory(filename);
11864     }
11865
11866     inputStream = xmlNewInputStream(ctxt);
11867     if (inputStream == NULL) {
11868         xmlFreeParserCtxt(ctxt);
11869         xmlFreeParserInputBuffer(buf);
11870         return(NULL);
11871     }
11872
11873     if (filename == NULL)
11874         inputStream->filename = NULL;
11875     else {
11876         inputStream->filename = (char *)
11877             xmlCanonicPath((const xmlChar *) filename);
11878         if (inputStream->filename == NULL) {
11879             xmlFreeParserCtxt(ctxt);
11880             xmlFreeParserInputBuffer(buf);
11881             return(NULL);
11882         }
11883     }
11884     inputStream->buf = buf;
11885     inputStream->base = inputStream->buf->buffer->content;
11886     inputStream->cur = inputStream->buf->buffer->content;
11887     inputStream->end =
11888         &inputStream->buf->buffer->content[inputStream->buf->buffer->use];
11889
11890     inputPush(ctxt, inputStream);
11891
11892     /*
11893      * If the caller didn't provide an initial 'chunk' for determining
11894      * the encoding, we set the context to XML_CHAR_ENCODING_NONE so
11895      * that it can be automatically determined later
11896      */
11897     if ((size == 0) || (chunk == NULL)) {
11898         ctxt->charset = XML_CHAR_ENCODING_NONE;
11899     } else if ((ctxt->input != NULL) && (ctxt->input->buf != NULL)) {
11900         int base = ctxt->input->base - ctxt->input->buf->buffer->content;
11901         int cur = ctxt->input->cur - ctxt->input->base;
11902
11903         xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
11904
11905         ctxt->input->base = ctxt->input->buf->buffer->content + base;
11906         ctxt->input->cur = ctxt->input->base + cur;
11907         ctxt->input->end =
11908             &ctxt->input->buf->buffer->content[ctxt->input->buf->buffer->use];
11909 #ifdef DEBUG_PUSH
11910         xmlGenericError(xmlGenericErrorContext, "PP: pushed %d\n", size);
11911 #endif
11912     }
11913
11914     if (enc != XML_CHAR_ENCODING_NONE) {
11915         xmlSwitchEncoding(ctxt, enc);
11916     }
11917
11918     return(ctxt);
11919 }
11920 #endif /* LIBXML_PUSH_ENABLED */
11921
11922 /**
11923  * xmlStopParser:
11924  * @ctxt:  an XML parser context
11925  *
11926  * Blocks further parser processing
11927  */
11928 void
11929 xmlStopParser(xmlParserCtxtPtr ctxt) {
11930     if (ctxt == NULL)
11931         return;
11932     ctxt->instate = XML_PARSER_EOF;
11933     ctxt->disableSAX = 1;
11934     if (ctxt->input != NULL) {
11935         ctxt->input->cur = BAD_CAST"";
11936         ctxt->input->base = ctxt->input->cur;
11937     }
11938 }
11939
11940 /**
11941  * xmlCreateIOParserCtxt:
11942  * @sax:  a SAX handler
11943  * @user_data:  The user data returned on SAX callbacks
11944  * @ioread:  an I/O read function
11945  * @ioclose:  an I/O close function
11946  * @ioctx:  an I/O handler
11947  * @enc:  the charset encoding if known
11948  *
11949  * Create a parser context for using the XML parser with an existing
11950  * I/O stream
11951  *
11952  * Returns the new parser context or NULL
11953  */
11954 xmlParserCtxtPtr
11955 xmlCreateIOParserCtxt(xmlSAXHandlerPtr sax, void *user_data,
11956         xmlInputReadCallback   ioread, xmlInputCloseCallback  ioclose,
11957         void *ioctx, xmlCharEncoding enc) {
11958     xmlParserCtxtPtr ctxt;
11959     xmlParserInputPtr inputStream;
11960     xmlParserInputBufferPtr buf;
11961
11962     if (ioread == NULL) return(NULL);
11963
11964     buf = xmlParserInputBufferCreateIO(ioread, ioclose, ioctx, enc);
11965     if (buf == NULL) return(NULL);
11966
11967     ctxt = xmlNewParserCtxt();
11968     if (ctxt == NULL) {
11969         xmlFreeParserInputBuffer(buf);
11970         return(NULL);
11971     }
11972     if (sax != NULL) {
11973 #ifdef LIBXML_SAX1_ENABLED
11974         if (ctxt->sax != (xmlSAXHandlerPtr) &xmlDefaultSAXHandler)
11975 #endif /* LIBXML_SAX1_ENABLED */
11976             xmlFree(ctxt->sax);
11977         ctxt->sax = (xmlSAXHandlerPtr) xmlMalloc(sizeof(xmlSAXHandler));
11978         if (ctxt->sax == NULL) {
11979             xmlErrMemory(ctxt, NULL);
11980             xmlFreeParserCtxt(ctxt);
11981             return(NULL);
11982         }
11983         memset(ctxt->sax, 0, sizeof(xmlSAXHandler));
11984         if (sax->initialized == XML_SAX2_MAGIC)
11985             memcpy(ctxt->sax, sax, sizeof(xmlSAXHandler));
11986         else
11987             memcpy(ctxt->sax, sax, sizeof(xmlSAXHandlerV1));
11988         if (user_data != NULL)
11989             ctxt->userData = user_data;
11990     }
11991
11992     inputStream = xmlNewIOInputStream(ctxt, buf, enc);
11993     if (inputStream == NULL) {
11994         xmlFreeParserCtxt(ctxt);
11995         return(NULL);
11996     }
11997     inputPush(ctxt, inputStream);
11998
11999     return(ctxt);
12000 }
12001
12002 #ifdef LIBXML_VALID_ENABLED
12003 /************************************************************************
12004  *                                                                      *
12005  *              Front ends when parsing a DTD                           *
12006  *                                                                      *
12007  ************************************************************************/
12008
12009 /**
12010  * xmlIOParseDTD:
12011  * @sax:  the SAX handler block or NULL
12012  * @input:  an Input Buffer
12013  * @enc:  the charset encoding if known
12014  *
12015  * Load and parse a DTD
12016  *
12017  * Returns the resulting xmlDtdPtr or NULL in case of error.
12018  * @input will be freed by the function in any case.
12019  */
12020
12021 xmlDtdPtr
12022 xmlIOParseDTD(xmlSAXHandlerPtr sax, xmlParserInputBufferPtr input,
12023               xmlCharEncoding enc) {
12024     xmlDtdPtr ret = NULL;
12025     xmlParserCtxtPtr ctxt;
12026     xmlParserInputPtr pinput = NULL;
12027     xmlChar start[4];
12028
12029     if (input == NULL)
12030         return(NULL);
12031
12032     ctxt = xmlNewParserCtxt();
12033     if (ctxt == NULL) {
12034         xmlFreeParserInputBuffer(input);
12035         return(NULL);
12036     }
12037
12038     /*
12039      * Set-up the SAX context
12040      */
12041     if (sax != NULL) {
12042         if (ctxt->sax != NULL)
12043             xmlFree(ctxt->sax);
12044         ctxt->sax = sax;
12045         ctxt->userData = ctxt;
12046     }
12047     xmlDetectSAX2(ctxt);
12048
12049     /*
12050      * generate a parser input from the I/O handler
12051      */
12052
12053     pinput = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
12054     if (pinput == NULL) {
12055         if (sax != NULL) ctxt->sax = NULL;
12056         xmlFreeParserInputBuffer(input);
12057         xmlFreeParserCtxt(ctxt);
12058         return(NULL);
12059     }
12060
12061     /*
12062      * plug some encoding conversion routines here.
12063      */
12064     if (xmlPushInput(ctxt, pinput) < 0) {
12065         if (sax != NULL) ctxt->sax = NULL;
12066         xmlFreeParserCtxt(ctxt);
12067         return(NULL);
12068     }
12069     if (enc != XML_CHAR_ENCODING_NONE) {
12070         xmlSwitchEncoding(ctxt, enc);
12071     }
12072
12073     pinput->filename = NULL;
12074     pinput->line = 1;
12075     pinput->col = 1;
12076     pinput->base = ctxt->input->cur;
12077     pinput->cur = ctxt->input->cur;
12078     pinput->free = NULL;
12079
12080     /*
12081      * let's parse that entity knowing it's an external subset.
12082      */
12083     ctxt->inSubset = 2;
12084     ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
12085     if (ctxt->myDoc == NULL) {
12086         xmlErrMemory(ctxt, "New Doc failed");
12087         return(NULL);
12088     }
12089     ctxt->myDoc->properties = XML_DOC_INTERNAL;
12090     ctxt->myDoc->extSubset = xmlNewDtd(ctxt->myDoc, BAD_CAST "none",
12091                                        BAD_CAST "none", BAD_CAST "none");
12092
12093     if ((enc == XML_CHAR_ENCODING_NONE) &&
12094         ((ctxt->input->end - ctxt->input->cur) >= 4)) {
12095         /*
12096          * Get the 4 first bytes and decode the charset
12097          * if enc != XML_CHAR_ENCODING_NONE
12098          * plug some encoding conversion routines.
12099          */
12100         start[0] = RAW;
12101         start[1] = NXT(1);
12102         start[2] = NXT(2);
12103         start[3] = NXT(3);
12104         enc = xmlDetectCharEncoding(start, 4);
12105         if (enc != XML_CHAR_ENCODING_NONE) {
12106             xmlSwitchEncoding(ctxt, enc);
12107         }
12108     }
12109
12110     xmlParseExternalSubset(ctxt, BAD_CAST "none", BAD_CAST "none");
12111
12112     if (ctxt->myDoc != NULL) {
12113         if (ctxt->wellFormed) {
12114             ret = ctxt->myDoc->extSubset;
12115             ctxt->myDoc->extSubset = NULL;
12116             if (ret != NULL) {
12117                 xmlNodePtr tmp;
12118
12119                 ret->doc = NULL;
12120                 tmp = ret->children;
12121                 while (tmp != NULL) {
12122                     tmp->doc = NULL;
12123                     tmp = tmp->next;
12124                 }
12125             }
12126         } else {
12127             ret = NULL;
12128         }
12129         xmlFreeDoc(ctxt->myDoc);
12130         ctxt->myDoc = NULL;
12131     }
12132     if (sax != NULL) ctxt->sax = NULL;
12133     xmlFreeParserCtxt(ctxt);
12134
12135     return(ret);
12136 }
12137
12138 /**
12139  * xmlSAXParseDTD:
12140  * @sax:  the SAX handler block
12141  * @ExternalID:  a NAME* containing the External ID of the DTD
12142  * @SystemID:  a NAME* containing the URL to the DTD
12143  *
12144  * Load and parse an external subset.
12145  *
12146  * Returns the resulting xmlDtdPtr or NULL in case of error.
12147  */
12148
12149 xmlDtdPtr
12150 xmlSAXParseDTD(xmlSAXHandlerPtr sax, const xmlChar *ExternalID,
12151                           const xmlChar *SystemID) {
12152     xmlDtdPtr ret = NULL;
12153     xmlParserCtxtPtr ctxt;
12154     xmlParserInputPtr input = NULL;
12155     xmlCharEncoding enc;
12156     xmlChar* systemIdCanonic;
12157
12158     if ((ExternalID == NULL) && (SystemID == NULL)) return(NULL);
12159
12160     ctxt = xmlNewParserCtxt();
12161     if (ctxt == NULL) {
12162         return(NULL);
12163     }
12164
12165     /*
12166      * Set-up the SAX context
12167      */
12168     if (sax != NULL) {
12169         if (ctxt->sax != NULL)
12170             xmlFree(ctxt->sax);
12171         ctxt->sax = sax;
12172         ctxt->userData = ctxt;
12173     }
12174
12175     /*
12176      * Canonicalise the system ID
12177      */
12178     systemIdCanonic = xmlCanonicPath(SystemID);
12179     if ((SystemID != NULL) && (systemIdCanonic == NULL)) {
12180         xmlFreeParserCtxt(ctxt);
12181         return(NULL);
12182     }
12183
12184     /*
12185      * Ask the Entity resolver to load the damn thing
12186      */
12187
12188     if ((ctxt->sax != NULL) && (ctxt->sax->resolveEntity != NULL))
12189         input = ctxt->sax->resolveEntity(ctxt->userData, ExternalID,
12190                                          systemIdCanonic);
12191     if (input == NULL) {
12192         if (sax != NULL) ctxt->sax = NULL;
12193         xmlFreeParserCtxt(ctxt);
12194         if (systemIdCanonic != NULL)
12195             xmlFree(systemIdCanonic);
12196         return(NULL);
12197     }
12198
12199     /*
12200      * plug some encoding conversion routines here.
12201      */
12202     if (xmlPushInput(ctxt, input) < 0) {
12203         if (sax != NULL) ctxt->sax = NULL;
12204         xmlFreeParserCtxt(ctxt);
12205         if (systemIdCanonic != NULL)
12206             xmlFree(systemIdCanonic);
12207         return(NULL);
12208     }
12209     if ((ctxt->input->end - ctxt->input->cur) >= 4) {
12210         enc = xmlDetectCharEncoding(ctxt->input->cur, 4);
12211         xmlSwitchEncoding(ctxt, enc);
12212     }
12213
12214     if (input->filename == NULL)
12215         input->filename = (char *) systemIdCanonic;
12216     else
12217         xmlFree(systemIdCanonic);
12218     input->line = 1;
12219     input->col = 1;
12220     input->base = ctxt->input->cur;
12221     input->cur = ctxt->input->cur;
12222     input->free = NULL;
12223
12224     /*
12225      * let's parse that entity knowing it's an external subset.
12226      */
12227     ctxt->inSubset = 2;
12228     ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
12229     if (ctxt->myDoc == NULL) {
12230         xmlErrMemory(ctxt, "New Doc failed");
12231         if (sax != NULL) ctxt->sax = NULL;
12232         xmlFreeParserCtxt(ctxt);
12233         return(NULL);
12234     }
12235     ctxt->myDoc->properties = XML_DOC_INTERNAL;
12236     ctxt->myDoc->extSubset = xmlNewDtd(ctxt->myDoc, BAD_CAST "none",
12237                                        ExternalID, SystemID);
12238     xmlParseExternalSubset(ctxt, ExternalID, SystemID);
12239
12240     if (ctxt->myDoc != NULL) {
12241         if (ctxt->wellFormed) {
12242             ret = ctxt->myDoc->extSubset;
12243             ctxt->myDoc->extSubset = NULL;
12244             if (ret != NULL) {
12245                 xmlNodePtr tmp;
12246
12247                 ret->doc = NULL;
12248                 tmp = ret->children;
12249                 while (tmp != NULL) {
12250                     tmp->doc = NULL;
12251                     tmp = tmp->next;
12252                 }
12253             }
12254         } else {
12255             ret = NULL;
12256         }
12257         xmlFreeDoc(ctxt->myDoc);
12258         ctxt->myDoc = NULL;
12259     }
12260     if (sax != NULL) ctxt->sax = NULL;
12261     xmlFreeParserCtxt(ctxt);
12262
12263     return(ret);
12264 }
12265
12266
12267 /**
12268  * xmlParseDTD:
12269  * @ExternalID:  a NAME* containing the External ID of the DTD
12270  * @SystemID:  a NAME* containing the URL to the DTD
12271  *
12272  * Load and parse an external subset.
12273  *
12274  * Returns the resulting xmlDtdPtr or NULL in case of error.
12275  */
12276
12277 xmlDtdPtr
12278 xmlParseDTD(const xmlChar *ExternalID, const xmlChar *SystemID) {
12279     return(xmlSAXParseDTD(NULL, ExternalID, SystemID));
12280 }
12281 #endif /* LIBXML_VALID_ENABLED */
12282
12283 /************************************************************************
12284  *                                                                      *
12285  *              Front ends when parsing an Entity                       *
12286  *                                                                      *
12287  ************************************************************************/
12288
12289 /**
12290  * xmlParseCtxtExternalEntity:
12291  * @ctx:  the existing parsing context
12292  * @URL:  the URL for the entity to load
12293  * @ID:  the System ID for the entity to load
12294  * @lst:  the return value for the set of parsed nodes
12295  *
12296  * Parse an external general entity within an existing parsing context
12297  * An external general parsed entity is well-formed if it matches the
12298  * production labeled extParsedEnt.
12299  *
12300  * [78] extParsedEnt ::= TextDecl? content
12301  *
12302  * Returns 0 if the entity is well formed, -1 in case of args problem and
12303  *    the parser error code otherwise
12304  */
12305
12306 int
12307 xmlParseCtxtExternalEntity(xmlParserCtxtPtr ctx, const xmlChar *URL,
12308                        const xmlChar *ID, xmlNodePtr *lst) {
12309     xmlParserCtxtPtr ctxt;
12310     xmlDocPtr newDoc;
12311     xmlNodePtr newRoot;
12312     xmlSAXHandlerPtr oldsax = NULL;
12313     int ret = 0;
12314     xmlChar start[4];
12315     xmlCharEncoding enc;
12316
12317     if (ctx == NULL) return(-1);
12318
12319     if (((ctx->depth > 40) && ((ctx->options & XML_PARSE_HUGE) == 0)) ||
12320         (ctx->depth > 1024)) {
12321         return(XML_ERR_ENTITY_LOOP);
12322     }
12323
12324     if (lst != NULL)
12325         *lst = NULL;
12326     if ((URL == NULL) && (ID == NULL))
12327         return(-1);
12328     if (ctx->myDoc == NULL) /* @@ relax but check for dereferences */
12329         return(-1);
12330
12331     ctxt = xmlCreateEntityParserCtxtInternal(URL, ID, NULL, ctx);
12332     if (ctxt == NULL) {
12333         return(-1);
12334     }
12335
12336     oldsax = ctxt->sax;
12337     ctxt->sax = ctx->sax;
12338     xmlDetectSAX2(ctxt);
12339     newDoc = xmlNewDoc(BAD_CAST "1.0");
12340     if (newDoc == NULL) {
12341         xmlFreeParserCtxt(ctxt);
12342         return(-1);
12343     }
12344     newDoc->properties = XML_DOC_INTERNAL;
12345     if (ctx->myDoc->dict) {
12346         newDoc->dict = ctx->myDoc->dict;
12347         xmlDictReference(newDoc->dict);
12348     }
12349     if (ctx->myDoc != NULL) {
12350         newDoc->intSubset = ctx->myDoc->intSubset;
12351         newDoc->extSubset = ctx->myDoc->extSubset;
12352     }
12353     if (ctx->myDoc->URL != NULL) {
12354         newDoc->URL = xmlStrdup(ctx->myDoc->URL);
12355     }
12356     newRoot = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL);
12357     if (newRoot == NULL) {
12358         ctxt->sax = oldsax;
12359         xmlFreeParserCtxt(ctxt);
12360         newDoc->intSubset = NULL;
12361         newDoc->extSubset = NULL;
12362         xmlFreeDoc(newDoc);
12363         return(-1);
12364     }
12365     xmlAddChild((xmlNodePtr) newDoc, newRoot);
12366     nodePush(ctxt, newDoc->children);
12367     if (ctx->myDoc == NULL) {
12368         ctxt->myDoc = newDoc;
12369     } else {
12370         ctxt->myDoc = ctx->myDoc;
12371         newDoc->children->doc = ctx->myDoc;
12372     }
12373
12374     /*
12375      * Get the 4 first bytes and decode the charset
12376      * if enc != XML_CHAR_ENCODING_NONE
12377      * plug some encoding conversion routines.
12378      */
12379     GROW
12380     if ((ctxt->input->end - ctxt->input->cur) >= 4) {
12381         start[0] = RAW;
12382         start[1] = NXT(1);
12383         start[2] = NXT(2);
12384         start[3] = NXT(3);
12385         enc = xmlDetectCharEncoding(start, 4);
12386         if (enc != XML_CHAR_ENCODING_NONE) {
12387             xmlSwitchEncoding(ctxt, enc);
12388         }
12389     }
12390
12391     /*
12392      * Parse a possible text declaration first
12393      */
12394     if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
12395         xmlParseTextDecl(ctxt);
12396         /*
12397          * An XML-1.0 document can't reference an entity not XML-1.0
12398          */
12399         if ((xmlStrEqual(ctx->version, BAD_CAST "1.0")) &&
12400             (!xmlStrEqual(ctxt->input->version, BAD_CAST "1.0"))) {
12401             xmlFatalErrMsg(ctxt, XML_ERR_VERSION_MISMATCH,
12402                            "Version mismatch between document and entity\n");
12403         }
12404     }
12405
12406     /*
12407      * Doing validity checking on chunk doesn't make sense
12408      */
12409     ctxt->instate = XML_PARSER_CONTENT;
12410     ctxt->validate = ctx->validate;
12411     ctxt->valid = ctx->valid;
12412     ctxt->loadsubset = ctx->loadsubset;
12413     ctxt->depth = ctx->depth + 1;
12414     ctxt->replaceEntities = ctx->replaceEntities;
12415     if (ctxt->validate) {
12416         ctxt->vctxt.error = ctx->vctxt.error;
12417         ctxt->vctxt.warning = ctx->vctxt.warning;
12418     } else {
12419         ctxt->vctxt.error = NULL;
12420         ctxt->vctxt.warning = NULL;
12421     }
12422     ctxt->vctxt.nodeTab = NULL;
12423     ctxt->vctxt.nodeNr = 0;
12424     ctxt->vctxt.nodeMax = 0;
12425     ctxt->vctxt.node = NULL;
12426     if (ctxt->dict != NULL) xmlDictFree(ctxt->dict);
12427     ctxt->dict = ctx->dict;
12428     ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3);
12429     ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5);
12430     ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36);
12431     ctxt->dictNames = ctx->dictNames;
12432     ctxt->attsDefault = ctx->attsDefault;
12433     ctxt->attsSpecial = ctx->attsSpecial;
12434     ctxt->linenumbers = ctx->linenumbers;
12435
12436     xmlParseContent(ctxt);
12437
12438     ctx->validate = ctxt->validate;
12439     ctx->valid = ctxt->valid;
12440     if ((RAW == '<') && (NXT(1) == '/')) {
12441         xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
12442     } else if (RAW != 0) {
12443         xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
12444     }
12445     if (ctxt->node != newDoc->children) {
12446         xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
12447     }
12448
12449     if (!ctxt->wellFormed) {
12450         if (ctxt->errNo == 0)
12451             ret = 1;
12452         else
12453             ret = ctxt->errNo;
12454     } else {
12455         if (lst != NULL) {
12456             xmlNodePtr cur;
12457
12458             /*
12459              * Return the newly created nodeset after unlinking it from
12460              * they pseudo parent.
12461              */
12462             cur = newDoc->children->children;
12463             *lst = cur;
12464             while (cur != NULL) {
12465                 cur->parent = NULL;
12466                 cur = cur->next;
12467             }
12468             newDoc->children->children = NULL;
12469         }
12470         ret = 0;
12471     }
12472     ctxt->sax = oldsax;
12473     ctxt->dict = NULL;
12474     ctxt->attsDefault = NULL;
12475     ctxt->attsSpecial = NULL;
12476     xmlFreeParserCtxt(ctxt);
12477     newDoc->intSubset = NULL;
12478     newDoc->extSubset = NULL;
12479     xmlFreeDoc(newDoc);
12480
12481     return(ret);
12482 }
12483
12484 /**
12485  * xmlParseExternalEntityPrivate:
12486  * @doc:  the document the chunk pertains to
12487  * @oldctxt:  the previous parser context if available
12488  * @sax:  the SAX handler bloc (possibly NULL)
12489  * @user_data:  The user data returned on SAX callbacks (possibly NULL)
12490  * @depth:  Used for loop detection, use 0
12491  * @URL:  the URL for the entity to load
12492  * @ID:  the System ID for the entity to load
12493  * @list:  the return value for the set of parsed nodes
12494  *
12495  * Private version of xmlParseExternalEntity()
12496  *
12497  * Returns 0 if the entity is well formed, -1 in case of args problem and
12498  *    the parser error code otherwise
12499  */
12500
12501 static xmlParserErrors
12502 xmlParseExternalEntityPrivate(xmlDocPtr doc, xmlParserCtxtPtr oldctxt,
12503                       xmlSAXHandlerPtr sax,
12504                       void *user_data, int depth, const xmlChar *URL,
12505                       const xmlChar *ID, xmlNodePtr *list) {
12506     xmlParserCtxtPtr ctxt;
12507     xmlDocPtr newDoc;
12508     xmlNodePtr newRoot;
12509     xmlSAXHandlerPtr oldsax = NULL;
12510     xmlParserErrors ret = XML_ERR_OK;
12511     xmlChar start[4];
12512     xmlCharEncoding enc;
12513
12514     if (((depth > 40) &&
12515         ((oldctxt == NULL) || (oldctxt->options & XML_PARSE_HUGE) == 0)) ||
12516         (depth > 1024)) {
12517         return(XML_ERR_ENTITY_LOOP);
12518     }
12519
12520     if (list != NULL)
12521         *list = NULL;
12522     if ((URL == NULL) && (ID == NULL))
12523         return(XML_ERR_INTERNAL_ERROR);
12524     if (doc == NULL)
12525         return(XML_ERR_INTERNAL_ERROR);
12526
12527
12528     ctxt = xmlCreateEntityParserCtxtInternal(URL, ID, NULL, oldctxt);
12529     if (ctxt == NULL) return(XML_WAR_UNDECLARED_ENTITY);
12530     ctxt->userData = ctxt;
12531     if (oldctxt != NULL) {
12532         ctxt->_private = oldctxt->_private;
12533         ctxt->loadsubset = oldctxt->loadsubset;
12534         ctxt->validate = oldctxt->validate;
12535         ctxt->external = oldctxt->external;
12536         ctxt->record_info = oldctxt->record_info;
12537         ctxt->node_seq.maximum = oldctxt->node_seq.maximum;
12538         ctxt->node_seq.length = oldctxt->node_seq.length;
12539         ctxt->node_seq.buffer = oldctxt->node_seq.buffer;
12540     } else {
12541         /*
12542          * Doing validity checking on chunk without context
12543          * doesn't make sense
12544          */
12545         ctxt->_private = NULL;
12546         ctxt->validate = 0;
12547         ctxt->external = 2;
12548         ctxt->loadsubset = 0;
12549     }
12550     if (sax != NULL) {
12551         oldsax = ctxt->sax;
12552         ctxt->sax = sax;
12553         if (user_data != NULL)
12554             ctxt->userData = user_data;
12555     }
12556     xmlDetectSAX2(ctxt);
12557     newDoc = xmlNewDoc(BAD_CAST "1.0");
12558     if (newDoc == NULL) {
12559         ctxt->node_seq.maximum = 0;
12560         ctxt->node_seq.length = 0;
12561         ctxt->node_seq.buffer = NULL;
12562         xmlFreeParserCtxt(ctxt);
12563         return(XML_ERR_INTERNAL_ERROR);
12564     }
12565     newDoc->properties = XML_DOC_INTERNAL;
12566     newDoc->intSubset = doc->intSubset;
12567     newDoc->extSubset = doc->extSubset;
12568     newDoc->dict = doc->dict;
12569     xmlDictReference(newDoc->dict);
12570
12571     if (doc->URL != NULL) {
12572         newDoc->URL = xmlStrdup(doc->URL);
12573     }
12574     newRoot = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL);
12575     if (newRoot == NULL) {
12576         if (sax != NULL)
12577             ctxt->sax = oldsax;
12578         ctxt->node_seq.maximum = 0;
12579         ctxt->node_seq.length = 0;
12580         ctxt->node_seq.buffer = NULL;
12581         xmlFreeParserCtxt(ctxt);
12582         newDoc->intSubset = NULL;
12583         newDoc->extSubset = NULL;
12584         xmlFreeDoc(newDoc);
12585         return(XML_ERR_INTERNAL_ERROR);
12586     }
12587     xmlAddChild((xmlNodePtr) newDoc, newRoot);
12588     nodePush(ctxt, newDoc->children);
12589     ctxt->myDoc = doc;
12590     newRoot->doc = doc;
12591
12592     /*
12593      * Get the 4 first bytes and decode the charset
12594      * if enc != XML_CHAR_ENCODING_NONE
12595      * plug some encoding conversion routines.
12596      */
12597     GROW;
12598     if ((ctxt->input->end - ctxt->input->cur) >= 4) {
12599         start[0] = RAW;
12600         start[1] = NXT(1);
12601         start[2] = NXT(2);
12602         start[3] = NXT(3);
12603         enc = xmlDetectCharEncoding(start, 4);
12604         if (enc != XML_CHAR_ENCODING_NONE) {
12605             xmlSwitchEncoding(ctxt, enc);
12606         }
12607     }
12608
12609     /*
12610      * Parse a possible text declaration first
12611      */
12612     if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
12613         xmlParseTextDecl(ctxt);
12614     }
12615
12616     ctxt->instate = XML_PARSER_CONTENT;
12617     ctxt->depth = depth;
12618
12619     xmlParseContent(ctxt);
12620
12621     if ((RAW == '<') && (NXT(1) == '/')) {
12622         xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
12623     } else if (RAW != 0) {
12624         xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
12625     }
12626     if (ctxt->node != newDoc->children) {
12627         xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
12628     }
12629
12630     if (!ctxt->wellFormed) {
12631         if (ctxt->errNo == 0)
12632             ret = XML_ERR_INTERNAL_ERROR;
12633         else
12634             ret = (xmlParserErrors)ctxt->errNo;
12635     } else {
12636         if (list != NULL) {
12637             xmlNodePtr cur;
12638
12639             /*
12640              * Return the newly created nodeset after unlinking it from
12641              * they pseudo parent.
12642              */
12643             cur = newDoc->children->children;
12644             *list = cur;
12645             while (cur != NULL) {
12646                 cur->parent = NULL;
12647                 cur = cur->next;
12648             }
12649             newDoc->children->children = NULL;
12650         }
12651         ret = XML_ERR_OK;
12652     }
12653
12654     /*
12655      * Record in the parent context the number of entities replacement
12656      * done when parsing that reference.
12657      */
12658     if (oldctxt != NULL)
12659         oldctxt->nbentities += ctxt->nbentities;
12660
12661     /*
12662      * Also record the size of the entity parsed
12663      */
12664     if (ctxt->input != NULL) {
12665         oldctxt->sizeentities += ctxt->input->consumed;
12666         oldctxt->sizeentities += (ctxt->input->cur - ctxt->input->base);
12667     }
12668     /*
12669      * And record the last error if any
12670      */
12671     if (ctxt->lastError.code != XML_ERR_OK)
12672         xmlCopyError(&ctxt->lastError, &oldctxt->lastError);
12673
12674     if (sax != NULL)
12675         ctxt->sax = oldsax;
12676     oldctxt->node_seq.maximum = ctxt->node_seq.maximum;
12677     oldctxt->node_seq.length = ctxt->node_seq.length;
12678     oldctxt->node_seq.buffer = ctxt->node_seq.buffer;
12679     ctxt->node_seq.maximum = 0;
12680     ctxt->node_seq.length = 0;
12681     ctxt->node_seq.buffer = NULL;
12682     xmlFreeParserCtxt(ctxt);
12683     newDoc->intSubset = NULL;
12684     newDoc->extSubset = NULL;
12685     xmlFreeDoc(newDoc);
12686
12687     return(ret);
12688 }
12689
12690 #ifdef LIBXML_SAX1_ENABLED
12691 /**
12692  * xmlParseExternalEntity:
12693  * @doc:  the document the chunk pertains to
12694  * @sax:  the SAX handler bloc (possibly NULL)
12695  * @user_data:  The user data returned on SAX callbacks (possibly NULL)
12696  * @depth:  Used for loop detection, use 0
12697  * @URL:  the URL for the entity to load
12698  * @ID:  the System ID for the entity to load
12699  * @lst:  the return value for the set of parsed nodes
12700  *
12701  * Parse an external general entity
12702  * An external general parsed entity is well-formed if it matches the
12703  * production labeled extParsedEnt.
12704  *
12705  * [78] extParsedEnt ::= TextDecl? content
12706  *
12707  * Returns 0 if the entity is well formed, -1 in case of args problem and
12708  *    the parser error code otherwise
12709  */
12710
12711 int
12712 xmlParseExternalEntity(xmlDocPtr doc, xmlSAXHandlerPtr sax, void *user_data,
12713           int depth, const xmlChar *URL, const xmlChar *ID, xmlNodePtr *lst) {
12714     return(xmlParseExternalEntityPrivate(doc, NULL, sax, user_data, depth, URL,
12715                                        ID, lst));
12716 }
12717
12718 /**
12719  * xmlParseBalancedChunkMemory:
12720  * @doc:  the document the chunk pertains to
12721  * @sax:  the SAX handler bloc (possibly NULL)
12722  * @user_data:  The user data returned on SAX callbacks (possibly NULL)
12723  * @depth:  Used for loop detection, use 0
12724  * @string:  the input string in UTF8 or ISO-Latin (zero terminated)
12725  * @lst:  the return value for the set of parsed nodes
12726  *
12727  * Parse a well-balanced chunk of an XML document
12728  * called by the parser
12729  * The allowed sequence for the Well Balanced Chunk is the one defined by
12730  * the content production in the XML grammar:
12731  *
12732  * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
12733  *
12734  * Returns 0 if the chunk is well balanced, -1 in case of args problem and
12735  *    the parser error code otherwise
12736  */
12737
12738 int
12739 xmlParseBalancedChunkMemory(xmlDocPtr doc, xmlSAXHandlerPtr sax,
12740      void *user_data, int depth, const xmlChar *string, xmlNodePtr *lst) {
12741     return xmlParseBalancedChunkMemoryRecover( doc, sax, user_data,
12742                                                 depth, string, lst, 0 );
12743 }
12744 #endif /* LIBXML_SAX1_ENABLED */
12745
12746 /**
12747  * xmlParseBalancedChunkMemoryInternal:
12748  * @oldctxt:  the existing parsing context
12749  * @string:  the input string in UTF8 or ISO-Latin (zero terminated)
12750  * @user_data:  the user data field for the parser context
12751  * @lst:  the return value for the set of parsed nodes
12752  *
12753  *
12754  * Parse a well-balanced chunk of an XML document
12755  * called by the parser
12756  * The allowed sequence for the Well Balanced Chunk is the one defined by
12757  * the content production in the XML grammar:
12758  *
12759  * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
12760  *
12761  * Returns XML_ERR_OK if the chunk is well balanced, and the parser
12762  * error code otherwise
12763  *
12764  * In case recover is set to 1, the nodelist will not be empty even if
12765  * the parsed chunk is not well balanced.
12766  */
12767 static xmlParserErrors
12768 xmlParseBalancedChunkMemoryInternal(xmlParserCtxtPtr oldctxt,
12769         const xmlChar *string, void *user_data, xmlNodePtr *lst) {
12770     xmlParserCtxtPtr ctxt;
12771     xmlDocPtr newDoc = NULL;
12772     xmlNodePtr newRoot;
12773     xmlSAXHandlerPtr oldsax = NULL;
12774     xmlNodePtr content = NULL;
12775     xmlNodePtr last = NULL;
12776     int size;
12777     xmlParserErrors ret = XML_ERR_OK;
12778 #ifdef SAX2
12779     int i;
12780 #endif
12781
12782     if (((oldctxt->depth > 40) && ((oldctxt->options & XML_PARSE_HUGE) == 0)) ||
12783         (oldctxt->depth >  1024)) {
12784         return(XML_ERR_ENTITY_LOOP);
12785     }
12786
12787
12788     if (lst != NULL)
12789         *lst = NULL;
12790     if (string == NULL)
12791         return(XML_ERR_INTERNAL_ERROR);
12792
12793     size = xmlStrlen(string);
12794
12795     ctxt = xmlCreateMemoryParserCtxt((char *) string, size);
12796     if (ctxt == NULL) return(XML_WAR_UNDECLARED_ENTITY);
12797     if (user_data != NULL)
12798         ctxt->userData = user_data;
12799     else
12800         ctxt->userData = ctxt;
12801     if (ctxt->dict != NULL) xmlDictFree(ctxt->dict);
12802     ctxt->dict = oldctxt->dict;
12803     ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3);
12804     ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5);
12805     ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36);
12806
12807 #ifdef SAX2
12808     /* propagate namespaces down the entity */
12809     for (i = 0;i < oldctxt->nsNr;i += 2) {
12810         nsPush(ctxt, oldctxt->nsTab[i], oldctxt->nsTab[i+1]);
12811     }
12812 #endif
12813
12814     oldsax = ctxt->sax;
12815     ctxt->sax = oldctxt->sax;
12816     xmlDetectSAX2(ctxt);
12817     ctxt->replaceEntities = oldctxt->replaceEntities;
12818     ctxt->options = oldctxt->options;
12819
12820     ctxt->_private = oldctxt->_private;
12821     if (oldctxt->myDoc == NULL) {
12822         newDoc = xmlNewDoc(BAD_CAST "1.0");
12823         if (newDoc == NULL) {
12824             ctxt->sax = oldsax;
12825             ctxt->dict = NULL;
12826             xmlFreeParserCtxt(ctxt);
12827             return(XML_ERR_INTERNAL_ERROR);
12828         }
12829         newDoc->properties = XML_DOC_INTERNAL;
12830         newDoc->dict = ctxt->dict;
12831         xmlDictReference(newDoc->dict);
12832         ctxt->myDoc = newDoc;
12833     } else {
12834         ctxt->myDoc = oldctxt->myDoc;
12835         content = ctxt->myDoc->children;
12836         last = ctxt->myDoc->last;
12837     }
12838     newRoot = xmlNewDocNode(ctxt->myDoc, NULL, BAD_CAST "pseudoroot", NULL);
12839     if (newRoot == NULL) {
12840         ctxt->sax = oldsax;
12841         ctxt->dict = NULL;
12842         xmlFreeParserCtxt(ctxt);
12843         if (newDoc != NULL) {
12844             xmlFreeDoc(newDoc);
12845         }
12846         return(XML_ERR_INTERNAL_ERROR);
12847     }
12848     ctxt->myDoc->children = NULL;
12849     ctxt->myDoc->last = NULL;
12850     xmlAddChild((xmlNodePtr) ctxt->myDoc, newRoot);
12851     nodePush(ctxt, ctxt->myDoc->children);
12852     ctxt->instate = XML_PARSER_CONTENT;
12853     ctxt->depth = oldctxt->depth + 1;
12854
12855     ctxt->validate = 0;
12856     ctxt->loadsubset = oldctxt->loadsubset;
12857     if ((oldctxt->validate) || (oldctxt->replaceEntities != 0)) {
12858         /*
12859          * ID/IDREF registration will be done in xmlValidateElement below
12860          */
12861         ctxt->loadsubset |= XML_SKIP_IDS;
12862     }
12863     ctxt->dictNames = oldctxt->dictNames;
12864     ctxt->attsDefault = oldctxt->attsDefault;
12865     ctxt->attsSpecial = oldctxt->attsSpecial;
12866
12867     xmlParseContent(ctxt);
12868     if ((RAW == '<') && (NXT(1) == '/')) {
12869         xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
12870     } else if (RAW != 0) {
12871         xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
12872     }
12873     if (ctxt->node != ctxt->myDoc->children) {
12874         xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
12875     }
12876
12877     if (!ctxt->wellFormed) {
12878         if (ctxt->errNo == 0)
12879             ret = XML_ERR_INTERNAL_ERROR;
12880         else
12881             ret = (xmlParserErrors)ctxt->errNo;
12882     } else {
12883       ret = XML_ERR_OK;
12884     }
12885
12886     if ((lst != NULL) && (ret == XML_ERR_OK)) {
12887         xmlNodePtr cur;
12888
12889         /*
12890          * Return the newly created nodeset after unlinking it from
12891          * they pseudo parent.
12892          */
12893         cur = ctxt->myDoc->children->children;
12894         *lst = cur;
12895         while (cur != NULL) {
12896 #ifdef LIBXML_VALID_ENABLED
12897             if ((oldctxt->validate) && (oldctxt->wellFormed) &&
12898                 (oldctxt->myDoc) && (oldctxt->myDoc->intSubset) &&
12899                 (cur->type == XML_ELEMENT_NODE)) {
12900                 oldctxt->valid &= xmlValidateElement(&oldctxt->vctxt,
12901                         oldctxt->myDoc, cur);
12902             }
12903 #endif /* LIBXML_VALID_ENABLED */
12904             cur->parent = NULL;
12905             cur = cur->next;
12906         }
12907         ctxt->myDoc->children->children = NULL;
12908     }
12909     if (ctxt->myDoc != NULL) {
12910         xmlFreeNode(ctxt->myDoc->children);
12911         ctxt->myDoc->children = content;
12912         ctxt->myDoc->last = last;
12913     }
12914
12915     /*
12916      * Record in the parent context the number of entities replacement
12917      * done when parsing that reference.
12918      */
12919     if (oldctxt != NULL)
12920         oldctxt->nbentities += ctxt->nbentities;
12921
12922     /*
12923      * Also record the last error if any
12924      */
12925     if (ctxt->lastError.code != XML_ERR_OK)
12926         xmlCopyError(&ctxt->lastError, &oldctxt->lastError);
12927
12928     ctxt->sax = oldsax;
12929     ctxt->dict = NULL;
12930     ctxt->attsDefault = NULL;
12931     ctxt->attsSpecial = NULL;
12932     xmlFreeParserCtxt(ctxt);
12933     if (newDoc != NULL) {
12934         xmlFreeDoc(newDoc);
12935     }
12936
12937     return(ret);
12938 }
12939
12940 /**
12941  * xmlParseInNodeContext:
12942  * @node:  the context node
12943  * @data:  the input string
12944  * @datalen:  the input string length in bytes
12945  * @options:  a combination of xmlParserOption
12946  * @lst:  the return value for the set of parsed nodes
12947  *
12948  * Parse a well-balanced chunk of an XML document
12949  * within the context (DTD, namespaces, etc ...) of the given node.
12950  *
12951  * The allowed sequence for the data is a Well Balanced Chunk defined by
12952  * the content production in the XML grammar:
12953  *
12954  * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
12955  *
12956  * Returns XML_ERR_OK if the chunk is well balanced, and the parser
12957  * error code otherwise
12958  */
12959 xmlParserErrors
12960 xmlParseInNodeContext(xmlNodePtr node, const char *data, int datalen,
12961                       int options, xmlNodePtr *lst) {
12962 #ifdef SAX2
12963     xmlParserCtxtPtr ctxt;
12964     xmlDocPtr doc = NULL;
12965     xmlNodePtr fake, cur;
12966     int nsnr = 0;
12967
12968     xmlParserErrors ret = XML_ERR_OK;
12969
12970     /*
12971      * check all input parameters, grab the document
12972      */
12973     if ((lst == NULL) || (node == NULL) || (data == NULL) || (datalen < 0))
12974         return(XML_ERR_INTERNAL_ERROR);
12975     switch (node->type) {
12976         case XML_ELEMENT_NODE:
12977         case XML_ATTRIBUTE_NODE:
12978         case XML_TEXT_NODE:
12979         case XML_CDATA_SECTION_NODE:
12980         case XML_ENTITY_REF_NODE:
12981         case XML_PI_NODE:
12982         case XML_COMMENT_NODE:
12983         case XML_DOCUMENT_NODE:
12984         case XML_HTML_DOCUMENT_NODE:
12985             break;
12986         default:
12987             return(XML_ERR_INTERNAL_ERROR);
12988
12989     }
12990     while ((node != NULL) && (node->type != XML_ELEMENT_NODE) &&
12991            (node->type != XML_DOCUMENT_NODE) &&
12992            (node->type != XML_HTML_DOCUMENT_NODE))
12993         node = node->parent;
12994     if (node == NULL)
12995         return(XML_ERR_INTERNAL_ERROR);
12996     if (node->type == XML_ELEMENT_NODE)
12997         doc = node->doc;
12998     else
12999         doc = (xmlDocPtr) node;
13000     if (doc == NULL)
13001         return(XML_ERR_INTERNAL_ERROR);
13002
13003     /*
13004      * allocate a context and set-up everything not related to the
13005      * node position in the tree
13006      */
13007     if (doc->type == XML_DOCUMENT_NODE)
13008         ctxt = xmlCreateMemoryParserCtxt((char *) data, datalen);
13009 #ifdef LIBXML_HTML_ENABLED
13010     else if (doc->type == XML_HTML_DOCUMENT_NODE) {
13011         ctxt = htmlCreateMemoryParserCtxt((char *) data, datalen);
13012         /*
13013          * When parsing in context, it makes no sense to add implied
13014          * elements like html/body/etc...
13015          */
13016         options |= HTML_PARSE_NOIMPLIED;
13017     }
13018 #endif
13019     else
13020         return(XML_ERR_INTERNAL_ERROR);
13021
13022     if (ctxt == NULL)
13023         return(XML_ERR_NO_MEMORY);
13024
13025     /*
13026      * Use input doc's dict if present, else assure XML_PARSE_NODICT is set.
13027      * We need a dictionary for xmlDetectSAX2, so if there's no doc dict
13028      * we must wait until the last moment to free the original one.
13029      */
13030     if (doc->dict != NULL) {
13031         if (ctxt->dict != NULL)
13032             xmlDictFree(ctxt->dict);
13033         ctxt->dict = doc->dict;
13034     } else
13035         options |= XML_PARSE_NODICT;
13036
13037     if (doc->encoding != NULL) {
13038         xmlCharEncodingHandlerPtr hdlr;
13039
13040         if (ctxt->encoding != NULL)
13041             xmlFree((xmlChar *) ctxt->encoding);
13042         ctxt->encoding = xmlStrdup((const xmlChar *) doc->encoding);
13043
13044         hdlr = xmlFindCharEncodingHandler(doc->encoding);
13045         if (hdlr != NULL) {
13046             xmlSwitchToEncoding(ctxt, hdlr);
13047         } else {
13048             return(XML_ERR_UNSUPPORTED_ENCODING);
13049         }
13050     }
13051
13052     xmlCtxtUseOptionsInternal(ctxt, options, NULL);
13053     xmlDetectSAX2(ctxt);
13054     ctxt->myDoc = doc;
13055
13056     fake = xmlNewComment(NULL);
13057     if (fake == NULL) {
13058         xmlFreeParserCtxt(ctxt);
13059         return(XML_ERR_NO_MEMORY);
13060     }
13061     xmlAddChild(node, fake);
13062
13063     if (node->type == XML_ELEMENT_NODE) {
13064         nodePush(ctxt, node);
13065         /*
13066          * initialize the SAX2 namespaces stack
13067          */
13068         cur = node;
13069         while ((cur != NULL) && (cur->type == XML_ELEMENT_NODE)) {
13070             xmlNsPtr ns = cur->nsDef;
13071             const xmlChar *iprefix, *ihref;
13072
13073             while (ns != NULL) {
13074                 if (ctxt->dict) {
13075                     iprefix = xmlDictLookup(ctxt->dict, ns->prefix, -1);
13076                     ihref = xmlDictLookup(ctxt->dict, ns->href, -1);
13077                 } else {
13078                     iprefix = ns->prefix;
13079                     ihref = ns->href;
13080                 }
13081
13082                 if (xmlGetNamespace(ctxt, iprefix) == NULL) {
13083                     nsPush(ctxt, iprefix, ihref);
13084                     nsnr++;
13085                 }
13086                 ns = ns->next;
13087             }
13088             cur = cur->parent;
13089         }
13090         ctxt->instate = XML_PARSER_CONTENT;
13091     }
13092
13093     if ((ctxt->validate) || (ctxt->replaceEntities != 0)) {
13094         /*
13095          * ID/IDREF registration will be done in xmlValidateElement below
13096          */
13097         ctxt->loadsubset |= XML_SKIP_IDS;
13098     }
13099
13100 #ifdef LIBXML_HTML_ENABLED
13101     if (doc->type == XML_HTML_DOCUMENT_NODE)
13102         __htmlParseContent(ctxt);
13103     else
13104 #endif
13105         xmlParseContent(ctxt);
13106
13107     nsPop(ctxt, nsnr);
13108     if ((RAW == '<') && (NXT(1) == '/')) {
13109         xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
13110     } else if (RAW != 0) {
13111         xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
13112     }
13113     if ((ctxt->node != NULL) && (ctxt->node != node)) {
13114         xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
13115         ctxt->wellFormed = 0;
13116     }
13117
13118     if (!ctxt->wellFormed) {
13119         if (ctxt->errNo == 0)
13120             ret = XML_ERR_INTERNAL_ERROR;
13121         else
13122             ret = (xmlParserErrors)ctxt->errNo;
13123     } else {
13124         ret = XML_ERR_OK;
13125     }
13126
13127     /*
13128      * Return the newly created nodeset after unlinking it from
13129      * the pseudo sibling.
13130      */
13131
13132     cur = fake->next;
13133     fake->next = NULL;
13134     node->last = fake;
13135
13136     if (cur != NULL) {
13137         cur->prev = NULL;
13138     }
13139
13140     *lst = cur;
13141
13142     while (cur != NULL) {
13143         cur->parent = NULL;
13144         cur = cur->next;
13145     }
13146
13147     xmlUnlinkNode(fake);
13148     xmlFreeNode(fake);
13149
13150
13151     if (ret != XML_ERR_OK) {
13152         xmlFreeNodeList(*lst);
13153         *lst = NULL;
13154     }
13155
13156     if (doc->dict != NULL)
13157         ctxt->dict = NULL;
13158     xmlFreeParserCtxt(ctxt);
13159
13160     return(ret);
13161 #else /* !SAX2 */
13162     return(XML_ERR_INTERNAL_ERROR);
13163 #endif
13164 }
13165
13166 #ifdef LIBXML_SAX1_ENABLED
13167 /**
13168  * xmlParseBalancedChunkMemoryRecover:
13169  * @doc:  the document the chunk pertains to
13170  * @sax:  the SAX handler bloc (possibly NULL)
13171  * @user_data:  The user data returned on SAX callbacks (possibly NULL)
13172  * @depth:  Used for loop detection, use 0
13173  * @string:  the input string in UTF8 or ISO-Latin (zero terminated)
13174  * @lst:  the return value for the set of parsed nodes
13175  * @recover: return nodes even if the data is broken (use 0)
13176  *
13177  *
13178  * Parse a well-balanced chunk of an XML document
13179  * called by the parser
13180  * The allowed sequence for the Well Balanced Chunk is the one defined by
13181  * the content production in the XML grammar:
13182  *
13183  * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
13184  *
13185  * Returns 0 if the chunk is well balanced, -1 in case of args problem and
13186  *    the parser error code otherwise
13187  *
13188  * In case recover is set to 1, the nodelist will not be empty even if
13189  * the parsed chunk is not well balanced, assuming the parsing succeeded to
13190  * some extent.
13191  */
13192 int
13193 xmlParseBalancedChunkMemoryRecover(xmlDocPtr doc, xmlSAXHandlerPtr sax,
13194      void *user_data, int depth, const xmlChar *string, xmlNodePtr *lst,
13195      int recover) {
13196     xmlParserCtxtPtr ctxt;
13197     xmlDocPtr newDoc;
13198     xmlSAXHandlerPtr oldsax = NULL;
13199     xmlNodePtr content, newRoot;
13200     int size;
13201     int ret = 0;
13202
13203     if (depth > 40) {
13204         return(XML_ERR_ENTITY_LOOP);
13205     }
13206
13207
13208     if (lst != NULL)
13209         *lst = NULL;
13210     if (string == NULL)
13211         return(-1);
13212
13213     size = xmlStrlen(string);
13214
13215     ctxt = xmlCreateMemoryParserCtxt((char *) string, size);
13216     if (ctxt == NULL) return(-1);
13217     ctxt->userData = ctxt;
13218     if (sax != NULL) {
13219         oldsax = ctxt->sax;
13220         ctxt->sax = sax;
13221         if (user_data != NULL)
13222             ctxt->userData = user_data;
13223     }
13224     newDoc = xmlNewDoc(BAD_CAST "1.0");
13225     if (newDoc == NULL) {
13226         xmlFreeParserCtxt(ctxt);
13227         return(-1);
13228     }
13229     newDoc->properties = XML_DOC_INTERNAL;
13230     if ((doc != NULL) && (doc->dict != NULL)) {
13231         xmlDictFree(ctxt->dict);
13232         ctxt->dict = doc->dict;
13233         xmlDictReference(ctxt->dict);
13234         ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3);
13235         ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5);
13236         ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36);
13237         ctxt->dictNames = 1;
13238     } else {
13239         xmlCtxtUseOptionsInternal(ctxt, XML_PARSE_NODICT, NULL);
13240     }
13241     if (doc != NULL) {
13242         newDoc->intSubset = doc->intSubset;
13243         newDoc->extSubset = doc->extSubset;
13244     }
13245     newRoot = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL);
13246     if (newRoot == NULL) {
13247         if (sax != NULL)
13248             ctxt->sax = oldsax;
13249         xmlFreeParserCtxt(ctxt);
13250         newDoc->intSubset = NULL;
13251         newDoc->extSubset = NULL;
13252         xmlFreeDoc(newDoc);
13253         return(-1);
13254     }
13255     xmlAddChild((xmlNodePtr) newDoc, newRoot);
13256     nodePush(ctxt, newRoot);
13257     if (doc == NULL) {
13258         ctxt->myDoc = newDoc;
13259     } else {
13260         ctxt->myDoc = newDoc;
13261         newDoc->children->doc = doc;
13262         /* Ensure that doc has XML spec namespace */
13263         xmlSearchNsByHref(doc, (xmlNodePtr)doc, XML_XML_NAMESPACE);
13264         newDoc->oldNs = doc->oldNs;
13265     }
13266     ctxt->instate = XML_PARSER_CONTENT;
13267     ctxt->depth = depth;
13268
13269     /*
13270      * Doing validity checking on chunk doesn't make sense
13271      */
13272     ctxt->validate = 0;
13273     ctxt->loadsubset = 0;
13274     xmlDetectSAX2(ctxt);
13275
13276     if ( doc != NULL ){
13277         content = doc->children;
13278         doc->children = NULL;
13279         xmlParseContent(ctxt);
13280         doc->children = content;
13281     }
13282     else {
13283         xmlParseContent(ctxt);
13284     }
13285     if ((RAW == '<') && (NXT(1) == '/')) {
13286         xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
13287     } else if (RAW != 0) {
13288         xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
13289     }
13290     if (ctxt->node != newDoc->children) {
13291         xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
13292     }
13293
13294     if (!ctxt->wellFormed) {
13295         if (ctxt->errNo == 0)
13296             ret = 1;
13297         else
13298             ret = ctxt->errNo;
13299     } else {
13300       ret = 0;
13301     }
13302
13303     if ((lst != NULL) && ((ret == 0) || (recover == 1))) {
13304         xmlNodePtr cur;
13305
13306         /*
13307          * Return the newly created nodeset after unlinking it from
13308          * they pseudo parent.
13309          */
13310         cur = newDoc->children->children;
13311         *lst = cur;
13312         while (cur != NULL) {
13313             xmlSetTreeDoc(cur, doc);
13314             cur->parent = NULL;
13315             cur = cur->next;
13316         }
13317         newDoc->children->children = NULL;
13318     }
13319
13320     if (sax != NULL)
13321         ctxt->sax = oldsax;
13322     xmlFreeParserCtxt(ctxt);
13323     newDoc->intSubset = NULL;
13324     newDoc->extSubset = NULL;
13325     newDoc->oldNs = NULL;
13326     xmlFreeDoc(newDoc);
13327
13328     return(ret);
13329 }
13330
13331 /**
13332  * xmlSAXParseEntity:
13333  * @sax:  the SAX handler block
13334  * @filename:  the filename
13335  *
13336  * parse an XML external entity out of context and build a tree.
13337  * It use the given SAX function block to handle the parsing callback.
13338  * If sax is NULL, fallback to the default DOM tree building routines.
13339  *
13340  * [78] extParsedEnt ::= TextDecl? content
13341  *
13342  * This correspond to a "Well Balanced" chunk
13343  *
13344  * Returns the resulting document tree
13345  */
13346
13347 xmlDocPtr
13348 xmlSAXParseEntity(xmlSAXHandlerPtr sax, const char *filename) {
13349     xmlDocPtr ret;
13350     xmlParserCtxtPtr ctxt;
13351
13352     ctxt = xmlCreateFileParserCtxt(filename);
13353     if (ctxt == NULL) {
13354         return(NULL);
13355     }
13356     if (sax != NULL) {
13357         if (ctxt->sax != NULL)
13358             xmlFree(ctxt->sax);
13359         ctxt->sax = sax;
13360         ctxt->userData = NULL;
13361     }
13362
13363     xmlParseExtParsedEnt(ctxt);
13364
13365     if (ctxt->wellFormed)
13366         ret = ctxt->myDoc;
13367     else {
13368         ret = NULL;
13369         xmlFreeDoc(ctxt->myDoc);
13370         ctxt->myDoc = NULL;
13371     }
13372     if (sax != NULL)
13373         ctxt->sax = NULL;
13374     xmlFreeParserCtxt(ctxt);
13375
13376     return(ret);
13377 }
13378
13379 /**
13380  * xmlParseEntity:
13381  * @filename:  the filename
13382  *
13383  * parse an XML external entity out of context and build a tree.
13384  *
13385  * [78] extParsedEnt ::= TextDecl? content
13386  *
13387  * This correspond to a "Well Balanced" chunk
13388  *
13389  * Returns the resulting document tree
13390  */
13391
13392 xmlDocPtr
13393 xmlParseEntity(const char *filename) {
13394     return(xmlSAXParseEntity(NULL, filename));
13395 }
13396 #endif /* LIBXML_SAX1_ENABLED */
13397
13398 /**
13399  * xmlCreateEntityParserCtxtInternal:
13400  * @URL:  the entity URL
13401  * @ID:  the entity PUBLIC ID
13402  * @base:  a possible base for the target URI
13403  * @pctx:  parser context used to set options on new context
13404  *
13405  * Create a parser context for an external entity
13406  * Automatic support for ZLIB/Compress compressed document is provided
13407  * by default if found at compile-time.
13408  *
13409  * Returns the new parser context or NULL
13410  */
13411 static xmlParserCtxtPtr
13412 xmlCreateEntityParserCtxtInternal(const xmlChar *URL, const xmlChar *ID,
13413                           const xmlChar *base, xmlParserCtxtPtr pctx) {
13414     xmlParserCtxtPtr ctxt;
13415     xmlParserInputPtr inputStream;
13416     char *directory = NULL;
13417     xmlChar *uri;
13418
13419     ctxt = xmlNewParserCtxt();
13420     if (ctxt == NULL) {
13421         return(NULL);
13422     }
13423
13424     if (pctx != NULL) {
13425         ctxt->options = pctx->options;
13426         ctxt->_private = pctx->_private;
13427     }
13428
13429     uri = xmlBuildURI(URL, base);
13430
13431     if (uri == NULL) {
13432         inputStream = xmlLoadExternalEntity((char *)URL, (char *)ID, ctxt);
13433         if (inputStream == NULL) {
13434             xmlFreeParserCtxt(ctxt);
13435             return(NULL);
13436         }
13437
13438         inputPush(ctxt, inputStream);
13439
13440         if ((ctxt->directory == NULL) && (directory == NULL))
13441             directory = xmlParserGetDirectory((char *)URL);
13442         if ((ctxt->directory == NULL) && (directory != NULL))
13443             ctxt->directory = directory;
13444     } else {
13445         inputStream = xmlLoadExternalEntity((char *)uri, (char *)ID, ctxt);
13446         if (inputStream == NULL) {
13447             xmlFree(uri);
13448             xmlFreeParserCtxt(ctxt);
13449             return(NULL);
13450         }
13451
13452         inputPush(ctxt, inputStream);
13453
13454         if ((ctxt->directory == NULL) && (directory == NULL))
13455             directory = xmlParserGetDirectory((char *)uri);
13456         if ((ctxt->directory == NULL) && (directory != NULL))
13457             ctxt->directory = directory;
13458         xmlFree(uri);
13459     }
13460     return(ctxt);
13461 }
13462
13463 /**
13464  * xmlCreateEntityParserCtxt:
13465  * @URL:  the entity URL
13466  * @ID:  the entity PUBLIC ID
13467  * @base:  a possible base for the target URI
13468  *
13469  * Create a parser context for an external entity
13470  * Automatic support for ZLIB/Compress compressed document is provided
13471  * by default if found at compile-time.
13472  *
13473  * Returns the new parser context or NULL
13474  */
13475 xmlParserCtxtPtr
13476 xmlCreateEntityParserCtxt(const xmlChar *URL, const xmlChar *ID,
13477                           const xmlChar *base) {
13478     return xmlCreateEntityParserCtxtInternal(URL, ID, base, NULL);
13479
13480 }
13481
13482 /************************************************************************
13483  *                                                                      *
13484  *              Front ends when parsing from a file                     *
13485  *                                                                      *
13486  ************************************************************************/
13487
13488 /**
13489  * xmlCreateURLParserCtxt:
13490  * @filename:  the filename or URL
13491  * @options:  a combination of xmlParserOption
13492  *
13493  * Create a parser context for a file or URL content.
13494  * Automatic support for ZLIB/Compress compressed document is provided
13495  * by default if found at compile-time and for file accesses
13496  *
13497  * Returns the new parser context or NULL
13498  */
13499 xmlParserCtxtPtr
13500 xmlCreateURLParserCtxt(const char *filename, int options)
13501 {
13502     xmlParserCtxtPtr ctxt;
13503     xmlParserInputPtr inputStream;
13504     char *directory = NULL;
13505
13506     ctxt = xmlNewParserCtxt();
13507     if (ctxt == NULL) {
13508         xmlErrMemory(NULL, "cannot allocate parser context");
13509         return(NULL);
13510     }
13511
13512     if (options)
13513         xmlCtxtUseOptionsInternal(ctxt, options, NULL);
13514     ctxt->linenumbers = 1;
13515
13516     inputStream = xmlLoadExternalEntity(filename, NULL, ctxt);
13517     if (inputStream == NULL) {
13518         xmlFreeParserCtxt(ctxt);
13519         return(NULL);
13520     }
13521
13522     inputPush(ctxt, inputStream);
13523     if ((ctxt->directory == NULL) && (directory == NULL))
13524         directory = xmlParserGetDirectory(filename);
13525     if ((ctxt->directory == NULL) && (directory != NULL))
13526         ctxt->directory = directory;
13527
13528     return(ctxt);
13529 }
13530
13531 /**
13532  * xmlCreateFileParserCtxt:
13533  * @filename:  the filename
13534  *
13535  * Create a parser context for a file content.
13536  * Automatic support for ZLIB/Compress compressed document is provided
13537  * by default if found at compile-time.
13538  *
13539  * Returns the new parser context or NULL
13540  */
13541 xmlParserCtxtPtr
13542 xmlCreateFileParserCtxt(const char *filename)
13543 {
13544     return(xmlCreateURLParserCtxt(filename, 0));
13545 }
13546
13547 #ifdef LIBXML_SAX1_ENABLED
13548 /**
13549  * xmlSAXParseFileWithData:
13550  * @sax:  the SAX handler block
13551  * @filename:  the filename
13552  * @recovery:  work in recovery mode, i.e. tries to read no Well Formed
13553  *             documents
13554  * @data:  the userdata
13555  *
13556  * parse an XML file and build a tree. Automatic support for ZLIB/Compress
13557  * compressed document is provided by default if found at compile-time.
13558  * It use the given SAX function block to handle the parsing callback.
13559  * If sax is NULL, fallback to the default DOM tree building routines.
13560  *
13561  * User data (void *) is stored within the parser context in the
13562  * context's _private member, so it is available nearly everywhere in libxml
13563  *
13564  * Returns the resulting document tree
13565  */
13566
13567 xmlDocPtr
13568 xmlSAXParseFileWithData(xmlSAXHandlerPtr sax, const char *filename,
13569                         int recovery, void *data) {
13570     xmlDocPtr ret;
13571     xmlParserCtxtPtr ctxt;
13572
13573     xmlInitParser();
13574
13575     ctxt = xmlCreateFileParserCtxt(filename);
13576     if (ctxt == NULL) {
13577         return(NULL);
13578     }
13579     if (sax != NULL) {
13580         if (ctxt->sax != NULL)
13581             xmlFree(ctxt->sax);
13582         ctxt->sax = sax;
13583     }
13584     xmlDetectSAX2(ctxt);
13585     if (data!=NULL) {
13586         ctxt->_private = data;
13587     }
13588
13589     if (ctxt->directory == NULL)
13590         ctxt->directory = xmlParserGetDirectory(filename);
13591
13592     ctxt->recovery = recovery;
13593
13594     xmlParseDocument(ctxt);
13595
13596     if ((ctxt->wellFormed) || recovery) {
13597         ret = ctxt->myDoc;
13598         if (ret != NULL) {
13599             if (ctxt->input->buf->compressed > 0)
13600                 ret->compression = 9;
13601             else
13602                 ret->compression = ctxt->input->buf->compressed;
13603         }
13604     }
13605     else {
13606        ret = NULL;
13607        xmlFreeDoc(ctxt->myDoc);
13608        ctxt->myDoc = NULL;
13609     }
13610     if (sax != NULL)
13611         ctxt->sax = NULL;
13612     xmlFreeParserCtxt(ctxt);
13613
13614     return(ret);
13615 }
13616
13617 /**
13618  * xmlSAXParseFile:
13619  * @sax:  the SAX handler block
13620  * @filename:  the filename
13621  * @recovery:  work in recovery mode, i.e. tries to read no Well Formed
13622  *             documents
13623  *
13624  * parse an XML file and build a tree. Automatic support for ZLIB/Compress
13625  * compressed document is provided by default if found at compile-time.
13626  * It use the given SAX function block to handle the parsing callback.
13627  * If sax is NULL, fallback to the default DOM tree building routines.
13628  *
13629  * Returns the resulting document tree
13630  */
13631
13632 xmlDocPtr
13633 xmlSAXParseFile(xmlSAXHandlerPtr sax, const char *filename,
13634                           int recovery) {
13635     return(xmlSAXParseFileWithData(sax,filename,recovery,NULL));
13636 }
13637
13638 /**
13639  * xmlRecoverDoc:
13640  * @cur:  a pointer to an array of xmlChar
13641  *
13642  * parse an XML in-memory document and build a tree.
13643  * In the case the document is not Well Formed, a attempt to build a
13644  * tree is tried anyway
13645  *
13646  * Returns the resulting document tree or NULL in case of failure
13647  */
13648
13649 xmlDocPtr
13650 xmlRecoverDoc(const xmlChar *cur) {
13651     return(xmlSAXParseDoc(NULL, cur, 1));
13652 }
13653
13654 /**
13655  * xmlParseFile:
13656  * @filename:  the filename
13657  *
13658  * parse an XML file and build a tree. Automatic support for ZLIB/Compress
13659  * compressed document is provided by default if found at compile-time.
13660  *
13661  * Returns the resulting document tree if the file was wellformed,
13662  * NULL otherwise.
13663  */
13664
13665 xmlDocPtr
13666 xmlParseFile(const char *filename) {
13667     return(xmlSAXParseFile(NULL, filename, 0));
13668 }
13669
13670 /**
13671  * xmlRecoverFile:
13672  * @filename:  the filename
13673  *
13674  * parse an XML file and build a tree. Automatic support for ZLIB/Compress
13675  * compressed document is provided by default if found at compile-time.
13676  * In the case the document is not Well Formed, it attempts to build
13677  * a tree anyway
13678  *
13679  * Returns the resulting document tree or NULL in case of failure
13680  */
13681
13682 xmlDocPtr
13683 xmlRecoverFile(const char *filename) {
13684     return(xmlSAXParseFile(NULL, filename, 1));
13685 }
13686
13687
13688 /**
13689  * xmlSetupParserForBuffer:
13690  * @ctxt:  an XML parser context
13691  * @buffer:  a xmlChar * buffer
13692  * @filename:  a file name
13693  *
13694  * Setup the parser context to parse a new buffer; Clears any prior
13695  * contents from the parser context. The buffer parameter must not be
13696  * NULL, but the filename parameter can be
13697  */
13698 void
13699 xmlSetupParserForBuffer(xmlParserCtxtPtr ctxt, const xmlChar* buffer,
13700                              const char* filename)
13701 {
13702     xmlParserInputPtr input;
13703
13704     if ((ctxt == NULL) || (buffer == NULL))
13705         return;
13706
13707     input = xmlNewInputStream(ctxt);
13708     if (input == NULL) {
13709         xmlErrMemory(NULL, "parsing new buffer: out of memory\n");
13710         xmlClearParserCtxt(ctxt);
13711         return;
13712     }
13713
13714     xmlClearParserCtxt(ctxt);
13715     if (filename != NULL)
13716         input->filename = (char *) xmlCanonicPath((const xmlChar *)filename);
13717     input->base = buffer;
13718     input->cur = buffer;
13719     input->end = &buffer[xmlStrlen(buffer)];
13720     inputPush(ctxt, input);
13721 }
13722
13723 /**
13724  * xmlSAXUserParseFile:
13725  * @sax:  a SAX handler
13726  * @user_data:  The user data returned on SAX callbacks
13727  * @filename:  a file name
13728  *
13729  * parse an XML file and call the given SAX handler routines.
13730  * Automatic support for ZLIB/Compress compressed document is provided
13731  *
13732  * Returns 0 in case of success or a error number otherwise
13733  */
13734 int
13735 xmlSAXUserParseFile(xmlSAXHandlerPtr sax, void *user_data,
13736                     const char *filename) {
13737     int ret = 0;
13738     xmlParserCtxtPtr ctxt;
13739
13740     ctxt = xmlCreateFileParserCtxt(filename);
13741     if (ctxt == NULL) return -1;
13742     if (ctxt->sax != (xmlSAXHandlerPtr) &xmlDefaultSAXHandler)
13743         xmlFree(ctxt->sax);
13744     ctxt->sax = sax;
13745     xmlDetectSAX2(ctxt);
13746
13747     if (user_data != NULL)
13748         ctxt->userData = user_data;
13749
13750     xmlParseDocument(ctxt);
13751
13752     if (ctxt->wellFormed)
13753         ret = 0;
13754     else {
13755         if (ctxt->errNo != 0)
13756             ret = ctxt->errNo;
13757         else
13758             ret = -1;
13759     }
13760     if (sax != NULL)
13761         ctxt->sax = NULL;
13762     if (ctxt->myDoc != NULL) {
13763         xmlFreeDoc(ctxt->myDoc);
13764         ctxt->myDoc = NULL;
13765     }
13766     xmlFreeParserCtxt(ctxt);
13767
13768     return ret;
13769 }
13770 #endif /* LIBXML_SAX1_ENABLED */
13771
13772 /************************************************************************
13773  *                                                                      *
13774  *              Front ends when parsing from memory                     *
13775  *                                                                      *
13776  ************************************************************************/
13777
13778 /**
13779  * xmlCreateMemoryParserCtxt:
13780  * @buffer:  a pointer to a char array
13781  * @size:  the size of the array
13782  *
13783  * Create a parser context for an XML in-memory document.
13784  *
13785  * Returns the new parser context or NULL
13786  */
13787 xmlParserCtxtPtr
13788 xmlCreateMemoryParserCtxt(const char *buffer, int size) {
13789     xmlParserCtxtPtr ctxt;
13790     xmlParserInputPtr input;
13791     xmlParserInputBufferPtr buf;
13792
13793     if (buffer == NULL)
13794         return(NULL);
13795     if (size <= 0)
13796         return(NULL);
13797
13798     ctxt = xmlNewParserCtxt();
13799     if (ctxt == NULL)
13800         return(NULL);
13801
13802     /* TODO: xmlParserInputBufferCreateStatic, requires some serious changes */
13803     buf = xmlParserInputBufferCreateMem(buffer, size, XML_CHAR_ENCODING_NONE);
13804     if (buf == NULL) {
13805         xmlFreeParserCtxt(ctxt);
13806         return(NULL);
13807     }
13808
13809     input = xmlNewInputStream(ctxt);
13810     if (input == NULL) {
13811         xmlFreeParserInputBuffer(buf);
13812         xmlFreeParserCtxt(ctxt);
13813         return(NULL);
13814     }
13815
13816     input->filename = NULL;
13817     input->buf = buf;
13818     input->base = input->buf->buffer->content;
13819     input->cur = input->buf->buffer->content;
13820     input->end = &input->buf->buffer->content[input->buf->buffer->use];
13821
13822     inputPush(ctxt, input);
13823     return(ctxt);
13824 }
13825
13826 #ifdef LIBXML_SAX1_ENABLED
13827 /**
13828  * xmlSAXParseMemoryWithData:
13829  * @sax:  the SAX handler block
13830  * @buffer:  an pointer to a char array
13831  * @size:  the size of the array
13832  * @recovery:  work in recovery mode, i.e. tries to read no Well Formed
13833  *             documents
13834  * @data:  the userdata
13835  *
13836  * parse an XML in-memory block and use the given SAX function block
13837  * to handle the parsing callback. If sax is NULL, fallback to the default
13838  * DOM tree building routines.
13839  *
13840  * User data (void *) is stored within the parser context in the
13841  * context's _private member, so it is available nearly everywhere in libxml
13842  *
13843  * Returns the resulting document tree
13844  */
13845
13846 xmlDocPtr
13847 xmlSAXParseMemoryWithData(xmlSAXHandlerPtr sax, const char *buffer,
13848                   int size, int recovery, void *data) {
13849     xmlDocPtr ret;
13850     xmlParserCtxtPtr ctxt;
13851
13852     xmlInitParser();
13853
13854     ctxt = xmlCreateMemoryParserCtxt(buffer, size);
13855     if (ctxt == NULL) return(NULL);
13856     if (sax != NULL) {
13857         if (ctxt->sax != NULL)
13858             xmlFree(ctxt->sax);
13859         ctxt->sax = sax;
13860     }
13861     xmlDetectSAX2(ctxt);
13862     if (data!=NULL) {
13863         ctxt->_private=data;
13864     }
13865
13866     ctxt->recovery = recovery;
13867
13868     xmlParseDocument(ctxt);
13869
13870     if ((ctxt->wellFormed) || recovery) ret = ctxt->myDoc;
13871     else {
13872        ret = NULL;
13873        xmlFreeDoc(ctxt->myDoc);
13874        ctxt->myDoc = NULL;
13875     }
13876     if (sax != NULL)
13877         ctxt->sax = NULL;
13878     xmlFreeParserCtxt(ctxt);
13879
13880     return(ret);
13881 }
13882
13883 /**
13884  * xmlSAXParseMemory:
13885  * @sax:  the SAX handler block
13886  * @buffer:  an pointer to a char array
13887  * @size:  the size of the array
13888  * @recovery:  work in recovery mode, i.e. tries to read not Well Formed
13889  *             documents
13890  *
13891  * parse an XML in-memory block and use the given SAX function block
13892  * to handle the parsing callback. If sax is NULL, fallback to the default
13893  * DOM tree building routines.
13894  *
13895  * Returns the resulting document tree
13896  */
13897 xmlDocPtr
13898 xmlSAXParseMemory(xmlSAXHandlerPtr sax, const char *buffer,
13899                   int size, int recovery) {
13900     return xmlSAXParseMemoryWithData(sax, buffer, size, recovery, NULL);
13901 }
13902
13903 /**
13904  * xmlParseMemory:
13905  * @buffer:  an pointer to a char array
13906  * @size:  the size of the array
13907  *
13908  * parse an XML in-memory block and build a tree.
13909  *
13910  * Returns the resulting document tree
13911  */
13912
13913 xmlDocPtr xmlParseMemory(const char *buffer, int size) {
13914    return(xmlSAXParseMemory(NULL, buffer, size, 0));
13915 }
13916
13917 /**
13918  * xmlRecoverMemory:
13919  * @buffer:  an pointer to a char array
13920  * @size:  the size of the array
13921  *
13922  * parse an XML in-memory block and build a tree.
13923  * In the case the document is not Well Formed, an attempt to
13924  * build a tree is tried anyway
13925  *
13926  * Returns the resulting document tree or NULL in case of error
13927  */
13928
13929 xmlDocPtr xmlRecoverMemory(const char *buffer, int size) {
13930    return(xmlSAXParseMemory(NULL, buffer, size, 1));
13931 }
13932
13933 /**
13934  * xmlSAXUserParseMemory:
13935  * @sax:  a SAX handler
13936  * @user_data:  The user data returned on SAX callbacks
13937  * @buffer:  an in-memory XML document input
13938  * @size:  the length of the XML document in bytes
13939  *
13940  * A better SAX parsing routine.
13941  * parse an XML in-memory buffer and call the given SAX handler routines.
13942  *
13943  * Returns 0 in case of success or a error number otherwise
13944  */
13945 int xmlSAXUserParseMemory(xmlSAXHandlerPtr sax, void *user_data,
13946                           const char *buffer, int size) {
13947     int ret = 0;
13948     xmlParserCtxtPtr ctxt;
13949
13950     xmlInitParser();
13951
13952     ctxt = xmlCreateMemoryParserCtxt(buffer, size);
13953     if (ctxt == NULL) return -1;
13954     if (ctxt->sax != (xmlSAXHandlerPtr) &xmlDefaultSAXHandler)
13955         xmlFree(ctxt->sax);
13956     ctxt->sax = sax;
13957     xmlDetectSAX2(ctxt);
13958
13959     if (user_data != NULL)
13960         ctxt->userData = user_data;
13961
13962     xmlParseDocument(ctxt);
13963
13964     if (ctxt->wellFormed)
13965         ret = 0;
13966     else {
13967         if (ctxt->errNo != 0)
13968             ret = ctxt->errNo;
13969         else
13970             ret = -1;
13971     }
13972     if (sax != NULL)
13973         ctxt->sax = NULL;
13974     if (ctxt->myDoc != NULL) {
13975         xmlFreeDoc(ctxt->myDoc);
13976         ctxt->myDoc = NULL;
13977     }
13978     xmlFreeParserCtxt(ctxt);
13979
13980     return ret;
13981 }
13982 #endif /* LIBXML_SAX1_ENABLED */
13983
13984 /**
13985  * xmlCreateDocParserCtxt:
13986  * @cur:  a pointer to an array of xmlChar
13987  *
13988  * Creates a parser context for an XML in-memory document.
13989  *
13990  * Returns the new parser context or NULL
13991  */
13992 xmlParserCtxtPtr
13993 xmlCreateDocParserCtxt(const xmlChar *cur) {
13994     int len;
13995
13996     if (cur == NULL)
13997         return(NULL);
13998     len = xmlStrlen(cur);
13999     return(xmlCreateMemoryParserCtxt((const char *)cur, len));
14000 }
14001
14002 #ifdef LIBXML_SAX1_ENABLED
14003 /**
14004  * xmlSAXParseDoc:
14005  * @sax:  the SAX handler block
14006  * @cur:  a pointer to an array of xmlChar
14007  * @recovery:  work in recovery mode, i.e. tries to read no Well Formed
14008  *             documents
14009  *
14010  * parse an XML in-memory document and build a tree.
14011  * It use the given SAX function block to handle the parsing callback.
14012  * If sax is NULL, fallback to the default DOM tree building routines.
14013  *
14014  * Returns the resulting document tree
14015  */
14016
14017 xmlDocPtr
14018 xmlSAXParseDoc(xmlSAXHandlerPtr sax, const xmlChar *cur, int recovery) {
14019     xmlDocPtr ret;
14020     xmlParserCtxtPtr ctxt;
14021     xmlSAXHandlerPtr oldsax = NULL;
14022
14023     if (cur == NULL) return(NULL);
14024
14025
14026     ctxt = xmlCreateDocParserCtxt(cur);
14027     if (ctxt == NULL) return(NULL);
14028     if (sax != NULL) {
14029         oldsax = ctxt->sax;
14030         ctxt->sax = sax;
14031         ctxt->userData = NULL;
14032     }
14033     xmlDetectSAX2(ctxt);
14034
14035     xmlParseDocument(ctxt);
14036     if ((ctxt->wellFormed) || recovery) ret = ctxt->myDoc;
14037     else {
14038        ret = NULL;
14039        xmlFreeDoc(ctxt->myDoc);
14040        ctxt->myDoc = NULL;
14041     }
14042     if (sax != NULL)
14043         ctxt->sax = oldsax;
14044     xmlFreeParserCtxt(ctxt);
14045
14046     return(ret);
14047 }
14048
14049 /**
14050  * xmlParseDoc:
14051  * @cur:  a pointer to an array of xmlChar
14052  *
14053  * parse an XML in-memory document and build a tree.
14054  *
14055  * Returns the resulting document tree
14056  */
14057
14058 xmlDocPtr
14059 xmlParseDoc(const xmlChar *cur) {
14060     return(xmlSAXParseDoc(NULL, cur, 0));
14061 }
14062 #endif /* LIBXML_SAX1_ENABLED */
14063
14064 #ifdef LIBXML_LEGACY_ENABLED
14065 /************************************************************************
14066  *                                                                      *
14067  *      Specific function to keep track of entities references          *
14068  *      and used by the XSLT debugger                                   *
14069  *                                                                      *
14070  ************************************************************************/
14071
14072 static xmlEntityReferenceFunc xmlEntityRefFunc = NULL;
14073
14074 /**
14075  * xmlAddEntityReference:
14076  * @ent : A valid entity
14077  * @firstNode : A valid first node for children of entity
14078  * @lastNode : A valid last node of children entity
14079  *
14080  * Notify of a reference to an entity of type XML_EXTERNAL_GENERAL_PARSED_ENTITY
14081  */
14082 static void
14083 xmlAddEntityReference(xmlEntityPtr ent, xmlNodePtr firstNode,
14084                       xmlNodePtr lastNode)
14085 {
14086     if (xmlEntityRefFunc != NULL) {
14087         (*xmlEntityRefFunc) (ent, firstNode, lastNode);
14088     }
14089 }
14090
14091
14092 /**
14093  * xmlSetEntityReferenceFunc:
14094  * @func: A valid function
14095  *
14096  * Set the function to call call back when a xml reference has been made
14097  */
14098 void
14099 xmlSetEntityReferenceFunc(xmlEntityReferenceFunc func)
14100 {
14101     xmlEntityRefFunc = func;
14102 }
14103 #endif /* LIBXML_LEGACY_ENABLED */
14104
14105 /************************************************************************
14106  *                                                                      *
14107  *                              Miscellaneous                           *
14108  *                                                                      *
14109  ************************************************************************/
14110
14111 #ifdef LIBXML_XPATH_ENABLED
14112 #include <libxml/xpath.h>
14113 #endif
14114
14115 extern void XMLCDECL xmlGenericErrorDefaultFunc(void *ctx, const char *msg, ...);
14116 static int xmlParserInitialized = 0;
14117
14118 /**
14119  * xmlInitParser:
14120  *
14121  * Initialization function for the XML parser.
14122  * This is not reentrant. Call once before processing in case of
14123  * use in multithreaded programs.
14124  */
14125
14126 void
14127 xmlInitParser(void) {
14128     if (xmlParserInitialized != 0)
14129         return;
14130
14131 #ifdef LIBXML_THREAD_ENABLED
14132     __xmlGlobalInitMutexLock();
14133     if (xmlParserInitialized == 0) {
14134 #endif
14135         xmlInitThreads();
14136         xmlInitGlobals();
14137         if ((xmlGenericError == xmlGenericErrorDefaultFunc) ||
14138             (xmlGenericError == NULL))
14139             initGenericErrorDefaultFunc(NULL);
14140         xmlInitMemory();
14141         xmlInitCharEncodingHandlers();
14142         xmlDefaultSAXHandlerInit();
14143         xmlRegisterDefaultInputCallbacks();
14144 #ifdef LIBXML_OUTPUT_ENABLED
14145         xmlRegisterDefaultOutputCallbacks();
14146 #endif /* LIBXML_OUTPUT_ENABLED */
14147 #ifdef LIBXML_HTML_ENABLED
14148         htmlInitAutoClose();
14149         htmlDefaultSAXHandlerInit();
14150 #endif
14151 #ifdef LIBXML_XPATH_ENABLED
14152         xmlXPathInit();
14153 #endif
14154         xmlParserInitialized = 1;
14155 #ifdef LIBXML_THREAD_ENABLED
14156     }
14157     __xmlGlobalInitMutexUnlock();
14158 #endif
14159 }
14160
14161 /**
14162  * xmlCleanupParser:
14163  *
14164  * This function name is somewhat misleading. It does not clean up
14165  * parser state, it cleans up memory allocated by the library itself.
14166  * It is a cleanup function for the XML library. It tries to reclaim all
14167  * related global memory allocated for the library processing.
14168  * It doesn't deallocate any document related memory. One should
14169  * call xmlCleanupParser() only when the process has finished using
14170  * the library and all XML/HTML documents built with it.
14171  * See also xmlInitParser() which has the opposite function of preparing
14172  * the library for operations.
14173  *
14174  * WARNING: if your application is multithreaded or has plugin support
14175  *          calling this may crash the application if another thread or
14176  *          a plugin is still using libxml2. It's sometimes very hard to
14177  *          guess if libxml2 is in use in the application, some libraries
14178  *          or plugins may use it without notice. In case of doubt abstain
14179  *          from calling this function or do it just before calling exit()
14180  *          to avoid leak reports from valgrind !
14181  */
14182
14183 void
14184 xmlCleanupParser(void) {
14185     if (!xmlParserInitialized)
14186         return;
14187
14188     xmlCleanupCharEncodingHandlers();
14189 #ifdef LIBXML_CATALOG_ENABLED
14190     xmlCatalogCleanup();
14191 #endif
14192     xmlDictCleanup();
14193     xmlCleanupInputCallbacks();
14194 #ifdef LIBXML_OUTPUT_ENABLED
14195     xmlCleanupOutputCallbacks();
14196 #endif
14197 #ifdef LIBXML_SCHEMAS_ENABLED
14198     xmlSchemaCleanupTypes();
14199     xmlRelaxNGCleanupTypes();
14200 #endif
14201     xmlCleanupGlobals();
14202     xmlResetLastError();
14203     xmlCleanupThreads(); /* must be last if called not from the main thread */
14204     xmlCleanupMemory();
14205     xmlParserInitialized = 0;
14206 }
14207
14208 /************************************************************************
14209  *                                                                      *
14210  *      New set (2.6.0) of simpler and more flexible APIs               *
14211  *                                                                      *
14212  ************************************************************************/
14213
/**
 * DICT_FREE:
 * @str:  a string
 *
 * Free a string if it is not owned by the "dict" dictionary in the
 * current scope. A variable named dict must be visible where the macro
 * is used; a NULL @str is ignored.
 *
 * The body is wrapped in do { } while (0) so that the inner "if" cannot
 * capture an "else" following the macro. The trailing semicolon is kept
 * so that uses written with or without their own semicolon both compile.
 */
#define DICT_FREE(str)                                          \
    do {                                                        \
        if ((str) && ((!dict) ||                                \
            (xmlDictOwns(dict, (const xmlChar *)(str)) == 0)))  \
            xmlFree((char *)(str));                             \
    } while (0);
14225
14226 /**
14227  * xmlCtxtReset:
14228  * @ctxt: an XML parser context
14229  *
14230  * Reset a parser context
14231  */
14232 void
14233 xmlCtxtReset(xmlParserCtxtPtr ctxt)
14234 {
14235     xmlParserInputPtr input;
14236     xmlDictPtr dict;
14237
14238     if (ctxt == NULL)
14239         return;
14240
14241     dict = ctxt->dict;
14242
14243     while ((input = inputPop(ctxt)) != NULL) { /* Non consuming */
14244         xmlFreeInputStream(input);
14245     }
14246     ctxt->inputNr = 0;
14247     ctxt->input = NULL;
14248
14249     ctxt->spaceNr = 0;
14250     if (ctxt->spaceTab != NULL) {
14251         ctxt->spaceTab[0] = -1;
14252         ctxt->space = &ctxt->spaceTab[0];
14253     } else {
14254         ctxt->space = NULL;
14255     }
14256
14257
14258     ctxt->nodeNr = 0;
14259     ctxt->node = NULL;
14260
14261     ctxt->nameNr = 0;
14262     ctxt->name = NULL;
14263
14264     DICT_FREE(ctxt->version);
14265     ctxt->version = NULL;
14266     DICT_FREE(ctxt->encoding);
14267     ctxt->encoding = NULL;
14268     DICT_FREE(ctxt->directory);
14269     ctxt->directory = NULL;
14270     DICT_FREE(ctxt->extSubURI);
14271     ctxt->extSubURI = NULL;
14272     DICT_FREE(ctxt->extSubSystem);
14273     ctxt->extSubSystem = NULL;
14274     if (ctxt->myDoc != NULL)
14275         xmlFreeDoc(ctxt->myDoc);
14276     ctxt->myDoc = NULL;
14277
14278     ctxt->standalone = -1;
14279     ctxt->hasExternalSubset = 0;
14280     ctxt->hasPErefs = 0;
14281     ctxt->html = 0;
14282     ctxt->external = 0;
14283     ctxt->instate = XML_PARSER_START;
14284     ctxt->token = 0;
14285
14286     ctxt->wellFormed = 1;
14287     ctxt->nsWellFormed = 1;
14288     ctxt->disableSAX = 0;
14289     ctxt->valid = 1;
14290 #if 0
14291     ctxt->vctxt.userData = ctxt;
14292     ctxt->vctxt.error = xmlParserValidityError;
14293     ctxt->vctxt.warning = xmlParserValidityWarning;
14294 #endif
14295     ctxt->record_info = 0;
14296     ctxt->nbChars = 0;
14297     ctxt->checkIndex = 0;
14298     ctxt->inSubset = 0;
14299     ctxt->errNo = XML_ERR_OK;
14300     ctxt->depth = 0;
14301     ctxt->charset = XML_CHAR_ENCODING_UTF8;
14302     ctxt->catalogs = NULL;
14303     ctxt->nbentities = 0;
14304     ctxt->sizeentities = 0;
14305     xmlInitNodeInfoSeq(&ctxt->node_seq);
14306
14307     if (ctxt->attsDefault != NULL) {
14308         xmlHashFree(ctxt->attsDefault, (xmlHashDeallocator) xmlFree);
14309         ctxt->attsDefault = NULL;
14310     }
14311     if (ctxt->attsSpecial != NULL) {
14312         xmlHashFree(ctxt->attsSpecial, NULL);
14313         ctxt->attsSpecial = NULL;
14314     }
14315
14316 #ifdef LIBXML_CATALOG_ENABLED
14317     if (ctxt->catalogs != NULL)
14318         xmlCatalogFreeLocal(ctxt->catalogs);
14319 #endif
14320     if (ctxt->lastError.code != XML_ERR_OK)
14321         xmlResetError(&ctxt->lastError);
14322 }
14323
14324 /**
14325  * xmlCtxtResetPush:
14326  * @ctxt: an XML parser context
14327  * @chunk:  a pointer to an array of chars
14328  * @size:  number of chars in the array
14329  * @filename:  an optional file name or URI
14330  * @encoding:  the document encoding, or NULL
14331  *
14332  * Reset a push parser context
14333  *
14334  * Returns 0 in case of success and 1 in case of error
14335  */
14336 int
14337 xmlCtxtResetPush(xmlParserCtxtPtr ctxt, const char *chunk,
14338                  int size, const char *filename, const char *encoding)
14339 {
14340     xmlParserInputPtr inputStream;
14341     xmlParserInputBufferPtr buf;
14342     xmlCharEncoding enc = XML_CHAR_ENCODING_NONE;
14343
14344     if (ctxt == NULL)
14345         return(1);
14346
14347     if ((encoding == NULL) && (chunk != NULL) && (size >= 4))
14348         enc = xmlDetectCharEncoding((const xmlChar *) chunk, size);
14349
14350     buf = xmlAllocParserInputBuffer(enc);
14351     if (buf == NULL)
14352         return(1);
14353
14354     if (ctxt == NULL) {
14355         xmlFreeParserInputBuffer(buf);
14356         return(1);
14357     }
14358
14359     xmlCtxtReset(ctxt);
14360
14361     if (ctxt->pushTab == NULL) {
14362         ctxt->pushTab = (void **) xmlMalloc(ctxt->nameMax * 3 *
14363                                             sizeof(xmlChar *));
14364         if (ctxt->pushTab == NULL) {
14365             xmlErrMemory(ctxt, NULL);
14366             xmlFreeParserInputBuffer(buf);
14367             return(1);
14368         }
14369     }
14370
14371     if (filename == NULL) {
14372         ctxt->directory = NULL;
14373     } else {
14374         ctxt->directory = xmlParserGetDirectory(filename);
14375     }
14376
14377     inputStream = xmlNewInputStream(ctxt);
14378     if (inputStream == NULL) {
14379         xmlFreeParserInputBuffer(buf);
14380         return(1);
14381     }
14382
14383     if (filename == NULL)
14384         inputStream->filename = NULL;
14385     else
14386         inputStream->filename = (char *)
14387             xmlCanonicPath((const xmlChar *) filename);
14388     inputStream->buf = buf;
14389     inputStream->base = inputStream->buf->buffer->content;
14390     inputStream->cur = inputStream->buf->buffer->content;
14391     inputStream->end =
14392         &inputStream->buf->buffer->content[inputStream->buf->buffer->use];
14393
14394     inputPush(ctxt, inputStream);
14395
14396     if ((size > 0) && (chunk != NULL) && (ctxt->input != NULL) &&
14397         (ctxt->input->buf != NULL)) {
14398         int base = ctxt->input->base - ctxt->input->buf->buffer->content;
14399         int cur = ctxt->input->cur - ctxt->input->base;
14400
14401         xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
14402
14403         ctxt->input->base = ctxt->input->buf->buffer->content + base;
14404         ctxt->input->cur = ctxt->input->base + cur;
14405         ctxt->input->end =
14406             &ctxt->input->buf->buffer->content[ctxt->input->buf->buffer->
14407                                                use];
14408 #ifdef DEBUG_PUSH
14409         xmlGenericError(xmlGenericErrorContext, "PP: pushed %d\n", size);
14410 #endif
14411     }
14412
14413     if (encoding != NULL) {
14414         xmlCharEncodingHandlerPtr hdlr;
14415
14416         if (ctxt->encoding != NULL)
14417             xmlFree((xmlChar *) ctxt->encoding);
14418         ctxt->encoding = xmlStrdup((const xmlChar *) encoding);
14419
14420         hdlr = xmlFindCharEncodingHandler(encoding);
14421         if (hdlr != NULL) {
14422             xmlSwitchToEncoding(ctxt, hdlr);
14423         } else {
14424             xmlFatalErrMsgStr(ctxt, XML_ERR_UNSUPPORTED_ENCODING,
14425                               "Unsupported encoding %s\n", BAD_CAST encoding);
14426         }
14427     } else if (enc != XML_CHAR_ENCODING_NONE) {
14428         xmlSwitchEncoding(ctxt, enc);
14429     }
14430
14431     return(0);
14432 }
14433
14434
14435 /**
14436  * xmlCtxtUseOptionsInternal:
14437  * @ctxt: an XML parser context
14438  * @options:  a combination of xmlParserOption
14439  * @encoding:  the user provided encoding to use
14440  *
14441  * Applies the options to the parser context
14442  *
14443  * Returns 0 in case of success, the set of unknown or unimplemented options
14444  *         in case of error.
14445  */
14446 static int
14447 xmlCtxtUseOptionsInternal(xmlParserCtxtPtr ctxt, int options, const char *encoding)
14448 {
14449     if (ctxt == NULL)
14450         return(-1);
14451     if (encoding != NULL) {
14452         if (ctxt->encoding != NULL)
14453             xmlFree((xmlChar *) ctxt->encoding);
14454         ctxt->encoding = xmlStrdup((const xmlChar *) encoding);
14455     }
14456     if (options & XML_PARSE_RECOVER) {
14457         ctxt->recovery = 1;
14458         options -= XML_PARSE_RECOVER;
14459         ctxt->options |= XML_PARSE_RECOVER;
14460     } else
14461         ctxt->recovery = 0;
14462     if (options & XML_PARSE_DTDLOAD) {
14463         ctxt->loadsubset = XML_DETECT_IDS;
14464         options -= XML_PARSE_DTDLOAD;
14465         ctxt->options |= XML_PARSE_DTDLOAD;
14466     } else
14467         ctxt->loadsubset = 0;
14468     if (options & XML_PARSE_DTDATTR) {
14469         ctxt->loadsubset |= XML_COMPLETE_ATTRS;
14470         options -= XML_PARSE_DTDATTR;
14471         ctxt->options |= XML_PARSE_DTDATTR;
14472     }
14473     if (options & XML_PARSE_NOENT) {
14474         ctxt->replaceEntities = 1;
14475         /* ctxt->loadsubset |= XML_DETECT_IDS; */
14476         options -= XML_PARSE_NOENT;
14477         ctxt->options |= XML_PARSE_NOENT;
14478     } else
14479         ctxt->replaceEntities = 0;
14480     if (options & XML_PARSE_PEDANTIC) {
14481         ctxt->pedantic = 1;
14482         options -= XML_PARSE_PEDANTIC;
14483         ctxt->options |= XML_PARSE_PEDANTIC;
14484     } else
14485         ctxt->pedantic = 0;
14486     if (options & XML_PARSE_NOBLANKS) {
14487         ctxt->keepBlanks = 0;
14488         ctxt->sax->ignorableWhitespace = xmlSAX2IgnorableWhitespace;
14489         options -= XML_PARSE_NOBLANKS;
14490         ctxt->options |= XML_PARSE_NOBLANKS;
14491     } else
14492         ctxt->keepBlanks = 1;
14493     if (options & XML_PARSE_DTDVALID) {
14494         ctxt->validate = 1;
14495         if (options & XML_PARSE_NOWARNING)
14496             ctxt->vctxt.warning = NULL;
14497         if (options & XML_PARSE_NOERROR)
14498             ctxt->vctxt.error = NULL;
14499         options -= XML_PARSE_DTDVALID;
14500         ctxt->options |= XML_PARSE_DTDVALID;
14501     } else
14502         ctxt->validate = 0;
14503     if (options & XML_PARSE_NOWARNING) {
14504         ctxt->sax->warning = NULL;
14505         options -= XML_PARSE_NOWARNING;
14506     }
14507     if (options & XML_PARSE_NOERROR) {
14508         ctxt->sax->error = NULL;
14509         ctxt->sax->fatalError = NULL;
14510         options -= XML_PARSE_NOERROR;
14511     }
14512 #ifdef LIBXML_SAX1_ENABLED
14513     if (options & XML_PARSE_SAX1) {
14514         ctxt->sax->startElement = xmlSAX2StartElement;
14515         ctxt->sax->endElement = xmlSAX2EndElement;
14516         ctxt->sax->startElementNs = NULL;
14517         ctxt->sax->endElementNs = NULL;
14518         ctxt->sax->initialized = 1;
14519         options -= XML_PARSE_SAX1;
14520         ctxt->options |= XML_PARSE_SAX1;
14521     }
14522 #endif /* LIBXML_SAX1_ENABLED */
14523     if (options & XML_PARSE_NODICT) {
14524         ctxt->dictNames = 0;
14525         options -= XML_PARSE_NODICT;
14526         ctxt->options |= XML_PARSE_NODICT;
14527     } else {
14528         ctxt->dictNames = 1;
14529     }
14530     if (options & XML_PARSE_NOCDATA) {
14531         ctxt->sax->cdataBlock = NULL;
14532         options -= XML_PARSE_NOCDATA;
14533         ctxt->options |= XML_PARSE_NOCDATA;
14534     }
14535     if (options & XML_PARSE_NSCLEAN) {
14536         ctxt->options |= XML_PARSE_NSCLEAN;
14537         options -= XML_PARSE_NSCLEAN;
14538     }
14539     if (options & XML_PARSE_NONET) {
14540         ctxt->options |= XML_PARSE_NONET;
14541         options -= XML_PARSE_NONET;
14542     }
14543     if (options & XML_PARSE_COMPACT) {
14544         ctxt->options |= XML_PARSE_COMPACT;
14545         options -= XML_PARSE_COMPACT;
14546     }
14547     if (options & XML_PARSE_OLD10) {
14548         ctxt->options |= XML_PARSE_OLD10;
14549         options -= XML_PARSE_OLD10;
14550     }
14551     if (options & XML_PARSE_NOBASEFIX) {
14552         ctxt->options |= XML_PARSE_NOBASEFIX;
14553         options -= XML_PARSE_NOBASEFIX;
14554     }
14555     if (options & XML_PARSE_HUGE) {
14556         ctxt->options |= XML_PARSE_HUGE;
14557         options -= XML_PARSE_HUGE;
14558     }
14559     if (options & XML_PARSE_OLDSAX) {
14560         ctxt->options |= XML_PARSE_OLDSAX;
14561         options -= XML_PARSE_OLDSAX;
14562     }
14563     ctxt->linenumbers = 1;
14564     return (options);
14565 }
14566
14567 /**
14568  * xmlCtxtUseOptions:
14569  * @ctxt: an XML parser context
14570  * @options:  a combination of xmlParserOption
14571  *
14572  * Applies the options to the parser context
14573  *
14574  * Returns 0 in case of success, the set of unknown or unimplemented options
14575  *         in case of error.
14576  */
14577 int
14578 xmlCtxtUseOptions(xmlParserCtxtPtr ctxt, int options)
14579 {
14580    return(xmlCtxtUseOptionsInternal(ctxt, options, NULL));
14581 }
14582
14583 /**
14584  * xmlDoRead:
14585  * @ctxt:  an XML parser context
14586  * @URL:  the base URL to use for the document
14587  * @encoding:  the document encoding, or NULL
14588  * @options:  a combination of xmlParserOption
14589  * @reuse:  keep the context for reuse
14590  *
14591  * Common front-end for the xmlRead functions
14592  *
14593  * Returns the resulting document tree or NULL
14594  */
14595 static xmlDocPtr
14596 xmlDoRead(xmlParserCtxtPtr ctxt, const char *URL, const char *encoding,
14597           int options, int reuse)
14598 {
14599     xmlDocPtr ret;
14600
14601     xmlCtxtUseOptionsInternal(ctxt, options, encoding);
14602     if (encoding != NULL) {
14603         xmlCharEncodingHandlerPtr hdlr;
14604
14605         hdlr = xmlFindCharEncodingHandler(encoding);
14606         if (hdlr != NULL)
14607             xmlSwitchToEncoding(ctxt, hdlr);
14608     }
14609     if ((URL != NULL) && (ctxt->input != NULL) &&
14610         (ctxt->input->filename == NULL))
14611         ctxt->input->filename = (char *) xmlStrdup((const xmlChar *) URL);
14612     xmlParseDocument(ctxt);
14613     if ((ctxt->wellFormed) || ctxt->recovery)
14614         ret = ctxt->myDoc;
14615     else {
14616         ret = NULL;
14617         if (ctxt->myDoc != NULL) {
14618             xmlFreeDoc(ctxt->myDoc);
14619         }
14620     }
14621     ctxt->myDoc = NULL;
14622     if (!reuse) {
14623         xmlFreeParserCtxt(ctxt);
14624     }
14625
14626     return (ret);
14627 }
14628
14629 /**
14630  * xmlReadDoc:
14631  * @cur:  a pointer to a zero terminated string
14632  * @URL:  the base URL to use for the document
14633  * @encoding:  the document encoding, or NULL
14634  * @options:  a combination of xmlParserOption
14635  *
14636  * parse an XML in-memory document and build a tree.
14637  *
14638  * Returns the resulting document tree
14639  */
14640 xmlDocPtr
14641 xmlReadDoc(const xmlChar * cur, const char *URL, const char *encoding, int options)
14642 {
14643     xmlParserCtxtPtr ctxt;
14644
14645     if (cur == NULL)
14646         return (NULL);
14647
14648     ctxt = xmlCreateDocParserCtxt(cur);
14649     if (ctxt == NULL)
14650         return (NULL);
14651     return (xmlDoRead(ctxt, URL, encoding, options, 0));
14652 }
14653
14654 /**
14655  * xmlReadFile:
14656  * @filename:  a file or URL
14657  * @encoding:  the document encoding, or NULL
14658  * @options:  a combination of xmlParserOption
14659  *
14660  * parse an XML file from the filesystem or the network.
14661  *
14662  * Returns the resulting document tree
14663  */
14664 xmlDocPtr
14665 xmlReadFile(const char *filename, const char *encoding, int options)
14666 {
14667     xmlParserCtxtPtr ctxt;
14668
14669     ctxt = xmlCreateURLParserCtxt(filename, options);
14670     if (ctxt == NULL)
14671         return (NULL);
14672     return (xmlDoRead(ctxt, NULL, encoding, options, 0));
14673 }
14674
14675 /**
14676  * xmlReadMemory:
14677  * @buffer:  a pointer to a char array
14678  * @size:  the size of the array
14679  * @URL:  the base URL to use for the document
14680  * @encoding:  the document encoding, or NULL
14681  * @options:  a combination of xmlParserOption
14682  *
14683  * parse an XML in-memory document and build a tree.
14684  *
14685  * Returns the resulting document tree
14686  */
14687 xmlDocPtr
14688 xmlReadMemory(const char *buffer, int size, const char *URL, const char *encoding, int options)
14689 {
14690     xmlParserCtxtPtr ctxt;
14691
14692     ctxt = xmlCreateMemoryParserCtxt(buffer, size);
14693     if (ctxt == NULL)
14694         return (NULL);
14695     return (xmlDoRead(ctxt, URL, encoding, options, 0));
14696 }
14697
14698 /**
14699  * xmlReadFd:
14700  * @fd:  an open file descriptor
14701  * @URL:  the base URL to use for the document
14702  * @encoding:  the document encoding, or NULL
14703  * @options:  a combination of xmlParserOption
14704  *
14705  * parse an XML from a file descriptor and build a tree.
14706  * NOTE that the file descriptor will not be closed when the
14707  *      reader is closed or reset.
14708  *
14709  * Returns the resulting document tree
14710  */
14711 xmlDocPtr
14712 xmlReadFd(int fd, const char *URL, const char *encoding, int options)
14713 {
14714     xmlParserCtxtPtr ctxt;
14715     xmlParserInputBufferPtr input;
14716     xmlParserInputPtr stream;
14717
14718     if (fd < 0)
14719         return (NULL);
14720
14721     input = xmlParserInputBufferCreateFd(fd, XML_CHAR_ENCODING_NONE);
14722     if (input == NULL)
14723         return (NULL);
14724     input->closecallback = NULL;
14725     ctxt = xmlNewParserCtxt();
14726     if (ctxt == NULL) {
14727         xmlFreeParserInputBuffer(input);
14728         return (NULL);
14729     }
14730     stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
14731     if (stream == NULL) {
14732         xmlFreeParserInputBuffer(input);
14733         xmlFreeParserCtxt(ctxt);
14734         return (NULL);
14735     }
14736     inputPush(ctxt, stream);
14737     return (xmlDoRead(ctxt, URL, encoding, options, 0));
14738 }
14739
14740 /**
14741  * xmlReadIO:
14742  * @ioread:  an I/O read function
14743  * @ioclose:  an I/O close function
14744  * @ioctx:  an I/O handler
14745  * @URL:  the base URL to use for the document
14746  * @encoding:  the document encoding, or NULL
14747  * @options:  a combination of xmlParserOption
14748  *
14749  * parse an XML document from I/O functions and source and build a tree.
14750  *
14751  * Returns the resulting document tree
14752  */
14753 xmlDocPtr
14754 xmlReadIO(xmlInputReadCallback ioread, xmlInputCloseCallback ioclose,
14755           void *ioctx, const char *URL, const char *encoding, int options)
14756 {
14757     xmlParserCtxtPtr ctxt;
14758     xmlParserInputBufferPtr input;
14759     xmlParserInputPtr stream;
14760
14761     if (ioread == NULL)
14762         return (NULL);
14763
14764     input = xmlParserInputBufferCreateIO(ioread, ioclose, ioctx,
14765                                          XML_CHAR_ENCODING_NONE);
14766     if (input == NULL)
14767         return (NULL);
14768     ctxt = xmlNewParserCtxt();
14769     if (ctxt == NULL) {
14770         xmlFreeParserInputBuffer(input);
14771         return (NULL);
14772     }
14773     stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
14774     if (stream == NULL) {
14775         xmlFreeParserInputBuffer(input);
14776         xmlFreeParserCtxt(ctxt);
14777         return (NULL);
14778     }
14779     inputPush(ctxt, stream);
14780     return (xmlDoRead(ctxt, URL, encoding, options, 0));
14781 }
14782
14783 /**
14784  * xmlCtxtReadDoc:
14785  * @ctxt:  an XML parser context
14786  * @cur:  a pointer to a zero terminated string
14787  * @URL:  the base URL to use for the document
14788  * @encoding:  the document encoding, or NULL
14789  * @options:  a combination of xmlParserOption
14790  *
14791  * parse an XML in-memory document and build a tree.
14792  * This reuses the existing @ctxt parser context
14793  *
14794  * Returns the resulting document tree
14795  */
14796 xmlDocPtr
14797 xmlCtxtReadDoc(xmlParserCtxtPtr ctxt, const xmlChar * cur,
14798                const char *URL, const char *encoding, int options)
14799 {
14800     xmlParserInputPtr stream;
14801
14802     if (cur == NULL)
14803         return (NULL);
14804     if (ctxt == NULL)
14805         return (NULL);
14806
14807     xmlCtxtReset(ctxt);
14808
14809     stream = xmlNewStringInputStream(ctxt, cur);
14810     if (stream == NULL) {
14811         return (NULL);
14812     }
14813     inputPush(ctxt, stream);
14814     return (xmlDoRead(ctxt, URL, encoding, options, 1));
14815 }
14816
14817 /**
14818  * xmlCtxtReadFile:
14819  * @ctxt:  an XML parser context
14820  * @filename:  a file or URL
14821  * @encoding:  the document encoding, or NULL
14822  * @options:  a combination of xmlParserOption
14823  *
14824  * parse an XML file from the filesystem or the network.
14825  * This reuses the existing @ctxt parser context
14826  *
14827  * Returns the resulting document tree
14828  */
14829 xmlDocPtr
14830 xmlCtxtReadFile(xmlParserCtxtPtr ctxt, const char *filename,
14831                 const char *encoding, int options)
14832 {
14833     xmlParserInputPtr stream;
14834
14835     if (filename == NULL)
14836         return (NULL);
14837     if (ctxt == NULL)
14838         return (NULL);
14839
14840     xmlCtxtReset(ctxt);
14841
14842     stream = xmlLoadExternalEntity(filename, NULL, ctxt);
14843     if (stream == NULL) {
14844         return (NULL);
14845     }
14846     inputPush(ctxt, stream);
14847     return (xmlDoRead(ctxt, NULL, encoding, options, 1));
14848 }
14849
14850 /**
14851  * xmlCtxtReadMemory:
14852  * @ctxt:  an XML parser context
14853  * @buffer:  a pointer to a char array
14854  * @size:  the size of the array
14855  * @URL:  the base URL to use for the document
14856  * @encoding:  the document encoding, or NULL
14857  * @options:  a combination of xmlParserOption
14858  *
14859  * parse an XML in-memory document and build a tree.
14860  * This reuses the existing @ctxt parser context
14861  *
14862  * Returns the resulting document tree
14863  */
14864 xmlDocPtr
14865 xmlCtxtReadMemory(xmlParserCtxtPtr ctxt, const char *buffer, int size,
14866                   const char *URL, const char *encoding, int options)
14867 {
14868     xmlParserInputBufferPtr input;
14869     xmlParserInputPtr stream;
14870
14871     if (ctxt == NULL)
14872         return (NULL);
14873     if (buffer == NULL)
14874         return (NULL);
14875
14876     xmlCtxtReset(ctxt);
14877
14878     input = xmlParserInputBufferCreateMem(buffer, size, XML_CHAR_ENCODING_NONE);
14879     if (input == NULL) {
14880         return(NULL);
14881     }
14882
14883     stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
14884     if (stream == NULL) {
14885         xmlFreeParserInputBuffer(input);
14886         return(NULL);
14887     }
14888
14889     inputPush(ctxt, stream);
14890     return (xmlDoRead(ctxt, URL, encoding, options, 1));
14891 }
14892
14893 /**
14894  * xmlCtxtReadFd:
14895  * @ctxt:  an XML parser context
14896  * @fd:  an open file descriptor
14897  * @URL:  the base URL to use for the document
14898  * @encoding:  the document encoding, or NULL
14899  * @options:  a combination of xmlParserOption
14900  *
14901  * parse an XML from a file descriptor and build a tree.
14902  * This reuses the existing @ctxt parser context
14903  * NOTE that the file descriptor will not be closed when the
14904  *      reader is closed or reset.
14905  *
14906  * Returns the resulting document tree
14907  */
14908 xmlDocPtr
14909 xmlCtxtReadFd(xmlParserCtxtPtr ctxt, int fd,
14910               const char *URL, const char *encoding, int options)
14911 {
14912     xmlParserInputBufferPtr input;
14913     xmlParserInputPtr stream;
14914
14915     if (fd < 0)
14916         return (NULL);
14917     if (ctxt == NULL)
14918         return (NULL);
14919
14920     xmlCtxtReset(ctxt);
14921
14922
14923     input = xmlParserInputBufferCreateFd(fd, XML_CHAR_ENCODING_NONE);
14924     if (input == NULL)
14925         return (NULL);
14926     input->closecallback = NULL;
14927     stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
14928     if (stream == NULL) {
14929         xmlFreeParserInputBuffer(input);
14930         return (NULL);
14931     }
14932     inputPush(ctxt, stream);
14933     return (xmlDoRead(ctxt, URL, encoding, options, 1));
14934 }
14935
14936 /**
14937  * xmlCtxtReadIO:
14938  * @ctxt:  an XML parser context
14939  * @ioread:  an I/O read function
14940  * @ioclose:  an I/O close function
14941  * @ioctx:  an I/O handler
14942  * @URL:  the base URL to use for the document
14943  * @encoding:  the document encoding, or NULL
14944  * @options:  a combination of xmlParserOption
14945  *
14946  * parse an XML document from I/O functions and source and build a tree.
14947  * This reuses the existing @ctxt parser context
14948  *
14949  * Returns the resulting document tree
14950  */
14951 xmlDocPtr
14952 xmlCtxtReadIO(xmlParserCtxtPtr ctxt, xmlInputReadCallback ioread,
14953               xmlInputCloseCallback ioclose, void *ioctx,
14954               const char *URL,
14955               const char *encoding, int options)
14956 {
14957     xmlParserInputBufferPtr input;
14958     xmlParserInputPtr stream;
14959
14960     if (ioread == NULL)
14961         return (NULL);
14962     if (ctxt == NULL)
14963         return (NULL);
14964
14965     xmlCtxtReset(ctxt);
14966
14967     input = xmlParserInputBufferCreateIO(ioread, ioclose, ioctx,
14968                                          XML_CHAR_ENCODING_NONE);
14969     if (input == NULL)
14970         return (NULL);
14971     stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
14972     if (stream == NULL) {
14973         xmlFreeParserInputBuffer(input);
14974         return (NULL);
14975     }
14976     inputPush(ctxt, stream);
14977     return (xmlDoRead(ctxt, URL, encoding, options, 1));
14978 }
14979
14980 #define bottom_parser
14981 #include "elfgcchack.h"