2 * Summary: internals routines and limits exported by the parser.
3 * Description: this module exports a number of internal parsing routines
4 * they are not really all intended for applications but
5 * can prove useful doing low level processing.
7 * Copy: See Copyright for the status of this software.
9 * Author: Daniel Veillard
12 #ifndef __XML_PARSER_INTERNALS_H__
13 #define __XML_PARSER_INTERNALS_H__
15 #include <libxml/xmlversion.h>
16 #include <libxml/parser.h>
17 #include <libxml/HTMLparser.h>
18 #include <libxml/chvalid.h>
27 * arbitrary depth limit for the XML documents that we allow to
28 * process. This is not a limitation of the parser but a safety
29 * boundary feature, use XML_PARSE_HUGE option to override it.
31 XMLPUBVAR unsigned int xmlParserMaxDepth;
34 * XML_MAX_TEXT_LENGTH:
36 * Maximum size allowed for a single text node when building a tree.
37 * This is not a limitation of the parser but a safety boundary feature,
38 * use XML_PARSE_HUGE option to override it.
41 #define XML_MAX_TEXT_LENGTH 10000000
44 * XML_MAX_NAME_LENGTH:
 * Maximum size allowed for a markup identifier.
47 * This is not a limitation of the parser but a safety boundary feature,
48 * use XML_PARSE_HUGE option to override it.
49 * Note that with the use of parsing dictionaries overriding the limit
 * may result in more runtime memory usage in face of "unfriendly" content
53 #define XML_MAX_NAME_LENGTH 50000
56 * XML_MAX_DICTIONARY_LIMIT:
58 * Maximum size allowed by the parser for a dictionary by default
59 * This is not a limitation of the parser but a safety boundary feature,
60 * use XML_PARSE_HUGE option to override it.
63 #define XML_MAX_DICTIONARY_LIMIT 10000000
66 * XML_MAX_LOOKUP_LIMIT:
68 * Maximum size allowed by the parser for ahead lookup
69 * This is an upper boundary enforced by the parser to avoid bad
 * behaviour on "unfriendly" content
73 #define XML_MAX_LOOKUP_LIMIT 10000000
78 * Identifiers can be longer, but this will be more costly
81 #define XML_MAX_NAMELEN 100
86 * The parser tries to always have that amount of input ready.
 * One of the purposes is to provide context when reporting errors.
89 #define INPUT_CHUNK 250
91 /************************************************************************
93 * UNICODE version of the macros. *
95 ************************************************************************/
 * @c: a byte value (int)
100 * Macro to check the following production in the XML spec:
102 * [2] Char ::= #x9 | #xA | #xD | [#x20...]
103 * any byte character in the accepted range
105 #define IS_BYTE_CHAR(c) xmlIsChar_ch(c)
109 * @c: an UNICODE value (int)
111 * Macro to check the following production in the XML spec:
113 * [2] Char ::= #x9 | #xA | #xD | [#x20-#xD7FF] | [#xE000-#xFFFD]
114 * | [#x10000-#x10FFFF]
115 * any Unicode character, excluding the surrogate blocks, FFFE, and FFFF.
117 #define IS_CHAR(c) xmlIsCharQ(c)
121 * @c: an xmlChar (usually an unsigned char)
123 * Behaves like IS_CHAR on single-byte value
125 #define IS_CHAR_CH(c) xmlIsChar_ch(c)
129 * @c: an UNICODE value (int)
131 * Macro to check the following production in the XML spec:
133 * [3] S ::= (#x20 | #x9 | #xD | #xA)+
135 #define IS_BLANK(c) xmlIsBlankQ(c)
139 * @c: an xmlChar value (normally unsigned char)
141 * Behaviour same as IS_BLANK
143 #define IS_BLANK_CH(c) xmlIsBlank_ch(c)
147 * @c: an UNICODE value (int)
149 * Macro to check the following production in the XML spec:
151 * [85] BaseChar ::= ... long list see REC ...
153 #define IS_BASECHAR(c) xmlIsBaseCharQ(c)
157 * @c: an UNICODE value (int)
159 * Macro to check the following production in the XML spec:
161 * [88] Digit ::= ... long list see REC ...
163 #define IS_DIGIT(c) xmlIsDigitQ(c)
167 * @c: an xmlChar value (usually an unsigned char)
169 * Behaves like IS_DIGIT but with a single byte argument
171 #define IS_DIGIT_CH(c) xmlIsDigit_ch(c)
175 * @c: an UNICODE value (int)
177 * Macro to check the following production in the XML spec:
179 * [87] CombiningChar ::= ... long list see REC ...
181 #define IS_COMBINING(c) xmlIsCombiningQ(c)
185 * @c: an xmlChar (usually an unsigned char)
187 * Always false (all combining chars > 0xff)
189 #define IS_COMBINING_CH(c) 0
193 * @c: an UNICODE value (int)
195 * Macro to check the following production in the XML spec:
198 * [89] Extender ::= #x00B7 | #x02D0 | #x02D1 | #x0387 | #x0640 |
199 * #x0E46 | #x0EC6 | #x3005 | [#x3031-#x3035] |
200 * [#x309D-#x309E] | [#x30FC-#x30FE]
202 #define IS_EXTENDER(c) xmlIsExtenderQ(c)
206 * @c: an xmlChar value (usually an unsigned char)
208 * Behaves like IS_EXTENDER but with a single-byte argument
210 #define IS_EXTENDER_CH(c) xmlIsExtender_ch(c)
214 * @c: an UNICODE value (int)
216 * Macro to check the following production in the XML spec:
219 * [86] Ideographic ::= [#x4E00-#x9FA5] | #x3007 | [#x3021-#x3029]
221 #define IS_IDEOGRAPHIC(c) xmlIsIdeographicQ(c)
225 * @c: an UNICODE value (int)
227 * Macro to check the following production in the XML spec:
230 * [84] Letter ::= BaseChar | Ideographic
232 #define IS_LETTER(c) (IS_BASECHAR(c) || IS_IDEOGRAPHIC(c))
236 * @c: an xmlChar value (normally unsigned char)
238 * Macro behaves like IS_LETTER, but only check base chars
241 #define IS_LETTER_CH(c) xmlIsBaseChar_ch(c)
245 * @c: an xmlChar value
247 * Macro to check [a-zA-Z]
/* True when c lies in 'A'..'Z' (0x41-0x5a) or 'a'..'z' (0x61-0x7a).
 * Hex code points are used so the test is ASCII-based regardless of the
 * compiler's execution character set. The argument is evaluated more
 * than once. */
#define IS_ASCII_LETTER(c) \
    ((((c) >= 0x41) && ((c) <= 0x5a)) || \
     (((c) >= 0x61) && ((c) <= 0x7a)))
255 * @c: an xmlChar value
257 * Macro to check [0-9]
/* True when c lies in '0'..'9' (0x30-0x39). The argument is evaluated
 * more than once. */
#define IS_ASCII_DIGIT(c) \
    (((c) >= 0x30) && ((c) <= 0x39))
264 * @c: an UNICODE value (int)
266 * Macro to check the following production in the XML spec:
269 * [13] PubidChar ::= #x20 | #xD | #xA | [a-zA-Z0-9] | [-'()+,./:=?;!*#@$_%]
271 #define IS_PUBIDCHAR(c) xmlIsPubidCharQ(c)
275 * @c: an xmlChar value (normally unsigned char)
277 * Same as IS_PUBIDCHAR but for single-byte value
279 #define IS_PUBIDCHAR_CH(c) xmlIsPubidChar_ch(c)
 * @p: a UTF-8 string pointer
285 * Skips the end of line chars.
/*
 * Advance p past one end-of-line sequence: CR, CR LF, LF or LF CR.
 * Because the two tests run in sequence, a CR[LF] immediately followed
 * by LF[CR] is consumed as well.
 *
 * The line-ending bytes are CR = 0x0D and LF = 0x0A (decimal 13 and 10);
 * 0x13 and 0x10 are the DC3 and DLE control characters and must not be
 * used here.
 *
 * p must be a modifiable pointer lvalue; it is evaluated several times.
 * The do/while(0) wrapper makes the expansion a single statement, so the
 * macro is safe in an unbraced if/else. Invoke it with a trailing
 * semicolon.
 */
#define SKIP_EOL(p)                                                     \
    do {                                                                \
        if (*(p) == 0x0D) { (p)++; if (*(p) == 0x0A) (p)++; }           \
        if (*(p) == 0x0A) { (p)++; if (*(p) == 0x0D) (p)++; }           \
    } while (0)
 * @p: a UTF-8 string pointer
295 * Skips to the next '>' char.
/*
 * Advance p until it points at a '>' character or at the terminating
 * NUL byte, whichever comes first.
 * p must be a modifiable pointer lvalue; it is evaluated several times.
 * Every use of the argument is parenthesized so the expansion does not
 * depend on the precedence of the expression passed in.
 */
#define MOVETO_ENDTAG(p)                                                \
    while ((*(p)) && (*(p) != '>')) (p)++
 * @p: a UTF-8 string pointer
304 * Skips to the next '<' char.
/*
 * Advance p until it points at a '<' character or at the terminating
 * NUL byte, whichever comes first.
 * p must be a modifiable pointer lvalue; it is evaluated several times.
 * Every use of the argument is parenthesized so the expansion does not
 * depend on the precedence of the expression passed in.
 */
#define MOVETO_STARTTAG(p)                                              \
    while ((*(p)) && (*(p) != '<')) (p)++
310 * Global variables used for predefined strings.
312 XMLPUBVAR const xmlChar xmlStringText[];
313 XMLPUBVAR const xmlChar xmlStringTextNoenc[];
314 XMLPUBVAR const xmlChar xmlStringComment[];
317 * Function to finish the work of the macros where needed.
319 XMLPUBFUN int XMLCALL xmlIsLetter (int c);
324 XMLPUBFUN xmlParserCtxtPtr XMLCALL
325 xmlCreateFileParserCtxt (const char *filename);
326 XMLPUBFUN xmlParserCtxtPtr XMLCALL
327 xmlCreateURLParserCtxt (const char *filename,
329 XMLPUBFUN xmlParserCtxtPtr XMLCALL
330 xmlCreateMemoryParserCtxt(const char *buffer,
332 XMLPUBFUN xmlParserCtxtPtr XMLCALL
333 xmlCreateEntityParserCtxt(const xmlChar *URL,
335 const xmlChar *base);
336 XMLPUBFUN int XMLCALL
337 xmlSwitchEncoding (xmlParserCtxtPtr ctxt,
338 xmlCharEncoding enc);
339 XMLPUBFUN int XMLCALL
340 xmlSwitchToEncoding (xmlParserCtxtPtr ctxt,
341 xmlCharEncodingHandlerPtr handler);
342 XMLPUBFUN int XMLCALL
343 xmlSwitchInputEncoding (xmlParserCtxtPtr ctxt,
344 xmlParserInputPtr input,
345 xmlCharEncodingHandlerPtr handler);
348 /* internal error reporting */
349 XMLPUBFUN void XMLCALL
350 __xmlErrEncoding (xmlParserCtxtPtr ctxt,
351 xmlParserErrors xmlerr,
353 const xmlChar * str1,
354 const xmlChar * str2);
360 XMLPUBFUN xmlParserInputPtr XMLCALL
361 xmlNewStringInputStream (xmlParserCtxtPtr ctxt,
362 const xmlChar *buffer);
363 XMLPUBFUN xmlParserInputPtr XMLCALL
364 xmlNewEntityInputStream (xmlParserCtxtPtr ctxt,
365 xmlEntityPtr entity);
366 XMLPUBFUN int XMLCALL
367 xmlPushInput (xmlParserCtxtPtr ctxt,
368 xmlParserInputPtr input);
369 XMLPUBFUN xmlChar XMLCALL
370 xmlPopInput (xmlParserCtxtPtr ctxt);
371 XMLPUBFUN void XMLCALL
372 xmlFreeInputStream (xmlParserInputPtr input);
373 XMLPUBFUN xmlParserInputPtr XMLCALL
374 xmlNewInputFromFile (xmlParserCtxtPtr ctxt,
375 const char *filename);
376 XMLPUBFUN xmlParserInputPtr XMLCALL
377 xmlNewInputStream (xmlParserCtxtPtr ctxt);
382 XMLPUBFUN xmlChar * XMLCALL
383 xmlSplitQName (xmlParserCtxtPtr ctxt,
388 * Generic production rules.
390 XMLPUBFUN const xmlChar * XMLCALL
391 xmlParseName (xmlParserCtxtPtr ctxt);
392 XMLPUBFUN xmlChar * XMLCALL
393 xmlParseNmtoken (xmlParserCtxtPtr ctxt);
394 XMLPUBFUN xmlChar * XMLCALL
395 xmlParseEntityValue (xmlParserCtxtPtr ctxt,
397 XMLPUBFUN xmlChar * XMLCALL
398 xmlParseAttValue (xmlParserCtxtPtr ctxt);
399 XMLPUBFUN xmlChar * XMLCALL
400 xmlParseSystemLiteral (xmlParserCtxtPtr ctxt);
401 XMLPUBFUN xmlChar * XMLCALL
402 xmlParsePubidLiteral (xmlParserCtxtPtr ctxt);
403 XMLPUBFUN void XMLCALL
404 xmlParseCharData (xmlParserCtxtPtr ctxt,
406 XMLPUBFUN xmlChar * XMLCALL
407 xmlParseExternalID (xmlParserCtxtPtr ctxt,
410 XMLPUBFUN void XMLCALL
411 xmlParseComment (xmlParserCtxtPtr ctxt);
412 XMLPUBFUN const xmlChar * XMLCALL
413 xmlParsePITarget (xmlParserCtxtPtr ctxt);
414 XMLPUBFUN void XMLCALL
415 xmlParsePI (xmlParserCtxtPtr ctxt);
416 XMLPUBFUN void XMLCALL
417 xmlParseNotationDecl (xmlParserCtxtPtr ctxt);
418 XMLPUBFUN void XMLCALL
419 xmlParseEntityDecl (xmlParserCtxtPtr ctxt);
420 XMLPUBFUN int XMLCALL
421 xmlParseDefaultDecl (xmlParserCtxtPtr ctxt,
423 XMLPUBFUN xmlEnumerationPtr XMLCALL
424 xmlParseNotationType (xmlParserCtxtPtr ctxt);
425 XMLPUBFUN xmlEnumerationPtr XMLCALL
426 xmlParseEnumerationType (xmlParserCtxtPtr ctxt);
427 XMLPUBFUN int XMLCALL
428 xmlParseEnumeratedType (xmlParserCtxtPtr ctxt,
429 xmlEnumerationPtr *tree);
430 XMLPUBFUN int XMLCALL
431 xmlParseAttributeType (xmlParserCtxtPtr ctxt,
432 xmlEnumerationPtr *tree);
433 XMLPUBFUN void XMLCALL
434 xmlParseAttributeListDecl(xmlParserCtxtPtr ctxt);
435 XMLPUBFUN xmlElementContentPtr XMLCALL
436 xmlParseElementMixedContentDecl
437 (xmlParserCtxtPtr ctxt,
439 XMLPUBFUN xmlElementContentPtr XMLCALL
440 xmlParseElementChildrenContentDecl
441 (xmlParserCtxtPtr ctxt,
443 XMLPUBFUN int XMLCALL
444 xmlParseElementContentDecl(xmlParserCtxtPtr ctxt,
446 xmlElementContentPtr *result);
447 XMLPUBFUN int XMLCALL
448 xmlParseElementDecl (xmlParserCtxtPtr ctxt);
449 XMLPUBFUN void XMLCALL
450 xmlParseMarkupDecl (xmlParserCtxtPtr ctxt);
451 XMLPUBFUN int XMLCALL
452 xmlParseCharRef (xmlParserCtxtPtr ctxt);
453 XMLPUBFUN xmlEntityPtr XMLCALL
454 xmlParseEntityRef (xmlParserCtxtPtr ctxt);
455 XMLPUBFUN void XMLCALL
456 xmlParseReference (xmlParserCtxtPtr ctxt);
457 XMLPUBFUN void XMLCALL
458 xmlParsePEReference (xmlParserCtxtPtr ctxt);
459 XMLPUBFUN void XMLCALL
460 xmlParseDocTypeDecl (xmlParserCtxtPtr ctxt);
461 #ifdef LIBXML_SAX1_ENABLED
462 XMLPUBFUN const xmlChar * XMLCALL
463 xmlParseAttribute (xmlParserCtxtPtr ctxt,
465 XMLPUBFUN const xmlChar * XMLCALL
466 xmlParseStartTag (xmlParserCtxtPtr ctxt);
467 XMLPUBFUN void XMLCALL
468 xmlParseEndTag (xmlParserCtxtPtr ctxt);
469 #endif /* LIBXML_SAX1_ENABLED */
470 XMLPUBFUN void XMLCALL
471 xmlParseCDSect (xmlParserCtxtPtr ctxt);
472 XMLPUBFUN void XMLCALL
473 xmlParseContent (xmlParserCtxtPtr ctxt);
474 XMLPUBFUN void XMLCALL
475 xmlParseElement (xmlParserCtxtPtr ctxt);
476 XMLPUBFUN xmlChar * XMLCALL
477 xmlParseVersionNum (xmlParserCtxtPtr ctxt);
478 XMLPUBFUN xmlChar * XMLCALL
479 xmlParseVersionInfo (xmlParserCtxtPtr ctxt);
480 XMLPUBFUN xmlChar * XMLCALL
481 xmlParseEncName (xmlParserCtxtPtr ctxt);
482 XMLPUBFUN const xmlChar * XMLCALL
483 xmlParseEncodingDecl (xmlParserCtxtPtr ctxt);
484 XMLPUBFUN int XMLCALL
485 xmlParseSDDecl (xmlParserCtxtPtr ctxt);
486 XMLPUBFUN void XMLCALL
487 xmlParseXMLDecl (xmlParserCtxtPtr ctxt);
488 XMLPUBFUN void XMLCALL
489 xmlParseTextDecl (xmlParserCtxtPtr ctxt);
490 XMLPUBFUN void XMLCALL
491 xmlParseMisc (xmlParserCtxtPtr ctxt);
492 XMLPUBFUN void XMLCALL
493 xmlParseExternalSubset (xmlParserCtxtPtr ctxt,
494 const xmlChar *ExternalID,
495 const xmlChar *SystemID);
497 * XML_SUBSTITUTE_NONE:
499 * If no entities need to be substituted.
501 #define XML_SUBSTITUTE_NONE 0
503 * XML_SUBSTITUTE_REF:
505 * Whether general entities need to be substituted.
507 #define XML_SUBSTITUTE_REF 1
509 * XML_SUBSTITUTE_PEREF:
511 * Whether parameter entities need to be substituted.
513 #define XML_SUBSTITUTE_PEREF 2
515 * XML_SUBSTITUTE_BOTH:
517 * Both general and parameter entities need to be substituted.
519 #define XML_SUBSTITUTE_BOTH 3
521 XMLPUBFUN xmlChar * XMLCALL
522 xmlStringDecodeEntities (xmlParserCtxtPtr ctxt,
528 XMLPUBFUN xmlChar * XMLCALL
529 xmlStringLenDecodeEntities (xmlParserCtxtPtr ctxt,
538 * Generated by MACROS on top of parser.c c.f. PUSH_AND_POP.
540 XMLPUBFUN int XMLCALL nodePush (xmlParserCtxtPtr ctxt,
542 XMLPUBFUN xmlNodePtr XMLCALL nodePop (xmlParserCtxtPtr ctxt);
543 XMLPUBFUN int XMLCALL inputPush (xmlParserCtxtPtr ctxt,
544 xmlParserInputPtr value);
545 XMLPUBFUN xmlParserInputPtr XMLCALL inputPop (xmlParserCtxtPtr ctxt);
546 XMLPUBFUN const xmlChar * XMLCALL namePop (xmlParserCtxtPtr ctxt);
547 XMLPUBFUN int XMLCALL namePush (xmlParserCtxtPtr ctxt,
548 const xmlChar *value);
551 * other commodities shared between parser.c and parserInternals.
553 XMLPUBFUN int XMLCALL xmlSkipBlankChars (xmlParserCtxtPtr ctxt);
554 XMLPUBFUN int XMLCALL xmlStringCurrentChar (xmlParserCtxtPtr ctxt,
557 XMLPUBFUN void XMLCALL xmlParserHandlePEReference(xmlParserCtxtPtr ctxt);
558 XMLPUBFUN int XMLCALL xmlCheckLanguageID (const xmlChar *lang);
561 * Really core function shared with HTML parser.
563 XMLPUBFUN int XMLCALL xmlCurrentChar (xmlParserCtxtPtr ctxt,
565 XMLPUBFUN int XMLCALL xmlCopyCharMultiByte (xmlChar *out,
567 XMLPUBFUN int XMLCALL xmlCopyChar (int len,
570 XMLPUBFUN void XMLCALL xmlNextChar (xmlParserCtxtPtr ctxt);
571 XMLPUBFUN void XMLCALL xmlParserInputShrink (xmlParserInputPtr in);
573 #ifdef LIBXML_HTML_ENABLED
575 * Actually comes from the HTML parser but launched from the init stuff.
577 XMLPUBFUN void XMLCALL htmlInitAutoClose (void);
578 XMLPUBFUN htmlParserCtxtPtr XMLCALL htmlCreateFileParserCtxt(const char *filename,
579 const char *encoding);
583 * Specific function to keep track of entities references
584 * and used by the XSLT debugger.
586 #ifdef LIBXML_LEGACY_ENABLED
588 * xmlEntityReferenceFunc:
 * @firstNode: the first node in the chunk
 * @lastNode: the last node in the chunk
593 * Callback function used when one needs to be able to track back the
594 * provenance of a chunk of nodes inherited from an entity replacement.
596 typedef void (*xmlEntityReferenceFunc) (xmlEntityPtr ent,
597 xmlNodePtr firstNode,
598 xmlNodePtr lastNode);
600 XMLPUBFUN void XMLCALL xmlSetEntityReferenceFunc (xmlEntityReferenceFunc func);
602 XMLPUBFUN xmlChar * XMLCALL
603 xmlParseQuotedString (xmlParserCtxtPtr ctxt);
604 XMLPUBFUN void XMLCALL
605 xmlParseNamespace (xmlParserCtxtPtr ctxt);
606 XMLPUBFUN xmlChar * XMLCALL
607 xmlNamespaceParseNSDef (xmlParserCtxtPtr ctxt);
608 XMLPUBFUN xmlChar * XMLCALL
609 xmlScanName (xmlParserCtxtPtr ctxt);
610 XMLPUBFUN xmlChar * XMLCALL
611 xmlNamespaceParseNCName (xmlParserCtxtPtr ctxt);
612 XMLPUBFUN void XMLCALL xmlParserHandleReference(xmlParserCtxtPtr ctxt);
613 XMLPUBFUN xmlChar * XMLCALL
614 xmlNamespaceParseQName (xmlParserCtxtPtr ctxt,
619 XMLPUBFUN xmlChar * XMLCALL
620 xmlDecodeEntities (xmlParserCtxtPtr ctxt,
626 XMLPUBFUN void XMLCALL
627 xmlHandleEntity (xmlParserCtxtPtr ctxt,
628 xmlEntityPtr entity);
630 #endif /* LIBXML_LEGACY_ENABLED */
636 XMLPUBFUN void XMLCALL
637 xmlErrMemory (xmlParserCtxtPtr ctxt,
644 #endif /* __XML_PARSER_INTERNALS_H__ */