2 * Summary: internal routines exported by the parser.
3 * Description: this module exports a number of internal parsing routines
4 * they are not really all intended for applications but
5 * can prove useful doing low level processing.
7 * Copy: See Copyright for the status of this software.
9 * Author: Daniel Veillard
12 #ifndef __XML_PARSER_INTERNALS_H__
13 #define __XML_PARSER_INTERNALS_H__
15 #include <libxml/xmlversion.h>
16 #include <libxml/parser.h>
17 #include <libxml/HTMLparser.h>
18 #include <libxml/chvalid.h>
27 * arbitrary depth limit for the XML documents that we allow to
28 * process. This is not a limitation of the parser but a safety
29 * boundary feature, use XML_PARSE_HUGE option to override it.
/*
 * Exported global of type unsigned int. The preceding comment describes it
 * as the safety depth limit that the XML_PARSE_HUGE option overrides.
 * NOTE(review): the leading number on the next line is not valid C; it looks
 * like extraction residue and should be dropped when the header is restored.
 */
31 XMLPUBVAR unsigned int xmlParserMaxDepth;
/**
 * XML_MAX_TEXT_LENGTH:
 *
 * Maximum size allowed for a single text node when building a tree.
 * This is not a limitation of the parser but a safety boundary feature,
 * use XML_PARSE_HUGE option to override it.
 */
#define XML_MAX_TEXT_LENGTH 10000000
/**
 * XML_MAX_NAMELEN:
 *
 * Identifiers can be longer, but this will be more costly.
 */
#define XML_MAX_NAMELEN 100
/**
 * INPUT_CHUNK:
 *
 * The parser tries to always have that amount of input ready.
 * One of the points is providing context when reporting errors.
 */
#define INPUT_CHUNK	250
58 /************************************************************************
60 * UNICODE version of the macros. *
62 ************************************************************************/
/**
 * IS_BYTE_CHAR:
 * @c:  a byte value (int)
 *
 * Macro to check the following production in the XML spec:
 *
 * [2] Char ::= #x9 | #xA | #xD | [#x20...]
 * any byte character in the accepted range
 *
 * Expands to xmlIsChar_ch(c).
 */
#define IS_BYTE_CHAR(c)	 xmlIsChar_ch(c)
/**
 * IS_CHAR:
 * @c:  a Unicode value (int)
 *
 * Macro to check the following production in the XML spec:
 *
 * [2] Char ::= #x9 | #xA | #xD | [#x20-#xD7FF] | [#xE000-#xFFFD]
 *                  | [#x10000-#x10FFFF]
 * any Unicode character, excluding the surrogate blocks, FFFE, and FFFF.
 *
 * Expands to xmlIsCharQ(c).
 */
#define IS_CHAR(c)   xmlIsCharQ(c)
/**
 * IS_CHAR_CH:
 * @c:  an xmlChar (usually an unsigned char)
 *
 * Behaves like IS_CHAR on a single-byte value.
 * Expands to xmlIsChar_ch(c).
 */
#define IS_CHAR_CH(c)  xmlIsChar_ch(c)
/**
 * IS_BLANK:
 * @c:  a Unicode value (int)
 *
 * Macro to check the following production in the XML spec:
 *
 * [3] S ::= (#x20 | #x9 | #xD | #xA)+
 *
 * Expands to xmlIsBlankQ(c).
 */
#define IS_BLANK(c)  xmlIsBlankQ(c)
/**
 * IS_BLANK_CH:
 * @c:  an xmlChar value (normally unsigned char)
 *
 * Same behaviour as IS_BLANK, for a single-byte value.
 * Expands to xmlIsBlank_ch(c).
 */
#define IS_BLANK_CH(c)  xmlIsBlank_ch(c)
/**
 * IS_BASECHAR:
 * @c:  a Unicode value (int)
 *
 * Macro to check the following production in the XML spec:
 *
 * [85] BaseChar ::= ... long list see REC ...
 *
 * Expands to xmlIsBaseCharQ(c).
 */
#define IS_BASECHAR(c) xmlIsBaseCharQ(c)
/**
 * IS_DIGIT:
 * @c:  a Unicode value (int)
 *
 * Macro to check the following production in the XML spec:
 *
 * [88] Digit ::= ... long list see REC ...
 *
 * Expands to xmlIsDigitQ(c).
 */
#define IS_DIGIT(c) xmlIsDigitQ(c)
/**
 * IS_DIGIT_CH:
 * @c:  an xmlChar value (usually an unsigned char)
 *
 * Behaves like IS_DIGIT but with a single-byte argument.
 * Expands to xmlIsDigit_ch(c).
 */
#define IS_DIGIT_CH(c)  xmlIsDigit_ch(c)
/**
 * IS_COMBINING:
 * @c:  a Unicode value (int)
 *
 * Macro to check the following production in the XML spec:
 *
 * [87] CombiningChar ::= ... long list see REC ...
 *
 * Expands to xmlIsCombiningQ(c).
 */
#define IS_COMBINING(c) xmlIsCombiningQ(c)
/**
 * IS_COMBINING_CH:
 * @c:  an xmlChar (usually an unsigned char)
 *
 * Always false (all combining chars > 0xff).
 * The argument is not evaluated.
 */
#define IS_COMBINING_CH(c) 0
/**
 * IS_EXTENDER:
 * @c:  a Unicode value (int)
 *
 * Macro to check the following production in the XML spec:
 *
 * [89] Extender ::= #x00B7 | #x02D0 | #x02D1 | #x0387 | #x0640 |
 *                   #x0E46 | #x0EC6 | #x3005 | [#x3031-#x3035] |
 *                   [#x309D-#x309E] | [#x30FC-#x30FE]
 *
 * Expands to xmlIsExtenderQ(c).
 */
#define IS_EXTENDER(c) xmlIsExtenderQ(c)
/**
 * IS_EXTENDER_CH:
 * @c:  an xmlChar value (usually an unsigned char)
 *
 * Behaves like IS_EXTENDER but with a single-byte argument.
 * Expands to xmlIsExtender_ch(c).
 */
#define IS_EXTENDER_CH(c)  xmlIsExtender_ch(c)
/**
 * IS_IDEOGRAPHIC:
 * @c:  a Unicode value (int)
 *
 * Macro to check the following production in the XML spec:
 *
 * [86] Ideographic ::= [#x4E00-#x9FA5] | #x3007 | [#x3021-#x3029]
 *
 * Expands to xmlIsIdeographicQ(c).
 */
#define IS_IDEOGRAPHIC(c) xmlIsIdeographicQ(c)
/**
 * IS_LETTER:
 * @c:  a Unicode value (int)
 *
 * Macro to check the following production in the XML spec:
 *
 * [84] Letter ::= BaseChar | Ideographic
 *
 * The argument is passed to both IS_BASECHAR and IS_IDEOGRAPHIC, so it
 * appears more than once in the expansion; avoid arguments with side effects.
 */
#define IS_LETTER(c) (IS_BASECHAR(c) || IS_IDEOGRAPHIC(c))
/**
 * IS_LETTER_CH:
 * @c:  an xmlChar value (normally unsigned char)
 *
 * Macro behaves like IS_LETTER, but only checks base chars.
 * Expands to xmlIsBaseChar_ch(c).
 */
#define IS_LETTER_CH(c) xmlIsBaseChar_ch(c)
/**
 * IS_ASCII_LETTER:
 * @c:  an xmlChar value
 *
 * Macro to check [a-zA-Z], i.e. 0x41..0x5a or 0x61..0x7a.
 * The argument is evaluated more than once; avoid side effects in it.
 */
#define IS_ASCII_LETTER(c)	(((0x41 <= (c)) && ((c) <= 0x5a)) || \
				 ((0x61 <= (c)) && ((c) <= 0x7a)))
/**
 * IS_ASCII_DIGIT:
 * @c:  an xmlChar value
 *
 * Macro to check [0-9], i.e. 0x30..0x39.
 * The argument is evaluated more than once; avoid side effects in it.
 */
#define IS_ASCII_DIGIT(c)	((0x30 <= (c)) && ((c) <= 0x39))
/**
 * IS_PUBIDCHAR:
 * @c:  a Unicode value (int)
 *
 * Macro to check the following production in the XML spec:
 *
 * [13] PubidChar ::= #x20 | #xD | #xA | [a-zA-Z0-9] | [-'()+,./:=?;!*#@$_%]
 *
 * Expands to xmlIsPubidCharQ(c).
 */
#define IS_PUBIDCHAR(c)	xmlIsPubidCharQ(c)
/**
 * IS_PUBIDCHAR_CH:
 * @c:  an xmlChar value (normally unsigned char)
 *
 * Same as IS_PUBIDCHAR but for a single-byte value.
 * Expands to xmlIsPubidChar_ch(c).
 */
#define IS_PUBIDCHAR_CH(c) xmlIsPubidChar_ch(c)
/**
 * SKIP_EOL:
 * @p:  a UTF8 string pointer (must be a modifiable lvalue)
 *
 * Skips one end-of-line sequence at @p: CR (0x0D) optionally followed by
 * LF (0x0A), or LF optionally followed by CR. @p is left unchanged when it
 * does not point at CR or LF.
 *
 * The expansion is an if / else-if statement, not an expression: do not use
 * it as the unbraced body of another if that has its own else branch.
 * @p is evaluated several times.
 */
#define SKIP_EOL(p)							\
    if (*(p) == 0x0D) { (p)++ ; if (*(p) == 0x0A) (p)++; }		\
    else if (*(p) == 0x0A) { (p)++ ; if (*(p) == 0x0D) (p)++; }
/**
 * MOVETO_ENDTAG:
 * @p:  a UTF8 string pointer (must be a modifiable lvalue)
 *
 * Skips to the next '>' char: advances @p until it points at '>' or at the
 * terminating 0 byte, whichever comes first.
 * The expansion has no trailing semicolon; @p is evaluated several times.
 */
#define MOVETO_ENDTAG(p)						\
    while ((*(p)) && (*(p) != '>')) (p)++
/**
 * MOVETO_STARTTAG:
 * @p:  a UTF8 string pointer (must be a modifiable lvalue)
 *
 * Skips to the next '<' char: advances @p until it points at '<' or at the
 * terminating 0 byte, whichever comes first.
 * The expansion has no trailing semicolon; @p is evaluated several times.
 */
#define MOVETO_STARTTAG(p)						\
    while ((*(p)) && (*(p) != '<')) (p)++
277 * Global variables used for predefined strings.
/*
 * Three exported constant xmlChar arrays. Only the declarations appear here
 * (no size, no initializer), so their contents cannot be read from this file.
 * NOTE(review): the leading numbers on these lines are not valid C and look
 * like extraction residue.
 */
279 XMLPUBVAR const xmlChar xmlStringText[];
280 XMLPUBVAR const xmlChar xmlStringTextNoenc[];
281 XMLPUBVAR const xmlChar xmlStringComment[];
284 * Function to finish the work of the macros where needed.
/*
 * Function form taking an int and returning an int; the preceding comment
 * describes it as finishing the work of the macros where needed.
 */
286 XMLPUBFUN int XMLCALL xmlIsLetter (int c);
/*
 * Parser context constructors (each returns an xmlParserCtxtPtr), followed by
 * the three encoding-switch entry points (each returns int).
 * NOTE(review): the embedded numbering jumps 294 -> 296, 297 -> 299 and
 * 300 -> 302, and the parameter lists of xmlCreateURLParserCtxt,
 * xmlCreateMemoryParserCtxt and xmlCreateEntityParserCtxt are visibly
 * incomplete here. Restore the missing parameter lines from the pristine
 * header; do not infer them from this copy.
 */
291 XMLPUBFUN xmlParserCtxtPtr XMLCALL
292 xmlCreateFileParserCtxt (const char *filename);
293 XMLPUBFUN xmlParserCtxtPtr XMLCALL
294 xmlCreateURLParserCtxt (const char *filename,
296 XMLPUBFUN xmlParserCtxtPtr XMLCALL
297 xmlCreateMemoryParserCtxt(const char *buffer,
299 XMLPUBFUN xmlParserCtxtPtr XMLCALL
300 xmlCreateEntityParserCtxt(const xmlChar *URL,
302 const xmlChar *base);
303 XMLPUBFUN int XMLCALL
304 xmlSwitchEncoding (xmlParserCtxtPtr ctxt,
305 xmlCharEncoding enc);
306 XMLPUBFUN int XMLCALL
307 xmlSwitchToEncoding (xmlParserCtxtPtr ctxt,
308 xmlCharEncodingHandlerPtr handler);
309 XMLPUBFUN int XMLCALL
310 xmlSwitchInputEncoding (xmlParserCtxtPtr ctxt,
311 xmlParserInputPtr input,
312 xmlCharEncodingHandlerPtr handler);
315 /* internal error reporting */
/*
 * Takes the parser context, an xmlParserErrors code and two xmlChar strings;
 * returns nothing.
 * NOTE(review): the embedded numbering skips from 318 to 320, so a parameter
 * line between xmlerr and str1 is presumably missing from this copy.
 */
316 XMLPUBFUN void XMLCALL
317 __xmlErrEncoding (xmlParserCtxtPtr ctxt,
318 xmlParserErrors xmlerr,
320 const xmlChar * str1,
321 const xmlChar * str2);
/*
 * Parser input declarations: four functions returning an xmlParserInputPtr
 * (from a string buffer, an entity, a file name, or the context alone),
 * xmlPushInput / xmlPopInput operating on a parser context, and
 * xmlFreeInputStream taking an xmlParserInputPtr.
 * Note that xmlPopInput is declared to return an xmlChar, not a pointer.
 */
327 XMLPUBFUN xmlParserInputPtr XMLCALL
328 xmlNewStringInputStream (xmlParserCtxtPtr ctxt,
329 const xmlChar *buffer);
330 XMLPUBFUN xmlParserInputPtr XMLCALL
331 xmlNewEntityInputStream (xmlParserCtxtPtr ctxt,
332 xmlEntityPtr entity);
333 XMLPUBFUN int XMLCALL
334 xmlPushInput (xmlParserCtxtPtr ctxt,
335 xmlParserInputPtr input);
336 XMLPUBFUN xmlChar XMLCALL
337 xmlPopInput (xmlParserCtxtPtr ctxt);
338 XMLPUBFUN void XMLCALL
339 xmlFreeInputStream (xmlParserInputPtr input);
340 XMLPUBFUN xmlParserInputPtr XMLCALL
341 xmlNewInputFromFile (xmlParserCtxtPtr ctxt,
342 const char *filename);
343 XMLPUBFUN xmlParserInputPtr XMLCALL
344 xmlNewInputStream (xmlParserCtxtPtr ctxt);
/*
 * Returns an xmlChar pointer; the first parameter is the parser context.
 * NOTE(review): the parameter list is cut off after ctxt in this copy, so
 * the remaining parameters must be restored from the pristine header.
 */
349 XMLPUBFUN xmlChar * XMLCALL
350 xmlSplitQName (xmlParserCtxtPtr ctxt,
355 * Generic production rules.
/*
 * Production-rule entry points, first group. All take the parser context as
 * first parameter. xmlParseName and xmlParsePITarget return const xmlChar
 * pointers; the other non-void ones return non-const xmlChar pointers.
 * NOTE(review): xmlParseEntityValue (362 -> 364), xmlParseCharData
 * (371 -> 373) and xmlParseExternalID (374 -> 377) have lost their trailing
 * parameter lines in this copy.
 */
357 XMLPUBFUN const xmlChar * XMLCALL
358 xmlParseName (xmlParserCtxtPtr ctxt);
359 XMLPUBFUN xmlChar * XMLCALL
360 xmlParseNmtoken (xmlParserCtxtPtr ctxt);
361 XMLPUBFUN xmlChar * XMLCALL
362 xmlParseEntityValue (xmlParserCtxtPtr ctxt,
364 XMLPUBFUN xmlChar * XMLCALL
365 xmlParseAttValue (xmlParserCtxtPtr ctxt);
366 XMLPUBFUN xmlChar * XMLCALL
367 xmlParseSystemLiteral (xmlParserCtxtPtr ctxt);
368 XMLPUBFUN xmlChar * XMLCALL
369 xmlParsePubidLiteral (xmlParserCtxtPtr ctxt);
370 XMLPUBFUN void XMLCALL
371 xmlParseCharData (xmlParserCtxtPtr ctxt,
373 XMLPUBFUN xmlChar * XMLCALL
374 xmlParseExternalID (xmlParserCtxtPtr ctxt,
377 XMLPUBFUN void XMLCALL
378 xmlParseComment (xmlParserCtxtPtr ctxt);
379 XMLPUBFUN const xmlChar * XMLCALL
380 xmlParsePITarget (xmlParserCtxtPtr ctxt);
381 XMLPUBFUN void XMLCALL
382 xmlParsePI (xmlParserCtxtPtr ctxt);
383 XMLPUBFUN void XMLCALL
384 xmlParseNotationDecl (xmlParserCtxtPtr ctxt);
385 XMLPUBFUN void XMLCALL
386 xmlParseEntityDecl (xmlParserCtxtPtr ctxt);
/*
 * Production-rule entry points, second group (declarations, content models,
 * references, DOCTYPE). All take the parser context as first parameter.
 * xmlParseEnumeratedType and xmlParseAttributeType also take an
 * xmlEnumerationPtr pointer named tree.
 * NOTE(review): xmlParseDefaultDecl (388 -> 390),
 * xmlParseElementMixedContentDecl (404 -> 406),
 * xmlParseElementChildrenContentDecl (408 -> 410) and
 * xmlParseElementContentDecl (411 -> 413) are missing parameter lines in
 * this copy.
 */
387 XMLPUBFUN int XMLCALL
388 xmlParseDefaultDecl (xmlParserCtxtPtr ctxt,
390 XMLPUBFUN xmlEnumerationPtr XMLCALL
391 xmlParseNotationType (xmlParserCtxtPtr ctxt);
392 XMLPUBFUN xmlEnumerationPtr XMLCALL
393 xmlParseEnumerationType (xmlParserCtxtPtr ctxt);
394 XMLPUBFUN int XMLCALL
395 xmlParseEnumeratedType (xmlParserCtxtPtr ctxt,
396 xmlEnumerationPtr *tree);
397 XMLPUBFUN int XMLCALL
398 xmlParseAttributeType (xmlParserCtxtPtr ctxt,
399 xmlEnumerationPtr *tree);
400 XMLPUBFUN void XMLCALL
401 xmlParseAttributeListDecl(xmlParserCtxtPtr ctxt);
402 XMLPUBFUN xmlElementContentPtr XMLCALL
403 xmlParseElementMixedContentDecl
404 (xmlParserCtxtPtr ctxt,
406 XMLPUBFUN xmlElementContentPtr XMLCALL
407 xmlParseElementChildrenContentDecl
408 (xmlParserCtxtPtr ctxt,
410 XMLPUBFUN int XMLCALL
411 xmlParseElementContentDecl(xmlParserCtxtPtr ctxt,
413 xmlElementContentPtr *result);
414 XMLPUBFUN int XMLCALL
415 xmlParseElementDecl (xmlParserCtxtPtr ctxt);
416 XMLPUBFUN void XMLCALL
417 xmlParseMarkupDecl (xmlParserCtxtPtr ctxt);
418 XMLPUBFUN int XMLCALL
419 xmlParseCharRef (xmlParserCtxtPtr ctxt);
420 XMLPUBFUN xmlEntityPtr XMLCALL
421 xmlParseEntityRef (xmlParserCtxtPtr ctxt);
422 XMLPUBFUN void XMLCALL
423 xmlParseReference (xmlParserCtxtPtr ctxt);
424 XMLPUBFUN void XMLCALL
425 xmlParsePEReference (xmlParserCtxtPtr ctxt);
426 XMLPUBFUN void XMLCALL
427 xmlParseDocTypeDecl (xmlParserCtxtPtr ctxt);
/*
 * Declarations that exist only when LIBXML_SAX1_ENABLED is defined.
 * NOTE(review): xmlParseAttribute is cut off after ctxt (430 -> 432) in this
 * copy; its remaining parameters must come from the pristine header.
 */
428 #ifdef LIBXML_SAX1_ENABLED
429 XMLPUBFUN const xmlChar * XMLCALL
430 xmlParseAttribute (xmlParserCtxtPtr ctxt,
432 XMLPUBFUN const xmlChar * XMLCALL
433 xmlParseStartTag (xmlParserCtxtPtr ctxt);
434 XMLPUBFUN void XMLCALL
435 xmlParseEndTag (xmlParserCtxtPtr ctxt);
436 #endif /* LIBXML_SAX1_ENABLED */
/*
 * Production-rule entry points, third group (CDATA sections, content,
 * elements, XML/text declaration parts, external subset). All take the
 * parser context as first parameter. xmlParseEncodingDecl returns a const
 * xmlChar pointer while xmlParseVersionNum, xmlParseVersionInfo and
 * xmlParseEncName return non-const xmlChar pointers. xmlParseExternalSubset
 * additionally takes the ExternalID and SystemID strings.
 */
437 XMLPUBFUN void XMLCALL
438 xmlParseCDSect (xmlParserCtxtPtr ctxt);
439 XMLPUBFUN void XMLCALL
440 xmlParseContent (xmlParserCtxtPtr ctxt);
441 XMLPUBFUN void XMLCALL
442 xmlParseElement (xmlParserCtxtPtr ctxt);
443 XMLPUBFUN xmlChar * XMLCALL
444 xmlParseVersionNum (xmlParserCtxtPtr ctxt);
445 XMLPUBFUN xmlChar * XMLCALL
446 xmlParseVersionInfo (xmlParserCtxtPtr ctxt);
447 XMLPUBFUN xmlChar * XMLCALL
448 xmlParseEncName (xmlParserCtxtPtr ctxt);
449 XMLPUBFUN const xmlChar * XMLCALL
450 xmlParseEncodingDecl (xmlParserCtxtPtr ctxt);
451 XMLPUBFUN int XMLCALL
452 xmlParseSDDecl (xmlParserCtxtPtr ctxt);
453 XMLPUBFUN void XMLCALL
454 xmlParseXMLDecl (xmlParserCtxtPtr ctxt);
455 XMLPUBFUN void XMLCALL
456 xmlParseTextDecl (xmlParserCtxtPtr ctxt);
457 XMLPUBFUN void XMLCALL
458 xmlParseMisc (xmlParserCtxtPtr ctxt);
459 XMLPUBFUN void XMLCALL
460 xmlParseExternalSubset (xmlParserCtxtPtr ctxt,
461 const xmlChar *ExternalID,
462 const xmlChar *SystemID);
/**
 * XML_SUBSTITUTE_NONE:
 *
 * If no entities need to be substituted.
 */
#define XML_SUBSTITUTE_NONE	0
/**
 * XML_SUBSTITUTE_REF:
 *
 * Whether general entities need to be substituted.
 */
#define XML_SUBSTITUTE_REF	1
/**
 * XML_SUBSTITUTE_PEREF:
 *
 * Whether parameter entities need to be substituted.
 */
#define XML_SUBSTITUTE_PEREF	2
/**
 * XML_SUBSTITUTE_BOTH:
 *
 * Both general and parameter entities need to be substituted.
 * Numerically equal to XML_SUBSTITUTE_REF | XML_SUBSTITUTE_PEREF.
 */
#define XML_SUBSTITUTE_BOTH	3
/*
 * Two entity-decoding entry points, both returning an xmlChar pointer and
 * taking the parser context first.
 * NOTE(review): both parameter lists are cut off after ctxt in this copy
 * (489 -> 495, 496 -> 505); restore them from the pristine header.
 */
488 XMLPUBFUN xmlChar * XMLCALL
489 xmlStringDecodeEntities (xmlParserCtxtPtr ctxt,
495 XMLPUBFUN xmlChar * XMLCALL
496 xmlStringLenDecodeEntities (xmlParserCtxtPtr ctxt,
505 * Generated by MACROS on top of parser.c c.f. PUSH_AND_POP.
/*
 * Push/pop pairs on the parser context for nodes, inputs and names. The push
 * functions return int; the pop functions return the popped item type
 * (xmlNodePtr, xmlParserInputPtr, const xmlChar pointer).
 * NOTE(review): nodePush is cut off after ctxt (507 -> 509) in this copy.
 */
507 XMLPUBFUN int XMLCALL nodePush (xmlParserCtxtPtr ctxt,
509 XMLPUBFUN xmlNodePtr XMLCALL nodePop (xmlParserCtxtPtr ctxt);
510 XMLPUBFUN int XMLCALL inputPush (xmlParserCtxtPtr ctxt,
511 xmlParserInputPtr value);
512 XMLPUBFUN xmlParserInputPtr XMLCALL inputPop (xmlParserCtxtPtr ctxt);
513 XMLPUBFUN const xmlChar * XMLCALL namePop (xmlParserCtxtPtr ctxt);
514 XMLPUBFUN int XMLCALL namePush (xmlParserCtxtPtr ctxt,
515 const xmlChar *value);
518 * other commodities shared between parser.c and parserInternals.
/*
 * Helper declarations. xmlCheckLanguageID is the only one here that does not
 * take a parser context: it takes a const xmlChar string and returns int.
 * NOTE(review): xmlStringCurrentChar is cut off after ctxt (521 -> 524) in
 * this copy.
 */
520 XMLPUBFUN int XMLCALL xmlSkipBlankChars (xmlParserCtxtPtr ctxt);
521 XMLPUBFUN int XMLCALL xmlStringCurrentChar (xmlParserCtxtPtr ctxt,
524 XMLPUBFUN void XMLCALL xmlParserHandlePEReference(xmlParserCtxtPtr ctxt);
525 XMLPUBFUN int XMLCALL xmlCheckLanguageID (const xmlChar *lang);
528 * Really core function shared with HTML parser.
/*
 * Character-level declarations; the preceding comment says these are shared
 * with the HTML parser.
 * NOTE(review): xmlCurrentChar (530 -> 532), xmlCopyCharMultiByte
 * (532 -> 534) and xmlCopyChar (534 -> 537) are each cut off after their
 * first parameter in this copy; restore them from the pristine header.
 */
530 XMLPUBFUN int XMLCALL xmlCurrentChar (xmlParserCtxtPtr ctxt,
532 XMLPUBFUN int XMLCALL xmlCopyCharMultiByte (xmlChar *out,
534 XMLPUBFUN int XMLCALL xmlCopyChar (int len,
537 XMLPUBFUN void XMLCALL xmlNextChar (xmlParserCtxtPtr ctxt);
538 XMLPUBFUN void XMLCALL xmlParserInputShrink (xmlParserInputPtr in);
/*
 * HTML-parser declarations, present only when LIBXML_HTML_ENABLED is defined.
 * htmlCreateFileParserCtxt takes a file name and an encoding name and returns
 * an htmlParserCtxtPtr.
 * NOTE(review): no matching #endif for this #ifdef is visible in this copy
 * (the numbering jumps from 546 to 550); confirm it exists in the pristine
 * header.
 */
540 #ifdef LIBXML_HTML_ENABLED
542 * Actually comes from the HTML parser but launched from the init stuff.
544 XMLPUBFUN void XMLCALL htmlInitAutoClose (void);
545 XMLPUBFUN htmlParserCtxtPtr XMLCALL htmlCreateFileParserCtxt(const char *filename,
546 const char *encoding);
550 * Specific function to keep track of entities references
551 * and used by the XSLT debugger.
553 #ifdef LIBXML_LEGACY_ENABLED
555 * xmlEntityReferenceFunc:
557 * @firstNode: the first node in the chunk
558 * @lastNode: the last node in the chunk
560 * Callback function used when one needs to be able to track back the
561 * provenance of a chunk of nodes inherited from an entity replacement.
/*
 * Callback type receiving an entity plus the first and last node of a chunk;
 * it returns nothing. xmlSetEntityReferenceFunc takes one such callback.
 */
563 typedef void (*xmlEntityReferenceFunc) (xmlEntityPtr ent,
564 xmlNodePtr firstNode,
565 xmlNodePtr lastNode);
567 XMLPUBFUN void XMLCALL xmlSetEntityReferenceFunc (xmlEntityReferenceFunc func);
/*
 * Further declarations inside the LIBXML_LEGACY_ENABLED section. All take
 * the parser context as first parameter; xmlHandleEntity also takes an
 * xmlEntityPtr.
 * NOTE(review): xmlNamespaceParseQName (581 -> 586) and xmlDecodeEntities
 * (587 -> 593) are cut off after ctxt in this copy.
 */
569 XMLPUBFUN xmlChar * XMLCALL
570 xmlParseQuotedString (xmlParserCtxtPtr ctxt);
571 XMLPUBFUN void XMLCALL
572 xmlParseNamespace (xmlParserCtxtPtr ctxt);
573 XMLPUBFUN xmlChar * XMLCALL
574 xmlNamespaceParseNSDef (xmlParserCtxtPtr ctxt);
575 XMLPUBFUN xmlChar * XMLCALL
576 xmlScanName (xmlParserCtxtPtr ctxt);
577 XMLPUBFUN xmlChar * XMLCALL
578 xmlNamespaceParseNCName (xmlParserCtxtPtr ctxt);
579 XMLPUBFUN void XMLCALL xmlParserHandleReference(xmlParserCtxtPtr ctxt);
580 XMLPUBFUN xmlChar * XMLCALL
581 xmlNamespaceParseQName (xmlParserCtxtPtr ctxt,
586 XMLPUBFUN xmlChar * XMLCALL
587 xmlDecodeEntities (xmlParserCtxtPtr ctxt,
593 XMLPUBFUN void XMLCALL
594 xmlHandleEntity (xmlParserCtxtPtr ctxt,
595 xmlEntityPtr entity);
597 #endif /* LIBXML_LEGACY_ENABLED */
/*
 * Returns nothing; the first parameter is the parser context.
 * NOTE(review): the parameter list is cut off after ctxt in this copy
 * (604 -> 611); restore the remaining parameter(s) from the pristine header.
 */
603 XMLPUBFUN void XMLCALL
604 xmlErrMemory (xmlParserCtxtPtr ctxt,
611 #endif /* __XML_PARSER_INTERNALS_H__ */