2 * This file is included by xml.rl
10 # Common XML grammar rules based on the XML 1.0 BNF from:
11 # http://www.jelks.nu/XML/xmlebnf.html
# Whitespace run: one or more of space (0x20), tab (0x9), CR (0xD) or LF (0xA).
16 S = (0x20 | 0x9 | 0xD | 0xA)+;
18 # WAS PubidChar = 0x20 | 0xD | 0xA | [a-zA-Z0-9] | [-'()+,./:=?;!*#@$_%];
# Single character allowed inside a public identifier. The only difference from
# the WAS line above is that the leading '-' of the bracket class is escaped.
19 PubidChar = 0x20 | 0xD | 0xA | [a-zA-Z0-9] | [\-'()+,./:=?;!*#@$_%];
# Quoted public identifier. The single-quoted form subtracts "'" from PubidChar
# so the literal cannot contain its own closing delimiter.
21 PubidLiteral = '"' PubidChar* '"' | "'" (PubidChar - "'")* "'";
# Name: a Letter, '_' or ':' followed by any number of NameChar.
# (Letter and NameChar are not defined in the visible portion of this file.)
23 Name = (Letter | '_' | ':') (NameChar)*;
# Comment: the body is built from single non-'-' characters, or a '-' followed
# by a non-'-' character, so two consecutive dashes cannot occur inside the body.
25 Comment = '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->';
27 # Used strong subtraction operator, and replaced * with +. Ragel complained since using
28 # * results in a machine that accepts 0 length strings, and later it's only used in an
29 # optional construct anyway.
# Earlier formulation of CharData; not referenced by any rule visible in this section.
31 CharData_Old = [^<&]* - ([^<&]* ']]>' [^<&]*);
# Character data: a non-empty run of characters other than '<' and '&' that does
# not contain the sequence ']]>' anywhere.
32 CharData = [^<&]+ -- ']]>';
# System identifier: any characters other than the enclosing quote character,
# delimited by either double or single quotes.
34 SystemLiteral = ('"' [^"]* '"') | ("'" [^']* "'");
# Version number: one or more letters, digits, '_', '.', ':' or '-'.
38 VersionNum = ([a-zA-Z0-9_.:] | '-')+;
40 # WAS S 'version' Eq (' VersionNum ' | " VersionNum ") - fixed quotes
# version=... pseudo-attribute. The leading S is mandatory and either quote
# style is accepted. (Eq is not defined in the visible portion of this file.)
41 VersionInfo = S 'version' Eq ("'" VersionNum "'" | '"' VersionNum '"');
# External identifier: 'SYSTEM' with a system literal, or 'PUBLIC' with a public
# literal followed by a system literal.
43 ExternalID = 'SYSTEM' S SystemLiteral | 'PUBLIC' S PubidLiteral S SystemLiteral;
# 'PUBLIC' identifier without a trailing system literal.
45 PublicID = 'PUBLIC' S PubidLiteral;
# Notation declaration: '<!NOTATION' name, then an ExternalID or a PublicID,
# optional whitespace, '>'.
47 NotationDecl = '<!NOTATION' S Name S (ExternalID | PublicID) S? '>';
# Encoding name: an ASCII letter followed by letters, digits, '.', '_' or '-'.
49 EncName = [A-Za-z] ([A-Za-z0-9._] | '-')*;
# encoding=... pseudo-attribute. The leading S is mandatory and either quote
# style is accepted.
51 EncodingDecl = S 'encoding' Eq ('"' EncName '"' | "'" EncName "'" );
53 # UNUSED TextDecl = '<?xml' VersionInfo? EncodingDecl S? '?>';
# 'NDATA' clause: whitespace, the keyword, whitespace, then a Name.
55 NDataDecl = S 'NDATA' S Name;
# Parameter-entity reference: '%' Name ';'.
57 PEReference = '%' Name ';';
# General entity reference: '&' Name ';'.
59 EntityRef = '&' Name ';';
# Character reference: decimal '&#' digits ';' or hexadecimal '&#x' hex-digits ';'.
# The hexadecimal prefix must be '&#x' (XML 1.0 production [66] CharRef). It was
# previously written as '&0x', which rejected every well-formed hexadecimal
# reference such as '&#x41;'.
61 CharRef = '&#' [0-9]+ ';' | '&#x' [0-9a-fA-F]+ ';';
# Any reference: a named entity reference or a character reference.
63 Reference = EntityRef | CharRef;
# Quoted entity value. Inside the quotes, '%' and '&' may only appear as the
# start of a PEReference or Reference, and the enclosing quote character is excluded.
65 EntityValue = '"' ([^%&"] | PEReference | Reference)* '"' | "'" ([^%&'] | PEReference | Reference)* "'";
# Definition part of a parameter-entity declaration.
67 PEDef = EntityValue | ExternalID;
# Definition part of a general-entity declaration. Only the ExternalID form may
# be followed by the optional NDataDecl.
69 EntityDef = EntityValue | (ExternalID NDataDecl?);
# Parameter-entity declaration: '<!ENTITY' S '%' S name S PEDef, optional whitespace, '>'.
71 PEDecl = '<!ENTITY' S '%' S Name S PEDef S? '>';
# General-entity declaration: '<!ENTITY' S name S EntityDef, optional whitespace, '>'.
73 GEDecl = '<!ENTITY' S Name S EntityDef S? '>';
# Either kind of entity declaration.
75 EntityDecl = GEDecl | PEDecl;
# Mixed content model: '(' '#PCDATA' followed by zero or more '|' Name
# alternatives and closed by ')*', or '(' '#PCDATA' ')' closed by a plain ')'.
77 Mixed = '(' S? '#PCDATA' (S? '|' S? Name)* S? ')*' | '(' S? '#PCDATA' S? ')';
79 # WAS cp = (Name | choice | seq) ('?' | '*' | '+')?;
81 # WAS seq = '(' S? cp ( S? ',' S? cp )* S? ')';
83 # WAS choice = '(' S? cp ( S? '|' S? cp )* S? ')';
85 # WAS children = (choice | seq) ('?' | '*' | '+')?;
87 # TODO put validation for this in and make it clearer
# Occurrence indicator: exactly one of '?', '*' or '+'.
88 alt = '?' | '*' | '+';
# Content specification of an element declaration: the keywords 'EMPTY' or
# 'ANY', a Mixed model, or a children model.
# (children is not defined in the visible portion of this file.)
97 contentspec = 'EMPTY' | 'ANY' | Mixed | children;
# Element declaration: '<!ELEMENT' S name S contentspec, optional whitespace, '>'.
99 elementdecl = '<!ELEMENT' S Name S contentspec S? '>';
# Quoted attribute value. Inside the quotes, '<' and the enclosing quote
# character are excluded, and '&' may only appear as the start of a Reference.
101 AttValue = '"' ([^<&"] | Reference)* '"' | "'" ([^<&'] | Reference)* "'";
# Attribute: Name, Eq, then a quoted value.
103 Attribute = Name Eq AttValue;
# Name token: one or more NameChar (no restriction on the first character).
105 Nmtoken = (NameChar)+;
107 # UNUSED Nmtokens = Nmtoken (S Nmtoken)*;
# Parenthesised, '|'-separated list of one or more Nmtoken, with optional
# whitespace around each item.
109 Enumeration = '(' S? Nmtoken (S? '|' S? Nmtoken)* S? ')';
# 'NOTATION' followed by whitespace and a parenthesised, '|'-separated list of
# one or more Name.
111 NotationType = 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? ')';
# Enumerated attribute type: a notation list or a plain enumeration.
113 EnumeratedType = NotationType | Enumeration;
# Keyword attribute types other than 'CDATA'.
115 TokenizedType = 'ID' | 'IDREF' | 'IDREFS' | 'ENTITY' | 'ENTITIES' | 'NMTOKEN' | 'NMTOKENS';
# The 'CDATA' attribute type keyword.
117 StringType = 'CDATA';
# Any attribute type accepted in an attribute definition.
119 AttType = StringType | TokenizedType | EnumeratedType;
# Attribute default: '#REQUIRED', '#IMPLIED', or an AttValue optionally
# preceded by '#FIXED' and whitespace.
121 DefaultDecl = '#REQUIRED' | '#IMPLIED' | (('#FIXED' S)? AttValue);
# One attribute definition. It starts with its own mandatory S, which is why
# AttlistDecl places no S between Name and AttDef*.
123 AttDef = S Name S AttType S DefaultDecl;
# Attribute-list declaration: '<!ATTLIST' S element-name, zero or more AttDef,
# optional whitespace, '>'.
125 AttlistDecl = '<!ATTLIST' S Name AttDef* S? '>';
# Empty-element tag: '<' Name, zero or more whitespace-prefixed attributes,
# optional whitespace, '/>'.
127 EmptyElemTag = '<' Name (S Attribute)* S? '/>';
# End tag: '</' Name, optional whitespace, '>'.
129 ETag = '</' Name S? '>';
# Original formulation: any Name except the three-letter name "xml" in any
# letter case. Not referenced by any rule visible in this section.
131 PITarget_Old = Name - (('X' | 'x') ('M' | 'm') ('L' | 'l'));
# Processing-instruction target: any Name except exactly "xml" (matched
# case-insensitively), per XML 1.0 production [17] PITarget.
# Plain difference (-) is required here. Strong difference (--) removes every
# Name that merely CONTAINS "xml" as a substring, which rejected legitimate
# targets such as "xml-stylesheet".
132 PITarget = Name - "xml"i;
# Processing instruction: '<?' PITarget, then optionally whitespace followed by
# any Char sequence that does not contain '?>', then the closing '?>'.
# (Char is not defined in the visible portion of this file.)
134 PI = '<?' PITarget (S (Char* - (Char* '?>' Char*)))? '?>';
# Any single markup declaration accepted inside a DOCTYPE internal subset.
136 markupdecl = elementdecl | AttlistDecl | EntityDecl | NotationDecl | PI | Comment;
# DOCTYPE declaration: '<!DOCTYPE' S name, an optional whitespace-prefixed
# ExternalID, optional whitespace, an optional '[' ... ']' internal subset made
# of markup declarations, parameter-entity references and whitespace, then '>'.
138 doctypedecl = '<!DOCTYPE' S Name (S ExternalID)? S? ('[' (markupdecl | PEReference | S)* ']' S?)? '>';
140 # TODO extSubsetDecl = ( markupdecl | conditionalSect | PEReference | S )*;
141 # UNUSED extSubsetDecl = ( markupdecl | PEReference | S )*;
143 # UNUSED extSubset = TextDecl? extSubsetDecl;
145 # UNUSED Ignore = Char* - (Char* ('<![' | ']]>') Char*);
147 # TODO: ignoreSectContents = Ignore ('<![' ignoreSectContents ']]>' Ignore)*;
148 # UNUSED ignoreSectContents = Ignore ('<![' ']]>' Ignore)*;
150 # UNUSED ignoreSect = '<![' S? 'IGNORE' S? '[' ignoreSectContents* ']]>';
152 # UNUSED includeSect = '<![' S? 'INCLUDE' S? '[' extSubsetDecl ']]>';
154 # UNUSED conditionalSect = includeSect | ignoreSect;
# Start tag: '<' Name, zero or more whitespace-prefixed attributes, optional
# whitespace, '>'.
156 STag = '<' Name (S Attribute)* S? '>';
# Opening delimiter of a CDATA section.
158 CDStart = '<![CDATA[';
162 # WAS CData = (Char* - (Char* ']]>' Char*));
# CDATA section body: any Char sequence that does not contain CDEnd.
# (Char and CDEnd are not defined in the visible portion of this file.)
163 CData = (Char* -- CDEnd);
# Complete CDATA section: opening delimiter, body, closing delimiter.
165 CDSect = CDStart CData CDEnd;
167 # UNUSED Subcode = ([a-z] | [A-Z])+;
169 # UNUSED UserCode = ('x' | 'X') '-' ([a-z] | [A-Z])+;
171 # UNUSED IanaCode = ('i' | 'I') '-' ([a-z] | [A-Z])+;
173 # UNUSED ISO639Code = ([a-z] | [A-Z]) ([a-z] | [A-Z]);
175 # UNUSED Langcode = ISO639Code | IanaCode | UserCode;
177 # UNUSED LanguageID = Langcode ('-' Subcode)*;
# standalone=... pseudo-attribute: mandatory leading S, value 'yes' or 'no' in
# either quote style.
179 SDDecl = S 'standalone' Eq (("'" ('yes' | 'no') "'") | ('"' ('yes' | 'no') '"'));
181 # UNUSED extPE = TextDecl? extSubsetDecl;
# Miscellaneous item allowed around the document element: a comment, a
# processing instruction, or whitespace.
183 Misc = Comment | PI | S;
# XML declaration: '<?xml', mandatory VersionInfo, optional EncodingDecl,
# optional SDDecl (in that order), optional whitespace, '?>'.
185 XMLDecl = '<?xml' VersionInfo EncodingDecl? SDDecl? S? '?>';
# Prolog: optional XML declaration, any number of Misc items, then optionally
# a DOCTYPE declaration followed by more Misc items.
187 prolog = XMLDecl? Misc* (doctypedecl Misc*)?;
189 # UNUSED Names = Name (S Name)*;
191 # Added fcall - TODO check logic is correct
192 # UNUSED extParsedEnt = TextDecl? @{fcall content;};
194 # TODO tag stack validation
196 # WAS element = EmptyElemTag | STag content ETag
197 # WAS content = (element | CharData | Reference | CDSect | PI | Comment)*;
# Element content as a flat repetition of tags, character data, references,
# CDATA sections, processing instructions and comments. STag and ETag appear as
# independent alternatives, so this rule by itself does not check that start
# and end tags are balanced or that their names match.
198 content = (EmptyElemTag | STag | ETag | CharData | Reference | CDSect | PI | Comment)*;
200 # WAS document = prolog element Misc*;
# Whole document: prolog, then a root that is either a single empty-element tag
# or a start tag, content and an end tag, followed by any number of Misc items.
201 document = prolog ( EmptyElemTag | ( STag content ETag ) ) Misc*;