test/xmlcommon.rl

   1 /*
   2  * This file is included by xml.rl
   3  *
   4  * @IGNORE: yes
   5  */
   6
   7 %%{
   8
   9   #
  10   # Common XML grammar rules based on the XML 1.0 BNF from:
  11   # http://www.jelks.nu/XML/xmlebnf.html
  12   #
  13
  14   machine CommonXml;
  15         # White space: one or more of space (0x20), tab (0x9), CR (0xD), LF (0xA).
  16         S = (0x20 | 0x9 | 0xD | 0xA)+;
  17
  18         # WAS PubidChar = 0x20 | 0xD | 0xA | [a-zA-Z0-9] | [-'()+,./:=?;!*#@$_%];
  19         PubidChar = 0x20 | 0xD | 0xA | [a-zA-Z0-9] | [\-'()+,./:=?;!*#@$_%];
  20         # Quoted public identifier; the single-quoted form excludes "'" from its body.
  21         PubidLiteral = '"' PubidChar* '"' | "'" (PubidChar - "'")* "'";
  22         # Letter, NameChar and Char are not defined in this file; presumably supplied by the including xml.rl -- TODO confirm.
  23         Name = (Letter | '_' | ':') (NameChar)*;
  24         # Comment body cannot contain "--": every '-' in it must be followed by a character other than '-'.
  25         Comment = '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->';
  26
  27   # CharData uses the strong subtraction operator (--) and + in place of *.
  28   # Ragel complained about the * form because it yields a machine that accepts
  29   # zero-length strings; CharData is only used in an optional construct anyway.
  30   # CharData_Old is the earlier definition; nothing else in this file refers to it.
  31   CharData_Old = [^<&]* - ([^<&]* ']]>' [^<&]*);
  32         CharData = [^<&]+ -- ']]>';
  33         # Quoted system literal: any text that does not contain the surrounding quote character.
  34         SystemLiteral = ('"' [^"]* '"') | ("'" [^']* "'");
  35         # '=' with optional white space on either side.
  36         Eq = S? '=' S?;
  37
  38         VersionNum = ([a-zA-Z0-9_.:] | '-')+;
  39
  40   # WAS S 'version' Eq (' VersionNum ' | " VersionNum ") - fixed quotes
  41         VersionInfo = S 'version' Eq ("'" VersionNum "'" | '"' VersionNum '"');
  42         # SYSTEM takes a system literal; PUBLIC takes a public literal followed by a system literal.
  43         ExternalID = 'SYSTEM' S SystemLiteral | 'PUBLIC' S PubidLiteral S SystemLiteral;
  44         # PUBLIC form without a system literal; within this file only NotationDecl uses it.
  45         PublicID = 'PUBLIC' S PubidLiteral;
  46
  47         NotationDecl = '<!NOTATION' S Name S (ExternalID |  PublicID) S? '>';
  48         # Encoding name: an ASCII letter followed by ASCII letters, digits, '.', '_' or '-'.
  49         EncName = [A-Za-z] ([A-Za-z0-9._] | '-')*;
  50         # Like VersionInfo, the rule itself begins with the separating white space.
  51         EncodingDecl = S 'encoding' Eq ('"' EncName  '"' |  "'" EncName "'" );
  52
  53         # UNUSED TextDecl = '<?xml' VersionInfo? EncodingDecl S? '?>';
  54         # NDATA clause; EntityDef allows it optionally after an ExternalID.
  55         NDataDecl = S 'NDATA' S Name;
  56         # Reference of the form %Name;
  57         PEReference = '%' Name ';';
  58         # Reference of the form &Name;
  59         EntityRef = '&' Name ';';
  60         # Character reference: decimal '&#' digits ';' or hexadecimal '&#x' hex-digits ';' (the hex prefix was misspelled '&0x').
  61         CharRef = '&#' [0-9]+ ';' | '&#x' [0-9a-fA-F]+ ';';
  62         # Either kind of reference that starts with '&'.
  63         Reference = EntityRef | CharRef;
  64         # Quoted entity value: no bare '%' or '&' and no closing quote character; PEReference and Reference are allowed.
  65         EntityValue = '"' ([^%&"] | PEReference | Reference)* '"' |  "'" ([^%&'] | PEReference | Reference)* "'";
  66
  67         PEDef = EntityValue | ExternalID;
  68         # Same as PEDef, except that an ExternalID may be followed by an NDATA clause.
  69         EntityDef = EntityValue | (ExternalID NDataDecl?);
  70         # Differs from GEDecl by the '%' before the name and by using PEDef.
  71         PEDecl = '<!ENTITY' S '%' S Name S PEDef S? '>';
  72
  73         GEDecl = '<!ENTITY' S Name S EntityDef S? '>';
  74
  75         EntityDecl = GEDecl | PEDecl;
  76         # '#PCDATA' with optional '|'-separated names must close with ')*'; '#PCDATA' alone may also close with ')'.
  77         Mixed = '(' S? '#PCDATA' (S? '|' S? Name)* S? ')*' | '(' S? '#PCDATA' S? ')';
  78
  79         # WAS cp = (Name | choice | seq) ('?' | '*' | '+')?;
  80
  81         # WAS seq = '(' S? cp ( S? ',' S? cp )* S? ')';
  82
  83         # WAS choice = '(' S? cp ( S? '|' S? cp )* S? ')';
  84
  85   # WAS children = (choice | seq) ('?' | '*' | '+')?;
  86   # Flat stand-in for the recursive cp/seq/choice rules (see WAS above): '(' then any run of names, parens, ',' or '|' and white space, then ')'.
  87   # TODO put validation for this in and make it clearer (parenthesis nesting is not checked; the group is repeated with '*').
  88   alt = '?' | '*' | '+';
  89         children = '(' S?
  90                    ( ( Name alt? )  |
  91                      '(' |
  92                       ( ')' alt? ) |
  93                       [,|] |
  94                       S )*
  95                     ')' alt?;
  96         # Content part of an element declaration.
  97         contentspec = 'EMPTY' | 'ANY' | Mixed | children;
  98
  99         elementdecl = '<!ELEMENT' S Name S contentspec S? '>';
 100         # Quoted attribute value: no '<', no bare '&' and no closing quote character; Reference is allowed.
 101         AttValue = '"' ([^<&"] | Reference)* '"' |  "'" ([^<&'] | Reference)* "'";
 102
 103         Attribute = Name Eq AttValue;
 104         # Name token: one or more NameChar, with no special rule for the first character.
 105         Nmtoken = (NameChar)+;
 106
 107         # UNUSED Nmtokens = Nmtoken (S Nmtoken)*;
 108         # Parenthesised list of name tokens separated by '|', with optional white space around each.
 109         Enumeration = '(' S? Nmtoken (S? '|' S? Nmtoken)* S? ')';
 110         # Same list shape as Enumeration, but of Names and introduced by 'NOTATION'.
 111         NotationType = 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? ')';
 112
 113         EnumeratedType = NotationType | Enumeration;
 114
 115         TokenizedType = 'ID' | 'IDREF' | 'IDREFS' | 'ENTITY' | 'ENTITIES' | 'NMTOKEN' | 'NMTOKENS';
 116
 117         StringType = 'CDATA';
 118
 119         AttType = StringType | TokenizedType | EnumeratedType;
 120         # Attribute default: one of two keywords, or a value optionally preceded by '#FIXED' and white space.
 121         DefaultDecl = '#REQUIRED' | '#IMPLIED' | (('#FIXED' S)? AttValue);
 122         # AttDef begins with white space, so AttlistDecl has no separate S between Name and AttDef.
 123         AttDef = S Name S AttType S DefaultDecl;
 124
 125         AttlistDecl = '<!ATTLIST' S Name AttDef* S? '>';
 126         # Same shape as STag, but closed with '/>' instead of '>'.
 127         EmptyElemTag = '<' Name (S Attribute)* S? '/>';
 128         # End tag: a name only, with optional white space before '>'.
 129         ETag = '</' Name S? '>';
 130   # PITarget: any Name except "xml" in any letter case; plain '-' is used so names that merely contain "xml" (e.g. xml-stylesheet) stay valid.
 131   PITarget_Old = Name - (('X' | 'x') ('M' | 'm') ('L' | 'l'));
 132         PITarget = Name - "xml"i;
 133         # Processing instruction; the optional body after the target and white space cannot contain '?>'.
 134         PI = '<?' PITarget (S (Char* - (Char* '?>' Char*)))? '?>';
 135         # Declarations accepted between '[' and ']' in doctypedecl.
 136         markupdecl = elementdecl | AttlistDecl | EntityDecl | NotationDecl | PI | Comment;
 137         # DOCTYPE: a name, an optional external ID, and an optional '[' ... ']' section of declarations, PE references and white space.
 138         doctypedecl = '<!DOCTYPE' S Name (S ExternalID)? S? ('[' (markupdecl | PEReference | S)* ']' S?)? '>';
 139
 140         # TODO extSubsetDecl = ( markupdecl | conditionalSect | PEReference | S )*;
 141         # UNUSED extSubsetDecl = ( markupdecl | PEReference | S )*;
 142
 143         # UNUSED extSubset = TextDecl? extSubsetDecl;
 144
 145         # UNUSED Ignore = Char* - (Char* ('<![' | ']]>') Char*);
 146
 147         # TODO: ignoreSectContents = Ignore ('<![' ignoreSectContents ']]>' Ignore)*;
 148         # UNUSED ignoreSectContents = Ignore ('<![' ']]>' Ignore)*;
 149
 150         # UNUSED ignoreSect = '<![' S? 'IGNORE' S? '[' ignoreSectContents* ']]>';
 151
 152         # UNUSED includeSect = '<![' S? 'INCLUDE' S? '[' extSubsetDecl ']]>';
 153
 154         # UNUSED conditionalSect = includeSect | ignoreSect;
 155         # Start tag: a name, then zero or more attributes each preceded by white space.
 156         STag = '<' Name (S Attribute)* S? '>';
 157         # Opening and closing delimiters of a CDATA section.
 158         CDStart = '<![CDATA[';
 159
 160         CDEnd = ']]>';
 161         # CDATA body: any run of Char that does not contain CDEnd (strong difference).
 162         # WAS CData = (Char* - (Char* ']]>' Char*));
 163         CData = (Char* -- CDEnd);
 164         # Complete CDATA section: delimiter, body, delimiter.
 165         CDSect = CDStart CData CDEnd;
 166
 167         # UNUSED Subcode = ([a-z] | [A-Z])+;
 168
 169         # UNUSED UserCode = ('x' | 'X') '-' ([a-z] | [A-Z])+;
 170
 171         # UNUSED IanaCode = ('i' | 'I') '-' ([a-z] | [A-Z])+;
 172
 173         # UNUSED ISO639Code = ([a-z] | [A-Z]) ([a-z] | [A-Z]);
 174
 175         # UNUSED Langcode = ISO639Code |  IanaCode |  UserCode;
 176
 177         # UNUSED LanguageID = Langcode ('-' Subcode)*;
 178         # standalone = 'yes' or 'no' in matching quotes; the rule itself begins with the separating white space.
 179         SDDecl = S 'standalone' Eq (("'" ('yes' | 'no') "'") | ('"' ('yes' | 'no') '"'));
 180
 181         # UNUSED extPE = TextDecl? extSubsetDecl;
 182         # Filler accepted in prolog and after the root element in document.
 183         Misc = Comment | PI |  S;
 184         # XML declaration: VersionInfo is required; EncodingDecl and SDDecl are optional and must come in that order.
 185         XMLDecl = '<?xml' VersionInfo EncodingDecl? SDDecl? S? '?>';
 186         # Everything before the root element: optional XMLDecl, Misc items, and an optional DOCTYPE followed by more Misc.
 187         prolog = XMLDecl? Misc* (doctypedecl Misc*)?;
 188
 189         # UNUSED Names = Name (S Name)*;
 190
 191   # Added fcall - TODO check logic is correct
 192         # UNUSED extParsedEnt = TextDecl? @{fcall content;};
 193
 194   # TODO tag stack validation
 195         # content is flat: STag and ETag are accepted as independent items, so tag nesting and name matching are not checked.
 196   # WAS element = EmptyElemTag | STag content ETag
 197         # WAS content = (element | CharData | Reference | CDSect | PI | Comment)*;
 198         content = (EmptyElemTag | STag | ETag | CharData | Reference | CDSect | PI | Comment)*;
 199         # The former element rule is inlined here: an empty-element tag, or a start tag, content and an end tag.
 200         # WAS document = prolog element Misc*;
 201         document = prolog ( EmptyElemTag | ( STag content ETag ) ) Misc*;
 202         # Instantiates the machine: main accepts one complete document.
 203         main := document;
 204
 205 }%%