1 // Copyright (C) 2016 and later: Unicode, Inc. and others.
2 // License & terms of use: http://www.unicode.org/copyright.html
3 //---------------------------------------------------------------------------------
5 // Generated Header File. Do not edit by hand.
6 // This file contains the state table for the ICU Regular Expression Pattern Parser.
7 // It is generated by the Perl script "regexcst.pl" from
8 // the rule parser state definitions file "regexcst.txt".
10 // Copyright (C) 2002-2016 International Business Machines Corporation
11 // and others. All rights reserved.
13 //---------------------------------------------------------------------------------
17 #include "unicode/utypes.h"
21 // Character classes for regex pattern scanning.
// Character class codes. Each value lies in the 128-255 range that the
// fCharClass comment in RegexTableEl reserves for character class indexes,
// and each one appears in the second column of rows in gRuleParseStateTable.
// NOTE(review): the actual membership of each class is not defined in this
// header - confirm against the code that consumes the table.
23 static const uint8_t kRuleSet_ascii_letter = 128;
24 static const uint8_t kRuleSet_digit_char = 129;
25 static const uint8_t kRuleSet_rule_char = 130;
// Action codes for the pattern parser state machine. This is the type of the
// fAction member of RegexTableEl, and every row of gRuleParseStateTable names
// one of these enumerators as its first initializer.
// NOTE(review): what each action does is implemented outside this header -
// consult the code that consumes the table before relying on a name's meaning.
28 enum Regex_PatternParseAction {
35 doContinueNamedCapture,
61 doSetBeginDifference1,
94 doSetBeginIntersection1,
100 doCompleteNamedBackRef,
106 doOpenNonCaptureParen,
120 doEscapedLiteralChar,
123 doMismatchedParenErr,
132 doContinueNamedBackRef,
136 //-------------------------------------------------------------------------------
138 // RegexTableEl represents the structure of a row in the transition table
139 // for the pattern parser state machine.
140 //-------------------------------------------------------------------------------
// One row of the parser transition table: an action code, the character or
// character class code associated with the row, and the next-state number.
141 struct RegexTableEl {
// Action code named by this row (one of Regex_PatternParseAction).
142 Regex_PatternParseAction fAction;
// A single byte holds either a literal ASCII code or a class index; the two
// ranges below do not overlap.
143 uint8_t fCharClass; // 0-127: an individual ASCII character
144 // 128-255: character class index
// Next-state number; in gRuleParseStateTable these match the index of the
// first row of the target state. 255 is reserved as the "pop" marker.
145 uint8_t fNextState; // 0-250: normal next-state numbers
146 // 255: pop next-state from stack.
// NOTE(review): rows of gRuleParseStateTable supply five initializers, so
// presumably two further members follow fNextState - confirm their names and
// meaning against the complete struct definition.
// State transition table for the pattern parser state machine.
//
// Following the member order declared in RegexTableEl, the first three
// initializers of each row are fAction, fCharClass and fNextState.
// The trailing comment on each row gives that row's index in the array and,
// on the first row of a state, the state's name. fNextState values match the
// index of the first row of the target state (for example 2 = term,
// 14 = expr-quant, 206 = errorDeath); 255 means "pop next-state from stack"
// per the RegexTableEl field comment.
//
// NOTE(review): the fourth (numeric) and fifth (TRUE/FALSE) initializers map
// to RegexTableEl members beyond fNextState - presumably a state number to
// push and an "advance to next input character" flag; confirm against the
// state definitions file regexcst.txt.
// NOTE(review): fCharClass values 253, 254 and 255 are used in this table as
// class codes with no named constant in this header; confirm their meaning
// in the code that consumes the table.
//
// This table is generated; row order is significant because row indexes are
// the state numbers referenced by fNextState.
151 static const struct RegexTableEl gRuleParseStateTable[] = {
152 {doNOP, 0, 0, 0, TRUE}
153 , {doPatStart, 255, 2,0, FALSE} // 1 start
154 , {doLiteralChar, 254, 14,0, TRUE} // 2 term
155 , {doLiteralChar, 130, 14,0, TRUE} // 3
156 , {doSetBegin, 91 /* [ */, 123, 205, TRUE} // 4
157 , {doNOP, 40 /* ( */, 27,0, TRUE} // 5
158 , {doDotAny, 46 /* . */, 14,0, TRUE} // 6
159 , {doCaret, 94 /* ^ */, 14,0, TRUE} // 7
160 , {doDollar, 36 /* $ */, 14,0, TRUE} // 8
161 , {doNOP, 92 /* \ */, 89,0, TRUE} // 9
162 , {doOrOperator, 124 /* | */, 2,0, TRUE} // 10
163 , {doCloseParen, 41 /* ) */, 255,0, TRUE} // 11
164 , {doPatFinish, 253, 2,0, FALSE} // 12
165 , {doRuleError, 255, 206,0, FALSE} // 13
166 , {doNOP, 42 /* * */, 68,0, TRUE} // 14 expr-quant
167 , {doNOP, 43 /* + */, 71,0, TRUE} // 15
168 , {doNOP, 63 /* ? */, 74,0, TRUE} // 16
169 , {doIntervalInit, 123 /* { */, 77,0, TRUE} // 17
170 , {doNOP, 40 /* ( */, 23,0, TRUE} // 18
171 , {doNOP, 255, 20,0, FALSE} // 19
172 , {doOrOperator, 124 /* | */, 2,0, TRUE} // 20 expr-cont
173 , {doCloseParen, 41 /* ) */, 255,0, TRUE} // 21
174 , {doNOP, 255, 2,0, FALSE} // 22
175 , {doSuppressComments, 63 /* ? */, 25,0, TRUE} // 23 open-paren-quant
176 , {doNOP, 255, 27,0, FALSE} // 24
177 , {doNOP, 35 /* # */, 50, 14, TRUE} // 25 open-paren-quant2
178 , {doNOP, 255, 29,0, FALSE} // 26
179 , {doSuppressComments, 63 /* ? */, 29,0, TRUE} // 27 open-paren
180 , {doOpenCaptureParen, 255, 2, 14, FALSE} // 28
181 , {doOpenNonCaptureParen, 58 /* : */, 2, 14, TRUE} // 29 open-paren-extended
182 , {doOpenAtomicParen, 62 /* > */, 2, 14, TRUE} // 30
183 , {doOpenLookAhead, 61 /* = */, 2, 20, TRUE} // 31
184 , {doOpenLookAheadNeg, 33 /* ! */, 2, 20, TRUE} // 32
185 , {doNOP, 60 /* < */, 46,0, TRUE} // 33
186 , {doNOP, 35 /* # */, 50, 2, TRUE} // 34
187 , {doBeginMatchMode, 105 /* i */, 53,0, FALSE} // 35
188 , {doBeginMatchMode, 100 /* d */, 53,0, FALSE} // 36
189 , {doBeginMatchMode, 109 /* m */, 53,0, FALSE} // 37
190 , {doBeginMatchMode, 115 /* s */, 53,0, FALSE} // 38
191 , {doBeginMatchMode, 117 /* u */, 53,0, FALSE} // 39
192 , {doBeginMatchMode, 119 /* w */, 53,0, FALSE} // 40
193 , {doBeginMatchMode, 120 /* x */, 53,0, FALSE} // 41
194 , {doBeginMatchMode, 45 /* - */, 53,0, FALSE} // 42
195 , {doConditionalExpr, 40 /* ( */, 206,0, TRUE} // 43
196 , {doPerlInline, 123 /* { */, 206,0, TRUE} // 44
197 , {doBadOpenParenType, 255, 206,0, FALSE} // 45
198 , {doOpenLookBehind, 61 /* = */, 2, 20, TRUE} // 46 open-paren-lookbehind
199 , {doOpenLookBehindNeg, 33 /* ! */, 2, 20, TRUE} // 47
200 , {doBeginNamedCapture, 128, 64,0, FALSE} // 48
201 , {doBadOpenParenType, 255, 206,0, FALSE} // 49
202 , {doNOP, 41 /* ) */, 255,0, TRUE} // 50 paren-comment
203 , {doMismatchedParenErr, 253, 206,0, FALSE} // 51
204 , {doNOP, 255, 50,0, TRUE} // 52
205 , {doMatchMode, 105 /* i */, 53,0, TRUE} // 53 paren-flag
206 , {doMatchMode, 100 /* d */, 53,0, TRUE} // 54
207 , {doMatchMode, 109 /* m */, 53,0, TRUE} // 55
208 , {doMatchMode, 115 /* s */, 53,0, TRUE} // 56
209 , {doMatchMode, 117 /* u */, 53,0, TRUE} // 57
210 , {doMatchMode, 119 /* w */, 53,0, TRUE} // 58
211 , {doMatchMode, 120 /* x */, 53,0, TRUE} // 59
212 , {doMatchMode, 45 /* - */, 53,0, TRUE} // 60
213 , {doSetMatchMode, 41 /* ) */, 2,0, TRUE} // 61
214 , {doMatchModeParen, 58 /* : */, 2, 14, TRUE} // 62
215 , {doBadModeFlag, 255, 206,0, FALSE} // 63
216 , {doContinueNamedCapture, 128, 64,0, TRUE} // 64 named-capture
217 , {doContinueNamedCapture, 129, 64,0, TRUE} // 65
218 , {doOpenCaptureParen, 62 /* > */, 2, 14, TRUE} // 66
219 , {doBadNamedCapture, 255, 206,0, FALSE} // 67
220 , {doNGStar, 63 /* ? */, 20,0, TRUE} // 68 quant-star
221 , {doPossessiveStar, 43 /* + */, 20,0, TRUE} // 69
222 , {doStar, 255, 20,0, FALSE} // 70
223 , {doNGPlus, 63 /* ? */, 20,0, TRUE} // 71 quant-plus
224 , {doPossessivePlus, 43 /* + */, 20,0, TRUE} // 72
225 , {doPlus, 255, 20,0, FALSE} // 73
226 , {doNGOpt, 63 /* ? */, 20,0, TRUE} // 74 quant-opt
227 , {doPossessiveOpt, 43 /* + */, 20,0, TRUE} // 75
228 , {doOpt, 255, 20,0, FALSE} // 76
229 , {doNOP, 129, 79,0, FALSE} // 77 interval-open
230 , {doIntervalError, 255, 206,0, FALSE} // 78
231 , {doIntevalLowerDigit, 129, 79,0, TRUE} // 79 interval-lower
232 , {doNOP, 44 /* , */, 83,0, TRUE} // 80
233 , {doIntervalSame, 125 /* } */, 86,0, TRUE} // 81
234 , {doIntervalError, 255, 206,0, FALSE} // 82
235 , {doIntervalUpperDigit, 129, 83,0, TRUE} // 83 interval-upper
236 , {doNOP, 125 /* } */, 86,0, TRUE} // 84
237 , {doIntervalError, 255, 206,0, FALSE} // 85
238 , {doNGInterval, 63 /* ? */, 20,0, TRUE} // 86 interval-type
239 , {doPossessiveInterval, 43 /* + */, 20,0, TRUE} // 87
240 , {doInterval, 255, 20,0, FALSE} // 88
241 , {doBackslashA, 65 /* A */, 2,0, TRUE} // 89 backslash
242 , {doBackslashB, 66 /* B */, 2,0, TRUE} // 90
243 , {doBackslashb, 98 /* b */, 2,0, TRUE} // 91
244 , {doBackslashd, 100 /* d */, 14,0, TRUE} // 92
245 , {doBackslashD, 68 /* D */, 14,0, TRUE} // 93
246 , {doBackslashG, 71 /* G */, 2,0, TRUE} // 94
247 , {doBackslashh, 104 /* h */, 14,0, TRUE} // 95
248 , {doBackslashH, 72 /* H */, 14,0, TRUE} // 96
249 , {doNOP, 107 /* k */, 115,0, TRUE} // 97
250 , {doNamedChar, 78 /* N */, 14,0, FALSE} // 98
251 , {doProperty, 112 /* p */, 14,0, FALSE} // 99
252 , {doProperty, 80 /* P */, 14,0, FALSE} // 100
253 , {doBackslashR, 82 /* R */, 14,0, TRUE} // 101
254 , {doEnterQuoteMode, 81 /* Q */, 2,0, TRUE} // 102
255 , {doBackslashS, 83 /* S */, 14,0, TRUE} // 103
256 , {doBackslashs, 115 /* s */, 14,0, TRUE} // 104
257 , {doBackslashv, 118 /* v */, 14,0, TRUE} // 105
258 , {doBackslashV, 86 /* V */, 14,0, TRUE} // 106
259 , {doBackslashW, 87 /* W */, 14,0, TRUE} // 107
260 , {doBackslashw, 119 /* w */, 14,0, TRUE} // 108
261 , {doBackslashX, 88 /* X */, 14,0, TRUE} // 109
262 , {doBackslashZ, 90 /* Z */, 2,0, TRUE} // 110
263 , {doBackslashz, 122 /* z */, 2,0, TRUE} // 111
264 , {doBackRef, 129, 14,0, TRUE} // 112
265 , {doEscapeError, 253, 206,0, FALSE} // 113
266 , {doEscapedLiteralChar, 255, 14,0, TRUE} // 114
267 , {doBeginNamedBackRef, 60 /* < */, 117,0, TRUE} // 115 named-backref
268 , {doBadNamedCapture, 255, 206,0, FALSE} // 116
269 , {doContinueNamedBackRef, 128, 119,0, TRUE} // 117 named-backref-2
270 , {doBadNamedCapture, 255, 206,0, FALSE} // 118
271 , {doContinueNamedBackRef, 128, 119,0, TRUE} // 119 named-backref-3
272 , {doContinueNamedBackRef, 129, 119,0, TRUE} // 120
273 , {doCompleteNamedBackRef, 62 /* > */, 14,0, TRUE} // 121
274 , {doBadNamedCapture, 255, 206,0, FALSE} // 122
275 , {doSetNegate, 94 /* ^ */, 126,0, TRUE} // 123 set-open
276 , {doSetPosixProp, 58 /* : */, 128,0, FALSE} // 124
277 , {doNOP, 255, 126,0, FALSE} // 125
278 , {doSetLiteral, 93 /* ] */, 141,0, TRUE} // 126 set-open2
279 , {doNOP, 255, 131,0, FALSE} // 127
280 , {doSetEnd, 93 /* ] */, 255,0, TRUE} // 128 set-posix
281 , {doNOP, 58 /* : */, 131,0, FALSE} // 129
282 , {doRuleError, 255, 206,0, FALSE} // 130
283 , {doSetEnd, 93 /* ] */, 255,0, TRUE} // 131 set-start
284 , {doSetBeginUnion, 91 /* [ */, 123, 148, TRUE} // 132
285 , {doNOP, 92 /* \ */, 191,0, TRUE} // 133
286 , {doNOP, 45 /* - */, 137,0, TRUE} // 134
287 , {doNOP, 38 /* & */, 139,0, TRUE} // 135
288 , {doSetLiteral, 255, 141,0, TRUE} // 136
289 , {doRuleError, 45 /* - */, 206,0, FALSE} // 137 set-start-dash
290 , {doSetAddDash, 255, 141,0, FALSE} // 138
291 , {doRuleError, 38 /* & */, 206,0, FALSE} // 139 set-start-amp
292 , {doSetAddAmp, 255, 141,0, FALSE} // 140
293 , {doSetEnd, 93 /* ] */, 255,0, TRUE} // 141 set-after-lit
294 , {doSetBeginUnion, 91 /* [ */, 123, 148, TRUE} // 142
295 , {doNOP, 45 /* - */, 178,0, TRUE} // 143
296 , {doNOP, 38 /* & */, 169,0, TRUE} // 144
297 , {doNOP, 92 /* \ */, 191,0, TRUE} // 145
298 , {doSetNoCloseError, 253, 206,0, FALSE} // 146
299 , {doSetLiteral, 255, 141,0, TRUE} // 147
300 , {doSetEnd, 93 /* ] */, 255,0, TRUE} // 148 set-after-set
301 , {doSetBeginUnion, 91 /* [ */, 123, 148, TRUE} // 149
302 , {doNOP, 45 /* - */, 171,0, TRUE} // 150
303 , {doNOP, 38 /* & */, 166,0, TRUE} // 151
304 , {doNOP, 92 /* \ */, 191,0, TRUE} // 152
305 , {doSetNoCloseError, 253, 206,0, FALSE} // 153
306 , {doSetLiteral, 255, 141,0, TRUE} // 154
307 , {doSetEnd, 93 /* ] */, 255,0, TRUE} // 155 set-after-range
308 , {doSetBeginUnion, 91 /* [ */, 123, 148, TRUE} // 156
309 , {doNOP, 45 /* - */, 174,0, TRUE} // 157
310 , {doNOP, 38 /* & */, 176,0, TRUE} // 158
311 , {doNOP, 92 /* \ */, 191,0, TRUE} // 159
312 , {doSetNoCloseError, 253, 206,0, FALSE} // 160
313 , {doSetLiteral, 255, 141,0, TRUE} // 161
314 , {doSetBeginUnion, 91 /* [ */, 123, 148, TRUE} // 162 set-after-op
315 , {doSetOpError, 93 /* ] */, 206,0, FALSE} // 163
316 , {doNOP, 92 /* \ */, 191,0, TRUE} // 164
317 , {doSetLiteral, 255, 141,0, TRUE} // 165
318 , {doSetBeginIntersection1, 91 /* [ */, 123, 148, TRUE} // 166 set-set-amp
319 , {doSetIntersection2, 38 /* & */, 162,0, TRUE} // 167
320 , {doSetAddAmp, 255, 141,0, FALSE} // 168
321 , {doSetIntersection2, 38 /* & */, 162,0, TRUE} // 169 set-lit-amp
322 , {doSetAddAmp, 255, 141,0, FALSE} // 170
323 , {doSetBeginDifference1, 91 /* [ */, 123, 148, TRUE} // 171 set-set-dash
324 , {doSetDifference2, 45 /* - */, 162,0, TRUE} // 172
325 , {doSetAddDash, 255, 141,0, FALSE} // 173
326 , {doSetDifference2, 45 /* - */, 162,0, TRUE} // 174 set-range-dash
327 , {doSetAddDash, 255, 141,0, FALSE} // 175
328 , {doSetIntersection2, 38 /* & */, 162,0, TRUE} // 176 set-range-amp
329 , {doSetAddAmp, 255, 141,0, FALSE} // 177
330 , {doSetDifference2, 45 /* - */, 162,0, TRUE} // 178 set-lit-dash
331 , {doSetAddDash, 91 /* [ */, 141,0, FALSE} // 179
332 , {doSetAddDash, 93 /* ] */, 141,0, FALSE} // 180
333 , {doNOP, 92 /* \ */, 183,0, TRUE} // 181
334 , {doSetRange, 255, 155,0, TRUE} // 182
335 , {doSetOpError, 115 /* s */, 206,0, FALSE} // 183 set-lit-dash-escape
336 , {doSetOpError, 83 /* S */, 206,0, FALSE} // 184
337 , {doSetOpError, 119 /* w */, 206,0, FALSE} // 185
338 , {doSetOpError, 87 /* W */, 206,0, FALSE} // 186
339 , {doSetOpError, 100 /* d */, 206,0, FALSE} // 187
340 , {doSetOpError, 68 /* D */, 206,0, FALSE} // 188
341 , {doSetNamedRange, 78 /* N */, 155,0, FALSE} // 189
342 , {doSetRange, 255, 155,0, TRUE} // 190
343 , {doSetProp, 112 /* p */, 148,0, FALSE} // 191 set-escape
344 , {doSetProp, 80 /* P */, 148,0, FALSE} // 192
345 , {doSetNamedChar, 78 /* N */, 141,0, FALSE} // 193
346 , {doSetBackslash_s, 115 /* s */, 155,0, TRUE} // 194
347 , {doSetBackslash_S, 83 /* S */, 155,0, TRUE} // 195
348 , {doSetBackslash_w, 119 /* w */, 155,0, TRUE} // 196
349 , {doSetBackslash_W, 87 /* W */, 155,0, TRUE} // 197
350 , {doSetBackslash_d, 100 /* d */, 155,0, TRUE} // 198
351 , {doSetBackslash_D, 68 /* D */, 155,0, TRUE} // 199
352 , {doSetBackslash_h, 104 /* h */, 155,0, TRUE} // 200
353 , {doSetBackslash_H, 72 /* H */, 155,0, TRUE} // 201
354 , {doSetBackslash_v, 118 /* v */, 155,0, TRUE} // 202
355 , {doSetBackslash_V, 86 /* V */, 155,0, TRUE} // 203
356 , {doSetLiteralEscaped, 255, 141,0, TRUE} // 204
357 , {doSetFinish, 255, 14,0, FALSE} // 205 set-finish
358 , {doExit, 255, 206,0, TRUE} // 206 errorDeath
360 static const char * const RegexStateNames[] = { 0,
389 "open-paren-extended",
406 "open-paren-lookbehind",
543 "set-lit-dash-escape",