2 * Copyright (C) 2009 Apple Inc. All rights reserved.
4 * Redistribution and use in source and binary forms, with or without
5 * modification, are permitted provided that the following conditions
7 * 1. Redistributions of source code must retain the above copyright
8 * notice, this list of conditions and the following disclaimer.
9 * 2. Redistributions in binary form must reproduce the above copyright
10 * notice, this list of conditions and the following disclaimer in the
11 * documentation and/or other materials provided with the distribution.
13 * THIS SOFTWARE IS PROVIDED BY APPLE INC. ``AS IS'' AND ANY
14 * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
15 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
16 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR
17 * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
18 * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
19 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
20 * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
21 * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
22 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
23 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26 #ifndef RegexPattern_h
27 #define RegexPattern_h
30 #include "yarr/jswtfbridge.h"
31 #include "yarr/yarr/RegexCommon.h"
34 namespace JSC { namespace Yarr {
36 #define RegexStackSpaceForBackTrackInfoPatternCharacter 1 // Only for !fixed quantifiers.
37 #define RegexStackSpaceForBackTrackInfoCharacterClass 1 // Only for !fixed quantifiers.
38 #define RegexStackSpaceForBackTrackInfoBackReference 2
39 #define RegexStackSpaceForBackTrackInfoAlternative 1 // One per alternative.
40 #define RegexStackSpaceForBackTrackInfoParentheticalAssertion 1
41 #define RegexStackSpaceForBackTrackInfoParenthesesOnce 1 // Only for !fixed quantifiers.
42 #define RegexStackSpaceForBackTrackInfoParenthesesTerminal 1
43 #define RegexStackSpaceForBackTrackInfoParentheses 4
45 struct PatternDisjunction;
47 struct CharacterRange {
51 CharacterRange(UChar begin, UChar end)
59 * Wraps a table and indicates inversion. Can be efficiently borrowed
60 * between character classes, so it's refcounted.
62 struct CharacterClassTable {
65 jsrefcount m_refcount;
67 /* Ownership transferred to caller. */
68 static CharacterClassTable *create(const char* table, bool inverted)
70 // FIXME: bug 574459 -- no NULL checks done by any of the callers, all
71 // of which are in RegExpJitTables.h.
72 /* We can't (easily) use js_new() here because the constructor is private. */
73 void *memory = js_malloc(sizeof(CharacterClassTable));
74 return memory ? new(memory) CharacterClassTable(table, inverted) : NULL;
77 void incref() { JS_ATOMIC_INCREMENT(&m_refcount); }
78 void decref() { if (JS_ATOMIC_DECREMENT(&m_refcount) == 0) js_delete(this); }
81 CharacterClassTable(const char* table, bool inverted)
83 , m_inverted(inverted)
89 struct CharacterClass {
90 // All CharacterClass instances have to have the full set of matches and ranges,
91 // they may have an optional table for faster lookups (which must match the
92 // specified matches and ranges)
93 CharacterClass(CharacterClassTable *table)
104 typedef js::Vector<UChar, 0, js::SystemAllocPolicy> UChars;
105 typedef js::Vector<CharacterRange, 0, js::SystemAllocPolicy> CharacterRanges;
107 CharacterRanges m_ranges;
108 UChars m_matchesUnicode;
109 CharacterRanges m_rangesUnicode;
110 CharacterClassTable *m_table;
113 enum QuantifierType {
114 QuantifierFixedCount,
123 TypeAssertionWordBoundary,
124 TypePatternCharacter,
127 TypeForwardReference,
128 TypeParenthesesSubpattern,
129 TypeParentheticalAssertion
131 bool invertOrCapture;
133 UChar patternCharacter;
134 CharacterClass* characterClass;
135 unsigned subpatternId;
137 PatternDisjunction* disjunction;
138 unsigned subpatternId;
139 unsigned lastSubpatternId;
144 QuantifierType quantityType;
145 unsigned quantityCount;
147 unsigned frameLocation;
149 PatternTerm(UChar ch)
150 : type(PatternTerm::TypePatternCharacter)
152 patternCharacter = ch;
153 quantityType = QuantifierFixedCount;
157 PatternTerm(CharacterClass* charClass, bool invert)
158 : type(PatternTerm::TypeCharacterClass)
159 , invertOrCapture(invert)
161 characterClass = charClass;
162 quantityType = QuantifierFixedCount;
166 PatternTerm(Type type, unsigned subpatternId, PatternDisjunction* disjunction, bool invertOrCapture)
168 , invertOrCapture(invertOrCapture)
170 parentheses.disjunction = disjunction;
171 parentheses.subpatternId = subpatternId;
172 parentheses.isCopy = false;
173 parentheses.isTerminal = false;
174 quantityType = QuantifierFixedCount;
178 PatternTerm(Type type, bool invert = false)
180 , invertOrCapture(invert)
182 quantityType = QuantifierFixedCount;
186 PatternTerm(unsigned spatternId)
187 : type(TypeBackReference)
188 , invertOrCapture(false)
190 subpatternId = spatternId;
191 quantityType = QuantifierFixedCount;
195 static PatternTerm ForwardReference()
197 return PatternTerm(TypeForwardReference);
200 static PatternTerm BOL()
202 return PatternTerm(TypeAssertionBOL);
205 static PatternTerm EOL()
207 return PatternTerm(TypeAssertionEOL);
210 static PatternTerm WordBoundary(bool invert)
212 return PatternTerm(TypeAssertionWordBoundary, invert);
217 return invertOrCapture;
222 return invertOrCapture;
225 void quantify(unsigned count, QuantifierType type)
227 quantityCount = count;
232 struct PatternAlternative {
233 PatternAlternative(PatternDisjunction* disjunction)
234 : m_parent(disjunction)
235 , m_onceThrough(false)
236 , m_hasFixedSize(false)
237 , m_startsWithBOL(false)
238 , m_containsBOL(false)
242 PatternTerm& lastTerm()
244 JS_ASSERT(m_terms.length());
245 return m_terms[m_terms.length() - 1];
248 void removeLastTerm()
250 JS_ASSERT(m_terms.length());
254 void setOnceThrough()
256 m_onceThrough = true;
261 return m_onceThrough;
264 js::Vector<PatternTerm, 0, js::SystemAllocPolicy> m_terms;
265 PatternDisjunction* m_parent;
266 unsigned m_minimumSize;
267 bool m_onceThrough : 1;
268 bool m_hasFixedSize : 1;
269 bool m_startsWithBOL : 1;
270 bool m_containsBOL : 1;
273 template<typename T, size_t N, class AP>
275 deleteAllValues(js::Vector<T*,N,AP> &vector)
277 for (T** t = vector.begin(); t < vector.end(); ++t)
281 struct PatternDisjunction {
282 PatternDisjunction(PatternAlternative* parent = 0)
284 , m_hasFixedSize(false)
288 ~PatternDisjunction()
290 deleteAllValues(m_alternatives);
293 PatternAlternative* addNewAlternative()
295 // FIXME: bug 574459 -- no NULL check
296 PatternAlternative* alternative = js_new<PatternAlternative>(this);
297 m_alternatives.append(alternative);
301 js::Vector<PatternAlternative*, 0, js::SystemAllocPolicy> m_alternatives;
302 PatternAlternative* m_parent;
303 unsigned m_minimumSize;
304 unsigned m_callFrameSize;
308 // You probably don't want to be calling these functions directly
309 // (please to be calling newlineCharacterClass() et al on your
310 // friendly neighborhood RegexPattern instance to get nicely
312 CharacterClass* newlineCreate();
313 CharacterClass* digitsCreate();
314 CharacterClass* spacesCreate();
315 CharacterClass* wordcharCreate();
316 CharacterClass* nondigitsCreate();
317 CharacterClass* nonspacesCreate();
318 CharacterClass* nonwordcharCreate();
320 struct RegexPattern {
321 RegexPattern(bool ignoreCase, bool multiline)
322 : m_ignoreCase(ignoreCase)
323 , m_multiline(multiline)
324 , m_containsBackreferences(false)
325 , m_containsBOL(false)
326 , m_numSubpatterns(0)
327 , m_maxBackReference(0)
334 , nonwordcharCached(0)
340 deleteAllValues(m_disjunctions);
341 deleteAllValues(m_userCharacterClasses);
346 m_numSubpatterns = 0;
347 m_maxBackReference = 0;
349 m_containsBackreferences = false;
350 m_containsBOL = false;
358 nonwordcharCached = 0;
360 deleteAllValues(m_disjunctions);
361 m_disjunctions.clear();
362 deleteAllValues(m_userCharacterClasses);
363 m_userCharacterClasses.clear();
366 bool containsIllegalBackReference()
368 return m_maxBackReference > m_numSubpatterns;
371 CharacterClass* newlineCharacterClass()
374 m_userCharacterClasses.append(newlineCached = newlineCreate());
375 return newlineCached;
377 CharacterClass* digitsCharacterClass()
380 m_userCharacterClasses.append(digitsCached = digitsCreate());
383 CharacterClass* spacesCharacterClass()
386 m_userCharacterClasses.append(spacesCached = spacesCreate());
389 CharacterClass* wordcharCharacterClass()
392 m_userCharacterClasses.append(wordcharCached = wordcharCreate());
393 return wordcharCached;
395 CharacterClass* nondigitsCharacterClass()
397 if (!nondigitsCached)
398 m_userCharacterClasses.append(nondigitsCached = nondigitsCreate());
399 return nondigitsCached;
401 CharacterClass* nonspacesCharacterClass()
403 if (!nonspacesCached)
404 m_userCharacterClasses.append(nonspacesCached = nonspacesCreate());
405 return nonspacesCached;
407 CharacterClass* nonwordcharCharacterClass()
409 if (!nonwordcharCached)
410 m_userCharacterClasses.append(nonwordcharCached = nonwordcharCreate());
411 return nonwordcharCached;
414 typedef js::Vector<PatternDisjunction*, 4, js::SystemAllocPolicy> PatternDisjunctions;
415 typedef js::Vector<CharacterClass*, 0, js::SystemAllocPolicy> CharacterClasses;
416 bool m_ignoreCase : 1;
417 bool m_multiline : 1;
418 bool m_containsBackreferences : 1;
419 bool m_containsBOL : 1;
420 unsigned m_numSubpatterns;
421 unsigned m_maxBackReference;
422 PatternDisjunction *m_body;
423 PatternDisjunctions m_disjunctions;
424 CharacterClasses m_userCharacterClasses;
427 CharacterClass* newlineCached;
428 CharacterClass* digitsCached;
429 CharacterClass* spacesCached;
430 CharacterClass* wordcharCached;
431 CharacterClass* nondigitsCached;
432 CharacterClass* nonspacesCached;
433 CharacterClass* nonwordcharCached;
436 } } // namespace JSC::Yarr
438 #endif // RegexPattern_h