2 * Copyright (C) 1999-2001, 2004 Harri Porten (porten@kde.org)
3 * Copyright (c) 2007, 2008 Apple Inc. All rights reserved.
4 * Copyright (C) 2009 Torch Mobile, Inc.
5 * Copyright (C) 2010 Peter Varga (pvarga@inf.u-szeged.hu), University of Szeged
7 * This library is free software; you can redistribute it and/or
8 * modify it under the terms of the GNU Lesser General Public
9 * License as published by the Free Software Foundation; either
10 * version 2 of the License, or (at your option) any later version.
12 * This library is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15 * Lesser General Public License for more details.
17 * You should have received a copy of the GNU Lesser General Public
18 * License along with this library; if not, write to the Free Software
19 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
27 #include "RegExpCache.h"
28 #include "yarr/Yarr.h"
29 #include "yarr/YarrJIT.h"
33 #include <wtf/Assertions.h>
34 #include <wtf/OwnArrayPtr.h>
37 #define REGEXP_FUNC_TEST_DATA_GEN 0
// Static class-info record for RegExp: names the class "RegExp" and installs
// the method table generated by CREATE_METHOD_TABLE(RegExp).
41 const ClassInfo RegExp::s_info = { "RegExp", 0, 0, 0, CREATE_METHOD_TABLE(RegExp) };
// Builds a RegExpFlags bit set from a flag string.
//
// Starts from NoFlags and visits every position of `string`. For each of
// FlagGlobal, FlagIgnoreCase and FlagMultiline there is a test for the bit
// already being present in `flags` and an assignment that ORs the bit in.
// NOTE(review): the per-character dispatch, the statement run when a flag is
// already set, and the final return are not visible in this excerpt.
43 RegExpFlags regExpFlags(const UString& string)
45 RegExpFlags flags = NoFlags;
47 for (unsigned i = 0; i < string.length(); ++i) {
// Global flag: duplicate check, then record the bit.
50 if (flags & FlagGlobal)
52 flags = static_cast<RegExpFlags>(flags | FlagGlobal);
// Ignore-case flag: duplicate check, then record the bit.
56 if (flags & FlagIgnoreCase)
58 flags = static_cast<RegExpFlags>(flags | FlagIgnoreCase);
// Multiline flag: duplicate check, then record the bit.
62 if (flags & FlagMultiline)
64 flags = static_cast<RegExpFlags>(flags | FlagMultiline);
// Everything below is compiled only when REGEXP_FUNC_TEST_DATA_GEN is non-zero.
75 #if REGEXP_FUNC_TEST_DATA_GEN
// Collector that writes a textual record of regular-expression match
// attempts to the file named by s_fileName. A shared instance is obtained
// through the static get().
76 class RegExpFunctionalTestCollector {
77 // This class is not thread safe.
// Path of the file the records are written to.
79 static const char* const s_fileName;
// Accessor for the shared collector instance.
82 static RegExpFunctionalTestCollector* get();
84 ~RegExpFunctionalTestCollector();
// Writes one record: (regexp, subject string, start offset, offset vector, match result).
86 void outputOneTest(RegExp*, UString, int, int*, int);
// Compares `regExp` with the cached m_lastRegExp.
// NOTE(review): the statement executed on a match is not visible here;
// presumably it drops the cached pointer — confirm against the full file.
87 void clearRegExp(RegExp* regExp)
89 if (regExp == m_lastRegExp)
94 RegExpFunctionalTestCollector();
// Writes a string to the output file with special characters escaped;
// escapeSlash defaults to false.
96 void outputEscapedUString(const UString&, bool escapeSlash = false);
// The shared instance handed out by get().
98 static RegExpFunctionalTestCollector* s_instance;
// The RegExp whose pattern line was written most recently (see outputOneTest).
100 RegExp* m_lastRegExp;
// File that the collector's constructor opens for the generated test data.
103 const char* const RegExpFunctionalTestCollector::s_fileName = "/tmp/RegExpTestsData";
// Shared instance pointer; starts out as 0 and is assigned in get().
104 RegExpFunctionalTestCollector* RegExpFunctionalTestCollector::s_instance = 0;
// Accessor for the shared collector. The visible body allocates a
// RegExpFunctionalTestCollector with `new` and stores it in s_instance.
// NOTE(review): the guard around the allocation and the return statement are
// not visible in this excerpt.
106 RegExpFunctionalTestCollector* RegExpFunctionalTestCollector::get()
109 s_instance = new RegExpFunctionalTestCollector();
// Writes one match record to m_file.
//
// regExp      - expression that was matched; its pattern, flags and
//               numSubpatterns() are read.
// s           - subject string, written escaped and double-quoted.
// startOffset - written as the first number after the subject.
// ovector     - offset pairs; entry i occupies ovector[2*i] (begin) and
//               ovector[2*i + 1] (end), for i in 0..numSubpatterns().
// result      - written as the second number after the subject.
114 void RegExpFunctionalTestCollector::outputOneTest(RegExp* regExp, UString s, int startOffset, int* ovector, int result)
// A pattern header is emitted only when the expression differs from the one
// handled last time (or none has been recorded yet).
116 if ((!m_lastRegExp) || (m_lastRegExp != regExp)) {
117 m_lastRegExp = regExp;
// Pattern text is written with '/' escaping requested.
119 outputEscapedUString(regExp->pattern(), true);
// Flag checks; the text written for each flag is not visible in this excerpt.
121 if (regExp->global())
123 if (regExp->ignoreCase())
125 if (regExp->multiline())
127 fprintf(m_file, "\n");
// Subject string, start offset and result.
130 fprintf(m_file, " \"");
131 outputEscapedUString(s);
132 fprintf(m_file, "\", %d, %d, (", startOffset, result);
// One begin/end pair per subpattern, including index 0 (hence <=).
133 for (unsigned i = 0; i <= regExp->numSubpatterns(); i++) {
134 int subPatternBegin = ovector[i * 2];
135 int subPatternEnd = ovector[i * 2 + 1];
// NOTE(review): the statement guarded by this -1 test is not visible here.
136 if (subPatternBegin == -1)
138 fprintf(m_file, "%d, %d", subPatternBegin, subPatternEnd);
// True for every pair except the last one (the guarded statement is not visible).
139 if (i < regExp->numSubpatterns())
143 fprintf(m_file, ")\n");
// Opens the data file and positions the stream at its end.
// NOTE(review): the condition between the two fopen calls is not visible;
// presumably "w+" is the fallback when the "r+" open fails — confirm.
147 RegExpFunctionalTestCollector::RegExpFunctionalTestCollector()
// "r+" opens an existing file for reading and writing without truncating it.
149 m_file = fopen(s_fileName, "r+");
// "w+" creates the file (or truncates an existing one).
151 m_file = fopen(s_fileName, "w+");
// Move to the end of the file before anything is written.
153 fseek(m_file, 0L, SEEK_END);
// Destructor of the collector.
// NOTE(review): its body is not visible in this excerpt.
156 RegExpFunctionalTestCollector::~RegExpFunctionalTestCollector()
// Writes `s` to m_file, replacing special characters with escape sequences.
//
// s           - string to write; it is walked one index at a time.
// escapeSlash - NOTE(review): presumably controls whether '/' is written as
//               "\/"; the test that uses it is not visible in this excerpt.
//
// The conditions selecting each escape are not visible here; only the text
// emitted for each case is.
162 void RegExpFunctionalTestCollector::outputEscapedUString(const UString& s, bool escapeSlash)
164 int len = s.length();
166 for (int i = 0; i < len; ++i) {
// Two-character escapes written as a backslash followed by a letter/digit.
171 fputs("\\0", m_file);
174 fputs("\\a", m_file);
177 fputs("\\b", m_file);
180 fputs("\\f", m_file);
183 fputs("\\n", m_file);
186 fputs("\\r", m_file);
189 fputs("\\t", m_file);
192 fputs("\\v", m_file);
// Escaped forward slash, double quote and backslash.
196 fputs("\\/", m_file);
201 fputs("\\\"", m_file);
204 fputs("\\\\", m_file);
// Remaining escaped characters are written as \u followed by at least four hex digits.
211 fprintf(m_file, "\\u%04x", c);
// Holds the compiled forms of a RegExp that RegExp::compile() fills in.
220 struct RegExpRepresentation {
// Output of Yarr::jitCompile().
222 Yarr::YarrCodeBlock m_regExpJITCode;
// Output of Yarr::byteCompile(); owned by this struct through OwnPtr.
224 OwnPtr<Yarr::BytecodePattern> m_regExpBytecode;
// Constructs an uncompiled RegExp cell.
//
// globalData    - supplies the structure passed to the JSCell base.
// patternString - stored in m_patternString.
// flags         - NOTE(review): the initializer that consumes `flags` is not
//                 visible in this excerpt.
//
// The object starts in state NotCompiled with no construction error and zero
// subpatterns.
227 RegExp::RegExp(JSGlobalData& globalData, const UString& patternString, RegExpFlags flags)
228 : JSCell(globalData, globalData.regExpStructure.get())
229 , m_state(NotCompiled)
230 , m_patternString(patternString)
232 , m_constructionError(0)
233 , m_numSubpatterns(0)
// Tracing counters exist only in REGEXP_TRACING builds.
234 #if ENABLE(REGEXP_TRACING)
235 , m_rtMatchCallCount(0)
236 , m_rtMatchFoundCount(0)
// Second construction phase: runs the base-class finishCreation and parses
// the pattern once to detect syntax errors and learn the subpattern count.
241 void RegExp::finishCreation(JSGlobalData& globalData)
243 Base::finishCreation(globalData);
// Parsing reports failure through m_constructionError.
244 Yarr::YarrPattern pattern(m_patternString, ignoreCase(), multiline(), &m_constructionError);
245 if (m_constructionError)
246 m_state = ParseError;
// Remember how many subpatterns the parsed pattern contains.
248 m_numSubpatterns = pattern.m_numSubpatterns;
// NOTE(review): the embedded source numbering jumps from 248 to 253 just
// before this guard, so the definition that encloses these two lines is not
// visible in this excerpt. When REGEXP_FUNC_TEST_DATA_GEN is enabled, the
// shared test collector is asked to clearRegExp(this).
253 #if REGEXP_FUNC_TEST_DATA_GEN
254 RegExpFunctionalTestCollector::get()->clearRegExp(this);
// Allocates a RegExp cell directly on globalData.heap and finishes its
// creation, without going through the RegExp cache.
258 RegExp* RegExp::createWithoutCaching(JSGlobalData& globalData, const UString& patternString, RegExpFlags flags)
// Placement-new into storage obtained from allocateCell<RegExp>().
260 RegExp* regExp = new (allocateCell<RegExp>(globalData.heap)) RegExp(globalData, patternString, flags);
261 regExp->finishCreation(globalData);
// Returns the RegExp for (patternString, flags) by delegating to the RegExp
// cache's lookupOrCreate().
265 RegExp* RegExp::create(JSGlobalData& globalData, const UString& patternString, RegExpFlags flags)
267 return globalData.regExpCache()->lookupOrCreate(patternString, flags);
// Compiles the pattern for the given character size, using JIT compilation
// when possible and the bytecode compiler otherwise.
//
// globalData - provides the RegExp cache, the canUseJIT() query and the
//              allocator handed to the bytecode compiler.
// charSize   - character width passed to Yarr::jitCompile().
270 void RegExp::compile(JSGlobalData* globalData, Yarr::YarrCharSize charSize)
272 Yarr::YarrPattern pattern(m_patternString, ignoreCase(), multiline(), &m_constructionError);
// A parse error is not expected at this point (hence ASSERT_NOT_REACHED);
// the state is still set to ParseError if one occurs.
273 if (m_constructionError) {
274 ASSERT_NOT_REACHED();
275 m_state = ParseError;
// Re-parsing must find the same number of subpatterns recorded earlier.
278 ASSERT(m_numSubpatterns == pattern.m_numSubpatterns);
// First compilation: create the representation and register this RegExp
// with the cache's strong cache.
280 if (!m_representation) {
281 ASSERT(m_state == NotCompiled);
282 m_representation = adoptPtr(new RegExpRepresentation);
283 globalData->regExpCache()->addToStrongCache(this);
// The JIT is attempted only for patterns without backreferences.
288 if (!pattern.m_containsBackreferences && globalData->canUseJIT()) {
289 Yarr::jitCompile(pattern, charSize, globalData, m_representation->m_regExpJITCode);
// NOTE(review): the statements guarded by the two isFallBack() tests are not
// visible in this excerpt.
290 #if ENABLE(YARR_JIT_DEBUG)
291 if (!m_representation->m_regExpJITCode.isFallBack())
296 if (!m_representation->m_regExpJITCode.isFallBack()) {
303 UNUSED_PARAM(charSize);
// Bytecode compilation, allocating from globalData->m_regExpAllocator.
306 m_representation->m_regExpBytecode = Yarr::byteCompile(pattern, &globalData->m_regExpAllocator);
// Calls compile() for `charSize` after checking the existing representation.
// NOTE(review): the statements guarded by the checks below are not visible in
// this excerpt; presumably they return early when usable code already exists.
309 void RegExp::compileIfNecessary(JSGlobalData& globalData, Yarr::YarrCharSize charSize)
311 // If the state is NotCompiled or ParseError, then there is no representation.
312 // If there is a representation, then the state must be either JITCode or ByteCode.
313 ASSERT(!!m_representation == (m_state == JITCode || m_state == ByteCode));
315 if (m_representation) {
317 if (m_state != JITCode)
// JIT code is generated per character size, so each size is checked separately.
319 if ((charSize == Yarr::Char8) && (m_representation->m_regExpJITCode.has8BitCode()))
321 if ((charSize == Yarr::Char16) && (m_representation->m_regExpJITCode.has16BitCode()))
328 compile(&globalData, charSize);
// Runs this regular expression against `s` beginning at `startOffset`.
//
// globalData  - forwarded to compileIfNecessary().
// s           - subject string; whether it is 8-bit selects the character
//               size that is compiled for and executed.
// startOffset - position in `s` where matching starts.
// ovector     - caller-supplied offset vector; when it is used it is resized
//               to (m_numSubpatterns + 1) * 2 entries. Otherwise a local
//               scratch vector of the same size is used.
//               NOTE(review): the test choosing between the two is not
//               visible; presumably it is a null check on `ovector`.
//
// The value produced by Yarr::execute / Yarr::interpret is kept in `result`.
// NOTE(review): the return statements are not visible in this excerpt.
332 int RegExp::match(JSGlobalData& globalData, const UString& s, int startOffset, Vector<int, 32>* ovector)
337 #if ENABLE(REGEXP_TRACING)
338 m_rtMatchCallCount++;
// Guard against a start offset past the end of the string and against a null
// string; the cast makes a negative offset compare as a large unsigned value.
341 if (static_cast<unsigned>(startOffset) > s.length() || s.isNull())
344 if (m_state != ParseError) {
345 compileIfNecessary(globalData, s.is8Bit() ? Yarr::Char8 : Yarr::Char16);
// Two ints (begin, end) per subpattern, plus the pair for the whole match.
347 int offsetVectorSize = (m_numSubpatterns + 1) * 2;
349 Vector<int, 32> nonReturnedOvector;
351 ovector->resize(offsetVectorSize);
352 offsetVector = ovector->data();
354 nonReturnedOvector.resize(offsetVectorSize);
355 offsetVector = nonReturnedOvector.data();
358 ASSERT(offsetVector);
359 // Initialize offsetVector with the return value (index 0) and the
360 // first subpattern start indices (even index values) set to -1.
361 // No need to init the subpattern end indices.
362 for (unsigned j = 0, i = 0; i < m_numSubpatterns + 1; j += 2, i++)
363 offsetVector[j] = -1;
// JIT path: there is one execute call for 8-bit and one for 16-bit characters.
367 if (m_state == JITCode) {
369 result = Yarr::execute(m_representation->m_regExpJITCode, s.characters8(), startOffset, s.length(), offsetVector);
371 result = Yarr::execute(m_representation->m_regExpJITCode, s.characters16(), startOffset, s.length(), offsetVector);
// Debug builds cross-check the JIT result against the interpreter.
372 #if ENABLE(YARR_JIT_DEBUG)
373 matchCompareWithInterpreter(s, startOffset, offsetVector, result);
// Interpreter path.
377 result = Yarr::interpret(m_representation->m_regExpBytecode.get(), s, startOffset, s.length(), offsetVector);
378 ASSERT(result >= -1);
// Optionally record this match attempt as functional test data.
380 #if REGEXP_FUNC_TEST_DATA_GEN
381 RegExpFunctionalTestCollector::get()->outputOneTest(this, s, startOffset, offsetVector, result);
// NOTE(review): the condition guarding this counter is not visible here.
384 #if ENABLE(REGEXP_TRACING)
386 m_rtMatchFoundCount++;
// Discards the compiled representation and puts the object back into the
// NotCompiled state.
395 void RegExp::invalidateCode()
// NOTE(review): the statement guarded by this test is not visible here;
// presumably it returns because there is nothing to discard.
397 if (!m_representation)
399 m_state = NotCompiled;
400 m_representation.clear();
403 #if ENABLE(YARR_JIT_DEBUG)
// Debug-only cross-check: re-runs the match with the bytecode interpreter and
// reports to stderr when its result or subpattern offsets differ from the
// values produced by the JIT.
//
// s            - subject string that was matched.
// startOffset  - offset the match started from.
// offsetVector - offsets produced by the JIT run.
// jitResult    - result value produced by the JIT run.
404 void RegExp::matchCompareWithInterpreter(const UString& s, int startOffset, int* offsetVector, int jitResult)
406 int offsetVectorSize = (m_numSubpatterns + 1) * 2;
407 Vector<int, 32> interpreterOvector;
408 interpreterOvector.resize(offsetVectorSize);
409 int* interpreterOffsetVector = interpreterOvector.data();
410 int interpreterResult = 0;
413 // Initialize interpreterOffsetVector with the return value (index 0) and the
414 // first subpattern start indices (even index values) set to -1.
415 // No need to init the subpattern end indices.
416 for (unsigned j = 0, i = 0; i < m_numSubpatterns + 1; j += 2, i++)
417 interpreterOffsetVector[j] = -1;
419 interpreterResult = Yarr::interpret(m_representation->m_regExpBytecode.get(), s, startOffset, s.length(), interpreterOffsetVector);
// NOTE(review): the statements that record a difference are not visible in
// this excerpt.
421 if (jitResult != interpreterResult)
// Compare subpattern offsets starting at index 2; an end offset is compared
// only when the JIT reported a non-negative start for that subpattern.
424 for (unsigned j = 2, i = 0; i < m_numSubpatterns; j +=2, i++)
425 if ((offsetVector[j] != interpreterOffsetVector[j])
426 || ((offsetVector[j] >= 0) && (offsetVector[j+1] != interpreterOffsetVector[j+1])))
430 fprintf(stderr, "RegExp Discrepency for /%s/\n string input ", pattern().utf8().data());
431 unsigned segmentLen = s.length() - static_cast<unsigned>(startOffset);
// Long inputs are cut to 148 characters followed by "...". The precision
// (".148") is what limits the output; a bare field width of 148 would only
// pad short strings and print long ones in full.
433 fprintf(stderr, (segmentLen < 150) ? "\"%s\"\n" : "\"%.148s...\"\n", s.utf8().data() + startOffset);
435 if (jitResult != interpreterResult) {
436 fprintf(stderr, " JIT result = %d, blah interpreted result = %d\n", jitResult, interpreterResult);
439 fprintf(stderr, " Correct result = %d\n", jitResult);
// List every offset that differs, using the same rule as the comparison above.
443 for (unsigned j = 2, i = 0; i < m_numSubpatterns; j +=2, i++) {
444 if (offsetVector[j] != interpreterOffsetVector[j])
445 fprintf(stderr, " JIT offset[%d] = %d, interpreted offset[%d] = %d\n", j, offsetVector[j], j, interpreterOffsetVector[j]);
446 if ((offsetVector[j] >= 0) && (offsetVector[j+1] != interpreterOffsetVector[j+1]))
447 fprintf(stderr, " JIT offset[%d] = %d, interpreted offset[%d] = %d\n", j+1, offsetVector[j+1], j+1, interpreterOffsetVector[j+1]);
454 #if ENABLE(REGEXP_TRACING)
// Prints one row of tracing statistics for this RegExp to stdout: the pattern
// (at most 40 characters), a JIT column, the number of match() calls and the
// number of matches found.
455 void RegExp::printTraceData()
457 char formattedPattern[41];
// Copy at most 40 bytes of the UTF-8 pattern and terminate explicitly, since
// strncpy does not add a terminator when the source is 40 bytes or longer.
460 strncpy(rawPattern, pattern().utf8().data(), 40);
461 rawPattern[40]= '\0';
463 int pattLen = strlen(rawPattern);
// Patterns of up to 38 characters are shown as /pattern/; longer ones are cut
// to 36 characters and suffixed with "...".
465 snprintf(formattedPattern, 41, (pattLen <= 38) ? "/%.38s/" : "/%.36s...", rawPattern);
468 Yarr::YarrCodeBlock& codeBlock = m_representation->m_regExpJITCode;
470 const size_t jitAddrSize = 20;
471 char jitAddr[jitAddrSize];
// m_state == JITCode is the state in which match() executes the JIT code, so
// "fallback" must label every other state and the code address must be shown
// for JITCode. The previous test (== JITCode) had the two cases swapped.
472 if (m_state != JITCode)
473 snprintf(jitAddr, jitAddrSize, "fallback");
// JIT case: show the address of the generated code.
475 snprintf(jitAddr, jitAddrSize, "0x%014lx", reinterpret_cast<unsigned long int>(codeBlock.getAddr()));
// NOTE(review): alternative definition of jitAddr; the preprocessor lines that
// select between the two definitions are not visible in this excerpt.
477 const char* jitAddr = "JIT Off";
480 printf("%-40.40s %16.16s %10d %10d\n", formattedPattern, jitAddr, m_rtMatchCallCount, m_rtMatchFoundCount);