QRegularExpression: improve JIT memory handling
authorGiuseppe D'Angelo <dangelog@gmail.com>
Thu, 23 Feb 2012 02:53:08 +0000 (02:53 +0000)
committerQt by Nokia <qt-info@nokia.com>
Thu, 8 Mar 2012 21:00:47 +0000 (22:00 +0100)
PCRE's JIT uses by default 32K on the pcre_exec caller's stack. This
is fine for most situations, but in some cases (esp. patterns with
lot of recursion) more memory is required.

Therefore, if a match execution fails due to exhausting JIT memory,
we let PCRE allocate up to 512KB to be used for the JIT's stack.

The pointer to the allocated memory is put in thread local storage
(so it can be reused from the same thread, if needed, and automatically
goes away when the thread dies).

Change-Id: Ica5fb7d517068befff88ebb198a603a26ec5d8a7
Reviewed-by: Bradley T. Hughes <bradley.hughes@nokia.com>
Reviewed-by: Thiago Macieira <thiago.macieira@intel.com>
src/corelib/tools/qregularexpression.cpp
tests/auto/corelib/tools/qregularexpression/tst_qregularexpression.cpp
tests/auto/corelib/tools/qregularexpression/tst_qregularexpression.h

index 0fa7d64..5deb485 100644 (file)
@@ -47,6 +47,8 @@
 #include <QtCore/qvector.h>
 #include <QtCore/qstringlist.h>
 #include <QtCore/qdebug.h>
+#include <QtCore/qthreadstorage.h>
+#include <QtCore/qglobal.h>
 
 #include <pcre.h>
 
@@ -989,6 +991,47 @@ void QRegularExpressionPrivate::getPatternInfo()
             (patternNewlineSetting == PCRE_NEWLINE_ANYCRLF);
 }
 
+
+/*!
+    \class QPcreJitStackPointer
+    \internal
+
+    Simple "smartpointer" wrapper around a pcre_jit_stack, to be used with
+    QThreadStorage.
+*/
+class QPcreJitStackPointer
+{
+    Q_DISABLE_COPY(QPcreJitStackPointer);
+
+public:
+    QPcreJitStackPointer()
+    {
+        // The default JIT stack size in PCRE is 32K,
+        // we allocate from 32K up to 512K.
+        stack = pcre16_jit_stack_alloc(32*1024, 512*1024);
+    }
+    ~QPcreJitStackPointer()
+    {
+        if (stack)
+            pcre16_jit_stack_free(stack);
+    }
+
+    pcre16_jit_stack *stack;
+};
+
+Q_GLOBAL_STATIC(QThreadStorage<QPcreJitStackPointer *>, jitStacks)
+
+/*!
+    \internal
+*/
+static pcre16_jit_stack *qtPcreCallback(void *)
+{
+    if (jitStacks()->hasLocalData())
+        return jitStacks()->localData()->stack;
+
+    return 0;
+}
+
 /*!
     \internal
 */
@@ -1044,6 +1087,9 @@ pcre16_extra *QRegularExpressionPrivate::optimizePattern()
     const char *err;
     studyData = pcre16_study(compiledPattern, studyOptions, &err);
 
+    if (studyData && studyData->flags & PCRE_EXTRA_EXECUTABLE_JIT)
+        pcre16_assign_jit_stack(studyData, qtPcreCallback, 0);
+
     if (!studyData && err)
         qWarning("QRegularExpressionPrivate::optimizePattern(): pcre_study failed: %s", err);
 
@@ -1070,6 +1116,32 @@ int QRegularExpressionPrivate::captureIndexForName(const QString &name) const
 /*!
     \internal
 
+    This is a simple wrapper for pcre16_exec for handling the case in which the
+    JIT runs out of memory. In that case, we allocate a thread-local JIT stack
+    and re-run pcre16_exec.
+*/
+static int pcre16SafeExec(const pcre16 *code, const pcre16_extra *extra,
+                          const unsigned short *subject, int length,
+                          int startOffset, int options,
+                          int *ovector, int ovecsize)
+{
+    int result = pcre16_exec(code, extra, subject, length,
+                             startOffset, options, ovector, ovecsize);
+
+    if (result == PCRE_ERROR_JIT_STACKLIMIT && !jitStacks()->hasLocalData()) {
+        QPcreJitStackPointer *p = new QPcreJitStackPointer;
+        jitStacks()->setLocalData(p);
+
+        result = pcre16_exec(code, extra, subject, length,
+                             startOffset, options, ovector, ovecsize);
+    }
+
+    return result;
+}
+
+/*!
+    \internal
+
     Performs a match of type \a matchType on the given \a subject string with
     options \a matchOptions and returns the QRegularExpressionMatchPrivate of
     the result. It also advances a match if a previous result is given as \a
@@ -1134,15 +1206,15 @@ QRegularExpressionMatchPrivate *QRegularExpressionPrivate::doMatch(const QString
     int result;
 
     if (!previousMatchWasEmpty) {
-        result = pcre16_exec(compiledPattern, currentStudyData,
-                             subjectUtf16, subjectLength,
-                             offset, pcreOptions,
-                             captureOffsets, captureOffsetsCount);
+        result = pcre16SafeExec(compiledPattern, currentStudyData,
+                                subjectUtf16, subjectLength,
+                                offset, pcreOptions,
+                                captureOffsets, captureOffsetsCount);
     } else {
-        result = pcre16_exec(compiledPattern, currentStudyData,
-                             subjectUtf16, subjectLength,
-                             offset, pcreOptions | PCRE_NOTEMPTY_ATSTART | PCRE_ANCHORED,
-                             captureOffsets, captureOffsetsCount);
+        result = pcre16SafeExec(compiledPattern, currentStudyData,
+                                subjectUtf16, subjectLength,
+                                offset, pcreOptions | PCRE_NOTEMPTY_ATSTART | PCRE_ANCHORED,
+                                captureOffsets, captureOffsetsCount);
 
         if (result == PCRE_ERROR_NOMATCH) {
             ++offset;
@@ -1157,10 +1229,10 @@ QRegularExpressionMatchPrivate *QRegularExpressionPrivate::doMatch(const QString
                 ++offset;
             }
 
-            result = pcre16_exec(compiledPattern, currentStudyData,
-                                 subjectUtf16, subjectLength,
-                                 offset, pcreOptions,
-                                 captureOffsets, captureOffsetsCount);
+            result = pcre16SafeExec(compiledPattern, currentStudyData,
+                                    subjectUtf16, subjectLength,
+                                    offset, pcreOptions,
+                                    captureOffsets, captureOffsetsCount);
         }
     }
 
index 72157c0..38b82ec 100644 (file)
@@ -1196,3 +1196,30 @@ void tst_QRegularExpression::captureCount()
     if (!re.isValid())
         QCOMPARE(re.captureCount(), -1);
 }
+
+void tst_QRegularExpression::pcreJitStackUsage_data()
+{
+    QTest::addColumn<QString>("pattern");
+    QTest::addColumn<QString>("subject");
+    // these patterns cause enough backtrack (or even infinite recursion)
+    // in the regexp engine, so that JIT requests more memory.
+    QTest::newRow("jitstack01") << "(?(R)a*(?1)|((?R))b)" << "aaaabcde";
+    QTest::newRow("jitstack02") << "(?(R)a*(?1)|((?R))b)" << "aaaaaaabcde";
+}
+
+void tst_QRegularExpression::pcreJitStackUsage()
+{
+    QFETCH(QString, pattern);
+    QFETCH(QString, subject);
+
+    QRegularExpression re(pattern);
+    QVERIFY(re.isValid());
+    QRegularExpressionMatch match = re.match(subject);
+    consistencyCheck(match);
+    QRegularExpressionMatchIterator iterator = re.globalMatch(subject);
+    consistencyCheck(iterator);
+    while (iterator.hasNext()) {
+        match = iterator.next();
+        consistencyCheck(match);
+    }
+}
index 1a703a8..fd8bdfa 100644 (file)
@@ -71,6 +71,8 @@ private slots:
     void operatoreq();
     void captureCount_data();
     void captureCount();
+    void pcreJitStackUsage_data();
+    void pcreJitStackUsage();
 
 private:
     void provideRegularExpressions();