From: Giuseppe D'Angelo Date: Mon, 17 Aug 2015 09:28:16 +0000 (+0200) Subject: QRegularExpression: fix matching over QStringRefs X-Git-Tag: v5.5.90+alpha1~18^2^2~15 X-Git-Url: http://review.tizen.org/git/?a=commitdiff_plain;h=ee15bef3ea24a78e5fd4708c0a1cebee2e4fac5d;p=platform%2Fupstream%2Fqtbase.git QRegularExpression: fix matching over QStringRefs Playing with the offset argument of pcre_exec is not equivalent to adjusting the pointer to the subject string. In particular, PCRE can go behind the offset to check for lookbehinds or "transition" metacharacters (\b, \B, etc.). This made the code that deals with QStringRefs not matching in behavior with the corresponding code dealing with QStrings. For instance, QString subject("Miss"); QRegularExpression re("(?<=M)iss"); re.match(subject.mid(1)); // doesn't match re.match(subject.midRef(1)); // matches!!! Instead, actually adjust the pointer to the subject string so that the behavior is identical. A broken test that relied on the equivalence is also removed. Change-Id: If96333241ef59621d7f5a6a170ebd0a186844874 Reviewed-by: Volker Krause Reviewed-by: Thiago Macieira --- diff --git a/src/corelib/tools/qregularexpression.cpp b/src/corelib/tools/qregularexpression.cpp index 2e3c2ca79f..070e68154f 100644 --- a/src/corelib/tools/qregularexpression.cpp +++ b/src/corelib/tools/qregularexpression.cpp @@ -1,7 +1,7 @@ /**************************************************************************** ** -** Copyright (C) 2012 Giuseppe D'Angelo . -** Copyright (C) 2012 Klarälvdalens Datakonsult AB, a KDAB Group company, info@kdab.com, author Giuseppe D'Angelo +** Copyright (C) 2015 Giuseppe D'Angelo . +** Copyright (C) 2015 Klarälvdalens Datakonsult AB, a KDAB Group company, info@kdab.com, author Giuseppe D'Angelo ** Copyright (C) 2015 The Qt Company Ltd. 
** Contact: http://www.qt.io/licensing/ ** @@ -1325,48 +1325,45 @@ QRegularExpressionMatchPrivate *QRegularExpressionPrivate::doMatch(const QString int * const captureOffsets = priv->capturedOffsets.data(); const int captureOffsetsCount = priv->capturedOffsets.size(); - int realOffset = offset + subjectStart; - const int realSubjectLength = subjectLength + subjectStart; - - const unsigned short * const subjectUtf16 = subject.utf16(); + const unsigned short * const subjectUtf16 = subject.utf16() + subjectStart; int result; if (!previousMatchWasEmpty) { result = pcre16SafeExec(compiledPattern, currentStudyData, - subjectUtf16, realSubjectLength, - realOffset, pcreOptions, + subjectUtf16, subjectLength, + offset, pcreOptions, captureOffsets, captureOffsetsCount); } else { result = pcre16SafeExec(compiledPattern, currentStudyData, - subjectUtf16, realSubjectLength, - realOffset, pcreOptions | PCRE_NOTEMPTY_ATSTART | PCRE_ANCHORED, + subjectUtf16, subjectLength, + offset, pcreOptions | PCRE_NOTEMPTY_ATSTART | PCRE_ANCHORED, captureOffsets, captureOffsetsCount); if (result == PCRE_ERROR_NOMATCH) { - ++realOffset; + ++offset; if (usingCrLfNewlines - && realOffset < realSubjectLength - && subjectUtf16[realOffset - 1] == QLatin1Char('\r') - && subjectUtf16[realOffset] == QLatin1Char('\n')) { - ++realOffset; - } else if (realOffset < realSubjectLength - && QChar::isLowSurrogate(subjectUtf16[realOffset])) { - ++realOffset; + && offset < subjectLength + && subjectUtf16[offset - 1] == QLatin1Char('\r') + && subjectUtf16[offset] == QLatin1Char('\n')) { + ++offset; + } else if (offset < subjectLength + && QChar::isLowSurrogate(subjectUtf16[offset])) { + ++offset; } result = pcre16SafeExec(compiledPattern, currentStudyData, - subjectUtf16, realSubjectLength, - realOffset, pcreOptions, + subjectUtf16, subjectLength, + offset, pcreOptions, captureOffsets, captureOffsetsCount); } } #ifdef QREGULAREXPRESSION_DEBUG qDebug() << "Matching" << pattern << "against" << subject - << 
"starting at" << subjectStart << "len" << subjectLength << "real len" << realSubjectLength - << "offset" << offset << "real offset" << realOffset + << "starting at" << subjectStart << "len" << subjectLength + << "offset" << offset << matchType << matchOptions << previousMatchWasEmpty << "result" << result; #endif @@ -2041,7 +2038,7 @@ QString QRegularExpressionMatch::captured(int nth) const if (start == -1) // didn't capture return QString(); - return d->subject.mid(start, capturedLength(nth)); + return d->subject.mid(start + d->subjectStart, capturedLength(nth)); } /*! @@ -2062,7 +2059,7 @@ QStringRef QRegularExpressionMatch::capturedRef(int nth) const if (start == -1) // didn't capture return QStringRef(); - return d->subject.midRef(start, capturedLength(nth)); + return d->subject.midRef(start + d->subjectStart, capturedLength(nth)); } /*! diff --git a/tests/auto/corelib/tools/qregularexpression/tst_qregularexpression.cpp b/tests/auto/corelib/tools/qregularexpression/tst_qregularexpression.cpp index 99f6a31267..d185e64251 100644 --- a/tests/auto/corelib/tools/qregularexpression/tst_qregularexpression.cpp +++ b/tests/auto/corelib/tools/qregularexpression/tst_qregularexpression.cpp @@ -1,7 +1,7 @@ /**************************************************************************** ** -** Copyright (C) 2012 Giuseppe D'Angelo . -** Copyright (C) 2013 Klarälvdalens Datakonsult AB, a KDAB Group company, info@kdab.com, author Giuseppe D'Angelo +** Copyright (C) 2015 Giuseppe D'Angelo . +** Copyright (C) 2015 Klarälvdalens Datakonsult AB, a KDAB Group company, info@kdab.com, author Giuseppe D'Angelo ** Contact: http://www.qt.io/licensing/ ** ** This file is part of the test suite of the Qt Toolkit. @@ -306,19 +306,6 @@ static void testMatch(const QRegularExpression &regexp, matchType, matchOptions, result); - - // offset <= 0 tested above; now also test stringrefs not spanning over - // the entire subject.
Note that the offset can be negative, hence the above - // tests can't be merged into this one - for (int i = 1; i <= offset; ++i) { - testMatchImpl(regexp, - matchingMethodForStringRef, - QStringRef(&subject, i, subject.length() - i), - offset - i, - matchType, - matchOptions, - result); - } } typedef QRegularExpressionMatch (QRegularExpression::*QREMatchStringPMF)(const QString &, int, QRegularExpression::MatchType, QRegularExpression::MatchOptions) const; @@ -736,6 +723,18 @@ void tst_QRegularExpression::normalMatch_data() // *** + m.clear(); + m.isValid = true; m.hasMatch = true; + m.captured << "bcd"; + QTest::newRow("match12") + << QRegularExpression("\\Bbcd\\B") + << "abcde" + << 1 + << QRegularExpression::MatchOptions(QRegularExpression::NoMatchOption) + << m; + + // *** + m.clear(); m.isValid = true; QTest::newRow("nomatch01") << QRegularExpression("\\d+") @@ -1705,3 +1704,360 @@ void tst_QRegularExpression::JOptionUsage() re.optimize(); QCOMPARE(re.isValid(), isValid); } + +void tst_QRegularExpression::QStringAndQStringRefEquivalence() +{ + const QString subject = QStringLiteral("Mississippi"); + { + const QRegularExpression re("\\Biss\\B"); + QVERIFY(re.isValid()); + { + const QRegularExpressionMatch match = re.match(subject); + consistencyCheck(match); + QVERIFY(match.isValid()); + QVERIFY(match.hasMatch()); + QCOMPARE(match.captured(), QStringLiteral("iss")); + QCOMPARE(match.capturedStart(), 1); + QCOMPARE(match.capturedEnd(), 4); + } + { + const QRegularExpressionMatch match = re.match(QStringRef(&subject)); + consistencyCheck(match); + QVERIFY(match.isValid()); + QVERIFY(match.hasMatch()); + QCOMPARE(match.captured(), QStringLiteral("iss")); + QCOMPARE(match.capturedStart(), 1); + QCOMPARE(match.capturedEnd(), 4); + } + { + const QRegularExpressionMatch match = re.match(subject, 1); + consistencyCheck(match); + QVERIFY(match.isValid()); + QVERIFY(match.hasMatch()); + QCOMPARE(match.captured(), QStringLiteral("iss")); + 
QCOMPARE(match.capturedStart(), 1); + QCOMPARE(match.capturedEnd(), 4); + } + { + const QRegularExpressionMatch match = re.match(QStringRef(&subject), 1); + consistencyCheck(match); + QVERIFY(match.isValid()); + QVERIFY(match.hasMatch()); + QCOMPARE(match.captured(), QStringLiteral("iss")); + QCOMPARE(match.capturedStart(), 1); + QCOMPARE(match.capturedEnd(), 4); + } + { + const QRegularExpressionMatch match = re.match(subject.mid(1)); + consistencyCheck(match); + QVERIFY(match.isValid()); + QVERIFY(match.hasMatch()); + QCOMPARE(match.captured(), QStringLiteral("iss")); + QCOMPARE(match.capturedStart(), 3); + QCOMPARE(match.capturedEnd(), 6); + } + { + const QRegularExpressionMatch match = re.match(subject.midRef(1)); + consistencyCheck(match); + QVERIFY(match.isValid()); + QVERIFY(match.hasMatch()); + QCOMPARE(match.captured(), QStringLiteral("iss")); + QCOMPARE(match.capturedStart(), 3); + QCOMPARE(match.capturedEnd(), 6); + } + { + const QRegularExpressionMatch match = re.match(subject.mid(1), 1); + consistencyCheck(match); + QVERIFY(match.isValid()); + QVERIFY(match.hasMatch()); + QCOMPARE(match.captured(), QStringLiteral("iss")); + QCOMPARE(match.capturedStart(), 3); + QCOMPARE(match.capturedEnd(), 6); + } + { + const QRegularExpressionMatch match = re.match(subject.midRef(1), 1); + consistencyCheck(match); + QVERIFY(match.isValid()); + QVERIFY(match.hasMatch()); + QCOMPARE(match.captured(), QStringLiteral("iss")); + QCOMPARE(match.capturedStart(), 3); + QCOMPARE(match.capturedEnd(), 6); + } + { + const QRegularExpressionMatch match = re.match(subject, 4); + consistencyCheck(match); + QVERIFY(match.isValid()); + QVERIFY(match.hasMatch()); + QCOMPARE(match.captured(), QStringLiteral("iss")); + QCOMPARE(match.capturedStart(), 4); + QCOMPARE(match.capturedEnd(), 7); + } + { + const QRegularExpressionMatch match = re.match(QStringRef(&subject), 4); + consistencyCheck(match); + QVERIFY(match.isValid()); + QVERIFY(match.hasMatch()); + QCOMPARE(match.captured(), 
QStringLiteral("iss")); + QCOMPARE(match.capturedStart(), 4); + QCOMPARE(match.capturedEnd(), 7); + } + { + const QRegularExpressionMatch match = re.match(subject.mid(4)); + consistencyCheck(match); + QVERIFY(match.isValid()); + QVERIFY(!match.hasMatch()); + } + { + const QRegularExpressionMatch match = re.match(subject.midRef(4)); + consistencyCheck(match); + QVERIFY(match.isValid()); + QVERIFY(!match.hasMatch()); + } + + { + QRegularExpressionMatchIterator i = re.globalMatch(subject); + QVERIFY(i.isValid()); + + consistencyCheck(i); + QVERIFY(i.hasNext()); + const QRegularExpressionMatch match1 = i.next(); + consistencyCheck(match1); + QVERIFY(match1.isValid()); + QVERIFY(match1.hasMatch()); + QCOMPARE(match1.captured(), QStringLiteral("iss")); + QCOMPARE(match1.capturedStart(), 1); + QCOMPARE(match1.capturedEnd(), 4); + + consistencyCheck(i); + QVERIFY(i.hasNext()); + const QRegularExpressionMatch match2 = i.next(); + consistencyCheck(match2); + QVERIFY(match2.isValid()); + QVERIFY(match2.hasMatch()); + QCOMPARE(match2.captured(), QStringLiteral("iss")); + QCOMPARE(match2.capturedStart(), 4); + QCOMPARE(match2.capturedEnd(), 7); + + QVERIFY(!i.hasNext()); + } + { + QRegularExpressionMatchIterator i = re.globalMatch(QStringRef(&subject)); + QVERIFY(i.isValid()); + + consistencyCheck(i); + QVERIFY(i.hasNext()); + const QRegularExpressionMatch match1 = i.next(); + consistencyCheck(match1); + QVERIFY(match1.isValid()); + QVERIFY(match1.hasMatch()); + QCOMPARE(match1.captured(), QStringLiteral("iss")); + QCOMPARE(match1.capturedStart(), 1); + QCOMPARE(match1.capturedEnd(), 4); + + consistencyCheck(i); + QVERIFY(i.hasNext()); + const QRegularExpressionMatch match2 = i.next(); + consistencyCheck(match2); + QVERIFY(match2.isValid()); + QVERIFY(match2.hasMatch()); + QCOMPARE(match2.captured(), QStringLiteral("iss")); + QCOMPARE(match2.capturedStart(), 4); + QCOMPARE(match2.capturedEnd(), 7); + + QVERIFY(!i.hasNext()); + } + { + QRegularExpressionMatchIterator i = 
re.globalMatch(subject, 1); + QVERIFY(i.isValid()); + + consistencyCheck(i); + QVERIFY(i.hasNext()); + const QRegularExpressionMatch match1 = i.next(); + consistencyCheck(match1); + QVERIFY(match1.isValid()); + QVERIFY(match1.hasMatch()); + QCOMPARE(match1.captured(), QStringLiteral("iss")); + QCOMPARE(match1.capturedStart(), 1); + QCOMPARE(match1.capturedEnd(), 4); + + consistencyCheck(i); + QVERIFY(i.hasNext()); + const QRegularExpressionMatch match2 = i.next(); + consistencyCheck(match2); + QVERIFY(match2.isValid()); + QVERIFY(match2.hasMatch()); + QCOMPARE(match2.captured(), QStringLiteral("iss")); + QCOMPARE(match2.capturedStart(), 4); + QCOMPARE(match2.capturedEnd(), 7); + + QVERIFY(!i.hasNext()); + } + { + QRegularExpressionMatchIterator i = re.globalMatch(QStringRef(&subject), 1); + QVERIFY(i.isValid()); + + consistencyCheck(i); + QVERIFY(i.hasNext()); + const QRegularExpressionMatch match1 = i.next(); + consistencyCheck(match1); + QVERIFY(match1.isValid()); + QVERIFY(match1.hasMatch()); + QCOMPARE(match1.captured(), QStringLiteral("iss")); + QCOMPARE(match1.capturedStart(), 1); + QCOMPARE(match1.capturedEnd(), 4); + + consistencyCheck(i); + QVERIFY(i.hasNext()); + const QRegularExpressionMatch match2 = i.next(); + consistencyCheck(match2); + QVERIFY(match2.isValid()); + QVERIFY(match2.hasMatch()); + QCOMPARE(match2.captured(), QStringLiteral("iss")); + QCOMPARE(match2.capturedStart(), 4); + QCOMPARE(match2.capturedEnd(), 7); + + QVERIFY(!i.hasNext()); + } + { + QRegularExpressionMatchIterator i = re.globalMatch(subject.mid(1)); + QVERIFY(i.isValid()); + + consistencyCheck(i); + QVERIFY(i.hasNext()); + const QRegularExpressionMatch match = i.next(); + consistencyCheck(match); + QVERIFY(match.isValid()); + QVERIFY(match.hasMatch()); + QCOMPARE(match.captured(), QStringLiteral("iss")); + QCOMPARE(match.capturedStart(), 3); + QCOMPARE(match.capturedEnd(), 6); + + QVERIFY(!i.hasNext()); + } + { + QRegularExpressionMatchIterator i = 
re.globalMatch(subject.midRef(1)); + QVERIFY(i.isValid()); + + consistencyCheck(i); + QVERIFY(i.hasNext()); + const QRegularExpressionMatch match = i.next(); + consistencyCheck(match); + QVERIFY(match.isValid()); + QVERIFY(match.hasMatch()); + QCOMPARE(match.captured(), QStringLiteral("iss")); + QCOMPARE(match.capturedStart(), 3); + QCOMPARE(match.capturedEnd(), 6); + + QVERIFY(!i.hasNext()); + } + { + QRegularExpressionMatchIterator i = re.globalMatch(subject.mid(1), 1); + QVERIFY(i.isValid()); + + consistencyCheck(i); + QVERIFY(i.hasNext()); + const QRegularExpressionMatch match = i.next(); + consistencyCheck(match); + QVERIFY(match.isValid()); + QVERIFY(match.hasMatch()); + QCOMPARE(match.captured(), QStringLiteral("iss")); + QCOMPARE(match.capturedStart(), 3); + QCOMPARE(match.capturedEnd(), 6); + + QVERIFY(!i.hasNext()); + } + { + QRegularExpressionMatchIterator i = re.globalMatch(subject.midRef(1), 1); + QVERIFY(i.isValid()); + + consistencyCheck(i); + QVERIFY(i.hasNext()); + const QRegularExpressionMatch match = i.next(); + consistencyCheck(match); + QVERIFY(match.isValid()); + QVERIFY(match.hasMatch()); + QCOMPARE(match.captured(), QStringLiteral("iss")); + QCOMPARE(match.capturedStart(), 3); + QCOMPARE(match.capturedEnd(), 6); + + QVERIFY(!i.hasNext()); + } + { + QRegularExpressionMatchIterator i = re.globalMatch(subject.mid(1), 1); + QVERIFY(i.isValid()); + + consistencyCheck(i); + QVERIFY(i.hasNext()); + const QRegularExpressionMatch match = i.next(); + consistencyCheck(match); + QVERIFY(match.isValid()); + QVERIFY(match.hasMatch()); + QCOMPARE(match.captured(), QStringLiteral("iss")); + QCOMPARE(match.capturedStart(), 3); + QCOMPARE(match.capturedEnd(), 6); + + QVERIFY(!i.hasNext()); + } + { + QRegularExpressionMatchIterator i = re.globalMatch(subject.midRef(1), 1); + QVERIFY(i.isValid()); + + consistencyCheck(i); + QVERIFY(i.hasNext()); + const QRegularExpressionMatch match = i.next(); + consistencyCheck(match); + QVERIFY(match.isValid()); + 
QVERIFY(match.hasMatch()); + QCOMPARE(match.captured(), QStringLiteral("iss")); + QCOMPARE(match.capturedStart(), 3); + QCOMPARE(match.capturedEnd(), 6); + + QVERIFY(!i.hasNext()); + } + + { + QRegularExpressionMatchIterator i = re.globalMatch(subject, 4); + QVERIFY(i.isValid()); + + consistencyCheck(i); + QVERIFY(i.hasNext()); + const QRegularExpressionMatch match = i.next(); + consistencyCheck(match); + QVERIFY(match.isValid()); + QVERIFY(match.hasMatch()); + QCOMPARE(match.captured(), QStringLiteral("iss")); + QCOMPARE(match.capturedStart(), 4); + QCOMPARE(match.capturedEnd(), 7); + + QVERIFY(!i.hasNext()); + } + { + QRegularExpressionMatchIterator i = re.globalMatch(QStringRef(&subject), 4); + QVERIFY(i.isValid()); + + consistencyCheck(i); + QVERIFY(i.hasNext()); + const QRegularExpressionMatch match = i.next(); + consistencyCheck(match); + QVERIFY(match.isValid()); + QVERIFY(match.hasMatch()); + QCOMPARE(match.captured(), QStringLiteral("iss")); + QCOMPARE(match.capturedStart(), 4); + QCOMPARE(match.capturedEnd(), 7); + + QVERIFY(!i.hasNext()); + } + { + QRegularExpressionMatchIterator i = re.globalMatch(subject.mid(4)); + consistencyCheck(i); + QVERIFY(i.isValid()); + QVERIFY(!i.hasNext()); + } + { + QRegularExpressionMatchIterator i = re.globalMatch(subject.midRef(4)); + consistencyCheck(i); + QVERIFY(i.isValid()); + QVERIFY(!i.hasNext()); + } + } +} diff --git a/tests/auto/corelib/tools/qregularexpression/tst_qregularexpression.h b/tests/auto/corelib/tools/qregularexpression/tst_qregularexpression.h index 578a4129ec..aed262d15d 100644 --- a/tests/auto/corelib/tools/qregularexpression/tst_qregularexpression.h +++ b/tests/auto/corelib/tools/qregularexpression/tst_qregularexpression.h @@ -72,6 +72,7 @@ private slots: void regularExpressionMatch(); void JOptionUsage_data(); void JOptionUsage(); + void QStringAndQStringRefEquivalence(); private: void provideRegularExpressions();