1 /****************************************************************************
3 ** Copyright (C) 2012 Nokia Corporation and/or its subsidiary(-ies).
4 ** Contact: http://www.qt-project.org/
6 ** This file is part of the QtCore module of the Qt Toolkit.
8 ** $QT_BEGIN_LICENSE:LGPL$
9 ** GNU Lesser General Public License Usage
10 ** This file may be used under the terms of the GNU Lesser General Public
11 ** License version 2.1 as published by the Free Software Foundation and
12 ** appearing in the file LICENSE.LGPL included in the packaging of this
13 ** file. Please review the following information to ensure the GNU Lesser
14 ** General Public License version 2.1 requirements will be met:
15 ** http://www.gnu.org/licenses/old-licenses/lgpl-2.1.html.
17 ** In addition, as a special exception, Nokia gives you certain additional
18 ** rights. These rights are described in the Nokia Qt LGPL Exception
19 ** version 1.1, included in the file LGPL_EXCEPTION.txt in this package.
21 ** GNU General Public License Usage
22 ** Alternatively, this file may be used under the terms of the GNU General
23 ** Public License version 3.0 as published by the Free Software Foundation
24 ** and appearing in the file LICENSE.GPL included in the packaging of this
25 ** file. Please review the following information to ensure the GNU General
26 ** Public License version 3.0 requirements will be met:
27 ** http://www.gnu.org/copyleft/gpl.html.
30 ** Alternatively, this file may be used in accordance with the terms and
31 ** conditions contained in a signed written agreement between you and Nokia.
40 ****************************************************************************/
42 #include "qstringmatcher.h"
46 static void bm_init_skiptable(const ushort *uc, int len, uchar *skiptable, Qt::CaseSensitivity cs)
48 int l = qMin(len, 255);
49 memset(skiptable, l, 256*sizeof(uchar));
51 if (cs == Qt::CaseSensitive) {
53 skiptable[*uc & 0xff] = l;
57 const ushort *start = uc;
59 skiptable[foldCase(uc, start) & 0xff] = l;
65 static inline int bm_find(const ushort *uc, uint l, int index, const ushort *puc, uint pl,
66 const uchar *skiptable, Qt::CaseSensitivity cs)
69 return index > (int)l ? -1 : index;
70 const uint pl_minus_one = pl - 1;
72 register const ushort *current = uc + index + pl_minus_one;
73 const ushort *end = uc + l;
74 if (cs == Qt::CaseSensitive) {
75 while (current < end) {
76 uint skip = skiptable[*current & 0xff];
80 if (*(current - skip) != puc[pl_minus_one-skip])
84 if (skip > pl_minus_one) // we have a match
85 return (current - uc) - pl_minus_one;
87 // in case we don't have a match we are a bit inefficient as we only skip by one
88 // when we have the non matching char in the string.
89 if (skiptable[*(current - skip) & 0xff] == pl)
94 if (current > end - skip)
99 while (current < end) {
100 uint skip = skiptable[foldCase(current, uc) & 0xff];
104 if (foldCase(current - skip, uc) != foldCase(puc + pl_minus_one - skip, puc))
108 if (skip > pl_minus_one) // we have a match
109 return (current - uc) - pl_minus_one;
110 // in case we don't have a match we are a bit inefficient as we only skip by one
111 // when we have the non matching char in the string.
112 if (skiptable[foldCase(current - skip, uc) & 0xff] == pl)
117 if (current > end - skip)
122 return -1; // not found
126 \class QStringMatcher
127 \brief The QStringMatcher class holds a sequence of characters that
128 can be quickly matched in a Unicode string.
131 \ingroup string-processing
133 This class is useful when you have a sequence of \l{QChar}s that
134 you want to repeatedly match against some strings (perhaps in a
135 loop), or when you want to search for the same sequence of
136 characters multiple times in the same string. Using a matcher
137 object and indexIn() is faster than matching a plain QString with
138 QString::indexOf() if repeated matching takes place. This class
139 offers no benefit if you are doing one-off string matches.
141 Create the QStringMatcher with the QString you want to search
142 for. Then call indexIn() on the QString that you want to search.
144 \sa QString, QByteArrayMatcher, QRegExp
148 Constructs an empty string matcher that won't match anything.
149 Call setPattern() to give it a pattern to match.
151 QStringMatcher::QStringMatcher()
152 : d_ptr(0), q_cs(Qt::CaseSensitive)
154 qMemSet(q_data, 0, sizeof(q_data));
158 Constructs a string matcher that will search for \a pattern, with
159 case sensitivity \a cs.
161 Call indexIn() to perform a search.
163 QStringMatcher::QStringMatcher(const QString &pattern, Qt::CaseSensitivity cs)
164 : d_ptr(0), q_pattern(pattern), q_cs(cs)
166 p.uc = pattern.unicode();
167 p.len = pattern.size();
168 bm_init_skiptable((const ushort *)p.uc, p.len, p.q_skiptable, cs);
172 \fn QStringMatcher::QStringMatcher(const QChar *uc, int length, Qt::CaseSensitivity cs)
175 Constructs a string matcher that will search for the pattern referred to
176 by \a uc with the given \a length and case sensitivity specified by \a cs.
178 QStringMatcher::QStringMatcher(const QChar *uc, int len, Qt::CaseSensitivity cs)
183 bm_init_skiptable((const ushort *)p.uc, len, p.q_skiptable, cs);
187 Copies the \a other string matcher to this string matcher.
189 QStringMatcher::QStringMatcher(const QStringMatcher &other)
196 Destroys the string matcher.
198 QStringMatcher::~QStringMatcher()
203 Assigns the \a other string matcher to this string matcher.
205 QStringMatcher &QStringMatcher::operator=(const QStringMatcher &other)
207 if (this != &other) {
208 q_pattern = other.q_pattern;
210 memcpy(q_data, other.q_data, sizeof(q_data));
216 Sets the string that this string matcher will search for to \a
219 \sa pattern(), setCaseSensitivity(), indexIn()
221 void QStringMatcher::setPattern(const QString &pattern)
224 p.uc = pattern.unicode();
225 p.len = pattern.size();
226 bm_init_skiptable((const ushort *)pattern.unicode(), pattern.size(), p.q_skiptable, q_cs);
230 \fn QString QStringMatcher::pattern() const
232 Returns the string pattern that this string matcher will search
238 QString QStringMatcher::pattern() const
240 if (!q_pattern.isEmpty())
242 return QString(p.uc, p.len);
246 Sets the case sensitivity setting of this string matcher to \a
249 \sa caseSensitivity(), setPattern(), indexIn()
251 void QStringMatcher::setCaseSensitivity(Qt::CaseSensitivity cs)
255 bm_init_skiptable((const ushort *)q_pattern.unicode(), q_pattern.size(), p.q_skiptable, cs);
260 Searches the string \a str from character position \a from
261 (default 0, i.e. from the first character), for the string
262 pattern() that was set in the constructor or in the most recent
263 call to setPattern(). Returns the position where the pattern()
264 matched in \a str, or -1 if no match was found.
266 \sa setPattern(), setCaseSensitivity()
268 int QStringMatcher::indexIn(const QString &str, int from) const
272 return bm_find((const ushort *)str.unicode(), str.size(), from,
273 (const ushort *)p.uc, p.len,
274 p.q_skiptable, q_cs);
280 Searches the string starting at \a str (of length \a length) from
281 character position \a from (default 0, i.e. from the first
282 character), for the string pattern() that was set in the
283 constructor or in the most recent call to setPattern(). Returns
284 the position where the pattern() matched in \a str, or -1 if no
287 \sa setPattern(), setCaseSensitivity()
289 int QStringMatcher::indexIn(const QChar *str, int length, int from) const
293 return bm_find((const ushort *)str, length, from,
294 (const ushort *)p.uc, p.len,
295 p.q_skiptable, q_cs);
299 \fn Qt::CaseSensitivity QStringMatcher::caseSensitivity() const
301 Returns the case sensitivity setting for this string matcher.
303 \sa setCaseSensitivity()
310 int qFindStringBoyerMoore(
311 const QChar *haystack, int haystackLen, int haystackOffset,
312 const QChar *needle, int needleLen, Qt::CaseSensitivity cs)
314 uchar skiptable[256];
315 bm_init_skiptable((const ushort *)needle, needleLen, skiptable, cs);
316 if (haystackOffset < 0)
318 return bm_find((const ushort *)haystack, haystackLen, haystackOffset,
319 (const ushort *)needle, needleLen, skiptable, cs);