1 /****************************************************************************
3 ** Copyright (C) 2012 Nokia Corporation and/or its subsidiary(-ies).
4 ** Contact: http://www.qt-project.org/
6 ** This file is part of the QtCore module of the Qt Toolkit.
8 ** $QT_BEGIN_LICENSE:LGPL$
9 ** GNU Lesser General Public License Usage
10 ** This file may be used under the terms of the GNU Lesser General Public
11 ** License version 2.1 as published by the Free Software Foundation and
12 ** appearing in the file LICENSE.LGPL included in the packaging of this
13 ** file. Please review the following information to ensure the GNU Lesser
14 ** General Public License version 2.1 requirements will be met:
15 ** http://www.gnu.org/licenses/old-licenses/lgpl-2.1.html.
17 ** In addition, as a special exception, Nokia gives you certain additional
18 ** rights. These rights are described in the Nokia Qt LGPL Exception
19 ** version 1.1, included in the file LGPL_EXCEPTION.txt in this package.
21 ** GNU General Public License Usage
22 ** Alternatively, this file may be used under the terms of the GNU General
23 ** Public License version 3.0 as published by the Free Software Foundation
24 ** and appearing in the file LICENSE.GPL included in the packaging of this
25 ** file. Please review the following information to ensure the GNU General
26 ** Public License version 3.0 requirements will be met:
27 ** http://www.gnu.org/copyleft/gpl.html.
30 ** Alternatively, this file may be used in accordance with the terms and
31 ** conditions contained in a signed written agreement between you and Nokia.
40 ****************************************************************************/
41 #include <QtCore/qtextboundaryfinder.h>
42 #include <QtCore/qvarlengtharray.h>
44 #include <private/qunicodetables_p.h>
45 #include <private/qunicodetools_p.h>
// Private data of QTextBoundaryFinder. The member is declared as a
// one-element HB_CharAttributes array, but the finder allocates
// length * sizeof(HB_CharAttributes) bytes for this object and indexes
// attributes[] by text position across that storage.
49 class QTextBoundaryFinderPrivate
52 HB_CharAttributes attributes[1];
// Computes the per-character attributes for the \a length UTF-16 units at
// \a chars and stores them in \a attributes.
//
// The text is first split into script items (runs sharing one script), then
// qGetCharAttributes() is called with those items. \a type only selects the
// extra break options: word breaks for Word, sentence breaks for Sentence,
// and no extra option for the other boundary types.
55 static void init(QTextBoundaryFinder::BoundaryType type, const QChar *chars, int length, HB_CharAttributes *attributes)
57 QVarLengthArray<HB_ScriptItem> scriptItems;
// View the QChar array as raw 16-bit code units.
59 const ushort *string = reinterpret_cast<const ushort *>(chars);
60 const ushort *unicode = string;
61 // correctly assign script, isTab and isObject to the script analysis
// uc walks the text up to e; start marks where the current script run began.
62 const ushort *uc = unicode;
63 const ushort *e = uc + length;
64 int script = QUnicodeTables::Common;
65 int lastScript = QUnicodeTables::Common;
66 const ushort *start = uc;
// Look up the script of the current code unit; Inherited is singled out by
// the test below. NOTE(review): presumably an Inherited unit keeps the
// script of the preceding text - confirm.
68 int s = QUnicodeTables::script(*uc);
69 if (s != QUnicodeTables::Inherited)
// Object replacement characters, line separators and tabs (code 9) are
// always treated as Common script.
71 if (*uc == QChar::ObjectReplacementCharacter || *uc == QChar::LineSeparator || *uc == 9)
72 script = QUnicodeTables::Common;
// On a script change, record the run [start, uc) under the previous script.
73 if (script != lastScript) {
76 item.pos = start - string;
77 item.length = uc - start;
78 item.script = (HB_Script)lastScript;
79 item.bidiLevel = 0; // ### what's the proper value?
80 scriptItems.append(item);
// Record the run [start, uc) once more.
// NOTE(review): presumably this is the trailing run after the scan - confirm.
89 item.pos = start - string;
90 item.length = uc - start;
91 item.script = (HB_Script)lastScript;
92 item.bidiLevel = 0; // ### what's the proper value?
93 scriptItems.append(item);
// Only word and sentence boundaries need an extra option flag.
96 QCharAttributeOptions options = 0;
97 if (type == QTextBoundaryFinder::Word)
98 options |= GetWordBreaks;
99 else if (type == QTextBoundaryFinder::Sentence)
100 options |= GetSentenceBreaks;
101 qGetCharAttributes(string, length, scriptItems.data(), scriptItems.count(), attributes, options);
105 \class QTextBoundaryFinder
107 \brief The QTextBoundaryFinder class provides a way of finding Unicode text boundaries in a string.
112 \ingroup string-processing
115 QTextBoundaryFinder allows finding Unicode text boundaries in a
116 string, similar to the Unicode text boundary specification (see
117 http://www.unicode.org/reports/tr29/tr29-11.html).
119 QTextBoundaryFinder can operate on a QString in four possible
120 modes depending on the value of \a BoundaryType.
122 Units of Unicode characters that make up what the user thinks of
123 as a character or basic unit of the language are here called
124 Grapheme clusters. The two unicode characters 'A' + diaeresis do
125 for example form one grapheme cluster as the user thinks of them
126 as one character, yet it is in this case represented by two
129 Word boundaries are there to locate the start and end of what a
130 language considers to be a word.
132 Line break boundaries give possible places where a line break
133 might happen and sentence boundaries will show the beginning and
134 end of whole sentences.
136 The first position in a string is always a valid boundary and
137 refers to the position before the first character. The last
138 position at the length of the string is also valid and refers
139 to the position after the last character.
143 \enum QTextBoundaryFinder::BoundaryType
145 \value Grapheme Finds a grapheme which is the smallest boundary. This
146 includes letters, punctuation marks, numerals and more.
147 \value Word Finds a word.
148 \value Line Finds possible positions for breaking the text into multiple
150 \value Sentence Finds sentence boundaries. These include periods, question
155 \enum QTextBoundaryFinder::BoundaryReason
157 \value NotAtBoundary The boundary finder is not at a boundary position.
158 \value StartWord The boundary finder is at the start of a word.
159 \value EndWord The boundary finder is at the end of a word.
163 Constructs an invalid QTextBoundaryFinder object.
165 QTextBoundaryFinder::QTextBoundaryFinder()
175 Copies the QTextBoundaryFinder object, \a other.
// Copy constructor: takes other's length, allocates separate attribute
// storage of length * sizeof(HB_CharAttributes) bytes, and copies other's
// attribute data into it.
// NOTE(review): confirm that a null other.d and a failed malloc are handled
// before the memcpy.
177 QTextBoundaryFinder::QTextBoundaryFinder(const QTextBoundaryFinder &other)
181 , length(other.length)
185 d = (QTextBoundaryFinderPrivate *) malloc(length*sizeof(HB_CharAttributes));
187 memcpy(d, other.d, length*sizeof(HB_CharAttributes));
191 Assigns the object, \a other, to this QTextBoundaryFinder object.
// Copy assignment: takes other's length, resizes the attribute storage to
// length * sizeof(HB_CharAttributes) bytes, and copies other's attribute
// data into it.
193 QTextBoundaryFinder &QTextBoundaryFinder::operator=(const QTextBoundaryFinder &other)
201 length = other.length;
// The existing d is only handed to realloc() when freePrivate is set;
// otherwise a null pointer is passed, so realloc() allocates a fresh block
// and leaves d's memory alone.
// NOTE(review): presumably d then points at a caller-supplied buffer - confirm.
// The result is held in newD before being used.
// NOTE(review): length has already been overwritten at this point; confirm
// the object stays consistent if realloc() fails.
204 QTextBoundaryFinderPrivate *newD = (QTextBoundaryFinderPrivate *)
205 realloc(freePrivate ? d : 0, length*sizeof(HB_CharAttributes));
209 memcpy(d, other.d, length*sizeof(HB_CharAttributes));
215 Destructs the QTextBoundaryFinder object.
217 QTextBoundaryFinder::~QTextBoundaryFinder()
224 Creates a QTextBoundaryFinder object of \a type operating on \a string.
// Constructs a finder of the given type over \a string.
// chars points at the string's own character data (string.unicode()) rather
// than at a separate copy; length is the string's length.
226 QTextBoundaryFinder::QTextBoundaryFinder(BoundaryType type, const QString &string)
229 , chars(string.unicode())
230 , length(string.length())
// Allocate one HB_CharAttributes per character and compute them up front.
// NOTE(review): confirm the malloc result is checked before init() uses it.
234 d = (QTextBoundaryFinderPrivate *) malloc(length*sizeof(HB_CharAttributes));
236 init(t, chars, length, d->attributes);
240 Creates a QTextBoundaryFinder object of \a type operating on \a chars
243 \a buffer is an optional working buffer of size \a bufferSize you can pass to
244 the QTextBoundaryFinder. If the buffer is large enough to hold the working
245 data required, it will use this instead of allocating its own buffer.
247 \warning QTextBoundaryFinder does not create a copy of \a chars. It is the
248 application programmer's responsibility to ensure the array is allocated for
249 as long as the QTextBoundaryFinder object stays alive. The same applies to
// Constructs a finder of the given type over the \a length characters at
// \a chars, optionally using caller-provided working memory.
252 QTextBoundaryFinder::QTextBoundaryFinder(BoundaryType type, const QChar *chars, int length, unsigned char *buffer, int bufferSize)
// Use the caller's buffer only if it is non-null and holds at least
// length * sizeof(HB_CharAttributes) bytes; bufferSize is compared as an
// unsigned value.
258 if (buffer && (uint)bufferSize >= length*sizeof(HB_CharAttributes)) {
259 d = (QTextBoundaryFinderPrivate *)buffer;
// Otherwise allocate the attribute storage ourselves.
262 d = (QTextBoundaryFinderPrivate *) malloc(length*sizeof(HB_CharAttributes));
// Compute the attributes for the whole text up front.
266 init(t, chars, length, d->attributes);
270 Moves the finder to the start of the string. This is equivalent to setPosition(0).
272 \sa setPosition(), position()
274 void QTextBoundaryFinder::toStart()
280 Moves the finder to the end of the string. This is equivalent to setPosition(string.length()).
282 \sa setPosition(), position()
284 void QTextBoundaryFinder::toEnd()
290 Returns the current position of the QTextBoundaryFinder.
292 The range is from 0 (the beginning of the string) to the length of
293 the string inclusive.
297 int QTextBoundaryFinder::position() const
303 Sets the current position of the QTextBoundaryFinder to \a position.
305 If \a position is out of bounds, it is clamped to the nearest valid
306 position. Valid positions are from 0 to the length of
307 the string inclusive.
// Sets the current position, clamping \a position into [0, length] so that
// pos can never lie outside the text.
311 void QTextBoundaryFinder::setPosition(int position)
313 pos = qBound(0, position, length);
316 /*! \fn QTextBoundaryFinder::BoundaryType QTextBoundaryFinder::type() const
318 Returns the type of the QTextBoundaryFinder.
321 /*! \fn bool QTextBoundaryFinder::isValid() const
323 Returns true if the text boundary finder is valid; otherwise returns false.
324 A default QTextBoundaryFinder is invalid.
328 Returns the string the QTextBoundaryFinder object operates on.
// Returns the text the finder operates on as a QString.
330 QString QTextBoundaryFinder::string() const
// Check whether chars/length still describe exactly the stored QString s.
// NOTE(review): presumably s is returned directly in that case - confirm.
332 if (chars == s.unicode() && length == s.length())
// Otherwise build a new QString from the raw character array.
334 return QString(chars, length);
339 Moves the QTextBoundaryFinder to the next boundary position and returns that position.
341 Returns -1 if there is no next boundary.
// Advances pos to the next boundary after the current position.
343 int QTextBoundaryFinder::toNextBoundary()
// Guard: a position outside [0, length) has no next boundary to move to.
350 if (pos < 0 || pos >= length) {
// One forward scan per attribute; each stops at the end of the text or at
// the first position whose attribute is set.
// NOTE(review): presumably the boundary type selects which scan runs - confirm.
// Grapheme-style scan: stop at a position marked charStop.
360 while (pos < length && !d->attributes[pos].charStop)
// Word scan: stop at a position marked wordBoundary.
364 while (pos < length && !d->attributes[pos].wordBoundary)
// Sentence scan: stop at a position marked sentenceBoundary.
368 while (pos < length && !d->attributes[pos].sentenceBoundary)
// Line scan: the break information is read from the character before pos,
// and the scan continues while its lineBreakType is below HB_Break.
373 while (pos < length && d->attributes[pos-1].lineBreakType < HB_Break)
382 Moves the QTextBoundaryFinder to the previous boundary position and returns that position.
384 Returns -1 if there is no previous boundary.
// Moves pos back to the previous boundary before the current position.
386 int QTextBoundaryFinder::toPreviousBoundary()
// Guard: a position outside (0, length] has no previous boundary to move to.
393 if (pos <= 0 || pos > length) {
// One backward scan per attribute; each stops at position 0 or at the first
// position whose attribute is set.
// NOTE(review): presumably the boundary type selects which scan runs - confirm.
// Grapheme-style scan: stop at a position marked charStop.
403 while (pos > 0 && !d->attributes[pos].charStop)
// Word scan: stop at a position marked wordBoundary.
407 while (pos > 0 && !d->attributes[pos].wordBoundary)
// Sentence scan: stop at a position marked sentenceBoundary.
411 while (pos > 0 && !d->attributes[pos].sentenceBoundary)
// Line scan: the break information is read from the character before pos,
// and the scan continues while its lineBreakType is below HB_Break.
415 while (pos > 0 && d->attributes[pos-1].lineBreakType < HB_Break)
424 Returns true if the object's position() is currently at a valid text boundary.
// Reports whether the current position is a boundary, using the attribute
// that matches the kind of boundary being looked for.
426 bool QTextBoundaryFinder::isAtBoundary() const
// Character-stop attribute at the current position.
436 return d->attributes[pos].charStop;
// Word-boundary attribute at the current position.
438 return d->attributes[pos].wordBoundary;
// Line breaks are read from the character before pos; position 0 has no
// preceding character and always counts as a boundary.
440 return (pos > 0) ? d->attributes[pos-1].lineBreakType >= HB_Break : true;
// Sentence-boundary attribute at the current position.
442 return d->attributes[pos].sentenceBoundary;
448 Returns the reasons for the boundary finder to have chosen the current position as a boundary.
// Classifies the current position as a word start, a word end, both, or
// neither, based on whether the neighbouring characters are whitespace.
450 QTextBoundaryFinder::BoundaryReasons QTextBoundaryFinder::boundaryReasons() const
// Early exits: nothing to report for a finder without usable data, or for a
// position that isAtBoundary() rejects.
// NOTE(review): the guard for the first return is presumably a null-d check - confirm.
453 return NotAtBoundary;
454 if (! isAtBoundary())
455 return NotAtBoundary;
// Whitespace at the current position cannot start a word.
// NOTE(review): presumably this is the pos == 0 case - confirm.
457 if (d->attributes[pos].whiteSpace)
458 return NotAtBoundary;
// A trailing whitespace character cannot end a word.
// NOTE(review): presumably this is the pos == length case - confirm.
462 if (d->attributes[length-1].whiteSpace)
463 return NotAtBoundary;
// General case: compare the characters on either side of the position.
467 const bool nextIsSpace = d->attributes[pos].whiteSpace;
468 const bool prevIsSpace = d->attributes[pos - 1].whiteSpace;
// Whitespace followed by non-whitespace.
// NOTE(review): presumably reported as StartWord - confirm.
470 if (prevIsSpace && !nextIsSpace)
// Non-whitespace followed by whitespace.
// NOTE(review): presumably reported as EndWord - confirm.
472 else if (!prevIsSpace && nextIsSpace)
// No whitespace on either side: the position both ends one word and starts
// the next.
474 else if (!prevIsSpace && !nextIsSpace)
475 return BoundaryReasons(StartWord | EndWord);
// Whitespace on both sides: not a word boundary.
477 return NotAtBoundary;