2 * Copyright (C) 2004, 2006, 2009 Apple Inc. All rights reserved.
4 * Redistribution and use in source and binary forms, with or without
5 * modification, are permitted provided that the following conditions
7 * 1. Redistributions of source code must retain the above copyright
8 * notice, this list of conditions and the following disclaimer.
9 * 2. Redistributions in binary form must reproduce the above copyright
10 * notice, this list of conditions and the following disclaimer in the
11 * documentation and/or other materials provided with the distribution.
13 * THIS SOFTWARE IS PROVIDED BY APPLE COMPUTER, INC. ``AS IS'' AND ANY
14 * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
15 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
16 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE COMPUTER, INC. OR
17 * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
18 * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
19 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
20 * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
21 * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
22 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
23 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26 #ifndef TextIterator_h
27 #define TextIterator_h
29 #include "core/dom/Range.h"
30 #include "core/editing/FindOptions.h"
31 #include "platform/heap/Handle.h"
32 #include "wtf/Vector.h"
38 class RenderTextFragment;
// Behavior flags for the text iterators. Every non-default value is a distinct
// single bit (1 << n); presumably they are OR-ed together into the unsigned
// TextIteratorBehaviorFlags mask accepted by the iterator constructors.
40 enum TextIteratorBehavior {
41 TextIteratorDefaultBehavior = 0,
42 TextIteratorEmitsCharactersBetweenAllVisiblePositions = 1 << 0,
43 TextIteratorEntersTextControls = 1 << 1,
44 TextIteratorIgnoresStyleVisibility = 1 << 2,
45 TextIteratorEmitsOriginalText = 1 << 3,
46 TextIteratorStopsOnFormControls = 1 << 4,
47 TextIteratorEmitsImageAltText = 1 << 5,
48 TextIteratorEntersAuthorShadowRoots = 1 << 6,
49 TextIteratorEmitsObjectReplacementCharacter = 1 << 7,
50 TextIteratorDoesNotBreakAtReplacedElement = 1 << 8
52 typedef unsigned TextIteratorBehaviorFlags;
54 String plainText(const Range*, TextIteratorBehaviorFlags = TextIteratorDefaultBehavior);
55 String plainText(const Position& start, const Position& end, TextIteratorBehaviorFlags = TextIteratorDefaultBehavior);
56 PassRefPtrWillBeRawPtr<Range> findPlainText(const Range*, const String&, FindOptions);
57 void findPlainText(const Position& inputStart, const Position& inputEnd, const String&, FindOptions, Position& resultStart, Position& resultEnd);
68 unsigned size() const;
72 Vector<unsigned, 1> m_words;
75 // Iterates through the DOM range, returning all the text, and 0-length boundaries
76 // at points where replaced elements break up the text flow. The text comes back in
77 // chunks so as to optimize for performance of the iteration.
82 explicit TextIterator(const Range*, TextIteratorBehaviorFlags = TextIteratorDefaultBehavior);
83 // [start, end] indicates the document range that the iteration should take place within (both ends inclusive).
84 TextIterator(const Position& start, const Position& end, TextIteratorBehaviorFlags = TextIteratorDefaultBehavior);
// Iteration is finished once there is no current position node or the
// m_shouldStop flag has been set.
87 bool atEnd() const { return !m_positionNode || m_shouldStop; }
89 bool isInsideReplacedElement() const;
91 int length() const { return m_textLength; }
92 UChar characterAt(unsigned index) const;
93 String substring(unsigned position, unsigned length) const;
94 void appendTextToStringBuilder(StringBuilder&, unsigned position = 0, unsigned maxLength = UINT_MAX) const;
96 template<typename BufferType>
97 void appendTextTo(BufferType& output, unsigned position = 0)
99 ASSERT_WITH_SECURITY_IMPLICATION(position <= static_cast<unsigned>(length()));
100 unsigned lengthToAppend = length() - position;
103 if (m_singleCharacterBuffer) {
105 ASSERT(length() == 1);
106 output.append(&m_singleCharacterBuffer, 1);
108 string().appendTo(output, startOffset() + position, lengthToAppend);
112 PassRefPtrWillBeRawPtr<Range> createRange() const;
115 Document* ownerDocument() const;
116 Node* startContainer() const;
117 Node* endContainer() const;
118 int startOffset() const;
119 int endOffset() const;
120 Position startPosition() const;
121 Position endPosition() const;
// Returns the m_breaksAtReplacedElement flag. Declared const because it only
// reads iterator state, so it can also be called through a const iterator,
// like the other accessors declared alongside it.
123 bool breaksAtReplacedElement() const { return m_breaksAtReplacedElement; }
125 // Computes the length of the given range using a text iterator. The default
126 // iteration behavior is to always emit object replacement characters for
127 // replaced elements. When |forSelectionPreservation| is set to true, it
128 // also emits spaces for other non-text nodes using the
129 // |TextIteratorEmitsCharactersBetweenAllVisiblePositions| mode.
130 static int rangeLength(const Range*, bool forSelectionPreservation = false);
131 static int rangeLength(const Position& start, const Position& end, bool forSelectionPreservation = false);
132 static PassRefPtrWillBeRawPtr<Range> subrange(Range* entireRange, int characterOffset, int characterCount);
133 static void subrange(Position& start, Position& end, int characterOffset, int characterCount);
136 enum IterationProgress {
138 HandledAuthorShadowRoots,
139 HandledUserAgentShadowRoot,
144 void initialize(const Position& start, const Position& end);
146 void flushPositionOffsets() const;
147 int positionStartOffset() const { return m_positionStartOffset; }
148 const String& string() const { return m_text; }
150 bool shouldRepresentNodeOffsetZero();
151 bool shouldEmitSpaceBeforeAndAfterNode(Node*);
152 void representNodeOffsetZero();
153 bool handleTextNode();
154 bool handleReplacedElement();
155 bool handleNonTextNode();
156 void handleTextBox();
157 void handleTextNodeFirstLetter(RenderTextFragment*);
158 bool hasVisibleTextNode(RenderText*);
159 void emitCharacter(UChar, Node* textNode, Node* offsetBaseNode, int textStartOffset, int textEndOffset);
160 void emitText(Node* textNode, RenderText* renderer, int textStartOffset, int textEndOffset);
162 // Current position, not necessarily of the text being returned, but position
163 // as we walk through the DOM tree.
164 RawPtrWillBeMember<Node> m_node;
166 IterationProgress m_iterationProgress;
167 BitStack m_fullyClippedStack;
171 RawPtrWillBeMember<Node> m_startContainer;
173 RawPtrWillBeMember<Node> m_endContainer;
175 RawPtrWillBeMember<Node> m_pastEndNode;
177 // The current text and its position, in the form to be returned from the iterator.
178 RawPtrWillBeMember<Node> m_positionNode;
179 mutable RawPtrWillBeMember<Node> m_positionOffsetBaseNode;
180 mutable int m_positionStartOffset;
181 mutable int m_positionEndOffset;
185 // Used when there is still some pending text from the current node; when these
186 // are false and 0, we go back to normal iterating.
187 bool m_needsAnotherNewline;
188 InlineTextBox* m_textBox;
189 // Used when iterating over :first-letter text to save a pointer to the
190 // remaining text box.
191 InlineTextBox* m_remainingTextBox;
192 // Used to point to RenderText object for :first-letter.
193 RawPtrWillBeMember<RenderText> m_firstLetterText;
195 // Used to do the whitespace collapsing logic.
196 RawPtrWillBeMember<Text> m_lastTextNode;
197 bool m_lastTextNodeEndedWithCollapsedSpace;
198 UChar m_lastCharacter;
200 // Used for whitespace characters that aren't in the DOM, so we can point at them.
201 // If non-zero, overrides m_text.
202 UChar m_singleCharacterBuffer;
204 // Used when text boxes are out of order (Hebrew/Arabic w/ embedded LTR text)
205 Vector<InlineTextBox*> m_sortedTextBoxes;
206 size_t m_sortedTextBoxesPosition;
208 // Used when deciding whether to emit a "positioning" (e.g. newline) before any other content
211 // Used by selection preservation code. There should be one character emitted between every VisiblePosition
212 // in the Range used to create the TextIterator.
213 // FIXME <rdar://problem/6028818>: This functionality should eventually be phased out when we rewrite
214 // moveParagraphs to not clone/destroy moved content.
215 bool m_emitsCharactersBetweenAllVisiblePositions;
216 bool m_entersTextControls;
218 // Used in pasting inside password field.
219 bool m_emitsOriginalText;
220 // Used when deciding whether the text fragment created by :first-letter should be looked into.
221 bool m_handledFirstLetter;
222 // Used when the visibility of the style should not affect text gathering.
223 bool m_ignoresStyleVisibility;
224 // Used when the iteration should stop if form controls are reached.
225 bool m_stopsOnFormControls;
226 // Used when m_stopsOnFormControls is set to determine if the iterator should keep advancing.
229 bool m_emitsImageAltText;
231 bool m_entersAuthorShadowRoots;
233 bool m_emitsObjectReplacementCharacter;
235 bool m_breaksAtReplacedElement;
238 // Iterates backwards through the DOM range, returning all the text, and 0-length boundaries
239 // at points where replaced elements break up the text flow. The text comes back in
240 // chunks so as to optimize for performance of the iteration.
241 class SimplifiedBackwardsTextIterator {
244 explicit SimplifiedBackwardsTextIterator(const Range*, TextIteratorBehaviorFlags = TextIteratorDefaultBehavior);
245 SimplifiedBackwardsTextIterator(const Position& start, const Position& end, TextIteratorBehaviorFlags = TextIteratorDefaultBehavior);
// Iteration is finished once there is no current position node or the
// m_shouldStop flag has been set.
247 bool atEnd() const { return !m_positionNode || m_shouldStop; }
250 int length() const { return m_textLength; }
252 Node* node() const { return m_node; }
254 template<typename BufferType>
255 void prependTextTo(BufferType& output)
259 if (m_singleCharacterBuffer)
260 output.prepend(&m_singleCharacterBuffer, 1);
262 m_textContainer.prependTo(output, m_textOffset, m_textLength);
265 Node* startContainer() const;
266 int endOffset() const;
267 Position startPosition() const;
268 Position endPosition() const;
271 void init(Node* startNode, Node* endNode, int startOffset, int endOffset);
273 bool handleTextNode();
274 RenderText* handleFirstLetter(int& startOffset, int& offsetInNode);
275 bool handleReplacedElement();
276 bool handleNonTextNode();
277 void emitCharacter(UChar, Node*, int startOffset, int endOffset);
278 bool advanceRespectingRange(Node*);
280 // Current position, not necessarily of the text being returned, but position
281 // as we walk through the DOM tree.
282 RawPtrWillBeMember<Node> m_node;
285 bool m_handledChildren;
286 BitStack m_fullyClippedStack;
289 RawPtrWillBeMember<Node> m_startNode;
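291 // Start of the range. NOTE(review): this comment labels m_endNode, not m_startNode;
// confirm whether "start" here means the point where backwards iteration begins.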
292 RawPtrWillBeMember<Node> m_endNode;
295 // The current text and its position, in the form to be returned from the iterator.
296 RawPtrWillBeMember<Node> m_positionNode;
297 int m_positionStartOffset;
298 int m_positionEndOffset;
300 String m_textContainer; // We're interested in the range [m_textOffset, m_textOffset + m_textLength) of m_textContainer.
304 // Used to do the whitespace logic.
305 RawPtrWillBeMember<Text> m_lastTextNode;
306 UChar m_lastCharacter;
308 // Used for whitespace characters that aren't in the DOM, so we can point at them.
309 UChar m_singleCharacterBuffer;
311 // Whether m_node has advanced beyond the iteration range (i.e. m_startNode).
312 bool m_havePassedStartNode;
314 // Should handle first-letter renderer in the next call to handleTextNode.
315 bool m_shouldHandleFirstLetter;
317 // Used when the iteration should stop if form controls are reached.
318 bool m_stopsOnFormControls;
320 // Used when m_stopsOnFormControls is set to determine if the iterator should keep advancing.
323 // Used in pasting inside password field.
324 bool m_emitsOriginalText;
327 // Builds on the text iterator, adding a character position so we can walk one
328 // character at a time, or faster, as needed. Useful for searching.
329 class CharacterIterator {
332 explicit CharacterIterator(const Range*, TextIteratorBehaviorFlags = TextIteratorDefaultBehavior);
333 CharacterIterator(const Position& start, const Position& end, TextIteratorBehaviorFlags = TextIteratorDefaultBehavior);
335 void advance(int numCharacters);
337 bool atBreak() const { return m_atBreak; }
338 bool atEnd() const { return m_textIterator.atEnd(); }
// Number of characters remaining in the underlying text iterator's current
// text, counted from m_runOffset.
340 int length() const { return m_textIterator.length() - m_runOffset; }
// |index| is relative to m_runOffset: it is added to m_runOffset before the
// lookup is forwarded to the underlying text iterator.
341 UChar characterAt(unsigned index) const { return m_textIterator.characterAt(m_runOffset + index); }
// Appends the underlying text iterator's current text to |output|, passing
// m_runOffset as the starting position so text before it is skipped.
343 template<typename BufferType>
344 void appendTextTo(BufferType& output) { m_textIterator.appendTextTo(output, m_runOffset); }
346 int characterOffset() const { return m_offset; }
347 PassRefPtrWillBeRawPtr<Range> createRange() const;
349 Document* ownerDocument() const;
350 Node* startContainer() const;
351 Node* endContainer() const;
352 int startOffset() const;
353 int endOffset() const;
354 Position startPosition() const;
355 Position endPosition() const;
364 TextIterator m_textIterator;
367 class BackwardsCharacterIterator {
370 explicit BackwardsCharacterIterator(const Range*, TextIteratorBehaviorFlags = TextIteratorDefaultBehavior);
371 BackwardsCharacterIterator(const Position&, const Position&, TextIteratorBehaviorFlags = TextIteratorDefaultBehavior);
375 bool atEnd() const { return m_textIterator.atEnd(); }
377 Position endPosition() const;
384 SimplifiedBackwardsTextIterator m_textIterator;
387 // Very similar to the TextIterator, except that the chunks of text returned are "well behaved",
388 // meaning they never split up a word. This is useful for spellcheck or (perhaps one day) searching.
389 class WordAwareIterator {
392 explicit WordAwareIterator(const Position& start, const Position& end);
393 ~WordAwareIterator();
// At end only when m_didLookAhead is false and the underlying text iterator
// is itself at its end; while m_didLookAhead is set this reports false.
395 bool atEnd() const { return !m_didLookAhead && m_textIterator.atEnd(); }
398 String substring(unsigned position, unsigned length) const;
399 UChar characterAt(unsigned index) const;
403 Vector<UChar> m_buffer;
404 // Did we have to look ahead in the textIterator to confirm the current chunk?
406 TextIterator m_textIterator;