2 * Copyright (C) 2004, 2006, 2009 Apple Inc. All rights reserved.
4 * Redistribution and use in source and binary forms, with or without
5 * modification, are permitted provided that the following conditions
7 * 1. Redistributions of source code must retain the above copyright
8 * notice, this list of conditions and the following disclaimer.
9 * 2. Redistributions in binary form must reproduce the above copyright
10 * notice, this list of conditions and the following disclaimer in the
11 * documentation and/or other materials provided with the distribution.
13 * THIS SOFTWARE IS PROVIDED BY APPLE COMPUTER, INC. ``AS IS'' AND ANY
14 * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
15 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
16 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE COMPUTER, INC. OR
17 * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
18 * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
19 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
20 * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
21 * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
22 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
23 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26 #ifndef TextIterator_h
27 #define TextIterator_h
29 #include "FindOptions.h"
31 #include <wtf/Vector.h>
37 class RenderTextFragment;
// Option flags accepted by the plain-text helpers and by several iterator constructors
// declared in this header.
// Every non-default enumerator occupies its own bit (1 << 0 through 1 << 6).
// NOTE(review): distinct bits suggest the values are meant to be OR-ed together -- confirm against callers.
39 enum TextIteratorBehavior {
40 TextIteratorDefaultBehavior = 0, // No option bits set.
41 TextIteratorEmitsCharactersBetweenAllVisiblePositions = 1 << 0,
42 TextIteratorEntersTextControls = 1 << 1,
43 TextIteratorEmitsTextsWithoutTranscoding = 1 << 2,
44 TextIteratorIgnoresStyleVisibility = 1 << 3,
45 TextIteratorEmitsObjectReplacementCharacters = 1 << 4,
46 TextIteratorEmitsOriginalText = 1 << 5,
47 TextIteratorStopsOnFormControls = 1 << 6
50 // FIXME: Can't really answer this question correctly without knowing the white-space mode.
51 // FIXME: Move this somewhere else in the editing directory. It doesn't belong here.
52 inline bool isCollapsibleWhitespace(UChar c)
// Range-based free functions; only their declarations are visible here.
// Returns a String for the given Range. The behavior argument defaults to TextIteratorDefaultBehavior.
63 String plainText(const Range*, TextIteratorBehavior defaultBehavior = TextIteratorDefaultBehavior);
// Returns a UChar buffer for the given Range. bufferLength is a non-const reference,
// presumably an out-parameter receiving the buffer's length -- TODO confirm.
// NOTE(review): the name implies the buffer is malloc-allocated and owned by the caller -- confirm.
64 UChar* plainTextToMallocAllocatedBuffer(const Range*, unsigned& bufferLength, bool isDisplayString, TextIteratorBehavior = TextIteratorDefaultBehavior);
// Takes a Range, a String and FindOptions and returns a Range.
// NOTE(review): presumably the returned Range locates the match within the input Range -- confirm.
65 PassRefPtr<Range> findPlainText(const Range*, const String&, FindOptions);
76 unsigned size() const;
80 Vector<unsigned, 1> m_words;
83 // Iterates through the DOM range, returning all the text, and 0-length boundaries
84 // at points where replaced elements break up the text flow. The text comes back in
85 // chunks so as to optimize for performance of the iteration.
// Constructs an iterator from a Range; the behavior argument defaults to TextIteratorDefaultBehavior.
91 explicit TextIterator(const Range*, TextIteratorBehavior = TextIteratorDefaultBehavior);
// True when there is no current position node or when m_shouldStop is set.
93 bool atEnd() const { return !m_positionNode || m_shouldStop; }
// Length of the current text chunk (m_textLength).
96 int length() const { return m_textLength; }
// Pointer to the current text chunk (m_textCharacters).
97 const UChar* characters() const { return m_textCharacters; }
// NOTE(review): presumably the DOM range covered by the current chunk -- confirm in the implementation.
99 PassRefPtr<Range> range() const;
// Static helpers that take or produce Ranges together with character locations/lengths.
// NOTE(review): exact semantics are inferred from the names and signatures only -- confirm in the implementation.
102 static int rangeLength(const Range*, bool spacesForReplacedElements = false);
103 static PassRefPtr<Range> rangeFromLocationAndLength(ContainerNode* scope, int rangeLocation, int rangeLength, bool spacesForReplacedElements = false);
104 static bool getLocationAndLengthFromRange(Element* scope, const Range*, size_t& location, size_t& length);
105 static PassRefPtr<Range> subrange(Range* entireRange, int characterOffset, int characterCount);
// Helper declarations; their definitions are not part of this view.
109 bool shouldRepresentNodeOffsetZero();
110 bool shouldEmitSpaceBeforeAndAfterNode(Node*);
111 void representNodeOffsetZero();
112 bool handleTextNode();
113 bool handleReplacedElement();
114 bool handleNonTextNode();
115 void handleTextBox();
116 void handleTextNodeFirstLetter(RenderTextFragment*);
117 bool hasVisibleTextNode(RenderText*);
118 void emitCharacter(UChar, Node* textNode, Node* offsetBaseNode, int textStartOffset, int textEndOffset);
// Two overloads: one additionally takes the RenderObject, the other only the node and offsets.
119 void emitText(Node* textNode, RenderObject* renderObject, int textStartOffset, int textEndOffset);
120 void emitText(Node* textNode, int textStartOffset, int textEndOffset);
122 // Current position, not necessarily of the text being returned, but position
123 // as we walk through the DOM tree.
127 bool m_handledChildren;
128 BitStack m_fullyClippedStack;
131 Node* m_startContainer;
133 Node* m_endContainer;
137 // The current text and its position, in the form to be returned from the iterator.
138 Node* m_positionNode;
// The next three members are mutable, so const member functions may update them.
139 mutable Node* m_positionOffsetBaseNode;
140 mutable int m_positionStartOffset;
141 mutable int m_positionEndOffset;
142 const UChar* m_textCharacters;
144 // Holds the string m_textCharacters points to, so we ensure it won't be deleted.
147 // Used when there is still some pending text from the current node; when these
148 // are false and 0, we go back to normal iterating.
149 bool m_needsAnotherNewline;
150 InlineTextBox* m_textBox;
151 // Used when iterating over :first-letter text to save a pointer to the
152 // remaining text box.
153 InlineTextBox* m_remainingTextBox;
154 // Used to point to the RenderText object for :first-letter.
155 RenderText *m_firstLetterText;
157 // Used to do the whitespace collapsing logic.
158 Node* m_lastTextNode;
159 bool m_lastTextNodeEndedWithCollapsedSpace;
160 UChar m_lastCharacter;
162 // Used for whitespace characters that aren't in the DOM, so we can point at them.
163 UChar m_singleCharacterBuffer;
165 // Used when text boxes are out of order (Hebrew/Arabic with embedded LTR text).
166 Vector<InlineTextBox*> m_sortedTextBoxes;
167 size_t m_sortedTextBoxesPosition;
169 // Used when deciding whether to emit a "positioning" (e.g. newline) before any other content
172 // Used by selection preservation code. There should be one character emitted between every VisiblePosition
173 // in the Range used to create the TextIterator.
174 // FIXME <rdar://problem/6028818>: This functionality should eventually be phased out when we rewrite
175 // moveParagraphs to not clone/destroy moved content.
176 bool m_emitsCharactersBetweenAllVisiblePositions;
177 bool m_entersTextControls;
179 // Used when we want texts for copying, pasting, and transposing.
180 bool m_emitsTextWithoutTranscoding;
181 // Used in pasting inside password field.
182 bool m_emitsOriginalText;
183 // Used when deciding whether the text fragment created by :first-letter should be looked into.
184 bool m_handledFirstLetter;
185 // Used when the visibility of the style should not affect text gathering.
186 bool m_ignoresStyleVisibility;
187 // Used when emitting the special 0xFFFC character is required.
188 bool m_emitsObjectReplacementCharacters;
189 // Used when the iteration should stop if form controls are reached.
190 bool m_stopsOnFormControls;
191 // Used when m_stopsOnFormControls is set to determine if the iterator should keep advancing.
195 // Iterates through the DOM range, returning all the text, and 0-length boundaries
196 // at points where replaced elements break up the text flow. The text comes back in
197 // chunks so as to optimize for performance of the iteration.
// NOTE(review): the description above is worded identically to the forward iterator's. Going by
// the class name and the m_havePassedStartNode member, this class presumably walks the range
// towards its start -- confirm in the implementation.
198 class SimplifiedBackwardsTextIterator {
200 SimplifiedBackwardsTextIterator();
// Constructs an iterator from a Range; the behavior argument defaults to TextIteratorDefaultBehavior.
201 explicit SimplifiedBackwardsTextIterator(const Range*, TextIteratorBehavior = TextIteratorDefaultBehavior);
// True when there is no current position node or when m_shouldStop is set.
203 bool atEnd() const { return !m_positionNode || m_shouldStop; }
// Length of the current text chunk (m_textLength).
206 int length() const { return m_textLength; }
// Pointer to the current text chunk (m_textCharacters).
207 const UChar* characters() const { return m_textCharacters; }
// NOTE(review): presumably the DOM range covered by the current chunk -- confirm in the implementation.
209 PassRefPtr<Range> range() const;
// Helper declarations; their definitions are not part of this view.
213 bool handleTextNode();
214 RenderText* handleFirstLetter(int& startOffset, int& offsetInNode);
215 bool handleReplacedElement();
216 bool handleNonTextNode();
217 void emitCharacter(UChar, Node*, int startOffset, int endOffset);
218 bool advanceRespectingRange(Node*);
220 // Current position, not necessarily of the text being returned, but position
221 // as we walk through the DOM tree.
225 bool m_handledChildren;
226 BitStack m_fullyClippedStack;
231 // Start of the range.
235 // The current text and its position, in the form to be returned from the iterator.
236 Node* m_positionNode;
237 int m_positionStartOffset;
238 int m_positionEndOffset;
239 const UChar* m_textCharacters;
242 // Used to do the whitespace logic.
243 Node* m_lastTextNode;
244 UChar m_lastCharacter;
246 // Used for whitespace characters that aren't in the DOM, so we can point at them.
247 UChar m_singleCharacterBuffer;
249 // Whether m_node has advanced beyond the iteration range (i.e. m_startNode).
250 bool m_havePassedStartNode;
252 // Should handle first-letter renderer in the next call to handleTextNode.
253 bool m_shouldHandleFirstLetter;
255 // Used when the iteration should stop if form controls are reached.
256 bool m_stopsOnFormControls;
258 // Used when m_stopsOnFormControls is set to determine if the iterator should keep advancing.
262 // Builds on the text iterator, adding a character position so we can walk one
263 // character at a time, or faster, as needed. Useful for searching.
264 class CharacterIterator {
// Constructs the iterator from a Range; the behavior argument defaults to TextIteratorDefaultBehavior.
267 explicit CharacterIterator(const Range*, TextIteratorBehavior = TextIteratorDefaultBehavior);
// NOTE(review): presumably moves the position forward by numCharacters -- confirm in the implementation.
269 void advance(int numCharacters);
// Returns the stored m_atBreak flag.
271 bool atBreak() const { return m_atBreak; }
// Forwards to the wrapped TextIterator's atEnd().
272 bool atEnd() const { return m_textIterator.atEnd(); }
// Length of the wrapped iterator's current chunk minus m_runOffset.
274 int length() const { return m_textIterator.length() - m_runOffset; }
// Pointer into the wrapped iterator's current chunk, advanced by m_runOffset.
275 const UChar* characters() const { return m_textIterator.characters() + m_runOffset; }
// NOTE(review): presumably returns the next numChars characters as a String -- confirm in the implementation.
276 String string(int numChars);
// Returns the stored m_offset.
278 int characterOffset() const { return m_offset; }
279 PassRefPtr<Range> range() const;
// The wrapped text iterator that the accessors above forward to.
286 TextIterator m_textIterator;
// Wraps a SimplifiedBackwardsTextIterator (m_textIterator).
// NOTE(review): presumably the backwards counterpart of CharacterIterator -- confirm in the implementation.
289 class BackwardsCharacterIterator {
291 BackwardsCharacterIterator();
// Constructs the iterator from a Range; the behavior argument defaults to TextIteratorDefaultBehavior.
292 explicit BackwardsCharacterIterator(const Range*, TextIteratorBehavior = TextIteratorDefaultBehavior);
// Forwards to the wrapped iterator's atEnd().
296 bool atEnd() const { return m_textIterator.atEnd(); }
298 PassRefPtr<Range> range() const;
// The wrapped backwards text iterator.
305 SimplifiedBackwardsTextIterator m_textIterator;
308 // Very similar to the TextIterator, except that the chunks of text returned are "well behaved",
309 // meaning they never split up a word. This is useful for spellcheck or (perhaps one day) searching.
310 class WordAwareIterator {
// Constructs the iterator from a Range; unlike the other iterators in this header, no behavior flags are taken.
313 explicit WordAwareIterator(const Range*);
314 ~WordAwareIterator();
// At the end only when no look-ahead is pending (m_didLookAhead is false) and the wrapped
// TextIterator is itself at its end.
316 bool atEnd() const { return !m_didLookAhead && m_textIterator.atEnd(); }
320 const UChar* characters() const;
322 // Range of the text we're currently returning
// Returns the stored m_range.
323 PassRefPtr<Range> range() const { return m_range; }
326 // text from the previous chunk from the textIterator
327 const UChar* m_previousText;
328 int m_previousLength;
330 // many chunks from textIterator concatenated
331 Vector<UChar> m_buffer;
333 // Did we have to look ahead in the textIterator to confirm the current chunk?
336 RefPtr<Range> m_range;
// The wrapped text iterator that supplies the underlying chunks.
338 TextIterator m_textIterator;