2 * Copyright (C) 2006, 2007 Apple Inc. All rights reserved.
3 * Copyright (C) 2008 Nokia Corporation and/or its subsidiary(-ies)
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
8 * 1. Redistributions of source code must retain the above copyright
9 * notice, this list of conditions and the following disclaimer.
10 * 2. Redistributions in binary form must reproduce the above copyright
11 * notice, this list of conditions and the following disclaimer in the
12 * documentation and/or other materials provided with the distribution.
14 * THIS SOFTWARE IS PROVIDED BY APPLE COMPUTER, INC. ``AS IS'' AND ANY
15 * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
16 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
17 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE COMPUTER, INC. OR
18 * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
19 * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
20 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
21 * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
22 * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
23 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
24 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
28 #include "core/editing/TextCheckingHelper.h"
30 #include "bindings/core/v8/ExceptionState.h"
31 #include "bindings/core/v8/ExceptionStatePlaceholder.h"
32 #include "core/dom/Document.h"
33 #include "core/dom/DocumentMarkerController.h"
34 #include "core/dom/Range.h"
35 #include "core/editing/TextIterator.h"
36 #include "core/editing/VisiblePosition.h"
37 #include "core/editing/VisibleUnits.h"
38 #include "core/frame/LocalFrame.h"
39 #include "core/frame/Settings.h"
40 #include "core/page/SpellCheckerClient.h"
41 #include "platform/text/TextBreakIterator.h"
42 #include "platform/text/TextCheckerClient.h"
46 static void findBadGrammars(TextCheckerClient& client, const UChar* text, int start, int length, Vector<TextCheckingResult>& results)
48 int checkLocation = start;
49 int checkLength = length;
51 while (0 < checkLength) {
52 int badGrammarLocation = -1;
53 int badGrammarLength = 0;
54 Vector<GrammarDetail> badGrammarDetails;
55 client.checkGrammarOfString(String(text + checkLocation, checkLength), badGrammarDetails, &badGrammarLocation, &badGrammarLength);
56 if (!badGrammarLength)
58 ASSERT(0 <= badGrammarLocation && badGrammarLocation <= checkLength);
59 ASSERT(0 < badGrammarLength && badGrammarLocation + badGrammarLength <= checkLength);
60 TextCheckingResult badGrammar;
61 badGrammar.decoration = TextDecorationTypeGrammar;
62 badGrammar.location = checkLocation + badGrammarLocation;
63 badGrammar.length = badGrammarLength;
64 badGrammar.details.swap(badGrammarDetails);
65 results.append(badGrammar);
67 checkLocation += (badGrammarLocation + badGrammarLength);
68 checkLength -= (badGrammarLocation + badGrammarLength);
72 static void findMisspellings(TextCheckerClient& client, const UChar* text, int start, int length, Vector<TextCheckingResult>& results)
74 TextBreakIterator* iterator = wordBreakIterator(text + start, length);
77 int wordStart = iterator->current();
78 while (0 <= wordStart) {
79 int wordEnd = iterator->next();
82 int wordLength = wordEnd - wordStart;
83 int misspellingLocation = -1;
84 int misspellingLength = 0;
85 client.checkSpellingOfString(String(text + start + wordStart, wordLength), &misspellingLocation, &misspellingLength);
86 if (0 < misspellingLength) {
87 ASSERT(0 <= misspellingLocation && misspellingLocation <= wordLength);
88 ASSERT(0 < misspellingLength && misspellingLocation + misspellingLength <= wordLength);
89 TextCheckingResult misspelling;
90 misspelling.decoration = TextDecorationTypeSpelling;
91 misspelling.location = start + wordStart + misspellingLocation;
92 misspelling.length = misspellingLength;
93 misspelling.replacement = client.getAutoCorrectSuggestionForMisspelledWord(String(text + misspelling.location, misspelling.length));
94 results.append(misspelling);
101 static PassRefPtrWillBeRawPtr<Range> expandToParagraphBoundary(PassRefPtrWillBeRawPtr<Range> range)
103 RefPtrWillBeRawPtr<Range> paragraphRange = range->cloneRange();
104 setStart(paragraphRange.get(), startOfParagraph(VisiblePosition(range->startPosition())));
105 setEnd(paragraphRange.get(), endOfParagraph(VisiblePosition(range->endPosition())));
106 return paragraphRange;
109 TextCheckingParagraph::TextCheckingParagraph(PassRefPtrWillBeRawPtr<Range> checkingRange)
110 : m_checkingRange(checkingRange)
111 , m_checkingStart(-1)
113 , m_checkingLength(-1)
117 TextCheckingParagraph::TextCheckingParagraph(PassRefPtrWillBeRawPtr<Range> checkingRange, PassRefPtrWillBeRawPtr<Range> paragraphRange)
118 : m_checkingRange(checkingRange)
119 , m_paragraphRange(paragraphRange)
120 , m_checkingStart(-1)
122 , m_checkingLength(-1)
126 TextCheckingParagraph::~TextCheckingParagraph()
130 void TextCheckingParagraph::expandRangeToNextEnd()
132 ASSERT(m_checkingRange);
133 setEnd(paragraphRange().get(), endOfParagraph(startOfNextParagraph(VisiblePosition(paragraphRange()->startPosition()))));
134 invalidateParagraphRangeValues();
137 void TextCheckingParagraph::invalidateParagraphRangeValues()
139 m_checkingStart = m_checkingEnd = -1;
140 m_offsetAsRange = nullptr;
144 int TextCheckingParagraph::rangeLength() const
146 ASSERT(m_checkingRange);
147 return TextIterator::rangeLength(paragraphRange().get());
150 PassRefPtrWillBeRawPtr<Range> TextCheckingParagraph::paragraphRange() const
152 ASSERT(m_checkingRange);
153 if (!m_paragraphRange)
154 m_paragraphRange = expandToParagraphBoundary(checkingRange());
155 return m_paragraphRange;
158 PassRefPtrWillBeRawPtr<Range> TextCheckingParagraph::subrange(int characterOffset, int characterCount) const
160 ASSERT(m_checkingRange);
161 return TextIterator::subrange(paragraphRange().get(), characterOffset, characterCount);
164 int TextCheckingParagraph::offsetTo(const Position& position, ExceptionState& exceptionState) const
166 ASSERT(m_checkingRange);
167 RefPtrWillBeRawPtr<Range> range = offsetAsRange()->cloneRange();
168 range->setEnd(position.containerNode(), position.computeOffsetInContainerNode(), exceptionState);
169 if (exceptionState.hadException())
171 return TextIterator::rangeLength(range.get());
174 bool TextCheckingParagraph::isEmpty() const
176 // Both predicates should have same result, but we check both just for sure.
177 // We need to investigate to remove this redundancy.
178 return isRangeEmpty() || isTextEmpty();
181 PassRefPtrWillBeRawPtr<Range> TextCheckingParagraph::offsetAsRange() const
183 ASSERT(m_checkingRange);
184 if (!m_offsetAsRange)
185 m_offsetAsRange = Range::create(paragraphRange()->startContainer()->document(), paragraphRange()->startPosition(), checkingRange()->startPosition());
187 return m_offsetAsRange;
190 const String& TextCheckingParagraph::text() const
192 ASSERT(m_checkingRange);
193 if (m_text.isEmpty())
194 m_text = plainText(paragraphRange().get());
198 int TextCheckingParagraph::checkingStart() const
200 ASSERT(m_checkingRange);
201 if (m_checkingStart == -1)
202 m_checkingStart = TextIterator::rangeLength(offsetAsRange().get());
203 return m_checkingStart;
206 int TextCheckingParagraph::checkingEnd() const
208 ASSERT(m_checkingRange);
209 if (m_checkingEnd == -1)
210 m_checkingEnd = checkingStart() + TextIterator::rangeLength(checkingRange().get());
211 return m_checkingEnd;
214 int TextCheckingParagraph::checkingLength() const
216 ASSERT(m_checkingRange);
217 if (-1 == m_checkingLength)
218 m_checkingLength = TextIterator::rangeLength(checkingRange().get());
219 return m_checkingLength;
222 TextCheckingHelper::TextCheckingHelper(SpellCheckerClient& client, PassRefPtrWillBeRawPtr<Range> range)
226 ASSERT_ARG(m_range, m_range);
229 TextCheckingHelper::~TextCheckingHelper()
233 String TextCheckingHelper::findFirstMisspelling(int& firstMisspellingOffset, bool markAll, RefPtrWillBeRawPtr<Range>& firstMisspellingRange)
235 WordAwareIterator it(m_range.get());
236 firstMisspellingOffset = 0;
238 String firstMisspelling;
239 int currentChunkOffset = 0;
241 while (!it.atEnd()) {
242 int length = it.length();
244 // Skip some work for one-space-char hunks
245 if (!(length == 1 && it.characterAt(0) == ' ')) {
247 int misspellingLocation = -1;
248 int misspellingLength = 0;
249 m_client->textChecker().checkSpellingOfString(it.substring(0, length), &misspellingLocation, &misspellingLength);
251 // 5490627 shows that there was some code path here where the String constructor below crashes.
252 // We don't know exactly what combination of bad input caused this, so we're making this much
253 // more robust against bad input on release builds.
254 ASSERT(misspellingLength >= 0);
255 ASSERT(misspellingLocation >= -1);
256 ASSERT(!misspellingLength || misspellingLocation >= 0);
257 ASSERT(misspellingLocation < length);
258 ASSERT(misspellingLength <= length);
259 ASSERT(misspellingLocation + misspellingLength <= length);
261 if (misspellingLocation >= 0 && misspellingLength > 0 && misspellingLocation < length && misspellingLength <= length && misspellingLocation + misspellingLength <= length) {
263 // Compute range of misspelled word
264 RefPtrWillBeRawPtr<Range> misspellingRange = TextIterator::subrange(m_range.get(), currentChunkOffset + misspellingLocation, misspellingLength);
266 // Remember first-encountered misspelling and its offset.
267 if (!firstMisspelling) {
268 firstMisspellingOffset = currentChunkOffset + misspellingLocation;
269 firstMisspelling = it.substring(misspellingLocation, misspellingLength);
270 firstMisspellingRange = misspellingRange;
273 // Store marker for misspelled word.
274 misspellingRange->startContainer()->document().markers().addMarker(misspellingRange.get(), DocumentMarker::Spelling);
276 // Bail out if we're marking only the first misspelling, and not all instances.
282 currentChunkOffset += length;
286 return firstMisspelling;
289 String TextCheckingHelper::findFirstMisspellingOrBadGrammar(bool checkGrammar, bool& outIsSpelling, int& outFirstFoundOffset, GrammarDetail& outGrammarDetail)
291 if (!unifiedTextCheckerEnabled())
294 String firstFoundItem;
295 String misspelledWord;
296 String badGrammarPhrase;
298 // Initialize out parameters; these will be updated if we find something to return.
299 outIsSpelling = true;
300 outFirstFoundOffset = 0;
301 outGrammarDetail.location = -1;
302 outGrammarDetail.length = 0;
303 outGrammarDetail.guesses.clear();
304 outGrammarDetail.userDescription = "";
306 // Expand the search range to encompass entire paragraphs, since text checking needs that much context.
307 // Determine the character offset from the start of the paragraph to the start of the original search range,
308 // since we will want to ignore results in this area.
309 RefPtrWillBeRawPtr<Range> paragraphRange = m_range->cloneRange();
310 setStart(paragraphRange.get(), startOfParagraph(VisiblePosition(m_range->startPosition())));
311 int totalRangeLength = TextIterator::rangeLength(paragraphRange.get());
312 setEnd(paragraphRange.get(), endOfParagraph(VisiblePosition(m_range->startPosition())));
314 RefPtrWillBeRawPtr<Range> offsetAsRange = Range::create(paragraphRange->startContainer()->document(), paragraphRange->startPosition(), m_range->startPosition());
315 int rangeStartOffset = TextIterator::rangeLength(offsetAsRange.get());
316 int totalLengthProcessed = 0;
318 bool firstIteration = true;
319 bool lastIteration = false;
320 while (totalLengthProcessed < totalRangeLength) {
321 // Iterate through the search range by paragraphs, checking each one for spelling and grammar.
322 int currentLength = TextIterator::rangeLength(paragraphRange.get());
323 int currentStartOffset = firstIteration ? rangeStartOffset : 0;
324 int currentEndOffset = currentLength;
325 if (inSameParagraph(VisiblePosition(paragraphRange->startPosition()), VisiblePosition(m_range->endPosition()))) {
326 // Determine the character offset from the end of the original search range to the end of the paragraph,
327 // since we will want to ignore results in this area.
328 RefPtrWillBeRawPtr<Range> endOffsetAsRange = Range::create(paragraphRange->startContainer()->document(), paragraphRange->startPosition(), m_range->endPosition());
329 currentEndOffset = TextIterator::rangeLength(endOffsetAsRange.get());
330 lastIteration = true;
332 if (currentStartOffset < currentEndOffset) {
333 String paragraphString = plainText(paragraphRange.get());
334 if (paragraphString.length() > 0) {
335 bool foundGrammar = false;
336 int spellingLocation = 0;
337 int grammarPhraseLocation = 0;
338 int grammarDetailLocation = 0;
339 unsigned grammarDetailIndex = 0;
341 Vector<TextCheckingResult> results;
342 TextCheckingTypeMask checkingTypes = checkGrammar ? (TextCheckingTypeSpelling | TextCheckingTypeGrammar) : TextCheckingTypeSpelling;
343 checkTextOfParagraph(m_client->textChecker(), paragraphString, checkingTypes, results);
345 for (unsigned i = 0; i < results.size(); i++) {
346 const TextCheckingResult* result = &results[i];
347 if (result->decoration == TextDecorationTypeSpelling && result->location >= currentStartOffset && result->location + result->length <= currentEndOffset) {
348 ASSERT(result->length > 0 && result->location >= 0);
349 spellingLocation = result->location;
350 misspelledWord = paragraphString.substring(result->location, result->length);
351 ASSERT(misspelledWord.length());
354 if (checkGrammar && result->decoration == TextDecorationTypeGrammar && result->location < currentEndOffset && result->location + result->length > currentStartOffset) {
355 ASSERT(result->length > 0 && result->location >= 0);
356 // We can't stop after the first grammar result, since there might still be a spelling result after
357 // it begins but before the first detail in it, but we can stop if we find a second grammar result.
360 for (unsigned j = 0; j < result->details.size(); j++) {
361 const GrammarDetail* detail = &result->details[j];
362 ASSERT(detail->length > 0 && detail->location >= 0);
363 if (result->location + detail->location >= currentStartOffset && result->location + detail->location + detail->length <= currentEndOffset && (!foundGrammar || result->location + detail->location < grammarDetailLocation)) {
364 grammarDetailIndex = j;
365 grammarDetailLocation = result->location + detail->location;
370 grammarPhraseLocation = result->location;
371 outGrammarDetail = result->details[grammarDetailIndex];
372 badGrammarPhrase = paragraphString.substring(result->location, result->length);
373 ASSERT(badGrammarPhrase.length());
378 if (!misspelledWord.isEmpty() && (!checkGrammar || badGrammarPhrase.isEmpty() || spellingLocation <= grammarDetailLocation)) {
379 int spellingOffset = spellingLocation - currentStartOffset;
380 if (!firstIteration) {
381 RefPtrWillBeRawPtr<Range> paragraphOffsetAsRange = Range::create(paragraphRange->startContainer()->document(), m_range->startPosition(), paragraphRange->startPosition());
382 spellingOffset += TextIterator::rangeLength(paragraphOffsetAsRange.get());
384 outIsSpelling = true;
385 outFirstFoundOffset = spellingOffset;
386 firstFoundItem = misspelledWord;
389 if (checkGrammar && !badGrammarPhrase.isEmpty()) {
390 int grammarPhraseOffset = grammarPhraseLocation - currentStartOffset;
391 if (!firstIteration) {
392 RefPtrWillBeRawPtr<Range> paragraphOffsetAsRange = Range::create(paragraphRange->startContainer()->document(), m_range->startPosition(), paragraphRange->startPosition());
393 grammarPhraseOffset += TextIterator::rangeLength(paragraphOffsetAsRange.get());
395 outIsSpelling = false;
396 outFirstFoundOffset = grammarPhraseOffset;
397 firstFoundItem = badGrammarPhrase;
402 if (lastIteration || totalLengthProcessed + currentLength >= totalRangeLength)
404 VisiblePosition newParagraphStart = startOfNextParagraph(VisiblePosition(paragraphRange->endPosition()));
405 setStart(paragraphRange.get(), newParagraphStart);
406 setEnd(paragraphRange.get(), endOfParagraph(newParagraphStart));
407 firstIteration = false;
408 totalLengthProcessed += currentLength;
410 return firstFoundItem;
413 int TextCheckingHelper::findFirstGrammarDetail(const Vector<GrammarDetail>& grammarDetails, int badGrammarPhraseLocation, int startOffset, int endOffset, bool markAll) const
415 // Found some bad grammar. Find the earliest detail range that starts in our search range (if any).
416 // Optionally add a DocumentMarker for each detail in the range.
417 int earliestDetailLocationSoFar = -1;
418 int earliestDetailIndex = -1;
419 for (unsigned i = 0; i < grammarDetails.size(); i++) {
420 const GrammarDetail* detail = &grammarDetails[i];
421 ASSERT(detail->length > 0 && detail->location >= 0);
423 int detailStartOffsetInParagraph = badGrammarPhraseLocation + detail->location;
425 // Skip this detail if it starts before the original search range
426 if (detailStartOffsetInParagraph < startOffset)
429 // Skip this detail if it starts after the original search range
430 if (detailStartOffsetInParagraph >= endOffset)
434 RefPtrWillBeRawPtr<Range> badGrammarRange = TextIterator::subrange(m_range.get(), badGrammarPhraseLocation - startOffset + detail->location, detail->length);
435 badGrammarRange->startContainer()->document().markers().addMarker(badGrammarRange.get(), DocumentMarker::Grammar, detail->userDescription);
438 // Remember this detail only if it's earlier than our current candidate (the details aren't in a guaranteed order)
439 if (earliestDetailIndex < 0 || earliestDetailLocationSoFar > detail->location) {
440 earliestDetailIndex = i;
441 earliestDetailLocationSoFar = detail->location;
445 return earliestDetailIndex;
448 String TextCheckingHelper::findFirstBadGrammar(GrammarDetail& outGrammarDetail, int& outGrammarPhraseOffset, bool markAll)
450 // Initialize out parameters; these will be updated if we find something to return.
451 outGrammarDetail.location = -1;
452 outGrammarDetail.length = 0;
453 outGrammarDetail.guesses.clear();
454 outGrammarDetail.userDescription = "";
455 outGrammarPhraseOffset = 0;
457 String firstBadGrammarPhrase;
459 // Expand the search range to encompass entire paragraphs, since grammar checking needs that much context.
460 // Determine the character offset from the start of the paragraph to the start of the original search range,
461 // since we will want to ignore results in this area.
462 TextCheckingParagraph paragraph(m_range);
464 // Start checking from beginning of paragraph, but skip past results that occur before the start of the original search range.
466 while (startOffset < paragraph.checkingEnd()) {
467 Vector<GrammarDetail> grammarDetails;
468 int badGrammarPhraseLocation = -1;
469 int badGrammarPhraseLength = 0;
470 m_client->textChecker().checkGrammarOfString(paragraph.textSubstring(startOffset), grammarDetails, &badGrammarPhraseLocation, &badGrammarPhraseLength);
472 if (!badGrammarPhraseLength) {
473 ASSERT(badGrammarPhraseLocation == -1);
477 ASSERT(badGrammarPhraseLocation >= 0);
478 badGrammarPhraseLocation += startOffset;
481 // Found some bad grammar. Find the earliest detail range that starts in our search range (if any).
482 int badGrammarIndex = findFirstGrammarDetail(grammarDetails, badGrammarPhraseLocation, paragraph.checkingStart(), paragraph.checkingEnd(), markAll);
483 if (badGrammarIndex >= 0) {
484 ASSERT(static_cast<unsigned>(badGrammarIndex) < grammarDetails.size());
485 outGrammarDetail = grammarDetails[badGrammarIndex];
488 // If we found a detail in range, then we have found the first bad phrase (unless we found one earlier but
489 // kept going so we could mark all instances).
490 if (badGrammarIndex >= 0 && firstBadGrammarPhrase.isEmpty()) {
491 outGrammarPhraseOffset = badGrammarPhraseLocation - paragraph.checkingStart();
492 firstBadGrammarPhrase = paragraph.textSubstring(badGrammarPhraseLocation, badGrammarPhraseLength);
494 // Found one. We're done now, unless we're marking each instance.
499 // These results were all between the start of the paragraph and the start of the search range; look
500 // beyond this phrase.
501 startOffset = badGrammarPhraseLocation + badGrammarPhraseLength;
504 return firstBadGrammarPhrase;
507 void TextCheckingHelper::markAllMisspellings(RefPtrWillBeRawPtr<Range>& firstMisspellingRange)
509 // Use the "markAll" feature of findFirstMisspelling. Ignore the return value and the "out parameter";
510 // all we need to do is mark every instance.
512 findFirstMisspelling(ignoredOffset, true, firstMisspellingRange);
515 void TextCheckingHelper::markAllBadGrammar()
517 // Use the "markAll" feature of ofindFirstBadGrammar. Ignore the return value and "out parameters"; all we need to
518 // do is mark every instance.
519 GrammarDetail ignoredGrammarDetail;
521 findFirstBadGrammar(ignoredGrammarDetail, ignoredOffset, true);
524 bool TextCheckingHelper::unifiedTextCheckerEnabled() const
529 Document& doc = m_range->ownerDocument();
530 return blink::unifiedTextCheckerEnabled(doc.frame());
533 void checkTextOfParagraph(TextCheckerClient& client, const String& text, TextCheckingTypeMask checkingTypes, Vector<TextCheckingResult>& results)
535 Vector<UChar> characters;
536 text.appendTo(characters);
537 unsigned length = text.length();
539 Vector<TextCheckingResult> spellingResult;
540 if (checkingTypes & TextCheckingTypeSpelling)
541 findMisspellings(client, characters.data(), 0, length, spellingResult);
543 Vector<TextCheckingResult> grammarResult;
544 if (checkingTypes & TextCheckingTypeGrammar) {
545 // Only checks grammartical error before the first misspellings
546 int grammarCheckLength = length;
547 for (size_t i = 0; i < spellingResult.size(); ++i) {
548 if (spellingResult[i].location < grammarCheckLength)
549 grammarCheckLength = spellingResult[i].location;
552 findBadGrammars(client, characters.data(), 0, grammarCheckLength, grammarResult);
555 if (grammarResult.size())
556 results.swap(grammarResult);
558 if (spellingResult.size()) {
559 if (results.isEmpty())
560 results.swap(spellingResult);
562 results.appendVector(spellingResult);
566 bool unifiedTextCheckerEnabled(const LocalFrame* frame)
571 const Settings* settings = frame->settings();
575 return settings->unifiedTextCheckerEnabled();