Upstream version 7.36.149.0
[platform/framework/web/crosswalk.git] / src / third_party / WebKit / Source / platform / text / TextBreakIteratorICU.cpp
1 /*
2  * Copyright (C) 2006 Lars Knoll <lars@trolltech.com>
3  * Copyright (C) 2007, 2011, 2012 Apple Inc. All rights reserved.
4  *
5  * This library is free software; you can redistribute it and/or
6  * modify it under the terms of the GNU Library General Public
7  * License as published by the Free Software Foundation; either
8  * version 2 of the License, or (at your option) any later version.
9  *
10  * This library is distributed in the hope that it will be useful,
11  * but WITHOUT ANY WARRANTY; without even the implied warranty of
12  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
13  * Library General Public License for more details.
14  *
15  * You should have received a copy of the GNU Library General Public License
16  * along with this library; see the file COPYING.LIB.  If not, write to
17  * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
18  * Boston, MA 02110-1301, USA.
19  *
20  */
21
22 #include "config.h"
23 #include "platform/text/TextBreakIterator.h"
24
25 #include "platform/text/TextBreakIteratorInternalICU.h"
26 #include "wtf/Assertions.h"
27 #include "wtf/HashMap.h"
28 #include "wtf/PassOwnPtr.h"
29 #include "wtf/ThreadSpecific.h"
30 #include "wtf/ThreadingPrimitives.h"
31 #include "wtf/text/AtomicString.h"
32 #include "wtf/text/CString.h"
33 #include "wtf/text/WTFString.h"
34 #include <unicode/rbbi.h>
35 #include <unicode/ubrk.h>
36
37 using namespace WTF;
38 using namespace std;
39
40 namespace WebCore {
41
42 class LineBreakIteratorPool {
43     WTF_MAKE_NONCOPYABLE(LineBreakIteratorPool);
44 public:
45     static LineBreakIteratorPool& sharedPool()
46     {
47         static WTF::ThreadSpecific<LineBreakIteratorPool>* pool = new WTF::ThreadSpecific<LineBreakIteratorPool>;
48         return **pool;
49     }
50
51     static PassOwnPtr<LineBreakIteratorPool> create() { return adoptPtr(new LineBreakIteratorPool); }
52
53     icu::BreakIterator* take(const AtomicString& locale)
54     {
55         icu::BreakIterator* iterator = 0;
56         for (size_t i = 0; i < m_pool.size(); ++i) {
57             if (m_pool[i].first == locale) {
58                 iterator = m_pool[i].second;
59                 m_pool.remove(i);
60                 break;
61             }
62         }
63
64         if (!iterator) {
65             UErrorCode openStatus = U_ZERO_ERROR;
66             bool localeIsEmpty = locale.isEmpty();
67             iterator = icu::BreakIterator::createLineInstance(localeIsEmpty ? icu::Locale(currentTextBreakLocaleID()) : icu::Locale(locale.utf8().data()), openStatus);
68             // locale comes from a web page and it can be invalid, leading ICU
69             // to fail, in which case we fall back to the default locale.
70             if (!localeIsEmpty && U_FAILURE(openStatus)) {
71                 openStatus = U_ZERO_ERROR;
72                 iterator = icu::BreakIterator::createLineInstance(icu::Locale(currentTextBreakLocaleID()), openStatus);
73             }
74
75             if (U_FAILURE(openStatus)) {
76                 WTF_LOG_ERROR("icu::BreakIterator construction failed with status %d", openStatus);
77                 return 0;
78             }
79         }
80
81         ASSERT(!m_vendedIterators.contains(iterator));
82         m_vendedIterators.set(iterator, locale);
83         return iterator;
84     }
85
86     void put(icu::BreakIterator* iterator)
87     {
88         ASSERT_ARG(iterator, m_vendedIterators.contains(iterator));
89
90         if (m_pool.size() == capacity) {
91             delete(m_pool[0].second);
92             m_pool.remove(0);
93         }
94
95         m_pool.append(Entry(m_vendedIterators.take(iterator), iterator));
96     }
97
98 private:
99     LineBreakIteratorPool() { }
100
101     static const size_t capacity = 4;
102
103     typedef pair<AtomicString, icu::BreakIterator*> Entry;
104     typedef Vector<Entry, capacity> Pool;
105     Pool m_pool;
106     HashMap<icu::BreakIterator*, AtomicString> m_vendedIterators;
107
108     friend WTF::ThreadSpecific<LineBreakIteratorPool>::operator LineBreakIteratorPool*();
109 };
110
// Identifies which piece of text the current UText chunk refers to: nothing
// yet (NoContext), the prior context supplied alongside the string
// (PriorContext), or the string itself (PrimaryContext).
enum TextContext { NoContext, PriorContext, PrimaryContext };

// Number of UChars in the scratch buffer embedded in UTextWithBuffer.
const int textBufferCapacity = 16;

// A UText bundled with inline scratch storage. Callers in this file point
// text.pExtra at |buffer| before opening a Latin-1 text provider on it.
typedef struct {
    UText text;
    UChar buffer[textBufferCapacity];
} UTextWithBuffer;
119
// Clamps |index| in place: negative values become 0, values above |limit|
// become |limit|. Returns the clamped value.
static inline int64_t textPinIndex(int64_t& index, int64_t limit)
{
    // The negative check takes precedence, matching an if / else-if chain.
    index = index < 0 ? 0 : (index > limit ? limit : index);
    return index;
}
128
129 static inline int64_t textNativeLength(UText* text)
130 {
131     return text->a + text->b;
132 }
133
134 // Relocate pointer from source into destination as required.
135 static void textFixPointer(const UText* source, UText* destination, const void*& pointer)
136 {
137     if (pointer >= source->pExtra && pointer < static_cast<char*>(source->pExtra) + source->extraSize) {
138         // Pointer references source extra buffer.
139         pointer = static_cast<char*>(destination->pExtra) + (static_cast<const char*>(pointer) - static_cast<const char*>(source->pExtra));
140     } else if (pointer >= source && pointer < reinterpret_cast<const char*>(source) + source->sizeOfStruct) {
141         // Pointer references source text structure, but not source extra buffer.
142         pointer = reinterpret_cast<char*>(destination) + (static_cast<const char*>(pointer) - reinterpret_cast<const char*>(source));
143     }
144 }
145
146 static UText* textClone(UText* destination, const UText* source, UBool deep, UErrorCode* status)
147 {
148     ASSERT_UNUSED(deep, !deep);
149     if (U_FAILURE(*status))
150         return 0;
151     int32_t extraSize = source->extraSize;
152     destination = utext_setup(destination, extraSize, status);
153     if (U_FAILURE(*status))
154         return destination;
155     void* extraNew = destination->pExtra;
156     int32_t flags = destination->flags;
157     int sizeToCopy = min(source->sizeOfStruct, destination->sizeOfStruct);
158     memcpy(destination, source, sizeToCopy);
159     destination->pExtra = extraNew;
160     destination->flags = flags;
161     memcpy(destination->pExtra, source->pExtra, extraSize);
162     textFixPointer(source, destination, destination->context);
163     textFixPointer(source, destination, destination->p);
164     textFixPointer(source, destination, destination->q);
165     ASSERT(!destination->r);
166     const void * chunkContents = static_cast<const void*>(destination->chunkContents);
167     textFixPointer(source, destination, chunkContents);
168     destination->chunkContents = static_cast<const UChar*>(chunkContents);
169     return destination;
170 }
171
172 static int32_t textExtract(UText*, int64_t, int64_t, UChar*, int32_t, UErrorCode* errorCode)
173 {
174     // In the present context, this text provider is used only with ICU functions
175     // that do not perform an extract operation.
176     ASSERT_NOT_REACHED();
177     *errorCode = U_UNSUPPORTED_ERROR;
178     return 0;
179 }
180
181 static void textClose(UText* text)
182 {
183     text->context = 0;
184 }
185
186 static inline TextContext textGetContext(const UText* text, int64_t nativeIndex, UBool forward)
187 {
188     if (!text->b || nativeIndex > text->b)
189         return PrimaryContext;
190     if (nativeIndex == text->b)
191         return forward ? PrimaryContext : PriorContext;
192     return PriorContext;
193 }
194
195 static inline TextContext textLatin1GetCurrentContext(const UText* text)
196 {
197     if (!text->chunkContents)
198         return NoContext;
199     return text->chunkContents == text->pExtra ? PrimaryContext : PriorContext;
200 }
201
202 static void textLatin1MoveInPrimaryContext(UText* text, int64_t nativeIndex, int64_t nativeLength, UBool forward)
203 {
204     ASSERT(text->chunkContents == text->pExtra);
205     if (forward) {
206         ASSERT(nativeIndex >= text->b && nativeIndex < nativeLength);
207         text->chunkNativeStart = nativeIndex;
208         text->chunkNativeLimit = nativeIndex + text->extraSize / sizeof(UChar);
209         if (text->chunkNativeLimit > nativeLength)
210             text->chunkNativeLimit = nativeLength;
211     } else {
212         ASSERT(nativeIndex > text->b && nativeIndex <= nativeLength);
213         text->chunkNativeLimit = nativeIndex;
214         text->chunkNativeStart = nativeIndex - text->extraSize / sizeof(UChar);
215         if (text->chunkNativeStart < text->b)
216             text->chunkNativeStart = text->b;
217     }
218     int64_t length = text->chunkNativeLimit - text->chunkNativeStart;
219     // Ensure chunk length is well defined if computed length exceeds int32_t range.
220     ASSERT(length <= numeric_limits<int32_t>::max());
221     text->chunkLength = length <= numeric_limits<int32_t>::max() ? static_cast<int32_t>(length) : 0;
222     text->nativeIndexingLimit = text->chunkLength;
223     text->chunkOffset = forward ? 0 : text->chunkLength;
224     StringImpl::copyChars(const_cast<UChar*>(text->chunkContents), static_cast<const LChar*>(text->p) + (text->chunkNativeStart - text->b), static_cast<unsigned>(text->chunkLength));
225 }
226
227 static void textLatin1SwitchToPrimaryContext(UText* text, int64_t nativeIndex, int64_t nativeLength, UBool forward)
228 {
229     ASSERT(!text->chunkContents || text->chunkContents == text->q);
230     text->chunkContents = static_cast<const UChar*>(text->pExtra);
231     textLatin1MoveInPrimaryContext(text, nativeIndex, nativeLength, forward);
232 }
233
234 static void textLatin1MoveInPriorContext(UText* text, int64_t nativeIndex, int64_t nativeLength, UBool forward)
235 {
236     ASSERT(text->chunkContents == text->q);
237     ASSERT(forward ? nativeIndex < text->b : nativeIndex <= text->b);
238     ASSERT_UNUSED(nativeLength, forward ? nativeIndex < nativeLength : nativeIndex <= nativeLength);
239     ASSERT_UNUSED(forward, forward ? nativeIndex < nativeLength : nativeIndex <= nativeLength);
240     text->chunkNativeStart = 0;
241     text->chunkNativeLimit = text->b;
242     text->chunkLength = text->b;
243     text->nativeIndexingLimit = text->chunkLength;
244     int64_t offset = nativeIndex - text->chunkNativeStart;
245     // Ensure chunk offset is well defined if computed offset exceeds int32_t range or chunk length.
246     ASSERT(offset <= numeric_limits<int32_t>::max());
247     text->chunkOffset = min(offset <= numeric_limits<int32_t>::max() ? static_cast<int32_t>(offset) : 0, text->chunkLength);
248 }
249
250 static void textLatin1SwitchToPriorContext(UText* text, int64_t nativeIndex, int64_t nativeLength, UBool forward)
251 {
252     ASSERT(!text->chunkContents || text->chunkContents == text->pExtra);
253     text->chunkContents = static_cast<const UChar*>(text->q);
254     textLatin1MoveInPriorContext(text, nativeIndex, nativeLength, forward);
255 }
256
257 static inline bool textInChunkOrOutOfRange(UText* text, int64_t nativeIndex, int64_t nativeLength, UBool forward, UBool& isAccessible)
258 {
259     if (forward) {
260         if (nativeIndex >= text->chunkNativeStart && nativeIndex < text->chunkNativeLimit) {
261             int64_t offset = nativeIndex - text->chunkNativeStart;
262             // Ensure chunk offset is well formed if computed offset exceeds int32_t range.
263             ASSERT(offset <= numeric_limits<int32_t>::max());
264             text->chunkOffset = offset <= numeric_limits<int32_t>::max() ? static_cast<int32_t>(offset) : 0;
265             isAccessible = TRUE;
266             return true;
267         }
268         if (nativeIndex >= nativeLength && text->chunkNativeLimit == nativeLength) {
269             text->chunkOffset = text->chunkLength;
270             isAccessible = FALSE;
271             return true;
272         }
273     } else {
274         if (nativeIndex > text->chunkNativeStart && nativeIndex <= text->chunkNativeLimit) {
275             int64_t offset = nativeIndex - text->chunkNativeStart;
276             // Ensure chunk offset is well formed if computed offset exceeds int32_t range.
277             ASSERT(offset <= numeric_limits<int32_t>::max());
278             text->chunkOffset = offset <= numeric_limits<int32_t>::max() ? static_cast<int32_t>(offset) : 0;
279             isAccessible = TRUE;
280             return true;
281         }
282         if (nativeIndex <= 0 && !text->chunkNativeStart) {
283             text->chunkOffset = 0;
284             isAccessible = FALSE;
285             return true;
286         }
287     }
288     return false;
289 }
290
291 static UBool textLatin1Access(UText* text, int64_t nativeIndex, UBool forward)
292 {
293     if (!text->context)
294         return FALSE;
295     int64_t nativeLength = textNativeLength(text);
296     UBool isAccessible;
297     if (textInChunkOrOutOfRange(text, nativeIndex, nativeLength, forward, isAccessible))
298         return isAccessible;
299     nativeIndex = textPinIndex(nativeIndex, nativeLength - 1);
300     TextContext currentContext = textLatin1GetCurrentContext(text);
301     TextContext newContext = textGetContext(text, nativeIndex, forward);
302     ASSERT(newContext != NoContext);
303     if (newContext == currentContext) {
304         if (currentContext == PrimaryContext) {
305             textLatin1MoveInPrimaryContext(text, nativeIndex, nativeLength, forward);
306         } else {
307             textLatin1MoveInPriorContext(text, nativeIndex, nativeLength, forward);
308         }
309     } else if (newContext == PrimaryContext) {
310         textLatin1SwitchToPrimaryContext(text, nativeIndex, nativeLength, forward);
311     } else {
312         ASSERT(newContext == PriorContext);
313         textLatin1SwitchToPriorContext(text, nativeIndex, nativeLength, forward);
314     }
315     return TRUE;
316 }
317
// UText provider callback table for Latin-1 (8-bit) text. Only the clone,
// native-length, access, extract and close callbacks are supplied; all the
// remaining slots are left null.
static const struct UTextFuncs textLatin1Funcs = {
    sizeof(UTextFuncs),
    0, 0, 0,
    textClone,
    textNativeLength,
    textLatin1Access,
    textExtract,
    0, 0, 0, 0,
    textClose,
    0, 0, 0,
};
329
330 static void textInit(UText* text, const UTextFuncs* funcs, const void* string, unsigned length, const UChar* priorContext, int priorContextLength)
331 {
332     text->pFuncs = funcs;
333     text->providerProperties = 1 << UTEXT_PROVIDER_STABLE_CHUNKS;
334     text->context = string;
335     text->p = string;
336     text->a = length;
337     text->q = priorContext;
338     text->b = priorContextLength;
339 }
340
341 static UText* textOpenLatin1(UTextWithBuffer* utWithBuffer, const LChar* string, unsigned length, const UChar* priorContext, int priorContextLength, UErrorCode* status)
342 {
343     if (U_FAILURE(*status))
344         return 0;
345
346     if (!string || length > static_cast<unsigned>(numeric_limits<int32_t>::max())) {
347         *status = U_ILLEGAL_ARGUMENT_ERROR;
348         return 0;
349     }
350     UText* text = utext_setup(&utWithBuffer->text, sizeof(utWithBuffer->buffer), status);
351     if (U_FAILURE(*status)) {
352         ASSERT(!text);
353         return 0;
354     }
355     textInit(text, &textLatin1Funcs, string, length, priorContext, priorContextLength);
356     return text;
357 }
358
359 static inline TextContext textUTF16GetCurrentContext(const UText* text)
360 {
361     if (!text->chunkContents)
362         return NoContext;
363     return text->chunkContents == text->p ? PrimaryContext : PriorContext;
364 }
365
366 static void textUTF16MoveInPrimaryContext(UText* text, int64_t nativeIndex, int64_t nativeLength, UBool forward)
367 {
368     ASSERT(text->chunkContents == text->p);
369     ASSERT_UNUSED(forward, forward ? nativeIndex >= text->b : nativeIndex > text->b);
370     ASSERT_UNUSED(forward, forward ? nativeIndex < nativeLength : nativeIndex <= nativeLength);
371     text->chunkNativeStart = text->b;
372     text->chunkNativeLimit = nativeLength;
373     int64_t length = text->chunkNativeLimit - text->chunkNativeStart;
374     // Ensure chunk length is well defined if computed length exceeds int32_t range.
375     ASSERT(length <= numeric_limits<int32_t>::max());
376     text->chunkLength = length <= numeric_limits<int32_t>::max() ? static_cast<int32_t>(length) : 0;
377     text->nativeIndexingLimit = text->chunkLength;
378     int64_t offset = nativeIndex - text->chunkNativeStart;
379     // Ensure chunk offset is well defined if computed offset exceeds int32_t range or chunk length.
380     ASSERT(offset <= numeric_limits<int32_t>::max());
381     text->chunkOffset = min(offset <= numeric_limits<int32_t>::max() ? static_cast<int32_t>(offset) : 0, text->chunkLength);
382 }
383
384 static void textUTF16SwitchToPrimaryContext(UText* text, int64_t nativeIndex, int64_t nativeLength, UBool forward)
385 {
386     ASSERT(!text->chunkContents || text->chunkContents == text->q);
387     text->chunkContents = static_cast<const UChar*>(text->p);
388     textUTF16MoveInPrimaryContext(text, nativeIndex, nativeLength, forward);
389 }
390
391 static void textUTF16MoveInPriorContext(UText* text, int64_t nativeIndex, int64_t nativeLength, UBool forward)
392 {
393     ASSERT(text->chunkContents == text->q);
394     ASSERT(forward ? nativeIndex < text->b : nativeIndex <= text->b);
395     ASSERT_UNUSED(nativeLength, forward ? nativeIndex < nativeLength : nativeIndex <= nativeLength);
396     ASSERT_UNUSED(forward, forward ? nativeIndex < nativeLength : nativeIndex <= nativeLength);
397     text->chunkNativeStart = 0;
398     text->chunkNativeLimit = text->b;
399     text->chunkLength = text->b;
400     text->nativeIndexingLimit = text->chunkLength;
401     int64_t offset = nativeIndex - text->chunkNativeStart;
402     // Ensure chunk offset is well defined if computed offset exceeds int32_t range or chunk length.
403     ASSERT(offset <= numeric_limits<int32_t>::max());
404     text->chunkOffset = min(offset <= numeric_limits<int32_t>::max() ? static_cast<int32_t>(offset) : 0, text->chunkLength);
405 }
406
407 static void textUTF16SwitchToPriorContext(UText* text, int64_t nativeIndex, int64_t nativeLength, UBool forward)
408 {
409     ASSERT(!text->chunkContents || text->chunkContents == text->p);
410     text->chunkContents = static_cast<const UChar*>(text->q);
411     textUTF16MoveInPriorContext(text, nativeIndex, nativeLength, forward);
412 }
413
414 static UBool textUTF16Access(UText* text, int64_t nativeIndex, UBool forward)
415 {
416     if (!text->context)
417         return FALSE;
418     int64_t nativeLength = textNativeLength(text);
419     UBool isAccessible;
420     if (textInChunkOrOutOfRange(text, nativeIndex, nativeLength, forward, isAccessible))
421         return isAccessible;
422     nativeIndex = textPinIndex(nativeIndex, nativeLength - 1);
423     TextContext currentContext = textUTF16GetCurrentContext(text);
424     TextContext newContext = textGetContext(text, nativeIndex, forward);
425     ASSERT(newContext != NoContext);
426     if (newContext == currentContext) {
427         if (currentContext == PrimaryContext) {
428             textUTF16MoveInPrimaryContext(text, nativeIndex, nativeLength, forward);
429         } else {
430             textUTF16MoveInPriorContext(text, nativeIndex, nativeLength, forward);
431         }
432     } else if (newContext == PrimaryContext) {
433         textUTF16SwitchToPrimaryContext(text, nativeIndex, nativeLength, forward);
434     } else {
435         ASSERT(newContext == PriorContext);
436         textUTF16SwitchToPriorContext(text, nativeIndex, nativeLength, forward);
437     }
438     return TRUE;
439 }
440
// UText provider callback table for UTF-16 text. Only the clone,
// native-length, access, extract and close callbacks are supplied; all the
// remaining slots are left null. It differs from the Latin-1 table only in
// the access callback.
static const struct UTextFuncs textUTF16Funcs = {
    sizeof(UTextFuncs),
    0, 0, 0,
    textClone,
    textNativeLength,
    textUTF16Access,
    textExtract,
    0, 0, 0, 0,
    textClose,
    0, 0, 0,
};
452
453 static UText* textOpenUTF16(UText* text, const UChar* string, unsigned length, const UChar* priorContext, int priorContextLength, UErrorCode* status)
454 {
455     if (U_FAILURE(*status))
456         return 0;
457
458     if (!string || length > static_cast<unsigned>(numeric_limits<int32_t>::max())) {
459         *status = U_ILLEGAL_ARGUMENT_ERROR;
460         return 0;
461     }
462
463     text = utext_setup(text, 0, status);
464     if (U_FAILURE(*status)) {
465         ASSERT(!text);
466         return 0;
467     }
468     textInit(text, &textUTF16Funcs, string, length, priorContext, priorContextLength);
469     return text;
470 }
471
// Pristine UText used to initialise the |text| member of stack-allocated
// UTextWithBuffer instances before they are opened.
static UText emptyText = UTEXT_INITIALIZER;
473
474 static TextBreakIterator* wordBreakIterator(const LChar* string, int length)
475 {
476     UErrorCode errorCode = U_ZERO_ERROR;
477     static TextBreakIterator* breakIter = 0;
478     if (!breakIter) {
479         breakIter = icu::BreakIterator::createWordInstance(icu::Locale(currentTextBreakLocaleID()), errorCode);
480         ASSERT_WITH_MESSAGE(U_SUCCESS(errorCode), "ICU could not open a break iterator: %s (%d)", u_errorName(errorCode), errorCode);
481         if (!breakIter)
482             return 0;
483     }
484
485     UTextWithBuffer textLocal;
486     textLocal.text = emptyText;
487     textLocal.text.extraSize = sizeof(textLocal.buffer);
488     textLocal.text.pExtra = textLocal.buffer;
489
490     UErrorCode openStatus = U_ZERO_ERROR;
491     UText* text = textOpenLatin1(&textLocal, string, length, 0, 0, &openStatus);
492     if (U_FAILURE(openStatus)) {
493         WTF_LOG_ERROR("textOpenLatin1 failed with status %d", openStatus);
494         return 0;
495     }
496
497     UErrorCode setTextStatus = U_ZERO_ERROR;
498     breakIter->setText(text, setTextStatus);
499     if (U_FAILURE(setTextStatus))
500         WTF_LOG_ERROR("BreakIterator::seText failed with status %d", setTextStatus);
501
502     utext_close(text);
503
504     return breakIter;
505 }
506
507 static void setText16(TextBreakIterator* iter, const UChar* string, int length)
508 {
509     UErrorCode errorCode = U_ZERO_ERROR;
510     UText uText = UTEXT_INITIALIZER;
511     utext_openUChars(&uText, string, length, &errorCode);
512     if (U_FAILURE(errorCode))
513         return;
514     iter->setText(&uText, errorCode);
515 }
516
517 TextBreakIterator* wordBreakIterator(const UChar* string, int length)
518 {
519     UErrorCode errorCode = U_ZERO_ERROR;
520     static TextBreakIterator* breakIter = 0;
521     if (!breakIter) {
522         breakIter = icu::BreakIterator::createWordInstance(icu::Locale(currentTextBreakLocaleID()), errorCode);
523         ASSERT_WITH_MESSAGE(U_SUCCESS(errorCode), "ICU could not open a break iterator: %s (%d)", u_errorName(errorCode), errorCode);
524         if (!breakIter)
525             return 0;
526     }
527     setText16(breakIter, string, length);
528     return breakIter;
529 }
530
531 TextBreakIterator* wordBreakIterator(const String& string, int start, int length)
532 {
533     if (string.isEmpty())
534         return 0;
535     if (string.is8Bit())
536         return wordBreakIterator(string.characters8() + start, length);
537     return wordBreakIterator(string.characters16() + start, length);
538 }
539
540 TextBreakIterator* acquireLineBreakIterator(const LChar* string, int length, const AtomicString& locale, const UChar* priorContext, unsigned priorContextLength)
541 {
542     TextBreakIterator* iterator = LineBreakIteratorPool::sharedPool().take(locale);
543     if (!iterator)
544         return 0;
545
546     UTextWithBuffer textLocal;
547     textLocal.text = emptyText;
548     textLocal.text.extraSize = sizeof(textLocal.buffer);
549     textLocal.text.pExtra = textLocal.buffer;
550
551     UErrorCode openStatus = U_ZERO_ERROR;
552     UText* text = textOpenLatin1(&textLocal, string, length, priorContext, priorContextLength, &openStatus);
553     if (U_FAILURE(openStatus)) {
554         WTF_LOG_ERROR("textOpenLatin1 failed with status %d", openStatus);
555         return 0;
556     }
557
558     UErrorCode setTextStatus = U_ZERO_ERROR;
559     iterator->setText(text, setTextStatus);
560     if (U_FAILURE(setTextStatus)) {
561         WTF_LOG_ERROR("ubrk_setUText failed with status %d", setTextStatus);
562         return 0;
563     }
564
565     utext_close(text);
566
567     return iterator;
568 }
569
570 TextBreakIterator* acquireLineBreakIterator(const UChar* string, int length, const AtomicString& locale, const UChar* priorContext, unsigned priorContextLength)
571 {
572     TextBreakIterator* iterator = LineBreakIteratorPool::sharedPool().take(locale);
573     if (!iterator)
574         return 0;
575
576     UText textLocal = UTEXT_INITIALIZER;
577
578     UErrorCode openStatus = U_ZERO_ERROR;
579     UText* text = textOpenUTF16(&textLocal, string, length, priorContext, priorContextLength, &openStatus);
580     if (U_FAILURE(openStatus)) {
581         WTF_LOG_ERROR("textOpenUTF16 failed with status %d", openStatus);
582         return 0;
583     }
584
585     UErrorCode setTextStatus = U_ZERO_ERROR;
586     iterator->setText(text, setTextStatus);
587     if (U_FAILURE(setTextStatus)) {
588         WTF_LOG_ERROR("ubrk_setUText failed with status %d", setTextStatus);
589         return 0;
590     }
591
592     utext_close(text);
593
594     return iterator;
595 }
596
597 void releaseLineBreakIterator(TextBreakIterator* iterator)
598 {
599     ASSERT_ARG(iterator, iterator);
600
601     LineBreakIteratorPool::sharedPool().put(iterator);
602 }
603
// Single cached character-break iterator that NonSharedCharacterBreakIterator
// instances try to reuse; it is null while no iterator is parked here.
// Updates go through compareAndSwapNonSharedCharacterBreakIterator().
static TextBreakIterator* nonSharedCharacterBreakIterator;
605
606 static inline bool compareAndSwapNonSharedCharacterBreakIterator(TextBreakIterator* expected, TextBreakIterator* newValue)
607 {
608     DEFINE_STATIC_LOCAL(Mutex, nonSharedCharacterBreakIteratorMutex, ());
609     MutexLocker locker(nonSharedCharacterBreakIteratorMutex);
610     if (nonSharedCharacterBreakIterator != expected)
611         return false;
612     nonSharedCharacterBreakIterator = newValue;
613     return true;
614 }
615
616 NonSharedCharacterBreakIterator::NonSharedCharacterBreakIterator(const String& string)
617     : m_is8Bit(true)
618     , m_charaters8(0)
619     , m_offset(0)
620     , m_length(0)
621     , m_iterator(0)
622 {
623     if (string.isEmpty())
624         return;
625
626     m_is8Bit = string.is8Bit();
627
628     if (m_is8Bit) {
629         m_charaters8 = string.characters8();
630         m_offset = 0;
631         m_length = string.length();
632         return;
633     }
634
635     createIteratorForBuffer(string.characters16(), string.length());
636 }
637
638 NonSharedCharacterBreakIterator::NonSharedCharacterBreakIterator(const UChar* buffer, unsigned length)
639     : m_is8Bit(false)
640     , m_charaters8(0)
641     , m_offset(0)
642     , m_length(0)
643     , m_iterator(0)
644 {
645     createIteratorForBuffer(buffer, length);
646 }
647
648 void NonSharedCharacterBreakIterator::createIteratorForBuffer(const UChar* buffer, unsigned length)
649 {
650     m_iterator = nonSharedCharacterBreakIterator;
651     bool createdIterator = m_iterator && compareAndSwapNonSharedCharacterBreakIterator(m_iterator, 0);
652     if (!createdIterator) {
653         UErrorCode errorCode = U_ZERO_ERROR;
654         m_iterator = icu::BreakIterator::createCharacterInstance(icu::Locale(currentTextBreakLocaleID()), errorCode);
655         ASSERT_WITH_MESSAGE(U_SUCCESS(errorCode), "ICU could not open a break iterator: %s (%d)", u_errorName(errorCode), errorCode);
656     }
657
658     setText16(m_iterator, buffer, length);
659 }
660
661 NonSharedCharacterBreakIterator::~NonSharedCharacterBreakIterator()
662 {
663     if (m_is8Bit)
664         return;
665     if (!compareAndSwapNonSharedCharacterBreakIterator(0, m_iterator))
666         delete m_iterator;
667 }
668
669 int NonSharedCharacterBreakIterator::next()
670 {
671     if (!m_is8Bit)
672         return m_iterator->next();
673
674     if (m_offset >= m_length)
675         return TextBreakDone;
676
677     m_offset += clusterLengthStartingAt(m_offset);
678     return m_offset;
679 }
680
681 int NonSharedCharacterBreakIterator::current()
682 {
683     if (!m_is8Bit)
684         return m_iterator->current();
685     return m_offset;
686 }
687
688 bool NonSharedCharacterBreakIterator::isBreak(int offset) const
689 {
690     if (!m_is8Bit)
691         return m_iterator->isBoundary(offset);
692     return !isLFAfterCR(offset);
693 }
694
695 int NonSharedCharacterBreakIterator::preceding(int offset) const
696 {
697     if (!m_is8Bit)
698         return m_iterator->preceding(offset);
699     if (offset <= 0)
700         return TextBreakDone;
701     if (isLFAfterCR(offset))
702         return offset - 2;
703     return offset - 1;
704 }
705
706 int NonSharedCharacterBreakIterator::following(int offset) const
707 {
708     if (!m_is8Bit)
709         return m_iterator->following(offset);
710     if (static_cast<unsigned>(offset) >= m_length)
711         return TextBreakDone;
712     return offset + clusterLengthStartingAt(offset);
713 }
714
715 TextBreakIterator* sentenceBreakIterator(const UChar* string, int length)
716 {
717     UErrorCode openStatus = U_ZERO_ERROR;
718     static TextBreakIterator* iterator = 0;
719     if (!iterator) {
720         iterator =  icu::BreakIterator::createSentenceInstance(icu::Locale(currentTextBreakLocaleID()), openStatus);
721         ASSERT_WITH_MESSAGE(U_SUCCESS(openStatus), "ICU could not open a break iterator: %s (%d)", u_errorName(openStatus), openStatus);
722         if (!iterator)
723             return 0;
724     }
725
726     setText16(iterator, string, length);
727     return iterator;
728 }
729
730 bool isWordTextBreak(TextBreakIterator* iterator)
731 {
732     icu::RuleBasedBreakIterator* ruleBasedBreakIterator = static_cast<icu::RuleBasedBreakIterator*>(iterator);
733     int ruleStatus = ruleBasedBreakIterator->getRuleStatus();
734     return ruleStatus != UBRK_WORD_NONE;
735 }
736
737 static TextBreakIterator* setUpIteratorWithRules(const char* breakRules, const UChar* string, int length)
738 {
739     if (!string)
740         return 0;
741
742     static TextBreakIterator* iterator = 0;
743     if (!iterator) {
744         UParseError parseStatus;
745         UErrorCode openStatus = U_ZERO_ERROR;
746         Vector<UChar> rules;
747         String(breakRules).appendTo(rules);
748
749         iterator = new icu::RuleBasedBreakIterator(icu::UnicodeString(rules.data(), rules.size()), parseStatus, openStatus);
750         ASSERT_WITH_MESSAGE(U_SUCCESS(openStatus), "ICU could not open a break iterator: %s (%d)", u_errorName(openStatus), openStatus);
751         if (!iterator)
752             return 0;
753     }
754
755     setText16(iterator, string, length);
756     return iterator;
757 }
758
759 TextBreakIterator* cursorMovementIterator(const UChar* string, int length)
760 {
761     // This rule set is based on character-break iterator rules of ICU 4.0
762     // <http://source.icu-project.org/repos/icu/icu/tags/release-4-0/source/data/brkitr/char.txt>.
763     // The major differences from the original ones are listed below:
764     // * Replaced '[\p{Grapheme_Cluster_Break = SpacingMark}]' with '[\p{General_Category = Spacing Mark} - $Extend]' for ICU 3.8 or earlier;
765     // * Removed rules that prevent a cursor from moving after prepend characters (Bug 24342);
766     // * Added rules that prevent a cursor from moving after virama signs of Indic languages except Tamil (Bug 15790), and;
767     // * Added rules that prevent a cursor from moving before Japanese half-width katakara voiced marks.
768     // * Added rules for regional indicator symbols.
769     static const char* const kRules =
770         "$CR      = [\\p{Grapheme_Cluster_Break = CR}];"
771         "$LF      = [\\p{Grapheme_Cluster_Break = LF}];"
772         "$Control = [\\p{Grapheme_Cluster_Break = Control}];"
773         "$VoiceMarks = [\\uFF9E\\uFF9F];"  // Japanese half-width katakana voiced marks
774         "$Extend  = [\\p{Grapheme_Cluster_Break = Extend} $VoiceMarks - [\\u0E30 \\u0E32 \\u0E45 \\u0EB0 \\u0EB2]];"
775         "$SpacingMark = [[\\p{General_Category = Spacing Mark}] - $Extend];"
776         "$L       = [\\p{Grapheme_Cluster_Break = L}];"
777         "$V       = [\\p{Grapheme_Cluster_Break = V}];"
778         "$T       = [\\p{Grapheme_Cluster_Break = T}];"
779         "$LV      = [\\p{Grapheme_Cluster_Break = LV}];"
780         "$LVT     = [\\p{Grapheme_Cluster_Break = LVT}];"
781         "$Hin0    = [\\u0905-\\u0939];"    // Devanagari Letter A,...,Ha
782         "$HinV    = \\u094D;"              // Devanagari Sign Virama
783         "$Hin1    = [\\u0915-\\u0939];"    // Devanagari Letter Ka,...,Ha
784         "$Ben0    = [\\u0985-\\u09B9];"    // Bengali Letter A,...,Ha
785         "$BenV    = \\u09CD;"              // Bengali Sign Virama
786         "$Ben1    = [\\u0995-\\u09B9];"    // Bengali Letter Ka,...,Ha
787         "$Pan0    = [\\u0A05-\\u0A39];"    // Gurmukhi Letter A,...,Ha
788         "$PanV    = \\u0A4D;"              // Gurmukhi Sign Virama
789         "$Pan1    = [\\u0A15-\\u0A39];"    // Gurmukhi Letter Ka,...,Ha
790         "$Guj0    = [\\u0A85-\\u0AB9];"    // Gujarati Letter A,...,Ha
791         "$GujV    = \\u0ACD;"              // Gujarati Sign Virama
792         "$Guj1    = [\\u0A95-\\u0AB9];"    // Gujarati Letter Ka,...,Ha
793         "$Ori0    = [\\u0B05-\\u0B39];"    // Oriya Letter A,...,Ha
794         "$OriV    = \\u0B4D;"              // Oriya Sign Virama
795         "$Ori1    = [\\u0B15-\\u0B39];"    // Oriya Letter Ka,...,Ha
796         "$Tel0    = [\\u0C05-\\u0C39];"    // Telugu Letter A,...,Ha
797         "$TelV    = \\u0C4D;"              // Telugu Sign Virama
798         "$Tel1    = [\\u0C14-\\u0C39];"    // Telugu Letter Ka,...,Ha
799         "$Kan0    = [\\u0C85-\\u0CB9];"    // Kannada Letter A,...,Ha
800         "$KanV    = \\u0CCD;"              // Kannada Sign Virama
801         "$Kan1    = [\\u0C95-\\u0CB9];"    // Kannada Letter A,...,Ha
802         "$Mal0    = [\\u0D05-\\u0D39];"    // Malayalam Letter A,...,Ha
803         "$MalV    = \\u0D4D;"              // Malayalam Sign Virama
804         "$Mal1    = [\\u0D15-\\u0D39];"    // Malayalam Letter A,...,Ha
805         "$RI      = [\\U0001F1E6-\\U0001F1FF];" // Emoji regional indicators
806         "!!chain;"
807         "!!forward;"
808         "$CR $LF;"
809         "$L ($L | $V | $LV | $LVT);"
810         "($LV | $V) ($V | $T);"
811         "($LVT | $T) $T;"
812         "[^$Control $CR $LF] $Extend;"
813         "[^$Control $CR $LF] $SpacingMark;"
814         "$RI $RI / $RI;"
815         "$RI $RI;"
816         "$Hin0 $HinV $Hin1;"               // Devanagari Virama (forward)
817         "$Ben0 $BenV $Ben1;"               // Bengali Virama (forward)
818         "$Pan0 $PanV $Pan1;"               // Gurmukhi Virama (forward)
819         "$Guj0 $GujV $Guj1;"               // Gujarati Virama (forward)
820         "$Ori0 $OriV $Ori1;"               // Oriya Virama (forward)
821         "$Tel0 $TelV $Tel1;"               // Telugu Virama (forward)
822         "$Kan0 $KanV $Kan1;"               // Kannada Virama (forward)
823         "$Mal0 $MalV $Mal1;"               // Malayalam Virama (forward)
824         "!!reverse;"
825         "$LF $CR;"
826         "($L | $V | $LV | $LVT) $L;"
827         "($V | $T) ($LV | $V);"
828         "$T ($LVT | $T);"
829         "$Extend      [^$Control $CR $LF];"
830         "$SpacingMark [^$Control $CR $LF];"
831         "$RI $RI / $RI $RI;"
832         "$RI $RI;"
833         "$Hin1 $HinV $Hin0;"               // Devanagari Virama (backward)
834         "$Ben1 $BenV $Ben0;"               // Bengali Virama (backward)
835         "$Pan1 $PanV $Pan0;"               // Gurmukhi Virama (backward)
836         "$Guj1 $GujV $Guj0;"               // Gujarati Virama (backward)
837         "$Ori1 $OriV $Ori0;"               // Gujarati Virama (backward)
838         "$Tel1 $TelV $Tel0;"               // Telugu Virama (backward)
839         "$Kan1 $KanV $Kan0;"               // Kannada Virama (backward)
840         "$Mal1 $MalV $Mal0;"               // Malayalam Virama (backward)
841         "!!safe_reverse;"
842         "!!safe_forward;";
843
844     return setUpIteratorWithRules(kRules, string, length);
845 }
846
847 }