- add sources.
[platform/framework/web/crosswalk.git] / src / base / i18n / break_iterator_unittest.cc
1 // Copyright (c) 2011 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4
5 #include "base/i18n/break_iterator.h"
6
7 #include "base/strings/string_piece.h"
8 #include "base/strings/stringprintf.h"
9 #include "base/strings/utf_string_conversions.h"
10 #include "testing/gtest/include/gtest/gtest.h"
11
12 namespace base {
13 namespace i18n {
14
15 TEST(BreakIteratorTest, BreakWordEmpty) {
16   string16 empty;
17   BreakIterator iter(empty, BreakIterator::BREAK_WORD);
18   ASSERT_TRUE(iter.Init());
19   EXPECT_FALSE(iter.Advance());
20   EXPECT_FALSE(iter.IsWord());
21   EXPECT_FALSE(iter.Advance());  // Test unexpected advance after end.
22   EXPECT_FALSE(iter.IsWord());
23 }
24
25 TEST(BreakIteratorTest, BreakWord) {
26   string16 space(UTF8ToUTF16(" "));
27   string16 str(UTF8ToUTF16(" foo bar! \npouet boom"));
28   BreakIterator iter(str, BreakIterator::BREAK_WORD);
29   ASSERT_TRUE(iter.Init());
30   EXPECT_TRUE(iter.Advance());
31   EXPECT_FALSE(iter.IsWord());
32   EXPECT_EQ(space, iter.GetString());
33   EXPECT_TRUE(iter.Advance());
34   EXPECT_TRUE(iter.IsWord());
35   EXPECT_EQ(UTF8ToUTF16("foo"), iter.GetString());
36   EXPECT_TRUE(iter.Advance());
37   EXPECT_FALSE(iter.IsWord());
38   EXPECT_EQ(space, iter.GetString());
39   EXPECT_TRUE(iter.Advance());
40   EXPECT_TRUE(iter.IsWord());
41   EXPECT_EQ(UTF8ToUTF16("bar"), iter.GetString());
42   EXPECT_TRUE(iter.Advance());
43   EXPECT_FALSE(iter.IsWord());
44   EXPECT_EQ(UTF8ToUTF16("!"), iter.GetString());
45   EXPECT_TRUE(iter.Advance());
46   EXPECT_FALSE(iter.IsWord());
47   EXPECT_EQ(space, iter.GetString());
48   EXPECT_TRUE(iter.Advance());
49   EXPECT_FALSE(iter.IsWord());
50   EXPECT_EQ(UTF8ToUTF16("\n"), iter.GetString());
51   EXPECT_TRUE(iter.Advance());
52   EXPECT_TRUE(iter.IsWord());
53   EXPECT_EQ(UTF8ToUTF16("pouet"), iter.GetString());
54   EXPECT_TRUE(iter.Advance());
55   EXPECT_FALSE(iter.IsWord());
56   EXPECT_EQ(space, iter.GetString());
57   EXPECT_TRUE(iter.Advance());
58   EXPECT_TRUE(iter.IsWord());
59   EXPECT_EQ(UTF8ToUTF16("boom"), iter.GetString());
60   EXPECT_FALSE(iter.Advance());
61   EXPECT_FALSE(iter.IsWord());
62   EXPECT_FALSE(iter.Advance());  // Test unexpected advance after end.
63   EXPECT_FALSE(iter.IsWord());
64 }
65
66 TEST(BreakIteratorTest, BreakWide16) {
67   // Two greek words separated by space.
68   const string16 str(WideToUTF16(
69       L"\x03a0\x03b1\x03b3\x03ba\x03cc\x03c3\x03bc\x03b9"
70       L"\x03bf\x03c2\x0020\x0399\x03c3\x03c4\x03cc\x03c2"));
71   const string16 word1(str.substr(0, 10));
72   const string16 word2(str.substr(11, 5));
73   BreakIterator iter(str, BreakIterator::BREAK_WORD);
74   ASSERT_TRUE(iter.Init());
75   EXPECT_TRUE(iter.Advance());
76   EXPECT_TRUE(iter.IsWord());
77   EXPECT_EQ(word1, iter.GetString());
78   EXPECT_TRUE(iter.Advance());
79   EXPECT_FALSE(iter.IsWord());
80   EXPECT_EQ(UTF8ToUTF16(" "), iter.GetString());
81   EXPECT_TRUE(iter.Advance());
82   EXPECT_TRUE(iter.IsWord());
83   EXPECT_EQ(word2, iter.GetString());
84   EXPECT_FALSE(iter.Advance());
85   EXPECT_FALSE(iter.IsWord());
86   EXPECT_FALSE(iter.Advance());  // Test unexpected advance after end.
87   EXPECT_FALSE(iter.IsWord());
88 }
89
90 TEST(BreakIteratorTest, BreakWide32) {
91   // U+1D49C MATHEMATICAL SCRIPT CAPITAL A
92   const char* very_wide_char = "\xF0\x9D\x92\x9C";
93   const string16 str(
94       UTF8ToUTF16(base::StringPrintf("%s a", very_wide_char)));
95   const string16 very_wide_word(str.substr(0, 2));
96
97   BreakIterator iter(str, BreakIterator::BREAK_WORD);
98   ASSERT_TRUE(iter.Init());
99   EXPECT_TRUE(iter.Advance());
100   EXPECT_TRUE(iter.IsWord());
101   EXPECT_EQ(very_wide_word, iter.GetString());
102   EXPECT_TRUE(iter.Advance());
103   EXPECT_FALSE(iter.IsWord());
104   EXPECT_EQ(UTF8ToUTF16(" "), iter.GetString());
105   EXPECT_TRUE(iter.Advance());
106   EXPECT_TRUE(iter.IsWord());
107   EXPECT_EQ(UTF8ToUTF16("a"), iter.GetString());
108   EXPECT_FALSE(iter.Advance());
109   EXPECT_FALSE(iter.IsWord());
110   EXPECT_FALSE(iter.Advance());  // Test unexpected advance after end.
111   EXPECT_FALSE(iter.IsWord());
112 }
113
114 TEST(BreakIteratorTest, BreakSpaceEmpty) {
115   string16 empty;
116   BreakIterator iter(empty, BreakIterator::BREAK_SPACE);
117   ASSERT_TRUE(iter.Init());
118   EXPECT_FALSE(iter.Advance());
119   EXPECT_FALSE(iter.IsWord());
120   EXPECT_FALSE(iter.Advance());  // Test unexpected advance after end.
121   EXPECT_FALSE(iter.IsWord());
122 }
123
124 TEST(BreakIteratorTest, BreakSpace) {
125   string16 str(UTF8ToUTF16(" foo bar! \npouet boom"));
126   BreakIterator iter(str, BreakIterator::BREAK_SPACE);
127   ASSERT_TRUE(iter.Init());
128   EXPECT_TRUE(iter.Advance());
129   EXPECT_FALSE(iter.IsWord());
130   EXPECT_EQ(UTF8ToUTF16(" "), iter.GetString());
131   EXPECT_TRUE(iter.Advance());
132   EXPECT_FALSE(iter.IsWord());
133   EXPECT_EQ(UTF8ToUTF16("foo "), iter.GetString());
134   EXPECT_TRUE(iter.Advance());
135   EXPECT_FALSE(iter.IsWord());
136   EXPECT_EQ(UTF8ToUTF16("bar! \n"), iter.GetString());
137   EXPECT_TRUE(iter.Advance());
138   EXPECT_FALSE(iter.IsWord());
139   EXPECT_EQ(UTF8ToUTF16("pouet "), iter.GetString());
140   EXPECT_TRUE(iter.Advance());
141   EXPECT_FALSE(iter.IsWord());
142   EXPECT_EQ(UTF8ToUTF16("boom"), iter.GetString());
143   EXPECT_FALSE(iter.Advance());
144   EXPECT_FALSE(iter.IsWord());
145   EXPECT_FALSE(iter.Advance());  // Test unexpected advance after end.
146   EXPECT_FALSE(iter.IsWord());
147 }
148
149 TEST(BreakIteratorTest, BreakSpaceSP) {
150   string16 str(UTF8ToUTF16(" foo bar! \npouet boom "));
151   BreakIterator iter(str, BreakIterator::BREAK_SPACE);
152   ASSERT_TRUE(iter.Init());
153   EXPECT_TRUE(iter.Advance());
154   EXPECT_FALSE(iter.IsWord());
155   EXPECT_EQ(UTF8ToUTF16(" "), iter.GetString());
156   EXPECT_TRUE(iter.Advance());
157   EXPECT_FALSE(iter.IsWord());
158   EXPECT_EQ(UTF8ToUTF16("foo "), iter.GetString());
159   EXPECT_TRUE(iter.Advance());
160   EXPECT_FALSE(iter.IsWord());
161   EXPECT_EQ(UTF8ToUTF16("bar! \n"), iter.GetString());
162   EXPECT_TRUE(iter.Advance());
163   EXPECT_FALSE(iter.IsWord());
164   EXPECT_EQ(UTF8ToUTF16("pouet "), iter.GetString());
165   EXPECT_TRUE(iter.Advance());
166   EXPECT_FALSE(iter.IsWord());
167   EXPECT_EQ(UTF8ToUTF16("boom "), iter.GetString());
168   EXPECT_FALSE(iter.Advance());
169   EXPECT_FALSE(iter.IsWord());
170   EXPECT_FALSE(iter.Advance());  // Test unexpected advance after end.
171   EXPECT_FALSE(iter.IsWord());
172 }
173
174 TEST(BreakIteratorTest, BreakSpacekWide16) {
175   // Two Greek words.
176   const string16 str(WideToUTF16(
177       L"\x03a0\x03b1\x03b3\x03ba\x03cc\x03c3\x03bc\x03b9"
178       L"\x03bf\x03c2\x0020\x0399\x03c3\x03c4\x03cc\x03c2"));
179   const string16 word1(str.substr(0, 11));
180   const string16 word2(str.substr(11, 5));
181   BreakIterator iter(str, BreakIterator::BREAK_SPACE);
182   ASSERT_TRUE(iter.Init());
183   EXPECT_TRUE(iter.Advance());
184   EXPECT_FALSE(iter.IsWord());
185   EXPECT_EQ(word1, iter.GetString());
186   EXPECT_TRUE(iter.Advance());
187   EXPECT_FALSE(iter.IsWord());
188   EXPECT_EQ(word2, iter.GetString());
189   EXPECT_FALSE(iter.Advance());
190   EXPECT_FALSE(iter.IsWord());
191   EXPECT_FALSE(iter.Advance());  // Test unexpected advance after end.
192   EXPECT_FALSE(iter.IsWord());
193 }
194
195 TEST(BreakIteratorTest, BreakSpaceWide32) {
196   // U+1D49C MATHEMATICAL SCRIPT CAPITAL A
197   const char* very_wide_char = "\xF0\x9D\x92\x9C";
198   const string16 str(
199       UTF8ToUTF16(base::StringPrintf("%s a", very_wide_char)));
200   const string16 very_wide_word(str.substr(0, 3));
201
202   BreakIterator iter(str, BreakIterator::BREAK_SPACE);
203   ASSERT_TRUE(iter.Init());
204   EXPECT_TRUE(iter.Advance());
205   EXPECT_FALSE(iter.IsWord());
206   EXPECT_EQ(very_wide_word, iter.GetString());
207   EXPECT_TRUE(iter.Advance());
208   EXPECT_FALSE(iter.IsWord());
209   EXPECT_EQ(UTF8ToUTF16("a"), iter.GetString());
210   EXPECT_FALSE(iter.Advance());
211   EXPECT_FALSE(iter.IsWord());
212   EXPECT_FALSE(iter.Advance());  // Test unexpected advance after end.
213   EXPECT_FALSE(iter.IsWord());
214 }
215
216 TEST(BreakIteratorTest, BreakLineEmpty) {
217   string16 empty;
218   BreakIterator iter(empty, BreakIterator::BREAK_NEWLINE);
219   ASSERT_TRUE(iter.Init());
220   EXPECT_FALSE(iter.Advance());
221   EXPECT_FALSE(iter.IsWord());
222   EXPECT_FALSE(iter.Advance());   // Test unexpected advance after end.
223   EXPECT_FALSE(iter.IsWord());
224 }
225
226 TEST(BreakIteratorTest, BreakLine) {
227   string16 nl(UTF8ToUTF16("\n"));
228   string16 str(UTF8ToUTF16("\nfoo bar!\n\npouet boom"));
229   BreakIterator iter(str, BreakIterator::BREAK_NEWLINE);
230   ASSERT_TRUE(iter.Init());
231   EXPECT_TRUE(iter.Advance());
232   EXPECT_FALSE(iter.IsWord());
233   EXPECT_EQ(nl, iter.GetString());
234   EXPECT_TRUE(iter.Advance());
235   EXPECT_FALSE(iter.IsWord());
236   EXPECT_EQ(UTF8ToUTF16("foo bar!\n"), iter.GetString());
237   EXPECT_TRUE(iter.Advance());
238   EXPECT_FALSE(iter.IsWord());
239   EXPECT_EQ(nl, iter.GetString());
240   EXPECT_TRUE(iter.Advance());
241   EXPECT_FALSE(iter.IsWord());
242   EXPECT_EQ(UTF8ToUTF16("pouet boom"), iter.GetString());
243   EXPECT_FALSE(iter.Advance());
244   EXPECT_FALSE(iter.IsWord());
245   EXPECT_FALSE(iter.Advance());   // Test unexpected advance after end.
246   EXPECT_FALSE(iter.IsWord());
247 }
248
249 TEST(BreakIteratorTest, BreakLineNL) {
250   string16 nl(UTF8ToUTF16("\n"));
251   string16 str(UTF8ToUTF16("\nfoo bar!\n\npouet boom\n"));
252   BreakIterator iter(str, BreakIterator::BREAK_NEWLINE);
253   ASSERT_TRUE(iter.Init());
254   EXPECT_TRUE(iter.Advance());
255   EXPECT_FALSE(iter.IsWord());
256   EXPECT_EQ(nl, iter.GetString());
257   EXPECT_TRUE(iter.Advance());
258   EXPECT_FALSE(iter.IsWord());
259   EXPECT_EQ(UTF8ToUTF16("foo bar!\n"), iter.GetString());
260   EXPECT_TRUE(iter.Advance());
261   EXPECT_FALSE(iter.IsWord());
262   EXPECT_EQ(nl, iter.GetString());
263   EXPECT_TRUE(iter.Advance());
264   EXPECT_FALSE(iter.IsWord());
265   EXPECT_EQ(UTF8ToUTF16("pouet boom\n"), iter.GetString());
266   EXPECT_FALSE(iter.Advance());
267   EXPECT_FALSE(iter.IsWord());
268   EXPECT_FALSE(iter.Advance());   // Test unexpected advance after end.
269   EXPECT_FALSE(iter.IsWord());
270 }
271
272 TEST(BreakIteratorTest, BreakLineWide16) {
273   // Two Greek words separated by newline.
274   const string16 str(WideToUTF16(
275       L"\x03a0\x03b1\x03b3\x03ba\x03cc\x03c3\x03bc\x03b9"
276       L"\x03bf\x03c2\x000a\x0399\x03c3\x03c4\x03cc\x03c2"));
277   const string16 line1(str.substr(0, 11));
278   const string16 line2(str.substr(11, 5));
279   BreakIterator iter(str, BreakIterator::BREAK_NEWLINE);
280   ASSERT_TRUE(iter.Init());
281   EXPECT_TRUE(iter.Advance());
282   EXPECT_FALSE(iter.IsWord());
283   EXPECT_EQ(line1, iter.GetString());
284   EXPECT_TRUE(iter.Advance());
285   EXPECT_FALSE(iter.IsWord());
286   EXPECT_EQ(line2, iter.GetString());
287   EXPECT_FALSE(iter.Advance());
288   EXPECT_FALSE(iter.IsWord());
289   EXPECT_FALSE(iter.Advance());   // Test unexpected advance after end.
290   EXPECT_FALSE(iter.IsWord());
291 }
292
293 TEST(BreakIteratorTest, BreakLineWide32) {
294   // U+1D49C MATHEMATICAL SCRIPT CAPITAL A
295   const char* very_wide_char = "\xF0\x9D\x92\x9C";
296   const string16 str(
297       UTF8ToUTF16(base::StringPrintf("%s\na", very_wide_char)));
298   const string16 very_wide_line(str.substr(0, 3));
299   BreakIterator iter(str, BreakIterator::BREAK_NEWLINE);
300   ASSERT_TRUE(iter.Init());
301   EXPECT_TRUE(iter.Advance());
302   EXPECT_FALSE(iter.IsWord());
303   EXPECT_EQ(very_wide_line, iter.GetString());
304   EXPECT_TRUE(iter.Advance());
305   EXPECT_FALSE(iter.IsWord());
306   EXPECT_EQ(UTF8ToUTF16("a"), iter.GetString());
307   EXPECT_FALSE(iter.Advance());
308   EXPECT_FALSE(iter.IsWord());
309   EXPECT_FALSE(iter.Advance());   // Test unexpected advance after end.
310   EXPECT_FALSE(iter.IsWord());
311 }
312
313 TEST(BreakIteratorTest, BreakCharacter) {
314   static const wchar_t* kCharacters[] = {
315     // An English word consisting of four ASCII characters.
316     L"w", L"o", L"r", L"d", L" ",
317     // A Hindi word (which means "Hindi") consisting of three Devanagari
318     // characters.
319     L"\x0939\x093F", L"\x0928\x094D", L"\x0926\x0940", L" ",
320     // A Thai word (which means "feel") consisting of three Thai characters.
321     L"\x0E23\x0E39\x0E49", L"\x0E2A\x0E36", L"\x0E01", L" ",
322   };
323   std::vector<string16> characters;
324   string16 text;
325   for (size_t i = 0; i < arraysize(kCharacters); ++i) {
326     characters.push_back(WideToUTF16(kCharacters[i]));
327     text.append(characters.back());
328   }
329   BreakIterator iter(text, BreakIterator::BREAK_CHARACTER);
330   ASSERT_TRUE(iter.Init());
331   for (size_t i = 0; i < arraysize(kCharacters); ++i) {
332     EXPECT_TRUE(iter.Advance());
333     EXPECT_EQ(characters[i], iter.GetString());
334   }
335 }
336
337 }  // namespace i18n
338 }  // namespace base