Fix emulator build error
[platform/framework/web/chromium-efl.git] / base / i18n / string_search_unittest.cc
1 // Copyright 2011 The Chromium Authors
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4
5 #include "base/i18n/string_search.h"
6
7 #include <stddef.h>
8
9 #include <string>
10 #include <vector>
11
12 #include "base/i18n/rtl.h"
13 #include "base/strings/utf_string_conversions.h"
14 #include "testing/gtest/include/gtest/gtest.h"
15 #include "third_party/icu/source/i18n/unicode/usearch.h"
16
17 namespace base {
18 namespace i18n {
19
// Expects a forward, case-insensitive search for |find_this| inside |in_this|
// to succeed with the match starting at |ex_start| and spanning |ex_len| code
// units. The check is performed twice: once through
// StringSearchIgnoringCaseAndAccents() and once through StringSearch() with
// the trailing arguments (false, true), which the sibling macros in this file
// use for "ignore case" and "search forwards" respectively.
//
// Wrapped in do { } while (0) so the expansion is a single statement that is
// safe inside an unbraced if/else. Arguments are parenthesized, and the
// locals use names that will not capture a caller variable called |index| or
// |length|. Note that every argument is evaluated twice.
#define EXPECT_MATCH_IGNORE_CASE(find_this, in_this, ex_start, ex_len) \
  do { \
    size_t actual_index = 0; \
    size_t actual_length = 0; \
    EXPECT_TRUE(StringSearchIgnoringCaseAndAccents( \
        (find_this), (in_this), &actual_index, &actual_length)); \
    EXPECT_EQ((ex_start), actual_index); \
    EXPECT_EQ((ex_len), actual_length); \
    /* Reset so the second search cannot pass on stale results. */ \
    actual_index = 0; \
    actual_length = 0; \
    EXPECT_TRUE(StringSearch((find_this), (in_this), &actual_index, \
                             &actual_length, false, true)); \
    EXPECT_EQ((ex_start), actual_index); \
    EXPECT_EQ((ex_len), actual_length); \
  } while (0)
35
// Expects a forward, case-sensitive search for |find_this| inside |in_this|
// (StringSearch() with trailing arguments (true, true)) to succeed with the
// match starting at |ex_start| and spanning |ex_len| code units.
//
// Wrapped in do { } while (0) so the expansion is a single statement that is
// safe inside an unbraced if/else. Arguments are parenthesized, and the
// locals use names that will not capture a caller variable called |index| or
// |length|.
#define EXPECT_MATCH_SENSITIVE(find_this, in_this, ex_start, ex_len) \
  do { \
    size_t actual_index = 0; \
    size_t actual_length = 0; \
    EXPECT_TRUE(StringSearch((find_this), (in_this), &actual_index, \
                             &actual_length, true, true)); \
    EXPECT_EQ((ex_start), actual_index); \
    EXPECT_EQ((ex_len), actual_length); \
  } while (0)
45
// Expects a backward, case-insensitive search for |find_this| inside
// |in_this| (StringSearch() with trailing arguments (false, false)) to
// succeed with the match starting at |ex_start| and spanning |ex_len| code
// units.
//
// Wrapped in do { } while (0) so the expansion is a single statement that is
// safe inside an unbraced if/else. Arguments are parenthesized, and the
// locals use names that will not capture a caller variable called |index| or
// |length|.
#define EXPECT_MATCH_IGNORE_CASE_BACKWARDS(find_this, in_this, ex_start, \
                                           ex_len) \
  do { \
    size_t actual_index = 0; \
    size_t actual_length = 0; \
    EXPECT_TRUE(StringSearch((find_this), (in_this), &actual_index, \
                             &actual_length, false, false)); \
    EXPECT_EQ((ex_start), actual_index); \
    EXPECT_EQ((ex_len), actual_length); \
  } while (0)
56
// Expects a backward, case-sensitive search for |find_this| inside |in_this|
// (StringSearch() with trailing arguments (true, false)) to succeed with the
// match starting at |ex_start| and spanning |ex_len| code units.
//
// Wrapped in do { } while (0) so the expansion is a single statement that is
// safe inside an unbraced if/else. Arguments are parenthesized, and the
// locals use names that will not capture a caller variable called |index| or
// |length|.
#define EXPECT_MATCH_SENSITIVE_BACKWARDS(find_this, in_this, ex_start, ex_len) \
  do { \
    size_t actual_index = 0; \
    size_t actual_length = 0; \
    EXPECT_TRUE(StringSearch((find_this), (in_this), &actual_index, \
                             &actual_length, true, false)); \
    EXPECT_EQ((ex_start), actual_index); \
    EXPECT_EQ((ex_len), actual_length); \
  } while (0)
66
// Expects a forward, case-insensitive search for |find_this| inside |in_this|
// to find nothing. The check is performed twice: once through
// StringSearchIgnoringCaseAndAccents() and once through StringSearch() with
// trailing arguments (false, true).
//
// Wrapped in do { } while (0) so the expansion is a single statement that is
// safe inside an unbraced if/else. Arguments are parenthesized, and the
// out-parameter locals use names that will not capture a caller variable
// called |index| or |length|. Note that both arguments are evaluated twice.
#define EXPECT_MISS_IGNORE_CASE(find_this, in_this) \
  do { \
    size_t actual_index = 0; \
    size_t actual_length = 0; \
    EXPECT_FALSE(StringSearchIgnoringCaseAndAccents( \
        (find_this), (in_this), &actual_index, &actual_length)); \
    actual_index = 0; \
    actual_length = 0; \
    EXPECT_FALSE(StringSearch((find_this), (in_this), &actual_index, \
                              &actual_length, false, true)); \
  } while (0)
78
// Expects a forward, case-sensitive search for |find_this| inside |in_this|
// (StringSearch() with trailing arguments (true, true)) to find nothing.
//
// Wrapped in do { } while (0) so the expansion is a single statement that is
// safe inside an unbraced if/else. Arguments are parenthesized, and the
// out-parameter locals use names that will not capture a caller variable
// called |index| or |length|.
#define EXPECT_MISS_SENSITIVE(find_this, in_this) \
  do { \
    size_t actual_index = 0; \
    size_t actual_length = 0; \
    EXPECT_FALSE(StringSearch((find_this), (in_this), &actual_index, \
                              &actual_length, true, true)); \
  } while (0)
86
// Expects a backward, case-insensitive search for |find_this| inside
// |in_this| (StringSearch() with trailing arguments (false, false)) to find
// nothing.
//
// Wrapped in do { } while (0) so the expansion is a single statement that is
// safe inside an unbraced if/else. Arguments are parenthesized, and the
// out-parameter locals use names that will not capture a caller variable
// called |index| or |length|.
#define EXPECT_MISS_IGNORE_CASE_BACKWARDS(find_this, in_this) \
  do { \
    size_t actual_index = 0; \
    size_t actual_length = 0; \
    EXPECT_FALSE(StringSearch((find_this), (in_this), &actual_index, \
                              &actual_length, false, false)); \
  } while (0)
94
// Expects a backward, case-sensitive search for |find_this| inside |in_this|
// (StringSearch() with trailing arguments (true, false)) to find nothing.
//
// Wrapped in do { } while (0) so the expansion is a single statement that is
// safe inside an unbraced if/else. Arguments are parenthesized, and the
// out-parameter locals use names that will not capture a caller variable
// called |index| or |length|.
#define EXPECT_MISS_SENSITIVE_BACKWARDS(find_this, in_this) \
  do { \
    size_t actual_index = 0; \
    size_t actual_length = 0; \
    EXPECT_FALSE(StringSearch((find_this), (in_this), &actual_index, \
                              &actual_length, true, false)); \
  } while (0)
102
103 // Note on setting default locale for testing: The current default locale on
104 // the Mac trybot is en_US_POSIX, with which primary-level collation strength
105 // string search is case-sensitive, when normally it should be
106 // case-insensitive. In other locales (including en_US which English speakers
107 // in the U.S. use), this search would be case-insensitive as expected.
108
109 TEST(StringSearchTest, ASCII) {
110   std::string default_locale(uloc_getDefault());
111   bool locale_is_posix = (default_locale == "en_US_POSIX");
112   if (locale_is_posix)
113     SetICUDefaultLocale("en_US");
114
115   EXPECT_MATCH_IGNORE_CASE(u"hello", u"hello world", 0U, 5U);
116
117   EXPECT_MISS_IGNORE_CASE(u"h    e l l o", u"h   e l l o");
118
119   EXPECT_MATCH_IGNORE_CASE(u"aabaaa", u"aaabaabaaa", 4U, 6U);
120
121   EXPECT_MISS_IGNORE_CASE(u"searching within empty string", std::u16string());
122
123   EXPECT_MATCH_IGNORE_CASE(std::u16string(), u"searching for empty string", 0U,
124                            0U);
125
126   EXPECT_MATCH_IGNORE_CASE(u"case insensitivity", u"CaSe InSeNsItIvItY", 0U,
127                            18U);
128
129   EXPECT_MATCH_SENSITIVE(u"aabaaa", u"aaabaabaaa", 4U, 6U);
130
131   EXPECT_MISS_SENSITIVE(u"searching within empty string", std::u16string());
132
133   EXPECT_MATCH_SENSITIVE(std::u16string(), u"searching for empty string", 0U,
134                          0U);
135
136   EXPECT_MISS_SENSITIVE(u"case insensitivity", u"CaSe InSeNsItIvItY");
137
138   if (locale_is_posix)
139     SetICUDefaultLocale(default_locale.data());
140 }
141
142 TEST(StringSearchTest, UnicodeLocaleIndependent) {
143   // Base characters
144   const std::u16string e_base = u"e";
145   const std::u16string E_base = u"E";
146   const std::u16string a_base = u"a";
147
148   // Composed characters
149   const std::u16string e_with_acute_accent = u"\u00e9";
150   const std::u16string E_with_acute_accent = u"\u00c9";
151   const std::u16string e_with_grave_accent = u"\u00e8";
152   const std::u16string E_with_grave_accent = u"\u00c8";
153   const std::u16string a_with_acute_accent = u"\u00e1";
154
155   // Decomposed characters
156   const std::u16string e_with_acute_combining_mark = u"e\u0301";
157   const std::u16string E_with_acute_combining_mark = u"E\u0301";
158   const std::u16string e_with_grave_combining_mark = u"e\u0300";
159   const std::u16string E_with_grave_combining_mark = u"E\u0300";
160   const std::u16string a_with_acute_combining_mark = u"a\u0301";
161
162   std::string default_locale(uloc_getDefault());
163   bool locale_is_posix = (default_locale == "en_US_POSIX");
164   if (locale_is_posix)
165     SetICUDefaultLocale("en_US");
166
167   EXPECT_MATCH_IGNORE_CASE(e_base, e_with_acute_accent, 0U,
168                            e_with_acute_accent.size());
169
170   EXPECT_MATCH_IGNORE_CASE(e_with_acute_accent, e_base, 0U, e_base.size());
171
172   EXPECT_MATCH_IGNORE_CASE(e_base, e_with_acute_combining_mark, 0U,
173                            e_with_acute_combining_mark.size());
174
175   EXPECT_MATCH_IGNORE_CASE(e_with_acute_combining_mark, e_base, 0U,
176                            e_base.size());
177
178   EXPECT_MATCH_IGNORE_CASE(e_with_acute_combining_mark, e_with_acute_accent, 0U,
179                            e_with_acute_accent.size());
180
181   EXPECT_MATCH_IGNORE_CASE(e_with_acute_accent, e_with_acute_combining_mark, 0U,
182                            e_with_acute_combining_mark.size());
183
184   EXPECT_MATCH_IGNORE_CASE(e_with_acute_combining_mark,
185                            e_with_grave_combining_mark, 0U,
186                            e_with_grave_combining_mark.size());
187
188   EXPECT_MATCH_IGNORE_CASE(e_with_grave_combining_mark,
189                            e_with_acute_combining_mark, 0U,
190                            e_with_acute_combining_mark.size());
191
192   EXPECT_MATCH_IGNORE_CASE(e_with_acute_combining_mark, e_with_grave_accent, 0U,
193                            e_with_grave_accent.size());
194
195   EXPECT_MATCH_IGNORE_CASE(e_with_grave_accent, e_with_acute_combining_mark, 0U,
196                            e_with_acute_combining_mark.size());
197
198   EXPECT_MATCH_IGNORE_CASE(E_with_acute_accent, e_with_acute_accent, 0U,
199                            e_with_acute_accent.size());
200
201   EXPECT_MATCH_IGNORE_CASE(E_with_grave_accent, e_with_acute_accent, 0U,
202                            e_with_acute_accent.size());
203
204   EXPECT_MATCH_IGNORE_CASE(E_with_acute_combining_mark, e_with_grave_accent, 0U,
205                            e_with_grave_accent.size());
206
207   EXPECT_MATCH_IGNORE_CASE(E_with_grave_combining_mark, e_with_acute_accent, 0U,
208                            e_with_acute_accent.size());
209
210   EXPECT_MATCH_IGNORE_CASE(E_base, e_with_grave_accent, 0U,
211                            e_with_grave_accent.size());
212
213   EXPECT_MISS_IGNORE_CASE(a_with_acute_accent, e_with_acute_accent);
214
215   EXPECT_MISS_IGNORE_CASE(a_with_acute_combining_mark,
216                           e_with_acute_combining_mark);
217
218   EXPECT_MISS_SENSITIVE(e_base, e_with_acute_accent);
219
220   EXPECT_MISS_SENSITIVE(e_with_acute_accent, e_base);
221
222   EXPECT_MISS_SENSITIVE(e_base, e_with_acute_combining_mark);
223
224   EXPECT_MISS_SENSITIVE(e_with_acute_combining_mark, e_base);
225
226   EXPECT_MATCH_SENSITIVE(e_with_acute_combining_mark, e_with_acute_accent, 0U,
227                          1U);
228
229   EXPECT_MATCH_SENSITIVE(e_with_acute_accent, e_with_acute_combining_mark, 0U,
230                          2U);
231
232   EXPECT_MISS_SENSITIVE(e_with_acute_combining_mark,
233                         e_with_grave_combining_mark);
234
235   EXPECT_MISS_SENSITIVE(e_with_grave_combining_mark,
236                         e_with_acute_combining_mark);
237
238   EXPECT_MISS_SENSITIVE(e_with_acute_combining_mark, e_with_grave_accent);
239
240   EXPECT_MISS_SENSITIVE(e_with_grave_accent, e_with_acute_combining_mark);
241
242   EXPECT_MISS_SENSITIVE(E_with_acute_accent, e_with_acute_accent);
243
244   EXPECT_MISS_SENSITIVE(E_with_grave_accent, e_with_acute_accent);
245
246   EXPECT_MISS_SENSITIVE(E_with_acute_combining_mark, e_with_grave_accent);
247
248   EXPECT_MISS_SENSITIVE(E_with_grave_combining_mark, e_with_acute_accent);
249
250   EXPECT_MISS_SENSITIVE(E_base, e_with_grave_accent);
251
252   EXPECT_MISS_SENSITIVE(a_with_acute_accent, e_with_acute_accent);
253
254   EXPECT_MISS_SENSITIVE(a_with_acute_combining_mark,
255                         e_with_acute_combining_mark);
256
257   EXPECT_MATCH_SENSITIVE(a_with_acute_combining_mark,
258                          a_with_acute_combining_mark, 0U, 2U);
259
260   if (locale_is_posix)
261     SetICUDefaultLocale(default_locale.data());
262 }
263
264 TEST(StringSearchTest, UnicodeLocaleDependent) {
265   // Base characters
266   const std::u16string a_base = u"a";
267
268   // Composed characters
269   const std::u16string a_with_ring = u"\u00e5";
270
271   EXPECT_TRUE(StringSearchIgnoringCaseAndAccents(a_base, a_with_ring, nullptr,
272                                                  nullptr));
273   EXPECT_TRUE(StringSearch(a_base, a_with_ring, nullptr, nullptr, false, true));
274
275   const char* default_locale = uloc_getDefault();
276   SetICUDefaultLocale("da");
277
278   EXPECT_FALSE(StringSearchIgnoringCaseAndAccents(a_base, a_with_ring, nullptr,
279                                                   nullptr));
280   EXPECT_FALSE(
281       StringSearch(a_base, a_with_ring, nullptr, nullptr, false, true));
282
283   SetICUDefaultLocale(default_locale);
284 }
285
286 TEST(StringSearchTest, SearchBackwards) {
287   std::string default_locale(uloc_getDefault());
288   bool locale_is_posix = (default_locale == "en_US_POSIX");
289   if (locale_is_posix)
290     SetICUDefaultLocale("en_US");
291
292   EXPECT_MATCH_IGNORE_CASE_BACKWARDS(u"ab", u"ABAB", 2U, 2U);
293   EXPECT_MATCH_SENSITIVE_BACKWARDS(u"ab", u"abab", 2U, 2U);
294   EXPECT_MISS_SENSITIVE_BACKWARDS(u"ab", u"ABAB");
295
296   if (locale_is_posix)
297     SetICUDefaultLocale(default_locale.data());
298 }
299
300 TEST(StringSearchTest, FixedPatternMultipleSearch) {
301   std::string default_locale(uloc_getDefault());
302   bool locale_is_posix = (default_locale == "en_US_POSIX");
303   if (locale_is_posix)
304     SetICUDefaultLocale("en_US");
305
306   size_t index = 0;
307   size_t length = 0;
308
309   // Search "foo" over multiple texts.
310   FixedPatternStringSearch query1(u"foo", true);
311   EXPECT_TRUE(query1.Search(u"12foo34", &index, &length, true));
312   EXPECT_EQ(2U, index);
313   EXPECT_EQ(3U, length);
314   EXPECT_FALSE(query1.Search(u"bye", &index, &length, true));
315   EXPECT_FALSE(query1.Search(u"FOO", &index, &length, true));
316   EXPECT_TRUE(query1.Search(u"foobarfoo", &index, &length, true));
317   EXPECT_EQ(0U, index);
318   EXPECT_EQ(3U, length);
319   EXPECT_TRUE(query1.Search(u"foobarfoo", &index, &length, false));
320   EXPECT_EQ(6U, index);
321   EXPECT_EQ(3U, length);
322
323   // Search "hello" over multiple texts.
324   FixedPatternStringSearchIgnoringCaseAndAccents query2(u"hello");
325   EXPECT_TRUE(query2.Search(u"12hello34", &index, &length));
326   EXPECT_EQ(2U, index);
327   EXPECT_EQ(5U, length);
328   EXPECT_FALSE(query2.Search(u"bye", &index, &length));
329   EXPECT_TRUE(query2.Search(u"hELLo", &index, &length));
330   EXPECT_EQ(0U, index);
331   EXPECT_EQ(5U, length);
332
333   if (locale_is_posix)
334     SetICUDefaultLocale(default_locale.data());
335 }
336
337 TEST(StringSearchTest, RepeatingStringSearch) {
338   struct MatchResult {
339     int match_index;
340     int match_length;
341   };
342
343   std::string default_locale(uloc_getDefault());
344   bool locale_is_posix = (default_locale == "en_US_POSIX");
345   if (locale_is_posix)
346     SetICUDefaultLocale("en_US");
347
348   const char16_t kPattern[] = u"fox";
349   const char16_t kTarget[] = u"The quick brown fox jumped over the lazy Fox";
350
351   // Case sensitive.
352   {
353     const MatchResult kExpectation[] = {{16, 3}};
354
355     RepeatingStringSearch searcher(kPattern, kTarget, /*case_sensitive=*/true);
356     std::vector<MatchResult> results;
357     int match_index;
358     int match_length;
359     while (searcher.NextMatchResult(match_index, match_length)) {
360       results.push_back(
361           {.match_index = match_index, .match_length = match_length});
362     }
363
364     ASSERT_EQ(std::size(kExpectation), results.size());
365     for (size_t i = 0; i < results.size(); ++i) {
366       EXPECT_EQ(results[i].match_index, kExpectation[i].match_index);
367       EXPECT_EQ(results[i].match_length, kExpectation[i].match_length);
368     }
369   }
370
371   // Case insensitive.
372   {
373     const MatchResult kExpectation[] = {{16, 3}, {41, 3}};
374
375     RepeatingStringSearch searcher(kPattern, kTarget, /*case_sensitive=*/false);
376     std::vector<MatchResult> results;
377     int match_index;
378     int match_length;
379     while (searcher.NextMatchResult(match_index, match_length)) {
380       results.push_back(
381           {.match_index = match_index, .match_length = match_length});
382     }
383
384     ASSERT_EQ(std::size(kExpectation), results.size());
385     for (size_t i = 0; i < results.size(); ++i) {
386       EXPECT_EQ(results[i].match_index, kExpectation[i].match_index);
387       EXPECT_EQ(results[i].match_length, kExpectation[i].match_length);
388     }
389   }
390
391   if (locale_is_posix)
392     SetICUDefaultLocale(default_locale.data());
393 }
394
395 }  // namespace i18n
396 }  // namespace base