Upload upstream chromium 71.0.3578.0
[platform/framework/web/chromium-efl.git] / base / i18n / string_search_unittest.cc
1 // Copyright (c) 2011 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4
5 #include <stddef.h>
6
7 #include <string>
8
9 #include "base/i18n/rtl.h"
10 #include "base/i18n/string_search.h"
11 #include "base/strings/string16.h"
12 #include "base/strings/utf_string_conversions.h"
13 #include "testing/gtest/include/gtest/gtest.h"
14 #include "third_party/icu/source/i18n/unicode/usearch.h"
15
16 namespace base {
17 namespace i18n {
18
19 // Note on setting default locale for testing: The current default locale on
20 // the Mac trybot is en_US_POSIX, with which primary-level collation strength
21 // string search is case-sensitive, when normally it should be
22 // case-insensitive. In other locales (including en_US which English speakers
23 // in the U.S. use), this search would be case-insensitive as expected.
24
25 TEST(StringSearchTest, ASCII) {
26   std::string default_locale(uloc_getDefault());
27   bool locale_is_posix = (default_locale == "en_US_POSIX");
28   if (locale_is_posix)
29     SetICUDefaultLocale("en_US");
30
31   size_t index = 0;
32   size_t length = 0;
33
34   EXPECT_TRUE(StringSearchIgnoringCaseAndAccents(
35       ASCIIToUTF16("hello"), ASCIIToUTF16("hello world"), &index, &length));
36   EXPECT_EQ(0U, index);
37   EXPECT_EQ(5U, length);
38
39   EXPECT_FALSE(StringSearchIgnoringCaseAndAccents(
40       ASCIIToUTF16("h    e l l o"), ASCIIToUTF16("h   e l l o"),
41       &index, &length));
42
43   EXPECT_TRUE(StringSearchIgnoringCaseAndAccents(
44       ASCIIToUTF16("aabaaa"), ASCIIToUTF16("aaabaabaaa"), &index, &length));
45   EXPECT_EQ(4U, index);
46   EXPECT_EQ(6U, length);
47
48   EXPECT_FALSE(StringSearchIgnoringCaseAndAccents(
49       ASCIIToUTF16("searching within empty string"), string16(),
50       &index, &length));
51
52   EXPECT_TRUE(StringSearchIgnoringCaseAndAccents(
53       string16(), ASCIIToUTF16("searching for empty string"), &index, &length));
54   EXPECT_EQ(0U, index);
55   EXPECT_EQ(0U, length);
56
57   EXPECT_TRUE(StringSearchIgnoringCaseAndAccents(
58       ASCIIToUTF16("case insensitivity"), ASCIIToUTF16("CaSe InSeNsItIvItY"),
59       &index, &length));
60   EXPECT_EQ(0U, index);
61   EXPECT_EQ(18U, length);
62
63   if (locale_is_posix)
64     SetICUDefaultLocale(default_locale.data());
65 }
66
67 TEST(StringSearchTest, UnicodeLocaleIndependent) {
68   // Base characters
69   const string16 e_base = WideToUTF16(L"e");
70   const string16 E_base = WideToUTF16(L"E");
71   const string16 a_base = WideToUTF16(L"a");
72
73   // Composed characters
74   const string16 e_with_acute_accent = WideToUTF16(L"\u00e9");
75   const string16 E_with_acute_accent = WideToUTF16(L"\u00c9");
76   const string16 e_with_grave_accent = WideToUTF16(L"\u00e8");
77   const string16 E_with_grave_accent = WideToUTF16(L"\u00c8");
78   const string16 a_with_acute_accent = WideToUTF16(L"\u00e1");
79
80   // Decomposed characters
81   const string16 e_with_acute_combining_mark = WideToUTF16(L"e\u0301");
82   const string16 E_with_acute_combining_mark = WideToUTF16(L"E\u0301");
83   const string16 e_with_grave_combining_mark = WideToUTF16(L"e\u0300");
84   const string16 E_with_grave_combining_mark = WideToUTF16(L"E\u0300");
85   const string16 a_with_acute_combining_mark = WideToUTF16(L"a\u0301");
86
87   std::string default_locale(uloc_getDefault());
88   bool locale_is_posix = (default_locale == "en_US_POSIX");
89   if (locale_is_posix)
90     SetICUDefaultLocale("en_US");
91
92   size_t index = 0;
93   size_t length = 0;
94
95   EXPECT_TRUE(StringSearchIgnoringCaseAndAccents(
96       e_base, e_with_acute_accent, &index, &length));
97   EXPECT_EQ(0U, index);
98   EXPECT_EQ(e_with_acute_accent.size(), length);
99
100   EXPECT_TRUE(StringSearchIgnoringCaseAndAccents(
101       e_with_acute_accent, e_base, &index, &length));
102   EXPECT_EQ(0U, index);
103   EXPECT_EQ(e_base.size(), length);
104
105   EXPECT_TRUE(StringSearchIgnoringCaseAndAccents(
106       e_base, e_with_acute_combining_mark, &index, &length));
107   EXPECT_EQ(0U, index);
108   EXPECT_EQ(e_with_acute_combining_mark.size(), length);
109
110   EXPECT_TRUE(StringSearchIgnoringCaseAndAccents(
111       e_with_acute_combining_mark, e_base, &index, &length));
112   EXPECT_EQ(0U, index);
113   EXPECT_EQ(e_base.size(), length);
114
115   EXPECT_TRUE(StringSearchIgnoringCaseAndAccents(
116       e_with_acute_combining_mark, e_with_acute_accent,
117       &index, &length));
118   EXPECT_EQ(0U, index);
119   EXPECT_EQ(e_with_acute_accent.size(), length);
120
121   EXPECT_TRUE(StringSearchIgnoringCaseAndAccents(
122       e_with_acute_accent, e_with_acute_combining_mark,
123       &index, &length));
124   EXPECT_EQ(0U, index);
125   EXPECT_EQ(e_with_acute_combining_mark.size(), length);
126
127   EXPECT_TRUE(StringSearchIgnoringCaseAndAccents(
128       e_with_acute_combining_mark, e_with_grave_combining_mark,
129       &index, &length));
130   EXPECT_EQ(0U, index);
131   EXPECT_EQ(e_with_grave_combining_mark.size(), length);
132
133   EXPECT_TRUE(StringSearchIgnoringCaseAndAccents(
134       e_with_grave_combining_mark, e_with_acute_combining_mark,
135       &index, &length));
136   EXPECT_EQ(0U, index);
137   EXPECT_EQ(e_with_acute_combining_mark.size(), length);
138
139   EXPECT_TRUE(StringSearchIgnoringCaseAndAccents(
140       e_with_acute_combining_mark, e_with_grave_accent, &index, &length));
141   EXPECT_EQ(0U, index);
142   EXPECT_EQ(e_with_grave_accent.size(), length);
143
144   EXPECT_TRUE(StringSearchIgnoringCaseAndAccents(
145       e_with_grave_accent, e_with_acute_combining_mark, &index, &length));
146   EXPECT_EQ(0U, index);
147   EXPECT_EQ(e_with_acute_combining_mark.size(), length);
148
149   EXPECT_TRUE(StringSearchIgnoringCaseAndAccents(
150       E_with_acute_accent, e_with_acute_accent, &index, &length));
151   EXPECT_EQ(0U, index);
152   EXPECT_EQ(e_with_acute_accent.size(), length);
153
154   EXPECT_TRUE(StringSearchIgnoringCaseAndAccents(
155       E_with_grave_accent, e_with_acute_accent, &index, &length));
156   EXPECT_EQ(0U, index);
157   EXPECT_EQ(e_with_acute_accent.size(), length);
158
159   EXPECT_TRUE(StringSearchIgnoringCaseAndAccents(
160       E_with_acute_combining_mark, e_with_grave_accent, &index, &length));
161   EXPECT_EQ(0U, index);
162   EXPECT_EQ(e_with_grave_accent.size(), length);
163
164   EXPECT_TRUE(StringSearchIgnoringCaseAndAccents(
165       E_with_grave_combining_mark, e_with_acute_accent, &index, &length));
166   EXPECT_EQ(0U, index);
167   EXPECT_EQ(e_with_acute_accent.size(), length);
168
169   EXPECT_TRUE(StringSearchIgnoringCaseAndAccents(
170       E_base, e_with_grave_accent, &index, &length));
171   EXPECT_EQ(0U, index);
172   EXPECT_EQ(e_with_grave_accent.size(), length);
173
174   EXPECT_FALSE(StringSearchIgnoringCaseAndAccents(
175       a_with_acute_accent, e_with_acute_accent, &index, &length));
176
177   EXPECT_FALSE(StringSearchIgnoringCaseAndAccents(
178       a_with_acute_combining_mark, e_with_acute_combining_mark,
179       &index, &length));
180
181   if (locale_is_posix)
182     SetICUDefaultLocale(default_locale.data());
183 }
184
185 TEST(StringSearchTest, UnicodeLocaleDependent) {
186   // Base characters
187   const string16 a_base = WideToUTF16(L"a");
188
189   // Composed characters
190   const string16 a_with_ring = WideToUTF16(L"\u00e5");
191
192   EXPECT_TRUE(StringSearchIgnoringCaseAndAccents(a_base, a_with_ring, nullptr,
193                                                  nullptr));
194
195   const char* default_locale = uloc_getDefault();
196   SetICUDefaultLocale("da");
197
198   EXPECT_FALSE(StringSearchIgnoringCaseAndAccents(a_base, a_with_ring, nullptr,
199                                                   nullptr));
200
201   SetICUDefaultLocale(default_locale);
202 }
203
204 TEST(StringSearchTest, FixedPatternMultipleSearch) {
205   std::string default_locale(uloc_getDefault());
206   bool locale_is_posix = (default_locale == "en_US_POSIX");
207   if (locale_is_posix)
208     SetICUDefaultLocale("en_US");
209
210   size_t index = 0;
211   size_t length = 0;
212
213   // Search "hello" over multiple texts.
214   FixedPatternStringSearchIgnoringCaseAndAccents query(ASCIIToUTF16("hello"));
215   EXPECT_TRUE(query.Search(ASCIIToUTF16("12hello34"), &index, &length));
216   EXPECT_EQ(2U, index);
217   EXPECT_EQ(5U, length);
218   EXPECT_FALSE(query.Search(ASCIIToUTF16("bye"), &index, &length));
219   EXPECT_TRUE(query.Search(ASCIIToUTF16("hELLo"), &index, &length));
220   EXPECT_EQ(0U, index);
221   EXPECT_EQ(5U, length);
222
223   if (locale_is_posix)
224     SetICUDefaultLocale(default_locale.data());
225 }
226
227 }  // namespace i18n
228 }  // namespace base