1 // Copyright (c) 2011 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
9 #include "base/i18n/rtl.h"
10 #include "base/i18n/string_search.h"
11 #include "base/strings/string16.h"
12 #include "base/strings/utf_string_conversions.h"
13 #include "testing/gtest/include/gtest/gtest.h"
14 #include "third_party/icu/source/i18n/unicode/usearch.h"
19 // Note on setting default locale for testing: The current default locale on
20 // the Mac trybot is en_US_POSIX, with which primary-level collation strength
21 // string search is case-sensitive, when normally it should be
22 // case-insensitive. In other locales (including en_US which English speakers
23 // in the U.S. use), this search would be case-insensitive as expected.
// Exercises StringSearchIgnoringCaseAndAccents() on plain ASCII input. Judging
// by the literals used below, the first argument is the pattern and the second
// is the text being searched; `index` and `length` receive the match position
// and matched length.
25 TEST(StringSearchTest, ASCII) {
// Copy the name of the current ICU default locale so it can be restored at the
// end of the test.
26 std::string default_locale(uloc_getDefault());
// Records whether the starting locale is en_US_POSIX, the case-sensitive
// locale described in the note above the tests.
27 bool locale_is_posix = (default_locale == "en_US_POSIX");
// Run the searches below under en_US.
29 SetICUDefaultLocale("en_US");
// A pattern that is a prefix of the text is found; the match is 5 long.
34 EXPECT_TRUE(StringSearchIgnoringCaseAndAccents(
35 ASCIIToUTF16("hello"), ASCIIToUTF16("hello world"), &index, &length));
37 EXPECT_EQ(5U, length);
// NOTE(review): as written here the pattern and the text are the same literal,
// yet no match is expected -- confirm the intended spacing inside these two
// literals.
39 EXPECT_FALSE(StringSearchIgnoringCaseAndAccents(
40 ASCIIToUTF16("h e l l o"), ASCIIToUTF16("h e l l o"),
// The text starts with a partial occurrence of the pattern ("aaabaa...") before
// the full one; the search must still find the full 6-long match.
43 EXPECT_TRUE(StringSearchIgnoringCaseAndAccents(
44 ASCIIToUTF16("aabaaa"), ASCIIToUTF16("aaabaabaaa"), &index, &length));
46 EXPECT_EQ(6U, length);
// A non-empty pattern is never found inside an empty text.
48 EXPECT_FALSE(StringSearchIgnoringCaseAndAccents(
49 ASCIIToUTF16("searching within empty string"), string16(),
// An empty pattern is reported as found, with a zero-length match.
52 EXPECT_TRUE(StringSearchIgnoringCaseAndAccents(
53 string16(), ASCIIToUTF16("searching for empty string"), &index, &length));
55 EXPECT_EQ(0U, length);
// Letter case is ignored: the all-lowercase pattern matches the mixed-case
// text over its full 18 characters.
57 EXPECT_TRUE(StringSearchIgnoringCaseAndAccents(
58 ASCIIToUTF16("case insensitivity"), ASCIIToUTF16("CaSe InSeNsItIvItY"),
61 EXPECT_EQ(18U, length);
// Put back the locale that was the default when the test started.
64 SetICUDefaultLocale(default_locale.data());
// Checks that StringSearchIgnoringCaseAndAccents() treats accented and
// unaccented forms of the same base letter as equal, whether the accent is
// encoded as a precomposed character or as a base letter followed by a
// combining mark, and regardless of letter case. In each call the first
// argument is the pattern and the second is the text. Every successful match
// is expected to start at index 0 and to span the whole text.
67 TEST(StringSearchTest, UnicodeLocaleIndependent) {
// Unaccented base letters.
69 const string16 e_base = WideToUTF16(L"e");
70 const string16 E_base = WideToUTF16(L"E");
71 const string16 a_base = WideToUTF16(L"a");
// Each of these is a single precomposed code point (letter + accent).
73 // Composed characters
74 const string16 e_with_acute_accent = WideToUTF16(L"\u00e9");
75 const string16 E_with_acute_accent = WideToUTF16(L"\u00c9");
76 const string16 e_with_grave_accent = WideToUTF16(L"\u00e8");
77 const string16 E_with_grave_accent = WideToUTF16(L"\u00c8");
78 const string16 a_with_acute_accent = WideToUTF16(L"\u00e1");
// Each of these is a base letter followed by a combining mark
// (U+0301 acute, U+0300 grave), so it is two code units long.
80 // Decomposed characters
81 const string16 e_with_acute_combining_mark = WideToUTF16(L"e\u0301");
82 const string16 E_with_acute_combining_mark = WideToUTF16(L"E\u0301");
83 const string16 e_with_grave_combining_mark = WideToUTF16(L"e\u0300");
84 const string16 E_with_grave_combining_mark = WideToUTF16(L"E\u0300");
85 const string16 a_with_acute_combining_mark = WideToUTF16(L"a\u0301");
// Copy the name of the current ICU default locale so it can be restored at the
// end of the test.
87 std::string default_locale(uloc_getDefault());
// Records whether the starting locale is en_US_POSIX, the case-sensitive
// locale described in the note above the tests.
88 bool locale_is_posix = (default_locale == "en_US_POSIX");
// Run the searches below under en_US.
90 SetICUDefaultLocale("en_US");
// Plain "e" pattern vs. precomposed e-acute text.
95 EXPECT_TRUE(StringSearchIgnoringCaseAndAccents(
96 e_base, e_with_acute_accent, &index, &length));
98 EXPECT_EQ(e_with_acute_accent.size(), length);
// Precomposed e-acute pattern vs. plain "e" text.
100 EXPECT_TRUE(StringSearchIgnoringCaseAndAccents(
101 e_with_acute_accent, e_base, &index, &length));
102 EXPECT_EQ(0U, index);
103 EXPECT_EQ(e_base.size(), length);
// Plain "e" pattern vs. decomposed e + combining acute text; the match must
// cover the combining mark as well.
105 EXPECT_TRUE(StringSearchIgnoringCaseAndAccents(
106 e_base, e_with_acute_combining_mark, &index, &length));
107 EXPECT_EQ(0U, index);
108 EXPECT_EQ(e_with_acute_combining_mark.size(), length);
// Decomposed e + combining acute pattern vs. plain "e" text.
110 EXPECT_TRUE(StringSearchIgnoringCaseAndAccents(
111 e_with_acute_combining_mark, e_base, &index, &length));
112 EXPECT_EQ(0U, index);
113 EXPECT_EQ(e_base.size(), length);
// Same accent, different encodings: decomposed pattern vs. precomposed text.
115 EXPECT_TRUE(StringSearchIgnoringCaseAndAccents(
116 e_with_acute_combining_mark, e_with_acute_accent,
118 EXPECT_EQ(0U, index);
119 EXPECT_EQ(e_with_acute_accent.size(), length);
// Same accent, different encodings: precomposed pattern vs. decomposed text.
121 EXPECT_TRUE(StringSearchIgnoringCaseAndAccents(
122 e_with_acute_accent, e_with_acute_combining_mark,
124 EXPECT_EQ(0U, index);
125 EXPECT_EQ(e_with_acute_combining_mark.size(), length);
// Different accents, both decomposed: acute pattern vs. grave text.
127 EXPECT_TRUE(StringSearchIgnoringCaseAndAccents(
128 e_with_acute_combining_mark, e_with_grave_combining_mark,
130 EXPECT_EQ(0U, index);
131 EXPECT_EQ(e_with_grave_combining_mark.size(), length);
// Different accents, both decomposed: grave pattern vs. acute text.
133 EXPECT_TRUE(StringSearchIgnoringCaseAndAccents(
134 e_with_grave_combining_mark, e_with_acute_combining_mark,
136 EXPECT_EQ(0U, index);
137 EXPECT_EQ(e_with_acute_combining_mark.size(), length);
// Different accents and different encodings: decomposed acute pattern vs.
// precomposed grave text.
139 EXPECT_TRUE(StringSearchIgnoringCaseAndAccents(
140 e_with_acute_combining_mark, e_with_grave_accent, &index, &length));
141 EXPECT_EQ(0U, index);
142 EXPECT_EQ(e_with_grave_accent.size(), length);
// Different accents and different encodings: precomposed grave pattern vs.
// decomposed acute text.
144 EXPECT_TRUE(StringSearchIgnoringCaseAndAccents(
145 e_with_grave_accent, e_with_acute_combining_mark, &index, &length));
146 EXPECT_EQ(0U, index);
147 EXPECT_EQ(e_with_acute_combining_mark.size(), length);
// Case differs, accent is the same: uppercase E-acute vs. lowercase e-acute.
149 EXPECT_TRUE(StringSearchIgnoringCaseAndAccents(
150 E_with_acute_accent, e_with_acute_accent, &index, &length));
151 EXPECT_EQ(0U, index);
152 EXPECT_EQ(e_with_acute_accent.size(), length);
// Case and accent both differ: uppercase E-grave vs. lowercase e-acute.
154 EXPECT_TRUE(StringSearchIgnoringCaseAndAccents(
155 E_with_grave_accent, e_with_acute_accent, &index, &length));
156 EXPECT_EQ(0U, index);
157 EXPECT_EQ(e_with_acute_accent.size(), length);
// Case, accent and encoding all differ: decomposed E + acute pattern vs.
// precomposed e-grave text.
159 EXPECT_TRUE(StringSearchIgnoringCaseAndAccents(
160 E_with_acute_combining_mark, e_with_grave_accent, &index, &length));
161 EXPECT_EQ(0U, index);
162 EXPECT_EQ(e_with_grave_accent.size(), length);
// Case, accent and encoding all differ: decomposed E + grave pattern vs.
// precomposed e-acute text.
164 EXPECT_TRUE(StringSearchIgnoringCaseAndAccents(
165 E_with_grave_combining_mark, e_with_acute_accent, &index, &length));
166 EXPECT_EQ(0U, index);
167 EXPECT_EQ(e_with_acute_accent.size(), length);
// Unaccented uppercase "E" pattern vs. precomposed lowercase e-grave text.
169 EXPECT_TRUE(StringSearchIgnoringCaseAndAccents(
170 E_base, e_with_grave_accent, &index, &length));
171 EXPECT_EQ(0U, index);
172 EXPECT_EQ(e_with_grave_accent.size(), length);
// Negative cases: sharing an accent is not enough -- different base letters
// ("a" vs. "e") must not match, in precomposed form...
174 EXPECT_FALSE(StringSearchIgnoringCaseAndAccents(
175 a_with_acute_accent, e_with_acute_accent, &index, &length));
// ...nor in decomposed form.
177 EXPECT_FALSE(StringSearchIgnoringCaseAndAccents(
178 a_with_acute_combining_mark, e_with_acute_combining_mark,
// Put back the locale that was the default when the test started.
182 SetICUDefaultLocale(default_locale.data());
// Checks that the outcome of StringSearchIgnoringCaseAndAccents() can depend
// on the ICU default locale: the same pattern/text pair is expected to match
// before the locale is changed and to not match once the locale is "da".
185 TEST(StringSearchTest, UnicodeLocaleDependent) {
// Unaccented base letter used as the pattern.
187 const string16 a_base = WideToUTF16(L"a");
// U+00E5, "a" with a ring above, used as the text.
189 // Composed characters
190 const string16 a_with_ring = WideToUTF16(L"\u00e5");
// Under the locale in effect when the test starts, "a" is expected to match
// a-with-ring. No match position is requested (nullptr).
192 EXPECT_TRUE(StringSearchIgnoringCaseAndAccents(a_base, a_with_ring, nullptr,
// NOTE(review): the other tests in this file copy the result of
// uloc_getDefault() into a std::string before changing the locale; here the
// raw pointer is kept across SetICUDefaultLocale("da") -- confirm it remains
// valid until it is used to restore the locale below.
195 const char* default_locale = uloc_getDefault();
196 SetICUDefaultLocale("da");
// Under "da" the same search is expected to fail -- presumably because that
// locale's collation treats a-with-ring as a distinct letter rather than an
// accented "a"; confirm against the ICU collation data.
198 EXPECT_FALSE(StringSearchIgnoringCaseAndAccents(a_base, a_with_ring, nullptr,
// Put back the locale that was the default before the switch to "da".
201 SetICUDefaultLocale(default_locale);
// Checks that one FixedPatternStringSearchIgnoringCaseAndAccents object,
// constructed once with a pattern, can be used to search several different
// texts in a row and reports the correct result for each.
204 TEST(StringSearchTest, FixedPatternMultipleSearch) {
// Copy the name of the current ICU default locale so it can be restored at the
// end of the test.
205 std::string default_locale(uloc_getDefault());
// Records whether the starting locale is en_US_POSIX, the case-sensitive
// locale described in the note above the tests.
206 bool locale_is_posix = (default_locale == "en_US_POSIX");
// Run the searches below under en_US.
208 SetICUDefaultLocale("en_US");
213 // Search "hello" over multiple texts.
214 FixedPatternStringSearchIgnoringCaseAndAccents query(ASCIIToUTF16("hello"));
// Pattern in the middle of the text: found at offset 2, 5 long.
215 EXPECT_TRUE(query.Search(ASCIIToUTF16("12hello34"), &index, &length));
216 EXPECT_EQ(2U, index);
217 EXPECT_EQ(5U, length);
// Text that does not contain the pattern: the same query object reports no
// match.
218 EXPECT_FALSE(query.Search(ASCIIToUTF16("bye"), &index, &length));
// Reusing the query after a failed search still works, and letter case is
// ignored: "hELLo" matches from offset 0 over all 5 characters.
219 EXPECT_TRUE(query.Search(ASCIIToUTF16("hELLo"), &index, &length));
220 EXPECT_EQ(0U, index);
221 EXPECT_EQ(5U, length);
// Put back the locale that was the default when the test started.
224 SetICUDefaultLocale(default_locale.data());