1 // Copyright (c) 2011 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
10 #include "base/cxx17_backports.h"
11 #include "base/i18n/rtl.h"
12 #include "base/i18n/string_search.h"
13 #include "base/strings/utf_string_conversions.h"
14 #include "testing/gtest/include/gtest/gtest.h"
15 #include "third_party/icu/source/i18n/unicode/usearch.h"
// Checks that |find_this| is found in |in_this| at offset |ex_start| with
// length |ex_len|, first through StringSearchIgnoringCaseAndAccents() and
// then through StringSearch() called with trailing arguments (false, true).
// NOTE(review): those two bools are presumably case_sensitive=false and
// forward_search=true -- confirm against base/i18n/string_search.h.
20 #define EXPECT_MATCH_IGNORE_CASE(find_this, in_this, ex_start, ex_len) \
24 EXPECT_TRUE(StringSearchIgnoringCaseAndAccents(find_this, in_this, &index, \
26 EXPECT_EQ(ex_start, index); \
27 EXPECT_EQ(ex_len, length); \
31 StringSearch(find_this, in_this, &index, &length, false, true)); \
32 EXPECT_EQ(ex_start, index); \
33 EXPECT_EQ(ex_len, length); \
// Checks that StringSearch(), called with trailing arguments (true, true),
// reports |find_this| inside |in_this| at offset |ex_start| with length
// |ex_len|.
// NOTE(review): (true, true) is presumably case-sensitive + forward search.
36 #define EXPECT_MATCH_SENSITIVE(find_this, in_this, ex_start, ex_len) \
41 StringSearch(find_this, in_this, &index, &length, true, true)); \
42 EXPECT_EQ(ex_start, index); \
43 EXPECT_EQ(ex_len, length); \
// Checks that StringSearch(), called with trailing arguments (false, false),
// reports |find_this| inside |in_this| at offset |ex_start| with length
// |ex_len|.
// NOTE(review): (false, false) is presumably case-insensitive + backward
// search, as the macro name suggests -- confirm.
46 #define EXPECT_MATCH_IGNORE_CASE_BACKWARDS(find_this, in_this, ex_start, \
52 StringSearch(find_this, in_this, &index, &length, false, false)); \
53 EXPECT_EQ(ex_start, index); \
54 EXPECT_EQ(ex_len, length); \
// Checks that StringSearch(), called with trailing arguments (true, false),
// reports |find_this| inside |in_this| at offset |ex_start| with length
// |ex_len|.
// NOTE(review): (true, false) is presumably case-sensitive + backward
// search, as the macro name suggests -- confirm.
57 #define EXPECT_MATCH_SENSITIVE_BACKWARDS(find_this, in_this, ex_start, ex_len) \
62 StringSearch(find_this, in_this, &index, &length, true, false)); \
63 EXPECT_EQ(ex_start, index); \
64 EXPECT_EQ(ex_len, length); \
// No-match counterpart of the ignore-case check: expects
// StringSearchIgnoringCaseAndAccents() to return false for |find_this| in
// |in_this|, and also exercises StringSearch() with trailing arguments
// (false, true) on the same inputs.
67 #define EXPECT_MISS_IGNORE_CASE(find_this, in_this) \
71 EXPECT_FALSE(StringSearchIgnoringCaseAndAccents(find_this, in_this, \
76 StringSearch(find_this, in_this, &index, &length, false, true)); \
// No-match check for StringSearch() called with trailing arguments
// (true, true) on |find_this| / |in_this|.
// NOTE(review): presumably wrapped in EXPECT_FALSE like
// EXPECT_MISS_IGNORE_CASE -- confirm.
79 #define EXPECT_MISS_SENSITIVE(find_this, in_this) \
84 StringSearch(find_this, in_this, &index, &length, true, true)); \
// No-match check for StringSearch() called with trailing arguments
// (false, false) on |find_this| / |in_this|.
// NOTE(review): presumably case-insensitive + backward search -- confirm.
87 #define EXPECT_MISS_IGNORE_CASE_BACKWARDS(find_this, in_this) \
92 StringSearch(find_this, in_this, &index, &length, false, false)); \
// No-match check for StringSearch() called with trailing arguments
// (true, false) on |find_this| / |in_this|.
// NOTE(review): presumably case-sensitive + backward search -- confirm.
95 #define EXPECT_MISS_SENSITIVE_BACKWARDS(find_this, in_this) \
100 StringSearch(find_this, in_this, &index, &length, true, false)); \
103 // Note on setting default locale for testing: The current default locale on
104 // the Mac trybot is en_US_POSIX, with which primary-level collation strength
105 // string search is case-sensitive, when normally it should be
106 // case-insensitive. In other locales (including en_US which English speakers
107 // in the U.S. use), this search would be case-insensitive as expected.
// Exercises forward search over plain ASCII inputs, both ignoring case and
// case-sensitively, with ICU's default locale set to en_US for the duration.
109 TEST(StringSearchTest, ASCII) {
// Copy the current default locale name so it can be restored at the end.
110 std::string default_locale(uloc_getDefault());
// Records whether the test started under en_US_POSIX (see the note above
// about the Mac trybot).
111 bool locale_is_posix = (default_locale == "en_US_POSIX");
113 SetICUDefaultLocale("en_US");
// Pattern located at the very start of the text.
115 EXPECT_MATCH_IGNORE_CASE(u"hello", u"hello world", 0U, 5U);
// NOTE(review): the two literals below appear identical here yet a miss is
// expected; the spacing inside them presumably differs -- confirm.
117 EXPECT_MISS_IGNORE_CASE(u"h e l l o", u"h e l l o");
// The text contains partial occurrences of the pattern before the full one,
// which begins at offset 4.
119 EXPECT_MATCH_IGNORE_CASE(u"aabaaa", u"aaabaabaaa", 4U, 6U);
// A non-empty pattern is never found in an empty text.
121 EXPECT_MISS_IGNORE_CASE(u"searching within empty string", std::u16string());
// An empty pattern is expected to match at offset 0.
123 EXPECT_MATCH_IGNORE_CASE(std::u16string(), u"searching for empty string", 0U,
// Text that differs from the pattern only in letter case still matches.
126 EXPECT_MATCH_IGNORE_CASE(u"case insensitivity", u"CaSe InSeNsItIvItY", 0U,
// Same scenarios again through the case-sensitive macros.
129 EXPECT_MATCH_SENSITIVE(u"aabaaa", u"aaabaabaaa", 4U, 6U);
131 EXPECT_MISS_SENSITIVE(u"searching within empty string", std::u16string());
133 EXPECT_MATCH_SENSITIVE(std::u16string(), u"searching for empty string", 0U,
// With case sensitivity on, the mixed-case text no longer matches.
136 EXPECT_MISS_SENSITIVE(u"case insensitivity", u"CaSe InSeNsItIvItY");
// Put back the locale that was active when the test started.
139 SetICUDefaultLocale(default_locale.data());
// Exercises accent and case handling with precomposed characters (e.g.
// U+00E9) and their base-letter-plus-combining-mark spellings (e.g.
// "e" + U+0301), under the en_US default locale.
142 TEST(StringSearchTest, UnicodeLocaleIndependent) {
// Base characters
144 const std::u16string e_base = u"e";
145 const std::u16string E_base = u"E";
146 const std::u16string a_base = u"a";
148 // Composed characters
149 const std::u16string e_with_acute_accent = u"\u00e9";
150 const std::u16string E_with_acute_accent = u"\u00c9";
151 const std::u16string e_with_grave_accent = u"\u00e8";
152 const std::u16string E_with_grave_accent = u"\u00c8";
153 const std::u16string a_with_acute_accent = u"\u00e1";
155 // Decomposed characters
156 const std::u16string e_with_acute_combining_mark = u"e\u0301";
157 const std::u16string E_with_acute_combining_mark = u"E\u0301";
158 const std::u16string e_with_grave_combining_mark = u"e\u0300";
159 const std::u16string E_with_grave_combining_mark = u"E\u0300";
160 const std::u16string a_with_acute_combining_mark = u"a\u0301";
// Copy the current default locale name so it can be restored at the end.
162 std::string default_locale(uloc_getDefault());
163 bool locale_is_posix = (default_locale == "en_US_POSIX");
165 SetICUDefaultLocale("en_US");
// Ignoring case and accents: a base letter and its accented precomposed
// form match each other in both directions.
167 EXPECT_MATCH_IGNORE_CASE(e_base, e_with_acute_accent, 0U,
168 e_with_acute_accent.size());
170 EXPECT_MATCH_IGNORE_CASE(e_with_acute_accent, e_base, 0U, e_base.size());
// Same, with the accent spelled as a combining mark.
172 EXPECT_MATCH_IGNORE_CASE(e_base, e_with_acute_combining_mark, 0U,
173 e_with_acute_combining_mark.size());
175 EXPECT_MATCH_IGNORE_CASE(e_with_acute_combining_mark, e_base, 0U,
// Precomposed and combining-mark spellings of the same letter match; the
// expected length is the size of the text that was searched.
178 EXPECT_MATCH_IGNORE_CASE(e_with_acute_combining_mark, e_with_acute_accent, 0U,
179 e_with_acute_accent.size());
181 EXPECT_MATCH_IGNORE_CASE(e_with_acute_accent, e_with_acute_combining_mark, 0U,
182 e_with_acute_combining_mark.size());
// Different accents (acute vs. grave) on the same base letter still match.
184 EXPECT_MATCH_IGNORE_CASE(e_with_acute_combining_mark,
185 e_with_grave_combining_mark, 0U,
186 e_with_grave_combining_mark.size());
188 EXPECT_MATCH_IGNORE_CASE(e_with_grave_combining_mark,
189 e_with_acute_combining_mark, 0U,
190 e_with_acute_combining_mark.size());
192 EXPECT_MATCH_IGNORE_CASE(e_with_acute_combining_mark, e_with_grave_accent, 0U,
193 e_with_grave_accent.size());
195 EXPECT_MATCH_IGNORE_CASE(e_with_grave_accent, e_with_acute_combining_mark, 0U,
196 e_with_acute_combining_mark.size());
// Case differences combined with accent differences are ignored as well.
198 EXPECT_MATCH_IGNORE_CASE(E_with_acute_accent, e_with_acute_accent, 0U,
199 e_with_acute_accent.size());
201 EXPECT_MATCH_IGNORE_CASE(E_with_grave_accent, e_with_acute_accent, 0U,
202 e_with_acute_accent.size());
204 EXPECT_MATCH_IGNORE_CASE(E_with_acute_combining_mark, e_with_grave_accent, 0U,
205 e_with_grave_accent.size());
207 EXPECT_MATCH_IGNORE_CASE(E_with_grave_combining_mark, e_with_acute_accent, 0U,
208 e_with_acute_accent.size());
210 EXPECT_MATCH_IGNORE_CASE(E_base, e_with_grave_accent, 0U,
211 e_with_grave_accent.size());
// A different base letter ("a" vs. "e") never matches, even when both carry
// the same accent.
213 EXPECT_MISS_IGNORE_CASE(a_with_acute_accent, e_with_acute_accent);
215 EXPECT_MISS_IGNORE_CASE(a_with_acute_combining_mark,
216 e_with_acute_combining_mark);
// Sensitive search: a base letter no longer matches its accented form.
218 EXPECT_MISS_SENSITIVE(e_base, e_with_acute_accent);
220 EXPECT_MISS_SENSITIVE(e_with_acute_accent, e_base);
222 EXPECT_MISS_SENSITIVE(e_base, e_with_acute_combining_mark);
224 EXPECT_MISS_SENSITIVE(e_with_acute_combining_mark, e_base);
// The precomposed and combining-mark spellings of the same accented letter
// are still expected to match each other.
226 EXPECT_MATCH_SENSITIVE(e_with_acute_combining_mark, e_with_acute_accent, 0U,
229 EXPECT_MATCH_SENSITIVE(e_with_acute_accent, e_with_acute_combining_mark, 0U,
// Differing accents are a miss in sensitive mode.
232 EXPECT_MISS_SENSITIVE(e_with_acute_combining_mark,
233 e_with_grave_combining_mark);
235 EXPECT_MISS_SENSITIVE(e_with_grave_combining_mark,
236 e_with_acute_combining_mark);
238 EXPECT_MISS_SENSITIVE(e_with_acute_combining_mark, e_with_grave_accent);
240 EXPECT_MISS_SENSITIVE(e_with_grave_accent, e_with_acute_combining_mark);
// Differing case is a miss in sensitive mode, with or without accents.
242 EXPECT_MISS_SENSITIVE(E_with_acute_accent, e_with_acute_accent);
244 EXPECT_MISS_SENSITIVE(E_with_grave_accent, e_with_acute_accent);
246 EXPECT_MISS_SENSITIVE(E_with_acute_combining_mark, e_with_grave_accent);
248 EXPECT_MISS_SENSITIVE(E_with_grave_combining_mark, e_with_acute_accent);
250 EXPECT_MISS_SENSITIVE(E_base, e_with_grave_accent);
// Different base letters remain a miss.
252 EXPECT_MISS_SENSITIVE(a_with_acute_accent, e_with_acute_accent);
254 EXPECT_MISS_SENSITIVE(a_with_acute_combining_mark,
255 e_with_acute_combining_mark);
// Identical decomposed strings match; the length of 2 covers the base letter
// and its combining mark.
257 EXPECT_MATCH_SENSITIVE(a_with_acute_combining_mark,
258 a_with_acute_combining_mark, 0U, 2U);
// Put back the locale that was active when the test started.
261 SetICUDefaultLocale(default_locale.data());
// Checks that the outcome of searching "a" in U+00E5 (a with ring above)
// changes once ICU's default locale is switched to "da".
264 TEST(StringSearchTest, UnicodeLocaleDependent) {
// Base characters
266 const std::u16string a_base = u"a";
268 // Composed characters
269 const std::u16string a_with_ring = u"\u00e5";
// Before the locale is changed, "a" is expected to be found in U+00E5. Null
// is passed for the match-position out-parameters.
271 EXPECT_TRUE(StringSearchIgnoringCaseAndAccents(a_base, a_with_ring, nullptr,
273 EXPECT_TRUE(StringSearch(a_base, a_with_ring, nullptr, nullptr, false, true));
// NOTE(review): this keeps the raw pointer returned by uloc_getDefault()
// across the SetICUDefaultLocale() call below, whereas the other tests in
// this file copy the name into a std::string first -- confirm the pointer
// remains valid after the default locale is changed.
275 const char* default_locale = uloc_getDefault();
276 SetICUDefaultLocale("da");
// Under "da" the same search is expected to fail.
278 EXPECT_FALSE(StringSearchIgnoringCaseAndAccents(a_base, a_with_ring, nullptr,
281 StringSearch(a_base, a_with_ring, nullptr, nullptr, false, true));
// Put back the locale that was active before the switch to "da".
283 SetICUDefaultLocale(default_locale);
// Exercises the *_BACKWARDS macros: when the pattern occurs twice in the
// text, the occurrence at the higher offset is the one expected.
286 TEST(StringSearchTest, SearchBackwards) {
// Copy the current default locale name so it can be restored at the end.
287 std::string default_locale(uloc_getDefault());
288 bool locale_is_posix = (default_locale == "en_US_POSIX");
290 SetICUDefaultLocale("en_US");
// "ab" occurs at offsets 0 and 2 (ignoring case); offset 2 is expected.
292 EXPECT_MATCH_IGNORE_CASE_BACKWARDS(u"ab", u"ABAB", 2U, 2U);
293 EXPECT_MATCH_SENSITIVE_BACKWARDS(u"ab", u"abab", 2U, 2U);
// Lower-case pattern against upper-case text is a miss when case matters.
294 EXPECT_MISS_SENSITIVE_BACKWARDS(u"ab", u"ABAB");
// Put back the locale that was active when the test started.
297 SetICUDefaultLocale(default_locale.data());
// Checks that a single FixedPatternStringSearch /
// FixedPatternStringSearchIgnoringCaseAndAccents object can be reused to
// search several different texts for the pattern it was built with.
300 TEST(StringSearchTest, FixedPatternMultipleSearch) {
// Copy the current default locale name so it can be restored at the end.
301 std::string default_locale(uloc_getDefault());
302 bool locale_is_posix = (default_locale == "en_US_POSIX");
304 SetICUDefaultLocale("en_US");
309 // Search "foo" over multiple texts.
// NOTE(review): the constructor's `true` is presumably case_sensitive,
// consistent with the u"FOO" miss below -- confirm.
310 FixedPatternStringSearch query1(u"foo", true);
311 EXPECT_TRUE(query1.Search(u"12foo34", &index, &length, true));
312 EXPECT_EQ(2U, index);
313 EXPECT_EQ(3U, length);
314 EXPECT_FALSE(query1.Search(u"bye", &index, &length, true));
315 EXPECT_FALSE(query1.Search(u"FOO", &index, &length, true));
// With the last argument true, the occurrence at offset 0 is reported; with
// it false, the one at offset 6 is. NOTE(review): presumably forward_search.
316 EXPECT_TRUE(query1.Search(u"foobarfoo", &index, &length, true));
317 EXPECT_EQ(0U, index);
318 EXPECT_EQ(3U, length);
319 EXPECT_TRUE(query1.Search(u"foobarfoo", &index, &length, false));
320 EXPECT_EQ(6U, index);
321 EXPECT_EQ(3U, length);
323 // Search "hello" over multiple texts.
324 FixedPatternStringSearchIgnoringCaseAndAccents query2(u"hello");
325 EXPECT_TRUE(query2.Search(u"12hello34", &index, &length));
326 EXPECT_EQ(2U, index);
327 EXPECT_EQ(5U, length);
328 EXPECT_FALSE(query2.Search(u"bye", &index, &length));
// Mixed-case text still matches with the case-ignoring searcher.
329 EXPECT_TRUE(query2.Search(u"hELLo", &index, &length));
330 EXPECT_EQ(0U, index);
331 EXPECT_EQ(5U, length);
// Put back the locale that was active when the test started.
334 SetICUDefaultLocale(default_locale.data());
// Drives RepeatingStringSearch over one target string, collecting every
// match it reports, once case-sensitively and once case-insensitively, and
// compares the collected (index, length) pairs with the expected lists.
337 TEST(StringSearchTest, RepeatingStringSearch) {
// Copy the current default locale name so it can be restored at the end.
343 std::string default_locale(uloc_getDefault());
344 bool locale_is_posix = (default_locale == "en_US_POSIX");
346 SetICUDefaultLocale("en_US");
// The target holds lower-case "fox" at offset 16 and capitalised "Fox" at
// offset 41.
348 const char16_t kPattern[] = u"fox";
349 const char16_t kTarget[] = u"The quick brown fox jumped over the lazy Fox";
// Case-sensitive pass: only the lower-case occurrence is expected.
353 const MatchResult kExpectation[] = {{16, 3}};
355 RepeatingStringSearch searcher(kPattern, kTarget, /*case_sensitive=*/true);
356 std::vector<MatchResult> results;
// Keep asking for matches until NextMatchResult() returns false.
359 while (searcher.NextMatchResult(match_index, match_length)) {
361 {.match_index = match_index, .match_length = match_length});
// ASSERT (not EXPECT) on the count, so the indexed comparison below only
// runs when both sequences have the same number of entries.
364 ASSERT_EQ(base::size(kExpectation), results.size());
365 for (size_t i = 0; i < results.size(); ++i) {
366 EXPECT_EQ(results[i].match_index, kExpectation[i].match_index);
367 EXPECT_EQ(results[i].match_length, kExpectation[i].match_length);
// Case-insensitive pass: both occurrences are expected, in text order.
373 const MatchResult kExpectation[] = {{16, 3}, {41, 3}};
375 RepeatingStringSearch searcher(kPattern, kTarget, /*case_sensitive=*/false);
376 std::vector<MatchResult> results;
// Keep asking for matches until NextMatchResult() returns false.
379 while (searcher.NextMatchResult(match_index, match_length)) {
381 {.match_index = match_index, .match_length = match_length});
384 ASSERT_EQ(base::size(kExpectation), results.size());
385 for (size_t i = 0; i < results.size(); ++i) {
386 EXPECT_EQ(results[i].match_index, kExpectation[i].match_index);
387 EXPECT_EQ(results[i].match_length, kExpectation[i].match_length);
// Put back the locale that was active when the test started.
392 SetICUDefaultLocale(default_locale.data());