1 // Copyright (c) 2011 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
5 #include "base/i18n/rtl.h"
9 #include "base/files/file_path.h"
10 #include "base/strings/string_util.h"
11 #include "base/strings/sys_string_conversions.h"
12 #include "base/strings/utf_string_conversions.h"
13 #include "testing/gtest/include/gtest/gtest.h"
14 #include "testing/platform_test.h"
15 #include "third_party/icu/source/i18n/unicode/usearch.h"
22 // A test utility function to set the application default text direction.
// Sets the ICU default locale to "he" when |rtl| is true and to "en"
// otherwise, then checks that IsRTL() reports the requested direction.
23 void SetRTL(bool rtl) {
24 // Override the current locale/direction.
25 SetICUDefaultLocale(rtl ? "he" : "en");
26 EXPECT_EQ(rtl, IsRTL());
// Fixture used by the TEST_F(RTLTest, ...) cases in this file; derives from
// PlatformTest.
31 class RTLTest : public PlatformTest {
// Table-driven test for GetFirstStrongCharacterDirection(): every case's text
// is converted with WideToUTF16() and the returned direction is compared with
// the case's expected direction.
34 TEST_F(RTLTest, GetFirstStrongCharacterDirection) {
37 TextDirection direction;
39 // Test pure LTR string.
40 { L"foo bar", LEFT_TO_RIGHT },
41 // Test pure RTL string (letters U+05D0..U+05D5 separated by a space).
42 { L"\x05d0\x05d1\x05d2 \x05d3\x05d4\x05d5", RIGHT_TO_LEFT},
43 // Test bidi string in which the first character with strong directionality
44 // is a character with type L.
45 { L"foo \x05d0 bar", LEFT_TO_RIGHT },
46 // Test bidi string in which the first character with strong directionality
47 // is a character with type R.
48 { L"\x05d0 foo bar", RIGHT_TO_LEFT },
49 // Test bidi string which starts with a character with weak directionality
50 // and in which the first character with strong directionality is a
51 // character with type L.
52 { L"!foo \x05d0 bar", LEFT_TO_RIGHT },
53 // Test bidi string which starts with a character with weak directionality
54 // and in which the first character with strong directionality is a
55 // character with type R.
56 { L",\x05d0 foo bar", RIGHT_TO_LEFT },
57 // Test bidi string in which the first character with strong directionality
58 // is a character with type LRE.
59 { L"\x202a \x05d0 foo bar", LEFT_TO_RIGHT },
60 // Test bidi string in which the first character with strong directionality
61 // is a character with type LRO.
62 { L"\x202d \x05d0 foo bar", LEFT_TO_RIGHT },
63 // Test bidi string in which the first character with strong directionality
64 // is a character with type RLE.
65 { L"\x202b foo \x05d0 bar", RIGHT_TO_LEFT },
66 // Test bidi string in which the first character with strong directionality
67 // is a character with type RLO.
68 { L"\x202e foo \x05d0 bar", RIGHT_TO_LEFT },
69 // Test bidi string in which the first character with strong directionality
70 // is a character with type AL.
71 { L"\x0622 foo \x05d0 bar", RIGHT_TO_LEFT },
72 // Test a string without strong directionality characters.
73 { L",!.{}", LEFT_TO_RIGHT },
75 { L"", LEFT_TO_RIGHT },
76 // Test characters in non-BMP (e.g. Phoenician letters. Please refer to
77 // http://demo.icu-project.org/icu-bin/ubrowse?scr=151&b=10910 for more
// The non-BMP samples are spelled as one UTF-32 code unit or as a UTF-16
// surrogate pair, depending on the width of wchar_t.
80 #if defined(WCHAR_T_IS_UTF32)
81 L" ! \x10910" L"abc 123",
82 #elif defined(WCHAR_T_IS_UTF16)
83 L" ! \xd802\xdd10" L"abc 123",
85 #error wchar_t should be either UTF-16 or UTF-32
89 #if defined(WCHAR_T_IS_UTF32)
90 L" ! \x10401" L"abc 123",
91 #elif defined(WCHAR_T_IS_UTF16)
92 L" ! \xd801\xdc01" L"abc 123",
94 #error wchar_t should be either UTF-16 or UTF-32
99 for (size_t i = 0; i < ARRAYSIZE_UNSAFE(cases); ++i)
100 EXPECT_EQ(cases[i].direction,
101 GetFirstStrongCharacterDirection(WideToUTF16(cases[i].text)));
105 // Note that the cases with LRE, LRO, RLE and RLO are invalid for
106 // GetLastStrongCharacterDirection because they should be followed by PDF
// Table-driven test for GetLastStrongCharacterDirection(): every case's text
// is converted with WideToUTF16() and the returned direction is compared with
// the case's expected direction.
108 TEST_F(RTLTest, GetLastStrongCharacterDirection) {
111 TextDirection direction;
113 // Test pure LTR string.
114 { L"foo bar", LEFT_TO_RIGHT },
115 // Test pure RTL string (letters U+05D0..U+05D5 separated by a space).
116 { L"\x05d0\x05d1\x05d2 \x05d3\x05d4\x05d5", RIGHT_TO_LEFT},
117 // Test bidi string in which the last character with strong directionality
118 // is a character with type L.
119 { L"foo \x05d0 bar", LEFT_TO_RIGHT },
120 // Test bidi string in which the last character with strong directionality
121 // is a character with type R.
122 { L"\x05d0 foo bar \x05d3", RIGHT_TO_LEFT },
123 // Test bidi string which ends with a character with weak directionality
124 // and in which the last character with strong directionality is a
125 // character with type L.
126 { L"!foo \x05d0 bar!", LEFT_TO_RIGHT },
127 // Test bidi string which ends with a character with weak directionality
128 // and in which the last character with strong directionality is a
129 // character with type R.
130 { L",\x05d0 foo bar \x05d1,", RIGHT_TO_LEFT },
131 // Test bidi string in which the last character with strong directionality
132 // is a character with type AL.
133 { L"\x0622 foo \x05d0 bar \x0622", RIGHT_TO_LEFT },
134 // Test a string without strong directionality characters.
135 { L",!.{}", LEFT_TO_RIGHT },
136 // Test empty string.
137 { L"", LEFT_TO_RIGHT },
138 // Test characters in non-BMP (e.g. Phoenician letters. Please refer to
139 // http://demo.icu-project.org/icu-bin/ubrowse?scr=151&b=10910 for more
// The non-BMP samples are spelled as one UTF-32 code unit or as a UTF-16
// surrogate pair, depending on the width of wchar_t.
142 #if defined(WCHAR_T_IS_UTF32)
143 L"abc 123" L" ! \x10910 !",
144 #elif defined(WCHAR_T_IS_UTF16)
145 L"abc 123" L" ! \xd802\xdd10 !",
147 #error wchar_t should be either UTF-16 or UTF-32
151 #if defined(WCHAR_T_IS_UTF32)
152 L"abc 123" L" ! \x10401 !",
153 #elif defined(WCHAR_T_IS_UTF16)
154 L"abc 123" L" ! \xd801\xdc01 !",
156 #error wchar_t should be either UTF-16 or UTF-32
161 for (size_t i = 0; i < ARRAYSIZE_UNSAFE(cases); ++i)
162 EXPECT_EQ(cases[i].direction,
163 GetLastStrongCharacterDirection(WideToUTF16(cases[i].text)));
// Table-driven test for GetStringDirection(): every case's text is converted
// with WideToUTF16() and the returned direction is compared with the case's
// expected direction. Strings that mix LTR and RTL strong characters are
// expected to yield UNKNOWN_DIRECTION; strings with no strong character (and
// the empty string) are expected to yield LEFT_TO_RIGHT.
166 TEST_F(RTLTest, GetStringDirection) {
169 TextDirection direction;
171 // Test pure LTR string.
172 { L"foobar", LEFT_TO_RIGHT },
173 { L".foobar", LEFT_TO_RIGHT },
174 { L"foo, bar", LEFT_TO_RIGHT },
175 // Test pure LTR with strong directionality characters of type LRE.
176 { L"\x202a\x202a", LEFT_TO_RIGHT },
177 { L".\x202a\x202a", LEFT_TO_RIGHT },
178 { L"\x202a, \x202a", LEFT_TO_RIGHT },
179 // Test pure LTR with strong directionality characters of type LRO.
180 { L"\x202d\x202d", LEFT_TO_RIGHT },
181 { L".\x202d\x202d", LEFT_TO_RIGHT },
182 { L"\x202d, \x202d", LEFT_TO_RIGHT },
183 // Test pure LTR with various types of strong directionality characters.
184 { L"foo \x202a\x202d", LEFT_TO_RIGHT },
185 { L".\x202d foo \x202a", LEFT_TO_RIGHT },
186 { L"\x202a, \x202d foo", LEFT_TO_RIGHT },
187 // Test pure RTL with strong directionality characters of type R.
188 { L"\x05d0\x05d0", RIGHT_TO_LEFT },
189 { L".\x05d0\x05d0", RIGHT_TO_LEFT },
190 { L"\x05d0, \x05d0", RIGHT_TO_LEFT },
191 // Test pure RTL with strong directionality characters of type RLE.
192 { L"\x202b\x202b", RIGHT_TO_LEFT },
193 { L".\x202b\x202b", RIGHT_TO_LEFT },
194 { L"\x202b, \x202b", RIGHT_TO_LEFT },
195 // Test pure RTL with strong directionality characters of type RLO.
196 { L"\x202e\x202e", RIGHT_TO_LEFT },
197 { L".\x202e\x202e", RIGHT_TO_LEFT },
198 { L"\x202e, \x202e", RIGHT_TO_LEFT },
199 // Test pure RTL with strong directionality characters of type AL.
200 { L"\x0622\x0622", RIGHT_TO_LEFT },
201 { L".\x0622\x0622", RIGHT_TO_LEFT },
202 { L"\x0622, \x0622", RIGHT_TO_LEFT },
203 // Test pure RTL with various types of strong directionality characters.
204 { L"\x05d0\x202b\x202e\x0622", RIGHT_TO_LEFT },
205 { L".\x202b\x202e\x0622\x05d0", RIGHT_TO_LEFT },
206 { L"\x0622\x202e, \x202b\x05d0", RIGHT_TO_LEFT },
207 // Test bidi strings.
208 { L"foo \x05d0 bar", UNKNOWN_DIRECTION },
209 { L"\x202b foo bar", UNKNOWN_DIRECTION },
210 { L"!foo \x0622 bar", UNKNOWN_DIRECTION },
211 { L"\x202a\x202b", UNKNOWN_DIRECTION },
212 { L"\x202e\x202d", UNKNOWN_DIRECTION },
213 { L"\x0622\x202a", UNKNOWN_DIRECTION },
214 { L"\x202d\x05d0", UNKNOWN_DIRECTION },
215 // Test a string without strong directionality characters.
216 { L",!.{}", LEFT_TO_RIGHT },
217 // Test empty string.
218 { L"", LEFT_TO_RIGHT },
// Non-BMP samples, spelled as one UTF-32 code unit or as a UTF-16 surrogate
// pair depending on the width of wchar_t.
220 #if defined(WCHAR_T_IS_UTF32)
221 L" ! \x10910" L"abc 123",
222 #elif defined(WCHAR_T_IS_UTF16)
223 L" ! \xd802\xdd10" L"abc 123",
225 #error wchar_t should be either UTF-16 or UTF-32
229 #if defined(WCHAR_T_IS_UTF32)
230 L" ! \x10401" L"abc 123",
231 #elif defined(WCHAR_T_IS_UTF16)
232 L" ! \xd801\xdc01" L"abc 123",
234 #error wchar_t should be either UTF-16 or UTF-32
239 for (size_t i = 0; i < ARRAYSIZE_UNSAFE(cases); ++i)
240 EXPECT_EQ(cases[i].direction,
241 GetStringDirection(WideToUTF16(cases[i].text)));
// Checks WrapPathWithLTRFormatting(): for every path in |cases| the result,
// converted back with UTF16ToWide(), must equal the path text placed between
// \x202a (LRE) and \x202c.
244 TEST_F(RTLTest, WrapPathWithLTRFormatting) {
245 const wchar_t* cases[] = {
246 // Test common path, such as "c:\foo\bar".
248 // Test path with file name, such as "c:\foo\bar\test.jpg".
249 L"c:/foo/bar/test.jpg",
250 // Test path ending with punctuation, such as "c:\(foo)\bar.".
252 // Test path ending with separator, such as "c:\foo\bar\".
254 // Test path with RTL character.
256 // Test path with 2 level RTL directory names.
258 // Test path with mixed RTL/LTR directory names and ending with punctuation.
259 L"c:/\x05d0/\x0622/(foo)/b.a.r.",
260 // Test path without drive name, such as "/foo/bar/test.jpg".
261 L"/foo/bar/test.jpg",
262 // Test path starting with current directory, such as "./foo".
264 // Test path starting with parent directory, such as "../foo/bar.jpg".
266 // Test absolute path, such as "//foo/bar.jpg".
268 // Test path with mixed RTL/LTR directory names.
269 L"c:/foo/\x05d0/\x0622/\x05d1.jpg",
274 for (size_t i = 0; i < arraysize(cases); ++i) {
// NOTE(review): the two constructions of |path| / |wrapped_expected| below
// are presumably alternatives selected by a platform conditional that is not
// visible here - confirm.
// Variant 1: turn '/' into '\\' and build the FilePath from the wide string.
277 std::wstring win_path(cases[i]);
278 std::replace(win_path.begin(), win_path.end(), '/', '\\');
279 path = FilePath(win_path);
280 std::wstring wrapped_expected =
281 std::wstring(L"\x202a") + win_path + L"\x202c";
// Variant 2: keep '/' separators and build the FilePath from the
// SysWideToNativeMB() conversion of the case.
283 path = FilePath(base::SysWideToNativeMB(cases[i]));
284 std::wstring wrapped_expected =
285 std::wstring(L"\x202a") + cases[i] + L"\x202c";
287 string16 localized_file_path_string;
288 WrapPathWithLTRFormatting(path, &localized_file_path_string);
290 std::wstring wrapped_actual = UTF16ToWide(localized_file_path_string);
291 EXPECT_EQ(wrapped_expected, wrapped_actual);
// Checks WrapStringWithLTRFormatting() / WrapStringWithRTLFormatting():
// |empty| must still be empty after either call, and every entry of |cases|
// must come back as the unchanged input preceded by the matching embedding
// mark and followed by kPopDirectionalFormatting.
295 TEST_F(RTLTest, WrapString) {
296 const wchar_t* cases[] = {
303 L"\x5d0" L"a" L"\x5d1",
306 const bool was_rtl = IsRTL();
308 for (size_t i = 0; i < 2; ++i) {
309 // Toggle the application default text direction (to try each direction).
313 WrapStringWithLTRFormatting(&empty);
314 EXPECT_TRUE(empty.empty());
315 WrapStringWithRTLFormatting(&empty);
316 EXPECT_TRUE(empty.empty());
// NOTE(review): this `i` appears to shadow the `i` of the enclosing
// two-pass loop - confirm and consider renaming one of them.
318 for (size_t i = 0; i < arraysize(cases); ++i) {
319 string16 input = WideToUTF16(cases[i]);
320 string16 ltr_wrap = input;
321 WrapStringWithLTRFormatting(&ltr_wrap);
322 EXPECT_EQ(ltr_wrap[0], kLeftToRightEmbeddingMark);
// Everything between the first and last code unit must be the input.
323 EXPECT_EQ(ltr_wrap.substr(1, ltr_wrap.length() - 2), input);
324 EXPECT_EQ(ltr_wrap[ltr_wrap.length() -1], kPopDirectionalFormatting);
326 string16 rtl_wrap = input;
327 WrapStringWithRTLFormatting(&rtl_wrap);
328 EXPECT_EQ(rtl_wrap[0], kRightToLeftEmbeddingMark);
329 EXPECT_EQ(rtl_wrap.substr(1, rtl_wrap.length() - 2), input);
330 EXPECT_EQ(rtl_wrap[rtl_wrap.length() -1], kPopDirectionalFormatting);
// IsRTL() must report the same value as when the test started.
334 EXPECT_EQ(was_rtl, IsRTL());
// Checks GetDisplayStringInLTRDirectionality(): each case carries a path plus
// wrap_ltr / wrap_rtl flags; the flag matching the current IsRTL() value says
// whether the output is expected to differ from the input.
337 TEST_F(RTLTest, GetDisplayStringInLTRDirectionality) {
343 { L"test", false, true },
344 { L"test.html", false, true },
345 { L"\x05d0\x05d1\x05d2", true, true },
346 { L"\x05d0\x05d1\x05d2.txt", true, true },
347 { L"\x05d0" L"abc", true, true },
348 { L"\x05d0" L"abc.txt", true, true },
349 { L"abc\x05d0\x05d1", false, true },
350 { L"abc\x05d0\x05d1.jpg", false, true },
353 const bool was_rtl = IsRTL();
355 for (size_t i = 0; i < 2; ++i) {
356 // Toggle the application default text direction (to try each direction).
// NOTE(review): this `i` appears to shadow the `i` of the enclosing
// two-pass loop - confirm and consider renaming one of them.
358 for (size_t i = 0; i < ARRAYSIZE_UNSAFE(cases); ++i) {
359 string16 input = WideToUTF16(cases[i].path);
360 string16 output = GetDisplayStringInLTRDirectionality(input);
361 // Test the expected wrapping behavior for the current UI directionality.
362 if (IsRTL() ? cases[i].wrap_rtl : cases[i].wrap_ltr)
363 EXPECT_NE(output, input);
// Presumably the `else` branch (its keyword line is not visible here).
365 EXPECT_EQ(output, input);
// IsRTL() must report the same value as when the test started.
369 EXPECT_EQ(was_rtl, IsRTL());
// Checks GetTextDirectionForLocale() for a list of locale names: the
// Arabic/Hebrew/Farsi/Urdu style locales must map to RIGHT_TO_LEFT and the
// remaining ones to LEFT_TO_RIGHT.
372 TEST_F(RTLTest, GetTextDirection) {
373 EXPECT_EQ(RIGHT_TO_LEFT, GetTextDirectionForLocale("ar"));
374 EXPECT_EQ(RIGHT_TO_LEFT, GetTextDirectionForLocale("ar_EG"));
375 EXPECT_EQ(RIGHT_TO_LEFT, GetTextDirectionForLocale("he"));
376 EXPECT_EQ(RIGHT_TO_LEFT, GetTextDirectionForLocale("he_IL"));
377 // iw is an obsolete code for Hebrew.
378 EXPECT_EQ(RIGHT_TO_LEFT, GetTextDirectionForLocale("iw"));
379 // Although we're not yet localized to Farsi and Urdu, we
380 // do have the text layout direction information for them.
381 EXPECT_EQ(RIGHT_TO_LEFT, GetTextDirectionForLocale("fa"));
382 EXPECT_EQ(RIGHT_TO_LEFT, GetTextDirectionForLocale("ur"));
384 // Enable these when we include the minimal locale data for Azerbaijani
385 // written in Arabic and Dhivehi. At the moment, our copy of
386 // ICU data does not have entries for them.
387 EXPECT_EQ(RIGHT_TO_LEFT, GetTextDirectionForLocale("az_Arab"));
388 // Dhivehi that uses Thaana script.
389 EXPECT_EQ(RIGHT_TO_LEFT, GetTextDirectionForLocale("dv"));
391 EXPECT_EQ(LEFT_TO_RIGHT, GetTextDirectionForLocale("en"));
392 // Chinese in China with '-'.
393 EXPECT_EQ(LEFT_TO_RIGHT, GetTextDirectionForLocale("zh-CN"));
394 // Filipino : 3-letter code
395 EXPECT_EQ(LEFT_TO_RIGHT, GetTextDirectionForLocale("fil"));
397 EXPECT_EQ(LEFT_TO_RIGHT, GetTextDirectionForLocale("ru"));
398 // Japanese that uses multiple scripts
399 EXPECT_EQ(LEFT_TO_RIGHT, GetTextDirectionForLocale("ja"));
// Round-trip check: for each case, after AdjustStringForLocaleDirection()
// the string must differ from the original, and
// UnadjustStringForLocaleDirection() must succeed and restore it.
402 TEST_F(RTLTest, UnadjustStringForLocaleDirection) {
403 // These test strings are borrowed from other tests in this file.
// NOTE(review): the entries visible here match the
// GetFirstStrongCharacterDirection table rather than
// WrapPathWithLTRFormatting - confirm for the remaining entries.
404 const wchar_t* cases[] = {
410 L"\x202a \x05d0 foo bar",
411 L"\x202d \x05d0 foo bar",
412 L"\x202b foo \x05d0 bar",
413 L"\x202e foo \x05d0 bar",
414 L"\x0622 foo \x05d0 bar",
417 const bool was_rtl = IsRTL();
419 for (size_t i = 0; i < 2; ++i) {
420 // Toggle the application default text direction (to try each direction).
// NOTE(review): this `i` appears to shadow the `i` of the enclosing
// two-pass loop - confirm and consider renaming one of them.
423 for (size_t i = 0; i < arraysize(cases); ++i) {
424 string16 test_case = WideToUTF16(cases[i]);
425 string16 adjusted_string = test_case;
// The statement guarded by this check is not visible here; presumably
// cases that are not adjusted are skipped - confirm.
427 if (!AdjustStringForLocaleDirection(&adjusted_string))
430 EXPECT_NE(test_case, adjusted_string);
431 EXPECT_TRUE(UnadjustStringForLocaleDirection(&adjusted_string));
432 EXPECT_EQ(test_case, adjusted_string) << " for test case [" << test_case
433 << "] with IsRTL() == " << IsRTL();
// IsRTL() must report the same value as when the test started.
437 EXPECT_EQ(was_rtl, IsRTL());