1 // Copyright 2011 The Chromium Authors
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
5 #include "base/i18n/rtl.h"
12 #include "base/check_op.h"
13 #include "base/command_line.h"
14 #include "base/files/file_path.h"
15 #include "base/i18n/base_i18n_switches.h"
16 #include "base/logging.h"
17 #include "base/strings/string_split.h"
18 #include "base/strings/string_util.h"
19 #include "base/strings/sys_string_conversions.h"
20 #include "base/strings/utf_string_conversions.h"
21 #include "build/build_config.h"
22 #include "third_party/icu/source/common/unicode/locid.h"
23 #include "third_party/icu/source/common/unicode/uchar.h"
24 #include "third_party/icu/source/common/unicode/uscript.h"
25 #include "third_party/icu/source/i18n/unicode/coll.h"
28 #include "base/debug/crash_logging.h"
29 #include "base/ios/ios_util.h"
34 // Extract language, country and variant, but ignore keywords. For example,
35 // en-US, ca@valencia, ca-ES@valencia.
// The language, country, variant and script are read from |locale| through
// the ICU accessors. The language part falls back to "und" when ICU returns a
// null or empty language, and a non-empty variant is appended in lower case
// after an @ separator.
// NOTE(review): the declaration that receives the language expression, the
// bodies of the script and country branches, and the return statement are not
// visible in this listing - confirm them against the full file.
36 std::string GetLocaleString(const icu::Locale& locale) {
37 const char* language = locale.getLanguage();
38 const char* country = locale.getCountry();
39 const char* variant = locale.getVariant();
40 const char* script = locale.getScript();
43 (language != nullptr && *language != '\0') ? language : "und";
45 if (script != nullptr && *script != '\0') {
50 if (country != nullptr && *country != '\0') {
55 if (variant != nullptr && *variant != '\0')
56 result += '@' + base::ToLowerASCII(variant);
61 // Returns LEFT_TO_RIGHT or RIGHT_TO_LEFT if |character| has strong
62 // directionality, returns UNKNOWN_DIRECTION if it doesn't. Please refer to
63 // http://unicode.org/reports/tr9/ for more information.
// A kForceTextDirection switch value equal to kForceDirectionRTL or
// kForceDirectionLTR is returned before any per-character lookup happens.
// NOTE(review): the statement that gates the override on |has_switch|, the
// switch header and some case labels are not visible in this listing.
64 base::i18n::TextDirection GetCharacterDirection(UChar32 character) {
// Function-local static: the presence of the switch is looked up only once.
65 static bool has_switch = base::CommandLine::ForCurrentProcess()->HasSwitch(
66 switches::kForceTextDirection);
68 base::CommandLine* command_line = base::CommandLine::ForCurrentProcess();
69 std::string force_flag =
70 command_line->GetSwitchValueASCII(switches::kForceTextDirection);
72 if (force_flag == switches::kForceDirectionRTL)
73 return base::i18n::RIGHT_TO_LEFT;
74 if (force_flag == switches::kForceDirectionLTR)
75 return base::i18n::LEFT_TO_RIGHT;
77 // Now that we have the character, we use ICU in order to query for the
78 // appropriate Unicode BiDi character type.
79 int32_t property = u_getIntPropertyValue(character, UCHAR_BIDI_CLASS);
// Arabic letters and the RTL embedding/override controls report RIGHT_TO_LEFT.
82 case U_RIGHT_TO_LEFT_ARABIC:
83 case U_RIGHT_TO_LEFT_EMBEDDING:
84 case U_RIGHT_TO_LEFT_OVERRIDE:
85 return base::i18n::RIGHT_TO_LEFT;
// The LTR embedding/override controls report LEFT_TO_RIGHT.
87 case U_LEFT_TO_RIGHT_EMBEDDING:
88 case U_LEFT_TO_RIGHT_OVERRIDE:
89 return base::i18n::LEFT_TO_RIGHT;
91 return base::i18n::UNKNOWN_DIRECTION;
99 // Represents the locale-specific ICU text direction.
// UNKNOWN_DIRECTION means the value has not been computed yet:
// SetICUDefaultLocale() resets it to that value, and it is filled in from the
// default ICU locale the next time the direction is queried.
100 static TextDirection g_icu_text_direction = UNKNOWN_DIRECTION;
102 // Convert the ICU default locale to a string.
// Returns GetLocaleString() applied to icu::Locale::getDefault().
// NOTE(review): the closing brace of this function is not visible in this
// listing.
103 std::string GetConfiguredLocale() {
104 return GetLocaleString(icu::Locale::getDefault());
107 // Convert the ICU canonicalized locale to a string.
// |locale| is canonicalized with icu::Locale::createCanonical() and the result
// is formatted with GetLocaleString().
// NOTE(review): the closing brace of this function is not visible in this
// listing.
108 std::string GetCanonicalLocale(const std::string& locale) {
109 return GetLocaleString(icu::Locale::createCanonical(locale.c_str()));
112 // Convert Chrome locale name to ICU locale name
// Only names whose first two characters are "es" are examined further; every
// other name is returned unchanged. For "es-419" the current ICU default
// locale is consulted to pick a concrete country.
// NOTE(review): the return statements of the "es" and "es-419" branches are
// not visible in this listing - confirm the mapped values in the full file.
113 std::string ICULocaleName(const std::string& locale_string) {
114 // If not Spanish, just return it.
115 if (locale_string.substr(0, 2) != "es")
116 return locale_string;
117 // Expand es to es-ES.
118 if (EqualsCaseInsensitiveASCII(locale_string, "es"))
120 // Map es-419 (Latin American Spanish) to es-FOO depending on the system
121 // locale. If it's es-RR other than es-ES, map to es-RR. Otherwise, map
122 // to es-MX (the most populous in Spanish-speaking Latin America).
123 if (EqualsCaseInsensitiveASCII(locale_string, "es-419")) {
124 const icu::Locale& locale = icu::Locale::getDefault();
125 std::string language = locale.getLanguage();
126 const char* country = locale.getCountry();
// True when the system locale is Spanish with a country other than ES.
127 if (EqualsCaseInsensitiveASCII(language, "es") &&
128 !EqualsCaseInsensitiveASCII(country, "es")) {
135 // Currently, Chrome has only "es" and "es-419", but later we may have
136 // more specific "es-RR".
137 return locale_string;
// Makes the ICU locale derived from |locale_string| the process-wide ICU
// default. The name is first mapped through ICULocaleName(). A locale with a
// non-empty language is installed with icu::Locale::setDefault(); the error
// log and the en-US fallback below cover the failure path.
// NOTE(review): the directive closing the iOS-only section and the branch
// keyword that presumably separates the fallback path are not visible in this
// listing. |error_code| is not inspected in the visible lines - confirm.
140 void SetICUDefaultLocale(const std::string& locale_string) {
141 #if BUILDFLAG(IS_IOS)
// On iOS the requested locale is recorded in the icu_locale_input crash key.
142 static base::debug::CrashKeyString* crash_key_locale =
143 base::debug::AllocateCrashKeyString("icu_locale_input",
144 base::debug::CrashKeySize::Size256);
145 base::debug::SetCrashKeyString(crash_key_locale, locale_string);
147 icu::Locale locale(ICULocaleName(locale_string).c_str());
148 UErrorCode error_code = U_ZERO_ERROR;
149 const char* lang = locale.getLanguage();
150 if (lang != nullptr && *lang != '\0') {
151 icu::Locale::setDefault(locale, error_code);
153 LOG(ERROR) << "Failed to set the ICU default locale to " << locale_string
154 << ". Falling back to en-US.";
155 icu::Locale::setDefault(icu::Locale::getUS(), error_code);
// Invalidate the cached direction so it is recomputed for the new default.
157 g_icu_text_direction = UNKNOWN_DIRECTION;
// Test helper: switches the ICU default locale to "he" when |rtl| is true and
// to "en" otherwise, then DCHECKs that IsRTL() reports the requested
// direction.
// NOTE(review): the closing brace of this function is not visible in this
// listing.
164 void SetRTLForTesting(bool rtl) {
165 SetICUDefaultLocale(rtl ? "he" : "en");
166 DCHECK_EQ(rtl, IsRTL());
// NOTE(review): the signature that opens this body is not visible in this
// listing; the code below reads as a boolean RTL query - confirm its name
// against the full file.
// The direction is resolved from the default ICU locale only while the cached
// value is still UNKNOWN_DIRECTION; the result is whether the cached value is
// RIGHT_TO_LEFT.
170 if (g_icu_text_direction == UNKNOWN_DIRECTION) {
171 const icu::Locale& locale = icu::Locale::getDefault();
172 g_icu_text_direction = GetTextDirectionForLocaleInStartUp(locale.getName());
174 return g_icu_text_direction == RIGHT_TO_LEFT;
// Returns the UI direction that is being forced, or UNKNOWN_DIRECTION when
// nothing forces one. On iOS base::ios::IsInForcedRTL() is checked first; the
// value of the kForceUIDirection command-line switch is then compared with
// kForceDirectionLTR and kForceDirectionRTL.
// NOTE(review): the directive closing the iOS-only section and the closing
// braces are not visible in this listing.
177 TextDirection GetForcedTextDirection() {
178 // On iOS, check for RTL forcing.
179 #if BUILDFLAG(IS_IOS)
180 if (base::ios::IsInForcedRTL())
181 return base::i18n::RIGHT_TO_LEFT;
184 base::CommandLine* command_line = base::CommandLine::ForCurrentProcess();
185 if (command_line->HasSwitch(switches::kForceUIDirection)) {
186 std::string force_flag =
187 command_line->GetSwitchValueASCII(switches::kForceUIDirection);
189 if (force_flag == switches::kForceDirectionLTR)
190 return base::i18n::LEFT_TO_RIGHT;
192 if (force_flag == switches::kForceDirectionRTL)
193 return base::i18n::RIGHT_TO_LEFT;
196 return base::i18n::UNKNOWN_DIRECTION;
// Determines the direction for |locale_name| from a fixed list of RTL language
// codes, after honouring any forced direction. Only the first component of
// |locale_name|, split on the characters - and _, is looked up.
// NOTE(review): the end of the comment introducing the list and the value
// argument of std::binary_search are not visible in this listing.
199 TextDirection GetTextDirectionForLocaleInStartUp(const char* locale_name) {
200 // Check for direction forcing.
201 TextDirection forced_direction = GetForcedTextDirection();
202 if (forced_direction != UNKNOWN_DIRECTION)
203 return forced_direction;
205 // This list needs to be updated in alphabetical order if we add more RTL
207 static const char kRTLLanguageCodes[][3] = {"ar", "fa", "he", "iw", "ur"};
208 std::vector<StringPiece> locale_split =
209 SplitStringPiece(locale_name, "-_", KEEP_WHITESPACE, SPLIT_WANT_ALL);
// NOTE(review): element 0 is read without an emptiness check - confirm that
// SplitStringPiece never returns an empty vector for the inputs used here.
210 const StringPiece& language_code = locale_split[0];
// std::binary_search relies on kRTLLanguageCodes being sorted, which is why
// the list has to stay in alphabetical order.
211 if (std::binary_search(kRTLLanguageCodes,
212 kRTLLanguageCodes + std::size(kRTLLanguageCodes),
214 return RIGHT_TO_LEFT;
215 return LEFT_TO_RIGHT;
// Determines the direction for |locale_name| by asking ICU for the character
// orientation of the locale, after honouring any forced direction. A failing
// ICU status is only caught by the DCHECK; the orientation value is used
// regardless.
// NOTE(review): the closing brace of this function is not visible in this
// listing.
218 TextDirection GetTextDirectionForLocale(const char* locale_name) {
219 // Check for direction forcing.
220 TextDirection forced_direction = GetForcedTextDirection();
221 if (forced_direction != UNKNOWN_DIRECTION)
222 return forced_direction;
224 UErrorCode status = U_ZERO_ERROR;
225 ULayoutType layout_dir = uloc_getCharacterOrientation(locale_name, &status);
226 DCHECK(U_SUCCESS(status));
227 // Treat anything other than RTL as LTR.
228 return (layout_dir != ULOC_LAYOUT_RTL) ? LEFT_TO_RIGHT : RIGHT_TO_LEFT;
// Walks |text| forwards, decoding one UTF-16 code point per iteration with
// U16_NEXT and classifying it with GetCharacterDirection(). LEFT_TO_RIGHT is
// the result when the walk finishes without reporting a strong character.
// NOTE(review): the declarations of |position| and |character| and the
// statement executed when a strong character is found are not visible in this
// listing.
231 TextDirection GetFirstStrongCharacterDirection(const std::u16string& text) {
232 const char16_t* string = text.c_str();
233 size_t length = text.length();
235 while (position < length) {
237 size_t next_position = position;
238 U16_NEXT(string, next_position, length, character);
239 TextDirection direction = GetCharacterDirection(character);
240 if (direction != UNKNOWN_DIRECTION)
242 position = next_position;
244 return LEFT_TO_RIGHT;
// Walks |text| backwards from its end, decoding one UTF-16 code point per
// iteration with U16_PREV and classifying it with GetCharacterDirection().
// LEFT_TO_RIGHT is the result when the walk reaches the start without
// reporting a strong character.
// NOTE(review): the declaration of |character| and the statement executed when
// a strong character is found are not visible in this listing.
247 TextDirection GetLastStrongCharacterDirection(const std::u16string& text) {
248 const char16_t* string = text.c_str();
249 size_t position = text.length();
250 while (position > 0) {
252 size_t prev_position = position;
253 U16_PREV(string, 0, prev_position, character);
254 TextDirection direction = GetCharacterDirection(character);
255 if (direction != UNKNOWN_DIRECTION)
257 position = prev_position;
259 return LEFT_TO_RIGHT;
// Classifies |text| as a whole. UNKNOWN_DIRECTION is returned as soon as a
// strong character disagrees with the direction already held in |result|, and
// LEFT_TO_RIGHT is returned when |result| is still UNKNOWN_DIRECTION after the
// scan.
// NOTE(review): the declarations of |position| and |character|, the statement
// that stores a direction into |result| and the final return are not visible
// in this listing.
262 TextDirection GetStringDirection(const std::u16string& text) {
263 const char16_t* string = text.c_str();
264 size_t length = text.length();
267 TextDirection result(UNKNOWN_DIRECTION);
268 while (position < length) {
270 size_t next_position = position;
271 U16_NEXT(string, next_position, length, character);
272 TextDirection direction = GetCharacterDirection(character);
273 if (direction != UNKNOWN_DIRECTION) {
// Mixed strong directions: the string has no single direction.
274 if (result != UNKNOWN_DIRECTION && result != direction)
275 return UNKNOWN_DIRECTION;
278 position = next_position;
281 // Handle the case of a string not containing any strong directionality
282 // characters defaulting to LEFT_TO_RIGHT.
283 if (result == UNKNOWN_DIRECTION)
284 return LEFT_TO_RIGHT;
289 #if BUILDFLAG(IS_WIN)
// Variant compiled when BUILDFLAG(IS_WIN) is set. The guard on the first line
// covers a non-RTL UI or an empty |text|; past it, |text| is wrapped with LTR
// or RTL formatting depending on whether it contains strong RTL characters.
// NOTE(review): the return statements and the condition that selects between
// the two wrap calls are not visible in this listing.
290 bool AdjustStringForLocaleDirection(std::u16string* text) {
291 if (!IsRTL() || text->empty())
294 // Marking the string as LTR if the locale is RTL and the string does not
295 // contain strong RTL characters. Otherwise, mark the string as RTL.
296 bool has_rtl_chars = StringContainsStrongRTLChars(*text);
298 WrapStringWithLTRFormatting(text);
300 WrapStringWithRTLFormatting(text);
// Variant compiled when BUILDFLAG(IS_WIN) is set. The guard on the first line
// covers a non-RTL UI or an empty |text|; past it, |text| is replaced by the
// result of StripWrappingBidiControlCharacters().
// NOTE(review): the return statements are not visible in this listing.
305 bool UnadjustStringForLocaleDirection(std::u16string* text) {
306 if (!IsRTL() || text->empty())
309 *text = StripWrappingBidiControlCharacters(*text);
// Variant for builds without BUILDFLAG(IS_WIN). Three cases are handled: LTR
// UI with strong RTL characters, RTL UI with strong RTL characters, and RTL UI
// without them. The first two wrap |text| with RTL formatting, the third with
// LTR formatting; the mark appended at the end is kLeftToRightMark for an LTR
// UI and kRightToLeftMark for an RTL UI.
// NOTE(review): the mark argument of each insert call, the return statements
// and parts of the explanatory comment are not visible in this listing.
313 bool AdjustStringForLocaleDirection(std::u16string* text) {
314 // On OS X & GTK the directionality of a label is determined by the first
315 // strongly directional character.
316 // However, we want to make sure that in an LTR-language-UI all strings are
317 // left aligned and vice versa.
318 // A problem can arise if we display a string which starts with user input.
319 // User input may be of the opposite directionality to the UI. So the whole
320 // string will be displayed in the opposite directionality, e.g. if we want to
321 // display in an LTR UI [such as US English]:
323 // EMAN_NOISNETXE is now installed.
325 // Since EXTENSION_NAME begins with a strong RTL char, the label's
326 // directionality will be set to RTL and the string will be displayed visually
329 // .is now installed EMAN_NOISNETXE
331 // In order to solve this issue, we prepend an LRM to the string. An LRM is a
332 // strongly directional LTR char.
333 // We also append an LRM at the end, which ensures that we're in an LTR
336 // Unlike Windows, Linux and OS X can correctly display RTL glyphs out of the
337 // box so there is no issue with displaying zero-width bidi control characters
338 // on any system. Thus no need for the !IsRTL() check here.
342 bool ui_direction_is_rtl = IsRTL();
344 bool has_rtl_chars = StringContainsStrongRTLChars(*text);
// LTR UI showing text that contains strong RTL characters.
345 if (!ui_direction_is_rtl && has_rtl_chars) {
346 WrapStringWithRTLFormatting(text);
347 text->insert(static_cast<size_t>(0), static_cast<size_t>(1),
349 text->push_back(kLeftToRightMark);
// RTL UI showing text that contains strong RTL characters.
350 } else if (ui_direction_is_rtl && has_rtl_chars) {
351 WrapStringWithRTLFormatting(text);
352 text->insert(static_cast<size_t>(0), static_cast<size_t>(1),
354 text->push_back(kRightToLeftMark);
// RTL UI showing text without strong RTL characters.
355 } else if (ui_direction_is_rtl) {
356 WrapStringWithLTRFormatting(text);
357 text->insert(static_cast<size_t>(0), static_cast<size_t>(1),
359 text->push_back(kRightToLeftMark);
// Variant for builds without BUILDFLAG(IS_WIN). The first and last characters
// of |text| are tested against kLeftToRightMark and kRightToLeftMark, the
// range [begin_index, end_index] is copied out, and wrapping bidi control
// characters are stripped from that copy before it is stored back in |text|.
// NOTE(review): any guard that runs before text->at(), the index adjustments
// made when a mark is found and the return statements are not visible in this
// listing.
367 bool UnadjustStringForLocaleDirection(std::u16string* text) {
371 size_t begin_index = 0;
372 char16_t begin = text->at(begin_index);
373 if (begin == kLeftToRightMark ||
374 begin == kRightToLeftMark) {
378 size_t end_index = text->length() - 1;
379 char16_t end = text->at(end_index);
380 if (end == kLeftToRightMark ||
381 end == kRightToLeftMark) {
385 std::u16string unmarked_text =
386 text->substr(begin_index, end_index - begin_index + 1);
387 *text = StripWrappingBidiControlCharacters(unmarked_text);
391 #endif // !BUILDFLAG(IS_WIN)
// Scans |text| for embedding and override marks and for
// kPopDirectionalFormatting, then appends |count| kPopDirectionalFormatting
// characters to the end of |text|.
// NOTE(review): the declaration of |count| and the statements that update it
// inside the loop are not visible in this listing; presumably it tracks the
// number of marks that are still open - confirm against the full file.
393 void EnsureTerminatedDirectionalFormatting(std::u16string* text) {
395 for (auto c : *text) {
396 if (c == kLeftToRightEmbeddingMark || c == kRightToLeftEmbeddingMark ||
397 c == kLeftToRightOverride || c == kRightToLeftOverride) {
// A pop is only taken into account while |count| is positive.
399 } else if (c == kPopDirectionalFormatting && count > 0) {
403 for (int j = 0; j < count; j++)
404 text->push_back(kPopDirectionalFormatting);
// Runs EnsureTerminatedDirectionalFormatting() on |text| and then
// AdjustStringForLocaleDirection(); the return value of the latter is
// discarded.
// NOTE(review): the closing brace of this function is not visible in this
// listing.
407 void SanitizeUserSuppliedString(std::u16string* text) {
408 EnsureTerminatedDirectionalFormatting(text);
409 AdjustStringForLocaleDirection(text);
// Walks |text| forwards one UTF-16 code point at a time (U16_NEXT) and tests
// whether the bidi class of a code point is U_RIGHT_TO_LEFT or
// U_RIGHT_TO_LEFT_ARABIC. Unlike GetCharacterDirection(), the RTL embedding
// and override controls are not part of this test.
// NOTE(review): the declarations of |position| and |character| and the return
// statements are not visible in this listing.
412 bool StringContainsStrongRTLChars(const std::u16string& text) {
413 const char16_t* string = text.c_str();
414 size_t length = text.length();
416 while (position < length) {
418 size_t next_position = position;
419 U16_NEXT(string, next_position, length, character);
421 // Now that we have the character, we use ICU in order to query for the
422 // appropriate Unicode BiDi character type.
423 int32_t property = u_getIntPropertyValue(character, UCHAR_BIDI_CLASS);
424 if ((property == U_RIGHT_TO_LEFT) || (property == U_RIGHT_TO_LEFT_ARABIC))
427 position = next_position;
// Modifies |text| in place: kLeftToRightEmbeddingMark is inserted at index 0
// and kPopDirectionalFormatting is appended at the end.
// NOTE(review): the lines between the signature and the first insert are not
// visible in this listing - confirm whether an early-out exists there.
433 void WrapStringWithLTRFormatting(std::u16string* text) {
437 // Inserting an LRE (Left-To-Right Embedding) mark as the first character.
438 text->insert(static_cast<size_t>(0), static_cast<size_t>(1),
439 kLeftToRightEmbeddingMark);
441 // Inserting a PDF (Pop Directional Formatting) mark as the last character.
442 text->push_back(kPopDirectionalFormatting);
// Modifies |text| in place: kRightToLeftEmbeddingMark is inserted at index 0
// and kPopDirectionalFormatting is appended at the end.
// NOTE(review): the lines between the signature and the first insert are not
// visible in this listing - confirm whether an early-out exists there.
445 void WrapStringWithRTLFormatting(std::u16string* text) {
449 // Inserting an RLE (Right-To-Left Embedding) mark as the first character.
450 text->insert(static_cast<size_t>(0), static_cast<size_t>(1),
451 kRightToLeftEmbeddingMark);
453 // Inserting a PDF (Pop Directional Formatting) mark as the last character.
454 text->push_back(kPopDirectionalFormatting);
// Appends LRE + |path| + PDF to |rtl_safe_path|; existing contents of
// |rtl_safe_path| are kept because only push_back and append are used.
// The path is converted to UTF-16 per platform: UTF8ToUTF16 on Apple,
// AsString16 on Windows, and SysNativeMBToWide followed by WideToUTF16
// elsewhere.
// NOTE(review): the directive closing the platform-specific section and the
// closing brace are not visible in this listing.
457 void WrapPathWithLTRFormatting(const FilePath& path,
458 std::u16string* rtl_safe_path) {
459 // Wrap the overall path with LRE-PDF pair which essentially marks the
460 // string as a Left-To-Right string.
461 // Inserting an LRE (Left-To-Right Embedding) mark as the first character.
462 rtl_safe_path->push_back(kLeftToRightEmbeddingMark);
463 #if BUILDFLAG(IS_APPLE)
464 rtl_safe_path->append(UTF8ToUTF16(path.value()));
465 #elif BUILDFLAG(IS_WIN)
466 rtl_safe_path->append(AsString16(path.value()));
467 #else // BUILDFLAG(IS_POSIX) && !BUILDFLAG(IS_APPLE)
468 std::wstring wide_path = base::SysNativeMBToWide(path.value());
469 rtl_safe_path->append(WideToUTF16(wide_path));
471 // Inserting a PDF (Pop Directional Formatting) mark as the last character.
472 rtl_safe_path->push_back(kPopDirectionalFormatting);
// When the UI is RTL, or the first strong character of |text| is RTL, a
// mutable copy of |text| is wrapped with LTR formatting.
// NOTE(review): the return statements for both paths are not visible in this
// listing.
475 std::u16string GetDisplayStringInLTRDirectionality(const std::u16string& text) {
476 // Always wrap the string in RTL UI (it may be appended to RTL string).
477 // Also wrap strings with an RTL first strong character direction in LTR UI.
478 if (IsRTL() || GetFirstStrongCharacterDirection(text) == RIGHT_TO_LEFT) {
479 std::u16string text_mutable(text);
480 WrapStringWithLTRFormatting(&text_mutable);
// Returns the substring [begin_index, end_index] of |text|. The first
// character is tested against the four embedding/override marks and the last
// character against kPopDirectionalFormatting.
// NOTE(review): any guard for an empty |text|, the index adjustments made when
// a mark is found and the closing brace are not visible in this listing.
486 std::u16string StripWrappingBidiControlCharacters(const std::u16string& text) {
489 size_t begin_index = 0;
490 char16_t begin = text[begin_index];
491 if (begin == kLeftToRightEmbeddingMark ||
492 begin == kRightToLeftEmbeddingMark ||
493 begin == kLeftToRightOverride ||
494 begin == kRightToLeftOverride)
496 size_t end_index = text.length() - 1;
497 if (text[end_index] == kPopDirectionalFormatting)
499 return text.substr(begin_index, end_index - begin_index + 1);