1 // Copyright (C) 2014 Google Inc.
3 // Licensed under the Apache License, Version 2.0 (the "License");
4 // you may not use this file except in compliance with the License.
5 // You may obtain a copy of the License at
7 // http://www.apache.org/licenses/LICENSE-2.0
9 // Unless required by applicable law or agreed to in writing, software
10 // distributed under the License is distributed on an "AS IS" BASIS,
11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 // See the License for the specific language governing permissions and
13 // limitations under the License.
23 #include "util/string_split.h"
26 namespace addressinput {
28 Language::Language(const std::string& language_tag) : tag(language_tag),
30 has_latin_script(false) {
31 // Character '-' is the separator for subtags in the BCP 47. However, some
32 // legacy code generates tags with '_' instead of '-'.
33 static const char kSubtagsSeparator = '-';
34 static const char kAlternativeSubtagsSeparator = '_';
36 tag.begin(), tag.end(), kAlternativeSubtagsSeparator, kSubtagsSeparator);
38 // OK to use 'tolower' because BCP 47 tags are always in ASCII.
39 std::string lowercase = tag;
41 lowercase.begin(), lowercase.end(), lowercase.begin(), tolower);
43 base = lowercase.substr(0, lowercase.find(kSubtagsSeparator));
45 // The lowercase BCP 47 subtag for Latin script.
46 static const char kLowercaseLatinScript[] = "latn";
47 std::vector<std::string> subtags;
48 SplitString(lowercase, kSubtagsSeparator, &subtags);
50 // Support only the second and third position for the script.
52 (subtags.size() > 1 && subtags[1] == kLowercaseLatinScript) ||
53 (subtags.size() > 2 && subtags[2] == kLowercaseLatinScript);
56 Language::~Language() {}
58 Language ChooseBestAddressLanguage(const Rule& address_region_rule,
59 const Language& ui_language) {
60 if (address_region_rule.GetLanguages().empty()) {
64 std::vector<Language> available_languages;
65 for (std::vector<std::string>::const_iterator
66 language_tag_it = address_region_rule.GetLanguages().begin();
67 language_tag_it != address_region_rule.GetLanguages().end();
69 available_languages.push_back(Language(*language_tag_it));
72 if (ui_language.tag.empty()) {
73 return available_languages.front();
76 bool has_latin_format = !address_region_rule.GetLatinFormat().empty();
78 // The conventionally formatted BCP 47 Latin script with a preceding subtag
80 static const char kLatinScriptSuffix[] = "-Latn";
81 Language latin_script_language(
82 available_languages.front().base + kLatinScriptSuffix);
83 if (has_latin_format && ui_language.has_latin_script) {
84 return latin_script_language;
87 for (std::vector<Language>::const_iterator
88 available_lang_it = available_languages.begin();
89 available_lang_it != available_languages.end(); ++available_lang_it) {
90 // Base language comparison works because no region supports the same base
91 // language with different scripts, for now. For example, no region supports
92 // "zh-Hant" and "zh-Hans" at the same time.
93 if (ui_language.base == available_lang_it->base) {
94 return *available_lang_it;
98 return has_latin_format ? latin_script_language : available_languages.front();
101 } // namespace addressinput