'i18n-extension.h',
'i18n-locale.cc',
'i18n-locale.h',
+ 'i18n-utils.cc',
+ 'i18n-utils.h',
+ 'language-matcher.cc',
+ 'language-matcher.h',
'<(SHARED_INTERMEDIATE_DIR)/i18n-js.cc',
],
'include_dirs': [
v8::Handle<v8::String> name) {
if (name->Equals(v8::String::New("NativeJSLocale"))) {
return v8::FunctionTemplate::New(I18NLocale::JSLocale);
- } else if (name->Equals(v8::String::New("NativeJSAvailableLocales"))) {
- return v8::FunctionTemplate::New(I18NLocale::JSAvailableLocales);
- } else if (name->Equals(v8::String::New("NativeJSMaximizedLocale"))) {
- return v8::FunctionTemplate::New(I18NLocale::JSMaximizedLocale);
- } else if (name->Equals(v8::String::New("NativeJSMinimizedLocale"))) {
- return v8::FunctionTemplate::New(I18NLocale::JSMinimizedLocale);
- } else if (name->Equals(v8::String::New("NativeJSDisplayLanguage"))) {
- return v8::FunctionTemplate::New(I18NLocale::JSDisplayLanguage);
- } else if (name->Equals(v8::String::New("NativeJSDisplayScript"))) {
- return v8::FunctionTemplate::New(I18NLocale::JSDisplayScript);
- } else if (name->Equals(v8::String::New("NativeJSDisplayRegion"))) {
- return v8::FunctionTemplate::New(I18NLocale::JSDisplayRegion);
- } else if (name->Equals(v8::String::New("NativeJSDisplayName"))) {
- return v8::FunctionTemplate::New(I18NLocale::JSDisplayName);
} else if (name->Equals(v8::String::New("NativeJSBreakIterator"))) {
return v8::FunctionTemplate::New(BreakIterator::JSBreakIterator);
} else if (name->Equals(v8::String::New("NativeJSCollator"))) {
#include "i18n-locale.h"
-#include <algorithm>
-#include <string>
-
+#include "i18n-utils.h"
+#include "language-matcher.h"
#include "unicode/locid.h"
#include "unicode/uloc.h"
+#include "utils.h"
namespace v8 {
namespace internal {
-v8::Handle<v8::Value> I18NLocale::JSLocale(const v8::Arguments& args) {
- // TODO(cira): Fetch browser locale. Accept en-US as good default for now.
- // We could possibly pass browser locale as a parameter in the constructor.
- std::string locale_name("en-US");
- if (args.Length() == 1 && args[0]->IsString()) {
- locale_name = *v8::String::Utf8Value(args[0]->ToString());
- }
-
- v8::Local<v8::Object> locale = v8::Object::New();
- locale->Set(v8::String::New("locale"), v8::String::New(locale_name.c_str()));
-
- icu::Locale icu_locale(locale_name.c_str());
-
- const char* language = icu_locale.getLanguage();
- locale->Set(v8::String::New("language"), v8::String::New(language));
-
- const char* script = icu_locale.getScript();
- if (strlen(script)) {
- locale->Set(v8::String::New("script"), v8::String::New(script));
- }
-
- const char* region = icu_locale.getCountry();
- if (strlen(region)) {
- locale->Set(v8::String::New("region"), v8::String::New(region));
- }
-
- return locale;
-}
-
-// TODO(cira): Filter out locales that Chrome doesn't support.
-v8::Handle<v8::Value> I18NLocale::JSAvailableLocales(
- const v8::Arguments& args) {
- v8::Local<v8::Array> all_locales = v8::Array::New();
-
- int count = 0;
- const icu::Locale* icu_locales = icu::Locale::getAvailableLocales(count);
- for (int i = 0; i < count; ++i) {
- all_locales->Set(i, v8::String::New(icu_locales[i].getName()));
- }
-
- return all_locales;
-}
-
-// Use - as tag separator, not _ that ICU uses.
-static std::string NormalizeLocale(const std::string& locale) {
- std::string result(locale);
- // TODO(cira): remove STL dependency.
- std::replace(result.begin(), result.end(), '_', '-');
- return result;
-}
+const char* const I18NLocale::kLocaleID = "localeID";  // Settings/result key.
+const char* const I18NLocale::kRegionID = "regionID";  // Settings/result key.
+const char* const I18NLocale::kICULocaleID = "icuLocaleID";  // Result-only key.
-v8::Handle<v8::Value> I18NLocale::JSMaximizedLocale(const v8::Arguments& args) {
- if (!args.Length() || !args[0]->IsString()) {
- return v8::Undefined();
- }
+v8::Handle<v8::Value> I18NLocale::JSLocale(const v8::Arguments& args) {
+ v8::HandleScope handle_scope;
- UErrorCode status = U_ZERO_ERROR;
- std::string locale_name = *v8::String::Utf8Value(args[0]->ToString());
- char max_locale[ULOC_FULLNAME_CAPACITY];
- uloc_addLikelySubtags(locale_name.c_str(), max_locale,
- sizeof(max_locale), &status);
- if (U_FAILURE(status)) {
+ if (args.Length() != 1 || !args[0]->IsObject()) {
return v8::Undefined();
}
- return v8::String::New(NormalizeLocale(max_locale).c_str());
-}
+ v8::Local<v8::Object> settings = args[0]->ToObject();
-v8::Handle<v8::Value> I18NLocale::JSMinimizedLocale(const v8::Arguments& args) {
- if (!args.Length() || !args[0]->IsString()) {
+ // Get best match for locale. Bail out if reading the localeID key throws.
+ v8::TryCatch try_catch;
+ v8::Handle<v8::Value> locale_id = settings->Get(v8::String::New(kLocaleID));
+ if (try_catch.HasCaught()) {
return v8::Undefined();
}
- UErrorCode status = U_ZERO_ERROR;
- std::string locale_name = *v8::String::Utf8Value(args[0]->ToString());
- char min_locale[ULOC_FULLNAME_CAPACITY];
- uloc_minimizeSubtags(locale_name.c_str(), min_locale,
- sizeof(min_locale), &status);
- if (U_FAILURE(status)) {
- return v8::Undefined();
+ LocaleIDMatch result;  // Starts out holding the default locale ids.
+ if (locale_id->IsArray()) {
+ LanguageMatcher::GetBestMatchForPriorityList(
+ v8::Handle<v8::Array>::Cast(locale_id), &result);
+ } else if (locale_id->IsString()) {
+ LanguageMatcher::GetBestMatchForString(locale_id->ToString(), &result);
+ } else {
+ LanguageMatcher::GetBestMatchForString(v8::String::New(""), &result);
}
- return v8::String::New(NormalizeLocale(min_locale).c_str());
-}
+ // Get best match for region, starting from an empty region id.
+ char region_id[ULOC_COUNTRY_CAPACITY];
+ I18NUtils::StrNCopy(region_id, ULOC_COUNTRY_CAPACITY, "");
-// Common code for JSDisplayXXX methods.
-static v8::Handle<v8::Value> GetDisplayItem(const v8::Arguments& args,
- const std::string& item) {
- if (args.Length() != 2 || !args[0]->IsString() || !args[1]->IsString()) {
+ v8::Handle<v8::Value> region = settings->Get(v8::String::New(kRegionID));
+ if (try_catch.HasCaught()) {
return v8::Undefined();
}
- std::string base_locale = *v8::String::Utf8Value(args[0]->ToString());
- icu::Locale icu_locale(base_locale.c_str());
- icu::Locale display_locale =
- icu::Locale(*v8::String::Utf8Value(args[1]->ToString()));
- icu::UnicodeString result;
- if (item == "language") {
- icu_locale.getDisplayLanguage(display_locale, result);
- } else if (item == "script") {
- icu_locale.getDisplayScript(display_locale, result);
- } else if (item == "region") {
- icu_locale.getDisplayCountry(display_locale, result);
- } else if (item == "name") {
- icu_locale.getDisplayName(display_locale, result);
- } else {
- return v8::Undefined();
+ if (!GetBestMatchForRegionID(result.icu_id, region, region_id)) {
+ // Set region id to empty string because region couldn't be inferred.
+ I18NUtils::StrNCopy(region_id, ULOC_COUNTRY_CAPACITY, "");
}
- if (result.length()) {
- return v8::String::New(
- reinterpret_cast<const uint16_t*>(result.getBuffer()), result.length());
- }
+ // Build the JS result object: BCP47 locale ID, ICU locale ID and region ID.
+ v8::Handle<v8::Object> locale = v8::Object::New();
+ locale->Set(v8::String::New(kLocaleID), v8::String::New(result.bcp47_id));
+ locale->Set(v8::String::New(kICULocaleID), v8::String::New(result.icu_id));
+ locale->Set(v8::String::New(kRegionID), v8::String::New(region_id));
- return v8::Undefined();
+ return handle_scope.Close(locale);
}
-v8::Handle<v8::Value> I18NLocale::JSDisplayLanguage(const v8::Arguments& args) {
- return GetDisplayItem(args, "language");
-}
-
-v8::Handle<v8::Value> I18NLocale::JSDisplayScript(const v8::Arguments& args) {
- return GetDisplayItem(args, "script");
-}
-
-v8::Handle<v8::Value> I18NLocale::JSDisplayRegion(const v8::Arguments& args) {
- return GetDisplayItem(args, "region");
-}
+bool I18NLocale::GetBestMatchForRegionID(
+ const char* locale_id, v8::Handle<v8::Value> region_id, char* result) {
+ if (region_id->IsString() && region_id->ToString()->Length() != 0) {
+ icu::Locale user_locale(
+ icu::Locale("und", *v8::String::Utf8Value(region_id->ToString())));
+ I18NUtils::StrNCopy(
+ result, ULOC_COUNTRY_CAPACITY, user_locale.getCountry());
+ return true;  // Non-empty user region: use the country ICU reports for it.
+ }
+ // No usable user region: maximize locale_id to infer the region (e.g. expand
+ // "de" to "de-Latn-DE" and grab "DE" from the result).
+ UErrorCode status = U_ZERO_ERROR;
+ char maximized_locale[ULOC_FULLNAME_CAPACITY];
+ uloc_addLikelySubtags(
+ locale_id, maximized_locale, ULOC_FULLNAME_CAPACITY, &status);
+ uloc_getCountry(maximized_locale, result, ULOC_COUNTRY_CAPACITY, &status);
-v8::Handle<v8::Value> I18NLocale::JSDisplayName(const v8::Arguments& args) {
- return GetDisplayItem(args, "name");
+ return !U_FAILURE(status);  // |status| is shared by both ICU calls above.
}
} } // namespace v8::internal
// Implementations of window.Locale methods.
static v8::Handle<v8::Value> JSLocale(const v8::Arguments& args);
- static v8::Handle<v8::Value> JSAvailableLocales(const v8::Arguments& args);
- static v8::Handle<v8::Value> JSMaximizedLocale(const v8::Arguments& args);
- static v8::Handle<v8::Value> JSMinimizedLocale(const v8::Arguments& args);
- static v8::Handle<v8::Value> JSDisplayLanguage(const v8::Arguments& args);
- static v8::Handle<v8::Value> JSDisplayScript(const v8::Arguments& args);
- static v8::Handle<v8::Value> JSDisplayRegion(const v8::Arguments& args);
- static v8::Handle<v8::Value> JSDisplayName(const v8::Arguments& args);
+
+ // Infers region id given the locale id, or uses user specified region id.
+ // Result is canonicalized.
+ // Returns status of ICU operation (maximizing locale or get region call).
+ static bool GetBestMatchForRegionID(
+ const char* locale_id, v8::Handle<v8::Value> regions, char* result);
+
+ private:
+ // Key name for localeID parameter.
+ static const char* const kLocaleID;
+ // Key name for regionID parameter.
+ static const char* const kRegionID;
+ // Key name for the icuLocaleID result.
+ static const char* const kICULocaleID;
};
} } // namespace v8::internal
--- /dev/null
+// Copyright 2011 the V8 project authors. All rights reserved.
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following
+// disclaimer in the documentation and/or other materials provided
+// with the distribution.
+// * Neither the name of Google Inc. nor the names of its
+// contributors may be used to endorse or promote products derived
+// from this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+#include "i18n-utils.h"
+
+#include <string.h>
+
+namespace v8 {
+namespace internal {
+
+// static
+// Copies |src| into |dest|, a buffer of |length| bytes, and always leaves
+// |dest| NUL terminated. At most (length - 1) bytes of |src| are kept.
+// Does nothing when either pointer is NULL or when |length| is not positive,
+// because there is then no room even for the terminator.
+void I18NUtils::StrNCopy(char* dest, int length, const char* src) {
+  if (!dest || !src || length <= 0) return;
+
+  // strncpy leaves |dest| unterminated when |src| fills the whole buffer, so
+  // the last byte is unconditionally forced to NUL.
+  strncpy(dest, src, length);
+  dest[length - 1] = '\0';
+}
+
+} } // namespace v8::internal
--- /dev/null
+// Copyright 2011 the V8 project authors. All rights reserved.
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following
+// disclaimer in the documentation and/or other materials provided
+// with the distribution.
+// * Neither the name of Google Inc. nor the names of its
+// contributors may be used to endorse or promote products derived
+// from this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+#ifndef V8_EXTENSIONS_EXPERIMENTAL_I18N_UTILS_H_
+#define V8_EXTENSIONS_EXPERIMENTAL_I18N_UTILS_H_
+
+namespace v8 {
+namespace internal {
+
+class I18NUtils {
+ public:
+ // Safe string copy of src into dest. Null terminates the destination and
+ // copies at most (length - 1) bytes. No-op when dest or src is NULL.
+ // We can't use snprintf since it's not supported on all relevant platforms.
+ // We can't use OS::SNPrintF, it's only for internal code.
+ // TODO(cira): Find a way to use OS::SNPrintF instead.
+ static void StrNCopy(char* dest, int length, const char* src);
+
+ private:
+ I18NUtils() {}  // Private: the class only exposes static helpers.
+};
+
+} } // namespace v8::internal
+
+#endif // V8_EXTENSIONS_EXPERIMENTAL_I18N_UTILS_H_
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-// TODO(cira): Remove v8 prefix from v8Locale once we have stable API.
-v8Locale = function(optLocale) {
+// TODO(cira): Rename v8Locale into LocaleInfo once we have stable API.
+/**
+ * LocaleInfo class is an aggregate class of all i18n API calls.
+ * A plain string argument is treated as shorthand for {'localeID': string}.
+ * Missing or non-object settings fall back to {'localeID': 'root'}.
+ * The resolved values are exposed as this.options.localeID and
+ * this.options.regionID.
+ * @param {Object} settings - localeID and regionID to create LocaleInfo from.
+ * {Array.<string>|string} settings.localeID -
+ * Unicode identifier of the locale.
+ * See http://unicode.org/reports/tr35/#BCP_47_Conformance
+ * {string} settings.regionID - ISO3166 region ID with addition of
+ * invalid, undefined and reserved region codes.
+ * @constructor
+ */
+v8Locale = function(settings) {
native function NativeJSLocale();
- var properties = NativeJSLocale(optLocale);
- this.locale = properties.locale;
- this.language = properties.language;
- this.script = properties.script;
- this.region = properties.region;
-};
-
-v8Locale.availableLocales = function() {
- native function NativeJSAvailableLocales();
- return NativeJSAvailableLocales();
-};
-v8Locale.prototype.maximizedLocale = function() {
- native function NativeJSMaximizedLocale();
- return new v8Locale(NativeJSMaximizedLocale(this.locale));
-};
-
-v8Locale.prototype.minimizedLocale = function() {
- native function NativeJSMinimizedLocale();
- return new v8Locale(NativeJSMinimizedLocale(this.locale));
-};
-
-v8Locale.prototype.displayLocale_ = function(displayLocale) {
- var result = this.locale;
- if (displayLocale !== undefined) {
- result = displayLocale.locale;
+ // Assume user wanted to do v8Locale("sr"); wrap the string in a settings object.
+ if (typeof(settings) === "string") {
+ settings = {'localeID': settings};
}
- return result;
-};
-v8Locale.prototype.displayLanguage = function(optDisplayLocale) {
- var displayLocale = this.displayLocale_(optDisplayLocale);
- native function NativeJSDisplayLanguage();
- return NativeJSDisplayLanguage(this.locale, displayLocale);
-};
-
-v8Locale.prototype.displayScript = function(optDisplayLocale) {
- var displayLocale = this.displayLocale_(optDisplayLocale);
- native function NativeJSDisplayScript();
- return NativeJSDisplayScript(this.locale, displayLocale);
-};
+ var properties = NativeJSLocale(
+ v8Locale.createSettingsOrDefault_(settings, {'localeID': 'root'}));
-v8Locale.prototype.displayRegion = function(optDisplayLocale) {
- var displayLocale = this.displayLocale_(optDisplayLocale);
- native function NativeJSDisplayRegion();
- return NativeJSDisplayRegion(this.locale, displayLocale);
+ // Keep the resolved ICU locale ID around to avoid resolving localeID to
+ // ICU locale ID every time BreakIterator, Collator and so forth are called.
+ this.__icuLocaleID__ = properties.icuLocaleID;
+ this.options = {'localeID': properties.localeID,
+ 'regionID': properties.regionID};
};
-v8Locale.prototype.displayName = function(optDisplayLocale) {
- var displayLocale = this.displayLocale_(optDisplayLocale);
- native function NativeJSDisplayName();
- return NativeJSDisplayName(this.locale, displayLocale);
+/**
+ * Clones existing locale with possible overrides for some of the options.
+ * @param {!Object} settings - overrides for current locale settings.
+ * @returns {Object} - new LocaleInfo object.
+ */
+v8Locale.prototype.derive = function(settings) {
+ return new v8Locale(
+ v8Locale.createSettingsOrDefault_(settings, this.options));
};
+/**
+ * v8BreakIterator class implements locale aware segmentation.
+ * It is not part of EcmaScript proposal.
+ * If locale is not a v8Locale instance, a default v8Locale is used instead.
+ * @param {Object} locale - locale object to pass to break
+ * iterator implementation.
+ * @param {string} type - type of segmentation:
+ * - character
+ * - word
+ * - sentence
+ * - line
+ * @constructor
+ */
v8Locale.v8BreakIterator = function(locale, type) {
native function NativeJSBreakIterator();
- var iterator = NativeJSBreakIterator(locale, type);
+
+ locale = v8Locale.createLocaleOrDefault_(locale);
+ // BCP47 ID would work in this case, but we use ICU locale for consistency.
+ var iterator = NativeJSBreakIterator(locale.__icuLocaleID__, type);
iterator.type = type;
return iterator;
};
+/**
+ * Type of the break we encountered during previous iteration.
+ * @type{Enum}
+ */
v8Locale.v8BreakIterator.BreakType = {
'unknown': -1,
'none': 0,
'ideo': 400
};
+/**
+ * Creates new v8BreakIterator based on current locale.
+ * @param {string} type - type of segmentation. See constructor.
+ * @returns {Object} - new v8BreakIterator object.
+ */
v8Locale.prototype.v8CreateBreakIterator = function(type) {
- return new v8Locale.v8BreakIterator(this.locale, type);
+ return new v8Locale.v8BreakIterator(this, type);
};
// TODO(jungshik): Set |collator.options| to actually recognized / resolved
// values.
-v8Locale.Collator = function(locale, options) {
+/**
+ * Collator class implements locale-aware sort.
+ * If locale is not a v8Locale instance, a default v8Locale is used instead.
+ * Settings that are not an object are replaced by an empty settings object.
+ * @param {Object} locale - locale object to pass to collator implementation.
+ * @param {Object} settings - collation flags:
+ * - ignoreCase
+ * - ignoreAccents
+ * - numeric
+ * @constructor
+ */
+v8Locale.Collator = function(locale, settings) {
native function NativeJSCollator();
- var collator = NativeJSCollator(locale,
- options === undefined ? {} : options);
+
+ locale = v8Locale.createLocaleOrDefault_(locale);
+ var collator = NativeJSCollator(
+ locale.__icuLocaleID__, v8Locale.createSettingsOrDefault_(settings, {}));
return collator;
};
-v8Locale.prototype.createCollator = function(options) {
- return new v8Locale.Collator(this.locale, options);
+/**
+ * Creates new Collator based on current locale.
+ * @param {Object} - collation flags. See constructor.
+ * @returns {Object} - new v8BreakIterator object.
+ */
+v8Locale.prototype.createCollator = function(settings) {
+ return new v8Locale.Collator(this, settings);
+};
+
+/**
+ * Merges user settings and defaults.
+ * Settings that are not of object type are rejected.
+ * Actual property values are not validated, but whitespace is trimmed if they
+ * are strings.
+ * @param {!Object} settings - user provided settings.
+ * @param {!Object} defaults - default values for this type of settings.
+ * @returns {Object} - valid settings object.
+ */
+v8Locale.createSettingsOrDefault_ = function(settings, defaults) {
+ if (!settings || typeof(settings) !== 'object' ) {
+ return defaults;
+ }
+ for (var key in defaults) {
+ if (!settings.hasOwnProperty(key)) {
+ settings[key] = defaults[key];
+ }
+ }
+ // Clean up values, like trimming whitespace.
+ for (var key in settings) {
+ if (typeof(settings[key]) === "string") {
+ settings[key] = settings[key].trim();
+ }
+ }
+
+ return settings;
+};
+
+/**
+ * If locale is valid (defined and of v8Locale type) we return it. If not
+ * we create default locale and return it.
+ * @param {!Object} locale - user provided locale.
+ * @returns {Object} - v8Locale object.
+ */
+v8Locale.createLocaleOrDefault_ = function(locale) {
+ if (!locale || !(locale instanceof v8Locale)) {
+ return new v8Locale();
+ } else {
+ return locale;
+ }
};
--- /dev/null
+// Copyright 2011 the V8 project authors. All rights reserved.
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following
+// disclaimer in the documentation and/or other materials provided
+// with the distribution.
+// * Neither the name of Google Inc. nor the names of its
+// contributors may be used to endorse or promote products derived
+// from this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+// TODO(cira): Remove LanguageMatcher from v8 when ICU implements
+// language matching API.
+
+#include "language-matcher.h"
+
+#include "i18n-utils.h"
+#include "unicode/datefmt.h" // For getAvailableLocales
+#include "unicode/locid.h"
+#include "unicode/uloc.h"
+#include "utils.h"
+
+namespace v8 {
+namespace internal {
+
+const unsigned int LanguageMatcher::kLanguageWeight = 75;  // Language subtag.
+const unsigned int LanguageMatcher::kScriptWeight = 20;  // Script subtag.
+const unsigned int LanguageMatcher::kRegionWeight = 5;  // Region subtag.
+const unsigned int LanguageMatcher::kThreshold = 50;  // Lowest accepted score.
+const unsigned int LanguageMatcher::kPositionBonus = 1;  // Per list position.
+const char* const LanguageMatcher::kDefaultLocale = "root";
+
+static const char* GetLanguageException(const char*);
+static bool BCP47ToICUFormat(const char*, char*);
+static int CompareLocaleSubtags(const char*, const char*);
+static bool BuildLocaleName(const char*, const char*, LocaleIDMatch*);
+
+// Starts out as "no match yet": score is -1 and both ids name the default
+// locale.
+LocaleIDMatch::LocaleIDMatch() : score(-1) {
+  const char* const fallback_id = LanguageMatcher::kDefaultLocale;
+
+  I18NUtils::StrNCopy(bcp47_id, ULOC_FULLNAME_CAPACITY, fallback_id);
+  I18NUtils::StrNCopy(icu_id, ULOC_FULLNAME_CAPACITY, fallback_id);
+}
+
+// Copies both locale ids and the score from |rhs|.
+LocaleIDMatch& LocaleIDMatch::operator=(const LocaleIDMatch& rhs) {
+  // Self-assignment would hand strncpy identical (overlapping) source and
+  // destination buffers, which is undefined behavior. There is nothing to
+  // copy in that case anyway.
+  if (this == &rhs) return *this;
+
+  I18NUtils::StrNCopy(this->bcp47_id, ULOC_FULLNAME_CAPACITY, rhs.bcp47_id);
+  I18NUtils::StrNCopy(this->icu_id, ULOC_FULLNAME_CAPACITY, rhs.icu_id);
+  this->score = rhs.score;
+
+  return *this;
+}
+
+// static
+// Walks |locales| in priority order and stores the best scoring supported
+// locale in |result|. |result| is left untouched when no entry reaches the
+// threshold. Entries closer to the front of the list receive a larger position
+// bonus, which breaks ties in favor of the earlier entry.
+void LanguageMatcher::GetBestMatchForPriorityList(
+    v8::Handle<v8::Array> locales, LocaleIDMatch* result) {
+  v8::HandleScope handle_scope;
+
+  // Scores are signed, so keep the whole comparison in signed arithmetic
+  // instead of mixing in the unsigned class constants.
+  const int threshold = static_cast<int>(kThreshold);
+  const int bonus_step = static_cast<int>(kPositionBonus);
+  int position_bonus = static_cast<int>(locales->Length()) * bonus_step;
+
+  int max_score = 0;
+  for (unsigned int i = 0; i < locales->Length(); ++i) {
+    position_bonus -= bonus_step;
+
+    v8::TryCatch try_catch;
+    v8::Local<v8::Value> locale_id = locales->Get(v8::Integer::New(i));
+
+    // Stop at the first exception raised while reading the list. Whatever was
+    // stored in |result| so far is kept.
+    if (try_catch.HasCaught()) break;
+
+    // JavaScript arrays can be heterogenous so check each item
+    // if it's a string.
+    if (!locale_id->IsString()) continue;
+
+    // Every entry is scored from scratch. Reusing one match object would let
+    // the score left behind by an earlier entry (bonus included, or written
+    // just before a failed name conversion) suppress later entries.
+    LocaleIDMatch match;
+    if (!CompareToSupportedLocaleIDList(locale_id->ToString(), &match)) {
+      continue;
+    }
+
+    // Skip items under threshold.
+    if (match.score < threshold) continue;
+
+    match.score += position_bonus;
+    if (match.score > max_score) {
+      *result = match;
+
+      max_score = match.score;
+    }
+  }
+}
+
+// static
+// Matches a single locale id against the supported locales. |result| is only
+// overwritten when a match was found and its score reaches the threshold.
+void LanguageMatcher::GetBestMatchForString(
+    v8::Handle<v8::String> locale, LocaleIDMatch* result) {
+  LocaleIDMatch candidate;
+
+  if (!CompareToSupportedLocaleIDList(locale, &candidate)) return;
+  if (candidate.score < kThreshold) return;
+
+  *result = candidate;
+}
+
+// static
+// Scores |locale_id| against every locale reported as available by ICU's
+// DateFormat service. A candidate is taken when its score reaches the
+// threshold and beats the score already stored in |result|; the best such
+// candidate provides the ids written to |result|.
+// Returns false if |locale_id| can't be converted to ICU format, if no
+// candidate qualifies, or if the final locale name can't be built.
+bool LanguageMatcher::CompareToSupportedLocaleIDList(
+    v8::Handle<v8::String> locale_id, LocaleIDMatch* result) {
+  static int32_t available_count = 0;
+  // Depending on how ICU data is built, locales returned by
+  // Locale::getAvailableLocale() are not guaranteed to support DateFormat,
+  // Collation and other services.  We can call getAvailableLocale() of all the
+  // services we want to support and take the intersection of them all, but
+  // using DateFormat::getAvailableLocales() should suffice.
+  // TODO(cira): Maybe make this thread-safe?
+  static const icu::Locale* available_locales =
+      icu::DateFormat::getAvailableLocales(available_count);
+
+  // Skip this locale_id if it's not in ASCII.
+  v8::String::AsciiValue ascii_value(locale_id);
+  if (*ascii_value == NULL) return false;
+
+  char locale[ULOC_FULLNAME_CAPACITY];
+  if (!BCP47ToICUFormat(*ascii_value, locale)) return false;
+
+  icu::Locale input_locale(locale);
+
+  // The weights and the threshold are declared unsigned, while a mismatching
+  // subtag contributes a negative amount. Convert them once so that the score
+  // is computed and compared in signed arithmetic; otherwise a negative score
+  // turns into a huge unsigned value and passes the threshold test.
+  const int language_weight = static_cast<int>(kLanguageWeight);
+  const int script_weight = static_cast<int>(kScriptWeight);
+  const int region_weight = static_cast<int>(kRegionWeight);
+  const int threshold = static_cast<int>(kThreshold);
+
+  // Position of the best match locale in list of available locales.
+  int position = -1;
+  const char* language = GetLanguageException(input_locale.getLanguage());
+  const char* script = input_locale.getScript();
+  const char* region = input_locale.getCountry();
+  for (int32_t i = 0; i < available_count; ++i) {
+    const icu::Locale& candidate = available_locales[i];
+
+    // Each subtag adds its weight on a match and subtracts it on a mismatch.
+    int current_score =
+        CompareLocaleSubtags(language, candidate.getLanguage()) *
+        language_weight;
+    current_score +=
+        CompareLocaleSubtags(script, candidate.getScript()) * script_weight;
+    current_score +=
+        CompareLocaleSubtags(region, candidate.getCountry()) * region_weight;
+
+    if (current_score >= threshold && current_score > result->score) {
+      result->score = current_score;
+      position = i;
+    }
+  }
+
+  // Didn't find any good matches so use defaults.
+  if (position == -1) return false;
+
+  return BuildLocaleName(available_locales[position].getBaseName(),
+                         input_locale.getName(), result);
+}
+
+// Some language subtags are not supported themselves but have a closely
+// related supported language. Mapping them to that language is better than
+// falling back to the default. Any other subtag is returned unchanged.
+static const char* GetLanguageException(const char* language) {
+  // { unsupported subtag, replacement }
+  static const char* const kExceptions[][2] = {
+    { "sh", "sr" },   // Serbo-croatian to Serbian.
+    { "no", "nb" },   // Norwegian to Norwegian Bokmal.
+    { "mo", "ro" },   // Moldavian to Romanian.
+    { "tl", "fil" },  // Tagalog to Filipino.
+  };
+  const size_t exception_count = sizeof(kExceptions) / sizeof(kExceptions[0]);
+
+  for (size_t i = 0; i < exception_count; ++i) {
+    if (strcmp(language, kExceptions[i][0]) == 0) return kExceptions[i][1];
+  }
+
+  return language;
+}
+
+// Converts user input from BCP47 locale id format to ICU compatible format.
+// |result| must point to a buffer of at least ULOC_FULLNAME_CAPACITY bytes.
+// Returns false if |locale_id| is NULL, if the uloc_forLanguageTag call fails,
+// or if the converted id fills |result| completely and is therefore not NUL
+// terminated. An over-long "-u-" extension is dropped before the conversion.
+static bool BCP47ToICUFormat(const char* locale_id, char* result) {
+  if (locale_id == NULL) return false;
+
+  UErrorCode status = U_ZERO_ERROR;
+  int32_t locale_size = 0;
+
+  char locale[ULOC_FULLNAME_CAPACITY];
+  I18NUtils::StrNCopy(locale, ULOC_FULLNAME_CAPACITY, locale_id);
+
+  // uloc_forLanguageTag has a bug where long extension can crash the code.
+  // We need to check if extension part of language id conforms to the length.
+  // ICU bug: http://bugs.icu-project.org/trac/ticket/8519
+  const char* extension = strstr(locale_id, "-u-");
+  if (extension != NULL &&
+      strlen(extension) > ULOC_KEYWORD_AND_VALUES_CAPACITY) {
+    // Truncate to get non-crashing string, but still preserve base language.
+    // The offset is measured in the unbounded input, so it can lie beyond the
+    // local buffer. In that case the copy above has already cut the string off
+    // before the extension and there is nothing left to truncate.
+    const size_t base_length = static_cast<size_t>(extension - locale_id);
+    if (base_length < static_cast<size_t>(ULOC_FULLNAME_CAPACITY)) {
+      locale[base_length] = '\0';
+    }
+  }
+
+  uloc_forLanguageTag(locale, result, ULOC_FULLNAME_CAPACITY,
+                      &locale_size, &status);
+  // U_STRING_NOT_TERMINATED_WARNING is not a failure code, but callers read
+  // |result| as a C string, so an unterminated result must be rejected too.
+  return !U_FAILURE(status) && status != U_STRING_NOT_TERMINATED_WARNING;
+}
+
+// Compares two locale id subtags for exact (case sensitive) equality.
+// Returns 1 when they are equal and -1 when they differ.
+static int CompareLocaleSubtags(const char* lsubtag, const char* rsubtag) {
+  if (strcmp(lsubtag, rsubtag) != 0) return -1;
+  return 1;
+}
+
+// Builds a BCP47 compliant locale id from base name of matched locale and
+// full user specified locale.
+// Returns false if uloc_toLanguageTag failed to convert locale id, or if the
+// converted id fills the destination completely and is not NUL terminated.
+// Example:
+//   base_name of matched locale (ICU ID): de_DE
+//   input_locale_name (ICU ID): de_AT@collation=phonebk
+//   result (ICU ID): de_DE@collation=phonebk
+//   result (BCP47 ID): de-DE-u-co-phonebk
+static bool BuildLocaleName(const char* base_name,
+                            const char* input_locale_name,
+                            LocaleIDMatch* result) {
+  // icu_id holds a full locale name, so bound the copy by the size of that
+  // buffer rather than by the (much smaller) language subtag capacity.
+  I18NUtils::StrNCopy(result->icu_id, ULOC_FULLNAME_CAPACITY, base_name);
+
+  // Get extensions (if any) from the original locale.
+  const char* extension = strchr(input_locale_name, ULOC_KEYWORD_SEPARATOR);
+  if (extension != NULL) {
+    // Append directly after what was actually copied, and never write beyond
+    // the end of icu_id. The keyword part stays limited to ICU's keyword
+    // capacity, as before.
+    const int base_length = static_cast<int>(strlen(result->icu_id));
+    int capacity = ULOC_FULLNAME_CAPACITY - base_length;
+    if (capacity > ULOC_KEYWORD_AND_VALUES_CAPACITY) {
+      capacity = ULOC_KEYWORD_AND_VALUES_CAPACITY;
+    }
+    I18NUtils::StrNCopy(result->icu_id + base_length, capacity, extension);
+  }
+
+  // Convert ICU locale name into BCP47 format.
+  UErrorCode status = U_ZERO_ERROR;
+  uloc_toLanguageTag(result->icu_id, result->bcp47_id,
+                     ULOC_FULLNAME_CAPACITY, false, &status);
+  // bcp47_id is later read as a C string, so an unterminated result is
+  // treated as a failure as well.
+  return !U_FAILURE(status) && status != U_STRING_NOT_TERMINATED_WARNING;
+}
+
+} } // namespace v8::internal
--- /dev/null
+// Copyright 2011 the V8 project authors. All rights reserved.
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following
+// disclaimer in the documentation and/or other materials provided
+// with the distribution.
+// * Neither the name of Google Inc. nor the names of its
+// contributors may be used to endorse or promote products derived
+// from this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+#ifndef V8_EXTENSIONS_EXPERIMENTAL_LANGUAGE_MATCHER_H_
+#define V8_EXTENSIONS_EXPERIMENTAL_LANGUAGE_MATCHER_H_
+
+#include <v8.h>
+
+#include "unicode/uloc.h"
+
+namespace v8 {
+namespace internal {
+
+struct LocaleIDMatch {
+ LocaleIDMatch();  // Both ids start as the default locale, score as -1.
+
+ LocaleIDMatch& operator=(const LocaleIDMatch& rhs);  // Copies ids and score.
+
+ // Bcp47 locale id - "de-Latn-DE-u-co-phonebk".
+ char bcp47_id[ULOC_FULLNAME_CAPACITY];
+
+ // ICU locale id - "de_Latn_DE@collation=phonebk".
+ char icu_id[ULOC_FULLNAME_CAPACITY];
+
+ // Score for this locale. Initialized to -1 by the constructor.
+ int score;
+};
+
+class LanguageMatcher {
+ public:
+ // Default locale id; a newly constructed LocaleIDMatch starts out with it.
+ static const char* const kDefaultLocale;
+
+ // Finds best supported locale for a given list of locale identifiers.
+ // It preserves the extension for the locale id.
+ static void GetBestMatchForPriorityList(
+ v8::Handle<v8::Array> locale_list, LocaleIDMatch* result);
+
+ // Finds best supported locale for a single locale identifier.
+ // It preserves the extension for the locale id.
+ static void GetBestMatchForString(
+ v8::Handle<v8::String> locale_id, LocaleIDMatch* result);
+
+ private:
+ // Added to the score if language subtags match, subtracted if they differ.
+ static const unsigned int kLanguageWeight;
+
+ // Added to the score if script subtags match, subtracted if they differ.
+ static const unsigned int kScriptWeight;
+
+ // Added to the score if region subtags match, subtracted if they differ.
+ static const unsigned int kRegionWeight;
+
+ // LocaleID match score has to be at least this number to accept the match.
+ static const unsigned int kThreshold;
+
+ // For breaking ties in priority queue.
+ static const unsigned int kPositionBonus;
+
+ LanguageMatcher();
+
+ // Compares locale_id to the supported list of locales and stores the best
+ // match in result. Returns false if no supported locale scores high enough
+ // or if converting the locale id between BCP47 and ICU format fails.
+ static bool CompareToSupportedLocaleIDList(v8::Handle<v8::String> locale_id,
+ LocaleIDMatch* result);
+};
+
+} } // namespace v8::internal
+
+#endif // V8_EXTENSIONS_EXPERIMENTAL_LANGUAGE_MATCHER_H_