- add sources.
[platform/framework/web/crosswalk.git] / src / components / translate / language_detection / language_detection_util_unittest.cc
1 // Copyright 2013 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4
5 #include "components/translate/language_detection/language_detection_util.h"
6
7 #include "base/strings/string16.h"
8 #include "base/strings/utf_string_conversions.h"
9 #include "components/translate/common/translate_constants.h"
10 #include "testing/gtest/include/gtest/gtest.h"
11
12 typedef testing::Test LanguageDetectionUtilTest;
13
14 // Tests that well-known language code typos are fixed.
15 TEST_F(LanguageDetectionUtilTest, LanguageCodeTypoCorrection) {
16   std::string language;
17
18   // Strip the second and later codes.
19   language = std::string("ja,en");
20   translate::CorrectLanguageCodeTypo(&language);
21   EXPECT_EQ("ja", language);
22
23   // Replace dash with hyphen.
24   language = std::string("ja_JP");
25   translate::CorrectLanguageCodeTypo(&language);
26   EXPECT_EQ("ja-JP", language);
27
28   // Correct wrong cases.
29   language = std::string("JA-jp");
30   translate::CorrectLanguageCodeTypo(&language);
31   EXPECT_EQ("ja-JP", language);
32 }
33
34 // Tests if the language codes' format is invalid.
35 TEST_F(LanguageDetectionUtilTest, IsValidLanguageCode) {
36   std::string language;
37
38   language = std::string("ja");
39   EXPECT_TRUE(translate::IsValidLanguageCode(language));
40
41   language = std::string("ja-JP");
42   EXPECT_TRUE(translate::IsValidLanguageCode(language));
43
44   language = std::string("ceb");
45   EXPECT_TRUE(translate::IsValidLanguageCode(language));
46
47   language = std::string("ceb-XX");
48   EXPECT_TRUE(translate::IsValidLanguageCode(language));
49
50   // Invalid because the sub code consists of a number.
51   language = std::string("utf-8");
52   EXPECT_FALSE(translate::IsValidLanguageCode(language));
53
54   // Invalid because of six characters after hyphen.
55   language = std::string("ja-YUKARI");
56   EXPECT_FALSE(translate::IsValidLanguageCode(language));
57
58   // Invalid because of four characters.
59   language = std::string("DHMO");
60   EXPECT_FALSE(translate::IsValidLanguageCode(language));
61 }
62
63 // Tests that similar language table works.
64 TEST_F(LanguageDetectionUtilTest, SimilarLanguageCode) {
65   EXPECT_TRUE(translate::IsSameOrSimilarLanguages("en", "en"));
66   EXPECT_FALSE(translate::IsSameOrSimilarLanguages("en", "ja"));
67
68   // Language codes are same if the main parts are same. The synonyms should be
69   // took into account (ex: 'iw' and 'he').
70   EXPECT_TRUE(translate::IsSameOrSimilarLanguages("sr-ME", "sr"));
71   EXPECT_TRUE(translate::IsSameOrSimilarLanguages("sr", "sr-ME"));
72   EXPECT_TRUE(translate::IsSameOrSimilarLanguages("he", "he-IL"));
73   EXPECT_TRUE(translate::IsSameOrSimilarLanguages("eng", "eng-US"));
74   EXPECT_TRUE(translate::IsSameOrSimilarLanguages("eng-US", "eng"));
75   EXPECT_FALSE(translate::IsSameOrSimilarLanguages("eng", "enm"));
76
77   // Even though the main parts are different, some special language pairs are
78   // recognized as same languages.
79   EXPECT_TRUE(translate::IsSameOrSimilarLanguages("bs", "hr"));
80   EXPECT_TRUE(translate::IsSameOrSimilarLanguages("ne", "hi"));
81   EXPECT_FALSE(translate::IsSameOrSimilarLanguages("bs", "hi"));
82 }
83
84 // Tests that well-known languages which often have wrong server configuration
85 // are handles.
86 TEST_F(LanguageDetectionUtilTest, WellKnownWrongConfiguration) {
87   EXPECT_TRUE(translate::MaybeServerWrongConfiguration("en", "ja"));
88   EXPECT_TRUE(translate::MaybeServerWrongConfiguration("en-US", "ja"));
89   EXPECT_TRUE(translate::MaybeServerWrongConfiguration("en", "zh-CN"));
90   EXPECT_FALSE(translate::MaybeServerWrongConfiguration("ja", "en"));
91   EXPECT_FALSE(translate::MaybeServerWrongConfiguration("en", "he"));
92 }
93
94 // Tests that the language meta tag providing wrong information is ignored by
95 // LanguageDetectionUtil due to disagreement between meta tag and CLD.
96 TEST_F(LanguageDetectionUtilTest, CLDDisagreeWithWrongLanguageCode) {
97   base::string16 contents = ASCIIToUTF16(
98       "<html><head><meta http-equiv='Content-Language' content='ja'></head>"
99       "<body>This is a page apparently written in English. Even though "
100       "content-language is provided, the value will be ignored if the value "
101       "is suspicious.</body></html>");
102   std::string cld_language;
103   bool is_cld_reliable;
104   std::string language = translate::DeterminePageLanguage(std::string("ja"),
105                                                           std::string(),
106                                                           contents,
107                                                           &cld_language,
108                                                           &is_cld_reliable);
109   EXPECT_EQ(translate::kUnknownLanguageCode, language);
110   EXPECT_EQ("en", cld_language);
111   EXPECT_TRUE(is_cld_reliable);
112 }
113
114 // Tests that the language meta tag providing "en-US" style information is
115 // agreed by CLD.
116 TEST_F(LanguageDetectionUtilTest, CLDAgreeWithLanguageCodeHavingCountryCode) {
117   base::string16 contents = ASCIIToUTF16(
118       "<html><head><meta http-equiv='Content-Language' content='en-US'></head>"
119       "<body>This is a page apparently written in English. Even though "
120       "content-language is provided, the value will be ignored if the value "
121       "is suspicious.</body></html>");
122   std::string cld_language;
123   bool is_cld_reliable;
124   std::string language = translate::DeterminePageLanguage(std::string("en-US"),
125                                                           std::string(),
126                                                           contents,
127                                                           &cld_language,
128                                                           &is_cld_reliable);
129   EXPECT_EQ("en-US", language);
130   EXPECT_EQ("en", cld_language);
131   EXPECT_TRUE(is_cld_reliable);
132 }
133
134 // Tests that the language meta tag providing wrong information is ignored and
135 // CLD's language will be adopted by LanguageDetectionUtil due to an invalid
136 // meta tag.
137 TEST_F(LanguageDetectionUtilTest, InvalidLanguageMetaTagProviding) {
138   base::string16 contents = ASCIIToUTF16(
139       "<html><head><meta http-equiv='Content-Language' content='utf-8'></head>"
140       "<body>This is a page apparently written in English. Even though "
141       "content-language is provided, the value will be ignored and CLD's"
142       " language will be adopted if the value is invalid.</body></html>");
143   std::string cld_language;
144   bool is_cld_reliable;
145   std::string language = translate::DeterminePageLanguage(std::string("utf-8"),
146                                                           std::string(),
147                                                           contents,
148                                                           &cld_language,
149                                                           &is_cld_reliable);
150   EXPECT_EQ("en", language);
151   EXPECT_EQ("en", cld_language);
152   EXPECT_TRUE(is_cld_reliable);
153 }
154
155 // Tests that the language meta tag providing wrong information is ignored
156 // because of valid html lang attribute.
157 TEST_F(LanguageDetectionUtilTest, AdoptHtmlLang) {
158   base::string16 contents = ASCIIToUTF16(
159       "<html lang='en'><head><meta http-equiv='Content-Language' content='ja'>"
160       "</head><body>This is a page apparently written in English. Even though "
161       "content-language is provided, the value will be ignored if the value "
162       "is suspicious.</body></html>");
163   std::string cld_language;
164   bool is_cld_reliable;
165   std::string language = translate::DeterminePageLanguage(std::string("ja"),
166                                                           std::string("en"),
167                                                           contents,
168                                                           &cld_language,
169                                                           &is_cld_reliable);
170   EXPECT_EQ("en", language);
171   EXPECT_EQ("en", cld_language);
172   EXPECT_TRUE(is_cld_reliable);
173 }