1 // Copyright (c) 2009 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
5 // Unit tests for the compact language detector.
7 // Small version, covering these languages only:
8 // Arabic Bulgarian Catalan Chinese ChineseT Croatian Czech Danish Dutch
9 // English Estonian Finnish French German Greek Hebrew Hindi Hungarian
10 // Icelandic Indonesian Italian Japanese Korean Latvian Lithuanian Norwegian
11 // Polish Portuguese Romanian Russian Serbian Slovak Slovenian Spanish
12 // Swedish Tagalog Thai Turkish Ukrainian Vietnamese
14 // Additional single-language scripts recognized for free:
15 // Armenian Cherokee Dhivehi Georgian Gujarati Inuktitut Kannada Khmer
16 // Laothian Malayalam Oriya Punjabi Sinhalese Syriac Telugu Tamil
20 #include "testing/gtest/include/gtest/gtest.h"
21 #include "encodings/compact_lang_det/compact_lang_det.h"
22 #include "encodings/compact_lang_det/ext_lang_enc.h"
23 #include "encodings/compact_lang_det/unittest_data.h"
25 #include "encodings/compact_lang_det/win/cld_commandlineflags.h"
26 #include "encodings/compact_lang_det/win/cld_google.h"
29 // The test strings below are defined in this file to keep the unit test self-contained.
// English sample text in plain ASCII (lower-case words, no punctuation).
// EasyTests passes it to TestCompactLangDetPlain and expects ENGLISH.
30 const char* kTeststr_en =
31 "confiscation of goods is assigned as the penalty part most of the courts "
32 "consist of members and when it is necessary to bring public cases before a "
33 "jury of members two courts combine for the purpose the most important cases "
34 "of all are brought jurors or";
37 // UTF8 constants. Use a UTF-8 aware editor for this file
// Sample text stored as UTF-8 bytes written with hex escapes, so the literal
// itself is pure ASCII. Every three-byte group E0 A4 xx / E0 A5 xx encodes a
// code point in U+0900..U+097F (the Devanagari block); words are separated by
// ASCII spaces. The same text is listed as Unicode code points in the
// commented-out block further down in this file.
// NOTE(review): "ks" is presumably the language code for Kashmiri -- confirm.
// No test visible in this part of the file references this constant.
38 const char* kTeststr_ks =
39 "\xe0\xa4\xa8\xe0\xa5\x87\xe0\xa4\xaa\xe0\xa4\xbe\xe0\xa4\xb2 \xe0\xa4\x8f"
40 "\xe0\xa4\xb8\xe0\xa4\xbf\xe0\xa4\xaf\xe0\xa4\xbe \xe0\xa4\xae\xe0\xa4\x82"
41 "\xe0\xa4\x9c \xe0\xa4\x85\xe0\xa4\x96 \xe0\xa4\xae\xe0\xa5\x81\xe0\xa4\xb2"
42 "\xe0\xa5\x81\xe0\xa4\x95 \xe0\xa4\xb0\xe0\xa4\xbe\xe0\xa4\x9c\xe0\xa4\xa7"
43 "\xe0\xa4\xbe\xe0\xa4\xa8\xe0\xa5\x80 \xe0\xa4\x95\xe0\xa4\xbe\xe0\xa4\xa0"
44 "\xe0\xa4\xae\xe0\xa4\xbe\xe0\xa4\xa1\xe0\xa5\x8c\xe0\xa4\x82 \xe0\xa4\xa8"
45 "\xe0\xa5\x87\xe0\xa4\xaa\xe0\xa4\xbe\xe0\xa4\xb2 \xe0\xa4\x85\xe0\xa4\xa7"
46 "\xe0\xa4\xbf\xe0\xa4\xb0\xe0\xa4\xbe\xe0\xa4\x9c\xe0\xa5\x8d\xe0\xa4\xaf "
47 "\xe0\xa4\xaa\xe0\xa5\x87\xe0\xa4\xb0\xe0\xa5\x87\xe0\xa4\x97\xe0\xa5\x8d"
48 "\xe0\xa4\xb5\xe0\xa4\xbe\xe0\xa4\xaf \xe0\xa4\xa6\xe0\xa4\x95\xe0\xa5\x8d"
49 "\xe0\xa4\xb7\xe0\xa4\xbf\xe0\xa4\xa3 \xe0\xa4\x85\xe0\xa4\xae\xe0\xa5\x87"
50 "\xe0\xa4\xb0\xe0\xa4\xbf\xe0\xa4\x95\xe0\xa4\xbe \xe0\xa4\xae\xe0\xa4\xb9"
51 "\xe0\xa4\xbe\xe0\xa4\xa6\xe0\xa5\x8d\xe0\xa4\xb5\xe0\xa5\x80\xe0\xa4\xaa"
52 "\xe0\xa5\x87 \xe0\xa4\xae\xe0\xa4\xa7\xe0\xa5\x8d \xe0\xa4\xaf\xe0\xa4\x95"
53 "\xe0\xa5\x8d\xe0\xa4\xb7\xe0\xa5\x87\xe0\xa4\xa4\xe0\xa5\x8d\xe0\xa4\xb0"
54 "\xe0\xa5\x87 \xe0\xa4\x8f\xe0\xa4\x95 \xe0\xa4\xa6\xe0\xa5\x87\xe0\xa4\xb6 "
55 "\xe0\xa4\x85\xe0\xa4\xb8\xe0\xa5\x8d \xe0\xa4\xa4\xe0\xa4\xbf \xe0\xa4\xab"
56 "\xe0\xa4\xa3\xe0\xa5\x80\xe0\xa4\xb6\xe0\xa5\x8d\xe0\xa4\xb5\xe0\xa4\xb0 "
57 "\xe0\xa4\xa8\xe0\xa4\xbe\xe0\xa4\xa5 \xe0\xa4\xb0\xe0\xa5\x87\xe0\xa4\xa3"
58 "\xe0\xa5\x81 \xe0\xa4\xab\xe0\xa4\xbf\xe0\xa4\x9c\xe0\xa5\x80 \xe0\xa4\x9b"
59 "\xe0\xa5\x81 \xe0\xa4\xa6\xe0\xa4\x95\xe0\xa5\x8d\xe0\xa4\xb7\xe0\xa4\xbf"
60 "\xe0\xa4\xa3 \xe0\xa4\xaa\xe0\xa5\x8d\xe0\xa4\xb0\xe0\xa4\xb6\xe0\xa4\xbe"
61 "\xe0\xa4\xa8\xe0\xa5\x8d \xe0\xa4\xa4 \xe0\xa4\xae\xe0\xa4\xb9\xe0\xa4\xbe"
62 "\xe0\xa4\xb8\xe0\xa4\xbe\xe0\xa4\x97\xe0\xa4\xb0 \xe0\xa4\xae\xe0\xa4\x82"
63 "\xe0\xa4\x9c \xe0\xa4\x85\xe0\xa4\x96 \xe0\xa4\xa6\xe0\xa5\x87\xe0\xa4\xb6 "
64 "\xe0\xa4\xac\xe0\xa4\xb9\xe0\xa4\xbe\xe0\xa4\xae\xe0\xa4\xbe\xe0\xa4\xb8 "
65 "\xe0\xa4\x9b\xe0\xa5\x81 \xe0\xa4\x95\xe0\xa5\x87\xe0\xa4\xb0\xe0\xa5\x87"
66 "\xe0\xa4\xac\xe0\xa4\xbf\xe0\xa4\xaf\xe0\xa4\xa8 \xe0\xa4\xae\xe0\xa4\x82"
67 "\xe0\xa4\x9c \xe0\xa4\x85\xe0\xa4\x96 \xe0\xa4\xae\xe0\xa5\x81\xe0\xa4\xb2"
68 "\xe0\xa5\x81\xe0\xa4\x96 \xe0\xa4\xb0\xe0\xa4\xbe\xe0\xa4\x9c\xe0\xa4\xa7"
69 "\xe0\xa4\xbe\xe0\xa4\xa8\xe0\xa5\x80 \xe0\xa4\xa8\xe0\xa4\xb8\xe0\xa5\x8c "
70 "\xe0\xa4\xb8\xe0\xa4\xae\xe0\xa5\x8d \xe0\xa4\xac\xe0\xa4\xa6\xe0\xa5\x8d"
71 "\xe0\xa4\x98 \xe0\xa4\xb5\xe0\xa4\xbf\xe0\xa4\xb7\xe0\xa4\xaf \xe0\xa4\xac"
72 "\xe0\xa5\x81\xe0\xa4\xb0\xe0\xa5\x81\xe0\xa4\x82\xe0\xa4\xa1\xe0\xa5\x80 "
73 "\xe0\xa4\x85\xe0\xa4\xab\xe0\xa5\x8d\xe0\xa4\xb0\xe0\xa5\x80\xe0\xa4\x95"
74 "\xe0\xa4\xbe \xe0\xa4\xae\xe0\xa4\xb9\xe0\xa4\xbe\xe0\xa4\xa6\xe0\xa5\x8d"
75 "\xe0\xa4\xb5\xe0\xa5\x80\xe0\xa4\xaa\xe0\xa5\x87 \xe0\xa4\xae\xe0\xa4\xa7"
76 "\xe0\xa5\x8d \xe0\xa4\xaf\xe0\xa4\x95\xe0\xa5\x8d\xe0\xa4\xb7\xe0\xa5\x87"
77 "\xe0\xa4\xa4\xe0\xa5\x8d\xe0\xa4\xb0\xe0\xa5\x87 \xe0\xa4\xa6\xe0\xa5\x87"
78 "\xe0\xa4\xb6 \xe0\xa4\x85\xe0\xa4\xb8\xe0\xa5\x8d \xe0\xa4\xa4\xe0\xa4\xbf "
79 "\xe0\xa4\xb8\xe0\xa4\xae\xe0\xa5\x8d \xe0\xa4\xac\xe0\xa4\xa6\xe0\xa5\x8d"
80 "\xe0\xa4\x98 \xe0\xa4\xb5\xe0\xa4\xbf\xe0\xa4\xb7\xe0\xa4\xaf";
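82 // Test string. It starts with an HTML meta tag, followed by the same UTF-8
// character sequence repeated many times and then a long run of 'a'.
// This will be squeezed because of the repetitions.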
83 const char* kTeststr_kr_repetitions =
84 "<meta charset=\"utf-8\" />\n\xe9\xb9\x9f\xe9\x93\x80\xec\xaa\xa5\xe6\x8b"
85 "\x83\xe5\x9b\xa8\xe5\x99\xaa\xe6\xad\xad\xeb\x96\x84\xec\x9b\x98\xe1\x97"
86 "\x90\xe9\x97\xbb\xea\xac\x83\xe8\x83\x96\xdf\xb1\xea\x9d\x99\xe6\xa8\x8a"
87 "\xec\xb3\xb3\xea\xa8\x9b\xe6\x82\x9c\xe9\xb9\x9f\xe9\x93\x80\xec\xaa\xa5"
88 "\xe6\x8b\x83\xe5\x9b\xa8\xe5\x99\xaa\xe6\xad\xad\xeb\x96\x84\xec\x9b\x98"
89 "\xe1\x97\x90\xe9\x97\xbb\xea\xac\x83\xe8\x83\x96\xdf\xb1\xea\x9d\x99\xe6"
90 "\xa8\x8a\xec\xb3\xb3\xea\xa8\x9b\xe6\x82\x9c\xe9\xb9\x9f\xe9\x93\x80\xec"
91 "\xaa\xa5\xe6\x8b\x83\xe5\x9b\xa8\xe5\x99\xaa\xe6\xad\xad\xeb\x96\x84\xec"
92 "\x9b\x98\xe1\x97\x90\xe9\x97\xbb\xea\xac\x83\xe8\x83\x96\xdf\xb1\xea\x9d"
93 "\x99\xe6\xa8\x8a\xec\xb3\xb3\xea\xa8\x9b\xe6\x82\x9c\xe9\xb9\x9f\xe9\x93"
94 "\x80\xec\xaa\xa5\xe6\x8b\x83\xe5\x9b\xa8\xe5\x99\xaa\xe6\xad\xad\xeb\x96"
95 "\x84\xec\x9b\x98\xe1\x97\x90\xe9\x97\xbb\xea\xac\x83\xe8\x83\x96\xdf\xb1"
96 "\xea\x9d\x99\xe6\xa8\x8a\xec\xb3\xb3\xea\xa8\x9b\xe6\x82\x9c\xe9\xb9\x9f"
97 "\xe9\x93\x80\xec\xaa\xa5\xe6\x8b\x83\xe5\x9b\xa8\xe5\x99\xaa\xe6\xad\xad"
98 "\xeb\x96\x84\xec\x9b\x98\xe1\x97\x90\xe9\x97\xbb\xea\xac\x83\xe8\x83\x96"
99 "\xdf\xb1\xea\x9d\x99\xe6\xa8\x8a\xec\xb3\xb3\xea\xa8\x9b\xe6\x82\x9c\xe9"
100 "\xb9\x9f\xe9\x93\x80\xec\xaa\xa5\xe6\x8b\x83\xe5\x9b\xa8\xe5\x99\xaa\xe6"
101 "\xad\xad\xeb\x96\x84\xec\x9b\x98\xe1\x97\x90\xe9\x97\xbb\xea\xac\x83\xe8"
102 "\x83\x96\xdf\xb1\xea\x9d\x99\xe6\xa8\x8a\xec\xb3\xb3\xea\xa8\x9b\xe6\x82"
103 "\x9c\xe9\xb9\x9f\xe9\x93\x80\xec\xaa\xa5\xe6\x8b\x83\xe5\x9b\xa8\xe5\x99"
104 "\xaa\xe6\xad\xad\xeb\x96\x84\xec\x9b\x98\xe1\x97\x90\xe9\x97\xbb\xea\xac"
105 "\x83\xe8\x83\x96\xdf\xb1\xea\x9d\x99\xe6\xa8\x8a\xec\xb3\xb3\xea\xa8\x9b"
106 "\xe6\x82\x9c\xe9\xb9\x9f\xe9\x93\x80\xec\xaa\xa5\xe6\x8b\x83\xe5\x9b\xa8"
107 "\xe5\x99\xaa\xe6\xad\xad\xeb\x96\x84\xec\x9b\x98\xe1\x97\x90\xe9\x97\xbb"
108 "\xea\xac\x83\xe8\x83\x96\xdf\xb1\xea\x9d\x99\xe6\xa8\x8a\xec\xb3\xb3\xea"
109 "\xa8\x9b\xe6\x82\x9c\xe9\xb9\x9f\xe9\x93\x80\xec\xaa\xa5\xe6\x8b\x83\xe5"
110 "\x9b\xa8\xe5\x99\xaa\xe6\xad\xad\xeb\x96\x84\xec\x9b\x98\xe1\x97\x90\xe9"
111 "\x97\xbb\xea\xac\x83\xe8\x83\x96\xdf\xb1\xea\x9d\x99\xe6\xa8\x8a\xec\xb3"
112 "\xb3\xea\xa8\x9b\xe6\x82\x9c\xe9\xb9\x9f\xe9\x93\x80\xec\xaa\xa5\xe6\x8b"
113 "\x83\xe5\x9b\xa8\xe5\x99\xaa\xe6\xad\xad\xeb\x96\x84\xec\x9b\x98\xe1\x97"
114 "\x90\xe9\x97\xbb\xea\xac\x83\xe8\x83\x96\xdf\xb1\xea\x9d\x99\xe6\xa8\x8a"
115 "\xec\xb3\xb3\xea\xa8\x9b\xe6\x82\x9c\xe9\xb9\x9f\xe9\x93\x80\xec\xaa\xa5"
116 "\xe6\x8b\x83\xe5\x9b\xa8\xe5\x99\xaa\xe6\xad\xad\xeb\x96\x84\xec\x9b\x98"
117 "\xe1\x97\x90\xe9\x97\xbb\xea\xac\x83\xe8\x83\x96\xdf\xb1\xea\x9d\x99\xe6"
118 "\xa8\x8a\xec\xb3\xb3\xea\xa8\x9b\xe6\x82\x9c\xe9\xb9\x9f\xe9\x93\x80\xec"
119 "\xaa\xa5\xe6\x8b\x83\xe5\x9b\xa8\xe5\x99\xaa\xe6\xad\xad\xeb\x96\x84\xec"
120 "\x9b\x98\xe1\x97\x90\xe9\x97\xbb\xea\xac\x83\xe8\x83\x96\xdf\xb1\xea\x9d"
121 "\x99\xe6\xa8\x8a\xec\xb3\xb3\xea\xa8\x9b\xe6\x82\x9c\xe9\xb9\x9f\xe9\x93"
122 "\x80\xec\xaa\xa5\xe6\x8b\x83\xe5\x9b\xa8\xe5\x99\xaa\xe6\xad\xad\xeb\x96"
123 "\x84\xec\x9b\x98\xe1\x97\x90\xe9\x97\xbb\xea\xac\x83\xe8\x83\x96\xdf\xb1"
124 "\xea\x9d\x99\xe6\xa8\x8a\xec\xb3\xb3\xea\xa8\x9b\xe6\x82\x9c\xe9\xb9\x9f"
125 "\xe9\x93\x80\xec\xaa\xa5\xe6\x8b\x83\xe5\x9b\xa8\xe5\x99\xaa\xe6\xad\xad"
126 "\xeb\x96\x84\xec\x9b\x98\xe1\x97\x90\xe9\x97\xbb\xea\xac\x83\xe8\x83\x96"
127 "\xdf\xb1\xea\x9d\x99\xe6\xa8\x8a\xec\xb3\xb3\xea\xa8\x9b\xe6\x82\x9c\xe9"
128 "\xb9\x9f\xe9\x93\x80\xec\xaa\xa5\xe6\x8b\x83\xe5\x9b\xa8\xe5\x99\xaa\xe6"
129 "\xad\xad\xeb\x96\x84\xec\x9b\x98\xe1\x97\x90\xe9\x97\xbb\xea\xac\x83\xe8"
130 "\x83\x96\xdf\xb1\xea\x9d\x99\xe6\xa8\x8a\xec\xb3\xb3\xea\xa8\x9b\xe6\x82"
131 "\x9c\xe9\xb9\x9f\xe9\x93\x80\xec\xaa\xa5\xe6\x8b\x83\xe5\x9b\xa8\xe5\x99"
132 "\xaa\xe6\xad\xad\xeb\x96\x84\xec\x9b\x98\xe1\x97\x90\xe9\x97\xbb\xea\xac"
133 "\x83\xe8\x83\x96\xdf\xb1\xea\x9d\x99\xe6\xa8\x8a\xec\xb3\xb3\xea\xa8\x9b"
134 "\xe6\x82\x9c\xe9\xb9\x9f\xe9\x93\x80\xec\xaa\xa5\xe6\x8b\x83\xe5\x9b\xa8"
135 "\xe5\x99\xaa\xe6\xad\xad\xeb\x96\x84\xec\x9b\x98\xe1\x97\x90\xe9\x97\xbb"
136 "\xea\xac\x83\xe8\x83\x96\xdf\xb1\xea\x9d\x99\xe6\xa8\x8a\xec\xb3\xb3\xea"
137 "\xa8\x9b\xe6\x82\x9c\xe9\xb9\x9f\xe9\x93\x80\xec\xaa\xa5\xe6\x8b\x83\xe5"
138 "\x9b\xa8\xe5\x99\xaa\xe6\xad\xad\xeb\x96\x84\xec\x9b\x98\xe1\x97\x90\xe9"
139 "\x97\xbb\xea\xac\x83\xe8\x83\x96\xdf\xb1\xea\x9d\x99\xe6\xa8\x8a\xec\xb3"
140 "\xb3\xea\xa8\x9b\xe6\x82\x9c\xe9\xb9\x9f\xe9\x93\x80\xec\xaa\xa5\xe6\x8b"
141 "\x83\xe5\x9b\xa8\xe5\x99\xaa\xe6\xad\xad\xeb\x96\x84\xec\x9b\x98\xe1\x97"
142 "\x90\xe9\x97\xbb\xea\xac\x83\xe8\x83\x96\xdf\xb1\xea\x9d\x99\xe6\xa8\x8a"
143 "\xec\xb3\xb3\xea\xa8\x9b\xe6\x82\x9c\xe9\xb9\x9f\xe9\x93\x80\xec\xaa\xa5"
144 "\xe6\x8b\x83\xe5\x9b\xa8\xe5\x99\xaa\xe6\xad\xad\xeb\x96\x84\xec\x9b\x98"
145 "\xe1\x97\x90\xe9\x97\xbb\xea\xac\x83\xe8\x83\x96\xdf\xb1\xea\x9d\x99\xe6"
146 "\xa8\x8a\xec\xb3\xb3\xea\xa8\x9b\xe6\x82\x9c\xe9\xb9\x9f\xe9\x93\x80\xec"
147 "\xaa\xa5\xe6\x8b\x83\xe5\x9b\xa8\xe5\x99\xaa\xe6\xad\xad\xeb\x96\x84\xec"
148 "\x9b\x98\xe1\x97\x90\xe9\x97\xbb\xea\xac\x83\xe8\x83\x96\xdf\xb1\xea\x9d"
149 "\x99\xe6\xa8\x8a\xec\xb3\xb3\xea\xa8\x9b\xe6\x82\x9c\xe9\xb9\x9f\xe9\x93"
150 "\x80\xec\xaa\xa5\xe6\x8b\x83\xe5\x9b\xa8\xe5\x99\xaa\xe6\xad\xad\xeb\x96"
151 "\x84\xec\x9b\x98\xe1\x97\x90\xe9\x97\xbb\xea\xac\x83\xe8\x83\x96\xdf\xb1"
152 "\xea\x9d\x99\xe6\xa8\x8a\xec\xb3\xb3\xea\xa8\x9b\xe6\x82\x9c\xe9\xb9\x9f"
153 "\xe9\x93\x80\xec\xaa\xa5\xe6\x8b\x83\xe5\x9b\xa8\xe5\x99\xaa\xe6\xad\xad"
154 "\xeb\x96\x84\xec\x9b\x98\xe1\x97\x90\xe9\x97\xbb\xea\xac\x83\xe8\x83\x96"
155 "\xdf\xb1\xea\x9d\x99\xe6\xa8\x8a\xec\xb3\xb3\xea\xa8\x9b\xe6\x82\x9c\xe9"
156 "\xb9\x9f\xe9\x93\x80\xec\xaa\xa5\xe6\x8b\x83\xe5\x9b\xa8\xe5\x99\xaa\xe6"
157 "\xad\xad\xeb\x96\x84\xec\x9b\x98\xe1\x97\x90\xe9\x97\xbb\xea\xac\x83\xe8"
158 "\x83\x96\xdf\xb1\xea\x9d\x99\xe6\xa8\x8a\xec\xb3\xb3\xea\xa8\x9b\xe6\x82"
159 "\x9c\xe9\xb9\x9f\xe9\x93\x80\xec\xaa\xa5\xe6\x8b\x83\xe5\x9b\xa8\xe5\x99"
160 "\xaa\xe6\xad\xad\xeb\x96\x84\xec\x9b\x98\xe1\x97\x90\xe9\x97\xbb\xea\xac"
161 "\x83\xe8\x83\x96\xdf\xb1\xea\x9d\x99\xe6\xa8\x8a\xec\xb3\xb3\xea\xa8\x9b"
162 "\xe6\x82\x9c\xe9\xb9\x9f\xe9\x93\x80\xec\xaa\xa5\xe6\x8b\x83\xe5\x9b\xa8"
163 "\xe5\x99\xaa\xe6\xad\xad\xeb\x96\x84\xec\x9b\x98\xe1\x97\x90\xe9\x97\xbb"
164 "\xea\xac\x83\xe8\x83\x96\xdf\xb1\xea\x9d\x99\xe6\xa8\x8a\xec\xb3\xb3\xea"
165 "\xa8\x9b\xe6\x82\x9c\xe9\xb9\x9f\xe9\x93\x80\xec\xaa\xa5\xe6\x8b\x83\xe5"
166 "\x9b\xa8\xe5\x99\xaa\xe6\xad\xad\xeb\x96\x84\xec\x9b\x98\xe1\x97\x90\xe9"
167 "\x97\xbb\xea\xac\x83\xe8\x83\x96\xdf\xb1\xea\x9d\x99\xe6\xa8\x8a\xec\xb3"
168 "\xb3\xea\xa8\x9b\xe6\x82\x9c\xe9\xb9\x9f\xe9\x93\x80\xec\xaa\xa5\xe6\x8b"
169 "\x83\xe5\x9b\xa8\xe5\x99\xaa\xe6\xad\xad\xeb\x96\x84\xec\x9b\x98\xe1\x97"
170 "\x90\xe9\x97\xbb\xea\xac\x83\xe8\x83\x96\xdf\xb1\xea\x9d\x99\xe6\xa8\x8a"
171 "\xec\xb3\xb3\xea\xa8\x9b\xe6\x82\x9c\xe9\xb9\x9f\xe9\x93\x80\xec\xaa\xa5"
172 "\xe6\x8b\x83\xe5\x9b\xa8\xe5\x99\xaa\xe6\xad\xad\xeb\x96\x84\xec\x9b\x98"
173 "\xe1\x97\x90\xe9\x97\xbb\xea\xac\x83\xe8\x83\x96\xdf\xb1\xea\x9d\x99\xe6"
174 "\xa8\x8a\xec\xb3\xb3\xea\xa8\x9b\xe6\x82\x9c\xe9\xb9\x9f\xe9\x93\x80\xec"
175 "\xaa\xa5\xe6\x8b\x83\xe5\x9b\xa8\xe5\x99\xaa\xe6\xad\xad\xeb\x96\x84\xec"
176 "\x9b\x98\xe1\x97\x90\xe9\x97\xbb\xea\xac\x83\xe8\x83\x96\xdf\xb1\xea\x9d"
177 "\x99\xe6\xa8\x8a\xec\xb3\xb3\xea\xa8\x9b\xe6\x82\x9c\xe9\xb9\x9f\xe9\x93"
178 "\x80\xec\xaa\xa5\xe6\x8b\x83\xe5\x9b\xa8\xe5\x99\xaa\xe6\xad\xad\xeb\x96"
179 "\x84\xec\x9b\x98\xe1\x97\x90\xe9\x97\xbb\xea\xac\x83\xe8\x83\x96\xdf\xb1"
180 "\xea\x9d\x99\xe6\xa8\x8a\xec\xb3\xb3\xea\xa8\x9b\xe6\x82\x9c\xe9\xb9\x9f"
181 "\xe9\x93\x80\xec\xaa\xa5\xe6\x8b\x83\xe5\x9b\xa8\xe5\x99\xaa\xe6\xad\xad"
182 "\xeb\x96\x84\xec\x9b\x98\xe1\x97\x90\xe9\x97\xbb\xea\xac\x83\xe8\x83\x96"
183 "\xdf\xb1\xea\x9d\x99\xe6\xa8\x8a\xec\xb3\xb3\xea\xa8\x9b\xe6\x82\x9c\xe9"
184 "\xb9\x9f\xe9\x93\x80\xec\xaa\xa5\xe6\x8b\x83\xe5\x9b\xa8\xe5\x99\xaa\xe6"
185 "\xad\xad\xeb\x96\x84\xec\x9b\x98\xe1\x97\x90\xe9\x97\xbb\xea\xac\x83\xe8"
186 "\x83\x96\xdf\xb1\xea\x9d\x99\xe6\xa8\x8a\xec\xb3\xb3\xea\xa8\x9b\xe6\x82"
187 "\x9c\xe9\xb9\x9f\xe9\x93\x80\xec\xaa\xa5\xe6\x8b\x83\xe5\x9b\xa8\xe5\x99"
188 "\xaa\xe6\xad\xad\xeb\x96\x84\xec\x9b\x98\xe1\x97\x90\xe9\x97\xbb\xea\xac"
189 "\x83\xe8\x83\x96\xdf\xb1\xea\x9d\x99\xe6\xa8\x8a\xec\xb3\xb3\xea\xa8\x9b"
190 "\xe6\x82\x9c\xe9\xb9\x9f\xe9\x93\x80\xec\xaa\xa5\xe6\x8b\x83\xe5\x9b\xa8"
191 "\xe5\x99\xaa\xe6\xad\xad\xeb\x96\x84\xec\x9b\x98\xe1\x97\x90\xe9\x97\xbb"
192 "\xea\xac\x83\xe8\x83\x96\xdf\xb1\xea\x9d\x99\xe6\xa8\x8a\xec\xb3\xb3\xea"
193 "\xa8\x9b\xe6\x82\x9c\xe9\xb9\x9f\xe9\x93\x80\xec\xaa\xa5\xe6\x8b\x83\xe5"
194 "\x9b\xa8\xe5\x99\xaa\xe6\xad\xad\xeb\x96\x84\xec\x9b\x98\xe1\x97\x90\xe9"
195 "\x97\xbb\xea\xac\x83\xe8\x83\x96\xdf\xb1\xea\x9d\x99\xe6\xa8\x8a\xec\xb3"
196 "\xb3\xea\xa8\x9b\xe6\x82\x9c\xe9\xb9\x9f\xe9\x93\x80\xec\xaa\xa5\xe6\x8b"
197 "\x83\xe5\x9b\xa8\xe5\x99\xaa\xe6\xad\xad\xeb\x96\x84\xec\x9b\x98\xe1\x97"
198 "\x90\xe9\x97\xbb\xea\xac\x83\xe8\x83\x96\xdf\xb1\xea\x9d\x99\xe6\xa8\x8a"
199 "\xec\xb3\xb3\xea\xa8\x9b\xe6\x82\x9c\xe9\xb9\x9f\xe9\x93\x80\xec\xaa\xa5"
200 "\xe6\x8b\x83\xe5\x9b\xa8\xe5\x99\xaa\xe6\xad\xad\xeb\x96\x84\xec\x9b\x98"
201 "\xe1\x97\x90\xe9\x97\xbb\xea\xac\x83\xe8\x83\x96\xdf\xb1\xea\x9d\x99\xe6"
202 "\xa8\x8a\xec\xb3\xb3\xea\xa8\x9b\xe6\x82\x9c\xe9\xb9\x9f\xe9\x93\x80\xec"
203 "\xaa\xa5\xe6\x8b\x83\xe5\x9b\xa8\xe5\x99\xaa\xe6\xad\xad\xeb\x96\x84\xec"
204 "\x9b\x98\xe1\x97\x90\xe9\x97\xbb\xea\xac\x83\xe8\x83\x96\xdf\xb1\xea\x9d"
205 "\x99\xe6\xa8\x8a\xec\xb3\xb3\xea\xa8\x9b\xe6\x82\x9c\xe9\xb9\x9f\xe9\x93"
206 "\x80\xec\xaa\xa5\xe6\x8b\x83\xe5\x9b\xa8\xe5\x99\xaa\xe6\xad\xad\xeb\x96"
207 "\x84\xec\x9b\x98\xe1\x97\x90\xe9\x97\xbb\xea\xac\x83\xe8\x83\x96\xdf\xb1"
208 "\xea\x9d\x99\xe6\xa8\x8a\xec\xb3\xb3\xea\xa8\x9b\xe6\x82\x9c\xe9\xb9\x9f"
209 "\xe9\x93\x80\xec\xaa\xa5\xe6\x8b\x83\xe5\x9b\xa8\xe5\x99\xaa\xe6\xad\xad"
210 "\xeb\x96\x84\xec\x9b\x98\xe1\x97\x90\xe9\x97\xbb\xea\xac\x83\xe8\x83\x96"
211 "\xdf\xb1\xea\x9d\x99\xe6\xa8\x8a\xec\xb3\xb3\xea\xa8\x9b\xe6\x82\x9c\xe9"
212 "\xb9\x9f\xe9\x93\x80\xec\xaa\xa5\xe6\x8b\x83\xe5\x9b\xa8\xe5\x99\xaa\xe6"
213 "\xad\xad\xeb\x96\x84\xec\x9b\x98\xe1\x97\x90\xe9\x97\xbb\xea\xac\x83\xe8"
214 "\x83\x96\xdf\xb1\xea\x9d\x99\xe6\xa8\x8a\xec\xb3\xb3\xea\xa8\x9b\xe6\x82"
215 "\x9c\xe9\xb9\x9f\xe9\x93\x80\xec\xaa\xa5\xe6\x8b\x83\xe5\x9b\xa8\xe5\x99"
216 "\xaa\xe6\xad\xad\xeb\x96\x84\xec\x9b\x98\xe1\x97\x90\xe9\x97\xbb\xea\xac"
217 "\x83\xe8\x83\x96\xdf\xb1\xea\x9d\x99\xe6\xa8\x8a\xec\xb3\xb3\xea\xa8\x9b"
218 "\xe6\x82\x9c\xe9\xb9\x9f\xe9\x93\x80\xec\xaa\xa5\xe6\x8b\x83\xe5\x9b\xa8"
219 "\xe5\x99\xaa\xe6\xad\xad\xeb\x96\x84\xec\x9b\x98\xe1\x97\x90\xe9\x97\xbb"
220 "\xea\xac\x83\xe8\x83\x96\xdf\xb1\xea\x9d\x99\xe6\xa8\x8a\xec\xb3\xb3\xea"
221 "\xa8\x9b\xe6\x82\x9c\xe9\xb9\x9f\xe9\x93\x80\xec\xaa\xa5\xe6\x8b\x83\xe5"
222 "\x9b\xa8\xe5\x99\xaa\xe6\xad\xad\xeb\x96\x84\xec\x9b\x98\xe1\x97\x90\xe9"
223 "\x97\xbb\xea\xac\x83\xe8\x83\x96\xdf\xb1\xea\x9d\x99\xe6\xa8\x8a\xec\xb3"
224 "\xb3\xea\xa8\x9b\xe6\x82\x9c\xe9\xb9\x9f\xe9\x93\x80\xec\xaa\xa5\xe6\x8b"
225 "\x83\xe5\x9b\xa8\xe5\x99\xaa\xe6\xad\xad\xeb\x96\x84\xec\x9b\x98\xe1\x97"
226 "\x90\xe9\x97\xbb\xea\xac\x83\xe8\x83\x96\xdf\xb1\xea\x9d\x99\xe6\xa8\x8a"
227 "\xec\xb3\xb3\xea\xa8\x9b\xe6\x82\x9c\xe9\xb9\x9f\xe9\x93\x80\xec\xaa\xa5"
228 "\xe6\x8b\x83\xe5\x9b\xa8\xe5\x99\xaa\xe6\xad\xad\xeb\x96\x84\xec\x9b\x98"
229 "\xe1\x97\x90\xe9\x97\xbb\xea\xac\x83\xe8\x83\x96\xdf\xb1\xea\x9d\x99\xe6"
230 "\xa8\x8a\xec\xb3\xb3\xea\xa8\x9b\xe6\x82\x9c\xe9\xb9\x9f\xe9\x93\x80\xec"
231 "\xaa\xa5\xe6\x8b\x83\xe5\x9b\xa8\xe5\x99\xaa\xe6\xad\xad\xeb\x96\x84\xec"
232 "\x9b\x98\xe1\x97\x90\xe9\x97\xbb\xea\xac\x83\xe8\x83\x96\xdf\xb1\xea\x9d"
233 "\x99\xe6\xa8\x8a\xec\xb3\xb3\xea\xa8\x9b\xe6\x82\x9c\xe9\xb9\x9f\xe9\x93"
234 "\x80\xec\xaa\xa5\xe6\x8b\x83\xe5\x9b\xa8\xe5\x99\xaa\xe6\xad\xad\xeb\x96"
235 "\x84\xec\x9b\x98\xe1\x97\x90\xe9\x97\xbb\xea\xac\x83\xe8\x83\x96\xdf\xb1"
236 "\xea\x9d\x99\xe6\xa8\x8a\xec\xb3\xb3\xea\xa8\x9b\xe6\x82\x9c\xe9\xb9\x9f"
237 "\xe9\x93\x80\xec\xaa\xa5\xe6\x8b\x83\xe5\x9b\xa8\xe5\x99\xaa\xe6\xad\xad"
238 "\xeb\x96\x84\xec\x9b\x98\xe1\x97\x90\xe9\x97\xbb\xea\xac\x83\xe8\x83\x96"
239 "\xdf\xb1\xea\x9d\x99\xe6\xa8\x8a\xec\xb3\xb3\xea\xa8\x9b\xe6\x82\x9c\xe9"
240 "\xb9\x9f\xe9\x93\x80\xec\xaa\xa5\xe6\x8b\x83\xe5\x9b\xa8\xe5\x99\xaa\xe6"
241 "\xad\xad\xeb\x96\x84\xec\x9b\x98\xe1\x97\x90\xe9\x97\xbb\xea\xac\x83\xe8"
242 "\x83\x96\xdf\xb1\xea\x9d\x99\xe6\xa8\x8a\xec\xb3\xb3\xea\xa8\x9b\xe6\x82"
243 "\x9c\xe9\xb9\x9f\xe9\x93\x80\xec\xaa\xa5\xe6\x8b\x83\xe5\x9b\xa8\xe5\x99"
244 "\xaa\xe6\xad\xad\xeb\x96\x84\xec\x9b\x98\xe1\x97\x90\xe9\x97\xbb\xea\xac"
245 "\x83\xe8\x83\x96\xdf\xb1\xea\x9d\x99\xe6\xa8\x8a\xec\xb3\xb3\xea\xa8\x9b"
246 "\xe6\x82\x9c\xe9\xb9\x9f\xe9\x93\x80\xec\xaa\xa5\xe6\x8b\x83\xe5\x9b\xa8"
247 "\xe5\x99\xaa\xe6\xad\xad\xeb\x96\x84\xec\x9b\x98\xe1\x97\x90\xe9\x97\xbb"
248 "\xea\xac\x83\xe8\x83\x96\xdf\xb1\xea\x9d\x99\xe6\xa8\x8a\xec\xb3\xb3\xea"
249 "\xa8\x9b\xe6\x82\x9c\xe9\xb9\x9f\xe9\x93\x80\xec\xaa\xa5\xe6\x8b\x83\xe5"
250 "\x9b\xa8\xe5\x99\xaa\xe6\xad\xad\xeb\x96\x84\xec\x9b\x98\xe1\x97\x90\xe9"
251 "\x97\xbb\xea\xac\x83\xe8\x83\x96\xdf\xb1\xea\x9d\x99\xe6\xa8\x8a\xec\xb3"
252 "\xb3\xea\xa8\x9b\xe6\x82\x9c\xe9\xb9\x9f\xe9\x93\x80\xec\xaa\xa5\xe6\x8b"
253 "\x83\xe5\x9b\xa8\xe5\x99\xaa\xe6\xad\xad\xeb\x96\x84\xec\x9b\x98\xe1\x97"
254 "\x90\xe9\x97\xbb\xea\xac\x83\xe8\x83\x96\xdf\xb1\xea\x9d\x99\xe6\xa8\x8a"
255 "\xec\xb3\xb3\xea\xa8\x9b\xe6\x82\x9c\xe9\xb9\x9f\xe9\x93\x80\xec\xaa\xa5"
256 "\xe6\x8b\x83\xe5\x9b\xa8\xe5\x99\xaa\xe6\xad\xad\xeb\x96\x84\xec\x9b\x98"
257 "\xe1\x97\x90\xe9\x97\xbb\xea\xac\x83\xe8\x83\x96\xdf\xb1\xea\x9d\x99\xe6"
258 "\xa8\x8a\xec\xb3\xb3\xea\xa8\x9b\xe6\x82\x9c\xe9\xb9\x9f\xe9\x93\x80\xec"
259 "\xaa\xa5\xe6\x8b\x83\xe5\x9b\xa8\xe5\x99\xaa\xe6\xad\xad\xeb\x96\x84\xec"
260 "\x9b\x98\xe1\x97\x90\xe9\x97\xbb\xea\xac\x83\xe8\x83\x96\xdf\xb1\xea\x9d"
261 "\x99\xe6\xa8\x8a\xec\xb3\xb3\xea\xa8\x9b\xe6\x82\x9c\xe9\xb9\x9f\xe9\x93"
262 "\x80\xec\xaa\xa5\xe6\x8b\x83\xe5\x9b\xa8\xe5\x99\xaa\xe6\xad\xad\xeb\x96"
263 "\x84\xec\x9b\x98\xe1\x97\x90\xe9\x97\xbb\xea\xac\x83\xe8\x83\x96\xdf\xb1"
264 "\xea\x9d\x99\xe6\xa8\x8a\xec\xb3\xb3\xea\xa8\x9b\xe6\x82\x9c\xe9\xb9\x9f"
265 "\xe9\x93\x80\xec\xaa\xa5\xe6\x8b\x83\xe5\x9b\xa8\xe5\x99\xaa\xe6\xad\xad"
266 "\xeb\x96\x84\xec\x9b\x98\xe1\x97\x90\xe9\x97\xbb\xea\xac\x83\xe8\x83\x96"
267 "\xdf\xb1\xea\x9d\x99\xe6\xa8\x8a\xec\xb3\xb3\xea\xa8\x9b\xe6\x82\x9c\xe9"
268 "\xb9\x9f\xe9\x93\x80\xec\xaa\xa5\xe6\x8b\x83\xe5\x9b\xa8\xe5\x99\xaa\xe6"
269 "\xad\xad\xeb\x96\x84\xec\x9b\x98\xe1\x97\x90\xe9\x97\xbb\xea\xac\x83\xe8"
270 "\x83\x96\xdf\xb1\xea\x9d\x99\xe6\xa8\x8a\xec\xb3\xb3\xea\xa8\x9b\xe6\x82"
271 "\x9c\xe9\xb9\x9f\xe9\x93\x80\xec\xaa\xa5\xe6\x8b\x83\xe5\x9b\xa8\xe5\x99"
272 "\xaa\xe6\xad\xad\xeb\x96\x84\xec\x9b\x98\xe1\x97\x90\xe9\x97\xbb\xea\xac"
273 "\x83\xe8\x83\x96\xdf\xb1\xea\x9d\x99\xe6\xa8\x8a\xec\xb3\xb3\xea\xa8\x9b"
274 "\xe6\x82\x9c\xe9\xb9\x9f\xe9\x93\x80\xec\xaa\xa5\xe6\x8b\x83\xe5\x9b\xa8"
275 "\xe5\x99\xaa\xe6\xad\xad\xeb\x96\x84\xec\x9b\x98\xe1\x97\x90\xe9\x97\xbb"
276 "\xea\xac\x83\xe8\x83\x96\xdf\xb1\xea\x9d\x99\xe6\xa8\x8a\xec\xb3\xb3\xea"
277 "\xa8\x9b\xe6\x82\x9c\xe9\xb9\x9f\xe9\x93\x80\xec\xaa\xa5\xe6\x8b\x83\xe5"
278 "\x9b\xa8\xe5\x99\xaa\xe6\xad\xad\xeb\x96\x84\xec\x9b\x98\xe1\x97\x90\xe9"
279 "\x97\xbb\xea\xac\x83\xe8\x83\x96\xdf\xb1\xea\x9d\x99\xe6\xa8\x8a\xec\xb3"
280 "\xb3\xea\xa8\x9b\xe6\x82\x9c\xe9\xb9\x9f\xe9\x93\x80\xec\xaa\xa5\xe6\x8b"
281 "\x83\xe5\x9b\xa8\xe5\x99\xaa\xe6\xad\xad\xeb\x96\x84\xec\x9b\x98\xe1\x97"
282 "\x90\xe9\x97\xbb\xea\xac\x83\xe8\x83\x96\xdf\xb1\xea\x9d\x99\xe6\xa8\x8a"
283 "\xec\xb3\xb3\xea\xa8\x9b\xe6\x82\x9c\xe9\xb9\x9f\xe9\x93\x80\xec\xaa\xa5"
284 "\xe6\x8b\x83\xe5\x9b\xa8\xe5\x99\xaa\xe6\xad\xad\xeb\x96\x84\xec\x9b\x98"
285 "\xe1\x97\x90\xe9\x97\xbb\xea\xac\x83\xe8\x83\x96\xdf\xb1\xea\x9d\x99\xe6"
286 "\xa8\x8a\xec\xb3\xb3\xea\xa8\x9b\xe6\x82\x9c\xe9\xb9\x9f\xe9\x93\x80\xec"
287 "\xaa\xa5\xe6\x8b\x83\xe5\x9b\xa8\xe5\x99\xaa\xe6\xad\xad\xeb\x96\x84\xec"
288 "\x9b\x98\xe1\x97\x90\xe9\x97\xbb\xea\xac\x83\xe8\x83\x96\xdf\xb1\xea\x9d"
289 "\x99\xe6\xa8\x8a\xec\xb3\xb3\xea\xa8\x9b\xe6\x82\x9c\xe9\xb9\x9f\xe9\x93"
290 "\x80\xec\xaa\xa5\xe6\x8b\x83\xe5\x9b\xa8\xe5\x99\xaa\xe6\xad\xad\xeb\x96"
291 "\x84\xec\x9b\x98\xe1\x97\x90\xe9\x97\xbb\xea\xac\x83\xe8\x83\x96\xdf\xb1"
292 "\xea\x9d\x99\xe6\xa8\x8a\xec\xb3\xb3\xea\xa8\x9b\xe6\x82\x9c\xe9\xb9\x9f"
293 "\xe9\x93\x80\xec\xaa\xa5\xe6\x8b\x83\xe5\x9b\xa8\xe5\x99\xaa\xe6\xad\xad"
294 "\xeb\x96\x84\xec\x9b\x98\xe1\x97\x90\xe9\x97\xbb\xea\xac\x83\xe8\x83\x96"
295 "\xdf\xb1\xea\x9d\x99\xe6\xa8\x8a\xec\xb3\xb3\xea\xa8\x9b\xe6\x82\x9c\xe9"
296 "\xb9\x9f\xe9\x93\x80\xec\xaa\xa5\xe6\x8b\x83\xe5\x9b\xa8\xe5\x99\xaa\xe6"
297 "\xad\xad\xeb\x96\x84\xec\x9b\x98\xe1\x97\x90\xe9\x97\xbb\xea\xac\x83\xe8"
298 "\x83\x96\xdf\xb1\xea\x9d\x99\xe6\xa8\x8a\xec\xb3\xb3\xea\xa8\x9b\xe6\x82"
299 "\x9c\xe9\xb9\x9f\xe9\x93\x80\xec\xaa\xa5\xe6\x8b\x83\xe5\x9b\xa8\xe5\x99"
300 "\xaa\xe6\xad\xad\xeb\x96\x84\xec\x9b\x98\xe1\x97\x90\xe9\x97\xbb\xea\xac"
301 "\x83\xe8\x83\x96\xdf\xb1\xea\x9d\x99\xe6\xa8\x8a\xec\xb3\xb3\xea\xa8\x9b"
302 "\xe6\x82\x9c\xe9\xb9\x9f\xe9\x93\x80\xec\xaa\xa5\xe6\x8b\x83\xe5\x9b\xa8"
303 "\xe5\x99\xaa\xe6\xad\xad\xeb\x96\x84\xec\x9b\x98\xe1\x97\x90\xe9\x97\xbb"
304 "\xea\xac\x83\xe8\x83\x96\xdf\xb1\xea\x9d\x99\xe6\xa8\x8a\xec\xb3\xb3\xea"
305 "\xa8\x9b\xe6\x82\x9c\xe9\xb9\x9f\xe9\x93\x80\xec\xaa\xa5\xe6\x8b\x83\xe5"
306 "\x9b\xa8\xe5\x99\xaa\xe6\xad\xad\xeb\x96\x84\xec\x9b\x98\xe1\x97\x90\xe9"
307 "\x97\xbb\xea\xac\x83\xe8\x83\x96\xdf\xb1\xea\x9d\x99\xe6\xa8\x8a\xec\xb3"
308 "\xb3\xea\xa8\x9b\xe6\x82\x9c\xe9\xb9\x9f\xe9\x93\x80\xec\xaa\xa5\xe6\x8b"
309 "\x83\xe5\x9b\xa8\xe5\x99\xaa\xe6\xad\xad\xeb\x96\x84\xec\x9b\x98\xe1\x97"
310 "\x90\xe9\x97\xbb\xea\xac\x83\xe8\x83\x96\xdf\xb1\xea\x9d\x99\xe6\xa8\x8a"
311 "\xec\xb3\xb3\xea\xa8\x9b\xe6\x82\x9c\xe9\xb9\x9f\xe9\x93\x80\xec\xaa\xa5"
312 "\xe6\x8b\x83\xe5\x9b\xa8\xe5\x99\xaa\xe6\xad\xad\xeb\x96\x84\xec\x9b\x98"
313 "\xe1\x97\x90\xe9\x97\xbb\xea\xac\x83\xe8\x83\x96\xdf\xb1\xea\x9d\x99\xe6"
314 "\xa8\x8a\xec\xb3\xb3\xea\xa8\x9b\xe6\x82\x9c\xe9\xb9\x9f\xe9\x93\x80\xec"
315 "\xaa\xa5\xe6\x8b\x83\xe5\x9b\xa8\xe5\x99\xaa\xe6\xad\xad\xeb\x96\x84\xec"
316 "\x9b\x98\xe1\x97\x90\xe9\x97\xbb\xea\xac\x83\xe8\x83\x96\xdf\xb1\xea\x9d"
317 "\x99\xe6\xa8\x8a\xec\xb3\xb3\xea\xa8\x9b\xe6\x82\x9c\xe9\xb9\x9f\xe9\x93"
318 "\x80\xec\xaa\xa5\xe6\x8b\x83\xe5\x9b\xa8\xe5\x99\xaa\xe6\xad\xad\xeb\x96"
319 "\x84\xec\x9b\x98\xe1\x97\x90\xe9\x97\xbb\xea\xac\x83\xe8\x83\x96\xdf\xb1"
320 "\xea\x9d\x99\xe6\xa8\x8a\xec\xb3\xb3\xea\xa8\x9b\xe6\x82\x9c\xe9\xb9\x9f"
321 "\xe9\x93\x80\xec\xaa\xa5\xe6\x8b\x83\xe5\x9b\xa8\xe5\x99\xaa\xe6\xad\xad"
322 "\xeb\x96\x84\xec\x9b\x98\xe1\x97\x90\xe9\x97\xbb\xea\xac\x83\xe8\x83\x96"
323 "\xdf\xb1\xea\x9d\x99\xe6\xa8\x8a\xec\xb3\xb3\xea\xa8\x9b\xe6\x82\x9c\xe9"
324 "\xb9\x9f\xe9\x93\x80\xec\xaa\xa5\xe6\x8b\x83\xe5\x9b\xa8\xe5\x99\xaa\xe6"
325 "\xad\xad\xeb\x96\x84\xec\x9b\x98\xe1\x97\x90\xe9\x97\xbb\xea\xac\x83\xe8"
326 "\x83\x96\xdf\xb1\xea\x9d\x99\xe6\xa8\x8a\xec\xb3\xb3\xea\xa8\x9b\xe6\x82"
327 "\x9c\xe9\xb9\x9f\xe9\x93\x80\xec\xaa\xa5\xe6\x8b\x83\xe5\x9b\xa8\xe5\x99"
328 "\xaa\xe6\xad\xad\xeb\x96\x84\xec\x9b\x98\xe1\x97\x90\xe9\x97\xbb\xea\xac"
329 "\x83\xe8\x83\x96\xdf\xb1\xea\x9d\x99\xe6\xa8\x8a\xec\xb3\xb3\xea\xa8\x9b"
330 "\xe6\x82\x9c\xe9\xb9\x9f\xe9\x93\x80\xec\xaa\xa5\xe6\x8b\x83\xe5\x9b\xa8"
331 "\xe5\x99\xaa\xe6\xad\xad\xeb\x96\x84\xec\x9b\x98\xe1\x97\x90\xe9\x97\xbb"
332 "\xea\xac\x83\xe8\x83\x96\xdf\xb1\xea\x9d\x99\xe6\xa8\x8a\xec\xb3\xb3\xea"
333 "\xa8\x9b\xe6\x82\x9c\xe9\xb9\x9f\xe9\x93\x80\xec\xaa\xa5\xe6\x8b\x83\xe5"
334 "\x9b\xa8\xe5\x99\xaa\xe6\xad\xad\xeb\x96\x84\xec\x9b\x98\xe1\x97\x90\xe9"
335 "\x97\xbb\xea\xac\x83\xe8\x83\x96\xdf\xb1\xea\x9d\x99\xe6\xa8\x8a\xec\xb3"
336 "\xb3\xea\xa8\x9b\xe6\x82\x9c\xe9\xb9\x9f\xe9\x93\x80\xec\xaa\xa5\xe6\x8b"
337 "\x83\xe5\x9b\xa8\xe5\x99\xaa\xe6\xad\xad\xeb\x96\x84\xec\x9b\x98\xe1\x97"
338 "\x90\xe9\x97\xbb\xea\xac\x83\xe8\x83\x96\xdf\xb1\xea\x9d\x99\xe6\xa8\x8a"
339 "\xec\xb3\xb3\xea\xa8\x9b\xe6\x82\x9c\xe9\xb9\x9f\xe9\x93\x80\xec\xaa\xa5"
340 "\xe6\x8b\x83\xe5\x9b\xa8\xe5\x99\xaa\xe6\xad\xad\xeb\x96\x84\xec\x9b\x98"
341 "\xe1\x97\x90\xe9\x97\xbb\xea\xac\x83\xe8\x83\x96\xdf\xb1\xea\x9d\x99\xe6"
342 "\xa8\x8a\xec\xb3\xb3\xea\xa8\x9b\xe6\x82\x9c\xe9\xb9\x9f\xe9\x93\x80\xec"
343 "\xaa\xa5\xe6\x8b\x83\xe5\x9b\xa8\xe5\x99\xaa\xe6\xad\xad\xeb\x96\x84\xec"
344 "\x9b\x98\naaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
345 "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
346 "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
347 "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
348 "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
349 "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
350 "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
351 "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
352 "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
353 "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
354 "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
355 "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
356 "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
357 "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
358 "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
359 "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
360 "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
361 "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
362 "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
363 "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
364 "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
365 "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
366 "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
367 "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
368 "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
369 "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
370 "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
371 "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
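// For reference only: the same text as kTeststr_ks (defined above as UTF-8
// bytes), written out as Unicode code points.
375 // const char* kTeststr_ks =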
376 // \u0928\u0947\u092A\u093E\u0932\u0020\u090F\u0938\u093F\u092F\u093E\u0020
377 // \u092E\u0902\u091C\u0020\u0905\u0916\u0020\u092E\u0941\u0932\u0941\u0915
378 // \u0020\u0930\u093E\u091C\u0927\u093E\u0928\u0940\u0020\u0915\u093E\u0920
379 // \u092E\u093E\u0921\u094C\u0902\u0020\u0928\u0947\u092A\u093E\u0932\u0020
380 // \u0905\u0927\u093F\u0930\u093E\u091C\u094D\u092F\u0020\u092A\u0947\u0930
381 // \u0947\u0917\u094D\u0935\u093E\u092F\u0020
382 // \u0926\u0915\u094D\u0937\u093F\u0923\u0020\u0905\u092E\u0947\u0930\u093F
383 // \u0915\u093E\u0020\u092E\u0939\u093E\u0926\u094D\u0935\u0940\u092A\u0947
384 // \u0020\u092E\u0927\u094D\u0020\u092F\u0915\u094D\u0937\u0947\u0924\u094D
385 // \u0930\u0947\u0020\u090F\u0915\u0020\u0926\u0947\u0936\u0020\u0905\u0938
386 // \u094D\u0020\u0924\u093F\u0020\u092B\u0923\u0940\u0936\u094D\u0935\u0930
387 // \u0020\u0928\u093E\u0925\u0020\u0930\u0947\u0923\u0941\u0020
388 // \u092B\u093F\u091C\u0940\u0020\u091B\u0941\u0020\u0926\u0915\u094D\u0937
389 // \u093F\u0923\u0020\u092A\u094D\u0930\u0936\u093E\u0928\u094D\u0020\u0924
390 // \u0020\u092E\u0939\u093E\u0938\u093E\u0917\u0930\u0020\u092E\u0902\u091C
391 // \u0020\u0905\u0916\u0020\u0926\u0947\u0936\u0020\u092C\u0939\u093E\u092E
392 // \u093E\u0938\u0020\u091B\u0941\u0020\u0915\u0947\u0930\u0947\u092C\u093F
393 // \u092F\u0928\u0020\u092E\u0902\u091C\u0020
394 // \u0905\u0916\u0020\u092E\u0941\u0932\u0941\u0916\u0020\u0930\u093E\u091C
395 // \u0927\u093E\u0928\u0940\u0020\u0928\u0938\u094C\u0020\u0938\u092E\u094D
396 // \u0020\u092C\u0926\u094D\u0918\u0020\u0935\u093F\u0937\u092F\u0020\u092C
397 // \u0941\u0930\u0941\u0902\u0921\u0940\u0020\u0905\u092B\u094D\u0930\u0940
398 // \u0915\u093E\u0020\u092E\u0939\u093E\u0926\u094D\u0935\u0940\u092A\u0947
399 // \u0020\u092E\u0927\u094D\u0020
400 // \u092F\u0915\u094D\u0937\u0947\u0924\u094D\u0930\u0947\u0020\u0926\u0947
401 // \u0936\u0020\u0905\u0938\u094D\u0020\u0924\u093F\u0020\u0938\u092E\u094D
402 // \u0020\u092C\u0926\u094D\u0918\u0020\u0935\u093F\u0937\u092F
// Test fixture for the TEST_F cases in this file. It provides helper methods
// that hand a NUL-terminated C string to the CompactLangDet entry points so a
// test can compare the resulting Language with EXPECT_EQ.
407 class CompactLangDetTest : public testing::Test {
409 // Helpers declared here can be used by all tests that use this fixture.
411 // Detects the language of the plain-text string src.
// The text length handed to the detector is strlen(src), so src must be a
// non-NULL, NUL-terminated string. The first DetectLanguage argument is NULL.
412 Language TestCompactLangDetPlain(const char* src) {
413 bool is_plain_text = true;
416 Language lang = CompactLangDet::DetectLanguage(NULL, src, strlen(src),
423 // Detects the extended language of the plain-text string src through
// CompactLangDet::ExtDetectLanguageSummary (first argument NULL).
// NOTE(review): language3 presumably receives the top three candidate
// languages -- confirm against compact_lang_det.h.
424 Language TestExtCompactLangDetPlain(const char* src) {
425 bool is_plain_text = true;
426 Language language3[3];
431 Language lang = CompactLangDet::ExtDetectLanguageSummary(NULL,
440 }; // end class CompactLangDetTest
// Smoke test: the detector must report ENGLISH for kTeststr_en (defined in
// this file) and HINDI for kTeststr_hi_Deva.
// NOTE(review): kTeststr_hi_Deva is not defined in the visible part of this
// file; presumably it comes from unittest_data.h -- confirm.
443 TEST_F(CompactLangDetTest, EasyTests) {
444 EXPECT_EQ(ENGLISH, TestCompactLangDetPlain(kTeststr_en));
445 EXPECT_EQ(HINDI, TestCompactLangDetPlain(kTeststr_hi_Deva));
// Runs the plain-text detector helper over the per-language test strings and
// checks the Language it reports. For some samples the expected result is
// deliberately a different language than the one in the string's name; those
// lines carry a trailing NOTE comment.
449 TEST_F(CompactLangDetTest, FullTests) {
450 // Only the tests reflecting the currently used detection tables are enabled.
// Expectations that are commented out below are disabled, not deleted.
452 // Do all the languages in all their scripts
453 //// EXPECT_EQ(AFAR, TestCompactLangDetPlain(kTeststr_aa_Latn));
454 //// EXPECT_EQ(ABKHAZIAN, TestCompactLangDetPlain(kTeststr_ab_Cyrl));
455 EXPECT_EQ(AFRIKAANS, TestCompactLangDetPlain(kTeststr_af_Latn));
456 //// EXPECT_EQ(AMHARIC, TestCompactLangDetPlain(kTeststr_am_Ethi));
457 EXPECT_EQ(ARABIC, TestCompactLangDetPlain(kTeststr_ar_Arab));
458 //// EXPECT_EQ(ASSAMESE, TestCompactLangDetPlain(kTeststr_as_Beng));
459 //// EXPECT_EQ(AYMARA, TestCompactLangDetPlain(kTeststr_ay_Latn));
460 // AZERBAIJANI Arab & Cyrl removed 2008.05.27. Just AZERBAIJANI Latn left
461 // EXPECT_EQ(AZERBAIJANI, TestCompactLangDetPlain(kTeststr_az_Arab));
462 // Missing data: az-Cyrl
463 //// EXPECT_EQ(AZERBAIJANI, TestCompactLangDetPlain(kTeststr_az_Latn));
465 //// EXPECT_EQ(BASHKIR, TestCompactLangDetPlain(kTeststr_ba_Cyrl));
466 EXPECT_EQ(BELARUSIAN, TestCompactLangDetPlain(kTeststr_be_Cyrl));
467 EXPECT_EQ(BULGARIAN, TestCompactLangDetPlain(kTeststr_bg_Cyrl));
468 //// EXPECT_EQ(BIHARI, TestCompactLangDetPlain(kTeststr_bh_Deva));
469 //// EXPECT_EQ(BISLAMA, TestCompactLangDetPlain(kTeststr_bi_Latn));
470 //// EXPECT_EQ(BENGALI, TestCompactLangDetPlain(kTeststr_bn_Beng));
472 //// EXPECT_EQ(TIBETAN, TestCompactLangDetPlain(kTeststr_bo_Tibt));
473 //// EXPECT_EQ(BRETON, TestCompactLangDetPlain(kTeststr_br_Latn));
474 EXPECT_EQ(SERBIAN, TestCompactLangDetPlain(kTeststr_bs_Cyrl)); // NOTE: Not BOSNIAN
475 //// EXPECT_EQ(CROATIAN, TestCompactLangDetPlain(kTeststr_bs_Latn)); // NOTE: Not BOSNIAN
477 EXPECT_EQ(CATALAN, TestCompactLangDetPlain(kTeststr_ca_Latn));
478 EXPECT_EQ(CHEROKEE, TestCompactLangDetPlain(kTeststr_chr_Cher));
479 //// EXPECT_EQ(CORSICAN, TestCompactLangDetPlain(kTeststr_co_Latn));
480 // No CREOLES_AND_PIDGINS_ENGLISH_BASED
481 // No CREOLES_AND_PIDGINS_FRENCH_BASED
482 // No CREOLES_AND_PIDGINS_OTHER
483 // No CREOLES_AND_PIDGINS_PORTUGUESE_BASED
484 EXPECT_EQ(CZECH, TestCompactLangDetPlain(kTeststr_cs_Latn));
485 EXPECT_EQ(WELSH, TestCompactLangDetPlain(kTeststr_cy_Latn));
487 EXPECT_EQ(DANISH, TestCompactLangDetPlain(kTeststr_da_Latn));
488 EXPECT_EQ(GERMAN, TestCompactLangDetPlain(kTeststr_de_Latn));
489 EXPECT_EQ(DHIVEHI, TestCompactLangDetPlain(kTeststr_dv_Thaa));
490 //// EXPECT_EQ(DZONGKHA, TestCompactLangDetPlain(kTeststr_dz_Tibt));
492 EXPECT_EQ(GREEK, TestCompactLangDetPlain(kTeststr_el_Grek));
493 EXPECT_EQ(ENGLISH, TestCompactLangDetPlain(kTeststr_en_Latn));
494 //// EXPECT_EQ(ESPERANTO, TestCompactLangDetPlain(kTeststr_eo_Latn));
495 EXPECT_EQ(SPANISH, TestCompactLangDetPlain(kTeststr_es_Latn));
496 EXPECT_EQ(ESTONIAN, TestCompactLangDetPlain(kTeststr_et_Latn));
497 //// EXPECT_EQ(BASQUE, TestCompactLangDetPlain(kTeststr_eu_Latn));
499 EXPECT_EQ(PERSIAN, TestCompactLangDetPlain(kTeststr_fa_Arab));
500 EXPECT_EQ(FINNISH, TestCompactLangDetPlain(kTeststr_fi_Latn));
501 //// EXPECT_EQ(FIJIAN, TestCompactLangDetPlain(kTeststr_fj_Latn));
502 //// EXPECT_EQ(FAROESE, TestCompactLangDetPlain(kTeststr_fo_Latn));
503 EXPECT_EQ(FRENCH, TestCompactLangDetPlain(kTeststr_fr_Latn));
504 //// EXPECT_EQ(FRISIAN, TestCompactLangDetPlain(kTeststr_fy_Latn));
506 EXPECT_EQ(IRISH, TestCompactLangDetPlain(kTeststr_ga_Latn));
507 //// EXPECT_EQ(SCOTS_GAELIC, TestCompactLangDetPlain(kTeststr_gd_Latn));
508 //// EXPECT_EQ(GALICIAN, TestCompactLangDetPlain(kTeststr_gl_Latn));
509 //// EXPECT_EQ(GUARANI, TestCompactLangDetPlain(kTeststr_gn_Latn));
510 EXPECT_EQ(GUJARATI, TestCompactLangDetPlain(kTeststr_gu_Gujr));
511 //// EXPECT_EQ(MANX, TestCompactLangDetPlain(kTeststr_gv_Latn));
513 //// EXPECT_EQ(HAUSA, TestCompactLangDetPlain(kTeststr_ha_Latn));
514 EXPECT_EQ(HINDI, TestCompactLangDetPlain(kTeststr_hi_Deva));
515 EXPECT_EQ(CROATIAN, TestCompactLangDetPlain(kTeststr_hr_Latn)); // NOTE: now CROATIAN
516 //// EXPECT_EQ(HAITIAN_CREOLE, TestCompactLangDetPlain(kTeststr_ht_Latn));
517 EXPECT_EQ(HUNGARIAN, TestCompactLangDetPlain(kTeststr_hu_Latn));
518 EXPECT_EQ(ARMENIAN, TestCompactLangDetPlain(kTeststr_hy_Armn));
520 //// EXPECT_EQ(INTERLINGUA, TestCompactLangDetPlain(kTeststr_ia_Latn));
521 EXPECT_EQ(MALAY, TestCompactLangDetPlain(kTeststr_id_Latn));
522 //// EXPECT_EQ(INTERLINGUE, TestCompactLangDetPlain(kTeststr_ie_Latn));
523 //// EXPECT_EQ(INUPIAK, TestCompactLangDetPlain(kTeststr_ik_Latn));
524 EXPECT_EQ(ICELANDIC, TestCompactLangDetPlain(kTeststr_is_Latn));
525 EXPECT_EQ(ITALIAN, TestCompactLangDetPlain(kTeststr_it_Latn));
526 EXPECT_EQ(INUKTITUT, TestCompactLangDetPlain(kTeststr_iu_Cans));
527 EXPECT_EQ(HEBREW, TestCompactLangDetPlain(kTeststr_iw_Hebr));
529 EXPECT_EQ(JAPANESE, TestCompactLangDetPlain(kTeststr_ja_Hani));
530 //// EXPECT_EQ(JAVANESE, TestCompactLangDetPlain(kTeststr_jw_Latn));
532 EXPECT_EQ(GEORGIAN, TestCompactLangDetPlain(kTeststr_ka_Geor));
533 //// EXPECT_EQ(KHASI, TestCompactLangDetPlain(kTeststr_kha_Latn));
534 //// EXPECT_EQ(KAZAKH, TestCompactLangDetPlain(kTeststr_kk_Arab));
535 //// EXPECT_EQ(KAZAKH, TestCompactLangDetPlain(kTeststr_kk_Cyrl));
536 //// EXPECT_EQ(KAZAKH, TestCompactLangDetPlain(kTeststr_kk_Latn));
537 //// EXPECT_EQ(GREENLANDIC, TestCompactLangDetPlain(kTeststr_kl_Latn));
538 EXPECT_EQ(KHMER, TestCompactLangDetPlain(kTeststr_km_Khmr));
539 EXPECT_EQ(KANNADA, TestCompactLangDetPlain(kTeststr_kn_Knda));
540 EXPECT_EQ(KOREAN, TestCompactLangDetPlain(kTeststr_ko_Hani));
541 //// EXPECT_EQ(KASHMIRI, TestCompactLangDetPlain(kTeststr_ks_Deva));
542 // KURDISH Latn removed 2008.05.27. Just KURDISH Arab left
543 //// EXPECT_EQ(KURDISH, TestCompactLangDetPlain(kTeststr_ku_Arab));
544 // EXPECT_EQ(KURDISH, TestCompactLangDetPlain(kTeststr_ku_Latn));
545 //// EXPECT_EQ(KYRGYZ, TestCompactLangDetPlain(kTeststr_ky_Arab));
546 //// EXPECT_EQ(KYRGYZ, TestCompactLangDetPlain(kTeststr_ky_Cyrl));
548 //// EXPECT_EQ(LATIN, TestCompactLangDetPlain(kTeststr_la_Latn));
549 //// EXPECT_EQ(LUXEMBOURGISH, TestCompactLangDetPlain(kTeststr_lb_Latn));
550 //// EXPECT_EQ(GANDA, TestCompactLangDetPlain(kTeststr_lg_Latn));
551 //// EXPECT_EQ(LINGALA, TestCompactLangDetPlain(kTeststr_ln_Latn));
552 EXPECT_EQ(LAOTHIAN, TestCompactLangDetPlain(kTeststr_lo_Laoo));
553 EXPECT_EQ(LITHUANIAN, TestCompactLangDetPlain(kTeststr_lt_Latn));
554 EXPECT_EQ(LATVIAN, TestCompactLangDetPlain(kTeststr_lv_Latn));
556 //// EXPECT_EQ(MALAGASY, TestCompactLangDetPlain(kTeststr_mg_Latn));
557 //// EXPECT_EQ(MAORI, TestCompactLangDetPlain(kTeststr_mi_Latn));
558 EXPECT_EQ(MACEDONIAN, TestCompactLangDetPlain(kTeststr_mk_Cyrl));
559 EXPECT_EQ(MALAYALAM, TestCompactLangDetPlain(kTeststr_ml_Mlym));
560 //// EXPECT_EQ(MONGOLIAN, TestCompactLangDetPlain(kTeststr_mn_Cyrl));
561 //// EXPECT_EQ(MOLDAVIAN, TestCompactLangDetPlain(kTeststr_mo_Cyrl));
562 //// EXPECT_EQ(MARATHI, TestCompactLangDetPlain(kTeststr_mr_Deva));
563 EXPECT_EQ(MALAY, TestCompactLangDetPlain(kTeststr_ms_Latn));
564 // EXPECT_EQ(MALAY, TestCompactLangDetPlain(kTeststr_ms_Latn2));
565 EXPECT_EQ(MALAY, TestCompactLangDetPlain(kTeststr_ms_Latn3));
566 //// EXPECT_EQ(MALTESE, TestCompactLangDetPlain(kTeststr_mt_Latn));
567 //// EXPECT_EQ(BURMESE, TestCompactLangDetPlain(kTeststr_my_Latn));
568 //// EXPECT_EQ(BURMESE, TestCompactLangDetPlain(kTeststr_my_Mymr));
570 //// EXPECT_EQ(NAURU, TestCompactLangDetPlain(kTeststr_na_Latn));
571 //// EXPECT_EQ(NEPALI, TestCompactLangDetPlain(kTeststr_ne_Deva));
572 EXPECT_EQ(DUTCH, TestCompactLangDetPlain(kTeststr_nl_Latn));
573 //// EXPECT_EQ(NORWEGIAN_N, TestCompactLangDetPlain(kTeststr_nn_Latn));
574 EXPECT_EQ(NORWEGIAN, TestCompactLangDetPlain(kTeststr_no_Latn));
576 //// EXPECT_EQ(OCCITAN, TestCompactLangDetPlain(kTeststr_oc_Latn));
577 //// EXPECT_EQ(OROMO, TestCompactLangDetPlain(kTeststr_om_Latn));
578 EXPECT_EQ(ORIYA, TestCompactLangDetPlain(kTeststr_or_Orya));
580 EXPECT_EQ(PUNJABI, TestCompactLangDetPlain(kTeststr_pa_Guru));
581 EXPECT_EQ(POLISH, TestCompactLangDetPlain(kTeststr_pl_Latn));
582 //// EXPECT_EQ(PASHTO, TestCompactLangDetPlain(kTeststr_ps_Arab));
583 EXPECT_EQ(PORTUGUESE, TestCompactLangDetPlain(kTeststr_pt_BR)); // NOTE: not PORTUGUESE_B
586 //// EXPECT_EQ(QUECHUA, TestCompactLangDetPlain(kTeststr_qu_Latn));
588 //// EXPECT_EQ(RHAETO_ROMANCE, TestCompactLangDetPlain(kTeststr_rm_Latn));
589 //// EXPECT_EQ(RUNDI, TestCompactLangDetPlain(kTeststr_rn_Latn));
590 EXPECT_EQ(ROMANIAN, TestCompactLangDetPlain(kTeststr_ro_Latn));
591 EXPECT_EQ(RUSSIAN, TestCompactLangDetPlain(kTeststr_ru_Cyrl));
592 //// EXPECT_EQ(KINYARWANDA, TestCompactLangDetPlain(kTeststr_rw_Latn));
594 //// EXPECT_EQ(SANSKRIT, TestCompactLangDetPlain(kTeststr_sa_Deva));
595 //// EXPECT_EQ(SANSKRIT, TestCompactLangDetPlain(kTeststr_sa_Latn));
596 //// EXPECT_EQ(SCOTS, TestCompactLangDetPlain(kTeststr_sco_Latn));
597 //// EXPECT_EQ(SINDHI, TestCompactLangDetPlain(kTeststr_sd_Arab));
598 //// EXPECT_EQ(SANGO, TestCompactLangDetPlain(kTeststr_sg_Latn));
599 // No SERBO_CROATIAN (sh)
600 EXPECT_EQ(SINHALESE, TestCompactLangDetPlain(kTeststr_si_Sinh));
601 //// EXPECT_EQ(LIMBU, TestCompactLangDetPlain(kTeststr_sit_NP));
602 EXPECT_EQ(SLOVAK, TestCompactLangDetPlain(kTeststr_sk_Latn));
603 EXPECT_EQ(SLOVENIAN, TestCompactLangDetPlain(kTeststr_sl_Latn));
604 //// EXPECT_EQ(SAMOAN, TestCompactLangDetPlain(kTeststr_sm_Latn));
605 //// EXPECT_EQ(SHONA, TestCompactLangDetPlain(kTeststr_sn_Latn));
606 //// EXPECT_EQ(SOMALI, TestCompactLangDetPlain(kTeststr_so_Latn));
607 //// EXPECT_EQ(ALBANIAN, TestCompactLangDetPlain(kTeststr_sq_Latn));
608 EXPECT_EQ(SERBIAN, TestCompactLangDetPlain(kTeststr_sr_Cyrl)); // NOTE: now SERBIAN
609 EXPECT_EQ(CROATIAN, TestCompactLangDetPlain(kTeststr_sr_Latn)); // NOTE: Not SERBIAN
610 EXPECT_EQ(CROATIAN, TestCompactLangDetPlain(kTeststr_sr_ME_Latn)); // NOTE: not SERBIAN nor MONTENEGRIN
611 //// EXPECT_EQ(SISWANT, TestCompactLangDetPlain(kTeststr_ss_Latn));
612 //// EXPECT_EQ(SESOTHO, TestCompactLangDetPlain(kTeststr_st_Latn));
613 //// EXPECT_EQ(SUNDANESE, TestCompactLangDetPlain(kTeststr_su_Latn));
614 EXPECT_EQ(SWEDISH, TestCompactLangDetPlain(kTeststr_sv_Latn));
615 EXPECT_EQ(SWAHILI, TestCompactLangDetPlain(kTeststr_sw_Latn));
616 EXPECT_EQ(SYRIAC, TestCompactLangDetPlain(kTeststr_syr_Syrc));
618 EXPECT_EQ(TAMIL, TestCompactLangDetPlain(kTeststr_ta_Taml));
619 EXPECT_EQ(TELUGU, TestCompactLangDetPlain(kTeststr_te_Telu));
620 // Tajik Arab removed 2008.05.27. Just Tajik Cyrl left
621 // EXPECT_EQ(TAJIK, TestCompactLangDetPlain(kTeststr_tg_Arab));
622 //// EXPECT_EQ(TAJIK, TestCompactLangDetPlain(kTeststr_tg_Cyrl));
623 EXPECT_EQ(THAI, TestCompactLangDetPlain(kTeststr_th_Thai));
624 //// EXPECT_EQ(TIGRINYA, TestCompactLangDetPlain(kTeststr_ti_Ethi));
625 //// EXPECT_EQ(TURKMEN, TestCompactLangDetPlain(kTeststr_tk_Cyrl));
626 //// EXPECT_EQ(TURKMEN, TestCompactLangDetPlain(kTeststr_tk_Latn));
627 EXPECT_EQ(TAGALOG, TestCompactLangDetPlain(kTeststr_tl_Latn));
628 //// EXPECT_EQ(TSWANA, TestCompactLangDetPlain(kTeststr_tn_Latn));
629 //// EXPECT_EQ(TONGA, TestCompactLangDetPlain(kTeststr_to_Latn));
630 EXPECT_EQ(TURKISH, TestCompactLangDetPlain(kTeststr_tr_Latn));
631 //// EXPECT_EQ(TSONGA, TestCompactLangDetPlain(kTeststr_ts_Latn));
632 //// EXPECT_EQ(TATAR, TestCompactLangDetPlain(kTeststr_tt_Cyrl));
633 //// EXPECT_EQ(TATAR, TestCompactLangDetPlain(kTeststr_tt_Latn));
634 //// EXPECT_EQ(TWI, TestCompactLangDetPlain(kTeststr_tw_Latn));
636 //// EXPECT_EQ(UIGHUR, TestCompactLangDetPlain(kTeststr_ug_Arab));
637 //// EXPECT_EQ(UIGHUR, TestCompactLangDetPlain(kTeststr_ug_Cyrl));
638 //// EXPECT_EQ(UIGHUR, TestCompactLangDetPlain(kTeststr_ug_Latn));
639 EXPECT_EQ(UKRAINIAN, TestCompactLangDetPlain(kTeststr_uk_Cyrl));
640 //// EXPECT_EQ(URDU, TestCompactLangDetPlain(kTeststr_ur_Arab));
641 //// EXPECT_EQ(UZBEK, TestCompactLangDetPlain(kTeststr_uz_Arab));
642 //// EXPECT_EQ(UZBEK, TestCompactLangDetPlain(kTeststr_uz_Cyrl));
643 //// EXPECT_EQ(UZBEK, TestCompactLangDetPlain(kTeststr_uz_Latn));
645 EXPECT_EQ(VIETNAMESE, TestCompactLangDetPlain(kTeststr_vi_Latn));
646 //// EXPECT_EQ(VOLAPUK, TestCompactLangDetPlain(kTeststr_vo_Latn));
648 //// EXPECT_EQ(WOLOF, TestCompactLangDetPlain(kTeststr_wo_Latn));
650 //// EXPECT_EQ(XHOSA, TestCompactLangDetPlain(kTeststr_xh_Latn));
652 EXPECT_EQ(YIDDISH, TestCompactLangDetPlain(kTeststr_yi_Hebr));
653 //// EXPECT_EQ(YORUBA, TestCompactLangDetPlain(kTeststr_yo_Latn));
655 // Zhuang Hani removed 2008.05.13. Just Zhuang Latn left
656 // EXPECT_EQ(ZHUANG, TestCompactLangDetPlain(kTeststr_za_Hani));
657 //// EXPECT_EQ(ZHUANG, TestCompactLangDetPlain(kTeststr_za_Latn));
658 EXPECT_EQ(CHINESE, TestCompactLangDetPlain(kTeststr_zh_Hani));
659 EXPECT_EQ(CHINESE_T, TestCompactLangDetPlain(kTeststr_zh_TW));
660 //// EXPECT_EQ(ZULU, TestCompactLangDetPlain(kTeststr_zu_Latn));
661 // No TG_UNKNOWN_LANGUAGE
662 // No UNKNOWN_LANGUAGE
664 // This test should be executed with ASAN.
// NOTE(review): presumably this guards against memory errors in the detector
// on the repetitive kTeststr_kr_repetitions input, which only a sanitizer
// build would report -- confirm.
665 EXPECT_EQ(KOREAN, TestCompactLangDetPlain(kTeststr_kr_repetitions));
// Feeds samples for the extended (X_*) languages and for non-text inputs to
// both the plain helper (TestCompactLangDetPlain) and the extended helper
// (TestExtCompactLangDetPlain). Only the zzb and zze expectations are
// currently enabled; both helpers are expected to report ENGLISH for them.
669 TEST_F(CompactLangDetTest, ExtendedTests) {
670 // Do the extended languages, with them not-allowed then allowed
671 // These turn out to be extraordinarily sensitive forms of garbage bytes
672 //// EXPECT_EQ(ENGLISH, TestCompactLangDetPlain(kTeststr_tlh_Latn));
673 //// EXPECT_EQ(X_KLINGON, TestExtCompactLangDetPlain(kTeststr_tlh_Latn));
675 //// EXPECT_EQ(ENGLISH, TestCompactLangDetPlain(kTeststr_zzp_Latn));
676 //// EXPECT_EQ(X_PIG_LATIN, TestExtCompactLangDetPlain(kTeststr_zzp_Latn));
678 //// EXPECT_EQ(ENGLISH, TestCompactLangDetPlain(kTeststr_xx_Bugi));
679 //// EXPECT_EQ(X_BUGINESE, TestExtCompactLangDetPlain(kTeststr_xx_Bugi));
681 //// EXPECT_EQ(ENGLISH, TestCompactLangDetPlain(kTeststr_xx_Goth));
682 //// EXPECT_EQ(X_GOTHIC, TestExtCompactLangDetPlain(kTeststr_xx_Goth));
684 // Next three now removed permanently from probability tables (May 2008)
685 // (used to be X_BORK_BORK_BORK, X_ELMER_FUDD, X_HACKER).
687 // Small changes in probability tables may cause these non-texts to
688 // change detection result. If that happens, cross-check that
689 // the new result is not because of a bug, then change the expected values.
690 EXPECT_EQ(ENGLISH, TestCompactLangDetPlain(kTeststr_zzb_Latn));
691 EXPECT_EQ(ENGLISH, TestExtCompactLangDetPlain(kTeststr_zzb_Latn));
693 EXPECT_EQ(ENGLISH, TestCompactLangDetPlain(kTeststr_zze_Latn));
694 EXPECT_EQ(ENGLISH, TestExtCompactLangDetPlain(kTeststr_zze_Latn));
696 //// EXPECT_EQ(ENGLISH, TestCompactLangDetPlain(kTeststr_zzh_Latn));
697 //// EXPECT_EQ(ENGLISH, TestExtCompactLangDetPlain(kTeststr_zzh_Latn));
// Test program entry point, compiled only when CLD_WINDOWS is not defined.
// Sets FLAGS_logtostderr, calls InitGoogle with the command-line arguments,
// then runs every registered test and returns the RUN_ALL_TESTS() result as
// the process exit code.
703 #if !defined(CLD_WINDOWS)
704 int main(int argc, char** argv) {
705 FLAGS_logtostderr = true;  // presumably sends log output to stderr -- confirm
706 InitGoogle("Unit test for CLD small", &argc, &argv, false);
707 return RUN_ALL_TESTS();