2 * Copyright (c) 2006, 2007, 2008, 2009, 2010, 2012 Google Inc. All rights reserved.
4 * Redistribution and use in source and binary forms, with or without
5 * modification, are permitted provided that the following conditions are
8 * * Redistributions of source code must retain the above copyright
9 * notice, this list of conditions and the following disclaimer.
10 * * Redistributions in binary form must reproduce the above
11 * copyright notice, this list of conditions and the following disclaimer
12 * in the documentation and/or other materials provided with the
14 * * Neither the name of Google Inc. nor the names of its
15 * contributors may be used to endorse or promote products derived from
16 * this software without specific prior written permission.
18 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
19 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
20 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
21 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
22 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
23 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
24 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
25 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
26 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
27 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
28 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
32 #include "platform/fonts/win/FontFallbackWin.h"
34 #include "SkFontMgr.h"
35 #include "SkTypeface.h"
36 #include "wtf/HashMap.h"
37 #include "wtf/text/StringHash.h"
38 #include "wtf/text/WTFString.h"
40 #include <unicode/locid.h>
41 #include <unicode/uchar.h>
// Reports whether |fontManager| can supply a typeface for |fontName| whose own
// family names include |fontName|. The typeface is requested with
// SkTypeface::kNormal, and each localized family name it reports is compared
// against the requested name with equalIgnoringCase().
// NOTE(review): presumably the name comparison guards against the font manager
// handing back a substitute face — confirm against Skia's behaviour.
// NOTE(review): this copy of the file is missing lines between the typeface
// creation and the first dereference of |tf|, and inside the loop body
// (likely a null check and an early exit on match) — confirm against the
// complete file before editing.
47 static inline bool isFontPresent(const UChar* fontName, SkFontMgr* fontManager)
49 String family = fontName;
// The created typeface is adopted into a RefPtr, so no explicit release of
// |tf| appears in this function.
50 RefPtr<SkTypeface> tf = adoptRef(fontManager->legacyCreateTypeface(family.utf8().data(), SkTypeface::kNormal));
54 SkTypeface::LocalizedStrings* actualFamilies = tf->createFamilyNameIterator();
55 bool matchesRequestedFamily = false;
56 SkTypeface::LocalizedString actualFamily;
57 while (actualFamilies->next(&actualFamily)) {
// Family names come back as UTF-8 and are converted before the
// case-insensitive comparison.
58 if (equalIgnoringCase(family, AtomicString::fromUTF8(actualFamily.fString.c_str()))) {
59 matchesRequestedFamily = true;
// The iterator is a raw pointer and is released by hand once the scan is done.
63 actualFamilies->unref();
65 return matchesRequestedFamily;
68 // A simple mapping from UScriptCode to family name. This is a sparse array,
69 // which works well since the range of UScriptCode values is small.
// The array has USCRIPT_CODE_LIMIT slots and is indexed directly by a
// UScriptCode value. The code in this file stores 0 in a slot, and tests
// slots for null, to represent "no family known for this script".
70 typedef const UChar* ScriptToFontMap[USCRIPT_CODE_LIMIT];
// Fills |scriptFontMap| with the monospace family to use for the scripts
// listed in the local table (Hebrew and Arabic, both "courier new"). Slots for
// scripts that are not in the table are not written by this function.
// |fontManager| is not referenced in the lines visible here.
// NOTE(review): the definition of the FontMap element type (it has |script|
// and |family| members, per the loop below) and the enclosing braces are
// missing from this copy of the file — confirm against the complete file.
72 void initializeScriptMonospaceFontMap(ScriptToFontMap& scriptFontMap, SkFontMgr* fontManager)
79 static const FontMap fontMap[] = {
80 { USCRIPT_HEBREW, L"courier new" },
81 { USCRIPT_ARABIC, L"courier new" },
// Copy every table entry into the slot indexed by its script code.
84 for (size_t i = 0; i < WTF_ARRAY_LENGTH(fontMap); ++i)
85 scriptFontMap[fontMap[i].script] = fontMap[i].family;
// Fills |scriptFontMap| with the default (non-monospace) fallback family for
// each script, in three visible steps:
//   1. Copy the fixed script -> family table |fontMap| into the map.
//   2. For each script in |scriptToFontFamilies|, reset its slot to 0 and then
//      store a candidate family for which isFontPresent() returns true.
//   3. Pick a family for USCRIPT_HAN from the slot of another script, chosen
//      by comparing the ICU default locale against Japanese, Korean and
//      Traditional Chinese; Simplified Chinese is used otherwise.
// NOTE(review): several lines are missing from this copy of the file: the
// FontMap struct, the |script| member of ScriptToFontFamilies, the loop that
// walks each 0-terminated candidate list, the final `else`, and whatever
// guards the USCRIPT_HAN assignment. Confirm against the complete file before
// changing any logic here.
88 void initializeScriptFontMap(ScriptToFontMap& scriptFontMap, SkFontMgr* fontManager)
95 static const FontMap fontMap[] = {
96 {USCRIPT_LATIN, L"times new roman"},
97 {USCRIPT_GREEK, L"times new roman"},
98 {USCRIPT_CYRILLIC, L"times new roman"},
99 // FIXME: Consider trying new Vista fonts before XP fonts for CJK.
100 // Some Vista users do want to use Vista cleartype CJK fonts. If we
101 // did, the results of tests with CJK characters would have to be
102 // regenerated for Vista.
103 {USCRIPT_SIMPLIFIED_HAN, L"simsun"},
104 {USCRIPT_TRADITIONAL_HAN, L"pmingliu"},
105 {USCRIPT_HIRAGANA, L"ms pgothic"},
106 {USCRIPT_KATAKANA, L"ms pgothic"},
107 {USCRIPT_KATAKANA_OR_HIRAGANA, L"ms pgothic"},
108 {USCRIPT_HANGUL, L"gulim"},
109 {USCRIPT_THAI, L"tahoma"},
110 {USCRIPT_HEBREW, L"david"},
111 {USCRIPT_ARABIC, L"tahoma"},
112 {USCRIPT_DEVANAGARI, L"mangal"},
113 {USCRIPT_BENGALI, L"vrinda"},
114 {USCRIPT_GURMUKHI, L"raavi"},
115 {USCRIPT_GUJARATI, L"shruti"},
116 {USCRIPT_TAMIL, L"latha"},
117 {USCRIPT_TELUGU, L"gautami"},
118 {USCRIPT_KANNADA, L"tunga"},
119 {USCRIPT_GEORGIAN, L"sylfaen"},
120 {USCRIPT_ARMENIAN, L"sylfaen"},
121 {USCRIPT_THAANA, L"mv boli"},
122 {USCRIPT_CANADIAN_ABORIGINAL, L"euphemia"},
123 {USCRIPT_CHEROKEE, L"plantagenet cherokee"},
124 {USCRIPT_MONGOLIAN, L"mongolian balti"},
125 // For USCRIPT_COMMON, we map blocks to scripts when
// Pairs a script with a 0-terminated list of candidate family names.
129 struct ScriptToFontFamilies {
131 const UChar** families;
134 // Kartika on Vista or earlier lacks the support for Chillu
135 // letters added to Unicode 5.1.
136 // Try AnjaliOldLipi (a very widely used Malayalam font with the full
137 // Unicode 5.x support) before falling back to Kartika.
138 static const UChar* malayalamFonts[] = {L"AnjaliOldLipi", L"Lohit Malayalam", L"Kartika", L"Rachana", 0};
139 // Try Khmer OS before Vista fonts because 'Khmer OS' goes along better
140 // with Latin and looks better/larger for the same size.
141 static const UChar* khmerFonts[] = {L"Khmer OS", L"MoolBoran", L"DaunPenh", L"Code2000", 0};
142 // For the following 6 scripts, two or more fonts are listed. The fonts in
143 // the 1st slot are not available on Windows XP. To support these
144 // scripts on XP, listed in the rest of slots are widely used
146 static const UChar* ethiopicFonts[] = {L"Nyala", L"Abyssinica SIL", L"Ethiopia Jiret", L"Visual Geez Unicode", L"GF Zemen Unicode", 0};
147 static const UChar* oriyaFonts[] = {L"Kalinga", L"ori1Uni", L"Lohit Oriya", 0};
148 static const UChar* laoFonts[] = {L"DokChampa", L"Saysettha OT", L"Phetsarath OT", L"Code2000", 0};
149 static const UChar* tibetanFonts[] = {L"Microsoft Himalaya", L"Jomolhari", L"Tibetan Machine Uni", 0};
150 static const UChar* sinhalaFonts[] = {L"Iskoola Pota", L"AksharUnicode", 0};
151 static const UChar* yiFonts[] = {L"Microsoft Yi Balti", L"Nuosu SIL", L"Code2000", 0};
152 // http://www.bethmardutho.org/support/meltho/download/index.php
153 static const UChar* syriacFonts[] = {L"Estrangelo Edessa", L"Estrangelo Nisibin", L"Code2000", 0};
154 // No Myanmar/Burmese font is shipped with Windows, yet. Try a few
155 // widely available/used ones that support Unicode 5.1 or later.
156 static const UChar* myanmarFonts[] = {L"Padauk", L"Parabaik", L"Myanmar3", L"Code2000", 0};
158 static const ScriptToFontFamilies scriptToFontFamilies[] = {
159 {USCRIPT_MALAYALAM, malayalamFonts},
160 {USCRIPT_KHMER, khmerFonts},
161 {USCRIPT_ETHIOPIC, ethiopicFonts},
162 {USCRIPT_ORIYA, oriyaFonts},
163 {USCRIPT_LAO, laoFonts},
164 {USCRIPT_TIBETAN, tibetanFonts},
165 {USCRIPT_SINHALA, sinhalaFonts},
166 {USCRIPT_YI, yiFonts},
167 {USCRIPT_SYRIAC, syriacFonts},
168 {USCRIPT_MYANMAR, myanmarFonts},
// Step 1: copy the fixed table into the map.
171 for (size_t i = 0; i < WTF_ARRAY_LENGTH(fontMap); ++i)
172 scriptFontMap[fontMap[i].script] = fontMap[i].family;
174 // FIXME: Instead of scanning the hard-coded list, we have to
175 // use EnumFont* to 'inspect' fonts to pick up fonts covering scripts
176 // when it's possible (e.g. using OS/2 table). If we do that, this
177 // had better be pulled out of here.
// Step 2: probe the candidate lists. The slot is cleared first, so it stays 0
// when no candidate passes isFontPresent().
178 for (size_t i = 0; i < WTF_ARRAY_LENGTH(scriptToFontFamilies); ++i) {
179 UScriptCode script = scriptToFontFamilies[i].script;
180 scriptFontMap[script] = 0;
181 const UChar** familyPtr = scriptToFontFamilies[i].families;
// NOTE(review): the statement that iterates |familyPtr| over the list is
// missing from this copy; presumably the first present family wins — confirm.
183 if (isFontPresent(*familyPtr, fontManager)) {
184 scriptFontMap[script] = *familyPtr;
191 // Initialize the locale-dependent mapping.
192 // Since Chrome synchronizes the ICU default locale with its UI locale,
193 // this ICU locale tells the current UI locale of Chrome.
194 icu::Locale locale = icu::Locale::getDefault();
195 const UChar* localeFamily = 0;
196 if (locale == icu::Locale::getJapanese()) {
197 localeFamily = scriptFontMap[USCRIPT_HIRAGANA];
198 } else if (locale == icu::Locale::getKorean()) {
199 localeFamily = scriptFontMap[USCRIPT_HANGUL];
200 } else if (locale == icu::Locale::getTraditionalChinese()) {
201 localeFamily = scriptFontMap[USCRIPT_TRADITIONAL_HAN];
203 // For other locales, use the simplified Chinese font for Han.
204 localeFamily = scriptFontMap[USCRIPT_SIMPLIFIED_HAN];
// Step 3: publish the locale-selected family as the generic Han family.
207 scriptFontMap[USCRIPT_HAN] = localeFamily;
210 // There are a lot of characters in USCRIPT_COMMON that can be covered
211 // by fonts for scripts closely related to them. See
212 // http://unicode.org/cldr/utility/list-unicodeset.jsp?a=[:Script=Common:]
213 // FIXME: make this more efficient with a wider coverage
//
// Maps the code point |ucs4| to a script by looking at its Unicode block
// (ublock_getCode) rather than its script property. Blocks without a mapping
// yield USCRIPT_COMMON.
// NOTE(review): the switch header and several case labels/returns are missing
// from this copy of the file, so the block -> script pairing of the unlabeled
// returns below cannot be read off these lines — confirm against the complete
// file.
214 UScriptCode getScriptBasedOnUnicodeBlock(int ucs4)
216 UBlockCode block = ublock_getCode(ucs4);
218 case UBLOCK_CJK_SYMBOLS_AND_PUNCTUATION:
// Both kana blocks share the Hiragana script.
220 case UBLOCK_HIRAGANA:
221 case UBLOCK_KATAKANA:
222 return USCRIPT_HIRAGANA;
224 return USCRIPT_ARABIC;
228 return USCRIPT_GREEK;
229 case UBLOCK_DEVANAGARI:
230 // For Danda and Double Danda (U+0964, U+0965), use a Devanagari
231 // font for now although they're used by other scripts as well.
232 // Without a context, we can't do any better.
233 return USCRIPT_DEVANAGARI;
234 case UBLOCK_ARMENIAN:
235 return USCRIPT_ARMENIAN;
236 case UBLOCK_GEORGIAN:
237 return USCRIPT_GEORGIAN;
239 return USCRIPT_KANNADA;
241 return USCRIPT_COMMON;
// Determines the script of the code point |ucs4|. ICU's uscript_getScript() is
// asked first; when it reports an error or a value at or below
// USCRIPT_INHERITED, the script is inferred from the character's Unicode
// block via getScriptBasedOnUnicodeBlock() instead.
// NOTE(review): the return statement is missing from this copy of the file;
// presumably |script| is returned — confirm against the complete file.
245 UScriptCode getScript(int ucs4)
247 UErrorCode err = U_ZERO_ERROR;
248 UScriptCode script = uscript_getScript(ucs4, &err);
249 // If script is invalid, common or inherited or there's an error,
250 // infer a script based on the unicode block of a character.
251 if (script <= USCRIPT_INHERITED || U_FAILURE(err))
252 script = getScriptBasedOnUnicodeBlock(ucs4);
258 // FIXME: this is font fallback code version 0.1
259 // - Cover all the scripts
260 // - Get the default font for each script/generic family from the
261 // preference instead of hardcoding in the source.
262 // (at least, read values from the registry for IE font settings).
263 // - Support generic families (from FontDescription)
264 // - If the default font for a script is not available,
265 // try some more fonts known to support it. Finally, we can
266 // use EnumFontFamilies or similar APIs to come up with a list of
267 // fonts supporting the script and cache the result.
268 // - Consider using UnicodeSet (or UnicodeMap) converted from
269 // GLYPHSET (BMP) or directly read from truetype cmap tables to
270 // keep track of which character is supported by which font
271 // - Update script_font_cache in response to WM_FONTCHANGE
//
// Returns the fallback family name for |script|, or a null pointer when the
// map has no entry for it. When |generic| is FontDescription::MonospaceFamily
// and the monospace map has an entry for |script|, that entry takes
// precedence over the default map.
// Both maps are function-local statics filled by initializeScriptFontMap()
// and initializeScriptMonospaceFontMap() using |fontManager|.
// NOTE(review): the lines that test and set |initialized|, and the value
// returned for USCRIPT_INVALID_CODE, are missing from this copy of the file;
// presumably initialization runs once on first call — confirm against the
// complete file.
273 const UChar* getFontFamilyForScript(UScriptCode script,
274 FontDescription::GenericFamilyType generic,
275 SkFontMgr* fontManager)
277 static ScriptToFontMap scriptFontMap;
278 static ScriptToFontMap scriptMonospaceFontMap;
279 static bool initialized = false;
281 initializeScriptFontMap(scriptFontMap, fontManager);
282 initializeScriptMonospaceFontMap(scriptMonospaceFontMap, fontManager);
// USCRIPT_INVALID_CODE is handled before |script| is used as an array index.
285 if (script == USCRIPT_INVALID_CODE)
287 ASSERT(script < USCRIPT_CODE_LIMIT);
288 if (generic == FontDescription::MonospaceFamily && scriptMonospaceFontMap[script])
289 return scriptMonospaceFontMap[script];
290 return scriptFontMap[script];
294 // - Handle 'Inherited', 'Common' and 'Unknown'
295 // (see http://www.unicode.org/reports/tr24/#Usage_Model )
296 // For 'Inherited' and 'Common', perhaps we need to
297 // accept another parameter indicating the previous family
298 // and just return it.
299 // - All the characters (or characters up to the point a single
300 // font can cover) need to be taken into account
//
// Chooses a fallback family name for |character|:
//   - the script comes from getScript(), with full-width ASCII forced to
//     USCRIPT_HAN and USCRIPT_COMMON re-mapped through the Unicode block;
//   - the family for that script comes from getFontFamilyForScript();
//   - when no family was found, or |character| lies outside the BMP, a
//     hard-coded family selected by Unicode plane is used instead.
// The script that was finally used is written to |*scriptChecked|.
// NOTE(review): the switch on |plane|, its case labels, any guard around the
// |scriptChecked| store, and the final return are missing from this copy of
// the file — confirm against the complete file.
301 const UChar* getFallbackFamily(UChar32 character,
302 FontDescription::GenericFamilyType generic,
303 UScriptCode* scriptChecked,
304 SkFontMgr* fontManager)
307 UScriptCode script = getScript(character);
309 // For the full-width ASCII characters (U+FF01 - U+FF5E), use the font for
310 // Han (determined in a locale-dependent way above). Full-width ASCII
311 // characters are rather widely used in Japanese and Chinese documents and
312 // they're fully covered by Chinese, Japanese and Korean fonts.
313 if (0xFF00 < character && character < 0xFF5F)
314 script = USCRIPT_HAN;
316 if (script == USCRIPT_COMMON)
317 script = getScriptBasedOnUnicodeBlock(character);
319 const UChar* family = getFontFamilyForScript(script, generic, fontManager);
320 // Another lame work-around to cover non-BMP characters.
321 // If the font family for script is not found or the character is
322 // not in BMP (> U+FFFF), we resort to the hard-coded list of
323 // fallback fonts for now.
324 if (!family || character > 0xFFFF) {
// Shifting out the low 16 bits leaves the Unicode plane index.
325 int plane = character >> 16;
328 family = L"code2001";
331 // Use a Traditional Chinese ExtB font if in Traditional Chinese locale.
332 // Otherwise, use a Simplified Chinese ExtB font. Windows Japanese
333 // fonts do support a small subset of ExtB (that are included in JIS X 0213),
334 // but its coverage is rather sparse.
335 // Eventually, this should be controlled by lang/xml:lang.
336 if (icu::Locale::getDefault() == icu::Locale::getTraditionalChinese())
337 family = L"pmingliu-extb";
339 family = L"simsun-extb";
342 family = L"lucida sans unicode";
347 *scriptChecked = script;