2 * fontconfig/src/fclang.c
4 * Copyright © 2002 Keith Packard
6 * Permission to use, copy, modify, distribute, and sell this software and its
7 * documentation for any purpose is hereby granted without fee, provided that
8 * the above copyright notice appear in all copies and that both that
9 * copyright notice and this permission notice appear in supporting
10 * documentation, and that the name of the author(s) not be used in
11 * advertising or publicity pertaining to distribution of the software without
12 * specific, written prior permission. The authors make no
13 * representations about the suitability of this software for any purpose. It
14 * is provided "as is" without express or implied warranty.
16 * THE AUTHOR(S) DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE,
17 * INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO
18 * EVENT SHALL THE AUTHOR(S) BE LIABLE FOR ANY SPECIAL, INDIRECT OR
19 * CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE,
20 * DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER
21 * TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
22 * PERFORMANCE OF THIS SOFTWARE.
/*
 * Members of the per-language table entry that the code below reads as
 * fcLangCharSets[i].lang and fcLangCharSets[i].charset.  The enclosing
 * declaration is not part of this listing.
 */
/* Language tag, held inline in a fixed 8-byte array. */
30 const FcChar8 lang[8];
/* Character set paired with that language tag. */
31 const FcCharSet charset;
39 #include "../fc-lang/fclang.h"
/*
 * Language bitmap of NUM_LANG_SET_MAP words; the bit helpers below address
 * it as ls->map[bucket] and use the low five bits of the id as the bit
 * position inside a word.
 */
44 FcChar32 map[NUM_LANG_SET_MAP];
/* Forward declaration: FcLangNormalize calls FcLangSetIndex ahead of its definition. */
47 static int FcLangSetIndex (const FcChar8 *lang);
/*
 * Set the bit for language `id` in ls->map.
 *
 * `id` is first remapped through fcLangCharSetIndices; bit (id & 0x1f) of
 * word `bucket` is then set.  The computation of `bucket` is not visible in
 * this listing -- presumably id >> 5; confirm.
 */
51 FcLangSetBitSet (FcLangSet *ls,
56 id = fcLangCharSetIndices[id];
/* A bucket beyond this set's map leaves the set untouched. */
58 if (bucket >= ls->map_size)
59 return; /* shouldn't happen really */
/* The mask is built in FcChar32 width rather than as a plain int shift. */
61 ls->map[bucket] |= ((FcChar32) 1 << (id & 0x1f));
/*
 * Report whether the bit for language `id` is set in ls->map.
 *
 * `id` is remapped through fcLangCharSetIndices before the lookup.  The
 * visible return yields FcTrue when bit (id & 0x1f) of word `bucket` is set
 * and FcFalse otherwise.
 */
65 FcLangSetBitGet (const FcLangSet *ls,
70 id = fcLangCharSetIndices[id];
/* Bucket beyond this set's map; the statement taken here is not shown in this listing. */
72 if (bucket >= ls->map_size)
75 return ((ls->map[bucket] >> (id & 0x1f)) & 1) ? FcTrue : FcFalse;
/*
 * Clear the bit for language `id` in ls->map.
 *
 * Mirrors FcLangSetBitSet: `id` is remapped through fcLangCharSetIndices and
 * bit (id & 0x1f) of word `bucket` is cleared.  The computation of `bucket`
 * is not visible in this listing.
 */
79 FcLangSetBitReset (FcLangSet *ls,
84 id = fcLangCharSetIndices[id];
/* A bucket beyond this set's map leaves the set untouched. */
86 if (bucket >= ls->map_size)
87 return; /* shouldn't happen really */
/* AND with the inverted single-bit mask, built in FcChar32 width. */
89 ls->map[bucket] &= ~((FcChar32) 1 << (id & 0x1f));
/*
 * Build a language set from a font's character coverage.
 *
 * Every entry of fcLangCharSets is compared against `charset` with
 * FcCharSetSubtractCount, and FcLangSetBitSet marks entry i in the new set.
 * The condition guarding that call is not visible in this listing --
 * presumably it requires `missing` to be zero; confirm.
 *
 * charset:       coverage being examined (printed as "font charset" in the
 *                debug trace).
 * exclusiveLang: language tag whose charset is fetched with
 *                FcLangGetCharSet and used by the exclusive-language check
 *                inside the loop.
 */
93 FcFreeTypeLangSet (const FcCharSet *charset,
94 const FcChar8 *exclusiveLang)
98 const FcCharSet *exclusiveCharset = 0;
102 exclusiveCharset = FcLangGetCharSet (exclusiveLang);
103 ls = FcLangSetCreate ();
/* Optional trace of the charset being examined. */
106 if (FcDebug() & FC_DBG_LANGSET)
108 printf ("font charset");
109 FcCharSetPrint (charset);
112 for (i = 0; i < NUM_LANG_CHAR_SET; i++)
114 if (FcDebug() & FC_DBG_LANGSET)
116 printf ("%s charset", fcLangCharSets[i].lang);
117 FcCharSetPrint (&fcLangCharSets[i].charset);
122 * Check for Han charsets to make fonts
123 * which advertise support for a single language
124 * not support other Han languages
/*
 * Entries flagged by FcFreeTypeIsExclusiveLang are compared with
 * exclusiveCharset: first the leaf counts, then each leaf pointer in turn.
 * What happens on a mismatch is not visible in this listing.
 */
126 if (exclusiveCharset &&
127 FcFreeTypeIsExclusiveLang (fcLangCharSets[i].lang))
129 if (fcLangCharSets[i].charset.num != exclusiveCharset->num)
132 for (j = 0; j < fcLangCharSets[i].charset.num; j++)
133 if (FcCharSetLeaf(&fcLangCharSets[i].charset, j) !=
134 FcCharSetLeaf(exclusiveCharset, j))
/* Presumably the number of characters the language needs that `charset` lacks -- confirm against FcCharSetSubtractCount. */
137 missing = FcCharSetSubtractCount (&fcLangCharSets[i].charset, charset);
/* Verbose scan trace: for small gaps (fewer than 10) each missing code point is listed. */
138 if (FcDebug() & FC_DBG_SCANV)
140 if (missing && missing < 10)
142 FcCharSet *missed = FcCharSetSubtract (&fcLangCharSets[i].charset,
145 FcChar32 map[FC_CHARSET_MAP_SIZE];
148 printf ("\n%s(%u) ", fcLangCharSets[i].lang, missing);
150 for (ucs4 = FcCharSetFirstPage (missed, map, &next);
151 ucs4 != FC_CHARSET_DONE;
152 ucs4 = FcCharSetNextPage (missed, map, &next))
/*
 * NOTE(review): this loop reuses the name `i`.  Unless a nested declaration
 * (not visible in this listing) shadows it, the outer language index would
 * be clobbered here -- confirm.
 */
155 for (i = 0; i < FC_CHARSET_MAP_SIZE; i++)
158 for (j = 0; j < 32; j++)
/* NOTE(review): `1 << j` is an int shift and j reaches 31; FcLangSetBitSet builds its mask from (FcChar32) 1 instead. */
159 if (map[i] & (1 << j))
160 printf (" %04x", ucs4 + i * 32 + j);
164 FcCharSetDestroy (missed);
167 printf ("%s(%u) ", fcLangCharSets[i].lang, missing);
/* Record language i in the result set. */
170 FcLangSetBitSet (ls, i);
173 if (FcDebug() & FC_DBG_SCANV)
/*
 * Normalize a locale-style language string.
 *
 * "C" and "POSIX" (compared case-insensitively) yield a copy of "en".
 * Any other input is copied into `s` and edited in place: the '@' modifier,
 * the '.' encoding and the '_' or '-' territory are located with strchr;
 * the language part must be 2 or 3 characters long, and so must the
 * territory if one is present, otherwise a warning is written to stderr.
 * The copy is lowercased with FcStrDowncase and candidate spellings are
 * probed with FcLangSetIndex, a negative index meaning the candidate is not
 * in the table.
 *
 * Many statements of this function, including its return paths, are not
 * part of this listing.
 */
181 FcLangNormalize (const FcChar8 *lang)
183 FcChar8 *result = NULL, *s, *orig;
184 char *territory, *encoding, *modifier;
185 size_t llen, tlen = 0, mlen = 0, ssize;
/* The "C" and "POSIX" locales are mapped to "en". */
190 if (FcStrCmpIgnoreCase (lang, (const FcChar8 *)"C") == 0 ||
191 FcStrCmpIgnoreCase (lang, (const FcChar8 *)"POSIX") == 0)
193 result = FcStrCopy ((const FcChar8 *)"en");
/* Private copy of the input; the memmove calls further down rewrite it in place. */
197 s = FcStrCopy (lang);
200 /* store the original length of 's' here to let FcMemFree know
201 * the correct size since we breaks 's' from now on.
203 ssize = strlen ((const char *)s) + 1;
205 /* from the comments in glibc:
207 * LOCALE can consist of up to four recognized parts for the XPG syntax:
209 * language[_territory[.codeset]][@modifier]
211 * Beside the first all of them are allowed to be missing. If the
212 * full specified locale is not found, the less specific one are
213 * looked for. The various part will be stripped off according to
214 * the following order:
216 * (2) normalized codeset
220 * So since we don't take care of the codeset part here, what patterns
221 * we need to deal with is:
223 * 1. language_territory@modifier
224 * 2. language@modifier
227 * then. and maybe no need to try language_territory here.
229 modifier = strchr ((const char *) s, '@');
234 mlen = strlen (modifier);
236 encoding = strchr ((const char *) s, '.');
243 memmove (encoding, modifier, mlen + 1);
247 territory = strchr ((const char *) s, '_');
249 territory = strchr ((const char *) s, '-');
254 tlen = strlen (territory);
256 llen = strlen ((const char *) s);
257 if (llen < 2 || llen > 3)
259 fprintf (stderr, "Fontconfig warning: ignoring %s: not a valid language tag\n",
263 if (territory && (tlen < 2 || tlen > 3))
265 fprintf (stderr, "Fontconfig warning: ignoring %s: not a valid region tag\n",
273 orig = FcStrDowncase (s);
278 if (FcDebug () & FC_DBG_LANGSET)
279 printf("Checking the existence of %s.orth\n", s);
280 if (FcLangSetIndex (s) < 0)
282 memmove (territory - 1, territory + tlen, (mlen > 0 ? mlen + 1 : 0) + 1);
284 modifier = territory;
289 /* we'll miss the opportunity to reduce the correct size
290 * of the allocated memory for the string after that.
292 FcMemFree (FC_MEM_STRING, ssize);
293 FcMemAlloc (FC_MEM_STRING, strlen((const char *)s) + 1);
300 if (FcDebug () & FC_DBG_LANGSET)
301 printf("Checking the existence of %s.orth\n", s);
302 if (FcLangSetIndex (s) < 0)
307 /* we'll miss the opportunity to reduce the correct size
308 * of the allocated memory for the string after that.
310 FcMemFree (FC_MEM_STRING, ssize);
311 FcMemAlloc (FC_MEM_STRING, strlen((const char *)s) + 1);
316 if (FcDebug () & FC_DBG_LANGSET)
317 printf("Checking the existence of %s.orth\n", s);
318 if (FcLangSetIndex (s) < 0)
320 /* there seems no languages matched in orth.
321 * add the language as is for fallback.
329 /* we'll miss the opportunity to reduce the correct size
330 * of the allocated memory for the string after that.
332 FcMemFree (FC_MEM_STRING, ssize);
333 FcMemAlloc (FC_MEM_STRING, strlen((const char *)s) + 1);
343 FcMemFree (FC_MEM_STRING, ssize);
346 if (FcDebug () & FC_DBG_LANGSET)
349 printf ("normalized: %s -> %s\n", lang, result);
351 printf ("Unable to normalize %s\n", lang);
/* True when `c` ends the language part of a tag: a '-' separator or the terminating NUL. */
357 #define FcLangEnd(c) ((c) == '-' || (c) == '\0')
/*
 * Compare two language tags and classify their relationship as an
 * FcLangResult.  The result starts out as FcLangDifferentLang and is changed
 * to FcLangDifferentTerritory in the two places shown.  Most of the
 * comparison loop, and the return statements, are not part of this listing.
 */
360 FcLangCompare (const FcChar8 *s1, const FcChar8 *s2)
363 FcLangResult result = FcLangDifferentLang;
/* Both current characters end a language part, so the languages agree. */
374 if (FcLangEnd (c1) && FcLangEnd (c2))
375 result = FcLangDifferentTerritory;
381 result = FcLangDifferentTerritory;
/*
 * Containment test between two language tags.  Only the two
 * country-mismatch checks are visible in this listing; the character loop
 * and the return statements are not.
 */
386 * Return FcTrue when super contains sub.
388 * super contains sub if super and sub have the same
389 * language and either the same country or one
390 * is missing the country
394 FcLangContains (const FcChar8 *super, const FcChar8 *sub)
407 /* see if super has a country while sub is missing one */
408 if (c1 == '-' && c2 == '\0')
410 /* see if sub has a country while super is missing one */
411 if (c1 == '\0' && c2 == '-')
/*
 * Look up the charset for a language tag in fcLangCharSets.
 *
 * Every table entry is classified with FcLangCompare.  One case returns that
 * entry's charset immediately; the final return uses the entry whose index
 * was kept in `country`.  The first case label and the handling between the
 * visible lines are not part of this listing.
 */
421 FcLangGetCharSet (const FcChar8 *lang)
426 for (i = 0; i < NUM_LANG_CHAR_SET; i++)
428 switch (FcLangCompare (lang, fcLangCharSets[i].lang)) {
430 return &fcLangCharSets[i].charset;
431 case FcLangDifferentTerritory:
434 case FcLangDifferentLang:
/* Fallback: the entry remembered in `country` -- presumably a same-language, different-territory match; confirm. */
441 return &fcLangCharSets[country].charset;
/*
 * Collect every language tag of fcLangCharSets into a newly created
 * FcStrSet.  The function header, the check of the FcStrSetCreate result and
 * the return statement are not part of this listing.
 */
450 langs = FcStrSetCreate();
/* NOTE(review): the FcStrSetAdd result is discarded on this line. */
454 for (i = 0; i < NUM_LANG_CHAR_SET; i++)
455 FcStrSetAdd (langs, fcLangCharSets[i].lang);
/*
 * Allocate a language set with an all-zero bitmap.
 *
 * The set comes from malloc, is recorded with FcMemAlloc under
 * FC_MEM_LANGSET, has its map cleared and its map_size set to
 * NUM_LANG_SET_MAP.  The allocation-failure check, the setup of `extra` and
 * the return statement are not visible in this listing.
 */
461 FcLangSetCreate (void)
465 ls = malloc (sizeof (FcLangSet));
468 FcMemAlloc (FC_MEM_LANGSET, sizeof (FcLangSet));
469 memset (ls->map, '\0', sizeof (ls->map));
470 ls->map_size = NUM_LANG_SET_MAP;
/*
 * Release a language set: ls->extra is handed to FcStrSetDestroy and the
 * release is recorded with FcMemFree under FC_MEM_LANGSET.  Any guard around
 * the FcStrSetDestroy call and the final deallocation of `ls` are not
 * visible in this listing.
 */
476 FcLangSetDestroy (FcLangSet *ls)
479 FcStrSetDestroy (ls->extra);
480 FcMemFree (FC_MEM_LANGSET, sizeof (FcLangSet));
/*
 * Duplicate a language set.
 *
 * A new set is created with FcLangSetCreate, the bitmap is copied, and each
 * string of ls->extra is added to a newly created new->extra.  When
 * FcStrSetAdd fails the list iterator is closed; the FcLangSetDestroy call
 * at the end presumably belongs to that failure path -- confirm.  The
 * conditions and return statements are not part of this listing.
 */
485 FcLangSetCopy (const FcLangSet *ls)
489 new = FcLangSetCreate ();
/*
 * Clear the whole destination map, then copy no more than the smaller of
 * the destination size and the source's map_size words, so a shorter source
 * map leaves the tail zeroed.
 */
492 memset (new->map, '\0', sizeof (new->map));
493 memcpy (new->map, ls->map, FC_MIN (sizeof (new->map), ls->map_size * sizeof (ls->map[0])));
499 new->extra = FcStrSetCreate ();
503 list = FcStrListCreate (ls->extra);
507 while ((extra = FcStrListNext (list)))
508 if (!FcStrSetAdd (new->extra, extra))
510 FcStrListDone (list);
513 FcStrListDone (list);
517 FcLangSetDestroy (new);
/*
 * Locate `lang` in the fcLangCharSets table by bisecting a [low, high]
 * window.
 *
 * The window is chosen from the lowercased first character:
 * fcLangCharSetRanges[firstChar - 'a'] supplies it in the general case, and
 * separate windows are used in the other two branches (the condition of the
 * first branch is not visible in this listing).  Callers in this file treat
 * a negative result as "not in the table".
 */
523 FcLangSetIndex (const FcChar8 *lang)
525 int low, high, mid = 0;
527 FcChar8 firstChar = FcToLower(lang[0]);
/* Only read lang[1] when the string is non-empty. */
528 FcChar8 secondChar = firstChar ? FcToLower(lang[1]) : '\0';
533 high = fcLangCharSetRanges[0].begin;
535 else if(firstChar > 'z')
537 low = fcLangCharSetRanges[25].begin;
538 high = NUM_LANG_CHAR_SET - 1;
542 low = fcLangCharSetRanges[firstChar - 'a'].begin;
543 high = fcLangCharSetRanges[firstChar - 'a'].end;
546 return -low; /* next entry after where it would be */
551 mid = (high + low) >> 1;
/* The probed entry starts with another letter: use the full case-insensitive compare. */
552 if(fcLangCharSets[mid].lang[0] != firstChar)
553 cmp = FcStrCmpIgnoreCase(fcLangCharSets[mid].lang, lang);
555 { /* fast path for resolving 2-letter languages (by far the most common) after
556 * finding the first char (probably already true because of the hash table) */
557 cmp = fcLangCharSets[mid].lang[1] - secondChar;
559 (fcLangCharSets[mid].lang[2] != '\0' ||
/* Compare the remainder of the table entry from its third character on. */
562 cmp = FcStrCmpIgnoreCase(fcLangCharSets[mid].lang+2,
/*
 * Add `lang` to the set.
 *
 * FcLangSetIndex is consulted first and FcLangSetBitSet records a table
 * language in the bitmap.  The tag is otherwise added to ls->extra, which
 * this function can create with FcStrSetCreate, and the FcStrSetAdd result
 * is returned.  The conditions separating these steps are not visible in
 * this listing.
 */
579 FcLangSetAdd (FcLangSet *ls, const FcChar8 *lang)
583 id = FcLangSetIndex (lang);
586 FcLangSetBitSet (ls, id);
591 ls->extra = FcStrSetCreate ();
595 return FcStrSetAdd (ls->extra, lang);
/*
 * Remove `lang` from the set.
 *
 * FcLangSetIndex is consulted first; FcLangSetBitReset clears a table
 * language from the bitmap and FcStrSetDel removes the tag from ls->extra.
 * The conditions choosing between the two and the return value are not
 * visible in this listing.
 */
599 FcLangSetDel (FcLangSet *ls, const FcChar8 *lang)
603 id = FcLangSetIndex (lang);
606 FcLangSetBitReset (ls, id);
610 FcStrSetDel (ls->extra, lang);
/*
 * Find how closely `ls` matches `lang`, tracked as an FcLangResult in
 * `best` (smaller values are kept, see the `r < best` tests).
 *
 * After the direct bitmap test, table entries on both sides of the index are
 * compared with FcLangCompare, and finally the strings of ls->extra are
 * examined.  The statements executed inside the visible conditions, and the
 * return statements, are not part of this listing.
 */
616 FcLangSetHasLang (const FcLangSet *ls, const FcChar8 *lang)
619 FcLangResult best, r;
622 id = FcLangSetIndex (lang);
625 else if (FcLangSetBitGet (ls, id))
627 best = FcLangDifferentLang;
/* Walk down from the entry just below `id`. */
628 for (i = id - 1; i >= 0; i--)
630 r = FcLangCompare (lang, fcLangCharSets[i].lang);
631 if (r == FcLangDifferentLang)
633 if (FcLangSetBitGet (ls, i) && r < best)
/* Walk up from `id` to the end of the table. */
636 for (i = id; i < NUM_LANG_CHAR_SET; i++)
638 r = FcLangCompare (lang, fcLangCharSets[i].lang);
639 if (r == FcLangDifferentLang)
641 if (FcLangSetBitGet (ls, i) && r < best)
646 FcStrList *list = FcStrListCreate (ls->extra);
/* Stop scanning the extra strings once `best` has reached FcLangEqual. */
651 while (best > FcLangEqual && (extra = FcStrListNext (list)))
653 r = FcLangCompare (lang, extra);
657 FcStrListDone (list);
/*
 * Match every string of `set` against `ls` with FcLangSetHasLang, keeping
 * the result in `best`, which starts at FcLangDifferentLang.  Iteration
 * ends early once `best` has reached FcLangEqual.  The update of `best` and
 * the return statement are not part of this listing.
 */
664 FcLangSetCompareStrSet (const FcLangSet *ls, FcStrSet *set)
666 FcStrList *list = FcStrListCreate (set);
667 FcLangResult r, best = FcLangDifferentLang;
672 while (best > FcLangEqual && (extra = FcStrListNext (list)))
674 r = FcLangSetHasLang (ls, extra);
678 FcStrListDone (list);
/*
 * Compare two language sets and classify the closest relationship as an
 * FcLangResult.
 *
 * Three stages are visible: a word-by-word test for a shared bitmap bit, a
 * test of both bitmaps against each row of fcLangCountrySets, and
 * cross-checks of each set's extra strings against the other set through
 * FcLangSetCompareStrSet.  The statements taken on a shared bit and the
 * return statements are not part of this listing.
 */
684 FcLangSetCompare (const FcLangSet *lsa, const FcLangSet *lsb)
687 FcLangResult best, r;
/* Only words present in both maps, and never more than NUM_LANG_SET_MAP, are examined. */
689 count = FC_MIN (lsa->map_size, lsb->map_size);
690 count = FC_MIN (NUM_LANG_SET_MAP, count);
691 for (i = 0; i < count; i++)
692 if (lsa->map[i] & lsb->map[i])
694 best = FcLangDifferentLang;
/* Both sets hold a member of the same country set: same language, different territory. */
695 for (j = 0; j < NUM_COUNTRY_SET; j++)
696 for (i = 0; i < count; i++)
697 if ((lsa->map[i] & fcLangCountrySets[j][i]) &&
698 (lsb->map[i] & fcLangCountrySets[j][i]))
700 best = FcLangDifferentTerritory;
705 r = FcLangSetCompareStrSet (lsb, lsa->extra);
709 if (best > FcLangEqual && lsb->extra)
711 r = FcLangSetCompareStrSet (lsa, lsb->extra);
/*
 * Wrap a single language tag in a language set.
 *
 * The string-set storage `strs` is static, and `ls` is filled in as an
 * object rather than through a pointer (its declaration is not visible in
 * this listing).  A tag found by FcLangSetIndex is recorded in the bitmap;
 * otherwise the caller's pointer is stored in `str` without copying.
 */
719 * Used in computing values -- mustn't allocate any storage
720 * XXX Not thread-safe
723 FcLangSetPromote (const FcChar8 *lang)
726 static FcStrSet strs;
730 memset (ls.map, '\0', sizeof (ls.map));
731 ls.map_size = NUM_LANG_SET_MAP;
733 id = FcLangSetIndex (lang);
736 FcLangSetBitSet (&ls, id);
/* The const qualifier is cast away; the string itself is not duplicated. */
745 str = (FcChar8 *) lang;
/*
 * Hash a language set over its bitmap words.  At most NUM_LANG_SET_MAP
 * words, and never more than ls->map_size, are visited; how the words are
 * combined is not visible in this listing.
 */
751 FcLangSetHash (const FcLangSet *ls)
756 count = FC_MIN (ls->map_size, NUM_LANG_SET_MAP);
757 for (i = 0; i < count; i++)
/*
 * Parse a '|'-separated list of language tags into a new language set.
 *
 * Each tag is gathered into the 32-byte buffer `lang` and passed to
 * FcLangSetAdd.  The FcLangSetDestroy call presumably belongs to the failure
 * path -- confirm.  The outer loop, the buffer termination and the return
 * statements are not part of this listing.
 */
765 FcNameParseLangSet (const FcChar8 *string)
767 FcChar8 lang[32], c = 0;
771 ls = FcLangSetCreate ();
/* At most 31 characters are examined per tag, one less than the buffer size. */
777 for(i = 0; i < 31;i++)
780 if(c == '\0' || c == '|')
781 break; /* end of this code */
785 if (!FcLangSetAdd (ls, lang))
792 FcLangSetDestroy (ls);
/*
 * Append the languages of `ls` to `buf`, using '|' as the separator.
 *
 * Bitmap members are written first: each set bit is turned back into a
 * table index through fcLangCharSetIndicesInv.  The strings of ls->extra
 * follow.  A failing FcStrBufChar or FcStrBufString call in the extra-string
 * loop closes the list iterator; the statements that follow those failures
 * and the handling of `first` are not part of this listing.
 */
798 FcNameUnparseLangSet (FcStrBuf *buf, const FcLangSet *ls)
802 FcBool first = FcTrue;
804 count = FC_MIN (ls->map_size, NUM_LANG_SET_MAP);
805 for (i = 0; i < count; i++)
807 if ((bits = ls->map[i]))
809 for (bit = 0; bit <= 31; bit++)
/* NOTE(review): `1 << bit` is an int shift and bit reaches 31; FcLangSetBitSet builds its mask from (FcChar32) 1 instead. */
810 if (bits & (1 << bit))
/* Word index times 32 plus bit position gives the remapped id. */
812 int id = (i << 5) | bit;
814 if (!FcStrBufChar (buf, '|'))
816 if (!FcStrBufString (buf, fcLangCharSets[fcLangCharSetIndicesInv[id]].lang))
824 FcStrList *list = FcStrListCreate (ls->extra);
829 while ((extra = FcStrListNext (list)))
832 if (!FcStrBufChar (buf, '|'))
834 FcStrListDone (list);
837 if (!FcStrBufString (buf, extra))
839 FcStrListDone (list);
844 FcStrListDone (list);
/*
 * Equality test for two language sets.
 *
 * The bitmaps are compared word by word over the words both sets have (at
 * most NUM_LANG_SET_MAP).  When both sets carry extra strings the answer is
 * that of FcStrSetEqual.  The values returned by the other branches are not
 * visible in this listing.
 */
850 FcLangSetEqual (const FcLangSet *lsa, const FcLangSet *lsb)
854 count = FC_MIN (lsa->map_size, lsb->map_size);
855 count = FC_MIN (NUM_LANG_SET_MAP, count);
856 for (i = 0; i < count; i++)
858 if (lsa->map[i] != lsb->map[i])
/* Neither set has extra strings. */
861 if (!lsa->extra && !lsb->extra)
/* Both sets have extra strings: compare them as string sets. */
863 if (lsa->extra && lsb->extra)
864 return FcStrSetEqual (lsa->extra, lsb->extra);
/*
 * Test whether `ls` holds a language that contains `lang` in the sense of
 * FcLangContains.
 *
 * After the direct bitmap test, table entries on both sides of the index are
 * examined, and then the strings of ls->extra.  The statements executed
 * inside the visible conditions and the return statements are not part of
 * this listing.
 */
869 FcLangSetContainsLang (const FcLangSet *ls, const FcChar8 *lang)
874 id = FcLangSetIndex (lang);
877 else if (FcLangSetBitGet (ls, id))
880 * search up and down among equal languages for a match
/* Downward from the entry just below `id`. */
882 for (i = id - 1; i >= 0; i--)
884 if (FcLangCompare (fcLangCharSets[i].lang, lang) == FcLangDifferentLang)
886 if (FcLangSetBitGet (ls, i) &&
887 FcLangContains (fcLangCharSets[i].lang, lang))
/* Upward from `id` to the end of the table. */
890 for (i = id; i < NUM_LANG_CHAR_SET; i++)
892 if (FcLangCompare (fcLangCharSets[i].lang, lang) == FcLangDifferentLang)
894 if (FcLangSetBitGet (ls, i) &&
895 FcLangContains (fcLangCharSets[i].lang, lang))
900 FcStrList *list = FcStrListCreate (ls->extra);
905 while ((extra = FcStrListNext (list)))
907 if (FcLangContains (extra, lang))
910 FcStrListDone (list);
/*
 * Set-containment test: bitmap members of lsb that are absent from lsa's
 * bitmap are re-checked one by one with FcLangSetContainsLang, and so is
 * every extra string of lsb.  With FC_DBG_MATCHV enabled both sets and each
 * missing language are printed.  The return statements are not part of this
 * listing.
 */
919 * return FcTrue if lsa contains every language in lsb
922 FcLangSetContains (const FcLangSet *lsa, const FcLangSet *lsb)
927 if (FcDebug() & FC_DBG_MATCHV)
929 printf ("FcLangSet "); FcLangSetPrint (lsa);
930 printf (" contains "); FcLangSetPrint (lsb);
934 * check bitmaps for missing language support
936 count = FC_MIN (lsa->map_size, lsb->map_size);
937 count = FC_MIN (NUM_LANG_SET_MAP, count);
938 for (i = 0; i < count; i++)
/* Bits set in lsb's word but not in lsa's. */
940 missing = lsb->map[i] & ~lsa->map[i];
943 for (j = 0; j < 32; j++)
/* NOTE(review): `1 << j` is an int shift and j reaches 31; FcLangSetBitSet builds its mask from (FcChar32) 1 instead. */
944 if (missing & (1 << j))
/* i*32 + j is the remapped id; fcLangCharSetIndicesInv turns it back into a table index. */
946 if (!FcLangSetContainsLang (lsa,
947 fcLangCharSets[fcLangCharSetIndicesInv[i*32 + j]].lang))
949 if (FcDebug() & FC_DBG_MATCHV)
950 printf ("\tMissing bitmap %s\n", fcLangCharSets[fcLangCharSetIndicesInv[i*32+j]].lang);
958 FcStrList *list = FcStrListCreate (lsb->extra);
963 while ((extra = FcStrListNext (list)))
965 if (!FcLangSetContainsLang (lsa, extra))
967 if (FcDebug() & FC_DBG_MATCHV)
968 printf ("\tMissing string %s\n", extra);
972 FcStrListDone (list);
/*
 * Reserve room for one FcLangSet in `serialize` on behalf of `l`, using
 * FcSerializeAlloc.  The values returned on success and failure are not
 * visible in this listing.
 */
981 FcLangSetSerializeAlloc (FcSerialize *serialize, const FcLangSet *l)
983 if (!FcSerializeAlloc (serialize, l, sizeof (FcLangSet)))
/*
 * Fill in the serialized image of `l`.
 *
 * The destination is obtained from FcSerializePtr.  Its bitmap is cleared
 * and then receives the source words, its map_size is always written as
 * NUM_LANG_SET_MAP, and its extra pointer is set to NULL.  The NULL check on
 * the destination and the return statement are not part of this listing.
 */
989 FcLangSetSerialize(FcSerialize *serialize, const FcLangSet *l)
991 FcLangSet *l_serialize = FcSerializePtr (serialize, l);
/* Zero first, so a source map shorter than the destination leaves the tail cleared. */
995 memset (l_serialize->map, '\0', sizeof (l_serialize->map));
996 memcpy (l_serialize->map, l->map, FC_MIN (sizeof (l_serialize->map), l->map_size * sizeof (l->map[0])));
997 l_serialize->map_size = NUM_LANG_SET_MAP;
998 l_serialize->extra = NULL; /* We don't serialize ls->extra */
/*
 * List the languages of `ls` as a newly created FcStrSet: first the table
 * languages whose bit is set, then the strings of ls->extra.  The check of
 * the FcStrSetCreate result, the guard on ls->extra and the return statement
 * are not part of this listing.
 */
1003 FcLangSetGetLangs (const FcLangSet *ls)
1008 langs = FcStrSetCreate();
/* NOTE(review): the FcStrSetAdd results below are discarded. */
1012 for (i = 0; i < NUM_LANG_CHAR_SET; i++)
1013 if (FcLangSetBitGet (ls, i))
1014 FcStrSetAdd (langs, fcLangCharSets[i].lang);
1018 FcStrList *list = FcStrListCreate (ls->extra);
1023 while ((extra = FcStrListNext (list)))
1024 FcStrSetAdd (langs, extra);
1026 FcStrListDone (list);
/*
 * Shared driver for the set operations below: copy `a`, then apply `func`
 * to the copy once for every language listed by FcLangSetGetLangs (b).
 * The second parameter, the cleanup and the return statement are not part
 * of this listing.
 */
1034 FcLangSetOperate(const FcLangSet *a,
1036 FcBool (*func) (FcLangSet *ls,
1039 FcLangSet *langset = FcLangSetCopy (a);
/*
 * NOTE(review): the FcStrSet created by FcLangSetGetLangs is passed straight
 * to FcStrListCreate and no handle to it is kept here, so this function
 * cannot destroy it afterwards -- possible leak; confirm the ownership rules
 * of FcStrListCreate.
 */
1040 FcStrList *sl = FcStrListCreate (FcLangSetGetLangs (b));
1043 while ((str = FcStrListNext (sl)))
/* The FcBool result of `func` is discarded. */
1045 func (langset, str);
/* Union of two language sets: a copy of `a` with every language of `b` added through FcLangSetAdd. */
1053 FcLangSetUnion (const FcLangSet *a, const FcLangSet *b)
1055 return FcLangSetOperate(a, b, FcLangSetAdd);
/* Difference of two language sets: a copy of `a` with every language of `b` removed through FcLangSetDel. */
1059 FcLangSetSubtract (const FcLangSet *a, const FcLangSet *b)
1061 return FcLangSetOperate(a, b, FcLangSetDel);
1065 #include "fcaliastail.h"
1066 #include "fcftaliastail.h"