2 * fontconfig/src/fclang.c
4 * Copyright © 2002 Keith Packard
6 * Permission to use, copy, modify, distribute, and sell this software and its
7 * documentation for any purpose is hereby granted without fee, provided that
8 * the above copyright notice appear in all copies and that both that
9 * copyright notice and this permission notice appear in supporting
10 * documentation, and that the name of the author(s) not be used in
11 * advertising or publicity pertaining to distribution of the software without
12 * specific, written prior permission. The authors make no
13 * representations about the suitability of this software for any purpose. It
14 * is provided "as is" without express or implied warranty.
16 * THE AUTHOR(S) DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE,
17 * INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO
18 * EVENT SHALL THE AUTHOR(S) BE LIABLE FOR ANY SPECIAL, INDIRECT OR
19 * CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE,
20 * DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER
21 * TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
22 * PERFORMANCE OF THIS SOFTWARE.
30 const FcChar8 lang[8];
31 const FcCharSet charset;
39 #include "../fc-lang/fclang.h"
44 FcChar32 map[NUM_LANG_SET_MAP];
/*
 * Forward declaration: FcLangSetIndex() is called (for example by
 * FcLangNormalize) before its definition further down in this file.
 */
47 static int FcLangSetIndex (const FcChar8 *lang);
/*
 * FcLangSetBitSet: mark one language as present in the bitmap of ls.
 *
 * The incoming id is first translated through fcLangCharSetIndices[]; the
 * low five bits of the translated id select the bit inside the FcChar32
 * word ls->map[bucket].  A bucket at or beyond ls->map_size is silently
 * ignored.
 *
 * NOTE(review): the statement that computes `bucket` is not visible in this
 * listing; presumably it is the translated id divided by 32 -- confirm.
 */
51 FcLangSetBitSet (FcLangSet *ls,
56 id = fcLangCharSetIndices[id];
58 if (bucket >= ls->map_size)
59 return; /* shouldn't happen really */
61 ls->map[bucket] |= ((FcChar32) 1 << (id & 0x1f));
/*
 * FcLangSetBitGet: report whether the bit for one language is set in ls.
 *
 * The id is translated through fcLangCharSetIndices[] before use; the low
 * five bits of the translated id select the bit inside ls->map[bucket].
 * Returns FcTrue when that bit is 1 and FcFalse when it is 0.
 *
 * NOTE(review): the value returned when bucket >= ls->map_size is not
 * visible in this listing -- presumably FcFalse; confirm.
 */
65 FcLangSetBitGet (const FcLangSet *ls,
70 id = fcLangCharSetIndices[id];
72 if (bucket >= ls->map_size)
75 return ((ls->map[bucket] >> (id & 0x1f)) & 1) ? FcTrue : FcFalse;
/*
 * FcLangSetBitReset: clear the bit for one language in the bitmap of ls.
 *
 * Mirrors FcLangSetBitSet: the id is translated through
 * fcLangCharSetIndices[], the low five bits pick the bit in
 * ls->map[bucket], and a bucket at or beyond ls->map_size is ignored.
 */
79 FcLangSetBitReset (FcLangSet *ls,
84 id = fcLangCharSetIndices[id];
86 if (bucket >= ls->map_size)
87 return; /* shouldn't happen really */
89 ls->map[bucket] &= ~((FcChar32) 1 << (id & 0x1f));
/*
 * FcFreeTypeLangSet: derive a language set from a font's character set.
 *
 * A fresh set is obtained from FcLangSetCreate().  For every entry i of
 * fcLangCharSets[] the number of characters required by that language but
 * absent from `charset` is computed with FcCharSetSubtractCount(), and
 * FcLangSetBitSet (ls, i) records the language.
 *
 * exclusiveLang is resolved to a charset with FcLangGetCharSet(); for
 * entries where FcFreeTypeIsExclusiveLang() is true, the entry's charset is
 * compared against it by leaf count and then leaf by leaf.
 *
 * With FC_DBG_LANGSET the font and per-language charsets are printed; with
 * FC_DBG_SCANV the missing count is printed and, when it is between 1 and 9,
 * the individual missing code points are listed.
 *
 * NOTE(review): the guard in front of FcLangSetBitSet (ls, i) is not visible
 * in this listing; presumably the bit is set only when `missing` is zero --
 * confirm.
 * NOTE(review): `map[i] & (1 << j)` runs with j up to 31, which shifts a
 * signed int into its sign bit; FcLangSetBitSet uses `(FcChar32) 1` for the
 * same kind of mask.
 * NOTE(review): the page-dump loop reuses the names i and j while the outer
 * loop over fcLangCharSets[] is also indexed by i; whether the inner pair is
 * a separate declaration is not visible here -- confirm.
 */
93 FcFreeTypeLangSet (const FcCharSet *charset,
94 const FcChar8 *exclusiveLang)
98 const FcCharSet *exclusiveCharset = 0;
102 exclusiveCharset = FcLangGetCharSet (exclusiveLang);
103 ls = FcLangSetCreate ();
106 if (FcDebug() & FC_DBG_LANGSET)
108 printf ("font charset");
109 FcCharSetPrint (charset);
112 for (i = 0; i < NUM_LANG_CHAR_SET; i++)
114 if (FcDebug() & FC_DBG_LANGSET)
116 printf ("%s charset", fcLangCharSets[i].lang);
117 FcCharSetPrint (&fcLangCharSets[i].charset);
122 * Check for Han charsets to make fonts
123 * which advertise support for a single language
124 * not support other Han languages
126 if (exclusiveCharset &&
127 FcFreeTypeIsExclusiveLang (fcLangCharSets[i].lang))
129 if (fcLangCharSets[i].charset.num != exclusiveCharset->num)
132 for (j = 0; j < fcLangCharSets[i].charset.num; j++)
133 if (FcCharSetLeaf(&fcLangCharSets[i].charset, j) !=
134 FcCharSetLeaf(exclusiveCharset, j))
137 missing = FcCharSetSubtractCount (&fcLangCharSets[i].charset, charset);
138 if (FcDebug() & FC_DBG_SCANV)
140 if (missing && missing < 10)
142 FcCharSet *missed = FcCharSetSubtract (&fcLangCharSets[i].charset,
145 FcChar32 map[FC_CHARSET_MAP_SIZE];
148 printf ("\n%s(%u) ", fcLangCharSets[i].lang, missing);
150 for (ucs4 = FcCharSetFirstPage (missed, map, &next);
151 ucs4 != FC_CHARSET_DONE;
152 ucs4 = FcCharSetNextPage (missed, map, &next))
155 for (i = 0; i < FC_CHARSET_MAP_SIZE; i++)
158 for (j = 0; j < 32; j++)
159 if (map[i] & (1 << j))
160 printf (" %04x", ucs4 + i * 32 + j);
164 FcCharSetDestroy (missed);
167 printf ("%s(%u) ", fcLangCharSets[i].lang, missing);
170 FcLangSetBitSet (ls, i);
173 if (FcDebug() & FC_DBG_SCANV)
/*
 * FcLangNormalize: turn a locale-style name into a language tag that the
 * orthography table knows about.
 *
 * "C" and "POSIX" (compared case-insensitively) normalize to "en".  Any
 * other input is duplicated with FcStrCopy() and split in place: '@' starts
 * the modifier, '.' the encoding, and '_' (or, failing that, '-') the
 * territory.  The encoding is dropped by moving the modifier over it.
 *
 * A language part that is not 2 or 3 characters long, or a territory that
 * is not 2 or 3 characters long, produces a warning on stderr.
 *
 * The working copy is lower-cased with FcStrDowncase() and FcLangSetIndex()
 * is probed up to three times; a negative index means the candidate is
 * unknown.  If nothing matches, the language is kept as-is as a fallback
 * (see the comment next to the last probe).
 *
 * With FC_DBG_LANGSET each probe and the final outcome are printed.
 *
 * NOTE(review): which string forms are tried on each probe, and how the
 * buffers are released, depend on statements not visible in this listing.
 */
181 FcLangNormalize (const FcChar8 *lang)
183 FcChar8 *result = NULL, *s, *orig;
184 char *territory, *encoding, *modifier;
185 size_t llen, tlen = 0, mlen = 0;
190 if (FcStrCmpIgnoreCase (lang, (const FcChar8 *)"C") == 0 ||
191 FcStrCmpIgnoreCase (lang, (const FcChar8 *)"POSIX") == 0)
193 result = FcStrCopy ((const FcChar8 *)"en");
197 s = FcStrCopy (lang);
201 /* from the comments in glibc:
203 * LOCALE can consist of up to four recognized parts for the XPG syntax:
205 * language[_territory[.codeset]][@modifier]
207 * Beside the first all of them are allowed to be missing. If the
208 * full specified locale is not found, the less specific one are
209 * looked for. The various part will be stripped off according to
210 * the following order:
212 * (2) normalized codeset
216 * So since we don't take care of the codeset part here, what patterns
217 * we need to deal with is:
219 * 1. language_territory@modifier
220 * 2. language@modifier
223 * then. and maybe no need to try language_territory here.
225 modifier = strchr ((const char *) s, '@');
230 mlen = strlen (modifier);
232 encoding = strchr ((const char *) s, '.');
239 memmove (encoding, modifier, mlen + 1);
243 territory = strchr ((const char *) s, '_');
245 territory = strchr ((const char *) s, '-');
250 tlen = strlen (territory);
252 llen = strlen ((const char *) s);
253 if (llen < 2 || llen > 3)
255 fprintf (stderr, "Fontconfig warning: ignoring %s: not a valid language tag\n",
259 if (territory && (tlen < 2 || tlen > 3))
261 fprintf (stderr, "Fontconfig warning: ignoring %s: not a valid region tag\n",
269 orig = FcStrDowncase (s);
274 if (FcDebug () & FC_DBG_LANGSET)
275 printf("Checking the existence of %s.orth\n", s);
276 if (FcLangSetIndex (s) < 0)
278 memmove (territory - 1, territory + tlen, (mlen > 0 ? mlen + 1 : 0) + 1);
280 modifier = territory;
285 /* we'll miss the opportunity to reduce the correct size
286 * of the allocated memory for the string after that.
294 if (FcDebug () & FC_DBG_LANGSET)
295 printf("Checking the existence of %s.orth\n", s);
296 if (FcLangSetIndex (s) < 0)
301 /* we'll miss the opportunity to reduce the correct size
302 * of the allocated memory for the string after that.
308 if (FcDebug () & FC_DBG_LANGSET)
309 printf("Checking the existence of %s.orth\n", s);
310 if (FcLangSetIndex (s) < 0)
312 /* there seems no languages matched in orth.
313 * add the language as is for fallback.
321 /* we'll miss the opportunity to reduce the correct size
322 * of the allocated memory for the string after that.
333 if (FcDebug () & FC_DBG_LANGSET)
336 printf ("normalized: %s -> %s\n", lang, result);
338 printf ("Unable to normalize %s\n", lang);
/* FcLangEnd(c): true when c ends the language part of a tag, i.e. c is '-' or NUL. */
344 #define FcLangEnd(c) ((c) == '-' || (c) == '\0')
/*
 * FcLangCompare: compare two language tags and classify the difference.
 *
 * The result starts as FcLangDifferentLang and is upgraded to
 * FcLangDifferentTerritory once both strings have reached the end of their
 * language part at the same position (FcLangEnd is true for both
 * characters).
 *
 * NOTE(review): the character loop and the path that reports equality are
 * not visible in this listing.
 */
347 FcLangCompare (const FcChar8 *s1, const FcChar8 *s2)
350 FcLangResult result = FcLangDifferentLang;
361 if (FcLangEnd (c1) && FcLangEnd (c2))
362 result = FcLangDifferentTerritory;
368 result = FcLangDifferentTerritory;
373 * Return FcTrue when super contains sub.
375 * super contains sub if super and sub have the same
376 * language and either the same country or one
377 * is missing the country
/*
 * FcLangContains: decide whether language tag `super` covers tag `sub`.
 *
 * Two situations are tested explicitly once the strings diverge: `super`
 * continues with a '-' (country) where `sub` ends, and `sub` continues with
 * a '-' where `super` ends.
 *
 * NOTE(review): the value returned in each of those two cases is not
 * visible in this listing.
 */
381 FcLangContains (const FcChar8 *super, const FcChar8 *sub)
394 /* see if super has a country while sub is missing one */
395 if (c1 == '-' && c2 == '\0')
397 /* see if sub has a country while super is missing one */
398 if (c1 == '\0' && c2 == '-')
/*
 * FcLangGetCharSet: look up the built-in charset for a language tag.
 *
 * Every fcLangCharSets[] entry is compared with `lang` through
 * FcLangCompare().  One switch arm returns that entry's charset directly;
 * the FcLangDifferentTerritory and FcLangDifferentLang results are handled
 * separately, and after the loop the charset of the entry remembered in
 * `country` is returned.
 *
 * NOTE(review): presumably the direct return is the exact-match case and
 * `country` records a same-language, different-territory entry; the case
 * labels and the not-found return are not visible here -- confirm.
 */
408 FcLangGetCharSet (const FcChar8 *lang)
413 for (i = 0; i < NUM_LANG_CHAR_SET; i++)
415 switch (FcLangCompare (lang, fcLangCharSets[i].lang)) {
417 return &fcLangCharSets[i].charset;
418 case FcLangDifferentTerritory:
421 case FcLangDifferentLang:
428 return &fcLangCharSets[country].charset;
/*
 * Build a new string set holding the tag of every entry in
 * fcLangCharSets[].  The result of each FcStrSetAdd() call is not checked.
 * (The function header belonging to these statements is not visible in this
 * listing.)
 */
437 langs = FcStrSetCreate();
441 for (i = 0; i < NUM_LANG_CHAR_SET; i++)
442 FcStrSetAdd (langs, fcLangCharSets[i].lang);
/*
 * FcLangSetCreate: allocate an empty language set.
 *
 * The object comes from malloc(); its bitmap is zeroed and map_size is set
 * to NUM_LANG_SET_MAP.
 *
 * NOTE(review): the allocation-failure check and the initialisation of the
 * `extra` member are not visible in this listing.
 */
448 FcLangSetCreate (void)
452 ls = malloc (sizeof (FcLangSet));
455 memset (ls->map, '\0', sizeof (ls->map));
456 ls->map_size = NUM_LANG_SET_MAP;
/*
 * FcLangSetDestroy: release a language set, including the string set held
 * in ls->extra.
 *
 * NOTE(review): the guard around FcStrSetDestroy() and the release of ls
 * itself are not visible in this listing.
 */
462 FcLangSetDestroy (FcLangSet *ls)
465 FcStrSetDestroy (ls->extra);
/*
 * FcLangSetCopy: create an independent copy of ls.
 *
 * The new set comes from FcLangSetCreate().  Its bitmap is cleared and then
 * filled with as many bytes of ls->map as both sides can hold (the smaller
 * of sizeof (new->map) and ls->map_size words), so a source with a
 * different map_size is copied safely.
 *
 * The strings of ls->extra are added one by one to a newly created
 * new->extra set; if an FcStrSetAdd() fails the iterator is closed and the
 * partially built copy is destroyed with FcLangSetDestroy().
 */
470 FcLangSetCopy (const FcLangSet *ls)
474 new = FcLangSetCreate ();
477 memset (new->map, '\0', sizeof (new->map));
478 memcpy (new->map, ls->map, FC_MIN (sizeof (new->map), ls->map_size * sizeof (ls->map[0])));
484 new->extra = FcStrSetCreate ();
488 list = FcStrListCreate (ls->extra);
492 while ((extra = FcStrListNext (list)))
493 if (!FcStrSetAdd (new->extra, extra))
495 FcStrListDone (list);
498 FcStrListDone (list);
502 FcLangSetDestroy (new);
/*
 * FcLangSetIndex: find the position of `lang` in fcLangCharSets[].
 *
 * The search window [low, high] is chosen from the lower-cased first
 * character: characters above 'z' search from fcLangCharSetRanges[25].begin
 * to the last entry, letters 'a'..'z' use the begin/end pair stored in
 * fcLangCharSetRanges[] for that letter, and the remaining case ends at
 * fcLangCharSetRanges[0].begin.  The window is then bisected
 * (mid = (high + low) >> 1).
 *
 * When the entry at mid starts with a different character the full strings
 * are compared with FcStrCmpIgnoreCase(); otherwise the second characters
 * are compared directly and only the remainder, from offset 2, is compared
 * as a string when needed.
 *
 * A miss is reported as a negative value (callers test `< 0`); the visible
 * early exit returns -low.
 *
 * NOTE(review): -low is not negative when low is 0; whether that exit can
 * be reached with low == 0 depends on a condition that is not visible in
 * this listing -- confirm.
 */
508 FcLangSetIndex (const FcChar8 *lang)
510 int low, high, mid = 0;
512 FcChar8 firstChar = FcToLower(lang[0]);
513 FcChar8 secondChar = firstChar ? FcToLower(lang[1]) : '\0';
518 high = fcLangCharSetRanges[0].begin;
520 else if(firstChar > 'z')
522 low = fcLangCharSetRanges[25].begin;
523 high = NUM_LANG_CHAR_SET - 1;
527 low = fcLangCharSetRanges[firstChar - 'a'].begin;
528 high = fcLangCharSetRanges[firstChar - 'a'].end;
531 return -low; /* next entry after where it would be */
536 mid = (high + low) >> 1;
537 if(fcLangCharSets[mid].lang[0] != firstChar)
538 cmp = FcStrCmpIgnoreCase(fcLangCharSets[mid].lang, lang);
540 { /* fast path for resolving 2-letter languages (by far the most common) after
541 * finding the first char (probably already true because of the hash table) */
542 cmp = fcLangCharSets[mid].lang[1] - secondChar;
544 (fcLangCharSets[mid].lang[2] != '\0' ||
547 cmp = FcStrCmpIgnoreCase(fcLangCharSets[mid].lang+2,
/*
 * FcLangSetAdd: add a language tag to ls.
 *
 * A tag found by FcLangSetIndex() is recorded as a bit in the bitmap via
 * FcLangSetBitSet().  Otherwise the string itself is stored in ls->extra,
 * which is created on demand with FcStrSetCreate(); in that case the result
 * of FcStrSetAdd() is returned.
 */
564 FcLangSetAdd (FcLangSet *ls, const FcChar8 *lang)
568 id = FcLangSetIndex (lang);
571 FcLangSetBitSet (ls, id);
576 ls->extra = FcStrSetCreate ();
580 return FcStrSetAdd (ls->extra, lang);
/*
 * FcLangSetDel: remove a language tag from ls.
 *
 * Counterpart of FcLangSetAdd: a tag found by FcLangSetIndex() has its bit
 * cleared with FcLangSetBitReset(); otherwise the string is removed from
 * ls->extra with FcStrSetDel().
 *
 * NOTE(review): the conditions selecting between the two paths, and the
 * return value, are not visible in this listing.
 */
584 FcLangSetDel (FcLangSet *ls, const FcChar8 *lang)
588 id = FcLangSetIndex (lang);
591 FcLangSetBitReset (ls, id);
595 FcStrSetDel (ls->extra, lang);
/*
 * FcLangSetHasLang: find how closely ls matches a language tag.
 *
 * After FcLangSetIndex() locates the tag, its own bit is tested first.
 * Otherwise neighbouring entries of fcLangCharSets[] are scanned downwards
 * from id - 1 and upwards from id; each is compared with FcLangCompare()
 * and, when its bit is set in ls and the result is lower than `best`, it
 * becomes a candidate.  `best` starts at FcLangDifferentLang.
 *
 * Finally the strings in ls->extra are compared the same way, stopping as
 * soon as `best` is no longer greater than FcLangEqual.
 *
 * NOTE(review): the handling of a negative index, the loop exits on
 * FcLangDifferentLang and the updates of `best` are not visible in this
 * listing.
 */
601 FcLangSetHasLang (const FcLangSet *ls, const FcChar8 *lang)
604 FcLangResult best, r;
607 id = FcLangSetIndex (lang);
610 else if (FcLangSetBitGet (ls, id))
612 best = FcLangDifferentLang;
613 for (i = id - 1; i >= 0; i--)
615 r = FcLangCompare (lang, fcLangCharSets[i].lang);
616 if (r == FcLangDifferentLang)
618 if (FcLangSetBitGet (ls, i) && r < best)
621 for (i = id; i < NUM_LANG_CHAR_SET; i++)
623 r = FcLangCompare (lang, fcLangCharSets[i].lang);
624 if (r == FcLangDifferentLang)
626 if (FcLangSetBitGet (ls, i) && r < best)
631 FcStrList *list = FcStrListCreate (ls->extra);
636 while (best > FcLangEqual && (extra = FcStrListNext (list)))
638 r = FcLangCompare (lang, extra);
642 FcStrListDone (list);
/*
 * FcLangSetCompareStrSet: best match between ls and any string in `set`.
 *
 * Each string is evaluated with FcLangSetHasLang(); iteration stops early
 * once `best` is no longer greater than FcLangEqual.  `best` starts at
 * FcLangDifferentLang.  The iterator is closed with FcStrListDone().
 *
 * NOTE(review): the statement that folds r into `best` is not visible in
 * this listing.
 */
649 FcLangSetCompareStrSet (const FcLangSet *ls, FcStrSet *set)
651 FcStrList *list = FcStrListCreate (set);
652 FcLangResult r, best = FcLangDifferentLang;
657 while (best > FcLangEqual && (extra = FcStrListNext (list)))
659 r = FcLangSetHasLang (ls, extra);
663 FcStrListDone (list);
/*
 * FcLangSetCompare: classify how two language sets relate.
 *
 * Only the words present in both bitmaps (and at most NUM_LANG_SET_MAP) are
 * examined.  The first pass looks for any bit set in both maps.  The second
 * pass walks fcLangCountrySets[]: when both sets have a bit inside the same
 * country set, `best` becomes FcLangDifferentTerritory.  Afterwards the
 * `extra` string sets of each side are compared against the other side with
 * FcLangSetCompareStrSet(); the lsb->extra comparison is made only while
 * `best` is still greater than FcLangEqual.
 *
 * NOTE(review): the action taken when the first pass finds a common bit is
 * not visible in this listing -- presumably an immediate FcLangEqual;
 * confirm.
 */
669 FcLangSetCompare (const FcLangSet *lsa, const FcLangSet *lsb)
672 FcLangResult best, r;
674 count = FC_MIN (lsa->map_size, lsb->map_size);
675 count = FC_MIN (NUM_LANG_SET_MAP, count);
676 for (i = 0; i < count; i++)
677 if (lsa->map[i] & lsb->map[i])
679 best = FcLangDifferentLang;
680 for (j = 0; j < NUM_COUNTRY_SET; j++)
681 for (i = 0; i < count; i++)
682 if ((lsa->map[i] & fcLangCountrySets[j][i]) &&
683 (lsb->map[i] & fcLangCountrySets[j][i]))
685 best = FcLangDifferentTerritory;
690 r = FcLangSetCompareStrSet (lsb, lsa->extra);
694 if (best > FcLangEqual && lsb->extra)
696 r = FcLangSetCompareStrSet (lsa, lsb->extra);
704 * Used in computing values -- mustn't allocate any storage
705 * XXX Not thread-safe
/*
 * FcLangSetPromote: wrap a single language string in a language set.
 *
 * The set `ls` has its bitmap cleared and map_size set to
 * NUM_LANG_SET_MAP; a tag found by FcLangSetIndex() is recorded with
 * FcLangSetBitSet().  The function-static string set `strs` and the
 * assignment `str = (FcChar8 *) lang` show that an unknown tag is
 * referenced rather than copied, so `lang` must stay valid while the result
 * is in use.
 *
 * NOTE(review): `strs` is function-static, so the result is shared between
 * calls; presumably `ls` is static as well -- its declaration is not
 * visible in this listing.
 */
708 FcLangSetPromote (const FcChar8 *lang)
711 static FcStrSet strs;
715 memset (ls.map, '\0', sizeof (ls.map));
716 ls.map_size = NUM_LANG_SET_MAP;
718 id = FcLangSetIndex (lang);
721 FcLangSetBitSet (&ls, id);
730 str = (FcChar8 *) lang;
/*
 * FcLangSetHash: compute a hash over the bitmap words of ls.
 *
 * At most NUM_LANG_SET_MAP words, and never more than ls->map_size, take
 * part.
 *
 * NOTE(review): how the words are combined, and whether ls->extra
 * contributes, is not visible in this listing.
 */
736 FcLangSetHash (const FcLangSet *ls)
741 count = FC_MIN (ls->map_size, NUM_LANG_SET_MAP);
742 for (i = 0; i < count; i++)
/*
 * FcNameParseLangSet: parse a '|'-separated list of language tags.
 *
 * A new set is created with FcLangSetCreate().  Each tag is collected into
 * the 32-byte buffer `lang`, reading at most 31 characters and stopping at
 * NUL or '|', and then added with FcLangSetAdd().  If an add fails the set
 * is destroyed.
 *
 * NOTE(review): a tag longer than 31 characters ends the inner loop without
 * reaching a separator; it looks as if the remainder would then be parsed
 * as a further tag rather than rejected -- confirm against the enclosing
 * loop, which is not visible in this listing.
 */
750 FcNameParseLangSet (const FcChar8 *string)
752 FcChar8 lang[32], c = 0;
756 ls = FcLangSetCreate ();
762 for(i = 0; i < 31;i++)
765 if(c == '\0' || c == '|')
766 break; /* end of this code */
770 if (!FcLangSetAdd (ls, lang))
777 FcLangSetDestroy (ls);
783 FcNameUnparseLangSet (FcStrBuf *buf, const FcLangSet *ls)
787 FcBool first = FcTrue;
789 count = FC_MIN (ls->map_size, NUM_LANG_SET_MAP);
790 for (i = 0; i < count; i++)
792 if ((bits = ls->map[i]))
794 for (bit = 0; bit <= 31; bit++)
795 if (bits & (1 << bit))
797 int id = (i << 5) | bit;
799 if (!FcStrBufChar (buf, '|'))
801 if (!FcStrBufString (buf, fcLangCharSets[fcLangCharSetIndicesInv[id]].lang))
809 FcStrList *list = FcStrListCreate (ls->extra);
814 while ((extra = FcStrListNext (list)))
817 if (!FcStrBufChar (buf, '|'))
819 FcStrListDone (list);
822 if (!FcStrBufString (buf, extra))
824 FcStrListDone (list);
829 FcStrListDone (list);
/*
 * FcLangSetEqual: test two language sets for equality.
 *
 * The bitmap words common to both sets (at most NUM_LANG_SET_MAP) are
 * compared first.  Then the `extra` string sets are considered: the case
 * where neither side has one is handled on its own, and when both have one
 * the result of FcStrSetEqual() is returned.
 *
 * NOTE(review): the values returned for a differing word, for "neither has
 * extra" and for "only one has extra" are not visible in this listing.
 */
835 FcLangSetEqual (const FcLangSet *lsa, const FcLangSet *lsb)
839 count = FC_MIN (lsa->map_size, lsb->map_size);
840 count = FC_MIN (NUM_LANG_SET_MAP, count);
841 for (i = 0; i < count; i++)
843 if (lsa->map[i] != lsb->map[i])
846 if (!lsa->extra && !lsb->extra)
848 if (lsa->extra && lsb->extra)
849 return FcStrSetEqual (lsa->extra, lsb->extra);
/*
 * FcLangSetContainsLang: test whether ls covers a single language tag.
 *
 * After FcLangSetIndex() locates the tag, its own bit is tested first.
 * Otherwise the neighbouring entries of fcLangCharSets[] are scanned
 * downwards from id - 1 and upwards from id; an entry counts when its bit
 * is set in ls and FcLangContains() says it covers `lang`.  Entries for
 * which FcLangCompare() reports FcLangDifferentLang are treated specially.
 * Finally every string in ls->extra is tested with FcLangContains().
 *
 * NOTE(review): the handling of a negative index, the effect of the
 * FcLangDifferentLang test (presumably it ends the scan) and the return
 * statements are not visible in this listing.
 */
854 FcLangSetContainsLang (const FcLangSet *ls, const FcChar8 *lang)
859 id = FcLangSetIndex (lang);
862 else if (FcLangSetBitGet (ls, id))
865 * search up and down among equal languages for a match
867 for (i = id - 1; i >= 0; i--)
869 if (FcLangCompare (fcLangCharSets[i].lang, lang) == FcLangDifferentLang)
871 if (FcLangSetBitGet (ls, i) &&
872 FcLangContains (fcLangCharSets[i].lang, lang))
875 for (i = id; i < NUM_LANG_CHAR_SET; i++)
877 if (FcLangCompare (fcLangCharSets[i].lang, lang) == FcLangDifferentLang)
879 if (FcLangSetBitGet (ls, i) &&
880 FcLangContains (fcLangCharSets[i].lang, lang))
885 FcStrList *list = FcStrListCreate (ls->extra);
890 while ((extra = FcStrListNext (list)))
892 if (FcLangContains (extra, lang))
895 FcStrListDone (list);
904 * return FcTrue if lsa contains every language in lsb
907 FcLangSetContains (const FcLangSet *lsa, const FcLangSet *lsb)
912 if (FcDebug() & FC_DBG_MATCHV)
914 printf ("FcLangSet "); FcLangSetPrint (lsa);
915 printf (" contains "); FcLangSetPrint (lsb);
919 * check bitmaps for missing language support
921 count = FC_MIN (lsa->map_size, lsb->map_size);
922 count = FC_MIN (NUM_LANG_SET_MAP, count);
923 for (i = 0; i < count; i++)
925 missing = lsb->map[i] & ~lsa->map[i];
928 for (j = 0; j < 32; j++)
929 if (missing & (1 << j))
931 if (!FcLangSetContainsLang (lsa,
932 fcLangCharSets[fcLangCharSetIndicesInv[i*32 + j]].lang))
934 if (FcDebug() & FC_DBG_MATCHV)
935 printf ("\tMissing bitmap %s\n", fcLangCharSets[fcLangCharSetIndicesInv[i*32+j]].lang);
943 FcStrList *list = FcStrListCreate (lsb->extra);
948 while ((extra = FcStrListNext (list)))
950 if (!FcLangSetContainsLang (lsa, extra))
952 if (FcDebug() & FC_DBG_MATCHV)
953 printf ("\tMissing string %s\n", extra);
957 FcStrListDone (list);
/*
 * FcLangSetSerializeAlloc: reserve sizeof (FcLangSet) bytes for `l` in the
 * serialization buffer through FcSerializeAlloc().
 *
 * NOTE(review): the return statements are not visible in this listing.
 */
966 FcLangSetSerializeAlloc (FcSerialize *serialize, const FcLangSet *l)
968 if (!FcSerializeAlloc (serialize, l, sizeof (FcLangSet)))
/*
 * FcLangSetSerialize: write `l` into the slot returned by FcSerializePtr().
 *
 * The serialized bitmap is cleared and then filled with as many bytes of
 * l->map as both sides can hold, so a source with a different map_size is
 * handled.  The serialized map_size is always NUM_LANG_SET_MAP, and the
 * `extra` string set is not serialized: the serialized pointer is NULL.
 */
974 FcLangSetSerialize(FcSerialize *serialize, const FcLangSet *l)
976 FcLangSet *l_serialize = FcSerializePtr (serialize, l);
980 memset (l_serialize->map, '\0', sizeof (l_serialize->map));
981 memcpy (l_serialize->map, l->map, FC_MIN (sizeof (l_serialize->map), l->map_size * sizeof (l->map[0])));
982 l_serialize->map_size = NUM_LANG_SET_MAP;
983 l_serialize->extra = NULL; /* We don't serialize ls->extra */
/*
 * FcLangSetGetLangs: list every language of ls as a new string set.
 *
 * The set is created with FcStrSetCreate().  Every index i of
 * fcLangCharSets[] whose bit is reported by FcLangSetBitGet() contributes
 * its tag, followed by all strings in ls->extra.  The results of the
 * individual FcStrSetAdd() calls are not checked.
 */
988 FcLangSetGetLangs (const FcLangSet *ls)
993 langs = FcStrSetCreate();
997 for (i = 0; i < NUM_LANG_CHAR_SET; i++)
998 if (FcLangSetBitGet (ls, i))
999 FcStrSetAdd (langs, fcLangCharSets[i].lang);
1003 FcStrList *list = FcStrListCreate (ls->extra);
1008 while ((extra = FcStrListNext (list)))
1009 FcStrSetAdd (langs, extra);
1011 FcStrListDone (list);
1019 FcLangSetOperate(const FcLangSet *a,
1021 FcBool (*func) (FcLangSet *ls,
1024 FcLangSet *langset = FcLangSetCopy (a);
1025 FcStrList *sl = FcStrListCreate (FcLangSetGetLangs (b));
1028 while ((str = FcStrListNext (sl)))
1030 func (langset, str);
/*
 * FcLangSetUnion: new language set holding the languages of `a` plus those
 * of `b`; implemented as FcLangSetOperate() with FcLangSetAdd.
 */
1038 FcLangSetUnion (const FcLangSet *a, const FcLangSet *b)
1040 return FcLangSetOperate(a, b, FcLangSetAdd);
/*
 * FcLangSetSubtract: new language set holding the languages of `a` with
 * those of `b` removed; implemented as FcLangSetOperate() with
 * FcLangSetDel.
 */
1044 FcLangSetSubtract (const FcLangSet *a, const FcLangSet *b)
1046 return FcLangSetOperate(a, b, FcLangSetDel);
1050 #include "fcaliastail.h"
1051 #include "fcftaliastail.h"