2 * fontconfig/src/fclang.c
4 * Copyright © 2002 Keith Packard
6 * Permission to use, copy, modify, distribute, and sell this software and its
7 * documentation for any purpose is hereby granted without fee, provided that
8 * the above copyright notice appear in all copies and that both that
9 * copyright notice and this permission notice appear in supporting
10 * documentation, and that the name of the author(s) not be used in
11 * advertising or publicity pertaining to distribution of the software without
12 * specific, written prior permission. The authors make no
13 * representations about the suitability of this software for any purpose. It
14 * is provided "as is" without express or implied warranty.
16 * THE AUTHOR(S) DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE,
17 * INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO
18 * EVENT SHALL THE AUTHOR(S) BE LIABLE FOR ANY SPECIAL, INDIRECT OR
19 * CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE,
20 * DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER
21 * TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
22 * PERFORMANCE OF THIS SOFTWARE.
28 /* Objects MT-safe for readonly access. */
31 const FcChar8 lang[16];
32 const FcCharSet charset;
40 #include "../fc-lang/fclang.h"
45 FcChar32 map[NUM_LANG_SET_MAP];
48 static int FcLangSetIndex (const FcChar8 *lang);
/*
 * Turn on the bit that represents one language in ls->map.
 *
 * The incoming id is first translated through fcLangCharSetIndices.  The
 * low five bits of the translated id (id & 0x1f) select the bit inside a
 * 32-bit map word.  A bucket at or beyond ls->map_size is ignored instead
 * of being written.
 *
 * NOTE(review): the second parameter and the computation of `bucket` are
 * not visible in this excerpt.
 */
52 FcLangSetBitSet (FcLangSet *ls,
57 id = fcLangCharSetIndices[id];
/* never write past the words this particular set actually carries */
59 if (bucket >= ls->map_size)
60 return; /* shouldn't happen really */
62 ls->map[bucket] |= ((FcChar32) 1U << (id & 0x1f));
/*
 * Test the bit that represents one language in ls->map.
 *
 * The incoming id is translated through fcLangCharSetIndices; the low five
 * bits of the translated id select the bit inside a 32-bit map word.
 * Yields FcTrue when that bit is set and FcFalse when it is clear.
 *
 * NOTE(review): the computation of `bucket` and the value returned when
 * bucket >= ls->map_size are not visible in this excerpt.
 */
66 FcLangSetBitGet (const FcLangSet *ls,
71 id = fcLangCharSetIndices[id];
73 if (bucket >= ls->map_size)
76 return ((ls->map[bucket] >> (id & 0x1f)) & 1) ? FcTrue : FcFalse;
/*
 * Clear the bit that represents one language in ls->map.
 *
 * Mirrors the set operation: the id is translated through
 * fcLangCharSetIndices, the low five bits select the bit inside a 32-bit
 * map word, and a bucket at or beyond ls->map_size is ignored.
 *
 * NOTE(review): the second parameter and the computation of `bucket` are
 * not visible in this excerpt.
 */
80 FcLangSetBitReset (FcLangSet *ls,
85 id = fcLangCharSetIndices[id];
/* never touch words beyond the ones this particular set carries */
87 if (bucket >= ls->map_size)
88 return; /* shouldn't happen really */
90 ls->map[bucket] &= ~((FcChar32) 1U << (id & 0x1f));
/*
 * Build a language set for a font from the font's character coverage.
 *
 * charset       - the characters the font provides.
 * exclusiveLang - optional language tag; its charset is looked up with
 *                 FcLangGetCharSet and used by the Han exclusivity check
 *                 in the loop below.
 *
 * A fresh set is obtained from FcLangSetCreate.  Every entry of
 * fcLangCharSets is then examined: FcCharSetSubtractCount reports how many
 * characters of the language's charset are absent from the font, and
 * FcLangSetBitSet records the language in the result.  With FC_DBG_LANGSET
 * the charsets are printed; with FC_DBG_SCANV the missing count is
 * printed, plus the individual missing code points when fewer than ten
 * are missing.
 *
 * NOTE(review): the condition guarding FcLangSetBitSet, the action taken
 * when the exclusivity comparison fails, the allocation-failure handling
 * and the return statement are not visible in this excerpt.
 */
94 FcFreeTypeLangSet (const FcCharSet *charset,
95 const FcChar8 *exclusiveLang)
99 const FcCharSet *exclusiveCharset = 0;
103 exclusiveCharset = FcLangGetCharSet (exclusiveLang);
104 ls = FcLangSetCreate ();
107 if (FcDebug() & FC_DBG_LANGSET)
109 printf ("font charset");
110 FcCharSetPrint (charset);
113 for (i = 0; i < NUM_LANG_CHAR_SET; i++)
115 if (FcDebug() & FC_DBG_LANGSET)
117 printf ("%s charset", fcLangCharSets[i].lang);
118 FcCharSetPrint (&fcLangCharSets[i].charset);
123 * Check for Han charsets to make fonts
124 * which advertise support for a single language
125 * not support other Han languages
127 if (exclusiveCharset &&
128 FcFreeTypeIsExclusiveLang (fcLangCharSets[i].lang))
130 if (fcLangCharSets[i].charset.num != exclusiveCharset->num)
133 for (j = 0; j < fcLangCharSets[i].charset.num; j++)
134 if (FcCharSetLeaf(&fcLangCharSets[i].charset, j) !=
135 FcCharSetLeaf(exclusiveCharset, j))
/* number of characters the language needs that the font lacks */
138 missing = FcCharSetSubtractCount (&fcLangCharSets[i].charset, charset);
139 if (FcDebug() & FC_DBG_SCANV)
141 if (missing && missing < 10)
143 FcCharSet *missed = FcCharSetSubtract (&fcLangCharSets[i].charset,
146 FcChar32 map[FC_CHARSET_MAP_SIZE];
149 printf ("\n%s(%u) ", fcLangCharSets[i].lang, missing);
/* walk the difference page by page and print every set bit as a code point */
151 for (ucs4 = FcCharSetFirstPage (missed, map, &next);
152 ucs4 != FC_CHARSET_DONE;
153 ucs4 = FcCharSetNextPage (missed, map, &next))
156 for (i = 0; i < FC_CHARSET_MAP_SIZE; i++)
159 for (j = 0; j < 32; j++)
160 if (map[i] & (1U << j))
161 printf (" %04x", ucs4 + i * 32 + j);
165 FcCharSetDestroy (missed);
168 printf ("%s(%u) ", fcLangCharSets[i].lang, missing);
171 FcLangSetBitSet (ls, i);
174 if (FcDebug() & FC_DBG_SCANV)
/*
 * Normalize a locale-style name into a language tag.
 *
 * Visible behaviour:
 *  - "C", "C.UTF-8", "C.utf8" and "POSIX" (compared ignoring case) yield a
 *    copy of "en".
 *  - Otherwise the input is copied and split in place: '@' introduces the
 *    modifier, '.' the encoding, and '_' or '-' the territory.  When both
 *    an encoding and a modifier are present, the modifier text is moved
 *    over the encoding (memmove of mlen + 1 bytes).
 *  - A language part whose length is not 2 or 3 is reported on stderr as
 *    "not a valid language tag".  A territory whose length is not 2 or 3
 *    is reported as "not a valid region tag", unless it starts with 'z'
 *    and is shorter than 5 characters.
 *  - The lower-cased candidate is probed with FcLangSetIndex up to three
 *    times, each probe announced under FC_DBG_LANGSET.
 *  - Under FC_DBG_LANGSET the final mapping, or the failure to normalize,
 *    is printed.
 *
 * NOTE(review): the statements that truncate the string between probes,
 * select the final result, release temporaries and return are not visible
 * in this excerpt; ownership of the returned string should be confirmed
 * against the full source.
 */
182 FcLangNormalize (const FcChar8 *lang)
184 FcChar8 *result = NULL, *s, *orig;
185 char *territory, *encoding, *modifier;
186 size_t llen, tlen = 0, mlen = 0;
191 /* might be called without initialization */
194 if (FcStrCmpIgnoreCase (lang, (const FcChar8 *)"C") == 0 ||
195 FcStrCmpIgnoreCase (lang, (const FcChar8 *)"C.UTF-8") == 0 ||
196 FcStrCmpIgnoreCase (lang, (const FcChar8 *)"C.utf8") == 0 ||
197 FcStrCmpIgnoreCase (lang, (const FcChar8 *)"POSIX") == 0)
199 result = FcStrCopy ((const FcChar8 *)"en");
203 s = FcStrCopy (lang);
207 /* from the comments in glibc:
209 * LOCALE can consist of up to four recognized parts for the XPG syntax:
211 * language[_territory[.codeset]][@modifier]
213 * Beside the first all of them are allowed to be missing. If the
214 * full specified locale is not found, the less specific one are
215 * looked for. The various part will be stripped off according to
216 * the following order:
218 * (2) normalized codeset
222 * So since we don't take care of the codeset part here, what patterns
223 * we need to deal with is:
225 * 1. language_territory@modifier
226 * 2. language@modifier
229 * then. and maybe no need to try language_territory here.
231 modifier = strchr ((const char *) s, '@');
236 mlen = strlen (modifier);
238 encoding = strchr ((const char *) s, '.');
245 memmove (encoding, modifier, mlen + 1);
249 territory = strchr ((const char *) s, '_');
251 territory = strchr ((const char *) s, '-');
256 tlen = strlen (territory);
258 llen = strlen ((const char *) s);
259 if (llen < 2 || llen > 3)
261 fprintf (stderr, "Fontconfig warning: ignoring %s: not a valid language tag\n",
265 if (territory && (tlen < 2 || tlen > 3) &&
266 !(territory[0] == 'z' && tlen < 5))
268 fprintf (stderr, "Fontconfig warning: ignoring %s: not a valid region tag\n",
276 orig = FcStrDowncase (s);
281 if (FcDebug () & FC_DBG_LANGSET)
282 printf("Checking the existence of %s.orth\n", s);
283 if (FcLangSetIndex (s) < 0)
285 memmove (territory - 1, territory + tlen, (mlen > 0 ? mlen + 1 : 0) + 1);
287 modifier = territory;
292 /* we'll miss the opportunity to reduce the correct size
293 * of the allocated memory for the string after that.
301 if (FcDebug () & FC_DBG_LANGSET)
302 printf("Checking the existence of %s.orth\n", s);
303 if (FcLangSetIndex (s) < 0)
308 /* we'll miss the opportunity to reduce the correct size
309 * of the allocated memory for the string after that.
315 if (FcDebug () & FC_DBG_LANGSET)
316 printf("Checking the existence of %s.orth\n", s);
317 if (FcLangSetIndex (s) < 0)
319 /* there seems no languages matched in orth.
320 * add the language as is for fallback.
328 /* we'll miss the opportunity to reduce the correct size
329 * of the allocated memory for the string after that.
340 if (FcDebug () & FC_DBG_LANGSET)
343 printf ("normalized: %s -> %s\n", lang, result);
345 printf ("Unable to normalize %s\n", lang);
/*
 * True when character c is '-' or the terminating NUL.  The tag-comparison
 * code uses it to detect where the first component of a tag stops.
 */
351 #define FcLangEnd(c) ((c) == '-' || (c) == '\0')
/*
 * Compare two language tags and classify how closely they match.
 *
 * The result starts as FcLangDifferentLang.  It is upgraded to
 * FcLangDifferentTerritory when both strings are at an FcLangEnd position
 * and s1 is not an "und" tag, and again on a later path.  One exit returns
 * FcLangEqual unless s1 is an "und" tag, in which case the current result
 * is returned instead.
 *
 * is_und is computed from the first three characters of s1 being 'u',
 * 'n', 'd' after FcToLower, combined with a further condition.
 *
 * NOTE(review): the character loop, the final term of the is_und test and
 * the remaining return statements are not visible in this excerpt.
 */
354 FcLangCompare (const FcChar8 *s1, const FcChar8 *s2)
357 FcLangResult result = FcLangDifferentLang;
358 const FcChar8 *s1_orig = s1;
361 is_und = FcToLower (s1[0]) == 'u' &&
362 FcToLower (s1[1]) == 'n' &&
363 FcToLower (s1[2]) == 'd' &&
375 if (!is_und && FcLangEnd (c1) && FcLangEnd (c2))
376 result = FcLangDifferentTerritory;
381 return is_und ? result : FcLangEqual;
386 result = FcLangDifferentTerritory;
389 /* If we parsed past "und-", then do not consider it undefined anymore,
390 * as there's *something* specified. */
391 if (is_und && s1 - s1_orig == 4)
397 * Return FcTrue when super contains sub.
399 * super contains sub if super and sub have the same
400 * language and either the same country or one
401 * is missing the country
/*
 * Decide whether language tag `super` covers language tag `sub`.
 *
 * Two mismatch shapes are singled out: one tag ending exactly where the
 * other continues with '-' (that is, only one of the two carries a
 * country part).
 *
 * NOTE(review): the comparison loop and the value returned for each case
 * are not visible in this excerpt.
 */
405 FcLangContains (const FcChar8 *super, const FcChar8 *sub)
418 /* see if super has a country while sub is missing one */
419 if (c1 == '-' && c2 == '\0')
421 /* see if sub has a country while super is missing one */
422 if (c1 == '\0' && c2 == '-')
/*
 * Find the built-in charset associated with a language tag.
 *
 * Every entry of fcLangCharSets is compared against `lang` with
 * FcLangCompare.  One switch case returns that entry's charset
 * immediately; the FcLangDifferentTerritory and FcLangDifferentLang cases
 * are handled separately.  After the loop the charset of the entry
 * indexed by `country` is returned.  The returned pointer refers into the
 * fcLangCharSets table itself.
 *
 * NOTE(review): the first case label, how `country` is assigned and the
 * result when nothing matches are not visible in this excerpt.
 */
432 FcLangGetCharSet (const FcChar8 *lang)
437 for (i = 0; i < NUM_LANG_CHAR_SET; i++)
439 switch (FcLangCompare (lang, fcLangCharSets[i].lang)) {
441 return &fcLangCharSets[i].charset;
442 case FcLangDifferentTerritory:
445 case FcLangDifferentLang:
452 return &fcLangCharSets[country].charset;
461 langs = FcStrSetCreate();
465 for (i = 0; i < NUM_LANG_CHAR_SET; i++)
466 FcStrSetAdd (langs, fcLangCharSets[i].lang);
/*
 * Allocate a new FcLangSet with malloc, zero its bitmap and set map_size
 * to NUM_LANG_SET_MAP.
 *
 * NOTE(review): the allocation-failure check, the initialization of the
 * `extra` member and the return statement are not visible in this
 * excerpt.
 */
472 FcLangSetCreate (void)
476 ls = malloc (sizeof (FcLangSet));
479 memset (ls->map, '\0', sizeof (ls->map));
480 ls->map_size = NUM_LANG_SET_MAP;
/*
 * Release a language set.  The visible part destroys the string set held
 * in ls->extra.
 *
 * NOTE(review): any guard around that call and the release of `ls` itself
 * are not visible in this excerpt.
 */
486 FcLangSetDestroy (FcLangSet *ls)
492 FcStrSetDestroy (ls->extra);
/*
 * Produce an independent copy of a language set.
 *
 * A new set comes from FcLangSetCreate.  Its bitmap is zeroed and then
 * filled from ls->map, copying no more bytes than the destination holds
 * and no more than the source's map_size words provide.  The source's
 * extra strings are walked with an FcStrList and each is added to a newly
 * created string set on the copy.  A failed FcStrSetAdd closes the list
 * early.
 *
 * NOTE(review): the failure path that reaches FcLangSetDestroy (new), the
 * guards around the `extra` handling and the return statements are not
 * visible in this excerpt.
 */
497 FcLangSetCopy (const FcLangSet *ls)
504 new = FcLangSetCreate ();
507 memset (new->map, '\0', sizeof (new->map));
/* bounded by both the destination size and the source's own word count */
508 memcpy (new->map, ls->map, FC_MIN (sizeof (new->map), ls->map_size * sizeof (ls->map[0])));
514 new->extra = FcStrSetCreate ();
518 list = FcStrListCreate (ls->extra);
522 while ((extra = FcStrListNext (list)))
523 if (!FcStrSetAdd (new->extra, extra))
525 FcStrListDone (list);
528 FcStrListDone (list);
532 FcLangSetDestroy (new);
/*
 * Locate `lang` in the fcLangCharSets table.
 *
 * The lower-cased first character selects a search window.  For a letter,
 * the window is the begin/end pair stored in fcLangCharSetRanges for that
 * letter.  Characters above 'z' search from the start of the last range
 * (index 25) to the end of the table.  The window is then bisected
 * (mid = (high + low) >> 1).  When the middle entry already shares the
 * first character, a shortcut compares the second character directly
 * before falling back to FcStrCmpIgnoreCase on the remainder.
 *
 * NOTE(review): the loop bounds, the handling of cmp and the successful
 * return are not visible in this excerpt; the comment below describes the
 * not-found encoding.
 */
537 /* When the language isn't found, the return value r is such that:
539 * 2) -r -1 is the index of the first language in fcLangCharSets that comes
540 * after the 'lang' argument in lexicographic order.
542 * The -1 is necessary to avoid problems with language id 0 (otherwise, we
543 * wouldn't be able to distinguish between “language found, id is 0” and
544 * “language not found, sorts right before the language with id 0”).
547 FcLangSetIndex (const FcChar8 *lang)
549 int low, high, mid = 0;
551 FcChar8 firstChar = FcToLower(lang[0]);
552 FcChar8 secondChar = firstChar ? FcToLower(lang[1]) : '\0';
557 high = fcLangCharSetRanges[0].begin;
559 else if(firstChar > 'z')
561 low = fcLangCharSetRanges[25].begin;
562 high = NUM_LANG_CHAR_SET - 1;
566 low = fcLangCharSetRanges[firstChar - 'a'].begin;
567 high = fcLangCharSetRanges[firstChar - 'a'].end;
570 return -(low+1); /* one past next entry after where it would be */
575 mid = (high + low) >> 1;
576 if(fcLangCharSets[mid].lang[0] != firstChar)
577 cmp = FcStrCmpIgnoreCase(fcLangCharSets[mid].lang, lang);
579 { /* fast path for resolving 2-letter languages (by far the most common) after
580 * finding the first char (probably already true because of the hash table) */
581 cmp = fcLangCharSets[mid].lang[1] - secondChar;
583 (fcLangCharSets[mid].lang[2] != '\0' ||
586 cmp = FcStrCmpIgnoreCase(fcLangCharSets[mid].lang+2,
/*
 * Add a language to a set.
 *
 * The tag is looked up with FcLangSetIndex.  A tag that is in the
 * built-in table is recorded by setting its bit.  Otherwise the tag is
 * stored as a string in ls->extra, which is created with FcStrSetCreate
 * on this path, and the result of FcStrSetAdd is returned.
 *
 * NOTE(review): the branch conditions and the return value on the bitmap
 * path are not visible in this excerpt.
 */
603 FcLangSetAdd (FcLangSet *ls, const FcChar8 *lang)
607 id = FcLangSetIndex (lang);
610 FcLangSetBitSet (ls, id);
615 ls->extra = FcStrSetCreate ();
619 return FcStrSetAdd (ls->extra, lang);
/*
 * Remove a language from a set.
 *
 * The tag is looked up with FcLangSetIndex.  A tag found in the built-in
 * table has its bit cleared; otherwise the tag is removed from the string
 * set in ls->extra with FcStrSetDel.
 *
 * NOTE(review): the branch conditions and the return value are not
 * visible in this excerpt.
 */
623 FcLangSetDel (FcLangSet *ls, const FcChar8 *lang)
627 id = FcLangSetIndex (lang);
630 FcLangSetBitReset (ls, id);
634 FcStrSetDel (ls->extra, lang);
/*
 * Report how well a language set matches a single language tag.
 *
 * The tag is looked up with FcLangSetIndex; a set bit at that index is
 * handled as its own case.  Otherwise `best` starts at
 * FcLangDifferentLang and the table is scanned downwards from id - 1 and
 * upwards from id.  Each scan tests for FcLangDifferentLang from
 * FcLangCompare, and an entry whose bit is set in ls and whose comparison
 * result is smaller than `best` is singled out.  The strings in ls->extra
 * are then compared while `best` is still above FcLangEqual.
 *
 * NOTE(review): the handling of a negative index, the assignments to
 * `best`, the loop exits and the return statements are not visible in
 * this excerpt.
 */
640 FcLangSetHasLang (const FcLangSet *ls, const FcChar8 *lang)
643 FcLangResult best, r;
646 id = FcLangSetIndex (lang);
649 else if (FcLangSetBitGet (ls, id))
651 best = FcLangDifferentLang;
/* neighbours below the lookup position */
652 for (i = id - 1; i >= 0; i--)
654 r = FcLangCompare (lang, fcLangCharSets[i].lang);
655 if (r == FcLangDifferentLang)
657 if (FcLangSetBitGet (ls, i) && r < best)
/* neighbours at and above the lookup position */
660 for (i = id; i < NUM_LANG_CHAR_SET; i++)
662 r = FcLangCompare (lang, fcLangCharSets[i].lang);
663 if (r == FcLangDifferentLang)
665 if (FcLangSetBitGet (ls, i) && r < best)
670 FcStrList *list = FcStrListCreate (ls->extra);
675 while (best > FcLangEqual && (extra = FcStrListNext (list)))
677 r = FcLangCompare (lang, extra);
681 FcStrListDone (list);
/*
 * Compare a language set against every string in a string set.
 *
 * `best` starts at FcLangDifferentLang.  Each string is passed to
 * FcLangSetHasLang, and iteration stops once `best` is no longer above
 * FcLangEqual or the list is exhausted.
 *
 * NOTE(review): the update of `best` from `r` and the return statement
 * are not visible in this excerpt.
 */
688 FcLangSetCompareStrSet (const FcLangSet *ls, FcStrSet *set)
690 FcStrList *list = FcStrListCreate (set);
691 FcLangResult r, best = FcLangDifferentLang;
696 while (best > FcLangEqual && (extra = FcStrListNext (list)))
698 r = FcLangSetHasLang (ls, extra);
702 FcStrListDone (list);
/*
 * Compare two language sets and classify the closest match between them.
 *
 * Only the map words both sets carry are examined (the smaller of the two
 * map_size values, capped at NUM_LANG_SET_MAP).
 *  1. Any bit common to both bitmaps is detected first.
 *  2. Otherwise `best` starts at FcLangDifferentLang.  For each of the
 *     NUM_COUNTRY_SET masks in fcLangCountrySets, if both sets have a bit
 *     inside the same mask, `best` becomes FcLangDifferentTerritory.
 *  3. The extra strings of each set are compared against the other set
 *     with FcLangSetCompareStrSet.
 *
 * NOTE(review): the return on a common bit, the resetting of the
 * per-country accumulators, the use of `r` and the final return are not
 * visible in this excerpt.
 */
708 FcLangSetCompare (const FcLangSet *lsa, const FcLangSet *lsb)
711 FcLangResult best, r;
712 FcChar32 aInCountrySet, bInCountrySet;
714 count = FC_MIN (lsa->map_size, lsb->map_size);
715 count = FC_MIN (NUM_LANG_SET_MAP, count);
716 for (i = 0; i < count; i++)
717 if (lsa->map[i] & lsb->map[i])
719 best = FcLangDifferentLang;
720 for (j = 0; j < NUM_COUNTRY_SET; j++)
725 for (i = 0; i < count; i++)
727 aInCountrySet |= lsa->map[i] & fcLangCountrySets[j][i];
728 bInCountrySet |= lsb->map[i] & fcLangCountrySets[j][i];
730 if (aInCountrySet && bInCountrySet)
732 best = FcLangDifferentTerritory;
739 r = FcLangSetCompareStrSet (lsb, lsa->extra);
743 if (best > FcLangEqual && lsb->extra)
745 r = FcLangSetCompareStrSet (lsa, lsb->extra);
753 * Used in computing values -- mustn't allocate any storage
/*
 * Wrap a single language string in an FcLangSet that lives inside the
 * caller-supplied promotion buffer.
 *
 * vbuf is reinterpreted as a local FcLangSetPromotionBuffer; a static
 * assertion checks that this layout is no larger than
 * FcValuePromotionBuffer.  The embedded set's bitmap is zeroed and its
 * map_size set to NUM_LANG_SET_MAP.  The tag is looked up with
 * FcLangSetIndex: either its bit is set, or the embedded string set is
 * wired up as `extra` with a single entry that points at `lang` itself.
 * The string is not copied, so the buffer contents depend on `lang`
 * staying valid.
 *
 * NOTE(review): the members of the buffer struct, the branch condition,
 * the remaining string-set fields and the return statement are not
 * visible in this excerpt.
 */
756 FcLangSetPromote (const FcChar8 *lang, FcValuePromotionBuffer *vbuf)
763 } FcLangSetPromotionBuffer;
764 FcLangSetPromotionBuffer *buf = (FcLangSetPromotionBuffer *) vbuf;
766 FC_ASSERT_STATIC (sizeof (FcLangSetPromotionBuffer) <= sizeof (FcValuePromotionBuffer));
768 memset (buf->ls.map, '\0', sizeof (buf->ls.map));
769 buf->ls.map_size = NUM_LANG_SET_MAP;
773 id = FcLangSetIndex (lang);
776 FcLangSetBitSet (&buf->ls, id);
780 buf->ls.extra = &buf->strs;
783 buf->strs.strs = &buf->str;
784 FcRefInit (&buf->strs.ref, 1);
/* borrows the caller's string; nothing is duplicated */
785 buf->str = (FcChar8 *) lang;
/*
 * Compute a hash value for a language set by iterating over its bitmap
 * words, limited to the smaller of ls->map_size and NUM_LANG_SET_MAP.
 *
 * NOTE(review): how each word is folded into the hash, whether the extra
 * strings contribute, and the return statement are not visible in this
 * excerpt.
 */
792 FcLangSetHash (const FcLangSet *ls)
797 count = FC_MIN (ls->map_size, NUM_LANG_SET_MAP);
798 for (i = 0; i < count; i++)
/*
 * Parse a textual language list into a new FcLangSet.
 *
 * A set is created with FcLangSetCreate.  Each code is collected into a
 * 32-byte local buffer, reading at most 31 characters and stopping at NUL
 * or '|', and is then added with FcLangSetAdd.
 *
 * NOTE(review): the outer loop, the termination of the buffer, the
 * handling of codes longer than 31 characters, the path that reaches
 * FcLangSetDestroy (ls) and the return statements are not visible in this
 * excerpt.
 */
806 FcNameParseLangSet (const FcChar8 *string)
808 FcChar8 lang[32], c = 0;
812 ls = FcLangSetCreate ();
818 for(i = 0; i < 31;i++)
821 if(c == '\0' || c == '|')
822 break; /* end of this code */
826 if (!FcLangSetAdd (ls, lang))
833 FcLangSetDestroy (ls);
/*
 * Append the textual form of a language set to a string buffer.
 *
 * Every set bit in the bitmap (limited to the smaller of ls->map_size and
 * NUM_LANG_SET_MAP words) is turned back into a bit id, (i << 5) | bit.
 * That id is mapped through fcLangCharSetIndicesInv to a table entry,
 * whose language name is appended.  The strings in ls->extra are appended
 * afterwards.  '|' is written as a separator, and a failed append closes
 * the string list before the failure is handled.
 *
 * NOTE(review): the conditions guarding the '|' writes, the updates of
 * `first` and the return statements are not visible in this excerpt.
 */
839 FcNameUnparseLangSet (FcStrBuf *buf, const FcLangSet *ls)
843 FcBool first = FcTrue;
845 count = FC_MIN (ls->map_size, NUM_LANG_SET_MAP);
846 for (i = 0; i < count; i++)
848 if ((bits = ls->map[i]))
850 for (bit = 0; bit <= 31; bit++)
851 if (bits & (1U << bit))
/* rebuild the bit id from word index and bit position */
853 int id = (i << 5) | bit;
855 if (!FcStrBufChar (buf, '|'))
857 if (!FcStrBufString (buf, fcLangCharSets[fcLangCharSetIndicesInv[id]].lang))
865 FcStrList *list = FcStrListCreate (ls->extra);
870 while ((extra = FcStrListNext (list)))
873 if (!FcStrBufChar (buf, '|'))
875 FcStrListDone (list);
878 if (!FcStrBufString (buf, extra))
880 FcStrListDone (list);
885 FcStrListDone (list);
/*
 * Test two language sets for equality.
 *
 * The bitmap words both sets carry (the smaller map_size, capped at
 * NUM_LANG_SET_MAP) are compared one by one.  The extra string sets are
 * then considered: both absent is one case, and both present defers to
 * FcStrSetEqual.
 *
 * NOTE(review): the values returned on a bitmap mismatch, when both
 * extras are absent, and when only one is present are not visible in this
 * excerpt.
 */
891 FcLangSetEqual (const FcLangSet *lsa, const FcLangSet *lsb)
895 count = FC_MIN (lsa->map_size, lsb->map_size);
896 count = FC_MIN (NUM_LANG_SET_MAP, count);
897 for (i = 0; i < count; i++)
899 if (lsa->map[i] != lsb->map[i])
902 if (!lsa->extra && !lsb->extra)
904 if (lsa->extra && lsb->extra)
905 return FcStrSetEqual (lsa->extra, lsb->extra);
/*
 * Decide whether a language set covers a single language tag.
 *
 * The tag is looked up with FcLangSetIndex; a set bit at that index is
 * handled as its own case.  Otherwise the table is scanned downwards from
 * id - 1 and upwards from id.  Each scan tests for FcLangDifferentLang
 * from FcLangCompare, and tests each entry for having its bit set in ls
 * while FcLangContains accepts it for `lang`.  The strings in ls->extra
 * are checked with FcLangContains last.
 *
 * NOTE(review): the handling of a negative index, the loop exits and all
 * return statements are not visible in this excerpt.
 */
910 FcLangSetContainsLang (const FcLangSet *ls, const FcChar8 *lang)
915 id = FcLangSetIndex (lang);
918 else if (FcLangSetBitGet (ls, id))
921 * search up and down among equal languages for a match
923 for (i = id - 1; i >= 0; i--)
925 if (FcLangCompare (fcLangCharSets[i].lang, lang) == FcLangDifferentLang)
927 if (FcLangSetBitGet (ls, i) &&
928 FcLangContains (fcLangCharSets[i].lang, lang))
931 for (i = id; i < NUM_LANG_CHAR_SET; i++)
933 if (FcLangCompare (fcLangCharSets[i].lang, lang) == FcLangDifferentLang)
935 if (FcLangSetBitGet (ls, i) &&
936 FcLangContains (fcLangCharSets[i].lang, lang))
941 FcStrList *list = FcStrListCreate (ls->extra);
946 while ((extra = FcStrListNext (list)))
948 if (FcLangContains (extra, lang))
951 FcStrListDone (list);
960 * return FcTrue if lsa contains every language in lsb
/*
 * Check whether lsa covers every language present in lsb.
 *
 * For each bitmap word both sets carry (the smaller map_size, capped at
 * NUM_LANG_SET_MAP), the bits set in lsb but not in lsa are computed.
 * Each such bit is mapped through fcLangCharSetIndicesInv to a language
 * name and tested with FcLangSetContainsLang against lsa.  The strings in
 * lsb->extra are tested the same way.  With FC_DBG_MATCHV both sets and
 * every missing language are printed.
 *
 * NOTE(review): the return statements, including what happens after a
 * missing language is reported, are not visible in this excerpt.
 */
963 FcLangSetContains (const FcLangSet *lsa, const FcLangSet *lsb)
968 if (FcDebug() & FC_DBG_MATCHV)
970 printf ("FcLangSet "); FcLangSetPrint (lsa);
971 printf (" contains "); FcLangSetPrint (lsb);
975 * check bitmaps for missing language support
977 count = FC_MIN (lsa->map_size, lsb->map_size);
978 count = FC_MIN (NUM_LANG_SET_MAP, count);
979 for (i = 0; i < count; i++)
/* bits lsb has in this word that lsa lacks */
981 missing = lsb->map[i] & ~lsa->map[i];
984 for (j = 0; j < 32; j++)
985 if (missing & (1U << j))
987 if (!FcLangSetContainsLang (lsa,
988 fcLangCharSets[fcLangCharSetIndicesInv[i*32 + j]].lang))
990 if (FcDebug() & FC_DBG_MATCHV)
991 printf ("\tMissing bitmap %s\n", fcLangCharSets[fcLangCharSetIndicesInv[i*32+j]].lang);
999 FcStrList *list = FcStrListCreate (lsb->extra);
1004 while ((extra = FcStrListNext (list)))
1006 if (!FcLangSetContainsLang (lsa, extra))
1008 if (FcDebug() & FC_DBG_MATCHV)
1009 printf ("\tMissing string %s\n", extra);
1013 FcStrListDone (list);
/*
 * Reserve room for one FcLangSet in the serialization context by calling
 * FcSerializeAlloc with sizeof (FcLangSet).
 *
 * NOTE(review): the values returned on success and failure are not
 * visible in this excerpt.
 */
1022 FcLangSetSerializeAlloc (FcSerialize *serialize, const FcLangSet *l)
1024 if (!FcSerializeAlloc (serialize, l, sizeof (FcLangSet)))
/*
 * Write a language set into its serialized slot.
 *
 * The destination is obtained from FcSerializePtr.  Its bitmap is zeroed
 * and then filled from l->map, copying no more bytes than the destination
 * holds and no more than the source's map_size words provide.  map_size
 * is stored as NUM_LANG_SET_MAP, and `extra` is stored as NULL, so extra
 * strings are dropped from the serialized form.
 *
 * NOTE(review): the check of the FcSerializePtr result and the return
 * statement are not visible in this excerpt.
 */
1030 FcLangSetSerialize(FcSerialize *serialize, const FcLangSet *l)
1032 FcLangSet *l_serialize = FcSerializePtr (serialize, l);
1036 memset (l_serialize->map, '\0', sizeof (l_serialize->map));
1037 memcpy (l_serialize->map, l->map, FC_MIN (sizeof (l_serialize->map), l->map_size * sizeof (l->map[0])));
1038 l_serialize->map_size = NUM_LANG_SET_MAP;
1039 l_serialize->extra = NULL; /* We don't serialize ls->extra */
/*
 * Collect the languages of a set into a newly created FcStrSet.
 *
 * Every table index whose bit is set in ls contributes the matching
 * fcLangCharSets name.  The strings in ls->extra are then added as well.
 * The results of the FcStrSetAdd calls are not checked.
 *
 * NOTE(review): the creation-failure check, the guard around the `extra`
 * handling and the return statement are not visible in this excerpt.
 */
1044 FcLangSetGetLangs (const FcLangSet *ls)
1049 langs = FcStrSetCreate();
1053 for (i = 0; i < NUM_LANG_CHAR_SET; i++)
1054 if (FcLangSetBitGet (ls, i))
1055 FcStrSetAdd (langs, fcLangCharSets[i].lang);
1059 FcStrList *list = FcStrListCreate (ls->extra);
1064 while ((extra = FcStrListNext (list)))
1065 FcStrSetAdd (langs, extra);
1067 FcStrListDone (list);
/*
 * Shared driver for the set-combining operations.
 *
 * Starts from a copy of `a`, enumerates the languages of `b` with
 * FcLangSetGetLangs and calls func (langset, str) for each one.  The
 * value returned by func is ignored.  The temporary string set is
 * destroyed right after the list is created; presumably the list keeps
 * its own reference, which should be confirmed against FcStrListCreate.
 *
 * NOTE(review): the middle parameter, the remainder of func's signature,
 * the release of the list and the return statement are not visible in
 * this excerpt.
 */
1075 FcLangSetOperate(const FcLangSet *a,
1077 FcBool (*func) (FcLangSet *ls,
1080 FcLangSet *langset = FcLangSetCopy (a);
1081 FcStrSet *set = FcLangSetGetLangs (b);
1082 FcStrList *sl = FcStrListCreate (set);
1085 FcStrSetDestroy (set);
1086 while ((str = FcStrListNext (sl)))
1088 func (langset, str);
/*
 * Union of two language sets: FcLangSetOperate with FcLangSetAdd, i.e. a
 * copy of `a` to which every language of `b` is added.
 */
1096 FcLangSetUnion (const FcLangSet *a, const FcLangSet *b)
1098 return FcLangSetOperate(a, b, FcLangSetAdd);
/*
 * Difference of two language sets: FcLangSetOperate with FcLangSetDel,
 * i.e. a copy of `a` from which every language of `b` is removed.
 */
1102 FcLangSetSubtract (const FcLangSet *a, const FcLangSet *b)
1104 return FcLangSetOperate(a, b, FcLangSetDel);
1108 #include "fcaliastail.h"
1109 #include "fcftaliastail.h"