2 * Copyright © 2009,2010 Red Hat, Inc.
3 * Copyright © 2011,2012 Google, Inc.
5 * This is part of HarfBuzz, a text shaping library.
7 * Permission is hereby granted, without written agreement and without
8 * license or royalty fees, to use, copy, modify, and distribute this
9 * software and its documentation for any purpose, provided that the
10 * above copyright notice and the following two paragraphs appear in
11 * all copies of this software.
13 * IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE TO ANY PARTY FOR
14 * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES
15 * ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN
16 * IF THE COPYRIGHT HOLDER HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH
19 * THE COPYRIGHT HOLDER SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING,
20 * BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
21 * FITNESS FOR A PARTICULAR PURPOSE. THE SOFTWARE PROVIDED HEREUNDER IS
22 * ON AN "AS IS" BASIS, AND THE COPYRIGHT HOLDER HAS NO OBLIGATION TO
23 * PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS.
25 * Red Hat Author(s): Behdad Esfahbod
26 * Google Author(s): Behdad Esfahbod
29 #include "hb-private.hh"
31 #include "hb-mutex-private.hh"
32 #include "hb-object-private.hh"
/* Global storage for the library's runtime options. */
42 hb_options_union_t _hb_options;
/* Computes the option flags: marks them as initialized and turns on
 * uniscribe_bug_compatible when the HB_OPTIONS environment variable
 * contains the substring "uniscribe-bug-compatible".
 * NOTE(review): the declaration of `u` and the final store are not visible
 * in this listing -- presumably `u` is a local that is assigned to
 * _hb_options at the end; confirm against the full file. */
45 _hb_options_init (void)
49 u.opts.initialized = 1;
/* getenv() yields a null pointer when the variable is unset, hence the
 * `c &&` guard before strstr(). */
51 char *c = getenv ("HB_OPTIONS");
52 u.opts.uniscribe_bug_compatible = c && strstr (c, "uniscribe-bug-compatible");
54 /* This is idempotent and threadsafe. */
63 * @str: (array length=len) (element-type uint8_t):
/* Builds an hb_tag_t from the leading bytes of @str.
 * A null pointer, a zero @len or an empty string is rejected up front (the
 * value returned for that case is on a line not visible here).  At most
 * four bytes are consumed and copying stops early at a NUL byte. */
73 hb_tag_from_string (const char *str, int len)
78 if (!str || !len || !*str)
/* Negative or over-long lengths are normalized here.
 * NOTE(review): the statement itself is elided -- presumably `len = 4`. */
81 if (len < 0 || len > 4)
83 for (i = 0; i < (unsigned) len && str[i]; i++)
/* The four collected bytes are packed by the HB_TAG macro. */
88 return HB_TAG (tag[0], tag[1], tag[2], tag[3]);
94 * @buf: (out caller-allocates) (array fixed-size=4) (element-type uint8_t):
/* Writes the four bytes of @tag into @buf, most significant byte first.
 * @buf must have room for at least four bytes; the visible statements do
 * not store a NUL terminator. */
101 hb_tag_to_string (hb_tag_t tag, char *buf)
103 buf[0] = (char) (uint8_t) (tag >> 24);
104 buf[1] = (char) (uint8_t) (tag >> 16);
105 buf[2] = (char) (uint8_t) (tag >> 8);
106 buf[3] = (char) (uint8_t) (tag >> 0);
/* Table of direction names, one 4-byte slot per entry.  The direction
 * helpers index it with (direction - HB_DIRECTION_LTR).
 * NOTE(review): the initializer entries are not visible in this listing. */
112 const char direction_strings[][4] = {
120 * hb_direction_from_string:
121 * @str: (array length=len) (element-type uint8_t):
131 hb_direction_from_string (const char *str, int len)
133 if (unlikely (!str || !len || !*str))
134 return HB_DIRECTION_INVALID;
136 /* Lets match loosely: just match the first letter, such that
137 * all of "ltr", "left-to-right", etc work!
139 char c = TOLOWER (str[0]);
140 for (unsigned int i = 0; i < ARRAY_LENGTH (direction_strings); i++)
141 if (c == direction_strings[i][0])
142 return (hb_direction_t) (HB_DIRECTION_LTR + i);
144 return HB_DIRECTION_INVALID;
148 * hb_direction_to_string:
153 * Return value: (transfer none):
/* Maps @direction to its entry in direction_strings.
 * The offset from HB_DIRECTION_LTR is cast to unsigned so that values below
 * HB_DIRECTION_LTR become large and fail the single range check.
 * NOTE(review): the fallback return for out-of-range values is on a line
 * not visible here. */
158 hb_direction_to_string (hb_direction_t direction)
160 if (likely ((unsigned int) (direction - HB_DIRECTION_LTR)
161 < ARRAY_LENGTH (direction_strings)))
162 return direction_strings[direction - HB_DIRECTION_LTR];
/* Record type behind hb_language_t handles.
 * NOTE(review): the members are not visible in this listing; the helpers
 * below treat an hb_language_t as a pointer to a byte string. */
170 struct hb_language_impl_t {
/* Byte canonicalization table used for language strings.
 * Digits map to themselves, ASCII letters map to their lower-case form, and
 * '-', '@' and '_' all map to '-'.  Every other byte maps to 0; entries
 * past the rows listed here are zero because the initializer is shorter
 * than the declared 256 elements. */
174 static const char canon_map[256] = {
175 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
176 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
177 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, '-', 0, 0,
178 '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', 0, 0, 0, 0, 0, 0,
179 '-', 'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o',
180 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z', 0, 0, 0, 0, '-',
181 0, 'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o',
182 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z', 0, 0, 0, 0, 0
/* Compares a stored language against a raw string.
 * Bytes of v1 are compared as-is, while each byte of v2 is first passed
 * through canon_map -- so v1 is presumably already canonical (TODO confirm).
 * NOTE(review): the declaration of v2 and the loop body are not visible in
 * this listing. */
186 lang_equal (hb_language_t v1,
189 const unsigned char *p1 = (const unsigned char *) v1;
190 const unsigned char *p2 = (const unsigned char *) v2;
192 while (*p1 && *p1 == canon_map[*p2]) {
/* Equal only if both sides stop at the same point: v1 at its NUL and v2 at
 * a byte that canon_map sends to 0. */
197 return *p1 == canon_map[*p2];
/* Hashes the canonical form of the byte string at @key.
 * Hashing stops at the first byte that canon_map sends to 0.
 * NOTE(review): the initialization of `h`, the pointer advance and the
 * return are on lines not visible in this listing. */
202 lang_hash (const void *key)
/* C++ has no implicit conversion from `const void *`; cast explicitly. */
204 const unsigned char *p = (const unsigned char *) key;
206 while (canon_map[*p])
/* (h << 5) - h is h * 31. */
208 h = (h << 5) - h + canon_map[*p];
/* Node of the interned-language list: owns one heap-allocated language
 * string and links to the next node. */
217 struct hb_language_item_t {
219 struct hb_language_item_t *next;
/* Compares the stored language against a raw string via lang_equal(). */
222 inline bool operator == (const char *s) const {
223 return lang_equal (lang, s);
/* Stores a private copy of @s (including its NUL terminator) in `lang`. */
226 inline hb_language_item_t & operator = (const char *s) {
227 /* If a custom allocator is used, calling strdup() pairs
228 badly with a call to the custom free() in finish() below.
229 Therefore don't call strdup(), implement its behavior. */
231 size_t len = strlen(s) + 1;
232 lang = (hb_language_t) malloc(len);
/* NOTE(review): the allocation-failure check between malloc() and memcpy()
 * and the body of the loop below are on lines not visible in this listing;
 * the loop presumably canonicalizes each byte -- confirm. */
235 memcpy((unsigned char *) lang, s, len);
236 for (unsigned char *p = (unsigned char *) lang; *p; p++)
/* Releases the string allocated by operator =. */
243 void finish (void) { free ((void *) lang); }
247 /* Thread-safe lock-free language list */
/* Head of the singly linked list of interned languages (linked via ->next). */
249 static hb_language_item_t *langs;
/* NOTE(review): the line below appears to belong to the list teardown
 * routine (free_langs, which is registered with atexit() further down); its
 * signature and loop are not visible in this listing.  It reads the
 * successor of the current head, presumably before the head is released. */
256 hb_language_item_t *next = langs->next;
/* Returns the list node for @key, creating and publishing one when no
 * existing node matches.
 * NOTE(review): the match test inside the scan, the error returns, the
 * assignment of the language string and the retry after a failed publish
 * are on lines not visible in this listing. */
264 static hb_language_item_t *
265 lang_find_or_insert (const char *key)
/* Snapshot the list head once; the scan and the later publish both use it. */
268 hb_language_item_t *first_lang = (hb_language_item_t *) hb_atomic_ptr_get (&langs);
270 for (hb_language_item_t *lang = first_lang; lang; lang = lang->next)
274 /* Not found; allocate one. */
275 hb_language_item_t *lang = (hb_language_item_t *) calloc (1, sizeof (hb_language_item_t));
276 if (unlikely (!lang))
/* The new node is prepended in front of the snapshot. */
278 lang->next = first_lang;
280 if (unlikely (!lang->lang))
/* Publish only if the head is still the snapshot taken above. */
286 if (!hb_atomic_ptr_cmpexch (&langs, first_lang, lang)) {
294 atexit (free_langs); /* First person registers atexit() callback. */
302 * hb_language_from_string:
303 * @str: (array length=len) (element-type uint8_t): a string representing
304 * ISO 639 language code
305 * @len: length of the @str, or -1 if it is NUL-terminated.
307 * Converts @str representing an ISO 639 language code to the corresponding
310 * Return value: (transfer none):
311 * The #hb_language_t corresponding to the ISO 639 language code.
/* Interns the language named by @str and returns its handle.
 * Returns HB_LANGUAGE_INVALID for a null/empty string, a zero @len, or when
 * lang_find_or_insert() yields no item. */
316 hb_language_from_string (const char *str, int len)
318 if (!str || !len || !*str)
319 return HB_LANGUAGE_INVALID;
321 hb_language_item_t *item = nullptr;
/* Two paths follow: a length-bounded copy into the local `strbuf`, or a
 * lookup on @str directly.
 * NOTE(review): the branch condition, the declaration of `strbuf` and the
 * terminating store are elided -- presumably the copy path is taken when
 * @len is non-negative; confirm. */
324 /* NUL-terminate it. */
326 len = MIN (len, (int) sizeof (strbuf) - 1);
327 memcpy (strbuf, str, len);
329 item = lang_find_or_insert (strbuf);
332 item = lang_find_or_insert (str);
334 return likely (item) ? item->lang : HB_LANGUAGE_INVALID;
338 * hb_language_to_string:
339 * @language: an #hb_language_t to convert.
341 * See hb_language_from_string().
343 * Return value: (transfer none):
344 * A NUL-terminated string representing the @language. Must not be freed by
/* Returns the string form of @language.
 * NOTE(review): the return statement is on a line not visible in this
 * listing; per the comment below it tolerates a null @language. */
350 hb_language_to_string (hb_language_t language)
352 /* This is actually nullptr-safe! */
357 * hb_language_get_default:
361 * Return value: (transfer none):
/* Returns the language derived from the current LC_CTYPE locale, computing
 * and caching it the first time it is requested. */
366 hb_language_get_default (void)
368 static hb_language_t default_language = HB_LANGUAGE_INVALID;
370 hb_language_t language = (hb_language_t) hb_atomic_ptr_get (&default_language);
371 if (unlikely (language == HB_LANGUAGE_INVALID)) {
/* setlocale() with a null locale argument only queries the current setting. */
372 language = hb_language_from_string (setlocale (LC_CTYPE, nullptr), -1);
/* Best effort: if another thread cached a value first, that value is kept. */
373 (void) hb_atomic_ptr_cmpexch (&default_language, HB_LANGUAGE_INVALID, language);
/* Return the value obtained above instead of re-reading the shared static
 * with a plain load, which would be unsynchronized with respect to the
 * atomic accesses on it. */
376 return language;
383 * hb_script_from_iso15924_tag:
384 * @tag: an #hb_tag_t representing an ISO 15924 tag.
386 * Converts an ISO 15924 script tag to a corresponding #hb_script_t.
389 * An #hb_script_t corresponding to the ISO 15924 tag.
/* Converts an ISO 15924 tag to an hb_script_t.
 * HB_TAG_NONE maps to HB_SCRIPT_INVALID; a handful of aliases and script
 * variants are mapped explicitly; any other tag whose bytes fall in the
 * letter ranges checked below is returned unchanged; everything else is
 * HB_SCRIPT_UNKNOWN.
 * NOTE(review): the `switch` header around the case labels is on a line not
 * visible in this listing. */
394 hb_script_from_iso15924_tag (hb_tag_t tag)
396 if (unlikely (tag == HB_TAG_NONE))
397 return HB_SCRIPT_INVALID;
399 /* Be lenient, adjust case (one capital letter followed by three small letters) */
/* Clearing bit 0x20 in every byte upper-cases ASCII letters; setting it
 * again in the low three bytes lower-cases those. */
400 tag = (tag & 0xDFDFDFDFu) | 0x00202020u;
404 /* These graduated from the 'Q' private-area codes, but
405 * the old code is still aliased by Unicode, and the Qaai
406 * one in use by ICU. */
407 case HB_TAG('Q','a','a','i'): return HB_SCRIPT_INHERITED;
408 case HB_TAG('Q','a','a','c'): return HB_SCRIPT_COPTIC;
410 /* Script variants from http://unicode.org/iso15924/ */
411 case HB_TAG('C','y','r','s'): return HB_SCRIPT_CYRILLIC;
412 case HB_TAG('L','a','t','f'): return HB_SCRIPT_LATIN;
413 case HB_TAG('L','a','t','g'): return HB_SCRIPT_LATIN;
414 case HB_TAG('S','y','r','e'): return HB_SCRIPT_SYRIAC;
415 case HB_TAG('S','y','r','j'): return HB_SCRIPT_SYRIAC;
416 case HB_TAG('S','y','r','n'): return HB_SCRIPT_SYRIAC;
419 /* If it looks right, just use the tag as a script */
/* The top three bits of each byte must place the first byte in 0x40..0x5F
 * and the remaining three bytes in 0x60..0x7F. */
420 if (((uint32_t) tag & 0xE0E0E0E0u) == 0x40606060u)
421 return (hb_script_t) tag;
423 /* Otherwise, return unknown */
424 return HB_SCRIPT_UNKNOWN;
428 * hb_script_from_string:
429 * @str: (array length=len) (element-type uint8_t): a string representing an
431 * @len: length of the @str, or -1 if it is NUL-terminated.
433 * Converts a string @str representing an ISO 15924 script tag to a
434 * corresponding #hb_script_t. Shorthand for hb_tag_from_string() then
435 * hb_script_from_iso15924_tag().
438 * An #hb_script_t corresponding to the ISO 15924 tag.
/* Convenience wrapper: turns @str into a tag with hb_tag_from_string() and
 * converts that tag with hb_script_from_iso15924_tag(). */
443 hb_script_from_string (const char *str, int len)
445 return hb_script_from_iso15924_tag (hb_tag_from_string (str, len));
449 * hb_script_to_iso15924_tag:
450 * @script: an #hb_script_t to convert.
452 * See hb_script_from_iso15924_tag().
455 * An #hb_tag_t representing an ISO 15924 script tag.
/* Returns @script reinterpreted as an hb_tag_t; the conversion is a plain
 * cast with no table lookup. */
460 hb_script_to_iso15924_tag (hb_script_t script)
462 return (hb_tag_t) script;
466 * hb_script_get_horizontal_direction:
/* Returns the horizontal direction associated with @script:
 * HB_DIRECTION_RTL for the scripts listed in the switch below and
 * HB_DIRECTION_LTR for everything else.  The case labels are grouped by the
 * Unicode version that introduced each script.
 * NOTE(review): some lines of this function are not visible in this
 * listing, so the list of case labels here may be incomplete. */
476 hb_script_get_horizontal_direction (hb_script_t script)
478 /* http://goo.gl/x9ilM */
479 switch ((hb_tag_t) script)
481 /* Unicode-1.1 additions */
482 case HB_SCRIPT_ARABIC:
483 case HB_SCRIPT_HEBREW:
485 /* Unicode-3.0 additions */
486 case HB_SCRIPT_SYRIAC:
487 case HB_SCRIPT_THAANA:
489 /* Unicode-4.0 additions */
490 case HB_SCRIPT_CYPRIOT:
492 /* Unicode-4.1 additions */
493 case HB_SCRIPT_KHAROSHTHI:
495 /* Unicode-5.0 additions */
496 case HB_SCRIPT_PHOENICIAN:
499 /* Unicode-5.1 additions */
500 case HB_SCRIPT_LYDIAN:
502 /* Unicode-5.2 additions */
503 case HB_SCRIPT_AVESTAN:
504 case HB_SCRIPT_IMPERIAL_ARAMAIC:
505 case HB_SCRIPT_INSCRIPTIONAL_PAHLAVI:
506 case HB_SCRIPT_INSCRIPTIONAL_PARTHIAN:
507 case HB_SCRIPT_OLD_SOUTH_ARABIAN:
508 case HB_SCRIPT_OLD_TURKIC:
509 case HB_SCRIPT_SAMARITAN:
511 /* Unicode-6.0 additions */
512 case HB_SCRIPT_MANDAIC:
514 /* Unicode-6.1 additions */
515 case HB_SCRIPT_MEROITIC_CURSIVE:
516 case HB_SCRIPT_MEROITIC_HIEROGLYPHS:
518 /* Unicode-7.0 additions */
519 case HB_SCRIPT_MANICHAEAN:
520 case HB_SCRIPT_MENDE_KIKAKUI:
521 case HB_SCRIPT_NABATAEAN:
522 case HB_SCRIPT_OLD_NORTH_ARABIAN:
523 case HB_SCRIPT_PALMYRENE:
524 case HB_SCRIPT_PSALTER_PAHLAVI:
526 /* Unicode-8.0 additions */
527 case HB_SCRIPT_HATRAN:
528 case HB_SCRIPT_OLD_HUNGARIAN:
530 /* Unicode-9.0 additions */
531 case HB_SCRIPT_ADLAM:
/* All labels above share this single return. */
533 return HB_DIRECTION_RTL;
536 return HB_DIRECTION_LTR;
540 /* hb_user_data_array_t */
/* Stores, replaces or removes the user-data entry for @key.
 * When both `data` and @destroy are null the entry for @key is removed;
 * otherwise an item {key, data, destroy} is handed to replace_or_insert()
 * with `replace` forwarded as a bool.
 * NOTE(review): the remaining parameters, the definition of `lock` and the
 * return statements are on lines not visible in this listing. */
543 hb_user_data_array_t::set (hb_user_data_key_t *key,
545 hb_destroy_func_t destroy,
552 if (!data && !destroy) {
553 items.remove (key, lock);
557 hb_user_data_item_t item = {key, data, destroy};
/* `!!` collapses the result of replace_or_insert() to a bool. */
558 bool ret = !!items.replace_or_insert (item, lock, (bool) replace);
/* Looks up @key and returns the data pointer of the matching item, or
 * nullptr when find() reports no match.
 * NOTE(review): the definition of `lock` is not visible in this listing. */
564 hb_user_data_array_t::get (hb_user_data_key_t *key)
566 hb_user_data_item_t item = {nullptr, nullptr, nullptr};
568 return items.find (key, &item, lock) ? item.data : nullptr;
576 * @major: (out): Library major version component.
577 * @minor: (out): Library minor version component.
578 * @micro: (out): Library micro version component.
580 * Returns library version as three integer components.
/* Stores the compile-time version components through the three output
 * pointers.  All three are written unconditionally, so callers must pass
 * valid pointers.
 * NOTE(review): the `minor` and `micro` parameter lines are not visible in
 * this listing. */
585 hb_version (unsigned int *major,
589 *major = HB_VERSION_MAJOR;
590 *minor = HB_VERSION_MINOR;
591 *micro = HB_VERSION_MICRO;
597 * Returns library version as a string with three components.
599 * Return value: library version string.
/* Returns the compile-time HB_VERSION_STRING constant. */
604 hb_version_string (void)
606 return HB_VERSION_STRING;
610 * hb_version_atleast:
/* Evaluates HB_VERSION_ATLEAST for the requested version triple.
 * NOTE(review): the `minor` and `micro` parameter lines are not visible in
 * this listing. */
622 hb_version_atleast (unsigned int major,
626 return HB_VERSION_ATLEAST (major, minor, micro);
631 /* hb_feature_t and hb_variation_t */
/* Skips whitespace at the cursor *pp without moving past @end.
 * NOTE(review): the loop body and the return value are on lines not visible
 * here; callers below use the result in a boolean `&&` chain. */
634 parse_space (const char **pp, const char *end)
636 while (*pp < end && ISSPACE (**pp))
/* Skips whitespace, then tests whether the next byte is @c.
 * NOTE(review): the return statements and the cursor advance on a match
 * are on lines not visible in this listing. */
642 parse_char (const char **pp, const char *end, char c)
644 parse_space (pp, end);
/* Fails at end of input or on any other character. */
646 if (*pp == end || **pp != c)
/* Parses an integer at the cursor into *pv.
 * The input is first copied into the local `buf`, bounded by both the
 * buffer size and the remaining input, because the range [*pp, end) need
 * not be NUL-terminated.
 * NOTE(review): strncpy() does not terminate the copy when it copies `len`
 * bytes; the terminating store, the `errno` reset, the declarations of
 * `buf`, `p`, `pend` and `v`, and the success path are on lines not visible
 * in this listing -- confirm against the full file. */
654 parse_uint (const char **pp, const char *end, unsigned int *pv)
657 unsigned int len = MIN (ARRAY_LENGTH (buf) - 1, (unsigned int) (end - *pp));
658 strncpy (buf, *pp, len);
665 /* Intentionally use strtol instead of strtoul, such that
666 * -1 turns into "big number"... */
/* Base 0 lets strtol() accept decimal, octal (0...) and hex (0x...) input. */
668 v = strtol (p, &pend, 0);
/* Reject on a conversion error or when no characters were consumed. */
669 if (errno || p == pend)
/* Same parsing scheme as parse_uint(), with a uint32_t destination.
 * The input is copied into the local `buf` (bounded by buffer size and
 * remaining input) and converted with strtol().
 * NOTE(review): the terminating store after strncpy(), the `errno` reset,
 * the local declarations and the success path are on lines not visible in
 * this listing. */
678 parse_uint32 (const char **pp, const char *end, uint32_t *pv)
681 unsigned int len = MIN (ARRAY_LENGTH (buf) - 1, (unsigned int) (end - *pp));
682 strncpy (buf, *pp, len);
689 /* Intentionally use strtol instead of strtoul, such that
690 * -1 turns into "big number"... */
/* Base 0 lets strtol() accept decimal, octal (0...) and hex (0x...) input. */
692 v = strtol (p, &pend, 0);
/* Reject on a conversion error or when no characters were consumed. */
693 if (errno || p == pend)
/* Portability shims for an explicit "C" locale object.
 * First branch: platforms providing newlocale() and strtod_l().
 * Second branch: MSVC, where the underscore-prefixed CRT equivalents are
 * mapped onto the same macro names and strtod_l() is aliased to
 * _strtod_l().
 * NOTE(review): the closing #endif is on a line not visible here. */
701 #if defined (HAVE_NEWLOCALE) && defined (HAVE_STRTOD_L)
702 #define USE_XLOCALE 1
703 #define HB_LOCALE_T locale_t
704 #define HB_CREATE_LOCALE(locName) newlocale (LC_ALL_MASK, locName, nullptr)
705 #define HB_FREE_LOCALE(loc) freelocale (loc)
706 #elif defined(_MSC_VER)
707 #define USE_XLOCALE 1
708 #define HB_LOCALE_T _locale_t
709 #define HB_CREATE_LOCALE(locName) _create_locale (LC_ALL, locName)
710 #define HB_FREE_LOCALE(loc) _free_locale (loc)
711 #define strtod_l(a, b, c) _strtod_l ((a), (b), (c))
/* Process-wide cached "C" locale object; read and published atomically by
 * the accessor that follows. */
716 static HB_LOCALE_T C_locale;
/* NOTE(review): the line below appears to be the body of the cleanup
 * routine free_C_locale (registered with atexit() further down); its
 * signature is not visible in this listing. */
723 HB_FREE_LOCALE (C_locale);
/* Lazily creates and returns the shared "C" locale object.
 * NOTE(review): these lines are presumably the body of get_C_locale(), which
 * parse_float() calls; the signature, the null check around creation, the
 * retry after a lost race and the return are on lines not visible in this
 * listing. */
731 HB_LOCALE_T C = (HB_LOCALE_T) hb_atomic_ptr_get (&C_locale);
735 C = HB_CREATE_LOCALE ("C");
/* Publish our locale only if nobody else has published one yet. */
737 if (!hb_atomic_ptr_cmpexch (&C_locale, nullptr, C))
/* Lost the race: discard the locale created above.  C_locale now holds the
 * winner's object, which other threads may already be using, so it must
 * not be freed here. */
739 HB_FREE_LOCALE (C);
744 atexit (free_C_locale); /* First person registers atexit() callback. */
/* Parses a floating-point number at the cursor into *pv.
 * The input is copied into the local `buf` (bounded by buffer size and
 * remaining input) before conversion.  Two conversion calls are listed:
 * strtod_l() with the "C" locale from get_C_locale(), and plain strtod().
 * NOTE(review): the preprocessor condition selecting between them
 * (presumably USE_XLOCALE), the terminating store after strncpy(), the
 * `errno` reset and the success path are on lines not visible here. */
753 parse_float (const char **pp, const char *end, float *pv)
756 unsigned int len = MIN (ARRAY_LENGTH (buf) - 1, (unsigned int) (end - *pp));
757 strncpy (buf, *pp, len);
766 v = strtod_l (p, &pend, get_C_locale ());
768 v = strtod (p, &pend);
/* Reject on a conversion error or when no characters were consumed. */
770 if (errno || p == pend)
/* Parses the keywords "on" / "off" at the cursor.
 * After skipping whitespace, a run of alphabetic characters is consumed
 * and must match one of the two keywords exactly (length and content).
 * NOTE(review): the declaration of `p` (presumably the start of the run),
 * the values stored into *pv and the failure return are on lines not
 * visible in this listing. */
779 parse_bool (const char **pp, const char *end, uint32_t *pv)
781 parse_space (pp, end);
784 while (*pp < end && ISALPHA(**pp))
787 /* CSS allows on/off as aliases 1/0. */
788 if (*pp - p == 2 && 0 == strncmp (p, "on", 2))
790 else if (*pp - p == 3 && 0 == strncmp (p, "off", 3))
/* Consumes an optional leading '-' or '+' in front of a feature tag.
 * NOTE(review): the statements that record the sign in @feature and the
 * return are on lines not visible in this listing -- presumably '-' turns
 * the feature off and anything else turns it on; confirm. */
801 parse_feature_value_prefix (const char **pp, const char *end, hb_feature_t *feature)
803 if (parse_char (pp, end, '-'))
/* The '+' is optional, so the result of this call is not checked. */
806 parse_char (pp, end, '+');
/* Parses a tag of one to four alphanumeric characters, optionally wrapped
 * in single or double quotes, and stores it in *tag.
 * NOTE(review): the declarations of `p` and `quote`, the handling of the
 * opening quote, the length check for quoted tags and the return statements
 * are on lines not visible in this listing. */
814 parse_tag (const char **pp, const char *end, hb_tag_t *tag)
816 parse_space (pp, end);
820 if (*pp < end && (**pp == '\'' || **pp == '"'))
827 while (*pp < end && ISALNUM(**pp))
/* Reject an empty run and runs longer than four characters. */
830 if (p == *pp || *pp - p > 4)
833 *tag = hb_tag_from_string (p, *pp - p);
837 /* CSS expects exactly four bytes. And we only allow quotations for
838 * CSS compatibility. So, enforce the length. */
/* A quoted tag must be closed by the same quote character. */
841 if (*pp == end || **pp != quote)
/* Parses the optional "[start:end]" index range of a feature.
 * The end index defaults to (unsigned int) -1.  Inside the brackets both
 * numbers are optional; a ':' introduces the end index.
 * NOTE(review): the default for feature->start, the declaration of
 * `has_start`, the return taken when no '[' is present and the condition
 * guarding `end = start + 1` are on lines not visible in this listing. */
850 parse_feature_indices (const char **pp, const char *end, hb_feature_t *feature)
852 parse_space (pp, end);
857 feature->end = (unsigned int) -1;
859 if (!parse_char (pp, end, '['))
862 has_start = parse_uint (pp, end, &feature->start);
864 if (parse_char (pp, end, ':')) {
/* The end index is optional, so the result of this call is not checked. */
865 parse_uint (pp, end, &feature->end);
868 feature->end = feature->start + 1;
/* The closing bracket is mandatory once '[' has been seen. */
871 return parse_char (pp, end, ']');
/* Parses the optional "=value" suffix of a feature.
 * The value may be a number (parse_uint32) or an on/off keyword
 * (parse_bool); the keyword form is only tried when the numeric form
 * fails.  Returns false only when '=' was present without a value. */
875 parse_feature_value_postfix (const char **pp, const char *end, hb_feature_t *feature)
877 bool had_equal = parse_char (pp, end, '=');
878 bool had_value = parse_uint32 (pp, end, &feature->value) ||
879 parse_bool (pp, end, &feature->value);
880 /* CSS doesn't use equal-sign between tag and value.
881 * If there was an equal-sign, then there *must* be a value.
882 * A value without an equal-sign is ok, but not required. */
883 return !had_equal || had_value;
/* Parses one complete feature: prefix, tag, index range, value suffix and
 * trailing whitespace, in that order.  The `&&` chain stops at the first
 * stage that fails.
 * NOTE(review): the last operand of the chain is on a line not visible in
 * this listing -- presumably a check that the whole input was consumed. */
887 parse_one_feature (const char **pp, const char *end, hb_feature_t *feature)
889 return parse_feature_value_prefix (pp, end, feature) &&
890 parse_tag (pp, end, &feature->tag) &&
891 parse_feature_indices (pp, end, feature) &&
892 parse_feature_value_postfix (pp, end, feature) &&
893 parse_space (pp, end) &&
898 * hb_feature_from_string:
899 * @str: (array length=len) (element-type uint8_t): a string to parse
900 * @len: length of @str, or -1 if string is NUL-terminated
901 * @feature: (out): the #hb_feature_t to initialize with the parsed values
903 * Parses a string into a #hb_feature_t.
905 * TODO: document the syntax here.
908 * %true if @str is successfully parsed, %false otherwise.
/* Parses @str into a local hb_feature_t (`feat`) with parse_one_feature().
 * On the failure path *feature is zero-filled.
 * NOTE(review): the declaration of `feat`, the handling of a negative @len,
 * the copy into *feature on success, the null check guarding the memset and
 * the return statements are on lines not visible in this listing. */
913 hb_feature_from_string (const char *str, int len,
914 hb_feature_t *feature)
921 if (likely (parse_one_feature (&str, str + len, &feat)))
929 memset (feature, 0, sizeof (*feature));
934 * hb_feature_to_string:
935 * @feature: an #hb_feature_t to convert
936 * @buf: (array length=size) (out): output string
937 * @size: the allocated size of @buf
939 * Converts a #hb_feature_t into a NUL-terminated string in the format
940 * understood by hb_feature_from_string(). The client is responsible for
941 * allocating big enough size for @buf, 128 bytes is more than enough.
/* Formats @feature into @buf, writing at most @size bytes.
 * The text is assembled in the local scratch array `s` and then copied out,
 * truncated to size - 1 bytes.  Does nothing when @size is zero.
 * NOTE(review): the declaration of `s`, the literal characters appended
 * between the numeric fields, the updates of `len` after the tag and the
 * final terminator store are on lines not visible in this listing. */
946 hb_feature_to_string (hb_feature_t *feature,
947 char *buf, unsigned int size)
949 if (unlikely (!size)) return;
952 unsigned int len = 0;
/* A zero value gets special treatment ahead of the tag. */
953 if (feature->value == 0)
955 hb_tag_to_string (feature->tag, s + len);
/* Trim trailing spaces from the text produced so far. */
957 while (len && s[len - 1] == ' ')
/* The index range is emitted only when it differs from the defaults
 * (start 0, end (unsigned int) -1). */
959 if (feature->start != 0 || feature->end != (unsigned int) -1)
/* MAX (0, ...) ignores a negative (error) result from snprintf(). */
963 len += MAX (0, snprintf (s + len, ARRAY_LENGTH (s) - len, "%u", feature->start));
/* The end index is omitted when the range covers exactly one position. */
964 if (feature->end != feature->start + 1) {
966 if (feature->end != (unsigned int) -1)
967 len += MAX (0, snprintf (s + len, ARRAY_LENGTH (s) - len, "%u", feature->end));
/* Values above 1 are written out numerically. */
971 if (feature->value > 1)
974 len += MAX (0, snprintf (s + len, ARRAY_LENGTH (s) - len, "%u", feature->value));
976 assert (len < ARRAY_LENGTH (s));
/* Leave room for one more byte in the caller's buffer. */
977 len = MIN (len, size - 1);
978 memcpy (buf, s, len);
/* Parses the value part of a variation: an optional '=' followed by a
 * mandatory floating-point number stored in variation->value. */
985 parse_variation_value (const char **pp, const char *end, hb_variation_t *variation)
987 parse_char (pp, end, '='); /* Optional. */
988 return parse_float (pp, end, &variation->value);
/* Parses one complete variation: tag, value and trailing whitespace, in
 * that order.  The `&&` chain stops at the first stage that fails.
 * NOTE(review): the last operand of the chain is on a line not visible in
 * this listing -- presumably a check that the whole input was consumed. */
992 parse_one_variation (const char **pp, const char *end, hb_variation_t *variation)
994 return parse_tag (pp, end, &variation->tag) &&
995 parse_variation_value (pp, end, variation) &&
996 parse_space (pp, end) &&
1001 * hb_variation_from_string:
/* Parses @str into a local hb_variation_t (`var`) with
 * parse_one_variation().  On the failure path *variation is zero-filled.
 * NOTE(review): the declaration of `var`, the handling of a negative @len,
 * the copy into *variation on success, the null check guarding the memset
 * and the return statements are on lines not visible in this listing. */
1006 hb_variation_from_string (const char *str, int len,
1007 hb_variation_t *variation)
1014 if (likely (parse_one_variation (&str, str + len, &var)))
1022 memset (variation, 0, sizeof (*variation));
1027 * hb_variation_to_string:
/* Formats @variation into @buf, writing at most @size bytes.
 * The tag and the value (printed with "%g") are assembled in the local
 * scratch array `s` and then copied out, truncated to size - 1 bytes.  Does
 * nothing when @size is zero.
 * NOTE(review): the declaration of `s`, the separator between tag and
 * value, the update of `len` after the tag and the final terminator store
 * are on lines not visible in this listing. */
1032 hb_variation_to_string (hb_variation_t *variation,
1033 char *buf, unsigned int size)
1035 if (unlikely (!size)) return;
1038 unsigned int len = 0;
1039 hb_tag_to_string (variation->tag, s + len);
/* Trim trailing spaces from the tag text. */
1041 while (len && s[len - 1] == ' ')
/* MAX (0, ...) ignores a negative (error) result from snprintf(). */
1044 len += MAX (0, snprintf (s + len, ARRAY_LENGTH (s) - len, "%g", variation->value));
1046 assert (len < ARRAY_LENGTH (s));
/* Leave room for one more byte in the caller's buffer. */
1047 len = MIN (len, size - 1);
1048 memcpy (buf, s, len);