2 * Copyright © 2009,2010 Red Hat, Inc.
3 * Copyright © 2011,2012 Google, Inc.
5 * This is part of HarfBuzz, a text shaping library.
7 * Permission is hereby granted, without written agreement and without
8 * license or royalty fees, to use, copy, modify, and distribute this
9 * software and its documentation for any purpose, provided that the
10 * above copyright notice and the following two paragraphs appear in
11 * all copies of this software.
13 * IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE TO ANY PARTY FOR
14 * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES
15 * ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN
16 * IF THE COPYRIGHT HOLDER HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH
19 * THE COPYRIGHT HOLDER SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING,
20 * BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
21 * FITNESS FOR A PARTICULAR PURPOSE. THE SOFTWARE PROVIDED HEREUNDER IS
22 * ON AN "AS IS" BASIS, AND THE COPYRIGHT HOLDER HAS NO OBLIGATION TO
23 * PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS.
25 * Red Hat Author(s): Behdad Esfahbod
26 * Google Author(s): Behdad Esfahbod
29 #include "hb-private.hh"
31 #include "hb-mutex-private.hh"
32 #include "hb-object-private.hh"
42 hb_options_union_t _hb_options;
45 _hb_options_init (void)
49 u.opts.initialized = 1;
51 char *c = getenv ("HB_OPTIONS");
52 u.opts.uniscribe_bug_compatible = c && strstr (c, "uniscribe-bug-compatible");
54 /* This is idempotent and threadsafe. */
63 * @str: (array length=len) (element-type uint8_t):
73 hb_tag_from_string (const char *str, int len)
78 if (!str || !len || !*str)
81 if (len < 0 || len > 4)
83 for (i = 0; i < (unsigned) len && str[i]; i++)
88 return HB_TAG (tag[0], tag[1], tag[2], tag[3]);
94 * @buf: (out caller-allocates) (array fixed-size=4) (element-type uint8_t):
101 hb_tag_to_string (hb_tag_t tag, char *buf)
103 buf[0] = (char) (uint8_t) (tag >> 24);
104 buf[1] = (char) (uint8_t) (tag >> 16);
105 buf[2] = (char) (uint8_t) (tag >> 8);
106 buf[3] = (char) (uint8_t) (tag >> 0);
112 const char direction_strings[][4] = {
120 * hb_direction_from_string:
121 * @str: (array length=len) (element-type uint8_t):
131 hb_direction_from_string (const char *str, int len)
133 if (unlikely (!str || !len || !*str))
134 return HB_DIRECTION_INVALID;
136 /* Let's match loosely: just match the first letter, such that
137 * all of "ltr", "left-to-right", etc work!
139 char c = TOLOWER (str[0]);
140 for (unsigned int i = 0; i < ARRAY_LENGTH (direction_strings); i++)
141 if (c == direction_strings[i][0])
142 return (hb_direction_t) (HB_DIRECTION_LTR + i);
144 return HB_DIRECTION_INVALID;
148 * hb_direction_to_string:
153 * Return value: (transfer none):
158 hb_direction_to_string (hb_direction_t direction)
160 if (likely ((unsigned int) (direction - HB_DIRECTION_LTR)
161 < ARRAY_LENGTH (direction_strings)))
162 return direction_strings[direction - HB_DIRECTION_LTR];
170 struct hb_language_impl_t {
174 static const char canon_map[256] = {
175 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
176 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
177 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, '-', 0, 0,
178 '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', 0, 0, 0, 0, 0, 0,
179 '-', 'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o',
180 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z', 0, 0, 0, 0, '-',
181 0, 'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o',
182 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z', 0, 0, 0, 0, 0
186 lang_equal (hb_language_t v1,
189 const unsigned char *p1 = (const unsigned char *) v1;
190 const unsigned char *p2 = (const unsigned char *) v2;
192 while (*p1 && *p1 == canon_map[*p2]) {
197 return *p1 == canon_map[*p2];
202 lang_hash (const void *key)
204 const unsigned char *p = key;
206 while (canon_map[*p])
208 h = (h << 5) - h + canon_map[*p];
217 struct hb_language_item_t {
219 struct hb_language_item_t *next;
222 inline bool operator == (const char *s) const {
223 return lang_equal (lang, s);
226 inline hb_language_item_t & operator = (const char *s) {
227 /* If a custom allocator is used, calling strdup() pairs
228 badly with a call to the custom free() in fini() below.
229 Therefore don't call strdup(), implement its behavior.
231 size_t len = strlen(s) + 1;
232 lang = (hb_language_t) malloc(len);
235 memcpy((unsigned char *) lang, s, len);
236 for (unsigned char *p = (unsigned char *) lang; *p; p++)
243 void fini (void) { free ((void *) lang); }
247 /* Thread-safe lock-free language list */
249 static hb_language_item_t *langs;
256 hb_language_item_t *first_lang = (hb_language_item_t *) hb_atomic_ptr_get (&langs);
257 if (!hb_atomic_ptr_cmpexch (&langs, first_lang, nullptr))
261 hb_language_item_t *next = first_lang->next;
269 static hb_language_item_t *
270 lang_find_or_insert (const char *key)
273 hb_language_item_t *first_lang = (hb_language_item_t *) hb_atomic_ptr_get (&langs);
275 for (hb_language_item_t *lang = first_lang; lang; lang = lang->next)
279 /* Not found; allocate one. */
280 hb_language_item_t *lang = (hb_language_item_t *) calloc (1, sizeof (hb_language_item_t));
281 if (unlikely (!lang))
283 lang->next = first_lang;
285 if (unlikely (!lang->lang))
291 if (!hb_atomic_ptr_cmpexch (&langs, first_lang, lang)) {
299 atexit (free_langs); /* First person registers atexit() callback. */
307 * hb_language_from_string:
308 * @str: (array length=len) (element-type uint8_t): a string representing
309 * ISO 639 language code
310 * @len: length of the @str, or -1 if it is %NULL-terminated.
312 * Converts @str representing an ISO 639 language code to the corresponding
315 * Return value: (transfer none):
316 * The #hb_language_t corresponding to the ISO 639 language code.
321 hb_language_from_string (const char *str, int len)
323 if (!str || !len || !*str)
324 return HB_LANGUAGE_INVALID;
326 hb_language_item_t *item = nullptr;
329 /* NUL-terminate it. */
331 len = MIN (len, (int) sizeof (strbuf) - 1);
332 memcpy (strbuf, str, len);
334 item = lang_find_or_insert (strbuf);
337 item = lang_find_or_insert (str);
339 return likely (item) ? item->lang : HB_LANGUAGE_INVALID;
343 * hb_language_to_string:
344 * @language: an #hb_language_t to convert.
346 * See hb_language_from_string().
348 * Return value: (transfer none):
349 * A %NULL-terminated string representing the @language. Must not be freed by
355 hb_language_to_string (hb_language_t language)
357 /* This is actually nullptr-safe! */
362 * hb_language_get_default:
366 * Return value: (transfer none):
371 hb_language_get_default (void)
373 static hb_language_t default_language = HB_LANGUAGE_INVALID;
375 hb_language_t language = (hb_language_t) hb_atomic_ptr_get (&default_language);
376 if (unlikely (language == HB_LANGUAGE_INVALID)) {
377 language = hb_language_from_string (setlocale (LC_CTYPE, nullptr), -1);
378 (void) hb_atomic_ptr_cmpexch (&default_language, HB_LANGUAGE_INVALID, language);
381 return default_language;
388 * hb_script_from_iso15924_tag:
389 * @tag: an #hb_tag_t representing an ISO 15924 tag.
391 * Converts an ISO 15924 script tag to a corresponding #hb_script_t.
394 * An #hb_script_t corresponding to the ISO 15924 tag.
399 hb_script_from_iso15924_tag (hb_tag_t tag)
401 if (unlikely (tag == HB_TAG_NONE))
402 return HB_SCRIPT_INVALID;
404 /* Be lenient, adjust case (one capital letter followed by three small letters) */
405 tag = (tag & 0xDFDFDFDFu) | 0x00202020u;
409 /* These graduated from the 'Q' private-area codes, but
410 * the old code is still aliased by Unicode, and the Qaai
411 * one in use by ICU. */
412 case HB_TAG('Q','a','a','i'): return HB_SCRIPT_INHERITED;
413 case HB_TAG('Q','a','a','c'): return HB_SCRIPT_COPTIC;
415 /* Script variants from https://unicode.org/iso15924/ */
416 case HB_TAG('C','y','r','s'): return HB_SCRIPT_CYRILLIC;
417 case HB_TAG('L','a','t','f'): return HB_SCRIPT_LATIN;
418 case HB_TAG('L','a','t','g'): return HB_SCRIPT_LATIN;
419 case HB_TAG('S','y','r','e'): return HB_SCRIPT_SYRIAC;
420 case HB_TAG('S','y','r','j'): return HB_SCRIPT_SYRIAC;
421 case HB_TAG('S','y','r','n'): return HB_SCRIPT_SYRIAC;
424 /* If it looks right, just use the tag as a script */
425 if (((uint32_t) tag & 0xE0E0E0E0u) == 0x40606060u)
426 return (hb_script_t) tag;
428 /* Otherwise, return unknown */
429 return HB_SCRIPT_UNKNOWN;
433 * hb_script_from_string:
434 * @str: (array length=len) (element-type uint8_t): a string representing an
436 * @len: length of the @str, or -1 if it is %NULL-terminated.
438 * Converts a string @str representing an ISO 15924 script tag to a
439 * corresponding #hb_script_t. Shorthand for hb_tag_from_string() then
440 * hb_script_from_iso15924_tag().
443 * An #hb_script_t corresponding to the ISO 15924 tag.
448 hb_script_from_string (const char *str, int len)
450 return hb_script_from_iso15924_tag (hb_tag_from_string (str, len));
454 * hb_script_to_iso15924_tag:
455 * @script: an #hb_script_t to convert.
457 * See hb_script_from_iso15924_tag().
460 * An #hb_tag_t representing an ISO 15924 script tag.
465 hb_script_to_iso15924_tag (hb_script_t script)
467 return (hb_tag_t) script;
471 * hb_script_get_horizontal_direction:
481 hb_script_get_horizontal_direction (hb_script_t script)
483 /* https://docs.google.com/spreadsheets/d/1Y90M0Ie3MUJ6UVCRDOypOtijlMDLNNyyLk36T6iMu0o */
484 switch ((hb_tag_t) script)
486 /* Unicode-1.1 additions */
487 case HB_SCRIPT_ARABIC:
488 case HB_SCRIPT_HEBREW:
490 /* Unicode-3.0 additions */
491 case HB_SCRIPT_SYRIAC:
492 case HB_SCRIPT_THAANA:
494 /* Unicode-4.0 additions */
495 case HB_SCRIPT_CYPRIOT:
497 /* Unicode-4.1 additions */
498 case HB_SCRIPT_KHAROSHTHI:
500 /* Unicode-5.0 additions */
501 case HB_SCRIPT_PHOENICIAN:
504 /* Unicode-5.1 additions */
505 case HB_SCRIPT_LYDIAN:
507 /* Unicode-5.2 additions */
508 case HB_SCRIPT_AVESTAN:
509 case HB_SCRIPT_IMPERIAL_ARAMAIC:
510 case HB_SCRIPT_INSCRIPTIONAL_PAHLAVI:
511 case HB_SCRIPT_INSCRIPTIONAL_PARTHIAN:
512 case HB_SCRIPT_OLD_SOUTH_ARABIAN:
513 case HB_SCRIPT_OLD_TURKIC:
514 case HB_SCRIPT_SAMARITAN:
516 /* Unicode-6.0 additions */
517 case HB_SCRIPT_MANDAIC:
519 /* Unicode-6.1 additions */
520 case HB_SCRIPT_MEROITIC_CURSIVE:
521 case HB_SCRIPT_MEROITIC_HIEROGLYPHS:
523 /* Unicode-7.0 additions */
524 case HB_SCRIPT_MANICHAEAN:
525 case HB_SCRIPT_MENDE_KIKAKUI:
526 case HB_SCRIPT_NABATAEAN:
527 case HB_SCRIPT_OLD_NORTH_ARABIAN:
528 case HB_SCRIPT_PALMYRENE:
529 case HB_SCRIPT_PSALTER_PAHLAVI:
531 /* Unicode-8.0 additions */
532 case HB_SCRIPT_HATRAN:
533 case HB_SCRIPT_OLD_HUNGARIAN:
535 /* Unicode-9.0 additions */
536 case HB_SCRIPT_ADLAM:
538 /* Unicode-11.0 additions */
539 case HB_SCRIPT_HANIFI_ROHINGYA:
540 case HB_SCRIPT_OLD_SOGDIAN:
541 case HB_SCRIPT_SOGDIAN:
543 return HB_DIRECTION_RTL;
546 /* https://github.com/harfbuzz/harfbuzz/issues/1000 */
547 case HB_SCRIPT_OLD_ITALIC:
549 return HB_DIRECTION_INVALID;
552 return HB_DIRECTION_LTR;
556 /* hb_user_data_array_t */
559 hb_user_data_array_t::set (hb_user_data_key_t *key,
561 hb_destroy_func_t destroy,
568 if (!data && !destroy) {
569 items.remove (key, lock);
573 hb_user_data_item_t item = {key, data, destroy};
574 bool ret = !!items.replace_or_insert (item, lock, (bool) replace);
580 hb_user_data_array_t::get (hb_user_data_key_t *key)
582 hb_user_data_item_t item = {nullptr, nullptr, nullptr};
584 return items.find (key, &item, lock) ? item.data : nullptr;
592 * @major: (out): Library major version component.
593 * @minor: (out): Library minor version component.
594 * @micro: (out): Library micro version component.
596 * Returns library version as three integer components.
601 hb_version (unsigned int *major,
605 *major = HB_VERSION_MAJOR;
606 *minor = HB_VERSION_MINOR;
607 *micro = HB_VERSION_MICRO;
613 * Returns library version as a string with three components.
615 * Return value: library version string.
620 hb_version_string (void)
622 return HB_VERSION_STRING;
626 * hb_version_atleast:
638 hb_version_atleast (unsigned int major,
642 return HB_VERSION_ATLEAST (major, minor, micro);
647 /* hb_feature_t and hb_variation_t */
650 parse_space (const char **pp, const char *end)
652 while (*pp < end && ISSPACE (**pp))
658 parse_char (const char **pp, const char *end, char c)
660 parse_space (pp, end);
662 if (*pp == end || **pp != c)
670 parse_uint (const char **pp, const char *end, unsigned int *pv)
673 unsigned int len = MIN (ARRAY_LENGTH (buf) - 1, (unsigned int) (end - *pp));
674 strncpy (buf, *pp, len);
681 /* Intentionally use strtol instead of strtoul, such that
682 * -1 turns into "big number"... */
684 v = strtol (p, &pend, 0);
685 if (errno || p == pend)
694 parse_uint32 (const char **pp, const char *end, uint32_t *pv)
697 unsigned int len = MIN (ARRAY_LENGTH (buf) - 1, (unsigned int) (end - *pp));
698 strncpy (buf, *pp, len);
705 /* Intentionally use strtol instead of strtoul, such that
706 * -1 turns into "big number"... */
708 v = strtol (p, &pend, 0);
709 if (errno || p == pend)
717 #if defined (HAVE_NEWLOCALE) && defined (HAVE_STRTOD_L)
718 #define USE_XLOCALE 1
719 #define HB_LOCALE_T locale_t
720 #define HB_CREATE_LOCALE(locName) newlocale (LC_ALL_MASK, locName, nullptr)
721 #define HB_FREE_LOCALE(loc) freelocale (loc)
722 #elif defined(_MSC_VER)
723 #define USE_XLOCALE 1
724 #define HB_LOCALE_T _locale_t
725 #define HB_CREATE_LOCALE(locName) _create_locale (LC_ALL, locName)
726 #define HB_FREE_LOCALE(loc) _free_locale (loc)
727 #define strtod_l(a, b, c) _strtod_l ((a), (b), (c))
732 static HB_LOCALE_T C_locale;
739 HB_LOCALE_T locale = (HB_LOCALE_T) hb_atomic_ptr_get (&C_locale);
741 if (!hb_atomic_ptr_cmpexch (&C_locale, locale, nullptr))
745 HB_FREE_LOCALE (locale);
753 HB_LOCALE_T C = (HB_LOCALE_T) hb_atomic_ptr_get (&C_locale);
757 C = HB_CREATE_LOCALE ("C");
759 if (!hb_atomic_ptr_cmpexch (&C_locale, nullptr, C))
761 HB_FREE_LOCALE (C); /* Lost the race: release our own copy, never the locale already installed in C_locale. */
766 atexit (free_C_locale); /* First person registers atexit() callback. */
775 parse_float (const char **pp, const char *end, float *pv)
778 unsigned int len = MIN (ARRAY_LENGTH (buf) - 1, (unsigned int) (end - *pp));
779 strncpy (buf, *pp, len);
788 v = strtod_l (p, &pend, get_C_locale ());
790 v = strtod (p, &pend);
792 if (errno || p == pend)
801 parse_bool (const char **pp, const char *end, uint32_t *pv)
803 parse_space (pp, end);
806 while (*pp < end && ISALPHA(**pp))
809 /* CSS allows on/off as aliases for 1/0. */
810 if (*pp - p == 2 && 0 == strncmp (p, "on", 2))
812 else if (*pp - p == 3 && 0 == strncmp (p, "off", 3))
823 parse_feature_value_prefix (const char **pp, const char *end, hb_feature_t *feature)
825 if (parse_char (pp, end, '-'))
828 parse_char (pp, end, '+');
836 parse_tag (const char **pp, const char *end, hb_tag_t *tag)
838 parse_space (pp, end);
842 if (*pp < end && (**pp == '\'' || **pp == '"'))
849 while (*pp < end && ISALNUM(**pp))
852 if (p == *pp || *pp - p > 4)
855 *tag = hb_tag_from_string (p, *pp - p);
859 /* CSS expects exactly four bytes. And we only allow quotations for
860 * CSS compatibility. So, enforce the length. */
863 if (*pp == end || **pp != quote)
872 parse_feature_indices (const char **pp, const char *end, hb_feature_t *feature)
874 parse_space (pp, end);
879 feature->end = (unsigned int) -1;
881 if (!parse_char (pp, end, '['))
884 has_start = parse_uint (pp, end, &feature->start);
886 if (parse_char (pp, end, ':')) {
887 parse_uint (pp, end, &feature->end);
890 feature->end = feature->start + 1;
893 return parse_char (pp, end, ']');
897 parse_feature_value_postfix (const char **pp, const char *end, hb_feature_t *feature)
899 bool had_equal = parse_char (pp, end, '=');
900 bool had_value = parse_uint32 (pp, end, &feature->value) ||
901 parse_bool (pp, end, &feature->value);
902 /* CSS doesn't use equal-sign between tag and value.
903 * If there was an equal-sign, then there *must* be a value.
904 * A value without an equal-sign is ok, but not required. */
905 return !had_equal || had_value;
909 parse_one_feature (const char **pp, const char *end, hb_feature_t *feature)
911 return parse_feature_value_prefix (pp, end, feature) &&
912 parse_tag (pp, end, &feature->tag) &&
913 parse_feature_indices (pp, end, feature) &&
914 parse_feature_value_postfix (pp, end, feature) &&
915 parse_space (pp, end) &&
920 * hb_feature_from_string:
921 * @str: (array length=len) (element-type uint8_t): a string to parse
922 * @len: length of @str, or -1 if string is %NULL terminated
923 * @feature: (out): the #hb_feature_t to initialize with the parsed values
925 * Parses a string into a #hb_feature_t.
927 * TODO: document the syntax here.
930 * %true if @str is successfully parsed, %false otherwise.
935 hb_feature_from_string (const char *str, int len,
936 hb_feature_t *feature)
943 if (likely (parse_one_feature (&str, str + len, &feat)))
951 memset (feature, 0, sizeof (*feature));
956 * hb_feature_to_string:
957 * @feature: an #hb_feature_t to convert
958 * @buf: (array length=size) (out): output string
959 * @size: the allocated size of @buf
961 * Converts a #hb_feature_t into a %NULL-terminated string in the format
962 * understood by hb_feature_from_string(). The client is responsible for
963 * allocating a large enough @buf; 128 bytes is more than enough.
968 hb_feature_to_string (hb_feature_t *feature,
969 char *buf, unsigned int size)
971 if (unlikely (!size)) return;
974 unsigned int len = 0;
975 if (feature->value == 0)
977 hb_tag_to_string (feature->tag, s + len);
979 while (len && s[len - 1] == ' ')
981 if (feature->start != 0 || feature->end != (unsigned int) -1)
985 len += MAX (0, snprintf (s + len, ARRAY_LENGTH (s) - len, "%u", feature->start));
986 if (feature->end != feature->start + 1) {
988 if (feature->end != (unsigned int) -1)
989 len += MAX (0, snprintf (s + len, ARRAY_LENGTH (s) - len, "%u", feature->end));
993 if (feature->value > 1)
996 len += MAX (0, snprintf (s + len, ARRAY_LENGTH (s) - len, "%u", feature->value));
998 assert (len < ARRAY_LENGTH (s));
999 len = MIN (len, size - 1);
1000 memcpy (buf, s, len);
1004 /* hb_variation_t */
1007 parse_variation_value (const char **pp, const char *end, hb_variation_t *variation)
1009 parse_char (pp, end, '='); /* Optional. */
1010 return parse_float (pp, end, &variation->value);
1014 parse_one_variation (const char **pp, const char *end, hb_variation_t *variation)
1016 return parse_tag (pp, end, &variation->tag) &&
1017 parse_variation_value (pp, end, variation) &&
1018 parse_space (pp, end) &&
1023 * hb_variation_from_string:
1028 hb_variation_from_string (const char *str, int len,
1029 hb_variation_t *variation)
1036 if (likely (parse_one_variation (&str, str + len, &var)))
1044 memset (variation, 0, sizeof (*variation));
1049 * hb_variation_to_string:
1054 hb_variation_to_string (hb_variation_t *variation,
1055 char *buf, unsigned int size)
1057 if (unlikely (!size)) return;
1060 unsigned int len = 0;
1061 hb_tag_to_string (variation->tag, s + len);
1063 while (len && s[len - 1] == ' ')
1066 len += MAX (0, snprintf (s + len, ARRAY_LENGTH (s) - len, "%g", variation->value));
1068 assert (len < ARRAY_LENGTH (s));
1069 len = MIN (len, size - 1);
1070 memcpy (buf, s, len);