2 * Copyright © 2009,2010 Red Hat, Inc.
3 * Copyright © 2011,2012 Google, Inc.
5 * This is part of HarfBuzz, a text shaping library.
7 * Permission is hereby granted, without written agreement and without
8 * license or royalty fees, to use, copy, modify, and distribute this
9 * software and its documentation for any purpose, provided that the
10 * above copyright notice and the following two paragraphs appear in
11 * all copies of this software.
13 * IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE TO ANY PARTY FOR
14 * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES
15 * ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN
16 * IF THE COPYRIGHT HOLDER HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH
19 * THE COPYRIGHT HOLDER SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING,
20 * BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
21 * FITNESS FOR A PARTICULAR PURPOSE. THE SOFTWARE PROVIDED HEREUNDER IS
22 * ON AN "AS IS" BASIS, AND THE COPYRIGHT HOLDER HAS NO OBLIGATION TO
23 * PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS.
25 * Red Hat Author(s): Behdad Esfahbod
26 * Google Author(s): Behdad Esfahbod
29 #include "hb-private.hh"
31 #include "hb-mutex-private.hh"
32 #include "hb-object-private.hh"
/* File-scope options object of type hb_options_union_t.
 * NOTE(review): presumably assigned at the end of _hb_options_init(); the
 * store itself is not visible in this excerpt. */
39 hb_options_union_t _hb_options;
/* Builds the option flags in a local `u`: marks them initialized and turns on
 * uniscribe_bug_compatible when the HB_OPTIONS environment variable contains
 * the substring "uniscribe-bug-compatible". */
42 _hb_options_init (void)
46 u.opts.initialized = 1;
/* getenv() returns NULL for an unset variable; the `c &&` guard below keeps
 * strstr() from being called with NULL. */
48 char *c = getenv ("HB_OPTIONS");
49 u.opts.uniscribe_bug_compatible = c && strstr (c, "uniscribe-bug-compatible");
51 /* This is idempotent and threadsafe. */
60 * @str: (array length=len) (element-type uint8_t):
/* Builds a tag from the leading bytes of @str.
 * A NULL or empty @str, or a zero @len, takes the first early-exit branch.
 * A negative @len or one above 4 is caught by the second check
 * (NOTE(review): presumably clamped to 4; the statement is not visible in
 * this excerpt). */
70 hb_tag_from_string (const char *str, int len)
75 if (!str || !len || !*str)
78 if (len < 0 || len > 4)
/* Walk the input until @len bytes are consumed or a NUL byte is reached. */
80 for (i = 0; i < (unsigned) len && str[i]; i++)
85 return HB_TAG_CHAR4 (tag);
91 * @buf: (out caller-allocates) (array fixed-size=4) (element-type uint8_t):
/* Writes the four bytes of @tag into @buf, most significant byte first.
 * The visible statements store exactly buf[0]..buf[3]; none of them writes a
 * NUL terminator, so @buf must not be treated as a C string on their account. */
98 hb_tag_to_string (hb_tag_t tag, char *buf)
100 buf[0] = (char) (uint8_t) (tag >> 24);
101 buf[1] = (char) (uint8_t) (tag >> 16);
102 buf[2] = (char) (uint8_t) (tag >> 8);
103 buf[3] = (char) (uint8_t) (tag >> 0);
/* Direction names, four bytes per entry, indexed by
 * (direction - HB_DIRECTION_LTR).  The initializer entries are not visible in
 * this excerpt. */
109 const char direction_strings[][4] = {
117 * hb_direction_from_string:
118 * @str: (array length=len) (element-type uint8_t):
/* Maps a string to a direction by comparing its lower-cased first character
 * with the first character of each direction_strings entry.  Entry i yields
 * HB_DIRECTION_LTR + i.  A NULL or empty @str, a zero @len, or no match
 * yields HB_DIRECTION_INVALID.  In the visible lines @len is only tested
 * for zero. */
128 hb_direction_from_string (const char *str, int len)
130 if (unlikely (!str || !len || !*str))
131 return HB_DIRECTION_INVALID;
133 /* Lets match loosely: just match the first letter, such that
134 * all of "ltr", "left-to-right", etc work!
136 char c = TOLOWER (str[0]);
137 for (unsigned int i = 0; i < ARRAY_LENGTH (direction_strings); i++)
138 if (c == direction_strings[i][0])
139 return (hb_direction_t) (HB_DIRECTION_LTR + i);
141 return HB_DIRECTION_INVALID;
145 * hb_direction_to_string:
150 * Return value: (transfer none):
/* Returns the direction_strings entry for @direction when its offset from
 * HB_DIRECTION_LTR is inside the table.  The result for an out-of-range
 * direction is not visible in this excerpt. */
155 hb_direction_to_string (hb_direction_t direction)
/* The unsigned cast turns values below HB_DIRECTION_LTR into large numbers,
 * so a single comparison rejects both ends of the range. */
157 if (likely ((unsigned int) (direction - HB_DIRECTION_LTR)
158 < ARRAY_LENGTH (direction_strings)))
159 return direction_strings[direction - HB_DIRECTION_LTR];
/* NOTE(review): presumably the struct behind hb_language_t handles; its
 * members are not visible in this excerpt, so confirm against the header. */
167 struct hb_language_impl_t {
/* Canonicalization table indexed by byte value (ASCII layout assumed).
 * Digits and lower-case letters map to themselves, upper-case letters map to
 * their lower-case form, and '-', '@' and '_' all map to '-'.  Every other
 * byte maps to 0, including indices 128..255, which have no initializer and
 * are therefore zero-initialized. */
171 static const char canon_map[256] = {
172 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
173 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
174 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, '-', 0, 0,
175 '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', 0, 0, 0, 0, 0, 0,
176 '-', 'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o',
177 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z', 0, 0, 0, 0, '-',
178 0, 'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o',
179 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z', 0, 0, 0, 0, 0
/* Compares language string @v1 against @v2.  Bytes of @v1 are used as they
 * are; each byte of @v2 is mapped through canon_map before comparing.
 * (The second parameter line is not visible in this excerpt.) */
183 lang_equal (hb_language_t v1,
/* unsigned char so that bytes >= 0x80 index canon_map with a value 0..255. */
186 const unsigned char *p1 = (const unsigned char *) v1;
187 const unsigned char *p2 = (const unsigned char *) v2;
/* Continue while @v1 has bytes left and they match the canonicalized @v2. */
189 while (*p1 && *p1 == canon_map[*p2]) {
/* After the loop this is true only when @v1 has ended (*p1 == 0) and the
 * current @v2 byte canonicalizes to 0 as well. */
194 return *p1 == canon_map[*p2];
/* Hashes the canonicalized bytes of @key, stopping at the first byte that
 * canon_map maps to 0.
 * NOTE(review): the implicit conversion from `const void *` below is not
 * accepted by C++ compilers, so this function may be compiled out — confirm. */
199 lang_hash (const void *key)
201 const unsigned char *p = key;
203 while (canon_map[*p])
/* (h << 5) - h is h * 31. */
205 h = (h << 5) - h + canon_map[*p];
/* Node of the singly linked language list headed by `langs`. */
214 struct hb_language_item_t {
216 struct hb_language_item_t *next;
/* An item equals a C string when lang_equal() accepts the stored language
 * against @s. */
219 inline bool operator == (const char *s) const {
220 return lang_equal (lang, s);
/* Assigning a C string stores a heap copy made with strdup(); the loop then
 * visits every byte of that copy.
 * NOTE(review): the strdup() result is dereferenced by the loop without a
 * NULL check in the visible lines. */
223 inline hb_language_item_t & operator = (const char *s) {
224 lang = (hb_language_t) strdup (s);
225 for (unsigned char *p = (unsigned char *) lang; *p; p++)
/* Releases the string owned by this item. */
231 void finish (void) { free ((void *) lang); }
235 /* Thread-safe lock-free language list */
/* Head of the language list.  lang_find_or_insert() reads it with
 * hb_atomic_ptr_get() and replaces it with hb_atomic_ptr_cmpexch(). */
237 static hb_language_item_t *langs;
/* Cleanup routine that lang_find_or_insert() registers with atexit().
 * It works on the list headed by `langs`. */
241 void free_langs (void)
/* The successor is read first, presumably so the current node can then be
 * released; the release itself is not visible in this excerpt. */
244 hb_language_item_t *next = langs->next;
/* Looks @key up in the global language list.  When no existing node is used,
 * a zeroed node is allocated, linked in front of the list snapshot and
 * offered as the new head with a compare-and-exchange on `langs`. */
252 static hb_language_item_t *
253 lang_find_or_insert (const char *key)
/* Snapshot the head once; the same value is the expected value of the
 * exchange further down. */
256 hb_language_item_t *first_lang = (hb_language_item_t *) hb_atomic_ptr_get (&langs);
258 for (hb_language_item_t *lang = first_lang; lang; lang = lang->next)
262 /* Not found; allocate one. */
263 hb_language_item_t *lang = (hb_language_item_t *) calloc (1, sizeof (hb_language_item_t));
264 if (unlikely (!lang))
266 lang->next = first_lang;
/* Presumably fails when `langs` no longer equals the snapshot, i.e. another
 * insert got in first; the recovery code is not visible in this excerpt. */
269 if (!hb_atomic_ptr_cmpexch (&langs, first_lang, lang)) {
277 atexit (free_langs); /* First person registers atexit() callback. */
285 * hb_language_from_string:
286 * @str: (array length=len) (element-type uint8_t): a string representing
287 * ISO 639 language code
288 * @len: length of the @str, or -1 if it is %NULL-terminated.
290 * Converts @str representing an ISO 639 language code to the corresponding
293 * Return value: (transfer none):
294 * The #hb_language_t corresponding to the ISO 639 language code.
/* Returns the language handle for @str.  HB_LANGUAGE_INVALID is returned
 * when @str is NULL or empty, when @len is 0, or when lang_find_or_insert()
 * yields no item. */
299 hb_language_from_string (const char *str, int len)
301 if (!str || !len || !*str)
302 return HB_LANGUAGE_INVALID;
304 hb_language_item_t *item = NULL;
307 /* NUL-terminate it. */
/* Clamp so the copy plus a terminator fits in strbuf; longer input is
 * silently truncated. */
309 len = MIN (len, (int) sizeof (strbuf) - 1);
310 memcpy (strbuf, str, len);
312 item = lang_find_or_insert (strbuf);
/* Other branch: @str is passed through unchanged.  NOTE(review): presumably
 * the negative-@len (already NUL-terminated) case; the condition is not
 * visible in this excerpt. */
315 item = lang_find_or_insert (str);
317 return likely (item) ? item->lang : HB_LANGUAGE_INVALID;
321 * hb_language_to_string:
322 * @language: an #hb_language_t to convert.
324 * See hb_language_from_string().
326 * Return value: (transfer none):
327 * A %NULL-terminated string representing the @language. Must not be freed by
/* Converts @language back to its string form.  The return statement is not
 * visible in this excerpt; the existing note below says the conversion
 * tolerates a NULL @language. */
333 hb_language_to_string (hb_language_t language)
335 /* This is actually NULL-safe! */
340 * hb_language_get_default:
344 * Return value: (transfer none):
349 hb_language_get_default (void)
351 static hb_language_t default_language = HB_LANGUAGE_INVALID;
353 hb_language_t language = (hb_language_t) hb_atomic_ptr_get (&default_language);
354 if (unlikely (language == HB_LANGUAGE_INVALID)) {
355 language = hb_language_from_string (setlocale (LC_CTYPE, NULL), -1);
356 (void) hb_atomic_ptr_cmpexch (&default_language, HB_LANGUAGE_INVALID, language);
359 return default_language;
366 * hb_script_from_iso15924_tag:
367 * @tag: an #hb_tag_t representing an ISO 15924 tag.
369 * Converts an ISO 15924 script tag to a corresponding #hb_script_t.
372 * An #hb_script_t corresponding to the ISO 15924 tag.
/* Converts an ISO 15924 tag to a script value.  HB_TAG_NONE yields
 * HB_SCRIPT_INVALID.  The tag is case-normalized, a few aliases and variants
 * are mapped explicitly, a tag with the right letter-case shape is used as
 * the script value directly, and anything else yields HB_SCRIPT_UNKNOWN. */
377 hb_script_from_iso15924_tag (hb_tag_t tag)
379 if (unlikely (tag == HB_TAG_NONE))
380 return HB_SCRIPT_INVALID;
382 /* Be lenient, adjust case (one capital letter followed by three small letters) */
/* Clearing bit 0x20 in every byte upper-cases ASCII letters; OR-ing it back
 * into the low three bytes lower-cases those three. */
383 tag = (tag & 0xDFDFDFDFu) | 0x00202020u;
387 /* These graduated from the 'Q' private-area codes, but
388 * the old code is still aliased by Unicode, and the Qaai
389 * one in use by ICU. */
390 case HB_TAG('Q','a','a','i'): return HB_SCRIPT_INHERITED;
391 case HB_TAG('Q','a','a','c'): return HB_SCRIPT_COPTIC;
393 /* Script variants from http://unicode.org/iso15924/ */
394 case HB_TAG('C','y','r','s'): return HB_SCRIPT_CYRILLIC;
395 case HB_TAG('L','a','t','f'): return HB_SCRIPT_LATIN;
396 case HB_TAG('L','a','t','g'): return HB_SCRIPT_LATIN;
397 case HB_TAG('S','y','r','e'): return HB_SCRIPT_SYRIAC;
398 case HB_TAG('S','y','r','j'): return HB_SCRIPT_SYRIAC;
399 case HB_TAG('S','y','r','n'): return HB_SCRIPT_SYRIAC;
402 /* If it looks right, just use the tag as a script */
/* Tests the top three bits of each byte: the first byte must lie in
 * 0x40..0x5F and the other three in 0x60..0x7F. */
403 if (((uint32_t) tag & 0xE0E0E0E0u) == 0x40606060u)
404 return (hb_script_t) tag;
406 /* Otherwise, return unknown */
407 return HB_SCRIPT_UNKNOWN;
411 * hb_script_from_string:
412 * @str: (array length=len) (element-type uint8_t): a string representing an
414 * @len: length of the @str, or -1 if it is %NULL-terminated.
416 * Converts a string @str representing an ISO 15924 script tag to a
417 * corresponding #hb_script_t. Shorthand for hb_tag_from_string() then
418 * hb_script_from_iso15924_tag().
421 * An #hb_script_t corresponding to the ISO 15924 tag.
/* Converts @str to a tag with hb_tag_from_string() and then maps that tag to
 * a script with hb_script_from_iso15924_tag(). */
426 hb_script_from_string (const char *str, int len)
428 return hb_script_from_iso15924_tag (hb_tag_from_string (str, len));
432 * hb_script_to_iso15924_tag:
433 * @script: an #hb_script_t to convert.
435 * See hb_script_from_iso15924_tag().
438 * An #hb_tag_t representing an ISO 15924 script tag.
/* Returns @script reinterpreted as a tag; the conversion is a plain cast with
 * no lookup or validation. */
443 hb_script_to_iso15924_tag (hb_script_t script)
445 return (hb_tag_t) script;
449 * hb_script_get_horizontal_direction:
/* Returns the horizontal direction associated with @script.  The case labels
 * below share a single HB_DIRECTION_RTL return; the function's other return
 * is HB_DIRECTION_LTR.  The labels are grouped by the Unicode version that
 * added the script. */
459 hb_script_get_horizontal_direction (hb_script_t script)
461 /* http://goo.gl/x9ilM */
462 switch ((hb_tag_t) script)
464 /* Unicode-1.1 additions */
465 case HB_SCRIPT_ARABIC:
466 case HB_SCRIPT_HEBREW:
468 /* Unicode-3.0 additions */
469 case HB_SCRIPT_SYRIAC:
470 case HB_SCRIPT_THAANA:
472 /* Unicode-4.0 additions */
473 case HB_SCRIPT_CYPRIOT:
475 /* Unicode-4.1 additions */
476 case HB_SCRIPT_KHAROSHTHI:
478 /* Unicode-5.0 additions */
479 case HB_SCRIPT_PHOENICIAN:
482 /* Unicode-5.1 additions */
483 case HB_SCRIPT_LYDIAN:
485 /* Unicode-5.2 additions */
486 case HB_SCRIPT_AVESTAN:
487 case HB_SCRIPT_IMPERIAL_ARAMAIC:
488 case HB_SCRIPT_INSCRIPTIONAL_PAHLAVI:
489 case HB_SCRIPT_INSCRIPTIONAL_PARTHIAN:
490 case HB_SCRIPT_OLD_SOUTH_ARABIAN:
491 case HB_SCRIPT_OLD_TURKIC:
492 case HB_SCRIPT_SAMARITAN:
494 /* Unicode-6.0 additions */
495 case HB_SCRIPT_MANDAIC:
497 /* Unicode-6.1 additions */
498 case HB_SCRIPT_MEROITIC_CURSIVE:
499 case HB_SCRIPT_MEROITIC_HIEROGLYPHS:
501 /* Unicode-7.0 additions */
502 case HB_SCRIPT_MANICHAEAN:
503 case HB_SCRIPT_MENDE_KIKAKUI:
504 case HB_SCRIPT_NABATAEAN:
505 case HB_SCRIPT_OLD_NORTH_ARABIAN:
506 case HB_SCRIPT_PALMYRENE:
507 case HB_SCRIPT_PSALTER_PAHLAVI:
509 /* Unicode-8.0 additions */
510 case HB_SCRIPT_OLD_HUNGARIAN:
512 /* Unicode-9.0 additions */
513 case HB_SCRIPT_ADLAM:
515 return HB_DIRECTION_RTL;
518 return HB_DIRECTION_LTR;
522 /* hb_user_data_array_t */
/* Sets, replaces or removes the user-data entry for @key.
 * When both @data and @destroy are null the entry for @key is removed from
 * `items` (NOTE(review): any enclosing condition is not visible in this
 * excerpt).  Otherwise an item {key, data, destroy} is handed to
 * items.replace_or_insert() together with the @replace flag. */
525 hb_user_data_array_t::set (hb_user_data_key_t *key,
527 hb_destroy_func_t destroy,
534 if (!data && !destroy) {
535 items.remove (key, lock);
539 hb_user_data_item_t item = {key, data, destroy};
/* `!!` normalizes the replace_or_insert() result to a bool. */
540 bool ret = !!items.replace_or_insert (item, lock, (bool) replace);
/* Looks @key up in `items` and returns the data pointer of the matching item,
 * or NULL when items.find() reports no match. */
546 hb_user_data_array_t::get (hb_user_data_key_t *key)
548 hb_user_data_item_t item = {NULL, NULL, NULL};
550 return items.find (key, &item, lock) ? item.data : NULL;
558 * @major: (out): Library major version component.
559 * @minor: (out): Library minor version component.
560 * @micro: (out): Library micro version component.
562 * Returns library version as three integer components.
/* Stores the HB_VERSION_MAJOR/MINOR/MICRO macros through the three
 * out-pointers.  The visible statements dereference the pointers without a
 * NULL check, so callers must pass valid addresses. */
567 hb_version (unsigned int *major,
571 *major = HB_VERSION_MAJOR;
572 *minor = HB_VERSION_MINOR;
573 *micro = HB_VERSION_MICRO;
579 * Returns library version as a string with three components.
581 * Return value: library version string.
/* Returns the HB_VERSION_STRING macro; the caller does not own the result. */
586 hb_version_string (void)
588 return HB_VERSION_STRING;
592 * hb_version_atleast:
/* Forwards the three version components to the HB_VERSION_ATLEAST macro and
 * returns its result. */
604 hb_version_atleast (unsigned int major,
608 return HB_VERSION_ATLEAST (major, minor, micro);
613 /* hb_feature_t and hb_variation_t */
/* Loops while *pp is before @end and points at an ISSPACE() character; the
 * advance statement is not visible in this excerpt.  parse_one_feature() and
 * parse_one_variation() use the result as an operand of &&, so the function
 * yields a truth value. */
616 parse_space (const char **pp, const char *end)
618 while (*pp < end && ISSPACE (**pp))
/* Skips leading whitespace, then checks whether the next character is @c.
 * The check fails when the input is exhausted or the character differs. */
624 parse_char (const char **pp, const char *end, char c)
626 parse_space (pp, end);
628 if (*pp == end || **pp != c)
/* Parses an integer at *pp.  Up to ARRAY_LENGTH (buf) - 1 bytes of the
 * remaining input are first copied into a local buffer, then strtol() is run
 * on that buffer with base 0, which accepts decimal, 0x-prefixed hex and
 * 0-prefixed octal. */
636 parse_uint (const char **pp, const char *end, unsigned int *pv)
639 unsigned int len = MIN (ARRAY_LENGTH (buf) - 1, (unsigned int) (end - *pp));
/* strncpy() does not NUL-terminate when it copies exactly len bytes.
 * NOTE(review): a terminator store is expected right after this call but is
 * not visible in this excerpt. */
640 strncpy (buf, *pp, len);
647 /* Intentionally use strtol instead of strtoul, such that
648 * -1 turns into "big number"... */
650 v = strtol (p, &pend, 0);
/* Failure when strtol() set errno or consumed no characters. */
651 if (errno || p == pend)
/* Parses a floating-point number at *pp.  Up to ARRAY_LENGTH (buf) - 1 bytes
 * of the remaining input are first copied into a local buffer, then strtod()
 * is run on that buffer. */
660 parse_float (const char **pp, const char *end, float *pv)
663 unsigned int len = MIN (ARRAY_LENGTH (buf) - 1, (unsigned int) (end - *pp));
/* strncpy() does not NUL-terminate when it copies exactly len bytes.
 * NOTE(review): a terminator store is expected right after this call but is
 * not visible in this excerpt. */
664 strncpy (buf, *pp, len);
672 v = strtod (p, &pend);
/* Failure when strtod() set errno or consumed no characters. */
673 if (errno || p == pend)
682 parse_bool (const char **pp, const char *end, unsigned int *pv)
684 parse_space (pp, end);
687 while (*pp < end && ISALPHA(**pp))
690 /* CSS allows on/off as aliases 1/0. */
691 if (*pp - p == 2 || 0 == strncmp (p, "on", 2))
693 else if (*pp - p == 3 || 0 == strncmp (p, "off", 2))
/* Handles an optional sign in front of the tag: '-' is tried first, and in
 * the other path an optional '+' is consumed.  What each path stores in
 * @feature is not visible in this excerpt. */
704 parse_feature_value_prefix (const char **pp, const char *end, hb_feature_t *feature)
706 if (parse_char (pp, end, '-'))
/* The result is ignored on purpose: the '+' is optional. */
709 parse_char (pp, end, '+');
/* Parses a tag of one to four alphanumeric characters into *tag.  The tag may
 * be wrapped in single or double quotes. */
717 parse_tag (const char **pp, const char *end, hb_tag_t *tag)
719 parse_space (pp, end);
/* Detect an opening quote. */
723 if (*pp < end && (**pp == '\'' || **pp == '"'))
730 while (*pp < end && ISALNUM(**pp))
/* Reject an empty run and runs longer than four characters. */
733 if (p == *pp || *pp - p > 4)
736 *tag = hb_tag_from_string (p, *pp - p);
740 /* CSS expects exactly four bytes. And we only allow quotations for
741 * CSS compatibility. So, enforce the length. */
/* A quoted tag must be closed by the same quote character. */
744 if (*pp == end || **pp != quote)
/* Parses an optional "[start:end]" index suffix into @feature.
 * The end index defaults to (unsigned int) -1.  Once '[' has been seen, both
 * numbers are optional but the closing ']' is required. */
753 parse_feature_indices (const char **pp, const char *end, hb_feature_t *feature)
755 parse_space (pp, end);
760 feature->end = (unsigned int) -1;
762 if (!parse_char (pp, end, '['))
765 has_start = parse_uint (pp, end, &feature->start);
767 if (parse_char (pp, end, ':')) {
/* The result is ignored: with no number after ':' the default end stays. */
768 parse_uint (pp, end, &feature->end);
/* Single-index form: the range covers exactly one position.
 * NOTE(review): presumably guarded by has_start; the condition is not
 * visible in this excerpt. */
771 feature->end = feature->start + 1;
774 return parse_char (pp, end, ']');
/* Parses the optional value after the tag: an optional '=', then either an
 * unsigned integer or, if that fails, an on/off keyword.  Fails only when
 * an '=' was present without a value after it. */
778 parse_feature_value_postfix (const char **pp, const char *end, hb_feature_t *feature)
780 bool had_equal = parse_char (pp, end, '=');
781 bool had_value = parse_uint (pp, end, &feature->value) ||
782 parse_bool (pp, end, &feature->value);
783 /* CSS doesn't use equal-sign between tag and value.
784 * If there was an equal-sign, then there *must* be a value.
785 * A value without an equal-sign is ok, but not required. */
786 return !had_equal || had_value;
/* Runs the feature sub-parsers in order and stops at the first failure:
 * sign prefix, tag, index range, value, then trailing whitespace.
 * The last operand of the && chain is not visible in this excerpt. */
790 parse_one_feature (const char **pp, const char *end, hb_feature_t *feature)
792 return parse_feature_value_prefix (pp, end, feature) &&
793 parse_tag (pp, end, &feature->tag) &&
794 parse_feature_indices (pp, end, feature) &&
795 parse_feature_value_postfix (pp, end, feature) &&
796 parse_space (pp, end) &&
801 * hb_feature_from_string:
802 * @str: (array length=len) (element-type uint8_t): a string to parse
803 * @len: length of @str, or -1 if string is %NULL terminated
804 * @feature: (out): the #hb_feature_t to initialize with the parsed values
806 * Parses a string into a #hb_feature_t.
808 * TODO: document the syntax here.
811 * %true if @str is successfully parsed, %false otherwise.
/* Parses @str (of length @len) into a feature.  Parsing goes into a local
 * `feat`, so *feature is not written by the parser itself. */
816 hb_feature_from_string (const char *str, int len,
817 hb_feature_t *feature)
824 if (likely (parse_one_feature (&str, str + len, &feat)))
/* Zeroes the caller's struct.  NOTE(review): presumably the failure path;
 * the surrounding branch structure is not visible in this excerpt. */
832 memset (feature, 0, sizeof (*feature));
837 * hb_feature_to_string:
838 * @feature: an #hb_feature_t to convert
839 * @buf: (array length=size) (out): output string
840 * @size: the allocated size of @buf
842 * Converts a #hb_feature_t into a %NULL-terminated string in the format
843 * understood by hb_feature_from_string(). The client is responsible for
844 * allocating big enough size for @buf, 128 bytes is more than enough.
/* Serializes @feature into @buf, which holds @size bytes.  The text is first
 * assembled in a local buffer `s` and then copied, truncated to size - 1
 * bytes.  It consists of the tag, an index range when start/end differ from
 * the defaults 0 and (unsigned int) -1, and the value when it is above 1.
 * A value of 0 is handled by the first branch, whose body is not visible in
 * this excerpt. */
849 hb_feature_to_string (hb_feature_t *feature,
850 char *buf, unsigned int size)
/* Nothing can be written into a zero-sized buffer. */
852 if (unlikely (!size)) return;
855 unsigned int len = 0;
856 if (feature->value == 0)
858 hb_tag_to_string (feature->tag, s + len);
/* Drop trailing spaces from the tag text. */
860 while (len && s[len - 1] == ' ')
862 if (feature->start != 0 || feature->end != (unsigned int) -1)
/* snprintf() returns a negative value on error; MAX (0, ...) keeps len from
 * moving backwards in that case. */
866 len += MAX (0, snprintf (s + len, ARRAY_LENGTH (s) - len, "%u", feature->start));
/* A range of exactly one index (end == start + 1) needs no end part. */
867 if (feature->end != feature->start + 1) {
/* An end of (unsigned int) -1 means "open-ended" and is not printed. */
869 if (feature->end != (unsigned int) -1)
870 len += MAX (0, snprintf (s + len, ARRAY_LENGTH (s) - len, "%u", feature->end));
/* Values 0 and 1 are not written as numbers. */
874 if (feature->value > 1)
877 len += MAX (0, snprintf (s + len, ARRAY_LENGTH (s) - len, "%u", feature->value));
879 assert (len < ARRAY_LENGTH (s));
/* Truncate to what fits in @buf while keeping one byte spare, presumably for
 * a NUL terminator stored after the copy (not visible in this excerpt). */
880 len = MIN (len, size - 1);
881 memcpy (buf, s, len);
/* Parses the value of a variation: an optional '=' followed by a
 * floating-point number stored in variation->value.  The result is that of
 * parse_float(). */
888 parse_variation_value (const char **pp, const char *end, hb_variation_t *variation)
890 parse_char (pp, end, '='); /* Optional. */
891 return parse_float (pp, end, &variation->value);
/* Runs the variation sub-parsers in order and stops at the first failure:
 * tag, value, then trailing whitespace.  The last operand of the && chain is
 * not visible in this excerpt. */
895 parse_one_variation (const char **pp, const char *end, hb_variation_t *variation)
897 return parse_tag (pp, end, &variation->tag) &&
898 parse_variation_value (pp, end, variation) &&
899 parse_space (pp, end) &&
904 * hb_variation_from_string:
/* Parses @str (of length @len) into a variation.  Parsing goes into a local
 * `var`, so *variation is not written by the parser itself. */
909 hb_variation_from_string (const char *str, int len,
910 hb_variation_t *variation)
917 if (likely (parse_one_variation (&str, str + len, &var)))
/* Zeroes the caller's struct.  NOTE(review): presumably the failure path;
 * the surrounding branch structure is not visible in this excerpt. */
925 memset (variation, 0, sizeof (*variation));
930 * hb_variation_to_string:
/* Serializes @variation into @buf, which holds @size bytes.  The text is
 * first assembled in a local buffer `s` and then copied, truncated to
 * size - 1 bytes.  It consists of the tag followed by the value printed
 * with "%g". */
935 hb_variation_to_string (hb_variation_t *variation,
936 char *buf, unsigned int size)
/* Nothing can be written into a zero-sized buffer. */
938 if (unlikely (!size)) return;
941 unsigned int len = 0;
942 hb_tag_to_string (variation->tag, s + len);
/* Drop trailing spaces from the tag text. */
944 while (len && s[len - 1] == ' ')
/* snprintf() returns a negative value on error; MAX (0, ...) keeps len from
 * moving backwards in that case. */
947 len += MAX (0, snprintf (s + len, ARRAY_LENGTH (s) - len, "%g", variation->value));
949 assert (len < ARRAY_LENGTH (s));
/* Truncate to what fits in @buf while keeping one byte spare, presumably for
 * a NUL terminator stored after the copy (not visible in this excerpt). */
950 len = MIN (len, size - 1);
951 memcpy (buf, s, len);