2 * Copyright © 2009,2010 Red Hat, Inc.
3 * Copyright © 2011 Google, Inc.
5 * This is part of HarfBuzz, a text shaping library.
7 * Permission is hereby granted, without written agreement and without
8 * license or royalty fees, to use, copy, modify, and distribute this
9 * software and its documentation for any purpose, provided that the
10 * above copyright notice and the following two paragraphs appear in
11 * all copies of this software.
13 * IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE TO ANY PARTY FOR
14 * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES
15 * ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN
16 * IF THE COPYRIGHT HOLDER HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH
19 * THE COPYRIGHT HOLDER SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING,
20 * BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
21 * FITNESS FOR A PARTICULAR PURPOSE. THE SOFTWARE PROVIDED HEREUNDER IS
22 * ON AN "AS IS" BASIS, AND THE COPYRIGHT HOLDER HAS NO OBLIGATION TO
23 * PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS.
25 * Red Hat Author(s): Behdad Esfahbod
26 * Google Author(s): Behdad Esfahbod
29 #include "hb-private.hh"
/* Builds a tag value from the string `s`.
 *
 * The visible loop walks at most the first 4 bytes of `s` and stops early
 * at a NUL terminator; the result is produced by HB_TAG_CHAR4 (tag).
 *
 * NOTE(review): the declarations of `i` and `tag`, the loop body, and any
 * handling of a NULL `s` or of inputs shorter than 4 bytes are not visible
 * in this excerpt -- confirm against the full file. */
37 hb_tag_from_string (const char *s)
45 for (i = 0; i < 4 && s[i]; i++)
50 return HB_TAG_CHAR4 (tag);
/* Table of direction names, each stored in a 4-byte slot.
 * hb_direction_to_string () indexes it by hb_direction_t value, and
 * hb_direction_from_string () scans it comparing only the first character
 * of each entry, so entry order and first letters both matter. */
56 const char direction_strings[][4] = {
/* Parses a direction from `str`.
 *
 * Returns HB_DIRECTION_INVALID when `str` is NULL or empty, or when no entry
 * of direction_strings begins with TOLOWER (str[0]).  Otherwise returns the
 * index of the first matching entry, cast to hb_direction_t.  Only the first
 * character of `str` is ever inspected. */
64 hb_direction_from_string (const char *str)
66 if (unlikely (!str || !*str))
67 return HB_DIRECTION_INVALID;
69 /* Lets match loosely: just match the first letter, such that
70 * all of "ltr", "left-to-right", etc work!
72 char c = TOLOWER (str[0]);
73 for (unsigned int i = 0; i < ARRAY_LENGTH (direction_strings); i++)
74 if (c == direction_strings[i][0])
75 return (hb_direction_t) i;
77 return HB_DIRECTION_INVALID;
/* Returns the direction_strings entry for `direction`.
 *
 * The value is cast to unsigned before the bounds check, so a negative
 * enum value fails the check the same way a too-large one does.
 *
 * NOTE(review): what is returned for an out-of-range `direction` is not
 * visible in this excerpt -- confirm against the full file. */
81 hb_direction_to_string (hb_direction_t direction)
83 if (likely ((unsigned int) direction < ARRAY_LENGTH (direction_strings)))
84 return direction_strings[direction];
/* Language object.  hb_language_from_string () accesses it through a member
 * named `s`, which it passes to lang_equal () and walks as a NUL-terminated
 * byte string.  The member list itself is not visible in this excerpt. */
92 struct _hb_language_t {
/* Canonicalization table for language strings, indexed by unsigned byte.
 *
 *   '0'..'9'            map to themselves
 *   'A'..'Z', 'a'..'z'  map to the lowercase letter
 *   '-', '@', '_'       all map to '-'
 *   every other byte in 0..127 maps to 0
 *
 * A 0 entry acts as "end of string" for lang_equal () and lang_hash (),
 * which both stop at the first byte whose entry is 0.
 *
 * Only the first 128 initializers are visible here; elements of the
 * 256-entry array that have no initializer are zero in C. */
96 static const char canon_map[256] = {
97 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
98 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
99 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, '-', 0, 0,
100 '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', 0, 0, 0, 0, 0, 0,
101 '-', 'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o',
102 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z', 0, 0, 0, 0, '-',
103 0, 'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o',
104 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z', 0, 0, 0, 0, 0
/* Compares two byte strings after mapping every byte through canon_map.
 *
 * Both arguments are read as unsigned char so that the byte value can be
 * used directly as a canon_map index.  The loop continues while the current
 * byte of the first string canonicalizes to a non-zero value and both
 * strings canonicalize to the same value; the result is whether the
 * canonical values at the stopping position are equal.
 *
 * NOTE(review): the second parameter's declaration and the loop body
 * (presumably advancing p1 and p2) are not visible in this excerpt. */
108 lang_equal (const void *v1,
111 const unsigned char *p1 = (const unsigned char *) v1;
112 const unsigned char *p2 = (const unsigned char *) v2;
114 while (canon_map[*p1] && canon_map[*p1] == canon_map[*p2])
119 return (canon_map[*p1] == canon_map[*p2]);
/* Hashes the canonical form of the byte string `key`.
 *
 * Bytes are consumed while their canon_map entry is non-zero; each step
 * computes h = h * 31 + canonical byte, written as (h << 5) - h.
 *
 * NOTE(review): the declaration/initial value of `h`, the pointer advance,
 * and the return statement are not visible in this excerpt.
 * NOTE(review): `p = key` relies on implicit conversion from const void *,
 * which is valid C but not C++; the file includes a .hh header, so confirm
 * how (or whether) this function is compiled. */
124 lang_hash (const void *key)
126 const unsigned char *p = key;
128 while (canon_map[*p])
130 h = (h << 5) - h + canon_map[*p];
/* Looks up `str` in a function-local table of language objects, adding a
 * new entry when no existing one compares equal under lang_equal ().
 *
 * The table (langs / num_langs / num_alloced) is function-static state that
 * persists across calls.
 *
 * NOTE(review): no locking is visible around the static table -- confirm
 * whether callers may invoke this from multiple threads.
 * NOTE(review): the NULL/empty-string handling, the return statements, and
 * the failure path of realloc () are not visible in this excerpt. */
140 hb_language_from_string (const char *str)
142 static unsigned int num_langs;
143 static unsigned int num_alloced;
144 static hb_language_t *langs;
148 /* TODO Use a hash table or something */
/* Linear scan over the entries stored so far. */
153 for (i = 0; i < num_langs; i++)
154 if (lang_equal (str, langs[i]->s))
/* Table is full: grow capacity to 2 * (8 + current capacity).  The realloc ()
 * result is held in a temporary (new_langs) rather than assigned straight to
 * langs. */
157 if (unlikely (num_langs == num_alloced)) {
158 unsigned int new_alloced = 2 * (8 + num_alloced);
159 hb_language_t *new_langs = (hb_language_t *) realloc (langs, new_alloced * sizeof (langs[0]));
162 num_alloced = new_alloced;
/* The new entry is a heap copy of `str`, reinterpreted as an hb_language_t.
 * NOTE(review): no NULL check on the strdup () result is visible before
 * langs[i]->s is dereferenced on the next visible line -- confirm. */
166 langs[i] = (hb_language_t) strdup (str);
/* Walk every byte of the stored copy (loop body not visible here). */
167 for (p = (unsigned char *) langs[i]->s; *p; p++)
176 hb_language_to_string (hb_language_t language)
/* Maps an ISO 15924 tag to an hb_script_t.
 *
 * Returns HB_SCRIPT_INVALID for HB_TAG_NONE.  Otherwise the tag's letter
 * case is normalized, a handful of aliased/variant tags are mapped to their
 * base script, any remaining tag whose bytes fall in the expected ranges is
 * returned unchanged as the script value, and everything else yields
 * HB_SCRIPT_UNKNOWN. */
185 hb_script_from_iso15924_tag (hb_tag_t tag)
187 if (unlikely (tag == HB_TAG_NONE))
188 return HB_SCRIPT_INVALID;
190 /* Be lenient, adjust case (one capital letter followed by three small letters) */
/* & 0xDFDFDFDF clears bit 0x20 in all four bytes; | 0x00202020 then sets it
 * again in the three low-order bytes only. */
191 tag = (tag & 0xDFDFDFDF) | 0x00202020;
195 /* These graduated from the 'Q' private-area codes, but
196 * the old code is still aliased by Unicode, and the Qaai
197 * one in use by ICU. */
198 case HB_TAG('Q','a','a','i'): return HB_SCRIPT_INHERITED;
199 case HB_TAG('Q','a','a','c'): return HB_SCRIPT_COPTIC;
201 /* Script variants from http://unicode.org/iso15924/ */
202 case HB_TAG('C','y','r','s'): return HB_SCRIPT_CYRILLIC;
203 case HB_TAG('L','a','t','f'): return HB_SCRIPT_LATIN;
204 case HB_TAG('L','a','t','g'): return HB_SCRIPT_LATIN;
205 case HB_TAG('S','y','r','e'): return HB_SCRIPT_SYRIAC;
206 case HB_TAG('S','y','r','j'): return HB_SCRIPT_SYRIAC;
207 case HB_TAG('S','y','r','n'): return HB_SCRIPT_SYRIAC;
210 /* If it looks right, just use the tag as a script */
/* The top three bits of each byte must be 010 for the high-order byte
 * (0x40..0x5F) and 011 for the other three (0x60..0x7F). */
211 if (((uint32_t) tag & 0xE0E0E0E0) == 0x40606060)
212 return (hb_script_t) tag;
214 /* Otherwise, return unknown */
215 return HB_SCRIPT_UNKNOWN;
/* Converts the string `s` to a script by first turning it into a tag with
 * hb_tag_from_string () and then passing that tag to
 * hb_script_from_iso15924_tag (). */
219 hb_script_from_string (const char *s)
221 return hb_script_from_iso15924_tag (hb_tag_from_string (s));
/* Returns `script` reinterpreted as an hb_tag_t.  This is a plain cast; no
 * table lookup or validation is performed. */
225 hb_script_to_iso15924_tag (hb_script_t script)
227 return (hb_tag_t) script;
/* Returns the horizontal direction associated with `script`:
 * HB_DIRECTION_RTL for the scripts listed in the switch, HB_DIRECTION_LTR
 * for everything else.
 *
 * NOTE(review): the case list shown here may be incomplete in this excerpt;
 * check the full file before relying on it as the complete RTL set. */
231 hb_script_get_horizontal_direction (hb_script_t script)
233 switch ((hb_tag_t) script)
235 case HB_SCRIPT_ARABIC:
236 case HB_SCRIPT_HEBREW:
237 case HB_SCRIPT_SYRIAC:
238 case HB_SCRIPT_THAANA:
240 /* Unicode-4.0 additions */
241 case HB_SCRIPT_CYPRIOT:
243 /* Unicode-5.0 additions */
244 case HB_SCRIPT_PHOENICIAN:
247 /* Unicode-5.2 additions */
248 case HB_SCRIPT_AVESTAN:
249 case HB_SCRIPT_IMPERIAL_ARAMAIC:
250 case HB_SCRIPT_INSCRIPTIONAL_PAHLAVI:
251 case HB_SCRIPT_INSCRIPTIONAL_PARTHIAN:
252 case HB_SCRIPT_OLD_SOUTH_ARABIAN:
253 case HB_SCRIPT_OLD_TURKIC:
254 case HB_SCRIPT_SAMARITAN:
256 /* Unicode-6.0 additions */
257 case HB_SCRIPT_MANDAIC:
/* All cases above share this single return. */
259 return HB_DIRECTION_RTL;
262 return HB_DIRECTION_LTR;