2 * Copyright © 2009 Red Hat, Inc.
3 * Copyright © 2011 Codethink Limited
4 * Copyright © 2010,2011,2012 Google, Inc.
6 * This is part of HarfBuzz, a text shaping library.
8 * Permission is hereby granted, without written agreement and without
9 * license or royalty fees, to use, copy, modify, and distribute this
10 * software and its documentation for any purpose, provided that the
11 * above copyright notice and the following two paragraphs appear in
12 * all copies of this software.
14 * IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE TO ANY PARTY FOR
15 * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES
16 * ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN
17 * IF THE COPYRIGHT HOLDER HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH
20 * THE COPYRIGHT HOLDER SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING,
21 * BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
22 * FITNESS FOR A PARTICULAR PURPOSE. THE SOFTWARE PROVIDED HEREUNDER IS
23 * ON AN "AS IS" BASIS, AND THE COPYRIGHT HOLDER HAS NO OBLIGATION TO
24 * PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS.
26 * Red Hat Author(s): Behdad Esfahbod
27 * Codethink Author(s): Ryan Lortie
28 * Google Author(s): Behdad Esfahbod
33 #include "hb-unicode.hh"
39 * @short_description: Unicode character property access
42 * Unicode functions are used to access Unicode character properties.
43 * With these functions, client programs can query various properties from
44 * the Unicode Character Database for any code point, such as General
45 * Category (gc), Script (sc), Canonical Combining Class (ccc), etc.
47 * Client programs can optionally pass in their own Unicode functions
48 * that implement the same queries. The set of functions available is
49 * defined by the virtual methods in #hb_unicode_funcs_t.
51 * HarfBuzz provides built-in default functions for each method in
52 * #hb_unicode_funcs_t.
/* Fallback combining-class callback: every parameter is marked HB_UNUSED,
 * and the function always reports HB_UNICODE_COMBINING_CLASS_NOT_REORDERED. */
60 static hb_unicode_combining_class_t
61 hb_unicode_combining_class_nil (hb_unicode_funcs_t *ufuncs HB_UNUSED,
62 hb_codepoint_t unicode HB_UNUSED,
63 void *user_data HB_UNUSED)
65 return HB_UNICODE_COMBINING_CLASS_NOT_REORDERED;
68 #ifndef HB_DISABLE_DEPRECATED
/* Fallback East-Asian-width callback (only built when HB_DISABLE_DEPRECATED
 * is not defined).  All parameters are marked HB_UNUSED.
 * NOTE(review): the return type and body are not visible in this extract;
 * confirm the constant it returns against the full file. */
70 hb_unicode_eastasian_width_nil (hb_unicode_funcs_t *ufuncs HB_UNUSED,
71 hb_codepoint_t unicode HB_UNUSED,
72 void *user_data HB_UNUSED)
/* Fallback general-category callback: ignores its arguments (all HB_UNUSED)
 * and classifies every code point as HB_UNICODE_GENERAL_CATEGORY_OTHER_LETTER. */
78 static hb_unicode_general_category_t
79 hb_unicode_general_category_nil (hb_unicode_funcs_t *ufuncs HB_UNUSED,
80 hb_codepoint_t unicode HB_UNUSED,
81 void *user_data HB_UNUSED)
83 return HB_UNICODE_GENERAL_CATEGORY_OTHER_LETTER;
/* Fallback mirroring callback.  Unlike the other fallbacks, the unicode
 * parameter is not marked HB_UNUSED, so the body evidently reads it.
 * NOTE(review): return type and body are not visible in this extract;
 * presumably the code point is handed back unchanged -- confirm. */
87 hb_unicode_mirroring_nil (hb_unicode_funcs_t *ufuncs HB_UNUSED,
88 hb_codepoint_t unicode,
89 void *user_data HB_UNUSED)
/* Fallback script callback: ignores its arguments (all HB_UNUSED) and
 * reports HB_SCRIPT_UNKNOWN for every code point.
 * NOTE(review): the return-type line is not visible in this extract. */
95 hb_unicode_script_nil (hb_unicode_funcs_t *ufuncs HB_UNUSED,
96 hb_codepoint_t unicode HB_UNUSED,
97 void *user_data HB_UNUSED)
99 return HB_SCRIPT_UNKNOWN;
/* Fallback composition callback.  Takes the pair (a, b) and the output
 * slot ab, all marked HB_UNUSED, so nothing is read and ab is not written.
 * NOTE(review): return type and body are not visible in this extract;
 * presumably it reports "did not compose" -- confirm. */
103 hb_unicode_compose_nil (hb_unicode_funcs_t *ufuncs HB_UNUSED,
104 hb_codepoint_t a HB_UNUSED,
105 hb_codepoint_t b HB_UNUSED,
106 hb_codepoint_t *ab HB_UNUSED,
107 void *user_data HB_UNUSED)
/* Fallback decomposition callback.  Takes ab and the output slots a and b,
 * all marked HB_UNUSED, so the outputs are not written.
 * NOTE(review): return type and body are not visible in this extract;
 * presumably it reports "did not decompose" -- confirm. */
113 hb_unicode_decompose_nil (hb_unicode_funcs_t *ufuncs HB_UNUSED,
114 hb_codepoint_t ab HB_UNUSED,
115 hb_codepoint_t *a HB_UNUSED,
116 hb_codepoint_t *b HB_UNUSED,
117 void *user_data HB_UNUSED)
123 #ifndef HB_DISABLE_DEPRECATED
/* Fallback compatibility-decomposition callback (only built when
 * HB_DISABLE_DEPRECATED is not defined).  All parameters, including the
 * decomposed output buffer, are marked HB_UNUSED.
 * NOTE(review): return type and body are not visible in this extract;
 * presumably it reports a decomposition length of zero -- confirm. */
125 hb_unicode_decompose_compatibility_nil (hb_unicode_funcs_t *ufuncs HB_UNUSED,
126 hb_codepoint_t u HB_UNUSED,
127 hb_codepoint_t *decomposed HB_UNUSED,
128 void *user_data HB_UNUSED)
134 #if !defined(HB_NO_UNICODE_FUNCS) && defined(HAVE_GLIB)
137 #if !defined(HB_NO_UNICODE_FUNCS) && defined(HAVE_ICU) && defined(HAVE_ICU_BUILTIN)
142 * hb_unicode_funcs_get_default:
144 * Fetches a pointer to the default Unicode-functions structure that is used
145 * when no functions are explicitly set on #hb_buffer_t.
147 * Return value: (transfer none): a pointer to the #hb_unicode_funcs_t Unicode-functions structure
/* The backend is chosen by the preprocessor, in this priority order:
 *   1. hb_ucd_get_unicode_funcs ()  -- unless HB_NO_UCD is defined;
 *   2. hb_glib_get_unicode_funcs () -- when HAVE_GLIB is defined;
 *   3. hb_icu_get_unicode_funcs ()  -- when HAVE_ICU and HAVE_ICU_BUILTIN
 *                                      are both defined.
 * Defining HB_NO_UNICODE_FUNCS disables all three. */
152 hb_unicode_funcs_get_default ()
154 #if !defined(HB_NO_UNICODE_FUNCS) && !defined(HB_NO_UCD)
155 return hb_ucd_get_unicode_funcs ();
156 #elif !defined(HB_NO_UNICODE_FUNCS) && defined(HAVE_GLIB)
157 return hb_glib_get_unicode_funcs ();
158 #elif !defined(HB_NO_UNICODE_FUNCS) && defined(HAVE_ICU) && defined(HAVE_ICU_BUILTIN)
159 return hb_icu_get_unicode_funcs ();
/* No backend selected: HB_UNICODE_FUNCS_NIL is defined as a marker and the
 * empty function table is returned.
 * NOTE(review): the #else / #endif lines around this branch are not visible
 * in this extract. */
161 #define HB_UNICODE_FUNCS_NIL 1
162 return hb_unicode_funcs_get_empty ();
/* Build-time guard: if HB_UNICODE_FUNCS_NIL is defined while
 * HB_NO_UNICODE_FUNCS is not, compilation is stopped with the messages
 * below instead of silently shipping the empty function table. */
166 #if !defined(HB_NO_UNICODE_FUNCS) && defined(HB_UNICODE_FUNCS_NIL)
167 #error "Could not find any Unicode functions implementation, you have to provide your own"
168 #error "Consider building hb-ucd.cc. If you absolutely want to build without any, check the code."
172 * hb_unicode_funcs_create: (Xconstructor)
173 * @parent: (nullable): Parent Unicode-functions structure
175 * Creates a new #hb_unicode_funcs_t structure of Unicode functions.
177 * Return value: (transfer full): The Unicode-functions structure
/* Implementation notes:
 *  - if hb_object_create () yields null, the empty singleton is returned
 *    instead, so callers never receive a null pointer from that path;
 *  - the parent is made immutable and a reference to it is stored in
 *    ufuncs->parent;
 *  - the new object starts with a copy of the parent's callback table
 *    (func) and of its user_data.
 * NOTE(review): the return-type line, the condition guarding the
 * "parent = hb_unicode_funcs_get_empty ()" assignment (presumably a null
 * check, since @parent is documented as nullable) and the final return
 * are not visible in this extract -- confirm against the full file. */
182 hb_unicode_funcs_create (hb_unicode_funcs_t *parent)
184 hb_unicode_funcs_t *ufuncs;
186 if (!(ufuncs = hb_object_create<hb_unicode_funcs_t> ()))
187 return hb_unicode_funcs_get_empty ();
190 parent = hb_unicode_funcs_get_empty ();
192 hb_unicode_funcs_make_immutable (parent);
193 ufuncs->parent = hb_unicode_funcs_reference (parent);
195 ufuncs->func = parent->func;
197 /* We can safely copy user_data from parent since we hold a reference
198 * onto it and it's immutable. We should not copy the destroy notifiers
200 ufuncs->user_data = parent->user_data;
/* Static null instance of hb_unicode_funcs_t.  It has a static object
 * header and no parent.  Its callback table is produced by expanding
 * HB_UNICODE_FUNCS_IMPLEMENT_CALLBACKS with HB_UNICODE_FUNC_IMPLEMENT
 * temporarily defined to name the matching hb_unicode_<name>_nil function,
 * so each slot points at its fallback.
 * NOTE(review): the braces and any trailing members of this initializer
 * are not visible in this extract. */
206 DEFINE_NULL_INSTANCE (hb_unicode_funcs_t) =
208 HB_OBJECT_HEADER_STATIC,
210 nullptr, /* parent */
212 #define HB_UNICODE_FUNC_IMPLEMENT(name) hb_unicode_##name##_nil,
213 HB_UNICODE_FUNCS_IMPLEMENT_CALLBACKS
214 #undef HB_UNICODE_FUNC_IMPLEMENT
219 * hb_unicode_funcs_get_empty:
221 * Fetches the singleton empty Unicode-functions structure.
223 * Return value: (transfer full): The empty Unicode-functions structure
/* Returns the address of the null instance, Null (hb_unicode_funcs_t),
 * with its const qualifier cast away so it fits the mutable-pointer API. */
228 hb_unicode_funcs_get_empty ()
230 return const_cast<hb_unicode_funcs_t *> (&Null (hb_unicode_funcs_t));
234 * hb_unicode_funcs_reference: (skip)
235 * @ufuncs: The Unicode-functions structure
237 * Increases the reference count on a Unicode-functions structure.
239 * Return value: (transfer full): The Unicode-functions structure
/* Thin wrapper: forwards to hb_object_reference () and returns its result. */
244 hb_unicode_funcs_reference (hb_unicode_funcs_t *ufuncs)
246 return hb_object_reference (ufuncs);
250 * hb_unicode_funcs_destroy: (skip)
251 * @ufuncs: The Unicode-functions structure
253 * Decreases the reference count on a Unicode-functions structure. When
254 * the reference count reaches zero, the Unicode-functions structure is
255 * destroyed, freeing all memory.
/* Teardown order, as far as this extract shows:
 *  1. hb_object_destroy () decides whether teardown proceeds at all; when
 *     it returns false the function returns immediately;
 *  2. for every callback slot, the per-slot destroy notifier (if set) is
 *     invoked on that slot's user_data;
 *  3. hb_unicode_funcs_destroy () is called on the parent, releasing the
 *     reference stored in ufuncs->parent.
 * NOTE(review): the final release of ufuncs itself is not visible in this
 * extract -- confirm against the full file. */
260 hb_unicode_funcs_destroy (hb_unicode_funcs_t *ufuncs)
262 if (!hb_object_destroy (ufuncs)) return;
264 #define HB_UNICODE_FUNC_IMPLEMENT(name) \
265 if (ufuncs->destroy.name) ufuncs->destroy.name (ufuncs->user_data.name);
266 HB_UNICODE_FUNCS_IMPLEMENT_CALLBACKS
267 #undef HB_UNICODE_FUNC_IMPLEMENT
269 hb_unicode_funcs_destroy (ufuncs->parent);
275 * hb_unicode_funcs_set_user_data: (skip)
276 * @ufuncs: The Unicode-functions structure
277 * @key: The user-data key
278 * @data: A pointer to the user data
279 * @destroy: (nullable): A callback to call when @data is not needed anymore
280 * @replace: Whether to replace an existing data with the same key
282 * Attaches a user-data key/data pair to the specified Unicode-functions structure.
284 * Return value: %true if success, %false otherwise
/* Thin wrapper: passes ufuncs, key, data, destroy and replace straight to
 * hb_object_set_user_data () and returns its result.
 * NOTE(review): the return-type line and the declarations of the data and
 * replace parameters are not visible in this extract. */
289 hb_unicode_funcs_set_user_data (hb_unicode_funcs_t *ufuncs,
290 hb_user_data_key_t *key,
292 hb_destroy_func_t destroy,
295 return hb_object_set_user_data (ufuncs, key, data, destroy, replace);
299 * hb_unicode_funcs_get_user_data: (skip)
300 * @ufuncs: The Unicode-functions structure
301 * @key: The user-data key to query
303 * Fetches the user-data associated with the specified key,
304 * attached to the specified Unicode-functions structure.
306 * Return value: (transfer none): A pointer to the user data
/* Thin wrapper: forwards ufuncs and key to hb_object_get_user_data () and
 * returns whatever it yields. */
311 hb_unicode_funcs_get_user_data (hb_unicode_funcs_t *ufuncs,
312 hb_user_data_key_t *key)
314 return hb_object_get_user_data (ufuncs, key);
319 * hb_unicode_funcs_make_immutable:
320 * @ufuncs: The Unicode-functions structure
322 * Makes the specified Unicode-functions structure immutable.
/* Checks hb_object_is_immutable () first, then calls
 * hb_object_make_immutable () on ufuncs.
 * NOTE(review): the statement guarded by the check is not visible in this
 * extract (presumably an early return so an already-immutable object is
 * left untouched) -- confirm against the full file. */
328 hb_unicode_funcs_make_immutable (hb_unicode_funcs_t *ufuncs)
330 if (hb_object_is_immutable (ufuncs))
333 hb_object_make_immutable (ufuncs);
337 * hb_unicode_funcs_is_immutable:
338 * @ufuncs: The Unicode-functions structure
340 * Tests whether the specified Unicode-functions structure is immutable.
343 * Return value: %true if @ufuncs is immutable, %false otherwise
/* Thin wrapper: returns the result of hb_object_is_immutable (ufuncs). */
348 hb_unicode_funcs_is_immutable (hb_unicode_funcs_t *ufuncs)
350 return hb_object_is_immutable (ufuncs);
354 * hb_unicode_funcs_get_parent:
355 * @ufuncs: The Unicode-functions structure
357 * Fetches the parent of the Unicode-functions structure @ufuncs.
360 * Return value: The parent Unicode-functions structure
/* Returns ufuncs->parent when it is set; otherwise falls back to the empty
 * singleton so the caller always gets a usable pointer.  No reference is
 * taken on the returned object here. */
365 hb_unicode_funcs_get_parent (hb_unicode_funcs_t *ufuncs)
367 return ufuncs->parent ? ufuncs->parent : hb_unicode_funcs_get_empty ();
/* Generates one setter, hb_unicode_funcs_set_<name>_func (), per entry of
 * HB_UNICODE_FUNCS_IMPLEMENT_CALLBACKS.  As far as this extract shows, each
 * setter:
 *  - tests hb_object_is_immutable (ufuncs) before touching anything;
 *  - runs the slot's existing destroy notifier, if any, on the slot's
 *    existing user_data;
 *  - then either installs the caller's func / user_data / destroy, or
 *    restores the parent's func and user_data with a null destroy notifier.
 *
 * NOTE(review): the lines carrying the user_data parameter declaration,
 * the statement taken on the immutable path, and the condition choosing
 * between the two assignment groups (presumably "if (func) ... else ...")
 * are not visible in this extract.
 *
 * NOTE(review): possible leak -- nothing visible here invokes the caller's
 * destroy on the caller's user_data when the setter bails out on an
 * immutable object, or when the parent's callback is restored instead of
 * installing func.  Since the setter reports no status, the caller cannot
 * tell that it still owns user_data.  Confirm against the full file and
 * consider calling destroy (user_data) on those paths. */
371 #define HB_UNICODE_FUNC_IMPLEMENT(name) \
374 hb_unicode_funcs_set_##name##_func (hb_unicode_funcs_t *ufuncs, \
375 hb_unicode_##name##_func_t func, \
377 hb_destroy_func_t destroy) \
379 if (hb_object_is_immutable (ufuncs)) \
382 if (ufuncs->destroy.name) \
383 ufuncs->destroy.name (ufuncs->user_data.name); \
386 ufuncs->func.name = func; \
387 ufuncs->user_data.name = user_data; \
388 ufuncs->destroy.name = destroy; \
390 ufuncs->func.name = ufuncs->parent->func.name; \
391 ufuncs->user_data.name = ufuncs->parent->user_data.name; \
392 ufuncs->destroy.name = nullptr; \
396 HB_UNICODE_FUNCS_IMPLEMENT_CALLBACKS
397 #undef HB_UNICODE_FUNC_IMPLEMENT
/* Generates one public query function, hb_unicode_<name> (), per entry of
 * HB_UNICODE_FUNCS_IMPLEMENT_CALLBACKS_SIMPLE.  Each takes a single code
 * point and forwards it to the ufuncs-><name> () member.
 * NOTE(review): the line emitting return_type and the brace lines of the
 * generated function are not visible in this extract. */
400 #define HB_UNICODE_FUNC_IMPLEMENT(return_type, name) \
403 hb_unicode_##name (hb_unicode_funcs_t *ufuncs, \
404 hb_codepoint_t unicode) \
406 return ufuncs->name (unicode); \
408 HB_UNICODE_FUNCS_IMPLEMENT_CALLBACKS_SIMPLE
409 #undef HB_UNICODE_FUNC_IMPLEMENT
412 * hb_unicode_compose:
413 * @ufuncs: The Unicode-functions structure
414 * @a: The first Unicode code point to compose
415 * @b: The second Unicode code point to compose
416 * @ab: (out): The composition of @a, @b
418 * Fetches the composition of a sequence of two Unicode code points.
421 * Calls the composition function of the specified
422 * Unicode-functions structure @ufuncs.
424 * Return value: %true if @a and @b composed, %false otherwise
/* Thin wrapper: forwards a, b and the output slot ab to ufuncs->compose ()
 * and returns its result.
 * NOTE(review): the return-type line and the declarations of a, b and ab
 * are not visible in this extract. */
429 hb_unicode_compose (hb_unicode_funcs_t *ufuncs,
434 return ufuncs->compose (a, b, ab);
438 * hb_unicode_decompose:
439 * @ufuncs: The Unicode-functions structure
440 * @ab: Unicode code point to decompose
441 * @a: (out): The first code point of the decomposition of @ab
442 * @b: (out): The second code point of the decomposition of @ab
444 * Fetches the decomposition of a Unicode code point.
446 * Calls the decomposition function of the specified
447 * Unicode-functions structure @ufuncs.
449 * Return value: %true if @ab was decomposed, %false otherwise
/* Thin wrapper: forwards ab and the output slots a and b to
 * ufuncs->decompose () and returns its result.
 * NOTE(review): the return-type line and the declarations of ab, a and b
 * are not visible in this extract. */
454 hb_unicode_decompose (hb_unicode_funcs_t *ufuncs,
459 return ufuncs->decompose (ab, a, b);
462 #ifndef HB_DISABLE_DEPRECATED
464 * hb_unicode_decompose_compatibility:
465 * @ufuncs: The Unicode-functions structure
466 * @u: Code point to decompose
467 * @decomposed: (out): Compatibility decomposition of @u
469 * Fetches the compatibility decomposition of a Unicode
470 * code point. Deprecated.
472 * Return value: length of @decomposed.
/* Thin wrapper (only built when HB_DISABLE_DEPRECATED is not defined):
 * forwards u and the decomposed output buffer to
 * ufuncs->decompose_compatibility () and returns its result.
 * NOTE(review): the return-type line and the declaration of u are not
 * visible in this extract. */
478 hb_unicode_decompose_compatibility (hb_unicode_funcs_t *ufuncs,
480 hb_codepoint_t *decomposed)
482 return ufuncs->decompose_compatibility (u, decomposed);
487 #ifndef HB_NO_OT_SHAPE
488 /* See hb-unicode.hh for details. */
/* 256-entry table, apparently indexed by canonical combining class (the
 * per-entry comments name the HB_UNICODE_COMBINING_CLASS_* value of each
 * slot).  Most visible slots hold their own index; a number of classes are
 * remapped through HB_MODIFIED_COMBINING_CLASS_CCC<n> constants instead.
 * NOTE(review): the element type, the braces and several initializer rows
 * are not visible in this extract, so the entry count cannot be verified
 * from here -- check against the full file before editing any row. */
490 _hb_modified_combining_class[256] =
492 0, /* HB_UNICODE_COMBINING_CLASS_NOT_REORDERED */
493 1, /* HB_UNICODE_COMBINING_CLASS_OVERLAY */
495 7, /* HB_UNICODE_COMBINING_CLASS_NUKTA */
496 8, /* HB_UNICODE_COMBINING_CLASS_KANA_VOICING */
497 9, /* HB_UNICODE_COMBINING_CLASS_VIRAMA */
500 HB_MODIFIED_COMBINING_CLASS_CCC10,
501 HB_MODIFIED_COMBINING_CLASS_CCC11,
502 HB_MODIFIED_COMBINING_CLASS_CCC12,
503 HB_MODIFIED_COMBINING_CLASS_CCC13,
504 HB_MODIFIED_COMBINING_CLASS_CCC14,
505 HB_MODIFIED_COMBINING_CLASS_CCC15,
506 HB_MODIFIED_COMBINING_CLASS_CCC16,
507 HB_MODIFIED_COMBINING_CLASS_CCC17,
508 HB_MODIFIED_COMBINING_CLASS_CCC18,
509 HB_MODIFIED_COMBINING_CLASS_CCC19,
510 HB_MODIFIED_COMBINING_CLASS_CCC20,
511 HB_MODIFIED_COMBINING_CLASS_CCC21,
512 HB_MODIFIED_COMBINING_CLASS_CCC22,
513 HB_MODIFIED_COMBINING_CLASS_CCC23,
514 HB_MODIFIED_COMBINING_CLASS_CCC24,
515 HB_MODIFIED_COMBINING_CLASS_CCC25,
516 HB_MODIFIED_COMBINING_CLASS_CCC26,
519 HB_MODIFIED_COMBINING_CLASS_CCC27,
520 HB_MODIFIED_COMBINING_CLASS_CCC28,
521 HB_MODIFIED_COMBINING_CLASS_CCC29,
522 HB_MODIFIED_COMBINING_CLASS_CCC30,
523 HB_MODIFIED_COMBINING_CLASS_CCC31,
524 HB_MODIFIED_COMBINING_CLASS_CCC32,
525 HB_MODIFIED_COMBINING_CLASS_CCC33,
526 HB_MODIFIED_COMBINING_CLASS_CCC34,
527 HB_MODIFIED_COMBINING_CLASS_CCC35,
530 HB_MODIFIED_COMBINING_CLASS_CCC36,
533 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59,
534 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79,
538 HB_MODIFIED_COMBINING_CLASS_CCC84,
539 85, 86, 87, 88, 89, 90,
540 HB_MODIFIED_COMBINING_CLASS_CCC91,
541 92, 93, 94, 95, 96, 97, 98, 99, 100, 101, 102,
544 HB_MODIFIED_COMBINING_CLASS_CCC103,
546 HB_MODIFIED_COMBINING_CLASS_CCC107,
547 108, 109, 110, 111, 112, 113, 114, 115, 116, 117,
550 HB_MODIFIED_COMBINING_CLASS_CCC118,
552 HB_MODIFIED_COMBINING_CLASS_CCC122,
553 123, 124, 125, 126, 127, 128,
556 HB_MODIFIED_COMBINING_CLASS_CCC129,
557 HB_MODIFIED_COMBINING_CLASS_CCC130,
559 HB_MODIFIED_COMBINING_CLASS_CCC132,
560 133, 134, 135, 136, 137, 138, 139,
563 140, 141, 142, 143, 144, 145, 146, 147, 148, 149,
564 150, 151, 152, 153, 154, 155, 156, 157, 158, 159,
565 160, 161, 162, 163, 164, 165, 166, 167, 168, 169,
566 170, 171, 172, 173, 174, 175, 176, 177, 178, 179,
567 180, 181, 182, 183, 184, 185, 186, 187, 188, 189,
568 190, 191, 192, 193, 194, 195, 196, 197, 198, 199,
570 200, /* HB_UNICODE_COMBINING_CLASS_ATTACHED_BELOW_LEFT */
572 202, /* HB_UNICODE_COMBINING_CLASS_ATTACHED_BELOW */
573 203, 204, 205, 206, 207, 208, 209, 210, 211, 212, 213,
574 214, /* HB_UNICODE_COMBINING_CLASS_ATTACHED_ABOVE */
576 216, /* HB_UNICODE_COMBINING_CLASS_ATTACHED_ABOVE_RIGHT */
578 218, /* HB_UNICODE_COMBINING_CLASS_BELOW_LEFT */
580 220, /* HB_UNICODE_COMBINING_CLASS_BELOW */
582 222, /* HB_UNICODE_COMBINING_CLASS_BELOW_RIGHT */
584 224, /* HB_UNICODE_COMBINING_CLASS_LEFT */
586 226, /* HB_UNICODE_COMBINING_CLASS_RIGHT */
588 228, /* HB_UNICODE_COMBINING_CLASS_ABOVE_LEFT */
590 230, /* HB_UNICODE_COMBINING_CLASS_ABOVE */
592 232, /* HB_UNICODE_COMBINING_CLASS_ABOVE_RIGHT */
593 233, /* HB_UNICODE_COMBINING_CLASS_DOUBLE_BELOW */
594 234, /* HB_UNICODE_COMBINING_CLASS_DOUBLE_ABOVE */
595 235, 236, 237, 238, 239,
596 240, /* HB_UNICODE_COMBINING_CLASS_IOTA_SUBSCRIPT */
597 241, 242, 243, 244, 245, 246, 247, 248, 249, 250, 251, 252, 253, 254,
598 255, /* HB_UNICODE_COMBINING_CLASS_INVALID */
606 #ifndef HB_NO_EMOJI_SEQUENCES
608 #include "hb-unicode-emoji-table.hh"
/* Thin wrapper (only built when HB_NO_EMOJI_SEQUENCES is not defined):
 * forwards cp to _hb_emoji_is_Extended_Pictographic (), which comes from
 * hb-unicode-emoji-table.hh, and returns its result.
 * NOTE(review): the return-type line is not visible in this extract. */
611 _hb_unicode_is_emoji_Extended_Pictographic (hb_codepoint_t cp)
613 return _hb_emoji_is_Extended_Pictographic (cp);