6a15cb00cbcce61a820d4728f835e4e07100ce11
[platform/upstream/harfbuzz.git] / src / hb-unicode.h
1 /*
2  * Copyright © 2009  Red Hat, Inc.
3  * Copyright © 2011  Codethink Limited
4  * Copyright © 2011,2012  Google, Inc.
5  *
6  *  This is part of HarfBuzz, a text shaping library.
7  *
8  * Permission is hereby granted, without written agreement and without
9  * license or royalty fees, to use, copy, modify, and distribute this
10  * software and its documentation for any purpose, provided that the
11  * above copyright notice and the following two paragraphs appear in
12  * all copies of this software.
13  *
14  * IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE TO ANY PARTY FOR
15  * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES
16  * ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN
17  * IF THE COPYRIGHT HOLDER HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH
18  * DAMAGE.
19  *
20  * THE COPYRIGHT HOLDER SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING,
21  * BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
22  * FITNESS FOR A PARTICULAR PURPOSE.  THE SOFTWARE PROVIDED HEREUNDER IS
23  * ON AN "AS IS" BASIS, AND THE COPYRIGHT HOLDER HAS NO OBLIGATION TO
24  * PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS.
25  *
26  * Red Hat Author(s): Behdad Esfahbod
27  * Codethink Author(s): Ryan Lortie
28  * Google Author(s): Behdad Esfahbod
29  */
30
31 #ifndef HB_H_IN
32 #error "Include <hb.h> instead."
33 #endif
34
35 #ifndef HB_UNICODE_H
36 #define HB_UNICODE_H
37
38 #include "hb-common.h"
39
40 HB_BEGIN_DECLS
41
42
43 /* hb_unicode_general_category_t */
44
45 /* Unicode Character Database property: General_Category (gc).  No enumerator has an explicit value, so they number from 0 in declaration order; the trailing comment on each is its two-letter short name. */
46 typedef enum
47 {
48   HB_UNICODE_GENERAL_CATEGORY_CONTROL,                  /* Cc */
49   HB_UNICODE_GENERAL_CATEGORY_FORMAT,                   /* Cf */
50   HB_UNICODE_GENERAL_CATEGORY_UNASSIGNED,               /* Cn */
51   HB_UNICODE_GENERAL_CATEGORY_PRIVATE_USE,              /* Co */
52   HB_UNICODE_GENERAL_CATEGORY_SURROGATE,                /* Cs */
53   HB_UNICODE_GENERAL_CATEGORY_LOWERCASE_LETTER,         /* Ll */
54   HB_UNICODE_GENERAL_CATEGORY_MODIFIER_LETTER,          /* Lm */
55   HB_UNICODE_GENERAL_CATEGORY_OTHER_LETTER,             /* Lo */
56   HB_UNICODE_GENERAL_CATEGORY_TITLECASE_LETTER,         /* Lt */
57   HB_UNICODE_GENERAL_CATEGORY_UPPERCASE_LETTER,         /* Lu */
58   HB_UNICODE_GENERAL_CATEGORY_SPACING_MARK,             /* Mc */
59   HB_UNICODE_GENERAL_CATEGORY_ENCLOSING_MARK,           /* Me */
60   HB_UNICODE_GENERAL_CATEGORY_NON_SPACING_MARK,         /* Mn */
61   HB_UNICODE_GENERAL_CATEGORY_DECIMAL_NUMBER,           /* Nd */
62   HB_UNICODE_GENERAL_CATEGORY_LETTER_NUMBER,            /* Nl */
63   HB_UNICODE_GENERAL_CATEGORY_OTHER_NUMBER,             /* No */
64   HB_UNICODE_GENERAL_CATEGORY_CONNECT_PUNCTUATION,      /* Pc */
65   HB_UNICODE_GENERAL_CATEGORY_DASH_PUNCTUATION,         /* Pd */
66   HB_UNICODE_GENERAL_CATEGORY_CLOSE_PUNCTUATION,        /* Pe */
67   HB_UNICODE_GENERAL_CATEGORY_FINAL_PUNCTUATION,        /* Pf */
68   HB_UNICODE_GENERAL_CATEGORY_INITIAL_PUNCTUATION,      /* Pi */
69   HB_UNICODE_GENERAL_CATEGORY_OTHER_PUNCTUATION,        /* Po */
70   HB_UNICODE_GENERAL_CATEGORY_OPEN_PUNCTUATION,         /* Ps */
71   HB_UNICODE_GENERAL_CATEGORY_CURRENCY_SYMBOL,          /* Sc */
72   HB_UNICODE_GENERAL_CATEGORY_MODIFIER_SYMBOL,          /* Sk */
73   HB_UNICODE_GENERAL_CATEGORY_MATH_SYMBOL,              /* Sm */
74   HB_UNICODE_GENERAL_CATEGORY_OTHER_SYMBOL,             /* So */
75   HB_UNICODE_GENERAL_CATEGORY_LINE_SEPARATOR,           /* Zl */
76   HB_UNICODE_GENERAL_CATEGORY_PARAGRAPH_SEPARATOR,      /* Zp */
77   HB_UNICODE_GENERAL_CATEGORY_SPACE_SEPARATOR           /* Zs */
78 } hb_unicode_general_category_t;
79
80 /* hb_unicode_combining_class_t */
81
82 /* Note: newer versions of Unicode may add new values.  Clients should be ready to handle
83  * any value in the 0..254 range being returned from hb_unicode_combining_class(),
84  * including values that have no named enumerator below.  255 is HB_UNICODE_COMBINING_CLASS_INVALID. */
85
86 /* Unicode Character Database property: Canonical_Combining_Class (ccc) */
87 typedef enum
88 {
89   HB_UNICODE_COMBINING_CLASS_NOT_REORDERED      = 0,
90   HB_UNICODE_COMBINING_CLASS_OVERLAY            = 1,
91   HB_UNICODE_COMBINING_CLASS_NUKTA              = 7,
92   HB_UNICODE_COMBINING_CLASS_KANA_VOICING       = 8,
93   HB_UNICODE_COMBINING_CLASS_VIRAMA             = 9,
94
95   /* Hebrew */
96   HB_UNICODE_COMBINING_CLASS_CCC10      =  10,
97   HB_UNICODE_COMBINING_CLASS_CCC11      =  11,
98   HB_UNICODE_COMBINING_CLASS_CCC12      =  12,
99   HB_UNICODE_COMBINING_CLASS_CCC13      =  13,
100   HB_UNICODE_COMBINING_CLASS_CCC14      =  14,
101   HB_UNICODE_COMBINING_CLASS_CCC15      =  15,
102   HB_UNICODE_COMBINING_CLASS_CCC16      =  16,
103   HB_UNICODE_COMBINING_CLASS_CCC17      =  17,
104   HB_UNICODE_COMBINING_CLASS_CCC18      =  18,
105   HB_UNICODE_COMBINING_CLASS_CCC19      =  19,
106   HB_UNICODE_COMBINING_CLASS_CCC20      =  20,
107   HB_UNICODE_COMBINING_CLASS_CCC21      =  21,
108   HB_UNICODE_COMBINING_CLASS_CCC22      =  22,
109   HB_UNICODE_COMBINING_CLASS_CCC23      =  23,
110   HB_UNICODE_COMBINING_CLASS_CCC24      =  24,
111   HB_UNICODE_COMBINING_CLASS_CCC25      =  25,
112   HB_UNICODE_COMBINING_CLASS_CCC26      =  26,
113
114   /* Arabic */
115   HB_UNICODE_COMBINING_CLASS_CCC27      =  27,
116   HB_UNICODE_COMBINING_CLASS_CCC28      =  28,
117   HB_UNICODE_COMBINING_CLASS_CCC29      =  29,
118   HB_UNICODE_COMBINING_CLASS_CCC30      =  30,
119   HB_UNICODE_COMBINING_CLASS_CCC31      =  31,
120   HB_UNICODE_COMBINING_CLASS_CCC32      =  32,
121   HB_UNICODE_COMBINING_CLASS_CCC33      =  33,
122   HB_UNICODE_COMBINING_CLASS_CCC34      =  34,
123   HB_UNICODE_COMBINING_CLASS_CCC35      =  35,
124
125   /* Syriac */
126   HB_UNICODE_COMBINING_CLASS_CCC36      =  36,
127
128   /* Telugu */
129   HB_UNICODE_COMBINING_CLASS_CCC84      =  84,
130   HB_UNICODE_COMBINING_CLASS_CCC91      =  91,
131
132   /* Thai */
133   HB_UNICODE_COMBINING_CLASS_CCC103     = 103,
134   HB_UNICODE_COMBINING_CLASS_CCC107     = 107,
135
136   /* Lao */
137   HB_UNICODE_COMBINING_CLASS_CCC118     = 118,
138   HB_UNICODE_COMBINING_CLASS_CCC122     = 122,
139
140   /* Tibetan */
141   HB_UNICODE_COMBINING_CLASS_CCC129     = 129,
142   HB_UNICODE_COMBINING_CLASS_CCC130     = 130,
143   HB_UNICODE_COMBINING_CLASS_CCC133     = 132, /* NOTE(review): the name says 133 but the value is 132, unlike every other CCCnn entry; left unchanged because the identifier is public — confirm the intended ccc value. */
144
145
146   HB_UNICODE_COMBINING_CLASS_ATTACHED_BELOW_LEFT        = 200,
147   HB_UNICODE_COMBINING_CLASS_ATTACHED_BELOW             = 202,
148   HB_UNICODE_COMBINING_CLASS_ATTACHED_ABOVE             = 214,
149   HB_UNICODE_COMBINING_CLASS_ATTACHED_ABOVE_RIGHT       = 216,
150   HB_UNICODE_COMBINING_CLASS_BELOW_LEFT                 = 218,
151   HB_UNICODE_COMBINING_CLASS_BELOW                      = 220,
152   HB_UNICODE_COMBINING_CLASS_BELOW_RIGHT                = 222,
153   HB_UNICODE_COMBINING_CLASS_LEFT                       = 224,
154   HB_UNICODE_COMBINING_CLASS_RIGHT                      = 226,
155   HB_UNICODE_COMBINING_CLASS_ABOVE_LEFT                 = 228,
156   HB_UNICODE_COMBINING_CLASS_ABOVE                      = 230,
157   HB_UNICODE_COMBINING_CLASS_ABOVE_RIGHT                = 232,
158   HB_UNICODE_COMBINING_CLASS_DOUBLE_BELOW               = 233,
159   HB_UNICODE_COMBINING_CLASS_DOUBLE_ABOVE               = 234,
160
161   HB_UNICODE_COMBINING_CLASS_IOTA_SUBSCRIPT             = 240,
162
163   HB_UNICODE_COMBINING_CLASS_INVALID    = 255
164 } hb_unicode_combining_class_t;
165
166
167 /*
168  * hb_unicode_funcs_t: only forward-declared in this header (no struct body here), followed by its management API.
169  */
170
171 typedef struct hb_unicode_funcs_t hb_unicode_funcs_t;
172
173
174 /*
175  * Returns the library's default Unicode functions -- described only as "the best implementation" available; which backend that is cannot be determined from this header.
176  */
177 HB_EXTERN hb_unicode_funcs_t *
178 hb_unicode_funcs_get_default (void);
179
180 /* Creation (optionally from a parent), the empty instance, referencing and destruction.  NOTE(review): ownership and inheritance rules are not stated in this header -- confirm in the implementation. */
181 HB_EXTERN hb_unicode_funcs_t *
182 hb_unicode_funcs_create (hb_unicode_funcs_t *parent);
183
184 HB_EXTERN hb_unicode_funcs_t *
185 hb_unicode_funcs_get_empty (void);
186
187 HB_EXTERN hb_unicode_funcs_t *
188 hb_unicode_funcs_reference (hb_unicode_funcs_t *ufuncs);
189
190 HB_EXTERN void
191 hb_unicode_funcs_destroy (hb_unicode_funcs_t *ufuncs);
192 /* User data: a data pointer plus destroy callback stored under a caller-supplied key; presumably the hb_bool_t result reports success -- confirm. */
193 HB_EXTERN hb_bool_t
194 hb_unicode_funcs_set_user_data (hb_unicode_funcs_t *ufuncs,
195                                 hb_user_data_key_t *key,
196                                 void *              data,
197                                 hb_destroy_func_t   destroy,
198                                 hb_bool_t           replace);
199
200
201 HB_EXTERN void *
202 hb_unicode_funcs_get_user_data (hb_unicode_funcs_t *ufuncs,
203                                 hb_user_data_key_t *key);
204
205 /* Immutability control/query and parent lookup. */
206 HB_EXTERN void
207 hb_unicode_funcs_make_immutable (hb_unicode_funcs_t *ufuncs);
208
209 HB_EXTERN hb_bool_t
210 hb_unicode_funcs_is_immutable (hb_unicode_funcs_t *ufuncs);
211
212 HB_EXTERN hb_unicode_funcs_t *
213 hb_unicode_funcs_get_parent (hb_unicode_funcs_t *ufuncs);
214
215
216 /*
217  * funcs: callback types, their setters, and the accessors that query a hb_unicode_funcs_t
218  */
219
220 /* typedefs: callback signatures.  Each receives the ufuncs object first, then its codepoint argument(s), and the user_data pointer last. */
221 /* Single-codepoint property lookups: the result is returned by value. */
222 typedef hb_unicode_combining_class_t    (*hb_unicode_combining_class_func_t)    (hb_unicode_funcs_t *ufuncs,
223                                                                                  hb_codepoint_t      unicode,
224                                                                                  void               *user_data);
225 typedef unsigned int                    (*hb_unicode_eastasian_width_func_t)    (hb_unicode_funcs_t *ufuncs,
226                                                                                  hb_codepoint_t      unicode,
227                                                                                  void               *user_data);
228 typedef hb_unicode_general_category_t   (*hb_unicode_general_category_func_t)   (hb_unicode_funcs_t *ufuncs,
229                                                                                  hb_codepoint_t      unicode,
230                                                                                  void               *user_data);
231 typedef hb_codepoint_t                  (*hb_unicode_mirroring_func_t)          (hb_unicode_funcs_t *ufuncs,
232                                                                                  hb_codepoint_t      unicode,
233                                                                                  void               *user_data);
234 typedef hb_script_t                     (*hb_unicode_script_func_t)             (hb_unicode_funcs_t *ufuncs,
235                                                                                  hb_codepoint_t      unicode,
236                                                                                  void               *user_data);
237 /* Pairwise composition/decomposition: results go through the pointer parameters; presumably the hb_bool_t return reports success -- confirm. */
238 typedef hb_bool_t                       (*hb_unicode_compose_func_t)            (hb_unicode_funcs_t *ufuncs,
239                                                                                  hb_codepoint_t      a,
240                                                                                  hb_codepoint_t      b,
241                                                                                  hb_codepoint_t     *ab,
242                                                                                  void               *user_data);
243 typedef hb_bool_t                       (*hb_unicode_decompose_func_t)          (hb_unicode_funcs_t *ufuncs,
244                                                                                  hb_codepoint_t      ab,
245                                                                                  hb_codepoint_t     *a,
246                                                                                  hb_codepoint_t     *b,
247                                                                                  void               *user_data);
248
249 /**
250  * hb_unicode_decompose_compatibility_func_t:
251  * @ufuncs: a Unicode function structure
252  * @u: codepoint to decompose
253  * @decomposed: address of codepoint array (of length %HB_UNICODE_MAX_DECOMPOSITION_LEN) to write decomposition into
254  * @user_data: user data pointer as passed to hb_unicode_funcs_set_decompose_compatibility_func()
255  *
256  * Fully decompose @u to its Unicode compatibility decomposition. The codepoints of the decomposition will be written to @decomposed.
257  * The complete length of the decomposition will be returned.
258  *
259  * If @u has no compatibility decomposition, zero should be returned.
260  *
261  * The Unicode standard guarantees that a buffer of length %HB_UNICODE_MAX_DECOMPOSITION_LEN codepoints will always be sufficient for any
262  * compatibility decomposition plus a terminating value of 0.  Consequently, @decomposed must be allocated by the caller to be at least this length.  Implementations
263  * of this function type must ensure that they do not write past the provided array.
264  *
265  * Return value: number of codepoints in the full compatibility decomposition of @u, or 0 if no decomposition available.
266  */
267 typedef unsigned int                    (*hb_unicode_decompose_compatibility_func_t)    (hb_unicode_funcs_t *ufuncs,
268                                                                                          hb_codepoint_t      u,
269                                                                                          hb_codepoint_t     *decomposed,
270                                                                                          void               *user_data);
271
272 /* See Unicode 6.1 for details on the maximum decomposition length.  Presumably 18 is that maximum and the +1 is the slot for a terminating 0 -- confirm against the Unicode data. */
273 #define HB_UNICODE_MAX_DECOMPOSITION_LEN (18+1) /* codepoints */
274
275 /* setters */
276
277 /**
278  * hb_unicode_funcs_set_combining_class_func:
279  * @ufuncs: a Unicode function structure
280  * @func: (closure user_data) (destroy destroy) (scope notified): the #hb_unicode_combining_class_func_t callback to set on @ufuncs
281  * @user_data: opaque pointer associated with @func (annotated as its closure data)
282  * @destroy: destroy notifier associated with @user_data (per the annotation on @func)
283  *
284  * Sets the combining-class callback of @ufuncs.  NOTE(review): behavior for an immutable @ufuncs or a NULL @func is not visible in this header -- confirm in the implementation.
285  *
286  * Since: 0.9.2
287  **/
288 HB_EXTERN void
289 hb_unicode_funcs_set_combining_class_func (hb_unicode_funcs_t *ufuncs,
290                                            hb_unicode_combining_class_func_t func,
291                                            void *user_data, hb_destroy_func_t destroy);
292
293 /**
294  * hb_unicode_funcs_set_eastasian_width_func:
295  * @ufuncs: a Unicode function structure
296  * @func: (closure user_data) (destroy destroy) (scope notified): the #hb_unicode_eastasian_width_func_t callback to set on @ufuncs
297  * @user_data: opaque pointer associated with @func (annotated as its closure data)
298  * @destroy: destroy notifier associated with @user_data (per the annotation on @func)
299  *
300  * Sets the East Asian width callback of @ufuncs.  NOTE(review): behavior for an immutable @ufuncs or a NULL @func is not visible in this header -- confirm in the implementation.
301  *
302  * Since: 0.9.2
303  **/
304 HB_EXTERN void
305 hb_unicode_funcs_set_eastasian_width_func (hb_unicode_funcs_t *ufuncs,
306                                            hb_unicode_eastasian_width_func_t func,
307                                            void *user_data, hb_destroy_func_t destroy);
308
309 /**
310  * hb_unicode_funcs_set_general_category_func:
311  * @ufuncs: a Unicode function structure
312  * @func: (closure user_data) (destroy destroy) (scope notified): the #hb_unicode_general_category_func_t callback to set on @ufuncs
313  * @user_data: opaque pointer associated with @func (annotated as its closure data)
314  * @destroy: destroy notifier associated with @user_data (per the annotation on @func)
315  *
316  * Sets the general-category callback of @ufuncs.  NOTE(review): behavior for an immutable @ufuncs or a NULL @func is not visible in this header -- confirm in the implementation.
317  *
318  * Since: 0.9.2
319  **/
320 HB_EXTERN void
321 hb_unicode_funcs_set_general_category_func (hb_unicode_funcs_t *ufuncs,
322                                             hb_unicode_general_category_func_t func,
323                                             void *user_data, hb_destroy_func_t destroy);
324
325 /**
326  * hb_unicode_funcs_set_mirroring_func:
327  * @ufuncs: a Unicode function structure
328  * @func: (closure user_data) (destroy destroy) (scope notified): the #hb_unicode_mirroring_func_t callback to set on @ufuncs
329  * @user_data: opaque pointer associated with @func (annotated as its closure data)
330  * @destroy: destroy notifier associated with @user_data (per the annotation on @func)
331  *
332  * Sets the mirroring callback of @ufuncs.  NOTE(review): behavior for an immutable @ufuncs or a NULL @func is not visible in this header -- confirm in the implementation.
333  *
334  * Since: 0.9.2
335  **/
336 HB_EXTERN void
337 hb_unicode_funcs_set_mirroring_func (hb_unicode_funcs_t *ufuncs,
338                                      hb_unicode_mirroring_func_t func,
339                                      void *user_data, hb_destroy_func_t destroy);
340
341 /**
342  * hb_unicode_funcs_set_script_func:
343  * @ufuncs: a Unicode function structure
344  * @func: (closure user_data) (destroy destroy) (scope notified): the #hb_unicode_script_func_t callback to set on @ufuncs
345  * @user_data: opaque pointer associated with @func (annotated as its closure data)
346  * @destroy: destroy notifier associated with @user_data (per the annotation on @func)
347  *
348  * Sets the script callback of @ufuncs.  NOTE(review): behavior for an immutable @ufuncs or a NULL @func is not visible in this header -- confirm in the implementation.
349  *
350  * Since: 0.9.2
351  **/
352 HB_EXTERN void
353 hb_unicode_funcs_set_script_func (hb_unicode_funcs_t *ufuncs,
354                                   hb_unicode_script_func_t func,
355                                   void *user_data, hb_destroy_func_t destroy);
356
357 /**
358  * hb_unicode_funcs_set_compose_func:
359  * @ufuncs: a Unicode function structure
360  * @func: (closure user_data) (destroy destroy) (scope notified): the #hb_unicode_compose_func_t callback to set on @ufuncs
361  * @user_data: opaque pointer associated with @func (annotated as its closure data)
362  * @destroy: destroy notifier associated with @user_data (per the annotation on @func)
363  *
364  * Sets the compose callback of @ufuncs.  NOTE(review): behavior for an immutable @ufuncs or a NULL @func is not visible in this header -- confirm in the implementation.
365  *
366  * Since: 0.9.2
367  **/
368 HB_EXTERN void
369 hb_unicode_funcs_set_compose_func (hb_unicode_funcs_t *ufuncs,
370                                    hb_unicode_compose_func_t func,
371                                    void *user_data, hb_destroy_func_t destroy);
372
373 /**
374  * hb_unicode_funcs_set_decompose_func:
375  * @ufuncs: a Unicode function structure
376  * @func: (closure user_data) (destroy destroy) (scope notified): the #hb_unicode_decompose_func_t callback to set on @ufuncs
377  * @user_data: opaque pointer associated with @func (annotated as its closure data)
378  * @destroy: destroy notifier associated with @user_data (per the annotation on @func)
379  *
380  * Sets the decompose callback of @ufuncs.  NOTE(review): behavior for an immutable @ufuncs or a NULL @func is not visible in this header -- confirm in the implementation.
381  *
382  * Since: 0.9.2
383  **/
384 HB_EXTERN void
385 hb_unicode_funcs_set_decompose_func (hb_unicode_funcs_t *ufuncs,
386                                      hb_unicode_decompose_func_t func,
387                                      void *user_data, hb_destroy_func_t destroy);
388
389 /**
390  * hb_unicode_funcs_set_decompose_compatibility_func:
391  * @ufuncs: a Unicode function structure
392  * @func: (closure user_data) (destroy destroy) (scope notified): the #hb_unicode_decompose_compatibility_func_t callback to set on @ufuncs
393  * @user_data: opaque pointer passed back to @func as its user_data argument
394  * @destroy: destroy notifier associated with @user_data (per the annotation on @func)
395  *
396  * Sets the compatibility-decomposition callback of @ufuncs.  NOTE(review): behavior for an immutable @ufuncs or a NULL @func is not visible in this header -- confirm in the implementation.
397  *
398  * Since: 0.9.2
399  **/
400 HB_EXTERN void
401 hb_unicode_funcs_set_decompose_compatibility_func (hb_unicode_funcs_t *ufuncs,
402                                                    hb_unicode_decompose_compatibility_func_t func,
403                                                    void *user_data, hb_destroy_func_t destroy);
404
405 /* accessors */
406
407 /**
408  * hb_unicode_combining_class:
409  * Queries @ufuncs for the Canonical_Combining_Class of codepoint @unicode; callers should accept any value in 0..254, not only the named enumerators.
410  * Since: 0.9.2
411  **/
412 HB_EXTERN hb_unicode_combining_class_t
413 hb_unicode_combining_class (hb_unicode_funcs_t *ufuncs,
414                             hb_codepoint_t unicode);
415
416 /**
417  * hb_unicode_eastasian_width:
418  * Queries @ufuncs for the East Asian width of codepoint @unicode.  NOTE(review): the meaning of the returned unsigned int is not specified in this header -- confirm.
419  * Since: 0.9.2
420  **/
421 HB_EXTERN unsigned int
422 hb_unicode_eastasian_width (hb_unicode_funcs_t *ufuncs,
423                             hb_codepoint_t unicode);
424
425 /**
426  * hb_unicode_general_category:
427  * Queries @ufuncs for the General_Category of codepoint @unicode, returned as a #hb_unicode_general_category_t.
428  * Since: 0.9.2
429  **/
430 HB_EXTERN hb_unicode_general_category_t
431 hb_unicode_general_category (hb_unicode_funcs_t *ufuncs,
432                              hb_codepoint_t unicode);
433
434 /**
435  * hb_unicode_mirroring:
436  * Queries @ufuncs for the mirroring of codepoint @unicode, returned as a codepoint.  NOTE(review): the result for a codepoint with no mirror is not stated in this header -- confirm.
437  * Since: 0.9.2
438  **/
439 HB_EXTERN hb_codepoint_t
440 hb_unicode_mirroring (hb_unicode_funcs_t *ufuncs,
441                       hb_codepoint_t unicode);
442
443 /**
444  * hb_unicode_script:
445  * Queries @ufuncs for the script of codepoint @unicode, returned as a #hb_script_t.
446  * Since: 0.9.2
447  **/
448 HB_EXTERN hb_script_t
449 hb_unicode_script (hb_unicode_funcs_t *ufuncs,
450                    hb_codepoint_t unicode);
451
452 /**
453  * hb_unicode_compose:
454  * Asks @ufuncs to compose codepoints @a and @b; presumably the result is written through @ab and the hb_bool_t return reports success -- confirm.
455  * Since: 0.9.2
456  **/
457 HB_EXTERN hb_bool_t
458 hb_unicode_compose (hb_unicode_funcs_t *ufuncs,
459                     hb_codepoint_t      a,
460                     hb_codepoint_t      b,
461                     hb_codepoint_t     *ab);
462
463 /**
464  * hb_unicode_decompose:
465  * Asks @ufuncs to decompose codepoint @ab; presumably the two parts are written through @a and @b and the hb_bool_t return reports success -- confirm.
466  * Since: 0.9.2
467  **/
468 HB_EXTERN hb_bool_t
469 hb_unicode_decompose (hb_unicode_funcs_t *ufuncs,
470                       hb_codepoint_t      ab,
471                       hb_codepoint_t     *a,
472                       hb_codepoint_t     *b);
473
474 /**
475  * hb_unicode_decompose_compatibility:
476  * Asks @ufuncs for the compatibility decomposition of @u, written to @decomposed; assuming the #hb_unicode_decompose_compatibility_func_t contract applies, @decomposed must hold %HB_UNICODE_MAX_DECOMPOSITION_LEN codepoints and the return is the length (0 if none) -- confirm.
477  * Since: 0.9.2
478  **/
479 HB_EXTERN unsigned int
480 hb_unicode_decompose_compatibility (hb_unicode_funcs_t *ufuncs,
481                                     hb_codepoint_t      u,
482                                     hb_codepoint_t     *decomposed);
483
484 HB_END_DECLS
485
486 #endif /* HB_UNICODE_H */