/* vim: set tabstop=4 shiftwidth=4: */

/*
 * Line breaking in a Unicode sequence.  Designed to be used in a
 * generic text renderer.
 *
 * Copyright (C) 2008-2010 Wu Yongwei <wuyongwei at gmail dot com>
 *
 * This software is provided 'as-is', without any express or implied
 * warranty.  In no event will the author be held liable for any damages
 * arising from the use of this software.
 *
 * Permission is granted to anyone to use this software for any purpose,
 * including commercial applications, and to alter it and redistribute
 * it freely, subject to the following restrictions:
 *
 * 1. The origin of this software must not be misrepresented; you must
 *    not claim that you wrote the original software.  If you use this
 *    software in a product, an acknowledgement in the product
 *    documentation would be appreciated but is not required.
 * 2. Altered source versions must be plainly marked as such, and must
 *    not be misrepresented as being the original software.
 * 3. This notice may not be removed or altered from any source
 *    distribution.
 */
/*
 * The main reference is Unicode Standard Annex 14 (UAX #14):
 *		<URL:http://www.unicode.org/reports/tr14/>
 *
 * When this library was designed, this annex was at Revision 19, for
 * Unicode 5.0.0:
 *		<URL:http://www.unicode.org/reports/tr14/tr14-19.html>
 *
 * This library has been updated according to Revision 24, for
 * Unicode 5.2.0:
 *		<URL:http://www.unicode.org/reports/tr14/tr14-24.html>
 *
 * The Unicode Terms of Use are available at
 *		<URL:http://www.unicode.org/copyright.html>
 */
/**
 * @file	linebreakdef.h
 *
 * Definitions of internal data structures, declarations of global
 * variables, and function prototypes for the line breaking algorithm.
 *
 * @version	2.0, 2010/01/03
 */
/**
 * Constant value to mark the end of string.  It is not a valid Unicode
 * character.
 */
/**
 * Line break classes.  This is a direct mapping of Table 1 of Unicode
 * Standard Annex 14, Revision 24.
 *
 * The enumerators are implicitly numbered from zero in declaration
 * order, so their order must not be changed.
 */
enum LineBreakClass
{
	/* This is used to signal an error condition. */
	LBP_Undefined,	/**< Undefined */

	/* The following break classes are treated in the pair table. */
	LBP_OP,			/**< Opening punctuation */
	LBP_CL,			/**< Closing punctuation */
	LBP_CP,			/**< Closing parenthesis */
	LBP_QU,			/**< Ambiguous quotation */
	LBP_GL,			/**< Glue */
	LBP_NS,			/**< Non-starters */
	LBP_EX,			/**< Exclamation/Interrogation */
	LBP_SY,			/**< Symbols allowing break after */
	LBP_IS,			/**< Infix separator */
	LBP_PR,			/**< Prefix */
	LBP_PO,			/**< Postfix */
	LBP_NU,			/**< Numeric */
	LBP_AL,			/**< Alphabetic */
	LBP_ID,			/**< Ideographic */
	LBP_IN,			/**< Inseparable characters */
	LBP_HY,			/**< Hyphen */
	LBP_BA,			/**< Break after */
	LBP_BB,			/**< Break before */
	LBP_B2,			/**< Break on either side (but not pair) */
	LBP_ZW,			/**< Zero-width space */
	LBP_CM,			/**< Combining marks */
	LBP_WJ,			/**< Word joiner */
	LBP_H2,			/**< Hangul LV */
	LBP_H3,			/**< Hangul LVT */
	LBP_JL,			/**< Hangul L Jamo */
	LBP_JV,			/**< Hangul V Jamo */
	LBP_JT,			/**< Hangul T Jamo */

	/* The following break classes are not treated in the pair table */
	LBP_AI,			/**< Ambiguous (alphabetic or ideograph) */
	LBP_BK,			/**< Break (mandatory) */
	LBP_CB,			/**< Contingent break */
	LBP_CR,			/**< Carriage return */
	LBP_LF,			/**< Line feed */
	LBP_NL,			/**< Next line */
	LBP_SA,			/**< South-East Asian */
	LBP_SG,			/**< Surrogates */
	LBP_SP,			/**< Space */
	LBP_XX			/**< Unknown */
};
109 * Struct for entries of line break properties. The array of the
110 * entries \e must be sorted.
112 struct LineBreakProperties
114 utf32_t start; /**< Starting coding point */
115 utf32_t end; /**< End coding point */
116 enum LineBreakClass prop; /**< The line breaking property */
/**
 * Struct for association of language-specific line breaking properties
 * with language names.
 */
struct LineBreakPropertiesLang
{
	const char *lang;					/**< Language name */
	size_t namelen;						/**< Length of name to match */
	struct LineBreakProperties *lbp;	/**< Pointer to associated data */
};
131 * Abstract function interface for #lb_get_next_char_utf8,
132 * #lb_get_next_char_utf16, and #lb_get_next_char_utf32.
134 typedef utf32_t (*get_next_char_t)(const void *, size_t, size_t *);
137 extern struct LineBreakProperties lb_prop_default[];
138 extern struct LineBreakPropertiesLang lb_prop_lang_map[];
140 /* Function Prototype */
141 utf32_t lb_get_next_char_utf8(const utf8_t *s, size_t len, size_t *ip);
142 utf32_t lb_get_next_char_utf16(const utf16_t *s, size_t len, size_t *ip);
143 utf32_t lb_get_next_char_utf32(const utf32_t *s, size_t len, size_t *ip);
149 get_next_char_t get_next_char);