1 /* vim: set expandtab tabstop=4 softtabstop=4 shiftwidth=4: */
4 * Line breaking in a Unicode sequence. Designed to be used in a
5 * generic text renderer.
7 * Copyright (C) 2008-2015 Wu Yongwei <wuyongwei at gmail dot com>
8 * Copyright (C) 2013 Petr Filipsky <philodej at gmail dot com>
10 * This software is provided 'as-is', without any express or implied
11 * warranty. In no event will the author be held liable for any damages
12 * arising from the use of this software.
14 * Permission is granted to anyone to use this software for any purpose,
15 * including commercial applications, and to alter it and redistribute
16 * it freely, subject to the following restrictions:
18 * 1. The origin of this software must not be misrepresented; you must
19 * not claim that you wrote the original software. If you use this
20 * software in a product, an acknowledgement in the product
21 * documentation would be appreciated but is not required.
22 * 2. Altered source versions must be plainly marked as such, and must
23 * not be misrepresented as being the original software.
24 * 3. This notice may not be removed or altered from any source
27 * The main reference is Unicode Standard Annex 14 (UAX #14):
28 * <URL:http://www.unicode.org/reports/tr14/>
30 * When this library was designed, this annex was at Revision 19, for
32 * <URL:http://www.unicode.org/reports/tr14/tr14-19.html>
34 * This library has been updated according to Revision 33, for
36 * <URL:http://www.unicode.org/reports/tr14/tr14-33.html>
38 * The Unicode Terms of Use are available at
39 * <URL:http://www.unicode.org/copyright.html>
43 * @file linebreakdef.h
45 * Definitions of internal data structures, declarations of global
46 * variables, and function prototypes for the line breaking algorithm.
48 * @version 2.6, 2015/04/18
50 * @author Petr Filipsky
53 #include "unibreakdef.h"
56 * Line break classes. This is a direct mapping of Table 1 of Unicode
57 * Standard Annex 14, Revision 33.
61 /* This is used to signal an error condition. */
62 LBP_Undefined, /**< Undefined (used to signal an error condition) */
64 /* The following break classes are treated in the pair table. */
65 LBP_OP, /**< Opening punctuation */
66 LBP_CL, /**< Closing punctuation */
67 LBP_CP, /**< Closing parenthesis */
68 LBP_QU, /**< Ambiguous quotation */
/* NOTE(review): UAX #14 also defines the class GL (non-breaking "glue"),
 * but no LBP_GL enumerator appears in this list -- confirm whether an
 * entry is missing here. */
70 LBP_NS, /**< Non-starters */
71 LBP_EX, /**< Exclamation/Interrogation */
72 LBP_SY, /**< Symbols allowing break after */
73 LBP_IS, /**< Infix separator */
74 LBP_PR, /**< Prefix */
75 LBP_PO, /**< Postfix */
76 LBP_NU, /**< Numeric */
77 LBP_AL, /**< Alphabetic */
78 LBP_HL, /**< Hebrew letter */
79 LBP_ID, /**< Ideographic */
80 LBP_IN, /**< Inseparable characters */
81 LBP_HY, /**< Hyphen */
82 LBP_BA, /**< Break after */
83 LBP_BB, /**< Break before */
84 LBP_B2, /**< Break on either side (but not pair) */
85 LBP_ZW, /**< Zero-width space */
86 LBP_CM, /**< Combining marks */
87 LBP_WJ, /**< Word joiner */
88 LBP_H2, /**< Hangul LV */
89 LBP_H3, /**< Hangul LVT */
90 LBP_JL, /**< Hangul L Jamo */
91 LBP_JV, /**< Hangul V Jamo */
92 LBP_JT, /**< Hangul T Jamo */
93 LBP_RI, /**< Regional indicator */
95 /* The following break classes are not treated in the pair table. */
96 LBP_AI, /**< Ambiguous (alphabetic or ideograph) */
97 LBP_BK, /**< Break (mandatory) */
98 LBP_CB, /**< Contingent break */
99 LBP_CJ, /**< Conditional Japanese starter */
100 LBP_CR, /**< Carriage return */
101 LBP_LF, /**< Line feed */
102 LBP_NL, /**< Next line */
103 LBP_SA, /**< South-East Asian */
104 LBP_SG, /**< Surrogates */
105 LBP_SP, /**< Space */
106 LBP_XX /**< Unknown */
110 * Struct for entries of line break properties. The array of the
111 * entries \e must be sorted.
113 struct LineBreakProperties
115 utf32_t start; /**< Starting code point */
116 utf32_t end; /**< End code point */
117 enum LineBreakClass prop; /**< The line breaking property */
121 * Struct for association of language-specific line breaking properties
122 * with language names.
124 struct LineBreakPropertiesLang
126 const char *lang; /**< Language name */
127 size_t namelen; /**< Length of the language name to match */
128 struct LineBreakProperties *lbp; /**< Pointer to the associated LineBreakProperties data */
132 * Context representing internal state of the line breaking algorithm.
133 * This is useful to callers if incremental analysis is wanted.
135 struct LineBreakContext
137 const char *lang; /**< Language name */
138 struct LineBreakProperties *lbpLang;/**< Pointer to LineBreakProperties (presumably the language-specific entries for lang -- confirm) */
139 enum LineBreakClass lbcCur; /**< Breaking class of current code point */
140 enum LineBreakClass lbcNew; /**< Breaking class of next code point */
141 enum LineBreakClass lbcLast; /**< Breaking class of last code point */
142 int fLb21aHebrew; /**< Flag for Hebrew letters (LB21a) */
/* Array of LineBreakProperties entries, declared here and defined
 * elsewhere.  Presumably the default (language-independent) table --
 * confirm against its definition. */
146 extern struct LineBreakProperties lb_prop_default[];
/* Array of LineBreakPropertiesLang entries (language name to properties
 * associations), declared here and defined elsewhere. */
147 extern struct LineBreakPropertiesLang lb_prop_lang_map[];
149 /* Function prototypes */
/* NOTE(review): the prototypes below look truncated -- the parameter list
 * of lb_init_break_context is never closed before the next prototype
 * starts.  Confirm against the complete header before relying on these
 * signatures. */
150 void lb_init_break_context(
151 struct LineBreakContext *lbpCtx,
154 int lb_process_next_char(
155 struct LineBreakContext *lbpCtx,
162 get_next_char_t get_next_char);