1 /* vim: set tabstop=4 shiftwidth=4: */
4 * Line breaking in a Unicode sequence. Designed to be used in a
5 * generic text renderer.
7 * Copyright (C) 2008-2010 Wu Yongwei <wuyongwei at gmail dot com>
9 * This software is provided 'as-is', without any express or implied
10 * warranty. In no event will the author be held liable for any damages
11 * arising from the use of this software.
13 * Permission is granted to anyone to use this software for any purpose,
14 * including commercial applications, and to alter it and redistribute
15 * it freely, subject to the following restrictions:
17 * 1. The origin of this software must not be misrepresented; you must
18 * not claim that you wrote the original software. If you use this
19 * software in a product, an acknowledgement in the product
20 * documentation would be appreciated but is not required.
21 * 2. Altered source versions must be plainly marked as such, and must
22 * not be misrepresented as being the original software.
23 * 3. This notice may not be removed or altered from any source
26 * The main reference is Unicode Standard Annex 14 (UAX #14):
27 * <URL:http://www.unicode.org/reports/tr14/>
29 * When this library was designed, this annex was at Revision 19, for
31 * <URL:http://www.unicode.org/reports/tr14/tr14-19.html>
33 * This library has been updated according to Revision 24, for
35 * <URL:http://www.unicode.org/reports/tr14/tr14-24.html>
37 * The Unicode Terms of Use are available at
38 * <URL:http://www.unicode.org/copyright.html>
44 * Implementation of the line breaking algorithm as described in Unicode Standard Annex 14 (UAX #14).
47 * @version 2.0, 2010/01/03
54 #include "linebreak.h"
55 #include "linebreakdef.h"
58 * Size of the second-level index to the line breaking properties.
/* Number of slots in lb_prop_index; init_linebreak() divides the default
 * property table into this many slices. */
60 #define LINEBREAK_INDEX_SIZE 40
63 * Version number of the library.
/* Non-static constant holding the value of the LINEBREAK_VERSION macro
 * (the macro itself is defined outside the lines visible here). */
65 const int linebreak_version = LINEBREAK_VERSION;
68 * Enumeration of break actions. They are used in the break action pair table below.
/* Enumerators of the break action type. They are the cell values of
 * baTable and the values set_linebreaks switches on after a table lookup.
 * NOTE(review): the `enum BreakAction` header and braces are not visible
 * in this listing. */
73 DIR_BRK, /**< Direct break opportunity */
74 IND_BRK, /**< Indirect break opportunity */
75 CMI_BRK, /**< Indirect break opportunity for combining marks */
76 CMP_BRK, /**< Prohibited break for combining marks */
77 PRH_BRK /**< Prohibited break */
81 * Break action pair table. This is a direct mapping of Table 2 of
82 * Unicode Standard Annex 14, Revision 24.
/* Break action pair table, declared LBP_JT x LBP_JT.
 *
 * set_linebreaks reads it as baTable[lbcCur - 1][lbcNew - 1]: the row is
 * the class of the character before the candidate break position, the
 * column is the class of the character after it, and both class values
 * are shifted down by one to form the index.
 *
 * The initializer below supplies 27 rows of 27 actions; each row is laid
 * out as three lines of seven entries followed by one line of six.
 * NOTE(review): the opening brace of each row (and any per-row class
 * label that accompanied it) is not visible in this listing, so which
 * class each row stands for must be confirmed against the
 * LineBreakClass enum order. */
84 static enum BreakAction baTable[LBP_JT][LBP_JT] = {
86 PRH_BRK, PRH_BRK, PRH_BRK, PRH_BRK, PRH_BRK, PRH_BRK, PRH_BRK,
87 PRH_BRK, PRH_BRK, PRH_BRK, PRH_BRK, PRH_BRK, PRH_BRK, PRH_BRK,
88 PRH_BRK, PRH_BRK, PRH_BRK, PRH_BRK, PRH_BRK, PRH_BRK, CMP_BRK,
89 PRH_BRK, PRH_BRK, PRH_BRK, PRH_BRK, PRH_BRK, PRH_BRK },
91 DIR_BRK, PRH_BRK, PRH_BRK, IND_BRK, IND_BRK, PRH_BRK, PRH_BRK,
92 PRH_BRK, PRH_BRK, IND_BRK, IND_BRK, DIR_BRK, DIR_BRK, DIR_BRK,
93 DIR_BRK, IND_BRK, IND_BRK, DIR_BRK, DIR_BRK, PRH_BRK, CMI_BRK,
94 PRH_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK },
96 DIR_BRK, PRH_BRK, PRH_BRK, IND_BRK, IND_BRK, PRH_BRK, PRH_BRK,
97 PRH_BRK, PRH_BRK, IND_BRK, IND_BRK, IND_BRK, IND_BRK, DIR_BRK,
98 DIR_BRK, IND_BRK, IND_BRK, DIR_BRK, DIR_BRK, PRH_BRK, CMI_BRK,
99 PRH_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK },
101 PRH_BRK, PRH_BRK, PRH_BRK, IND_BRK, IND_BRK, IND_BRK, PRH_BRK,
102 PRH_BRK, PRH_BRK, IND_BRK, IND_BRK, IND_BRK, IND_BRK, IND_BRK,
103 IND_BRK, IND_BRK, IND_BRK, IND_BRK, IND_BRK, PRH_BRK, CMI_BRK,
104 PRH_BRK, IND_BRK, IND_BRK, IND_BRK, IND_BRK, IND_BRK },
106 IND_BRK, PRH_BRK, PRH_BRK, IND_BRK, IND_BRK, IND_BRK, PRH_BRK,
107 PRH_BRK, PRH_BRK, IND_BRK, IND_BRK, IND_BRK, IND_BRK, IND_BRK,
108 IND_BRK, IND_BRK, IND_BRK, IND_BRK, IND_BRK, PRH_BRK, CMI_BRK,
109 PRH_BRK, IND_BRK, IND_BRK, IND_BRK, IND_BRK, IND_BRK },
111 DIR_BRK, PRH_BRK, PRH_BRK, IND_BRK, IND_BRK, IND_BRK, PRH_BRK,
112 PRH_BRK, PRH_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK,
113 DIR_BRK, IND_BRK, IND_BRK, DIR_BRK, DIR_BRK, PRH_BRK, CMI_BRK,
114 PRH_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK },
116 DIR_BRK, PRH_BRK, PRH_BRK, IND_BRK, IND_BRK, IND_BRK, PRH_BRK,
117 PRH_BRK, PRH_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK,
118 DIR_BRK, IND_BRK, IND_BRK, DIR_BRK, DIR_BRK, PRH_BRK, CMI_BRK,
119 PRH_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK },
121 DIR_BRK, PRH_BRK, PRH_BRK, IND_BRK, IND_BRK, IND_BRK, PRH_BRK,
122 PRH_BRK, PRH_BRK, DIR_BRK, DIR_BRK, IND_BRK, DIR_BRK, DIR_BRK,
123 DIR_BRK, IND_BRK, IND_BRK, DIR_BRK, DIR_BRK, PRH_BRK, CMI_BRK,
124 PRH_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK },
126 DIR_BRK, PRH_BRK, PRH_BRK, IND_BRK, IND_BRK, IND_BRK, PRH_BRK,
127 PRH_BRK, PRH_BRK, DIR_BRK, DIR_BRK, IND_BRK, IND_BRK, DIR_BRK,
128 DIR_BRK, IND_BRK, IND_BRK, DIR_BRK, DIR_BRK, PRH_BRK, CMI_BRK,
129 PRH_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK },
131 IND_BRK, PRH_BRK, PRH_BRK, IND_BRK, IND_BRK, IND_BRK, PRH_BRK,
132 PRH_BRK, PRH_BRK, DIR_BRK, DIR_BRK, IND_BRK, IND_BRK, IND_BRK,
133 DIR_BRK, IND_BRK, IND_BRK, DIR_BRK, DIR_BRK, PRH_BRK, CMI_BRK,
134 PRH_BRK, IND_BRK, IND_BRK, IND_BRK, IND_BRK, IND_BRK },
136 IND_BRK, PRH_BRK, PRH_BRK, IND_BRK, IND_BRK, IND_BRK, PRH_BRK,
137 PRH_BRK, PRH_BRK, DIR_BRK, DIR_BRK, IND_BRK, IND_BRK, DIR_BRK,
138 DIR_BRK, IND_BRK, IND_BRK, DIR_BRK, DIR_BRK, PRH_BRK, CMI_BRK,
139 PRH_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK },
141 IND_BRK, PRH_BRK, PRH_BRK, IND_BRK, IND_BRK, IND_BRK, PRH_BRK,
142 PRH_BRK, PRH_BRK, IND_BRK, IND_BRK, IND_BRK, IND_BRK, DIR_BRK,
143 IND_BRK, IND_BRK, IND_BRK, DIR_BRK, DIR_BRK, PRH_BRK, CMI_BRK,
144 PRH_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK },
146 IND_BRK, PRH_BRK, PRH_BRK, IND_BRK, IND_BRK, IND_BRK, PRH_BRK,
147 PRH_BRK, PRH_BRK, DIR_BRK, DIR_BRK, IND_BRK, IND_BRK, DIR_BRK,
148 IND_BRK, IND_BRK, IND_BRK, DIR_BRK, DIR_BRK, PRH_BRK, CMI_BRK,
149 PRH_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK },
151 DIR_BRK, PRH_BRK, PRH_BRK, IND_BRK, IND_BRK, IND_BRK, PRH_BRK,
152 PRH_BRK, PRH_BRK, DIR_BRK, IND_BRK, DIR_BRK, DIR_BRK, DIR_BRK,
153 IND_BRK, IND_BRK, IND_BRK, DIR_BRK, DIR_BRK, PRH_BRK, CMI_BRK,
154 PRH_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK },
156 DIR_BRK, PRH_BRK, PRH_BRK, IND_BRK, IND_BRK, IND_BRK, PRH_BRK,
157 PRH_BRK, PRH_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK,
158 IND_BRK, IND_BRK, IND_BRK, DIR_BRK, DIR_BRK, PRH_BRK, CMI_BRK,
159 PRH_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK },
161 DIR_BRK, PRH_BRK, PRH_BRK, IND_BRK, DIR_BRK, IND_BRK, PRH_BRK,
162 PRH_BRK, PRH_BRK, DIR_BRK, DIR_BRK, IND_BRK, DIR_BRK, DIR_BRK,
163 DIR_BRK, IND_BRK, IND_BRK, DIR_BRK, DIR_BRK, PRH_BRK, CMI_BRK,
164 PRH_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK },
166 DIR_BRK, PRH_BRK, PRH_BRK, IND_BRK, DIR_BRK, IND_BRK, PRH_BRK,
167 PRH_BRK, PRH_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK,
168 DIR_BRK, IND_BRK, IND_BRK, DIR_BRK, DIR_BRK, PRH_BRK, CMI_BRK,
169 PRH_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK },
171 IND_BRK, PRH_BRK, PRH_BRK, IND_BRK, IND_BRK, IND_BRK, PRH_BRK,
172 PRH_BRK, PRH_BRK, IND_BRK, IND_BRK, IND_BRK, IND_BRK, IND_BRK,
173 IND_BRK, IND_BRK, IND_BRK, IND_BRK, IND_BRK, PRH_BRK, CMI_BRK,
174 PRH_BRK, IND_BRK, IND_BRK, IND_BRK, IND_BRK, IND_BRK },
176 DIR_BRK, PRH_BRK, PRH_BRK, IND_BRK, IND_BRK, IND_BRK, PRH_BRK,
177 PRH_BRK, PRH_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK,
178 DIR_BRK, IND_BRK, IND_BRK, DIR_BRK, PRH_BRK, PRH_BRK, CMI_BRK,
179 PRH_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK },
181 DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK,
182 DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK,
183 DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK, PRH_BRK, DIR_BRK,
184 DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK },
186 IND_BRK, PRH_BRK, PRH_BRK, IND_BRK, IND_BRK, IND_BRK, PRH_BRK,
187 PRH_BRK, PRH_BRK, DIR_BRK, DIR_BRK, IND_BRK, IND_BRK, DIR_BRK,
188 IND_BRK, IND_BRK, IND_BRK, DIR_BRK, DIR_BRK, PRH_BRK, CMI_BRK,
189 PRH_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK },
191 IND_BRK, PRH_BRK, PRH_BRK, IND_BRK, IND_BRK, IND_BRK, PRH_BRK,
192 PRH_BRK, PRH_BRK, IND_BRK, IND_BRK, IND_BRK, IND_BRK, IND_BRK,
193 IND_BRK, IND_BRK, IND_BRK, IND_BRK, IND_BRK, PRH_BRK, CMI_BRK,
194 PRH_BRK, IND_BRK, IND_BRK, IND_BRK, IND_BRK, IND_BRK },
196 DIR_BRK, PRH_BRK, PRH_BRK, IND_BRK, IND_BRK, IND_BRK, PRH_BRK,
197 PRH_BRK, PRH_BRK, DIR_BRK, IND_BRK, DIR_BRK, DIR_BRK, DIR_BRK,
198 IND_BRK, IND_BRK, IND_BRK, DIR_BRK, DIR_BRK, PRH_BRK, CMI_BRK,
199 PRH_BRK, DIR_BRK, DIR_BRK, DIR_BRK, IND_BRK, IND_BRK },
201 DIR_BRK, PRH_BRK, PRH_BRK, IND_BRK, IND_BRK, IND_BRK, PRH_BRK,
202 PRH_BRK, PRH_BRK, DIR_BRK, IND_BRK, DIR_BRK, DIR_BRK, DIR_BRK,
203 IND_BRK, IND_BRK, IND_BRK, DIR_BRK, DIR_BRK, PRH_BRK, CMI_BRK,
204 PRH_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK, IND_BRK },
206 DIR_BRK, PRH_BRK, PRH_BRK, IND_BRK, IND_BRK, IND_BRK, PRH_BRK,
207 PRH_BRK, PRH_BRK, DIR_BRK, IND_BRK, DIR_BRK, DIR_BRK, DIR_BRK,
208 IND_BRK, IND_BRK, IND_BRK, DIR_BRK, DIR_BRK, PRH_BRK, CMI_BRK,
209 PRH_BRK, IND_BRK, IND_BRK, IND_BRK, IND_BRK, DIR_BRK },
211 DIR_BRK, PRH_BRK, PRH_BRK, IND_BRK, IND_BRK, IND_BRK, PRH_BRK,
212 PRH_BRK, PRH_BRK, DIR_BRK, IND_BRK, DIR_BRK, DIR_BRK, DIR_BRK,
213 IND_BRK, IND_BRK, IND_BRK, DIR_BRK, DIR_BRK, PRH_BRK, CMI_BRK,
214 PRH_BRK, DIR_BRK, DIR_BRK, DIR_BRK, IND_BRK, IND_BRK },
216 DIR_BRK, PRH_BRK, PRH_BRK, IND_BRK, IND_BRK, IND_BRK, PRH_BRK,
217 PRH_BRK, PRH_BRK, DIR_BRK, IND_BRK, DIR_BRK, DIR_BRK, DIR_BRK,
218 IND_BRK, IND_BRK, IND_BRK, DIR_BRK, DIR_BRK, PRH_BRK, CMI_BRK,
219 PRH_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK, IND_BRK }
223 * Struct for the second-level index to the line breaking properties.
/* One slot of the second-level index: a pointer into the property table
 * plus the last code point served by that slot. get_char_lb_class_default
 * selects the first slot whose `end` is not below the character. */
225 struct LineBreakPropertiesIndex
227 utf32_t end; /**< End coding point */
228 struct LineBreakProperties *lbp;/**< Pointer to line breaking properties */
232 * Second-level index to the line breaking properties.
/* The index array itself. The static initializer makes the first slot
 * cover every code point (end = 0xFFFFFFFF) and point at the start of
 * lb_prop_default, so the slot search in get_char_lb_class_default
 * stops at slot 0 until init_linebreak() has filled in the real slices.
 * NOTE(review): the braces enclosing the initializer list are not
 * visible in this listing. */
234 static struct LineBreakPropertiesIndex lb_prop_index[LINEBREAK_INDEX_SIZE] =
236 { 0xFFFFFFFF, lb_prop_default }
240 * Initializes the second-level index to the line breaking properties.
241 * If it is not called, the performance of #get_char_lb_class_lang (and
242 * thus the main functionality) can be pretty bad, especially for big
243 * code points like those of Chinese.
/* Fills lb_prop_index from lb_prop_default. Takes no arguments and
 * returns nothing; its only visible effect is on lb_prop_index. */
245 void init_linebreak(void)
/* Walk the default table up to its LBP_Undefined terminator entry.
 * NOTE(review): the local declarations and the body of this loop are
 * not visible in this listing; presumably the body counts entries into
 * len -- confirm. */
253 while (lb_prop_default[len].prop != LBP_Undefined)
/* Table entries per index slot (integer division, remainder dropped). */
255 step = len / LINEBREAK_INDEX_SIZE;
257 for (i = 0; i < LINEBREAK_INDEX_SIZE; ++i)
/* Slot i starts at the current table position ... */
259 lb_prop_index[i].lbp = lb_prop_default + iPropDefault;
260 iPropDefault += step;
/* ... and ends one code point before the entry that opens the next slot. */
261 lb_prop_index[i].end = lb_prop_default[iPropDefault].start - 1;
/* Widen the last slot so every code point falls into some slot.
 * NOTE(review): this assumes the statement runs after the loop, when
 * i == LINEBREAK_INDEX_SIZE and --i names the final slot -- the loop's
 * closing brace is not visible here. */
263 lb_prop_index[--i].end = 0xFFFFFFFF;
267 * Gets the language-specific line breaking properties.
269 * @param lang language of the text
270 * @return pointer to the language-specific line breaking
271 * properties array if found; \c NULL otherwise
/* Looks up `lang` in lb_prop_lang_map and returns the matching entry's
 * property array. */
273 static struct LineBreakProperties *get_lb_prop_lang(const char *lang)
275 struct LineBreakPropertiesLang *lbplIter;
/* The map is terminated by an entry whose `lang` field is NULL.
 * NOTE(review): `lang` reaches strncmp unguarded in the visible lines;
 * a NULL check may sit on the lines not shown here -- confirm. */
278 for (lbplIter = lb_prop_lang_map; lbplIter->lang != NULL; ++lbplIter)
/* Only the first `namelen` bytes are compared; presumably namelen is the
 * length of the map entry's name, making this a prefix match on `lang`
 * -- confirm against the map definition. */
280 if (strncmp(lang, lbplIter->lang, lbplIter->namelen) == 0)
282 return lbplIter->lbp;
290 * Gets the line breaking class of a character from a line breaking properties array.
293 * @param ch character to check
294 * @param lbp pointer to the line breaking properties array
295 * @return the line breaking class if found; \c LBP_XX otherwise
/* Searches one property array for the entry covering `ch`. */
297 static enum LineBreakClass get_char_lb_class(
299 struct LineBreakProperties *lbp)
/* Continue while entries remain (the terminator has prop ==
 * LBP_Undefined) and ch is not below the current entry's start. Stopping
 * as soon as ch < start suggests the array is sorted by start -- confirm.
 * NOTE(review): the first parameter line, the loop body and the return
 * statements are not visible in this listing. */
301 while (lbp->prop != LBP_Undefined && ch >= lbp->start)
311 * Gets the line breaking class of a character from the default line
312 * breaking properties array.
314 * @param ch character to check
315 * @return the line breaking class if found; \c LBP_XX otherwise
/* Default-table lookup that goes through lb_prop_index first. */
317 static enum LineBreakClass get_char_lb_class_default(
/* Find the first index slot whose end is not below ch.
 * NOTE(review): the declaration of i and the loop body are not visible;
 * presumably i starts at 0 and the body advances it -- confirm. */
321 while (ch > lb_prop_index[i].end)
/* Sanity check on the chosen slot; note it is evaluated only after the
 * loop condition has already read lb_prop_index[i]. */
323 assert(i < LINEBREAK_INDEX_SIZE);
/* Search from the slot's starting entry rather than from the head of
 * the whole default table. */
324 return get_char_lb_class(ch, lb_prop_index[i].lbp);
328 * Gets the line breaking class of a character for a specific
329 * language. This function will check the language-specific data first,
330 * and then the default data if there is no language-specific property
331 * available for the character.
333 * @param ch character to check
334 * @param lbpLang pointer to the language-specific line breaking properties array
336 * @return the line breaking class if found; \c LBP_XX otherwise
/* Two-stage lookup: the language-specific array first, then the
 * default data. */
339 static enum LineBreakClass get_char_lb_class_lang(
341 struct LineBreakProperties *lbpLang)
343 enum LineBreakClass lbcResult;
345 /* Find the language-specific line breaking class for a character */
/* NOTE(review): the lines between the comment above and this lookup are
 * not visible; presumably they skip the lookup when lbpLang is NULL
 * -- confirm. */
348 lbcResult = get_char_lb_class(ch, lbpLang);
/* LBP_XX is documented as get_char_lb_class's "not found" result, so any
 * other value is a language-specific hit. */
349 if (lbcResult != LBP_XX)
353 /* Find the generic language-specific line breaking class, if no
354 * language context is provided, or language-specific data are not
355 * available for the specific character in the specified language */
356 return get_char_lb_class_default(ch);
360 * Resolves the line breaking class for certain ambiguous or complicated
361 * characters. They are treated in a simplistic way in this implementation.
364 * @param lbc line breaking class to resolve
365 * @param lang language of the text
366 * @return the resolved line breaking class
/* Maps an ambiguous class to a concrete one, taking `lang` into account.
 * NOTE(review): only the signature opening and the language test are
 * visible in this listing; which classes are remapped, and to what,
 * cannot be read from here. */
368 static enum LineBreakClass resolve_lb_class(
369 enum LineBreakClass lbc,
/* Language test: true when `lang` begins with "zh", "ja" or "ko" (only
 * the first two bytes are compared). A NULL guard for `lang` may precede
 * these lines -- not visible, confirm. */
376 (strncmp(lang, "zh", 2) == 0 || /* Chinese */
377 strncmp(lang, "ja", 2) == 0 || /* Japanese */
378 strncmp(lang, "ko", 2) == 0)) /* Korean */
393 * Gets the next Unicode character in a UTF-8 sequence. The index will
394 * be advanced to the next complete character, unless the end of string
395 * is reached in the middle of a UTF-8 sequence.
397 * @param[in] s input UTF-8 string
398 * @param[in] len length of the string in bytes
399 * @param[in,out] ip pointer to the index
400 * @return the Unicode character beginning at the index; or
401 * #EOS if end of input is encountered
/* Decodes one code point from a UTF-8 buffer at index *ip.
 * NOTE(review): the parameter list, the bounds checks, the index updates
 * and the returns are not visible in this listing; only the lead-byte
 * classification and the bit assembly are. */
403 utf32_t lb_get_next_char_utf8(
/* Lead bytes outside 0xC2..0xF4 take the single-unit path described by
 * the comment on the next line. */
416 if (ch < 0xC2 || ch > 0xF4)
417 { /* One-byte sequence, tail (should not occur), or invalid */
422 { /* Two-byte sequence */
/* 5 payload bits from the lead byte, 6 from the continuation byte. */
425 res = ((ch & 0x1F) << 6) + (s[*ip + 1] & 0x3F);
430 { /* Three-byte sequence */
/* 4 payload bits from the lead byte, 6 from each continuation byte. */
433 res = ((ch & 0x0F) << 12) +
434 ((s[*ip + 1] & 0x3F) << 6) +
435 ((s[*ip + 2] & 0x3F));
440 { /* Four-byte sequence */
/* 3 payload bits from the lead byte, 6 from each continuation byte.
 * NOTE(review): the visible lines mask continuation bytes with 0x3F but
 * show no check that they lie in 0x80..0xBF -- confirm whether the
 * hidden lines validate them. */
443 res = ((ch & 0x07) << 18) +
444 ((s[*ip + 1] & 0x3F) << 12) +
445 ((s[*ip + 2] & 0x3F) << 6) +
446 ((s[*ip + 3] & 0x3F));
453 * Gets the next Unicode character in a UTF-16 sequence. The index will
454 * be advanced to the next complete character, unless the end of string
455 * is reached in the middle of a UTF-16 surrogate pair.
457 * @param[in] s input UTF-16 string
458 * @param[in] len length of the string in words
459 * @param[in,out] ip pointer to the index
460 * @return the Unicode character beginning at the index; or
461 * #EOS if end of input is encountered
/* Decodes one code point from a UTF-16 buffer at index *ip.
 * NOTE(review): the parameter list and the bodies of the branches below
 * are not visible in this listing. */
463 utf32_t lb_get_next_char_utf16(
/* 0xD800..0xDBFF is the high-surrogate range; anything else is handled
 * as a single unit. */
475 if (ch < 0xD800 || ch > 0xDBFF)
476 { /* If the character is not a high surrogate */
480 { /* If the input ends here (an error) */
/* 0xDC00..0xDFFF is the low-surrogate range. */
484 if (s[*ip] < 0xDC00 || s[*ip] > 0xDFFF)
485 { /* If the next character is not the low surrogate (an error) */
488 /* Return the constructed character and advance the index again */
/* The low 10 bits of the high surrogate become bits 10..19, the low 10
 * bits of the low surrogate become bits 0..9, then the 0x10000 offset is
 * added. The post-increment consumes the low surrogate. */
489 return (((utf32_t)ch & 0x3FF) << 10) + (s[(*ip)++] & 0x3FF) + 0x10000;
493 * Gets the next Unicode character in a UTF-32 sequence. The index will
494 * be advanced to the next character.
496 * @param[in] s input UTF-32 string
497 * @param[in] len length of the string in dwords
498 * @param[in,out] ip pointer to the index
499 * @return the Unicode character beginning at the index; or
500 * #EOS if end of input is encountered
/* NOTE(review): only the opening of this definition is visible in this
 * listing; its parameter list and body are not. set_linebreaks_utf32
 * passes this function to set_linebreaks, cast to get_next_char_t. */
502 utf32_t lb_get_next_char_utf32(
514 * Sets the line breaking information for a generic input string.
516 * @param[in] s input string
517 * @param[in] len length of the input
518 * @param[in] lang language of the input
519 * @param[out] brks pointer to the output breaking data,
520 * containing #LINEBREAK_MUSTBREAK,
521 * #LINEBREAK_ALLOWBREAK, #LINEBREAK_NOBREAK,
522 * or #LINEBREAK_INSIDEACHAR
523 * @param[in] get_next_char function to get the next UTF-32 character
/* NOTE(review): the start of this definition (return type, name and
 * leading parameters) is not visible in this listing. The wrappers in
 * this file call it as set_linebreaks(s, len, lang, brks, decoder).
 * Most braces, case labels and loop headers inside the body are also
 * missing, so the comments added below describe single visible
 * statements only and do not assert the surrounding control flow. */
530 get_next_char_t get_next_char)
533 enum LineBreakClass lbcCur;
534 enum LineBreakClass lbcNew;
535 enum LineBreakClass lbcLast;
536 struct LineBreakProperties *lbpLang;
539 // TIZEN ONLY : (2013.08.19) for special processing at Zero-width space character
543 --posLast; /* To be ++'d later */
/* Decode the first character; posCur is advanced by the decoder. */
544 ch = get_next_char(s, len, &posCur);
/* The language table is looked up once here and the result is reused
 * for the class lookups that follow. */
547 lbpLang = get_lb_prop_lang(lang);
/* lbcCur: resolved class of the character before the candidate break.
 * lbcNew: class of the character after it (none yet). */
548 lbcCur = resolve_lb_class(get_char_lb_class_lang(ch, lbpLang), lang);
549 lbcNew = LBP_Undefined;
553 /* Special treatment for the first character */
570 /* Process a line till an explicit break or end of string */
/* Every unit of the previous character except its last one is flagged
 * as lying inside a character. */
573 for (++posLast; posLast < posCur - 1; ++posLast)
575 brks[posLast] = LINEBREAK_INSIDEACHAR;
/* posLast now indexes the last unit of the previous character; the
 * break decision for that character is written there. */
577 assert(posLast == posCur - 1);
579 ch = get_next_char(s, len, &posCur);
582 lbcNew = get_char_lb_class_lang(ch, lbpLang);
/* Mandatory break after BK, and after CR unless an LF follows (a CR LF
 * pair is not split). */
583 if (lbcCur == LBP_BK || (lbcCur == LBP_CR && lbcNew != LBP_LF))
585 brks[posLast] = LINEBREAK_MUSTBREAK;
586 lbcCur = resolve_lb_class(lbcNew, lang);
590 // TIZEN ONLY : (2013.08.19) for special processing at Zero-width space character
/* NOTE(review): the case labels selecting among the four assignments
 * below are not visible in this listing. */
595 brks[posLast] = LINEBREAK_NOBREAK;
600 brks[posLast] = LINEBREAK_NOBREAK;
604 brks[posLast] = LINEBREAK_NOBREAK;
608 brks[posLast] = LINEBREAK_ALLOWBREAK;
615 lbcNew = resolve_lb_class(lbcNew, lang);
/* Both classes must fall within the table's dimension. */
617 assert(lbcCur <= LBP_JT);
618 assert(lbcNew <= LBP_JT);
/* Pair-table lookup; class values are shifted down by one to index it.
 * NOTE(review): the case labels of this switch are not visible, so which
 * action each assignment below belongs to must be confirmed. */
619 switch (baTable[lbcCur - 1][lbcNew - 1])
622 brks[posLast] = LINEBREAK_ALLOWBREAK;
/* lbcLast is compared against the space class here; its assignment is
 * not visible in this listing. */
626 if (lbcLast == LBP_SP)
628 brks[posLast] = LINEBREAK_ALLOWBREAK;
632 brks[posLast] = LINEBREAK_NOBREAK;
636 brks[posLast] = LINEBREAK_NOBREAK;
637 if (lbcLast != LBP_SP)
641 brks[posLast] = LINEBREAK_NOBREAK;
648 // TIZEN ONLY - START
/* Tizen-specific zero-width-space handling. The three statements after
 * the condition repeat the decode-and-classify steps used for the first
 * character above.
 * NOTE(review): zw_flag's declaration and updates are not visible. */
649 if (lbcCur == LBP_ZW && !zw_flag)
654 ch = get_next_char(s, len, &posCur);
655 lbcCur = resolve_lb_class(get_char_lb_class_lang(ch, lbpLang), lang);
656 lbcNew = LBP_Undefined;
661 if (lbcCur == LBP_ZW)
662 brks[posLast] = LINEBREAK_ALLOWBREAK;
664 brks[posLast] = LINEBREAK_NOBREAK;
669 // TIZEN ONLY(20131106): For Hangul word wrap
/* Two case lists over the same five Hangul classes follow.
 * NOTE(review): the switch expressions and the code each list guards
 * are not visible in this listing. */
672 case LBP_H2: /**< Hangul LV */
673 case LBP_H3: /**< Hangul LVT */
674 case LBP_JL: /**< Hangul L Jamo */
675 case LBP_JV: /**< Hangul V Jamo */
676 case LBP_JT: /**< Hangul T Jamo */
685 case LBP_H2: /**< Hangul LV */
686 case LBP_H3: /**< Hangul LVT */
687 case LBP_JL: /**< Hangul L Jamo */
688 case LBP_JV: /**< Hangul V Jamo */
689 case LBP_JT: /**< Hangul T Jamo */
/* From here the visible statements repeat the explicit-class and
 * pair-table handling that appears earlier in this function. */
700 brks[posLast] = LINEBREAK_NOBREAK;
705 brks[posLast] = LINEBREAK_NOBREAK;
709 brks[posLast] = LINEBREAK_NOBREAK;
713 brks[posLast] = LINEBREAK_ALLOWBREAK;
720 lbcNew = resolve_lb_class(lbcNew, lang);
722 assert(lbcCur <= LBP_JT);
723 assert(lbcNew <= LBP_JT);
724 switch (baTable[lbcCur - 1][lbcNew - 1])
727 brks[posLast] = LINEBREAK_ALLOWBREAK;
731 if (lbcLast == LBP_SP)
733 brks[posLast] = LINEBREAK_ALLOWBREAK;
737 brks[posLast] = LINEBREAK_NOBREAK;
741 brks[posLast] = LINEBREAK_NOBREAK;
742 if (lbcLast != LBP_SP)
746 brks[posLast] = LINEBREAK_NOBREAK;
/* At the end of processing, posLast is the last unit of the final
 * complete character and the decoder has not read past len. */
754 assert(posLast == posCur - 1 && posCur <= len);
755 /* Break after the last character */
756 brks[posLast] = LINEBREAK_MUSTBREAK;
757 /* When the input contains incomplete sequences */
/* Marks a trailing unit as inside a character and advances posCur.
 * NOTE(review): the enclosing loop header is not visible; presumably it
 * runs while posCur < len -- confirm. */
760 brks[posCur++] = LINEBREAK_INSIDEACHAR;
765 * Sets the line breaking information for a UTF-8 input string.
767 * @param[in] s input UTF-8 string
768 * @param[in] len length of the input
769 * @param[in] lang language of the input
770 * @param[out] brks pointer to the output breaking data, containing
771 * #LINEBREAK_MUSTBREAK, #LINEBREAK_ALLOWBREAK,
772 * #LINEBREAK_NOBREAK, or #LINEBREAK_INSIDEACHAR
/* UTF-8 front end: forwards its arguments to set_linebreaks with
 * lb_get_next_char_utf8 as the decoder.
 * NOTE(review): the parameter list is not visible in this listing. */
774 void set_linebreaks_utf8(
/* The decoder is cast to the get_next_char_t callback type that
 * set_linebreaks takes as its last argument. */
780 set_linebreaks(s, len, lang, brks,
781 (get_next_char_t)lb_get_next_char_utf8);
785 * Sets the line breaking information for a UTF-16 input string.
787 * @param[in] s input UTF-16 string
788 * @param[in] len length of the input
789 * @param[in] lang language of the input
790 * @param[out] brks pointer to the output breaking data, containing
791 * #LINEBREAK_MUSTBREAK, #LINEBREAK_ALLOWBREAK,
792 * #LINEBREAK_NOBREAK, or #LINEBREAK_INSIDEACHAR
/* UTF-16 front end: forwards its arguments to set_linebreaks with
 * lb_get_next_char_utf16 as the decoder.
 * NOTE(review): the parameter list is not visible in this listing. */
794 void set_linebreaks_utf16(
/* The decoder is cast to the get_next_char_t callback type that
 * set_linebreaks takes as its last argument. */
800 set_linebreaks(s, len, lang, brks,
801 (get_next_char_t)lb_get_next_char_utf16);
805 * Sets the line breaking information for a UTF-32 input string.
807 * @param[in] s input UTF-32 string
808 * @param[in] len length of the input
809 * @param[in] lang language of the input
810 * @param[out] brks pointer to the output breaking data, containing
811 * #LINEBREAK_MUSTBREAK, #LINEBREAK_ALLOWBREAK,
812 * #LINEBREAK_NOBREAK, or #LINEBREAK_INSIDEACHAR
/* UTF-32 front end: forwards its arguments to set_linebreaks with
 * lb_get_next_char_utf32 as the decoder.
 * NOTE(review): the parameter list is not visible in this listing. */
814 void set_linebreaks_utf32(
/* The decoder is cast to the get_next_char_t callback type that
 * set_linebreaks takes as its last argument. */
820 set_linebreaks(s, len, lang, brks,
821 (get_next_char_t)lb_get_next_char_utf32);
825 * Tells whether a line break can occur between two Unicode characters.
826 * This is a wrapper function to expose a simple interface. Generally
827 * speaking, it is better to use #set_linebreaks_utf32 instead, since
828 * complicated cases involving combining marks, spaces, etc. cannot be
829 * correctly processed.
831 * @param char1 the first Unicode character
832 * @param char2 the second Unicode character
833 * @param lang language of the input
834 * @return one of #LINEBREAK_MUSTBREAK, #LINEBREAK_ALLOWBREAK,
835 * #LINEBREAK_NOBREAK, or #LINEBREAK_INSIDEACHAR
/* Two-character convenience wrapper around set_linebreaks_utf32.
 * NOTE(review): the parameter list, the local declarations of s and
 * brks, and the return statement are not visible in this listing. */
837 int is_line_breakable(
/* Runs the full algorithm on a buffer of length 2. Presumably s holds
 * char1 and char2 and the result is taken from brks -- confirm against
 * the lines not shown. */
846 set_linebreaks_utf32(s, 2, lang, brks);