1 /* vim: set expandtab tabstop=4 softtabstop=4 shiftwidth=4: */
4 * Word breaking in a Unicode sequence. Designed to be used in a
5 * generic text renderer.
7 * Copyright (C) 2013-2015 Tom Hacohen <tom at stosb dot com>
9 * This software is provided 'as-is', without any express or implied
10 * warranty. In no event will the author be held liable for any damages
11 * arising from the use of this software.
13 * Permission is granted to anyone to use this software for any purpose,
14 * including commercial applications, and to alter it and redistribute
15 * it freely, subject to the following restrictions:
17 * 1. The origin of this software must not be misrepresented; you must
18 * not claim that you wrote the original software. If you use this
19 * software in a product, an acknowledgement in the product
20 * documentation would be appreciated but is not required.
21 * 2. Altered source versions must be plainly marked as such, and must
22 * not be misrepresented as being the original software.
23 * 3. This notice may not be removed or altered from any source distribution.
26 * The main reference is Unicode Standard Annex 29 (UAX #29):
27 * <URL:http://unicode.org/reports/tr29>
29 * When this library was designed, this annex was at Revision 17, for Unicode 6.0.0:
31 * <URL:http://www.unicode.org/reports/tr29/tr29-17.html>
33 * This library has been updated according to Revision 25, for Unicode 7.0.0:
35 * <URL:http://www.unicode.org/reports/tr29/tr29-25.html>
37 * The Unicode Terms of Use are available at
38 * <URL:http://www.unicode.org/copyright.html>
44 * Header file for the word breaking (segmentation) algorithm.
46 * @version 2.5, 2015/04/18
54 #include "unibreakbase.h"
/*
 * Word-break status codes.  Going by their individual descriptions, a
 * code says whether a break is allowed, whether no break is allowed, or
 * whether the position lies inside an unfinished UTF-8/16 sequence.
 * NOTE(review): presumably these are the values written to the `brks`
 * buffer of the set_wordbreaks_* functions -- confirm against the
 * implementation, which is not visible from this header.
 */
60 #define WORDBREAK_BREAK 0 /**< Break is allowed */
61 #define WORDBREAK_NOBREAK 1 /**< No break is allowed */
62 #define WORDBREAK_INSIDEACHAR 2 /**< A UTF-8/16 sequence is unfinished */
/**
 * Declaration of init_wordbreak(), which takes no arguments and returns
 * nothing.
 * NOTE(review): judging by the name, this performs one-time set-up of the
 * word-breaking module and presumably has to be called before any
 * set_wordbreaks_* function -- that requirement is not visible in this
 * header; confirm against the implementation.
 */
64 void init_wordbreak(void);
/**
 * Word-breaking entry point for input given as utf8_t units.
 *
 * @param s     read-only input text (array of utf8_t)
 * @param len   size of the input; NOTE(review): presumably counted in
 *              utf8_t units rather than characters -- confirm
 * @param lang  read-only string; NOTE(review): presumably a language tag
 *              used to tailor the rules, and whether NULL is accepted is
 *              not visible here -- confirm
 * @param brks  writable char buffer; NOTE(review): presumably receives one
 *              WORDBREAK_* code per input unit, so the caller would have
 *              to provide at least len bytes -- confirm
 */
65 void set_wordbreaks_utf8(
66 const utf8_t *s, size_t len, const char* lang, char *brks);
/**
 * Word-breaking entry point for input given as utf16_t units.
 *
 * @param s     read-only input text (array of utf16_t)
 * @param len   size of the input; NOTE(review): presumably counted in
 *              utf16_t units rather than characters -- confirm
 * @param lang  read-only string; NOTE(review): presumably a language tag
 *              used to tailor the rules, and whether NULL is accepted is
 *              not visible here -- confirm
 * @param brks  writable char buffer; NOTE(review): presumably receives one
 *              WORDBREAK_* code per input unit, so the caller would have
 *              to provide at least len bytes -- confirm
 */
67 void set_wordbreaks_utf16(
68 const utf16_t *s, size_t len, const char* lang, char *brks);
/**
 * Word-breaking entry point for input given as utf32_t units.
 *
 * @param s     read-only input text (array of utf32_t)
 * @param len   size of the input; NOTE(review): presumably counted in
 *              utf32_t units -- confirm
 * @param lang  read-only string; NOTE(review): presumably a language tag
 *              used to tailor the rules, and whether NULL is accepted is
 *              not visible here -- confirm
 * @param brks  writable char buffer; NOTE(review): presumably receives one
 *              WORDBREAK_* code per input unit, so the caller would have
 *              to provide at least len bytes -- confirm
 */
69 void set_wordbreaks_utf32(
70 const utf32_t *s, size_t len, const char* lang, char *brks);