text/dali/internal/libunibreak/linebreakdef.h

   1 /* vim: set tabstop=4 shiftwidth=4: */
   2
   3 /*
   4  * Line breaking in a Unicode sequence.  Designed to be used in a
   5  * generic text renderer.
   6  *
   7  * Copyright (C) 2008-2010 Wu Yongwei <wuyongwei at gmail dot com>
   8  *
   9  * This software is provided 'as-is', without any express or implied
  10  * warranty.  In no event will the author be held liable for any damages
  11  * arising from the use of this software.
  12  *
  13  * Permission is granted to anyone to use this software for any purpose,
  14  * including commercial applications, and to alter it and redistribute
  15  * it freely, subject to the following restrictions:
  16  *
  17  * 1. The origin of this software must not be misrepresented; you must
  18  *    not claim that you wrote the original software.  If you use this
  19  *    software in a product, an acknowledgement in the product
  20  *    documentation would be appreciated but is not required.
  21  * 2. Altered source versions must be plainly marked as such, and must
  22  *    not be misrepresented as being the original software.
  23  * 3. This notice may not be removed or altered from any source
  24  *    distribution.
  25  *
  26  * The main reference is Unicode Standard Annex 14 (UAX #14):
  27  *              <URL:http://www.unicode.org/reports/tr14/>
  28  *
  29  * When this library was designed, this annex was at Revision 19, for
  30  * Unicode 5.0.0:
  31  *              <URL:http://www.unicode.org/reports/tr14/tr14-19.html>
  32  *
  33  * This library has been updated according to Revision 24, for
  34  * Unicode 5.2.0:
  35  *              <URL:http://www.unicode.org/reports/tr14/tr14-24.html>
  36  *
  37  * The Unicode Terms of Use are available at
  38  *              <URL:http://www.unicode.org/copyright.html>
  39  */
  40
  41 /**
  42  * @file        linebreakdef.h
  43  *
  44  * Definitions of internal data structures, declarations of global
  45  * variables, and function prototypes for the line breaking algorithm.
  46  *
  47  * @version     2.0, 2010/01/03
  48  * @author      Wu Yongwei
  49  */
  50
  51 /**
  52  * Constant value to mark the end of string.  It is not a valid Unicode
  53  * character.
  54  */
  55 #define EOS 0xFFFF
  56
  57 /**
  58  * Line break classes.  This is a direct mapping of Table 1 of Unicode
  59  * Standard Annex 14, Revision 19.
  60  */
  61 enum LineBreakClass
  62 {
  63         /* This is used to signal an error condition. */
  64         LBP_Undefined,  /**< Undefined */
  65
  66         /* The following break classes are treated in the pair table. */
  67         LBP_OP,                 /**< Opening punctuation */
  68         LBP_CL,                 /**< Closing punctuation */
  69         LBP_CP,                 /**< Closing parenthesis */
  70         LBP_QU,                 /**< Ambiguous quotation */
  71         LBP_GL,                 /**< Glue */
  72         LBP_NS,                 /**< Non-starters */
  73         LBP_EX,                 /**< Exclamation/Interrogation */
  74         LBP_SY,                 /**< Symbols allowing break after */
  75         LBP_IS,                 /**< Infix separator */
  76         LBP_PR,                 /**< Prefix */
  77         LBP_PO,                 /**< Postfix */
  78         LBP_NU,                 /**< Numeric */
  79         LBP_AL,                 /**< Alphabetic */
  80         LBP_ID,                 /**< Ideographic */
  81         LBP_IN,                 /**< Inseparable characters */
  82         LBP_HY,                 /**< Hyphen */
  83         LBP_BA,                 /**< Break after */
  84         LBP_BB,                 /**< Break before */
  85         LBP_B2,                 /**< Break on either side (but not pair) */
  86         LBP_ZW,                 /**< Zero-width space */
  87         LBP_CM,                 /**< Combining marks */
  88         LBP_WJ,                 /**< Word joiner */
  89         LBP_H2,                 /**< Hangul LV */
  90         LBP_H3,                 /**< Hangul LVT */
  91         LBP_JL,                 /**< Hangul L Jamo */
  92         LBP_JV,                 /**< Hangul V Jamo */
  93         LBP_JT,                 /**< Hangul T Jamo */
  94
  95         /* The following break classes are not treated in the pair table */
  96         LBP_AI,                 /**< Ambiguous (alphabetic or ideograph) */
  97         LBP_BK,                 /**< Break (mandatory) */
  98         LBP_CB,                 /**< Contingent break */
  99         LBP_CR,                 /**< Carriage return */
 100         LBP_LF,                 /**< Line feed */
 101         LBP_NL,                 /**< Next line */
 102         LBP_SA,                 /**< South-East Asian */
 103         LBP_SG,                 /**< Surrogates */
 104         LBP_SP,                 /**< Space */
 105         LBP_XX                  /**< Unknown */
 106 };
 107
 108 /**
 109  * Struct for entries of line break properties.  The array of the
 110  * entries \e must be sorted.
 111  */
 112 struct LineBreakProperties
 113 {
 114         utf32_t start;                          /**< Starting coding point */
 115         utf32_t end;                            /**< End coding point */
 116         enum LineBreakClass prop;       /**< The line breaking property */
 117 };
 118
 119 /**
 120  * Struct for association of language-specific line breaking properties
 121  * with language names.
 122  */
 123 struct LineBreakPropertiesLang
 124 {
 125         const char *lang;                                       /**< Language name */
 126         size_t namelen;                                         /**< Length of name to match */
 127         struct LineBreakProperties *lbp;        /**< Pointer to associated data */
 128 };
 129
 130 /**
 131  * Abstract function interface for #lb_get_next_char_utf8,
 132  * #lb_get_next_char_utf16, and #lb_get_next_char_utf32.
 133  */
 134 typedef utf32_t (*get_next_char_t)(const void *, size_t, size_t *);
 135
 136 /* Declarations */
 137 extern struct LineBreakProperties lb_prop_default[];
 138 extern struct LineBreakPropertiesLang lb_prop_lang_map[];
 139
 140 /* Function Prototype */
 141 utf32_t lb_get_next_char_utf8(const utf8_t *s, size_t len, size_t *ip);
 142 utf32_t lb_get_next_char_utf16(const utf16_t *s, size_t len, size_t *ip);
 143 utf32_t lb_get_next_char_utf32(const utf32_t *s, size_t len, size_t *ip);
 144 void set_linebreaks(
 145                 const void *s,
 146                 size_t len,
 147                 const char *lang,
 148                 char *brks,
 149                 get_next_char_t get_next_char);