X-Git-Url: http://review.tizen.org/git/?a=blobdiff_plain;f=text%2Fdali%2Finternal%2Flibunibreak%2Flinebreak.c;h=2b28f0567ce29cc14ee56017d2c363acd5fb70d0;hb=c6d52122583816fcea9233ca1e99b3567a6a8fe4;hp=62a81046fce75722fa4dd5f6eaec16c70403d2e4;hpb=602d26fdc2196b419a84ce32a61f0814f66a64bf;p=platform%2Fcore%2Fuifw%2Fdali-adaptor.git diff --git a/text/dali/internal/libunibreak/linebreak.c b/text/dali/internal/libunibreak/linebreak.c index 62a8104..2b28f05 100644 --- a/text/dali/internal/libunibreak/linebreak.c +++ b/text/dali/internal/libunibreak/linebreak.c @@ -4,7 +4,7 @@ * Line breaking in a Unicode sequence. Designed to be used in a * generic text renderer. * - * Copyright (C) 2008-2013 Wu Yongwei + * Copyright (C) 2008-2015 Wu Yongwei * Copyright (C) 2013 Petr Filipsky * * This software is provided 'as-is', without any express or implied @@ -31,9 +31,9 @@ * Unicode 5.0.0: * * - * This library has been updated according to Revision 30, for - * Unicode 6.2.0: - * + * This library has been updated according to Revision 33, for + * Unicode 7.0.0: + * * * The Unicode Terms of Use are available at * @@ -45,7 +45,7 @@ * Implementation of the line breaking algorithm as described in Unicode * Standard Annex 14. * - * @version 2.5, 2013/11/14 + * @version 2.7, 2015/04/18 * @author Wu Yongwei * @author Petr Filipsky */ @@ -67,11 +67,6 @@ #define LINEBREAK_INDEX_SIZE 40 /** - * Version number of the library. - */ -const int linebreak_version = LINEBREAK_VERSION; - -/** * Enumeration of break actions. They are used in the break action * pair table below. */ @@ -451,7 +446,7 @@ static enum LineBreakClass resolve_lb_class( * @post \a lbpCtx->lbcCur has the updated line break class */ static void treat_first_char( - struct LineBreakContext* lbpCtx) + struct LineBreakContext *lbpCtx) { switch (lbpCtx->lbcCur) { @@ -465,6 +460,8 @@ static void treat_first_char( case LBP_SP: lbpCtx->lbcCur = LBP_WJ; /* Leading space treated as WJ */ break; + case LBP_HL: + lbpCtx->fLb21aHebrew = 1; /* Rule LB21a */ default: break; } @@ -485,7 +482,7 @@ static void treat_first_char( * table lookup is needed */ static int get_lb_result_simple( - struct LineBreakContext* lbpCtx) + struct LineBreakContext *lbpCtx) { if (lbpCtx->lbcCur == LBP_BK || (lbpCtx->lbcCur == LBP_CR && lbpCtx->lbcNew != LBP_LF)) @@ -528,14 +525,46 @@ static int get_lb_result_simple( * #LINEBREAK_ALLOWBREAK, and #LINEBREAK_NOBREAK */ static int get_lb_result_lookup( - struct LineBreakContext* lbpCtx) + struct LineBreakContext *lbpCtx) { - /* TODO: Rule LB21a, as introduced by Revision 28 of UAX#14, is not - * yet implemented below. */ int brk = LINEBREAK_UNDEFINED; + assert((lbpCtx->lbcCur > 0) && (lbpCtx->lbcCur <= LBP_RI)); assert((lbpCtx->lbcNew > 0) && (lbpCtx->lbcNew <= LBP_RI)); - switch (baTable[lbpCtx->lbcCur - 1][lbpCtx->lbcNew - 1]) + + /* Fix for Hangul word wrap */ + enum LineBreakClass lbcCur, lbcNew; + + switch (lbpCtx->lbcCur) + { + case LBP_H2: /**< Hangul LV */ + case LBP_H3: /**< Hangul LVT */ + case LBP_JL: /**< Hangul L Jamo */ + case LBP_JV: /**< Hangul V Jamo */ + case LBP_JT: /**< Hangul T Jamo */ + lbcCur = LBP_AL; + break; + default: + lbcCur = lbpCtx->lbcCur; + break; + } + + switch (lbpCtx->lbcNew) + { + case LBP_H2: /**< Hangul LV */ + case LBP_H3: /**< Hangul LVT */ + case LBP_JL: /**< Hangul L Jamo */ + case LBP_JV: /**< Hangul V Jamo */ + case LBP_JT: /**< Hangul T Jamo */ + lbcNew = LBP_AL; + break; + default: + lbcNew = lbpCtx->lbcNew; + break; + } + + switch (baTable[lbcCur - 1][lbcNew - 1]) + /* END */ { case DIR_BRK: brk = LINEBREAK_ALLOWBREAK; @@ -555,6 +584,19 @@ static int get_lb_result_lookup( brk = LINEBREAK_NOBREAK; break; } + + /* Special processing due to rule LB21a */ + if (lbpCtx->fLb21aHebrew && + (lbpCtx->lbcCur == LBP_HY || lbpCtx->lbcCur == LBP_BA)) + { + brk = LINEBREAK_NOBREAK; + lbpCtx->fLb21aHebrew = 0; + } + else if (!(lbpCtx->lbcNew == LBP_HY || lbpCtx->lbcNew == LBP_BA)) + { + lbpCtx->fLb21aHebrew = (lbpCtx->lbcNew == LBP_HL); + } + lbpCtx->lbcCur = lbpCtx->lbcNew; return brk; } @@ -568,9 +610,9 @@ static int get_lb_result_lookup( * @post the line breaking context is initialized */ void lb_init_break_context( - struct LineBreakContext* lbpCtx, + struct LineBreakContext *lbpCtx, utf32_t ch, - const char* lang) + const char *lang) { lbpCtx->lang = lang; lbpCtx->lbpLang = get_lb_prop_lang(lang); @@ -579,6 +621,7 @@ void lb_init_break_context( lbpCtx->lbcCur = resolve_lb_class( get_char_lb_class_lang(ch, lbpCtx->lbpLang), lbpCtx->lang); + lbpCtx->fLb21aHebrew = 0; treat_first_char(lbpCtx); } @@ -593,7 +636,7 @@ void lb_init_break_context( * @post the line breaking context is updated */ int lb_process_next_char( - struct LineBreakContext* lbpCtx, + struct LineBreakContext *lbpCtx, utf32_t ch ) { int brk; @@ -618,127 +661,6 @@ int lb_process_next_char( } /** - * Gets the next Unicode character in a UTF-8 sequence. The index will - * be advanced to the next complete character, unless the end of string - * is reached in the middle of a UTF-8 sequence. - * - * @param[in] s input UTF-8 string - * @param[in] len length of the string in bytes - * @param[in,out] ip pointer to the index - * @return the Unicode character beginning at the index; or - * #EOS if end of input is encountered - */ -utf32_t lb_get_next_char_utf8( - const utf8_t *s, - size_t len, - size_t *ip) -{ - utf8_t ch; - utf32_t res; - - assert(*ip <= len); - if (*ip == len) - return EOS; - ch = s[*ip]; - - if (ch < 0xC2 || ch > 0xF4) - { /* One-byte sequence, tail (should not occur), or invalid */ - *ip += 1; - return ch; - } - else if (ch < 0xE0) - { /* Two-byte sequence */ - if (*ip + 2 > len) - return EOS; - res = ((ch & 0x1F) << 6) + (s[*ip + 1] & 0x3F); - *ip += 2; - return res; - } - else if (ch < 0xF0) - { /* Three-byte sequence */ - if (*ip + 3 > len) - return EOS; - res = ((ch & 0x0F) << 12) + - ((s[*ip + 1] & 0x3F) << 6) + - ((s[*ip + 2] & 0x3F)); - *ip += 3; - return res; - } - else - { /* Four-byte sequence */ - if (*ip + 4 > len) - return EOS; - res = ((ch & 0x07) << 18) + - ((s[*ip + 1] & 0x3F) << 12) + - ((s[*ip + 2] & 0x3F) << 6) + - ((s[*ip + 3] & 0x3F)); - *ip += 4; - return res; - } -} - -/** - * Gets the next Unicode character in a UTF-16 sequence. The index will - * be advanced to the next complete character, unless the end of string - * is reached in the middle of a UTF-16 surrogate pair. - * - * @param[in] s input UTF-16 string - * @param[in] len length of the string in words - * @param[in,out] ip pointer to the index - * @return the Unicode character beginning at the index; or - * #EOS if end of input is encountered - */ -utf32_t lb_get_next_char_utf16( - const utf16_t *s, - size_t len, - size_t *ip) -{ - utf16_t ch; - - assert(*ip <= len); - if (*ip == len) - return EOS; - ch = s[(*ip)++]; - - if (ch < 0xD800 || ch > 0xDBFF) - { /* If the character is not a high surrogate */ - return ch; - } - if (*ip == len) - { /* If the input ends here (an error) */ - --(*ip); - return EOS; - } - if (s[*ip] < 0xDC00 || s[*ip] > 0xDFFF) - { /* If the next character is not the low surrogate (an error) */ - return ch; - } - /* Return the constructed character and advance the index again */ - return (((utf32_t)ch & 0x3FF) << 10) + (s[(*ip)++] & 0x3FF) + 0x10000; -} - -/** - * Gets the next Unicode character in a UTF-32 sequence. The index will - * be advanced to the next character. - * - * @param[in] s input UTF-32 string - * @param[in] len length of the string in dwords - * @param[in,out] ip pointer to the index - * @return the Unicode character beginning at the index; or - * #EOS if end of input is encountered - */ -utf32_t lb_get_next_char_utf32( - const utf32_t *s, - size_t len, - size_t *ip) -{ - assert(*ip <= len); - if (*ip == len) - return EOS; - return s[(*ip)++]; -} - -/** * Sets the line breaking information for a generic input string. * * @param[in] s input string @@ -809,7 +731,7 @@ void set_linebreaks_utf8( char *brks) { set_linebreaks(s, len, lang, brks, - (get_next_char_t)lb_get_next_char_utf8); + (get_next_char_t)ub_get_next_char_utf8); } /** @@ -829,7 +751,7 @@ void set_linebreaks_utf16( char *brks) { set_linebreaks(s, len, lang, brks, - (get_next_char_t)lb_get_next_char_utf16); + (get_next_char_t)ub_get_next_char_utf16); } /** @@ -849,7 +771,7 @@ void set_linebreaks_utf32( char *brks) { set_linebreaks(s, len, lang, brks, - (get_next_char_t)lb_get_next_char_utf32); + (get_next_char_t)ub_get_next_char_utf32); } /** @@ -868,7 +790,7 @@ void set_linebreaks_utf32( int is_line_breakable( utf32_t char1, utf32_t char2, - const char* lang) + const char *lang) { utf32_t s[2]; char brks[2];