X-Git-Url: http://review.tizen.org/git/?a=blobdiff_plain;f=src%2Fkeysym-utf.c;h=77696725457d097739d2a5c6d83d2a72e3533cbe;hb=8cd688c06378ebdb2def6f310fb1e78898b75bde;hp=cb8f304a845f11a6d0b2031b576871afcdc4f63c;hpb=6992de408a890be7bd66f7d829dc84eb9c4b81a6;p=platform%2Fupstream%2Flibxkbcommon.git diff --git a/src/keysym-utf.c b/src/keysym-utf.c index cb8f304..7769672 100644 --- a/src/keysym-utf.c +++ b/src/keysym-utf.c @@ -34,14 +34,20 @@ * This software is in the public domain. Share and enjoy! * */ -#include + +#include "config.h" + #include "xkbcommon/xkbcommon.h" #include "utils.h" +#include "utf8.h" +/* We don't use the uint32_t types here, to save some space. */ struct codepair { - xkb_keysym_t keysym; + uint16_t keysym; uint16_t ucs; -} keysymtab[] = { +}; + +static const struct codepair keysymtab[] = { { 0x01a1, 0x0104 }, /* Aogonek Ą LATIN CAPITAL LETTER A WITH OGONEK */ { 0x01a2, 0x02d8 }, /* breve ˘ BREVE */ { 0x01a3, 0x0141 }, /* Lstroke Ł LATIN CAPITAL LETTER L WITH STROKE */ @@ -517,7 +523,7 @@ struct codepair { { 0x0aa8, 0x200a }, /* hairspace   HAIR SPACE */ { 0x0aa9, 0x2014 }, /* emdash — EM DASH */ { 0x0aaa, 0x2013 }, /* endash – EN DASH */ - /* 0x0aac signifblank ? ??? */ + { 0x0aac, 0x2423 }, /* signifblank ␣ OPEN BOX */ { 0x0aae, 0x2026 }, /* ellipsis … HORIZONTAL ELLIPSIS */ { 0x0aaf, 0x2025 }, /* doubbaselinedot ‥ TWO DOT LEADER */ { 0x0ab0, 0x2153 }, /* onethird ⅓ VULGAR FRACTION ONE THIRD */ @@ -530,9 +536,9 @@ struct codepair { { 0x0ab7, 0x215a }, /* fivesixths ⅚ VULGAR FRACTION FIVE SIXTHS */ { 0x0ab8, 0x2105 }, /* careof ℅ CARE OF */ { 0x0abb, 0x2012 }, /* figdash ‒ FIGURE DASH */ - { 0x0abc, 0x2329 }, /* leftanglebracket 〈 LEFT-POINTING ANGLE BRACKET */ + { 0x0abc, 0x27e8 }, /* leftanglebracket ⟨ MATHEMATICAL LEFT ANGLE BRACKET */ { 0x0abd, 0x002e }, /* decimalpoint . FULL STOP */ - { 0x0abe, 0x232a }, /* rightanglebracket 〉 RIGHT-POINTING ANGLE BRACKET */ + { 0x0abe, 0x27e9 }, /* rightanglebracket ⟩ MATHEMATICAL RIGHT ANGLE BRACKET */ /* 0x0abf marker ? ??? */ { 0x0ac3, 0x215b }, /* oneeighth ⅛ VULGAR FRACTION ONE EIGHTH */ { 0x0ac4, 0x215c }, /* threeeighths ⅜ VULGAR FRACTION THREE EIGHTHS */ @@ -550,6 +556,7 @@ struct codepair { { 0x0ad2, 0x201c }, /* leftdoublequotemark “ LEFT DOUBLE QUOTATION MARK */ { 0x0ad3, 0x201d }, /* rightdoublequotemark ” RIGHT DOUBLE QUOTATION MARK */ { 0x0ad4, 0x211e }, /* prescription ℞ PRESCRIPTION TAKE */ + { 0x0ad5, 0x2030 }, /* permille ‰ PER MILLE SIGN */ { 0x0ad6, 0x2032 }, /* minutes ′ PRIME */ { 0x0ad7, 0x2033 }, /* seconds ″ DOUBLE PRIME */ { 0x0ad9, 0x271d }, /* latincross ✝ LATIN CROSS */ @@ -607,8 +614,8 @@ struct codepair { { 0x0bd6, 0x222a }, /* downshoe ∪ UNION */ { 0x0bd8, 0x2283 }, /* rightshoe ⊃ SUPERSET OF */ { 0x0bda, 0x2282 }, /* leftshoe ⊂ SUBSET OF */ - { 0x0bdc, 0x22a2 }, /* lefttack ⊢ RIGHT TACK */ - { 0x0bfc, 0x22a3 }, /* righttack ⊣ LEFT TACK */ + { 0x0bdc, 0x22a3 }, /* lefttack ⊣ LEFT TACK */ + { 0x0bfc, 0x22a2 }, /* righttack ⊢ RIGHT TACK */ { 0x0cdf, 0x2017 }, /* hebrew_doublelowline ‗ DOUBLE LOW LINE */ { 0x0ce0, 0x05d0 }, /* hebrew_aleph א HEBREW LETTER ALEF */ { 0x0ce1, 0x05d1 }, /* hebrew_bet ב HEBREW LETTER BET */ @@ -803,7 +810,7 @@ struct codepair { { 0x0ef0, 0x3171 }, /* Hangul_SunkyeongeumMieum ㅱ HANGUL LETTER KAPYEOUNMIEUM */ { 0x0ef1, 0x3178 }, /* Hangul_SunkyeongeumPieub ㅸ HANGUL LETTER KAPYEOUNPIEUP */ { 0x0ef2, 0x317f }, /* Hangul_PanSios ㅿ HANGUL LETTER PANSIOS */ -/* 0x0ef3 Hangul_KkogjiDalrinIeung ? ??? */ + { 0x0ef3, 0x3181 }, /* Hangul_KkogjiDalrinIeung ㆁ HANGUL LETTER YESIEUNG */ { 0x0ef4, 0x3184 }, /* Hangul_SunkyeongeumPhieuf ㆄ HANGUL LETTER KAPYEOUNPHIEUPH */ { 0x0ef5, 0x3186 }, /* Hangul_YeorinHieuh ㆆ HANGUL LETTER YEORINHIEUH */ { 0x0ef6, 0x318d }, /* Hangul_AraeA ㆍ HANGUL LETTER ARAEA */ @@ -829,55 +836,95 @@ struct codepair { { 0x20aa, 0x20aa }, /* NewSheqelSign ₪ NEW SHEQEL SIGN */ { 0x20ab, 0x20ab }, /* DongSign ₫ DONG SIGN */ { 0x20ac, 0x20ac }, /* EuroSign € EURO SIGN */ - - { 0xff80, 0x0020 }, /* KP_Space SPACE */ - { 0xffaa, 0x002a }, /* KP_Multiply * ASTERISK */ - { 0xffab, 0x002b }, /* KP_Plus + PLUS SIGN */ - /* XXX: It's debatable what KP_Separator and KP_Decimal should represent, - * as well as locale-specific. So just enforce English colonial - * hegemony on the world for the time being. */ - { 0xffac, 0x002e }, /* KP_Separator . FULL STOP */ - { 0xffad, 0x002d }, /* KP_Subtract - HYPHEN-MINUS */ - { 0xffae, 0x002e }, /* KP_Decimal . FULL STOP */ - { 0xffaf, 0x002f }, /* KP_Divide / SOLIDUS */ - { 0xffbd, 0x003d }, /* KP_Equal = EQUAL SIGN */ }; -_X_EXPORT uint32_t -xkb_keysym_to_utf32(xkb_keysym_t keysym) +/* binary search with range check */ +static uint32_t +bin_search(const struct codepair *table, size_t length, xkb_keysym_t keysym) { - int min = 0; - int max = sizeof(keysymtab) / sizeof(struct codepair) - 1; - int mid; + size_t first = 0; + size_t last = length; + + if (keysym < table[0].keysym || keysym > table[length].keysym) + return 0; + + /* binary search in table */ + while (last >= first) { + size_t mid = (first + last) / 2; + if (table[mid].keysym < keysym) + first = mid + 1; + else if (table[mid].keysym > keysym) + last = mid - 1; + else /* found it */ + return table[mid].ucs; + } + /* no matching Unicode value found in table */ + return 0; +} + +XKB_EXPORT uint32_t +xkb_keysym_to_utf32(xkb_keysym_t keysym) +{ /* first check for Latin-1 characters (1:1 mapping) */ if ((keysym >= 0x0020 && keysym <= 0x007e) || (keysym >= 0x00a0 && keysym <= 0x00ff)) return keysym; - if (keysym >= 0xffb0 && keysym <= 0xffb9) - return keysym - (0xffb0 - 0x0030); + /* patch encoding botch */ + if (keysym == XKB_KEY_KP_Space) + return XKB_KEY_space & 0x7f; - /* also check for directly encoded 24-bit UCS characters */ - if ((keysym & 0xff000000) == 0x01000000) - return keysym & 0x00ffffff; + /* special keysyms */ + if ((keysym >= XKB_KEY_BackSpace && keysym <= XKB_KEY_Clear) || + (keysym >= XKB_KEY_KP_Multiply && keysym <= XKB_KEY_KP_9) || + keysym == XKB_KEY_Return || keysym == XKB_KEY_Escape || + keysym == XKB_KEY_Delete || keysym == XKB_KEY_KP_Tab || + keysym == XKB_KEY_KP_Enter || keysym == XKB_KEY_KP_Equal) + return keysym & 0x7f; - /* binary search in table */ - while (max >= min) { - mid = (min + max) / 2; - if (keysymtab[mid].keysym < keysym) - min = mid + 1; - else if (keysymtab[mid].keysym > keysym) - max = mid - 1; - else /* found it */ - return keysymtab[mid].ucs; - } + /* also check for directly encoded Unicode codepoints */ + /* + * In theory, this is supposed to start from 0x100100, such that the ASCII + * range, which is already covered by 0x00-0xff, can't be encoded in two + * ways. However, changing this after a couple of decades probably won't + * go well, so it stays as it is. + */ + if (0x01000000 <= keysym && keysym <= 0x0110ffff) + return keysym - 0x01000000; - /* no matching Unicode value found */ - return 0; + /* search main table */ + return bin_search(keysymtab, ARRAY_SIZE(keysymtab) - 1, keysym); } +XKB_EXPORT xkb_keysym_t +xkb_utf32_to_keysym(uint32_t ucs) +{ + /* first check for Latin-1 characters (1:1 mapping) */ + if ((ucs >= 0x0020 && ucs <= 0x007e) || + (ucs >= 0x00a0 && ucs <= 0x00ff)) + return ucs; + /* special keysyms */ + if ((ucs >= (XKB_KEY_BackSpace & 0x7f) && ucs <= (XKB_KEY_Clear & 0x7f)) || + ucs == (XKB_KEY_Return & 0x7f) || ucs == (XKB_KEY_Escape & 0x7f)) + return ucs | 0xff00; + if (ucs == (XKB_KEY_Delete & 0x7f)) + return XKB_KEY_Delete; + + /* Unicode non-symbols and code points outside Unicode planes */ + if ((ucs >= 0xfdd0 && ucs <= 0xfdef) || + ucs > 0x10ffff || (ucs & 0xfffe) == 0xfffe) + return XKB_KEY_NoSymbol; + + /* search main table */ + for (size_t i = 0; i < ARRAY_SIZE(keysymtab); i++) + if (keysymtab[i].ucs == ucs) + return keysymtab[i].keysym; + + /* Use direct encoding if everything else fails */ + return ucs | 0x01000000; +} /* * Copyright © 2012 Intel Corporation @@ -904,43 +951,7 @@ xkb_keysym_to_utf32(xkb_keysym_t keysym) * Author: Rob Bradford */ -static int -utf32_to_utf8(uint32_t unichar, char *buffer) -{ - int count, shift, length; - uint8_t head; - - if (unichar <= 0x007f) { - buffer[0] = unichar; - buffer[1] = '\0'; - return 2; - } else if (unichar <= 0x07FF) { - length = 2; - head = 0xc0; - } else if (unichar <= 0xffff) { - length = 3; - head = 0xe0; - } else if (unichar <= 0x1fffff) { - length = 4; - head = 0xf0; - } else if (unichar <= 0x3ffffff) { - length = 5; - head = 0xf8; - } else { - length = 6; - head = 0xfc; - } - - for (count = length - 1, shift = 0; count > 0; count--, shift += 6) - buffer[count] = 0x80 | ((unichar >> shift) & 0x3f); - - buffer[0] = head | ((unichar >> shift) & 0x3f); - buffer[length] = '\0'; - - return length + 1; -} - -_X_EXPORT int +XKB_EXPORT int xkb_keysym_to_utf8(xkb_keysym_t keysym, char *buffer, size_t size) { uint32_t codepoint;