X-Git-Url: http://review.tizen.org/git/?a=blobdiff_plain;f=src%2Fkeysym-utf.c;h=0bb9a4f1cd0996ffd5890ef488ee0a75bcea9399;hb=1608c9d4a753e66c7e46dc2a5be69079470ec75b;hp=b1f6f155ec6ece4040a9bf129850974c72c0621d;hpb=239a5be10e7bd248e64d4da62c192c01bd93bd60;p=platform%2Fupstream%2Flibxkbcommon.git diff --git a/src/keysym-utf.c b/src/keysym-utf.c index b1f6f15..0bb9a4f 100644 --- a/src/keysym-utf.c +++ b/src/keysym-utf.c @@ -35,8 +35,13 @@ * */ +#include "config.h" + #include "xkbcommon/xkbcommon.h" #include "utils.h" +#include "utf8.h" + +#define NO_KEYSYM_UNICODE_CONVERSION 0 /* We don't use the uint32_t types here, to save some space. */ struct codepair { @@ -44,7 +49,7 @@ struct codepair { uint16_t ucs; }; -const struct codepair keysymtab[] = { +static const struct codepair keysymtab[] = { { 0x01a1, 0x0104 }, /* Aogonek Ą LATIN CAPITAL LETTER A WITH OGONEK */ { 0x01a2, 0x02d8 }, /* breve ˘ BREVE */ { 0x01a3, 0x0141 }, /* Lstroke Ł LATIN CAPITAL LETTER L WITH STROKE */ @@ -520,7 +525,7 @@ const struct codepair keysymtab[] = { { 0x0aa8, 0x200a }, /* hairspace   HAIR SPACE */ { 0x0aa9, 0x2014 }, /* emdash — EM DASH */ { 0x0aaa, 0x2013 }, /* endash – EN DASH */ - /* 0x0aac signifblank ? ??? */ + { 0x0aac, 0x2423 }, /* signifblank ␣ OPEN BOX */ { 0x0aae, 0x2026 }, /* ellipsis … HORIZONTAL ELLIPSIS */ { 0x0aaf, 0x2025 }, /* doubbaselinedot ‥ TWO DOT LEADER */ { 0x0ab0, 0x2153 }, /* onethird ⅓ VULGAR FRACTION ONE THIRD */ @@ -533,9 +538,9 @@ const struct codepair keysymtab[] = { { 0x0ab7, 0x215a }, /* fivesixths ⅚ VULGAR FRACTION FIVE SIXTHS */ { 0x0ab8, 0x2105 }, /* careof ℅ CARE OF */ { 0x0abb, 0x2012 }, /* figdash ‒ FIGURE DASH */ - { 0x0abc, 0x2329 }, /* leftanglebracket 〈 LEFT-POINTING ANGLE BRACKET */ + { 0x0abc, 0x27e8 }, /* leftanglebracket ⟨ MATHEMATICAL LEFT ANGLE BRACKET */ { 0x0abd, 0x002e }, /* decimalpoint . FULL STOP */ - { 0x0abe, 0x232a }, /* rightanglebracket 〉 RIGHT-POINTING ANGLE BRACKET */ + { 0x0abe, 0x27e9 }, /* rightanglebracket ⟩ MATHEMATICAL RIGHT ANGLE BRACKET */ /* 0x0abf marker ? ??? */ { 0x0ac3, 0x215b }, /* oneeighth ⅛ VULGAR FRACTION ONE EIGHTH */ { 0x0ac4, 0x215c }, /* threeeighths ⅜ VULGAR FRACTION THREE EIGHTHS */ @@ -553,6 +558,7 @@ const struct codepair keysymtab[] = { { 0x0ad2, 0x201c }, /* leftdoublequotemark “ LEFT DOUBLE QUOTATION MARK */ { 0x0ad3, 0x201d }, /* rightdoublequotemark ” RIGHT DOUBLE QUOTATION MARK */ { 0x0ad4, 0x211e }, /* prescription ℞ PRESCRIPTION TAKE */ + { 0x0ad5, 0x2030 }, /* permille ‰ PER MILLE SIGN */ { 0x0ad6, 0x2032 }, /* minutes ′ PRIME */ { 0x0ad7, 0x2033 }, /* seconds ″ DOUBLE PRIME */ { 0x0ad9, 0x271d }, /* latincross ✝ LATIN CROSS */ @@ -610,8 +616,8 @@ const struct codepair keysymtab[] = { { 0x0bd6, 0x222a }, /* downshoe ∪ UNION */ { 0x0bd8, 0x2283 }, /* rightshoe ⊃ SUPERSET OF */ { 0x0bda, 0x2282 }, /* leftshoe ⊂ SUBSET OF */ - { 0x0bdc, 0x22a2 }, /* lefttack ⊢ RIGHT TACK */ - { 0x0bfc, 0x22a3 }, /* righttack ⊣ LEFT TACK */ + { 0x0bdc, 0x22a3 }, /* lefttack ⊣ LEFT TACK */ + { 0x0bfc, 0x22a2 }, /* righttack ⊢ RIGHT TACK */ { 0x0cdf, 0x2017 }, /* hebrew_doublelowline ‗ DOUBLE LOW LINE */ { 0x0ce0, 0x05d0 }, /* hebrew_aleph א HEBREW LETTER ALEF */ { 0x0ce1, 0x05d1 }, /* hebrew_bet ב HEBREW LETTER BET */ @@ -806,7 +812,7 @@ const struct codepair keysymtab[] = { { 0x0ef0, 0x3171 }, /* Hangul_SunkyeongeumMieum ㅱ HANGUL LETTER KAPYEOUNMIEUM */ { 0x0ef1, 0x3178 }, /* Hangul_SunkyeongeumPieub ㅸ HANGUL LETTER KAPYEOUNPIEUP */ { 0x0ef2, 0x317f }, /* Hangul_PanSios ㅿ HANGUL LETTER PANSIOS */ -/* 0x0ef3 Hangul_KkogjiDalrinIeung ? ??? */ + { 0x0ef3, 0x3181 }, /* Hangul_KkogjiDalrinIeung ㆁ HANGUL LETTER YESIEUNG */ { 0x0ef4, 0x3184 }, /* Hangul_SunkyeongeumPhieuf ㆄ HANGUL LETTER KAPYEOUNPHIEUPH */ { 0x0ef5, 0x3186 }, /* Hangul_YeorinHieuh ㆆ HANGUL LETTER YEORINHIEUH */ { 0x0ef6, 0x318d }, /* Hangul_AraeA ㆍ HANGUL LETTER ARAEA */ @@ -815,92 +821,106 @@ const struct codepair keysymtab[] = { { 0x0ef9, 0x11f0 }, /* Hangul_J_KkogjiDalrinIeung ᇰ HANGUL JONGSEONG YESIEUNG */ { 0x0efa, 0x11f9 }, /* Hangul_J_YeorinHieuh ᇹ HANGUL JONGSEONG YEORINHIEUH */ { 0x0eff, 0x20a9 }, /* Korean_Won ₩ WON SIGN */ - { 0x13a4, 0x20ac }, /* Euro € EURO SIGN */ { 0x13bc, 0x0152 }, /* OE Œ LATIN CAPITAL LIGATURE OE */ { 0x13bd, 0x0153 }, /* oe œ LATIN SMALL LIGATURE OE */ { 0x13be, 0x0178 }, /* Ydiaeresis Ÿ LATIN CAPITAL LETTER Y WITH DIAERESIS */ - { 0x20a0, 0x20a0 }, /* EcuSign ₠ EURO-CURRENCY SIGN */ - { 0x20a1, 0x20a1 }, /* ColonSign ₡ COLON SIGN */ - { 0x20a2, 0x20a2 }, /* CruzeiroSign ₢ CRUZEIRO SIGN */ - { 0x20a3, 0x20a3 }, /* FFrancSign ₣ FRENCH FRANC SIGN */ - { 0x20a4, 0x20a4 }, /* LiraSign ₤ LIRA SIGN */ - { 0x20a5, 0x20a5 }, /* MillSign ₥ MILL SIGN */ - { 0x20a6, 0x20a6 }, /* NairaSign ₦ NAIRA SIGN */ - { 0x20a7, 0x20a7 }, /* PesetaSign ₧ PESETA SIGN */ - { 0x20a8, 0x20a8 }, /* RupeeSign ₨ RUPEE SIGN */ - { 0x20a9, 0x20a9 }, /* WonSign ₩ WON SIGN */ - { 0x20aa, 0x20aa }, /* NewSheqelSign ₪ NEW SHEQEL SIGN */ - { 0x20ab, 0x20ab }, /* DongSign ₫ DONG SIGN */ { 0x20ac, 0x20ac }, /* EuroSign € EURO SIGN */ }; -const struct codepair keysymtab_kp[] = { - { 0xff80, 0x0020 }, /* KP_Space SPACE */ - { 0xffaa, 0x002a }, /* KP_Multiply * ASTERISK */ - { 0xffab, 0x002b }, /* KP_Plus + PLUS SIGN */ - /* XXX: It's debatable what KP_Separator and KP_Decimal should represent, - * as well as locale-specific. So just enforce English colonial - * hegemony on the world for the time being. */ - { 0xffac, 0x002e }, /* KP_Separator . FULL STOP */ - { 0xffad, 0x002d }, /* KP_Subtract - HYPHEN-MINUS */ - { 0xffae, 0x002e }, /* KP_Decimal . FULL STOP */ - { 0xffaf, 0x002f }, /* KP_Divide / SOLIDUS */ - { 0xffbd, 0x003d }, /* KP_Equal = EQUAL SIGN */ -}; - /* binary search with range check */ static uint32_t bin_search(const struct codepair *table, size_t length, xkb_keysym_t keysym) { - int min = 0; - int max = length; - int mid; + size_t first = 0; + size_t last = length; if (keysym < table[0].keysym || keysym > table[length].keysym) return 0; /* binary search in table */ - while (max >= min) { - mid = (min + max) / 2; + while (last >= first) { + size_t mid = (first + last) / 2; if (table[mid].keysym < keysym) - min = mid + 1; + first = mid + 1; else if (table[mid].keysym > keysym) - max = mid - 1; + last = mid - 1; else /* found it */ return table[mid].ucs; } /* no matching Unicode value found in table */ - return 0; + return NO_KEYSYM_UNICODE_CONVERSION; } -#define N_ELEMENTS(x) sizeof(x) / sizeof(x[0]) - XKB_EXPORT uint32_t xkb_keysym_to_utf32(xkb_keysym_t keysym) { - uint32_t retval = 0; - /* first check for Latin-1 characters (1:1 mapping) */ if ((keysym >= 0x0020 && keysym <= 0x007e) || (keysym >= 0x00a0 && keysym <= 0x00ff)) return keysym; - if (keysym >= 0xffb0 && keysym <= 0xffb9) - return keysym - (0xffb0 - 0x0030); + /* patch encoding botch */ + if (keysym == XKB_KEY_KP_Space) + return XKB_KEY_space & 0x7f; + + /* special keysyms */ + if ((keysym >= XKB_KEY_BackSpace && keysym <= XKB_KEY_Clear) || + (keysym >= XKB_KEY_KP_Multiply && keysym <= XKB_KEY_KP_9) || + keysym == XKB_KEY_Return || keysym == XKB_KEY_Escape || + keysym == XKB_KEY_Delete || keysym == XKB_KEY_KP_Tab || + keysym == XKB_KEY_KP_Enter || keysym == XKB_KEY_KP_Equal) + return keysym & 0x7f; + + /* also check for directly encoded Unicode codepoints */ + + /* Exclude surrogates: they are invalid in UTF-32. + * See https://www.unicode.org/versions/Unicode15.0.0/ch03.pdf#G28875 + * for further details. + */ + if (0x0100d800 <= keysym && keysym <= 0x0100dfff) + return NO_KEYSYM_UNICODE_CONVERSION; + /* + * In theory, this is supposed to start from 0x100100, such that the ASCII + * range, which is already covered by 0x00-0xff, can't be encoded in two + * ways. However, changing this after a couple of decades probably won't + * go well, so it stays as it is. + */ + if (0x01000000 <= keysym && keysym <= 0x0110ffff) + return keysym - 0x01000000; + + /* search main table */ + return bin_search(keysymtab, ARRAY_SIZE(keysymtab) - 1, keysym); +} + +XKB_EXPORT xkb_keysym_t +xkb_utf32_to_keysym(uint32_t ucs) +{ + /* first check for Latin-1 characters (1:1 mapping) */ + if ((ucs >= 0x0020 && ucs <= 0x007e) || + (ucs >= 0x00a0 && ucs <= 0x00ff)) + return ucs; - /* also check for directly encoded 24-bit UCS characters */ - if ((keysym & 0xff000000) == 0x01000000) - return keysym & 0x00ffffff; + /* special keysyms */ + if ((ucs >= (XKB_KEY_BackSpace & 0x7f) && ucs <= (XKB_KEY_Clear & 0x7f)) || + ucs == (XKB_KEY_Return & 0x7f) || ucs == (XKB_KEY_Escape & 0x7f)) + return ucs | 0xff00; + if (ucs == (XKB_KEY_Delete & 0x7f)) + return XKB_KEY_Delete; - /* search smaller keypad table */ - retval = bin_search(keysymtab_kp, N_ELEMENTS(keysymtab_kp) - 1, keysym); + /* Unicode non-symbols and code points outside Unicode planes */ + if ((ucs >= 0xd800 && ucs <= 0xdfff) || + (ucs >= 0xfdd0 && ucs <= 0xfdef) || + ucs > 0x10ffff || (ucs & 0xfffe) == 0xfffe) + return XKB_KEY_NoSymbol; /* search main table */ - if (!retval) - retval = bin_search(keysymtab, N_ELEMENTS(keysymtab) - 1, keysym); + for (size_t i = 0; i < ARRAY_SIZE(keysymtab); i++) + if (keysymtab[i].ucs == ucs) + return keysymtab[i].keysym; - return retval; + /* Use direct encoding if everything else fails */ + return ucs | 0x01000000; } /* @@ -928,47 +948,6 @@ xkb_keysym_to_utf32(xkb_keysym_t keysym) * Author: Rob Bradford */ -static int -utf32_to_utf8(uint32_t unichar, char *buffer) -{ - int count, shift, length; - uint8_t head; - - if (unichar <= 0x007f) { - buffer[0] = unichar; - buffer[1] = '\0'; - return 2; - } - else if (unichar <= 0x07FF) { - length = 2; - head = 0xc0; - } - else if (unichar <= 0xffff) { - length = 3; - head = 0xe0; - } - else if (unichar <= 0x1fffff) { - length = 4; - head = 0xf0; - } - else if (unichar <= 0x3ffffff) { - length = 5; - head = 0xf8; - } - else { - length = 6; - head = 0xfc; - } - - for (count = length - 1, shift = 0; count > 0; count--, shift += 6) - buffer[count] = 0x80 | ((unichar >> shift) & 0x3f); - - buffer[0] = head | ((unichar >> shift) & 0x3f); - buffer[length] = '\0'; - - return length + 1; -} - XKB_EXPORT int xkb_keysym_to_utf8(xkb_keysym_t keysym, char *buffer, size_t size) { @@ -979,7 +958,7 @@ xkb_keysym_to_utf8(xkb_keysym_t keysym, char *buffer, size_t size) codepoint = xkb_keysym_to_utf32(keysym); - if (codepoint == 0) + if (codepoint == NO_KEYSYM_UNICODE_CONVERSION) return 0; return utf32_to_utf8(codepoint, buffer);