*
*/
+#include "config.h"
+
#include "xkbcommon/xkbcommon.h"
#include "utils.h"
#include "utf8.h"
+#define NO_KEYSYM_UNICODE_CONVERSION 0
+
/* We don't use the uint32_t types here, to save some space. */
struct codepair {
uint16_t keysym;
{ 0x0bd6, 0x222a }, /* downshoe ∪ UNION */
{ 0x0bd8, 0x2283 }, /* rightshoe ⊃ SUPERSET OF */
{ 0x0bda, 0x2282 }, /* leftshoe ⊂ SUBSET OF */
- { 0x0bdc, 0x22a2 }, /* lefttack ⊢ RIGHT TACK */
- { 0x0bfc, 0x22a3 }, /* righttack ⊣ LEFT TACK */
+ { 0x0bdc, 0x22a3 }, /* lefttack ⊣ LEFT TACK */
+ { 0x0bfc, 0x22a2 }, /* righttack ⊢ RIGHT TACK */
{ 0x0cdf, 0x2017 }, /* hebrew_doublelowline ‗ DOUBLE LOW LINE */
{ 0x0ce0, 0x05d0 }, /* hebrew_aleph א HEBREW LETTER ALEF */
{ 0x0ce1, 0x05d1 }, /* hebrew_bet ב HEBREW LETTER BET */
{ 0x0ef0, 0x3171 }, /* Hangul_SunkyeongeumMieum ㅱ HANGUL LETTER KAPYEOUNMIEUM */
{ 0x0ef1, 0x3178 }, /* Hangul_SunkyeongeumPieub ㅸ HANGUL LETTER KAPYEOUNPIEUP */
{ 0x0ef2, 0x317f }, /* Hangul_PanSios ㅿ HANGUL LETTER PANSIOS */
-/* 0x0ef3 Hangul_KkogjiDalrinIeung ? ??? */
+ { 0x0ef3, 0x3181 }, /* Hangul_KkogjiDalrinIeung ㆁ HANGUL LETTER YESIEUNG */
{ 0x0ef4, 0x3184 }, /* Hangul_SunkyeongeumPhieuf ㆄ HANGUL LETTER KAPYEOUNPHIEUPH */
{ 0x0ef5, 0x3186 }, /* Hangul_YeorinHieuh ㆆ HANGUL LETTER YEORINHIEUH */
{ 0x0ef6, 0x318d }, /* Hangul_AraeA ㆍ HANGUL LETTER ARAEA */
{ 0x0ef9, 0x11f0 }, /* Hangul_J_KkogjiDalrinIeung ᇰ HANGUL JONGSEONG YESIEUNG */
{ 0x0efa, 0x11f9 }, /* Hangul_J_YeorinHieuh ᇹ HANGUL JONGSEONG YEORINHIEUH */
{ 0x0eff, 0x20a9 }, /* Korean_Won ₩ WON SIGN */
- { 0x13a4, 0x20ac }, /* Euro € EURO SIGN */
{ 0x13bc, 0x0152 }, /* OE Œ LATIN CAPITAL LIGATURE OE */
{ 0x13bd, 0x0153 }, /* oe œ LATIN SMALL LIGATURE OE */
{ 0x13be, 0x0178 }, /* Ydiaeresis Ÿ LATIN CAPITAL LETTER Y WITH DIAERESIS */
- { 0x20a0, 0x20a0 }, /* EcuSign ₠ EURO-CURRENCY SIGN */
- { 0x20a1, 0x20a1 }, /* ColonSign ₡ COLON SIGN */
- { 0x20a2, 0x20a2 }, /* CruzeiroSign ₢ CRUZEIRO SIGN */
- { 0x20a3, 0x20a3 }, /* FFrancSign ₣ FRENCH FRANC SIGN */
- { 0x20a4, 0x20a4 }, /* LiraSign ₤ LIRA SIGN */
- { 0x20a5, 0x20a5 }, /* MillSign ₥ MILL SIGN */
- { 0x20a6, 0x20a6 }, /* NairaSign ₦ NAIRA SIGN */
- { 0x20a7, 0x20a7 }, /* PesetaSign ₧ PESETA SIGN */
- { 0x20a8, 0x20a8 }, /* RupeeSign ₨ RUPEE SIGN */
- { 0x20a9, 0x20a9 }, /* WonSign ₩ WON SIGN */
- { 0x20aa, 0x20aa }, /* NewSheqelSign ₪ NEW SHEQEL SIGN */
- { 0x20ab, 0x20ab }, /* DongSign ₫ DONG SIGN */
{ 0x20ac, 0x20ac }, /* EuroSign € EURO SIGN */
};
}
/* no matching Unicode value found in table */
- return 0;
+ return NO_KEYSYM_UNICODE_CONVERSION;
}
XKB_EXPORT uint32_t
return keysym & 0x7f;
/* also check for directly encoded Unicode codepoints */
+
+ /* Exclude surrogates: they are invalid in UTF-32.
+ * See https://www.unicode.org/versions/Unicode15.0.0/ch03.pdf#G28875
+ * for further details.
+ */
+ if (0x0100d800 <= keysym && keysym <= 0x0100dfff)
+ return NO_KEYSYM_UNICODE_CONVERSION;
/*
* In theory, this is supposed to start from 0x100100, such that the ASCII
* range, which is already covered by 0x00-0xff, can't be encoded in two
return bin_search(keysymtab, ARRAY_SIZE(keysymtab) - 1, keysym);
}
+XKB_EXPORT xkb_keysym_t
+xkb_utf32_to_keysym(uint32_t ucs)
+{
+ /*
+  * Map the UTF-32 code point `ucs` to a keysym. The checks below run in
+  * order and the first one that matches decides the result.
+  *
+  * First, code points in 0x20..0x7e and 0xa0..0xff are returned unchanged
+  * (1:1 mapping with the Latin-1 keysyms).
+  */
+ if ((ucs >= 0x0020 && ucs <= 0x007e) ||
+ (ucs >= 0x00a0 && ucs <= 0x00ff))
+ return ucs;
+
+ /*
+  * Special keysyms: the code point is compared against the low 7 bits of
+  * the keysyms named below. For the BackSpace..Clear range, Return and
+  * Escape the result is the code point with 0xff00 OR-ed in; for Delete
+  * the keysym constant itself is returned.
+  */
+ if ((ucs >= (XKB_KEY_BackSpace & 0x7f) && ucs <= (XKB_KEY_Clear & 0x7f)) ||
+ ucs == (XKB_KEY_Return & 0x7f) || ucs == (XKB_KEY_Escape & 0x7f))
+ return ucs | 0xff00;
+ if (ucs == (XKB_KEY_Delete & 0x7f))
+ return XKB_KEY_Delete;
+
+ /*
+  * Reject values that get no keysym: surrogates (0xd800..0xdfff), the
+  * Unicode noncharacters (0xfdd0..0xfdef and any value whose low 16 bits
+  * are 0xfffe or 0xffff), and anything above 0x10ffff.
+  */
+ if ((ucs >= 0xd800 && ucs <= 0xdfff) ||
+ (ucs >= 0xfdd0 && ucs <= 0xfdef) ||
+ ucs > 0x10ffff || (ucs & 0xfffe) == 0xfffe)
+ return XKB_KEY_NoSymbol;
+
+ /*
+  * Linear scan of keysymtab from the first entry; the first entry whose
+  * ucs field equals the code point supplies the keysym.
+  */
+ for (size_t i = 0; i < ARRAY_SIZE(keysymtab); i++)
+ if (keysymtab[i].ucs == ucs)
+ return keysymtab[i].keysym;
+
+ /* No table entry matched: encode the code point directly by OR-ing in 0x01000000. */
+ return ucs | 0x01000000;
+}
+
/*
* Copyright © 2012 Intel Corporation
*
codepoint = xkb_keysym_to_utf32(keysym);
- if (codepoint == 0)
+ if (codepoint == NO_KEYSYM_UNICODE_CONVERSION)
return 0;
return utf32_to_utf8(codepoint, buffer);