+ /* patch encoding botch: KP_Space is 0xff80, so (keysym & 0x7f) would give 0, not ' ' */
+ if (keysym == XKB_KEY_KP_Space)
+ return XKB_KEY_space & 0x7f;
+
+ /* special keysyms */
+ if ((keysym >= XKB_KEY_BackSpace && keysym <= XKB_KEY_Clear) ||
+ (keysym >= XKB_KEY_KP_Multiply && keysym <= XKB_KEY_KP_9) ||
+ keysym == XKB_KEY_Return || keysym == XKB_KEY_Escape ||
+ keysym == XKB_KEY_Delete || keysym == XKB_KEY_KP_Tab ||
+ keysym == XKB_KEY_KP_Enter || keysym == XKB_KEY_KP_Equal)
+ return keysym & 0x7f;
+
+ /* also check for directly encoded Unicode codepoints */
+
+ /* Exclude surrogates: they are invalid in UTF-32.
+ * See https://www.unicode.org/versions/Unicode15.0.0/ch03.pdf#G28875
+ * for further details.
+ */
+ if (0x0100d800 <= keysym && keysym <= 0x0100dfff)
+ return NO_KEYSYM_UNICODE_CONVERSION;
+ /*
+ * In theory, this is supposed to start from 0x01000100, such that the
+ * Latin-1 range, which is already covered by keysyms 0x00-0xff, can't be
+ * encoded in two ways. However, changing this after a couple of decades
+ * probably won't go well, so it stays as it is.
+ */
+ if (0x01000000 <= keysym && keysym <= 0x0110ffff)
+ return keysym - 0x01000000;
+
+ /* search main table */
+ return bin_search(keysymtab, ARRAY_SIZE(keysymtab) - 1, keysym);
+}
+
+XKB_EXPORT xkb_keysym_t
+xkb_utf32_to_keysym(uint32_t ucs)
+{
+ /* first check for Latin-1 characters (1:1 mapping) */
+ if ((ucs >= 0x0020 && ucs <= 0x007e) ||
+ (ucs >= 0x00a0 && ucs <= 0x00ff))
+ return ucs;