Compose: add iterator API
[platform/upstream/libxkbcommon.git] / src / compose / parser.c
index 9469aa4..e1b81de 100644 (file)
@@ -52,6 +52,8 @@ OR PERFORMANCE OF THIS SOFTWARE.
 
 ******************************************************************/
 
+#include "config.h"
+
 #include <errno.h>
 
 #include "utils.h"
@@ -61,60 +63,19 @@ OR PERFORMANCE OF THIS SOFTWARE.
 #include "utf8.h"
 #include "parser.h"
 
-#define MAX_LHS_LEN 10
-#define MAX_INCLUDE_DEPTH 5
-
-#define KEYSYM_FROM_NAME_CACHE_SIZE 8
-
-/*
- * xkb_keysym_from_name() is fairly slow, because for internal reasons
- * it must use strcasecmp().
- * A small cache reduces about 20% from the compilation time of
- * en_US.UTF-8/Compose.
- */
-struct keysym_from_name_cache {
-    struct {
-        char name[64];
-        xkb_keysym_t keysym;
-    } cache[KEYSYM_FROM_NAME_CACHE_SIZE];
-    unsigned next;
-};
-
-static xkb_keysym_t
-cached_keysym_from_name(struct keysym_from_name_cache *cache,
-                        const char *name, size_t len)
-{
-    xkb_keysym_t keysym;
-
-    if (len >= sizeof(cache->cache[0].name))
-        return XKB_KEY_NoSymbol;
-
-    for (unsigned i = 0; i < KEYSYM_FROM_NAME_CACHE_SIZE; i++)
-        if (streq(cache->cache[i].name, name))
-            return cache->cache[i].keysym;
-
-    keysym = xkb_keysym_from_name(name, XKB_KEYSYM_NO_FLAGS);
-    strcpy(cache->cache[cache->next].name, name);
-    cache->cache[cache->next].keysym = keysym;
-    cache->next = (cache->next + 1) % KEYSYM_FROM_NAME_CACHE_SIZE;
-    return keysym;
-}
-
 /*
  * Grammar adapted from libX11/modules/im/ximcp/imLcPrs.c.
  * See also the XCompose(5) manpage.
  *
- * We don't support the MODIFIER rules, which are commented out.
- *
  * FILE          ::= { [PRODUCTION] [COMMENT] "\n" | INCLUDE }
  * INCLUDE       ::= "include" '"' INCLUDE_STRING '"'
  * PRODUCTION    ::= LHS ":" RHS [ COMMENT ]
  * COMMENT       ::= "#" {<any character except null or newline>}
  * LHS           ::= EVENT { EVENT }
- * EVENT         ::= "<" keysym ">"
- * # EVENT         ::= [MODIFIER_LIST] "<" keysym ">"
- * # MODIFIER_LIST ::= ("!" {MODIFIER} ) | "None"
- * # MODIFIER      ::= ["~"] modifier_name
+ * EVENT         ::= [MODIFIER_LIST] "<" keysym ">"
+ * MODIFIER_LIST ::= (["!"] {MODIFIER} ) | "None"
+ * MODIFIER      ::= ["~"] MODIFIER_NAME
+ * MODIFIER_NAME ::= ("Ctrl"|"Lock"|"Caps"|"Shift"|"Alt"|"Meta")
  * RHS           ::= ( STRING | keysym | STRING keysym )
  * STRING        ::= '"' { CHAR } '"'
  * CHAR          ::= GRAPHIC_CHAR | ESCAPED_CHAR
@@ -141,36 +102,39 @@ enum rules_token {
     TOK_INCLUDE_STRING,
     TOK_LHS_KEYSYM,
     TOK_COLON,
+    TOK_BANG,
+    TOK_TILDE,
     TOK_STRING,
-    TOK_RHS_KEYSYM,
+    TOK_IDENT,
     TOK_ERROR
 };
 
 /* Values returned with some tokens, like yylval. */
 union lvalue {
-    const char *string;
-    xkb_keysym_t keysym;
+    struct {
+        /* Points into the scanner's buffer (s->buf); still \0-terminated. */
+        const char *str;
+        size_t len; /* Copied from s->buf_pos; presumably counts the final \0 -- TODO confirm. */
+    } string;
 };
 
 static enum rules_token
 lex(struct scanner *s, union lvalue *val)
 {
-    struct keysym_from_name_cache *cache = s->priv;
-
 skip_more_whitespace_and_comments:
     /* Skip spaces. */
-    while (is_space(peek(s)))
-        if (next(s) == '\n')
+    while (is_space(scanner_peek(s)))
+        if (scanner_next(s) == '\n')
             return TOK_END_OF_LINE;
 
     /* Skip comments. */
-    if (chr(s, '#')) {
-        skip_to_eol(s);
+    if (scanner_chr(s, '#')) {
+        scanner_skip_to_eol(s);
         goto skip_more_whitespace_and_comments;
     }
 
     /* See if we're done. */
-    if (eof(s)) return TOK_END_OF_FILE;
+    if (scanner_eof(s)) return TOK_END_OF_FILE;
 
     /* New token. */
     s->token_line = s->line;
@@ -178,62 +142,63 @@ skip_more_whitespace_and_comments:
     s->buf_pos = 0;
 
     /* LHS Keysym. */
-    if (chr(s, '<')) {
-        while (peek(s) != '>' && !eol(s))
-            buf_append(s, next(s));
-        if (!chr(s, '>')) {
+    if (scanner_chr(s, '<')) {
+        while (scanner_peek(s) != '>' && !scanner_eol(s) && !scanner_eof(s))
+            scanner_buf_append(s, scanner_next(s));
+        if (!scanner_chr(s, '>')) {
             scanner_err(s, "unterminated keysym literal");
             return TOK_ERROR;
         }
-        if (!buf_append(s, '\0')) {
+        if (!scanner_buf_append(s, '\0')) {
             scanner_err(s, "keysym literal is too long");
             return TOK_ERROR;
         }
-        val->keysym = cached_keysym_from_name(cache, s->buf, s->buf_pos);
-        if (val->keysym == XKB_KEY_NoSymbol) {
-            scanner_err(s, "unrecognized keysym \"%s\" on left-hand side", s->buf);
-            return TOK_ERROR;
-        }
+        val->string.str = s->buf;
+        val->string.len = s->buf_pos;
         return TOK_LHS_KEYSYM;
     }
 
     /* Colon. */
-    if (chr(s, ':'))
+    if (scanner_chr(s, ':'))
         return TOK_COLON;
+    if (scanner_chr(s, '!'))
+        return TOK_BANG;
+    if (scanner_chr(s, '~'))
+        return TOK_TILDE;
 
     /* String literal. */
-    if (chr(s, '\"')) {
-        while (!eof(s) && !eol(s) && peek(s) != '\"') {
-            if (chr(s, '\\')) {
+    if (scanner_chr(s, '\"')) {
+        while (!scanner_eof(s) && !scanner_eol(s) && scanner_peek(s) != '\"') {
+            if (scanner_chr(s, '\\')) {
                 uint8_t o;
-                if (chr(s, '\\')) {
-                    buf_append(s, '\\');
+                if (scanner_chr(s, '\\')) {
+                    scanner_buf_append(s, '\\');
                 }
-                else if (chr(s, '"')) {
-                    buf_append(s, '"');
+                else if (scanner_chr(s, '"')) {
+                    scanner_buf_append(s, '"');
                 }
-                else if (chr(s, 'x') || chr(s, 'X')) {
-                    if (hex(s, &o))
-                        buf_append(s, (char) o);
+                else if (scanner_chr(s, 'x') || scanner_chr(s, 'X')) {
+                    if (scanner_hex(s, &o))
+                        scanner_buf_append(s, (char) o);
                     else
                         scanner_warn(s, "illegal hexadecimal escape sequence in string literal");
                 }
-                else if (oct(s, &o)) {
-                    buf_append(s, (char) o);
+                else if (scanner_oct(s, &o)) {
+                    scanner_buf_append(s, (char) o);
                 }
                 else {
-                    scanner_warn(s, "unknown escape sequence (%c) in string literal", peek(s));
+                    scanner_warn(s, "unknown escape sequence (%c) in string literal", scanner_peek(s));
                     /* Ignore. */
                 }
             } else {
-                buf_append(s, next(s));
+                scanner_buf_append(s, scanner_next(s));
             }
         }
-        if (!chr(s, '\"')) {
+        if (!scanner_chr(s, '\"')) {
             scanner_err(s, "unterminated string literal");
             return TOK_ERROR;
         }
-        if (!buf_append(s, '\0')) {
+        if (!scanner_buf_append(s, '\0')) {
             scanner_err(s, "string literal is too long");
             return TOK_ERROR;
         }
@@ -241,16 +206,17 @@ skip_more_whitespace_and_comments:
             scanner_err(s, "string literal is not a valid UTF-8 string");
             return TOK_ERROR;
         }
-        val->string = s->buf;
+        val->string.str = s->buf;
+        val->string.len = s->buf_pos;
         return TOK_STRING;
     }
 
-    /* RHS keysym or include. */
-    if (is_alpha(peek(s)) || peek(s) == '_') {
+    /* Identifier or include. */
+    if (is_alpha(scanner_peek(s)) || scanner_peek(s) == '_') {
         s->buf_pos = 0;
-        while (is_alnum(peek(s)) || peek(s) == '_')
-            buf_append(s, next(s));
-        if (!buf_append(s, '\0')) {
+        while (is_alnum(scanner_peek(s)) || scanner_peek(s) == '_')
+            scanner_buf_append(s, scanner_next(s));
+        if (!scanner_buf_append(s, '\0')) {
             scanner_err(s, "identifier is too long");
             return TOK_ERROR;
         }
@@ -258,17 +224,13 @@ skip_more_whitespace_and_comments:
         if (streq(s->buf, "include"))
             return TOK_INCLUDE;
 
-        val->keysym = cached_keysym_from_name(cache, s->buf, s->buf_pos);
-        if (val->keysym == XKB_KEY_NoSymbol) {
-            scanner_err(s, "unrecognized keysym \"%s\" on right-hand side", s->buf);
-            return TOK_ERROR;
-        }
-        return TOK_RHS_KEYSYM;
+        val->string.str = s->buf;
+        val->string.len = s->buf_pos;
+        return TOK_IDENT;
     }
 
-    /* Skip line. */
-    while (!eof(s) && !eol(s))
-        next(s);
+    /* Discard rest of line. */
+    scanner_skip_to_eol(s);
 
     scanner_err(s, "unrecognized token");
     return TOK_ERROR;
@@ -278,72 +240,73 @@ static enum rules_token
 lex_include_string(struct scanner *s, struct xkb_compose_table *table,
                    union lvalue *val_out)
 {
-    while (is_space(peek(s)))
-        if (next(s) == '\n')
+    while (is_space(scanner_peek(s)))
+        if (scanner_next(s) == '\n')
             return TOK_END_OF_LINE;
 
     s->token_line = s->line;
     s->token_column = s->column;
     s->buf_pos = 0;
 
-    if (!chr(s, '\"')) {
+    if (!scanner_chr(s, '\"')) {
         scanner_err(s, "include statement must be followed by a path");
         return TOK_ERROR;
     }
 
-    while (!eof(s) && !eol(s) && peek(s) != '\"') {
-        if (chr(s, '%')) {
-            if (chr(s, '%')) {
-                buf_append(s, '%');
+    while (!scanner_eof(s) && !scanner_eol(s) && scanner_peek(s) != '\"') {
+        if (scanner_chr(s, '%')) {
+            if (scanner_chr(s, '%')) {
+                scanner_buf_append(s, '%');
             }
-            else if (chr(s, 'H')) {
-                const char *home = secure_getenv("HOME");
+            else if (scanner_chr(s, 'H')) {
+                const char *home = xkb_context_getenv(table->ctx, "HOME");
                 if (!home) {
                     scanner_err(s, "%%H was used in an include statement, but the HOME environment variable is not set");
                     return TOK_ERROR;
                 }
-                if (!buf_appends(s, home)) {
+                if (!scanner_buf_appends(s, home)) {
                     scanner_err(s, "include path after expanding %%H is too long");
                     return TOK_ERROR;
                 }
             }
-            else if (chr(s, 'L')) {
-                char *path = get_locale_compose_file_path(table->locale);
+            else if (scanner_chr(s, 'L')) {
+                char *path = get_locale_compose_file_path(table->ctx, table->locale);
                 if (!path) {
                     scanner_err(s, "failed to expand %%L to the locale Compose file");
                     return TOK_ERROR;
                 }
-                if (!buf_appends(s, path)) {
+                if (!scanner_buf_appends(s, path)) {
                     free(path);
                     scanner_err(s, "include path after expanding %%L is too long");
                     return TOK_ERROR;
                 }
                 free(path);
             }
-            else if (chr(s, 'S')) {
-                const char *xlocaledir = get_xlocaledir_path();
-                if (!buf_appends(s, xlocaledir)) {
+            else if (scanner_chr(s, 'S')) {
+                const char *xlocaledir = get_xlocaledir_path(table->ctx);
+                if (!scanner_buf_appends(s, xlocaledir)) {
                     scanner_err(s, "include path after expanding %%S is too long");
                     return TOK_ERROR;
                 }
             }
             else {
-                scanner_err(s, "unknown %% format (%c) in include statement", peek(s));
+                scanner_err(s, "unknown %% format (%c) in include statement", scanner_peek(s));
                 return TOK_ERROR;
             }
         } else {
-            buf_append(s, next(s));
+            scanner_buf_append(s, scanner_next(s));
         }
     }
-    if (!chr(s, '\"')) {
+    if (!scanner_chr(s, '\"')) {
         scanner_err(s, "unterminated include statement");
         return TOK_ERROR;
     }
-    if (!buf_append(s, '\0')) {
+    if (!scanner_buf_append(s, '\0')) {
         scanner_err(s, "include path is too long");
         return TOK_ERROR;
     }
-    val_out->string = s->buf;
+    val_out->string.str = s->buf;
+    val_out->string.len = s->buf_pos;
     return TOK_INCLUDE_STRING;
 }
 
@@ -352,103 +315,143 @@ struct production {
     unsigned int len;
     xkb_keysym_t keysym;
     char string[256];
+    /* At least one of these is true. */
     bool has_keysym;
     bool has_string;
-};
 
-static uint32_t
-add_node(struct xkb_compose_table *table, xkb_keysym_t keysym)
-{
-    struct compose_node new = {
-        .keysym = keysym,
-        .next = 0,
-        .is_leaf = true,
-    };
-    darray_append(table->nodes, new);
-    return darray_size(table->nodes) - 1;
-}
+    /* The matching is as follows: (active_mods & modmask) == mods. */
+    xkb_mod_mask_t modmask;
+    xkb_mod_mask_t mods;
+};
 
 static void
 add_production(struct xkb_compose_table *table, struct scanner *s,
                const struct production *production)
 {
-    unsigned lhs_pos;
-    uint32_t curr;
-    struct compose_node *node;
-
-    curr = 0;
-    node = &darray_item(table->nodes, curr);
+    unsigned lhs_pos = 0; /* Index of the LHS keysym currently being placed. */
+    uint32_t curr = darray_size(table->nodes) == 1 ? 0 : 1; /* 0 means "no node": a one-element table gets its first real node at index 1 below. */
+    uint32_t *pptr = NULL; /* Link field (lokid/hikid/eqkid) to patch when the next node is created, if any. */
+    struct compose_node *node = NULL;
+
+    /* Warn when nearing MAX_COMPOSE_NODES; drop the production once it could reach the limit. NOTE(review): the warning also fires when dropping if MAX_LHS_LEN > 0 -- confirm. */
+    if (darray_size(table->nodes) + production->len + MAX_LHS_LEN > MAX_COMPOSE_NODES)
+        scanner_warn(s, "too many sequences for one Compose file; will ignore further lines");
+    if (darray_size(table->nodes) + production->len >= MAX_COMPOSE_NODES)
+        return;
 
     /*
-     * Insert the sequence to the trie, creating new nodes as needed.
+     * Insert the sequence into the ternary search tree, creating new nodes
+     * as needed.
      *
-     * TODO: This can be sped up a bit by first trying the path that the
-     * previous production took, and only then doing the linear search
-     * through the trie levels.  This will work because sequences in the
-     * Compose files are often clustered by a common prefix; especially
-     * in the 1st and 2nd keysyms, which is where the largest variation
-     * (thus, longest search) is.
+     * TODO: We insert in the order given, so some inputs can create long
+     * O(n) chains, which results in O(n^2) total parsing time. We should
+     * ensure the tree is reasonably balanced somehow.
      */
-    for (lhs_pos = 0; lhs_pos < production->len; lhs_pos++) {
-        while (production->lhs[lhs_pos] != node->keysym) {
-            if (node->next == 0) {
-                uint32_t next = add_node(table, production->lhs[lhs_pos]);
-                /* Refetch since add_node could have realloc()ed. */
-                node = &darray_item(table->nodes, curr);
-                node->next = next;
+    while (true) {
+        const xkb_keysym_t keysym = production->lhs[lhs_pos];
+        const bool last = lhs_pos + 1 == production->len;
+
+        if (curr == 0) {
+            /*
+             * Create a new node and update the parent pointer to it.
+             * Update the pointer first because the append invalidates it.
+             */
+            struct compose_node new = {
+                .keysym = keysym,
+                .lokid = 0,
+                .hikid = 0,
+                .internal = {
+                    .eqkid = 0,
+                    .is_leaf = false,
+                },
+            };
+            curr = darray_size(table->nodes);
+            if (pptr != NULL) {
+                *pptr = curr;
+                pptr = NULL;
             }
-
-            curr = node->next;
-            node = &darray_item(table->nodes, curr);
+            darray_append(table->nodes, new);
         }
 
-        if (lhs_pos + 1 == production->len)
-            break;
+        node = &darray_item(table->nodes, curr);
 
-        if (node->is_leaf) {
-            if (node->u.leaf.utf8 != 0 ||
-                node->u.leaf.keysym != XKB_KEY_NoSymbol) {
+        if (keysym < node->keysym) { /* Smaller keysym: follow the low link. */
+            pptr = &node->lokid;
+            curr = node->lokid;
+        } else if (keysym > node->keysym) { /* Larger keysym: follow the high link. */
+            pptr = &node->hikid;
+            curr = node->hikid;
+        } else if (!last) { /* Equal, and more LHS keysyms remain: descend via eqkid. */
+            if (node->is_leaf) {
                 scanner_warn(s, "a sequence already exists which is a prefix of this sequence; overriding");
-                node->u.leaf.utf8 = 0;
-                node->u.leaf.keysym = XKB_KEY_NoSymbol;
+                node->internal.eqkid = 0;
+                node->internal.is_leaf = false;
             }
-
-            {
-                uint32_t successor = add_node(table, production->lhs[lhs_pos + 1]);
-                /* Refetch since add_node could have realloc()ed. */
-                node = &darray_item(table->nodes, curr);
-                node->is_leaf = false;
-                node->u.successor = successor;
+            lhs_pos++;
+            pptr = &node->internal.eqkid;
+            curr = node->internal.eqkid;
+        } else { /* Equal on the last LHS keysym: this node receives the result. */
+            if (node->is_leaf) {
+                bool same_string =
+                    (node->leaf.utf8 == 0 && !production->has_string) ||
+                    (
+                        node->leaf.utf8 != 0 && production->has_string &&
+                        streq(&darray_item(table->utf8, node->leaf.utf8),
+                              production->string)
+                    );
+                bool same_keysym =
+                    (node->leaf.keysym == XKB_KEY_NoSymbol && !production->has_keysym) ||
+                    (
+                        node->leaf.keysym != XKB_KEY_NoSymbol && production->has_keysym &&
+                        node->leaf.keysym == production->keysym
+                    );
+                if (same_string && same_keysym) {
+                    scanner_warn(s, "this compose sequence is a duplicate of another; skipping line");
+                    return;
+                } else {
+                    scanner_warn(s, "this compose sequence already exists; overriding");
+                }
+            } else if (node->internal.eqkid != 0) {
+                scanner_warn(s, "this compose sequence is a prefix of another; skipping line");
+                return;
+            }
+            node->is_leaf = true; /* NOTE(review): when overriding a leaf, a utf8/keysym the new production lacks keeps its old value -- confirm intended. */
+            if (production->has_string) {
+                node->leaf.utf8 = darray_size(table->utf8);
+                darray_append_items(table->utf8, production->string,
+                                    strlen(production->string) + 1);
             }
+            if (production->has_keysym) {
+                node->leaf.keysym = production->keysym;
+            }
+            return;
         }
-
-        curr = node->u.successor;
-        node = &darray_item(table->nodes, curr);
     }
+}
 
-    if (!node->is_leaf) {
-        scanner_warn(s, "this compose sequence is a prefix of another; skipping line");
-        return;
-    }
+/* One bit per modifier index that resolve_modifier() can return (0-3); keep the two in sync. */
+#define ALL_MODS_MASK ((1 << 0) | (1 << 1) | (1 << 2) | (1 << 3))
 
-    if (node->u.leaf.utf8 != 0 || node->u.leaf.keysym != XKB_KEY_NoSymbol) {
-        if (streq(&darray_item(table->utf8, node->u.leaf.utf8),
-                  production->string) &&
-            node->u.leaf.keysym == production->keysym) {
-            scanner_warn(s, "this compose sequence is a duplicate of another; skipping line");
-            return;
-        }
-        scanner_warn(s, "this compose sequence already exists; overriding");
-    }
+static xkb_mod_index_t
+resolve_modifier(const char *name)
+{ /* Map a Compose modifier name to its modifier index; XKB_MOD_INVALID if the name is not in the table. */
+    static const struct {
+        const char *name;
+        xkb_mod_index_t mod;
+    } mods[] = { /* NOTE(review): indices look like the X11 core modifier order (Shift, Lock, Control, Mod1) -- confirm. */
+        { "Shift", 0 },
+        { "Ctrl", 2 },
+        { "Alt", 3 },
+        { "Meta", 3 }, /* Same index as "Alt". */
+        { "Lock", 1 },
+        { "Caps", 1 }, /* Same index as "Lock". */
+    };
 
-    if (production->has_string) {
-        node->u.leaf.utf8 = darray_size(table->utf8);
-        darray_append_items(table->utf8, production->string,
-                            strlen(production->string) + 1);
-    }
-    if (production->has_keysym) {
-        node->u.leaf.keysym = production->keysym;
-    }
+    for (unsigned i = 0; i < ARRAY_SIZE(mods); i++) /* Linear scan over the six table entries. */
+        if (streq(name, mods[i].name))
+            return mods[i].mod;
+
+    return XKB_MOD_INVALID; /* No table entry matched the name. */
 }
 
 static bool
@@ -461,7 +464,7 @@ do_include(struct xkb_compose_table *table, struct scanner *s,
 {
     FILE *file;
     bool ok;
-    const char *string;
+    char *string;
     size_t size;
     struct scanner new_s;
 
@@ -471,7 +474,7 @@ do_include(struct xkb_compose_table *table, struct scanner *s,
         return false;
     }
 
-    file = fopen(s->buf, "r");
+    file = fopen(path, "rb");
     if (!file) {
         scanner_err(s, "failed to open included Compose file \"%s\": %s",
                     path, strerror(errno));
@@ -485,7 +488,7 @@ do_include(struct xkb_compose_table *table, struct scanner *s,
         goto err_file;
     }
 
-    scanner_init(&new_s, table->ctx, string, size, path);
+    scanner_init(&new_s, table->ctx, string, size, path, s->priv);
 
     ok = parse(table, &new_s, include_depth + 1);
     if (!ok)
@@ -504,6 +507,7 @@ parse(struct xkb_compose_table *table, struct scanner *s,
 {
     enum rules_token tok;
     union lvalue val;
+    xkb_keysym_t keysym;
     struct production production;
     enum { MAX_ERRORS = 10 };
     int num_errors = 0;
@@ -512,6 +516,8 @@ initial:
     production.len = 0;
     production.has_keysym = false;
     production.has_string = false;
+    production.mods = 0;
+    production.modmask = 0;
 
     /* fallthrough */
 
@@ -523,11 +529,8 @@ initial_eol:
         goto finished;
     case TOK_INCLUDE:
         goto include;
-    case TOK_LHS_KEYSYM:
-        production.lhs[production.len++] = val.keysym;
-        goto lhs;
     default:
-        goto unexpected;
+        goto lhs_tok;
     }
 
 include:
@@ -541,7 +544,7 @@ include:
 include_eol:
     switch (tok = lex(s, &val)) {
     case TOK_END_OF_LINE:
-        if (!do_include(table, s, val.string, include_depth))
+        if (!do_include(table, s, val.string.str, include_depth))
             goto fail;
         goto initial;
     default:
@@ -549,25 +552,88 @@ include_eol:
     }
 
 lhs:
-    switch (tok = lex(s, &val)) {
+    tok = lex(s, &val);
+lhs_tok:
+    switch (tok) {
+    case TOK_COLON:
+        if (production.len <= 0) {
+            scanner_warn(s, "expected at least one keysym on left-hand side; skipping line");
+            goto skip;
+        }
+        goto rhs;
+    case TOK_IDENT:
+        if (streq(val.string.str, "None")) {
+            production.mods = 0;
+            production.modmask = ALL_MODS_MASK;
+            goto lhs_keysym;
+        }
+        goto lhs_mod_list_tok;
+    case TOK_TILDE:
+        goto lhs_mod_list_tok;
+    case TOK_BANG:
+        production.modmask = ALL_MODS_MASK;
+        goto lhs_mod_list;
+    default:
+        goto lhs_keysym_tok;
+    }
+
+lhs_keysym:
+    tok = lex(s, &val);
+lhs_keysym_tok:
+    switch (tok) {
     case TOK_LHS_KEYSYM:
+        keysym = xkb_keysym_from_name(val.string.str, XKB_KEYSYM_NO_FLAGS);
+        if (keysym == XKB_KEY_NoSymbol) {
+            scanner_err(s, "unrecognized keysym \"%s\" on left-hand side",
+                        val.string.str);
+            goto error;
+        }
         if (production.len + 1 > MAX_LHS_LEN) {
             scanner_warn(s, "too many keysyms (%d) on left-hand side; skipping line",
                          MAX_LHS_LEN + 1);
             goto skip;
         }
-        production.lhs[production.len++] = val.keysym;
+        production.lhs[production.len++] = keysym;
+        production.mods = 0;
+        production.modmask = 0;
         goto lhs;
-    case TOK_COLON:
-        if (production.len <= 0) {
-            scanner_warn(s, "expected at least one keysym on left-hand side; skipping line");
-            goto skip;
-        }
-        goto rhs;
     default:
         goto unexpected;
     }
 
+lhs_mod_list:
+    tok = lex(s, &val);
+lhs_mod_list_tok: {
+        bool tilde = false;
+        xkb_mod_index_t mod;
+
+        if (tok != TOK_TILDE && tok != TOK_IDENT)
+            goto lhs_keysym_tok;
+
+        if (tok == TOK_TILDE) {
+            tilde = true;
+            tok = lex(s, &val);
+        }
+
+        if (tok != TOK_IDENT)
+            goto unexpected;
+
+        mod = resolve_modifier(val.string.str);
+        if (mod == XKB_MOD_INVALID) {
+            scanner_err(s, "unrecognized modifier \"%s\"",
+                        val.string.str);
+            goto error;
+        }
+
+        production.modmask |= 1 << mod;
+        if (tilde)
+            production.mods &= ~(1 << mod);
+        else
+            production.mods |= 1 << mod;
+
+        goto lhs_mod_list;
+    }
+
 rhs:
     switch (tok = lex(s, &val)) {
     case TOK_STRING:
@@ -575,24 +641,31 @@ rhs:
             scanner_warn(s, "right-hand side can have at most one string; skipping line");
             goto skip;
         }
-        if (*val.string == '\0') {
+        if (val.string.len <= 0) {
             scanner_warn(s, "right-hand side string must not be empty; skipping line");
             goto skip;
         }
-        if (strlen(val.string) >= sizeof(production.string)) {
+        if (val.string.len >= sizeof(production.string)) {
             scanner_warn(s, "right-hand side string is too long; skipping line");
             goto skip;
         }
-        strcpy(production.string, val.string);
+        strcpy(production.string, val.string.str);
         production.has_string = true;
         goto rhs;
-    case TOK_RHS_KEYSYM:
+    case TOK_IDENT:
+        keysym = xkb_keysym_from_name(val.string.str, XKB_KEYSYM_NO_FLAGS);
+        if (keysym == XKB_KEY_NoSymbol) {
+            scanner_err(s, "unrecognized keysym \"%s\" on right-hand side",
+                        val.string.str);
+            goto error;
+        }
         if (production.has_keysym) {
             scanner_warn(s, "right-hand side can have at most one keysym; skipping line");
             goto skip;
         }
-        production.keysym = val.keysym;
+        production.keysym = keysym;
         production.has_keysym = true;
+        /* fallthrough */
     case TOK_END_OF_LINE:
         if (!production.has_string && !production.has_keysym) {
             scanner_warn(s, "right-hand side must have at least one of string or keysym; skipping line");
@@ -607,7 +680,7 @@ rhs:
 unexpected:
     if (tok != TOK_ERROR)
         scanner_err(s, "unexpected token");
-
+error:
     num_errors++;
     if (num_errors <= MAX_ERRORS)
         goto skip;
@@ -633,10 +706,7 @@ parse_string(struct xkb_compose_table *table, const char *string, size_t len,
              const char *file_name)
 {
     struct scanner s;
-    struct keysym_from_name_cache cache;
-    scanner_init(&s, table->ctx, string, len, file_name);
-    memset(&cache, 0, sizeof(cache));
-    s.priv = &cache;
+    scanner_init(&s, table->ctx, string, len, file_name, NULL);
     if (!parse(table, &s, 0))
         return false;
     /* Maybe the allocator can use the excess space. */
@@ -649,12 +719,14 @@ bool
 parse_file(struct xkb_compose_table *table, FILE *file, const char *file_name)
 {
     bool ok;
-    const char *string;
+    char *string;
     size_t size;
 
     ok = map_file(file, &string, &size);
     if (!ok) {
-        log_err(table->ctx, "Couldn't read Compose file %s: %s\n",
+        log_err(table->ctx,
+                XKB_LOG_MESSAGE_NO_ID,
+                "Couldn't read Compose file %s: %s\n",
                 file_name, strerror(errno));
         return false;
     }