X-Git-Url: http://review.tizen.org/git/?a=blobdiff_plain;f=src%2Fcompose%2Fparser.c;h=e1b81dea2cfb63b2bef1b5a39d0adca86fb1d35d;hb=a17701327e4bc6daa54114041a9707e00e15d82c;hp=10433fea975a1d117bdb9108e1c07e0de85b5e4a;hpb=3c0c3afa09b3f46479840b5ca046a9c76b3d5d63;p=platform%2Fupstream%2Flibxkbcommon.git diff --git a/src/compose/parser.c b/src/compose/parser.c index 10433fe..e1b81de 100644 --- a/src/compose/parser.c +++ b/src/compose/parser.c @@ -52,6 +52,8 @@ OR PERFORMANCE OF THIS SOFTWARE. ******************************************************************/ +#include "config.h" + #include #include "utils.h" @@ -61,60 +63,19 @@ OR PERFORMANCE OF THIS SOFTWARE. #include "utf8.h" #include "parser.h" -#define MAX_LHS_LEN 10 -#define MAX_INCLUDE_DEPTH 5 - -#define KEYSYM_FROM_NAME_CACHE_SIZE 8 - -/* - * xkb_keysym_from_name() is fairly slow, because for internal reasons - * it must use strcasecmp(). - * A small cache reduces about 20% from the compilation time of - * en_US.UTF-8/Compose. - */ -struct keysym_from_name_cache { - struct { - char name[64]; - xkb_keysym_t keysym; - } cache[KEYSYM_FROM_NAME_CACHE_SIZE]; - unsigned next; -}; - -static xkb_keysym_t -cached_keysym_from_name(struct keysym_from_name_cache *cache, - const char *name, size_t len) -{ - xkb_keysym_t keysym; - - if (len >= sizeof(cache->cache[0].name)) - return XKB_KEY_NoSymbol; - - for (unsigned i = 0; i < KEYSYM_FROM_NAME_CACHE_SIZE; i++) - if (streq(cache->cache[i].name, name)) - return cache->cache[i].keysym; - - keysym = xkb_keysym_from_name(name, XKB_KEYSYM_NO_FLAGS); - strcpy(cache->cache[cache->next].name, name); - cache->cache[cache->next].keysym = keysym; - cache->next = (cache->next + 1) % KEYSYM_FROM_NAME_CACHE_SIZE; - return keysym; -} - /* * Grammar adapted from libX11/modules/im/ximcp/imLcPrs.c. * See also the XCompose(5) manpage. * - * We don't support the MODIFIER rules, which are commented out. - * * FILE ::= { [PRODUCTION] [COMMENT] "\n" | INCLUDE } * INCLUDE ::= "include" '"' INCLUDE_STRING '"' * PRODUCTION ::= LHS ":" RHS [ COMMENT ] * COMMENT ::= "#" {} * LHS ::= EVENT { EVENT } - * EVENT ::= "<" keysym ">" - * # EVENT ::= [MODIFIER_LIST] "<" keysym ">" - * # MODIFIER_LIST ::= ("!" {MODIFIER} ) | "None" - * # MODIFIER ::= ["~"] modifier_name + * EVENT ::= [MODIFIER_LIST] "<" keysym ">" + * MODIFIER_LIST ::= (["!"] {MODIFIER} ) | "None" + * MODIFIER ::= ["~"] MODIFIER_NAME + * MODIFIER_NAME ::= ("Ctrl"|"Lock"|"Caps"|"Shift"|"Alt"|"Meta") * RHS ::= ( STRING | keysym | STRING keysym ) * STRING ::= '"' { CHAR } '"' * CHAR ::= GRAPHIC_CHAR | ESCAPED_CHAR @@ -141,8 +102,10 @@ enum rules_token { TOK_INCLUDE_STRING, TOK_LHS_KEYSYM, TOK_COLON, + TOK_BANG, + TOK_TILDE, TOK_STRING, - TOK_RHS_KEYSYM, + TOK_IDENT, TOK_ERROR }; @@ -160,18 +123,18 @@ lex(struct scanner *s, union lvalue *val) { skip_more_whitespace_and_comments: /* Skip spaces. */ - while (is_space(peek(s))) - if (next(s) == '\n') + while (is_space(scanner_peek(s))) + if (scanner_next(s) == '\n') return TOK_END_OF_LINE; /* Skip comments. */ - if (chr(s, '#')) { - skip_to_eol(s); + if (scanner_chr(s, '#')) { + scanner_skip_to_eol(s); goto skip_more_whitespace_and_comments; } /* See if we're done. */ - if (eof(s)) return TOK_END_OF_FILE; + if (scanner_eof(s)) return TOK_END_OF_FILE; /* New token. */ s->token_line = s->line; @@ -179,14 +142,14 @@ skip_more_whitespace_and_comments: s->buf_pos = 0; /* LHS Keysym. */ - if (chr(s, '<')) { - while (peek(s) != '>' && !eol(s)) - buf_append(s, next(s)); - if (!chr(s, '>')) { + if (scanner_chr(s, '<')) { + while (scanner_peek(s) != '>' && !scanner_eol(s) && !scanner_eof(s)) + scanner_buf_append(s, scanner_next(s)); + if (!scanner_chr(s, '>')) { scanner_err(s, "unterminated keysym literal"); return TOK_ERROR; } - if (!buf_append(s, '\0')) { + if (!scanner_buf_append(s, '\0')) { scanner_err(s, "keysym literal is too long"); return TOK_ERROR; } @@ -196,42 +159,46 @@ skip_more_whitespace_and_comments: } /* Colon. */ - if (chr(s, ':')) + if (scanner_chr(s, ':')) return TOK_COLON; + if (scanner_chr(s, '!')) + return TOK_BANG; + if (scanner_chr(s, '~')) + return TOK_TILDE; /* String literal. */ - if (chr(s, '\"')) { - while (!eof(s) && !eol(s) && peek(s) != '\"') { - if (chr(s, '\\')) { + if (scanner_chr(s, '\"')) { + while (!scanner_eof(s) && !scanner_eol(s) && scanner_peek(s) != '\"') { + if (scanner_chr(s, '\\')) { uint8_t o; - if (chr(s, '\\')) { - buf_append(s, '\\'); + if (scanner_chr(s, '\\')) { + scanner_buf_append(s, '\\'); } - else if (chr(s, '"')) { - buf_append(s, '"'); + else if (scanner_chr(s, '"')) { + scanner_buf_append(s, '"'); } - else if (chr(s, 'x') || chr(s, 'X')) { - if (hex(s, &o)) - buf_append(s, (char) o); + else if (scanner_chr(s, 'x') || scanner_chr(s, 'X')) { + if (scanner_hex(s, &o)) + scanner_buf_append(s, (char) o); else scanner_warn(s, "illegal hexadecimal escape sequence in string literal"); } - else if (oct(s, &o)) { - buf_append(s, (char) o); + else if (scanner_oct(s, &o)) { + scanner_buf_append(s, (char) o); } else { - scanner_warn(s, "unknown escape sequence (%c) in string literal", peek(s)); + scanner_warn(s, "unknown escape sequence (%c) in string literal", scanner_peek(s)); /* Ignore. */ } } else { - buf_append(s, next(s)); + scanner_buf_append(s, scanner_next(s)); } } - if (!chr(s, '\"')) { + if (!scanner_chr(s, '\"')) { scanner_err(s, "unterminated string literal"); return TOK_ERROR; } - if (!buf_append(s, '\0')) { + if (!scanner_buf_append(s, '\0')) { scanner_err(s, "string literal is too long"); return TOK_ERROR; } @@ -244,12 +211,12 @@ skip_more_whitespace_and_comments: return TOK_STRING; } - /* RHS keysym or include. */ - if (is_alpha(peek(s)) || peek(s) == '_') { + /* Identifier or include. */ + if (is_alpha(scanner_peek(s)) || scanner_peek(s) == '_') { s->buf_pos = 0; - while (is_alnum(peek(s)) || peek(s) == '_') - buf_append(s, next(s)); - if (!buf_append(s, '\0')) { + while (is_alnum(scanner_peek(s)) || scanner_peek(s) == '_') + scanner_buf_append(s, scanner_next(s)); + if (!scanner_buf_append(s, '\0')) { scanner_err(s, "identifier is too long"); return TOK_ERROR; } @@ -259,11 +226,11 @@ skip_more_whitespace_and_comments: val->string.str = s->buf; val->string.len = s->buf_pos; - return TOK_RHS_KEYSYM; + return TOK_IDENT; } /* Discard rest of line. */ - skip_to_eol(s); + scanner_skip_to_eol(s); scanner_err(s, "unrecognized token"); return TOK_ERROR; @@ -273,68 +240,68 @@ static enum rules_token lex_include_string(struct scanner *s, struct xkb_compose_table *table, union lvalue *val_out) { - while (is_space(peek(s))) - if (next(s) == '\n') + while (is_space(scanner_peek(s))) + if (scanner_next(s) == '\n') return TOK_END_OF_LINE; s->token_line = s->line; s->token_column = s->column; s->buf_pos = 0; - if (!chr(s, '\"')) { + if (!scanner_chr(s, '\"')) { scanner_err(s, "include statement must be followed by a path"); return TOK_ERROR; } - while (!eof(s) && !eol(s) && peek(s) != '\"') { - if (chr(s, '%')) { - if (chr(s, '%')) { - buf_append(s, '%'); + while (!scanner_eof(s) && !scanner_eol(s) && scanner_peek(s) != '\"') { + if (scanner_chr(s, '%')) { + if (scanner_chr(s, '%')) { + scanner_buf_append(s, '%'); } - else if (chr(s, 'H')) { - const char *home = secure_getenv("HOME"); + else if (scanner_chr(s, 'H')) { + const char *home = xkb_context_getenv(table->ctx, "HOME"); if (!home) { scanner_err(s, "%%H was used in an include statement, but the HOME environment variable is not set"); return TOK_ERROR; } - if (!buf_appends(s, home)) { + if (!scanner_buf_appends(s, home)) { scanner_err(s, "include path after expanding %%H is too long"); return TOK_ERROR; } } - else if (chr(s, 'L')) { - char *path = get_locale_compose_file_path(table->locale); + else if (scanner_chr(s, 'L')) { + char *path = get_locale_compose_file_path(table->ctx, table->locale); if (!path) { scanner_err(s, "failed to expand %%L to the locale Compose file"); return TOK_ERROR; } - if (!buf_appends(s, path)) { + if (!scanner_buf_appends(s, path)) { free(path); scanner_err(s, "include path after expanding %%L is too long"); return TOK_ERROR; } free(path); } - else if (chr(s, 'S')) { - const char *xlocaledir = get_xlocaledir_path(); - if (!buf_appends(s, xlocaledir)) { + else if (scanner_chr(s, 'S')) { + const char *xlocaledir = get_xlocaledir_path(table->ctx); + if (!scanner_buf_appends(s, xlocaledir)) { scanner_err(s, "include path after expanding %%S is too long"); return TOK_ERROR; } } else { - scanner_err(s, "unknown %% format (%c) in include statement", peek(s)); + scanner_err(s, "unknown %% format (%c) in include statement", scanner_peek(s)); return TOK_ERROR; } } else { - buf_append(s, next(s)); + scanner_buf_append(s, scanner_next(s)); } } - if (!chr(s, '\"')) { + if (!scanner_chr(s, '\"')) { scanner_err(s, "unterminated include statement"); return TOK_ERROR; } - if (!buf_append(s, '\0')) { + if (!scanner_buf_append(s, '\0')) { scanner_err(s, "include path is too long"); return TOK_ERROR; } @@ -348,103 +315,143 @@ struct production { unsigned int len; xkb_keysym_t keysym; char string[256]; + /* At least one of these is true. */ bool has_keysym; bool has_string; -}; -static uint32_t -add_node(struct xkb_compose_table *table, xkb_keysym_t keysym) -{ - struct compose_node new = { - .keysym = keysym, - .next = 0, - .is_leaf = true, - }; - darray_append(table->nodes, new); - return darray_size(table->nodes) - 1; -} + /* The matching is as follows: (active_mods & modmask) == mods. */ + xkb_mod_mask_t modmask; + xkb_mod_mask_t mods; +}; static void add_production(struct xkb_compose_table *table, struct scanner *s, const struct production *production) { - unsigned lhs_pos; - uint32_t curr; - struct compose_node *node; - - curr = 0; - node = &darray_item(table->nodes, curr); + unsigned lhs_pos = 0; + uint32_t curr = darray_size(table->nodes) == 1 ? 0 : 1; + uint32_t *pptr = NULL; + struct compose_node *node = NULL; + + /* Warn before potentially going over the limit, discard silently after. */ + if (darray_size(table->nodes) + production->len + MAX_LHS_LEN > MAX_COMPOSE_NODES) + scanner_warn(s, "too many sequences for one Compose file; will ignore further lines"); + if (darray_size(table->nodes) + production->len >= MAX_COMPOSE_NODES) + return; /* - * Insert the sequence to the trie, creating new nodes as needed. + * Insert the sequence to the ternary search tree, creating new nodes as + * needed. * - * TODO: This can be sped up a bit by first trying the path that the - * previous production took, and only then doing the linear search - * through the trie levels. This will work because sequences in the - * Compose files are often clustered by a common prefix; especially - * in the 1st and 2nd keysyms, which is where the largest variation - * (thus, longest search) is. + * TODO: We insert in the order given, this means some inputs can create + * long O(n) chains, which results in total O(n^2) parsing time. We should + * ensure the tree is reasonably balanced somehow. */ - for (lhs_pos = 0; lhs_pos < production->len; lhs_pos++) { - while (production->lhs[lhs_pos] != node->keysym) { - if (node->next == 0) { - uint32_t next = add_node(table, production->lhs[lhs_pos]); - /* Refetch since add_node could have realloc()ed. */ - node = &darray_item(table->nodes, curr); - node->next = next; + while (true) { + const xkb_keysym_t keysym = production->lhs[lhs_pos]; + const bool last = lhs_pos + 1 == production->len; + + if (curr == 0) { + /* + * Create a new node and update the parent pointer to it. + * Update the pointer first because the append invalidates it. + */ + struct compose_node new = { + .keysym = keysym, + .lokid = 0, + .hikid = 0, + .internal = { + .eqkid = 0, + .is_leaf = false, + }, + }; + curr = darray_size(table->nodes); + if (pptr != NULL) { + *pptr = curr; + pptr = NULL; } - - curr = node->next; - node = &darray_item(table->nodes, curr); + darray_append(table->nodes, new); } - if (lhs_pos + 1 == production->len) - break; + node = &darray_item(table->nodes, curr); - if (node->is_leaf) { - if (node->u.leaf.utf8 != 0 || - node->u.leaf.keysym != XKB_KEY_NoSymbol) { + if (keysym < node->keysym) { + pptr = &node->lokid; + curr = node->lokid; + } else if (keysym > node->keysym) { + pptr = &node->hikid; + curr = node->hikid; + } else if (!last) { + if (node->is_leaf) { scanner_warn(s, "a sequence already exists which is a prefix of this sequence; overriding"); - node->u.leaf.utf8 = 0; - node->u.leaf.keysym = XKB_KEY_NoSymbol; + node->internal.eqkid = 0; + node->internal.is_leaf = false; } - - { - uint32_t successor = add_node(table, production->lhs[lhs_pos + 1]); - /* Refetch since add_node could have realloc()ed. */ - node = &darray_item(table->nodes, curr); - node->is_leaf = false; - node->u.successor = successor; + lhs_pos++; + pptr = &node->internal.eqkid; + curr = node->internal.eqkid; + } else { + if (node->is_leaf) { + bool same_string = + (node->leaf.utf8 == 0 && !production->has_string) || + ( + node->leaf.utf8 != 0 && production->has_string && + streq(&darray_item(table->utf8, node->leaf.utf8), + production->string) + ); + bool same_keysym = + (node->leaf.keysym == XKB_KEY_NoSymbol && !production->has_keysym) || + ( + node->leaf.keysym != XKB_KEY_NoSymbol && production->has_keysym && + node->leaf.keysym == production->keysym + ); + if (same_string && same_keysym) { + scanner_warn(s, "this compose sequence is a duplicate of another; skipping line"); + return; + } else { + scanner_warn(s, "this compose sequence already exists; overriding"); + } + } else if (node->internal.eqkid != 0) { + scanner_warn(s, "this compose sequence is a prefix of another; skipping line"); + return; + } + node->is_leaf = true; + if (production->has_string) { + node->leaf.utf8 = darray_size(table->utf8); + darray_append_items(table->utf8, production->string, + strlen(production->string) + 1); } + if (production->has_keysym) { + node->leaf.keysym = production->keysym; + } + return; } - - curr = node->u.successor; - node = &darray_item(table->nodes, curr); } +} - if (!node->is_leaf) { - scanner_warn(s, "this compose sequence is a prefix of another; skipping line"); - return; - } +/* Should match resolve_modifier(). */ +#define ALL_MODS_MASK ((1 << 0) | (1 << 1) | (1 << 2) | (1 << 3)) - if (node->u.leaf.utf8 != 0 || node->u.leaf.keysym != XKB_KEY_NoSymbol) { - if (streq(&darray_item(table->utf8, node->u.leaf.utf8), - production->string) && - node->u.leaf.keysym == production->keysym) { - scanner_warn(s, "this compose sequence is a duplicate of another; skipping line"); - return; - } - scanner_warn(s, "this compose sequence already exists; overriding"); - } +static xkb_mod_index_t +resolve_modifier(const char *name) +{ + static const struct { + const char *name; + xkb_mod_index_t mod; + } mods[] = { + { "Shift", 0 }, + { "Ctrl", 2 }, + { "Alt", 3 }, + { "Meta", 3 }, + { "Lock", 1 }, + { "Caps", 1 }, + }; - if (production->has_string) { - node->u.leaf.utf8 = darray_size(table->utf8); - darray_append_items(table->utf8, production->string, - strlen(production->string) + 1); - } - if (production->has_keysym) { - node->u.leaf.keysym = production->keysym; - } + for (unsigned i = 0; i < ARRAY_SIZE(mods); i++) + if (streq(name, mods[i].name)) + return mods[i].mod; + + return XKB_MOD_INVALID; } static bool @@ -457,7 +464,7 @@ do_include(struct xkb_compose_table *table, struct scanner *s, { FILE *file; bool ok; - const char *string; + char *string; size_t size; struct scanner new_s; @@ -467,7 +474,7 @@ do_include(struct xkb_compose_table *table, struct scanner *s, return false; } - file = fopen(path, "r"); + file = fopen(path, "rb"); if (!file) { scanner_err(s, "failed to open included Compose file \"%s\": %s", path, strerror(errno)); @@ -500,7 +507,6 @@ parse(struct xkb_compose_table *table, struct scanner *s, { enum rules_token tok; union lvalue val; - struct keysym_from_name_cache *cache = s->priv; xkb_keysym_t keysym; struct production production; enum { MAX_ERRORS = 10 }; @@ -510,6 +516,8 @@ initial: production.len = 0; production.has_keysym = false; production.has_string = false; + production.mods = 0; + production.modmask = 0; /* fallthrough */ @@ -547,8 +555,34 @@ lhs: tok = lex(s, &val); lhs_tok: switch (tok) { + case TOK_COLON: + if (production.len <= 0) { + scanner_warn(s, "expected at least one keysym on left-hand side; skipping line"); + goto skip; + } + goto rhs; + case TOK_IDENT: + if (streq(val.string.str, "None")) { + production.mods = 0; + production.modmask = ALL_MODS_MASK; + goto lhs_keysym; + } + goto lhs_mod_list_tok; + case TOK_TILDE: + goto lhs_mod_list_tok; + case TOK_BANG: + production.modmask = ALL_MODS_MASK; + goto lhs_mod_list; + default: + goto lhs_keysym_tok; + } + +lhs_keysym: + tok = lex(s, &val); +lhs_keysym_tok: + switch (tok) { case TOK_LHS_KEYSYM: - keysym = cached_keysym_from_name(cache, val.string.str, val.string.len); + keysym = xkb_keysym_from_name(val.string.str, XKB_KEYSYM_NO_FLAGS); if (keysym == XKB_KEY_NoSymbol) { scanner_err(s, "unrecognized keysym \"%s\" on left-hand side", val.string.str); @@ -560,17 +594,46 @@ lhs_tok: goto skip; } production.lhs[production.len++] = keysym; + production.mods = 0; + production.modmask = 0; goto lhs; - case TOK_COLON: - if (production.len <= 0) { - scanner_warn(s, "expected at least one keysym on left-hand side; skipping line"); - goto skip; - } - goto rhs; default: goto unexpected; } +lhs_mod_list: + tok = lex(s, &val); +lhs_mod_list_tok: { + bool tilde = false; + xkb_mod_index_t mod; + + if (tok != TOK_TILDE && tok != TOK_IDENT) + goto lhs_keysym_tok; + + if (tok == TOK_TILDE) { + tilde = true; + tok = lex(s, &val); + } + + if (tok != TOK_IDENT) + goto unexpected; + + mod = resolve_modifier(val.string.str); + if (mod == XKB_MOD_INVALID) { + scanner_err(s, "unrecognized modifier \"%s\"", + val.string.str); + goto error; + } + + production.modmask |= 1 << mod; + if (tilde) + production.mods &= ~(1 << mod); + else + production.mods |= 1 << mod; + + goto lhs_mod_list; + } + rhs: switch (tok = lex(s, &val)) { case TOK_STRING: @@ -589,8 +652,8 @@ rhs: strcpy(production.string, val.string.str); production.has_string = true; goto rhs; - case TOK_RHS_KEYSYM: - keysym = cached_keysym_from_name(cache, val.string.str, val.string.len); + case TOK_IDENT: + keysym = xkb_keysym_from_name(val.string.str, XKB_KEYSYM_NO_FLAGS); if (keysym == XKB_KEY_NoSymbol) { scanner_err(s, "unrecognized keysym \"%s\" on right-hand side", val.string.str); @@ -602,6 +665,7 @@ rhs: } production.keysym = keysym; production.has_keysym = true; + /* fallthrough */ case TOK_END_OF_LINE: if (!production.has_string && !production.has_keysym) { scanner_warn(s, "right-hand side must have at least one of string or keysym; skipping line"); @@ -642,9 +706,7 @@ parse_string(struct xkb_compose_table *table, const char *string, size_t len, const char *file_name) { struct scanner s; - struct keysym_from_name_cache cache; - memset(&cache, 0, sizeof(cache)); - scanner_init(&s, table->ctx, string, len, file_name, &cache); + scanner_init(&s, table->ctx, string, len, file_name, NULL); if (!parse(table, &s, 0)) return false; /* Maybe the allocator can use the excess space. */ @@ -657,12 +719,14 @@ bool parse_file(struct xkb_compose_table *table, FILE *file, const char *file_name) { bool ok; - const char *string; + char *string; size_t size; ok = map_file(file, &string, &size); if (!ok) { - log_err(table->ctx, "Couldn't read Compose file %s: %s\n", + log_err(table->ctx, + XKB_LOG_MESSAGE_NO_ID, + "Couldn't read Compose file %s: %s\n", file_name, strerror(errno)); return false; }