2 * Copyright © 2013 Ran Benita <ran234@gmail.com>
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
21 * DEALINGS IN THE SOFTWARE.
24 /******************************************************************
26 Copyright 1992 by Oki Technosystems Laboratory, Inc.
27 Copyright 1992 by Fuji Xerox Co., Ltd.
29 Permission to use, copy, modify, distribute, and sell this software
30 and its documentation for any purpose is hereby granted without fee,
31 provided that the above copyright notice appear in all copies and
32 that both that copyright notice and this permission notice appear
33 in supporting documentation, and that the name of Oki Technosystems
34 Laboratory and Fuji Xerox not be used in advertising or publicity
35 pertaining to distribution of the software without specific, written
37 Oki Technosystems Laboratory and Fuji Xerox make no representations
38 about the suitability of this software for any purpose. It is provided
39 "as is" without express or implied warranty.
41 OKI TECHNOSYSTEMS LABORATORY AND FUJI XEROX DISCLAIM ALL WARRANTIES
42 WITH REGARD TO THIS SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF
43 MERCHANTABILITY AND FITNESS, IN NO EVENT SHALL OKI TECHNOSYSTEMS
44 LABORATORY AND FUJI XEROX BE LIABLE FOR ANY SPECIAL, INDIRECT OR
45 CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS
46 OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE
47 OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE
48 OR PERFORMANCE OF THIS SOFTWARE.
50 Author: Yasuhiro Kawai Oki Technosystems Laboratory
51 Author: Kazunori Nishihara Fuji Xerox
53 ******************************************************************/
58 #include "scanner-utils.h"
/* Capacity of the lhs[] keysym array of a production; parse() skips lines that would exceed it. */
64 #define MAX_LHS_LEN 10
/* do_include() reports an error once include_depth has reached this value. */
65 #define MAX_INCLUDE_DEPTH 5
/* Number of slots in the cache[] array of struct keysym_from_name_cache. */
67 #define KEYSYM_FROM_NAME_CACHE_SIZE 8
70 * xkb_keysym_from_name() is fairly slow, because for internal reasons
71 * it must use strcasecmp().
72 * A small cache cuts about 20% off the compilation time of
73 * en_US.UTF-8/Compose.
/*
 * State of the keysym-name cache used by cached_keysym_from_name().
 * cache[] has KEYSYM_FROM_NAME_CACHE_SIZE slots; that function reads and
 * writes a name, a len and a keysym in each slot, plus a `next` index that
 * selects the slot to overwrite on the next miss.
 * NOTE(review): the member declarations are not visible in this listing;
 * the description is taken from how the members are used -- confirm.
 */
75 struct keysym_from_name_cache {
80 } cache[KEYSYM_FROM_NAME_CACHE_SIZE];
/*
 * Resolve a keysym name, consulting the small cache first.
 *
 * cache: the cache state; slots are replaced in round-robin order.
 * name:  the keysym name. It is passed to xkb_keysym_from_name() and
 *        strcpy(), so it must be '\0'-terminated.
 * len:   the length that is compared against, and stored with, the name.
 *
 * A name whose len does not fit in a cache slot yields XKB_KEY_NoSymbol
 * without any lookup. A slot with the same len and the same bytes yields
 * its stored keysym. On a miss the name is looked up with
 * XKB_KEYSYM_NO_FLAGS and the result -- whatever it is -- is stored in
 * slot `next`, which then advances modulo the cache size.
 */
85 cached_keysym_from_name(struct keysym_from_name_cache *cache,
86 const char *name, size_t len)
/* Too long for a slot: give up rather than truncate. */
90 if (len >= sizeof(cache->cache[0].name))
91 return XKB_KEY_NoSymbol;
/* Linear scan over all slots; len is compared first as a cheap filter. */
93 for (unsigned i = 0; i < KEYSYM_FROM_NAME_CACHE_SIZE; i++)
94 if (cache->cache[i].len == len &&
95 memcmp(cache->cache[i].name, name, len) == 0)
96 return cache->cache[i].keysym;
/* Miss: do the real lookup and overwrite the slot at `next`. */
98 keysym = xkb_keysym_from_name(name, XKB_KEYSYM_NO_FLAGS);
99 strcpy(cache->cache[cache->next].name, name);
100 cache->cache[cache->next].len = len;
101 cache->cache[cache->next].keysym = keysym;
102 cache->next = (cache->next + 1) % KEYSYM_FROM_NAME_CACHE_SIZE;
107 * Grammar adapted from libX11/modules/im/ximcp/imLcPrs.c.
108 * See also the XCompose(5) manpage.
110 * FILE ::= { [PRODUCTION] [COMMENT] "\n" | INCLUDE }
111 * INCLUDE ::= "include" '"' INCLUDE_STRING '"'
112 * PRODUCTION ::= LHS ":" RHS [ COMMENT ]
113 * COMMENT ::= "#" {<any character except null or newline>}
114 * LHS ::= EVENT { EVENT }
115 * EVENT ::= [MODIFIER_LIST] "<" keysym ">"
116 * MODIFIER_LIST ::= ("!" {MODIFIER} ) | "None"
117 * MODIFIER ::= ["~"] modifier_name
118 * RHS ::= ( STRING | keysym | STRING keysym )
119 * STRING ::= '"' { CHAR } '"'
120 * CHAR ::= GRAPHIC_CHAR | ESCAPED_CHAR
121 * GRAPHIC_CHAR ::= locale (codeset) dependent code
122 * ESCAPED_CHAR ::= ('\\' | '\"' | OCTAL | HEX )
123 * OCTAL ::= '\' OCTAL_CHAR [OCTAL_CHAR [OCTAL_CHAR]]
124 * OCTAL_CHAR ::= (0|1|2|3|4|5|6|7)
125 * HEX ::= '\' (x|X) HEX_CHAR [HEX_CHAR]
126 * HEX_CHAR ::= (0|1|2|3|4|5|6|7|8|9|A|B|C|D|E|F|a|b|c|d|e|f)
128 * INCLUDE_STRING is a filesystem path, with the following %-expansions:
130 * %H - The user's home directory (the $HOME environment variable).
131 * %L - The name of the locale specific Compose file (e.g.,
132 * "/usr/share/X11/locale/<localename>/Compose").
133 * %S - The name of the system directory for Compose files (e.g.,
134 * "/usr/share/X11/locale").
151 /* Values returned with some tokens, like yylval. */
154 /* Still \0-terminated. */
/*
 * Scan the next token of a Compose file from the scanner s.
 *
 * Whitespace and comments are skipped first; TOK_END_OF_LINE may be
 * returned from that phase, and TOK_END_OF_FILE is returned once eof(s)
 * holds. The start position of the token is recorded in s->token_line and
 * s->token_column.
 *
 * For keysym literals (TOK_LHS_KEYSYM), string literals and identifiers,
 * the text is collected in the scanner buffer and handed back through
 * val->string: str points at s->buf, and len is s->buf_pos as it stands
 * after the terminating '\0' was appended.
 *
 * Malformed tokens are reported with scanner_err(); bad escape sequences
 * inside a string literal are reported with scanner_warn().
 */
160 static enum rules_token
161 lex(struct scanner *s, union lvalue *val)
163 skip_more_whitespace_and_comments:
165 while (is_space(peek(s)))
167 return TOK_END_OF_LINE;
172 goto skip_more_whitespace_and_comments;
175 /* See if we're done. */
176 if (eof(s)) return TOK_END_OF_FILE;
/* Remember where this token starts. */
179 s->token_line = s->line;
180 s->token_column = s->column;
/* Keysym literal: collect everything up to '>' or the end of the line. */
185 while (peek(s) != '>' && !eol(s))
186 buf_append(s, next(s));
188 scanner_err(s, "unterminated keysym literal");
191 if (!buf_append(s, '\0')) {
192 scanner_err(s, "keysym literal is too long");
195 val->string.str = s->buf;
196 val->string.len = s->buf_pos;
197 return TOK_LHS_KEYSYM;
208 /* String literal. */
210 while (!eof(s) && !eol(s) && peek(s) != '\"') {
216 else if (chr(s, '"')) {
219 else if (chr(s, 'x') || chr(s, 'X')) {
221 buf_append(s, (char) o);
223 scanner_warn(s, "illegal hexadecimal escape sequence in string literal");
225 else if (oct(s, &o)) {
226 buf_append(s, (char) o);
229 scanner_warn(s, "unknown escape sequence (%c) in string literal", peek(s));
233 buf_append(s, next(s));
237 scanner_err(s, "unterminated string literal");
240 if (!buf_append(s, '\0')) {
241 scanner_err(s, "string literal is too long");
/* NOTE(review): the "- 1" presumably excludes the '\0' appended just above -- confirm against buf_append(). */
244 if (!is_valid_utf8(s->buf, s->buf_pos - 1)) {
245 scanner_err(s, "string literal is not a valid UTF-8 string");
248 val->string.str = s->buf;
249 val->string.len = s->buf_pos;
253 /* Identifier or include. */
254 if (is_alpha(peek(s)) || peek(s) == '_') {
256 while (is_alnum(peek(s)) || peek(s) == '_')
257 buf_append(s, next(s));
258 if (!buf_append(s, '\0')) {
259 scanner_err(s, "identifier is too long");
/* The word "include" is singled out from ordinary identifiers. */
263 if (streq(s->buf, "include"))
266 val->string.str = s->buf;
267 val->string.len = s->buf_pos;
271 /* Discard rest of line. */
274 scanner_err(s, "unrecognized token");
/*
 * Scan the quoted path that follows an "include" keyword.
 *
 * The path is accumulated in the scanner buffer, with these %-sequences
 * expanded on the way:
 *   %H - the value of the HOME environment variable (secure_getenv()),
 *   %L - the result of get_locale_compose_file_path(table->locale),
 *   %S - the result of get_xlocaledir_path().
 *
 * A missing path, an unset HOME, a failed %L expansion, an unknown
 * %-sequence, an over-long path and an unterminated statement are each
 * reported with scanner_err().
 *
 * On success val_out->string describes the expanded, '\0'-terminated path
 * held in s->buf, and TOK_INCLUDE_STRING is returned.
 */
278 static enum rules_token
279 lex_include_string(struct scanner *s, struct xkb_compose_table *table,
280 union lvalue *val_out)
282 while (is_space(peek(s)))
284 return TOK_END_OF_LINE;
/* Remember where this token starts. */
286 s->token_line = s->line;
287 s->token_column = s->column;
291 scanner_err(s, "include statement must be followed by a path");
/* Copy the path up to the closing quote, expanding %-sequences. */
295 while (!eof(s) && !eol(s) && peek(s) != '\"') {
300 else if (chr(s, 'H')) {
301 const char *home = secure_getenv("HOME");
303 scanner_err(s, "%%H was used in an include statement, but the HOME environment variable is not set");
306 if (!buf_appends(s, home)) {
307 scanner_err(s, "include path after expanding %%H is too long");
311 else if (chr(s, 'L')) {
312 char *path = get_locale_compose_file_path(table->locale);
314 scanner_err(s, "failed to expand %%L to the locale Compose file");
317 if (!buf_appends(s, path)) {
319 scanner_err(s, "include path after expanding %%L is too long");
324 else if (chr(s, 'S')) {
325 const char *xlocaledir = get_xlocaledir_path();
326 if (!buf_appends(s, xlocaledir)) {
327 scanner_err(s, "include path after expanding %%S is too long");
332 scanner_err(s, "unknown %% format (%c) in include statement", peek(s));
336 buf_append(s, next(s));
340 scanner_err(s, "unterminated include statement");
343 if (!buf_append(s, '\0')) {
344 scanner_err(s, "include path is too long");
347 val_out->string.str = s->buf;
348 val_out->string.len = s->buf_pos;
349 return TOK_INCLUDE_STRING;
/* Keysyms of the left-hand side, in input order; parse() stores at most MAX_LHS_LEN of them. */
353 xkb_keysym_t lhs[MAX_LHS_LEN];
/* Modifier mask of the rule: parse() ORs in (1 << mod) per resolved modifier, or sets 0xff for "None". */
361 xkb_mod_mask_t modmask;
/*
 * Append a new node for keysym to table->nodes and return its index.
 * The append may reallocate the array, so callers refetch any node
 * pointer they obtained before the call.
 */
365 add_node(struct xkb_compose_table *table, xkb_keysym_t keysym)
367 struct compose_node new = {
372 darray_append(table->nodes, new);
373 return darray_size(table->nodes) - 1;
/*
 * Insert one parsed production into the trie stored in table->nodes.
 *
 * The left-hand side is walked keysym by keysym. Within a level the
 * matching node is searched through the node->next links (0 means there
 * is no further node), and the walk descends through node->u.successor.
 * Missing nodes are created with add_node().
 *
 * Conflicts with existing sequences are reported through scanner_warn()
 * on s: an existing sequence that is a prefix of the new one is
 * overridden, a new sequence that is a prefix of an existing one is
 * skipped, an exact duplicate is skipped, and a differing result for the
 * same sequence overrides the old one.
 *
 * Finally the production's string (if any) is appended to table->utf8 and
 * its keysym (if any) is stored in the leaf.
 */
377 add_production(struct xkb_compose_table *table, struct scanner *s,
378 const struct production *production)
382 struct compose_node *node;
385 node = &darray_item(table->nodes, curr);
388 * Insert the sequence to the trie, creating new nodes as needed.
390 * TODO: This can be sped up a bit by first trying the path that the
391 * previous production took, and only then doing the linear search
392 * through the trie levels. This will work because sequences in the
393 * Compose files are often clustered by a common prefix; especially
394 * in the 1st and 2nd keysyms, which is where the largest variation
395 * (thus, longest search) is.
397 for (lhs_pos = 0; lhs_pos < production->len; lhs_pos++) {
398 while (production->lhs[lhs_pos] != node->keysym) {
399 if (node->next == 0) {
400 uint32_t next = add_node(table, production->lhs[lhs_pos]);
401 /* Refetch since add_node could have realloc()ed. */
402 node = &darray_item(table->nodes, curr);
407 node = &darray_item(table->nodes, curr);
/* Last keysym of the sequence: nothing further to descend into. */
410 if (lhs_pos + 1 == production->len)
414 if (node->u.leaf.utf8 != 0 ||
415 node->u.leaf.keysym != XKB_KEY_NoSymbol) {
416 scanner_warn(s, "a sequence already exists which is a prefix of this sequence; overriding");
417 node->u.leaf.utf8 = 0;
418 node->u.leaf.keysym = XKB_KEY_NoSymbol;
422 uint32_t successor = add_node(table, production->lhs[lhs_pos + 1]);
423 /* Refetch since add_node could have realloc()ed. */
424 node = &darray_item(table->nodes, curr);
425 node->is_leaf = false;
426 node->u.successor = successor;
430 curr = node->u.successor;
431 node = &darray_item(table->nodes, curr);
434 if (!node->is_leaf) {
435 scanner_warn(s, "this compose sequence is a prefix of another; skipping line");
439 if (node->u.leaf.utf8 != 0 || node->u.leaf.keysym != XKB_KEY_NoSymbol) {
440 if (streq(&darray_item(table->utf8, node->u.leaf.utf8),
441 production->string) &&
442 node->u.leaf.keysym == production->keysym) {
443 scanner_warn(s, "this compose sequence is a duplicate of another; skipping line");
446 scanner_warn(s, "this compose sequence already exists; overriding");
449 if (production->has_string) {
/* The leaf records an offset into table->utf8, not a pointer. */
450 node->u.leaf.utf8 = darray_size(table->utf8);
451 darray_append_items(table->utf8, production->string,
452 strlen(production->string) + 1);
454 if (production->has_keysym) {
455 node->u.leaf.keysym = production->keysym;
/*
 * Look up a modifier name in a static table, by exact string comparison.
 * XKB_MOD_INVALID is returned when no table entry matches name.
 */
459 static xkb_mod_index_t
460 resolve_modifier(const char *name)
462 static const struct {
474 for (unsigned i = 0; i < ARRAY_SIZE(mods); i++)
475 if (streq(name, mods[i].name))
478 return XKB_MOD_INVALID;
/* Forward declaration: do_include() and parse() call each other. */
482 parse(struct xkb_compose_table *table, struct scanner *s,
483 unsigned include_depth);
/*
 * Parse the Compose file at path on behalf of an include statement found
 * while scanning with s.
 *
 * A scanner error is reported on s when include_depth has reached
 * MAX_INCLUDE_DEPTH, when the file cannot be opened, or when it cannot be
 * mapped. Otherwise the mapped contents are parsed with a fresh scanner
 * that shares s->priv (the keysym-name cache) and with include_depth + 1,
 * and the mapping is released afterwards.
 */
486 do_include(struct xkb_compose_table *table, struct scanner *s,
487 const char *path, unsigned include_depth)
493 struct scanner new_s;
495 if (include_depth >= MAX_INCLUDE_DEPTH) {
496 scanner_err(s, "maximum include depth (%d) exceeded; maybe there is an include loop?",
501 file = fopen(path, "r");
503 scanner_err(s, "failed to open included Compose file \"%s\": %s",
504 path, strerror(errno));
508 ok = map_file(file, &string, &size);
510 scanner_err(s, "failed to read included Compose file \"%s\": %s",
511 path, strerror(errno));
/* The nested scanner reports under the included path and reuses the parent's priv pointer. */
515 scanner_init(&new_s, table->ctx, string, size, path, s->priv);
517 ok = parse(table, &new_s, include_depth + 1);
522 unmap_file(string, size);
/*
 * Parse the Compose text behind scanner s and add its productions to
 * table.
 *
 * include_depth is the current nesting level of include statements; it is
 * handed on to do_include(). s->priv must point to a
 * struct keysym_from_name_cache, which is used for all keysym lookups.
 *
 * Each line is either an include statement or a production: a left-hand
 * side of keysyms with optional modifiers (or "None"), then a right-hand
 * side holding at most one string and at most one keysym. Completed
 * productions are passed to add_production().
 *
 * Malformed lines are reported with scanner_warn()/scanner_err() and the
 * rest of the line is discarded. Errors are counted against MAX_ERRORS,
 * after which "too many errors" is reported.
 */
529 parse(struct xkb_compose_table *table, struct scanner *s,
530 unsigned include_depth)
532 enum rules_token tok;
534 struct keysym_from_name_cache *cache = s->priv;
536 struct production production;
537 enum { MAX_ERRORS = 10 };
/* Reset the per-line production state. */
542 production.has_keysym = false;
543 production.has_string = false;
545 production.modmask = 0;
550 switch (tok = lex(s, &val)) {
551 case TOK_END_OF_LINE:
553 case TOK_END_OF_FILE:
562 switch (tok = lex_include_string(s, table, &val)) {
563 case TOK_INCLUDE_STRING:
570 switch (tok = lex(s, &val)) {
571 case TOK_END_OF_LINE:
572 if (!do_include(table, s, val.string.str, include_depth))
584 if (production.len <= 0) {
585 scanner_warn(s, "expected at least one keysym on left-hand side; skipping line");
590 if (!streq(val.string.str, "None")) {
591 scanner_err(s, "unrecognized identifier \"%s\"", val.string.str);
595 /* XXX Should only include the mods in resolve_mods(). */
596 production.modmask = 0xff;
609 keysym = cached_keysym_from_name(cache, val.string.str, val.string.len);
610 if (keysym == XKB_KEY_NoSymbol) {
611 scanner_err(s, "unrecognized keysym \"%s\" on left-hand side",
615 if (production.len + 1 > MAX_LHS_LEN) {
616 scanner_warn(s, "too many keysyms (%d) on left-hand side; skipping line",
620 production.lhs[production.len++] = keysym;
622 production.modmask = 0;
633 if (tok == TOK_TILDE) {
638 if (tok != TOK_IDENT) {
639 if (tilde || production.modmask == 0)
644 mod = resolve_modifier(val.string.str);
645 if (mod == XKB_MOD_INVALID) {
646 scanner_err(s, "unrecognized modifier \"%s\"",
/* modmask collects every modifier that was mentioned; mods has the bit cleared or set. */
651 production.modmask |= 1 << mod;
653 production.mods &= ~(1 << mod);
655 production.mods |= 1 << mod;
661 switch (tok = lex(s, &val)) {
663 if (production.has_string) {
664 scanner_warn(s, "right-hand side can have at most one string; skipping line");
/* NOTE(review): lex() takes string.len from s->buf_pos after appending the '\0'; if that count includes the terminator, this check never fires for "" -- confirm. */
667 if (val.string.len <= 0) {
668 scanner_warn(s, "right-hand side string must not be empty; skipping line");
/* Guards the strcpy() into production.string below. */
671 if (val.string.len >= sizeof(production.string)) {
672 scanner_warn(s, "right-hand side string is too long; skipping line");
675 strcpy(production.string, val.string.str);
676 production.has_string = true;
679 keysym = cached_keysym_from_name(cache, val.string.str, val.string.len);
680 if (keysym == XKB_KEY_NoSymbol) {
681 scanner_err(s, "unrecognized keysym \"%s\" on right-hand side",
685 if (production.has_keysym) {
686 scanner_warn(s, "right-hand side can have at most one keysym; skipping line");
689 production.keysym = keysym;
690 production.has_keysym = true;
691 case TOK_END_OF_LINE:
692 if (!production.has_string && !production.has_keysym) {
693 scanner_warn(s, "right-hand side must have at least one of string or keysym; skipping line");
696 add_production(table, s, &production);
/* A TOK_ERROR was already reported, so only other tokens get a message here. */
703 if (tok != TOK_ERROR)
704 scanner_err(s, "unexpected token");
707 if (num_errors <= MAX_ERRORS)
710 scanner_err(s, "too many errors");
714 scanner_err(s, "failed to parse file");
/* Discard tokens up to the end of the current line (or of the input). */
718 while (tok != TOK_END_OF_LINE && tok != TOK_END_OF_FILE)
/*
 * Parse a Compose description held in memory (string, len) into table.
 * file_name is passed on to the scanner.
 *
 * A zero-initialized keysym-name cache local to this call is attached to
 * the scanner as its priv pointer. After parse(), table->nodes and
 * table->utf8 are shrunk.
 * NOTE(review): the statement taken when parse() fails is not visible
 * here; presumably the shrink only runs on success -- confirm.
 */
727 parse_string(struct xkb_compose_table *table, const char *string, size_t len,
728 const char *file_name)
731 struct keysym_from_name_cache cache;
732 memset(&cache, 0, sizeof(cache));
733 scanner_init(&s, table->ctx, string, len, file_name, &cache);
734 if (!parse(table, &s, 0))
736 /* Maybe the allocator can use the excess space. */
737 darray_shrink(table->nodes);
738 darray_shrink(table->utf8);
/*
 * Parse a Compose file into table: map file into memory, hand the mapping
 * to parse_string() under the name file_name, then unmap it.
 * A mapping failure is logged with log_err(), including strerror(errno).
 */
743 parse_file(struct xkb_compose_table *table, FILE *file, const char *file_name)
749 ok = map_file(file, &string, &size);
751 log_err(table->ctx, "Couldn't read Compose file %s: %s\n",
752 file_name, strerror(errno));
756 ok = parse_string(table, string, size, file_name);
757 unmap_file(string, size);