2 * Copyright © 2013 Ran Benita <ran234@gmail.com>
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
21 * DEALINGS IN THE SOFTWARE.
24 /******************************************************************
26 Copyright 1992 by Oki Technosystems Laboratory, Inc.
27 Copyright 1992 by Fuji Xerox Co., Ltd.
29 Permission to use, copy, modify, distribute, and sell this software
30 and its documentation for any purpose is hereby granted without fee,
31 provided that the above copyright notice appear in all copies and
32 that both that copyright notice and this permission notice appear
33 in supporting documentation, and that the name of Oki Technosystems
34 Laboratory and Fuji Xerox not be used in advertising or publicity
35 pertaining to distribution of the software without specific, written
37 Oki Technosystems Laboratory and Fuji Xerox make no representations
38 about the suitability of this software for any purpose. It is provided
39 "as is" without express or implied warranty.
41 OKI TECHNOSYSTEMS LABORATORY AND FUJI XEROX DISCLAIM ALL WARRANTIES
42 WITH REGARD TO THIS SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF
43 MERCHANTABILITY AND FITNESS, IN NO EVENT SHALL OKI TECHNOSYSTEMS
44 LABORATORY AND FUJI XEROX BE LIABLE FOR ANY SPECIAL, INDIRECT OR
45 CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS
46 OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE
47 OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE
48 OR PERFORMANCE OF THIS SOFTWARE.
50 Author: Yasuhiro Kawai Oki Technosystems Laboratory
51 Author: Kazunori Nishihara Fuji Xerox
53 ******************************************************************/
58 #include "scanner-utils.h"
/* Capacity of the lhs[] keysym array of a production; parse() skips lines whose left-hand side would exceed it. */
64 #define MAX_LHS_LEN 10
/* do_include() refuses to go deeper once include_depth reaches this value. */
65 #define MAX_INCLUDE_DEPTH 5
/* Number of entries in the name-to-keysym lookup cache. */
67 #define KEYSYM_FROM_NAME_CACHE_SIZE 8
70 * xkb_keysym_from_name() is fairly slow, because for internal reasons
71 * it must use strcasecmp().
72 * A small cache cuts about 20% off the compilation time of
73 * en_US.UTF-8/Compose.
/*
 * Cache of recent name-to-keysym lookups. It holds a fixed array of
 * KEYSYM_FROM_NAME_CACHE_SIZE entries; cached_keysym_from_name() reads
 * and writes each entry's `name` and `keysym`, and uses the `next`
 * member of the enclosing struct as the index of the entry to replace.
 */
75 struct keysym_from_name_cache {
79 } cache[KEYSYM_FROM_NAME_CACHE_SIZE];
/*
 * Resolve a keysym name, consulting the cache before falling back to
 * xkb_keysym_from_name().
 *
 * cache: the lookup cache to search and update.
 * name:  NUL-terminated keysym name (it is passed to streq()/strcpy()).
 * len:   length associated with name; only used for the size check below.
 *
 * Yields XKB_KEY_NoSymbol for names that are too long for a cache entry,
 * the cached keysym on a hit, and otherwise whatever
 * xkb_keysym_from_name() produced (which is also stored in the cache).
 */
84 cached_keysym_from_name(struct keysym_from_name_cache *cache,
85 const char *name, size_t len)
/*
 * Names that cannot fit in an entry are rejected without any lookup.
 * This check is also what bounds the strcpy() below.
 * NOTE(review): that relies on len being at least strlen(name) - confirm
 * against the callers.
 */
89 if (len >= sizeof(cache->cache[0].name))
90 return XKB_KEY_NoSymbol;
/* Linear scan over all entries for an exact name match. */
92 for (unsigned i = 0; i < KEYSYM_FROM_NAME_CACHE_SIZE; i++)
93 if (streq(cache->cache[i].name, name))
94 return cache->cache[i].keysym;
/*
 * Miss: do the real lookup and store the result in the entry at
 * cache->next, whatever the result was (failed lookups are cached too).
 */
96 keysym = xkb_keysym_from_name(name, XKB_KEYSYM_NO_FLAGS);
97 strcpy(cache->cache[cache->next].name, name);
98 cache->cache[cache->next].keysym = keysym;
/* Advance the replacement index, wrapping around the array. */
99 cache->next = (cache->next + 1) % KEYSYM_FROM_NAME_CACHE_SIZE;
104 * Grammar adapted from libX11/modules/im/ximcp/imLcPrs.c.
105 * See also the XCompose(5) manpage.
107 * We don't support the MODIFIER rules, which are commented out.
109 * FILE ::= { [PRODUCTION] [COMMENT] "\n" | INCLUDE }
110 * INCLUDE ::= "include" '"' INCLUDE_STRING '"'
111 * PRODUCTION ::= LHS ":" RHS [ COMMENT ]
112 * COMMENT ::= "#" {<any character except null or newline>}
113 * LHS ::= EVENT { EVENT }
114 * EVENT ::= "<" keysym ">"
115 * # EVENT ::= [MODIFIER_LIST] "<" keysym ">"
116 * # MODIFIER_LIST ::= ("!" {MODIFIER} ) | "None"
117 * # MODIFIER ::= ["~"] modifier_name
118 * RHS ::= ( STRING | keysym | STRING keysym )
119 * STRING ::= '"' { CHAR } '"'
120 * CHAR ::= GRAPHIC_CHAR | ESCAPED_CHAR
121 * GRAPHIC_CHAR ::= locale (codeset) dependent code
122 * ESCAPED_CHAR ::= ('\\' | '\"' | OCTAL | HEX )
123 * OCTAL ::= '\' OCTAL_CHAR [OCTAL_CHAR [OCTAL_CHAR]]
124 * OCTAL_CHAR ::= (0|1|2|3|4|5|6|7)
125 * HEX ::= '\' (x|X) HEX_CHAR [HEX_CHAR]
126 * HEX_CHAR ::= (0|1|2|3|4|5|6|7|8|9|A|B|C|D|E|F|a|b|c|d|e|f)
128 * INCLUDE_STRING is a filesystem path, with the following %-expansions:
130 * %H - The user's home directory (the $HOME environment variable).
131 * %L - The name of the locale specific Compose file (e.g.,
132 * "/usr/share/X11/locale/<localename>/Compose").
133 * %S - The name of the system directory for Compose files (e.g.,
134 * "/usr/share/X11/locale").
149 /* Values returned with some tokens, like yylval. */
152 /* Still \0-terminated. */
/*
 * Scan the next token of a Compose rule.
 *
 * s:   scanner positioned in the input; its buffer (s->buf) receives the
 *      text of the token being built.
 * val: for tokens that carry text, val->string.str is pointed at s->buf
 *      and val->string.len is set to s->buf_pos.
 *
 * The text is NUL-terminated, and because len is taken from s->buf_pos
 * after the terminator has been appended, len counts the terminator.
 * Lexical errors are reported through scanner_err().
 */
158 static enum rules_token
159 lex(struct scanner *s, union lvalue *val)
161 skip_more_whitespace_and_comments:
/* Whitespace is consumed here; this part can end the scan with TOK_END_OF_LINE. */
163 while (is_space(peek(s)))
165 return TOK_END_OF_LINE;
/* Loop back to the top to skip further whitespace and comments. */
170 goto skip_more_whitespace_and_comments;
173 /* See if we're done. */
174 if (eof(s)) return TOK_END_OF_FILE;
/* Record where the token starts, for diagnostics. */
177 s->token_line = s->line;
178 s->token_column = s->column;
/*
 * LHS keysym literal: gather characters until '>' or end of line.
 * Results of the per-character appends are not checked here; the checked
 * append of the terminator below is where "too long" is reported.
 */
183 while (peek(s) != '>' && !eol(s))
184 buf_append(s, next(s));
186 scanner_err(s, "unterminated keysym literal");
189 if (!buf_append(s, '\0')) {
190 scanner_err(s, "keysym literal is too long");
193 val->string.str = s->buf;
194 val->string.len = s->buf_pos;
195 return TOK_LHS_KEYSYM;
202 /* String literal. */
/* The body runs until the closing quote, end of line or end of input. */
204 while (!eof(s) && !eol(s) && peek(s) != '\"') {
/* Escape sequences: an escaped quote, then hexadecimal (x or X), then octal. */
210 else if (chr(s, '"')) {
213 else if (chr(s, 'x') || chr(s, 'X')) {
/* The decoded escape value is stored as a single char. */
215 buf_append(s, (char) o);
/* A malformed hex escape only produces a warning. */
217 scanner_warn(s, "illegal hexadecimal escape sequence in string literal");
219 else if (oct(s, &o)) {
220 buf_append(s, (char) o);
/* An unknown escape character only produces a warning. */
223 scanner_warn(s, "unknown escape sequence (%c) in string literal", peek(s));
/* Append the next input character as-is. */
227 buf_append(s, next(s));
231 scanner_err(s, "unterminated string literal");
234 if (!buf_append(s, '\0')) {
235 scanner_err(s, "string literal is too long");
/* Validate the contents as UTF-8, excluding the terminator just appended. */
238 if (!is_valid_utf8(s->buf, s->buf_pos - 1)) {
239 scanner_err(s, "string literal is not a valid UTF-8 string");
242 val->string.str = s->buf;
243 val->string.len = s->buf_pos;
247 /* RHS keysym or include. */
/* An identifier starts with a letter or '_' and continues with letters, digits or '_'. */
248 if (is_alpha(peek(s)) || peek(s) == '_') {
250 while (is_alnum(peek(s)) || peek(s) == '_')
251 buf_append(s, next(s));
252 if (!buf_append(s, '\0')) {
253 scanner_err(s, "identifier is too long");
/* The word "include" is singled out; any other identifier is returned as an RHS keysym name. */
257 if (streq(s->buf, "include"))
260 val->string.str = s->buf;
261 val->string.len = s->buf_pos;
262 return TOK_RHS_KEYSYM;
265 /* Discard rest of line. */
/* Nothing matched: report the input as an unrecognized token. */
268 scanner_err(s, "unrecognized token");
/*
 * Scan the quoted path that follows an include keyword, performing the
 * %-expansions while copying it into the scanner buffer.
 *
 * s:       scanner positioned after the include keyword.
 * table:   compose table; its locale is used to expand %L.
 * val_out: on success, string.str points at s->buf (the expanded,
 *          NUL-terminated path) and string.len is s->buf_pos.
 *
 * Returns TOK_INCLUDE_STRING on success; TOK_END_OF_LINE can be returned
 * while skipping leading whitespace. Problems are reported with
 * scanner_err().
 */
272 static enum rules_token
273 lex_include_string(struct scanner *s, struct xkb_compose_table *table,
274 union lvalue *val_out)
/* Leading whitespace is consumed first. */
276 while (is_space(peek(s)))
278 return TOK_END_OF_LINE;
/* Record where the token starts, for diagnostics. */
280 s->token_line = s->line;
281 s->token_column = s->column;
285 scanner_err(s, "include statement must be followed by a path");
/* The path body runs until the closing quote, end of line or end of input. */
289 while (!eof(s) && !eol(s) && peek(s) != '\"') {
/* %H: the HOME environment variable, read through secure_getenv(). */
294 else if (chr(s, 'H')) {
295 const char *home = secure_getenv("HOME");
297 scanner_err(s, "%%H was used in an include statement, but the HOME environment variable is not set");
300 if (!buf_appends(s, home)) {
301 scanner_err(s, "include path after expanding %%H is too long");
/* %L: the Compose file path for the table's locale. */
305 else if (chr(s, 'L')) {
306 char *path = get_locale_compose_file_path(table->locale);
308 scanner_err(s, "failed to expand %%L to the locale Compose file");
311 if (!buf_appends(s, path)) {
313 scanner_err(s, "include path after expanding %%L is too long");
/* %S: the path returned by get_xlocaledir_path(). */
318 else if (chr(s, 'S')) {
319 const char *xlocaledir = get_xlocaledir_path();
320 if (!buf_appends(s, xlocaledir)) {
321 scanner_err(s, "include path after expanding %%S is too long");
/* Any other character after '%' is an error. */
326 scanner_err(s, "unknown %% format (%c) in include statement", peek(s));
/* Append the next path character as-is. */
330 buf_append(s, next(s));
334 scanner_err(s, "unterminated include statement");
/* The checked append of the terminator is where an over-long path is reported. */
337 if (!buf_append(s, '\0')) {
338 scanner_err(s, "include path is too long");
341 val_out->string.str = s->buf;
342 val_out->string.len = s->buf_pos;
343 return TOK_INCLUDE_STRING;
/* Left-hand side keysyms of the production, in input order; parse() stores at most MAX_LHS_LEN of them. */
347 xkb_keysym_t lhs[MAX_LHS_LEN];
/*
 * Append a new node for `keysym` to table->nodes and return its index
 * (the array size after the append, minus one).
 *
 * The append may grow the array, so callers re-fetch any node pointers
 * they hold after calling this (see add_production()).
 */
356 add_node(struct xkb_compose_table *table, xkb_keysym_t keysym)
358 struct compose_node new = {
363 darray_append(table->nodes, new);
364 return darray_size(table->nodes) - 1;
368 add_production(struct xkb_compose_table *table, struct scanner *s,
369 const struct production *production)
373 struct compose_node *node;
376 node = &darray_item(table->nodes, curr);
379 * Insert the sequence to the trie, creating new nodes as needed.
381 * TODO: This can be sped up a bit by first trying the path that the
382 * previous production took, and only then doing the linear search
383 * through the trie levels. This will work because sequences in the
384 * Compose files are often clustered by a common prefix; especially
385 * in the 1st and 2nd keysyms, which is where the largest variation
386 * (thus, longest search) is.
388 for (lhs_pos = 0; lhs_pos < production->len; lhs_pos++) {
389 while (production->lhs[lhs_pos] != node->keysym) {
390 if (node->next == 0) {
391 uint32_t next = add_node(table, production->lhs[lhs_pos]);
392 /* Refetch since add_node could have realloc()ed. */
393 node = &darray_item(table->nodes, curr);
398 node = &darray_item(table->nodes, curr);
401 if (lhs_pos + 1 == production->len)
405 if (node->u.leaf.utf8 != 0 ||
406 node->u.leaf.keysym != XKB_KEY_NoSymbol) {
407 scanner_warn(s, "a sequence already exists which is a prefix of this sequence; overriding");
408 node->u.leaf.utf8 = 0;
409 node->u.leaf.keysym = XKB_KEY_NoSymbol;
413 uint32_t successor = add_node(table, production->lhs[lhs_pos + 1]);
414 /* Refetch since add_node could have realloc()ed. */
415 node = &darray_item(table->nodes, curr);
416 node->is_leaf = false;
417 node->u.successor = successor;
421 curr = node->u.successor;
422 node = &darray_item(table->nodes, curr);
425 if (!node->is_leaf) {
426 scanner_warn(s, "this compose sequence is a prefix of another; skipping line");
430 if (node->u.leaf.utf8 != 0 || node->u.leaf.keysym != XKB_KEY_NoSymbol) {
431 if (streq(&darray_item(table->utf8, node->u.leaf.utf8),
432 production->string) &&
433 node->u.leaf.keysym == production->keysym) {
434 scanner_warn(s, "this compose sequence is a duplicate of another; skipping line");
437 scanner_warn(s, "this compose sequence already exists; overriding");
440 if (production->has_string) {
441 node->u.leaf.utf8 = darray_size(table->utf8);
442 darray_append_items(table->utf8, production->string,
443 strlen(production->string) + 1);
445 if (production->has_keysym) {
446 node->u.leaf.keysym = production->keysym;
/* Forward declaration: do_include() calls parse() on the included file, and parse() calls do_include(). */
451 parse(struct xkb_compose_table *table, struct scanner *s,
452 unsigned include_depth);
/*
 * Parse the Compose file at `path` into `table` on behalf of an include
 * statement.
 *
 * table:         compose table that receives the included productions.
 * s:             scanner of the including file; used for error reporting,
 *                and its priv pointer is handed on to the new scanner.
 * path:          filesystem path of the file to include.
 * include_depth: nesting depth of the including file.
 *
 * Errors (depth limit reached, file cannot be opened, file cannot be
 * read) are reported with scanner_err() against the including file.
 */
455 do_include(struct xkb_compose_table *table, struct scanner *s,
456 const char *path, unsigned include_depth)
462 struct scanner new_s;
/* Guard against runaway or cyclic includes. */
464 if (include_depth >= MAX_INCLUDE_DEPTH) {
465 scanner_err(s, "maximum include depth (%d) exceeded; maybe there is an include loop?",
470 file = fopen(path, "r");
472 scanner_err(s, "failed to open included Compose file \"%s\": %s",
473 path, strerror(errno));
/* Get the whole file contents as one buffer. */
477 ok = map_file(file, &string, &size);
479 scanner_err(s, "failed to read included Compose file \"%s\": %s",
480 path, strerror(errno));
/* A fresh scanner over the included file, sharing the parent's priv pointer. */
484 scanner_init(&new_s, table->ctx, string, size, path, s->priv);
/* Parse one nesting level deeper. */
486 ok = parse(table, &new_s, include_depth + 1);
491 unmap_file(string, size);
/*
 * Parse the input of scanner `s` line by line, adding each valid
 * production to `table`.
 *
 * table:         compose table being built.
 * s:             scanner over the input; s->priv is used as the keysym
 *                name cache.
 * include_depth: current include nesting level, passed on to
 *                do_include().
 *
 * Recoverable problems are warned about and the line is skipped.
 * Errors are counted against MAX_ERRORS, after which parsing gives up
 * with "too many errors" / "failed to parse file".
 */
498 parse(struct xkb_compose_table *table, struct scanner *s,
499 unsigned include_depth)
501 enum rules_token tok;
503 struct keysym_from_name_cache *cache = s->priv;
505 struct production production;
506 enum { MAX_ERRORS = 10 };
/*
 * Start of a statement: clear the per-line flags. Only the flags are
 * cleared here; production.string and production.keysym keep whatever
 * they held before.
 */
511 production.has_keysym = false;
512 production.has_string = false;
/* First token of the statement. */
517 switch (tok = lex(s, &val)) {
518 case TOK_END_OF_LINE:
520 case TOK_END_OF_FILE:
/* Include statement: read the (expanded) path. */
529 switch (tok = lex_include_string(s, table, &val)) {
530 case TOK_INCLUDE_STRING:
/* The include path must be followed by end of line; only then is the file included. */
537 switch (tok = lex(s, &val)) {
538 case TOK_END_OF_LINE:
539 if (!do_include(table, s, val.string.str, include_depth))
/* Left-hand side: resolve each keysym name and append it to the sequence. */
551 keysym = cached_keysym_from_name(cache, val.string.str, val.string.len);
552 if (keysym == XKB_KEY_NoSymbol) {
553 scanner_err(s, "unrecognized keysym \"%s\" on left-hand side",
557 if (production.len + 1 > MAX_LHS_LEN) {
558 scanner_warn(s, "too many keysyms (%d) on left-hand side; skipping line",
562 production.lhs[production.len++] = keysym;
/* A production needs at least one left-hand side keysym. */
565 if (production.len <= 0) {
566 scanner_warn(s, "expected at least one keysym on left-hand side; skipping line");
/* Right-hand side: at most one string and at most one keysym. */
575 switch (tok = lex(s, &val)) {
577 if (production.has_string) {
578 scanner_warn(s, "right-hand side can have at most one string; skipping line");
/*
 * NOTE(review): lex() sets string.len from s->buf_pos after appending
 * the terminating NUL, so len looks to be at least 1 for every string
 * token and this check appears unable to fire for "". Confirm whether
 * `<= 1` was intended.
 */
581 if (val.string.len <= 0) {
582 scanner_warn(s, "right-hand side string must not be empty; skipping line");
/* The length check is what bounds the strcpy() into production.string. */
585 if (val.string.len >= sizeof(production.string)) {
586 scanner_warn(s, "right-hand side string is too long; skipping line");
589 strcpy(production.string, val.string.str);
590 production.has_string = true;
593 keysym = cached_keysym_from_name(cache, val.string.str, val.string.len);
594 if (keysym == XKB_KEY_NoSymbol) {
595 scanner_err(s, "unrecognized keysym \"%s\" on right-hand side",
599 if (production.has_keysym) {
600 scanner_warn(s, "right-hand side can have at most one keysym; skipping line");
603 production.keysym = keysym;
604 production.has_keysym = true;
/*
 * NOTE(review): no statement appears between the assignment above and
 * this case label, so the keysym case seems to fall through into the
 * end-of-line handling. If that is intended, mark it as a fallthrough.
 */
605 case TOK_END_OF_LINE:
606 if (!production.has_string && !production.has_keysym) {
607 scanner_warn(s, "right-hand side must have at least one of string or keysym; skipping line");
/* The line is complete and valid: insert it into the trie. */
610 add_production(table, s, &production);
/* Unexpected token: the lexer has already reported TOK_ERROR itself. */
617 if (tok != TOK_ERROR)
618 scanner_err(s, "unexpected token");
/* Error budget: keep going while within MAX_ERRORS, otherwise give up. */
621 if (num_errors <= MAX_ERRORS)
624 scanner_err(s, "too many errors");
628 scanner_err(s, "failed to parse file");
/* Skip the rest of the current line before resuming. */
632 while (tok != TOK_END_OF_LINE && tok != TOK_END_OF_FILE)
/*
 * Parse Compose text held in memory into `table`.
 *
 * table:     compose table to fill.
 * string:    the text to parse.
 * len:       length passed to the scanner along with string.
 * file_name: name handed to the scanner for this input.
 *
 * A zero-initialized keysym name cache is created here and attached to
 * the scanner; parsing starts at include depth 0.
 */
641 parse_string(struct xkb_compose_table *table, const char *string, size_t len,
642 const char *file_name)
645 struct keysym_from_name_cache cache;
/* All-zero entries mean an empty cache with the replacement index at 0. */
646 memset(&cache, 0, sizeof(cache));
647 scanner_init(&s, table->ctx, string, len, file_name, &cache);
648 if (!parse(table, &s, 0))
650 /* Maybe the allocator can use the excess space. */
651 darray_shrink(table->nodes);
652 darray_shrink(table->utf8);
/*
 * Parse the Compose file `file` into `table`.
 *
 * table:     compose table to fill.
 * file:      open stream whose contents are obtained with map_file().
 * file_name: name used in the error message and passed to parse_string().
 *
 * If the contents cannot be obtained, an error including
 * strerror(errno) is logged. Otherwise the contents are parsed with
 * parse_string() and then released with unmap_file().
 */
657 parse_file(struct xkb_compose_table *table, FILE *file, const char *file_name)
663 ok = map_file(file, &string, &size);
665 log_err(table->ctx, "Couldn't read Compose file %s: %s\n",
666 file_name, strerror(errno));
670 ok = parse_string(table, string, size, file_name);
671 unmap_file(string, size);