2 * Copyright © 2013 Ran Benita <ran234@gmail.com>
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
21 * DEALINGS IN THE SOFTWARE.
24 /******************************************************************
26 Copyright 1992 by Oki Technosystems Laboratory, Inc.
27 Copyright 1992 by Fuji Xerox Co., Ltd.
29 Permission to use, copy, modify, distribute, and sell this software
30 and its documentation for any purpose is hereby granted without fee,
31 provided that the above copyright notice appear in all copies and
32 that both that copyright notice and this permission notice appear
33 in supporting documentation, and that the name of Oki Technosystems
34 Laboratory and Fuji Xerox not be used in advertising or publicity
35 pertaining to distribution of the software without specific, written
36 prior permission.
37 Oki Technosystems Laboratory and Fuji Xerox make no representations
38 about the suitability of this software for any purpose. It is provided
39 "as is" without express or implied warranty.
41 OKI TECHNOSYSTEMS LABORATORY AND FUJI XEROX DISCLAIM ALL WARRANTIES
42 WITH REGARD TO THIS SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF
43 MERCHANTABILITY AND FITNESS, IN NO EVENT SHALL OKI TECHNOSYSTEMS
44 LABORATORY AND FUJI XEROX BE LIABLE FOR ANY SPECIAL, INDIRECT OR
45 CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS
46 OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE
47 OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE
48 OR PERFORMANCE OF THIS SOFTWARE.
50 Author: Yasuhiro Kawai Oki Technosystems Laboratory
51 Author: Kazunori Nishihara Fuji Xerox
53 ******************************************************************/
60 #include "scanner-utils.h"
67 * Grammar adapted from libX11/modules/im/ximcp/imLcPrs.c.
68 * See also the XCompose(5) manpage.
70 * FILE ::= { [PRODUCTION] [COMMENT] "\n" | INCLUDE }
71 * INCLUDE ::= "include" '"' INCLUDE_STRING '"'
72 * PRODUCTION ::= LHS ":" RHS [ COMMENT ]
73 * COMMENT ::= "#" {<any character except null or newline>}
74 * LHS ::= EVENT { EVENT }
75 * EVENT ::= [MODIFIER_LIST] "<" keysym ">"
76 * MODIFIER_LIST ::= (["!"] {MODIFIER} ) | "None"
77 * MODIFIER ::= ["~"] MODIFIER_NAME
78 * MODIFIER_NAME ::= ("Ctrl"|"Lock"|"Caps"|"Shift"|"Alt"|"Meta")
79 * RHS ::= ( STRING | keysym | STRING keysym )
80 * STRING ::= '"' { CHAR } '"'
81 * CHAR ::= GRAPHIC_CHAR | ESCAPED_CHAR
82 * GRAPHIC_CHAR ::= locale (codeset) dependent code
83 * ESCAPED_CHAR ::= ('\\' | '\"' | OCTAL | HEX )
84 * OCTAL ::= '\' OCTAL_CHAR [OCTAL_CHAR [OCTAL_CHAR]]
85 * OCTAL_CHAR ::= (0|1|2|3|4|5|6|7)
86 * HEX ::= '\' (x|X) HEX_CHAR [HEX_CHAR]
87 * HEX_CHAR ::= (0|1|2|3|4|5|6|7|8|9|A|B|C|D|E|F|a|b|c|d|e|f)
89 * INCLUDE_STRING is a filesystem path, with the following %-expansions:
91 * %H - The user's home directory (the $HOME environment variable).
92 * %L - The name of the locale specific Compose file (e.g.,
93 * "/usr/share/X11/locale/<localename>/Compose").
94 * %S - The name of the system directory for Compose files (e.g.,
95 * "/usr/share/X11/locale").
112 /* Values returned with some tokens, like yylval. */
115 /* Still \0-terminated. */
/*
 * lex() - scan the next token of a Compose file from scanner `s`.
 *
 * Leading whitespace and '#' comments are skipped first. A newline seen
 * while skipping produces TOK_END_OF_LINE and exhausted input produces
 * TOK_END_OF_FILE. For tokens that carry text (a '<...>' keysym literal,
 * a quoted string, an identifier) `val->string` is set to point at the
 * scanner buffer `s->buf`; no copy is made here.
 *
 * NOTE(review): several statements of this function (some returns and
 * closing braces) are not visible in this listing; the comments below
 * describe only the lines that are shown.
 */
121 static enum rules_token
122 lex(struct scanner *s, union lvalue *val)
124 skip_more_whitespace_and_comments:
/* Skip whitespace; a newline is itself a token. */
126 while (is_space(scanner_peek(s)))
127 if (scanner_next(s) == '\n')
128 return TOK_END_OF_LINE;
/* A '#' comment is dropped up to the end of the line, then skipping restarts. */
131 if (scanner_chr(s, '#')) {
132 scanner_skip_to_eol(s);
133 goto skip_more_whitespace_and_comments;
136 /* See if we're done. */
137 if (scanner_eof(s)) return TOK_END_OF_FILE;
/* Remember the line and column at which the new token starts. */
140 s->token_line = s->line;
141 s->token_column = s->column;
/*
 * LHS keysym literal: collect everything between '<' and '>' on the
 * current line. The collected name is '\0'-terminated in the buffer.
 */
145 if (scanner_chr(s, '<')) {
146 while (scanner_peek(s) != '>' && !scanner_eol(s) && !scanner_eof(s))
147 scanner_buf_append(s, scanner_next(s));
148 if (!scanner_chr(s, '>')) {
149 scanner_err(s, "unterminated keysym literal");
152 if (!scanner_buf_append(s, '\0')) {
153 scanner_err(s, "keysym literal is too long");
/*
 * NOTE(review): buf_pos is read after the terminator was appended, so
 * len presumably counts the '\0' as well - confirm against
 * scanner_buf_append().
 */
156 val->string.str = s->buf;
157 val->string.len = s->buf_pos;
158 return TOK_LHS_KEYSYM;
/* Single-character tokens; the token returned for each is on a line not shown. */
162 if (scanner_chr(s, ':'))
164 if (scanner_chr(s, '!'))
166 if (scanner_chr(s, '~'))
169 /* String literal. */
170 if (scanner_chr(s, '\"')) {
171 while (!scanner_eof(s) && !scanner_eol(s) && scanner_peek(s) != '\"') {
/* Escape sequences handled: backslash, double quote, x/X + hex, octal. */
172 if (scanner_chr(s, '\\')) {
174 if (scanner_chr(s, '\\')) {
175 scanner_buf_append(s, '\\');
177 else if (scanner_chr(s, '"')) {
178 scanner_buf_append(s, '"');
180 else if (scanner_chr(s, 'x') || scanner_chr(s, 'X')) {
181 if (scanner_hex(s, &o))
182 scanner_buf_append(s, (char) o);
184 scanner_warn(s, "illegal hexadecimal escape sequence in string literal");
186 else if (scanner_oct(s, &o)) {
187 scanner_buf_append(s, (char) o);
190 scanner_warn(s, "unknown escape sequence (%c) in string literal", scanner_peek(s));
/* Any other character is copied to the buffer unchanged. */
194 scanner_buf_append(s, scanner_next(s));
197 if (!scanner_chr(s, '\"')) {
198 scanner_err(s, "unterminated string literal");
201 if (!scanner_buf_append(s, '\0')) {
202 scanner_err(s, "string literal is too long");
/* Validation covers buf_pos - 1 bytes, presumably to leave out the '\0' just appended. */
205 if (!is_valid_utf8(s->buf, s->buf_pos - 1)) {
206 scanner_err(s, "string literal is not a valid UTF-8 string");
209 val->string.str = s->buf;
210 val->string.len = s->buf_pos;
214 /* Identifier or include. */
215 if (is_alpha(scanner_peek(s)) || scanner_peek(s) == '_') {
217 while (is_alnum(scanner_peek(s)) || scanner_peek(s) == '_')
218 scanner_buf_append(s, scanner_next(s));
219 if (!scanner_buf_append(s, '\0')) {
220 scanner_err(s, "identifier is too long");
/* The word "include" is special-cased; the token returned for it is not shown here. */
224 if (streq(s->buf, "include"))
227 val->string.str = s->buf;
228 val->string.len = s->buf_pos;
/* Nothing matched: drop the remainder of the line and report an error. */
232 /* Discard rest of line. */
233 scanner_skip_to_eol(s);
235 scanner_err(s, "unrecognized token");
/*
 * lex_include_string() - scan the quoted path of an include statement.
 *
 * The path must start with a double quote and end with one on the same
 * line. While copying it to the scanner buffer the following sequences
 * are expanded:
 *   %%  a literal '%'
 *   %H  the value of the HOME environment variable, read through
 *       xkb_context_getenv()
 *   %L  the result of get_locale_compose_file_path() for table->locale
 *   %S  the result of get_xlocaledir_path()
 * Any other character following '%' is reported as an error.
 *
 * On success TOK_INCLUDE_STRING is returned and `val_out->string` points
 * at the scanner buffer `s->buf`. A newline met before the path yields
 * TOK_END_OF_LINE.
 *
 * NOTE(review): the error returns and some checks are on lines that are
 * not visible in this listing.
 */
239 static enum rules_token
240 lex_include_string(struct scanner *s, struct xkb_compose_table *table,
241 union lvalue *val_out)
/* Skip whitespace in front of the path. */
243 while (is_space(scanner_peek(s)))
244 if (scanner_next(s) == '\n')
245 return TOK_END_OF_LINE;
/* Remember where the path token starts. */
247 s->token_line = s->line;
248 s->token_column = s->column;
251 if (!scanner_chr(s, '\"')) {
252 scanner_err(s, "include statement must be followed by a path");
/* Copy the path up to the closing quote, expanding %-sequences on the way. */
256 while (!scanner_eof(s) && !scanner_eol(s) && scanner_peek(s) != '\"') {
257 if (scanner_chr(s, '%')) {
258 if (scanner_chr(s, '%')) {
259 scanner_buf_append(s, '%');
261 else if (scanner_chr(s, 'H')) {
262 const char *home = xkb_context_getenv(table->ctx, "HOME");
/* The test guarding this error (presumably a missing HOME) is not shown. */
264 scanner_err(s, "%%H was used in an include statement, but the HOME environment variable is not set");
267 if (!scanner_buf_appends(s, home)) {
268 scanner_err(s, "include path after expanding %%H is too long");
272 else if (scanner_chr(s, 'L')) {
/*
 * NOTE(review): path is a non-const char pointer, so it is presumably
 * owned by this function and released on lines not shown - verify that
 * every exit path frees it.
 */
273 char *path = get_locale_compose_file_path(table->ctx, table->locale);
275 scanner_err(s, "failed to expand %%L to the locale Compose file");
278 if (!scanner_buf_appends(s, path)) {
280 scanner_err(s, "include path after expanding %%L is too long");
285 else if (scanner_chr(s, 'S')) {
286 const char *xlocaledir = get_xlocaledir_path(table->ctx);
287 if (!scanner_buf_appends(s, xlocaledir)) {
288 scanner_err(s, "include path after expanding %%S is too long");
293 scanner_err(s, "unknown %% format (%c) in include statement", scanner_peek(s));
/* Ordinary path characters are copied unchanged. */
297 scanner_buf_append(s, scanner_next(s));
300 if (!scanner_chr(s, '\"')) {
301 scanner_err(s, "unterminated include statement");
304 if (!scanner_buf_append(s, '\0')) {
305 scanner_err(s, "include path is too long");
/* The result aliases the scanner buffer; len is taken after the '\0' was appended. */
308 val_out->string.str = s->buf;
309 val_out->string.len = s->buf_pos;
310 return TOK_INCLUDE_STRING;
314 xkb_keysym_t lhs[MAX_LHS_LEN];
318 /* At least one of these is true. */
322 /* The matching is as follows: (active_mods & modmask) == mods. */
323 xkb_mod_mask_t modmask;
/*
 * add_production() - insert one parsed production into the compose table.
 *
 * The keysyms in production->lhs (production->len of them) are inserted
 * into the tree stored in table->nodes. The last node of the sequence is
 * marked as a leaf and receives the result: the offset of a copy of
 * production->string inside table->utf8 and/or production->keysym.
 * Conflicts with sequences already in the table are reported through
 * scanner_warn() on `s`.
 *
 * NOTE(review): the loop header, several conditions and the returns of
 * this function are not visible in this listing; comments cover only
 * what the shown lines establish.
 */
328 add_production(struct xkb_compose_table *table, struct scanner *s,
329 const struct production *production)
331 unsigned lhs_pos = 0;
/*
 * Start index for the walk: 0 when the node array holds exactly one
 * element, otherwise 1. Presumably index 0 is a placeholder node and
 * index 1 the root - TODO confirm against the table initialisation.
 */
332 uint32_t curr = darray_size(table->nodes) == 1 ? 0 : 1;
333 uint32_t *pptr = NULL;
334 struct compose_node *node = NULL;
336 /* Warn before potentially going over the limit, discard silently after. */
337 if (darray_size(table->nodes) + production->len + MAX_LHS_LEN > MAX_COMPOSE_NODES)
338 scanner_warn(s, "too many sequences for one Compose file; will ignore further lines");
339 if (darray_size(table->nodes) + production->len >= MAX_COMPOSE_NODES)
343 * Insert the sequence to the ternary search tree, creating new nodes as
346 * TODO: We insert in the order given, this means some inputs can create
347 * long O(n) chains, which results in total O(n^2) parsing time. We should
348 * ensure the tree is reasonably balanced somehow.
351 const xkb_keysym_t keysym = production->lhs[lhs_pos];
352 const bool last = lhs_pos + 1 == production->len;
356 * Create a new node and update the parent pointer to it.
357 * Update the pointer first because the append invalidates it.
359 struct compose_node new = {
/* The new node will live at the current end of the array. */
368 curr = darray_size(table->nodes);
373 darray_append(table->nodes, new);
/* Re-fetch the node pointer by index after a possible append. */
376 node = &darray_item(table->nodes, curr);
/* Three-way comparison on the keysym selects the branch to follow. */
378 if (keysym < node->keysym) {
381 } else if (keysym > node->keysym) {
386 scanner_warn(s, "a sequence already exists which is a prefix of this sequence; overriding");
/* The existing leaf is turned back into an internal node with no equal-child. */
387 node->internal.eqkid = 0;
388 node->internal.is_leaf = false;
/* Descend through the equal-child link, remembering where that link is stored. */
391 pptr = &node->internal.eqkid;
392 curr = node->internal.eqkid;
/*
 * Comparison of the stored result with the new one (presumably reached
 * when the node is already a leaf): both the string part and the keysym
 * part must agree for the line to count as a duplicate.
 */
396 (node->leaf.utf8 == 0 && !production->has_string) ||
398 node->leaf.utf8 != 0 && production->has_string &&
399 streq(&darray_item(table->utf8, node->leaf.utf8),
403 (node->leaf.keysym == XKB_KEY_NoSymbol && !production->has_keysym) ||
405 node->leaf.keysym != XKB_KEY_NoSymbol && production->has_keysym &&
406 node->leaf.keysym == production->keysym
408 if (same_string && same_keysym) {
409 scanner_warn(s, "this compose sequence is a duplicate of another; skipping line");
412 scanner_warn(s, "this compose sequence already exists; overriding");
414 } else if (node->internal.eqkid != 0) {
415 scanner_warn(s, "this compose sequence is a prefix of another; skipping line");
/*
 * Store the result in the node. The string is copied together with its
 * terminating '\0' (strlen + 1 bytes) to the end of table->utf8, and the
 * node records the offset at which the copy starts.
 */
418 node->is_leaf = true;
419 if (production->has_string) {
420 node->leaf.utf8 = darray_size(table->utf8);
421 darray_append_items(table->utf8, production->string,
422 strlen(production->string) + 1);
424 if (production->has_keysym) {
425 node->leaf.keysym = production->keysym;
432 /* Should match resolve_modifier(). */
433 #define ALL_MODS_MASK ((1 << 0) | (1 << 1) | (1 << 2) | (1 << 3))
/*
 * resolve_modifier() - look up a modifier name in a fixed local table.
 *
 * `name` is compared with streq() against each entry of the static table
 * `mods`. XKB_MOD_INVALID is returned when no entry matches.
 *
 * NOTE(review): the table contents and the value returned on a match are
 * on lines not visible in this listing; callers shift 1 by the result, so
 * presumably it is a small bit index - confirm.
 */
435 static xkb_mod_index_t
436 resolve_modifier(const char *name)
438 static const struct {
450 for (unsigned i = 0; i < ARRAY_SIZE(mods); i++)
451 if (streq(name, mods[i].name))
454 return XKB_MOD_INVALID;
458 parse(struct xkb_compose_table *table, struct scanner *s,
459 unsigned include_depth);
/*
 * do_include() - parse another Compose file into the same table.
 *
 * The file at `path` is opened in binary mode, mapped with map_file() and
 * parsed by parse() through a fresh scanner that shares table->ctx and
 * s->priv with the including scanner. Nesting is limited: the call is
 * refused once include_depth has reached MAX_INCLUDE_DEPTH, and the
 * nested parse runs with include_depth + 1. Errors are reported on the
 * including scanner `s`.
 *
 * NOTE(review): local declarations, the failure tests and the cleanup of
 * `file` are on lines not visible in this listing.
 */
462 do_include(struct xkb_compose_table *table, struct scanner *s,
463 const char *path, unsigned include_depth)
469 struct scanner new_s;
/* Guard against runaway or cyclic includes. */
471 if (include_depth >= MAX_INCLUDE_DEPTH) {
472 scanner_err(s, "maximum include depth (%d) exceeded; maybe there is an include loop?",
477 file = fopen(path, "rb");
479 scanner_err(s, "failed to open included Compose file \"%s\": %s",
480 path, strerror(errno));
484 ok = map_file(file, &string, &size);
/* NOTE(review): strerror(errno) below assumes map_file() sets errno on failure - confirm. */
486 scanner_err(s, "failed to read included Compose file \"%s\": %s",
487 path, strerror(errno));
/* The nested scanner reads the mapped contents and reports under the included path. */
491 scanner_init(&new_s, table->ctx, string, size, path, s->priv);
493 ok = parse(table, &new_s, include_depth + 1);
/* The mapping is released once the nested parse has finished. */
498 unmap_file(string, size);
/*
 * parse() - parse the tokens of scanner `s` and fill `table`.
 *
 * Tokens are obtained from lex(). For an include statement the path is
 * read with lex_include_string() and, once the end of the line has been
 * seen, handed to do_include() together with the current include_depth.
 * For an ordinary line the left-hand side keysyms and modifiers and the
 * right-hand side string and/or keysym are collected in a local
 * `struct production`, which is passed to add_production() at the end of
 * the line. Problems are reported through scanner_warn()/scanner_err();
 * MAX_ERRORS is the limit used by the error counter.
 *
 * NOTE(review): this function is a goto-driven state machine and most of
 * its labels, case lines and jumps are not visible in this listing. The
 * comments below are limited to what the shown lines establish.
 */
505 parse(struct xkb_compose_table *table, struct scanner *s,
506 unsigned include_depth)
508 enum rules_token tok;
511 struct production production;
512 enum { MAX_ERRORS = 10 };
/* Clear the result flags and the modifier mask of the production being built. */
517 production.has_keysym = false;
518 production.has_string = false;
520 production.modmask = 0;
525 switch (tok = lex(s, &val)) {
526 case TOK_END_OF_LINE:
528 case TOK_END_OF_FILE:
/* Include handling: read the quoted path (presumably reached after the include keyword). */
537 switch (tok = lex_include_string(s, table, &val)) {
538 case TOK_INCLUDE_STRING:
/* The included file is only processed once the include line has ended. */
545 switch (tok = lex(s, &val)) {
546 case TOK_END_OF_LINE:
547 if (!do_include(table, s, val.string.str, include_depth))
/* A production needs at least one keysym on its left-hand side. */
559 if (production.len <= 0) {
560 scanner_warn(s, "expected at least one keysym on left-hand side; skipping line");
/* The identifier "None" sets the full modifier mask. */
565 if (streq(val.string.str, "None")) {
567 production.modmask = ALL_MODS_MASK;
570 goto lhs_mod_list_tok;
572 goto lhs_mod_list_tok;
574 production.modmask = ALL_MODS_MASK;
/* Left-hand side keysym: resolve the name and append it, bounded by MAX_LHS_LEN. */
585 keysym = xkb_keysym_from_name(val.string.str, XKB_KEYSYM_NO_FLAGS);
586 if (keysym == XKB_KEY_NoSymbol) {
587 scanner_err(s, "unrecognized keysym \"%s\" on left-hand side",
591 if (production.len + 1 > MAX_LHS_LEN) {
592 scanner_warn(s, "too many keysyms (%d) on left-hand side; skipping line",
596 production.lhs[production.len++] = keysym;
598 production.modmask = 0;
/* Modifier list: each entry is an identifier, optionally preceded by a tilde token. */
610 if (tok != TOK_TILDE && tok != TOK_IDENT)
613 if (tok == TOK_TILDE) {
618 if (tok != TOK_IDENT)
621 mod = resolve_modifier(val.string.str);
622 if (mod == XKB_MOD_INVALID) {
623 scanner_err(s, "unrecognized modifier \"%s\"",
/*
 * The modifier bit always joins modmask; in mods it is either cleared or
 * set. The test choosing between the two is not shown (presumably the
 * tilde prefix selects clearing).
 */
628 production.modmask |= 1 << mod;
630 production.mods &= ~(1 << mod);
632 production.mods |= 1 << mod;
/* Right-hand side: at most one string and at most one keysym. */
638 switch (tok = lex(s, &val)) {
640 if (production.has_string) {
641 scanner_warn(s, "right-hand side can have at most one string; skipping line");
/*
 * NOTE(review): lex() takes string.len after appending the terminating
 * '\0', so len is presumably at least 1 for any string token and this
 * emptiness test may never fire - confirm against scanner_buf_append().
 */
644 if (val.string.len <= 0) {
645 scanner_warn(s, "right-hand side string must not be empty; skipping line");
648 if (val.string.len >= sizeof(production.string)) {
649 scanner_warn(s, "right-hand side string is too long; skipping line");
/* The copy is bounded by the length test just above. */
652 strcpy(production.string, val.string.str);
653 production.has_string = true;
656 keysym = xkb_keysym_from_name(val.string.str, XKB_KEYSYM_NO_FLAGS);
657 if (keysym == XKB_KEY_NoSymbol) {
658 scanner_err(s, "unrecognized keysym \"%s\" on right-hand side",
662 if (production.has_keysym) {
663 scanner_warn(s, "right-hand side can have at most one keysym; skipping line");
666 production.keysym = keysym;
667 production.has_keysym = true;
/* End of line: the production is complete if it has a string or a keysym. */
669 case TOK_END_OF_LINE:
670 if (!production.has_string && !production.has_keysym) {
671 scanner_warn(s, "right-hand side must have at least one of string or keysym; skipping line");
674 add_production(table, s, &production);
/* Error handling: TOK_ERROR was already reported where it was produced. */
681 if (tok != TOK_ERROR)
682 scanner_err(s, "unexpected token");
/* num_errors is compared with MAX_ERRORS; its update is on a line not shown. */
685 if (num_errors <= MAX_ERRORS)
688 scanner_err(s, "too many errors");
692 scanner_err(s, "failed to parse file");
/* Loop until the current line (or the input) ends; the loop body is not shown. */
696 while (tok != TOK_END_OF_LINE && tok != TOK_END_OF_FILE)
/*
 * parse_string() - parse Compose data held in memory into `table`.
 *
 * A scanner is set up over `string` (`len` bytes) under the name
 * `file_name`, with a NULL private pointer, and parse() is run at include
 * depth 0. After parsing, the node and utf8 arrays of the table are
 * shrunk.
 *
 * NOTE(review): the declaration of the scanner and the return statements
 * are on lines not visible in this listing.
 */
705 parse_string(struct xkb_compose_table *table, const char *string, size_t len,
706 const char *file_name)
709 scanner_init(&s, table->ctx, string, len, file_name, NULL);
710 if (!parse(table, &s, 0))
712 /* Maybe the allocator can use the excess space. */
713 darray_shrink(table->nodes);
714 darray_shrink(table->utf8);
/*
 * parse_file() - parse an already opened Compose file into `table`.
 *
 * The contents of `file` are mapped with map_file(), parsed with
 * parse_string() under the name `file_name`, and unmapped again. A
 * message naming the file is logged when the contents cannot be read.
 *
 * NOTE(review): the local declarations, the failure test and the return
 * statements are on lines not visible in this listing. The logged
 * strerror(errno) text assumes map_file() sets errno on failure -
 * confirm.
 */
719 parse_file(struct xkb_compose_table *table, FILE *file, const char *file_name)
725 ok = map_file(file, &string, &size);
728 XKB_LOG_MESSAGE_NO_ID,
729 "Couldn't read Compose file %s: %s\n",
730 file_name, strerror(errno));
/* The mapping is only needed while parse_string() runs. */
734 ok = parse_string(table, string, size, file_name);
735 unmap_file(string, size);