2 * Copyright © 2013 Ran Benita <ran234@gmail.com>
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
21 * DEALINGS IN THE SOFTWARE.
24 /******************************************************************
26 Copyright 1992 by Oki Technosystems Laboratory, Inc.
27 Copyright 1992 by Fuji Xerox Co., Ltd.
29 Permission to use, copy, modify, distribute, and sell this software
30 and its documentation for any purpose is hereby granted without fee,
31 provided that the above copyright notice appear in all copies and
32 that both that copyright notice and this permission notice appear
33 in supporting documentation, and that the name of Oki Technosystems
34 Laboratory and Fuji Xerox not be used in advertising or publicity
35 pertaining to distribution of the software without specific, written
37 Oki Technosystems Laboratory and Fuji Xerox make no representations
38 about the suitability of this software for any purpose. It is provided
39 "as is" without express or implied warranty.
41 OKI TECHNOSYSTEMS LABORATORY AND FUJI XEROX DISCLAIM ALL WARRANTIES
42 WITH REGARD TO THIS SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF
43 MERCHANTABILITY AND FITNESS, IN NO EVENT SHALL OKI TECHNOSYSTEMS
44 LABORATORY AND FUJI XEROX BE LIABLE FOR ANY SPECIAL, INDIRECT OR
45 CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS
46 OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE
47 OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE
48 OR PERFORMANCE OF THIS SOFTWARE.
50 Author: Yasuhiro Kawai Oki Technosystems Laboratory
51 Author: Kazunori Nishihara Fuji Xerox
53 ******************************************************************/
60 #include "scanner-utils.h"
67 * Grammar adapted from libX11/modules/im/ximcp/imLcPrs.c.
68 * See also the XCompose(5) manpage.
70 * FILE ::= { [PRODUCTION] [COMMENT] "\n" | INCLUDE }
71 * INCLUDE ::= "include" '"' INCLUDE_STRING '"'
72 * PRODUCTION ::= LHS ":" RHS [ COMMENT ]
73 * COMMENT ::= "#" {<any character except null or newline>}
74 * LHS ::= EVENT { EVENT }
75 * EVENT ::= [MODIFIER_LIST] "<" keysym ">"
76 * MODIFIER_LIST ::= (["!"] {MODIFIER} ) | "None"
77 * MODIFIER ::= ["~"] MODIFIER_NAME
78 * MODIFIER_NAME ::= ("Ctrl"|"Lock"|"Caps"|"Shift"|"Alt"|"Meta")
79 * RHS ::= ( STRING | keysym | STRING keysym )
80 * STRING ::= '"' { CHAR } '"'
81 * CHAR ::= GRAPHIC_CHAR | ESCAPED_CHAR
82 * GRAPHIC_CHAR ::= locale (codeset) dependent code
83 * ESCAPED_CHAR ::= ('\\' | '\"' | OCTAL | HEX )
84 * OCTAL ::= '\' OCTAL_CHAR [OCTAL_CHAR [OCTAL_CHAR]]
85 * OCTAL_CHAR ::= (0|1|2|3|4|5|6|7)
86 * HEX ::= '\' (x|X) HEX_CHAR [HEX_CHAR]
87 * HEX_CHAR ::= (0|1|2|3|4|5|6|7|8|9|A|B|C|D|E|F|a|b|c|d|e|f)
89 * INCLUDE_STRING is a filesystem path, with the following %-expansions:
91 * %H - The user's home directory (the $HOME environment variable).
92 * %L - The name of the locale specific Compose file (e.g.,
93 * "/usr/share/X11/locale/<localename>/Compose").
94 * %S - The name of the system directory for Compose files (e.g.,
95 * "/usr/share/X11/locale").
112 /* Values returned with some tokens, like yylval. */
115 /* Still \0-terminated. */
/*
 * Scan the next token of a Compose file from the scanner s.
 *
 * Whitespace and #-comments are skipped first. A newline met while skipping
 * yields TOK_END_OF_LINE, and end of input yields TOK_END_OF_FILE. For any
 * other token the start position is saved in s->token_line and
 * s->token_column, and the following forms are tried in order:
 *
 *   - a left-hand side keysym enclosed in angle brackets, returned as
 *     TOK_LHS_KEYSYM;
 *   - the single characters colon, exclamation mark and tilde;
 *   - a double-quoted string literal, which may contain backslash,
 *     double-quote, hexadecimal (x or X) and octal escapes, and which must
 *     pass is_valid_utf8();
 *   - an identifier that starts with an is_alpha() character or underscore
 *     and continues with is_alnum() characters or underscores; the word
 *     include is checked for specially.
 *
 * Illegal or unknown escape sequences inside a string literal are reported
 * as warnings. Unterminated or over-long literals are reported as errors.
 *
 * For keysym, string and identifier tokens, val->string.str points at the
 * scanner buffer s->buf and val->string.len is s->buf_pos. The buffer is
 * NUL-terminated before that; the buf_pos - 1 passed to is_valid_utf8()
 * suggests buf_pos counts the terminator (TODO confirm against
 * scanner_buf_append()).
 *
 * Input matching none of the forms is discarded up to the end of the line
 * and reported as an unrecognized token.
 */
121 static enum rules_token
122 lex(struct scanner *s, union lvalue *val)
124 skip_more_whitespace_and_comments:
/* Skip whitespace; a newline terminates the current line. */
126 while (is_space(scanner_peek(s)))
127 if (scanner_next(s) == '\n')
128 return TOK_END_OF_LINE;
/* Skip a comment up to the end of the line, then rescan from the top. */
131 if (scanner_chr(s, '#')) {
132 scanner_skip_to_eol(s);
133 goto skip_more_whitespace_and_comments;
136 /* See if we're done. */
137 if (scanner_eof(s)) return TOK_END_OF_FILE;
/* Remember where the new token starts. */
140 s->token_line = s->line;
141 s->token_column = s->column;
/* Left-hand side keysym: collect everything up to the closing bracket. */
145 if (scanner_chr(s, '<')) {
146 while (scanner_peek(s) != '>' && !scanner_eol(s) && !scanner_eof(s))
147 scanner_buf_append(s, scanner_next(s));
148 if (!scanner_chr(s, '>')) {
149 scanner_err(s, "unterminated keysym literal");
152 if (!scanner_buf_append(s, '\0')) {
153 scanner_err(s, "keysym literal is too long");
156 val->string.str = s->buf;
157 val->string.len = s->buf_pos;
158 return TOK_LHS_KEYSYM;
162 if (scanner_chr(s, ':'))
164 if (scanner_chr(s, '!'))
166 if (scanner_chr(s, '~'))
169 /* String literal. */
170 if (scanner_chr(s, '\"')) {
171 while (!scanner_eof(s) && !scanner_eol(s) && scanner_peek(s) != '\"') {
172 if (scanner_chr(s, '\\')) {
/* start_pos is the position just past the backslash consumed above. */
174 size_t start_pos = s->pos;
175 if (scanner_chr(s, '\\')) {
176 scanner_buf_append(s, '\\');
178 else if (scanner_chr(s, '"')) {
179 scanner_buf_append(s, '"');
181 else if (scanner_chr(s, 'x') || scanner_chr(s, 'X')) {
182 if (scanner_hex(s, &o) && is_valid_char((char) o)) {
183 scanner_buf_append(s, (char) o);
185 scanner_warn_with_code(s,
186 XKB_WARNING_INVALID_ESCAPE_SEQUENCE,
187 "illegal hexadecimal escape sequence (%.*s) in string literal",
/* Back up by one so the reported text includes the backslash. */
188 (int) (s->pos - start_pos + 1), &s->s[start_pos - 1]);
191 else if (scanner_oct(s, &o) && is_valid_char((char) o)) {
192 scanner_buf_append(s, (char) o);
/* Input was consumed after the backslash, yet no valid octal value resulted. */
194 else if (s->pos > start_pos) {
195 scanner_warn_with_code(s,
196 XKB_WARNING_INVALID_ESCAPE_SEQUENCE,
197 "illegal octal escape sequence (%.*s) in string literal",
198 (int) (s->pos - start_pos + 1), &s->s[start_pos - 1]);
202 scanner_warn_with_code(s,
203 XKB_WARNING_UNKNOWN_CHAR_ESCAPE_SEQUENCE,
204 "unknown escape sequence (\\%c) in string literal",
209 scanner_buf_append(s, scanner_next(s));
212 if (!scanner_chr(s, '\"')) {
213 scanner_err(s, "unterminated string literal");
216 if (!scanner_buf_append(s, '\0')) {
217 scanner_err(s, "string literal is too long");
220 if (!is_valid_utf8(s->buf, s->buf_pos - 1)) {
221 scanner_err(s, "string literal is not a valid UTF-8 string");
224 val->string.str = s->buf;
225 val->string.len = s->buf_pos;
229 /* Identifier or include. */
230 if (is_alpha(scanner_peek(s)) || scanner_peek(s) == '_') {
232 while (is_alnum(scanner_peek(s)) || scanner_peek(s) == '_')
233 scanner_buf_append(s, scanner_next(s));
234 if (!scanner_buf_append(s, '\0')) {
235 scanner_err(s, "identifier is too long");
239 if (streq(s->buf, "include"))
242 val->string.str = s->buf;
243 val->string.len = s->buf_pos;
247 /* Discard rest of line. */
248 scanner_skip_to_eol(s);
250 scanner_err(s, "unrecognized token");
/*
 * Scan the quoted path that follows the include keyword.
 *
 * Leading whitespace is skipped; a newline met there yields
 * TOK_END_OF_LINE. The path must open with a double quote and be closed by
 * one before the end of the line or of the input; otherwise an error is
 * reported.
 *
 * While the path is copied into the scanner buffer, percent sequences are
 * expanded:
 *   %%  a literal percent sign;
 *   %H  the value of HOME as returned by xkb_context_getenv();
 *   %L  the path returned by get_locale_compose_file_path() for
 *       table->locale;
 *   %S  the path returned by get_xlocaledir_path().
 * Any other character after the percent sign is reported as an unknown
 * format. An expansion that does not fit in the scanner buffer is reported
 * as an error.
 *
 * On success val_out->string.str points at s->buf, val_out->string.len is
 * s->buf_pos, and TOK_INCLUDE_STRING is returned.
 */
254 static enum rules_token
255 lex_include_string(struct scanner *s, struct xkb_compose_table *table,
256 union lvalue *val_out)
258 while (is_space(scanner_peek(s)))
259 if (scanner_next(s) == '\n')
260 return TOK_END_OF_LINE;
/* Remember where the path token starts. */
262 s->token_line = s->line;
263 s->token_column = s->column;
266 if (!scanner_chr(s, '\"')) {
267 scanner_err(s, "include statement must be followed by a path");
271 while (!scanner_eof(s) && !scanner_eol(s) && scanner_peek(s) != '\"') {
272 if (scanner_chr(s, '%')) {
273 if (scanner_chr(s, '%')) {
274 scanner_buf_append(s, '%');
276 else if (scanner_chr(s, 'H')) {
277 const char *home = xkb_context_getenv(table->ctx, "HOME");
279 scanner_err(s, "%%H was used in an include statement, but the HOME environment variable is not set");
282 if (!scanner_buf_appends(s, home)) {
283 scanner_err(s, "include path after expanding %%H is too long");
287 else if (scanner_chr(s, 'L')) {
/*
 * NOTE(review): path is a non-const char *, presumably allocated by the
 * callee and owned here; confirm it is released on every exit.
 */
288 char *path = get_locale_compose_file_path(table->ctx, table->locale);
290 scanner_err(s, "failed to expand %%L to the locale Compose file");
293 if (!scanner_buf_appends(s, path)) {
295 scanner_err(s, "include path after expanding %%L is too long");
300 else if (scanner_chr(s, 'S')) {
301 const char *xlocaledir = get_xlocaledir_path(table->ctx);
302 if (!scanner_buf_appends(s, xlocaledir)) {
303 scanner_err(s, "include path after expanding %%S is too long");
308 scanner_err(s, "unknown %% format (%c) in include statement", scanner_peek(s));
/* Ordinary path character: copy it through unchanged. */
312 scanner_buf_append(s, scanner_next(s));
315 if (!scanner_chr(s, '\"')) {
316 scanner_err(s, "unterminated include statement");
319 if (!scanner_buf_append(s, '\0')) {
320 scanner_err(s, "include path is too long");
323 val_out->string.str = s->buf;
324 val_out->string.len = s->buf_pos;
325 return TOK_INCLUDE_STRING;
/*
 * Keysyms of the left-hand side sequence, in input order. parse() refuses
 * to store more than MAX_LHS_LEN entries.
 */
329 xkb_keysym_t lhs[MAX_LHS_LEN];
333 /* At least one of these is true. */
337 /* The matching is as follows: (active_mods & modmask) == mods. */
/*
 * parse() either ORs one bit per named modifier into modmask or sets it to
 * ALL_MODS_MASK.
 */
338 xkb_mod_mask_t modmask;
/*
 * Add one parsed production to the table.
 *
 * The left-hand side keysyms in production->lhs are inserted one at a time
 * into the ternary search tree kept in table->nodes, appending new nodes
 * where needed. The node reached by the last keysym is turned into a leaf
 * that records the result: the offset of a copy of production->string
 * inside table->utf8 when has_string is set, and production->keysym when
 * has_keysym is set.
 *
 * Conflicts with sequences already in the tree are reported as warnings
 * through s:
 *   - an existing sequence that is a prefix of the new one is overridden;
 *   - an exact duplicate (same string and same keysym) is skipped;
 *   - an existing sequence with a different result is overridden;
 *   - a new sequence that is a prefix of an existing one is skipped.
 *
 * A warning is also issued when the node count comes within MAX_LHS_LEN of
 * MAX_COMPOSE_NODES.
 */
343 add_production(struct xkb_compose_table *table, struct scanner *s,
344 const struct production *production)
346 unsigned lhs_pos = 0;
/* Start the walk at index 1, or at 0 when the array holds a single node. */
347 uint32_t curr = darray_size(table->nodes) == 1 ? 0 : 1;
348 uint32_t *pptr = NULL;
349 struct compose_node *node = NULL;
351 /* Warn before potentially going over the limit, discard silently after. */
352 if (darray_size(table->nodes) + production->len + MAX_LHS_LEN > MAX_COMPOSE_NODES)
353 scanner_warn(s, "too many sequences for one Compose file; will ignore further lines");
354 if (darray_size(table->nodes) + production->len >= MAX_COMPOSE_NODES)
358 * Insert the sequence to the ternary search tree, creating new nodes as
361 * TODO: We insert in the order given, this means some inputs can create
362 * long O(n) chains, which results in total O(n^2) parsing time. We should
363 * ensure the tree is reasonably balanced somehow.
366 const xkb_keysym_t keysym = production->lhs[lhs_pos];
367 const bool last = lhs_pos + 1 == production->len;
371 * Create a new node and update the parent pointer to it.
372 * Update the pointer first because the append invalidates it.
374 struct compose_node new = {
383 curr = darray_size(table->nodes);
388 darray_append(table->nodes, new);
391 node = &darray_item(table->nodes, curr);
393 if (keysym < node->keysym) {
396 } else if (keysym > node->keysym) {
401 scanner_warn(s, "a sequence already exists which is a prefix of this sequence; overriding");
402 node->internal.eqkid = 0;
403 node->internal.is_leaf = false;
406 pptr = &node->internal.eqkid;
407 curr = node->internal.eqkid;
411 (node->leaf.utf8 == 0 && !production->has_string) ||
413 node->leaf.utf8 != 0 && production->has_string &&
414 streq(&darray_item(table->utf8, node->leaf.utf8),
418 (node->leaf.keysym == XKB_KEY_NoSymbol && !production->has_keysym) ||
420 node->leaf.keysym != XKB_KEY_NoSymbol && production->has_keysym &&
421 node->leaf.keysym == production->keysym
423 if (same_string && same_keysym) {
424 scanner_warn(s, "this compose sequence is a duplicate of another; skipping line");
427 scanner_warn(s, "this compose sequence already exists; overriding");
429 } else if (node->internal.eqkid != 0) {
430 scanner_warn(s, "this compose sequence is a prefix of another; skipping line");
433 node->is_leaf = true;
434 if (production->has_string) {
435 node->leaf.utf8 = darray_size(table->utf8);
436 darray_append_items(table->utf8, production->string,
437 strlen(production->string) + 1);
439 if (production->has_keysym) {
440 node->leaf.keysym = production->keysym;
447 /* Should match resolve_modifier(). */
/*
 * Mask with bits 0 through 3 set. parse() stores it in production.modmask,
 * and builds individual bits by shifting 1 by the index that
 * resolve_modifier() returns.
 */
448 #define ALL_MODS_MASK ((1 << 0) | (1 << 1) | (1 << 2) | (1 << 3))
/*
 * Look up a modifier name in a fixed, function-local table.
 *
 * name is compared with each entry using streq(), so the match is exact.
 * XKB_MOD_INVALID is returned when no entry matches.
 */
450 static xkb_mod_index_t
451 resolve_modifier(const char *name)
453 static const struct {
465 for (unsigned i = 0; i < ARRAY_SIZE(mods); i++)
466 if (streq(name, mods[i].name))
469 return XKB_MOD_INVALID;
/*
 * Forward declaration: do_include() calls parse() on the included file, and
 * parse() in turn calls do_include().
 */
473 parse(struct xkb_compose_table *table, struct scanner *s,
474 unsigned include_depth);
/*
 * Parse the Compose file at path into the same table, on behalf of an
 * include statement met while scanning s.
 *
 * An error is reported through s when include_depth has reached
 * MAX_INCLUDE_DEPTH, when the file cannot be opened, or when map_file()
 * fails. Otherwise the mapped contents are parsed with a fresh scanner that
 * shares s->priv, at depth include_depth + 1, and the mapping is released
 * with unmap_file().
 *
 * parse() treats a false result from this function as failure.
 */
477 do_include(struct xkb_compose_table *table, struct scanner *s,
478 const char *path, unsigned include_depth)
484 struct scanner new_s;
/* Guard against runaway or cyclic includes. */
486 if (include_depth >= MAX_INCLUDE_DEPTH) {
487 scanner_err(s, "maximum include depth (%d) exceeded; maybe there is an include loop?",
492 file = fopen(path, "rb");
494 scanner_err(s, "failed to open included Compose file \"%s\": %s",
495 path, strerror(errno));
499 ok = map_file(file, &string, &size);
501 scanner_err(s, "failed to read included Compose file \"%s\": %s",
502 path, strerror(errno));
/* The nested scanner reports positions against path, not the includer. */
506 scanner_init(&new_s, table->ctx, string, size, path, s->priv);
508 ok = parse(table, &new_s, include_depth + 1);
513 unmap_file(string, size);
/*
 * Parse the Compose text held by scanner s and add its productions to table.
 *
 * Tokens are pulled with lex() and a struct production is filled in as a
 * line is read; its has_keysym, has_string and modmask fields are cleared
 * before tokens are consumed.
 *
 *   - Include: the path is read with lex_include_string(); when the line
 *     then ends, do_include() is called with the current include_depth.
 *   - Left-hand side: each keysym name is resolved with
 *     xkb_keysym_from_name() and appended to production.lhs, up to
 *     MAX_LHS_LEN entries. A modifier list may come before a keysym:
 *     the identifier None sets modmask to ALL_MODS_MASK; otherwise each
 *     modifier name is resolved with resolve_modifier(), its bit is added
 *     to modmask, and its bit in mods is set or cleared.
 *   - Right-hand side: at most one string and at most one keysym are
 *     accepted, and at least one of them is required. The string must be
 *     non-empty and must fit in production.string.
 *   - At the end of a complete line, add_production() is called.
 *
 * Recoverable problems are reported as warnings that mention skipping the
 * line. Unexpected tokens and unresolvable names are reported as errors;
 * the error count is compared with MAX_ERRORS, beyond which too many errors
 * and failed to parse file are reported. Tokens up to the end of the line
 * or of the file are consumed when resynchronizing.
 */
520 parse(struct xkb_compose_table *table, struct scanner *s,
521 unsigned include_depth)
523 enum rules_token tok;
526 struct production production;
527 enum { MAX_ERRORS = 10 };
532 production.has_keysym = false;
533 production.has_string = false;
535 production.modmask = 0;
540 switch (tok = lex(s, &val)) {
541 case TOK_END_OF_LINE:
543 case TOK_END_OF_FILE:
552 switch (tok = lex_include_string(s, table, &val)) {
553 case TOK_INCLUDE_STRING:
560 switch (tok = lex(s, &val)) {
561 case TOK_END_OF_LINE:
562 if (!do_include(table, s, val.string.str, include_depth))
574 if (production.len <= 0) {
575 scanner_warn(s, "expected at least one keysym on left-hand side; skipping line");
580 if (streq(val.string.str, "None")) {
582 production.modmask = ALL_MODS_MASK;
585 goto lhs_mod_list_tok;
587 goto lhs_mod_list_tok;
589 production.modmask = ALL_MODS_MASK;
600 keysym = xkb_keysym_from_name(val.string.str, XKB_KEYSYM_NO_FLAGS);
601 if (keysym == XKB_KEY_NoSymbol) {
602 scanner_err(s, "unrecognized keysym \"%s\" on left-hand side",
/* Check the bound before storing so lhs[] is never overrun. */
606 if (production.len + 1 > MAX_LHS_LEN) {
607 scanner_warn(s, "too many keysyms (%d) on left-hand side; skipping line",
611 production.lhs[production.len++] = keysym;
613 production.modmask = 0;
625 if (tok != TOK_TILDE && tok != TOK_IDENT)
628 if (tok == TOK_TILDE) {
633 if (tok != TOK_IDENT)
636 mod = resolve_modifier(val.string.str);
637 if (mod == XKB_MOD_INVALID) {
638 scanner_err(s, "unrecognized modifier \"%s\"",
/*
 * NOTE(review): the literal 1 is a signed int. ALL_MODS_MASK covers bits
 * 0 through 3 only, so mod is expected to stay small and the shift in
 * range; confirm against the table in resolve_modifier().
 */
643 production.modmask |= 1 << mod;
645 production.mods &= ~(1 << mod);
647 production.mods |= 1 << mod;
653 switch (tok = lex(s, &val)) {
655 if (production.has_string) {
656 scanner_warn(s, "right-hand side can have at most one string; skipping line");
659 if (val.string.len <= 0) {
660 scanner_warn(s, "right-hand side string must not be empty; skipping line");
663 if (val.string.len >= sizeof(production.string)) {
664 scanner_warn(s, "right-hand side string is too long; skipping line");
/* The copy is bounded by the size check just above. */
667 strcpy(production.string, val.string.str);
668 production.has_string = true;
671 keysym = xkb_keysym_from_name(val.string.str, XKB_KEYSYM_NO_FLAGS);
672 if (keysym == XKB_KEY_NoSymbol) {
673 scanner_err(s, "unrecognized keysym \"%s\" on right-hand side",
677 if (production.has_keysym) {
678 scanner_warn(s, "right-hand side can have at most one keysym; skipping line");
681 production.keysym = keysym;
682 production.has_keysym = true;
684 case TOK_END_OF_LINE:
685 if (!production.has_string && !production.has_keysym) {
686 scanner_warn(s, "right-hand side must have at least one of string or keysym; skipping line");
689 add_production(table, s, &production);
/* lex() has already reported TOK_ERROR tokens; avoid a second message. */
696 if (tok != TOK_ERROR)
697 scanner_err(s, "unexpected token");
700 if (num_errors <= MAX_ERRORS)
703 scanner_err(s, "too many errors");
707 scanner_err(s, "failed to parse file");
/* Resynchronize at the next line boundary. */
711 while (tok != TOK_END_OF_LINE && tok != TOK_END_OF_FILE)
/*
 * Parse a Compose description held in memory.
 *
 * A scanner is initialized over the len bytes at string, with file_name as
 * the name passed to scanner_init() and no private data. parse() is then
 * run at include depth 0, and the node and UTF-8 arrays of the table are
 * shrunk afterwards.
 */
720 parse_string(struct xkb_compose_table *table, const char *string, size_t len,
721 const char *file_name)
724 scanner_init(&s, table->ctx, string, len, file_name, NULL);
725 if (!parse(table, &s, 0))
727 /* Maybe the allocator can use the excess space. */
728 darray_shrink(table->nodes);
729 darray_shrink(table->utf8);
/*
 * Parse a Compose description from an already opened FILE.
 *
 * The file is mapped with map_file(). A read failure is logged with
 * file_name and strerror(errno). Otherwise the mapped bytes are handed to
 * parse_string() and the mapping is released with unmap_file().
 */
734 parse_file(struct xkb_compose_table *table, FILE *file, const char *file_name)
740 ok = map_file(file, &string, &size);
743 XKB_LOG_MESSAGE_NO_ID,
744 "Couldn't read Compose file %s: %s\n",
745 file_name, strerror(errno));
749 ok = parse_string(table, string, size, file_name);
750 unmap_file(string, size);