2 * Copyright © 2013 Ran Benita <ran234@gmail.com>
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
21 * DEALINGS IN THE SOFTWARE.
24 /******************************************************************
26 Copyright 1992 by Oki Technosystems Laboratory, Inc.
27 Copyright 1992 by Fuji Xerox Co., Ltd.
29 Permission to use, copy, modify, distribute, and sell this software
30 and its documentation for any purpose is hereby granted without fee,
31 provided that the above copyright notice appear in all copies and
32 that both that copyright notice and this permission notice appear
33 in supporting documentation, and that the name of Oki Technosystems
34 Laboratory and Fuji Xerox not be used in advertising or publicity
35 pertaining to distribution of the software without specific, written
37 Oki Technosystems Laboratory and Fuji Xerox make no representations
38 about the suitability of this software for any purpose. It is provided
39 "as is" without express or implied warranty.
41 OKI TECHNOSYSTEMS LABORATORY AND FUJI XEROX DISCLAIM ALL WARRANTIES
42 WITH REGARD TO THIS SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF
43 MERCHANTABILITY AND FITNESS, IN NO EVENT SHALL OKI TECHNOSYSTEMS
44 LABORATORY AND FUJI XEROX BE LIABLE FOR ANY SPECIAL, INDIRECT OR
45 CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS
46 OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE
47 OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE
48 OR PERFORMANCE OF THIS SOFTWARE.
50 Author: Yasuhiro Kawai Oki Technosystems Laboratory
51 Author: Kazunori Nishihara Fuji Xerox
53 ******************************************************************/
58 #include "scanner-utils.h"
/* Capacity of production.lhs[]; parse() skips a line whose left-hand side would need more keysyms than this. */
64 #define MAX_LHS_LEN 10
/* do_include() reports an error once include_depth has reached this value. */
65 #define MAX_INCLUDE_DEPTH 5
68 * Grammar adapted from libX11/modules/im/ximcp/imLcPrs.c.
69 * See also the XCompose(5) manpage.
71 * FILE ::= { [PRODUCTION] [COMMENT] "\n" | INCLUDE }
72 * INCLUDE ::= "include" '"' INCLUDE_STRING '"'
73 * PRODUCTION ::= LHS ":" RHS [ COMMENT ]
74 * COMMENT ::= "#" {<any character except null or newline>}
75 * LHS ::= EVENT { EVENT }
76 * EVENT ::= [MODIFIER_LIST] "<" keysym ">"
77 * MODIFIER_LIST ::= (["!"] {MODIFIER} ) | "None"
78 * MODIFIER ::= ["~"] MODIFIER_NAME
79 * MODIFIER_NAME ::= ("Ctrl"|"Lock"|"Caps"|"Shift"|"Alt"|"Meta")
80 * RHS ::= ( STRING | keysym | STRING keysym )
81 * STRING ::= '"' { CHAR } '"'
82 * CHAR ::= GRAPHIC_CHAR | ESCAPED_CHAR
83 * GRAPHIC_CHAR ::= locale (codeset) dependent code
84 * ESCAPED_CHAR ::= ('\\' | '\"' | OCTAL | HEX )
85 * OCTAL ::= '\' OCTAL_CHAR [OCTAL_CHAR [OCTAL_CHAR]]
86 * OCTAL_CHAR ::= (0|1|2|3|4|5|6|7)
87 * HEX ::= '\' (x|X) HEX_CHAR [HEX_CHAR]
88 * HEX_CHAR ::= (0|1|2|3|4|5|6|7|8|9|A|B|C|D|E|F|a|b|c|d|e|f)
90 * INCLUDE_STRING is a filesystem path, with the following %-expansions:
92 * %H - The user's home directory (the $HOME environment variable).
93 * %L - The name of the locale specific Compose file (e.g.,
94 * "/usr/share/X11/locale/<localename>/Compose").
95 * %S - The name of the system directory for Compose files (e.g.,
96 * "/usr/share/X11/locale").
113 /* Values returned with some tokens, like yylval. */
116 /* Still \0-terminated. */
/*
 * Scan the next token from the scanner s.
 *
 * Leading whitespace is skipped first.  The visible paths return
 * TOK_END_OF_LINE, TOK_END_OF_FILE or TOK_LHS_KEYSYM; the remaining
 * paths handle string literals, identifiers (with the word include
 * compared specially) and unrecognized input.  Tokens that carry text
 * set val->string.str to s->buf and val->string.len to s->buf_pos, so
 * the text is stored in the scanner buffer rather than copied.
 *
 * NOTE(review): several lines of this function are not visible in this
 * view; the comments below describe only the statements shown.
 */
122 static enum rules_token
123 lex(struct scanner *s, union lvalue *val)
125 skip_more_whitespace_and_comments:
/* Consume whitespace characters. */
127 while (is_space(peek(s)))
129 return TOK_END_OF_LINE;
134 goto skip_more_whitespace_and_comments;
137 /* See if we're done. */
138 if (eof(s)) return TOK_END_OF_FILE;
/* Record the line and column at which the upcoming token starts. */
141 s->token_line = s->line;
142 s->token_column = s->column;
/*
 * Keysym literal: collect characters until a closing angle bracket or
 * the end of the line.
 * NOTE(review): this condition does not test eof(s), and the result of
 * buf_append() is ignored inside the loop; confirm that the loop
 * terminates when the input ends before the closing bracket is seen.
 */
147 while (peek(s) != '>' && !eol(s))
148 buf_append(s, next(s));
150 scanner_err(s, "unterminated keysym literal");
/* The terminator is appended with a checked call; failure is reported as an over-long literal. */
153 if (!buf_append(s, '\0')) {
154 scanner_err(s, "keysym literal is too long");
/*
 * The length is read after the terminator was appended, so it
 * presumably counts the terminator as well (compare the buf_pos - 1
 * passed to is_valid_utf8 in the string path) - TODO confirm.
 */
157 val->string.str = s->buf;
158 val->string.len = s->buf_pos;
159 return TOK_LHS_KEYSYM;
170 /* String literal. */
/*
 * Characters are collected until end of input, end of line or the
 * closing double quote.  The visible escape branches cover an escaped
 * double quote, a hexadecimal escape introduced by x or X, and an octal
 * escape; an unknown escape only produces a warning.
 * NOTE(review): the buf_append() results inside this loop are ignored;
 * only the final terminator append is checked.
 */
172 while (!eof(s) && !eol(s) && peek(s) != '\"') {
178 else if (chr(s, '"')) {
181 else if (chr(s, 'x') || chr(s, 'X')) {
183 buf_append(s, (char) o);
185 scanner_warn(s, "illegal hexadecimal escape sequence in string literal");
187 else if (oct(s, &o)) {
188 buf_append(s, (char) o);
191 scanner_warn(s, "unknown escape sequence (%c) in string literal", peek(s));
195 buf_append(s, next(s));
199 scanner_err(s, "unterminated string literal");
202 if (!buf_append(s, '\0')) {
203 scanner_err(s, "string literal is too long");
/* UTF-8 validation covers buf_pos - 1 bytes, i.e. everything except the last byte appended above. */
206 if (!is_valid_utf8(s->buf, s->buf_pos - 1)) {
207 scanner_err(s, "string literal is not a valid UTF-8 string");
210 val->string.str = s->buf;
211 val->string.len = s->buf_pos;
215 /* Identifier or include. */
/* An identifier starts with a letter or underscore and continues with alphanumerics or underscores. */
216 if (is_alpha(peek(s)) || peek(s) == '_') {
218 while (is_alnum(peek(s)) || peek(s) == '_')
219 buf_append(s, next(s));
220 if (!buf_append(s, '\0')) {
221 scanner_err(s, "identifier is too long");
/* The collected text is compared against the include keyword before the generic identifier value is filled in. */
225 if (streq(s->buf, "include"))
228 val->string.str = s->buf;
229 val->string.len = s->buf_pos;
233 /* Discard rest of line. */
236 scanner_err(s, "unrecognized token");
/*
 * Scan the path argument of an include statement.
 *
 * The path is accumulated in the scanner buffer while %-sequences are
 * expanded: %H from the HOME environment variable, %L from
 * get_locale_compose_file_path(table->locale) and %S from
 * get_xlocaledir_path().  On success val_out->string points at s->buf
 * with length s->buf_pos and TOK_INCLUDE_STRING is returned.
 *
 * NOTE(review): several lines of this function are not visible in this
 * view; the comments below describe only the statements shown.
 */
240 static enum rules_token
241 lex_include_string(struct scanner *s, struct xkb_compose_table *table,
242 union lvalue *val_out)
/* Consume whitespace before the path. */
244 while (is_space(peek(s)))
246 return TOK_END_OF_LINE;
/* Record the line and column at which the path token starts. */
248 s->token_line = s->line;
249 s->token_column = s->column;
253 scanner_err(s, "include statement must be followed by a path");
/* Collect the path until end of input, end of line or the closing double quote. */
257 while (!eof(s) && !eol(s) && peek(s) != '\"') {
262 else if (chr(s, 'H')) {
/* secure_getenv() is used for the lookup; an unset HOME is reported as an error below. */
263 const char *home = secure_getenv("HOME");
265 scanner_err(s, "%%H was used in an include statement, but the HOME environment variable is not set");
268 if (!buf_appends(s, home)) {
269 scanner_err(s, "include path after expanding %%H is too long");
273 else if (chr(s, 'L')) {
/*
 * NOTE(review): path is a non-const char *, so it is presumably owned
 * by this function; confirm it is released on both the success and the
 * too-long paths (the relevant lines are not visible here).
 */
274 char *path = get_locale_compose_file_path(table->locale);
276 scanner_err(s, "failed to expand %%L to the locale Compose file");
279 if (!buf_appends(s, path)) {
281 scanner_err(s, "include path after expanding %%L is too long");
286 else if (chr(s, 'S')) {
287 const char *xlocaledir = get_xlocaledir_path();
288 if (!buf_appends(s, xlocaledir)) {
289 scanner_err(s, "include path after expanding %%S is too long");
294 scanner_err(s, "unknown %% format (%c) in include statement", peek(s));
/* NOTE(review): the result of this buf_append() is ignored; only the terminator append below is checked. */
298 buf_append(s, next(s));
302 scanner_err(s, "unterminated include statement");
305 if (!buf_append(s, '\0')) {
306 scanner_err(s, "include path is too long");
/* The length is read after the terminator was appended. */
309 val_out->string.str = s->buf;
310 val_out->string.len = s->buf_pos;
311 return TOK_INCLUDE_STRING;
/* Left-hand side keysyms of one parsed line; parse() fills at most MAX_LHS_LEN entries. */
315 xkb_keysym_t lhs[MAX_LHS_LEN];
319 /* At least one of these is true. */
323 /* The matching is as follows: (active_mods & modmask) == mods. */
/* parse() sets a bit here for every modifier named on the line, or ALL_MODS_MASK in the branches that assign it. */
324 xkb_mod_mask_t modmask;
/*
 * Append one compose_node to table->nodes and return the index of the
 * appended element (array size minus one after the append).
 * Callers refetch any node pointers afterwards because the append may
 * reallocate the array (see the comments in add_production()).
 */
329 add_node(struct xkb_compose_table *table, xkb_keysym_t keysym)
331 struct compose_node new = {
336 darray_append(table->nodes, new);
337 return darray_size(table->nodes) - 1;
/*
 * Insert one parsed production into the node array of the table.
 *
 * The left-hand side keysyms are followed level by level, adding nodes
 * with add_node() where none match.  Conflicts with existing sequences
 * are reported through scanner_warn() on s: depending on the case the
 * existing entry is overridden or the new line is skipped.  On success
 * the leaf receives the string (as an offset into table->utf8) and/or
 * the keysym of the production.
 *
 * NOTE(review): several lines of this function are not visible in this
 * view; the comments below describe only the statements shown.
 */
341 add_production(struct xkb_compose_table *table, struct scanner *s,
342 const struct production *production)
346 struct compose_node *node;
/* node always points into table->nodes at index curr. */
349 node = &darray_item(table->nodes, curr);
352 * Insert the sequence to the trie, creating new nodes as needed.
354 * TODO: This can be sped up a bit by first trying the path that the
355 * previous production took, and only then doing the linear search
356 * through the trie levels. This will work because sequences in the
357 * Compose files are often clustered by a common prefix; especially
358 * in the 1st and 2nd keysyms, which is where the largest variation
359 * (thus, longest search) is.
361 for (lhs_pos = 0; lhs_pos < production->len; lhs_pos++) {
/* Search the current level for a node with the wanted keysym; a next value of 0 ends the level. */
362 while (production->lhs[lhs_pos] != node->keysym) {
363 if (node->next == 0) {
364 uint32_t next = add_node(table, production->lhs[lhs_pos]);
365 /* Refetch since add_node could have realloc()ed. */
366 node = &darray_item(table->nodes, curr);
371 node = &darray_item(table->nodes, curr);
/* The last keysym of the left-hand side has been reached. */
374 if (lhs_pos + 1 == production->len)
/* An existing result on this node is discarded with a warning before the sequence is extended. */
378 if (node->u.leaf.utf8 != 0 ||
379 node->u.leaf.keysym != XKB_KEY_NoSymbol) {
380 scanner_warn(s, "a sequence already exists which is a prefix of this sequence; overriding");
381 node->u.leaf.utf8 = 0;
382 node->u.leaf.keysym = XKB_KEY_NoSymbol;
/* Create the node for the following keysym and link it as the successor. */
386 uint32_t successor = add_node(table, production->lhs[lhs_pos + 1]);
387 /* Refetch since add_node could have realloc()ed. */
388 node = &darray_item(table->nodes, curr);
389 node->is_leaf = false;
390 node->u.successor = successor;
/* Descend to the next level. */
394 curr = node->u.successor;
395 node = &darray_item(table->nodes, curr);
/* A non-leaf at the end of the sequence means a longer sequence already passes through it. */
398 if (!node->is_leaf) {
399 scanner_warn(s, "this compose sequence is a prefix of another; skipping line");
/*
 * The leaf already has a result: compare it with the new production.
 * The checks treat a utf8 offset of 0 as the absence of a string and
 * XKB_KEY_NoSymbol as the absence of a keysym.
 */
403 if (node->u.leaf.utf8 != 0 || node->u.leaf.keysym != XKB_KEY_NoSymbol) {
405 (node->u.leaf.utf8 == 0 && !production->has_string) ||
407 node->u.leaf.utf8 != 0 && production->has_string &&
408 streq(&darray_item(table->utf8, node->u.leaf.utf8),
412 (node->u.leaf.keysym == XKB_KEY_NoSymbol && !production->has_keysym) ||
414 node->u.leaf.keysym != XKB_KEY_NoSymbol && production->has_keysym &&
415 node->u.leaf.keysym == production->keysym
417 if (same_string && same_keysym) {
418 scanner_warn(s, "this compose sequence is a duplicate of another; skipping line");
421 scanner_warn(s, "this compose sequence already exists; overriding");
/* The string is appended to table->utf8 including its terminator; the leaf stores the offset at which it starts. */
424 if (production->has_string) {
425 node->u.leaf.utf8 = darray_size(table->utf8);
426 darray_append_items(table->utf8, production->string,
427 strlen(production->string) + 1);
429 if (production->has_keysym) {
430 node->u.leaf.keysym = production->keysym;
434 /* Should match resolve_modifier(). */
/* Bits 0 through 3 set; parse() assigns this value to production.modmask. */
435 #define ALL_MODS_MASK ((1 << 0) | (1 << 1) | (1 << 2) | (1 << 3))
/*
 * Map a modifier name to a modifier index.
 *
 * The name is compared with streq() against each entry of the static
 * mods[] table; XKB_MOD_INVALID is returned when no entry matches.
 * NOTE(review): the table contents and the value returned on a match
 * are not visible in this view.
 */
437 static xkb_mod_index_t
438 resolve_modifier(const char *name)
440 static const struct {
452 for (unsigned i = 0; i < ARRAY_SIZE(mods); i++)
453 if (streq(name, mods[i].name))
456 return XKB_MOD_INVALID;
/* Forward declaration: do_include() below calls parse() on the included file. */
460 parse(struct xkb_compose_table *table, struct scanner *s,
461 unsigned include_depth);
/*
 * Parse the Compose file at path into table on behalf of an include
 * statement met while scanning s.
 *
 * The file is opened, mapped with map_file(), scanned with a fresh
 * scanner that reuses s->priv, and parsed with include_depth + 1.
 * Failures are reported through scanner_err() on the including scanner.
 *
 * NOTE(review): the lines that close the FILE handle are not visible in
 * this view; confirm it is closed on every exit path.
 */
464 do_include(struct xkb_compose_table *table, struct scanner *s,
465 const char *path, unsigned include_depth)
471 struct scanner new_s;
/* Refuse to nest further once the depth limit is reached. */
473 if (include_depth >= MAX_INCLUDE_DEPTH) {
474 scanner_err(s, "maximum include depth (%d) exceeded; maybe there is an include loop?",
479 file = fopen(path, "r");
481 scanner_err(s, "failed to open included Compose file \"%s\": %s",
482 path, strerror(errno));
486 ok = map_file(file, &string, &size);
/* NOTE(review): the message below assumes map_file() leaves a meaningful errno on failure - confirm. */
488 scanner_err(s, "failed to read included Compose file \"%s\": %s",
489 path, strerror(errno));
/* The nested scanner reads the mapped contents and is labelled with the included path. */
493 scanner_init(&new_s, table->ctx, string, size, path, s->priv);
495 ok = parse(table, &new_s, include_depth + 1);
500 unmap_file(string, size);
/*
 * Parse the Compose text held by scanner s into table.
 *
 * Tokens come from lex() and, for include paths, lex_include_string().
 * Each line is either an include statement, handled by do_include()
 * with the current include_depth, or a production whose left-hand side
 * keysyms and modifiers and right-hand side string and/or keysym are
 * collected into a local struct production and handed to
 * add_production().  Malformed lines are reported with scanner_warn()
 * or scanner_err(); errors are counted against MAX_ERRORS.
 *
 * NOTE(review): the labels and many connecting lines of this function
 * are not visible in this view; the comments below describe only the
 * statements shown.
 */
507 parse(struct xkb_compose_table *table, struct scanner *s,
508 unsigned include_depth)
510 enum rules_token tok;
513 struct production production;
514 enum { MAX_ERRORS = 10 };
/* Reset the per-line production state. */
519 production.has_keysym = false;
520 production.has_string = false;
522 production.modmask = 0;
527 switch (tok = lex(s, &val)) {
528 case TOK_END_OF_LINE:
530 case TOK_END_OF_FILE:
/* Include statement: the path is scanned with the dedicated lexer. */
539 switch (tok = lex_include_string(s, table, &val)) {
540 case TOK_INCLUDE_STRING:
547 switch (tok = lex(s, &val)) {
548 case TOK_END_OF_LINE:
/*
 * NOTE(review): val.string.str points into the scanner buffer and the
 * lex() call above ran after the path was scanned; confirm the path
 * text is still intact when do_include() reads it.
 */
549 if (!do_include(table, s, val.string.str, include_depth))
561 if (production.len <= 0) {
562 scanner_warn(s, "expected at least one keysym on left-hand side; skipping line");
/* The identifier None is special-cased: the full modifier mask is set before continuing. */
567 if (streq(val.string.str, "None")) {
569 production.modmask = ALL_MODS_MASK;
572 goto lhs_mod_list_tok;
574 goto lhs_mod_list_tok;
576 production.modmask = ALL_MODS_MASK;
/* Left-hand side keysym: resolve the name and append it, respecting MAX_LHS_LEN. */
587 keysym = xkb_keysym_from_name(val.string.str, XKB_KEYSYM_NO_FLAGS);
588 if (keysym == XKB_KEY_NoSymbol) {
589 scanner_err(s, "unrecognized keysym \"%s\" on left-hand side",
593 if (production.len + 1 > MAX_LHS_LEN) {
594 scanner_warn(s, "too many keysyms (%d) on left-hand side; skipping line",
598 production.lhs[production.len++] = keysym;
600 production.modmask = 0;
/* Modifier list: each entry is an identifier, optionally preceded by a tilde token. */
612 if (tok != TOK_TILDE && tok != TOK_IDENT)
615 if (tok == TOK_TILDE) {
620 if (tok != TOK_IDENT)
623 mod = resolve_modifier(val.string.str);
624 if (mod == XKB_MOD_INVALID) {
625 scanner_err(s, "unrecognized modifier \"%s\"",
/*
 * The modifier bit is always added to modmask; in mods it is either
 * cleared or set by the two statements below.
 * NOTE(review): the shifted constant 1 has type int, so this relies on
 * mod being a small index - confirm against resolve_modifier().
 */
630 production.modmask |= 1 << mod;
632 production.mods &= ~(1 << mod);
634 production.mods |= 1 << mod;
/* Right-hand side. */
640 switch (tok = lex(s, &val)) {
642 if (production.has_string) {
643 scanner_warn(s, "right-hand side can have at most one string; skipping line");
/*
 * NOTE(review): lex() sets the length from s->buf_pos after appending
 * the terminator; if that count includes the terminator, this test can
 * never detect an empty literal - confirm.
 */
646 if (val.string.len <= 0) {
647 scanner_warn(s, "right-hand side string must not be empty; skipping line");
/* The size check here guards the strcpy() into production.string below. */
650 if (val.string.len >= sizeof(production.string)) {
651 scanner_warn(s, "right-hand side string is too long; skipping line");
654 strcpy(production.string, val.string.str);
655 production.has_string = true;
658 keysym = xkb_keysym_from_name(val.string.str, XKB_KEYSYM_NO_FLAGS);
659 if (keysym == XKB_KEY_NoSymbol) {
660 scanner_err(s, "unrecognized keysym \"%s\" on right-hand side",
664 if (production.has_keysym) {
665 scanner_warn(s, "right-hand side can have at most one keysym; skipping line");
668 production.keysym = keysym;
669 production.has_keysym = true;
/* End of the line: a production needs a string, a keysym or both before it is added. */
671 case TOK_END_OF_LINE:
672 if (!production.has_string && !production.has_keysym) {
673 scanner_warn(s, "right-hand side must have at least one of string or keysym; skipping line");
676 add_production(table, s, &production);
/* Error handling: TOK_ERROR has presumably been reported by the lexer already, so only other tokens are reported here. */
683 if (tok != TOK_ERROR)
684 scanner_err(s, "unexpected token");
/* The error count is compared against MAX_ERRORS. */
687 if (num_errors <= MAX_ERRORS)
690 scanner_err(s, "too many errors");
694 scanner_err(s, "failed to parse file");
/* Loop until the end of the current line or of the input is reached. */
698 while (tok != TOK_END_OF_LINE && tok != TOK_END_OF_FILE)
/*
 * Parse len bytes of Compose text at string into table.
 * file_name is passed to scanner_init() as the name of the input.
 * Parsing starts at include depth 0, and the node and utf8 arrays are
 * shrunk afterwards.
 */
707 parse_string(struct xkb_compose_table *table, const char *string, size_t len,
708 const char *file_name)
711 scanner_init(&s, table->ctx, string, len, file_name, NULL);
712 if (!parse(table, &s, 0))
714 /* Maybe the allocator can use the excess space. */
715 darray_shrink(table->nodes);
716 darray_shrink(table->utf8);
/*
 * Parse an already opened Compose file into table.
 * The contents are mapped with map_file(), handed to parse_string()
 * together with file_name, and unmapped again afterwards.  A mapping
 * failure is logged through log_err() on table->ctx.
 * This function does not close file in the lines visible here.
 */
721 parse_file(struct xkb_compose_table *table, FILE *file, const char *file_name)
727 ok = map_file(file, &string, &size);
/* NOTE(review): the message below assumes map_file() leaves a meaningful errno on failure - confirm. */
729 log_err(table->ctx, "Couldn't read Compose file %s: %s\n",
730 file_name, strerror(errno));
734 ok = parse_string(table, string, size, file_name);
735 unmap_file(string, size);