2 * Copyright © 2013 Ran Benita <ran234@gmail.com>
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
21 * DEALINGS IN THE SOFTWARE.
24 /******************************************************************
26 Copyright 1992 by Oki Technosystems Laboratory, Inc.
27 Copyright 1992 by Fuji Xerox Co., Ltd.
29 Permission to use, copy, modify, distribute, and sell this software
30 and its documentation for any purpose is hereby granted without fee,
31 provided that the above copyright notice appear in all copies and
32 that both that copyright notice and this permission notice appear
33 in supporting documentation, and that the name of Oki Technosystems
34 Laboratory and Fuji Xerox not be used in advertising or publicity
35 pertaining to distribution of the software without specific, written
37 Oki Technosystems Laboratory and Fuji Xerox make no representations
38 about the suitability of this software for any purpose. It is provided
39 "as is" without express or implied warranty.
41 OKI TECHNOSYSTEMS LABORATORY AND FUJI XEROX DISCLAIM ALL WARRANTIES
42 WITH REGARD TO THIS SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF
43 MERCHANTABILITY AND FITNESS, IN NO EVENT SHALL OKI TECHNOSYSTEMS
44 LABORATORY AND FUJI XEROX BE LIABLE FOR ANY SPECIAL, INDIRECT OR
45 CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS
46 OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE
47 OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE
48 OR PERFORMANCE OF THIS SOFTWARE.
50 Author: Yasuhiro Kawai Oki Technosystems Laboratory
51 Author: Kazunori Nishihara Fuji Xerox
53 ******************************************************************/
60 #include "scanner-utils.h"
67 * Grammar adapted from libX11/modules/im/ximcp/imLcPrs.c.
68 * See also the XCompose(5) manpage.
70 * FILE ::= { [PRODUCTION] [COMMENT] "\n" | INCLUDE }
71 * INCLUDE ::= "include" '"' INCLUDE_STRING '"'
72 * PRODUCTION ::= LHS ":" RHS [ COMMENT ]
73 * COMMENT ::= "#" {<any character except null or newline>}
74 * LHS ::= EVENT { EVENT }
75 * EVENT ::= [MODIFIER_LIST] "<" keysym ">"
76 * MODIFIER_LIST ::= (["!"] {MODIFIER} ) | "None"
77 * MODIFIER ::= ["~"] MODIFIER_NAME
78 * MODIFIER_NAME ::= ("Ctrl"|"Lock"|"Caps"|"Shift"|"Alt"|"Meta")
79 * RHS ::= ( STRING | keysym | STRING keysym )
80 * STRING ::= '"' { CHAR } '"'
81 * CHAR ::= GRAPHIC_CHAR | ESCAPED_CHAR
82 * GRAPHIC_CHAR ::= locale (codeset) dependent code
83 * ESCAPED_CHAR ::= ('\\' | '\"' | OCTAL | HEX )
84 * OCTAL ::= '\' OCTAL_CHAR [OCTAL_CHAR [OCTAL_CHAR]]
85 * OCTAL_CHAR ::= (0|1|2|3|4|5|6|7)
86 * HEX ::= '\' (x|X) HEX_CHAR [HEX_CHAR]
87 * HEX_CHAR ::= (0|1|2|3|4|5|6|7|8|9|A|B|C|D|E|F|a|b|c|d|e|f)
89 * INCLUDE_STRING is a filesystem path, with the following %-expansions:
91 * %H - The user's home directory (the $HOME environment variable).
92 * %L - The name of the locale specific Compose file (e.g.,
93 * "/usr/share/X11/locale/<localename>/Compose").
94 * %S - The name of the system directory for Compose files (e.g.,
95 * "/usr/share/X11/locale").
112 /* Values returned with some tokens, like yylval. */
115 /* Still \0-terminated. */
/*
 * lex() - scan the next token of a Compose file from scanner s.
 *
 * For tokens that carry text (keysym literal, string literal, identifier)
 * val->string.str is pointed at the scanner buffer s->buf and
 * val->string.len is set from s->buf_pos.
 *
 * NOTE(review): several statements of this function (returns after error
 * reports, the bodies of the single-character token checks, closing braces)
 * are not present in this view; comments below describe visible lines only.
 */
121 static enum rules_token
122 lex(struct scanner *s, union lvalue *val)
124 skip_more_whitespace_and_comments:
/* Consume whitespace; a newline found among it is returned as its own token. */
126 while (is_space(scanner_peek(s)))
127 if (scanner_next(s) == '\n')
128 return TOK_END_OF_LINE;
/* A hash sign starts a comment: drop the rest of the line and rescan. */
131 if (scanner_chr(s, '#')) {
132 scanner_skip_to_eol(s);
133 goto skip_more_whitespace_and_comments;
136 /* See if we're done. */
137 if (scanner_eof(s)) return TOK_END_OF_FILE;
/* Copy the scanner line/column into the token position fields. */
140 s->token_line = s->line;
141 s->token_column = s->column;
/*
 * Keysym literal in angle brackets: bytes up to the closing bracket (or end
 * of line / end of input) are appended to the scanner buffer.
 * The result of scanner_buf_append() is ignored inside the loop; only the
 * final append of the terminator is checked and reported as too long.
 */
145 if (scanner_chr(s, '<')) {
146 while (scanner_peek(s) != '>' && !scanner_eol(s) && !scanner_eof(s))
147 scanner_buf_append(s, scanner_next(s));
148 if (!scanner_chr(s, '>')) {
149 scanner_err(s, "unterminated keysym literal");
152 if (!scanner_buf_append(s, '\0')) {
153 scanner_err(s, "keysym literal is too long");
/*
 * NOTE(review): len is read from buf_pos after the terminator was appended,
 * so it presumably counts the terminating NUL as well - confirm against
 * scanner_buf_append().
 */
156 val->string.str = s->buf;
157 val->string.len = s->buf_pos;
158 return TOK_LHS_KEYSYM;
/*
 * Single-character tokens (colon, bang, tilde).
 * NOTE(review): the statement guarded by each check is not visible here.
 */
162 if (scanner_chr(s, ':'))
164 if (scanner_chr(s, '!'))
166 if (scanner_chr(s, '~'))
169 /* String literal. */
170 if (scanner_chr(s, '\"')) {
171 while (!scanner_eof(s) && !scanner_eol(s) && scanner_peek(s) != '\"') {
/* Backslash escapes: backslash, double quote, hexadecimal, octal. */
172 if (scanner_chr(s, '\\')) {
174 if (scanner_chr(s, '\\')) {
175 scanner_buf_append(s, '\\');
177 else if (scanner_chr(s, '"')) {
178 scanner_buf_append(s, '"');
/*
 * Hex escape: the value is appended only when scanner_hex() succeeds and
 * is_valid_char() accepts it; the warning below belongs to the other case.
 */
180 else if (scanner_chr(s, 'x') || scanner_chr(s, 'X')) {
181 if (scanner_hex(s, &o) && is_valid_char((char) o)) {
182 scanner_buf_append(s, (char) o);
184 // [TODO] actually show the sequence
185 scanner_warn_with_code(s,
186 XKB_WARNING_INVALID_ESCAPE_SEQUENCE,
187 "illegal hexadecimal escape sequence in string literal");
/* Octal escape: same validity check via is_valid_char(). */
190 else if (scanner_oct(s, &o)) {
191 if (is_valid_char((char) o)) {
192 scanner_buf_append(s, (char) o);
194 // [TODO] actually show the sequence
195 scanner_warn_with_code(s,
196 XKB_WARNING_INVALID_ESCAPE_SEQUENCE,
197 "illegal octal escape sequence in string literal");
/* Any other character after a backslash only produces a warning here. */
201 scanner_warn(s, "unknown escape sequence (%c) in string literal", scanner_peek(s));
/* Ordinary character: copied to the buffer as is. */
205 scanner_buf_append(s, scanner_next(s));
/* The literal must be closed by a double quote on the same line. */
208 if (!scanner_chr(s, '\"')) {
209 scanner_err(s, "unterminated string literal");
212 if (!scanner_buf_append(s, '\0')) {
213 scanner_err(s, "string literal is too long");
/* Validate the collected bytes, excluding the terminator just appended. */
216 if (!is_valid_utf8(s->buf, s->buf_pos - 1)) {
217 scanner_err(s, "string literal is not a valid UTF-8 string");
220 val->string.str = s->buf;
221 val->string.len = s->buf_pos;
225 /* Identifier or include. */
/* Starts with a letter or underscore, continues with alphanumerics or underscore. */
226 if (is_alpha(scanner_peek(s)) || scanner_peek(s) == '_') {
228 while (is_alnum(scanner_peek(s)) || scanner_peek(s) == '_')
229 scanner_buf_append(s, scanner_next(s));
230 if (!scanner_buf_append(s, '\0')) {
231 scanner_err(s, "identifier is too long");
/* The word include is compared exactly (case-sensitively) via streq(). */
235 if (streq(s->buf, "include"))
238 val->string.str = s->buf;
239 val->string.len = s->buf_pos;
243 /* Discard rest of line. */
244 scanner_skip_to_eol(s);
/* Nothing matched: report the token as unrecognized. */
246 scanner_err(s, "unrecognized token");
/*
 * lex_include_string() - scan the quoted path that follows an include
 * keyword, expanding percent sequences while copying it into the scanner
 * buffer.
 *
 * Expansions handled below: a doubled percent sign (literal percent), H
 * (value of the HOME variable obtained through xkb_context_getenv()), L
 * (result of get_locale_compose_file_path() for table->locale) and S
 * (result of get_xlocaledir_path()).
 *
 * On success the path is available through val_out->string and
 * TOK_INCLUDE_STRING is returned. A newline met before the path yields
 * TOK_END_OF_LINE.
 *
 * NOTE(review): the statements following each scanner_err() call and the
 * null checks on home/path are not visible in this view.
 */
250 static enum rules_token
251 lex_include_string(struct scanner *s, struct xkb_compose_table *table,
252 union lvalue *val_out)
/* Skip leading whitespace; stop at a newline. */
254 while (is_space(scanner_peek(s)))
255 if (scanner_next(s) == '\n')
256 return TOK_END_OF_LINE;
/* Copy the scanner line/column into the token position fields. */
258 s->token_line = s->line;
259 s->token_column = s->column;
/* The path must start with a double quote. */
262 if (!scanner_chr(s, '\"')) {
263 scanner_err(s, "include statement must be followed by a path");
/* Copy until the closing quote, end of line or end of input. */
267 while (!scanner_eof(s) && !scanner_eol(s) && scanner_peek(s) != '\"') {
268 if (scanner_chr(s, '%')) {
269 if (scanner_chr(s, '%')) {
270 scanner_buf_append(s, '%');
272 else if (scanner_chr(s, 'H')) {
273 const char *home = xkb_context_getenv(table->ctx, "HOME");
275 scanner_err(s, "%%H was used in an include statement, but the HOME environment variable is not set");
278 if (!scanner_buf_appends(s, home)) {
279 scanner_err(s, "include path after expanding %%H is too long");
283 else if (scanner_chr(s, 'L')) {
/*
 * NOTE(review): path is a non-const char pointer, unlike home and
 * xlocaledir; presumably it is owned here and released on lines not shown -
 * confirm.
 */
284 char *path = get_locale_compose_file_path(table->ctx, table->locale);
286 scanner_err(s, "failed to expand %%L to the locale Compose file");
289 if (!scanner_buf_appends(s, path)) {
291 scanner_err(s, "include path after expanding %%L is too long");
296 else if (scanner_chr(s, 'S')) {
297 const char *xlocaledir = get_xlocaledir_path(table->ctx);
298 if (!scanner_buf_appends(s, xlocaledir)) {
299 scanner_err(s, "include path after expanding %%S is too long");
/* Any other character after a percent sign is reported as an error. */
304 scanner_err(s, "unknown %% format (%c) in include statement", scanner_peek(s));
/* Ordinary path character: copied as is (append result not checked here). */
308 scanner_buf_append(s, scanner_next(s));
/* The path must be closed by a double quote on the same line. */
311 if (!scanner_chr(s, '\"')) {
312 scanner_err(s, "unterminated include statement");
/* Terminate the buffer; a failed append is reported as path too long. */
315 if (!scanner_buf_append(s, '\0')) {
316 scanner_err(s, "include path is too long");
319 val_out->string.str = s->buf;
320 val_out->string.len = s->buf_pos;
321 return TOK_INCLUDE_STRING;
325 xkb_keysym_t lhs[MAX_LHS_LEN];
329 /* At least one of these is true. */
333 /* The matching is as follows: (active_mods & modmask) == mods. */
334 xkb_mod_mask_t modmask;
/*
 * add_production() - insert one parsed production into table->nodes.
 *
 * The nodes are walked one left-hand-side keysym at a time (production->lhs,
 * production->len entries), comparing each keysym with node->keysym and
 * appending new nodes where needed. The node reached for the last keysym is
 * marked as a leaf and receives the result of the production: an offset into
 * table->utf8 for the string and/or the result keysym.
 *
 * Conflicts with existing sequences (prefix, duplicate, override) and the
 * node-count limit are reported through scanner_warn() on s.
 *
 * NOTE(review): the loop header, several branch bodies and the early returns
 * are not present in this view; comments below describe visible lines only.
 */
339 add_production(struct xkb_compose_table *table, struct scanner *s,
340 const struct production *production)
342 unsigned lhs_pos = 0;
/* Start the walk at node index 0 when the table holds a single node, else at 1. */
343 uint32_t curr = darray_size(table->nodes) == 1 ? 0 : 1;
344 uint32_t *pptr = NULL;
345 struct compose_node *node = NULL;
347 /* Warn before potentially going over the limit, discard silently after. */
/* The warning threshold is MAX_LHS_LEN nodes earlier than the hard limit. */
348 if (darray_size(table->nodes) + production->len + MAX_LHS_LEN > MAX_COMPOSE_NODES)
349 scanner_warn(s, "too many sequences for one Compose file; will ignore further lines");
350 if (darray_size(table->nodes) + production->len >= MAX_COMPOSE_NODES)
354 * Insert the sequence to the ternary search tree, creating new nodes as
357 * TODO: We insert in the order given, this means some inputs can create
358 * long O(n) chains, which results in total O(n^2) parsing time. We should
359 * ensure the tree is reasonably balanced somehow.
/* Keysym handled in this step, and whether it is the final one of the LHS. */
362 const xkb_keysym_t keysym = production->lhs[lhs_pos];
363 const bool last = lhs_pos + 1 == production->len;
367 * Create a new node and update the parent pointer to it.
368 * Update the pointer first because the append invalidates it.
370 struct compose_node new = {
/* The new node gets the index equal to the current array size. */
379 curr = darray_size(table->nodes);
384 darray_append(table->nodes, new);
/* Re-fetch the node pointer by index after a possible append. */
387 node = &darray_item(table->nodes, curr);
/* Three-way comparison of the keysym against the current node. */
389 if (keysym < node->keysym) {
392 } else if (keysym > node->keysym) {
397 scanner_warn(s, "a sequence already exists which is a prefix of this sequence; overriding");
/* Turn the node back into an internal node without an eqkid child (0). */
398 node->internal.eqkid = 0;
399 node->internal.is_leaf = false;
/* Descend through eqkid and remember where that link is stored. */
402 pptr = &node->internal.eqkid;
403 curr = node->internal.eqkid;
/*
 * Compare the existing leaf with the new production: the strings match when
 * both are absent (offset 0 and no string) or both present and equal; the
 * keysyms match likewise, with XKB_KEY_NoSymbol meaning absent.
 */
407 (node->leaf.utf8 == 0 && !production->has_string) ||
409 node->leaf.utf8 != 0 && production->has_string &&
410 streq(&darray_item(table->utf8, node->leaf.utf8),
414 (node->leaf.keysym == XKB_KEY_NoSymbol && !production->has_keysym) ||
416 node->leaf.keysym != XKB_KEY_NoSymbol && production->has_keysym &&
417 node->leaf.keysym == production->keysym
419 if (same_string && same_keysym) {
420 scanner_warn(s, "this compose sequence is a duplicate of another; skipping line");
423 scanner_warn(s, "this compose sequence already exists; overriding");
/* A non-zero eqkid means a longer sequence already continues from this node. */
425 } else if (node->internal.eqkid != 0) {
426 scanner_warn(s, "this compose sequence is a prefix of another; skipping line");
/* Store the result in the leaf. */
429 node->is_leaf = true;
430 if (production->has_string) {
/* The string, including its terminator, is appended to table->utf8. */
431 node->leaf.utf8 = darray_size(table->utf8);
432 darray_append_items(table->utf8, production->string,
433 strlen(production->string) + 1);
435 if (production->has_keysym) {
436 node->leaf.keysym = production->keysym;
/*
 * Mask with bits 0 through 3 set. parse() assigns it to production.modmask
 * in its modifier-list handling.
 */
443 /* Should match resolve_modifier(). */
444 #define ALL_MODS_MASK ((1 << 0) | (1 << 1) | (1 << 2) | (1 << 3))
/*
 * resolve_modifier() - look up a modifier name in a static table.
 *
 * The name is compared with each table entry using streq(), so the match is
 * exact. XKB_MOD_INVALID is returned when no entry matches.
 *
 * NOTE(review): the table contents and the value returned on a match are not
 * visible in this view; ALL_MODS_MASK is expected to stay in sync with them.
 */
446 static xkb_mod_index_t
447 resolve_modifier(const char *name)
449 static const struct {
/* Linear search over the table. */
461 for (unsigned i = 0; i < ARRAY_SIZE(mods); i++)
462 if (streq(name, mods[i].name))
/* No entry matched. */
465 return XKB_MOD_INVALID;
/*
 * Forward declaration: do_include() calls parse() on the included file, and
 * parse() in turn calls do_include(), so one of them must be declared first.
 */
469 parse(struct xkb_compose_table *table, struct scanner *s,
470 unsigned include_depth);
/*
 * do_include() - parse the Compose file at path into table.
 *
 * s is the scanner of the including file; it is used for error reporting and
 * its priv pointer is handed to the scanner of the included file.
 * include_depth is the nesting level of the including file; the included
 * file is parsed with include_depth + 1.
 *
 * NOTE(review): the local declarations, the failure checks after fopen() and
 * map_file(), the cleanup of file and the return statements are not present
 * in this view.
 */
473 do_include(struct xkb_compose_table *table, struct scanner *s,
474 const char *path, unsigned include_depth)
480 struct scanner new_s;
/* Refuse to nest deeper than MAX_INCLUDE_DEPTH. */
482 if (include_depth >= MAX_INCLUDE_DEPTH) {
483 scanner_err(s, "maximum include depth (%d) exceeded; maybe there is an include loop?",
/* Open in binary mode; failures are reported with strerror(errno). */
488 file = fopen(path, "rb");
490 scanner_err(s, "failed to open included Compose file \"%s\": %s",
491 path, strerror(errno));
/* Obtain the file contents as string/size. */
495 ok = map_file(file, &string, &size);
497 scanner_err(s, "failed to read included Compose file \"%s\": %s",
498 path, strerror(errno));
/* Fresh scanner over the included contents, sharing the context and priv. */
502 scanner_init(&new_s, table->ctx, string, size, path, s->priv);
504 ok = parse(table, &new_s, include_depth + 1);
/* Release the contents obtained from map_file(). */
509 unmap_file(string, size);
/*
 * parse() - parse the Compose text held by scanner s into table.
 *
 * Tokens are obtained from lex() (and lex_include_string() right after an
 * include keyword). Include statements are forwarded to do_include() with
 * the current include_depth; complete productions are handed to
 * add_production(). Problems are reported with scanner_warn() and
 * scanner_err().
 *
 * NOTE(review): the labels, most case labels, the goto statements between
 * states and the return statements are not present in this view; the
 * comments below describe visible lines only.
 */
516 parse(struct xkb_compose_table *table, struct scanner *s,
517 unsigned include_depth)
519 enum rules_token tok;
522 struct production production;
/* Bound compared against num_errors in the error handling near the end. */
523 enum { MAX_ERRORS = 10 };
/* Clear the result flags and the modifier mask of the production. */
528 production.has_keysym = false;
529 production.has_string = false;
531 production.modmask = 0;
/* Dispatch on the next token. */
536 switch (tok = lex(s, &val)) {
537 case TOK_END_OF_LINE:
539 case TOK_END_OF_FILE:
/* Include statement: the path is scanned by the dedicated lexer. */
548 switch (tok = lex_include_string(s, table, &val)) {
549 case TOK_INCLUDE_STRING:
/* The include path must be followed by end of line before it is processed. */
556 switch (tok = lex(s, &val)) {
557 case TOK_END_OF_LINE:
558 if (!do_include(table, s, val.string.str, include_depth))
/* A production needs at least one keysym on its left-hand side. */
570 if (production.len <= 0) {
571 scanner_warn(s, "expected at least one keysym on left-hand side; skipping line");
/* The identifier None is compared exactly; all modifier bits go into the mask. */
576 if (streq(val.string.str, "None")) {
578 production.modmask = ALL_MODS_MASK;
581 goto lhs_mod_list_tok;
583 goto lhs_mod_list_tok;
585 production.modmask = ALL_MODS_MASK;
/* Left-hand-side keysym: resolved by exact name (no lookup flags). */
596 keysym = xkb_keysym_from_name(val.string.str, XKB_KEYSYM_NO_FLAGS);
597 if (keysym == XKB_KEY_NoSymbol) {
598 scanner_err(s, "unrecognized keysym \"%s\" on left-hand side",
/* Bounds check before storing into production.lhs. */
602 if (production.len + 1 > MAX_LHS_LEN) {
603 scanner_warn(s, "too many keysyms (%d) on left-hand side; skipping line",
607 production.lhs[production.len++] = keysym;
609 production.modmask = 0;
/* Modifier list: each entry is an identifier, optionally preceded by a tilde. */
621 if (tok != TOK_TILDE && tok != TOK_IDENT)
624 if (tok == TOK_TILDE) {
629 if (tok != TOK_IDENT)
632 mod = resolve_modifier(val.string.str);
633 if (mod == XKB_MOD_INVALID) {
634 scanner_err(s, "unrecognized modifier \"%s\"",
/*
 * The modifier bit always enters modmask; in mods it is either cleared or
 * set. NOTE(review): the condition choosing between the two is not visible;
 * presumably the tilde form clears the bit - confirm.
 */
639 production.modmask |= 1 << mod;
641 production.mods &= ~(1 << mod);
643 production.mods |= 1 << mod;
/* Right-hand side: at most one string and at most one keysym, then end of line. */
649 switch (tok = lex(s, &val)) {
651 if (production.has_string) {
652 scanner_warn(s, "right-hand side can have at most one string; skipping line");
/*
 * NOTE(review): lex() sets string.len from s->buf_pos after appending the
 * terminator and passes buf_pos - 1 to is_valid_utf8(), which suggests len
 * counts the terminating NUL. If so, this test cannot detect an empty
 * string - confirm against scanner_buf_append().
 */
655 if (val.string.len <= 0) {
656 scanner_warn(s, "right-hand side string must not be empty; skipping line");
/* The length check above the copy keeps strcpy() within production.string. */
659 if (val.string.len >= sizeof(production.string)) {
660 scanner_warn(s, "right-hand side string is too long; skipping line");
663 strcpy(production.string, val.string.str);
664 production.has_string = true;
/* Right-hand-side keysym: resolved by exact name (no lookup flags). */
667 keysym = xkb_keysym_from_name(val.string.str, XKB_KEYSYM_NO_FLAGS);
668 if (keysym == XKB_KEY_NoSymbol) {
669 scanner_err(s, "unrecognized keysym \"%s\" on right-hand side",
673 if (production.has_keysym) {
674 scanner_warn(s, "right-hand side can have at most one keysym; skipping line");
677 production.keysym = keysym;
678 production.has_keysym = true;
/* End of the production: it must carry a string, a keysym, or both. */
680 case TOK_END_OF_LINE:
681 if (!production.has_string && !production.has_keysym) {
682 scanner_warn(s, "right-hand side must have at least one of string or keysym; skipping line");
685 add_production(table, s, &production);
/* A TOK_ERROR token was already reported by the lexer; report anything else here. */
692 if (tok != TOK_ERROR)
693 scanner_err(s, "unexpected token");
/* Error budget check against MAX_ERRORS. */
696 if (num_errors <= MAX_ERRORS)
699 scanner_err(s, "too many errors");
703 scanner_err(s, "failed to parse file");
/* Loop until the current line (or the input) ends. */
707 while (tok != TOK_END_OF_LINE && tok != TOK_END_OF_FILE)
/*
 * parse_string() - parse len bytes of Compose text from string into table.
 *
 * A scanner is initialised over the text with file_name and a NULL priv
 * pointer, and parse() is run at include depth 0. After parsing, the node
 * and UTF-8 arrays of the table are shrunk.
 *
 * NOTE(review): the scanner declaration and the return statements are not
 * present in this view.
 */
716 parse_string(struct xkb_compose_table *table, const char *string, size_t len,
717 const char *file_name)
720 scanner_init(&s, table->ctx, string, len, file_name, NULL);
/* Top-level file: include depth starts at 0. */
721 if (!parse(table, &s, 0))
723 /* Maybe the allocator can use the excess space. */
724 darray_shrink(table->nodes);
725 darray_shrink(table->utf8);
730 parse_file(struct xkb_compose_table *table, FILE *file, const char *file_name)
736 ok = map_file(file, &string, &size);
739 XKB_LOG_MESSAGE_NO_ID,
740 "Couldn't read Compose file %s: %s\n",
741 file_name, strerror(errno));
745 ok = parse_string(table, string, size, file_name);
746 unmap_file(string, size);