2 * Copyright © 2013 Ran Benita <ran234@gmail.com>
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
21 * DEALINGS IN THE SOFTWARE.
24 /******************************************************************
26 Copyright 1992 by Oki Technosystems Laboratory, Inc.
27 Copyright 1992 by Fuji Xerox Co., Ltd.
29 Permission to use, copy, modify, distribute, and sell this software
30 and its documentation for any purpose is hereby granted without fee,
31 provided that the above copyright notice appear in all copies and
32 that both that copyright notice and this permission notice appear
33 in supporting documentation, and that the name of Oki Technosystems
34 Laboratory and Fuji Xerox not be used in advertising or publicity
35 pertaining to distribution of the software without specific, written
37 Oki Technosystems Laboratory and Fuji Xerox make no representations
38 about the suitability of this software for any purpose. It is provided
39 "as is" without express or implied warranty.
41 OKI TECHNOSYSTEMS LABORATORY AND FUJI XEROX DISCLAIM ALL WARRANTIES
42 WITH REGARD TO THIS SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF
43 MERCHANTABILITY AND FITNESS, IN NO EVENT SHALL OKI TECHNOSYSTEMS
44 LABORATORY AND FUJI XEROX BE LIABLE FOR ANY SPECIAL, INDIRECT OR
45 CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS
46 OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE
47 OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE
48 OR PERFORMANCE OF THIS SOFTWARE.
50 Author: Yasuhiro Kawai Oki Technosystems Laboratory
51 Author: Kazunori Nishihara Fuji Xerox
53 ******************************************************************/
60 #include "scanner-utils.h"
/* Maximum number of keysyms on the left-hand side of one production. */
66 #define MAX_LHS_LEN 10
/* Include nesting depth at which do_include() refuses to go deeper. */
67 #define MAX_INCLUDE_DEPTH 5
70 * Grammar adapted from libX11/modules/im/ximcp/imLcPrs.c.
71 * See also the XCompose(5) manpage.
73 * FILE ::= { [PRODUCTION] [COMMENT] "\n" | INCLUDE }
74 * INCLUDE ::= "include" '"' INCLUDE_STRING '"'
75 * PRODUCTION ::= LHS ":" RHS [ COMMENT ]
76 * COMMENT ::= "#" {<any character except null or newline>}
77 * LHS ::= EVENT { EVENT }
78 * EVENT ::= [MODIFIER_LIST] "<" keysym ">"
79 * MODIFIER_LIST ::= (["!"] {MODIFIER} ) | "None"
80 * MODIFIER ::= ["~"] MODIFIER_NAME
81 * MODIFIER_NAME ::= ("Ctrl"|"Lock"|"Caps"|"Shift"|"Alt"|"Meta")
82 * RHS ::= ( STRING | keysym | STRING keysym )
83 * STRING ::= '"' { CHAR } '"'
84 * CHAR ::= GRAPHIC_CHAR | ESCAPED_CHAR
85 * GRAPHIC_CHAR ::= locale (codeset) dependent code
86 * ESCAPED_CHAR ::= ('\\' | '\"' | OCTAL | HEX )
87 * OCTAL ::= '\' OCTAL_CHAR [OCTAL_CHAR [OCTAL_CHAR]]
88 * OCTAL_CHAR ::= (0|1|2|3|4|5|6|7)
89 * HEX ::= '\' (x|X) HEX_CHAR [HEX_CHAR]
90 * HEX_CHAR ::= (0|1|2|3|4|5|6|7|8|9|A|B|C|D|E|F|a|b|c|d|e|f)
92 * INCLUDE_STRING is a filesystem path, with the following %-expansions:
94 * %H - The user's home directory (the $HOME environment variable).
95 * %L - The name of the locale specific Compose file (e.g.,
96 * "/usr/share/X11/locale/<localename>/Compose").
97 * %S - The name of the system directory for Compose files (e.g.,
98 * "/usr/share/X11/locale").
115 /* Values returned with some tokens, like yylval. */
118 /* Still \0-terminated. */
/*
 * Scan the next token of a Compose file from the scanner s.
 *
 * Leading whitespace and comments are skipped first (see the label below).
 * The return statements visible in this excerpt yield TOK_END_OF_LINE,
 * TOK_END_OF_FILE and TOK_LHS_KEYSYM; the remaining token kinds are
 * returned on lines that are not part of this excerpt.
 *
 * For tokens that carry text (keysym literal, string literal, identifier)
 * val->string.str is pointed at the scanner buffer s->buf and
 * val->string.len is set to s->buf_pos.
 *
 * Problems are reported through scanner_err() and scanner_warn().
 */
124 static enum rules_token
125 lex(struct scanner *s, union lvalue *val)
127 skip_more_whitespace_and_comments:
129 while (is_space(peek(s)))
131 return TOK_END_OF_LINE;
136 goto skip_more_whitespace_and_comments;
139 /* See if we're done. */
140 if (eof(s)) return TOK_END_OF_FILE;
/* Record the position at which the token starts. */
143 s->token_line = s->line;
144 s->token_column = s->column;
/* Collect the keysym name up to the closing angle bracket. */
149 while (peek(s) != '>' && !eol(s) && !eof(s))
150 buf_append(s, next(s));
152 scanner_err(s, "unterminated keysym literal");
155 if (!buf_append(s, '\0')) {
156 scanner_err(s, "keysym literal is too long");
159 val->string.str = s->buf;
160 val->string.len = s->buf_pos;
161 return TOK_LHS_KEYSYM;
172 /* String literal. */
174 while (!eof(s) && !eol(s) && peek(s) != '\"') {
180 else if (chr(s, '"')) {
183 else if (chr(s, 'x') || chr(s, 'X')) {
185 buf_append(s, (char) o);
187 scanner_warn(s, "illegal hexadecimal escape sequence in string literal");
189 else if (oct(s, &o)) {
190 buf_append(s, (char) o);
193 scanner_warn(s, "unknown escape sequence (%c) in string literal", peek(s));
197 buf_append(s, next(s));
201 scanner_err(s, "unterminated string literal");
204 if (!buf_append(s, '\0')) {
205 scanner_err(s, "string literal is too long");
/* The length passed leaves out the last buffer byte, presumably the NUL appended above. */
208 if (!is_valid_utf8(s->buf, s->buf_pos - 1)) {
209 scanner_err(s, "string literal is not a valid UTF-8 string");
212 val->string.str = s->buf;
213 val->string.len = s->buf_pos;
217 /* Identifier or include. */
218 if (is_alpha(peek(s)) || peek(s) == '_') {
220 while (is_alnum(peek(s)) || peek(s) == '_')
221 buf_append(s, next(s));
222 if (!buf_append(s, '\0')) {
223 scanner_err(s, "identifier is too long");
/* The identifier text is compared against the include keyword. */
227 if (streq(s->buf, "include"))
230 val->string.str = s->buf;
231 val->string.len = s->buf_pos;
235 /* Discard rest of line. */
238 scanner_err(s, "unrecognized token");
/*
 * Scan the path argument of an include statement.
 *
 * Percent-expansions handled on the visible lines:
 *   H - appends the HOME environment variable, read with secure_getenv();
 *       an error is reported when HOME is not set.
 *   L - appends the result of get_locale_compose_file_path(table->locale).
 *   S - appends the result of get_xlocaledir_path().
 * Any other format character is reported as an unknown format.
 * Each expansion reports the path as too long when buf_appends() fails.
 *
 * On success the function returns TOK_INCLUDE_STRING with
 * val_out->string.str pointing at the scanner buffer s->buf and
 * val_out->string.len set to s->buf_pos. TOK_END_OF_LINE is also returned
 * on a visible line; the values returned on the error paths are on lines
 * that are not part of this excerpt.
 */
242 static enum rules_token
243 lex_include_string(struct scanner *s, struct xkb_compose_table *table,
244 union lvalue *val_out)
246 while (is_space(peek(s)))
248 return TOK_END_OF_LINE;
/* Record the position at which the token starts. */
250 s->token_line = s->line;
251 s->token_column = s->column;
255 scanner_err(s, "include statement must be followed by a path");
259 while (!eof(s) && !eol(s) && peek(s) != '\"') {
264 else if (chr(s, 'H')) {
265 const char *home = secure_getenv("HOME");
267 scanner_err(s, "%%H was used in an include statement, but the HOME environment variable is not set");
270 if (!buf_appends(s, home)) {
271 scanner_err(s, "include path after expanding %%H is too long");
275 else if (chr(s, 'L')) {
276 char *path = get_locale_compose_file_path(table->locale);
278 scanner_err(s, "failed to expand %%L to the locale Compose file");
281 if (!buf_appends(s, path)) {
283 scanner_err(s, "include path after expanding %%L is too long");
288 else if (chr(s, 'S')) {
289 const char *xlocaledir = get_xlocaledir_path();
290 if (!buf_appends(s, xlocaledir)) {
291 scanner_err(s, "include path after expanding %%S is too long");
296 scanner_err(s, "unknown %% format (%c) in include statement", peek(s));
300 buf_append(s, next(s));
304 scanner_err(s, "unterminated include statement");
307 if (!buf_append(s, '\0')) {
308 scanner_err(s, "include path is too long");
311 val_out->string.str = s->buf;
312 val_out->string.len = s->buf_pos;
313 return TOK_INCLUDE_STRING;
/* Left-hand side keysyms of the sequence; room for MAX_LHS_LEN entries. */
317 xkb_keysym_t lhs[MAX_LHS_LEN];
321 /* At least one of these is true. */
325 /* The matching is as follows: (active_mods & modmask) == mods. */
326 xkb_mod_mask_t modmask;
/*
 * Insert one parsed production into the compose table.
 *
 * The left-hand side keysyms are visited one at a time (lhs_pos) while
 * walking the node array table->nodes, which the comments below describe
 * as a ternary search tree. Missing nodes are appended with
 * darray_append(). At the final node the result is stored: the string is
 * appended to table->utf8 with its offset saved in leaf.utf8, and the
 * keysym is saved in leaf.keysym.
 *
 * Conflicts are reported through scanner_warn():
 *   - an existing sequence that is a prefix of the new one is overridden;
 *   - an identical sequence (same string and same keysym) is skipped;
 *   - a sequence with the same left-hand side but a different result is
 *     overridden;
 *   - a new sequence that is a prefix of an existing one is skipped.
 *
 * A warning is also issued when the node count comes within MAX_LHS_LEN
 * of MAX_COMPOSE_NODES.
 */
331 add_production(struct xkb_compose_table *table, struct scanner *s,
332 const struct production *production)
334 unsigned lhs_pos = 0;
/* Start at index 0 when the table holds exactly one node, otherwise at 1. */
335 uint16_t curr = darray_size(table->nodes) == 1 ? 0 : 1;
336 uint16_t *pptr = NULL;
337 struct compose_node *node = NULL;
339 /* Warn before potentially going over the limit, discard silently after. */
340 if (darray_size(table->nodes) + production->len + MAX_LHS_LEN > MAX_COMPOSE_NODES)
341 scanner_warn(s, "too many sequences for one Compose file; will ignore further lines");
342 if (darray_size(table->nodes) + production->len >= MAX_COMPOSE_NODES)
346 * Insert the sequence to the ternary search tree, creating new nodes as
349 * TODO: We insert in the order given, this means some inputs can create
350 * long O(n) chains, which results in total O(n^2) parsing time. We should
351 * ensure the tree is reasonably balanced somehow.
354 const xkb_keysym_t keysym = production->lhs[lhs_pos];
355 const bool last = lhs_pos + 1 == production->len;
359 * Create a new node and update the parent pointer to it.
360 * Update the pointer first because the append invalidates it.
362 struct compose_node new = {
371 curr = darray_size(table->nodes);
376 darray_append(table->nodes, new);
379 node = &darray_item(table->nodes, curr);
381 if (keysym < node->keysym) {
384 } else if (keysym > node->keysym) {
389 scanner_warn(s, "a sequence already exists which is a prefix of this sequence; overriding");
/* Reset the child links and clear the leaf flag. */
390 node->internal.eqkid = node->lokid = node->hikid = 0;
391 node->internal.is_leaf = false;
394 pptr = &node->internal.eqkid;
395 curr = node->internal.eqkid;
399 (node->leaf.utf8 == 0 && !production->has_string) ||
401 node->leaf.utf8 != 0 && production->has_string &&
402 streq(&darray_item(table->utf8, node->leaf.utf8),
406 (node->leaf.keysym == XKB_KEY_NoSymbol && !production->has_keysym) ||
408 node->leaf.keysym != XKB_KEY_NoSymbol && production->has_keysym &&
409 node->leaf.keysym == production->keysym
411 if (same_string && same_keysym) {
412 scanner_warn(s, "this compose sequence is a duplicate of another; skipping line");
415 scanner_warn(s, "this compose sequence already exists; overriding");
417 } else if (node->internal.eqkid != 0) {
418 scanner_warn(s, "this compose sequence is a prefix of another; skipping line");
421 node->is_leaf = true;
422 if (production->has_string) {
/* The string is stored in table->utf8, terminator included; the node keeps its offset. */
423 node->leaf.utf8 = darray_size(table->utf8);
424 darray_append_items(table->utf8, production->string,
425 strlen(production->string) + 1);
427 if (production->has_keysym) {
428 node->leaf.keysym = production->keysym;
435 /* Should match resolve_modifier(). */
/* Mask with bits 0 through 3 set; parse() assigns it to production.modmask. */
436 #define ALL_MODS_MASK ((1 << 0) | (1 << 1) | (1 << 2) | (1 << 3))
/*
 * Look up a modifier by name in the static mods table.
 *
 * Each table entry is compared against name with streq(). The value
 * returned on a match is on a line that is not part of this excerpt.
 * Returns XKB_MOD_INVALID when no entry matches.
 */
438 static xkb_mod_index_t
439 resolve_modifier(const char *name)
441 static const struct {
453 for (unsigned i = 0; i < ARRAY_SIZE(mods); i++)
454 if (streq(name, mods[i].name))
457 return XKB_MOD_INVALID;
/* Forward declaration: do_include() calls parse(), and parse() calls do_include(). */
461 parse(struct xkb_compose_table *table, struct scanner *s,
462 unsigned include_depth);
/*
 * Parse the Compose file at path as an include of the file scanned by s.
 *
 * An error is reported through s when include_depth has reached
 * MAX_INCLUDE_DEPTH, when fopen() fails, or when map_file() fails.
 * Otherwise a new scanner is initialized over the mapped contents,
 * sharing table->ctx and s->priv, and parse() is called with
 * include_depth + 1. The mapping is released with unmap_file().
 */
465 do_include(struct xkb_compose_table *table, struct scanner *s,
466 const char *path, unsigned include_depth)
472 struct scanner new_s;
474 if (include_depth >= MAX_INCLUDE_DEPTH) {
475 scanner_err(s, "maximum include depth (%d) exceeded; maybe there is an include loop?",
480 file = fopen(path, "rb");
482 scanner_err(s, "failed to open included Compose file \"%s\": %s",
483 path, strerror(errno));
487 ok = map_file(file, &string, &size);
489 scanner_err(s, "failed to read included Compose file \"%s\": %s",
490 path, strerror(errno));
/* The included file gets its own scanner, so diagnostics name path. */
494 scanner_init(&new_s, table->ctx, string, size, path, s->priv);
496 ok = parse(table, &new_s, include_depth + 1);
501 unmap_file(string, size);
/*
 * Parse the Compose text held by scanner s into table.
 *
 * Tokens are pulled with lex(), and with lex_include_string() for the
 * argument of an include. Include statements are handed to do_include()
 * together with the current include_depth. For other lines a struct
 * production is filled in:
 *   - left-hand side keysyms, resolved with xkb_keysym_from_name(), at
 *     most MAX_LHS_LEN of them;
 *   - modifiers, resolved with resolve_modifier() and recorded in
 *     production.modmask and production.mods;
 *   - a right-hand side with at most one string and at most one keysym,
 *     at least one of which is required.
 * A complete production is passed to add_production().
 *
 * Recoverable problems are reported with scanner_warn() and the line is
 * skipped. Errors are counted in num_errors; the too many errors path is
 * apparently taken once that count exceeds MAX_ERRORS.
 */
508 parse(struct xkb_compose_table *table, struct scanner *s,
509 unsigned include_depth)
511 enum rules_token tok;
514 struct production production;
515 enum { MAX_ERRORS = 10 };
/* Reset the production state. */
520 production.has_keysym = false;
521 production.has_string = false;
523 production.modmask = 0;
528 switch (tok = lex(s, &val)) {
529 case TOK_END_OF_LINE:
531 case TOK_END_OF_FILE:
540 switch (tok = lex_include_string(s, table, &val)) {
541 case TOK_INCLUDE_STRING:
548 switch (tok = lex(s, &val)) {
549 case TOK_END_OF_LINE:
550 if (!do_include(table, s, val.string.str, include_depth))
562 if (production.len <= 0) {
563 scanner_warn(s, "expected at least one keysym on left-hand side; skipping line");
568 if (streq(val.string.str, "None")) {
570 production.modmask = ALL_MODS_MASK;
573 goto lhs_mod_list_tok;
575 goto lhs_mod_list_tok;
577 production.modmask = ALL_MODS_MASK;
588 keysym = xkb_keysym_from_name(val.string.str, XKB_KEYSYM_NO_FLAGS);
589 if (keysym == XKB_KEY_NoSymbol) {
590 scanner_err(s, "unrecognized keysym \"%s\" on left-hand side",
/* Refuse to write past the end of production.lhs. */
594 if (production.len + 1 > MAX_LHS_LEN) {
595 scanner_warn(s, "too many keysyms (%d) on left-hand side; skipping line",
599 production.lhs[production.len++] = keysym;
601 production.modmask = 0;
613 if (tok != TOK_TILDE && tok != TOK_IDENT)
616 if (tok == TOK_TILDE) {
621 if (tok != TOK_IDENT)
624 mod = resolve_modifier(val.string.str);
625 if (mod == XKB_MOD_INVALID) {
626 scanner_err(s, "unrecognized modifier \"%s\"",
/* The modifier bit is always added to the mask; mods gets it cleared or set. */
631 production.modmask |= 1 << mod;
633 production.mods &= ~(1 << mod);
635 production.mods |= 1 << mod;
641 switch (tok = lex(s, &val)) {
643 if (production.has_string) {
644 scanner_warn(s, "right-hand side can have at most one string; skipping line");
/*
 * NOTE(review): lex() sets string.len from s->buf_pos after appending the
 * terminating NUL, so an empty literal may arrive here with len 1 and
 * slip past this check -- confirm against buf_append().
 */
647 if (val.string.len <= 0) {
648 scanner_warn(s, "right-hand side string must not be empty; skipping line");
/* Guard the strcpy() below against overflowing production.string. */
651 if (val.string.len >= sizeof(production.string)) {
652 scanner_warn(s, "right-hand side string is too long; skipping line");
655 strcpy(production.string, val.string.str);
656 production.has_string = true;
659 keysym = xkb_keysym_from_name(val.string.str, XKB_KEYSYM_NO_FLAGS);
660 if (keysym == XKB_KEY_NoSymbol) {
661 scanner_err(s, "unrecognized keysym \"%s\" on right-hand side",
665 if (production.has_keysym) {
666 scanner_warn(s, "right-hand side can have at most one keysym; skipping line");
669 production.keysym = keysym;
670 production.has_keysym = true;
672 case TOK_END_OF_LINE:
673 if (!production.has_string && !production.has_keysym) {
674 scanner_warn(s, "right-hand side must have at least one of string or keysym; skipping line");
677 add_production(table, s, &production);
/* A TOK_ERROR token produces no additional message here. */
684 if (tok != TOK_ERROR)
685 scanner_err(s, "unexpected token");
688 if (num_errors <= MAX_ERRORS)
691 scanner_err(s, "too many errors");
695 scanner_err(s, "failed to parse file");
699 while (tok != TOK_END_OF_LINE && tok != TOK_END_OF_FILE)
/*
 * Parse a Compose description held in memory into table.
 *
 * A scanner is initialized over string (len bytes) with table->ctx and
 * file_name, and parse() is run at include depth 0. The handling of a
 * parse() failure is on a line that is not part of this excerpt. The node
 * and utf8 arrays of the table are then shrunk with darray_shrink().
 */
708 parse_string(struct xkb_compose_table *table, const char *string, size_t len,
709 const char *file_name)
712 scanner_init(&s, table->ctx, string, len, file_name, NULL);
713 if (!parse(table, &s, 0))
715 /* Maybe the allocator can use the excess space. */
716 darray_shrink(table->nodes);
717 darray_shrink(table->utf8);
/*
 * Parse a Compose file from an open stream into table.
 *
 * The contents of file are obtained with map_file(), parsed through
 * parse_string() under the name file_name, and released with unmap_file().
 * A map_file() failure is logged with log_err() together with
 * strerror(errno).
 */
722 parse_file(struct xkb_compose_table *table, FILE *file, const char *file_name)
728 ok = map_file(file, &string, &size);
730 log_err(table->ctx, "Couldn't read Compose file %s: %s\n",
731 file_name, strerror(errno));
735 ok = parse_string(table, string, size, file_name);
736 unmap_file(string, size);