******************************************************************/
+#include "config.h"
+
#include <errno.h>
#include "utils.h"
#include "utf8.h"
#include "parser.h"
-#define MAX_LHS_LEN 10
-#define MAX_INCLUDE_DEPTH 5
-
/*
* Grammar adapted from libX11/modules/im/ximcp/imLcPrs.c.
* See also the XCompose(5) manpage.
{
skip_more_whitespace_and_comments:
/* Skip spaces. */
- while (is_space(peek(s)))
- if (next(s) == '\n')
+ while (is_space(scanner_peek(s)))
+ if (scanner_next(s) == '\n')
return TOK_END_OF_LINE;
/* Skip comments. */
- if (chr(s, '#')) {
- skip_to_eol(s);
+ if (scanner_chr(s, '#')) {
+ scanner_skip_to_eol(s);
goto skip_more_whitespace_and_comments;
}
/* See if we're done. */
- if (eof(s)) return TOK_END_OF_FILE;
+ if (scanner_eof(s)) return TOK_END_OF_FILE;
/* New token. */
s->token_line = s->line;
s->buf_pos = 0;
/* LHS Keysym. */
- if (chr(s, '<')) {
- while (peek(s) != '>' && !eol(s))
- buf_append(s, next(s));
- if (!chr(s, '>')) {
+ if (scanner_chr(s, '<')) {
+ while (scanner_peek(s) != '>' && !scanner_eol(s) && !scanner_eof(s))
+ scanner_buf_append(s, scanner_next(s));
+ if (!scanner_chr(s, '>')) {
scanner_err(s, "unterminated keysym literal");
return TOK_ERROR;
}
- if (!buf_append(s, '\0')) {
+ if (!scanner_buf_append(s, '\0')) {
scanner_err(s, "keysym literal is too long");
return TOK_ERROR;
}
}
/* Colon. */
- if (chr(s, ':'))
+ if (scanner_chr(s, ':'))
return TOK_COLON;
- if (chr(s, '!'))
+ if (scanner_chr(s, '!'))
return TOK_BANG;
- if (chr(s, '~'))
+ if (scanner_chr(s, '~'))
return TOK_TILDE;
/* String literal. */
- if (chr(s, '\"')) {
- while (!eof(s) && !eol(s) && peek(s) != '\"') {
- if (chr(s, '\\')) {
+ if (scanner_chr(s, '\"')) {
+ while (!scanner_eof(s) && !scanner_eol(s) && scanner_peek(s) != '\"') {
+ if (scanner_chr(s, '\\')) {
uint8_t o;
- if (chr(s, '\\')) {
- buf_append(s, '\\');
+ if (scanner_chr(s, '\\')) {
+ scanner_buf_append(s, '\\');
}
- else if (chr(s, '"')) {
- buf_append(s, '"');
+ else if (scanner_chr(s, '"')) {
+ scanner_buf_append(s, '"');
}
- else if (chr(s, 'x') || chr(s, 'X')) {
- if (hex(s, &o))
- buf_append(s, (char) o);
+ else if (scanner_chr(s, 'x') || scanner_chr(s, 'X')) {
+ if (scanner_hex(s, &o))
+ scanner_buf_append(s, (char) o);
else
scanner_warn(s, "illegal hexadecimal escape sequence in string literal");
}
- else if (oct(s, &o)) {
- buf_append(s, (char) o);
+ else if (scanner_oct(s, &o)) {
+ scanner_buf_append(s, (char) o);
}
else {
- scanner_warn(s, "unknown escape sequence (%c) in string literal", peek(s));
+ scanner_warn(s, "unknown escape sequence (%c) in string literal", scanner_peek(s));
/* Ignore. */
}
} else {
- buf_append(s, next(s));
+ scanner_buf_append(s, scanner_next(s));
}
}
- if (!chr(s, '\"')) {
+ if (!scanner_chr(s, '\"')) {
scanner_err(s, "unterminated string literal");
return TOK_ERROR;
}
- if (!buf_append(s, '\0')) {
+ if (!scanner_buf_append(s, '\0')) {
scanner_err(s, "string literal is too long");
return TOK_ERROR;
}
}
/* Identifier or include. */
- if (is_alpha(peek(s)) || peek(s) == '_') {
+ if (is_alpha(scanner_peek(s)) || scanner_peek(s) == '_') {
s->buf_pos = 0;
- while (is_alnum(peek(s)) || peek(s) == '_')
- buf_append(s, next(s));
- if (!buf_append(s, '\0')) {
+ while (is_alnum(scanner_peek(s)) || scanner_peek(s) == '_')
+ scanner_buf_append(s, scanner_next(s));
+ if (!scanner_buf_append(s, '\0')) {
scanner_err(s, "identifier is too long");
return TOK_ERROR;
}
}
/* Discard rest of line. */
- skip_to_eol(s);
+ scanner_skip_to_eol(s);
scanner_err(s, "unrecognized token");
return TOK_ERROR;
lex_include_string(struct scanner *s, struct xkb_compose_table *table,
union lvalue *val_out)
{
- while (is_space(peek(s)))
- if (next(s) == '\n')
+ while (is_space(scanner_peek(s)))
+ if (scanner_next(s) == '\n')
return TOK_END_OF_LINE;
s->token_line = s->line;
s->token_column = s->column;
s->buf_pos = 0;
- if (!chr(s, '\"')) {
+ if (!scanner_chr(s, '\"')) {
scanner_err(s, "include statement must be followed by a path");
return TOK_ERROR;
}
- while (!eof(s) && !eol(s) && peek(s) != '\"') {
- if (chr(s, '%')) {
- if (chr(s, '%')) {
- buf_append(s, '%');
+ while (!scanner_eof(s) && !scanner_eol(s) && scanner_peek(s) != '\"') {
+ if (scanner_chr(s, '%')) {
+ if (scanner_chr(s, '%')) {
+ scanner_buf_append(s, '%');
}
- else if (chr(s, 'H')) {
- const char *home = secure_getenv("HOME");
+ else if (scanner_chr(s, 'H')) {
+ const char *home = xkb_context_getenv(table->ctx, "HOME");
if (!home) {
scanner_err(s, "%%H was used in an include statement, but the HOME environment variable is not set");
return TOK_ERROR;
}
- if (!buf_appends(s, home)) {
+ if (!scanner_buf_appends(s, home)) {
scanner_err(s, "include path after expanding %%H is too long");
return TOK_ERROR;
}
}
- else if (chr(s, 'L')) {
- char *path = get_locale_compose_file_path(table->locale);
+ else if (scanner_chr(s, 'L')) {
+ char *path = get_locale_compose_file_path(table->ctx, table->locale);
if (!path) {
scanner_err(s, "failed to expand %%L to the locale Compose file");
return TOK_ERROR;
}
- if (!buf_appends(s, path)) {
+ if (!scanner_buf_appends(s, path)) {
free(path);
scanner_err(s, "include path after expanding %%L is too long");
return TOK_ERROR;
}
free(path);
}
- else if (chr(s, 'S')) {
- const char *xlocaledir = get_xlocaledir_path();
- if (!buf_appends(s, xlocaledir)) {
+ else if (scanner_chr(s, 'S')) {
+ const char *xlocaledir = get_xlocaledir_path(table->ctx);
+ if (!scanner_buf_appends(s, xlocaledir)) {
scanner_err(s, "include path after expanding %%S is too long");
return TOK_ERROR;
}
}
else {
- scanner_err(s, "unknown %% format (%c) in include statement", peek(s));
+ scanner_err(s, "unknown %% format (%c) in include statement", scanner_peek(s));
return TOK_ERROR;
}
} else {
- buf_append(s, next(s));
+ scanner_buf_append(s, scanner_next(s));
}
}
- if (!chr(s, '\"')) {
+ if (!scanner_chr(s, '\"')) {
scanner_err(s, "unterminated include statement");
return TOK_ERROR;
}
- if (!buf_append(s, '\0')) {
+ if (!scanner_buf_append(s, '\0')) {
scanner_err(s, "include path is too long");
return TOK_ERROR;
}
unsigned int len;
xkb_keysym_t keysym;
char string[256];
+ /* At least one of these is true. */
bool has_keysym;
bool has_string;
xkb_mod_mask_t mods;
};
-static uint32_t
-add_node(struct xkb_compose_table *table, xkb_keysym_t keysym)
-{
- struct compose_node new = {
- .keysym = keysym,
- .next = 0,
- .is_leaf = true,
- };
- darray_append(table->nodes, new);
- return darray_size(table->nodes) - 1;
-}
-
static void
add_production(struct xkb_compose_table *table, struct scanner *s,
const struct production *production)
{
- unsigned lhs_pos;
- uint32_t curr;
- struct compose_node *node;
-
- curr = 0;
- node = &darray_item(table->nodes, curr);
+ unsigned lhs_pos = 0;
+ uint32_t curr = darray_size(table->nodes) == 1 ? 0 : 1;
+ uint32_t *pptr = NULL;
+ struct compose_node *node = NULL;
+
+ /* Warn before potentially going over the limit, discard silently after. */
+ if (darray_size(table->nodes) + production->len + MAX_LHS_LEN > MAX_COMPOSE_NODES)
+ scanner_warn(s, "too many sequences for one Compose file; will ignore further lines");
+ if (darray_size(table->nodes) + production->len >= MAX_COMPOSE_NODES)
+ return;
/*
- * Insert the sequence to the trie, creating new nodes as needed.
+ * Insert the sequence to the ternary search tree, creating new nodes as
+ * needed.
*
- * TODO: This can be sped up a bit by first trying the path that the
- * previous production took, and only then doing the linear search
- * through the trie levels. This will work because sequences in the
- * Compose files are often clustered by a common prefix; especially
- * in the 1st and 2nd keysyms, which is where the largest variation
- * (thus, longest search) is.
+ * TODO: We insert in the order given, this means some inputs can create
+ * long O(n) chains, which results in total O(n^2) parsing time. We should
+ * ensure the tree is reasonably balanced somehow.
*/
- for (lhs_pos = 0; lhs_pos < production->len; lhs_pos++) {
- while (production->lhs[lhs_pos] != node->keysym) {
- if (node->next == 0) {
- uint32_t next = add_node(table, production->lhs[lhs_pos]);
- /* Refetch since add_node could have realloc()ed. */
- node = &darray_item(table->nodes, curr);
- node->next = next;
+ while (true) {
+ const xkb_keysym_t keysym = production->lhs[lhs_pos];
+ const bool last = lhs_pos + 1 == production->len;
+
+ if (curr == 0) {
+ /*
+ * Create a new node and update the parent pointer to it.
+ * Update the pointer first because the append invalidates it.
+ */
+ struct compose_node new = {
+ .keysym = keysym,
+ .lokid = 0,
+ .hikid = 0,
+ .internal = {
+ .eqkid = 0,
+ .is_leaf = false,
+ },
+ };
+ curr = darray_size(table->nodes);
+ if (pptr != NULL) {
+ *pptr = curr;
+ pptr = NULL;
}
-
- curr = node->next;
- node = &darray_item(table->nodes, curr);
+ darray_append(table->nodes, new);
}
- if (lhs_pos + 1 == production->len)
- break;
+ node = &darray_item(table->nodes, curr);
- if (node->is_leaf) {
- if (node->u.leaf.utf8 != 0 ||
- node->u.leaf.keysym != XKB_KEY_NoSymbol) {
+ if (keysym < node->keysym) {
+ pptr = &node->lokid;
+ curr = node->lokid;
+ } else if (keysym > node->keysym) {
+ pptr = &node->hikid;
+ curr = node->hikid;
+ } else if (!last) {
+ if (node->is_leaf) {
scanner_warn(s, "a sequence already exists which is a prefix of this sequence; overriding");
- node->u.leaf.utf8 = 0;
- node->u.leaf.keysym = XKB_KEY_NoSymbol;
+ node->internal.eqkid = 0;
+ node->internal.is_leaf = false;
}
-
- {
- uint32_t successor = add_node(table, production->lhs[lhs_pos + 1]);
- /* Refetch since add_node could have realloc()ed. */
- node = &darray_item(table->nodes, curr);
- node->is_leaf = false;
- node->u.successor = successor;
+ lhs_pos++;
+ pptr = &node->internal.eqkid;
+ curr = node->internal.eqkid;
+ } else {
+ if (node->is_leaf) {
+ bool same_string =
+ (node->leaf.utf8 == 0 && !production->has_string) ||
+ (
+ node->leaf.utf8 != 0 && production->has_string &&
+ streq(&darray_item(table->utf8, node->leaf.utf8),
+ production->string)
+ );
+ bool same_keysym =
+ (node->leaf.keysym == XKB_KEY_NoSymbol && !production->has_keysym) ||
+ (
+ node->leaf.keysym != XKB_KEY_NoSymbol && production->has_keysym &&
+ node->leaf.keysym == production->keysym
+ );
+ if (same_string && same_keysym) {
+ scanner_warn(s, "this compose sequence is a duplicate of another; skipping line");
+ return;
+ } else {
+ scanner_warn(s, "this compose sequence already exists; overriding");
+ }
+ } else if (node->internal.eqkid != 0) {
+ scanner_warn(s, "this compose sequence is a prefix of another; skipping line");
+ return;
+ }
+ node->is_leaf = true;
+ if (production->has_string) {
+ node->leaf.utf8 = darray_size(table->utf8);
+ darray_append_items(table->utf8, production->string,
+ strlen(production->string) + 1);
+ }
+ if (production->has_keysym) {
+ node->leaf.keysym = production->keysym;
}
- }
-
- curr = node->u.successor;
- node = &darray_item(table->nodes, curr);
- }
-
- if (!node->is_leaf) {
- scanner_warn(s, "this compose sequence is a prefix of another; skipping line");
- return;
- }
-
- if (node->u.leaf.utf8 != 0 || node->u.leaf.keysym != XKB_KEY_NoSymbol) {
- if (streq(&darray_item(table->utf8, node->u.leaf.utf8),
- production->string) &&
- node->u.leaf.keysym == production->keysym) {
- scanner_warn(s, "this compose sequence is a duplicate of another; skipping line");
return;
}
- scanner_warn(s, "this compose sequence already exists; overriding");
- }
-
- if (production->has_string) {
- node->u.leaf.utf8 = darray_size(table->utf8);
- darray_append_items(table->utf8, production->string,
- strlen(production->string) + 1);
- }
- if (production->has_keysym) {
- node->u.leaf.keysym = production->keysym;
}
}
return false;
}
- file = fopen(path, "r");
+ file = fopen(path, "rb");
if (!file) {
scanner_err(s, "failed to open included Compose file \"%s\": %s",
path, strerror(errno));
}
production.keysym = keysym;
production.has_keysym = true;
+ /* fallthrough */
case TOK_END_OF_LINE:
if (!production.has_string && !production.has_keysym) {
scanner_warn(s, "right-hand side must have at least one of string or keysym; skipping line");
ok = map_file(file, &string, &size);
if (!ok) {
- log_err(table->ctx, "Couldn't read Compose file %s: %s\n",
+ log_err(table->ctx,
+ XKB_LOG_MESSAGE_NO_ID,
+ "Couldn't read Compose file %s: %s\n",
file_name, strerror(errno));
return false;
}