From 922e4e918182c4a6d012973a237b89b9248a017c Mon Sep 17 00:00:00 2001 From: Daniel Kolesa Date: Mon, 13 Jun 2016 14:53:35 +0100 Subject: [PATCH] eolian: fix error cursor positioning Previously, multi-char tokens (such as strings, docs, etc.) always put the error cursor at the end of the token. That was confusing, so now the cursor always appears at the beginning of the token instead (for multiline tokens, currently only docs, the line number is also adjusted to point to the first line of the doc token). @fix --- src/lib/eolian/eo_lexer.c | 97 ++++++++++++++++++++++++++++++----------------- src/lib/eolian/eo_lexer.h | 4 +- 2 files changed, 65 insertions(+), 36 deletions(-) diff --git a/src/lib/eolian/eo_lexer.c b/src/lib/eolian/eo_lexer.c index 878926f..b285405 100644 --- a/src/lib/eolian/eo_lexer.c +++ b/src/lib/eolian/eo_lexer.c @@ -154,8 +154,9 @@ static void next_line(Eo_Lexer *ls) next_char(ls); ls->stream_line = ls->stream; } - if (++ls->line_number >= INT_MAX) + if (++ls->iline_number >= INT_MAX) eo_lexer_syntax_error(ls, "chunk has too many lines"); + ls->line_number = ls->iline_number; ls->icolumn = ls->column = 0; } @@ -833,10 +834,15 @@ lex(Eo_Lexer *ls, Eo_Token *tok) case '[': { int dline = ls->line_number, dcol = ls->column; + const char *sline = ls->stream_line; next_char(ls); if (ls->current != '[') return '['; next_char(ls); read_doc(ls, tok, dline, dcol); + ls->column = dcol + 1; + /* doc is the only potentially multiline token */ + ls->line_number = dline; + ls->stream_line = sline; return TOK_DOC; } case '\0': @@ -845,11 +851,13 @@ lex(Eo_Lexer *ls, Eo_Token *tok) next_char(ls); if (!ls->expr_mode || (ls->current != '=')) return '='; next_char(ls); + --ls->column; return TOK_EQ; case '!': next_char(ls); if (!ls->expr_mode || (ls->current != '=')) return '!'; next_char(ls); + --ls->column; return TOK_NQ; case '>': next_char(ls); @@ -857,11 +865,13 @@ lex(Eo_Lexer *ls, Eo_Token *tok) if (ls->current == '=') { next_char(ls); + --ls->column; return 
TOK_GE; } else if (ls->current == '>') { next_char(ls); + --ls->column; return TOK_RSH; } return '>'; @@ -871,11 +881,13 @@ lex(Eo_Lexer *ls, Eo_Token *tok) if (ls->current == '=') { next_char(ls); + --ls->column; return TOK_LE; } else if (ls->current == '<') { next_char(ls); + --ls->column; return TOK_LSH; } return '<'; @@ -883,47 +895,62 @@ lex(Eo_Lexer *ls, Eo_Token *tok) next_char(ls); if (!ls->expr_mode || (ls->current != '&')) return '&'; next_char(ls); + --ls->column; return TOK_AND; case '|': next_char(ls); if (!ls->expr_mode || (ls->current != '|')) return '|'; next_char(ls); + --ls->column; return TOK_OR; case '"': - if (!ls->expr_mode) - { - next_char(ls); - return '"'; - } - read_string(ls, tok); - return TOK_STRING; + { + int dcol = ls->column; + if (!ls->expr_mode) + { + next_char(ls); + return '"'; + } + /* strings are not multiline for now at least */ + read_string(ls, tok); + ls->column = dcol + 1; + return TOK_STRING; + } case '\'': - next_char(ls); - if (!ls->expr_mode) return '\''; - if (ls->current == '\\') - { - next_char(ls); - eina_strbuf_reset(ls->buff); - read_escape(ls); - tok->value.c = (char)*eina_strbuf_string_get(ls->buff); - } - else - { - tok->value.c = ls->current; - next_char(ls); - } - if (ls->current != '\'') - eo_lexer_lex_error(ls, "unfinished character", TOK_CHAR); - next_char(ls); - return TOK_CHAR; + { + int dcol = ls->column; + next_char(ls); + if (!ls->expr_mode) return '\''; + if (ls->current == '\\') + { + next_char(ls); + eina_strbuf_reset(ls->buff); + read_escape(ls); + tok->value.c = (char)*eina_strbuf_string_get(ls->buff); + } + else + { + tok->value.c = ls->current; + next_char(ls); + } + if (ls->current != '\'') + eo_lexer_lex_error(ls, "unfinished character", TOK_CHAR); + next_char(ls); + ls->column = dcol + 1; + return TOK_CHAR; + } case '.': - next_char(ls); - if (!ls->expr_mode) return '.'; - if (!isdigit(ls->current)) return '.'; - eina_strbuf_reset(ls->buff); - eina_strbuf_append_char(ls->buff, '.'); - 
read_number(ls, tok); - return TOK_NUMBER; + { + int dcol = ls->column; + next_char(ls); + if (!ls->expr_mode) return '.'; + if (!isdigit(ls->current)) return '.'; + eina_strbuf_reset(ls->buff); + eina_strbuf_append_char(ls->buff, '.'); + read_number(ls, tok); + ls->column = dcol + 1; + return TOK_NUMBER; + } default: { if (isspace(ls->current)) @@ -934,8 +961,10 @@ lex(Eo_Lexer *ls, Eo_Token *tok) } else if (ls->expr_mode && isdigit(ls->current)) { + int col = ls->column; eina_strbuf_reset(ls->buff); read_number(ls, tok); + ls->column = col + 1; return TOK_NUMBER; } if (ls->current && (isalnum(ls->current) @@ -998,7 +1027,7 @@ eo_lexer_set_input(Eo_Lexer *ls, const char *source) ls->stream_line = ls->stream; ls->source = eina_stringshare_add(source); ls->filename = get_filename(ls); - ls->line_number = 1; + ls->iline_number = ls->line_number = 1; ls->icolumn = ls->column = -1; ls->decpoint = '.'; next_char(ls); diff --git a/src/lib/eolian/eo_lexer.h b/src/lib/eolian/eo_lexer.h index 3467ef2..c2dfc17 100644 --- a/src/lib/eolian/eo_lexer.h +++ b/src/lib/eolian/eo_lexer.h @@ -136,8 +136,8 @@ typedef struct _Eo_Lexer * it points to the beginning of it after the lexing is done, icolumn is * token unaware, always pointing to current column */ int column, icolumn; - /* the current line number */ - int line_number; + /* the current line number, token aware and unaware */ + int line_number, iline_number; /* t: "normal" - token to lex into, "lookahead" - a lookahead token, used * to look one token past "t", when we need to check for a token after the * current one and use it in a conditional without consuming the current -- 2.7.4