* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
- * version 2 of the License, or (at your option) any later version.
+ * version 2.1 of the License, or (at your option) any later version.
*
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
- * License along with this library; if not, write to the
- * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
- * Boston, MA 02111-1307, USA.
+ * License along with this library; if not, see <http://www.gnu.org/licenses/>.
*/
/*
* Modified by the GLib Team and others 1997-2000. See the AUTHORS
* file for a list of people on the GLib Team. See the ChangeLog
* files for a list of changes. These files are distributed with
- * GLib at ftp://ftp.gtk.org/pub/gtk/.
+ * GLib at ftp://ftp.gtk.org/pub/gtk/.
*/
-/*
+/*
* MT safe
*/
-#ifdef HAVE_CONFIG_H
-#include <config.h>
-#endif
+#include "config.h"
+
+#include <errno.h>
+#include <stdlib.h>
+#include <stdarg.h>
+#include <string.h>
+#include <stdio.h>
-#include <stdlib.h>
-#include <stdarg.h>
-#include <string.h>
-#include <stdio.h>
-#include "glib.h"
-#ifdef HAVE_UNISTD_H
-#include <unistd.h>
+#include "gscanner.h"
+
+#include "gprintfint.h"
+#include "gstrfuncs.h"
+#include "gstring.h"
+#include "gtestutils.h"
+
+#ifdef G_OS_UNIX
+#include <unistd.h>
#endif
-#include <errno.h>
#ifdef G_OS_WIN32
-#include <io.h> /* For _read() */
+#include <io.h>
#endif
+
+/**
+ * SECTION:scanner
+ * @title: Lexical Scanner
+ * @short_description: a general purpose lexical scanner
+ *
+ * The #GScanner and its associated functions provide a
+ * general purpose lexical scanner.
+ */
+
+/**
+ * GScannerMsgFunc:
+ * @scanner: a #GScanner
+ * @message: the message
+ * @error: %TRUE if the message signals an error,
+ * %FALSE if it signals a warning.
+ *
+ * Specifies the type of the message handler function.
+ */
+
+/**
+ * G_CSET_a_2_z:
+ *
+ * The set of lowercase ASCII alphabet characters.
+ * Used for specifying valid identifier characters
+ * in #GScannerConfig.
+ */
+
+/**
+ * G_CSET_A_2_Z:
+ *
+ * The set of uppercase ASCII alphabet characters.
+ * Used for specifying valid identifier characters
+ * in #GScannerConfig.
+ */
+
+/**
+ * G_CSET_DIGITS:
+ *
+ * The set of ASCII digits.
+ * Used for specifying valid identifier characters
+ * in #GScannerConfig.
+ */
+
+/**
+ * G_CSET_LATINC:
+ *
+ * The set of uppercase ISO 8859-1 alphabet characters
+ * which are not ASCII characters.
+ * Used for specifying valid identifier characters
+ * in #GScannerConfig.
+ */
+
+/**
+ * G_CSET_LATINS:
+ *
+ * The set of lowercase ISO 8859-1 alphabet characters
+ * which are not ASCII characters.
+ * Used for specifying valid identifier characters
+ * in #GScannerConfig.
+ */
+
+/**
+ * GTokenType:
+ * @G_TOKEN_EOF: the end of the file
+ * @G_TOKEN_LEFT_PAREN: a '(' character
+ * @G_TOKEN_LEFT_CURLY: a '{' character
+ * @G_TOKEN_LEFT_BRACE: a '[' character
+ * @G_TOKEN_RIGHT_CURLY: a '}' character
+ * @G_TOKEN_RIGHT_PAREN: a ')' character
+ * @G_TOKEN_RIGHT_BRACE: a ']' character
+ * @G_TOKEN_EQUAL_SIGN: a '=' character
+ * @G_TOKEN_COMMA: a ',' character
+ * @G_TOKEN_NONE: not a token
+ * @G_TOKEN_ERROR: an error occurred
+ * @G_TOKEN_CHAR: a character
+ * @G_TOKEN_BINARY: a binary integer
+ * @G_TOKEN_OCTAL: an octal integer
+ * @G_TOKEN_INT: an integer
+ * @G_TOKEN_HEX: a hex integer
+ * @G_TOKEN_FLOAT: a floating point number
+ * @G_TOKEN_STRING: a string
+ * @G_TOKEN_SYMBOL: a symbol
+ * @G_TOKEN_IDENTIFIER: an identifier
+ * @G_TOKEN_IDENTIFIER_NULL: a null identifier
+ * @G_TOKEN_COMMENT_SINGLE: one line comment
+ * @G_TOKEN_COMMENT_MULTI: multi line comment
+ *
+ * The possible types of token returned from each
+ * g_scanner_get_next_token() call.
+ */
+
+/**
+ * GTokenValue:
+ * @v_symbol: token symbol value
+ * @v_identifier: token identifier value
+ * @v_binary: token binary integer value
+ * @v_octal: octal integer value
+ * @v_int: integer value
+ * @v_int64: 64-bit integer value
+ * @v_float: floating point value
+ * @v_hex: hex integer value
+ * @v_string: string value
+ * @v_comment: comment value
+ * @v_char: character value
+ * @v_error: error value
+ *
+ * A union holding the value of the token.
+ */
+
+/**
+ * GErrorType:
+ * @G_ERR_UNKNOWN: unknown error
+ * @G_ERR_UNEXP_EOF: unexpected end of file
+ * @G_ERR_UNEXP_EOF_IN_STRING: unterminated string constant
+ * @G_ERR_UNEXP_EOF_IN_COMMENT: unterminated comment
+ * @G_ERR_NON_DIGIT_IN_CONST: non-digit character in a number
+ * @G_ERR_DIGIT_RADIX: digit beyond radix in a number
+ * @G_ERR_FLOAT_RADIX: non-decimal floating point number
+ * @G_ERR_FLOAT_MALFORMED: malformed floating point number
+ *
+ * The possible errors, used in the @v_error field
+ * of #GTokenValue, when the token is a %G_TOKEN_ERROR.
+ */
+
+/**
+ * GScanner:
+ * @user_data: unused
+ * @max_parse_errors: unused
+ * @parse_errors: g_scanner_error() increments this field
+ * @input_name: name of input stream, featured by the default message handler
+ * @qdata: quarked data
+ * @config: link into the scanner configuration
+ * @token: token parsed by the last g_scanner_get_next_token()
+ * @value: value of the last token from g_scanner_get_next_token()
+ * @line: line number of the last token from g_scanner_get_next_token()
+ * @position: char number of the last token from g_scanner_get_next_token()
+ * @next_token: token parsed by the last g_scanner_peek_next_token()
+ * @next_value: value of the last token from g_scanner_peek_next_token()
+ * @next_line: line number of the last token from g_scanner_peek_next_token()
+ * @next_position: char number of the last token from g_scanner_peek_next_token()
+ * @msg_handler: handler function for g_scanner_warn() and g_scanner_error()
+ *
+ * The data structure representing a lexical scanner.
+ *
+ * You should set @input_name after creating the scanner, since
+ * it is used by the default message handler when displaying
+ * warnings and errors. If you are scanning a file, the filename
+ * would be a good choice.
+ *
+ * The @user_data and @max_parse_errors fields are not used.
+ * If you need to associate extra data with the scanner you
+ * can place them here.
+ *
+ * If you want to use your own message handler you can set the
+ * @msg_handler field. The type of the message handler function
+ * is declared by #GScannerMsgFunc.
+ */
+
+/**
+ * GScannerConfig:
+ * @cset_skip_characters: specifies which characters should be skipped
+ * by the scanner (the default is the whitespace characters: space,
+ * tab, carriage-return and line-feed).
+ * @cset_identifier_first: specifies the characters which can start
+ * identifiers (the default is #G_CSET_a_2_z, "_", and #G_CSET_A_2_Z).
+ * @cset_identifier_nth: specifies the characters which can be used
+ * in identifiers, after the first character (the default is
+ * #G_CSET_a_2_z, "_0123456789", #G_CSET_A_2_Z, #G_CSET_LATINS,
+ * #G_CSET_LATINC).
+ * @cpair_comment_single: specifies the characters at the start and
+ * end of single-line comments. The default is "#\n" which means
+ * that single-line comments start with a '#' and continue until
+ * a '\n' (end of line).
+ * @case_sensitive: specifies if symbols are case sensitive (the
+ * default is %FALSE).
+ * @skip_comment_multi: specifies if multi-line comments are skipped
+ * and not returned as tokens (the default is %TRUE).
+ * @skip_comment_single: specifies if single-line comments are skipped
+ * and not returned as tokens (the default is %TRUE).
+ * @scan_comment_multi: specifies if multi-line comments are recognized
+ * (the default is %TRUE).
+ * @scan_identifier: specifies if identifiers are recognized (the
+ * default is %TRUE).
+ * @scan_identifier_1char: specifies if single-character
+ * identifiers are recognized (the default is %FALSE).
+ * @scan_identifier_NULL: specifies if %NULL is reported as
+ * %G_TOKEN_IDENTIFIER_NULL (the default is %FALSE).
+ * @scan_symbols: specifies if symbols are recognized (the default
+ * is %TRUE).
+ * @scan_binary: specifies if binary numbers are recognized (the
+ * default is %FALSE).
+ * @scan_octal: specifies if octal numbers are recognized (the
+ * default is %TRUE).
+ * @scan_float: specifies if floating point numbers are recognized
+ * (the default is %TRUE).
+ * @scan_hex: specifies if hexadecimal numbers are recognized (the
+ * default is %TRUE).
+ * @scan_hex_dollar: specifies if '$' is recognized as a prefix for
+ * hexadecimal numbers (the default is %FALSE).
+ * @scan_string_sq: specifies if strings can be enclosed in single
+ * quotes (the default is %TRUE).
+ * @scan_string_dq: specifies if strings can be enclosed in double
+ * quotes (the default is %TRUE).
+ * @numbers_2_int: specifies if binary, octal and hexadecimal numbers
+ * are reported as %G_TOKEN_INT (the default is %TRUE).
+ * @int_2_float: specifies if all numbers are reported as %G_TOKEN_FLOAT
+ * (the default is %FALSE).
+ * @identifier_2_string: specifies if identifiers are reported as strings
+ * (the default is %FALSE).
+ * @char_2_token: specifies if characters are reported by setting
+ * `token = ch` or as %G_TOKEN_CHAR (the default is %TRUE).
+ * @symbol_2_token: specifies if symbols are reported by setting
+ * `token = v_symbol` or as %G_TOKEN_SYMBOL (the default is %FALSE).
+ * @scope_0_fallback: specifies if a symbol is searched for in the
+ * default scope in addition to the current scope (the default is %FALSE).
+ * @store_int64: use value.v_int64 rather than v_int
+ *
+ * Specifies the #GScanner parser configuration. Most settings can
+ * be changed during the parsing phase and will affect the lexical
+ * parsing of the next unpeeked token.
+ */
+
/* --- defines --- */
#define to_lower(c) ( \
(guchar) ( \
};
-
/* --- variables --- */
-static GScannerConfig g_scanner_config_template =
+static const GScannerConfig g_scanner_config_template =
{
(
" \t\r\n"
TRUE /* char_2_token */,
FALSE /* symbol_2_token */,
FALSE /* scope_0_fallback */,
+ FALSE /* store_int64 */,
+ 0 /* padding_dummy */
};
return -1;
}
-GScanner*
+/**
+ * g_scanner_new:
+ * @config_templ: the initial scanner settings
+ *
+ * Creates a new #GScanner.
+ *
+ * The @config_templ structure specifies the initial settings
+ * of the scanner, which are copied into the #GScanner
+ * @config field. If you pass %NULL then the default settings
+ * are used.
+ *
+ * Returns: the new #GScanner
+ */
+GScanner *
g_scanner_new (const GScannerConfig *config_templ)
{
GScanner *scanner;
scanner = g_new0 (GScanner, 1);
scanner->user_data = NULL;
- scanner->max_parse_errors = 0;
+ scanner->max_parse_errors = 1;
scanner->parse_errors = 0;
scanner->input_name = NULL;
g_datalist_init (&scanner->qdata);
scanner->config->char_2_token = config_templ->char_2_token;
scanner->config->symbol_2_token = config_templ->symbol_2_token;
scanner->config->scope_0_fallback = config_templ->scope_0_fallback;
+ scanner->config->store_int64 = config_templ->store_int64;
scanner->token = G_TOKEN_NONE;
- scanner->value.v_int = 0;
+ scanner->value.v_int64 = 0;
scanner->line = 1;
scanner->position = 0;
scanner->next_token = G_TOKEN_NONE;
- scanner->next_value.v_int = 0;
+ scanner->next_value.v_int64 = 0;
scanner->next_line = 1;
scanner->next_position = 0;
g_free (key);
}
+/**
+ * g_scanner_destroy:
+ * @scanner: a #GScanner
+ *
+ * Frees all memory used by the #GScanner.
+ */
void
-g_scanner_destroy (GScanner *scanner)
+g_scanner_destroy (GScanner *scanner)
{
g_return_if_fail (scanner != NULL);
{
g_return_if_fail (scanner != NULL);
- fprintf (stderr, "%s:%d: ", scanner->input_name, scanner->line);
+ _g_fprintf (stderr, "%s:%d: ",
+ scanner->input_name ? scanner->input_name : "<memory>",
+ scanner->line);
if (is_error)
- fprintf (stderr, "error: ");
- fprintf (stderr, "%s\n", message);
+ _g_fprintf (stderr, "error: ");
+ _g_fprintf (stderr, "%s\n", message);
}
+/**
+ * g_scanner_error:
+ * @scanner: a #GScanner
+ * @format: the message format. See the printf() documentation
+ * @...: the parameters to insert into the format string
+ *
+ * Outputs an error message, via the #GScanner message handler.
+ */
void
g_scanner_error (GScanner *scanner,
const gchar *format,
}
}
+/**
+ * g_scanner_warn:
+ * @scanner: a #GScanner
+ * @format: the message format. See the printf() documentation
+ * @...: the parameters to insert into the format string
+ *
+ * Outputs a warning message, via the #GScanner message handler.
+ */
void
g_scanner_warn (GScanner *scanner,
const gchar *format,
return key_p;
}
+/**
+ * g_scanner_add_symbol:
+ * @scanner: a #GScanner
+ * @symbol: the symbol to add
+ * @value: the value of the symbol
+ *
+ * Adds a symbol to the default scope.
+ *
+ * Deprecated: 2.2: Use g_scanner_scope_add_symbol() instead.
+ */
+
+/**
+ * g_scanner_scope_add_symbol:
+ * @scanner: a #GScanner
+ * @scope_id: the scope id
+ * @symbol: the symbol to add
+ * @value: the value of the symbol
+ *
+ * Adds a symbol to the given scope.
+ */
void
g_scanner_scope_add_symbol (GScanner *scanner,
guint scope_id,
c++;
}
}
- g_hash_table_insert (scanner->symbol_table, key, key);
+ g_hash_table_add (scanner->symbol_table, key);
}
else
key->value = value;
}
+/**
+ * g_scanner_remove_symbol:
+ * @scanner: a #GScanner
+ * @symbol: the symbol to remove
+ *
+ * Removes a symbol from the default scope.
+ *
+ * Deprecated: 2.2: Use g_scanner_scope_remove_symbol() instead.
+ */
+
+/**
+ * g_scanner_scope_remove_symbol:
+ * @scanner: a #GScanner
+ * @scope_id: the scope id
+ * @symbol: the symbol to remove
+ *
+ * Removes a symbol from a scope.
+ */
void
g_scanner_scope_remove_symbol (GScanner *scanner,
guint scope_id,
}
}
+/**
+ * g_scanner_freeze_symbol_table:
+ * @scanner: a #GScanner
+ *
+ * There is no reason to use this macro, since it does nothing.
+ *
+ * Deprecated: 2.2: This macro does nothing.
+ */
+
+/**
+ * g_scanner_thaw_symbol_table:
+ * @scanner: a #GScanner
+ *
+ * There is no reason to use this macro, since it does nothing.
+ *
+ * Deprecated: 2.2: This macro does nothing.
+ */
+
+/**
+ * g_scanner_lookup_symbol:
+ * @scanner: a #GScanner
+ * @symbol: the symbol to look up
+ *
+ * Looks up a symbol in the current scope and returns its value.
+ * If the symbol is not bound in the current scope, %NULL is
+ * returned.
+ *
+ * Returns: the value of @symbol in the current scope, or %NULL
+ * if @symbol is not bound in the current scope
+ */
gpointer
g_scanner_lookup_symbol (GScanner *scanner,
const gchar *symbol)
return NULL;
}
+/**
+ * g_scanner_scope_lookup_symbol:
+ * @scanner: a #GScanner
+ * @scope_id: the scope id
+ * @symbol: the symbol to look up
+ *
+ * Looks up a symbol in a scope and returns its value. If the
+ * symbol is not bound in the scope, %NULL is returned.
+ *
+ * Returns: the value of @symbol in the given scope, or %NULL
+ * if @symbol is not bound in the given scope.
+ *
+ */
gpointer
g_scanner_scope_lookup_symbol (GScanner *scanner,
guint scope_id,
return NULL;
}
+/**
+ * g_scanner_set_scope:
+ * @scanner: a #GScanner
+ * @scope_id: the new scope id
+ *
+ * Sets the current scope.
+ *
+ * Returns: the old scope id
+ */
guint
g_scanner_set_scope (GScanner *scanner,
guint scope_id)
func (key->symbol, key->value, user_data);
}
+/**
+ * g_scanner_foreach_symbol:
+ * @scanner: a #GScanner
+ * @func: the function to call with each symbol
+ * @data: data to pass to the function
+ *
+ * Calls a function for each symbol in the default scope.
+ *
+ * Deprecated: 2.2: Use g_scanner_scope_foreach_symbol() instead.
+ */
+
+/**
+ * g_scanner_scope_foreach_symbol:
+ * @scanner: a #GScanner
+ * @scope_id: the scope id
+ * @func: the function to call for each symbol/value pair
+ * @user_data: user data to pass to the function
+ *
+ * Calls the given function for each of the symbol/value pairs
+ * in the given scope of the #GScanner. The function is passed
+ * the symbol and value of each pair, and the given @user_data
+ * parameter.
+ */
void
g_scanner_scope_foreach_symbol (GScanner *scanner,
guint scope_id,
g_hash_table_foreach (scanner->symbol_table, g_scanner_foreach_internal, d);
}
+/**
+ * g_scanner_peek_next_token:
+ * @scanner: a #GScanner
+ *
+ * Parses the next token, without removing it from the input stream.
+ * The token data is placed in the @next_token, @next_value, @next_line,
+ * and @next_position fields of the #GScanner structure.
+ *
+ * Note that, while the token is not removed from the input stream
+ * (i.e. the next call to g_scanner_get_next_token() will return the
+ * same token), it will not be reevaluated. This can lead to surprising
+ * results when changing scope or the scanner configuration after peeking
+ * the next token. Getting the next token after switching the scope or
+ * configuration will return whatever was peeked before, regardless of
+ * any symbols that may have been added or removed in the new scope.
+ *
+ * Returns: the type of the token
+ */
GTokenType
g_scanner_peek_next_token (GScanner *scanner)
{
return scanner->next_token;
}
+/**
+ * g_scanner_get_next_token:
+ * @scanner: a #GScanner
+ *
+ * Parses the next token just like g_scanner_peek_next_token()
+ * and also removes it from the input stream. The token data is
+ * placed in the @token, @value, @line, and @position fields of
+ * the #GScanner structure.
+ *
+ * Returns: the type of the token
+ */
GTokenType
g_scanner_get_next_token (GScanner *scanner)
{
return scanner->token;
}
+/**
+ * g_scanner_cur_token:
+ * @scanner: a #GScanner
+ *
+ * Gets the current token type. This is simply the @token
+ * field in the #GScanner structure.
+ *
+ * Returns: the current token type
+ */
GTokenType
g_scanner_cur_token (GScanner *scanner)
{
return scanner->token;
}
+/**
+ * g_scanner_cur_value:
+ * @scanner: a #GScanner
+ *
+ * Gets the current token value. This is simply the @value
+ * field in the #GScanner structure.
+ *
+ * Returns: the current token value
+ */
GTokenValue
g_scanner_cur_value (GScanner *scanner)
{
GTokenValue v;
- v.v_int = 0;
+ v.v_int64 = 0;
g_return_val_if_fail (scanner != NULL, v);
return v;
}
+/**
+ * g_scanner_cur_line:
+ * @scanner: a #GScanner
+ *
+ * Returns the current line in the input stream (counting
+ * from 1). This is the line of the last token parsed via
+ * g_scanner_get_next_token().
+ *
+ * Returns: the current line
+ */
guint
g_scanner_cur_line (GScanner *scanner)
{
return scanner->line;
}
+/**
+ * g_scanner_cur_position:
+ * @scanner: a #GScanner
+ *
+ * Returns the current position in the current line (counting
+ * from 0). This is the position of the last token parsed via
+ * g_scanner_get_next_token().
+ *
+ * Returns: the current position on the line
+ */
guint
g_scanner_cur_position (GScanner *scanner)
{
return scanner->position;
}
+/**
+ * g_scanner_eof:
+ * @scanner: a #GScanner
+ *
+ * Returns %TRUE if the scanner has reached the end of
+ * the file or text buffer, or if the current token is
+ * %G_TOKEN_ERROR.
+ *
+ * Returns: %TRUE if the scanner has reached the end of
+ * the file or text buffer, or the current token is %G_TOKEN_ERROR
+ */
gboolean
g_scanner_eof (GScanner *scanner)
{
return scanner->token == G_TOKEN_EOF || scanner->token == G_TOKEN_ERROR;
}
+/**
+ * g_scanner_input_file:
+ * @scanner: a #GScanner
+ * @input_fd: a file descriptor
+ *
+ * Prepares to scan a file.
+ */
void
g_scanner_input_file (GScanner *scanner,
gint input_fd)
g_scanner_sync_file_offset (scanner);
scanner->token = G_TOKEN_NONE;
- scanner->value.v_int = 0;
+ scanner->value.v_int64 = 0;
scanner->line = 1;
scanner->position = 0;
scanner->next_token = G_TOKEN_NONE;
scanner->buffer = g_new (gchar, READ_BUFFER_SIZE + 1);
}
+/**
+ * g_scanner_input_text:
+ * @scanner: a #GScanner
+ * @text: the text buffer to scan
+ * @text_len: the length of the text buffer
+ *
+ * Prepares to scan a text buffer.
+ */
void
g_scanner_input_text (GScanner *scanner,
const gchar *text,
g_scanner_sync_file_offset (scanner);
scanner->token = G_TOKEN_NONE;
- scanner->value.v_int = 0;
+ scanner->value.v_int64 = 0;
scanner->line = 1;
scanner->position = 0;
scanner->next_token = G_TOKEN_NONE;
return 0;
}
+/**
+ * g_scanner_sync_file_offset:
+ * @scanner: a #GScanner
+ *
+ * Rewinds the file descriptor to the current buffer position
+ * and discards the file read-ahead buffer. This is useful for
+ * third-party uses of the scanner's file descriptor, which hook
+ * onto the current scanning position.
+ */
void
g_scanner_sync_file_offset (GScanner *scanner)
{
g_return_if_fail (scanner != NULL);
/* for file input, rewind the filedescriptor to the current
- * buffer position and blow the file read ahead buffer. usefull for
- * third party uses of our filedescriptor, which hooks onto the current
- * scanning position.
+ * buffer position and blow the file read ahead buffer. useful
+ * for third party uses of our file descriptor, which hooks
+ * onto the current scanning position.
*/
if (scanner->input_fd >= 0 && scanner->text_end > scanner->text)
return fchar;
}
+/**
+ * g_scanner_unexp_token:
+ * @scanner: a #GScanner
+ * @expected_token: the expected token
+ * @identifier_spec: a string describing how the scanner's user
+ * refers to identifiers (%NULL defaults to "identifier").
+ * This is used if @expected_token is %G_TOKEN_IDENTIFIER or
+ * %G_TOKEN_IDENTIFIER_NULL.
+ * @symbol_spec: a string describing how the scanner's user refers
+ * to symbols (%NULL defaults to "symbol"). This is used if
+ * @expected_token is %G_TOKEN_SYMBOL or any token value greater
+ * than %G_TOKEN_LAST.
+ * @symbol_name: the name of the symbol, if the scanner's current
+ * token is a symbol.
+ * @message: a message string to output at the end of the
+ * warning/error, or %NULL.
+ * @is_error: if %TRUE it is output as an error. If %FALSE it is
+ * output as a warning.
+ *
+ * Outputs a message through the scanner's msg_handler,
+ * resulting from an unexpected token in the input stream.
+ * Note that you should not call g_scanner_peek_next_token()
+ * followed by g_scanner_unexp_token() without an intermediate
+ * call to g_scanner_get_next_token(), as g_scanner_unexp_token()
+ * evaluates the scanner's current token (not the peeked token)
+ * to construct part of the message.
+ */
void
g_scanner_unexp_token (GScanner *scanner,
GTokenType expected_token,
switch (scanner->token)
{
case G_TOKEN_EOF:
- g_snprintf (token_string, token_string_len, "end of file");
+ _g_snprintf (token_string, token_string_len, "end of file");
break;
default:
if ((scanner->token >= ' ' && scanner->token <= '~') ||
strchr (scanner->config->cset_identifier_first, scanner->token) ||
strchr (scanner->config->cset_identifier_nth, scanner->token))
- g_snprintf (token_string, token_string_len, "character `%c'", scanner->token);
+ _g_snprintf (token_string, token_string_len, "character '%c'", scanner->token);
else
- g_snprintf (token_string, token_string_len, "character `\\%o'", scanner->token);
+ _g_snprintf (token_string, token_string_len, "character '\\%o'", scanner->token);
break;
}
else if (!scanner->config->symbol_2_token)
{
- g_snprintf (token_string, token_string_len, "(unknown) token <%d>", scanner->token);
+ _g_snprintf (token_string, token_string_len, "(unknown) token <%d>", scanner->token);
break;
}
- /* fall through */
+ G_GNUC_FALLTHROUGH;
case G_TOKEN_SYMBOL:
if (expected_token == G_TOKEN_SYMBOL ||
(scanner->config->symbol_2_token &&
expected_token > G_TOKEN_LAST))
print_unexp = FALSE;
if (symbol_name)
- g_snprintf (token_string,
- token_string_len,
- "%s%s `%s'",
- print_unexp ? "" : "invalid ",
- symbol_spec,
- symbol_name);
+ _g_snprintf (token_string,
+ token_string_len,
+ "%s%s '%s'",
+ print_unexp ? "" : "invalid ",
+ symbol_spec,
+ symbol_name);
else
- g_snprintf (token_string,
- token_string_len,
- "%s%s",
- print_unexp ? "" : "invalid ",
- symbol_spec);
+ _g_snprintf (token_string,
+ token_string_len,
+ "%s%s",
+ print_unexp ? "" : "invalid ",
+ symbol_spec);
break;
case G_TOKEN_ERROR:
switch (scanner->value.v_error)
{
case G_ERR_UNEXP_EOF:
- g_snprintf (token_string, token_string_len, "scanner: unexpected end of file");
+ _g_snprintf (token_string, token_string_len, "scanner: unexpected end of file");
break;
case G_ERR_UNEXP_EOF_IN_STRING:
- g_snprintf (token_string, token_string_len, "scanner: unterminated string constant");
+ _g_snprintf (token_string, token_string_len, "scanner: unterminated string constant");
break;
case G_ERR_UNEXP_EOF_IN_COMMENT:
- g_snprintf (token_string, token_string_len, "scanner: unterminated comment");
+ _g_snprintf (token_string, token_string_len, "scanner: unterminated comment");
break;
case G_ERR_NON_DIGIT_IN_CONST:
- g_snprintf (token_string, token_string_len, "scanner: non digit in constant");
+ _g_snprintf (token_string, token_string_len, "scanner: non digit in constant");
break;
case G_ERR_FLOAT_RADIX:
- g_snprintf (token_string, token_string_len, "scanner: invalid radix for floating constant");
+ _g_snprintf (token_string, token_string_len, "scanner: invalid radix for floating constant");
break;
case G_ERR_FLOAT_MALFORMED:
- g_snprintf (token_string, token_string_len, "scanner: malformed floating constant");
+ _g_snprintf (token_string, token_string_len, "scanner: malformed floating constant");
break;
case G_ERR_DIGIT_RADIX:
- g_snprintf (token_string, token_string_len, "scanner: digit is beyond radix");
+ _g_snprintf (token_string, token_string_len, "scanner: digit is beyond radix");
break;
case G_ERR_UNKNOWN:
default:
- g_snprintf (token_string, token_string_len, "scanner: unknown error");
+ _g_snprintf (token_string, token_string_len, "scanner: unknown error");
break;
}
break;
case G_TOKEN_CHAR:
- g_snprintf (token_string, token_string_len, "character `%c'", scanner->value.v_char);
+ _g_snprintf (token_string, token_string_len, "character '%c'", scanner->value.v_char);
break;
case G_TOKEN_IDENTIFIER:
if (expected_token == G_TOKEN_IDENTIFIER ||
expected_token == G_TOKEN_IDENTIFIER_NULL)
print_unexp = FALSE;
- g_snprintf (token_string,
+ _g_snprintf (token_string,
token_string_len,
- "%s%s `%s'",
+ "%s%s '%s'",
print_unexp ? "" : "invalid ",
identifier_spec,
scanner->token == G_TOKEN_IDENTIFIER ? scanner->value.v_string : "null");
case G_TOKEN_OCTAL:
case G_TOKEN_INT:
case G_TOKEN_HEX:
- g_snprintf (token_string, token_string_len, "number `%ld'", scanner->value.v_int);
+ if (scanner->config->store_int64)
+ _g_snprintf (token_string, token_string_len, "number '%" G_GUINT64_FORMAT "'", scanner->value.v_int64);
+ else
+ _g_snprintf (token_string, token_string_len, "number '%lu'", scanner->value.v_int);
break;
case G_TOKEN_FLOAT:
- g_snprintf (token_string, token_string_len, "number `%.3f'", scanner->value.v_float);
+ _g_snprintf (token_string, token_string_len, "number '%.3f'", scanner->value.v_float);
break;
case G_TOKEN_STRING:
if (expected_token == G_TOKEN_STRING)
print_unexp = FALSE;
- g_snprintf (token_string,
- token_string_len,
- "%s%sstring constant \"%s\"",
- print_unexp ? "" : "invalid ",
- scanner->value.v_string[0] == 0 ? "empty " : "",
- scanner->value.v_string);
+ _g_snprintf (token_string,
+ token_string_len,
+ "%s%sstring constant \"%s\"",
+ print_unexp ? "" : "invalid ",
+ scanner->value.v_string[0] == 0 ? "empty " : "",
+ scanner->value.v_string);
token_string[token_string_len - 2] = '"';
token_string[token_string_len - 1] = 0;
break;
case G_TOKEN_COMMENT_SINGLE:
case G_TOKEN_COMMENT_MULTI:
- g_snprintf (token_string, token_string_len, "comment");
+ _g_snprintf (token_string, token_string_len, "comment");
break;
case G_TOKEN_NONE:
switch (expected_token)
{
gboolean need_valid;
-
+ gchar *tstring;
+ case G_TOKEN_EOF:
+ _g_snprintf (expected_string, expected_string_len, "end of file");
+ break;
default:
if (expected_token >= 1 && expected_token <= 255)
{
if ((expected_token >= ' ' && expected_token <= '~') ||
strchr (scanner->config->cset_identifier_first, expected_token) ||
strchr (scanner->config->cset_identifier_nth, expected_token))
- g_snprintf (expected_string, expected_string_len, "character `%c'", expected_token);
+ _g_snprintf (expected_string, expected_string_len, "character '%c'", expected_token);
else
- g_snprintf (expected_string, expected_string_len, "character `\\%o'", expected_token);
+ _g_snprintf (expected_string, expected_string_len, "character '\\%o'", expected_token);
break;
}
else if (!scanner->config->symbol_2_token)
{
- g_snprintf (expected_string, expected_string_len, "(unknown) token <%d>", expected_token);
+ _g_snprintf (expected_string, expected_string_len, "(unknown) token <%d>", expected_token);
break;
}
- /* fall through */
+ G_GNUC_FALLTHROUGH;
case G_TOKEN_SYMBOL:
need_valid = (scanner->token == G_TOKEN_SYMBOL ||
(scanner->config->symbol_2_token &&
scanner->token > G_TOKEN_LAST));
- g_snprintf (expected_string,
- expected_string_len,
- "%s%s",
- need_valid ? "valid " : "",
- symbol_spec);
- /* FIXME: should we attempt to lookup the symbol_name for symbol_2_token? */
+ _g_snprintf (expected_string,
+ expected_string_len,
+ "%s%s",
+ need_valid ? "valid " : "",
+ symbol_spec);
+ /* FIXME: should we attempt to look up the symbol_name for symbol_2_token? */
+ break;
+ case G_TOKEN_CHAR:
+ _g_snprintf (expected_string, expected_string_len, "%scharacter",
+ scanner->token == G_TOKEN_CHAR ? "valid " : "");
+ break;
+ case G_TOKEN_BINARY:
+ tstring = "binary";
+ _g_snprintf (expected_string, expected_string_len, "%snumber (%s)",
+ scanner->token == expected_token ? "valid " : "", tstring);
+ break;
+ case G_TOKEN_OCTAL:
+ tstring = "octal";
+ _g_snprintf (expected_string, expected_string_len, "%snumber (%s)",
+ scanner->token == expected_token ? "valid " : "", tstring);
break;
-
case G_TOKEN_INT:
- g_snprintf (expected_string, expected_string_len, "%snumber (integer)",
- scanner->token == G_TOKEN_INT ? "valid " : "");
+ tstring = "integer";
+ _g_snprintf (expected_string, expected_string_len, "%snumber (%s)",
+ scanner->token == expected_token ? "valid " : "", tstring);
+ break;
+ case G_TOKEN_HEX:
+ tstring = "hexadecimal";
+ _g_snprintf (expected_string, expected_string_len, "%snumber (%s)",
+ scanner->token == expected_token ? "valid " : "", tstring);
break;
-
case G_TOKEN_FLOAT:
- g_snprintf (expected_string, expected_string_len, "%snumber (float)",
- scanner->token == G_TOKEN_FLOAT ? "valid " : "");
+ tstring = "float";
+ _g_snprintf (expected_string, expected_string_len, "%snumber (%s)",
+ scanner->token == expected_token ? "valid " : "", tstring);
break;
-
case G_TOKEN_STRING:
- g_snprintf (expected_string,
- expected_string_len,
- "%sstring constant",
- scanner->token == G_TOKEN_STRING ? "valid " : "");
+ _g_snprintf (expected_string,
+ expected_string_len,
+ "%sstring constant",
+ scanner->token == G_TOKEN_STRING ? "valid " : "");
break;
-
case G_TOKEN_IDENTIFIER:
case G_TOKEN_IDENTIFIER_NULL:
- g_snprintf (expected_string,
- expected_string_len,
- "%s%s",
- (scanner->token == G_TOKEN_IDENTIFIER_NULL ||
- scanner->token == G_TOKEN_IDENTIFIER ? "valid " : ""),
- identifier_spec);
+ need_valid = (scanner->token == G_TOKEN_IDENTIFIER_NULL ||
+ scanner->token == G_TOKEN_IDENTIFIER);
+ _g_snprintf (expected_string,
+ expected_string_len,
+ "%s%s",
+ need_valid ? "valid " : "",
+ identifier_spec);
break;
-
- case G_TOKEN_EOF:
- g_snprintf (expected_string, expected_string_len, "end of file");
+ case G_TOKEN_COMMENT_SINGLE:
+ tstring = "single-line";
+ _g_snprintf (expected_string, expected_string_len, "%scomment (%s)",
+ scanner->token == expected_token ? "valid " : "", tstring);
+ break;
+ case G_TOKEN_COMMENT_MULTI:
+ tstring = "multi-line";
+ _g_snprintf (expected_string, expected_string_len, "%scomment (%s)",
+ scanner->token == expected_token ? "valid " : "", tstring);
break;
-
case G_TOKEN_NONE:
+ case G_TOKEN_ERROR:
+ /* this is handled upon printout */
break;
}
message_prefix = "";
message = "";
}
-
- if (expected_token != G_TOKEN_NONE)
+ if (expected_token == G_TOKEN_ERROR)
+ {
+ msg_handler (scanner,
+ "failure around %s%s%s",
+ token_string,
+ message_prefix,
+ message);
+ }
+ else if (expected_token == G_TOKEN_NONE)
{
if (print_unexp)
msg_handler (scanner,
- "unexpected %s, expected %s%s%s",
+ "unexpected %s%s%s",
token_string,
- expected_string,
message_prefix,
message);
else
msg_handler (scanner,
- "%s, expected %s%s%s",
+ "%s%s%s",
token_string,
- expected_string,
message_prefix,
message);
}
{
if (print_unexp)
msg_handler (scanner,
- "unexpected %s%s%s",
+ "unexpected %s, expected %s%s%s",
token_string,
+ expected_string,
message_prefix,
message);
else
msg_handler (scanner,
- "%s%s%s",
+ "%s, expected %s%s%s",
token_string,
+ expected_string,
message_prefix,
message);
}
scanner->config->int_2_float)
{
*token_p = G_TOKEN_FLOAT;
- value_p->v_float = value_p->v_int;
+
+ /* Have to assign through a temporary variable to avoid undefined behaviour
+ * by copying between potentially-overlapping union members. */
+ if (scanner->config->store_int64)
+ {
+ gint64 temp = value_p->v_int64;
+ value_p->v_float = temp;
+ }
+ else
+ {
+ gint temp = value_p->v_int;
+ value_p->v_float = temp;
+ }
}
errno = 0;
guchar ch;
config = scanner->config;
- (*value_p).v_int = 0;
+ (*value_p).v_int64 = 0;
if ((scanner->text >= scanner->text_end && scanner->input_fd < 0) ||
scanner->token == G_TOKEN_EOF)
ch = g_scanner_get_char (scanner, line_p, position_p);
- value.v_int = 0;
+ value.v_int64 = 0;
token = G_TOKEN_NONE;
/* this is *evil*, but needed ;(
g_scanner_get_char (scanner, line_p, position_p);
token = G_TOKEN_COMMENT_MULTI;
in_comment_multi = TRUE;
- gstring = g_string_new ("");
+ gstring = g_string_new (NULL);
while ((ch = g_scanner_get_char (scanner, line_p, position_p)) != 0)
{
if (ch == '*' && g_scanner_peek_next_char (scanner) == '/')
goto default_case;
token = G_TOKEN_STRING;
in_string_sq = TRUE;
- gstring = g_string_new ("");
+ gstring = g_string_new (NULL);
while ((ch = g_scanner_get_char (scanner, line_p, position_p)) != 0)
{
if (ch == '\'')
goto default_case;
token = G_TOKEN_STRING;
in_string_dq = TRUE;
- gstring = g_string_new ("");
+ gstring = g_string_new (NULL);
while ((ch = g_scanner_get_char (scanner, line_p, position_p)) != 0)
{
if (ch == '"')
}
else
ch = '0';
- /* fall through */
+ G_GNUC_FALLTHROUGH;
case '1':
case '2':
case '3':
while (in_number);
endptr = NULL;
- switch (token)
+ if (token == G_TOKEN_FLOAT)
+ value.v_float = g_strtod (gstring->str, &endptr);
+ else
{
- case G_TOKEN_BINARY:
- value.v_binary = strtol (gstring->str, &endptr, 2);
- break;
-
- case G_TOKEN_OCTAL:
- value.v_octal = strtol (gstring->str, &endptr, 8);
- break;
-
- case G_TOKEN_INT:
- value.v_int = strtol (gstring->str, &endptr, 10);
- break;
-
- case G_TOKEN_FLOAT:
- value.v_float = g_strtod (gstring->str, &endptr);
- break;
-
- case G_TOKEN_HEX:
- value.v_hex = strtol (gstring->str, &endptr, 16);
- break;
-
- default:
- break;
+ guint64 ui64 = 0;
+ switch (token)
+ {
+ case G_TOKEN_BINARY:
+ ui64 = g_ascii_strtoull (gstring->str, &endptr, 2);
+ break;
+ case G_TOKEN_OCTAL:
+ ui64 = g_ascii_strtoull (gstring->str, &endptr, 8);
+ break;
+ case G_TOKEN_INT:
+ ui64 = g_ascii_strtoull (gstring->str, &endptr, 10);
+ break;
+ case G_TOKEN_HEX:
+ ui64 = g_ascii_strtoull (gstring->str, &endptr, 16);
+ break;
+ default: ;
+ }
+ if (scanner->config->store_int64)
+ value.v_int64 = ui64;
+ else
+ value.v_int = ui64;
}
if (endptr && *endptr)
{
{
token = G_TOKEN_COMMENT_SINGLE;
in_comment_single = TRUE;
- gstring = g_string_new ("");
+ gstring = g_string_new (NULL);
ch = g_scanner_get_char (scanner, line_p, position_p);
while (ch != 0)
{
gstring = g_string_append_c (gstring, ch);
ch = g_scanner_get_char (scanner, line_p, position_p);
}
+ /* ignore a missing newline at EOF for single line comments */
+ if (in_comment_single &&
+ config->cpair_comment_single[1] == '\n')
+ in_comment_single = FALSE;
}
else if (config->scan_identifier && ch &&
strchr (config->cset_identifier_first, ch))
g_scanner_peek_next_char (scanner)))
{
token = G_TOKEN_IDENTIFIER;
- gstring = g_string_new ("");
+ gstring = g_string_new (NULL);
gstring = g_string_append_c (gstring, ch);
do
{
if (gstring)
{
- value.v_string = gstring->str;
- g_string_free (gstring, FALSE);
+ value.v_string = g_string_free (gstring, FALSE);
gstring = NULL;
}