From 57c2e6f7512d01ce301e97f4720696c43ab64381 Mon Sep 17 00:00:00 2001 From: Matthias Clasen Date: Thu, 15 Mar 2007 04:54:03 +0000 Subject: [PATCH] Some optimizations, use a lookup table for character classes, pre-allocate 2007-03-15 Matthias Clasen * glib/gscanner.[hc]: Some optimizations, use a lookup table for character classes, pre-allocate GStrings with reasonable sizes. (#415323, Charlie Brej) svn path=/trunk/; revision=5405 --- ChangeLog | 6 ++++++ glib/gscanner.c | 64 ++++++++++++++++++++++++++++++++++++++++++--------------- glib/gscanner.h | 2 +- 3 files changed, 54 insertions(+), 18 deletions(-) diff --git a/ChangeLog b/ChangeLog index 79f4796..2347656 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,9 @@ +2007-03-15 Matthias Clasen + + * glib/gscanner.[hc]: Some optimizations, use a lookup + table for character classes, pre-allocate GStrings with + reasonable sizes. (#415323, Charlie Brej) + 2007-03-14 Matthias Clasen * glib/gkeyfile.c (g_key_file_get_double): Fix a diff --git a/glib/gscanner.c b/glib/gscanner.c index 233cc73..6385d52 100644 --- a/glib/gscanner.c +++ b/glib/gscanner.c @@ -61,6 +61,9 @@ ) #define READ_BUFFER_SIZE (4000) +#define CSET_SKIP_FLAG (1<<0) +#define CSET_IDENT_FIRST_FLAG (1<<1) +#define CSET_IDENT_NTH_FLAG (1<<2) /* --- typedefs --- */ typedef struct _GScannerKey GScannerKey; @@ -168,6 +171,24 @@ g_scanner_char_2_num (guchar c, return -1; } +static void +g_scanner_cset_2_table (guint* table, guchar* cset, guint flag) +{ + while (*cset) + { + table[*cset] |= flag; + cset++; + } + return; +} + +static inline guint +g_scanner_cset_table_lookup (guint* table, guint c, guint mask) +{ + if (c>=256) return 0; + return (table[c] & mask); +} + GScanner* g_scanner_new (const GScannerConfig *config_templ) { @@ -234,6 +255,12 @@ g_scanner_new (const GScannerConfig *config_templ) scanner->msg_handler = g_scanner_msg_handler; + scanner->cset_table = g_new0(guint, 256); + g_scanner_cset_2_table (scanner->cset_table, (guchar*) scanner->config->cset_skip_characters, CSET_SKIP_FLAG); + g_scanner_cset_2_table (scanner->cset_table, (guchar*) scanner->config->cset_identifier_first, CSET_IDENT_FIRST_FLAG); + g_scanner_cset_2_table (scanner->cset_table, (guchar*) scanner->config->cset_identifier_nth, CSET_IDENT_NTH_FLAG); + + return scanner; } @@ -282,6 +309,7 @@ g_scanner_destroy (GScanner *scanner) g_scanner_free_value (&scanner->next_token, &scanner->next_value); g_free (scanner->config); g_free (scanner->buffer); + g_free (scanner->cset_table); g_free (scanner); } @@ -865,8 +893,8 @@ g_scanner_unexp_token (GScanner *scanner, if (scanner->token >= 1 && scanner->token <= 255) { if ((scanner->token >= ' ' && scanner->token <= '~') || - strchr (scanner->config->cset_identifier_first, scanner->token) || - strchr (scanner->config->cset_identifier_nth, scanner->token)) + g_scanner_cset_table_lookup (scanner->cset_table, scanner->token, + CSET_IDENT_FIRST_FLAG | CSET_IDENT_NTH_FLAG)) _g_snprintf (token_string, token_string_len, "character `%c'", scanner->token); else _g_snprintf (token_string, token_string_len, "character `\\%o'", scanner->token); @@ -1010,8 +1038,8 @@ g_scanner_unexp_token (GScanner *scanner, if (expected_token >= 1 && expected_token <= 255) { if ((expected_token >= ' ' && expected_token <= '~') || - strchr (scanner->config->cset_identifier_first, expected_token) || - strchr (scanner->config->cset_identifier_nth, expected_token)) + g_scanner_cset_table_lookup (scanner->cset_table, scanner->token, + CSET_IDENT_FIRST_FLAG | CSET_IDENT_NTH_FLAG)) _g_snprintf (expected_string, expected_string_len, "character `%c'", expected_token); else _g_snprintf (expected_string, expected_string_len, "character `\\%o'", expected_token); @@ -1160,9 +1188,9 @@ g_scanner_get_token_i (GScanner *scanner, g_scanner_get_token_ll (scanner, token_p, value_p, line_p, position_p); } while (((*token_p > 0 && *token_p < 256) && - strchr (scanner->config->cset_skip_characters, *token_p)) || + g_scanner_cset_table_lookup (scanner->cset_table, *token_p, CSET_SKIP_FLAG)) || (*token_p == G_TOKEN_CHAR && - strchr (scanner->config->cset_skip_characters, value_p->v_char)) || + g_scanner_cset_table_lookup (scanner->cset_table, value_p->v_char, CSET_SKIP_FLAG)) || (*token_p == G_TOKEN_COMMENT_MULTI && scanner->config->skip_comment_multi) || (*token_p == G_TOKEN_COMMENT_SINGLE && @@ -1258,7 +1286,7 @@ g_scanner_get_token_ll (GScanner *scanner, * might interfere with other key chars like slashes or numbers */ if (config->scan_identifier && - ch && strchr (config->cset_identifier_first, ch)) + ch && g_scanner_cset_table_lookup (scanner->cset_table, ch, CSET_IDENT_FIRST_FLAG)) goto identifier_precedence; switch (ch) @@ -1276,7 +1304,7 @@ g_scanner_get_token_ll (GScanner *scanner, g_scanner_get_char (scanner, line_p, position_p); token = G_TOKEN_COMMENT_MULTI; in_comment_multi = TRUE; - gstring = g_string_new (NULL); + gstring = g_string_sized_new (128); while ((ch = g_scanner_get_char (scanner, line_p, position_p)) != 0) { if (ch == '*' && g_scanner_peek_next_char (scanner) == '/') @@ -1296,7 +1324,7 @@ g_scanner_get_token_ll (GScanner *scanner, goto default_case; token = G_TOKEN_STRING; in_string_sq = TRUE; - gstring = g_string_new (NULL); + gstring = g_string_sized_new (32); while ((ch = g_scanner_get_char (scanner, line_p, position_p)) != 0) { if (ch == '\'') @@ -1315,7 +1343,7 @@ g_scanner_get_token_ll (GScanner *scanner, goto default_case; token = G_TOKEN_STRING; in_string_dq = TRUE; - gstring = g_string_new (NULL); + gstring = g_string_sized_new (32); while ((ch = g_scanner_get_char (scanner, line_p, position_p)) != 0) { if (ch == '"') @@ -1477,7 +1505,9 @@ g_scanner_get_token_ll (GScanner *scanner, if (token == G_TOKEN_NONE) token = G_TOKEN_INT; - gstring = g_string_new (dotted_float ? "0." : ""); + gstring = g_string_sized_new (16); + if (dotted_float) + gstring = g_string_append (gstring, "0."); gstring = g_string_append_c (gstring, ch); do /* while (in_number) */ @@ -1621,7 +1651,7 @@ g_scanner_get_token_ll (GScanner *scanner, { token = G_TOKEN_COMMENT_SINGLE; in_comment_single = TRUE; - gstring = g_string_new (NULL); + gstring = g_string_sized_new (128); ch = g_scanner_get_char (scanner, line_p, position_p); while (ch != 0) { @@ -1641,16 +1671,16 @@ g_scanner_get_token_ll (GScanner *scanner, in_comment_single = FALSE; } else if (config->scan_identifier && ch && - strchr (config->cset_identifier_first, ch)) + g_scanner_cset_table_lookup (scanner->cset_table, ch, CSET_IDENT_FIRST_FLAG)) { identifier_precedence: if (config->cset_identifier_nth && ch && - strchr (config->cset_identifier_nth, - g_scanner_peek_next_char (scanner))) + g_scanner_cset_table_lookup (scanner->cset_table, + g_scanner_peek_next_char (scanner), CSET_IDENT_NTH_FLAG)) { token = G_TOKEN_IDENTIFIER; - gstring = g_string_new (NULL); + gstring = g_string_sized_new (32); gstring = g_string_append_c (gstring, ch); do { @@ -1658,7 +1688,7 @@ g_scanner_get_token_ll (GScanner *scanner, gstring = g_string_append_c (gstring, ch); ch = g_scanner_peek_next_char (scanner); } - while (ch && strchr (config->cset_identifier_nth, ch)); + while (ch && g_scanner_cset_table_lookup (scanner->cset_table, ch, CSET_IDENT_FIRST_FLAG)); ch = 0; } else if (config->scan_identifier_1char) diff --git a/glib/gscanner.h b/glib/gscanner.h index a61c0a5..2eb5bb2 100644 --- a/glib/gscanner.h +++ b/glib/gscanner.h @@ -197,7 +197,7 @@ struct _GScanner const gchar *text_end; gchar *buffer; guint scope_id; - + guint *cset_table; /* handler function for _warn and _error */ GScannerMsgFunc msg_handler; }; -- 2.7.4