Some optimizations, use a lookup table for character classes, pre-allocate
authorMatthias Clasen <mclasen@redhat.com>
Thu, 15 Mar 2007 04:54:03 +0000 (04:54 +0000)
committerMatthias Clasen <matthiasc@src.gnome.org>
Thu, 15 Mar 2007 04:54:03 +0000 (04:54 +0000)
2007-03-15  Matthias Clasen  <mclasen@redhat.com>

        * glib/gscanner.[hc]: Some optimizations, use a lookup
        table for character classes, pre-allocate GStrings with
        reasonable sizes.  (#415323, Charlie Brej)

svn path=/trunk/; revision=5405

ChangeLog
glib/gscanner.c
glib/gscanner.h

index 79f4796..2347656 100644 (file)
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,9 @@
+2007-03-15  Matthias Clasen  <mclasen@redhat.com>
+
+       * glib/gscanner.[hc]: Some optimizations, use a lookup
+       table for character classes, pre-allocate GStrings with
+       reasonable sizes.  (#415323, Charlie Brej)
+
 2007-03-14  Matthias Clasen  <mclasen@redhat.com>
 
        * glib/gkeyfile.c (g_key_file_get_double): Fix a 
index 233cc73..6385d52 100644 (file)
@@ -61,6 +61,9 @@
 )
 #define        READ_BUFFER_SIZE        (4000)
 
+#define CSET_SKIP_FLAG         (1<<0)
+#define CSET_IDENT_FIRST_FLAG  (1<<1)
+#define CSET_IDENT_NTH_FLAG    (1<<2)
 
 /* --- typedefs --- */
 typedef        struct  _GScannerKey    GScannerKey;
@@ -168,6 +171,24 @@ g_scanner_char_2_num (guchar       c,
   return -1;
 }
 
+static void
+g_scanner_cset_2_table (guint* table, guchar* cset, guint flag)
+{
+  while (*cset)
+    {
+       table[*cset] |= flag;
+       cset++;
+    }
+ return;
+}
+
+static inline guint
+g_scanner_cset_table_lookup (guint* table, guint c, guint mask)
+{
+  if (c>=256) return 0;
+  return (table[c] & mask);
+}
+
 GScanner*
 g_scanner_new (const GScannerConfig *config_templ)
 {
@@ -234,6 +255,12 @@ g_scanner_new (const GScannerConfig *config_templ)
   
   scanner->msg_handler = g_scanner_msg_handler;
   
+  scanner->cset_table = g_new0(guint, 256);
+  g_scanner_cset_2_table (scanner->cset_table, (guchar*) scanner->config->cset_skip_characters, CSET_SKIP_FLAG);
+  g_scanner_cset_2_table (scanner->cset_table, (guchar*) scanner->config->cset_identifier_first, CSET_IDENT_FIRST_FLAG);
+  g_scanner_cset_2_table (scanner->cset_table, (guchar*) scanner->config->cset_identifier_nth, CSET_IDENT_NTH_FLAG);
+
+  
   return scanner;
 }
 
@@ -282,6 +309,7 @@ g_scanner_destroy (GScanner *scanner)
   g_scanner_free_value (&scanner->next_token, &scanner->next_value);
   g_free (scanner->config);
   g_free (scanner->buffer);
+  g_free (scanner->cset_table);
   g_free (scanner);
 }
 
@@ -865,8 +893,8 @@ g_scanner_unexp_token (GScanner             *scanner,
       if (scanner->token >= 1 && scanner->token <= 255)
        {
          if ((scanner->token >= ' ' && scanner->token <= '~') ||
-             strchr (scanner->config->cset_identifier_first, scanner->token) ||
-             strchr (scanner->config->cset_identifier_nth, scanner->token))
+             g_scanner_cset_table_lookup (scanner->cset_table, scanner->token, 
+                                       CSET_IDENT_FIRST_FLAG | CSET_IDENT_NTH_FLAG))
            _g_snprintf (token_string, token_string_len, "character `%c'", scanner->token);
          else
            _g_snprintf (token_string, token_string_len, "character `\\%o'", scanner->token);
@@ -1010,8 +1038,8 @@ g_scanner_unexp_token (GScanner           *scanner,
       if (expected_token >= 1 && expected_token <= 255)
        {
          if ((expected_token >= ' ' && expected_token <= '~') ||
-             strchr (scanner->config->cset_identifier_first, expected_token) ||
-             strchr (scanner->config->cset_identifier_nth, expected_token))
+             g_scanner_cset_table_lookup (scanner->cset_table, scanner->token, 
+                                       CSET_IDENT_FIRST_FLAG | CSET_IDENT_NTH_FLAG))
            _g_snprintf (expected_string, expected_string_len, "character `%c'", expected_token);
          else
            _g_snprintf (expected_string, expected_string_len, "character `\\%o'", expected_token);
@@ -1160,9 +1188,9 @@ g_scanner_get_token_i (GScanner   *scanner,
       g_scanner_get_token_ll (scanner, token_p, value_p, line_p, position_p);
     }
   while (((*token_p > 0 && *token_p < 256) &&
-         strchr (scanner->config->cset_skip_characters, *token_p)) ||
+         g_scanner_cset_table_lookup (scanner->cset_table, *token_p, CSET_SKIP_FLAG)) ||
         (*token_p == G_TOKEN_CHAR &&
-         strchr (scanner->config->cset_skip_characters, value_p->v_char)) ||
+         g_scanner_cset_table_lookup (scanner->cset_table, value_p->v_char, CSET_SKIP_FLAG)) ||
         (*token_p == G_TOKEN_COMMENT_MULTI &&
          scanner->config->skip_comment_multi) ||
         (*token_p == G_TOKEN_COMMENT_SINGLE &&
@@ -1258,7 +1286,7 @@ g_scanner_get_token_ll    (GScanner       *scanner,
        * might interfere with other key chars like slashes or numbers
        */
       if (config->scan_identifier &&
-         ch && strchr (config->cset_identifier_first, ch))
+         ch && g_scanner_cset_table_lookup (scanner->cset_table, ch, CSET_IDENT_FIRST_FLAG))
        goto identifier_precedence;
       
       switch (ch)
@@ -1276,7 +1304,7 @@ g_scanner_get_token_ll    (GScanner       *scanner,
          g_scanner_get_char (scanner, line_p, position_p);
          token = G_TOKEN_COMMENT_MULTI;
          in_comment_multi = TRUE;
-         gstring = g_string_new (NULL);
+         gstring = g_string_sized_new (128);
          while ((ch = g_scanner_get_char (scanner, line_p, position_p)) != 0)
            {
              if (ch == '*' && g_scanner_peek_next_char (scanner) == '/')
@@ -1296,7 +1324,7 @@ g_scanner_get_token_ll    (GScanner       *scanner,
            goto default_case;
          token = G_TOKEN_STRING;
          in_string_sq = TRUE;
-         gstring = g_string_new (NULL);
+         gstring = g_string_sized_new (32);
          while ((ch = g_scanner_get_char (scanner, line_p, position_p)) != 0)
            {
              if (ch == '\'')
@@ -1315,7 +1343,7 @@ g_scanner_get_token_ll    (GScanner       *scanner,
            goto default_case;
          token = G_TOKEN_STRING;
          in_string_dq = TRUE;
-         gstring = g_string_new (NULL);
+         gstring = g_string_sized_new (32);
          while ((ch = g_scanner_get_char (scanner, line_p, position_p)) != 0)
            {
              if (ch == '"')
@@ -1477,7 +1505,9 @@ g_scanner_get_token_ll    (GScanner       *scanner,
          if (token == G_TOKEN_NONE)
            token = G_TOKEN_INT;
          
-         gstring = g_string_new (dotted_float ? "0." : "");
+         gstring = g_string_sized_new (16);
+          if (dotted_float)
+            gstring = g_string_append (gstring, "0.");
          gstring = g_string_append_c (gstring, ch);
          
          do /* while (in_number) */
@@ -1621,7 +1651,7 @@ g_scanner_get_token_ll    (GScanner       *scanner,
            {
              token = G_TOKEN_COMMENT_SINGLE;
              in_comment_single = TRUE;
-             gstring = g_string_new (NULL);
+             gstring = g_string_sized_new (128);
              ch = g_scanner_get_char (scanner, line_p, position_p);
              while (ch != 0)
                {
@@ -1641,16 +1671,16 @@ g_scanner_get_token_ll  (GScanner       *scanner,
                in_comment_single = FALSE;
            }
          else if (config->scan_identifier && ch &&
-                  strchr (config->cset_identifier_first, ch))
+                  g_scanner_cset_table_lookup (scanner->cset_table, ch, CSET_IDENT_FIRST_FLAG))
            {
            identifier_precedence:
              
              if (config->cset_identifier_nth && ch &&
-                 strchr (config->cset_identifier_nth,
-                         g_scanner_peek_next_char (scanner)))
+                 g_scanner_cset_table_lookup (scanner->cset_table,
+                         g_scanner_peek_next_char (scanner), CSET_IDENT_NTH_FLAG))
                {
                  token = G_TOKEN_IDENTIFIER;
-                 gstring = g_string_new (NULL);
+                 gstring = g_string_sized_new (32);
                  gstring = g_string_append_c (gstring, ch);
                  do
                    {
@@ -1658,7 +1688,7 @@ g_scanner_get_token_ll    (GScanner       *scanner,
                      gstring = g_string_append_c (gstring, ch);
                      ch = g_scanner_peek_next_char (scanner);
                    }
-                 while (ch && strchr (config->cset_identifier_nth, ch));
+                 while (ch && g_scanner_cset_table_lookup (scanner->cset_table, ch, CSET_IDENT_FIRST_FLAG));
                  ch = 0;
                }
              else if (config->scan_identifier_1char)
index a61c0a5..2eb5bb2 100644 (file)
@@ -197,7 +197,7 @@ struct      _GScanner
   const gchar          *text_end;
   gchar                        *buffer;
   guint                        scope_id;
-  
+  guint                        *cset_table;
   /* handler function for _warn and _error */
   GScannerMsgFunc      msg_handler;
 };