Initial revision
[platform/upstream/glib.git] / gscanner.c
1 /* GLIB - Library of useful routines for C programming
2  * Copyright (C) 1995-1997  Peter Mattis, Spencer Kimball and Josh MacDonald
3  *
4  * GScanner: Flexible lexical scanner for general purpose.
5  * Copyright (C) 1997, 1998 Tim Janik
6  *
7  * This library is free software; you can redistribute it and/or
8  * modify it under the terms of the GNU Library General Public
9  * License as published by the Free Software Foundation; either
10  * version 2 of the License, or (at your option) any later version.
11  *
12  * This library is distributed in the hope that it will be useful,
13  * but WITHOUT ANY WARRANTY; without even the implied warranty of
14  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
15  * Library General Public License for more details.
16  *
17  * You should have received a copy of the GNU Library General Public
18  * License along with this library; if not, write to the
19  * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
20  * Boston, MA 02111-1307, USA.
21  */
22 #define         __gscanner_c__
23
24 #include        <stdlib.h>
25 #include        <stdarg.h>
26 #include        <string.h>
27 #include        <stdio.h>
28 #include        <unistd.h>
29 #include        <errno.h>
30 #include        <sys/types.h>   /* needed for sys/stat.h */
31 #include        <sys/stat.h>
32 #include        "glib.h"
33
34
35
36 /* --- defines --- */
/* Lower-case one byte. Bytes in 'A'..'Z', 192..214 and 216..222 get the
 * range's offset OR-ed in; every other byte is returned unchanged.
 * All three offsets ('a'-'A', 224-192, 248-216) evaluate to 32.
 * NOTE: the argument is evaluated more than once, so it must not have
 * side effects.
 */
#define to_lower(c)                                                        \
  ((guchar) (                                                              \
    ((guchar) (c)) |                                                       \
    ((((guchar) (c)) >= 'A' && ((guchar) (c)) <= 'Z') ? ('a' - 'A') : 0) | \
    ((((guchar) (c)) >= 192 && ((guchar) (c)) <= 214) ? (224 - 192) : 0) | \
    ((((guchar) (c)) >= 216 && ((guchar) (c)) <= 222) ? (248 - 216) : 0)   \
  ))
45
46
47 /* --- typedefs --- */
48 typedef struct  _GScannerHashVal        GScannerHashVal;
49
50 struct  _GScannerHashVal
51 {
52   gchar         *key;
53   gpointer      value;
54 };
55
56
57
58 /* --- variables --- */
59 static  GScannerConfig  g_scanner_config_template =
60 {
61   (
62    " \t\n"
63    )                    /* cset_skip_characters */,
64   (
65    G_CSET_a_2_z
66    "_"
67    G_CSET_A_2_Z
68    )                    /* cset_identifier_first */,
69   (
70    G_CSET_a_2_z
71    "_0123456789"
72    G_CSET_A_2_Z
73    G_CSET_LATINS
74    G_CSET_LATINC
75    )                    /* cset_identifier_nth */,
76   ( "#\n" )             /* cpair_comment_single */,
77   
78   FALSE                 /* case_sensitive */,
79   
80   TRUE                  /* skip_comment_multi */,
81   TRUE                  /* skip_comment_single */,
82   TRUE                  /* scan_comment_multi */,
83   TRUE                  /* scan_identifier */,
84   FALSE                 /* scan_identifier_1char */,
85   FALSE                 /* scan_identifier_NULL */,
86   TRUE                  /* scan_symbols */,
87   FALSE                 /* scan_binary */,
88   TRUE                  /* scan_octal */,
89   TRUE                  /* scan_float */,
90   TRUE                  /* scan_hex */,
91   FALSE                 /* scan_hex_dollar */,
92   TRUE                  /* scan_string_sq */,
93   TRUE                  /* scan_string_dq */,
94   TRUE                  /* numbers_2_int */,
95   FALSE                 /* int_2_float */,
96   FALSE                 /* identifier_2_string */,
97   TRUE                  /* char_2_token */,
98   FALSE                 /* symbol_2_token */,
99 };
100
101
102 /* --- prototypes --- */
103 extern char* g_vsprintf (gchar *fmt, va_list *args, va_list *args2);
104 static  GScannerHashVal* g_scanner_lookup_internal (GScanner    *scanner,
105                                                     const gchar *symbol);
106 static  void    g_scanner_get_token_ll  (GScanner       *scanner,
107                                          GTokenType     *token_p,
108                                          GValue         *value_p,
109                                          guint          *line_p,
110                                          guint          *position_p);
111 static  void    g_scanner_get_token_i   (GScanner       *scanner,
112                                          GTokenType     *token_p,
113                                          GValue         *value_p,
114                                          guint          *line_p,
115                                          guint          *position_p);
116 static  void    g_scanner_free_value    (GTokenType     *token_p,
117                                          GValue         *value_p);
118
119 static  inline
120 gint            g_scanner_char_2_num    (guchar         c,
121                                          guchar         base);
122 static  guchar  g_scanner_peek_next_char(GScanner       *scanner);
123 static  guchar  g_scanner_get_char      (GScanner       *scanner,
124                                          guint          *line_p,
125                                          guint          *position_p);
126 static  void    g_scanner_msg_handler   (GScanner       *scanner,
127                                          gchar          *message,
128                                          gint            is_error);
129
130
131 /* --- functions --- */
132 static gint
133 g_scanner_char_2_num (guchar    c,
134                       guchar    base)
135 {
136   if (c >= '0' && c <= '9')
137     c -= '0';
138   else if (c >= 'A' && c <= 'Z')
139     c -= 'A' - 10;
140   else if (c >= 'a' && c <= 'z')
141     c -= 'a' - 10;
142   else
143     return -1;
144   
145   if (c < base)
146     return c;
147   
148   return -1;
149 }
150
151 GScanner*
152 g_scanner_new (GScannerConfig   *config_templ)
153 {
154   register GScanner     *scanner;
155   
156   if (!config_templ)
157     config_templ = &g_scanner_config_template;
158   
159   scanner = g_new0 (GScanner, 1);
160   
161   scanner->user_data = NULL;
162   scanner->input_name = NULL;
163   scanner->parse_errors = 0;
164   scanner->max_parse_errors = 0;
165   
166   scanner->config = g_new0 (GScannerConfig, 1);
167   
168   scanner->config->case_sensitive       = config_templ->case_sensitive;
169   scanner->config->cset_skip_characters = config_templ->cset_skip_characters;
170   scanner->config->cset_identifier_first= config_templ->cset_identifier_first;
171   scanner->config->cset_identifier_nth  = config_templ->cset_identifier_nth;
172   scanner->config->cpair_comment_single = config_templ->cpair_comment_single;
173   scanner->config->skip_comment_multi   = config_templ->skip_comment_multi;
174   scanner->config->skip_comment_single  = config_templ->skip_comment_single;
175   scanner->config->scan_comment_multi   = config_templ->scan_comment_multi;
176   scanner->config->scan_identifier      = config_templ->scan_identifier;
177   scanner->config->scan_identifier_1char= config_templ->scan_identifier_1char;
178   scanner->config->scan_identifier_NULL = config_templ->scan_identifier_NULL;
179   scanner->config->scan_symbols         = config_templ->scan_symbols;
180   scanner->config->scan_binary          = config_templ->scan_binary;
181   scanner->config->scan_octal           = config_templ->scan_octal;
182   scanner->config->scan_float           = config_templ->scan_float;
183   scanner->config->scan_hex             = config_templ->scan_hex;
184   scanner->config->scan_hex_dollar      = config_templ->scan_hex_dollar;
185   scanner->config->scan_string_sq       = config_templ->scan_string_sq;
186   scanner->config->scan_string_dq       = config_templ->scan_string_dq;
187   scanner->config->numbers_2_int        = config_templ->numbers_2_int;
188   scanner->config->int_2_float          = config_templ->int_2_float;
189   scanner->config->identifier_2_string  = config_templ->identifier_2_string;
190   scanner->config->char_2_token         = config_templ->char_2_token;
191   scanner->config->symbol_2_token       = config_templ->symbol_2_token;
192   
193   scanner->token = G_TOKEN_NONE;
194   scanner->value.v_int = 0;
195   scanner->line = 1;
196   scanner->position = 0;
197   
198   scanner->next_token = G_TOKEN_NONE;
199   scanner->next_value.v_int = 0;
200   scanner->next_line = 1;
201   scanner->next_position = 0;
202   
203   scanner->symbol_table = g_hash_table_new (g_str_hash, g_str_equal);
204   scanner->text = NULL;
205   scanner->text_len = 0;
206   scanner->input_fd = -1;
207   scanner->peeked_char = -1;
208
209   scanner->msg_handler = g_scanner_msg_handler;
210   
211   return scanner;
212 }
213
214 static void
215 g_scanner_destroy_symbol_table_entry (gpointer key,
216                                       gpointer value,
217                                       gpointer user_data)
218 {
219   g_free (key);
220   g_free (value);
221 }
222
223 void
224 g_scanner_destroy (GScanner     *scanner)
225 {
226   g_return_if_fail (scanner != NULL);
227   
228   g_hash_table_foreach (scanner->symbol_table, 
229                         g_scanner_destroy_symbol_table_entry, NULL);
230   g_hash_table_destroy (scanner->symbol_table);
231   g_scanner_free_value (&scanner->token, &scanner->value);
232   g_scanner_free_value (&scanner->next_token, &scanner->next_value);
233   g_free (scanner->config);
234   g_free (scanner);
235 }
236
237 static void
238 g_scanner_msg_handler (GScanner         *scanner,
239                        gchar            *message,
240                        gint              is_error)
241 {
242   g_return_if_fail (scanner != NULL);
243
244   fprintf (stdout, "%s:%d: ", scanner->input_name, scanner->line);
245   if (is_error)
246     fprintf (stdout, "error: ");
247   fprintf (stdout, "%s\n", message);
248 }
249
250 void
251 g_scanner_error (GScanner       *scanner,
252                  const gchar    *format,
253                  ...)
254 {
255   g_return_if_fail (scanner != NULL);
256   g_return_if_fail (format != NULL);
257
258   scanner->parse_errors++;
259
260   if (scanner->msg_handler)
261     {
262       va_list args, args2;
263       gchar *string;
264       
265       va_start (args, format);
266       va_start (args2, format);
267       string = g_vsprintf ((gchar*) format, &args, &args2);
268       va_end (args);
269       va_end (args2);
270
271       string = g_strdup (string);
272
273       scanner->msg_handler (scanner, string, TRUE);
274           
275       g_free (string);
276     }
277 }
278
279 void
280 g_scanner_warn (GScanner       *scanner,
281                 const gchar    *format,
282                 ...)
283 {
284   g_return_if_fail (scanner != NULL);
285   g_return_if_fail (format != NULL);
286   
287   if (scanner->msg_handler)
288     {
289       va_list args, args2;
290       gchar *string;
291       
292       va_start (args, format);
293       va_start (args2, format);
294       string = g_vsprintf ((gchar*) format, &args, &args2);
295       va_end (args);
296       va_end (args2);
297       
298       string = g_strdup (string);
299       
300       scanner->msg_handler (scanner, string, FALSE);
301       
302       g_free (string);
303     }
304 }
305
306 void
307 g_scanner_input_file (GScanner  *scanner,
308                       gint      input_fd)
309 {
310   g_return_if_fail (input_fd >= 0);
311   
312   scanner->token = G_TOKEN_NONE;
313   scanner->value.v_int = 0;
314   scanner->line = 1;
315   scanner->position = 0;
316   scanner->next_token = G_TOKEN_NONE;
317   
318   scanner->text = NULL;
319   scanner->text_len = 0;
320   scanner->input_fd = input_fd;
321   scanner->peeked_char = -1;
322 }
323
324 void
325 g_scanner_input_text (GScanner       *scanner,
326                       const  gchar   *text,
327                       guint           text_len)
328 {
329   g_return_if_fail (text != NULL);
330   
331   scanner->token = G_TOKEN_NONE;
332   scanner->value.v_int = 0;
333   scanner->line = 1;
334   scanner->position = 0;
335   scanner->next_token = G_TOKEN_NONE;
336   
337   scanner->text = text;
338   scanner->text_len = text_len;
339   scanner->input_fd = -1;
340   scanner->peeked_char = -1;
341 }
342
343 void
344 g_scanner_add_symbol (GScanner          *scanner,
345                       const gchar       *symbol,
346                       gpointer          value)
347 {
348   register GScannerHashVal      *hash_val;
349   
350   g_return_if_fail (scanner != NULL);
351   g_return_if_fail (symbol != NULL);
352   
353   hash_val = g_scanner_lookup_internal (scanner, symbol);
354   
355   if (!hash_val)
356     {
357       hash_val = g_new (GScannerHashVal, 1);
358       hash_val->key = g_strdup (symbol);
359       hash_val->value = value;
360       if (!scanner->config->case_sensitive)
361         {
362           register guint        i, l;
363           
364           l = strlen (hash_val->key);
365           for (i = 0; i < l; i++)
366             hash_val->key[i] = to_lower (hash_val->key[i]);
367         }
368       g_hash_table_insert (scanner->symbol_table, hash_val->key, hash_val);
369     }
370   else
371     hash_val->value = value;
372 }
373
374 gpointer
375 g_scanner_lookup_symbol (GScanner       *scanner,
376                          const gchar    *symbol)
377 {
378   register GScannerHashVal      *hash_val;
379   
380   g_return_val_if_fail (scanner != NULL, NULL);
381   
382   if (!symbol)
383     return NULL;
384   
385   hash_val = g_scanner_lookup_internal (scanner, symbol);
386   
387   if (hash_val)
388     return hash_val->value;
389   else
390     return NULL;
391 }
392
393 static void
394 g_scanner_foreach_internal (gpointer  key,
395                             gpointer  value,
396                             gpointer  user_data)
397 {
398   register GScannerHashVal *hash_val;
399   register GHFunc func;
400   register gpointer func_data;
401   register gpointer *d;
402
403   d = user_data;
404   func = (GHFunc)d[0];
405   func_data = d[1];
406   hash_val = value;
407
408   func (key, hash_val->value, func_data);
409 }
410
411 void
412 g_scanner_foreach_symbol (GScanner       *scanner,
413                           GHFunc          func,
414                           gpointer        func_data)
415 {
416   gpointer d[2];
417
418   g_return_if_fail (scanner != NULL);
419
420   d[0] = (gpointer)func;
421   d[1] = func_data;
422
423   g_hash_table_foreach (scanner->symbol_table, g_scanner_foreach_internal, d);
424 }
425
426 void
427 g_scanner_remove_symbol (GScanner       *scanner,
428                          const gchar    *symbol)
429 {
430   register GScannerHashVal      *hash_val;
431   
432   g_return_if_fail (scanner != NULL);
433
434   hash_val = g_scanner_lookup_internal (scanner, symbol);
435   
436   if (hash_val)
437     {
438       g_hash_table_remove (scanner->symbol_table, hash_val->key);
439       g_free (hash_val->key);
440       g_free (hash_val);
441     }
442 }
443
444 void
445 g_scanner_freeze_symbol_table (GScanner *scanner)
446 {
447   g_return_if_fail (scanner != NULL);
448
449   g_hash_table_freeze (scanner->symbol_table);
450 }
451
452 void
453 g_scanner_thaw_symbol_table (GScanner *scanner)
454 {
455   g_return_if_fail (scanner != NULL);
456
457   g_hash_table_thaw (scanner->symbol_table);
458 }
459
460 GTokenType
461 g_scanner_peek_next_token (GScanner     *scanner)
462 {
463   g_return_val_if_fail (scanner != NULL, G_TOKEN_EOF);
464   
465   if (scanner->next_token == G_TOKEN_NONE)
466     {
467       scanner->next_line = scanner->line;
468       scanner->next_position = scanner->position;
469       g_scanner_get_token_i (scanner,
470                              &scanner->next_token,
471                              &scanner->next_value,
472                              &scanner->next_line,
473                              &scanner->next_position);
474     }
475   
476   return scanner->next_token;
477 }
478
479 GTokenType
480 g_scanner_get_next_token (GScanner      *scanner)
481 {
482   g_return_val_if_fail (scanner != NULL, G_TOKEN_EOF);
483   
484   if (scanner->next_token != G_TOKEN_NONE)
485     {
486       g_scanner_free_value (&scanner->token, &scanner->value);
487       
488       scanner->token = scanner->next_token;
489       scanner->value = scanner->next_value;
490       scanner->line = scanner->next_line;
491       scanner->position = scanner->next_position;
492       scanner->next_token = G_TOKEN_NONE;
493     }
494   else
495     g_scanner_get_token_i (scanner,
496                            &scanner->token,
497                            &scanner->value,
498                            &scanner->line,
499                            &scanner->position);
500   
501   return scanner->token;
502 }
503
504 GTokenType
505 g_scanner_cur_token (GScanner *scanner)
506 {
507   g_return_val_if_fail (scanner != NULL, G_TOKEN_EOF);
508   
509   return scanner->token;
510 }
511
512 GValue
513 g_scanner_cur_value (GScanner *scanner)
514 {
515   register GValue v;
516   
517   v.v_int = 0;
518   g_return_val_if_fail (scanner != NULL, v);
519   
520   return scanner->value;
521 }
522
523 guint
524 g_scanner_cur_line (GScanner *scanner)
525 {
526   g_return_val_if_fail (scanner != NULL, 0);
527   
528   return scanner->line;
529 }
530
531 guint
532 g_scanner_cur_position (GScanner *scanner)
533 {
534   g_return_val_if_fail (scanner != NULL, 0);
535   
536   return scanner->position;
537 }
538
539 gboolean
540 g_scanner_eof (GScanner *scanner)
541 {
542   g_return_val_if_fail (scanner != NULL, TRUE);
543   
544   return scanner->token == G_TOKEN_EOF;
545 }
546
547 static GScannerHashVal*
548 g_scanner_lookup_internal (GScanner     *scanner,
549                            const gchar  *symbol)
550 {
551   register GScannerHashVal      *hash_val;
552   
553   if (!scanner->config->case_sensitive)
554     {
555       register gchar *buffer;
556       register guint i, l;
557       
558       l = strlen (symbol);
559       buffer = g_new (gchar, l + 1);
560       for (i = 0; i < l; i++)
561         buffer[i] = to_lower (symbol[i]);
562       buffer[i] = 0;
563       hash_val = g_hash_table_lookup (scanner->symbol_table, buffer);
564       g_free (buffer);
565     }
566   else
567     hash_val = g_hash_table_lookup (scanner->symbol_table, (gchar*) symbol);
568   
569   return hash_val;
570 }
571
572 static guchar
573 g_scanner_peek_next_char (GScanner *scanner)
574 {
575   guchar fchar;
576   
577   if (scanner->text_len)
578     {
579       fchar = scanner->text[0];
580     }
581   else if (scanner->input_fd >= 0)
582     {
583       if (scanner->peeked_char < 0)
584         {
585           register gint count;
586           
587           do
588             {
589               count = read (scanner->input_fd, &fchar, 1);
590             }
591           while (count == -1 &&
592                  (errno == EINTR ||
593                   errno == EAGAIN));
594           
595           if (count != 1)
596             fchar = 0;
597           
598           scanner->peeked_char = fchar;
599         }
600       else
601         fchar = scanner->peeked_char;
602     }
603   else
604     fchar = 0;
605   
606   return fchar;
607 }
608
609 static guchar
610 g_scanner_get_char (GScanner    *scanner,
611                     guint       *line_p,
612                     guint       *position_p)
613 {
614   guchar fchar;
615   
616   if (scanner->text_len)
617     {
618       fchar = *(scanner->text++);
619       scanner->text_len--;
620     }
621   else if (scanner->input_fd >= 0)
622     {
623       if (scanner->peeked_char < 0)
624         {
625           register gint count;
626           
627           do
628             {
629               count = read (scanner->input_fd, &fchar, 1);
630             }
631           while (count == -1 &&
632                  (errno == EINTR ||
633                   errno == EAGAIN));
634           if (count != 1 || fchar == 0)
635             {
636               fchar = 0;
637               scanner->peeked_char = 0;
638             }
639         }
640       else
641         {
642           fchar = scanner->peeked_char;
643           if (fchar)
644             scanner->peeked_char = -1;
645         }
646     }
647   else
648     fchar = 0;
649   
650   if (fchar == '\n')
651     {
652       (*position_p) = 0;
653       (*line_p)++;
654     }
655   else if (fchar)
656     {
657       (*position_p)++;
658     }
659   
660   return fchar;
661 }
662
663 void
664 g_scanner_unexp_token (GScanner         *scanner,
665                        GTokenType        expected_token,
666                        const gchar      *identifier_spec,
667                        const gchar      *symbol_spec,
668                        const gchar      *symbol_name,
669                        const gchar      *message,
670                        gint              is_error)
671 {
672   register gchar        *token_string;
673   register guint        token_string_len;
674   register gchar        *expected_string;
675   register guint        expected_string_len;
676   register gchar        *message_prefix;
677   register gboolean     print_unexp;
678   void (*msg_handler)   (GScanner*, const gchar*, ...);
679   
680   g_return_if_fail (scanner != NULL);
681
682   if (is_error)
683     msg_handler = g_scanner_error;
684   else
685     msg_handler = g_scanner_warn;
686
687   if (!identifier_spec)
688     identifier_spec = "identifier";
689   if (!symbol_spec)
690     symbol_spec = "symbol";
691   
692   token_string_len = 56;
693   token_string = g_new (gchar, token_string_len + 1);
694   expected_string_len = 64;
695   expected_string = g_new (gchar, expected_string_len + 1);
696   print_unexp = TRUE;
697   
698   switch (scanner->token)
699     {
700       
701     case  G_TOKEN_EOF:
702       g_snprintf (token_string, token_string_len, "end of file");
703       break;
704       
705     default:  /* 1 ... 255 */
706       if (scanner->token >= 1 && scanner->token <= 255)
707         {
708           if ((scanner->token >= ' ' && scanner->token <= '~') ||
709               strchr (scanner->config->cset_identifier_first, scanner->token) ||
710               strchr (scanner->config->cset_identifier_nth, scanner->token))
711             g_snprintf (token_string, expected_string_len, "character `%c'", scanner->token);
712           else
713             g_snprintf (token_string, expected_string_len, "character `\\%o'", scanner->token);
714         }
715       else
716         g_snprintf (token_string, token_string_len, "(unknown) token <%d>", scanner->token);
717       break;
718       
719     case  G_TOKEN_ERROR:
720       print_unexp = FALSE;
721       expected_token = G_TOKEN_NONE;
722       switch (scanner->value.v_error)
723         {
724         case  G_ERR_UNEXP_EOF:
725           g_snprintf (token_string, token_string_len, "scanner: unexpected end of file");
726           break;
727           
728         case  G_ERR_UNEXP_EOF_IN_STRING:
729           g_snprintf (token_string, token_string_len, "scanner: unterminated string constant");
730           break;
731           
732         case  G_ERR_UNEXP_EOF_IN_COMMENT:
733           g_snprintf (token_string, token_string_len, "scanner: unterminated comment");
734           break;
735           
736         case  G_ERR_NON_DIGIT_IN_CONST:
737           g_snprintf (token_string, token_string_len, "scanner: non digit in constant");
738           break;
739           
740         case  G_ERR_FLOAT_RADIX:
741           g_snprintf (token_string, token_string_len, "scanner: invalid radix for floating constant");
742           break;
743           
744         case  G_ERR_FLOAT_MALFORMED:
745           g_snprintf (token_string, token_string_len, "scanner: malformed floating constant");
746           break;
747           
748         case  G_ERR_DIGIT_RADIX:
749           g_snprintf (token_string, token_string_len, "scanner: digit is beyond radix");
750           break;
751           
752         case  G_ERR_UNKNOWN:
753         default:
754           g_snprintf (token_string, token_string_len, "scanner: unknown error");
755           break;
756         }
757       break;
758       
759     case  G_TOKEN_CHAR:
760       g_snprintf (token_string, token_string_len, "character `%c'", scanner->value.v_char);
761       break;
762       
763     case  G_TOKEN_SYMBOL:
764       if (expected_token == G_TOKEN_SYMBOL)
765         print_unexp = FALSE;
766       if (symbol_name)
767         g_snprintf (token_string,
768                   token_string_len,
769                   "%s%s `%s'",
770                   print_unexp ? "" : "invalid ",
771                   symbol_spec,
772                   symbol_name);
773       else
774         g_snprintf (token_string,
775                   token_string_len,
776                   "%s%s",
777                   print_unexp ? "" : "invalid ",
778                   symbol_spec);
779       break;
780       
781     case  G_TOKEN_IDENTIFIER:
782       if (expected_token == G_TOKEN_IDENTIFIER)
783         print_unexp = FALSE;
784       g_snprintf (token_string,
785                 token_string_len,
786                 "%s%s `%s'",
787                 print_unexp ? "" : "invalid ",
788                 identifier_spec,
789                 scanner->value.v_string);
790       break;
791       
792     case  G_TOKEN_BINARY:
793     case  G_TOKEN_OCTAL:
794     case  G_TOKEN_INT:
795     case  G_TOKEN_HEX:
796       g_snprintf (token_string, token_string_len, "number `%ld'", scanner->value.v_int);
797       break;
798       
799     case  G_TOKEN_FLOAT:
800       g_snprintf (token_string, token_string_len, "number `%.3f'", scanner->value.v_float);
801       break;
802       
803     case  G_TOKEN_STRING:
804       g_snprintf (token_string,
805                 token_string_len,
806                 "%sstring constant \"%s\"",
807                 scanner->value.v_string[0] == 0 ? "empty " : "",
808                 scanner->value.v_string);
809       token_string[token_string_len - 2] = '"';
810       token_string[token_string_len - 1] = 0;
811       break;
812       
813     case  G_TOKEN_COMMENT_SINGLE:
814     case  G_TOKEN_COMMENT_MULTI:
815       g_snprintf (token_string, token_string_len, "comment");
816       break;
817       
818     case  G_TOKEN_NONE:
819       g_assert_not_reached ();
820       break;
821     }
822   
823   
824   switch (expected_token)
825     {
826     default: /* 1 ... 255 */
827       if (expected_token >= 1 && expected_token <= 255)
828         {
829           if ((expected_token >= ' ' && expected_token <= '~') ||
830               strchr (scanner->config->cset_identifier_first, expected_token) ||
831               strchr (scanner->config->cset_identifier_nth, expected_token))
832             g_snprintf (expected_string, expected_string_len, "character `%c'", expected_token);
833           else
834             g_snprintf (expected_string, expected_string_len, "character `\\%o'", expected_token);
835         }
836       else
837         g_snprintf (expected_string, expected_string_len, "(unknown) token <%d>", expected_token);
838       break;
839       
840     case  G_TOKEN_INT:
841       g_snprintf (expected_string, expected_string_len, "number (integer)");
842       break;
843       
844     case  G_TOKEN_FLOAT:
845       g_snprintf (expected_string, expected_string_len, "number (float)");
846       break;
847       
848     case  G_TOKEN_STRING:
849       g_snprintf (expected_string, expected_string_len, "string constant");
850       break;
851       
852     case  G_TOKEN_SYMBOL:
853       g_snprintf (expected_string,
854                 expected_string_len,
855                 "%s%s",
856                 scanner->token == G_TOKEN_SYMBOL ? "valid " : "",
857                 symbol_spec);
858       break;
859       
860     case  G_TOKEN_IDENTIFIER:
861       g_snprintf (expected_string,
862                 expected_string_len,
863                 "%s%s",
864                 scanner->token == G_TOKEN_IDENTIFIER ? "valid " : "",
865                 identifier_spec);
866       break;
867       
868     case  G_TOKEN_NONE:
869       break;
870     }
871   
872   if (message && message[0] != 0)
873     message_prefix = " - ";
874   else
875     {
876       message_prefix = "";
877       message = "";
878     }
879   
880   if (expected_token != G_TOKEN_NONE)
881     {
882       if (print_unexp)
883         msg_handler (scanner,
884                      "unexpected %s, expected %s%s%s",
885                      token_string,
886                      expected_string,
887                      message_prefix,
888                      message);
889       else
890         msg_handler (scanner,
891                      "%s, expected %s%s%s",
892                      token_string,
893                      expected_string,
894                      message_prefix,
895                      message);
896     }
897   else
898     {
899       if (print_unexp)
900         msg_handler (scanner,
901                      "unexpected %s%s%s",
902                      token_string,
903                      message_prefix,
904                      message);
905       else
906         msg_handler (scanner,
907                      "%s%s%s",
908                      token_string,
909                      message_prefix,
910                      message);
911     }
912   
913   g_free (token_string);
914   g_free (expected_string);
915 }
916
917 gint
918 g_scanner_stat_mode (const gchar *filename)
919 {
920   struct stat  *stat_buf;
921   gint          st_mode;
922
923   stat_buf = g_new0 (struct stat, 1);
924
925   lstat (filename, stat_buf);
926
927   st_mode = stat_buf->st_mode;
928
929   g_free (stat_buf);
930
931   return st_mode;
932 }
933
934 static void
935 g_scanner_free_value (GTokenType     *token_p,
936                       GValue         *value_p)
937 {
938   switch (*token_p)
939     {
940     case  G_TOKEN_STRING:
941     case  G_TOKEN_IDENTIFIER:
942     case  G_TOKEN_IDENTIFIER_NULL:
943     case  G_TOKEN_COMMENT_SINGLE:
944     case  G_TOKEN_COMMENT_MULTI:
945       g_free (value_p->v_string);
946       break;
947       
948     default:
949       break;
950     }
951   
952   *token_p = G_TOKEN_NONE;
953 }
954
955 static void
956 g_scanner_get_token_i (GScanner *scanner,
957                        GTokenType       *token_p,
958                        GValue           *value_p,
959                        guint            *line_p,
960                        guint            *position_p)
961 {
962   do
963     {
964       g_scanner_free_value (token_p, value_p);
965       g_scanner_get_token_ll (scanner, token_p, value_p, line_p, position_p);
966     }
967   while (((*token_p > 0 && *token_p < 256) &&
968           strchr (scanner->config->cset_skip_characters, *token_p)) ||
969          (*token_p == G_TOKEN_CHAR &&
970           strchr (scanner->config->cset_skip_characters, value_p->v_char)) ||
971          (*token_p == G_TOKEN_COMMENT_MULTI &&
972           scanner->config->skip_comment_multi) ||
973          (*token_p == G_TOKEN_COMMENT_SINGLE &&
974           scanner->config->skip_comment_single));
975   
976   switch (*token_p)
977     {
978     case        G_TOKEN_IDENTIFIER:
979       if (scanner->config->identifier_2_string)
980         *token_p = G_TOKEN_STRING;
981       break;
982       
983     case        G_TOKEN_SYMBOL:
984       if (scanner->config->symbol_2_token)
985         *token_p = (GTokenType) value_p->v_symbol;
986       break;
987       
988     case        G_TOKEN_BINARY:
989     case        G_TOKEN_OCTAL:
990     case        G_TOKEN_HEX:
991       if (scanner->config->numbers_2_int)
992         *token_p = G_TOKEN_INT;
993       break;
994       
995     default:
996       break;
997     }
998   
999   if (*token_p == G_TOKEN_INT &&
1000       scanner->config->int_2_float)
1001     {
1002       *token_p = G_TOKEN_FLOAT;
1003       value_p->v_float = value_p->v_int;
1004     }
1005   
1006   errno = 0;
1007 }
1008
1009 static void
1010 g_scanner_get_token_ll  (GScanner       *scanner,
1011                          GTokenType     *token_p,
1012                          GValue         *value_p,
1013                          guint          *line_p,
1014                          guint          *position_p)
1015 {
1016   register GScannerConfig       *config;
1017   register gboolean             in_comment_multi;
1018   register gboolean             in_comment_single;
1019   register gboolean             in_string_sq;
1020   register gboolean             in_string_dq;
1021   static   guchar               ch;
1022   register GTokenType           token;
1023   register GValue               value;
1024   register GString              *gstring;
1025   
1026   config = scanner->config;
1027   (*value_p).v_int = 0;
1028   
1029   if (scanner->token == G_TOKEN_EOF ||
1030       (!scanner->text_len &&
1031        (scanner->input_fd < 0 ||
1032         scanner->peeked_char == 0)))
1033     {
1034       *token_p = G_TOKEN_EOF;
1035       return;
1036     }
1037   
1038   in_comment_multi = FALSE;
1039   in_comment_single = FALSE;
1040   in_string_sq = FALSE;
1041   in_string_dq = FALSE;
1042   gstring = NULL;
1043   
1044   do
1045     {
1046       register gboolean         dotted_float = FALSE;
1047       
1048       ch = g_scanner_get_char (scanner, line_p, position_p);
1049       
1050       value.v_int = 0;
1051       token = G_TOKEN_NONE;
1052       
1053       /* this is *evil*, but needed ;(
1054        * we first check for identifier first character, because  it
1055        * might interfere with other key chars like slashes or numbers
1056        */
1057       if (config->scan_identifier &&
1058           ch && strchr (config->cset_identifier_first, ch))
1059         goto identifier_precedence;
1060       
1061       switch (ch)
1062         {
1063           register gboolean     in_number;
1064           static         gchar          *endptr;
1065           
1066         case  0:
1067           token = G_TOKEN_EOF;
1068           (*position_p)++;
1069           ch = 0;
1070           break;
1071           
1072         case  '/':
1073           if (!config->scan_comment_multi ||
1074               g_scanner_peek_next_char (scanner) != '*')
1075             goto default_case;
1076           g_scanner_get_char (scanner, line_p, position_p);
1077           token = G_TOKEN_COMMENT_MULTI;
1078           in_comment_multi = TRUE;
1079           gstring = g_string_new ("");
1080           while ((ch = g_scanner_get_char (scanner, line_p, position_p)) != 0)
1081             {
1082               if (ch == '*' && g_scanner_peek_next_char (scanner) == '/')
1083                 {
1084                   g_scanner_get_char (scanner, line_p, position_p);
1085                   in_comment_multi = FALSE;
1086                   break;
1087                 }
1088               else
1089                 gstring = g_string_append_c (gstring, ch);
1090             }
1091           ch = 0;
1092           break;
1093           
1094         case  '\'':
1095           if (!config->scan_string_sq)
1096             goto default_case;
1097           token = G_TOKEN_STRING;
1098           in_string_sq = TRUE;
1099           gstring = g_string_new ("");
1100           while ((ch = g_scanner_get_char (scanner, line_p, position_p)) != 0)
1101             {
1102               if (ch == '\'')
1103                 {
1104                   in_string_sq = FALSE;
1105                   break;
1106                 }
1107               else
1108                 gstring = g_string_append_c (gstring, ch);
1109             }
1110           ch = 0;
1111           break;
1112           
1113         case  '"':
1114           if (!config->scan_string_dq)
1115             goto default_case;
1116           token = G_TOKEN_STRING;
1117           in_string_dq = TRUE;
1118           gstring = g_string_new ("");
1119           while ((ch = g_scanner_get_char (scanner, line_p, position_p)) != 0)
1120             {
1121               if (ch == '"')
1122                 {
1123                   in_string_dq = FALSE;
1124                   break;
1125                 }
1126               else
1127                 {
1128                   if (ch == '\\')
1129                     {
1130                       ch = g_scanner_get_char (scanner, line_p, position_p);
1131                       switch (ch)
1132                         {
1133                           register guint        i;
1134                           register guint        fchar;
1135                           
1136                         case  0:
1137                           break;
1138                           
1139                         case  '\\':
1140                           gstring = g_string_append_c (gstring, '\\');
1141                           break;
1142                           
1143                         case  'n':
1144                           gstring = g_string_append_c (gstring, '\n');
1145                           break;
1146                           
1147                         case  't':
1148                           gstring = g_string_append_c (gstring, '\t');
1149                           break;
1150                           
1151                         case  'r':
1152                           gstring = g_string_append_c (gstring, '\r');
1153                           break;
1154                           
1155                         case  'b':
1156                           gstring = g_string_append_c (gstring, '\b');
1157                           break;
1158                           
1159                         case  'f':
1160                           gstring = g_string_append_c (gstring, '\f');
1161                           break;
1162                           
1163                         case  '0':
1164                         case  '1':
1165                         case  '2':
1166                         case  '3':
1167                         case  '4':
1168                         case  '5':
1169                         case  '6':
1170                         case  '7':
1171                           i = ch - '0';
1172                           fchar = g_scanner_peek_next_char (scanner);
1173                           if (fchar >= '0' && fchar <= '7')
1174                             {
1175                               ch = g_scanner_get_char (scanner, line_p, position_p);
1176                               i= i * 8 + ch - '0';
1177                               fchar = g_scanner_peek_next_char (scanner);
1178                               if (fchar >= '0' && fchar <= '7')
1179                                 {
1180                                   ch = g_scanner_get_char (scanner, line_p, position_p);
1181                                   i = i * 8 + ch - '0';
1182                                 }
1183                             }
1184                           gstring = g_string_append_c (gstring, i);
1185                           break;
1186                           
1187                         default:
1188                           gstring = g_string_append_c (gstring, ch);
1189                           break;
1190                         }
1191                     }
1192                   else
1193                     gstring = g_string_append_c (gstring, ch);
1194                 }
1195             }
1196           ch = 0;
1197           break;
1198           
1199         case  '.':
1200           if (!config->scan_float)
1201             goto default_case;
1202           token = G_TOKEN_FLOAT;
1203           dotted_float = TRUE;
1204           ch = g_scanner_get_char (scanner, line_p, position_p);
1205           goto number_parsing;
1206           
1207         case  '$':
1208           if (!config->scan_hex_dollar)
1209             goto default_case;
1210           token = G_TOKEN_HEX;
1211           ch = g_scanner_get_char (scanner, line_p, position_p);
1212           goto number_parsing;
1213           
1214         case  '0':
1215           if (config->scan_octal)
1216             token = G_TOKEN_OCTAL;
1217           else
1218             token = G_TOKEN_INT;
1219           ch = g_scanner_peek_next_char (scanner);
1220           if (config->scan_hex && (ch == 'x' || ch == 'X'))
1221             {
1222               token = G_TOKEN_HEX;
1223               g_scanner_get_char (scanner, line_p, position_p);
1224               ch = g_scanner_get_char (scanner, line_p, position_p);
1225               if (ch == 0)
1226                 {
1227                   token = G_TOKEN_ERROR;
1228                   value.v_error = G_ERR_UNEXP_EOF;
1229                   (*position_p)++;
1230                   break;
1231                 }
1232               if (g_scanner_char_2_num (ch, 16) < 0)
1233                 {
1234                   token = G_TOKEN_ERROR;
1235                   value.v_error = G_ERR_DIGIT_RADIX;
1236                   ch = 0;
1237                   break;
1238                 }
1239             }
1240           else if (config->scan_binary && (ch == 'b' || ch == 'B'))
1241             {
1242               token = G_TOKEN_BINARY;
1243               g_scanner_get_char (scanner, line_p, position_p);
1244               ch = g_scanner_get_char (scanner, line_p, position_p);
1245               if (ch == 0)
1246                 {
1247                   token = G_TOKEN_ERROR;
1248                   value.v_error = G_ERR_UNEXP_EOF;
1249                   (*position_p)++;
1250                   break;
1251                 }
1252               if (g_scanner_char_2_num (ch, 10) < 0)
1253                 {
1254                   token = G_TOKEN_ERROR;
1255                   value.v_error = G_ERR_NON_DIGIT_IN_CONST;
1256                   ch = 0;
1257                   break;
1258                 }
1259             }
1260           else
1261             ch = '0';
1262           /* fall through */
1263         case  '1':
1264         case  '2':
1265         case  '3':
1266         case  '4':
1267         case  '5':
1268         case  '6':
1269         case  '7':
1270         case  '8':
1271         case  '9':
1272         number_parsing:
1273         if (token == G_TOKEN_NONE)
1274           token = G_TOKEN_INT;
1275         
1276         gstring = g_string_new (dotted_float ? "0." : "");
1277         gstring = g_string_append_c (gstring, ch);
1278         in_number = TRUE;
1279         while (in_number)
1280           {
1281             register gboolean is_E;
1282             
1283             is_E = (ch == 'e' || ch == 'E') && token == G_TOKEN_FLOAT;
1284             ch = g_scanner_peek_next_char (scanner);
1285             
1286             if (g_scanner_char_2_num (ch, 36) >= 0 ||
1287                 (config->scan_float && ch == '.') ||
1288                 (is_E && ch == '+') ||
1289                 (is_E && ch == '-') )
1290               ch = g_scanner_get_char (scanner, line_p, position_p);
1291             else
1292               in_number = FALSE;
1293             
1294             if (in_number)
1295               switch (ch)
1296                 {
1297                 case  '.':
1298                   if (token != G_TOKEN_INT &&
1299                       token != G_TOKEN_OCTAL)
1300                     {
1301                       token = G_TOKEN_ERROR;
1302                       if (token == G_TOKEN_FLOAT)
1303                         value.v_error = G_ERR_FLOAT_MALFORMED;
1304                       else
1305                         value.v_error = G_ERR_FLOAT_RADIX;
1306                       in_number = FALSE;
1307                     }
1308                   else
1309                     {
1310                       token = G_TOKEN_FLOAT;
1311                       gstring = g_string_append_c (gstring, ch);
1312                     }
1313                   break;
1314                   
1315                 case    '0':
1316                 case  '1':
1317                 case  '2':
1318                 case  '3':
1319                 case  '4':
1320                 case  '5':
1321                 case  '6':
1322                 case  '7':
1323                 case  '8':
1324                 case  '9':
1325                   gstring = g_string_append_c (gstring, ch);
1326                   break;
1327                   
1328                 case    '-':
1329                 case    '+':
1330                   if (token != G_TOKEN_FLOAT)
1331                     {
1332                       token = G_TOKEN_ERROR;
1333                       value.v_error = G_ERR_NON_DIGIT_IN_CONST;
1334                       in_number = FALSE;
1335                     }
1336                   else
1337                     gstring = g_string_append_c (gstring, ch);
1338                   break;
1339                   
1340                 case    'e':
1341                 case    'E':
1342                   if ((token != G_TOKEN_HEX && !config->scan_float) ||
1343                       (token != G_TOKEN_HEX &&
1344                        token != G_TOKEN_OCTAL &&
1345                        token != G_TOKEN_FLOAT &&
1346                        token != G_TOKEN_INT))
1347                     {
1348                       token = G_TOKEN_ERROR;
1349                       value.v_error = G_ERR_NON_DIGIT_IN_CONST;
1350                       in_number = FALSE;
1351                     }
1352                   else
1353                     {
1354                       if (token != G_TOKEN_HEX)
1355                         token = G_TOKEN_FLOAT;
1356                       gstring = g_string_append_c (gstring, ch);
1357                     }
1358                   break;
1359                   
1360                 default:
1361                   if (token != G_TOKEN_HEX)
1362                     {
1363                       token = G_TOKEN_ERROR;
1364                       value.v_error = G_ERR_NON_DIGIT_IN_CONST;
1365                       in_number = FALSE;
1366                     }
1367                   else
1368                     gstring = g_string_append_c (gstring, ch);
1369                   break;
1370                 }
1371           }
1372         endptr = NULL;
1373         switch (token)
1374           {
1375           case  G_TOKEN_BINARY:
1376             value.v_binary = strtol (gstring->str, &endptr, 2);
1377             break;
1378             
1379           case  G_TOKEN_OCTAL:
1380             value.v_octal = strtol (gstring->str, &endptr, 8);
1381             break;
1382             
1383           case  G_TOKEN_INT:
1384             value.v_int = strtol (gstring->str, &endptr, 10);
1385             break;
1386             
1387           case  G_TOKEN_FLOAT:
1388             value.v_float = g_strtod (gstring->str, &endptr);
1389             break;
1390             
1391           case  G_TOKEN_HEX:
1392             value.v_hex = strtol (gstring->str, &endptr, 16);
1393             break;
1394             
1395           default:
1396             break;
1397           }
1398         if (endptr && *endptr)
1399           {
1400             token = G_TOKEN_ERROR;
1401             if (*endptr == 'e' || *endptr == 'E')
1402               value.v_error = G_ERR_NON_DIGIT_IN_CONST;
1403             else
1404               value.v_error = G_ERR_DIGIT_RADIX;
1405           }
1406         g_string_free (gstring, TRUE);
1407         gstring = NULL;
1408         ch = 0;
1409         break;
1410         
1411         default:
1412         default_case:
1413         if (config->cpair_comment_single &&
1414             ch == config->cpair_comment_single[0])
1415           {
1416             token = G_TOKEN_COMMENT_SINGLE;
1417             in_comment_single = TRUE;
1418             gstring = g_string_new ("");
1419             while ((ch = g_scanner_get_char (scanner,
1420                                              line_p,
1421                                              position_p)) != 0)
1422               {
1423                 if (ch == config->cpair_comment_single[1])
1424                   {
1425                     in_comment_single = FALSE;
1426                     ch = 0;
1427                     break;
1428                   }
1429                 
1430                 gstring = g_string_append_c (gstring, ch);
1431                 ch = 0;
1432               }
1433           }
1434         else if (config->scan_identifier && ch &&
1435                  strchr (config->cset_identifier_first, ch))
1436           {
1437           identifier_precedence:
1438             
1439             if (config->cset_identifier_nth && ch &&
1440                 strchr (config->cset_identifier_nth,
1441                         g_scanner_peek_next_char (scanner)))
1442               {
1443                 token = G_TOKEN_IDENTIFIER;
1444                 gstring = g_string_new ("");
1445                 gstring = g_string_append_c (gstring, ch);
1446                 do
1447                   {
1448                     ch = g_scanner_get_char (scanner, line_p, position_p);
1449                     gstring = g_string_append_c (gstring, ch);
1450                     ch = g_scanner_peek_next_char (scanner);
1451                   }
1452                 while (ch && strchr (config->cset_identifier_nth, ch));
1453                 ch = 0;
1454               }
1455             else if (config->scan_identifier_1char)
1456               {
1457                 token = G_TOKEN_IDENTIFIER;
1458                 value.v_identifier = g_new0 (gchar, 2);
1459                 value.v_identifier[0] = ch;
1460                 ch = 0;
1461               }
1462           }
1463         if (ch)
1464           {
1465             if (config->char_2_token)
1466               token = ch;
1467             else
1468               {
1469                 token = G_TOKEN_CHAR;
1470                 value.v_char = ch;
1471               }
1472             ch = 0;
1473           }
1474         break;
1475         }
1476       g_assert (ch == 0 && token != G_TOKEN_NONE);
1477     }
1478   while (ch != 0);
1479   
1480   if (in_comment_multi ||
1481       in_comment_single ||
1482       in_string_sq ||
1483       in_string_dq)
1484     {
1485       token = G_TOKEN_ERROR;
1486       if (gstring)
1487         {
1488           g_string_free (gstring, TRUE);
1489           gstring = NULL;
1490         }
1491       (*position_p)++;
1492       if (in_comment_multi || in_comment_single)
1493         value.v_error = G_ERR_UNEXP_EOF_IN_COMMENT;
1494       else if (in_string_sq || in_string_dq)
1495         value.v_error = G_ERR_UNEXP_EOF_IN_STRING;
1496     }
1497   
1498   if (gstring)
1499     {
1500       value.v_string = gstring->str;
1501       g_string_free (gstring, FALSE);
1502       gstring = NULL;
1503     }
1504   
1505   if (token == G_TOKEN_IDENTIFIER &&
1506       config->scan_symbols)
1507     {
1508       register GScannerHashVal  *hash_val;
1509       
1510       hash_val = g_scanner_lookup_internal (scanner, value.v_identifier);
1511       
1512       if (hash_val)
1513         {
1514           g_free (value.v_identifier);
1515           token = G_TOKEN_SYMBOL;
1516           value.v_symbol = hash_val->value;
1517         }
1518     }
1519
1520   if (token == G_TOKEN_IDENTIFIER &&
1521       config->scan_identifier_NULL &&
1522       strlen (value.v_identifier) == 4)
1523     {
1524       gchar *null_upper = "NULL";
1525       gchar *null_lower = "null";
1526       
1527       if (scanner->config->case_sensitive)
1528         {
1529           if (value.v_identifier[0] == null_upper[0] &&
1530               value.v_identifier[1] == null_upper[1] &&
1531               value.v_identifier[2] == null_upper[2] &&
1532               value.v_identifier[3] == null_upper[3])
1533             token = G_TOKEN_IDENTIFIER_NULL;
1534         }
1535       else
1536         {
1537           if ((value.v_identifier[0] == null_upper[0] ||
1538                value.v_identifier[0] == null_lower[0]) &&
1539               (value.v_identifier[1] == null_upper[1] ||
1540                value.v_identifier[1] == null_lower[1]) &&
1541               (value.v_identifier[2] == null_upper[2] ||
1542                value.v_identifier[2] == null_lower[2]) &&
1543               (value.v_identifier[3] == null_upper[3] ||
1544                value.v_identifier[3] == null_lower[3]))
1545             token = G_TOKEN_IDENTIFIER_NULL;
1546         }
1547     }
1548   
1549   *token_p = token;
1550   *value_p = value;
1551 }