import source from 1.3.40
[external/swig.git] / CCache / unify.c
1 /*
2    Copyright (C) Andrew Tridgell 2002
3    
4    This program is free software; you can redistribute it and/or modify
5    it under the terms of the GNU General Public License as published by
6    the Free Software Foundation; either version 2 of the License, or
7    (at your option) any later version.
8    
9    This program is distributed in the hope that it will be useful,
10    but WITHOUT ANY WARRANTY; without even the implied warranty of
11    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
12    GNU General Public License for more details.
13    
14    You should have received a copy of the GNU General Public License
15    along with this program; if not, write to the Free Software
16    Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
17 */
18 /*
19   C/C++ unifier
20
21   the idea is that changes that don't affect the resulting C code
22   should not change the hash. This is achieved by folding white-space
23   and other non-semantic fluff in the input into a single unified format.
24
25   This unifier was designed to match the output of the unifier in
26   compilercache, which is flex based. The major difference is that
27   this unifier is much faster (about 2x) and more forgiving of
28   syntactic errors. Continuing on syntactic errors is important to
29   cope with C/C++ extensions in the local compiler (for example,
30   inline assembly systems).  
31 */
32
33 #include "ccache.h"
34
/*
 * Operator and punctuation tokens recognised by the unifier, terminated
 * by a null pointer.
 *
 * The order is significant: build_table() files each entry under its
 * first character in the order given here, and unify() emits the first
 * entry that matches the input. Multi-character operators therefore come
 * before the single-character operators they start with.
 * NOTE(review): ":" is listed before ":>", so ":>" is never matched as a
 * single token; left as-is because reordering would change the hashes.
 */
static char *s_tokens[] = {
        /* three-character operators */
        "...", ">>=", "<<=",

        /* two-character operators */
        "+=", "-=", "*=", "/=", "%=", "&=", "^=", "|=",
        ">>", "<<", "++", "--", "->", "&&", "||",
        "<=", ">=", "==", "!=",

        /* statement and block punctuation (with digraphs) */
        ";", "{", "<%", "}", "%>", ",", ":", "=",

        /* brackets (with digraphs) */
        "(", ")", "[", "<:", "]", ":>",

        /* remaining single-character operators */
        ".", "&", "!", "~", "-", "+", "*", "/", "%",
        "<", ">", "^", "|", "?",

        NULL
};
43
/* Character-class flags; OR-ed together in the 'type' field of tokens[]. */
#define C_ALPHA (1 << 0)        /* letter or underscore */
#define C_SPACE (1 << 1)        /* whitespace */
#define C_TOKEN (1 << 2)        /* first character of an entry in s_tokens */
#define C_QUOTE (1 << 3)        /* single or double quote */
#define C_DIGIT (1 << 4)        /* decimal digit */
#define C_HEX   (1 << 5)        /* hexadecimal digit */
#define C_FLOAT (1 << 6)        /* numeric suffix letter: l L f F u U */
#define C_SIGN  (1 << 7)        /* '+' or '-' */
52
/*
 * Per-character lookup table, indexed by the byte value of a character.
 * It is filled in once by build_table().
 */
static struct token_info {
        /* bitmask of the C_* character-class flags */
        unsigned char type;
        /* number of valid entries in toks[] */
        unsigned char num_toks;
        /* the s_tokens entries that begin with this character */
        char *toks[7];
} tokens[256];
58
59 /* build up the table used by the unifier */
60 static void build_table(void)
61 {
62         unsigned char c;
63         int i;
64         static int done;
65
66         if (done) return;
67         done = 1;
68
69         memset(tokens, 0, sizeof(tokens));
70         for (c=0;c<128;c++) {
71                 if (isalpha(c) || c == '_') tokens[c].type |= C_ALPHA;
72                 if (isdigit(c)) tokens[c].type |= C_DIGIT;
73                 if (isspace(c)) tokens[c].type |= C_SPACE;
74                 if (isxdigit(c)) tokens[c].type |= C_HEX;
75         }
76         tokens['\''].type |= C_QUOTE;
77         tokens['"'].type |= C_QUOTE;
78         tokens['l'].type |= C_FLOAT;
79         tokens['L'].type |= C_FLOAT;
80         tokens['f'].type |= C_FLOAT;
81         tokens['F'].type |= C_FLOAT;
82         tokens['U'].type |= C_FLOAT;
83         tokens['u'].type |= C_FLOAT;
84
85         tokens['-'].type |= C_SIGN;
86         tokens['+'].type |= C_SIGN;
87
88         for (i=0;s_tokens[i];i++) {
89                 c = s_tokens[i][0];
90                 tokens[c].type |= C_TOKEN;
91                 tokens[c].toks[tokens[c].num_toks] = s_tokens[i];
92                 tokens[c].num_toks++;
93         }
94 }
95
/*
 * Collect characters in a 64-byte static buffer and hand them to
 * hash_buffer() a full buffer at a time.
 *
 * A zero character is never stored; it acts as a flush request: any
 * buffered bytes are passed to hash_buffer(), followed by a
 * hash_buffer(NULL, 0) call.
 */
static void pushchar(unsigned char c)
{
        static unsigned char pending[64];
        static int used;

        if (c != 0) {
                pending[used] = c;
                used++;
                if (used == 64) {
                        /* buffer is full: hash it and start over */
                        hash_buffer((char *)pending, used);
                        used = 0;
                }
                return;
        }

        /* flush request */
        if (used > 0) {
                hash_buffer((char *)pending, used);
                used = 0;
        }
        hash_buffer(NULL, 0);
}
117
118 /* hash some C/C++ code after unifying */
119 static void unify(unsigned char *p, size_t size)
120 {
121         size_t ofs;
122         unsigned char q;
123         int i;
124
125         build_table();
126
127         for (ofs=0; ofs<size;) {
128                 if (p[ofs] == '#') {
129                         if ((size-ofs) > 2 && p[ofs+1] == ' ' && isdigit(p[ofs+2])) {
130                                 do {
131                                         ofs++;
132                                 } while (ofs < size && p[ofs] != '\n');
133                                 ofs++;
134                         } else {
135                                 do {
136                                         pushchar(p[ofs]);
137                                         ofs++;
138                                 } while (ofs < size && p[ofs] != '\n');
139                                 pushchar('\n');
140                                 ofs++;
141                         }
142                         continue;
143                 }
144
145                 if (tokens[p[ofs]].type & C_ALPHA) {
146                         do {
147                                 pushchar(p[ofs]);
148                                 ofs++;
149                         } while (ofs < size && 
150                                  (tokens[p[ofs]].type & (C_ALPHA|C_DIGIT)));
151                         pushchar('\n');
152                         continue;
153                 }
154
155                 if (tokens[p[ofs]].type & C_DIGIT) {
156                         do {
157                                 pushchar(p[ofs]);
158                                 ofs++;
159                         } while (ofs < size && 
160                                  ((tokens[p[ofs]].type & C_DIGIT) || p[ofs] == '.'));
161                         if (ofs < size && (p[ofs] == 'x' || p[ofs] == 'X')) {
162                                 do {
163                                         pushchar(p[ofs]);
164                                         ofs++;
165                                 } while (ofs < size && (tokens[p[ofs]].type & C_HEX));
166                         }
167                         if (ofs < size && (p[ofs] == 'E' || p[ofs] == 'e')) {
168                                 pushchar(p[ofs]);
169                                 ofs++;
170                                 while (ofs < size && 
171                                        (tokens[p[ofs]].type & (C_DIGIT|C_SIGN))) {
172                                         pushchar(p[ofs]);
173                                         ofs++;
174                                 }
175                         }
176                         while (ofs < size && (tokens[p[ofs]].type & C_FLOAT)) {
177                                 pushchar(p[ofs]);
178                                 ofs++;
179                         }
180                         pushchar('\n');
181                         continue;
182                 }
183
184                 if (tokens[p[ofs]].type & C_SPACE) {
185                         do {
186                                 ofs++;
187                         } while (ofs < size && (tokens[p[ofs]].type & C_SPACE));
188                         continue;
189                 }
190                         
191                 if (tokens[p[ofs]].type & C_QUOTE) {
192                         q = p[ofs];
193                         pushchar(p[ofs]);
194                         do {
195                                 ofs++;
196                                 while (ofs < size-1 && p[ofs] == '\\') {
197                                         pushchar(p[ofs]);
198                                         pushchar(p[ofs+1]);
199                                         ofs+=2;
200                                 }
201                                 pushchar(p[ofs]);
202                         } while (ofs < size && p[ofs] != q);
203                         pushchar('\n');
204                         ofs++;
205                         continue;
206                 }
207
208                 if (tokens[p[ofs]].type & C_TOKEN) {
209                         q = p[ofs];
210                         for (i=0;i<tokens[q].num_toks;i++) {
211                                 unsigned char *s = (unsigned char *)tokens[q].toks[i];
212                                 int len = strlen((char *)s);
213                                 if (size >= ofs+len && memcmp(&p[ofs], s, len) == 0) {
214                                         int j;
215                                         for (j=0;s[j];j++) {
216                                                 pushchar(s[j]);
217                                                 ofs++;
218                                         }
219                                         pushchar('\n');
220                                         break;
221                                 }
222                         }
223                         if (i < tokens[q].num_toks) {
224                                 continue;
225                         }
226                 }
227
228                 pushchar(p[ofs]);
229                 pushchar('\n');
230                 ofs++;
231         }
232         pushchar(0);
233 }
234
235
236 /* hash a file that consists of preprocessor output, but remove any line 
237    number information from the hash
238 */
239 int unify_hash(const char *fname)
240 {
241 #ifdef _WIN32
242         HANDLE file;
243         HANDLE section;
244         DWORD filesize_low;
245         char *map;
246         int ret = -1;
247
248         file = CreateFileA(fname, GENERIC_READ, FILE_SHARE_READ, NULL,
249                            OPEN_EXISTING, 0, NULL);
250         if (file != INVALID_HANDLE_VALUE) {
251                 filesize_low = GetFileSize(file, NULL);
252                 if (!(filesize_low == INVALID_FILE_SIZE && GetLastError() != NO_ERROR)) {
253                         section = CreateFileMappingA(file, NULL, PAGE_READONLY, 0, 0, NULL);
254                         CloseHandle(file);
255                         if (section != NULL) {
256                                 map = MapViewOfFile(section, FILE_MAP_READ, 0, 0, 0);
257                                 CloseHandle(section);
258                                 if (map != NULL)
259                                         ret = 0;
260                         }
261                 }
262         }
263
264         if (ret == -1) {
265                 cc_log("Failed to open preprocessor output %s\n", fname);
266                 stats_update(STATS_PREPROCESSOR);
267                 return -1;
268         }
269
270         /* pass it through the unifier */
271         unify((unsigned char *)map, filesize_low);
272
273         UnmapViewOfFile(map);
274
275         return 0;
276 #else
277         int fd;
278         struct stat st; 
279         char *map;
280
281         fd = open(fname, O_RDONLY|O_BINARY);
282         if (fd == -1 || fstat(fd, &st) != 0) {
283                 cc_log("Failed to open preprocessor output %s\n", fname);
284                 stats_update(STATS_PREPROCESSOR);
285                 return -1;
286         }
287
288         /* we use mmap() to make it easy to handle arbitrarily long
289            lines in preprocessor output. I have seen lines of over
290            100k in length, so this is well worth it */
291         map = mmap(NULL, st.st_size, PROT_READ, MAP_PRIVATE, fd, 0);
292         if (map == (char *)-1) {
293                 cc_log("Failed to mmap %s\n", fname);
294                 stats_update(STATS_PREPROCESSOR);
295                 return -1;
296         }
297         close(fd);
298
299         /* pass it through the unifier */
300         unify((unsigned char *)map, st.st_size);
301
302         munmap(map, st.st_size);
303
304         return 0;
305 #endif
306 }
307