2 Copyright (C) Andrew Tridgell 2002
4 This program is free software; you can redistribute it and/or modify
5 it under the terms of the GNU General Public License as published by
6 the Free Software Foundation; either version 2 of the License, or
7 (at your option) any later version.
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
14 You should have received a copy of the GNU General Public License
15 along with this program; if not, write to the Free Software
16 Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
21 the idea is that changes that don't affect the resulting C code
22 should not change the hash. This is achieved by folding white-space
23 and other non-semantic fluff in the input into a single unified format.
25 This unifier was designed to match the output of the unifier in
26 compilercache, which is flex based. The major difference is that
27 this unifier is much faster (about 2x) and more forgiving of
28 syntactic errors. Continuing on syntactic errors is important to
29 cope with C/C++ extensions in the local compiler (for example,
30 inline assembly systems).
/* Table of operator and punctuator strings (including the digraphs <%, %>,
   <: and :>) that the unifier treats as tokens.  build_table walks this
   array until it reaches a null entry and files each string in the toks
   list of a character in the tokens table.
   In the visible rows a longer string always comes before any shorter
   string that is its prefix (>>= before >> before >).
   NOTE(review): presumably that order matters because unify tries the
   candidates in stored order - confirm.  The closing entries of this
   initializer are not visible in this excerpt. */
35 static char *s_tokens[] = {
36 "...", ">>=", "<<=", "+=", "-=", "*=", "/=", "%=", "&=", "^=",
37 "|=", ">>", "<<", "++", "--", "->", "&&", "||", "<=", ">=",
38 "==", "!=", ";", "{", "<%", "}", "%>", ",", ":", "=",
39 "(", ")", "[", "<:", "]", ":>", ".", "&", "!", "~",
40 "-", "+", "*", "/", "%", "<", ">", "^", "|", "?",
/* count of valid entries in the toks list of a table slot: build_table
   uses it as the index at which it stores a string, and unify uses it as
   the upper bound when scanning toks.
   NOTE(review): the enclosing struct declaration is not visible here. */
55 unsigned char num_toks;
59 /* Build up the table used by the unifier.

   Zeroes the tokens table and then ORs classification bits into
   tokens[c].type: C_ALPHA, C_DIGIT, C_SPACE and C_HEX from the ctype
   predicates, C_QUOTE for the two quote characters, C_FLOAT for the
   letters l, L, f, F, U and u, and C_SIGN for minus and plus.  Finally
   each string in s_tokens is stored in the toks list of a character c,
   and that character is flagged C_TOKEN.

   NOTE(review): the loop header that drives c through the character
   values, and the assignment of c inside the s_tokens loop, are not
   visible in this excerpt; presumably c there is the first character of
   s_tokens[i] - confirm. */
60 static void build_table(void)
/* start from an all-zero table so that the |= below only ever add bits */
69 memset(tokens, 0, sizeof(tokens));
/* underscore is classed together with the alphabetic characters */
71 if (isalpha(c) || c == '_') tokens[c].type |= C_ALPHA;
72 if (isdigit(c)) tokens[c].type |= C_DIGIT;
73 if (isspace(c)) tokens[c].type |= C_SPACE;
74 if (isxdigit(c)) tokens[c].type |= C_HEX;
/* both the single and the double quote character are marked C_QUOTE */
76 tokens['\''].type |= C_QUOTE;
77 tokens['"'].type |= C_QUOTE;
/* letters that unify scans as a run after a number (see its C_FLOAT
   loop); presumably literal suffixes - confirm */
78 tokens['l'].type |= C_FLOAT;
79 tokens['L'].type |= C_FLOAT;
80 tokens['f'].type |= C_FLOAT;
81 tokens['F'].type |= C_FLOAT;
82 tokens['U'].type |= C_FLOAT;
83 tokens['u'].type |= C_FLOAT;
/* sign characters; unify tests C_SIGN together with C_DIGIT after an
   E or e */
85 tokens['-'].type |= C_SIGN;
86 tokens['+'].type |= C_SIGN;
/* the loop stops at the first null entry of s_tokens */
88 for (i=0;s_tokens[i];i++) {
90 tokens[c].type |= C_TOKEN;
/* store the string at index num_toks.
   NOTE(review): the increment of num_toks is not visible in this
   excerpt - confirm it follows this store */
91 tokens[c].toks[tokens[c].num_toks] = s_tokens[i];
96 /* Buffer up characters before hashing them.

   Characters are collected in a function-local static array of 64 bytes
   and handed to hash_buffer in chunks.  Three hash_buffer calls are
   visible: two pass the buffer with its current length len, and one
   passes NULL with a length of 0.

   NOTE(review): the conditions that select between these calls, and the
   declaration and resetting of len, are not visible in this excerpt.
   Presumably one path flushes pending bytes on request and another
   flushes when the buffer fills - confirm against the full source. */
97 static void pushchar(unsigned char c)
/* static, so the buffered bytes persist between calls */
99 static unsigned char buf[64];
104 hash_buffer((char *)buf, len);
/* NOTE(review): the meaning of a NULL buffer with zero length is defined
   by hash_buffer, which is not part of this excerpt */
107 hash_buffer(NULL, 0);
113 hash_buffer((char *)buf, len);
118 /* Hash some C/C++ code after unifying.

   Scans the size bytes starting at p from offset 0.  At each position the
   tokens table entry for the current byte decides which branch handles
   it: a C_ALPHA run, a number starting with a C_DIGIT byte, a C_SPACE
   run, a quoted section starting with a C_QUOTE byte, or one of the
   strings registered for a C_TOKEN byte.

   NOTE(review): most statements of this function are not visible in this
   excerpt, including the places where ofs is advanced and where bytes are
   passed on for hashing.  The notes below describe only the visible
   conditions. */
119 static void unify(unsigned char *p, size_t size)
/* no increment clause; presumably every branch advances ofs itself */
127 for (ofs=0; ofs<size;) {
/* needs more than two bytes left, then checks that the next byte is a
   space and the one after it a digit.
   NOTE(review): the enclosing test on p[ofs] is not visible; presumably
   this recognises preprocessor line markers of the form # N, which the
   comment on unify_hash says are kept out of the hash - confirm */
129 if ((size-ofs) > 2 && p[ofs+1] == ' ' && isdigit(p[ofs+2])) {
/* both loops below run until a newline or the end of the input */
132 } while (ofs < size && p[ofs] != '\n');
138 } while (ofs < size && p[ofs] != '\n');
/* a C_ALPHA byte starts a run that continues over C_ALPHA and C_DIGIT
   bytes */
145 if (tokens[p[ofs]].type & C_ALPHA) {
149 } while (ofs < size &&
150 (tokens[p[ofs]].type & (C_ALPHA|C_DIGIT)));
/* a C_DIGIT byte starts a number: first a run of digits and dots */
155 if (tokens[p[ofs]].type & C_DIGIT) {
159 } while (ofs < size &&
160 ((tokens[p[ofs]].type & C_DIGIT) || p[ofs] == '.'));
/* then an optional x or X followed by a run of C_HEX bytes */
161 if (ofs < size && (p[ofs] == 'x' || p[ofs] == 'X')) {
165 } while (ofs < size && (tokens[p[ofs]].type & C_HEX));
/* then an optional E or e followed by bytes that are digits or signs */
167 if (ofs < size && (p[ofs] == 'E' || p[ofs] == 'e')) {
171 (tokens[p[ofs]].type & (C_DIGIT|C_SIGN))) {
/* finally a run of C_FLOAT bytes (l, L, f, F, U, u per build_table) */
176 while (ofs < size && (tokens[p[ofs]].type & C_FLOAT)) {
/* a run of C_SPACE bytes */
184 if (tokens[p[ofs]].type & C_SPACE) {
187 } while (ofs < size && (tokens[p[ofs]].type & C_SPACE));
/* a C_QUOTE byte starts a quoted section.
   NOTE(review): the assignment of q is not visible; presumably it holds
   the opening quote character - confirm */
191 if (tokens[p[ofs]].type & C_QUOTE) {
/* backslash handling is limited to positions before the last byte */
196 while (ofs < size-1 && p[ofs] == '\\') {
/* keep going until a byte equal to q or the end of the input */
202 } while (ofs < size && p[ofs] != q);
/* a C_TOKEN byte: try each string registered for q in stored order */
208 if (tokens[p[ofs]].type & C_TOKEN) {
210 for (i=0;i<tokens[q].num_toks;i++) {
211 unsigned char *s = (unsigned char *)tokens[q].toks[i];
212 int len = strlen((char *)s);
/* the size check keeps memcmp from reading past the end of the input */
213 if (size >= ofs+len && memcmp(&p[ofs], s, len) == 0) {
/* i below num_toks means the loop above ended early; presumably via a
   break on a match - confirm */
223 if (i < tokens[q].num_toks) {
236 /* hash a file that consists of preprocessor output, but remove any line
237 number information from the hash

   fname is the path of the preprocessor output to read.

   Two ways of reading the file are visible below.  The first uses
   CreateFileA, GetFileSize, CreateFileMappingA and MapViewOfFile and
   passes the mapped view to unify with the length filesize_low.  The
   second uses open, fstat and mmap and passes the mapping to unify with
   the length st.st_size.  When the file cannot be opened or mapped, the
   visible code logs through cc_log and calls
   stats_update(STATS_PREPROCESSOR).

   NOTE(review): the preprocessor conditionals that choose between the
   two paths, the local declarations, and all return statements are not
   visible in this excerpt, so the return value contract cannot be stated
   from here.  The terminator of this comment is also missing from the
   excerpt.
239 int unify_hash(const char *fname)
248 file = CreateFileA(fname, GENERIC_READ, FILE_SHARE_READ, NULL,
249 OPEN_EXISTING, 0, NULL);
250 if (file != INVALID_HANDLE_VALUE) {
251 filesize_low = GetFileSize(file, NULL);
252 if (!(filesize_low == INVALID_FILE_SIZE && GetLastError() != NO_ERROR)) {
253 section = CreateFileMappingA(file, NULL, PAGE_READONLY, 0, 0, NULL);
255 if (section != NULL) {
256 map = MapViewOfFile(section, FILE_MAP_READ, 0, 0, 0);
257 CloseHandle(section);
265 cc_log("Failed to open preprocessor output %s\n", fname);
266 stats_update(STATS_PREPROCESSOR);
270 /* pass it through the unifier */
271 unify((unsigned char *)map, filesize_low);
/* NOTE(review): filesize_low comes from GetFileSize(file, NULL), which
   reports only the low-order part of the size, while the view above maps
   the whole file; confirm that very large files are not a concern */
273 UnmapViewOfFile(map);
/* second visible path: plain file descriptor, size taken from fstat */
281 fd = open(fname, O_RDONLY|O_BINARY);
/* a failed open and a failed fstat are reported with the same message.
   NOTE(review): whether fd is closed when only fstat failed is not
   visible in this excerpt - confirm */
282 if (fd == -1 || fstat(fd, &st) != 0) {
283 cc_log("Failed to open preprocessor output %s\n", fname);
284 stats_update(STATS_PREPROCESSOR);
288 /* we use mmap() to make it easy to handle arbitrarily long
289 lines in preprocessor output. I have seen lines of over
290 100k in length, so this is well worth it */
/* read-only private mapping of the whole file, starting at offset 0 */
291 map = mmap(NULL, st.st_size, PROT_READ, MAP_PRIVATE, fd, 0);
/* (char *)-1 is presumably the MAP_FAILED value on the supported
   platforms.
   NOTE(review): POSIX mmap fails for a zero length, so an empty file
   would land in this error branch - confirm that is intended */
292 if (map == (char *)-1) {
293 cc_log("Failed to mmap %s\n", fname);
294 stats_update(STATS_PREPROCESSOR);
299 /* pass it through the unifier */
300 unify((unsigned char *)map, st.st_size);
/* unmap with the same length that was mapped */
302 munmap(map, st.st_size);