resetting manifest requested domain to floor
[platform/upstream/ccache.git] / unify.c
1 /*
2  * Copyright (C) 2002 Andrew Tridgell
3  *
4  * This program is free software; you can redistribute it and/or modify it
5  * under the terms of the GNU General Public License as published by the Free
6  * Software Foundation; either version 3 of the License, or (at your option)
7  * any later version.
8  *
9  * This program is distributed in the hope that it will be useful, but WITHOUT
10  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
11  * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
12  * more details.
13  *
14  * You should have received a copy of the GNU General Public License along with
15  * this program; if not, write to the Free Software Foundation, Inc., 51
16  * Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
17 */
18
19 /*
20  * C/C++ unifier
21  *
22  * The idea is that changes that don't affect the resulting C code should not
23  * change the hash. This is achieved by folding white-space and other
24  * non-semantic fluff in the input into a single unified format.
25  *
26  * This unifier was designed to match the output of the unifier in compilercache,
27  * which is flex based. The major difference is that this unifier is much
28  * faster (about 2x) and more forgiving of syntactic errors. Continuing on
29  * syntactic errors is important to cope with C/C++ extensions in the local
30  * compiler (for example, inline assembly systems).
31  */
32
33 #include "ccache.h"
34
/*
 * Operator and punctuator spellings recognised by the unifier.
 *
 * The order is significant: build_table() files each entry under its first
 * character in the order given here, and unify() takes the first entry that
 * matches at the current position. The list ends with a null pointer.
 */
static const char *const s_tokens[] = {
        /* three characters */
        "...", ">>=", "<<=",
        /* two-character assignment operators */
        "+=", "-=", "*=", "/=", "%=", "&=", "^=", "|=",
        /* other two-character operators */
        ">>", "<<", "++", "--", "->", "&&", "||", "<=", ">=", "==", "!=",
        /* punctuation, including digraphs */
        ";", "{", "<%", "}", "%>", ",", ":", "=",
        "(", ")", "[", "<:", "]", ":>", ".",
        /* single-character operators */
        "&", "!", "~", "-", "+", "*", "/", "%", "<", ">", "^", "|", "?",
        NULL
};
43
/*
 * Character class flags kept in tokens[].type. They are bit flags, so one
 * character can belong to several classes at once.
 */
#define C_ALPHA (1 << 0) /* letter or underscore */
#define C_SPACE (1 << 1) /* white-space */
#define C_TOKEN (1 << 2) /* first character of an s_tokens entry */
#define C_QUOTE (1 << 3) /* single or double quote */
#define C_DIGIT (1 << 4) /* decimal digit */
#define C_HEX   (1 << 5) /* hexadecimal digit */
#define C_FLOAT (1 << 6) /* numeric suffix character: l, L, f, F, u, U */
#define C_SIGN  (1 << 7) /* '+' or '-' */
52
/*
 * Per-character lookup table, indexed by byte value and filled in by
 * build_table().
 */
static struct {
        /* OR of the C_* class flags that apply to this character */
        unsigned char type;

        /* number of entries of toks[] in use */
        unsigned char num_toks;

        /* s_tokens entries that start with this character, in s_tokens order */
        const char *toks[7];
} tokens[256];
58
59 /* build up the table used by the unifier */
60 static void
61 build_table(void)
62 {
63         unsigned char c;
64         int i;
65         static bool done;
66
67         if (done) return;
68         done = true;
69
70         memset(tokens, 0, sizeof(tokens));
71         for (c = 0; c < 128; c++) {
72                 if (isalpha(c) || c == '_') tokens[c].type |= C_ALPHA;
73                 if (isdigit(c)) tokens[c].type |= C_DIGIT;
74                 if (isspace(c)) tokens[c].type |= C_SPACE;
75                 if (isxdigit(c)) tokens[c].type |= C_HEX;
76         }
77         tokens['\''].type |= C_QUOTE;
78         tokens['"'].type |= C_QUOTE;
79         tokens['l'].type |= C_FLOAT;
80         tokens['L'].type |= C_FLOAT;
81         tokens['f'].type |= C_FLOAT;
82         tokens['F'].type |= C_FLOAT;
83         tokens['U'].type |= C_FLOAT;
84         tokens['u'].type |= C_FLOAT;
85
86         tokens['-'].type |= C_SIGN;
87         tokens['+'].type |= C_SIGN;
88
89         for (i = 0; s_tokens[i]; i++) {
90                 c = s_tokens[i][0];
91                 tokens[c].type |= C_TOKEN;
92                 tokens[c].toks[tokens[c].num_toks] = s_tokens[i];
93                 tokens[c].num_toks++;
94         }
95 }
96
/*
 * Collect characters in a 64-byte staging buffer and pass them to
 * hash_buffer() one full buffer at a time.
 *
 * A zero byte is not stored; it acts as a flush request: whatever is pending
 * is passed to hash_buffer(), followed by a hash_buffer(hash, NULL, 0) call
 * (presumably the signal that finishes the hash -- confirm against
 * hash_buffer()).
 *
 * The staging buffer is static, so it is shared by all callers.
 */
static void
pushchar(struct mdfour *hash, unsigned char c)
{
        static unsigned char pending[64];
        static size_t used;

        if (c != 0) {
                pending[used] = c;
                used++;
                if (used == sizeof(pending)) {
                        hash_buffer(hash, (char *)pending, used);
                        used = 0;
                }
                return;
        }

        /* flush request */
        if (used > 0) {
                hash_buffer(hash, (char *)pending, used);
                used = 0;
        }
        hash_buffer(hash, NULL, 0);
}
119
120 /* hash some C/C++ code after unifying */
121 static void
122 unify(struct mdfour *hash, unsigned char *p, size_t size)
123 {
124         size_t ofs;
125         unsigned char q;
126         int i;
127
128         build_table();
129
130         for (ofs = 0; ofs < size;) {
131                 if (p[ofs] == '#') {
132                         if ((size-ofs) > 2 && p[ofs+1] == ' ' && isdigit(p[ofs+2])) {
133                                 do {
134                                         ofs++;
135                                 } while (ofs < size && p[ofs] != '\n');
136                                 ofs++;
137                         } else {
138                                 do {
139                                         pushchar(hash, p[ofs]);
140                                         ofs++;
141                                 } while (ofs < size && p[ofs] != '\n');
142                                 pushchar(hash, '\n');
143                                 ofs++;
144                         }
145                         continue;
146                 }
147
148                 if (tokens[p[ofs]].type & C_ALPHA) {
149                         do {
150                                 pushchar(hash, p[ofs]);
151                                 ofs++;
152                         } while (ofs < size && (tokens[p[ofs]].type & (C_ALPHA|C_DIGIT)));
153                         pushchar(hash, '\n');
154                         continue;
155                 }
156
157                 if (tokens[p[ofs]].type & C_DIGIT) {
158                         do {
159                                 pushchar(hash, p[ofs]);
160                                 ofs++;
161                         } while (ofs < size &&
162                                  ((tokens[p[ofs]].type & C_DIGIT) || p[ofs] == '.'));
163                         if (ofs < size && (p[ofs] == 'x' || p[ofs] == 'X')) {
164                                 do {
165                                         pushchar(hash, p[ofs]);
166                                         ofs++;
167                                 } while (ofs < size && (tokens[p[ofs]].type & C_HEX));
168                         }
169                         if (ofs < size && (p[ofs] == 'E' || p[ofs] == 'e')) {
170                                 pushchar(hash, p[ofs]);
171                                 ofs++;
172                                 while (ofs < size && (tokens[p[ofs]].type & (C_DIGIT|C_SIGN))) {
173                                         pushchar(hash, p[ofs]);
174                                         ofs++;
175                                 }
176                         }
177                         while (ofs < size && (tokens[p[ofs]].type & C_FLOAT)) {
178                                 pushchar(hash, p[ofs]);
179                                 ofs++;
180                         }
181                         pushchar(hash, '\n');
182                         continue;
183                 }
184
185                 if (tokens[p[ofs]].type & C_SPACE) {
186                         do {
187                                 ofs++;
188                         } while (ofs < size && (tokens[p[ofs]].type & C_SPACE));
189                         continue;
190                 }
191
192                 if (tokens[p[ofs]].type & C_QUOTE) {
193                         q = p[ofs];
194                         pushchar(hash, p[ofs]);
195                         do {
196                                 ofs++;
197                                 while (ofs < size-1 && p[ofs] == '\\') {
198                                         pushchar(hash, p[ofs]);
199                                         pushchar(hash, p[ofs+1]);
200                                         ofs += 2;
201                                 }
202                                 pushchar(hash, p[ofs]);
203                         } while (ofs < size && p[ofs] != q);
204                         pushchar(hash, '\n');
205                         ofs++;
206                         continue;
207                 }
208
209                 if (tokens[p[ofs]].type & C_TOKEN) {
210                         q = p[ofs];
211                         for (i = 0; i < tokens[q].num_toks; i++) {
212                                 unsigned char *s = (unsigned char *)tokens[q].toks[i];
213                                 int len = strlen((char *)s);
214                                 if (size >= ofs+len && memcmp(&p[ofs], s, len) == 0) {
215                                         int j;
216                                         for (j = 0; s[j]; j++) {
217                                                 pushchar(hash, s[j]);
218                                                 ofs++;
219                                         }
220                                         pushchar(hash, '\n');
221                                         break;
222                                 }
223                         }
224                         if (i < tokens[q].num_toks) {
225                                 continue;
226                         }
227                 }
228
229                 pushchar(hash, p[ofs]);
230                 pushchar(hash, '\n');
231                 ofs++;
232         }
233         pushchar(hash, 0);
234 }
235
236
237 /* hash a file that consists of preprocessor output, but remove any line
238    number information from the hash
239 */
240 int
241 unify_hash(struct mdfour *hash, const char *fname)
242 {
243         char *data;
244         size_t size;
245
246         if (!read_file(fname, 0, &data, &size)) {
247                 stats_update(STATS_PREPROCESSOR);
248                 return -1;
249         }
250         unify(hash, (unsigned char *)data, size);
251         free(data);
252         return 0;
253 }