1 // Copyright (C) 2009-2016 Joel Rosdahl
3 // This program is free software; you can redistribute it and/or modify it
4 // under the terms of the GNU General Public License as published by the Free
5 // Software Foundation; either version 3 of the License, or (at your option)
8 // This program is distributed in the hope that it will be useful, but WITHOUT
9 // ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
10 // FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
13 // You should have received a copy of the GNU General Public License along with
14 // this program; if not, write to the Free Software Foundation, Inc., 51
15 // Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
18 #include "hashtable_itr.h"
21 #include "murmurhashneutral2.h"
25 // Sketchy specification of the manifest disk format:
27 // <magic> magic number (4 bytes)
28 // <version> file format version (1 byte unsigned int)
29 // <hash_size> size of the hash fields (in bytes) (1 byte unsigned int)
30 // <reserved> reserved for future use (2 bytes)
31 // ----------------------------------------------------------------------------
32 // <n> number of include file paths (4 bytes unsigned int)
33 // <path_0> path to include file (NUL-terminated string,
34 // ... at most 1024 bytes)
36 // ----------------------------------------------------------------------------
37 // <n> number of include file hash entries (4 bytes unsigned int)
38 // <index[0]> index of include file path (4 bytes unsigned int)
39 // <hash[0]> hash of include file (<hash_size> bytes)
40 // <size[0]> size of include file (4 bytes unsigned int)
41 // <mtime[0]> mtime of include file (8 bytes signed int)
42 // <ctime[0]> ctime of include file (8 bytes signed int)
49 // ----------------------------------------------------------------------------
50 // <n> number of object name entries (4 bytes unsigned int)
51 // <m[0]> number of include file hash indexes (4 bytes unsigned int)
52 // <index[0][0]> include file hash index (4 bytes unsigned int)
55 // <hash[0]> hash part of object name (<hash_size> bytes)
56 // <size[0]> size part of object name (4 bytes unsigned int)
58 // <m[n-1]> number of include file hash indexes
59 // <index[n-1][0]> include file hash index
61 // <index[n-1][m[n-1]]>
65 static const uint32_t MAGIC = 0x63436d46U;
66 static const uint32_t MAX_MANIFEST_ENTRIES = 100;
67 static const uint32_t MAX_MANIFEST_FILE_INFO_ENTRIES = 10000;
69 #define ccache_static_assert(e) \
70 do { enum { ccache_static_assert__ = 1/(e) }; } while (false)
75 // Hash of referenced file.
77 // Size of referenced file.
79 // mtime of referenced file.
81 // ctime of referenced file.
86 // Number of entries in file_info_indexes.
87 uint32_t n_file_info_indexes;
88 // Indexes to file_infos.
89 uint32_t *file_info_indexes;
90 // Hash of the object itself.
91 struct file_hash hash;
95 // Version of decoded file.
98 // Reserved for future use.
101 // Size of hash fields (in bytes).
104 // Referenced include files.
108 // Information about referenced include files.
109 uint32_t n_file_infos;
110 struct file_info *file_infos;
112 // Object names plus references to include file hashes.
114 struct object *objects;
124 hash_from_file_info(void *key)
126 ccache_static_assert(sizeof(struct file_info) == 40); // No padding.
127 return murmurhashneutral2(key, sizeof(struct file_info), 0);
131 file_infos_equal(void *key1, void *key2)
133 struct file_info *fi1 = (struct file_info *)key1;
134 struct file_info *fi2 = (struct file_info *)key2;
135 return fi1->index == fi2->index
136 && memcmp(fi1->hash, fi2->hash, 16) == 0
137 && fi1->size == fi2->size
138 && fi1->mtime == fi2->mtime
139 && fi1->ctime == fi2->ctime;
143 free_manifest(struct manifest *mf)
145 for (uint32_t i = 0; i < mf->n_files; i++) {
149 free(mf->file_infos);
150 for (uint32_t i = 0; i < mf->n_objects; i++) {
151 free(mf->objects[i].file_info_indexes);
157 #define READ_BYTE(var) \
159 int ch_ = gzgetc(f); \
163 (var) = ch_ & 0xFF; \
166 #define READ_INT(size, var) \
169 for (size_t i_ = 0; i_ < (size); i_++) { \
170 int ch_ = gzgetc(f); \
175 (var) |= ch_ & 0xFF; \
179 #define READ_STR(var) \
183 for (i_ = 0; i_ < sizeof(buf_); i_++) { \
184 int ch_ = gzgetc(f); \
193 if (i_ == sizeof(buf_)) { \
196 (var) = x_strdup(buf_); \
199 #define READ_BYTES(n, var) \
201 for (size_t i_ = 0; i_ < (n); i_++) { \
202 int ch_ = gzgetc(f); \
210 static struct manifest *
211 create_empty_manifest(void)
213 struct manifest *mf = x_malloc(sizeof(*mf));
217 mf->n_file_infos = 0;
218 mf->file_infos = NULL;
225 static struct manifest *
226 read_manifest(gzFile f)
228 struct manifest *mf = create_empty_manifest();
232 if (magic != MAGIC) {
233 cc_log("Manifest file has bad magic number %u", magic);
237 READ_BYTE(mf->version);
238 if (mf->version != MANIFEST_VERSION) {
239 cc_log("Manifest file has unknown version %u", mf->version);
243 READ_BYTE(mf->hash_size);
244 if (mf->hash_size != 16) {
245 // Temporary measure until we support different hash algorithms.
246 cc_log("Manifest file has unsupported hash size %u", mf->hash_size);
250 READ_INT(2, mf->reserved);
252 READ_INT(4, mf->n_files);
253 mf->files = x_calloc(mf->n_files, sizeof(*mf->files));
254 for (uint32_t i = 0; i < mf->n_files; i++) {
255 READ_STR(mf->files[i]);
258 READ_INT(4, mf->n_file_infos);
259 mf->file_infos = x_calloc(mf->n_file_infos, sizeof(*mf->file_infos));
260 for (uint32_t i = 0; i < mf->n_file_infos; i++) {
261 READ_INT(4, mf->file_infos[i].index);
262 READ_BYTES(mf->hash_size, mf->file_infos[i].hash);
263 READ_INT(4, mf->file_infos[i].size);
264 READ_INT(8, mf->file_infos[i].mtime);
265 READ_INT(8, mf->file_infos[i].ctime);
268 READ_INT(4, mf->n_objects);
269 mf->objects = x_calloc(mf->n_objects, sizeof(*mf->objects));
270 for (uint32_t i = 0; i < mf->n_objects; i++) {
271 READ_INT(4, mf->objects[i].n_file_info_indexes);
272 mf->objects[i].file_info_indexes =
273 x_calloc(mf->objects[i].n_file_info_indexes,
274 sizeof(*mf->objects[i].file_info_indexes));
275 for (uint32_t j = 0; j < mf->objects[i].n_file_info_indexes; j++) {
276 READ_INT(4, mf->objects[i].file_info_indexes[j]);
278 READ_BYTES(mf->hash_size, mf->objects[i].hash.hash);
279 READ_INT(4, mf->objects[i].hash.size);
285 cc_log("Corrupt manifest file");
290 #define WRITE_INT(size, var) \
294 for (i_ = 0; i_ < (size); i_++) { \
295 ch_ = ((var) >> (8 * ((size) - i_ - 1))); \
296 if (gzputc(f, ch_) == EOF) { \
302 #define WRITE_STR(var) \
304 if (gzputs(f, var) == EOF || gzputc(f, '\0') == EOF) { \
309 #define WRITE_BYTES(n, var) \
312 for (i_ = 0; i_ < (n); i_++) { \
313 if (gzputc(f, (var)[i_]) == EOF) { \
320 write_manifest(gzFile f, const struct manifest *mf)
323 WRITE_INT(1, MANIFEST_VERSION);
327 WRITE_INT(4, mf->n_files);
328 for (uint32_t i = 0; i < mf->n_files; i++) {
329 WRITE_STR(mf->files[i]);
332 WRITE_INT(4, mf->n_file_infos);
333 for (uint32_t i = 0; i < mf->n_file_infos; i++) {
334 WRITE_INT(4, mf->file_infos[i].index);
335 WRITE_BYTES(mf->hash_size, mf->file_infos[i].hash);
336 WRITE_INT(4, mf->file_infos[i].size);
337 WRITE_INT(8, mf->file_infos[i].mtime);
338 WRITE_INT(8, mf->file_infos[i].ctime);
341 WRITE_INT(4, mf->n_objects);
342 for (uint32_t i = 0; i < mf->n_objects; i++) {
343 WRITE_INT(4, mf->objects[i].n_file_info_indexes);
344 for (uint32_t j = 0; j < mf->objects[i].n_file_info_indexes; j++) {
345 WRITE_INT(4, mf->objects[i].file_info_indexes[j]);
347 WRITE_BYTES(mf->hash_size, mf->objects[i].hash.hash);
348 WRITE_INT(4, mf->objects[i].hash.size);
354 cc_log("Error writing to manifest file");
359 verify_object(struct conf *conf, struct manifest *mf, struct object *obj,
360 struct hashtable *stated_files, struct hashtable *hashed_files)
362 for (uint32_t i = 0; i < obj->n_file_info_indexes; i++) {
363 struct file_info *fi = &mf->file_infos[obj->file_info_indexes[i]];
364 char *path = mf->files[fi->index];
365 struct file_stats *st = hashtable_search(stated_files, path);
367 struct stat file_stat;
368 if (x_stat(path, &file_stat) != 0) {
371 st = x_malloc(sizeof(*st));
372 st->size = file_stat.st_size;
373 st->mtime = file_stat.st_mtime;
374 st->ctime = file_stat.st_ctime;
375 hashtable_insert(stated_files, x_strdup(path), st);
378 if (conf->sloppiness & SLOPPY_FILE_STAT_MATCHES) {
379 // st->ctime is sometimes 0, so we can't check that both st->ctime and
380 // st->mtime are greater than time_of_compilation. But it's sufficient to
381 // check that either is.
382 if (fi->size == st->size
383 && fi->mtime == st->mtime
384 && fi->ctime == st->ctime
385 && MAX(st->mtime, st->ctime) >= time_of_compilation) {
386 cc_log("size/mtime/ctime hit for %s", path);
389 cc_log("size/mtime/ctime miss for %s", path);
393 struct file_hash *actual = hashtable_search(hashed_files, path);
397 int result = hash_source_code_file(conf, &hash, path);
398 if (result & HASH_SOURCE_CODE_ERROR) {
399 cc_log("Failed hashing %s", path);
402 if (result & HASH_SOURCE_CODE_FOUND_TIME) {
405 actual = x_malloc(sizeof(*actual));
406 hash_result_as_bytes(&hash, actual->hash);
407 actual->size = hash.totalN;
408 hashtable_insert(hashed_files, x_strdup(path), actual);
410 if (memcmp(fi->hash, actual->hash, mf->hash_size) != 0
411 || fi->size != actual->size) {
419 static struct hashtable *
420 create_string_index_map(char **strings, uint32_t len)
422 struct hashtable *h =
423 create_hashtable(1000, hash_from_string, strings_equal);
424 for (uint32_t i = 0; i < len; i++) {
425 uint32_t *index = x_malloc(sizeof(*index));
427 hashtable_insert(h, x_strdup(strings[i]), index);
432 static struct hashtable *
433 create_file_info_index_map(struct file_info *infos, uint32_t len)
435 struct hashtable *h =
436 create_hashtable(1000, hash_from_file_info, file_infos_equal);
437 for (uint32_t i = 0; i < len; i++) {
438 struct file_info *fi = x_malloc(sizeof(*fi));
440 uint32_t *index = x_malloc(sizeof(*index));
442 hashtable_insert(h, fi, index);
448 get_include_file_index(struct manifest *mf, char *path,
449 struct hashtable *mf_files)
451 uint32_t *index = hashtable_search(mf_files, path);
456 uint32_t n = mf->n_files;
457 mf->files = x_realloc(mf->files, (n + 1) * sizeof(*mf->files));
459 mf->files[n] = x_strdup(path);
464 get_file_hash_index(struct manifest *mf,
466 struct file_hash *file_hash,
467 struct hashtable *mf_files,
468 struct hashtable *mf_file_infos)
471 fi.index = get_include_file_index(mf, path, mf_files);
472 memcpy(fi.hash, file_hash->hash, sizeof(fi.hash));
473 fi.size = file_hash->size;
475 // file_stat.st_{m,c}time has a resolution of 1 second, so we can cache the
476 // file's mtime and ctime only if they're at least one second older than
477 // time_of_compilation.
479 // st->ctime may be 0, so we have to check time_of_compilation against
480 // MAX(mtime, ctime).
482 struct stat file_stat;
483 if (stat(path, &file_stat) != -1
484 && time_of_compilation > MAX(file_stat.st_mtime, file_stat.st_ctime)) {
485 fi.mtime = file_stat.st_mtime;
486 fi.ctime = file_stat.st_ctime;
492 uint32_t *fi_index = hashtable_search(mf_file_infos, &fi);
497 uint32_t n = mf->n_file_infos;
498 mf->file_infos = x_realloc(mf->file_infos, (n + 1) * sizeof(*mf->file_infos));
500 mf->file_infos[n] = fi;
505 add_file_info_indexes(uint32_t *indexes, uint32_t size,
506 struct manifest *mf, struct hashtable *included_files)
513 struct hashtable *mf_files =
514 create_string_index_map(mf->files, mf->n_files);
515 // struct file_info --> index
516 struct hashtable *mf_file_infos =
517 create_file_info_index_map(mf->file_infos, mf->n_file_infos);
518 struct hashtable_itr *iter = hashtable_iterator(included_files);
521 char *path = hashtable_iterator_key(iter);
522 struct file_hash *file_hash = hashtable_iterator_value(iter);
523 indexes[i] = get_file_hash_index(mf, path, file_hash, mf_files,
526 } while (hashtable_iterator_advance(iter));
529 hashtable_destroy(mf_file_infos, 1);
530 hashtable_destroy(mf_files, 1);
534 add_object_entry(struct manifest *mf,
535 struct file_hash *object_hash,
536 struct hashtable *included_files)
538 uint32_t n_objs = mf->n_objects;
539 mf->objects = x_realloc(mf->objects, (n_objs + 1) * sizeof(*mf->objects));
541 struct object *obj = &mf->objects[n_objs];
543 uint32_t n_fii = hashtable_count(included_files);
544 obj->n_file_info_indexes = n_fii;
545 obj->file_info_indexes = x_malloc(n_fii * sizeof(*obj->file_info_indexes));
546 add_file_info_indexes(obj->file_info_indexes, n_fii, mf, included_files);
547 memcpy(obj->hash.hash, object_hash->hash, mf->hash_size);
548 obj->hash.size = object_hash->size;
551 // Try to get the object hash from a manifest file. Caller frees. Returns NULL
554 manifest_get(struct conf *conf, const char *manifest_path)
557 struct manifest *mf = NULL;
558 struct hashtable *hashed_files = NULL; // path --> struct file_hash
559 struct hashtable *stated_files = NULL; // path --> struct file_stats
560 struct file_hash *fh = NULL;
562 int fd = open(manifest_path, O_RDONLY | O_BINARY);
565 cc_log("No such manifest file");
568 f = gzdopen(fd, "rb");
571 cc_log("Failed to gzdopen manifest file");
574 mf = read_manifest(f);
576 cc_log("Error reading manifest file");
580 hashed_files = create_hashtable(1000, hash_from_string, strings_equal);
581 stated_files = create_hashtable(1000, hash_from_string, strings_equal);
583 // Check newest object first since it's a bit more likely to match.
584 for (uint32_t i = mf->n_objects; i > 0; i--) {
585 if (verify_object(conf, mf, &mf->objects[i - 1],
586 stated_files, hashed_files)) {
587 fh = x_malloc(sizeof(*fh));
588 *fh = mf->objects[i - 1].hash;
595 hashtable_destroy(hashed_files, 1);
598 hashtable_destroy(stated_files, 1);
609 // Put the object name into a manifest file given a set of included files.
610 // Returns true on success, otherwise false.
612 manifest_put(const char *manifest_path, struct file_hash *object_hash,
613 struct hashtable *included_files)
617 struct manifest *mf = NULL;
618 char *tmp_file = NULL;
620 // We don't bother to acquire a lock when writing the manifest to disk. A
621 // race between two processes will only result in one lost entry, which is
622 // not a big deal, and it's also very unlikely.
624 int fd1 = open(manifest_path, O_RDONLY | O_BINARY);
627 mf = create_empty_manifest();
629 gzFile f1 = gzdopen(fd1, "rb");
631 cc_log("Failed to gzdopen manifest file");
635 mf = read_manifest(f1);
638 cc_log("Failed to read manifest file; deleting it");
639 x_unlink(manifest_path);
640 mf = create_empty_manifest();
644 if (mf->n_objects > MAX_MANIFEST_ENTRIES) {
645 // Normally, there shouldn't be many object entries in the manifest since
646 // new entries are added only if an include file has changed but not the
647 // source file, and you typically change source files more often than
648 // header files. However, it's certainly possible to imagine cases where
649 // the manifest will grow large (for instance, a generated header file that
650 // changes for every build), and this must be taken care of since
651 // processing an ever growing manifest eventually will take too much time.
652 // A good way of solving this would be to maintain the object entries in
653 // LRU order and discarding the old ones. An easy way is to throw away all
654 // entries when there are too many. Let's do that for now.
655 cc_log("More than %u entries in manifest file; discarding",
656 MAX_MANIFEST_ENTRIES);
658 mf = create_empty_manifest();
659 } else if (mf->n_file_infos > MAX_MANIFEST_FILE_INFO_ENTRIES) {
660 // Rarely, file_info entries can grow large in pathological cases where
661 // many included files change, but the main file does not. This also puts
662 // an upper bound on the number of file_info entries.
663 cc_log("More than %u file_info entries in manifest file; discarding",
664 MAX_MANIFEST_FILE_INFO_ENTRIES);
666 mf = create_empty_manifest();
669 tmp_file = format("%s.tmp", manifest_path);
670 int fd2 = create_tmp_fd(&tmp_file);
671 f2 = gzdopen(fd2, "wb");
673 cc_log("Failed to gzdopen %s", tmp_file);
677 add_object_entry(mf, object_hash, included_files);
678 if (write_manifest(f2, mf)) {
681 if (x_rename(tmp_file, manifest_path) == 0) {
684 cc_log("Failed to rename %s to %s", tmp_file, manifest_path);
688 cc_log("Failed to write manifest file");
706 manifest_dump(const char *manifest_path, FILE *stream)
708 struct manifest *mf = NULL;
712 int fd = open(manifest_path, O_RDONLY | O_BINARY);
714 fprintf(stderr, "No such manifest file: %s\n", manifest_path);
717 f = gzdopen(fd, "rb");
719 fprintf(stderr, "Failed to dzopen manifest file\n");
723 mf = read_manifest(f);
725 fprintf(stderr, "Error reading manifest file\n");
729 fprintf(stream, "Magic: %c%c%c%c\n",
730 (MAGIC >> 24) & 0xFF,
731 (MAGIC >> 16) & 0xFF,
734 fprintf(stream, "Version: %u\n", mf->version);
735 fprintf(stream, "Hash size: %u\n", (unsigned)mf->hash_size);
736 fprintf(stream, "Reserved field: %u\n", (unsigned)mf->reserved);
737 fprintf(stream, "File paths (%u):\n", (unsigned)mf->n_files);
738 for (unsigned i = 0; i < mf->n_files; ++i) {
739 fprintf(stream, " %u: %s\n", i, mf->files[i]);
741 fprintf(stream, "File infos (%u):\n", (unsigned)mf->n_file_infos);
742 for (unsigned i = 0; i < mf->n_file_infos; ++i) {
744 fprintf(stream, " %u:\n", i);
745 fprintf(stream, " Path index: %u\n", mf->file_infos[i].index);
746 hash = format_hash_as_string(mf->file_infos[i].hash, -1);
747 fprintf(stream, " Hash: %s\n", hash);
749 fprintf(stream, " Size: %u\n", mf->file_infos[i].size);
750 fprintf(stream, " Mtime: %lld\n", (long long)mf->file_infos[i].mtime);
751 fprintf(stream, " Ctime: %lld\n", (long long)mf->file_infos[i].ctime);
753 fprintf(stream, "Results (%u):\n", (unsigned)mf->n_objects);
754 for (unsigned i = 0; i < mf->n_objects; ++i) {
756 fprintf(stream, " %u:\n", i);
757 fprintf(stream, " File info indexes:");
758 for (unsigned j = 0; j < mf->objects[i].n_file_info_indexes; ++j) {
759 fprintf(stream, " %u", mf->objects[i].file_info_indexes[j]);
761 fprintf(stream, "\n");
762 hash = format_hash_as_string(mf->objects[i].hash.hash, -1);
763 fprintf(stream, " Hash: %s\n", hash);
765 fprintf(stream, " Size: %u\n", (unsigned)mf->objects[i].hash.size);