1 // Copyright (C) 2009-2016 Joel Rosdahl
3 // This program is free software; you can redistribute it and/or modify it
4 // under the terms of the GNU General Public License as published by the Free
5 // Software Foundation; either version 3 of the License, or (at your option)
8 // This program is distributed in the hope that it will be useful, but WITHOUT
9 // ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
10 // FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
13 // You should have received a copy of the GNU General Public License along with
14 // this program; if not, write to the Free Software Foundation, Inc., 51
15 // Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
18 #include "hashtable_itr.h"
21 #include "murmurhashneutral2.h"
25 // Sketchy specification of the manifest disk format:
27 // <magic> magic number (4 bytes)
28 // <version> file format version (1 byte unsigned int)
29 // <hash_size> size of the hash fields (in bytes) (1 byte unsigned int)
30 // <reserved> reserved for future use (2 bytes)
31 // ----------------------------------------------------------------------------
32 // <n> number of include file paths (4 bytes unsigned int)
33 // <path_0> path to include file (NUL-terminated string,
34 // ... at most 1024 bytes)
36 // ----------------------------------------------------------------------------
37 // <n> number of include file hash entries (4 bytes unsigned int)
38 // <index[0]> index of include file path (4 bytes unsigned int)
39 // <hash[0]> hash of include file (<hash_size> bytes)
40 // <size[0]> size of include file (4 bytes unsigned int)
41 // <mtime[0]> mtime of include file (8 bytes signed int)
42 // <ctime[0]> ctime of include file (8 bytes signed int)
49 // ----------------------------------------------------------------------------
50 // <n> number of object name entries (4 bytes unsigned int)
51 // <m[0]> number of include file hash indexes (4 bytes unsigned int)
52 // <index[0][0]> include file hash index (4 bytes unsigned int)
55 // <hash[0]> hash part of object name (<hash_size> bytes)
56 // <size[0]> size part of object name (4 bytes unsigned int)
58 // <m[n-1]> number of include file hash indexes
59 // <index[n-1][0]> include file hash index
61 // <index[n-1][m[n-1]]>
65 static const uint32_t MAGIC = 0x63436d46U;
66 static const uint32_t MAX_MANIFEST_ENTRIES = 100;
67 static const uint32_t MAX_MANIFEST_FILE_INFO_ENTRIES = 10000;
69 #define ccache_static_assert(e) \
70 do { enum { ccache_static_assert__ = 1/(e) }; } while (false)
75 // Hash of referenced file.
77 // Size of referenced file.
79 // mtime of referenced file.
81 // ctime of referenced file.
86 // Number of entries in file_info_indexes.
87 uint32_t n_file_info_indexes;
88 // Indexes to file_infos.
89 uint32_t *file_info_indexes;
90 // Hash of the object itself.
91 struct file_hash hash;
95 // Version of decoded file.
98 // Reserved for future use.
101 // Size of hash fields (in bytes).
104 // Referenced include files.
108 // Information about referenced include files.
109 uint32_t n_file_infos;
110 struct file_info *file_infos;
112 // Object names plus references to include file hashes.
114 struct object *objects;
124 hash_from_file_info(void *key)
126 ccache_static_assert(sizeof(struct file_info) == 40); // No padding.
127 return murmurhashneutral2(key, sizeof(struct file_info), 0);
131 file_infos_equal(void *key1, void *key2)
133 struct file_info *fi1 = (struct file_info *)key1;
134 struct file_info *fi2 = (struct file_info *)key2;
135 return fi1->index == fi2->index
136 && memcmp(fi1->hash, fi2->hash, 16) == 0
137 && fi1->size == fi2->size
138 && fi1->mtime == fi2->mtime
139 && fi1->ctime == fi2->ctime;
143 free_manifest(struct manifest *mf)
145 for (uint32_t i = 0; i < mf->n_files; i++) {
149 free(mf->file_infos);
150 for (uint32_t i = 0; i < mf->n_objects; i++) {
151 free(mf->objects[i].file_info_indexes);
157 #define READ_BYTE(var) \
159 int ch_ = gzgetc(f); \
163 (var) = ch_ & 0xFF; \
166 #define READ_INT(size, var) \
169 for (size_t i_ = 0; i_ < (size); i_++) { \
170 int ch_ = gzgetc(f); \
180 #define READ_STR(var) \
184 for (i_ = 0; i_ < sizeof(buf_); i_++) { \
185 int ch_ = gzgetc(f); \
194 if (i_ == sizeof(buf_)) { \
197 (var) = x_strdup(buf_); \
200 #define READ_BYTES(n, var) \
202 for (size_t i_ = 0; i_ < (n); i_++) { \
203 int ch_ = gzgetc(f); \
211 static struct manifest *
212 create_empty_manifest(void)
214 struct manifest *mf = x_malloc(sizeof(*mf));
218 mf->n_file_infos = 0;
219 mf->file_infos = NULL;
226 static struct manifest *
227 read_manifest(gzFile f)
229 struct manifest *mf = create_empty_manifest();
233 if (magic != MAGIC) {
234 cc_log("Manifest file has bad magic number %u", magic);
238 READ_BYTE(mf->version);
239 if (mf->version != MANIFEST_VERSION) {
240 cc_log("Manifest file has unknown version %u", mf->version);
244 READ_BYTE(mf->hash_size);
245 if (mf->hash_size != 16) {
246 // Temporary measure until we support different hash algorithms.
247 cc_log("Manifest file has unsupported hash size %u", mf->hash_size);
251 READ_INT(2, mf->reserved);
253 READ_INT(4, mf->n_files);
254 mf->files = x_calloc(mf->n_files, sizeof(*mf->files));
255 for (uint32_t i = 0; i < mf->n_files; i++) {
256 READ_STR(mf->files[i]);
259 READ_INT(4, mf->n_file_infos);
260 mf->file_infos = x_calloc(mf->n_file_infos, sizeof(*mf->file_infos));
261 for (uint32_t i = 0; i < mf->n_file_infos; i++) {
262 READ_INT(4, mf->file_infos[i].index);
263 READ_BYTES(mf->hash_size, mf->file_infos[i].hash);
264 READ_INT(4, mf->file_infos[i].size);
265 READ_INT(8, mf->file_infos[i].mtime);
266 READ_INT(8, mf->file_infos[i].ctime);
269 READ_INT(4, mf->n_objects);
270 mf->objects = x_calloc(mf->n_objects, sizeof(*mf->objects));
271 for (uint32_t i = 0; i < mf->n_objects; i++) {
272 READ_INT(4, mf->objects[i].n_file_info_indexes);
273 mf->objects[i].file_info_indexes =
274 x_calloc(mf->objects[i].n_file_info_indexes,
275 sizeof(*mf->objects[i].file_info_indexes));
276 for (uint32_t j = 0; j < mf->objects[i].n_file_info_indexes; j++) {
277 READ_INT(4, mf->objects[i].file_info_indexes[j]);
279 READ_BYTES(mf->hash_size, mf->objects[i].hash.hash);
280 READ_INT(4, mf->objects[i].hash.size);
286 cc_log("Corrupt manifest file");
291 #define WRITE_INT(size, var) \
293 uint64_t u_ = (var); \
296 for (i_ = 0; i_ < (size); i_++) { \
297 ch_ = (u_ >> (8 * ((size) - i_ - 1))); \
298 if (gzputc(f, ch_) == EOF) { \
304 #define WRITE_STR(var) \
306 if (gzputs(f, var) == EOF || gzputc(f, '\0') == EOF) { \
311 #define WRITE_BYTES(n, var) \
314 for (i_ = 0; i_ < (n); i_++) { \
315 if (gzputc(f, (var)[i_]) == EOF) { \
322 write_manifest(gzFile f, const struct manifest *mf)
325 WRITE_INT(1, MANIFEST_VERSION);
329 WRITE_INT(4, mf->n_files);
330 for (uint32_t i = 0; i < mf->n_files; i++) {
331 WRITE_STR(mf->files[i]);
334 WRITE_INT(4, mf->n_file_infos);
335 for (uint32_t i = 0; i < mf->n_file_infos; i++) {
336 WRITE_INT(4, mf->file_infos[i].index);
337 WRITE_BYTES(mf->hash_size, mf->file_infos[i].hash);
338 WRITE_INT(4, mf->file_infos[i].size);
339 WRITE_INT(8, mf->file_infos[i].mtime);
340 WRITE_INT(8, mf->file_infos[i].ctime);
343 WRITE_INT(4, mf->n_objects);
344 for (uint32_t i = 0; i < mf->n_objects; i++) {
345 WRITE_INT(4, mf->objects[i].n_file_info_indexes);
346 for (uint32_t j = 0; j < mf->objects[i].n_file_info_indexes; j++) {
347 WRITE_INT(4, mf->objects[i].file_info_indexes[j]);
349 WRITE_BYTES(mf->hash_size, mf->objects[i].hash.hash);
350 WRITE_INT(4, mf->objects[i].hash.size);
356 cc_log("Error writing to manifest file");
361 verify_object(struct conf *conf, struct manifest *mf, struct object *obj,
362 struct hashtable *stated_files, struct hashtable *hashed_files)
364 for (uint32_t i = 0; i < obj->n_file_info_indexes; i++) {
365 struct file_info *fi = &mf->file_infos[obj->file_info_indexes[i]];
366 char *path = mf->files[fi->index];
367 struct file_stats *st = hashtable_search(stated_files, path);
369 struct stat file_stat;
370 if (x_stat(path, &file_stat) != 0) {
373 st = x_malloc(sizeof(*st));
374 st->size = file_stat.st_size;
375 st->mtime = file_stat.st_mtime;
376 st->ctime = file_stat.st_ctime;
377 hashtable_insert(stated_files, x_strdup(path), st);
380 if (fi->size != st->size) {
384 // Clang stores the mtime of the included files in the precompiled header,
385 // and will error out if that header is later used without rebuilding.
386 if (output_is_precompiled_header && fi->mtime != st->mtime) {
387 cc_log("Precompiled header includes %s, which has a new mtime", path);
391 if (conf->sloppiness & SLOPPY_FILE_STAT_MATCHES) {
392 if (fi->mtime == st->mtime && fi->ctime == st->ctime) {
393 cc_log("mtime/ctime hit for %s", path);
396 cc_log("mtime/ctime miss for %s", path);
400 struct file_hash *actual = hashtable_search(hashed_files, path);
404 int result = hash_source_code_file(conf, &hash, path);
405 if (result & HASH_SOURCE_CODE_ERROR) {
406 cc_log("Failed hashing %s", path);
409 if (result & HASH_SOURCE_CODE_FOUND_TIME) {
412 actual = x_malloc(sizeof(*actual));
413 hash_result_as_bytes(&hash, actual->hash);
414 actual->size = hash.totalN;
415 hashtable_insert(hashed_files, x_strdup(path), actual);
417 if (memcmp(fi->hash, actual->hash, mf->hash_size) != 0
418 || fi->size != actual->size) {
426 static struct hashtable *
427 create_string_index_map(char **strings, uint32_t len)
429 struct hashtable *h =
430 create_hashtable(1000, hash_from_string, strings_equal);
431 for (uint32_t i = 0; i < len; i++) {
432 uint32_t *index = x_malloc(sizeof(*index));
434 hashtable_insert(h, x_strdup(strings[i]), index);
439 static struct hashtable *
440 create_file_info_index_map(struct file_info *infos, uint32_t len)
442 struct hashtable *h =
443 create_hashtable(1000, hash_from_file_info, file_infos_equal);
444 for (uint32_t i = 0; i < len; i++) {
445 struct file_info *fi = x_malloc(sizeof(*fi));
447 uint32_t *index = x_malloc(sizeof(*index));
449 hashtable_insert(h, fi, index);
455 get_include_file_index(struct manifest *mf, char *path,
456 struct hashtable *mf_files)
458 uint32_t *index = hashtable_search(mf_files, path);
463 uint32_t n = mf->n_files;
464 mf->files = x_realloc(mf->files, (n + 1) * sizeof(*mf->files));
466 mf->files[n] = x_strdup(path);
471 get_file_hash_index(struct manifest *mf,
473 struct file_hash *file_hash,
474 struct hashtable *mf_files,
475 struct hashtable *mf_file_infos)
478 fi.index = get_include_file_index(mf, path, mf_files);
479 memcpy(fi.hash, file_hash->hash, sizeof(fi.hash));
480 fi.size = file_hash->size;
482 // file_stat.st_{m,c}time has a resolution of 1 second, so we can cache the
483 // file's mtime and ctime only if they're at least one second older than
484 // time_of_compilation.
486 // st->ctime may be 0, so we have to check time_of_compilation against
487 // MAX(mtime, ctime).
489 struct stat file_stat;
490 if (stat(path, &file_stat) != -1
491 && time_of_compilation > MAX(file_stat.st_mtime, file_stat.st_ctime)) {
492 fi.mtime = file_stat.st_mtime;
493 fi.ctime = file_stat.st_ctime;
499 uint32_t *fi_index = hashtable_search(mf_file_infos, &fi);
504 uint32_t n = mf->n_file_infos;
505 mf->file_infos = x_realloc(mf->file_infos, (n + 1) * sizeof(*mf->file_infos));
507 mf->file_infos[n] = fi;
512 add_file_info_indexes(uint32_t *indexes, uint32_t size,
513 struct manifest *mf, struct hashtable *included_files)
520 struct hashtable *mf_files =
521 create_string_index_map(mf->files, mf->n_files);
522 // struct file_info --> index
523 struct hashtable *mf_file_infos =
524 create_file_info_index_map(mf->file_infos, mf->n_file_infos);
525 struct hashtable_itr *iter = hashtable_iterator(included_files);
528 char *path = hashtable_iterator_key(iter);
529 struct file_hash *file_hash = hashtable_iterator_value(iter);
530 indexes[i] = get_file_hash_index(mf, path, file_hash, mf_files,
533 } while (hashtable_iterator_advance(iter));
536 hashtable_destroy(mf_file_infos, 1);
537 hashtable_destroy(mf_files, 1);
541 add_object_entry(struct manifest *mf,
542 struct file_hash *object_hash,
543 struct hashtable *included_files)
545 uint32_t n_objs = mf->n_objects;
546 mf->objects = x_realloc(mf->objects, (n_objs + 1) * sizeof(*mf->objects));
548 struct object *obj = &mf->objects[n_objs];
550 uint32_t n_fii = hashtable_count(included_files);
551 obj->n_file_info_indexes = n_fii;
552 obj->file_info_indexes = x_malloc(n_fii * sizeof(*obj->file_info_indexes));
553 add_file_info_indexes(obj->file_info_indexes, n_fii, mf, included_files);
554 memcpy(obj->hash.hash, object_hash->hash, mf->hash_size);
555 obj->hash.size = object_hash->size;
558 // Try to get the object hash from a manifest file. Caller frees. Returns NULL
561 manifest_get(struct conf *conf, const char *manifest_path)
564 struct manifest *mf = NULL;
565 struct hashtable *hashed_files = NULL; // path --> struct file_hash
566 struct hashtable *stated_files = NULL; // path --> struct file_stats
567 struct file_hash *fh = NULL;
569 int fd = open(manifest_path, O_RDONLY | O_BINARY);
572 cc_log("No such manifest file");
575 f = gzdopen(fd, "rb");
578 cc_log("Failed to gzdopen manifest file");
581 mf = read_manifest(f);
583 cc_log("Error reading manifest file");
587 hashed_files = create_hashtable(1000, hash_from_string, strings_equal);
588 stated_files = create_hashtable(1000, hash_from_string, strings_equal);
590 // Check newest object first since it's a bit more likely to match.
591 for (uint32_t i = mf->n_objects; i > 0; i--) {
592 if (verify_object(conf, mf, &mf->objects[i - 1],
593 stated_files, hashed_files)) {
594 fh = x_malloc(sizeof(*fh));
595 *fh = mf->objects[i - 1].hash;
602 hashtable_destroy(hashed_files, 1);
605 hashtable_destroy(stated_files, 1);
616 // Put the object name into a manifest file given a set of included files.
617 // Returns true on success, otherwise false.
619 manifest_put(const char *manifest_path, struct file_hash *object_hash,
620 struct hashtable *included_files)
624 struct manifest *mf = NULL;
625 char *tmp_file = NULL;
627 // We don't bother to acquire a lock when writing the manifest to disk. A
628 // race between two processes will only result in one lost entry, which is
629 // not a big deal, and it's also very unlikely.
631 int fd1 = open(manifest_path, O_RDONLY | O_BINARY);
634 mf = create_empty_manifest();
636 gzFile f1 = gzdopen(fd1, "rb");
638 cc_log("Failed to gzdopen manifest file");
642 mf = read_manifest(f1);
645 cc_log("Failed to read manifest file; deleting it");
646 x_unlink(manifest_path);
647 mf = create_empty_manifest();
651 if (mf->n_objects > MAX_MANIFEST_ENTRIES) {
652 // Normally, there shouldn't be many object entries in the manifest since
653 // new entries are added only if an include file has changed but not the
654 // source file, and you typically change source files more often than
655 // header files. However, it's certainly possible to imagine cases where
656 // the manifest will grow large (for instance, a generated header file that
657 // changes for every build), and this must be taken care of since
658 // processing an ever growing manifest eventually will take too much time.
659 // A good way of solving this would be to maintain the object entries in
660 // LRU order and discarding the old ones. An easy way is to throw away all
661 // entries when there are too many. Let's do that for now.
662 cc_log("More than %u entries in manifest file; discarding",
663 MAX_MANIFEST_ENTRIES);
665 mf = create_empty_manifest();
666 } else if (mf->n_file_infos > MAX_MANIFEST_FILE_INFO_ENTRIES) {
667 // Rarely, file_info entries can grow large in pathological cases where
668 // many included files change, but the main file does not. This also puts
669 // an upper bound on the number of file_info entries.
670 cc_log("More than %u file_info entries in manifest file; discarding",
671 MAX_MANIFEST_FILE_INFO_ENTRIES);
673 mf = create_empty_manifest();
676 tmp_file = format("%s.tmp", manifest_path);
677 int fd2 = create_tmp_fd(&tmp_file);
678 f2 = gzdopen(fd2, "wb");
680 cc_log("Failed to gzdopen %s", tmp_file);
684 add_object_entry(mf, object_hash, included_files);
685 if (write_manifest(f2, mf)) {
688 if (x_rename(tmp_file, manifest_path) == 0) {
691 cc_log("Failed to rename %s to %s", tmp_file, manifest_path);
695 cc_log("Failed to write manifest file");
713 manifest_dump(const char *manifest_path, FILE *stream)
715 struct manifest *mf = NULL;
719 int fd = open(manifest_path, O_RDONLY | O_BINARY);
721 fprintf(stderr, "No such manifest file: %s\n", manifest_path);
724 f = gzdopen(fd, "rb");
726 fprintf(stderr, "Failed to dzopen manifest file\n");
730 mf = read_manifest(f);
732 fprintf(stderr, "Error reading manifest file\n");
736 fprintf(stream, "Magic: %c%c%c%c\n",
737 (MAGIC >> 24) & 0xFF,
738 (MAGIC >> 16) & 0xFF,
741 fprintf(stream, "Version: %u\n", mf->version);
742 fprintf(stream, "Hash size: %u\n", (unsigned)mf->hash_size);
743 fprintf(stream, "Reserved field: %u\n", (unsigned)mf->reserved);
744 fprintf(stream, "File paths (%u):\n", (unsigned)mf->n_files);
745 for (unsigned i = 0; i < mf->n_files; ++i) {
746 fprintf(stream, " %u: %s\n", i, mf->files[i]);
748 fprintf(stream, "File infos (%u):\n", (unsigned)mf->n_file_infos);
749 for (unsigned i = 0; i < mf->n_file_infos; ++i) {
751 fprintf(stream, " %u:\n", i);
752 fprintf(stream, " Path index: %u\n", mf->file_infos[i].index);
753 hash = format_hash_as_string(mf->file_infos[i].hash, -1);
754 fprintf(stream, " Hash: %s\n", hash);
756 fprintf(stream, " Size: %u\n", mf->file_infos[i].size);
757 fprintf(stream, " Mtime: %lld\n", (long long)mf->file_infos[i].mtime);
758 fprintf(stream, " Ctime: %lld\n", (long long)mf->file_infos[i].ctime);
760 fprintf(stream, "Results (%u):\n", (unsigned)mf->n_objects);
761 for (unsigned i = 0; i < mf->n_objects; ++i) {
763 fprintf(stream, " %u:\n", i);
764 fprintf(stream, " File info indexes:");
765 for (unsigned j = 0; j < mf->objects[i].n_file_info_indexes; ++j) {
766 fprintf(stream, " %u", mf->objects[i].file_info_indexes[j]);
768 fprintf(stream, "\n");
769 hash = format_hash_as_string(mf->objects[i].hash.hash, -1);
770 fprintf(stream, " Hash: %s\n", hash);
772 fprintf(stream, " Size: %u\n", (unsigned)mf->objects[i].hash.size);