2 * Copyright (C) 2009-2016 Joel Rosdahl
4 * This program is free software; you can redistribute it and/or modify it
5 * under the terms of the GNU General Public License as published by the Free
6 * Software Foundation; either version 3 of the License, or (at your option)
9 * This program is distributed in the hope that it will be useful, but WITHOUT
10 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
11 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
14 * You should have received a copy of the GNU General Public License along with
15 * this program; if not, write to the Free Software Foundation, Inc., 51
16 * Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
20 #include "hashtable_itr.h"
23 #include "murmurhashneutral2.h"
28 * Sketchy specification of the manifest disk format:
30 * <magic> magic number (4 bytes)
31 * <version> file format version (1 byte unsigned int)
32 * <hash_size> size of the hash fields (in bytes) (1 byte unsigned int)
33 * <reserved> reserved for future use (2 bytes)
34 * ----------------------------------------------------------------------------
35 * <n> number of include file paths (4 bytes unsigned int)
36 * <path_0> path to include file (NUL-terminated string,
37 * ... at most 1024 bytes)
39 * ----------------------------------------------------------------------------
40 * <n> number of include file hash entries (4 bytes unsigned int)
41 * <index[0]> index of include file path (4 bytes unsigned int)
42 * <hash[0]> hash of include file (<hash_size> bytes)
43 * <size[0]> size of include file (4 bytes unsigned int)
44 * <mtime[0]> mtime of include file (8 bytes signed int)
45 * <ctime[0]> ctime of include file (8 bytes signed int)
52 * ----------------------------------------------------------------------------
53 * <n> number of object name entries (4 bytes unsigned int)
54 * <m[0]> number of include file hash indexes (4 bytes unsigned int)
55 * <index[0][0]> include file hash index (4 bytes unsigned int)
58 * <hash[0]> hash part of object name (<hash_size> bytes)
59 * <size[0]> size part of object name (4 bytes unsigned int)
61 * <m[n-1]> number of include file hash indexes
62 * <index[n-1][0]> include file hash index
64 * <index[n-1][m[n-1]]>
69 static const uint32_t MAGIC = 0x63436d46U;
70 static const uint32_t MAX_MANIFEST_ENTRIES = 100;
71 static const uint32_t MAX_MANIFEST_FILE_INFO_ENTRIES = 10000;
73 #define ccache_static_assert(e) \
74 do { enum { ccache_static_assert__ = 1/(e) }; } while (false)
77 /* Index to n_files. */
79 /* Hash of referenced file. */
81 /* Size of referenced file. */
83 /* mtime of referenced file. */
85 /* ctime of referenced file. */
90 /* Number of entries in file_info_indexes. */
91 uint32_t n_file_info_indexes;
92 /* Indexes to file_infos. */
93 uint32_t *file_info_indexes;
94 /* Hash of the object itself. */
95 struct file_hash hash;
99 /* Version of decoded file. */
102 /* Reserved for future use. */
105 /* Size of hash fields (in bytes). */
108 /* Referenced include files. */
112 /* Information about referenced include files. */
113 uint32_t n_file_infos;
114 struct file_info *file_infos;
116 /* Object names plus references to include file hashes. */
118 struct object *objects;
128 hash_from_file_info(void *key)
130 ccache_static_assert(sizeof(struct file_info) == 40); /* No padding. */
131 return murmurhashneutral2(key, sizeof(struct file_info), 0);
135 file_infos_equal(void *key1, void *key2)
137 struct file_info *fi1 = (struct file_info *)key1;
138 struct file_info *fi2 = (struct file_info *)key2;
139 return fi1->index == fi2->index
140 && memcmp(fi1->hash, fi2->hash, 16) == 0
141 && fi1->size == fi2->size
142 && fi1->mtime == fi2->mtime
143 && fi1->ctime == fi2->ctime;
147 free_manifest(struct manifest *mf)
150 for (i = 0; i < mf->n_files; i++) {
154 free(mf->file_infos);
155 for (i = 0; i < mf->n_objects; i++) {
156 free(mf->objects[i].file_info_indexes);
162 #define READ_BYTE(var) \
169 (var) = ch_ & 0xFF; \
172 #define READ_INT(size, var) \
177 for (i_ = 0; i_ < (size); i_++) { \
183 (var) |= ch_ & 0xFF; \
187 #define READ_STR(var) \
192 for (i_ = 0; i_ < sizeof(buf_); i_++) { \
202 if (i_ == sizeof(buf_)) { \
205 (var) = x_strdup(buf_); \
208 #define READ_BYTES(n, var) \
212 for (i_ = 0; i_ < (n); i_++) { \
221 static struct manifest *
222 create_empty_manifest(void)
226 mf = x_malloc(sizeof(*mf));
230 mf->n_file_infos = 0;
231 mf->file_infos = NULL;
238 static struct manifest *
239 read_manifest(gzFile f)
245 mf = create_empty_manifest();
248 if (magic != MAGIC) {
249 cc_log("Manifest file has bad magic number %u", magic);
253 READ_BYTE(mf->version);
254 if (mf->version != MANIFEST_VERSION) {
255 cc_log("Manifest file has unknown version %u", mf->version);
260 READ_BYTE(mf->hash_size);
261 if (mf->hash_size != 16) {
262 /* Temporary measure until we support different hash algorithms. */
263 cc_log("Manifest file has unsupported hash size %u", mf->hash_size);
268 READ_INT(2, mf->reserved);
270 READ_INT(4, mf->n_files);
271 mf->files = x_calloc(mf->n_files, sizeof(*mf->files));
272 for (i = 0; i < mf->n_files; i++) {
273 READ_STR(mf->files[i]);
276 READ_INT(4, mf->n_file_infos);
277 mf->file_infos = x_calloc(mf->n_file_infos, sizeof(*mf->file_infos));
278 for (i = 0; i < mf->n_file_infos; i++) {
279 READ_INT(4, mf->file_infos[i].index);
280 READ_BYTES(mf->hash_size, mf->file_infos[i].hash);
281 READ_INT(4, mf->file_infos[i].size);
282 READ_INT(8, mf->file_infos[i].mtime);
283 READ_INT(8, mf->file_infos[i].ctime);
286 READ_INT(4, mf->n_objects);
287 mf->objects = x_calloc(mf->n_objects, sizeof(*mf->objects));
288 for (i = 0; i < mf->n_objects; i++) {
289 READ_INT(4, mf->objects[i].n_file_info_indexes);
290 mf->objects[i].file_info_indexes =
291 x_calloc(mf->objects[i].n_file_info_indexes,
292 sizeof(*mf->objects[i].file_info_indexes));
293 for (j = 0; j < mf->objects[i].n_file_info_indexes; j++) {
294 READ_INT(4, mf->objects[i].file_info_indexes[j]);
296 READ_BYTES(mf->hash_size, mf->objects[i].hash.hash);
297 READ_INT(4, mf->objects[i].hash.size);
303 cc_log("Corrupt manifest file");
308 #define WRITE_INT(size, var) \
312 for (i_ = 0; i_ < (size); i_++) { \
313 ch_ = ((var) >> (8 * ((size) - i_ - 1))); \
314 if (gzputc(f, ch_) == EOF) { \
320 #define WRITE_STR(var) \
322 if (gzputs(f, var) == EOF || gzputc(f, '\0') == EOF) { \
327 #define WRITE_BYTES(n, var) \
330 for (i_ = 0; i_ < (n); i_++) { \
331 if (gzputc(f, (var)[i_]) == EOF) { \
338 write_manifest(gzFile f, const struct manifest *mf)
343 WRITE_INT(1, MANIFEST_VERSION);
347 WRITE_INT(4, mf->n_files);
348 for (i = 0; i < mf->n_files; i++) {
349 WRITE_STR(mf->files[i]);
352 WRITE_INT(4, mf->n_file_infos);
353 for (i = 0; i < mf->n_file_infos; i++) {
354 WRITE_INT(4, mf->file_infos[i].index);
355 WRITE_BYTES(mf->hash_size, mf->file_infos[i].hash);
356 WRITE_INT(4, mf->file_infos[i].size);
357 WRITE_INT(8, mf->file_infos[i].mtime);
358 WRITE_INT(8, mf->file_infos[i].ctime);
361 WRITE_INT(4, mf->n_objects);
362 for (i = 0; i < mf->n_objects; i++) {
363 WRITE_INT(4, mf->objects[i].n_file_info_indexes);
364 for (j = 0; j < mf->objects[i].n_file_info_indexes; j++) {
365 WRITE_INT(4, mf->objects[i].file_info_indexes[j]);
367 WRITE_BYTES(mf->hash_size, mf->objects[i].hash.hash);
368 WRITE_INT(4, mf->objects[i].hash.size);
374 cc_log("Error writing to manifest file");
379 verify_object(struct conf *conf, struct manifest *mf, struct object *obj,
380 struct hashtable *stated_files, struct hashtable *hashed_files)
383 struct file_hash *actual;
387 for (i = 0; i < obj->n_file_info_indexes; i++) {
388 struct file_info *fi = &mf->file_infos[obj->file_info_indexes[i]];
389 char *path = mf->files[fi->index];
390 struct file_stats *st = hashtable_search(stated_files, path);
392 struct stat file_stat;
393 if (x_stat(path, &file_stat) != 0) {
396 st = x_malloc(sizeof(*st));
397 st->size = file_stat.st_size;
398 st->mtime = file_stat.st_mtime;
399 st->ctime = file_stat.st_ctime;
400 hashtable_insert(stated_files, x_strdup(path), st);
403 if (conf->sloppiness & SLOPPY_FILE_STAT_MATCHES) {
405 * st->ctime is sometimes 0, so we can't check that both st->ctime and
406 * st->mtime are greater than time_of_compilation. But it's sufficient to
407 * check that either is.
409 if (fi->size == st->size
410 && fi->mtime == st->mtime
411 && fi->ctime == st->ctime
412 && MAX(st->mtime, st->ctime) >= time_of_compilation) {
413 cc_log("size/mtime/ctime hit for %s", path);
416 cc_log("size/mtime/ctime miss for %s", path);
420 actual = hashtable_search(hashed_files, path);
422 actual = x_malloc(sizeof(*actual));
424 result = hash_source_code_file(conf, &hash, path);
425 if (result & HASH_SOURCE_CODE_ERROR) {
426 cc_log("Failed hashing %s", path);
430 if (result & HASH_SOURCE_CODE_FOUND_TIME) {
434 hash_result_as_bytes(&hash, actual->hash);
435 actual->size = hash.totalN;
436 hashtable_insert(hashed_files, x_strdup(path), actual);
438 if (memcmp(fi->hash, actual->hash, mf->hash_size) != 0
439 || fi->size != actual->size) {
447 static struct hashtable *
448 create_string_index_map(char **strings, uint32_t len)
454 h = create_hashtable(1000, hash_from_string, strings_equal);
455 for (i = 0; i < len; i++) {
456 index = x_malloc(sizeof(*index));
458 hashtable_insert(h, x_strdup(strings[i]), index);
463 static struct hashtable *
464 create_file_info_index_map(struct file_info *infos, uint32_t len)
468 struct file_info *fi;
471 h = create_hashtable(1000, hash_from_file_info, file_infos_equal);
472 for (i = 0; i < len; i++) {
473 fi = x_malloc(sizeof(*fi));
475 index = x_malloc(sizeof(*index));
477 hashtable_insert(h, fi, index);
483 get_include_file_index(struct manifest *mf, char *path,
484 struct hashtable *mf_files)
489 index = hashtable_search(mf_files, path);
495 mf->files = x_realloc(mf->files, (n + 1) * sizeof(*mf->files));
497 mf->files[n] = x_strdup(path);
503 get_file_hash_index(struct manifest *mf,
505 struct file_hash *file_hash,
506 struct hashtable *mf_files,
507 struct hashtable *mf_file_infos)
512 struct stat file_stat;
514 fi.index = get_include_file_index(mf, path, mf_files);
515 memcpy(fi.hash, file_hash->hash, sizeof(fi.hash));
516 fi.size = file_hash->size;
519 * file_stat.st_{m,c}time has a resolution of 1 second, so we can cache the
520 * file's mtime and ctime only if they're at least one second older than
521 * time_of_compilation.
523 * st->ctime may be 0, so we have to check time_of_compilation against
527 if (stat(path, &file_stat) != -1
528 && time_of_compilation > MAX(file_stat.st_mtime, file_stat.st_ctime)) {
529 fi.mtime = file_stat.st_mtime;
530 fi.ctime = file_stat.st_ctime;
536 fi_index = hashtable_search(mf_file_infos, &fi);
541 n = mf->n_file_infos;
542 mf->file_infos = x_realloc(mf->file_infos, (n + 1) * sizeof(*mf->file_infos));
544 mf->file_infos[n] = fi;
550 add_file_info_indexes(uint32_t *indexes, uint32_t size,
551 struct manifest *mf, struct hashtable *included_files)
553 struct hashtable_itr *iter;
555 struct hashtable *mf_files; /* path --> index */
556 struct hashtable *mf_file_infos; /* struct file_info --> index */
562 mf_files = create_string_index_map(mf->files, mf->n_files);
563 mf_file_infos = create_file_info_index_map(mf->file_infos, mf->n_file_infos);
564 iter = hashtable_iterator(included_files);
567 char *path = hashtable_iterator_key(iter);
568 struct file_hash *file_hash = hashtable_iterator_value(iter);
569 indexes[i] = get_file_hash_index(mf, path, file_hash, mf_files,
572 } while (hashtable_iterator_advance(iter));
575 hashtable_destroy(mf_file_infos, 1);
576 hashtable_destroy(mf_files, 1);
580 add_object_entry(struct manifest *mf,
581 struct file_hash *object_hash,
582 struct hashtable *included_files)
588 mf->objects = x_realloc(mf->objects, (n + 1) * sizeof(*mf->objects));
590 obj = &mf->objects[n];
592 n = hashtable_count(included_files);
593 obj->n_file_info_indexes = n;
594 obj->file_info_indexes = x_malloc(n * sizeof(*obj->file_info_indexes));
595 add_file_info_indexes(obj->file_info_indexes, n, mf, included_files);
596 memcpy(obj->hash.hash, object_hash->hash, mf->hash_size);
597 obj->hash.size = object_hash->size;
601 * Try to get the object hash from a manifest file. Caller frees. Returns NULL
605 manifest_get(struct conf *conf, const char *manifest_path)
609 struct manifest *mf = NULL;
610 struct hashtable *hashed_files = NULL; /* path --> struct file_hash */
611 struct hashtable *stated_files = NULL; /* path --> struct file_stats */
613 struct file_hash *fh = NULL;
615 fd = open(manifest_path, O_RDONLY | O_BINARY);
618 cc_log("No such manifest file");
621 f = gzdopen(fd, "rb");
624 cc_log("Failed to gzdopen manifest file");
627 mf = read_manifest(f);
629 cc_log("Error reading manifest file");
633 hashed_files = create_hashtable(1000, hash_from_string, strings_equal);
634 stated_files = create_hashtable(1000, hash_from_string, strings_equal);
636 /* Check newest object first since it's a bit more likely to match. */
637 for (i = mf->n_objects; i > 0; i--) {
638 if (verify_object(conf, mf, &mf->objects[i - 1],
639 stated_files, hashed_files)) {
640 fh = x_malloc(sizeof(*fh));
641 *fh = mf->objects[i - 1].hash;
648 hashtable_destroy(hashed_files, 1);
651 hashtable_destroy(stated_files, 1);
663 * Put the object name into a manifest file given a set of included files.
664 * Returns true on success, otherwise false.
667 manifest_put(const char *manifest_path, struct file_hash *object_hash,
668 struct hashtable *included_files)
674 struct manifest *mf = NULL;
675 char *tmp_file = NULL;
678 * We don't bother to acquire a lock when writing the manifest to disk. A
679 * race between two processes will only result in one lost entry, which is
680 * not a big deal, and it's also very unlikely.
683 fd1 = open(manifest_path, O_RDONLY | O_BINARY);
686 mf = create_empty_manifest();
688 gzFile f1 = gzdopen(fd1, "rb");
690 cc_log("Failed to gzdopen manifest file");
694 mf = read_manifest(f1);
697 cc_log("Failed to read manifest file; deleting it");
698 x_unlink(manifest_path);
699 mf = create_empty_manifest();
703 if (mf->n_objects > MAX_MANIFEST_ENTRIES) {
705 * Normally, there shouldn't be many object entries in the manifest since
706 * new entries are added only if an include file has changed but not the
707 * source file, and you typically change source files more often than
708 * header files. However, it's certainly possible to imagine cases where
709 * the manifest will grow large (for instance, a generated header file that
710 * changes for every build), and this must be taken care of since
711 * processing an ever growing manifest eventually will take too much time.
712 * A good way of solving this would be to maintain the object entries in
713 * LRU order and discarding the old ones. An easy way is to throw away all
714 * entries when there are too many. Let's do that for now.
716 cc_log("More than %u entries in manifest file; discarding",
717 MAX_MANIFEST_ENTRIES);
719 mf = create_empty_manifest();
720 } else if (mf->n_file_infos > MAX_MANIFEST_FILE_INFO_ENTRIES) {
721 /* Rarely, file_info entries can grow large in pathological cases where
722 * many included files change, but the main file does not. This also puts
723 * an upper bound on the number of file_info entries.
725 cc_log("More than %u file_info entries in manifest file; discarding",
726 MAX_MANIFEST_FILE_INFO_ENTRIES);
728 mf = create_empty_manifest();
731 tmp_file = format("%s.tmp", manifest_path);
732 fd2 = create_tmp_fd(&tmp_file);
733 f2 = gzdopen(fd2, "wb");
735 cc_log("Failed to gzdopen %s", tmp_file);
739 add_object_entry(mf, object_hash, included_files);
740 if (write_manifest(f2, mf)) {
743 if (x_rename(tmp_file, manifest_path) == 0) {
746 cc_log("Failed to rename %s to %s", tmp_file, manifest_path);
750 cc_log("Failed to write manifest file");
768 manifest_dump(const char *manifest_path, FILE *stream)
770 struct manifest *mf = NULL;
776 fd = open(manifest_path, O_RDONLY | O_BINARY);
778 fprintf(stderr, "No such manifest file: %s\n", manifest_path);
781 f = gzdopen(fd, "rb");
783 fprintf(stderr, "Failed to dzopen manifest file\n");
787 mf = read_manifest(f);
789 fprintf(stderr, "Error reading manifest file\n");
793 fprintf(stream, "Magic: %c%c%c%c\n",
794 (MAGIC >> 24) & 0xFF,
795 (MAGIC >> 16) & 0xFF,
798 fprintf(stream, "Version: %u\n", mf->version);
799 fprintf(stream, "Hash size: %u\n", (unsigned)mf->hash_size);
800 fprintf(stream, "Reserved field: %u\n", (unsigned)mf->reserved);
801 fprintf(stream, "File paths (%u):\n", (unsigned)mf->n_files);
802 for (i = 0; i < mf->n_files; ++i) {
803 fprintf(stream, " %u: %s\n", i, mf->files[i]);
805 fprintf(stream, "File infos (%u):\n", (unsigned)mf->n_file_infos);
806 for (i = 0; i < mf->n_file_infos; ++i) {
808 fprintf(stream, " %u:\n", i);
809 fprintf(stream, " Path index: %u\n", mf->file_infos[i].index);
810 hash = format_hash_as_string(mf->file_infos[i].hash, -1);
811 fprintf(stream, " Hash: %s\n", hash);
813 fprintf(stream, " Size: %u\n", mf->file_infos[i].size);
814 fprintf(stream, " Mtime: %lld\n", (long long)mf->file_infos[i].mtime);
815 fprintf(stream, " Ctime: %lld\n", (long long)mf->file_infos[i].ctime);
817 fprintf(stream, "Results (%u):\n", (unsigned)mf->n_objects);
818 for (i = 0; i < mf->n_objects; ++i) {
820 fprintf(stream, " %u:\n", i);
821 fprintf(stream, " File info indexes:");
822 for (j = 0; j < mf->objects[i].n_file_info_indexes; ++j) {
823 fprintf(stream, " %u", mf->objects[i].file_info_indexes[j]);
825 fprintf(stream, "\n");
826 hash = format_hash_as_string(mf->objects[i].hash.hash, -1);
827 fprintf(stream, " Hash: %s\n", hash);
829 fprintf(stream, " Size: %u\n", (unsigned)mf->objects[i].hash.size);