2 * Copyright (C) 2009-2010 Joel Rosdahl
4 * This program is free software; you can redistribute it and/or modify it
5 * under the terms of the GNU General Public License as published by the Free
6 * Software Foundation; either version 3 of the License, or (at your option)
9 * This program is distributed in the hope that it will be useful, but WITHOUT
10 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
11 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
14 * You should have received a copy of the GNU General Public License along with
15 * this program; if not, write to the Free Software Foundation, Inc., 51
16 * Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
20 #include "hashtable_itr.h"
23 #include "murmurhashneutral2.h"
28 * Sketchy specification of the manifest disk format:
30 * <magic> magic number (4 bytes)
31 * <version> file format version (1 byte unsigned int)
32 * <hash_size> size of the hash fields (in bytes) (1 byte unsigned int)
33 * <reserved> reserved for future use (2 bytes)
34 * ----------------------------------------------------------------------------
35 * <n> number of include file paths (4 bytes unsigned int)
36 * <path_0> path to include file (NUL-terminated string,
37 * ... at most 1024 bytes)
39 * ----------------------------------------------------------------------------
40 * <n> number of include file hash entries (4 bytes unsigned int)
41 * <index[0]> index of include file path (4 bytes unsigned int)
42 * <hash[0]> hash of include file (<hash_size> bytes)
43 * <size[0]> size of include file (4 bytes unsigned int)
48 * ----------------------------------------------------------------------------
49 * <n> number of object name entries (4 bytes unsigned int)
50 * <m[0]> number of include file hash indexes (4 bytes unsigned int)
51 * <index[0][0]> include file hash index (4 bytes unsigned int)
54 * <hash[0]> hash part of object name (<hash_size> bytes)
55 * <size[0]> size part of object name (4 bytes unsigned int)
57 * <m[n-1]> number of include file hash indexes
58 * <index[n-1][0]> include file hash index
60 * <index[n-1][m[n-1]]>
65 static const uint32_t MAGIC = 0x63436d46U;
66 static const uint8_t VERSION = 0;
67 static const uint32_t MAX_MANIFEST_ENTRIES = 100;
69 #define static_assert(e) do { enum { static_assert__ = 1/(e) }; } while (0)
72 /* Index to n_files. */
74 /* Hash of referenced file. */
76 /* Size of referenced file. */
81 /* Number of entries in file_info_indexes. */
82 uint32_t n_file_info_indexes;
83 /* Indexes to file_infos. */
84 uint32_t *file_info_indexes;
85 /* Hash of the object itself. */
86 struct file_hash hash;
90 /* Size of hash fields (in bytes). */
93 /* Referenced include files. */
97 /* Information about referenced include files. */
98 uint32_t n_file_infos;
99 struct file_info *file_infos;
101 /* Object names plus references to include file hashes. */
103 struct object *objects;
107 hash_from_file_info(void *key)
109 static_assert(sizeof(struct file_info) == 24); /* No padding. */
110 return murmurhashneutral2(key, sizeof(struct file_info), 0);
114 file_infos_equal(void *key1, void *key2)
116 struct file_info *fi1 = (struct file_info *)key1;
117 struct file_info *fi2 = (struct file_info *)key2;
118 return fi1->index == fi2->index
119 && memcmp(fi1->hash, fi2->hash, 16) == 0
120 && fi1->size == fi2->size;
124 free_manifest(struct manifest *mf)
127 for (i = 0; i < mf->n_files; i++) {
131 free(mf->file_infos);
132 for (i = 0; i < mf->n_objects; i++) {
133 free(mf->objects[i].file_info_indexes);
138 #define READ_INT(size, var) \
143 for (i_ = 0; i_ < (size); i_++) { \
149 (var) |= ch_ & 0xFF; \
153 #define READ_STR(var) \
158 for (i_ = 0; i_ < sizeof(buf_); i_++) { \
168 if (i_ == sizeof(buf_)) { \
171 (var) = x_strdup(buf_); \
174 #define READ_BYTES(n, var) \
178 for (i_ = 0; i_ < (n); i_++) { \
187 static struct manifest *
188 create_empty_manifest(void)
192 mf = x_malloc(sizeof(*mf));
196 mf->n_file_infos = 0;
197 mf->file_infos = NULL;
204 static struct manifest *
205 read_manifest(gzFile f)
213 mf = create_empty_manifest();
216 if (magic != MAGIC) {
217 cc_log("Manifest file has bad magic number %u", magic);
221 READ_INT(1, version);
222 if (version != VERSION) {
223 cc_log("Manifest file has unknown version %u", version);
228 READ_INT(1, mf->hash_size);
229 if (mf->hash_size != 16) {
230 /* Temporary measure until we support different hash algorithms. */
231 cc_log("Manifest file has unsupported hash size %u", mf->hash_size);
238 READ_INT(4, mf->n_files);
239 mf->files = x_calloc(mf->n_files, sizeof(*mf->files));
240 for (i = 0; i < mf->n_files; i++) {
241 READ_STR(mf->files[i]);
244 READ_INT(4, mf->n_file_infos);
245 mf->file_infos = x_calloc(mf->n_file_infos, sizeof(*mf->file_infos));
246 for (i = 0; i < mf->n_file_infos; i++) {
247 READ_INT(4, mf->file_infos[i].index);
248 READ_BYTES(mf->hash_size, mf->file_infos[i].hash);
249 READ_INT(4, mf->file_infos[i].size);
252 READ_INT(4, mf->n_objects);
253 mf->objects = x_calloc(mf->n_objects, sizeof(*mf->objects));
254 for (i = 0; i < mf->n_objects; i++) {
255 READ_INT(4, mf->objects[i].n_file_info_indexes);
256 mf->objects[i].file_info_indexes =
257 x_calloc(mf->objects[i].n_file_info_indexes,
258 sizeof(*mf->objects[i].file_info_indexes));
259 for (j = 0; j < mf->objects[i].n_file_info_indexes; j++) {
260 READ_INT(4, mf->objects[i].file_info_indexes[j]);
262 READ_BYTES(mf->hash_size, mf->objects[i].hash.hash);
263 READ_INT(4, mf->objects[i].hash.size);
269 cc_log("Corrupt manifest file");
274 #define WRITE_INT(size, var) \
278 for (i_ = 0; i_ < (size); i_++) { \
279 ch_ = ((var) >> (8 * ((size) - i_ - 1))); \
280 if (gzputc(f, ch_) == EOF) { \
286 #define WRITE_STR(var) \
288 if (gzputs(f, var) == EOF || gzputc(f, '\0') == EOF) { \
293 #define WRITE_BYTES(n, var) \
296 for (i_ = 0; i_ < (n); i_++) { \
297 if (gzputc(f, (var)[i_]) == EOF) { \
304 write_manifest(gzFile f, const struct manifest *mf)
309 WRITE_INT(1, VERSION);
313 WRITE_INT(4, mf->n_files);
314 for (i = 0; i < mf->n_files; i++) {
315 WRITE_STR(mf->files[i]);
318 WRITE_INT(4, mf->n_file_infos);
319 for (i = 0; i < mf->n_file_infos; i++) {
320 WRITE_INT(4, mf->file_infos[i].index);
321 WRITE_BYTES(mf->hash_size, mf->file_infos[i].hash);
322 WRITE_INT(4, mf->file_infos[i].size);
325 WRITE_INT(4, mf->n_objects);
326 for (i = 0; i < mf->n_objects; i++) {
327 WRITE_INT(4, mf->objects[i].n_file_info_indexes);
328 for (j = 0; j < mf->objects[i].n_file_info_indexes; j++) {
329 WRITE_INT(4, mf->objects[i].file_info_indexes[j]);
331 WRITE_BYTES(mf->hash_size, mf->objects[i].hash.hash);
332 WRITE_INT(4, mf->objects[i].hash.size);
338 cc_log("Error writing to manifest file");
343 verify_object(struct manifest *mf, struct object *obj,
344 struct hashtable *hashed_files)
347 struct file_info *fi;
348 struct file_hash *actual;
352 for (i = 0; i < obj->n_file_info_indexes; i++) {
353 fi = &mf->file_infos[obj->file_info_indexes[i]];
354 actual = hashtable_search(hashed_files, mf->files[fi->index]);
356 actual = x_malloc(sizeof(*actual));
358 result = hash_source_code_file(&hash, mf->files[fi->index]);
359 if (result & HASH_SOURCE_CODE_ERROR) {
360 cc_log("Failed hashing %s", mf->files[fi->index]);
364 if (result & HASH_SOURCE_CODE_FOUND_TIME) {
368 hash_result_as_bytes(&hash, actual->hash);
369 actual->size = hash.totalN;
370 hashtable_insert(hashed_files, x_strdup(mf->files[fi->index]), actual);
372 if (memcmp(fi->hash, actual->hash, mf->hash_size) != 0
373 || fi->size != actual->size) {
381 static struct hashtable *
382 create_string_index_map(char **strings, uint32_t len)
388 h = create_hashtable(1000, hash_from_string, strings_equal);
389 for (i = 0; i < len; i++) {
390 index = x_malloc(sizeof(*index));
392 hashtable_insert(h, x_strdup(strings[i]), index);
397 static struct hashtable *
398 create_file_info_index_map(struct file_info *infos, uint32_t len)
402 struct file_info *fi;
405 h = create_hashtable(1000, hash_from_file_info, file_infos_equal);
406 for (i = 0; i < len; i++) {
407 fi = x_malloc(sizeof(*fi));
409 index = x_malloc(sizeof(*index));
411 hashtable_insert(h, fi, index);
417 get_include_file_index(struct manifest *mf, char *path,
418 struct hashtable *mf_files)
423 index = hashtable_search(mf_files, path);
429 mf->files = x_realloc(mf->files, (n + 1) * sizeof(*mf->files));
431 mf->files[n] = x_strdup(path);
437 get_file_hash_index(struct manifest *mf,
439 struct file_hash *file_hash,
440 struct hashtable *mf_files,
441 struct hashtable *mf_file_infos)
447 fi.index = get_include_file_index(mf, path, mf_files);
448 memcpy(fi.hash, file_hash->hash, sizeof(fi.hash));
449 fi.size = file_hash->size;
451 fi_index = hashtable_search(mf_file_infos, &fi);
456 n = mf->n_file_infos;
457 mf->file_infos = x_realloc(mf->file_infos, (n + 1) * sizeof(*mf->file_infos));
459 mf->file_infos[n] = fi;
465 add_file_info_indexes(uint32_t *indexes, uint32_t size,
466 struct manifest *mf, struct hashtable *included_files)
468 struct hashtable_itr *iter;
471 struct file_hash *file_hash;
472 struct hashtable *mf_files; /* path --> index */
473 struct hashtable *mf_file_infos; /* struct file_info --> index */
479 mf_files = create_string_index_map(mf->files, mf->n_files);
480 mf_file_infos = create_file_info_index_map(mf->file_infos, mf->n_file_infos);
481 iter = hashtable_iterator(included_files);
484 path = hashtable_iterator_key(iter);
485 file_hash = hashtable_iterator_value(iter);
486 indexes[i] = get_file_hash_index(mf, path, file_hash, mf_files,
489 } while (hashtable_iterator_advance(iter));
492 hashtable_destroy(mf_file_infos, 1);
493 hashtable_destroy(mf_files, 1);
497 add_object_entry(struct manifest *mf,
498 struct file_hash *object_hash,
499 struct hashtable *included_files)
505 mf->objects = x_realloc(mf->objects, (n + 1) * sizeof(*mf->objects));
507 obj = &mf->objects[n];
509 n = hashtable_count(included_files);
510 obj->n_file_info_indexes = n;
511 obj->file_info_indexes = x_malloc(n * sizeof(*obj->file_info_indexes));
512 add_file_info_indexes(obj->file_info_indexes, n, mf, included_files);
513 memcpy(obj->hash.hash, object_hash->hash, mf->hash_size);
514 obj->hash.size = object_hash->size;
518 * Try to get the object hash from a manifest file. Caller frees. Returns NULL
522 manifest_get(const char *manifest_path)
526 struct manifest *mf = NULL;
527 struct hashtable *hashed_files = NULL; /* path --> struct file_hash */
529 struct file_hash *fh = NULL;
531 fd = open(manifest_path, O_RDONLY | O_BINARY);
534 cc_log("No such manifest file");
537 f = gzdopen(fd, "rb");
540 cc_log("Failed to gzdopen manifest file");
543 mf = read_manifest(f);
545 cc_log("Error reading manifest file");
549 hashed_files = create_hashtable(1000, hash_from_string, strings_equal);
551 /* Check newest object first since it's a bit more likely to match. */
552 for (i = mf->n_objects; i > 0; i--) {
553 if (verify_object(mf, &mf->objects[i - 1], hashed_files)) {
554 fh = x_malloc(sizeof(*fh));
555 *fh = mf->objects[i - 1].hash;
562 hashtable_destroy(hashed_files, 1);
574 * Put the object name into a manifest file given a set of included files.
575 * Returns true on success, otherwise false.
578 manifest_put(const char *manifest_path, struct file_hash *object_hash,
579 struct hashtable *included_files)
585 struct manifest *mf = NULL;
586 char *tmp_file = NULL;
589 * We don't bother to acquire a lock when writing the manifest to disk. A
590 * race between two processes will only result in one lost entry, which is
591 * not a big deal, and it's also very unlikely.
594 fd1 = open(manifest_path, O_RDONLY | O_BINARY);
597 mf = create_empty_manifest();
599 gzFile f1 = gzdopen(fd1, "rb");
601 cc_log("Failed to gzdopen manifest file");
605 mf = read_manifest(f1);
608 cc_log("Failed to read manifest file; deleting it");
609 x_unlink(manifest_path);
610 mf = create_empty_manifest();
614 if (mf->n_objects > MAX_MANIFEST_ENTRIES) {
616 * Normally, there shouldn't be many object entries in the manifest since
617 * new entries are added only if an include file has changed but not the
618 * source file, and you typically change source files more often than
619 * header files. However, it's certainly possible to imagine cases where
620 * the manifest will grow large (for instance, a generated header file that
621 * changes for every build), and this must be taken care of since
622 * processing an ever growing manifest eventually will take too much time.
623 * A good way of solving this would be to maintain the object entries in
624 * LRU order and discarding the old ones. An easy way is to throw away all
625 * entries when there are too many. Let's do that for now.
627 cc_log("More than %u entries in manifest file; discarding",
628 MAX_MANIFEST_ENTRIES);
630 mf = create_empty_manifest();
633 tmp_file = format("%s.tmp.%s", manifest_path, tmp_string());
634 fd2 = safe_open(tmp_file);
636 cc_log("Failed to open %s", tmp_file);
639 f2 = gzdopen(fd2, "wb");
641 cc_log("Failed to gzdopen %s", tmp_file);
645 add_object_entry(mf, object_hash, included_files);
646 if (write_manifest(f2, mf)) {
649 if (x_rename(tmp_file, manifest_path) == 0) {
652 cc_log("Failed to rename %s to %s", tmp_file, manifest_path);
656 cc_log("Failed to write manifest file");