manifest.c

   1 /*
   2  * Copyright (C) 2009-2010 Joel Rosdahl
   3  *
   4  * This program is free software; you can redistribute it and/or modify it
   5  * under the terms of the GNU General Public License as published by the Free
   6  * Software Foundation; either version 3 of the License, or (at your option)
   7  * any later version.
   8  *
   9  * This program is distributed in the hope that it will be useful, but WITHOUT
  10  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  11  * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
  12  * more details.
  13  *
  14  * You should have received a copy of the GNU General Public License along with
  15  * this program; if not, write to the Free Software Foundation, Inc., 51
  16  * Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  17  */
  18
  19 #include "ccache.h"
  20 #include "hashtable_itr.h"
  21 #include "hashutil.h"
  22 #include "manifest.h"
  23 #include "murmurhashneutral2.h"
  24
  25 #include <zlib.h>
  26
  27 /*
  28  * Sketchy specification of the manifest disk format:
  29  *
  30  * <magic>         magic number                        (4 bytes)
  31  * <version>       file format version                 (1 byte unsigned int)
  32  * <hash_size>     size of the hash fields (in bytes)  (1 byte unsigned int)
  33  * <reserved>      reserved for future use             (2 bytes)
  34  * ----------------------------------------------------------------------------
  35  * <n>             number of include file paths        (4 bytes unsigned int)
  36  * <path_0>        path to include file                (NUL-terminated string,
  37  * ...                                                  at most 1024 bytes)
  38  * <path_n-1>
  39  * ----------------------------------------------------------------------------
  40  * <n>             number of include file hash entries (4 bytes unsigned int)
  41  * <index[0]>      index of include file path          (4 bytes unsigned int)
  42  * <hash[0]>       hash of include file                (<hash_size> bytes)
  43  * <size[0]>       size of include file                (4 bytes unsigned int)
  44  * ...
  45  * <hash[n-1]>
  46  * <size[n-1]>
  47  * <index[n-1]>
  48  * ----------------------------------------------------------------------------
  49  * <n>             number of object name entries       (4 bytes unsigned int)
  50  * <m[0]>          number of include file hash indexes (4 bytes unsigned int)
  51  * <index[0][0]>   include file hash index             (4 bytes unsigned int)
  52  * ...
  53  * <index[0][m[0]-1]>
  54  * <hash[0]>       hash part of object name            (<hash_size> bytes)
  55  * <size[0]>       size part of object name            (4 bytes unsigned int)
  56  * ...
  57  * <m[n-1]>        number of include file hash indexes
  58  * <index[n-1][0]> include file hash index
  59  * ...
  60  * <index[n-1][m[n-1]]>
  61  * <hash[n-1]>
  62  * <size[n-1]>
  63  */
  64
  65 static const uint32_t MAGIC = 0x63436d46U;
  66 static const uint8_t  VERSION = 0;
  67 static const uint32_t MAX_MANIFEST_ENTRIES = 100;
  68
  69 #define static_assert(e) do { enum { static_assert__ = 1/(e) }; } while (0)
  70
  71 struct file_info {
  72         /* Index to n_files. */
  73         uint32_t index;
  74         /* Hash of referenced file. */
  75         uint8_t hash[16];
  76         /* Size of referenced file. */
  77         uint32_t size;
  78 };
  79
  80 struct object {
  81         /* Number of entries in file_info_indexes. */
  82         uint32_t n_file_info_indexes;
  83         /* Indexes to file_infos. */
  84         uint32_t *file_info_indexes;
  85         /* Hash of the object itself. */
  86         struct file_hash hash;
  87 };
  88
  89 struct manifest {
  90         /* Size of hash fields (in bytes). */
  91         uint8_t hash_size;
  92
  93         /* Referenced include files. */
  94         uint32_t n_files;
  95         char **files;
  96
  97         /* Information about referenced include files. */
  98         uint32_t n_file_infos;
  99         struct file_info *file_infos;
 100
 101         /* Object names plus references to include file hashes. */
 102         uint32_t n_objects;
 103         struct object *objects;
 104 };
 105
 106 static unsigned int
 107 hash_from_file_info(void *key)
 108 {
 109         static_assert(sizeof(struct file_info) == 24); /* No padding. */
 110         return murmurhashneutral2(key, sizeof(struct file_info), 0);
 111 }
 112
 113 static int
 114 file_infos_equal(void *key1, void *key2)
 115 {
 116         struct file_info *fi1 = (struct file_info *)key1;
 117         struct file_info *fi2 = (struct file_info *)key2;
 118         return fi1->index == fi2->index
 119                && memcmp(fi1->hash, fi2->hash, 16) == 0
 120                && fi1->size == fi2->size;
 121 }
 122
 123 static void
 124 free_manifest(struct manifest *mf)
 125 {
 126         uint16_t i;
 127         for (i = 0; i < mf->n_files; i++) {
 128                 free(mf->files[i]);
 129         }
 130         free(mf->files);
 131         free(mf->file_infos);
 132         for (i = 0; i < mf->n_objects; i++) {
 133                 free(mf->objects[i].file_info_indexes);
 134         }
 135         free(mf->objects);
 136 }
 137
 138 #define READ_INT(size, var) \
 139         do { \
 140                 int ch_; \
 141                 size_t i_; \
 142                 (var) = 0; \
 143                 for (i_ = 0; i_ < (size); i_++) { \
 144                         ch_ = gzgetc(f); \
 145                         if (ch_ == EOF) { \
 146                                 goto error; \
 147                         } \
 148                         (var) <<= 8; \
 149                         (var) |= ch_ & 0xFF; \
 150                 } \
 151         } while (0)
 152
 153 #define READ_STR(var) \
 154         do { \
 155                 char buf_[1024]; \
 156                 size_t i_; \
 157                 int ch_; \
 158                 for (i_ = 0; i_ < sizeof(buf_); i_++) { \
 159                         ch_ = gzgetc(f); \
 160                         if (ch_ == EOF) { \
 161                                 goto error; \
 162                         } \
 163                         buf_[i_] = ch_; \
 164                         if (ch_ == '\0') { \
 165                                 break; \
 166                         } \
 167                 } \
 168                 if (i_ == sizeof(buf_)) { \
 169                         goto error; \
 170                 } \
 171                 (var) = x_strdup(buf_); \
 172         } while (0)
 173
 174 #define READ_BYTES(n, var) \
 175         do { \
 176                 size_t i_; \
 177                 int ch_; \
 178                 for (i_ = 0; i_ < (n); i_++) { \
 179                         ch_ = gzgetc(f); \
 180                         if (ch_ == EOF) { \
 181                                 goto error; \
 182                         } \
 183                         (var)[i_] = ch_; \
 184                 } \
 185         } while (0)
 186
 187 static struct manifest *
 188 create_empty_manifest(void)
 189 {
 190         struct manifest *mf;
 191
 192         mf = x_malloc(sizeof(*mf));
 193         mf->hash_size = 16;
 194         mf->n_files = 0;
 195         mf->files = NULL;
 196         mf->n_file_infos = 0;
 197         mf->file_infos = NULL;
 198         mf->n_objects = 0;
 199         mf->objects = NULL;
 200
 201         return mf;
 202 }
 203
 204 static struct manifest *
 205 read_manifest(gzFile f)
 206 {
 207         struct manifest *mf;
 208         uint16_t i, j;
 209         uint32_t magic;
 210         uint8_t version;
 211         uint16_t dummy;
 212
 213         mf = create_empty_manifest();
 214
 215         READ_INT(4, magic);
 216         if (magic != MAGIC) {
 217                 cc_log("Manifest file has bad magic number %u", magic);
 218                 free_manifest(mf);
 219                 return NULL;
 220         }
 221         READ_INT(1, version);
 222         if (version != VERSION) {
 223                 cc_log("Manifest file has unknown version %u", version);
 224                 free_manifest(mf);
 225                 return NULL;
 226         }
 227
 228         READ_INT(1, mf->hash_size);
 229         if (mf->hash_size != 16) {
 230                 /* Temporary measure until we support different hash algorithms. */
 231                 cc_log("Manifest file has unsupported hash size %u", mf->hash_size);
 232                 free_manifest(mf);
 233                 return NULL;
 234         }
 235
 236         READ_INT(2, dummy);
 237
 238         READ_INT(4, mf->n_files);
 239         mf->files = x_calloc(mf->n_files, sizeof(*mf->files));
 240         for (i = 0; i < mf->n_files; i++) {
 241                 READ_STR(mf->files[i]);
 242         }
 243
 244         READ_INT(4, mf->n_file_infos);
 245         mf->file_infos = x_calloc(mf->n_file_infos, sizeof(*mf->file_infos));
 246         for (i = 0; i < mf->n_file_infos; i++) {
 247                 READ_INT(4, mf->file_infos[i].index);
 248                 READ_BYTES(mf->hash_size, mf->file_infos[i].hash);
 249                 READ_INT(4, mf->file_infos[i].size);
 250         }
 251
 252         READ_INT(4, mf->n_objects);
 253         mf->objects = x_calloc(mf->n_objects, sizeof(*mf->objects));
 254         for (i = 0; i < mf->n_objects; i++) {
 255                 READ_INT(4, mf->objects[i].n_file_info_indexes);
 256                 mf->objects[i].file_info_indexes =
 257                         x_calloc(mf->objects[i].n_file_info_indexes,
 258                                  sizeof(*mf->objects[i].file_info_indexes));
 259                 for (j = 0; j < mf->objects[i].n_file_info_indexes; j++) {
 260                         READ_INT(4, mf->objects[i].file_info_indexes[j]);
 261                 }
 262                 READ_BYTES(mf->hash_size, mf->objects[i].hash.hash);
 263                 READ_INT(4, mf->objects[i].hash.size);
 264         }
 265
 266         return mf;
 267
 268 error:
 269         cc_log("Corrupt manifest file");
 270         free_manifest(mf);
 271         return NULL;
 272 }
 273
 274 #define WRITE_INT(size, var) \
 275         do { \
 276                 uint8_t ch_; \
 277                 size_t i_; \
 278                 for (i_ = 0; i_ < (size); i_++) { \
 279                         ch_ = ((var) >> (8 * ((size) - i_ - 1))); \
 280                         if (gzputc(f, ch_) == EOF) { \
 281                                 goto error; \
 282                         } \
 283                 } \
 284         } while (0)
 285
 286 #define WRITE_STR(var) \
 287         do { \
 288                 if (gzputs(f, var) == EOF || gzputc(f, '\0') == EOF) { \
 289                         goto error; \
 290                 } \
 291         } while (0)
 292
 293 #define WRITE_BYTES(n, var) \
 294         do { \
 295                 size_t i_; \
 296                 for (i_ = 0; i_ < (n); i_++) { \
 297                         if (gzputc(f, (var)[i_]) == EOF) { \
 298                                 goto error; \
 299                         } \
 300                 } \
 301         } while (0)
 302
 303 static int
 304 write_manifest(gzFile f, const struct manifest *mf)
 305 {
 306         uint16_t i, j;
 307
 308         WRITE_INT(4, MAGIC);
 309         WRITE_INT(1, VERSION);
 310         WRITE_INT(1, 16);
 311         WRITE_INT(2, 0);
 312
 313         WRITE_INT(4, mf->n_files);
 314         for (i = 0; i < mf->n_files; i++) {
 315                 WRITE_STR(mf->files[i]);
 316         }
 317
 318         WRITE_INT(4, mf->n_file_infos);
 319         for (i = 0; i < mf->n_file_infos; i++) {
 320                 WRITE_INT(4, mf->file_infos[i].index);
 321                 WRITE_BYTES(mf->hash_size, mf->file_infos[i].hash);
 322                 WRITE_INT(4, mf->file_infos[i].size);
 323         }
 324
 325         WRITE_INT(4, mf->n_objects);
 326         for (i = 0; i < mf->n_objects; i++) {
 327                 WRITE_INT(4, mf->objects[i].n_file_info_indexes);
 328                 for (j = 0; j < mf->objects[i].n_file_info_indexes; j++) {
 329                         WRITE_INT(4, mf->objects[i].file_info_indexes[j]);
 330                 }
 331                 WRITE_BYTES(mf->hash_size, mf->objects[i].hash.hash);
 332                 WRITE_INT(4, mf->objects[i].hash.size);
 333         }
 334
 335         return 1;
 336
 337 error:
 338         cc_log("Error writing to manifest file");
 339         return 0;
 340 }
 341
 342 static int
 343 verify_object(struct manifest *mf, struct object *obj,
 344               struct hashtable *hashed_files)
 345 {
 346         uint32_t i;
 347         struct file_info *fi;
 348         struct file_hash *actual;
 349         struct mdfour hash;
 350         int result;
 351
 352         for (i = 0; i < obj->n_file_info_indexes; i++) {
 353                 fi = &mf->file_infos[obj->file_info_indexes[i]];
 354                 actual = hashtable_search(hashed_files, mf->files[fi->index]);
 355                 if (!actual) {
 356                         actual = x_malloc(sizeof(*actual));
 357                         hash_start(&hash);
 358                         result = hash_source_code_file(&hash, mf->files[fi->index]);
 359                         if (result & HASH_SOURCE_CODE_ERROR) {
 360                                 cc_log("Failed hashing %s", mf->files[fi->index]);
 361                                 free(actual);
 362                                 return 0;
 363                         }
 364                         if (result & HASH_SOURCE_CODE_FOUND_TIME) {
 365                                 free(actual);
 366                                 return 0;
 367                         }
 368                         hash_result_as_bytes(&hash, actual->hash);
 369                         actual->size = hash.totalN;
 370                         hashtable_insert(hashed_files, x_strdup(mf->files[fi->index]), actual);
 371                 }
 372                 if (memcmp(fi->hash, actual->hash, mf->hash_size) != 0
 373                     || fi->size != actual->size) {
 374                         return 0;
 375                 }
 376         }
 377
 378         return 1;
 379 }
 380
 381 static struct hashtable *
 382 create_string_index_map(char **strings, uint32_t len)
 383 {
 384         uint32_t i;
 385         struct hashtable *h;
 386         uint32_t *index;
 387
 388         h = create_hashtable(1000, hash_from_string, strings_equal);
 389         for (i = 0; i < len; i++) {
 390                 index = x_malloc(sizeof(*index));
 391                 *index = i;
 392                 hashtable_insert(h, x_strdup(strings[i]), index);
 393         }
 394         return h;
 395 }
 396
 397 static struct hashtable *
 398 create_file_info_index_map(struct file_info *infos, uint32_t len)
 399 {
 400         uint32_t i;
 401         struct hashtable *h;
 402         struct file_info *fi;
 403         uint32_t *index;
 404
 405         h = create_hashtable(1000, hash_from_file_info, file_infos_equal);
 406         for (i = 0; i < len; i++) {
 407                 fi = x_malloc(sizeof(*fi));
 408                 *fi = infos[i];
 409                 index = x_malloc(sizeof(*index));
 410                 *index = i;
 411                 hashtable_insert(h, fi, index);
 412         }
 413         return h;
 414 }
 415
 416 static uint32_t
 417 get_include_file_index(struct manifest *mf, char *path,
 418                        struct hashtable *mf_files)
 419 {
 420         uint32_t *index;
 421         uint32_t n;
 422
 423         index = hashtable_search(mf_files, path);
 424         if (index) {
 425                 return *index;
 426         }
 427
 428         n = mf->n_files;
 429         mf->files = x_realloc(mf->files, (n + 1) * sizeof(*mf->files));
 430         mf->n_files++;
 431         mf->files[n] = x_strdup(path);
 432
 433         return n;
 434 }
 435
 436 static uint32_t
 437 get_file_hash_index(struct manifest *mf,
 438                     char *path,
 439                     struct file_hash *file_hash,
 440                     struct hashtable *mf_files,
 441                     struct hashtable *mf_file_infos)
 442 {
 443         struct file_info fi;
 444         uint32_t *fi_index;
 445         uint32_t n;
 446
 447         fi.index = get_include_file_index(mf, path, mf_files);
 448         memcpy(fi.hash, file_hash->hash, sizeof(fi.hash));
 449         fi.size = file_hash->size;
 450
 451         fi_index = hashtable_search(mf_file_infos, &fi);
 452         if (fi_index) {
 453                 return *fi_index;
 454         }
 455
 456         n = mf->n_file_infos;
 457         mf->file_infos = x_realloc(mf->file_infos, (n + 1) * sizeof(*mf->file_infos));
 458         mf->n_file_infos++;
 459         mf->file_infos[n] = fi;
 460
 461         return n;
 462 }
 463
 464 static void
 465 add_file_info_indexes(uint32_t *indexes, uint32_t size,
 466                       struct manifest *mf, struct hashtable *included_files)
 467 {
 468         struct hashtable_itr *iter;
 469         uint32_t i;
 470         char *path;
 471         struct file_hash *file_hash;
 472         struct hashtable *mf_files; /* path --> index */
 473         struct hashtable *mf_file_infos; /* struct file_info --> index */
 474
 475         if (size == 0) {
 476                 return;
 477         }
 478
 479         mf_files = create_string_index_map(mf->files, mf->n_files);
 480         mf_file_infos = create_file_info_index_map(mf->file_infos, mf->n_file_infos);
 481         iter = hashtable_iterator(included_files);
 482         i = 0;
 483         do {
 484                 path = hashtable_iterator_key(iter);
 485                 file_hash = hashtable_iterator_value(iter);
 486                 indexes[i] = get_file_hash_index(mf, path, file_hash, mf_files,
 487                                                  mf_file_infos);
 488                 i++;
 489         } while (hashtable_iterator_advance(iter));
 490         assert(i == size);
 491
 492         hashtable_destroy(mf_file_infos, 1);
 493         hashtable_destroy(mf_files, 1);
 494 }
 495
 496 static void
 497 add_object_entry(struct manifest *mf,
 498                  struct file_hash *object_hash,
 499                  struct hashtable *included_files)
 500 {
 501         struct object *obj;
 502         uint32_t n;
 503
 504         n = mf->n_objects;
 505         mf->objects = x_realloc(mf->objects, (n + 1) * sizeof(*mf->objects));
 506         mf->n_objects++;
 507         obj = &mf->objects[n];
 508
 509         n = hashtable_count(included_files);
 510         obj->n_file_info_indexes = n;
 511         obj->file_info_indexes = x_malloc(n * sizeof(*obj->file_info_indexes));
 512         add_file_info_indexes(obj->file_info_indexes, n, mf, included_files);
 513         memcpy(obj->hash.hash, object_hash->hash, mf->hash_size);
 514         obj->hash.size = object_hash->size;
 515 }
 516
 517 /*
 518  * Try to get the object hash from a manifest file. Caller frees. Returns NULL
 519  * on failure.
 520  */
 521 struct file_hash *
 522 manifest_get(const char *manifest_path)
 523 {
 524         int fd;
 525         gzFile f = NULL;
 526         struct manifest *mf = NULL;
 527         struct hashtable *hashed_files = NULL; /* path --> struct file_hash */
 528         uint32_t i;
 529         struct file_hash *fh = NULL;
 530
 531         fd = open(manifest_path, O_RDONLY | O_BINARY);
 532         if (fd == -1) {
 533                 /* Cache miss. */
 534                 cc_log("No such manifest file");
 535                 goto out;
 536         }
 537         f = gzdopen(fd, "rb");
 538         if (!f) {
 539                 close(fd);
 540                 cc_log("Failed to gzdopen manifest file");
 541                 goto out;
 542         }
 543         mf = read_manifest(f);
 544         if (!mf) {
 545                 cc_log("Error reading manifest file");
 546                 goto out;
 547         }
 548
 549         hashed_files = create_hashtable(1000, hash_from_string, strings_equal);
 550
 551         /* Check newest object first since it's a bit more likely to match. */
 552         for (i = mf->n_objects; i > 0; i--) {
 553                 if (verify_object(mf, &mf->objects[i - 1], hashed_files)) {
 554                         fh = x_malloc(sizeof(*fh));
 555                         *fh = mf->objects[i - 1].hash;
 556                         goto out;
 557                 }
 558         }
 559
 560 out:
 561         if (hashed_files) {
 562                 hashtable_destroy(hashed_files, 1);
 563         }
 564         if (f) {
 565                 gzclose(f);
 566         }
 567         if (mf) {
 568                 free_manifest(mf);
 569         }
 570         return fh;
 571 }
 572
 573 /*
 574  * Put the object name into a manifest file given a set of included files.
 575  * Returns true on success, otherwise false.
 576  */
 577 bool
 578 manifest_put(const char *manifest_path, struct file_hash *object_hash,
 579              struct hashtable *included_files)
 580 {
 581         int ret = 0;
 582         int fd1;
 583         int fd2;
 584         gzFile f2 = NULL;
 585         struct manifest *mf = NULL;
 586         char *tmp_file = NULL;
 587
 588         /*
 589          * We don't bother to acquire a lock when writing the manifest to disk. A
 590          * race between two processes will only result in one lost entry, which is
 591          * not a big deal, and it's also very unlikely.
 592          */
 593
 594         fd1 = open(manifest_path, O_RDONLY | O_BINARY);
 595         if (fd1 == -1) {
 596                 /* New file. */
 597                 mf = create_empty_manifest();
 598         } else {
 599                 gzFile f1 = gzdopen(fd1, "rb");
 600                 if (!f1) {
 601                         cc_log("Failed to gzdopen manifest file");
 602                         close(fd1);
 603                         goto out;
 604                 }
 605                 mf = read_manifest(f1);
 606                 gzclose(f1);
 607                 if (!mf) {
 608                         cc_log("Failed to read manifest file; deleting it");
 609                         x_unlink(manifest_path);
 610                         mf = create_empty_manifest();
 611                 }
 612         }
 613
 614         if (mf->n_objects > MAX_MANIFEST_ENTRIES) {
 615                 /*
 616                  * Normally, there shouldn't be many object entries in the manifest since
 617                  * new entries are added only if an include file has changed but not the
 618                  * source file, and you typically change source files more often than
 619                  * header files. However, it's certainly possible to imagine cases where
 620                  * the manifest will grow large (for instance, a generated header file that
 621                  * changes for every build), and this must be taken care of since
 622                  * processing an ever growing manifest eventually will take too much time.
 623                  * A good way of solving this would be to maintain the object entries in
 624                  * LRU order and discarding the old ones. An easy way is to throw away all
 625                  * entries when there are too many. Let's do that for now.
 626                  */
 627                 cc_log("More than %u entries in manifest file; discarding",
 628                        MAX_MANIFEST_ENTRIES);
 629                 free_manifest(mf);
 630                 mf = create_empty_manifest();
 631         }
 632
 633         tmp_file = format("%s.tmp.%s", manifest_path, tmp_string());
 634         fd2 = safe_open(tmp_file);
 635         if (fd2 == -1) {
 636                 cc_log("Failed to open %s", tmp_file);
 637                 goto out;
 638         }
 639         f2 = gzdopen(fd2, "wb");
 640         if (!f2) {
 641                 cc_log("Failed to gzdopen %s", tmp_file);
 642                 goto out;
 643         }
 644
 645         add_object_entry(mf, object_hash, included_files);
 646         if (write_manifest(f2, mf)) {
 647                 gzclose(f2);
 648                 f2 = NULL;
 649                 if (x_rename(tmp_file, manifest_path) == 0) {
 650                         ret = 1;
 651                 } else {
 652                         cc_log("Failed to rename %s to %s", tmp_file, manifest_path);
 653                         goto out;
 654                 }
 655         } else {
 656                 cc_log("Failed to write manifest file");
 657                 goto out;
 658         }
 659
 660 out:
 661         if (mf) {
 662                 free_manifest(mf);
 663         }
 664         if (tmp_file) {
 665                 free(tmp_file);
 666         }
 667         if (f2) {
 668                 gzclose(f2);
 669         }
 670         return ret;
 671 }