1 // Copyright (C) 2009-2020 Joel Rosdahl and other contributors
3 // See doc/AUTHORS.adoc for a complete list of contributors.
5 // This program is free software; you can redistribute it and/or modify it
6 // under the terms of the GNU General Public License as published by the Free
7 // Software Foundation; either version 3 of the License, or (at your option)
10 // This program is distributed in the hope that it will be useful, but WITHOUT
11 // ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
12 // FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
15 // You should have received a copy of the GNU General Public License along with
16 // this program; if not, write to the Free Software Foundation, Inc., 51
17 // Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
19 #include "Manifest.hpp"
21 #include "AtomicFile.hpp"
22 #include "CacheEntryReader.hpp"
23 #include "CacheEntryWriter.hpp"
24 #include "Checksum.hpp"
26 #include "Context.hpp"
30 #include "Logging.hpp"
31 #include "StdMakeUnique.hpp"
33 #include "hashutil.hpp"
35 // Manifest data format
36 // ====================
38 // Integers are big-endian.
40 // <manifest> ::= <header> <body> <epilogue
41 // <header> ::= <magic> <version> <compr_type> <compr_level>
43 // <magic> ::= 4 bytes ("cCrS")
44 // <version> ::= uint8_t
45 // <compr_type> ::= <compr_none> | <compr_zstd>
46 // <compr_none> ::= 0 (uint8_t)
47 // <compr_zstd> ::= 1 (uint8_t)
48 // <compr_level> ::= int8_t
49 // <content_len> ::= uint64_t ; size of file if stored uncompressed
50 // <body> ::= <paths> <includes> <results> ; body is potentially
52 // <paths> ::= <n_paths> <path_entry>*
53 // <n_paths> ::= uint32_t
54 // <path_entry> ::= <path_len> <path>
55 // <path_len> ::= uint16_t
56 // <path> ::= path_len bytes
57 // <includes> ::= <n_includes> <include_entry>*
58 // <n_includes> ::= uint32_t
59 // <include_entry> ::= <path_index> <digest> <fsize> <mtime> <ctime>
60 // <path_index> ::= uint32_t
61 // <digest> ::= Digest::size() bytes
62 // <fsize> ::= uint64_t ; file size
63 // <mtime> ::= int64_t ; modification time
64 // <ctime> ::= int64_t ; status change time
65 // <results> ::= <n_results> <result>*
66 // <n_results> ::= uint32_t
67 // <result> ::= <n_indexes> <include_index>* <name>
68 // <n_indexes> ::= uint32_t
69 // <include_index> ::= uint32_t
70 // <name> ::= Digest::size() bytes
71 // <epilogue> ::= <checksum>
72 // <checksum> ::= uint64_t ; XXH3 of content bytes
74 // Sketch of concrete layout:
78 // <compr_type> 1 byte
79 // <compr_level> 1 byte
80 // <content_len> 8 bytes
81 // --- [potentially compressed from here] -------------------------------------
84 // <path> path_len bytes
86 // ----------------------------------------------------------------------------
87 // <n_includes> 4 bytes
88 // <path_index> 4 bytes
89 // <digest> Digest::size() bytes
94 // ----------------------------------------------------------------------------
95 // <n_results> 4 bytes
96 // <n_indexes> 4 bytes
97 // <include_index> 4 bytes
99 // <name> Digest::size() bytes
107 // 1: Introduced in ccache 3.0. (Files are always compressed with gzip.)
108 // 2: Introduced in ccache 4.0.
111 using nonstd::nullopt;
112 using nonstd::optional;
114 const uint32_t k_max_manifest_entries = 100;
115 const uint32_t k_max_manifest_file_info_entries = 10000;
123 // Digest of referenced file.
125 // Size of referenced file.
127 // mtime of referenced file.
129 // ctime of referenced file.
134 operator==(const FileInfo& lhs, const FileInfo& rhs)
136 return lhs.index == rhs.index && lhs.digest == rhs.digest
137 && lhs.fsize == rhs.fsize && lhs.mtime == rhs.mtime
138 && lhs.ctime == rhs.ctime;
145 template<> struct hash<FileInfo>
148 operator()(const FileInfo& file_info) const
150 static_assert(sizeof(FileInfo) == 48, "unexpected size"); // No padding.
152 checksum.update(&file_info, sizeof(file_info));
153 return checksum.digest();
163 // Indexes to file_infos.
164 std::vector<uint32_t> file_info_indexes;
166 // Name of the result.
172 // Referenced include files.
173 std::vector<std::string> files;
175 // Information about referenced include files.
176 std::vector<FileInfo> file_infos;
178 // Result names plus references to include file infos.
179 std::vector<ResultEntry> results;
183 const Digest& result_digest,
184 const std::unordered_map<std::string, Digest>& included_files,
185 time_t time_of_compilation,
188 std::unordered_map<std::string, uint32_t /*index*/> mf_files;
189 for (uint32_t i = 0; i < files.size(); ++i) {
190 mf_files.emplace(files[i], i);
193 std::unordered_map<FileInfo, uint32_t /*index*/> mf_file_infos;
194 for (uint32_t i = 0; i < file_infos.size(); ++i) {
195 mf_file_infos.emplace(file_infos[i], i);
198 std::vector<uint32_t> file_info_indexes;
199 file_info_indexes.reserve(included_files.size());
201 for (const auto& item : included_files) {
202 file_info_indexes.push_back(get_file_info_index(item.first,
210 results.push_back(ResultEntry{std::move(file_info_indexes), result_digest});
216 const std::string& path,
217 const Digest& digest,
218 const std::unordered_map<std::string, uint32_t>& mf_files,
219 const std::unordered_map<FileInfo, uint32_t>& mf_file_infos,
220 time_t time_of_compilation,
225 auto f_it = mf_files.find(path);
226 if (f_it != mf_files.end()) {
227 fi.index = f_it->second;
229 files.push_back(path);
230 fi.index = files.size() - 1;
235 // file_stat.{m,c}time() have a resolution of 1 second, so we can cache the
236 // file's mtime and ctime only if they're at least one second older than
237 // time_of_compilation.
239 // file_stat.ctime() may be 0, so we have to check time_of_compilation
240 // against MAX(mtime, ctime).
242 // ccache only reads mtime/ctime if file_stat_match sloppiness is enabled,
243 // so mtimes/ctimes are stored as a dummy value (-1) if not enabled. This
244 // reduces the number of file_info entries for the common case.
246 auto file_stat = Stat::stat(path, Stat::OnError::log);
249 && time_of_compilation
250 > std::max(file_stat.mtime(), file_stat.ctime())) {
251 fi.mtime = file_stat.mtime();
252 fi.ctime = file_stat.ctime();
257 fi.fsize = file_stat.size();
264 auto fi_it = mf_file_infos.find(fi);
265 if (fi_it != mf_file_infos.end()) {
266 return fi_it->second;
268 file_infos.push_back(fi);
269 return file_infos.size() - 1;
281 std::unique_ptr<ManifestData>
282 read_manifest(const std::string& path, FILE* dump_stream = nullptr)
284 File file(path, "rb");
289 CacheEntryReader reader(file.get(), Manifest::k_magic, Manifest::k_version);
292 reader.dump_header(dump_stream);
295 auto mf = std::make_unique<ManifestData>();
297 uint32_t entry_count;
298 reader.read(entry_count);
299 for (uint32_t i = 0; i < entry_count; ++i) {
300 mf->files.emplace_back();
301 auto& entry = mf->files.back();
305 entry.assign(length, 0);
306 reader.read(&entry[0], length);
309 reader.read(entry_count);
310 for (uint32_t i = 0; i < entry_count; ++i) {
311 mf->file_infos.emplace_back();
312 auto& entry = mf->file_infos.back();
314 reader.read(entry.index);
315 reader.read(entry.digest.bytes(), Digest::size());
316 reader.read(entry.fsize);
317 reader.read(entry.mtime);
318 reader.read(entry.ctime);
321 reader.read(entry_count);
322 for (uint32_t i = 0; i < entry_count; ++i) {
323 mf->results.emplace_back();
324 auto& entry = mf->results.back();
326 uint32_t file_info_count;
327 reader.read(file_info_count);
328 for (uint32_t j = 0; j < file_info_count; ++j) {
329 uint32_t file_info_index;
330 reader.read(file_info_index);
331 entry.file_info_indexes.push_back(file_info_index);
333 reader.read(entry.name.bytes(), Digest::size());
341 write_manifest(const Config& config,
342 const std::string& path,
343 const ManifestData& mf)
345 uint64_t payload_size = 0;
346 payload_size += 4; // n_files
347 for (const auto& file : mf.files) {
348 payload_size += 2 + file.length();
350 payload_size += 4; // n_file_infos
351 payload_size += mf.file_infos.size() * (4 + Digest::size() + 8 + 8 + 8);
352 payload_size += 4; // n_results
353 for (const auto& result : mf.results) {
354 payload_size += 4; // n_file_info_indexes
355 payload_size += result.file_info_indexes.size() * 4;
356 payload_size += Digest::size();
359 AtomicFile atomic_manifest_file(path, AtomicFile::Mode::binary);
360 CacheEntryWriter writer(atomic_manifest_file.stream(),
363 Compression::type_from_config(config),
364 Compression::level_from_config(config),
366 writer.write<uint32_t>(mf.files.size());
367 for (const auto& file : mf.files) {
368 writer.write<uint16_t>(file.length());
369 writer.write(file.data(), file.length());
372 writer.write<uint32_t>(mf.file_infos.size());
373 for (const auto& file_info : mf.file_infos) {
374 writer.write<uint32_t>(file_info.index);
375 writer.write(file_info.digest.bytes(), Digest::size());
376 writer.write(file_info.fsize);
377 writer.write(file_info.mtime);
378 writer.write(file_info.ctime);
381 writer.write<uint32_t>(mf.results.size());
382 for (const auto& result : mf.results) {
383 writer.write<uint32_t>(result.file_info_indexes.size());
384 for (auto index : result.file_info_indexes) {
387 writer.write(result.name.bytes(), Digest::size());
391 atomic_manifest_file.commit();
396 verify_result(const Context& ctx,
397 const ManifestData& mf,
398 const ResultEntry& result,
399 std::unordered_map<std::string, FileStats>& stated_files,
400 std::unordered_map<std::string, Digest>& hashed_files)
402 for (uint32_t file_info_index : result.file_info_indexes) {
403 const auto& fi = mf.file_infos[file_info_index];
404 const auto& path = mf.files[fi.index];
406 auto stated_files_iter = stated_files.find(path);
407 if (stated_files_iter == stated_files.end()) {
408 auto file_stat = Stat::stat(path, Stat::OnError::log);
413 st.size = file_stat.size();
414 st.mtime = file_stat.mtime();
415 st.ctime = file_stat.ctime();
416 stated_files_iter = stated_files.emplace(path, st).first;
418 const FileStats& fs = stated_files_iter->second;
420 if (fi.fsize != fs.size) {
424 // Clang stores the mtime of the included files in the precompiled header,
425 // and will error out if that header is later used without rebuilding.
426 if ((ctx.guessed_compiler == GuessedCompiler::clang
427 || ctx.guessed_compiler == GuessedCompiler::unknown)
428 && ctx.args_info.output_is_precompiled_header
429 && !ctx.args_info.fno_pch_timestamp && fi.mtime != fs.mtime) {
430 log("Precompiled header includes {}, which has a new mtime", path);
434 if (ctx.config.sloppiness() & SLOPPY_FILE_STAT_MATCHES) {
435 if (!(ctx.config.sloppiness() & SLOPPY_FILE_STAT_MATCHES_CTIME)) {
436 if (fi.mtime == fs.mtime && fi.ctime == fs.ctime) {
437 log("mtime/ctime hit for {}", path);
440 log("mtime/ctime miss for {}", path);
443 if (fi.mtime == fs.mtime) {
444 log("mtime hit for {}", path);
447 log("mtime miss for {}", path);
452 auto hashed_files_iter = hashed_files.find(path);
453 if (hashed_files_iter == hashed_files.end()) {
455 int ret = hash_source_code_file(ctx, hash, path, fs.size);
456 if (ret & HASH_SOURCE_CODE_ERROR) {
457 log("Failed hashing {}", path);
460 if (ret & HASH_SOURCE_CODE_FOUND_TIME) {
464 Digest actual = hash.digest();
465 hashed_files_iter = hashed_files.emplace(path, actual).first;
468 if (fi.digest != hashed_files_iter->second) {
480 const std::string k_file_suffix = "M";
481 const uint8_t k_magic[4] = {'c', 'C', 'm', 'F'};
482 const uint8_t k_version = 2;
484 // Try to get the result name from a manifest file. Returns nullopt on failure.
486 get(const Context& ctx, const std::string& path)
488 std::unique_ptr<ManifestData> mf;
490 mf = read_manifest(path);
492 // Update modification timestamp to save files from LRU cleanup.
493 Util::update_mtime(path);
495 log("No such manifest file");
498 } catch (const Error& e) {
499 log("Error: {}", e.what());
503 std::unordered_map<std::string, FileStats> stated_files;
504 std::unordered_map<std::string, Digest> hashed_files;
506 // Check newest result first since it's a bit more likely to match.
507 for (uint32_t i = mf->results.size(); i > 0; i--) {
509 ctx, *mf, mf->results[i - 1], stated_files, hashed_files)) {
510 return mf->results[i - 1].name;
517 // Put the result name into a manifest file given a set of included files.
518 // Returns true on success, otherwise false.
520 put(const Config& config,
521 const std::string& path,
522 const Digest& result_name,
523 const std::unordered_map<std::string, Digest>& included_files,
525 time_t time_of_compilation,
528 // We don't bother to acquire a lock when writing the manifest to disk. A
529 // race between two processes will only result in one lost entry, which is
530 // not a big deal, and it's also very unlikely.
532 std::unique_ptr<ManifestData> mf;
534 mf = read_manifest(path);
536 // Manifest file didn't exist.
537 mf = std::make_unique<ManifestData>();
539 } catch (const Error& e) {
540 log("Error: {}", e.what());
541 // Manifest file was corrupt, ignore.
542 mf = std::make_unique<ManifestData>();
545 if (mf->results.size() > k_max_manifest_entries) {
546 // Normally, there shouldn't be many result entries in the manifest since
547 // new entries are added only if an include file has changed but not the
548 // source file, and you typically change source files more often than
549 // header files. However, it's certainly possible to imagine cases where
550 // the manifest will grow large (for instance, a generated header file that
551 // changes for every build), and this must be taken care of since
552 // processing an ever growing manifest eventually will take too much time.
553 // A good way of solving this would be to maintain the result entries in
554 // LRU order and discarding the old ones. An easy way is to throw away all
555 // entries when there are too many. Let's do that for now.
556 log("More than {} entries in manifest file; discarding",
557 k_max_manifest_entries);
558 mf = std::make_unique<ManifestData>();
559 } else if (mf->file_infos.size() > k_max_manifest_file_info_entries) {
560 // Rarely, FileInfo entries can grow large in pathological cases where
561 // many included files change, but the main file does not. This also puts
562 // an upper bound on the number of FileInfo entries.
563 log("More than {} FileInfo entries in manifest file; discarding",
564 k_max_manifest_file_info_entries);
565 mf = std::make_unique<ManifestData>();
568 mf->add_result_entry(
569 result_name, included_files, time_of_compilation, save_timestamp);
572 write_manifest(config, path, *mf);
574 } catch (const Error& e) {
575 log("Error: {}", e.what());
581 dump(const std::string& path, FILE* stream)
583 std::unique_ptr<ManifestData> mf;
585 mf = read_manifest(path, stream);
586 } catch (const Error& e) {
587 fmt::print(stream, "Error: {}\n", e.what());
592 fmt::print(stream, "Error: No such file: {}\n", path);
596 fmt::print(stream, "File paths ({}):\n", mf->files.size());
597 for (size_t i = 0; i < mf->files.size(); ++i) {
598 fmt::print(stream, " {}: {}\n", i, mf->files[i]);
600 fmt::print(stream, "File infos ({}):\n", mf->file_infos.size());
601 for (size_t i = 0; i < mf->file_infos.size(); ++i) {
602 fmt::print(stream, " {}:\n", i);
603 fmt::print(stream, " Path index: {}\n", mf->file_infos[i].index);
604 fmt::print(stream, " Hash: {}\n", mf->file_infos[i].digest.to_string());
605 fmt::print(stream, " File size: {}\n", mf->file_infos[i].fsize);
606 fmt::print(stream, " Mtime: {}\n", mf->file_infos[i].mtime);
607 fmt::print(stream, " Ctime: {}\n", mf->file_infos[i].ctime);
609 fmt::print(stream, "Results ({}):\n", mf->results.size());
610 for (size_t i = 0; i < mf->results.size(); ++i) {
611 fmt::print(stream, " {}:\n", i);
612 fmt::print(stream, " File info indexes:");
613 for (uint32_t file_info_index : mf->results[i].file_info_indexes) {
614 fmt::print(stream, " {}", file_info_index);
616 fmt::print(stream, "\n");
617 fmt::print(stream, " Name: {}\n", mf->results[i].name.to_string());
623 } // namespace Manifest