Update To 11.40.268.0
[platform/framework/web/crosswalk.git] / src / net / disk_cache / simple / simple_index_file.cc
1 // Copyright (c) 2013 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4
5 #include "net/disk_cache/simple/simple_index_file.h"
6
7 #include <vector>
8
9 #include "base/files/file.h"
10 #include "base/files/file_util.h"
11 #include "base/files/memory_mapped_file.h"
12 #include "base/hash.h"
13 #include "base/logging.h"
14 #include "base/pickle.h"
15 #include "base/single_thread_task_runner.h"
16 #include "base/task_runner_util.h"
17 #include "base/threading/thread_restrictions.h"
18 #include "net/disk_cache/simple/simple_backend_version.h"
19 #include "net/disk_cache/simple/simple_entry_format.h"
20 #include "net/disk_cache/simple/simple_histogram_macros.h"
21 #include "net/disk_cache/simple/simple_index.h"
22 #include "net/disk_cache/simple/simple_synchronous_entry.h"
23 #include "net/disk_cache/simple/simple_util.h"
24 #include "third_party/zlib/zlib.h"
25
26 using base::File;
27
28 namespace disk_cache {
29 namespace {
30
31 const int kEntryFilesHashLength = 16;
32 const int kEntryFilesSuffixLength = 2;
33
34 const uint64 kMaxEntiresInIndex = 100000000;
35
36 uint32 CalculatePickleCRC(const Pickle& pickle) {
37   return crc32(crc32(0, Z_NULL, 0),
38                reinterpret_cast<const Bytef*>(pickle.payload()),
39                pickle.payload_size());
40 }
41
42 // Used in histograms. Please only add new values at the end.
43 enum IndexFileState {
44   INDEX_STATE_CORRUPT = 0,
45   INDEX_STATE_STALE = 1,
46   INDEX_STATE_FRESH = 2,
47   INDEX_STATE_FRESH_CONCURRENT_UPDATES = 3,
48   INDEX_STATE_MAX = 4,
49 };
50
51 void UmaRecordIndexFileState(IndexFileState state, net::CacheType cache_type) {
52   SIMPLE_CACHE_UMA(ENUMERATION,
53                    "IndexFileStateOnLoad", cache_type, state, INDEX_STATE_MAX);
54 }
55
56 // Used in histograms. Please only add new values at the end.
57 enum IndexInitMethod {
58   INITIALIZE_METHOD_RECOVERED = 0,
59   INITIALIZE_METHOD_LOADED = 1,
60   INITIALIZE_METHOD_NEWCACHE = 2,
61   INITIALIZE_METHOD_MAX = 3,
62 };
63
64 void UmaRecordIndexInitMethod(IndexInitMethod method,
65                               net::CacheType cache_type) {
66   SIMPLE_CACHE_UMA(ENUMERATION,
67                    "IndexInitializeMethod", cache_type,
68                    method, INITIALIZE_METHOD_MAX);
69 }
70
71 bool WritePickleFile(Pickle* pickle, const base::FilePath& file_name) {
72   File file(
73       file_name,
74       File::FLAG_CREATE | File::FLAG_WRITE | File::FLAG_SHARE_DELETE);
75   if (!file.IsValid())
76     return false;
77
78   int bytes_written =
79       file.Write(0, static_cast<const char*>(pickle->data()), pickle->size());
80   if (bytes_written != implicit_cast<int>(pickle->size())) {
81     simple_util::SimpleCacheDeleteFile(file_name);
82     return false;
83   }
84   return true;
85 }
86
87 // Called for each cache directory traversal iteration.
88 void ProcessEntryFile(SimpleIndex::EntrySet* entries,
89                       const base::FilePath& file_path) {
90   static const size_t kEntryFilesLength =
91       kEntryFilesHashLength + kEntryFilesSuffixLength;
92   // Converting to std::string is OK since we never use UTF8 wide chars in our
93   // file names.
94   const base::FilePath::StringType base_name = file_path.BaseName().value();
95   const std::string file_name(base_name.begin(), base_name.end());
96   if (file_name.size() != kEntryFilesLength)
97     return;
98   const base::StringPiece hash_string(
99       file_name.begin(), file_name.begin() + kEntryFilesHashLength);
100   uint64 hash_key = 0;
101   if (!simple_util::GetEntryHashKeyFromHexString(hash_string, &hash_key)) {
102     LOG(WARNING) << "Invalid entry hash key filename while restoring index from"
103                  << " disk: " << file_name;
104     return;
105   }
106
107   File::Info file_info;
108   if (!base::GetFileInfo(file_path, &file_info)) {
109     LOG(ERROR) << "Could not get file info for " << file_path.value();
110     return;
111   }
112   base::Time last_used_time;
113 #if defined(OS_POSIX)
114   // For POSIX systems, a last access time is available. However, it's not
115   // guaranteed to be more accurate than mtime. It is no worse though.
116   last_used_time = file_info.last_accessed;
117 #endif
118   if (last_used_time.is_null())
119     last_used_time = file_info.last_modified;
120
121   int64 file_size = file_info.size;
122   SimpleIndex::EntrySet::iterator it = entries->find(hash_key);
123   if (it == entries->end()) {
124     SimpleIndex::InsertInEntrySet(
125         hash_key,
126         EntryMetadata(last_used_time, file_size),
127         entries);
128   } else {
129     // Summing up the total size of the entry through all the *_[0-1] files
130     it->second.SetEntrySize(it->second.GetEntrySize() + file_size);
131   }
132 }
133
134 }  // namespace
135
136 SimpleIndexLoadResult::SimpleIndexLoadResult() : did_load(false),
137                                                  flush_required(false) {
138 }
139
140 SimpleIndexLoadResult::~SimpleIndexLoadResult() {
141 }
142
143 void SimpleIndexLoadResult::Reset() {
144   did_load = false;
145   flush_required = false;
146   entries.clear();
147 }
148
149 // static
150 const char SimpleIndexFile::kIndexFileName[] = "the-real-index";
151 // static
152 const char SimpleIndexFile::kIndexDirectory[] = "index-dir";
153 // static
154 const char SimpleIndexFile::kTempIndexFileName[] = "temp-index";
155
156 SimpleIndexFile::IndexMetadata::IndexMetadata()
157     : magic_number_(kSimpleIndexMagicNumber),
158       version_(kSimpleVersion),
159       number_of_entries_(0),
160       cache_size_(0) {}
161
162 SimpleIndexFile::IndexMetadata::IndexMetadata(
163     uint64 number_of_entries, uint64 cache_size)
164     : magic_number_(kSimpleIndexMagicNumber),
165       version_(kSimpleVersion),
166       number_of_entries_(number_of_entries),
167       cache_size_(cache_size) {}
168
169 void SimpleIndexFile::IndexMetadata::Serialize(Pickle* pickle) const {
170   DCHECK(pickle);
171   pickle->WriteUInt64(magic_number_);
172   pickle->WriteUInt32(version_);
173   pickle->WriteUInt64(number_of_entries_);
174   pickle->WriteUInt64(cache_size_);
175 }
176
177 // static
178 bool SimpleIndexFile::SerializeFinalData(base::Time cache_modified,
179                                          Pickle* pickle) {
180   if (!pickle->WriteInt64(cache_modified.ToInternalValue()))
181     return false;
182   SimpleIndexFile::PickleHeader* header_p = pickle->headerT<PickleHeader>();
183   header_p->crc = CalculatePickleCRC(*pickle);
184   return true;
185 }
186
187 bool SimpleIndexFile::IndexMetadata::Deserialize(PickleIterator* it) {
188   DCHECK(it);
189   return it->ReadUInt64(&magic_number_) &&
190       it->ReadUInt32(&version_) &&
191       it->ReadUInt64(&number_of_entries_)&&
192       it->ReadUInt64(&cache_size_);
193 }
194
195 void SimpleIndexFile::SyncWriteToDisk(net::CacheType cache_type,
196                                       const base::FilePath& cache_directory,
197                                       const base::FilePath& index_filename,
198                                       const base::FilePath& temp_index_filename,
199                                       scoped_ptr<Pickle> pickle,
200                                       const base::TimeTicks& start_time,
201                                       bool app_on_background) {
202   DCHECK_EQ(index_filename.DirName().value(),
203             temp_index_filename.DirName().value());
204   base::FilePath index_file_directory = temp_index_filename.DirName();
205   if (!base::DirectoryExists(index_file_directory) &&
206       !base::CreateDirectory(index_file_directory)) {
207     LOG(ERROR) << "Could not create a directory to hold the index file";
208     return;
209   }
210
211   // There is a chance that the index containing all the necessary data about
212   // newly created entries will appear to be stale. This can happen if on-disk
213   // part of a Create operation does not fit into the time budget for the index
214   // flush delay. This simple approach will be reconsidered if it does not allow
215   // for maintaining freshness.
216   base::Time cache_dir_mtime;
217   if (!simple_util::GetMTime(cache_directory, &cache_dir_mtime)) {
218     LOG(ERROR) << "Could obtain information about cache age";
219     return;
220   }
221   SerializeFinalData(cache_dir_mtime, pickle.get());
222   if (!WritePickleFile(pickle.get(), temp_index_filename)) {
223     LOG(ERROR) << "Failed to write the temporary index file";
224     return;
225   }
226
227   // Atomically rename the temporary index file to become the real one.
228   // TODO(gavinp): DCHECK when not shutting down, since that is very strange.
229   // The rename failing during shutdown is legal because it's legal to begin
230   // erasing a cache as soon as the destructor has been called.
231   if (!base::ReplaceFile(temp_index_filename, index_filename, NULL))
232     return;
233
234   if (app_on_background) {
235     SIMPLE_CACHE_UMA(TIMES,
236                      "IndexWriteToDiskTime.Background", cache_type,
237                      (base::TimeTicks::Now() - start_time));
238   } else {
239     SIMPLE_CACHE_UMA(TIMES,
240                      "IndexWriteToDiskTime.Foreground", cache_type,
241                      (base::TimeTicks::Now() - start_time));
242   }
243 }
244
245 bool SimpleIndexFile::IndexMetadata::CheckIndexMetadata() {
246   return number_of_entries_ <= kMaxEntiresInIndex &&
247       magic_number_ == kSimpleIndexMagicNumber &&
248       version_ == kSimpleVersion;
249 }
250
251 SimpleIndexFile::SimpleIndexFile(
252     const scoped_refptr<base::SingleThreadTaskRunner>& cache_thread,
253     const scoped_refptr<base::TaskRunner>& worker_pool,
254     net::CacheType cache_type,
255     const base::FilePath& cache_directory)
256     : cache_thread_(cache_thread),
257       worker_pool_(worker_pool),
258       cache_type_(cache_type),
259       cache_directory_(cache_directory),
260       index_file_(cache_directory_.AppendASCII(kIndexDirectory)
261                       .AppendASCII(kIndexFileName)),
262       temp_index_file_(cache_directory_.AppendASCII(kIndexDirectory)
263                            .AppendASCII(kTempIndexFileName)) {
264 }
265
266 SimpleIndexFile::~SimpleIndexFile() {}
267
268 void SimpleIndexFile::LoadIndexEntries(base::Time cache_last_modified,
269                                        const base::Closure& callback,
270                                        SimpleIndexLoadResult* out_result) {
271   base::Closure task = base::Bind(&SimpleIndexFile::SyncLoadIndexEntries,
272                                   cache_type_,
273                                   cache_last_modified, cache_directory_,
274                                   index_file_, out_result);
275   worker_pool_->PostTaskAndReply(FROM_HERE, task, callback);
276 }
277
278 void SimpleIndexFile::WriteToDisk(const SimpleIndex::EntrySet& entry_set,
279                                   uint64 cache_size,
280                                   const base::TimeTicks& start,
281                                   bool app_on_background,
282                                   const base::Closure& callback) {
283   IndexMetadata index_metadata(entry_set.size(), cache_size);
284   scoped_ptr<Pickle> pickle = Serialize(index_metadata, entry_set);
285   base::Closure task =
286       base::Bind(&SimpleIndexFile::SyncWriteToDisk,
287                  cache_type_, cache_directory_, index_file_, temp_index_file_,
288                  base::Passed(&pickle), start, app_on_background);
289   if (callback.is_null())
290     cache_thread_->PostTask(FROM_HERE, task);
291   else
292     cache_thread_->PostTaskAndReply(FROM_HERE, task, callback);
293 }
294
295 // static
296 void SimpleIndexFile::SyncLoadIndexEntries(
297     net::CacheType cache_type,
298     base::Time cache_last_modified,
299     const base::FilePath& cache_directory,
300     const base::FilePath& index_file_path,
301     SimpleIndexLoadResult* out_result) {
302   // Load the index and find its age.
303   base::Time last_cache_seen_by_index;
304   SyncLoadFromDisk(index_file_path, &last_cache_seen_by_index, out_result);
305
306   // Consider the index loaded if it is fresh.
307   const bool index_file_existed = base::PathExists(index_file_path);
308   if (!out_result->did_load) {
309     if (index_file_existed)
310       UmaRecordIndexFileState(INDEX_STATE_CORRUPT, cache_type);
311   } else {
312     if (cache_last_modified <= last_cache_seen_by_index) {
313       base::Time latest_dir_mtime;
314       simple_util::GetMTime(cache_directory, &latest_dir_mtime);
315       if (LegacyIsIndexFileStale(latest_dir_mtime, index_file_path)) {
316         UmaRecordIndexFileState(INDEX_STATE_FRESH_CONCURRENT_UPDATES,
317                                 cache_type);
318       } else {
319         UmaRecordIndexFileState(INDEX_STATE_FRESH, cache_type);
320       }
321       UmaRecordIndexInitMethod(INITIALIZE_METHOD_LOADED, cache_type);
322       return;
323     }
324     UmaRecordIndexFileState(INDEX_STATE_STALE, cache_type);
325   }
326
327   // Reconstruct the index by scanning the disk for entries.
328   const base::TimeTicks start = base::TimeTicks::Now();
329   SyncRestoreFromDisk(cache_directory, index_file_path, out_result);
330   SIMPLE_CACHE_UMA(MEDIUM_TIMES, "IndexRestoreTime", cache_type,
331                    base::TimeTicks::Now() - start);
332   SIMPLE_CACHE_UMA(COUNTS, "IndexEntriesRestored", cache_type,
333                    out_result->entries.size());
334   if (index_file_existed) {
335     UmaRecordIndexInitMethod(INITIALIZE_METHOD_RECOVERED, cache_type);
336   } else {
337     UmaRecordIndexInitMethod(INITIALIZE_METHOD_NEWCACHE, cache_type);
338     SIMPLE_CACHE_UMA(COUNTS,
339                      "IndexCreatedEntryCount", cache_type,
340                      out_result->entries.size());
341   }
342 }
343
344 // static
345 void SimpleIndexFile::SyncLoadFromDisk(const base::FilePath& index_filename,
346                                        base::Time* out_last_cache_seen_by_index,
347                                        SimpleIndexLoadResult* out_result) {
348   out_result->Reset();
349
350   File file(index_filename,
351             File::FLAG_OPEN | File::FLAG_READ | File::FLAG_SHARE_DELETE);
352   if (!file.IsValid())
353     return;
354
355   base::MemoryMappedFile index_file_map;
356   if (!index_file_map.Initialize(file.Pass())) {
357     simple_util::SimpleCacheDeleteFile(index_filename);
358     return;
359   }
360
361   SimpleIndexFile::Deserialize(
362       reinterpret_cast<const char*>(index_file_map.data()),
363       index_file_map.length(),
364       out_last_cache_seen_by_index,
365       out_result);
366
367   if (!out_result->did_load)
368     simple_util::SimpleCacheDeleteFile(index_filename);
369 }
370
371 // static
372 scoped_ptr<Pickle> SimpleIndexFile::Serialize(
373     const SimpleIndexFile::IndexMetadata& index_metadata,
374     const SimpleIndex::EntrySet& entries) {
375   scoped_ptr<Pickle> pickle(new Pickle(sizeof(SimpleIndexFile::PickleHeader)));
376
377   index_metadata.Serialize(pickle.get());
378   for (SimpleIndex::EntrySet::const_iterator it = entries.begin();
379        it != entries.end(); ++it) {
380     pickle->WriteUInt64(it->first);
381     it->second.Serialize(pickle.get());
382   }
383   return pickle.Pass();
384 }
385
386 // static
387 void SimpleIndexFile::Deserialize(const char* data, int data_len,
388                                   base::Time* out_cache_last_modified,
389                                   SimpleIndexLoadResult* out_result) {
390   DCHECK(data);
391
392   out_result->Reset();
393   SimpleIndex::EntrySet* entries = &out_result->entries;
394
395   Pickle pickle(data, data_len);
396   if (!pickle.data()) {
397     LOG(WARNING) << "Corrupt Simple Index File.";
398     return;
399   }
400
401   PickleIterator pickle_it(pickle);
402   SimpleIndexFile::PickleHeader* header_p =
403       pickle.headerT<SimpleIndexFile::PickleHeader>();
404   const uint32 crc_read = header_p->crc;
405   const uint32 crc_calculated = CalculatePickleCRC(pickle);
406
407   if (crc_read != crc_calculated) {
408     LOG(WARNING) << "Invalid CRC in Simple Index file.";
409     return;
410   }
411
412   SimpleIndexFile::IndexMetadata index_metadata;
413   if (!index_metadata.Deserialize(&pickle_it)) {
414     LOG(ERROR) << "Invalid index_metadata on Simple Cache Index.";
415     return;
416   }
417
418   if (!index_metadata.CheckIndexMetadata()) {
419     LOG(ERROR) << "Invalid index_metadata on Simple Cache Index.";
420     return;
421   }
422
423 #if !defined(OS_WIN)
424   // TODO(gavinp): Consider using std::unordered_map.
425   entries->resize(index_metadata.GetNumberOfEntries() + kExtraSizeForMerge);
426 #endif
427   while (entries->size() < index_metadata.GetNumberOfEntries()) {
428     uint64 hash_key;
429     EntryMetadata entry_metadata;
430     if (!pickle_it.ReadUInt64(&hash_key) ||
431         !entry_metadata.Deserialize(&pickle_it)) {
432       LOG(WARNING) << "Invalid EntryMetadata in Simple Index file.";
433       entries->clear();
434       return;
435     }
436     SimpleIndex::InsertInEntrySet(hash_key, entry_metadata, entries);
437   }
438
439   int64 cache_last_modified;
440   if (!pickle_it.ReadInt64(&cache_last_modified)) {
441     entries->clear();
442     return;
443   }
444   DCHECK(out_cache_last_modified);
445   *out_cache_last_modified = base::Time::FromInternalValue(cache_last_modified);
446
447   out_result->did_load = true;
448 }
449
450 // static
451 void SimpleIndexFile::SyncRestoreFromDisk(
452     const base::FilePath& cache_directory,
453     const base::FilePath& index_file_path,
454     SimpleIndexLoadResult* out_result) {
455   VLOG(1) << "Simple Cache Index is being restored from disk.";
456   simple_util::SimpleCacheDeleteFile(index_file_path);
457   out_result->Reset();
458   SimpleIndex::EntrySet* entries = &out_result->entries;
459
460   const bool did_succeed = TraverseCacheDirectory(
461       cache_directory, base::Bind(&ProcessEntryFile, entries));
462   if (!did_succeed) {
463     LOG(ERROR) << "Could not reconstruct index from disk";
464     return;
465   }
466   out_result->did_load = true;
467   // When we restore from disk we write the merged index file to disk right
468   // away, this might save us from having to restore again next time.
469   out_result->flush_required = true;
470 }
471
472 // static
473 bool SimpleIndexFile::LegacyIsIndexFileStale(
474     base::Time cache_last_modified,
475     const base::FilePath& index_file_path) {
476   base::Time index_mtime;
477   if (!simple_util::GetMTime(index_file_path, &index_mtime))
478     return true;
479   return index_mtime < cache_last_modified;
480 }
481
482 }  // namespace disk_cache