1 // Copyright (c) 2013 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
5 #include "chrome/browser/chromeos/drive/search_metadata.h"
10 #include "base/bind.h"
11 #include "base/i18n/string_search.h"
12 #include "base/metrics/histogram.h"
13 #include "base/strings/utf_string_conversions.h"
14 #include "base/time/time.h"
15 #include "chrome/browser/chromeos/drive/file_system_util.h"
16 #include "content/public/browser/browser_thread.h"
17 #include "google_apis/drive/gdata_wapi_parser.h"
18 #include "net/base/escape.h"
20 using content::BrowserThread;
27 struct ResultCandidate {
28 ResultCandidate(const std::string& local_id,
29 const ResourceEntry& entry,
30 const std::string& highlighted_base_name)
33 highlighted_base_name(highlighted_base_name) {
38 std::string highlighted_base_name;
41 // Used to sort the result candidates per the last accessed/modified time. The
42 // recently accessed/modified files come first.
43 bool CompareByTimestamp(const ResourceEntry& a, const ResourceEntry& b) {
44 const PlatformFileInfoProto& a_file_info = a.file_info();
45 const PlatformFileInfoProto& b_file_info = b.file_info();
47 if (a_file_info.last_accessed() != b_file_info.last_accessed())
48 return a_file_info.last_accessed() > b_file_info.last_accessed();
50 // When the entries have the same last access time (which happens quite often
51 // because Drive server doesn't set the field until an entry is viewed via
52 // drive.google.com), we use last modified time as the tie breaker.
53 return a_file_info.last_modified() > b_file_info.last_modified();
56 struct ResultCandidateComparator {
57 bool operator()(const ResultCandidate* a, const ResultCandidate* b) const {
58 return CompareByTimestamp(a->entry, b->entry);
62 // A wrapper of std::priority_queue which deals with pointers of values.
63 template<typename T, typename Compare>
64 class ScopedPriorityQueue {
66 ScopedPriorityQueue() {}
68 ~ScopedPriorityQueue() {
73 bool empty() const { return queue_.empty(); }
75 size_t size() const { return queue_.size(); }
77 const T* top() const { return queue_.top(); }
79 void push(T* x) { queue_.push(x); }
82 // Keep top alive for the pop() call so that debug checks can access
83 // underlying data (e.g. validating heap property of the priority queue
84 // will call the comparator).
85 T* saved_top = queue_.top();
91 std::priority_queue<T*, std::vector<T*>, Compare> queue_;
93 DISALLOW_COPY_AND_ASSIGN(ScopedPriorityQueue);
96 // Classifies the given entry as hidden if it's not under specific directories.
97 class HiddenEntryClassifier {
99 HiddenEntryClassifier(ResourceMetadata* metadata,
100 const std::string& mydrive_local_id)
101 : metadata_(metadata) {
102 // Only things under My Drive and drive/other are not hidden.
103 is_hiding_child_[mydrive_local_id] = false;
104 is_hiding_child_[util::kDriveOtherDirLocalId] = false;
106 // Everything else is hidden, including the directories mentioned above
108 is_hiding_child_[""] = true;
111 // |result| is set to true if |entry| is hidden.
112 FileError IsHidden(const ResourceEntry& entry, bool* result) {
113 // Look up for parents recursively.
114 std::vector<std::string> undetermined_ids;
115 undetermined_ids.push_back(entry.parent_local_id());
117 std::map<std::string, bool>::iterator it =
118 is_hiding_child_.find(undetermined_ids.back());
119 for (; it == is_hiding_child_.end();
120 it = is_hiding_child_.find(undetermined_ids.back())) {
121 ResourceEntry parent;
123 metadata_->GetResourceEntryById(undetermined_ids.back(), &parent);
124 if (error != FILE_ERROR_OK)
126 undetermined_ids.push_back(parent.parent_local_id());
130 undetermined_ids.pop_back(); // The last one is already in the map.
131 for (size_t i = 0; i < undetermined_ids.size(); ++i)
132 is_hiding_child_[undetermined_ids[i]] = it->second;
134 *result = it->second;
135 return FILE_ERROR_OK;
139 ResourceMetadata* metadata_;
141 // local ID to is_hidden map.
142 std::map<std::string, bool> is_hiding_child_;
145 // Returns true if |entry| is eligible for the search |options| and should be
146 // tested for the match with the query. If
147 // SEARCH_METADATA_EXCLUDE_HOSTED_DOCUMENTS is requested, the hosted documents
148 // are skipped. If SEARCH_METADATA_EXCLUDE_DIRECTORIES is requested, the
149 // directories are skipped. If SEARCH_METADATA_SHARED_WITH_ME is requested, only
150 // the entries with shared-with-me label will be tested. If
151 // SEARCH_METADATA_OFFLINE is requested, only hosted documents and cached files
152 // match with the query. This option can not be used with other options.
153 bool IsEligibleEntry(const ResourceEntry& entry,
154 ResourceMetadata::Iterator* it,
156 if ((options & SEARCH_METADATA_EXCLUDE_HOSTED_DOCUMENTS) &&
157 entry.file_specific_info().is_hosted_document())
160 if ((options & SEARCH_METADATA_EXCLUDE_DIRECTORIES) &&
161 entry.file_info().is_directory())
164 if (options & SEARCH_METADATA_SHARED_WITH_ME)
165 return entry.shared_with_me();
167 if (options & SEARCH_METADATA_OFFLINE) {
168 if (entry.file_specific_info().is_hosted_document()) {
169 // Not all hosted documents are cached by Drive offline app.
170 // http://support.google.com/drive/bin/answer.py?hl=en&answer=1628467
171 switch (google_apis::ResourceEntry::GetEntryKindFromExtension(
172 entry.file_specific_info().document_extension())) {
173 case google_apis::ENTRY_KIND_DOCUMENT:
174 case google_apis::ENTRY_KIND_SPREADSHEET:
175 case google_apis::ENTRY_KIND_PRESENTATION:
176 case google_apis::ENTRY_KIND_DRAWING:
182 FileCacheEntry cache_entry;
183 it->GetCacheEntry(&cache_entry);
184 return cache_entry.is_present();
191 // Used to implement SearchMetadata.
192 // Adds entry to the result when appropriate.
193 // In particular, if |query| is non-null, only adds files with the name matching
195 FileError MaybeAddEntryToResult(
196 ResourceMetadata* resource_metadata,
197 ResourceMetadata::Iterator* it,
198 base::i18n::FixedPatternStringSearchIgnoringCaseAndAccents* query,
200 size_t at_most_num_matches,
201 HiddenEntryClassifier* hidden_entry_classifier,
202 ScopedPriorityQueue<ResultCandidate,
203 ResultCandidateComparator>* result_candidates) {
204 DCHECK_GE(at_most_num_matches, result_candidates->size());
206 const ResourceEntry& entry = it->GetValue();
208 // If the candidate set is already full, and this |entry| is old, do nothing.
209 // We perform this check first in order to avoid the costly find-and-highlight
210 // or FilePath lookup as much as possible.
211 if (result_candidates->size() == at_most_num_matches &&
212 !CompareByTimestamp(entry, result_candidates->top()->entry))
213 return FILE_ERROR_OK;
215 // Add |entry| to the result if the entry is eligible for the given
216 // |options| and matches the query. The base name of the entry must
217 // contain |query| to match the query.
218 std::string highlighted;
219 if (!IsEligibleEntry(entry, it, options) ||
220 (query && !FindAndHighlight(entry.base_name(), query, &highlighted)))
221 return FILE_ERROR_OK;
223 // Hidden entry should not be returned.
225 FileError error = hidden_entry_classifier->IsHidden(entry, &hidden);
226 if (error != FILE_ERROR_OK || hidden)
229 // Make space for |entry| when appropriate.
230 if (result_candidates->size() == at_most_num_matches)
231 result_candidates->pop();
232 result_candidates->push(new ResultCandidate(it->GetID(), entry, highlighted));
233 return FILE_ERROR_OK;
236 // Implements SearchMetadata().
237 FileError SearchMetadataOnBlockingPool(ResourceMetadata* resource_metadata,
238 const std::string& query_text,
240 int at_most_num_matches,
241 MetadataSearchResultVector* results) {
242 ScopedPriorityQueue<ResultCandidate,
243 ResultCandidateComparator> result_candidates;
245 // Prepare data structure for searching.
246 base::i18n::FixedPatternStringSearchIgnoringCaseAndAccents query(
247 base::UTF8ToUTF16(query_text));
249 // Prepare an object to filter out hidden entries.
250 ResourceEntry mydrive;
251 FileError error = resource_metadata->GetResourceEntryByPath(
252 util::GetDriveMyDriveRootPath(), &mydrive);
253 if (error != FILE_ERROR_OK)
255 HiddenEntryClassifier hidden_entry_classifier(resource_metadata,
258 // Iterate over entries.
259 scoped_ptr<ResourceMetadata::Iterator> it = resource_metadata->GetIterator();
260 for (; !it->IsAtEnd(); it->Advance()) {
261 FileError error = MaybeAddEntryToResult(resource_metadata, it.get(),
262 query_text.empty() ? NULL : &query,
265 &hidden_entry_classifier,
267 if (error != FILE_ERROR_OK)
271 // Prepare the result.
272 for (; !result_candidates.empty(); result_candidates.pop()) {
273 const ResultCandidate& candidate = *result_candidates.top();
274 // The path field of entries in result_candidates are empty at this point,
275 // because we don't want to run the expensive metadata DB look up except for
276 // the final results. Hence, here we fill the part.
277 base::FilePath path = resource_metadata->GetFilePath(candidate.local_id);
279 return FILE_ERROR_FAILED;
280 bool is_directory = candidate.entry.file_info().is_directory();
281 results->push_back(MetadataSearchResult(
282 path, is_directory, candidate.highlighted_base_name));
285 // Reverse the order here because |result_candidates| puts the most
286 // uninteresting candidate at the top.
287 std::reverse(results->begin(), results->end());
289 return FILE_ERROR_OK;
292 // Runs the SearchMetadataCallback and updates the histogram.
293 void RunSearchMetadataCallback(const SearchMetadataCallback& callback,
294 const base::TimeTicks& start_time,
295 scoped_ptr<MetadataSearchResultVector> results,
297 if (error != FILE_ERROR_OK)
299 callback.Run(error, results.Pass());
301 UMA_HISTOGRAM_TIMES("Drive.SearchMetadataTime",
302 base::TimeTicks::Now() - start_time);
308 scoped_refptr<base::SequencedTaskRunner> blocking_task_runner,
309 ResourceMetadata* resource_metadata,
310 const std::string& query,
312 int at_most_num_matches,
313 const SearchMetadataCallback& callback) {
314 DCHECK(BrowserThread::CurrentlyOn(BrowserThread::UI));
315 DCHECK_LE(0, at_most_num_matches);
316 DCHECK(!callback.is_null());
318 const base::TimeTicks start_time = base::TimeTicks::Now();
320 scoped_ptr<MetadataSearchResultVector> results(
321 new MetadataSearchResultVector);
322 MetadataSearchResultVector* results_ptr = results.get();
323 base::PostTaskAndReplyWithResult(blocking_task_runner.get(),
325 base::Bind(&SearchMetadataOnBlockingPool,
331 base::Bind(&RunSearchMetadataCallback,
334 base::Passed(&results)));
337 bool FindAndHighlight(
338 const std::string& text,
339 base::i18n::FixedPatternStringSearchIgnoringCaseAndAccents* query,
340 std::string* highlighted_text) {
342 DCHECK(highlighted_text);
343 highlighted_text->clear();
345 base::string16 text16 = base::UTF8ToUTF16(text);
346 size_t match_start = 0;
347 size_t match_length = 0;
348 if (!query->Search(text16, &match_start, &match_length))
351 base::string16 pre = text16.substr(0, match_start);
352 base::string16 match = text16.substr(match_start, match_length);
353 base::string16 post = text16.substr(match_start + match_length);
354 highlighted_text->append(net::EscapeForHTML(base::UTF16ToUTF8(pre)));
355 highlighted_text->append("<b>");
356 highlighted_text->append(net::EscapeForHTML(base::UTF16ToUTF8(match)));
357 highlighted_text->append("</b>");
358 highlighted_text->append(net::EscapeForHTML(base::UTF16ToUTF8(post)));
362 } // namespace internal