1 // Copyright 2014 The Chromium Authors
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
5 #include "components/search_engines/util.h"
14 #include <unordered_map>
17 #include "base/check_op.h"
18 #include "base/ranges/algorithm.h"
19 #include "base/time/time.h"
20 #include "components/prefs/pref_service.h"
21 #include "components/search_engines/template_url.h"
22 #include "components/search_engines/template_url_prepopulate_data.h"
23 #include "components/search_engines/template_url_service.h"
24 #include "components/search_engines/template_url_starter_pack_data.h"
26 std::u16string GetDefaultSearchEngineName(TemplateURLService* service) {
28 const TemplateURL* const default_provider =
29 service->GetDefaultSearchProvider();
30 if (!default_provider) {
31 // TODO(cpu): bug 1187517. It is possible to have no default provider.
32 // returning an empty string is a stopgap measure for the crash
33 // http://code.google.com/p/chromium/issues/detail?id=2573
34 return std::u16string();
36 return default_provider->short_name();
39 GURL GetDefaultSearchURLForSearchTerms(TemplateURLService* service,
40 const std::u16string& terms) {
42 const TemplateURL* default_provider = service->GetDefaultSearchProvider();
43 if (!default_provider)
45 const TemplateURLRef& search_url = default_provider->url_ref();
46 DCHECK(search_url.SupportsReplacement(service->search_terms_data()));
47 TemplateURLRef::SearchTermsArgs search_terms_args(terms);
48 search_terms_args.append_extra_query_params_from_command_line = true;
49 return GURL(search_url.ReplaceSearchTerms(search_terms_args,
50 service->search_terms_data()));
// Collapses entries of |template_urls| that share a prepopulate_id so that one
// representative per id remains, then replaces the contents of |template_urls|
// with the surviving entries. Entries that lose the selection are removed
// through |service| (RemoveKeyword) and, when |removed_keyword_guids| is
// non-null, their sync GUIDs are inserted into it.
// |prepopulated_urls| and |default_search_provider| only influence which
// duplicate is kept; |template_urls| must not be null.
// NOTE(review): the line numbers embedded in this listing skip values (e.g.
// 87 -> 91, 100 -> 102, 134 -> 136, 148 -> 150), so closing braces, else
// branches and possibly short statements are not shown here. Confirm against
// the complete file before changing the control flow.
53 void RemoveDuplicatePrepopulateIDs(
54 KeywordWebDataService* service,
55 const std::vector<std::unique_ptr<TemplateURLData>>& prepopulated_urls,
56 TemplateURL* default_search_provider,
57 TemplateURLService::OwnedTemplateURLVector* template_urls,
58 const SearchTermsData& search_terms_data,
59 std::set<std::string>* removed_keyword_guids) {
60 DCHECK(template_urls);
// Receives the surviving entries; swapped into |template_urls| at the end.
61 TemplateURLService::OwnedTemplateURLVector checked_urls;
63 // For convenience construct an ID->TemplateURL* map from |prepopulated_urls|.
64 std::map<int, TemplateURLData*> prepopulated_url_map;
65 for (const auto& url : prepopulated_urls)
66 prepopulated_url_map[url->prepopulate_id] = url.get();
// Sentinel stored in DuplicationData until a representative has been recorded.
68 constexpr size_t invalid_index = std::numeric_limits<size_t>::max();
69 // A helper structure for deduplicating elements with the same prepopulate_id.
70 struct DuplicationData {
71 DuplicationData() : index_representative(invalid_index) {}
73 // The index into checked_urls at which the best representative is stored.
74 size_t index_representative;
76 // Proper duplicates for consideration during selection phase. This
77 // does not include the representative stored in checked_urls.
78 TemplateURLService::OwnedTemplateURLVector duplicates;
80 // Map from prepopulate_id to data for deduplication and selection.
81 std::unordered_map<int, DuplicationData> duplication_map;
// Predicate: |turl| has the prepopulate_id of |default_search_provider| and
// the provider reports the same keyword for it (HasSameKeywordAs). Always
// false when there is no default search provider.
83 const auto has_default_search_keyword = [&](const auto& turl) {
84 return default_search_provider &&
85 (default_search_provider->prepopulate_id() ==
86 turl->prepopulate_id()) &&
87 default_search_provider->HasSameKeywordAs(turl->data(),
91 // Deduplication phase: move elements into new vector, preserving order while
92 // gathering duplicates into separate container for selection.
93 for (auto& turl : *template_urls) {
94 const int prepopulate_id = turl->prepopulate_id();
96 auto& duplication_data = duplication_map[prepopulate_id];
97 if (duplication_data.index_representative == invalid_index) {
98 // This is the first found.
99 duplication_data.index_representative = checked_urls.size();
100 checked_urls.push_back(std::move(turl));
102 // This is a duplicate.
103 duplication_data.duplicates.push_back(std::move(turl));
106 checked_urls.push_back(std::move(turl));
110 // Selection and cleanup phase: swap out elements if necessary to ensure new
111 // vector contains only the best representative for each prepopulate_id.
112 // Then delete the remaining duplicates.
113 for (auto& id_data : duplication_map) {
114 const auto prepopulated_url = prepopulated_url_map.find(id_data.first);
// Predicate: prepopulated data exists for this id and |turl| has the same
// keyword as that data (HasSameKeywordAs).
115 const auto has_prepopulated_keyword = [&](const auto& turl) {
116 return (prepopulated_url != prepopulated_url_map.end()) &&
117 turl->HasSameKeywordAs(*prepopulated_url->second,
121 // If the user-selected DSE is a prepopulated engine its properties will
122 // either come from the prepopulation origin or from the user preferences
123 // file (see DefaultSearchManager). Those properties will end up
124 // overwriting whatever we load now anyway. If we are eliminating
125 // duplicates, then, we err on the side of keeping the thing that looks
126 // more like the value we will end up with in the end.
127 // Otherwise, a URL is best if it matches the prepopulated data's keyword;
128 // if none match, just fall back to using the one with the lowest ID.
129 auto& best = checked_urls[id_data.second.index_representative];
130 if (!has_default_search_keyword(best)) {
131 bool matched_keyword = has_prepopulated_keyword(best);
// Swapping exchanges ownership in place: the previous |best| takes the
// duplicate's slot and is cleaned up with the other duplicates below.
132 for (auto& duplicate : id_data.second.duplicates) {
133 if (has_default_search_keyword(duplicate)) {
134 best.swap(duplicate);
136 } else if (matched_keyword) {
138 } else if (has_prepopulated_keyword(duplicate)) {
139 best.swap(duplicate);
140 matched_keyword = true;
141 } else if (duplicate->id() < best->id()) {
142 best.swap(duplicate);
147 // Clean up what's left.
148 for (const auto& duplicate : id_data.second.duplicates) {
// NOTE(review): |service| is dereferenced here, while
// GetSearchProvidersUsingKeywordResult() checks it for null before use; a
// guard may sit on the line that is not shown (149) - confirm.
150 service->RemoveKeyword(duplicate->id());
151 if (removed_keyword_guids)
152 removed_keyword_guids->insert(duplicate->sync_guid());
157 // Return the checked URLs.
158 template_urls->swap(checked_urls);
161 // Returns the TemplateURL with id specified from the list of TemplateURLs.
162 // If not found, returns NULL.
// The lookup is a linear scan that compares each entry's id() with |id|.
// NOTE(review): the line declaring |id| (165) and the lines holding the return
// statements (168-172) are not shown in this listing; confirm them against the
// complete file.
163 TemplateURL* GetTemplateURLByID(
164 const TemplateURLService::TemplateURLVector& template_urls,
166 for (auto i(template_urls.begin()); i != template_urls.end(); ++i) {
167 if ((*i)->id() == id) {
174 TemplateURL* FindURLByPrepopulateID(
175 const TemplateURLService::TemplateURLVector& template_urls,
176 int prepopulate_id) {
177 for (auto i = template_urls.begin(); i < template_urls.end(); ++i) {
178 if ((*i)->prepopulate_id() == prepopulate_id)
184 void MergeIntoEngineData(const TemplateURL* original_turl,
185 TemplateURLData* url_to_update,
186 TemplateURLMergeOption merge_option) {
187 DCHECK(original_turl->prepopulate_id() == 0 ||
188 original_turl->prepopulate_id() == url_to_update->prepopulate_id);
189 DCHECK(original_turl->starter_pack_id() == 0 ||
190 original_turl->starter_pack_id() == url_to_update->starter_pack_id);
191 // When the user modified search engine's properties or search engine is
192 // imported from Play API data we need to preserve certain search engine
193 // properties from overriding with prepopulated data.
194 bool preserve_user_edits =
195 (merge_option != TemplateURLMergeOption::kOverwriteUserEdits &&
196 (!original_turl->safe_for_autoreplace() ||
197 original_turl->created_from_play_api()));
198 if (preserve_user_edits) {
199 url_to_update->safe_for_autoreplace = original_turl->safe_for_autoreplace();
200 url_to_update->SetShortName(original_turl->short_name());
201 url_to_update->SetKeyword(original_turl->keyword());
202 if (original_turl->created_from_play_api()) {
203 // TODO(crbug/1002271): Search url from Play API might contain attribution
204 // info and therefore should be preserved through prepopulated data
205 // update. In the future we might decide to take different approach to
206 // pass attribution info to search providers.
207 url_to_update->SetURL(original_turl->url());
210 url_to_update->id = original_turl->id();
211 url_to_update->sync_guid = original_turl->sync_guid();
212 url_to_update->date_created = original_turl->date_created();
213 url_to_update->last_modified = original_turl->last_modified();
214 url_to_update->created_from_play_api = original_turl->created_from_play_api();
// Special member functions of ActionsFromCurrentData, all compiler-generated
// but defined out of line in this file.
// Default constructor.
217 ActionsFromCurrentData::ActionsFromCurrentData() = default;
// Copy constructor.
219 ActionsFromCurrentData::ActionsFromCurrentData(
220 const ActionsFromCurrentData& other) = default;
// Destructor.
222 ActionsFromCurrentData::~ActionsFromCurrentData() = default;
224 void MergeEnginesFromPrepopulateData(
225 KeywordWebDataService* service,
226 std::vector<std::unique_ptr<TemplateURLData>>* prepopulated_urls,
227 TemplateURLService::OwnedTemplateURLVector* template_urls,
228 TemplateURL* default_search_provider,
229 std::set<std::string>* removed_keyword_guids) {
230 DCHECK(prepopulated_urls);
231 DCHECK(template_urls);
233 ActionsFromCurrentData actions(CreateActionsFromCurrentPrepopulateData(
234 prepopulated_urls, *template_urls, default_search_provider));
236 ApplyActionsFromCurrentData(actions, service, template_urls,
237 default_search_provider, removed_keyword_guids);
240 ActionsFromCurrentData CreateActionsFromCurrentPrepopulateData(
241 std::vector<std::unique_ptr<TemplateURLData>>* prepopulated_urls,
242 const TemplateURLService::OwnedTemplateURLVector& existing_urls,
243 const TemplateURL* default_search_provider) {
244 // Create a map to hold all provided |template_urls| that originally came from
245 // prepopulate data (i.e. have a non-zero prepopulate_id()).
246 TemplateURL* play_api_turl = nullptr;
247 std::map<int, TemplateURL*> id_to_turl;
248 for (auto& turl : existing_urls) {
249 if (turl->created_from_play_api()) {
250 DCHECK_EQ(nullptr, play_api_turl);
251 play_api_turl = turl.get();
253 int prepopulate_id = turl->prepopulate_id();
254 if (prepopulate_id > 0)
255 id_to_turl[prepopulate_id] = turl.get();
258 // For each current prepopulated URL, check whether |template_urls| contained
259 // a matching prepopulated URL. If so, update the passed-in URL to match the
260 // current data. (If the passed-in URL was user-edited, we persist the user's
261 // name and keyword.) If not, add the prepopulated URL.
262 ActionsFromCurrentData actions;
263 for (auto& prepopulated_url : *prepopulated_urls) {
264 const int prepopulated_id = prepopulated_url->prepopulate_id;
265 DCHECK_NE(0, prepopulated_id);
267 auto existing_url_iter = id_to_turl.find(prepopulated_id);
268 TemplateURL* existing_url = nullptr;
269 if (existing_url_iter != id_to_turl.end()) {
270 existing_url = existing_url_iter->second;
271 id_to_turl.erase(existing_url_iter);
272 } else if (play_api_turl &&
273 play_api_turl->keyword() == prepopulated_url->keyword()) {
274 existing_url = play_api_turl;
277 if (existing_url != nullptr) {
278 // Update the data store with the new prepopulated data. Preserve user
279 // edits to the name and keyword.
280 MergeIntoEngineData(existing_url, prepopulated_url.get());
281 // Update last_modified to ensure that if this entry is later merged with
282 // entries from Sync, the conflict resolution logic knows that this was
283 // updated and propagates the new values to the server.
284 prepopulated_url->last_modified = base::Time::Now();
285 actions.edited_engines.push_back({existing_url, *prepopulated_url});
287 actions.added_engines.push_back(*prepopulated_url);
291 // The block above removed all the URLs from the |id_to_turl| map that were
292 // found in the prepopulate data. Any remaining URLs that haven't been
293 // user-edited or made default can be removed from the data store.
294 // We assume that this entry is equivalent to the DSE if its prepopulate ID
295 // and keyword both match. If the prepopulate ID _does_ match all properties
296 // will be replaced with those from |default_search_provider| anyway.
297 for (auto& i : id_to_turl) {
298 TemplateURL* template_url = i.second;
299 if ((template_url->safe_for_autoreplace()) &&
300 (!default_search_provider ||
301 (template_url->prepopulate_id() !=
302 default_search_provider->prepopulate_id()) ||
303 (template_url->keyword() != default_search_provider->keyword()))) {
304 if (template_url->created_from_play_api()) {
305 // Don't remove the entry created from Play API. Just reset
306 // prepopulate_id for it.
307 TemplateURLData data = template_url->data();
308 data.prepopulate_id = 0;
309 actions.edited_engines.push_back({template_url, data});
311 actions.removed_engines.push_back(template_url);
319 void MergeEnginesFromStarterPackData(
320 KeywordWebDataService* service,
321 TemplateURLService::OwnedTemplateURLVector* template_urls,
322 TemplateURL* default_search_provider,
323 std::set<std::string>* removed_keyword_guids,
324 TemplateURLMergeOption merge_option) {
325 DCHECK(template_urls);
327 std::vector<std::unique_ptr<TemplateURLData>> starter_pack_urls =
328 TemplateURLStarterPackData::GetStarterPackEngines();
330 ActionsFromCurrentData actions(CreateActionsFromCurrentStarterPackData(
331 &starter_pack_urls, *template_urls, merge_option));
333 ApplyActionsFromCurrentData(actions, service, template_urls,
334 default_search_provider, removed_keyword_guids);
337 ActionsFromCurrentData CreateActionsFromCurrentStarterPackData(
338 std::vector<std::unique_ptr<TemplateURLData>>* starter_pack_urls,
339 const TemplateURLService::OwnedTemplateURLVector& existing_urls,
340 TemplateURLMergeOption merge_option) {
341 // Create a map to hold all provided |template_urls| that originally came from
342 // starter_pack data (i.e. have a non-zero starter_pack_id()).
343 std::map<int, TemplateURL*> id_to_turl;
344 for (auto& turl : existing_urls) {
345 int starter_pack_id = turl->starter_pack_id();
346 if (starter_pack_id > 0)
347 id_to_turl[starter_pack_id] = turl.get();
350 // For each current starter pack URL, check whether |template_urls| contained
351 // a matching starter pack URL. If so, update the passed-in URL to match the
352 // current data. (If the passed-in URL was user-edited, we persist the user's
353 // name and keyword.) If not, add the prepopulated URL.
354 ActionsFromCurrentData actions;
355 for (auto& url : *starter_pack_urls) {
356 const int starter_pack_id = url->starter_pack_id;
357 DCHECK_NE(0, starter_pack_id);
359 auto existing_url_iter = id_to_turl.find(starter_pack_id);
360 TemplateURL* existing_url = nullptr;
361 if (existing_url_iter != id_to_turl.end()) {
362 existing_url = existing_url_iter->second;
363 id_to_turl.erase(existing_url_iter);
366 if (existing_url != nullptr) {
367 // Update the data store with the new prepopulated data. Preserve user
368 // edits to the name and keyword unless `merge_option` is set to
369 // kOverwriteUserEdits.
370 MergeIntoEngineData(existing_url, url.get(), merge_option);
371 // Update last_modified to ensure that if this entry is later merged with
372 // entries from Sync, the conflict resolution logic knows that this was
373 // updated and propagates the new values to the server.
374 url->last_modified = base::Time::Now();
375 actions.edited_engines.push_back({existing_url, *url});
377 actions.added_engines.push_back(*url);
381 // The block above removed all the URLs from the |id_to_turl| map that were
382 // found in the prepopulate data. Any remaining URLs that haven't been
383 // user-edited can be removed from the data store.
384 for (auto& i : id_to_turl) {
385 TemplateURL* template_url = i.second;
386 if (template_url->safe_for_autoreplace()) {
387 actions.removed_engines.push_back(template_url);
394 void ApplyActionsFromCurrentData(
395 ActionsFromCurrentData actions,
396 KeywordWebDataService* service,
397 TemplateURLService::OwnedTemplateURLVector* template_urls,
398 TemplateURL* default_search_provider,
399 std::set<std::string>* removed_keyword_guids) {
400 DCHECK(template_urls);
403 for (const auto* removed_engine : actions.removed_engines) {
404 auto j = FindTemplateURL(template_urls, removed_engine);
405 DCHECK(j != template_urls->end());
406 DCHECK(!default_search_provider ||
407 (*j)->prepopulate_id() != default_search_provider->prepopulate_id());
408 std::unique_ptr<TemplateURL> template_url = std::move(*j);
409 template_urls->erase(j);
411 service->RemoveKeyword(template_url->id());
412 if (removed_keyword_guids)
413 removed_keyword_guids->insert(template_url->sync_guid());
418 for (const auto& edited_engine : actions.edited_engines) {
419 const TemplateURLData& data = edited_engine.second;
421 service->UpdateKeyword(data);
423 // Replace the entry in |template_urls| with the updated one.
424 auto j = FindTemplateURL(template_urls, edited_engine.first);
425 *j = std::make_unique<TemplateURL>(data);
429 for (const auto& added_engine : actions.added_engines)
430 template_urls->push_back(std::make_unique<TemplateURL>(added_engine));
433 void GetSearchProvidersUsingKeywordResult(
434 const WDTypedResult& result,
435 KeywordWebDataService* service,
437 TemplateURLService::OwnedTemplateURLVector* template_urls,
438 TemplateURL* default_search_provider,
439 const SearchTermsData& search_terms_data,
440 int* new_resource_keyword_version,
441 int* new_resource_starter_pack_version,
442 std::set<std::string>* removed_keyword_guids) {
443 DCHECK(template_urls);
444 DCHECK(template_urls->empty());
445 DCHECK_EQ(KEYWORDS_RESULT, result.GetType());
446 DCHECK(new_resource_keyword_version);
448 WDKeywordsResult keyword_result = reinterpret_cast<
449 const WDResult<WDKeywordsResult>*>(&result)->GetValue();
451 for (auto& keyword : keyword_result.keywords) {
452 // Fix any duplicate encodings in the local database. Note that we don't
453 // adjust the last_modified time of this keyword; this way, we won't later
454 // overwrite any changes on the sync server that happened to this keyword
455 // since the last time we synced. Instead, we also run a de-duping pass on
456 // the server-provided data in
457 // TemplateURLService::CreateTemplateURLFromTemplateURLAndSyncData() and
458 // update the server with the merged, de-duped results at that time. We
459 // still fix here, though, to correct problems in clients that have disabled
460 // search engine sync, since in that case that code will never be reached.
461 if (DeDupeEncodings(&keyword.input_encodings) && service)
462 service->UpdateKeyword(keyword);
463 template_urls->push_back(std::make_unique<TemplateURL>(keyword));
466 *new_resource_keyword_version = keyword_result.builtin_keyword_version;
467 *new_resource_starter_pack_version = keyword_result.starter_pack_version;
468 GetSearchProvidersUsingLoadedEngines(
469 service, prefs, template_urls, default_search_provider, search_terms_data,
470 new_resource_keyword_version, new_resource_starter_pack_version,
471 removed_keyword_guids);
474 void GetSearchProvidersUsingLoadedEngines(
475 KeywordWebDataService* service,
477 TemplateURLService::OwnedTemplateURLVector* template_urls,
478 TemplateURL* default_search_provider,
479 const SearchTermsData& search_terms_data,
480 int* resource_keyword_version,
481 int* resource_starter_pack_version,
482 std::set<std::string>* removed_keyword_guids) {
483 DCHECK(template_urls);
484 DCHECK(resource_keyword_version);
485 std::vector<std::unique_ptr<TemplateURLData>> prepopulated_urls =
486 TemplateURLPrepopulateData::GetPrepopulatedEngines(prefs, nullptr);
487 RemoveDuplicatePrepopulateIDs(service, prepopulated_urls,
488 default_search_provider, template_urls,
489 search_terms_data, removed_keyword_guids);
491 const int prepopulate_resource_keyword_version =
492 TemplateURLPrepopulateData::GetDataVersion(prefs);
493 if (*resource_keyword_version < prepopulate_resource_keyword_version) {
494 MergeEnginesFromPrepopulateData(service, &prepopulated_urls, template_urls,
495 default_search_provider,
496 removed_keyword_guids);
497 *resource_keyword_version = prepopulate_resource_keyword_version;
499 *resource_keyword_version = 0;
502 const int starter_pack_data_version =
503 TemplateURLStarterPackData::GetDataVersion();
504 bool overwrite_user_edits =
505 (*resource_starter_pack_version <
506 TemplateURLStarterPackData::GetFirstCompatibleDataVersion());
507 if (*resource_starter_pack_version < starter_pack_data_version) {
508 MergeEnginesFromStarterPackData(
509 service, template_urls, default_search_provider, removed_keyword_guids,
510 (overwrite_user_edits ? TemplateURLMergeOption::kOverwriteUserEdits
511 : TemplateURLMergeOption::kDefault));
512 *resource_starter_pack_version = starter_pack_data_version;
514 *resource_starter_pack_version = 0;
// Removes repeated values from |encodings| in place, keeping the first
// occurrence of each value and the relative order of the survivors.
// Returns true when at least one duplicate was removed.
bool DeDupeEncodings(std::vector<std::string>* encodings) {
  const auto original_size = encodings->size();
  std::vector<std::string> deduped_encodings;
  std::set<std::string> encoding_set;
  for (const std::string& encoding : *encodings) {
    // insert() reports through .second whether the value was new.
    if (encoding_set.insert(encoding).second)
      deduped_encodings.push_back(encoding);
  }
  encodings->swap(deduped_encodings);
  return encodings->size() != original_size;
}
530 TemplateURLService::OwnedTemplateURLVector::iterator FindTemplateURL(
531 TemplateURLService::OwnedTemplateURLVector* urls,
532 const TemplateURL* url) {
533 return base::ranges::find(*urls, url, &std::unique_ptr<TemplateURL>::get);