1 // Copyright 2014 The Chromium Authors
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
5 #ifndef COMPONENTS_SEARCH_ENGINES_TEMPLATE_URL_H_
6 #define COMPONENTS_SEARCH_ENGINES_TEMPLATE_URL_H_
14 #include "base/gtest_prod_util.h"
15 #include "base/memory/raw_ptr.h"
16 #include "base/time/time.h"
17 #include "components/search_engines/search_engine_type.h"
18 #include "components/search_engines/search_terms_data.h"
19 #include "components/search_engines/template_url_data.h"
20 #include "components/search_engines/template_url_id.h"
21 #include "third_party/metrics_proto/omnibox_event.pb.h"
22 #include "third_party/metrics_proto/omnibox_focus_type.pb.h"
23 #include "third_party/metrics_proto/omnibox_input_type.pb.h"
24 #include "third_party/omnibox_proto/chrome_searchbox_stats.pb.h"
25 #include "ui/gfx/geometry/size.h"
27 #include "url/third_party/mozilla/url_parse.h"
32 // TemplateURLRef -------------------------------------------------------------
34 // A TemplateURLRef represents a single URL within the larger TemplateURL class
35 // (which represents an entire "search engine", see below). If
36 // SupportsReplacement() is true, this URL has placeholders in it, for which
37 // callers can substitute values to get a "real" URL using ReplaceSearchTerms().
39 // TemplateURLRefs always have a non-NULL |owner_| TemplateURL, which they
40 // access in order to get at important data like the underlying URL string or
41 // the associated Profile.
42 class TemplateURLRef {
44 // Magic numbers to pass to ReplaceSearchTerms() for the |accepted_suggestion|
45 // parameter. Most callers aren't using Suggest capabilities and should just
46 // pass NO_SUGGESTIONS_AVAILABLE.
47 // NOTE: Because positive values are meaningful, make sure these are negative!
48 enum AcceptedSuggestion {
49 NO_SUGGESTION_CHOSEN = -1,
50 NO_SUGGESTIONS_AVAILABLE = -2,
53 // Which kind of URL within our owner we are. This allows us to get at the
54 // correct string field. Use |INDEXED| to indicate that the numerical
55 // |index_in_owner_| should be used instead.
66 using RequestSource = SearchTermsData::RequestSource;
68 // Type to store a <content_type, post_data> pair for POST URLs.
69 // The |content_type| (first part of the pair) is the content-type of
70 // the |post_data| (second part of the pair), which is encoded in
71 // "multipart/form-data" format; it also contains the MIME boundary used in
72 // the |post_data|. See http://tools.ietf.org/html/rfc2046 for the details.
73 typedef std::pair<std::string, std::string> PostContent;
75 // This struct encapsulates arguments passed to
76 // TemplateURLRef::ReplaceSearchTerms methods. By default, only search_terms
77 // is required and is passed in the constructor.
78 struct SearchTermsArgs {
80 explicit SearchTermsArgs(const std::u16string& search_terms);
81 SearchTermsArgs(const SearchTermsArgs& other);
84 struct ContextualSearchParams {
85 ContextualSearchParams();
86 // Modern constructor, used when the content is sent in the HTTP header
87 // instead of as CGI parameters.
88 // The |version| tells the server which version of the client is making
90 // The |contextual_cards_version| tells the server which version of
91 // contextual cards integration is being used by the client.
92 // The |home_country| is an ISO country code for the country that the user
93 // considers their permanent home (which may be different from the country
94 // they are currently visiting). Pass an empty string if none available.
95 // The |previous_event_id| is an identifier previously returned by the
96 // server to identify that user interaction.
97 // The |previous_event_results| are the results of the user-interaction of
98 // that previous request.
99 // The "previous_xyz" parameters are documented in go/cs-sanitized.
100 // The |is_exact_search| allows the search request to be narrowed down to
101 // an "exact" search only, meaning just search for X rather than X +
102 // whatever else is in the context. The returned search term should not
103 // be expanded, and the server will honor this along with creating a
104 // narrow Search Term.
105 // The |source_lang| specifies a source language hint to apply for
106 // translation or to indicate that translation might be appropriate.
107 // This comes from CLD evaluating the selection and/or page content.
108 // The |target_lang| specifies the best language to translate into for
109 // the user, which also indicates when translation is appropriate or
110 // helpful. This comes from the Chrome Language Model.
111 // The |fluent_languages| string specifies the languages the user
112 // is fluent in reading. This acts as an alternate set of languages
113 // to consider translating into. The languages are ordered by
114 // fluency, and encoded as a comma-separated list of BCP 47 languages.
115 // The |related_searches_stamp| string contains information that
116 // indicates experiment status and server processing results so that they
117 // can be logged in GWS Sawmill logs for offline analysis for the
118 // Related Searches MVP experiment.
119 // The |apply_lang_hint| specifies whether or not the |source_lang| should
120 // be used as a hint for backend language detection. Otherwise, backend
121 // translation is forced using |source_lang|. Note that this only supports
122 // Partial Translate and so may only be enabled for select clients on the
124 ContextualSearchParams(int version,
125 int contextual_cards_version,
126 std::string home_country,
127 int64_t previous_event_id,
128 int previous_event_results,
129 bool is_exact_search,
130 std::string source_lang,
131 std::string target_lang,
132 std::string fluent_languages,
133 std::string related_searches_stamp,
134 bool apply_lang_hint);
135 ContextualSearchParams(const ContextualSearchParams& other);
136 ~ContextualSearchParams();
138 // Estimates dynamic memory usage.
139 // See base/trace_event/memory_usage_estimator.h for more info.
140 size_t EstimateMemoryUsage() const;
142 // The version of contextual search.
145 // The version of Contextual Cards data to request.
146 // A value of 0 indicates no data needed.
147 int contextual_cards_version = 0;
149 // The locale of the user's home country in an ISO country code format,
150 // or an empty string if not available. This indicates where the user
151 // resides, not where they currently are.
152 std::string home_country;
154 // An EventID from a previous interaction (sent by server, recorded by
156 int64_t previous_event_id = 0l;
158 // An encoded set of booleans that represent the interaction results from
159 // the previous event.
160 int previous_event_results = 0;
162 // A flag that restricts the search to exactly match the selection rather
163 // than expanding the Search Term to include other words in the context.
164 bool is_exact_search = false;
166 // Source language string to translate from.
167 std::string source_lang;
169 // Target language string to be translated into.
170 std::string target_lang;
172 // Alternate target languages that the user is fluent in, encoded in a
174 std::string fluent_languages;
176 // Experiment arm and processing information for the Related Searches
177 // experiment. The value is an arbitrary string that starts with a
178 // schema version number.
179 std::string related_searches_stamp;
181 // Whether hinted language detection should be used on the backend.
182 bool apply_lang_hint = false;
185 // Estimates dynamic memory usage.
186 // See base/trace_event/memory_usage_estimator.h for more info.
187 size_t EstimateMemoryUsage() const;
189 // The search terms (query).
190 std::u16string search_terms;
192 // The original (input) query.
193 std::u16string original_query;
195 // The type the original input query was identified as.
196 metrics::OmniboxInputType input_type = metrics::OmniboxInputType::EMPTY;
198 // Specifies how the user last interacted with the searchbox UI element.
199 metrics::OmniboxFocusType focus_type =
200 metrics::OmniboxFocusType::INTERACTION_DEFAULT;
202 // The optional assisted query stats, aka AQS, used for logging purposes.
203 // This string contains impressions of all autocomplete matches shown
204 // at the query submission time. For privacy reasons, we require the
205 // search provider to support HTTPS protocol in order to receive the AQS
207 // For more details, see go/chrome-suggest-logging.
208 std::string assisted_query_stats;
210 // The optional searchbox stats, reported as gs_lcrp for logging purposes.
211 // This proto message contains information such as impressions of all
212 // autocomplete matches shown at the query submission time.
213 // For privacy reasons, we require the search provider to support HTTPS
214 // protocol in order to receive the gs_lcrp param.
215 // For more details, see go/chrome-suggest-logging-improvement.
216 omnibox::metrics::ChromeSearchboxStats searchbox_stats;
218 // TODO: Remove along with "aq" CGI param.
219 int accepted_suggestion = NO_SUGGESTIONS_AVAILABLE;
221 // The 0-based position of the cursor within the query string at the time
222 // the request was issued. Set to std::u16string::npos if not used.
223 size_t cursor_position = std::u16string::npos;
225 // The URL of the current webpage to be used for experimental zero-prefix
227 std::string current_page_url;
229 // Which omnibox the user used to type the prefix.
230 metrics::OmniboxEventProto::PageClassification page_classification =
231 metrics::OmniboxEventProto::INVALID_SPEC;
233 // Optional session token.
234 std::string session_token;
236 // Prefetch query and type.
237 std::string prefetch_query;
238 std::string prefetch_query_type;
240 // Additional query params to append to the request.
241 std::string additional_query_params;
243 // If set, ReplaceSearchTerms() will automatically append any extra query
244 // params specified via the --extra-search-query-params command-line
245 // argument. Generally, this should be set when dealing with the search
246 // TemplateURLRefs of the default search engine and the caller cares
247 // about the query portion of the URL. Since neither TemplateURLRef nor
248 // indeed TemplateURL know whether a TemplateURL is the default search
249 // engine, callers instead must set this manually.
250 bool append_extra_query_params_from_command_line = false;
252 // The raw content of an image thumbnail that will be used as a query for
253 // search-by-image frontend.
254 std::string image_thumbnail_content;
256 // The content type string for `image_thumbnail_content`.
257 std::string image_thumbnail_content_type;
259 // The image dimension data for a Google search-by-image query.
260 std::string processed_image_dimensions;
262 // When searching for an image, the URL of the original image. Callers
263 // should leave this empty for images specified via data: URLs.
266 // When searching for an image, the original size of the image.
267 gfx::Size image_original_size;
269 // Source of the search or suggest request.
270 RequestSource request_source = RequestSource::SEARCHBOX;
272 // Whether the query is being fetched as a prefetch request before the user
273 // actually searches for the search terms.
274 bool is_prefetch = false;
276 ContextualSearchParams contextual_search_params;
278 // The cache duration to be sent as a query string parameter in the zero
279 // suggest requests, if non-zero.
280 uint32_t zero_suggest_cache_duration_sec = 0;
282 // Whether the request should bypass the HTTP cache, i.e., a "shift-reload".
283 // If true, the net::LOAD_BYPASS_CACHE load flag will be set on the request.
284 bool bypass_cache = false;
286 // The source locale used for image translations.
287 std::string image_translate_source_locale;
289 // The target locale used for image translations.
290 std::string image_translate_target_locale;
// Constructors. |owner| is the TemplateURL that contains this ref; see the
// |owner_| member, which is expected to outlive the ref.
// The first form identifies the URL within |owner| by |type|. The second
// form takes a numerical |index_in_owner| instead -- presumably producing an
// |INDEXED| ref; TODO confirm against the implementation.
293 TemplateURLRef(const TemplateURL* owner, Type type);
294 TemplateURLRef(const TemplateURL* owner, size_t index_in_owner);
// TemplateURLRef declares a copy constructor and a copy-assignment operator.
297 TemplateURLRef(const TemplateURLRef& source);
298 TemplateURLRef& operator=(const TemplateURLRef& source);
300 // Returns the raw URL. None of the parameters will have been replaced.
301 std::string GetURL() const;
303 // Returns the raw string of the post params. Please see comments in
304 // prepopulated_engines_schema.json for the format.
305 std::string GetPostParamsString() const;
307 // Returns true if this URL supports search term replacement.
308 bool SupportsReplacement(const SearchTermsData& search_terms_data) const;
310 // Returns a string that is the result of replacing the search terms in
311 // the url with the specified arguments. We use our owner's input encoding.
313 // If this TemplateURLRef does not support replacement (SupportsReplacement
314 // returns false), an empty string is returned.
315 // If this TemplateURLRef uses POST, and |post_content| is not NULL, the
316 // |post_params_| will be replaced, encoded in "multipart/form-data" format
317 // and stored into |post_content|.
318 std::string ReplaceSearchTerms(const SearchTermsArgs& search_terms_args,
319 const SearchTermsData& search_terms_data,
320 PostContent* post_content) const;
322 // TODO(jnd): remove this ReplaceSearchTerms overload, which forwards to the
323 // three-argument version with a null |post_content|, once all callers pass
324 // |post_content| themselves.
325 std::string ReplaceSearchTerms(
326 const SearchTermsArgs& search_terms_args,
327 const SearchTermsData& search_terms_data) const {
328 return ReplaceSearchTerms(search_terms_args, search_terms_data, nullptr);
331 // Returns true if the TemplateURLRef is valid. An invalid TemplateURLRef is
332 // one that contains unknown terms, or invalid characters.
333 bool IsValid(const SearchTermsData& search_terms_data) const;
335 // Returns a string representation of this TemplateURLRef suitable for
336 // display. The display format is the same as the format used by Firefox.
337 std::u16string DisplayURL(const SearchTermsData& search_terms_data) const;
339 // Converts a string as returned by DisplayURL back into a string as
340 // understood by TemplateURLRef.
341 static std::string DisplayURLToURLRef(const std::u16string& display_url);
343 // If this TemplateURLRef is valid and contains one search term, this returns
344 // the host/path of the URL, otherwise this returns an empty string.
345 const std::string& GetHost(const SearchTermsData& search_terms_data) const;
346 std::string GetPath(const SearchTermsData& search_terms_data) const;
348 // If this TemplateURLRef is valid and contains one search term
349 // in its query or ref, this returns the key of the search term,
350 // otherwise this returns an empty string.
351 const std::string& GetSearchTermKey(
352 const SearchTermsData& search_terms_data) const;
354 // If this TemplateURLRef is valid and contains one search term,
355 // this returns the location of the search term,
356 // otherwise this returns url::Parsed::QUERY.
357 url::Parsed::ComponentType GetSearchTermKeyLocation(
358 const SearchTermsData& search_terms_data) const;
360 // If this TemplateURLRef is valid and contains one search term,
361 // this returns the fixed prefix before the search term,
362 // otherwise this returns an empty string.
363 const std::string& GetSearchTermValuePrefix(
364 const SearchTermsData& search_terms_data) const;
366 // If this TemplateURLRef is valid and contains one search term,
367 // this returns the fixed suffix after the search term,
368 // otherwise this returns an empty string.
369 const std::string& GetSearchTermValueSuffix(
370 const SearchTermsData& search_terms_data) const;
372 // Converts the specified term in our owner's encoding to a std::u16string.
373 std::u16string SearchTermToString16(const base::StringPiece& term) const;
375 // Returns true if this TemplateURLRef has a replacement term of
376 // {google:baseURL} or {google:baseSuggestURL}.
377 bool HasGoogleBaseURLs(const SearchTermsData& search_terms_data) const;
379 // Use the pattern referred to by this TemplateURLRef to match the provided
380 // |url| and extract |search_terms| from it. Returns true if the pattern
381 // matches, even if |search_terms| is empty. In this case
382 // |search_term_component|, if not NULL, indicates whether the search terms
383 // were found in the query or the ref parameters; and |search_terms_position|,
384 // if not NULL, contains the position of the search terms in the query or the
385 // ref parameters. Returns false and an empty |search_terms| if the pattern
387 bool ExtractSearchTermsFromURL(
389 std::u16string* search_terms,
390 const SearchTermsData& search_terms_data,
391 url::Parsed::ComponentType* search_term_component,
392 url::Component* search_terms_position) const;
394 // Whether the URL uses POST (as opposed to GET).
395 bool UsesPOSTMethod(const SearchTermsData& search_terms_data) const;
397 // Estimates dynamic memory usage.
398 // See base/trace_event/memory_usage_estimator.h for more info.
399 size_t EstimateMemoryUsage() const;
402 friend class TemplateURL;
403 friend class TemplateURLTest;
404 FRIEND_TEST_ALL_PREFIXES(TemplateURLTest,
405 ImageThumbnailContentTypePostParams);
406 FRIEND_TEST_ALL_PREFIXES(TemplateURLTest, SetPrepopulatedAndParse);
407 FRIEND_TEST_ALL_PREFIXES(TemplateURLTest, ParseParameterKnown);
408 FRIEND_TEST_ALL_PREFIXES(TemplateURLTest, ParseParameterUnknown);
409 FRIEND_TEST_ALL_PREFIXES(TemplateURLTest, ParseURLEmpty);
410 FRIEND_TEST_ALL_PREFIXES(TemplateURLTest, ParseURLNoTemplateEnd);
411 FRIEND_TEST_ALL_PREFIXES(TemplateURLTest, ParseURLNoKnownParameters);
412 FRIEND_TEST_ALL_PREFIXES(TemplateURLTest, ParseURLTwoParameters);
413 FRIEND_TEST_ALL_PREFIXES(TemplateURLTest, ParseURLNestedParameter);
414 FRIEND_TEST_ALL_PREFIXES(TemplateURLTest, URLRefTestImageURLWithPOST);
416 // Enumeration of the known types.
417 enum ReplacementType {
419 GOOGLE_ASSISTED_QUERY_STATS,
420 GOOGLE_BASE_SEARCH_BY_IMAGE_URL,
421 GOOGLE_BASE_SUGGEST_URL,
423 GOOGLE_CLIENT_CACHE_TIME_TO_LIVE,
424 GOOGLE_CONTEXTUAL_SEARCH_CONTEXT_DATA,
425 GOOGLE_CONTEXTUAL_SEARCH_VERSION,
426 GOOGLE_CURRENT_PAGE_URL,
427 GOOGLE_CURSOR_POSITION,
428 GOOGLE_IMAGE_ORIGINAL_HEIGHT,
429 GOOGLE_IMAGE_ORIGINAL_WIDTH,
430 GOOGLE_IMAGE_SEARCH_SOURCE,
431 GOOGLE_IMAGE_THUMBNAIL_BASE64,
432 GOOGLE_IMAGE_THUMBNAIL,
435 GOOGLE_IOS_SEARCH_LANGUAGE,
436 GOOGLE_NTP_IS_THEMED,
437 GOOGLE_OMNIBOX_FOCUS_TYPE,
438 GOOGLE_ORIGINAL_QUERY_FOR_SUGGESTION,
439 GOOGLE_PAGE_CLASSIFICATION,
440 GOOGLE_PREFETCH_QUERY,
441 GOOGLE_PREFETCH_SOURCE,
442 GOOGLE_PROCESSED_IMAGE_DIMENSIONS,
444 GOOGLE_SEARCH_CLIENT,
445 GOOGLE_SEARCH_FIELDTRIAL_GROUP,
446 GOOGLE_SEARCH_VERSION,
447 GOOGLE_SESSION_TOKEN,
448 GOOGLE_SUGGEST_CLIENT,
449 GOOGLE_SUGGEST_REQUEST_ID,
450 GOOGLE_UNESCAPED_SEARCH_TERMS,
455 IMAGE_TRANSLATE_SOURCE_LOCALE,
456 IMAGE_TRANSLATE_TARGET_LOCALE,
459 // Used to identify an element of the raw url that can be replaced.
// The constructor records |type| and |index| and initializes |is_post_param|
// to false.
461 Replacement(ReplacementType type, size_t index)
462 : type(type), index(index), is_post_param(false) {}
463 ReplacementType type;
465 // Indicates the location where the replacement is made. If
466 // |is_post_param| is false, |index| indicates the byte position in
467 // |parsed_url_|. Otherwise, |index| is the index of |post_params_|.
471 // Stores a single parameter for a POST.
475 std::string content_type;
477 // Estimates dynamic memory usage.
478 // See base/trace_event/memory_usage_estimator.h for more info.
479 size_t EstimateMemoryUsage() const;
482 // The list of elements to replace.
483 typedef std::vector<struct Replacement> Replacements;
484 typedef std::vector<PostParam> PostParams;
486 // TemplateURLRef internally caches values to make replacement quick. This
487 // method invalidates any cached values.
488 void InvalidateCachedValues() const;
490 // Parses the parameter in url at the specified offset. start/end specify the
491 // range of the parameter in the url, including the braces. If the parameter
492 // is valid, url is updated to reflect the appropriate parameter. If
493 // the parameter is one of the known parameters an element is added to
494 // replacements indicating the type and range of the element. The original
495 // parameter is erased from the url.
497 // If the parameter is not a known parameter, false is returned. If this is a
498 // prepopulated URL, the parameter is erased, otherwise it is left alone.
499 bool ParseParameter(size_t start,
502 Replacements* replacements) const;
504 // Parses the specified url, replacing parameters as necessary. If
505 // successful, valid is set to true, and the parsed url is returned. For all
506 // known parameters that are encountered an entry is added to replacements.
507 // If there is an error parsing the url, valid is set to false, and an empty
508 // string is returned. If the URL has the POST parameters, they will be
509 // parsed into |post_params| which will be further replaced with real search
510 // terms data and encoded in "multipart/form-data" format to generate the
512 std::string ParseURL(const std::string& url,
513 Replacements* replacements,
514 PostParams* post_params,
517 // If the url has not yet been parsed, ParseURL is invoked.
518 // NOTE: While this is const, it modifies parsed_, valid_, parsed_url_ and
520 void ParseIfNecessary(const SearchTermsData& search_terms_data) const;
522 // Parses a wildcard out of |path|, putting the parsed path in |path_prefix_|
523 // and |path_suffix_| and setting |path_wildcard_present_| to true.
524 // In the absence of a wildcard, the full path will be contained in
525 // |path_prefix_| and |path_wildcard_present_| will be false.
526 void ParsePath(const std::string& path) const;
528 // Returns whether the path portion of this template URL is equal to the path
529 // in |url|, checking that URL is prefixed/suffixed by
530 // |path_prefix_|/|path_suffix_| if |path_wildcard_present_| is true, or equal
531 // to |path_prefix_| otherwise.
532 bool PathIsEqual(const GURL& url) const;
534 // Extracts the query key and host from the url.
535 void ParseHostAndSearchTermKey(
536 const SearchTermsData& search_terms_data) const;
538 // Encode post parameters in "multipart/form-data" format and store it
539 // inside |post_content|. Returns false if errors are encountered during
540 // encoding. This method is called each time ReplaceSearchTerms gets called.
541 bool EncodeFormData(const PostParams& post_params,
542 PostContent* post_content) const;
544 // Handles a replacement by using real term data. If the replacement
545 // belongs to a PostParam, the PostParam will be replaced by the term data.
546 // Otherwise, the term data will be inserted at the place that the
547 // replacement points to.
548 // Can be called repeatedly with the same replacement.
549 void HandleReplacement(const std::string& name,
550 const std::string& value,
551 const Replacement& replacement,
552 std::string* url) const;
554 // Replaces all replacements in |parsed_url_| with their actual values and
555 // returns the result. This is the main functionality of
556 // ReplaceSearchTerms().
557 std::string HandleReplacements(
558 const SearchTermsArgs& search_terms_args,
559 const SearchTermsData& search_terms_data,
560 PostContent* post_content) const;
562 // The TemplateURL that contains us. This should outlive us.
563 raw_ptr<const TemplateURL> owner_;
565 // What kind of URL we are.
568 // If |type_| is |INDEXED|, this |index_in_owner_| is used instead to refer to
569 // a url within our owner.
570 size_t index_in_owner_ = 0;
572 // Whether the URL has been parsed.
573 mutable bool parsed_ = false;
575 // Whether the url was successfully parsed.
576 mutable bool valid_ = false;
578 // The parsed URL. All terms have been stripped out of this with
579 // replacements_ giving the index of the terms to replace.
580 mutable std::string parsed_url_;
582 // Do we support search term replacement?
583 mutable bool supports_replacements_ = false;
585 // The replaceable parts of url (parsed_url_). These are ordered by index
586 // into the string, and may be empty.
587 mutable Replacements replacements_;
589 // Whether the path contains a wildcard.
590 mutable bool path_wildcard_present_ = false;
592 // Host, port, path, key and location of the search term. These are only set
593 // if the url contains one search term.
594 mutable std::string host_;
595 mutable std::string port_;
596 mutable std::string path_prefix_;
597 mutable std::string path_suffix_;
598 mutable std::string search_term_key_;
599 mutable url::Parsed::ComponentType search_term_key_location_ =
601 mutable std::string search_term_value_prefix_;
602 mutable std::string search_term_value_suffix_;
604 mutable PostParams post_params_;
606 // Whether the contained URL is a pre-populated URL.
607 bool prepopulated_ = false;
611 // TemplateURL ----------------------------------------------------------------
613 // A TemplateURL represents a single "search engine", defined primarily as a
614 // subset of the Open Search Description Document
615 // (http://www.opensearch.org/Specifications/OpenSearch) plus some extensions.
616 // One TemplateURL contains several TemplateURLRefs, which correspond to various
617 // different capabilities (e.g. doing searches or getting suggestions), as well
618 // as a TemplateURLData containing other details like the name, keyword, etc.
620 // TemplateURLs are intended to be read-only for most users.
621 // The TemplateURLService, which handles storing and manipulating TemplateURLs,
622 // is made a friend so that it can be the exception to this pattern.
625 using TemplateURLVector = std::vector<TemplateURL*>;
626 using OwnedTemplateURLVector = std::vector<std::unique_ptr<TemplateURL>>;
628 // These values are not persisted and can be freely changed.
629 // Their integer values are used for choosing the best engine during keyword
630 // conflicts, so their relative ordering should not be changed without careful
631 // thought about what happens during version skew.
633 // Installed only on this device. Should not be synced. This is not common.
635 // Regular search engine. This is the most common, and the ONLY type synced.
637 // Installed by extension through Override Settings API. Not synced.
638 NORMAL_CONTROLLED_BY_EXTENSION = 2,
639 // The keyword associated with an extension that uses the Omnibox API.
641 OMNIBOX_API_EXTENSION = 3,
644 // An AssociatedExtensionInfo represents information about the extension that
645 // added the search engine.
646 struct AssociatedExtensionInfo {
// The constructor parameters mirror the three data members declared below.
647 AssociatedExtensionInfo(const std::string& extension_id,
648 base::Time install_time,
649 bool wants_to_be_default_engine);
650 ~AssociatedExtensionInfo();
652 // Estimates dynamic memory usage.
653 // See base/trace_event/memory_usage_estimator.h for more info.
654 size_t EstimateMemoryUsage() const;
// ID of the extension this info describes.
656 std::string extension_id;
658 // Used to resolve conflicts when there are multiple extensions specifying
659 // the default search engine. The most recently-installed wins.
660 base::Time install_time;
662 // Whether the search engine is supposed to be default.
// NOTE(review): no in-class initializer here; presumably always set by the
// constructor -- confirm in the implementation.
663 bool wants_to_be_default_engine;
666 explicit TemplateURL(const TemplateURLData& data, Type type = NORMAL);
668 // Constructor for extension controlled engine. |type| must be
669 // NORMAL_CONTROLLED_BY_EXTENSION or OMNIBOX_API_EXTENSION.
670 TemplateURL(const TemplateURLData& data,
672 std::string extension_id,
673 base::Time install_time,
674 bool wants_to_be_default_engine);
676 TemplateURL(const TemplateURL&) = delete;
677 TemplateURL& operator=(const TemplateURL&) = delete;
681 // For two engines with the same keyword, |this| and |other|,
682 // returns true if |this| is strictly better than |other|.
684 // While normal engines must all have distinct keywords, policy-created,
685 // extension-controlled and omnibox API engines may have the same keywords as
686 // each other or as normal engines. In these cases, policy-created engines
687 // override omnibox API engines, which override extension-controlled engines,
688 // which override normal engines.
690 // If there is still a conflict after this, compare by safe-for-autoreplace,
691 // then last modified date, then use the sync guid as a tiebreaker.
693 // TODO(tommycli): I'd like to use this to resolve Sync conflicts in the
694 // future, but we need a total ordering of TemplateURLs. That's not the case
695 // today, because the sync GUIDs are not actually globally unique, so there
696 // can be a genuine tie, which is not good, because then two different clients
697 // could choose to resolve the conflict in two different ways.
698 bool IsBetterThanEngineWithConflictingKeyword(const TemplateURL* other) const;
700 // Generates a suitable keyword for the specified url, which must be valid.
701 // This is guaranteed not to return an empty string, since TemplateURLs should
702 // never have an empty keyword.
703 static std::u16string GenerateKeyword(const GURL& url);
705 // Generates a favicon URL from the specified url.
706 static GURL GenerateFaviconURL(const GURL& url);
708 // Returns true if |t_url| and |data| are equal in all meaningful respects.
709 // Static to allow either or both params to be NULL.
710 static bool MatchesData(const TemplateURL* t_url,
711 const TemplateURLData* data,
712 const SearchTermsData& search_terms_data);
714 const TemplateURLData& data() const { return data_; }
716 const std::u16string& short_name() const { return data_.short_name(); }
717 // An accessor for the short_name, but adjusted so it can be appropriately
718 // displayed even if it is LTR and the UI is RTL.
719 std::u16string AdjustedShortNameForLocaleDirection() const;
721 const std::u16string& keyword() const { return data_.keyword(); }
723 const std::string& url() const { return data_.url(); }
724 const std::string& suggestions_url() const { return data_.suggestions_url; }
725 const std::string& image_url() const { return data_.image_url; }
726 const std::string& image_translate_url() const {
727 return data_.image_translate_url;
729 const std::string& new_tab_url() const { return data_.new_tab_url; }
730 const std::string& contextual_search_url() const {
731 return data_.contextual_search_url;
733 const std::string& search_url_post_params() const {
734 return data_.search_url_post_params;
736 const std::string& suggestions_url_post_params() const {
737 return data_.suggestions_url_post_params;
739 const std::string& image_url_post_params() const {
740 return data_.image_url_post_params;
742 const std::string& side_search_param() const {
743 return data_.side_search_param;
745 const std::string& side_image_search_param() const {
746 return data_.side_image_search_param;
748 const std::string& image_translate_source_language_param_key() const {
749 return data_.image_translate_source_language_param_key;
751 const std::string& image_translate_target_language_param_key() const {
752 return data_.image_translate_target_language_param_key;
754 const std::u16string& image_search_branding_label() const {
755 return !data_.image_search_branding_label.empty()
756 ? data_.image_search_branding_label
759 const std::vector<std::string>& search_intent_params() const {
760 return data_.search_intent_params;
762 const std::vector<std::string>& alternate_urls() const {
763 return data_.alternate_urls;
765 const GURL& favicon_url() const { return data_.favicon_url; }
767 const GURL& logo_url() const { return data_.logo_url; }
769 const GURL& doodle_url() const { return data_.doodle_url; }
771 const GURL& originating_url() const { return data_.originating_url; }
773 bool safe_for_autoreplace() const { return data_.safe_for_autoreplace; }
775 const std::vector<std::string>& input_encodings() const {
776 return data_.input_encodings;
779 TemplateURLID id() const { return data_.id; }
781 base::Time date_created() const { return data_.date_created; }
782 base::Time last_modified() const { return data_.last_modified; }
783 base::Time last_visited() const { return data_.last_visited; }
// Returns the |data_.created_by_policy| flag.
785 bool created_by_policy() const { return data_.created_by_policy; }
// Returns the |data_.enforced_by_policy| flag.
786 bool enforced_by_policy() const { return data_.enforced_by_policy; }
// Returns the |data_.created_from_play_api| flag.
787 bool created_from_play_api() const { return data_.created_from_play_api; }
// Returns |data_.usage_count|.
789 int usage_count() const { return data_.usage_count; }
// Returns |data_.prepopulate_id|.
791 int prepopulate_id() const { return data_.prepopulate_id; }
// Returns |data_.sync_guid| by const reference.
793 const std::string& sync_guid() const { return data_.sync_guid; }
// Returns |data_.is_active|. Note that the result is a
// TemplateURLData::ActiveStatus value, not a bool.
795 TemplateURLData::ActiveStatus is_active() const { return data_.is_active; }
// Returns |data_.starter_pack_id|.
797 int starter_pack_id() const { return data_.starter_pack_id; }
// Returns the whole |url_refs_| vector by const reference.
799 const std::vector<TemplateURLRef>& url_refs() const { return url_refs_; }
// Returns the last entry of |url_refs_|. CHECK-fails if |url_refs_| is empty,
// so back() is never called on an empty vector.
800 const TemplateURLRef& url_ref() const {
801 // Sanity check for https://crbug.com/781703.
802 CHECK(!url_refs_.empty());
803 return url_refs_.back();
// Returns the member |suggestions_url_ref_| by const reference.
805 const TemplateURLRef& suggestions_url_ref() const {
806 return suggestions_url_ref_;
// Returns the member |image_url_ref_| by const reference.
808 const TemplateURLRef& image_url_ref() const { return image_url_ref_; }
// Returns the member |image_translate_url_ref_| by const reference.
809 const TemplateURLRef& image_translate_url_ref() const {
810 return image_translate_url_ref_;
// Returns the member |new_tab_url_ref_| by const reference.
812 const TemplateURLRef& new_tab_url_ref() const { return new_tab_url_ref_; }
// Returns the member |contextual_search_url_ref_| by const reference.
813 const TemplateURLRef& contextual_search_url_ref() const {
814 return contextual_search_url_ref_;
// Returns the member |type_|.
817 Type type() const { return type_; }
// Test-only accessor. Returns the raw, non-owning pointer obtained from
// |extension_info_|.get(); this is nullptr when |extension_info_| holds
// nothing. Ownership stays with |extension_info_|.
819 const AssociatedExtensionInfo* GetExtensionInfoForTesting() const {
820 return extension_info_.get();
823 // Returns true if |url| supports replacement.
824 bool SupportsReplacement(const SearchTermsData& search_terms_data) const;
826 // Returns true if any URLRefs use Google base URLs.
827 bool HasGoogleBaseURLs(const SearchTermsData& search_terms_data) const;
829 // Returns true if this TemplateURL uses Google base URLs and has a keyword
830 // of "google.TLD". We use this to decide whether we can automatically
831 // update the keyword to reflect the current Google base URL TLD.
832 bool IsGoogleSearchURLWithReplaceableKeyword(
833 const SearchTermsData& search_terms_data) const;
835 // Returns true if the keywords match, or if both |this| and |other|
836 // satisfy IsGoogleSearchURLWithReplaceableKeyword().
838 bool HasSameKeywordAs(const TemplateURLData& other,
839 const SearchTermsData& search_terms_data) const;
841 // Returns the id of the extension that added this search engine. Only call
842 // this for TemplateURLs of type NORMAL_CONTROLLED_BY_EXTENSION or
843 // OMNIBOX_API_EXTENSION.
844 std::string GetExtensionId() const;
846 // Returns the type of this search engine; SEARCH_ENGINE_OTHER is the fallback.
848 SearchEngineType GetEngineType(
849 const SearchTermsData& search_terms_data) const;
851 // Returns the type of this search engine, i.e. whether the engine is a
852 // prepopulated engine, starter pack engine, or not built-in.
853 BuiltinEngineType GetBuiltinEngineType() const;
855 // Use the alternate URLs and the search URL to match the provided |url|
856 // and extract |search_terms| from it. Returns false and an empty
857 // |search_terms| if no search terms can be matched. The URLs are matched in
858 // the order listed in |url_refs_| (see comment there).
859 bool ExtractSearchTermsFromURL(const GURL& url,
860 const SearchTermsData& search_terms_data,
861 std::u16string* search_terms) const;
863 // Returns true if non-empty search terms could be extracted from |url| using
864 // ExtractSearchTermsFromURL(). In other words, this returns whether |url|
865 // could be the result of performing a search with |this|.
866 bool IsSearchURL(const GURL& url,
867 const SearchTermsData& search_terms_data) const;
869 // Given a `url` corresponding to this TemplateURL, keeps the search terms and
870 // optionally the search intent params and removes the other params. If
871 // `normalize_search_terms` is true, the search terms in the final URL
872 // will be converted to lowercase with extra whitespace characters collapsed.
873 // If `url` is not a search URL or replacement fails, leaves `out_url` and
874 // `out_search_terms` untouched and returns false. Used to compare
875 // normalized (aka canonical) search URLs.
876 bool KeepSearchTermsInURL(const GURL& url,
877 const SearchTermsData& search_terms_data,
878 const bool keep_search_intent_params,
879 const bool normalize_search_terms,
881 std::u16string* out_search_terms = nullptr) const;
883 // Given a |url| corresponding to this TemplateURL, identifies the search
884 // terms and replaces them with the ones in |search_terms_args|, leaving the
885 // other parameters untouched. If the replacement fails, returns false and
886 // leaves |result| untouched. This is used by mobile ports to perform query
888 bool ReplaceSearchTermsInURL(
890 const TemplateURLRef::SearchTermsArgs& search_terms_args,
891 const SearchTermsData& search_terms_data,
894 // Encodes the search terms from |search_terms_args| so that we know the
895 // |input_encoding|. Returns the |encoded_terms| and the
896 // |encoded_original_query|. |encoded_terms| may be escaped as path or query
897 // depending on |is_in_query|; |encoded_original_query| is always escaped as
899 void EncodeSearchTerms(
900 const TemplateURLRef::SearchTermsArgs& search_terms_args,
902 std::string* input_encoding,
903 std::u16string* encoded_terms,
904 std::u16string* encoded_original_query) const;
906 // Returns the search url for this template URL and the optional search terms.
907 // Uses something obscure as the default value for the search terms argument
908 // so that in the rare case the term replaces the URL it's unlikely another
909 // keyword would have the same url.
910 // Returns an empty GURL if this template URL has no url().
911 GURL GenerateSearchURL(
912 const SearchTermsData& search_terms_data,
913 const std::u16string& search_terms = u"blah.blah.blah.blah.blah") const;
915 // Returns the suggest endpoint URL for this template URL.
916 // Returns an empty GURL if this template URL has no suggestions_url().
917 GURL GenerateSuggestionURL(const SearchTermsData& search_terms_data) const;
919 // Returns true if this search engine supports the side search feature.
920 bool IsSideSearchSupported() const;
922 // Returns true if this search engine supports the side image search feature.
923 bool IsSideImageSearchSupported() const;
925 // Takes a search URL belonging to this search engine and generates the URL
926 // appropriate for the side search side panel.
927 GURL GenerateSideSearchURL(const GURL& search_url,
928 const std::string& version,
929 const SearchTermsData& search_terms_data) const;
931 // Takes a search URL that belongs to this side search in the side panel and
932 // removes the side search param from the URL.
933 GURL RemoveSideSearchParamFromURL(const GURL& side_search_url) const;
935 // Takes a search URL belonging to this image search engine and generates the
936 // URL appropriate for the image search in the side panel.
937 GURL GenerateSideImageSearchURL(const GURL& image_search_url,
938 const std::string& version) const;
940 // Takes a search URL that belongs to this image search in the side panel and
941 // removes the side image search param from the URL.
942 GURL RemoveSideImageSearchParamFromURL(const GURL& image_search_url) const;
944 // TemplateURL internally caches values derived from a passed SearchTermsData
945 // to make its functions quick. This method invalidates any cached values and
946 // it should be called after SearchTermsData has been changed.
947 void InvalidateCachedValues() const;
949 // Estimates dynamic memory usage.
950 // See base/trace_event/memory_usage_estimator.h for more info.
951 size_t EstimateMemoryUsage() const;
953 // Returns whether |url| query contains a side search param.
954 bool ContainsSideSearchParam(const GURL& url) const;
956 // Returns whether |url| query contains a side image search param.
957 bool ContainsSideImageSearchParam(const GURL& url) const;
960 friend class TemplateURLService;
962 void CopyFrom(const TemplateURL& other);
964 void SetURL(const std::string& url);
965 void SetPrepopulateId(int id);
967 // Resets the keyword if IsGoogleSearchURLWithReplaceableKeyword() or |force|.
968 // The |force| parameter is useful when the existing keyword is known to be
969 // a placeholder. The resulting keyword is generated using
970 // GenerateSearchURL() and GenerateKeyword().
971 void ResetKeywordIfNecessary(const SearchTermsData& search_terms_data,
974 // Resizes the |url_refs_| vector; the search URL is always its last element.
976 void ResizeURLRefVector();
978 // Uses the alternate URLs and the search URL to match the provided |url|
979 // and extract |search_terms| from it as well as the |search_terms_component|
980 // (either REF or QUERY) and |search_terms_position| at which the
981 // |search_terms| are found in |url|. See also ExtractSearchTermsFromURL().
982 bool FindSearchTermsInURL(const GURL& url,
983 const SearchTermsData& search_terms_data,
984 std::u16string* search_terms,
985 url::Parsed::ComponentType* search_terms_component,
986 url::Component* search_terms_position) const;
988 TemplateURLData data_;
990 // Contains TemplateURLRefs corresponding to the alternate URLs and the search
991 // URL, in priority order: the URL at index 0 is treated as the highest
992 // priority and the primary search URL is treated as the lowest priority. For
993 // example, if a TemplateURL has alternate URL "http://foo/#q={searchTerms}"
994 // and search URL "http://foo/?q={searchTerms}", and the URL to be decoded is
995 // "http://foo/?q=a#q=b", the alternate URL will match first and the decoded
996 // search term will be "b". Note that since every TemplateURL has a
997 // primary search URL, this vector is never empty.
998 std::vector<TemplateURLRef> url_refs_;
1000 TemplateURLRef suggestions_url_ref_;
1001 TemplateURLRef image_url_ref_;
1002 TemplateURLRef image_translate_url_ref_;
1003 TemplateURLRef new_tab_url_ref_;
1004 TemplateURLRef contextual_search_url_ref_;
1005 std::unique_ptr<AssociatedExtensionInfo> extension_info_;
1009 // Caches the computed engine type across successive calls to GetEngineType().
1010 mutable SearchEngineType engine_type_;
1012 // TODO(sky): Add date last parsed OSD file.
1015 #endif // COMPONENTS_SEARCH_ENGINES_TEMPLATE_URL_H_