1 // Copyright (c) 2013 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
5 #ifndef CHROME_BROWSER_HISTORY_PAGE_COLLECTOR_H_
6 #define CHROME_BROWSER_HISTORY_PAGE_COLLECTOR_H_
8 #include "base/basictypes.h"
9 #include "base/containers/mru_cache.h"
10 #include "base/memory/weak_ptr.h"
11 #include "base/strings/string16.h"
12 #include "base/time/time.h"
18 class HistoryPublisher;
20 // Collects page data and publishes it to HistoryPublisher.
23 // You must call Init() to complete initialization.
27 // Must be called before any other member function is used.
// NOTE(review): |history_publisher| is presumably what ends up in
// history_publisher_, which is documented as possibly NULL -- confirm in the
// implementation file.
28 void Init(const HistoryPublisher* history_publisher);
30 // Sets specific information for the given page to be published.
31 // In normal operation, URLs will be added as the user visits them; the titles
32 // and bodies will come in some time after that. These changes will be
33 // automatically coalesced and added to the database some time in the future
34 // using AddPageData().
36 // AddPageURL must be called for a given URL before either the title
37 // or body is set. The visit time should be the time corresponding to
38 // that visit in the history database.
39 void AddPageURL(const GURL& url, base::Time visit_time);
40 void AddPageTitle(const GURL& url, const string16& title);
41 void AddPageContents(const GURL& url, const string16& body);
// Takes the URL, visit time, title, and body of a page together in one call.
// The comment on the AddPageURL()/AddPageTitle()/AddPageContents() group names
// this function as the one used to add the coalesced data to the database.
43 void AddPageData(const GURL& url,
44 base::Time visit_time,
45 const string16& title,
46 const string16& body);
49 // Accumulates the "recent stuff" that has happened with a page. This is needed
50 // because the page visit, title, and body all come in at different times.
// The constructor takes only the visit time; the title and body are supplied
// later through the setters.
53 explicit PageInfo(base::Time visit_time);
// Read-only accessors for the stored visit time, title, and body.
57 base::Time visit_time() const { return visit_time_; }
58 const string16& title() const { return title_; }
59 const string16& body() const { return body_; }
61 // Setters. Only the title and body can be updated.
// Per the has_title()/has_body() comment, both setters "fix" an empty string
// to be a space.
62 void set_title(const string16& ttl);
63 void set_body(const string16& bdy);
65 // Return true if the title or body of the entry, respectively, has been set.
66 // Since both the title and body setters will "fix" empty strings to be a
67 // space, these indicate if the setter was ever called.
68 bool has_title() const { return !title_.empty(); }
69 bool has_body() const { return !body_.empty(); }
71 // Returns true if this entry was added too long ago and we should give up
72 // waiting for more data. The current time is passed in as an argument so that
73 // many entries can be checked without re-querying the timer.
74 bool Expired(base::TimeTicks now) const;
77 // Time at which the URL was visited. This will be the value stored in the URL
78 // and visit tables for the entry. Returned by visit_time().
79 base::Time visit_time_;
81 // When this page entry was created. We have a cap on the maximum time that
82 // an entry will be in the queue before being flushed to the database.
// NOTE(review): presumably Expired() measures that cap from this timestamp --
// confirm in the implementation file.
83 base::TimeTicks added_time_;
85 // The title and body are stored as the string " " when set to an empty value, to distinguish set from unset.
90 // Collected data is published when both the title and body are
91 // present. HTTPS data is never passed to AddPageContents(), so
92 // periodically collected data is published without the contents.
93 // Pages which take a long time to load will not have their bodies published.
95 void ScheduleFlushCollected();
96 void FlushCollected();
98 // Lists recent additions that we have not yet filled out with the title and
99 // body. The list is sorted by time; entries are flushed when they are
100 // complete or have been in the queue too long without modification.
102 // We kind of abuse the MRUCache because we never move things around in it
103 // using Get. Instead, we keep them in the order they were inserted, since
104 // this is the metric we use to measure age. The MRUCache gives us an ordered
105 // list with fast lookup by URL.
106 typedef base::MRUCache<GURL, PageInfo> RecentChangeList;
107 RecentChangeList recent_changes_;
109 // Generates tasks for our periodic checking of expired "recent changes".
// NOTE(review): presumably the tasks are bound to weak pointers from this
// factory so that they do nothing once this object is destroyed -- confirm in
// the implementation file.
110 base::WeakPtrFactory<PageCollector> weak_factory_;
112 // This object is created and managed by the history backend. We maintain an
113 // opaque pointer to the object for our use.
114 // This can be NULL if there are no indexers registered to receive indexing data.
116 const HistoryPublisher* history_publisher_;
118 DISALLOW_COPY_AND_ASSIGN(PageCollector);
121 } // namespace history
123 #endif // CHROME_BROWSER_HISTORY_PAGE_COLLECTOR_H_